{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 90171, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.1873316764831543, "learning_rate": 0.0005999999998179221, "loss": 10.4413, "step": 1 }, { "epoch": 0.0, "grad_norm": 3.3818414211273193, "learning_rate": 0.0005999999992716884, "loss": 9.6408, "step": 2 }, { "epoch": 0.0, "grad_norm": 2.8012495040893555, "learning_rate": 0.0005999999983612989, "loss": 9.0706, "step": 3 }, { "epoch": 0.0, "grad_norm": 2.533276081085205, "learning_rate": 0.0005999999970867536, "loss": 8.6312, "step": 4 }, { "epoch": 0.0, "grad_norm": 2.4713809490203857, "learning_rate": 0.0005999999954480525, "loss": 8.2468, "step": 5 }, { "epoch": 0.0, "grad_norm": 2.3754451274871826, "learning_rate": 0.0005999999934451956, "loss": 7.8833, "step": 6 }, { "epoch": 0.0, "grad_norm": 2.2733170986175537, "learning_rate": 0.0005999999910781829, "loss": 7.5058, "step": 7 }, { "epoch": 0.0, "grad_norm": 2.099640369415283, "learning_rate": 0.0005999999883470145, "loss": 7.1765, "step": 8 }, { "epoch": 0.0, "grad_norm": 1.8562754392623901, "learning_rate": 0.0005999999852516903, "loss": 6.884, "step": 9 }, { "epoch": 0.0, "grad_norm": 1.6445894241333008, "learning_rate": 0.0005999999817922103, "loss": 6.6094, "step": 10 }, { "epoch": 0.0, "grad_norm": 1.4697778224945068, "learning_rate": 0.0005999999779685744, "loss": 6.3931, "step": 11 }, { "epoch": 0.0, "grad_norm": 1.1860110759735107, "learning_rate": 0.0005999999737807829, "loss": 6.2188, "step": 12 }, { "epoch": 0.0, "grad_norm": 0.95598965883255, "learning_rate": 0.0005999999692288357, "loss": 6.0565, "step": 13 }, { "epoch": 0.0, "grad_norm": 0.7745048999786377, "learning_rate": 0.0005999999643127326, "loss": 5.9301, "step": 14 }, { "epoch": 0.0, "grad_norm": 0.6340143084526062, "learning_rate": 0.0005999999590324737, "loss": 5.8668, "step": 15 }, { "epoch": 0.0, "grad_norm": 0.5424383878707886, "learning_rate": 0.0005999999533880591, "loss": 5.7924, "step": 16 }, { "epoch": 0.0, "grad_norm": 0.6173386573791504, "learning_rate": 0.0005999999473794888, "loss": 5.7266, "step": 17 }, { "epoch": 0.0, "grad_norm": 0.7270485758781433, "learning_rate": 0.0005999999410067627, "loss": 5.6248, "step": 18 }, { "epoch": 0.0, "grad_norm": 0.7480476498603821, "learning_rate": 0.000599999934269881, "loss": 5.5254, "step": 19 }, { "epoch": 0.0, "grad_norm": 0.6527186036109924, "learning_rate": 0.0005999999271688435, "loss": 5.4848, "step": 20 }, { "epoch": 0.0, "grad_norm": 0.6333152055740356, "learning_rate": 0.0005999999197036502, "loss": 5.4187, "step": 21 }, { "epoch": 0.0, "grad_norm": 0.6588134765625, "learning_rate": 0.0005999999118743014, "loss": 5.3911, "step": 22 }, { "epoch": 0.0, "grad_norm": 0.7487804293632507, "learning_rate": 0.0005999999036807968, "loss": 5.2479, "step": 23 }, { "epoch": 0.0, "grad_norm": 0.7624616026878357, "learning_rate": 0.0005999998951231365, "loss": 5.2294, "step": 24 }, { "epoch": 0.0, "grad_norm": 0.6286913156509399, "learning_rate": 0.0005999998862013205, "loss": 5.1548, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.7148377299308777, "learning_rate": 0.000599999876915349, "loss": 5.0351, "step": 26 }, { "epoch": 0.0, "grad_norm": 0.6132169365882874, "learning_rate": 0.0005999998672652217, "loss": 5.0784, "step": 27 }, { "epoch": 0.0, "grad_norm": 0.5866793394088745, "learning_rate": 0.0005999998572509389, "loss": 4.9685, "step": 28 }, { "epoch": 0.0, "grad_norm": 0.6250637173652649, "learning_rate": 0.0005999998468725003, "loss": 4.9167, "step": 29 }, { "epoch": 0.0, "grad_norm": 0.5500860214233398, "learning_rate": 0.0005999998361299062, "loss": 4.7983, "step": 30 }, { "epoch": 0.0, "grad_norm": 0.5970760583877563, "learning_rate": 0.0005999998250231564, "loss": 4.8531, "step": 31 }, { "epoch": 0.0, "grad_norm": 0.6141502857208252, "learning_rate": 0.0005999998135522511, "loss": 4.7177, "step": 32 }, { "epoch": 0.0, "grad_norm": 0.540304958820343, "learning_rate": 0.0005999998017171903, "loss": 4.6622, "step": 33 }, { "epoch": 0.0, "grad_norm": 0.5156924724578857, "learning_rate": 0.0005999997895179738, "loss": 4.6279, "step": 34 }, { "epoch": 0.0, "grad_norm": 0.5423431396484375, "learning_rate": 0.0005999997769546018, "loss": 4.5943, "step": 35 }, { "epoch": 0.0, "grad_norm": 0.5369262099266052, "learning_rate": 0.0005999997640270744, "loss": 4.5739, "step": 36 }, { "epoch": 0.0, "grad_norm": 0.6271786093711853, "learning_rate": 0.0005999997507353913, "loss": 4.603, "step": 37 }, { "epoch": 0.0, "grad_norm": 0.5006738901138306, "learning_rate": 0.0005999997370795528, "loss": 4.5401, "step": 38 }, { "epoch": 0.0, "grad_norm": 0.7793611288070679, "learning_rate": 0.0005999997230595589, "loss": 4.4622, "step": 39 }, { "epoch": 0.0, "grad_norm": 0.5402147769927979, "learning_rate": 0.0005999997086754094, "loss": 4.4159, "step": 40 }, { "epoch": 0.0, "grad_norm": 0.6135217547416687, "learning_rate": 0.0005999996939271045, "loss": 4.3856, "step": 41 }, { "epoch": 0.0, "grad_norm": 0.5197340846061707, "learning_rate": 0.0005999996788146442, "loss": 4.4133, "step": 42 }, { "epoch": 0.0, "grad_norm": 0.5789491534233093, "learning_rate": 0.0005999996633380284, "loss": 4.2983, "step": 43 }, { "epoch": 0.0, "grad_norm": 0.5384756326675415, "learning_rate": 0.0005999996474972573, "loss": 4.3702, "step": 44 }, { "epoch": 0.0, "grad_norm": 0.5341759920120239, "learning_rate": 0.0005999996312923308, "loss": 4.2901, "step": 45 }, { "epoch": 0.0, "grad_norm": 0.559344470500946, "learning_rate": 0.000599999614723249, "loss": 4.2759, "step": 46 }, { "epoch": 0.0, "grad_norm": 0.5014417171478271, "learning_rate": 0.0005999995977900119, "loss": 4.1724, "step": 47 }, { "epoch": 0.0, "grad_norm": 0.5669718980789185, "learning_rate": 0.0005999995804926194, "loss": 4.1846, "step": 48 }, { "epoch": 0.0, "grad_norm": 0.5487279891967773, "learning_rate": 0.0005999995628310717, "loss": 4.2237, "step": 49 }, { "epoch": 0.0, "grad_norm": 0.5501391291618347, "learning_rate": 0.0005999995448053686, "loss": 4.198, "step": 50 }, { "epoch": 0.0, "grad_norm": 0.5477765202522278, "learning_rate": 0.0005999995264155104, "loss": 4.1304, "step": 51 }, { "epoch": 0.0, "grad_norm": 0.6033360362052917, "learning_rate": 0.0005999995076614969, "loss": 4.1458, "step": 52 }, { "epoch": 0.0, "grad_norm": 0.5574687123298645, "learning_rate": 0.0005999994885433282, "loss": 4.0775, "step": 53 }, { "epoch": 0.0, "grad_norm": 0.547299325466156, "learning_rate": 0.0005999994690610044, "loss": 4.1366, "step": 54 }, { "epoch": 0.0, "grad_norm": 0.5133364796638489, "learning_rate": 0.0005999994492145254, "loss": 4.0691, "step": 55 }, { "epoch": 0.0, "grad_norm": 0.5382536053657532, "learning_rate": 0.0005999994290038913, "loss": 3.9724, "step": 56 }, { "epoch": 0.0, "grad_norm": 0.572081446647644, "learning_rate": 0.000599999408429102, "loss": 4.0353, "step": 57 }, { "epoch": 0.0, "grad_norm": 0.5347955822944641, "learning_rate": 0.0005999993874901576, "loss": 4.114, "step": 58 }, { "epoch": 0.0, "grad_norm": 0.6046149134635925, "learning_rate": 0.0005999993661870583, "loss": 4.0877, "step": 59 }, { "epoch": 0.0, "grad_norm": 0.5697489380836487, "learning_rate": 0.0005999993445198038, "loss": 4.0401, "step": 60 }, { "epoch": 0.0, "grad_norm": 0.611663281917572, "learning_rate": 0.0005999993224883944, "loss": 4.0935, "step": 61 }, { "epoch": 0.0, "grad_norm": 0.6526895761489868, "learning_rate": 0.0005999993000928301, "loss": 4.0532, "step": 62 }, { "epoch": 0.0, "grad_norm": 0.5604841113090515, "learning_rate": 0.0005999992773331107, "loss": 3.9923, "step": 63 }, { "epoch": 0.0, "grad_norm": 0.7098034024238586, "learning_rate": 0.0005999992542092365, "loss": 4.0125, "step": 64 }, { "epoch": 0.0, "grad_norm": 0.5639750957489014, "learning_rate": 0.0005999992307212073, "loss": 3.9572, "step": 65 }, { "epoch": 0.0, "grad_norm": 0.6704108715057373, "learning_rate": 0.0005999992068690234, "loss": 3.9924, "step": 66 }, { "epoch": 0.0, "grad_norm": 0.60701984167099, "learning_rate": 0.0005999991826526845, "loss": 4.0006, "step": 67 }, { "epoch": 0.0, "grad_norm": 0.5441910624504089, "learning_rate": 0.0005999991580721908, "loss": 3.9497, "step": 68 }, { "epoch": 0.0, "grad_norm": 0.5376867651939392, "learning_rate": 0.0005999991331275425, "loss": 3.9755, "step": 69 }, { "epoch": 0.0, "grad_norm": 0.6340477466583252, "learning_rate": 0.0005999991078187392, "loss": 3.8941, "step": 70 }, { "epoch": 0.0, "grad_norm": 0.6433451771736145, "learning_rate": 0.0005999990821457813, "loss": 3.8859, "step": 71 }, { "epoch": 0.0, "grad_norm": 0.6510988473892212, "learning_rate": 0.0005999990561086687, "loss": 3.906, "step": 72 }, { "epoch": 0.0, "grad_norm": 0.5701102614402771, "learning_rate": 0.0005999990297074015, "loss": 3.8465, "step": 73 }, { "epoch": 0.0, "grad_norm": 0.6091551780700684, "learning_rate": 0.0005999990029419797, "loss": 3.8319, "step": 74 }, { "epoch": 0.0, "grad_norm": 0.6810700297355652, "learning_rate": 0.0005999989758124033, "loss": 3.7773, "step": 75 }, { "epoch": 0.0, "grad_norm": 0.7328987121582031, "learning_rate": 0.0005999989483186723, "loss": 3.8642, "step": 76 }, { "epoch": 0.0, "grad_norm": 0.5573557019233704, "learning_rate": 0.0005999989204607868, "loss": 3.7999, "step": 77 }, { "epoch": 0.0, "grad_norm": 0.7246769070625305, "learning_rate": 0.0005999988922387469, "loss": 3.8201, "step": 78 }, { "epoch": 0.0, "grad_norm": 0.557848334312439, "learning_rate": 0.0005999988636525525, "loss": 3.758, "step": 79 }, { "epoch": 0.0, "grad_norm": 0.6426705121994019, "learning_rate": 0.0005999988347022035, "loss": 3.7261, "step": 80 }, { "epoch": 0.0, "grad_norm": 0.500382125377655, "learning_rate": 0.0005999988053877003, "loss": 3.8179, "step": 81 }, { "epoch": 0.0, "grad_norm": 0.566608726978302, "learning_rate": 0.0005999987757090428, "loss": 3.7547, "step": 82 }, { "epoch": 0.0, "grad_norm": 0.5099013447761536, "learning_rate": 0.0005999987456662309, "loss": 3.7062, "step": 83 }, { "epoch": 0.0, "grad_norm": 0.523576557636261, "learning_rate": 0.0005999987152592647, "loss": 3.7465, "step": 84 }, { "epoch": 0.0, "grad_norm": 0.5547106266021729, "learning_rate": 0.0005999986844881442, "loss": 3.7766, "step": 85 }, { "epoch": 0.0, "grad_norm": 0.5438652038574219, "learning_rate": 0.0005999986533528698, "loss": 3.7034, "step": 86 }, { "epoch": 0.0, "grad_norm": 0.5829522609710693, "learning_rate": 0.0005999986218534409, "loss": 3.688, "step": 87 }, { "epoch": 0.0, "grad_norm": 0.5274013876914978, "learning_rate": 0.000599998589989858, "loss": 3.6256, "step": 88 }, { "epoch": 0.0, "grad_norm": 0.616613507270813, "learning_rate": 0.0005999985577621211, "loss": 3.5614, "step": 89 }, { "epoch": 0.0, "grad_norm": 0.5853050351142883, "learning_rate": 0.00059999852517023, "loss": 3.6014, "step": 90 }, { "epoch": 0.0, "grad_norm": 0.5959306359291077, "learning_rate": 0.000599998492214185, "loss": 3.7145, "step": 91 }, { "epoch": 0.0, "grad_norm": 0.6405519843101501, "learning_rate": 0.000599998458893986, "loss": 3.7061, "step": 92 }, { "epoch": 0.0, "grad_norm": 0.5582771897315979, "learning_rate": 0.0005999984252096332, "loss": 3.6713, "step": 93 }, { "epoch": 0.0, "grad_norm": 0.5390492081642151, "learning_rate": 0.0005999983911611263, "loss": 3.657, "step": 94 }, { "epoch": 0.0, "grad_norm": 0.6461691856384277, "learning_rate": 0.0005999983567484656, "loss": 3.5682, "step": 95 }, { "epoch": 0.0, "grad_norm": 0.5089138746261597, "learning_rate": 0.0005999983219716512, "loss": 3.607, "step": 96 }, { "epoch": 0.0, "grad_norm": 0.5615888237953186, "learning_rate": 0.000599998286830683, "loss": 3.6214, "step": 97 }, { "epoch": 0.0, "grad_norm": 0.5068588256835938, "learning_rate": 0.0005999982513255611, "loss": 3.6135, "step": 98 }, { "epoch": 0.0, "grad_norm": 0.5550532937049866, "learning_rate": 0.0005999982154562854, "loss": 3.5856, "step": 99 }, { "epoch": 0.0, "grad_norm": 0.6319258809089661, "learning_rate": 0.0005999981792228562, "loss": 3.6284, "step": 100 }, { "epoch": 0.0, "grad_norm": 0.5683005452156067, "learning_rate": 0.0005999981426252734, "loss": 3.6178, "step": 101 }, { "epoch": 0.0, "grad_norm": 0.6741005778312683, "learning_rate": 0.000599998105663537, "loss": 3.5919, "step": 102 }, { "epoch": 0.0, "grad_norm": 0.6523358225822449, "learning_rate": 0.0005999980683376471, "loss": 3.578, "step": 103 }, { "epoch": 0.0, "grad_norm": 0.6613895893096924, "learning_rate": 0.0005999980306476038, "loss": 3.6216, "step": 104 }, { "epoch": 0.0, "grad_norm": 0.600468099117279, "learning_rate": 0.0005999979925934071, "loss": 3.6597, "step": 105 }, { "epoch": 0.0, "grad_norm": 0.6687202453613281, "learning_rate": 0.000599997954175057, "loss": 3.5361, "step": 106 }, { "epoch": 0.0, "grad_norm": 0.6592238545417786, "learning_rate": 0.0005999979153925535, "loss": 3.5947, "step": 107 }, { "epoch": 0.0, "grad_norm": 0.6693978309631348, "learning_rate": 0.0005999978762458969, "loss": 3.6156, "step": 108 }, { "epoch": 0.0, "grad_norm": 0.6426326632499695, "learning_rate": 0.000599997836735087, "loss": 3.536, "step": 109 }, { "epoch": 0.0, "grad_norm": 0.64159095287323, "learning_rate": 0.0005999977968601239, "loss": 3.6313, "step": 110 }, { "epoch": 0.0, "grad_norm": 0.6701462268829346, "learning_rate": 0.0005999977566210078, "loss": 3.6487, "step": 111 }, { "epoch": 0.0, "grad_norm": 0.6743172407150269, "learning_rate": 0.0005999977160177386, "loss": 3.579, "step": 112 }, { "epoch": 0.0, "grad_norm": 0.5813566446304321, "learning_rate": 0.0005999976750503162, "loss": 3.5312, "step": 113 }, { "epoch": 0.0, "grad_norm": 0.6538673639297485, "learning_rate": 0.000599997633718741, "loss": 3.4559, "step": 114 }, { "epoch": 0.0, "grad_norm": 0.649744987487793, "learning_rate": 0.0005999975920230128, "loss": 3.4403, "step": 115 }, { "epoch": 0.0, "grad_norm": 0.7156094312667847, "learning_rate": 0.0005999975499631318, "loss": 3.4599, "step": 116 }, { "epoch": 0.0, "grad_norm": 0.6756429076194763, "learning_rate": 0.000599997507539098, "loss": 3.5206, "step": 117 }, { "epoch": 0.0, "grad_norm": 0.6042451858520508, "learning_rate": 0.0005999974647509113, "loss": 3.5437, "step": 118 }, { "epoch": 0.0, "grad_norm": 0.6156610250473022, "learning_rate": 0.0005999974215985719, "loss": 3.417, "step": 119 }, { "epoch": 0.0, "grad_norm": 0.6032337546348572, "learning_rate": 0.0005999973780820799, "loss": 3.515, "step": 120 }, { "epoch": 0.0, "grad_norm": 0.6724061369895935, "learning_rate": 0.0005999973342014353, "loss": 3.512, "step": 121 }, { "epoch": 0.0, "grad_norm": 0.684166431427002, "learning_rate": 0.000599997289956638, "loss": 3.4569, "step": 122 }, { "epoch": 0.0, "grad_norm": 0.6101895570755005, "learning_rate": 0.0005999972453476884, "loss": 3.4351, "step": 123 }, { "epoch": 0.0, "grad_norm": 0.6285821795463562, "learning_rate": 0.0005999972003745862, "loss": 3.4299, "step": 124 }, { "epoch": 0.0, "grad_norm": 0.6135870218276978, "learning_rate": 0.0005999971550373316, "loss": 3.5143, "step": 125 }, { "epoch": 0.0, "grad_norm": 0.635412871837616, "learning_rate": 0.0005999971093359247, "loss": 3.3968, "step": 126 }, { "epoch": 0.0, "grad_norm": 0.5776059627532959, "learning_rate": 0.0005999970632703655, "loss": 3.4618, "step": 127 }, { "epoch": 0.0, "grad_norm": 0.5901331305503845, "learning_rate": 0.000599997016840654, "loss": 3.3999, "step": 128 }, { "epoch": 0.0, "grad_norm": 0.6110070943832397, "learning_rate": 0.0005999969700467904, "loss": 3.4626, "step": 129 }, { "epoch": 0.0, "grad_norm": 0.6228132843971252, "learning_rate": 0.0005999969228887746, "loss": 3.4168, "step": 130 }, { "epoch": 0.0, "grad_norm": 0.5616828203201294, "learning_rate": 0.0005999968753666069, "loss": 3.3999, "step": 131 }, { "epoch": 0.0, "grad_norm": 0.5794464349746704, "learning_rate": 0.0005999968274802872, "loss": 3.4171, "step": 132 }, { "epoch": 0.0, "grad_norm": 0.5725519061088562, "learning_rate": 0.0005999967792298154, "loss": 3.4059, "step": 133 }, { "epoch": 0.0, "grad_norm": 0.6412649154663086, "learning_rate": 0.0005999967306151917, "loss": 3.3558, "step": 134 }, { "epoch": 0.0, "grad_norm": 0.5401954650878906, "learning_rate": 0.0005999966816364162, "loss": 3.3562, "step": 135 }, { "epoch": 0.0, "grad_norm": 0.5995265245437622, "learning_rate": 0.000599996632293489, "loss": 3.3514, "step": 136 }, { "epoch": 0.0, "grad_norm": 0.703134298324585, "learning_rate": 0.0005999965825864101, "loss": 3.3146, "step": 137 }, { "epoch": 0.0, "grad_norm": 0.6041818857192993, "learning_rate": 0.0005999965325151796, "loss": 3.4362, "step": 138 }, { "epoch": 0.0, "grad_norm": 0.6192206144332886, "learning_rate": 0.0005999964820797974, "loss": 3.3308, "step": 139 }, { "epoch": 0.0, "grad_norm": 0.6512753367424011, "learning_rate": 0.0005999964312802637, "loss": 3.319, "step": 140 }, { "epoch": 0.0, "grad_norm": 0.5558834671974182, "learning_rate": 0.0005999963801165785, "loss": 3.2605, "step": 141 }, { "epoch": 0.0, "grad_norm": 0.5949523448944092, "learning_rate": 0.000599996328588742, "loss": 3.3274, "step": 142 }, { "epoch": 0.0, "grad_norm": 0.6761783957481384, "learning_rate": 0.000599996276696754, "loss": 3.3156, "step": 143 }, { "epoch": 0.0, "grad_norm": 0.6234468221664429, "learning_rate": 0.0005999962244406148, "loss": 3.3718, "step": 144 }, { "epoch": 0.0, "grad_norm": 0.6969411373138428, "learning_rate": 0.0005999961718203245, "loss": 3.2202, "step": 145 }, { "epoch": 0.0, "grad_norm": 0.6540246605873108, "learning_rate": 0.000599996118835883, "loss": 3.3204, "step": 146 }, { "epoch": 0.0, "grad_norm": 0.7493084073066711, "learning_rate": 0.0005999960654872903, "loss": 3.3431, "step": 147 }, { "epoch": 0.0, "grad_norm": 0.590627908706665, "learning_rate": 0.0005999960117745466, "loss": 3.2771, "step": 148 }, { "epoch": 0.0, "grad_norm": 0.7904335260391235, "learning_rate": 0.0005999959576976521, "loss": 3.3159, "step": 149 }, { "epoch": 0.0, "grad_norm": 0.5961655378341675, "learning_rate": 0.0005999959032566065, "loss": 3.3058, "step": 150 }, { "epoch": 0.01, "grad_norm": 0.6802785396575928, "learning_rate": 0.0005999958484514102, "loss": 3.2715, "step": 151 }, { "epoch": 0.01, "grad_norm": 0.6245137453079224, "learning_rate": 0.0005999957932820632, "loss": 3.3169, "step": 152 }, { "epoch": 0.01, "grad_norm": 0.6707127094268799, "learning_rate": 0.0005999957377485653, "loss": 3.262, "step": 153 }, { "epoch": 0.01, "grad_norm": 0.6856350898742676, "learning_rate": 0.000599995681850917, "loss": 3.198, "step": 154 }, { "epoch": 0.01, "grad_norm": 0.6017987132072449, "learning_rate": 0.000599995625589118, "loss": 3.2276, "step": 155 }, { "epoch": 0.01, "grad_norm": 0.6031147241592407, "learning_rate": 0.0005999955689631685, "loss": 3.3002, "step": 156 }, { "epoch": 0.01, "grad_norm": 0.6662557721138, "learning_rate": 0.0005999955119730686, "loss": 3.2364, "step": 157 }, { "epoch": 0.01, "grad_norm": 0.6095231175422668, "learning_rate": 0.0005999954546188185, "loss": 3.2172, "step": 158 }, { "epoch": 0.01, "grad_norm": 0.6267956495285034, "learning_rate": 0.000599995396900418, "loss": 3.3077, "step": 159 }, { "epoch": 0.01, "grad_norm": 0.6588349938392639, "learning_rate": 0.0005999953388178673, "loss": 3.2336, "step": 160 }, { "epoch": 0.01, "grad_norm": 0.6498230695724487, "learning_rate": 0.0005999952803711665, "loss": 3.1904, "step": 161 }, { "epoch": 0.01, "grad_norm": 0.7249612808227539, "learning_rate": 0.0005999952215603156, "loss": 3.192, "step": 162 }, { "epoch": 0.01, "grad_norm": 0.6271867156028748, "learning_rate": 0.0005999951623853146, "loss": 3.2172, "step": 163 }, { "epoch": 0.01, "grad_norm": 0.7349063754081726, "learning_rate": 0.0005999951028461639, "loss": 3.2404, "step": 164 }, { "epoch": 0.01, "grad_norm": 0.6244122385978699, "learning_rate": 0.0005999950429428632, "loss": 3.2882, "step": 165 }, { "epoch": 0.01, "grad_norm": 0.7113081812858582, "learning_rate": 0.0005999949826754127, "loss": 3.1851, "step": 166 }, { "epoch": 0.01, "grad_norm": 0.5815573930740356, "learning_rate": 0.0005999949220438126, "loss": 3.1828, "step": 167 }, { "epoch": 0.01, "grad_norm": 0.6336977481842041, "learning_rate": 0.0005999948610480628, "loss": 3.2379, "step": 168 }, { "epoch": 0.01, "grad_norm": 0.6742769479751587, "learning_rate": 0.0005999947996881633, "loss": 3.1913, "step": 169 }, { "epoch": 0.01, "grad_norm": 0.6614216566085815, "learning_rate": 0.0005999947379641145, "loss": 3.1445, "step": 170 }, { "epoch": 0.01, "grad_norm": 0.5848857164382935, "learning_rate": 0.0005999946758759163, "loss": 3.1387, "step": 171 }, { "epoch": 0.01, "grad_norm": 0.6290248036384583, "learning_rate": 0.0005999946134235686, "loss": 3.2447, "step": 172 }, { "epoch": 0.01, "grad_norm": 0.680540144443512, "learning_rate": 0.0005999945506070718, "loss": 3.2413, "step": 173 }, { "epoch": 0.01, "grad_norm": 0.6039167046546936, "learning_rate": 0.0005999944874264258, "loss": 3.2102, "step": 174 }, { "epoch": 0.01, "grad_norm": 0.6231111884117126, "learning_rate": 0.0005999944238816307, "loss": 3.1645, "step": 175 }, { "epoch": 0.01, "grad_norm": 0.6409281492233276, "learning_rate": 0.0005999943599726865, "loss": 3.2542, "step": 176 }, { "epoch": 0.01, "grad_norm": 0.6679972410202026, "learning_rate": 0.0005999942956995934, "loss": 3.2289, "step": 177 }, { "epoch": 0.01, "grad_norm": 0.5697520971298218, "learning_rate": 0.0005999942310623514, "loss": 3.0893, "step": 178 }, { "epoch": 0.01, "grad_norm": 0.6270955204963684, "learning_rate": 0.0005999941660609606, "loss": 3.1455, "step": 179 }, { "epoch": 0.01, "grad_norm": 0.5788727402687073, "learning_rate": 0.0005999941006954211, "loss": 3.1365, "step": 180 }, { "epoch": 0.01, "grad_norm": 0.6131219267845154, "learning_rate": 0.0005999940349657331, "loss": 3.1221, "step": 181 }, { "epoch": 0.01, "grad_norm": 0.6481972336769104, "learning_rate": 0.0005999939688718964, "loss": 3.092, "step": 182 }, { "epoch": 0.01, "grad_norm": 0.644990861415863, "learning_rate": 0.0005999939024139111, "loss": 3.2235, "step": 183 }, { "epoch": 0.01, "grad_norm": 0.6454139947891235, "learning_rate": 0.0005999938355917776, "loss": 3.1448, "step": 184 }, { "epoch": 0.01, "grad_norm": 0.6832898855209351, "learning_rate": 0.0005999937684054958, "loss": 3.1809, "step": 185 }, { "epoch": 0.01, "grad_norm": 0.6663551330566406, "learning_rate": 0.0005999937008550657, "loss": 3.2082, "step": 186 }, { "epoch": 0.01, "grad_norm": 0.6181893944740295, "learning_rate": 0.0005999936329404874, "loss": 3.0557, "step": 187 }, { "epoch": 0.01, "grad_norm": 0.6637733578681946, "learning_rate": 0.0005999935646617611, "loss": 3.0015, "step": 188 }, { "epoch": 0.01, "grad_norm": 0.57765132188797, "learning_rate": 0.0005999934960188867, "loss": 3.0452, "step": 189 }, { "epoch": 0.01, "grad_norm": 0.6659131646156311, "learning_rate": 0.0005999934270118646, "loss": 3.1064, "step": 190 }, { "epoch": 0.01, "grad_norm": 0.6089770197868347, "learning_rate": 0.0005999933576406945, "loss": 3.0943, "step": 191 }, { "epoch": 0.01, "grad_norm": 0.6439929008483887, "learning_rate": 0.0005999932879053768, "loss": 3.1129, "step": 192 }, { "epoch": 0.01, "grad_norm": 0.5804649591445923, "learning_rate": 0.0005999932178059114, "loss": 3.1063, "step": 193 }, { "epoch": 0.01, "grad_norm": 0.6649436354637146, "learning_rate": 0.0005999931473422984, "loss": 3.1012, "step": 194 }, { "epoch": 0.01, "grad_norm": 0.7309350967407227, "learning_rate": 0.0005999930765145379, "loss": 3.0556, "step": 195 }, { "epoch": 0.01, "grad_norm": 0.6228511333465576, "learning_rate": 0.00059999300532263, "loss": 3.0536, "step": 196 }, { "epoch": 0.01, "grad_norm": 0.6384984254837036, "learning_rate": 0.0005999929337665748, "loss": 3.1367, "step": 197 }, { "epoch": 0.01, "grad_norm": 0.6571009159088135, "learning_rate": 0.0005999928618463724, "loss": 3.1361, "step": 198 }, { "epoch": 0.01, "grad_norm": 0.648012638092041, "learning_rate": 0.000599992789562023, "loss": 3.0642, "step": 199 }, { "epoch": 0.01, "grad_norm": 0.6469352841377258, "learning_rate": 0.0005999927169135264, "loss": 3.1702, "step": 200 }, { "epoch": 0.01, "grad_norm": 0.6886048316955566, "learning_rate": 0.000599992643900883, "loss": 3.1269, "step": 201 }, { "epoch": 0.01, "grad_norm": 0.6443532109260559, "learning_rate": 0.0005999925705240925, "loss": 3.0934, "step": 202 }, { "epoch": 0.01, "grad_norm": 0.6442464590072632, "learning_rate": 0.0005999924967831553, "loss": 3.0419, "step": 203 }, { "epoch": 0.01, "grad_norm": 0.654876708984375, "learning_rate": 0.0005999924226780715, "loss": 3.113, "step": 204 }, { "epoch": 0.01, "grad_norm": 0.6062064170837402, "learning_rate": 0.000599992348208841, "loss": 3.0503, "step": 205 }, { "epoch": 0.01, "grad_norm": 0.6821978688240051, "learning_rate": 0.0005999922733754641, "loss": 3.1218, "step": 206 }, { "epoch": 0.01, "grad_norm": 0.6414954662322998, "learning_rate": 0.0005999921981779408, "loss": 3.0292, "step": 207 }, { "epoch": 0.01, "grad_norm": 0.6827059984207153, "learning_rate": 0.000599992122616271, "loss": 3.0778, "step": 208 }, { "epoch": 0.01, "grad_norm": 0.6074067950248718, "learning_rate": 0.0005999920466904551, "loss": 3.0511, "step": 209 }, { "epoch": 0.01, "grad_norm": 0.6127645373344421, "learning_rate": 0.0005999919704004931, "loss": 3.0309, "step": 210 }, { "epoch": 0.01, "grad_norm": 0.6572717428207397, "learning_rate": 0.000599991893746385, "loss": 3.0601, "step": 211 }, { "epoch": 0.01, "grad_norm": 0.6199698448181152, "learning_rate": 0.0005999918167281308, "loss": 3.0848, "step": 212 }, { "epoch": 0.01, "grad_norm": 0.6033151745796204, "learning_rate": 0.0005999917393457308, "loss": 3.0469, "step": 213 }, { "epoch": 0.01, "grad_norm": 0.6459708213806152, "learning_rate": 0.0005999916615991851, "loss": 2.9551, "step": 214 }, { "epoch": 0.01, "grad_norm": 0.6254739761352539, "learning_rate": 0.0005999915834884938, "loss": 3.0317, "step": 215 }, { "epoch": 0.01, "grad_norm": 0.6006566882133484, "learning_rate": 0.0005999915050136568, "loss": 3.0448, "step": 216 }, { "epoch": 0.01, "grad_norm": 0.6664471626281738, "learning_rate": 0.0005999914261746743, "loss": 3.0019, "step": 217 }, { "epoch": 0.01, "grad_norm": 0.6118531823158264, "learning_rate": 0.0005999913469715465, "loss": 3.0392, "step": 218 }, { "epoch": 0.01, "grad_norm": 0.7067995071411133, "learning_rate": 0.0005999912674042732, "loss": 3.0613, "step": 219 }, { "epoch": 0.01, "grad_norm": 0.6433179974555969, "learning_rate": 0.000599991187472855, "loss": 3.0637, "step": 220 }, { "epoch": 0.01, "grad_norm": 0.7384520769119263, "learning_rate": 0.0005999911071772915, "loss": 2.9781, "step": 221 }, { "epoch": 0.01, "grad_norm": 0.6631864905357361, "learning_rate": 0.0005999910265175831, "loss": 3.0202, "step": 222 }, { "epoch": 0.01, "grad_norm": 0.7301493883132935, "learning_rate": 0.0005999909454937298, "loss": 2.9683, "step": 223 }, { "epoch": 0.01, "grad_norm": 0.6627020835876465, "learning_rate": 0.0005999908641057315, "loss": 2.9859, "step": 224 }, { "epoch": 0.01, "grad_norm": 0.6315799951553345, "learning_rate": 0.0005999907823535887, "loss": 2.9233, "step": 225 }, { "epoch": 0.01, "grad_norm": 0.7478646039962769, "learning_rate": 0.0005999907002373012, "loss": 2.9294, "step": 226 }, { "epoch": 0.01, "grad_norm": 0.6395400166511536, "learning_rate": 0.0005999906177568693, "loss": 2.8971, "step": 227 }, { "epoch": 0.01, "grad_norm": 0.8375029563903809, "learning_rate": 0.0005999905349122929, "loss": 3.0124, "step": 228 }, { "epoch": 0.01, "grad_norm": 0.6061581373214722, "learning_rate": 0.0005999904517035721, "loss": 2.8657, "step": 229 }, { "epoch": 0.01, "grad_norm": 0.8228991031646729, "learning_rate": 0.0005999903681307072, "loss": 2.8622, "step": 230 }, { "epoch": 0.01, "grad_norm": 0.672902524471283, "learning_rate": 0.0005999902841936982, "loss": 3.0008, "step": 231 }, { "epoch": 0.01, "grad_norm": 0.71971195936203, "learning_rate": 0.0005999901998925452, "loss": 2.845, "step": 232 }, { "epoch": 0.01, "grad_norm": 0.7250251770019531, "learning_rate": 0.0005999901152272483, "loss": 2.9624, "step": 233 }, { "epoch": 0.01, "grad_norm": 0.6117768287658691, "learning_rate": 0.0005999900301978076, "loss": 3.0514, "step": 234 }, { "epoch": 0.01, "grad_norm": 0.6998549699783325, "learning_rate": 0.0005999899448042232, "loss": 2.9041, "step": 235 }, { "epoch": 0.01, "grad_norm": 0.6761711835861206, "learning_rate": 0.0005999898590464952, "loss": 2.9465, "step": 236 }, { "epoch": 0.01, "grad_norm": 0.7066910266876221, "learning_rate": 0.0005999897729246237, "loss": 2.889, "step": 237 }, { "epoch": 0.01, "grad_norm": 0.6525184512138367, "learning_rate": 0.0005999896864386089, "loss": 2.9566, "step": 238 }, { "epoch": 0.01, "grad_norm": 0.6671560406684875, "learning_rate": 0.0005999895995884507, "loss": 3.0346, "step": 239 }, { "epoch": 0.01, "grad_norm": 0.7496574521064758, "learning_rate": 0.0005999895123741494, "loss": 3.0422, "step": 240 }, { "epoch": 0.01, "grad_norm": 0.63331139087677, "learning_rate": 0.000599989424795705, "loss": 2.8728, "step": 241 }, { "epoch": 0.01, "grad_norm": 0.6716091632843018, "learning_rate": 0.0005999893368531177, "loss": 2.921, "step": 242 }, { "epoch": 0.01, "grad_norm": 0.6660493016242981, "learning_rate": 0.0005999892485463875, "loss": 2.9384, "step": 243 }, { "epoch": 0.01, "grad_norm": 0.7064840197563171, "learning_rate": 0.0005999891598755145, "loss": 2.9227, "step": 244 }, { "epoch": 0.01, "grad_norm": 0.6422150731086731, "learning_rate": 0.0005999890708404989, "loss": 2.8612, "step": 245 }, { "epoch": 0.01, "grad_norm": 0.6296887993812561, "learning_rate": 0.0005999889814413408, "loss": 2.94, "step": 246 }, { "epoch": 0.01, "grad_norm": 0.6823571920394897, "learning_rate": 0.0005999888916780403, "loss": 2.8627, "step": 247 }, { "epoch": 0.01, "grad_norm": 0.596342146396637, "learning_rate": 0.0005999888015505975, "loss": 2.9083, "step": 248 }, { "epoch": 0.01, "grad_norm": 0.6637758612632751, "learning_rate": 0.0005999887110590123, "loss": 2.893, "step": 249 }, { "epoch": 0.01, "grad_norm": 0.6359301209449768, "learning_rate": 0.0005999886202032853, "loss": 2.9064, "step": 250 }, { "epoch": 0.01, "grad_norm": 0.611515998840332, "learning_rate": 0.0005999885289834162, "loss": 3.058, "step": 251 }, { "epoch": 0.01, "grad_norm": 0.6124685406684875, "learning_rate": 0.0005999884373994051, "loss": 2.842, "step": 252 }, { "epoch": 0.01, "grad_norm": 0.622922420501709, "learning_rate": 0.0005999883454512523, "loss": 2.9124, "step": 253 }, { "epoch": 0.01, "grad_norm": 0.6180800199508667, "learning_rate": 0.0005999882531389579, "loss": 2.9031, "step": 254 }, { "epoch": 0.01, "grad_norm": 0.6718301177024841, "learning_rate": 0.0005999881604625219, "loss": 2.8982, "step": 255 }, { "epoch": 0.01, "grad_norm": 0.5936549305915833, "learning_rate": 0.0005999880674219445, "loss": 2.9225, "step": 256 }, { "epoch": 0.01, "grad_norm": 0.7077022194862366, "learning_rate": 0.0005999879740172259, "loss": 2.9728, "step": 257 }, { "epoch": 0.01, "grad_norm": 0.6162145733833313, "learning_rate": 0.000599987880248366, "loss": 2.8202, "step": 258 }, { "epoch": 0.01, "grad_norm": 0.6990219950675964, "learning_rate": 0.000599987786115365, "loss": 2.9072, "step": 259 }, { "epoch": 0.01, "grad_norm": 0.6118119359016418, "learning_rate": 0.000599987691618223, "loss": 2.9415, "step": 260 }, { "epoch": 0.01, "grad_norm": 0.6091951131820679, "learning_rate": 0.0005999875967569403, "loss": 2.85, "step": 261 }, { "epoch": 0.01, "grad_norm": 0.5970466136932373, "learning_rate": 0.0005999875015315167, "loss": 2.9329, "step": 262 }, { "epoch": 0.01, "grad_norm": 0.633264422416687, "learning_rate": 0.0005999874059419525, "loss": 2.8938, "step": 263 }, { "epoch": 0.01, "grad_norm": 0.6362590193748474, "learning_rate": 0.0005999873099882478, "loss": 2.885, "step": 264 }, { "epoch": 0.01, "grad_norm": 0.6464523673057556, "learning_rate": 0.0005999872136704027, "loss": 2.8484, "step": 265 }, { "epoch": 0.01, "grad_norm": 0.6537765264511108, "learning_rate": 0.0005999871169884174, "loss": 2.8858, "step": 266 }, { "epoch": 0.01, "grad_norm": 0.6283228397369385, "learning_rate": 0.0005999870199422919, "loss": 2.9193, "step": 267 }, { "epoch": 0.01, "grad_norm": 0.6523813009262085, "learning_rate": 0.0005999869225320264, "loss": 2.915, "step": 268 }, { "epoch": 0.01, "grad_norm": 0.6542707085609436, "learning_rate": 0.0005999868247576209, "loss": 2.8384, "step": 269 }, { "epoch": 0.01, "grad_norm": 0.7196794152259827, "learning_rate": 0.0005999867266190756, "loss": 2.8277, "step": 270 }, { "epoch": 0.01, "grad_norm": 0.6240473389625549, "learning_rate": 0.0005999866281163907, "loss": 2.7988, "step": 271 }, { "epoch": 0.01, "grad_norm": 0.7449977397918701, "learning_rate": 0.0005999865292495661, "loss": 2.8195, "step": 272 }, { "epoch": 0.01, "grad_norm": 0.6762398481369019, "learning_rate": 0.0005999864300186022, "loss": 2.9048, "step": 273 }, { "epoch": 0.01, "grad_norm": 0.6679018139839172, "learning_rate": 0.0005999863304234988, "loss": 2.8786, "step": 274 }, { "epoch": 0.01, "grad_norm": 0.6249721050262451, "learning_rate": 0.0005999862304642563, "loss": 2.9248, "step": 275 }, { "epoch": 0.01, "grad_norm": 0.7307965755462646, "learning_rate": 0.0005999861301408747, "loss": 2.8613, "step": 276 }, { "epoch": 0.01, "grad_norm": 0.6204913854598999, "learning_rate": 0.0005999860294533542, "loss": 2.9407, "step": 277 }, { "epoch": 0.01, "grad_norm": 0.7189359664916992, "learning_rate": 0.0005999859284016948, "loss": 2.8869, "step": 278 }, { "epoch": 0.01, "grad_norm": 0.6308476328849792, "learning_rate": 0.0005999858269858967, "loss": 2.7759, "step": 279 }, { "epoch": 0.01, "grad_norm": 0.641145646572113, "learning_rate": 0.00059998572520596, "loss": 2.8607, "step": 280 }, { "epoch": 0.01, "grad_norm": 0.6241111755371094, "learning_rate": 0.0005999856230618848, "loss": 2.7878, "step": 281 }, { "epoch": 0.01, "grad_norm": 0.6098552346229553, "learning_rate": 0.0005999855205536714, "loss": 2.8717, "step": 282 }, { "epoch": 0.01, "grad_norm": 0.6857374310493469, "learning_rate": 0.0005999854176813196, "loss": 2.8475, "step": 283 }, { "epoch": 0.01, "grad_norm": 0.630126416683197, "learning_rate": 0.0005999853144448298, "loss": 2.8084, "step": 284 }, { "epoch": 0.01, "grad_norm": 0.668361246585846, "learning_rate": 0.0005999852108442019, "loss": 2.8691, "step": 285 }, { "epoch": 0.01, "grad_norm": 0.6529747247695923, "learning_rate": 0.0005999851068794363, "loss": 2.888, "step": 286 }, { "epoch": 0.01, "grad_norm": 0.6339395642280579, "learning_rate": 0.000599985002550533, "loss": 2.8798, "step": 287 }, { "epoch": 0.01, "grad_norm": 0.682136595249176, "learning_rate": 0.0005999848978574919, "loss": 2.7528, "step": 288 }, { "epoch": 0.01, "grad_norm": 0.590757429599762, "learning_rate": 0.0005999847928003135, "loss": 2.794, "step": 289 }, { "epoch": 0.01, "grad_norm": 0.6622710824012756, "learning_rate": 0.0005999846873789978, "loss": 2.8976, "step": 290 }, { "epoch": 0.01, "grad_norm": 0.6231802105903625, "learning_rate": 0.0005999845815935447, "loss": 2.7849, "step": 291 }, { "epoch": 0.01, "grad_norm": 0.6011998057365417, "learning_rate": 0.0005999844754439547, "loss": 2.7766, "step": 292 }, { "epoch": 0.01, "grad_norm": 0.6464350819587708, "learning_rate": 0.0005999843689302278, "loss": 2.8515, "step": 293 }, { "epoch": 0.01, "grad_norm": 0.6399717926979065, "learning_rate": 0.0005999842620523639, "loss": 2.725, "step": 294 }, { "epoch": 0.01, "grad_norm": 0.6146669983863831, "learning_rate": 0.0005999841548103634, "loss": 2.83, "step": 295 }, { "epoch": 0.01, "grad_norm": 0.6065233945846558, "learning_rate": 0.0005999840472042263, "loss": 2.8467, "step": 296 }, { "epoch": 0.01, "grad_norm": 0.6140422821044922, "learning_rate": 0.0005999839392339529, "loss": 2.8161, "step": 297 }, { "epoch": 0.01, "grad_norm": 0.6225056648254395, "learning_rate": 0.000599983830899543, "loss": 2.8147, "step": 298 }, { "epoch": 0.01, "grad_norm": 0.621091902256012, "learning_rate": 0.0005999837222009969, "loss": 2.8329, "step": 299 }, { "epoch": 0.01, "grad_norm": 0.6321823000907898, "learning_rate": 0.000599983613138315, "loss": 2.7888, "step": 300 }, { "epoch": 0.01, "grad_norm": 0.640440821647644, "learning_rate": 0.000599983503711497, "loss": 2.7973, "step": 301 }, { "epoch": 0.01, "grad_norm": 0.6455057859420776, "learning_rate": 0.0005999833939205433, "loss": 2.8277, "step": 302 }, { "epoch": 0.01, "grad_norm": 0.7002223134040833, "learning_rate": 0.000599983283765454, "loss": 2.8185, "step": 303 }, { "epoch": 0.01, "grad_norm": 0.6219784617424011, "learning_rate": 0.0005999831732462291, "loss": 2.8056, "step": 304 }, { "epoch": 0.01, "grad_norm": 0.6895701289176941, "learning_rate": 0.0005999830623628689, "loss": 2.8341, "step": 305 }, { "epoch": 0.01, "grad_norm": 0.6031991839408875, "learning_rate": 0.0005999829511153734, "loss": 2.8361, "step": 306 }, { "epoch": 0.01, "grad_norm": 0.6523324251174927, "learning_rate": 0.0005999828395037429, "loss": 2.7502, "step": 307 }, { "epoch": 0.01, "grad_norm": 0.6612911224365234, "learning_rate": 0.0005999827275279773, "loss": 2.7635, "step": 308 }, { "epoch": 0.01, "grad_norm": 0.6651524305343628, "learning_rate": 0.000599982615188077, "loss": 2.9353, "step": 309 }, { "epoch": 0.01, "grad_norm": 0.6348247528076172, "learning_rate": 0.0005999825024840419, "loss": 2.7529, "step": 310 }, { "epoch": 0.01, "grad_norm": 0.6513815522193909, "learning_rate": 0.0005999823894158723, "loss": 2.7793, "step": 311 }, { "epoch": 0.01, "grad_norm": 0.6216742396354675, "learning_rate": 0.0005999822759835682, "loss": 2.7431, "step": 312 }, { "epoch": 0.01, "grad_norm": 0.6474078893661499, "learning_rate": 0.0005999821621871299, "loss": 2.8158, "step": 313 }, { "epoch": 0.01, "grad_norm": 0.6085466742515564, "learning_rate": 0.0005999820480265575, "loss": 2.7436, "step": 314 }, { "epoch": 0.01, "grad_norm": 0.6651740074157715, "learning_rate": 0.000599981933501851, "loss": 2.7621, "step": 315 }, { "epoch": 0.01, "grad_norm": 0.6465579271316528, "learning_rate": 0.0005999818186130107, "loss": 2.7167, "step": 316 }, { "epoch": 0.01, "grad_norm": 0.6440083980560303, "learning_rate": 0.0005999817033600366, "loss": 2.7487, "step": 317 }, { "epoch": 0.01, "grad_norm": 0.6901293396949768, "learning_rate": 0.000599981587742929, "loss": 2.798, "step": 318 }, { "epoch": 0.01, "grad_norm": 0.6363867521286011, "learning_rate": 0.0005999814717616878, "loss": 2.7763, "step": 319 }, { "epoch": 0.01, "grad_norm": 0.672336220741272, "learning_rate": 0.0005999813554163134, "loss": 2.8829, "step": 320 }, { "epoch": 0.01, "grad_norm": 0.66645348072052, "learning_rate": 0.0005999812387068059, "loss": 2.8328, "step": 321 }, { "epoch": 0.01, "grad_norm": 0.6474481225013733, "learning_rate": 0.0005999811216331653, "loss": 2.7185, "step": 322 }, { "epoch": 0.01, "grad_norm": 0.6577538847923279, "learning_rate": 0.0005999810041953918, "loss": 2.8315, "step": 323 }, { "epoch": 0.01, "grad_norm": 0.6769696474075317, "learning_rate": 0.0005999808863934856, "loss": 2.8054, "step": 324 }, { "epoch": 0.01, "grad_norm": 0.622761070728302, "learning_rate": 0.0005999807682274468, "loss": 2.737, "step": 325 }, { "epoch": 0.01, "grad_norm": 0.6497820615768433, "learning_rate": 0.0005999806496972755, "loss": 2.6554, "step": 326 }, { "epoch": 0.01, "grad_norm": 0.6154866218566895, "learning_rate": 0.000599980530802972, "loss": 2.7189, "step": 327 }, { "epoch": 0.01, "grad_norm": 0.671398401260376, "learning_rate": 0.0005999804115445363, "loss": 2.688, "step": 328 }, { "epoch": 0.01, "grad_norm": 0.7384951114654541, "learning_rate": 0.0005999802919219685, "loss": 2.7358, "step": 329 }, { "epoch": 0.01, "grad_norm": 0.6409444212913513, "learning_rate": 0.0005999801719352689, "loss": 2.6862, "step": 330 }, { "epoch": 0.01, "grad_norm": 0.6218468546867371, "learning_rate": 0.0005999800515844374, "loss": 2.763, "step": 331 }, { "epoch": 0.01, "grad_norm": 0.6618748307228088, "learning_rate": 0.0005999799308694745, "loss": 2.7478, "step": 332 }, { "epoch": 0.01, "grad_norm": 0.6014834046363831, "learning_rate": 0.0005999798097903802, "loss": 2.6963, "step": 333 }, { "epoch": 0.01, "grad_norm": 0.7133057713508606, "learning_rate": 0.0005999796883471545, "loss": 2.6955, "step": 334 }, { "epoch": 0.01, "grad_norm": 0.6362269520759583, "learning_rate": 0.0005999795665397977, "loss": 2.7098, "step": 335 }, { "epoch": 0.01, "grad_norm": 0.632685661315918, "learning_rate": 0.00059997944436831, "loss": 2.6955, "step": 336 }, { "epoch": 0.01, "grad_norm": 0.6004429459571838, "learning_rate": 0.0005999793218326912, "loss": 2.6468, "step": 337 }, { "epoch": 0.01, "grad_norm": 0.6282788515090942, "learning_rate": 0.0005999791989329419, "loss": 2.7044, "step": 338 }, { "epoch": 0.01, "grad_norm": 0.6343033313751221, "learning_rate": 0.000599979075669062, "loss": 2.6997, "step": 339 }, { "epoch": 0.01, "grad_norm": 0.6819538474082947, "learning_rate": 0.0005999789520410518, "loss": 2.7402, "step": 340 }, { "epoch": 0.01, "grad_norm": 0.6611376404762268, "learning_rate": 0.0005999788280489113, "loss": 2.656, "step": 341 }, { "epoch": 0.01, "grad_norm": 0.648908257484436, "learning_rate": 0.0005999787036926407, "loss": 2.5965, "step": 342 }, { "epoch": 0.01, "grad_norm": 0.653128445148468, "learning_rate": 0.00059997857897224, "loss": 2.7528, "step": 343 }, { "epoch": 0.01, "grad_norm": 0.6607486009597778, "learning_rate": 0.0005999784538877096, "loss": 2.7516, "step": 344 }, { "epoch": 0.01, "grad_norm": 0.6748612523078918, "learning_rate": 0.0005999783284390496, "loss": 2.7685, "step": 345 }, { "epoch": 0.01, "grad_norm": 0.6408125758171082, "learning_rate": 0.0005999782026262601, "loss": 2.6896, "step": 346 }, { "epoch": 0.01, "grad_norm": 0.6164222955703735, "learning_rate": 0.0005999780764493412, "loss": 2.7139, "step": 347 }, { "epoch": 0.01, "grad_norm": 0.6720207929611206, "learning_rate": 0.0005999779499082932, "loss": 2.7086, "step": 348 }, { "epoch": 0.01, "grad_norm": 0.6015383005142212, "learning_rate": 0.0005999778230031161, "loss": 2.7191, "step": 349 }, { "epoch": 0.01, "grad_norm": 0.6625187397003174, "learning_rate": 0.0005999776957338102, "loss": 2.6646, "step": 350 }, { "epoch": 0.01, "grad_norm": 0.6371319890022278, "learning_rate": 0.0005999775681003756, "loss": 2.7145, "step": 351 }, { "epoch": 0.01, "grad_norm": 0.6196424961090088, "learning_rate": 0.0005999774401028123, "loss": 2.6782, "step": 352 }, { "epoch": 0.01, "grad_norm": 0.6352019309997559, "learning_rate": 0.0005999773117411207, "loss": 2.7168, "step": 353 }, { "epoch": 0.01, "grad_norm": 0.6816499829292297, "learning_rate": 0.0005999771830153007, "loss": 2.6283, "step": 354 }, { "epoch": 0.01, "grad_norm": 0.6432925462722778, "learning_rate": 0.0005999770539253528, "loss": 2.7813, "step": 355 }, { "epoch": 0.01, "grad_norm": 0.670444130897522, "learning_rate": 0.0005999769244712768, "loss": 2.7959, "step": 356 }, { "epoch": 0.01, "grad_norm": 0.6401315331459045, "learning_rate": 0.0005999767946530732, "loss": 2.6394, "step": 357 }, { "epoch": 0.01, "grad_norm": 0.625008225440979, "learning_rate": 0.0005999766644707417, "loss": 2.8133, "step": 358 }, { "epoch": 0.01, "grad_norm": 0.62525475025177, "learning_rate": 0.0005999765339242828, "loss": 2.7086, "step": 359 }, { "epoch": 0.01, "grad_norm": 0.652643084526062, "learning_rate": 0.0005999764030136967, "loss": 2.5981, "step": 360 }, { "epoch": 0.01, "grad_norm": 0.6106101274490356, "learning_rate": 0.0005999762717389834, "loss": 2.6523, "step": 361 }, { "epoch": 0.01, "grad_norm": 0.6458849310874939, "learning_rate": 0.0005999761401001432, "loss": 2.6782, "step": 362 }, { "epoch": 0.01, "grad_norm": 0.6584768891334534, "learning_rate": 0.000599976008097176, "loss": 2.7817, "step": 363 }, { "epoch": 0.01, "grad_norm": 0.6841425895690918, "learning_rate": 0.0005999758757300822, "loss": 2.7548, "step": 364 }, { "epoch": 0.01, "grad_norm": 0.6416721343994141, "learning_rate": 0.0005999757429988619, "loss": 2.6803, "step": 365 }, { "epoch": 0.01, "grad_norm": 0.7299079895019531, "learning_rate": 0.0005999756099035152, "loss": 2.699, "step": 366 }, { "epoch": 0.01, "grad_norm": 0.6800252795219421, "learning_rate": 0.0005999754764440424, "loss": 2.6218, "step": 367 }, { "epoch": 0.01, "grad_norm": 0.6470412015914917, "learning_rate": 0.0005999753426204435, "loss": 2.6669, "step": 368 }, { "epoch": 0.01, "grad_norm": 0.6610276103019714, "learning_rate": 0.0005999752084327188, "loss": 2.6337, "step": 369 }, { "epoch": 0.01, "grad_norm": 0.6745165586471558, "learning_rate": 0.0005999750738808683, "loss": 2.6711, "step": 370 }, { "epoch": 0.01, "grad_norm": 0.7228440642356873, "learning_rate": 0.0005999749389648922, "loss": 2.6973, "step": 371 }, { "epoch": 0.01, "grad_norm": 0.6761088967323303, "learning_rate": 0.0005999748036847909, "loss": 2.7859, "step": 372 }, { "epoch": 0.01, "grad_norm": 0.6779002547264099, "learning_rate": 0.0005999746680405642, "loss": 2.6121, "step": 373 }, { "epoch": 0.01, "grad_norm": 0.7465566396713257, "learning_rate": 0.0005999745320322126, "loss": 2.6773, "step": 374 }, { "epoch": 0.01, "grad_norm": 0.6365389227867126, "learning_rate": 0.0005999743956597361, "loss": 2.7016, "step": 375 }, { "epoch": 0.01, "grad_norm": 0.6388929486274719, "learning_rate": 0.0005999742589231349, "loss": 2.6444, "step": 376 }, { "epoch": 0.01, "grad_norm": 0.7233768701553345, "learning_rate": 0.0005999741218224091, "loss": 2.6294, "step": 377 }, { "epoch": 0.01, "grad_norm": 0.6407190561294556, "learning_rate": 0.0005999739843575589, "loss": 2.7021, "step": 378 }, { "epoch": 0.01, "grad_norm": 0.6870217323303223, "learning_rate": 0.0005999738465285845, "loss": 2.6623, "step": 379 }, { "epoch": 0.01, "grad_norm": 0.6366071701049805, "learning_rate": 0.000599973708335486, "loss": 2.6355, "step": 380 }, { "epoch": 0.01, "grad_norm": 0.6308900713920593, "learning_rate": 0.0005999735697782637, "loss": 2.6761, "step": 381 }, { "epoch": 0.01, "grad_norm": 0.684761643409729, "learning_rate": 0.0005999734308569177, "loss": 2.6021, "step": 382 }, { "epoch": 0.01, "grad_norm": 0.6786709427833557, "learning_rate": 0.000599973291571448, "loss": 2.6751, "step": 383 }, { "epoch": 0.01, "grad_norm": 0.6589264273643494, "learning_rate": 0.0005999731519218552, "loss": 2.7074, "step": 384 }, { "epoch": 0.01, "grad_norm": 0.6712338924407959, "learning_rate": 0.0005999730119081389, "loss": 2.6035, "step": 385 }, { "epoch": 0.01, "grad_norm": 0.6534379124641418, "learning_rate": 0.0005999728715302998, "loss": 2.6477, "step": 386 }, { "epoch": 0.01, "grad_norm": 0.6323599815368652, "learning_rate": 0.0005999727307883378, "loss": 2.6866, "step": 387 }, { "epoch": 0.01, "grad_norm": 0.6587765216827393, "learning_rate": 0.0005999725896822529, "loss": 2.6869, "step": 388 }, { "epoch": 0.01, "grad_norm": 0.7090145945549011, "learning_rate": 0.0005999724482120457, "loss": 2.7587, "step": 389 }, { "epoch": 0.01, "grad_norm": 0.6630924940109253, "learning_rate": 0.000599972306377716, "loss": 2.6445, "step": 390 }, { "epoch": 0.01, "grad_norm": 0.6858501434326172, "learning_rate": 0.0005999721641792642, "loss": 2.5517, "step": 391 }, { "epoch": 0.01, "grad_norm": 0.6551648378372192, "learning_rate": 0.0005999720216166904, "loss": 2.5903, "step": 392 }, { "epoch": 0.01, "grad_norm": 0.6710469722747803, "learning_rate": 0.0005999718786899946, "loss": 2.6377, "step": 393 }, { "epoch": 0.01, "grad_norm": 0.6536141037940979, "learning_rate": 0.0005999717353991774, "loss": 2.7152, "step": 394 }, { "epoch": 0.01, "grad_norm": 0.6713338494300842, "learning_rate": 0.0005999715917442384, "loss": 2.6979, "step": 395 }, { "epoch": 0.01, "grad_norm": 0.6976268887519836, "learning_rate": 0.0005999714477251783, "loss": 2.6718, "step": 396 }, { "epoch": 0.01, "grad_norm": 0.6765096187591553, "learning_rate": 0.0005999713033419971, "loss": 2.7299, "step": 397 }, { "epoch": 0.01, "grad_norm": 0.6986914277076721, "learning_rate": 0.0005999711585946948, "loss": 2.6898, "step": 398 }, { "epoch": 0.01, "grad_norm": 0.6317085027694702, "learning_rate": 0.0005999710134832718, "loss": 2.6548, "step": 399 }, { "epoch": 0.01, "grad_norm": 0.627461850643158, "learning_rate": 0.0005999708680077281, "loss": 2.7489, "step": 400 }, { "epoch": 0.01, "grad_norm": 0.6631954312324524, "learning_rate": 0.0005999707221680641, "loss": 2.6017, "step": 401 }, { "epoch": 0.01, "grad_norm": 0.6757644414901733, "learning_rate": 0.0005999705759642798, "loss": 2.6309, "step": 402 }, { "epoch": 0.01, "grad_norm": 0.6909956336021423, "learning_rate": 0.0005999704293963752, "loss": 2.6832, "step": 403 }, { "epoch": 0.01, "grad_norm": 0.7227301597595215, "learning_rate": 0.000599970282464351, "loss": 2.6539, "step": 404 }, { "epoch": 0.01, "grad_norm": 0.6058258414268494, "learning_rate": 0.0005999701351682069, "loss": 2.5965, "step": 405 }, { "epoch": 0.01, "grad_norm": 0.7017897367477417, "learning_rate": 0.0005999699875079434, "loss": 2.6052, "step": 406 }, { "epoch": 0.01, "grad_norm": 0.7069380879402161, "learning_rate": 0.0005999698394835604, "loss": 2.642, "step": 407 }, { "epoch": 0.01, "grad_norm": 0.6994395852088928, "learning_rate": 0.0005999696910950582, "loss": 2.6473, "step": 408 }, { "epoch": 0.01, "grad_norm": 0.7167696952819824, "learning_rate": 0.0005999695423424371, "loss": 2.6252, "step": 409 }, { "epoch": 0.01, "grad_norm": 0.6480597257614136, "learning_rate": 0.0005999693932256971, "loss": 2.6554, "step": 410 }, { "epoch": 0.01, "grad_norm": 0.8165448307991028, "learning_rate": 0.0005999692437448386, "loss": 2.6438, "step": 411 }, { "epoch": 0.01, "grad_norm": 0.6423686742782593, "learning_rate": 0.0005999690938998614, "loss": 2.5785, "step": 412 }, { "epoch": 0.01, "grad_norm": 0.7155359387397766, "learning_rate": 0.000599968943690766, "loss": 2.5828, "step": 413 }, { "epoch": 0.01, "grad_norm": 0.7203779220581055, "learning_rate": 0.0005999687931175525, "loss": 2.637, "step": 414 }, { "epoch": 0.01, "grad_norm": 0.5990158915519714, "learning_rate": 0.0005999686421802213, "loss": 2.6405, "step": 415 }, { "epoch": 0.01, "grad_norm": 0.7736847400665283, "learning_rate": 0.0005999684908787722, "loss": 2.6166, "step": 416 }, { "epoch": 0.01, "grad_norm": 0.6508045196533203, "learning_rate": 0.0005999683392132055, "loss": 2.5971, "step": 417 }, { "epoch": 0.01, "grad_norm": 0.6267573833465576, "learning_rate": 0.0005999681871835214, "loss": 2.5361, "step": 418 }, { "epoch": 0.01, "grad_norm": 0.7108944058418274, "learning_rate": 0.0005999680347897202, "loss": 2.6247, "step": 419 }, { "epoch": 0.01, "grad_norm": 0.6452497839927673, "learning_rate": 0.000599967882031802, "loss": 2.5962, "step": 420 }, { "epoch": 0.01, "grad_norm": 0.6689618229866028, "learning_rate": 0.000599967728909767, "loss": 2.5544, "step": 421 }, { "epoch": 0.01, "grad_norm": 0.674406111240387, "learning_rate": 0.0005999675754236153, "loss": 2.6474, "step": 422 }, { "epoch": 0.01, "grad_norm": 0.6839715838432312, "learning_rate": 0.0005999674215733473, "loss": 2.652, "step": 423 }, { "epoch": 0.01, "grad_norm": 0.7615177631378174, "learning_rate": 0.000599967267358963, "loss": 2.6306, "step": 424 }, { "epoch": 0.01, "grad_norm": 0.653272807598114, "learning_rate": 0.0005999671127804626, "loss": 2.6164, "step": 425 }, { "epoch": 0.01, "grad_norm": 0.6450526714324951, "learning_rate": 0.0005999669578378463, "loss": 2.6239, "step": 426 }, { "epoch": 0.01, "grad_norm": 0.7310366630554199, "learning_rate": 0.0005999668025311143, "loss": 2.6045, "step": 427 }, { "epoch": 0.01, "grad_norm": 0.6927586197853088, "learning_rate": 0.0005999666468602669, "loss": 2.5898, "step": 428 }, { "epoch": 0.01, "grad_norm": 0.7217995524406433, "learning_rate": 0.0005999664908253041, "loss": 2.6197, "step": 429 }, { "epoch": 0.01, "grad_norm": 0.708754301071167, "learning_rate": 0.0005999663344262261, "loss": 2.5713, "step": 430 }, { "epoch": 0.01, "grad_norm": 0.6958082318305969, "learning_rate": 0.0005999661776630333, "loss": 2.6845, "step": 431 }, { "epoch": 0.01, "grad_norm": 0.7090461850166321, "learning_rate": 0.0005999660205357257, "loss": 2.6159, "step": 432 }, { "epoch": 0.01, "grad_norm": 0.724092960357666, "learning_rate": 0.0005999658630443037, "loss": 2.5754, "step": 433 }, { "epoch": 0.01, "grad_norm": 0.6795852184295654, "learning_rate": 0.0005999657051887672, "loss": 2.7022, "step": 434 }, { "epoch": 0.01, "grad_norm": 0.6724846959114075, "learning_rate": 0.0005999655469691165, "loss": 2.5838, "step": 435 }, { "epoch": 0.01, "grad_norm": 0.6819875836372375, "learning_rate": 0.000599965388385352, "loss": 2.6102, "step": 436 }, { "epoch": 0.01, "grad_norm": 0.6649544835090637, "learning_rate": 0.0005999652294374735, "loss": 2.6477, "step": 437 }, { "epoch": 0.01, "grad_norm": 0.645160436630249, "learning_rate": 0.0005999650701254815, "loss": 2.6922, "step": 438 }, { "epoch": 0.01, "grad_norm": 0.6468984484672546, "learning_rate": 0.0005999649104493762, "loss": 2.5879, "step": 439 }, { "epoch": 0.01, "grad_norm": 0.6950235366821289, "learning_rate": 0.0005999647504091576, "loss": 2.5622, "step": 440 }, { "epoch": 0.01, "grad_norm": 0.6316680908203125, "learning_rate": 0.000599964590004826, "loss": 2.536, "step": 441 }, { "epoch": 0.01, "grad_norm": 0.6526159048080444, "learning_rate": 0.0005999644292363815, "loss": 2.5876, "step": 442 }, { "epoch": 0.01, "grad_norm": 0.6949599981307983, "learning_rate": 0.0005999642681038244, "loss": 2.6168, "step": 443 }, { "epoch": 0.01, "grad_norm": 0.6152246594429016, "learning_rate": 0.000599964106607155, "loss": 2.6409, "step": 444 }, { "epoch": 0.01, "grad_norm": 0.6786866188049316, "learning_rate": 0.0005999639447463734, "loss": 2.7056, "step": 445 }, { "epoch": 0.01, "grad_norm": 0.6597288846969604, "learning_rate": 0.0005999637825214796, "loss": 2.6139, "step": 446 }, { "epoch": 0.01, "grad_norm": 0.7074987292289734, "learning_rate": 0.000599963619932474, "loss": 2.5754, "step": 447 }, { "epoch": 0.01, "grad_norm": 0.6621227264404297, "learning_rate": 0.0005999634569793568, "loss": 2.5488, "step": 448 }, { "epoch": 0.01, "grad_norm": 0.6622974276542664, "learning_rate": 0.0005999632936621282, "loss": 2.6545, "step": 449 }, { "epoch": 0.01, "grad_norm": 0.7445245385169983, "learning_rate": 0.0005999631299807884, "loss": 2.6788, "step": 450 }, { "epoch": 0.02, "grad_norm": 0.6510895490646362, "learning_rate": 0.0005999629659353375, "loss": 2.6113, "step": 451 }, { "epoch": 0.02, "grad_norm": 0.7096835970878601, "learning_rate": 0.0005999628015257757, "loss": 2.5649, "step": 452 }, { "epoch": 0.02, "grad_norm": 0.6495921611785889, "learning_rate": 0.0005999626367521033, "loss": 2.6718, "step": 453 }, { "epoch": 0.02, "grad_norm": 0.6290482878684998, "learning_rate": 0.0005999624716143204, "loss": 2.6071, "step": 454 }, { "epoch": 0.02, "grad_norm": 0.6940696835517883, "learning_rate": 0.0005999623061124273, "loss": 2.5565, "step": 455 }, { "epoch": 0.02, "grad_norm": 0.6441452503204346, "learning_rate": 0.0005999621402464242, "loss": 2.6185, "step": 456 }, { "epoch": 0.02, "grad_norm": 0.6504276990890503, "learning_rate": 0.0005999619740163112, "loss": 2.523, "step": 457 }, { "epoch": 0.02, "grad_norm": 0.6543926000595093, "learning_rate": 0.0005999618074220886, "loss": 2.5372, "step": 458 }, { "epoch": 0.02, "grad_norm": 0.6211395859718323, "learning_rate": 0.0005999616404637566, "loss": 2.5665, "step": 459 }, { "epoch": 0.02, "grad_norm": 0.7008002996444702, "learning_rate": 0.0005999614731413153, "loss": 2.5341, "step": 460 }, { "epoch": 0.02, "grad_norm": 0.7139450311660767, "learning_rate": 0.0005999613054547649, "loss": 2.6468, "step": 461 }, { "epoch": 0.02, "grad_norm": 0.6874164938926697, "learning_rate": 0.0005999611374041058, "loss": 2.5833, "step": 462 }, { "epoch": 0.02, "grad_norm": 0.7205750346183777, "learning_rate": 0.0005999609689893381, "loss": 2.5305, "step": 463 }, { "epoch": 0.02, "grad_norm": 0.6419154405593872, "learning_rate": 0.0005999608002104619, "loss": 2.5347, "step": 464 }, { "epoch": 0.02, "grad_norm": 0.6905184984207153, "learning_rate": 0.0005999606310674774, "loss": 2.5899, "step": 465 }, { "epoch": 0.02, "grad_norm": 0.6577203273773193, "learning_rate": 0.000599960461560385, "loss": 2.5757, "step": 466 }, { "epoch": 0.02, "grad_norm": 0.7173852920532227, "learning_rate": 0.0005999602916891848, "loss": 2.6084, "step": 467 }, { "epoch": 0.02, "grad_norm": 0.7292800545692444, "learning_rate": 0.0005999601214538771, "loss": 2.5126, "step": 468 }, { "epoch": 0.02, "grad_norm": 0.6603172421455383, "learning_rate": 0.0005999599508544617, "loss": 2.5566, "step": 469 }, { "epoch": 0.02, "grad_norm": 0.6989210844039917, "learning_rate": 0.0005999597798909393, "loss": 2.5558, "step": 470 }, { "epoch": 0.02, "grad_norm": 0.640343427658081, "learning_rate": 0.00059995960856331, "loss": 2.6232, "step": 471 }, { "epoch": 0.02, "grad_norm": 0.694839596748352, "learning_rate": 0.0005999594368715739, "loss": 2.5796, "step": 472 }, { "epoch": 0.02, "grad_norm": 0.6381158828735352, "learning_rate": 0.0005999592648157313, "loss": 2.5571, "step": 473 }, { "epoch": 0.02, "grad_norm": 0.69490647315979, "learning_rate": 0.0005999590923957822, "loss": 2.5957, "step": 474 }, { "epoch": 0.02, "grad_norm": 0.6490135788917542, "learning_rate": 0.0005999589196117271, "loss": 2.6035, "step": 475 }, { "epoch": 0.02, "grad_norm": 0.6543951630592346, "learning_rate": 0.0005999587464635659, "loss": 2.4976, "step": 476 }, { "epoch": 0.02, "grad_norm": 0.6850594878196716, "learning_rate": 0.0005999585729512991, "loss": 2.5582, "step": 477 }, { "epoch": 0.02, "grad_norm": 0.6662493944168091, "learning_rate": 0.0005999583990749268, "loss": 2.5341, "step": 478 }, { "epoch": 0.02, "grad_norm": 0.6546659469604492, "learning_rate": 0.0005999582248344492, "loss": 2.5947, "step": 479 }, { "epoch": 0.02, "grad_norm": 0.714435338973999, "learning_rate": 0.0005999580502298663, "loss": 2.6733, "step": 480 }, { "epoch": 0.02, "grad_norm": 0.6828724145889282, "learning_rate": 0.0005999578752611788, "loss": 2.5268, "step": 481 }, { "epoch": 0.02, "grad_norm": 0.6983253955841064, "learning_rate": 0.0005999576999283865, "loss": 2.5007, "step": 482 }, { "epoch": 0.02, "grad_norm": 0.6934231519699097, "learning_rate": 0.0005999575242314897, "loss": 2.6123, "step": 483 }, { "epoch": 0.02, "grad_norm": 0.6844442486763, "learning_rate": 0.0005999573481704888, "loss": 2.5582, "step": 484 }, { "epoch": 0.02, "grad_norm": 0.6888948678970337, "learning_rate": 0.0005999571717453839, "loss": 2.5129, "step": 485 }, { "epoch": 0.02, "grad_norm": 0.6629772186279297, "learning_rate": 0.0005999569949561751, "loss": 2.5254, "step": 486 }, { "epoch": 0.02, "grad_norm": 0.6691018342971802, "learning_rate": 0.0005999568178028627, "loss": 2.6056, "step": 487 }, { "epoch": 0.02, "grad_norm": 0.6528480648994446, "learning_rate": 0.0005999566402854469, "loss": 2.5839, "step": 488 }, { "epoch": 0.02, "grad_norm": 0.6461921334266663, "learning_rate": 0.000599956462403928, "loss": 2.5806, "step": 489 }, { "epoch": 0.02, "grad_norm": 0.6722356081008911, "learning_rate": 0.0005999562841583061, "loss": 2.5021, "step": 490 }, { "epoch": 0.02, "grad_norm": 0.6915991306304932, "learning_rate": 0.0005999561055485815, "loss": 2.5856, "step": 491 }, { "epoch": 0.02, "grad_norm": 0.6539553999900818, "learning_rate": 0.0005999559265747543, "loss": 2.518, "step": 492 }, { "epoch": 0.02, "grad_norm": 0.6757172346115112, "learning_rate": 0.0005999557472368248, "loss": 2.6336, "step": 493 }, { "epoch": 0.02, "grad_norm": 0.6744173169136047, "learning_rate": 0.0005999555675347934, "loss": 2.6177, "step": 494 }, { "epoch": 0.02, "grad_norm": 0.7011879682540894, "learning_rate": 0.00059995538746866, "loss": 2.5708, "step": 495 }, { "epoch": 0.02, "grad_norm": 0.6421108841896057, "learning_rate": 0.000599955207038425, "loss": 2.5575, "step": 496 }, { "epoch": 0.02, "grad_norm": 0.6713253259658813, "learning_rate": 0.0005999550262440886, "loss": 2.6115, "step": 497 }, { "epoch": 0.02, "grad_norm": 0.698743462562561, "learning_rate": 0.0005999548450856509, "loss": 2.4953, "step": 498 }, { "epoch": 0.02, "grad_norm": 0.6485822796821594, "learning_rate": 0.0005999546635631122, "loss": 2.5031, "step": 499 }, { "epoch": 0.02, "grad_norm": 0.6354538798332214, "learning_rate": 0.0005999544816764729, "loss": 2.5438, "step": 500 }, { "epoch": 0.02, "grad_norm": 0.6770737767219543, "learning_rate": 0.0005999542994257329, "loss": 2.4641, "step": 501 }, { "epoch": 0.02, "grad_norm": 0.6497158408164978, "learning_rate": 0.0005999541168108926, "loss": 2.519, "step": 502 }, { "epoch": 0.02, "grad_norm": 0.6879367232322693, "learning_rate": 0.0005999539338319524, "loss": 2.5149, "step": 503 }, { "epoch": 0.02, "grad_norm": 0.6706629991531372, "learning_rate": 0.0005999537504889121, "loss": 2.5095, "step": 504 }, { "epoch": 0.02, "grad_norm": 0.6462004780769348, "learning_rate": 0.0005999535667817722, "loss": 2.5595, "step": 505 }, { "epoch": 0.02, "grad_norm": 0.6282006502151489, "learning_rate": 0.0005999533827105327, "loss": 2.5775, "step": 506 }, { "epoch": 0.02, "grad_norm": 0.6613785028457642, "learning_rate": 0.0005999531982751943, "loss": 2.4188, "step": 507 }, { "epoch": 0.02, "grad_norm": 0.7306037545204163, "learning_rate": 0.0005999530134757567, "loss": 2.505, "step": 508 }, { "epoch": 0.02, "grad_norm": 0.6447604894638062, "learning_rate": 0.0005999528283122204, "loss": 2.6254, "step": 509 }, { "epoch": 0.02, "grad_norm": 0.6921395659446716, "learning_rate": 0.0005999526427845856, "loss": 2.5638, "step": 510 }, { "epoch": 0.02, "grad_norm": 0.669836699962616, "learning_rate": 0.0005999524568928525, "loss": 2.5157, "step": 511 }, { "epoch": 0.02, "grad_norm": 0.6389235854148865, "learning_rate": 0.0005999522706370212, "loss": 2.5568, "step": 512 }, { "epoch": 0.02, "grad_norm": 0.6569678783416748, "learning_rate": 0.0005999520840170921, "loss": 2.5223, "step": 513 }, { "epoch": 0.02, "grad_norm": 0.6424312591552734, "learning_rate": 0.0005999518970330654, "loss": 2.541, "step": 514 }, { "epoch": 0.02, "grad_norm": 0.6205988526344299, "learning_rate": 0.0005999517096849413, "loss": 2.5031, "step": 515 }, { "epoch": 0.02, "grad_norm": 0.6362354159355164, "learning_rate": 0.00059995152197272, "loss": 2.5691, "step": 516 }, { "epoch": 0.02, "grad_norm": 0.6457034349441528, "learning_rate": 0.0005999513338964017, "loss": 2.6104, "step": 517 }, { "epoch": 0.02, "grad_norm": 0.6252771615982056, "learning_rate": 0.0005999511454559867, "loss": 2.5546, "step": 518 }, { "epoch": 0.02, "grad_norm": 0.617591917514801, "learning_rate": 0.0005999509566514752, "loss": 2.5552, "step": 519 }, { "epoch": 0.02, "grad_norm": 0.6486532092094421, "learning_rate": 0.0005999507674828674, "loss": 2.598, "step": 520 }, { "epoch": 0.02, "grad_norm": 0.6527370810508728, "learning_rate": 0.0005999505779501637, "loss": 2.5378, "step": 521 }, { "epoch": 0.02, "grad_norm": 0.6281787753105164, "learning_rate": 0.0005999503880533641, "loss": 2.5553, "step": 522 }, { "epoch": 0.02, "grad_norm": 0.6089480519294739, "learning_rate": 0.0005999501977924688, "loss": 2.5962, "step": 523 }, { "epoch": 0.02, "grad_norm": 0.730645477771759, "learning_rate": 0.0005999500071674783, "loss": 2.5611, "step": 524 }, { "epoch": 0.02, "grad_norm": 0.6680302023887634, "learning_rate": 0.0005999498161783928, "loss": 2.5053, "step": 525 }, { "epoch": 0.02, "grad_norm": 0.6365115642547607, "learning_rate": 0.0005999496248252122, "loss": 2.4403, "step": 526 }, { "epoch": 0.02, "grad_norm": 0.6694333553314209, "learning_rate": 0.0005999494331079371, "loss": 2.5444, "step": 527 }, { "epoch": 0.02, "grad_norm": 0.611497700214386, "learning_rate": 0.0005999492410265675, "loss": 2.4448, "step": 528 }, { "epoch": 0.02, "grad_norm": 0.6575146317481995, "learning_rate": 0.0005999490485811038, "loss": 2.6059, "step": 529 }, { "epoch": 0.02, "grad_norm": 0.6439598798751831, "learning_rate": 0.0005999488557715461, "loss": 2.4911, "step": 530 }, { "epoch": 0.02, "grad_norm": 0.6988347768783569, "learning_rate": 0.0005999486625978946, "loss": 2.4253, "step": 531 }, { "epoch": 0.02, "grad_norm": 0.6646966934204102, "learning_rate": 0.0005999484690601498, "loss": 2.5175, "step": 532 }, { "epoch": 0.02, "grad_norm": 0.6479023694992065, "learning_rate": 0.0005999482751583116, "loss": 2.5687, "step": 533 }, { "epoch": 0.02, "grad_norm": 0.6177371144294739, "learning_rate": 0.0005999480808923804, "loss": 2.5779, "step": 534 }, { "epoch": 0.02, "grad_norm": 0.6950321793556213, "learning_rate": 0.0005999478862623565, "loss": 2.5461, "step": 535 }, { "epoch": 0.02, "grad_norm": 0.7030653357505798, "learning_rate": 0.00059994769126824, "loss": 2.5472, "step": 536 }, { "epoch": 0.02, "grad_norm": 0.6401596069335938, "learning_rate": 0.0005999474959100312, "loss": 2.5364, "step": 537 }, { "epoch": 0.02, "grad_norm": 0.6949275732040405, "learning_rate": 0.0005999473001877304, "loss": 2.5054, "step": 538 }, { "epoch": 0.02, "grad_norm": 0.6389299035072327, "learning_rate": 0.0005999471041013377, "loss": 2.4582, "step": 539 }, { "epoch": 0.02, "grad_norm": 0.6910102963447571, "learning_rate": 0.0005999469076508534, "loss": 2.4808, "step": 540 }, { "epoch": 0.02, "grad_norm": 0.6567795872688293, "learning_rate": 0.0005999467108362778, "loss": 2.5645, "step": 541 }, { "epoch": 0.02, "grad_norm": 0.6770609617233276, "learning_rate": 0.0005999465136576111, "loss": 2.5137, "step": 542 }, { "epoch": 0.02, "grad_norm": 0.6796299815177917, "learning_rate": 0.0005999463161148536, "loss": 2.5021, "step": 543 }, { "epoch": 0.02, "grad_norm": 0.6529844999313354, "learning_rate": 0.0005999461182080054, "loss": 2.5704, "step": 544 }, { "epoch": 0.02, "grad_norm": 0.6637080907821655, "learning_rate": 0.0005999459199370667, "loss": 2.5097, "step": 545 }, { "epoch": 0.02, "grad_norm": 0.6288779377937317, "learning_rate": 0.0005999457213020379, "loss": 2.444, "step": 546 }, { "epoch": 0.02, "grad_norm": 0.6591435074806213, "learning_rate": 0.0005999455223029193, "loss": 2.4762, "step": 547 }, { "epoch": 0.02, "grad_norm": 0.6226350665092468, "learning_rate": 0.000599945322939711, "loss": 2.5072, "step": 548 }, { "epoch": 0.02, "grad_norm": 0.6597943305969238, "learning_rate": 0.0005999451232124131, "loss": 2.4965, "step": 549 }, { "epoch": 0.02, "grad_norm": 0.648186445236206, "learning_rate": 0.0005999449231210262, "loss": 2.5863, "step": 550 }, { "epoch": 0.02, "grad_norm": 0.6323103904724121, "learning_rate": 0.0005999447226655503, "loss": 2.4494, "step": 551 }, { "epoch": 0.02, "grad_norm": 0.6838457584381104, "learning_rate": 0.0005999445218459856, "loss": 2.527, "step": 552 }, { "epoch": 0.02, "grad_norm": 0.6294061541557312, "learning_rate": 0.0005999443206623326, "loss": 2.4866, "step": 553 }, { "epoch": 0.02, "grad_norm": 0.6566647291183472, "learning_rate": 0.0005999441191145914, "loss": 2.4855, "step": 554 }, { "epoch": 0.02, "grad_norm": 0.6397304534912109, "learning_rate": 0.0005999439172027621, "loss": 2.5505, "step": 555 }, { "epoch": 0.02, "grad_norm": 0.6590810418128967, "learning_rate": 0.0005999437149268452, "loss": 2.5842, "step": 556 }, { "epoch": 0.02, "grad_norm": 0.6346414685249329, "learning_rate": 0.0005999435122868407, "loss": 2.4933, "step": 557 }, { "epoch": 0.02, "grad_norm": 0.6488456130027771, "learning_rate": 0.0005999433092827491, "loss": 2.5667, "step": 558 }, { "epoch": 0.02, "grad_norm": 0.7010729312896729, "learning_rate": 0.0005999431059145704, "loss": 2.4592, "step": 559 }, { "epoch": 0.02, "grad_norm": 0.6961466670036316, "learning_rate": 0.0005999429021823051, "loss": 2.4473, "step": 560 }, { "epoch": 0.02, "grad_norm": 0.6315332651138306, "learning_rate": 0.0005999426980859531, "loss": 2.5188, "step": 561 }, { "epoch": 0.02, "grad_norm": 0.6505325436592102, "learning_rate": 0.0005999424936255151, "loss": 2.5579, "step": 562 }, { "epoch": 0.02, "grad_norm": 0.6486373543739319, "learning_rate": 0.0005999422888009909, "loss": 2.5497, "step": 563 }, { "epoch": 0.02, "grad_norm": 0.6357128620147705, "learning_rate": 0.0005999420836123811, "loss": 2.5116, "step": 564 }, { "epoch": 0.02, "grad_norm": 0.6251356601715088, "learning_rate": 0.0005999418780596858, "loss": 2.5242, "step": 565 }, { "epoch": 0.02, "grad_norm": 0.6425082087516785, "learning_rate": 0.0005999416721429052, "loss": 2.4892, "step": 566 }, { "epoch": 0.02, "grad_norm": 0.6702527403831482, "learning_rate": 0.0005999414658620396, "loss": 2.4132, "step": 567 }, { "epoch": 0.02, "grad_norm": 0.6609665155410767, "learning_rate": 0.0005999412592170893, "loss": 2.5439, "step": 568 }, { "epoch": 0.02, "grad_norm": 0.6591885685920715, "learning_rate": 0.0005999410522080545, "loss": 2.4753, "step": 569 }, { "epoch": 0.02, "grad_norm": 0.641292929649353, "learning_rate": 0.0005999408448349354, "loss": 2.513, "step": 570 }, { "epoch": 0.02, "grad_norm": 0.6542164087295532, "learning_rate": 0.0005999406370977324, "loss": 2.4971, "step": 571 }, { "epoch": 0.02, "grad_norm": 0.642086386680603, "learning_rate": 0.0005999404289964456, "loss": 2.4575, "step": 572 }, { "epoch": 0.02, "grad_norm": 0.7163092494010925, "learning_rate": 0.0005999402205310754, "loss": 2.4801, "step": 573 }, { "epoch": 0.02, "grad_norm": 0.6194419860839844, "learning_rate": 0.0005999400117016217, "loss": 2.4646, "step": 574 }, { "epoch": 0.02, "grad_norm": 0.6672457456588745, "learning_rate": 0.0005999398025080852, "loss": 2.4694, "step": 575 }, { "epoch": 0.02, "grad_norm": 0.6479537487030029, "learning_rate": 0.000599939592950466, "loss": 2.4411, "step": 576 }, { "epoch": 0.02, "grad_norm": 0.6428293585777283, "learning_rate": 0.0005999393830287644, "loss": 2.5501, "step": 577 }, { "epoch": 0.02, "grad_norm": 0.6548900008201599, "learning_rate": 0.0005999391727429805, "loss": 2.5344, "step": 578 }, { "epoch": 0.02, "grad_norm": 0.6443869471549988, "learning_rate": 0.0005999389620931145, "loss": 2.4695, "step": 579 }, { "epoch": 0.02, "grad_norm": 0.6658505797386169, "learning_rate": 0.000599938751079167, "loss": 2.4622, "step": 580 }, { "epoch": 0.02, "grad_norm": 0.6522718667984009, "learning_rate": 0.000599938539701138, "loss": 2.5132, "step": 581 }, { "epoch": 0.02, "grad_norm": 0.7162045836448669, "learning_rate": 0.0005999383279590277, "loss": 2.5042, "step": 582 }, { "epoch": 0.02, "grad_norm": 0.6381886601448059, "learning_rate": 0.0005999381158528366, "loss": 2.5754, "step": 583 }, { "epoch": 0.02, "grad_norm": 0.6751477122306824, "learning_rate": 0.0005999379033825649, "loss": 2.481, "step": 584 }, { "epoch": 0.02, "grad_norm": 0.6277583837509155, "learning_rate": 0.0005999376905482125, "loss": 2.4201, "step": 585 }, { "epoch": 0.02, "grad_norm": 0.6333678364753723, "learning_rate": 0.0005999374773497801, "loss": 2.4782, "step": 586 }, { "epoch": 0.02, "grad_norm": 0.7078677415847778, "learning_rate": 0.0005999372637872678, "loss": 2.4788, "step": 587 }, { "epoch": 0.02, "grad_norm": 0.6596091389656067, "learning_rate": 0.0005999370498606757, "loss": 2.5318, "step": 588 }, { "epoch": 0.02, "grad_norm": 0.6357480883598328, "learning_rate": 0.0005999368355700045, "loss": 2.4959, "step": 589 }, { "epoch": 0.02, "grad_norm": 0.7173094749450684, "learning_rate": 0.000599936620915254, "loss": 2.4428, "step": 590 }, { "epoch": 0.02, "grad_norm": 0.6377291083335876, "learning_rate": 0.0005999364058964247, "loss": 2.4099, "step": 591 }, { "epoch": 0.02, "grad_norm": 0.6915271282196045, "learning_rate": 0.0005999361905135168, "loss": 2.4424, "step": 592 }, { "epoch": 0.02, "grad_norm": 0.6672067046165466, "learning_rate": 0.0005999359747665304, "loss": 2.4984, "step": 593 }, { "epoch": 0.02, "grad_norm": 0.6318113803863525, "learning_rate": 0.0005999357586554661, "loss": 2.53, "step": 594 }, { "epoch": 0.02, "grad_norm": 0.67086261510849, "learning_rate": 0.0005999355421803239, "loss": 2.5094, "step": 595 }, { "epoch": 0.02, "grad_norm": 0.6739314794540405, "learning_rate": 0.0005999353253411041, "loss": 2.5146, "step": 596 }, { "epoch": 0.02, "grad_norm": 0.6393861174583435, "learning_rate": 0.0005999351081378071, "loss": 2.5445, "step": 597 }, { "epoch": 0.02, "grad_norm": 0.6447678208351135, "learning_rate": 0.000599934890570433, "loss": 2.4668, "step": 598 }, { "epoch": 0.02, "grad_norm": 0.6353607773780823, "learning_rate": 0.0005999346726389823, "loss": 2.4747, "step": 599 }, { "epoch": 0.02, "grad_norm": 0.5961223244667053, "learning_rate": 0.0005999344543434549, "loss": 2.5043, "step": 600 }, { "epoch": 0.02, "grad_norm": 0.6746023297309875, "learning_rate": 0.0005999342356838513, "loss": 2.4657, "step": 601 }, { "epoch": 0.02, "grad_norm": 0.6376559734344482, "learning_rate": 0.0005999340166601718, "loss": 2.4945, "step": 602 }, { "epoch": 0.02, "grad_norm": 0.6301698684692383, "learning_rate": 0.0005999337972724166, "loss": 2.506, "step": 603 }, { "epoch": 0.02, "grad_norm": 0.686466634273529, "learning_rate": 0.0005999335775205859, "loss": 2.3507, "step": 604 }, { "epoch": 0.02, "grad_norm": 0.6309223175048828, "learning_rate": 0.0005999333574046802, "loss": 2.5295, "step": 605 }, { "epoch": 0.02, "grad_norm": 0.7149024605751038, "learning_rate": 0.0005999331369246993, "loss": 2.4513, "step": 606 }, { "epoch": 0.02, "grad_norm": 0.6460164189338684, "learning_rate": 0.000599932916080644, "loss": 2.4325, "step": 607 }, { "epoch": 0.02, "grad_norm": 0.6308166980743408, "learning_rate": 0.0005999326948725142, "loss": 2.4171, "step": 608 }, { "epoch": 0.02, "grad_norm": 0.6719313859939575, "learning_rate": 0.0005999324733003105, "loss": 2.3702, "step": 609 }, { "epoch": 0.02, "grad_norm": 0.6443163752555847, "learning_rate": 0.0005999322513640327, "loss": 2.5086, "step": 610 }, { "epoch": 0.02, "grad_norm": 0.6519343256950378, "learning_rate": 0.0005999320290636815, "loss": 2.4615, "step": 611 }, { "epoch": 0.02, "grad_norm": 0.6575688719749451, "learning_rate": 0.000599931806399257, "loss": 2.4574, "step": 612 }, { "epoch": 0.02, "grad_norm": 0.6614997982978821, "learning_rate": 0.0005999315833707594, "loss": 2.4752, "step": 613 }, { "epoch": 0.02, "grad_norm": 0.6237818002700806, "learning_rate": 0.0005999313599781891, "loss": 2.4516, "step": 614 }, { "epoch": 0.02, "grad_norm": 0.6647166609764099, "learning_rate": 0.0005999311362215465, "loss": 2.466, "step": 615 }, { "epoch": 0.02, "grad_norm": 0.6597481369972229, "learning_rate": 0.0005999309121008314, "loss": 2.3362, "step": 616 }, { "epoch": 0.02, "grad_norm": 0.6583294868469238, "learning_rate": 0.0005999306876160445, "loss": 2.4477, "step": 617 }, { "epoch": 0.02, "grad_norm": 0.6333367824554443, "learning_rate": 0.000599930462767186, "loss": 2.4561, "step": 618 }, { "epoch": 0.02, "grad_norm": 0.6364583969116211, "learning_rate": 0.000599930237554256, "loss": 2.5261, "step": 619 }, { "epoch": 0.02, "grad_norm": 0.6565802693367004, "learning_rate": 0.000599930011977255, "loss": 2.4744, "step": 620 }, { "epoch": 0.02, "grad_norm": 0.6513016819953918, "learning_rate": 0.000599929786036183, "loss": 2.4258, "step": 621 }, { "epoch": 0.02, "grad_norm": 0.6475659012794495, "learning_rate": 0.0005999295597310406, "loss": 2.5508, "step": 622 }, { "epoch": 0.02, "grad_norm": 0.6871708035469055, "learning_rate": 0.0005999293330618277, "loss": 2.4426, "step": 623 }, { "epoch": 0.02, "grad_norm": 0.6729172468185425, "learning_rate": 0.000599929106028545, "loss": 2.4833, "step": 624 }, { "epoch": 0.02, "grad_norm": 0.6638731956481934, "learning_rate": 0.0005999288786311924, "loss": 2.4574, "step": 625 }, { "epoch": 0.02, "grad_norm": 0.6454876065254211, "learning_rate": 0.0005999286508697705, "loss": 2.4276, "step": 626 }, { "epoch": 0.02, "grad_norm": 0.6737560033798218, "learning_rate": 0.0005999284227442793, "loss": 2.5286, "step": 627 }, { "epoch": 0.02, "grad_norm": 0.6609646677970886, "learning_rate": 0.0005999281942547191, "loss": 2.3667, "step": 628 }, { "epoch": 0.02, "grad_norm": 0.6149380207061768, "learning_rate": 0.0005999279654010904, "loss": 2.3642, "step": 629 }, { "epoch": 0.02, "grad_norm": 0.6428014039993286, "learning_rate": 0.0005999277361833933, "loss": 2.5314, "step": 630 }, { "epoch": 0.02, "grad_norm": 0.6770861744880676, "learning_rate": 0.0005999275066016282, "loss": 2.4778, "step": 631 }, { "epoch": 0.02, "grad_norm": 0.6178455352783203, "learning_rate": 0.0005999272766557952, "loss": 2.4811, "step": 632 }, { "epoch": 0.02, "grad_norm": 0.6511662602424622, "learning_rate": 0.0005999270463458947, "loss": 2.404, "step": 633 }, { "epoch": 0.02, "grad_norm": 0.6441854238510132, "learning_rate": 0.0005999268156719269, "loss": 2.4223, "step": 634 }, { "epoch": 0.02, "grad_norm": 0.6234164834022522, "learning_rate": 0.0005999265846338923, "loss": 2.4225, "step": 635 }, { "epoch": 0.02, "grad_norm": 0.6428911089897156, "learning_rate": 0.0005999263532317909, "loss": 2.4608, "step": 636 }, { "epoch": 0.02, "grad_norm": 0.6273331046104431, "learning_rate": 0.000599926121465623, "loss": 2.4153, "step": 637 }, { "epoch": 0.02, "grad_norm": 0.6099276542663574, "learning_rate": 0.0005999258893353891, "loss": 2.4427, "step": 638 }, { "epoch": 0.02, "grad_norm": 0.6152760982513428, "learning_rate": 0.0005999256568410894, "loss": 2.4515, "step": 639 }, { "epoch": 0.02, "grad_norm": 0.6437386274337769, "learning_rate": 0.0005999254239827241, "loss": 2.4203, "step": 640 }, { "epoch": 0.02, "grad_norm": 0.6466348767280579, "learning_rate": 0.0005999251907602936, "loss": 2.5247, "step": 641 }, { "epoch": 0.02, "grad_norm": 0.6195766925811768, "learning_rate": 0.000599924957173798, "loss": 2.4307, "step": 642 }, { "epoch": 0.02, "grad_norm": 0.6787882447242737, "learning_rate": 0.0005999247232232376, "loss": 2.4633, "step": 643 }, { "epoch": 0.02, "grad_norm": 0.6805762052536011, "learning_rate": 0.0005999244889086131, "loss": 2.4904, "step": 644 }, { "epoch": 0.02, "grad_norm": 0.6731105446815491, "learning_rate": 0.0005999242542299242, "loss": 2.5398, "step": 645 }, { "epoch": 0.02, "grad_norm": 0.6728701591491699, "learning_rate": 0.0005999240191871715, "loss": 2.3774, "step": 646 }, { "epoch": 0.02, "grad_norm": 0.724300742149353, "learning_rate": 0.0005999237837803552, "loss": 2.5213, "step": 647 }, { "epoch": 0.02, "grad_norm": 0.6653078198432922, "learning_rate": 0.0005999235480094757, "loss": 2.4323, "step": 648 }, { "epoch": 0.02, "grad_norm": 0.6330647468566895, "learning_rate": 0.0005999233118745333, "loss": 2.4192, "step": 649 }, { "epoch": 0.02, "grad_norm": 0.6548694372177124, "learning_rate": 0.0005999230753755279, "loss": 2.4151, "step": 650 }, { "epoch": 0.02, "grad_norm": 0.6589232087135315, "learning_rate": 0.0005999228385124603, "loss": 2.4176, "step": 651 }, { "epoch": 0.02, "grad_norm": 0.6660078763961792, "learning_rate": 0.0005999226012853306, "loss": 2.4048, "step": 652 }, { "epoch": 0.02, "grad_norm": 0.6675761342048645, "learning_rate": 0.000599922363694139, "loss": 2.4289, "step": 653 }, { "epoch": 0.02, "grad_norm": 0.6860916018486023, "learning_rate": 0.0005999221257388857, "loss": 2.3743, "step": 654 }, { "epoch": 0.02, "grad_norm": 0.7109667062759399, "learning_rate": 0.0005999218874195712, "loss": 2.3884, "step": 655 }, { "epoch": 0.02, "grad_norm": 0.6470160484313965, "learning_rate": 0.0005999216487361958, "loss": 2.5198, "step": 656 }, { "epoch": 0.02, "grad_norm": 0.6242385506629944, "learning_rate": 0.0005999214096887597, "loss": 2.4543, "step": 657 }, { "epoch": 0.02, "grad_norm": 0.6392278075218201, "learning_rate": 0.0005999211702772631, "loss": 2.4334, "step": 658 }, { "epoch": 0.02, "grad_norm": 0.6340214014053345, "learning_rate": 0.0005999209305017065, "loss": 2.4725, "step": 659 }, { "epoch": 0.02, "grad_norm": 0.6411225199699402, "learning_rate": 0.00059992069036209, "loss": 2.388, "step": 660 }, { "epoch": 0.02, "grad_norm": 0.6469171643257141, "learning_rate": 0.000599920449858414, "loss": 2.45, "step": 661 }, { "epoch": 0.02, "grad_norm": 0.6139805912971497, "learning_rate": 0.0005999202089906788, "loss": 2.4471, "step": 662 }, { "epoch": 0.02, "grad_norm": 0.6508941054344177, "learning_rate": 0.0005999199677588846, "loss": 2.4632, "step": 663 }, { "epoch": 0.02, "grad_norm": 0.6274420619010925, "learning_rate": 0.0005999197261630318, "loss": 2.422, "step": 664 }, { "epoch": 0.02, "grad_norm": 0.6540734171867371, "learning_rate": 0.0005999194842031206, "loss": 2.4624, "step": 665 }, { "epoch": 0.02, "grad_norm": 0.6430597901344299, "learning_rate": 0.0005999192418791514, "loss": 2.4283, "step": 666 }, { "epoch": 0.02, "grad_norm": 0.6050016283988953, "learning_rate": 0.0005999189991911244, "loss": 2.4416, "step": 667 }, { "epoch": 0.02, "grad_norm": 0.6334326267242432, "learning_rate": 0.0005999187561390399, "loss": 2.3996, "step": 668 }, { "epoch": 0.02, "grad_norm": 0.6621738076210022, "learning_rate": 0.0005999185127228983, "loss": 2.524, "step": 669 }, { "epoch": 0.02, "grad_norm": 0.6681085228919983, "learning_rate": 0.0005999182689426998, "loss": 2.3474, "step": 670 }, { "epoch": 0.02, "grad_norm": 0.6356897354125977, "learning_rate": 0.0005999180247984446, "loss": 2.4068, "step": 671 }, { "epoch": 0.02, "grad_norm": 0.6515069007873535, "learning_rate": 0.0005999177802901332, "loss": 2.5059, "step": 672 }, { "epoch": 0.02, "grad_norm": 0.6695412993431091, "learning_rate": 0.0005999175354177658, "loss": 2.3882, "step": 673 }, { "epoch": 0.02, "grad_norm": 0.65110844373703, "learning_rate": 0.0005999172901813427, "loss": 2.4347, "step": 674 }, { "epoch": 0.02, "grad_norm": 0.6200897097587585, "learning_rate": 0.0005999170445808641, "loss": 2.4181, "step": 675 }, { "epoch": 0.02, "grad_norm": 0.6733734011650085, "learning_rate": 0.0005999167986163305, "loss": 2.3588, "step": 676 }, { "epoch": 0.02, "grad_norm": 0.7119411826133728, "learning_rate": 0.0005999165522877422, "loss": 2.3884, "step": 677 }, { "epoch": 0.02, "grad_norm": 0.6266691088676453, "learning_rate": 0.0005999163055950992, "loss": 2.3188, "step": 678 }, { "epoch": 0.02, "grad_norm": 0.6496017575263977, "learning_rate": 0.000599916058538402, "loss": 2.4408, "step": 679 }, { "epoch": 0.02, "grad_norm": 0.6651119589805603, "learning_rate": 0.0005999158111176511, "loss": 2.4111, "step": 680 }, { "epoch": 0.02, "grad_norm": 0.647018551826477, "learning_rate": 0.0005999155633328464, "loss": 2.3408, "step": 681 }, { "epoch": 0.02, "grad_norm": 0.6393489837646484, "learning_rate": 0.0005999153151839885, "loss": 2.4218, "step": 682 }, { "epoch": 0.02, "grad_norm": 0.6670609712600708, "learning_rate": 0.0005999150666710775, "loss": 2.4305, "step": 683 }, { "epoch": 0.02, "grad_norm": 0.6290651559829712, "learning_rate": 0.0005999148177941139, "loss": 2.4654, "step": 684 }, { "epoch": 0.02, "grad_norm": 0.6928999423980713, "learning_rate": 0.0005999145685530978, "loss": 2.4237, "step": 685 }, { "epoch": 0.02, "grad_norm": 0.6771054267883301, "learning_rate": 0.0005999143189480298, "loss": 2.384, "step": 686 }, { "epoch": 0.02, "grad_norm": 0.6247697472572327, "learning_rate": 0.0005999140689789098, "loss": 2.4131, "step": 687 }, { "epoch": 0.02, "grad_norm": 0.6471261382102966, "learning_rate": 0.0005999138186457384, "loss": 2.4246, "step": 688 }, { "epoch": 0.02, "grad_norm": 0.6464331150054932, "learning_rate": 0.0005999135679485158, "loss": 2.445, "step": 689 }, { "epoch": 0.02, "grad_norm": 0.6258648633956909, "learning_rate": 0.0005999133168872423, "loss": 2.3684, "step": 690 }, { "epoch": 0.02, "grad_norm": 0.6484378576278687, "learning_rate": 0.0005999130654619183, "loss": 2.3808, "step": 691 }, { "epoch": 0.02, "grad_norm": 0.6362382769584656, "learning_rate": 0.0005999128136725439, "loss": 2.4206, "step": 692 }, { "epoch": 0.02, "grad_norm": 0.6411938667297363, "learning_rate": 0.0005999125615191197, "loss": 2.4681, "step": 693 }, { "epoch": 0.02, "grad_norm": 0.6353893876075745, "learning_rate": 0.0005999123090016456, "loss": 2.4183, "step": 694 }, { "epoch": 0.02, "grad_norm": 0.6422315835952759, "learning_rate": 0.0005999120561201224, "loss": 2.4978, "step": 695 }, { "epoch": 0.02, "grad_norm": 0.6186074018478394, "learning_rate": 0.0005999118028745501, "loss": 2.3987, "step": 696 }, { "epoch": 0.02, "grad_norm": 0.6464399695396423, "learning_rate": 0.0005999115492649289, "loss": 2.4559, "step": 697 }, { "epoch": 0.02, "grad_norm": 0.6348670125007629, "learning_rate": 0.0005999112952912594, "loss": 2.4447, "step": 698 }, { "epoch": 0.02, "grad_norm": 0.621288001537323, "learning_rate": 0.0005999110409535418, "loss": 2.4091, "step": 699 }, { "epoch": 0.02, "grad_norm": 0.6431146264076233, "learning_rate": 0.0005999107862517763, "loss": 2.4095, "step": 700 }, { "epoch": 0.02, "grad_norm": 0.6494776606559753, "learning_rate": 0.0005999105311859633, "loss": 2.3888, "step": 701 }, { "epoch": 0.02, "grad_norm": 0.639898419380188, "learning_rate": 0.0005999102757561032, "loss": 2.4289, "step": 702 }, { "epoch": 0.02, "grad_norm": 0.6483799815177917, "learning_rate": 0.0005999100199621961, "loss": 2.3642, "step": 703 }, { "epoch": 0.02, "grad_norm": 0.653323233127594, "learning_rate": 0.0005999097638042425, "loss": 2.4142, "step": 704 }, { "epoch": 0.02, "grad_norm": 0.6393147110939026, "learning_rate": 0.0005999095072822426, "loss": 2.3484, "step": 705 }, { "epoch": 0.02, "grad_norm": 0.6010264754295349, "learning_rate": 0.0005999092503961968, "loss": 2.4243, "step": 706 }, { "epoch": 0.02, "grad_norm": 0.666234016418457, "learning_rate": 0.0005999089931461054, "loss": 2.4263, "step": 707 }, { "epoch": 0.02, "grad_norm": 0.6374032497406006, "learning_rate": 0.0005999087355319686, "loss": 2.384, "step": 708 }, { "epoch": 0.02, "grad_norm": 0.6759114265441895, "learning_rate": 0.0005999084775537867, "loss": 2.3018, "step": 709 }, { "epoch": 0.02, "grad_norm": 0.6635960340499878, "learning_rate": 0.0005999082192115602, "loss": 2.4221, "step": 710 }, { "epoch": 0.02, "grad_norm": 0.6685530543327332, "learning_rate": 0.0005999079605052893, "loss": 2.3233, "step": 711 }, { "epoch": 0.02, "grad_norm": 0.6592737436294556, "learning_rate": 0.0005999077014349742, "loss": 2.4576, "step": 712 }, { "epoch": 0.02, "grad_norm": 0.631382405757904, "learning_rate": 0.0005999074420006156, "loss": 2.4042, "step": 713 }, { "epoch": 0.02, "grad_norm": 0.7152851819992065, "learning_rate": 0.0005999071822022133, "loss": 2.3572, "step": 714 }, { "epoch": 0.02, "grad_norm": 0.6344745755195618, "learning_rate": 0.000599906922039768, "loss": 2.4582, "step": 715 }, { "epoch": 0.02, "grad_norm": 0.6559484601020813, "learning_rate": 0.0005999066615132799, "loss": 2.4351, "step": 716 }, { "epoch": 0.02, "grad_norm": 0.6149682402610779, "learning_rate": 0.0005999064006227493, "loss": 2.385, "step": 717 }, { "epoch": 0.02, "grad_norm": 0.703098475933075, "learning_rate": 0.0005999061393681765, "loss": 2.4655, "step": 718 }, { "epoch": 0.02, "grad_norm": 0.6642423868179321, "learning_rate": 0.0005999058777495618, "loss": 2.3662, "step": 719 }, { "epoch": 0.02, "grad_norm": 0.6570237874984741, "learning_rate": 0.0005999056157669056, "loss": 2.4549, "step": 720 }, { "epoch": 0.02, "grad_norm": 0.6434747576713562, "learning_rate": 0.0005999053534202081, "loss": 2.4194, "step": 721 }, { "epoch": 0.02, "grad_norm": 0.6184223294258118, "learning_rate": 0.0005999050907094697, "loss": 2.4487, "step": 722 }, { "epoch": 0.02, "grad_norm": 0.6773436665534973, "learning_rate": 0.0005999048276346908, "loss": 2.4226, "step": 723 }, { "epoch": 0.02, "grad_norm": 0.6231591105461121, "learning_rate": 0.0005999045641958715, "loss": 2.3703, "step": 724 }, { "epoch": 0.02, "grad_norm": 0.6430952548980713, "learning_rate": 0.0005999043003930123, "loss": 2.3927, "step": 725 }, { "epoch": 0.02, "grad_norm": 0.6286866664886475, "learning_rate": 0.0005999040362261135, "loss": 2.2843, "step": 726 }, { "epoch": 0.02, "grad_norm": 0.6362537145614624, "learning_rate": 0.0005999037716951755, "loss": 2.3935, "step": 727 }, { "epoch": 0.02, "grad_norm": 0.6292781829833984, "learning_rate": 0.0005999035068001983, "loss": 2.3847, "step": 728 }, { "epoch": 0.02, "grad_norm": 0.656356155872345, "learning_rate": 0.0005999032415411825, "loss": 2.3244, "step": 729 }, { "epoch": 0.02, "grad_norm": 0.6268264651298523, "learning_rate": 0.0005999029759181285, "loss": 2.3973, "step": 730 }, { "epoch": 0.02, "grad_norm": 0.6400675177574158, "learning_rate": 0.0005999027099310363, "loss": 2.3672, "step": 731 }, { "epoch": 0.02, "grad_norm": 0.6430079936981201, "learning_rate": 0.0005999024435799063, "loss": 2.3765, "step": 732 }, { "epoch": 0.02, "grad_norm": 0.6274365186691284, "learning_rate": 0.0005999021768647392, "loss": 2.3203, "step": 733 }, { "epoch": 0.02, "grad_norm": 0.6708753705024719, "learning_rate": 0.0005999019097855349, "loss": 2.3346, "step": 734 }, { "epoch": 0.02, "grad_norm": 0.6330341100692749, "learning_rate": 0.0005999016423422938, "loss": 2.3409, "step": 735 }, { "epoch": 0.02, "grad_norm": 0.6364647746086121, "learning_rate": 0.0005999013745350164, "loss": 2.4701, "step": 736 }, { "epoch": 0.02, "grad_norm": 0.6717507839202881, "learning_rate": 0.000599901106363703, "loss": 2.4041, "step": 737 }, { "epoch": 0.02, "grad_norm": 0.651470959186554, "learning_rate": 0.0005999008378283537, "loss": 2.3816, "step": 738 }, { "epoch": 0.02, "grad_norm": 0.6359846591949463, "learning_rate": 0.000599900568928969, "loss": 2.3382, "step": 739 }, { "epoch": 0.02, "grad_norm": 0.6413174271583557, "learning_rate": 0.0005999002996655493, "loss": 2.3475, "step": 740 }, { "epoch": 0.02, "grad_norm": 0.6312968730926514, "learning_rate": 0.0005999000300380946, "loss": 2.4183, "step": 741 }, { "epoch": 0.02, "grad_norm": 0.6425250172615051, "learning_rate": 0.0005998997600466057, "loss": 2.407, "step": 742 }, { "epoch": 0.02, "grad_norm": 0.6434535980224609, "learning_rate": 0.0005998994896910826, "loss": 2.4532, "step": 743 }, { "epoch": 0.02, "grad_norm": 0.6788220405578613, "learning_rate": 0.0005998992189715257, "loss": 2.376, "step": 744 }, { "epoch": 0.02, "grad_norm": 0.6434285044670105, "learning_rate": 0.0005998989478879352, "loss": 2.4326, "step": 745 }, { "epoch": 0.02, "grad_norm": 0.6785898804664612, "learning_rate": 0.0005998986764403117, "loss": 2.362, "step": 746 }, { "epoch": 0.02, "grad_norm": 0.6506190896034241, "learning_rate": 0.0005998984046286554, "loss": 2.4116, "step": 747 }, { "epoch": 0.02, "grad_norm": 0.6658552885055542, "learning_rate": 0.0005998981324529665, "loss": 2.4219, "step": 748 }, { "epoch": 0.02, "grad_norm": 0.6466109752655029, "learning_rate": 0.0005998978599132456, "loss": 2.3936, "step": 749 }, { "epoch": 0.02, "grad_norm": 0.6581551432609558, "learning_rate": 0.0005998975870094928, "loss": 2.517, "step": 750 }, { "epoch": 0.02, "grad_norm": 0.6455744504928589, "learning_rate": 0.0005998973137417087, "loss": 2.4787, "step": 751 }, { "epoch": 0.03, "grad_norm": 0.6477044224739075, "learning_rate": 0.0005998970401098932, "loss": 2.2847, "step": 752 }, { "epoch": 0.03, "grad_norm": 0.6584809422492981, "learning_rate": 0.000599896766114047, "loss": 2.3744, "step": 753 }, { "epoch": 0.03, "grad_norm": 0.6900264620780945, "learning_rate": 0.0005998964917541703, "loss": 2.4183, "step": 754 }, { "epoch": 0.03, "grad_norm": 0.6276111602783203, "learning_rate": 0.0005998962170302635, "loss": 2.418, "step": 755 }, { "epoch": 0.03, "grad_norm": 0.698955774307251, "learning_rate": 0.0005998959419423268, "loss": 2.4154, "step": 756 }, { "epoch": 0.03, "grad_norm": 0.6443789601325989, "learning_rate": 0.0005998956664903607, "loss": 2.4157, "step": 757 }, { "epoch": 0.03, "grad_norm": 0.663231372833252, "learning_rate": 0.0005998953906743653, "loss": 2.322, "step": 758 }, { "epoch": 0.03, "grad_norm": 0.6942036151885986, "learning_rate": 0.0005998951144943412, "loss": 2.3638, "step": 759 }, { "epoch": 0.03, "grad_norm": 0.6292616128921509, "learning_rate": 0.0005998948379502886, "loss": 2.4518, "step": 760 }, { "epoch": 0.03, "grad_norm": 0.7171483635902405, "learning_rate": 0.0005998945610422078, "loss": 2.307, "step": 761 }, { "epoch": 0.03, "grad_norm": 0.6800011992454529, "learning_rate": 0.0005998942837700992, "loss": 2.3805, "step": 762 }, { "epoch": 0.03, "grad_norm": 0.607219398021698, "learning_rate": 0.0005998940061339633, "loss": 2.3627, "step": 763 }, { "epoch": 0.03, "grad_norm": 0.695919394493103, "learning_rate": 0.0005998937281338001, "loss": 2.3951, "step": 764 }, { "epoch": 0.03, "grad_norm": 0.7085710167884827, "learning_rate": 0.0005998934497696101, "loss": 2.3387, "step": 765 }, { "epoch": 0.03, "grad_norm": 0.6403561234474182, "learning_rate": 0.0005998931710413936, "loss": 2.3761, "step": 766 }, { "epoch": 0.03, "grad_norm": 0.7179182171821594, "learning_rate": 0.0005998928919491511, "loss": 2.2643, "step": 767 }, { "epoch": 0.03, "grad_norm": 0.679777979850769, "learning_rate": 0.0005998926124928828, "loss": 2.3942, "step": 768 }, { "epoch": 0.03, "grad_norm": 0.7635641694068909, "learning_rate": 0.000599892332672589, "loss": 2.4875, "step": 769 }, { "epoch": 0.03, "grad_norm": 0.6885074377059937, "learning_rate": 0.0005998920524882701, "loss": 2.3612, "step": 770 }, { "epoch": 0.03, "grad_norm": 0.6501091122627258, "learning_rate": 0.0005998917719399264, "loss": 2.3224, "step": 771 }, { "epoch": 0.03, "grad_norm": 0.7026351690292358, "learning_rate": 0.0005998914910275584, "loss": 2.3172, "step": 772 }, { "epoch": 0.03, "grad_norm": 0.6292087435722351, "learning_rate": 0.0005998912097511662, "loss": 2.3892, "step": 773 }, { "epoch": 0.03, "grad_norm": 0.6352449059486389, "learning_rate": 0.0005998909281107503, "loss": 2.3263, "step": 774 }, { "epoch": 0.03, "grad_norm": 0.6465269327163696, "learning_rate": 0.000599890646106311, "loss": 2.405, "step": 775 }, { "epoch": 0.03, "grad_norm": 0.6603639721870422, "learning_rate": 0.0005998903637378487, "loss": 2.4385, "step": 776 }, { "epoch": 0.03, "grad_norm": 0.6594853401184082, "learning_rate": 0.0005998900810053636, "loss": 2.4568, "step": 777 }, { "epoch": 0.03, "grad_norm": 0.6575480699539185, "learning_rate": 0.0005998897979088561, "loss": 2.4523, "step": 778 }, { "epoch": 0.03, "grad_norm": 0.6245148181915283, "learning_rate": 0.0005998895144483266, "loss": 2.2833, "step": 779 }, { "epoch": 0.03, "grad_norm": 0.6150282025337219, "learning_rate": 0.0005998892306237755, "loss": 2.272, "step": 780 }, { "epoch": 0.03, "grad_norm": 0.639667809009552, "learning_rate": 0.000599888946435203, "loss": 2.3052, "step": 781 }, { "epoch": 0.03, "grad_norm": 0.6571875214576721, "learning_rate": 0.0005998886618826095, "loss": 2.3453, "step": 782 }, { "epoch": 0.03, "grad_norm": 0.6301316022872925, "learning_rate": 0.0005998883769659953, "loss": 2.4148, "step": 783 }, { "epoch": 0.03, "grad_norm": 0.6749175190925598, "learning_rate": 0.0005998880916853609, "loss": 2.358, "step": 784 }, { "epoch": 0.03, "grad_norm": 0.6313416361808777, "learning_rate": 0.0005998878060407065, "loss": 2.3233, "step": 785 }, { "epoch": 0.03, "grad_norm": 0.6237101554870605, "learning_rate": 0.0005998875200320325, "loss": 2.3159, "step": 786 }, { "epoch": 0.03, "grad_norm": 0.6836129426956177, "learning_rate": 0.0005998872336593392, "loss": 2.3864, "step": 787 }, { "epoch": 0.03, "grad_norm": 0.7373770475387573, "learning_rate": 0.000599886946922627, "loss": 2.4302, "step": 788 }, { "epoch": 0.03, "grad_norm": 0.6281234622001648, "learning_rate": 0.0005998866598218963, "loss": 2.3244, "step": 789 }, { "epoch": 0.03, "grad_norm": 0.6725941896438599, "learning_rate": 0.0005998863723571473, "loss": 2.4204, "step": 790 }, { "epoch": 0.03, "grad_norm": 0.6976016163825989, "learning_rate": 0.0005998860845283804, "loss": 2.4439, "step": 791 }, { "epoch": 0.03, "grad_norm": 0.64137864112854, "learning_rate": 0.0005998857963355961, "loss": 2.3931, "step": 792 }, { "epoch": 0.03, "grad_norm": 0.6826043128967285, "learning_rate": 0.0005998855077787946, "loss": 2.3796, "step": 793 }, { "epoch": 0.03, "grad_norm": 0.6410479545593262, "learning_rate": 0.0005998852188579762, "loss": 2.3025, "step": 794 }, { "epoch": 0.03, "grad_norm": 0.66321861743927, "learning_rate": 0.0005998849295731414, "loss": 2.3952, "step": 795 }, { "epoch": 0.03, "grad_norm": 0.6177728772163391, "learning_rate": 0.0005998846399242905, "loss": 2.3859, "step": 796 }, { "epoch": 0.03, "grad_norm": 0.6435662508010864, "learning_rate": 0.0005998843499114237, "loss": 2.2745, "step": 797 }, { "epoch": 0.03, "grad_norm": 0.6722562313079834, "learning_rate": 0.0005998840595345416, "loss": 2.2747, "step": 798 }, { "epoch": 0.03, "grad_norm": 0.6389245390892029, "learning_rate": 0.0005998837687936445, "loss": 2.4736, "step": 799 }, { "epoch": 0.03, "grad_norm": 0.6315241456031799, "learning_rate": 0.0005998834776887326, "loss": 2.3245, "step": 800 }, { "epoch": 0.03, "grad_norm": 0.6560909152030945, "learning_rate": 0.0005998831862198062, "loss": 2.4161, "step": 801 }, { "epoch": 0.03, "grad_norm": 0.6573176980018616, "learning_rate": 0.000599882894386866, "loss": 2.3555, "step": 802 }, { "epoch": 0.03, "grad_norm": 0.6323138475418091, "learning_rate": 0.0005998826021899121, "loss": 2.3645, "step": 803 }, { "epoch": 0.03, "grad_norm": 0.6638707518577576, "learning_rate": 0.0005998823096289448, "loss": 2.4011, "step": 804 }, { "epoch": 0.03, "grad_norm": 0.6323288083076477, "learning_rate": 0.0005998820167039647, "loss": 2.3576, "step": 805 }, { "epoch": 0.03, "grad_norm": 0.6600755453109741, "learning_rate": 0.000599881723414972, "loss": 2.4468, "step": 806 }, { "epoch": 0.03, "grad_norm": 0.6471022367477417, "learning_rate": 0.0005998814297619671, "loss": 2.3227, "step": 807 }, { "epoch": 0.03, "grad_norm": 0.6462105512619019, "learning_rate": 0.0005998811357449504, "loss": 2.4786, "step": 808 }, { "epoch": 0.03, "grad_norm": 0.6334490180015564, "learning_rate": 0.000599880841363922, "loss": 2.3999, "step": 809 }, { "epoch": 0.03, "grad_norm": 0.6809312105178833, "learning_rate": 0.0005998805466188825, "loss": 2.3503, "step": 810 }, { "epoch": 0.03, "grad_norm": 0.6052764058113098, "learning_rate": 0.0005998802515098322, "loss": 2.3453, "step": 811 }, { "epoch": 0.03, "grad_norm": 0.639549970626831, "learning_rate": 0.0005998799560367714, "loss": 2.3532, "step": 812 }, { "epoch": 0.03, "grad_norm": 0.7518729567527771, "learning_rate": 0.0005998796601997007, "loss": 2.3987, "step": 813 }, { "epoch": 0.03, "grad_norm": 0.6190670132637024, "learning_rate": 0.0005998793639986201, "loss": 2.3112, "step": 814 }, { "epoch": 0.03, "grad_norm": 0.6432064771652222, "learning_rate": 0.0005998790674335302, "loss": 2.3681, "step": 815 }, { "epoch": 0.03, "grad_norm": 0.6376432180404663, "learning_rate": 0.0005998787705044313, "loss": 2.331, "step": 816 }, { "epoch": 0.03, "grad_norm": 0.6078342199325562, "learning_rate": 0.0005998784732113238, "loss": 2.3904, "step": 817 }, { "epoch": 0.03, "grad_norm": 0.6644467711448669, "learning_rate": 0.000599878175554208, "loss": 2.3374, "step": 818 }, { "epoch": 0.03, "grad_norm": 0.640558123588562, "learning_rate": 0.0005998778775330842, "loss": 2.3958, "step": 819 }, { "epoch": 0.03, "grad_norm": 0.6640769839286804, "learning_rate": 0.000599877579147953, "loss": 2.3482, "step": 820 }, { "epoch": 0.03, "grad_norm": 0.6420825123786926, "learning_rate": 0.0005998772803988145, "loss": 2.4041, "step": 821 }, { "epoch": 0.03, "grad_norm": 0.680070161819458, "learning_rate": 0.0005998769812856692, "loss": 2.4293, "step": 822 }, { "epoch": 0.03, "grad_norm": 0.6561995148658752, "learning_rate": 0.0005998766818085174, "loss": 2.3828, "step": 823 }, { "epoch": 0.03, "grad_norm": 0.6150969862937927, "learning_rate": 0.0005998763819673594, "loss": 2.3366, "step": 824 }, { "epoch": 0.03, "grad_norm": 0.67024827003479, "learning_rate": 0.0005998760817621957, "loss": 2.2671, "step": 825 }, { "epoch": 0.03, "grad_norm": 0.628588080406189, "learning_rate": 0.0005998757811930268, "loss": 2.3485, "step": 826 }, { "epoch": 0.03, "grad_norm": 0.6274452805519104, "learning_rate": 0.0005998754802598528, "loss": 2.3857, "step": 827 }, { "epoch": 0.03, "grad_norm": 0.6439820528030396, "learning_rate": 0.000599875178962674, "loss": 2.3545, "step": 828 }, { "epoch": 0.03, "grad_norm": 0.6420619487762451, "learning_rate": 0.0005998748773014911, "loss": 2.3498, "step": 829 }, { "epoch": 0.03, "grad_norm": 0.6058036088943481, "learning_rate": 0.0005998745752763042, "loss": 2.3733, "step": 830 }, { "epoch": 0.03, "grad_norm": 0.6696977615356445, "learning_rate": 0.0005998742728871138, "loss": 2.3449, "step": 831 }, { "epoch": 0.03, "grad_norm": 0.6534010171890259, "learning_rate": 0.0005998739701339201, "loss": 2.4339, "step": 832 }, { "epoch": 0.03, "grad_norm": 0.6909165978431702, "learning_rate": 0.0005998736670167237, "loss": 2.3702, "step": 833 }, { "epoch": 0.03, "grad_norm": 0.6672418713569641, "learning_rate": 0.0005998733635355248, "loss": 2.4151, "step": 834 }, { "epoch": 0.03, "grad_norm": 0.626558244228363, "learning_rate": 0.0005998730596903239, "loss": 2.327, "step": 835 }, { "epoch": 0.03, "grad_norm": 0.6938319802284241, "learning_rate": 0.0005998727554811211, "loss": 2.3787, "step": 836 }, { "epoch": 0.03, "grad_norm": 0.6296847462654114, "learning_rate": 0.0005998724509079172, "loss": 2.3973, "step": 837 }, { "epoch": 0.03, "grad_norm": 0.6628779768943787, "learning_rate": 0.0005998721459707122, "loss": 2.3626, "step": 838 }, { "epoch": 0.03, "grad_norm": 0.6351715922355652, "learning_rate": 0.0005998718406695065, "loss": 2.33, "step": 839 }, { "epoch": 0.03, "grad_norm": 0.6246477365493774, "learning_rate": 0.0005998715350043007, "loss": 2.3094, "step": 840 }, { "epoch": 0.03, "grad_norm": 0.6413275599479675, "learning_rate": 0.0005998712289750952, "loss": 2.4486, "step": 841 }, { "epoch": 0.03, "grad_norm": 0.6291218400001526, "learning_rate": 0.00059987092258189, "loss": 2.3303, "step": 842 }, { "epoch": 0.03, "grad_norm": 0.6484812498092651, "learning_rate": 0.0005998706158246857, "loss": 2.4214, "step": 843 }, { "epoch": 0.03, "grad_norm": 0.6096007823944092, "learning_rate": 0.0005998703087034827, "loss": 2.3819, "step": 844 }, { "epoch": 0.03, "grad_norm": 0.6315252780914307, "learning_rate": 0.0005998700012182812, "loss": 2.3094, "step": 845 }, { "epoch": 0.03, "grad_norm": 0.6110045313835144, "learning_rate": 0.0005998696933690819, "loss": 2.3258, "step": 846 }, { "epoch": 0.03, "grad_norm": 0.6363405585289001, "learning_rate": 0.0005998693851558848, "loss": 2.3524, "step": 847 }, { "epoch": 0.03, "grad_norm": 0.63118976354599, "learning_rate": 0.0005998690765786906, "loss": 2.3939, "step": 848 }, { "epoch": 0.03, "grad_norm": 0.6060614585876465, "learning_rate": 0.0005998687676374995, "loss": 2.3438, "step": 849 }, { "epoch": 0.03, "grad_norm": 0.6720461249351501, "learning_rate": 0.0005998684583323118, "loss": 2.3161, "step": 850 }, { "epoch": 0.03, "grad_norm": 0.6420233249664307, "learning_rate": 0.0005998681486631281, "loss": 2.4393, "step": 851 }, { "epoch": 0.03, "grad_norm": 0.6248483657836914, "learning_rate": 0.0005998678386299486, "loss": 2.3306, "step": 852 }, { "epoch": 0.03, "grad_norm": 0.644038736820221, "learning_rate": 0.0005998675282327736, "loss": 2.3314, "step": 853 }, { "epoch": 0.03, "grad_norm": 0.6314935088157654, "learning_rate": 0.0005998672174716038, "loss": 2.3171, "step": 854 }, { "epoch": 0.03, "grad_norm": 0.6445680856704712, "learning_rate": 0.0005998669063464392, "loss": 2.365, "step": 855 }, { "epoch": 0.03, "grad_norm": 0.6603043675422668, "learning_rate": 0.0005998665948572805, "loss": 2.2977, "step": 856 }, { "epoch": 0.03, "grad_norm": 0.6127786636352539, "learning_rate": 0.0005998662830041278, "loss": 2.3187, "step": 857 }, { "epoch": 0.03, "grad_norm": 0.6308490037918091, "learning_rate": 0.0005998659707869818, "loss": 2.3372, "step": 858 }, { "epoch": 0.03, "grad_norm": 0.6138101816177368, "learning_rate": 0.0005998656582058426, "loss": 2.3463, "step": 859 }, { "epoch": 0.03, "grad_norm": 0.6200218796730042, "learning_rate": 0.0005998653452607106, "loss": 2.3357, "step": 860 }, { "epoch": 0.03, "grad_norm": 0.6312088370323181, "learning_rate": 0.0005998650319515864, "loss": 2.3469, "step": 861 }, { "epoch": 0.03, "grad_norm": 0.6073251366615295, "learning_rate": 0.0005998647182784702, "loss": 2.3543, "step": 862 }, { "epoch": 0.03, "grad_norm": 0.654181957244873, "learning_rate": 0.0005998644042413624, "loss": 2.3297, "step": 863 }, { "epoch": 0.03, "grad_norm": 0.6174125075340271, "learning_rate": 0.0005998640898402633, "loss": 2.3527, "step": 864 }, { "epoch": 0.03, "grad_norm": 0.6612250208854675, "learning_rate": 0.0005998637750751735, "loss": 2.2979, "step": 865 }, { "epoch": 0.03, "grad_norm": 0.5969744324684143, "learning_rate": 0.0005998634599460932, "loss": 2.4007, "step": 866 }, { "epoch": 0.03, "grad_norm": 0.6355924010276794, "learning_rate": 0.0005998631444530229, "loss": 2.2619, "step": 867 }, { "epoch": 0.03, "grad_norm": 0.6107147932052612, "learning_rate": 0.0005998628285959628, "loss": 2.2931, "step": 868 }, { "epoch": 0.03, "grad_norm": 0.6338801980018616, "learning_rate": 0.0005998625123749135, "loss": 2.3492, "step": 869 }, { "epoch": 0.03, "grad_norm": 0.626558244228363, "learning_rate": 0.0005998621957898754, "loss": 2.3287, "step": 870 }, { "epoch": 0.03, "grad_norm": 0.6224081516265869, "learning_rate": 0.0005998618788408485, "loss": 2.3948, "step": 871 }, { "epoch": 0.03, "grad_norm": 0.6225354671478271, "learning_rate": 0.0005998615615278337, "loss": 2.3162, "step": 872 }, { "epoch": 0.03, "grad_norm": 0.6242054104804993, "learning_rate": 0.0005998612438508311, "loss": 2.3639, "step": 873 }, { "epoch": 0.03, "grad_norm": 0.627447783946991, "learning_rate": 0.0005998609258098411, "loss": 2.2594, "step": 874 }, { "epoch": 0.03, "grad_norm": 0.6173492670059204, "learning_rate": 0.0005998606074048641, "loss": 2.2734, "step": 875 }, { "epoch": 0.03, "grad_norm": 0.6178213357925415, "learning_rate": 0.0005998602886359006, "loss": 2.3497, "step": 876 }, { "epoch": 0.03, "grad_norm": 0.6600163578987122, "learning_rate": 0.0005998599695029508, "loss": 2.3091, "step": 877 }, { "epoch": 0.03, "grad_norm": 0.6220263838768005, "learning_rate": 0.0005998596500060152, "loss": 2.2778, "step": 878 }, { "epoch": 0.03, "grad_norm": 0.6285505890846252, "learning_rate": 0.0005998593301450941, "loss": 2.2989, "step": 879 }, { "epoch": 0.03, "grad_norm": 0.6735250353813171, "learning_rate": 0.000599859009920188, "loss": 2.2746, "step": 880 }, { "epoch": 0.03, "grad_norm": 0.6382323503494263, "learning_rate": 0.0005998586893312973, "loss": 2.4235, "step": 881 }, { "epoch": 0.03, "grad_norm": 0.5975667834281921, "learning_rate": 0.0005998583683784223, "loss": 2.2639, "step": 882 }, { "epoch": 0.03, "grad_norm": 0.6582968235015869, "learning_rate": 0.0005998580470615635, "loss": 2.2617, "step": 883 }, { "epoch": 0.03, "grad_norm": 0.6080236434936523, "learning_rate": 0.0005998577253807211, "loss": 2.2547, "step": 884 }, { "epoch": 0.03, "grad_norm": 0.673193097114563, "learning_rate": 0.0005998574033358956, "loss": 2.4451, "step": 885 }, { "epoch": 0.03, "grad_norm": 0.5991887450218201, "learning_rate": 0.0005998570809270875, "loss": 2.3106, "step": 886 }, { "epoch": 0.03, "grad_norm": 0.6332494616508484, "learning_rate": 0.000599856758154297, "loss": 2.3174, "step": 887 }, { "epoch": 0.03, "grad_norm": 0.6065515279769897, "learning_rate": 0.0005998564350175246, "loss": 2.3288, "step": 888 }, { "epoch": 0.03, "grad_norm": 0.6866530179977417, "learning_rate": 0.0005998561115167706, "loss": 2.3525, "step": 889 }, { "epoch": 0.03, "grad_norm": 0.6356687545776367, "learning_rate": 0.0005998557876520355, "loss": 2.3324, "step": 890 }, { "epoch": 0.03, "grad_norm": 0.6847507357597351, "learning_rate": 0.0005998554634233197, "loss": 2.332, "step": 891 }, { "epoch": 0.03, "grad_norm": 0.6175287961959839, "learning_rate": 0.0005998551388306236, "loss": 2.3633, "step": 892 }, { "epoch": 0.03, "grad_norm": 0.6439458727836609, "learning_rate": 0.0005998548138739474, "loss": 2.2327, "step": 893 }, { "epoch": 0.03, "grad_norm": 0.635466456413269, "learning_rate": 0.0005998544885532917, "loss": 2.3297, "step": 894 }, { "epoch": 0.03, "grad_norm": 0.6174236536026001, "learning_rate": 0.0005998541628686569, "loss": 2.3151, "step": 895 }, { "epoch": 0.03, "grad_norm": 0.616378664970398, "learning_rate": 0.0005998538368200433, "loss": 2.3002, "step": 896 }, { "epoch": 0.03, "grad_norm": 0.6642088890075684, "learning_rate": 0.0005998535104074512, "loss": 2.3228, "step": 897 }, { "epoch": 0.03, "grad_norm": 0.6649506688117981, "learning_rate": 0.0005998531836308812, "loss": 2.3651, "step": 898 }, { "epoch": 0.03, "grad_norm": 0.6222115159034729, "learning_rate": 0.0005998528564903337, "loss": 2.3174, "step": 899 }, { "epoch": 0.03, "grad_norm": 0.6015220284461975, "learning_rate": 0.0005998525289858089, "loss": 2.3414, "step": 900 }, { "epoch": 0.03, "grad_norm": 0.640418291091919, "learning_rate": 0.0005998522011173074, "loss": 2.3081, "step": 901 }, { "epoch": 0.03, "grad_norm": 0.6199432611465454, "learning_rate": 0.0005998518728848295, "loss": 2.4142, "step": 902 }, { "epoch": 0.03, "grad_norm": 0.6275074481964111, "learning_rate": 0.0005998515442883755, "loss": 2.3657, "step": 903 }, { "epoch": 0.03, "grad_norm": 0.7171419262886047, "learning_rate": 0.000599851215327946, "loss": 2.3338, "step": 904 }, { "epoch": 0.03, "grad_norm": 0.6200520396232605, "learning_rate": 0.0005998508860035413, "loss": 2.3782, "step": 905 }, { "epoch": 0.03, "grad_norm": 0.6962419748306274, "learning_rate": 0.0005998505563151619, "loss": 2.374, "step": 906 }, { "epoch": 0.03, "grad_norm": 0.6189239025115967, "learning_rate": 0.0005998502262628078, "loss": 2.3853, "step": 907 }, { "epoch": 0.03, "grad_norm": 0.6392756700515747, "learning_rate": 0.00059984989584648, "loss": 2.3391, "step": 908 }, { "epoch": 0.03, "grad_norm": 0.6713096499443054, "learning_rate": 0.0005998495650661786, "loss": 2.2638, "step": 909 }, { "epoch": 0.03, "grad_norm": 0.6336117386817932, "learning_rate": 0.0005998492339219038, "loss": 2.3849, "step": 910 }, { "epoch": 0.03, "grad_norm": 0.6729277968406677, "learning_rate": 0.0005998489024136563, "loss": 2.2904, "step": 911 }, { "epoch": 0.03, "grad_norm": 0.611825168132782, "learning_rate": 0.0005998485705414365, "loss": 2.3592, "step": 912 }, { "epoch": 0.03, "grad_norm": 0.6221752166748047, "learning_rate": 0.0005998482383052446, "loss": 2.341, "step": 913 }, { "epoch": 0.03, "grad_norm": 0.6385249495506287, "learning_rate": 0.0005998479057050812, "loss": 2.2744, "step": 914 }, { "epoch": 0.03, "grad_norm": 0.6038006544113159, "learning_rate": 0.0005998475727409466, "loss": 2.2698, "step": 915 }, { "epoch": 0.03, "grad_norm": 0.6370606422424316, "learning_rate": 0.0005998472394128413, "loss": 2.3498, "step": 916 }, { "epoch": 0.03, "grad_norm": 0.622831404209137, "learning_rate": 0.0005998469057207654, "loss": 2.2852, "step": 917 }, { "epoch": 0.03, "grad_norm": 0.6324684023857117, "learning_rate": 0.0005998465716647198, "loss": 2.3357, "step": 918 }, { "epoch": 0.03, "grad_norm": 0.6036524176597595, "learning_rate": 0.0005998462372447046, "loss": 2.3125, "step": 919 }, { "epoch": 0.03, "grad_norm": 0.6170434951782227, "learning_rate": 0.0005998459024607202, "loss": 2.3774, "step": 920 }, { "epoch": 0.03, "grad_norm": 0.6437896490097046, "learning_rate": 0.000599845567312767, "loss": 2.268, "step": 921 }, { "epoch": 0.03, "grad_norm": 0.6468507647514343, "learning_rate": 0.0005998452318008454, "loss": 2.2662, "step": 922 }, { "epoch": 0.03, "grad_norm": 0.606465220451355, "learning_rate": 0.0005998448959249561, "loss": 2.3322, "step": 923 }, { "epoch": 0.03, "grad_norm": 0.5963767170906067, "learning_rate": 0.000599844559685099, "loss": 2.3516, "step": 924 }, { "epoch": 0.03, "grad_norm": 0.661973774433136, "learning_rate": 0.000599844223081275, "loss": 2.2948, "step": 925 }, { "epoch": 0.03, "grad_norm": 0.6441019177436829, "learning_rate": 0.0005998438861134842, "loss": 2.2813, "step": 926 }, { "epoch": 0.03, "grad_norm": 0.6164405345916748, "learning_rate": 0.0005998435487817271, "loss": 2.2965, "step": 927 }, { "epoch": 0.03, "grad_norm": 0.6457834243774414, "learning_rate": 0.0005998432110860042, "loss": 2.3621, "step": 928 }, { "epoch": 0.03, "grad_norm": 0.6296787858009338, "learning_rate": 0.0005998428730263158, "loss": 2.3498, "step": 929 }, { "epoch": 0.03, "grad_norm": 0.6503202319145203, "learning_rate": 0.0005998425346026622, "loss": 2.3581, "step": 930 }, { "epoch": 0.03, "grad_norm": 0.627009928226471, "learning_rate": 0.000599842195815044, "loss": 2.2831, "step": 931 }, { "epoch": 0.03, "grad_norm": 0.6261371970176697, "learning_rate": 0.0005998418566634616, "loss": 2.2736, "step": 932 }, { "epoch": 0.03, "grad_norm": 0.6390617489814758, "learning_rate": 0.0005998415171479154, "loss": 2.4045, "step": 933 }, { "epoch": 0.03, "grad_norm": 0.6404328346252441, "learning_rate": 0.0005998411772684057, "loss": 2.2376, "step": 934 }, { "epoch": 0.03, "grad_norm": 0.615889847278595, "learning_rate": 0.000599840837024933, "loss": 2.305, "step": 935 }, { "epoch": 0.03, "grad_norm": 0.6433776021003723, "learning_rate": 0.0005998404964174977, "loss": 2.3486, "step": 936 }, { "epoch": 0.03, "grad_norm": 0.6349501013755798, "learning_rate": 0.0005998401554461004, "loss": 2.3301, "step": 937 }, { "epoch": 0.03, "grad_norm": 0.6193860173225403, "learning_rate": 0.0005998398141107411, "loss": 2.1944, "step": 938 }, { "epoch": 0.03, "grad_norm": 0.6333768963813782, "learning_rate": 0.0005998394724114204, "loss": 2.3294, "step": 939 }, { "epoch": 0.03, "grad_norm": 0.6507761478424072, "learning_rate": 0.0005998391303481391, "loss": 2.2907, "step": 940 }, { "epoch": 0.03, "grad_norm": 0.6162818074226379, "learning_rate": 0.0005998387879208969, "loss": 2.2402, "step": 941 }, { "epoch": 0.03, "grad_norm": 0.6238299012184143, "learning_rate": 0.0005998384451296948, "loss": 2.3297, "step": 942 }, { "epoch": 0.03, "grad_norm": 0.6356513500213623, "learning_rate": 0.0005998381019745329, "loss": 2.3315, "step": 943 }, { "epoch": 0.03, "grad_norm": 0.6341591477394104, "learning_rate": 0.0005998377584554118, "loss": 2.3515, "step": 944 }, { "epoch": 0.03, "grad_norm": 0.624022901058197, "learning_rate": 0.0005998374145723319, "loss": 2.2292, "step": 945 }, { "epoch": 0.03, "grad_norm": 0.6109688878059387, "learning_rate": 0.0005998370703252934, "loss": 2.3118, "step": 946 }, { "epoch": 0.03, "grad_norm": 0.6107144951820374, "learning_rate": 0.0005998367257142969, "loss": 2.2489, "step": 947 }, { "epoch": 0.03, "grad_norm": 0.6250410079956055, "learning_rate": 0.0005998363807393429, "loss": 2.305, "step": 948 }, { "epoch": 0.03, "grad_norm": 0.631946325302124, "learning_rate": 0.0005998360354004316, "loss": 2.3205, "step": 949 }, { "epoch": 0.03, "grad_norm": 0.6074698567390442, "learning_rate": 0.0005998356896975637, "loss": 2.2623, "step": 950 }, { "epoch": 0.03, "grad_norm": 0.6082449555397034, "learning_rate": 0.0005998353436307393, "loss": 2.3025, "step": 951 }, { "epoch": 0.03, "grad_norm": 0.6588671207427979, "learning_rate": 0.0005998349971999589, "loss": 2.3764, "step": 952 }, { "epoch": 0.03, "grad_norm": 0.5981591939926147, "learning_rate": 0.0005998346504052232, "loss": 2.2675, "step": 953 }, { "epoch": 0.03, "grad_norm": 0.6307389140129089, "learning_rate": 0.0005998343032465323, "loss": 2.2624, "step": 954 }, { "epoch": 0.03, "grad_norm": 0.6344963312149048, "learning_rate": 0.0005998339557238868, "loss": 2.2708, "step": 955 }, { "epoch": 0.03, "grad_norm": 0.5956718325614929, "learning_rate": 0.0005998336078372869, "loss": 2.3218, "step": 956 }, { "epoch": 0.03, "grad_norm": 0.5948575735092163, "learning_rate": 0.0005998332595867333, "loss": 2.3144, "step": 957 }, { "epoch": 0.03, "grad_norm": 0.6647286415100098, "learning_rate": 0.0005998329109722263, "loss": 2.3818, "step": 958 }, { "epoch": 0.03, "grad_norm": 0.6227065324783325, "learning_rate": 0.0005998325619937663, "loss": 2.3106, "step": 959 }, { "epoch": 0.03, "grad_norm": 0.6775912046432495, "learning_rate": 0.0005998322126513538, "loss": 2.2956, "step": 960 }, { "epoch": 0.03, "grad_norm": 0.6332776546478271, "learning_rate": 0.0005998318629449891, "loss": 2.3649, "step": 961 }, { "epoch": 0.03, "grad_norm": 0.606164813041687, "learning_rate": 0.0005998315128746727, "loss": 2.317, "step": 962 }, { "epoch": 0.03, "grad_norm": 0.6600996255874634, "learning_rate": 0.000599831162440405, "loss": 2.2872, "step": 963 }, { "epoch": 0.03, "grad_norm": 0.6280790567398071, "learning_rate": 0.0005998308116421866, "loss": 2.3483, "step": 964 }, { "epoch": 0.03, "grad_norm": 0.6263936161994934, "learning_rate": 0.0005998304604800176, "loss": 2.3087, "step": 965 }, { "epoch": 0.03, "grad_norm": 0.6696301102638245, "learning_rate": 0.0005998301089538986, "loss": 2.3172, "step": 966 }, { "epoch": 0.03, "grad_norm": 0.6252235770225525, "learning_rate": 0.0005998297570638302, "loss": 2.3776, "step": 967 }, { "epoch": 0.03, "grad_norm": 0.7112988233566284, "learning_rate": 0.0005998294048098125, "loss": 2.3462, "step": 968 }, { "epoch": 0.03, "grad_norm": 0.6188958287239075, "learning_rate": 0.0005998290521918462, "loss": 2.3413, "step": 969 }, { "epoch": 0.03, "grad_norm": 0.6140614748001099, "learning_rate": 0.0005998286992099314, "loss": 2.3504, "step": 970 }, { "epoch": 0.03, "grad_norm": 0.6555911898612976, "learning_rate": 0.0005998283458640689, "loss": 2.3709, "step": 971 }, { "epoch": 0.03, "grad_norm": 0.5998312830924988, "learning_rate": 0.000599827992154259, "loss": 2.3091, "step": 972 }, { "epoch": 0.03, "grad_norm": 0.5970577001571655, "learning_rate": 0.000599827638080502, "loss": 2.2647, "step": 973 }, { "epoch": 0.03, "grad_norm": 0.6054071187973022, "learning_rate": 0.0005998272836427984, "loss": 2.2943, "step": 974 }, { "epoch": 0.03, "grad_norm": 0.5896942615509033, "learning_rate": 0.0005998269288411488, "loss": 2.2867, "step": 975 }, { "epoch": 0.03, "grad_norm": 0.6087167263031006, "learning_rate": 0.0005998265736755534, "loss": 2.2957, "step": 976 }, { "epoch": 0.03, "grad_norm": 0.6010048985481262, "learning_rate": 0.0005998262181460127, "loss": 2.3047, "step": 977 }, { "epoch": 0.03, "grad_norm": 0.6012806296348572, "learning_rate": 0.0005998258622525271, "loss": 2.1875, "step": 978 }, { "epoch": 0.03, "grad_norm": 0.6014810800552368, "learning_rate": 0.0005998255059950972, "loss": 2.3008, "step": 979 }, { "epoch": 0.03, "grad_norm": 0.6230045557022095, "learning_rate": 0.0005998251493737231, "loss": 2.2462, "step": 980 }, { "epoch": 0.03, "grad_norm": 0.5841121673583984, "learning_rate": 0.0005998247923884056, "loss": 2.3055, "step": 981 }, { "epoch": 0.03, "grad_norm": 0.6691957116127014, "learning_rate": 0.0005998244350391451, "loss": 2.2759, "step": 982 }, { "epoch": 0.03, "grad_norm": 0.6195612549781799, "learning_rate": 0.0005998240773259418, "loss": 2.2594, "step": 983 }, { "epoch": 0.03, "grad_norm": 0.614376425743103, "learning_rate": 0.0005998237192487962, "loss": 2.2701, "step": 984 }, { "epoch": 0.03, "grad_norm": 0.6603174209594727, "learning_rate": 0.0005998233608077088, "loss": 2.3132, "step": 985 }, { "epoch": 0.03, "grad_norm": 0.6339285373687744, "learning_rate": 0.0005998230020026801, "loss": 2.3246, "step": 986 }, { "epoch": 0.03, "grad_norm": 0.6408608555793762, "learning_rate": 0.0005998226428337104, "loss": 2.346, "step": 987 }, { "epoch": 0.03, "grad_norm": 0.6475784778594971, "learning_rate": 0.0005998222833008001, "loss": 2.2533, "step": 988 }, { "epoch": 0.03, "grad_norm": 0.6342695951461792, "learning_rate": 0.0005998219234039498, "loss": 2.3353, "step": 989 }, { "epoch": 0.03, "grad_norm": 0.6566219925880432, "learning_rate": 0.00059982156314316, "loss": 2.3384, "step": 990 }, { "epoch": 0.03, "grad_norm": 0.6713873147964478, "learning_rate": 0.0005998212025184308, "loss": 2.3099, "step": 991 }, { "epoch": 0.03, "grad_norm": 0.653944730758667, "learning_rate": 0.0005998208415297629, "loss": 2.345, "step": 992 }, { "epoch": 0.03, "grad_norm": 0.6521421670913696, "learning_rate": 0.0005998204801771566, "loss": 2.3335, "step": 993 }, { "epoch": 0.03, "grad_norm": 0.6595160365104675, "learning_rate": 0.0005998201184606125, "loss": 2.2421, "step": 994 }, { "epoch": 0.03, "grad_norm": 0.6571509838104248, "learning_rate": 0.000599819756380131, "loss": 2.3886, "step": 995 }, { "epoch": 0.03, "grad_norm": 0.6501753330230713, "learning_rate": 0.0005998193939357124, "loss": 2.2913, "step": 996 }, { "epoch": 0.03, "grad_norm": 0.6706587076187134, "learning_rate": 0.0005998190311273572, "loss": 2.2055, "step": 997 }, { "epoch": 0.03, "grad_norm": 0.6269235610961914, "learning_rate": 0.000599818667955066, "loss": 2.2591, "step": 998 }, { "epoch": 0.03, "grad_norm": 0.625082790851593, "learning_rate": 0.000599818304418839, "loss": 2.3668, "step": 999 }, { "epoch": 0.03, "grad_norm": 0.6528321504592896, "learning_rate": 0.0005998179405186769, "loss": 2.2401, "step": 1000 }, { "epoch": 0.03, "grad_norm": 0.6552815437316895, "learning_rate": 0.0005998175762545799, "loss": 2.3039, "step": 1001 }, { "epoch": 0.03, "grad_norm": 0.6269344687461853, "learning_rate": 0.0005998172116265485, "loss": 2.3485, "step": 1002 }, { "epoch": 0.03, "grad_norm": 0.6459618210792542, "learning_rate": 0.0005998168466345832, "loss": 2.3003, "step": 1003 }, { "epoch": 0.03, "grad_norm": 0.6112393140792847, "learning_rate": 0.0005998164812786844, "loss": 2.2018, "step": 1004 }, { "epoch": 0.03, "grad_norm": 0.6573013067245483, "learning_rate": 0.0005998161155588527, "loss": 2.2463, "step": 1005 }, { "epoch": 0.03, "grad_norm": 0.6204313635826111, "learning_rate": 0.0005998157494750883, "loss": 2.2151, "step": 1006 }, { "epoch": 0.03, "grad_norm": 0.736726701259613, "learning_rate": 0.0005998153830273918, "loss": 2.3876, "step": 1007 }, { "epoch": 0.03, "grad_norm": 0.6276524662971497, "learning_rate": 0.0005998150162157636, "loss": 2.2733, "step": 1008 }, { "epoch": 0.03, "grad_norm": 0.6432652473449707, "learning_rate": 0.000599814649040204, "loss": 2.2384, "step": 1009 }, { "epoch": 0.03, "grad_norm": 0.6580753922462463, "learning_rate": 0.0005998142815007139, "loss": 2.3382, "step": 1010 }, { "epoch": 0.03, "grad_norm": 0.658824622631073, "learning_rate": 0.0005998139135972932, "loss": 2.307, "step": 1011 }, { "epoch": 0.03, "grad_norm": 0.639038622379303, "learning_rate": 0.0005998135453299426, "loss": 2.3068, "step": 1012 }, { "epoch": 0.03, "grad_norm": 0.6487348079681396, "learning_rate": 0.0005998131766986626, "loss": 2.3052, "step": 1013 }, { "epoch": 0.03, "grad_norm": 0.6193649172782898, "learning_rate": 0.0005998128077034535, "loss": 2.2924, "step": 1014 }, { "epoch": 0.03, "grad_norm": 0.6436378359794617, "learning_rate": 0.000599812438344316, "loss": 2.3466, "step": 1015 }, { "epoch": 0.03, "grad_norm": 0.6261348724365234, "learning_rate": 0.0005998120686212502, "loss": 2.1989, "step": 1016 }, { "epoch": 0.03, "grad_norm": 0.622144877910614, "learning_rate": 0.0005998116985342567, "loss": 2.1677, "step": 1017 }, { "epoch": 0.03, "grad_norm": 0.627873957157135, "learning_rate": 0.0005998113280833361, "loss": 2.2631, "step": 1018 }, { "epoch": 0.03, "grad_norm": 0.640912652015686, "learning_rate": 0.0005998109572684886, "loss": 2.3655, "step": 1019 }, { "epoch": 0.03, "grad_norm": 0.6241394877433777, "learning_rate": 0.000599810586089715, "loss": 2.3357, "step": 1020 }, { "epoch": 0.03, "grad_norm": 0.6596463918685913, "learning_rate": 0.0005998102145470152, "loss": 2.2142, "step": 1021 }, { "epoch": 0.03, "grad_norm": 0.641255259513855, "learning_rate": 0.0005998098426403903, "loss": 2.2857, "step": 1022 }, { "epoch": 0.03, "grad_norm": 0.648051381111145, "learning_rate": 0.0005998094703698402, "loss": 2.2539, "step": 1023 }, { "epoch": 0.03, "grad_norm": 0.6611939668655396, "learning_rate": 0.0005998090977353657, "loss": 2.2831, "step": 1024 }, { "epoch": 0.03, "grad_norm": 0.593207836151123, "learning_rate": 0.000599808724736967, "loss": 2.24, "step": 1025 }, { "epoch": 0.03, "grad_norm": 0.6114387512207031, "learning_rate": 0.0005998083513746447, "loss": 2.3154, "step": 1026 }, { "epoch": 0.03, "grad_norm": 0.6514421105384827, "learning_rate": 0.0005998079776483994, "loss": 2.2907, "step": 1027 }, { "epoch": 0.03, "grad_norm": 0.6311531066894531, "learning_rate": 0.0005998076035582312, "loss": 2.2837, "step": 1028 }, { "epoch": 0.03, "grad_norm": 0.6256716251373291, "learning_rate": 0.0005998072291041409, "loss": 2.2605, "step": 1029 }, { "epoch": 0.03, "grad_norm": 0.6220216155052185, "learning_rate": 0.0005998068542861287, "loss": 2.2857, "step": 1030 }, { "epoch": 0.03, "grad_norm": 0.6039713621139526, "learning_rate": 0.0005998064791041952, "loss": 2.2066, "step": 1031 }, { "epoch": 0.03, "grad_norm": 0.6550697088241577, "learning_rate": 0.0005998061035583409, "loss": 2.2732, "step": 1032 }, { "epoch": 0.03, "grad_norm": 0.6429526805877686, "learning_rate": 0.000599805727648566, "loss": 2.2648, "step": 1033 }, { "epoch": 0.03, "grad_norm": 0.5942524075508118, "learning_rate": 0.0005998053513748712, "loss": 2.2646, "step": 1034 }, { "epoch": 0.03, "grad_norm": 0.6338121891021729, "learning_rate": 0.0005998049747372567, "loss": 2.3345, "step": 1035 }, { "epoch": 0.03, "grad_norm": 0.6339685320854187, "learning_rate": 0.0005998045977357233, "loss": 2.2716, "step": 1036 }, { "epoch": 0.03, "grad_norm": 0.69040846824646, "learning_rate": 0.0005998042203702714, "loss": 2.2534, "step": 1037 }, { "epoch": 0.03, "grad_norm": 0.6131763458251953, "learning_rate": 0.0005998038426409012, "loss": 2.2, "step": 1038 }, { "epoch": 0.03, "grad_norm": 0.6220892071723938, "learning_rate": 0.0005998034645476133, "loss": 2.3034, "step": 1039 }, { "epoch": 0.03, "grad_norm": 0.6264079809188843, "learning_rate": 0.0005998030860904082, "loss": 2.3025, "step": 1040 }, { "epoch": 0.03, "grad_norm": 0.5926225781440735, "learning_rate": 0.0005998027072692864, "loss": 2.2455, "step": 1041 }, { "epoch": 0.03, "grad_norm": 0.6489809155464172, "learning_rate": 0.0005998023280842481, "loss": 2.3746, "step": 1042 }, { "epoch": 0.03, "grad_norm": 0.6396483182907104, "learning_rate": 0.0005998019485352941, "loss": 2.2918, "step": 1043 }, { "epoch": 0.03, "grad_norm": 0.6168232560157776, "learning_rate": 0.0005998015686224247, "loss": 2.2679, "step": 1044 }, { "epoch": 0.03, "grad_norm": 0.6778334379196167, "learning_rate": 0.0005998011883456404, "loss": 2.3267, "step": 1045 }, { "epoch": 0.03, "grad_norm": 0.5891451835632324, "learning_rate": 0.0005998008077049416, "loss": 2.2507, "step": 1046 }, { "epoch": 0.03, "grad_norm": 0.6240523457527161, "learning_rate": 0.0005998004267003288, "loss": 2.2101, "step": 1047 }, { "epoch": 0.03, "grad_norm": 0.6250547766685486, "learning_rate": 0.0005998000453318024, "loss": 2.3094, "step": 1048 }, { "epoch": 0.03, "grad_norm": 0.5869793891906738, "learning_rate": 0.0005997996635993629, "loss": 2.2752, "step": 1049 }, { "epoch": 0.03, "grad_norm": 0.6097632050514221, "learning_rate": 0.0005997992815030108, "loss": 2.2733, "step": 1050 }, { "epoch": 0.03, "grad_norm": 0.629566490650177, "learning_rate": 0.0005997988990427466, "loss": 2.2913, "step": 1051 }, { "epoch": 0.04, "grad_norm": 0.6308552026748657, "learning_rate": 0.0005997985162185707, "loss": 2.3617, "step": 1052 }, { "epoch": 0.04, "grad_norm": 0.598038911819458, "learning_rate": 0.0005997981330304836, "loss": 2.2786, "step": 1053 }, { "epoch": 0.04, "grad_norm": 0.5910422205924988, "learning_rate": 0.0005997977494784856, "loss": 2.2193, "step": 1054 }, { "epoch": 0.04, "grad_norm": 0.6678149700164795, "learning_rate": 0.0005997973655625775, "loss": 2.2956, "step": 1055 }, { "epoch": 0.04, "grad_norm": 0.6285964250564575, "learning_rate": 0.0005997969812827595, "loss": 2.2899, "step": 1056 }, { "epoch": 0.04, "grad_norm": 0.6283600926399231, "learning_rate": 0.000599796596639032, "loss": 2.3287, "step": 1057 }, { "epoch": 0.04, "grad_norm": 0.6935529112815857, "learning_rate": 0.0005997962116313958, "loss": 2.2709, "step": 1058 }, { "epoch": 0.04, "grad_norm": 0.6233239769935608, "learning_rate": 0.0005997958262598511, "loss": 2.296, "step": 1059 }, { "epoch": 0.04, "grad_norm": 0.6407439708709717, "learning_rate": 0.0005997954405243984, "loss": 2.2528, "step": 1060 }, { "epoch": 0.04, "grad_norm": 0.6417344212532043, "learning_rate": 0.0005997950544250382, "loss": 2.3611, "step": 1061 }, { "epoch": 0.04, "grad_norm": 0.6475839614868164, "learning_rate": 0.000599794667961771, "loss": 2.3031, "step": 1062 }, { "epoch": 0.04, "grad_norm": 0.7122947573661804, "learning_rate": 0.0005997942811345972, "loss": 2.3472, "step": 1063 }, { "epoch": 0.04, "grad_norm": 0.6109170317649841, "learning_rate": 0.0005997938939435176, "loss": 2.2388, "step": 1064 }, { "epoch": 0.04, "grad_norm": 0.6650266647338867, "learning_rate": 0.0005997935063885321, "loss": 2.3213, "step": 1065 }, { "epoch": 0.04, "grad_norm": 0.639949381351471, "learning_rate": 0.0005997931184696415, "loss": 2.1918, "step": 1066 }, { "epoch": 0.04, "grad_norm": 0.5941828489303589, "learning_rate": 0.0005997927301868462, "loss": 2.3288, "step": 1067 }, { "epoch": 0.04, "grad_norm": 0.6807713508605957, "learning_rate": 0.0005997923415401468, "loss": 2.2641, "step": 1068 }, { "epoch": 0.04, "grad_norm": 0.6472964882850647, "learning_rate": 0.0005997919525295435, "loss": 2.2146, "step": 1069 }, { "epoch": 0.04, "grad_norm": 0.6660020351409912, "learning_rate": 0.0005997915631550372, "loss": 2.1986, "step": 1070 }, { "epoch": 0.04, "grad_norm": 0.6144211888313293, "learning_rate": 0.0005997911734166279, "loss": 2.2083, "step": 1071 }, { "epoch": 0.04, "grad_norm": 0.6105664372444153, "learning_rate": 0.0005997907833143163, "loss": 2.2839, "step": 1072 }, { "epoch": 0.04, "grad_norm": 0.6107708811759949, "learning_rate": 0.0005997903928481029, "loss": 2.3508, "step": 1073 }, { "epoch": 0.04, "grad_norm": 0.6062290668487549, "learning_rate": 0.0005997900020179882, "loss": 2.2618, "step": 1074 }, { "epoch": 0.04, "grad_norm": 0.6133992075920105, "learning_rate": 0.0005997896108239726, "loss": 2.2134, "step": 1075 }, { "epoch": 0.04, "grad_norm": 0.6004509925842285, "learning_rate": 0.0005997892192660566, "loss": 2.2875, "step": 1076 }, { "epoch": 0.04, "grad_norm": 0.6025480031967163, "learning_rate": 0.0005997888273442405, "loss": 2.2646, "step": 1077 }, { "epoch": 0.04, "grad_norm": 0.597833514213562, "learning_rate": 0.000599788435058525, "loss": 2.2925, "step": 1078 }, { "epoch": 0.04, "grad_norm": 0.6427432298660278, "learning_rate": 0.0005997880424089106, "loss": 2.272, "step": 1079 }, { "epoch": 0.04, "grad_norm": 0.6260206699371338, "learning_rate": 0.0005997876493953977, "loss": 2.2261, "step": 1080 }, { "epoch": 0.04, "grad_norm": 0.6362218856811523, "learning_rate": 0.0005997872560179866, "loss": 2.2449, "step": 1081 }, { "epoch": 0.04, "grad_norm": 0.6009458303451538, "learning_rate": 0.0005997868622766781, "loss": 2.1872, "step": 1082 }, { "epoch": 0.04, "grad_norm": 0.6555795669555664, "learning_rate": 0.0005997864681714724, "loss": 2.3137, "step": 1083 }, { "epoch": 0.04, "grad_norm": 0.6060916781425476, "learning_rate": 0.0005997860737023702, "loss": 2.303, "step": 1084 }, { "epoch": 0.04, "grad_norm": 0.579628050327301, "learning_rate": 0.0005997856788693719, "loss": 2.2696, "step": 1085 }, { "epoch": 0.04, "grad_norm": 0.6043224334716797, "learning_rate": 0.0005997852836724779, "loss": 2.2405, "step": 1086 }, { "epoch": 0.04, "grad_norm": 0.618512749671936, "learning_rate": 0.0005997848881116887, "loss": 2.322, "step": 1087 }, { "epoch": 0.04, "grad_norm": 0.612895667552948, "learning_rate": 0.0005997844921870049, "loss": 2.3386, "step": 1088 }, { "epoch": 0.04, "grad_norm": 0.5967035889625549, "learning_rate": 0.0005997840958984268, "loss": 2.3062, "step": 1089 }, { "epoch": 0.04, "grad_norm": 0.617530107498169, "learning_rate": 0.0005997836992459551, "loss": 2.2623, "step": 1090 }, { "epoch": 0.04, "grad_norm": 0.6277105808258057, "learning_rate": 0.00059978330222959, "loss": 2.3273, "step": 1091 }, { "epoch": 0.04, "grad_norm": 0.5938559174537659, "learning_rate": 0.0005997829048493323, "loss": 2.2672, "step": 1092 }, { "epoch": 0.04, "grad_norm": 0.6029059290885925, "learning_rate": 0.0005997825071051823, "loss": 2.2488, "step": 1093 }, { "epoch": 0.04, "grad_norm": 0.6082279682159424, "learning_rate": 0.0005997821089971404, "loss": 2.2666, "step": 1094 }, { "epoch": 0.04, "grad_norm": 0.6796607971191406, "learning_rate": 0.0005997817105252072, "loss": 2.3783, "step": 1095 }, { "epoch": 0.04, "grad_norm": 0.620513379573822, "learning_rate": 0.0005997813116893833, "loss": 2.2589, "step": 1096 }, { "epoch": 0.04, "grad_norm": 0.6322491765022278, "learning_rate": 0.000599780912489669, "loss": 2.2598, "step": 1097 }, { "epoch": 0.04, "grad_norm": 0.6160284876823425, "learning_rate": 0.0005997805129260647, "loss": 2.2699, "step": 1098 }, { "epoch": 0.04, "grad_norm": 0.592946469783783, "learning_rate": 0.0005997801129985713, "loss": 2.1864, "step": 1099 }, { "epoch": 0.04, "grad_norm": 0.6036491990089417, "learning_rate": 0.0005997797127071889, "loss": 2.3783, "step": 1100 }, { "epoch": 0.04, "grad_norm": 0.6005523800849915, "learning_rate": 0.000599779312051918, "loss": 2.2303, "step": 1101 }, { "epoch": 0.04, "grad_norm": 0.6152710914611816, "learning_rate": 0.0005997789110327594, "loss": 2.2866, "step": 1102 }, { "epoch": 0.04, "grad_norm": 0.644969642162323, "learning_rate": 0.0005997785096497131, "loss": 2.3016, "step": 1103 }, { "epoch": 0.04, "grad_norm": 0.6161428093910217, "learning_rate": 0.0005997781079027801, "loss": 2.2275, "step": 1104 }, { "epoch": 0.04, "grad_norm": 0.6364483833312988, "learning_rate": 0.0005997777057919605, "loss": 2.3218, "step": 1105 }, { "epoch": 0.04, "grad_norm": 0.6657497882843018, "learning_rate": 0.0005997773033172549, "loss": 2.2764, "step": 1106 }, { "epoch": 0.04, "grad_norm": 0.6458125710487366, "learning_rate": 0.000599776900478664, "loss": 2.3444, "step": 1107 }, { "epoch": 0.04, "grad_norm": 0.6399517059326172, "learning_rate": 0.0005997764972761879, "loss": 2.1936, "step": 1108 }, { "epoch": 0.04, "grad_norm": 0.6388619542121887, "learning_rate": 0.0005997760937098274, "loss": 2.3097, "step": 1109 }, { "epoch": 0.04, "grad_norm": 0.5925429463386536, "learning_rate": 0.0005997756897795829, "loss": 2.2739, "step": 1110 }, { "epoch": 0.04, "grad_norm": 0.6179702281951904, "learning_rate": 0.0005997752854854549, "loss": 2.2503, "step": 1111 }, { "epoch": 0.04, "grad_norm": 0.6324416399002075, "learning_rate": 0.0005997748808274438, "loss": 2.2823, "step": 1112 }, { "epoch": 0.04, "grad_norm": 0.6167786121368408, "learning_rate": 0.0005997744758055503, "loss": 2.3165, "step": 1113 }, { "epoch": 0.04, "grad_norm": 0.6122403144836426, "learning_rate": 0.0005997740704197747, "loss": 2.2288, "step": 1114 }, { "epoch": 0.04, "grad_norm": 0.6412245631217957, "learning_rate": 0.0005997736646701175, "loss": 2.2915, "step": 1115 }, { "epoch": 0.04, "grad_norm": 0.6239608526229858, "learning_rate": 0.0005997732585565793, "loss": 2.289, "step": 1116 }, { "epoch": 0.04, "grad_norm": 0.6334351897239685, "learning_rate": 0.0005997728520791605, "loss": 2.2606, "step": 1117 }, { "epoch": 0.04, "grad_norm": 0.6353530883789062, "learning_rate": 0.0005997724452378616, "loss": 2.2688, "step": 1118 }, { "epoch": 0.04, "grad_norm": 0.5891547799110413, "learning_rate": 0.0005997720380326832, "loss": 2.2151, "step": 1119 }, { "epoch": 0.04, "grad_norm": 0.6624005436897278, "learning_rate": 0.0005997716304636257, "loss": 2.2628, "step": 1120 }, { "epoch": 0.04, "grad_norm": 0.6190704107284546, "learning_rate": 0.0005997712225306895, "loss": 2.2735, "step": 1121 }, { "epoch": 0.04, "grad_norm": 0.612879753112793, "learning_rate": 0.0005997708142338753, "loss": 2.2469, "step": 1122 }, { "epoch": 0.04, "grad_norm": 0.6705514192581177, "learning_rate": 0.0005997704055731834, "loss": 2.1721, "step": 1123 }, { "epoch": 0.04, "grad_norm": 0.6384900808334351, "learning_rate": 0.0005997699965486146, "loss": 2.3545, "step": 1124 }, { "epoch": 0.04, "grad_norm": 0.6205816864967346, "learning_rate": 0.000599769587160169, "loss": 2.2795, "step": 1125 }, { "epoch": 0.04, "grad_norm": 0.593275785446167, "learning_rate": 0.0005997691774078474, "loss": 2.2512, "step": 1126 }, { "epoch": 0.04, "grad_norm": 0.6278716325759888, "learning_rate": 0.0005997687672916501, "loss": 2.3112, "step": 1127 }, { "epoch": 0.04, "grad_norm": 0.6022181510925293, "learning_rate": 0.0005997683568115778, "loss": 2.3042, "step": 1128 }, { "epoch": 0.04, "grad_norm": 0.6388305425643921, "learning_rate": 0.0005997679459676308, "loss": 2.307, "step": 1129 }, { "epoch": 0.04, "grad_norm": 0.6360989212989807, "learning_rate": 0.0005997675347598096, "loss": 2.2009, "step": 1130 }, { "epoch": 0.04, "grad_norm": 0.6066082715988159, "learning_rate": 0.000599767123188115, "loss": 2.3462, "step": 1131 }, { "epoch": 0.04, "grad_norm": 0.6099976301193237, "learning_rate": 0.0005997667112525472, "loss": 2.2537, "step": 1132 }, { "epoch": 0.04, "grad_norm": 0.6206957697868347, "learning_rate": 0.0005997662989531067, "loss": 2.2904, "step": 1133 }, { "epoch": 0.04, "grad_norm": 0.602085530757904, "learning_rate": 0.0005997658862897941, "loss": 2.3153, "step": 1134 }, { "epoch": 0.04, "grad_norm": 0.6370981335639954, "learning_rate": 0.0005997654732626099, "loss": 2.2243, "step": 1135 }, { "epoch": 0.04, "grad_norm": 0.6326861381530762, "learning_rate": 0.0005997650598715546, "loss": 2.2475, "step": 1136 }, { "epoch": 0.04, "grad_norm": 0.602522611618042, "learning_rate": 0.0005997646461166287, "loss": 2.2419, "step": 1137 }, { "epoch": 0.04, "grad_norm": 0.6550295352935791, "learning_rate": 0.0005997642319978326, "loss": 2.275, "step": 1138 }, { "epoch": 0.04, "grad_norm": 0.6037681698799133, "learning_rate": 0.000599763817515167, "loss": 2.2651, "step": 1139 }, { "epoch": 0.04, "grad_norm": 0.6052004098892212, "learning_rate": 0.0005997634026686323, "loss": 2.2695, "step": 1140 }, { "epoch": 0.04, "grad_norm": 0.6424931287765503, "learning_rate": 0.0005997629874582289, "loss": 2.2134, "step": 1141 }, { "epoch": 0.04, "grad_norm": 0.5928449630737305, "learning_rate": 0.0005997625718839574, "loss": 2.2498, "step": 1142 }, { "epoch": 0.04, "grad_norm": 0.633556604385376, "learning_rate": 0.0005997621559458184, "loss": 2.2849, "step": 1143 }, { "epoch": 0.04, "grad_norm": 0.6130008697509766, "learning_rate": 0.0005997617396438123, "loss": 2.2387, "step": 1144 }, { "epoch": 0.04, "grad_norm": 0.6032556295394897, "learning_rate": 0.0005997613229779396, "loss": 2.2843, "step": 1145 }, { "epoch": 0.04, "grad_norm": 0.6234205961227417, "learning_rate": 0.0005997609059482006, "loss": 2.2357, "step": 1146 }, { "epoch": 0.04, "grad_norm": 0.6074967384338379, "learning_rate": 0.0005997604885545964, "loss": 2.262, "step": 1147 }, { "epoch": 0.04, "grad_norm": 0.5947065949440002, "learning_rate": 0.0005997600707971269, "loss": 2.2597, "step": 1148 }, { "epoch": 0.04, "grad_norm": 0.6168065667152405, "learning_rate": 0.000599759652675793, "loss": 2.2798, "step": 1149 }, { "epoch": 0.04, "grad_norm": 0.6234248876571655, "learning_rate": 0.0005997592341905949, "loss": 2.31, "step": 1150 }, { "epoch": 0.04, "grad_norm": 0.6406698822975159, "learning_rate": 0.0005997588153415334, "loss": 2.3632, "step": 1151 }, { "epoch": 0.04, "grad_norm": 0.6160318851470947, "learning_rate": 0.0005997583961286087, "loss": 2.2343, "step": 1152 }, { "epoch": 0.04, "grad_norm": 0.6332279443740845, "learning_rate": 0.0005997579765518215, "loss": 2.2125, "step": 1153 }, { "epoch": 0.04, "grad_norm": 0.6399019956588745, "learning_rate": 0.0005997575566111723, "loss": 2.2208, "step": 1154 }, { "epoch": 0.04, "grad_norm": 0.5937256217002869, "learning_rate": 0.0005997571363066617, "loss": 2.2304, "step": 1155 }, { "epoch": 0.04, "grad_norm": 0.5978190302848816, "learning_rate": 0.0005997567156382901, "loss": 2.278, "step": 1156 }, { "epoch": 0.04, "grad_norm": 0.6837303042411804, "learning_rate": 0.0005997562946060579, "loss": 2.3392, "step": 1157 }, { "epoch": 0.04, "grad_norm": 0.6095519661903381, "learning_rate": 0.0005997558732099657, "loss": 2.2667, "step": 1158 }, { "epoch": 0.04, "grad_norm": 0.6462730169296265, "learning_rate": 0.0005997554514500142, "loss": 2.2775, "step": 1159 }, { "epoch": 0.04, "grad_norm": 0.6009367108345032, "learning_rate": 0.0005997550293262037, "loss": 2.2163, "step": 1160 }, { "epoch": 0.04, "grad_norm": 0.6191241145133972, "learning_rate": 0.0005997546068385347, "loss": 2.2621, "step": 1161 }, { "epoch": 0.04, "grad_norm": 0.5932872891426086, "learning_rate": 0.0005997541839870077, "loss": 2.2745, "step": 1162 }, { "epoch": 0.04, "grad_norm": 0.6079362034797668, "learning_rate": 0.0005997537607716234, "loss": 2.286, "step": 1163 }, { "epoch": 0.04, "grad_norm": 0.6219671964645386, "learning_rate": 0.0005997533371923822, "loss": 2.1662, "step": 1164 }, { "epoch": 0.04, "grad_norm": 0.6261717081069946, "learning_rate": 0.0005997529132492846, "loss": 2.2451, "step": 1165 }, { "epoch": 0.04, "grad_norm": 0.6115270853042603, "learning_rate": 0.0005997524889423311, "loss": 2.1807, "step": 1166 }, { "epoch": 0.04, "grad_norm": 0.653418242931366, "learning_rate": 0.0005997520642715223, "loss": 2.3069, "step": 1167 }, { "epoch": 0.04, "grad_norm": 0.6334195137023926, "learning_rate": 0.0005997516392368586, "loss": 2.3608, "step": 1168 }, { "epoch": 0.04, "grad_norm": 0.6012588143348694, "learning_rate": 0.0005997512138383407, "loss": 2.2783, "step": 1169 }, { "epoch": 0.04, "grad_norm": 0.6919533610343933, "learning_rate": 0.0005997507880759689, "loss": 2.3276, "step": 1170 }, { "epoch": 0.04, "grad_norm": 0.6106324791908264, "learning_rate": 0.0005997503619497437, "loss": 2.1822, "step": 1171 }, { "epoch": 0.04, "grad_norm": 0.6244263052940369, "learning_rate": 0.0005997499354596659, "loss": 2.3113, "step": 1172 }, { "epoch": 0.04, "grad_norm": 0.6181286573410034, "learning_rate": 0.0005997495086057359, "loss": 2.2415, "step": 1173 }, { "epoch": 0.04, "grad_norm": 0.5932324528694153, "learning_rate": 0.0005997490813879539, "loss": 2.2789, "step": 1174 }, { "epoch": 0.04, "grad_norm": 0.6240153312683105, "learning_rate": 0.0005997486538063208, "loss": 2.1931, "step": 1175 }, { "epoch": 0.04, "grad_norm": 0.6019353270530701, "learning_rate": 0.0005997482258608371, "loss": 2.2496, "step": 1176 }, { "epoch": 0.04, "grad_norm": 0.6043697595596313, "learning_rate": 0.0005997477975515031, "loss": 2.1932, "step": 1177 }, { "epoch": 0.04, "grad_norm": 0.6271641254425049, "learning_rate": 0.0005997473688783195, "loss": 2.2523, "step": 1178 }, { "epoch": 0.04, "grad_norm": 0.630821168422699, "learning_rate": 0.0005997469398412868, "loss": 2.2425, "step": 1179 }, { "epoch": 0.04, "grad_norm": 0.6263039708137512, "learning_rate": 0.0005997465104404054, "loss": 2.2234, "step": 1180 }, { "epoch": 0.04, "grad_norm": 0.6299253702163696, "learning_rate": 0.0005997460806756758, "loss": 2.3012, "step": 1181 }, { "epoch": 0.04, "grad_norm": 0.6133812665939331, "learning_rate": 0.0005997456505470989, "loss": 2.2861, "step": 1182 }, { "epoch": 0.04, "grad_norm": 0.6305975317955017, "learning_rate": 0.0005997452200546747, "loss": 2.3132, "step": 1183 }, { "epoch": 0.04, "grad_norm": 0.6314399838447571, "learning_rate": 0.0005997447891984041, "loss": 2.2828, "step": 1184 }, { "epoch": 0.04, "grad_norm": 0.6254546046257019, "learning_rate": 0.0005997443579782875, "loss": 2.312, "step": 1185 }, { "epoch": 0.04, "grad_norm": 0.6475930213928223, "learning_rate": 0.0005997439263943254, "loss": 2.236, "step": 1186 }, { "epoch": 0.04, "grad_norm": 0.6409537196159363, "learning_rate": 0.0005997434944465183, "loss": 2.3227, "step": 1187 }, { "epoch": 0.04, "grad_norm": 0.6339831948280334, "learning_rate": 0.0005997430621348667, "loss": 2.3337, "step": 1188 }, { "epoch": 0.04, "grad_norm": 0.6015129089355469, "learning_rate": 0.0005997426294593712, "loss": 2.2599, "step": 1189 }, { "epoch": 0.04, "grad_norm": 0.6432474255561829, "learning_rate": 0.0005997421964200325, "loss": 2.2881, "step": 1190 }, { "epoch": 0.04, "grad_norm": 0.6163628101348877, "learning_rate": 0.0005997417630168508, "loss": 2.2934, "step": 1191 }, { "epoch": 0.04, "grad_norm": 0.6137922406196594, "learning_rate": 0.0005997413292498267, "loss": 2.2676, "step": 1192 }, { "epoch": 0.04, "grad_norm": 0.634396493434906, "learning_rate": 0.0005997408951189609, "loss": 2.306, "step": 1193 }, { "epoch": 0.04, "grad_norm": 0.5938208699226379, "learning_rate": 0.0005997404606242538, "loss": 2.197, "step": 1194 }, { "epoch": 0.04, "grad_norm": 0.6079937219619751, "learning_rate": 0.000599740025765706, "loss": 2.2133, "step": 1195 }, { "epoch": 0.04, "grad_norm": 0.6017029285430908, "learning_rate": 0.0005997395905433178, "loss": 2.1722, "step": 1196 }, { "epoch": 0.04, "grad_norm": 0.6461874842643738, "learning_rate": 0.0005997391549570901, "loss": 2.2892, "step": 1197 }, { "epoch": 0.04, "grad_norm": 0.6102099418640137, "learning_rate": 0.0005997387190070231, "loss": 2.2587, "step": 1198 }, { "epoch": 0.04, "grad_norm": 0.6447988748550415, "learning_rate": 0.0005997382826931174, "loss": 2.1966, "step": 1199 }, { "epoch": 0.04, "grad_norm": 0.6363989114761353, "learning_rate": 0.0005997378460153737, "loss": 2.2744, "step": 1200 }, { "epoch": 0.04, "grad_norm": 0.592933714389801, "learning_rate": 0.0005997374089737925, "loss": 2.2244, "step": 1201 }, { "epoch": 0.04, "grad_norm": 0.6011362075805664, "learning_rate": 0.0005997369715683741, "loss": 2.2076, "step": 1202 }, { "epoch": 0.04, "grad_norm": 0.5948268175125122, "learning_rate": 0.0005997365337991193, "loss": 2.2361, "step": 1203 }, { "epoch": 0.04, "grad_norm": 0.6035275459289551, "learning_rate": 0.0005997360956660284, "loss": 2.2324, "step": 1204 }, { "epoch": 0.04, "grad_norm": 0.5914225578308105, "learning_rate": 0.0005997356571691021, "loss": 2.2641, "step": 1205 }, { "epoch": 0.04, "grad_norm": 0.5873271226882935, "learning_rate": 0.0005997352183083408, "loss": 2.2448, "step": 1206 }, { "epoch": 0.04, "grad_norm": 0.6398111581802368, "learning_rate": 0.0005997347790837452, "loss": 2.1471, "step": 1207 }, { "epoch": 0.04, "grad_norm": 0.5668551325798035, "learning_rate": 0.0005997343394953157, "loss": 2.2419, "step": 1208 }, { "epoch": 0.04, "grad_norm": 0.5959858894348145, "learning_rate": 0.0005997338995430529, "loss": 2.2372, "step": 1209 }, { "epoch": 0.04, "grad_norm": 0.5843842625617981, "learning_rate": 0.0005997334592269573, "loss": 2.2599, "step": 1210 }, { "epoch": 0.04, "grad_norm": 0.6079297065734863, "learning_rate": 0.0005997330185470294, "loss": 2.2027, "step": 1211 }, { "epoch": 0.04, "grad_norm": 0.6005601286888123, "learning_rate": 0.0005997325775032699, "loss": 2.2728, "step": 1212 }, { "epoch": 0.04, "grad_norm": 0.5983241200447083, "learning_rate": 0.0005997321360956792, "loss": 2.2907, "step": 1213 }, { "epoch": 0.04, "grad_norm": 0.6032999157905579, "learning_rate": 0.0005997316943242578, "loss": 2.2686, "step": 1214 }, { "epoch": 0.04, "grad_norm": 0.6016585826873779, "learning_rate": 0.0005997312521890063, "loss": 2.2943, "step": 1215 }, { "epoch": 0.04, "grad_norm": 0.6082209348678589, "learning_rate": 0.0005997308096899252, "loss": 2.1704, "step": 1216 }, { "epoch": 0.04, "grad_norm": 0.6109758019447327, "learning_rate": 0.000599730366827015, "loss": 2.2838, "step": 1217 }, { "epoch": 0.04, "grad_norm": 0.5768473148345947, "learning_rate": 0.0005997299236002762, "loss": 2.2611, "step": 1218 }, { "epoch": 0.04, "grad_norm": 0.5832444429397583, "learning_rate": 0.0005997294800097097, "loss": 2.2914, "step": 1219 }, { "epoch": 0.04, "grad_norm": 0.5880745649337769, "learning_rate": 0.0005997290360553157, "loss": 2.2651, "step": 1220 }, { "epoch": 0.04, "grad_norm": 0.6341032385826111, "learning_rate": 0.0005997285917370949, "loss": 2.2204, "step": 1221 }, { "epoch": 0.04, "grad_norm": 0.62996506690979, "learning_rate": 0.0005997281470550475, "loss": 2.3057, "step": 1222 }, { "epoch": 0.04, "grad_norm": 0.5915981531143188, "learning_rate": 0.0005997277020091744, "loss": 2.2895, "step": 1223 }, { "epoch": 0.04, "grad_norm": 0.6104966402053833, "learning_rate": 0.0005997272565994761, "loss": 2.2611, "step": 1224 }, { "epoch": 0.04, "grad_norm": 0.6351845860481262, "learning_rate": 0.0005997268108259531, "loss": 2.3677, "step": 1225 }, { "epoch": 0.04, "grad_norm": 0.6118796467781067, "learning_rate": 0.0005997263646886058, "loss": 2.2554, "step": 1226 }, { "epoch": 0.04, "grad_norm": 0.6113844513893127, "learning_rate": 0.0005997259181874349, "loss": 2.273, "step": 1227 }, { "epoch": 0.04, "grad_norm": 0.5979406237602234, "learning_rate": 0.0005997254713224409, "loss": 2.1968, "step": 1228 }, { "epoch": 0.04, "grad_norm": 0.649066150188446, "learning_rate": 0.0005997250240936243, "loss": 2.3604, "step": 1229 }, { "epoch": 0.04, "grad_norm": 0.6007981300354004, "learning_rate": 0.0005997245765009858, "loss": 2.2309, "step": 1230 }, { "epoch": 0.04, "grad_norm": 0.6022560596466064, "learning_rate": 0.0005997241285445257, "loss": 2.2089, "step": 1231 }, { "epoch": 0.04, "grad_norm": 0.5951440334320068, "learning_rate": 0.0005997236802242447, "loss": 2.2958, "step": 1232 }, { "epoch": 0.04, "grad_norm": 0.6027469635009766, "learning_rate": 0.0005997232315401433, "loss": 2.3341, "step": 1233 }, { "epoch": 0.04, "grad_norm": 0.6343717575073242, "learning_rate": 0.0005997227824922222, "loss": 2.2526, "step": 1234 }, { "epoch": 0.04, "grad_norm": 0.58859783411026, "learning_rate": 0.0005997223330804816, "loss": 2.1851, "step": 1235 }, { "epoch": 0.04, "grad_norm": 0.6288921236991882, "learning_rate": 0.0005997218833049224, "loss": 2.2114, "step": 1236 }, { "epoch": 0.04, "grad_norm": 0.6555816531181335, "learning_rate": 0.000599721433165545, "loss": 2.1512, "step": 1237 }, { "epoch": 0.04, "grad_norm": 0.6193161010742188, "learning_rate": 0.0005997209826623499, "loss": 2.3244, "step": 1238 }, { "epoch": 0.04, "grad_norm": 0.6238470077514648, "learning_rate": 0.0005997205317953376, "loss": 2.252, "step": 1239 }, { "epoch": 0.04, "grad_norm": 0.5972769260406494, "learning_rate": 0.0005997200805645088, "loss": 2.2143, "step": 1240 }, { "epoch": 0.04, "grad_norm": 0.5929266810417175, "learning_rate": 0.000599719628969864, "loss": 2.203, "step": 1241 }, { "epoch": 0.04, "grad_norm": 0.614806056022644, "learning_rate": 0.0005997191770114038, "loss": 2.1028, "step": 1242 }, { "epoch": 0.04, "grad_norm": 0.6446940898895264, "learning_rate": 0.0005997187246891285, "loss": 2.2316, "step": 1243 }, { "epoch": 0.04, "grad_norm": 0.6390447020530701, "learning_rate": 0.000599718272003039, "loss": 2.224, "step": 1244 }, { "epoch": 0.04, "grad_norm": 0.6306357979774475, "learning_rate": 0.0005997178189531356, "loss": 2.2919, "step": 1245 }, { "epoch": 0.04, "grad_norm": 0.6282156109809875, "learning_rate": 0.000599717365539419, "loss": 2.3494, "step": 1246 }, { "epoch": 0.04, "grad_norm": 0.6403049826622009, "learning_rate": 0.0005997169117618895, "loss": 2.1959, "step": 1247 }, { "epoch": 0.04, "grad_norm": 0.625257134437561, "learning_rate": 0.000599716457620548, "loss": 2.2244, "step": 1248 }, { "epoch": 0.04, "grad_norm": 0.6039683818817139, "learning_rate": 0.0005997160031153948, "loss": 2.231, "step": 1249 }, { "epoch": 0.04, "grad_norm": 0.6356120705604553, "learning_rate": 0.0005997155482464306, "loss": 2.3094, "step": 1250 }, { "epoch": 0.04, "grad_norm": 0.598628580570221, "learning_rate": 0.0005997150930136559, "loss": 2.1752, "step": 1251 }, { "epoch": 0.04, "grad_norm": 0.6220409274101257, "learning_rate": 0.0005997146374170712, "loss": 2.2448, "step": 1252 }, { "epoch": 0.04, "grad_norm": 0.5634777545928955, "learning_rate": 0.000599714181456677, "loss": 2.2081, "step": 1253 }, { "epoch": 0.04, "grad_norm": 0.565523624420166, "learning_rate": 0.0005997137251324741, "loss": 2.2268, "step": 1254 }, { "epoch": 0.04, "grad_norm": 0.6153210997581482, "learning_rate": 0.0005997132684444628, "loss": 2.196, "step": 1255 }, { "epoch": 0.04, "grad_norm": 0.5618225336074829, "learning_rate": 0.0005997128113926438, "loss": 2.1941, "step": 1256 }, { "epoch": 0.04, "grad_norm": 0.6028639078140259, "learning_rate": 0.0005997123539770176, "loss": 2.2672, "step": 1257 }, { "epoch": 0.04, "grad_norm": 0.6257137060165405, "learning_rate": 0.0005997118961975847, "loss": 2.2863, "step": 1258 }, { "epoch": 0.04, "grad_norm": 0.587209939956665, "learning_rate": 0.0005997114380543458, "loss": 2.2838, "step": 1259 }, { "epoch": 0.04, "grad_norm": 0.6343488097190857, "learning_rate": 0.0005997109795473013, "loss": 2.1957, "step": 1260 }, { "epoch": 0.04, "grad_norm": 0.6011567711830139, "learning_rate": 0.000599710520676452, "loss": 2.3007, "step": 1261 }, { "epoch": 0.04, "grad_norm": 0.6184718012809753, "learning_rate": 0.0005997100614417981, "loss": 2.2008, "step": 1262 }, { "epoch": 0.04, "grad_norm": 0.6124711036682129, "learning_rate": 0.0005997096018433404, "loss": 2.2297, "step": 1263 }, { "epoch": 0.04, "grad_norm": 0.5945311188697815, "learning_rate": 0.0005997091418810795, "loss": 2.2801, "step": 1264 }, { "epoch": 0.04, "grad_norm": 0.6079166531562805, "learning_rate": 0.0005997086815550157, "loss": 2.1419, "step": 1265 }, { "epoch": 0.04, "grad_norm": 0.6001989245414734, "learning_rate": 0.0005997082208651497, "loss": 2.1727, "step": 1266 }, { "epoch": 0.04, "grad_norm": 0.6164124608039856, "learning_rate": 0.0005997077598114823, "loss": 2.1145, "step": 1267 }, { "epoch": 0.04, "grad_norm": 0.6019865274429321, "learning_rate": 0.0005997072983940137, "loss": 2.1381, "step": 1268 }, { "epoch": 0.04, "grad_norm": 0.6522802114486694, "learning_rate": 0.0005997068366127446, "loss": 2.2828, "step": 1269 }, { "epoch": 0.04, "grad_norm": 0.5936710238456726, "learning_rate": 0.0005997063744676756, "loss": 2.2546, "step": 1270 }, { "epoch": 0.04, "grad_norm": 0.6132328510284424, "learning_rate": 0.0005997059119588072, "loss": 2.2405, "step": 1271 }, { "epoch": 0.04, "grad_norm": 0.6272875666618347, "learning_rate": 0.0005997054490861399, "loss": 2.2982, "step": 1272 }, { "epoch": 0.04, "grad_norm": 0.6032499074935913, "learning_rate": 0.0005997049858496744, "loss": 2.247, "step": 1273 }, { "epoch": 0.04, "grad_norm": 0.5919700860977173, "learning_rate": 0.0005997045222494113, "loss": 2.2535, "step": 1274 }, { "epoch": 0.04, "grad_norm": 0.6324141621589661, "learning_rate": 0.000599704058285351, "loss": 2.2899, "step": 1275 }, { "epoch": 0.04, "grad_norm": 0.6160469651222229, "learning_rate": 0.0005997035939574941, "loss": 2.2628, "step": 1276 }, { "epoch": 0.04, "grad_norm": 0.5858384370803833, "learning_rate": 0.0005997031292658412, "loss": 2.2295, "step": 1277 }, { "epoch": 0.04, "grad_norm": 0.6450297236442566, "learning_rate": 0.000599702664210393, "loss": 2.1466, "step": 1278 }, { "epoch": 0.04, "grad_norm": 0.6057591438293457, "learning_rate": 0.0005997021987911497, "loss": 2.2456, "step": 1279 }, { "epoch": 0.04, "grad_norm": 0.6158238649368286, "learning_rate": 0.0005997017330081122, "loss": 2.2954, "step": 1280 }, { "epoch": 0.04, "grad_norm": 0.6378735303878784, "learning_rate": 0.000599701266861281, "loss": 2.2142, "step": 1281 }, { "epoch": 0.04, "grad_norm": 0.6212958097457886, "learning_rate": 0.0005997008003506565, "loss": 2.2696, "step": 1282 }, { "epoch": 0.04, "grad_norm": 0.6028735041618347, "learning_rate": 0.0005997003334762396, "loss": 2.211, "step": 1283 }, { "epoch": 0.04, "grad_norm": 0.6178608536720276, "learning_rate": 0.0005996998662380305, "loss": 2.3334, "step": 1284 }, { "epoch": 0.04, "grad_norm": 0.5876888632774353, "learning_rate": 0.0005996993986360299, "loss": 2.2771, "step": 1285 }, { "epoch": 0.04, "grad_norm": 0.6069101691246033, "learning_rate": 0.0005996989306702384, "loss": 2.2939, "step": 1286 }, { "epoch": 0.04, "grad_norm": 0.6163016557693481, "learning_rate": 0.0005996984623406566, "loss": 2.265, "step": 1287 }, { "epoch": 0.04, "grad_norm": 0.6052428483963013, "learning_rate": 0.0005996979936472851, "loss": 2.2131, "step": 1288 }, { "epoch": 0.04, "grad_norm": 0.6211912631988525, "learning_rate": 0.0005996975245901242, "loss": 2.1635, "step": 1289 }, { "epoch": 0.04, "grad_norm": 0.6056832671165466, "learning_rate": 0.0005996970551691748, "loss": 2.1697, "step": 1290 }, { "epoch": 0.04, "grad_norm": 0.6272751688957214, "learning_rate": 0.0005996965853844373, "loss": 2.2865, "step": 1291 }, { "epoch": 0.04, "grad_norm": 0.6001526713371277, "learning_rate": 0.0005996961152359123, "loss": 2.1889, "step": 1292 }, { "epoch": 0.04, "grad_norm": 0.6266233921051025, "learning_rate": 0.0005996956447236003, "loss": 2.2237, "step": 1293 }, { "epoch": 0.04, "grad_norm": 0.6323155760765076, "learning_rate": 0.0005996951738475021, "loss": 2.2331, "step": 1294 }, { "epoch": 0.04, "grad_norm": 0.6125971078872681, "learning_rate": 0.000599694702607618, "loss": 2.2194, "step": 1295 }, { "epoch": 0.04, "grad_norm": 0.6449416279792786, "learning_rate": 0.0005996942310039487, "loss": 2.3516, "step": 1296 }, { "epoch": 0.04, "grad_norm": 0.5936868786811829, "learning_rate": 0.0005996937590364949, "loss": 2.1727, "step": 1297 }, { "epoch": 0.04, "grad_norm": 0.5817375183105469, "learning_rate": 0.0005996932867052569, "loss": 2.2099, "step": 1298 }, { "epoch": 0.04, "grad_norm": 0.597207248210907, "learning_rate": 0.0005996928140102354, "loss": 2.2195, "step": 1299 }, { "epoch": 0.04, "grad_norm": 0.5953619480133057, "learning_rate": 0.000599692340951431, "loss": 2.2956, "step": 1300 }, { "epoch": 0.04, "grad_norm": 0.5996301174163818, "learning_rate": 0.0005996918675288443, "loss": 2.2634, "step": 1301 }, { "epoch": 0.04, "grad_norm": 0.6001428961753845, "learning_rate": 0.0005996913937424758, "loss": 2.2806, "step": 1302 }, { "epoch": 0.04, "grad_norm": 0.5978271961212158, "learning_rate": 0.0005996909195923261, "loss": 2.258, "step": 1303 }, { "epoch": 0.04, "grad_norm": 0.6132127046585083, "learning_rate": 0.0005996904450783957, "loss": 2.1658, "step": 1304 }, { "epoch": 0.04, "grad_norm": 0.5948593020439148, "learning_rate": 0.0005996899702006855, "loss": 2.2348, "step": 1305 }, { "epoch": 0.04, "grad_norm": 0.6232311725616455, "learning_rate": 0.0005996894949591956, "loss": 2.2674, "step": 1306 }, { "epoch": 0.04, "grad_norm": 0.6127834916114807, "learning_rate": 0.0005996890193539269, "loss": 2.1665, "step": 1307 }, { "epoch": 0.04, "grad_norm": 0.5850775241851807, "learning_rate": 0.00059968854338488, "loss": 2.2795, "step": 1308 }, { "epoch": 0.04, "grad_norm": 0.6072320938110352, "learning_rate": 0.0005996880670520551, "loss": 2.2286, "step": 1309 }, { "epoch": 0.04, "grad_norm": 0.5785049796104431, "learning_rate": 0.0005996875903554532, "loss": 2.235, "step": 1310 }, { "epoch": 0.04, "grad_norm": 0.5867786407470703, "learning_rate": 0.0005996871132950747, "loss": 2.2368, "step": 1311 }, { "epoch": 0.04, "grad_norm": 0.5867288708686829, "learning_rate": 0.0005996866358709203, "loss": 2.1822, "step": 1312 }, { "epoch": 0.04, "grad_norm": 0.5792446136474609, "learning_rate": 0.0005996861580829904, "loss": 2.163, "step": 1313 }, { "epoch": 0.04, "grad_norm": 0.6061267852783203, "learning_rate": 0.0005996856799312855, "loss": 2.2482, "step": 1314 }, { "epoch": 0.04, "grad_norm": 0.6049308180809021, "learning_rate": 0.0005996852014158065, "loss": 2.1582, "step": 1315 }, { "epoch": 0.04, "grad_norm": 0.6406092643737793, "learning_rate": 0.0005996847225365539, "loss": 2.2416, "step": 1316 }, { "epoch": 0.04, "grad_norm": 0.6024240255355835, "learning_rate": 0.0005996842432935281, "loss": 2.139, "step": 1317 }, { "epoch": 0.04, "grad_norm": 0.6358091235160828, "learning_rate": 0.0005996837636867298, "loss": 2.1959, "step": 1318 }, { "epoch": 0.04, "grad_norm": 0.6173562407493591, "learning_rate": 0.0005996832837161595, "loss": 2.2078, "step": 1319 }, { "epoch": 0.04, "grad_norm": 0.6070315837860107, "learning_rate": 0.000599682803381818, "loss": 2.2193, "step": 1320 }, { "epoch": 0.04, "grad_norm": 0.6471150517463684, "learning_rate": 0.0005996823226837056, "loss": 2.2262, "step": 1321 }, { "epoch": 0.04, "grad_norm": 0.6190640330314636, "learning_rate": 0.000599681841621823, "loss": 2.1994, "step": 1322 }, { "epoch": 0.04, "grad_norm": 0.6237561702728271, "learning_rate": 0.000599681360196171, "loss": 2.2027, "step": 1323 }, { "epoch": 0.04, "grad_norm": 0.6113224029541016, "learning_rate": 0.0005996808784067498, "loss": 2.3048, "step": 1324 }, { "epoch": 0.04, "grad_norm": 0.5826603770256042, "learning_rate": 0.0005996803962535602, "loss": 2.1893, "step": 1325 }, { "epoch": 0.04, "grad_norm": 0.6036246418952942, "learning_rate": 0.0005996799137366028, "loss": 2.2205, "step": 1326 }, { "epoch": 0.04, "grad_norm": 0.6152715086936951, "learning_rate": 0.0005996794308558782, "loss": 2.2147, "step": 1327 }, { "epoch": 0.04, "grad_norm": 0.6050938963890076, "learning_rate": 0.0005996789476113868, "loss": 2.2455, "step": 1328 }, { "epoch": 0.04, "grad_norm": 0.648945689201355, "learning_rate": 0.0005996784640031294, "loss": 2.1334, "step": 1329 }, { "epoch": 0.04, "grad_norm": 0.598426878452301, "learning_rate": 0.0005996779800311064, "loss": 2.2129, "step": 1330 }, { "epoch": 0.04, "grad_norm": 0.6385132670402527, "learning_rate": 0.0005996774956953185, "loss": 2.2599, "step": 1331 }, { "epoch": 0.04, "grad_norm": 0.598449170589447, "learning_rate": 0.0005996770109957664, "loss": 2.1787, "step": 1332 }, { "epoch": 0.04, "grad_norm": 0.6193997263908386, "learning_rate": 0.0005996765259324504, "loss": 2.2518, "step": 1333 }, { "epoch": 0.04, "grad_norm": 0.5991164445877075, "learning_rate": 0.0005996760405053715, "loss": 2.3173, "step": 1334 }, { "epoch": 0.04, "grad_norm": 0.6137742400169373, "learning_rate": 0.0005996755547145298, "loss": 2.1637, "step": 1335 }, { "epoch": 0.04, "grad_norm": 0.6448507905006409, "learning_rate": 0.0005996750685599261, "loss": 2.2605, "step": 1336 }, { "epoch": 0.04, "grad_norm": 0.6111886501312256, "learning_rate": 0.0005996745820415611, "loss": 2.2552, "step": 1337 }, { "epoch": 0.04, "grad_norm": 0.6544638872146606, "learning_rate": 0.0005996740951594355, "loss": 2.1807, "step": 1338 }, { "epoch": 0.04, "grad_norm": 0.602088987827301, "learning_rate": 0.0005996736079135495, "loss": 2.1552, "step": 1339 }, { "epoch": 0.04, "grad_norm": 0.6345387101173401, "learning_rate": 0.000599673120303904, "loss": 2.2971, "step": 1340 }, { "epoch": 0.04, "grad_norm": 0.6126169562339783, "learning_rate": 0.0005996726323304994, "loss": 2.2587, "step": 1341 }, { "epoch": 0.04, "grad_norm": 0.6254433393478394, "learning_rate": 0.0005996721439933363, "loss": 2.22, "step": 1342 }, { "epoch": 0.04, "grad_norm": 0.5881884694099426, "learning_rate": 0.0005996716552924156, "loss": 2.2216, "step": 1343 }, { "epoch": 0.04, "grad_norm": 0.6231310367584229, "learning_rate": 0.0005996711662277375, "loss": 2.2766, "step": 1344 }, { "epoch": 0.04, "grad_norm": 0.6078267693519592, "learning_rate": 0.0005996706767993028, "loss": 2.1733, "step": 1345 }, { "epoch": 0.04, "grad_norm": 0.5973578095436096, "learning_rate": 0.000599670187007112, "loss": 2.234, "step": 1346 }, { "epoch": 0.04, "grad_norm": 0.5935834646224976, "learning_rate": 0.0005996696968511659, "loss": 2.2149, "step": 1347 }, { "epoch": 0.04, "grad_norm": 0.6057213544845581, "learning_rate": 0.0005996692063314649, "loss": 2.2109, "step": 1348 }, { "epoch": 0.04, "grad_norm": 0.6007067561149597, "learning_rate": 0.0005996687154480096, "loss": 2.172, "step": 1349 }, { "epoch": 0.04, "grad_norm": 0.6268991827964783, "learning_rate": 0.0005996682242008005, "loss": 2.3013, "step": 1350 }, { "epoch": 0.04, "grad_norm": 0.6347460746765137, "learning_rate": 0.0005996677325898384, "loss": 2.2957, "step": 1351 }, { "epoch": 0.04, "grad_norm": 0.594416618347168, "learning_rate": 0.0005996672406151239, "loss": 2.2211, "step": 1352 }, { "epoch": 0.05, "grad_norm": 0.6072100400924683, "learning_rate": 0.0005996667482766575, "loss": 2.2456, "step": 1353 }, { "epoch": 0.05, "grad_norm": 0.5974112153053284, "learning_rate": 0.0005996662555744399, "loss": 2.1428, "step": 1354 }, { "epoch": 0.05, "grad_norm": 0.5957870483398438, "learning_rate": 0.0005996657625084715, "loss": 2.1475, "step": 1355 }, { "epoch": 0.05, "grad_norm": 0.5991418957710266, "learning_rate": 0.0005996652690787531, "loss": 2.2075, "step": 1356 }, { "epoch": 0.05, "grad_norm": 0.5987755656242371, "learning_rate": 0.0005996647752852852, "loss": 2.1981, "step": 1357 }, { "epoch": 0.05, "grad_norm": 0.5817943811416626, "learning_rate": 0.0005996642811280683, "loss": 2.1572, "step": 1358 }, { "epoch": 0.05, "grad_norm": 0.5979436635971069, "learning_rate": 0.0005996637866071032, "loss": 2.2034, "step": 1359 }, { "epoch": 0.05, "grad_norm": 0.6189172863960266, "learning_rate": 0.0005996632917223904, "loss": 2.3115, "step": 1360 }, { "epoch": 0.05, "grad_norm": 0.5916750431060791, "learning_rate": 0.0005996627964739306, "loss": 2.2762, "step": 1361 }, { "epoch": 0.05, "grad_norm": 0.5756883025169373, "learning_rate": 0.0005996623008617243, "loss": 2.139, "step": 1362 }, { "epoch": 0.05, "grad_norm": 0.5677353143692017, "learning_rate": 0.000599661804885772, "loss": 2.2289, "step": 1363 }, { "epoch": 0.05, "grad_norm": 0.6169404983520508, "learning_rate": 0.0005996613085460746, "loss": 2.1812, "step": 1364 }, { "epoch": 0.05, "grad_norm": 0.5969155430793762, "learning_rate": 0.0005996608118426323, "loss": 2.2245, "step": 1365 }, { "epoch": 0.05, "grad_norm": 0.5762047171592712, "learning_rate": 0.0005996603147754461, "loss": 2.1626, "step": 1366 }, { "epoch": 0.05, "grad_norm": 0.6376729011535645, "learning_rate": 0.0005996598173445164, "loss": 2.2873, "step": 1367 }, { "epoch": 0.05, "grad_norm": 0.6109640002250671, "learning_rate": 0.0005996593195498437, "loss": 2.1893, "step": 1368 }, { "epoch": 0.05, "grad_norm": 0.6314482688903809, "learning_rate": 0.000599658821391429, "loss": 2.2534, "step": 1369 }, { "epoch": 0.05, "grad_norm": 0.586993396282196, "learning_rate": 0.0005996583228692725, "loss": 2.1758, "step": 1370 }, { "epoch": 0.05, "grad_norm": 0.6131721138954163, "learning_rate": 0.0005996578239833749, "loss": 2.1985, "step": 1371 }, { "epoch": 0.05, "grad_norm": 0.5796867609024048, "learning_rate": 0.0005996573247337369, "loss": 2.1937, "step": 1372 }, { "epoch": 0.05, "grad_norm": 0.6193230748176575, "learning_rate": 0.000599656825120359, "loss": 2.3048, "step": 1373 }, { "epoch": 0.05, "grad_norm": 0.5965017080307007, "learning_rate": 0.000599656325143242, "loss": 2.2202, "step": 1374 }, { "epoch": 0.05, "grad_norm": 0.5854979753494263, "learning_rate": 0.0005996558248023864, "loss": 2.2399, "step": 1375 }, { "epoch": 0.05, "grad_norm": 0.6424441933631897, "learning_rate": 0.0005996553240977926, "loss": 2.2974, "step": 1376 }, { "epoch": 0.05, "grad_norm": 0.5946183800697327, "learning_rate": 0.0005996548230294615, "loss": 2.3159, "step": 1377 }, { "epoch": 0.05, "grad_norm": 0.5874350070953369, "learning_rate": 0.0005996543215973935, "loss": 2.2379, "step": 1378 }, { "epoch": 0.05, "grad_norm": 0.6591045260429382, "learning_rate": 0.0005996538198015894, "loss": 2.3211, "step": 1379 }, { "epoch": 0.05, "grad_norm": 0.625835120677948, "learning_rate": 0.0005996533176420497, "loss": 2.2005, "step": 1380 }, { "epoch": 0.05, "grad_norm": 0.597284734249115, "learning_rate": 0.000599652815118775, "loss": 2.2932, "step": 1381 }, { "epoch": 0.05, "grad_norm": 0.6299658417701721, "learning_rate": 0.0005996523122317659, "loss": 2.126, "step": 1382 }, { "epoch": 0.05, "grad_norm": 0.6083636283874512, "learning_rate": 0.0005996518089810232, "loss": 2.2981, "step": 1383 }, { "epoch": 0.05, "grad_norm": 0.584403395652771, "learning_rate": 0.0005996513053665471, "loss": 2.1717, "step": 1384 }, { "epoch": 0.05, "grad_norm": 0.6155949234962463, "learning_rate": 0.0005996508013883387, "loss": 2.1771, "step": 1385 }, { "epoch": 0.05, "grad_norm": 0.599330484867096, "learning_rate": 0.0005996502970463983, "loss": 2.2277, "step": 1386 }, { "epoch": 0.05, "grad_norm": 0.6183393597602844, "learning_rate": 0.0005996497923407266, "loss": 2.2086, "step": 1387 }, { "epoch": 0.05, "grad_norm": 0.6153802275657654, "learning_rate": 0.0005996492872713242, "loss": 2.2565, "step": 1388 }, { "epoch": 0.05, "grad_norm": 0.636961817741394, "learning_rate": 0.0005996487818381917, "loss": 2.1905, "step": 1389 }, { "epoch": 0.05, "grad_norm": 0.592055082321167, "learning_rate": 0.0005996482760413297, "loss": 2.1722, "step": 1390 }, { "epoch": 0.05, "grad_norm": 0.5943842530250549, "learning_rate": 0.0005996477698807389, "loss": 2.2184, "step": 1391 }, { "epoch": 0.05, "grad_norm": 0.6502450108528137, "learning_rate": 0.0005996472633564198, "loss": 2.2131, "step": 1392 }, { "epoch": 0.05, "grad_norm": 0.5795167684555054, "learning_rate": 0.0005996467564683732, "loss": 2.0986, "step": 1393 }, { "epoch": 0.05, "grad_norm": 0.593041181564331, "learning_rate": 0.0005996462492165994, "loss": 2.2821, "step": 1394 }, { "epoch": 0.05, "grad_norm": 0.6482425332069397, "learning_rate": 0.0005996457416010993, "loss": 2.3026, "step": 1395 }, { "epoch": 0.05, "grad_norm": 0.5877549648284912, "learning_rate": 0.0005996452336218735, "loss": 2.2221, "step": 1396 }, { "epoch": 0.05, "grad_norm": 0.6161638498306274, "learning_rate": 0.0005996447252789224, "loss": 2.2207, "step": 1397 }, { "epoch": 0.05, "grad_norm": 0.5929924249649048, "learning_rate": 0.0005996442165722468, "loss": 2.1709, "step": 1398 }, { "epoch": 0.05, "grad_norm": 0.618949830532074, "learning_rate": 0.0005996437075018473, "loss": 2.256, "step": 1399 }, { "epoch": 0.05, "grad_norm": 0.5880789160728455, "learning_rate": 0.0005996431980677245, "loss": 2.1849, "step": 1400 }, { "epoch": 0.05, "grad_norm": 0.5955998301506042, "learning_rate": 0.000599642688269879, "loss": 2.2037, "step": 1401 }, { "epoch": 0.05, "grad_norm": 0.5848546624183655, "learning_rate": 0.0005996421781083115, "loss": 2.1197, "step": 1402 }, { "epoch": 0.05, "grad_norm": 0.5953160524368286, "learning_rate": 0.0005996416675830224, "loss": 2.202, "step": 1403 }, { "epoch": 0.05, "grad_norm": 0.5999587178230286, "learning_rate": 0.0005996411566940125, "loss": 2.1864, "step": 1404 }, { "epoch": 0.05, "grad_norm": 0.5982399582862854, "learning_rate": 0.0005996406454412824, "loss": 2.1791, "step": 1405 }, { "epoch": 0.05, "grad_norm": 0.610358476638794, "learning_rate": 0.0005996401338248328, "loss": 2.2906, "step": 1406 }, { "epoch": 0.05, "grad_norm": 0.6165593266487122, "learning_rate": 0.0005996396218446641, "loss": 2.1507, "step": 1407 }, { "epoch": 0.05, "grad_norm": 0.6136395335197449, "learning_rate": 0.000599639109500777, "loss": 2.1893, "step": 1408 }, { "epoch": 0.05, "grad_norm": 0.5880341529846191, "learning_rate": 0.0005996385967931723, "loss": 2.2229, "step": 1409 }, { "epoch": 0.05, "grad_norm": 0.5739368200302124, "learning_rate": 0.0005996380837218505, "loss": 2.2329, "step": 1410 }, { "epoch": 0.05, "grad_norm": 0.6728112697601318, "learning_rate": 0.0005996375702868121, "loss": 2.2399, "step": 1411 }, { "epoch": 0.05, "grad_norm": 0.6261579990386963, "learning_rate": 0.0005996370564880579, "loss": 2.1123, "step": 1412 }, { "epoch": 0.05, "grad_norm": 0.6409173011779785, "learning_rate": 0.0005996365423255885, "loss": 2.2027, "step": 1413 }, { "epoch": 0.05, "grad_norm": 0.5626652240753174, "learning_rate": 0.0005996360277994045, "loss": 2.16, "step": 1414 }, { "epoch": 0.05, "grad_norm": 0.6128789782524109, "learning_rate": 0.0005996355129095064, "loss": 2.1988, "step": 1415 }, { "epoch": 0.05, "grad_norm": 0.6314391493797302, "learning_rate": 0.000599634997655895, "loss": 2.1852, "step": 1416 }, { "epoch": 0.05, "grad_norm": 0.5830119848251343, "learning_rate": 0.000599634482038571, "loss": 2.1864, "step": 1417 }, { "epoch": 0.05, "grad_norm": 0.6308501958847046, "learning_rate": 0.0005996339660575347, "loss": 2.2065, "step": 1418 }, { "epoch": 0.05, "grad_norm": 0.610977828502655, "learning_rate": 0.000599633449712787, "loss": 2.2023, "step": 1419 }, { "epoch": 0.05, "grad_norm": 0.6080077290534973, "learning_rate": 0.0005996329330043284, "loss": 2.2268, "step": 1420 }, { "epoch": 0.05, "grad_norm": 0.595720648765564, "learning_rate": 0.0005996324159321596, "loss": 2.2162, "step": 1421 }, { "epoch": 0.05, "grad_norm": 0.5915091037750244, "learning_rate": 0.0005996318984962812, "loss": 2.2071, "step": 1422 }, { "epoch": 0.05, "grad_norm": 0.6097871661186218, "learning_rate": 0.0005996313806966937, "loss": 2.1912, "step": 1423 }, { "epoch": 0.05, "grad_norm": 0.657264769077301, "learning_rate": 0.000599630862533398, "loss": 2.2822, "step": 1424 }, { "epoch": 0.05, "grad_norm": 0.6029015183448792, "learning_rate": 0.0005996303440063945, "loss": 2.234, "step": 1425 }, { "epoch": 0.05, "grad_norm": 0.6197046041488647, "learning_rate": 0.000599629825115684, "loss": 2.1637, "step": 1426 }, { "epoch": 0.05, "grad_norm": 0.6060818433761597, "learning_rate": 0.0005996293058612669, "loss": 2.0682, "step": 1427 }, { "epoch": 0.05, "grad_norm": 0.5934303402900696, "learning_rate": 0.0005996287862431441, "loss": 2.187, "step": 1428 }, { "epoch": 0.05, "grad_norm": 0.5783737301826477, "learning_rate": 0.0005996282662613161, "loss": 2.2006, "step": 1429 }, { "epoch": 0.05, "grad_norm": 0.5612322092056274, "learning_rate": 0.0005996277459157834, "loss": 2.1656, "step": 1430 }, { "epoch": 0.05, "grad_norm": 0.5996583700180054, "learning_rate": 0.0005996272252065469, "loss": 2.1968, "step": 1431 }, { "epoch": 0.05, "grad_norm": 0.5868675708770752, "learning_rate": 0.000599626704133607, "loss": 2.2044, "step": 1432 }, { "epoch": 0.05, "grad_norm": 0.5766457319259644, "learning_rate": 0.0005996261826969646, "loss": 2.2117, "step": 1433 }, { "epoch": 0.05, "grad_norm": 0.631594717502594, "learning_rate": 0.00059962566089662, "loss": 2.229, "step": 1434 }, { "epoch": 0.05, "grad_norm": 0.5882089734077454, "learning_rate": 0.000599625138732574, "loss": 2.2033, "step": 1435 }, { "epoch": 0.05, "grad_norm": 0.6061730980873108, "learning_rate": 0.0005996246162048273, "loss": 2.2729, "step": 1436 }, { "epoch": 0.05, "grad_norm": 0.6218544840812683, "learning_rate": 0.0005996240933133805, "loss": 2.1831, "step": 1437 }, { "epoch": 0.05, "grad_norm": 0.5967539548873901, "learning_rate": 0.0005996235700582341, "loss": 2.2638, "step": 1438 }, { "epoch": 0.05, "grad_norm": 0.5831315517425537, "learning_rate": 0.0005996230464393889, "loss": 2.2352, "step": 1439 }, { "epoch": 0.05, "grad_norm": 0.6070137023925781, "learning_rate": 0.0005996225224568454, "loss": 2.1767, "step": 1440 }, { "epoch": 0.05, "grad_norm": 0.5924677848815918, "learning_rate": 0.0005996219981106043, "loss": 2.1997, "step": 1441 }, { "epoch": 0.05, "grad_norm": 0.5708178877830505, "learning_rate": 0.0005996214734006663, "loss": 2.0968, "step": 1442 }, { "epoch": 0.05, "grad_norm": 0.585565984249115, "learning_rate": 0.000599620948327032, "loss": 2.2082, "step": 1443 }, { "epoch": 0.05, "grad_norm": 0.5977367162704468, "learning_rate": 0.0005996204228897019, "loss": 2.2085, "step": 1444 }, { "epoch": 0.05, "grad_norm": 0.6491191387176514, "learning_rate": 0.0005996198970886768, "loss": 2.2532, "step": 1445 }, { "epoch": 0.05, "grad_norm": 0.6028301119804382, "learning_rate": 0.0005996193709239574, "loss": 2.1817, "step": 1446 }, { "epoch": 0.05, "grad_norm": 0.615241527557373, "learning_rate": 0.0005996188443955442, "loss": 2.257, "step": 1447 }, { "epoch": 0.05, "grad_norm": 0.5803001523017883, "learning_rate": 0.0005996183175034377, "loss": 2.1529, "step": 1448 }, { "epoch": 0.05, "grad_norm": 0.5796923637390137, "learning_rate": 0.0005996177902476389, "loss": 2.2544, "step": 1449 }, { "epoch": 0.05, "grad_norm": 0.6204437017440796, "learning_rate": 0.0005996172626281482, "loss": 2.2471, "step": 1450 }, { "epoch": 0.05, "grad_norm": 0.5804863572120667, "learning_rate": 0.0005996167346449661, "loss": 2.1898, "step": 1451 }, { "epoch": 0.05, "grad_norm": 0.5884858965873718, "learning_rate": 0.0005996162062980937, "loss": 2.2373, "step": 1452 }, { "epoch": 0.05, "grad_norm": 0.597038745880127, "learning_rate": 0.0005996156775875313, "loss": 2.2121, "step": 1453 }, { "epoch": 0.05, "grad_norm": 0.5797535181045532, "learning_rate": 0.0005996151485132795, "loss": 2.1537, "step": 1454 }, { "epoch": 0.05, "grad_norm": 0.5849621891975403, "learning_rate": 0.0005996146190753392, "loss": 2.1832, "step": 1455 }, { "epoch": 0.05, "grad_norm": 0.5929831266403198, "learning_rate": 0.0005996140892737109, "loss": 2.0945, "step": 1456 }, { "epoch": 0.05, "grad_norm": 0.5962789058685303, "learning_rate": 0.0005996135591083951, "loss": 2.2423, "step": 1457 }, { "epoch": 0.05, "grad_norm": 0.5819385051727295, "learning_rate": 0.0005996130285793927, "loss": 2.2361, "step": 1458 }, { "epoch": 0.05, "grad_norm": 0.6209235787391663, "learning_rate": 0.0005996124976867041, "loss": 2.1856, "step": 1459 }, { "epoch": 0.05, "grad_norm": 0.5920124650001526, "learning_rate": 0.0005996119664303302, "loss": 2.2554, "step": 1460 }, { "epoch": 0.05, "grad_norm": 0.602815568447113, "learning_rate": 0.0005996114348102716, "loss": 2.1788, "step": 1461 }, { "epoch": 0.05, "grad_norm": 0.6113823056221008, "learning_rate": 0.0005996109028265287, "loss": 2.157, "step": 1462 }, { "epoch": 0.05, "grad_norm": 0.6292237043380737, "learning_rate": 0.0005996103704791024, "loss": 2.1605, "step": 1463 }, { "epoch": 0.05, "grad_norm": 0.6050896048545837, "learning_rate": 0.0005996098377679932, "loss": 2.1994, "step": 1464 }, { "epoch": 0.05, "grad_norm": 0.6287958025932312, "learning_rate": 0.0005996093046932017, "loss": 2.1983, "step": 1465 }, { "epoch": 0.05, "grad_norm": 0.5903158187866211, "learning_rate": 0.0005996087712547289, "loss": 2.2479, "step": 1466 }, { "epoch": 0.05, "grad_norm": 0.5852015614509583, "learning_rate": 0.000599608237452575, "loss": 2.1843, "step": 1467 }, { "epoch": 0.05, "grad_norm": 0.5620484948158264, "learning_rate": 0.0005996077032867408, "loss": 2.25, "step": 1468 }, { "epoch": 0.05, "grad_norm": 0.5870465040206909, "learning_rate": 0.0005996071687572272, "loss": 2.1128, "step": 1469 }, { "epoch": 0.05, "grad_norm": 0.5944859385490417, "learning_rate": 0.0005996066338640345, "loss": 2.2274, "step": 1470 }, { "epoch": 0.05, "grad_norm": 0.6009498834609985, "learning_rate": 0.0005996060986071636, "loss": 2.1027, "step": 1471 }, { "epoch": 0.05, "grad_norm": 0.5931106805801392, "learning_rate": 0.0005996055629866149, "loss": 2.1829, "step": 1472 }, { "epoch": 0.05, "grad_norm": 0.5798143148422241, "learning_rate": 0.0005996050270023894, "loss": 2.1723, "step": 1473 }, { "epoch": 0.05, "grad_norm": 0.6193071007728577, "learning_rate": 0.0005996044906544873, "loss": 2.2322, "step": 1474 }, { "epoch": 0.05, "grad_norm": 0.6089761257171631, "learning_rate": 0.0005996039539429097, "loss": 2.1854, "step": 1475 }, { "epoch": 0.05, "grad_norm": 0.6152776479721069, "learning_rate": 0.0005996034168676569, "loss": 2.1637, "step": 1476 }, { "epoch": 0.05, "grad_norm": 0.6433681845664978, "learning_rate": 0.0005996028794287297, "loss": 2.2338, "step": 1477 }, { "epoch": 0.05, "grad_norm": 0.6021981239318848, "learning_rate": 0.0005996023416261288, "loss": 2.2496, "step": 1478 }, { "epoch": 0.05, "grad_norm": 0.5751898884773254, "learning_rate": 0.0005996018034598548, "loss": 2.1591, "step": 1479 }, { "epoch": 0.05, "grad_norm": 0.6381984949111938, "learning_rate": 0.0005996012649299084, "loss": 2.2409, "step": 1480 }, { "epoch": 0.05, "grad_norm": 0.591857373714447, "learning_rate": 0.0005996007260362902, "loss": 2.2684, "step": 1481 }, { "epoch": 0.05, "grad_norm": 0.5901351571083069, "learning_rate": 0.0005996001867790008, "loss": 2.1563, "step": 1482 }, { "epoch": 0.05, "grad_norm": 0.5834041833877563, "learning_rate": 0.0005995996471580408, "loss": 2.1869, "step": 1483 }, { "epoch": 0.05, "grad_norm": 0.6130004525184631, "learning_rate": 0.0005995991071734112, "loss": 2.2285, "step": 1484 }, { "epoch": 0.05, "grad_norm": 0.5972280502319336, "learning_rate": 0.0005995985668251124, "loss": 2.2044, "step": 1485 }, { "epoch": 0.05, "grad_norm": 0.6394703984260559, "learning_rate": 0.000599598026113145, "loss": 2.1938, "step": 1486 }, { "epoch": 0.05, "grad_norm": 0.6106316447257996, "learning_rate": 0.0005995974850375097, "loss": 2.191, "step": 1487 }, { "epoch": 0.05, "grad_norm": 0.6016885638237, "learning_rate": 0.0005995969435982073, "loss": 2.2418, "step": 1488 }, { "epoch": 0.05, "grad_norm": 0.6277739405632019, "learning_rate": 0.0005995964017952383, "loss": 2.1696, "step": 1489 }, { "epoch": 0.05, "grad_norm": 0.5924421548843384, "learning_rate": 0.0005995958596286035, "loss": 2.1916, "step": 1490 }, { "epoch": 0.05, "grad_norm": 0.5698363780975342, "learning_rate": 0.0005995953170983033, "loss": 2.1403, "step": 1491 }, { "epoch": 0.05, "grad_norm": 0.6081129908561707, "learning_rate": 0.0005995947742043386, "loss": 2.1955, "step": 1492 }, { "epoch": 0.05, "grad_norm": 0.629073977470398, "learning_rate": 0.0005995942309467101, "loss": 2.1397, "step": 1493 }, { "epoch": 0.05, "grad_norm": 0.5785888433456421, "learning_rate": 0.0005995936873254183, "loss": 2.2439, "step": 1494 }, { "epoch": 0.05, "grad_norm": 0.66002357006073, "learning_rate": 0.0005995931433404639, "loss": 2.215, "step": 1495 }, { "epoch": 0.05, "grad_norm": 0.5961662530899048, "learning_rate": 0.0005995925989918474, "loss": 2.1389, "step": 1496 }, { "epoch": 0.05, "grad_norm": 0.5923187136650085, "learning_rate": 0.0005995920542795698, "loss": 2.1356, "step": 1497 }, { "epoch": 0.05, "grad_norm": 0.6632997989654541, "learning_rate": 0.0005995915092036315, "loss": 2.2312, "step": 1498 }, { "epoch": 0.05, "grad_norm": 0.6229614615440369, "learning_rate": 0.0005995909637640333, "loss": 2.2346, "step": 1499 }, { "epoch": 0.05, "grad_norm": 0.6016824841499329, "learning_rate": 0.0005995904179607758, "loss": 2.1421, "step": 1500 }, { "epoch": 0.05, "grad_norm": 0.7000767588615417, "learning_rate": 0.0005995898717938597, "loss": 2.1642, "step": 1501 }, { "epoch": 0.05, "grad_norm": 0.600928783416748, "learning_rate": 0.0005995893252632857, "loss": 2.1654, "step": 1502 }, { "epoch": 0.05, "grad_norm": 0.632920503616333, "learning_rate": 0.0005995887783690543, "loss": 2.1397, "step": 1503 }, { "epoch": 0.05, "grad_norm": 0.635753870010376, "learning_rate": 0.0005995882311111663, "loss": 2.1209, "step": 1504 }, { "epoch": 0.05, "grad_norm": 0.5963720679283142, "learning_rate": 0.0005995876834896223, "loss": 2.1978, "step": 1505 }, { "epoch": 0.05, "grad_norm": 0.6293175220489502, "learning_rate": 0.000599587135504423, "loss": 2.2344, "step": 1506 }, { "epoch": 0.05, "grad_norm": 0.6373875141143799, "learning_rate": 0.0005995865871555691, "loss": 2.1615, "step": 1507 }, { "epoch": 0.05, "grad_norm": 0.6062759160995483, "learning_rate": 0.0005995860384430612, "loss": 2.1778, "step": 1508 }, { "epoch": 0.05, "grad_norm": 0.6406437754631042, "learning_rate": 0.0005995854893669, "loss": 2.2769, "step": 1509 }, { "epoch": 0.05, "grad_norm": 0.6823065280914307, "learning_rate": 0.0005995849399270862, "loss": 2.2533, "step": 1510 }, { "epoch": 0.05, "grad_norm": 0.5978010892868042, "learning_rate": 0.0005995843901236203, "loss": 2.2543, "step": 1511 }, { "epoch": 0.05, "grad_norm": 0.698672890663147, "learning_rate": 0.0005995838399565032, "loss": 2.2024, "step": 1512 }, { "epoch": 0.05, "grad_norm": 0.6391845941543579, "learning_rate": 0.0005995832894257354, "loss": 2.1875, "step": 1513 }, { "epoch": 0.05, "grad_norm": 0.6252427697181702, "learning_rate": 0.0005995827385313177, "loss": 2.2055, "step": 1514 }, { "epoch": 0.05, "grad_norm": 0.6614649295806885, "learning_rate": 0.0005995821872732506, "loss": 2.1362, "step": 1515 }, { "epoch": 0.05, "grad_norm": 0.635962724685669, "learning_rate": 0.0005995816356515348, "loss": 2.2166, "step": 1516 }, { "epoch": 0.05, "grad_norm": 0.6212677359580994, "learning_rate": 0.0005995810836661713, "loss": 2.1392, "step": 1517 }, { "epoch": 0.05, "grad_norm": 0.6663433909416199, "learning_rate": 0.0005995805313171602, "loss": 2.1613, "step": 1518 }, { "epoch": 0.05, "grad_norm": 0.5847664475440979, "learning_rate": 0.0005995799786045027, "loss": 2.2383, "step": 1519 }, { "epoch": 0.05, "grad_norm": 0.6191496849060059, "learning_rate": 0.0005995794255281992, "loss": 2.2628, "step": 1520 }, { "epoch": 0.05, "grad_norm": 0.6293442845344543, "learning_rate": 0.0005995788720882504, "loss": 2.2233, "step": 1521 }, { "epoch": 0.05, "grad_norm": 0.5880812406539917, "learning_rate": 0.000599578318284657, "loss": 2.1293, "step": 1522 }, { "epoch": 0.05, "grad_norm": 0.572504460811615, "learning_rate": 0.0005995777641174197, "loss": 2.2345, "step": 1523 }, { "epoch": 0.05, "grad_norm": 0.5801803469657898, "learning_rate": 0.000599577209586539, "loss": 2.2219, "step": 1524 }, { "epoch": 0.05, "grad_norm": 0.5750479102134705, "learning_rate": 0.0005995766546920159, "loss": 2.2667, "step": 1525 }, { "epoch": 0.05, "grad_norm": 0.5906356573104858, "learning_rate": 0.0005995760994338507, "loss": 2.1496, "step": 1526 }, { "epoch": 0.05, "grad_norm": 0.5795388221740723, "learning_rate": 0.0005995755438120443, "loss": 2.2768, "step": 1527 }, { "epoch": 0.05, "grad_norm": 0.5715104341506958, "learning_rate": 0.0005995749878265974, "loss": 2.1463, "step": 1528 }, { "epoch": 0.05, "grad_norm": 0.5773011445999146, "learning_rate": 0.0005995744314775106, "loss": 2.0869, "step": 1529 }, { "epoch": 0.05, "grad_norm": 0.5832384824752808, "learning_rate": 0.0005995738747647845, "loss": 2.1683, "step": 1530 }, { "epoch": 0.05, "grad_norm": 0.614809513092041, "learning_rate": 0.0005995733176884199, "loss": 2.1977, "step": 1531 }, { "epoch": 0.05, "grad_norm": 0.577678918838501, "learning_rate": 0.0005995727602484175, "loss": 2.224, "step": 1532 }, { "epoch": 0.05, "grad_norm": 0.5874725580215454, "learning_rate": 0.0005995722024447778, "loss": 2.1319, "step": 1533 }, { "epoch": 0.05, "grad_norm": 0.5683822631835938, "learning_rate": 0.0005995716442775017, "loss": 2.2324, "step": 1534 }, { "epoch": 0.05, "grad_norm": 0.585261881351471, "learning_rate": 0.0005995710857465896, "loss": 2.0988, "step": 1535 }, { "epoch": 0.05, "grad_norm": 0.56840580701828, "learning_rate": 0.0005995705268520425, "loss": 2.1469, "step": 1536 }, { "epoch": 0.05, "grad_norm": 0.6235893964767456, "learning_rate": 0.0005995699675938608, "loss": 2.2069, "step": 1537 }, { "epoch": 0.05, "grad_norm": 0.604825496673584, "learning_rate": 0.0005995694079720454, "loss": 2.1831, "step": 1538 }, { "epoch": 0.05, "grad_norm": 0.6165691614151001, "learning_rate": 0.0005995688479865968, "loss": 2.1034, "step": 1539 }, { "epoch": 0.05, "grad_norm": 0.5854617953300476, "learning_rate": 0.0005995682876375159, "loss": 2.0788, "step": 1540 }, { "epoch": 0.05, "grad_norm": 0.595244824886322, "learning_rate": 0.0005995677269248031, "loss": 2.1825, "step": 1541 }, { "epoch": 0.05, "grad_norm": 0.611909806728363, "learning_rate": 0.0005995671658484592, "loss": 2.21, "step": 1542 }, { "epoch": 0.05, "grad_norm": 0.6240494847297668, "learning_rate": 0.000599566604408485, "loss": 2.1899, "step": 1543 }, { "epoch": 0.05, "grad_norm": 0.615867555141449, "learning_rate": 0.0005995660426048809, "loss": 2.1615, "step": 1544 }, { "epoch": 0.05, "grad_norm": 0.6161446571350098, "learning_rate": 0.0005995654804376479, "loss": 2.2047, "step": 1545 }, { "epoch": 0.05, "grad_norm": 0.6038978695869446, "learning_rate": 0.0005995649179067866, "loss": 2.1668, "step": 1546 }, { "epoch": 0.05, "grad_norm": 0.6281976699829102, "learning_rate": 0.0005995643550122976, "loss": 2.2251, "step": 1547 }, { "epoch": 0.05, "grad_norm": 0.5914076566696167, "learning_rate": 0.0005995637917541815, "loss": 2.1499, "step": 1548 }, { "epoch": 0.05, "grad_norm": 0.5887883305549622, "learning_rate": 0.0005995632281324393, "loss": 2.165, "step": 1549 }, { "epoch": 0.05, "grad_norm": 0.6127392649650574, "learning_rate": 0.0005995626641470713, "loss": 2.2381, "step": 1550 }, { "epoch": 0.05, "grad_norm": 0.6232089400291443, "learning_rate": 0.0005995620997980784, "loss": 2.2137, "step": 1551 }, { "epoch": 0.05, "grad_norm": 0.5902795195579529, "learning_rate": 0.0005995615350854613, "loss": 2.1859, "step": 1552 }, { "epoch": 0.05, "grad_norm": 0.620624840259552, "learning_rate": 0.0005995609700092206, "loss": 2.1951, "step": 1553 }, { "epoch": 0.05, "grad_norm": 0.5971177220344543, "learning_rate": 0.000599560404569357, "loss": 2.1538, "step": 1554 }, { "epoch": 0.05, "grad_norm": 0.6106797456741333, "learning_rate": 0.0005995598387658713, "loss": 2.2657, "step": 1555 }, { "epoch": 0.05, "grad_norm": 0.6620863080024719, "learning_rate": 0.0005995592725987639, "loss": 2.1771, "step": 1556 }, { "epoch": 0.05, "grad_norm": 0.5741127729415894, "learning_rate": 0.0005995587060680359, "loss": 2.2103, "step": 1557 }, { "epoch": 0.05, "grad_norm": 0.6088024973869324, "learning_rate": 0.0005995581391736876, "loss": 2.215, "step": 1558 }, { "epoch": 0.05, "grad_norm": 0.6562825441360474, "learning_rate": 0.00059955757191572, "loss": 2.0388, "step": 1559 }, { "epoch": 0.05, "grad_norm": 0.5861339569091797, "learning_rate": 0.0005995570042941335, "loss": 2.1468, "step": 1560 }, { "epoch": 0.05, "grad_norm": 0.6332852244377136, "learning_rate": 0.0005995564363089291, "loss": 2.1888, "step": 1561 }, { "epoch": 0.05, "grad_norm": 0.6338049173355103, "learning_rate": 0.0005995558679601072, "loss": 2.2392, "step": 1562 }, { "epoch": 0.05, "grad_norm": 0.5838112831115723, "learning_rate": 0.0005995552992476686, "loss": 2.1688, "step": 1563 }, { "epoch": 0.05, "grad_norm": 0.621396005153656, "learning_rate": 0.000599554730171614, "loss": 2.2167, "step": 1564 }, { "epoch": 0.05, "grad_norm": 0.6594639420509338, "learning_rate": 0.0005995541607319442, "loss": 2.1762, "step": 1565 }, { "epoch": 0.05, "grad_norm": 0.6011618375778198, "learning_rate": 0.0005995535909286598, "loss": 2.2065, "step": 1566 }, { "epoch": 0.05, "grad_norm": 0.6708057522773743, "learning_rate": 0.0005995530207617614, "loss": 2.2089, "step": 1567 }, { "epoch": 0.05, "grad_norm": 0.6288577914237976, "learning_rate": 0.0005995524502312498, "loss": 2.2314, "step": 1568 }, { "epoch": 0.05, "grad_norm": 0.5940160751342773, "learning_rate": 0.0005995518793371256, "loss": 2.1409, "step": 1569 }, { "epoch": 0.05, "grad_norm": 0.6704548597335815, "learning_rate": 0.0005995513080793896, "loss": 2.2007, "step": 1570 }, { "epoch": 0.05, "grad_norm": 0.5744602084159851, "learning_rate": 0.0005995507364580425, "loss": 2.1445, "step": 1571 }, { "epoch": 0.05, "grad_norm": 0.5702626705169678, "learning_rate": 0.0005995501644730848, "loss": 2.1503, "step": 1572 }, { "epoch": 0.05, "grad_norm": 0.6261996030807495, "learning_rate": 0.0005995495921245175, "loss": 2.2118, "step": 1573 }, { "epoch": 0.05, "grad_norm": 0.594504177570343, "learning_rate": 0.000599549019412341, "loss": 2.0712, "step": 1574 }, { "epoch": 0.05, "grad_norm": 0.5595046877861023, "learning_rate": 0.0005995484463365561, "loss": 2.1699, "step": 1575 }, { "epoch": 0.05, "grad_norm": 0.5979223251342773, "learning_rate": 0.0005995478728971637, "loss": 2.1627, "step": 1576 }, { "epoch": 0.05, "grad_norm": 0.6126227974891663, "learning_rate": 0.0005995472990941642, "loss": 2.277, "step": 1577 }, { "epoch": 0.05, "grad_norm": 0.5627910494804382, "learning_rate": 0.0005995467249275585, "loss": 2.1339, "step": 1578 }, { "epoch": 0.05, "grad_norm": 0.588650107383728, "learning_rate": 0.0005995461503973472, "loss": 2.2492, "step": 1579 }, { "epoch": 0.05, "grad_norm": 0.6007472276687622, "learning_rate": 0.0005995455755035308, "loss": 2.2369, "step": 1580 }, { "epoch": 0.05, "grad_norm": 0.6240850687026978, "learning_rate": 0.0005995450002461105, "loss": 2.1523, "step": 1581 }, { "epoch": 0.05, "grad_norm": 0.5581933259963989, "learning_rate": 0.0005995444246250865, "loss": 2.1932, "step": 1582 }, { "epoch": 0.05, "grad_norm": 0.5985123515129089, "learning_rate": 0.0005995438486404598, "loss": 2.1892, "step": 1583 }, { "epoch": 0.05, "grad_norm": 0.6092238426208496, "learning_rate": 0.0005995432722922311, "loss": 2.1871, "step": 1584 }, { "epoch": 0.05, "grad_norm": 0.5930647850036621, "learning_rate": 0.0005995426955804008, "loss": 2.0965, "step": 1585 }, { "epoch": 0.05, "grad_norm": 0.6034687161445618, "learning_rate": 0.0005995421185049698, "loss": 2.2319, "step": 1586 }, { "epoch": 0.05, "grad_norm": 0.6573421359062195, "learning_rate": 0.000599541541065939, "loss": 2.2053, "step": 1587 }, { "epoch": 0.05, "grad_norm": 0.6166830062866211, "learning_rate": 0.0005995409632633088, "loss": 2.1559, "step": 1588 }, { "epoch": 0.05, "grad_norm": 0.638451099395752, "learning_rate": 0.00059954038509708, "loss": 2.1705, "step": 1589 }, { "epoch": 0.05, "grad_norm": 0.5994205474853516, "learning_rate": 0.0005995398065672533, "loss": 2.2177, "step": 1590 }, { "epoch": 0.05, "grad_norm": 0.6432877779006958, "learning_rate": 0.0005995392276738294, "loss": 2.2686, "step": 1591 }, { "epoch": 0.05, "grad_norm": 0.6368717551231384, "learning_rate": 0.000599538648416809, "loss": 2.192, "step": 1592 }, { "epoch": 0.05, "grad_norm": 0.6061859726905823, "learning_rate": 0.000599538068796193, "loss": 2.1457, "step": 1593 }, { "epoch": 0.05, "grad_norm": 0.5867809653282166, "learning_rate": 0.0005995374888119817, "loss": 2.1332, "step": 1594 }, { "epoch": 0.05, "grad_norm": 0.6268152594566345, "learning_rate": 0.0005995369084641761, "loss": 2.0958, "step": 1595 }, { "epoch": 0.05, "grad_norm": 0.6091243028640747, "learning_rate": 0.0005995363277527768, "loss": 2.2005, "step": 1596 }, { "epoch": 0.05, "grad_norm": 0.6139751076698303, "learning_rate": 0.0005995357466777846, "loss": 2.1879, "step": 1597 }, { "epoch": 0.05, "grad_norm": 0.6103941202163696, "learning_rate": 0.0005995351652392, "loss": 2.162, "step": 1598 }, { "epoch": 0.05, "grad_norm": 0.6074514389038086, "learning_rate": 0.0005995345834370239, "loss": 2.1464, "step": 1599 }, { "epoch": 0.05, "grad_norm": 0.6154850125312805, "learning_rate": 0.0005995340012712571, "loss": 2.1604, "step": 1600 }, { "epoch": 0.05, "grad_norm": 0.5771937966346741, "learning_rate": 0.0005995334187419001, "loss": 2.2569, "step": 1601 }, { "epoch": 0.05, "grad_norm": 0.5847517848014832, "learning_rate": 0.0005995328358489535, "loss": 2.1554, "step": 1602 }, { "epoch": 0.05, "grad_norm": 0.6070889830589294, "learning_rate": 0.0005995322525924183, "loss": 2.1951, "step": 1603 }, { "epoch": 0.05, "grad_norm": 0.5879436731338501, "learning_rate": 0.000599531668972295, "loss": 2.2052, "step": 1604 }, { "epoch": 0.05, "grad_norm": 0.5903270244598389, "learning_rate": 0.0005995310849885845, "loss": 2.1885, "step": 1605 }, { "epoch": 0.05, "grad_norm": 0.6296941041946411, "learning_rate": 0.0005995305006412872, "loss": 2.2462, "step": 1606 }, { "epoch": 0.05, "grad_norm": 0.6156356334686279, "learning_rate": 0.0005995299159304042, "loss": 2.2096, "step": 1607 }, { "epoch": 0.05, "grad_norm": 0.6000471711158752, "learning_rate": 0.0005995293308559361, "loss": 2.1519, "step": 1608 }, { "epoch": 0.05, "grad_norm": 0.6241106986999512, "learning_rate": 0.0005995287454178833, "loss": 2.3368, "step": 1609 }, { "epoch": 0.05, "grad_norm": 0.5817543268203735, "learning_rate": 0.0005995281596162469, "loss": 2.2491, "step": 1610 }, { "epoch": 0.05, "grad_norm": 0.5706751942634583, "learning_rate": 0.0005995275734510274, "loss": 2.1501, "step": 1611 }, { "epoch": 0.05, "grad_norm": 0.582771360874176, "learning_rate": 0.0005995269869222254, "loss": 2.1832, "step": 1612 }, { "epoch": 0.05, "grad_norm": 0.6013129949569702, "learning_rate": 0.000599526400029842, "loss": 2.1441, "step": 1613 }, { "epoch": 0.05, "grad_norm": 0.5960420370101929, "learning_rate": 0.0005995258127738776, "loss": 2.1346, "step": 1614 }, { "epoch": 0.05, "grad_norm": 0.6049381494522095, "learning_rate": 0.000599525225154333, "loss": 2.1315, "step": 1615 }, { "epoch": 0.05, "grad_norm": 0.5789040923118591, "learning_rate": 0.0005995246371712088, "loss": 2.1035, "step": 1616 }, { "epoch": 0.05, "grad_norm": 0.592476487159729, "learning_rate": 0.0005995240488245059, "loss": 2.2298, "step": 1617 }, { "epoch": 0.05, "grad_norm": 0.5931717157363892, "learning_rate": 0.000599523460114225, "loss": 2.2187, "step": 1618 }, { "epoch": 0.05, "grad_norm": 0.636762261390686, "learning_rate": 0.0005995228710403667, "loss": 2.1804, "step": 1619 }, { "epoch": 0.05, "grad_norm": 0.6196839213371277, "learning_rate": 0.0005995222816029317, "loss": 2.2357, "step": 1620 }, { "epoch": 0.05, "grad_norm": 0.6132913827896118, "learning_rate": 0.000599521691801921, "loss": 2.1381, "step": 1621 }, { "epoch": 0.05, "grad_norm": 0.6312117576599121, "learning_rate": 0.0005995211016373349, "loss": 2.1765, "step": 1622 }, { "epoch": 0.05, "grad_norm": 0.6300132870674133, "learning_rate": 0.0005995205111091743, "loss": 2.2192, "step": 1623 }, { "epoch": 0.05, "grad_norm": 0.6065471172332764, "learning_rate": 0.0005995199202174401, "loss": 2.2309, "step": 1624 }, { "epoch": 0.05, "grad_norm": 0.5779330134391785, "learning_rate": 0.0005995193289621328, "loss": 2.1769, "step": 1625 }, { "epoch": 0.05, "grad_norm": 0.6283150315284729, "learning_rate": 0.000599518737343253, "loss": 2.1634, "step": 1626 }, { "epoch": 0.05, "grad_norm": 0.5834591388702393, "learning_rate": 0.0005995181453608017, "loss": 2.1482, "step": 1627 }, { "epoch": 0.05, "grad_norm": 0.5800776481628418, "learning_rate": 0.0005995175530147795, "loss": 2.1282, "step": 1628 }, { "epoch": 0.05, "grad_norm": 0.5687070488929749, "learning_rate": 0.0005995169603051872, "loss": 2.2048, "step": 1629 }, { "epoch": 0.05, "grad_norm": 0.5904674530029297, "learning_rate": 0.0005995163672320253, "loss": 2.1098, "step": 1630 }, { "epoch": 0.05, "grad_norm": 0.6116570830345154, "learning_rate": 0.0005995157737952948, "loss": 2.2599, "step": 1631 }, { "epoch": 0.05, "grad_norm": 0.5784879922866821, "learning_rate": 0.0005995151799949962, "loss": 2.1687, "step": 1632 }, { "epoch": 0.05, "grad_norm": 0.5865231156349182, "learning_rate": 0.0005995145858311303, "loss": 2.2177, "step": 1633 }, { "epoch": 0.05, "grad_norm": 0.5955252051353455, "learning_rate": 0.0005995139913036979, "loss": 2.1236, "step": 1634 }, { "epoch": 0.05, "grad_norm": 0.5929356813430786, "learning_rate": 0.0005995133964126995, "loss": 2.1452, "step": 1635 }, { "epoch": 0.05, "grad_norm": 0.5835482478141785, "learning_rate": 0.000599512801158136, "loss": 2.154, "step": 1636 }, { "epoch": 0.05, "grad_norm": 0.5816847681999207, "learning_rate": 0.0005995122055400083, "loss": 2.0913, "step": 1637 }, { "epoch": 0.05, "grad_norm": 0.5878836512565613, "learning_rate": 0.0005995116095583167, "loss": 2.2285, "step": 1638 }, { "epoch": 0.05, "grad_norm": 0.5854378938674927, "learning_rate": 0.0005995110132130622, "loss": 2.18, "step": 1639 }, { "epoch": 0.05, "grad_norm": 0.5825046896934509, "learning_rate": 0.0005995104165042454, "loss": 2.1496, "step": 1640 }, { "epoch": 0.05, "grad_norm": 0.5885049104690552, "learning_rate": 0.0005995098194318672, "loss": 2.1603, "step": 1641 }, { "epoch": 0.05, "grad_norm": 0.5886325836181641, "learning_rate": 0.0005995092219959281, "loss": 2.1982, "step": 1642 }, { "epoch": 0.05, "grad_norm": 0.5848842859268188, "learning_rate": 0.000599508624196429, "loss": 2.0867, "step": 1643 }, { "epoch": 0.05, "grad_norm": 0.5871114134788513, "learning_rate": 0.0005995080260333706, "loss": 2.1947, "step": 1644 }, { "epoch": 0.05, "grad_norm": 0.5798724293708801, "learning_rate": 0.0005995074275067535, "loss": 2.2376, "step": 1645 }, { "epoch": 0.05, "grad_norm": 0.5834571719169617, "learning_rate": 0.0005995068286165785, "loss": 2.1936, "step": 1646 }, { "epoch": 0.05, "grad_norm": 0.597443699836731, "learning_rate": 0.0005995062293628464, "loss": 2.1789, "step": 1647 }, { "epoch": 0.05, "grad_norm": 0.5914248824119568, "learning_rate": 0.0005995056297455579, "loss": 2.1187, "step": 1648 }, { "epoch": 0.05, "grad_norm": 0.5812674760818481, "learning_rate": 0.0005995050297647137, "loss": 2.1994, "step": 1649 }, { "epoch": 0.05, "grad_norm": 0.6270915865898132, "learning_rate": 0.0005995044294203144, "loss": 2.2267, "step": 1650 }, { "epoch": 0.05, "grad_norm": 0.6224425435066223, "learning_rate": 0.0005995038287123609, "loss": 2.2136, "step": 1651 }, { "epoch": 0.05, "grad_norm": 0.5889942049980164, "learning_rate": 0.0005995032276408539, "loss": 2.2221, "step": 1652 }, { "epoch": 0.05, "grad_norm": 0.5797345042228699, "learning_rate": 0.0005995026262057942, "loss": 2.1338, "step": 1653 }, { "epoch": 0.06, "grad_norm": 0.560631275177002, "learning_rate": 0.0005995020244071822, "loss": 2.1595, "step": 1654 }, { "epoch": 0.06, "grad_norm": 0.5556399822235107, "learning_rate": 0.0005995014222450191, "loss": 2.1893, "step": 1655 }, { "epoch": 0.06, "grad_norm": 0.5882756114006042, "learning_rate": 0.0005995008197193053, "loss": 2.1576, "step": 1656 }, { "epoch": 0.06, "grad_norm": 0.5939819812774658, "learning_rate": 0.0005995002168300418, "loss": 2.0985, "step": 1657 }, { "epoch": 0.06, "grad_norm": 0.5702242851257324, "learning_rate": 0.000599499613577229, "loss": 2.1363, "step": 1658 }, { "epoch": 0.06, "grad_norm": 0.6061195135116577, "learning_rate": 0.0005994990099608678, "loss": 2.1557, "step": 1659 }, { "epoch": 0.06, "grad_norm": 0.599467933177948, "learning_rate": 0.000599498405980959, "loss": 2.2662, "step": 1660 }, { "epoch": 0.06, "grad_norm": 0.6084925532341003, "learning_rate": 0.0005994978016375033, "loss": 2.2219, "step": 1661 }, { "epoch": 0.06, "grad_norm": 0.5877400636672974, "learning_rate": 0.0005994971969305013, "loss": 2.1609, "step": 1662 }, { "epoch": 0.06, "grad_norm": 0.5750791430473328, "learning_rate": 0.0005994965918599539, "loss": 2.163, "step": 1663 }, { "epoch": 0.06, "grad_norm": 0.5943814516067505, "learning_rate": 0.0005994959864258616, "loss": 2.1417, "step": 1664 }, { "epoch": 0.06, "grad_norm": 0.5864423513412476, "learning_rate": 0.0005994953806282255, "loss": 2.163, "step": 1665 }, { "epoch": 0.06, "grad_norm": 0.5788439512252808, "learning_rate": 0.000599494774467046, "loss": 2.1758, "step": 1666 }, { "epoch": 0.06, "grad_norm": 0.5853504538536072, "learning_rate": 0.0005994941679423241, "loss": 2.2079, "step": 1667 }, { "epoch": 0.06, "grad_norm": 0.5982203483581543, "learning_rate": 0.0005994935610540604, "loss": 2.17, "step": 1668 }, { "epoch": 0.06, "grad_norm": 0.6054918766021729, "learning_rate": 0.0005994929538022556, "loss": 2.1077, "step": 1669 }, { "epoch": 0.06, "grad_norm": 0.5955493450164795, "learning_rate": 0.0005994923461869104, "loss": 2.1938, "step": 1670 }, { "epoch": 0.06, "grad_norm": 0.5843051671981812, "learning_rate": 0.0005994917382080257, "loss": 2.1434, "step": 1671 }, { "epoch": 0.06, "grad_norm": 0.6019786596298218, "learning_rate": 0.0005994911298656022, "loss": 2.167, "step": 1672 }, { "epoch": 0.06, "grad_norm": 0.6055600643157959, "learning_rate": 0.0005994905211596404, "loss": 2.1592, "step": 1673 }, { "epoch": 0.06, "grad_norm": 0.5921945571899414, "learning_rate": 0.0005994899120901415, "loss": 2.1135, "step": 1674 }, { "epoch": 0.06, "grad_norm": 0.6307201981544495, "learning_rate": 0.0005994893026571058, "loss": 2.1688, "step": 1675 }, { "epoch": 0.06, "grad_norm": 0.6290737390518188, "learning_rate": 0.0005994886928605344, "loss": 2.1612, "step": 1676 }, { "epoch": 0.06, "grad_norm": 0.6471436619758606, "learning_rate": 0.0005994880827004276, "loss": 2.3074, "step": 1677 }, { "epoch": 0.06, "grad_norm": 0.6567157506942749, "learning_rate": 0.0005994874721767865, "loss": 2.1107, "step": 1678 }, { "epoch": 0.06, "grad_norm": 0.6111415028572083, "learning_rate": 0.0005994868612896118, "loss": 2.1197, "step": 1679 }, { "epoch": 0.06, "grad_norm": 0.6516929864883423, "learning_rate": 0.0005994862500389042, "loss": 2.1341, "step": 1680 }, { "epoch": 0.06, "grad_norm": 0.6780372858047485, "learning_rate": 0.0005994856384246642, "loss": 2.2627, "step": 1681 }, { "epoch": 0.06, "grad_norm": 0.597374439239502, "learning_rate": 0.0005994850264468929, "loss": 2.1705, "step": 1682 }, { "epoch": 0.06, "grad_norm": 0.6580023765563965, "learning_rate": 0.000599484414105591, "loss": 2.1619, "step": 1683 }, { "epoch": 0.06, "grad_norm": 0.6322868466377258, "learning_rate": 0.0005994838014007591, "loss": 2.0723, "step": 1684 }, { "epoch": 0.06, "grad_norm": 0.6036486625671387, "learning_rate": 0.000599483188332398, "loss": 2.1435, "step": 1685 }, { "epoch": 0.06, "grad_norm": 0.719519853591919, "learning_rate": 0.0005994825749005083, "loss": 2.2072, "step": 1686 }, { "epoch": 0.06, "grad_norm": 0.6390025615692139, "learning_rate": 0.000599481961105091, "loss": 2.2638, "step": 1687 }, { "epoch": 0.06, "grad_norm": 0.6209793090820312, "learning_rate": 0.0005994813469461467, "loss": 2.155, "step": 1688 }, { "epoch": 0.06, "grad_norm": 0.6443297863006592, "learning_rate": 0.0005994807324236763, "loss": 2.1171, "step": 1689 }, { "epoch": 0.06, "grad_norm": 0.6067097187042236, "learning_rate": 0.0005994801175376802, "loss": 2.1708, "step": 1690 }, { "epoch": 0.06, "grad_norm": 0.6442194581031799, "learning_rate": 0.0005994795022881595, "loss": 2.1911, "step": 1691 }, { "epoch": 0.06, "grad_norm": 0.6884890794754028, "learning_rate": 0.0005994788866751147, "loss": 2.2125, "step": 1692 }, { "epoch": 0.06, "grad_norm": 0.5959923267364502, "learning_rate": 0.0005994782706985468, "loss": 2.1808, "step": 1693 }, { "epoch": 0.06, "grad_norm": 0.6433253884315491, "learning_rate": 0.0005994776543584563, "loss": 2.1774, "step": 1694 }, { "epoch": 0.06, "grad_norm": 0.6679011583328247, "learning_rate": 0.000599477037654844, "loss": 2.1897, "step": 1695 }, { "epoch": 0.06, "grad_norm": 0.6035841107368469, "learning_rate": 0.0005994764205877109, "loss": 2.1626, "step": 1696 }, { "epoch": 0.06, "grad_norm": 0.5562920570373535, "learning_rate": 0.0005994758031570574, "loss": 2.1336, "step": 1697 }, { "epoch": 0.06, "grad_norm": 0.6419579386711121, "learning_rate": 0.0005994751853628844, "loss": 2.1346, "step": 1698 }, { "epoch": 0.06, "grad_norm": 0.6208237409591675, "learning_rate": 0.0005994745672051927, "loss": 2.2307, "step": 1699 }, { "epoch": 0.06, "grad_norm": 0.6230145692825317, "learning_rate": 0.000599473948683983, "loss": 2.1351, "step": 1700 }, { "epoch": 0.06, "grad_norm": 0.6933835744857788, "learning_rate": 0.000599473329799256, "loss": 2.1946, "step": 1701 }, { "epoch": 0.06, "grad_norm": 0.6089404821395874, "learning_rate": 0.0005994727105510125, "loss": 2.2041, "step": 1702 }, { "epoch": 0.06, "grad_norm": 0.5871599316596985, "learning_rate": 0.0005994720909392534, "loss": 2.1646, "step": 1703 }, { "epoch": 0.06, "grad_norm": 0.6205328702926636, "learning_rate": 0.0005994714709639792, "loss": 2.0833, "step": 1704 }, { "epoch": 0.06, "grad_norm": 0.5627039670944214, "learning_rate": 0.0005994708506251908, "loss": 2.0216, "step": 1705 }, { "epoch": 0.06, "grad_norm": 0.5995532870292664, "learning_rate": 0.0005994702299228888, "loss": 2.1776, "step": 1706 }, { "epoch": 0.06, "grad_norm": 0.5876064896583557, "learning_rate": 0.0005994696088570742, "loss": 2.1573, "step": 1707 }, { "epoch": 0.06, "grad_norm": 0.5640360116958618, "learning_rate": 0.0005994689874277475, "loss": 2.2331, "step": 1708 }, { "epoch": 0.06, "grad_norm": 0.6069567799568176, "learning_rate": 0.0005994683656349096, "loss": 2.1854, "step": 1709 }, { "epoch": 0.06, "grad_norm": 0.6173140406608582, "learning_rate": 0.0005994677434785613, "loss": 2.2349, "step": 1710 }, { "epoch": 0.06, "grad_norm": 0.5795217752456665, "learning_rate": 0.0005994671209587032, "loss": 2.1531, "step": 1711 }, { "epoch": 0.06, "grad_norm": 0.5829873085021973, "learning_rate": 0.0005994664980753362, "loss": 2.1498, "step": 1712 }, { "epoch": 0.06, "grad_norm": 0.6100005507469177, "learning_rate": 0.0005994658748284611, "loss": 2.0946, "step": 1713 }, { "epoch": 0.06, "grad_norm": 0.5856852531433105, "learning_rate": 0.0005994652512180783, "loss": 2.1616, "step": 1714 }, { "epoch": 0.06, "grad_norm": 0.5809016227722168, "learning_rate": 0.0005994646272441891, "loss": 2.1421, "step": 1715 }, { "epoch": 0.06, "grad_norm": 0.6045598983764648, "learning_rate": 0.0005994640029067937, "loss": 2.1745, "step": 1716 }, { "epoch": 0.06, "grad_norm": 0.5855668187141418, "learning_rate": 0.0005994633782058933, "loss": 2.267, "step": 1717 }, { "epoch": 0.06, "grad_norm": 0.5752570033073425, "learning_rate": 0.0005994627531414883, "loss": 2.2247, "step": 1718 }, { "epoch": 0.06, "grad_norm": 0.6056281328201294, "learning_rate": 0.0005994621277135799, "loss": 2.1299, "step": 1719 }, { "epoch": 0.06, "grad_norm": 0.5671921372413635, "learning_rate": 0.0005994615019221684, "loss": 2.0862, "step": 1720 }, { "epoch": 0.06, "grad_norm": 0.5862400531768799, "learning_rate": 0.0005994608757672548, "loss": 2.151, "step": 1721 }, { "epoch": 0.06, "grad_norm": 0.5833638310432434, "learning_rate": 0.0005994602492488399, "loss": 2.1318, "step": 1722 }, { "epoch": 0.06, "grad_norm": 0.569663941860199, "learning_rate": 0.0005994596223669243, "loss": 2.074, "step": 1723 }, { "epoch": 0.06, "grad_norm": 0.6006855368614197, "learning_rate": 0.0005994589951215088, "loss": 2.1334, "step": 1724 }, { "epoch": 0.06, "grad_norm": 0.6258612275123596, "learning_rate": 0.0005994583675125943, "loss": 2.2368, "step": 1725 }, { "epoch": 0.06, "grad_norm": 0.6566634774208069, "learning_rate": 0.0005994577395401814, "loss": 2.1562, "step": 1726 }, { "epoch": 0.06, "grad_norm": 0.590954065322876, "learning_rate": 0.000599457111204271, "loss": 2.1988, "step": 1727 }, { "epoch": 0.06, "grad_norm": 0.623468279838562, "learning_rate": 0.0005994564825048638, "loss": 2.1191, "step": 1728 }, { "epoch": 0.06, "grad_norm": 0.5958022475242615, "learning_rate": 0.0005994558534419604, "loss": 2.1444, "step": 1729 }, { "epoch": 0.06, "grad_norm": 0.5654726624488831, "learning_rate": 0.0005994552240155619, "loss": 2.1987, "step": 1730 }, { "epoch": 0.06, "grad_norm": 0.6020777225494385, "learning_rate": 0.0005994545942256688, "loss": 2.1522, "step": 1731 }, { "epoch": 0.06, "grad_norm": 0.5845451951026917, "learning_rate": 0.0005994539640722819, "loss": 2.0897, "step": 1732 }, { "epoch": 0.06, "grad_norm": 0.5726317167282104, "learning_rate": 0.0005994533335554021, "loss": 2.1099, "step": 1733 }, { "epoch": 0.06, "grad_norm": 0.5972434878349304, "learning_rate": 0.00059945270267503, "loss": 2.1237, "step": 1734 }, { "epoch": 0.06, "grad_norm": 0.5875319838523865, "learning_rate": 0.0005994520714311664, "loss": 2.1837, "step": 1735 }, { "epoch": 0.06, "grad_norm": 0.582582414150238, "learning_rate": 0.0005994514398238122, "loss": 2.1948, "step": 1736 }, { "epoch": 0.06, "grad_norm": 0.5690770149230957, "learning_rate": 0.0005994508078529681, "loss": 2.1033, "step": 1737 }, { "epoch": 0.06, "grad_norm": 0.5798617601394653, "learning_rate": 0.0005994501755186347, "loss": 2.1242, "step": 1738 }, { "epoch": 0.06, "grad_norm": 0.609284520149231, "learning_rate": 0.0005994495428208131, "loss": 2.123, "step": 1739 }, { "epoch": 0.06, "grad_norm": 0.57351154088974, "learning_rate": 0.0005994489097595038, "loss": 2.1197, "step": 1740 }, { "epoch": 0.06, "grad_norm": 0.6032919883728027, "learning_rate": 0.0005994482763347075, "loss": 2.1572, "step": 1741 }, { "epoch": 0.06, "grad_norm": 0.6234201192855835, "learning_rate": 0.0005994476425464253, "loss": 2.1307, "step": 1742 }, { "epoch": 0.06, "grad_norm": 0.6432271599769592, "learning_rate": 0.0005994470083946577, "loss": 2.1981, "step": 1743 }, { "epoch": 0.06, "grad_norm": 0.6046438217163086, "learning_rate": 0.0005994463738794056, "loss": 2.2849, "step": 1744 }, { "epoch": 0.06, "grad_norm": 0.6127291321754456, "learning_rate": 0.0005994457390006698, "loss": 2.158, "step": 1745 }, { "epoch": 0.06, "grad_norm": 0.5844618678092957, "learning_rate": 0.0005994451037584508, "loss": 2.1449, "step": 1746 }, { "epoch": 0.06, "grad_norm": 0.6014270782470703, "learning_rate": 0.0005994444681527496, "loss": 2.2249, "step": 1747 }, { "epoch": 0.06, "grad_norm": 0.556140661239624, "learning_rate": 0.0005994438321835671, "loss": 2.1901, "step": 1748 }, { "epoch": 0.06, "grad_norm": 0.5797984600067139, "learning_rate": 0.0005994431958509038, "loss": 2.2055, "step": 1749 }, { "epoch": 0.06, "grad_norm": 0.5793781876564026, "learning_rate": 0.0005994425591547606, "loss": 2.0753, "step": 1750 }, { "epoch": 0.06, "grad_norm": 0.5841580033302307, "learning_rate": 0.0005994419220951382, "loss": 2.1545, "step": 1751 }, { "epoch": 0.06, "grad_norm": 0.5815284848213196, "learning_rate": 0.0005994412846720374, "loss": 2.1966, "step": 1752 }, { "epoch": 0.06, "grad_norm": 0.5617448687553406, "learning_rate": 0.0005994406468854592, "loss": 2.1241, "step": 1753 }, { "epoch": 0.06, "grad_norm": 0.562745988368988, "learning_rate": 0.000599440008735404, "loss": 2.223, "step": 1754 }, { "epoch": 0.06, "grad_norm": 0.5880846381187439, "learning_rate": 0.0005994393702218728, "loss": 2.1425, "step": 1755 }, { "epoch": 0.06, "grad_norm": 0.5655257105827332, "learning_rate": 0.0005994387313448663, "loss": 2.1758, "step": 1756 }, { "epoch": 0.06, "grad_norm": 0.6423119902610779, "learning_rate": 0.0005994380921043853, "loss": 2.1995, "step": 1757 }, { "epoch": 0.06, "grad_norm": 0.5895735621452332, "learning_rate": 0.0005994374525004305, "loss": 2.171, "step": 1758 }, { "epoch": 0.06, "grad_norm": 0.5902349352836609, "learning_rate": 0.000599436812533003, "loss": 2.1792, "step": 1759 }, { "epoch": 0.06, "grad_norm": 0.5942450761795044, "learning_rate": 0.0005994361722021031, "loss": 2.0889, "step": 1760 }, { "epoch": 0.06, "grad_norm": 0.5814570784568787, "learning_rate": 0.000599435531507732, "loss": 2.1939, "step": 1761 }, { "epoch": 0.06, "grad_norm": 0.5702030062675476, "learning_rate": 0.0005994348904498901, "loss": 2.0871, "step": 1762 }, { "epoch": 0.06, "grad_norm": 0.5864707231521606, "learning_rate": 0.0005994342490285784, "loss": 2.2153, "step": 1763 }, { "epoch": 0.06, "grad_norm": 0.5946178436279297, "learning_rate": 0.0005994336072437977, "loss": 2.1106, "step": 1764 }, { "epoch": 0.06, "grad_norm": 0.5768965482711792, "learning_rate": 0.0005994329650955486, "loss": 2.2012, "step": 1765 }, { "epoch": 0.06, "grad_norm": 0.5613071918487549, "learning_rate": 0.0005994323225838322, "loss": 2.114, "step": 1766 }, { "epoch": 0.06, "grad_norm": 0.5708720684051514, "learning_rate": 0.0005994316797086489, "loss": 2.2182, "step": 1767 }, { "epoch": 0.06, "grad_norm": 0.5724343061447144, "learning_rate": 0.0005994310364699998, "loss": 2.1571, "step": 1768 }, { "epoch": 0.06, "grad_norm": 0.5933946967124939, "learning_rate": 0.0005994303928678854, "loss": 2.0824, "step": 1769 }, { "epoch": 0.06, "grad_norm": 0.5973859429359436, "learning_rate": 0.0005994297489023068, "loss": 2.1571, "step": 1770 }, { "epoch": 0.06, "grad_norm": 0.5684562921524048, "learning_rate": 0.0005994291045732644, "loss": 2.0798, "step": 1771 }, { "epoch": 0.06, "grad_norm": 0.6105960607528687, "learning_rate": 0.0005994284598807593, "loss": 2.1416, "step": 1772 }, { "epoch": 0.06, "grad_norm": 0.6308033466339111, "learning_rate": 0.0005994278148247922, "loss": 2.1498, "step": 1773 }, { "epoch": 0.06, "grad_norm": 0.5883288979530334, "learning_rate": 0.0005994271694053638, "loss": 2.0985, "step": 1774 }, { "epoch": 0.06, "grad_norm": 0.5739120841026306, "learning_rate": 0.0005994265236224749, "loss": 2.2108, "step": 1775 }, { "epoch": 0.06, "grad_norm": 0.5924502611160278, "learning_rate": 0.0005994258774761264, "loss": 2.1518, "step": 1776 }, { "epoch": 0.06, "grad_norm": 0.591280996799469, "learning_rate": 0.0005994252309663189, "loss": 2.1975, "step": 1777 }, { "epoch": 0.06, "grad_norm": 0.574096143245697, "learning_rate": 0.0005994245840930533, "loss": 2.1764, "step": 1778 }, { "epoch": 0.06, "grad_norm": 0.5862751603126526, "learning_rate": 0.0005994239368563304, "loss": 2.239, "step": 1779 }, { "epoch": 0.06, "grad_norm": 0.5797544717788696, "learning_rate": 0.000599423289256151, "loss": 2.1409, "step": 1780 }, { "epoch": 0.06, "grad_norm": 0.5991425514221191, "learning_rate": 0.0005994226412925159, "loss": 2.1299, "step": 1781 }, { "epoch": 0.06, "grad_norm": 0.5818052291870117, "learning_rate": 0.0005994219929654258, "loss": 2.1249, "step": 1782 }, { "epoch": 0.06, "grad_norm": 0.5862244963645935, "learning_rate": 0.0005994213442748814, "loss": 2.1612, "step": 1783 }, { "epoch": 0.06, "grad_norm": 0.5954003930091858, "learning_rate": 0.0005994206952208837, "loss": 2.1353, "step": 1784 }, { "epoch": 0.06, "grad_norm": 0.5920453667640686, "learning_rate": 0.0005994200458034333, "loss": 2.0379, "step": 1785 }, { "epoch": 0.06, "grad_norm": 0.6042633056640625, "learning_rate": 0.0005994193960225312, "loss": 2.1053, "step": 1786 }, { "epoch": 0.06, "grad_norm": 0.5598840117454529, "learning_rate": 0.000599418745878178, "loss": 2.1555, "step": 1787 }, { "epoch": 0.06, "grad_norm": 0.5765354037284851, "learning_rate": 0.0005994180953703747, "loss": 2.1939, "step": 1788 }, { "epoch": 0.06, "grad_norm": 0.5920877456665039, "learning_rate": 0.0005994174444991217, "loss": 2.0758, "step": 1789 }, { "epoch": 0.06, "grad_norm": 0.5803118944168091, "learning_rate": 0.0005994167932644203, "loss": 2.1901, "step": 1790 }, { "epoch": 0.06, "grad_norm": 0.5702794790267944, "learning_rate": 0.0005994161416662708, "loss": 2.135, "step": 1791 }, { "epoch": 0.06, "grad_norm": 0.5726877450942993, "learning_rate": 0.0005994154897046744, "loss": 2.0986, "step": 1792 }, { "epoch": 0.06, "grad_norm": 0.568137526512146, "learning_rate": 0.0005994148373796316, "loss": 2.2275, "step": 1793 }, { "epoch": 0.06, "grad_norm": 0.5885793566703796, "learning_rate": 0.0005994141846911434, "loss": 2.1279, "step": 1794 }, { "epoch": 0.06, "grad_norm": 0.5908080339431763, "learning_rate": 0.0005994135316392103, "loss": 2.1445, "step": 1795 }, { "epoch": 0.06, "grad_norm": 0.574583113193512, "learning_rate": 0.0005994128782238335, "loss": 2.1571, "step": 1796 }, { "epoch": 0.06, "grad_norm": 0.5700152516365051, "learning_rate": 0.0005994122244450136, "loss": 2.1577, "step": 1797 }, { "epoch": 0.06, "grad_norm": 0.6081823706626892, "learning_rate": 0.0005994115703027512, "loss": 2.1453, "step": 1798 }, { "epoch": 0.06, "grad_norm": 0.5714020133018494, "learning_rate": 0.0005994109157970473, "loss": 2.1516, "step": 1799 }, { "epoch": 0.06, "grad_norm": 0.6334883570671082, "learning_rate": 0.0005994102609279029, "loss": 2.1672, "step": 1800 }, { "epoch": 0.06, "grad_norm": 0.5810452103614807, "learning_rate": 0.0005994096056953182, "loss": 2.1489, "step": 1801 }, { "epoch": 0.06, "grad_norm": 0.5789263844490051, "learning_rate": 0.0005994089500992947, "loss": 2.0997, "step": 1802 }, { "epoch": 0.06, "grad_norm": 0.5734512805938721, "learning_rate": 0.0005994082941398326, "loss": 2.0673, "step": 1803 }, { "epoch": 0.06, "grad_norm": 0.5854091644287109, "learning_rate": 0.0005994076378169331, "loss": 2.172, "step": 1804 }, { "epoch": 0.06, "grad_norm": 0.5813238024711609, "learning_rate": 0.0005994069811305967, "loss": 2.153, "step": 1805 }, { "epoch": 0.06, "grad_norm": 0.5687317252159119, "learning_rate": 0.0005994063240808244, "loss": 2.1899, "step": 1806 }, { "epoch": 0.06, "grad_norm": 0.5756397843360901, "learning_rate": 0.000599405666667617, "loss": 2.1203, "step": 1807 }, { "epoch": 0.06, "grad_norm": 0.5958202481269836, "learning_rate": 0.0005994050088909751, "loss": 2.1444, "step": 1808 }, { "epoch": 0.06, "grad_norm": 0.5727547407150269, "learning_rate": 0.0005994043507508998, "loss": 2.2085, "step": 1809 }, { "epoch": 0.06, "grad_norm": 0.578623354434967, "learning_rate": 0.0005994036922473916, "loss": 2.1251, "step": 1810 }, { "epoch": 0.06, "grad_norm": 0.583746075630188, "learning_rate": 0.0005994030333804515, "loss": 2.166, "step": 1811 }, { "epoch": 0.06, "grad_norm": 0.6291376352310181, "learning_rate": 0.0005994023741500802, "loss": 2.0793, "step": 1812 }, { "epoch": 0.06, "grad_norm": 0.6113989949226379, "learning_rate": 0.0005994017145562786, "loss": 2.1471, "step": 1813 }, { "epoch": 0.06, "grad_norm": 0.5654301047325134, "learning_rate": 0.0005994010545990473, "loss": 2.218, "step": 1814 }, { "epoch": 0.06, "grad_norm": 0.6577976942062378, "learning_rate": 0.0005994003942783874, "loss": 2.126, "step": 1815 }, { "epoch": 0.06, "grad_norm": 0.5627081990242004, "learning_rate": 0.0005993997335942995, "loss": 2.0595, "step": 1816 }, { "epoch": 0.06, "grad_norm": 0.582006573677063, "learning_rate": 0.0005993990725467844, "loss": 2.0547, "step": 1817 }, { "epoch": 0.06, "grad_norm": 0.5864017605781555, "learning_rate": 0.0005993984111358428, "loss": 2.1485, "step": 1818 }, { "epoch": 0.06, "grad_norm": 0.6216812133789062, "learning_rate": 0.0005993977493614759, "loss": 2.168, "step": 1819 }, { "epoch": 0.06, "grad_norm": 0.59752357006073, "learning_rate": 0.0005993970872236841, "loss": 2.1781, "step": 1820 }, { "epoch": 0.06, "grad_norm": 0.7039981484413147, "learning_rate": 0.0005993964247224684, "loss": 2.1771, "step": 1821 }, { "epoch": 0.06, "grad_norm": 0.6033048033714294, "learning_rate": 0.0005993957618578295, "loss": 2.1353, "step": 1822 }, { "epoch": 0.06, "grad_norm": 0.5611200928688049, "learning_rate": 0.0005993950986297684, "loss": 2.1253, "step": 1823 }, { "epoch": 0.06, "grad_norm": 0.6883392333984375, "learning_rate": 0.0005993944350382855, "loss": 2.1371, "step": 1824 }, { "epoch": 0.06, "grad_norm": 0.5631914734840393, "learning_rate": 0.0005993937710833821, "loss": 2.1375, "step": 1825 }, { "epoch": 0.06, "grad_norm": 0.6072425246238708, "learning_rate": 0.0005993931067650586, "loss": 2.0728, "step": 1826 }, { "epoch": 0.06, "grad_norm": 0.5899387001991272, "learning_rate": 0.0005993924420833162, "loss": 2.1511, "step": 1827 }, { "epoch": 0.06, "grad_norm": 0.5876649618148804, "learning_rate": 0.0005993917770381553, "loss": 2.1009, "step": 1828 }, { "epoch": 0.06, "grad_norm": 0.574364185333252, "learning_rate": 0.000599391111629577, "loss": 2.1199, "step": 1829 }, { "epoch": 0.06, "grad_norm": 0.6314756870269775, "learning_rate": 0.000599390445857582, "loss": 2.1784, "step": 1830 }, { "epoch": 0.06, "grad_norm": 0.5766462087631226, "learning_rate": 0.000599389779722171, "loss": 2.1151, "step": 1831 }, { "epoch": 0.06, "grad_norm": 0.5610900521278381, "learning_rate": 0.0005993891132233451, "loss": 2.1597, "step": 1832 }, { "epoch": 0.06, "grad_norm": 0.6344895958900452, "learning_rate": 0.0005993884463611047, "loss": 2.1896, "step": 1833 }, { "epoch": 0.06, "grad_norm": 0.6050699949264526, "learning_rate": 0.000599387779135451, "loss": 2.1596, "step": 1834 }, { "epoch": 0.06, "grad_norm": 0.6200644969940186, "learning_rate": 0.0005993871115463846, "loss": 2.1799, "step": 1835 }, { "epoch": 0.06, "grad_norm": 0.5887742638587952, "learning_rate": 0.0005993864435939064, "loss": 2.098, "step": 1836 }, { "epoch": 0.06, "grad_norm": 0.5748148560523987, "learning_rate": 0.0005993857752780172, "loss": 2.1518, "step": 1837 }, { "epoch": 0.06, "grad_norm": 0.6037203073501587, "learning_rate": 0.0005993851065987177, "loss": 2.1445, "step": 1838 }, { "epoch": 0.06, "grad_norm": 0.5871423482894897, "learning_rate": 0.0005993844375560088, "loss": 2.1992, "step": 1839 }, { "epoch": 0.06, "grad_norm": 0.5777676701545715, "learning_rate": 0.0005993837681498913, "loss": 2.2144, "step": 1840 }, { "epoch": 0.06, "grad_norm": 0.5626624226570129, "learning_rate": 0.0005993830983803661, "loss": 2.1018, "step": 1841 }, { "epoch": 0.06, "grad_norm": 0.5843879580497742, "learning_rate": 0.0005993824282474338, "loss": 2.1226, "step": 1842 }, { "epoch": 0.06, "grad_norm": 0.5936573147773743, "learning_rate": 0.0005993817577510954, "loss": 2.18, "step": 1843 }, { "epoch": 0.06, "grad_norm": 0.5928753018379211, "learning_rate": 0.0005993810868913517, "loss": 2.1107, "step": 1844 }, { "epoch": 0.06, "grad_norm": 0.5685057640075684, "learning_rate": 0.0005993804156682035, "loss": 2.1207, "step": 1845 }, { "epoch": 0.06, "grad_norm": 0.5863540172576904, "learning_rate": 0.0005993797440816515, "loss": 2.118, "step": 1846 }, { "epoch": 0.06, "grad_norm": 0.6087128520011902, "learning_rate": 0.0005993790721316966, "loss": 2.1196, "step": 1847 }, { "epoch": 0.06, "grad_norm": 0.5794655680656433, "learning_rate": 0.0005993783998183398, "loss": 2.1448, "step": 1848 }, { "epoch": 0.06, "grad_norm": 0.5601152777671814, "learning_rate": 0.0005993777271415814, "loss": 2.0669, "step": 1849 }, { "epoch": 0.06, "grad_norm": 0.5625869631767273, "learning_rate": 0.0005993770541014229, "loss": 2.0766, "step": 1850 }, { "epoch": 0.06, "grad_norm": 0.5749130845069885, "learning_rate": 0.0005993763806978646, "loss": 2.0587, "step": 1851 }, { "epoch": 0.06, "grad_norm": 0.5788476467132568, "learning_rate": 0.0005993757069309074, "loss": 2.2119, "step": 1852 }, { "epoch": 0.06, "grad_norm": 0.5580559372901917, "learning_rate": 0.0005993750328005524, "loss": 2.1447, "step": 1853 }, { "epoch": 0.06, "grad_norm": 0.5968130230903625, "learning_rate": 0.0005993743583068001, "loss": 2.1933, "step": 1854 }, { "epoch": 0.06, "grad_norm": 0.5892594456672668, "learning_rate": 0.0005993736834496514, "loss": 2.1396, "step": 1855 }, { "epoch": 0.06, "grad_norm": 0.5824000239372253, "learning_rate": 0.0005993730082291072, "loss": 2.1064, "step": 1856 }, { "epoch": 0.06, "grad_norm": 0.5615522265434265, "learning_rate": 0.0005993723326451683, "loss": 2.2068, "step": 1857 }, { "epoch": 0.06, "grad_norm": 0.5746825933456421, "learning_rate": 0.0005993716566978356, "loss": 2.1102, "step": 1858 }, { "epoch": 0.06, "grad_norm": 0.5843670964241028, "learning_rate": 0.0005993709803871096, "loss": 2.1383, "step": 1859 }, { "epoch": 0.06, "grad_norm": 0.5795649886131287, "learning_rate": 0.0005993703037129915, "loss": 2.167, "step": 1860 }, { "epoch": 0.06, "grad_norm": 0.5955684185028076, "learning_rate": 0.0005993696266754819, "loss": 2.1673, "step": 1861 }, { "epoch": 0.06, "grad_norm": 0.5629080533981323, "learning_rate": 0.0005993689492745817, "loss": 2.1554, "step": 1862 }, { "epoch": 0.06, "grad_norm": 0.5896292924880981, "learning_rate": 0.0005993682715102917, "loss": 2.1109, "step": 1863 }, { "epoch": 0.06, "grad_norm": 0.5725752711296082, "learning_rate": 0.0005993675933826127, "loss": 2.1506, "step": 1864 }, { "epoch": 0.06, "grad_norm": 0.5603659749031067, "learning_rate": 0.0005993669148915456, "loss": 2.0643, "step": 1865 }, { "epoch": 0.06, "grad_norm": 0.5704346895217896, "learning_rate": 0.0005993662360370912, "loss": 2.1525, "step": 1866 }, { "epoch": 0.06, "grad_norm": 0.5630252957344055, "learning_rate": 0.0005993655568192502, "loss": 2.0472, "step": 1867 }, { "epoch": 0.06, "grad_norm": 0.5896989703178406, "learning_rate": 0.0005993648772380236, "loss": 2.2581, "step": 1868 }, { "epoch": 0.06, "grad_norm": 0.5556736588478088, "learning_rate": 0.0005993641972934121, "loss": 2.1732, "step": 1869 }, { "epoch": 0.06, "grad_norm": 0.5994260907173157, "learning_rate": 0.0005993635169854166, "loss": 2.0827, "step": 1870 }, { "epoch": 0.06, "grad_norm": 0.59273761510849, "learning_rate": 0.000599362836314038, "loss": 2.1885, "step": 1871 }, { "epoch": 0.06, "grad_norm": 0.565450131893158, "learning_rate": 0.0005993621552792768, "loss": 2.0935, "step": 1872 }, { "epoch": 0.06, "grad_norm": 0.5722746253013611, "learning_rate": 0.0005993614738811342, "loss": 2.1487, "step": 1873 }, { "epoch": 0.06, "grad_norm": 0.5882077813148499, "learning_rate": 0.0005993607921196109, "loss": 2.239, "step": 1874 }, { "epoch": 0.06, "grad_norm": 0.5715191960334778, "learning_rate": 0.0005993601099947076, "loss": 2.1339, "step": 1875 }, { "epoch": 0.06, "grad_norm": 0.5824072360992432, "learning_rate": 0.0005993594275064254, "loss": 2.1988, "step": 1876 }, { "epoch": 0.06, "grad_norm": 0.5751832127571106, "learning_rate": 0.0005993587446547647, "loss": 2.1618, "step": 1877 }, { "epoch": 0.06, "grad_norm": 0.5694040060043335, "learning_rate": 0.0005993580614397269, "loss": 2.1644, "step": 1878 }, { "epoch": 0.06, "grad_norm": 0.6102690696716309, "learning_rate": 0.0005993573778613123, "loss": 2.1174, "step": 1879 }, { "epoch": 0.06, "grad_norm": 0.5910534858703613, "learning_rate": 0.0005993566939195221, "loss": 2.1309, "step": 1880 }, { "epoch": 0.06, "grad_norm": 0.5825152397155762, "learning_rate": 0.0005993560096143569, "loss": 2.1135, "step": 1881 }, { "epoch": 0.06, "grad_norm": 0.5770636200904846, "learning_rate": 0.0005993553249458175, "loss": 2.1884, "step": 1882 }, { "epoch": 0.06, "grad_norm": 0.6194322109222412, "learning_rate": 0.000599354639913905, "loss": 2.143, "step": 1883 }, { "epoch": 0.06, "grad_norm": 0.5730317831039429, "learning_rate": 0.0005993539545186201, "loss": 2.1175, "step": 1884 }, { "epoch": 0.06, "grad_norm": 0.5742388367652893, "learning_rate": 0.0005993532687599636, "loss": 2.1095, "step": 1885 }, { "epoch": 0.06, "grad_norm": 0.6203592419624329, "learning_rate": 0.0005993525826379363, "loss": 2.1684, "step": 1886 }, { "epoch": 0.06, "grad_norm": 0.5616888403892517, "learning_rate": 0.000599351896152539, "loss": 2.0874, "step": 1887 }, { "epoch": 0.06, "grad_norm": 0.6305850744247437, "learning_rate": 0.0005993512093037727, "loss": 2.1166, "step": 1888 }, { "epoch": 0.06, "grad_norm": 0.6206833720207214, "learning_rate": 0.0005993505220916381, "loss": 2.1352, "step": 1889 }, { "epoch": 0.06, "grad_norm": 0.5702216625213623, "learning_rate": 0.0005993498345161361, "loss": 2.1184, "step": 1890 }, { "epoch": 0.06, "grad_norm": 0.6058657765388489, "learning_rate": 0.0005993491465772675, "loss": 2.1136, "step": 1891 }, { "epoch": 0.06, "grad_norm": 0.6162940263748169, "learning_rate": 0.000599348458275033, "loss": 2.1106, "step": 1892 }, { "epoch": 0.06, "grad_norm": 0.5714853405952454, "learning_rate": 0.0005993477696094338, "loss": 2.1042, "step": 1893 }, { "epoch": 0.06, "grad_norm": 0.5771575570106506, "learning_rate": 0.0005993470805804703, "loss": 2.1455, "step": 1894 }, { "epoch": 0.06, "grad_norm": 0.5968284606933594, "learning_rate": 0.0005993463911881438, "loss": 2.1293, "step": 1895 }, { "epoch": 0.06, "grad_norm": 0.5806162357330322, "learning_rate": 0.0005993457014324547, "loss": 2.1594, "step": 1896 }, { "epoch": 0.06, "grad_norm": 0.5714269876480103, "learning_rate": 0.0005993450113134041, "loss": 2.0869, "step": 1897 }, { "epoch": 0.06, "grad_norm": 0.592329740524292, "learning_rate": 0.0005993443208309928, "loss": 2.234, "step": 1898 }, { "epoch": 0.06, "grad_norm": 0.6323411464691162, "learning_rate": 0.0005993436299852214, "loss": 2.1452, "step": 1899 }, { "epoch": 0.06, "grad_norm": 0.5881783962249756, "learning_rate": 0.0005993429387760913, "loss": 2.1117, "step": 1900 }, { "epoch": 0.06, "grad_norm": 0.5723445415496826, "learning_rate": 0.0005993422472036026, "loss": 2.0777, "step": 1901 }, { "epoch": 0.06, "grad_norm": 0.5626874566078186, "learning_rate": 0.0005993415552677568, "loss": 2.138, "step": 1902 }, { "epoch": 0.06, "grad_norm": 0.5963892936706543, "learning_rate": 0.0005993408629685543, "loss": 2.1741, "step": 1903 }, { "epoch": 0.06, "grad_norm": 0.5750165581703186, "learning_rate": 0.0005993401703059962, "loss": 2.1226, "step": 1904 }, { "epoch": 0.06, "grad_norm": 0.5674557089805603, "learning_rate": 0.0005993394772800831, "loss": 2.1131, "step": 1905 }, { "epoch": 0.06, "grad_norm": 0.5994208455085754, "learning_rate": 0.0005993387838908161, "loss": 2.2118, "step": 1906 }, { "epoch": 0.06, "grad_norm": 0.5820030570030212, "learning_rate": 0.0005993380901381959, "loss": 2.1704, "step": 1907 }, { "epoch": 0.06, "grad_norm": 0.6573314070701599, "learning_rate": 0.0005993373960222233, "loss": 2.2592, "step": 1908 }, { "epoch": 0.06, "grad_norm": 0.6089707612991333, "learning_rate": 0.0005993367015428992, "loss": 2.0918, "step": 1909 }, { "epoch": 0.06, "grad_norm": 0.5772912502288818, "learning_rate": 0.0005993360067002246, "loss": 2.1543, "step": 1910 }, { "epoch": 0.06, "grad_norm": 0.6231461763381958, "learning_rate": 0.0005993353114942001, "loss": 2.2355, "step": 1911 }, { "epoch": 0.06, "grad_norm": 0.5849087834358215, "learning_rate": 0.0005993346159248266, "loss": 2.126, "step": 1912 }, { "epoch": 0.06, "grad_norm": 0.6220138669013977, "learning_rate": 0.000599333919992105, "loss": 2.254, "step": 1913 }, { "epoch": 0.06, "grad_norm": 0.5616773962974548, "learning_rate": 0.0005993332236960362, "loss": 2.2005, "step": 1914 }, { "epoch": 0.06, "grad_norm": 0.5775344967842102, "learning_rate": 0.0005993325270366209, "loss": 2.1786, "step": 1915 }, { "epoch": 0.06, "grad_norm": 0.5865954160690308, "learning_rate": 0.00059933183001386, "loss": 2.1286, "step": 1916 }, { "epoch": 0.06, "grad_norm": 0.6173359155654907, "learning_rate": 0.0005993311326277544, "loss": 2.132, "step": 1917 }, { "epoch": 0.06, "grad_norm": 0.5943782925605774, "learning_rate": 0.0005993304348783049, "loss": 2.1373, "step": 1918 }, { "epoch": 0.06, "grad_norm": 0.6037482619285583, "learning_rate": 0.0005993297367655123, "loss": 2.0795, "step": 1919 }, { "epoch": 0.06, "grad_norm": 0.6092273592948914, "learning_rate": 0.0005993290382893776, "loss": 2.1283, "step": 1920 }, { "epoch": 0.06, "grad_norm": 0.577061116695404, "learning_rate": 0.0005993283394499016, "loss": 2.119, "step": 1921 }, { "epoch": 0.06, "grad_norm": 0.5930095314979553, "learning_rate": 0.0005993276402470849, "loss": 2.1007, "step": 1922 }, { "epoch": 0.06, "grad_norm": 0.5568046569824219, "learning_rate": 0.0005993269406809287, "loss": 2.1489, "step": 1923 }, { "epoch": 0.06, "grad_norm": 0.5728887319564819, "learning_rate": 0.0005993262407514336, "loss": 2.0966, "step": 1924 }, { "epoch": 0.06, "grad_norm": 0.5810934901237488, "learning_rate": 0.0005993255404586007, "loss": 2.0258, "step": 1925 }, { "epoch": 0.06, "grad_norm": 0.5679649710655212, "learning_rate": 0.0005993248398024305, "loss": 2.1558, "step": 1926 }, { "epoch": 0.06, "grad_norm": 0.5719403624534607, "learning_rate": 0.0005993241387829242, "loss": 2.1398, "step": 1927 }, { "epoch": 0.06, "grad_norm": 0.5786962509155273, "learning_rate": 0.0005993234374000824, "loss": 2.1071, "step": 1928 }, { "epoch": 0.06, "grad_norm": 0.5467103719711304, "learning_rate": 0.0005993227356539062, "loss": 2.1036, "step": 1929 }, { "epoch": 0.06, "grad_norm": 0.5765705108642578, "learning_rate": 0.0005993220335443962, "loss": 2.1841, "step": 1930 }, { "epoch": 0.06, "grad_norm": 0.5854653716087341, "learning_rate": 0.0005993213310715533, "loss": 2.1031, "step": 1931 }, { "epoch": 0.06, "grad_norm": 0.5715008974075317, "learning_rate": 0.0005993206282353785, "loss": 2.1686, "step": 1932 }, { "epoch": 0.06, "grad_norm": 0.5603537559509277, "learning_rate": 0.0005993199250358724, "loss": 2.1479, "step": 1933 }, { "epoch": 0.06, "grad_norm": 0.5732718110084534, "learning_rate": 0.0005993192214730362, "loss": 2.2001, "step": 1934 }, { "epoch": 0.06, "grad_norm": 0.5537631511688232, "learning_rate": 0.0005993185175468705, "loss": 2.1254, "step": 1935 }, { "epoch": 0.06, "grad_norm": 0.5883611440658569, "learning_rate": 0.0005993178132573762, "loss": 2.1439, "step": 1936 }, { "epoch": 0.06, "grad_norm": 0.583591103553772, "learning_rate": 0.0005993171086045543, "loss": 2.2025, "step": 1937 }, { "epoch": 0.06, "grad_norm": 0.5720039010047913, "learning_rate": 0.0005993164035884053, "loss": 2.1761, "step": 1938 }, { "epoch": 0.06, "grad_norm": 0.5670661330223083, "learning_rate": 0.0005993156982089306, "loss": 2.0762, "step": 1939 }, { "epoch": 0.06, "grad_norm": 0.5564155578613281, "learning_rate": 0.0005993149924661304, "loss": 2.0919, "step": 1940 }, { "epoch": 0.06, "grad_norm": 0.57338547706604, "learning_rate": 0.0005993142863600061, "loss": 2.0387, "step": 1941 }, { "epoch": 0.06, "grad_norm": 0.5542970299720764, "learning_rate": 0.0005993135798905584, "loss": 2.1442, "step": 1942 }, { "epoch": 0.06, "grad_norm": 0.5769582390785217, "learning_rate": 0.000599312873057788, "loss": 2.0833, "step": 1943 }, { "epoch": 0.06, "grad_norm": 0.5537224411964417, "learning_rate": 0.000599312165861696, "loss": 2.0932, "step": 1944 }, { "epoch": 0.06, "grad_norm": 0.5683212280273438, "learning_rate": 0.000599311458302283, "loss": 2.0722, "step": 1945 }, { "epoch": 0.06, "grad_norm": 0.5593692660331726, "learning_rate": 0.0005993107503795501, "loss": 2.087, "step": 1946 }, { "epoch": 0.06, "grad_norm": 0.6254794597625732, "learning_rate": 0.000599310042093498, "loss": 2.1214, "step": 1947 }, { "epoch": 0.06, "grad_norm": 0.5768857598304749, "learning_rate": 0.0005993093334441276, "loss": 2.1091, "step": 1948 }, { "epoch": 0.06, "grad_norm": 0.6216500997543335, "learning_rate": 0.0005993086244314397, "loss": 2.1911, "step": 1949 }, { "epoch": 0.06, "grad_norm": 0.6118332147598267, "learning_rate": 0.0005993079150554353, "loss": 2.239, "step": 1950 }, { "epoch": 0.06, "grad_norm": 0.6081624031066895, "learning_rate": 0.0005993072053161152, "loss": 2.1181, "step": 1951 }, { "epoch": 0.06, "grad_norm": 0.6516484618186951, "learning_rate": 0.0005993064952134803, "loss": 2.2003, "step": 1952 }, { "epoch": 0.06, "grad_norm": 0.5811727643013, "learning_rate": 0.0005993057847475312, "loss": 2.0629, "step": 1953 }, { "epoch": 0.07, "grad_norm": 0.6422243118286133, "learning_rate": 0.0005993050739182692, "loss": 2.2232, "step": 1954 }, { "epoch": 0.07, "grad_norm": 0.5856432318687439, "learning_rate": 0.0005993043627256949, "loss": 2.0864, "step": 1955 }, { "epoch": 0.07, "grad_norm": 0.5610438585281372, "learning_rate": 0.0005993036511698092, "loss": 2.1108, "step": 1956 }, { "epoch": 0.07, "grad_norm": 0.59602952003479, "learning_rate": 0.000599302939250613, "loss": 2.0519, "step": 1957 }, { "epoch": 0.07, "grad_norm": 0.6037943363189697, "learning_rate": 0.0005993022269681071, "loss": 2.1746, "step": 1958 }, { "epoch": 0.07, "grad_norm": 0.5535017848014832, "learning_rate": 0.0005993015143222924, "loss": 2.0172, "step": 1959 }, { "epoch": 0.07, "grad_norm": 0.6103936433792114, "learning_rate": 0.0005993008013131697, "loss": 2.1872, "step": 1960 }, { "epoch": 0.07, "grad_norm": 0.6426987051963806, "learning_rate": 0.00059930008794074, "loss": 2.137, "step": 1961 }, { "epoch": 0.07, "grad_norm": 0.5710868835449219, "learning_rate": 0.0005992993742050041, "loss": 2.1326, "step": 1962 }, { "epoch": 0.07, "grad_norm": 0.5984310507774353, "learning_rate": 0.0005992986601059628, "loss": 2.0982, "step": 1963 }, { "epoch": 0.07, "grad_norm": 0.5792283415794373, "learning_rate": 0.0005992979456436171, "loss": 2.0658, "step": 1964 }, { "epoch": 0.07, "grad_norm": 0.5815472602844238, "learning_rate": 0.0005992972308179676, "loss": 2.1986, "step": 1965 }, { "epoch": 0.07, "grad_norm": 0.628173828125, "learning_rate": 0.0005992965156290155, "loss": 2.1822, "step": 1966 }, { "epoch": 0.07, "grad_norm": 0.5678161978721619, "learning_rate": 0.0005992958000767616, "loss": 2.1156, "step": 1967 }, { "epoch": 0.07, "grad_norm": 0.5609376430511475, "learning_rate": 0.0005992950841612067, "loss": 2.0809, "step": 1968 }, { "epoch": 0.07, "grad_norm": 0.6089234352111816, "learning_rate": 0.0005992943678823515, "loss": 2.0774, "step": 1969 }, { "epoch": 0.07, "grad_norm": 0.6162856221199036, "learning_rate": 0.0005992936512401972, "loss": 2.2661, "step": 1970 }, { "epoch": 0.07, "grad_norm": 0.5665106177330017, "learning_rate": 0.0005992929342347444, "loss": 2.0916, "step": 1971 }, { "epoch": 0.07, "grad_norm": 0.5722401142120361, "learning_rate": 0.0005992922168659941, "loss": 2.1416, "step": 1972 }, { "epoch": 0.07, "grad_norm": 0.57515549659729, "learning_rate": 0.0005992914991339472, "loss": 2.1383, "step": 1973 }, { "epoch": 0.07, "grad_norm": 0.5624233484268188, "learning_rate": 0.0005992907810386044, "loss": 2.1245, "step": 1974 }, { "epoch": 0.07, "grad_norm": 0.5770498514175415, "learning_rate": 0.0005992900625799669, "loss": 2.137, "step": 1975 }, { "epoch": 0.07, "grad_norm": 0.5784457325935364, "learning_rate": 0.0005992893437580352, "loss": 2.0732, "step": 1976 }, { "epoch": 0.07, "grad_norm": 0.6098623275756836, "learning_rate": 0.0005992886245728104, "loss": 2.0847, "step": 1977 }, { "epoch": 0.07, "grad_norm": 0.5807976126670837, "learning_rate": 0.0005992879050242931, "loss": 2.0658, "step": 1978 }, { "epoch": 0.07, "grad_norm": 0.5945260524749756, "learning_rate": 0.0005992871851124847, "loss": 2.187, "step": 1979 }, { "epoch": 0.07, "grad_norm": 0.5892970561981201, "learning_rate": 0.0005992864648373856, "loss": 2.1338, "step": 1980 }, { "epoch": 0.07, "grad_norm": 0.5733892917633057, "learning_rate": 0.0005992857441989969, "loss": 2.0608, "step": 1981 }, { "epoch": 0.07, "grad_norm": 0.5750699639320374, "learning_rate": 0.0005992850231973194, "loss": 2.0993, "step": 1982 }, { "epoch": 0.07, "grad_norm": 0.5889256596565247, "learning_rate": 0.0005992843018323539, "loss": 2.0736, "step": 1983 }, { "epoch": 0.07, "grad_norm": 0.5756434202194214, "learning_rate": 0.0005992835801041013, "loss": 2.1183, "step": 1984 }, { "epoch": 0.07, "grad_norm": 0.5630039572715759, "learning_rate": 0.0005992828580125626, "loss": 2.0963, "step": 1985 }, { "epoch": 0.07, "grad_norm": 0.5735762119293213, "learning_rate": 0.0005992821355577387, "loss": 2.1312, "step": 1986 }, { "epoch": 0.07, "grad_norm": 0.5542962551116943, "learning_rate": 0.0005992814127396302, "loss": 2.1034, "step": 1987 }, { "epoch": 0.07, "grad_norm": 0.5699405670166016, "learning_rate": 0.0005992806895582383, "loss": 2.1664, "step": 1988 }, { "epoch": 0.07, "grad_norm": 0.5667082667350769, "learning_rate": 0.0005992799660135637, "loss": 2.0796, "step": 1989 }, { "epoch": 0.07, "grad_norm": 0.6169920563697815, "learning_rate": 0.0005992792421056074, "loss": 2.0461, "step": 1990 }, { "epoch": 0.07, "grad_norm": 0.5954445600509644, "learning_rate": 0.0005992785178343701, "loss": 2.077, "step": 1991 }, { "epoch": 0.07, "grad_norm": 0.5691748857498169, "learning_rate": 0.0005992777931998528, "loss": 2.0965, "step": 1992 }, { "epoch": 0.07, "grad_norm": 0.5829474329948425, "learning_rate": 0.0005992770682020564, "loss": 2.197, "step": 1993 }, { "epoch": 0.07, "grad_norm": 0.5851922631263733, "learning_rate": 0.0005992763428409816, "loss": 2.1408, "step": 1994 }, { "epoch": 0.07, "grad_norm": 0.5891180038452148, "learning_rate": 0.0005992756171166295, "loss": 2.1419, "step": 1995 }, { "epoch": 0.07, "grad_norm": 0.5740416646003723, "learning_rate": 0.0005992748910290009, "loss": 2.1112, "step": 1996 }, { "epoch": 0.07, "grad_norm": 0.5754095911979675, "learning_rate": 0.0005992741645780967, "loss": 2.1406, "step": 1997 }, { "epoch": 0.07, "grad_norm": 0.5604499578475952, "learning_rate": 0.0005992734377639178, "loss": 2.1181, "step": 1998 }, { "epoch": 0.07, "grad_norm": 0.5443208813667297, "learning_rate": 0.000599272710586465, "loss": 2.1345, "step": 1999 }, { "epoch": 0.07, "grad_norm": 0.5917264223098755, "learning_rate": 0.0005992719830457392, "loss": 2.1175, "step": 2000 }, { "epoch": 0.07, "grad_norm": 0.5629692673683167, "learning_rate": 0.0005992712551417414, "loss": 2.1117, "step": 2001 }, { "epoch": 0.07, "grad_norm": 0.5721187591552734, "learning_rate": 0.0005992705268744722, "loss": 2.0445, "step": 2002 }, { "epoch": 0.07, "grad_norm": 0.5812460780143738, "learning_rate": 0.0005992697982439328, "loss": 2.1718, "step": 2003 }, { "epoch": 0.07, "grad_norm": 0.5888683795928955, "learning_rate": 0.0005992690692501239, "loss": 2.0927, "step": 2004 }, { "epoch": 0.07, "grad_norm": 0.5461477637290955, "learning_rate": 0.0005992683398930466, "loss": 2.1302, "step": 2005 }, { "epoch": 0.07, "grad_norm": 0.5696757435798645, "learning_rate": 0.0005992676101727014, "loss": 2.0896, "step": 2006 }, { "epoch": 0.07, "grad_norm": 0.5764003992080688, "learning_rate": 0.0005992668800890896, "loss": 2.0795, "step": 2007 }, { "epoch": 0.07, "grad_norm": 0.5693321228027344, "learning_rate": 0.0005992661496422118, "loss": 2.1148, "step": 2008 }, { "epoch": 0.07, "grad_norm": 0.5840818881988525, "learning_rate": 0.000599265418832069, "loss": 2.1489, "step": 2009 }, { "epoch": 0.07, "grad_norm": 0.5655527710914612, "learning_rate": 0.0005992646876586622, "loss": 2.0597, "step": 2010 }, { "epoch": 0.07, "grad_norm": 0.5905212759971619, "learning_rate": 0.0005992639561219921, "loss": 2.1755, "step": 2011 }, { "epoch": 0.07, "grad_norm": 0.5699718594551086, "learning_rate": 0.0005992632242220596, "loss": 2.1592, "step": 2012 }, { "epoch": 0.07, "grad_norm": 0.5782071352005005, "learning_rate": 0.0005992624919588656, "loss": 2.1314, "step": 2013 }, { "epoch": 0.07, "grad_norm": 0.5836007595062256, "learning_rate": 0.0005992617593324111, "loss": 2.2398, "step": 2014 }, { "epoch": 0.07, "grad_norm": 0.5640200972557068, "learning_rate": 0.0005992610263426969, "loss": 2.1501, "step": 2015 }, { "epoch": 0.07, "grad_norm": 0.5895861983299255, "learning_rate": 0.000599260292989724, "loss": 2.1685, "step": 2016 }, { "epoch": 0.07, "grad_norm": 0.5884904861450195, "learning_rate": 0.0005992595592734931, "loss": 2.1375, "step": 2017 }, { "epoch": 0.07, "grad_norm": 0.5867611765861511, "learning_rate": 0.0005992588251940051, "loss": 2.1124, "step": 2018 }, { "epoch": 0.07, "grad_norm": 0.5761075615882874, "learning_rate": 0.0005992580907512612, "loss": 2.2181, "step": 2019 }, { "epoch": 0.07, "grad_norm": 0.5700680017471313, "learning_rate": 0.0005992573559452619, "loss": 2.1767, "step": 2020 }, { "epoch": 0.07, "grad_norm": 0.579440712928772, "learning_rate": 0.0005992566207760083, "loss": 2.185, "step": 2021 }, { "epoch": 0.07, "grad_norm": 0.5597874522209167, "learning_rate": 0.0005992558852435013, "loss": 2.1428, "step": 2022 }, { "epoch": 0.07, "grad_norm": 0.58623868227005, "learning_rate": 0.0005992551493477418, "loss": 2.1875, "step": 2023 }, { "epoch": 0.07, "grad_norm": 0.5829589366912842, "learning_rate": 0.0005992544130887305, "loss": 2.0651, "step": 2024 }, { "epoch": 0.07, "grad_norm": 0.5702818036079407, "learning_rate": 0.0005992536764664685, "loss": 2.1415, "step": 2025 }, { "epoch": 0.07, "grad_norm": 0.5787519216537476, "learning_rate": 0.0005992529394809566, "loss": 2.1805, "step": 2026 }, { "epoch": 0.07, "grad_norm": 0.5489597916603088, "learning_rate": 0.0005992522021321957, "loss": 2.1494, "step": 2027 }, { "epoch": 0.07, "grad_norm": 0.5722507238388062, "learning_rate": 0.0005992514644201868, "loss": 2.1211, "step": 2028 }, { "epoch": 0.07, "grad_norm": 0.5553101301193237, "learning_rate": 0.0005992507263449307, "loss": 2.0937, "step": 2029 }, { "epoch": 0.07, "grad_norm": 0.5840539932250977, "learning_rate": 0.0005992499879064284, "loss": 2.0951, "step": 2030 }, { "epoch": 0.07, "grad_norm": 0.5546048879623413, "learning_rate": 0.0005992492491046806, "loss": 2.1264, "step": 2031 }, { "epoch": 0.07, "grad_norm": 0.5579590201377869, "learning_rate": 0.0005992485099396883, "loss": 2.1208, "step": 2032 }, { "epoch": 0.07, "grad_norm": 0.5602448582649231, "learning_rate": 0.0005992477704114524, "loss": 2.1519, "step": 2033 }, { "epoch": 0.07, "grad_norm": 0.5978329181671143, "learning_rate": 0.0005992470305199738, "loss": 2.2238, "step": 2034 }, { "epoch": 0.07, "grad_norm": 0.5763021111488342, "learning_rate": 0.0005992462902652533, "loss": 2.0914, "step": 2035 }, { "epoch": 0.07, "grad_norm": 0.6115826368331909, "learning_rate": 0.0005992455496472922, "loss": 2.2147, "step": 2036 }, { "epoch": 0.07, "grad_norm": 0.5602735877037048, "learning_rate": 0.0005992448086660908, "loss": 2.1151, "step": 2037 }, { "epoch": 0.07, "grad_norm": 0.5929070115089417, "learning_rate": 0.0005992440673216504, "loss": 2.1517, "step": 2038 }, { "epoch": 0.07, "grad_norm": 0.6213457584381104, "learning_rate": 0.0005992433256139717, "loss": 2.1463, "step": 2039 }, { "epoch": 0.07, "grad_norm": 0.5564094185829163, "learning_rate": 0.0005992425835430558, "loss": 2.1648, "step": 2040 }, { "epoch": 0.07, "grad_norm": 0.5882598757743835, "learning_rate": 0.0005992418411089035, "loss": 2.077, "step": 2041 }, { "epoch": 0.07, "grad_norm": 0.5847582817077637, "learning_rate": 0.0005992410983115158, "loss": 2.1043, "step": 2042 }, { "epoch": 0.07, "grad_norm": 0.5603688955307007, "learning_rate": 0.0005992403551508933, "loss": 2.1388, "step": 2043 }, { "epoch": 0.07, "grad_norm": 0.5654299259185791, "learning_rate": 0.0005992396116270372, "loss": 2.1218, "step": 2044 }, { "epoch": 0.07, "grad_norm": 0.5578925609588623, "learning_rate": 0.0005992388677399481, "loss": 2.104, "step": 2045 }, { "epoch": 0.07, "grad_norm": 0.5605745315551758, "learning_rate": 0.0005992381234896274, "loss": 2.1085, "step": 2046 }, { "epoch": 0.07, "grad_norm": 0.5724736452102661, "learning_rate": 0.0005992373788760756, "loss": 2.1768, "step": 2047 }, { "epoch": 0.07, "grad_norm": 0.5742295980453491, "learning_rate": 0.0005992366338992937, "loss": 2.1522, "step": 2048 }, { "epoch": 0.07, "grad_norm": 0.5670706629753113, "learning_rate": 0.0005992358885592825, "loss": 2.0762, "step": 2049 }, { "epoch": 0.07, "grad_norm": 0.5606759190559387, "learning_rate": 0.0005992351428560432, "loss": 2.1763, "step": 2050 }, { "epoch": 0.07, "grad_norm": 0.5609959959983826, "learning_rate": 0.0005992343967895765, "loss": 2.1507, "step": 2051 }, { "epoch": 0.07, "grad_norm": 0.5721037983894348, "learning_rate": 0.0005992336503598833, "loss": 2.1845, "step": 2052 }, { "epoch": 0.07, "grad_norm": 0.572813093662262, "learning_rate": 0.0005992329035669645, "loss": 2.1479, "step": 2053 }, { "epoch": 0.07, "grad_norm": 0.5688937902450562, "learning_rate": 0.000599232156410821, "loss": 2.1445, "step": 2054 }, { "epoch": 0.07, "grad_norm": 0.5606898665428162, "learning_rate": 0.0005992314088914539, "loss": 2.0628, "step": 2055 }, { "epoch": 0.07, "grad_norm": 0.552900493144989, "learning_rate": 0.0005992306610088639, "loss": 2.088, "step": 2056 }, { "epoch": 0.07, "grad_norm": 0.5909915566444397, "learning_rate": 0.0005992299127630519, "loss": 2.1382, "step": 2057 }, { "epoch": 0.07, "grad_norm": 0.5709997415542603, "learning_rate": 0.000599229164154019, "loss": 2.1593, "step": 2058 }, { "epoch": 0.07, "grad_norm": 0.5809268355369568, "learning_rate": 0.000599228415181766, "loss": 2.2139, "step": 2059 }, { "epoch": 0.07, "grad_norm": 0.6287059783935547, "learning_rate": 0.0005992276658462936, "loss": 2.1012, "step": 2060 }, { "epoch": 0.07, "grad_norm": 0.5620371103286743, "learning_rate": 0.0005992269161476031, "loss": 2.1262, "step": 2061 }, { "epoch": 0.07, "grad_norm": 0.5808651447296143, "learning_rate": 0.0005992261660856951, "loss": 2.2163, "step": 2062 }, { "epoch": 0.07, "grad_norm": 0.5714796185493469, "learning_rate": 0.0005992254156605705, "loss": 2.1538, "step": 2063 }, { "epoch": 0.07, "grad_norm": 0.5778899192810059, "learning_rate": 0.0005992246648722306, "loss": 2.1281, "step": 2064 }, { "epoch": 0.07, "grad_norm": 0.5801846385002136, "learning_rate": 0.0005992239137206759, "loss": 2.0914, "step": 2065 }, { "epoch": 0.07, "grad_norm": 0.5845829248428345, "learning_rate": 0.0005992231622059075, "loss": 2.22, "step": 2066 }, { "epoch": 0.07, "grad_norm": 0.541977047920227, "learning_rate": 0.0005992224103279262, "loss": 2.1125, "step": 2067 }, { "epoch": 0.07, "grad_norm": 0.5724338889122009, "learning_rate": 0.0005992216580867332, "loss": 2.1415, "step": 2068 }, { "epoch": 0.07, "grad_norm": 0.5836582779884338, "learning_rate": 0.0005992209054823289, "loss": 2.1803, "step": 2069 }, { "epoch": 0.07, "grad_norm": 0.553810179233551, "learning_rate": 0.0005992201525147146, "loss": 2.1315, "step": 2070 }, { "epoch": 0.07, "grad_norm": 0.571550726890564, "learning_rate": 0.0005992193991838912, "loss": 2.103, "step": 2071 }, { "epoch": 0.07, "grad_norm": 0.5658287405967712, "learning_rate": 0.0005992186454898596, "loss": 2.1885, "step": 2072 }, { "epoch": 0.07, "grad_norm": 0.6098199486732483, "learning_rate": 0.0005992178914326205, "loss": 2.1663, "step": 2073 }, { "epoch": 0.07, "grad_norm": 0.5768270492553711, "learning_rate": 0.000599217137012175, "loss": 2.0914, "step": 2074 }, { "epoch": 0.07, "grad_norm": 0.5861147046089172, "learning_rate": 0.0005992163822285241, "loss": 2.178, "step": 2075 }, { "epoch": 0.07, "grad_norm": 0.5787099003791809, "learning_rate": 0.0005992156270816684, "loss": 2.1535, "step": 2076 }, { "epoch": 0.07, "grad_norm": 0.584491491317749, "learning_rate": 0.0005992148715716092, "loss": 2.1588, "step": 2077 }, { "epoch": 0.07, "grad_norm": 0.6006485819816589, "learning_rate": 0.0005992141156983471, "loss": 2.1287, "step": 2078 }, { "epoch": 0.07, "grad_norm": 0.5722618103027344, "learning_rate": 0.0005992133594618831, "loss": 2.1777, "step": 2079 }, { "epoch": 0.07, "grad_norm": 0.5673444867134094, "learning_rate": 0.0005992126028622184, "loss": 2.0523, "step": 2080 }, { "epoch": 0.07, "grad_norm": 0.58589106798172, "learning_rate": 0.0005992118458993535, "loss": 2.1904, "step": 2081 }, { "epoch": 0.07, "grad_norm": 0.5841467380523682, "learning_rate": 0.0005992110885732896, "loss": 2.1434, "step": 2082 }, { "epoch": 0.07, "grad_norm": 0.5619368553161621, "learning_rate": 0.0005992103308840275, "loss": 2.0669, "step": 2083 }, { "epoch": 0.07, "grad_norm": 0.5660922527313232, "learning_rate": 0.0005992095728315682, "loss": 2.1299, "step": 2084 }, { "epoch": 0.07, "grad_norm": 0.5639536380767822, "learning_rate": 0.0005992088144159124, "loss": 2.1106, "step": 2085 }, { "epoch": 0.07, "grad_norm": 0.5670126676559448, "learning_rate": 0.0005992080556370613, "loss": 2.1893, "step": 2086 }, { "epoch": 0.07, "grad_norm": 0.5674829483032227, "learning_rate": 0.0005992072964950156, "loss": 2.0707, "step": 2087 }, { "epoch": 0.07, "grad_norm": 0.5824692249298096, "learning_rate": 0.0005992065369897766, "loss": 2.1014, "step": 2088 }, { "epoch": 0.07, "grad_norm": 0.5644308924674988, "learning_rate": 0.0005992057771213447, "loss": 2.0408, "step": 2089 }, { "epoch": 0.07, "grad_norm": 0.5644833445549011, "learning_rate": 0.0005992050168897212, "loss": 2.1309, "step": 2090 }, { "epoch": 0.07, "grad_norm": 0.5753765106201172, "learning_rate": 0.0005992042562949068, "loss": 2.0959, "step": 2091 }, { "epoch": 0.07, "grad_norm": 0.5756356120109558, "learning_rate": 0.0005992034953369025, "loss": 2.1417, "step": 2092 }, { "epoch": 0.07, "grad_norm": 0.5628425478935242, "learning_rate": 0.0005992027340157093, "loss": 2.0908, "step": 2093 }, { "epoch": 0.07, "grad_norm": 0.5662164688110352, "learning_rate": 0.0005992019723313281, "loss": 2.0417, "step": 2094 }, { "epoch": 0.07, "grad_norm": 0.5835848450660706, "learning_rate": 0.0005992012102837597, "loss": 2.0397, "step": 2095 }, { "epoch": 0.07, "grad_norm": 0.5581758618354797, "learning_rate": 0.0005992004478730052, "loss": 2.0924, "step": 2096 }, { "epoch": 0.07, "grad_norm": 0.5839980840682983, "learning_rate": 0.0005991996850990654, "loss": 2.0906, "step": 2097 }, { "epoch": 0.07, "grad_norm": 0.577671229839325, "learning_rate": 0.0005991989219619414, "loss": 2.1537, "step": 2098 }, { "epoch": 0.07, "grad_norm": 0.6036311984062195, "learning_rate": 0.0005991981584616338, "loss": 2.1627, "step": 2099 }, { "epoch": 0.07, "grad_norm": 0.5703316926956177, "learning_rate": 0.0005991973945981438, "loss": 2.0553, "step": 2100 }, { "epoch": 0.07, "grad_norm": 0.5630404353141785, "learning_rate": 0.0005991966303714722, "loss": 2.1745, "step": 2101 }, { "epoch": 0.07, "grad_norm": 0.5956981778144836, "learning_rate": 0.0005991958657816201, "loss": 2.1394, "step": 2102 }, { "epoch": 0.07, "grad_norm": 0.5716618895530701, "learning_rate": 0.0005991951008285882, "loss": 2.1633, "step": 2103 }, { "epoch": 0.07, "grad_norm": 0.5616774559020996, "learning_rate": 0.0005991943355123776, "loss": 2.1169, "step": 2104 }, { "epoch": 0.07, "grad_norm": 0.6098520755767822, "learning_rate": 0.0005991935698329891, "loss": 2.1796, "step": 2105 }, { "epoch": 0.07, "grad_norm": 0.5833411812782288, "learning_rate": 0.0005991928037904237, "loss": 2.1496, "step": 2106 }, { "epoch": 0.07, "grad_norm": 0.5681368708610535, "learning_rate": 0.0005991920373846823, "loss": 2.1431, "step": 2107 }, { "epoch": 0.07, "grad_norm": 0.6096708178520203, "learning_rate": 0.0005991912706157659, "loss": 2.126, "step": 2108 }, { "epoch": 0.07, "grad_norm": 0.5664354562759399, "learning_rate": 0.0005991905034836753, "loss": 2.1039, "step": 2109 }, { "epoch": 0.07, "grad_norm": 0.5754225850105286, "learning_rate": 0.0005991897359884116, "loss": 2.0871, "step": 2110 }, { "epoch": 0.07, "grad_norm": 0.6033293604850769, "learning_rate": 0.0005991889681299756, "loss": 2.0419, "step": 2111 }, { "epoch": 0.07, "grad_norm": 0.6227774024009705, "learning_rate": 0.0005991881999083682, "loss": 2.0696, "step": 2112 }, { "epoch": 0.07, "grad_norm": 0.5644597411155701, "learning_rate": 0.0005991874313235906, "loss": 2.1005, "step": 2113 }, { "epoch": 0.07, "grad_norm": 0.5430459976196289, "learning_rate": 0.0005991866623756435, "loss": 2.1603, "step": 2114 }, { "epoch": 0.07, "grad_norm": 0.5649860501289368, "learning_rate": 0.0005991858930645277, "loss": 2.1385, "step": 2115 }, { "epoch": 0.07, "grad_norm": 0.560711681842804, "learning_rate": 0.0005991851233902445, "loss": 2.1017, "step": 2116 }, { "epoch": 0.07, "grad_norm": 0.5826808214187622, "learning_rate": 0.0005991843533527945, "loss": 2.1328, "step": 2117 }, { "epoch": 0.07, "grad_norm": 0.5535783171653748, "learning_rate": 0.0005991835829521788, "loss": 2.1291, "step": 2118 }, { "epoch": 0.07, "grad_norm": 0.5616092681884766, "learning_rate": 0.0005991828121883984, "loss": 2.0579, "step": 2119 }, { "epoch": 0.07, "grad_norm": 0.587812602519989, "learning_rate": 0.0005991820410614541, "loss": 2.1575, "step": 2120 }, { "epoch": 0.07, "grad_norm": 0.5551025867462158, "learning_rate": 0.0005991812695713469, "loss": 2.1566, "step": 2121 }, { "epoch": 0.07, "grad_norm": 0.5805416703224182, "learning_rate": 0.0005991804977180777, "loss": 2.0721, "step": 2122 }, { "epoch": 0.07, "grad_norm": 0.5729260444641113, "learning_rate": 0.0005991797255016475, "loss": 2.0814, "step": 2123 }, { "epoch": 0.07, "grad_norm": 0.5607836842536926, "learning_rate": 0.0005991789529220571, "loss": 2.1457, "step": 2124 }, { "epoch": 0.07, "grad_norm": 0.5855390429496765, "learning_rate": 0.0005991781799793077, "loss": 2.1406, "step": 2125 }, { "epoch": 0.07, "grad_norm": 0.5758717060089111, "learning_rate": 0.0005991774066733999, "loss": 2.1245, "step": 2126 }, { "epoch": 0.07, "grad_norm": 0.5585066080093384, "learning_rate": 0.0005991766330043349, "loss": 2.0706, "step": 2127 }, { "epoch": 0.07, "grad_norm": 0.5678357481956482, "learning_rate": 0.0005991758589721136, "loss": 2.0961, "step": 2128 }, { "epoch": 0.07, "grad_norm": 0.5584987998008728, "learning_rate": 0.0005991750845767369, "loss": 2.1786, "step": 2129 }, { "epoch": 0.07, "grad_norm": 0.563204288482666, "learning_rate": 0.0005991743098182054, "loss": 2.152, "step": 2130 }, { "epoch": 0.07, "grad_norm": 0.5662002563476562, "learning_rate": 0.0005991735346965207, "loss": 2.1265, "step": 2131 }, { "epoch": 0.07, "grad_norm": 0.6022940278053284, "learning_rate": 0.0005991727592116834, "loss": 2.1219, "step": 2132 }, { "epoch": 0.07, "grad_norm": 0.5812125205993652, "learning_rate": 0.0005991719833636943, "loss": 2.1215, "step": 2133 }, { "epoch": 0.07, "grad_norm": 0.601813018321991, "learning_rate": 0.0005991712071525546, "loss": 2.1539, "step": 2134 }, { "epoch": 0.07, "grad_norm": 0.5748568177223206, "learning_rate": 0.0005991704305782651, "loss": 2.0899, "step": 2135 }, { "epoch": 0.07, "grad_norm": 0.6102063059806824, "learning_rate": 0.0005991696536408268, "loss": 2.1155, "step": 2136 }, { "epoch": 0.07, "grad_norm": 0.5844610333442688, "learning_rate": 0.0005991688763402406, "loss": 2.0284, "step": 2137 }, { "epoch": 0.07, "grad_norm": 0.5660625100135803, "learning_rate": 0.0005991680986765075, "loss": 2.0502, "step": 2138 }, { "epoch": 0.07, "grad_norm": 0.5784690380096436, "learning_rate": 0.0005991673206496283, "loss": 2.1259, "step": 2139 }, { "epoch": 0.07, "grad_norm": 0.6131044626235962, "learning_rate": 0.0005991665422596041, "loss": 2.0765, "step": 2140 }, { "epoch": 0.07, "grad_norm": 0.5367569327354431, "learning_rate": 0.0005991657635064358, "loss": 2.0594, "step": 2141 }, { "epoch": 0.07, "grad_norm": 0.5597251653671265, "learning_rate": 0.0005991649843901244, "loss": 2.1415, "step": 2142 }, { "epoch": 0.07, "grad_norm": 0.5785521268844604, "learning_rate": 0.0005991642049106708, "loss": 2.0687, "step": 2143 }, { "epoch": 0.07, "grad_norm": 0.5805356502532959, "learning_rate": 0.0005991634250680759, "loss": 2.1676, "step": 2144 }, { "epoch": 0.07, "grad_norm": 0.5848191380500793, "learning_rate": 0.0005991626448623407, "loss": 2.1773, "step": 2145 }, { "epoch": 0.07, "grad_norm": 0.5810191035270691, "learning_rate": 0.0005991618642934661, "loss": 2.086, "step": 2146 }, { "epoch": 0.07, "grad_norm": 0.5554222464561462, "learning_rate": 0.0005991610833614531, "loss": 2.0795, "step": 2147 }, { "epoch": 0.07, "grad_norm": 0.5758642554283142, "learning_rate": 0.0005991603020663026, "loss": 2.0798, "step": 2148 }, { "epoch": 0.07, "grad_norm": 0.5832762718200684, "learning_rate": 0.0005991595204080156, "loss": 2.1131, "step": 2149 }, { "epoch": 0.07, "grad_norm": 0.5764836668968201, "learning_rate": 0.000599158738386593, "loss": 2.1604, "step": 2150 }, { "epoch": 0.07, "grad_norm": 0.5571916103363037, "learning_rate": 0.0005991579560020357, "loss": 2.1456, "step": 2151 }, { "epoch": 0.07, "grad_norm": 0.5531148910522461, "learning_rate": 0.0005991571732543448, "loss": 2.1705, "step": 2152 }, { "epoch": 0.07, "grad_norm": 0.5582297444343567, "learning_rate": 0.0005991563901435212, "loss": 2.0503, "step": 2153 }, { "epoch": 0.07, "grad_norm": 0.5684230923652649, "learning_rate": 0.0005991556066695658, "loss": 2.1628, "step": 2154 }, { "epoch": 0.07, "grad_norm": 0.5566904544830322, "learning_rate": 0.0005991548228324795, "loss": 2.094, "step": 2155 }, { "epoch": 0.07, "grad_norm": 0.5669516921043396, "learning_rate": 0.0005991540386322634, "loss": 2.0434, "step": 2156 }, { "epoch": 0.07, "grad_norm": 0.5502631664276123, "learning_rate": 0.0005991532540689184, "loss": 2.0919, "step": 2157 }, { "epoch": 0.07, "grad_norm": 0.566527247428894, "learning_rate": 0.0005991524691424453, "loss": 2.0964, "step": 2158 }, { "epoch": 0.07, "grad_norm": 0.5851863026618958, "learning_rate": 0.0005991516838528453, "loss": 2.0939, "step": 2159 }, { "epoch": 0.07, "grad_norm": 0.5938918590545654, "learning_rate": 0.0005991508982001192, "loss": 2.1074, "step": 2160 }, { "epoch": 0.07, "grad_norm": 0.567259669303894, "learning_rate": 0.000599150112184268, "loss": 2.0739, "step": 2161 }, { "epoch": 0.07, "grad_norm": 0.5710794925689697, "learning_rate": 0.0005991493258052926, "loss": 2.0778, "step": 2162 }, { "epoch": 0.07, "grad_norm": 0.5592429041862488, "learning_rate": 0.000599148539063194, "loss": 2.0533, "step": 2163 }, { "epoch": 0.07, "grad_norm": 0.5553925633430481, "learning_rate": 0.0005991477519579732, "loss": 2.1475, "step": 2164 }, { "epoch": 0.07, "grad_norm": 0.6053746342658997, "learning_rate": 0.000599146964489631, "loss": 2.1934, "step": 2165 }, { "epoch": 0.07, "grad_norm": 0.5575861930847168, "learning_rate": 0.0005991461766581686, "loss": 2.1312, "step": 2166 }, { "epoch": 0.07, "grad_norm": 0.5641876459121704, "learning_rate": 0.0005991453884635867, "loss": 2.0244, "step": 2167 }, { "epoch": 0.07, "grad_norm": 0.5666680335998535, "learning_rate": 0.0005991445999058865, "loss": 2.1966, "step": 2168 }, { "epoch": 0.07, "grad_norm": 0.5957633852958679, "learning_rate": 0.0005991438109850687, "loss": 2.2296, "step": 2169 }, { "epoch": 0.07, "grad_norm": 0.5579305291175842, "learning_rate": 0.0005991430217011346, "loss": 2.0786, "step": 2170 }, { "epoch": 0.07, "grad_norm": 0.5572961568832397, "learning_rate": 0.0005991422320540847, "loss": 2.1024, "step": 2171 }, { "epoch": 0.07, "grad_norm": 0.5657761693000793, "learning_rate": 0.0005991414420439203, "loss": 2.1743, "step": 2172 }, { "epoch": 0.07, "grad_norm": 0.5607922077178955, "learning_rate": 0.0005991406516706423, "loss": 2.1489, "step": 2173 }, { "epoch": 0.07, "grad_norm": 0.5638071298599243, "learning_rate": 0.0005991398609342516, "loss": 2.1842, "step": 2174 }, { "epoch": 0.07, "grad_norm": 0.5816912055015564, "learning_rate": 0.0005991390698347493, "loss": 2.1485, "step": 2175 }, { "epoch": 0.07, "grad_norm": 0.5519600510597229, "learning_rate": 0.000599138278372136, "loss": 2.0954, "step": 2176 }, { "epoch": 0.07, "grad_norm": 0.5569462776184082, "learning_rate": 0.0005991374865464131, "loss": 2.1494, "step": 2177 }, { "epoch": 0.07, "grad_norm": 0.5886358618736267, "learning_rate": 0.0005991366943575812, "loss": 2.167, "step": 2178 }, { "epoch": 0.07, "grad_norm": 0.5628255605697632, "learning_rate": 0.0005991359018056417, "loss": 2.0128, "step": 2179 }, { "epoch": 0.07, "grad_norm": 0.5497908592224121, "learning_rate": 0.000599135108890595, "loss": 2.0917, "step": 2180 }, { "epoch": 0.07, "grad_norm": 0.5541443824768066, "learning_rate": 0.0005991343156124424, "loss": 2.0599, "step": 2181 }, { "epoch": 0.07, "grad_norm": 0.6043437719345093, "learning_rate": 0.000599133521971185, "loss": 2.0822, "step": 2182 }, { "epoch": 0.07, "grad_norm": 0.5786370038986206, "learning_rate": 0.0005991327279668235, "loss": 2.0648, "step": 2183 }, { "epoch": 0.07, "grad_norm": 0.5746277570724487, "learning_rate": 0.0005991319335993589, "loss": 2.1197, "step": 2184 }, { "epoch": 0.07, "grad_norm": 0.5720046758651733, "learning_rate": 0.0005991311388687922, "loss": 2.0809, "step": 2185 }, { "epoch": 0.07, "grad_norm": 0.5762616991996765, "learning_rate": 0.0005991303437751244, "loss": 2.1333, "step": 2186 }, { "epoch": 0.07, "grad_norm": 0.5426979064941406, "learning_rate": 0.0005991295483183564, "loss": 2.0594, "step": 2187 }, { "epoch": 0.07, "grad_norm": 0.602645993232727, "learning_rate": 0.0005991287524984893, "loss": 2.1762, "step": 2188 }, { "epoch": 0.07, "grad_norm": 0.6184311509132385, "learning_rate": 0.0005991279563155239, "loss": 2.1604, "step": 2189 }, { "epoch": 0.07, "grad_norm": 0.6359555125236511, "learning_rate": 0.0005991271597694612, "loss": 2.1806, "step": 2190 }, { "epoch": 0.07, "grad_norm": 0.6080181002616882, "learning_rate": 0.0005991263628603023, "loss": 2.0753, "step": 2191 }, { "epoch": 0.07, "grad_norm": 0.5729928016662598, "learning_rate": 0.0005991255655880479, "loss": 2.1666, "step": 2192 }, { "epoch": 0.07, "grad_norm": 0.6163721084594727, "learning_rate": 0.0005991247679526994, "loss": 2.1061, "step": 2193 }, { "epoch": 0.07, "grad_norm": 0.5850648880004883, "learning_rate": 0.0005991239699542573, "loss": 2.0823, "step": 2194 }, { "epoch": 0.07, "grad_norm": 0.5967068076133728, "learning_rate": 0.0005991231715927228, "loss": 2.0968, "step": 2195 }, { "epoch": 0.07, "grad_norm": 0.6245183944702148, "learning_rate": 0.0005991223728680969, "loss": 2.0796, "step": 2196 }, { "epoch": 0.07, "grad_norm": 0.5911595821380615, "learning_rate": 0.0005991215737803805, "loss": 2.1053, "step": 2197 }, { "epoch": 0.07, "grad_norm": 0.5872969031333923, "learning_rate": 0.0005991207743295746, "loss": 2.1597, "step": 2198 }, { "epoch": 0.07, "grad_norm": 0.58231520652771, "learning_rate": 0.00059911997451568, "loss": 2.1283, "step": 2199 }, { "epoch": 0.07, "grad_norm": 0.6042187809944153, "learning_rate": 0.000599119174338698, "loss": 2.021, "step": 2200 }, { "epoch": 0.07, "grad_norm": 0.5885147452354431, "learning_rate": 0.0005991183737986295, "loss": 2.1543, "step": 2201 }, { "epoch": 0.07, "grad_norm": 0.5743886232376099, "learning_rate": 0.0005991175728954751, "loss": 2.1309, "step": 2202 }, { "epoch": 0.07, "grad_norm": 0.5915449261665344, "learning_rate": 0.0005991167716292361, "loss": 2.139, "step": 2203 }, { "epoch": 0.07, "grad_norm": 0.5865210890769958, "learning_rate": 0.0005991159699999135, "loss": 2.151, "step": 2204 }, { "epoch": 0.07, "grad_norm": 0.5747060775756836, "learning_rate": 0.0005991151680075082, "loss": 2.0339, "step": 2205 }, { "epoch": 0.07, "grad_norm": 0.5564755797386169, "learning_rate": 0.000599114365652021, "loss": 2.0944, "step": 2206 }, { "epoch": 0.07, "grad_norm": 0.5452213287353516, "learning_rate": 0.0005991135629334532, "loss": 2.1294, "step": 2207 }, { "epoch": 0.07, "grad_norm": 0.5666419863700867, "learning_rate": 0.0005991127598518056, "loss": 2.0441, "step": 2208 }, { "epoch": 0.07, "grad_norm": 0.5777426362037659, "learning_rate": 0.0005991119564070791, "loss": 2.059, "step": 2209 }, { "epoch": 0.07, "grad_norm": 0.5744949579238892, "learning_rate": 0.0005991111525992748, "loss": 2.1419, "step": 2210 }, { "epoch": 0.07, "grad_norm": 0.5766282677650452, "learning_rate": 0.0005991103484283936, "loss": 2.1548, "step": 2211 }, { "epoch": 0.07, "grad_norm": 0.5744626522064209, "learning_rate": 0.0005991095438944366, "loss": 2.169, "step": 2212 }, { "epoch": 0.07, "grad_norm": 0.5435792207717896, "learning_rate": 0.0005991087389974045, "loss": 2.0735, "step": 2213 }, { "epoch": 0.07, "grad_norm": 0.5724369883537292, "learning_rate": 0.0005991079337372986, "loss": 2.0105, "step": 2214 }, { "epoch": 0.07, "grad_norm": 0.5714365839958191, "learning_rate": 0.0005991071281141197, "loss": 2.0386, "step": 2215 }, { "epoch": 0.07, "grad_norm": 0.5627841949462891, "learning_rate": 0.0005991063221278688, "loss": 2.0625, "step": 2216 }, { "epoch": 0.07, "grad_norm": 0.5931980609893799, "learning_rate": 0.000599105515778547, "loss": 2.1942, "step": 2217 }, { "epoch": 0.07, "grad_norm": 0.5616112351417542, "learning_rate": 0.0005991047090661551, "loss": 2.1837, "step": 2218 }, { "epoch": 0.07, "grad_norm": 0.5757700800895691, "learning_rate": 0.0005991039019906942, "loss": 2.0793, "step": 2219 }, { "epoch": 0.07, "grad_norm": 0.5709431171417236, "learning_rate": 0.0005991030945521651, "loss": 2.1469, "step": 2220 }, { "epoch": 0.07, "grad_norm": 0.5723375678062439, "learning_rate": 0.000599102286750569, "loss": 2.1757, "step": 2221 }, { "epoch": 0.07, "grad_norm": 0.565578043460846, "learning_rate": 0.0005991014785859068, "loss": 1.9801, "step": 2222 }, { "epoch": 0.07, "grad_norm": 0.5974023342132568, "learning_rate": 0.0005991006700581795, "loss": 2.1328, "step": 2223 }, { "epoch": 0.07, "grad_norm": 0.5738764405250549, "learning_rate": 0.000599099861167388, "loss": 2.1136, "step": 2224 }, { "epoch": 0.07, "grad_norm": 0.5966119170188904, "learning_rate": 0.0005990990519135333, "loss": 2.1897, "step": 2225 }, { "epoch": 0.07, "grad_norm": 0.5500781536102295, "learning_rate": 0.0005990982422966166, "loss": 2.1785, "step": 2226 }, { "epoch": 0.07, "grad_norm": 0.595025360584259, "learning_rate": 0.0005990974323166386, "loss": 2.2132, "step": 2227 }, { "epoch": 0.07, "grad_norm": 0.576136589050293, "learning_rate": 0.0005990966219736004, "loss": 2.1096, "step": 2228 }, { "epoch": 0.07, "grad_norm": 0.5905352830886841, "learning_rate": 0.000599095811267503, "loss": 2.1258, "step": 2229 }, { "epoch": 0.07, "grad_norm": 0.5612692832946777, "learning_rate": 0.0005990950001983472, "loss": 2.1346, "step": 2230 }, { "epoch": 0.07, "grad_norm": 0.6032291054725647, "learning_rate": 0.0005990941887661342, "loss": 2.0172, "step": 2231 }, { "epoch": 0.07, "grad_norm": 0.5612120032310486, "learning_rate": 0.0005990933769708651, "loss": 2.0904, "step": 2232 }, { "epoch": 0.07, "grad_norm": 0.5766960382461548, "learning_rate": 0.0005990925648125406, "loss": 2.0635, "step": 2233 }, { "epoch": 0.07, "grad_norm": 0.5741564035415649, "learning_rate": 0.0005990917522911617, "loss": 2.103, "step": 2234 }, { "epoch": 0.07, "grad_norm": 0.5778632760047913, "learning_rate": 0.0005990909394067296, "loss": 2.1205, "step": 2235 }, { "epoch": 0.07, "grad_norm": 0.5837991833686829, "learning_rate": 0.0005990901261592451, "loss": 2.0968, "step": 2236 }, { "epoch": 0.07, "grad_norm": 0.5843037962913513, "learning_rate": 0.0005990893125487093, "loss": 2.177, "step": 2237 }, { "epoch": 0.07, "grad_norm": 0.5627540946006775, "learning_rate": 0.0005990884985751231, "loss": 2.1071, "step": 2238 }, { "epoch": 0.07, "grad_norm": 0.5725405812263489, "learning_rate": 0.0005990876842384876, "loss": 2.083, "step": 2239 }, { "epoch": 0.07, "grad_norm": 0.5557199716567993, "learning_rate": 0.0005990868695388037, "loss": 2.0724, "step": 2240 }, { "epoch": 0.07, "grad_norm": 0.5729764699935913, "learning_rate": 0.0005990860544760724, "loss": 2.0806, "step": 2241 }, { "epoch": 0.07, "grad_norm": 0.5603678822517395, "learning_rate": 0.0005990852390502946, "loss": 2.1091, "step": 2242 }, { "epoch": 0.07, "grad_norm": 0.5415151119232178, "learning_rate": 0.0005990844232614715, "loss": 2.1173, "step": 2243 }, { "epoch": 0.07, "grad_norm": 0.5737321972846985, "learning_rate": 0.000599083607109604, "loss": 2.1516, "step": 2244 }, { "epoch": 0.07, "grad_norm": 0.586911141872406, "learning_rate": 0.0005990827905946929, "loss": 2.0939, "step": 2245 }, { "epoch": 0.07, "grad_norm": 0.5614670515060425, "learning_rate": 0.0005990819737167395, "loss": 2.0435, "step": 2246 }, { "epoch": 0.07, "grad_norm": 0.5620256066322327, "learning_rate": 0.0005990811564757447, "loss": 2.1033, "step": 2247 }, { "epoch": 0.07, "grad_norm": 0.5903826355934143, "learning_rate": 0.0005990803388717093, "loss": 2.1687, "step": 2248 }, { "epoch": 0.07, "grad_norm": 0.5686960816383362, "learning_rate": 0.0005990795209046345, "loss": 2.0957, "step": 2249 }, { "epoch": 0.07, "grad_norm": 0.5748509168624878, "learning_rate": 0.0005990787025745213, "loss": 2.1342, "step": 2250 }, { "epoch": 0.07, "grad_norm": 0.5532273650169373, "learning_rate": 0.0005990778838813705, "loss": 2.0365, "step": 2251 }, { "epoch": 0.07, "grad_norm": 0.5396071672439575, "learning_rate": 0.0005990770648251834, "loss": 2.1082, "step": 2252 }, { "epoch": 0.07, "grad_norm": 0.5737676620483398, "learning_rate": 0.0005990762454059607, "loss": 2.0813, "step": 2253 }, { "epoch": 0.07, "grad_norm": 0.5803379416465759, "learning_rate": 0.0005990754256237034, "loss": 2.104, "step": 2254 }, { "epoch": 0.08, "grad_norm": 0.5385444164276123, "learning_rate": 0.0005990746054784127, "loss": 2.1048, "step": 2255 }, { "epoch": 0.08, "grad_norm": 0.5500285625457764, "learning_rate": 0.0005990737849700895, "loss": 2.0306, "step": 2256 }, { "epoch": 0.08, "grad_norm": 0.5707957148551941, "learning_rate": 0.0005990729640987347, "loss": 2.0793, "step": 2257 }, { "epoch": 0.08, "grad_norm": 0.542357861995697, "learning_rate": 0.0005990721428643496, "loss": 2.0569, "step": 2258 }, { "epoch": 0.08, "grad_norm": 0.5562579035758972, "learning_rate": 0.0005990713212669348, "loss": 2.0502, "step": 2259 }, { "epoch": 0.08, "grad_norm": 0.5667147636413574, "learning_rate": 0.0005990704993064915, "loss": 2.0408, "step": 2260 }, { "epoch": 0.08, "grad_norm": 0.5821934938430786, "learning_rate": 0.0005990696769830208, "loss": 2.1589, "step": 2261 }, { "epoch": 0.08, "grad_norm": 0.5753955841064453, "learning_rate": 0.0005990688542965235, "loss": 2.1625, "step": 2262 }, { "epoch": 0.08, "grad_norm": 0.5690050721168518, "learning_rate": 0.0005990680312470007, "loss": 2.1163, "step": 2263 }, { "epoch": 0.08, "grad_norm": 0.5420405864715576, "learning_rate": 0.0005990672078344533, "loss": 2.1026, "step": 2264 }, { "epoch": 0.08, "grad_norm": 0.5428292751312256, "learning_rate": 0.0005990663840588823, "loss": 2.0044, "step": 2265 }, { "epoch": 0.08, "grad_norm": 0.5547707676887512, "learning_rate": 0.000599065559920289, "loss": 2.1179, "step": 2266 }, { "epoch": 0.08, "grad_norm": 0.5655649304389954, "learning_rate": 0.000599064735418674, "loss": 2.0111, "step": 2267 }, { "epoch": 0.08, "grad_norm": 0.562065064907074, "learning_rate": 0.0005990639105540387, "loss": 2.0714, "step": 2268 }, { "epoch": 0.08, "grad_norm": 0.5612577795982361, "learning_rate": 0.0005990630853263836, "loss": 2.0586, "step": 2269 }, { "epoch": 0.08, "grad_norm": 0.5556376576423645, "learning_rate": 0.0005990622597357102, "loss": 2.0944, "step": 2270 }, { "epoch": 0.08, "grad_norm": 0.5321540236473083, "learning_rate": 0.0005990614337820191, "loss": 2.0869, "step": 2271 }, { "epoch": 0.08, "grad_norm": 0.5490270256996155, "learning_rate": 0.0005990606074653116, "loss": 2.1377, "step": 2272 }, { "epoch": 0.08, "grad_norm": 0.5593518018722534, "learning_rate": 0.0005990597807855885, "loss": 2.1099, "step": 2273 }, { "epoch": 0.08, "grad_norm": 0.5703228116035461, "learning_rate": 0.000599058953742851, "loss": 2.0991, "step": 2274 }, { "epoch": 0.08, "grad_norm": 0.5579848289489746, "learning_rate": 0.0005990581263370999, "loss": 2.0468, "step": 2275 }, { "epoch": 0.08, "grad_norm": 0.5613971948623657, "learning_rate": 0.0005990572985683363, "loss": 2.0428, "step": 2276 }, { "epoch": 0.08, "grad_norm": 0.5850604772567749, "learning_rate": 0.0005990564704365613, "loss": 2.1244, "step": 2277 }, { "epoch": 0.08, "grad_norm": 0.5562517642974854, "learning_rate": 0.0005990556419417758, "loss": 2.0699, "step": 2278 }, { "epoch": 0.08, "grad_norm": 0.5828765034675598, "learning_rate": 0.0005990548130839808, "loss": 2.1354, "step": 2279 }, { "epoch": 0.08, "grad_norm": 0.5586283206939697, "learning_rate": 0.0005990539838631772, "loss": 2.1112, "step": 2280 }, { "epoch": 0.08, "grad_norm": 0.5628592371940613, "learning_rate": 0.0005990531542793662, "loss": 2.046, "step": 2281 }, { "epoch": 0.08, "grad_norm": 0.5566573739051819, "learning_rate": 0.0005990523243325489, "loss": 2.0719, "step": 2282 }, { "epoch": 0.08, "grad_norm": 0.5524289011955261, "learning_rate": 0.0005990514940227259, "loss": 2.1099, "step": 2283 }, { "epoch": 0.08, "grad_norm": 0.5624551773071289, "learning_rate": 0.0005990506633498985, "loss": 2.1735, "step": 2284 }, { "epoch": 0.08, "grad_norm": 0.5565332174301147, "learning_rate": 0.0005990498323140678, "loss": 2.0719, "step": 2285 }, { "epoch": 0.08, "grad_norm": 0.5505738258361816, "learning_rate": 0.0005990490009152345, "loss": 2.0643, "step": 2286 }, { "epoch": 0.08, "grad_norm": 0.588706374168396, "learning_rate": 0.0005990481691533998, "loss": 2.1616, "step": 2287 }, { "epoch": 0.08, "grad_norm": 0.5978451371192932, "learning_rate": 0.0005990473370285648, "loss": 2.1085, "step": 2288 }, { "epoch": 0.08, "grad_norm": 0.5507946610450745, "learning_rate": 0.0005990465045407303, "loss": 2.0495, "step": 2289 }, { "epoch": 0.08, "grad_norm": 0.5662010312080383, "learning_rate": 0.0005990456716898974, "loss": 2.1401, "step": 2290 }, { "epoch": 0.08, "grad_norm": 0.5781177878379822, "learning_rate": 0.0005990448384760672, "loss": 2.1347, "step": 2291 }, { "epoch": 0.08, "grad_norm": 0.5988246202468872, "learning_rate": 0.0005990440048992406, "loss": 2.1356, "step": 2292 }, { "epoch": 0.08, "grad_norm": 0.5863742828369141, "learning_rate": 0.0005990431709594186, "loss": 2.1553, "step": 2293 }, { "epoch": 0.08, "grad_norm": 0.5864470601081848, "learning_rate": 0.0005990423366566022, "loss": 2.1336, "step": 2294 }, { "epoch": 0.08, "grad_norm": 0.5891101360321045, "learning_rate": 0.0005990415019907925, "loss": 2.1416, "step": 2295 }, { "epoch": 0.08, "grad_norm": 0.5558746457099915, "learning_rate": 0.0005990406669619905, "loss": 2.0444, "step": 2296 }, { "epoch": 0.08, "grad_norm": 0.5866143107414246, "learning_rate": 0.0005990398315701972, "loss": 2.1475, "step": 2297 }, { "epoch": 0.08, "grad_norm": 0.5596711039543152, "learning_rate": 0.0005990389958154136, "loss": 2.1167, "step": 2298 }, { "epoch": 0.08, "grad_norm": 0.5744723081588745, "learning_rate": 0.0005990381596976407, "loss": 2.1692, "step": 2299 }, { "epoch": 0.08, "grad_norm": 0.5598595142364502, "learning_rate": 0.0005990373232168797, "loss": 2.144, "step": 2300 }, { "epoch": 0.08, "grad_norm": 0.5692418813705444, "learning_rate": 0.0005990364863731313, "loss": 2.0318, "step": 2301 }, { "epoch": 0.08, "grad_norm": 0.5713515877723694, "learning_rate": 0.0005990356491663967, "loss": 2.1277, "step": 2302 }, { "epoch": 0.08, "grad_norm": 0.5682121515274048, "learning_rate": 0.0005990348115966768, "loss": 2.0956, "step": 2303 }, { "epoch": 0.08, "grad_norm": 0.5648388266563416, "learning_rate": 0.0005990339736639727, "loss": 2.0593, "step": 2304 }, { "epoch": 0.08, "grad_norm": 0.5412865281105042, "learning_rate": 0.0005990331353682855, "loss": 1.9895, "step": 2305 }, { "epoch": 0.08, "grad_norm": 0.5748060941696167, "learning_rate": 0.0005990322967096161, "loss": 2.1088, "step": 2306 }, { "epoch": 0.08, "grad_norm": 0.5661616325378418, "learning_rate": 0.0005990314576879656, "loss": 2.1547, "step": 2307 }, { "epoch": 0.08, "grad_norm": 0.5724782943725586, "learning_rate": 0.0005990306183033349, "loss": 2.14, "step": 2308 }, { "epoch": 0.08, "grad_norm": 0.5714339017868042, "learning_rate": 0.0005990297785557252, "loss": 2.0723, "step": 2309 }, { "epoch": 0.08, "grad_norm": 0.5895097851753235, "learning_rate": 0.0005990289384451373, "loss": 2.125, "step": 2310 }, { "epoch": 0.08, "grad_norm": 0.5472422242164612, "learning_rate": 0.0005990280979715723, "loss": 2.078, "step": 2311 }, { "epoch": 0.08, "grad_norm": 0.5941548943519592, "learning_rate": 0.0005990272571350314, "loss": 2.2004, "step": 2312 }, { "epoch": 0.08, "grad_norm": 0.5453989505767822, "learning_rate": 0.0005990264159355153, "loss": 2.1882, "step": 2313 }, { "epoch": 0.08, "grad_norm": 0.5886030793190002, "learning_rate": 0.0005990255743730253, "loss": 2.1788, "step": 2314 }, { "epoch": 0.08, "grad_norm": 0.5562568306922913, "learning_rate": 0.0005990247324475623, "loss": 2.1206, "step": 2315 }, { "epoch": 0.08, "grad_norm": 0.5607234835624695, "learning_rate": 0.0005990238901591273, "loss": 2.1411, "step": 2316 }, { "epoch": 0.08, "grad_norm": 0.5676803588867188, "learning_rate": 0.0005990230475077213, "loss": 2.1113, "step": 2317 }, { "epoch": 0.08, "grad_norm": 0.5720269083976746, "learning_rate": 0.0005990222044933455, "loss": 2.1203, "step": 2318 }, { "epoch": 0.08, "grad_norm": 0.5697157382965088, "learning_rate": 0.0005990213611160007, "loss": 2.0594, "step": 2319 }, { "epoch": 0.08, "grad_norm": 0.5442907214164734, "learning_rate": 0.0005990205173756881, "loss": 2.0666, "step": 2320 }, { "epoch": 0.08, "grad_norm": 0.5569877028465271, "learning_rate": 0.0005990196732724086, "loss": 2.0747, "step": 2321 }, { "epoch": 0.08, "grad_norm": 0.5649319887161255, "learning_rate": 0.0005990188288061633, "loss": 2.111, "step": 2322 }, { "epoch": 0.08, "grad_norm": 0.5821344256401062, "learning_rate": 0.0005990179839769533, "loss": 2.1219, "step": 2323 }, { "epoch": 0.08, "grad_norm": 0.5539898872375488, "learning_rate": 0.0005990171387847794, "loss": 2.1111, "step": 2324 }, { "epoch": 0.08, "grad_norm": 0.549140214920044, "learning_rate": 0.0005990162932296428, "loss": 2.0783, "step": 2325 }, { "epoch": 0.08, "grad_norm": 0.5604833960533142, "learning_rate": 0.0005990154473115445, "loss": 2.0525, "step": 2326 }, { "epoch": 0.08, "grad_norm": 0.6054310202598572, "learning_rate": 0.0005990146010304854, "loss": 2.1426, "step": 2327 }, { "epoch": 0.08, "grad_norm": 0.547592043876648, "learning_rate": 0.0005990137543864667, "loss": 2.0371, "step": 2328 }, { "epoch": 0.08, "grad_norm": 0.5737352967262268, "learning_rate": 0.0005990129073794894, "loss": 2.016, "step": 2329 }, { "epoch": 0.08, "grad_norm": 0.6076140999794006, "learning_rate": 0.0005990120600095544, "loss": 2.0488, "step": 2330 }, { "epoch": 0.08, "grad_norm": 0.5719677209854126, "learning_rate": 0.0005990112122766629, "loss": 2.1754, "step": 2331 }, { "epoch": 0.08, "grad_norm": 0.57179856300354, "learning_rate": 0.0005990103641808158, "loss": 2.0254, "step": 2332 }, { "epoch": 0.08, "grad_norm": 0.5731461048126221, "learning_rate": 0.0005990095157220142, "loss": 2.0751, "step": 2333 }, { "epoch": 0.08, "grad_norm": 0.5863997340202332, "learning_rate": 0.0005990086669002591, "loss": 2.1401, "step": 2334 }, { "epoch": 0.08, "grad_norm": 0.5788459777832031, "learning_rate": 0.0005990078177155516, "loss": 2.0799, "step": 2335 }, { "epoch": 0.08, "grad_norm": 0.592957079410553, "learning_rate": 0.0005990069681678924, "loss": 2.0465, "step": 2336 }, { "epoch": 0.08, "grad_norm": 0.6211668848991394, "learning_rate": 0.0005990061182572831, "loss": 2.0188, "step": 2337 }, { "epoch": 0.08, "grad_norm": 0.5778286457061768, "learning_rate": 0.0005990052679837243, "loss": 2.0854, "step": 2338 }, { "epoch": 0.08, "grad_norm": 0.5901688933372498, "learning_rate": 0.0005990044173472171, "loss": 2.0538, "step": 2339 }, { "epoch": 0.08, "grad_norm": 0.5791522264480591, "learning_rate": 0.0005990035663477627, "loss": 2.0985, "step": 2340 }, { "epoch": 0.08, "grad_norm": 0.577155590057373, "learning_rate": 0.0005990027149853621, "loss": 2.0604, "step": 2341 }, { "epoch": 0.08, "grad_norm": 0.5934808254241943, "learning_rate": 0.000599001863260016, "loss": 2.0661, "step": 2342 }, { "epoch": 0.08, "grad_norm": 0.588715672492981, "learning_rate": 0.0005990010111717259, "loss": 2.1034, "step": 2343 }, { "epoch": 0.08, "grad_norm": 0.5898228883743286, "learning_rate": 0.0005990001587204927, "loss": 2.1374, "step": 2344 }, { "epoch": 0.08, "grad_norm": 0.561187207698822, "learning_rate": 0.0005989993059063171, "loss": 2.0911, "step": 2345 }, { "epoch": 0.08, "grad_norm": 0.5817907452583313, "learning_rate": 0.0005989984527292006, "loss": 2.0629, "step": 2346 }, { "epoch": 0.08, "grad_norm": 0.5847209095954895, "learning_rate": 0.0005989975991891439, "loss": 2.153, "step": 2347 }, { "epoch": 0.08, "grad_norm": 0.5655422210693359, "learning_rate": 0.0005989967452861483, "loss": 2.0709, "step": 2348 }, { "epoch": 0.08, "grad_norm": 0.5727314352989197, "learning_rate": 0.0005989958910202147, "loss": 2.0345, "step": 2349 }, { "epoch": 0.08, "grad_norm": 0.5678262710571289, "learning_rate": 0.0005989950363913441, "loss": 2.144, "step": 2350 }, { "epoch": 0.08, "grad_norm": 0.5777194499969482, "learning_rate": 0.0005989941813995376, "loss": 2.077, "step": 2351 }, { "epoch": 0.08, "grad_norm": 0.5677919387817383, "learning_rate": 0.0005989933260447961, "loss": 2.0976, "step": 2352 }, { "epoch": 0.08, "grad_norm": 0.5493708252906799, "learning_rate": 0.0005989924703271209, "loss": 2.0604, "step": 2353 }, { "epoch": 0.08, "grad_norm": 0.619003176689148, "learning_rate": 0.0005989916142465128, "loss": 2.1356, "step": 2354 }, { "epoch": 0.08, "grad_norm": 0.6167966723442078, "learning_rate": 0.000598990757802973, "loss": 2.0832, "step": 2355 }, { "epoch": 0.08, "grad_norm": 0.5474147200584412, "learning_rate": 0.0005989899009965024, "loss": 2.0272, "step": 2356 }, { "epoch": 0.08, "grad_norm": 0.6164084076881409, "learning_rate": 0.0005989890438271022, "loss": 2.1221, "step": 2357 }, { "epoch": 0.08, "grad_norm": 0.5524939894676208, "learning_rate": 0.0005989881862947733, "loss": 2.0403, "step": 2358 }, { "epoch": 0.08, "grad_norm": 0.5634545087814331, "learning_rate": 0.0005989873283995169, "loss": 2.1112, "step": 2359 }, { "epoch": 0.08, "grad_norm": 0.5581121444702148, "learning_rate": 0.0005989864701413339, "loss": 2.1329, "step": 2360 }, { "epoch": 0.08, "grad_norm": 0.6015229821205139, "learning_rate": 0.0005989856115202252, "loss": 2.1334, "step": 2361 }, { "epoch": 0.08, "grad_norm": 0.561216413974762, "learning_rate": 0.0005989847525361922, "loss": 2.0004, "step": 2362 }, { "epoch": 0.08, "grad_norm": 0.5717803835868835, "learning_rate": 0.0005989838931892358, "loss": 2.0462, "step": 2363 }, { "epoch": 0.08, "grad_norm": 0.5758969187736511, "learning_rate": 0.0005989830334793569, "loss": 2.0905, "step": 2364 }, { "epoch": 0.08, "grad_norm": 0.5634236335754395, "learning_rate": 0.0005989821734065567, "loss": 2.1076, "step": 2365 }, { "epoch": 0.08, "grad_norm": 0.5610275268554688, "learning_rate": 0.0005989813129708362, "loss": 2.1757, "step": 2366 }, { "epoch": 0.08, "grad_norm": 0.551568329334259, "learning_rate": 0.0005989804521721963, "loss": 2.0165, "step": 2367 }, { "epoch": 0.08, "grad_norm": 0.5644444227218628, "learning_rate": 0.0005989795910106384, "loss": 2.0023, "step": 2368 }, { "epoch": 0.08, "grad_norm": 0.5699844360351562, "learning_rate": 0.0005989787294861631, "loss": 2.139, "step": 2369 }, { "epoch": 0.08, "grad_norm": 0.5729612112045288, "learning_rate": 0.0005989778675987718, "loss": 2.1136, "step": 2370 }, { "epoch": 0.08, "grad_norm": 0.554155170917511, "learning_rate": 0.0005989770053484655, "loss": 2.12, "step": 2371 }, { "epoch": 0.08, "grad_norm": 0.5717793107032776, "learning_rate": 0.000598976142735245, "loss": 2.0873, "step": 2372 }, { "epoch": 0.08, "grad_norm": 0.5706942677497864, "learning_rate": 0.0005989752797591117, "loss": 2.094, "step": 2373 }, { "epoch": 0.08, "grad_norm": 0.5436940789222717, "learning_rate": 0.0005989744164200663, "loss": 2.1107, "step": 2374 }, { "epoch": 0.08, "grad_norm": 0.5956447124481201, "learning_rate": 0.0005989735527181101, "loss": 2.0335, "step": 2375 }, { "epoch": 0.08, "grad_norm": 0.5603022575378418, "learning_rate": 0.000598972688653244, "loss": 2.0976, "step": 2376 }, { "epoch": 0.08, "grad_norm": 0.5805538892745972, "learning_rate": 0.0005989718242254691, "loss": 2.0775, "step": 2377 }, { "epoch": 0.08, "grad_norm": 0.5742956399917603, "learning_rate": 0.0005989709594347865, "loss": 2.135, "step": 2378 }, { "epoch": 0.08, "grad_norm": 0.5493746399879456, "learning_rate": 0.0005989700942811972, "loss": 2.0773, "step": 2379 }, { "epoch": 0.08, "grad_norm": 0.5541057586669922, "learning_rate": 0.0005989692287647023, "loss": 2.0368, "step": 2380 }, { "epoch": 0.08, "grad_norm": 0.5972830057144165, "learning_rate": 0.0005989683628853027, "loss": 2.1418, "step": 2381 }, { "epoch": 0.08, "grad_norm": 0.5774447321891785, "learning_rate": 0.0005989674966429997, "loss": 2.1329, "step": 2382 }, { "epoch": 0.08, "grad_norm": 0.5443942546844482, "learning_rate": 0.0005989666300377942, "loss": 2.0824, "step": 2383 }, { "epoch": 0.08, "grad_norm": 0.5966492295265198, "learning_rate": 0.0005989657630696871, "loss": 2.1813, "step": 2384 }, { "epoch": 0.08, "grad_norm": 0.548755943775177, "learning_rate": 0.0005989648957386797, "loss": 2.0751, "step": 2385 }, { "epoch": 0.08, "grad_norm": 0.5617469549179077, "learning_rate": 0.000598964028044773, "loss": 2.1748, "step": 2386 }, { "epoch": 0.08, "grad_norm": 0.5617350935935974, "learning_rate": 0.0005989631599879681, "loss": 2.1151, "step": 2387 }, { "epoch": 0.08, "grad_norm": 0.556617259979248, "learning_rate": 0.0005989622915682657, "loss": 2.0652, "step": 2388 }, { "epoch": 0.08, "grad_norm": 0.546068012714386, "learning_rate": 0.0005989614227856673, "loss": 2.0973, "step": 2389 }, { "epoch": 0.08, "grad_norm": 0.5795837044715881, "learning_rate": 0.0005989605536401739, "loss": 2.0571, "step": 2390 }, { "epoch": 0.08, "grad_norm": 0.55222088098526, "learning_rate": 0.0005989596841317863, "loss": 2.1285, "step": 2391 }, { "epoch": 0.08, "grad_norm": 0.5613052845001221, "learning_rate": 0.0005989588142605057, "loss": 2.0649, "step": 2392 }, { "epoch": 0.08, "grad_norm": 0.5466474294662476, "learning_rate": 0.0005989579440263331, "loss": 2.0889, "step": 2393 }, { "epoch": 0.08, "grad_norm": 0.5716058015823364, "learning_rate": 0.0005989570734292697, "loss": 2.0517, "step": 2394 }, { "epoch": 0.08, "grad_norm": 0.5553569197654724, "learning_rate": 0.0005989562024693164, "loss": 2.0938, "step": 2395 }, { "epoch": 0.08, "grad_norm": 0.5784845352172852, "learning_rate": 0.0005989553311464744, "loss": 2.1203, "step": 2396 }, { "epoch": 0.08, "grad_norm": 0.586431622505188, "learning_rate": 0.0005989544594607447, "loss": 2.0669, "step": 2397 }, { "epoch": 0.08, "grad_norm": 0.566224217414856, "learning_rate": 0.0005989535874121281, "loss": 2.0818, "step": 2398 }, { "epoch": 0.08, "grad_norm": 0.5464868545532227, "learning_rate": 0.0005989527150006262, "loss": 2.0448, "step": 2399 }, { "epoch": 0.08, "grad_norm": 0.5701156258583069, "learning_rate": 0.0005989518422262395, "loss": 2.1113, "step": 2400 }, { "epoch": 0.08, "grad_norm": 0.591147243976593, "learning_rate": 0.0005989509690889694, "loss": 2.0981, "step": 2401 }, { "epoch": 0.08, "grad_norm": 0.5450275540351868, "learning_rate": 0.000598950095588817, "loss": 2.0688, "step": 2402 }, { "epoch": 0.08, "grad_norm": 0.608165442943573, "learning_rate": 0.000598949221725783, "loss": 2.0599, "step": 2403 }, { "epoch": 0.08, "grad_norm": 0.6016728281974792, "learning_rate": 0.0005989483474998689, "loss": 2.0771, "step": 2404 }, { "epoch": 0.08, "grad_norm": 0.5666861534118652, "learning_rate": 0.0005989474729110755, "loss": 2.1517, "step": 2405 }, { "epoch": 0.08, "grad_norm": 0.6155630946159363, "learning_rate": 0.0005989465979594038, "loss": 2.0858, "step": 2406 }, { "epoch": 0.08, "grad_norm": 0.5577906966209412, "learning_rate": 0.0005989457226448551, "loss": 2.0664, "step": 2407 }, { "epoch": 0.08, "grad_norm": 0.5737019777297974, "learning_rate": 0.0005989448469674303, "loss": 2.0716, "step": 2408 }, { "epoch": 0.08, "grad_norm": 0.5627502799034119, "learning_rate": 0.0005989439709271305, "loss": 2.1061, "step": 2409 }, { "epoch": 0.08, "grad_norm": 0.5416324734687805, "learning_rate": 0.0005989430945239568, "loss": 2.0463, "step": 2410 }, { "epoch": 0.08, "grad_norm": 0.5928105711936951, "learning_rate": 0.0005989422177579102, "loss": 2.1103, "step": 2411 }, { "epoch": 0.08, "grad_norm": 0.5633277893066406, "learning_rate": 0.0005989413406289918, "loss": 2.0838, "step": 2412 }, { "epoch": 0.08, "grad_norm": 0.5582817792892456, "learning_rate": 0.0005989404631372027, "loss": 2.173, "step": 2413 }, { "epoch": 0.08, "grad_norm": 0.5551627278327942, "learning_rate": 0.0005989395852825438, "loss": 2.0822, "step": 2414 }, { "epoch": 0.08, "grad_norm": 0.5719274878501892, "learning_rate": 0.0005989387070650165, "loss": 2.1633, "step": 2415 }, { "epoch": 0.08, "grad_norm": 0.5765621662139893, "learning_rate": 0.0005989378284846215, "loss": 2.2114, "step": 2416 }, { "epoch": 0.08, "grad_norm": 0.5319924354553223, "learning_rate": 0.0005989369495413599, "loss": 2.0854, "step": 2417 }, { "epoch": 0.08, "grad_norm": 0.5463805794715881, "learning_rate": 0.0005989360702352331, "loss": 2.0557, "step": 2418 }, { "epoch": 0.08, "grad_norm": 0.5783299803733826, "learning_rate": 0.000598935190566242, "loss": 2.1939, "step": 2419 }, { "epoch": 0.08, "grad_norm": 0.5527122020721436, "learning_rate": 0.0005989343105343875, "loss": 2.0696, "step": 2420 }, { "epoch": 0.08, "grad_norm": 0.5729843974113464, "learning_rate": 0.0005989334301396708, "loss": 2.0681, "step": 2421 }, { "epoch": 0.08, "grad_norm": 0.5466183423995972, "learning_rate": 0.000598932549382093, "loss": 2.0601, "step": 2422 }, { "epoch": 0.08, "grad_norm": 0.5635239481925964, "learning_rate": 0.0005989316682616551, "loss": 2.0648, "step": 2423 }, { "epoch": 0.08, "grad_norm": 0.5686458945274353, "learning_rate": 0.0005989307867783582, "loss": 2.1035, "step": 2424 }, { "epoch": 0.08, "grad_norm": 0.5818499326705933, "learning_rate": 0.0005989299049322034, "loss": 2.1202, "step": 2425 }, { "epoch": 0.08, "grad_norm": 0.5885784029960632, "learning_rate": 0.0005989290227231917, "loss": 2.1461, "step": 2426 }, { "epoch": 0.08, "grad_norm": 0.6433579325675964, "learning_rate": 0.0005989281401513242, "loss": 2.0919, "step": 2427 }, { "epoch": 0.08, "grad_norm": 0.5427348613739014, "learning_rate": 0.000598927257216602, "loss": 2.0819, "step": 2428 }, { "epoch": 0.08, "grad_norm": 0.5482949018478394, "learning_rate": 0.0005989263739190262, "loss": 2.0624, "step": 2429 }, { "epoch": 0.08, "grad_norm": 0.617440938949585, "learning_rate": 0.0005989254902585979, "loss": 2.0455, "step": 2430 }, { "epoch": 0.08, "grad_norm": 0.5719792246818542, "learning_rate": 0.0005989246062353179, "loss": 2.0688, "step": 2431 }, { "epoch": 0.08, "grad_norm": 0.5640871524810791, "learning_rate": 0.0005989237218491875, "loss": 2.0465, "step": 2432 }, { "epoch": 0.08, "grad_norm": 0.5926010608673096, "learning_rate": 0.0005989228371002079, "loss": 2.0974, "step": 2433 }, { "epoch": 0.08, "grad_norm": 0.5595712661743164, "learning_rate": 0.0005989219519883799, "loss": 2.0829, "step": 2434 }, { "epoch": 0.08, "grad_norm": 0.5875090956687927, "learning_rate": 0.0005989210665137048, "loss": 2.0957, "step": 2435 }, { "epoch": 0.08, "grad_norm": 0.5850825905799866, "learning_rate": 0.0005989201806761834, "loss": 2.0879, "step": 2436 }, { "epoch": 0.08, "grad_norm": 0.5532703995704651, "learning_rate": 0.0005989192944758172, "loss": 2.1256, "step": 2437 }, { "epoch": 0.08, "grad_norm": 0.5530449151992798, "learning_rate": 0.0005989184079126067, "loss": 2.1316, "step": 2438 }, { "epoch": 0.08, "grad_norm": 0.5617064833641052, "learning_rate": 0.0005989175209865535, "loss": 2.0468, "step": 2439 }, { "epoch": 0.08, "grad_norm": 0.5609372854232788, "learning_rate": 0.0005989166336976585, "loss": 2.1083, "step": 2440 }, { "epoch": 0.08, "grad_norm": 0.5818621516227722, "learning_rate": 0.0005989157460459226, "loss": 2.0948, "step": 2441 }, { "epoch": 0.08, "grad_norm": 0.5300642848014832, "learning_rate": 0.0005989148580313471, "loss": 2.0324, "step": 2442 }, { "epoch": 0.08, "grad_norm": 0.5612297654151917, "learning_rate": 0.0005989139696539331, "loss": 2.1683, "step": 2443 }, { "epoch": 0.08, "grad_norm": 0.5626572370529175, "learning_rate": 0.0005989130809136814, "loss": 2.0778, "step": 2444 }, { "epoch": 0.08, "grad_norm": 0.5779831409454346, "learning_rate": 0.0005989121918105934, "loss": 2.1106, "step": 2445 }, { "epoch": 0.08, "grad_norm": 0.5446734428405762, "learning_rate": 0.00059891130234467, "loss": 2.073, "step": 2446 }, { "epoch": 0.08, "grad_norm": 0.5586188435554504, "learning_rate": 0.0005989104125159124, "loss": 2.1231, "step": 2447 }, { "epoch": 0.08, "grad_norm": 0.5986829996109009, "learning_rate": 0.0005989095223243215, "loss": 2.0825, "step": 2448 }, { "epoch": 0.08, "grad_norm": 0.5307755470275879, "learning_rate": 0.0005989086317698984, "loss": 2.0364, "step": 2449 }, { "epoch": 0.08, "grad_norm": 0.5478610992431641, "learning_rate": 0.0005989077408526444, "loss": 2.1069, "step": 2450 }, { "epoch": 0.08, "grad_norm": 0.5752032995223999, "learning_rate": 0.0005989068495725604, "loss": 2.1275, "step": 2451 }, { "epoch": 0.08, "grad_norm": 0.5355430841445923, "learning_rate": 0.0005989059579296476, "loss": 2.1322, "step": 2452 }, { "epoch": 0.08, "grad_norm": 0.5717677474021912, "learning_rate": 0.000598905065923907, "loss": 2.1112, "step": 2453 }, { "epoch": 0.08, "grad_norm": 0.5639476180076599, "learning_rate": 0.0005989041735553396, "loss": 2.1458, "step": 2454 }, { "epoch": 0.08, "grad_norm": 0.5470194220542908, "learning_rate": 0.0005989032808239466, "loss": 2.0787, "step": 2455 }, { "epoch": 0.08, "grad_norm": 0.5311917662620544, "learning_rate": 0.0005989023877297291, "loss": 2.1403, "step": 2456 }, { "epoch": 0.08, "grad_norm": 0.5456987619400024, "learning_rate": 0.0005989014942726881, "loss": 2.1014, "step": 2457 }, { "epoch": 0.08, "grad_norm": 0.5639501810073853, "learning_rate": 0.0005989006004528247, "loss": 2.1152, "step": 2458 }, { "epoch": 0.08, "grad_norm": 0.5383491516113281, "learning_rate": 0.0005988997062701401, "loss": 2.0689, "step": 2459 }, { "epoch": 0.08, "grad_norm": 0.5344719886779785, "learning_rate": 0.0005988988117246353, "loss": 2.1213, "step": 2460 }, { "epoch": 0.08, "grad_norm": 0.551491379737854, "learning_rate": 0.0005988979168163113, "loss": 2.1032, "step": 2461 }, { "epoch": 0.08, "grad_norm": 0.5765730142593384, "learning_rate": 0.0005988970215451693, "loss": 2.0579, "step": 2462 }, { "epoch": 0.08, "grad_norm": 0.5531744956970215, "learning_rate": 0.0005988961259112104, "loss": 2.0754, "step": 2463 }, { "epoch": 0.08, "grad_norm": 0.5563039779663086, "learning_rate": 0.0005988952299144355, "loss": 2.1363, "step": 2464 }, { "epoch": 0.08, "grad_norm": 0.579155683517456, "learning_rate": 0.0005988943335548461, "loss": 2.0843, "step": 2465 }, { "epoch": 0.08, "grad_norm": 0.5564908981323242, "learning_rate": 0.0005988934368324428, "loss": 2.0675, "step": 2466 }, { "epoch": 0.08, "grad_norm": 0.5804965496063232, "learning_rate": 0.0005988925397472269, "loss": 1.9915, "step": 2467 }, { "epoch": 0.08, "grad_norm": 0.5581797957420349, "learning_rate": 0.0005988916422991996, "loss": 2.0004, "step": 2468 }, { "epoch": 0.08, "grad_norm": 0.6145014762878418, "learning_rate": 0.0005988907444883618, "loss": 2.0798, "step": 2469 }, { "epoch": 0.08, "grad_norm": 0.5783571004867554, "learning_rate": 0.0005988898463147147, "loss": 2.0998, "step": 2470 }, { "epoch": 0.08, "grad_norm": 0.5455669164657593, "learning_rate": 0.0005988889477782594, "loss": 2.0247, "step": 2471 }, { "epoch": 0.08, "grad_norm": 0.5397204756736755, "learning_rate": 0.000598888048878997, "loss": 2.0891, "step": 2472 }, { "epoch": 0.08, "grad_norm": 0.5703787803649902, "learning_rate": 0.0005988871496169284, "loss": 2.0842, "step": 2473 }, { "epoch": 0.08, "grad_norm": 0.578685998916626, "learning_rate": 0.000598886249992055, "loss": 2.1019, "step": 2474 }, { "epoch": 0.08, "grad_norm": 0.5391881465911865, "learning_rate": 0.0005988853500043778, "loss": 2.0596, "step": 2475 }, { "epoch": 0.08, "grad_norm": 0.5459556579589844, "learning_rate": 0.0005988844496538975, "loss": 2.1194, "step": 2476 }, { "epoch": 0.08, "grad_norm": 0.5589038729667664, "learning_rate": 0.0005988835489406158, "loss": 2.0956, "step": 2477 }, { "epoch": 0.08, "grad_norm": 0.5645471215248108, "learning_rate": 0.0005988826478645334, "loss": 2.1365, "step": 2478 }, { "epoch": 0.08, "grad_norm": 0.5674598813056946, "learning_rate": 0.0005988817464256515, "loss": 2.1425, "step": 2479 }, { "epoch": 0.08, "grad_norm": 0.5343666672706604, "learning_rate": 0.0005988808446239712, "loss": 2.0997, "step": 2480 }, { "epoch": 0.08, "grad_norm": 0.5521514415740967, "learning_rate": 0.0005988799424594936, "loss": 2.1448, "step": 2481 }, { "epoch": 0.08, "grad_norm": 0.5549847483634949, "learning_rate": 0.0005988790399322198, "loss": 2.0618, "step": 2482 }, { "epoch": 0.08, "grad_norm": 0.5579347610473633, "learning_rate": 0.0005988781370421509, "loss": 2.0834, "step": 2483 }, { "epoch": 0.08, "grad_norm": 0.5592746734619141, "learning_rate": 0.000598877233789288, "loss": 2.0962, "step": 2484 }, { "epoch": 0.08, "grad_norm": 0.5734043717384338, "learning_rate": 0.0005988763301736321, "loss": 2.0952, "step": 2485 }, { "epoch": 0.08, "grad_norm": 0.5540972948074341, "learning_rate": 0.0005988754261951844, "loss": 2.0063, "step": 2486 }, { "epoch": 0.08, "grad_norm": 0.5506333112716675, "learning_rate": 0.0005988745218539459, "loss": 1.9783, "step": 2487 }, { "epoch": 0.08, "grad_norm": 0.5633381605148315, "learning_rate": 0.000598873617149918, "loss": 2.0786, "step": 2488 }, { "epoch": 0.08, "grad_norm": 0.5699738264083862, "learning_rate": 0.0005988727120831013, "loss": 2.1146, "step": 2489 }, { "epoch": 0.08, "grad_norm": 0.5583811402320862, "learning_rate": 0.0005988718066534973, "loss": 2.1039, "step": 2490 }, { "epoch": 0.08, "grad_norm": 0.6260716319084167, "learning_rate": 0.0005988709008611069, "loss": 2.0437, "step": 2491 }, { "epoch": 0.08, "grad_norm": 0.5583456754684448, "learning_rate": 0.0005988699947059314, "loss": 2.0942, "step": 2492 }, { "epoch": 0.08, "grad_norm": 0.5598670840263367, "learning_rate": 0.0005988690881879716, "loss": 2.121, "step": 2493 }, { "epoch": 0.08, "grad_norm": 0.5797551274299622, "learning_rate": 0.0005988681813072288, "loss": 2.0327, "step": 2494 }, { "epoch": 0.08, "grad_norm": 0.5580123066902161, "learning_rate": 0.0005988672740637041, "loss": 2.0861, "step": 2495 }, { "epoch": 0.08, "grad_norm": 0.6171901822090149, "learning_rate": 0.0005988663664573986, "loss": 2.1516, "step": 2496 }, { "epoch": 0.08, "grad_norm": 0.5787709951400757, "learning_rate": 0.0005988654584883133, "loss": 2.0336, "step": 2497 }, { "epoch": 0.08, "grad_norm": 0.5516482591629028, "learning_rate": 0.0005988645501564494, "loss": 2.0442, "step": 2498 }, { "epoch": 0.08, "grad_norm": 0.6187259554862976, "learning_rate": 0.000598863641461808, "loss": 2.155, "step": 2499 }, { "epoch": 0.08, "grad_norm": 0.5571560263633728, "learning_rate": 0.0005988627324043901, "loss": 2.1284, "step": 2500 }, { "epoch": 0.08, "grad_norm": 0.5621789693832397, "learning_rate": 0.0005988618229841969, "loss": 2.0359, "step": 2501 }, { "epoch": 0.08, "grad_norm": 0.5509947538375854, "learning_rate": 0.0005988609132012295, "loss": 2.1157, "step": 2502 }, { "epoch": 0.08, "grad_norm": 0.5887694954872131, "learning_rate": 0.000598860003055489, "loss": 2.0924, "step": 2503 }, { "epoch": 0.08, "grad_norm": 0.5486743450164795, "learning_rate": 0.0005988590925469765, "loss": 2.1407, "step": 2504 }, { "epoch": 0.08, "grad_norm": 0.5988523364067078, "learning_rate": 0.0005988581816756931, "loss": 2.1219, "step": 2505 }, { "epoch": 0.08, "grad_norm": 0.5410465598106384, "learning_rate": 0.0005988572704416398, "loss": 2.1054, "step": 2506 }, { "epoch": 0.08, "grad_norm": 0.5676745176315308, "learning_rate": 0.0005988563588448179, "loss": 2.0347, "step": 2507 }, { "epoch": 0.08, "grad_norm": 0.5826209783554077, "learning_rate": 0.0005988554468852284, "loss": 2.1295, "step": 2508 }, { "epoch": 0.08, "grad_norm": 0.5516262650489807, "learning_rate": 0.0005988545345628724, "loss": 2.0164, "step": 2509 }, { "epoch": 0.08, "grad_norm": 0.6021867394447327, "learning_rate": 0.000598853621877751, "loss": 2.1378, "step": 2510 }, { "epoch": 0.08, "grad_norm": 0.60091233253479, "learning_rate": 0.0005988527088298654, "loss": 2.1019, "step": 2511 }, { "epoch": 0.08, "grad_norm": 0.5347685217857361, "learning_rate": 0.0005988517954192167, "loss": 2.1328, "step": 2512 }, { "epoch": 0.08, "grad_norm": 0.5989383459091187, "learning_rate": 0.0005988508816458058, "loss": 2.0658, "step": 2513 }, { "epoch": 0.08, "grad_norm": 0.5527674555778503, "learning_rate": 0.0005988499675096342, "loss": 2.078, "step": 2514 }, { "epoch": 0.08, "grad_norm": 0.5823447704315186, "learning_rate": 0.0005988490530107025, "loss": 2.0795, "step": 2515 }, { "epoch": 0.08, "grad_norm": 0.5752201080322266, "learning_rate": 0.0005988481381490122, "loss": 2.0316, "step": 2516 }, { "epoch": 0.08, "grad_norm": 0.6185765266418457, "learning_rate": 0.0005988472229245643, "loss": 2.0375, "step": 2517 }, { "epoch": 0.08, "grad_norm": 0.5785713195800781, "learning_rate": 0.00059884630733736, "loss": 2.1572, "step": 2518 }, { "epoch": 0.08, "grad_norm": 0.5581179261207581, "learning_rate": 0.0005988453913874002, "loss": 2.0923, "step": 2519 }, { "epoch": 0.08, "grad_norm": 0.577511191368103, "learning_rate": 0.000598844475074686, "loss": 2.1748, "step": 2520 }, { "epoch": 0.08, "grad_norm": 0.6058073043823242, "learning_rate": 0.0005988435583992188, "loss": 2.0748, "step": 2521 }, { "epoch": 0.08, "grad_norm": 0.5590987205505371, "learning_rate": 0.0005988426413609996, "loss": 2.0581, "step": 2522 }, { "epoch": 0.08, "grad_norm": 0.6016743779182434, "learning_rate": 0.0005988417239600294, "loss": 2.0763, "step": 2523 }, { "epoch": 0.08, "grad_norm": 0.538809061050415, "learning_rate": 0.0005988408061963094, "loss": 2.0931, "step": 2524 }, { "epoch": 0.08, "grad_norm": 0.5444104075431824, "learning_rate": 0.0005988398880698406, "loss": 2.1647, "step": 2525 }, { "epoch": 0.08, "grad_norm": 0.5781583189964294, "learning_rate": 0.0005988389695806244, "loss": 2.0318, "step": 2526 }, { "epoch": 0.08, "grad_norm": 0.5930274724960327, "learning_rate": 0.0005988380507286616, "loss": 2.1151, "step": 2527 }, { "epoch": 0.08, "grad_norm": 0.543589174747467, "learning_rate": 0.0005988371315139534, "loss": 1.9853, "step": 2528 }, { "epoch": 0.08, "grad_norm": 0.5694436430931091, "learning_rate": 0.0005988362119365011, "loss": 2.0697, "step": 2529 }, { "epoch": 0.08, "grad_norm": 0.6460092067718506, "learning_rate": 0.0005988352919963055, "loss": 2.1531, "step": 2530 }, { "epoch": 0.08, "grad_norm": 0.5530660152435303, "learning_rate": 0.0005988343716933679, "loss": 2.0289, "step": 2531 }, { "epoch": 0.08, "grad_norm": 0.6107516884803772, "learning_rate": 0.0005988334510276895, "loss": 2.1242, "step": 2532 }, { "epoch": 0.08, "grad_norm": 0.5900111794471741, "learning_rate": 0.0005988325299992713, "loss": 2.0992, "step": 2533 }, { "epoch": 0.08, "grad_norm": 0.552880585193634, "learning_rate": 0.0005988316086081145, "loss": 2.1987, "step": 2534 }, { "epoch": 0.08, "grad_norm": 0.5804452896118164, "learning_rate": 0.00059883068685422, "loss": 2.1106, "step": 2535 }, { "epoch": 0.08, "grad_norm": 0.5409252047538757, "learning_rate": 0.0005988297647375892, "loss": 2.0336, "step": 2536 }, { "epoch": 0.08, "grad_norm": 0.5829539895057678, "learning_rate": 0.000598828842258223, "loss": 2.1039, "step": 2537 }, { "epoch": 0.08, "grad_norm": 0.600849986076355, "learning_rate": 0.0005988279194161227, "loss": 2.0565, "step": 2538 }, { "epoch": 0.08, "grad_norm": 0.5716837644577026, "learning_rate": 0.0005988269962112894, "loss": 2.1256, "step": 2539 }, { "epoch": 0.08, "grad_norm": 0.5600261688232422, "learning_rate": 0.0005988260726437239, "loss": 2.1508, "step": 2540 }, { "epoch": 0.08, "grad_norm": 0.5670986771583557, "learning_rate": 0.0005988251487134278, "loss": 2.1039, "step": 2541 }, { "epoch": 0.08, "grad_norm": 0.5362632870674133, "learning_rate": 0.0005988242244204019, "loss": 2.0462, "step": 2542 }, { "epoch": 0.08, "grad_norm": 0.5371444225311279, "learning_rate": 0.0005988232997646475, "loss": 2.0942, "step": 2543 }, { "epoch": 0.08, "grad_norm": 0.6005129814147949, "learning_rate": 0.0005988223747461656, "loss": 2.1044, "step": 2544 }, { "epoch": 0.08, "grad_norm": 0.5750254392623901, "learning_rate": 0.0005988214493649573, "loss": 2.0677, "step": 2545 }, { "epoch": 0.08, "grad_norm": 0.565127968788147, "learning_rate": 0.0005988205236210239, "loss": 2.0476, "step": 2546 }, { "epoch": 0.08, "grad_norm": 0.5464212894439697, "learning_rate": 0.0005988195975143664, "loss": 2.1282, "step": 2547 }, { "epoch": 0.08, "grad_norm": 0.5666311979293823, "learning_rate": 0.000598818671044986, "loss": 2.0638, "step": 2548 }, { "epoch": 0.08, "grad_norm": 0.5985363125801086, "learning_rate": 0.0005988177442128836, "loss": 2.107, "step": 2549 }, { "epoch": 0.08, "grad_norm": 0.5501735210418701, "learning_rate": 0.0005988168170180606, "loss": 2.1422, "step": 2550 }, { "epoch": 0.08, "grad_norm": 0.5619029402732849, "learning_rate": 0.000598815889460518, "loss": 2.0523, "step": 2551 }, { "epoch": 0.08, "grad_norm": 0.5584899187088013, "learning_rate": 0.0005988149615402567, "loss": 1.9584, "step": 2552 }, { "epoch": 0.08, "grad_norm": 0.572210967540741, "learning_rate": 0.0005988140332572785, "loss": 2.0376, "step": 2553 }, { "epoch": 0.08, "grad_norm": 0.585378885269165, "learning_rate": 0.0005988131046115837, "loss": 2.1545, "step": 2554 }, { "epoch": 0.09, "grad_norm": 0.568143904209137, "learning_rate": 0.000598812175603174, "loss": 2.0649, "step": 2555 }, { "epoch": 0.09, "grad_norm": 0.564621090888977, "learning_rate": 0.0005988112462320504, "loss": 2.024, "step": 2556 }, { "epoch": 0.09, "grad_norm": 0.5855188965797424, "learning_rate": 0.0005988103164982139, "loss": 2.078, "step": 2557 }, { "epoch": 0.09, "grad_norm": 0.5992969870567322, "learning_rate": 0.0005988093864016657, "loss": 2.1045, "step": 2558 }, { "epoch": 0.09, "grad_norm": 0.5817448496818542, "learning_rate": 0.0005988084559424069, "loss": 2.0899, "step": 2559 }, { "epoch": 0.09, "grad_norm": 0.5906208753585815, "learning_rate": 0.0005988075251204387, "loss": 2.0832, "step": 2560 }, { "epoch": 0.09, "grad_norm": 0.5799587965011597, "learning_rate": 0.0005988065939357622, "loss": 2.0689, "step": 2561 }, { "epoch": 0.09, "grad_norm": 0.5856010913848877, "learning_rate": 0.0005988056623883786, "loss": 2.0061, "step": 2562 }, { "epoch": 0.09, "grad_norm": 0.5380590558052063, "learning_rate": 0.000598804730478289, "loss": 2.1194, "step": 2563 }, { "epoch": 0.09, "grad_norm": 0.5601816773414612, "learning_rate": 0.0005988037982054942, "loss": 2.0806, "step": 2564 }, { "epoch": 0.09, "grad_norm": 0.5740159749984741, "learning_rate": 0.0005988028655699957, "loss": 2.1694, "step": 2565 }, { "epoch": 0.09, "grad_norm": 0.5605655908584595, "learning_rate": 0.0005988019325717947, "loss": 2.0994, "step": 2566 }, { "epoch": 0.09, "grad_norm": 0.5789016485214233, "learning_rate": 0.0005988009992108921, "loss": 2.0225, "step": 2567 }, { "epoch": 0.09, "grad_norm": 0.5522060394287109, "learning_rate": 0.0005988000654872891, "loss": 2.0627, "step": 2568 }, { "epoch": 0.09, "grad_norm": 0.5539917945861816, "learning_rate": 0.0005987991314009869, "loss": 2.0759, "step": 2569 }, { "epoch": 0.09, "grad_norm": 0.5440136194229126, "learning_rate": 0.0005987981969519865, "loss": 2.1782, "step": 2570 }, { "epoch": 0.09, "grad_norm": 0.5909836292266846, "learning_rate": 0.0005987972621402892, "loss": 2.1387, "step": 2571 }, { "epoch": 0.09, "grad_norm": 0.5781180262565613, "learning_rate": 0.000598796326965896, "loss": 2.1325, "step": 2572 }, { "epoch": 0.09, "grad_norm": 0.588778555393219, "learning_rate": 0.000598795391428808, "loss": 2.0835, "step": 2573 }, { "epoch": 0.09, "grad_norm": 0.5565129518508911, "learning_rate": 0.0005987944555290265, "loss": 2.1403, "step": 2574 }, { "epoch": 0.09, "grad_norm": 0.5549280047416687, "learning_rate": 0.0005987935192665526, "loss": 2.1091, "step": 2575 }, { "epoch": 0.09, "grad_norm": 0.5511156916618347, "learning_rate": 0.0005987925826413874, "loss": 2.1667, "step": 2576 }, { "epoch": 0.09, "grad_norm": 0.5407308340072632, "learning_rate": 0.000598791645653532, "loss": 1.9616, "step": 2577 }, { "epoch": 0.09, "grad_norm": 0.5687116384506226, "learning_rate": 0.0005987907083029875, "loss": 2.1182, "step": 2578 }, { "epoch": 0.09, "grad_norm": 0.5501894354820251, "learning_rate": 0.0005987897705897552, "loss": 2.0846, "step": 2579 }, { "epoch": 0.09, "grad_norm": 0.5360901355743408, "learning_rate": 0.0005987888325138361, "loss": 2.1022, "step": 2580 }, { "epoch": 0.09, "grad_norm": 0.5586152076721191, "learning_rate": 0.0005987878940752314, "loss": 2.1177, "step": 2581 }, { "epoch": 0.09, "grad_norm": 0.5814973711967468, "learning_rate": 0.0005987869552739422, "loss": 2.0865, "step": 2582 }, { "epoch": 0.09, "grad_norm": 0.5509131550788879, "learning_rate": 0.0005987860161099697, "loss": 2.116, "step": 2583 }, { "epoch": 0.09, "grad_norm": 0.5620102882385254, "learning_rate": 0.0005987850765833149, "loss": 2.0705, "step": 2584 }, { "epoch": 0.09, "grad_norm": 0.5726593732833862, "learning_rate": 0.0005987841366939792, "loss": 2.1436, "step": 2585 }, { "epoch": 0.09, "grad_norm": 0.5423988103866577, "learning_rate": 0.0005987831964419635, "loss": 2.1111, "step": 2586 }, { "epoch": 0.09, "grad_norm": 0.545548677444458, "learning_rate": 0.0005987822558272689, "loss": 2.1183, "step": 2587 }, { "epoch": 0.09, "grad_norm": 0.5546685457229614, "learning_rate": 0.0005987813148498969, "loss": 2.1893, "step": 2588 }, { "epoch": 0.09, "grad_norm": 0.5788893699645996, "learning_rate": 0.0005987803735098482, "loss": 2.0584, "step": 2589 }, { "epoch": 0.09, "grad_norm": 0.5610098838806152, "learning_rate": 0.0005987794318071243, "loss": 2.063, "step": 2590 }, { "epoch": 0.09, "grad_norm": 0.5715433359146118, "learning_rate": 0.000598778489741726, "loss": 2.0364, "step": 2591 }, { "epoch": 0.09, "grad_norm": 0.5493336915969849, "learning_rate": 0.0005987775473136549, "loss": 2.0882, "step": 2592 }, { "epoch": 0.09, "grad_norm": 0.57710862159729, "learning_rate": 0.0005987766045229116, "loss": 2.1077, "step": 2593 }, { "epoch": 0.09, "grad_norm": 0.5702633261680603, "learning_rate": 0.0005987756613694978, "loss": 2.0753, "step": 2594 }, { "epoch": 0.09, "grad_norm": 0.5802993178367615, "learning_rate": 0.0005987747178534142, "loss": 2.1723, "step": 2595 }, { "epoch": 0.09, "grad_norm": 0.6048011779785156, "learning_rate": 0.0005987737739746622, "loss": 2.2031, "step": 2596 }, { "epoch": 0.09, "grad_norm": 0.5710760951042175, "learning_rate": 0.0005987728297332429, "loss": 2.1692, "step": 2597 }, { "epoch": 0.09, "grad_norm": 0.5669680833816528, "learning_rate": 0.0005987718851291571, "loss": 2.0846, "step": 2598 }, { "epoch": 0.09, "grad_norm": 0.5571067333221436, "learning_rate": 0.0005987709401624066, "loss": 2.034, "step": 2599 }, { "epoch": 0.09, "grad_norm": 0.5810261368751526, "learning_rate": 0.0005987699948329921, "loss": 2.1054, "step": 2600 }, { "epoch": 0.09, "grad_norm": 0.5356662273406982, "learning_rate": 0.0005987690491409148, "loss": 2.0255, "step": 2601 }, { "epoch": 0.09, "grad_norm": 0.5988640785217285, "learning_rate": 0.0005987681030861758, "loss": 2.0366, "step": 2602 }, { "epoch": 0.09, "grad_norm": 0.5807273387908936, "learning_rate": 0.0005987671566687765, "loss": 2.1078, "step": 2603 }, { "epoch": 0.09, "grad_norm": 0.5492330193519592, "learning_rate": 0.0005987662098887179, "loss": 2.1047, "step": 2604 }, { "epoch": 0.09, "grad_norm": 0.5694719552993774, "learning_rate": 0.0005987652627460011, "loss": 2.0154, "step": 2605 }, { "epoch": 0.09, "grad_norm": 0.5696136355400085, "learning_rate": 0.0005987643152406273, "loss": 2.0093, "step": 2606 }, { "epoch": 0.09, "grad_norm": 0.5829319953918457, "learning_rate": 0.0005987633673725976, "loss": 1.9883, "step": 2607 }, { "epoch": 0.09, "grad_norm": 0.6874316930770874, "learning_rate": 0.0005987624191419133, "loss": 2.114, "step": 2608 }, { "epoch": 0.09, "grad_norm": 0.5582162141799927, "learning_rate": 0.0005987614705485753, "loss": 2.1093, "step": 2609 }, { "epoch": 0.09, "grad_norm": 0.5381011962890625, "learning_rate": 0.000598760521592585, "loss": 2.083, "step": 2610 }, { "epoch": 0.09, "grad_norm": 0.5510191917419434, "learning_rate": 0.0005987595722739433, "loss": 2.105, "step": 2611 }, { "epoch": 0.09, "grad_norm": 0.5735052227973938, "learning_rate": 0.0005987586225926517, "loss": 2.0369, "step": 2612 }, { "epoch": 0.09, "grad_norm": 0.5594403147697449, "learning_rate": 0.0005987576725487111, "loss": 2.0678, "step": 2613 }, { "epoch": 0.09, "grad_norm": 0.5428799986839294, "learning_rate": 0.0005987567221421227, "loss": 1.9899, "step": 2614 }, { "epoch": 0.09, "grad_norm": 0.5659263134002686, "learning_rate": 0.0005987557713728876, "loss": 2.0133, "step": 2615 }, { "epoch": 0.09, "grad_norm": 0.5752843618392944, "learning_rate": 0.000598754820241007, "loss": 2.0808, "step": 2616 }, { "epoch": 0.09, "grad_norm": 0.5549869537353516, "learning_rate": 0.0005987538687464821, "loss": 2.0698, "step": 2617 }, { "epoch": 0.09, "grad_norm": 0.6150811910629272, "learning_rate": 0.0005987529168893141, "loss": 2.0393, "step": 2618 }, { "epoch": 0.09, "grad_norm": 0.5798125863075256, "learning_rate": 0.000598751964669504, "loss": 2.0623, "step": 2619 }, { "epoch": 0.09, "grad_norm": 0.5731610655784607, "learning_rate": 0.0005987510120870529, "loss": 2.0268, "step": 2620 }, { "epoch": 0.09, "grad_norm": 0.622230589389801, "learning_rate": 0.0005987500591419624, "loss": 2.0277, "step": 2621 }, { "epoch": 0.09, "grad_norm": 0.5570639371871948, "learning_rate": 0.0005987491058342331, "loss": 2.0494, "step": 2622 }, { "epoch": 0.09, "grad_norm": 0.5857001543045044, "learning_rate": 0.0005987481521638665, "loss": 2.07, "step": 2623 }, { "epoch": 0.09, "grad_norm": 0.61093670129776, "learning_rate": 0.0005987471981308637, "loss": 2.0179, "step": 2624 }, { "epoch": 0.09, "grad_norm": 0.5472989678382874, "learning_rate": 0.0005987462437352257, "loss": 1.9965, "step": 2625 }, { "epoch": 0.09, "grad_norm": 0.5810383558273315, "learning_rate": 0.0005987452889769539, "loss": 2.2164, "step": 2626 }, { "epoch": 0.09, "grad_norm": 0.5817997455596924, "learning_rate": 0.0005987443338560493, "loss": 2.0992, "step": 2627 }, { "epoch": 0.09, "grad_norm": 0.5517278909683228, "learning_rate": 0.000598743378372513, "loss": 2.0869, "step": 2628 }, { "epoch": 0.09, "grad_norm": 0.5501973628997803, "learning_rate": 0.0005987424225263464, "loss": 2.09, "step": 2629 }, { "epoch": 0.09, "grad_norm": 0.5636745691299438, "learning_rate": 0.0005987414663175504, "loss": 2.0917, "step": 2630 }, { "epoch": 0.09, "grad_norm": 0.5594139099121094, "learning_rate": 0.0005987405097461263, "loss": 2.2359, "step": 2631 }, { "epoch": 0.09, "grad_norm": 0.5684758424758911, "learning_rate": 0.0005987395528120753, "loss": 2.0445, "step": 2632 }, { "epoch": 0.09, "grad_norm": 0.5566543936729431, "learning_rate": 0.0005987385955153984, "loss": 2.113, "step": 2633 }, { "epoch": 0.09, "grad_norm": 0.5675779581069946, "learning_rate": 0.000598737637856097, "loss": 2.0693, "step": 2634 }, { "epoch": 0.09, "grad_norm": 0.561450719833374, "learning_rate": 0.0005987366798341721, "loss": 2.0571, "step": 2635 }, { "epoch": 0.09, "grad_norm": 0.5432037711143494, "learning_rate": 0.0005987357214496247, "loss": 2.0487, "step": 2636 }, { "epoch": 0.09, "grad_norm": 0.6164186000823975, "learning_rate": 0.0005987347627024564, "loss": 2.0823, "step": 2637 }, { "epoch": 0.09, "grad_norm": 0.5303338766098022, "learning_rate": 0.000598733803592668, "loss": 2.099, "step": 2638 }, { "epoch": 0.09, "grad_norm": 0.5936246514320374, "learning_rate": 0.0005987328441202608, "loss": 2.0503, "step": 2639 }, { "epoch": 0.09, "grad_norm": 0.5367380380630493, "learning_rate": 0.0005987318842852359, "loss": 2.0901, "step": 2640 }, { "epoch": 0.09, "grad_norm": 0.563468337059021, "learning_rate": 0.0005987309240875946, "loss": 2.1662, "step": 2641 }, { "epoch": 0.09, "grad_norm": 0.5420204401016235, "learning_rate": 0.0005987299635273379, "loss": 2.0584, "step": 2642 }, { "epoch": 0.09, "grad_norm": 0.5748552680015564, "learning_rate": 0.0005987290026044671, "loss": 2.0788, "step": 2643 }, { "epoch": 0.09, "grad_norm": 0.5497375130653381, "learning_rate": 0.0005987280413189832, "loss": 2.1121, "step": 2644 }, { "epoch": 0.09, "grad_norm": 0.5775030255317688, "learning_rate": 0.0005987270796708875, "loss": 2.0454, "step": 2645 }, { "epoch": 0.09, "grad_norm": 0.5806235671043396, "learning_rate": 0.0005987261176601812, "loss": 2.1035, "step": 2646 }, { "epoch": 0.09, "grad_norm": 0.5313108563423157, "learning_rate": 0.0005987251552868653, "loss": 2.1142, "step": 2647 }, { "epoch": 0.09, "grad_norm": 0.5564276576042175, "learning_rate": 0.0005987241925509413, "loss": 2.0819, "step": 2648 }, { "epoch": 0.09, "grad_norm": 0.6128101348876953, "learning_rate": 0.00059872322945241, "loss": 2.0875, "step": 2649 }, { "epoch": 0.09, "grad_norm": 0.5454807281494141, "learning_rate": 0.0005987222659912726, "loss": 2.0334, "step": 2650 }, { "epoch": 0.09, "grad_norm": 0.5745828747749329, "learning_rate": 0.0005987213021675306, "loss": 2.167, "step": 2651 }, { "epoch": 0.09, "grad_norm": 0.5479193925857544, "learning_rate": 0.0005987203379811848, "loss": 2.0743, "step": 2652 }, { "epoch": 0.09, "grad_norm": 0.5582199692726135, "learning_rate": 0.0005987193734322366, "loss": 1.9895, "step": 2653 }, { "epoch": 0.09, "grad_norm": 0.5448271036148071, "learning_rate": 0.000598718408520687, "loss": 2.0192, "step": 2654 }, { "epoch": 0.09, "grad_norm": 0.573101282119751, "learning_rate": 0.0005987174432465374, "loss": 2.1067, "step": 2655 }, { "epoch": 0.09, "grad_norm": 0.548770546913147, "learning_rate": 0.0005987164776097887, "loss": 2.0722, "step": 2656 }, { "epoch": 0.09, "grad_norm": 0.5619913935661316, "learning_rate": 0.0005987155116104424, "loss": 2.0497, "step": 2657 }, { "epoch": 0.09, "grad_norm": 0.5348354578018188, "learning_rate": 0.0005987145452484993, "loss": 2.0792, "step": 2658 }, { "epoch": 0.09, "grad_norm": 0.5611351728439331, "learning_rate": 0.0005987135785239609, "loss": 2.1168, "step": 2659 }, { "epoch": 0.09, "grad_norm": 0.6390474438667297, "learning_rate": 0.0005987126114368281, "loss": 2.0578, "step": 2660 }, { "epoch": 0.09, "grad_norm": 0.5294204354286194, "learning_rate": 0.0005987116439871023, "loss": 2.0247, "step": 2661 }, { "epoch": 0.09, "grad_norm": 0.5460315346717834, "learning_rate": 0.0005987106761747845, "loss": 2.0308, "step": 2662 }, { "epoch": 0.09, "grad_norm": 0.5698131918907166, "learning_rate": 0.000598709707999876, "loss": 2.0721, "step": 2663 }, { "epoch": 0.09, "grad_norm": 0.5468193888664246, "learning_rate": 0.0005987087394623778, "loss": 2.0064, "step": 2664 }, { "epoch": 0.09, "grad_norm": 0.540272057056427, "learning_rate": 0.0005987077705622915, "loss": 2.0259, "step": 2665 }, { "epoch": 0.09, "grad_norm": 0.572232186794281, "learning_rate": 0.0005987068012996178, "loss": 2.1585, "step": 2666 }, { "epoch": 0.09, "grad_norm": 0.5639386773109436, "learning_rate": 0.000598705831674358, "loss": 2.0998, "step": 2667 }, { "epoch": 0.09, "grad_norm": 0.5561578869819641, "learning_rate": 0.0005987048616865133, "loss": 2.1938, "step": 2668 }, { "epoch": 0.09, "grad_norm": 0.5366713404655457, "learning_rate": 0.000598703891336085, "loss": 2.0859, "step": 2669 }, { "epoch": 0.09, "grad_norm": 0.5598679780960083, "learning_rate": 0.0005987029206230743, "loss": 2.1047, "step": 2670 }, { "epoch": 0.09, "grad_norm": 0.5846872329711914, "learning_rate": 0.0005987019495474821, "loss": 2.0255, "step": 2671 }, { "epoch": 0.09, "grad_norm": 0.5456761121749878, "learning_rate": 0.0005987009781093098, "loss": 1.9959, "step": 2672 }, { "epoch": 0.09, "grad_norm": 0.5502136945724487, "learning_rate": 0.0005987000063085585, "loss": 2.1147, "step": 2673 }, { "epoch": 0.09, "grad_norm": 0.5332981944084167, "learning_rate": 0.0005986990341452293, "loss": 2.0858, "step": 2674 }, { "epoch": 0.09, "grad_norm": 0.5723375678062439, "learning_rate": 0.0005986980616193237, "loss": 2.0609, "step": 2675 }, { "epoch": 0.09, "grad_norm": 0.5386002063751221, "learning_rate": 0.0005986970887308426, "loss": 2.0658, "step": 2676 }, { "epoch": 0.09, "grad_norm": 0.5593492984771729, "learning_rate": 0.0005986961154797872, "loss": 2.0571, "step": 2677 }, { "epoch": 0.09, "grad_norm": 0.5823412537574768, "learning_rate": 0.0005986951418661588, "loss": 2.1705, "step": 2678 }, { "epoch": 0.09, "grad_norm": 0.5350669026374817, "learning_rate": 0.0005986941678899584, "loss": 2.0966, "step": 2679 }, { "epoch": 0.09, "grad_norm": 0.5670679807662964, "learning_rate": 0.0005986931935511873, "loss": 2.1028, "step": 2680 }, { "epoch": 0.09, "grad_norm": 0.5498728156089783, "learning_rate": 0.0005986922188498467, "loss": 2.0682, "step": 2681 }, { "epoch": 0.09, "grad_norm": 0.5675755739212036, "learning_rate": 0.0005986912437859378, "loss": 2.1027, "step": 2682 }, { "epoch": 0.09, "grad_norm": 0.5213652849197388, "learning_rate": 0.0005986902683594617, "loss": 2.0935, "step": 2683 }, { "epoch": 0.09, "grad_norm": 0.5576221346855164, "learning_rate": 0.0005986892925704196, "loss": 2.1448, "step": 2684 }, { "epoch": 0.09, "grad_norm": 0.581074595451355, "learning_rate": 0.0005986883164188128, "loss": 2.1973, "step": 2685 }, { "epoch": 0.09, "grad_norm": 0.5352590680122375, "learning_rate": 0.0005986873399046423, "loss": 2.1158, "step": 2686 }, { "epoch": 0.09, "grad_norm": 0.5397080779075623, "learning_rate": 0.0005986863630279095, "loss": 2.0847, "step": 2687 }, { "epoch": 0.09, "grad_norm": 0.6151843070983887, "learning_rate": 0.0005986853857886153, "loss": 2.0625, "step": 2688 }, { "epoch": 0.09, "grad_norm": 0.5359674096107483, "learning_rate": 0.0005986844081867611, "loss": 2.0331, "step": 2689 }, { "epoch": 0.09, "grad_norm": 0.5532729625701904, "learning_rate": 0.0005986834302223481, "loss": 2.0294, "step": 2690 }, { "epoch": 0.09, "grad_norm": 0.5476319789886475, "learning_rate": 0.0005986824518953775, "loss": 2.0722, "step": 2691 }, { "epoch": 0.09, "grad_norm": 0.533793032169342, "learning_rate": 0.0005986814732058502, "loss": 2.0343, "step": 2692 }, { "epoch": 0.09, "grad_norm": 0.585227370262146, "learning_rate": 0.0005986804941537677, "loss": 2.0317, "step": 2693 }, { "epoch": 0.09, "grad_norm": 0.5499274730682373, "learning_rate": 0.0005986795147391312, "loss": 2.1003, "step": 2694 }, { "epoch": 0.09, "grad_norm": 0.5438870787620544, "learning_rate": 0.0005986785349619417, "loss": 2.1281, "step": 2695 }, { "epoch": 0.09, "grad_norm": 0.5695158243179321, "learning_rate": 0.0005986775548222004, "loss": 2.1913, "step": 2696 }, { "epoch": 0.09, "grad_norm": 0.5289108753204346, "learning_rate": 0.0005986765743199085, "loss": 2.0687, "step": 2697 }, { "epoch": 0.09, "grad_norm": 0.5448488593101501, "learning_rate": 0.0005986755934550674, "loss": 2.0885, "step": 2698 }, { "epoch": 0.09, "grad_norm": 0.5424888134002686, "learning_rate": 0.000598674612227678, "loss": 1.9966, "step": 2699 }, { "epoch": 0.09, "grad_norm": 0.5339854955673218, "learning_rate": 0.0005986736306377418, "loss": 2.0228, "step": 2700 }, { "epoch": 0.09, "grad_norm": 0.5505596995353699, "learning_rate": 0.0005986726486852597, "loss": 2.079, "step": 2701 }, { "epoch": 0.09, "grad_norm": 0.5660893321037292, "learning_rate": 0.000598671666370233, "loss": 2.1176, "step": 2702 }, { "epoch": 0.09, "grad_norm": 0.5406308770179749, "learning_rate": 0.000598670683692663, "loss": 2.1139, "step": 2703 }, { "epoch": 0.09, "grad_norm": 0.5771010518074036, "learning_rate": 0.0005986697006525508, "loss": 2.0462, "step": 2704 }, { "epoch": 0.09, "grad_norm": 0.553999662399292, "learning_rate": 0.0005986687172498975, "loss": 2.0925, "step": 2705 }, { "epoch": 0.09, "grad_norm": 0.5530581474304199, "learning_rate": 0.0005986677334847044, "loss": 2.0839, "step": 2706 }, { "epoch": 0.09, "grad_norm": 0.5714851021766663, "learning_rate": 0.0005986667493569727, "loss": 2.1172, "step": 2707 }, { "epoch": 0.09, "grad_norm": 0.5544923543930054, "learning_rate": 0.0005986657648667036, "loss": 2.0504, "step": 2708 }, { "epoch": 0.09, "grad_norm": 0.5444939136505127, "learning_rate": 0.0005986647800138981, "loss": 2.0674, "step": 2709 }, { "epoch": 0.09, "grad_norm": 0.5760992765426636, "learning_rate": 0.0005986637947985577, "loss": 2.1295, "step": 2710 }, { "epoch": 0.09, "grad_norm": 0.5343921780586243, "learning_rate": 0.0005986628092206835, "loss": 2.079, "step": 2711 }, { "epoch": 0.09, "grad_norm": 0.5296609401702881, "learning_rate": 0.0005986618232802766, "loss": 2.0378, "step": 2712 }, { "epoch": 0.09, "grad_norm": 0.5468586087226868, "learning_rate": 0.0005986608369773383, "loss": 2.0887, "step": 2713 }, { "epoch": 0.09, "grad_norm": 0.5472200512886047, "learning_rate": 0.0005986598503118697, "loss": 2.0666, "step": 2714 }, { "epoch": 0.09, "grad_norm": 0.5578466653823853, "learning_rate": 0.0005986588632838721, "loss": 2.1463, "step": 2715 }, { "epoch": 0.09, "grad_norm": 0.5701403021812439, "learning_rate": 0.0005986578758933466, "loss": 2.0778, "step": 2716 }, { "epoch": 0.09, "grad_norm": 0.566523551940918, "learning_rate": 0.0005986568881402944, "loss": 2.0997, "step": 2717 }, { "epoch": 0.09, "grad_norm": 0.5346586108207703, "learning_rate": 0.0005986559000247168, "loss": 1.9755, "step": 2718 }, { "epoch": 0.09, "grad_norm": 0.5623465776443481, "learning_rate": 0.0005986549115466149, "loss": 2.0866, "step": 2719 }, { "epoch": 0.09, "grad_norm": 0.5374993085861206, "learning_rate": 0.00059865392270599, "loss": 2.0399, "step": 2720 }, { "epoch": 0.09, "grad_norm": 0.5549450516700745, "learning_rate": 0.0005986529335028432, "loss": 2.08, "step": 2721 }, { "epoch": 0.09, "grad_norm": 0.5518702268600464, "learning_rate": 0.0005986519439371758, "loss": 2.0647, "step": 2722 }, { "epoch": 0.09, "grad_norm": 0.5519006848335266, "learning_rate": 0.0005986509540089889, "loss": 2.0771, "step": 2723 }, { "epoch": 0.09, "grad_norm": 0.5515415072441101, "learning_rate": 0.0005986499637182837, "loss": 2.0504, "step": 2724 }, { "epoch": 0.09, "grad_norm": 0.5416601300239563, "learning_rate": 0.0005986489730650616, "loss": 2.0927, "step": 2725 }, { "epoch": 0.09, "grad_norm": 0.5596860647201538, "learning_rate": 0.0005986479820493235, "loss": 2.0894, "step": 2726 }, { "epoch": 0.09, "grad_norm": 0.5467536449432373, "learning_rate": 0.0005986469906710708, "loss": 2.0562, "step": 2727 }, { "epoch": 0.09, "grad_norm": 0.5484260320663452, "learning_rate": 0.0005986459989303047, "loss": 2.0848, "step": 2728 }, { "epoch": 0.09, "grad_norm": 0.5453333854675293, "learning_rate": 0.0005986450068270262, "loss": 2.1833, "step": 2729 }, { "epoch": 0.09, "grad_norm": 0.5634725689888, "learning_rate": 0.0005986440143612369, "loss": 1.9602, "step": 2730 }, { "epoch": 0.09, "grad_norm": 0.5949300527572632, "learning_rate": 0.0005986430215329377, "loss": 1.9937, "step": 2731 }, { "epoch": 0.09, "grad_norm": 0.5744431614875793, "learning_rate": 0.0005986420283421298, "loss": 2.0705, "step": 2732 }, { "epoch": 0.09, "grad_norm": 0.5838820338249207, "learning_rate": 0.0005986410347888144, "loss": 2.0101, "step": 2733 }, { "epoch": 0.09, "grad_norm": 0.5620152354240417, "learning_rate": 0.0005986400408729929, "loss": 2.1542, "step": 2734 }, { "epoch": 0.09, "grad_norm": 0.5510026216506958, "learning_rate": 0.0005986390465946665, "loss": 2.0455, "step": 2735 }, { "epoch": 0.09, "grad_norm": 0.6113720536231995, "learning_rate": 0.0005986380519538362, "loss": 2.0697, "step": 2736 }, { "epoch": 0.09, "grad_norm": 0.5470184683799744, "learning_rate": 0.0005986370569505033, "loss": 2.1075, "step": 2737 }, { "epoch": 0.09, "grad_norm": 0.5871354341506958, "learning_rate": 0.0005986360615846689, "loss": 2.1075, "step": 2738 }, { "epoch": 0.09, "grad_norm": 0.5398772954940796, "learning_rate": 0.0005986350658563345, "loss": 2.0522, "step": 2739 }, { "epoch": 0.09, "grad_norm": 0.5508467555046082, "learning_rate": 0.0005986340697655011, "loss": 2.0193, "step": 2740 }, { "epoch": 0.09, "grad_norm": 0.5340198874473572, "learning_rate": 0.0005986330733121698, "loss": 1.9788, "step": 2741 }, { "epoch": 0.09, "grad_norm": 0.5571408271789551, "learning_rate": 0.0005986320764963421, "loss": 2.0394, "step": 2742 }, { "epoch": 0.09, "grad_norm": 0.5557830333709717, "learning_rate": 0.0005986310793180191, "loss": 2.0955, "step": 2743 }, { "epoch": 0.09, "grad_norm": 0.5284414887428284, "learning_rate": 0.0005986300817772018, "loss": 1.997, "step": 2744 }, { "epoch": 0.09, "grad_norm": 0.5765806436538696, "learning_rate": 0.0005986290838738917, "loss": 2.1732, "step": 2745 }, { "epoch": 0.09, "grad_norm": 0.518588662147522, "learning_rate": 0.0005986280856080899, "loss": 2.0645, "step": 2746 }, { "epoch": 0.09, "grad_norm": 0.5548034906387329, "learning_rate": 0.0005986270869797976, "loss": 2.0784, "step": 2747 }, { "epoch": 0.09, "grad_norm": 0.5604894161224365, "learning_rate": 0.0005986260879890159, "loss": 2.0967, "step": 2748 }, { "epoch": 0.09, "grad_norm": 0.5614869594573975, "learning_rate": 0.0005986250886357463, "loss": 2.1511, "step": 2749 }, { "epoch": 0.09, "grad_norm": 0.5355534553527832, "learning_rate": 0.0005986240889199897, "loss": 2.0707, "step": 2750 }, { "epoch": 0.09, "grad_norm": 0.5614394545555115, "learning_rate": 0.0005986230888417475, "loss": 2.0142, "step": 2751 }, { "epoch": 0.09, "grad_norm": 0.551139771938324, "learning_rate": 0.0005986220884010209, "loss": 2.1005, "step": 2752 }, { "epoch": 0.09, "grad_norm": 0.5493529438972473, "learning_rate": 0.0005986210875978111, "loss": 2.0293, "step": 2753 }, { "epoch": 0.09, "grad_norm": 0.5524946451187134, "learning_rate": 0.0005986200864321193, "loss": 2.1341, "step": 2754 }, { "epoch": 0.09, "grad_norm": 0.566623330116272, "learning_rate": 0.0005986190849039467, "loss": 2.0493, "step": 2755 }, { "epoch": 0.09, "grad_norm": 0.5748423933982849, "learning_rate": 0.0005986180830132945, "loss": 2.0107, "step": 2756 }, { "epoch": 0.09, "grad_norm": 0.5940728187561035, "learning_rate": 0.0005986170807601639, "loss": 2.0863, "step": 2757 }, { "epoch": 0.09, "grad_norm": 0.5404765605926514, "learning_rate": 0.0005986160781445563, "loss": 2.0031, "step": 2758 }, { "epoch": 0.09, "grad_norm": 0.5406104922294617, "learning_rate": 0.0005986150751664727, "loss": 2.0396, "step": 2759 }, { "epoch": 0.09, "grad_norm": 0.5818825364112854, "learning_rate": 0.0005986140718259144, "loss": 2.0433, "step": 2760 }, { "epoch": 0.09, "grad_norm": 0.5542431473731995, "learning_rate": 0.0005986130681228826, "loss": 2.0904, "step": 2761 }, { "epoch": 0.09, "grad_norm": 0.5607730150222778, "learning_rate": 0.0005986120640573785, "loss": 2.096, "step": 2762 }, { "epoch": 0.09, "grad_norm": 0.5599477887153625, "learning_rate": 0.0005986110596294035, "loss": 2.0359, "step": 2763 }, { "epoch": 0.09, "grad_norm": 0.5368726253509521, "learning_rate": 0.0005986100548389585, "loss": 2.1036, "step": 2764 }, { "epoch": 0.09, "grad_norm": 0.5588089823722839, "learning_rate": 0.000598609049686045, "loss": 2.1255, "step": 2765 }, { "epoch": 0.09, "grad_norm": 0.5556683540344238, "learning_rate": 0.0005986080441706641, "loss": 2.0221, "step": 2766 }, { "epoch": 0.09, "grad_norm": 0.5292309522628784, "learning_rate": 0.0005986070382928171, "loss": 1.953, "step": 2767 }, { "epoch": 0.09, "grad_norm": 0.5422521233558655, "learning_rate": 0.000598606032052505, "loss": 1.9912, "step": 2768 }, { "epoch": 0.09, "grad_norm": 0.5432966947555542, "learning_rate": 0.0005986050254497292, "loss": 2.0595, "step": 2769 }, { "epoch": 0.09, "grad_norm": 0.5463536381721497, "learning_rate": 0.000598604018484491, "loss": 2.0482, "step": 2770 }, { "epoch": 0.09, "grad_norm": 0.5636502504348755, "learning_rate": 0.0005986030111567914, "loss": 2.0051, "step": 2771 }, { "epoch": 0.09, "grad_norm": 0.548082172870636, "learning_rate": 0.0005986020034666319, "loss": 2.1309, "step": 2772 }, { "epoch": 0.09, "grad_norm": 0.545768678188324, "learning_rate": 0.0005986009954140134, "loss": 2.0159, "step": 2773 }, { "epoch": 0.09, "grad_norm": 0.5424743294715881, "learning_rate": 0.0005985999869989373, "loss": 2.1144, "step": 2774 }, { "epoch": 0.09, "grad_norm": 0.553497850894928, "learning_rate": 0.0005985989782214049, "loss": 2.0483, "step": 2775 }, { "epoch": 0.09, "grad_norm": 0.550682544708252, "learning_rate": 0.0005985979690814173, "loss": 2.0726, "step": 2776 }, { "epoch": 0.09, "grad_norm": 0.5556728839874268, "learning_rate": 0.0005985969595789758, "loss": 1.9908, "step": 2777 }, { "epoch": 0.09, "grad_norm": 0.5744667053222656, "learning_rate": 0.0005985959497140815, "loss": 2.0283, "step": 2778 }, { "epoch": 0.09, "grad_norm": 0.5444409847259521, "learning_rate": 0.0005985949394867358, "loss": 2.0812, "step": 2779 }, { "epoch": 0.09, "grad_norm": 0.5569955706596375, "learning_rate": 0.0005985939288969397, "loss": 2.0931, "step": 2780 }, { "epoch": 0.09, "grad_norm": 0.5579214096069336, "learning_rate": 0.0005985929179446947, "loss": 2.0109, "step": 2781 }, { "epoch": 0.09, "grad_norm": 0.5819805264472961, "learning_rate": 0.000598591906630002, "loss": 2.1052, "step": 2782 }, { "epoch": 0.09, "grad_norm": 0.5352329015731812, "learning_rate": 0.0005985908949528626, "loss": 2.1618, "step": 2783 }, { "epoch": 0.09, "grad_norm": 0.5398685932159424, "learning_rate": 0.0005985898829132777, "loss": 2.0561, "step": 2784 }, { "epoch": 0.09, "grad_norm": 0.5613179206848145, "learning_rate": 0.0005985888705112489, "loss": 2.0868, "step": 2785 }, { "epoch": 0.09, "grad_norm": 0.5603026747703552, "learning_rate": 0.0005985878577467771, "loss": 2.0379, "step": 2786 }, { "epoch": 0.09, "grad_norm": 0.5471217632293701, "learning_rate": 0.0005985868446198635, "loss": 2.1095, "step": 2787 }, { "epoch": 0.09, "grad_norm": 0.5366361737251282, "learning_rate": 0.0005985858311305097, "loss": 2.0591, "step": 2788 }, { "epoch": 0.09, "grad_norm": 0.5502097010612488, "learning_rate": 0.0005985848172787166, "loss": 2.1291, "step": 2789 }, { "epoch": 0.09, "grad_norm": 0.5581034421920776, "learning_rate": 0.0005985838030644855, "loss": 2.1106, "step": 2790 }, { "epoch": 0.09, "grad_norm": 0.5371944904327393, "learning_rate": 0.0005985827884878177, "loss": 2.0829, "step": 2791 }, { "epoch": 0.09, "grad_norm": 0.5618883371353149, "learning_rate": 0.0005985817735487144, "loss": 2.0946, "step": 2792 }, { "epoch": 0.09, "grad_norm": 0.5574508309364319, "learning_rate": 0.0005985807582471768, "loss": 2.0033, "step": 2793 }, { "epoch": 0.09, "grad_norm": 0.5429455637931824, "learning_rate": 0.0005985797425832062, "loss": 2.0896, "step": 2794 }, { "epoch": 0.09, "grad_norm": 0.5258583426475525, "learning_rate": 0.0005985787265568038, "loss": 2.0858, "step": 2795 }, { "epoch": 0.09, "grad_norm": 0.5500630736351013, "learning_rate": 0.0005985777101679706, "loss": 2.0557, "step": 2796 }, { "epoch": 0.09, "grad_norm": 0.5631726980209351, "learning_rate": 0.0005985766934167082, "loss": 2.1035, "step": 2797 }, { "epoch": 0.09, "grad_norm": 0.5500145554542542, "learning_rate": 0.0005985756763030178, "loss": 2.1217, "step": 2798 }, { "epoch": 0.09, "grad_norm": 0.5668510794639587, "learning_rate": 0.0005985746588269004, "loss": 2.1052, "step": 2799 }, { "epoch": 0.09, "grad_norm": 0.5893809199333191, "learning_rate": 0.0005985736409883574, "loss": 2.0171, "step": 2800 }, { "epoch": 0.09, "grad_norm": 0.5349266529083252, "learning_rate": 0.0005985726227873899, "loss": 2.0483, "step": 2801 }, { "epoch": 0.09, "grad_norm": 0.5569685697555542, "learning_rate": 0.0005985716042239994, "loss": 2.0618, "step": 2802 }, { "epoch": 0.09, "grad_norm": 0.5588995218276978, "learning_rate": 0.0005985705852981869, "loss": 1.9417, "step": 2803 }, { "epoch": 0.09, "grad_norm": 0.5540719628334045, "learning_rate": 0.0005985695660099536, "loss": 2.0842, "step": 2804 }, { "epoch": 0.09, "grad_norm": 0.5378603935241699, "learning_rate": 0.000598568546359301, "loss": 2.0522, "step": 2805 }, { "epoch": 0.09, "grad_norm": 0.5739651322364807, "learning_rate": 0.0005985675263462301, "loss": 2.1475, "step": 2806 }, { "epoch": 0.09, "grad_norm": 0.5618023872375488, "learning_rate": 0.0005985665059707421, "loss": 2.1671, "step": 2807 }, { "epoch": 0.09, "grad_norm": 0.5526533722877502, "learning_rate": 0.0005985654852328384, "loss": 2.0129, "step": 2808 }, { "epoch": 0.09, "grad_norm": 0.5575326681137085, "learning_rate": 0.0005985644641325204, "loss": 2.0723, "step": 2809 }, { "epoch": 0.09, "grad_norm": 0.5284706354141235, "learning_rate": 0.0005985634426697889, "loss": 2.0825, "step": 2810 }, { "epoch": 0.09, "grad_norm": 0.5366076827049255, "learning_rate": 0.0005985624208446456, "loss": 2.069, "step": 2811 }, { "epoch": 0.09, "grad_norm": 0.5635382533073425, "learning_rate": 0.0005985613986570912, "loss": 2.0747, "step": 2812 }, { "epoch": 0.09, "grad_norm": 0.5484215617179871, "learning_rate": 0.0005985603761071274, "loss": 2.0177, "step": 2813 }, { "epoch": 0.09, "grad_norm": 0.5457999110221863, "learning_rate": 0.0005985593531947554, "loss": 2.0755, "step": 2814 }, { "epoch": 0.09, "grad_norm": 0.5726468563079834, "learning_rate": 0.0005985583299199762, "loss": 2.0304, "step": 2815 }, { "epoch": 0.09, "grad_norm": 0.5333623290061951, "learning_rate": 0.0005985573062827912, "loss": 1.9966, "step": 2816 }, { "epoch": 0.09, "grad_norm": 0.5146272778511047, "learning_rate": 0.0005985562822832017, "loss": 2.1285, "step": 2817 }, { "epoch": 0.09, "grad_norm": 0.556178629398346, "learning_rate": 0.0005985552579212088, "loss": 1.982, "step": 2818 }, { "epoch": 0.09, "grad_norm": 0.567064642906189, "learning_rate": 0.0005985542331968138, "loss": 2.0213, "step": 2819 }, { "epoch": 0.09, "grad_norm": 0.5377985835075378, "learning_rate": 0.000598553208110018, "loss": 2.0648, "step": 2820 }, { "epoch": 0.09, "grad_norm": 0.5694405436515808, "learning_rate": 0.0005985521826608225, "loss": 2.1154, "step": 2821 }, { "epoch": 0.09, "grad_norm": 0.6146084666252136, "learning_rate": 0.0005985511568492287, "loss": 2.0242, "step": 2822 }, { "epoch": 0.09, "grad_norm": 0.5690845251083374, "learning_rate": 0.0005985501306752378, "loss": 2.1185, "step": 2823 }, { "epoch": 0.09, "grad_norm": 0.5867717266082764, "learning_rate": 0.0005985491041388511, "loss": 2.1509, "step": 2824 }, { "epoch": 0.09, "grad_norm": 0.5962746739387512, "learning_rate": 0.0005985480772400697, "loss": 2.1325, "step": 2825 }, { "epoch": 0.09, "grad_norm": 0.5736508965492249, "learning_rate": 0.0005985470499788949, "loss": 2.1196, "step": 2826 }, { "epoch": 0.09, "grad_norm": 0.5625612139701843, "learning_rate": 0.0005985460223553279, "loss": 2.0774, "step": 2827 }, { "epoch": 0.09, "grad_norm": 0.5552546381950378, "learning_rate": 0.0005985449943693702, "loss": 2.0518, "step": 2828 }, { "epoch": 0.09, "grad_norm": 0.5608000755310059, "learning_rate": 0.0005985439660210227, "loss": 2.0639, "step": 2829 }, { "epoch": 0.09, "grad_norm": 0.5415054559707642, "learning_rate": 0.0005985429373102869, "loss": 2.063, "step": 2830 }, { "epoch": 0.09, "grad_norm": 0.5635521411895752, "learning_rate": 0.0005985419082371639, "loss": 2.1333, "step": 2831 }, { "epoch": 0.09, "grad_norm": 0.5592874884605408, "learning_rate": 0.0005985408788016551, "loss": 2.17, "step": 2832 }, { "epoch": 0.09, "grad_norm": 0.5386610627174377, "learning_rate": 0.0005985398490037616, "loss": 2.1174, "step": 2833 }, { "epoch": 0.09, "grad_norm": 0.5509490370750427, "learning_rate": 0.0005985388188434847, "loss": 2.0179, "step": 2834 }, { "epoch": 0.09, "grad_norm": 0.5699051022529602, "learning_rate": 0.0005985377883208257, "loss": 2.106, "step": 2835 }, { "epoch": 0.09, "grad_norm": 0.5785225629806519, "learning_rate": 0.0005985367574357858, "loss": 2.215, "step": 2836 }, { "epoch": 0.09, "grad_norm": 0.5672889947891235, "learning_rate": 0.0005985357261883664, "loss": 2.0724, "step": 2837 }, { "epoch": 0.09, "grad_norm": 0.5460193157196045, "learning_rate": 0.0005985346945785683, "loss": 2.0165, "step": 2838 }, { "epoch": 0.09, "grad_norm": 0.5557805299758911, "learning_rate": 0.0005985336626063933, "loss": 2.0485, "step": 2839 }, { "epoch": 0.09, "grad_norm": 0.5542431473731995, "learning_rate": 0.0005985326302718423, "loss": 2.0599, "step": 2840 }, { "epoch": 0.09, "grad_norm": 0.5642279386520386, "learning_rate": 0.0005985315975749168, "loss": 2.0662, "step": 2841 }, { "epoch": 0.09, "grad_norm": 0.5315258502960205, "learning_rate": 0.0005985305645156178, "loss": 2.0356, "step": 2842 }, { "epoch": 0.09, "grad_norm": 0.5392822027206421, "learning_rate": 0.0005985295310939468, "loss": 2.0727, "step": 2843 }, { "epoch": 0.09, "grad_norm": 0.5544847846031189, "learning_rate": 0.0005985284973099049, "loss": 2.0035, "step": 2844 }, { "epoch": 0.09, "grad_norm": 0.5781930685043335, "learning_rate": 0.0005985274631634934, "loss": 2.0381, "step": 2845 }, { "epoch": 0.09, "grad_norm": 0.5740823149681091, "learning_rate": 0.0005985264286547136, "loss": 2.1824, "step": 2846 }, { "epoch": 0.09, "grad_norm": 0.5503352880477905, "learning_rate": 0.0005985253937835666, "loss": 2.1161, "step": 2847 }, { "epoch": 0.09, "grad_norm": 0.5526571273803711, "learning_rate": 0.0005985243585500538, "loss": 2.0695, "step": 2848 }, { "epoch": 0.09, "grad_norm": 0.5526812076568604, "learning_rate": 0.0005985233229541763, "loss": 2.1009, "step": 2849 }, { "epoch": 0.09, "grad_norm": 0.5296377539634705, "learning_rate": 0.0005985222869959357, "loss": 2.0733, "step": 2850 }, { "epoch": 0.09, "grad_norm": 0.5669205784797668, "learning_rate": 0.0005985212506753329, "loss": 2.0229, "step": 2851 }, { "epoch": 0.09, "grad_norm": 0.5674510598182678, "learning_rate": 0.0005985202139923692, "loss": 2.071, "step": 2852 }, { "epoch": 0.09, "grad_norm": 0.5535823106765747, "learning_rate": 0.000598519176947046, "loss": 2.1293, "step": 2853 }, { "epoch": 0.09, "grad_norm": 0.5479518175125122, "learning_rate": 0.0005985181395393646, "loss": 2.1086, "step": 2854 }, { "epoch": 0.09, "grad_norm": 0.5357324481010437, "learning_rate": 0.0005985171017693261, "loss": 2.1338, "step": 2855 }, { "epoch": 0.1, "grad_norm": 0.5493662357330322, "learning_rate": 0.0005985160636369318, "loss": 2.0899, "step": 2856 }, { "epoch": 0.1, "grad_norm": 0.5556091666221619, "learning_rate": 0.0005985150251421829, "loss": 2.085, "step": 2857 }, { "epoch": 0.1, "grad_norm": 0.5269097685813904, "learning_rate": 0.0005985139862850809, "loss": 2.0478, "step": 2858 }, { "epoch": 0.1, "grad_norm": 0.5547627210617065, "learning_rate": 0.0005985129470656269, "loss": 2.0537, "step": 2859 }, { "epoch": 0.1, "grad_norm": 0.546626091003418, "learning_rate": 0.0005985119074838222, "loss": 2.1153, "step": 2860 }, { "epoch": 0.1, "grad_norm": 0.5334498286247253, "learning_rate": 0.0005985108675396678, "loss": 2.0178, "step": 2861 }, { "epoch": 0.1, "grad_norm": 0.5322030186653137, "learning_rate": 0.0005985098272331653, "loss": 2.0312, "step": 2862 }, { "epoch": 0.1, "grad_norm": 0.5503826141357422, "learning_rate": 0.0005985087865643159, "loss": 2.0227, "step": 2863 }, { "epoch": 0.1, "grad_norm": 0.5511741042137146, "learning_rate": 0.0005985077455331209, "loss": 2.0874, "step": 2864 }, { "epoch": 0.1, "grad_norm": 0.5393429398536682, "learning_rate": 0.0005985067041395813, "loss": 2.0596, "step": 2865 }, { "epoch": 0.1, "grad_norm": 0.5434579849243164, "learning_rate": 0.0005985056623836987, "loss": 2.0716, "step": 2866 }, { "epoch": 0.1, "grad_norm": 0.5567032098770142, "learning_rate": 0.0005985046202654741, "loss": 2.0913, "step": 2867 }, { "epoch": 0.1, "grad_norm": 0.6167285442352295, "learning_rate": 0.000598503577784909, "loss": 2.0319, "step": 2868 }, { "epoch": 0.1, "grad_norm": 0.535619854927063, "learning_rate": 0.0005985025349420044, "loss": 2.1179, "step": 2869 }, { "epoch": 0.1, "grad_norm": 0.5553693771362305, "learning_rate": 0.0005985014917367616, "loss": 2.0369, "step": 2870 }, { "epoch": 0.1, "grad_norm": 0.5713651180267334, "learning_rate": 0.0005985004481691823, "loss": 2.1084, "step": 2871 }, { "epoch": 0.1, "grad_norm": 0.5612804293632507, "learning_rate": 0.0005984994042392672, "loss": 2.1185, "step": 2872 }, { "epoch": 0.1, "grad_norm": 0.5348591208457947, "learning_rate": 0.0005984983599470179, "loss": 2.1159, "step": 2873 }, { "epoch": 0.1, "grad_norm": 0.603272557258606, "learning_rate": 0.0005984973152924356, "loss": 2.1202, "step": 2874 }, { "epoch": 0.1, "grad_norm": 0.5385249257087708, "learning_rate": 0.0005984962702755214, "loss": 2.0958, "step": 2875 }, { "epoch": 0.1, "grad_norm": 0.5447034239768982, "learning_rate": 0.0005984952248962768, "loss": 2.0151, "step": 2876 }, { "epoch": 0.1, "grad_norm": 0.5691409707069397, "learning_rate": 0.000598494179154703, "loss": 2.055, "step": 2877 }, { "epoch": 0.1, "grad_norm": 0.612940788269043, "learning_rate": 0.0005984931330508012, "loss": 2.0808, "step": 2878 }, { "epoch": 0.1, "grad_norm": 0.5409872531890869, "learning_rate": 0.0005984920865845728, "loss": 2.0583, "step": 2879 }, { "epoch": 0.1, "grad_norm": 0.5788697600364685, "learning_rate": 0.0005984910397560188, "loss": 2.1407, "step": 2880 }, { "epoch": 0.1, "grad_norm": 0.6269764304161072, "learning_rate": 0.0005984899925651409, "loss": 1.9926, "step": 2881 }, { "epoch": 0.1, "grad_norm": 0.5428850650787354, "learning_rate": 0.0005984889450119399, "loss": 2.109, "step": 2882 }, { "epoch": 0.1, "grad_norm": 0.5580993294715881, "learning_rate": 0.0005984878970964174, "loss": 2.1178, "step": 2883 }, { "epoch": 0.1, "grad_norm": 0.6306157112121582, "learning_rate": 0.0005984868488185746, "loss": 2.1049, "step": 2884 }, { "epoch": 0.1, "grad_norm": 0.5679108500480652, "learning_rate": 0.0005984858001784127, "loss": 2.1491, "step": 2885 }, { "epoch": 0.1, "grad_norm": 0.6347718238830566, "learning_rate": 0.0005984847511759331, "loss": 2.0847, "step": 2886 }, { "epoch": 0.1, "grad_norm": 0.5831178426742554, "learning_rate": 0.0005984837018111369, "loss": 2.1746, "step": 2887 }, { "epoch": 0.1, "grad_norm": 0.5755581855773926, "learning_rate": 0.0005984826520840256, "loss": 2.0625, "step": 2888 }, { "epoch": 0.1, "grad_norm": 0.5919280648231506, "learning_rate": 0.0005984816019946002, "loss": 2.0315, "step": 2889 }, { "epoch": 0.1, "grad_norm": 0.6287918090820312, "learning_rate": 0.0005984805515428621, "loss": 2.1203, "step": 2890 }, { "epoch": 0.1, "grad_norm": 0.5542684197425842, "learning_rate": 0.0005984795007288128, "loss": 2.0831, "step": 2891 }, { "epoch": 0.1, "grad_norm": 0.5949580073356628, "learning_rate": 0.0005984784495524532, "loss": 2.1423, "step": 2892 }, { "epoch": 0.1, "grad_norm": 0.5924311876296997, "learning_rate": 0.0005984773980137847, "loss": 2.1237, "step": 2893 }, { "epoch": 0.1, "grad_norm": 0.5373678207397461, "learning_rate": 0.0005984763461128087, "loss": 2.1069, "step": 2894 }, { "epoch": 0.1, "grad_norm": 0.6157113313674927, "learning_rate": 0.0005984752938495263, "loss": 2.0185, "step": 2895 }, { "epoch": 0.1, "grad_norm": 0.5803090333938599, "learning_rate": 0.0005984742412239389, "loss": 2.0203, "step": 2896 }, { "epoch": 0.1, "grad_norm": 0.5257618427276611, "learning_rate": 0.0005984731882360479, "loss": 2.1121, "step": 2897 }, { "epoch": 0.1, "grad_norm": 0.5827527642250061, "learning_rate": 0.0005984721348858543, "loss": 2.0277, "step": 2898 }, { "epoch": 0.1, "grad_norm": 0.592651903629303, "learning_rate": 0.0005984710811733595, "loss": 2.1024, "step": 2899 }, { "epoch": 0.1, "grad_norm": 0.5642887353897095, "learning_rate": 0.0005984700270985647, "loss": 2.0568, "step": 2900 }, { "epoch": 0.1, "grad_norm": 0.544910728931427, "learning_rate": 0.0005984689726614714, "loss": 2.0189, "step": 2901 }, { "epoch": 0.1, "grad_norm": 0.5374506115913391, "learning_rate": 0.0005984679178620808, "loss": 2.0536, "step": 2902 }, { "epoch": 0.1, "grad_norm": 0.5626200437545776, "learning_rate": 0.0005984668627003941, "loss": 2.0162, "step": 2903 }, { "epoch": 0.1, "grad_norm": 0.5611679553985596, "learning_rate": 0.0005984658071764125, "loss": 2.0357, "step": 2904 }, { "epoch": 0.1, "grad_norm": 0.5546395182609558, "learning_rate": 0.0005984647512901374, "loss": 2.0927, "step": 2905 }, { "epoch": 0.1, "grad_norm": 0.5541877150535583, "learning_rate": 0.0005984636950415701, "loss": 2.1301, "step": 2906 }, { "epoch": 0.1, "grad_norm": 0.5480227470397949, "learning_rate": 0.0005984626384307119, "loss": 2.0773, "step": 2907 }, { "epoch": 0.1, "grad_norm": 0.5395261645317078, "learning_rate": 0.000598461581457564, "loss": 2.1109, "step": 2908 }, { "epoch": 0.1, "grad_norm": 0.5338181257247925, "learning_rate": 0.0005984605241221276, "loss": 2.0463, "step": 2909 }, { "epoch": 0.1, "grad_norm": 0.5276668667793274, "learning_rate": 0.0005984594664244043, "loss": 2.0636, "step": 2910 }, { "epoch": 0.1, "grad_norm": 0.5376279950141907, "learning_rate": 0.000598458408364395, "loss": 2.0398, "step": 2911 }, { "epoch": 0.1, "grad_norm": 0.5473574995994568, "learning_rate": 0.0005984573499421013, "loss": 2.0006, "step": 2912 }, { "epoch": 0.1, "grad_norm": 0.5689803957939148, "learning_rate": 0.0005984562911575243, "loss": 2.0702, "step": 2913 }, { "epoch": 0.1, "grad_norm": 0.5348907709121704, "learning_rate": 0.0005984552320106653, "loss": 2.0522, "step": 2914 }, { "epoch": 0.1, "grad_norm": 0.5329413414001465, "learning_rate": 0.0005984541725015257, "loss": 2.065, "step": 2915 }, { "epoch": 0.1, "grad_norm": 0.5367361307144165, "learning_rate": 0.0005984531126301066, "loss": 2.0574, "step": 2916 }, { "epoch": 0.1, "grad_norm": 0.5395270586013794, "learning_rate": 0.0005984520523964096, "loss": 2.0608, "step": 2917 }, { "epoch": 0.1, "grad_norm": 0.5531310439109802, "learning_rate": 0.0005984509918004356, "loss": 2.09, "step": 2918 }, { "epoch": 0.1, "grad_norm": 0.5446557402610779, "learning_rate": 0.0005984499308421861, "loss": 2.0999, "step": 2919 }, { "epoch": 0.1, "grad_norm": 0.5513334274291992, "learning_rate": 0.0005984488695216625, "loss": 2.0849, "step": 2920 }, { "epoch": 0.1, "grad_norm": 0.5547142624855042, "learning_rate": 0.0005984478078388657, "loss": 2.0867, "step": 2921 }, { "epoch": 0.1, "grad_norm": 0.5269076824188232, "learning_rate": 0.0005984467457937974, "loss": 2.0353, "step": 2922 }, { "epoch": 0.1, "grad_norm": 0.5397043824195862, "learning_rate": 0.0005984456833864587, "loss": 2.0637, "step": 2923 }, { "epoch": 0.1, "grad_norm": 0.5372148156166077, "learning_rate": 0.0005984446206168509, "loss": 2.03, "step": 2924 }, { "epoch": 0.1, "grad_norm": 0.539678156375885, "learning_rate": 0.0005984435574849753, "loss": 2.0523, "step": 2925 }, { "epoch": 0.1, "grad_norm": 0.5255147814750671, "learning_rate": 0.0005984424939908332, "loss": 2.001, "step": 2926 }, { "epoch": 0.1, "grad_norm": 0.5528716444969177, "learning_rate": 0.0005984414301344259, "loss": 1.9859, "step": 2927 }, { "epoch": 0.1, "grad_norm": 0.5473964214324951, "learning_rate": 0.0005984403659157547, "loss": 2.1166, "step": 2928 }, { "epoch": 0.1, "grad_norm": 0.5683690905570984, "learning_rate": 0.0005984393013348208, "loss": 2.0943, "step": 2929 }, { "epoch": 0.1, "grad_norm": 0.5424625277519226, "learning_rate": 0.0005984382363916255, "loss": 1.9752, "step": 2930 }, { "epoch": 0.1, "grad_norm": 0.546172559261322, "learning_rate": 0.0005984371710861704, "loss": 2.0241, "step": 2931 }, { "epoch": 0.1, "grad_norm": 0.5898641347885132, "learning_rate": 0.0005984361054184563, "loss": 2.0881, "step": 2932 }, { "epoch": 0.1, "grad_norm": 0.5539506673812866, "learning_rate": 0.000598435039388485, "loss": 2.1122, "step": 2933 }, { "epoch": 0.1, "grad_norm": 0.5868683457374573, "learning_rate": 0.0005984339729962573, "loss": 2.0348, "step": 2934 }, { "epoch": 0.1, "grad_norm": 0.577136218547821, "learning_rate": 0.0005984329062417747, "loss": 2.0108, "step": 2935 }, { "epoch": 0.1, "grad_norm": 0.5395041108131409, "learning_rate": 0.0005984318391250387, "loss": 2.0259, "step": 2936 }, { "epoch": 0.1, "grad_norm": 0.5635567307472229, "learning_rate": 0.0005984307716460504, "loss": 2.0171, "step": 2937 }, { "epoch": 0.1, "grad_norm": 0.5643926858901978, "learning_rate": 0.000598429703804811, "loss": 2.1209, "step": 2938 }, { "epoch": 0.1, "grad_norm": 0.5405653715133667, "learning_rate": 0.000598428635601322, "loss": 2.1084, "step": 2939 }, { "epoch": 0.1, "grad_norm": 0.6290738582611084, "learning_rate": 0.0005984275670355846, "loss": 2.1106, "step": 2940 }, { "epoch": 0.1, "grad_norm": 0.5610577464103699, "learning_rate": 0.0005984264981076001, "loss": 2.0718, "step": 2941 }, { "epoch": 0.1, "grad_norm": 0.5392045974731445, "learning_rate": 0.0005984254288173697, "loss": 1.9808, "step": 2942 }, { "epoch": 0.1, "grad_norm": 0.573258638381958, "learning_rate": 0.0005984243591648949, "loss": 2.0614, "step": 2943 }, { "epoch": 0.1, "grad_norm": 0.5778954029083252, "learning_rate": 0.000598423289150177, "loss": 2.0661, "step": 2944 }, { "epoch": 0.1, "grad_norm": 0.5538321137428284, "learning_rate": 0.000598422218773217, "loss": 2.0914, "step": 2945 }, { "epoch": 0.1, "grad_norm": 0.5598803758621216, "learning_rate": 0.0005984211480340165, "loss": 2.0901, "step": 2946 }, { "epoch": 0.1, "grad_norm": 0.6038488149642944, "learning_rate": 0.0005984200769325767, "loss": 2.0605, "step": 2947 }, { "epoch": 0.1, "grad_norm": 0.5514419078826904, "learning_rate": 0.0005984190054688989, "loss": 2.0735, "step": 2948 }, { "epoch": 0.1, "grad_norm": 0.5520928502082825, "learning_rate": 0.0005984179336429844, "loss": 2.0029, "step": 2949 }, { "epoch": 0.1, "grad_norm": 0.5697736740112305, "learning_rate": 0.0005984168614548344, "loss": 2.077, "step": 2950 }, { "epoch": 0.1, "grad_norm": 0.555232048034668, "learning_rate": 0.0005984157889044504, "loss": 2.0083, "step": 2951 }, { "epoch": 0.1, "grad_norm": 0.5286939740180969, "learning_rate": 0.0005984147159918336, "loss": 2.0474, "step": 2952 }, { "epoch": 0.1, "grad_norm": 0.5657744407653809, "learning_rate": 0.0005984136427169852, "loss": 2.0292, "step": 2953 }, { "epoch": 0.1, "grad_norm": 0.583199143409729, "learning_rate": 0.0005984125690799068, "loss": 2.0507, "step": 2954 }, { "epoch": 0.1, "grad_norm": 0.5375831723213196, "learning_rate": 0.0005984114950805992, "loss": 2.0484, "step": 2955 }, { "epoch": 0.1, "grad_norm": 0.5893176198005676, "learning_rate": 0.0005984104207190644, "loss": 2.0485, "step": 2956 }, { "epoch": 0.1, "grad_norm": 0.557359516620636, "learning_rate": 0.000598409345995303, "loss": 2.0369, "step": 2957 }, { "epoch": 0.1, "grad_norm": 0.5445887446403503, "learning_rate": 0.0005984082709093167, "loss": 2.0079, "step": 2958 }, { "epoch": 0.1, "grad_norm": 0.5645462870597839, "learning_rate": 0.0005984071954611068, "loss": 2.1156, "step": 2959 }, { "epoch": 0.1, "grad_norm": 0.550087571144104, "learning_rate": 0.0005984061196506746, "loss": 2.0884, "step": 2960 }, { "epoch": 0.1, "grad_norm": 0.5450074672698975, "learning_rate": 0.0005984050434780212, "loss": 2.0351, "step": 2961 }, { "epoch": 0.1, "grad_norm": 0.526658833026886, "learning_rate": 0.0005984039669431481, "loss": 2.0058, "step": 2962 }, { "epoch": 0.1, "grad_norm": 0.5483619570732117, "learning_rate": 0.0005984028900460565, "loss": 2.0541, "step": 2963 }, { "epoch": 0.1, "grad_norm": 0.544098436832428, "learning_rate": 0.0005984018127867479, "loss": 2.0587, "step": 2964 }, { "epoch": 0.1, "grad_norm": 0.5300601720809937, "learning_rate": 0.0005984007351652233, "loss": 2.0401, "step": 2965 }, { "epoch": 0.1, "grad_norm": 0.5356205701828003, "learning_rate": 0.0005983996571814843, "loss": 2.0715, "step": 2966 }, { "epoch": 0.1, "grad_norm": 0.5591375231742859, "learning_rate": 0.000598398578835532, "loss": 2.0847, "step": 2967 }, { "epoch": 0.1, "grad_norm": 0.5165755748748779, "learning_rate": 0.0005983975001273678, "loss": 2.0132, "step": 2968 }, { "epoch": 0.1, "grad_norm": 0.5189292430877686, "learning_rate": 0.000598396421056993, "loss": 2.0222, "step": 2969 }, { "epoch": 0.1, "grad_norm": 0.550640344619751, "learning_rate": 0.000598395341624409, "loss": 2.0411, "step": 2970 }, { "epoch": 0.1, "grad_norm": 0.5603201389312744, "learning_rate": 0.0005983942618296168, "loss": 2.0813, "step": 2971 }, { "epoch": 0.1, "grad_norm": 0.5395398139953613, "learning_rate": 0.0005983931816726183, "loss": 2.1258, "step": 2972 }, { "epoch": 0.1, "grad_norm": 0.5501576662063599, "learning_rate": 0.0005983921011534141, "loss": 2.0622, "step": 2973 }, { "epoch": 0.1, "grad_norm": 0.5219244956970215, "learning_rate": 0.000598391020272006, "loss": 2.1256, "step": 2974 }, { "epoch": 0.1, "grad_norm": 0.5360344648361206, "learning_rate": 0.0005983899390283951, "loss": 2.1166, "step": 2975 }, { "epoch": 0.1, "grad_norm": 0.5559628009796143, "learning_rate": 0.000598388857422583, "loss": 2.0358, "step": 2976 }, { "epoch": 0.1, "grad_norm": 0.5248274207115173, "learning_rate": 0.0005983877754545705, "loss": 2.0285, "step": 2977 }, { "epoch": 0.1, "grad_norm": 0.560189425945282, "learning_rate": 0.0005983866931243594, "loss": 2.0499, "step": 2978 }, { "epoch": 0.1, "grad_norm": 0.536977231502533, "learning_rate": 0.0005983856104319508, "loss": 2.136, "step": 2979 }, { "epoch": 0.1, "grad_norm": 0.5477644205093384, "learning_rate": 0.000598384527377346, "loss": 2.1209, "step": 2980 }, { "epoch": 0.1, "grad_norm": 0.5701141357421875, "learning_rate": 0.0005983834439605462, "loss": 2.1616, "step": 2981 }, { "epoch": 0.1, "grad_norm": 0.5697148442268372, "learning_rate": 0.0005983823601815529, "loss": 2.0238, "step": 2982 }, { "epoch": 0.1, "grad_norm": 0.5304200649261475, "learning_rate": 0.0005983812760403676, "loss": 2.0751, "step": 2983 }, { "epoch": 0.1, "grad_norm": 0.569673478603363, "learning_rate": 0.0005983801915369914, "loss": 2.0627, "step": 2984 }, { "epoch": 0.1, "grad_norm": 0.5654293894767761, "learning_rate": 0.0005983791066714254, "loss": 2.058, "step": 2985 }, { "epoch": 0.1, "grad_norm": 0.5237669348716736, "learning_rate": 0.0005983780214436713, "loss": 2.0541, "step": 2986 }, { "epoch": 0.1, "grad_norm": 0.551331639289856, "learning_rate": 0.0005983769358537301, "loss": 2.0631, "step": 2987 }, { "epoch": 0.1, "grad_norm": 0.5873879790306091, "learning_rate": 0.0005983758499016033, "loss": 2.1012, "step": 2988 }, { "epoch": 0.1, "grad_norm": 0.5216169953346252, "learning_rate": 0.0005983747635872922, "loss": 2.0096, "step": 2989 }, { "epoch": 0.1, "grad_norm": 0.5402625799179077, "learning_rate": 0.0005983736769107981, "loss": 2.0043, "step": 2990 }, { "epoch": 0.1, "grad_norm": 0.5655659437179565, "learning_rate": 0.0005983725898721223, "loss": 2.0768, "step": 2991 }, { "epoch": 0.1, "grad_norm": 0.5772464275360107, "learning_rate": 0.0005983715024712661, "loss": 1.9745, "step": 2992 }, { "epoch": 0.1, "grad_norm": 0.5332030057907104, "learning_rate": 0.0005983704147082311, "loss": 1.9427, "step": 2993 }, { "epoch": 0.1, "grad_norm": 0.5370814204216003, "learning_rate": 0.0005983693265830181, "loss": 1.9953, "step": 2994 }, { "epoch": 0.1, "grad_norm": 0.5557817220687866, "learning_rate": 0.0005983682380956288, "loss": 2.0765, "step": 2995 }, { "epoch": 0.1, "grad_norm": 0.5475438237190247, "learning_rate": 0.0005983671492460644, "loss": 2.0133, "step": 2996 }, { "epoch": 0.1, "grad_norm": 0.5393304228782654, "learning_rate": 0.0005983660600343263, "loss": 2.0881, "step": 2997 }, { "epoch": 0.1, "grad_norm": 0.5393722653388977, "learning_rate": 0.0005983649704604157, "loss": 2.1051, "step": 2998 }, { "epoch": 0.1, "grad_norm": 0.5378564596176147, "learning_rate": 0.000598363880524334, "loss": 2.085, "step": 2999 }, { "epoch": 0.1, "grad_norm": 0.5474746823310852, "learning_rate": 0.0005983627902260826, "loss": 2.0901, "step": 3000 }, { "epoch": 0.1, "grad_norm": 0.5383774638175964, "learning_rate": 0.0005983616995656626, "loss": 2.0947, "step": 3001 }, { "epoch": 0.1, "grad_norm": 0.550227701663971, "learning_rate": 0.0005983606085430756, "loss": 2.0525, "step": 3002 }, { "epoch": 0.1, "grad_norm": 0.5494633316993713, "learning_rate": 0.0005983595171583227, "loss": 1.9821, "step": 3003 }, { "epoch": 0.1, "grad_norm": 0.5429326891899109, "learning_rate": 0.0005983584254114053, "loss": 2.0224, "step": 3004 }, { "epoch": 0.1, "grad_norm": 0.531304657459259, "learning_rate": 0.0005983573333023247, "loss": 2.1092, "step": 3005 }, { "epoch": 0.1, "grad_norm": 0.5439173579216003, "learning_rate": 0.0005983562408310823, "loss": 2.0383, "step": 3006 }, { "epoch": 0.1, "grad_norm": 0.5690186023712158, "learning_rate": 0.0005983551479976794, "loss": 2.0812, "step": 3007 }, { "epoch": 0.1, "grad_norm": 0.5254274010658264, "learning_rate": 0.0005983540548021173, "loss": 2.0257, "step": 3008 }, { "epoch": 0.1, "grad_norm": 0.5295664072036743, "learning_rate": 0.0005983529612443974, "loss": 2.0359, "step": 3009 }, { "epoch": 0.1, "grad_norm": 0.541522741317749, "learning_rate": 0.0005983518673245208, "loss": 1.9539, "step": 3010 }, { "epoch": 0.1, "grad_norm": 0.5436214804649353, "learning_rate": 0.0005983507730424892, "loss": 2.0315, "step": 3011 }, { "epoch": 0.1, "grad_norm": 0.5581262707710266, "learning_rate": 0.0005983496783983035, "loss": 2.0885, "step": 3012 }, { "epoch": 0.1, "grad_norm": 0.5487526059150696, "learning_rate": 0.0005983485833919654, "loss": 2.0932, "step": 3013 }, { "epoch": 0.1, "grad_norm": 0.5486400723457336, "learning_rate": 0.0005983474880234761, "loss": 2.0432, "step": 3014 }, { "epoch": 0.1, "grad_norm": 0.5476754307746887, "learning_rate": 0.0005983463922928368, "loss": 2.0826, "step": 3015 }, { "epoch": 0.1, "grad_norm": 0.5694166421890259, "learning_rate": 0.0005983452962000491, "loss": 2.1127, "step": 3016 }, { "epoch": 0.1, "grad_norm": 0.5526332259178162, "learning_rate": 0.0005983441997451139, "loss": 2.0628, "step": 3017 }, { "epoch": 0.1, "grad_norm": 0.5559571981430054, "learning_rate": 0.000598343102928033, "loss": 2.1102, "step": 3018 }, { "epoch": 0.1, "grad_norm": 0.5244128704071045, "learning_rate": 0.0005983420057488075, "loss": 2.0643, "step": 3019 }, { "epoch": 0.1, "grad_norm": 0.5656874179840088, "learning_rate": 0.0005983409082074389, "loss": 2.0409, "step": 3020 }, { "epoch": 0.1, "grad_norm": 0.5677860975265503, "learning_rate": 0.0005983398103039282, "loss": 2.0595, "step": 3021 }, { "epoch": 0.1, "grad_norm": 0.5390700101852417, "learning_rate": 0.000598338712038277, "loss": 2.0297, "step": 3022 }, { "epoch": 0.1, "grad_norm": 0.5292224884033203, "learning_rate": 0.0005983376134104865, "loss": 2.0533, "step": 3023 }, { "epoch": 0.1, "grad_norm": 0.5338518023490906, "learning_rate": 0.0005983365144205581, "loss": 2.046, "step": 3024 }, { "epoch": 0.1, "grad_norm": 0.5143039226531982, "learning_rate": 0.0005983354150684932, "loss": 2.0011, "step": 3025 }, { "epoch": 0.1, "grad_norm": 0.5407007336616516, "learning_rate": 0.0005983343153542931, "loss": 2.0219, "step": 3026 }, { "epoch": 0.1, "grad_norm": 0.5455166697502136, "learning_rate": 0.000598333215277959, "loss": 2.0659, "step": 3027 }, { "epoch": 0.1, "grad_norm": 0.5350786447525024, "learning_rate": 0.0005983321148394923, "loss": 2.0848, "step": 3028 }, { "epoch": 0.1, "grad_norm": 0.5360074043273926, "learning_rate": 0.0005983310140388945, "loss": 2.1012, "step": 3029 }, { "epoch": 0.1, "grad_norm": 0.5537525415420532, "learning_rate": 0.0005983299128761667, "loss": 2.0881, "step": 3030 }, { "epoch": 0.1, "grad_norm": 0.5410940051078796, "learning_rate": 0.0005983288113513104, "loss": 2.0893, "step": 3031 }, { "epoch": 0.1, "grad_norm": 0.5161202549934387, "learning_rate": 0.0005983277094643269, "loss": 1.991, "step": 3032 }, { "epoch": 0.1, "grad_norm": 0.5287944078445435, "learning_rate": 0.0005983266072152175, "loss": 2.0861, "step": 3033 }, { "epoch": 0.1, "grad_norm": 0.5487951040267944, "learning_rate": 0.0005983255046039835, "loss": 2.0691, "step": 3034 }, { "epoch": 0.1, "grad_norm": 0.5591406226158142, "learning_rate": 0.0005983244016306263, "loss": 2.0236, "step": 3035 }, { "epoch": 0.1, "grad_norm": 0.6702946424484253, "learning_rate": 0.0005983232982951473, "loss": 2.1941, "step": 3036 }, { "epoch": 0.1, "grad_norm": 0.5672726035118103, "learning_rate": 0.0005983221945975477, "loss": 2.0568, "step": 3037 }, { "epoch": 0.1, "grad_norm": 0.5553926229476929, "learning_rate": 0.0005983210905378289, "loss": 2.0526, "step": 3038 }, { "epoch": 0.1, "grad_norm": 0.5540581345558167, "learning_rate": 0.0005983199861159923, "loss": 2.088, "step": 3039 }, { "epoch": 0.1, "grad_norm": 0.565750002861023, "learning_rate": 0.0005983188813320393, "loss": 2.0366, "step": 3040 }, { "epoch": 0.1, "grad_norm": 0.5316237211227417, "learning_rate": 0.000598317776185971, "loss": 2.0545, "step": 3041 }, { "epoch": 0.1, "grad_norm": 0.5378943681716919, "learning_rate": 0.0005983166706777889, "loss": 2.0762, "step": 3042 }, { "epoch": 0.1, "grad_norm": 0.5814827680587769, "learning_rate": 0.0005983155648074944, "loss": 2.0637, "step": 3043 }, { "epoch": 0.1, "grad_norm": 0.543817400932312, "learning_rate": 0.0005983144585750886, "loss": 2.0227, "step": 3044 }, { "epoch": 0.1, "grad_norm": 0.5518212914466858, "learning_rate": 0.000598313351980573, "loss": 2.0203, "step": 3045 }, { "epoch": 0.1, "grad_norm": 0.6013411283493042, "learning_rate": 0.0005983122450239491, "loss": 2.1185, "step": 3046 }, { "epoch": 0.1, "grad_norm": 0.529630184173584, "learning_rate": 0.000598311137705218, "loss": 1.9972, "step": 3047 }, { "epoch": 0.1, "grad_norm": 0.5919330716133118, "learning_rate": 0.0005983100300243811, "loss": 2.0715, "step": 3048 }, { "epoch": 0.1, "grad_norm": 0.585028350353241, "learning_rate": 0.0005983089219814398, "loss": 1.9309, "step": 3049 }, { "epoch": 0.1, "grad_norm": 0.5303448438644409, "learning_rate": 0.0005983078135763955, "loss": 2.0711, "step": 3050 }, { "epoch": 0.1, "grad_norm": 0.6317658424377441, "learning_rate": 0.0005983067048092494, "loss": 2.0911, "step": 3051 }, { "epoch": 0.1, "grad_norm": 0.5463818311691284, "learning_rate": 0.000598305595680003, "loss": 2.0451, "step": 3052 }, { "epoch": 0.1, "grad_norm": 0.5441675782203674, "learning_rate": 0.0005983044861886574, "loss": 2.0577, "step": 3053 }, { "epoch": 0.1, "grad_norm": 0.556768000125885, "learning_rate": 0.0005983033763352142, "loss": 2.085, "step": 3054 }, { "epoch": 0.1, "grad_norm": 0.5858275294303894, "learning_rate": 0.0005983022661196747, "loss": 2.1122, "step": 3055 }, { "epoch": 0.1, "grad_norm": 0.5164474844932556, "learning_rate": 0.00059830115554204, "loss": 1.9495, "step": 3056 }, { "epoch": 0.1, "grad_norm": 0.5900667309761047, "learning_rate": 0.0005983000446023119, "loss": 2.0802, "step": 3057 }, { "epoch": 0.1, "grad_norm": 0.6260945796966553, "learning_rate": 0.0005982989333004914, "loss": 2.0121, "step": 3058 }, { "epoch": 0.1, "grad_norm": 0.5632432699203491, "learning_rate": 0.00059829782163658, "loss": 2.0691, "step": 3059 }, { "epoch": 0.1, "grad_norm": 0.5320738554000854, "learning_rate": 0.0005982967096105789, "loss": 2.0106, "step": 3060 }, { "epoch": 0.1, "grad_norm": 0.7068876624107361, "learning_rate": 0.0005982955972224896, "loss": 2.0071, "step": 3061 }, { "epoch": 0.1, "grad_norm": 0.5688079595565796, "learning_rate": 0.0005982944844723133, "loss": 2.0555, "step": 3062 }, { "epoch": 0.1, "grad_norm": 0.5538329482078552, "learning_rate": 0.0005982933713600516, "loss": 2.0481, "step": 3063 }, { "epoch": 0.1, "grad_norm": 0.6110315322875977, "learning_rate": 0.0005982922578857056, "loss": 2.0157, "step": 3064 }, { "epoch": 0.1, "grad_norm": 0.6098164319992065, "learning_rate": 0.0005982911440492768, "loss": 1.9961, "step": 3065 }, { "epoch": 0.1, "grad_norm": 0.5063714385032654, "learning_rate": 0.0005982900298507664, "loss": 1.9304, "step": 3066 }, { "epoch": 0.1, "grad_norm": 0.6167044043540955, "learning_rate": 0.000598288915290176, "loss": 2.0547, "step": 3067 }, { "epoch": 0.1, "grad_norm": 0.5560884475708008, "learning_rate": 0.0005982878003675067, "loss": 2.0651, "step": 3068 }, { "epoch": 0.1, "grad_norm": 0.548321008682251, "learning_rate": 0.00059828668508276, "loss": 2.0382, "step": 3069 }, { "epoch": 0.1, "grad_norm": 0.5683216452598572, "learning_rate": 0.0005982855694359373, "loss": 2.0074, "step": 3070 }, { "epoch": 0.1, "grad_norm": 0.6171270608901978, "learning_rate": 0.0005982844534270398, "loss": 2.0985, "step": 3071 }, { "epoch": 0.1, "grad_norm": 0.5454332232475281, "learning_rate": 0.0005982833370560688, "loss": 2.0121, "step": 3072 }, { "epoch": 0.1, "grad_norm": 0.5436699986457825, "learning_rate": 0.000598282220323026, "loss": 2.0956, "step": 3073 }, { "epoch": 0.1, "grad_norm": 0.6022368669509888, "learning_rate": 0.0005982811032279123, "loss": 2.0759, "step": 3074 }, { "epoch": 0.1, "grad_norm": 0.5507840514183044, "learning_rate": 0.0005982799857707295, "loss": 1.9942, "step": 3075 }, { "epoch": 0.1, "grad_norm": 0.5402359366416931, "learning_rate": 0.0005982788679514786, "loss": 2.0379, "step": 3076 }, { "epoch": 0.1, "grad_norm": 0.599155604839325, "learning_rate": 0.0005982777497701612, "loss": 2.0422, "step": 3077 }, { "epoch": 0.1, "grad_norm": 0.5666580200195312, "learning_rate": 0.0005982766312267785, "loss": 2.029, "step": 3078 }, { "epoch": 0.1, "grad_norm": 0.5688546299934387, "learning_rate": 0.000598275512321332, "loss": 2.0615, "step": 3079 }, { "epoch": 0.1, "grad_norm": 0.5432848930358887, "learning_rate": 0.0005982743930538229, "loss": 2.0276, "step": 3080 }, { "epoch": 0.1, "grad_norm": 0.5436055660247803, "learning_rate": 0.0005982732734242527, "loss": 2.0832, "step": 3081 }, { "epoch": 0.1, "grad_norm": 0.5245271921157837, "learning_rate": 0.0005982721534326227, "loss": 2.1202, "step": 3082 }, { "epoch": 0.1, "grad_norm": 0.5513073801994324, "learning_rate": 0.0005982710330789342, "loss": 2.0109, "step": 3083 }, { "epoch": 0.1, "grad_norm": 0.5315473079681396, "learning_rate": 0.0005982699123631885, "loss": 2.0736, "step": 3084 }, { "epoch": 0.1, "grad_norm": 0.5689852237701416, "learning_rate": 0.0005982687912853873, "loss": 2.1286, "step": 3085 }, { "epoch": 0.1, "grad_norm": 0.518624484539032, "learning_rate": 0.0005982676698455317, "loss": 2.0715, "step": 3086 }, { "epoch": 0.1, "grad_norm": 0.5452926754951477, "learning_rate": 0.0005982665480436229, "loss": 2.0306, "step": 3087 }, { "epoch": 0.1, "grad_norm": 0.5594205260276794, "learning_rate": 0.0005982654258796627, "loss": 2.0108, "step": 3088 }, { "epoch": 0.1, "grad_norm": 0.5322973728179932, "learning_rate": 0.000598264303353652, "loss": 2.0855, "step": 3089 }, { "epoch": 0.1, "grad_norm": 0.5620565414428711, "learning_rate": 0.0005982631804655925, "loss": 2.0312, "step": 3090 }, { "epoch": 0.1, "grad_norm": 0.5525063276290894, "learning_rate": 0.0005982620572154854, "loss": 1.9711, "step": 3091 }, { "epoch": 0.1, "grad_norm": 0.5425020456314087, "learning_rate": 0.0005982609336033322, "loss": 2.109, "step": 3092 }, { "epoch": 0.1, "grad_norm": 0.5489221811294556, "learning_rate": 0.0005982598096291341, "loss": 2.1232, "step": 3093 }, { "epoch": 0.1, "grad_norm": 0.5389361381530762, "learning_rate": 0.0005982586852928926, "loss": 2.0273, "step": 3094 }, { "epoch": 0.1, "grad_norm": 0.5300095677375793, "learning_rate": 0.000598257560594609, "loss": 1.9721, "step": 3095 }, { "epoch": 0.1, "grad_norm": 0.5677724480628967, "learning_rate": 0.0005982564355342846, "loss": 2.0831, "step": 3096 }, { "epoch": 0.1, "grad_norm": 0.5443922281265259, "learning_rate": 0.0005982553101119207, "loss": 2.0461, "step": 3097 }, { "epoch": 0.1, "grad_norm": 0.5225638151168823, "learning_rate": 0.0005982541843275191, "loss": 2.0295, "step": 3098 }, { "epoch": 0.1, "grad_norm": 0.5199664235115051, "learning_rate": 0.0005982530581810807, "loss": 2.0953, "step": 3099 }, { "epoch": 0.1, "grad_norm": 0.553568959236145, "learning_rate": 0.000598251931672607, "loss": 2.0315, "step": 3100 }, { "epoch": 0.1, "grad_norm": 0.5798031687736511, "learning_rate": 0.0005982508048020995, "loss": 2.0931, "step": 3101 }, { "epoch": 0.1, "grad_norm": 0.519590437412262, "learning_rate": 0.0005982496775695595, "loss": 2.018, "step": 3102 }, { "epoch": 0.1, "grad_norm": 0.5665122866630554, "learning_rate": 0.0005982485499749882, "loss": 2.1485, "step": 3103 }, { "epoch": 0.1, "grad_norm": 0.5381284952163696, "learning_rate": 0.0005982474220183871, "loss": 2.0684, "step": 3104 }, { "epoch": 0.1, "grad_norm": 0.5707288384437561, "learning_rate": 0.0005982462936997577, "loss": 2.0928, "step": 3105 }, { "epoch": 0.1, "grad_norm": 0.5396686792373657, "learning_rate": 0.0005982451650191012, "loss": 2.0126, "step": 3106 }, { "epoch": 0.1, "grad_norm": 0.530654788017273, "learning_rate": 0.000598244035976419, "loss": 2.0291, "step": 3107 }, { "epoch": 0.1, "grad_norm": 0.5457500219345093, "learning_rate": 0.0005982429065717125, "loss": 2.0807, "step": 3108 }, { "epoch": 0.1, "grad_norm": 0.5419985055923462, "learning_rate": 0.000598241776804983, "loss": 2.0939, "step": 3109 }, { "epoch": 0.1, "grad_norm": 0.5520175695419312, "learning_rate": 0.000598240646676232, "loss": 2.0759, "step": 3110 }, { "epoch": 0.1, "grad_norm": 0.5234431028366089, "learning_rate": 0.0005982395161854607, "loss": 2.0665, "step": 3111 }, { "epoch": 0.1, "grad_norm": 0.5501080751419067, "learning_rate": 0.0005982383853326707, "loss": 2.1862, "step": 3112 }, { "epoch": 0.1, "grad_norm": 0.523041307926178, "learning_rate": 0.0005982372541178631, "loss": 2.057, "step": 3113 }, { "epoch": 0.1, "grad_norm": 0.5348321795463562, "learning_rate": 0.0005982361225410395, "loss": 1.995, "step": 3114 }, { "epoch": 0.1, "grad_norm": 0.5346137285232544, "learning_rate": 0.0005982349906022012, "loss": 2.026, "step": 3115 }, { "epoch": 0.1, "grad_norm": 0.5551072955131531, "learning_rate": 0.0005982338583013496, "loss": 2.0256, "step": 3116 }, { "epoch": 0.1, "grad_norm": 0.5263389945030212, "learning_rate": 0.0005982327256384859, "loss": 1.9799, "step": 3117 }, { "epoch": 0.1, "grad_norm": 0.5470555424690247, "learning_rate": 0.0005982315926136117, "loss": 2.003, "step": 3118 }, { "epoch": 0.1, "grad_norm": 0.5282202363014221, "learning_rate": 0.0005982304592267283, "loss": 2.033, "step": 3119 }, { "epoch": 0.1, "grad_norm": 0.5538586378097534, "learning_rate": 0.0005982293254778371, "loss": 1.9945, "step": 3120 }, { "epoch": 0.1, "grad_norm": 0.5516627430915833, "learning_rate": 0.0005982281913669395, "loss": 1.9695, "step": 3121 }, { "epoch": 0.1, "grad_norm": 0.5347519516944885, "learning_rate": 0.0005982270568940366, "loss": 2.0575, "step": 3122 }, { "epoch": 0.1, "grad_norm": 0.5615822672843933, "learning_rate": 0.0005982259220591302, "loss": 2.0831, "step": 3123 }, { "epoch": 0.1, "grad_norm": 0.5373365879058838, "learning_rate": 0.0005982247868622213, "loss": 2.1012, "step": 3124 }, { "epoch": 0.1, "grad_norm": 0.5230323076248169, "learning_rate": 0.0005982236513033116, "loss": 2.0106, "step": 3125 }, { "epoch": 0.1, "grad_norm": 0.5414475798606873, "learning_rate": 0.0005982225153824023, "loss": 2.0962, "step": 3126 }, { "epoch": 0.1, "grad_norm": 0.5620279312133789, "learning_rate": 0.0005982213790994947, "loss": 1.9858, "step": 3127 }, { "epoch": 0.1, "grad_norm": 0.5492680668830872, "learning_rate": 0.0005982202424545903, "loss": 2.06, "step": 3128 }, { "epoch": 0.1, "grad_norm": 0.5315338969230652, "learning_rate": 0.0005982191054476906, "loss": 1.924, "step": 3129 }, { "epoch": 0.1, "grad_norm": 0.5282145738601685, "learning_rate": 0.0005982179680787968, "loss": 2.0337, "step": 3130 }, { "epoch": 0.1, "grad_norm": 0.5366833209991455, "learning_rate": 0.0005982168303479102, "loss": 2.0749, "step": 3131 }, { "epoch": 0.1, "grad_norm": 0.5351312160491943, "learning_rate": 0.0005982156922550324, "loss": 2.0223, "step": 3132 }, { "epoch": 0.1, "grad_norm": 0.5732700824737549, "learning_rate": 0.0005982145538001647, "loss": 2.0273, "step": 3133 }, { "epoch": 0.1, "grad_norm": 0.5257164239883423, "learning_rate": 0.0005982134149833085, "loss": 1.9756, "step": 3134 }, { "epoch": 0.1, "grad_norm": 0.5596101880073547, "learning_rate": 0.0005982122758044651, "loss": 1.9435, "step": 3135 }, { "epoch": 0.1, "grad_norm": 0.5478620529174805, "learning_rate": 0.000598211136263636, "loss": 2.0558, "step": 3136 }, { "epoch": 0.1, "grad_norm": 0.5532772541046143, "learning_rate": 0.0005982099963608223, "loss": 2.0775, "step": 3137 }, { "epoch": 0.1, "grad_norm": 0.5745717287063599, "learning_rate": 0.0005982088560960259, "loss": 2.0951, "step": 3138 }, { "epoch": 0.1, "grad_norm": 0.6070360541343689, "learning_rate": 0.0005982077154692478, "loss": 2.1134, "step": 3139 }, { "epoch": 0.1, "grad_norm": 0.5548394918441772, "learning_rate": 0.0005982065744804893, "loss": 2.1039, "step": 3140 }, { "epoch": 0.1, "grad_norm": 0.5466063618659973, "learning_rate": 0.0005982054331297522, "loss": 2.0997, "step": 3141 }, { "epoch": 0.1, "grad_norm": 0.5667439699172974, "learning_rate": 0.0005982042914170375, "loss": 2.0996, "step": 3142 }, { "epoch": 0.1, "grad_norm": 0.5267316699028015, "learning_rate": 0.0005982031493423468, "loss": 2.0189, "step": 3143 }, { "epoch": 0.1, "grad_norm": 0.5343047976493835, "learning_rate": 0.0005982020069056814, "loss": 1.979, "step": 3144 }, { "epoch": 0.1, "grad_norm": 0.5773693919181824, "learning_rate": 0.0005982008641070426, "loss": 2.0921, "step": 3145 }, { "epoch": 0.1, "grad_norm": 0.5517639517784119, "learning_rate": 0.000598199720946432, "loss": 2.0334, "step": 3146 }, { "epoch": 0.1, "grad_norm": 0.5593884587287903, "learning_rate": 0.0005981985774238509, "loss": 2.0053, "step": 3147 }, { "epoch": 0.1, "grad_norm": 0.5415950417518616, "learning_rate": 0.0005981974335393006, "loss": 2.0498, "step": 3148 }, { "epoch": 0.1, "grad_norm": 0.5473461747169495, "learning_rate": 0.0005981962892927825, "loss": 2.099, "step": 3149 }, { "epoch": 0.1, "grad_norm": 0.6119303107261658, "learning_rate": 0.0005981951446842981, "loss": 2.1145, "step": 3150 }, { "epoch": 0.1, "grad_norm": 0.523587703704834, "learning_rate": 0.0005981939997138487, "loss": 2.0344, "step": 3151 }, { "epoch": 0.1, "grad_norm": 0.5521634221076965, "learning_rate": 0.0005981928543814358, "loss": 2.0871, "step": 3152 }, { "epoch": 0.1, "grad_norm": 0.5431866645812988, "learning_rate": 0.0005981917086870607, "loss": 2.0577, "step": 3153 }, { "epoch": 0.1, "grad_norm": 0.5408310294151306, "learning_rate": 0.0005981905626307247, "loss": 2.0231, "step": 3154 }, { "epoch": 0.1, "grad_norm": 0.5445345044136047, "learning_rate": 0.0005981894162124294, "loss": 1.9999, "step": 3155 }, { "epoch": 0.11, "grad_norm": 0.5237771272659302, "learning_rate": 0.0005981882694321762, "loss": 2.1362, "step": 3156 }, { "epoch": 0.11, "grad_norm": 0.5349676609039307, "learning_rate": 0.000598187122289966, "loss": 2.044, "step": 3157 }, { "epoch": 0.11, "grad_norm": 0.5345962047576904, "learning_rate": 0.0005981859747858009, "loss": 1.9759, "step": 3158 }, { "epoch": 0.11, "grad_norm": 0.5428293943405151, "learning_rate": 0.0005981848269196818, "loss": 2.0518, "step": 3159 }, { "epoch": 0.11, "grad_norm": 0.5469644069671631, "learning_rate": 0.0005981836786916104, "loss": 1.9928, "step": 3160 }, { "epoch": 0.11, "grad_norm": 0.5562105178833008, "learning_rate": 0.0005981825301015879, "loss": 1.9907, "step": 3161 }, { "epoch": 0.11, "grad_norm": 0.5448972582817078, "learning_rate": 0.0005981813811496156, "loss": 1.9734, "step": 3162 }, { "epoch": 0.11, "grad_norm": 0.599386990070343, "learning_rate": 0.0005981802318356952, "loss": 2.0485, "step": 3163 }, { "epoch": 0.11, "grad_norm": 0.551118791103363, "learning_rate": 0.0005981790821598279, "loss": 1.9387, "step": 3164 }, { "epoch": 0.11, "grad_norm": 0.5305448174476624, "learning_rate": 0.0005981779321220151, "loss": 2.0272, "step": 3165 }, { "epoch": 0.11, "grad_norm": 0.577312171459198, "learning_rate": 0.0005981767817222583, "loss": 2.0797, "step": 3166 }, { "epoch": 0.11, "grad_norm": 0.5592361092567444, "learning_rate": 0.0005981756309605587, "loss": 2.1293, "step": 3167 }, { "epoch": 0.11, "grad_norm": 0.5243262052536011, "learning_rate": 0.000598174479836918, "loss": 2.0522, "step": 3168 }, { "epoch": 0.11, "grad_norm": 0.5789826512336731, "learning_rate": 0.0005981733283513371, "loss": 2.1, "step": 3169 }, { "epoch": 0.11, "grad_norm": 0.579714834690094, "learning_rate": 0.000598172176503818, "loss": 2.1372, "step": 3170 }, { "epoch": 0.11, "grad_norm": 0.5233738422393799, "learning_rate": 0.0005981710242943617, "loss": 2.0269, "step": 3171 }, { "epoch": 0.11, "grad_norm": 0.57260662317276, "learning_rate": 0.0005981698717229697, "loss": 2.011, "step": 3172 }, { "epoch": 0.11, "grad_norm": 0.5638293623924255, "learning_rate": 0.0005981687187896434, "loss": 1.9933, "step": 3173 }, { "epoch": 0.11, "grad_norm": 0.519216775894165, "learning_rate": 0.0005981675654943842, "loss": 2.0405, "step": 3174 }, { "epoch": 0.11, "grad_norm": 0.5729869604110718, "learning_rate": 0.0005981664118371936, "loss": 2.048, "step": 3175 }, { "epoch": 0.11, "grad_norm": 0.5794818997383118, "learning_rate": 0.0005981652578180729, "loss": 2.0522, "step": 3176 }, { "epoch": 0.11, "grad_norm": 0.5474729537963867, "learning_rate": 0.0005981641034370234, "loss": 1.9824, "step": 3177 }, { "epoch": 0.11, "grad_norm": 0.5360034108161926, "learning_rate": 0.0005981629486940467, "loss": 2.0466, "step": 3178 }, { "epoch": 0.11, "grad_norm": 0.5691466927528381, "learning_rate": 0.0005981617935891441, "loss": 2.0711, "step": 3179 }, { "epoch": 0.11, "grad_norm": 0.5511273145675659, "learning_rate": 0.000598160638122317, "loss": 1.9972, "step": 3180 }, { "epoch": 0.11, "grad_norm": 0.5466075539588928, "learning_rate": 0.0005981594822935669, "loss": 2.0086, "step": 3181 }, { "epoch": 0.11, "grad_norm": 0.5439802408218384, "learning_rate": 0.0005981583261028949, "loss": 2.0835, "step": 3182 }, { "epoch": 0.11, "grad_norm": 0.5466304421424866, "learning_rate": 0.0005981571695503028, "loss": 2.0125, "step": 3183 }, { "epoch": 0.11, "grad_norm": 0.5472601056098938, "learning_rate": 0.0005981560126357919, "loss": 2.0653, "step": 3184 }, { "epoch": 0.11, "grad_norm": 0.5384272336959839, "learning_rate": 0.0005981548553593634, "loss": 2.0496, "step": 3185 }, { "epoch": 0.11, "grad_norm": 0.5524927377700806, "learning_rate": 0.0005981536977210188, "loss": 1.963, "step": 3186 }, { "epoch": 0.11, "grad_norm": 0.5440811514854431, "learning_rate": 0.0005981525397207596, "loss": 2.128, "step": 3187 }, { "epoch": 0.11, "grad_norm": 0.5464842319488525, "learning_rate": 0.0005981513813585872, "loss": 2.0243, "step": 3188 }, { "epoch": 0.11, "grad_norm": 0.5509679317474365, "learning_rate": 0.0005981502226345029, "loss": 1.9518, "step": 3189 }, { "epoch": 0.11, "grad_norm": 0.5620108246803284, "learning_rate": 0.0005981490635485081, "loss": 2.0822, "step": 3190 }, { "epoch": 0.11, "grad_norm": 0.5375082492828369, "learning_rate": 0.0005981479041006044, "loss": 2.0778, "step": 3191 }, { "epoch": 0.11, "grad_norm": 0.555866539478302, "learning_rate": 0.000598146744290793, "loss": 2.0548, "step": 3192 }, { "epoch": 0.11, "grad_norm": 0.5138696432113647, "learning_rate": 0.0005981455841190754, "loss": 1.9963, "step": 3193 }, { "epoch": 0.11, "grad_norm": 0.5482088923454285, "learning_rate": 0.000598144423585453, "loss": 2.0541, "step": 3194 }, { "epoch": 0.11, "grad_norm": 0.5376637578010559, "learning_rate": 0.0005981432626899271, "loss": 2.0236, "step": 3195 }, { "epoch": 0.11, "grad_norm": 0.5488427877426147, "learning_rate": 0.0005981421014324994, "loss": 2.0223, "step": 3196 }, { "epoch": 0.11, "grad_norm": 0.5436266660690308, "learning_rate": 0.0005981409398131709, "loss": 2.0658, "step": 3197 }, { "epoch": 0.11, "grad_norm": 0.5402211546897888, "learning_rate": 0.0005981397778319434, "loss": 2.0315, "step": 3198 }, { "epoch": 0.11, "grad_norm": 0.5455558896064758, "learning_rate": 0.000598138615488818, "loss": 2.0664, "step": 3199 }, { "epoch": 0.11, "grad_norm": 0.5419751405715942, "learning_rate": 0.0005981374527837963, "loss": 2.0093, "step": 3200 }, { "epoch": 0.11, "grad_norm": 0.5550782680511475, "learning_rate": 0.0005981362897168797, "loss": 2.0817, "step": 3201 }, { "epoch": 0.11, "grad_norm": 0.529997706413269, "learning_rate": 0.0005981351262880696, "loss": 2.0633, "step": 3202 }, { "epoch": 0.11, "grad_norm": 0.5278403759002686, "learning_rate": 0.0005981339624973673, "loss": 2.0633, "step": 3203 }, { "epoch": 0.11, "grad_norm": 0.5260804295539856, "learning_rate": 0.0005981327983447744, "loss": 2.032, "step": 3204 }, { "epoch": 0.11, "grad_norm": 0.5265942215919495, "learning_rate": 0.0005981316338302921, "loss": 2.1413, "step": 3205 }, { "epoch": 0.11, "grad_norm": 0.5489891767501831, "learning_rate": 0.000598130468953922, "loss": 2.0461, "step": 3206 }, { "epoch": 0.11, "grad_norm": 0.5222336649894714, "learning_rate": 0.0005981293037156653, "loss": 1.982, "step": 3207 }, { "epoch": 0.11, "grad_norm": 0.5221561789512634, "learning_rate": 0.0005981281381155238, "loss": 2.0579, "step": 3208 }, { "epoch": 0.11, "grad_norm": 0.5338969230651855, "learning_rate": 0.0005981269721534985, "loss": 2.0883, "step": 3209 }, { "epoch": 0.11, "grad_norm": 0.5451109409332275, "learning_rate": 0.000598125805829591, "loss": 2.0418, "step": 3210 }, { "epoch": 0.11, "grad_norm": 0.5219192504882812, "learning_rate": 0.0005981246391438028, "loss": 2.012, "step": 3211 }, { "epoch": 0.11, "grad_norm": 0.5425879955291748, "learning_rate": 0.0005981234720961351, "loss": 2.031, "step": 3212 }, { "epoch": 0.11, "grad_norm": 0.5375082492828369, "learning_rate": 0.0005981223046865894, "loss": 1.9819, "step": 3213 }, { "epoch": 0.11, "grad_norm": 0.5130651593208313, "learning_rate": 0.0005981211369151672, "loss": 2.027, "step": 3214 }, { "epoch": 0.11, "grad_norm": 0.5180054306983948, "learning_rate": 0.0005981199687818699, "loss": 2.0479, "step": 3215 }, { "epoch": 0.11, "grad_norm": 0.5222300291061401, "learning_rate": 0.0005981188002866988, "loss": 2.1103, "step": 3216 }, { "epoch": 0.11, "grad_norm": 0.507105827331543, "learning_rate": 0.0005981176314296555, "loss": 1.9647, "step": 3217 }, { "epoch": 0.11, "grad_norm": 0.5893276929855347, "learning_rate": 0.0005981164622107412, "loss": 2.0428, "step": 3218 }, { "epoch": 0.11, "grad_norm": 0.5195689797401428, "learning_rate": 0.0005981152926299576, "loss": 2.0765, "step": 3219 }, { "epoch": 0.11, "grad_norm": 0.5280464887619019, "learning_rate": 0.0005981141226873059, "loss": 2.1465, "step": 3220 }, { "epoch": 0.11, "grad_norm": 0.5325250029563904, "learning_rate": 0.0005981129523827876, "loss": 2.1325, "step": 3221 }, { "epoch": 0.11, "grad_norm": 0.5500277876853943, "learning_rate": 0.000598111781716404, "loss": 2.095, "step": 3222 }, { "epoch": 0.11, "grad_norm": 0.5375801920890808, "learning_rate": 0.0005981106106881566, "loss": 1.9792, "step": 3223 }, { "epoch": 0.11, "grad_norm": 0.5249388813972473, "learning_rate": 0.0005981094392980469, "loss": 1.9844, "step": 3224 }, { "epoch": 0.11, "grad_norm": 0.5293002128601074, "learning_rate": 0.0005981082675460764, "loss": 1.973, "step": 3225 }, { "epoch": 0.11, "grad_norm": 0.5346702337265015, "learning_rate": 0.0005981070954322463, "loss": 2.0677, "step": 3226 }, { "epoch": 0.11, "grad_norm": 0.5435163974761963, "learning_rate": 0.000598105922956558, "loss": 1.998, "step": 3227 }, { "epoch": 0.11, "grad_norm": 0.5666345357894897, "learning_rate": 0.0005981047501190131, "loss": 1.9911, "step": 3228 }, { "epoch": 0.11, "grad_norm": 0.5298603177070618, "learning_rate": 0.0005981035769196131, "loss": 2.0338, "step": 3229 }, { "epoch": 0.11, "grad_norm": 0.570963442325592, "learning_rate": 0.0005981024033583591, "loss": 2.0716, "step": 3230 }, { "epoch": 0.11, "grad_norm": 0.5464587807655334, "learning_rate": 0.0005981012294352528, "loss": 2.0421, "step": 3231 }, { "epoch": 0.11, "grad_norm": 0.5239063501358032, "learning_rate": 0.0005981000551502954, "loss": 2.0317, "step": 3232 }, { "epoch": 0.11, "grad_norm": 0.566295325756073, "learning_rate": 0.0005980988805034886, "loss": 2.097, "step": 3233 }, { "epoch": 0.11, "grad_norm": 0.5509099364280701, "learning_rate": 0.0005980977054948336, "loss": 2.0365, "step": 3234 }, { "epoch": 0.11, "grad_norm": 0.5404874682426453, "learning_rate": 0.000598096530124332, "loss": 2.0165, "step": 3235 }, { "epoch": 0.11, "grad_norm": 0.5390348434448242, "learning_rate": 0.000598095354391985, "loss": 2.0485, "step": 3236 }, { "epoch": 0.11, "grad_norm": 0.5621742010116577, "learning_rate": 0.0005980941782977942, "loss": 2.1138, "step": 3237 }, { "epoch": 0.11, "grad_norm": 0.5577664375305176, "learning_rate": 0.0005980930018417611, "loss": 2.0523, "step": 3238 }, { "epoch": 0.11, "grad_norm": 0.543839693069458, "learning_rate": 0.000598091825023887, "loss": 2.0526, "step": 3239 }, { "epoch": 0.11, "grad_norm": 0.5072115063667297, "learning_rate": 0.0005980906478441733, "loss": 2.0229, "step": 3240 }, { "epoch": 0.11, "grad_norm": 0.5244655609130859, "learning_rate": 0.0005980894703026215, "loss": 2.0519, "step": 3241 }, { "epoch": 0.11, "grad_norm": 0.5492803454399109, "learning_rate": 0.000598088292399233, "loss": 2.0545, "step": 3242 }, { "epoch": 0.11, "grad_norm": 0.5463140606880188, "learning_rate": 0.0005980871141340091, "loss": 2.0756, "step": 3243 }, { "epoch": 0.11, "grad_norm": 0.5116478204727173, "learning_rate": 0.0005980859355069515, "loss": 1.9889, "step": 3244 }, { "epoch": 0.11, "grad_norm": 0.5348167419433594, "learning_rate": 0.0005980847565180614, "loss": 1.9818, "step": 3245 }, { "epoch": 0.11, "grad_norm": 0.5446670651435852, "learning_rate": 0.0005980835771673404, "loss": 2.0494, "step": 3246 }, { "epoch": 0.11, "grad_norm": 0.5616305470466614, "learning_rate": 0.0005980823974547899, "loss": 2.0227, "step": 3247 }, { "epoch": 0.11, "grad_norm": 0.5347013473510742, "learning_rate": 0.0005980812173804112, "loss": 2.1616, "step": 3248 }, { "epoch": 0.11, "grad_norm": 0.5337652564048767, "learning_rate": 0.0005980800369442059, "loss": 2.0599, "step": 3249 }, { "epoch": 0.11, "grad_norm": 0.5923892259597778, "learning_rate": 0.0005980788561461753, "loss": 2.0312, "step": 3250 }, { "epoch": 0.11, "grad_norm": 0.5537422895431519, "learning_rate": 0.0005980776749863209, "loss": 2.0278, "step": 3251 }, { "epoch": 0.11, "grad_norm": 0.5546167492866516, "learning_rate": 0.000598076493464644, "loss": 2.0806, "step": 3252 }, { "epoch": 0.11, "grad_norm": 0.556625485420227, "learning_rate": 0.0005980753115811464, "loss": 2.0522, "step": 3253 }, { "epoch": 0.11, "grad_norm": 0.5812026262283325, "learning_rate": 0.0005980741293358292, "loss": 2.045, "step": 3254 }, { "epoch": 0.11, "grad_norm": 0.5312222242355347, "learning_rate": 0.0005980729467286939, "loss": 1.9969, "step": 3255 }, { "epoch": 0.11, "grad_norm": 0.5755463242530823, "learning_rate": 0.0005980717637597419, "loss": 2.0546, "step": 3256 }, { "epoch": 0.11, "grad_norm": 0.5216372609138489, "learning_rate": 0.0005980705804289749, "loss": 2.0897, "step": 3257 }, { "epoch": 0.11, "grad_norm": 0.5474995374679565, "learning_rate": 0.0005980693967363939, "loss": 2.068, "step": 3258 }, { "epoch": 0.11, "grad_norm": 0.548043429851532, "learning_rate": 0.0005980682126820006, "loss": 2.0932, "step": 3259 }, { "epoch": 0.11, "grad_norm": 0.528778612613678, "learning_rate": 0.0005980670282657966, "loss": 2.0399, "step": 3260 }, { "epoch": 0.11, "grad_norm": 0.5509863495826721, "learning_rate": 0.0005980658434877829, "loss": 2.1329, "step": 3261 }, { "epoch": 0.11, "grad_norm": 0.5344165563583374, "learning_rate": 0.0005980646583479614, "loss": 2.0121, "step": 3262 }, { "epoch": 0.11, "grad_norm": 0.5359922051429749, "learning_rate": 0.0005980634728463332, "loss": 2.0319, "step": 3263 }, { "epoch": 0.11, "grad_norm": 0.5342604517936707, "learning_rate": 0.0005980622869828999, "loss": 2.0036, "step": 3264 }, { "epoch": 0.11, "grad_norm": 0.5203988552093506, "learning_rate": 0.000598061100757663, "loss": 2.061, "step": 3265 }, { "epoch": 0.11, "grad_norm": 0.5184372663497925, "learning_rate": 0.0005980599141706236, "loss": 2.0184, "step": 3266 }, { "epoch": 0.11, "grad_norm": 0.5875603556632996, "learning_rate": 0.0005980587272217835, "loss": 2.0818, "step": 3267 }, { "epoch": 0.11, "grad_norm": 0.5639689564704895, "learning_rate": 0.000598057539911144, "loss": 2.0285, "step": 3268 }, { "epoch": 0.11, "grad_norm": 0.516823410987854, "learning_rate": 0.0005980563522387066, "loss": 1.9182, "step": 3269 }, { "epoch": 0.11, "grad_norm": 0.5787664651870728, "learning_rate": 0.0005980551642044727, "loss": 2.094, "step": 3270 }, { "epoch": 0.11, "grad_norm": 0.5220449566841125, "learning_rate": 0.0005980539758084438, "loss": 2.0883, "step": 3271 }, { "epoch": 0.11, "grad_norm": 0.5498887300491333, "learning_rate": 0.0005980527870506211, "loss": 1.9649, "step": 3272 }, { "epoch": 0.11, "grad_norm": 0.5981404781341553, "learning_rate": 0.0005980515979310065, "loss": 2.1151, "step": 3273 }, { "epoch": 0.11, "grad_norm": 0.5322548747062683, "learning_rate": 0.000598050408449601, "loss": 1.9785, "step": 3274 }, { "epoch": 0.11, "grad_norm": 0.5342956781387329, "learning_rate": 0.0005980492186064062, "loss": 2.0043, "step": 3275 }, { "epoch": 0.11, "grad_norm": 0.5531709790229797, "learning_rate": 0.0005980480284014237, "loss": 2.0556, "step": 3276 }, { "epoch": 0.11, "grad_norm": 0.5401043891906738, "learning_rate": 0.0005980468378346548, "loss": 2.1111, "step": 3277 }, { "epoch": 0.11, "grad_norm": 0.5451683402061462, "learning_rate": 0.0005980456469061009, "loss": 2.0696, "step": 3278 }, { "epoch": 0.11, "grad_norm": 0.5328221917152405, "learning_rate": 0.0005980444556157635, "loss": 2.0176, "step": 3279 }, { "epoch": 0.11, "grad_norm": 0.5272212028503418, "learning_rate": 0.0005980432639636439, "loss": 2.0118, "step": 3280 }, { "epoch": 0.11, "grad_norm": 0.5264198184013367, "learning_rate": 0.0005980420719497439, "loss": 2.0626, "step": 3281 }, { "epoch": 0.11, "grad_norm": 0.5423667430877686, "learning_rate": 0.0005980408795740647, "loss": 1.9868, "step": 3282 }, { "epoch": 0.11, "grad_norm": 0.5281529426574707, "learning_rate": 0.0005980396868366077, "loss": 2.0893, "step": 3283 }, { "epoch": 0.11, "grad_norm": 0.5485837459564209, "learning_rate": 0.0005980384937373745, "loss": 2.0342, "step": 3284 }, { "epoch": 0.11, "grad_norm": 0.5438937544822693, "learning_rate": 0.0005980373002763665, "loss": 2.0721, "step": 3285 }, { "epoch": 0.11, "grad_norm": 0.5458574295043945, "learning_rate": 0.0005980361064535851, "loss": 2.0034, "step": 3286 }, { "epoch": 0.11, "grad_norm": 0.5411868691444397, "learning_rate": 0.0005980349122690318, "loss": 2.0428, "step": 3287 }, { "epoch": 0.11, "grad_norm": 0.5380954146385193, "learning_rate": 0.0005980337177227081, "loss": 2.0422, "step": 3288 }, { "epoch": 0.11, "grad_norm": 0.548283040523529, "learning_rate": 0.0005980325228146151, "loss": 2.0308, "step": 3289 }, { "epoch": 0.11, "grad_norm": 0.5643056631088257, "learning_rate": 0.0005980313275447548, "loss": 1.9881, "step": 3290 }, { "epoch": 0.11, "grad_norm": 0.5201779007911682, "learning_rate": 0.0005980301319131284, "loss": 2.0183, "step": 3291 }, { "epoch": 0.11, "grad_norm": 0.5395054221153259, "learning_rate": 0.0005980289359197371, "loss": 2.1135, "step": 3292 }, { "epoch": 0.11, "grad_norm": 0.5597591400146484, "learning_rate": 0.0005980277395645828, "loss": 2.0496, "step": 3293 }, { "epoch": 0.11, "grad_norm": 0.5467525124549866, "learning_rate": 0.0005980265428476666, "loss": 2.133, "step": 3294 }, { "epoch": 0.11, "grad_norm": 0.5281027555465698, "learning_rate": 0.0005980253457689901, "loss": 1.9992, "step": 3295 }, { "epoch": 0.11, "grad_norm": 0.5322465300559998, "learning_rate": 0.0005980241483285549, "loss": 2.0427, "step": 3296 }, { "epoch": 0.11, "grad_norm": 0.5567300319671631, "learning_rate": 0.0005980229505263622, "loss": 2.0644, "step": 3297 }, { "epoch": 0.11, "grad_norm": 0.5486049056053162, "learning_rate": 0.0005980217523624135, "loss": 2.0675, "step": 3298 }, { "epoch": 0.11, "grad_norm": 0.5455158948898315, "learning_rate": 0.0005980205538367104, "loss": 1.9867, "step": 3299 }, { "epoch": 0.11, "grad_norm": 0.5415652394294739, "learning_rate": 0.0005980193549492541, "loss": 2.0087, "step": 3300 }, { "epoch": 0.11, "grad_norm": 0.5501754283905029, "learning_rate": 0.0005980181557000464, "loss": 1.9649, "step": 3301 }, { "epoch": 0.11, "grad_norm": 0.5355131030082703, "learning_rate": 0.0005980169560890885, "loss": 1.9524, "step": 3302 }, { "epoch": 0.11, "grad_norm": 0.5589682459831238, "learning_rate": 0.0005980157561163819, "loss": 2.1085, "step": 3303 }, { "epoch": 0.11, "grad_norm": 0.5376133322715759, "learning_rate": 0.0005980145557819281, "loss": 2.1052, "step": 3304 }, { "epoch": 0.11, "grad_norm": 0.5243752002716064, "learning_rate": 0.0005980133550857285, "loss": 2.0104, "step": 3305 }, { "epoch": 0.11, "grad_norm": 0.5625415444374084, "learning_rate": 0.0005980121540277846, "loss": 2.0869, "step": 3306 }, { "epoch": 0.11, "grad_norm": 0.5242630243301392, "learning_rate": 0.0005980109526080978, "loss": 2.0692, "step": 3307 }, { "epoch": 0.11, "grad_norm": 0.5345901250839233, "learning_rate": 0.0005980097508266698, "loss": 2.0286, "step": 3308 }, { "epoch": 0.11, "grad_norm": 0.5567492842674255, "learning_rate": 0.0005980085486835018, "loss": 2.0798, "step": 3309 }, { "epoch": 0.11, "grad_norm": 0.5297060012817383, "learning_rate": 0.0005980073461785952, "loss": 2.0266, "step": 3310 }, { "epoch": 0.11, "grad_norm": 0.5429546236991882, "learning_rate": 0.0005980061433119517, "loss": 2.0231, "step": 3311 }, { "epoch": 0.11, "grad_norm": 0.5207703113555908, "learning_rate": 0.0005980049400835727, "loss": 1.9673, "step": 3312 }, { "epoch": 0.11, "grad_norm": 0.5334242582321167, "learning_rate": 0.0005980037364934596, "loss": 2.0248, "step": 3313 }, { "epoch": 0.11, "grad_norm": 0.6051264405250549, "learning_rate": 0.0005980025325416138, "loss": 2.1109, "step": 3314 }, { "epoch": 0.11, "grad_norm": 0.5511796474456787, "learning_rate": 0.0005980013282280368, "loss": 1.9695, "step": 3315 }, { "epoch": 0.11, "grad_norm": 0.5642991662025452, "learning_rate": 0.0005980001235527302, "loss": 2.0143, "step": 3316 }, { "epoch": 0.11, "grad_norm": 0.6071414947509766, "learning_rate": 0.0005979989185156952, "loss": 2.0403, "step": 3317 }, { "epoch": 0.11, "grad_norm": 0.5724795460700989, "learning_rate": 0.0005979977131169336, "loss": 2.0349, "step": 3318 }, { "epoch": 0.11, "grad_norm": 0.6189790368080139, "learning_rate": 0.0005979965073564466, "loss": 2.0505, "step": 3319 }, { "epoch": 0.11, "grad_norm": 0.5403754711151123, "learning_rate": 0.0005979953012342358, "loss": 2.1124, "step": 3320 }, { "epoch": 0.11, "grad_norm": 0.5186156630516052, "learning_rate": 0.0005979940947503026, "loss": 2.0566, "step": 3321 }, { "epoch": 0.11, "grad_norm": 0.5345191359519958, "learning_rate": 0.0005979928879046484, "loss": 2.0464, "step": 3322 }, { "epoch": 0.11, "grad_norm": 0.555391252040863, "learning_rate": 0.0005979916806972748, "loss": 2.1053, "step": 3323 }, { "epoch": 0.11, "grad_norm": 0.5489069223403931, "learning_rate": 0.0005979904731281833, "loss": 2.012, "step": 3324 }, { "epoch": 0.11, "grad_norm": 0.5279862880706787, "learning_rate": 0.0005979892651973751, "loss": 1.9246, "step": 3325 }, { "epoch": 0.11, "grad_norm": 0.5520200729370117, "learning_rate": 0.000597988056904852, "loss": 1.9675, "step": 3326 }, { "epoch": 0.11, "grad_norm": 0.5452958345413208, "learning_rate": 0.0005979868482506152, "loss": 2.1057, "step": 3327 }, { "epoch": 0.11, "grad_norm": 0.5418918132781982, "learning_rate": 0.0005979856392346662, "loss": 2.0311, "step": 3328 }, { "epoch": 0.11, "grad_norm": 0.5267695188522339, "learning_rate": 0.0005979844298570067, "loss": 1.9992, "step": 3329 }, { "epoch": 0.11, "grad_norm": 0.5187839865684509, "learning_rate": 0.0005979832201176379, "loss": 2.064, "step": 3330 }, { "epoch": 0.11, "grad_norm": 0.5234171152114868, "learning_rate": 0.0005979820100165614, "loss": 2.033, "step": 3331 }, { "epoch": 0.11, "grad_norm": 0.5457786917686462, "learning_rate": 0.0005979807995537787, "loss": 2.0777, "step": 3332 }, { "epoch": 0.11, "grad_norm": 0.5373973250389099, "learning_rate": 0.0005979795887292912, "loss": 2.1014, "step": 3333 }, { "epoch": 0.11, "grad_norm": 0.5234400033950806, "learning_rate": 0.0005979783775431004, "loss": 2.0521, "step": 3334 }, { "epoch": 0.11, "grad_norm": 0.5164280533790588, "learning_rate": 0.0005979771659952077, "loss": 2.0531, "step": 3335 }, { "epoch": 0.11, "grad_norm": 0.5453429222106934, "learning_rate": 0.0005979759540856146, "loss": 2.032, "step": 3336 }, { "epoch": 0.11, "grad_norm": 0.5460479259490967, "learning_rate": 0.0005979747418143227, "loss": 2.0923, "step": 3337 }, { "epoch": 0.11, "grad_norm": 0.5507555603981018, "learning_rate": 0.0005979735291813333, "loss": 2.0407, "step": 3338 }, { "epoch": 0.11, "grad_norm": 0.5490338206291199, "learning_rate": 0.0005979723161866479, "loss": 2.038, "step": 3339 }, { "epoch": 0.11, "grad_norm": 0.5362977385520935, "learning_rate": 0.000597971102830268, "loss": 2.0494, "step": 3340 }, { "epoch": 0.11, "grad_norm": 0.5325959324836731, "learning_rate": 0.0005979698891121952, "loss": 2.1504, "step": 3341 }, { "epoch": 0.11, "grad_norm": 0.570243239402771, "learning_rate": 0.0005979686750324308, "loss": 2.0196, "step": 3342 }, { "epoch": 0.11, "grad_norm": 0.5270243883132935, "learning_rate": 0.0005979674605909763, "loss": 2.0924, "step": 3343 }, { "epoch": 0.11, "grad_norm": 0.5538051724433899, "learning_rate": 0.0005979662457878332, "loss": 2.0815, "step": 3344 }, { "epoch": 0.11, "grad_norm": 0.5406108498573303, "learning_rate": 0.000597965030623003, "loss": 2.0899, "step": 3345 }, { "epoch": 0.11, "grad_norm": 0.5395365357398987, "learning_rate": 0.0005979638150964873, "loss": 1.9474, "step": 3346 }, { "epoch": 0.11, "grad_norm": 0.5344527959823608, "learning_rate": 0.0005979625992082873, "loss": 2.0356, "step": 3347 }, { "epoch": 0.11, "grad_norm": 0.5402331352233887, "learning_rate": 0.0005979613829584046, "loss": 2.0321, "step": 3348 }, { "epoch": 0.11, "grad_norm": 0.5635421276092529, "learning_rate": 0.0005979601663468406, "loss": 1.9948, "step": 3349 }, { "epoch": 0.11, "grad_norm": 0.5423412919044495, "learning_rate": 0.0005979589493735971, "loss": 1.9807, "step": 3350 }, { "epoch": 0.11, "grad_norm": 0.5601910352706909, "learning_rate": 0.0005979577320386752, "loss": 2.0619, "step": 3351 }, { "epoch": 0.11, "grad_norm": 0.5488393902778625, "learning_rate": 0.0005979565143420765, "loss": 2.0106, "step": 3352 }, { "epoch": 0.11, "grad_norm": 0.5203711986541748, "learning_rate": 0.0005979552962838025, "loss": 2.0356, "step": 3353 }, { "epoch": 0.11, "grad_norm": 0.5470808744430542, "learning_rate": 0.0005979540778638547, "loss": 1.9911, "step": 3354 }, { "epoch": 0.11, "grad_norm": 0.5149842500686646, "learning_rate": 0.0005979528590822345, "loss": 1.9664, "step": 3355 }, { "epoch": 0.11, "grad_norm": 0.5689486861228943, "learning_rate": 0.0005979516399389434, "loss": 2.097, "step": 3356 }, { "epoch": 0.11, "grad_norm": 0.53968745470047, "learning_rate": 0.000597950420433983, "loss": 2.061, "step": 3357 }, { "epoch": 0.11, "grad_norm": 0.5712537169456482, "learning_rate": 0.0005979492005673547, "loss": 2.0142, "step": 3358 }, { "epoch": 0.11, "grad_norm": 0.5372639298439026, "learning_rate": 0.0005979479803390599, "loss": 2.0552, "step": 3359 }, { "epoch": 0.11, "grad_norm": 0.5531194806098938, "learning_rate": 0.0005979467597491001, "loss": 2.0444, "step": 3360 }, { "epoch": 0.11, "grad_norm": 0.5339368581771851, "learning_rate": 0.000597945538797477, "loss": 2.0078, "step": 3361 }, { "epoch": 0.11, "grad_norm": 0.55793696641922, "learning_rate": 0.0005979443174841918, "loss": 2.0899, "step": 3362 }, { "epoch": 0.11, "grad_norm": 0.5408821105957031, "learning_rate": 0.0005979430958092461, "loss": 2.0319, "step": 3363 }, { "epoch": 0.11, "grad_norm": 0.5502636432647705, "learning_rate": 0.0005979418737726415, "loss": 2.0416, "step": 3364 }, { "epoch": 0.11, "grad_norm": 0.6389261484146118, "learning_rate": 0.0005979406513743793, "loss": 1.9576, "step": 3365 }, { "epoch": 0.11, "grad_norm": 0.5432513952255249, "learning_rate": 0.000597939428614461, "loss": 1.9015, "step": 3366 }, { "epoch": 0.11, "grad_norm": 0.5360246896743774, "learning_rate": 0.0005979382054928882, "loss": 1.9534, "step": 3367 }, { "epoch": 0.11, "grad_norm": 0.5570465922355652, "learning_rate": 0.0005979369820096623, "loss": 2.0583, "step": 3368 }, { "epoch": 0.11, "grad_norm": 0.5491742491722107, "learning_rate": 0.0005979357581647847, "loss": 2.074, "step": 3369 }, { "epoch": 0.11, "grad_norm": 0.5416978597640991, "learning_rate": 0.0005979345339582571, "loss": 2.0632, "step": 3370 }, { "epoch": 0.11, "grad_norm": 0.547989547252655, "learning_rate": 0.0005979333093900809, "loss": 2.003, "step": 3371 }, { "epoch": 0.11, "grad_norm": 0.5276409387588501, "learning_rate": 0.0005979320844602575, "loss": 2.0497, "step": 3372 }, { "epoch": 0.11, "grad_norm": 0.5429479479789734, "learning_rate": 0.0005979308591687884, "loss": 2.0864, "step": 3373 }, { "epoch": 0.11, "grad_norm": 0.5603103041648865, "learning_rate": 0.0005979296335156752, "loss": 2.1393, "step": 3374 }, { "epoch": 0.11, "grad_norm": 0.5300033688545227, "learning_rate": 0.0005979284075009193, "loss": 2.0734, "step": 3375 }, { "epoch": 0.11, "grad_norm": 0.5498872995376587, "learning_rate": 0.0005979271811245222, "loss": 1.9688, "step": 3376 }, { "epoch": 0.11, "grad_norm": 0.5395509600639343, "learning_rate": 0.0005979259543864853, "loss": 2.1, "step": 3377 }, { "epoch": 0.11, "grad_norm": 0.52984619140625, "learning_rate": 0.0005979247272868104, "loss": 2.0203, "step": 3378 }, { "epoch": 0.11, "grad_norm": 0.5614718198776245, "learning_rate": 0.0005979234998254987, "loss": 2.0168, "step": 3379 }, { "epoch": 0.11, "grad_norm": 0.535969078540802, "learning_rate": 0.0005979222720025517, "loss": 1.9981, "step": 3380 }, { "epoch": 0.11, "grad_norm": 0.5354865193367004, "learning_rate": 0.0005979210438179711, "loss": 2.076, "step": 3381 }, { "epoch": 0.11, "grad_norm": 0.5304152369499207, "learning_rate": 0.0005979198152717581, "loss": 2.046, "step": 3382 }, { "epoch": 0.11, "grad_norm": 0.5169440507888794, "learning_rate": 0.0005979185863639145, "loss": 2.0431, "step": 3383 }, { "epoch": 0.11, "grad_norm": 0.5288981199264526, "learning_rate": 0.0005979173570944415, "loss": 2.0754, "step": 3384 }, { "epoch": 0.11, "grad_norm": 0.5214051604270935, "learning_rate": 0.0005979161274633408, "loss": 1.9827, "step": 3385 }, { "epoch": 0.11, "grad_norm": 0.5218188762664795, "learning_rate": 0.0005979148974706137, "loss": 2.0533, "step": 3386 }, { "epoch": 0.11, "grad_norm": 0.5348088145256042, "learning_rate": 0.0005979136671162619, "loss": 1.9831, "step": 3387 }, { "epoch": 0.11, "grad_norm": 0.522563636302948, "learning_rate": 0.0005979124364002868, "loss": 1.9927, "step": 3388 }, { "epoch": 0.11, "grad_norm": 0.514119029045105, "learning_rate": 0.0005979112053226899, "loss": 2.0904, "step": 3389 }, { "epoch": 0.11, "grad_norm": 0.5804879665374756, "learning_rate": 0.0005979099738834727, "loss": 2.0072, "step": 3390 }, { "epoch": 0.11, "grad_norm": 0.5040454864501953, "learning_rate": 0.0005979087420826366, "loss": 1.9878, "step": 3391 }, { "epoch": 0.11, "grad_norm": 0.5739375352859497, "learning_rate": 0.0005979075099201834, "loss": 2.129, "step": 3392 }, { "epoch": 0.11, "grad_norm": 0.5359160304069519, "learning_rate": 0.0005979062773961142, "loss": 2.0055, "step": 3393 }, { "epoch": 0.11, "grad_norm": 0.5307302474975586, "learning_rate": 0.0005979050445104306, "loss": 2.0487, "step": 3394 }, { "epoch": 0.11, "grad_norm": 0.5702874064445496, "learning_rate": 0.0005979038112631344, "loss": 1.9864, "step": 3395 }, { "epoch": 0.11, "grad_norm": 0.5410826802253723, "learning_rate": 0.0005979025776542266, "loss": 2.0248, "step": 3396 }, { "epoch": 0.11, "grad_norm": 0.5442591905593872, "learning_rate": 0.0005979013436837092, "loss": 2.0238, "step": 3397 }, { "epoch": 0.11, "grad_norm": 0.5443907976150513, "learning_rate": 0.0005979001093515833, "loss": 2.0741, "step": 3398 }, { "epoch": 0.11, "grad_norm": 0.5902267694473267, "learning_rate": 0.0005978988746578507, "loss": 2.1195, "step": 3399 }, { "epoch": 0.11, "grad_norm": 0.5593705177307129, "learning_rate": 0.0005978976396025127, "loss": 2.0226, "step": 3400 }, { "epoch": 0.11, "grad_norm": 0.5197296142578125, "learning_rate": 0.0005978964041855708, "loss": 1.9479, "step": 3401 }, { "epoch": 0.11, "grad_norm": 0.5792983770370483, "learning_rate": 0.0005978951684070266, "loss": 2.0822, "step": 3402 }, { "epoch": 0.11, "grad_norm": 0.5592221617698669, "learning_rate": 0.0005978939322668816, "loss": 2.039, "step": 3403 }, { "epoch": 0.11, "grad_norm": 0.5376015305519104, "learning_rate": 0.0005978926957651373, "loss": 2.0581, "step": 3404 }, { "epoch": 0.11, "grad_norm": 0.5426849722862244, "learning_rate": 0.000597891458901795, "loss": 2.0421, "step": 3405 }, { "epoch": 0.11, "grad_norm": 0.5731310248374939, "learning_rate": 0.0005978902216768565, "loss": 2.0762, "step": 3406 }, { "epoch": 0.11, "grad_norm": 0.5150318145751953, "learning_rate": 0.0005978889840903231, "loss": 2.0223, "step": 3407 }, { "epoch": 0.11, "grad_norm": 0.5352984666824341, "learning_rate": 0.0005978877461421963, "loss": 1.9354, "step": 3408 }, { "epoch": 0.11, "grad_norm": 0.5445837378501892, "learning_rate": 0.0005978865078324777, "loss": 2.0407, "step": 3409 }, { "epoch": 0.11, "grad_norm": 0.5562388300895691, "learning_rate": 0.0005978852691611689, "loss": 2.0471, "step": 3410 }, { "epoch": 0.11, "grad_norm": 0.5047092437744141, "learning_rate": 0.0005978840301282711, "loss": 2.0756, "step": 3411 }, { "epoch": 0.11, "grad_norm": 0.5599055290222168, "learning_rate": 0.0005978827907337861, "loss": 2.0139, "step": 3412 }, { "epoch": 0.11, "grad_norm": 0.5277137756347656, "learning_rate": 0.0005978815509777153, "loss": 2.0169, "step": 3413 }, { "epoch": 0.11, "grad_norm": 0.5232303142547607, "learning_rate": 0.00059788031086006, "loss": 2.0054, "step": 3414 }, { "epoch": 0.11, "grad_norm": 0.5519618391990662, "learning_rate": 0.0005978790703808221, "loss": 2.0589, "step": 3415 }, { "epoch": 0.11, "grad_norm": 0.5450236797332764, "learning_rate": 0.0005978778295400029, "loss": 2.0895, "step": 3416 }, { "epoch": 0.11, "grad_norm": 0.5603067278862, "learning_rate": 0.0005978765883376037, "loss": 1.9545, "step": 3417 }, { "epoch": 0.11, "grad_norm": 0.5856851935386658, "learning_rate": 0.0005978753467736264, "loss": 2.0319, "step": 3418 }, { "epoch": 0.11, "grad_norm": 0.5454334616661072, "learning_rate": 0.0005978741048480723, "loss": 2.0994, "step": 3419 }, { "epoch": 0.11, "grad_norm": 0.5683084726333618, "learning_rate": 0.0005978728625609428, "loss": 2.0776, "step": 3420 }, { "epoch": 0.11, "grad_norm": 0.5227078199386597, "learning_rate": 0.0005978716199122396, "loss": 2.0088, "step": 3421 }, { "epoch": 0.11, "grad_norm": 0.5538684129714966, "learning_rate": 0.0005978703769019642, "loss": 2.0554, "step": 3422 }, { "epoch": 0.11, "grad_norm": 0.5469523072242737, "learning_rate": 0.000597869133530118, "loss": 2.0339, "step": 3423 }, { "epoch": 0.11, "grad_norm": 0.5546985864639282, "learning_rate": 0.0005978678897967027, "loss": 1.9968, "step": 3424 }, { "epoch": 0.11, "grad_norm": 0.5350781679153442, "learning_rate": 0.0005978666457017195, "loss": 1.9857, "step": 3425 }, { "epoch": 0.11, "grad_norm": 0.540770411491394, "learning_rate": 0.0005978654012451701, "loss": 2.0429, "step": 3426 }, { "epoch": 0.11, "grad_norm": 0.5547071695327759, "learning_rate": 0.000597864156427056, "loss": 2.1049, "step": 3427 }, { "epoch": 0.11, "grad_norm": 0.5605418682098389, "learning_rate": 0.0005978629112473787, "loss": 2.0513, "step": 3428 }, { "epoch": 0.11, "grad_norm": 0.5569054484367371, "learning_rate": 0.0005978616657061397, "loss": 2.0691, "step": 3429 }, { "epoch": 0.11, "grad_norm": 0.5463917255401611, "learning_rate": 0.0005978604198033407, "loss": 2.0625, "step": 3430 }, { "epoch": 0.11, "grad_norm": 0.5604130625724792, "learning_rate": 0.0005978591735389828, "loss": 2.0171, "step": 3431 }, { "epoch": 0.11, "grad_norm": 0.5275933742523193, "learning_rate": 0.0005978579269130678, "loss": 1.9933, "step": 3432 }, { "epoch": 0.11, "grad_norm": 0.5556560158729553, "learning_rate": 0.0005978566799255973, "loss": 2.0465, "step": 3433 }, { "epoch": 0.11, "grad_norm": 0.5562360882759094, "learning_rate": 0.0005978554325765725, "loss": 2.057, "step": 3434 }, { "epoch": 0.11, "grad_norm": 0.5394828915596008, "learning_rate": 0.0005978541848659952, "loss": 2.0646, "step": 3435 }, { "epoch": 0.11, "grad_norm": 0.5342943072319031, "learning_rate": 0.0005978529367938668, "loss": 2.0614, "step": 3436 }, { "epoch": 0.11, "grad_norm": 0.5556269884109497, "learning_rate": 0.0005978516883601889, "loss": 2.0478, "step": 3437 }, { "epoch": 0.11, "grad_norm": 0.5274981260299683, "learning_rate": 0.0005978504395649628, "loss": 2.0681, "step": 3438 }, { "epoch": 0.11, "grad_norm": 0.5886461138725281, "learning_rate": 0.0005978491904081902, "loss": 2.0057, "step": 3439 }, { "epoch": 0.11, "grad_norm": 0.5650332570075989, "learning_rate": 0.0005978479408898726, "loss": 1.911, "step": 3440 }, { "epoch": 0.11, "grad_norm": 0.513300359249115, "learning_rate": 0.0005978466910100114, "loss": 1.9786, "step": 3441 }, { "epoch": 0.11, "grad_norm": 0.5609060525894165, "learning_rate": 0.0005978454407686083, "loss": 2.0774, "step": 3442 }, { "epoch": 0.11, "grad_norm": 0.5594989657402039, "learning_rate": 0.0005978441901656647, "loss": 1.965, "step": 3443 }, { "epoch": 0.11, "grad_norm": 0.5247316956520081, "learning_rate": 0.0005978429392011821, "loss": 2.0097, "step": 3444 }, { "epoch": 0.11, "grad_norm": 0.5358147025108337, "learning_rate": 0.0005978416878751621, "loss": 2.021, "step": 3445 }, { "epoch": 0.11, "grad_norm": 0.5172073245048523, "learning_rate": 0.0005978404361876061, "loss": 2.0418, "step": 3446 }, { "epoch": 0.11, "grad_norm": 0.5240306854248047, "learning_rate": 0.0005978391841385158, "loss": 2.0223, "step": 3447 }, { "epoch": 0.11, "grad_norm": 0.5318727493286133, "learning_rate": 0.0005978379317278926, "loss": 2.0637, "step": 3448 }, { "epoch": 0.11, "grad_norm": 0.5431051254272461, "learning_rate": 0.0005978366789557379, "loss": 1.9877, "step": 3449 }, { "epoch": 0.11, "grad_norm": 0.5160372257232666, "learning_rate": 0.0005978354258220535, "loss": 2.0721, "step": 3450 }, { "epoch": 0.11, "grad_norm": 0.5427873134613037, "learning_rate": 0.0005978341723268407, "loss": 2.0824, "step": 3451 }, { "epoch": 0.11, "grad_norm": 0.5368863940238953, "learning_rate": 0.0005978329184701012, "loss": 2.0391, "step": 3452 }, { "epoch": 0.11, "grad_norm": 0.5215761661529541, "learning_rate": 0.0005978316642518362, "loss": 1.9953, "step": 3453 }, { "epoch": 0.11, "grad_norm": 0.523896336555481, "learning_rate": 0.0005978304096720476, "loss": 1.9606, "step": 3454 }, { "epoch": 0.11, "grad_norm": 0.5216464400291443, "learning_rate": 0.0005978291547307368, "loss": 2.0267, "step": 3455 }, { "epoch": 0.11, "grad_norm": 0.5419944524765015, "learning_rate": 0.0005978278994279053, "loss": 1.9474, "step": 3456 }, { "epoch": 0.12, "grad_norm": 0.5393519401550293, "learning_rate": 0.0005978266437635546, "loss": 2.0009, "step": 3457 }, { "epoch": 0.12, "grad_norm": 0.5324592590332031, "learning_rate": 0.0005978253877376861, "loss": 2.0446, "step": 3458 }, { "epoch": 0.12, "grad_norm": 0.5822349190711975, "learning_rate": 0.0005978241313503016, "loss": 2.0218, "step": 3459 }, { "epoch": 0.12, "grad_norm": 0.5541898012161255, "learning_rate": 0.0005978228746014024, "loss": 2.0276, "step": 3460 }, { "epoch": 0.12, "grad_norm": 0.5289289951324463, "learning_rate": 0.0005978216174909901, "loss": 2.0292, "step": 3461 }, { "epoch": 0.12, "grad_norm": 0.5444139242172241, "learning_rate": 0.0005978203600190663, "loss": 1.9702, "step": 3462 }, { "epoch": 0.12, "grad_norm": 0.5188280940055847, "learning_rate": 0.0005978191021856325, "loss": 2.0274, "step": 3463 }, { "epoch": 0.12, "grad_norm": 0.5360382199287415, "learning_rate": 0.0005978178439906902, "loss": 2.0795, "step": 3464 }, { "epoch": 0.12, "grad_norm": 0.5244936943054199, "learning_rate": 0.0005978165854342408, "loss": 2.0383, "step": 3465 }, { "epoch": 0.12, "grad_norm": 0.5305665731430054, "learning_rate": 0.000597815326516286, "loss": 2.0201, "step": 3466 }, { "epoch": 0.12, "grad_norm": 0.5523242354393005, "learning_rate": 0.0005978140672368273, "loss": 2.1291, "step": 3467 }, { "epoch": 0.12, "grad_norm": 0.5324377417564392, "learning_rate": 0.0005978128075958662, "loss": 2.0328, "step": 3468 }, { "epoch": 0.12, "grad_norm": 0.5320661664009094, "learning_rate": 0.0005978115475934043, "loss": 2.0524, "step": 3469 }, { "epoch": 0.12, "grad_norm": 0.5200886130332947, "learning_rate": 0.000597810287229443, "loss": 1.9486, "step": 3470 }, { "epoch": 0.12, "grad_norm": 0.5358641743659973, "learning_rate": 0.0005978090265039838, "loss": 2.0936, "step": 3471 }, { "epoch": 0.12, "grad_norm": 0.5418639183044434, "learning_rate": 0.0005978077654170285, "loss": 2.0964, "step": 3472 }, { "epoch": 0.12, "grad_norm": 0.5471113324165344, "learning_rate": 0.0005978065039685783, "loss": 2.0127, "step": 3473 }, { "epoch": 0.12, "grad_norm": 0.512936532497406, "learning_rate": 0.0005978052421586349, "loss": 2.0288, "step": 3474 }, { "epoch": 0.12, "grad_norm": 0.5323574542999268, "learning_rate": 0.0005978039799871998, "loss": 1.967, "step": 3475 }, { "epoch": 0.12, "grad_norm": 0.556268036365509, "learning_rate": 0.0005978027174542747, "loss": 2.0623, "step": 3476 }, { "epoch": 0.12, "grad_norm": 0.5149837136268616, "learning_rate": 0.0005978014545598609, "loss": 1.9551, "step": 3477 }, { "epoch": 0.12, "grad_norm": 0.5123770236968994, "learning_rate": 0.00059780019130396, "loss": 2.0472, "step": 3478 }, { "epoch": 0.12, "grad_norm": 0.5408891439437866, "learning_rate": 0.0005977989276865735, "loss": 2.0682, "step": 3479 }, { "epoch": 0.12, "grad_norm": 0.6912482380867004, "learning_rate": 0.000597797663707703, "loss": 2.013, "step": 3480 }, { "epoch": 0.12, "grad_norm": 0.535076916217804, "learning_rate": 0.0005977963993673501, "loss": 2.0495, "step": 3481 }, { "epoch": 0.12, "grad_norm": 0.5322169661521912, "learning_rate": 0.0005977951346655162, "loss": 2.0549, "step": 3482 }, { "epoch": 0.12, "grad_norm": 0.5502286553382874, "learning_rate": 0.0005977938696022027, "loss": 2.123, "step": 3483 }, { "epoch": 0.12, "grad_norm": 0.5319870710372925, "learning_rate": 0.0005977926041774116, "loss": 2.0055, "step": 3484 }, { "epoch": 0.12, "grad_norm": 0.5288717746734619, "learning_rate": 0.0005977913383911441, "loss": 2.0892, "step": 3485 }, { "epoch": 0.12, "grad_norm": 0.5389880537986755, "learning_rate": 0.0005977900722434018, "loss": 2.0828, "step": 3486 }, { "epoch": 0.12, "grad_norm": 0.5579193234443665, "learning_rate": 0.0005977888057341861, "loss": 2.0263, "step": 3487 }, { "epoch": 0.12, "grad_norm": 0.5371586084365845, "learning_rate": 0.0005977875388634988, "loss": 2.0607, "step": 3488 }, { "epoch": 0.12, "grad_norm": 0.5149588584899902, "learning_rate": 0.0005977862716313411, "loss": 1.9919, "step": 3489 }, { "epoch": 0.12, "grad_norm": 0.5172721147537231, "learning_rate": 0.0005977850040377149, "loss": 2.0182, "step": 3490 }, { "epoch": 0.12, "grad_norm": 0.53783118724823, "learning_rate": 0.0005977837360826216, "loss": 2.057, "step": 3491 }, { "epoch": 0.12, "grad_norm": 0.5347322821617126, "learning_rate": 0.0005977824677660627, "loss": 2.0816, "step": 3492 }, { "epoch": 0.12, "grad_norm": 0.504487931728363, "learning_rate": 0.0005977811990880397, "loss": 1.9449, "step": 3493 }, { "epoch": 0.12, "grad_norm": 0.5380629897117615, "learning_rate": 0.0005977799300485543, "loss": 2.1145, "step": 3494 }, { "epoch": 0.12, "grad_norm": 0.519880473613739, "learning_rate": 0.0005977786606476079, "loss": 2.0829, "step": 3495 }, { "epoch": 0.12, "grad_norm": 0.5424317717552185, "learning_rate": 0.000597777390885202, "loss": 2.0783, "step": 3496 }, { "epoch": 0.12, "grad_norm": 0.5316056609153748, "learning_rate": 0.0005977761207613383, "loss": 2.1383, "step": 3497 }, { "epoch": 0.12, "grad_norm": 0.5705175399780273, "learning_rate": 0.0005977748502760182, "loss": 2.0114, "step": 3498 }, { "epoch": 0.12, "grad_norm": 0.5408135056495667, "learning_rate": 0.0005977735794292434, "loss": 2.0332, "step": 3499 }, { "epoch": 0.12, "grad_norm": 0.5365865230560303, "learning_rate": 0.0005977723082210154, "loss": 2.0137, "step": 3500 }, { "epoch": 0.12, "grad_norm": 0.5595599412918091, "learning_rate": 0.0005977710366513356, "loss": 1.9987, "step": 3501 }, { "epoch": 0.12, "grad_norm": 0.5380111336708069, "learning_rate": 0.0005977697647202056, "loss": 2.0455, "step": 3502 }, { "epoch": 0.12, "grad_norm": 0.5618389844894409, "learning_rate": 0.000597768492427627, "loss": 2.0285, "step": 3503 }, { "epoch": 0.12, "grad_norm": 0.5865478515625, "learning_rate": 0.0005977672197736015, "loss": 2.1342, "step": 3504 }, { "epoch": 0.12, "grad_norm": 0.5471731424331665, "learning_rate": 0.0005977659467581302, "loss": 1.9844, "step": 3505 }, { "epoch": 0.12, "grad_norm": 0.5364375710487366, "learning_rate": 0.000597764673381215, "loss": 2.1172, "step": 3506 }, { "epoch": 0.12, "grad_norm": 0.543666422367096, "learning_rate": 0.0005977633996428574, "loss": 2.0073, "step": 3507 }, { "epoch": 0.12, "grad_norm": 0.5581921339035034, "learning_rate": 0.000597762125543059, "loss": 2.0657, "step": 3508 }, { "epoch": 0.12, "grad_norm": 0.5522194504737854, "learning_rate": 0.000597760851081821, "loss": 2.0215, "step": 3509 }, { "epoch": 0.12, "grad_norm": 0.5498065948486328, "learning_rate": 0.0005977595762591454, "loss": 2.0014, "step": 3510 }, { "epoch": 0.12, "grad_norm": 0.5781018733978271, "learning_rate": 0.0005977583010750335, "loss": 2.0881, "step": 3511 }, { "epoch": 0.12, "grad_norm": 0.5307928919792175, "learning_rate": 0.0005977570255294869, "loss": 1.9867, "step": 3512 }, { "epoch": 0.12, "grad_norm": 0.5396526455879211, "learning_rate": 0.0005977557496225071, "loss": 1.9466, "step": 3513 }, { "epoch": 0.12, "grad_norm": 0.5916662812232971, "learning_rate": 0.0005977544733540958, "loss": 2.0632, "step": 3514 }, { "epoch": 0.12, "grad_norm": 0.540381669998169, "learning_rate": 0.0005977531967242544, "loss": 2.0652, "step": 3515 }, { "epoch": 0.12, "grad_norm": 0.5476248264312744, "learning_rate": 0.0005977519197329844, "loss": 2.0686, "step": 3516 }, { "epoch": 0.12, "grad_norm": 0.5534717440605164, "learning_rate": 0.0005977506423802875, "loss": 2.0047, "step": 3517 }, { "epoch": 0.12, "grad_norm": 0.5412439703941345, "learning_rate": 0.0005977493646661652, "loss": 1.9854, "step": 3518 }, { "epoch": 0.12, "grad_norm": 0.5488806366920471, "learning_rate": 0.000597748086590619, "loss": 2.0236, "step": 3519 }, { "epoch": 0.12, "grad_norm": 0.533470869064331, "learning_rate": 0.0005977468081536505, "loss": 2.0298, "step": 3520 }, { "epoch": 0.12, "grad_norm": 0.515446662902832, "learning_rate": 0.0005977455293552613, "loss": 2.0861, "step": 3521 }, { "epoch": 0.12, "grad_norm": 0.5257336497306824, "learning_rate": 0.0005977442501954529, "loss": 1.9992, "step": 3522 }, { "epoch": 0.12, "grad_norm": 0.572928786277771, "learning_rate": 0.0005977429706742268, "loss": 2.0815, "step": 3523 }, { "epoch": 0.12, "grad_norm": 0.5389021039009094, "learning_rate": 0.0005977416907915847, "loss": 2.0736, "step": 3524 }, { "epoch": 0.12, "grad_norm": 0.5479616522789001, "learning_rate": 0.0005977404105475279, "loss": 2.0806, "step": 3525 }, { "epoch": 0.12, "grad_norm": 0.573508620262146, "learning_rate": 0.0005977391299420582, "loss": 2.0118, "step": 3526 }, { "epoch": 0.12, "grad_norm": 0.5310660004615784, "learning_rate": 0.000597737848975177, "loss": 2.0505, "step": 3527 }, { "epoch": 0.12, "grad_norm": 0.5462110042572021, "learning_rate": 0.0005977365676468861, "loss": 2.095, "step": 3528 }, { "epoch": 0.12, "grad_norm": 0.5119500160217285, "learning_rate": 0.0005977352859571868, "loss": 1.9946, "step": 3529 }, { "epoch": 0.12, "grad_norm": 0.5301435589790344, "learning_rate": 0.0005977340039060806, "loss": 2.0413, "step": 3530 }, { "epoch": 0.12, "grad_norm": 0.5199999809265137, "learning_rate": 0.0005977327214935693, "loss": 2.0084, "step": 3531 }, { "epoch": 0.12, "grad_norm": 0.5325692892074585, "learning_rate": 0.0005977314387196544, "loss": 2.0227, "step": 3532 }, { "epoch": 0.12, "grad_norm": 0.5452000498771667, "learning_rate": 0.0005977301555843373, "loss": 2.0976, "step": 3533 }, { "epoch": 0.12, "grad_norm": 0.5424582958221436, "learning_rate": 0.0005977288720876195, "loss": 2.0447, "step": 3534 }, { "epoch": 0.12, "grad_norm": 0.5390550494194031, "learning_rate": 0.000597727588229503, "loss": 2.1287, "step": 3535 }, { "epoch": 0.12, "grad_norm": 0.5332249402999878, "learning_rate": 0.000597726304009989, "loss": 2.014, "step": 3536 }, { "epoch": 0.12, "grad_norm": 0.5314800143241882, "learning_rate": 0.0005977250194290791, "loss": 2.043, "step": 3537 }, { "epoch": 0.12, "grad_norm": 0.5690085291862488, "learning_rate": 0.000597723734486775, "loss": 2.025, "step": 3538 }, { "epoch": 0.12, "grad_norm": 0.5172752141952515, "learning_rate": 0.000597722449183078, "loss": 1.9376, "step": 3539 }, { "epoch": 0.12, "grad_norm": 0.5484266877174377, "learning_rate": 0.0005977211635179899, "loss": 2.0467, "step": 3540 }, { "epoch": 0.12, "grad_norm": 0.5135730504989624, "learning_rate": 0.0005977198774915121, "loss": 2.0413, "step": 3541 }, { "epoch": 0.12, "grad_norm": 0.5281062722206116, "learning_rate": 0.0005977185911036464, "loss": 2.0066, "step": 3542 }, { "epoch": 0.12, "grad_norm": 0.5297719240188599, "learning_rate": 0.000597717304354394, "loss": 1.9826, "step": 3543 }, { "epoch": 0.12, "grad_norm": 0.5278794169425964, "learning_rate": 0.0005977160172437568, "loss": 2.0387, "step": 3544 }, { "epoch": 0.12, "grad_norm": 0.5298485159873962, "learning_rate": 0.0005977147297717362, "loss": 2.096, "step": 3545 }, { "epoch": 0.12, "grad_norm": 0.5278101563453674, "learning_rate": 0.0005977134419383337, "loss": 1.9416, "step": 3546 }, { "epoch": 0.12, "grad_norm": 0.5298143029212952, "learning_rate": 0.000597712153743551, "loss": 2.0236, "step": 3547 }, { "epoch": 0.12, "grad_norm": 0.5356826782226562, "learning_rate": 0.0005977108651873896, "loss": 2.0501, "step": 3548 }, { "epoch": 0.12, "grad_norm": 0.6437937021255493, "learning_rate": 0.000597709576269851, "loss": 1.9785, "step": 3549 }, { "epoch": 0.12, "grad_norm": 0.5556550025939941, "learning_rate": 0.0005977082869909371, "loss": 2.0235, "step": 3550 }, { "epoch": 0.12, "grad_norm": 0.5705327391624451, "learning_rate": 0.0005977069973506488, "loss": 2.0558, "step": 3551 }, { "epoch": 0.12, "grad_norm": 0.5309748649597168, "learning_rate": 0.0005977057073489884, "loss": 2.0216, "step": 3552 }, { "epoch": 0.12, "grad_norm": 0.5577542185783386, "learning_rate": 0.0005977044169859571, "loss": 1.9809, "step": 3553 }, { "epoch": 0.12, "grad_norm": 0.5385357737541199, "learning_rate": 0.0005977031262615563, "loss": 1.946, "step": 3554 }, { "epoch": 0.12, "grad_norm": 0.5530093908309937, "learning_rate": 0.000597701835175788, "loss": 2.0277, "step": 3555 }, { "epoch": 0.12, "grad_norm": 0.5094737410545349, "learning_rate": 0.0005977005437286534, "loss": 2.0419, "step": 3556 }, { "epoch": 0.12, "grad_norm": 0.5537251234054565, "learning_rate": 0.0005976992519201543, "loss": 2.0666, "step": 3557 }, { "epoch": 0.12, "grad_norm": 0.5400016903877258, "learning_rate": 0.000597697959750292, "loss": 2.0232, "step": 3558 }, { "epoch": 0.12, "grad_norm": 0.5188530683517456, "learning_rate": 0.0005976966672190684, "loss": 1.9804, "step": 3559 }, { "epoch": 0.12, "grad_norm": 0.5221695899963379, "learning_rate": 0.0005976953743264849, "loss": 2.0028, "step": 3560 }, { "epoch": 0.12, "grad_norm": 0.5200198292732239, "learning_rate": 0.0005976940810725431, "loss": 2.1073, "step": 3561 }, { "epoch": 0.12, "grad_norm": 0.5127568244934082, "learning_rate": 0.0005976927874572444, "loss": 2.0031, "step": 3562 }, { "epoch": 0.12, "grad_norm": 0.5374304056167603, "learning_rate": 0.0005976914934805906, "loss": 1.9944, "step": 3563 }, { "epoch": 0.12, "grad_norm": 0.5097367763519287, "learning_rate": 0.0005976901991425832, "loss": 1.9589, "step": 3564 }, { "epoch": 0.12, "grad_norm": 0.5240688323974609, "learning_rate": 0.0005976889044432238, "loss": 2.0675, "step": 3565 }, { "epoch": 0.12, "grad_norm": 0.5239568948745728, "learning_rate": 0.0005976876093825139, "loss": 2.0431, "step": 3566 }, { "epoch": 0.12, "grad_norm": 0.5298818349838257, "learning_rate": 0.0005976863139604551, "loss": 2.0837, "step": 3567 }, { "epoch": 0.12, "grad_norm": 0.5601015090942383, "learning_rate": 0.000597685018177049, "loss": 1.9901, "step": 3568 }, { "epoch": 0.12, "grad_norm": 0.5236983299255371, "learning_rate": 0.0005976837220322971, "loss": 1.9483, "step": 3569 }, { "epoch": 0.12, "grad_norm": 0.529679000377655, "learning_rate": 0.0005976824255262011, "loss": 2.0913, "step": 3570 }, { "epoch": 0.12, "grad_norm": 0.5589321255683899, "learning_rate": 0.0005976811286587624, "loss": 2.0367, "step": 3571 }, { "epoch": 0.12, "grad_norm": 0.5460565686225891, "learning_rate": 0.0005976798314299827, "loss": 2.0941, "step": 3572 }, { "epoch": 0.12, "grad_norm": 0.5125850439071655, "learning_rate": 0.0005976785338398636, "loss": 2.0393, "step": 3573 }, { "epoch": 0.12, "grad_norm": 0.5048824548721313, "learning_rate": 0.0005976772358884066, "loss": 2.0663, "step": 3574 }, { "epoch": 0.12, "grad_norm": 0.5394979119300842, "learning_rate": 0.0005976759375756133, "loss": 2.0429, "step": 3575 }, { "epoch": 0.12, "grad_norm": 0.5169376730918884, "learning_rate": 0.0005976746389014852, "loss": 2.0489, "step": 3576 }, { "epoch": 0.12, "grad_norm": 0.5175694227218628, "learning_rate": 0.0005976733398660241, "loss": 2.0215, "step": 3577 }, { "epoch": 0.12, "grad_norm": 0.5427377223968506, "learning_rate": 0.0005976720404692313, "loss": 2.1212, "step": 3578 }, { "epoch": 0.12, "grad_norm": 0.5026820302009583, "learning_rate": 0.0005976707407111086, "loss": 2.0409, "step": 3579 }, { "epoch": 0.12, "grad_norm": 0.4934423565864563, "learning_rate": 0.0005976694405916573, "loss": 2.0475, "step": 3580 }, { "epoch": 0.12, "grad_norm": 0.5221704840660095, "learning_rate": 0.0005976681401108793, "loss": 1.9871, "step": 3581 }, { "epoch": 0.12, "grad_norm": 0.5168452858924866, "learning_rate": 0.000597666839268776, "loss": 2.0813, "step": 3582 }, { "epoch": 0.12, "grad_norm": 0.5346972346305847, "learning_rate": 0.000597665538065349, "loss": 1.9429, "step": 3583 }, { "epoch": 0.12, "grad_norm": 0.5307336449623108, "learning_rate": 0.0005976642365006, "loss": 1.9618, "step": 3584 }, { "epoch": 0.12, "grad_norm": 0.5169903039932251, "learning_rate": 0.0005976629345745304, "loss": 2.0551, "step": 3585 }, { "epoch": 0.12, "grad_norm": 0.5267885327339172, "learning_rate": 0.0005976616322871418, "loss": 2.0783, "step": 3586 }, { "epoch": 0.12, "grad_norm": 0.5376043915748596, "learning_rate": 0.0005976603296384359, "loss": 2.0505, "step": 3587 }, { "epoch": 0.12, "grad_norm": 0.5446121096611023, "learning_rate": 0.0005976590266284142, "loss": 2.0645, "step": 3588 }, { "epoch": 0.12, "grad_norm": 0.5281849503517151, "learning_rate": 0.0005976577232570783, "loss": 2.0472, "step": 3589 }, { "epoch": 0.12, "grad_norm": 0.6496939659118652, "learning_rate": 0.0005976564195244297, "loss": 2.0678, "step": 3590 }, { "epoch": 0.12, "grad_norm": 0.5493081212043762, "learning_rate": 0.0005976551154304702, "loss": 1.9883, "step": 3591 }, { "epoch": 0.12, "grad_norm": 0.5422359108924866, "learning_rate": 0.0005976538109752012, "loss": 2.0114, "step": 3592 }, { "epoch": 0.12, "grad_norm": 0.542181670665741, "learning_rate": 0.0005976525061586244, "loss": 2.0626, "step": 3593 }, { "epoch": 0.12, "grad_norm": 0.5191642642021179, "learning_rate": 0.0005976512009807413, "loss": 2.0906, "step": 3594 }, { "epoch": 0.12, "grad_norm": 0.5403131246566772, "learning_rate": 0.0005976498954415534, "loss": 2.0446, "step": 3595 }, { "epoch": 0.12, "grad_norm": 0.5572816133499146, "learning_rate": 0.0005976485895410623, "loss": 2.042, "step": 3596 }, { "epoch": 0.12, "grad_norm": 0.5553923845291138, "learning_rate": 0.0005976472832792699, "loss": 2.0573, "step": 3597 }, { "epoch": 0.12, "grad_norm": 0.5402312278747559, "learning_rate": 0.0005976459766561774, "loss": 2.0452, "step": 3598 }, { "epoch": 0.12, "grad_norm": 0.5169528722763062, "learning_rate": 0.0005976446696717867, "loss": 1.9703, "step": 3599 }, { "epoch": 0.12, "grad_norm": 0.5459778308868408, "learning_rate": 0.000597643362326099, "loss": 2.0165, "step": 3600 }, { "epoch": 0.12, "grad_norm": 0.5410203337669373, "learning_rate": 0.0005976420546191162, "loss": 1.9832, "step": 3601 }, { "epoch": 0.12, "grad_norm": 0.5576975345611572, "learning_rate": 0.0005976407465508399, "loss": 2.0152, "step": 3602 }, { "epoch": 0.12, "grad_norm": 0.5480056405067444, "learning_rate": 0.0005976394381212715, "loss": 2.0595, "step": 3603 }, { "epoch": 0.12, "grad_norm": 0.5397840738296509, "learning_rate": 0.0005976381293304128, "loss": 2.0547, "step": 3604 }, { "epoch": 0.12, "grad_norm": 0.5422259569168091, "learning_rate": 0.0005976368201782651, "loss": 2.049, "step": 3605 }, { "epoch": 0.12, "grad_norm": 0.5210201740264893, "learning_rate": 0.0005976355106648302, "loss": 2.0092, "step": 3606 }, { "epoch": 0.12, "grad_norm": 0.5642973184585571, "learning_rate": 0.0005976342007901097, "loss": 1.9608, "step": 3607 }, { "epoch": 0.12, "grad_norm": 0.5562832355499268, "learning_rate": 0.000597632890554105, "loss": 2.0656, "step": 3608 }, { "epoch": 0.12, "grad_norm": 0.5270005464553833, "learning_rate": 0.0005976315799568181, "loss": 2.0018, "step": 3609 }, { "epoch": 0.12, "grad_norm": 0.5558375716209412, "learning_rate": 0.00059763026899825, "loss": 1.9891, "step": 3610 }, { "epoch": 0.12, "grad_norm": 0.5505795478820801, "learning_rate": 0.0005976289576784028, "loss": 2.0731, "step": 3611 }, { "epoch": 0.12, "grad_norm": 0.5390530824661255, "learning_rate": 0.0005976276459972778, "loss": 2.0633, "step": 3612 }, { "epoch": 0.12, "grad_norm": 0.5636915564537048, "learning_rate": 0.0005976263339548769, "loss": 2.0486, "step": 3613 }, { "epoch": 0.12, "grad_norm": 0.5708433389663696, "learning_rate": 0.0005976250215512012, "loss": 2.0166, "step": 3614 }, { "epoch": 0.12, "grad_norm": 0.531641960144043, "learning_rate": 0.0005976237087862527, "loss": 1.9877, "step": 3615 }, { "epoch": 0.12, "grad_norm": 0.5554384589195251, "learning_rate": 0.0005976223956600329, "loss": 2.06, "step": 3616 }, { "epoch": 0.12, "grad_norm": 0.5839734077453613, "learning_rate": 0.0005976210821725433, "loss": 2.0202, "step": 3617 }, { "epoch": 0.12, "grad_norm": 0.5265007615089417, "learning_rate": 0.0005976197683237856, "loss": 2.0379, "step": 3618 }, { "epoch": 0.12, "grad_norm": 0.5407382845878601, "learning_rate": 0.0005976184541137613, "loss": 2.0838, "step": 3619 }, { "epoch": 0.12, "grad_norm": 0.6356545686721802, "learning_rate": 0.0005976171395424721, "loss": 2.0464, "step": 3620 }, { "epoch": 0.12, "grad_norm": 0.5268267393112183, "learning_rate": 0.0005976158246099196, "loss": 1.9735, "step": 3621 }, { "epoch": 0.12, "grad_norm": 0.5556126832962036, "learning_rate": 0.0005976145093161052, "loss": 2.0621, "step": 3622 }, { "epoch": 0.12, "grad_norm": 0.5613428354263306, "learning_rate": 0.0005976131936610308, "loss": 2.0743, "step": 3623 }, { "epoch": 0.12, "grad_norm": 0.5235967040061951, "learning_rate": 0.0005976118776446978, "loss": 2.0581, "step": 3624 }, { "epoch": 0.12, "grad_norm": 0.5142425298690796, "learning_rate": 0.0005976105612671077, "loss": 2.1056, "step": 3625 }, { "epoch": 0.12, "grad_norm": 0.5582021474838257, "learning_rate": 0.0005976092445282624, "loss": 2.1071, "step": 3626 }, { "epoch": 0.12, "grad_norm": 0.5610502362251282, "learning_rate": 0.0005976079274281632, "loss": 1.9401, "step": 3627 }, { "epoch": 0.12, "grad_norm": 0.5397884845733643, "learning_rate": 0.0005976066099668119, "loss": 2.0607, "step": 3628 }, { "epoch": 0.12, "grad_norm": 0.5421331524848938, "learning_rate": 0.0005976052921442098, "loss": 1.9841, "step": 3629 }, { "epoch": 0.12, "grad_norm": 0.5780400633811951, "learning_rate": 0.000597603973960359, "loss": 2.0972, "step": 3630 }, { "epoch": 0.12, "grad_norm": 0.5374252796173096, "learning_rate": 0.0005976026554152607, "loss": 2.0001, "step": 3631 }, { "epoch": 0.12, "grad_norm": 0.5153918266296387, "learning_rate": 0.0005976013365089167, "loss": 2.0102, "step": 3632 }, { "epoch": 0.12, "grad_norm": 0.5459708571434021, "learning_rate": 0.0005976000172413285, "loss": 2.0859, "step": 3633 }, { "epoch": 0.12, "grad_norm": 0.5292580127716064, "learning_rate": 0.0005975986976124977, "loss": 2.0702, "step": 3634 }, { "epoch": 0.12, "grad_norm": 0.5131518244743347, "learning_rate": 0.000597597377622426, "loss": 2.0083, "step": 3635 }, { "epoch": 0.12, "grad_norm": 0.5195586085319519, "learning_rate": 0.0005975960572711149, "loss": 1.9965, "step": 3636 }, { "epoch": 0.12, "grad_norm": 0.5582557916641235, "learning_rate": 0.000597594736558566, "loss": 2.0091, "step": 3637 }, { "epoch": 0.12, "grad_norm": 0.5460308790206909, "learning_rate": 0.000597593415484781, "loss": 1.9193, "step": 3638 }, { "epoch": 0.12, "grad_norm": 0.515882670879364, "learning_rate": 0.0005975920940497613, "loss": 2.0288, "step": 3639 }, { "epoch": 0.12, "grad_norm": 0.5479389429092407, "learning_rate": 0.0005975907722535088, "loss": 2.0366, "step": 3640 }, { "epoch": 0.12, "grad_norm": 0.5294356346130371, "learning_rate": 0.0005975894500960249, "loss": 2.0138, "step": 3641 }, { "epoch": 0.12, "grad_norm": 0.5580053925514221, "learning_rate": 0.0005975881275773113, "loss": 2.0783, "step": 3642 }, { "epoch": 0.12, "grad_norm": 0.5385705232620239, "learning_rate": 0.0005975868046973695, "loss": 2.0109, "step": 3643 }, { "epoch": 0.12, "grad_norm": 0.5035665035247803, "learning_rate": 0.0005975854814562012, "loss": 1.9717, "step": 3644 }, { "epoch": 0.12, "grad_norm": 0.5344738364219666, "learning_rate": 0.0005975841578538079, "loss": 2.0711, "step": 3645 }, { "epoch": 0.12, "grad_norm": 0.5506207942962646, "learning_rate": 0.0005975828338901914, "loss": 2.0607, "step": 3646 }, { "epoch": 0.12, "grad_norm": 0.5320329070091248, "learning_rate": 0.000597581509565353, "loss": 2.0658, "step": 3647 }, { "epoch": 0.12, "grad_norm": 0.522212028503418, "learning_rate": 0.0005975801848792947, "loss": 1.9955, "step": 3648 }, { "epoch": 0.12, "grad_norm": 0.5354901552200317, "learning_rate": 0.0005975788598320177, "loss": 2.0242, "step": 3649 }, { "epoch": 0.12, "grad_norm": 0.543381929397583, "learning_rate": 0.0005975775344235241, "loss": 2.0598, "step": 3650 }, { "epoch": 0.12, "grad_norm": 0.525300920009613, "learning_rate": 0.000597576208653815, "loss": 2.0641, "step": 3651 }, { "epoch": 0.12, "grad_norm": 0.5509144067764282, "learning_rate": 0.0005975748825228922, "loss": 2.0316, "step": 3652 }, { "epoch": 0.12, "grad_norm": 0.5108699798583984, "learning_rate": 0.0005975735560307575, "loss": 1.9759, "step": 3653 }, { "epoch": 0.12, "grad_norm": 0.5201375484466553, "learning_rate": 0.0005975722291774122, "loss": 2.0839, "step": 3654 }, { "epoch": 0.12, "grad_norm": 0.5285505056381226, "learning_rate": 0.0005975709019628582, "loss": 2.1016, "step": 3655 }, { "epoch": 0.12, "grad_norm": 0.5338219404220581, "learning_rate": 0.0005975695743870969, "loss": 2.0602, "step": 3656 }, { "epoch": 0.12, "grad_norm": 0.5236161351203918, "learning_rate": 0.0005975682464501299, "loss": 2.0682, "step": 3657 }, { "epoch": 0.12, "grad_norm": 0.5159482359886169, "learning_rate": 0.000597566918151959, "loss": 2.0069, "step": 3658 }, { "epoch": 0.12, "grad_norm": 0.5033335089683533, "learning_rate": 0.0005975655894925857, "loss": 1.9843, "step": 3659 }, { "epoch": 0.12, "grad_norm": 0.5208632349967957, "learning_rate": 0.0005975642604720116, "loss": 2.0398, "step": 3660 }, { "epoch": 0.12, "grad_norm": 0.5522626638412476, "learning_rate": 0.0005975629310902382, "loss": 2.0789, "step": 3661 }, { "epoch": 0.12, "grad_norm": 0.5110765695571899, "learning_rate": 0.0005975616013472674, "loss": 2.0181, "step": 3662 }, { "epoch": 0.12, "grad_norm": 0.5054663419723511, "learning_rate": 0.0005975602712431006, "loss": 2.0674, "step": 3663 }, { "epoch": 0.12, "grad_norm": 0.5061655640602112, "learning_rate": 0.0005975589407777396, "loss": 2.0329, "step": 3664 }, { "epoch": 0.12, "grad_norm": 0.532522439956665, "learning_rate": 0.0005975576099511857, "loss": 2.0711, "step": 3665 }, { "epoch": 0.12, "grad_norm": 0.5325838923454285, "learning_rate": 0.0005975562787634408, "loss": 1.9931, "step": 3666 }, { "epoch": 0.12, "grad_norm": 0.51915442943573, "learning_rate": 0.0005975549472145064, "loss": 2.0244, "step": 3667 }, { "epoch": 0.12, "grad_norm": 0.530524730682373, "learning_rate": 0.0005975536153043842, "loss": 1.9914, "step": 3668 }, { "epoch": 0.12, "grad_norm": 0.5889463424682617, "learning_rate": 0.0005975522830330757, "loss": 2.0275, "step": 3669 }, { "epoch": 0.12, "grad_norm": 0.5173787474632263, "learning_rate": 0.0005975509504005825, "loss": 1.9934, "step": 3670 }, { "epoch": 0.12, "grad_norm": 0.5343263149261475, "learning_rate": 0.0005975496174069064, "loss": 2.0443, "step": 3671 }, { "epoch": 0.12, "grad_norm": 0.5310655832290649, "learning_rate": 0.0005975482840520489, "loss": 1.9493, "step": 3672 }, { "epoch": 0.12, "grad_norm": 0.5398275256156921, "learning_rate": 0.0005975469503360116, "loss": 2.09, "step": 3673 }, { "epoch": 0.12, "grad_norm": 0.5323675274848938, "learning_rate": 0.0005975456162587962, "loss": 2.0701, "step": 3674 }, { "epoch": 0.12, "grad_norm": 0.5271369814872742, "learning_rate": 0.000597544281820404, "loss": 1.9534, "step": 3675 }, { "epoch": 0.12, "grad_norm": 0.5382534265518188, "learning_rate": 0.0005975429470208371, "loss": 2.0664, "step": 3676 }, { "epoch": 0.12, "grad_norm": 0.5164488554000854, "learning_rate": 0.0005975416118600969, "loss": 1.9789, "step": 3677 }, { "epoch": 0.12, "grad_norm": 0.5319002866744995, "learning_rate": 0.0005975402763381851, "loss": 2.1153, "step": 3678 }, { "epoch": 0.12, "grad_norm": 0.5511391758918762, "learning_rate": 0.0005975389404551031, "loss": 2.0626, "step": 3679 }, { "epoch": 0.12, "grad_norm": 0.5090054869651794, "learning_rate": 0.0005975376042108528, "loss": 2.0644, "step": 3680 }, { "epoch": 0.12, "grad_norm": 0.597543478012085, "learning_rate": 0.0005975362676054356, "loss": 1.9597, "step": 3681 }, { "epoch": 0.12, "grad_norm": 0.5242806077003479, "learning_rate": 0.0005975349306388532, "loss": 1.984, "step": 3682 }, { "epoch": 0.12, "grad_norm": 0.5301786065101624, "learning_rate": 0.0005975335933111072, "loss": 1.9875, "step": 3683 }, { "epoch": 0.12, "grad_norm": 0.5324355363845825, "learning_rate": 0.0005975322556221993, "loss": 2.0205, "step": 3684 }, { "epoch": 0.12, "grad_norm": 0.5295261740684509, "learning_rate": 0.0005975309175721312, "loss": 2.0201, "step": 3685 }, { "epoch": 0.12, "grad_norm": 0.5228244066238403, "learning_rate": 0.0005975295791609042, "loss": 1.9404, "step": 3686 }, { "epoch": 0.12, "grad_norm": 0.537112295627594, "learning_rate": 0.0005975282403885203, "loss": 2.0112, "step": 3687 }, { "epoch": 0.12, "grad_norm": 0.5391958951950073, "learning_rate": 0.0005975269012549809, "loss": 1.9884, "step": 3688 }, { "epoch": 0.12, "grad_norm": 0.5269535183906555, "learning_rate": 0.0005975255617602876, "loss": 2.0529, "step": 3689 }, { "epoch": 0.12, "grad_norm": 0.5240395069122314, "learning_rate": 0.0005975242219044422, "loss": 1.9096, "step": 3690 }, { "epoch": 0.12, "grad_norm": 0.5442904233932495, "learning_rate": 0.0005975228816874462, "loss": 2.0426, "step": 3691 }, { "epoch": 0.12, "grad_norm": 0.54188072681427, "learning_rate": 0.0005975215411093013, "loss": 2.0705, "step": 3692 }, { "epoch": 0.12, "grad_norm": 0.5193834900856018, "learning_rate": 0.0005975202001700091, "loss": 2.0482, "step": 3693 }, { "epoch": 0.12, "grad_norm": 0.5212423801422119, "learning_rate": 0.0005975188588695711, "loss": 1.9755, "step": 3694 }, { "epoch": 0.12, "grad_norm": 0.5378813743591309, "learning_rate": 0.000597517517207989, "loss": 1.9972, "step": 3695 }, { "epoch": 0.12, "grad_norm": 0.5270572304725647, "learning_rate": 0.0005975161751852646, "loss": 2.0081, "step": 3696 }, { "epoch": 0.12, "grad_norm": 0.5138682126998901, "learning_rate": 0.0005975148328013995, "loss": 2.0663, "step": 3697 }, { "epoch": 0.12, "grad_norm": 0.5605493783950806, "learning_rate": 0.000597513490056395, "loss": 1.9626, "step": 3698 }, { "epoch": 0.12, "grad_norm": 0.5453003644943237, "learning_rate": 0.0005975121469502531, "loss": 1.9609, "step": 3699 }, { "epoch": 0.12, "grad_norm": 0.5283203125, "learning_rate": 0.0005975108034829752, "loss": 2.019, "step": 3700 }, { "epoch": 0.12, "grad_norm": 0.5262095332145691, "learning_rate": 0.0005975094596545632, "loss": 2.0208, "step": 3701 }, { "epoch": 0.12, "grad_norm": 0.5376060605049133, "learning_rate": 0.0005975081154650184, "loss": 2.0207, "step": 3702 }, { "epoch": 0.12, "grad_norm": 0.5207541584968567, "learning_rate": 0.0005975067709143426, "loss": 2.0598, "step": 3703 }, { "epoch": 0.12, "grad_norm": 0.5363463759422302, "learning_rate": 0.0005975054260025374, "loss": 2.0175, "step": 3704 }, { "epoch": 0.12, "grad_norm": 0.5641820430755615, "learning_rate": 0.0005975040807296045, "loss": 2.1004, "step": 3705 }, { "epoch": 0.12, "grad_norm": 0.5538734197616577, "learning_rate": 0.0005975027350955455, "loss": 1.9974, "step": 3706 }, { "epoch": 0.12, "grad_norm": 0.5180948376655579, "learning_rate": 0.0005975013891003619, "loss": 1.9739, "step": 3707 }, { "epoch": 0.12, "grad_norm": 0.5226331949234009, "learning_rate": 0.0005975000427440556, "loss": 2.0569, "step": 3708 }, { "epoch": 0.12, "grad_norm": 0.5488747954368591, "learning_rate": 0.0005974986960266281, "loss": 2.0065, "step": 3709 }, { "epoch": 0.12, "grad_norm": 0.5106396079063416, "learning_rate": 0.0005974973489480807, "loss": 1.9809, "step": 3710 }, { "epoch": 0.12, "grad_norm": 0.542081892490387, "learning_rate": 0.0005974960015084157, "loss": 2.0055, "step": 3711 }, { "epoch": 0.12, "grad_norm": 0.5111557245254517, "learning_rate": 0.0005974946537076342, "loss": 1.9536, "step": 3712 }, { "epoch": 0.12, "grad_norm": 0.5207027792930603, "learning_rate": 0.0005974933055457381, "loss": 2.0576, "step": 3713 }, { "epoch": 0.12, "grad_norm": 0.5401001572608948, "learning_rate": 0.000597491957022729, "loss": 2.0925, "step": 3714 }, { "epoch": 0.12, "grad_norm": 0.5453076958656311, "learning_rate": 0.0005974906081386085, "loss": 2.1096, "step": 3715 }, { "epoch": 0.12, "grad_norm": 0.5237904787063599, "learning_rate": 0.0005974892588933781, "loss": 2.0707, "step": 3716 }, { "epoch": 0.12, "grad_norm": 0.520309329032898, "learning_rate": 0.0005974879092870397, "loss": 1.9822, "step": 3717 }, { "epoch": 0.12, "grad_norm": 0.5185633301734924, "learning_rate": 0.0005974865593195947, "loss": 1.9889, "step": 3718 }, { "epoch": 0.12, "grad_norm": 0.5144222378730774, "learning_rate": 0.0005974852089910449, "loss": 2.0083, "step": 3719 }, { "epoch": 0.12, "grad_norm": 0.5148723125457764, "learning_rate": 0.000597483858301392, "loss": 2.0601, "step": 3720 }, { "epoch": 0.12, "grad_norm": 0.5362280011177063, "learning_rate": 0.0005974825072506373, "loss": 2.045, "step": 3721 }, { "epoch": 0.12, "grad_norm": 0.5502125024795532, "learning_rate": 0.0005974811558387828, "loss": 2.1522, "step": 3722 }, { "epoch": 0.12, "grad_norm": 0.5406608581542969, "learning_rate": 0.00059747980406583, "loss": 1.9461, "step": 3723 }, { "epoch": 0.12, "grad_norm": 0.5316365361213684, "learning_rate": 0.0005974784519317807, "loss": 2.048, "step": 3724 }, { "epoch": 0.12, "grad_norm": 0.5262888073921204, "learning_rate": 0.0005974770994366362, "loss": 2.0277, "step": 3725 }, { "epoch": 0.12, "grad_norm": 0.5286868810653687, "learning_rate": 0.0005974757465803984, "loss": 2.0026, "step": 3726 }, { "epoch": 0.12, "grad_norm": 0.5145726203918457, "learning_rate": 0.0005974743933630688, "loss": 1.9589, "step": 3727 }, { "epoch": 0.12, "grad_norm": 0.5062218904495239, "learning_rate": 0.0005974730397846492, "loss": 1.9801, "step": 3728 }, { "epoch": 0.12, "grad_norm": 0.5437759757041931, "learning_rate": 0.0005974716858451411, "loss": 2.0455, "step": 3729 }, { "epoch": 0.12, "grad_norm": 0.5409976840019226, "learning_rate": 0.0005974703315445463, "loss": 1.962, "step": 3730 }, { "epoch": 0.12, "grad_norm": 0.5339171886444092, "learning_rate": 0.0005974689768828662, "loss": 2.073, "step": 3731 }, { "epoch": 0.12, "grad_norm": 0.5066283345222473, "learning_rate": 0.0005974676218601027, "loss": 1.9988, "step": 3732 }, { "epoch": 0.12, "grad_norm": 0.5442850589752197, "learning_rate": 0.0005974662664762573, "loss": 2.1068, "step": 3733 }, { "epoch": 0.12, "grad_norm": 0.5132122039794922, "learning_rate": 0.0005974649107313316, "loss": 1.97, "step": 3734 }, { "epoch": 0.12, "grad_norm": 0.5478792786598206, "learning_rate": 0.0005974635546253276, "loss": 2.1053, "step": 3735 }, { "epoch": 0.12, "grad_norm": 0.5121914148330688, "learning_rate": 0.0005974621981582464, "loss": 2.0391, "step": 3736 }, { "epoch": 0.12, "grad_norm": 0.5077663660049438, "learning_rate": 0.00059746084133009, "loss": 1.9735, "step": 3737 }, { "epoch": 0.12, "grad_norm": 0.5223761796951294, "learning_rate": 0.00059745948414086, "loss": 2.1105, "step": 3738 }, { "epoch": 0.12, "grad_norm": 0.5121828317642212, "learning_rate": 0.000597458126590558, "loss": 2.0094, "step": 3739 }, { "epoch": 0.12, "grad_norm": 0.5138072371482849, "learning_rate": 0.0005974567686791857, "loss": 1.9577, "step": 3740 }, { "epoch": 0.12, "grad_norm": 0.5185881853103638, "learning_rate": 0.0005974554104067446, "loss": 2.0278, "step": 3741 }, { "epoch": 0.12, "grad_norm": 0.5034616589546204, "learning_rate": 0.0005974540517732366, "loss": 1.996, "step": 3742 }, { "epoch": 0.12, "grad_norm": 0.5122537016868591, "learning_rate": 0.0005974526927786632, "loss": 2.0079, "step": 3743 }, { "epoch": 0.12, "grad_norm": 0.5255740284919739, "learning_rate": 0.0005974513334230259, "loss": 1.9992, "step": 3744 }, { "epoch": 0.12, "grad_norm": 0.513653039932251, "learning_rate": 0.0005974499737063266, "loss": 1.9138, "step": 3745 }, { "epoch": 0.12, "grad_norm": 0.5148603916168213, "learning_rate": 0.0005974486136285669, "loss": 1.9716, "step": 3746 }, { "epoch": 0.12, "grad_norm": 0.5369158983230591, "learning_rate": 0.0005974472531897483, "loss": 2.028, "step": 3747 }, { "epoch": 0.12, "grad_norm": 0.5164811611175537, "learning_rate": 0.0005974458923898727, "loss": 1.9843, "step": 3748 }, { "epoch": 0.12, "grad_norm": 0.5296278595924377, "learning_rate": 0.0005974445312289415, "loss": 2.0912, "step": 3749 }, { "epoch": 0.12, "grad_norm": 0.5215593576431274, "learning_rate": 0.0005974431697069566, "loss": 2.0051, "step": 3750 }, { "epoch": 0.12, "grad_norm": 0.5309680700302124, "learning_rate": 0.0005974418078239195, "loss": 1.9581, "step": 3751 }, { "epoch": 0.12, "grad_norm": 0.5161581635475159, "learning_rate": 0.0005974404455798317, "loss": 2.0876, "step": 3752 }, { "epoch": 0.12, "grad_norm": 0.5170389413833618, "learning_rate": 0.0005974390829746951, "loss": 2.02, "step": 3753 }, { "epoch": 0.12, "grad_norm": 0.5697072148323059, "learning_rate": 0.0005974377200085114, "loss": 2.066, "step": 3754 }, { "epoch": 0.12, "grad_norm": 0.6731492877006531, "learning_rate": 0.0005974363566812821, "loss": 2.0396, "step": 3755 }, { "epoch": 0.12, "grad_norm": 0.5350959897041321, "learning_rate": 0.0005974349929930088, "loss": 1.9693, "step": 3756 }, { "epoch": 0.12, "grad_norm": 0.5399235486984253, "learning_rate": 0.0005974336289436932, "loss": 1.992, "step": 3757 }, { "epoch": 0.13, "grad_norm": 0.5484155416488647, "learning_rate": 0.000597432264533337, "loss": 2.0244, "step": 3758 }, { "epoch": 0.13, "grad_norm": 0.526899516582489, "learning_rate": 0.0005974308997619421, "loss": 2.0123, "step": 3759 }, { "epoch": 0.13, "grad_norm": 0.5455763936042786, "learning_rate": 0.0005974295346295096, "loss": 1.994, "step": 3760 }, { "epoch": 0.13, "grad_norm": 0.5414362549781799, "learning_rate": 0.0005974281691360417, "loss": 2.008, "step": 3761 }, { "epoch": 0.13, "grad_norm": 0.5196579694747925, "learning_rate": 0.0005974268032815397, "loss": 2.1153, "step": 3762 }, { "epoch": 0.13, "grad_norm": 0.5406076312065125, "learning_rate": 0.0005974254370660054, "loss": 1.9892, "step": 3763 }, { "epoch": 0.13, "grad_norm": 0.5840561985969543, "learning_rate": 0.0005974240704894405, "loss": 2.015, "step": 3764 }, { "epoch": 0.13, "grad_norm": 0.5203609466552734, "learning_rate": 0.0005974227035518466, "loss": 2.0044, "step": 3765 }, { "epoch": 0.13, "grad_norm": 0.5731021165847778, "learning_rate": 0.0005974213362532253, "loss": 2.0335, "step": 3766 }, { "epoch": 0.13, "grad_norm": 0.5421993732452393, "learning_rate": 0.0005974199685935784, "loss": 2.0052, "step": 3767 }, { "epoch": 0.13, "grad_norm": 0.5146188735961914, "learning_rate": 0.0005974186005729073, "loss": 1.9746, "step": 3768 }, { "epoch": 0.13, "grad_norm": 0.5410830974578857, "learning_rate": 0.0005974172321912141, "loss": 1.9836, "step": 3769 }, { "epoch": 0.13, "grad_norm": 0.5833935737609863, "learning_rate": 0.0005974158634485, "loss": 1.9451, "step": 3770 }, { "epoch": 0.13, "grad_norm": 0.5148859620094299, "learning_rate": 0.0005974144943447669, "loss": 2.0285, "step": 3771 }, { "epoch": 0.13, "grad_norm": 0.523733377456665, "learning_rate": 0.0005974131248800165, "loss": 2.0375, "step": 3772 }, { "epoch": 0.13, "grad_norm": 0.5414917469024658, "learning_rate": 0.0005974117550542504, "loss": 2.0483, "step": 3773 }, { "epoch": 0.13, "grad_norm": 0.5185580253601074, "learning_rate": 0.0005974103848674702, "loss": 1.9965, "step": 3774 }, { "epoch": 0.13, "grad_norm": 0.5399409532546997, "learning_rate": 0.0005974090143196776, "loss": 2.0014, "step": 3775 }, { "epoch": 0.13, "grad_norm": 0.5441557168960571, "learning_rate": 0.0005974076434108744, "loss": 2.0171, "step": 3776 }, { "epoch": 0.13, "grad_norm": 0.5296130776405334, "learning_rate": 0.0005974062721410619, "loss": 2.0326, "step": 3777 }, { "epoch": 0.13, "grad_norm": 0.5276041030883789, "learning_rate": 0.0005974049005102422, "loss": 2.0374, "step": 3778 }, { "epoch": 0.13, "grad_norm": 0.5500475764274597, "learning_rate": 0.0005974035285184167, "loss": 2.0274, "step": 3779 }, { "epoch": 0.13, "grad_norm": 0.5440822243690491, "learning_rate": 0.0005974021561655871, "loss": 2.1106, "step": 3780 }, { "epoch": 0.13, "grad_norm": 0.5466303825378418, "learning_rate": 0.0005974007834517552, "loss": 2.0532, "step": 3781 }, { "epoch": 0.13, "grad_norm": 0.5356682538986206, "learning_rate": 0.0005973994103769225, "loss": 2.0255, "step": 3782 }, { "epoch": 0.13, "grad_norm": 0.5126577615737915, "learning_rate": 0.0005973980369410908, "loss": 2.0573, "step": 3783 }, { "epoch": 0.13, "grad_norm": 0.5187552571296692, "learning_rate": 0.0005973966631442617, "loss": 1.9491, "step": 3784 }, { "epoch": 0.13, "grad_norm": 0.5407293438911438, "learning_rate": 0.0005973952889864368, "loss": 2.0298, "step": 3785 }, { "epoch": 0.13, "grad_norm": 0.5223588347434998, "learning_rate": 0.0005973939144676178, "loss": 2.0127, "step": 3786 }, { "epoch": 0.13, "grad_norm": 0.5183016061782837, "learning_rate": 0.0005973925395878065, "loss": 2.0455, "step": 3787 }, { "epoch": 0.13, "grad_norm": 0.5444128513336182, "learning_rate": 0.0005973911643470045, "loss": 1.9965, "step": 3788 }, { "epoch": 0.13, "grad_norm": 0.5538346767425537, "learning_rate": 0.0005973897887452134, "loss": 2.0672, "step": 3789 }, { "epoch": 0.13, "grad_norm": 0.5270431637763977, "learning_rate": 0.0005973884127824349, "loss": 2.0386, "step": 3790 }, { "epoch": 0.13, "grad_norm": 0.5691653490066528, "learning_rate": 0.0005973870364586706, "loss": 2.0146, "step": 3791 }, { "epoch": 0.13, "grad_norm": 0.5281660556793213, "learning_rate": 0.0005973856597739224, "loss": 1.9968, "step": 3792 }, { "epoch": 0.13, "grad_norm": 0.531266450881958, "learning_rate": 0.0005973842827281918, "loss": 1.9606, "step": 3793 }, { "epoch": 0.13, "grad_norm": 0.5482826828956604, "learning_rate": 0.0005973829053214805, "loss": 2.0842, "step": 3794 }, { "epoch": 0.13, "grad_norm": 0.5277251601219177, "learning_rate": 0.0005973815275537901, "loss": 1.9748, "step": 3795 }, { "epoch": 0.13, "grad_norm": 0.5420394539833069, "learning_rate": 0.0005973801494251223, "loss": 1.952, "step": 3796 }, { "epoch": 0.13, "grad_norm": 0.5344975590705872, "learning_rate": 0.0005973787709354791, "loss": 2.0391, "step": 3797 }, { "epoch": 0.13, "grad_norm": 0.5425069332122803, "learning_rate": 0.0005973773920848616, "loss": 1.9447, "step": 3798 }, { "epoch": 0.13, "grad_norm": 0.5426194071769714, "learning_rate": 0.0005973760128732718, "loss": 1.9151, "step": 3799 }, { "epoch": 0.13, "grad_norm": 0.545547604560852, "learning_rate": 0.0005973746333007115, "loss": 2.066, "step": 3800 }, { "epoch": 0.13, "grad_norm": 0.5253846049308777, "learning_rate": 0.000597373253367182, "loss": 2.0548, "step": 3801 }, { "epoch": 0.13, "grad_norm": 0.5528062582015991, "learning_rate": 0.0005973718730726854, "loss": 1.9775, "step": 3802 }, { "epoch": 0.13, "grad_norm": 0.5382221937179565, "learning_rate": 0.000597370492417223, "loss": 2.0408, "step": 3803 }, { "epoch": 0.13, "grad_norm": 0.5304912328720093, "learning_rate": 0.0005973691114007967, "loss": 2.042, "step": 3804 }, { "epoch": 0.13, "grad_norm": 0.5225939154624939, "learning_rate": 0.0005973677300234082, "loss": 2.0051, "step": 3805 }, { "epoch": 0.13, "grad_norm": 0.5293869972229004, "learning_rate": 0.0005973663482850589, "loss": 2.0473, "step": 3806 }, { "epoch": 0.13, "grad_norm": 0.5373232364654541, "learning_rate": 0.0005973649661857507, "loss": 2.0845, "step": 3807 }, { "epoch": 0.13, "grad_norm": 0.5447899103164673, "learning_rate": 0.0005973635837254854, "loss": 2.0467, "step": 3808 }, { "epoch": 0.13, "grad_norm": 0.5407389402389526, "learning_rate": 0.0005973622009042644, "loss": 2.0893, "step": 3809 }, { "epoch": 0.13, "grad_norm": 0.5467466115951538, "learning_rate": 0.0005973608177220896, "loss": 2.1262, "step": 3810 }, { "epoch": 0.13, "grad_norm": 0.5482038259506226, "learning_rate": 0.0005973594341789625, "loss": 1.9749, "step": 3811 }, { "epoch": 0.13, "grad_norm": 0.5401748418807983, "learning_rate": 0.0005973580502748849, "loss": 2.0612, "step": 3812 }, { "epoch": 0.13, "grad_norm": 0.5172351002693176, "learning_rate": 0.0005973566660098584, "loss": 2.0363, "step": 3813 }, { "epoch": 0.13, "grad_norm": 0.5783079862594604, "learning_rate": 0.0005973552813838847, "loss": 2.0218, "step": 3814 }, { "epoch": 0.13, "grad_norm": 0.5120468139648438, "learning_rate": 0.0005973538963969656, "loss": 1.9827, "step": 3815 }, { "epoch": 0.13, "grad_norm": 0.5224602818489075, "learning_rate": 0.0005973525110491026, "loss": 2.0422, "step": 3816 }, { "epoch": 0.13, "grad_norm": 0.5709393620491028, "learning_rate": 0.0005973511253402975, "loss": 1.9835, "step": 3817 }, { "epoch": 0.13, "grad_norm": 0.5224089026451111, "learning_rate": 0.0005973497392705518, "loss": 2.0986, "step": 3818 }, { "epoch": 0.13, "grad_norm": 0.4952325224876404, "learning_rate": 0.0005973483528398675, "loss": 1.9967, "step": 3819 }, { "epoch": 0.13, "grad_norm": 0.5771409869194031, "learning_rate": 0.0005973469660482462, "loss": 1.9888, "step": 3820 }, { "epoch": 0.13, "grad_norm": 0.5266734957695007, "learning_rate": 0.0005973455788956893, "loss": 2.0478, "step": 3821 }, { "epoch": 0.13, "grad_norm": 0.5236003398895264, "learning_rate": 0.0005973441913821988, "loss": 2.023, "step": 3822 }, { "epoch": 0.13, "grad_norm": 0.5406265258789062, "learning_rate": 0.0005973428035077762, "loss": 2.0735, "step": 3823 }, { "epoch": 0.13, "grad_norm": 0.522070050239563, "learning_rate": 0.0005973414152724233, "loss": 1.9894, "step": 3824 }, { "epoch": 0.13, "grad_norm": 0.5265271067619324, "learning_rate": 0.0005973400266761417, "loss": 2.1027, "step": 3825 }, { "epoch": 0.13, "grad_norm": 0.5047549605369568, "learning_rate": 0.0005973386377189331, "loss": 2.0092, "step": 3826 }, { "epoch": 0.13, "grad_norm": 0.5188745260238647, "learning_rate": 0.0005973372484007991, "loss": 1.965, "step": 3827 }, { "epoch": 0.13, "grad_norm": 0.5283282995223999, "learning_rate": 0.0005973358587217417, "loss": 2.0339, "step": 3828 }, { "epoch": 0.13, "grad_norm": 0.5328822135925293, "learning_rate": 0.0005973344686817623, "loss": 2.0081, "step": 3829 }, { "epoch": 0.13, "grad_norm": 0.5567528605461121, "learning_rate": 0.0005973330782808626, "loss": 2.0181, "step": 3830 }, { "epoch": 0.13, "grad_norm": 0.5062286853790283, "learning_rate": 0.0005973316875190444, "loss": 2.0007, "step": 3831 }, { "epoch": 0.13, "grad_norm": 0.5549301505088806, "learning_rate": 0.0005973302963963094, "loss": 1.9827, "step": 3832 }, { "epoch": 0.13, "grad_norm": 0.602079451084137, "learning_rate": 0.0005973289049126591, "loss": 2.0139, "step": 3833 }, { "epoch": 0.13, "grad_norm": 0.5187187194824219, "learning_rate": 0.0005973275130680954, "loss": 1.9662, "step": 3834 }, { "epoch": 0.13, "grad_norm": 0.5562520623207092, "learning_rate": 0.00059732612086262, "loss": 2.0681, "step": 3835 }, { "epoch": 0.13, "grad_norm": 0.5688536167144775, "learning_rate": 0.0005973247282962343, "loss": 1.9869, "step": 3836 }, { "epoch": 0.13, "grad_norm": 0.5504691004753113, "learning_rate": 0.0005973233353689404, "loss": 2.0383, "step": 3837 }, { "epoch": 0.13, "grad_norm": 0.527721107006073, "learning_rate": 0.0005973219420807395, "loss": 2.0467, "step": 3838 }, { "epoch": 0.13, "grad_norm": 0.5642004013061523, "learning_rate": 0.0005973205484316338, "loss": 1.9898, "step": 3839 }, { "epoch": 0.13, "grad_norm": 0.527667760848999, "learning_rate": 0.0005973191544216247, "loss": 1.9678, "step": 3840 }, { "epoch": 0.13, "grad_norm": 0.5447821617126465, "learning_rate": 0.000597317760050714, "loss": 2.0335, "step": 3841 }, { "epoch": 0.13, "grad_norm": 0.5383490324020386, "learning_rate": 0.0005973163653189032, "loss": 1.9858, "step": 3842 }, { "epoch": 0.13, "grad_norm": 0.5284570455551147, "learning_rate": 0.0005973149702261944, "loss": 2.0635, "step": 3843 }, { "epoch": 0.13, "grad_norm": 0.506225049495697, "learning_rate": 0.0005973135747725888, "loss": 2.0715, "step": 3844 }, { "epoch": 0.13, "grad_norm": 0.5415328741073608, "learning_rate": 0.0005973121789580884, "loss": 1.9787, "step": 3845 }, { "epoch": 0.13, "grad_norm": 0.5248147249221802, "learning_rate": 0.0005973107827826949, "loss": 2.0185, "step": 3846 }, { "epoch": 0.13, "grad_norm": 0.4927395284175873, "learning_rate": 0.0005973093862464097, "loss": 1.9984, "step": 3847 }, { "epoch": 0.13, "grad_norm": 0.5184661746025085, "learning_rate": 0.000597307989349235, "loss": 2.006, "step": 3848 }, { "epoch": 0.13, "grad_norm": 0.5096274614334106, "learning_rate": 0.000597306592091172, "loss": 1.9125, "step": 3849 }, { "epoch": 0.13, "grad_norm": 0.517440915107727, "learning_rate": 0.0005973051944722226, "loss": 1.9798, "step": 3850 }, { "epoch": 0.13, "grad_norm": 0.5180330872535706, "learning_rate": 0.0005973037964923886, "loss": 2.0151, "step": 3851 }, { "epoch": 0.13, "grad_norm": 0.5137227177619934, "learning_rate": 0.0005973023981516716, "loss": 2.005, "step": 3852 }, { "epoch": 0.13, "grad_norm": 0.5107312202453613, "learning_rate": 0.0005973009994500732, "loss": 2.029, "step": 3853 }, { "epoch": 0.13, "grad_norm": 0.525324821472168, "learning_rate": 0.0005972996003875952, "loss": 2.0456, "step": 3854 }, { "epoch": 0.13, "grad_norm": 0.5214062929153442, "learning_rate": 0.0005972982009642394, "loss": 1.9617, "step": 3855 }, { "epoch": 0.13, "grad_norm": 0.5040543079376221, "learning_rate": 0.0005972968011800074, "loss": 1.9966, "step": 3856 }, { "epoch": 0.13, "grad_norm": 0.5116940140724182, "learning_rate": 0.0005972954010349008, "loss": 1.9584, "step": 3857 }, { "epoch": 0.13, "grad_norm": 0.521060049533844, "learning_rate": 0.0005972940005289214, "loss": 2.0042, "step": 3858 }, { "epoch": 0.13, "grad_norm": 0.5379145741462708, "learning_rate": 0.0005972925996620709, "loss": 2.0993, "step": 3859 }, { "epoch": 0.13, "grad_norm": 0.5711097717285156, "learning_rate": 0.0005972911984343509, "loss": 2.0714, "step": 3860 }, { "epoch": 0.13, "grad_norm": 0.5200422406196594, "learning_rate": 0.0005972897968457633, "loss": 2.0579, "step": 3861 }, { "epoch": 0.13, "grad_norm": 0.5410149693489075, "learning_rate": 0.0005972883948963097, "loss": 1.9936, "step": 3862 }, { "epoch": 0.13, "grad_norm": 0.5094969868659973, "learning_rate": 0.0005972869925859917, "loss": 2.1381, "step": 3863 }, { "epoch": 0.13, "grad_norm": 0.54707932472229, "learning_rate": 0.0005972855899148111, "loss": 2.0908, "step": 3864 }, { "epoch": 0.13, "grad_norm": 0.5459312200546265, "learning_rate": 0.0005972841868827697, "loss": 1.9939, "step": 3865 }, { "epoch": 0.13, "grad_norm": 0.5276985764503479, "learning_rate": 0.000597282783489869, "loss": 1.9246, "step": 3866 }, { "epoch": 0.13, "grad_norm": 0.5125168561935425, "learning_rate": 0.0005972813797361108, "loss": 1.9524, "step": 3867 }, { "epoch": 0.13, "grad_norm": 0.6228373646736145, "learning_rate": 0.0005972799756214969, "loss": 2.0421, "step": 3868 }, { "epoch": 0.13, "grad_norm": 0.5212376117706299, "learning_rate": 0.0005972785711460288, "loss": 2.0142, "step": 3869 }, { "epoch": 0.13, "grad_norm": 0.5289812088012695, "learning_rate": 0.0005972771663097084, "loss": 1.9637, "step": 3870 }, { "epoch": 0.13, "grad_norm": 0.5546970963478088, "learning_rate": 0.0005972757611125373, "loss": 2.03, "step": 3871 }, { "epoch": 0.13, "grad_norm": 0.5471474528312683, "learning_rate": 0.0005972743555545172, "loss": 2.0596, "step": 3872 }, { "epoch": 0.13, "grad_norm": 0.5427412390708923, "learning_rate": 0.0005972729496356499, "loss": 2.0269, "step": 3873 }, { "epoch": 0.13, "grad_norm": 0.5544206500053406, "learning_rate": 0.000597271543355937, "loss": 2.0127, "step": 3874 }, { "epoch": 0.13, "grad_norm": 0.5487514734268188, "learning_rate": 0.0005972701367153802, "loss": 2.0208, "step": 3875 }, { "epoch": 0.13, "grad_norm": 0.5237120985984802, "learning_rate": 0.0005972687297139814, "loss": 2.0781, "step": 3876 }, { "epoch": 0.13, "grad_norm": 0.544416069984436, "learning_rate": 0.000597267322351742, "loss": 2.0208, "step": 3877 }, { "epoch": 0.13, "grad_norm": 0.5232492089271545, "learning_rate": 0.0005972659146286641, "loss": 2.0258, "step": 3878 }, { "epoch": 0.13, "grad_norm": 0.5275058746337891, "learning_rate": 0.0005972645065447489, "loss": 2.0295, "step": 3879 }, { "epoch": 0.13, "grad_norm": 0.5194396376609802, "learning_rate": 0.0005972630980999985, "loss": 2.0207, "step": 3880 }, { "epoch": 0.13, "grad_norm": 0.5333322286605835, "learning_rate": 0.0005972616892944145, "loss": 1.9487, "step": 3881 }, { "epoch": 0.13, "grad_norm": 0.5299907922744751, "learning_rate": 0.0005972602801279987, "loss": 2.0234, "step": 3882 }, { "epoch": 0.13, "grad_norm": 0.5245811343193054, "learning_rate": 0.0005972588706007525, "loss": 2.0119, "step": 3883 }, { "epoch": 0.13, "grad_norm": 0.5150949954986572, "learning_rate": 0.000597257460712678, "loss": 2.0122, "step": 3884 }, { "epoch": 0.13, "grad_norm": 0.5329829454421997, "learning_rate": 0.0005972560504637767, "loss": 2.0452, "step": 3885 }, { "epoch": 0.13, "grad_norm": 0.5257842540740967, "learning_rate": 0.0005972546398540504, "loss": 1.9937, "step": 3886 }, { "epoch": 0.13, "grad_norm": 0.5219881534576416, "learning_rate": 0.0005972532288835006, "loss": 2.0292, "step": 3887 }, { "epoch": 0.13, "grad_norm": 0.5338958501815796, "learning_rate": 0.0005972518175521294, "loss": 2.0867, "step": 3888 }, { "epoch": 0.13, "grad_norm": 0.5131230354309082, "learning_rate": 0.0005972504058599381, "loss": 2.0275, "step": 3889 }, { "epoch": 0.13, "grad_norm": 0.5272248387336731, "learning_rate": 0.0005972489938069288, "loss": 2.0298, "step": 3890 }, { "epoch": 0.13, "grad_norm": 0.5311300754547119, "learning_rate": 0.0005972475813931029, "loss": 2.0596, "step": 3891 }, { "epoch": 0.13, "grad_norm": 0.5160938501358032, "learning_rate": 0.0005972461686184622, "loss": 1.9713, "step": 3892 }, { "epoch": 0.13, "grad_norm": 0.5030280351638794, "learning_rate": 0.0005972447554830085, "loss": 1.9851, "step": 3893 }, { "epoch": 0.13, "grad_norm": 0.5407194495201111, "learning_rate": 0.0005972433419867434, "loss": 1.9811, "step": 3894 }, { "epoch": 0.13, "grad_norm": 0.5178926587104797, "learning_rate": 0.0005972419281296688, "loss": 1.9953, "step": 3895 }, { "epoch": 0.13, "grad_norm": 0.5159750580787659, "learning_rate": 0.0005972405139117864, "loss": 2.0322, "step": 3896 }, { "epoch": 0.13, "grad_norm": 0.5183881521224976, "learning_rate": 0.0005972390993330976, "loss": 2.0163, "step": 3897 }, { "epoch": 0.13, "grad_norm": 0.5099519491195679, "learning_rate": 0.0005972376843936043, "loss": 2.1054, "step": 3898 }, { "epoch": 0.13, "grad_norm": 0.5296371579170227, "learning_rate": 0.0005972362690933083, "loss": 1.9622, "step": 3899 }, { "epoch": 0.13, "grad_norm": 0.5236100554466248, "learning_rate": 0.0005972348534322114, "loss": 2.0556, "step": 3900 }, { "epoch": 0.13, "grad_norm": 0.5025546550750732, "learning_rate": 0.000597233437410315, "loss": 2.0228, "step": 3901 }, { "epoch": 0.13, "grad_norm": 0.5613288283348083, "learning_rate": 0.000597232021027621, "loss": 2.0721, "step": 3902 }, { "epoch": 0.13, "grad_norm": 0.5638744235038757, "learning_rate": 0.0005972306042841313, "loss": 1.9848, "step": 3903 }, { "epoch": 0.13, "grad_norm": 0.512919008731842, "learning_rate": 0.0005972291871798474, "loss": 1.9913, "step": 3904 }, { "epoch": 0.13, "grad_norm": 0.5722904205322266, "learning_rate": 0.0005972277697147708, "loss": 1.95, "step": 3905 }, { "epoch": 0.13, "grad_norm": 0.5412652492523193, "learning_rate": 0.0005972263518889038, "loss": 1.9829, "step": 3906 }, { "epoch": 0.13, "grad_norm": 0.52260822057724, "learning_rate": 0.0005972249337022476, "loss": 2.026, "step": 3907 }, { "epoch": 0.13, "grad_norm": 0.539524257183075, "learning_rate": 0.0005972235151548043, "loss": 2.0693, "step": 3908 }, { "epoch": 0.13, "grad_norm": 0.628760039806366, "learning_rate": 0.0005972220962465754, "loss": 2.0465, "step": 3909 }, { "epoch": 0.13, "grad_norm": 0.5629053711891174, "learning_rate": 0.0005972206769775626, "loss": 1.9437, "step": 3910 }, { "epoch": 0.13, "grad_norm": 0.5153351426124573, "learning_rate": 0.0005972192573477677, "loss": 2.0068, "step": 3911 }, { "epoch": 0.13, "grad_norm": 0.5382303595542908, "learning_rate": 0.0005972178373571925, "loss": 2.0119, "step": 3912 }, { "epoch": 0.13, "grad_norm": 0.5651852488517761, "learning_rate": 0.0005972164170058385, "loss": 2.1068, "step": 3913 }, { "epoch": 0.13, "grad_norm": 0.5404036641120911, "learning_rate": 0.0005972149962937076, "loss": 2.0294, "step": 3914 }, { "epoch": 0.13, "grad_norm": 0.5135457515716553, "learning_rate": 0.0005972135752208016, "loss": 2.0473, "step": 3915 }, { "epoch": 0.13, "grad_norm": 0.530033528804779, "learning_rate": 0.000597212153787122, "loss": 2.0112, "step": 3916 }, { "epoch": 0.13, "grad_norm": 0.4998719394207001, "learning_rate": 0.0005972107319926706, "loss": 2.0258, "step": 3917 }, { "epoch": 0.13, "grad_norm": 0.5499340891838074, "learning_rate": 0.0005972093098374493, "loss": 2.0539, "step": 3918 }, { "epoch": 0.13, "grad_norm": 0.5246642827987671, "learning_rate": 0.0005972078873214597, "loss": 1.9764, "step": 3919 }, { "epoch": 0.13, "grad_norm": 0.5136931538581848, "learning_rate": 0.0005972064644447034, "loss": 2.0235, "step": 3920 }, { "epoch": 0.13, "grad_norm": 0.5473141074180603, "learning_rate": 0.0005972050412071823, "loss": 2.1191, "step": 3921 }, { "epoch": 0.13, "grad_norm": 0.5218515992164612, "learning_rate": 0.0005972036176088981, "loss": 2.05, "step": 3922 }, { "epoch": 0.13, "grad_norm": 0.5295936465263367, "learning_rate": 0.0005972021936498524, "loss": 1.9862, "step": 3923 }, { "epoch": 0.13, "grad_norm": 0.5504136085510254, "learning_rate": 0.000597200769330047, "loss": 2.0486, "step": 3924 }, { "epoch": 0.13, "grad_norm": 0.5297231674194336, "learning_rate": 0.0005971993446494837, "loss": 2.0794, "step": 3925 }, { "epoch": 0.13, "grad_norm": 0.6178233027458191, "learning_rate": 0.0005971979196081643, "loss": 2.0749, "step": 3926 }, { "epoch": 0.13, "grad_norm": 0.5328366756439209, "learning_rate": 0.0005971964942060904, "loss": 2.005, "step": 3927 }, { "epoch": 0.13, "grad_norm": 0.5382066369056702, "learning_rate": 0.0005971950684432637, "loss": 2.0205, "step": 3928 }, { "epoch": 0.13, "grad_norm": 0.5281222462654114, "learning_rate": 0.0005971936423196859, "loss": 2.0021, "step": 3929 }, { "epoch": 0.13, "grad_norm": 0.5232406854629517, "learning_rate": 0.0005971922158353589, "loss": 2.002, "step": 3930 }, { "epoch": 0.13, "grad_norm": 0.5236680507659912, "learning_rate": 0.0005971907889902842, "loss": 1.9797, "step": 3931 }, { "epoch": 0.13, "grad_norm": 0.5249514579772949, "learning_rate": 0.0005971893617844639, "loss": 2.0756, "step": 3932 }, { "epoch": 0.13, "grad_norm": 0.5303453207015991, "learning_rate": 0.0005971879342178993, "loss": 1.9463, "step": 3933 }, { "epoch": 0.13, "grad_norm": 0.5850698947906494, "learning_rate": 0.0005971865062905924, "loss": 2.0374, "step": 3934 }, { "epoch": 0.13, "grad_norm": 0.5172274112701416, "learning_rate": 0.0005971850780025449, "loss": 2.0264, "step": 3935 }, { "epoch": 0.13, "grad_norm": 0.5355613231658936, "learning_rate": 0.0005971836493537585, "loss": 2.0681, "step": 3936 }, { "epoch": 0.13, "grad_norm": 0.5078045129776001, "learning_rate": 0.000597182220344235, "loss": 1.9995, "step": 3937 }, { "epoch": 0.13, "grad_norm": 0.5067571401596069, "learning_rate": 0.0005971807909739759, "loss": 2.0671, "step": 3938 }, { "epoch": 0.13, "grad_norm": 0.5209343433380127, "learning_rate": 0.0005971793612429832, "loss": 2.0614, "step": 3939 }, { "epoch": 0.13, "grad_norm": 0.5183846354484558, "learning_rate": 0.0005971779311512586, "loss": 1.9953, "step": 3940 }, { "epoch": 0.13, "grad_norm": 0.5380080938339233, "learning_rate": 0.0005971765006988038, "loss": 1.9821, "step": 3941 }, { "epoch": 0.13, "grad_norm": 0.5196939706802368, "learning_rate": 0.0005971750698856204, "loss": 2.0401, "step": 3942 }, { "epoch": 0.13, "grad_norm": 0.5223098993301392, "learning_rate": 0.0005971736387117103, "loss": 2.0313, "step": 3943 }, { "epoch": 0.13, "grad_norm": 0.5644048452377319, "learning_rate": 0.0005971722071770752, "loss": 2.0486, "step": 3944 }, { "epoch": 0.13, "grad_norm": 0.5176892876625061, "learning_rate": 0.0005971707752817167, "loss": 2.038, "step": 3945 }, { "epoch": 0.13, "grad_norm": 0.5357971787452698, "learning_rate": 0.0005971693430256369, "loss": 2.0258, "step": 3946 }, { "epoch": 0.13, "grad_norm": 0.5166706442832947, "learning_rate": 0.0005971679104088372, "loss": 2.0313, "step": 3947 }, { "epoch": 0.13, "grad_norm": 0.5123515129089355, "learning_rate": 0.0005971664774313194, "loss": 2.0393, "step": 3948 }, { "epoch": 0.13, "grad_norm": 0.5243241786956787, "learning_rate": 0.0005971650440930853, "loss": 2.0335, "step": 3949 }, { "epoch": 0.13, "grad_norm": 0.5176131725311279, "learning_rate": 0.0005971636103941365, "loss": 1.9936, "step": 3950 }, { "epoch": 0.13, "grad_norm": 0.5242773294448853, "learning_rate": 0.0005971621763344751, "loss": 1.9478, "step": 3951 }, { "epoch": 0.13, "grad_norm": 0.502890408039093, "learning_rate": 0.0005971607419141024, "loss": 2.04, "step": 3952 }, { "epoch": 0.13, "grad_norm": 0.5275682806968689, "learning_rate": 0.0005971593071330205, "loss": 2.0344, "step": 3953 }, { "epoch": 0.13, "grad_norm": 0.5136430263519287, "learning_rate": 0.0005971578719912309, "loss": 1.9816, "step": 3954 }, { "epoch": 0.13, "grad_norm": 0.5083838701248169, "learning_rate": 0.0005971564364887355, "loss": 1.9988, "step": 3955 }, { "epoch": 0.13, "grad_norm": 0.5521374344825745, "learning_rate": 0.0005971550006255359, "loss": 2.0164, "step": 3956 }, { "epoch": 0.13, "grad_norm": 0.5362189412117004, "learning_rate": 0.0005971535644016339, "loss": 2.0067, "step": 3957 }, { "epoch": 0.13, "grad_norm": 0.517987847328186, "learning_rate": 0.0005971521278170313, "loss": 2.0142, "step": 3958 }, { "epoch": 0.13, "grad_norm": 0.5196172595024109, "learning_rate": 0.0005971506908717298, "loss": 1.997, "step": 3959 }, { "epoch": 0.13, "grad_norm": 0.5382868051528931, "learning_rate": 0.0005971492535657312, "loss": 1.9995, "step": 3960 }, { "epoch": 0.13, "grad_norm": 0.5268386602401733, "learning_rate": 0.000597147815899037, "loss": 1.9904, "step": 3961 }, { "epoch": 0.13, "grad_norm": 0.5257652997970581, "learning_rate": 0.0005971463778716493, "loss": 2.0574, "step": 3962 }, { "epoch": 0.13, "grad_norm": 0.5150201320648193, "learning_rate": 0.0005971449394835697, "loss": 2.0179, "step": 3963 }, { "epoch": 0.13, "grad_norm": 0.5303911566734314, "learning_rate": 0.0005971435007347998, "loss": 2.0576, "step": 3964 }, { "epoch": 0.13, "grad_norm": 0.49465832114219666, "learning_rate": 0.0005971420616253416, "loss": 1.9435, "step": 3965 }, { "epoch": 0.13, "grad_norm": 0.5130664110183716, "learning_rate": 0.0005971406221551966, "loss": 1.9618, "step": 3966 }, { "epoch": 0.13, "grad_norm": 0.5178889036178589, "learning_rate": 0.0005971391823243668, "loss": 2.0384, "step": 3967 }, { "epoch": 0.13, "grad_norm": 0.5387059450149536, "learning_rate": 0.0005971377421328537, "loss": 2.0233, "step": 3968 }, { "epoch": 0.13, "grad_norm": 0.5098519921302795, "learning_rate": 0.0005971363015806592, "loss": 1.9845, "step": 3969 }, { "epoch": 0.13, "grad_norm": 0.5146161913871765, "learning_rate": 0.000597134860667785, "loss": 2.066, "step": 3970 }, { "epoch": 0.13, "grad_norm": 0.5511572360992432, "learning_rate": 0.0005971334193942329, "loss": 2.0076, "step": 3971 }, { "epoch": 0.13, "grad_norm": 0.5138170719146729, "learning_rate": 0.0005971319777600045, "loss": 1.9747, "step": 3972 }, { "epoch": 0.13, "grad_norm": 0.5114831924438477, "learning_rate": 0.0005971305357651018, "loss": 2.0642, "step": 3973 }, { "epoch": 0.13, "grad_norm": 0.5017498731613159, "learning_rate": 0.0005971290934095263, "loss": 1.9582, "step": 3974 }, { "epoch": 0.13, "grad_norm": 0.5146438479423523, "learning_rate": 0.0005971276506932799, "loss": 2.0527, "step": 3975 }, { "epoch": 0.13, "grad_norm": 0.5188406705856323, "learning_rate": 0.0005971262076163643, "loss": 2.0392, "step": 3976 }, { "epoch": 0.13, "grad_norm": 0.5048542618751526, "learning_rate": 0.0005971247641787813, "loss": 1.9706, "step": 3977 }, { "epoch": 0.13, "grad_norm": 0.5188419222831726, "learning_rate": 0.0005971233203805326, "loss": 2.0129, "step": 3978 }, { "epoch": 0.13, "grad_norm": 0.5172885656356812, "learning_rate": 0.0005971218762216199, "loss": 2.0668, "step": 3979 }, { "epoch": 0.13, "grad_norm": 0.5186945199966431, "learning_rate": 0.0005971204317020451, "loss": 2.064, "step": 3980 }, { "epoch": 0.13, "grad_norm": 0.5047842860221863, "learning_rate": 0.0005971189868218099, "loss": 1.9875, "step": 3981 }, { "epoch": 0.13, "grad_norm": 0.5061058402061462, "learning_rate": 0.0005971175415809161, "loss": 1.9908, "step": 3982 }, { "epoch": 0.13, "grad_norm": 0.5109639167785645, "learning_rate": 0.0005971160959793653, "loss": 2.0741, "step": 3983 }, { "epoch": 0.13, "grad_norm": 0.5120142698287964, "learning_rate": 0.0005971146500171592, "loss": 2.0005, "step": 3984 }, { "epoch": 0.13, "grad_norm": 0.5233996510505676, "learning_rate": 0.0005971132036942997, "loss": 2.0639, "step": 3985 }, { "epoch": 0.13, "grad_norm": 0.5106987953186035, "learning_rate": 0.0005971117570107887, "loss": 2.066, "step": 3986 }, { "epoch": 0.13, "grad_norm": 0.5271971225738525, "learning_rate": 0.0005971103099666278, "loss": 2.0225, "step": 3987 }, { "epoch": 0.13, "grad_norm": 0.5341157913208008, "learning_rate": 0.0005971088625618187, "loss": 2.0182, "step": 3988 }, { "epoch": 0.13, "grad_norm": 0.5228694081306458, "learning_rate": 0.0005971074147963632, "loss": 2.0661, "step": 3989 }, { "epoch": 0.13, "grad_norm": 0.5067697763442993, "learning_rate": 0.0005971059666702632, "loss": 1.9945, "step": 3990 }, { "epoch": 0.13, "grad_norm": 0.5059853196144104, "learning_rate": 0.0005971045181835202, "loss": 2.064, "step": 3991 }, { "epoch": 0.13, "grad_norm": 0.5067079663276672, "learning_rate": 0.0005971030693361361, "loss": 2.0382, "step": 3992 }, { "epoch": 0.13, "grad_norm": 0.4966829717159271, "learning_rate": 0.0005971016201281128, "loss": 1.967, "step": 3993 }, { "epoch": 0.13, "grad_norm": 0.5176767706871033, "learning_rate": 0.0005971001705594517, "loss": 1.9691, "step": 3994 }, { "epoch": 0.13, "grad_norm": 0.5127904415130615, "learning_rate": 0.0005970987206301548, "loss": 1.9882, "step": 3995 }, { "epoch": 0.13, "grad_norm": 0.5410730838775635, "learning_rate": 0.0005970972703402239, "loss": 1.9964, "step": 3996 }, { "epoch": 0.13, "grad_norm": 0.5267905592918396, "learning_rate": 0.0005970958196896607, "loss": 2.0517, "step": 3997 }, { "epoch": 0.13, "grad_norm": 0.5238786339759827, "learning_rate": 0.0005970943686784669, "loss": 1.9932, "step": 3998 }, { "epoch": 0.13, "grad_norm": 0.5299331545829773, "learning_rate": 0.0005970929173066444, "loss": 1.9592, "step": 3999 }, { "epoch": 0.13, "grad_norm": 0.5113624334335327, "learning_rate": 0.0005970914655741948, "loss": 2.058, "step": 4000 }, { "epoch": 0.13, "grad_norm": 0.521473228931427, "learning_rate": 0.00059709001348112, "loss": 1.899, "step": 4001 }, { "epoch": 0.13, "grad_norm": 0.5126468539237976, "learning_rate": 0.0005970885610274216, "loss": 1.9564, "step": 4002 }, { "epoch": 0.13, "grad_norm": 0.5145747065544128, "learning_rate": 0.0005970871082131015, "loss": 1.9692, "step": 4003 }, { "epoch": 0.13, "grad_norm": 0.5050252676010132, "learning_rate": 0.0005970856550381614, "loss": 2.0295, "step": 4004 }, { "epoch": 0.13, "grad_norm": 0.5490584373474121, "learning_rate": 0.0005970842015026031, "loss": 2.0387, "step": 4005 }, { "epoch": 0.13, "grad_norm": 0.519038736820221, "learning_rate": 0.0005970827476064283, "loss": 2.0538, "step": 4006 }, { "epoch": 0.13, "grad_norm": 0.5204393863677979, "learning_rate": 0.000597081293349639, "loss": 2.0285, "step": 4007 }, { "epoch": 0.13, "grad_norm": 0.521731436252594, "learning_rate": 0.0005970798387322367, "loss": 2.0037, "step": 4008 }, { "epoch": 0.13, "grad_norm": 0.5466792583465576, "learning_rate": 0.0005970783837542232, "loss": 1.9687, "step": 4009 }, { "epoch": 0.13, "grad_norm": 0.5179443359375, "learning_rate": 0.0005970769284156004, "loss": 1.9897, "step": 4010 }, { "epoch": 0.13, "grad_norm": 0.5418857932090759, "learning_rate": 0.0005970754727163699, "loss": 2.0371, "step": 4011 }, { "epoch": 0.13, "grad_norm": 0.5319607853889465, "learning_rate": 0.0005970740166565335, "loss": 2.002, "step": 4012 }, { "epoch": 0.13, "grad_norm": 0.5491147637367249, "learning_rate": 0.0005970725602360931, "loss": 1.9701, "step": 4013 }, { "epoch": 0.13, "grad_norm": 0.5200604200363159, "learning_rate": 0.0005970711034550503, "loss": 1.9813, "step": 4014 }, { "epoch": 0.13, "grad_norm": 0.5703094601631165, "learning_rate": 0.0005970696463134071, "loss": 2.0948, "step": 4015 }, { "epoch": 0.13, "grad_norm": 0.5318568348884583, "learning_rate": 0.000597068188811165, "loss": 1.8987, "step": 4016 }, { "epoch": 0.13, "grad_norm": 0.5168935060501099, "learning_rate": 0.0005970667309483259, "loss": 2.0677, "step": 4017 }, { "epoch": 0.13, "grad_norm": 0.5045108199119568, "learning_rate": 0.0005970652727248916, "loss": 1.9936, "step": 4018 }, { "epoch": 0.13, "grad_norm": 0.5300588607788086, "learning_rate": 0.0005970638141408638, "loss": 1.9577, "step": 4019 }, { "epoch": 0.13, "grad_norm": 0.5176761150360107, "learning_rate": 0.0005970623551962443, "loss": 1.9997, "step": 4020 }, { "epoch": 0.13, "grad_norm": 0.5217195749282837, "learning_rate": 0.0005970608958910349, "loss": 1.9741, "step": 4021 }, { "epoch": 0.13, "grad_norm": 0.5147565007209778, "learning_rate": 0.0005970594362252373, "loss": 2.0139, "step": 4022 }, { "epoch": 0.13, "grad_norm": 0.522641658782959, "learning_rate": 0.0005970579761988533, "loss": 2.0569, "step": 4023 }, { "epoch": 0.13, "grad_norm": 0.5146448612213135, "learning_rate": 0.0005970565158118847, "loss": 2.0524, "step": 4024 }, { "epoch": 0.13, "grad_norm": 0.5138567686080933, "learning_rate": 0.0005970550550643334, "loss": 2.0344, "step": 4025 }, { "epoch": 0.13, "grad_norm": 0.5203951001167297, "learning_rate": 0.0005970535939562009, "loss": 1.9917, "step": 4026 }, { "epoch": 0.13, "grad_norm": 0.519225001335144, "learning_rate": 0.0005970521324874891, "loss": 1.9695, "step": 4027 }, { "epoch": 0.13, "grad_norm": 0.5176544785499573, "learning_rate": 0.0005970506706581999, "loss": 1.975, "step": 4028 }, { "epoch": 0.13, "grad_norm": 0.5671565532684326, "learning_rate": 0.0005970492084683348, "loss": 2.1161, "step": 4029 }, { "epoch": 0.13, "grad_norm": 0.5422240495681763, "learning_rate": 0.0005970477459178958, "loss": 2.063, "step": 4030 }, { "epoch": 0.13, "grad_norm": 0.5475664734840393, "learning_rate": 0.0005970462830068846, "loss": 1.9954, "step": 4031 }, { "epoch": 0.13, "grad_norm": 0.519891619682312, "learning_rate": 0.000597044819735303, "loss": 2.0113, "step": 4032 }, { "epoch": 0.13, "grad_norm": 0.5086625814437866, "learning_rate": 0.0005970433561031526, "loss": 1.8728, "step": 4033 }, { "epoch": 0.13, "grad_norm": 0.5135771036148071, "learning_rate": 0.0005970418921104355, "loss": 1.919, "step": 4034 }, { "epoch": 0.13, "grad_norm": 0.5436887145042419, "learning_rate": 0.0005970404277571533, "loss": 2.0569, "step": 4035 }, { "epoch": 0.13, "grad_norm": 0.5409331917762756, "learning_rate": 0.0005970389630433078, "loss": 1.9919, "step": 4036 }, { "epoch": 0.13, "grad_norm": 0.542194664478302, "learning_rate": 0.0005970374979689006, "loss": 2.1535, "step": 4037 }, { "epoch": 0.13, "grad_norm": 0.5247593522071838, "learning_rate": 0.0005970360325339338, "loss": 2.0896, "step": 4038 }, { "epoch": 0.13, "grad_norm": 0.5214673280715942, "learning_rate": 0.000597034566738409, "loss": 2.0618, "step": 4039 }, { "epoch": 0.13, "grad_norm": 0.5145970582962036, "learning_rate": 0.000597033100582328, "loss": 2.0472, "step": 4040 }, { "epoch": 0.13, "grad_norm": 0.5364016890525818, "learning_rate": 0.0005970316340656926, "loss": 2.0297, "step": 4041 }, { "epoch": 0.13, "grad_norm": 0.5332076549530029, "learning_rate": 0.0005970301671885046, "loss": 1.9846, "step": 4042 }, { "epoch": 0.13, "grad_norm": 0.5056379437446594, "learning_rate": 0.0005970286999507656, "loss": 1.9481, "step": 4043 }, { "epoch": 0.13, "grad_norm": 0.5054441094398499, "learning_rate": 0.0005970272323524776, "loss": 2.0575, "step": 4044 }, { "epoch": 0.13, "grad_norm": 0.5223585963249207, "learning_rate": 0.0005970257643936423, "loss": 1.9592, "step": 4045 }, { "epoch": 0.13, "grad_norm": 0.5289514660835266, "learning_rate": 0.0005970242960742615, "loss": 2.0075, "step": 4046 }, { "epoch": 0.13, "grad_norm": 0.5164185166358948, "learning_rate": 0.000597022827394337, "loss": 2.0545, "step": 4047 }, { "epoch": 0.13, "grad_norm": 0.5173777937889099, "learning_rate": 0.0005970213583538704, "loss": 2.0111, "step": 4048 }, { "epoch": 0.13, "grad_norm": 0.5173216462135315, "learning_rate": 0.0005970198889528639, "loss": 2.0742, "step": 4049 }, { "epoch": 0.13, "grad_norm": 0.5576434135437012, "learning_rate": 0.0005970184191913189, "loss": 1.998, "step": 4050 }, { "epoch": 0.13, "grad_norm": 0.534882128238678, "learning_rate": 0.0005970169490692372, "loss": 2.0357, "step": 4051 }, { "epoch": 0.13, "grad_norm": 0.5249670147895813, "learning_rate": 0.0005970154785866207, "loss": 1.8721, "step": 4052 }, { "epoch": 0.13, "grad_norm": 0.529099702835083, "learning_rate": 0.0005970140077434713, "loss": 2.028, "step": 4053 }, { "epoch": 0.13, "grad_norm": 0.5036508440971375, "learning_rate": 0.0005970125365397905, "loss": 2.0004, "step": 4054 }, { "epoch": 0.13, "grad_norm": 0.5338873863220215, "learning_rate": 0.0005970110649755803, "loss": 2.0375, "step": 4055 }, { "epoch": 0.13, "grad_norm": 0.5350480079650879, "learning_rate": 0.0005970095930508425, "loss": 2.044, "step": 4056 }, { "epoch": 0.13, "grad_norm": 0.5208200812339783, "learning_rate": 0.0005970081207655789, "loss": 2.1066, "step": 4057 }, { "epoch": 0.14, "grad_norm": 0.5115039348602295, "learning_rate": 0.000597006648119791, "loss": 2.0781, "step": 4058 }, { "epoch": 0.14, "grad_norm": 0.5101730227470398, "learning_rate": 0.0005970051751134809, "loss": 2.0182, "step": 4059 }, { "epoch": 0.14, "grad_norm": 0.5361250042915344, "learning_rate": 0.0005970037017466502, "loss": 2.0005, "step": 4060 }, { "epoch": 0.14, "grad_norm": 0.5209878087043762, "learning_rate": 0.0005970022280193008, "loss": 2.0062, "step": 4061 }, { "epoch": 0.14, "grad_norm": 0.501918613910675, "learning_rate": 0.0005970007539314345, "loss": 1.9566, "step": 4062 }, { "epoch": 0.14, "grad_norm": 0.5341736674308777, "learning_rate": 0.000596999279483053, "loss": 1.9832, "step": 4063 }, { "epoch": 0.14, "grad_norm": 0.5247517228126526, "learning_rate": 0.0005969978046741582, "loss": 2.0613, "step": 4064 }, { "epoch": 0.14, "grad_norm": 0.5540316700935364, "learning_rate": 0.0005969963295047517, "loss": 1.9719, "step": 4065 }, { "epoch": 0.14, "grad_norm": 0.5132468938827515, "learning_rate": 0.0005969948539748355, "loss": 2.0112, "step": 4066 }, { "epoch": 0.14, "grad_norm": 0.5384030938148499, "learning_rate": 0.0005969933780844113, "loss": 1.9949, "step": 4067 }, { "epoch": 0.14, "grad_norm": 0.5517051815986633, "learning_rate": 0.000596991901833481, "loss": 1.9721, "step": 4068 }, { "epoch": 0.14, "grad_norm": 0.5222403407096863, "learning_rate": 0.0005969904252220462, "loss": 2.0019, "step": 4069 }, { "epoch": 0.14, "grad_norm": 0.530912458896637, "learning_rate": 0.0005969889482501088, "loss": 2.0531, "step": 4070 }, { "epoch": 0.14, "grad_norm": 0.5362558960914612, "learning_rate": 0.0005969874709176705, "loss": 1.9806, "step": 4071 }, { "epoch": 0.14, "grad_norm": 0.5352299809455872, "learning_rate": 0.0005969859932247332, "loss": 1.972, "step": 4072 }, { "epoch": 0.14, "grad_norm": 0.5018215775489807, "learning_rate": 0.0005969845151712987, "loss": 1.9345, "step": 4073 }, { "epoch": 0.14, "grad_norm": 0.541488766670227, "learning_rate": 0.0005969830367573688, "loss": 1.9831, "step": 4074 }, { "epoch": 0.14, "grad_norm": 0.5052188634872437, "learning_rate": 0.0005969815579829451, "loss": 1.9748, "step": 4075 }, { "epoch": 0.14, "grad_norm": 0.5028575658798218, "learning_rate": 0.0005969800788480298, "loss": 2.0189, "step": 4076 }, { "epoch": 0.14, "grad_norm": 0.6584022641181946, "learning_rate": 0.0005969785993526242, "loss": 2.0873, "step": 4077 }, { "epoch": 0.14, "grad_norm": 0.5046966671943665, "learning_rate": 0.0005969771194967304, "loss": 2.0199, "step": 4078 }, { "epoch": 0.14, "grad_norm": 0.5160455703735352, "learning_rate": 0.0005969756392803501, "loss": 2.0696, "step": 4079 }, { "epoch": 0.14, "grad_norm": 0.5139435529708862, "learning_rate": 0.0005969741587034853, "loss": 1.9939, "step": 4080 }, { "epoch": 0.14, "grad_norm": 0.5457490086555481, "learning_rate": 0.0005969726777661375, "loss": 2.0828, "step": 4081 }, { "epoch": 0.14, "grad_norm": 0.49969011545181274, "learning_rate": 0.0005969711964683086, "loss": 1.9307, "step": 4082 }, { "epoch": 0.14, "grad_norm": 0.5143887996673584, "learning_rate": 0.0005969697148100005, "loss": 1.9415, "step": 4083 }, { "epoch": 0.14, "grad_norm": 0.5203741192817688, "learning_rate": 0.0005969682327912148, "loss": 2.0071, "step": 4084 }, { "epoch": 0.14, "grad_norm": 0.515334963798523, "learning_rate": 0.0005969667504119536, "loss": 2.0398, "step": 4085 }, { "epoch": 0.14, "grad_norm": 0.49648594856262207, "learning_rate": 0.0005969652676722183, "loss": 1.9206, "step": 4086 }, { "epoch": 0.14, "grad_norm": 0.5273497700691223, "learning_rate": 0.0005969637845720111, "loss": 2.0097, "step": 4087 }, { "epoch": 0.14, "grad_norm": 0.5354827046394348, "learning_rate": 0.0005969623011113336, "loss": 1.9614, "step": 4088 }, { "epoch": 0.14, "grad_norm": 0.520550549030304, "learning_rate": 0.0005969608172901876, "loss": 1.969, "step": 4089 }, { "epoch": 0.14, "grad_norm": 0.526591420173645, "learning_rate": 0.0005969593331085749, "loss": 2.0379, "step": 4090 }, { "epoch": 0.14, "grad_norm": 0.5283370018005371, "learning_rate": 0.0005969578485664973, "loss": 2.0379, "step": 4091 }, { "epoch": 0.14, "grad_norm": 0.5164999961853027, "learning_rate": 0.0005969563636639567, "loss": 1.9746, "step": 4092 }, { "epoch": 0.14, "grad_norm": 0.5296373963356018, "learning_rate": 0.0005969548784009549, "loss": 2.0093, "step": 4093 }, { "epoch": 0.14, "grad_norm": 0.4989435076713562, "learning_rate": 0.0005969533927774935, "loss": 1.958, "step": 4094 }, { "epoch": 0.14, "grad_norm": 0.5488994717597961, "learning_rate": 0.0005969519067935744, "loss": 2.1368, "step": 4095 }, { "epoch": 0.14, "grad_norm": 0.5273879766464233, "learning_rate": 0.0005969504204491995, "loss": 2.0051, "step": 4096 }, { "epoch": 0.14, "grad_norm": 0.5250887274742126, "learning_rate": 0.0005969489337443706, "loss": 2.0251, "step": 4097 }, { "epoch": 0.14, "grad_norm": 0.5252031087875366, "learning_rate": 0.0005969474466790893, "loss": 2.0015, "step": 4098 }, { "epoch": 0.14, "grad_norm": 0.5414914488792419, "learning_rate": 0.0005969459592533577, "loss": 1.9369, "step": 4099 }, { "epoch": 0.14, "grad_norm": 0.5301257371902466, "learning_rate": 0.0005969444714671774, "loss": 2.0033, "step": 4100 }, { "epoch": 0.14, "grad_norm": 0.5221079587936401, "learning_rate": 0.0005969429833205504, "loss": 2.0339, "step": 4101 }, { "epoch": 0.14, "grad_norm": 0.4993809759616852, "learning_rate": 0.0005969414948134782, "loss": 1.9159, "step": 4102 }, { "epoch": 0.14, "grad_norm": 0.508328914642334, "learning_rate": 0.0005969400059459628, "loss": 2.0371, "step": 4103 }, { "epoch": 0.14, "grad_norm": 0.5327392816543579, "learning_rate": 0.000596938516718006, "loss": 2.0764, "step": 4104 }, { "epoch": 0.14, "grad_norm": 0.5384989380836487, "learning_rate": 0.0005969370271296095, "loss": 1.9617, "step": 4105 }, { "epoch": 0.14, "grad_norm": 0.5197284817695618, "learning_rate": 0.0005969355371807753, "loss": 1.9801, "step": 4106 }, { "epoch": 0.14, "grad_norm": 0.506139874458313, "learning_rate": 0.0005969340468715052, "loss": 1.9597, "step": 4107 }, { "epoch": 0.14, "grad_norm": 0.522970974445343, "learning_rate": 0.0005969325562018008, "loss": 1.9928, "step": 4108 }, { "epoch": 0.14, "grad_norm": 0.5021836161613464, "learning_rate": 0.000596931065171664, "loss": 1.9735, "step": 4109 }, { "epoch": 0.14, "grad_norm": 0.5282431244850159, "learning_rate": 0.0005969295737810967, "loss": 2.0328, "step": 4110 }, { "epoch": 0.14, "grad_norm": 0.512540340423584, "learning_rate": 0.0005969280820301006, "loss": 2.0093, "step": 4111 }, { "epoch": 0.14, "grad_norm": 0.5196576118469238, "learning_rate": 0.0005969265899186777, "loss": 2.0401, "step": 4112 }, { "epoch": 0.14, "grad_norm": 0.5071231126785278, "learning_rate": 0.0005969250974468296, "loss": 1.9587, "step": 4113 }, { "epoch": 0.14, "grad_norm": 0.5314133763313293, "learning_rate": 0.000596923604614558, "loss": 2.017, "step": 4114 }, { "epoch": 0.14, "grad_norm": 0.5222057700157166, "learning_rate": 0.0005969221114218651, "loss": 2.0715, "step": 4115 }, { "epoch": 0.14, "grad_norm": 0.5001762509346008, "learning_rate": 0.0005969206178687525, "loss": 1.9907, "step": 4116 }, { "epoch": 0.14, "grad_norm": 0.5013043284416199, "learning_rate": 0.000596919123955222, "loss": 1.9938, "step": 4117 }, { "epoch": 0.14, "grad_norm": 0.49699050188064575, "learning_rate": 0.0005969176296812753, "loss": 2.0155, "step": 4118 }, { "epoch": 0.14, "grad_norm": 0.5134289264678955, "learning_rate": 0.0005969161350469145, "loss": 1.9349, "step": 4119 }, { "epoch": 0.14, "grad_norm": 0.500352680683136, "learning_rate": 0.0005969146400521412, "loss": 2.0216, "step": 4120 }, { "epoch": 0.14, "grad_norm": 0.5380183458328247, "learning_rate": 0.0005969131446969572, "loss": 2.029, "step": 4121 }, { "epoch": 0.14, "grad_norm": 0.5075259804725647, "learning_rate": 0.0005969116489813645, "loss": 1.9708, "step": 4122 }, { "epoch": 0.14, "grad_norm": 0.4951256513595581, "learning_rate": 0.0005969101529053649, "loss": 1.9841, "step": 4123 }, { "epoch": 0.14, "grad_norm": 0.5198085308074951, "learning_rate": 0.00059690865646896, "loss": 1.9938, "step": 4124 }, { "epoch": 0.14, "grad_norm": 0.5301989912986755, "learning_rate": 0.0005969071596721517, "loss": 1.8936, "step": 4125 }, { "epoch": 0.14, "grad_norm": 0.534451961517334, "learning_rate": 0.0005969056625149419, "loss": 2.036, "step": 4126 }, { "epoch": 0.14, "grad_norm": 0.5147555470466614, "learning_rate": 0.0005969041649973323, "loss": 1.9438, "step": 4127 }, { "epoch": 0.14, "grad_norm": 0.5277670621871948, "learning_rate": 0.0005969026671193249, "loss": 2.0445, "step": 4128 }, { "epoch": 0.14, "grad_norm": 0.5391527414321899, "learning_rate": 0.0005969011688809213, "loss": 1.9792, "step": 4129 }, { "epoch": 0.14, "grad_norm": 0.5263908505439758, "learning_rate": 0.0005968996702821236, "loss": 1.9057, "step": 4130 }, { "epoch": 0.14, "grad_norm": 0.5132560133934021, "learning_rate": 0.0005968981713229333, "loss": 2.0963, "step": 4131 }, { "epoch": 0.14, "grad_norm": 0.511180579662323, "learning_rate": 0.0005968966720033525, "loss": 1.9245, "step": 4132 }, { "epoch": 0.14, "grad_norm": 0.5356068015098572, "learning_rate": 0.0005968951723233827, "loss": 2.0401, "step": 4133 }, { "epoch": 0.14, "grad_norm": 0.5023120045661926, "learning_rate": 0.000596893672283026, "loss": 1.9881, "step": 4134 }, { "epoch": 0.14, "grad_norm": 0.5008357763290405, "learning_rate": 0.0005968921718822841, "loss": 1.9654, "step": 4135 }, { "epoch": 0.14, "grad_norm": 0.5260744094848633, "learning_rate": 0.0005968906711211588, "loss": 1.9583, "step": 4136 }, { "epoch": 0.14, "grad_norm": 0.5032157897949219, "learning_rate": 0.0005968891699996521, "loss": 2.0366, "step": 4137 }, { "epoch": 0.14, "grad_norm": 0.5105908513069153, "learning_rate": 0.0005968876685177656, "loss": 1.9867, "step": 4138 }, { "epoch": 0.14, "grad_norm": 0.5098409652709961, "learning_rate": 0.0005968861666755013, "loss": 2.0196, "step": 4139 }, { "epoch": 0.14, "grad_norm": 0.5269243121147156, "learning_rate": 0.000596884664472861, "loss": 2.0525, "step": 4140 }, { "epoch": 0.14, "grad_norm": 0.5284143686294556, "learning_rate": 0.0005968831619098462, "loss": 1.9814, "step": 4141 }, { "epoch": 0.14, "grad_norm": 0.5040093064308167, "learning_rate": 0.0005968816589864592, "loss": 2.004, "step": 4142 }, { "epoch": 0.14, "grad_norm": 0.5065739154815674, "learning_rate": 0.0005968801557027015, "loss": 1.9562, "step": 4143 }, { "epoch": 0.14, "grad_norm": 0.5055093765258789, "learning_rate": 0.0005968786520585751, "loss": 1.9488, "step": 4144 }, { "epoch": 0.14, "grad_norm": 0.5069788098335266, "learning_rate": 0.0005968771480540817, "loss": 2.0148, "step": 4145 }, { "epoch": 0.14, "grad_norm": 0.5120553970336914, "learning_rate": 0.0005968756436892232, "loss": 2.0316, "step": 4146 }, { "epoch": 0.14, "grad_norm": 0.5119265913963318, "learning_rate": 0.0005968741389640015, "loss": 2.0593, "step": 4147 }, { "epoch": 0.14, "grad_norm": 0.5208390951156616, "learning_rate": 0.0005968726338784182, "loss": 2.0015, "step": 4148 }, { "epoch": 0.14, "grad_norm": 0.5161821842193604, "learning_rate": 0.0005968711284324754, "loss": 1.9575, "step": 4149 }, { "epoch": 0.14, "grad_norm": 0.5119253396987915, "learning_rate": 0.0005968696226261747, "loss": 2.0363, "step": 4150 }, { "epoch": 0.14, "grad_norm": 0.5267235040664673, "learning_rate": 0.0005968681164595181, "loss": 2.0381, "step": 4151 }, { "epoch": 0.14, "grad_norm": 0.5585176944732666, "learning_rate": 0.0005968666099325072, "loss": 1.9748, "step": 4152 }, { "epoch": 0.14, "grad_norm": 0.5019769072532654, "learning_rate": 0.0005968651030451442, "loss": 1.9546, "step": 4153 }, { "epoch": 0.14, "grad_norm": 0.5683251619338989, "learning_rate": 0.0005968635957974305, "loss": 2.0868, "step": 4154 }, { "epoch": 0.14, "grad_norm": 0.5358943343162537, "learning_rate": 0.0005968620881893682, "loss": 2.0277, "step": 4155 }, { "epoch": 0.14, "grad_norm": 0.5428386330604553, "learning_rate": 0.0005968605802209591, "loss": 1.9865, "step": 4156 }, { "epoch": 0.14, "grad_norm": 0.5011551380157471, "learning_rate": 0.000596859071892205, "loss": 1.9755, "step": 4157 }, { "epoch": 0.14, "grad_norm": 0.5036707520484924, "learning_rate": 0.0005968575632031076, "loss": 2.0001, "step": 4158 }, { "epoch": 0.14, "grad_norm": 0.524336576461792, "learning_rate": 0.0005968560541536691, "loss": 2.0002, "step": 4159 }, { "epoch": 0.14, "grad_norm": 0.5138810276985168, "learning_rate": 0.0005968545447438909, "loss": 2.022, "step": 4160 }, { "epoch": 0.14, "grad_norm": 0.519944429397583, "learning_rate": 0.000596853034973775, "loss": 2.0086, "step": 4161 }, { "epoch": 0.14, "grad_norm": 0.5128277540206909, "learning_rate": 0.0005968515248433234, "loss": 2.0132, "step": 4162 }, { "epoch": 0.14, "grad_norm": 0.507845938205719, "learning_rate": 0.0005968500143525377, "loss": 2.0514, "step": 4163 }, { "epoch": 0.14, "grad_norm": 0.5277928113937378, "learning_rate": 0.0005968485035014198, "loss": 1.9398, "step": 4164 }, { "epoch": 0.14, "grad_norm": 0.526905357837677, "learning_rate": 0.0005968469922899717, "loss": 1.997, "step": 4165 }, { "epoch": 0.14, "grad_norm": 0.5330957174301147, "learning_rate": 0.0005968454807181949, "loss": 2.0356, "step": 4166 }, { "epoch": 0.14, "grad_norm": 0.5163090825080872, "learning_rate": 0.0005968439687860915, "loss": 1.9699, "step": 4167 }, { "epoch": 0.14, "grad_norm": 0.5419760346412659, "learning_rate": 0.0005968424564936633, "loss": 1.9602, "step": 4168 }, { "epoch": 0.14, "grad_norm": 0.5275328755378723, "learning_rate": 0.0005968409438409121, "loss": 2.0081, "step": 4169 }, { "epoch": 0.14, "grad_norm": 0.5128074288368225, "learning_rate": 0.0005968394308278397, "loss": 1.9519, "step": 4170 }, { "epoch": 0.14, "grad_norm": 0.6144908666610718, "learning_rate": 0.0005968379174544479, "loss": 2.0367, "step": 4171 }, { "epoch": 0.14, "grad_norm": 0.5324212908744812, "learning_rate": 0.0005968364037207387, "loss": 2.0007, "step": 4172 }, { "epoch": 0.14, "grad_norm": 0.5191683173179626, "learning_rate": 0.0005968348896267137, "loss": 1.9501, "step": 4173 }, { "epoch": 0.14, "grad_norm": 0.5797976851463318, "learning_rate": 0.0005968333751723751, "loss": 2.0452, "step": 4174 }, { "epoch": 0.14, "grad_norm": 0.5375512838363647, "learning_rate": 0.0005968318603577243, "loss": 1.9648, "step": 4175 }, { "epoch": 0.14, "grad_norm": 0.5410587191581726, "learning_rate": 0.0005968303451827636, "loss": 1.9644, "step": 4176 }, { "epoch": 0.14, "grad_norm": 0.5159870386123657, "learning_rate": 0.0005968288296474944, "loss": 2.0009, "step": 4177 }, { "epoch": 0.14, "grad_norm": 0.514080286026001, "learning_rate": 0.0005968273137519188, "loss": 2.0022, "step": 4178 }, { "epoch": 0.14, "grad_norm": 0.574847936630249, "learning_rate": 0.0005968257974960387, "loss": 2.0831, "step": 4179 }, { "epoch": 0.14, "grad_norm": 0.590179979801178, "learning_rate": 0.0005968242808798555, "loss": 2.0725, "step": 4180 }, { "epoch": 0.14, "grad_norm": 0.5360528826713562, "learning_rate": 0.0005968227639033717, "loss": 2.0178, "step": 4181 }, { "epoch": 0.14, "grad_norm": 0.5351141691207886, "learning_rate": 0.0005968212465665886, "loss": 2.0349, "step": 4182 }, { "epoch": 0.14, "grad_norm": 0.5194584131240845, "learning_rate": 0.0005968197288695082, "loss": 1.9743, "step": 4183 }, { "epoch": 0.14, "grad_norm": 0.5213603377342224, "learning_rate": 0.0005968182108121326, "loss": 1.9617, "step": 4184 }, { "epoch": 0.14, "grad_norm": 0.5390600562095642, "learning_rate": 0.0005968166923944633, "loss": 2.0496, "step": 4185 }, { "epoch": 0.14, "grad_norm": 0.5746343731880188, "learning_rate": 0.0005968151736165022, "loss": 1.993, "step": 4186 }, { "epoch": 0.14, "grad_norm": 0.5185734033584595, "learning_rate": 0.0005968136544782513, "loss": 2.0097, "step": 4187 }, { "epoch": 0.14, "grad_norm": 0.5038643479347229, "learning_rate": 0.0005968121349797124, "loss": 2.0787, "step": 4188 }, { "epoch": 0.14, "grad_norm": 0.4943293333053589, "learning_rate": 0.0005968106151208872, "loss": 1.881, "step": 4189 }, { "epoch": 0.14, "grad_norm": 0.6428413391113281, "learning_rate": 0.0005968090949017777, "loss": 2.0309, "step": 4190 }, { "epoch": 0.14, "grad_norm": 0.5247498750686646, "learning_rate": 0.0005968075743223856, "loss": 1.9685, "step": 4191 }, { "epoch": 0.14, "grad_norm": 0.5281878709793091, "learning_rate": 0.000596806053382713, "loss": 1.9767, "step": 4192 }, { "epoch": 0.14, "grad_norm": 0.5053491592407227, "learning_rate": 0.0005968045320827615, "loss": 1.9743, "step": 4193 }, { "epoch": 0.14, "grad_norm": 0.5091102123260498, "learning_rate": 0.000596803010422533, "loss": 1.9818, "step": 4194 }, { "epoch": 0.14, "grad_norm": 0.5229610204696655, "learning_rate": 0.0005968014884020294, "loss": 2.0195, "step": 4195 }, { "epoch": 0.14, "grad_norm": 0.5390117168426514, "learning_rate": 0.0005967999660212525, "loss": 2.0213, "step": 4196 }, { "epoch": 0.14, "grad_norm": 0.5075545907020569, "learning_rate": 0.0005967984432802043, "loss": 2.0355, "step": 4197 }, { "epoch": 0.14, "grad_norm": 0.5259600281715393, "learning_rate": 0.0005967969201788865, "loss": 1.9772, "step": 4198 }, { "epoch": 0.14, "grad_norm": 0.5267565250396729, "learning_rate": 0.0005967953967173007, "loss": 2.0487, "step": 4199 }, { "epoch": 0.14, "grad_norm": 0.5088816285133362, "learning_rate": 0.0005967938728954493, "loss": 1.9637, "step": 4200 }, { "epoch": 0.14, "grad_norm": 0.5073935389518738, "learning_rate": 0.0005967923487133338, "loss": 2.0289, "step": 4201 }, { "epoch": 0.14, "grad_norm": 0.5046647191047668, "learning_rate": 0.0005967908241709561, "loss": 1.9919, "step": 4202 }, { "epoch": 0.14, "grad_norm": 0.5193099975585938, "learning_rate": 0.000596789299268318, "loss": 2.0116, "step": 4203 }, { "epoch": 0.14, "grad_norm": 0.5143710374832153, "learning_rate": 0.0005967877740054216, "loss": 2.0322, "step": 4204 }, { "epoch": 0.14, "grad_norm": 0.5303717255592346, "learning_rate": 0.0005967862483822684, "loss": 2.0272, "step": 4205 }, { "epoch": 0.14, "grad_norm": 0.5065816640853882, "learning_rate": 0.0005967847223988605, "loss": 2.0162, "step": 4206 }, { "epoch": 0.14, "grad_norm": 0.5008983612060547, "learning_rate": 0.0005967831960551997, "loss": 1.9809, "step": 4207 }, { "epoch": 0.14, "grad_norm": 0.5003839731216431, "learning_rate": 0.0005967816693512877, "loss": 1.9951, "step": 4208 }, { "epoch": 0.14, "grad_norm": 0.5141550302505493, "learning_rate": 0.0005967801422871266, "loss": 1.9508, "step": 4209 }, { "epoch": 0.14, "grad_norm": 0.517692506313324, "learning_rate": 0.0005967786148627182, "loss": 2.0269, "step": 4210 }, { "epoch": 0.14, "grad_norm": 0.5245683193206787, "learning_rate": 0.0005967770870780641, "loss": 2.0774, "step": 4211 }, { "epoch": 0.14, "grad_norm": 0.5064153075218201, "learning_rate": 0.0005967755589331665, "loss": 2.0002, "step": 4212 }, { "epoch": 0.14, "grad_norm": 0.49157559871673584, "learning_rate": 0.0005967740304280271, "loss": 1.9414, "step": 4213 }, { "epoch": 0.14, "grad_norm": 0.5266830325126648, "learning_rate": 0.0005967725015626476, "loss": 2.0689, "step": 4214 }, { "epoch": 0.14, "grad_norm": 0.49461430311203003, "learning_rate": 0.0005967709723370302, "loss": 2.006, "step": 4215 }, { "epoch": 0.14, "grad_norm": 0.5264549851417542, "learning_rate": 0.0005967694427511765, "loss": 2.0465, "step": 4216 }, { "epoch": 0.14, "grad_norm": 0.5079143047332764, "learning_rate": 0.0005967679128050884, "loss": 1.9903, "step": 4217 }, { "epoch": 0.14, "grad_norm": 0.5409762263298035, "learning_rate": 0.0005967663824987677, "loss": 2.0212, "step": 4218 }, { "epoch": 0.14, "grad_norm": 0.49481552839279175, "learning_rate": 0.0005967648518322165, "loss": 1.9898, "step": 4219 }, { "epoch": 0.14, "grad_norm": 0.5091128349304199, "learning_rate": 0.0005967633208054363, "loss": 2.0425, "step": 4220 }, { "epoch": 0.14, "grad_norm": 0.5591050386428833, "learning_rate": 0.0005967617894184293, "loss": 1.9883, "step": 4221 }, { "epoch": 0.14, "grad_norm": 0.49745413661003113, "learning_rate": 0.0005967602576711972, "loss": 1.9379, "step": 4222 }, { "epoch": 0.14, "grad_norm": 0.5059025287628174, "learning_rate": 0.0005967587255637419, "loss": 1.9876, "step": 4223 }, { "epoch": 0.14, "grad_norm": 0.5118593573570251, "learning_rate": 0.0005967571930960651, "loss": 2.0707, "step": 4224 }, { "epoch": 0.14, "grad_norm": 0.49761006236076355, "learning_rate": 0.0005967556602681689, "loss": 1.9768, "step": 4225 }, { "epoch": 0.14, "grad_norm": 0.5050903558731079, "learning_rate": 0.000596754127080055, "loss": 2.0535, "step": 4226 }, { "epoch": 0.14, "grad_norm": 0.5447726249694824, "learning_rate": 0.0005967525935317254, "loss": 2.0557, "step": 4227 }, { "epoch": 0.14, "grad_norm": 0.5268980264663696, "learning_rate": 0.0005967510596231818, "loss": 1.9823, "step": 4228 }, { "epoch": 0.14, "grad_norm": 0.5656799674034119, "learning_rate": 0.0005967495253544262, "loss": 2.0131, "step": 4229 }, { "epoch": 0.14, "grad_norm": 0.4974476993083954, "learning_rate": 0.0005967479907254603, "loss": 1.987, "step": 4230 }, { "epoch": 0.14, "grad_norm": 0.534507691860199, "learning_rate": 0.0005967464557362862, "loss": 1.933, "step": 4231 }, { "epoch": 0.14, "grad_norm": 0.5173467397689819, "learning_rate": 0.0005967449203869055, "loss": 1.961, "step": 4232 }, { "epoch": 0.14, "grad_norm": 0.5167717933654785, "learning_rate": 0.0005967433846773203, "loss": 1.9892, "step": 4233 }, { "epoch": 0.14, "grad_norm": 0.5126582980155945, "learning_rate": 0.0005967418486075323, "loss": 2.0389, "step": 4234 }, { "epoch": 0.14, "grad_norm": 0.5241398811340332, "learning_rate": 0.0005967403121775434, "loss": 2.0168, "step": 4235 }, { "epoch": 0.14, "grad_norm": 0.5245829224586487, "learning_rate": 0.0005967387753873555, "loss": 1.9618, "step": 4236 }, { "epoch": 0.14, "grad_norm": 0.5198970437049866, "learning_rate": 0.0005967372382369705, "loss": 2.0249, "step": 4237 }, { "epoch": 0.14, "grad_norm": 0.5227745175361633, "learning_rate": 0.0005967357007263901, "loss": 1.9185, "step": 4238 }, { "epoch": 0.14, "grad_norm": 0.5432193279266357, "learning_rate": 0.0005967341628556163, "loss": 2.0655, "step": 4239 }, { "epoch": 0.14, "grad_norm": 0.5217452645301819, "learning_rate": 0.000596732624624651, "loss": 1.9911, "step": 4240 }, { "epoch": 0.14, "grad_norm": 0.5259817838668823, "learning_rate": 0.0005967310860334961, "loss": 1.9251, "step": 4241 }, { "epoch": 0.14, "grad_norm": 0.5162447690963745, "learning_rate": 0.0005967295470821533, "loss": 2.0479, "step": 4242 }, { "epoch": 0.14, "grad_norm": 0.5246485471725464, "learning_rate": 0.0005967280077706245, "loss": 1.9403, "step": 4243 }, { "epoch": 0.14, "grad_norm": 0.5157904624938965, "learning_rate": 0.0005967264680989116, "loss": 2.0025, "step": 4244 }, { "epoch": 0.14, "grad_norm": 0.5063915848731995, "learning_rate": 0.0005967249280670167, "loss": 2.0402, "step": 4245 }, { "epoch": 0.14, "grad_norm": 0.5389513373374939, "learning_rate": 0.0005967233876749413, "loss": 1.9891, "step": 4246 }, { "epoch": 0.14, "grad_norm": 0.5140700936317444, "learning_rate": 0.0005967218469226874, "loss": 2.054, "step": 4247 }, { "epoch": 0.14, "grad_norm": 0.5142983794212341, "learning_rate": 0.0005967203058102569, "loss": 1.9529, "step": 4248 }, { "epoch": 0.14, "grad_norm": 0.5200080275535583, "learning_rate": 0.0005967187643376516, "loss": 2.0403, "step": 4249 }, { "epoch": 0.14, "grad_norm": 0.507422924041748, "learning_rate": 0.0005967172225048737, "loss": 1.9296, "step": 4250 }, { "epoch": 0.14, "grad_norm": 0.514758288860321, "learning_rate": 0.0005967156803119246, "loss": 1.9921, "step": 4251 }, { "epoch": 0.14, "grad_norm": 0.5310620069503784, "learning_rate": 0.0005967141377588063, "loss": 1.9729, "step": 4252 }, { "epoch": 0.14, "grad_norm": 0.5036682486534119, "learning_rate": 0.0005967125948455209, "loss": 2.0207, "step": 4253 }, { "epoch": 0.14, "grad_norm": 0.5048800110816956, "learning_rate": 0.0005967110515720702, "loss": 1.9879, "step": 4254 }, { "epoch": 0.14, "grad_norm": 0.4996435344219208, "learning_rate": 0.0005967095079384559, "loss": 2.0179, "step": 4255 }, { "epoch": 0.14, "grad_norm": 0.5073675513267517, "learning_rate": 0.0005967079639446801, "loss": 1.9515, "step": 4256 }, { "epoch": 0.14, "grad_norm": 0.525481104850769, "learning_rate": 0.0005967064195907443, "loss": 1.9834, "step": 4257 }, { "epoch": 0.14, "grad_norm": 0.5077114701271057, "learning_rate": 0.0005967048748766508, "loss": 1.9512, "step": 4258 }, { "epoch": 0.14, "grad_norm": 0.49913302063941956, "learning_rate": 0.0005967033298024013, "loss": 1.9287, "step": 4259 }, { "epoch": 0.14, "grad_norm": 0.5256307125091553, "learning_rate": 0.0005967017843679977, "loss": 2.0054, "step": 4260 }, { "epoch": 0.14, "grad_norm": 0.5034868121147156, "learning_rate": 0.0005967002385734417, "loss": 2.0779, "step": 4261 }, { "epoch": 0.14, "grad_norm": 0.5489572882652283, "learning_rate": 0.0005966986924187354, "loss": 2.0178, "step": 4262 }, { "epoch": 0.14, "grad_norm": 0.523868978023529, "learning_rate": 0.0005966971459038806, "loss": 1.957, "step": 4263 }, { "epoch": 0.14, "grad_norm": 0.5286502838134766, "learning_rate": 0.0005966955990288793, "loss": 1.9761, "step": 4264 }, { "epoch": 0.14, "grad_norm": 0.5169735550880432, "learning_rate": 0.0005966940517937332, "loss": 1.9247, "step": 4265 }, { "epoch": 0.14, "grad_norm": 0.5357780456542969, "learning_rate": 0.0005966925041984442, "loss": 1.9987, "step": 4266 }, { "epoch": 0.14, "grad_norm": 0.53505939245224, "learning_rate": 0.0005966909562430141, "loss": 1.9822, "step": 4267 }, { "epoch": 0.14, "grad_norm": 0.5315968990325928, "learning_rate": 0.000596689407927445, "loss": 1.9599, "step": 4268 }, { "epoch": 0.14, "grad_norm": 0.5542101263999939, "learning_rate": 0.0005966878592517388, "loss": 1.9308, "step": 4269 }, { "epoch": 0.14, "grad_norm": 0.5144140124320984, "learning_rate": 0.000596686310215897, "loss": 2.0008, "step": 4270 }, { "epoch": 0.14, "grad_norm": 0.5216966271400452, "learning_rate": 0.0005966847608199219, "loss": 2.0557, "step": 4271 }, { "epoch": 0.14, "grad_norm": 0.5548645257949829, "learning_rate": 0.0005966832110638153, "loss": 1.9849, "step": 4272 }, { "epoch": 0.14, "grad_norm": 0.5206835865974426, "learning_rate": 0.0005966816609475788, "loss": 1.9959, "step": 4273 }, { "epoch": 0.14, "grad_norm": 0.5257799029350281, "learning_rate": 0.0005966801104712146, "loss": 2.0079, "step": 4274 }, { "epoch": 0.14, "grad_norm": 0.5290924906730652, "learning_rate": 0.0005966785596347244, "loss": 1.9349, "step": 4275 }, { "epoch": 0.14, "grad_norm": 0.5035796761512756, "learning_rate": 0.0005966770084381102, "loss": 1.9768, "step": 4276 }, { "epoch": 0.14, "grad_norm": 0.519736647605896, "learning_rate": 0.0005966754568813738, "loss": 1.9326, "step": 4277 }, { "epoch": 0.14, "grad_norm": 0.52495276927948, "learning_rate": 0.0005966739049645171, "loss": 1.9718, "step": 4278 }, { "epoch": 0.14, "grad_norm": 0.5164002180099487, "learning_rate": 0.000596672352687542, "loss": 1.9712, "step": 4279 }, { "epoch": 0.14, "grad_norm": 0.5175773501396179, "learning_rate": 0.0005966708000504504, "loss": 2.0182, "step": 4280 }, { "epoch": 0.14, "grad_norm": 0.5416942238807678, "learning_rate": 0.0005966692470532442, "loss": 2.0014, "step": 4281 }, { "epoch": 0.14, "grad_norm": 0.5120303630828857, "learning_rate": 0.0005966676936959251, "loss": 1.97, "step": 4282 }, { "epoch": 0.14, "grad_norm": 0.4986669719219208, "learning_rate": 0.0005966661399784952, "loss": 1.992, "step": 4283 }, { "epoch": 0.14, "grad_norm": 0.5548907518386841, "learning_rate": 0.0005966645859009562, "loss": 1.9819, "step": 4284 }, { "epoch": 0.14, "grad_norm": 0.5632163286209106, "learning_rate": 0.0005966630314633102, "loss": 2.0279, "step": 4285 }, { "epoch": 0.14, "grad_norm": 0.5057372450828552, "learning_rate": 0.0005966614766655591, "loss": 2.0197, "step": 4286 }, { "epoch": 0.14, "grad_norm": 0.5093274116516113, "learning_rate": 0.0005966599215077046, "loss": 2.0009, "step": 4287 }, { "epoch": 0.14, "grad_norm": 0.5778422951698303, "learning_rate": 0.0005966583659897487, "loss": 2.001, "step": 4288 }, { "epoch": 0.14, "grad_norm": 0.560664176940918, "learning_rate": 0.0005966568101116932, "loss": 1.9749, "step": 4289 }, { "epoch": 0.14, "grad_norm": 0.5173314809799194, "learning_rate": 0.0005966552538735401, "loss": 1.9636, "step": 4290 }, { "epoch": 0.14, "grad_norm": 0.5424062013626099, "learning_rate": 0.0005966536972752912, "loss": 1.9775, "step": 4291 }, { "epoch": 0.14, "grad_norm": 0.515899121761322, "learning_rate": 0.0005966521403169483, "loss": 1.9711, "step": 4292 }, { "epoch": 0.14, "grad_norm": 0.5403047800064087, "learning_rate": 0.0005966505829985136, "loss": 2.0053, "step": 4293 }, { "epoch": 0.14, "grad_norm": 0.5459654927253723, "learning_rate": 0.0005966490253199887, "loss": 1.9456, "step": 4294 }, { "epoch": 0.14, "grad_norm": 0.5090416073799133, "learning_rate": 0.0005966474672813757, "loss": 1.9571, "step": 4295 }, { "epoch": 0.14, "grad_norm": 0.5019791722297668, "learning_rate": 0.0005966459088826762, "loss": 2.0644, "step": 4296 }, { "epoch": 0.14, "grad_norm": 0.5196108222007751, "learning_rate": 0.0005966443501238924, "loss": 1.9388, "step": 4297 }, { "epoch": 0.14, "grad_norm": 0.5340990424156189, "learning_rate": 0.0005966427910050261, "loss": 2.0425, "step": 4298 }, { "epoch": 0.14, "grad_norm": 0.49984803795814514, "learning_rate": 0.000596641231526079, "loss": 1.9817, "step": 4299 }, { "epoch": 0.14, "grad_norm": 0.5173486471176147, "learning_rate": 0.0005966396716870532, "loss": 1.9911, "step": 4300 }, { "epoch": 0.14, "grad_norm": 0.5086604356765747, "learning_rate": 0.0005966381114879507, "loss": 2.0537, "step": 4301 }, { "epoch": 0.14, "grad_norm": 0.5075545310974121, "learning_rate": 0.0005966365509287732, "loss": 1.883, "step": 4302 }, { "epoch": 0.14, "grad_norm": 0.5087303519248962, "learning_rate": 0.0005966349900095225, "loss": 2.0635, "step": 4303 }, { "epoch": 0.14, "grad_norm": 0.5373004674911499, "learning_rate": 0.0005966334287302007, "loss": 2.0012, "step": 4304 }, { "epoch": 0.14, "grad_norm": 0.5239830017089844, "learning_rate": 0.0005966318670908097, "loss": 1.9857, "step": 4305 }, { "epoch": 0.14, "grad_norm": 0.5006067156791687, "learning_rate": 0.0005966303050913513, "loss": 1.9741, "step": 4306 }, { "epoch": 0.14, "grad_norm": 0.5209824442863464, "learning_rate": 0.0005966287427318273, "loss": 2.0359, "step": 4307 }, { "epoch": 0.14, "grad_norm": 0.5637971758842468, "learning_rate": 0.0005966271800122398, "loss": 2.0481, "step": 4308 }, { "epoch": 0.14, "grad_norm": 0.5777862668037415, "learning_rate": 0.0005966256169325906, "loss": 1.9553, "step": 4309 }, { "epoch": 0.14, "grad_norm": 0.5121198296546936, "learning_rate": 0.0005966240534928816, "loss": 2.0123, "step": 4310 }, { "epoch": 0.14, "grad_norm": 0.5478067994117737, "learning_rate": 0.0005966224896931146, "loss": 2.0275, "step": 4311 }, { "epoch": 0.14, "grad_norm": 0.5203559994697571, "learning_rate": 0.0005966209255332918, "loss": 1.9068, "step": 4312 }, { "epoch": 0.14, "grad_norm": 0.5214829444885254, "learning_rate": 0.0005966193610134149, "loss": 1.9796, "step": 4313 }, { "epoch": 0.14, "grad_norm": 0.5092153549194336, "learning_rate": 0.0005966177961334855, "loss": 2.0326, "step": 4314 }, { "epoch": 0.14, "grad_norm": 0.5220175981521606, "learning_rate": 0.000596616230893506, "loss": 2.0106, "step": 4315 }, { "epoch": 0.14, "grad_norm": 0.5310189723968506, "learning_rate": 0.0005966146652934782, "loss": 2.0491, "step": 4316 }, { "epoch": 0.14, "grad_norm": 0.5375190377235413, "learning_rate": 0.0005966130993334038, "loss": 2.0128, "step": 4317 }, { "epoch": 0.14, "grad_norm": 0.5236002802848816, "learning_rate": 0.0005966115330132848, "loss": 1.9822, "step": 4318 }, { "epoch": 0.14, "grad_norm": 0.5279964208602905, "learning_rate": 0.0005966099663331231, "loss": 1.9586, "step": 4319 }, { "epoch": 0.14, "grad_norm": 0.5047915577888489, "learning_rate": 0.0005966083992929207, "loss": 2.0643, "step": 4320 }, { "epoch": 0.14, "grad_norm": 0.5294702053070068, "learning_rate": 0.0005966068318926793, "loss": 2.0481, "step": 4321 }, { "epoch": 0.14, "grad_norm": 0.5403278470039368, "learning_rate": 0.0005966052641324008, "loss": 1.9705, "step": 4322 }, { "epoch": 0.14, "grad_norm": 0.5103815793991089, "learning_rate": 0.0005966036960120874, "loss": 1.9528, "step": 4323 }, { "epoch": 0.14, "grad_norm": 0.5361819863319397, "learning_rate": 0.0005966021275317408, "loss": 1.9685, "step": 4324 }, { "epoch": 0.14, "grad_norm": 0.5267277359962463, "learning_rate": 0.0005966005586913628, "loss": 2.0395, "step": 4325 }, { "epoch": 0.14, "grad_norm": 0.5089145302772522, "learning_rate": 0.0005965989894909556, "loss": 1.9599, "step": 4326 }, { "epoch": 0.14, "grad_norm": 0.521635890007019, "learning_rate": 0.0005965974199305208, "loss": 1.9865, "step": 4327 }, { "epoch": 0.14, "grad_norm": 0.5216387510299683, "learning_rate": 0.0005965958500100605, "loss": 2.0557, "step": 4328 }, { "epoch": 0.14, "grad_norm": 0.569827139377594, "learning_rate": 0.0005965942797295764, "loss": 2.074, "step": 4329 }, { "epoch": 0.14, "grad_norm": 0.525115430355072, "learning_rate": 0.0005965927090890706, "loss": 2.0588, "step": 4330 }, { "epoch": 0.14, "grad_norm": 0.5207275748252869, "learning_rate": 0.0005965911380885451, "loss": 2.0977, "step": 4331 }, { "epoch": 0.14, "grad_norm": 0.5559849143028259, "learning_rate": 0.0005965895667280016, "loss": 2.029, "step": 4332 }, { "epoch": 0.14, "grad_norm": 0.552032470703125, "learning_rate": 0.000596587995007442, "loss": 2.0497, "step": 4333 }, { "epoch": 0.14, "grad_norm": 0.5245632529258728, "learning_rate": 0.0005965864229268683, "loss": 1.9815, "step": 4334 }, { "epoch": 0.14, "grad_norm": 0.5080227255821228, "learning_rate": 0.0005965848504862825, "loss": 2.0218, "step": 4335 }, { "epoch": 0.14, "grad_norm": 0.5195128917694092, "learning_rate": 0.0005965832776856861, "loss": 2.0007, "step": 4336 }, { "epoch": 0.14, "grad_norm": 0.559783935546875, "learning_rate": 0.0005965817045250815, "loss": 2.0158, "step": 4337 }, { "epoch": 0.14, "grad_norm": 0.5088090896606445, "learning_rate": 0.0005965801310044704, "loss": 2.0565, "step": 4338 }, { "epoch": 0.14, "grad_norm": 0.49318820238113403, "learning_rate": 0.0005965785571238547, "loss": 1.9935, "step": 4339 }, { "epoch": 0.14, "grad_norm": 0.5097852945327759, "learning_rate": 0.0005965769828832364, "loss": 1.9717, "step": 4340 }, { "epoch": 0.14, "grad_norm": 0.5179434418678284, "learning_rate": 0.0005965754082826173, "loss": 1.9812, "step": 4341 }, { "epoch": 0.14, "grad_norm": 0.5136381387710571, "learning_rate": 0.0005965738333219993, "loss": 1.9997, "step": 4342 }, { "epoch": 0.14, "grad_norm": 0.49559128284454346, "learning_rate": 0.0005965722580013844, "loss": 1.9928, "step": 4343 }, { "epoch": 0.14, "grad_norm": 0.4882770776748657, "learning_rate": 0.0005965706823207744, "loss": 1.9921, "step": 4344 }, { "epoch": 0.14, "grad_norm": 0.5147771835327148, "learning_rate": 0.0005965691062801714, "loss": 2.01, "step": 4345 }, { "epoch": 0.14, "grad_norm": 0.5378709435462952, "learning_rate": 0.0005965675298795772, "loss": 2.0517, "step": 4346 }, { "epoch": 0.14, "grad_norm": 0.5036848187446594, "learning_rate": 0.0005965659531189936, "loss": 2.004, "step": 4347 }, { "epoch": 0.14, "grad_norm": 0.501887857913971, "learning_rate": 0.0005965643759984228, "loss": 1.9704, "step": 4348 }, { "epoch": 0.14, "grad_norm": 0.5537142753601074, "learning_rate": 0.0005965627985178663, "loss": 2.0036, "step": 4349 }, { "epoch": 0.14, "grad_norm": 0.526472806930542, "learning_rate": 0.0005965612206773265, "loss": 2.0456, "step": 4350 }, { "epoch": 0.14, "grad_norm": 0.5282976627349854, "learning_rate": 0.000596559642476805, "loss": 2.0726, "step": 4351 }, { "epoch": 0.14, "grad_norm": 0.5274701714515686, "learning_rate": 0.0005965580639163037, "loss": 1.9769, "step": 4352 }, { "epoch": 0.14, "grad_norm": 0.5383975505828857, "learning_rate": 0.0005965564849958247, "loss": 1.9811, "step": 4353 }, { "epoch": 0.14, "grad_norm": 0.5011271834373474, "learning_rate": 0.0005965549057153698, "loss": 2.0068, "step": 4354 }, { "epoch": 0.14, "grad_norm": 0.5103787183761597, "learning_rate": 0.000596553326074941, "loss": 1.979, "step": 4355 }, { "epoch": 0.14, "grad_norm": 0.5249452590942383, "learning_rate": 0.0005965517460745401, "loss": 2.0711, "step": 4356 }, { "epoch": 0.14, "grad_norm": 0.5069141983985901, "learning_rate": 0.000596550165714169, "loss": 2.075, "step": 4357 }, { "epoch": 0.14, "grad_norm": 0.5350857377052307, "learning_rate": 0.0005965485849938299, "loss": 1.9793, "step": 4358 }, { "epoch": 0.15, "grad_norm": 0.5180373787879944, "learning_rate": 0.0005965470039135242, "loss": 2.0075, "step": 4359 }, { "epoch": 0.15, "grad_norm": 0.5071825385093689, "learning_rate": 0.0005965454224732545, "loss": 1.9748, "step": 4360 }, { "epoch": 0.15, "grad_norm": 0.5041677355766296, "learning_rate": 0.000596543840673022, "loss": 2.0823, "step": 4361 }, { "epoch": 0.15, "grad_norm": 0.5337557792663574, "learning_rate": 0.0005965422585128292, "loss": 2.0169, "step": 4362 }, { "epoch": 0.15, "grad_norm": 0.5148792862892151, "learning_rate": 0.0005965406759926777, "loss": 2.0962, "step": 4363 }, { "epoch": 0.15, "grad_norm": 0.5010430216789246, "learning_rate": 0.0005965390931125696, "loss": 1.9965, "step": 4364 }, { "epoch": 0.15, "grad_norm": 0.4918445944786072, "learning_rate": 0.0005965375098725067, "loss": 1.973, "step": 4365 }, { "epoch": 0.15, "grad_norm": 0.49894028902053833, "learning_rate": 0.0005965359262724909, "loss": 2.0416, "step": 4366 }, { "epoch": 0.15, "grad_norm": 0.49631670117378235, "learning_rate": 0.0005965343423125243, "loss": 2.0878, "step": 4367 }, { "epoch": 0.15, "grad_norm": 0.5135817527770996, "learning_rate": 0.0005965327579926086, "loss": 1.952, "step": 4368 }, { "epoch": 0.15, "grad_norm": 0.48569154739379883, "learning_rate": 0.0005965311733127457, "loss": 2.0316, "step": 4369 }, { "epoch": 0.15, "grad_norm": 0.4925585389137268, "learning_rate": 0.0005965295882729378, "loss": 1.982, "step": 4370 }, { "epoch": 0.15, "grad_norm": 0.5159206390380859, "learning_rate": 0.0005965280028731866, "loss": 1.9935, "step": 4371 }, { "epoch": 0.15, "grad_norm": 0.5199046730995178, "learning_rate": 0.0005965264171134942, "loss": 2.0055, "step": 4372 }, { "epoch": 0.15, "grad_norm": 0.5226618647575378, "learning_rate": 0.0005965248309938623, "loss": 2.0009, "step": 4373 }, { "epoch": 0.15, "grad_norm": 0.5116302371025085, "learning_rate": 0.0005965232445142931, "loss": 2.0509, "step": 4374 }, { "epoch": 0.15, "grad_norm": 0.5019338130950928, "learning_rate": 0.0005965216576747882, "loss": 1.9992, "step": 4375 }, { "epoch": 0.15, "grad_norm": 0.5110586881637573, "learning_rate": 0.0005965200704753499, "loss": 1.9715, "step": 4376 }, { "epoch": 0.15, "grad_norm": 0.502588152885437, "learning_rate": 0.0005965184829159798, "loss": 1.9526, "step": 4377 }, { "epoch": 0.15, "grad_norm": 0.5183697938919067, "learning_rate": 0.0005965168949966799, "loss": 2.0277, "step": 4378 }, { "epoch": 0.15, "grad_norm": 0.5151094794273376, "learning_rate": 0.0005965153067174523, "loss": 2.0527, "step": 4379 }, { "epoch": 0.15, "grad_norm": 0.5166131854057312, "learning_rate": 0.0005965137180782987, "loss": 2.0779, "step": 4380 }, { "epoch": 0.15, "grad_norm": 0.5134149193763733, "learning_rate": 0.0005965121290792212, "loss": 2.0162, "step": 4381 }, { "epoch": 0.15, "grad_norm": 0.5321218967437744, "learning_rate": 0.0005965105397202216, "loss": 1.981, "step": 4382 }, { "epoch": 0.15, "grad_norm": 0.5000351071357727, "learning_rate": 0.0005965089500013021, "loss": 1.9873, "step": 4383 }, { "epoch": 0.15, "grad_norm": 0.5011827349662781, "learning_rate": 0.0005965073599224641, "loss": 1.9008, "step": 4384 }, { "epoch": 0.15, "grad_norm": 0.5270920395851135, "learning_rate": 0.00059650576948371, "loss": 1.9632, "step": 4385 }, { "epoch": 0.15, "grad_norm": 0.515744149684906, "learning_rate": 0.0005965041786850417, "loss": 2.0317, "step": 4386 }, { "epoch": 0.15, "grad_norm": 0.5173855423927307, "learning_rate": 0.0005965025875264609, "loss": 1.9705, "step": 4387 }, { "epoch": 0.15, "grad_norm": 0.5410596132278442, "learning_rate": 0.0005965009960079696, "loss": 1.9943, "step": 4388 }, { "epoch": 0.15, "grad_norm": 0.5121062397956848, "learning_rate": 0.0005964994041295699, "loss": 2.0157, "step": 4389 }, { "epoch": 0.15, "grad_norm": 0.5209007263183594, "learning_rate": 0.0005964978118912635, "loss": 1.9484, "step": 4390 }, { "epoch": 0.15, "grad_norm": 0.5033604502677917, "learning_rate": 0.0005964962192930525, "loss": 1.985, "step": 4391 }, { "epoch": 0.15, "grad_norm": 0.5023724436759949, "learning_rate": 0.0005964946263349388, "loss": 1.9899, "step": 4392 }, { "epoch": 0.15, "grad_norm": 0.5095410346984863, "learning_rate": 0.0005964930330169243, "loss": 1.9866, "step": 4393 }, { "epoch": 0.15, "grad_norm": 0.5537382960319519, "learning_rate": 0.0005964914393390109, "loss": 1.9476, "step": 4394 }, { "epoch": 0.15, "grad_norm": 0.5019834637641907, "learning_rate": 0.0005964898453012006, "loss": 1.9862, "step": 4395 }, { "epoch": 0.15, "grad_norm": 0.5073960423469543, "learning_rate": 0.0005964882509034953, "loss": 2.0463, "step": 4396 }, { "epoch": 0.15, "grad_norm": 0.4985153377056122, "learning_rate": 0.000596486656145897, "loss": 1.9789, "step": 4397 }, { "epoch": 0.15, "grad_norm": 0.48535671830177307, "learning_rate": 0.0005964850610284075, "loss": 1.9233, "step": 4398 }, { "epoch": 0.15, "grad_norm": 0.501432478427887, "learning_rate": 0.0005964834655510289, "loss": 2.0035, "step": 4399 }, { "epoch": 0.15, "grad_norm": 0.5117343664169312, "learning_rate": 0.000596481869713763, "loss": 2.0548, "step": 4400 }, { "epoch": 0.15, "grad_norm": 0.5072909593582153, "learning_rate": 0.0005964802735166118, "loss": 1.9342, "step": 4401 }, { "epoch": 0.15, "grad_norm": 0.5147688984870911, "learning_rate": 0.0005964786769595773, "loss": 1.9917, "step": 4402 }, { "epoch": 0.15, "grad_norm": 0.509254515171051, "learning_rate": 0.0005964770800426614, "loss": 2.0574, "step": 4403 }, { "epoch": 0.15, "grad_norm": 0.5267260074615479, "learning_rate": 0.0005964754827658659, "loss": 2.0072, "step": 4404 }, { "epoch": 0.15, "grad_norm": 0.49450254440307617, "learning_rate": 0.0005964738851291928, "loss": 1.9618, "step": 4405 }, { "epoch": 0.15, "grad_norm": 0.5195451378822327, "learning_rate": 0.0005964722871326442, "loss": 2.09, "step": 4406 }, { "epoch": 0.15, "grad_norm": 0.5120917558670044, "learning_rate": 0.0005964706887762219, "loss": 1.9888, "step": 4407 }, { "epoch": 0.15, "grad_norm": 0.502768337726593, "learning_rate": 0.0005964690900599279, "loss": 1.9634, "step": 4408 }, { "epoch": 0.15, "grad_norm": 0.5325756072998047, "learning_rate": 0.000596467490983764, "loss": 2.1158, "step": 4409 }, { "epoch": 0.15, "grad_norm": 0.5117133259773254, "learning_rate": 0.0005964658915477324, "loss": 1.9965, "step": 4410 }, { "epoch": 0.15, "grad_norm": 0.5303094387054443, "learning_rate": 0.0005964642917518348, "loss": 2.0225, "step": 4411 }, { "epoch": 0.15, "grad_norm": 0.5057080984115601, "learning_rate": 0.0005964626915960731, "loss": 2.0387, "step": 4412 }, { "epoch": 0.15, "grad_norm": 0.5241259932518005, "learning_rate": 0.0005964610910804497, "loss": 1.9867, "step": 4413 }, { "epoch": 0.15, "grad_norm": 0.5166147351264954, "learning_rate": 0.000596459490204966, "loss": 1.9756, "step": 4414 }, { "epoch": 0.15, "grad_norm": 0.5026525259017944, "learning_rate": 0.0005964578889696241, "loss": 1.979, "step": 4415 }, { "epoch": 0.15, "grad_norm": 0.5144497156143188, "learning_rate": 0.0005964562873744262, "loss": 2.0461, "step": 4416 }, { "epoch": 0.15, "grad_norm": 0.5192244648933411, "learning_rate": 0.0005964546854193739, "loss": 1.9982, "step": 4417 }, { "epoch": 0.15, "grad_norm": 0.4978957176208496, "learning_rate": 0.0005964530831044693, "loss": 1.9265, "step": 4418 }, { "epoch": 0.15, "grad_norm": 0.5167739987373352, "learning_rate": 0.0005964514804297145, "loss": 1.9486, "step": 4419 }, { "epoch": 0.15, "grad_norm": 0.5162400603294373, "learning_rate": 0.0005964498773951111, "loss": 2.0216, "step": 4420 }, { "epoch": 0.15, "grad_norm": 0.5125443935394287, "learning_rate": 0.0005964482740006612, "loss": 1.9896, "step": 4421 }, { "epoch": 0.15, "grad_norm": 0.5105890035629272, "learning_rate": 0.0005964466702463669, "loss": 1.963, "step": 4422 }, { "epoch": 0.15, "grad_norm": 0.49549755454063416, "learning_rate": 0.0005964450661322299, "loss": 1.9614, "step": 4423 }, { "epoch": 0.15, "grad_norm": 0.5178093910217285, "learning_rate": 0.0005964434616582524, "loss": 1.9683, "step": 4424 }, { "epoch": 0.15, "grad_norm": 0.5484389066696167, "learning_rate": 0.0005964418568244361, "loss": 2.0603, "step": 4425 }, { "epoch": 0.15, "grad_norm": 0.5058382749557495, "learning_rate": 0.0005964402516307832, "loss": 2.0098, "step": 4426 }, { "epoch": 0.15, "grad_norm": 0.5118047595024109, "learning_rate": 0.0005964386460772954, "loss": 2.0239, "step": 4427 }, { "epoch": 0.15, "grad_norm": 0.5202891826629639, "learning_rate": 0.0005964370401639749, "loss": 1.9892, "step": 4428 }, { "epoch": 0.15, "grad_norm": 0.5067073702812195, "learning_rate": 0.0005964354338908234, "loss": 2.0008, "step": 4429 }, { "epoch": 0.15, "grad_norm": 0.5134061574935913, "learning_rate": 0.0005964338272578429, "loss": 2.0136, "step": 4430 }, { "epoch": 0.15, "grad_norm": 0.5262011289596558, "learning_rate": 0.0005964322202650355, "loss": 2.0443, "step": 4431 }, { "epoch": 0.15, "grad_norm": 0.5084136724472046, "learning_rate": 0.0005964306129124031, "loss": 1.9681, "step": 4432 }, { "epoch": 0.15, "grad_norm": 0.5214352607727051, "learning_rate": 0.0005964290051999476, "loss": 1.9561, "step": 4433 }, { "epoch": 0.15, "grad_norm": 0.5192920565605164, "learning_rate": 0.0005964273971276709, "loss": 1.9508, "step": 4434 }, { "epoch": 0.15, "grad_norm": 0.49573859572410583, "learning_rate": 0.000596425788695575, "loss": 1.9997, "step": 4435 }, { "epoch": 0.15, "grad_norm": 0.5248598456382751, "learning_rate": 0.000596424179903662, "loss": 2.0127, "step": 4436 }, { "epoch": 0.15, "grad_norm": 0.507922351360321, "learning_rate": 0.0005964225707519336, "loss": 2.0021, "step": 4437 }, { "epoch": 0.15, "grad_norm": 0.5426156520843506, "learning_rate": 0.0005964209612403919, "loss": 1.9976, "step": 4438 }, { "epoch": 0.15, "grad_norm": 0.516208827495575, "learning_rate": 0.0005964193513690388, "loss": 1.9993, "step": 4439 }, { "epoch": 0.15, "grad_norm": 0.5142326951026917, "learning_rate": 0.0005964177411378763, "loss": 1.8716, "step": 4440 }, { "epoch": 0.15, "grad_norm": 0.6880238056182861, "learning_rate": 0.0005964161305469064, "loss": 2.0742, "step": 4441 }, { "epoch": 0.15, "grad_norm": 0.5071099996566772, "learning_rate": 0.000596414519596131, "loss": 2.0448, "step": 4442 }, { "epoch": 0.15, "grad_norm": 0.5077012181282043, "learning_rate": 0.000596412908285552, "loss": 2.0528, "step": 4443 }, { "epoch": 0.15, "grad_norm": 0.5185523629188538, "learning_rate": 0.0005964112966151714, "loss": 2.0437, "step": 4444 }, { "epoch": 0.15, "grad_norm": 0.500741183757782, "learning_rate": 0.0005964096845849911, "loss": 1.9618, "step": 4445 }, { "epoch": 0.15, "grad_norm": 0.5064855813980103, "learning_rate": 0.0005964080721950132, "loss": 2.0026, "step": 4446 }, { "epoch": 0.15, "grad_norm": 0.5037046670913696, "learning_rate": 0.0005964064594452396, "loss": 2.0326, "step": 4447 }, { "epoch": 0.15, "grad_norm": 0.5168583393096924, "learning_rate": 0.0005964048463356721, "loss": 1.994, "step": 4448 }, { "epoch": 0.15, "grad_norm": 0.5037394762039185, "learning_rate": 0.0005964032328663128, "loss": 2.0129, "step": 4449 }, { "epoch": 0.15, "grad_norm": 0.5202538967132568, "learning_rate": 0.0005964016190371639, "loss": 2.0072, "step": 4450 }, { "epoch": 0.15, "grad_norm": 0.5049629807472229, "learning_rate": 0.0005964000048482268, "loss": 2.0169, "step": 4451 }, { "epoch": 0.15, "grad_norm": 0.5060548186302185, "learning_rate": 0.0005963983902995039, "loss": 1.974, "step": 4452 }, { "epoch": 0.15, "grad_norm": 0.5112819075584412, "learning_rate": 0.0005963967753909971, "loss": 1.9575, "step": 4453 }, { "epoch": 0.15, "grad_norm": 0.5195221304893494, "learning_rate": 0.0005963951601227082, "loss": 2.0426, "step": 4454 }, { "epoch": 0.15, "grad_norm": 0.5249568223953247, "learning_rate": 0.0005963935444946392, "loss": 2.0033, "step": 4455 }, { "epoch": 0.15, "grad_norm": 0.50547856092453, "learning_rate": 0.0005963919285067921, "loss": 2.0162, "step": 4456 }, { "epoch": 0.15, "grad_norm": 0.506409227848053, "learning_rate": 0.0005963903121591691, "loss": 1.9725, "step": 4457 }, { "epoch": 0.15, "grad_norm": 0.5241072773933411, "learning_rate": 0.0005963886954517717, "loss": 1.9751, "step": 4458 }, { "epoch": 0.15, "grad_norm": 0.5096514225006104, "learning_rate": 0.0005963870783846022, "loss": 1.9952, "step": 4459 }, { "epoch": 0.15, "grad_norm": 0.4910796582698822, "learning_rate": 0.0005963854609576624, "loss": 1.9205, "step": 4460 }, { "epoch": 0.15, "grad_norm": 0.5130978226661682, "learning_rate": 0.0005963838431709543, "loss": 1.9716, "step": 4461 }, { "epoch": 0.15, "grad_norm": 0.5531437397003174, "learning_rate": 0.00059638222502448, "loss": 1.9969, "step": 4462 }, { "epoch": 0.15, "grad_norm": 0.5232175588607788, "learning_rate": 0.0005963806065182412, "loss": 1.9809, "step": 4463 }, { "epoch": 0.15, "grad_norm": 0.5404572486877441, "learning_rate": 0.0005963789876522401, "loss": 1.9995, "step": 4464 }, { "epoch": 0.15, "grad_norm": 0.5127017498016357, "learning_rate": 0.0005963773684264786, "loss": 2.0096, "step": 4465 }, { "epoch": 0.15, "grad_norm": 0.5116766691207886, "learning_rate": 0.0005963757488409585, "loss": 1.9745, "step": 4466 }, { "epoch": 0.15, "grad_norm": 0.5339583158493042, "learning_rate": 0.000596374128895682, "loss": 1.9547, "step": 4467 }, { "epoch": 0.15, "grad_norm": 0.5144294500350952, "learning_rate": 0.0005963725085906511, "loss": 2.1274, "step": 4468 }, { "epoch": 0.15, "grad_norm": 0.5210565328598022, "learning_rate": 0.0005963708879258675, "loss": 1.9629, "step": 4469 }, { "epoch": 0.15, "grad_norm": 0.5392456650733948, "learning_rate": 0.0005963692669013334, "loss": 1.9999, "step": 4470 }, { "epoch": 0.15, "grad_norm": 0.5155356526374817, "learning_rate": 0.0005963676455170506, "loss": 2.0287, "step": 4471 }, { "epoch": 0.15, "grad_norm": 0.5356101989746094, "learning_rate": 0.0005963660237730212, "loss": 1.9838, "step": 4472 }, { "epoch": 0.15, "grad_norm": 0.5319983959197998, "learning_rate": 0.0005963644016692471, "loss": 1.9485, "step": 4473 }, { "epoch": 0.15, "grad_norm": 0.5478166937828064, "learning_rate": 0.0005963627792057301, "loss": 2.0265, "step": 4474 }, { "epoch": 0.15, "grad_norm": 0.5396471619606018, "learning_rate": 0.0005963611563824726, "loss": 2.0064, "step": 4475 }, { "epoch": 0.15, "grad_norm": 0.5120327472686768, "learning_rate": 0.0005963595331994763, "loss": 2.023, "step": 4476 }, { "epoch": 0.15, "grad_norm": 0.515011727809906, "learning_rate": 0.0005963579096567431, "loss": 1.9268, "step": 4477 }, { "epoch": 0.15, "grad_norm": 0.5375447273254395, "learning_rate": 0.0005963562857542751, "loss": 1.8971, "step": 4478 }, { "epoch": 0.15, "grad_norm": 0.49820104241371155, "learning_rate": 0.0005963546614920742, "loss": 1.9955, "step": 4479 }, { "epoch": 0.15, "grad_norm": 0.5109092593193054, "learning_rate": 0.0005963530368701426, "loss": 1.9833, "step": 4480 }, { "epoch": 0.15, "grad_norm": 0.5531084537506104, "learning_rate": 0.0005963514118884818, "loss": 2.0604, "step": 4481 }, { "epoch": 0.15, "grad_norm": 0.518345296382904, "learning_rate": 0.0005963497865470942, "loss": 1.9317, "step": 4482 }, { "epoch": 0.15, "grad_norm": 0.5078663229942322, "learning_rate": 0.0005963481608459816, "loss": 1.979, "step": 4483 }, { "epoch": 0.15, "grad_norm": 0.5401236414909363, "learning_rate": 0.0005963465347851461, "loss": 2.0019, "step": 4484 }, { "epoch": 0.15, "grad_norm": 0.5061268210411072, "learning_rate": 0.0005963449083645893, "loss": 1.9929, "step": 4485 }, { "epoch": 0.15, "grad_norm": 0.511231005191803, "learning_rate": 0.0005963432815843137, "loss": 1.9528, "step": 4486 }, { "epoch": 0.15, "grad_norm": 0.5014404058456421, "learning_rate": 0.0005963416544443209, "loss": 1.968, "step": 4487 }, { "epoch": 0.15, "grad_norm": 0.5713531970977783, "learning_rate": 0.000596340026944613, "loss": 1.9097, "step": 4488 }, { "epoch": 0.15, "grad_norm": 0.5138887166976929, "learning_rate": 0.000596338399085192, "loss": 2.0137, "step": 4489 }, { "epoch": 0.15, "grad_norm": 0.5270929932594299, "learning_rate": 0.0005963367708660599, "loss": 2.0746, "step": 4490 }, { "epoch": 0.15, "grad_norm": 0.5477086901664734, "learning_rate": 0.0005963351422872186, "loss": 1.9609, "step": 4491 }, { "epoch": 0.15, "grad_norm": 0.5306520462036133, "learning_rate": 0.00059633351334867, "loss": 2.0418, "step": 4492 }, { "epoch": 0.15, "grad_norm": 0.5456824898719788, "learning_rate": 0.0005963318840504163, "loss": 2.0486, "step": 4493 }, { "epoch": 0.15, "grad_norm": 0.5156098008155823, "learning_rate": 0.0005963302543924592, "loss": 1.9378, "step": 4494 }, { "epoch": 0.15, "grad_norm": 0.5200397372245789, "learning_rate": 0.000596328624374801, "loss": 2.0689, "step": 4495 }, { "epoch": 0.15, "grad_norm": 0.5163953304290771, "learning_rate": 0.0005963269939974435, "loss": 2.0135, "step": 4496 }, { "epoch": 0.15, "grad_norm": 0.5161649584770203, "learning_rate": 0.0005963253632603887, "loss": 1.9506, "step": 4497 }, { "epoch": 0.15, "grad_norm": 0.4932371973991394, "learning_rate": 0.0005963237321636385, "loss": 1.9635, "step": 4498 }, { "epoch": 0.15, "grad_norm": 0.49074503779411316, "learning_rate": 0.0005963221007071949, "loss": 1.924, "step": 4499 }, { "epoch": 0.15, "grad_norm": 0.5258017182350159, "learning_rate": 0.00059632046889106, "loss": 1.9404, "step": 4500 }, { "epoch": 0.15, "grad_norm": 0.5069791078567505, "learning_rate": 0.0005963188367152357, "loss": 1.9763, "step": 4501 }, { "epoch": 0.15, "grad_norm": 0.5036969780921936, "learning_rate": 0.000596317204179724, "loss": 1.9812, "step": 4502 }, { "epoch": 0.15, "grad_norm": 0.500812292098999, "learning_rate": 0.0005963155712845269, "loss": 2.0587, "step": 4503 }, { "epoch": 0.15, "grad_norm": 0.5141744613647461, "learning_rate": 0.0005963139380296463, "loss": 2.0092, "step": 4504 }, { "epoch": 0.15, "grad_norm": 0.5826146602630615, "learning_rate": 0.0005963123044150843, "loss": 1.9362, "step": 4505 }, { "epoch": 0.15, "grad_norm": 0.52879399061203, "learning_rate": 0.0005963106704408428, "loss": 2.1442, "step": 4506 }, { "epoch": 0.15, "grad_norm": 0.518455982208252, "learning_rate": 0.0005963090361069237, "loss": 2.0302, "step": 4507 }, { "epoch": 0.15, "grad_norm": 0.5029184222221375, "learning_rate": 0.0005963074014133292, "loss": 1.9648, "step": 4508 }, { "epoch": 0.15, "grad_norm": 0.5108657479286194, "learning_rate": 0.0005963057663600611, "loss": 2.0108, "step": 4509 }, { "epoch": 0.15, "grad_norm": 0.48801520466804504, "learning_rate": 0.0005963041309471216, "loss": 1.9726, "step": 4510 }, { "epoch": 0.15, "grad_norm": 0.5285921096801758, "learning_rate": 0.0005963024951745124, "loss": 1.9315, "step": 4511 }, { "epoch": 0.15, "grad_norm": 0.5148741006851196, "learning_rate": 0.0005963008590422357, "loss": 2.0413, "step": 4512 }, { "epoch": 0.15, "grad_norm": 0.5015733242034912, "learning_rate": 0.0005962992225502935, "loss": 1.9754, "step": 4513 }, { "epoch": 0.15, "grad_norm": 0.5702207684516907, "learning_rate": 0.0005962975856986875, "loss": 2.0245, "step": 4514 }, { "epoch": 0.15, "grad_norm": 0.501375138759613, "learning_rate": 0.00059629594848742, "loss": 2.0042, "step": 4515 }, { "epoch": 0.15, "grad_norm": 0.5051064491271973, "learning_rate": 0.0005962943109164929, "loss": 1.9781, "step": 4516 }, { "epoch": 0.15, "grad_norm": 0.4887128472328186, "learning_rate": 0.0005962926729859081, "loss": 2.0424, "step": 4517 }, { "epoch": 0.15, "grad_norm": 0.5140904188156128, "learning_rate": 0.0005962910346956677, "loss": 2.0292, "step": 4518 }, { "epoch": 0.15, "grad_norm": 0.5183480978012085, "learning_rate": 0.0005962893960457736, "loss": 1.9681, "step": 4519 }, { "epoch": 0.15, "grad_norm": 0.5115302205085754, "learning_rate": 0.0005962877570362278, "loss": 2.0407, "step": 4520 }, { "epoch": 0.15, "grad_norm": 0.5075383186340332, "learning_rate": 0.0005962861176670325, "loss": 2.0295, "step": 4521 }, { "epoch": 0.15, "grad_norm": 0.5220164060592651, "learning_rate": 0.0005962844779381893, "loss": 2.0727, "step": 4522 }, { "epoch": 0.15, "grad_norm": 0.5302513837814331, "learning_rate": 0.0005962828378497005, "loss": 2.0013, "step": 4523 }, { "epoch": 0.15, "grad_norm": 0.4964735805988312, "learning_rate": 0.000596281197401568, "loss": 1.9886, "step": 4524 }, { "epoch": 0.15, "grad_norm": 0.5062999725341797, "learning_rate": 0.0005962795565937937, "loss": 2.0175, "step": 4525 }, { "epoch": 0.15, "grad_norm": 0.5455882549285889, "learning_rate": 0.0005962779154263798, "loss": 1.9413, "step": 4526 }, { "epoch": 0.15, "grad_norm": 0.4973060190677643, "learning_rate": 0.0005962762738993281, "loss": 1.9751, "step": 4527 }, { "epoch": 0.15, "grad_norm": 0.5084576606750488, "learning_rate": 0.0005962746320126406, "loss": 2.059, "step": 4528 }, { "epoch": 0.15, "grad_norm": 0.5208420157432556, "learning_rate": 0.0005962729897663194, "loss": 1.9088, "step": 4529 }, { "epoch": 0.15, "grad_norm": 0.5075984597206116, "learning_rate": 0.0005962713471603665, "loss": 1.9667, "step": 4530 }, { "epoch": 0.15, "grad_norm": 0.4997411072254181, "learning_rate": 0.0005962697041947837, "loss": 2.027, "step": 4531 }, { "epoch": 0.15, "grad_norm": 0.5166957974433899, "learning_rate": 0.0005962680608695732, "loss": 1.9364, "step": 4532 }, { "epoch": 0.15, "grad_norm": 0.5037457942962646, "learning_rate": 0.000596266417184737, "loss": 1.8951, "step": 4533 }, { "epoch": 0.15, "grad_norm": 0.5091062188148499, "learning_rate": 0.000596264773140277, "loss": 2.0408, "step": 4534 }, { "epoch": 0.15, "grad_norm": 0.5166646838188171, "learning_rate": 0.0005962631287361952, "loss": 1.9401, "step": 4535 }, { "epoch": 0.15, "grad_norm": 0.5264691114425659, "learning_rate": 0.0005962614839724935, "loss": 2.0805, "step": 4536 }, { "epoch": 0.15, "grad_norm": 0.5434930324554443, "learning_rate": 0.0005962598388491741, "loss": 1.9968, "step": 4537 }, { "epoch": 0.15, "grad_norm": 0.532484233379364, "learning_rate": 0.0005962581933662389, "loss": 2.0234, "step": 4538 }, { "epoch": 0.15, "grad_norm": 0.5595505237579346, "learning_rate": 0.0005962565475236899, "loss": 2.0875, "step": 4539 }, { "epoch": 0.15, "grad_norm": 0.5218607187271118, "learning_rate": 0.0005962549013215292, "loss": 2.0133, "step": 4540 }, { "epoch": 0.15, "grad_norm": 0.5066594481468201, "learning_rate": 0.0005962532547597586, "loss": 1.992, "step": 4541 }, { "epoch": 0.15, "grad_norm": 0.5263765454292297, "learning_rate": 0.0005962516078383802, "loss": 1.936, "step": 4542 }, { "epoch": 0.15, "grad_norm": 0.5191621780395508, "learning_rate": 0.0005962499605573961, "loss": 1.9578, "step": 4543 }, { "epoch": 0.15, "grad_norm": 0.5048368573188782, "learning_rate": 0.0005962483129168081, "loss": 2.0108, "step": 4544 }, { "epoch": 0.15, "grad_norm": 0.519954264163971, "learning_rate": 0.0005962466649166182, "loss": 2.0259, "step": 4545 }, { "epoch": 0.15, "grad_norm": 0.5162661075592041, "learning_rate": 0.0005962450165568287, "loss": 1.9783, "step": 4546 }, { "epoch": 0.15, "grad_norm": 0.5159627199172974, "learning_rate": 0.0005962433678374413, "loss": 1.9919, "step": 4547 }, { "epoch": 0.15, "grad_norm": 0.539540708065033, "learning_rate": 0.0005962417187584581, "loss": 1.8962, "step": 4548 }, { "epoch": 0.15, "grad_norm": 0.511191189289093, "learning_rate": 0.0005962400693198812, "loss": 2.0437, "step": 4549 }, { "epoch": 0.15, "grad_norm": 0.5121882557868958, "learning_rate": 0.0005962384195217123, "loss": 2.0469, "step": 4550 }, { "epoch": 0.15, "grad_norm": 0.519080638885498, "learning_rate": 0.0005962367693639537, "loss": 1.9173, "step": 4551 }, { "epoch": 0.15, "grad_norm": 0.515273928642273, "learning_rate": 0.0005962351188466074, "loss": 2.0055, "step": 4552 }, { "epoch": 0.15, "grad_norm": 0.5125707387924194, "learning_rate": 0.0005962334679696752, "loss": 2.0031, "step": 4553 }, { "epoch": 0.15, "grad_norm": 0.5048773288726807, "learning_rate": 0.0005962318167331593, "loss": 1.9745, "step": 4554 }, { "epoch": 0.15, "grad_norm": 0.5028719902038574, "learning_rate": 0.0005962301651370617, "loss": 1.985, "step": 4555 }, { "epoch": 0.15, "grad_norm": 0.5257272124290466, "learning_rate": 0.0005962285131813842, "loss": 1.9399, "step": 4556 }, { "epoch": 0.15, "grad_norm": 0.5296308398246765, "learning_rate": 0.000596226860866129, "loss": 2.0499, "step": 4557 }, { "epoch": 0.15, "grad_norm": 0.5152416825294495, "learning_rate": 0.0005962252081912979, "loss": 1.9554, "step": 4558 }, { "epoch": 0.15, "grad_norm": 0.6223500967025757, "learning_rate": 0.0005962235551568931, "loss": 2.1434, "step": 4559 }, { "epoch": 0.15, "grad_norm": 0.522706151008606, "learning_rate": 0.0005962219017629166, "loss": 1.8993, "step": 4560 }, { "epoch": 0.15, "grad_norm": 0.5280360579490662, "learning_rate": 0.0005962202480093704, "loss": 2.0216, "step": 4561 }, { "epoch": 0.15, "grad_norm": 0.5381008386611938, "learning_rate": 0.0005962185938962564, "loss": 2.0267, "step": 4562 }, { "epoch": 0.15, "grad_norm": 0.5183349251747131, "learning_rate": 0.0005962169394235766, "loss": 2.0371, "step": 4563 }, { "epoch": 0.15, "grad_norm": 0.5112417340278625, "learning_rate": 0.0005962152845913332, "loss": 1.9433, "step": 4564 }, { "epoch": 0.15, "grad_norm": 0.5242570638656616, "learning_rate": 0.000596213629399528, "loss": 2.0183, "step": 4565 }, { "epoch": 0.15, "grad_norm": 0.5375418066978455, "learning_rate": 0.0005962119738481632, "loss": 1.9348, "step": 4566 }, { "epoch": 0.15, "grad_norm": 0.5108201503753662, "learning_rate": 0.0005962103179372407, "loss": 1.9287, "step": 4567 }, { "epoch": 0.15, "grad_norm": 0.5249975919723511, "learning_rate": 0.0005962086616667625, "loss": 2.0639, "step": 4568 }, { "epoch": 0.15, "grad_norm": 0.5319742560386658, "learning_rate": 0.0005962070050367305, "loss": 1.8888, "step": 4569 }, { "epoch": 0.15, "grad_norm": 0.5494804978370667, "learning_rate": 0.0005962053480471469, "loss": 2.0321, "step": 4570 }, { "epoch": 0.15, "grad_norm": 0.5447701811790466, "learning_rate": 0.0005962036906980138, "loss": 2.0399, "step": 4571 }, { "epoch": 0.15, "grad_norm": 0.5092102885246277, "learning_rate": 0.0005962020329893329, "loss": 2.012, "step": 4572 }, { "epoch": 0.15, "grad_norm": 0.5737276077270508, "learning_rate": 0.0005962003749211065, "loss": 2.0268, "step": 4573 }, { "epoch": 0.15, "grad_norm": 0.566455066204071, "learning_rate": 0.0005961987164933364, "loss": 1.9138, "step": 4574 }, { "epoch": 0.15, "grad_norm": 0.5320468544960022, "learning_rate": 0.0005961970577060247, "loss": 2.0504, "step": 4575 }, { "epoch": 0.15, "grad_norm": 1.3533399105072021, "learning_rate": 0.0005961953985591735, "loss": 2.0079, "step": 4576 }, { "epoch": 0.15, "grad_norm": 0.535760223865509, "learning_rate": 0.0005961937390527846, "loss": 2.0613, "step": 4577 }, { "epoch": 0.15, "grad_norm": 0.5295723080635071, "learning_rate": 0.0005961920791868601, "loss": 1.9648, "step": 4578 }, { "epoch": 0.15, "grad_norm": 0.5151484608650208, "learning_rate": 0.0005961904189614023, "loss": 1.9689, "step": 4579 }, { "epoch": 0.15, "grad_norm": 0.5190231204032898, "learning_rate": 0.0005961887583764129, "loss": 1.9322, "step": 4580 }, { "epoch": 0.15, "grad_norm": 0.5197441577911377, "learning_rate": 0.0005961870974318939, "loss": 2.0009, "step": 4581 }, { "epoch": 0.15, "grad_norm": 0.5172673463821411, "learning_rate": 0.0005961854361278474, "loss": 2.0013, "step": 4582 }, { "epoch": 0.15, "grad_norm": 0.4941076636314392, "learning_rate": 0.0005961837744642755, "loss": 1.9545, "step": 4583 }, { "epoch": 0.15, "grad_norm": 0.4923766255378723, "learning_rate": 0.0005961821124411801, "loss": 2.0197, "step": 4584 }, { "epoch": 0.15, "grad_norm": 0.5068364143371582, "learning_rate": 0.0005961804500585632, "loss": 2.1241, "step": 4585 }, { "epoch": 0.15, "grad_norm": 0.5018123984336853, "learning_rate": 0.000596178787316427, "loss": 1.9829, "step": 4586 }, { "epoch": 0.15, "grad_norm": 0.5016072392463684, "learning_rate": 0.0005961771242147733, "loss": 1.9295, "step": 4587 }, { "epoch": 0.15, "grad_norm": 0.49269822239875793, "learning_rate": 0.0005961754607536041, "loss": 1.9377, "step": 4588 }, { "epoch": 0.15, "grad_norm": 0.5116567015647888, "learning_rate": 0.0005961737969329218, "loss": 2.007, "step": 4589 }, { "epoch": 0.15, "grad_norm": 0.5080772042274475, "learning_rate": 0.0005961721327527279, "loss": 2.0508, "step": 4590 }, { "epoch": 0.15, "grad_norm": 0.5081613063812256, "learning_rate": 0.0005961704682130248, "loss": 1.9387, "step": 4591 }, { "epoch": 0.15, "grad_norm": 0.5008671283721924, "learning_rate": 0.0005961688033138144, "loss": 1.9339, "step": 4592 }, { "epoch": 0.15, "grad_norm": 0.5182155966758728, "learning_rate": 0.0005961671380550987, "loss": 1.9354, "step": 4593 }, { "epoch": 0.15, "grad_norm": 0.5088204145431519, "learning_rate": 0.0005961654724368796, "loss": 1.9797, "step": 4594 }, { "epoch": 0.15, "grad_norm": 0.49889203906059265, "learning_rate": 0.0005961638064591595, "loss": 1.9457, "step": 4595 }, { "epoch": 0.15, "grad_norm": 0.5085194110870361, "learning_rate": 0.00059616214012194, "loss": 1.9823, "step": 4596 }, { "epoch": 0.15, "grad_norm": 0.497626930475235, "learning_rate": 0.0005961604734252234, "loss": 1.9642, "step": 4597 }, { "epoch": 0.15, "grad_norm": 0.5246692895889282, "learning_rate": 0.0005961588063690116, "loss": 2.0395, "step": 4598 }, { "epoch": 0.15, "grad_norm": 0.49943023920059204, "learning_rate": 0.0005961571389533065, "loss": 1.9158, "step": 4599 }, { "epoch": 0.15, "grad_norm": 0.5316285490989685, "learning_rate": 0.0005961554711781103, "loss": 2.1017, "step": 4600 }, { "epoch": 0.15, "grad_norm": 0.5368388295173645, "learning_rate": 0.0005961538030434251, "loss": 2.0294, "step": 4601 }, { "epoch": 0.15, "grad_norm": 0.5238897204399109, "learning_rate": 0.0005961521345492529, "loss": 1.9979, "step": 4602 }, { "epoch": 0.15, "grad_norm": 0.5146443247795105, "learning_rate": 0.0005961504656955954, "loss": 2.1171, "step": 4603 }, { "epoch": 0.15, "grad_norm": 0.49720171093940735, "learning_rate": 0.0005961487964824551, "loss": 1.955, "step": 4604 }, { "epoch": 0.15, "grad_norm": 0.5107969641685486, "learning_rate": 0.0005961471269098338, "loss": 1.9656, "step": 4605 }, { "epoch": 0.15, "grad_norm": 0.5271592736244202, "learning_rate": 0.0005961454569777333, "loss": 1.972, "step": 4606 }, { "epoch": 0.15, "grad_norm": 0.5167983770370483, "learning_rate": 0.0005961437866861559, "loss": 1.9753, "step": 4607 }, { "epoch": 0.15, "grad_norm": 0.5252718925476074, "learning_rate": 0.0005961421160351038, "loss": 1.9909, "step": 4608 }, { "epoch": 0.15, "grad_norm": 0.5369163155555725, "learning_rate": 0.0005961404450245786, "loss": 2.06, "step": 4609 }, { "epoch": 0.15, "grad_norm": 0.5069037079811096, "learning_rate": 0.0005961387736545826, "loss": 1.8122, "step": 4610 }, { "epoch": 0.15, "grad_norm": 0.5074437260627747, "learning_rate": 0.0005961371019251178, "loss": 2.0132, "step": 4611 }, { "epoch": 0.15, "grad_norm": 0.5102118253707886, "learning_rate": 0.0005961354298361861, "loss": 2.0217, "step": 4612 }, { "epoch": 0.15, "grad_norm": 0.5146315693855286, "learning_rate": 0.0005961337573877896, "loss": 1.9921, "step": 4613 }, { "epoch": 0.15, "grad_norm": 0.527175784111023, "learning_rate": 0.0005961320845799305, "loss": 2.0007, "step": 4614 }, { "epoch": 0.15, "grad_norm": 0.5159717202186584, "learning_rate": 0.0005961304114126106, "loss": 2.0168, "step": 4615 }, { "epoch": 0.15, "grad_norm": 0.5066176652908325, "learning_rate": 0.0005961287378858321, "loss": 2.005, "step": 4616 }, { "epoch": 0.15, "grad_norm": 0.5030300617218018, "learning_rate": 0.0005961270639995969, "loss": 1.9581, "step": 4617 }, { "epoch": 0.15, "grad_norm": 0.5044295191764832, "learning_rate": 0.0005961253897539069, "loss": 1.9739, "step": 4618 }, { "epoch": 0.15, "grad_norm": 0.5124224424362183, "learning_rate": 0.0005961237151487645, "loss": 1.9942, "step": 4619 }, { "epoch": 0.15, "grad_norm": 0.5007057785987854, "learning_rate": 0.0005961220401841715, "loss": 2.0374, "step": 4620 }, { "epoch": 0.15, "grad_norm": 0.7117940187454224, "learning_rate": 0.00059612036486013, "loss": 2.0323, "step": 4621 }, { "epoch": 0.15, "grad_norm": 0.5066627860069275, "learning_rate": 0.0005961186891766419, "loss": 1.9847, "step": 4622 }, { "epoch": 0.15, "grad_norm": 0.5060510039329529, "learning_rate": 0.0005961170131337094, "loss": 1.9603, "step": 4623 }, { "epoch": 0.15, "grad_norm": 0.5154176950454712, "learning_rate": 0.0005961153367313345, "loss": 2.0388, "step": 4624 }, { "epoch": 0.15, "grad_norm": 0.5002394914627075, "learning_rate": 0.0005961136599695193, "loss": 1.9907, "step": 4625 }, { "epoch": 0.15, "grad_norm": 0.5214799046516418, "learning_rate": 0.0005961119828482656, "loss": 1.9913, "step": 4626 }, { "epoch": 0.15, "grad_norm": 0.5015985369682312, "learning_rate": 0.0005961103053675757, "loss": 2.1041, "step": 4627 }, { "epoch": 0.15, "grad_norm": 0.5451866388320923, "learning_rate": 0.0005961086275274514, "loss": 1.9667, "step": 4628 }, { "epoch": 0.15, "grad_norm": 0.5019696950912476, "learning_rate": 0.0005961069493278948, "loss": 2.0083, "step": 4629 }, { "epoch": 0.15, "grad_norm": 0.5067735314369202, "learning_rate": 0.0005961052707689083, "loss": 2.0203, "step": 4630 }, { "epoch": 0.15, "grad_norm": 0.5312592387199402, "learning_rate": 0.0005961035918504933, "loss": 2.0344, "step": 4631 }, { "epoch": 0.15, "grad_norm": 0.49509817361831665, "learning_rate": 0.0005961019125726524, "loss": 1.9497, "step": 4632 }, { "epoch": 0.15, "grad_norm": 0.49943509697914124, "learning_rate": 0.0005961002329353872, "loss": 2.0115, "step": 4633 }, { "epoch": 0.15, "grad_norm": 0.4969935715198517, "learning_rate": 0.0005960985529387001, "loss": 2.0698, "step": 4634 }, { "epoch": 0.15, "grad_norm": 0.5129790306091309, "learning_rate": 0.0005960968725825929, "loss": 1.9628, "step": 4635 }, { "epoch": 0.15, "grad_norm": 0.5121617913246155, "learning_rate": 0.0005960951918670677, "loss": 2.0095, "step": 4636 }, { "epoch": 0.15, "grad_norm": 0.5361694693565369, "learning_rate": 0.0005960935107921267, "loss": 2.0502, "step": 4637 }, { "epoch": 0.15, "grad_norm": 0.5073434710502625, "learning_rate": 0.0005960918293577717, "loss": 2.0562, "step": 4638 }, { "epoch": 0.15, "grad_norm": 0.5194659233093262, "learning_rate": 0.0005960901475640049, "loss": 1.9591, "step": 4639 }, { "epoch": 0.15, "grad_norm": 0.5150894522666931, "learning_rate": 0.0005960884654108283, "loss": 1.9653, "step": 4640 }, { "epoch": 0.15, "grad_norm": 0.526678740978241, "learning_rate": 0.0005960867828982439, "loss": 1.983, "step": 4641 }, { "epoch": 0.15, "grad_norm": 0.5104970932006836, "learning_rate": 0.0005960851000262538, "loss": 2.0374, "step": 4642 }, { "epoch": 0.15, "grad_norm": 0.5184699892997742, "learning_rate": 0.00059608341679486, "loss": 2.0321, "step": 4643 }, { "epoch": 0.15, "grad_norm": 0.5232757329940796, "learning_rate": 0.0005960817332040646, "loss": 1.9053, "step": 4644 }, { "epoch": 0.15, "grad_norm": 0.5049563050270081, "learning_rate": 0.0005960800492538695, "loss": 2.0208, "step": 4645 }, { "epoch": 0.15, "grad_norm": 0.5153092741966248, "learning_rate": 0.0005960783649442767, "loss": 2.0319, "step": 4646 }, { "epoch": 0.15, "grad_norm": 0.5041597485542297, "learning_rate": 0.0005960766802752888, "loss": 1.9917, "step": 4647 }, { "epoch": 0.15, "grad_norm": 0.4992857575416565, "learning_rate": 0.0005960749952469071, "loss": 1.9565, "step": 4648 }, { "epoch": 0.15, "grad_norm": 0.49445852637290955, "learning_rate": 0.000596073309859134, "loss": 1.9762, "step": 4649 }, { "epoch": 0.15, "grad_norm": 0.49623027443885803, "learning_rate": 0.0005960716241119717, "loss": 1.9745, "step": 4650 }, { "epoch": 0.15, "grad_norm": 0.4871533215045929, "learning_rate": 0.000596069938005422, "loss": 2.0529, "step": 4651 }, { "epoch": 0.15, "grad_norm": 0.49629950523376465, "learning_rate": 0.000596068251539487, "loss": 2.0681, "step": 4652 }, { "epoch": 0.15, "grad_norm": 0.5184028744697571, "learning_rate": 0.0005960665647141688, "loss": 2.0446, "step": 4653 }, { "epoch": 0.15, "grad_norm": 0.5079672336578369, "learning_rate": 0.0005960648775294693, "loss": 1.917, "step": 4654 }, { "epoch": 0.15, "grad_norm": 0.5032964944839478, "learning_rate": 0.0005960631899853909, "loss": 1.9962, "step": 4655 }, { "epoch": 0.15, "grad_norm": 0.5015363097190857, "learning_rate": 0.0005960615020819352, "loss": 2.0372, "step": 4656 }, { "epoch": 0.15, "grad_norm": 0.5070124268531799, "learning_rate": 0.0005960598138191045, "loss": 1.9226, "step": 4657 }, { "epoch": 0.15, "grad_norm": 0.5172252655029297, "learning_rate": 0.0005960581251969008, "loss": 2.0598, "step": 4658 }, { "epoch": 0.16, "grad_norm": 0.5055075287818909, "learning_rate": 0.0005960564362153262, "loss": 2.0279, "step": 4659 }, { "epoch": 0.16, "grad_norm": 0.49408775568008423, "learning_rate": 0.0005960547468743827, "loss": 1.936, "step": 4660 }, { "epoch": 0.16, "grad_norm": 0.5199975371360779, "learning_rate": 0.0005960530571740724, "loss": 2.0313, "step": 4661 }, { "epoch": 0.16, "grad_norm": 0.48981961607933044, "learning_rate": 0.0005960513671143973, "loss": 1.9338, "step": 4662 }, { "epoch": 0.16, "grad_norm": 0.4954701364040375, "learning_rate": 0.0005960496766953594, "loss": 1.9165, "step": 4663 }, { "epoch": 0.16, "grad_norm": 0.5142377614974976, "learning_rate": 0.0005960479859169609, "loss": 1.9349, "step": 4664 }, { "epoch": 0.16, "grad_norm": 0.5183415412902832, "learning_rate": 0.0005960462947792036, "loss": 2.0313, "step": 4665 }, { "epoch": 0.16, "grad_norm": 0.5047487020492554, "learning_rate": 0.0005960446032820899, "loss": 1.9669, "step": 4666 }, { "epoch": 0.16, "grad_norm": 0.4933851957321167, "learning_rate": 0.0005960429114256216, "loss": 2.0283, "step": 4667 }, { "epoch": 0.16, "grad_norm": 0.5149619579315186, "learning_rate": 0.0005960412192098008, "loss": 1.9499, "step": 4668 }, { "epoch": 0.16, "grad_norm": 0.526492178440094, "learning_rate": 0.0005960395266346296, "loss": 2.0079, "step": 4669 }, { "epoch": 0.16, "grad_norm": 0.5424990653991699, "learning_rate": 0.00059603783370011, "loss": 2.0767, "step": 4670 }, { "epoch": 0.16, "grad_norm": 0.5665566325187683, "learning_rate": 0.0005960361404062442, "loss": 2.0229, "step": 4671 }, { "epoch": 0.16, "grad_norm": 0.4891148805618286, "learning_rate": 0.0005960344467530339, "loss": 1.9494, "step": 4672 }, { "epoch": 0.16, "grad_norm": 0.5357549786567688, "learning_rate": 0.0005960327527404816, "loss": 1.9589, "step": 4673 }, { "epoch": 0.16, "grad_norm": 0.5139035582542419, "learning_rate": 0.0005960310583685893, "loss": 1.9951, "step": 4674 }, { "epoch": 0.16, "grad_norm": 0.4998663365840912, "learning_rate": 0.0005960293636373587, "loss": 2.0226, "step": 4675 }, { "epoch": 0.16, "grad_norm": 0.5213618278503418, "learning_rate": 0.0005960276685467921, "loss": 1.9504, "step": 4676 }, { "epoch": 0.16, "grad_norm": 0.4993382394313812, "learning_rate": 0.0005960259730968915, "loss": 1.9449, "step": 4677 }, { "epoch": 0.16, "grad_norm": 0.5096080899238586, "learning_rate": 0.000596024277287659, "loss": 1.9502, "step": 4678 }, { "epoch": 0.16, "grad_norm": 0.5343903303146362, "learning_rate": 0.0005960225811190967, "loss": 2.0017, "step": 4679 }, { "epoch": 0.16, "grad_norm": 0.513977587223053, "learning_rate": 0.0005960208845912065, "loss": 1.9754, "step": 4680 }, { "epoch": 0.16, "grad_norm": 0.5102706551551819, "learning_rate": 0.0005960191877039907, "loss": 1.9775, "step": 4681 }, { "epoch": 0.16, "grad_norm": 0.5114163160324097, "learning_rate": 0.0005960174904574511, "loss": 1.9127, "step": 4682 }, { "epoch": 0.16, "grad_norm": 0.5063499212265015, "learning_rate": 0.00059601579285159, "loss": 2.0247, "step": 4683 }, { "epoch": 0.16, "grad_norm": 0.5023273229598999, "learning_rate": 0.0005960140948864092, "loss": 2.0004, "step": 4684 }, { "epoch": 0.16, "grad_norm": 0.4965467154979706, "learning_rate": 0.0005960123965619111, "loss": 1.9638, "step": 4685 }, { "epoch": 0.16, "grad_norm": 0.4973315894603729, "learning_rate": 0.0005960106978780973, "loss": 1.9795, "step": 4686 }, { "epoch": 0.16, "grad_norm": 0.528002142906189, "learning_rate": 0.0005960089988349703, "loss": 2.025, "step": 4687 }, { "epoch": 0.16, "grad_norm": 0.5108749270439148, "learning_rate": 0.0005960072994325321, "loss": 2.0215, "step": 4688 }, { "epoch": 0.16, "grad_norm": 0.49867939949035645, "learning_rate": 0.0005960055996707845, "loss": 1.9788, "step": 4689 }, { "epoch": 0.16, "grad_norm": 0.49278712272644043, "learning_rate": 0.0005960038995497297, "loss": 2.0221, "step": 4690 }, { "epoch": 0.16, "grad_norm": 0.5047951340675354, "learning_rate": 0.0005960021990693698, "loss": 1.9211, "step": 4691 }, { "epoch": 0.16, "grad_norm": 0.5067641735076904, "learning_rate": 0.0005960004982297069, "loss": 1.963, "step": 4692 }, { "epoch": 0.16, "grad_norm": 0.49945953488349915, "learning_rate": 0.000595998797030743, "loss": 2.0097, "step": 4693 }, { "epoch": 0.16, "grad_norm": 0.4884435534477234, "learning_rate": 0.0005959970954724802, "loss": 2.0051, "step": 4694 }, { "epoch": 0.16, "grad_norm": 0.5116365551948547, "learning_rate": 0.0005959953935549204, "loss": 2.0357, "step": 4695 }, { "epoch": 0.16, "grad_norm": 0.514030396938324, "learning_rate": 0.0005959936912780659, "loss": 1.9951, "step": 4696 }, { "epoch": 0.16, "grad_norm": 0.4924645721912384, "learning_rate": 0.0005959919886419187, "loss": 1.9158, "step": 4697 }, { "epoch": 0.16, "grad_norm": 0.516842782497406, "learning_rate": 0.0005959902856464808, "loss": 2.0099, "step": 4698 }, { "epoch": 0.16, "grad_norm": 0.5293101072311401, "learning_rate": 0.0005959885822917543, "loss": 2.0015, "step": 4699 }, { "epoch": 0.16, "grad_norm": 0.4996415376663208, "learning_rate": 0.0005959868785777412, "loss": 1.9291, "step": 4700 }, { "epoch": 0.16, "grad_norm": 0.5276106595993042, "learning_rate": 0.0005959851745044438, "loss": 2.0448, "step": 4701 }, { "epoch": 0.16, "grad_norm": 0.5483736395835876, "learning_rate": 0.0005959834700718639, "loss": 1.8836, "step": 4702 }, { "epoch": 0.16, "grad_norm": 0.521117091178894, "learning_rate": 0.0005959817652800037, "loss": 1.8728, "step": 4703 }, { "epoch": 0.16, "grad_norm": 0.5156396627426147, "learning_rate": 0.0005959800601288653, "loss": 1.964, "step": 4704 }, { "epoch": 0.16, "grad_norm": 0.6286424398422241, "learning_rate": 0.0005959783546184506, "loss": 2.0442, "step": 4705 }, { "epoch": 0.16, "grad_norm": 0.5474548935890198, "learning_rate": 0.0005959766487487619, "loss": 1.9539, "step": 4706 }, { "epoch": 0.16, "grad_norm": 0.5186886191368103, "learning_rate": 0.0005959749425198011, "loss": 1.9828, "step": 4707 }, { "epoch": 0.16, "grad_norm": 0.5755308270454407, "learning_rate": 0.0005959732359315704, "loss": 1.9672, "step": 4708 }, { "epoch": 0.16, "grad_norm": 0.5395040512084961, "learning_rate": 0.0005959715289840718, "loss": 2.0223, "step": 4709 }, { "epoch": 0.16, "grad_norm": 0.5774486064910889, "learning_rate": 0.0005959698216773073, "loss": 1.9861, "step": 4710 }, { "epoch": 0.16, "grad_norm": 0.5407103300094604, "learning_rate": 0.000595968114011279, "loss": 1.9999, "step": 4711 }, { "epoch": 0.16, "grad_norm": 0.5187853574752808, "learning_rate": 0.0005959664059859891, "loss": 1.9702, "step": 4712 }, { "epoch": 0.16, "grad_norm": 0.5377817749977112, "learning_rate": 0.0005959646976014395, "loss": 2.0392, "step": 4713 }, { "epoch": 0.16, "grad_norm": 0.5049208402633667, "learning_rate": 0.0005959629888576326, "loss": 1.9386, "step": 4714 }, { "epoch": 0.16, "grad_norm": 0.5417398810386658, "learning_rate": 0.0005959612797545701, "loss": 2.0141, "step": 4715 }, { "epoch": 0.16, "grad_norm": 0.5150478482246399, "learning_rate": 0.0005959595702922541, "loss": 2.0604, "step": 4716 }, { "epoch": 0.16, "grad_norm": 0.5283235907554626, "learning_rate": 0.0005959578604706869, "loss": 2.0849, "step": 4717 }, { "epoch": 0.16, "grad_norm": 0.5024678111076355, "learning_rate": 0.0005959561502898705, "loss": 1.9775, "step": 4718 }, { "epoch": 0.16, "grad_norm": 0.521568775177002, "learning_rate": 0.0005959544397498069, "loss": 1.9984, "step": 4719 }, { "epoch": 0.16, "grad_norm": 0.49576351046562195, "learning_rate": 0.0005959527288504981, "loss": 2.0528, "step": 4720 }, { "epoch": 0.16, "grad_norm": 0.5233323574066162, "learning_rate": 0.0005959510175919465, "loss": 2.0042, "step": 4721 }, { "epoch": 0.16, "grad_norm": 0.5165610313415527, "learning_rate": 0.0005959493059741539, "loss": 1.9667, "step": 4722 }, { "epoch": 0.16, "grad_norm": 0.5036175847053528, "learning_rate": 0.0005959475939971224, "loss": 1.9784, "step": 4723 }, { "epoch": 0.16, "grad_norm": 0.5109030604362488, "learning_rate": 0.0005959458816608542, "loss": 2.0066, "step": 4724 }, { "epoch": 0.16, "grad_norm": 0.5089350938796997, "learning_rate": 0.0005959441689653512, "loss": 1.9329, "step": 4725 }, { "epoch": 0.16, "grad_norm": 0.493431955575943, "learning_rate": 0.0005959424559106157, "loss": 1.8837, "step": 4726 }, { "epoch": 0.16, "grad_norm": 0.5124140977859497, "learning_rate": 0.0005959407424966495, "loss": 1.9582, "step": 4727 }, { "epoch": 0.16, "grad_norm": 0.5131300091743469, "learning_rate": 0.000595939028723455, "loss": 1.991, "step": 4728 }, { "epoch": 0.16, "grad_norm": 0.5205848813056946, "learning_rate": 0.000595937314591034, "loss": 1.9969, "step": 4729 }, { "epoch": 0.16, "grad_norm": 0.592516303062439, "learning_rate": 0.0005959356000993888, "loss": 1.9617, "step": 4730 }, { "epoch": 0.16, "grad_norm": 0.4963110387325287, "learning_rate": 0.0005959338852485214, "loss": 1.9885, "step": 4731 }, { "epoch": 0.16, "grad_norm": 0.5345907211303711, "learning_rate": 0.0005959321700384338, "loss": 2.0038, "step": 4732 }, { "epoch": 0.16, "grad_norm": 0.5042508840560913, "learning_rate": 0.0005959304544691282, "loss": 2.0159, "step": 4733 }, { "epoch": 0.16, "grad_norm": 0.542580783367157, "learning_rate": 0.0005959287385406066, "loss": 1.9874, "step": 4734 }, { "epoch": 0.16, "grad_norm": 0.5232778787612915, "learning_rate": 0.0005959270222528712, "loss": 1.9605, "step": 4735 }, { "epoch": 0.16, "grad_norm": 0.5323865413665771, "learning_rate": 0.0005959253056059239, "loss": 2.028, "step": 4736 }, { "epoch": 0.16, "grad_norm": 0.529532253742218, "learning_rate": 0.0005959235885997668, "loss": 1.9979, "step": 4737 }, { "epoch": 0.16, "grad_norm": 0.5103834271430969, "learning_rate": 0.0005959218712344022, "loss": 2.0456, "step": 4738 }, { "epoch": 0.16, "grad_norm": 0.5518916249275208, "learning_rate": 0.000595920153509832, "loss": 2.0617, "step": 4739 }, { "epoch": 0.16, "grad_norm": 0.5604613423347473, "learning_rate": 0.0005959184354260584, "loss": 1.9767, "step": 4740 }, { "epoch": 0.16, "grad_norm": 0.5054436326026917, "learning_rate": 0.0005959167169830834, "loss": 1.9952, "step": 4741 }, { "epoch": 0.16, "grad_norm": 0.4984718859195709, "learning_rate": 0.000595914998180909, "loss": 2.0154, "step": 4742 }, { "epoch": 0.16, "grad_norm": 0.49112468957901, "learning_rate": 0.0005959132790195375, "loss": 1.984, "step": 4743 }, { "epoch": 0.16, "grad_norm": 0.523539125919342, "learning_rate": 0.0005959115594989709, "loss": 1.9444, "step": 4744 }, { "epoch": 0.16, "grad_norm": 0.5052270293235779, "learning_rate": 0.0005959098396192112, "loss": 2.0648, "step": 4745 }, { "epoch": 0.16, "grad_norm": 0.523871660232544, "learning_rate": 0.0005959081193802606, "loss": 2.0939, "step": 4746 }, { "epoch": 0.16, "grad_norm": 0.5159523487091064, "learning_rate": 0.0005959063987821211, "loss": 1.9912, "step": 4747 }, { "epoch": 0.16, "grad_norm": 0.532209038734436, "learning_rate": 0.0005959046778247948, "loss": 1.962, "step": 4748 }, { "epoch": 0.16, "grad_norm": 0.48174577951431274, "learning_rate": 0.000595902956508284, "loss": 2.0274, "step": 4749 }, { "epoch": 0.16, "grad_norm": 0.4943650960922241, "learning_rate": 0.0005959012348325905, "loss": 1.9288, "step": 4750 }, { "epoch": 0.16, "grad_norm": 0.5150046944618225, "learning_rate": 0.0005958995127977164, "loss": 2.0403, "step": 4751 }, { "epoch": 0.16, "grad_norm": 0.49526146054267883, "learning_rate": 0.0005958977904036639, "loss": 2.0013, "step": 4752 }, { "epoch": 0.16, "grad_norm": 0.5199190378189087, "learning_rate": 0.0005958960676504352, "loss": 2.0213, "step": 4753 }, { "epoch": 0.16, "grad_norm": 0.49575868248939514, "learning_rate": 0.0005958943445380322, "loss": 2.0151, "step": 4754 }, { "epoch": 0.16, "grad_norm": 0.5083469748497009, "learning_rate": 0.0005958926210664572, "loss": 1.9069, "step": 4755 }, { "epoch": 0.16, "grad_norm": 0.5177410840988159, "learning_rate": 0.000595890897235712, "loss": 2.0387, "step": 4756 }, { "epoch": 0.16, "grad_norm": 0.5008450746536255, "learning_rate": 0.000595889173045799, "loss": 1.9714, "step": 4757 }, { "epoch": 0.16, "grad_norm": 0.5026960968971252, "learning_rate": 0.0005958874484967199, "loss": 2.0485, "step": 4758 }, { "epoch": 0.16, "grad_norm": 0.5071702599525452, "learning_rate": 0.0005958857235884772, "loss": 1.9866, "step": 4759 }, { "epoch": 0.16, "grad_norm": 0.5144979357719421, "learning_rate": 0.0005958839983210728, "loss": 1.9767, "step": 4760 }, { "epoch": 0.16, "grad_norm": 0.49718594551086426, "learning_rate": 0.0005958822726945088, "loss": 1.9503, "step": 4761 }, { "epoch": 0.16, "grad_norm": 0.5033324360847473, "learning_rate": 0.0005958805467087874, "loss": 1.9766, "step": 4762 }, { "epoch": 0.16, "grad_norm": 0.48741456866264343, "learning_rate": 0.0005958788203639105, "loss": 2.0033, "step": 4763 }, { "epoch": 0.16, "grad_norm": 0.5025791525840759, "learning_rate": 0.0005958770936598804, "loss": 1.9242, "step": 4764 }, { "epoch": 0.16, "grad_norm": 0.5103114247322083, "learning_rate": 0.000595875366596699, "loss": 1.973, "step": 4765 }, { "epoch": 0.16, "grad_norm": 0.5094664692878723, "learning_rate": 0.0005958736391743686, "loss": 1.9758, "step": 4766 }, { "epoch": 0.16, "grad_norm": 0.49574339389801025, "learning_rate": 0.0005958719113928912, "loss": 1.9822, "step": 4767 }, { "epoch": 0.16, "grad_norm": 0.5206531882286072, "learning_rate": 0.0005958701832522688, "loss": 2.0583, "step": 4768 }, { "epoch": 0.16, "grad_norm": 0.5591310858726501, "learning_rate": 0.0005958684547525036, "loss": 2.0377, "step": 4769 }, { "epoch": 0.16, "grad_norm": 0.5451171398162842, "learning_rate": 0.0005958667258935979, "loss": 1.9432, "step": 4770 }, { "epoch": 0.16, "grad_norm": 0.5375843644142151, "learning_rate": 0.0005958649966755533, "loss": 2.0117, "step": 4771 }, { "epoch": 0.16, "grad_norm": 0.5199136734008789, "learning_rate": 0.0005958632670983724, "loss": 1.9978, "step": 4772 }, { "epoch": 0.16, "grad_norm": 0.5335261225700378, "learning_rate": 0.000595861537162057, "loss": 2.0165, "step": 4773 }, { "epoch": 0.16, "grad_norm": 0.5075854063034058, "learning_rate": 0.0005958598068666092, "loss": 2.0041, "step": 4774 }, { "epoch": 0.16, "grad_norm": 0.5435339212417603, "learning_rate": 0.0005958580762120313, "loss": 1.9865, "step": 4775 }, { "epoch": 0.16, "grad_norm": 0.6023865938186646, "learning_rate": 0.0005958563451983252, "loss": 2.0383, "step": 4776 }, { "epoch": 0.16, "grad_norm": 0.5047125220298767, "learning_rate": 0.0005958546138254932, "loss": 2.0103, "step": 4777 }, { "epoch": 0.16, "grad_norm": 0.5024769306182861, "learning_rate": 0.0005958528820935372, "loss": 1.9801, "step": 4778 }, { "epoch": 0.16, "grad_norm": 0.574450671672821, "learning_rate": 0.0005958511500024595, "loss": 2.0724, "step": 4779 }, { "epoch": 0.16, "grad_norm": 0.5237123966217041, "learning_rate": 0.000595849417552262, "loss": 2.0268, "step": 4780 }, { "epoch": 0.16, "grad_norm": 0.5184652209281921, "learning_rate": 0.000595847684742947, "loss": 1.9546, "step": 4781 }, { "epoch": 0.16, "grad_norm": 0.5246908068656921, "learning_rate": 0.0005958459515745164, "loss": 1.9516, "step": 4782 }, { "epoch": 0.16, "grad_norm": 0.5001529455184937, "learning_rate": 0.0005958442180469724, "loss": 1.9392, "step": 4783 }, { "epoch": 0.16, "grad_norm": 0.5084772109985352, "learning_rate": 0.0005958424841603172, "loss": 2.0125, "step": 4784 }, { "epoch": 0.16, "grad_norm": 0.5095394849777222, "learning_rate": 0.0005958407499145527, "loss": 2.035, "step": 4785 }, { "epoch": 0.16, "grad_norm": 0.5360515117645264, "learning_rate": 0.0005958390153096812, "loss": 1.9957, "step": 4786 }, { "epoch": 0.16, "grad_norm": 0.515989363193512, "learning_rate": 0.0005958372803457047, "loss": 1.9779, "step": 4787 }, { "epoch": 0.16, "grad_norm": 0.5136838555335999, "learning_rate": 0.0005958355450226253, "loss": 1.9211, "step": 4788 }, { "epoch": 0.16, "grad_norm": 0.5393736362457275, "learning_rate": 0.0005958338093404452, "loss": 1.9702, "step": 4789 }, { "epoch": 0.16, "grad_norm": 0.5067117810249329, "learning_rate": 0.0005958320732991665, "loss": 2.0284, "step": 4790 }, { "epoch": 0.16, "grad_norm": 0.5319119691848755, "learning_rate": 0.0005958303368987911, "loss": 1.9637, "step": 4791 }, { "epoch": 0.16, "grad_norm": 0.5294891595840454, "learning_rate": 0.0005958286001393213, "loss": 2.0319, "step": 4792 }, { "epoch": 0.16, "grad_norm": 0.5224229693412781, "learning_rate": 0.0005958268630207592, "loss": 1.9913, "step": 4793 }, { "epoch": 0.16, "grad_norm": 0.5339179039001465, "learning_rate": 0.0005958251255431069, "loss": 1.9842, "step": 4794 }, { "epoch": 0.16, "grad_norm": 0.5195781588554382, "learning_rate": 0.0005958233877063665, "loss": 2.0109, "step": 4795 }, { "epoch": 0.16, "grad_norm": 0.4913368821144104, "learning_rate": 0.00059582164951054, "loss": 1.9267, "step": 4796 }, { "epoch": 0.16, "grad_norm": 0.5178647041320801, "learning_rate": 0.0005958199109556297, "loss": 1.9606, "step": 4797 }, { "epoch": 0.16, "grad_norm": 0.5173839330673218, "learning_rate": 0.0005958181720416376, "loss": 2.023, "step": 4798 }, { "epoch": 0.16, "grad_norm": 0.5194208025932312, "learning_rate": 0.0005958164327685658, "loss": 2.0135, "step": 4799 }, { "epoch": 0.16, "grad_norm": 0.5005496740341187, "learning_rate": 0.0005958146931364164, "loss": 1.9405, "step": 4800 }, { "epoch": 0.16, "grad_norm": 0.5329975485801697, "learning_rate": 0.0005958129531451915, "loss": 2.0379, "step": 4801 }, { "epoch": 0.16, "grad_norm": 0.49654000997543335, "learning_rate": 0.0005958112127948934, "loss": 1.9501, "step": 4802 }, { "epoch": 0.16, "grad_norm": 0.5310746431350708, "learning_rate": 0.000595809472085524, "loss": 1.9221, "step": 4803 }, { "epoch": 0.16, "grad_norm": 0.5503776669502258, "learning_rate": 0.0005958077310170855, "loss": 1.9261, "step": 4804 }, { "epoch": 0.16, "grad_norm": 0.4909536838531494, "learning_rate": 0.00059580598958958, "loss": 1.9036, "step": 4805 }, { "epoch": 0.16, "grad_norm": 0.5398523807525635, "learning_rate": 0.0005958042478030096, "loss": 2.0541, "step": 4806 }, { "epoch": 0.16, "grad_norm": 0.5044135451316833, "learning_rate": 0.0005958025056573765, "loss": 1.9838, "step": 4807 }, { "epoch": 0.16, "grad_norm": 0.5229828357696533, "learning_rate": 0.0005958007631526827, "loss": 2.0293, "step": 4808 }, { "epoch": 0.16, "grad_norm": 0.5127906799316406, "learning_rate": 0.0005957990202889303, "loss": 1.8769, "step": 4809 }, { "epoch": 0.16, "grad_norm": 0.536081075668335, "learning_rate": 0.0005957972770661216, "loss": 2.0945, "step": 4810 }, { "epoch": 0.16, "grad_norm": 0.5285680890083313, "learning_rate": 0.0005957955334842585, "loss": 1.9667, "step": 4811 }, { "epoch": 0.16, "grad_norm": 0.5231091976165771, "learning_rate": 0.0005957937895433432, "loss": 1.9938, "step": 4812 }, { "epoch": 0.16, "grad_norm": 0.5267269611358643, "learning_rate": 0.0005957920452433778, "loss": 1.966, "step": 4813 }, { "epoch": 0.16, "grad_norm": 0.5623654723167419, "learning_rate": 0.0005957903005843646, "loss": 1.8865, "step": 4814 }, { "epoch": 0.16, "grad_norm": 0.5181549787521362, "learning_rate": 0.0005957885555663054, "loss": 1.9787, "step": 4815 }, { "epoch": 0.16, "grad_norm": 0.512752890586853, "learning_rate": 0.0005957868101892025, "loss": 1.9471, "step": 4816 }, { "epoch": 0.16, "grad_norm": 0.5383774638175964, "learning_rate": 0.000595785064453058, "loss": 2.0201, "step": 4817 }, { "epoch": 0.16, "grad_norm": 0.5099377036094666, "learning_rate": 0.0005957833183578739, "loss": 2.0756, "step": 4818 }, { "epoch": 0.16, "grad_norm": 0.5075957775115967, "learning_rate": 0.0005957815719036527, "loss": 2.0293, "step": 4819 }, { "epoch": 0.16, "grad_norm": 0.5141518115997314, "learning_rate": 0.000595779825090396, "loss": 1.9794, "step": 4820 }, { "epoch": 0.16, "grad_norm": 0.5183693170547485, "learning_rate": 0.0005957780779181064, "loss": 1.955, "step": 4821 }, { "epoch": 0.16, "grad_norm": 0.5175424814224243, "learning_rate": 0.0005957763303867856, "loss": 1.9982, "step": 4822 }, { "epoch": 0.16, "grad_norm": 0.4969673752784729, "learning_rate": 0.000595774582496436, "loss": 1.9585, "step": 4823 }, { "epoch": 0.16, "grad_norm": 0.5131390690803528, "learning_rate": 0.0005957728342470596, "loss": 2.0051, "step": 4824 }, { "epoch": 0.16, "grad_norm": 0.5510814189910889, "learning_rate": 0.0005957710856386586, "loss": 2.049, "step": 4825 }, { "epoch": 0.16, "grad_norm": 0.4879082143306732, "learning_rate": 0.0005957693366712351, "loss": 1.9001, "step": 4826 }, { "epoch": 0.16, "grad_norm": 0.47839614748954773, "learning_rate": 0.0005957675873447912, "loss": 1.969, "step": 4827 }, { "epoch": 0.16, "grad_norm": 0.5311852693557739, "learning_rate": 0.000595765837659329, "loss": 1.9662, "step": 4828 }, { "epoch": 0.16, "grad_norm": 0.5214430093765259, "learning_rate": 0.0005957640876148506, "loss": 1.9362, "step": 4829 }, { "epoch": 0.16, "grad_norm": 0.4936506152153015, "learning_rate": 0.0005957623372113582, "loss": 1.9942, "step": 4830 }, { "epoch": 0.16, "grad_norm": 0.5279707908630371, "learning_rate": 0.000595760586448854, "loss": 1.9423, "step": 4831 }, { "epoch": 0.16, "grad_norm": 0.5127938985824585, "learning_rate": 0.00059575883532734, "loss": 1.9803, "step": 4832 }, { "epoch": 0.16, "grad_norm": 0.5126243233680725, "learning_rate": 0.0005957570838468183, "loss": 2.0998, "step": 4833 }, { "epoch": 0.16, "grad_norm": 0.521881639957428, "learning_rate": 0.000595755332007291, "loss": 2.0166, "step": 4834 }, { "epoch": 0.16, "grad_norm": 0.4886327385902405, "learning_rate": 0.0005957535798087604, "loss": 1.9906, "step": 4835 }, { "epoch": 0.16, "grad_norm": 0.5059775710105896, "learning_rate": 0.0005957518272512285, "loss": 2.0138, "step": 4836 }, { "epoch": 0.16, "grad_norm": 0.49813172221183777, "learning_rate": 0.0005957500743346975, "loss": 2.0256, "step": 4837 }, { "epoch": 0.16, "grad_norm": 0.518478274345398, "learning_rate": 0.0005957483210591695, "loss": 1.9631, "step": 4838 }, { "epoch": 0.16, "grad_norm": 0.4800408184528351, "learning_rate": 0.0005957465674246466, "loss": 1.9948, "step": 4839 }, { "epoch": 0.16, "grad_norm": 0.5048866271972656, "learning_rate": 0.0005957448134311309, "loss": 1.8971, "step": 4840 }, { "epoch": 0.16, "grad_norm": 0.5084377527236938, "learning_rate": 0.0005957430590786245, "loss": 1.9774, "step": 4841 }, { "epoch": 0.16, "grad_norm": 0.49784788489341736, "learning_rate": 0.0005957413043671298, "loss": 2.0897, "step": 4842 }, { "epoch": 0.16, "grad_norm": 0.5006313920021057, "learning_rate": 0.0005957395492966486, "loss": 1.9953, "step": 4843 }, { "epoch": 0.16, "grad_norm": 0.49547266960144043, "learning_rate": 0.0005957377938671833, "loss": 1.9587, "step": 4844 }, { "epoch": 0.16, "grad_norm": 0.48265063762664795, "learning_rate": 0.0005957360380787357, "loss": 1.9838, "step": 4845 }, { "epoch": 0.16, "grad_norm": 0.4990682899951935, "learning_rate": 0.0005957342819313083, "loss": 1.9687, "step": 4846 }, { "epoch": 0.16, "grad_norm": 0.5058513879776001, "learning_rate": 0.0005957325254249029, "loss": 1.9897, "step": 4847 }, { "epoch": 0.16, "grad_norm": 0.5062679648399353, "learning_rate": 0.0005957307685595218, "loss": 2.0031, "step": 4848 }, { "epoch": 0.16, "grad_norm": 0.5053221583366394, "learning_rate": 0.0005957290113351673, "loss": 2.0372, "step": 4849 }, { "epoch": 0.16, "grad_norm": 0.5029600262641907, "learning_rate": 0.0005957272537518412, "loss": 2.0217, "step": 4850 }, { "epoch": 0.16, "grad_norm": 0.5166429877281189, "learning_rate": 0.0005957254958095458, "loss": 2.0347, "step": 4851 }, { "epoch": 0.16, "grad_norm": 0.5008566379547119, "learning_rate": 0.0005957237375082833, "loss": 2.001, "step": 4852 }, { "epoch": 0.16, "grad_norm": 0.49103328585624695, "learning_rate": 0.0005957219788480557, "loss": 2.0003, "step": 4853 }, { "epoch": 0.16, "grad_norm": 0.5105762481689453, "learning_rate": 0.0005957202198288652, "loss": 1.987, "step": 4854 }, { "epoch": 0.16, "grad_norm": 0.4994581937789917, "learning_rate": 0.0005957184604507139, "loss": 1.9665, "step": 4855 }, { "epoch": 0.16, "grad_norm": 0.4908614456653595, "learning_rate": 0.0005957167007136039, "loss": 2.017, "step": 4856 }, { "epoch": 0.16, "grad_norm": 0.4902609884738922, "learning_rate": 0.0005957149406175374, "loss": 1.9566, "step": 4857 }, { "epoch": 0.16, "grad_norm": 0.5050660967826843, "learning_rate": 0.0005957131801625166, "loss": 2.0808, "step": 4858 }, { "epoch": 0.16, "grad_norm": 0.5192106366157532, "learning_rate": 0.0005957114193485437, "loss": 2.0197, "step": 4859 }, { "epoch": 0.16, "grad_norm": 0.4998486340045929, "learning_rate": 0.0005957096581756206, "loss": 1.9323, "step": 4860 }, { "epoch": 0.16, "grad_norm": 0.49314677715301514, "learning_rate": 0.0005957078966437495, "loss": 2.0437, "step": 4861 }, { "epoch": 0.16, "grad_norm": 0.5419023036956787, "learning_rate": 0.0005957061347529327, "loss": 2.0324, "step": 4862 }, { "epoch": 0.16, "grad_norm": 0.5049213767051697, "learning_rate": 0.000595704372503172, "loss": 2.1209, "step": 4863 }, { "epoch": 0.16, "grad_norm": 0.5047248601913452, "learning_rate": 0.0005957026098944699, "loss": 2.0432, "step": 4864 }, { "epoch": 0.16, "grad_norm": 0.5321318507194519, "learning_rate": 0.0005957008469268284, "loss": 1.9482, "step": 4865 }, { "epoch": 0.16, "grad_norm": 0.5153258442878723, "learning_rate": 0.0005956990836002497, "loss": 1.938, "step": 4866 }, { "epoch": 0.16, "grad_norm": 0.5012543797492981, "learning_rate": 0.0005956973199147357, "loss": 2.0806, "step": 4867 }, { "epoch": 0.16, "grad_norm": 0.5078972578048706, "learning_rate": 0.000595695555870289, "loss": 1.9983, "step": 4868 }, { "epoch": 0.16, "grad_norm": 0.5100287199020386, "learning_rate": 0.0005956937914669112, "loss": 1.9578, "step": 4869 }, { "epoch": 0.16, "grad_norm": 0.49227672815322876, "learning_rate": 0.0005956920267046049, "loss": 1.9447, "step": 4870 }, { "epoch": 0.16, "grad_norm": 0.4861617088317871, "learning_rate": 0.0005956902615833719, "loss": 1.9104, "step": 4871 }, { "epoch": 0.16, "grad_norm": 0.4998449683189392, "learning_rate": 0.0005956884961032145, "loss": 2.0215, "step": 4872 }, { "epoch": 0.16, "grad_norm": 0.5387492179870605, "learning_rate": 0.000595686730264135, "loss": 2.0386, "step": 4873 }, { "epoch": 0.16, "grad_norm": 0.5136713981628418, "learning_rate": 0.0005956849640661353, "loss": 1.9739, "step": 4874 }, { "epoch": 0.16, "grad_norm": 0.49218836426734924, "learning_rate": 0.0005956831975092175, "loss": 2.0345, "step": 4875 }, { "epoch": 0.16, "grad_norm": 0.4996737539768219, "learning_rate": 0.0005956814305933839, "loss": 2.0401, "step": 4876 }, { "epoch": 0.16, "grad_norm": 0.5051903128623962, "learning_rate": 0.0005956796633186367, "loss": 1.9884, "step": 4877 }, { "epoch": 0.16, "grad_norm": 0.5211124420166016, "learning_rate": 0.000595677895684978, "loss": 2.0252, "step": 4878 }, { "epoch": 0.16, "grad_norm": 0.5046551823616028, "learning_rate": 0.0005956761276924097, "loss": 1.9682, "step": 4879 }, { "epoch": 0.16, "grad_norm": 0.5107429623603821, "learning_rate": 0.0005956743593409342, "loss": 1.9917, "step": 4880 }, { "epoch": 0.16, "grad_norm": 0.5018178224563599, "learning_rate": 0.0005956725906305538, "loss": 1.9944, "step": 4881 }, { "epoch": 0.16, "grad_norm": 0.5314888954162598, "learning_rate": 0.0005956708215612702, "loss": 2.0362, "step": 4882 }, { "epoch": 0.16, "grad_norm": 0.5166836380958557, "learning_rate": 0.0005956690521330859, "loss": 1.9357, "step": 4883 }, { "epoch": 0.16, "grad_norm": 0.5092986822128296, "learning_rate": 0.0005956672823460029, "loss": 1.9645, "step": 4884 }, { "epoch": 0.16, "grad_norm": 0.4928189814090729, "learning_rate": 0.0005956655122000234, "loss": 1.9751, "step": 4885 }, { "epoch": 0.16, "grad_norm": 0.48410525918006897, "learning_rate": 0.0005956637416951495, "loss": 1.9768, "step": 4886 }, { "epoch": 0.16, "grad_norm": 0.5089593529701233, "learning_rate": 0.0005956619708313834, "loss": 1.988, "step": 4887 }, { "epoch": 0.16, "grad_norm": 0.5073099136352539, "learning_rate": 0.0005956601996087274, "loss": 1.9583, "step": 4888 }, { "epoch": 0.16, "grad_norm": 0.48665449023246765, "learning_rate": 0.0005956584280271832, "loss": 1.9644, "step": 4889 }, { "epoch": 0.16, "grad_norm": 0.5060454607009888, "learning_rate": 0.0005956566560867534, "loss": 2.0447, "step": 4890 }, { "epoch": 0.16, "grad_norm": 0.5386306047439575, "learning_rate": 0.0005956548837874399, "loss": 1.9841, "step": 4891 }, { "epoch": 0.16, "grad_norm": 0.49927955865859985, "learning_rate": 0.0005956531111292449, "loss": 1.9333, "step": 4892 }, { "epoch": 0.16, "grad_norm": 0.5014883279800415, "learning_rate": 0.0005956513381121707, "loss": 1.9643, "step": 4893 }, { "epoch": 0.16, "grad_norm": 0.5344105362892151, "learning_rate": 0.0005956495647362193, "loss": 1.9363, "step": 4894 }, { "epoch": 0.16, "grad_norm": 0.497456431388855, "learning_rate": 0.000595647791001393, "loss": 1.966, "step": 4895 }, { "epoch": 0.16, "grad_norm": 0.5018097162246704, "learning_rate": 0.0005956460169076937, "loss": 2.0148, "step": 4896 }, { "epoch": 0.16, "grad_norm": 0.5047252178192139, "learning_rate": 0.0005956442424551237, "loss": 1.9294, "step": 4897 }, { "epoch": 0.16, "grad_norm": 0.5043100714683533, "learning_rate": 0.0005956424676436851, "loss": 1.8824, "step": 4898 }, { "epoch": 0.16, "grad_norm": 0.5201050043106079, "learning_rate": 0.0005956406924733802, "loss": 1.9408, "step": 4899 }, { "epoch": 0.16, "grad_norm": 0.498293936252594, "learning_rate": 0.0005956389169442111, "loss": 2.0204, "step": 4900 }, { "epoch": 0.16, "grad_norm": 0.50877445936203, "learning_rate": 0.0005956371410561799, "loss": 1.9407, "step": 4901 }, { "epoch": 0.16, "grad_norm": 0.4952044188976288, "learning_rate": 0.0005956353648092887, "loss": 1.921, "step": 4902 }, { "epoch": 0.16, "grad_norm": 0.5076532959938049, "learning_rate": 0.0005956335882035397, "loss": 1.945, "step": 4903 }, { "epoch": 0.16, "grad_norm": 0.5164033770561218, "learning_rate": 0.0005956318112389352, "loss": 2.0302, "step": 4904 }, { "epoch": 0.16, "grad_norm": 0.49134495854377747, "learning_rate": 0.0005956300339154772, "loss": 2.0707, "step": 4905 }, { "epoch": 0.16, "grad_norm": 0.531956136226654, "learning_rate": 0.0005956282562331679, "loss": 1.897, "step": 4906 }, { "epoch": 0.16, "grad_norm": 0.5202009677886963, "learning_rate": 0.0005956264781920094, "loss": 1.9983, "step": 4907 }, { "epoch": 0.16, "grad_norm": 0.5076751708984375, "learning_rate": 0.000595624699792004, "loss": 2.0173, "step": 4908 }, { "epoch": 0.16, "grad_norm": 0.49302369356155396, "learning_rate": 0.0005956229210331537, "loss": 2.0228, "step": 4909 }, { "epoch": 0.16, "grad_norm": 0.5051253437995911, "learning_rate": 0.0005956211419154608, "loss": 1.9901, "step": 4910 }, { "epoch": 0.16, "grad_norm": 0.5275821089744568, "learning_rate": 0.0005956193624389273, "loss": 2.1008, "step": 4911 }, { "epoch": 0.16, "grad_norm": 0.5030850768089294, "learning_rate": 0.0005956175826035555, "loss": 1.9739, "step": 4912 }, { "epoch": 0.16, "grad_norm": 0.502303421497345, "learning_rate": 0.0005956158024093475, "loss": 2.0186, "step": 4913 }, { "epoch": 0.16, "grad_norm": 0.4914870858192444, "learning_rate": 0.0005956140218563055, "loss": 1.9725, "step": 4914 }, { "epoch": 0.16, "grad_norm": 0.5047833919525146, "learning_rate": 0.0005956122409444315, "loss": 1.9599, "step": 4915 }, { "epoch": 0.16, "grad_norm": 0.5086768269538879, "learning_rate": 0.000595610459673728, "loss": 1.9538, "step": 4916 }, { "epoch": 0.16, "grad_norm": 0.49190253019332886, "learning_rate": 0.0005956086780441969, "loss": 2.0072, "step": 4917 }, { "epoch": 0.16, "grad_norm": 0.49342766404151917, "learning_rate": 0.0005956068960558404, "loss": 2.0391, "step": 4918 }, { "epoch": 0.16, "grad_norm": 0.4937877953052521, "learning_rate": 0.0005956051137086606, "loss": 1.9773, "step": 4919 }, { "epoch": 0.16, "grad_norm": 0.5166295766830444, "learning_rate": 0.0005956033310026598, "loss": 1.9965, "step": 4920 }, { "epoch": 0.16, "grad_norm": 0.49067971110343933, "learning_rate": 0.0005956015479378402, "loss": 1.9449, "step": 4921 }, { "epoch": 0.16, "grad_norm": 0.5623854398727417, "learning_rate": 0.0005955997645142037, "loss": 1.9978, "step": 4922 }, { "epoch": 0.16, "grad_norm": 0.5224607586860657, "learning_rate": 0.0005955979807317529, "loss": 2.085, "step": 4923 }, { "epoch": 0.16, "grad_norm": 0.5131247639656067, "learning_rate": 0.0005955961965904895, "loss": 2.0109, "step": 4924 }, { "epoch": 0.16, "grad_norm": 0.4865434765815735, "learning_rate": 0.0005955944120904159, "loss": 1.9967, "step": 4925 }, { "epoch": 0.16, "grad_norm": 0.5286979675292969, "learning_rate": 0.0005955926272315343, "loss": 1.9992, "step": 4926 }, { "epoch": 0.16, "grad_norm": 0.5249556303024292, "learning_rate": 0.0005955908420138468, "loss": 2.038, "step": 4927 }, { "epoch": 0.16, "grad_norm": 0.506486713886261, "learning_rate": 0.0005955890564373554, "loss": 2.0135, "step": 4928 }, { "epoch": 0.16, "grad_norm": 0.5403449535369873, "learning_rate": 0.0005955872705020627, "loss": 1.9673, "step": 4929 }, { "epoch": 0.16, "grad_norm": 0.49517011642456055, "learning_rate": 0.0005955854842079704, "loss": 1.9587, "step": 4930 }, { "epoch": 0.16, "grad_norm": 0.5185629725456238, "learning_rate": 0.000595583697555081, "loss": 2.0252, "step": 4931 }, { "epoch": 0.16, "grad_norm": 0.5198730826377869, "learning_rate": 0.0005955819105433964, "loss": 1.933, "step": 4932 }, { "epoch": 0.16, "grad_norm": 0.49271366000175476, "learning_rate": 0.0005955801231729191, "loss": 2.0522, "step": 4933 }, { "epoch": 0.16, "grad_norm": 0.5177769660949707, "learning_rate": 0.000595578335443651, "loss": 2.06, "step": 4934 }, { "epoch": 0.16, "grad_norm": 0.5047462582588196, "learning_rate": 0.0005955765473555943, "loss": 2.0213, "step": 4935 }, { "epoch": 0.16, "grad_norm": 0.5107104182243347, "learning_rate": 0.0005955747589087514, "loss": 1.9621, "step": 4936 }, { "epoch": 0.16, "grad_norm": 0.5125019550323486, "learning_rate": 0.0005955729701031241, "loss": 1.9914, "step": 4937 }, { "epoch": 0.16, "grad_norm": 0.562960147857666, "learning_rate": 0.0005955711809387148, "loss": 1.991, "step": 4938 }, { "epoch": 0.16, "grad_norm": 0.5015324950218201, "learning_rate": 0.0005955693914155256, "loss": 2.0436, "step": 4939 }, { "epoch": 0.16, "grad_norm": 0.5254577398300171, "learning_rate": 0.0005955676015335588, "loss": 1.9973, "step": 4940 }, { "epoch": 0.16, "grad_norm": 0.5248375535011292, "learning_rate": 0.0005955658112928165, "loss": 2.023, "step": 4941 }, { "epoch": 0.16, "grad_norm": 0.48355168104171753, "learning_rate": 0.0005955640206933008, "loss": 1.949, "step": 4942 }, { "epoch": 0.16, "grad_norm": 0.5037674903869629, "learning_rate": 0.000595562229735014, "loss": 1.9815, "step": 4943 }, { "epoch": 0.16, "grad_norm": 0.5551549196243286, "learning_rate": 0.0005955604384179582, "loss": 1.964, "step": 4944 }, { "epoch": 0.16, "grad_norm": 0.5100195407867432, "learning_rate": 0.0005955586467421355, "loss": 2.0168, "step": 4945 }, { "epoch": 0.16, "grad_norm": 0.520174503326416, "learning_rate": 0.0005955568547075481, "loss": 1.9728, "step": 4946 }, { "epoch": 0.16, "grad_norm": 0.532913327217102, "learning_rate": 0.0005955550623141983, "loss": 2.0029, "step": 4947 }, { "epoch": 0.16, "grad_norm": 0.5131630301475525, "learning_rate": 0.0005955532695620882, "loss": 1.9642, "step": 4948 }, { "epoch": 0.16, "grad_norm": 0.5318775773048401, "learning_rate": 0.0005955514764512199, "loss": 2.0544, "step": 4949 }, { "epoch": 0.16, "grad_norm": 0.5088479518890381, "learning_rate": 0.0005955496829815958, "loss": 1.9966, "step": 4950 }, { "epoch": 0.16, "grad_norm": 0.48815152049064636, "learning_rate": 0.0005955478891532178, "loss": 1.9396, "step": 4951 }, { "epoch": 0.16, "grad_norm": 0.5353509187698364, "learning_rate": 0.0005955460949660883, "loss": 2.008, "step": 4952 }, { "epoch": 0.16, "grad_norm": 0.5099921822547913, "learning_rate": 0.0005955443004202095, "loss": 1.9398, "step": 4953 }, { "epoch": 0.16, "grad_norm": 0.5213404893875122, "learning_rate": 0.0005955425055155832, "loss": 1.9706, "step": 4954 }, { "epoch": 0.16, "grad_norm": 0.5097898840904236, "learning_rate": 0.0005955407102522121, "loss": 1.9812, "step": 4955 }, { "epoch": 0.16, "grad_norm": 0.5012139678001404, "learning_rate": 0.0005955389146300978, "loss": 2.0389, "step": 4956 }, { "epoch": 0.16, "grad_norm": 0.5261397361755371, "learning_rate": 0.0005955371186492431, "loss": 2.0913, "step": 4957 }, { "epoch": 0.16, "grad_norm": 0.5014441609382629, "learning_rate": 0.0005955353223096498, "loss": 2.0107, "step": 4958 }, { "epoch": 0.16, "grad_norm": 0.4883250296115875, "learning_rate": 0.0005955335256113201, "loss": 1.9623, "step": 4959 }, { "epoch": 0.17, "grad_norm": 0.5260628461837769, "learning_rate": 0.0005955317285542563, "loss": 2.0221, "step": 4960 }, { "epoch": 0.17, "grad_norm": 0.5184400081634521, "learning_rate": 0.0005955299311384605, "loss": 2.0045, "step": 4961 }, { "epoch": 0.17, "grad_norm": 0.5010412931442261, "learning_rate": 0.0005955281333639349, "loss": 2.0134, "step": 4962 }, { "epoch": 0.17, "grad_norm": 0.49841639399528503, "learning_rate": 0.0005955263352306817, "loss": 2.0165, "step": 4963 }, { "epoch": 0.17, "grad_norm": 0.5096634030342102, "learning_rate": 0.0005955245367387031, "loss": 2.0186, "step": 4964 }, { "epoch": 0.17, "grad_norm": 0.5594196319580078, "learning_rate": 0.0005955227378880013, "loss": 1.9671, "step": 4965 }, { "epoch": 0.17, "grad_norm": 0.4841923713684082, "learning_rate": 0.0005955209386785783, "loss": 1.8986, "step": 4966 }, { "epoch": 0.17, "grad_norm": 0.4834287464618683, "learning_rate": 0.0005955191391104366, "loss": 2.0092, "step": 4967 }, { "epoch": 0.17, "grad_norm": 0.4849758744239807, "learning_rate": 0.0005955173391835781, "loss": 2.0292, "step": 4968 }, { "epoch": 0.17, "grad_norm": 0.5130020380020142, "learning_rate": 0.0005955155388980051, "loss": 2.0093, "step": 4969 }, { "epoch": 0.17, "grad_norm": 0.4955274164676666, "learning_rate": 0.0005955137382537198, "loss": 1.9979, "step": 4970 }, { "epoch": 0.17, "grad_norm": 0.4897521436214447, "learning_rate": 0.0005955119372507244, "loss": 1.9844, "step": 4971 }, { "epoch": 0.17, "grad_norm": 0.49560901522636414, "learning_rate": 0.0005955101358890209, "loss": 2.0263, "step": 4972 }, { "epoch": 0.17, "grad_norm": 0.49568262696266174, "learning_rate": 0.0005955083341686117, "loss": 1.9939, "step": 4973 }, { "epoch": 0.17, "grad_norm": 0.5121573805809021, "learning_rate": 0.000595506532089499, "loss": 1.982, "step": 4974 }, { "epoch": 0.17, "grad_norm": 0.4784005582332611, "learning_rate": 0.0005955047296516848, "loss": 2.0201, "step": 4975 }, { "epoch": 0.17, "grad_norm": 0.5044947862625122, "learning_rate": 0.0005955029268551715, "loss": 1.985, "step": 4976 }, { "epoch": 0.17, "grad_norm": 0.5264212489128113, "learning_rate": 0.0005955011236999612, "loss": 2.008, "step": 4977 }, { "epoch": 0.17, "grad_norm": 0.4979681968688965, "learning_rate": 0.000595499320186056, "loss": 2.0387, "step": 4978 }, { "epoch": 0.17, "grad_norm": 0.48234298825263977, "learning_rate": 0.0005954975163134582, "loss": 2.0197, "step": 4979 }, { "epoch": 0.17, "grad_norm": 0.48969635367393494, "learning_rate": 0.00059549571208217, "loss": 1.9965, "step": 4980 }, { "epoch": 0.17, "grad_norm": 0.5086045861244202, "learning_rate": 0.0005954939074921935, "loss": 2.001, "step": 4981 }, { "epoch": 0.17, "grad_norm": 0.5120514631271362, "learning_rate": 0.0005954921025435308, "loss": 1.9988, "step": 4982 }, { "epoch": 0.17, "grad_norm": 0.5012429356575012, "learning_rate": 0.0005954902972361844, "loss": 1.9626, "step": 4983 }, { "epoch": 0.17, "grad_norm": 0.4979434907436371, "learning_rate": 0.0005954884915701562, "loss": 1.9815, "step": 4984 }, { "epoch": 0.17, "grad_norm": 0.49543553590774536, "learning_rate": 0.0005954866855454487, "loss": 1.9593, "step": 4985 }, { "epoch": 0.17, "grad_norm": 0.49819257855415344, "learning_rate": 0.0005954848791620638, "loss": 1.9872, "step": 4986 }, { "epoch": 0.17, "grad_norm": 0.5021565556526184, "learning_rate": 0.0005954830724200038, "loss": 1.9916, "step": 4987 }, { "epoch": 0.17, "grad_norm": 0.5322622656822205, "learning_rate": 0.0005954812653192709, "loss": 1.9918, "step": 4988 }, { "epoch": 0.17, "grad_norm": 0.502804160118103, "learning_rate": 0.0005954794578598672, "loss": 2.1005, "step": 4989 }, { "epoch": 0.17, "grad_norm": 0.48907002806663513, "learning_rate": 0.000595477650041795, "loss": 1.9733, "step": 4990 }, { "epoch": 0.17, "grad_norm": 0.5028116106987, "learning_rate": 0.0005954758418650564, "loss": 2.0348, "step": 4991 }, { "epoch": 0.17, "grad_norm": 0.5146613717079163, "learning_rate": 0.0005954740333296538, "loss": 1.9997, "step": 4992 }, { "epoch": 0.17, "grad_norm": 0.5404258370399475, "learning_rate": 0.0005954722244355894, "loss": 2.0087, "step": 4993 }, { "epoch": 0.17, "grad_norm": 0.5004469752311707, "learning_rate": 0.0005954704151828649, "loss": 1.9815, "step": 4994 }, { "epoch": 0.17, "grad_norm": 0.5207080841064453, "learning_rate": 0.0005954686055714831, "loss": 1.9508, "step": 4995 }, { "epoch": 0.17, "grad_norm": 0.4986746907234192, "learning_rate": 0.0005954667956014458, "loss": 1.9735, "step": 4996 }, { "epoch": 0.17, "grad_norm": 0.5175930857658386, "learning_rate": 0.0005954649852727555, "loss": 2.0119, "step": 4997 }, { "epoch": 0.17, "grad_norm": 0.514506995677948, "learning_rate": 0.000595463174585414, "loss": 1.9379, "step": 4998 }, { "epoch": 0.17, "grad_norm": 0.5021397471427917, "learning_rate": 0.000595461363539424, "loss": 1.9428, "step": 4999 }, { "epoch": 0.17, "grad_norm": 0.5031430721282959, "learning_rate": 0.0005954595521347873, "loss": 1.9371, "step": 5000 }, { "epoch": 0.17, "grad_norm": 0.5395408868789673, "learning_rate": 0.0005954577403715064, "loss": 1.9651, "step": 5001 }, { "epoch": 0.17, "grad_norm": 0.5209595561027527, "learning_rate": 0.000595455928249583, "loss": 1.9777, "step": 5002 }, { "epoch": 0.17, "grad_norm": 0.502898633480072, "learning_rate": 0.00059545411576902, "loss": 2.0045, "step": 5003 }, { "epoch": 0.17, "grad_norm": 0.49949437379837036, "learning_rate": 0.0005954523029298191, "loss": 1.9485, "step": 5004 }, { "epoch": 0.17, "grad_norm": 0.5182323455810547, "learning_rate": 0.0005954504897319826, "loss": 1.9585, "step": 5005 }, { "epoch": 0.17, "grad_norm": 0.49440744519233704, "learning_rate": 0.0005954486761755128, "loss": 1.9939, "step": 5006 }, { "epoch": 0.17, "grad_norm": 0.5246690511703491, "learning_rate": 0.0005954468622604117, "loss": 2.0749, "step": 5007 }, { "epoch": 0.17, "grad_norm": 0.5085793137550354, "learning_rate": 0.0005954450479866818, "loss": 1.9346, "step": 5008 }, { "epoch": 0.17, "grad_norm": 0.48739370703697205, "learning_rate": 0.0005954432333543251, "loss": 1.9553, "step": 5009 }, { "epoch": 0.17, "grad_norm": 0.491189181804657, "learning_rate": 0.0005954414183633438, "loss": 1.9888, "step": 5010 }, { "epoch": 0.17, "grad_norm": 0.4978877007961273, "learning_rate": 0.0005954396030137402, "loss": 1.9848, "step": 5011 }, { "epoch": 0.17, "grad_norm": 0.4948284924030304, "learning_rate": 0.0005954377873055164, "loss": 1.9392, "step": 5012 }, { "epoch": 0.17, "grad_norm": 0.4931459128856659, "learning_rate": 0.0005954359712386747, "loss": 1.8738, "step": 5013 }, { "epoch": 0.17, "grad_norm": 0.493795245885849, "learning_rate": 0.0005954341548132173, "loss": 1.8957, "step": 5014 }, { "epoch": 0.17, "grad_norm": 0.49294090270996094, "learning_rate": 0.0005954323380291462, "loss": 2.0636, "step": 5015 }, { "epoch": 0.17, "grad_norm": 0.49435102939605713, "learning_rate": 0.0005954305208864639, "loss": 2.0179, "step": 5016 }, { "epoch": 0.17, "grad_norm": 0.4899604618549347, "learning_rate": 0.0005954287033851724, "loss": 1.9924, "step": 5017 }, { "epoch": 0.17, "grad_norm": 0.5199936032295227, "learning_rate": 0.0005954268855252741, "loss": 2.0061, "step": 5018 }, { "epoch": 0.17, "grad_norm": 0.5062592625617981, "learning_rate": 0.0005954250673067711, "loss": 2.0229, "step": 5019 }, { "epoch": 0.17, "grad_norm": 0.5001283884048462, "learning_rate": 0.0005954232487296656, "loss": 2.0001, "step": 5020 }, { "epoch": 0.17, "grad_norm": 0.5000923275947571, "learning_rate": 0.0005954214297939597, "loss": 1.9471, "step": 5021 }, { "epoch": 0.17, "grad_norm": 0.5073345899581909, "learning_rate": 0.0005954196104996557, "loss": 1.9678, "step": 5022 }, { "epoch": 0.17, "grad_norm": 0.5100858211517334, "learning_rate": 0.0005954177908467559, "loss": 1.9538, "step": 5023 }, { "epoch": 0.17, "grad_norm": 0.5048137903213501, "learning_rate": 0.0005954159708352625, "loss": 2.0894, "step": 5024 }, { "epoch": 0.17, "grad_norm": 0.5155457854270935, "learning_rate": 0.0005954141504651775, "loss": 1.9311, "step": 5025 }, { "epoch": 0.17, "grad_norm": 0.5050557851791382, "learning_rate": 0.0005954123297365033, "loss": 1.9958, "step": 5026 }, { "epoch": 0.17, "grad_norm": 0.49322712421417236, "learning_rate": 0.0005954105086492421, "loss": 2.0017, "step": 5027 }, { "epoch": 0.17, "grad_norm": 0.5133865475654602, "learning_rate": 0.0005954086872033959, "loss": 1.9632, "step": 5028 }, { "epoch": 0.17, "grad_norm": 0.5158634781837463, "learning_rate": 0.0005954068653989673, "loss": 2.0109, "step": 5029 }, { "epoch": 0.17, "grad_norm": 0.5008237957954407, "learning_rate": 0.0005954050432359581, "loss": 1.9786, "step": 5030 }, { "epoch": 0.17, "grad_norm": 0.5057915449142456, "learning_rate": 0.0005954032207143708, "loss": 1.9915, "step": 5031 }, { "epoch": 0.17, "grad_norm": 0.5184677243232727, "learning_rate": 0.0005954013978342075, "loss": 2.0233, "step": 5032 }, { "epoch": 0.17, "grad_norm": 0.4970099627971649, "learning_rate": 0.0005953995745954704, "loss": 1.9077, "step": 5033 }, { "epoch": 0.17, "grad_norm": 0.5414760112762451, "learning_rate": 0.0005953977509981618, "loss": 2.0447, "step": 5034 }, { "epoch": 0.17, "grad_norm": 0.5124962329864502, "learning_rate": 0.0005953959270422838, "loss": 1.9891, "step": 5035 }, { "epoch": 0.17, "grad_norm": 0.49131760001182556, "learning_rate": 0.0005953941027278388, "loss": 1.9984, "step": 5036 }, { "epoch": 0.17, "grad_norm": 0.5204740166664124, "learning_rate": 0.0005953922780548288, "loss": 1.9796, "step": 5037 }, { "epoch": 0.17, "grad_norm": 0.4876413345336914, "learning_rate": 0.0005953904530232561, "loss": 1.9399, "step": 5038 }, { "epoch": 0.17, "grad_norm": 0.5148354768753052, "learning_rate": 0.000595388627633123, "loss": 2.0567, "step": 5039 }, { "epoch": 0.17, "grad_norm": 0.4657895863056183, "learning_rate": 0.0005953868018844314, "loss": 1.8975, "step": 5040 }, { "epoch": 0.17, "grad_norm": 0.47569626569747925, "learning_rate": 0.0005953849757771839, "loss": 2.0243, "step": 5041 }, { "epoch": 0.17, "grad_norm": 0.5051390528678894, "learning_rate": 0.0005953831493113825, "loss": 1.9506, "step": 5042 }, { "epoch": 0.17, "grad_norm": 0.5208096504211426, "learning_rate": 0.0005953813224870296, "loss": 2.0017, "step": 5043 }, { "epoch": 0.17, "grad_norm": 0.5057733058929443, "learning_rate": 0.0005953794953041272, "loss": 1.9628, "step": 5044 }, { "epoch": 0.17, "grad_norm": 0.4984799921512604, "learning_rate": 0.0005953776677626776, "loss": 1.9597, "step": 5045 }, { "epoch": 0.17, "grad_norm": 0.4885258674621582, "learning_rate": 0.0005953758398626831, "loss": 1.9626, "step": 5046 }, { "epoch": 0.17, "grad_norm": 0.4932624399662018, "learning_rate": 0.0005953740116041458, "loss": 1.9187, "step": 5047 }, { "epoch": 0.17, "grad_norm": 0.5036399364471436, "learning_rate": 0.0005953721829870679, "loss": 1.9707, "step": 5048 }, { "epoch": 0.17, "grad_norm": 0.48519784212112427, "learning_rate": 0.0005953703540114518, "loss": 1.9612, "step": 5049 }, { "epoch": 0.17, "grad_norm": 0.518271267414093, "learning_rate": 0.0005953685246772996, "loss": 1.9882, "step": 5050 }, { "epoch": 0.17, "grad_norm": 0.4979521334171295, "learning_rate": 0.0005953666949846135, "loss": 1.9903, "step": 5051 }, { "epoch": 0.17, "grad_norm": 0.5101299285888672, "learning_rate": 0.0005953648649333958, "loss": 2.0064, "step": 5052 }, { "epoch": 0.17, "grad_norm": 0.4810065031051636, "learning_rate": 0.0005953630345236487, "loss": 2.0067, "step": 5053 }, { "epoch": 0.17, "grad_norm": 0.5031569004058838, "learning_rate": 0.0005953612037553742, "loss": 2.071, "step": 5054 }, { "epoch": 0.17, "grad_norm": 0.5015891790390015, "learning_rate": 0.000595359372628575, "loss": 2.0075, "step": 5055 }, { "epoch": 0.17, "grad_norm": 0.5242478847503662, "learning_rate": 0.0005953575411432528, "loss": 1.98, "step": 5056 }, { "epoch": 0.17, "grad_norm": 0.503993034362793, "learning_rate": 0.0005953557092994102, "loss": 2.0348, "step": 5057 }, { "epoch": 0.17, "grad_norm": 0.5043247938156128, "learning_rate": 0.0005953538770970492, "loss": 1.9528, "step": 5058 }, { "epoch": 0.17, "grad_norm": 0.5775919556617737, "learning_rate": 0.0005953520445361722, "loss": 2.0677, "step": 5059 }, { "epoch": 0.17, "grad_norm": 0.5173640251159668, "learning_rate": 0.0005953502116167813, "loss": 1.8607, "step": 5060 }, { "epoch": 0.17, "grad_norm": 0.48854896426200867, "learning_rate": 0.0005953483783388788, "loss": 1.9687, "step": 5061 }, { "epoch": 0.17, "grad_norm": 0.4953368604183197, "learning_rate": 0.0005953465447024668, "loss": 1.9877, "step": 5062 }, { "epoch": 0.17, "grad_norm": 0.4880703389644623, "learning_rate": 0.0005953447107075476, "loss": 1.976, "step": 5063 }, { "epoch": 0.17, "grad_norm": 0.4893798232078552, "learning_rate": 0.0005953428763541236, "loss": 2.0183, "step": 5064 }, { "epoch": 0.17, "grad_norm": 0.495890349149704, "learning_rate": 0.0005953410416421967, "loss": 1.9404, "step": 5065 }, { "epoch": 0.17, "grad_norm": 0.514496922492981, "learning_rate": 0.0005953392065717694, "loss": 2.0584, "step": 5066 }, { "epoch": 0.17, "grad_norm": 0.5432628393173218, "learning_rate": 0.0005953373711428438, "loss": 1.9875, "step": 5067 }, { "epoch": 0.17, "grad_norm": 0.4941652715206146, "learning_rate": 0.0005953355353554221, "loss": 1.9641, "step": 5068 }, { "epoch": 0.17, "grad_norm": 0.5098749399185181, "learning_rate": 0.0005953336992095066, "loss": 1.9195, "step": 5069 }, { "epoch": 0.17, "grad_norm": 0.5186824798583984, "learning_rate": 0.0005953318627050995, "loss": 1.9014, "step": 5070 }, { "epoch": 0.17, "grad_norm": 0.521876335144043, "learning_rate": 0.0005953300258422031, "loss": 1.9859, "step": 5071 }, { "epoch": 0.17, "grad_norm": 0.5228608846664429, "learning_rate": 0.0005953281886208194, "loss": 1.9494, "step": 5072 }, { "epoch": 0.17, "grad_norm": 0.4862111508846283, "learning_rate": 0.0005953263510409509, "loss": 1.9214, "step": 5073 }, { "epoch": 0.17, "grad_norm": 0.4965882897377014, "learning_rate": 0.0005953245131025997, "loss": 1.9448, "step": 5074 }, { "epoch": 0.17, "grad_norm": 0.5210286378860474, "learning_rate": 0.0005953226748057681, "loss": 2.0246, "step": 5075 }, { "epoch": 0.17, "grad_norm": 0.4907378554344177, "learning_rate": 0.0005953208361504583, "loss": 1.9345, "step": 5076 }, { "epoch": 0.17, "grad_norm": 0.4919658303260803, "learning_rate": 0.0005953189971366724, "loss": 2.0133, "step": 5077 }, { "epoch": 0.17, "grad_norm": 0.5106158256530762, "learning_rate": 0.0005953171577644129, "loss": 1.9866, "step": 5078 }, { "epoch": 0.17, "grad_norm": 0.5021556615829468, "learning_rate": 0.0005953153180336819, "loss": 2.0842, "step": 5079 }, { "epoch": 0.17, "grad_norm": 0.49048519134521484, "learning_rate": 0.0005953134779444814, "loss": 1.9858, "step": 5080 }, { "epoch": 0.17, "grad_norm": 0.5134440064430237, "learning_rate": 0.000595311637496814, "loss": 2.0172, "step": 5081 }, { "epoch": 0.17, "grad_norm": 0.49715080857276917, "learning_rate": 0.0005953097966906818, "loss": 1.9821, "step": 5082 }, { "epoch": 0.17, "grad_norm": 0.5034858584403992, "learning_rate": 0.0005953079555260869, "loss": 1.9771, "step": 5083 }, { "epoch": 0.17, "grad_norm": 0.5031391978263855, "learning_rate": 0.0005953061140030318, "loss": 1.979, "step": 5084 }, { "epoch": 0.17, "grad_norm": 0.515777051448822, "learning_rate": 0.0005953042721215185, "loss": 2.0346, "step": 5085 }, { "epoch": 0.17, "grad_norm": 0.5167722105979919, "learning_rate": 0.0005953024298815493, "loss": 2.005, "step": 5086 }, { "epoch": 0.17, "grad_norm": 0.49629470705986023, "learning_rate": 0.0005953005872831265, "loss": 1.9354, "step": 5087 }, { "epoch": 0.17, "grad_norm": 0.494690865278244, "learning_rate": 0.0005952987443262523, "loss": 1.977, "step": 5088 }, { "epoch": 0.17, "grad_norm": 0.4883388578891754, "learning_rate": 0.000595296901010929, "loss": 1.9657, "step": 5089 }, { "epoch": 0.17, "grad_norm": 0.4930472671985626, "learning_rate": 0.0005952950573371587, "loss": 1.9295, "step": 5090 }, { "epoch": 0.17, "grad_norm": 0.478636234998703, "learning_rate": 0.0005952932133049437, "loss": 1.984, "step": 5091 }, { "epoch": 0.17, "grad_norm": 0.4965232014656067, "learning_rate": 0.0005952913689142862, "loss": 1.8913, "step": 5092 }, { "epoch": 0.17, "grad_norm": 0.4969783127307892, "learning_rate": 0.0005952895241651886, "loss": 1.9202, "step": 5093 }, { "epoch": 0.17, "grad_norm": 0.49491310119628906, "learning_rate": 0.000595287679057653, "loss": 1.9978, "step": 5094 }, { "epoch": 0.17, "grad_norm": 0.4839744567871094, "learning_rate": 0.0005952858335916817, "loss": 1.953, "step": 5095 }, { "epoch": 0.17, "grad_norm": 0.506445050239563, "learning_rate": 0.0005952839877672769, "loss": 2.0589, "step": 5096 }, { "epoch": 0.17, "grad_norm": 0.4791354238986969, "learning_rate": 0.0005952821415844407, "loss": 1.9718, "step": 5097 }, { "epoch": 0.17, "grad_norm": 0.48188239336013794, "learning_rate": 0.0005952802950431757, "loss": 2.0192, "step": 5098 }, { "epoch": 0.17, "grad_norm": 0.49145179986953735, "learning_rate": 0.0005952784481434837, "loss": 1.936, "step": 5099 }, { "epoch": 0.17, "grad_norm": 0.5352071523666382, "learning_rate": 0.0005952766008853673, "loss": 1.951, "step": 5100 }, { "epoch": 0.17, "grad_norm": 0.534572422504425, "learning_rate": 0.0005952747532688286, "loss": 1.9689, "step": 5101 }, { "epoch": 0.17, "grad_norm": 0.5206575393676758, "learning_rate": 0.0005952729052938699, "loss": 1.9268, "step": 5102 }, { "epoch": 0.17, "grad_norm": 0.5268691182136536, "learning_rate": 0.0005952710569604934, "loss": 1.9659, "step": 5103 }, { "epoch": 0.17, "grad_norm": 0.47006773948669434, "learning_rate": 0.0005952692082687012, "loss": 1.7588, "step": 5104 }, { "epoch": 0.17, "grad_norm": 0.5164864659309387, "learning_rate": 0.0005952673592184959, "loss": 2.0502, "step": 5105 }, { "epoch": 0.17, "grad_norm": 0.5086122155189514, "learning_rate": 0.0005952655098098793, "loss": 2.0002, "step": 5106 }, { "epoch": 0.17, "grad_norm": 0.5083465576171875, "learning_rate": 0.0005952636600428541, "loss": 1.9563, "step": 5107 }, { "epoch": 0.17, "grad_norm": 0.5037480592727661, "learning_rate": 0.0005952618099174222, "loss": 2.0183, "step": 5108 }, { "epoch": 0.17, "grad_norm": 0.5275635719299316, "learning_rate": 0.000595259959433586, "loss": 2.0221, "step": 5109 }, { "epoch": 0.17, "grad_norm": 0.5139667987823486, "learning_rate": 0.0005952581085913477, "loss": 1.9479, "step": 5110 }, { "epoch": 0.17, "grad_norm": 0.5037155747413635, "learning_rate": 0.0005952562573907096, "loss": 1.9574, "step": 5111 }, { "epoch": 0.17, "grad_norm": 0.5066400170326233, "learning_rate": 0.0005952544058316739, "loss": 1.9524, "step": 5112 }, { "epoch": 0.17, "grad_norm": 0.5121831893920898, "learning_rate": 0.0005952525539142427, "loss": 1.9596, "step": 5113 }, { "epoch": 0.17, "grad_norm": 0.50080806016922, "learning_rate": 0.0005952507016384186, "loss": 2.0007, "step": 5114 }, { "epoch": 0.17, "grad_norm": 0.5089283585548401, "learning_rate": 0.0005952488490042036, "loss": 1.9834, "step": 5115 }, { "epoch": 0.17, "grad_norm": 0.4881787598133087, "learning_rate": 0.0005952469960115999, "loss": 1.9747, "step": 5116 }, { "epoch": 0.17, "grad_norm": 0.5398640036582947, "learning_rate": 0.00059524514266061, "loss": 2.0451, "step": 5117 }, { "epoch": 0.17, "grad_norm": 0.5817363858222961, "learning_rate": 0.000595243288951236, "loss": 2.0316, "step": 5118 }, { "epoch": 0.17, "grad_norm": 0.4859922528266907, "learning_rate": 0.00059524143488348, "loss": 1.9117, "step": 5119 }, { "epoch": 0.17, "grad_norm": 0.5282029509544373, "learning_rate": 0.0005952395804573444, "loss": 1.9805, "step": 5120 }, { "epoch": 0.17, "grad_norm": 0.5029043555259705, "learning_rate": 0.0005952377256728316, "loss": 1.9887, "step": 5121 }, { "epoch": 0.17, "grad_norm": 0.879298210144043, "learning_rate": 0.0005952358705299437, "loss": 2.0506, "step": 5122 }, { "epoch": 0.17, "grad_norm": 0.5061441659927368, "learning_rate": 0.0005952340150286828, "loss": 2.0167, "step": 5123 }, { "epoch": 0.17, "grad_norm": 0.568149745464325, "learning_rate": 0.0005952321591690514, "loss": 2.088, "step": 5124 }, { "epoch": 0.17, "grad_norm": 0.4894392192363739, "learning_rate": 0.0005952303029510516, "loss": 1.9386, "step": 5125 }, { "epoch": 0.17, "grad_norm": 0.48581427335739136, "learning_rate": 0.0005952284463746857, "loss": 1.9824, "step": 5126 }, { "epoch": 0.17, "grad_norm": 0.49917691946029663, "learning_rate": 0.0005952265894399562, "loss": 1.9763, "step": 5127 }, { "epoch": 0.17, "grad_norm": 0.49775341153144836, "learning_rate": 0.0005952247321468648, "loss": 1.981, "step": 5128 }, { "epoch": 0.17, "grad_norm": 0.47845837473869324, "learning_rate": 0.0005952228744954143, "loss": 1.9329, "step": 5129 }, { "epoch": 0.17, "grad_norm": 0.5010930895805359, "learning_rate": 0.0005952210164856067, "loss": 1.9769, "step": 5130 }, { "epoch": 0.17, "grad_norm": 0.488147497177124, "learning_rate": 0.0005952191581174442, "loss": 1.8988, "step": 5131 }, { "epoch": 0.17, "grad_norm": 0.4745587110519409, "learning_rate": 0.0005952172993909291, "loss": 1.8668, "step": 5132 }, { "epoch": 0.17, "grad_norm": 0.4865618050098419, "learning_rate": 0.0005952154403060638, "loss": 1.9788, "step": 5133 }, { "epoch": 0.17, "grad_norm": 0.506036639213562, "learning_rate": 0.0005952135808628505, "loss": 2.0671, "step": 5134 }, { "epoch": 0.17, "grad_norm": 0.5125438570976257, "learning_rate": 0.0005952117210612913, "loss": 1.9933, "step": 5135 }, { "epoch": 0.17, "grad_norm": 0.5043678283691406, "learning_rate": 0.0005952098609013886, "loss": 1.9715, "step": 5136 }, { "epoch": 0.17, "grad_norm": 0.507493257522583, "learning_rate": 0.0005952080003831446, "loss": 2.0199, "step": 5137 }, { "epoch": 0.17, "grad_norm": 0.49379590153694153, "learning_rate": 0.0005952061395065616, "loss": 1.9112, "step": 5138 }, { "epoch": 0.17, "grad_norm": 0.475649356842041, "learning_rate": 0.000595204278271642, "loss": 1.8812, "step": 5139 }, { "epoch": 0.17, "grad_norm": 0.5016258358955383, "learning_rate": 0.0005952024166783877, "loss": 1.9742, "step": 5140 }, { "epoch": 0.17, "grad_norm": 0.5174158215522766, "learning_rate": 0.0005952005547268012, "loss": 2.0506, "step": 5141 }, { "epoch": 0.17, "grad_norm": 0.49822932481765747, "learning_rate": 0.0005951986924168848, "loss": 2.006, "step": 5142 }, { "epoch": 0.17, "grad_norm": 0.5092324018478394, "learning_rate": 0.0005951968297486405, "loss": 2.0067, "step": 5143 }, { "epoch": 0.17, "grad_norm": 0.5120013952255249, "learning_rate": 0.0005951949667220709, "loss": 2.0417, "step": 5144 }, { "epoch": 0.17, "grad_norm": 0.5031549334526062, "learning_rate": 0.0005951931033371782, "loss": 2.001, "step": 5145 }, { "epoch": 0.17, "grad_norm": 0.49309495091438293, "learning_rate": 0.0005951912395939644, "loss": 1.9345, "step": 5146 }, { "epoch": 0.17, "grad_norm": 0.5136493444442749, "learning_rate": 0.0005951893754924319, "loss": 1.9698, "step": 5147 }, { "epoch": 0.17, "grad_norm": 0.49574658274650574, "learning_rate": 0.0005951875110325831, "loss": 1.948, "step": 5148 }, { "epoch": 0.17, "grad_norm": 0.49646705389022827, "learning_rate": 0.0005951856462144201, "loss": 1.9658, "step": 5149 }, { "epoch": 0.17, "grad_norm": 0.4894091784954071, "learning_rate": 0.0005951837810379453, "loss": 2.0547, "step": 5150 }, { "epoch": 0.17, "grad_norm": 0.49770626425743103, "learning_rate": 0.0005951819155031609, "loss": 1.9454, "step": 5151 }, { "epoch": 0.17, "grad_norm": 0.5053012371063232, "learning_rate": 0.000595180049610069, "loss": 1.9643, "step": 5152 }, { "epoch": 0.17, "grad_norm": 0.47551172971725464, "learning_rate": 0.000595178183358672, "loss": 1.9446, "step": 5153 }, { "epoch": 0.17, "grad_norm": 0.5006399154663086, "learning_rate": 0.0005951763167489723, "loss": 1.963, "step": 5154 }, { "epoch": 0.17, "grad_norm": 0.48503419756889343, "learning_rate": 0.0005951744497809721, "loss": 1.8786, "step": 5155 }, { "epoch": 0.17, "grad_norm": 0.5094938278198242, "learning_rate": 0.0005951725824546734, "loss": 1.9318, "step": 5156 }, { "epoch": 0.17, "grad_norm": 0.49039512872695923, "learning_rate": 0.0005951707147700789, "loss": 1.889, "step": 5157 }, { "epoch": 0.17, "grad_norm": 0.5142824649810791, "learning_rate": 0.0005951688467271905, "loss": 2.0256, "step": 5158 }, { "epoch": 0.17, "grad_norm": 0.49500247836112976, "learning_rate": 0.0005951669783260106, "loss": 1.9403, "step": 5159 }, { "epoch": 0.17, "grad_norm": 0.4973227381706238, "learning_rate": 0.0005951651095665415, "loss": 1.9522, "step": 5160 }, { "epoch": 0.17, "grad_norm": 0.5100184082984924, "learning_rate": 0.0005951632404487856, "loss": 1.9433, "step": 5161 }, { "epoch": 0.17, "grad_norm": 0.505626380443573, "learning_rate": 0.0005951613709727449, "loss": 1.8955, "step": 5162 }, { "epoch": 0.17, "grad_norm": 0.519931972026825, "learning_rate": 0.0005951595011384217, "loss": 1.9593, "step": 5163 }, { "epoch": 0.17, "grad_norm": 0.48629331588745117, "learning_rate": 0.0005951576309458184, "loss": 1.9924, "step": 5164 }, { "epoch": 0.17, "grad_norm": 0.5090648531913757, "learning_rate": 0.0005951557603949373, "loss": 1.9585, "step": 5165 }, { "epoch": 0.17, "grad_norm": 0.4942672550678253, "learning_rate": 0.0005951538894857806, "loss": 1.9422, "step": 5166 }, { "epoch": 0.17, "grad_norm": 0.5240160226821899, "learning_rate": 0.0005951520182183505, "loss": 1.9693, "step": 5167 }, { "epoch": 0.17, "grad_norm": 0.5146387219429016, "learning_rate": 0.0005951501465926494, "loss": 1.9119, "step": 5168 }, { "epoch": 0.17, "grad_norm": 0.4952488839626312, "learning_rate": 0.0005951482746086794, "loss": 1.9039, "step": 5169 }, { "epoch": 0.17, "grad_norm": 0.4929008483886719, "learning_rate": 0.000595146402266443, "loss": 1.9555, "step": 5170 }, { "epoch": 0.17, "grad_norm": 0.49615707993507385, "learning_rate": 0.0005951445295659422, "loss": 1.9845, "step": 5171 }, { "epoch": 0.17, "grad_norm": 0.511385977268219, "learning_rate": 0.0005951426565071797, "loss": 1.9428, "step": 5172 }, { "epoch": 0.17, "grad_norm": 0.5285813212394714, "learning_rate": 0.0005951407830901573, "loss": 2.0336, "step": 5173 }, { "epoch": 0.17, "grad_norm": 0.5015940070152283, "learning_rate": 0.0005951389093148775, "loss": 2.0391, "step": 5174 }, { "epoch": 0.17, "grad_norm": 0.49340400099754333, "learning_rate": 0.0005951370351813426, "loss": 1.9254, "step": 5175 }, { "epoch": 0.17, "grad_norm": 0.49818912148475647, "learning_rate": 0.0005951351606895548, "loss": 1.9862, "step": 5176 }, { "epoch": 0.17, "grad_norm": 0.5180684328079224, "learning_rate": 0.0005951332858395163, "loss": 1.9824, "step": 5177 }, { "epoch": 0.17, "grad_norm": 0.4890119135379791, "learning_rate": 0.0005951314106312296, "loss": 1.8934, "step": 5178 }, { "epoch": 0.17, "grad_norm": 0.5151060223579407, "learning_rate": 0.0005951295350646968, "loss": 1.9984, "step": 5179 }, { "epoch": 0.17, "grad_norm": 0.5110229253768921, "learning_rate": 0.0005951276591399203, "loss": 1.9362, "step": 5180 }, { "epoch": 0.17, "grad_norm": 0.526283323764801, "learning_rate": 0.0005951257828569022, "loss": 1.9627, "step": 5181 }, { "epoch": 0.17, "grad_norm": 0.5397654175758362, "learning_rate": 0.000595123906215645, "loss": 1.9848, "step": 5182 }, { "epoch": 0.17, "grad_norm": 0.4872967302799225, "learning_rate": 0.0005951220292161508, "loss": 1.9657, "step": 5183 }, { "epoch": 0.17, "grad_norm": 0.4976934790611267, "learning_rate": 0.000595120151858422, "loss": 1.9861, "step": 5184 }, { "epoch": 0.17, "grad_norm": 0.5211496949195862, "learning_rate": 0.0005951182741424606, "loss": 1.9271, "step": 5185 }, { "epoch": 0.17, "grad_norm": 0.520073413848877, "learning_rate": 0.0005951163960682694, "loss": 2.0094, "step": 5186 }, { "epoch": 0.17, "grad_norm": 0.49380552768707275, "learning_rate": 0.0005951145176358502, "loss": 1.9669, "step": 5187 }, { "epoch": 0.17, "grad_norm": 0.5222790837287903, "learning_rate": 0.0005951126388452054, "loss": 1.9325, "step": 5188 }, { "epoch": 0.17, "grad_norm": 0.5088186264038086, "learning_rate": 0.0005951107596963376, "loss": 1.9124, "step": 5189 }, { "epoch": 0.17, "grad_norm": 0.5796986222267151, "learning_rate": 0.0005951088801892485, "loss": 1.9988, "step": 5190 }, { "epoch": 0.17, "grad_norm": 0.5088716745376587, "learning_rate": 0.000595107000323941, "loss": 1.9418, "step": 5191 }, { "epoch": 0.17, "grad_norm": 0.5762566924095154, "learning_rate": 0.000595105120100417, "loss": 1.9908, "step": 5192 }, { "epoch": 0.17, "grad_norm": 0.4876179099082947, "learning_rate": 0.0005951032395186787, "loss": 1.8841, "step": 5193 }, { "epoch": 0.17, "grad_norm": 0.485980361700058, "learning_rate": 0.0005951013585787287, "loss": 1.9424, "step": 5194 }, { "epoch": 0.17, "grad_norm": 0.5082634091377258, "learning_rate": 0.000595099477280569, "loss": 1.9672, "step": 5195 }, { "epoch": 0.17, "grad_norm": 0.5181751847267151, "learning_rate": 0.0005950975956242021, "loss": 1.9759, "step": 5196 }, { "epoch": 0.17, "grad_norm": 0.4846656918525696, "learning_rate": 0.0005950957136096303, "loss": 1.9871, "step": 5197 }, { "epoch": 0.17, "grad_norm": 0.48727986216545105, "learning_rate": 0.0005950938312368557, "loss": 1.9834, "step": 5198 }, { "epoch": 0.17, "grad_norm": 0.5167755484580994, "learning_rate": 0.0005950919485058807, "loss": 2.0129, "step": 5199 }, { "epoch": 0.17, "grad_norm": 0.5153496265411377, "learning_rate": 0.0005950900654167074, "loss": 1.941, "step": 5200 }, { "epoch": 0.17, "grad_norm": 0.5091789364814758, "learning_rate": 0.0005950881819693384, "loss": 1.9203, "step": 5201 }, { "epoch": 0.17, "grad_norm": 0.4940543472766876, "learning_rate": 0.0005950862981637758, "loss": 1.9864, "step": 5202 }, { "epoch": 0.17, "grad_norm": 0.5122349262237549, "learning_rate": 0.0005950844140000218, "loss": 1.9841, "step": 5203 }, { "epoch": 0.17, "grad_norm": 0.4910712242126465, "learning_rate": 0.0005950825294780789, "loss": 2.009, "step": 5204 }, { "epoch": 0.17, "grad_norm": 0.49203070998191833, "learning_rate": 0.0005950806445979493, "loss": 1.9439, "step": 5205 }, { "epoch": 0.17, "grad_norm": 0.5066477656364441, "learning_rate": 0.0005950787593596354, "loss": 1.9627, "step": 5206 }, { "epoch": 0.17, "grad_norm": 0.5129390358924866, "learning_rate": 0.0005950768737631391, "loss": 2.0321, "step": 5207 }, { "epoch": 0.17, "grad_norm": 0.5047814846038818, "learning_rate": 0.0005950749878084631, "loss": 1.9041, "step": 5208 }, { "epoch": 0.17, "grad_norm": 0.5109702348709106, "learning_rate": 0.0005950731014956096, "loss": 1.9673, "step": 5209 }, { "epoch": 0.17, "grad_norm": 0.5729833245277405, "learning_rate": 0.0005950712148245807, "loss": 2.0033, "step": 5210 }, { "epoch": 0.17, "grad_norm": 0.5049989223480225, "learning_rate": 0.0005950693277953789, "loss": 1.9261, "step": 5211 }, { "epoch": 0.17, "grad_norm": 0.5128780603408813, "learning_rate": 0.0005950674404080063, "loss": 2.0125, "step": 5212 }, { "epoch": 0.17, "grad_norm": 0.5569694638252258, "learning_rate": 0.0005950655526624654, "loss": 2.0035, "step": 5213 }, { "epoch": 0.17, "grad_norm": 0.49669069051742554, "learning_rate": 0.0005950636645587585, "loss": 1.9364, "step": 5214 }, { "epoch": 0.17, "grad_norm": 0.5025939345359802, "learning_rate": 0.0005950617760968875, "loss": 1.9298, "step": 5215 }, { "epoch": 0.17, "grad_norm": 0.5074467062950134, "learning_rate": 0.0005950598872768552, "loss": 2.0309, "step": 5216 }, { "epoch": 0.17, "grad_norm": 0.4697745740413666, "learning_rate": 0.0005950579980986637, "loss": 1.966, "step": 5217 }, { "epoch": 0.17, "grad_norm": 0.47943878173828125, "learning_rate": 0.0005950561085623151, "loss": 1.9411, "step": 5218 }, { "epoch": 0.17, "grad_norm": 0.4829305410385132, "learning_rate": 0.0005950542186678119, "loss": 2.0113, "step": 5219 }, { "epoch": 0.17, "grad_norm": 0.4905100166797638, "learning_rate": 0.0005950523284151564, "loss": 1.8969, "step": 5220 }, { "epoch": 0.17, "grad_norm": 0.4929892122745514, "learning_rate": 0.0005950504378043509, "loss": 1.9728, "step": 5221 }, { "epoch": 0.17, "grad_norm": 0.49196526408195496, "learning_rate": 0.0005950485468353975, "loss": 1.9379, "step": 5222 }, { "epoch": 0.17, "grad_norm": 0.49611786007881165, "learning_rate": 0.0005950466555082988, "loss": 1.9043, "step": 5223 }, { "epoch": 0.17, "grad_norm": 0.5030035972595215, "learning_rate": 0.0005950447638230569, "loss": 1.9783, "step": 5224 }, { "epoch": 0.17, "grad_norm": 0.5268023014068604, "learning_rate": 0.000595042871779674, "loss": 2.041, "step": 5225 }, { "epoch": 0.17, "grad_norm": 0.5471386313438416, "learning_rate": 0.0005950409793781527, "loss": 2.0252, "step": 5226 }, { "epoch": 0.17, "grad_norm": 0.5121046900749207, "learning_rate": 0.000595039086618495, "loss": 1.9193, "step": 5227 }, { "epoch": 0.17, "grad_norm": 0.4999355971813202, "learning_rate": 0.0005950371935007033, "loss": 1.9625, "step": 5228 }, { "epoch": 0.17, "grad_norm": 0.48848968744277954, "learning_rate": 0.00059503530002478, "loss": 2.0299, "step": 5229 }, { "epoch": 0.17, "grad_norm": 0.48966801166534424, "learning_rate": 0.0005950334061907273, "loss": 1.9702, "step": 5230 }, { "epoch": 0.17, "grad_norm": 0.9483946561813354, "learning_rate": 0.0005950315119985475, "loss": 2.024, "step": 5231 }, { "epoch": 0.17, "grad_norm": 0.5115197896957397, "learning_rate": 0.000595029617448243, "loss": 2.0264, "step": 5232 }, { "epoch": 0.17, "grad_norm": 0.48676419258117676, "learning_rate": 0.0005950277225398159, "loss": 2.0294, "step": 5233 }, { "epoch": 0.17, "grad_norm": 0.49439966678619385, "learning_rate": 0.0005950258272732687, "loss": 1.9897, "step": 5234 }, { "epoch": 0.17, "grad_norm": 0.5344069004058838, "learning_rate": 0.0005950239316486035, "loss": 2.0294, "step": 5235 }, { "epoch": 0.17, "grad_norm": 0.49065932631492615, "learning_rate": 0.0005950220356658228, "loss": 1.921, "step": 5236 }, { "epoch": 0.17, "grad_norm": 0.4990319013595581, "learning_rate": 0.0005950201393249288, "loss": 2.0708, "step": 5237 }, { "epoch": 0.17, "grad_norm": 0.5373263359069824, "learning_rate": 0.0005950182426259238, "loss": 1.9025, "step": 5238 }, { "epoch": 0.17, "grad_norm": 0.4983552396297455, "learning_rate": 0.0005950163455688102, "loss": 2.0062, "step": 5239 }, { "epoch": 0.17, "grad_norm": 0.49193260073661804, "learning_rate": 0.0005950144481535901, "loss": 1.9175, "step": 5240 }, { "epoch": 0.17, "grad_norm": 0.5476084351539612, "learning_rate": 0.0005950125503802661, "loss": 1.9268, "step": 5241 }, { "epoch": 0.17, "grad_norm": 0.5104948282241821, "learning_rate": 0.0005950106522488402, "loss": 2.0118, "step": 5242 }, { "epoch": 0.17, "grad_norm": 0.5066025257110596, "learning_rate": 0.0005950087537593149, "loss": 1.9812, "step": 5243 }, { "epoch": 0.17, "grad_norm": 0.5194165706634521, "learning_rate": 0.0005950068549116924, "loss": 2.0247, "step": 5244 }, { "epoch": 0.17, "grad_norm": 0.5168247818946838, "learning_rate": 0.000595004955705975, "loss": 1.9575, "step": 5245 }, { "epoch": 0.17, "grad_norm": 0.48597973585128784, "learning_rate": 0.0005950030561421651, "loss": 2.0092, "step": 5246 }, { "epoch": 0.17, "grad_norm": 0.48710593581199646, "learning_rate": 0.000595001156220265, "loss": 2.0094, "step": 5247 }, { "epoch": 0.17, "grad_norm": 0.5139614343643188, "learning_rate": 0.0005949992559402768, "loss": 2.0163, "step": 5248 }, { "epoch": 0.17, "grad_norm": 0.4976816773414612, "learning_rate": 0.0005949973553022032, "loss": 2.0264, "step": 5249 }, { "epoch": 0.17, "grad_norm": 0.49273455142974854, "learning_rate": 0.0005949954543060462, "loss": 1.9846, "step": 5250 }, { "epoch": 0.17, "grad_norm": 0.4945806562900543, "learning_rate": 0.0005949935529518081, "loss": 1.9623, "step": 5251 }, { "epoch": 0.17, "grad_norm": 0.5277199149131775, "learning_rate": 0.0005949916512394913, "loss": 2.0226, "step": 5252 }, { "epoch": 0.17, "grad_norm": 0.5105828642845154, "learning_rate": 0.0005949897491690982, "loss": 2.0059, "step": 5253 }, { "epoch": 0.17, "grad_norm": 0.5211586356163025, "learning_rate": 0.000594987846740631, "loss": 1.9931, "step": 5254 }, { "epoch": 0.17, "grad_norm": 0.5362653732299805, "learning_rate": 0.0005949859439540919, "loss": 1.9321, "step": 5255 }, { "epoch": 0.17, "grad_norm": 0.5073714256286621, "learning_rate": 0.0005949840408094834, "loss": 2.0448, "step": 5256 }, { "epoch": 0.17, "grad_norm": 0.47892531752586365, "learning_rate": 0.0005949821373068077, "loss": 1.9083, "step": 5257 }, { "epoch": 0.17, "grad_norm": 0.5150130987167358, "learning_rate": 0.0005949802334460672, "loss": 1.9921, "step": 5258 }, { "epoch": 0.17, "grad_norm": 0.533955454826355, "learning_rate": 0.0005949783292272642, "loss": 1.9199, "step": 5259 }, { "epoch": 0.18, "grad_norm": 0.5402665734291077, "learning_rate": 0.0005949764246504009, "loss": 1.9496, "step": 5260 }, { "epoch": 0.18, "grad_norm": 0.5066125988960266, "learning_rate": 0.0005949745197154797, "loss": 1.9998, "step": 5261 }, { "epoch": 0.18, "grad_norm": 0.5501891374588013, "learning_rate": 0.0005949726144225029, "loss": 2.0376, "step": 5262 }, { "epoch": 0.18, "grad_norm": 0.4874485433101654, "learning_rate": 0.0005949707087714728, "loss": 1.929, "step": 5263 }, { "epoch": 0.18, "grad_norm": 0.48802512884140015, "learning_rate": 0.0005949688027623918, "loss": 1.9536, "step": 5264 }, { "epoch": 0.18, "grad_norm": 0.4881950616836548, "learning_rate": 0.000594966896395262, "loss": 2.0728, "step": 5265 }, { "epoch": 0.18, "grad_norm": 0.5096435546875, "learning_rate": 0.0005949649896700859, "loss": 1.9354, "step": 5266 }, { "epoch": 0.18, "grad_norm": 0.48661336302757263, "learning_rate": 0.0005949630825868657, "loss": 1.9416, "step": 5267 }, { "epoch": 0.18, "grad_norm": 0.47670048475265503, "learning_rate": 0.0005949611751456039, "loss": 1.9104, "step": 5268 }, { "epoch": 0.18, "grad_norm": 0.4846300482749939, "learning_rate": 0.0005949592673463027, "loss": 1.9441, "step": 5269 }, { "epoch": 0.18, "grad_norm": 0.5058528184890747, "learning_rate": 0.0005949573591889644, "loss": 2.0603, "step": 5270 }, { "epoch": 0.18, "grad_norm": 0.4908577799797058, "learning_rate": 0.0005949554506735912, "loss": 1.9333, "step": 5271 }, { "epoch": 0.18, "grad_norm": 0.4957182705402374, "learning_rate": 0.0005949535418001856, "loss": 2.0101, "step": 5272 }, { "epoch": 0.18, "grad_norm": 0.5051513910293579, "learning_rate": 0.0005949516325687499, "loss": 1.9516, "step": 5273 }, { "epoch": 0.18, "grad_norm": 0.4828358590602875, "learning_rate": 0.0005949497229792864, "loss": 1.9537, "step": 5274 }, { "epoch": 0.18, "grad_norm": 0.48198074102401733, "learning_rate": 0.0005949478130317973, "loss": 1.9514, "step": 5275 }, { "epoch": 0.18, "grad_norm": 0.49390238523483276, "learning_rate": 0.0005949459027262852, "loss": 1.9703, "step": 5276 }, { "epoch": 0.18, "grad_norm": 0.5513641834259033, "learning_rate": 0.0005949439920627521, "loss": 2.0555, "step": 5277 }, { "epoch": 0.18, "grad_norm": 0.4952118992805481, "learning_rate": 0.0005949420810412004, "loss": 1.9969, "step": 5278 }, { "epoch": 0.18, "grad_norm": 0.5024313926696777, "learning_rate": 0.0005949401696616325, "loss": 2.0363, "step": 5279 }, { "epoch": 0.18, "grad_norm": 0.4808249771595001, "learning_rate": 0.0005949382579240508, "loss": 2.0093, "step": 5280 }, { "epoch": 0.18, "grad_norm": 0.4978083074092865, "learning_rate": 0.0005949363458284574, "loss": 1.9788, "step": 5281 }, { "epoch": 0.18, "grad_norm": 0.4831140339374542, "learning_rate": 0.0005949344333748548, "loss": 1.9907, "step": 5282 }, { "epoch": 0.18, "grad_norm": 0.5314905047416687, "learning_rate": 0.0005949325205632451, "loss": 1.9575, "step": 5283 }, { "epoch": 0.18, "grad_norm": 0.4796006679534912, "learning_rate": 0.0005949306073936309, "loss": 1.928, "step": 5284 }, { "epoch": 0.18, "grad_norm": 0.4945320188999176, "learning_rate": 0.0005949286938660144, "loss": 2.005, "step": 5285 }, { "epoch": 0.18, "grad_norm": 0.5488598942756653, "learning_rate": 0.0005949267799803979, "loss": 2.0008, "step": 5286 }, { "epoch": 0.18, "grad_norm": 0.48795971274375916, "learning_rate": 0.0005949248657367838, "loss": 1.9559, "step": 5287 }, { "epoch": 0.18, "grad_norm": 0.5180366635322571, "learning_rate": 0.0005949229511351743, "loss": 1.9771, "step": 5288 }, { "epoch": 0.18, "grad_norm": 0.506210446357727, "learning_rate": 0.0005949210361755719, "loss": 2.0784, "step": 5289 }, { "epoch": 0.18, "grad_norm": 0.5125028491020203, "learning_rate": 0.0005949191208579786, "loss": 1.9949, "step": 5290 }, { "epoch": 0.18, "grad_norm": 0.5031513571739197, "learning_rate": 0.0005949172051823972, "loss": 1.9234, "step": 5291 }, { "epoch": 0.18, "grad_norm": 0.49909254908561707, "learning_rate": 0.0005949152891488297, "loss": 1.9002, "step": 5292 }, { "epoch": 0.18, "grad_norm": 0.4898774027824402, "learning_rate": 0.0005949133727572784, "loss": 2.023, "step": 5293 }, { "epoch": 0.18, "grad_norm": 0.5120565295219421, "learning_rate": 0.0005949114560077457, "loss": 2.0244, "step": 5294 }, { "epoch": 0.18, "grad_norm": 0.5047743320465088, "learning_rate": 0.000594909538900234, "loss": 2.0368, "step": 5295 }, { "epoch": 0.18, "grad_norm": 0.5029824376106262, "learning_rate": 0.0005949076214347456, "loss": 1.9994, "step": 5296 }, { "epoch": 0.18, "grad_norm": 0.49236375093460083, "learning_rate": 0.0005949057036112827, "loss": 1.9202, "step": 5297 }, { "epoch": 0.18, "grad_norm": 0.4933236241340637, "learning_rate": 0.0005949037854298478, "loss": 1.994, "step": 5298 }, { "epoch": 0.18, "grad_norm": 0.49933862686157227, "learning_rate": 0.0005949018668904432, "loss": 1.9863, "step": 5299 }, { "epoch": 0.18, "grad_norm": 0.4757123589515686, "learning_rate": 0.0005948999479930712, "loss": 1.9462, "step": 5300 }, { "epoch": 0.18, "grad_norm": 0.5003297328948975, "learning_rate": 0.000594898028737734, "loss": 1.9925, "step": 5301 }, { "epoch": 0.18, "grad_norm": 0.5070134997367859, "learning_rate": 0.0005948961091244341, "loss": 2.1009, "step": 5302 }, { "epoch": 0.18, "grad_norm": 0.48308995366096497, "learning_rate": 0.0005948941891531738, "loss": 1.9687, "step": 5303 }, { "epoch": 0.18, "grad_norm": 0.4979947805404663, "learning_rate": 0.0005948922688239554, "loss": 2.0054, "step": 5304 }, { "epoch": 0.18, "grad_norm": 0.5070236921310425, "learning_rate": 0.0005948903481367812, "loss": 2.0018, "step": 5305 }, { "epoch": 0.18, "grad_norm": 0.5061094760894775, "learning_rate": 0.0005948884270916535, "loss": 1.9566, "step": 5306 }, { "epoch": 0.18, "grad_norm": 0.4975890517234802, "learning_rate": 0.0005948865056885749, "loss": 2.0153, "step": 5307 }, { "epoch": 0.18, "grad_norm": 0.4885285198688507, "learning_rate": 0.0005948845839275474, "loss": 2.0197, "step": 5308 }, { "epoch": 0.18, "grad_norm": 0.7413203716278076, "learning_rate": 0.0005948826618085734, "loss": 1.9881, "step": 5309 }, { "epoch": 0.18, "grad_norm": 0.5076820850372314, "learning_rate": 0.0005948807393316555, "loss": 1.9182, "step": 5310 }, { "epoch": 0.18, "grad_norm": 0.4794643819332123, "learning_rate": 0.0005948788164967956, "loss": 1.9668, "step": 5311 }, { "epoch": 0.18, "grad_norm": 0.4918330907821655, "learning_rate": 0.0005948768933039965, "loss": 1.9827, "step": 5312 }, { "epoch": 0.18, "grad_norm": 0.49455714225769043, "learning_rate": 0.0005948749697532601, "loss": 2.0404, "step": 5313 }, { "epoch": 0.18, "grad_norm": 0.48699337244033813, "learning_rate": 0.000594873045844589, "loss": 2.038, "step": 5314 }, { "epoch": 0.18, "grad_norm": 0.5058645606040955, "learning_rate": 0.0005948711215779855, "loss": 1.9818, "step": 5315 }, { "epoch": 0.18, "grad_norm": 0.6913795471191406, "learning_rate": 0.0005948691969534519, "loss": 2.0192, "step": 5316 }, { "epoch": 0.18, "grad_norm": 0.49011844396591187, "learning_rate": 0.0005948672719709904, "loss": 2.0356, "step": 5317 }, { "epoch": 0.18, "grad_norm": 0.5225269794464111, "learning_rate": 0.0005948653466306038, "loss": 1.8672, "step": 5318 }, { "epoch": 0.18, "grad_norm": 0.4862852096557617, "learning_rate": 0.0005948634209322938, "loss": 2.0173, "step": 5319 }, { "epoch": 0.18, "grad_norm": 0.4925413429737091, "learning_rate": 0.0005948614948760632, "loss": 1.9736, "step": 5320 }, { "epoch": 0.18, "grad_norm": 0.47818535566329956, "learning_rate": 0.0005948595684619141, "loss": 1.9418, "step": 5321 }, { "epoch": 0.18, "grad_norm": 0.5149281024932861, "learning_rate": 0.000594857641689849, "loss": 2.0475, "step": 5322 }, { "epoch": 0.18, "grad_norm": 0.48233357071876526, "learning_rate": 0.0005948557145598702, "loss": 1.9761, "step": 5323 }, { "epoch": 0.18, "grad_norm": 0.47735360264778137, "learning_rate": 0.00059485378707198, "loss": 1.9232, "step": 5324 }, { "epoch": 0.18, "grad_norm": 0.49708542227745056, "learning_rate": 0.0005948518592261806, "loss": 1.9651, "step": 5325 }, { "epoch": 0.18, "grad_norm": 0.4950670599937439, "learning_rate": 0.0005948499310224747, "loss": 1.9957, "step": 5326 }, { "epoch": 0.18, "grad_norm": 0.4924145042896271, "learning_rate": 0.0005948480024608643, "loss": 1.9974, "step": 5327 }, { "epoch": 0.18, "grad_norm": 0.4930672347545624, "learning_rate": 0.0005948460735413519, "loss": 1.9563, "step": 5328 }, { "epoch": 0.18, "grad_norm": 0.464878112077713, "learning_rate": 0.0005948441442639399, "loss": 1.9371, "step": 5329 }, { "epoch": 0.18, "grad_norm": 0.5185195803642273, "learning_rate": 0.0005948422146286303, "loss": 1.9643, "step": 5330 }, { "epoch": 0.18, "grad_norm": 0.5108018517494202, "learning_rate": 0.0005948402846354259, "loss": 1.9983, "step": 5331 }, { "epoch": 0.18, "grad_norm": 0.4945807158946991, "learning_rate": 0.0005948383542843289, "loss": 1.9883, "step": 5332 }, { "epoch": 0.18, "grad_norm": 0.49679064750671387, "learning_rate": 0.0005948364235753414, "loss": 1.9911, "step": 5333 }, { "epoch": 0.18, "grad_norm": 0.5060734152793884, "learning_rate": 0.000594834492508466, "loss": 1.9685, "step": 5334 }, { "epoch": 0.18, "grad_norm": 0.5062717199325562, "learning_rate": 0.000594832561083705, "loss": 1.9652, "step": 5335 }, { "epoch": 0.18, "grad_norm": 0.49505615234375, "learning_rate": 0.0005948306293010607, "loss": 1.9876, "step": 5336 }, { "epoch": 0.18, "grad_norm": 0.5210793614387512, "learning_rate": 0.0005948286971605353, "loss": 1.9725, "step": 5337 }, { "epoch": 0.18, "grad_norm": 0.4935409128665924, "learning_rate": 0.0005948267646621315, "loss": 1.9629, "step": 5338 }, { "epoch": 0.18, "grad_norm": 0.4860256016254425, "learning_rate": 0.0005948248318058514, "loss": 1.9339, "step": 5339 }, { "epoch": 0.18, "grad_norm": 0.48703861236572266, "learning_rate": 0.0005948228985916974, "loss": 1.9471, "step": 5340 }, { "epoch": 0.18, "grad_norm": 0.5041409134864807, "learning_rate": 0.0005948209650196718, "loss": 2.043, "step": 5341 }, { "epoch": 0.18, "grad_norm": 0.4919695556163788, "learning_rate": 0.000594819031089777, "loss": 1.954, "step": 5342 }, { "epoch": 0.18, "grad_norm": 0.4732459485530853, "learning_rate": 0.0005948170968020154, "loss": 1.9678, "step": 5343 }, { "epoch": 0.18, "grad_norm": 0.5202415585517883, "learning_rate": 0.0005948151621563892, "loss": 1.9509, "step": 5344 }, { "epoch": 0.18, "grad_norm": 0.4848218560218811, "learning_rate": 0.0005948132271529008, "loss": 1.9852, "step": 5345 }, { "epoch": 0.18, "grad_norm": 0.4725375473499298, "learning_rate": 0.0005948112917915527, "loss": 1.9276, "step": 5346 }, { "epoch": 0.18, "grad_norm": 0.5065538883209229, "learning_rate": 0.0005948093560723471, "loss": 1.9625, "step": 5347 }, { "epoch": 0.18, "grad_norm": 0.4956659972667694, "learning_rate": 0.0005948074199952863, "loss": 1.9212, "step": 5348 }, { "epoch": 0.18, "grad_norm": 0.4925312399864197, "learning_rate": 0.0005948054835603728, "loss": 1.9655, "step": 5349 }, { "epoch": 0.18, "grad_norm": 0.503628134727478, "learning_rate": 0.0005948035467676089, "loss": 1.9956, "step": 5350 }, { "epoch": 0.18, "grad_norm": 0.52004075050354, "learning_rate": 0.0005948016096169969, "loss": 1.9468, "step": 5351 }, { "epoch": 0.18, "grad_norm": 0.47981390357017517, "learning_rate": 0.0005947996721085392, "loss": 1.9531, "step": 5352 }, { "epoch": 0.18, "grad_norm": 0.49939262866973877, "learning_rate": 0.0005947977342422381, "loss": 2.0303, "step": 5353 }, { "epoch": 0.18, "grad_norm": 0.5399957299232483, "learning_rate": 0.0005947957960180959, "loss": 1.9711, "step": 5354 }, { "epoch": 0.18, "grad_norm": 0.5040106773376465, "learning_rate": 0.0005947938574361152, "loss": 1.9263, "step": 5355 }, { "epoch": 0.18, "grad_norm": 0.49192455410957336, "learning_rate": 0.0005947919184962981, "loss": 2.0367, "step": 5356 }, { "epoch": 0.18, "grad_norm": 0.5082659721374512, "learning_rate": 0.0005947899791986472, "loss": 1.8906, "step": 5357 }, { "epoch": 0.18, "grad_norm": 0.5167986154556274, "learning_rate": 0.0005947880395431645, "loss": 1.9329, "step": 5358 }, { "epoch": 0.18, "grad_norm": 0.5068008899688721, "learning_rate": 0.0005947860995298526, "loss": 1.9954, "step": 5359 }, { "epoch": 0.18, "grad_norm": 0.5323313474655151, "learning_rate": 0.0005947841591587139, "loss": 2.0298, "step": 5360 }, { "epoch": 0.18, "grad_norm": 0.4981512129306793, "learning_rate": 0.0005947822184297506, "loss": 1.9965, "step": 5361 }, { "epoch": 0.18, "grad_norm": 0.5033387541770935, "learning_rate": 0.0005947802773429652, "loss": 1.9068, "step": 5362 }, { "epoch": 0.18, "grad_norm": 0.5022661685943604, "learning_rate": 0.0005947783358983598, "loss": 1.9567, "step": 5363 }, { "epoch": 0.18, "grad_norm": 0.5083442330360413, "learning_rate": 0.0005947763940959371, "loss": 1.9292, "step": 5364 }, { "epoch": 0.18, "grad_norm": 0.4908916652202606, "learning_rate": 0.0005947744519356992, "loss": 2.0253, "step": 5365 }, { "epoch": 0.18, "grad_norm": 0.49361979961395264, "learning_rate": 0.0005947725094176485, "loss": 1.9632, "step": 5366 }, { "epoch": 0.18, "grad_norm": 0.4894043505191803, "learning_rate": 0.0005947705665417875, "loss": 1.952, "step": 5367 }, { "epoch": 0.18, "grad_norm": 0.5065600872039795, "learning_rate": 0.0005947686233081185, "loss": 2.0333, "step": 5368 }, { "epoch": 0.18, "grad_norm": 0.4902355372905731, "learning_rate": 0.0005947666797166438, "loss": 2.0217, "step": 5369 }, { "epoch": 0.18, "grad_norm": 0.48605164885520935, "learning_rate": 0.0005947647357673657, "loss": 1.9936, "step": 5370 }, { "epoch": 0.18, "grad_norm": 0.4917449355125427, "learning_rate": 0.0005947627914602868, "loss": 1.9668, "step": 5371 }, { "epoch": 0.18, "grad_norm": 0.5350658893585205, "learning_rate": 0.0005947608467954091, "loss": 1.8551, "step": 5372 }, { "epoch": 0.18, "grad_norm": 0.48038575053215027, "learning_rate": 0.0005947589017727353, "loss": 1.9776, "step": 5373 }, { "epoch": 0.18, "grad_norm": 0.4903299808502197, "learning_rate": 0.0005947569563922676, "loss": 1.9121, "step": 5374 }, { "epoch": 0.18, "grad_norm": 0.5026282668113708, "learning_rate": 0.0005947550106540085, "loss": 1.9637, "step": 5375 }, { "epoch": 0.18, "grad_norm": 0.48124781250953674, "learning_rate": 0.00059475306455796, "loss": 2.0454, "step": 5376 }, { "epoch": 0.18, "grad_norm": 0.4911069869995117, "learning_rate": 0.0005947511181041248, "loss": 2.0091, "step": 5377 }, { "epoch": 0.18, "grad_norm": 0.4942663013935089, "learning_rate": 0.0005947491712925052, "loss": 2.078, "step": 5378 }, { "epoch": 0.18, "grad_norm": 0.5194820165634155, "learning_rate": 0.0005947472241231036, "loss": 1.8704, "step": 5379 }, { "epoch": 0.18, "grad_norm": 0.48758265376091003, "learning_rate": 0.0005947452765959222, "loss": 1.9818, "step": 5380 }, { "epoch": 0.18, "grad_norm": 0.49264004826545715, "learning_rate": 0.0005947433287109635, "loss": 1.9895, "step": 5381 }, { "epoch": 0.18, "grad_norm": 0.4999961853027344, "learning_rate": 0.0005947413804682298, "loss": 2.0544, "step": 5382 }, { "epoch": 0.18, "grad_norm": 0.5044416189193726, "learning_rate": 0.0005947394318677236, "loss": 1.8751, "step": 5383 }, { "epoch": 0.18, "grad_norm": 0.5313351154327393, "learning_rate": 0.000594737482909447, "loss": 2.0008, "step": 5384 }, { "epoch": 0.18, "grad_norm": 0.4875807464122772, "learning_rate": 0.0005947355335934026, "loss": 1.9536, "step": 5385 }, { "epoch": 0.18, "grad_norm": 0.5068901181221008, "learning_rate": 0.0005947335839195928, "loss": 2.0057, "step": 5386 }, { "epoch": 0.18, "grad_norm": 0.5018950700759888, "learning_rate": 0.0005947316338880197, "loss": 1.9675, "step": 5387 }, { "epoch": 0.18, "grad_norm": 0.49740782380104065, "learning_rate": 0.0005947296834986858, "loss": 1.846, "step": 5388 }, { "epoch": 0.18, "grad_norm": 0.48821792006492615, "learning_rate": 0.0005947277327515935, "loss": 1.9742, "step": 5389 }, { "epoch": 0.18, "grad_norm": 0.48895472288131714, "learning_rate": 0.0005947257816467453, "loss": 1.9035, "step": 5390 }, { "epoch": 0.18, "grad_norm": 0.49382707476615906, "learning_rate": 0.0005947238301841433, "loss": 2.0286, "step": 5391 }, { "epoch": 0.18, "grad_norm": 0.4865427315235138, "learning_rate": 0.0005947218783637901, "loss": 1.9557, "step": 5392 }, { "epoch": 0.18, "grad_norm": 0.5106506943702698, "learning_rate": 0.0005947199261856879, "loss": 2.0048, "step": 5393 }, { "epoch": 0.18, "grad_norm": 0.5007174611091614, "learning_rate": 0.0005947179736498392, "loss": 2.0517, "step": 5394 }, { "epoch": 0.18, "grad_norm": 0.4715003967285156, "learning_rate": 0.0005947160207562461, "loss": 1.9077, "step": 5395 }, { "epoch": 0.18, "grad_norm": 0.49706289172172546, "learning_rate": 0.0005947140675049114, "loss": 1.9671, "step": 5396 }, { "epoch": 0.18, "grad_norm": 0.47157132625579834, "learning_rate": 0.0005947121138958373, "loss": 1.972, "step": 5397 }, { "epoch": 0.18, "grad_norm": 0.507610023021698, "learning_rate": 0.0005947101599290259, "loss": 1.9779, "step": 5398 }, { "epoch": 0.18, "grad_norm": 0.48556438088417053, "learning_rate": 0.00059470820560448, "loss": 1.9289, "step": 5399 }, { "epoch": 0.18, "grad_norm": 0.4847368001937866, "learning_rate": 0.0005947062509222015, "loss": 1.9108, "step": 5400 }, { "epoch": 0.18, "grad_norm": 0.5004438161849976, "learning_rate": 0.0005947042958821931, "loss": 1.9699, "step": 5401 }, { "epoch": 0.18, "grad_norm": 0.46894022822380066, "learning_rate": 0.0005947023404844573, "loss": 1.9702, "step": 5402 }, { "epoch": 0.18, "grad_norm": 0.5038300156593323, "learning_rate": 0.0005947003847289961, "loss": 2.1145, "step": 5403 }, { "epoch": 0.18, "grad_norm": 0.48841843008995056, "learning_rate": 0.0005946984286158121, "loss": 1.9777, "step": 5404 }, { "epoch": 0.18, "grad_norm": 0.4944285750389099, "learning_rate": 0.0005946964721449077, "loss": 1.9286, "step": 5405 }, { "epoch": 0.18, "grad_norm": 0.49107104539871216, "learning_rate": 0.0005946945153162851, "loss": 1.9406, "step": 5406 }, { "epoch": 0.18, "grad_norm": 0.4856160581111908, "learning_rate": 0.0005946925581299468, "loss": 1.96, "step": 5407 }, { "epoch": 0.18, "grad_norm": 0.49180248379707336, "learning_rate": 0.0005946906005858952, "loss": 1.9639, "step": 5408 }, { "epoch": 0.18, "grad_norm": 0.4924662411212921, "learning_rate": 0.0005946886426841326, "loss": 1.9087, "step": 5409 }, { "epoch": 0.18, "grad_norm": 0.4709166884422302, "learning_rate": 0.0005946866844246613, "loss": 1.921, "step": 5410 }, { "epoch": 0.18, "grad_norm": 0.4749336838722229, "learning_rate": 0.000594684725807484, "loss": 1.9465, "step": 5411 }, { "epoch": 0.18, "grad_norm": 0.506822943687439, "learning_rate": 0.0005946827668326028, "loss": 1.8956, "step": 5412 }, { "epoch": 0.18, "grad_norm": 0.5277054905891418, "learning_rate": 0.0005946808075000201, "loss": 1.9921, "step": 5413 }, { "epoch": 0.18, "grad_norm": 0.5165614485740662, "learning_rate": 0.0005946788478097383, "loss": 1.9638, "step": 5414 }, { "epoch": 0.18, "grad_norm": 0.5077412128448486, "learning_rate": 0.0005946768877617599, "loss": 1.8934, "step": 5415 }, { "epoch": 0.18, "grad_norm": 0.5255921483039856, "learning_rate": 0.0005946749273560871, "loss": 2.0446, "step": 5416 }, { "epoch": 0.18, "grad_norm": 0.4915761649608612, "learning_rate": 0.0005946729665927224, "loss": 1.9641, "step": 5417 }, { "epoch": 0.18, "grad_norm": 0.4953855276107788, "learning_rate": 0.000594671005471668, "loss": 1.9759, "step": 5418 }, { "epoch": 0.18, "grad_norm": 0.5102595090866089, "learning_rate": 0.0005946690439929265, "loss": 1.9758, "step": 5419 }, { "epoch": 0.18, "grad_norm": 0.49018433690071106, "learning_rate": 0.0005946670821565002, "loss": 1.9418, "step": 5420 }, { "epoch": 0.18, "grad_norm": 0.5131819248199463, "learning_rate": 0.0005946651199623916, "loss": 1.9558, "step": 5421 }, { "epoch": 0.18, "grad_norm": 0.47510045766830444, "learning_rate": 0.0005946631574106028, "loss": 1.9382, "step": 5422 }, { "epoch": 0.18, "grad_norm": 0.49513256549835205, "learning_rate": 0.0005946611945011365, "loss": 1.9628, "step": 5423 }, { "epoch": 0.18, "grad_norm": 0.5021260976791382, "learning_rate": 0.0005946592312339947, "loss": 1.9204, "step": 5424 }, { "epoch": 0.18, "grad_norm": 0.4942092001438141, "learning_rate": 0.0005946572676091801, "loss": 2.0036, "step": 5425 }, { "epoch": 0.18, "grad_norm": 0.4853740334510803, "learning_rate": 0.0005946553036266951, "loss": 2.015, "step": 5426 }, { "epoch": 0.18, "grad_norm": 0.49136027693748474, "learning_rate": 0.0005946533392865419, "loss": 2.0029, "step": 5427 }, { "epoch": 0.18, "grad_norm": 0.4968080222606659, "learning_rate": 0.000594651374588723, "loss": 2.0533, "step": 5428 }, { "epoch": 0.18, "grad_norm": 0.49196484684944153, "learning_rate": 0.0005946494095332407, "loss": 1.9544, "step": 5429 }, { "epoch": 0.18, "grad_norm": 0.5016577839851379, "learning_rate": 0.0005946474441200974, "loss": 1.9537, "step": 5430 }, { "epoch": 0.18, "grad_norm": 0.517021894454956, "learning_rate": 0.0005946454783492956, "loss": 1.9924, "step": 5431 }, { "epoch": 0.18, "grad_norm": 0.5145630836486816, "learning_rate": 0.0005946435122208375, "loss": 1.9818, "step": 5432 }, { "epoch": 0.18, "grad_norm": 0.4887215793132782, "learning_rate": 0.0005946415457347257, "loss": 1.9314, "step": 5433 }, { "epoch": 0.18, "grad_norm": 0.5172138214111328, "learning_rate": 0.0005946395788909624, "loss": 1.9676, "step": 5434 }, { "epoch": 0.18, "grad_norm": 0.5063222646713257, "learning_rate": 0.0005946376116895501, "loss": 1.9477, "step": 5435 }, { "epoch": 0.18, "grad_norm": 0.47560980916023254, "learning_rate": 0.0005946356441304911, "loss": 1.9421, "step": 5436 }, { "epoch": 0.18, "grad_norm": 0.5018136501312256, "learning_rate": 0.0005946336762137879, "loss": 2.0224, "step": 5437 }, { "epoch": 0.18, "grad_norm": 0.5167814493179321, "learning_rate": 0.0005946317079394427, "loss": 1.9978, "step": 5438 }, { "epoch": 0.18, "grad_norm": 0.48676347732543945, "learning_rate": 0.0005946297393074583, "loss": 1.9911, "step": 5439 }, { "epoch": 0.18, "grad_norm": 0.4951256811618805, "learning_rate": 0.0005946277703178366, "loss": 2.0495, "step": 5440 }, { "epoch": 0.18, "grad_norm": 0.5051433444023132, "learning_rate": 0.0005946258009705802, "loss": 1.9986, "step": 5441 }, { "epoch": 0.18, "grad_norm": 0.48942142724990845, "learning_rate": 0.0005946238312656916, "loss": 1.9917, "step": 5442 }, { "epoch": 0.18, "grad_norm": 0.49349913001060486, "learning_rate": 0.0005946218612031729, "loss": 1.9502, "step": 5443 }, { "epoch": 0.18, "grad_norm": 0.4970780909061432, "learning_rate": 0.0005946198907830268, "loss": 1.9191, "step": 5444 }, { "epoch": 0.18, "grad_norm": 0.5013541579246521, "learning_rate": 0.0005946179200052555, "loss": 1.9341, "step": 5445 }, { "epoch": 0.18, "grad_norm": 0.48909792304039, "learning_rate": 0.0005946159488698615, "loss": 1.9295, "step": 5446 }, { "epoch": 0.18, "grad_norm": 0.4853792190551758, "learning_rate": 0.0005946139773768471, "loss": 1.9683, "step": 5447 }, { "epoch": 0.18, "grad_norm": 0.5104318261146545, "learning_rate": 0.0005946120055262149, "loss": 1.938, "step": 5448 }, { "epoch": 0.18, "grad_norm": 0.5060231685638428, "learning_rate": 0.000594610033317967, "loss": 1.9297, "step": 5449 }, { "epoch": 0.18, "grad_norm": 0.49413996934890747, "learning_rate": 0.000594608060752106, "loss": 1.9447, "step": 5450 }, { "epoch": 0.18, "grad_norm": 0.48708295822143555, "learning_rate": 0.000594606087828634, "loss": 1.9834, "step": 5451 }, { "epoch": 0.18, "grad_norm": 0.4792928993701935, "learning_rate": 0.0005946041145475538, "loss": 1.9883, "step": 5452 }, { "epoch": 0.18, "grad_norm": 0.4984626770019531, "learning_rate": 0.0005946021409088676, "loss": 1.9516, "step": 5453 }, { "epoch": 0.18, "grad_norm": 0.5148974061012268, "learning_rate": 0.0005946001669125779, "loss": 2.0845, "step": 5454 }, { "epoch": 0.18, "grad_norm": 0.48829206824302673, "learning_rate": 0.0005945981925586869, "loss": 2.0457, "step": 5455 }, { "epoch": 0.18, "grad_norm": 0.5486525297164917, "learning_rate": 0.0005945962178471971, "loss": 2.0551, "step": 5456 }, { "epoch": 0.18, "grad_norm": 0.500654399394989, "learning_rate": 0.0005945942427781109, "loss": 1.9299, "step": 5457 }, { "epoch": 0.18, "grad_norm": 0.5031554102897644, "learning_rate": 0.0005945922673514307, "loss": 1.9689, "step": 5458 }, { "epoch": 0.18, "grad_norm": 0.4733639657497406, "learning_rate": 0.0005945902915671589, "loss": 1.9746, "step": 5459 }, { "epoch": 0.18, "grad_norm": 0.48723939061164856, "learning_rate": 0.0005945883154252979, "loss": 1.9143, "step": 5460 }, { "epoch": 0.18, "grad_norm": 0.5395733714103699, "learning_rate": 0.0005945863389258501, "loss": 2.0242, "step": 5461 }, { "epoch": 0.18, "grad_norm": 0.4848390221595764, "learning_rate": 0.0005945843620688179, "loss": 2.0244, "step": 5462 }, { "epoch": 0.18, "grad_norm": 0.47655266523361206, "learning_rate": 0.0005945823848542036, "loss": 1.9239, "step": 5463 }, { "epoch": 0.18, "grad_norm": 0.500599205493927, "learning_rate": 0.0005945804072820098, "loss": 2.0009, "step": 5464 }, { "epoch": 0.18, "grad_norm": 0.5351284146308899, "learning_rate": 0.0005945784293522387, "loss": 2.0474, "step": 5465 }, { "epoch": 0.18, "grad_norm": 0.4830135703086853, "learning_rate": 0.0005945764510648929, "loss": 2.0778, "step": 5466 }, { "epoch": 0.18, "grad_norm": 0.4753499925136566, "learning_rate": 0.0005945744724199746, "loss": 1.9353, "step": 5467 }, { "epoch": 0.18, "grad_norm": 0.5132626295089722, "learning_rate": 0.0005945724934174863, "loss": 2.0758, "step": 5468 }, { "epoch": 0.18, "grad_norm": 0.5405130386352539, "learning_rate": 0.0005945705140574304, "loss": 2.0235, "step": 5469 }, { "epoch": 0.18, "grad_norm": 0.49127107858657837, "learning_rate": 0.0005945685343398093, "loss": 1.9507, "step": 5470 }, { "epoch": 0.18, "grad_norm": 0.5045151710510254, "learning_rate": 0.0005945665542646254, "loss": 2.0785, "step": 5471 }, { "epoch": 0.18, "grad_norm": 0.5243345499038696, "learning_rate": 0.0005945645738318812, "loss": 2.0208, "step": 5472 }, { "epoch": 0.18, "grad_norm": 0.48580512404441833, "learning_rate": 0.0005945625930415789, "loss": 1.9215, "step": 5473 }, { "epoch": 0.18, "grad_norm": 0.48944640159606934, "learning_rate": 0.0005945606118937211, "loss": 1.889, "step": 5474 }, { "epoch": 0.18, "grad_norm": 0.5090672373771667, "learning_rate": 0.00059455863038831, "loss": 1.9425, "step": 5475 }, { "epoch": 0.18, "grad_norm": 0.5029572248458862, "learning_rate": 0.0005945566485253482, "loss": 1.9668, "step": 5476 }, { "epoch": 0.18, "grad_norm": 0.5144270658493042, "learning_rate": 0.000594554666304838, "loss": 1.9817, "step": 5477 }, { "epoch": 0.18, "grad_norm": 0.5068483352661133, "learning_rate": 0.0005945526837267818, "loss": 2.001, "step": 5478 }, { "epoch": 0.18, "grad_norm": 0.501627504825592, "learning_rate": 0.0005945507007911821, "loss": 2.0153, "step": 5479 }, { "epoch": 0.18, "grad_norm": 0.48549434542655945, "learning_rate": 0.0005945487174980413, "loss": 2.0259, "step": 5480 }, { "epoch": 0.18, "grad_norm": 0.48458459973335266, "learning_rate": 0.0005945467338473617, "loss": 1.9337, "step": 5481 }, { "epoch": 0.18, "grad_norm": 0.4938867390155792, "learning_rate": 0.0005945447498391458, "loss": 1.9669, "step": 5482 }, { "epoch": 0.18, "grad_norm": 0.5092560648918152, "learning_rate": 0.0005945427654733959, "loss": 1.9311, "step": 5483 }, { "epoch": 0.18, "grad_norm": 0.4935828745365143, "learning_rate": 0.0005945407807501146, "loss": 2.031, "step": 5484 }, { "epoch": 0.18, "grad_norm": 0.4883565902709961, "learning_rate": 0.000594538795669304, "loss": 1.9295, "step": 5485 }, { "epoch": 0.18, "grad_norm": 0.48509979248046875, "learning_rate": 0.0005945368102309668, "loss": 1.9255, "step": 5486 }, { "epoch": 0.18, "grad_norm": 0.4831327497959137, "learning_rate": 0.0005945348244351053, "loss": 1.931, "step": 5487 }, { "epoch": 0.18, "grad_norm": 0.49949678778648376, "learning_rate": 0.000594532838281722, "loss": 2.0122, "step": 5488 }, { "epoch": 0.18, "grad_norm": 0.48214957118034363, "learning_rate": 0.0005945308517708191, "loss": 1.9795, "step": 5489 }, { "epoch": 0.18, "grad_norm": 0.5005794167518616, "learning_rate": 0.0005945288649023994, "loss": 1.9743, "step": 5490 }, { "epoch": 0.18, "grad_norm": 0.500541627407074, "learning_rate": 0.0005945268776764648, "loss": 1.8848, "step": 5491 }, { "epoch": 0.18, "grad_norm": 0.5097522735595703, "learning_rate": 0.000594524890093018, "loss": 2.0556, "step": 5492 }, { "epoch": 0.18, "grad_norm": 0.4883383810520172, "learning_rate": 0.0005945229021520614, "loss": 1.928, "step": 5493 }, { "epoch": 0.18, "grad_norm": 0.49603819847106934, "learning_rate": 0.0005945209138535975, "loss": 1.9723, "step": 5494 }, { "epoch": 0.18, "grad_norm": 0.5186269283294678, "learning_rate": 0.0005945189251976284, "loss": 1.9256, "step": 5495 }, { "epoch": 0.18, "grad_norm": 0.4816964864730835, "learning_rate": 0.0005945169361841569, "loss": 1.9108, "step": 5496 }, { "epoch": 0.18, "grad_norm": 0.4893399178981781, "learning_rate": 0.0005945149468131851, "loss": 1.9199, "step": 5497 }, { "epoch": 0.18, "grad_norm": 0.48795732855796814, "learning_rate": 0.0005945129570847157, "loss": 1.9605, "step": 5498 }, { "epoch": 0.18, "grad_norm": 0.5155264735221863, "learning_rate": 0.0005945109669987508, "loss": 1.944, "step": 5499 }, { "epoch": 0.18, "grad_norm": 0.5207313895225525, "learning_rate": 0.0005945089765552931, "loss": 1.9588, "step": 5500 }, { "epoch": 0.18, "grad_norm": 0.4945359528064728, "learning_rate": 0.0005945069857543449, "loss": 1.9205, "step": 5501 }, { "epoch": 0.18, "grad_norm": 0.5058776140213013, "learning_rate": 0.0005945049945959086, "loss": 1.9659, "step": 5502 }, { "epoch": 0.18, "grad_norm": 0.5174354314804077, "learning_rate": 0.0005945030030799866, "loss": 2.0004, "step": 5503 }, { "epoch": 0.18, "grad_norm": 0.506343424320221, "learning_rate": 0.0005945010112065813, "loss": 2.0345, "step": 5504 }, { "epoch": 0.18, "grad_norm": 0.488041490316391, "learning_rate": 0.0005944990189756952, "loss": 1.9811, "step": 5505 }, { "epoch": 0.18, "grad_norm": 0.5299025177955627, "learning_rate": 0.0005944970263873307, "loss": 2.0153, "step": 5506 }, { "epoch": 0.18, "grad_norm": 0.5237964987754822, "learning_rate": 0.0005944950334414903, "loss": 1.9335, "step": 5507 }, { "epoch": 0.18, "grad_norm": 0.48012182116508484, "learning_rate": 0.0005944930401381762, "loss": 2.0162, "step": 5508 }, { "epoch": 0.18, "grad_norm": 0.5057949423789978, "learning_rate": 0.000594491046477391, "loss": 1.9338, "step": 5509 }, { "epoch": 0.18, "grad_norm": 0.5436517000198364, "learning_rate": 0.000594489052459137, "loss": 2.0215, "step": 5510 }, { "epoch": 0.18, "grad_norm": 0.49219876527786255, "learning_rate": 0.0005944870580834166, "loss": 2.0038, "step": 5511 }, { "epoch": 0.18, "grad_norm": 0.528242290019989, "learning_rate": 0.0005944850633502325, "loss": 1.94, "step": 5512 }, { "epoch": 0.18, "grad_norm": 0.5115209817886353, "learning_rate": 0.0005944830682595868, "loss": 1.9303, "step": 5513 }, { "epoch": 0.18, "grad_norm": 0.4895211458206177, "learning_rate": 0.0005944810728114821, "loss": 2.0404, "step": 5514 }, { "epoch": 0.18, "grad_norm": 0.50985187292099, "learning_rate": 0.0005944790770059207, "loss": 1.9707, "step": 5515 }, { "epoch": 0.18, "grad_norm": 0.49172040820121765, "learning_rate": 0.0005944770808429052, "loss": 1.9934, "step": 5516 }, { "epoch": 0.18, "grad_norm": 0.49017998576164246, "learning_rate": 0.0005944750843224378, "loss": 2.0288, "step": 5517 }, { "epoch": 0.18, "grad_norm": 0.4702906906604767, "learning_rate": 0.0005944730874445211, "loss": 1.9073, "step": 5518 }, { "epoch": 0.18, "grad_norm": 0.4999055564403534, "learning_rate": 0.0005944710902091575, "loss": 2.0723, "step": 5519 }, { "epoch": 0.18, "grad_norm": 0.4933924973011017, "learning_rate": 0.0005944690926163493, "loss": 2.1021, "step": 5520 }, { "epoch": 0.18, "grad_norm": 0.49155303835868835, "learning_rate": 0.0005944670946660991, "loss": 1.9361, "step": 5521 }, { "epoch": 0.18, "grad_norm": 0.503767192363739, "learning_rate": 0.0005944650963584091, "loss": 1.9983, "step": 5522 }, { "epoch": 0.18, "grad_norm": 0.4947524964809418, "learning_rate": 0.0005944630976932819, "loss": 1.9992, "step": 5523 }, { "epoch": 0.18, "grad_norm": 0.5078985691070557, "learning_rate": 0.0005944610986707199, "loss": 1.8637, "step": 5524 }, { "epoch": 0.18, "grad_norm": 0.48038989305496216, "learning_rate": 0.0005944590992907256, "loss": 1.9323, "step": 5525 }, { "epoch": 0.18, "grad_norm": 0.4918804466724396, "learning_rate": 0.0005944570995533012, "loss": 1.9672, "step": 5526 }, { "epoch": 0.18, "grad_norm": 0.500212550163269, "learning_rate": 0.0005944550994584493, "loss": 1.9634, "step": 5527 }, { "epoch": 0.18, "grad_norm": 0.48323723673820496, "learning_rate": 0.0005944530990061724, "loss": 2.0085, "step": 5528 }, { "epoch": 0.18, "grad_norm": 0.4861829876899719, "learning_rate": 0.0005944510981964728, "loss": 1.9726, "step": 5529 }, { "epoch": 0.18, "grad_norm": 0.49262914061546326, "learning_rate": 0.0005944490970293529, "loss": 2.0069, "step": 5530 }, { "epoch": 0.18, "grad_norm": 0.511517345905304, "learning_rate": 0.0005944470955048152, "loss": 1.9333, "step": 5531 }, { "epoch": 0.18, "grad_norm": 0.500858724117279, "learning_rate": 0.000594445093622862, "loss": 2.0117, "step": 5532 }, { "epoch": 0.18, "grad_norm": 0.49613451957702637, "learning_rate": 0.000594443091383496, "loss": 2.0104, "step": 5533 }, { "epoch": 0.18, "grad_norm": 0.4697084128856659, "learning_rate": 0.0005944410887867194, "loss": 1.9412, "step": 5534 }, { "epoch": 0.18, "grad_norm": 0.48371222615242004, "learning_rate": 0.0005944390858325346, "loss": 1.9914, "step": 5535 }, { "epoch": 0.18, "grad_norm": 0.47997140884399414, "learning_rate": 0.0005944370825209444, "loss": 1.9517, "step": 5536 }, { "epoch": 0.18, "grad_norm": 0.4925888776779175, "learning_rate": 0.0005944350788519508, "loss": 2.0564, "step": 5537 }, { "epoch": 0.18, "grad_norm": 0.5074328184127808, "learning_rate": 0.0005944330748255563, "loss": 2.005, "step": 5538 }, { "epoch": 0.18, "grad_norm": 0.4939091205596924, "learning_rate": 0.0005944310704417636, "loss": 1.9798, "step": 5539 }, { "epoch": 0.18, "grad_norm": 0.5014448165893555, "learning_rate": 0.0005944290657005749, "loss": 1.9551, "step": 5540 }, { "epoch": 0.18, "grad_norm": 0.504347562789917, "learning_rate": 0.0005944270606019927, "loss": 1.95, "step": 5541 }, { "epoch": 0.18, "grad_norm": 0.49686920642852783, "learning_rate": 0.0005944250551460196, "loss": 2.0339, "step": 5542 }, { "epoch": 0.18, "grad_norm": 0.4852305054664612, "learning_rate": 0.0005944230493326576, "loss": 1.9603, "step": 5543 }, { "epoch": 0.18, "grad_norm": 0.5202768445014954, "learning_rate": 0.0005944210431619095, "loss": 1.9408, "step": 5544 }, { "epoch": 0.18, "grad_norm": 0.4987958073616028, "learning_rate": 0.0005944190366337776, "loss": 1.9816, "step": 5545 }, { "epoch": 0.18, "grad_norm": 0.5026680827140808, "learning_rate": 0.0005944170297482644, "loss": 1.9289, "step": 5546 }, { "epoch": 0.18, "grad_norm": 0.515425980091095, "learning_rate": 0.0005944150225053724, "loss": 1.9967, "step": 5547 }, { "epoch": 0.18, "grad_norm": 0.49849364161491394, "learning_rate": 0.0005944130149051038, "loss": 1.8957, "step": 5548 }, { "epoch": 0.18, "grad_norm": 0.4925195574760437, "learning_rate": 0.0005944110069474613, "loss": 1.9067, "step": 5549 }, { "epoch": 0.18, "grad_norm": 0.5034400224685669, "learning_rate": 0.0005944089986324471, "loss": 2.0081, "step": 5550 }, { "epoch": 0.18, "grad_norm": 0.49391403794288635, "learning_rate": 0.0005944069899600637, "loss": 1.9122, "step": 5551 }, { "epoch": 0.18, "grad_norm": 0.5019818544387817, "learning_rate": 0.0005944049809303137, "loss": 1.9918, "step": 5552 }, { "epoch": 0.18, "grad_norm": 0.4866252541542053, "learning_rate": 0.0005944029715431995, "loss": 1.9088, "step": 5553 }, { "epoch": 0.18, "grad_norm": 0.4841640591621399, "learning_rate": 0.0005944009617987234, "loss": 1.9409, "step": 5554 }, { "epoch": 0.18, "grad_norm": 0.490971177816391, "learning_rate": 0.0005943989516968879, "loss": 1.986, "step": 5555 }, { "epoch": 0.18, "grad_norm": 0.48279038071632385, "learning_rate": 0.0005943969412376955, "loss": 1.962, "step": 5556 }, { "epoch": 0.18, "grad_norm": 0.4979201555252075, "learning_rate": 0.0005943949304211485, "loss": 1.9689, "step": 5557 }, { "epoch": 0.18, "grad_norm": 0.4933556318283081, "learning_rate": 0.0005943929192472495, "loss": 2.0454, "step": 5558 }, { "epoch": 0.18, "grad_norm": 0.4939992427825928, "learning_rate": 0.0005943909077160009, "loss": 1.9779, "step": 5559 }, { "epoch": 0.18, "grad_norm": 0.5005645155906677, "learning_rate": 0.000594388895827405, "loss": 1.9799, "step": 5560 }, { "epoch": 0.19, "grad_norm": 0.6743741631507874, "learning_rate": 0.0005943868835814644, "loss": 2.0391, "step": 5561 }, { "epoch": 0.19, "grad_norm": 0.4841320514678955, "learning_rate": 0.0005943848709781815, "loss": 1.9589, "step": 5562 }, { "epoch": 0.19, "grad_norm": 0.513336181640625, "learning_rate": 0.0005943828580175588, "loss": 1.988, "step": 5563 }, { "epoch": 0.19, "grad_norm": 0.5041093230247498, "learning_rate": 0.0005943808446995986, "loss": 2.0062, "step": 5564 }, { "epoch": 0.19, "grad_norm": 0.5174620747566223, "learning_rate": 0.0005943788310243035, "loss": 2.0858, "step": 5565 }, { "epoch": 0.19, "grad_norm": 0.5361422896385193, "learning_rate": 0.0005943768169916759, "loss": 1.9233, "step": 5566 }, { "epoch": 0.19, "grad_norm": 0.5021189451217651, "learning_rate": 0.0005943748026017181, "loss": 1.9934, "step": 5567 }, { "epoch": 0.19, "grad_norm": 0.5060296058654785, "learning_rate": 0.0005943727878544328, "loss": 1.9776, "step": 5568 }, { "epoch": 0.19, "grad_norm": 0.4823267459869385, "learning_rate": 0.0005943707727498222, "loss": 1.938, "step": 5569 }, { "epoch": 0.19, "grad_norm": 0.523342490196228, "learning_rate": 0.0005943687572878889, "loss": 2.045, "step": 5570 }, { "epoch": 0.19, "grad_norm": 0.4971048831939697, "learning_rate": 0.0005943667414686353, "loss": 1.9885, "step": 5571 }, { "epoch": 0.19, "grad_norm": 0.49169671535491943, "learning_rate": 0.0005943647252920639, "loss": 1.9735, "step": 5572 }, { "epoch": 0.19, "grad_norm": 0.5014034509658813, "learning_rate": 0.000594362708758177, "loss": 2.0185, "step": 5573 }, { "epoch": 0.19, "grad_norm": 0.4873928427696228, "learning_rate": 0.0005943606918669772, "loss": 1.9201, "step": 5574 }, { "epoch": 0.19, "grad_norm": 0.5011353492736816, "learning_rate": 0.0005943586746184669, "loss": 2.0092, "step": 5575 }, { "epoch": 0.19, "grad_norm": 0.4967504143714905, "learning_rate": 0.0005943566570126486, "loss": 1.9748, "step": 5576 }, { "epoch": 0.19, "grad_norm": 0.48459434509277344, "learning_rate": 0.0005943546390495245, "loss": 1.9834, "step": 5577 }, { "epoch": 0.19, "grad_norm": 0.5034568309783936, "learning_rate": 0.0005943526207290974, "loss": 1.968, "step": 5578 }, { "epoch": 0.19, "grad_norm": 0.4814424216747284, "learning_rate": 0.0005943506020513695, "loss": 2.0037, "step": 5579 }, { "epoch": 0.19, "grad_norm": 0.5295743346214294, "learning_rate": 0.0005943485830163434, "loss": 1.9197, "step": 5580 }, { "epoch": 0.19, "grad_norm": 0.5180200338363647, "learning_rate": 0.0005943465636240215, "loss": 1.9847, "step": 5581 }, { "epoch": 0.19, "grad_norm": 0.48907995223999023, "learning_rate": 0.0005943445438744062, "loss": 2.0362, "step": 5582 }, { "epoch": 0.19, "grad_norm": 0.592971682548523, "learning_rate": 0.0005943425237675001, "loss": 2.0157, "step": 5583 }, { "epoch": 0.19, "grad_norm": 0.4814493954181671, "learning_rate": 0.0005943405033033053, "loss": 1.9772, "step": 5584 }, { "epoch": 0.19, "grad_norm": 0.5275659561157227, "learning_rate": 0.0005943384824818247, "loss": 1.9075, "step": 5585 }, { "epoch": 0.19, "grad_norm": 0.5249287486076355, "learning_rate": 0.0005943364613030605, "loss": 1.8912, "step": 5586 }, { "epoch": 0.19, "grad_norm": 0.5070383548736572, "learning_rate": 0.0005943344397670152, "loss": 1.9711, "step": 5587 }, { "epoch": 0.19, "grad_norm": 0.5011039972305298, "learning_rate": 0.0005943324178736913, "loss": 1.9534, "step": 5588 }, { "epoch": 0.19, "grad_norm": 0.5094118714332581, "learning_rate": 0.0005943303956230912, "loss": 2.0827, "step": 5589 }, { "epoch": 0.19, "grad_norm": 0.5317793488502502, "learning_rate": 0.0005943283730152173, "loss": 2.0824, "step": 5590 }, { "epoch": 0.19, "grad_norm": 0.5202694535255432, "learning_rate": 0.0005943263500500722, "loss": 1.9936, "step": 5591 }, { "epoch": 0.19, "grad_norm": 0.49130284786224365, "learning_rate": 0.0005943243267276582, "loss": 1.9052, "step": 5592 }, { "epoch": 0.19, "grad_norm": 0.5128852725028992, "learning_rate": 0.0005943223030479779, "loss": 1.9362, "step": 5593 }, { "epoch": 0.19, "grad_norm": 0.49562379717826843, "learning_rate": 0.0005943202790110336, "loss": 1.9251, "step": 5594 }, { "epoch": 0.19, "grad_norm": 0.5040370225906372, "learning_rate": 0.000594318254616828, "loss": 1.9367, "step": 5595 }, { "epoch": 0.19, "grad_norm": 0.4937356114387512, "learning_rate": 0.0005943162298653633, "loss": 1.9625, "step": 5596 }, { "epoch": 0.19, "grad_norm": 0.49055519700050354, "learning_rate": 0.0005943142047566421, "loss": 2.0341, "step": 5597 }, { "epoch": 0.19, "grad_norm": 0.5310484766960144, "learning_rate": 0.0005943121792906668, "loss": 1.94, "step": 5598 }, { "epoch": 0.19, "grad_norm": 0.5285713076591492, "learning_rate": 0.00059431015346744, "loss": 1.9292, "step": 5599 }, { "epoch": 0.19, "grad_norm": 0.4930976629257202, "learning_rate": 0.0005943081272869638, "loss": 1.9261, "step": 5600 }, { "epoch": 0.19, "grad_norm": 0.5186050534248352, "learning_rate": 0.000594306100749241, "loss": 2.0143, "step": 5601 }, { "epoch": 0.19, "grad_norm": 0.5302960872650146, "learning_rate": 0.000594304073854274, "loss": 1.9992, "step": 5602 }, { "epoch": 0.19, "grad_norm": 0.4980563223361969, "learning_rate": 0.0005943020466020652, "loss": 2.003, "step": 5603 }, { "epoch": 0.19, "grad_norm": 0.509075403213501, "learning_rate": 0.0005943000189926171, "loss": 1.9571, "step": 5604 }, { "epoch": 0.19, "grad_norm": 0.4943389892578125, "learning_rate": 0.000594297991025932, "loss": 1.9432, "step": 5605 }, { "epoch": 0.19, "grad_norm": 0.4883762001991272, "learning_rate": 0.0005942959627020127, "loss": 2.0161, "step": 5606 }, { "epoch": 0.19, "grad_norm": 0.5063129663467407, "learning_rate": 0.0005942939340208614, "loss": 1.9008, "step": 5607 }, { "epoch": 0.19, "grad_norm": 0.5182680487632751, "learning_rate": 0.0005942919049824805, "loss": 1.9918, "step": 5608 }, { "epoch": 0.19, "grad_norm": 0.48924100399017334, "learning_rate": 0.0005942898755868728, "loss": 1.9579, "step": 5609 }, { "epoch": 0.19, "grad_norm": 0.4992906451225281, "learning_rate": 0.0005942878458340404, "loss": 2.006, "step": 5610 }, { "epoch": 0.19, "grad_norm": 0.4736185073852539, "learning_rate": 0.000594285815723986, "loss": 1.8873, "step": 5611 }, { "epoch": 0.19, "grad_norm": 0.553299605846405, "learning_rate": 0.000594283785256712, "loss": 1.9542, "step": 5612 }, { "epoch": 0.19, "grad_norm": 0.5119803547859192, "learning_rate": 0.0005942817544322208, "loss": 2.0292, "step": 5613 }, { "epoch": 0.19, "grad_norm": 0.48786601424217224, "learning_rate": 0.0005942797232505149, "loss": 1.9405, "step": 5614 }, { "epoch": 0.19, "grad_norm": 0.49349820613861084, "learning_rate": 0.0005942776917115967, "loss": 1.918, "step": 5615 }, { "epoch": 0.19, "grad_norm": 0.6182123422622681, "learning_rate": 0.0005942756598154688, "loss": 2.0511, "step": 5616 }, { "epoch": 0.19, "grad_norm": 0.48439034819602966, "learning_rate": 0.0005942736275621336, "loss": 1.8882, "step": 5617 }, { "epoch": 0.19, "grad_norm": 0.524045467376709, "learning_rate": 0.0005942715949515937, "loss": 1.9994, "step": 5618 }, { "epoch": 0.19, "grad_norm": 0.5998309254646301, "learning_rate": 0.0005942695619838513, "loss": 1.9725, "step": 5619 }, { "epoch": 0.19, "grad_norm": 0.501114010810852, "learning_rate": 0.000594267528658909, "loss": 1.9929, "step": 5620 }, { "epoch": 0.19, "grad_norm": 0.4912187159061432, "learning_rate": 0.0005942654949767694, "loss": 1.9648, "step": 5621 }, { "epoch": 0.19, "grad_norm": 0.5449076294898987, "learning_rate": 0.0005942634609374347, "loss": 1.8798, "step": 5622 }, { "epoch": 0.19, "grad_norm": 0.4931826889514923, "learning_rate": 0.0005942614265409077, "loss": 1.9089, "step": 5623 }, { "epoch": 0.19, "grad_norm": 0.48198816180229187, "learning_rate": 0.0005942593917871906, "loss": 1.9402, "step": 5624 }, { "epoch": 0.19, "grad_norm": 0.5127915143966675, "learning_rate": 0.0005942573566762859, "loss": 1.9567, "step": 5625 }, { "epoch": 0.19, "grad_norm": 0.49734440445899963, "learning_rate": 0.0005942553212081962, "loss": 1.9206, "step": 5626 }, { "epoch": 0.19, "grad_norm": 0.4932252764701843, "learning_rate": 0.0005942532853829239, "loss": 1.915, "step": 5627 }, { "epoch": 0.19, "grad_norm": 0.48184487223625183, "learning_rate": 0.0005942512492004713, "loss": 1.8611, "step": 5628 }, { "epoch": 0.19, "grad_norm": 0.5419028401374817, "learning_rate": 0.0005942492126608412, "loss": 1.9889, "step": 5629 }, { "epoch": 0.19, "grad_norm": 0.5452249646186829, "learning_rate": 0.0005942471757640359, "loss": 2.0554, "step": 5630 }, { "epoch": 0.19, "grad_norm": 0.4853489100933075, "learning_rate": 0.0005942451385100579, "loss": 1.906, "step": 5631 }, { "epoch": 0.19, "grad_norm": 0.5459063649177551, "learning_rate": 0.0005942431008989096, "loss": 1.9759, "step": 5632 }, { "epoch": 0.19, "grad_norm": 0.5227710008621216, "learning_rate": 0.0005942410629305935, "loss": 1.9241, "step": 5633 }, { "epoch": 0.19, "grad_norm": 0.49197760224342346, "learning_rate": 0.0005942390246051121, "loss": 1.9513, "step": 5634 }, { "epoch": 0.19, "grad_norm": 0.5247339606285095, "learning_rate": 0.000594236985922468, "loss": 1.9935, "step": 5635 }, { "epoch": 0.19, "grad_norm": 0.5140052437782288, "learning_rate": 0.0005942349468826634, "loss": 1.9852, "step": 5636 }, { "epoch": 0.19, "grad_norm": 0.4860531985759735, "learning_rate": 0.0005942329074857012, "loss": 1.9329, "step": 5637 }, { "epoch": 0.19, "grad_norm": 0.5307814478874207, "learning_rate": 0.0005942308677315833, "loss": 1.9113, "step": 5638 }, { "epoch": 0.19, "grad_norm": 0.5143482089042664, "learning_rate": 0.0005942288276203127, "loss": 1.8711, "step": 5639 }, { "epoch": 0.19, "grad_norm": 0.5010172128677368, "learning_rate": 0.0005942267871518915, "loss": 1.9735, "step": 5640 }, { "epoch": 0.19, "grad_norm": 0.514744222164154, "learning_rate": 0.0005942247463263223, "loss": 1.9553, "step": 5641 }, { "epoch": 0.19, "grad_norm": 0.4875638782978058, "learning_rate": 0.0005942227051436077, "loss": 1.9566, "step": 5642 }, { "epoch": 0.19, "grad_norm": 0.47391456365585327, "learning_rate": 0.0005942206636037501, "loss": 1.9214, "step": 5643 }, { "epoch": 0.19, "grad_norm": 0.49167269468307495, "learning_rate": 0.000594218621706752, "loss": 1.9427, "step": 5644 }, { "epoch": 0.19, "grad_norm": 0.5013629794120789, "learning_rate": 0.0005942165794526158, "loss": 1.9281, "step": 5645 }, { "epoch": 0.19, "grad_norm": 0.47368574142456055, "learning_rate": 0.000594214536841344, "loss": 1.94, "step": 5646 }, { "epoch": 0.19, "grad_norm": 0.4856661260128021, "learning_rate": 0.0005942124938729391, "loss": 1.9379, "step": 5647 }, { "epoch": 0.19, "grad_norm": 0.4760356545448303, "learning_rate": 0.0005942104505474036, "loss": 1.9231, "step": 5648 }, { "epoch": 0.19, "grad_norm": 0.4806313216686249, "learning_rate": 0.00059420840686474, "loss": 1.9151, "step": 5649 }, { "epoch": 0.19, "grad_norm": 0.49281471967697144, "learning_rate": 0.0005942063628249508, "loss": 1.9927, "step": 5650 }, { "epoch": 0.19, "grad_norm": 0.49989137053489685, "learning_rate": 0.0005942043184280381, "loss": 1.984, "step": 5651 }, { "epoch": 0.19, "grad_norm": 0.5063142776489258, "learning_rate": 0.000594202273674005, "loss": 1.9675, "step": 5652 }, { "epoch": 0.19, "grad_norm": 0.48803532123565674, "learning_rate": 0.0005942002285628537, "loss": 1.9361, "step": 5653 }, { "epoch": 0.19, "grad_norm": 0.4801821708679199, "learning_rate": 0.0005941981830945866, "loss": 1.9435, "step": 5654 }, { "epoch": 0.19, "grad_norm": 0.5113174915313721, "learning_rate": 0.0005941961372692063, "loss": 2.053, "step": 5655 }, { "epoch": 0.19, "grad_norm": 0.491904079914093, "learning_rate": 0.0005941940910867151, "loss": 1.9543, "step": 5656 }, { "epoch": 0.19, "grad_norm": 0.48495230078697205, "learning_rate": 0.0005941920445471158, "loss": 1.9307, "step": 5657 }, { "epoch": 0.19, "grad_norm": 0.5152410268783569, "learning_rate": 0.0005941899976504105, "loss": 1.9221, "step": 5658 }, { "epoch": 0.19, "grad_norm": 0.4833485186100006, "learning_rate": 0.0005941879503966021, "loss": 2.0425, "step": 5659 }, { "epoch": 0.19, "grad_norm": 0.4860483407974243, "learning_rate": 0.0005941859027856927, "loss": 1.8997, "step": 5660 }, { "epoch": 0.19, "grad_norm": 0.5291685461997986, "learning_rate": 0.000594183854817685, "loss": 2.0315, "step": 5661 }, { "epoch": 0.19, "grad_norm": 0.5276031494140625, "learning_rate": 0.0005941818064925815, "loss": 1.9517, "step": 5662 }, { "epoch": 0.19, "grad_norm": 0.49130529165267944, "learning_rate": 0.0005941797578103847, "loss": 1.9767, "step": 5663 }, { "epoch": 0.19, "grad_norm": 0.5485951900482178, "learning_rate": 0.0005941777087710969, "loss": 1.9744, "step": 5664 }, { "epoch": 0.19, "grad_norm": 0.5508536100387573, "learning_rate": 0.0005941756593747207, "loss": 1.9827, "step": 5665 }, { "epoch": 0.19, "grad_norm": 0.48959776759147644, "learning_rate": 0.0005941736096212587, "loss": 1.9041, "step": 5666 }, { "epoch": 0.19, "grad_norm": 0.5154087543487549, "learning_rate": 0.0005941715595107133, "loss": 2.0355, "step": 5667 }, { "epoch": 0.19, "grad_norm": 0.49487078189849854, "learning_rate": 0.0005941695090430868, "loss": 1.9143, "step": 5668 }, { "epoch": 0.19, "grad_norm": 0.48627763986587524, "learning_rate": 0.0005941674582183819, "loss": 1.9667, "step": 5669 }, { "epoch": 0.19, "grad_norm": 0.5030969381332397, "learning_rate": 0.0005941654070366012, "loss": 1.9082, "step": 5670 }, { "epoch": 0.19, "grad_norm": 0.47575196623802185, "learning_rate": 0.0005941633554977469, "loss": 1.9089, "step": 5671 }, { "epoch": 0.19, "grad_norm": 0.49659252166748047, "learning_rate": 0.0005941613036018216, "loss": 1.9445, "step": 5672 }, { "epoch": 0.19, "grad_norm": 0.4702134430408478, "learning_rate": 0.000594159251348828, "loss": 1.9513, "step": 5673 }, { "epoch": 0.19, "grad_norm": 0.4836875796318054, "learning_rate": 0.0005941571987387683, "loss": 1.9332, "step": 5674 }, { "epoch": 0.19, "grad_norm": 0.47994711995124817, "learning_rate": 0.000594155145771645, "loss": 1.9571, "step": 5675 }, { "epoch": 0.19, "grad_norm": 0.508452296257019, "learning_rate": 0.0005941530924474609, "loss": 1.9979, "step": 5676 }, { "epoch": 0.19, "grad_norm": 0.5106131434440613, "learning_rate": 0.0005941510387662181, "loss": 2.0311, "step": 5677 }, { "epoch": 0.19, "grad_norm": 0.49441370368003845, "learning_rate": 0.0005941489847279194, "loss": 1.9938, "step": 5678 }, { "epoch": 0.19, "grad_norm": 0.4816899299621582, "learning_rate": 0.0005941469303325671, "loss": 2.0159, "step": 5679 }, { "epoch": 0.19, "grad_norm": 0.5174726247787476, "learning_rate": 0.0005941448755801638, "loss": 2.0434, "step": 5680 }, { "epoch": 0.19, "grad_norm": 0.4783342480659485, "learning_rate": 0.000594142820470712, "loss": 1.9612, "step": 5681 }, { "epoch": 0.19, "grad_norm": 0.4725189507007599, "learning_rate": 0.0005941407650042142, "loss": 1.9356, "step": 5682 }, { "epoch": 0.19, "grad_norm": 0.5241659283638, "learning_rate": 0.0005941387091806727, "loss": 1.959, "step": 5683 }, { "epoch": 0.19, "grad_norm": 0.4941283166408539, "learning_rate": 0.0005941366530000902, "loss": 1.9579, "step": 5684 }, { "epoch": 0.19, "grad_norm": 0.49411389231681824, "learning_rate": 0.0005941345964624692, "loss": 2.0125, "step": 5685 }, { "epoch": 0.19, "grad_norm": 0.46775388717651367, "learning_rate": 0.000594132539567812, "loss": 1.8951, "step": 5686 }, { "epoch": 0.19, "grad_norm": 0.4679575562477112, "learning_rate": 0.0005941304823161214, "loss": 1.9476, "step": 5687 }, { "epoch": 0.19, "grad_norm": 0.5021002888679504, "learning_rate": 0.0005941284247073996, "loss": 1.9626, "step": 5688 }, { "epoch": 0.19, "grad_norm": 0.48799610137939453, "learning_rate": 0.0005941263667416495, "loss": 1.9197, "step": 5689 }, { "epoch": 0.19, "grad_norm": 0.4757596254348755, "learning_rate": 0.000594124308418873, "loss": 1.9528, "step": 5690 }, { "epoch": 0.19, "grad_norm": 0.5020637512207031, "learning_rate": 0.000594122249739073, "loss": 1.9741, "step": 5691 }, { "epoch": 0.19, "grad_norm": 0.5029486417770386, "learning_rate": 0.000594120190702252, "loss": 2.0271, "step": 5692 }, { "epoch": 0.19, "grad_norm": 0.49168071150779724, "learning_rate": 0.0005941181313084124, "loss": 1.9473, "step": 5693 }, { "epoch": 0.19, "grad_norm": 0.49126940965652466, "learning_rate": 0.0005941160715575568, "loss": 1.9372, "step": 5694 }, { "epoch": 0.19, "grad_norm": 0.47659024596214294, "learning_rate": 0.0005941140114496875, "loss": 1.9912, "step": 5695 }, { "epoch": 0.19, "grad_norm": 0.5521035194396973, "learning_rate": 0.0005941119509848072, "loss": 1.9632, "step": 5696 }, { "epoch": 0.19, "grad_norm": 0.47710320353507996, "learning_rate": 0.0005941098901629183, "loss": 2.0047, "step": 5697 }, { "epoch": 0.19, "grad_norm": 0.4898768365383148, "learning_rate": 0.0005941078289840234, "loss": 2.0074, "step": 5698 }, { "epoch": 0.19, "grad_norm": 0.49108007550239563, "learning_rate": 0.0005941057674481248, "loss": 2.0227, "step": 5699 }, { "epoch": 0.19, "grad_norm": 0.4858761727809906, "learning_rate": 0.0005941037055552253, "loss": 1.99, "step": 5700 }, { "epoch": 0.19, "grad_norm": 0.4893997609615326, "learning_rate": 0.0005941016433053271, "loss": 1.9881, "step": 5701 }, { "epoch": 0.19, "grad_norm": 0.5034173130989075, "learning_rate": 0.0005940995806984329, "loss": 1.9456, "step": 5702 }, { "epoch": 0.19, "grad_norm": 0.47346192598342896, "learning_rate": 0.000594097517734545, "loss": 1.9703, "step": 5703 }, { "epoch": 0.19, "grad_norm": 0.4852340817451477, "learning_rate": 0.0005940954544136662, "loss": 1.8891, "step": 5704 }, { "epoch": 0.19, "grad_norm": 0.5019658803939819, "learning_rate": 0.0005940933907357988, "loss": 1.9998, "step": 5705 }, { "epoch": 0.19, "grad_norm": 0.48204681277275085, "learning_rate": 0.0005940913267009454, "loss": 1.892, "step": 5706 }, { "epoch": 0.19, "grad_norm": 0.49744707345962524, "learning_rate": 0.0005940892623091085, "loss": 1.956, "step": 5707 }, { "epoch": 0.19, "grad_norm": 0.49264755845069885, "learning_rate": 0.0005940871975602905, "loss": 1.957, "step": 5708 }, { "epoch": 0.19, "grad_norm": 0.5077126026153564, "learning_rate": 0.0005940851324544938, "loss": 1.9508, "step": 5709 }, { "epoch": 0.19, "grad_norm": 0.4882209897041321, "learning_rate": 0.0005940830669917213, "loss": 1.9301, "step": 5710 }, { "epoch": 0.19, "grad_norm": 0.5141485929489136, "learning_rate": 0.0005940810011719753, "loss": 1.8875, "step": 5711 }, { "epoch": 0.19, "grad_norm": 0.479120671749115, "learning_rate": 0.0005940789349952582, "loss": 1.9429, "step": 5712 }, { "epoch": 0.19, "grad_norm": 0.5868924856185913, "learning_rate": 0.0005940768684615726, "loss": 1.9871, "step": 5713 }, { "epoch": 0.19, "grad_norm": 0.4990425109863281, "learning_rate": 0.0005940748015709212, "loss": 1.9777, "step": 5714 }, { "epoch": 0.19, "grad_norm": 0.5257944464683533, "learning_rate": 0.0005940727343233061, "loss": 1.8908, "step": 5715 }, { "epoch": 0.19, "grad_norm": 0.5067377686500549, "learning_rate": 0.0005940706667187301, "loss": 1.9807, "step": 5716 }, { "epoch": 0.19, "grad_norm": 0.5052306652069092, "learning_rate": 0.0005940685987571956, "loss": 1.9682, "step": 5717 }, { "epoch": 0.19, "grad_norm": 0.48577427864074707, "learning_rate": 0.0005940665304387052, "loss": 1.9883, "step": 5718 }, { "epoch": 0.19, "grad_norm": 0.47810912132263184, "learning_rate": 0.0005940644617632614, "loss": 1.8806, "step": 5719 }, { "epoch": 0.19, "grad_norm": 0.4986656606197357, "learning_rate": 0.0005940623927308666, "loss": 1.9391, "step": 5720 }, { "epoch": 0.19, "grad_norm": 0.5201500654220581, "learning_rate": 0.0005940603233415235, "loss": 1.9589, "step": 5721 }, { "epoch": 0.19, "grad_norm": 0.4913565516471863, "learning_rate": 0.0005940582535952343, "loss": 1.975, "step": 5722 }, { "epoch": 0.19, "grad_norm": 0.488871306180954, "learning_rate": 0.0005940561834920018, "loss": 1.9465, "step": 5723 }, { "epoch": 0.19, "grad_norm": 0.4790179133415222, "learning_rate": 0.0005940541130318285, "loss": 1.969, "step": 5724 }, { "epoch": 0.19, "grad_norm": 0.5268075466156006, "learning_rate": 0.0005940520422147167, "loss": 1.9161, "step": 5725 }, { "epoch": 0.19, "grad_norm": 0.5158506631851196, "learning_rate": 0.0005940499710406692, "loss": 1.9122, "step": 5726 }, { "epoch": 0.19, "grad_norm": 0.47966060042381287, "learning_rate": 0.0005940478995096882, "loss": 1.9401, "step": 5727 }, { "epoch": 0.19, "grad_norm": 0.5367653369903564, "learning_rate": 0.0005940458276217765, "loss": 1.9444, "step": 5728 }, { "epoch": 0.19, "grad_norm": 0.530771017074585, "learning_rate": 0.0005940437553769364, "loss": 1.9708, "step": 5729 }, { "epoch": 0.19, "grad_norm": 0.48145076632499695, "learning_rate": 0.0005940416827751706, "loss": 1.9905, "step": 5730 }, { "epoch": 0.19, "grad_norm": 0.5262749195098877, "learning_rate": 0.0005940396098164815, "loss": 1.934, "step": 5731 }, { "epoch": 0.19, "grad_norm": 0.5195491313934326, "learning_rate": 0.0005940375365008716, "loss": 1.9478, "step": 5732 }, { "epoch": 0.19, "grad_norm": 0.4887329339981079, "learning_rate": 0.0005940354628283434, "loss": 1.9347, "step": 5733 }, { "epoch": 0.19, "grad_norm": 0.514630138874054, "learning_rate": 0.0005940333887988995, "loss": 2.0093, "step": 5734 }, { "epoch": 0.19, "grad_norm": 0.5160511136054993, "learning_rate": 0.0005940313144125425, "loss": 2.0072, "step": 5735 }, { "epoch": 0.19, "grad_norm": 0.4736737608909607, "learning_rate": 0.0005940292396692747, "loss": 1.8632, "step": 5736 }, { "epoch": 0.19, "grad_norm": 0.49723947048187256, "learning_rate": 0.0005940271645690987, "loss": 1.969, "step": 5737 }, { "epoch": 0.19, "grad_norm": 0.4910655915737152, "learning_rate": 0.0005940250891120172, "loss": 1.9455, "step": 5738 }, { "epoch": 0.19, "grad_norm": 0.4875837564468384, "learning_rate": 0.0005940230132980324, "loss": 1.8841, "step": 5739 }, { "epoch": 0.19, "grad_norm": 0.4825124740600586, "learning_rate": 0.000594020937127147, "loss": 2.0193, "step": 5740 }, { "epoch": 0.19, "grad_norm": 0.4875616133213043, "learning_rate": 0.0005940188605993637, "loss": 1.9179, "step": 5741 }, { "epoch": 0.19, "grad_norm": 0.504387378692627, "learning_rate": 0.0005940167837146847, "loss": 1.9854, "step": 5742 }, { "epoch": 0.19, "grad_norm": 0.5037978887557983, "learning_rate": 0.0005940147064731126, "loss": 1.9601, "step": 5743 }, { "epoch": 0.19, "grad_norm": 0.4751175045967102, "learning_rate": 0.0005940126288746499, "loss": 1.9217, "step": 5744 }, { "epoch": 0.19, "grad_norm": 0.5061239004135132, "learning_rate": 0.0005940105509192993, "loss": 1.9755, "step": 5745 }, { "epoch": 0.19, "grad_norm": 0.5406847596168518, "learning_rate": 0.0005940084726070632, "loss": 1.993, "step": 5746 }, { "epoch": 0.19, "grad_norm": 0.47452980279922485, "learning_rate": 0.0005940063939379441, "loss": 1.9887, "step": 5747 }, { "epoch": 0.19, "grad_norm": 0.5227171778678894, "learning_rate": 0.0005940043149119446, "loss": 1.9928, "step": 5748 }, { "epoch": 0.19, "grad_norm": 0.492912620306015, "learning_rate": 0.0005940022355290672, "loss": 1.9794, "step": 5749 }, { "epoch": 0.19, "grad_norm": 0.47962185740470886, "learning_rate": 0.0005940001557893144, "loss": 1.9998, "step": 5750 }, { "epoch": 0.19, "grad_norm": 0.49754321575164795, "learning_rate": 0.0005939980756926888, "loss": 1.9449, "step": 5751 }, { "epoch": 0.19, "grad_norm": 0.4709954559803009, "learning_rate": 0.0005939959952391926, "loss": 2.0317, "step": 5752 }, { "epoch": 0.19, "grad_norm": 0.4924710988998413, "learning_rate": 0.0005939939144288288, "loss": 2.0059, "step": 5753 }, { "epoch": 0.19, "grad_norm": 0.4997352659702301, "learning_rate": 0.0005939918332615996, "loss": 1.9945, "step": 5754 }, { "epoch": 0.19, "grad_norm": 0.4971752464771271, "learning_rate": 0.0005939897517375077, "loss": 1.9583, "step": 5755 }, { "epoch": 0.19, "grad_norm": 0.47111907601356506, "learning_rate": 0.0005939876698565556, "loss": 1.9639, "step": 5756 }, { "epoch": 0.19, "grad_norm": 0.5239901542663574, "learning_rate": 0.0005939855876187457, "loss": 1.9103, "step": 5757 }, { "epoch": 0.19, "grad_norm": 0.5100216865539551, "learning_rate": 0.0005939835050240807, "loss": 1.9328, "step": 5758 }, { "epoch": 0.19, "grad_norm": 0.48833686113357544, "learning_rate": 0.0005939814220725628, "loss": 1.9759, "step": 5759 }, { "epoch": 0.19, "grad_norm": 0.499379962682724, "learning_rate": 0.000593979338764195, "loss": 2.0272, "step": 5760 }, { "epoch": 0.19, "grad_norm": 0.5087597370147705, "learning_rate": 0.0005939772550989796, "loss": 2.0202, "step": 5761 }, { "epoch": 0.19, "grad_norm": 0.4988883435726166, "learning_rate": 0.0005939751710769191, "loss": 1.9576, "step": 5762 }, { "epoch": 0.19, "grad_norm": 0.49033260345458984, "learning_rate": 0.000593973086698016, "loss": 1.9429, "step": 5763 }, { "epoch": 0.19, "grad_norm": 0.48676687479019165, "learning_rate": 0.0005939710019622729, "loss": 1.9363, "step": 5764 }, { "epoch": 0.19, "grad_norm": 0.5129943490028381, "learning_rate": 0.0005939689168696923, "loss": 2.0422, "step": 5765 }, { "epoch": 0.19, "grad_norm": 0.4850936233997345, "learning_rate": 0.0005939668314202768, "loss": 1.8922, "step": 5766 }, { "epoch": 0.19, "grad_norm": 0.47951531410217285, "learning_rate": 0.0005939647456140288, "loss": 1.9333, "step": 5767 }, { "epoch": 0.19, "grad_norm": 0.4940553605556488, "learning_rate": 0.0005939626594509511, "loss": 2.0094, "step": 5768 }, { "epoch": 0.19, "grad_norm": 0.4769646227359772, "learning_rate": 0.0005939605729310459, "loss": 1.9655, "step": 5769 }, { "epoch": 0.19, "grad_norm": 0.5205872654914856, "learning_rate": 0.0005939584860543159, "loss": 1.9996, "step": 5770 }, { "epoch": 0.19, "grad_norm": 0.49371060729026794, "learning_rate": 0.0005939563988207636, "loss": 1.9547, "step": 5771 }, { "epoch": 0.19, "grad_norm": 0.4845100939273834, "learning_rate": 0.0005939543112303916, "loss": 2.029, "step": 5772 }, { "epoch": 0.19, "grad_norm": 0.4747048318386078, "learning_rate": 0.0005939522232832023, "loss": 1.9504, "step": 5773 }, { "epoch": 0.19, "grad_norm": 0.4752235412597656, "learning_rate": 0.0005939501349791984, "loss": 2.0211, "step": 5774 }, { "epoch": 0.19, "grad_norm": 0.4903069734573364, "learning_rate": 0.0005939480463183822, "loss": 1.9705, "step": 5775 }, { "epoch": 0.19, "grad_norm": 0.5000359416007996, "learning_rate": 0.0005939459573007565, "loss": 1.9946, "step": 5776 }, { "epoch": 0.19, "grad_norm": 0.47779160737991333, "learning_rate": 0.0005939438679263237, "loss": 1.9004, "step": 5777 }, { "epoch": 0.19, "grad_norm": 0.5229504108428955, "learning_rate": 0.0005939417781950863, "loss": 1.9621, "step": 5778 }, { "epoch": 0.19, "grad_norm": 0.6035635471343994, "learning_rate": 0.0005939396881070471, "loss": 2.0747, "step": 5779 }, { "epoch": 0.19, "grad_norm": 0.47497519850730896, "learning_rate": 0.0005939375976622081, "loss": 1.9826, "step": 5780 }, { "epoch": 0.19, "grad_norm": 0.4991016685962677, "learning_rate": 0.0005939355068605724, "loss": 1.9906, "step": 5781 }, { "epoch": 0.19, "grad_norm": 0.5156857967376709, "learning_rate": 0.0005939334157021422, "loss": 1.9926, "step": 5782 }, { "epoch": 0.19, "grad_norm": 0.5077722072601318, "learning_rate": 0.0005939313241869203, "loss": 1.9313, "step": 5783 }, { "epoch": 0.19, "grad_norm": 0.515559196472168, "learning_rate": 0.0005939292323149089, "loss": 1.997, "step": 5784 }, { "epoch": 0.19, "grad_norm": 0.4929462969303131, "learning_rate": 0.0005939271400861107, "loss": 1.9645, "step": 5785 }, { "epoch": 0.19, "grad_norm": 0.5160275101661682, "learning_rate": 0.0005939250475005284, "loss": 1.9714, "step": 5786 }, { "epoch": 0.19, "grad_norm": 0.4853273928165436, "learning_rate": 0.0005939229545581644, "loss": 1.9836, "step": 5787 }, { "epoch": 0.19, "grad_norm": 0.5152283906936646, "learning_rate": 0.0005939208612590211, "loss": 1.9778, "step": 5788 }, { "epoch": 0.19, "grad_norm": 0.5125997066497803, "learning_rate": 0.0005939187676031013, "loss": 2.0046, "step": 5789 }, { "epoch": 0.19, "grad_norm": 0.5269156098365784, "learning_rate": 0.0005939166735904074, "loss": 1.9489, "step": 5790 }, { "epoch": 0.19, "grad_norm": 0.48521167039871216, "learning_rate": 0.0005939145792209419, "loss": 1.8573, "step": 5791 }, { "epoch": 0.19, "grad_norm": 0.48145005106925964, "learning_rate": 0.0005939124844947075, "loss": 1.9588, "step": 5792 }, { "epoch": 0.19, "grad_norm": 0.47416627407073975, "learning_rate": 0.0005939103894117067, "loss": 1.8969, "step": 5793 }, { "epoch": 0.19, "grad_norm": 0.4985969364643097, "learning_rate": 0.0005939082939719418, "loss": 1.8966, "step": 5794 }, { "epoch": 0.19, "grad_norm": 0.4820381700992584, "learning_rate": 0.0005939061981754157, "loss": 1.9726, "step": 5795 }, { "epoch": 0.19, "grad_norm": 0.4938150644302368, "learning_rate": 0.0005939041020221307, "loss": 2.0208, "step": 5796 }, { "epoch": 0.19, "grad_norm": 0.4813404381275177, "learning_rate": 0.0005939020055120894, "loss": 1.9508, "step": 5797 }, { "epoch": 0.19, "grad_norm": 0.5016685724258423, "learning_rate": 0.0005938999086452945, "loss": 1.9499, "step": 5798 }, { "epoch": 0.19, "grad_norm": 0.5009058713912964, "learning_rate": 0.0005938978114217483, "loss": 1.9852, "step": 5799 }, { "epoch": 0.19, "grad_norm": 0.4954030215740204, "learning_rate": 0.0005938957138414535, "loss": 2.0806, "step": 5800 }, { "epoch": 0.19, "grad_norm": 0.4760737717151642, "learning_rate": 0.0005938936159044126, "loss": 1.9649, "step": 5801 }, { "epoch": 0.19, "grad_norm": 0.49406036734580994, "learning_rate": 0.0005938915176106281, "loss": 1.9266, "step": 5802 }, { "epoch": 0.19, "grad_norm": 0.5086522698402405, "learning_rate": 0.0005938894189601027, "loss": 1.9991, "step": 5803 }, { "epoch": 0.19, "grad_norm": 0.48133039474487305, "learning_rate": 0.0005938873199528387, "loss": 1.8869, "step": 5804 }, { "epoch": 0.19, "grad_norm": 0.5521520376205444, "learning_rate": 0.000593885220588839, "loss": 1.9703, "step": 5805 }, { "epoch": 0.19, "grad_norm": 0.4809570610523224, "learning_rate": 0.0005938831208681058, "loss": 1.9087, "step": 5806 }, { "epoch": 0.19, "grad_norm": 0.509611189365387, "learning_rate": 0.0005938810207906419, "loss": 1.9447, "step": 5807 }, { "epoch": 0.19, "grad_norm": 0.5072821974754333, "learning_rate": 0.0005938789203564496, "loss": 1.9871, "step": 5808 }, { "epoch": 0.19, "grad_norm": 0.497628778219223, "learning_rate": 0.0005938768195655318, "loss": 1.9077, "step": 5809 }, { "epoch": 0.19, "grad_norm": 0.5506167411804199, "learning_rate": 0.0005938747184178906, "loss": 2.0173, "step": 5810 }, { "epoch": 0.19, "grad_norm": 0.49575528502464294, "learning_rate": 0.000593872616913529, "loss": 1.9379, "step": 5811 }, { "epoch": 0.19, "grad_norm": 0.5079115033149719, "learning_rate": 0.0005938705150524492, "loss": 2.0013, "step": 5812 }, { "epoch": 0.19, "grad_norm": 0.49307775497436523, "learning_rate": 0.0005938684128346539, "loss": 1.9679, "step": 5813 }, { "epoch": 0.19, "grad_norm": 0.4916030168533325, "learning_rate": 0.0005938663102601457, "loss": 1.8901, "step": 5814 }, { "epoch": 0.19, "grad_norm": 0.477079302072525, "learning_rate": 0.0005938642073289271, "loss": 1.9659, "step": 5815 }, { "epoch": 0.19, "grad_norm": 0.5016430616378784, "learning_rate": 0.0005938621040410005, "loss": 1.9744, "step": 5816 }, { "epoch": 0.19, "grad_norm": 0.48800820112228394, "learning_rate": 0.0005938600003963689, "loss": 1.9654, "step": 5817 }, { "epoch": 0.19, "grad_norm": 0.4740425944328308, "learning_rate": 0.0005938578963950344, "loss": 1.9288, "step": 5818 }, { "epoch": 0.19, "grad_norm": 0.49731743335723877, "learning_rate": 0.0005938557920369997, "loss": 1.9448, "step": 5819 }, { "epoch": 0.19, "grad_norm": 0.4743991494178772, "learning_rate": 0.0005938536873222673, "loss": 2.0427, "step": 5820 }, { "epoch": 0.19, "grad_norm": 0.4862474501132965, "learning_rate": 0.0005938515822508399, "loss": 1.951, "step": 5821 }, { "epoch": 0.19, "grad_norm": 0.4955729842185974, "learning_rate": 0.00059384947682272, "loss": 1.9803, "step": 5822 }, { "epoch": 0.19, "grad_norm": 0.4924966096878052, "learning_rate": 0.0005938473710379101, "loss": 1.952, "step": 5823 }, { "epoch": 0.19, "grad_norm": 0.4676971733570099, "learning_rate": 0.0005938452648964128, "loss": 1.9847, "step": 5824 }, { "epoch": 0.19, "grad_norm": 0.4800596237182617, "learning_rate": 0.0005938431583982307, "loss": 2.0498, "step": 5825 }, { "epoch": 0.19, "grad_norm": 0.4783078134059906, "learning_rate": 0.0005938410515433662, "loss": 1.9529, "step": 5826 }, { "epoch": 0.19, "grad_norm": 0.49715280532836914, "learning_rate": 0.000593838944331822, "loss": 1.9399, "step": 5827 }, { "epoch": 0.19, "grad_norm": 0.49313458800315857, "learning_rate": 0.0005938368367636006, "loss": 1.9469, "step": 5828 }, { "epoch": 0.19, "grad_norm": 0.49172744154930115, "learning_rate": 0.0005938347288387046, "loss": 1.9444, "step": 5829 }, { "epoch": 0.19, "grad_norm": 0.5076867938041687, "learning_rate": 0.0005938326205571365, "loss": 1.9644, "step": 5830 }, { "epoch": 0.19, "grad_norm": 0.49879691004753113, "learning_rate": 0.0005938305119188989, "loss": 2.0671, "step": 5831 }, { "epoch": 0.19, "grad_norm": 0.5115874409675598, "learning_rate": 0.0005938284029239944, "loss": 1.9324, "step": 5832 }, { "epoch": 0.19, "grad_norm": 0.49291637539863586, "learning_rate": 0.0005938262935724255, "loss": 1.9521, "step": 5833 }, { "epoch": 0.19, "grad_norm": 0.4785429537296295, "learning_rate": 0.0005938241838641948, "loss": 2.0363, "step": 5834 }, { "epoch": 0.19, "grad_norm": 0.5122122168540955, "learning_rate": 0.0005938220737993047, "loss": 1.9063, "step": 5835 }, { "epoch": 0.19, "grad_norm": 0.5024681091308594, "learning_rate": 0.000593819963377758, "loss": 1.9851, "step": 5836 }, { "epoch": 0.19, "grad_norm": 0.4956361651420593, "learning_rate": 0.0005938178525995572, "loss": 1.932, "step": 5837 }, { "epoch": 0.19, "grad_norm": 0.49689194560050964, "learning_rate": 0.0005938157414647047, "loss": 1.9412, "step": 5838 }, { "epoch": 0.19, "grad_norm": 0.4916042983531952, "learning_rate": 0.0005938136299732033, "loss": 1.9872, "step": 5839 }, { "epoch": 0.19, "grad_norm": 0.4797876477241516, "learning_rate": 0.0005938115181250553, "loss": 1.9422, "step": 5840 }, { "epoch": 0.19, "grad_norm": 0.5175765156745911, "learning_rate": 0.0005938094059202635, "loss": 1.9237, "step": 5841 }, { "epoch": 0.19, "grad_norm": 0.4965415894985199, "learning_rate": 0.0005938072933588304, "loss": 1.9584, "step": 5842 }, { "epoch": 0.19, "grad_norm": 0.4781613051891327, "learning_rate": 0.0005938051804407585, "loss": 1.9732, "step": 5843 }, { "epoch": 0.19, "grad_norm": 0.4887511432170868, "learning_rate": 0.0005938030671660503, "loss": 1.977, "step": 5844 }, { "epoch": 0.19, "grad_norm": 0.5248250961303711, "learning_rate": 0.0005938009535347087, "loss": 1.9698, "step": 5845 }, { "epoch": 0.19, "grad_norm": 0.5161917209625244, "learning_rate": 0.0005937988395467358, "loss": 1.9905, "step": 5846 }, { "epoch": 0.19, "grad_norm": 0.47446689009666443, "learning_rate": 0.0005937967252021345, "loss": 1.9355, "step": 5847 }, { "epoch": 0.19, "grad_norm": 0.5030615329742432, "learning_rate": 0.0005937946105009073, "loss": 1.9458, "step": 5848 }, { "epoch": 0.19, "grad_norm": 0.49756526947021484, "learning_rate": 0.0005937924954430567, "loss": 2.0139, "step": 5849 }, { "epoch": 0.19, "grad_norm": 0.4953906238079071, "learning_rate": 0.0005937903800285853, "loss": 2.0037, "step": 5850 }, { "epoch": 0.19, "grad_norm": 0.4876863360404968, "learning_rate": 0.0005937882642574956, "loss": 1.9772, "step": 5851 }, { "epoch": 0.19, "grad_norm": 0.5070708990097046, "learning_rate": 0.0005937861481297903, "loss": 1.9081, "step": 5852 }, { "epoch": 0.19, "grad_norm": 0.4920961558818817, "learning_rate": 0.0005937840316454719, "loss": 1.9299, "step": 5853 }, { "epoch": 0.19, "grad_norm": 0.48808613419532776, "learning_rate": 0.000593781914804543, "loss": 1.908, "step": 5854 }, { "epoch": 0.19, "grad_norm": 0.5093997120857239, "learning_rate": 0.0005937797976070062, "loss": 1.9105, "step": 5855 }, { "epoch": 0.19, "grad_norm": 0.47993558645248413, "learning_rate": 0.0005937776800528638, "loss": 1.9693, "step": 5856 }, { "epoch": 0.19, "grad_norm": 0.48628121614456177, "learning_rate": 0.0005937755621421188, "loss": 1.9384, "step": 5857 }, { "epoch": 0.19, "grad_norm": 0.49293529987335205, "learning_rate": 0.0005937734438747734, "loss": 1.9182, "step": 5858 }, { "epoch": 0.19, "grad_norm": 0.4847615957260132, "learning_rate": 0.0005937713252508305, "loss": 1.9134, "step": 5859 }, { "epoch": 0.19, "grad_norm": 0.6406659483909607, "learning_rate": 0.0005937692062702923, "loss": 1.9855, "step": 5860 }, { "epoch": 0.19, "grad_norm": 0.49666109681129456, "learning_rate": 0.0005937670869331617, "loss": 1.9092, "step": 5861 }, { "epoch": 0.2, "grad_norm": 0.5577508211135864, "learning_rate": 0.0005937649672394411, "loss": 2.0003, "step": 5862 }, { "epoch": 0.2, "grad_norm": 0.49016571044921875, "learning_rate": 0.000593762847189133, "loss": 2.0064, "step": 5863 }, { "epoch": 0.2, "grad_norm": 0.47149285674095154, "learning_rate": 0.0005937607267822402, "loss": 1.9274, "step": 5864 }, { "epoch": 0.2, "grad_norm": 0.49384498596191406, "learning_rate": 0.0005937586060187651, "loss": 1.9646, "step": 5865 }, { "epoch": 0.2, "grad_norm": 0.5012820363044739, "learning_rate": 0.0005937564848987105, "loss": 1.989, "step": 5866 }, { "epoch": 0.2, "grad_norm": 0.4942983388900757, "learning_rate": 0.0005937543634220786, "loss": 1.9957, "step": 5867 }, { "epoch": 0.2, "grad_norm": 0.4659716784954071, "learning_rate": 0.0005937522415888724, "loss": 1.9416, "step": 5868 }, { "epoch": 0.2, "grad_norm": 0.45664653182029724, "learning_rate": 0.0005937501193990941, "loss": 1.9308, "step": 5869 }, { "epoch": 0.2, "grad_norm": 0.4851100742816925, "learning_rate": 0.0005937479968527464, "loss": 1.9064, "step": 5870 }, { "epoch": 0.2, "grad_norm": 0.4881761372089386, "learning_rate": 0.0005937458739498321, "loss": 1.9203, "step": 5871 }, { "epoch": 0.2, "grad_norm": 0.49182307720184326, "learning_rate": 0.0005937437506903535, "loss": 1.9205, "step": 5872 }, { "epoch": 0.2, "grad_norm": 0.4777492880821228, "learning_rate": 0.0005937416270743132, "loss": 1.9241, "step": 5873 }, { "epoch": 0.2, "grad_norm": 0.48279306292533875, "learning_rate": 0.000593739503101714, "loss": 2.0098, "step": 5874 }, { "epoch": 0.2, "grad_norm": 0.48852017521858215, "learning_rate": 0.0005937373787725581, "loss": 1.9593, "step": 5875 }, { "epoch": 0.2, "grad_norm": 0.5742959380149841, "learning_rate": 0.0005937352540868484, "loss": 2.0172, "step": 5876 }, { "epoch": 0.2, "grad_norm": 0.5036603212356567, "learning_rate": 0.0005937331290445875, "loss": 1.9564, "step": 5877 }, { "epoch": 0.2, "grad_norm": 0.4646250605583191, "learning_rate": 0.0005937310036457777, "loss": 1.9238, "step": 5878 }, { "epoch": 0.2, "grad_norm": 0.4848936200141907, "learning_rate": 0.0005937288778904219, "loss": 1.8931, "step": 5879 }, { "epoch": 0.2, "grad_norm": 0.4997841417789459, "learning_rate": 0.0005937267517785224, "loss": 1.953, "step": 5880 }, { "epoch": 0.2, "grad_norm": 0.507296085357666, "learning_rate": 0.0005937246253100819, "loss": 2.0402, "step": 5881 }, { "epoch": 0.2, "grad_norm": 0.5108346343040466, "learning_rate": 0.000593722498485103, "loss": 1.9673, "step": 5882 }, { "epoch": 0.2, "grad_norm": 0.5189114212989807, "learning_rate": 0.0005937203713035883, "loss": 1.933, "step": 5883 }, { "epoch": 0.2, "grad_norm": 0.48411908745765686, "learning_rate": 0.0005937182437655403, "loss": 1.9727, "step": 5884 }, { "epoch": 0.2, "grad_norm": 0.4902961850166321, "learning_rate": 0.0005937161158709617, "loss": 1.9711, "step": 5885 }, { "epoch": 0.2, "grad_norm": 0.5216679573059082, "learning_rate": 0.000593713987619855, "loss": 2.0647, "step": 5886 }, { "epoch": 0.2, "grad_norm": 0.4992954134941101, "learning_rate": 0.0005937118590122227, "loss": 1.8754, "step": 5887 }, { "epoch": 0.2, "grad_norm": 0.4853951334953308, "learning_rate": 0.0005937097300480676, "loss": 1.9816, "step": 5888 }, { "epoch": 0.2, "grad_norm": 0.49414998292922974, "learning_rate": 0.0005937076007273922, "loss": 1.9826, "step": 5889 }, { "epoch": 0.2, "grad_norm": 0.4938090443611145, "learning_rate": 0.0005937054710501989, "loss": 2.0185, "step": 5890 }, { "epoch": 0.2, "grad_norm": 0.503905713558197, "learning_rate": 0.0005937033410164904, "loss": 1.9743, "step": 5891 }, { "epoch": 0.2, "grad_norm": 0.5134013891220093, "learning_rate": 0.0005937012106262694, "loss": 1.9309, "step": 5892 }, { "epoch": 0.2, "grad_norm": 0.49712425470352173, "learning_rate": 0.0005936990798795385, "loss": 1.8861, "step": 5893 }, { "epoch": 0.2, "grad_norm": 0.4841329753398895, "learning_rate": 0.0005936969487763, "loss": 1.9252, "step": 5894 }, { "epoch": 0.2, "grad_norm": 0.505764365196228, "learning_rate": 0.0005936948173165569, "loss": 1.962, "step": 5895 }, { "epoch": 0.2, "grad_norm": 0.5176385641098022, "learning_rate": 0.0005936926855003114, "loss": 2.0295, "step": 5896 }, { "epoch": 0.2, "grad_norm": 0.4758249521255493, "learning_rate": 0.0005936905533275663, "loss": 2.0381, "step": 5897 }, { "epoch": 0.2, "grad_norm": 0.49451184272766113, "learning_rate": 0.0005936884207983241, "loss": 1.9022, "step": 5898 }, { "epoch": 0.2, "grad_norm": 0.5053011775016785, "learning_rate": 0.0005936862879125875, "loss": 2.0195, "step": 5899 }, { "epoch": 0.2, "grad_norm": 0.4839278757572174, "learning_rate": 0.000593684154670359, "loss": 1.8964, "step": 5900 }, { "epoch": 0.2, "grad_norm": 0.48449379205703735, "learning_rate": 0.0005936820210716412, "loss": 1.9925, "step": 5901 }, { "epoch": 0.2, "grad_norm": 0.5121718049049377, "learning_rate": 0.0005936798871164367, "loss": 1.9306, "step": 5902 }, { "epoch": 0.2, "grad_norm": 0.48903554677963257, "learning_rate": 0.0005936777528047481, "loss": 1.9048, "step": 5903 }, { "epoch": 0.2, "grad_norm": 0.47945520281791687, "learning_rate": 0.000593675618136578, "loss": 2.0073, "step": 5904 }, { "epoch": 0.2, "grad_norm": 0.4830543100833893, "learning_rate": 0.0005936734831119289, "loss": 1.9552, "step": 5905 }, { "epoch": 0.2, "grad_norm": 0.49482542276382446, "learning_rate": 0.0005936713477308035, "loss": 1.9837, "step": 5906 }, { "epoch": 0.2, "grad_norm": 0.48225322365760803, "learning_rate": 0.0005936692119932044, "loss": 1.9517, "step": 5907 }, { "epoch": 0.2, "grad_norm": 0.483924001455307, "learning_rate": 0.0005936670758991341, "loss": 1.9437, "step": 5908 }, { "epoch": 0.2, "grad_norm": 0.47855517268180847, "learning_rate": 0.0005936649394485952, "loss": 1.8735, "step": 5909 }, { "epoch": 0.2, "grad_norm": 0.5201607942581177, "learning_rate": 0.0005936628026415904, "loss": 1.9428, "step": 5910 }, { "epoch": 0.2, "grad_norm": 0.480485737323761, "learning_rate": 0.0005936606654781222, "loss": 2.0965, "step": 5911 }, { "epoch": 0.2, "grad_norm": 0.48184630274772644, "learning_rate": 0.0005936585279581933, "loss": 2.0246, "step": 5912 }, { "epoch": 0.2, "grad_norm": 0.4823005199432373, "learning_rate": 0.0005936563900818061, "loss": 1.9834, "step": 5913 }, { "epoch": 0.2, "grad_norm": 0.4922483563423157, "learning_rate": 0.0005936542518489633, "loss": 1.9536, "step": 5914 }, { "epoch": 0.2, "grad_norm": 0.49840766191482544, "learning_rate": 0.0005936521132596676, "loss": 2.0941, "step": 5915 }, { "epoch": 0.2, "grad_norm": 0.5047891139984131, "learning_rate": 0.0005936499743139214, "loss": 1.9808, "step": 5916 }, { "epoch": 0.2, "grad_norm": 0.470702588558197, "learning_rate": 0.0005936478350117275, "loss": 1.8981, "step": 5917 }, { "epoch": 0.2, "grad_norm": 0.4966751039028168, "learning_rate": 0.0005936456953530883, "loss": 1.9415, "step": 5918 }, { "epoch": 0.2, "grad_norm": 0.47616809606552124, "learning_rate": 0.0005936435553380066, "loss": 1.9156, "step": 5919 }, { "epoch": 0.2, "grad_norm": 0.5031400918960571, "learning_rate": 0.0005936414149664848, "loss": 2.0474, "step": 5920 }, { "epoch": 0.2, "grad_norm": 0.49555620551109314, "learning_rate": 0.0005936392742385256, "loss": 1.9755, "step": 5921 }, { "epoch": 0.2, "grad_norm": 0.49003034830093384, "learning_rate": 0.0005936371331541317, "loss": 1.9449, "step": 5922 }, { "epoch": 0.2, "grad_norm": 0.48300281167030334, "learning_rate": 0.0005936349917133054, "loss": 1.9529, "step": 5923 }, { "epoch": 0.2, "grad_norm": 0.49225836992263794, "learning_rate": 0.0005936328499160496, "loss": 1.9986, "step": 5924 }, { "epoch": 0.2, "grad_norm": 0.47729021310806274, "learning_rate": 0.0005936307077623668, "loss": 1.8599, "step": 5925 }, { "epoch": 0.2, "grad_norm": 0.47774484753608704, "learning_rate": 0.0005936285652522596, "loss": 1.9166, "step": 5926 }, { "epoch": 0.2, "grad_norm": 0.5025638341903687, "learning_rate": 0.0005936264223857304, "loss": 1.8991, "step": 5927 }, { "epoch": 0.2, "grad_norm": 0.4660360813140869, "learning_rate": 0.0005936242791627823, "loss": 1.949, "step": 5928 }, { "epoch": 0.2, "grad_norm": 0.48929107189178467, "learning_rate": 0.0005936221355834172, "loss": 1.9389, "step": 5929 }, { "epoch": 0.2, "grad_norm": 0.6555091738700867, "learning_rate": 0.0005936199916476383, "loss": 2.0366, "step": 5930 }, { "epoch": 0.2, "grad_norm": 0.48109474778175354, "learning_rate": 0.0005936178473554481, "loss": 1.9334, "step": 5931 }, { "epoch": 0.2, "grad_norm": 0.46313905715942383, "learning_rate": 0.000593615702706849, "loss": 1.9674, "step": 5932 }, { "epoch": 0.2, "grad_norm": 0.49912387132644653, "learning_rate": 0.0005936135577018437, "loss": 1.9549, "step": 5933 }, { "epoch": 0.2, "grad_norm": 0.4644795358181, "learning_rate": 0.0005936114123404348, "loss": 1.988, "step": 5934 }, { "epoch": 0.2, "grad_norm": 0.4683797061443329, "learning_rate": 0.000593609266622625, "loss": 1.9365, "step": 5935 }, { "epoch": 0.2, "grad_norm": 0.4897684156894684, "learning_rate": 0.0005936071205484166, "loss": 1.8805, "step": 5936 }, { "epoch": 0.2, "grad_norm": 0.45901820063591003, "learning_rate": 0.0005936049741178125, "loss": 1.9468, "step": 5937 }, { "epoch": 0.2, "grad_norm": 0.48811790347099304, "learning_rate": 0.0005936028273308153, "loss": 1.9732, "step": 5938 }, { "epoch": 0.2, "grad_norm": 0.512100100517273, "learning_rate": 0.0005936006801874274, "loss": 1.9987, "step": 5939 }, { "epoch": 0.2, "grad_norm": 0.46686112880706787, "learning_rate": 0.0005935985326876517, "loss": 1.9093, "step": 5940 }, { "epoch": 0.2, "grad_norm": 0.4845374524593353, "learning_rate": 0.0005935963848314905, "loss": 1.972, "step": 5941 }, { "epoch": 0.2, "grad_norm": 0.5017474293708801, "learning_rate": 0.0005935942366189467, "loss": 1.9503, "step": 5942 }, { "epoch": 0.2, "grad_norm": 0.47866111993789673, "learning_rate": 0.0005935920880500226, "loss": 2.0121, "step": 5943 }, { "epoch": 0.2, "grad_norm": 0.49554482102394104, "learning_rate": 0.0005935899391247211, "loss": 1.9923, "step": 5944 }, { "epoch": 0.2, "grad_norm": 0.5011741518974304, "learning_rate": 0.0005935877898430445, "loss": 1.9796, "step": 5945 }, { "epoch": 0.2, "grad_norm": 0.4876568913459778, "learning_rate": 0.0005935856402049957, "loss": 2.0138, "step": 5946 }, { "epoch": 0.2, "grad_norm": 0.4968588352203369, "learning_rate": 0.0005935834902105772, "loss": 1.9859, "step": 5947 }, { "epoch": 0.2, "grad_norm": 0.4669608175754547, "learning_rate": 0.0005935813398597915, "loss": 1.9824, "step": 5948 }, { "epoch": 0.2, "grad_norm": 0.4901241362094879, "learning_rate": 0.0005935791891526414, "loss": 1.9108, "step": 5949 }, { "epoch": 0.2, "grad_norm": 0.5027356743812561, "learning_rate": 0.0005935770380891294, "loss": 2.018, "step": 5950 }, { "epoch": 0.2, "grad_norm": 0.501163899898529, "learning_rate": 0.0005935748866692581, "loss": 2.0496, "step": 5951 }, { "epoch": 0.2, "grad_norm": 0.4745732247829437, "learning_rate": 0.0005935727348930302, "loss": 2.0068, "step": 5952 }, { "epoch": 0.2, "grad_norm": 0.4795570969581604, "learning_rate": 0.0005935705827604483, "loss": 1.9861, "step": 5953 }, { "epoch": 0.2, "grad_norm": 0.4831292927265167, "learning_rate": 0.0005935684302715149, "loss": 1.9995, "step": 5954 }, { "epoch": 0.2, "grad_norm": 0.4859306216239929, "learning_rate": 0.0005935662774262327, "loss": 1.9957, "step": 5955 }, { "epoch": 0.2, "grad_norm": 0.485443651676178, "learning_rate": 0.0005935641242246042, "loss": 1.9081, "step": 5956 }, { "epoch": 0.2, "grad_norm": 0.5014786124229431, "learning_rate": 0.0005935619706666322, "loss": 2.0099, "step": 5957 }, { "epoch": 0.2, "grad_norm": 0.4800710678100586, "learning_rate": 0.0005935598167523192, "loss": 1.9825, "step": 5958 }, { "epoch": 0.2, "grad_norm": 0.4770241677761078, "learning_rate": 0.0005935576624816678, "loss": 2.0053, "step": 5959 }, { "epoch": 0.2, "grad_norm": 0.5197508335113525, "learning_rate": 0.0005935555078546808, "loss": 2.0131, "step": 5960 }, { "epoch": 0.2, "grad_norm": 0.5053058862686157, "learning_rate": 0.0005935533528713605, "loss": 1.9915, "step": 5961 }, { "epoch": 0.2, "grad_norm": 0.480690598487854, "learning_rate": 0.0005935511975317097, "loss": 2.0389, "step": 5962 }, { "epoch": 0.2, "grad_norm": 0.5014980435371399, "learning_rate": 0.000593549041835731, "loss": 1.9909, "step": 5963 }, { "epoch": 0.2, "grad_norm": 0.48855140805244446, "learning_rate": 0.0005935468857834272, "loss": 1.9464, "step": 5964 }, { "epoch": 0.2, "grad_norm": 0.4871937036514282, "learning_rate": 0.0005935447293748005, "loss": 1.9941, "step": 5965 }, { "epoch": 0.2, "grad_norm": 0.48284655809402466, "learning_rate": 0.0005935425726098539, "loss": 1.9594, "step": 5966 }, { "epoch": 0.2, "grad_norm": 0.4935556948184967, "learning_rate": 0.0005935404154885898, "loss": 2.0768, "step": 5967 }, { "epoch": 0.2, "grad_norm": 0.5201449394226074, "learning_rate": 0.0005935382580110109, "loss": 2.0441, "step": 5968 }, { "epoch": 0.2, "grad_norm": 0.4866482615470886, "learning_rate": 0.0005935361001771197, "loss": 2.0096, "step": 5969 }, { "epoch": 0.2, "grad_norm": 0.4676971435546875, "learning_rate": 0.0005935339419869191, "loss": 1.8668, "step": 5970 }, { "epoch": 0.2, "grad_norm": 0.5158078074455261, "learning_rate": 0.0005935317834404114, "loss": 1.8714, "step": 5971 }, { "epoch": 0.2, "grad_norm": 0.5613626837730408, "learning_rate": 0.0005935296245375995, "loss": 1.8903, "step": 5972 }, { "epoch": 0.2, "grad_norm": 0.4983440637588501, "learning_rate": 0.0005935274652784858, "loss": 1.9943, "step": 5973 }, { "epoch": 0.2, "grad_norm": 0.49592891335487366, "learning_rate": 0.000593525305663073, "loss": 1.9735, "step": 5974 }, { "epoch": 0.2, "grad_norm": 0.5061206221580505, "learning_rate": 0.0005935231456913638, "loss": 1.9801, "step": 5975 }, { "epoch": 0.2, "grad_norm": 0.5048776268959045, "learning_rate": 0.0005935209853633607, "loss": 2.0144, "step": 5976 }, { "epoch": 0.2, "grad_norm": 0.4983927607536316, "learning_rate": 0.0005935188246790664, "loss": 1.9152, "step": 5977 }, { "epoch": 0.2, "grad_norm": 0.5204849243164062, "learning_rate": 0.0005935166636384835, "loss": 1.8941, "step": 5978 }, { "epoch": 0.2, "grad_norm": 0.4882519543170929, "learning_rate": 0.0005935145022416146, "loss": 1.9731, "step": 5979 }, { "epoch": 0.2, "grad_norm": 0.4750342071056366, "learning_rate": 0.0005935123404884623, "loss": 1.9477, "step": 5980 }, { "epoch": 0.2, "grad_norm": 0.47778603434562683, "learning_rate": 0.0005935101783790292, "loss": 1.8772, "step": 5981 }, { "epoch": 0.2, "grad_norm": 0.5059049129486084, "learning_rate": 0.0005935080159133182, "loss": 1.9525, "step": 5982 }, { "epoch": 0.2, "grad_norm": 0.4875728189945221, "learning_rate": 0.0005935058530913315, "loss": 1.9381, "step": 5983 }, { "epoch": 0.2, "grad_norm": 0.5093532800674438, "learning_rate": 0.0005935036899130721, "loss": 1.9509, "step": 5984 }, { "epoch": 0.2, "grad_norm": 0.48125824332237244, "learning_rate": 0.0005935015263785424, "loss": 1.8859, "step": 5985 }, { "epoch": 0.2, "grad_norm": 0.5043042898178101, "learning_rate": 0.000593499362487745, "loss": 1.9746, "step": 5986 }, { "epoch": 0.2, "grad_norm": 0.4758026599884033, "learning_rate": 0.0005934971982406828, "loss": 1.943, "step": 5987 }, { "epoch": 0.2, "grad_norm": 0.47780731320381165, "learning_rate": 0.0005934950336373582, "loss": 1.9763, "step": 5988 }, { "epoch": 0.2, "grad_norm": 0.49136707186698914, "learning_rate": 0.0005934928686777737, "loss": 2.0075, "step": 5989 }, { "epoch": 0.2, "grad_norm": 0.5173149704933167, "learning_rate": 0.0005934907033619324, "loss": 2.0246, "step": 5990 }, { "epoch": 0.2, "grad_norm": 0.49879828095436096, "learning_rate": 0.0005934885376898364, "loss": 1.9745, "step": 5991 }, { "epoch": 0.2, "grad_norm": 0.4780958294868469, "learning_rate": 0.0005934863716614886, "loss": 1.971, "step": 5992 }, { "epoch": 0.2, "grad_norm": 0.5168325901031494, "learning_rate": 0.0005934842052768916, "loss": 1.8933, "step": 5993 }, { "epoch": 0.2, "grad_norm": 0.47671201825141907, "learning_rate": 0.0005934820385360482, "loss": 1.93, "step": 5994 }, { "epoch": 0.2, "grad_norm": 0.5127972364425659, "learning_rate": 0.0005934798714389605, "loss": 1.917, "step": 5995 }, { "epoch": 0.2, "grad_norm": 0.5249420404434204, "learning_rate": 0.0005934777039856317, "loss": 2.0103, "step": 5996 }, { "epoch": 0.2, "grad_norm": 0.47693949937820435, "learning_rate": 0.0005934755361760642, "loss": 2.0118, "step": 5997 }, { "epoch": 0.2, "grad_norm": 0.4886006712913513, "learning_rate": 0.0005934733680102606, "loss": 1.8897, "step": 5998 }, { "epoch": 0.2, "grad_norm": 0.5170087218284607, "learning_rate": 0.0005934711994882236, "loss": 1.9412, "step": 5999 }, { "epoch": 0.2, "grad_norm": 0.5046762228012085, "learning_rate": 0.0005934690306099559, "loss": 2.0633, "step": 6000 }, { "epoch": 0.2, "grad_norm": 0.4666849970817566, "learning_rate": 0.0005934668613754599, "loss": 1.889, "step": 6001 }, { "epoch": 0.2, "grad_norm": 0.49790358543395996, "learning_rate": 0.0005934646917847384, "loss": 1.9623, "step": 6002 }, { "epoch": 0.2, "grad_norm": 0.5005450248718262, "learning_rate": 0.0005934625218377939, "loss": 1.9384, "step": 6003 }, { "epoch": 0.2, "grad_norm": 0.49999821186065674, "learning_rate": 0.0005934603515346293, "loss": 1.8859, "step": 6004 }, { "epoch": 0.2, "grad_norm": 0.49907341599464417, "learning_rate": 0.000593458180875247, "loss": 1.9202, "step": 6005 }, { "epoch": 0.2, "grad_norm": 0.5144606828689575, "learning_rate": 0.0005934560098596497, "loss": 1.9388, "step": 6006 }, { "epoch": 0.2, "grad_norm": 0.4750068485736847, "learning_rate": 0.00059345383848784, "loss": 1.9372, "step": 6007 }, { "epoch": 0.2, "grad_norm": 0.5130625367164612, "learning_rate": 0.0005934516667598207, "loss": 1.9699, "step": 6008 }, { "epoch": 0.2, "grad_norm": 0.5241479277610779, "learning_rate": 0.0005934494946755942, "loss": 1.9327, "step": 6009 }, { "epoch": 0.2, "grad_norm": 0.47792285680770874, "learning_rate": 0.0005934473222351633, "loss": 1.9311, "step": 6010 }, { "epoch": 0.2, "grad_norm": 0.4934009909629822, "learning_rate": 0.0005934451494385306, "loss": 2.0219, "step": 6011 }, { "epoch": 0.2, "grad_norm": 0.48836028575897217, "learning_rate": 0.0005934429762856988, "loss": 1.9116, "step": 6012 }, { "epoch": 0.2, "grad_norm": 0.4791862666606903, "learning_rate": 0.0005934408027766703, "loss": 1.9904, "step": 6013 }, { "epoch": 0.2, "grad_norm": 0.49750858545303345, "learning_rate": 0.0005934386289114479, "loss": 1.9167, "step": 6014 }, { "epoch": 0.2, "grad_norm": 0.4810788631439209, "learning_rate": 0.0005934364546900344, "loss": 1.94, "step": 6015 }, { "epoch": 0.2, "grad_norm": 0.47395059466362, "learning_rate": 0.0005934342801124323, "loss": 1.905, "step": 6016 }, { "epoch": 0.2, "grad_norm": 0.47584259510040283, "learning_rate": 0.0005934321051786441, "loss": 1.9065, "step": 6017 }, { "epoch": 0.2, "grad_norm": 0.48115938901901245, "learning_rate": 0.0005934299298886725, "loss": 1.9152, "step": 6018 }, { "epoch": 0.2, "grad_norm": 0.48301026225090027, "learning_rate": 0.0005934277542425203, "loss": 1.9212, "step": 6019 }, { "epoch": 0.2, "grad_norm": 0.4680514335632324, "learning_rate": 0.0005934255782401901, "loss": 1.8809, "step": 6020 }, { "epoch": 0.2, "grad_norm": 0.5084684491157532, "learning_rate": 0.0005934234018816843, "loss": 1.9375, "step": 6021 }, { "epoch": 0.2, "grad_norm": 0.49197739362716675, "learning_rate": 0.0005934212251670059, "loss": 1.9549, "step": 6022 }, { "epoch": 0.2, "grad_norm": 0.4860290288925171, "learning_rate": 0.0005934190480961574, "loss": 1.974, "step": 6023 }, { "epoch": 0.2, "grad_norm": 0.4955637753009796, "learning_rate": 0.0005934168706691412, "loss": 1.875, "step": 6024 }, { "epoch": 0.2, "grad_norm": 0.4801320731639862, "learning_rate": 0.0005934146928859603, "loss": 1.9229, "step": 6025 }, { "epoch": 0.2, "grad_norm": 0.5222907066345215, "learning_rate": 0.0005934125147466172, "loss": 2.0969, "step": 6026 }, { "epoch": 0.2, "grad_norm": 0.494202196598053, "learning_rate": 0.0005934103362511145, "loss": 2.0024, "step": 6027 }, { "epoch": 0.2, "grad_norm": 0.4867865741252899, "learning_rate": 0.0005934081573994548, "loss": 2.0083, "step": 6028 }, { "epoch": 0.2, "grad_norm": 0.4920464754104614, "learning_rate": 0.0005934059781916409, "loss": 1.8566, "step": 6029 }, { "epoch": 0.2, "grad_norm": 0.4956754148006439, "learning_rate": 0.0005934037986276753, "loss": 1.889, "step": 6030 }, { "epoch": 0.2, "grad_norm": 0.47930994629859924, "learning_rate": 0.0005934016187075609, "loss": 1.9069, "step": 6031 }, { "epoch": 0.2, "grad_norm": 0.5178795456886292, "learning_rate": 0.0005933994384313, "loss": 1.9803, "step": 6032 }, { "epoch": 0.2, "grad_norm": 0.48568010330200195, "learning_rate": 0.0005933972577988956, "loss": 1.9966, "step": 6033 }, { "epoch": 0.2, "grad_norm": 0.47778764367103577, "learning_rate": 0.0005933950768103499, "loss": 2.01, "step": 6034 }, { "epoch": 0.2, "grad_norm": 0.47688519954681396, "learning_rate": 0.000593392895465666, "loss": 1.9219, "step": 6035 }, { "epoch": 0.2, "grad_norm": 0.500336229801178, "learning_rate": 0.0005933907137648463, "loss": 1.9759, "step": 6036 }, { "epoch": 0.2, "grad_norm": 0.49526408314704895, "learning_rate": 0.0005933885317078935, "loss": 1.9883, "step": 6037 }, { "epoch": 0.2, "grad_norm": 0.4688076674938202, "learning_rate": 0.0005933863492948103, "loss": 1.8942, "step": 6038 }, { "epoch": 0.2, "grad_norm": 0.5017056465148926, "learning_rate": 0.0005933841665255992, "loss": 2.0149, "step": 6039 }, { "epoch": 0.2, "grad_norm": 0.49291032552719116, "learning_rate": 0.0005933819834002629, "loss": 1.9269, "step": 6040 }, { "epoch": 0.2, "grad_norm": 0.4802488684654236, "learning_rate": 0.0005933797999188043, "loss": 1.9803, "step": 6041 }, { "epoch": 0.2, "grad_norm": 0.4916742444038391, "learning_rate": 0.0005933776160812257, "loss": 1.9621, "step": 6042 }, { "epoch": 0.2, "grad_norm": 0.4803496301174164, "learning_rate": 0.00059337543188753, "loss": 1.9348, "step": 6043 }, { "epoch": 0.2, "grad_norm": 0.5029796957969666, "learning_rate": 0.0005933732473377198, "loss": 1.862, "step": 6044 }, { "epoch": 0.2, "grad_norm": 0.48858752846717834, "learning_rate": 0.0005933710624317976, "loss": 2.0269, "step": 6045 }, { "epoch": 0.2, "grad_norm": 0.6640497446060181, "learning_rate": 0.0005933688771697661, "loss": 2.0804, "step": 6046 }, { "epoch": 0.2, "grad_norm": 0.4804143011569977, "learning_rate": 0.000593366691551628, "loss": 1.9262, "step": 6047 }, { "epoch": 0.2, "grad_norm": 0.4940844178199768, "learning_rate": 0.0005933645055773861, "loss": 1.968, "step": 6048 }, { "epoch": 0.2, "grad_norm": 0.4702639579772949, "learning_rate": 0.0005933623192470428, "loss": 1.8792, "step": 6049 }, { "epoch": 0.2, "grad_norm": 0.48491987586021423, "learning_rate": 0.000593360132560601, "loss": 2.0149, "step": 6050 }, { "epoch": 0.2, "grad_norm": 0.49699363112449646, "learning_rate": 0.0005933579455180631, "loss": 2.0295, "step": 6051 }, { "epoch": 0.2, "grad_norm": 0.4932718873023987, "learning_rate": 0.000593355758119432, "loss": 1.9239, "step": 6052 }, { "epoch": 0.2, "grad_norm": 0.4773760139942169, "learning_rate": 0.0005933535703647101, "loss": 2.0397, "step": 6053 }, { "epoch": 0.2, "grad_norm": 0.48215538263320923, "learning_rate": 0.0005933513822539002, "loss": 1.865, "step": 6054 }, { "epoch": 0.2, "grad_norm": 0.49133771657943726, "learning_rate": 0.0005933491937870051, "loss": 2.0071, "step": 6055 }, { "epoch": 0.2, "grad_norm": 0.49967774748802185, "learning_rate": 0.0005933470049640272, "loss": 1.9446, "step": 6056 }, { "epoch": 0.2, "grad_norm": 0.4809145927429199, "learning_rate": 0.0005933448157849692, "loss": 1.946, "step": 6057 }, { "epoch": 0.2, "grad_norm": 0.6316310167312622, "learning_rate": 0.0005933426262498338, "loss": 1.9726, "step": 6058 }, { "epoch": 0.2, "grad_norm": 0.500190794467926, "learning_rate": 0.0005933404363586237, "loss": 1.9628, "step": 6059 }, { "epoch": 0.2, "grad_norm": 0.5029029250144958, "learning_rate": 0.0005933382461113417, "loss": 1.9544, "step": 6060 }, { "epoch": 0.2, "grad_norm": 0.5117032527923584, "learning_rate": 0.0005933360555079901, "loss": 2.0001, "step": 6061 }, { "epoch": 0.2, "grad_norm": 0.4825131595134735, "learning_rate": 0.0005933338645485719, "loss": 1.9605, "step": 6062 }, { "epoch": 0.2, "grad_norm": 0.5099707245826721, "learning_rate": 0.0005933316732330895, "loss": 1.9, "step": 6063 }, { "epoch": 0.2, "grad_norm": 0.5102372169494629, "learning_rate": 0.0005933294815615456, "loss": 1.9593, "step": 6064 }, { "epoch": 0.2, "grad_norm": 0.46993669867515564, "learning_rate": 0.0005933272895339431, "loss": 1.9625, "step": 6065 }, { "epoch": 0.2, "grad_norm": 0.49727022647857666, "learning_rate": 0.0005933250971502844, "loss": 1.9199, "step": 6066 }, { "epoch": 0.2, "grad_norm": 0.5195161700248718, "learning_rate": 0.0005933229044105722, "loss": 1.8857, "step": 6067 }, { "epoch": 0.2, "grad_norm": 0.5176258087158203, "learning_rate": 0.0005933207113148093, "loss": 2.0625, "step": 6068 }, { "epoch": 0.2, "grad_norm": 0.48542723059654236, "learning_rate": 0.0005933185178629983, "loss": 2.0041, "step": 6069 }, { "epoch": 0.2, "grad_norm": 0.5042951107025146, "learning_rate": 0.0005933163240551418, "loss": 2.0126, "step": 6070 }, { "epoch": 0.2, "grad_norm": 0.4813753068447113, "learning_rate": 0.0005933141298912425, "loss": 1.9871, "step": 6071 }, { "epoch": 0.2, "grad_norm": 0.48643311858177185, "learning_rate": 0.000593311935371303, "loss": 1.8843, "step": 6072 }, { "epoch": 0.2, "grad_norm": 0.4772200584411621, "learning_rate": 0.0005933097404953261, "loss": 2.0408, "step": 6073 }, { "epoch": 0.2, "grad_norm": 0.4847312867641449, "learning_rate": 0.0005933075452633144, "loss": 1.9512, "step": 6074 }, { "epoch": 0.2, "grad_norm": 0.4729306995868683, "learning_rate": 0.0005933053496752705, "loss": 1.9009, "step": 6075 }, { "epoch": 0.2, "grad_norm": 0.5321458578109741, "learning_rate": 0.0005933031537311971, "loss": 1.9012, "step": 6076 }, { "epoch": 0.2, "grad_norm": 0.5046401023864746, "learning_rate": 0.0005933009574310969, "loss": 1.9914, "step": 6077 }, { "epoch": 0.2, "grad_norm": 0.5020663738250732, "learning_rate": 0.0005932987607749727, "loss": 1.9512, "step": 6078 }, { "epoch": 0.2, "grad_norm": 0.48636138439178467, "learning_rate": 0.000593296563762827, "loss": 1.9595, "step": 6079 }, { "epoch": 0.2, "grad_norm": 0.5732472538948059, "learning_rate": 0.0005932943663946623, "loss": 1.9616, "step": 6080 }, { "epoch": 0.2, "grad_norm": 0.4768572151660919, "learning_rate": 0.0005932921686704816, "loss": 1.9911, "step": 6081 }, { "epoch": 0.2, "grad_norm": 0.5064303278923035, "learning_rate": 0.0005932899705902873, "loss": 1.9165, "step": 6082 }, { "epoch": 0.2, "grad_norm": 0.5196678042411804, "learning_rate": 0.0005932877721540824, "loss": 1.9142, "step": 6083 }, { "epoch": 0.2, "grad_norm": 0.4677920341491699, "learning_rate": 0.0005932855733618691, "loss": 1.9538, "step": 6084 }, { "epoch": 0.2, "grad_norm": 0.5011516213417053, "learning_rate": 0.0005932833742136505, "loss": 1.9525, "step": 6085 }, { "epoch": 0.2, "grad_norm": 0.5056595802307129, "learning_rate": 0.000593281174709429, "loss": 2.0069, "step": 6086 }, { "epoch": 0.2, "grad_norm": 0.4959375858306885, "learning_rate": 0.0005932789748492075, "loss": 1.9385, "step": 6087 }, { "epoch": 0.2, "grad_norm": 0.4949890673160553, "learning_rate": 0.0005932767746329884, "loss": 1.9951, "step": 6088 }, { "epoch": 0.2, "grad_norm": 0.4893511235713959, "learning_rate": 0.0005932745740607746, "loss": 1.9805, "step": 6089 }, { "epoch": 0.2, "grad_norm": 0.5066318511962891, "learning_rate": 0.0005932723731325686, "loss": 1.9525, "step": 6090 }, { "epoch": 0.2, "grad_norm": 0.4869982600212097, "learning_rate": 0.0005932701718483732, "loss": 1.9397, "step": 6091 }, { "epoch": 0.2, "grad_norm": 0.48790502548217773, "learning_rate": 0.0005932679702081911, "loss": 1.9838, "step": 6092 }, { "epoch": 0.2, "grad_norm": 0.49389415979385376, "learning_rate": 0.0005932657682120247, "loss": 1.9248, "step": 6093 }, { "epoch": 0.2, "grad_norm": 0.4801628887653351, "learning_rate": 0.000593263565859877, "loss": 1.9565, "step": 6094 }, { "epoch": 0.2, "grad_norm": 0.4767645001411438, "learning_rate": 0.0005932613631517505, "loss": 1.9605, "step": 6095 }, { "epoch": 0.2, "grad_norm": 0.5430610179901123, "learning_rate": 0.000593259160087648, "loss": 1.9538, "step": 6096 }, { "epoch": 0.2, "grad_norm": 0.5050867199897766, "learning_rate": 0.000593256956667572, "loss": 2.0563, "step": 6097 }, { "epoch": 0.2, "grad_norm": 0.48552650213241577, "learning_rate": 0.0005932547528915253, "loss": 1.8959, "step": 6098 }, { "epoch": 0.2, "grad_norm": 0.4866793155670166, "learning_rate": 0.0005932525487595104, "loss": 1.9733, "step": 6099 }, { "epoch": 0.2, "grad_norm": 0.4987476170063019, "learning_rate": 0.0005932503442715303, "loss": 1.9539, "step": 6100 }, { "epoch": 0.2, "grad_norm": 0.4608461260795593, "learning_rate": 0.0005932481394275874, "loss": 1.8526, "step": 6101 }, { "epoch": 0.2, "grad_norm": 0.48089930415153503, "learning_rate": 0.0005932459342276846, "loss": 2.0138, "step": 6102 }, { "epoch": 0.2, "grad_norm": 0.4883859157562256, "learning_rate": 0.0005932437286718243, "loss": 1.8984, "step": 6103 }, { "epoch": 0.2, "grad_norm": 0.4883805215358734, "learning_rate": 0.0005932415227600093, "loss": 1.9662, "step": 6104 }, { "epoch": 0.2, "grad_norm": 0.45872730016708374, "learning_rate": 0.0005932393164922424, "loss": 1.9613, "step": 6105 }, { "epoch": 0.2, "grad_norm": 0.4785185158252716, "learning_rate": 0.0005932371098685261, "loss": 1.9319, "step": 6106 }, { "epoch": 0.2, "grad_norm": 0.48527249693870544, "learning_rate": 0.0005932349028888633, "loss": 1.9521, "step": 6107 }, { "epoch": 0.2, "grad_norm": 0.510094404220581, "learning_rate": 0.0005932326955532563, "loss": 1.9841, "step": 6108 }, { "epoch": 0.2, "grad_norm": 0.4747673571109772, "learning_rate": 0.0005932304878617082, "loss": 1.8941, "step": 6109 }, { "epoch": 0.2, "grad_norm": 0.46201273798942566, "learning_rate": 0.0005932282798142215, "loss": 1.9463, "step": 6110 }, { "epoch": 0.2, "grad_norm": 0.457676500082016, "learning_rate": 0.0005932260714107988, "loss": 1.9543, "step": 6111 }, { "epoch": 0.2, "grad_norm": 0.46886876225471497, "learning_rate": 0.0005932238626514428, "loss": 1.9114, "step": 6112 }, { "epoch": 0.2, "grad_norm": 0.48416852951049805, "learning_rate": 0.0005932216535361563, "loss": 1.9253, "step": 6113 }, { "epoch": 0.2, "grad_norm": 0.477644145488739, "learning_rate": 0.0005932194440649419, "loss": 1.906, "step": 6114 }, { "epoch": 0.2, "grad_norm": 0.48316410183906555, "learning_rate": 0.0005932172342378024, "loss": 1.9172, "step": 6115 }, { "epoch": 0.2, "grad_norm": 0.48528000712394714, "learning_rate": 0.0005932150240547402, "loss": 1.9699, "step": 6116 }, { "epoch": 0.2, "grad_norm": 0.47284412384033203, "learning_rate": 0.0005932128135157583, "loss": 1.9965, "step": 6117 }, { "epoch": 0.2, "grad_norm": 0.4748910069465637, "learning_rate": 0.0005932106026208592, "loss": 1.9916, "step": 6118 }, { "epoch": 0.2, "grad_norm": 0.47577545046806335, "learning_rate": 0.0005932083913700457, "loss": 1.953, "step": 6119 }, { "epoch": 0.2, "grad_norm": 0.5131489038467407, "learning_rate": 0.0005932061797633203, "loss": 1.9261, "step": 6120 }, { "epoch": 0.2, "grad_norm": 0.46705177426338196, "learning_rate": 0.0005932039678006859, "loss": 1.86, "step": 6121 }, { "epoch": 0.2, "grad_norm": 0.4888937473297119, "learning_rate": 0.0005932017554821451, "loss": 1.9055, "step": 6122 }, { "epoch": 0.2, "grad_norm": 0.5050128698348999, "learning_rate": 0.0005931995428077005, "loss": 1.93, "step": 6123 }, { "epoch": 0.2, "grad_norm": 0.4789970815181732, "learning_rate": 0.0005931973297773548, "loss": 2.0072, "step": 6124 }, { "epoch": 0.2, "grad_norm": 0.49020275473594666, "learning_rate": 0.0005931951163911108, "loss": 1.9367, "step": 6125 }, { "epoch": 0.2, "grad_norm": 0.507016658782959, "learning_rate": 0.0005931929026489711, "loss": 1.979, "step": 6126 }, { "epoch": 0.2, "grad_norm": 0.5107402205467224, "learning_rate": 0.0005931906885509384, "loss": 2.0251, "step": 6127 }, { "epoch": 0.2, "grad_norm": 0.4704926013946533, "learning_rate": 0.0005931884740970155, "loss": 1.8901, "step": 6128 }, { "epoch": 0.2, "grad_norm": 0.4842996597290039, "learning_rate": 0.0005931862592872049, "loss": 1.9572, "step": 6129 }, { "epoch": 0.2, "grad_norm": 0.5173564553260803, "learning_rate": 0.0005931840441215095, "loss": 2.0904, "step": 6130 }, { "epoch": 0.2, "grad_norm": 0.49682971835136414, "learning_rate": 0.0005931818285999317, "loss": 1.9171, "step": 6131 }, { "epoch": 0.2, "grad_norm": 0.4693193733692169, "learning_rate": 0.0005931796127224745, "loss": 1.9063, "step": 6132 }, { "epoch": 0.2, "grad_norm": 0.48646047711372375, "learning_rate": 0.0005931773964891404, "loss": 1.9291, "step": 6133 }, { "epoch": 0.2, "grad_norm": 0.4972532391548157, "learning_rate": 0.0005931751798999321, "loss": 1.9157, "step": 6134 }, { "epoch": 0.2, "grad_norm": 0.5212598443031311, "learning_rate": 0.0005931729629548524, "loss": 1.9492, "step": 6135 }, { "epoch": 0.2, "grad_norm": 0.48573586344718933, "learning_rate": 0.0005931707456539039, "loss": 1.9407, "step": 6136 }, { "epoch": 0.2, "grad_norm": 0.5361997485160828, "learning_rate": 0.0005931685279970892, "loss": 1.9849, "step": 6137 }, { "epoch": 0.2, "grad_norm": 0.49866732954978943, "learning_rate": 0.0005931663099844113, "loss": 1.9451, "step": 6138 }, { "epoch": 0.2, "grad_norm": 0.47555404901504517, "learning_rate": 0.0005931640916158725, "loss": 1.9627, "step": 6139 }, { "epoch": 0.2, "grad_norm": 0.5028765201568604, "learning_rate": 0.0005931618728914758, "loss": 2.01, "step": 6140 }, { "epoch": 0.2, "grad_norm": 0.49343055486679077, "learning_rate": 0.0005931596538112238, "loss": 1.9135, "step": 6141 }, { "epoch": 0.2, "grad_norm": 0.49337610602378845, "learning_rate": 0.0005931574343751192, "loss": 1.9076, "step": 6142 }, { "epoch": 0.2, "grad_norm": 0.4913119673728943, "learning_rate": 0.0005931552145831645, "loss": 1.902, "step": 6143 }, { "epoch": 0.2, "grad_norm": 0.5081397891044617, "learning_rate": 0.0005931529944353627, "loss": 1.9536, "step": 6144 }, { "epoch": 0.2, "grad_norm": 0.45219066739082336, "learning_rate": 0.0005931507739317164, "loss": 1.8903, "step": 6145 }, { "epoch": 0.2, "grad_norm": 0.49573469161987305, "learning_rate": 0.0005931485530722281, "loss": 1.9323, "step": 6146 }, { "epoch": 0.2, "grad_norm": 0.4803113639354706, "learning_rate": 0.0005931463318569008, "loss": 1.9299, "step": 6147 }, { "epoch": 0.2, "grad_norm": 0.5004054307937622, "learning_rate": 0.000593144110285737, "loss": 2.0381, "step": 6148 }, { "epoch": 0.2, "grad_norm": 0.5098682045936584, "learning_rate": 0.0005931418883587394, "loss": 2.021, "step": 6149 }, { "epoch": 0.2, "grad_norm": 0.5003572106361389, "learning_rate": 0.0005931396660759108, "loss": 1.853, "step": 6150 }, { "epoch": 0.2, "grad_norm": 0.4801081120967865, "learning_rate": 0.0005931374434372538, "loss": 1.8569, "step": 6151 }, { "epoch": 0.2, "grad_norm": 0.48395586013793945, "learning_rate": 0.0005931352204427711, "loss": 1.9637, "step": 6152 }, { "epoch": 0.2, "grad_norm": 0.49832984805107117, "learning_rate": 0.0005931329970924655, "loss": 2.006, "step": 6153 }, { "epoch": 0.2, "grad_norm": 0.48781588673591614, "learning_rate": 0.0005931307733863397, "loss": 2.0181, "step": 6154 }, { "epoch": 0.2, "grad_norm": 0.4892093539237976, "learning_rate": 0.0005931285493243963, "loss": 2.0048, "step": 6155 }, { "epoch": 0.2, "grad_norm": 0.49521318078041077, "learning_rate": 0.0005931263249066378, "loss": 2.0001, "step": 6156 }, { "epoch": 0.2, "grad_norm": 0.49240055680274963, "learning_rate": 0.0005931241001330674, "loss": 1.9784, "step": 6157 }, { "epoch": 0.2, "grad_norm": 0.48776698112487793, "learning_rate": 0.0005931218750036874, "loss": 1.8742, "step": 6158 }, { "epoch": 0.2, "grad_norm": 0.49272283911705017, "learning_rate": 0.0005931196495185006, "loss": 1.9989, "step": 6159 }, { "epoch": 0.2, "grad_norm": 0.49225959181785583, "learning_rate": 0.0005931174236775098, "loss": 1.9769, "step": 6160 }, { "epoch": 0.2, "grad_norm": 0.4843744933605194, "learning_rate": 0.0005931151974807176, "loss": 1.8873, "step": 6161 }, { "epoch": 0.21, "grad_norm": 0.5183442234992981, "learning_rate": 0.0005931129709281268, "loss": 2.083, "step": 6162 }, { "epoch": 0.21, "grad_norm": 0.5023642778396606, "learning_rate": 0.00059311074401974, "loss": 1.9111, "step": 6163 }, { "epoch": 0.21, "grad_norm": 0.47486191987991333, "learning_rate": 0.0005931085167555601, "loss": 1.9462, "step": 6164 }, { "epoch": 0.21, "grad_norm": 0.4805775582790375, "learning_rate": 0.0005931062891355894, "loss": 1.9567, "step": 6165 }, { "epoch": 0.21, "grad_norm": 0.49064213037490845, "learning_rate": 0.000593104061159831, "loss": 1.9413, "step": 6166 }, { "epoch": 0.21, "grad_norm": 0.5064660310745239, "learning_rate": 0.0005931018328282874, "loss": 1.9644, "step": 6167 }, { "epoch": 0.21, "grad_norm": 0.46034467220306396, "learning_rate": 0.0005930996041409615, "loss": 1.9478, "step": 6168 }, { "epoch": 0.21, "grad_norm": 0.4724526107311249, "learning_rate": 0.0005930973750978557, "loss": 1.9464, "step": 6169 }, { "epoch": 0.21, "grad_norm": 0.5043313503265381, "learning_rate": 0.0005930951456989729, "loss": 1.982, "step": 6170 }, { "epoch": 0.21, "grad_norm": 0.5202935934066772, "learning_rate": 0.0005930929159443158, "loss": 1.9852, "step": 6171 }, { "epoch": 0.21, "grad_norm": 0.4749757647514343, "learning_rate": 0.0005930906858338871, "loss": 1.8872, "step": 6172 }, { "epoch": 0.21, "grad_norm": 0.47890347242355347, "learning_rate": 0.0005930884553676895, "loss": 1.8129, "step": 6173 }, { "epoch": 0.21, "grad_norm": 0.493509978055954, "learning_rate": 0.0005930862245457256, "loss": 1.9966, "step": 6174 }, { "epoch": 0.21, "grad_norm": 0.4835017919540405, "learning_rate": 0.0005930839933679983, "loss": 1.8708, "step": 6175 }, { "epoch": 0.21, "grad_norm": 0.4972645044326782, "learning_rate": 0.0005930817618345102, "loss": 1.9586, "step": 6176 }, { "epoch": 0.21, "grad_norm": 0.4852782189846039, "learning_rate": 0.000593079529945264, "loss": 1.9567, "step": 6177 }, { "epoch": 0.21, "grad_norm": 0.4868864119052887, "learning_rate": 0.0005930772977002625, "loss": 1.9539, "step": 6178 }, { "epoch": 0.21, "grad_norm": 0.4883003234863281, "learning_rate": 0.0005930750650995083, "loss": 1.9545, "step": 6179 }, { "epoch": 0.21, "grad_norm": 0.4894547760486603, "learning_rate": 0.0005930728321430042, "loss": 1.9515, "step": 6180 }, { "epoch": 0.21, "grad_norm": 0.48294180631637573, "learning_rate": 0.0005930705988307528, "loss": 2.0327, "step": 6181 }, { "epoch": 0.21, "grad_norm": 0.46803441643714905, "learning_rate": 0.0005930683651627569, "loss": 1.9415, "step": 6182 }, { "epoch": 0.21, "grad_norm": 0.48960745334625244, "learning_rate": 0.0005930661311390192, "loss": 1.9671, "step": 6183 }, { "epoch": 0.21, "grad_norm": 0.4704042673110962, "learning_rate": 0.0005930638967595424, "loss": 1.9093, "step": 6184 }, { "epoch": 0.21, "grad_norm": 0.4831089973449707, "learning_rate": 0.0005930616620243291, "loss": 1.8398, "step": 6185 }, { "epoch": 0.21, "grad_norm": 0.49155160784721375, "learning_rate": 0.0005930594269333823, "loss": 2.0019, "step": 6186 }, { "epoch": 0.21, "grad_norm": 0.4637356400489807, "learning_rate": 0.0005930571914867043, "loss": 1.9406, "step": 6187 }, { "epoch": 0.21, "grad_norm": 0.4817851483821869, "learning_rate": 0.0005930549556842983, "loss": 1.8767, "step": 6188 }, { "epoch": 0.21, "grad_norm": 0.488059937953949, "learning_rate": 0.0005930527195261667, "loss": 1.9101, "step": 6189 }, { "epoch": 0.21, "grad_norm": 0.4873766601085663, "learning_rate": 0.0005930504830123122, "loss": 1.8754, "step": 6190 }, { "epoch": 0.21, "grad_norm": 0.45721790194511414, "learning_rate": 0.0005930482461427376, "loss": 1.8712, "step": 6191 }, { "epoch": 0.21, "grad_norm": 0.4779190123081207, "learning_rate": 0.0005930460089174456, "loss": 1.9927, "step": 6192 }, { "epoch": 0.21, "grad_norm": 0.4804054796695709, "learning_rate": 0.0005930437713364391, "loss": 1.9566, "step": 6193 }, { "epoch": 0.21, "grad_norm": 0.48801955580711365, "learning_rate": 0.0005930415333997204, "loss": 1.9275, "step": 6194 }, { "epoch": 0.21, "grad_norm": 0.46087679266929626, "learning_rate": 0.0005930392951072926, "loss": 1.9757, "step": 6195 }, { "epoch": 0.21, "grad_norm": 0.46486079692840576, "learning_rate": 0.0005930370564591582, "loss": 1.9055, "step": 6196 }, { "epoch": 0.21, "grad_norm": 0.4957406222820282, "learning_rate": 0.00059303481745532, "loss": 2.0506, "step": 6197 }, { "epoch": 0.21, "grad_norm": 0.49826452136039734, "learning_rate": 0.0005930325780957807, "loss": 1.9819, "step": 6198 }, { "epoch": 0.21, "grad_norm": 0.48139095306396484, "learning_rate": 0.0005930303383805431, "loss": 1.9791, "step": 6199 }, { "epoch": 0.21, "grad_norm": 0.46617481112480164, "learning_rate": 0.0005930280983096099, "loss": 1.8809, "step": 6200 }, { "epoch": 0.21, "grad_norm": 0.4929404556751251, "learning_rate": 0.0005930258578829837, "loss": 1.9909, "step": 6201 }, { "epoch": 0.21, "grad_norm": 0.5039655566215515, "learning_rate": 0.0005930236171006671, "loss": 1.9928, "step": 6202 }, { "epoch": 0.21, "grad_norm": 0.4721285104751587, "learning_rate": 0.0005930213759626632, "loss": 1.9872, "step": 6203 }, { "epoch": 0.21, "grad_norm": 0.4751516282558441, "learning_rate": 0.0005930191344689746, "loss": 1.8829, "step": 6204 }, { "epoch": 0.21, "grad_norm": 0.48073238134384155, "learning_rate": 0.0005930168926196037, "loss": 1.9811, "step": 6205 }, { "epoch": 0.21, "grad_norm": 0.4757058918476105, "learning_rate": 0.0005930146504145536, "loss": 2.009, "step": 6206 }, { "epoch": 0.21, "grad_norm": 0.47555455565452576, "learning_rate": 0.000593012407853827, "loss": 1.9551, "step": 6207 }, { "epoch": 0.21, "grad_norm": 0.5224460959434509, "learning_rate": 0.0005930101649374263, "loss": 1.9769, "step": 6208 }, { "epoch": 0.21, "grad_norm": 0.5003413558006287, "learning_rate": 0.0005930079216653545, "loss": 2.0277, "step": 6209 }, { "epoch": 0.21, "grad_norm": 0.4692377746105194, "learning_rate": 0.0005930056780376143, "loss": 1.96, "step": 6210 }, { "epoch": 0.21, "grad_norm": 0.4829942286014557, "learning_rate": 0.0005930034340542084, "loss": 1.9726, "step": 6211 }, { "epoch": 0.21, "grad_norm": 0.4965220093727112, "learning_rate": 0.0005930011897151394, "loss": 1.9659, "step": 6212 }, { "epoch": 0.21, "grad_norm": 0.476300448179245, "learning_rate": 0.0005929989450204102, "loss": 1.8715, "step": 6213 }, { "epoch": 0.21, "grad_norm": 0.48214608430862427, "learning_rate": 0.0005929966999700234, "loss": 1.9083, "step": 6214 }, { "epoch": 0.21, "grad_norm": 0.4725956320762634, "learning_rate": 0.0005929944545639818, "loss": 1.9728, "step": 6215 }, { "epoch": 0.21, "grad_norm": 0.4761309027671814, "learning_rate": 0.0005929922088022882, "loss": 1.9428, "step": 6216 }, { "epoch": 0.21, "grad_norm": 0.48200976848602295, "learning_rate": 0.000592989962684945, "loss": 1.8888, "step": 6217 }, { "epoch": 0.21, "grad_norm": 0.4866490662097931, "learning_rate": 0.0005929877162119554, "loss": 1.9181, "step": 6218 }, { "epoch": 0.21, "grad_norm": 0.5158212184906006, "learning_rate": 0.0005929854693833218, "loss": 1.979, "step": 6219 }, { "epoch": 0.21, "grad_norm": 0.4705657958984375, "learning_rate": 0.000592983222199047, "loss": 1.9, "step": 6220 }, { "epoch": 0.21, "grad_norm": 0.48740437626838684, "learning_rate": 0.0005929809746591337, "loss": 1.9405, "step": 6221 }, { "epoch": 0.21, "grad_norm": 0.48574522137641907, "learning_rate": 0.0005929787267635847, "loss": 1.9311, "step": 6222 }, { "epoch": 0.21, "grad_norm": 0.5235042572021484, "learning_rate": 0.0005929764785124027, "loss": 1.9444, "step": 6223 }, { "epoch": 0.21, "grad_norm": 0.48842287063598633, "learning_rate": 0.0005929742299055905, "loss": 1.9215, "step": 6224 }, { "epoch": 0.21, "grad_norm": 0.4770457446575165, "learning_rate": 0.0005929719809431506, "loss": 1.9084, "step": 6225 }, { "epoch": 0.21, "grad_norm": 0.5261978507041931, "learning_rate": 0.000592969731625086, "loss": 1.9918, "step": 6226 }, { "epoch": 0.21, "grad_norm": 0.48629721999168396, "learning_rate": 0.0005929674819513993, "loss": 1.937, "step": 6227 }, { "epoch": 0.21, "grad_norm": 0.5117194056510925, "learning_rate": 0.0005929652319220931, "loss": 1.9184, "step": 6228 }, { "epoch": 0.21, "grad_norm": 0.48246803879737854, "learning_rate": 0.0005929629815371704, "loss": 1.9151, "step": 6229 }, { "epoch": 0.21, "grad_norm": 0.48884710669517517, "learning_rate": 0.0005929607307966338, "loss": 1.9012, "step": 6230 }, { "epoch": 0.21, "grad_norm": 0.46962714195251465, "learning_rate": 0.000592958479700486, "loss": 1.9634, "step": 6231 }, { "epoch": 0.21, "grad_norm": 0.5036382079124451, "learning_rate": 0.0005929562282487298, "loss": 1.9422, "step": 6232 }, { "epoch": 0.21, "grad_norm": 0.49271389842033386, "learning_rate": 0.0005929539764413679, "loss": 1.9098, "step": 6233 }, { "epoch": 0.21, "grad_norm": 0.48027920722961426, "learning_rate": 0.0005929517242784031, "loss": 1.9797, "step": 6234 }, { "epoch": 0.21, "grad_norm": 0.4973590672016144, "learning_rate": 0.000592949471759838, "loss": 1.9938, "step": 6235 }, { "epoch": 0.21, "grad_norm": 0.5002408027648926, "learning_rate": 0.0005929472188856755, "loss": 1.9695, "step": 6236 }, { "epoch": 0.21, "grad_norm": 0.4855836033821106, "learning_rate": 0.0005929449656559181, "loss": 1.982, "step": 6237 }, { "epoch": 0.21, "grad_norm": 0.47152653336524963, "learning_rate": 0.0005929427120705687, "loss": 1.936, "step": 6238 }, { "epoch": 0.21, "grad_norm": 0.47915929555892944, "learning_rate": 0.0005929404581296301, "loss": 1.9612, "step": 6239 }, { "epoch": 0.21, "grad_norm": 0.48273199796676636, "learning_rate": 0.0005929382038331048, "loss": 1.9056, "step": 6240 }, { "epoch": 0.21, "grad_norm": 0.49931877851486206, "learning_rate": 0.0005929359491809958, "loss": 1.9556, "step": 6241 }, { "epoch": 0.21, "grad_norm": 0.4889819025993347, "learning_rate": 0.0005929336941733057, "loss": 1.9789, "step": 6242 }, { "epoch": 0.21, "grad_norm": 0.49150988459587097, "learning_rate": 0.0005929314388100372, "loss": 1.9346, "step": 6243 }, { "epoch": 0.21, "grad_norm": 0.49159014225006104, "learning_rate": 0.0005929291830911931, "loss": 2.0045, "step": 6244 }, { "epoch": 0.21, "grad_norm": 0.5578116178512573, "learning_rate": 0.0005929269270167762, "loss": 1.9331, "step": 6245 }, { "epoch": 0.21, "grad_norm": 0.4866790473461151, "learning_rate": 0.0005929246705867892, "loss": 1.975, "step": 6246 }, { "epoch": 0.21, "grad_norm": 0.4696187376976013, "learning_rate": 0.0005929224138012348, "loss": 1.9065, "step": 6247 }, { "epoch": 0.21, "grad_norm": 0.47233203053474426, "learning_rate": 0.0005929201566601156, "loss": 2.0278, "step": 6248 }, { "epoch": 0.21, "grad_norm": 0.49607595801353455, "learning_rate": 0.0005929178991634347, "loss": 1.8966, "step": 6249 }, { "epoch": 0.21, "grad_norm": 0.5213910937309265, "learning_rate": 0.0005929156413111944, "loss": 1.9233, "step": 6250 }, { "epoch": 0.21, "grad_norm": 0.4835604131221771, "learning_rate": 0.0005929133831033979, "loss": 1.9139, "step": 6251 }, { "epoch": 0.21, "grad_norm": 0.4956282079219818, "learning_rate": 0.0005929111245400477, "loss": 1.9256, "step": 6252 }, { "epoch": 0.21, "grad_norm": 0.5109810829162598, "learning_rate": 0.0005929088656211463, "loss": 1.914, "step": 6253 }, { "epoch": 0.21, "grad_norm": 0.5033015012741089, "learning_rate": 0.0005929066063466968, "loss": 1.8771, "step": 6254 }, { "epoch": 0.21, "grad_norm": 0.4707202613353729, "learning_rate": 0.000592904346716702, "loss": 1.9006, "step": 6255 }, { "epoch": 0.21, "grad_norm": 0.4754880964756012, "learning_rate": 0.0005929020867311643, "loss": 1.9827, "step": 6256 }, { "epoch": 0.21, "grad_norm": 0.4800430238246918, "learning_rate": 0.0005928998263900868, "loss": 1.9875, "step": 6257 }, { "epoch": 0.21, "grad_norm": 0.47819459438323975, "learning_rate": 0.000592897565693472, "loss": 1.9406, "step": 6258 }, { "epoch": 0.21, "grad_norm": 0.480609267950058, "learning_rate": 0.0005928953046413226, "loss": 1.9644, "step": 6259 }, { "epoch": 0.21, "grad_norm": 0.5063654780387878, "learning_rate": 0.0005928930432336415, "loss": 1.9051, "step": 6260 }, { "epoch": 0.21, "grad_norm": 0.4714421033859253, "learning_rate": 0.0005928907814704315, "loss": 1.9291, "step": 6261 }, { "epoch": 0.21, "grad_norm": 0.487062007188797, "learning_rate": 0.0005928885193516952, "loss": 1.8462, "step": 6262 }, { "epoch": 0.21, "grad_norm": 0.49309980869293213, "learning_rate": 0.0005928862568774354, "loss": 1.9683, "step": 6263 }, { "epoch": 0.21, "grad_norm": 0.49531644582748413, "learning_rate": 0.0005928839940476547, "loss": 1.9383, "step": 6264 }, { "epoch": 0.21, "grad_norm": 0.488079309463501, "learning_rate": 0.0005928817308623562, "loss": 2.0014, "step": 6265 }, { "epoch": 0.21, "grad_norm": 0.48722460865974426, "learning_rate": 0.0005928794673215423, "loss": 2.0138, "step": 6266 }, { "epoch": 0.21, "grad_norm": 0.46756577491760254, "learning_rate": 0.0005928772034252159, "loss": 2.0041, "step": 6267 }, { "epoch": 0.21, "grad_norm": 0.4882209300994873, "learning_rate": 0.0005928749391733798, "loss": 1.9146, "step": 6268 }, { "epoch": 0.21, "grad_norm": 0.49635472893714905, "learning_rate": 0.0005928726745660365, "loss": 2.0033, "step": 6269 }, { "epoch": 0.21, "grad_norm": 0.4834516942501068, "learning_rate": 0.000592870409603189, "loss": 1.9142, "step": 6270 }, { "epoch": 0.21, "grad_norm": 0.484559565782547, "learning_rate": 0.0005928681442848401, "loss": 2.002, "step": 6271 }, { "epoch": 0.21, "grad_norm": 0.49171215295791626, "learning_rate": 0.0005928658786109924, "loss": 1.9171, "step": 6272 }, { "epoch": 0.21, "grad_norm": 0.48876604437828064, "learning_rate": 0.0005928636125816486, "loss": 1.9454, "step": 6273 }, { "epoch": 0.21, "grad_norm": 0.46696752309799194, "learning_rate": 0.0005928613461968115, "loss": 1.95, "step": 6274 }, { "epoch": 0.21, "grad_norm": 0.4929688572883606, "learning_rate": 0.000592859079456484, "loss": 1.9308, "step": 6275 }, { "epoch": 0.21, "grad_norm": 0.4736703634262085, "learning_rate": 0.0005928568123606686, "loss": 1.8942, "step": 6276 }, { "epoch": 0.21, "grad_norm": 0.48704639077186584, "learning_rate": 0.0005928545449093683, "loss": 1.9446, "step": 6277 }, { "epoch": 0.21, "grad_norm": 0.4883759915828705, "learning_rate": 0.0005928522771025856, "loss": 2.023, "step": 6278 }, { "epoch": 0.21, "grad_norm": 0.4914674162864685, "learning_rate": 0.0005928500089403234, "loss": 1.9715, "step": 6279 }, { "epoch": 0.21, "grad_norm": 0.4812150001525879, "learning_rate": 0.0005928477404225845, "loss": 1.8984, "step": 6280 }, { "epoch": 0.21, "grad_norm": 0.49529361724853516, "learning_rate": 0.0005928454715493716, "loss": 1.9803, "step": 6281 }, { "epoch": 0.21, "grad_norm": 0.4876212477684021, "learning_rate": 0.0005928432023206875, "loss": 1.9326, "step": 6282 }, { "epoch": 0.21, "grad_norm": 0.4983762204647064, "learning_rate": 0.0005928409327365348, "loss": 1.9859, "step": 6283 }, { "epoch": 0.21, "grad_norm": 0.481859415769577, "learning_rate": 0.0005928386627969164, "loss": 2.0243, "step": 6284 }, { "epoch": 0.21, "grad_norm": 0.48034632205963135, "learning_rate": 0.000592836392501835, "loss": 1.9787, "step": 6285 }, { "epoch": 0.21, "grad_norm": 0.5027793645858765, "learning_rate": 0.0005928341218512934, "loss": 2.0221, "step": 6286 }, { "epoch": 0.21, "grad_norm": 0.49513697624206543, "learning_rate": 0.0005928318508452942, "loss": 2.0695, "step": 6287 }, { "epoch": 0.21, "grad_norm": 0.5268055200576782, "learning_rate": 0.0005928295794838404, "loss": 1.9443, "step": 6288 }, { "epoch": 0.21, "grad_norm": 0.5227241516113281, "learning_rate": 0.0005928273077669346, "loss": 1.9341, "step": 6289 }, { "epoch": 0.21, "grad_norm": 0.6302674412727356, "learning_rate": 0.0005928250356945795, "loss": 1.9403, "step": 6290 }, { "epoch": 0.21, "grad_norm": 0.502189576625824, "learning_rate": 0.0005928227632667781, "loss": 1.9187, "step": 6291 }, { "epoch": 0.21, "grad_norm": 0.5183398723602295, "learning_rate": 0.0005928204904835329, "loss": 1.9457, "step": 6292 }, { "epoch": 0.21, "grad_norm": 0.4913739860057831, "learning_rate": 0.0005928182173448468, "loss": 1.9844, "step": 6293 }, { "epoch": 0.21, "grad_norm": 0.49850744009017944, "learning_rate": 0.0005928159438507227, "loss": 1.9316, "step": 6294 }, { "epoch": 0.21, "grad_norm": 0.4834568202495575, "learning_rate": 0.0005928136700011629, "loss": 1.9486, "step": 6295 }, { "epoch": 0.21, "grad_norm": 0.48403945565223694, "learning_rate": 0.0005928113957961706, "loss": 1.99, "step": 6296 }, { "epoch": 0.21, "grad_norm": 0.489739328622818, "learning_rate": 0.0005928091212357485, "loss": 1.9933, "step": 6297 }, { "epoch": 0.21, "grad_norm": 0.4983084797859192, "learning_rate": 0.0005928068463198991, "loss": 1.9588, "step": 6298 }, { "epoch": 0.21, "grad_norm": 0.48923036456108093, "learning_rate": 0.0005928045710486254, "loss": 1.9923, "step": 6299 }, { "epoch": 0.21, "grad_norm": 0.4805993437767029, "learning_rate": 0.0005928022954219301, "loss": 2.0448, "step": 6300 }, { "epoch": 0.21, "grad_norm": 0.4799623489379883, "learning_rate": 0.000592800019439816, "loss": 1.9125, "step": 6301 }, { "epoch": 0.21, "grad_norm": 0.47428181767463684, "learning_rate": 0.0005927977431022858, "loss": 2.0007, "step": 6302 }, { "epoch": 0.21, "grad_norm": 0.5097203254699707, "learning_rate": 0.0005927954664093423, "loss": 1.9778, "step": 6303 }, { "epoch": 0.21, "grad_norm": 0.5059235095977783, "learning_rate": 0.0005927931893609881, "loss": 1.9678, "step": 6304 }, { "epoch": 0.21, "grad_norm": 0.47373199462890625, "learning_rate": 0.0005927909119572263, "loss": 1.9769, "step": 6305 }, { "epoch": 0.21, "grad_norm": 0.46496641635894775, "learning_rate": 0.0005927886341980593, "loss": 1.8803, "step": 6306 }, { "epoch": 0.21, "grad_norm": 0.4967469274997711, "learning_rate": 0.0005927863560834902, "loss": 1.8725, "step": 6307 }, { "epoch": 0.21, "grad_norm": 0.4738098978996277, "learning_rate": 0.0005927840776135215, "loss": 1.9026, "step": 6308 }, { "epoch": 0.21, "grad_norm": 0.46003809571266174, "learning_rate": 0.0005927817987881561, "loss": 1.9433, "step": 6309 }, { "epoch": 0.21, "grad_norm": 0.5000137090682983, "learning_rate": 0.0005927795196073967, "loss": 1.9308, "step": 6310 }, { "epoch": 0.21, "grad_norm": 0.4849227964878082, "learning_rate": 0.0005927772400712461, "loss": 1.8995, "step": 6311 }, { "epoch": 0.21, "grad_norm": 0.47669899463653564, "learning_rate": 0.0005927749601797071, "loss": 1.9599, "step": 6312 }, { "epoch": 0.21, "grad_norm": 0.506626307964325, "learning_rate": 0.0005927726799327825, "loss": 1.9613, "step": 6313 }, { "epoch": 0.21, "grad_norm": 0.4890126883983612, "learning_rate": 0.0005927703993304749, "loss": 1.9909, "step": 6314 }, { "epoch": 0.21, "grad_norm": 0.5021262168884277, "learning_rate": 0.0005927681183727872, "loss": 1.9593, "step": 6315 }, { "epoch": 0.21, "grad_norm": 0.4762481451034546, "learning_rate": 0.0005927658370597222, "loss": 1.9591, "step": 6316 }, { "epoch": 0.21, "grad_norm": 0.4657618999481201, "learning_rate": 0.0005927635553912825, "loss": 1.9591, "step": 6317 }, { "epoch": 0.21, "grad_norm": 0.48315948247909546, "learning_rate": 0.000592761273367471, "loss": 1.9581, "step": 6318 }, { "epoch": 0.21, "grad_norm": 0.5207525491714478, "learning_rate": 0.0005927589909882905, "loss": 1.9496, "step": 6319 }, { "epoch": 0.21, "grad_norm": 0.4651345908641815, "learning_rate": 0.0005927567082537438, "loss": 1.9651, "step": 6320 }, { "epoch": 0.21, "grad_norm": 0.4950130879878998, "learning_rate": 0.0005927544251638335, "loss": 1.9385, "step": 6321 }, { "epoch": 0.21, "grad_norm": 0.4720354974269867, "learning_rate": 0.0005927521417185626, "loss": 1.9949, "step": 6322 }, { "epoch": 0.21, "grad_norm": 0.4874953329563141, "learning_rate": 0.0005927498579179335, "loss": 2.0014, "step": 6323 }, { "epoch": 0.21, "grad_norm": 0.48169928789138794, "learning_rate": 0.0005927475737619493, "loss": 2.0134, "step": 6324 }, { "epoch": 0.21, "grad_norm": 0.48782679438591003, "learning_rate": 0.0005927452892506127, "loss": 1.9911, "step": 6325 }, { "epoch": 0.21, "grad_norm": 0.4688500761985779, "learning_rate": 0.0005927430043839264, "loss": 1.999, "step": 6326 }, { "epoch": 0.21, "grad_norm": 0.4877783954143524, "learning_rate": 0.0005927407191618934, "loss": 1.9571, "step": 6327 }, { "epoch": 0.21, "grad_norm": 0.482928067445755, "learning_rate": 0.000592738433584516, "loss": 1.961, "step": 6328 }, { "epoch": 0.21, "grad_norm": 0.4766257107257843, "learning_rate": 0.0005927361476517975, "loss": 1.9534, "step": 6329 }, { "epoch": 0.21, "grad_norm": 0.4758152365684509, "learning_rate": 0.0005927338613637406, "loss": 1.8815, "step": 6330 }, { "epoch": 0.21, "grad_norm": 0.48347657918930054, "learning_rate": 0.0005927315747203476, "loss": 2.0182, "step": 6331 }, { "epoch": 0.21, "grad_norm": 0.4769449234008789, "learning_rate": 0.0005927292877216218, "loss": 1.9555, "step": 6332 }, { "epoch": 0.21, "grad_norm": 0.47619688510894775, "learning_rate": 0.0005927270003675656, "loss": 1.9471, "step": 6333 }, { "epoch": 0.21, "grad_norm": 0.47515416145324707, "learning_rate": 0.000592724712658182, "loss": 1.8727, "step": 6334 }, { "epoch": 0.21, "grad_norm": 0.47918811440467834, "learning_rate": 0.0005927224245934738, "loss": 1.9978, "step": 6335 }, { "epoch": 0.21, "grad_norm": 0.4716680347919464, "learning_rate": 0.0005927201361734438, "loss": 1.8898, "step": 6336 }, { "epoch": 0.21, "grad_norm": 0.4892192482948303, "learning_rate": 0.0005927178473980946, "loss": 2.0169, "step": 6337 }, { "epoch": 0.21, "grad_norm": 0.4986317455768585, "learning_rate": 0.000592715558267429, "loss": 1.9713, "step": 6338 }, { "epoch": 0.21, "grad_norm": 0.473745197057724, "learning_rate": 0.0005927132687814498, "loss": 2.0082, "step": 6339 }, { "epoch": 0.21, "grad_norm": 0.5097190737724304, "learning_rate": 0.0005927109789401599, "loss": 1.9621, "step": 6340 }, { "epoch": 0.21, "grad_norm": 0.47982197999954224, "learning_rate": 0.000592708688743562, "loss": 1.9513, "step": 6341 }, { "epoch": 0.21, "grad_norm": 0.4868636727333069, "learning_rate": 0.0005927063981916589, "loss": 1.9543, "step": 6342 }, { "epoch": 0.21, "grad_norm": 0.4743725657463074, "learning_rate": 0.0005927041072844533, "loss": 1.946, "step": 6343 }, { "epoch": 0.21, "grad_norm": 0.4889511466026306, "learning_rate": 0.0005927018160219481, "loss": 1.8975, "step": 6344 }, { "epoch": 0.21, "grad_norm": 0.4784942865371704, "learning_rate": 0.000592699524404146, "loss": 1.9735, "step": 6345 }, { "epoch": 0.21, "grad_norm": 0.48686790466308594, "learning_rate": 0.0005926972324310499, "loss": 1.9734, "step": 6346 }, { "epoch": 0.21, "grad_norm": 0.4974079728126526, "learning_rate": 0.0005926949401026623, "loss": 1.8845, "step": 6347 }, { "epoch": 0.21, "grad_norm": 0.48112860321998596, "learning_rate": 0.0005926926474189863, "loss": 1.9355, "step": 6348 }, { "epoch": 0.21, "grad_norm": 0.50291907787323, "learning_rate": 0.0005926903543800246, "loss": 1.9203, "step": 6349 }, { "epoch": 0.21, "grad_norm": 0.4783704876899719, "learning_rate": 0.0005926880609857798, "loss": 1.9321, "step": 6350 }, { "epoch": 0.21, "grad_norm": 0.49332985281944275, "learning_rate": 0.0005926857672362549, "loss": 1.8917, "step": 6351 }, { "epoch": 0.21, "grad_norm": 0.636344850063324, "learning_rate": 0.0005926834731314525, "loss": 1.9594, "step": 6352 }, { "epoch": 0.21, "grad_norm": 0.47778090834617615, "learning_rate": 0.0005926811786713756, "loss": 1.9518, "step": 6353 }, { "epoch": 0.21, "grad_norm": 0.4749651849269867, "learning_rate": 0.0005926788838560268, "loss": 1.8897, "step": 6354 }, { "epoch": 0.21, "grad_norm": 0.4824172854423523, "learning_rate": 0.0005926765886854091, "loss": 1.8959, "step": 6355 }, { "epoch": 0.21, "grad_norm": 0.4912189245223999, "learning_rate": 0.000592674293159525, "loss": 1.865, "step": 6356 }, { "epoch": 0.21, "grad_norm": 0.48049238324165344, "learning_rate": 0.0005926719972783775, "loss": 1.9557, "step": 6357 }, { "epoch": 0.21, "grad_norm": 0.491872102022171, "learning_rate": 0.0005926697010419693, "loss": 1.9382, "step": 6358 }, { "epoch": 0.21, "grad_norm": 0.4706098735332489, "learning_rate": 0.0005926674044503033, "loss": 1.9124, "step": 6359 }, { "epoch": 0.21, "grad_norm": 0.5977447032928467, "learning_rate": 0.0005926651075033821, "loss": 1.8772, "step": 6360 }, { "epoch": 0.21, "grad_norm": 0.4717806875705719, "learning_rate": 0.0005926628102012085, "loss": 1.9192, "step": 6361 }, { "epoch": 0.21, "grad_norm": 0.4730120599269867, "learning_rate": 0.0005926605125437855, "loss": 1.9698, "step": 6362 }, { "epoch": 0.21, "grad_norm": 0.4901424050331116, "learning_rate": 0.0005926582145311157, "loss": 1.9998, "step": 6363 }, { "epoch": 0.21, "grad_norm": 0.4884519577026367, "learning_rate": 0.0005926559161632019, "loss": 1.9865, "step": 6364 }, { "epoch": 0.21, "grad_norm": 0.47645753622055054, "learning_rate": 0.000592653617440047, "loss": 1.9797, "step": 6365 }, { "epoch": 0.21, "grad_norm": 0.4629265069961548, "learning_rate": 0.0005926513183616537, "loss": 1.991, "step": 6366 }, { "epoch": 0.21, "grad_norm": 0.47494399547576904, "learning_rate": 0.0005926490189280249, "loss": 1.9742, "step": 6367 }, { "epoch": 0.21, "grad_norm": 0.4767332673072815, "learning_rate": 0.0005926467191391632, "loss": 1.9907, "step": 6368 }, { "epoch": 0.21, "grad_norm": 0.48055851459503174, "learning_rate": 0.0005926444189950715, "loss": 2.0201, "step": 6369 }, { "epoch": 0.21, "grad_norm": 0.46567249298095703, "learning_rate": 0.0005926421184957527, "loss": 1.9521, "step": 6370 }, { "epoch": 0.21, "grad_norm": 0.4679515063762665, "learning_rate": 0.0005926398176412095, "loss": 1.8975, "step": 6371 }, { "epoch": 0.21, "grad_norm": 0.4639294147491455, "learning_rate": 0.0005926375164314446, "loss": 1.8982, "step": 6372 }, { "epoch": 0.21, "grad_norm": 0.4812804162502289, "learning_rate": 0.0005926352148664608, "loss": 1.9507, "step": 6373 }, { "epoch": 0.21, "grad_norm": 0.4778917729854584, "learning_rate": 0.0005926329129462612, "loss": 1.9809, "step": 6374 }, { "epoch": 0.21, "grad_norm": 0.4511943757534027, "learning_rate": 0.0005926306106708482, "loss": 1.8817, "step": 6375 }, { "epoch": 0.21, "grad_norm": 0.5244089365005493, "learning_rate": 0.0005926283080402247, "loss": 1.9535, "step": 6376 }, { "epoch": 0.21, "grad_norm": 0.4775699973106384, "learning_rate": 0.0005926260050543936, "loss": 1.9296, "step": 6377 }, { "epoch": 0.21, "grad_norm": 0.476171612739563, "learning_rate": 0.0005926237017133577, "loss": 1.9912, "step": 6378 }, { "epoch": 0.21, "grad_norm": 0.46695077419281006, "learning_rate": 0.0005926213980171198, "loss": 1.9167, "step": 6379 }, { "epoch": 0.21, "grad_norm": 0.4920085370540619, "learning_rate": 0.0005926190939656825, "loss": 1.8998, "step": 6380 }, { "epoch": 0.21, "grad_norm": 0.5153905153274536, "learning_rate": 0.0005926167895590487, "loss": 1.9456, "step": 6381 }, { "epoch": 0.21, "grad_norm": 0.47403109073638916, "learning_rate": 0.0005926144847972213, "loss": 1.9513, "step": 6382 }, { "epoch": 0.21, "grad_norm": 0.4742797315120697, "learning_rate": 0.0005926121796802032, "loss": 1.8875, "step": 6383 }, { "epoch": 0.21, "grad_norm": 0.47890210151672363, "learning_rate": 0.0005926098742079968, "loss": 1.9011, "step": 6384 }, { "epoch": 0.21, "grad_norm": 0.48935893177986145, "learning_rate": 0.0005926075683806053, "loss": 1.8796, "step": 6385 }, { "epoch": 0.21, "grad_norm": 0.4829104542732239, "learning_rate": 0.0005926052621980311, "loss": 2.0364, "step": 6386 }, { "epoch": 0.21, "grad_norm": 0.4807887375354767, "learning_rate": 0.0005926029556602774, "loss": 1.8819, "step": 6387 }, { "epoch": 0.21, "grad_norm": 0.5236507654190063, "learning_rate": 0.0005926006487673468, "loss": 1.9334, "step": 6388 }, { "epoch": 0.21, "grad_norm": 0.4851180911064148, "learning_rate": 0.0005925983415192421, "loss": 1.9933, "step": 6389 }, { "epoch": 0.21, "grad_norm": 0.5058286190032959, "learning_rate": 0.0005925960339159661, "loss": 2.0323, "step": 6390 }, { "epoch": 0.21, "grad_norm": 0.5043926239013672, "learning_rate": 0.0005925937259575217, "loss": 1.9445, "step": 6391 }, { "epoch": 0.21, "grad_norm": 0.49419236183166504, "learning_rate": 0.0005925914176439116, "loss": 2.0131, "step": 6392 }, { "epoch": 0.21, "grad_norm": 0.48740309476852417, "learning_rate": 0.0005925891089751386, "loss": 1.9843, "step": 6393 }, { "epoch": 0.21, "grad_norm": 0.5268438458442688, "learning_rate": 0.0005925867999512055, "loss": 1.9943, "step": 6394 }, { "epoch": 0.21, "grad_norm": 0.5016869902610779, "learning_rate": 0.0005925844905721152, "loss": 1.8914, "step": 6395 }, { "epoch": 0.21, "grad_norm": 0.4785556495189667, "learning_rate": 0.0005925821808378704, "loss": 2.0209, "step": 6396 }, { "epoch": 0.21, "grad_norm": 0.4815489947795868, "learning_rate": 0.0005925798707484741, "loss": 1.9246, "step": 6397 }, { "epoch": 0.21, "grad_norm": 0.5440565943717957, "learning_rate": 0.0005925775603039287, "loss": 1.941, "step": 6398 }, { "epoch": 0.21, "grad_norm": 0.5054337978363037, "learning_rate": 0.0005925752495042374, "loss": 1.9482, "step": 6399 }, { "epoch": 0.21, "grad_norm": 0.47375571727752686, "learning_rate": 0.0005925729383494028, "loss": 1.9721, "step": 6400 }, { "epoch": 0.21, "grad_norm": 0.5098514556884766, "learning_rate": 0.0005925706268394277, "loss": 2.0059, "step": 6401 }, { "epoch": 0.21, "grad_norm": 0.4930720925331116, "learning_rate": 0.0005925683149743152, "loss": 1.9459, "step": 6402 }, { "epoch": 0.21, "grad_norm": 0.4885731339454651, "learning_rate": 0.0005925660027540677, "loss": 1.9526, "step": 6403 }, { "epoch": 0.21, "grad_norm": 0.4980004131793976, "learning_rate": 0.0005925636901786882, "loss": 1.9417, "step": 6404 }, { "epoch": 0.21, "grad_norm": 0.4959547817707062, "learning_rate": 0.0005925613772481794, "loss": 2.0954, "step": 6405 }, { "epoch": 0.21, "grad_norm": 0.4855136573314667, "learning_rate": 0.0005925590639625443, "loss": 1.9102, "step": 6406 }, { "epoch": 0.21, "grad_norm": 0.4881640076637268, "learning_rate": 0.0005925567503217857, "loss": 1.9144, "step": 6407 }, { "epoch": 0.21, "grad_norm": 0.4754008948802948, "learning_rate": 0.0005925544363259061, "loss": 1.9732, "step": 6408 }, { "epoch": 0.21, "grad_norm": 0.49048683047294617, "learning_rate": 0.0005925521219749087, "loss": 1.9563, "step": 6409 }, { "epoch": 0.21, "grad_norm": 0.4873412251472473, "learning_rate": 0.000592549807268796, "loss": 1.976, "step": 6410 }, { "epoch": 0.21, "grad_norm": 0.4964951276779175, "learning_rate": 0.0005925474922075709, "loss": 1.8925, "step": 6411 }, { "epoch": 0.21, "grad_norm": 0.49601998925209045, "learning_rate": 0.0005925451767912365, "loss": 1.9476, "step": 6412 }, { "epoch": 0.21, "grad_norm": 0.4681278467178345, "learning_rate": 0.0005925428610197951, "loss": 1.8985, "step": 6413 }, { "epoch": 0.21, "grad_norm": 0.4789222776889801, "learning_rate": 0.0005925405448932499, "loss": 1.985, "step": 6414 }, { "epoch": 0.21, "grad_norm": 0.4716968536376953, "learning_rate": 0.0005925382284116036, "loss": 1.9917, "step": 6415 }, { "epoch": 0.21, "grad_norm": 0.4786698818206787, "learning_rate": 0.0005925359115748588, "loss": 1.9619, "step": 6416 }, { "epoch": 0.21, "grad_norm": 0.49531289935112, "learning_rate": 0.0005925335943830187, "loss": 2.006, "step": 6417 }, { "epoch": 0.21, "grad_norm": 0.48859304189682007, "learning_rate": 0.0005925312768360859, "loss": 1.8976, "step": 6418 }, { "epoch": 0.21, "grad_norm": 0.5079895257949829, "learning_rate": 0.0005925289589340633, "loss": 1.9836, "step": 6419 }, { "epoch": 0.21, "grad_norm": 0.47471362352371216, "learning_rate": 0.0005925266406769535, "loss": 1.9775, "step": 6420 }, { "epoch": 0.21, "grad_norm": 0.48502489924430847, "learning_rate": 0.0005925243220647595, "loss": 1.9686, "step": 6421 }, { "epoch": 0.21, "grad_norm": 0.5189517140388489, "learning_rate": 0.000592522003097484, "loss": 1.8753, "step": 6422 }, { "epoch": 0.21, "grad_norm": 0.48012590408325195, "learning_rate": 0.0005925196837751301, "loss": 2.0279, "step": 6423 }, { "epoch": 0.21, "grad_norm": 0.4899842143058777, "learning_rate": 0.0005925173640977002, "loss": 1.9462, "step": 6424 }, { "epoch": 0.21, "grad_norm": 0.4898128807544708, "learning_rate": 0.0005925150440651974, "loss": 1.959, "step": 6425 }, { "epoch": 0.21, "grad_norm": 0.5065248012542725, "learning_rate": 0.0005925127236776245, "loss": 1.9561, "step": 6426 }, { "epoch": 0.21, "grad_norm": 0.4955993890762329, "learning_rate": 0.0005925104029349841, "loss": 1.9455, "step": 6427 }, { "epoch": 0.21, "grad_norm": 0.49403199553489685, "learning_rate": 0.0005925080818372792, "loss": 1.9585, "step": 6428 }, { "epoch": 0.21, "grad_norm": 0.5050949454307556, "learning_rate": 0.0005925057603845127, "loss": 1.9872, "step": 6429 }, { "epoch": 0.21, "grad_norm": 0.48425737023353577, "learning_rate": 0.0005925034385766872, "loss": 1.897, "step": 6430 }, { "epoch": 0.21, "grad_norm": 0.48280832171440125, "learning_rate": 0.0005925011164138057, "loss": 1.897, "step": 6431 }, { "epoch": 0.21, "grad_norm": 0.4962210953235626, "learning_rate": 0.000592498793895871, "loss": 1.899, "step": 6432 }, { "epoch": 0.21, "grad_norm": 0.49383577704429626, "learning_rate": 0.0005924964710228857, "loss": 1.8622, "step": 6433 }, { "epoch": 0.21, "grad_norm": 0.49864819645881653, "learning_rate": 0.0005924941477948529, "loss": 1.8673, "step": 6434 }, { "epoch": 0.21, "grad_norm": 0.49130916595458984, "learning_rate": 0.0005924918242117753, "loss": 1.9179, "step": 6435 }, { "epoch": 0.21, "grad_norm": 0.485933780670166, "learning_rate": 0.0005924895002736556, "loss": 1.9687, "step": 6436 }, { "epoch": 0.21, "grad_norm": 0.4938409924507141, "learning_rate": 0.0005924871759804968, "loss": 2.0118, "step": 6437 }, { "epoch": 0.21, "grad_norm": 0.4731765687465668, "learning_rate": 0.0005924848513323017, "loss": 1.9409, "step": 6438 }, { "epoch": 0.21, "grad_norm": 0.48351699113845825, "learning_rate": 0.0005924825263290731, "loss": 1.9842, "step": 6439 }, { "epoch": 0.21, "grad_norm": 0.4660389721393585, "learning_rate": 0.0005924802009708138, "loss": 1.9774, "step": 6440 }, { "epoch": 0.21, "grad_norm": 0.4717352092266083, "learning_rate": 0.0005924778752575267, "loss": 1.9219, "step": 6441 }, { "epoch": 0.21, "grad_norm": 0.48077961802482605, "learning_rate": 0.0005924755491892145, "loss": 1.9375, "step": 6442 }, { "epoch": 0.21, "grad_norm": 0.47077539563179016, "learning_rate": 0.0005924732227658799, "loss": 1.9269, "step": 6443 }, { "epoch": 0.21, "grad_norm": 0.4793045222759247, "learning_rate": 0.000592470895987526, "loss": 1.9575, "step": 6444 }, { "epoch": 0.21, "grad_norm": 0.48555850982666016, "learning_rate": 0.0005924685688541557, "loss": 1.9196, "step": 6445 }, { "epoch": 0.21, "grad_norm": 0.4931655526161194, "learning_rate": 0.0005924662413657715, "loss": 1.9283, "step": 6446 }, { "epoch": 0.21, "grad_norm": 0.5005918145179749, "learning_rate": 0.0005924639135223764, "loss": 1.9601, "step": 6447 }, { "epoch": 0.21, "grad_norm": 0.4698428809642792, "learning_rate": 0.0005924615853239733, "loss": 1.9606, "step": 6448 }, { "epoch": 0.21, "grad_norm": 0.4768013060092926, "learning_rate": 0.0005924592567705648, "loss": 1.9662, "step": 6449 }, { "epoch": 0.21, "grad_norm": 0.5219421982765198, "learning_rate": 0.0005924569278621539, "loss": 1.9493, "step": 6450 }, { "epoch": 0.21, "grad_norm": 0.4942512512207031, "learning_rate": 0.0005924545985987433, "loss": 1.9093, "step": 6451 }, { "epoch": 0.21, "grad_norm": 0.4795393645763397, "learning_rate": 0.000592452268980336, "loss": 1.913, "step": 6452 }, { "epoch": 0.21, "grad_norm": 0.5049344301223755, "learning_rate": 0.0005924499390069347, "loss": 1.9827, "step": 6453 }, { "epoch": 0.21, "grad_norm": 0.47433751821517944, "learning_rate": 0.0005924476086785423, "loss": 1.8981, "step": 6454 }, { "epoch": 0.21, "grad_norm": 0.4842030107975006, "learning_rate": 0.0005924452779951615, "loss": 1.8563, "step": 6455 }, { "epoch": 0.21, "grad_norm": 0.48930010199546814, "learning_rate": 0.0005924429469567954, "loss": 1.9372, "step": 6456 }, { "epoch": 0.21, "grad_norm": 0.4865266680717468, "learning_rate": 0.0005924406155634464, "loss": 1.9702, "step": 6457 }, { "epoch": 0.21, "grad_norm": 0.4737839698791504, "learning_rate": 0.0005924382838151178, "loss": 1.8714, "step": 6458 }, { "epoch": 0.21, "grad_norm": 0.4931361973285675, "learning_rate": 0.0005924359517118121, "loss": 1.93, "step": 6459 }, { "epoch": 0.21, "grad_norm": 0.5184804797172546, "learning_rate": 0.0005924336192535323, "loss": 1.8826, "step": 6460 }, { "epoch": 0.21, "grad_norm": 0.4784230589866638, "learning_rate": 0.0005924312864402811, "loss": 1.9901, "step": 6461 }, { "epoch": 0.21, "grad_norm": 0.4773539900779724, "learning_rate": 0.0005924289532720616, "loss": 1.948, "step": 6462 }, { "epoch": 0.22, "grad_norm": 0.4794360399246216, "learning_rate": 0.0005924266197488762, "loss": 1.8762, "step": 6463 }, { "epoch": 0.22, "grad_norm": 0.5051673054695129, "learning_rate": 0.0005924242858707281, "loss": 1.9339, "step": 6464 }, { "epoch": 0.22, "grad_norm": 0.4981233775615692, "learning_rate": 0.00059242195163762, "loss": 1.9049, "step": 6465 }, { "epoch": 0.22, "grad_norm": 0.4774240553379059, "learning_rate": 0.0005924196170495547, "loss": 1.9839, "step": 6466 }, { "epoch": 0.22, "grad_norm": 0.46893027424812317, "learning_rate": 0.000592417282106535, "loss": 1.9062, "step": 6467 }, { "epoch": 0.22, "grad_norm": 0.4715777039527893, "learning_rate": 0.0005924149468085639, "loss": 1.9569, "step": 6468 }, { "epoch": 0.22, "grad_norm": 0.4769662618637085, "learning_rate": 0.0005924126111556442, "loss": 1.9546, "step": 6469 }, { "epoch": 0.22, "grad_norm": 0.4763617515563965, "learning_rate": 0.0005924102751477786, "loss": 1.9852, "step": 6470 }, { "epoch": 0.22, "grad_norm": 0.501762330532074, "learning_rate": 0.00059240793878497, "loss": 1.8789, "step": 6471 }, { "epoch": 0.22, "grad_norm": 0.48534947633743286, "learning_rate": 0.0005924056020672213, "loss": 1.9012, "step": 6472 }, { "epoch": 0.22, "grad_norm": 0.47967329621315, "learning_rate": 0.0005924032649945352, "loss": 2.0037, "step": 6473 }, { "epoch": 0.22, "grad_norm": 0.5084375739097595, "learning_rate": 0.0005924009275669146, "loss": 1.9662, "step": 6474 }, { "epoch": 0.22, "grad_norm": 0.48763903975486755, "learning_rate": 0.0005923985897843626, "loss": 1.9323, "step": 6475 }, { "epoch": 0.22, "grad_norm": 0.48621585965156555, "learning_rate": 0.0005923962516468815, "loss": 1.9751, "step": 6476 }, { "epoch": 0.22, "grad_norm": 0.4882187843322754, "learning_rate": 0.0005923939131544745, "loss": 1.9233, "step": 6477 }, { "epoch": 0.22, "grad_norm": 0.4842853546142578, "learning_rate": 0.0005923915743071444, "loss": 1.9307, "step": 6478 }, { "epoch": 0.22, "grad_norm": 0.46655625104904175, "learning_rate": 0.0005923892351048941, "loss": 1.9313, "step": 6479 }, { "epoch": 0.22, "grad_norm": 0.4942237436771393, "learning_rate": 0.0005923868955477262, "loss": 1.9645, "step": 6480 }, { "epoch": 0.22, "grad_norm": 0.4954930543899536, "learning_rate": 0.0005923845556356439, "loss": 2.0921, "step": 6481 }, { "epoch": 0.22, "grad_norm": 0.4840354025363922, "learning_rate": 0.0005923822153686497, "loss": 1.9274, "step": 6482 }, { "epoch": 0.22, "grad_norm": 0.4958382844924927, "learning_rate": 0.0005923798747467465, "loss": 2.0287, "step": 6483 }, { "epoch": 0.22, "grad_norm": 0.4893358647823334, "learning_rate": 0.0005923775337699373, "loss": 1.8899, "step": 6484 }, { "epoch": 0.22, "grad_norm": 0.47655513882637024, "learning_rate": 0.0005923751924382248, "loss": 1.9218, "step": 6485 }, { "epoch": 0.22, "grad_norm": 0.4985288381576538, "learning_rate": 0.0005923728507516119, "loss": 1.9729, "step": 6486 }, { "epoch": 0.22, "grad_norm": 0.4847705066204071, "learning_rate": 0.0005923705087101015, "loss": 1.9137, "step": 6487 }, { "epoch": 0.22, "grad_norm": 0.46875330805778503, "learning_rate": 0.0005923681663136963, "loss": 1.88, "step": 6488 }, { "epoch": 0.22, "grad_norm": 0.5023247003555298, "learning_rate": 0.0005923658235623993, "loss": 2.0267, "step": 6489 }, { "epoch": 0.22, "grad_norm": 0.48560425639152527, "learning_rate": 0.0005923634804562133, "loss": 1.9871, "step": 6490 }, { "epoch": 0.22, "grad_norm": 0.47054415941238403, "learning_rate": 0.000592361136995141, "loss": 1.9224, "step": 6491 }, { "epoch": 0.22, "grad_norm": 0.49135836958885193, "learning_rate": 0.0005923587931791855, "loss": 2.0594, "step": 6492 }, { "epoch": 0.22, "grad_norm": 0.4724476635456085, "learning_rate": 0.0005923564490083494, "loss": 1.9123, "step": 6493 }, { "epoch": 0.22, "grad_norm": 0.4781063497066498, "learning_rate": 0.0005923541044826357, "loss": 1.9888, "step": 6494 }, { "epoch": 0.22, "grad_norm": 0.5305438041687012, "learning_rate": 0.000592351759602047, "loss": 1.9473, "step": 6495 }, { "epoch": 0.22, "grad_norm": 0.4739114046096802, "learning_rate": 0.0005923494143665866, "loss": 1.8723, "step": 6496 }, { "epoch": 0.22, "grad_norm": 0.5043606758117676, "learning_rate": 0.000592347068776257, "loss": 1.9428, "step": 6497 }, { "epoch": 0.22, "grad_norm": 0.47065269947052, "learning_rate": 0.0005923447228310611, "loss": 1.9255, "step": 6498 }, { "epoch": 0.22, "grad_norm": 0.481731653213501, "learning_rate": 0.0005923423765310018, "loss": 1.8994, "step": 6499 }, { "epoch": 0.22, "grad_norm": 0.47294899821281433, "learning_rate": 0.000592340029876082, "loss": 1.9409, "step": 6500 }, { "epoch": 0.22, "grad_norm": 0.47952985763549805, "learning_rate": 0.0005923376828663044, "loss": 1.9341, "step": 6501 }, { "epoch": 0.22, "grad_norm": 0.47403714060783386, "learning_rate": 0.0005923353355016718, "loss": 1.9661, "step": 6502 }, { "epoch": 0.22, "grad_norm": 0.47658345103263855, "learning_rate": 0.0005923329877821874, "loss": 1.9806, "step": 6503 }, { "epoch": 0.22, "grad_norm": 0.617809534072876, "learning_rate": 0.0005923306397078537, "loss": 2.0738, "step": 6504 }, { "epoch": 0.22, "grad_norm": 0.5007539391517639, "learning_rate": 0.0005923282912786737, "loss": 1.9673, "step": 6505 }, { "epoch": 0.22, "grad_norm": 0.4771345555782318, "learning_rate": 0.0005923259424946502, "loss": 1.9281, "step": 6506 }, { "epoch": 0.22, "grad_norm": 0.4768744707107544, "learning_rate": 0.0005923235933557862, "loss": 2.0202, "step": 6507 }, { "epoch": 0.22, "grad_norm": 0.4864400029182434, "learning_rate": 0.0005923212438620843, "loss": 2.0104, "step": 6508 }, { "epoch": 0.22, "grad_norm": 0.4918610453605652, "learning_rate": 0.0005923188940135475, "loss": 1.9599, "step": 6509 }, { "epoch": 0.22, "grad_norm": 0.4694102704524994, "learning_rate": 0.0005923165438101787, "loss": 1.9519, "step": 6510 }, { "epoch": 0.22, "grad_norm": 0.47900116443634033, "learning_rate": 0.0005923141932519806, "loss": 1.9501, "step": 6511 }, { "epoch": 0.22, "grad_norm": 0.4674634635448456, "learning_rate": 0.0005923118423389563, "loss": 2.0123, "step": 6512 }, { "epoch": 0.22, "grad_norm": 0.48450982570648193, "learning_rate": 0.0005923094910711083, "loss": 1.9318, "step": 6513 }, { "epoch": 0.22, "grad_norm": 0.4598527252674103, "learning_rate": 0.0005923071394484398, "loss": 1.8409, "step": 6514 }, { "epoch": 0.22, "grad_norm": 0.46927160024642944, "learning_rate": 0.0005923047874709534, "loss": 1.9828, "step": 6515 }, { "epoch": 0.22, "grad_norm": 0.47249531745910645, "learning_rate": 0.0005923024351386521, "loss": 1.9459, "step": 6516 }, { "epoch": 0.22, "grad_norm": 0.4728154242038727, "learning_rate": 0.0005923000824515387, "loss": 1.9631, "step": 6517 }, { "epoch": 0.22, "grad_norm": 0.48897212743759155, "learning_rate": 0.000592297729409616, "loss": 2.031, "step": 6518 }, { "epoch": 0.22, "grad_norm": 0.4889355003833771, "learning_rate": 0.000592295376012887, "loss": 1.9596, "step": 6519 }, { "epoch": 0.22, "grad_norm": 0.4807167649269104, "learning_rate": 0.0005922930222613546, "loss": 2.0072, "step": 6520 }, { "epoch": 0.22, "grad_norm": 0.48330157995224, "learning_rate": 0.0005922906681550214, "loss": 1.9288, "step": 6521 }, { "epoch": 0.22, "grad_norm": 0.4850374162197113, "learning_rate": 0.0005922883136938904, "loss": 2.0525, "step": 6522 }, { "epoch": 0.22, "grad_norm": 0.47260838747024536, "learning_rate": 0.0005922859588779643, "loss": 2.0099, "step": 6523 }, { "epoch": 0.22, "grad_norm": 0.47130087018013, "learning_rate": 0.0005922836037072464, "loss": 1.8951, "step": 6524 }, { "epoch": 0.22, "grad_norm": 0.4763706624507904, "learning_rate": 0.0005922812481817392, "loss": 1.827, "step": 6525 }, { "epoch": 0.22, "grad_norm": 0.4940698742866516, "learning_rate": 0.0005922788923014455, "loss": 1.9921, "step": 6526 }, { "epoch": 0.22, "grad_norm": 0.474590003490448, "learning_rate": 0.0005922765360663684, "loss": 2.0052, "step": 6527 }, { "epoch": 0.22, "grad_norm": 0.4763781726360321, "learning_rate": 0.0005922741794765106, "loss": 2.0476, "step": 6528 }, { "epoch": 0.22, "grad_norm": 0.46787917613983154, "learning_rate": 0.000592271822531875, "loss": 1.9681, "step": 6529 }, { "epoch": 0.22, "grad_norm": 0.497531920671463, "learning_rate": 0.0005922694652324645, "loss": 1.9986, "step": 6530 }, { "epoch": 0.22, "grad_norm": 0.47346073389053345, "learning_rate": 0.000592267107578282, "loss": 1.9812, "step": 6531 }, { "epoch": 0.22, "grad_norm": 0.4875708222389221, "learning_rate": 0.0005922647495693302, "loss": 1.9421, "step": 6532 }, { "epoch": 0.22, "grad_norm": 0.4917418658733368, "learning_rate": 0.000592262391205612, "loss": 1.9702, "step": 6533 }, { "epoch": 0.22, "grad_norm": 0.4767063856124878, "learning_rate": 0.0005922600324871304, "loss": 1.9877, "step": 6534 }, { "epoch": 0.22, "grad_norm": 0.4878934919834137, "learning_rate": 0.0005922576734138882, "loss": 2.0071, "step": 6535 }, { "epoch": 0.22, "grad_norm": 0.46905091404914856, "learning_rate": 0.0005922553139858884, "loss": 1.9654, "step": 6536 }, { "epoch": 0.22, "grad_norm": 0.47937560081481934, "learning_rate": 0.0005922529542031334, "loss": 1.8956, "step": 6537 }, { "epoch": 0.22, "grad_norm": 0.4617322087287903, "learning_rate": 0.0005922505940656266, "loss": 1.9104, "step": 6538 }, { "epoch": 0.22, "grad_norm": 0.4977926015853882, "learning_rate": 0.0005922482335733706, "loss": 1.8935, "step": 6539 }, { "epoch": 0.22, "grad_norm": 0.4694302976131439, "learning_rate": 0.0005922458727263682, "loss": 1.9583, "step": 6540 }, { "epoch": 0.22, "grad_norm": 0.5091930627822876, "learning_rate": 0.0005922435115246225, "loss": 2.0392, "step": 6541 }, { "epoch": 0.22, "grad_norm": 0.4934599697589874, "learning_rate": 0.0005922411499681361, "loss": 1.9118, "step": 6542 }, { "epoch": 0.22, "grad_norm": 0.48909032344818115, "learning_rate": 0.0005922387880569121, "loss": 1.9551, "step": 6543 }, { "epoch": 0.22, "grad_norm": 0.4968581795692444, "learning_rate": 0.0005922364257909533, "loss": 1.9763, "step": 6544 }, { "epoch": 0.22, "grad_norm": 0.48012441396713257, "learning_rate": 0.0005922340631702625, "loss": 2.0354, "step": 6545 }, { "epoch": 0.22, "grad_norm": 0.5048379302024841, "learning_rate": 0.0005922317001948426, "loss": 1.9035, "step": 6546 }, { "epoch": 0.22, "grad_norm": 0.48560795187950134, "learning_rate": 0.0005922293368646963, "loss": 1.9462, "step": 6547 }, { "epoch": 0.22, "grad_norm": 0.47436171770095825, "learning_rate": 0.0005922269731798269, "loss": 1.9024, "step": 6548 }, { "epoch": 0.22, "grad_norm": 0.5081202983856201, "learning_rate": 0.0005922246091402369, "loss": 2.003, "step": 6549 }, { "epoch": 0.22, "grad_norm": 0.48937928676605225, "learning_rate": 0.0005922222447459292, "loss": 1.8961, "step": 6550 }, { "epoch": 0.22, "grad_norm": 0.5074338912963867, "learning_rate": 0.0005922198799969069, "loss": 1.9728, "step": 6551 }, { "epoch": 0.22, "grad_norm": 0.5021689534187317, "learning_rate": 0.0005922175148931727, "loss": 1.9901, "step": 6552 }, { "epoch": 0.22, "grad_norm": 0.4919920861721039, "learning_rate": 0.0005922151494347295, "loss": 1.9961, "step": 6553 }, { "epoch": 0.22, "grad_norm": 0.4953654408454895, "learning_rate": 0.0005922127836215801, "loss": 1.929, "step": 6554 }, { "epoch": 0.22, "grad_norm": 0.5169805288314819, "learning_rate": 0.0005922104174537275, "loss": 1.9627, "step": 6555 }, { "epoch": 0.22, "grad_norm": 0.5017324090003967, "learning_rate": 0.0005922080509311744, "loss": 1.9364, "step": 6556 }, { "epoch": 0.22, "grad_norm": 0.4942263066768646, "learning_rate": 0.0005922056840539239, "loss": 1.9321, "step": 6557 }, { "epoch": 0.22, "grad_norm": 0.5108247995376587, "learning_rate": 0.0005922033168219787, "loss": 1.9824, "step": 6558 }, { "epoch": 0.22, "grad_norm": 0.4988256096839905, "learning_rate": 0.0005922009492353418, "loss": 1.9352, "step": 6559 }, { "epoch": 0.22, "grad_norm": 0.4844823181629181, "learning_rate": 0.000592198581294016, "loss": 2.026, "step": 6560 }, { "epoch": 0.22, "grad_norm": 0.47781601548194885, "learning_rate": 0.000592196212998004, "loss": 1.9664, "step": 6561 }, { "epoch": 0.22, "grad_norm": 0.5337867736816406, "learning_rate": 0.0005921938443473091, "loss": 1.9946, "step": 6562 }, { "epoch": 0.22, "grad_norm": 0.4856245219707489, "learning_rate": 0.0005921914753419337, "loss": 2.0364, "step": 6563 }, { "epoch": 0.22, "grad_norm": 0.4996040165424347, "learning_rate": 0.0005921891059818811, "loss": 1.8937, "step": 6564 }, { "epoch": 0.22, "grad_norm": 0.4767638146877289, "learning_rate": 0.0005921867362671539, "loss": 1.8871, "step": 6565 }, { "epoch": 0.22, "grad_norm": 0.48578566312789917, "learning_rate": 0.000592184366197755, "loss": 1.864, "step": 6566 }, { "epoch": 0.22, "grad_norm": 0.4760873019695282, "learning_rate": 0.0005921819957736872, "loss": 2.0415, "step": 6567 }, { "epoch": 0.22, "grad_norm": 0.4640020430088043, "learning_rate": 0.0005921796249949538, "loss": 1.9482, "step": 6568 }, { "epoch": 0.22, "grad_norm": 0.482040673494339, "learning_rate": 0.0005921772538615573, "loss": 1.9217, "step": 6569 }, { "epoch": 0.22, "grad_norm": 0.5136566162109375, "learning_rate": 0.0005921748823735005, "loss": 1.9275, "step": 6570 }, { "epoch": 0.22, "grad_norm": 0.49074816703796387, "learning_rate": 0.0005921725105307866, "loss": 1.917, "step": 6571 }, { "epoch": 0.22, "grad_norm": 0.4655594527721405, "learning_rate": 0.0005921701383334182, "loss": 1.9236, "step": 6572 }, { "epoch": 0.22, "grad_norm": 0.5038384795188904, "learning_rate": 0.0005921677657813985, "loss": 1.9226, "step": 6573 }, { "epoch": 0.22, "grad_norm": 0.49514955282211304, "learning_rate": 0.00059216539287473, "loss": 1.9784, "step": 6574 }, { "epoch": 0.22, "grad_norm": 0.4775465130805969, "learning_rate": 0.0005921630196134159, "loss": 1.8624, "step": 6575 }, { "epoch": 0.22, "grad_norm": 0.48500850796699524, "learning_rate": 0.0005921606459974588, "loss": 2.0199, "step": 6576 }, { "epoch": 0.22, "grad_norm": 0.49871212244033813, "learning_rate": 0.0005921582720268618, "loss": 2.0383, "step": 6577 }, { "epoch": 0.22, "grad_norm": 0.4765196442604065, "learning_rate": 0.0005921558977016277, "loss": 1.987, "step": 6578 }, { "epoch": 0.22, "grad_norm": 0.485749214887619, "learning_rate": 0.0005921535230217594, "loss": 1.9535, "step": 6579 }, { "epoch": 0.22, "grad_norm": 0.47973909974098206, "learning_rate": 0.0005921511479872598, "loss": 1.9412, "step": 6580 }, { "epoch": 0.22, "grad_norm": 0.46993452310562134, "learning_rate": 0.0005921487725981317, "loss": 1.886, "step": 6581 }, { "epoch": 0.22, "grad_norm": 0.4949122667312622, "learning_rate": 0.0005921463968543779, "loss": 1.9144, "step": 6582 }, { "epoch": 0.22, "grad_norm": 0.48729702830314636, "learning_rate": 0.0005921440207560016, "loss": 1.9015, "step": 6583 }, { "epoch": 0.22, "grad_norm": 0.4840121269226074, "learning_rate": 0.0005921416443030055, "loss": 1.9239, "step": 6584 }, { "epoch": 0.22, "grad_norm": 0.49138781428337097, "learning_rate": 0.0005921392674953925, "loss": 1.9097, "step": 6585 }, { "epoch": 0.22, "grad_norm": 0.48734065890312195, "learning_rate": 0.0005921368903331654, "loss": 1.9665, "step": 6586 }, { "epoch": 0.22, "grad_norm": 0.48223188519477844, "learning_rate": 0.0005921345128163272, "loss": 1.9234, "step": 6587 }, { "epoch": 0.22, "grad_norm": 0.5245910286903381, "learning_rate": 0.0005921321349448807, "loss": 1.9098, "step": 6588 }, { "epoch": 0.22, "grad_norm": 0.5041666626930237, "learning_rate": 0.0005921297567188289, "loss": 1.9905, "step": 6589 }, { "epoch": 0.22, "grad_norm": 0.4668820798397064, "learning_rate": 0.0005921273781381745, "loss": 1.9481, "step": 6590 }, { "epoch": 0.22, "grad_norm": 0.4714677333831787, "learning_rate": 0.0005921249992029207, "loss": 2.0065, "step": 6591 }, { "epoch": 0.22, "grad_norm": 0.500164270401001, "learning_rate": 0.00059212261991307, "loss": 1.9179, "step": 6592 }, { "epoch": 0.22, "grad_norm": 0.48542866110801697, "learning_rate": 0.0005921202402686256, "loss": 2.032, "step": 6593 }, { "epoch": 0.22, "grad_norm": 0.5169427990913391, "learning_rate": 0.0005921178602695902, "loss": 2.0146, "step": 6594 }, { "epoch": 0.22, "grad_norm": 0.4649619162082672, "learning_rate": 0.0005921154799159668, "loss": 1.9765, "step": 6595 }, { "epoch": 0.22, "grad_norm": 0.47811663150787354, "learning_rate": 0.0005921130992077582, "loss": 1.9568, "step": 6596 }, { "epoch": 0.22, "grad_norm": 0.5613967776298523, "learning_rate": 0.0005921107181449674, "loss": 1.9673, "step": 6597 }, { "epoch": 0.22, "grad_norm": 0.4665198028087616, "learning_rate": 0.0005921083367275972, "loss": 1.8939, "step": 6598 }, { "epoch": 0.22, "grad_norm": 0.483523428440094, "learning_rate": 0.0005921059549556506, "loss": 2.0026, "step": 6599 }, { "epoch": 0.22, "grad_norm": 0.4759019911289215, "learning_rate": 0.0005921035728291303, "loss": 1.9369, "step": 6600 }, { "epoch": 0.22, "grad_norm": 0.47111403942108154, "learning_rate": 0.0005921011903480393, "loss": 1.9675, "step": 6601 }, { "epoch": 0.22, "grad_norm": 0.4608728587627411, "learning_rate": 0.0005920988075123806, "loss": 1.9713, "step": 6602 }, { "epoch": 0.22, "grad_norm": 0.46774938702583313, "learning_rate": 0.0005920964243221569, "loss": 1.9589, "step": 6603 }, { "epoch": 0.22, "grad_norm": 0.4927993416786194, "learning_rate": 0.0005920940407773712, "loss": 2.0419, "step": 6604 }, { "epoch": 0.22, "grad_norm": 0.478732705116272, "learning_rate": 0.0005920916568780265, "loss": 2.0013, "step": 6605 }, { "epoch": 0.22, "grad_norm": 0.512107253074646, "learning_rate": 0.0005920892726241254, "loss": 1.9111, "step": 6606 }, { "epoch": 0.22, "grad_norm": 0.4845406115055084, "learning_rate": 0.0005920868880156709, "loss": 1.9178, "step": 6607 }, { "epoch": 0.22, "grad_norm": 0.4868394136428833, "learning_rate": 0.0005920845030526661, "loss": 1.9531, "step": 6608 }, { "epoch": 0.22, "grad_norm": 0.4674511253833771, "learning_rate": 0.0005920821177351137, "loss": 1.9451, "step": 6609 }, { "epoch": 0.22, "grad_norm": 0.49481746554374695, "learning_rate": 0.0005920797320630167, "loss": 1.911, "step": 6610 }, { "epoch": 0.22, "grad_norm": 0.48550111055374146, "learning_rate": 0.0005920773460363779, "loss": 1.8884, "step": 6611 }, { "epoch": 0.22, "grad_norm": 0.4731695353984833, "learning_rate": 0.0005920749596552002, "loss": 1.9427, "step": 6612 }, { "epoch": 0.22, "grad_norm": 0.5088483691215515, "learning_rate": 0.0005920725729194866, "loss": 1.9573, "step": 6613 }, { "epoch": 0.22, "grad_norm": 0.49243614077568054, "learning_rate": 0.0005920701858292399, "loss": 1.976, "step": 6614 }, { "epoch": 0.22, "grad_norm": 0.46451297402381897, "learning_rate": 0.000592067798384463, "loss": 1.996, "step": 6615 }, { "epoch": 0.22, "grad_norm": 0.47927606105804443, "learning_rate": 0.0005920654105851589, "loss": 1.943, "step": 6616 }, { "epoch": 0.22, "grad_norm": 0.5148353576660156, "learning_rate": 0.0005920630224313303, "loss": 1.9466, "step": 6617 }, { "epoch": 0.22, "grad_norm": 0.5120405554771423, "learning_rate": 0.0005920606339229803, "loss": 2.0302, "step": 6618 }, { "epoch": 0.22, "grad_norm": 0.4737686514854431, "learning_rate": 0.0005920582450601118, "loss": 1.939, "step": 6619 }, { "epoch": 0.22, "grad_norm": 0.5077882409095764, "learning_rate": 0.0005920558558427275, "loss": 1.9731, "step": 6620 }, { "epoch": 0.22, "grad_norm": 0.5042643547058105, "learning_rate": 0.0005920534662708305, "loss": 1.9368, "step": 6621 }, { "epoch": 0.22, "grad_norm": 0.47598278522491455, "learning_rate": 0.0005920510763444235, "loss": 1.9015, "step": 6622 }, { "epoch": 0.22, "grad_norm": 0.5028097629547119, "learning_rate": 0.0005920486860635097, "loss": 2.0062, "step": 6623 }, { "epoch": 0.22, "grad_norm": 0.46860769391059875, "learning_rate": 0.0005920462954280918, "loss": 1.9058, "step": 6624 }, { "epoch": 0.22, "grad_norm": 0.4639459550380707, "learning_rate": 0.0005920439044381726, "loss": 1.9159, "step": 6625 }, { "epoch": 0.22, "grad_norm": 0.4685554802417755, "learning_rate": 0.0005920415130937553, "loss": 1.9179, "step": 6626 }, { "epoch": 0.22, "grad_norm": 0.4614635109901428, "learning_rate": 0.0005920391213948425, "loss": 1.933, "step": 6627 }, { "epoch": 0.22, "grad_norm": 0.4674985110759735, "learning_rate": 0.0005920367293414373, "loss": 1.8673, "step": 6628 }, { "epoch": 0.22, "grad_norm": 0.4509505331516266, "learning_rate": 0.0005920343369335424, "loss": 1.9483, "step": 6629 }, { "epoch": 0.22, "grad_norm": 0.49216416478157043, "learning_rate": 0.000592031944171161, "loss": 1.94, "step": 6630 }, { "epoch": 0.22, "grad_norm": 0.4799202084541321, "learning_rate": 0.0005920295510542958, "loss": 1.9253, "step": 6631 }, { "epoch": 0.22, "grad_norm": 0.4720112383365631, "learning_rate": 0.0005920271575829498, "loss": 1.9148, "step": 6632 }, { "epoch": 0.22, "grad_norm": 0.48617541790008545, "learning_rate": 0.0005920247637571258, "loss": 1.8996, "step": 6633 }, { "epoch": 0.22, "grad_norm": 0.4756450653076172, "learning_rate": 0.0005920223695768268, "loss": 1.926, "step": 6634 }, { "epoch": 0.22, "grad_norm": 0.48264554142951965, "learning_rate": 0.0005920199750420556, "loss": 1.9857, "step": 6635 }, { "epoch": 0.22, "grad_norm": 0.48860490322113037, "learning_rate": 0.0005920175801528152, "loss": 2.0017, "step": 6636 }, { "epoch": 0.22, "grad_norm": 0.4741964340209961, "learning_rate": 0.0005920151849091086, "loss": 1.9838, "step": 6637 }, { "epoch": 0.22, "grad_norm": 0.4548902213573456, "learning_rate": 0.0005920127893109385, "loss": 1.9349, "step": 6638 }, { "epoch": 0.22, "grad_norm": 0.493269681930542, "learning_rate": 0.0005920103933583079, "loss": 1.9511, "step": 6639 }, { "epoch": 0.22, "grad_norm": 0.47421613335609436, "learning_rate": 0.0005920079970512198, "loss": 2.0116, "step": 6640 }, { "epoch": 0.22, "grad_norm": 0.45168328285217285, "learning_rate": 0.0005920056003896768, "loss": 1.9239, "step": 6641 }, { "epoch": 0.22, "grad_norm": 0.4766046702861786, "learning_rate": 0.0005920032033736822, "loss": 1.8983, "step": 6642 }, { "epoch": 0.22, "grad_norm": 0.4753842353820801, "learning_rate": 0.0005920008060032387, "loss": 1.9203, "step": 6643 }, { "epoch": 0.22, "grad_norm": 0.4785483479499817, "learning_rate": 0.0005919984082783493, "loss": 1.9862, "step": 6644 }, { "epoch": 0.22, "grad_norm": 0.4623538553714752, "learning_rate": 0.0005919960101990169, "loss": 1.9526, "step": 6645 }, { "epoch": 0.22, "grad_norm": 0.5119850039482117, "learning_rate": 0.0005919936117652442, "loss": 1.8714, "step": 6646 }, { "epoch": 0.22, "grad_norm": 0.48346105217933655, "learning_rate": 0.0005919912129770343, "loss": 1.9253, "step": 6647 }, { "epoch": 0.22, "grad_norm": 0.45848920941352844, "learning_rate": 0.0005919888138343901, "loss": 1.8179, "step": 6648 }, { "epoch": 0.22, "grad_norm": 0.51650071144104, "learning_rate": 0.0005919864143373145, "loss": 1.9782, "step": 6649 }, { "epoch": 0.22, "grad_norm": 0.5026246905326843, "learning_rate": 0.0005919840144858105, "loss": 1.8843, "step": 6650 }, { "epoch": 0.22, "grad_norm": 0.484573096036911, "learning_rate": 0.0005919816142798809, "loss": 1.9385, "step": 6651 }, { "epoch": 0.22, "grad_norm": 0.5273947715759277, "learning_rate": 0.0005919792137195286, "loss": 1.8647, "step": 6652 }, { "epoch": 0.22, "grad_norm": 0.4903362989425659, "learning_rate": 0.0005919768128047566, "loss": 1.9367, "step": 6653 }, { "epoch": 0.22, "grad_norm": 0.4791635572910309, "learning_rate": 0.0005919744115355678, "loss": 1.9885, "step": 6654 }, { "epoch": 0.22, "grad_norm": 0.5181887745857239, "learning_rate": 0.000591972009911965, "loss": 1.8895, "step": 6655 }, { "epoch": 0.22, "grad_norm": 0.4896482527256012, "learning_rate": 0.0005919696079339513, "loss": 1.9062, "step": 6656 }, { "epoch": 0.22, "grad_norm": 0.4767754077911377, "learning_rate": 0.0005919672056015294, "loss": 1.9918, "step": 6657 }, { "epoch": 0.22, "grad_norm": 0.5014905333518982, "learning_rate": 0.0005919648029147023, "loss": 1.9416, "step": 6658 }, { "epoch": 0.22, "grad_norm": 0.5255060791969299, "learning_rate": 0.000591962399873473, "loss": 1.8886, "step": 6659 }, { "epoch": 0.22, "grad_norm": 0.49041566252708435, "learning_rate": 0.0005919599964778445, "loss": 2.0047, "step": 6660 }, { "epoch": 0.22, "grad_norm": 0.4805065095424652, "learning_rate": 0.0005919575927278195, "loss": 1.9076, "step": 6661 }, { "epoch": 0.22, "grad_norm": 0.47409456968307495, "learning_rate": 0.0005919551886234009, "loss": 1.9527, "step": 6662 }, { "epoch": 0.22, "grad_norm": 0.49191492795944214, "learning_rate": 0.0005919527841645918, "loss": 1.8877, "step": 6663 }, { "epoch": 0.22, "grad_norm": 0.47038233280181885, "learning_rate": 0.0005919503793513951, "loss": 1.9106, "step": 6664 }, { "epoch": 0.22, "grad_norm": 0.47455477714538574, "learning_rate": 0.0005919479741838136, "loss": 1.9397, "step": 6665 }, { "epoch": 0.22, "grad_norm": 0.4818825125694275, "learning_rate": 0.0005919455686618502, "loss": 1.8912, "step": 6666 }, { "epoch": 0.22, "grad_norm": 0.4800091087818146, "learning_rate": 0.000591943162785508, "loss": 1.8389, "step": 6667 }, { "epoch": 0.22, "grad_norm": 0.5331628918647766, "learning_rate": 0.0005919407565547897, "loss": 1.974, "step": 6668 }, { "epoch": 0.22, "grad_norm": 0.46029120683670044, "learning_rate": 0.0005919383499696985, "loss": 1.9542, "step": 6669 }, { "epoch": 0.22, "grad_norm": 0.4807624816894531, "learning_rate": 0.0005919359430302372, "loss": 1.9302, "step": 6670 }, { "epoch": 0.22, "grad_norm": 0.4801797568798065, "learning_rate": 0.0005919335357364085, "loss": 1.9803, "step": 6671 }, { "epoch": 0.22, "grad_norm": 0.45667538046836853, "learning_rate": 0.0005919311280882155, "loss": 1.8837, "step": 6672 }, { "epoch": 0.22, "grad_norm": 0.46333569288253784, "learning_rate": 0.0005919287200856612, "loss": 1.8689, "step": 6673 }, { "epoch": 0.22, "grad_norm": 0.4727405309677124, "learning_rate": 0.0005919263117287484, "loss": 1.9653, "step": 6674 }, { "epoch": 0.22, "grad_norm": 0.4771972596645355, "learning_rate": 0.0005919239030174801, "loss": 1.9858, "step": 6675 }, { "epoch": 0.22, "grad_norm": 0.4767698645591736, "learning_rate": 0.0005919214939518592, "loss": 1.946, "step": 6676 }, { "epoch": 0.22, "grad_norm": 0.47624149918556213, "learning_rate": 0.0005919190845318885, "loss": 1.9961, "step": 6677 }, { "epoch": 0.22, "grad_norm": 0.4637347459793091, "learning_rate": 0.0005919166747575713, "loss": 1.8073, "step": 6678 }, { "epoch": 0.22, "grad_norm": 0.4683848023414612, "learning_rate": 0.0005919142646289101, "loss": 1.9665, "step": 6679 }, { "epoch": 0.22, "grad_norm": 0.46481406688690186, "learning_rate": 0.000591911854145908, "loss": 1.9729, "step": 6680 }, { "epoch": 0.22, "grad_norm": 0.5047783255577087, "learning_rate": 0.000591909443308568, "loss": 1.901, "step": 6681 }, { "epoch": 0.22, "grad_norm": 0.47218573093414307, "learning_rate": 0.0005919070321168928, "loss": 1.959, "step": 6682 }, { "epoch": 0.22, "grad_norm": 0.5294645428657532, "learning_rate": 0.0005919046205708856, "loss": 2.0575, "step": 6683 }, { "epoch": 0.22, "grad_norm": 0.48210054636001587, "learning_rate": 0.0005919022086705492, "loss": 2.0463, "step": 6684 }, { "epoch": 0.22, "grad_norm": 0.4721742868423462, "learning_rate": 0.0005918997964158864, "loss": 1.9199, "step": 6685 }, { "epoch": 0.22, "grad_norm": 0.5045130848884583, "learning_rate": 0.0005918973838069003, "loss": 1.9859, "step": 6686 }, { "epoch": 0.22, "grad_norm": 0.4934777319431305, "learning_rate": 0.0005918949708435939, "loss": 2.0193, "step": 6687 }, { "epoch": 0.22, "grad_norm": 0.47523781657218933, "learning_rate": 0.00059189255752597, "loss": 2.0227, "step": 6688 }, { "epoch": 0.22, "grad_norm": 0.47129449248313904, "learning_rate": 0.0005918901438540314, "loss": 1.9538, "step": 6689 }, { "epoch": 0.22, "grad_norm": 0.4943375885486603, "learning_rate": 0.0005918877298277813, "loss": 2.0134, "step": 6690 }, { "epoch": 0.22, "grad_norm": 0.4779224991798401, "learning_rate": 0.0005918853154472224, "loss": 1.8224, "step": 6691 }, { "epoch": 0.22, "grad_norm": 0.47575807571411133, "learning_rate": 0.0005918829007123578, "loss": 2.0086, "step": 6692 }, { "epoch": 0.22, "grad_norm": 0.4640434980392456, "learning_rate": 0.0005918804856231903, "loss": 1.9756, "step": 6693 }, { "epoch": 0.22, "grad_norm": 0.4801684021949768, "learning_rate": 0.000591878070179723, "loss": 1.9687, "step": 6694 }, { "epoch": 0.22, "grad_norm": 0.47795870900154114, "learning_rate": 0.0005918756543819587, "loss": 1.9164, "step": 6695 }, { "epoch": 0.22, "grad_norm": 0.471692830324173, "learning_rate": 0.0005918732382299004, "loss": 1.9691, "step": 6696 }, { "epoch": 0.22, "grad_norm": 0.4690624475479126, "learning_rate": 0.000591870821723551, "loss": 1.8646, "step": 6697 }, { "epoch": 0.22, "grad_norm": 0.4767906367778778, "learning_rate": 0.0005918684048629133, "loss": 2.0041, "step": 6698 }, { "epoch": 0.22, "grad_norm": 0.48040997982025146, "learning_rate": 0.0005918659876479904, "loss": 1.9678, "step": 6699 }, { "epoch": 0.22, "grad_norm": 0.47160670161247253, "learning_rate": 0.0005918635700787853, "loss": 1.9538, "step": 6700 }, { "epoch": 0.22, "grad_norm": 0.47653263807296753, "learning_rate": 0.0005918611521553008, "loss": 1.9588, "step": 6701 }, { "epoch": 0.22, "grad_norm": 0.4874219298362732, "learning_rate": 0.0005918587338775398, "loss": 1.9894, "step": 6702 }, { "epoch": 0.22, "grad_norm": 0.4650515615940094, "learning_rate": 0.0005918563152455054, "loss": 1.9316, "step": 6703 }, { "epoch": 0.22, "grad_norm": 0.4606582820415497, "learning_rate": 0.0005918538962592003, "loss": 1.909, "step": 6704 }, { "epoch": 0.22, "grad_norm": 0.468498557806015, "learning_rate": 0.0005918514769186276, "loss": 1.9126, "step": 6705 }, { "epoch": 0.22, "grad_norm": 0.4652952253818512, "learning_rate": 0.0005918490572237902, "loss": 1.8766, "step": 6706 }, { "epoch": 0.22, "grad_norm": 0.5065241456031799, "learning_rate": 0.0005918466371746911, "loss": 1.9374, "step": 6707 }, { "epoch": 0.22, "grad_norm": 0.46856197714805603, "learning_rate": 0.0005918442167713332, "loss": 1.8956, "step": 6708 }, { "epoch": 0.22, "grad_norm": 0.48601585626602173, "learning_rate": 0.0005918417960137194, "loss": 1.9927, "step": 6709 }, { "epoch": 0.22, "grad_norm": 0.4872528612613678, "learning_rate": 0.0005918393749018527, "loss": 2.011, "step": 6710 }, { "epoch": 0.22, "grad_norm": 0.4648600220680237, "learning_rate": 0.000591836953435736, "loss": 1.9672, "step": 6711 }, { "epoch": 0.22, "grad_norm": 0.5003621578216553, "learning_rate": 0.0005918345316153721, "loss": 1.9647, "step": 6712 }, { "epoch": 0.22, "grad_norm": 0.486700177192688, "learning_rate": 0.0005918321094407642, "loss": 1.9057, "step": 6713 }, { "epoch": 0.22, "grad_norm": 0.48751330375671387, "learning_rate": 0.000591829686911915, "loss": 1.8544, "step": 6714 }, { "epoch": 0.22, "grad_norm": 0.47733184695243835, "learning_rate": 0.0005918272640288277, "loss": 1.9744, "step": 6715 }, { "epoch": 0.22, "grad_norm": 0.480328232049942, "learning_rate": 0.0005918248407915051, "loss": 1.8397, "step": 6716 }, { "epoch": 0.22, "grad_norm": 0.48471081256866455, "learning_rate": 0.0005918224171999501, "loss": 1.8847, "step": 6717 }, { "epoch": 0.22, "grad_norm": 0.48855769634246826, "learning_rate": 0.0005918199932541656, "loss": 1.9812, "step": 6718 }, { "epoch": 0.22, "grad_norm": 0.47672098875045776, "learning_rate": 0.0005918175689541547, "loss": 1.9488, "step": 6719 }, { "epoch": 0.22, "grad_norm": 0.4951746165752411, "learning_rate": 0.0005918151442999203, "loss": 2.0022, "step": 6720 }, { "epoch": 0.22, "grad_norm": 0.4766579866409302, "learning_rate": 0.0005918127192914653, "loss": 1.9355, "step": 6721 }, { "epoch": 0.22, "grad_norm": 0.4802343249320984, "learning_rate": 0.0005918102939287927, "loss": 1.9797, "step": 6722 }, { "epoch": 0.22, "grad_norm": 0.4867473244667053, "learning_rate": 0.0005918078682119052, "loss": 1.9598, "step": 6723 }, { "epoch": 0.22, "grad_norm": 0.47343242168426514, "learning_rate": 0.0005918054421408062, "loss": 1.9761, "step": 6724 }, { "epoch": 0.22, "grad_norm": 0.5191920399665833, "learning_rate": 0.0005918030157154983, "loss": 2.0415, "step": 6725 }, { "epoch": 0.22, "grad_norm": 0.4921637177467346, "learning_rate": 0.0005918005889359845, "loss": 1.9273, "step": 6726 }, { "epoch": 0.22, "grad_norm": 0.48035189509391785, "learning_rate": 0.0005917981618022678, "loss": 1.9199, "step": 6727 }, { "epoch": 0.22, "grad_norm": 0.4791738986968994, "learning_rate": 0.0005917957343143511, "loss": 1.9216, "step": 6728 }, { "epoch": 0.22, "grad_norm": 0.48467567563056946, "learning_rate": 0.0005917933064722373, "loss": 2.0219, "step": 6729 }, { "epoch": 0.22, "grad_norm": 0.48315292596817017, "learning_rate": 0.0005917908782759295, "loss": 1.9281, "step": 6730 }, { "epoch": 0.22, "grad_norm": 0.46983417868614197, "learning_rate": 0.0005917884497254306, "loss": 1.9199, "step": 6731 }, { "epoch": 0.22, "grad_norm": 0.47347548604011536, "learning_rate": 0.0005917860208207435, "loss": 1.9185, "step": 6732 }, { "epoch": 0.22, "grad_norm": 0.4765191674232483, "learning_rate": 0.0005917835915618711, "loss": 1.9313, "step": 6733 }, { "epoch": 0.22, "grad_norm": 0.4957314133644104, "learning_rate": 0.0005917811619488164, "loss": 1.9396, "step": 6734 }, { "epoch": 0.22, "grad_norm": 0.47776809334754944, "learning_rate": 0.0005917787319815825, "loss": 1.9754, "step": 6735 }, { "epoch": 0.22, "grad_norm": 0.4976656138896942, "learning_rate": 0.0005917763016601721, "loss": 1.9634, "step": 6736 }, { "epoch": 0.22, "grad_norm": 0.4843595325946808, "learning_rate": 0.0005917738709845884, "loss": 1.9359, "step": 6737 }, { "epoch": 0.22, "grad_norm": 0.46607282757759094, "learning_rate": 0.000591771439954834, "loss": 1.9054, "step": 6738 }, { "epoch": 0.22, "grad_norm": 0.46676719188690186, "learning_rate": 0.0005917690085709122, "loss": 1.9167, "step": 6739 }, { "epoch": 0.22, "grad_norm": 0.4656652808189392, "learning_rate": 0.0005917665768328258, "loss": 1.929, "step": 6740 }, { "epoch": 0.22, "grad_norm": 0.46515920758247375, "learning_rate": 0.0005917641447405778, "loss": 1.8347, "step": 6741 }, { "epoch": 0.22, "grad_norm": 0.47366639971733093, "learning_rate": 0.000591761712294171, "loss": 1.902, "step": 6742 }, { "epoch": 0.22, "grad_norm": 0.46541211009025574, "learning_rate": 0.0005917592794936086, "loss": 1.9645, "step": 6743 }, { "epoch": 0.22, "grad_norm": 0.472347229719162, "learning_rate": 0.0005917568463388933, "loss": 1.8552, "step": 6744 }, { "epoch": 0.22, "grad_norm": 0.46790969371795654, "learning_rate": 0.0005917544128300282, "loss": 1.9043, "step": 6745 }, { "epoch": 0.22, "grad_norm": 0.48030519485473633, "learning_rate": 0.0005917519789670162, "loss": 1.9601, "step": 6746 }, { "epoch": 0.22, "grad_norm": 0.4679052233695984, "learning_rate": 0.0005917495447498604, "loss": 1.9356, "step": 6747 }, { "epoch": 0.22, "grad_norm": 0.4757453203201294, "learning_rate": 0.0005917471101785636, "loss": 1.9038, "step": 6748 }, { "epoch": 0.22, "grad_norm": 0.46807506680488586, "learning_rate": 0.0005917446752531289, "loss": 1.9387, "step": 6749 }, { "epoch": 0.22, "grad_norm": 0.46672412753105164, "learning_rate": 0.000591742239973559, "loss": 2.0317, "step": 6750 }, { "epoch": 0.22, "grad_norm": 0.49999740719795227, "learning_rate": 0.0005917398043398571, "loss": 1.9887, "step": 6751 }, { "epoch": 0.22, "grad_norm": 0.5001593232154846, "learning_rate": 0.0005917373683520258, "loss": 1.8963, "step": 6752 }, { "epoch": 0.22, "grad_norm": 0.5811895132064819, "learning_rate": 0.0005917349320100686, "loss": 1.9085, "step": 6753 }, { "epoch": 0.22, "grad_norm": 0.4663577675819397, "learning_rate": 0.000591732495313988, "loss": 1.9307, "step": 6754 }, { "epoch": 0.22, "grad_norm": 0.502748966217041, "learning_rate": 0.0005917300582637874, "loss": 1.9863, "step": 6755 }, { "epoch": 0.22, "grad_norm": 0.47628822922706604, "learning_rate": 0.0005917276208594693, "loss": 2.0096, "step": 6756 }, { "epoch": 0.22, "grad_norm": 0.4719099700450897, "learning_rate": 0.0005917251831010369, "loss": 1.9561, "step": 6757 }, { "epoch": 0.22, "grad_norm": 0.48141035437583923, "learning_rate": 0.0005917227449884931, "loss": 1.977, "step": 6758 }, { "epoch": 0.22, "grad_norm": 0.48025646805763245, "learning_rate": 0.0005917203065218409, "loss": 1.9374, "step": 6759 }, { "epoch": 0.22, "grad_norm": 0.46828633546829224, "learning_rate": 0.0005917178677010831, "loss": 1.9134, "step": 6760 }, { "epoch": 0.22, "grad_norm": 0.49285146594047546, "learning_rate": 0.0005917154285262229, "loss": 1.9543, "step": 6761 }, { "epoch": 0.22, "grad_norm": 0.4826984703540802, "learning_rate": 0.0005917129889972632, "loss": 1.9073, "step": 6762 }, { "epoch": 0.23, "grad_norm": 0.4728988707065582, "learning_rate": 0.0005917105491142069, "loss": 1.9536, "step": 6763 }, { "epoch": 0.23, "grad_norm": 0.46687644720077515, "learning_rate": 0.0005917081088770569, "loss": 1.8941, "step": 6764 }, { "epoch": 0.23, "grad_norm": 0.4981936812400818, "learning_rate": 0.0005917056682858162, "loss": 1.9241, "step": 6765 }, { "epoch": 0.23, "grad_norm": 0.4894196689128876, "learning_rate": 0.000591703227340488, "loss": 1.9241, "step": 6766 }, { "epoch": 0.23, "grad_norm": 0.4580756425857544, "learning_rate": 0.0005917007860410748, "loss": 1.9081, "step": 6767 }, { "epoch": 0.23, "grad_norm": 0.4823797941207886, "learning_rate": 0.00059169834438758, "loss": 1.9114, "step": 6768 }, { "epoch": 0.23, "grad_norm": 0.4725188612937927, "learning_rate": 0.0005916959023800064, "loss": 1.9125, "step": 6769 }, { "epoch": 0.23, "grad_norm": 0.47225967049598694, "learning_rate": 0.0005916934600183569, "loss": 1.9319, "step": 6770 }, { "epoch": 0.23, "grad_norm": 0.4679523706436157, "learning_rate": 0.0005916910173026345, "loss": 1.9068, "step": 6771 }, { "epoch": 0.23, "grad_norm": 0.48607346415519714, "learning_rate": 0.0005916885742328422, "loss": 1.9943, "step": 6772 }, { "epoch": 0.23, "grad_norm": 0.4779009521007538, "learning_rate": 0.0005916861308089829, "loss": 1.9765, "step": 6773 }, { "epoch": 0.23, "grad_norm": 0.47226130962371826, "learning_rate": 0.0005916836870310597, "loss": 1.8252, "step": 6774 }, { "epoch": 0.23, "grad_norm": 0.47335484623908997, "learning_rate": 0.0005916812428990755, "loss": 1.9875, "step": 6775 }, { "epoch": 0.23, "grad_norm": 0.46814513206481934, "learning_rate": 0.0005916787984130332, "loss": 1.995, "step": 6776 }, { "epoch": 0.23, "grad_norm": 0.4942004382610321, "learning_rate": 0.0005916763535729358, "loss": 2.0018, "step": 6777 }, { "epoch": 0.23, "grad_norm": 0.5772274136543274, "learning_rate": 0.0005916739083787862, "loss": 2.0142, "step": 6778 }, { "epoch": 0.23, "grad_norm": 0.4647122323513031, "learning_rate": 0.0005916714628305875, "loss": 1.9388, "step": 6779 }, { "epoch": 0.23, "grad_norm": 0.48471713066101074, "learning_rate": 0.0005916690169283427, "loss": 1.9008, "step": 6780 }, { "epoch": 0.23, "grad_norm": 0.4971446394920349, "learning_rate": 0.0005916665706720547, "loss": 1.9084, "step": 6781 }, { "epoch": 0.23, "grad_norm": 0.46110424399375916, "learning_rate": 0.0005916641240617265, "loss": 2.0285, "step": 6782 }, { "epoch": 0.23, "grad_norm": 0.49577197432518005, "learning_rate": 0.0005916616770973609, "loss": 1.9495, "step": 6783 }, { "epoch": 0.23, "grad_norm": 0.5192288756370544, "learning_rate": 0.000591659229778961, "loss": 1.9826, "step": 6784 }, { "epoch": 0.23, "grad_norm": 0.4846407473087311, "learning_rate": 0.0005916567821065298, "loss": 1.9916, "step": 6785 }, { "epoch": 0.23, "grad_norm": 0.4979866147041321, "learning_rate": 0.0005916543340800703, "loss": 2.0281, "step": 6786 }, { "epoch": 0.23, "grad_norm": 0.49068140983581543, "learning_rate": 0.0005916518856995853, "loss": 1.8943, "step": 6787 }, { "epoch": 0.23, "grad_norm": 0.48617082834243774, "learning_rate": 0.000591649436965078, "loss": 1.9636, "step": 6788 }, { "epoch": 0.23, "grad_norm": 0.47447654604911804, "learning_rate": 0.0005916469878765512, "loss": 1.9059, "step": 6789 }, { "epoch": 0.23, "grad_norm": 0.4817626476287842, "learning_rate": 0.0005916445384340079, "loss": 1.9805, "step": 6790 }, { "epoch": 0.23, "grad_norm": 0.5247533321380615, "learning_rate": 0.0005916420886374511, "loss": 1.8669, "step": 6791 }, { "epoch": 0.23, "grad_norm": 0.4574476182460785, "learning_rate": 0.0005916396384868838, "loss": 1.9389, "step": 6792 }, { "epoch": 0.23, "grad_norm": 0.4741672873497009, "learning_rate": 0.000591637187982309, "loss": 1.9509, "step": 6793 }, { "epoch": 0.23, "grad_norm": 0.4938278794288635, "learning_rate": 0.0005916347371237295, "loss": 2.0274, "step": 6794 }, { "epoch": 0.23, "grad_norm": 0.4854717254638672, "learning_rate": 0.0005916322859111485, "loss": 1.9329, "step": 6795 }, { "epoch": 0.23, "grad_norm": 0.4743959903717041, "learning_rate": 0.0005916298343445689, "loss": 1.9144, "step": 6796 }, { "epoch": 0.23, "grad_norm": 0.483325719833374, "learning_rate": 0.0005916273824239936, "loss": 1.9697, "step": 6797 }, { "epoch": 0.23, "grad_norm": 0.4809252917766571, "learning_rate": 0.0005916249301494256, "loss": 1.9119, "step": 6798 }, { "epoch": 0.23, "grad_norm": 0.4779937267303467, "learning_rate": 0.000591622477520868, "loss": 1.8959, "step": 6799 }, { "epoch": 0.23, "grad_norm": 0.47630926966667175, "learning_rate": 0.0005916200245383236, "loss": 1.9396, "step": 6800 }, { "epoch": 0.23, "grad_norm": 0.5009976029396057, "learning_rate": 0.0005916175712017955, "loss": 1.9745, "step": 6801 }, { "epoch": 0.23, "grad_norm": 0.4848721921443939, "learning_rate": 0.0005916151175112866, "loss": 1.9999, "step": 6802 }, { "epoch": 0.23, "grad_norm": 0.4686969518661499, "learning_rate": 0.0005916126634667999, "loss": 1.9377, "step": 6803 }, { "epoch": 0.23, "grad_norm": 0.46359702944755554, "learning_rate": 0.0005916102090683384, "loss": 1.9419, "step": 6804 }, { "epoch": 0.23, "grad_norm": 0.4623791575431824, "learning_rate": 0.0005916077543159052, "loss": 1.8599, "step": 6805 }, { "epoch": 0.23, "grad_norm": 0.4652559459209442, "learning_rate": 0.000591605299209503, "loss": 1.9695, "step": 6806 }, { "epoch": 0.23, "grad_norm": 0.45803311467170715, "learning_rate": 0.0005916028437491349, "loss": 1.9455, "step": 6807 }, { "epoch": 0.23, "grad_norm": 0.458161324262619, "learning_rate": 0.0005916003879348041, "loss": 1.9547, "step": 6808 }, { "epoch": 0.23, "grad_norm": 0.4766353666782379, "learning_rate": 0.0005915979317665133, "loss": 1.9164, "step": 6809 }, { "epoch": 0.23, "grad_norm": 0.4697141647338867, "learning_rate": 0.0005915954752442655, "loss": 1.8669, "step": 6810 }, { "epoch": 0.23, "grad_norm": 0.5811225771903992, "learning_rate": 0.0005915930183680638, "loss": 2.0156, "step": 6811 }, { "epoch": 0.23, "grad_norm": 0.48577165603637695, "learning_rate": 0.0005915905611379112, "loss": 1.926, "step": 6812 }, { "epoch": 0.23, "grad_norm": 0.47313904762268066, "learning_rate": 0.0005915881035538106, "loss": 1.8413, "step": 6813 }, { "epoch": 0.23, "grad_norm": 0.4825124740600586, "learning_rate": 0.000591585645615765, "loss": 1.8824, "step": 6814 }, { "epoch": 0.23, "grad_norm": 0.4679250419139862, "learning_rate": 0.0005915831873237773, "loss": 2.0017, "step": 6815 }, { "epoch": 0.23, "grad_norm": 0.48614034056663513, "learning_rate": 0.0005915807286778507, "loss": 1.925, "step": 6816 }, { "epoch": 0.23, "grad_norm": 0.4736821949481964, "learning_rate": 0.000591578269677988, "loss": 1.9468, "step": 6817 }, { "epoch": 0.23, "grad_norm": 0.45392292737960815, "learning_rate": 0.0005915758103241922, "loss": 1.9195, "step": 6818 }, { "epoch": 0.23, "grad_norm": 0.4724878966808319, "learning_rate": 0.0005915733506164664, "loss": 1.9425, "step": 6819 }, { "epoch": 0.23, "grad_norm": 0.48168203234672546, "learning_rate": 0.0005915708905548135, "loss": 1.8692, "step": 6820 }, { "epoch": 0.23, "grad_norm": 0.45943018794059753, "learning_rate": 0.0005915684301392365, "loss": 1.9152, "step": 6821 }, { "epoch": 0.23, "grad_norm": 0.46816983819007874, "learning_rate": 0.0005915659693697384, "loss": 1.8922, "step": 6822 }, { "epoch": 0.23, "grad_norm": 0.4654633402824402, "learning_rate": 0.0005915635082463221, "loss": 1.9136, "step": 6823 }, { "epoch": 0.23, "grad_norm": 0.5079532861709595, "learning_rate": 0.0005915610467689908, "loss": 1.9519, "step": 6824 }, { "epoch": 0.23, "grad_norm": 0.47495242953300476, "learning_rate": 0.0005915585849377473, "loss": 1.9388, "step": 6825 }, { "epoch": 0.23, "grad_norm": 0.47244203090667725, "learning_rate": 0.0005915561227525946, "loss": 1.9362, "step": 6826 }, { "epoch": 0.23, "grad_norm": 0.4895824193954468, "learning_rate": 0.0005915536602135356, "loss": 1.9897, "step": 6827 }, { "epoch": 0.23, "grad_norm": 0.47879669070243835, "learning_rate": 0.0005915511973205736, "loss": 1.9207, "step": 6828 }, { "epoch": 0.23, "grad_norm": 0.4731700122356415, "learning_rate": 0.0005915487340737113, "loss": 1.9024, "step": 6829 }, { "epoch": 0.23, "grad_norm": 0.4762233793735504, "learning_rate": 0.0005915462704729518, "loss": 2.0191, "step": 6830 }, { "epoch": 0.23, "grad_norm": 0.4701041281223297, "learning_rate": 0.0005915438065182981, "loss": 1.8615, "step": 6831 }, { "epoch": 0.23, "grad_norm": 0.4686734974384308, "learning_rate": 0.0005915413422097533, "loss": 1.8636, "step": 6832 }, { "epoch": 0.23, "grad_norm": 0.4602404534816742, "learning_rate": 0.0005915388775473201, "loss": 1.9817, "step": 6833 }, { "epoch": 0.23, "grad_norm": 0.48349127173423767, "learning_rate": 0.0005915364125310018, "loss": 1.936, "step": 6834 }, { "epoch": 0.23, "grad_norm": 0.4761960804462433, "learning_rate": 0.0005915339471608012, "loss": 1.93, "step": 6835 }, { "epoch": 0.23, "grad_norm": 0.48171675205230713, "learning_rate": 0.0005915314814367213, "loss": 1.8192, "step": 6836 }, { "epoch": 0.23, "grad_norm": 0.47320568561553955, "learning_rate": 0.0005915290153587652, "loss": 1.9503, "step": 6837 }, { "epoch": 0.23, "grad_norm": 0.4936966001987457, "learning_rate": 0.0005915265489269358, "loss": 1.9848, "step": 6838 }, { "epoch": 0.23, "grad_norm": 0.48418688774108887, "learning_rate": 0.000591524082141236, "loss": 1.9624, "step": 6839 }, { "epoch": 0.23, "grad_norm": 0.4918253719806671, "learning_rate": 0.0005915216150016691, "loss": 1.974, "step": 6840 }, { "epoch": 0.23, "grad_norm": 0.4895907938480377, "learning_rate": 0.0005915191475082378, "loss": 1.9341, "step": 6841 }, { "epoch": 0.23, "grad_norm": 0.4855082035064697, "learning_rate": 0.0005915166796609453, "loss": 1.9478, "step": 6842 }, { "epoch": 0.23, "grad_norm": 0.5043368935585022, "learning_rate": 0.0005915142114597944, "loss": 1.9786, "step": 6843 }, { "epoch": 0.23, "grad_norm": 0.5008472204208374, "learning_rate": 0.0005915117429047882, "loss": 1.8767, "step": 6844 }, { "epoch": 0.23, "grad_norm": 0.48207059502601624, "learning_rate": 0.0005915092739959298, "loss": 1.8676, "step": 6845 }, { "epoch": 0.23, "grad_norm": 0.47613659501075745, "learning_rate": 0.000591506804733222, "loss": 1.98, "step": 6846 }, { "epoch": 0.23, "grad_norm": 0.501003086566925, "learning_rate": 0.000591504335116668, "loss": 1.9901, "step": 6847 }, { "epoch": 0.23, "grad_norm": 0.49402350187301636, "learning_rate": 0.0005915018651462706, "loss": 1.8836, "step": 6848 }, { "epoch": 0.23, "grad_norm": 0.4949283003807068, "learning_rate": 0.0005914993948220329, "loss": 1.9839, "step": 6849 }, { "epoch": 0.23, "grad_norm": 0.5268737077713013, "learning_rate": 0.0005914969241439579, "loss": 2.0151, "step": 6850 }, { "epoch": 0.23, "grad_norm": 0.5047407150268555, "learning_rate": 0.0005914944531120486, "loss": 1.9237, "step": 6851 }, { "epoch": 0.23, "grad_norm": 0.4762306213378906, "learning_rate": 0.000591491981726308, "loss": 1.9779, "step": 6852 }, { "epoch": 0.23, "grad_norm": 0.5389195680618286, "learning_rate": 0.000591489509986739, "loss": 1.9555, "step": 6853 }, { "epoch": 0.23, "grad_norm": 0.5379624962806702, "learning_rate": 0.0005914870378933447, "loss": 1.8821, "step": 6854 }, { "epoch": 0.23, "grad_norm": 0.48936185240745544, "learning_rate": 0.0005914845654461282, "loss": 1.9745, "step": 6855 }, { "epoch": 0.23, "grad_norm": 0.7946199178695679, "learning_rate": 0.0005914820926450923, "loss": 1.9917, "step": 6856 }, { "epoch": 0.23, "grad_norm": 0.5537733435630798, "learning_rate": 0.00059147961949024, "loss": 1.9484, "step": 6857 }, { "epoch": 0.23, "grad_norm": 0.49534422159194946, "learning_rate": 0.0005914771459815745, "loss": 1.9398, "step": 6858 }, { "epoch": 0.23, "grad_norm": 0.5091673135757446, "learning_rate": 0.0005914746721190987, "loss": 1.9184, "step": 6859 }, { "epoch": 0.23, "grad_norm": 0.5292655825614929, "learning_rate": 0.0005914721979028156, "loss": 1.9956, "step": 6860 }, { "epoch": 0.23, "grad_norm": 0.495104044675827, "learning_rate": 0.0005914697233327281, "loss": 1.9865, "step": 6861 }, { "epoch": 0.23, "grad_norm": 0.4795495271682739, "learning_rate": 0.0005914672484088393, "loss": 1.9402, "step": 6862 }, { "epoch": 0.23, "grad_norm": 0.5045921802520752, "learning_rate": 0.0005914647731311524, "loss": 1.8839, "step": 6863 }, { "epoch": 0.23, "grad_norm": 0.4909382164478302, "learning_rate": 0.00059146229749967, "loss": 1.8894, "step": 6864 }, { "epoch": 0.23, "grad_norm": 0.47599685192108154, "learning_rate": 0.0005914598215143955, "loss": 1.9558, "step": 6865 }, { "epoch": 0.23, "grad_norm": 0.5028247833251953, "learning_rate": 0.0005914573451753316, "loss": 1.971, "step": 6866 }, { "epoch": 0.23, "grad_norm": 0.48861080408096313, "learning_rate": 0.0005914548684824814, "loss": 1.9284, "step": 6867 }, { "epoch": 0.23, "grad_norm": 0.5101702809333801, "learning_rate": 0.000591452391435848, "loss": 1.9067, "step": 6868 }, { "epoch": 0.23, "grad_norm": 0.48119306564331055, "learning_rate": 0.0005914499140354343, "loss": 1.9337, "step": 6869 }, { "epoch": 0.23, "grad_norm": 0.4823196828365326, "learning_rate": 0.0005914474362812434, "loss": 1.947, "step": 6870 }, { "epoch": 0.23, "grad_norm": 0.5076872706413269, "learning_rate": 0.0005914449581732781, "loss": 1.9131, "step": 6871 }, { "epoch": 0.23, "grad_norm": 0.476103276014328, "learning_rate": 0.0005914424797115418, "loss": 1.9638, "step": 6872 }, { "epoch": 0.23, "grad_norm": 0.48309147357940674, "learning_rate": 0.0005914400008960371, "loss": 1.971, "step": 6873 }, { "epoch": 0.23, "grad_norm": 0.5017204880714417, "learning_rate": 0.0005914375217267674, "loss": 2.0195, "step": 6874 }, { "epoch": 0.23, "grad_norm": 0.5086193680763245, "learning_rate": 0.0005914350422037353, "loss": 1.9738, "step": 6875 }, { "epoch": 0.23, "grad_norm": 0.4913451671600342, "learning_rate": 0.0005914325623269439, "loss": 1.8396, "step": 6876 }, { "epoch": 0.23, "grad_norm": 0.47406458854675293, "learning_rate": 0.0005914300820963965, "loss": 1.8866, "step": 6877 }, { "epoch": 0.23, "grad_norm": 0.5332090854644775, "learning_rate": 0.0005914276015120958, "loss": 1.9564, "step": 6878 }, { "epoch": 0.23, "grad_norm": 0.49686136841773987, "learning_rate": 0.000591425120574045, "loss": 1.911, "step": 6879 }, { "epoch": 0.23, "grad_norm": 0.47668400406837463, "learning_rate": 0.0005914226392822471, "loss": 1.9403, "step": 6880 }, { "epoch": 0.23, "grad_norm": 0.4698983430862427, "learning_rate": 0.000591420157636705, "loss": 1.9231, "step": 6881 }, { "epoch": 0.23, "grad_norm": 0.47963935136795044, "learning_rate": 0.0005914176756374217, "loss": 1.9971, "step": 6882 }, { "epoch": 0.23, "grad_norm": 0.5487541556358337, "learning_rate": 0.0005914151932844004, "loss": 1.9186, "step": 6883 }, { "epoch": 0.23, "grad_norm": 0.476121187210083, "learning_rate": 0.0005914127105776439, "loss": 2.0244, "step": 6884 }, { "epoch": 0.23, "grad_norm": 0.5187128186225891, "learning_rate": 0.0005914102275171553, "loss": 2.008, "step": 6885 }, { "epoch": 0.23, "grad_norm": 0.5335493683815002, "learning_rate": 0.0005914077441029376, "loss": 1.9038, "step": 6886 }, { "epoch": 0.23, "grad_norm": 0.4995240271091461, "learning_rate": 0.0005914052603349939, "loss": 1.9292, "step": 6887 }, { "epoch": 0.23, "grad_norm": 0.47123605012893677, "learning_rate": 0.0005914027762133272, "loss": 1.9514, "step": 6888 }, { "epoch": 0.23, "grad_norm": 0.5153216123580933, "learning_rate": 0.0005914002917379404, "loss": 1.9354, "step": 6889 }, { "epoch": 0.23, "grad_norm": 0.48755648732185364, "learning_rate": 0.0005913978069088366, "loss": 1.9411, "step": 6890 }, { "epoch": 0.23, "grad_norm": 0.47598427534103394, "learning_rate": 0.0005913953217260189, "loss": 1.9335, "step": 6891 }, { "epoch": 0.23, "grad_norm": 0.4719538986682892, "learning_rate": 0.00059139283618949, "loss": 1.9246, "step": 6892 }, { "epoch": 0.23, "grad_norm": 0.49159660935401917, "learning_rate": 0.0005913903502992533, "loss": 1.9734, "step": 6893 }, { "epoch": 0.23, "grad_norm": 0.5120252370834351, "learning_rate": 0.0005913878640553115, "loss": 2.061, "step": 6894 }, { "epoch": 0.23, "grad_norm": 0.4792795479297638, "learning_rate": 0.0005913853774576678, "loss": 1.953, "step": 6895 }, { "epoch": 0.23, "grad_norm": 0.5011186599731445, "learning_rate": 0.0005913828905063254, "loss": 1.8871, "step": 6896 }, { "epoch": 0.23, "grad_norm": 0.4501497745513916, "learning_rate": 0.0005913804032012869, "loss": 1.8634, "step": 6897 }, { "epoch": 0.23, "grad_norm": 0.470260888338089, "learning_rate": 0.0005913779155425554, "loss": 1.8797, "step": 6898 }, { "epoch": 0.23, "grad_norm": 0.4789772033691406, "learning_rate": 0.0005913754275301343, "loss": 1.9151, "step": 6899 }, { "epoch": 0.23, "grad_norm": 0.4708709418773651, "learning_rate": 0.0005913729391640262, "loss": 1.9161, "step": 6900 }, { "epoch": 0.23, "grad_norm": 0.47114455699920654, "learning_rate": 0.0005913704504442345, "loss": 1.8585, "step": 6901 }, { "epoch": 0.23, "grad_norm": 0.4667196273803711, "learning_rate": 0.0005913679613707618, "loss": 1.9334, "step": 6902 }, { "epoch": 0.23, "grad_norm": 0.471630334854126, "learning_rate": 0.0005913654719436114, "loss": 1.922, "step": 6903 }, { "epoch": 0.23, "grad_norm": 0.47469064593315125, "learning_rate": 0.0005913629821627861, "loss": 1.8766, "step": 6904 }, { "epoch": 0.23, "grad_norm": 0.467850923538208, "learning_rate": 0.0005913604920282893, "loss": 1.9669, "step": 6905 }, { "epoch": 0.23, "grad_norm": 0.4820750057697296, "learning_rate": 0.0005913580015401238, "loss": 1.9076, "step": 6906 }, { "epoch": 0.23, "grad_norm": 0.48018983006477356, "learning_rate": 0.0005913555106982924, "loss": 1.9895, "step": 6907 }, { "epoch": 0.23, "grad_norm": 0.5477624535560608, "learning_rate": 0.0005913530195027984, "loss": 1.9559, "step": 6908 }, { "epoch": 0.23, "grad_norm": 0.4962730407714844, "learning_rate": 0.0005913505279536448, "loss": 1.9216, "step": 6909 }, { "epoch": 0.23, "grad_norm": 0.5021535754203796, "learning_rate": 0.0005913480360508346, "loss": 1.8275, "step": 6910 }, { "epoch": 0.23, "grad_norm": 0.4971522092819214, "learning_rate": 0.0005913455437943708, "loss": 1.9979, "step": 6911 }, { "epoch": 0.23, "grad_norm": 0.4995318055152893, "learning_rate": 0.0005913430511842565, "loss": 1.9002, "step": 6912 }, { "epoch": 0.23, "grad_norm": 0.4899158179759979, "learning_rate": 0.0005913405582204946, "loss": 1.9508, "step": 6913 }, { "epoch": 0.23, "grad_norm": 0.47604161500930786, "learning_rate": 0.0005913380649030882, "loss": 1.9248, "step": 6914 }, { "epoch": 0.23, "grad_norm": 0.4956238865852356, "learning_rate": 0.0005913355712320403, "loss": 1.9526, "step": 6915 }, { "epoch": 0.23, "grad_norm": 0.4813867211341858, "learning_rate": 0.000591333077207354, "loss": 1.9086, "step": 6916 }, { "epoch": 0.23, "grad_norm": 0.47909650206565857, "learning_rate": 0.0005913305828290323, "loss": 1.8955, "step": 6917 }, { "epoch": 0.23, "grad_norm": 0.48725029826164246, "learning_rate": 0.000591328088097078, "loss": 1.9591, "step": 6918 }, { "epoch": 0.23, "grad_norm": 0.47945594787597656, "learning_rate": 0.0005913255930114945, "loss": 1.9219, "step": 6919 }, { "epoch": 0.23, "grad_norm": 0.4724434018135071, "learning_rate": 0.0005913230975722846, "loss": 1.8824, "step": 6920 }, { "epoch": 0.23, "grad_norm": 0.45664748549461365, "learning_rate": 0.0005913206017794515, "loss": 1.8847, "step": 6921 }, { "epoch": 0.23, "grad_norm": 0.4850921332836151, "learning_rate": 0.0005913181056329979, "loss": 1.9695, "step": 6922 }, { "epoch": 0.23, "grad_norm": 0.4597770869731903, "learning_rate": 0.0005913156091329272, "loss": 1.8993, "step": 6923 }, { "epoch": 0.23, "grad_norm": 0.4882655739784241, "learning_rate": 0.0005913131122792422, "loss": 1.9178, "step": 6924 }, { "epoch": 0.23, "grad_norm": 0.46910661458969116, "learning_rate": 0.0005913106150719461, "loss": 1.8494, "step": 6925 }, { "epoch": 0.23, "grad_norm": 0.484272301197052, "learning_rate": 0.0005913081175110417, "loss": 1.9936, "step": 6926 }, { "epoch": 0.23, "grad_norm": 0.47014379501342773, "learning_rate": 0.0005913056195965323, "loss": 1.8796, "step": 6927 }, { "epoch": 0.23, "grad_norm": 0.47266894578933716, "learning_rate": 0.0005913031213284207, "loss": 1.913, "step": 6928 }, { "epoch": 0.23, "grad_norm": 0.47987863421440125, "learning_rate": 0.00059130062270671, "loss": 1.9005, "step": 6929 }, { "epoch": 0.23, "grad_norm": 0.5066982507705688, "learning_rate": 0.0005912981237314034, "loss": 1.9328, "step": 6930 }, { "epoch": 0.23, "grad_norm": 0.4689052402973175, "learning_rate": 0.0005912956244025037, "loss": 1.8928, "step": 6931 }, { "epoch": 0.23, "grad_norm": 0.46040838956832886, "learning_rate": 0.000591293124720014, "loss": 1.905, "step": 6932 }, { "epoch": 0.23, "grad_norm": 0.4680744409561157, "learning_rate": 0.0005912906246839375, "loss": 1.9394, "step": 6933 }, { "epoch": 0.23, "grad_norm": 0.5109283924102783, "learning_rate": 0.000591288124294277, "loss": 1.9477, "step": 6934 }, { "epoch": 0.23, "grad_norm": 0.4816392660140991, "learning_rate": 0.0005912856235510357, "loss": 1.9341, "step": 6935 }, { "epoch": 0.23, "grad_norm": 0.46400558948516846, "learning_rate": 0.0005912831224542165, "loss": 2.0104, "step": 6936 }, { "epoch": 0.23, "grad_norm": 0.47878140211105347, "learning_rate": 0.0005912806210038225, "loss": 1.9518, "step": 6937 }, { "epoch": 0.23, "grad_norm": 0.47140341997146606, "learning_rate": 0.0005912781191998569, "loss": 1.9726, "step": 6938 }, { "epoch": 0.23, "grad_norm": 0.46251052618026733, "learning_rate": 0.0005912756170423223, "loss": 1.8726, "step": 6939 }, { "epoch": 0.23, "grad_norm": 0.4657667279243469, "learning_rate": 0.0005912731145312223, "loss": 1.8892, "step": 6940 }, { "epoch": 0.23, "grad_norm": 0.48027580976486206, "learning_rate": 0.0005912706116665596, "loss": 1.9021, "step": 6941 }, { "epoch": 0.23, "grad_norm": 0.46561190485954285, "learning_rate": 0.0005912681084483372, "loss": 1.9576, "step": 6942 }, { "epoch": 0.23, "grad_norm": 0.4609284996986389, "learning_rate": 0.0005912656048765582, "loss": 1.8453, "step": 6943 }, { "epoch": 0.23, "grad_norm": 0.4736459255218506, "learning_rate": 0.0005912631009512258, "loss": 1.9592, "step": 6944 }, { "epoch": 0.23, "grad_norm": 0.49837544560432434, "learning_rate": 0.0005912605966723429, "loss": 1.9548, "step": 6945 }, { "epoch": 0.23, "grad_norm": 0.48826876282691956, "learning_rate": 0.0005912580920399125, "loss": 1.9497, "step": 6946 }, { "epoch": 0.23, "grad_norm": 0.4669840633869171, "learning_rate": 0.0005912555870539376, "loss": 1.967, "step": 6947 }, { "epoch": 0.23, "grad_norm": 0.4683871567249298, "learning_rate": 0.0005912530817144215, "loss": 1.967, "step": 6948 }, { "epoch": 0.23, "grad_norm": 0.462353378534317, "learning_rate": 0.000591250576021367, "loss": 1.8437, "step": 6949 }, { "epoch": 0.23, "grad_norm": 0.4634731113910675, "learning_rate": 0.0005912480699747772, "loss": 1.896, "step": 6950 }, { "epoch": 0.23, "grad_norm": 0.46431922912597656, "learning_rate": 0.0005912455635746553, "loss": 1.9486, "step": 6951 }, { "epoch": 0.23, "grad_norm": 0.4742051959037781, "learning_rate": 0.0005912430568210039, "loss": 1.9461, "step": 6952 }, { "epoch": 0.23, "grad_norm": 0.4562639892101288, "learning_rate": 0.0005912405497138266, "loss": 1.9738, "step": 6953 }, { "epoch": 0.23, "grad_norm": 0.5335158705711365, "learning_rate": 0.000591238042253126, "loss": 1.928, "step": 6954 }, { "epoch": 0.23, "grad_norm": 0.46783876419067383, "learning_rate": 0.0005912355344389055, "loss": 1.9275, "step": 6955 }, { "epoch": 0.23, "grad_norm": 0.4745924472808838, "learning_rate": 0.0005912330262711679, "loss": 1.91, "step": 6956 }, { "epoch": 0.23, "grad_norm": 0.47586241364479065, "learning_rate": 0.0005912305177499163, "loss": 1.9851, "step": 6957 }, { "epoch": 0.23, "grad_norm": 0.4678603708744049, "learning_rate": 0.0005912280088751538, "loss": 1.9812, "step": 6958 }, { "epoch": 0.23, "grad_norm": 0.4776037037372589, "learning_rate": 0.0005912254996468833, "loss": 1.9943, "step": 6959 }, { "epoch": 0.23, "grad_norm": 0.4844442307949066, "learning_rate": 0.0005912229900651081, "loss": 2.0018, "step": 6960 }, { "epoch": 0.23, "grad_norm": 0.45601794123649597, "learning_rate": 0.0005912204801298311, "loss": 1.9169, "step": 6961 }, { "epoch": 0.23, "grad_norm": 0.48850151896476746, "learning_rate": 0.0005912179698410552, "loss": 1.9012, "step": 6962 }, { "epoch": 0.23, "grad_norm": 0.4869207441806793, "learning_rate": 0.0005912154591987838, "loss": 1.911, "step": 6963 }, { "epoch": 0.23, "grad_norm": 0.471582293510437, "learning_rate": 0.0005912129482030195, "loss": 1.9002, "step": 6964 }, { "epoch": 0.23, "grad_norm": 0.46323832869529724, "learning_rate": 0.0005912104368537657, "loss": 1.9423, "step": 6965 }, { "epoch": 0.23, "grad_norm": 0.4738946557044983, "learning_rate": 0.0005912079251510254, "loss": 1.9341, "step": 6966 }, { "epoch": 0.23, "grad_norm": 0.5147649645805359, "learning_rate": 0.0005912054130948016, "loss": 1.851, "step": 6967 }, { "epoch": 0.23, "grad_norm": 0.4919931888580322, "learning_rate": 0.0005912029006850971, "loss": 2.0123, "step": 6968 }, { "epoch": 0.23, "grad_norm": 0.4641556143760681, "learning_rate": 0.0005912003879219155, "loss": 1.9604, "step": 6969 }, { "epoch": 0.23, "grad_norm": 0.47998446226119995, "learning_rate": 0.0005911978748052593, "loss": 1.9447, "step": 6970 }, { "epoch": 0.23, "grad_norm": 0.49599406123161316, "learning_rate": 0.0005911953613351319, "loss": 1.9396, "step": 6971 }, { "epoch": 0.23, "grad_norm": 0.506511926651001, "learning_rate": 0.0005911928475115363, "loss": 1.8959, "step": 6972 }, { "epoch": 0.23, "grad_norm": 0.5007303357124329, "learning_rate": 0.0005911903333344753, "loss": 1.8942, "step": 6973 }, { "epoch": 0.23, "grad_norm": 0.4726261794567108, "learning_rate": 0.0005911878188039522, "loss": 1.9281, "step": 6974 }, { "epoch": 0.23, "grad_norm": 0.4608903229236603, "learning_rate": 0.0005911853039199701, "loss": 1.8669, "step": 6975 }, { "epoch": 0.23, "grad_norm": 0.4850502908229828, "learning_rate": 0.0005911827886825318, "loss": 1.9254, "step": 6976 }, { "epoch": 0.23, "grad_norm": 0.4659852385520935, "learning_rate": 0.0005911802730916406, "loss": 1.9439, "step": 6977 }, { "epoch": 0.23, "grad_norm": 0.47202068567276, "learning_rate": 0.0005911777571472995, "loss": 1.8689, "step": 6978 }, { "epoch": 0.23, "grad_norm": 0.4751521050930023, "learning_rate": 0.0005911752408495114, "loss": 1.9696, "step": 6979 }, { "epoch": 0.23, "grad_norm": 0.4840339720249176, "learning_rate": 0.0005911727241982795, "loss": 1.9576, "step": 6980 }, { "epoch": 0.23, "grad_norm": 0.4893823564052582, "learning_rate": 0.0005911702071936069, "loss": 1.9832, "step": 6981 }, { "epoch": 0.23, "grad_norm": 0.5036896467208862, "learning_rate": 0.0005911676898354963, "loss": 1.9326, "step": 6982 }, { "epoch": 0.23, "grad_norm": 0.4845447838306427, "learning_rate": 0.0005911651721239513, "loss": 1.9424, "step": 6983 }, { "epoch": 0.23, "grad_norm": 0.4799391031265259, "learning_rate": 0.0005911626540589746, "loss": 2.0006, "step": 6984 }, { "epoch": 0.23, "grad_norm": 0.49747753143310547, "learning_rate": 0.0005911601356405693, "loss": 1.8711, "step": 6985 }, { "epoch": 0.23, "grad_norm": 0.4812791049480438, "learning_rate": 0.0005911576168687385, "loss": 1.9434, "step": 6986 }, { "epoch": 0.23, "grad_norm": 0.46782422065734863, "learning_rate": 0.0005911550977434853, "loss": 1.9387, "step": 6987 }, { "epoch": 0.23, "grad_norm": 0.4856356382369995, "learning_rate": 0.0005911525782648127, "loss": 1.9541, "step": 6988 }, { "epoch": 0.23, "grad_norm": 0.491860032081604, "learning_rate": 0.0005911500584327238, "loss": 2.0093, "step": 6989 }, { "epoch": 0.23, "grad_norm": 0.4821065664291382, "learning_rate": 0.0005911475382472215, "loss": 1.9743, "step": 6990 }, { "epoch": 0.23, "grad_norm": 0.4732043743133545, "learning_rate": 0.0005911450177083092, "loss": 1.919, "step": 6991 }, { "epoch": 0.23, "grad_norm": 0.5281304121017456, "learning_rate": 0.0005911424968159895, "loss": 1.9087, "step": 6992 }, { "epoch": 0.23, "grad_norm": 0.4628399610519409, "learning_rate": 0.0005911399755702659, "loss": 1.9621, "step": 6993 }, { "epoch": 0.23, "grad_norm": 0.49014773964881897, "learning_rate": 0.0005911374539711412, "loss": 2.0098, "step": 6994 }, { "epoch": 0.23, "grad_norm": 0.4809024930000305, "learning_rate": 0.0005911349320186185, "loss": 1.8927, "step": 6995 }, { "epoch": 0.23, "grad_norm": 0.48092466592788696, "learning_rate": 0.0005911324097127009, "loss": 1.9583, "step": 6996 }, { "epoch": 0.23, "grad_norm": 0.4787135422229767, "learning_rate": 0.0005911298870533915, "loss": 1.8523, "step": 6997 }, { "epoch": 0.23, "grad_norm": 0.4581652879714966, "learning_rate": 0.0005911273640406932, "loss": 1.9494, "step": 6998 }, { "epoch": 0.23, "grad_norm": 0.4576006531715393, "learning_rate": 0.0005911248406746093, "loss": 1.9688, "step": 6999 }, { "epoch": 0.23, "grad_norm": 0.46679237484931946, "learning_rate": 0.0005911223169551427, "loss": 1.9922, "step": 7000 }, { "epoch": 0.23, "grad_norm": 0.4735094904899597, "learning_rate": 0.0005911197928822965, "loss": 1.9004, "step": 7001 }, { "epoch": 0.23, "grad_norm": 0.465617835521698, "learning_rate": 0.0005911172684560737, "loss": 1.9612, "step": 7002 }, { "epoch": 0.23, "grad_norm": 0.46264326572418213, "learning_rate": 0.0005911147436764775, "loss": 1.8881, "step": 7003 }, { "epoch": 0.23, "grad_norm": 0.4646472930908203, "learning_rate": 0.0005911122185435108, "loss": 1.9204, "step": 7004 }, { "epoch": 0.23, "grad_norm": 0.46792998909950256, "learning_rate": 0.0005911096930571769, "loss": 2.0216, "step": 7005 }, { "epoch": 0.23, "grad_norm": 0.45772361755371094, "learning_rate": 0.0005911071672174787, "loss": 1.9422, "step": 7006 }, { "epoch": 0.23, "grad_norm": 0.4626213312149048, "learning_rate": 0.0005911046410244193, "loss": 1.9282, "step": 7007 }, { "epoch": 0.23, "grad_norm": 0.4561755955219269, "learning_rate": 0.0005911021144780017, "loss": 1.961, "step": 7008 }, { "epoch": 0.23, "grad_norm": 0.4696520268917084, "learning_rate": 0.000591099587578229, "loss": 1.8856, "step": 7009 }, { "epoch": 0.23, "grad_norm": 0.4625275731086731, "learning_rate": 0.0005910970603251044, "loss": 1.9425, "step": 7010 }, { "epoch": 0.23, "grad_norm": 0.48698747158050537, "learning_rate": 0.0005910945327186307, "loss": 1.954, "step": 7011 }, { "epoch": 0.23, "grad_norm": 0.4761154055595398, "learning_rate": 0.0005910920047588113, "loss": 1.9035, "step": 7012 }, { "epoch": 0.23, "grad_norm": 0.4757744371891022, "learning_rate": 0.000591089476445649, "loss": 1.8763, "step": 7013 }, { "epoch": 0.23, "grad_norm": 0.4742046594619751, "learning_rate": 0.0005910869477791469, "loss": 1.9145, "step": 7014 }, { "epoch": 0.23, "grad_norm": 0.5599613785743713, "learning_rate": 0.0005910844187593082, "loss": 1.9665, "step": 7015 }, { "epoch": 0.23, "grad_norm": 0.459525465965271, "learning_rate": 0.000591081889386136, "loss": 1.9155, "step": 7016 }, { "epoch": 0.23, "grad_norm": 0.5303286910057068, "learning_rate": 0.0005910793596596332, "loss": 1.9373, "step": 7017 }, { "epoch": 0.23, "grad_norm": 0.4589954912662506, "learning_rate": 0.0005910768295798029, "loss": 1.8844, "step": 7018 }, { "epoch": 0.23, "grad_norm": 0.46644437313079834, "learning_rate": 0.0005910742991466482, "loss": 1.8989, "step": 7019 }, { "epoch": 0.23, "grad_norm": 0.4730057120323181, "learning_rate": 0.0005910717683601723, "loss": 1.9703, "step": 7020 }, { "epoch": 0.23, "grad_norm": 0.4675314128398895, "learning_rate": 0.0005910692372203782, "loss": 1.9449, "step": 7021 }, { "epoch": 0.23, "grad_norm": 0.4882805645465851, "learning_rate": 0.0005910667057272688, "loss": 2.0001, "step": 7022 }, { "epoch": 0.23, "grad_norm": 0.4580654203891754, "learning_rate": 0.0005910641738808473, "loss": 1.9441, "step": 7023 }, { "epoch": 0.23, "grad_norm": 0.4781833291053772, "learning_rate": 0.0005910616416811169, "loss": 1.9615, "step": 7024 }, { "epoch": 0.23, "grad_norm": 0.4723731577396393, "learning_rate": 0.0005910591091280804, "loss": 1.8585, "step": 7025 }, { "epoch": 0.23, "grad_norm": 0.4791072607040405, "learning_rate": 0.0005910565762217412, "loss": 1.851, "step": 7026 }, { "epoch": 0.23, "grad_norm": 0.5381991267204285, "learning_rate": 0.0005910540429621021, "loss": 1.9799, "step": 7027 }, { "epoch": 0.23, "grad_norm": 0.4755701422691345, "learning_rate": 0.0005910515093491662, "loss": 1.9628, "step": 7028 }, { "epoch": 0.23, "grad_norm": 0.471388578414917, "learning_rate": 0.0005910489753829368, "loss": 1.9604, "step": 7029 }, { "epoch": 0.23, "grad_norm": 0.4807485342025757, "learning_rate": 0.0005910464410634167, "loss": 1.9467, "step": 7030 }, { "epoch": 0.23, "grad_norm": 0.47837257385253906, "learning_rate": 0.0005910439063906092, "loss": 1.9693, "step": 7031 }, { "epoch": 0.23, "grad_norm": 0.4806913435459137, "learning_rate": 0.0005910413713645173, "loss": 1.8917, "step": 7032 }, { "epoch": 0.23, "grad_norm": 0.5187926292419434, "learning_rate": 0.0005910388359851441, "loss": 1.9809, "step": 7033 }, { "epoch": 0.23, "grad_norm": 0.46187591552734375, "learning_rate": 0.0005910363002524924, "loss": 1.9062, "step": 7034 }, { "epoch": 0.23, "grad_norm": 0.45883452892303467, "learning_rate": 0.0005910337641665657, "loss": 1.9178, "step": 7035 }, { "epoch": 0.23, "grad_norm": 0.4860750734806061, "learning_rate": 0.0005910312277273669, "loss": 1.9554, "step": 7036 }, { "epoch": 0.23, "grad_norm": 0.46119001507759094, "learning_rate": 0.000591028690934899, "loss": 1.9787, "step": 7037 }, { "epoch": 0.23, "grad_norm": 0.47381964325904846, "learning_rate": 0.0005910261537891651, "loss": 1.9854, "step": 7038 }, { "epoch": 0.23, "grad_norm": 0.4839489758014679, "learning_rate": 0.0005910236162901685, "loss": 1.9719, "step": 7039 }, { "epoch": 0.23, "grad_norm": 0.4712524116039276, "learning_rate": 0.0005910210784379119, "loss": 1.9151, "step": 7040 }, { "epoch": 0.23, "grad_norm": 0.46500566601753235, "learning_rate": 0.0005910185402323988, "loss": 1.9462, "step": 7041 }, { "epoch": 0.23, "grad_norm": 0.47124728560447693, "learning_rate": 0.000591016001673632, "loss": 1.9772, "step": 7042 }, { "epoch": 0.23, "grad_norm": 0.5139182209968567, "learning_rate": 0.0005910134627616146, "loss": 1.9292, "step": 7043 }, { "epoch": 0.23, "grad_norm": 0.5116091966629028, "learning_rate": 0.0005910109234963497, "loss": 1.9876, "step": 7044 }, { "epoch": 0.23, "grad_norm": 0.4614759087562561, "learning_rate": 0.0005910083838778405, "loss": 1.899, "step": 7045 }, { "epoch": 0.23, "grad_norm": 0.4850243330001831, "learning_rate": 0.00059100584390609, "loss": 1.889, "step": 7046 }, { "epoch": 0.23, "grad_norm": 0.501665472984314, "learning_rate": 0.0005910033035811013, "loss": 1.928, "step": 7047 }, { "epoch": 0.23, "grad_norm": 0.46526357531547546, "learning_rate": 0.0005910007629028774, "loss": 1.9731, "step": 7048 }, { "epoch": 0.23, "grad_norm": 0.49033764004707336, "learning_rate": 0.0005909982218714215, "loss": 2.0023, "step": 7049 }, { "epoch": 0.23, "grad_norm": 0.48576244711875916, "learning_rate": 0.0005909956804867367, "loss": 1.9496, "step": 7050 }, { "epoch": 0.23, "grad_norm": 0.4959668815135956, "learning_rate": 0.0005909931387488258, "loss": 1.9879, "step": 7051 }, { "epoch": 0.23, "grad_norm": 0.47813233733177185, "learning_rate": 0.0005909905966576922, "loss": 1.9337, "step": 7052 }, { "epoch": 0.23, "grad_norm": 0.5053783655166626, "learning_rate": 0.0005909880542133391, "loss": 2.0272, "step": 7053 }, { "epoch": 0.23, "grad_norm": 0.481132835149765, "learning_rate": 0.0005909855114157691, "loss": 1.9414, "step": 7054 }, { "epoch": 0.23, "grad_norm": 0.5118699073791504, "learning_rate": 0.0005909829682649856, "loss": 1.9621, "step": 7055 }, { "epoch": 0.23, "grad_norm": 0.5178049206733704, "learning_rate": 0.0005909804247609919, "loss": 1.923, "step": 7056 }, { "epoch": 0.23, "grad_norm": 0.46236008405685425, "learning_rate": 0.0005909778809037906, "loss": 2.0154, "step": 7057 }, { "epoch": 0.23, "grad_norm": 0.49290332198143005, "learning_rate": 0.0005909753366933852, "loss": 1.8783, "step": 7058 }, { "epoch": 0.23, "grad_norm": 0.4811243712902069, "learning_rate": 0.0005909727921297784, "loss": 1.8827, "step": 7059 }, { "epoch": 0.23, "grad_norm": 0.47918838262557983, "learning_rate": 0.0005909702472129736, "loss": 1.9573, "step": 7060 }, { "epoch": 0.23, "grad_norm": 0.48230546712875366, "learning_rate": 0.0005909677019429738, "loss": 1.9981, "step": 7061 }, { "epoch": 0.23, "grad_norm": 0.4767097234725952, "learning_rate": 0.0005909651563197821, "loss": 1.9306, "step": 7062 }, { "epoch": 0.23, "grad_norm": 0.4778335690498352, "learning_rate": 0.0005909626103434016, "loss": 1.9986, "step": 7063 }, { "epoch": 0.24, "grad_norm": 0.48213234543800354, "learning_rate": 0.0005909600640138353, "loss": 1.856, "step": 7064 }, { "epoch": 0.24, "grad_norm": 0.5018220543861389, "learning_rate": 0.0005909575173310863, "loss": 1.9635, "step": 7065 }, { "epoch": 0.24, "grad_norm": 0.48594409227371216, "learning_rate": 0.0005909549702951579, "loss": 2.0548, "step": 7066 }, { "epoch": 0.24, "grad_norm": 0.4688006043434143, "learning_rate": 0.0005909524229060529, "loss": 1.8672, "step": 7067 }, { "epoch": 0.24, "grad_norm": 0.4622936546802521, "learning_rate": 0.0005909498751637746, "loss": 1.8717, "step": 7068 }, { "epoch": 0.24, "grad_norm": 0.47318583726882935, "learning_rate": 0.000590947327068326, "loss": 1.9649, "step": 7069 }, { "epoch": 0.24, "grad_norm": 0.48850759863853455, "learning_rate": 0.0005909447786197102, "loss": 1.9317, "step": 7070 }, { "epoch": 0.24, "grad_norm": 0.46879592537879944, "learning_rate": 0.0005909422298179303, "loss": 1.9508, "step": 7071 }, { "epoch": 0.24, "grad_norm": 0.4794846773147583, "learning_rate": 0.0005909396806629895, "loss": 1.8673, "step": 7072 }, { "epoch": 0.24, "grad_norm": 0.48525160551071167, "learning_rate": 0.0005909371311548907, "loss": 1.9168, "step": 7073 }, { "epoch": 0.24, "grad_norm": 0.4654107391834259, "learning_rate": 0.000590934581293637, "loss": 1.9009, "step": 7074 }, { "epoch": 0.24, "grad_norm": 0.4958961606025696, "learning_rate": 0.0005909320310792318, "loss": 1.9875, "step": 7075 }, { "epoch": 0.24, "grad_norm": 0.4673362374305725, "learning_rate": 0.0005909294805116779, "loss": 1.906, "step": 7076 }, { "epoch": 0.24, "grad_norm": 0.4729600250720978, "learning_rate": 0.0005909269295909784, "loss": 1.9172, "step": 7077 }, { "epoch": 0.24, "grad_norm": 0.4655422866344452, "learning_rate": 0.0005909243783171366, "loss": 1.9561, "step": 7078 }, { "epoch": 0.24, "grad_norm": 0.47979554533958435, "learning_rate": 0.0005909218266901552, "loss": 1.9539, "step": 7079 }, { "epoch": 0.24, "grad_norm": 0.46808066964149475, "learning_rate": 0.0005909192747100379, "loss": 1.9799, "step": 7080 }, { "epoch": 0.24, "grad_norm": 0.4684680998325348, "learning_rate": 0.0005909167223767873, "loss": 1.9301, "step": 7081 }, { "epoch": 0.24, "grad_norm": 0.47205376625061035, "learning_rate": 0.0005909141696904067, "loss": 2.0163, "step": 7082 }, { "epoch": 0.24, "grad_norm": 0.4713956117630005, "learning_rate": 0.0005909116166508991, "loss": 1.9564, "step": 7083 }, { "epoch": 0.24, "grad_norm": 0.4841204881668091, "learning_rate": 0.0005909090632582678, "loss": 1.9201, "step": 7084 }, { "epoch": 0.24, "grad_norm": 0.4681946337223053, "learning_rate": 0.0005909065095125157, "loss": 1.9433, "step": 7085 }, { "epoch": 0.24, "grad_norm": 0.4985325038433075, "learning_rate": 0.0005909039554136459, "loss": 1.9457, "step": 7086 }, { "epoch": 0.24, "grad_norm": 0.4790955185890198, "learning_rate": 0.0005909014009616617, "loss": 1.9475, "step": 7087 }, { "epoch": 0.24, "grad_norm": 0.467214971780777, "learning_rate": 0.0005908988461565659, "loss": 1.9031, "step": 7088 }, { "epoch": 0.24, "grad_norm": 0.4713096022605896, "learning_rate": 0.0005908962909983618, "loss": 1.9768, "step": 7089 }, { "epoch": 0.24, "grad_norm": 0.4915688633918762, "learning_rate": 0.0005908937354870524, "loss": 1.9765, "step": 7090 }, { "epoch": 0.24, "grad_norm": 0.46018752455711365, "learning_rate": 0.0005908911796226409, "loss": 1.9325, "step": 7091 }, { "epoch": 0.24, "grad_norm": 0.46568432450294495, "learning_rate": 0.0005908886234051305, "loss": 1.9387, "step": 7092 }, { "epoch": 0.24, "grad_norm": 0.4692758023738861, "learning_rate": 0.0005908860668345241, "loss": 1.9541, "step": 7093 }, { "epoch": 0.24, "grad_norm": 0.4671519100666046, "learning_rate": 0.000590883509910825, "loss": 1.9155, "step": 7094 }, { "epoch": 0.24, "grad_norm": 0.46857956051826477, "learning_rate": 0.0005908809526340359, "loss": 1.8838, "step": 7095 }, { "epoch": 0.24, "grad_norm": 0.4587407410144806, "learning_rate": 0.0005908783950041604, "loss": 1.9716, "step": 7096 }, { "epoch": 0.24, "grad_norm": 0.4657423794269562, "learning_rate": 0.0005908758370212013, "loss": 1.9453, "step": 7097 }, { "epoch": 0.24, "grad_norm": 0.46907415986061096, "learning_rate": 0.0005908732786851619, "loss": 2.0045, "step": 7098 }, { "epoch": 0.24, "grad_norm": 0.45528000593185425, "learning_rate": 0.0005908707199960451, "loss": 1.9566, "step": 7099 }, { "epoch": 0.24, "grad_norm": 0.4854993522167206, "learning_rate": 0.000590868160953854, "loss": 1.9423, "step": 7100 }, { "epoch": 0.24, "grad_norm": 0.4669322669506073, "learning_rate": 0.0005908656015585919, "loss": 1.975, "step": 7101 }, { "epoch": 0.24, "grad_norm": 0.4702063798904419, "learning_rate": 0.0005908630418102619, "loss": 1.9901, "step": 7102 }, { "epoch": 0.24, "grad_norm": 0.5186735987663269, "learning_rate": 0.0005908604817088669, "loss": 1.9398, "step": 7103 }, { "epoch": 0.24, "grad_norm": 0.47773006558418274, "learning_rate": 0.0005908579212544102, "loss": 1.9359, "step": 7104 }, { "epoch": 0.24, "grad_norm": 0.4783444404602051, "learning_rate": 0.0005908553604468948, "loss": 1.9253, "step": 7105 }, { "epoch": 0.24, "grad_norm": 0.48502597212791443, "learning_rate": 0.000590852799286324, "loss": 1.9326, "step": 7106 }, { "epoch": 0.24, "grad_norm": 0.4972952902317047, "learning_rate": 0.0005908502377727006, "loss": 2.0073, "step": 7107 }, { "epoch": 0.24, "grad_norm": 0.4802503287792206, "learning_rate": 0.0005908476759060279, "loss": 1.8506, "step": 7108 }, { "epoch": 0.24, "grad_norm": 0.47326216101646423, "learning_rate": 0.000590845113686309, "loss": 1.8917, "step": 7109 }, { "epoch": 0.24, "grad_norm": 0.5085257887840271, "learning_rate": 0.000590842551113547, "loss": 1.8999, "step": 7110 }, { "epoch": 0.24, "grad_norm": 0.47109150886535645, "learning_rate": 0.000590839988187745, "loss": 1.9618, "step": 7111 }, { "epoch": 0.24, "grad_norm": 0.48352932929992676, "learning_rate": 0.000590837424908906, "loss": 1.9888, "step": 7112 }, { "epoch": 0.24, "grad_norm": 0.4716511368751526, "learning_rate": 0.0005908348612770332, "loss": 1.9265, "step": 7113 }, { "epoch": 0.24, "grad_norm": 0.4886418581008911, "learning_rate": 0.0005908322972921298, "loss": 2.0634, "step": 7114 }, { "epoch": 0.24, "grad_norm": 0.489240825176239, "learning_rate": 0.0005908297329541989, "loss": 1.9541, "step": 7115 }, { "epoch": 0.24, "grad_norm": 0.5069364309310913, "learning_rate": 0.0005908271682632435, "loss": 1.8958, "step": 7116 }, { "epoch": 0.24, "grad_norm": 0.46129098534584045, "learning_rate": 0.0005908246032192668, "loss": 1.9064, "step": 7117 }, { "epoch": 0.24, "grad_norm": 0.5061575770378113, "learning_rate": 0.000590822037822272, "loss": 2.0215, "step": 7118 }, { "epoch": 0.24, "grad_norm": 0.47892558574676514, "learning_rate": 0.000590819472072262, "loss": 1.9691, "step": 7119 }, { "epoch": 0.24, "grad_norm": 0.48605719208717346, "learning_rate": 0.0005908169059692399, "loss": 1.944, "step": 7120 }, { "epoch": 0.24, "grad_norm": 0.44898781180381775, "learning_rate": 0.000590814339513209, "loss": 1.8705, "step": 7121 }, { "epoch": 0.24, "grad_norm": 0.4775930941104889, "learning_rate": 0.0005908117727041724, "loss": 1.9774, "step": 7122 }, { "epoch": 0.24, "grad_norm": 0.4845072627067566, "learning_rate": 0.000590809205542133, "loss": 1.9547, "step": 7123 }, { "epoch": 0.24, "grad_norm": 0.4564327895641327, "learning_rate": 0.0005908066380270942, "loss": 1.9414, "step": 7124 }, { "epoch": 0.24, "grad_norm": 0.46586957573890686, "learning_rate": 0.000590804070159059, "loss": 1.9306, "step": 7125 }, { "epoch": 0.24, "grad_norm": 0.4659174084663391, "learning_rate": 0.0005908015019380305, "loss": 1.9459, "step": 7126 }, { "epoch": 0.24, "grad_norm": 0.4959661364555359, "learning_rate": 0.0005907989333640117, "loss": 1.9834, "step": 7127 }, { "epoch": 0.24, "grad_norm": 0.48530709743499756, "learning_rate": 0.0005907963644370061, "loss": 1.9715, "step": 7128 }, { "epoch": 0.24, "grad_norm": 0.47203993797302246, "learning_rate": 0.0005907937951570163, "loss": 1.9913, "step": 7129 }, { "epoch": 0.24, "grad_norm": 0.47891995310783386, "learning_rate": 0.0005907912255240459, "loss": 1.9073, "step": 7130 }, { "epoch": 0.24, "grad_norm": 0.472661554813385, "learning_rate": 0.0005907886555380975, "loss": 1.9453, "step": 7131 }, { "epoch": 0.24, "grad_norm": 0.4892173409461975, "learning_rate": 0.0005907860851991747, "loss": 1.8683, "step": 7132 }, { "epoch": 0.24, "grad_norm": 0.46835756301879883, "learning_rate": 0.0005907835145072805, "loss": 1.8512, "step": 7133 }, { "epoch": 0.24, "grad_norm": 0.49077656865119934, "learning_rate": 0.0005907809434624178, "loss": 1.9581, "step": 7134 }, { "epoch": 0.24, "grad_norm": 0.5113186836242676, "learning_rate": 0.0005907783720645901, "loss": 1.9897, "step": 7135 }, { "epoch": 0.24, "grad_norm": 0.4981426000595093, "learning_rate": 0.0005907758003138001, "loss": 2.0513, "step": 7136 }, { "epoch": 0.24, "grad_norm": 0.48541831970214844, "learning_rate": 0.0005907732282100513, "loss": 1.8938, "step": 7137 }, { "epoch": 0.24, "grad_norm": 0.4677581787109375, "learning_rate": 0.0005907706557533465, "loss": 1.9318, "step": 7138 }, { "epoch": 0.24, "grad_norm": 0.47316259145736694, "learning_rate": 0.0005907680829436889, "loss": 1.9377, "step": 7139 }, { "epoch": 0.24, "grad_norm": 0.48402291536331177, "learning_rate": 0.0005907655097810819, "loss": 1.8649, "step": 7140 }, { "epoch": 0.24, "grad_norm": 0.5190715193748474, "learning_rate": 0.0005907629362655282, "loss": 1.9628, "step": 7141 }, { "epoch": 0.24, "grad_norm": 0.475752055644989, "learning_rate": 0.0005907603623970314, "loss": 1.983, "step": 7142 }, { "epoch": 0.24, "grad_norm": 0.45898324251174927, "learning_rate": 0.0005907577881755941, "loss": 1.9208, "step": 7143 }, { "epoch": 0.24, "grad_norm": 0.47904932498931885, "learning_rate": 0.0005907552136012198, "loss": 2.0026, "step": 7144 }, { "epoch": 0.24, "grad_norm": 0.5063309073448181, "learning_rate": 0.0005907526386739114, "loss": 1.9159, "step": 7145 }, { "epoch": 0.24, "grad_norm": 0.46554526686668396, "learning_rate": 0.0005907500633936724, "loss": 1.9256, "step": 7146 }, { "epoch": 0.24, "grad_norm": 0.4728107452392578, "learning_rate": 0.0005907474877605055, "loss": 1.8902, "step": 7147 }, { "epoch": 0.24, "grad_norm": 0.4754559099674225, "learning_rate": 0.0005907449117744139, "loss": 1.9242, "step": 7148 }, { "epoch": 0.24, "grad_norm": 0.4858497679233551, "learning_rate": 0.0005907423354354009, "loss": 1.9489, "step": 7149 }, { "epoch": 0.24, "grad_norm": 0.48954296112060547, "learning_rate": 0.0005907397587434696, "loss": 1.9438, "step": 7150 }, { "epoch": 0.24, "grad_norm": 0.501910924911499, "learning_rate": 0.000590737181698623, "loss": 1.9665, "step": 7151 }, { "epoch": 0.24, "grad_norm": 0.5299274921417236, "learning_rate": 0.0005907346043008644, "loss": 2.0224, "step": 7152 }, { "epoch": 0.24, "grad_norm": 0.47562918066978455, "learning_rate": 0.0005907320265501967, "loss": 1.9677, "step": 7153 }, { "epoch": 0.24, "grad_norm": 0.4865277409553528, "learning_rate": 0.0005907294484466233, "loss": 1.9405, "step": 7154 }, { "epoch": 0.24, "grad_norm": 0.47915366291999817, "learning_rate": 0.0005907268699901471, "loss": 1.9883, "step": 7155 }, { "epoch": 0.24, "grad_norm": 0.4505780339241028, "learning_rate": 0.0005907242911807713, "loss": 1.8989, "step": 7156 }, { "epoch": 0.24, "grad_norm": 0.4824995994567871, "learning_rate": 0.000590721712018499, "loss": 1.8656, "step": 7157 }, { "epoch": 0.24, "grad_norm": 0.47915080189704895, "learning_rate": 0.0005907191325033334, "loss": 1.9951, "step": 7158 }, { "epoch": 0.24, "grad_norm": 0.46077170968055725, "learning_rate": 0.0005907165526352777, "loss": 1.9039, "step": 7159 }, { "epoch": 0.24, "grad_norm": 0.4654260575771332, "learning_rate": 0.0005907139724143349, "loss": 1.8387, "step": 7160 }, { "epoch": 0.24, "grad_norm": 0.46595150232315063, "learning_rate": 0.0005907113918405081, "loss": 1.8815, "step": 7161 }, { "epoch": 0.24, "grad_norm": 0.4796988070011139, "learning_rate": 0.0005907088109138006, "loss": 1.9464, "step": 7162 }, { "epoch": 0.24, "grad_norm": 0.47953659296035767, "learning_rate": 0.0005907062296342154, "loss": 1.9452, "step": 7163 }, { "epoch": 0.24, "grad_norm": 0.47189801931381226, "learning_rate": 0.0005907036480017557, "loss": 1.8952, "step": 7164 }, { "epoch": 0.24, "grad_norm": 0.4578038454055786, "learning_rate": 0.0005907010660164245, "loss": 1.9338, "step": 7165 }, { "epoch": 0.24, "grad_norm": 0.49985471367836, "learning_rate": 0.0005906984836782251, "loss": 1.9683, "step": 7166 }, { "epoch": 0.24, "grad_norm": 0.48295265436172485, "learning_rate": 0.0005906959009871607, "loss": 1.9176, "step": 7167 }, { "epoch": 0.24, "grad_norm": 0.4758375883102417, "learning_rate": 0.0005906933179432341, "loss": 1.8496, "step": 7168 }, { "epoch": 0.24, "grad_norm": 0.4779350161552429, "learning_rate": 0.0005906907345464488, "loss": 1.95, "step": 7169 }, { "epoch": 0.24, "grad_norm": 0.5015595555305481, "learning_rate": 0.0005906881507968077, "loss": 1.9476, "step": 7170 }, { "epoch": 0.24, "grad_norm": 0.4906958341598511, "learning_rate": 0.0005906855666943141, "loss": 1.9878, "step": 7171 }, { "epoch": 0.24, "grad_norm": 0.47000575065612793, "learning_rate": 0.0005906829822389709, "loss": 1.9031, "step": 7172 }, { "epoch": 0.24, "grad_norm": 0.47493240237236023, "learning_rate": 0.0005906803974307815, "loss": 1.9501, "step": 7173 }, { "epoch": 0.24, "grad_norm": 0.48138514161109924, "learning_rate": 0.0005906778122697489, "loss": 1.8234, "step": 7174 }, { "epoch": 0.24, "grad_norm": 0.4764096736907959, "learning_rate": 0.0005906752267558763, "loss": 1.8878, "step": 7175 }, { "epoch": 0.24, "grad_norm": 0.47389087080955505, "learning_rate": 0.0005906726408891667, "loss": 1.9291, "step": 7176 }, { "epoch": 0.24, "grad_norm": 0.5013595223426819, "learning_rate": 0.0005906700546696235, "loss": 2.0062, "step": 7177 }, { "epoch": 0.24, "grad_norm": 0.47916123270988464, "learning_rate": 0.0005906674680972496, "loss": 1.8998, "step": 7178 }, { "epoch": 0.24, "grad_norm": 0.45914870500564575, "learning_rate": 0.0005906648811720482, "loss": 1.928, "step": 7179 }, { "epoch": 0.24, "grad_norm": 0.49761509895324707, "learning_rate": 0.0005906622938940224, "loss": 2.0294, "step": 7180 }, { "epoch": 0.24, "grad_norm": 0.4865241050720215, "learning_rate": 0.0005906597062631755, "loss": 1.9257, "step": 7181 }, { "epoch": 0.24, "grad_norm": 0.4552794396877289, "learning_rate": 0.0005906571182795106, "loss": 1.9595, "step": 7182 }, { "epoch": 0.24, "grad_norm": 0.4954327940940857, "learning_rate": 0.0005906545299430307, "loss": 1.9402, "step": 7183 }, { "epoch": 0.24, "grad_norm": 0.49155551195144653, "learning_rate": 0.000590651941253739, "loss": 1.9404, "step": 7184 }, { "epoch": 0.24, "grad_norm": 0.4805561900138855, "learning_rate": 0.0005906493522116387, "loss": 1.9891, "step": 7185 }, { "epoch": 0.24, "grad_norm": 0.466419517993927, "learning_rate": 0.000590646762816733, "loss": 1.8442, "step": 7186 }, { "epoch": 0.24, "grad_norm": 0.499443918466568, "learning_rate": 0.0005906441730690248, "loss": 1.957, "step": 7187 }, { "epoch": 0.24, "grad_norm": 0.48406657576560974, "learning_rate": 0.0005906415829685175, "loss": 1.9531, "step": 7188 }, { "epoch": 0.24, "grad_norm": 0.49347156286239624, "learning_rate": 0.000590638992515214, "loss": 1.928, "step": 7189 }, { "epoch": 0.24, "grad_norm": 0.4665064811706543, "learning_rate": 0.0005906364017091178, "loss": 1.8837, "step": 7190 }, { "epoch": 0.24, "grad_norm": 0.4646863043308258, "learning_rate": 0.0005906338105502318, "loss": 1.9373, "step": 7191 }, { "epoch": 0.24, "grad_norm": 0.4697533845901489, "learning_rate": 0.000590631219038559, "loss": 1.9339, "step": 7192 }, { "epoch": 0.24, "grad_norm": 0.48757463693618774, "learning_rate": 0.0005906286271741029, "loss": 1.9496, "step": 7193 }, { "epoch": 0.24, "grad_norm": 0.4559788107872009, "learning_rate": 0.0005906260349568663, "loss": 1.8763, "step": 7194 }, { "epoch": 0.24, "grad_norm": 0.5340445637702942, "learning_rate": 0.0005906234423868527, "loss": 1.9603, "step": 7195 }, { "epoch": 0.24, "grad_norm": 0.45712506771087646, "learning_rate": 0.000590620849464065, "loss": 1.9261, "step": 7196 }, { "epoch": 0.24, "grad_norm": 0.4739794433116913, "learning_rate": 0.0005906182561885063, "loss": 1.9519, "step": 7197 }, { "epoch": 0.24, "grad_norm": 0.48839855194091797, "learning_rate": 0.00059061566256018, "loss": 1.9442, "step": 7198 }, { "epoch": 0.24, "grad_norm": 0.49169906973838806, "learning_rate": 0.0005906130685790891, "loss": 2.024, "step": 7199 }, { "epoch": 0.24, "grad_norm": 0.4665173590183258, "learning_rate": 0.0005906104742452367, "loss": 1.9204, "step": 7200 }, { "epoch": 0.24, "grad_norm": 0.48843997716903687, "learning_rate": 0.000590607879558626, "loss": 2.0663, "step": 7201 }, { "epoch": 0.24, "grad_norm": 0.4679379463195801, "learning_rate": 0.0005906052845192601, "loss": 1.9426, "step": 7202 }, { "epoch": 0.24, "grad_norm": 0.46706753969192505, "learning_rate": 0.0005906026891271424, "loss": 1.9294, "step": 7203 }, { "epoch": 0.24, "grad_norm": 0.4520135223865509, "learning_rate": 0.0005906000933822756, "loss": 1.9252, "step": 7204 }, { "epoch": 0.24, "grad_norm": 0.48275136947631836, "learning_rate": 0.0005905974972846632, "loss": 2.0029, "step": 7205 }, { "epoch": 0.24, "grad_norm": 0.49118971824645996, "learning_rate": 0.0005905949008343084, "loss": 1.9615, "step": 7206 }, { "epoch": 0.24, "grad_norm": 0.4626765847206116, "learning_rate": 0.000590592304031214, "loss": 1.955, "step": 7207 }, { "epoch": 0.24, "grad_norm": 0.4878803491592407, "learning_rate": 0.0005905897068753834, "loss": 2.0392, "step": 7208 }, { "epoch": 0.24, "grad_norm": 0.4714432656764984, "learning_rate": 0.0005905871093668198, "loss": 1.9914, "step": 7209 }, { "epoch": 0.24, "grad_norm": 0.4962994158267975, "learning_rate": 0.0005905845115055263, "loss": 1.9433, "step": 7210 }, { "epoch": 0.24, "grad_norm": 0.4750944674015045, "learning_rate": 0.0005905819132915059, "loss": 1.9127, "step": 7211 }, { "epoch": 0.24, "grad_norm": 0.46182820200920105, "learning_rate": 0.0005905793147247619, "loss": 2.0006, "step": 7212 }, { "epoch": 0.24, "grad_norm": 0.4801371395587921, "learning_rate": 0.0005905767158052975, "loss": 2.0047, "step": 7213 }, { "epoch": 0.24, "grad_norm": 0.4584288001060486, "learning_rate": 0.0005905741165331157, "loss": 1.8538, "step": 7214 }, { "epoch": 0.24, "grad_norm": 0.507092297077179, "learning_rate": 0.0005905715169082197, "loss": 1.9357, "step": 7215 }, { "epoch": 0.24, "grad_norm": 0.46347475051879883, "learning_rate": 0.0005905689169306128, "loss": 1.9116, "step": 7216 }, { "epoch": 0.24, "grad_norm": 0.46252068877220154, "learning_rate": 0.0005905663166002979, "loss": 1.9129, "step": 7217 }, { "epoch": 0.24, "grad_norm": 0.4798068106174469, "learning_rate": 0.0005905637159172785, "loss": 1.9058, "step": 7218 }, { "epoch": 0.24, "grad_norm": 0.47645673155784607, "learning_rate": 0.0005905611148815575, "loss": 1.9485, "step": 7219 }, { "epoch": 0.24, "grad_norm": 0.4905371069908142, "learning_rate": 0.000590558513493138, "loss": 1.9396, "step": 7220 }, { "epoch": 0.24, "grad_norm": 0.483594685792923, "learning_rate": 0.0005905559117520234, "loss": 1.9059, "step": 7221 }, { "epoch": 0.24, "grad_norm": 0.4669198989868164, "learning_rate": 0.0005905533096582167, "loss": 1.8583, "step": 7222 }, { "epoch": 0.24, "grad_norm": 0.4633607864379883, "learning_rate": 0.0005905507072117211, "loss": 1.9127, "step": 7223 }, { "epoch": 0.24, "grad_norm": 0.491729199886322, "learning_rate": 0.0005905481044125397, "loss": 1.9864, "step": 7224 }, { "epoch": 0.24, "grad_norm": 0.4568697512149811, "learning_rate": 0.0005905455012606758, "loss": 1.8395, "step": 7225 }, { "epoch": 0.24, "grad_norm": 0.4579152464866638, "learning_rate": 0.0005905428977561325, "loss": 1.8664, "step": 7226 }, { "epoch": 0.24, "grad_norm": 0.4783923327922821, "learning_rate": 0.0005905402938989128, "loss": 1.9167, "step": 7227 }, { "epoch": 0.24, "grad_norm": 0.45827585458755493, "learning_rate": 0.00059053768968902, "loss": 1.8822, "step": 7228 }, { "epoch": 0.24, "grad_norm": 0.46981409192085266, "learning_rate": 0.0005905350851264573, "loss": 1.9633, "step": 7229 }, { "epoch": 0.24, "grad_norm": 0.48637422919273376, "learning_rate": 0.0005905324802112278, "loss": 1.8663, "step": 7230 }, { "epoch": 0.24, "grad_norm": 0.4569685459136963, "learning_rate": 0.0005905298749433347, "loss": 1.9156, "step": 7231 }, { "epoch": 0.24, "grad_norm": 0.48288464546203613, "learning_rate": 0.0005905272693227811, "loss": 1.9105, "step": 7232 }, { "epoch": 0.24, "grad_norm": 0.47096139192581177, "learning_rate": 0.0005905246633495703, "loss": 1.91, "step": 7233 }, { "epoch": 0.24, "grad_norm": 0.47308117151260376, "learning_rate": 0.0005905220570237053, "loss": 1.9624, "step": 7234 }, { "epoch": 0.24, "grad_norm": 0.46789833903312683, "learning_rate": 0.0005905194503451894, "loss": 1.9193, "step": 7235 }, { "epoch": 0.24, "grad_norm": 0.4683310389518738, "learning_rate": 0.0005905168433140256, "loss": 1.9892, "step": 7236 }, { "epoch": 0.24, "grad_norm": 0.46631744503974915, "learning_rate": 0.0005905142359302171, "loss": 1.9126, "step": 7237 }, { "epoch": 0.24, "grad_norm": 0.44637739658355713, "learning_rate": 0.0005905116281937672, "loss": 1.8538, "step": 7238 }, { "epoch": 0.24, "grad_norm": 0.504213809967041, "learning_rate": 0.000590509020104679, "loss": 1.8846, "step": 7239 }, { "epoch": 0.24, "grad_norm": 0.469373881816864, "learning_rate": 0.0005905064116629557, "loss": 1.9022, "step": 7240 }, { "epoch": 0.24, "grad_norm": 0.47349342703819275, "learning_rate": 0.0005905038028686003, "loss": 1.9302, "step": 7241 }, { "epoch": 0.24, "grad_norm": 0.4921645522117615, "learning_rate": 0.0005905011937216161, "loss": 2.0016, "step": 7242 }, { "epoch": 0.24, "grad_norm": 0.47293126583099365, "learning_rate": 0.0005904985842220064, "loss": 1.8939, "step": 7243 }, { "epoch": 0.24, "grad_norm": 0.46334701776504517, "learning_rate": 0.0005904959743697741, "loss": 1.9237, "step": 7244 }, { "epoch": 0.24, "grad_norm": 0.47954854369163513, "learning_rate": 0.0005904933641649226, "loss": 1.9262, "step": 7245 }, { "epoch": 0.24, "grad_norm": 0.5151185393333435, "learning_rate": 0.0005904907536074548, "loss": 1.8851, "step": 7246 }, { "epoch": 0.24, "grad_norm": 0.494067907333374, "learning_rate": 0.0005904881426973741, "loss": 1.9803, "step": 7247 }, { "epoch": 0.24, "grad_norm": 0.4867924451828003, "learning_rate": 0.0005904855314346837, "loss": 1.9886, "step": 7248 }, { "epoch": 0.24, "grad_norm": 0.48659324645996094, "learning_rate": 0.0005904829198193865, "loss": 1.903, "step": 7249 }, { "epoch": 0.24, "grad_norm": 0.4934762418270111, "learning_rate": 0.000590480307851486, "loss": 1.937, "step": 7250 }, { "epoch": 0.24, "grad_norm": 0.4672566056251526, "learning_rate": 0.000590477695530985, "loss": 1.955, "step": 7251 }, { "epoch": 0.24, "grad_norm": 0.47119396924972534, "learning_rate": 0.000590475082857887, "loss": 1.985, "step": 7252 }, { "epoch": 0.24, "grad_norm": 0.5077441930770874, "learning_rate": 0.000590472469832195, "loss": 2.0064, "step": 7253 }, { "epoch": 0.24, "grad_norm": 0.5226559042930603, "learning_rate": 0.0005904698564539123, "loss": 1.9594, "step": 7254 }, { "epoch": 0.24, "grad_norm": 0.46489500999450684, "learning_rate": 0.0005904672427230419, "loss": 1.9308, "step": 7255 }, { "epoch": 0.24, "grad_norm": 0.4850786328315735, "learning_rate": 0.0005904646286395872, "loss": 1.8939, "step": 7256 }, { "epoch": 0.24, "grad_norm": 0.5023227334022522, "learning_rate": 0.0005904620142035511, "loss": 1.9228, "step": 7257 }, { "epoch": 0.24, "grad_norm": 0.46516022086143494, "learning_rate": 0.000590459399414937, "loss": 1.9194, "step": 7258 }, { "epoch": 0.24, "grad_norm": 0.4581068754196167, "learning_rate": 0.0005904567842737479, "loss": 1.9376, "step": 7259 }, { "epoch": 0.24, "grad_norm": 0.480383962392807, "learning_rate": 0.0005904541687799872, "loss": 1.9627, "step": 7260 }, { "epoch": 0.24, "grad_norm": 0.4576911926269531, "learning_rate": 0.0005904515529336578, "loss": 1.9735, "step": 7261 }, { "epoch": 0.24, "grad_norm": 0.48856085538864136, "learning_rate": 0.000590448936734763, "loss": 1.909, "step": 7262 }, { "epoch": 0.24, "grad_norm": 0.46497562527656555, "learning_rate": 0.0005904463201833061, "loss": 1.8835, "step": 7263 }, { "epoch": 0.24, "grad_norm": 0.46354401111602783, "learning_rate": 0.0005904437032792901, "loss": 1.8675, "step": 7264 }, { "epoch": 0.24, "grad_norm": 0.4717491865158081, "learning_rate": 0.0005904410860227182, "loss": 1.9137, "step": 7265 }, { "epoch": 0.24, "grad_norm": 0.4676336944103241, "learning_rate": 0.0005904384684135937, "loss": 1.9205, "step": 7266 }, { "epoch": 0.24, "grad_norm": 0.4612603485584259, "learning_rate": 0.0005904358504519198, "loss": 1.9405, "step": 7267 }, { "epoch": 0.24, "grad_norm": 0.47569891810417175, "learning_rate": 0.0005904332321376993, "loss": 1.8872, "step": 7268 }, { "epoch": 0.24, "grad_norm": 0.4780339002609253, "learning_rate": 0.0005904306134709358, "loss": 2.0077, "step": 7269 }, { "epoch": 0.24, "grad_norm": 0.46953895688056946, "learning_rate": 0.0005904279944516323, "loss": 1.9211, "step": 7270 }, { "epoch": 0.24, "grad_norm": 0.47846460342407227, "learning_rate": 0.000590425375079792, "loss": 1.9263, "step": 7271 }, { "epoch": 0.24, "grad_norm": 0.4745323956012726, "learning_rate": 0.0005904227553554181, "loss": 1.9214, "step": 7272 }, { "epoch": 0.24, "grad_norm": 0.4657294750213623, "learning_rate": 0.0005904201352785138, "loss": 1.9236, "step": 7273 }, { "epoch": 0.24, "grad_norm": 0.6284319758415222, "learning_rate": 0.0005904175148490822, "loss": 1.9211, "step": 7274 }, { "epoch": 0.24, "grad_norm": 0.5056609511375427, "learning_rate": 0.0005904148940671265, "loss": 1.9257, "step": 7275 }, { "epoch": 0.24, "grad_norm": 0.467479944229126, "learning_rate": 0.00059041227293265, "loss": 2.0068, "step": 7276 }, { "epoch": 0.24, "grad_norm": 0.46422550082206726, "learning_rate": 0.0005904096514456557, "loss": 1.8987, "step": 7277 }, { "epoch": 0.24, "grad_norm": 0.45107710361480713, "learning_rate": 0.0005904070296061469, "loss": 1.9242, "step": 7278 }, { "epoch": 0.24, "grad_norm": 0.5101134181022644, "learning_rate": 0.0005904044074141268, "loss": 2.0241, "step": 7279 }, { "epoch": 0.24, "grad_norm": 0.47460901737213135, "learning_rate": 0.0005904017848695985, "loss": 1.9142, "step": 7280 }, { "epoch": 0.24, "grad_norm": 0.5051578283309937, "learning_rate": 0.0005903991619725653, "loss": 1.9362, "step": 7281 }, { "epoch": 0.24, "grad_norm": 0.4769156873226166, "learning_rate": 0.0005903965387230301, "loss": 1.9092, "step": 7282 }, { "epoch": 0.24, "grad_norm": 0.46339327096939087, "learning_rate": 0.0005903939151209965, "loss": 1.9885, "step": 7283 }, { "epoch": 0.24, "grad_norm": 0.4826498031616211, "learning_rate": 0.0005903912911664675, "loss": 2.0072, "step": 7284 }, { "epoch": 0.24, "grad_norm": 0.47277605533599854, "learning_rate": 0.0005903886668594461, "loss": 1.9183, "step": 7285 }, { "epoch": 0.24, "grad_norm": 0.4755549728870392, "learning_rate": 0.0005903860421999356, "loss": 1.9745, "step": 7286 }, { "epoch": 0.24, "grad_norm": 0.4564855098724365, "learning_rate": 0.0005903834171879395, "loss": 1.9328, "step": 7287 }, { "epoch": 0.24, "grad_norm": 0.5434036254882812, "learning_rate": 0.0005903807918234605, "loss": 1.9699, "step": 7288 }, { "epoch": 0.24, "grad_norm": 0.46228843927383423, "learning_rate": 0.000590378166106502, "loss": 1.9719, "step": 7289 }, { "epoch": 0.24, "grad_norm": 0.4590667486190796, "learning_rate": 0.0005903755400370672, "loss": 1.9368, "step": 7290 }, { "epoch": 0.24, "grad_norm": 0.4730857014656067, "learning_rate": 0.0005903729136151595, "loss": 1.9307, "step": 7291 }, { "epoch": 0.24, "grad_norm": 0.4820706844329834, "learning_rate": 0.0005903702868407816, "loss": 2.0388, "step": 7292 }, { "epoch": 0.24, "grad_norm": 0.4732425808906555, "learning_rate": 0.0005903676597139371, "loss": 1.9618, "step": 7293 }, { "epoch": 0.24, "grad_norm": 0.467255175113678, "learning_rate": 0.000590365032234629, "loss": 2.0084, "step": 7294 }, { "epoch": 0.24, "grad_norm": 0.4490678906440735, "learning_rate": 0.0005903624044028605, "loss": 1.9504, "step": 7295 }, { "epoch": 0.24, "grad_norm": 0.4761171042919159, "learning_rate": 0.0005903597762186349, "loss": 1.9536, "step": 7296 }, { "epoch": 0.24, "grad_norm": 0.47601649165153503, "learning_rate": 0.0005903571476819552, "loss": 1.9293, "step": 7297 }, { "epoch": 0.24, "grad_norm": 0.45877501368522644, "learning_rate": 0.0005903545187928247, "loss": 1.9437, "step": 7298 }, { "epoch": 0.24, "grad_norm": 0.4606202244758606, "learning_rate": 0.0005903518895512467, "loss": 1.8669, "step": 7299 }, { "epoch": 0.24, "grad_norm": 0.4604922831058502, "learning_rate": 0.0005903492599572243, "loss": 1.9372, "step": 7300 }, { "epoch": 0.24, "grad_norm": 0.4856853485107422, "learning_rate": 0.0005903466300107606, "loss": 1.8733, "step": 7301 }, { "epoch": 0.24, "grad_norm": 0.47056153416633606, "learning_rate": 0.000590343999711859, "loss": 1.8999, "step": 7302 }, { "epoch": 0.24, "grad_norm": 0.47377637028694153, "learning_rate": 0.0005903413690605225, "loss": 1.8593, "step": 7303 }, { "epoch": 0.24, "grad_norm": 0.49811550974845886, "learning_rate": 0.0005903387380567543, "loss": 1.9006, "step": 7304 }, { "epoch": 0.24, "grad_norm": 0.4731035828590393, "learning_rate": 0.0005903361067005577, "loss": 1.8676, "step": 7305 }, { "epoch": 0.24, "grad_norm": 0.4828616678714752, "learning_rate": 0.0005903334749919358, "loss": 1.8933, "step": 7306 }, { "epoch": 0.24, "grad_norm": 0.46200230717658997, "learning_rate": 0.0005903308429308919, "loss": 1.8667, "step": 7307 }, { "epoch": 0.24, "grad_norm": 0.46410250663757324, "learning_rate": 0.0005903282105174291, "loss": 1.8948, "step": 7308 }, { "epoch": 0.24, "grad_norm": 0.4669843912124634, "learning_rate": 0.0005903255777515505, "loss": 1.8813, "step": 7309 }, { "epoch": 0.24, "grad_norm": 0.49308332800865173, "learning_rate": 0.0005903229446332596, "loss": 1.8402, "step": 7310 }, { "epoch": 0.24, "grad_norm": 0.47144436836242676, "learning_rate": 0.0005903203111625594, "loss": 1.9465, "step": 7311 }, { "epoch": 0.24, "grad_norm": 0.4697073698043823, "learning_rate": 0.0005903176773394531, "loss": 1.9505, "step": 7312 }, { "epoch": 0.24, "grad_norm": 0.4768449068069458, "learning_rate": 0.0005903150431639439, "loss": 1.8731, "step": 7313 }, { "epoch": 0.24, "grad_norm": 0.4726433753967285, "learning_rate": 0.000590312408636035, "loss": 1.7422, "step": 7314 }, { "epoch": 0.24, "grad_norm": 0.48130109906196594, "learning_rate": 0.0005903097737557298, "loss": 1.8901, "step": 7315 }, { "epoch": 0.24, "grad_norm": 0.461769700050354, "learning_rate": 0.000590307138523031, "loss": 1.9162, "step": 7316 }, { "epoch": 0.24, "grad_norm": 0.47742918133735657, "learning_rate": 0.0005903045029379423, "loss": 1.9052, "step": 7317 }, { "epoch": 0.24, "grad_norm": 0.47942855954170227, "learning_rate": 0.0005903018670004668, "loss": 1.9388, "step": 7318 }, { "epoch": 0.24, "grad_norm": 0.4716070890426636, "learning_rate": 0.0005902992307106074, "loss": 1.8846, "step": 7319 }, { "epoch": 0.24, "grad_norm": 0.4767623841762543, "learning_rate": 0.0005902965940683676, "loss": 1.9209, "step": 7320 }, { "epoch": 0.24, "grad_norm": 0.4639464318752289, "learning_rate": 0.0005902939570737504, "loss": 1.9679, "step": 7321 }, { "epoch": 0.24, "grad_norm": 0.5057094693183899, "learning_rate": 0.0005902913197267592, "loss": 1.9838, "step": 7322 }, { "epoch": 0.24, "grad_norm": 0.501417338848114, "learning_rate": 0.0005902886820273971, "loss": 1.8642, "step": 7323 }, { "epoch": 0.24, "grad_norm": 0.4704440236091614, "learning_rate": 0.0005902860439756674, "loss": 1.8981, "step": 7324 }, { "epoch": 0.24, "grad_norm": 0.4968826174736023, "learning_rate": 0.0005902834055715731, "loss": 1.9393, "step": 7325 }, { "epoch": 0.24, "grad_norm": 0.480396568775177, "learning_rate": 0.0005902807668151176, "loss": 1.9623, "step": 7326 }, { "epoch": 0.24, "grad_norm": 0.4875422716140747, "learning_rate": 0.0005902781277063039, "loss": 1.9482, "step": 7327 }, { "epoch": 0.24, "grad_norm": 0.476358026266098, "learning_rate": 0.0005902754882451354, "loss": 1.9216, "step": 7328 }, { "epoch": 0.24, "grad_norm": 0.48958706855773926, "learning_rate": 0.0005902728484316153, "loss": 1.9998, "step": 7329 }, { "epoch": 0.24, "grad_norm": 0.4649438261985779, "learning_rate": 0.0005902702082657466, "loss": 1.8409, "step": 7330 }, { "epoch": 0.24, "grad_norm": 0.4579206705093384, "learning_rate": 0.0005902675677475327, "loss": 1.8762, "step": 7331 }, { "epoch": 0.24, "grad_norm": 0.47364476323127747, "learning_rate": 0.0005902649268769767, "loss": 1.9028, "step": 7332 }, { "epoch": 0.24, "grad_norm": 0.5857799649238586, "learning_rate": 0.0005902622856540819, "loss": 1.9388, "step": 7333 }, { "epoch": 0.24, "grad_norm": 0.4974212944507599, "learning_rate": 0.0005902596440788514, "loss": 1.9069, "step": 7334 }, { "epoch": 0.24, "grad_norm": 0.4822131097316742, "learning_rate": 0.0005902570021512885, "loss": 1.9262, "step": 7335 }, { "epoch": 0.24, "grad_norm": 0.47201862931251526, "learning_rate": 0.0005902543598713964, "loss": 1.9313, "step": 7336 }, { "epoch": 0.24, "grad_norm": 0.47579336166381836, "learning_rate": 0.0005902517172391782, "loss": 1.9814, "step": 7337 }, { "epoch": 0.24, "grad_norm": 0.48086944222450256, "learning_rate": 0.0005902490742546372, "loss": 1.8897, "step": 7338 }, { "epoch": 0.24, "grad_norm": 0.4869002401828766, "learning_rate": 0.0005902464309177766, "loss": 1.8803, "step": 7339 }, { "epoch": 0.24, "grad_norm": 0.47594648599624634, "learning_rate": 0.0005902437872285996, "loss": 1.9457, "step": 7340 }, { "epoch": 0.24, "grad_norm": 0.4617917537689209, "learning_rate": 0.0005902411431871094, "loss": 1.8834, "step": 7341 }, { "epoch": 0.24, "grad_norm": 0.5380831956863403, "learning_rate": 0.0005902384987933094, "loss": 1.8753, "step": 7342 }, { "epoch": 0.24, "grad_norm": 0.4847036302089691, "learning_rate": 0.0005902358540472024, "loss": 1.9146, "step": 7343 }, { "epoch": 0.24, "grad_norm": 0.6082177758216858, "learning_rate": 0.0005902332089487919, "loss": 1.9992, "step": 7344 }, { "epoch": 0.24, "grad_norm": 0.5073963403701782, "learning_rate": 0.0005902305634980811, "loss": 2.0012, "step": 7345 }, { "epoch": 0.24, "grad_norm": 0.4888169765472412, "learning_rate": 0.000590227917695073, "loss": 1.8975, "step": 7346 }, { "epoch": 0.24, "grad_norm": 0.4853806495666504, "learning_rate": 0.000590225271539771, "loss": 1.9688, "step": 7347 }, { "epoch": 0.24, "grad_norm": 0.46715617179870605, "learning_rate": 0.0005902226250321784, "loss": 1.9496, "step": 7348 }, { "epoch": 0.24, "grad_norm": 0.4798438847064972, "learning_rate": 0.0005902199781722983, "loss": 2.0021, "step": 7349 }, { "epoch": 0.24, "grad_norm": 0.4630467891693115, "learning_rate": 0.000590217330960134, "loss": 1.9687, "step": 7350 }, { "epoch": 0.24, "grad_norm": 0.48612821102142334, "learning_rate": 0.0005902146833956884, "loss": 1.9524, "step": 7351 }, { "epoch": 0.24, "grad_norm": 0.46559688448905945, "learning_rate": 0.000590212035478965, "loss": 1.9093, "step": 7352 }, { "epoch": 0.24, "grad_norm": 0.4734443426132202, "learning_rate": 0.000590209387209967, "loss": 1.9276, "step": 7353 }, { "epoch": 0.24, "grad_norm": 0.47296786308288574, "learning_rate": 0.0005902067385886975, "loss": 1.941, "step": 7354 }, { "epoch": 0.24, "grad_norm": 0.48401734232902527, "learning_rate": 0.0005902040896151598, "loss": 1.9886, "step": 7355 }, { "epoch": 0.24, "grad_norm": 0.46131637692451477, "learning_rate": 0.0005902014402893571, "loss": 1.9833, "step": 7356 }, { "epoch": 0.24, "grad_norm": 0.4837026298046112, "learning_rate": 0.0005901987906112926, "loss": 1.9572, "step": 7357 }, { "epoch": 0.24, "grad_norm": 0.4950076937675476, "learning_rate": 0.0005901961405809696, "loss": 1.9121, "step": 7358 }, { "epoch": 0.24, "grad_norm": 0.4642286002635956, "learning_rate": 0.0005901934901983911, "loss": 1.8992, "step": 7359 }, { "epoch": 0.24, "grad_norm": 0.4814976155757904, "learning_rate": 0.0005901908394635606, "loss": 1.9335, "step": 7360 }, { "epoch": 0.24, "grad_norm": 0.4975062310695648, "learning_rate": 0.0005901881883764811, "loss": 1.9172, "step": 7361 }, { "epoch": 0.24, "grad_norm": 0.4756104648113251, "learning_rate": 0.0005901855369371559, "loss": 1.8725, "step": 7362 }, { "epoch": 0.24, "grad_norm": 0.4704524576663971, "learning_rate": 0.0005901828851455883, "loss": 1.9641, "step": 7363 }, { "epoch": 0.25, "grad_norm": 0.5552813410758972, "learning_rate": 0.0005901802330017813, "loss": 1.9185, "step": 7364 }, { "epoch": 0.25, "grad_norm": 0.47392717003822327, "learning_rate": 0.0005901775805057383, "loss": 1.9891, "step": 7365 }, { "epoch": 0.25, "grad_norm": 0.5083379149436951, "learning_rate": 0.0005901749276574626, "loss": 1.9965, "step": 7366 }, { "epoch": 0.25, "grad_norm": 0.4685526192188263, "learning_rate": 0.0005901722744569572, "loss": 1.9005, "step": 7367 }, { "epoch": 0.25, "grad_norm": 0.46190521121025085, "learning_rate": 0.0005901696209042254, "loss": 1.9027, "step": 7368 }, { "epoch": 0.25, "grad_norm": 0.4875643849372864, "learning_rate": 0.0005901669669992704, "loss": 1.982, "step": 7369 }, { "epoch": 0.25, "grad_norm": 0.5033066868782043, "learning_rate": 0.0005901643127420956, "loss": 1.9889, "step": 7370 }, { "epoch": 0.25, "grad_norm": 0.4638156592845917, "learning_rate": 0.0005901616581327039, "loss": 1.917, "step": 7371 }, { "epoch": 0.25, "grad_norm": 0.46396636962890625, "learning_rate": 0.0005901590031710987, "loss": 1.8942, "step": 7372 }, { "epoch": 0.25, "grad_norm": 0.467048317193985, "learning_rate": 0.0005901563478572833, "loss": 1.9327, "step": 7373 }, { "epoch": 0.25, "grad_norm": 0.4711192846298218, "learning_rate": 0.0005901536921912609, "loss": 1.9549, "step": 7374 }, { "epoch": 0.25, "grad_norm": 0.4629039466381073, "learning_rate": 0.0005901510361730346, "loss": 1.8347, "step": 7375 }, { "epoch": 0.25, "grad_norm": 0.4683706760406494, "learning_rate": 0.0005901483798026077, "loss": 1.961, "step": 7376 }, { "epoch": 0.25, "grad_norm": 0.4719892740249634, "learning_rate": 0.0005901457230799834, "loss": 1.9667, "step": 7377 }, { "epoch": 0.25, "grad_norm": 0.4881197512149811, "learning_rate": 0.000590143066005165, "loss": 1.959, "step": 7378 }, { "epoch": 0.25, "grad_norm": 0.46353229880332947, "learning_rate": 0.0005901404085781556, "loss": 1.9732, "step": 7379 }, { "epoch": 0.25, "grad_norm": 0.4627058506011963, "learning_rate": 0.0005901377507989585, "loss": 1.9766, "step": 7380 }, { "epoch": 0.25, "grad_norm": 0.4557356834411621, "learning_rate": 0.000590135092667577, "loss": 2.0855, "step": 7381 }, { "epoch": 0.25, "grad_norm": 0.46939095854759216, "learning_rate": 0.0005901324341840143, "loss": 1.9058, "step": 7382 }, { "epoch": 0.25, "grad_norm": 0.4848373532295227, "learning_rate": 0.0005901297753482733, "loss": 1.9735, "step": 7383 }, { "epoch": 0.25, "grad_norm": 0.452120304107666, "learning_rate": 0.0005901271161603578, "loss": 1.862, "step": 7384 }, { "epoch": 0.25, "grad_norm": 0.46398213505744934, "learning_rate": 0.0005901244566202705, "loss": 1.926, "step": 7385 }, { "epoch": 0.25, "grad_norm": 0.46080446243286133, "learning_rate": 0.000590121796728015, "loss": 1.9266, "step": 7386 }, { "epoch": 0.25, "grad_norm": 0.48389893770217896, "learning_rate": 0.0005901191364835943, "loss": 2.0329, "step": 7387 }, { "epoch": 0.25, "grad_norm": 0.45233601331710815, "learning_rate": 0.0005901164758870119, "loss": 1.8919, "step": 7388 }, { "epoch": 0.25, "grad_norm": 0.45898595452308655, "learning_rate": 0.0005901138149382707, "loss": 1.888, "step": 7389 }, { "epoch": 0.25, "grad_norm": 0.461460143327713, "learning_rate": 0.000590111153637374, "loss": 1.9474, "step": 7390 }, { "epoch": 0.25, "grad_norm": 0.4580414295196533, "learning_rate": 0.0005901084919843251, "loss": 1.9303, "step": 7391 }, { "epoch": 0.25, "grad_norm": 0.46868008375167847, "learning_rate": 0.0005901058299791275, "loss": 1.9175, "step": 7392 }, { "epoch": 0.25, "grad_norm": 0.4775206446647644, "learning_rate": 0.0005901031676217839, "loss": 1.9761, "step": 7393 }, { "epoch": 0.25, "grad_norm": 0.4813472628593445, "learning_rate": 0.0005901005049122979, "loss": 1.925, "step": 7394 }, { "epoch": 0.25, "grad_norm": 0.46602097153663635, "learning_rate": 0.0005900978418506725, "loss": 2.0304, "step": 7395 }, { "epoch": 0.25, "grad_norm": 0.4786089360713959, "learning_rate": 0.0005900951784369113, "loss": 1.9409, "step": 7396 }, { "epoch": 0.25, "grad_norm": 0.4663909375667572, "learning_rate": 0.0005900925146710172, "loss": 1.8614, "step": 7397 }, { "epoch": 0.25, "grad_norm": 0.461365282535553, "learning_rate": 0.0005900898505529935, "loss": 1.9519, "step": 7398 }, { "epoch": 0.25, "grad_norm": 0.46371257305145264, "learning_rate": 0.0005900871860828435, "loss": 1.9459, "step": 7399 }, { "epoch": 0.25, "grad_norm": 0.4713985323905945, "learning_rate": 0.0005900845212605703, "loss": 1.9792, "step": 7400 }, { "epoch": 0.25, "grad_norm": 0.46601155400276184, "learning_rate": 0.0005900818560861774, "loss": 1.8829, "step": 7401 }, { "epoch": 0.25, "grad_norm": 0.4537314176559448, "learning_rate": 0.0005900791905596677, "loss": 1.9511, "step": 7402 }, { "epoch": 0.25, "grad_norm": 0.48678651452064514, "learning_rate": 0.0005900765246810447, "loss": 1.9169, "step": 7403 }, { "epoch": 0.25, "grad_norm": 0.471097469329834, "learning_rate": 0.0005900738584503116, "loss": 1.9121, "step": 7404 }, { "epoch": 0.25, "grad_norm": 0.479830265045166, "learning_rate": 0.0005900711918674714, "loss": 1.9154, "step": 7405 }, { "epoch": 0.25, "grad_norm": 0.49213430285453796, "learning_rate": 0.0005900685249325277, "loss": 1.9295, "step": 7406 }, { "epoch": 0.25, "grad_norm": 0.4920596480369568, "learning_rate": 0.0005900658576454835, "loss": 1.9522, "step": 7407 }, { "epoch": 0.25, "grad_norm": 0.4747672379016876, "learning_rate": 0.000590063190006342, "loss": 1.8794, "step": 7408 }, { "epoch": 0.25, "grad_norm": 0.5017854571342468, "learning_rate": 0.0005900605220151066, "loss": 2.0116, "step": 7409 }, { "epoch": 0.25, "grad_norm": 0.49340182542800903, "learning_rate": 0.0005900578536717805, "loss": 1.9978, "step": 7410 }, { "epoch": 0.25, "grad_norm": 0.4896044135093689, "learning_rate": 0.0005900551849763667, "loss": 1.9767, "step": 7411 }, { "epoch": 0.25, "grad_norm": 0.4729118347167969, "learning_rate": 0.0005900525159288688, "loss": 1.9791, "step": 7412 }, { "epoch": 0.25, "grad_norm": 0.4591975212097168, "learning_rate": 0.0005900498465292899, "loss": 1.8709, "step": 7413 }, { "epoch": 0.25, "grad_norm": 0.4982440769672394, "learning_rate": 0.0005900471767776332, "loss": 1.8693, "step": 7414 }, { "epoch": 0.25, "grad_norm": 0.47226661443710327, "learning_rate": 0.0005900445066739019, "loss": 1.9569, "step": 7415 }, { "epoch": 0.25, "grad_norm": 0.4570385813713074, "learning_rate": 0.0005900418362180995, "loss": 1.9291, "step": 7416 }, { "epoch": 0.25, "grad_norm": 0.4714791774749756, "learning_rate": 0.0005900391654102288, "loss": 1.9663, "step": 7417 }, { "epoch": 0.25, "grad_norm": 0.46146726608276367, "learning_rate": 0.0005900364942502934, "loss": 2.01, "step": 7418 }, { "epoch": 0.25, "grad_norm": 0.45957693457603455, "learning_rate": 0.0005900338227382964, "loss": 1.8868, "step": 7419 }, { "epoch": 0.25, "grad_norm": 0.4531714618206024, "learning_rate": 0.0005900311508742411, "loss": 1.9315, "step": 7420 }, { "epoch": 0.25, "grad_norm": 0.4895194172859192, "learning_rate": 0.0005900284786581307, "loss": 1.9188, "step": 7421 }, { "epoch": 0.25, "grad_norm": 0.4575559198856354, "learning_rate": 0.0005900258060899685, "loss": 1.9699, "step": 7422 }, { "epoch": 0.25, "grad_norm": 0.46109485626220703, "learning_rate": 0.0005900231331697577, "loss": 1.9149, "step": 7423 }, { "epoch": 0.25, "grad_norm": 0.47062405943870544, "learning_rate": 0.0005900204598975015, "loss": 2.0215, "step": 7424 }, { "epoch": 0.25, "grad_norm": 0.49796557426452637, "learning_rate": 0.0005900177862732032, "loss": 1.9354, "step": 7425 }, { "epoch": 0.25, "grad_norm": 0.48865997791290283, "learning_rate": 0.0005900151122968662, "loss": 1.9206, "step": 7426 }, { "epoch": 0.25, "grad_norm": 0.4620823264122009, "learning_rate": 0.0005900124379684934, "loss": 1.8847, "step": 7427 }, { "epoch": 0.25, "grad_norm": 0.47574347257614136, "learning_rate": 0.0005900097632880883, "loss": 2.0023, "step": 7428 }, { "epoch": 0.25, "grad_norm": 0.4916478395462036, "learning_rate": 0.000590007088255654, "loss": 1.9177, "step": 7429 }, { "epoch": 0.25, "grad_norm": 0.4863349199295044, "learning_rate": 0.0005900044128711939, "loss": 1.9295, "step": 7430 }, { "epoch": 0.25, "grad_norm": 0.4662993550300598, "learning_rate": 0.0005900017371347112, "loss": 1.8992, "step": 7431 }, { "epoch": 0.25, "grad_norm": 0.4725639522075653, "learning_rate": 0.000589999061046209, "loss": 1.974, "step": 7432 }, { "epoch": 0.25, "grad_norm": 0.4817410707473755, "learning_rate": 0.0005899963846056908, "loss": 1.9311, "step": 7433 }, { "epoch": 0.25, "grad_norm": 0.4755651652812958, "learning_rate": 0.0005899937078131597, "loss": 1.884, "step": 7434 }, { "epoch": 0.25, "grad_norm": 0.4652555584907532, "learning_rate": 0.0005899910306686189, "loss": 1.9365, "step": 7435 }, { "epoch": 0.25, "grad_norm": 0.4821747839450836, "learning_rate": 0.0005899883531720717, "loss": 1.8814, "step": 7436 }, { "epoch": 0.25, "grad_norm": 0.47358036041259766, "learning_rate": 0.0005899856753235215, "loss": 1.9605, "step": 7437 }, { "epoch": 0.25, "grad_norm": 0.45572420954704285, "learning_rate": 0.0005899829971229713, "loss": 1.9016, "step": 7438 }, { "epoch": 0.25, "grad_norm": 0.4837363362312317, "learning_rate": 0.0005899803185704245, "loss": 1.9353, "step": 7439 }, { "epoch": 0.25, "grad_norm": 0.46586859226226807, "learning_rate": 0.0005899776396658843, "loss": 1.9242, "step": 7440 }, { "epoch": 0.25, "grad_norm": 0.45993590354919434, "learning_rate": 0.000589974960409354, "loss": 1.8733, "step": 7441 }, { "epoch": 0.25, "grad_norm": 0.48070910573005676, "learning_rate": 0.0005899722808008368, "loss": 1.9467, "step": 7442 }, { "epoch": 0.25, "grad_norm": 0.4849048852920532, "learning_rate": 0.0005899696008403361, "loss": 1.8717, "step": 7443 }, { "epoch": 0.25, "grad_norm": 0.4497693181037903, "learning_rate": 0.0005899669205278548, "loss": 1.8615, "step": 7444 }, { "epoch": 0.25, "grad_norm": 0.4647083282470703, "learning_rate": 0.0005899642398633965, "loss": 1.8988, "step": 7445 }, { "epoch": 0.25, "grad_norm": 0.4644961953163147, "learning_rate": 0.0005899615588469644, "loss": 1.8772, "step": 7446 }, { "epoch": 0.25, "grad_norm": 0.45806512236595154, "learning_rate": 0.0005899588774785616, "loss": 1.9361, "step": 7447 }, { "epoch": 0.25, "grad_norm": 0.4611637592315674, "learning_rate": 0.0005899561957581914, "loss": 1.9644, "step": 7448 }, { "epoch": 0.25, "grad_norm": 0.47766241431236267, "learning_rate": 0.0005899535136858572, "loss": 1.9115, "step": 7449 }, { "epoch": 0.25, "grad_norm": 0.4575870931148529, "learning_rate": 0.0005899508312615622, "loss": 1.9421, "step": 7450 }, { "epoch": 0.25, "grad_norm": 0.4684644639492035, "learning_rate": 0.0005899481484853095, "loss": 1.9128, "step": 7451 }, { "epoch": 0.25, "grad_norm": 0.47928744554519653, "learning_rate": 0.0005899454653571026, "loss": 1.992, "step": 7452 }, { "epoch": 0.25, "grad_norm": 0.4780205190181732, "learning_rate": 0.0005899427818769446, "loss": 1.9089, "step": 7453 }, { "epoch": 0.25, "grad_norm": 0.4993824362754822, "learning_rate": 0.0005899400980448387, "loss": 1.9473, "step": 7454 }, { "epoch": 0.25, "grad_norm": 0.46243518590927124, "learning_rate": 0.0005899374138607883, "loss": 1.8848, "step": 7455 }, { "epoch": 0.25, "grad_norm": 0.4583287537097931, "learning_rate": 0.0005899347293247966, "loss": 1.8793, "step": 7456 }, { "epoch": 0.25, "grad_norm": 0.46958598494529724, "learning_rate": 0.0005899320444368668, "loss": 1.9675, "step": 7457 }, { "epoch": 0.25, "grad_norm": 0.45202502608299255, "learning_rate": 0.0005899293591970022, "loss": 1.9243, "step": 7458 }, { "epoch": 0.25, "grad_norm": 0.4925227761268616, "learning_rate": 0.0005899266736052062, "loss": 2.0404, "step": 7459 }, { "epoch": 0.25, "grad_norm": 0.49049392342567444, "learning_rate": 0.0005899239876614818, "loss": 1.9501, "step": 7460 }, { "epoch": 0.25, "grad_norm": 0.46679699420928955, "learning_rate": 0.0005899213013658325, "loss": 1.929, "step": 7461 }, { "epoch": 0.25, "grad_norm": 0.469967782497406, "learning_rate": 0.0005899186147182615, "loss": 1.9056, "step": 7462 }, { "epoch": 0.25, "grad_norm": 0.4670935869216919, "learning_rate": 0.0005899159277187719, "loss": 1.9123, "step": 7463 }, { "epoch": 0.25, "grad_norm": 0.4509499967098236, "learning_rate": 0.0005899132403673671, "loss": 1.8925, "step": 7464 }, { "epoch": 0.25, "grad_norm": 0.46070989966392517, "learning_rate": 0.0005899105526640503, "loss": 1.8871, "step": 7465 }, { "epoch": 0.25, "grad_norm": 0.47796061635017395, "learning_rate": 0.0005899078646088249, "loss": 1.84, "step": 7466 }, { "epoch": 0.25, "grad_norm": 0.48498010635375977, "learning_rate": 0.000589905176201694, "loss": 1.9332, "step": 7467 }, { "epoch": 0.25, "grad_norm": 0.4716363251209259, "learning_rate": 0.000589902487442661, "loss": 1.9233, "step": 7468 }, { "epoch": 0.25, "grad_norm": 0.44870325922966003, "learning_rate": 0.0005898997983317291, "loss": 1.911, "step": 7469 }, { "epoch": 0.25, "grad_norm": 0.4684877395629883, "learning_rate": 0.0005898971088689016, "loss": 1.9908, "step": 7470 }, { "epoch": 0.25, "grad_norm": 0.48347416520118713, "learning_rate": 0.0005898944190541815, "loss": 1.9307, "step": 7471 }, { "epoch": 0.25, "grad_norm": 0.46871182322502136, "learning_rate": 0.0005898917288875725, "loss": 1.9929, "step": 7472 }, { "epoch": 0.25, "grad_norm": 0.4660212993621826, "learning_rate": 0.0005898890383690776, "loss": 1.9505, "step": 7473 }, { "epoch": 0.25, "grad_norm": 0.45879602432250977, "learning_rate": 0.0005898863474987001, "loss": 1.9036, "step": 7474 }, { "epoch": 0.25, "grad_norm": 0.4615931212902069, "learning_rate": 0.0005898836562764434, "loss": 1.9442, "step": 7475 }, { "epoch": 0.25, "grad_norm": 0.4659854769706726, "learning_rate": 0.0005898809647023104, "loss": 1.9715, "step": 7476 }, { "epoch": 0.25, "grad_norm": 0.4755048155784607, "learning_rate": 0.0005898782727763049, "loss": 1.9978, "step": 7477 }, { "epoch": 0.25, "grad_norm": 0.4516962170600891, "learning_rate": 0.0005898755804984297, "loss": 1.9101, "step": 7478 }, { "epoch": 0.25, "grad_norm": 0.4498562216758728, "learning_rate": 0.0005898728878686884, "loss": 1.8771, "step": 7479 }, { "epoch": 0.25, "grad_norm": 0.4729721248149872, "learning_rate": 0.000589870194887084, "loss": 1.863, "step": 7480 }, { "epoch": 0.25, "grad_norm": 0.46867886185646057, "learning_rate": 0.0005898675015536199, "loss": 1.9412, "step": 7481 }, { "epoch": 0.25, "grad_norm": 0.46482911705970764, "learning_rate": 0.0005898648078682994, "loss": 1.9396, "step": 7482 }, { "epoch": 0.25, "grad_norm": 0.4592391848564148, "learning_rate": 0.0005898621138311258, "loss": 1.9185, "step": 7483 }, { "epoch": 0.25, "grad_norm": 0.4751504063606262, "learning_rate": 0.0005898594194421022, "loss": 1.9611, "step": 7484 }, { "epoch": 0.25, "grad_norm": 0.4797346889972687, "learning_rate": 0.000589856724701232, "loss": 1.905, "step": 7485 }, { "epoch": 0.25, "grad_norm": 0.4712795615196228, "learning_rate": 0.0005898540296085184, "loss": 1.978, "step": 7486 }, { "epoch": 0.25, "grad_norm": 0.4812268912792206, "learning_rate": 0.0005898513341639648, "loss": 1.9245, "step": 7487 }, { "epoch": 0.25, "grad_norm": 0.4637150168418884, "learning_rate": 0.0005898486383675744, "loss": 1.9391, "step": 7488 }, { "epoch": 0.25, "grad_norm": 0.4914623200893402, "learning_rate": 0.0005898459422193505, "loss": 2.0431, "step": 7489 }, { "epoch": 0.25, "grad_norm": 0.4573821425437927, "learning_rate": 0.0005898432457192961, "loss": 1.8863, "step": 7490 }, { "epoch": 0.25, "grad_norm": 0.47873449325561523, "learning_rate": 0.000589840548867415, "loss": 1.899, "step": 7491 }, { "epoch": 0.25, "grad_norm": 0.4524047374725342, "learning_rate": 0.0005898378516637099, "loss": 1.8632, "step": 7492 }, { "epoch": 0.25, "grad_norm": 0.46431052684783936, "learning_rate": 0.0005898351541081846, "loss": 1.9768, "step": 7493 }, { "epoch": 0.25, "grad_norm": 0.46983200311660767, "learning_rate": 0.000589832456200842, "loss": 1.8906, "step": 7494 }, { "epoch": 0.25, "grad_norm": 0.47339969873428345, "learning_rate": 0.0005898297579416855, "loss": 1.9516, "step": 7495 }, { "epoch": 0.25, "grad_norm": 0.45758241415023804, "learning_rate": 0.0005898270593307184, "loss": 1.9555, "step": 7496 }, { "epoch": 0.25, "grad_norm": 0.46162280440330505, "learning_rate": 0.0005898243603679439, "loss": 1.9203, "step": 7497 }, { "epoch": 0.25, "grad_norm": 0.4820112884044647, "learning_rate": 0.0005898216610533654, "loss": 1.9329, "step": 7498 }, { "epoch": 0.25, "grad_norm": 0.46578434109687805, "learning_rate": 0.0005898189613869862, "loss": 1.8836, "step": 7499 }, { "epoch": 0.25, "grad_norm": 0.48515480756759644, "learning_rate": 0.0005898162613688094, "loss": 2.007, "step": 7500 }, { "epoch": 0.25, "grad_norm": 0.46137532591819763, "learning_rate": 0.0005898135609988383, "loss": 1.8722, "step": 7501 }, { "epoch": 0.25, "grad_norm": 0.4852593243122101, "learning_rate": 0.0005898108602770762, "loss": 1.9995, "step": 7502 }, { "epoch": 0.25, "grad_norm": 0.4817090928554535, "learning_rate": 0.0005898081592035265, "loss": 1.9382, "step": 7503 }, { "epoch": 0.25, "grad_norm": 0.5078392028808594, "learning_rate": 0.0005898054577781924, "loss": 1.9393, "step": 7504 }, { "epoch": 0.25, "grad_norm": 0.46519342064857483, "learning_rate": 0.0005898027560010771, "loss": 1.864, "step": 7505 }, { "epoch": 0.25, "grad_norm": 0.4680989384651184, "learning_rate": 0.0005898000538721841, "loss": 1.983, "step": 7506 }, { "epoch": 0.25, "grad_norm": 0.4871290326118469, "learning_rate": 0.0005897973513915164, "loss": 1.9454, "step": 7507 }, { "epoch": 0.25, "grad_norm": 0.4688137471675873, "learning_rate": 0.0005897946485590775, "loss": 1.9379, "step": 7508 }, { "epoch": 0.25, "grad_norm": 0.5060245990753174, "learning_rate": 0.0005897919453748705, "loss": 1.9163, "step": 7509 }, { "epoch": 0.25, "grad_norm": 0.4642426669597626, "learning_rate": 0.0005897892418388989, "loss": 1.8718, "step": 7510 }, { "epoch": 0.25, "grad_norm": 0.4571703374385834, "learning_rate": 0.0005897865379511658, "loss": 1.9219, "step": 7511 }, { "epoch": 0.25, "grad_norm": 0.4687348008155823, "learning_rate": 0.0005897838337116745, "loss": 1.9736, "step": 7512 }, { "epoch": 0.25, "grad_norm": 0.45695143938064575, "learning_rate": 0.0005897811291204284, "loss": 1.9223, "step": 7513 }, { "epoch": 0.25, "grad_norm": 0.46327149868011475, "learning_rate": 0.0005897784241774306, "loss": 1.8643, "step": 7514 }, { "epoch": 0.25, "grad_norm": 0.4718669354915619, "learning_rate": 0.0005897757188826847, "loss": 1.9687, "step": 7515 }, { "epoch": 0.25, "grad_norm": 0.47927621006965637, "learning_rate": 0.0005897730132361935, "loss": 1.971, "step": 7516 }, { "epoch": 0.25, "grad_norm": 0.47223448753356934, "learning_rate": 0.0005897703072379608, "loss": 2.0074, "step": 7517 }, { "epoch": 0.25, "grad_norm": 0.4672742187976837, "learning_rate": 0.0005897676008879895, "loss": 1.9701, "step": 7518 }, { "epoch": 0.25, "grad_norm": 0.4607463479042053, "learning_rate": 0.0005897648941862831, "loss": 2.02, "step": 7519 }, { "epoch": 0.25, "grad_norm": 0.5239055752754211, "learning_rate": 0.0005897621871328447, "loss": 1.9578, "step": 7520 }, { "epoch": 0.25, "grad_norm": 0.4803909659385681, "learning_rate": 0.0005897594797276779, "loss": 1.9258, "step": 7521 }, { "epoch": 0.25, "grad_norm": 0.45428118109703064, "learning_rate": 0.0005897567719707856, "loss": 1.9407, "step": 7522 }, { "epoch": 0.25, "grad_norm": 0.48210859298706055, "learning_rate": 0.0005897540638621714, "loss": 2.0682, "step": 7523 }, { "epoch": 0.25, "grad_norm": 0.4709213078022003, "learning_rate": 0.0005897513554018383, "loss": 1.8957, "step": 7524 }, { "epoch": 0.25, "grad_norm": 0.44742274284362793, "learning_rate": 0.0005897486465897898, "loss": 1.9039, "step": 7525 }, { "epoch": 0.25, "grad_norm": 0.4687558710575104, "learning_rate": 0.0005897459374260293, "loss": 1.935, "step": 7526 }, { "epoch": 0.25, "grad_norm": 0.5271329879760742, "learning_rate": 0.0005897432279105598, "loss": 1.9103, "step": 7527 }, { "epoch": 0.25, "grad_norm": 0.47488832473754883, "learning_rate": 0.0005897405180433846, "loss": 2.0744, "step": 7528 }, { "epoch": 0.25, "grad_norm": 0.4604329466819763, "learning_rate": 0.0005897378078245073, "loss": 1.8886, "step": 7529 }, { "epoch": 0.25, "grad_norm": 0.46092867851257324, "learning_rate": 0.0005897350972539309, "loss": 1.9445, "step": 7530 }, { "epoch": 0.25, "grad_norm": 0.47401726245880127, "learning_rate": 0.0005897323863316588, "loss": 1.9022, "step": 7531 }, { "epoch": 0.25, "grad_norm": 0.4617895781993866, "learning_rate": 0.0005897296750576942, "loss": 1.9156, "step": 7532 }, { "epoch": 0.25, "grad_norm": 0.4772278070449829, "learning_rate": 0.0005897269634320406, "loss": 1.9617, "step": 7533 }, { "epoch": 0.25, "grad_norm": 0.45264965295791626, "learning_rate": 0.000589724251454701, "loss": 1.889, "step": 7534 }, { "epoch": 0.25, "grad_norm": 0.4482313394546509, "learning_rate": 0.000589721539125679, "loss": 1.8929, "step": 7535 }, { "epoch": 0.25, "grad_norm": 0.5558047294616699, "learning_rate": 0.0005897188264449777, "loss": 1.9366, "step": 7536 }, { "epoch": 0.25, "grad_norm": 0.45192065834999084, "learning_rate": 0.0005897161134126004, "loss": 1.9676, "step": 7537 }, { "epoch": 0.25, "grad_norm": 0.4723116457462311, "learning_rate": 0.0005897134000285504, "loss": 1.8529, "step": 7538 }, { "epoch": 0.25, "grad_norm": 0.4797671139240265, "learning_rate": 0.0005897106862928311, "loss": 1.9906, "step": 7539 }, { "epoch": 0.25, "grad_norm": 0.45258134603500366, "learning_rate": 0.0005897079722054456, "loss": 1.8768, "step": 7540 }, { "epoch": 0.25, "grad_norm": 0.49923616647720337, "learning_rate": 0.0005897052577663974, "loss": 1.8453, "step": 7541 }, { "epoch": 0.25, "grad_norm": 0.4815346598625183, "learning_rate": 0.0005897025429756897, "loss": 1.9882, "step": 7542 }, { "epoch": 0.25, "grad_norm": 0.466171532869339, "learning_rate": 0.0005896998278333256, "loss": 1.9492, "step": 7543 }, { "epoch": 0.25, "grad_norm": 0.4940423369407654, "learning_rate": 0.0005896971123393089, "loss": 1.9248, "step": 7544 }, { "epoch": 0.25, "grad_norm": 0.4749756157398224, "learning_rate": 0.0005896943964936425, "loss": 1.9021, "step": 7545 }, { "epoch": 0.25, "grad_norm": 0.499237596988678, "learning_rate": 0.0005896916802963296, "loss": 1.9385, "step": 7546 }, { "epoch": 0.25, "grad_norm": 0.4861237406730652, "learning_rate": 0.0005896889637473739, "loss": 1.9356, "step": 7547 }, { "epoch": 0.25, "grad_norm": 0.46383485198020935, "learning_rate": 0.0005896862468467784, "loss": 1.903, "step": 7548 }, { "epoch": 0.25, "grad_norm": 0.4595324695110321, "learning_rate": 0.0005896835295945465, "loss": 1.9304, "step": 7549 }, { "epoch": 0.25, "grad_norm": 0.48206889629364014, "learning_rate": 0.0005896808119906815, "loss": 1.8968, "step": 7550 }, { "epoch": 0.25, "grad_norm": 0.4789714813232422, "learning_rate": 0.0005896780940351866, "loss": 1.9429, "step": 7551 }, { "epoch": 0.25, "grad_norm": 0.45479804277420044, "learning_rate": 0.0005896753757280652, "loss": 1.9193, "step": 7552 }, { "epoch": 0.25, "grad_norm": 0.4537375271320343, "learning_rate": 0.0005896726570693207, "loss": 1.8358, "step": 7553 }, { "epoch": 0.25, "grad_norm": 0.46152710914611816, "learning_rate": 0.0005896699380589561, "loss": 1.9039, "step": 7554 }, { "epoch": 0.25, "grad_norm": 0.5062556862831116, "learning_rate": 0.0005896672186969749, "loss": 1.8835, "step": 7555 }, { "epoch": 0.25, "grad_norm": 0.4600462317466736, "learning_rate": 0.0005896644989833805, "loss": 1.8399, "step": 7556 }, { "epoch": 0.25, "grad_norm": 0.4635319709777832, "learning_rate": 0.000589661778918176, "loss": 1.9042, "step": 7557 }, { "epoch": 0.25, "grad_norm": 0.47806379199028015, "learning_rate": 0.0005896590585013647, "loss": 2.0463, "step": 7558 }, { "epoch": 0.25, "grad_norm": 0.4680658280849457, "learning_rate": 0.0005896563377329501, "loss": 1.8228, "step": 7559 }, { "epoch": 0.25, "grad_norm": 0.46537721157073975, "learning_rate": 0.0005896536166129354, "loss": 1.8474, "step": 7560 }, { "epoch": 0.25, "grad_norm": 0.4795937240123749, "learning_rate": 0.0005896508951413239, "loss": 1.855, "step": 7561 }, { "epoch": 0.25, "grad_norm": 0.4553268551826477, "learning_rate": 0.0005896481733181188, "loss": 1.9488, "step": 7562 }, { "epoch": 0.25, "grad_norm": 0.4692901074886322, "learning_rate": 0.0005896454511433234, "loss": 1.9672, "step": 7563 }, { "epoch": 0.25, "grad_norm": 0.44989222288131714, "learning_rate": 0.0005896427286169413, "loss": 1.952, "step": 7564 }, { "epoch": 0.25, "grad_norm": 0.4799482226371765, "learning_rate": 0.0005896400057389755, "loss": 1.8826, "step": 7565 }, { "epoch": 0.25, "grad_norm": 0.47454673051834106, "learning_rate": 0.0005896372825094294, "loss": 1.969, "step": 7566 }, { "epoch": 0.25, "grad_norm": 0.4584297239780426, "learning_rate": 0.0005896345589283064, "loss": 1.923, "step": 7567 }, { "epoch": 0.25, "grad_norm": 0.4547164738178253, "learning_rate": 0.0005896318349956097, "loss": 1.9064, "step": 7568 }, { "epoch": 0.25, "grad_norm": 0.4398873746395111, "learning_rate": 0.0005896291107113426, "loss": 1.9105, "step": 7569 }, { "epoch": 0.25, "grad_norm": 0.4674394130706787, "learning_rate": 0.0005896263860755085, "loss": 1.9193, "step": 7570 }, { "epoch": 0.25, "grad_norm": 0.47740185260772705, "learning_rate": 0.0005896236610881104, "loss": 1.9128, "step": 7571 }, { "epoch": 0.25, "grad_norm": 0.4641343951225281, "learning_rate": 0.0005896209357491521, "loss": 1.9258, "step": 7572 }, { "epoch": 0.25, "grad_norm": 0.49537962675094604, "learning_rate": 0.0005896182100586366, "loss": 1.9273, "step": 7573 }, { "epoch": 0.25, "grad_norm": 0.4871475398540497, "learning_rate": 0.0005896154840165672, "loss": 1.8779, "step": 7574 }, { "epoch": 0.25, "grad_norm": 0.4816129207611084, "learning_rate": 0.0005896127576229474, "loss": 1.9258, "step": 7575 }, { "epoch": 0.25, "grad_norm": 0.49443385004997253, "learning_rate": 0.0005896100308777803, "loss": 1.9912, "step": 7576 }, { "epoch": 0.25, "grad_norm": 0.49602046608924866, "learning_rate": 0.0005896073037810693, "loss": 1.9762, "step": 7577 }, { "epoch": 0.25, "grad_norm": 0.49452710151672363, "learning_rate": 0.0005896045763328177, "loss": 1.9798, "step": 7578 }, { "epoch": 0.25, "grad_norm": 0.4682186245918274, "learning_rate": 0.0005896018485330288, "loss": 1.9044, "step": 7579 }, { "epoch": 0.25, "grad_norm": 0.48402538895606995, "learning_rate": 0.0005895991203817059, "loss": 1.92, "step": 7580 }, { "epoch": 0.25, "grad_norm": 0.4767116606235504, "learning_rate": 0.0005895963918788525, "loss": 1.8729, "step": 7581 }, { "epoch": 0.25, "grad_norm": 0.464480459690094, "learning_rate": 0.0005895936630244716, "loss": 1.8183, "step": 7582 }, { "epoch": 0.25, "grad_norm": 0.4753304719924927, "learning_rate": 0.0005895909338185667, "loss": 1.9147, "step": 7583 }, { "epoch": 0.25, "grad_norm": 0.4635498523712158, "learning_rate": 0.000589588204261141, "loss": 1.9206, "step": 7584 }, { "epoch": 0.25, "grad_norm": 0.4687578082084656, "learning_rate": 0.000589585474352198, "loss": 1.9391, "step": 7585 }, { "epoch": 0.25, "grad_norm": 0.45833292603492737, "learning_rate": 0.000589582744091741, "loss": 1.9533, "step": 7586 }, { "epoch": 0.25, "grad_norm": 0.4621984660625458, "learning_rate": 0.0005895800134797731, "loss": 2.0039, "step": 7587 }, { "epoch": 0.25, "grad_norm": 0.4603588283061981, "learning_rate": 0.0005895772825162977, "loss": 1.9163, "step": 7588 }, { "epoch": 0.25, "grad_norm": 0.4755050539970398, "learning_rate": 0.0005895745512013181, "loss": 1.999, "step": 7589 }, { "epoch": 0.25, "grad_norm": 0.4741610288619995, "learning_rate": 0.0005895718195348377, "loss": 1.8742, "step": 7590 }, { "epoch": 0.25, "grad_norm": 0.4613668918609619, "learning_rate": 0.0005895690875168598, "loss": 1.985, "step": 7591 }, { "epoch": 0.25, "grad_norm": 0.5772428512573242, "learning_rate": 0.0005895663551473877, "loss": 1.9407, "step": 7592 }, { "epoch": 0.25, "grad_norm": 0.4837901294231415, "learning_rate": 0.0005895636224264247, "loss": 1.8898, "step": 7593 }, { "epoch": 0.25, "grad_norm": 0.47185179591178894, "learning_rate": 0.0005895608893539742, "loss": 1.9168, "step": 7594 }, { "epoch": 0.25, "grad_norm": 0.46998417377471924, "learning_rate": 0.0005895581559300394, "loss": 1.8999, "step": 7595 }, { "epoch": 0.25, "grad_norm": 0.48180708289146423, "learning_rate": 0.0005895554221546236, "loss": 1.946, "step": 7596 }, { "epoch": 0.25, "grad_norm": 0.46590250730514526, "learning_rate": 0.0005895526880277302, "loss": 1.8651, "step": 7597 }, { "epoch": 0.25, "grad_norm": 0.4510563611984253, "learning_rate": 0.0005895499535493626, "loss": 1.9298, "step": 7598 }, { "epoch": 0.25, "grad_norm": 0.4751099646091461, "learning_rate": 0.0005895472187195239, "loss": 1.9141, "step": 7599 }, { "epoch": 0.25, "grad_norm": 0.4693690538406372, "learning_rate": 0.0005895444835382176, "loss": 1.9767, "step": 7600 }, { "epoch": 0.25, "grad_norm": 0.45071858167648315, "learning_rate": 0.0005895417480054469, "loss": 1.8997, "step": 7601 }, { "epoch": 0.25, "grad_norm": 0.4764380156993866, "learning_rate": 0.0005895390121212153, "loss": 1.9336, "step": 7602 }, { "epoch": 0.25, "grad_norm": 0.4601948857307434, "learning_rate": 0.0005895362758855259, "loss": 1.9354, "step": 7603 }, { "epoch": 0.25, "grad_norm": 0.46871834993362427, "learning_rate": 0.0005895335392983822, "loss": 1.8844, "step": 7604 }, { "epoch": 0.25, "grad_norm": 0.4557226002216339, "learning_rate": 0.0005895308023597873, "loss": 1.9181, "step": 7605 }, { "epoch": 0.25, "grad_norm": 0.47065362334251404, "learning_rate": 0.0005895280650697447, "loss": 1.9819, "step": 7606 }, { "epoch": 0.25, "grad_norm": 0.4681626558303833, "learning_rate": 0.0005895253274282578, "loss": 1.8968, "step": 7607 }, { "epoch": 0.25, "grad_norm": 0.4751245677471161, "learning_rate": 0.0005895225894353298, "loss": 1.9887, "step": 7608 }, { "epoch": 0.25, "grad_norm": 0.46128612756729126, "learning_rate": 0.000589519851090964, "loss": 1.9313, "step": 7609 }, { "epoch": 0.25, "grad_norm": 0.5716306567192078, "learning_rate": 0.0005895171123951637, "loss": 1.9916, "step": 7610 }, { "epoch": 0.25, "grad_norm": 0.48166143894195557, "learning_rate": 0.0005895143733479324, "loss": 1.8986, "step": 7611 }, { "epoch": 0.25, "grad_norm": 0.474965363740921, "learning_rate": 0.0005895116339492732, "loss": 1.8759, "step": 7612 }, { "epoch": 0.25, "grad_norm": 0.4646449685096741, "learning_rate": 0.0005895088941991896, "loss": 2.0096, "step": 7613 }, { "epoch": 0.25, "grad_norm": 0.48670539259910583, "learning_rate": 0.0005895061540976849, "loss": 1.9768, "step": 7614 }, { "epoch": 0.25, "grad_norm": 0.48586711287498474, "learning_rate": 0.0005895034136447622, "loss": 1.8886, "step": 7615 }, { "epoch": 0.25, "grad_norm": 0.4564567506313324, "learning_rate": 0.0005895006728404253, "loss": 1.9208, "step": 7616 }, { "epoch": 0.25, "grad_norm": 0.5046635866165161, "learning_rate": 0.000589497931684677, "loss": 1.8328, "step": 7617 }, { "epoch": 0.25, "grad_norm": 0.4862954914569855, "learning_rate": 0.000589495190177521, "loss": 1.9268, "step": 7618 }, { "epoch": 0.25, "grad_norm": 0.46575385332107544, "learning_rate": 0.0005894924483189604, "loss": 1.9198, "step": 7619 }, { "epoch": 0.25, "grad_norm": 0.47829705476760864, "learning_rate": 0.0005894897061089986, "loss": 1.9314, "step": 7620 }, { "epoch": 0.25, "grad_norm": 0.4905634820461273, "learning_rate": 0.000589486963547639, "loss": 1.9715, "step": 7621 }, { "epoch": 0.25, "grad_norm": 0.4862491190433502, "learning_rate": 0.000589484220634885, "loss": 1.9543, "step": 7622 }, { "epoch": 0.25, "grad_norm": 0.5016033053398132, "learning_rate": 0.0005894814773707396, "loss": 1.9461, "step": 7623 }, { "epoch": 0.25, "grad_norm": 0.4608159363269806, "learning_rate": 0.0005894787337552065, "loss": 1.9287, "step": 7624 }, { "epoch": 0.25, "grad_norm": 0.47970420122146606, "learning_rate": 0.0005894759897882888, "loss": 1.9357, "step": 7625 }, { "epoch": 0.25, "grad_norm": 0.6027682423591614, "learning_rate": 0.0005894732454699899, "loss": 1.9091, "step": 7626 }, { "epoch": 0.25, "grad_norm": 0.48076167702674866, "learning_rate": 0.0005894705008003131, "loss": 1.9622, "step": 7627 }, { "epoch": 0.25, "grad_norm": 0.48482048511505127, "learning_rate": 0.0005894677557792618, "loss": 1.9724, "step": 7628 }, { "epoch": 0.25, "grad_norm": 0.4807933270931244, "learning_rate": 0.0005894650104068393, "loss": 1.9602, "step": 7629 }, { "epoch": 0.25, "grad_norm": 0.4772690236568451, "learning_rate": 0.000589462264683049, "loss": 1.901, "step": 7630 }, { "epoch": 0.25, "grad_norm": 0.4701346158981323, "learning_rate": 0.0005894595186078941, "loss": 1.907, "step": 7631 }, { "epoch": 0.25, "grad_norm": 0.48812419176101685, "learning_rate": 0.0005894567721813779, "loss": 1.9921, "step": 7632 }, { "epoch": 0.25, "grad_norm": 0.49147236347198486, "learning_rate": 0.000589454025403504, "loss": 1.8959, "step": 7633 }, { "epoch": 0.25, "grad_norm": 0.45745813846588135, "learning_rate": 0.0005894512782742754, "loss": 1.9475, "step": 7634 }, { "epoch": 0.25, "grad_norm": 0.4647136926651001, "learning_rate": 0.0005894485307936957, "loss": 1.9589, "step": 7635 }, { "epoch": 0.25, "grad_norm": 0.46786993741989136, "learning_rate": 0.0005894457829617681, "loss": 1.9638, "step": 7636 }, { "epoch": 0.25, "grad_norm": 0.4917192757129669, "learning_rate": 0.000589443034778496, "loss": 1.8239, "step": 7637 }, { "epoch": 0.25, "grad_norm": 0.46970272064208984, "learning_rate": 0.0005894402862438826, "loss": 1.8984, "step": 7638 }, { "epoch": 0.25, "grad_norm": 0.46492478251457214, "learning_rate": 0.0005894375373579315, "loss": 1.9454, "step": 7639 }, { "epoch": 0.25, "grad_norm": 0.5290044546127319, "learning_rate": 0.0005894347881206458, "loss": 1.9492, "step": 7640 }, { "epoch": 0.25, "grad_norm": 0.46307867765426636, "learning_rate": 0.0005894320385320289, "loss": 1.8999, "step": 7641 }, { "epoch": 0.25, "grad_norm": 0.4676288366317749, "learning_rate": 0.0005894292885920841, "loss": 1.869, "step": 7642 }, { "epoch": 0.25, "grad_norm": 0.5153958797454834, "learning_rate": 0.0005894265383008148, "loss": 1.9172, "step": 7643 }, { "epoch": 0.25, "grad_norm": 0.4538860321044922, "learning_rate": 0.0005894237876582244, "loss": 1.9111, "step": 7644 }, { "epoch": 0.25, "grad_norm": 0.4759519398212433, "learning_rate": 0.0005894210366643161, "loss": 1.9699, "step": 7645 }, { "epoch": 0.25, "grad_norm": 0.4673496186733246, "learning_rate": 0.0005894182853190934, "loss": 1.907, "step": 7646 }, { "epoch": 0.25, "grad_norm": 0.4604516923427582, "learning_rate": 0.0005894155336225595, "loss": 1.9642, "step": 7647 }, { "epoch": 0.25, "grad_norm": 0.4696701765060425, "learning_rate": 0.0005894127815747177, "loss": 1.9277, "step": 7648 }, { "epoch": 0.25, "grad_norm": 0.46269717812538147, "learning_rate": 0.0005894100291755715, "loss": 1.8754, "step": 7649 }, { "epoch": 0.25, "grad_norm": 0.45055603981018066, "learning_rate": 0.0005894072764251242, "loss": 1.924, "step": 7650 }, { "epoch": 0.25, "grad_norm": 0.46678271889686584, "learning_rate": 0.0005894045233233791, "loss": 1.9646, "step": 7651 }, { "epoch": 0.25, "grad_norm": 0.45511436462402344, "learning_rate": 0.0005894017698703395, "loss": 1.922, "step": 7652 }, { "epoch": 0.25, "grad_norm": 0.44899868965148926, "learning_rate": 0.0005893990160660089, "loss": 1.9214, "step": 7653 }, { "epoch": 0.25, "grad_norm": 0.47009414434432983, "learning_rate": 0.0005893962619103905, "loss": 1.9426, "step": 7654 }, { "epoch": 0.25, "grad_norm": 0.45252251625061035, "learning_rate": 0.0005893935074034875, "loss": 1.9262, "step": 7655 }, { "epoch": 0.25, "grad_norm": 0.4699183404445648, "learning_rate": 0.0005893907525453037, "loss": 1.9752, "step": 7656 }, { "epoch": 0.25, "grad_norm": 0.4590153396129608, "learning_rate": 0.000589387997335842, "loss": 1.9201, "step": 7657 }, { "epoch": 0.25, "grad_norm": 0.4751994013786316, "learning_rate": 0.0005893852417751059, "loss": 1.8838, "step": 7658 }, { "epoch": 0.25, "grad_norm": 0.4911034107208252, "learning_rate": 0.0005893824858630987, "loss": 1.8414, "step": 7659 }, { "epoch": 0.25, "grad_norm": 0.4859771430492401, "learning_rate": 0.0005893797295998239, "loss": 1.9662, "step": 7660 }, { "epoch": 0.25, "grad_norm": 0.4661692976951599, "learning_rate": 0.0005893769729852848, "loss": 1.9632, "step": 7661 }, { "epoch": 0.25, "grad_norm": 0.47786226868629456, "learning_rate": 0.0005893742160194846, "loss": 1.9443, "step": 7662 }, { "epoch": 0.25, "grad_norm": 0.4750540554523468, "learning_rate": 0.0005893714587024267, "loss": 1.9252, "step": 7663 }, { "epoch": 0.25, "grad_norm": 0.4511098861694336, "learning_rate": 0.0005893687010341146, "loss": 1.866, "step": 7664 }, { "epoch": 0.26, "grad_norm": 0.4660460352897644, "learning_rate": 0.0005893659430145514, "loss": 1.9632, "step": 7665 }, { "epoch": 0.26, "grad_norm": 0.45510703325271606, "learning_rate": 0.0005893631846437408, "loss": 1.969, "step": 7666 }, { "epoch": 0.26, "grad_norm": 0.4891083836555481, "learning_rate": 0.0005893604259216858, "loss": 2.023, "step": 7667 }, { "epoch": 0.26, "grad_norm": 0.471953809261322, "learning_rate": 0.0005893576668483899, "loss": 1.9133, "step": 7668 }, { "epoch": 0.26, "grad_norm": 0.45441749691963196, "learning_rate": 0.0005893549074238564, "loss": 1.9303, "step": 7669 }, { "epoch": 0.26, "grad_norm": 0.4589579105377197, "learning_rate": 0.0005893521476480887, "loss": 1.9557, "step": 7670 }, { "epoch": 0.26, "grad_norm": 0.5745034217834473, "learning_rate": 0.00058934938752109, "loss": 1.9017, "step": 7671 }, { "epoch": 0.26, "grad_norm": 0.47195249795913696, "learning_rate": 0.000589346627042864, "loss": 1.9684, "step": 7672 }, { "epoch": 0.26, "grad_norm": 0.46724140644073486, "learning_rate": 0.0005893438662134136, "loss": 1.9019, "step": 7673 }, { "epoch": 0.26, "grad_norm": 0.4565567076206207, "learning_rate": 0.0005893411050327425, "loss": 1.9165, "step": 7674 }, { "epoch": 0.26, "grad_norm": 0.45553767681121826, "learning_rate": 0.000589338343500854, "loss": 1.9179, "step": 7675 }, { "epoch": 0.26, "grad_norm": 0.45543354749679565, "learning_rate": 0.0005893355816177512, "loss": 1.8744, "step": 7676 }, { "epoch": 0.26, "grad_norm": 0.49711015820503235, "learning_rate": 0.0005893328193834378, "loss": 1.9037, "step": 7677 }, { "epoch": 0.26, "grad_norm": 0.461565226316452, "learning_rate": 0.0005893300567979168, "loss": 1.9145, "step": 7678 }, { "epoch": 0.26, "grad_norm": 0.46537530422210693, "learning_rate": 0.0005893272938611921, "loss": 1.9079, "step": 7679 }, { "epoch": 0.26, "grad_norm": 0.4730452299118042, "learning_rate": 0.0005893245305732663, "loss": 2.0086, "step": 7680 }, { "epoch": 0.26, "grad_norm": 0.4546991288661957, "learning_rate": 0.0005893217669341433, "loss": 1.9258, "step": 7681 }, { "epoch": 0.26, "grad_norm": 0.4617752730846405, "learning_rate": 0.0005893190029438264, "loss": 1.8529, "step": 7682 }, { "epoch": 0.26, "grad_norm": 0.465282142162323, "learning_rate": 0.0005893162386023188, "loss": 1.9085, "step": 7683 }, { "epoch": 0.26, "grad_norm": 0.4571429193019867, "learning_rate": 0.0005893134739096238, "loss": 1.9337, "step": 7684 }, { "epoch": 0.26, "grad_norm": 0.47723373770713806, "learning_rate": 0.0005893107088657451, "loss": 1.9511, "step": 7685 }, { "epoch": 0.26, "grad_norm": 0.4649870693683624, "learning_rate": 0.0005893079434706857, "loss": 1.9178, "step": 7686 }, { "epoch": 0.26, "grad_norm": 0.4853307008743286, "learning_rate": 0.000589305177724449, "loss": 1.9309, "step": 7687 }, { "epoch": 0.26, "grad_norm": 0.5050675272941589, "learning_rate": 0.0005893024116270385, "loss": 1.9349, "step": 7688 }, { "epoch": 0.26, "grad_norm": 0.4562227725982666, "learning_rate": 0.0005892996451784575, "loss": 1.8773, "step": 7689 }, { "epoch": 0.26, "grad_norm": 0.45918798446655273, "learning_rate": 0.0005892968783787094, "loss": 1.9809, "step": 7690 }, { "epoch": 0.26, "grad_norm": 0.4666970670223236, "learning_rate": 0.0005892941112277976, "loss": 1.932, "step": 7691 }, { "epoch": 0.26, "grad_norm": 0.47080811858177185, "learning_rate": 0.0005892913437257251, "loss": 1.9265, "step": 7692 }, { "epoch": 0.26, "grad_norm": 0.4717026948928833, "learning_rate": 0.0005892885758724957, "loss": 1.9201, "step": 7693 }, { "epoch": 0.26, "grad_norm": 0.5218877196311951, "learning_rate": 0.0005892858076681127, "loss": 2.0358, "step": 7694 }, { "epoch": 0.26, "grad_norm": 0.4781363308429718, "learning_rate": 0.0005892830391125792, "loss": 1.8811, "step": 7695 }, { "epoch": 0.26, "grad_norm": 0.4767782688140869, "learning_rate": 0.0005892802702058987, "loss": 1.8862, "step": 7696 }, { "epoch": 0.26, "grad_norm": 0.48178499937057495, "learning_rate": 0.0005892775009480747, "loss": 1.974, "step": 7697 }, { "epoch": 0.26, "grad_norm": 0.4649920165538788, "learning_rate": 0.0005892747313391103, "loss": 1.9091, "step": 7698 }, { "epoch": 0.26, "grad_norm": 0.4498368501663208, "learning_rate": 0.0005892719613790091, "loss": 1.8879, "step": 7699 }, { "epoch": 0.26, "grad_norm": 0.5067738890647888, "learning_rate": 0.0005892691910677744, "loss": 1.8835, "step": 7700 }, { "epoch": 0.26, "grad_norm": 0.46799910068511963, "learning_rate": 0.0005892664204054093, "loss": 1.8733, "step": 7701 }, { "epoch": 0.26, "grad_norm": 0.48469868302345276, "learning_rate": 0.0005892636493919177, "loss": 1.9916, "step": 7702 }, { "epoch": 0.26, "grad_norm": 0.4617621898651123, "learning_rate": 0.0005892608780273024, "loss": 1.9305, "step": 7703 }, { "epoch": 0.26, "grad_norm": 0.4633120596408844, "learning_rate": 0.0005892581063115672, "loss": 1.9491, "step": 7704 }, { "epoch": 0.26, "grad_norm": 0.4937560558319092, "learning_rate": 0.0005892553342447151, "loss": 1.9435, "step": 7705 }, { "epoch": 0.26, "grad_norm": 0.50074303150177, "learning_rate": 0.0005892525618267498, "loss": 1.9227, "step": 7706 }, { "epoch": 0.26, "grad_norm": 0.466481477022171, "learning_rate": 0.0005892497890576744, "loss": 1.9127, "step": 7707 }, { "epoch": 0.26, "grad_norm": 0.4459232687950134, "learning_rate": 0.0005892470159374924, "loss": 1.8639, "step": 7708 }, { "epoch": 0.26, "grad_norm": 0.5094759464263916, "learning_rate": 0.0005892442424662072, "loss": 1.968, "step": 7709 }, { "epoch": 0.26, "grad_norm": 0.48581841588020325, "learning_rate": 0.0005892414686438221, "loss": 1.8906, "step": 7710 }, { "epoch": 0.26, "grad_norm": 0.45230796933174133, "learning_rate": 0.0005892386944703404, "loss": 1.8293, "step": 7711 }, { "epoch": 0.26, "grad_norm": 0.49962401390075684, "learning_rate": 0.0005892359199457656, "loss": 1.9343, "step": 7712 }, { "epoch": 0.26, "grad_norm": 0.4863581955432892, "learning_rate": 0.000589233145070101, "loss": 1.9374, "step": 7713 }, { "epoch": 0.26, "grad_norm": 0.4760194718837738, "learning_rate": 0.0005892303698433499, "loss": 1.9447, "step": 7714 }, { "epoch": 0.26, "grad_norm": 0.4577098786830902, "learning_rate": 0.0005892275942655158, "loss": 1.845, "step": 7715 }, { "epoch": 0.26, "grad_norm": 0.4681660234928131, "learning_rate": 0.000589224818336602, "loss": 1.9587, "step": 7716 }, { "epoch": 0.26, "grad_norm": 0.4656054973602295, "learning_rate": 0.000589222042056612, "loss": 1.9423, "step": 7717 }, { "epoch": 0.26, "grad_norm": 0.48073670268058777, "learning_rate": 0.000589219265425549, "loss": 1.8251, "step": 7718 }, { "epoch": 0.26, "grad_norm": 0.4603132903575897, "learning_rate": 0.0005892164884434164, "loss": 1.8812, "step": 7719 }, { "epoch": 0.26, "grad_norm": 0.45391610264778137, "learning_rate": 0.0005892137111102176, "loss": 1.9153, "step": 7720 }, { "epoch": 0.26, "grad_norm": 0.45641958713531494, "learning_rate": 0.000589210933425956, "loss": 1.9173, "step": 7721 }, { "epoch": 0.26, "grad_norm": 0.4728119671344757, "learning_rate": 0.0005892081553906348, "loss": 1.9276, "step": 7722 }, { "epoch": 0.26, "grad_norm": 0.4836510717868805, "learning_rate": 0.0005892053770042577, "loss": 1.9998, "step": 7723 }, { "epoch": 0.26, "grad_norm": 0.47607219219207764, "learning_rate": 0.0005892025982668277, "loss": 1.9168, "step": 7724 }, { "epoch": 0.26, "grad_norm": 0.4867776930332184, "learning_rate": 0.0005891998191783485, "loss": 1.8943, "step": 7725 }, { "epoch": 0.26, "grad_norm": 0.46492981910705566, "learning_rate": 0.0005891970397388233, "loss": 1.8405, "step": 7726 }, { "epoch": 0.26, "grad_norm": 0.4699588119983673, "learning_rate": 0.0005891942599482554, "loss": 1.8973, "step": 7727 }, { "epoch": 0.26, "grad_norm": 0.4760854244232178, "learning_rate": 0.0005891914798066485, "loss": 1.9563, "step": 7728 }, { "epoch": 0.26, "grad_norm": 0.4488527476787567, "learning_rate": 0.0005891886993140056, "loss": 1.9366, "step": 7729 }, { "epoch": 0.26, "grad_norm": 0.4657312333583832, "learning_rate": 0.0005891859184703303, "loss": 1.9132, "step": 7730 }, { "epoch": 0.26, "grad_norm": 0.46267175674438477, "learning_rate": 0.0005891831372756259, "loss": 1.9614, "step": 7731 }, { "epoch": 0.26, "grad_norm": 0.4592669904232025, "learning_rate": 0.0005891803557298958, "loss": 1.9963, "step": 7732 }, { "epoch": 0.26, "grad_norm": 0.4541056454181671, "learning_rate": 0.0005891775738331432, "loss": 1.8788, "step": 7733 }, { "epoch": 0.26, "grad_norm": 0.4569350481033325, "learning_rate": 0.0005891747915853717, "loss": 1.9799, "step": 7734 }, { "epoch": 0.26, "grad_norm": 0.5410537719726562, "learning_rate": 0.0005891720089865847, "loss": 1.8928, "step": 7735 }, { "epoch": 0.26, "grad_norm": 0.47305047512054443, "learning_rate": 0.0005891692260367854, "loss": 1.8975, "step": 7736 }, { "epoch": 0.26, "grad_norm": 0.46677449345588684, "learning_rate": 0.0005891664427359773, "loss": 1.8649, "step": 7737 }, { "epoch": 0.26, "grad_norm": 0.45548245310783386, "learning_rate": 0.0005891636590841637, "loss": 1.8925, "step": 7738 }, { "epoch": 0.26, "grad_norm": 0.4823857247829437, "learning_rate": 0.0005891608750813481, "loss": 1.8629, "step": 7739 }, { "epoch": 0.26, "grad_norm": 0.4564216136932373, "learning_rate": 0.0005891580907275338, "loss": 1.8576, "step": 7740 }, { "epoch": 0.26, "grad_norm": 0.46428799629211426, "learning_rate": 0.0005891553060227241, "loss": 1.9533, "step": 7741 }, { "epoch": 0.26, "grad_norm": 0.46678730845451355, "learning_rate": 0.0005891525209669226, "loss": 1.9284, "step": 7742 }, { "epoch": 0.26, "grad_norm": 0.4813348948955536, "learning_rate": 0.0005891497355601325, "loss": 1.882, "step": 7743 }, { "epoch": 0.26, "grad_norm": 0.4619194269180298, "learning_rate": 0.0005891469498023571, "loss": 1.889, "step": 7744 }, { "epoch": 0.26, "grad_norm": 0.4760085642337799, "learning_rate": 0.0005891441636936, "loss": 2.0006, "step": 7745 }, { "epoch": 0.26, "grad_norm": 0.4803413152694702, "learning_rate": 0.0005891413772338645, "loss": 1.9665, "step": 7746 }, { "epoch": 0.26, "grad_norm": 0.4563789963722229, "learning_rate": 0.0005891385904231541, "loss": 1.9277, "step": 7747 }, { "epoch": 0.26, "grad_norm": 0.4600387513637543, "learning_rate": 0.0005891358032614717, "loss": 1.861, "step": 7748 }, { "epoch": 0.26, "grad_norm": 0.48243317008018494, "learning_rate": 0.0005891330157488213, "loss": 1.9515, "step": 7749 }, { "epoch": 0.26, "grad_norm": 0.4787938594818115, "learning_rate": 0.000589130227885206, "loss": 1.8911, "step": 7750 }, { "epoch": 0.26, "grad_norm": 0.4793386459350586, "learning_rate": 0.0005891274396706293, "loss": 1.9625, "step": 7751 }, { "epoch": 0.26, "grad_norm": 0.4586617052555084, "learning_rate": 0.0005891246511050942, "loss": 1.8935, "step": 7752 }, { "epoch": 0.26, "grad_norm": 0.46725285053253174, "learning_rate": 0.0005891218621886046, "loss": 1.8829, "step": 7753 }, { "epoch": 0.26, "grad_norm": 0.48058873414993286, "learning_rate": 0.0005891190729211636, "loss": 1.9766, "step": 7754 }, { "epoch": 0.26, "grad_norm": 0.46201208233833313, "learning_rate": 0.0005891162833027747, "loss": 1.9285, "step": 7755 }, { "epoch": 0.26, "grad_norm": 0.48117795586586, "learning_rate": 0.0005891134933334411, "loss": 1.9084, "step": 7756 }, { "epoch": 0.26, "grad_norm": 0.47275102138519287, "learning_rate": 0.0005891107030131663, "loss": 1.9221, "step": 7757 }, { "epoch": 0.26, "grad_norm": 0.45499730110168457, "learning_rate": 0.0005891079123419539, "loss": 1.8901, "step": 7758 }, { "epoch": 0.26, "grad_norm": 0.5028313994407654, "learning_rate": 0.000589105121319807, "loss": 1.9848, "step": 7759 }, { "epoch": 0.26, "grad_norm": 0.45169827342033386, "learning_rate": 0.000589102329946729, "loss": 1.8529, "step": 7760 }, { "epoch": 0.26, "grad_norm": 0.4453775882720947, "learning_rate": 0.0005890995382227235, "loss": 2.0304, "step": 7761 }, { "epoch": 0.26, "grad_norm": 0.48606616258621216, "learning_rate": 0.0005890967461477937, "loss": 1.9086, "step": 7762 }, { "epoch": 0.26, "grad_norm": 0.450982004404068, "learning_rate": 0.0005890939537219431, "loss": 1.8504, "step": 7763 }, { "epoch": 0.26, "grad_norm": 0.4738905429840088, "learning_rate": 0.0005890911609451749, "loss": 1.9156, "step": 7764 }, { "epoch": 0.26, "grad_norm": 0.4889985918998718, "learning_rate": 0.0005890883678174927, "loss": 1.9664, "step": 7765 }, { "epoch": 0.26, "grad_norm": 0.47797977924346924, "learning_rate": 0.0005890855743388998, "loss": 1.9049, "step": 7766 }, { "epoch": 0.26, "grad_norm": 0.46902281045913696, "learning_rate": 0.0005890827805093996, "loss": 1.9549, "step": 7767 }, { "epoch": 0.26, "grad_norm": 0.4663983881473541, "learning_rate": 0.0005890799863289955, "loss": 1.8525, "step": 7768 }, { "epoch": 0.26, "grad_norm": 0.46665167808532715, "learning_rate": 0.000589077191797691, "loss": 1.9782, "step": 7769 }, { "epoch": 0.26, "grad_norm": 0.4591754078865051, "learning_rate": 0.0005890743969154893, "loss": 1.931, "step": 7770 }, { "epoch": 0.26, "grad_norm": 0.46648621559143066, "learning_rate": 0.0005890716016823938, "loss": 1.983, "step": 7771 }, { "epoch": 0.26, "grad_norm": 0.462588906288147, "learning_rate": 0.0005890688060984081, "loss": 1.8962, "step": 7772 }, { "epoch": 0.26, "grad_norm": 0.46667319536209106, "learning_rate": 0.0005890660101635354, "loss": 1.8565, "step": 7773 }, { "epoch": 0.26, "grad_norm": 0.4972423017024994, "learning_rate": 0.0005890632138777792, "loss": 1.9455, "step": 7774 }, { "epoch": 0.26, "grad_norm": 0.4586670994758606, "learning_rate": 0.0005890604172411428, "loss": 1.8902, "step": 7775 }, { "epoch": 0.26, "grad_norm": 0.4467881917953491, "learning_rate": 0.0005890576202536296, "loss": 1.9522, "step": 7776 }, { "epoch": 0.26, "grad_norm": 0.44996094703674316, "learning_rate": 0.0005890548229152431, "loss": 1.9083, "step": 7777 }, { "epoch": 0.26, "grad_norm": 0.4747444689273834, "learning_rate": 0.0005890520252259866, "loss": 1.9416, "step": 7778 }, { "epoch": 0.26, "grad_norm": 0.45888274908065796, "learning_rate": 0.0005890492271858636, "loss": 1.9279, "step": 7779 }, { "epoch": 0.26, "grad_norm": 0.4754193127155304, "learning_rate": 0.0005890464287948774, "loss": 2.0396, "step": 7780 }, { "epoch": 0.26, "grad_norm": 0.4592053294181824, "learning_rate": 0.0005890436300530314, "loss": 1.9546, "step": 7781 }, { "epoch": 0.26, "grad_norm": 0.48835188150405884, "learning_rate": 0.0005890408309603291, "loss": 1.9576, "step": 7782 }, { "epoch": 0.26, "grad_norm": 0.4626867473125458, "learning_rate": 0.0005890380315167736, "loss": 1.9528, "step": 7783 }, { "epoch": 0.26, "grad_norm": 0.5151861906051636, "learning_rate": 0.0005890352317223687, "loss": 1.956, "step": 7784 }, { "epoch": 0.26, "grad_norm": 0.48557400703430176, "learning_rate": 0.0005890324315771177, "loss": 1.9117, "step": 7785 }, { "epoch": 0.26, "grad_norm": 0.48259416222572327, "learning_rate": 0.0005890296310810237, "loss": 1.9533, "step": 7786 }, { "epoch": 0.26, "grad_norm": 0.45943987369537354, "learning_rate": 0.0005890268302340904, "loss": 1.8578, "step": 7787 }, { "epoch": 0.26, "grad_norm": 0.49500954151153564, "learning_rate": 0.0005890240290363211, "loss": 1.8952, "step": 7788 }, { "epoch": 0.26, "grad_norm": 0.46932634711265564, "learning_rate": 0.0005890212274877193, "loss": 1.8768, "step": 7789 }, { "epoch": 0.26, "grad_norm": 0.4694530665874481, "learning_rate": 0.0005890184255882883, "loss": 1.9933, "step": 7790 }, { "epoch": 0.26, "grad_norm": 0.48816388845443726, "learning_rate": 0.0005890156233380315, "loss": 1.9258, "step": 7791 }, { "epoch": 0.26, "grad_norm": 0.4657432734966278, "learning_rate": 0.0005890128207369523, "loss": 1.916, "step": 7792 }, { "epoch": 0.26, "grad_norm": 0.46205198764801025, "learning_rate": 0.0005890100177850541, "loss": 1.8848, "step": 7793 }, { "epoch": 0.26, "grad_norm": 0.46457645297050476, "learning_rate": 0.0005890072144823403, "loss": 1.9378, "step": 7794 }, { "epoch": 0.26, "grad_norm": 0.4892413914203644, "learning_rate": 0.0005890044108288144, "loss": 1.9256, "step": 7795 }, { "epoch": 0.26, "grad_norm": 0.45893603563308716, "learning_rate": 0.0005890016068244797, "loss": 1.977, "step": 7796 }, { "epoch": 0.26, "grad_norm": 0.4675251543521881, "learning_rate": 0.0005889988024693395, "loss": 1.9152, "step": 7797 }, { "epoch": 0.26, "grad_norm": 0.4465126693248749, "learning_rate": 0.0005889959977633975, "loss": 1.8714, "step": 7798 }, { "epoch": 0.26, "grad_norm": 0.453121155500412, "learning_rate": 0.000588993192706657, "loss": 1.9102, "step": 7799 }, { "epoch": 0.26, "grad_norm": 0.47730156779289246, "learning_rate": 0.0005889903872991212, "loss": 1.9551, "step": 7800 }, { "epoch": 0.26, "grad_norm": 0.4582560062408447, "learning_rate": 0.0005889875815407936, "loss": 1.9273, "step": 7801 }, { "epoch": 0.26, "grad_norm": 0.4640963077545166, "learning_rate": 0.0005889847754316778, "loss": 1.9345, "step": 7802 }, { "epoch": 0.26, "grad_norm": 0.4862867295742035, "learning_rate": 0.0005889819689717771, "loss": 1.9109, "step": 7803 }, { "epoch": 0.26, "grad_norm": 0.4614889919757843, "learning_rate": 0.0005889791621610947, "loss": 1.9547, "step": 7804 }, { "epoch": 0.26, "grad_norm": 0.49088695645332336, "learning_rate": 0.0005889763549996342, "loss": 1.9363, "step": 7805 }, { "epoch": 0.26, "grad_norm": 0.45987048745155334, "learning_rate": 0.0005889735474873991, "loss": 1.9167, "step": 7806 }, { "epoch": 0.26, "grad_norm": 0.4514651596546173, "learning_rate": 0.0005889707396243926, "loss": 1.9374, "step": 7807 }, { "epoch": 0.26, "grad_norm": 0.45736417174339294, "learning_rate": 0.0005889679314106183, "loss": 1.9013, "step": 7808 }, { "epoch": 0.26, "grad_norm": 0.46174898743629456, "learning_rate": 0.0005889651228460794, "loss": 1.8775, "step": 7809 }, { "epoch": 0.26, "grad_norm": 0.46670615673065186, "learning_rate": 0.0005889623139307794, "loss": 1.9008, "step": 7810 }, { "epoch": 0.26, "grad_norm": 0.46774402260780334, "learning_rate": 0.0005889595046647218, "loss": 1.9244, "step": 7811 }, { "epoch": 0.26, "grad_norm": 0.47469523549079895, "learning_rate": 0.0005889566950479099, "loss": 1.9172, "step": 7812 }, { "epoch": 0.26, "grad_norm": 0.4784245193004608, "learning_rate": 0.0005889538850803473, "loss": 1.9364, "step": 7813 }, { "epoch": 0.26, "grad_norm": 0.46127504110336304, "learning_rate": 0.000588951074762037, "loss": 1.9456, "step": 7814 }, { "epoch": 0.26, "grad_norm": 0.4713807702064514, "learning_rate": 0.0005889482640929829, "loss": 1.8664, "step": 7815 }, { "epoch": 0.26, "grad_norm": 0.4758017957210541, "learning_rate": 0.000588945453073188, "loss": 1.8764, "step": 7816 }, { "epoch": 0.26, "grad_norm": 0.47228100895881653, "learning_rate": 0.000588942641702656, "loss": 1.9463, "step": 7817 }, { "epoch": 0.26, "grad_norm": 0.4707166254520416, "learning_rate": 0.0005889398299813903, "loss": 2.0182, "step": 7818 }, { "epoch": 0.26, "grad_norm": 0.5092988014221191, "learning_rate": 0.000588937017909394, "loss": 1.9001, "step": 7819 }, { "epoch": 0.26, "grad_norm": 0.4607274532318115, "learning_rate": 0.0005889342054866709, "loss": 1.9337, "step": 7820 }, { "epoch": 0.26, "grad_norm": 0.46729597449302673, "learning_rate": 0.0005889313927132241, "loss": 1.8702, "step": 7821 }, { "epoch": 0.26, "grad_norm": 0.47323858737945557, "learning_rate": 0.0005889285795890573, "loss": 1.9944, "step": 7822 }, { "epoch": 0.26, "grad_norm": 0.4571889638900757, "learning_rate": 0.0005889257661141737, "loss": 1.9089, "step": 7823 }, { "epoch": 0.26, "grad_norm": 0.5201534628868103, "learning_rate": 0.0005889229522885767, "loss": 1.9298, "step": 7824 }, { "epoch": 0.26, "grad_norm": 0.45155099034309387, "learning_rate": 0.00058892013811227, "loss": 1.8474, "step": 7825 }, { "epoch": 0.26, "grad_norm": 0.5055352449417114, "learning_rate": 0.0005889173235852568, "loss": 1.9815, "step": 7826 }, { "epoch": 0.26, "grad_norm": 0.4821489453315735, "learning_rate": 0.0005889145087075405, "loss": 1.9495, "step": 7827 }, { "epoch": 0.26, "grad_norm": 0.4484553635120392, "learning_rate": 0.0005889116934791245, "loss": 1.8478, "step": 7828 }, { "epoch": 0.26, "grad_norm": 0.45044413208961487, "learning_rate": 0.0005889088779000123, "loss": 1.8755, "step": 7829 }, { "epoch": 0.26, "grad_norm": 0.5267811417579651, "learning_rate": 0.0005889060619702073, "loss": 1.9104, "step": 7830 }, { "epoch": 0.26, "grad_norm": 0.48425087332725525, "learning_rate": 0.0005889032456897129, "loss": 1.8871, "step": 7831 }, { "epoch": 0.26, "grad_norm": 0.4553564786911011, "learning_rate": 0.0005889004290585326, "loss": 1.9159, "step": 7832 }, { "epoch": 0.26, "grad_norm": 0.48049765825271606, "learning_rate": 0.0005888976120766696, "loss": 1.92, "step": 7833 }, { "epoch": 0.26, "grad_norm": 0.48463350534439087, "learning_rate": 0.0005888947947441276, "loss": 1.9569, "step": 7834 }, { "epoch": 0.26, "grad_norm": 0.4677504599094391, "learning_rate": 0.0005888919770609099, "loss": 1.8764, "step": 7835 }, { "epoch": 0.26, "grad_norm": 0.48510152101516724, "learning_rate": 0.0005888891590270197, "loss": 1.9065, "step": 7836 }, { "epoch": 0.26, "grad_norm": 0.4971458315849304, "learning_rate": 0.0005888863406424608, "loss": 1.9923, "step": 7837 }, { "epoch": 0.26, "grad_norm": 0.4783603549003601, "learning_rate": 0.0005888835219072365, "loss": 1.9114, "step": 7838 }, { "epoch": 0.26, "grad_norm": 0.47029176354408264, "learning_rate": 0.00058888070282135, "loss": 1.9334, "step": 7839 }, { "epoch": 0.26, "grad_norm": 0.49059343338012695, "learning_rate": 0.000588877883384805, "loss": 1.9682, "step": 7840 }, { "epoch": 0.26, "grad_norm": 0.4791501760482788, "learning_rate": 0.0005888750635976048, "loss": 1.9305, "step": 7841 }, { "epoch": 0.26, "grad_norm": 0.47922083735466003, "learning_rate": 0.0005888722434597528, "loss": 1.9662, "step": 7842 }, { "epoch": 0.26, "grad_norm": 0.472431480884552, "learning_rate": 0.0005888694229712524, "loss": 1.9567, "step": 7843 }, { "epoch": 0.26, "grad_norm": 0.508047878742218, "learning_rate": 0.0005888666021321073, "loss": 1.9148, "step": 7844 }, { "epoch": 0.26, "grad_norm": 0.5050553679466248, "learning_rate": 0.0005888637809423206, "loss": 1.9161, "step": 7845 }, { "epoch": 0.26, "grad_norm": 0.46356505155563354, "learning_rate": 0.0005888609594018958, "loss": 2.0011, "step": 7846 }, { "epoch": 0.26, "grad_norm": 0.4812522530555725, "learning_rate": 0.0005888581375108363, "loss": 1.941, "step": 7847 }, { "epoch": 0.26, "grad_norm": 0.48375263810157776, "learning_rate": 0.0005888553152691456, "loss": 1.9499, "step": 7848 }, { "epoch": 0.26, "grad_norm": 0.47912564873695374, "learning_rate": 0.0005888524926768272, "loss": 1.924, "step": 7849 }, { "epoch": 0.26, "grad_norm": 0.4671473503112793, "learning_rate": 0.0005888496697338844, "loss": 2.0627, "step": 7850 }, { "epoch": 0.26, "grad_norm": 0.4628021717071533, "learning_rate": 0.0005888468464403207, "loss": 1.9833, "step": 7851 }, { "epoch": 0.26, "grad_norm": 0.46865662932395935, "learning_rate": 0.0005888440227961394, "loss": 1.9138, "step": 7852 }, { "epoch": 0.26, "grad_norm": 0.4676637351512909, "learning_rate": 0.0005888411988013441, "loss": 1.9502, "step": 7853 }, { "epoch": 0.26, "grad_norm": 0.4598991870880127, "learning_rate": 0.0005888383744559381, "loss": 1.8762, "step": 7854 }, { "epoch": 0.26, "grad_norm": 0.45993778109550476, "learning_rate": 0.0005888355497599249, "loss": 1.9189, "step": 7855 }, { "epoch": 0.26, "grad_norm": 0.4602591097354889, "learning_rate": 0.0005888327247133078, "loss": 1.8465, "step": 7856 }, { "epoch": 0.26, "grad_norm": 0.5628169178962708, "learning_rate": 0.0005888298993160905, "loss": 1.997, "step": 7857 }, { "epoch": 0.26, "grad_norm": 0.48553216457366943, "learning_rate": 0.000588827073568276, "loss": 1.8661, "step": 7858 }, { "epoch": 0.26, "grad_norm": 0.4811690151691437, "learning_rate": 0.0005888242474698682, "loss": 1.9486, "step": 7859 }, { "epoch": 0.26, "grad_norm": 0.5016899704933167, "learning_rate": 0.0005888214210208703, "loss": 1.8782, "step": 7860 }, { "epoch": 0.26, "grad_norm": 0.47374895215034485, "learning_rate": 0.0005888185942212857, "loss": 1.9557, "step": 7861 }, { "epoch": 0.26, "grad_norm": 0.48001667857170105, "learning_rate": 0.0005888157670711179, "loss": 2.0305, "step": 7862 }, { "epoch": 0.26, "grad_norm": 0.4588419497013092, "learning_rate": 0.0005888129395703704, "loss": 1.9441, "step": 7863 }, { "epoch": 0.26, "grad_norm": 0.4701462388038635, "learning_rate": 0.0005888101117190464, "loss": 1.9255, "step": 7864 }, { "epoch": 0.26, "grad_norm": 0.4723292887210846, "learning_rate": 0.0005888072835171495, "loss": 1.9367, "step": 7865 }, { "epoch": 0.26, "grad_norm": 0.4548887312412262, "learning_rate": 0.0005888044549646832, "loss": 1.9773, "step": 7866 }, { "epoch": 0.26, "grad_norm": 0.4576182961463928, "learning_rate": 0.0005888016260616508, "loss": 1.9372, "step": 7867 }, { "epoch": 0.26, "grad_norm": 0.4523201584815979, "learning_rate": 0.0005887987968080558, "loss": 1.8038, "step": 7868 }, { "epoch": 0.26, "grad_norm": 0.4647962152957916, "learning_rate": 0.0005887959672039016, "loss": 1.937, "step": 7869 }, { "epoch": 0.26, "grad_norm": 0.4620111584663391, "learning_rate": 0.0005887931372491915, "loss": 1.909, "step": 7870 }, { "epoch": 0.26, "grad_norm": 0.47218260169029236, "learning_rate": 0.0005887903069439292, "loss": 1.9364, "step": 7871 }, { "epoch": 0.26, "grad_norm": 0.46588245034217834, "learning_rate": 0.0005887874762881182, "loss": 1.9278, "step": 7872 }, { "epoch": 0.26, "grad_norm": 0.47527626156806946, "learning_rate": 0.0005887846452817615, "loss": 1.9137, "step": 7873 }, { "epoch": 0.26, "grad_norm": 0.4714566469192505, "learning_rate": 0.000588781813924863, "loss": 1.9157, "step": 7874 }, { "epoch": 0.26, "grad_norm": 0.4671929180622101, "learning_rate": 0.0005887789822174258, "loss": 1.9621, "step": 7875 }, { "epoch": 0.26, "grad_norm": 0.4817299246788025, "learning_rate": 0.0005887761501594534, "loss": 1.9336, "step": 7876 }, { "epoch": 0.26, "grad_norm": 0.4870968163013458, "learning_rate": 0.0005887733177509494, "loss": 1.9179, "step": 7877 }, { "epoch": 0.26, "grad_norm": 0.4765455424785614, "learning_rate": 0.000588770484991917, "loss": 1.9285, "step": 7878 }, { "epoch": 0.26, "grad_norm": 0.477255642414093, "learning_rate": 0.00058876765188236, "loss": 1.9193, "step": 7879 }, { "epoch": 0.26, "grad_norm": 0.4716678261756897, "learning_rate": 0.0005887648184222815, "loss": 1.8866, "step": 7880 }, { "epoch": 0.26, "grad_norm": 0.4619589149951935, "learning_rate": 0.0005887619846116851, "loss": 1.9564, "step": 7881 }, { "epoch": 0.26, "grad_norm": 0.4487314522266388, "learning_rate": 0.0005887591504505742, "loss": 1.8934, "step": 7882 }, { "epoch": 0.26, "grad_norm": 0.4791174530982971, "learning_rate": 0.0005887563159389522, "loss": 1.9159, "step": 7883 }, { "epoch": 0.26, "grad_norm": 0.4514433443546295, "learning_rate": 0.0005887534810768226, "loss": 1.9394, "step": 7884 }, { "epoch": 0.26, "grad_norm": 0.4668324291706085, "learning_rate": 0.0005887506458641888, "loss": 1.872, "step": 7885 }, { "epoch": 0.26, "grad_norm": 0.47510388493537903, "learning_rate": 0.0005887478103010543, "loss": 1.948, "step": 7886 }, { "epoch": 0.26, "grad_norm": 0.4469450116157532, "learning_rate": 0.0005887449743874224, "loss": 1.8825, "step": 7887 }, { "epoch": 0.26, "grad_norm": 0.49306750297546387, "learning_rate": 0.0005887421381232968, "loss": 1.9007, "step": 7888 }, { "epoch": 0.26, "grad_norm": 0.4536365568637848, "learning_rate": 0.0005887393015086807, "loss": 1.9047, "step": 7889 }, { "epoch": 0.26, "grad_norm": 0.46921291947364807, "learning_rate": 0.0005887364645435776, "loss": 1.9018, "step": 7890 }, { "epoch": 0.26, "grad_norm": 0.47555750608444214, "learning_rate": 0.000588733627227991, "loss": 1.9974, "step": 7891 }, { "epoch": 0.26, "grad_norm": 0.4576584994792938, "learning_rate": 0.0005887307895619244, "loss": 1.8593, "step": 7892 }, { "epoch": 0.26, "grad_norm": 0.4552619755268097, "learning_rate": 0.000588727951545381, "loss": 1.8544, "step": 7893 }, { "epoch": 0.26, "grad_norm": 0.45203933119773865, "learning_rate": 0.0005887251131783646, "loss": 1.9044, "step": 7894 }, { "epoch": 0.26, "grad_norm": 0.47988444566726685, "learning_rate": 0.0005887222744608783, "loss": 1.8895, "step": 7895 }, { "epoch": 0.26, "grad_norm": 0.47332504391670227, "learning_rate": 0.0005887194353929258, "loss": 1.937, "step": 7896 }, { "epoch": 0.26, "grad_norm": 0.45720788836479187, "learning_rate": 0.0005887165959745104, "loss": 1.8319, "step": 7897 }, { "epoch": 0.26, "grad_norm": 0.5007885098457336, "learning_rate": 0.0005887137562056357, "loss": 1.9534, "step": 7898 }, { "epoch": 0.26, "grad_norm": 0.4514923393726349, "learning_rate": 0.0005887109160863049, "loss": 1.892, "step": 7899 }, { "epoch": 0.26, "grad_norm": 0.48028698563575745, "learning_rate": 0.0005887080756165216, "loss": 1.933, "step": 7900 }, { "epoch": 0.26, "grad_norm": 0.4602203369140625, "learning_rate": 0.0005887052347962892, "loss": 1.9313, "step": 7901 }, { "epoch": 0.26, "grad_norm": 0.48240095376968384, "learning_rate": 0.0005887023936256114, "loss": 1.8907, "step": 7902 }, { "epoch": 0.26, "grad_norm": 0.49606436491012573, "learning_rate": 0.0005886995521044912, "loss": 1.9548, "step": 7903 }, { "epoch": 0.26, "grad_norm": 0.4630107283592224, "learning_rate": 0.0005886967102329324, "loss": 1.8764, "step": 7904 }, { "epoch": 0.26, "grad_norm": 0.5020928382873535, "learning_rate": 0.0005886938680109382, "loss": 1.8948, "step": 7905 }, { "epoch": 0.26, "grad_norm": 0.4566645622253418, "learning_rate": 0.0005886910254385123, "loss": 1.9078, "step": 7906 }, { "epoch": 0.26, "grad_norm": 0.4775301218032837, "learning_rate": 0.000588688182515658, "loss": 1.9789, "step": 7907 }, { "epoch": 0.26, "grad_norm": 0.5030649304389954, "learning_rate": 0.0005886853392423788, "loss": 1.939, "step": 7908 }, { "epoch": 0.26, "grad_norm": 0.47660738229751587, "learning_rate": 0.0005886824956186781, "loss": 1.9184, "step": 7909 }, { "epoch": 0.26, "grad_norm": 0.46223825216293335, "learning_rate": 0.0005886796516445595, "loss": 1.9877, "step": 7910 }, { "epoch": 0.26, "grad_norm": 0.4609415829181671, "learning_rate": 0.0005886768073200262, "loss": 1.9123, "step": 7911 }, { "epoch": 0.26, "grad_norm": 0.5148316621780396, "learning_rate": 0.0005886739626450819, "loss": 1.9422, "step": 7912 }, { "epoch": 0.26, "grad_norm": 0.49847137928009033, "learning_rate": 0.0005886711176197298, "loss": 1.9024, "step": 7913 }, { "epoch": 0.26, "grad_norm": 0.4712074100971222, "learning_rate": 0.0005886682722439737, "loss": 1.8834, "step": 7914 }, { "epoch": 0.26, "grad_norm": 0.48536819219589233, "learning_rate": 0.0005886654265178167, "loss": 1.8636, "step": 7915 }, { "epoch": 0.26, "grad_norm": 0.5192809700965881, "learning_rate": 0.0005886625804412624, "loss": 1.9062, "step": 7916 }, { "epoch": 0.26, "grad_norm": 0.48285341262817383, "learning_rate": 0.0005886597340143142, "loss": 1.9311, "step": 7917 }, { "epoch": 0.26, "grad_norm": 0.4753924310207367, "learning_rate": 0.0005886568872369757, "loss": 1.8876, "step": 7918 }, { "epoch": 0.26, "grad_norm": 0.483029842376709, "learning_rate": 0.0005886540401092503, "loss": 1.9329, "step": 7919 }, { "epoch": 0.26, "grad_norm": 0.6001990437507629, "learning_rate": 0.0005886511926311414, "loss": 1.931, "step": 7920 }, { "epoch": 0.26, "grad_norm": 0.4801054894924164, "learning_rate": 0.0005886483448026526, "loss": 1.954, "step": 7921 }, { "epoch": 0.26, "grad_norm": 0.4708860516548157, "learning_rate": 0.0005886454966237871, "loss": 1.9928, "step": 7922 }, { "epoch": 0.26, "grad_norm": 0.49712324142456055, "learning_rate": 0.0005886426480945485, "loss": 1.9132, "step": 7923 }, { "epoch": 0.26, "grad_norm": 0.46827390789985657, "learning_rate": 0.0005886397992149403, "loss": 1.9013, "step": 7924 }, { "epoch": 0.26, "grad_norm": 0.46812114119529724, "learning_rate": 0.0005886369499849658, "loss": 1.9351, "step": 7925 }, { "epoch": 0.26, "grad_norm": 0.471756249666214, "learning_rate": 0.0005886341004046287, "loss": 1.947, "step": 7926 }, { "epoch": 0.26, "grad_norm": 0.4627270996570587, "learning_rate": 0.0005886312504739323, "loss": 1.9149, "step": 7927 }, { "epoch": 0.26, "grad_norm": 0.47353023290634155, "learning_rate": 0.0005886284001928801, "loss": 1.9826, "step": 7928 }, { "epoch": 0.26, "grad_norm": 0.47008296847343445, "learning_rate": 0.0005886255495614756, "loss": 1.9336, "step": 7929 }, { "epoch": 0.26, "grad_norm": 0.45748716592788696, "learning_rate": 0.0005886226985797221, "loss": 1.8866, "step": 7930 }, { "epoch": 0.26, "grad_norm": 0.47839683294296265, "learning_rate": 0.0005886198472476233, "loss": 1.9463, "step": 7931 }, { "epoch": 0.26, "grad_norm": 0.4611395001411438, "learning_rate": 0.0005886169955651824, "loss": 1.884, "step": 7932 }, { "epoch": 0.26, "grad_norm": 0.44633710384368896, "learning_rate": 0.000588614143532403, "loss": 1.8604, "step": 7933 }, { "epoch": 0.26, "grad_norm": 0.48841074109077454, "learning_rate": 0.0005886112911492887, "loss": 1.9737, "step": 7934 }, { "epoch": 0.26, "grad_norm": 0.46738195419311523, "learning_rate": 0.0005886084384158426, "loss": 1.8423, "step": 7935 }, { "epoch": 0.26, "grad_norm": 0.46530240774154663, "learning_rate": 0.0005886055853320685, "loss": 1.8807, "step": 7936 }, { "epoch": 0.26, "grad_norm": 0.4839246869087219, "learning_rate": 0.0005886027318979697, "loss": 1.9977, "step": 7937 }, { "epoch": 0.26, "grad_norm": 0.45988523960113525, "learning_rate": 0.0005885998781135497, "loss": 1.8133, "step": 7938 }, { "epoch": 0.26, "grad_norm": 0.4862901568412781, "learning_rate": 0.0005885970239788122, "loss": 1.967, "step": 7939 }, { "epoch": 0.26, "grad_norm": 0.4769277274608612, "learning_rate": 0.0005885941694937601, "loss": 1.9933, "step": 7940 }, { "epoch": 0.26, "grad_norm": 0.48908883333206177, "learning_rate": 0.0005885913146583974, "loss": 1.9205, "step": 7941 }, { "epoch": 0.26, "grad_norm": 0.45912501215934753, "learning_rate": 0.0005885884594727273, "loss": 1.9482, "step": 7942 }, { "epoch": 0.26, "grad_norm": 0.4736918807029724, "learning_rate": 0.0005885856039367533, "loss": 1.8918, "step": 7943 }, { "epoch": 0.26, "grad_norm": 0.4527428150177002, "learning_rate": 0.000588582748050479, "loss": 1.9336, "step": 7944 }, { "epoch": 0.26, "grad_norm": 0.45991700887680054, "learning_rate": 0.0005885798918139078, "loss": 1.956, "step": 7945 }, { "epoch": 0.26, "grad_norm": 0.4598926901817322, "learning_rate": 0.0005885770352270429, "loss": 1.9759, "step": 7946 }, { "epoch": 0.26, "grad_norm": 0.47546887397766113, "learning_rate": 0.0005885741782898882, "loss": 1.91, "step": 7947 }, { "epoch": 0.26, "grad_norm": 0.4987953007221222, "learning_rate": 0.0005885713210024469, "loss": 1.8521, "step": 7948 }, { "epoch": 0.26, "grad_norm": 0.4519844353199005, "learning_rate": 0.0005885684633647225, "loss": 1.91, "step": 7949 }, { "epoch": 0.26, "grad_norm": 0.4783477485179901, "learning_rate": 0.0005885656053767186, "loss": 1.8656, "step": 7950 }, { "epoch": 0.26, "grad_norm": 0.4449128806591034, "learning_rate": 0.0005885627470384384, "loss": 1.9629, "step": 7951 }, { "epoch": 0.26, "grad_norm": 0.45960181951522827, "learning_rate": 0.0005885598883498858, "loss": 1.8625, "step": 7952 }, { "epoch": 0.26, "grad_norm": 0.48571547865867615, "learning_rate": 0.0005885570293110639, "loss": 1.9983, "step": 7953 }, { "epoch": 0.26, "grad_norm": 0.5005040168762207, "learning_rate": 0.0005885541699219762, "loss": 1.8074, "step": 7954 }, { "epoch": 0.26, "grad_norm": 0.501883864402771, "learning_rate": 0.0005885513101826263, "loss": 1.8923, "step": 7955 }, { "epoch": 0.26, "grad_norm": 0.47674983739852905, "learning_rate": 0.0005885484500930177, "loss": 1.903, "step": 7956 }, { "epoch": 0.26, "grad_norm": 0.5193099975585938, "learning_rate": 0.0005885455896531537, "loss": 1.9845, "step": 7957 }, { "epoch": 0.26, "grad_norm": 0.48736077547073364, "learning_rate": 0.0005885427288630379, "loss": 2.012, "step": 7958 }, { "epoch": 0.26, "grad_norm": 0.46850407123565674, "learning_rate": 0.0005885398677226737, "loss": 1.9849, "step": 7959 }, { "epoch": 0.26, "grad_norm": 0.4830189347267151, "learning_rate": 0.0005885370062320648, "loss": 2.0958, "step": 7960 }, { "epoch": 0.26, "grad_norm": 0.4911070168018341, "learning_rate": 0.0005885341443912143, "loss": 1.9776, "step": 7961 }, { "epoch": 0.26, "grad_norm": 0.4721015691757202, "learning_rate": 0.0005885312822001259, "loss": 1.9768, "step": 7962 }, { "epoch": 0.26, "grad_norm": 0.49836328625679016, "learning_rate": 0.000588528419658803, "loss": 1.959, "step": 7963 }, { "epoch": 0.26, "grad_norm": 0.44860151410102844, "learning_rate": 0.0005885255567672492, "loss": 1.9223, "step": 7964 }, { "epoch": 0.26, "grad_norm": 0.46205559372901917, "learning_rate": 0.0005885226935254678, "loss": 1.9065, "step": 7965 }, { "epoch": 0.27, "grad_norm": 0.4595978856086731, "learning_rate": 0.0005885198299334625, "loss": 1.8705, "step": 7966 }, { "epoch": 0.27, "grad_norm": 0.5016084313392639, "learning_rate": 0.0005885169659912364, "loss": 1.9652, "step": 7967 }, { "epoch": 0.27, "grad_norm": 0.4887418746948242, "learning_rate": 0.0005885141016987935, "loss": 1.871, "step": 7968 }, { "epoch": 0.27, "grad_norm": 0.4793738126754761, "learning_rate": 0.0005885112370561368, "loss": 1.929, "step": 7969 }, { "epoch": 0.27, "grad_norm": 0.45204266905784607, "learning_rate": 0.00058850837206327, "loss": 1.883, "step": 7970 }, { "epoch": 0.27, "grad_norm": 0.463887482881546, "learning_rate": 0.0005885055067201966, "loss": 1.9028, "step": 7971 }, { "epoch": 0.27, "grad_norm": 0.482311874628067, "learning_rate": 0.00058850264102692, "loss": 1.9955, "step": 7972 }, { "epoch": 0.27, "grad_norm": 0.4705394506454468, "learning_rate": 0.0005884997749834437, "loss": 1.9661, "step": 7973 }, { "epoch": 0.27, "grad_norm": 0.4858206510543823, "learning_rate": 0.0005884969085897711, "loss": 1.8832, "step": 7974 }, { "epoch": 0.27, "grad_norm": 0.513629138469696, "learning_rate": 0.0005884940418459059, "loss": 1.9611, "step": 7975 }, { "epoch": 0.27, "grad_norm": 0.4747409522533417, "learning_rate": 0.0005884911747518513, "loss": 1.8707, "step": 7976 }, { "epoch": 0.27, "grad_norm": 0.4756941795349121, "learning_rate": 0.0005884883073076111, "loss": 1.9596, "step": 7977 }, { "epoch": 0.27, "grad_norm": 0.5001152157783508, "learning_rate": 0.0005884854395131885, "loss": 1.941, "step": 7978 }, { "epoch": 0.27, "grad_norm": 0.5067426562309265, "learning_rate": 0.000588482571368587, "loss": 2.0277, "step": 7979 }, { "epoch": 0.27, "grad_norm": 0.4702596068382263, "learning_rate": 0.0005884797028738102, "loss": 1.9133, "step": 7980 }, { "epoch": 0.27, "grad_norm": 0.5021515488624573, "learning_rate": 0.0005884768340288617, "loss": 1.8124, "step": 7981 }, { "epoch": 0.27, "grad_norm": 0.4773160517215729, "learning_rate": 0.0005884739648337447, "loss": 1.8773, "step": 7982 }, { "epoch": 0.27, "grad_norm": 0.46464425325393677, "learning_rate": 0.0005884710952884628, "loss": 1.8873, "step": 7983 }, { "epoch": 0.27, "grad_norm": 0.45135262608528137, "learning_rate": 0.0005884682253930195, "loss": 1.9359, "step": 7984 }, { "epoch": 0.27, "grad_norm": 0.5332615971565247, "learning_rate": 0.0005884653551474183, "loss": 1.8853, "step": 7985 }, { "epoch": 0.27, "grad_norm": 0.47738343477249146, "learning_rate": 0.0005884624845516627, "loss": 1.9468, "step": 7986 }, { "epoch": 0.27, "grad_norm": 0.464883416891098, "learning_rate": 0.0005884596136057561, "loss": 1.9564, "step": 7987 }, { "epoch": 0.27, "grad_norm": 0.4753631353378296, "learning_rate": 0.0005884567423097021, "loss": 1.8924, "step": 7988 }, { "epoch": 0.27, "grad_norm": 0.4795589745044708, "learning_rate": 0.000588453870663504, "loss": 1.9247, "step": 7989 }, { "epoch": 0.27, "grad_norm": 0.4600847661495209, "learning_rate": 0.0005884509986671655, "loss": 1.8601, "step": 7990 }, { "epoch": 0.27, "grad_norm": 0.5154799818992615, "learning_rate": 0.00058844812632069, "loss": 2.0526, "step": 7991 }, { "epoch": 0.27, "grad_norm": 0.466663658618927, "learning_rate": 0.0005884452536240809, "loss": 1.9379, "step": 7992 }, { "epoch": 0.27, "grad_norm": 0.49112728238105774, "learning_rate": 0.0005884423805773418, "loss": 1.8984, "step": 7993 }, { "epoch": 0.27, "grad_norm": 0.4589522182941437, "learning_rate": 0.0005884395071804762, "loss": 1.8747, "step": 7994 }, { "epoch": 0.27, "grad_norm": 0.4711581766605377, "learning_rate": 0.0005884366334334874, "loss": 1.8498, "step": 7995 }, { "epoch": 0.27, "grad_norm": 0.46810564398765564, "learning_rate": 0.0005884337593363791, "loss": 1.8932, "step": 7996 }, { "epoch": 0.27, "grad_norm": 0.48082298040390015, "learning_rate": 0.0005884308848891547, "loss": 1.8895, "step": 7997 }, { "epoch": 0.27, "grad_norm": 0.4909168779850006, "learning_rate": 0.0005884280100918177, "loss": 2.0137, "step": 7998 }, { "epoch": 0.27, "grad_norm": 0.4892588257789612, "learning_rate": 0.0005884251349443717, "loss": 1.898, "step": 7999 }, { "epoch": 0.27, "grad_norm": 0.49303963780403137, "learning_rate": 0.0005884222594468199, "loss": 1.8816, "step": 8000 }, { "epoch": 0.27, "grad_norm": 0.5246244668960571, "learning_rate": 0.0005884193835991662, "loss": 1.9644, "step": 8001 }, { "epoch": 0.27, "grad_norm": 0.4720236361026764, "learning_rate": 0.0005884165074014137, "loss": 1.9833, "step": 8002 }, { "epoch": 0.27, "grad_norm": 0.48296183347702026, "learning_rate": 0.0005884136308535661, "loss": 1.9528, "step": 8003 }, { "epoch": 0.27, "grad_norm": 0.5317869782447815, "learning_rate": 0.0005884107539556268, "loss": 1.9437, "step": 8004 }, { "epoch": 0.27, "grad_norm": 0.4561941921710968, "learning_rate": 0.0005884078767075994, "loss": 1.8735, "step": 8005 }, { "epoch": 0.27, "grad_norm": 0.4911321997642517, "learning_rate": 0.0005884049991094874, "loss": 1.9048, "step": 8006 }, { "epoch": 0.27, "grad_norm": 0.5154805779457092, "learning_rate": 0.0005884021211612941, "loss": 1.8968, "step": 8007 }, { "epoch": 0.27, "grad_norm": 0.4718407988548279, "learning_rate": 0.0005883992428630232, "loss": 1.9396, "step": 8008 }, { "epoch": 0.27, "grad_norm": 0.4847879111766815, "learning_rate": 0.000588396364214678, "loss": 1.959, "step": 8009 }, { "epoch": 0.27, "grad_norm": 0.4772776961326599, "learning_rate": 0.0005883934852162622, "loss": 1.9253, "step": 8010 }, { "epoch": 0.27, "grad_norm": 0.46904999017715454, "learning_rate": 0.0005883906058677793, "loss": 1.8943, "step": 8011 }, { "epoch": 0.27, "grad_norm": 0.4762151539325714, "learning_rate": 0.0005883877261692325, "loss": 1.9267, "step": 8012 }, { "epoch": 0.27, "grad_norm": 0.45729300379753113, "learning_rate": 0.0005883848461206256, "loss": 1.934, "step": 8013 }, { "epoch": 0.27, "grad_norm": 0.4681299328804016, "learning_rate": 0.0005883819657219619, "loss": 1.9433, "step": 8014 }, { "epoch": 0.27, "grad_norm": 0.4754813313484192, "learning_rate": 0.0005883790849732451, "loss": 1.9053, "step": 8015 }, { "epoch": 0.27, "grad_norm": 0.4818790853023529, "learning_rate": 0.0005883762038744785, "loss": 1.8843, "step": 8016 }, { "epoch": 0.27, "grad_norm": 0.4791337549686432, "learning_rate": 0.0005883733224256657, "loss": 2.0101, "step": 8017 }, { "epoch": 0.27, "grad_norm": 0.4556441307067871, "learning_rate": 0.0005883704406268102, "loss": 1.963, "step": 8018 }, { "epoch": 0.27, "grad_norm": 0.4669601321220398, "learning_rate": 0.0005883675584779155, "loss": 1.9296, "step": 8019 }, { "epoch": 0.27, "grad_norm": 0.4970909357070923, "learning_rate": 0.000588364675978985, "loss": 1.9771, "step": 8020 }, { "epoch": 0.27, "grad_norm": 0.48150399327278137, "learning_rate": 0.0005883617931300222, "loss": 1.9009, "step": 8021 }, { "epoch": 0.27, "grad_norm": 0.4961940050125122, "learning_rate": 0.0005883589099310308, "loss": 2.0168, "step": 8022 }, { "epoch": 0.27, "grad_norm": 0.4596496522426605, "learning_rate": 0.0005883560263820141, "loss": 1.9167, "step": 8023 }, { "epoch": 0.27, "grad_norm": 0.543519139289856, "learning_rate": 0.0005883531424829758, "loss": 1.8803, "step": 8024 }, { "epoch": 0.27, "grad_norm": 0.453880250453949, "learning_rate": 0.0005883502582339191, "loss": 1.8162, "step": 8025 }, { "epoch": 0.27, "grad_norm": 0.4560612440109253, "learning_rate": 0.0005883473736348478, "loss": 1.9077, "step": 8026 }, { "epoch": 0.27, "grad_norm": 0.4605187475681305, "learning_rate": 0.0005883444886857652, "loss": 1.9862, "step": 8027 }, { "epoch": 0.27, "grad_norm": 0.45937103033065796, "learning_rate": 0.000588341603386675, "loss": 1.9312, "step": 8028 }, { "epoch": 0.27, "grad_norm": 0.4641837179660797, "learning_rate": 0.0005883387177375804, "loss": 1.9255, "step": 8029 }, { "epoch": 0.27, "grad_norm": 0.45504671335220337, "learning_rate": 0.0005883358317384853, "loss": 1.9815, "step": 8030 }, { "epoch": 0.27, "grad_norm": 0.5542157292366028, "learning_rate": 0.0005883329453893929, "loss": 1.93, "step": 8031 }, { "epoch": 0.27, "grad_norm": 0.4779844582080841, "learning_rate": 0.0005883300586903066, "loss": 1.9062, "step": 8032 }, { "epoch": 0.27, "grad_norm": 0.44314873218536377, "learning_rate": 0.0005883271716412302, "loss": 1.9391, "step": 8033 }, { "epoch": 0.27, "grad_norm": 0.4635753333568573, "learning_rate": 0.0005883242842421672, "loss": 1.9552, "step": 8034 }, { "epoch": 0.27, "grad_norm": 0.47094908356666565, "learning_rate": 0.0005883213964931209, "loss": 1.9067, "step": 8035 }, { "epoch": 0.27, "grad_norm": 0.4397737681865692, "learning_rate": 0.0005883185083940949, "loss": 1.9459, "step": 8036 }, { "epoch": 0.27, "grad_norm": 0.48524272441864014, "learning_rate": 0.0005883156199450928, "loss": 1.9205, "step": 8037 }, { "epoch": 0.27, "grad_norm": 0.46360063552856445, "learning_rate": 0.0005883127311461181, "loss": 1.9461, "step": 8038 }, { "epoch": 0.27, "grad_norm": 0.4565584361553192, "learning_rate": 0.000588309841997174, "loss": 1.9094, "step": 8039 }, { "epoch": 0.27, "grad_norm": 0.49219682812690735, "learning_rate": 0.0005883069524982643, "loss": 1.9296, "step": 8040 }, { "epoch": 0.27, "grad_norm": 0.46372345089912415, "learning_rate": 0.0005883040626493925, "loss": 1.9071, "step": 8041 }, { "epoch": 0.27, "grad_norm": 0.46406683325767517, "learning_rate": 0.000588301172450562, "loss": 1.9483, "step": 8042 }, { "epoch": 0.27, "grad_norm": 0.505431592464447, "learning_rate": 0.0005882982819017765, "loss": 1.8831, "step": 8043 }, { "epoch": 0.27, "grad_norm": 0.46906670928001404, "learning_rate": 0.0005882953910030392, "loss": 1.9086, "step": 8044 }, { "epoch": 0.27, "grad_norm": 0.48221269249916077, "learning_rate": 0.0005882924997543537, "loss": 1.9161, "step": 8045 }, { "epoch": 0.27, "grad_norm": 0.4818400740623474, "learning_rate": 0.0005882896081557238, "loss": 1.9298, "step": 8046 }, { "epoch": 0.27, "grad_norm": 0.4661770462989807, "learning_rate": 0.0005882867162071527, "loss": 1.9636, "step": 8047 }, { "epoch": 0.27, "grad_norm": 0.45191290974617004, "learning_rate": 0.0005882838239086439, "loss": 1.8579, "step": 8048 }, { "epoch": 0.27, "grad_norm": 0.49277177453041077, "learning_rate": 0.0005882809312602012, "loss": 1.9423, "step": 8049 }, { "epoch": 0.27, "grad_norm": 0.48197001218795776, "learning_rate": 0.0005882780382618278, "loss": 1.9115, "step": 8050 }, { "epoch": 0.27, "grad_norm": 0.46665310859680176, "learning_rate": 0.0005882751449135273, "loss": 1.9785, "step": 8051 }, { "epoch": 0.27, "grad_norm": 0.4626825451850891, "learning_rate": 0.0005882722512153034, "loss": 1.9248, "step": 8052 }, { "epoch": 0.27, "grad_norm": 0.46827083826065063, "learning_rate": 0.0005882693571671593, "loss": 1.8781, "step": 8053 }, { "epoch": 0.27, "grad_norm": 0.46166133880615234, "learning_rate": 0.0005882664627690988, "loss": 1.9307, "step": 8054 }, { "epoch": 0.27, "grad_norm": 0.45921146869659424, "learning_rate": 0.0005882635680211253, "loss": 1.8867, "step": 8055 }, { "epoch": 0.27, "grad_norm": 0.47473737597465515, "learning_rate": 0.0005882606729232421, "loss": 1.9873, "step": 8056 }, { "epoch": 0.27, "grad_norm": 0.44699743390083313, "learning_rate": 0.0005882577774754531, "loss": 1.8579, "step": 8057 }, { "epoch": 0.27, "grad_norm": 0.477990061044693, "learning_rate": 0.0005882548816777616, "loss": 1.9761, "step": 8058 }, { "epoch": 0.27, "grad_norm": 0.4732469916343689, "learning_rate": 0.0005882519855301712, "loss": 1.9721, "step": 8059 }, { "epoch": 0.27, "grad_norm": 0.4582688808441162, "learning_rate": 0.0005882490890326853, "loss": 1.9255, "step": 8060 }, { "epoch": 0.27, "grad_norm": 0.4623037278652191, "learning_rate": 0.0005882461921853074, "loss": 1.9117, "step": 8061 }, { "epoch": 0.27, "grad_norm": 0.4541340172290802, "learning_rate": 0.0005882432949880413, "loss": 1.9982, "step": 8062 }, { "epoch": 0.27, "grad_norm": 0.4678913950920105, "learning_rate": 0.0005882403974408901, "loss": 1.9544, "step": 8063 }, { "epoch": 0.27, "grad_norm": 0.4666430950164795, "learning_rate": 0.0005882374995438576, "loss": 1.9774, "step": 8064 }, { "epoch": 0.27, "grad_norm": 0.4539903998374939, "learning_rate": 0.0005882346012969473, "loss": 1.9066, "step": 8065 }, { "epoch": 0.27, "grad_norm": 0.4550251364707947, "learning_rate": 0.0005882317027001625, "loss": 1.9179, "step": 8066 }, { "epoch": 0.27, "grad_norm": 0.4650346338748932, "learning_rate": 0.0005882288037535071, "loss": 1.9342, "step": 8067 }, { "epoch": 0.27, "grad_norm": 0.4647201895713806, "learning_rate": 0.0005882259044569843, "loss": 1.9866, "step": 8068 }, { "epoch": 0.27, "grad_norm": 0.47317078709602356, "learning_rate": 0.0005882230048105977, "loss": 1.9377, "step": 8069 }, { "epoch": 0.27, "grad_norm": 0.4687139093875885, "learning_rate": 0.0005882201048143509, "loss": 1.8439, "step": 8070 }, { "epoch": 0.27, "grad_norm": 0.4734058678150177, "learning_rate": 0.0005882172044682473, "loss": 1.9316, "step": 8071 }, { "epoch": 0.27, "grad_norm": 0.4683353304862976, "learning_rate": 0.0005882143037722905, "loss": 1.9977, "step": 8072 }, { "epoch": 0.27, "grad_norm": 0.47066637873649597, "learning_rate": 0.000588211402726484, "loss": 1.8896, "step": 8073 }, { "epoch": 0.27, "grad_norm": 0.4643194377422333, "learning_rate": 0.0005882085013308313, "loss": 1.9874, "step": 8074 }, { "epoch": 0.27, "grad_norm": 0.47372302412986755, "learning_rate": 0.000588205599585336, "loss": 1.888, "step": 8075 }, { "epoch": 0.27, "grad_norm": 0.47093337774276733, "learning_rate": 0.0005882026974900015, "loss": 1.882, "step": 8076 }, { "epoch": 0.27, "grad_norm": 0.4489365816116333, "learning_rate": 0.0005881997950448315, "loss": 1.8063, "step": 8077 }, { "epoch": 0.27, "grad_norm": 0.4590783715248108, "learning_rate": 0.0005881968922498293, "loss": 1.8858, "step": 8078 }, { "epoch": 0.27, "grad_norm": 0.4622593820095062, "learning_rate": 0.0005881939891049986, "loss": 1.8963, "step": 8079 }, { "epoch": 0.27, "grad_norm": 0.47346916794776917, "learning_rate": 0.0005881910856103428, "loss": 1.9183, "step": 8080 }, { "epoch": 0.27, "grad_norm": 0.46631738543510437, "learning_rate": 0.0005881881817658655, "loss": 1.9369, "step": 8081 }, { "epoch": 0.27, "grad_norm": 0.46728280186653137, "learning_rate": 0.0005881852775715703, "loss": 1.847, "step": 8082 }, { "epoch": 0.27, "grad_norm": 0.444114089012146, "learning_rate": 0.0005881823730274606, "loss": 1.8651, "step": 8083 }, { "epoch": 0.27, "grad_norm": 0.4736056327819824, "learning_rate": 0.0005881794681335398, "loss": 1.8974, "step": 8084 }, { "epoch": 0.27, "grad_norm": 0.46188458800315857, "learning_rate": 0.0005881765628898117, "loss": 1.9653, "step": 8085 }, { "epoch": 0.27, "grad_norm": 0.44700101017951965, "learning_rate": 0.0005881736572962798, "loss": 1.887, "step": 8086 }, { "epoch": 0.27, "grad_norm": 0.45416077971458435, "learning_rate": 0.0005881707513529475, "loss": 2.0031, "step": 8087 }, { "epoch": 0.27, "grad_norm": 0.4680027961730957, "learning_rate": 0.0005881678450598184, "loss": 1.9269, "step": 8088 }, { "epoch": 0.27, "grad_norm": 0.48428472876548767, "learning_rate": 0.0005881649384168959, "loss": 1.8981, "step": 8089 }, { "epoch": 0.27, "grad_norm": 0.4713990092277527, "learning_rate": 0.0005881620314241836, "loss": 1.9599, "step": 8090 }, { "epoch": 0.27, "grad_norm": 0.4635641872882843, "learning_rate": 0.0005881591240816851, "loss": 2.0074, "step": 8091 }, { "epoch": 0.27, "grad_norm": 0.4539172649383545, "learning_rate": 0.0005881562163894038, "loss": 1.9014, "step": 8092 }, { "epoch": 0.27, "grad_norm": 0.4616338610649109, "learning_rate": 0.0005881533083473435, "loss": 1.94, "step": 8093 }, { "epoch": 0.27, "grad_norm": 0.46195098757743835, "learning_rate": 0.0005881503999555075, "loss": 1.9625, "step": 8094 }, { "epoch": 0.27, "grad_norm": 0.4632827639579773, "learning_rate": 0.0005881474912138992, "loss": 1.9224, "step": 8095 }, { "epoch": 0.27, "grad_norm": 0.4501526951789856, "learning_rate": 0.0005881445821225226, "loss": 1.8594, "step": 8096 }, { "epoch": 0.27, "grad_norm": 0.49531182646751404, "learning_rate": 0.0005881416726813807, "loss": 1.9024, "step": 8097 }, { "epoch": 0.27, "grad_norm": 0.472771555185318, "learning_rate": 0.0005881387628904772, "loss": 1.9859, "step": 8098 }, { "epoch": 0.27, "grad_norm": 0.47125551104545593, "learning_rate": 0.0005881358527498159, "loss": 2.0356, "step": 8099 }, { "epoch": 0.27, "grad_norm": 0.4653344452381134, "learning_rate": 0.0005881329422594, "loss": 1.8675, "step": 8100 }, { "epoch": 0.27, "grad_norm": 0.4817312955856323, "learning_rate": 0.0005881300314192332, "loss": 1.8898, "step": 8101 }, { "epoch": 0.27, "grad_norm": 0.4571862816810608, "learning_rate": 0.000588127120229319, "loss": 1.9383, "step": 8102 }, { "epoch": 0.27, "grad_norm": 0.4479465186595917, "learning_rate": 0.000588124208689661, "loss": 1.8678, "step": 8103 }, { "epoch": 0.27, "grad_norm": 0.4644269347190857, "learning_rate": 0.0005881212968002626, "loss": 1.9543, "step": 8104 }, { "epoch": 0.27, "grad_norm": 0.6079177260398865, "learning_rate": 0.0005881183845611273, "loss": 1.8954, "step": 8105 }, { "epoch": 0.27, "grad_norm": 0.47189295291900635, "learning_rate": 0.0005881154719722589, "loss": 2.0209, "step": 8106 }, { "epoch": 0.27, "grad_norm": 0.46068719029426575, "learning_rate": 0.0005881125590336606, "loss": 1.9361, "step": 8107 }, { "epoch": 0.27, "grad_norm": 0.4554450213909149, "learning_rate": 0.0005881096457453363, "loss": 1.9447, "step": 8108 }, { "epoch": 0.27, "grad_norm": 0.48303914070129395, "learning_rate": 0.0005881067321072892, "loss": 1.9094, "step": 8109 }, { "epoch": 0.27, "grad_norm": 0.45036202669143677, "learning_rate": 0.0005881038181195231, "loss": 1.8739, "step": 8110 }, { "epoch": 0.27, "grad_norm": 0.44642770290374756, "learning_rate": 0.0005881009037820412, "loss": 1.9156, "step": 8111 }, { "epoch": 0.27, "grad_norm": 0.47098055481910706, "learning_rate": 0.0005880979890948474, "loss": 1.96, "step": 8112 }, { "epoch": 0.27, "grad_norm": 0.47893473505973816, "learning_rate": 0.0005880950740579452, "loss": 1.958, "step": 8113 }, { "epoch": 0.27, "grad_norm": 0.4553520977497101, "learning_rate": 0.0005880921586713378, "loss": 1.8315, "step": 8114 }, { "epoch": 0.27, "grad_norm": 0.48720139265060425, "learning_rate": 0.000588089242935029, "loss": 1.8837, "step": 8115 }, { "epoch": 0.27, "grad_norm": 0.4664783179759979, "learning_rate": 0.0005880863268490225, "loss": 1.8341, "step": 8116 }, { "epoch": 0.27, "grad_norm": 0.47256314754486084, "learning_rate": 0.0005880834104133215, "loss": 1.9944, "step": 8117 }, { "epoch": 0.27, "grad_norm": 0.44933265447616577, "learning_rate": 0.0005880804936279297, "loss": 1.9438, "step": 8118 }, { "epoch": 0.27, "grad_norm": 0.4449010193347931, "learning_rate": 0.0005880775764928506, "loss": 1.8316, "step": 8119 }, { "epoch": 0.27, "grad_norm": 0.4767729640007019, "learning_rate": 0.0005880746590080879, "loss": 1.9136, "step": 8120 }, { "epoch": 0.27, "grad_norm": 0.4463082253932953, "learning_rate": 0.0005880717411736448, "loss": 1.8815, "step": 8121 }, { "epoch": 0.27, "grad_norm": 0.4548746943473816, "learning_rate": 0.0005880688229895252, "loss": 1.9147, "step": 8122 }, { "epoch": 0.27, "grad_norm": 0.4754865765571594, "learning_rate": 0.0005880659044557326, "loss": 1.9736, "step": 8123 }, { "epoch": 0.27, "grad_norm": 0.4877949357032776, "learning_rate": 0.0005880629855722702, "loss": 1.9329, "step": 8124 }, { "epoch": 0.27, "grad_norm": 0.4510118365287781, "learning_rate": 0.0005880600663391418, "loss": 1.9044, "step": 8125 }, { "epoch": 0.27, "grad_norm": 0.46141234040260315, "learning_rate": 0.0005880571467563511, "loss": 1.968, "step": 8126 }, { "epoch": 0.27, "grad_norm": 0.5060674548149109, "learning_rate": 0.0005880542268239013, "loss": 1.9, "step": 8127 }, { "epoch": 0.27, "grad_norm": 0.46201932430267334, "learning_rate": 0.0005880513065417962, "loss": 1.8963, "step": 8128 }, { "epoch": 0.27, "grad_norm": 0.46917304396629333, "learning_rate": 0.0005880483859100393, "loss": 1.9887, "step": 8129 }, { "epoch": 0.27, "grad_norm": 0.4953446388244629, "learning_rate": 0.000588045464928634, "loss": 1.9141, "step": 8130 }, { "epoch": 0.27, "grad_norm": 0.45989981293678284, "learning_rate": 0.0005880425435975839, "loss": 1.9048, "step": 8131 }, { "epoch": 0.27, "grad_norm": 0.4490468502044678, "learning_rate": 0.0005880396219168928, "loss": 1.89, "step": 8132 }, { "epoch": 0.27, "grad_norm": 0.5010347962379456, "learning_rate": 0.000588036699886564, "loss": 1.8906, "step": 8133 }, { "epoch": 0.27, "grad_norm": 0.48025140166282654, "learning_rate": 0.0005880337775066009, "loss": 1.9325, "step": 8134 }, { "epoch": 0.27, "grad_norm": 0.4633318781852722, "learning_rate": 0.0005880308547770073, "loss": 1.8839, "step": 8135 }, { "epoch": 0.27, "grad_norm": 0.48229527473449707, "learning_rate": 0.0005880279316977868, "loss": 1.9833, "step": 8136 }, { "epoch": 0.27, "grad_norm": 0.4624748229980469, "learning_rate": 0.0005880250082689427, "loss": 1.8583, "step": 8137 }, { "epoch": 0.27, "grad_norm": 0.4609629213809967, "learning_rate": 0.0005880220844904787, "loss": 1.8915, "step": 8138 }, { "epoch": 0.27, "grad_norm": 0.4841667115688324, "learning_rate": 0.0005880191603623984, "loss": 1.9126, "step": 8139 }, { "epoch": 0.27, "grad_norm": 0.4894450008869171, "learning_rate": 0.0005880162358847052, "loss": 1.8847, "step": 8140 }, { "epoch": 0.27, "grad_norm": 0.4746709167957306, "learning_rate": 0.0005880133110574028, "loss": 1.9796, "step": 8141 }, { "epoch": 0.27, "grad_norm": 0.47142401337623596, "learning_rate": 0.0005880103858804946, "loss": 1.9463, "step": 8142 }, { "epoch": 0.27, "grad_norm": 0.5004647374153137, "learning_rate": 0.0005880074603539843, "loss": 1.8596, "step": 8143 }, { "epoch": 0.27, "grad_norm": 0.46002283692359924, "learning_rate": 0.0005880045344778754, "loss": 1.9067, "step": 8144 }, { "epoch": 0.27, "grad_norm": 0.46257275342941284, "learning_rate": 0.0005880016082521714, "loss": 1.9396, "step": 8145 }, { "epoch": 0.27, "grad_norm": 0.4734523892402649, "learning_rate": 0.0005879986816768758, "loss": 1.8518, "step": 8146 }, { "epoch": 0.27, "grad_norm": 0.45828285813331604, "learning_rate": 0.0005879957547519923, "loss": 1.9256, "step": 8147 }, { "epoch": 0.27, "grad_norm": 0.4789900481700897, "learning_rate": 0.0005879928274775244, "loss": 1.9732, "step": 8148 }, { "epoch": 0.27, "grad_norm": 0.4541672468185425, "learning_rate": 0.0005879898998534756, "loss": 1.9294, "step": 8149 }, { "epoch": 0.27, "grad_norm": 0.4601253569126129, "learning_rate": 0.0005879869718798495, "loss": 1.8815, "step": 8150 }, { "epoch": 0.27, "grad_norm": 0.45545637607574463, "learning_rate": 0.0005879840435566498, "loss": 1.9051, "step": 8151 }, { "epoch": 0.27, "grad_norm": 0.4676855206489563, "learning_rate": 0.0005879811148838797, "loss": 1.9297, "step": 8152 }, { "epoch": 0.27, "grad_norm": 0.4605194628238678, "learning_rate": 0.000587978185861543, "loss": 1.8679, "step": 8153 }, { "epoch": 0.27, "grad_norm": 0.46959853172302246, "learning_rate": 0.0005879752564896432, "loss": 1.9159, "step": 8154 }, { "epoch": 0.27, "grad_norm": 0.47542300820350647, "learning_rate": 0.0005879723267681839, "loss": 1.9556, "step": 8155 }, { "epoch": 0.27, "grad_norm": 0.450623095035553, "learning_rate": 0.0005879693966971687, "loss": 1.8438, "step": 8156 }, { "epoch": 0.27, "grad_norm": 0.47271454334259033, "learning_rate": 0.000587966466276601, "loss": 1.9073, "step": 8157 }, { "epoch": 0.27, "grad_norm": 0.45194464921951294, "learning_rate": 0.0005879635355064845, "loss": 1.8601, "step": 8158 }, { "epoch": 0.27, "grad_norm": 0.4644336402416229, "learning_rate": 0.0005879606043868226, "loss": 1.9182, "step": 8159 }, { "epoch": 0.27, "grad_norm": 0.4650089740753174, "learning_rate": 0.000587957672917619, "loss": 1.9446, "step": 8160 }, { "epoch": 0.27, "grad_norm": 0.4694801867008209, "learning_rate": 0.0005879547410988773, "loss": 1.9288, "step": 8161 }, { "epoch": 0.27, "grad_norm": 0.4567956328392029, "learning_rate": 0.0005879518089306009, "loss": 1.945, "step": 8162 }, { "epoch": 0.27, "grad_norm": 0.46242567896842957, "learning_rate": 0.0005879488764127934, "loss": 1.99, "step": 8163 }, { "epoch": 0.27, "grad_norm": 0.47945961356163025, "learning_rate": 0.0005879459435454584, "loss": 1.952, "step": 8164 }, { "epoch": 0.27, "grad_norm": 0.4489418566226959, "learning_rate": 0.0005879430103285995, "loss": 1.9108, "step": 8165 }, { "epoch": 0.27, "grad_norm": 0.45022520422935486, "learning_rate": 0.0005879400767622201, "loss": 1.9484, "step": 8166 }, { "epoch": 0.27, "grad_norm": 0.44649970531463623, "learning_rate": 0.000587937142846324, "loss": 1.9429, "step": 8167 }, { "epoch": 0.27, "grad_norm": 0.4701888859272003, "learning_rate": 0.0005879342085809146, "loss": 1.9172, "step": 8168 }, { "epoch": 0.27, "grad_norm": 0.45305144786834717, "learning_rate": 0.0005879312739659955, "loss": 1.8901, "step": 8169 }, { "epoch": 0.27, "grad_norm": 0.47254619002342224, "learning_rate": 0.0005879283390015702, "loss": 1.8983, "step": 8170 }, { "epoch": 0.27, "grad_norm": 0.46556004881858826, "learning_rate": 0.0005879254036876424, "loss": 1.9484, "step": 8171 }, { "epoch": 0.27, "grad_norm": 0.4753006100654602, "learning_rate": 0.0005879224680242156, "loss": 1.9557, "step": 8172 }, { "epoch": 0.27, "grad_norm": 0.46915003657341003, "learning_rate": 0.0005879195320112932, "loss": 1.9237, "step": 8173 }, { "epoch": 0.27, "grad_norm": 0.45588913559913635, "learning_rate": 0.000587916595648879, "loss": 1.867, "step": 8174 }, { "epoch": 0.27, "grad_norm": 0.465913861989975, "learning_rate": 0.0005879136589369765, "loss": 1.8682, "step": 8175 }, { "epoch": 0.27, "grad_norm": 0.4708918631076813, "learning_rate": 0.0005879107218755893, "loss": 2.0019, "step": 8176 }, { "epoch": 0.27, "grad_norm": 0.48274558782577515, "learning_rate": 0.0005879077844647208, "loss": 1.9881, "step": 8177 }, { "epoch": 0.27, "grad_norm": 0.47089001536369324, "learning_rate": 0.0005879048467043747, "loss": 1.9155, "step": 8178 }, { "epoch": 0.27, "grad_norm": 0.46752429008483887, "learning_rate": 0.0005879019085945546, "loss": 1.8745, "step": 8179 }, { "epoch": 0.27, "grad_norm": 0.45287007093429565, "learning_rate": 0.0005878989701352639, "loss": 1.8741, "step": 8180 }, { "epoch": 0.27, "grad_norm": 0.44538623094558716, "learning_rate": 0.0005878960313265064, "loss": 1.951, "step": 8181 }, { "epoch": 0.27, "grad_norm": 0.47810959815979004, "learning_rate": 0.0005878930921682854, "loss": 1.9832, "step": 8182 }, { "epoch": 0.27, "grad_norm": 0.4745640158653259, "learning_rate": 0.0005878901526606046, "loss": 1.892, "step": 8183 }, { "epoch": 0.27, "grad_norm": 0.4844750463962555, "learning_rate": 0.0005878872128034676, "loss": 1.9152, "step": 8184 }, { "epoch": 0.27, "grad_norm": 0.47406700253486633, "learning_rate": 0.000587884272596878, "loss": 1.9031, "step": 8185 }, { "epoch": 0.27, "grad_norm": 0.4716467559337616, "learning_rate": 0.0005878813320408393, "loss": 1.971, "step": 8186 }, { "epoch": 0.27, "grad_norm": 0.46295326948165894, "learning_rate": 0.000587878391135355, "loss": 1.9751, "step": 8187 }, { "epoch": 0.27, "grad_norm": 0.5054295063018799, "learning_rate": 0.0005878754498804289, "loss": 1.9468, "step": 8188 }, { "epoch": 0.27, "grad_norm": 0.47221440076828003, "learning_rate": 0.0005878725082760644, "loss": 1.9519, "step": 8189 }, { "epoch": 0.27, "grad_norm": 0.46052008867263794, "learning_rate": 0.000587869566322265, "loss": 1.8723, "step": 8190 }, { "epoch": 0.27, "grad_norm": 0.4599047005176544, "learning_rate": 0.0005878666240190343, "loss": 1.9101, "step": 8191 }, { "epoch": 0.27, "grad_norm": 0.47784844040870667, "learning_rate": 0.0005878636813663761, "loss": 1.8962, "step": 8192 }, { "epoch": 0.27, "grad_norm": 0.4713767170906067, "learning_rate": 0.0005878607383642937, "loss": 1.9026, "step": 8193 }, { "epoch": 0.27, "grad_norm": 0.4531618356704712, "learning_rate": 0.0005878577950127907, "loss": 1.8828, "step": 8194 }, { "epoch": 0.27, "grad_norm": 0.466848224401474, "learning_rate": 0.0005878548513118709, "loss": 1.9416, "step": 8195 }, { "epoch": 0.27, "grad_norm": 0.470885694026947, "learning_rate": 0.0005878519072615376, "loss": 1.901, "step": 8196 }, { "epoch": 0.27, "grad_norm": 0.4966026544570923, "learning_rate": 0.0005878489628617946, "loss": 2.0129, "step": 8197 }, { "epoch": 0.27, "grad_norm": 0.506355345249176, "learning_rate": 0.0005878460181126453, "loss": 1.8919, "step": 8198 }, { "epoch": 0.27, "grad_norm": 0.4558583199977875, "learning_rate": 0.0005878430730140935, "loss": 1.8887, "step": 8199 }, { "epoch": 0.27, "grad_norm": 0.5060412883758545, "learning_rate": 0.0005878401275661425, "loss": 1.935, "step": 8200 }, { "epoch": 0.27, "grad_norm": 0.4910069704055786, "learning_rate": 0.000587837181768796, "loss": 1.9664, "step": 8201 }, { "epoch": 0.27, "grad_norm": 0.47079578042030334, "learning_rate": 0.0005878342356220575, "loss": 1.9148, "step": 8202 }, { "epoch": 0.27, "grad_norm": 0.4878486692905426, "learning_rate": 0.0005878312891259308, "loss": 1.9396, "step": 8203 }, { "epoch": 0.27, "grad_norm": 0.4765639007091522, "learning_rate": 0.0005878283422804193, "loss": 1.9081, "step": 8204 }, { "epoch": 0.27, "grad_norm": 0.4799632728099823, "learning_rate": 0.0005878253950855265, "loss": 1.9872, "step": 8205 }, { "epoch": 0.27, "grad_norm": 0.46822622418403625, "learning_rate": 0.0005878224475412561, "loss": 1.8635, "step": 8206 }, { "epoch": 0.27, "grad_norm": 0.4763004779815674, "learning_rate": 0.0005878194996476118, "loss": 1.9502, "step": 8207 }, { "epoch": 0.27, "grad_norm": 0.4554502069950104, "learning_rate": 0.0005878165514045968, "loss": 1.8844, "step": 8208 }, { "epoch": 0.27, "grad_norm": 0.4571152329444885, "learning_rate": 0.0005878136028122151, "loss": 1.8485, "step": 8209 }, { "epoch": 0.27, "grad_norm": 0.4761614501476288, "learning_rate": 0.0005878106538704701, "loss": 1.9134, "step": 8210 }, { "epoch": 0.27, "grad_norm": 0.4802958071231842, "learning_rate": 0.0005878077045793652, "loss": 1.9353, "step": 8211 }, { "epoch": 0.27, "grad_norm": 0.4596386253833771, "learning_rate": 0.0005878047549389043, "loss": 1.9119, "step": 8212 }, { "epoch": 0.27, "grad_norm": 0.46934276819229126, "learning_rate": 0.0005878018049490908, "loss": 1.8733, "step": 8213 }, { "epoch": 0.27, "grad_norm": 0.46726763248443604, "learning_rate": 0.0005877988546099283, "loss": 1.8859, "step": 8214 }, { "epoch": 0.27, "grad_norm": 0.5206489562988281, "learning_rate": 0.0005877959039214205, "loss": 1.9164, "step": 8215 }, { "epoch": 0.27, "grad_norm": 0.45813748240470886, "learning_rate": 0.0005877929528835707, "loss": 1.9002, "step": 8216 }, { "epoch": 0.27, "grad_norm": 0.44594481587409973, "learning_rate": 0.0005877900014963828, "loss": 1.8855, "step": 8217 }, { "epoch": 0.27, "grad_norm": 0.4611562192440033, "learning_rate": 0.0005877870497598601, "loss": 1.926, "step": 8218 }, { "epoch": 0.27, "grad_norm": 0.4566965401172638, "learning_rate": 0.0005877840976740065, "loss": 1.8818, "step": 8219 }, { "epoch": 0.27, "grad_norm": 0.46067914366722107, "learning_rate": 0.0005877811452388253, "loss": 1.9264, "step": 8220 }, { "epoch": 0.27, "grad_norm": 0.4489743411540985, "learning_rate": 0.0005877781924543201, "loss": 1.8698, "step": 8221 }, { "epoch": 0.27, "grad_norm": 0.45521080493927, "learning_rate": 0.0005877752393204949, "loss": 1.8022, "step": 8222 }, { "epoch": 0.27, "grad_norm": 0.4658394455909729, "learning_rate": 0.0005877722858373527, "loss": 1.8684, "step": 8223 }, { "epoch": 0.27, "grad_norm": 0.48760807514190674, "learning_rate": 0.0005877693320048973, "loss": 1.963, "step": 8224 }, { "epoch": 0.27, "grad_norm": 0.4664141535758972, "learning_rate": 0.0005877663778231325, "loss": 1.9353, "step": 8225 }, { "epoch": 0.27, "grad_norm": 0.46685925126075745, "learning_rate": 0.0005877634232920616, "loss": 1.974, "step": 8226 }, { "epoch": 0.27, "grad_norm": 0.4814773499965668, "learning_rate": 0.0005877604684116883, "loss": 1.9112, "step": 8227 }, { "epoch": 0.27, "grad_norm": 0.459333598613739, "learning_rate": 0.0005877575131820163, "loss": 1.9306, "step": 8228 }, { "epoch": 0.27, "grad_norm": 0.46940553188323975, "learning_rate": 0.000587754557603049, "loss": 1.8529, "step": 8229 }, { "epoch": 0.27, "grad_norm": 0.44474953413009644, "learning_rate": 0.00058775160167479, "loss": 1.9108, "step": 8230 }, { "epoch": 0.27, "grad_norm": 0.4623117744922638, "learning_rate": 0.0005877486453972432, "loss": 1.9146, "step": 8231 }, { "epoch": 0.27, "grad_norm": 0.45715513825416565, "learning_rate": 0.0005877456887704117, "loss": 1.8485, "step": 8232 }, { "epoch": 0.27, "grad_norm": 0.47513583302497864, "learning_rate": 0.0005877427317942994, "loss": 1.9474, "step": 8233 }, { "epoch": 0.27, "grad_norm": 0.4620794355869293, "learning_rate": 0.0005877397744689098, "loss": 1.9953, "step": 8234 }, { "epoch": 0.27, "grad_norm": 0.45858749747276306, "learning_rate": 0.0005877368167942465, "loss": 1.9084, "step": 8235 }, { "epoch": 0.27, "grad_norm": 0.47889813780784607, "learning_rate": 0.0005877338587703132, "loss": 1.8847, "step": 8236 }, { "epoch": 0.27, "grad_norm": 0.49274295568466187, "learning_rate": 0.0005877309003971133, "loss": 1.9324, "step": 8237 }, { "epoch": 0.27, "grad_norm": 0.45172154903411865, "learning_rate": 0.0005877279416746505, "loss": 2.017, "step": 8238 }, { "epoch": 0.27, "grad_norm": 0.45541685819625854, "learning_rate": 0.0005877249826029285, "loss": 1.8736, "step": 8239 }, { "epoch": 0.27, "grad_norm": 0.4631109833717346, "learning_rate": 0.0005877220231819507, "loss": 1.9239, "step": 8240 }, { "epoch": 0.27, "grad_norm": 0.46254536509513855, "learning_rate": 0.0005877190634117206, "loss": 2.0065, "step": 8241 }, { "epoch": 0.27, "grad_norm": 0.46355903148651123, "learning_rate": 0.000587716103292242, "loss": 1.9381, "step": 8242 }, { "epoch": 0.27, "grad_norm": 0.4564259648323059, "learning_rate": 0.0005877131428235185, "loss": 1.9466, "step": 8243 }, { "epoch": 0.27, "grad_norm": 0.4659305214881897, "learning_rate": 0.0005877101820055537, "loss": 1.9554, "step": 8244 }, { "epoch": 0.27, "grad_norm": 0.4468884766101837, "learning_rate": 0.0005877072208383511, "loss": 1.8257, "step": 8245 }, { "epoch": 0.27, "grad_norm": 0.4494359493255615, "learning_rate": 0.0005877042593219143, "loss": 1.8742, "step": 8246 }, { "epoch": 0.27, "grad_norm": 0.4528311491012573, "learning_rate": 0.000587701297456247, "loss": 1.8883, "step": 8247 }, { "epoch": 0.27, "grad_norm": 0.4772886335849762, "learning_rate": 0.0005876983352413525, "loss": 1.9613, "step": 8248 }, { "epoch": 0.27, "grad_norm": 0.47851067781448364, "learning_rate": 0.0005876953726772347, "loss": 1.9333, "step": 8249 }, { "epoch": 0.27, "grad_norm": 0.472002774477005, "learning_rate": 0.0005876924097638973, "loss": 1.9567, "step": 8250 }, { "epoch": 0.27, "grad_norm": 0.4628778100013733, "learning_rate": 0.0005876894465013436, "loss": 1.9222, "step": 8251 }, { "epoch": 0.27, "grad_norm": 0.47613200545310974, "learning_rate": 0.0005876864828895771, "loss": 1.9293, "step": 8252 }, { "epoch": 0.27, "grad_norm": 0.45310401916503906, "learning_rate": 0.0005876835189286018, "loss": 1.8599, "step": 8253 }, { "epoch": 0.27, "grad_norm": 0.4651321768760681, "learning_rate": 0.000587680554618421, "loss": 1.9164, "step": 8254 }, { "epoch": 0.27, "grad_norm": 0.5039058327674866, "learning_rate": 0.0005876775899590385, "loss": 1.922, "step": 8255 }, { "epoch": 0.27, "grad_norm": 0.49547505378723145, "learning_rate": 0.0005876746249504578, "loss": 1.8833, "step": 8256 }, { "epoch": 0.27, "grad_norm": 0.46364060044288635, "learning_rate": 0.0005876716595926823, "loss": 1.9234, "step": 8257 }, { "epoch": 0.27, "grad_norm": 0.45930689573287964, "learning_rate": 0.000587668693885716, "loss": 1.8261, "step": 8258 }, { "epoch": 0.27, "grad_norm": 0.48913687467575073, "learning_rate": 0.0005876657278295623, "loss": 1.8829, "step": 8259 }, { "epoch": 0.27, "grad_norm": 0.47906145453453064, "learning_rate": 0.0005876627614242246, "loss": 1.9173, "step": 8260 }, { "epoch": 0.27, "grad_norm": 0.45634594559669495, "learning_rate": 0.0005876597946697068, "loss": 1.9373, "step": 8261 }, { "epoch": 0.27, "grad_norm": 0.5243393182754517, "learning_rate": 0.0005876568275660124, "loss": 1.8756, "step": 8262 }, { "epoch": 0.27, "grad_norm": 0.4664281904697418, "learning_rate": 0.000587653860113145, "loss": 1.9359, "step": 8263 }, { "epoch": 0.27, "grad_norm": 0.45428451895713806, "learning_rate": 0.0005876508923111082, "loss": 1.8161, "step": 8264 }, { "epoch": 0.27, "grad_norm": 0.4545474946498871, "learning_rate": 0.0005876479241599056, "loss": 1.9554, "step": 8265 }, { "epoch": 0.28, "grad_norm": 0.47617584466934204, "learning_rate": 0.0005876449556595409, "loss": 1.9046, "step": 8266 }, { "epoch": 0.28, "grad_norm": 0.4808278977870941, "learning_rate": 0.0005876419868100175, "loss": 1.8883, "step": 8267 }, { "epoch": 0.28, "grad_norm": 0.46863317489624023, "learning_rate": 0.000587639017611339, "loss": 1.9436, "step": 8268 }, { "epoch": 0.28, "grad_norm": 0.46367353200912476, "learning_rate": 0.0005876360480635092, "loss": 1.9564, "step": 8269 }, { "epoch": 0.28, "grad_norm": 0.45344531536102295, "learning_rate": 0.0005876330781665317, "loss": 1.914, "step": 8270 }, { "epoch": 0.28, "grad_norm": 0.45906421542167664, "learning_rate": 0.0005876301079204099, "loss": 1.8818, "step": 8271 }, { "epoch": 0.28, "grad_norm": 0.5226162075996399, "learning_rate": 0.0005876271373251476, "loss": 1.9922, "step": 8272 }, { "epoch": 0.28, "grad_norm": 0.45151957869529724, "learning_rate": 0.0005876241663807484, "loss": 1.8983, "step": 8273 }, { "epoch": 0.28, "grad_norm": 0.45023977756500244, "learning_rate": 0.0005876211950872157, "loss": 1.8655, "step": 8274 }, { "epoch": 0.28, "grad_norm": 0.44933387637138367, "learning_rate": 0.0005876182234445534, "loss": 1.8623, "step": 8275 }, { "epoch": 0.28, "grad_norm": 0.46627387404441833, "learning_rate": 0.0005876152514527649, "loss": 1.9866, "step": 8276 }, { "epoch": 0.28, "grad_norm": 0.47631576657295227, "learning_rate": 0.0005876122791118538, "loss": 1.8357, "step": 8277 }, { "epoch": 0.28, "grad_norm": 0.4823148846626282, "learning_rate": 0.0005876093064218238, "loss": 1.9491, "step": 8278 }, { "epoch": 0.28, "grad_norm": 0.4573466181755066, "learning_rate": 0.0005876063333826784, "loss": 1.9463, "step": 8279 }, { "epoch": 0.28, "grad_norm": 0.46498599648475647, "learning_rate": 0.0005876033599944214, "loss": 1.974, "step": 8280 }, { "epoch": 0.28, "grad_norm": 0.47652488946914673, "learning_rate": 0.0005876003862570564, "loss": 1.9398, "step": 8281 }, { "epoch": 0.28, "grad_norm": 0.4688543677330017, "learning_rate": 0.0005875974121705866, "loss": 1.9899, "step": 8282 }, { "epoch": 0.28, "grad_norm": 0.44979527592658997, "learning_rate": 0.0005875944377350162, "loss": 1.931, "step": 8283 }, { "epoch": 0.28, "grad_norm": 0.45691919326782227, "learning_rate": 0.0005875914629503484, "loss": 1.8983, "step": 8284 }, { "epoch": 0.28, "grad_norm": 0.4573701322078705, "learning_rate": 0.0005875884878165869, "loss": 1.9306, "step": 8285 }, { "epoch": 0.28, "grad_norm": 0.4575372338294983, "learning_rate": 0.0005875855123337355, "loss": 1.9293, "step": 8286 }, { "epoch": 0.28, "grad_norm": 0.4485286772251129, "learning_rate": 0.0005875825365017975, "loss": 1.8832, "step": 8287 }, { "epoch": 0.28, "grad_norm": 0.4602067172527313, "learning_rate": 0.0005875795603207768, "loss": 1.8911, "step": 8288 }, { "epoch": 0.28, "grad_norm": 0.4628494679927826, "learning_rate": 0.0005875765837906769, "loss": 1.8899, "step": 8289 }, { "epoch": 0.28, "grad_norm": 0.4737084209918976, "learning_rate": 0.0005875736069115013, "loss": 1.9155, "step": 8290 }, { "epoch": 0.28, "grad_norm": 0.4505869448184967, "learning_rate": 0.0005875706296832537, "loss": 1.9324, "step": 8291 }, { "epoch": 0.28, "grad_norm": 0.4728771448135376, "learning_rate": 0.0005875676521059378, "loss": 1.8274, "step": 8292 }, { "epoch": 0.28, "grad_norm": 0.4563920497894287, "learning_rate": 0.0005875646741795572, "loss": 1.9514, "step": 8293 }, { "epoch": 0.28, "grad_norm": 0.4635566771030426, "learning_rate": 0.0005875616959041153, "loss": 1.8839, "step": 8294 }, { "epoch": 0.28, "grad_norm": 0.4509199559688568, "learning_rate": 0.000587558717279616, "loss": 1.8986, "step": 8295 }, { "epoch": 0.28, "grad_norm": 0.4701041877269745, "learning_rate": 0.0005875557383060627, "loss": 1.8773, "step": 8296 }, { "epoch": 0.28, "grad_norm": 0.4632871747016907, "learning_rate": 0.0005875527589834591, "loss": 1.8844, "step": 8297 }, { "epoch": 0.28, "grad_norm": 0.4500654935836792, "learning_rate": 0.0005875497793118089, "loss": 1.9297, "step": 8298 }, { "epoch": 0.28, "grad_norm": 0.4604021906852722, "learning_rate": 0.0005875467992911155, "loss": 1.988, "step": 8299 }, { "epoch": 0.28, "grad_norm": 0.47399604320526123, "learning_rate": 0.0005875438189213828, "loss": 1.935, "step": 8300 }, { "epoch": 0.28, "grad_norm": 0.47060805559158325, "learning_rate": 0.0005875408382026142, "loss": 1.8661, "step": 8301 }, { "epoch": 0.28, "grad_norm": 0.48610571026802063, "learning_rate": 0.0005875378571348134, "loss": 1.9184, "step": 8302 }, { "epoch": 0.28, "grad_norm": 0.47401195764541626, "learning_rate": 0.000587534875717984, "loss": 1.9784, "step": 8303 }, { "epoch": 0.28, "grad_norm": 0.4643497169017792, "learning_rate": 0.0005875318939521296, "loss": 1.8925, "step": 8304 }, { "epoch": 0.28, "grad_norm": 0.48219242691993713, "learning_rate": 0.0005875289118372538, "loss": 1.8844, "step": 8305 }, { "epoch": 0.28, "grad_norm": 0.47609803080558777, "learning_rate": 0.0005875259293733605, "loss": 1.9334, "step": 8306 }, { "epoch": 0.28, "grad_norm": 0.44915762543678284, "learning_rate": 0.000587522946560453, "loss": 1.8967, "step": 8307 }, { "epoch": 0.28, "grad_norm": 0.4611637592315674, "learning_rate": 0.0005875199633985349, "loss": 1.8961, "step": 8308 }, { "epoch": 0.28, "grad_norm": 0.4599616527557373, "learning_rate": 0.0005875169798876099, "loss": 1.97, "step": 8309 }, { "epoch": 0.28, "grad_norm": 0.46803396940231323, "learning_rate": 0.0005875139960276818, "loss": 1.888, "step": 8310 }, { "epoch": 0.28, "grad_norm": 0.4534766674041748, "learning_rate": 0.000587511011818754, "loss": 1.9276, "step": 8311 }, { "epoch": 0.28, "grad_norm": 0.46019574999809265, "learning_rate": 0.0005875080272608301, "loss": 1.9112, "step": 8312 }, { "epoch": 0.28, "grad_norm": 0.482036292552948, "learning_rate": 0.000587505042353914, "loss": 1.8965, "step": 8313 }, { "epoch": 0.28, "grad_norm": 0.4645047187805176, "learning_rate": 0.000587502057098009, "loss": 1.9609, "step": 8314 }, { "epoch": 0.28, "grad_norm": 0.4811680018901825, "learning_rate": 0.0005874990714931189, "loss": 1.8051, "step": 8315 }, { "epoch": 0.28, "grad_norm": 0.4617048501968384, "learning_rate": 0.0005874960855392473, "loss": 1.9607, "step": 8316 }, { "epoch": 0.28, "grad_norm": 0.466022789478302, "learning_rate": 0.0005874930992363979, "loss": 1.9819, "step": 8317 }, { "epoch": 0.28, "grad_norm": 0.45976749062538147, "learning_rate": 0.0005874901125845741, "loss": 1.8999, "step": 8318 }, { "epoch": 0.28, "grad_norm": 0.4639078974723816, "learning_rate": 0.0005874871255837796, "loss": 1.9461, "step": 8319 }, { "epoch": 0.28, "grad_norm": 0.46328043937683105, "learning_rate": 0.0005874841382340183, "loss": 1.9182, "step": 8320 }, { "epoch": 0.28, "grad_norm": 0.4743809401988983, "learning_rate": 0.0005874811505352936, "loss": 1.9263, "step": 8321 }, { "epoch": 0.28, "grad_norm": 0.4524271488189697, "learning_rate": 0.000587478162487609, "loss": 1.9249, "step": 8322 }, { "epoch": 0.28, "grad_norm": 0.4698191285133362, "learning_rate": 0.0005874751740909684, "loss": 1.8621, "step": 8323 }, { "epoch": 0.28, "grad_norm": 0.4493880271911621, "learning_rate": 0.0005874721853453751, "loss": 1.852, "step": 8324 }, { "epoch": 0.28, "grad_norm": 0.46921873092651367, "learning_rate": 0.0005874691962508332, "loss": 1.9347, "step": 8325 }, { "epoch": 0.28, "grad_norm": 0.4548396170139313, "learning_rate": 0.0005874662068073458, "loss": 1.8945, "step": 8326 }, { "epoch": 0.28, "grad_norm": 0.44556882977485657, "learning_rate": 0.0005874632170149169, "loss": 1.9045, "step": 8327 }, { "epoch": 0.28, "grad_norm": 0.44871842861175537, "learning_rate": 0.0005874602268735499, "loss": 1.8899, "step": 8328 }, { "epoch": 0.28, "grad_norm": 0.45402976870536804, "learning_rate": 0.0005874572363832487, "loss": 1.8797, "step": 8329 }, { "epoch": 0.28, "grad_norm": 0.4594699740409851, "learning_rate": 0.0005874542455440167, "loss": 1.9245, "step": 8330 }, { "epoch": 0.28, "grad_norm": 0.4421747028827667, "learning_rate": 0.0005874512543558577, "loss": 1.9089, "step": 8331 }, { "epoch": 0.28, "grad_norm": 0.4716961979866028, "learning_rate": 0.0005874482628187751, "loss": 1.8964, "step": 8332 }, { "epoch": 0.28, "grad_norm": 0.4590570628643036, "learning_rate": 0.0005874452709327727, "loss": 1.9391, "step": 8333 }, { "epoch": 0.28, "grad_norm": 0.6718709468841553, "learning_rate": 0.000587442278697854, "loss": 1.9309, "step": 8334 }, { "epoch": 0.28, "grad_norm": 0.5320771336555481, "learning_rate": 0.0005874392861140229, "loss": 1.9095, "step": 8335 }, { "epoch": 0.28, "grad_norm": 0.4618569314479828, "learning_rate": 0.0005874362931812827, "loss": 1.9301, "step": 8336 }, { "epoch": 0.28, "grad_norm": 0.4589373767375946, "learning_rate": 0.0005874332998996373, "loss": 1.9516, "step": 8337 }, { "epoch": 0.28, "grad_norm": 0.45805624127388, "learning_rate": 0.0005874303062690902, "loss": 1.8544, "step": 8338 }, { "epoch": 0.28, "grad_norm": 0.47313016653060913, "learning_rate": 0.0005874273122896451, "loss": 1.8438, "step": 8339 }, { "epoch": 0.28, "grad_norm": 0.4613044559955597, "learning_rate": 0.0005874243179613054, "loss": 1.9443, "step": 8340 }, { "epoch": 0.28, "grad_norm": 0.466945618391037, "learning_rate": 0.0005874213232840752, "loss": 1.9114, "step": 8341 }, { "epoch": 0.28, "grad_norm": 0.4618525803089142, "learning_rate": 0.0005874183282579577, "loss": 1.9985, "step": 8342 }, { "epoch": 0.28, "grad_norm": 0.4773831069469452, "learning_rate": 0.0005874153328829568, "loss": 1.802, "step": 8343 }, { "epoch": 0.28, "grad_norm": 0.4460572898387909, "learning_rate": 0.000587412337159076, "loss": 1.9028, "step": 8344 }, { "epoch": 0.28, "grad_norm": 0.4757978022098541, "learning_rate": 0.0005874093410863188, "loss": 1.8809, "step": 8345 }, { "epoch": 0.28, "grad_norm": 0.45094001293182373, "learning_rate": 0.0005874063446646892, "loss": 1.9732, "step": 8346 }, { "epoch": 0.28, "grad_norm": 0.45366019010543823, "learning_rate": 0.0005874033478941906, "loss": 1.8908, "step": 8347 }, { "epoch": 0.28, "grad_norm": 0.4694182574748993, "learning_rate": 0.0005874003507748267, "loss": 1.8927, "step": 8348 }, { "epoch": 0.28, "grad_norm": 0.46087461709976196, "learning_rate": 0.000587397353306601, "loss": 2.0159, "step": 8349 }, { "epoch": 0.28, "grad_norm": 0.47182464599609375, "learning_rate": 0.0005873943554895174, "loss": 1.9707, "step": 8350 }, { "epoch": 0.28, "grad_norm": 0.4705835282802582, "learning_rate": 0.0005873913573235793, "loss": 1.8909, "step": 8351 }, { "epoch": 0.28, "grad_norm": 0.44851160049438477, "learning_rate": 0.0005873883588087906, "loss": 1.8163, "step": 8352 }, { "epoch": 0.28, "grad_norm": 0.7081316113471985, "learning_rate": 0.0005873853599451547, "loss": 2.0182, "step": 8353 }, { "epoch": 0.28, "grad_norm": 0.5050736665725708, "learning_rate": 0.0005873823607326752, "loss": 1.9752, "step": 8354 }, { "epoch": 0.28, "grad_norm": 0.452768474817276, "learning_rate": 0.0005873793611713559, "loss": 1.8669, "step": 8355 }, { "epoch": 0.28, "grad_norm": 0.494739830493927, "learning_rate": 0.0005873763612612006, "loss": 1.9312, "step": 8356 }, { "epoch": 0.28, "grad_norm": 0.46307918429374695, "learning_rate": 0.0005873733610022125, "loss": 2.007, "step": 8357 }, { "epoch": 0.28, "grad_norm": 0.45790547132492065, "learning_rate": 0.0005873703603943956, "loss": 1.8957, "step": 8358 }, { "epoch": 0.28, "grad_norm": 0.44837749004364014, "learning_rate": 0.0005873673594377533, "loss": 1.8929, "step": 8359 }, { "epoch": 0.28, "grad_norm": 0.46182671189308167, "learning_rate": 0.0005873643581322895, "loss": 1.8671, "step": 8360 }, { "epoch": 0.28, "grad_norm": 0.4515412151813507, "learning_rate": 0.0005873613564780077, "loss": 1.9031, "step": 8361 }, { "epoch": 0.28, "grad_norm": 0.4801325798034668, "learning_rate": 0.0005873583544749115, "loss": 1.9585, "step": 8362 }, { "epoch": 0.28, "grad_norm": 0.4552787244319916, "learning_rate": 0.0005873553521230046, "loss": 1.9105, "step": 8363 }, { "epoch": 0.28, "grad_norm": 0.4675033390522003, "learning_rate": 0.0005873523494222907, "loss": 1.9111, "step": 8364 }, { "epoch": 0.28, "grad_norm": 0.556194543838501, "learning_rate": 0.0005873493463727734, "loss": 1.9767, "step": 8365 }, { "epoch": 0.28, "grad_norm": 0.4808351695537567, "learning_rate": 0.0005873463429744562, "loss": 1.866, "step": 8366 }, { "epoch": 0.28, "grad_norm": 0.4613390266895294, "learning_rate": 0.0005873433392273429, "loss": 1.9006, "step": 8367 }, { "epoch": 0.28, "grad_norm": 0.50178462266922, "learning_rate": 0.0005873403351314372, "loss": 1.8651, "step": 8368 }, { "epoch": 0.28, "grad_norm": 0.47008776664733887, "learning_rate": 0.0005873373306867427, "loss": 1.9372, "step": 8369 }, { "epoch": 0.28, "grad_norm": 0.45956748723983765, "learning_rate": 0.0005873343258932629, "loss": 1.9538, "step": 8370 }, { "epoch": 0.28, "grad_norm": 0.47330161929130554, "learning_rate": 0.0005873313207510017, "loss": 1.9419, "step": 8371 }, { "epoch": 0.28, "grad_norm": 0.5062575340270996, "learning_rate": 0.0005873283152599626, "loss": 2.0207, "step": 8372 }, { "epoch": 0.28, "grad_norm": 0.4547840356826782, "learning_rate": 0.0005873253094201491, "loss": 1.9128, "step": 8373 }, { "epoch": 0.28, "grad_norm": 0.4563090205192566, "learning_rate": 0.0005873223032315652, "loss": 1.9093, "step": 8374 }, { "epoch": 0.28, "grad_norm": 0.47282055020332336, "learning_rate": 0.0005873192966942142, "loss": 1.905, "step": 8375 }, { "epoch": 0.28, "grad_norm": 0.48498886823654175, "learning_rate": 0.0005873162898081, "loss": 1.9088, "step": 8376 }, { "epoch": 0.28, "grad_norm": 0.4555574655532837, "learning_rate": 0.0005873132825732262, "loss": 1.968, "step": 8377 }, { "epoch": 0.28, "grad_norm": 0.4444517195224762, "learning_rate": 0.0005873102749895964, "loss": 1.8514, "step": 8378 }, { "epoch": 0.28, "grad_norm": 0.45443642139434814, "learning_rate": 0.0005873072670572141, "loss": 1.9282, "step": 8379 }, { "epoch": 0.28, "grad_norm": 0.4724332094192505, "learning_rate": 0.0005873042587760832, "loss": 1.961, "step": 8380 }, { "epoch": 0.28, "grad_norm": 0.4643975496292114, "learning_rate": 0.0005873012501462074, "loss": 1.8879, "step": 8381 }, { "epoch": 0.28, "grad_norm": 0.4532594084739685, "learning_rate": 0.00058729824116759, "loss": 1.8736, "step": 8382 }, { "epoch": 0.28, "grad_norm": 0.45028993487358093, "learning_rate": 0.000587295231840235, "loss": 1.9259, "step": 8383 }, { "epoch": 0.28, "grad_norm": 0.47563430666923523, "learning_rate": 0.0005872922221641459, "loss": 1.9095, "step": 8384 }, { "epoch": 0.28, "grad_norm": 0.4775685667991638, "learning_rate": 0.0005872892121393263, "loss": 2.0203, "step": 8385 }, { "epoch": 0.28, "grad_norm": 0.4686965048313141, "learning_rate": 0.0005872862017657801, "loss": 1.8344, "step": 8386 }, { "epoch": 0.28, "grad_norm": 0.45619553327560425, "learning_rate": 0.0005872831910435106, "loss": 1.9507, "step": 8387 }, { "epoch": 0.28, "grad_norm": 0.4775584638118744, "learning_rate": 0.0005872801799725217, "loss": 1.9495, "step": 8388 }, { "epoch": 0.28, "grad_norm": 0.46499699354171753, "learning_rate": 0.0005872771685528171, "loss": 1.8593, "step": 8389 }, { "epoch": 0.28, "grad_norm": 0.4629065692424774, "learning_rate": 0.0005872741567844002, "loss": 1.9437, "step": 8390 }, { "epoch": 0.28, "grad_norm": 0.4509677588939667, "learning_rate": 0.0005872711446672748, "loss": 1.9064, "step": 8391 }, { "epoch": 0.28, "grad_norm": 0.45090821385383606, "learning_rate": 0.0005872681322014446, "loss": 1.9334, "step": 8392 }, { "epoch": 0.28, "grad_norm": 0.45360034704208374, "learning_rate": 0.0005872651193869132, "loss": 1.943, "step": 8393 }, { "epoch": 0.28, "grad_norm": 0.4901430308818817, "learning_rate": 0.0005872621062236843, "loss": 1.9299, "step": 8394 }, { "epoch": 0.28, "grad_norm": 0.4662732183933258, "learning_rate": 0.0005872590927117615, "loss": 1.9527, "step": 8395 }, { "epoch": 0.28, "grad_norm": 0.4651105999946594, "learning_rate": 0.0005872560788511486, "loss": 1.9081, "step": 8396 }, { "epoch": 0.28, "grad_norm": 0.45751363039016724, "learning_rate": 0.0005872530646418489, "loss": 1.936, "step": 8397 }, { "epoch": 0.28, "grad_norm": 0.48128920793533325, "learning_rate": 0.0005872500500838665, "loss": 1.8876, "step": 8398 }, { "epoch": 0.28, "grad_norm": 0.5668368935585022, "learning_rate": 0.0005872470351772048, "loss": 2.0209, "step": 8399 }, { "epoch": 0.28, "grad_norm": 0.46802935004234314, "learning_rate": 0.0005872440199218675, "loss": 1.9691, "step": 8400 }, { "epoch": 0.28, "grad_norm": 0.4691084027290344, "learning_rate": 0.0005872410043178584, "loss": 1.908, "step": 8401 }, { "epoch": 0.28, "grad_norm": 0.4772876501083374, "learning_rate": 0.0005872379883651809, "loss": 1.951, "step": 8402 }, { "epoch": 0.28, "grad_norm": 0.46585312485694885, "learning_rate": 0.0005872349720638389, "loss": 1.971, "step": 8403 }, { "epoch": 0.28, "grad_norm": 0.477143794298172, "learning_rate": 0.000587231955413836, "loss": 1.8492, "step": 8404 }, { "epoch": 0.28, "grad_norm": 0.4830012917518616, "learning_rate": 0.0005872289384151757, "loss": 1.9187, "step": 8405 }, { "epoch": 0.28, "grad_norm": 0.48004403710365295, "learning_rate": 0.0005872259210678619, "loss": 1.9846, "step": 8406 }, { "epoch": 0.28, "grad_norm": 0.467910498380661, "learning_rate": 0.0005872229033718981, "loss": 1.8751, "step": 8407 }, { "epoch": 0.28, "grad_norm": 0.4628021717071533, "learning_rate": 0.000587219885327288, "loss": 1.8893, "step": 8408 }, { "epoch": 0.28, "grad_norm": 0.455189049243927, "learning_rate": 0.0005872168669340352, "loss": 1.9447, "step": 8409 }, { "epoch": 0.28, "grad_norm": 0.478200763463974, "learning_rate": 0.0005872138481921435, "loss": 1.9128, "step": 8410 }, { "epoch": 0.28, "grad_norm": 0.4522005021572113, "learning_rate": 0.0005872108291016167, "loss": 1.9172, "step": 8411 }, { "epoch": 0.28, "grad_norm": 0.4588231146335602, "learning_rate": 0.0005872078096624581, "loss": 1.9065, "step": 8412 }, { "epoch": 0.28, "grad_norm": 0.461102157831192, "learning_rate": 0.0005872047898746716, "loss": 1.8841, "step": 8413 }, { "epoch": 0.28, "grad_norm": 0.4569409489631653, "learning_rate": 0.0005872017697382607, "loss": 1.8604, "step": 8414 }, { "epoch": 0.28, "grad_norm": 0.5036452412605286, "learning_rate": 0.0005871987492532292, "loss": 1.8979, "step": 8415 }, { "epoch": 0.28, "grad_norm": 0.45639580488204956, "learning_rate": 0.0005871957284195808, "loss": 1.8928, "step": 8416 }, { "epoch": 0.28, "grad_norm": 0.4756785035133362, "learning_rate": 0.0005871927072373191, "loss": 1.9131, "step": 8417 }, { "epoch": 0.28, "grad_norm": 0.45055916905403137, "learning_rate": 0.0005871896857064477, "loss": 1.9326, "step": 8418 }, { "epoch": 0.28, "grad_norm": 0.47328510880470276, "learning_rate": 0.0005871866638269703, "loss": 1.934, "step": 8419 }, { "epoch": 0.28, "grad_norm": 0.45327839255332947, "learning_rate": 0.0005871836415988907, "loss": 1.9736, "step": 8420 }, { "epoch": 0.28, "grad_norm": 0.46143290400505066, "learning_rate": 0.0005871806190222125, "loss": 1.873, "step": 8421 }, { "epoch": 0.28, "grad_norm": 0.460581511259079, "learning_rate": 0.0005871775960969393, "loss": 1.916, "step": 8422 }, { "epoch": 0.28, "grad_norm": 0.44430625438690186, "learning_rate": 0.0005871745728230749, "loss": 1.8847, "step": 8423 }, { "epoch": 0.28, "grad_norm": 0.4787541925907135, "learning_rate": 0.0005871715492006226, "loss": 1.8995, "step": 8424 }, { "epoch": 0.28, "grad_norm": 0.4757767915725708, "learning_rate": 0.0005871685252295866, "loss": 1.9252, "step": 8425 }, { "epoch": 0.28, "grad_norm": 0.47041183710098267, "learning_rate": 0.0005871655009099702, "loss": 1.8229, "step": 8426 }, { "epoch": 0.28, "grad_norm": 0.4591774046421051, "learning_rate": 0.0005871624762417774, "loss": 1.8612, "step": 8427 }, { "epoch": 0.28, "grad_norm": 0.44606226682662964, "learning_rate": 0.0005871594512250115, "loss": 1.8607, "step": 8428 }, { "epoch": 0.28, "grad_norm": 0.42808961868286133, "learning_rate": 0.0005871564258596763, "loss": 1.7999, "step": 8429 }, { "epoch": 0.28, "grad_norm": 0.492470383644104, "learning_rate": 0.0005871534001457755, "loss": 1.9698, "step": 8430 }, { "epoch": 0.28, "grad_norm": 0.46714383363723755, "learning_rate": 0.000587150374083313, "loss": 1.9786, "step": 8431 }, { "epoch": 0.28, "grad_norm": 0.4566197693347931, "learning_rate": 0.000587147347672292, "loss": 1.9149, "step": 8432 }, { "epoch": 0.28, "grad_norm": 0.4563707113265991, "learning_rate": 0.0005871443209127166, "loss": 1.9019, "step": 8433 }, { "epoch": 0.28, "grad_norm": 0.4538312554359436, "learning_rate": 0.0005871412938045903, "loss": 1.9548, "step": 8434 }, { "epoch": 0.28, "grad_norm": 0.5070953369140625, "learning_rate": 0.0005871382663479167, "loss": 1.9251, "step": 8435 }, { "epoch": 0.28, "grad_norm": 0.44616758823394775, "learning_rate": 0.0005871352385426995, "loss": 1.9273, "step": 8436 }, { "epoch": 0.28, "grad_norm": 0.43630218505859375, "learning_rate": 0.0005871322103889425, "loss": 1.9545, "step": 8437 }, { "epoch": 0.28, "grad_norm": 0.458021879196167, "learning_rate": 0.0005871291818866493, "loss": 1.957, "step": 8438 }, { "epoch": 0.28, "grad_norm": 0.47257646918296814, "learning_rate": 0.0005871261530358236, "loss": 2.0, "step": 8439 }, { "epoch": 0.28, "grad_norm": 0.4508756697177887, "learning_rate": 0.000587123123836469, "loss": 1.9701, "step": 8440 }, { "epoch": 0.28, "grad_norm": 0.44154849648475647, "learning_rate": 0.0005871200942885892, "loss": 1.9398, "step": 8441 }, { "epoch": 0.28, "grad_norm": 0.45759546756744385, "learning_rate": 0.000587117064392188, "loss": 1.9744, "step": 8442 }, { "epoch": 0.28, "grad_norm": 0.4642433524131775, "learning_rate": 0.000587114034147269, "loss": 1.8923, "step": 8443 }, { "epoch": 0.28, "grad_norm": 0.4509027302265167, "learning_rate": 0.0005871110035538359, "loss": 1.9307, "step": 8444 }, { "epoch": 0.28, "grad_norm": 0.4527028203010559, "learning_rate": 0.0005871079726118922, "loss": 1.9571, "step": 8445 }, { "epoch": 0.28, "grad_norm": 0.4630618989467621, "learning_rate": 0.0005871049413214419, "loss": 1.8523, "step": 8446 }, { "epoch": 0.28, "grad_norm": 0.4598221480846405, "learning_rate": 0.0005871019096824883, "loss": 1.9682, "step": 8447 }, { "epoch": 0.28, "grad_norm": 0.45492488145828247, "learning_rate": 0.0005870988776950354, "loss": 1.9201, "step": 8448 }, { "epoch": 0.28, "grad_norm": 0.47509142756462097, "learning_rate": 0.0005870958453590868, "loss": 1.8888, "step": 8449 }, { "epoch": 0.28, "grad_norm": 0.4623663127422333, "learning_rate": 0.0005870928126746461, "loss": 1.9587, "step": 8450 }, { "epoch": 0.28, "grad_norm": 0.47099199891090393, "learning_rate": 0.000587089779641717, "loss": 1.9359, "step": 8451 }, { "epoch": 0.28, "grad_norm": 0.477172315120697, "learning_rate": 0.0005870867462603033, "loss": 1.8991, "step": 8452 }, { "epoch": 0.28, "grad_norm": 0.46798431873321533, "learning_rate": 0.0005870837125304084, "loss": 1.9084, "step": 8453 }, { "epoch": 0.28, "grad_norm": 0.48484161496162415, "learning_rate": 0.0005870806784520364, "loss": 2.0283, "step": 8454 }, { "epoch": 0.28, "grad_norm": 0.4664222002029419, "learning_rate": 0.0005870776440251907, "loss": 1.9163, "step": 8455 }, { "epoch": 0.28, "grad_norm": 0.4482117295265198, "learning_rate": 0.000587074609249875, "loss": 1.8597, "step": 8456 }, { "epoch": 0.28, "grad_norm": 0.4617467522621155, "learning_rate": 0.000587071574126093, "loss": 1.9063, "step": 8457 }, { "epoch": 0.28, "grad_norm": 0.4575954079627991, "learning_rate": 0.0005870685386538485, "loss": 1.9226, "step": 8458 }, { "epoch": 0.28, "grad_norm": 0.4453284740447998, "learning_rate": 0.0005870655028331449, "loss": 1.9191, "step": 8459 }, { "epoch": 0.28, "grad_norm": 0.46718019247055054, "learning_rate": 0.0005870624666639862, "loss": 1.9125, "step": 8460 }, { "epoch": 0.28, "grad_norm": 0.4601534605026245, "learning_rate": 0.000587059430146376, "loss": 1.951, "step": 8461 }, { "epoch": 0.28, "grad_norm": 0.4957125782966614, "learning_rate": 0.0005870563932803179, "loss": 2.0221, "step": 8462 }, { "epoch": 0.28, "grad_norm": 0.4676719307899475, "learning_rate": 0.0005870533560658156, "loss": 1.8761, "step": 8463 }, { "epoch": 0.28, "grad_norm": 0.45191726088523865, "learning_rate": 0.0005870503185028728, "loss": 1.8797, "step": 8464 }, { "epoch": 0.28, "grad_norm": 0.6850072741508484, "learning_rate": 0.0005870472805914933, "loss": 1.9632, "step": 8465 }, { "epoch": 0.28, "grad_norm": 0.45781072974205017, "learning_rate": 0.0005870442423316807, "loss": 1.9309, "step": 8466 }, { "epoch": 0.28, "grad_norm": 0.47070470452308655, "learning_rate": 0.0005870412037234387, "loss": 2.0179, "step": 8467 }, { "epoch": 0.28, "grad_norm": 0.510797917842865, "learning_rate": 0.0005870381647667709, "loss": 1.8572, "step": 8468 }, { "epoch": 0.28, "grad_norm": 0.4792785346508026, "learning_rate": 0.000587035125461681, "loss": 1.9714, "step": 8469 }, { "epoch": 0.28, "grad_norm": 0.47430968284606934, "learning_rate": 0.0005870320858081729, "loss": 1.9606, "step": 8470 }, { "epoch": 0.28, "grad_norm": 0.45733004808425903, "learning_rate": 0.00058702904580625, "loss": 1.921, "step": 8471 }, { "epoch": 0.28, "grad_norm": 0.4698225259780884, "learning_rate": 0.0005870260054559163, "loss": 1.9361, "step": 8472 }, { "epoch": 0.28, "grad_norm": 0.4493418037891388, "learning_rate": 0.0005870229647571752, "loss": 1.8965, "step": 8473 }, { "epoch": 0.28, "grad_norm": 0.4629957973957062, "learning_rate": 0.0005870199237100305, "loss": 1.9352, "step": 8474 }, { "epoch": 0.28, "grad_norm": 0.44432538747787476, "learning_rate": 0.000587016882314486, "loss": 1.9567, "step": 8475 }, { "epoch": 0.28, "grad_norm": 0.45960676670074463, "learning_rate": 0.0005870138405705452, "loss": 1.9289, "step": 8476 }, { "epoch": 0.28, "grad_norm": 0.4540637135505676, "learning_rate": 0.0005870107984782119, "loss": 1.8286, "step": 8477 }, { "epoch": 0.28, "grad_norm": 0.45162612199783325, "learning_rate": 0.0005870077560374897, "loss": 1.9437, "step": 8478 }, { "epoch": 0.28, "grad_norm": 0.49539220333099365, "learning_rate": 0.0005870047132483826, "loss": 1.9214, "step": 8479 }, { "epoch": 0.28, "grad_norm": 0.459523469209671, "learning_rate": 0.0005870016701108939, "loss": 1.9141, "step": 8480 }, { "epoch": 0.28, "grad_norm": 0.4482365548610687, "learning_rate": 0.0005869986266250275, "loss": 1.8858, "step": 8481 }, { "epoch": 0.28, "grad_norm": 0.4504614472389221, "learning_rate": 0.0005869955827907871, "loss": 1.846, "step": 8482 }, { "epoch": 0.28, "grad_norm": 0.4827730059623718, "learning_rate": 0.0005869925386081762, "loss": 1.9467, "step": 8483 }, { "epoch": 0.28, "grad_norm": 0.4578682482242584, "learning_rate": 0.0005869894940771988, "loss": 1.9593, "step": 8484 }, { "epoch": 0.28, "grad_norm": 0.4649713933467865, "learning_rate": 0.0005869864491978583, "loss": 1.8599, "step": 8485 }, { "epoch": 0.28, "grad_norm": 0.464206725358963, "learning_rate": 0.0005869834039701588, "loss": 2.0164, "step": 8486 }, { "epoch": 0.28, "grad_norm": 0.45198577642440796, "learning_rate": 0.0005869803583941034, "loss": 1.884, "step": 8487 }, { "epoch": 0.28, "grad_norm": 0.4455401301383972, "learning_rate": 0.0005869773124696963, "loss": 1.9326, "step": 8488 }, { "epoch": 0.28, "grad_norm": 0.4544076919555664, "learning_rate": 0.000586974266196941, "loss": 1.9005, "step": 8489 }, { "epoch": 0.28, "grad_norm": 0.47443103790283203, "learning_rate": 0.0005869712195758413, "loss": 1.9258, "step": 8490 }, { "epoch": 0.28, "grad_norm": 0.44792577624320984, "learning_rate": 0.0005869681726064007, "loss": 1.9349, "step": 8491 }, { "epoch": 0.28, "grad_norm": 0.44981834292411804, "learning_rate": 0.0005869651252886232, "loss": 1.8783, "step": 8492 }, { "epoch": 0.28, "grad_norm": 0.46934974193573, "learning_rate": 0.000586962077622512, "loss": 1.9425, "step": 8493 }, { "epoch": 0.28, "grad_norm": 0.4382648468017578, "learning_rate": 0.0005869590296080715, "loss": 1.8984, "step": 8494 }, { "epoch": 0.28, "grad_norm": 0.44485801458358765, "learning_rate": 0.0005869559812453047, "loss": 1.9088, "step": 8495 }, { "epoch": 0.28, "grad_norm": 0.44057852029800415, "learning_rate": 0.0005869529325342158, "loss": 1.9037, "step": 8496 }, { "epoch": 0.28, "grad_norm": 0.46898168325424194, "learning_rate": 0.0005869498834748083, "loss": 1.8942, "step": 8497 }, { "epoch": 0.28, "grad_norm": 0.456699937582016, "learning_rate": 0.0005869468340670859, "loss": 1.9562, "step": 8498 }, { "epoch": 0.28, "grad_norm": 0.48925718665122986, "learning_rate": 0.0005869437843110523, "loss": 1.9874, "step": 8499 }, { "epoch": 0.28, "grad_norm": 0.4329315721988678, "learning_rate": 0.0005869407342067113, "loss": 1.8836, "step": 8500 }, { "epoch": 0.28, "grad_norm": 0.4789026975631714, "learning_rate": 0.0005869376837540664, "loss": 1.854, "step": 8501 }, { "epoch": 0.28, "grad_norm": 0.4672970771789551, "learning_rate": 0.0005869346329531216, "loss": 1.9472, "step": 8502 }, { "epoch": 0.28, "grad_norm": 0.48347243666648865, "learning_rate": 0.0005869315818038803, "loss": 1.9756, "step": 8503 }, { "epoch": 0.28, "grad_norm": 0.5667806267738342, "learning_rate": 0.0005869285303063464, "loss": 1.9038, "step": 8504 }, { "epoch": 0.28, "grad_norm": 0.46155428886413574, "learning_rate": 0.0005869254784605234, "loss": 1.9138, "step": 8505 }, { "epoch": 0.28, "grad_norm": 0.4571916460990906, "learning_rate": 0.0005869224262664153, "loss": 1.9486, "step": 8506 }, { "epoch": 0.28, "grad_norm": 0.4655659794807434, "learning_rate": 0.0005869193737240257, "loss": 1.9459, "step": 8507 }, { "epoch": 0.28, "grad_norm": 0.45961305499076843, "learning_rate": 0.0005869163208333582, "loss": 1.8897, "step": 8508 }, { "epoch": 0.28, "grad_norm": 0.46521076560020447, "learning_rate": 0.0005869132675944165, "loss": 1.9173, "step": 8509 }, { "epoch": 0.28, "grad_norm": 0.4757939875125885, "learning_rate": 0.0005869102140072044, "loss": 1.948, "step": 8510 }, { "epoch": 0.28, "grad_norm": 0.5105773210525513, "learning_rate": 0.0005869071600717256, "loss": 1.8932, "step": 8511 }, { "epoch": 0.28, "grad_norm": 0.4601510763168335, "learning_rate": 0.0005869041057879837, "loss": 1.881, "step": 8512 }, { "epoch": 0.28, "grad_norm": 0.46214374899864197, "learning_rate": 0.0005869010511559826, "loss": 1.9822, "step": 8513 }, { "epoch": 0.28, "grad_norm": 0.4769449532032013, "learning_rate": 0.0005868979961757259, "loss": 1.8744, "step": 8514 }, { "epoch": 0.28, "grad_norm": 0.4844503402709961, "learning_rate": 0.0005868949408472171, "loss": 1.8585, "step": 8515 }, { "epoch": 0.28, "grad_norm": 0.4663065969944, "learning_rate": 0.0005868918851704602, "loss": 1.9381, "step": 8516 }, { "epoch": 0.28, "grad_norm": 0.465323805809021, "learning_rate": 0.0005868888291454589, "loss": 1.8183, "step": 8517 }, { "epoch": 0.28, "grad_norm": 0.4647665321826935, "learning_rate": 0.0005868857727722169, "loss": 1.9203, "step": 8518 }, { "epoch": 0.28, "grad_norm": 0.4611808955669403, "learning_rate": 0.0005868827160507377, "loss": 1.9212, "step": 8519 }, { "epoch": 0.28, "grad_norm": 0.47420158982276917, "learning_rate": 0.0005868796589810252, "loss": 1.9686, "step": 8520 }, { "epoch": 0.28, "grad_norm": 0.45150190591812134, "learning_rate": 0.0005868766015630832, "loss": 1.9545, "step": 8521 }, { "epoch": 0.28, "grad_norm": 0.45412710309028625, "learning_rate": 0.0005868735437969151, "loss": 1.9926, "step": 8522 }, { "epoch": 0.28, "grad_norm": 0.45781409740448, "learning_rate": 0.0005868704856825249, "loss": 1.8896, "step": 8523 }, { "epoch": 0.28, "grad_norm": 0.48068979382514954, "learning_rate": 0.000586867427219916, "loss": 1.883, "step": 8524 }, { "epoch": 0.28, "grad_norm": 0.45195531845092773, "learning_rate": 0.0005868643684090925, "loss": 1.8852, "step": 8525 }, { "epoch": 0.28, "grad_norm": 0.5248519778251648, "learning_rate": 0.0005868613092500579, "loss": 1.975, "step": 8526 }, { "epoch": 0.28, "grad_norm": 0.4726986885070801, "learning_rate": 0.0005868582497428158, "loss": 1.9672, "step": 8527 }, { "epoch": 0.28, "grad_norm": 0.50737464427948, "learning_rate": 0.0005868551898873702, "loss": 2.028, "step": 8528 }, { "epoch": 0.28, "grad_norm": 0.4830937385559082, "learning_rate": 0.0005868521296837247, "loss": 1.8747, "step": 8529 }, { "epoch": 0.28, "grad_norm": 0.5784327387809753, "learning_rate": 0.0005868490691318829, "loss": 1.9513, "step": 8530 }, { "epoch": 0.28, "grad_norm": 0.448466032743454, "learning_rate": 0.0005868460082318486, "loss": 1.9391, "step": 8531 }, { "epoch": 0.28, "grad_norm": 0.48545897006988525, "learning_rate": 0.0005868429469836255, "loss": 1.8792, "step": 8532 }, { "epoch": 0.28, "grad_norm": 0.4805442988872528, "learning_rate": 0.0005868398853872173, "loss": 1.9647, "step": 8533 }, { "epoch": 0.28, "grad_norm": 0.4820933938026428, "learning_rate": 0.0005868368234426278, "loss": 1.8923, "step": 8534 }, { "epoch": 0.28, "grad_norm": 0.4516727924346924, "learning_rate": 0.0005868337611498606, "loss": 1.8966, "step": 8535 }, { "epoch": 0.28, "grad_norm": 0.4477684795856476, "learning_rate": 0.0005868306985089195, "loss": 1.9529, "step": 8536 }, { "epoch": 0.28, "grad_norm": 0.48317891359329224, "learning_rate": 0.0005868276355198083, "loss": 1.8956, "step": 8537 }, { "epoch": 0.28, "grad_norm": 0.47218751907348633, "learning_rate": 0.0005868245721825305, "loss": 1.8704, "step": 8538 }, { "epoch": 0.28, "grad_norm": 0.45293891429901123, "learning_rate": 0.00058682150849709, "loss": 1.9097, "step": 8539 }, { "epoch": 0.28, "grad_norm": 0.4814181625843048, "learning_rate": 0.0005868184444634903, "loss": 1.9475, "step": 8540 }, { "epoch": 0.28, "grad_norm": 0.45631131529808044, "learning_rate": 0.0005868153800817355, "loss": 1.9127, "step": 8541 }, { "epoch": 0.28, "grad_norm": 0.47046831250190735, "learning_rate": 0.0005868123153518289, "loss": 1.9531, "step": 8542 }, { "epoch": 0.28, "grad_norm": 0.47457605600357056, "learning_rate": 0.0005868092502737745, "loss": 1.9718, "step": 8543 }, { "epoch": 0.28, "grad_norm": 0.49902433156967163, "learning_rate": 0.0005868061848475759, "loss": 1.901, "step": 8544 }, { "epoch": 0.28, "grad_norm": 0.48478004336357117, "learning_rate": 0.0005868031190732368, "loss": 1.9338, "step": 8545 }, { "epoch": 0.28, "grad_norm": 0.46570107340812683, "learning_rate": 0.000586800052950761, "loss": 1.8673, "step": 8546 }, { "epoch": 0.28, "grad_norm": 0.5361802577972412, "learning_rate": 0.0005867969864801522, "loss": 1.9116, "step": 8547 }, { "epoch": 0.28, "grad_norm": 0.47544175386428833, "learning_rate": 0.0005867939196614141, "loss": 1.9284, "step": 8548 }, { "epoch": 0.28, "grad_norm": 0.4475666284561157, "learning_rate": 0.0005867908524945505, "loss": 1.9366, "step": 8549 }, { "epoch": 0.28, "grad_norm": 0.4648580551147461, "learning_rate": 0.000586787784979565, "loss": 1.9084, "step": 8550 }, { "epoch": 0.28, "grad_norm": 0.503964900970459, "learning_rate": 0.0005867847171164615, "loss": 1.9029, "step": 8551 }, { "epoch": 0.28, "grad_norm": 0.4624670445919037, "learning_rate": 0.0005867816489052435, "loss": 1.9036, "step": 8552 }, { "epoch": 0.28, "grad_norm": 0.4654484689235687, "learning_rate": 0.0005867785803459149, "loss": 1.946, "step": 8553 }, { "epoch": 0.28, "grad_norm": 0.4821273982524872, "learning_rate": 0.0005867755114384793, "loss": 1.9508, "step": 8554 }, { "epoch": 0.28, "grad_norm": 0.4836250841617584, "learning_rate": 0.0005867724421829406, "loss": 1.8712, "step": 8555 }, { "epoch": 0.28, "grad_norm": 0.4570764899253845, "learning_rate": 0.0005867693725793022, "loss": 1.8981, "step": 8556 }, { "epoch": 0.28, "grad_norm": 0.5166507959365845, "learning_rate": 0.0005867663026275681, "loss": 1.866, "step": 8557 }, { "epoch": 0.28, "grad_norm": 0.48392099142074585, "learning_rate": 0.000586763232327742, "loss": 1.9268, "step": 8558 }, { "epoch": 0.28, "grad_norm": 0.468597412109375, "learning_rate": 0.0005867601616798277, "loss": 1.8713, "step": 8559 }, { "epoch": 0.28, "grad_norm": 0.5816252827644348, "learning_rate": 0.0005867570906838288, "loss": 1.9287, "step": 8560 }, { "epoch": 0.28, "grad_norm": 0.47449514269828796, "learning_rate": 0.0005867540193397489, "loss": 1.9781, "step": 8561 }, { "epoch": 0.28, "grad_norm": 0.4451657235622406, "learning_rate": 0.0005867509476475918, "loss": 1.9342, "step": 8562 }, { "epoch": 0.28, "grad_norm": 0.48812878131866455, "learning_rate": 0.0005867478756073616, "loss": 1.9211, "step": 8563 }, { "epoch": 0.28, "grad_norm": 0.46341466903686523, "learning_rate": 0.0005867448032190615, "loss": 1.9384, "step": 8564 }, { "epoch": 0.28, "grad_norm": 0.4643203616142273, "learning_rate": 0.0005867417304826955, "loss": 1.9552, "step": 8565 }, { "epoch": 0.28, "grad_norm": 0.46323809027671814, "learning_rate": 0.0005867386573982674, "loss": 1.9931, "step": 8566 }, { "epoch": 0.29, "grad_norm": 0.4639223515987396, "learning_rate": 0.0005867355839657807, "loss": 1.9518, "step": 8567 }, { "epoch": 0.29, "grad_norm": 0.458440899848938, "learning_rate": 0.0005867325101852392, "loss": 2.016, "step": 8568 }, { "epoch": 0.29, "grad_norm": 0.48243656754493713, "learning_rate": 0.0005867294360566468, "loss": 1.9543, "step": 8569 }, { "epoch": 0.29, "grad_norm": 0.4498937129974365, "learning_rate": 0.000586726361580007, "loss": 1.9558, "step": 8570 }, { "epoch": 0.29, "grad_norm": 0.4593706429004669, "learning_rate": 0.0005867232867553237, "loss": 2.0064, "step": 8571 }, { "epoch": 0.29, "grad_norm": 0.4767952859401703, "learning_rate": 0.0005867202115826006, "loss": 1.9018, "step": 8572 }, { "epoch": 0.29, "grad_norm": 0.4640752673149109, "learning_rate": 0.0005867171360618414, "loss": 1.8726, "step": 8573 }, { "epoch": 0.29, "grad_norm": 0.4831450581550598, "learning_rate": 0.0005867140601930499, "loss": 1.9929, "step": 8574 }, { "epoch": 0.29, "grad_norm": 0.4506289064884186, "learning_rate": 0.0005867109839762298, "loss": 1.8959, "step": 8575 }, { "epoch": 0.29, "grad_norm": 0.47345367074012756, "learning_rate": 0.0005867079074113846, "loss": 1.8578, "step": 8576 }, { "epoch": 0.29, "grad_norm": 0.4608278274536133, "learning_rate": 0.0005867048304985184, "loss": 1.8497, "step": 8577 }, { "epoch": 0.29, "grad_norm": 0.4701977074146271, "learning_rate": 0.0005867017532376347, "loss": 1.8807, "step": 8578 }, { "epoch": 0.29, "grad_norm": 0.45583558082580566, "learning_rate": 0.0005866986756287374, "loss": 1.9727, "step": 8579 }, { "epoch": 0.29, "grad_norm": 0.4901179075241089, "learning_rate": 0.00058669559767183, "loss": 1.9275, "step": 8580 }, { "epoch": 0.29, "grad_norm": 0.478081077337265, "learning_rate": 0.0005866925193669166, "loss": 1.9576, "step": 8581 }, { "epoch": 0.29, "grad_norm": 0.4404112696647644, "learning_rate": 0.0005866894407140006, "loss": 1.8488, "step": 8582 }, { "epoch": 0.29, "grad_norm": 0.4402756094932556, "learning_rate": 0.0005866863617130859, "loss": 1.8808, "step": 8583 }, { "epoch": 0.29, "grad_norm": 0.4800277352333069, "learning_rate": 0.0005866832823641763, "loss": 1.9087, "step": 8584 }, { "epoch": 0.29, "grad_norm": 0.46864941716194153, "learning_rate": 0.0005866802026672752, "loss": 1.9511, "step": 8585 }, { "epoch": 0.29, "grad_norm": 0.46191585063934326, "learning_rate": 0.0005866771226223867, "loss": 1.8265, "step": 8586 }, { "epoch": 0.29, "grad_norm": 0.4678696393966675, "learning_rate": 0.0005866740422295145, "loss": 1.8159, "step": 8587 }, { "epoch": 0.29, "grad_norm": 0.47212469577789307, "learning_rate": 0.0005866709614886622, "loss": 1.9292, "step": 8588 }, { "epoch": 0.29, "grad_norm": 0.4622797966003418, "learning_rate": 0.0005866678803998335, "loss": 1.9464, "step": 8589 }, { "epoch": 0.29, "grad_norm": 0.49412596225738525, "learning_rate": 0.0005866647989630322, "loss": 2.0018, "step": 8590 }, { "epoch": 0.29, "grad_norm": 0.4892590045928955, "learning_rate": 0.0005866617171782622, "loss": 1.902, "step": 8591 }, { "epoch": 0.29, "grad_norm": 0.4810485243797302, "learning_rate": 0.0005866586350455271, "loss": 1.8884, "step": 8592 }, { "epoch": 0.29, "grad_norm": 0.4688372015953064, "learning_rate": 0.0005866555525648306, "loss": 1.9782, "step": 8593 }, { "epoch": 0.29, "grad_norm": 0.4822257161140442, "learning_rate": 0.0005866524697361765, "loss": 1.9481, "step": 8594 }, { "epoch": 0.29, "grad_norm": 0.4692213237285614, "learning_rate": 0.0005866493865595686, "loss": 1.9277, "step": 8595 }, { "epoch": 0.29, "grad_norm": 0.47107362747192383, "learning_rate": 0.0005866463030350105, "loss": 1.8988, "step": 8596 }, { "epoch": 0.29, "grad_norm": 0.4781239926815033, "learning_rate": 0.0005866432191625062, "loss": 1.9344, "step": 8597 }, { "epoch": 0.29, "grad_norm": 0.5132829546928406, "learning_rate": 0.0005866401349420591, "loss": 1.9834, "step": 8598 }, { "epoch": 0.29, "grad_norm": 0.4616440534591675, "learning_rate": 0.000586637050373673, "loss": 1.9664, "step": 8599 }, { "epoch": 0.29, "grad_norm": 0.46134132146835327, "learning_rate": 0.000586633965457352, "loss": 1.8631, "step": 8600 }, { "epoch": 0.29, "grad_norm": 0.4721229076385498, "learning_rate": 0.0005866308801930995, "loss": 1.9262, "step": 8601 }, { "epoch": 0.29, "grad_norm": 0.463920533657074, "learning_rate": 0.0005866277945809193, "loss": 1.8962, "step": 8602 }, { "epoch": 0.29, "grad_norm": 0.47073090076446533, "learning_rate": 0.0005866247086208152, "loss": 1.897, "step": 8603 }, { "epoch": 0.29, "grad_norm": 0.44678226113319397, "learning_rate": 0.000586621622312791, "loss": 1.8891, "step": 8604 }, { "epoch": 0.29, "grad_norm": 0.45747971534729004, "learning_rate": 0.0005866185356568504, "loss": 1.8653, "step": 8605 }, { "epoch": 0.29, "grad_norm": 0.45165055990219116, "learning_rate": 0.000586615448652997, "loss": 1.9874, "step": 8606 }, { "epoch": 0.29, "grad_norm": 0.45914560556411743, "learning_rate": 0.0005866123613012347, "loss": 1.9458, "step": 8607 }, { "epoch": 0.29, "grad_norm": 0.4462527334690094, "learning_rate": 0.0005866092736015674, "loss": 1.9504, "step": 8608 }, { "epoch": 0.29, "grad_norm": 0.4585701823234558, "learning_rate": 0.0005866061855539985, "loss": 1.8964, "step": 8609 }, { "epoch": 0.29, "grad_norm": 0.4554194509983063, "learning_rate": 0.000586603097158532, "loss": 1.937, "step": 8610 }, { "epoch": 0.29, "grad_norm": 0.44902390241622925, "learning_rate": 0.0005866000084151715, "loss": 1.8887, "step": 8611 }, { "epoch": 0.29, "grad_norm": 0.45283442735671997, "learning_rate": 0.0005865969193239208, "loss": 1.9164, "step": 8612 }, { "epoch": 0.29, "grad_norm": 0.45123526453971863, "learning_rate": 0.0005865938298847838, "loss": 1.8499, "step": 8613 }, { "epoch": 0.29, "grad_norm": 0.4827900528907776, "learning_rate": 0.000586590740097764, "loss": 1.8336, "step": 8614 }, { "epoch": 0.29, "grad_norm": 0.4633367657661438, "learning_rate": 0.0005865876499628654, "loss": 1.9884, "step": 8615 }, { "epoch": 0.29, "grad_norm": 0.458707332611084, "learning_rate": 0.0005865845594800914, "loss": 1.9045, "step": 8616 }, { "epoch": 0.29, "grad_norm": 0.4523097574710846, "learning_rate": 0.0005865814686494462, "loss": 1.9249, "step": 8617 }, { "epoch": 0.29, "grad_norm": 0.4517122507095337, "learning_rate": 0.0005865783774709332, "loss": 1.9915, "step": 8618 }, { "epoch": 0.29, "grad_norm": 0.47029969096183777, "learning_rate": 0.0005865752859445563, "loss": 1.8825, "step": 8619 }, { "epoch": 0.29, "grad_norm": 0.46038275957107544, "learning_rate": 0.0005865721940703192, "loss": 1.8834, "step": 8620 }, { "epoch": 0.29, "grad_norm": 0.4513295590877533, "learning_rate": 0.0005865691018482257, "loss": 1.9661, "step": 8621 }, { "epoch": 0.29, "grad_norm": 0.4674954116344452, "learning_rate": 0.0005865660092782796, "loss": 1.9077, "step": 8622 }, { "epoch": 0.29, "grad_norm": 0.45815178751945496, "learning_rate": 0.0005865629163604846, "loss": 1.9112, "step": 8623 }, { "epoch": 0.29, "grad_norm": 0.4549017548561096, "learning_rate": 0.0005865598230948442, "loss": 1.9127, "step": 8624 }, { "epoch": 0.29, "grad_norm": 0.4448808431625366, "learning_rate": 0.0005865567294813627, "loss": 1.9257, "step": 8625 }, { "epoch": 0.29, "grad_norm": 0.44839027523994446, "learning_rate": 0.0005865536355200433, "loss": 1.8692, "step": 8626 }, { "epoch": 0.29, "grad_norm": 0.45653876662254333, "learning_rate": 0.0005865505412108901, "loss": 1.9816, "step": 8627 }, { "epoch": 0.29, "grad_norm": 0.4588530957698822, "learning_rate": 0.0005865474465539069, "loss": 1.8917, "step": 8628 }, { "epoch": 0.29, "grad_norm": 0.4710981845855713, "learning_rate": 0.0005865443515490972, "loss": 1.9103, "step": 8629 }, { "epoch": 0.29, "grad_norm": 0.46952658891677856, "learning_rate": 0.0005865412561964649, "loss": 1.8981, "step": 8630 }, { "epoch": 0.29, "grad_norm": 0.4515492916107178, "learning_rate": 0.0005865381604960138, "loss": 1.9446, "step": 8631 }, { "epoch": 0.29, "grad_norm": 0.47553038597106934, "learning_rate": 0.0005865350644477475, "loss": 1.8931, "step": 8632 }, { "epoch": 0.29, "grad_norm": 0.4600343108177185, "learning_rate": 0.0005865319680516698, "loss": 1.8657, "step": 8633 }, { "epoch": 0.29, "grad_norm": 0.4551528990268707, "learning_rate": 0.0005865288713077846, "loss": 1.8896, "step": 8634 }, { "epoch": 0.29, "grad_norm": 0.44932159781455994, "learning_rate": 0.0005865257742160956, "loss": 1.8807, "step": 8635 }, { "epoch": 0.29, "grad_norm": 0.4542239010334015, "learning_rate": 0.0005865226767766065, "loss": 1.876, "step": 8636 }, { "epoch": 0.29, "grad_norm": 0.4662773907184601, "learning_rate": 0.000586519578989321, "loss": 1.9042, "step": 8637 }, { "epoch": 0.29, "grad_norm": 0.45755404233932495, "learning_rate": 0.0005865164808542431, "loss": 1.9301, "step": 8638 }, { "epoch": 0.29, "grad_norm": 0.4767671227455139, "learning_rate": 0.0005865133823713762, "loss": 1.9587, "step": 8639 }, { "epoch": 0.29, "grad_norm": 0.45270541310310364, "learning_rate": 0.0005865102835407244, "loss": 1.9097, "step": 8640 }, { "epoch": 0.29, "grad_norm": 0.48837801814079285, "learning_rate": 0.0005865071843622913, "loss": 1.9211, "step": 8641 }, { "epoch": 0.29, "grad_norm": 0.45069682598114014, "learning_rate": 0.0005865040848360807, "loss": 1.9464, "step": 8642 }, { "epoch": 0.29, "grad_norm": 0.46893954277038574, "learning_rate": 0.0005865009849620964, "loss": 1.9799, "step": 8643 }, { "epoch": 0.29, "grad_norm": 0.47086310386657715, "learning_rate": 0.0005864978847403421, "loss": 1.8949, "step": 8644 }, { "epoch": 0.29, "grad_norm": 0.4691374599933624, "learning_rate": 0.0005864947841708215, "loss": 1.9518, "step": 8645 }, { "epoch": 0.29, "grad_norm": 0.4586183726787567, "learning_rate": 0.0005864916832535386, "loss": 1.8891, "step": 8646 }, { "epoch": 0.29, "grad_norm": 0.4724386930465698, "learning_rate": 0.0005864885819884969, "loss": 1.913, "step": 8647 }, { "epoch": 0.29, "grad_norm": 0.47557535767555237, "learning_rate": 0.0005864854803757002, "loss": 1.9905, "step": 8648 }, { "epoch": 0.29, "grad_norm": 0.45777344703674316, "learning_rate": 0.0005864823784151526, "loss": 1.8992, "step": 8649 }, { "epoch": 0.29, "grad_norm": 0.45275843143463135, "learning_rate": 0.0005864792761068573, "loss": 1.8612, "step": 8650 }, { "epoch": 0.29, "grad_norm": 0.4617573320865631, "learning_rate": 0.0005864761734508185, "loss": 1.9087, "step": 8651 }, { "epoch": 0.29, "grad_norm": 0.46023431420326233, "learning_rate": 0.0005864730704470399, "loss": 1.88, "step": 8652 }, { "epoch": 0.29, "grad_norm": 0.447925865650177, "learning_rate": 0.0005864699670955251, "loss": 1.8461, "step": 8653 }, { "epoch": 0.29, "grad_norm": 0.45907798409461975, "learning_rate": 0.0005864668633962781, "loss": 1.9391, "step": 8654 }, { "epoch": 0.29, "grad_norm": 0.4652007818222046, "learning_rate": 0.0005864637593493025, "loss": 2.0483, "step": 8655 }, { "epoch": 0.29, "grad_norm": 0.4622025787830353, "learning_rate": 0.000586460654954602, "loss": 1.9231, "step": 8656 }, { "epoch": 0.29, "grad_norm": 0.4560116231441498, "learning_rate": 0.0005864575502121806, "loss": 1.9718, "step": 8657 }, { "epoch": 0.29, "grad_norm": 0.4511977434158325, "learning_rate": 0.0005864544451220418, "loss": 1.8809, "step": 8658 }, { "epoch": 0.29, "grad_norm": 0.4470655918121338, "learning_rate": 0.0005864513396841896, "loss": 1.9025, "step": 8659 }, { "epoch": 0.29, "grad_norm": 0.45901983976364136, "learning_rate": 0.0005864482338986277, "loss": 1.8795, "step": 8660 }, { "epoch": 0.29, "grad_norm": 0.45921602845191956, "learning_rate": 0.0005864451277653598, "loss": 1.8834, "step": 8661 }, { "epoch": 0.29, "grad_norm": 0.5216953754425049, "learning_rate": 0.0005864420212843897, "loss": 1.9363, "step": 8662 }, { "epoch": 0.29, "grad_norm": 0.46654579043388367, "learning_rate": 0.0005864389144557212, "loss": 1.9487, "step": 8663 }, { "epoch": 0.29, "grad_norm": 0.44759640097618103, "learning_rate": 0.0005864358072793581, "loss": 1.9143, "step": 8664 }, { "epoch": 0.29, "grad_norm": 0.4804803431034088, "learning_rate": 0.0005864326997553042, "loss": 1.9392, "step": 8665 }, { "epoch": 0.29, "grad_norm": 0.45906177163124084, "learning_rate": 0.0005864295918835631, "loss": 1.9104, "step": 8666 }, { "epoch": 0.29, "grad_norm": 0.5555967688560486, "learning_rate": 0.0005864264836641387, "loss": 1.8745, "step": 8667 }, { "epoch": 0.29, "grad_norm": 0.4668692648410797, "learning_rate": 0.0005864233750970348, "loss": 1.9091, "step": 8668 }, { "epoch": 0.29, "grad_norm": 0.440335214138031, "learning_rate": 0.000586420266182255, "loss": 1.9286, "step": 8669 }, { "epoch": 0.29, "grad_norm": 0.4588877558708191, "learning_rate": 0.0005864171569198033, "loss": 1.9336, "step": 8670 }, { "epoch": 0.29, "grad_norm": 0.450242280960083, "learning_rate": 0.0005864140473096834, "loss": 1.8786, "step": 8671 }, { "epoch": 0.29, "grad_norm": 0.44606178998947144, "learning_rate": 0.000586410937351899, "loss": 1.9102, "step": 8672 }, { "epoch": 0.29, "grad_norm": 0.47225144505500793, "learning_rate": 0.0005864078270464538, "loss": 1.9458, "step": 8673 }, { "epoch": 0.29, "grad_norm": 0.4504622519016266, "learning_rate": 0.0005864047163933519, "loss": 1.9362, "step": 8674 }, { "epoch": 0.29, "grad_norm": 0.495259165763855, "learning_rate": 0.0005864016053925968, "loss": 1.9309, "step": 8675 }, { "epoch": 0.29, "grad_norm": 0.4684008061885834, "learning_rate": 0.0005863984940441923, "loss": 1.9394, "step": 8676 }, { "epoch": 0.29, "grad_norm": 0.4528493583202362, "learning_rate": 0.0005863953823481424, "loss": 1.8357, "step": 8677 }, { "epoch": 0.29, "grad_norm": 0.4515206813812256, "learning_rate": 0.0005863922703044505, "loss": 1.9703, "step": 8678 }, { "epoch": 0.29, "grad_norm": 0.4648909866809845, "learning_rate": 0.0005863891579131207, "loss": 1.8869, "step": 8679 }, { "epoch": 0.29, "grad_norm": 0.4849262535572052, "learning_rate": 0.0005863860451741566, "loss": 1.9152, "step": 8680 }, { "epoch": 0.29, "grad_norm": 0.4619896411895752, "learning_rate": 0.0005863829320875621, "loss": 1.9841, "step": 8681 }, { "epoch": 0.29, "grad_norm": 0.4601755738258362, "learning_rate": 0.0005863798186533408, "loss": 1.9212, "step": 8682 }, { "epoch": 0.29, "grad_norm": 0.4653533697128296, "learning_rate": 0.0005863767048714968, "loss": 1.9594, "step": 8683 }, { "epoch": 0.29, "grad_norm": 0.45198407769203186, "learning_rate": 0.0005863735907420335, "loss": 1.9178, "step": 8684 }, { "epoch": 0.29, "grad_norm": 0.4793003499507904, "learning_rate": 0.0005863704762649549, "loss": 1.9524, "step": 8685 }, { "epoch": 0.29, "grad_norm": 0.47179386019706726, "learning_rate": 0.0005863673614402647, "loss": 1.9382, "step": 8686 }, { "epoch": 0.29, "grad_norm": 0.4929550290107727, "learning_rate": 0.0005863642462679668, "loss": 1.9093, "step": 8687 }, { "epoch": 0.29, "grad_norm": 0.46389785408973694, "learning_rate": 0.0005863611307480648, "loss": 1.8771, "step": 8688 }, { "epoch": 0.29, "grad_norm": 0.4596041738986969, "learning_rate": 0.0005863580148805626, "loss": 1.8863, "step": 8689 }, { "epoch": 0.29, "grad_norm": 0.4535101652145386, "learning_rate": 0.0005863548986654641, "loss": 1.8544, "step": 8690 }, { "epoch": 0.29, "grad_norm": 0.45907801389694214, "learning_rate": 0.0005863517821027729, "loss": 1.8837, "step": 8691 }, { "epoch": 0.29, "grad_norm": 0.4661473035812378, "learning_rate": 0.0005863486651924928, "loss": 2.0129, "step": 8692 }, { "epoch": 0.29, "grad_norm": 0.4700203239917755, "learning_rate": 0.0005863455479346275, "loss": 1.9143, "step": 8693 }, { "epoch": 0.29, "grad_norm": 0.47145870327949524, "learning_rate": 0.0005863424303291809, "loss": 1.9573, "step": 8694 }, { "epoch": 0.29, "grad_norm": 0.46842944622039795, "learning_rate": 0.0005863393123761569, "loss": 1.9346, "step": 8695 }, { "epoch": 0.29, "grad_norm": 0.4618527293205261, "learning_rate": 0.0005863361940755591, "loss": 1.9762, "step": 8696 }, { "epoch": 0.29, "grad_norm": 0.4610629379749298, "learning_rate": 0.0005863330754273915, "loss": 1.9377, "step": 8697 }, { "epoch": 0.29, "grad_norm": 0.4607413709163666, "learning_rate": 0.0005863299564316575, "loss": 1.9713, "step": 8698 }, { "epoch": 0.29, "grad_norm": 0.4286853075027466, "learning_rate": 0.0005863268370883613, "loss": 1.8847, "step": 8699 }, { "epoch": 0.29, "grad_norm": 0.4529213011264801, "learning_rate": 0.0005863237173975064, "loss": 1.8783, "step": 8700 }, { "epoch": 0.29, "grad_norm": 0.46973514556884766, "learning_rate": 0.0005863205973590967, "loss": 1.9137, "step": 8701 }, { "epoch": 0.29, "grad_norm": 0.4639507532119751, "learning_rate": 0.000586317476973136, "loss": 1.9068, "step": 8702 }, { "epoch": 0.29, "grad_norm": 0.46785593032836914, "learning_rate": 0.0005863143562396282, "loss": 1.9739, "step": 8703 }, { "epoch": 0.29, "grad_norm": 0.4767730236053467, "learning_rate": 0.0005863112351585768, "loss": 1.915, "step": 8704 }, { "epoch": 0.29, "grad_norm": 0.45814988017082214, "learning_rate": 0.0005863081137299858, "loss": 1.9231, "step": 8705 }, { "epoch": 0.29, "grad_norm": 0.4715750813484192, "learning_rate": 0.000586304991953859, "loss": 1.8993, "step": 8706 }, { "epoch": 0.29, "grad_norm": 0.48842862248420715, "learning_rate": 0.0005863018698302, "loss": 1.8931, "step": 8707 }, { "epoch": 0.29, "grad_norm": 0.4491184949874878, "learning_rate": 0.0005862987473590127, "loss": 1.9814, "step": 8708 }, { "epoch": 0.29, "grad_norm": 0.45754945278167725, "learning_rate": 0.000586295624540301, "loss": 1.9165, "step": 8709 }, { "epoch": 0.29, "grad_norm": 0.4575529098510742, "learning_rate": 0.0005862925013740686, "loss": 1.8691, "step": 8710 }, { "epoch": 0.29, "grad_norm": 0.48517507314682007, "learning_rate": 0.0005862893778603192, "loss": 1.9672, "step": 8711 }, { "epoch": 0.29, "grad_norm": 0.47686606645584106, "learning_rate": 0.0005862862539990568, "loss": 1.9023, "step": 8712 }, { "epoch": 0.29, "grad_norm": 0.5761631727218628, "learning_rate": 0.000586283129790285, "loss": 1.9249, "step": 8713 }, { "epoch": 0.29, "grad_norm": 0.4710002541542053, "learning_rate": 0.0005862800052340077, "loss": 1.8045, "step": 8714 }, { "epoch": 0.29, "grad_norm": 0.45287254452705383, "learning_rate": 0.0005862768803302286, "loss": 1.8731, "step": 8715 }, { "epoch": 0.29, "grad_norm": 0.4555886685848236, "learning_rate": 0.0005862737550789516, "loss": 1.9699, "step": 8716 }, { "epoch": 0.29, "grad_norm": 0.47201836109161377, "learning_rate": 0.0005862706294801804, "loss": 1.8832, "step": 8717 }, { "epoch": 0.29, "grad_norm": 0.4684843122959137, "learning_rate": 0.0005862675035339189, "loss": 1.8985, "step": 8718 }, { "epoch": 0.29, "grad_norm": 0.46576353907585144, "learning_rate": 0.0005862643772401707, "loss": 1.9381, "step": 8719 }, { "epoch": 0.29, "grad_norm": 0.4563361406326294, "learning_rate": 0.0005862612505989399, "loss": 1.9925, "step": 8720 }, { "epoch": 0.29, "grad_norm": 0.4562605619430542, "learning_rate": 0.00058625812361023, "loss": 1.8857, "step": 8721 }, { "epoch": 0.29, "grad_norm": 0.50372713804245, "learning_rate": 0.0005862549962740448, "loss": 1.9554, "step": 8722 }, { "epoch": 0.29, "grad_norm": 0.4770679473876953, "learning_rate": 0.0005862518685903885, "loss": 1.9116, "step": 8723 }, { "epoch": 0.29, "grad_norm": 0.4742656946182251, "learning_rate": 0.0005862487405592643, "loss": 1.9492, "step": 8724 }, { "epoch": 0.29, "grad_norm": 0.49906453490257263, "learning_rate": 0.0005862456121806766, "loss": 1.9291, "step": 8725 }, { "epoch": 0.29, "grad_norm": 0.45044103264808655, "learning_rate": 0.0005862424834546287, "loss": 1.8903, "step": 8726 }, { "epoch": 0.29, "grad_norm": 0.48605984449386597, "learning_rate": 0.0005862393543811247, "loss": 1.947, "step": 8727 }, { "epoch": 0.29, "grad_norm": 0.4919668436050415, "learning_rate": 0.0005862362249601683, "loss": 1.9171, "step": 8728 }, { "epoch": 0.29, "grad_norm": 0.47047439217567444, "learning_rate": 0.0005862330951917632, "loss": 1.9426, "step": 8729 }, { "epoch": 0.29, "grad_norm": 0.4591873288154602, "learning_rate": 0.0005862299650759133, "loss": 1.8897, "step": 8730 }, { "epoch": 0.29, "grad_norm": 0.4619934558868408, "learning_rate": 0.0005862268346126226, "loss": 1.9493, "step": 8731 }, { "epoch": 0.29, "grad_norm": 0.4823140501976013, "learning_rate": 0.0005862237038018946, "loss": 1.9248, "step": 8732 }, { "epoch": 0.29, "grad_norm": 0.46808484196662903, "learning_rate": 0.0005862205726437331, "loss": 1.9598, "step": 8733 }, { "epoch": 0.29, "grad_norm": 0.48128771781921387, "learning_rate": 0.000586217441138142, "loss": 1.9179, "step": 8734 }, { "epoch": 0.29, "grad_norm": 0.44556453824043274, "learning_rate": 0.0005862143092851252, "loss": 1.8416, "step": 8735 }, { "epoch": 0.29, "grad_norm": 0.4710090458393097, "learning_rate": 0.0005862111770846863, "loss": 1.9271, "step": 8736 }, { "epoch": 0.29, "grad_norm": 0.4624212682247162, "learning_rate": 0.0005862080445368293, "loss": 1.8698, "step": 8737 }, { "epoch": 0.29, "grad_norm": 0.4768241047859192, "learning_rate": 0.0005862049116415579, "loss": 1.925, "step": 8738 }, { "epoch": 0.29, "grad_norm": 0.4668635129928589, "learning_rate": 0.0005862017783988758, "loss": 1.8931, "step": 8739 }, { "epoch": 0.29, "grad_norm": 0.4793224632740021, "learning_rate": 0.000586198644808787, "loss": 1.9781, "step": 8740 }, { "epoch": 0.29, "grad_norm": 0.46094822883605957, "learning_rate": 0.0005861955108712952, "loss": 1.8606, "step": 8741 }, { "epoch": 0.29, "grad_norm": 0.47796979546546936, "learning_rate": 0.0005861923765864041, "loss": 1.8806, "step": 8742 }, { "epoch": 0.29, "grad_norm": 0.47491660714149475, "learning_rate": 0.0005861892419541179, "loss": 1.9565, "step": 8743 }, { "epoch": 0.29, "grad_norm": 0.4615965783596039, "learning_rate": 0.0005861861069744399, "loss": 1.9167, "step": 8744 }, { "epoch": 0.29, "grad_norm": 0.45140817761421204, "learning_rate": 0.0005861829716473742, "loss": 1.8663, "step": 8745 }, { "epoch": 0.29, "grad_norm": 0.4640507400035858, "learning_rate": 0.0005861798359729245, "loss": 1.9018, "step": 8746 }, { "epoch": 0.29, "grad_norm": 0.46883684396743774, "learning_rate": 0.0005861766999510947, "loss": 1.9011, "step": 8747 }, { "epoch": 0.29, "grad_norm": 0.46161699295043945, "learning_rate": 0.0005861735635818886, "loss": 1.9287, "step": 8748 }, { "epoch": 0.29, "grad_norm": 0.44493305683135986, "learning_rate": 0.0005861704268653098, "loss": 1.8731, "step": 8749 }, { "epoch": 0.29, "grad_norm": 0.4566843807697296, "learning_rate": 0.0005861672898013623, "loss": 1.9095, "step": 8750 }, { "epoch": 0.29, "grad_norm": 0.47114941477775574, "learning_rate": 0.00058616415239005, "loss": 1.9126, "step": 8751 }, { "epoch": 0.29, "grad_norm": 0.45320412516593933, "learning_rate": 0.0005861610146313765, "loss": 1.8921, "step": 8752 }, { "epoch": 0.29, "grad_norm": 0.4537888765335083, "learning_rate": 0.0005861578765253457, "loss": 1.8441, "step": 8753 }, { "epoch": 0.29, "grad_norm": 0.45747631788253784, "learning_rate": 0.0005861547380719615, "loss": 1.9407, "step": 8754 }, { "epoch": 0.29, "grad_norm": 0.4649295210838318, "learning_rate": 0.0005861515992712275, "loss": 1.9603, "step": 8755 }, { "epoch": 0.29, "grad_norm": 0.4576268792152405, "learning_rate": 0.0005861484601231477, "loss": 1.9918, "step": 8756 }, { "epoch": 0.29, "grad_norm": 0.4538372755050659, "learning_rate": 0.0005861453206277257, "loss": 1.9097, "step": 8757 }, { "epoch": 0.29, "grad_norm": 0.45927849411964417, "learning_rate": 0.0005861421807849654, "loss": 1.9197, "step": 8758 }, { "epoch": 0.29, "grad_norm": 0.4549580216407776, "learning_rate": 0.0005861390405948708, "loss": 1.9281, "step": 8759 }, { "epoch": 0.29, "grad_norm": 0.4537007808685303, "learning_rate": 0.0005861359000574456, "loss": 1.9483, "step": 8760 }, { "epoch": 0.29, "grad_norm": 0.46247589588165283, "learning_rate": 0.0005861327591726935, "loss": 1.9402, "step": 8761 }, { "epoch": 0.29, "grad_norm": 0.45730525255203247, "learning_rate": 0.0005861296179406184, "loss": 1.8936, "step": 8762 }, { "epoch": 0.29, "grad_norm": 0.4720514118671417, "learning_rate": 0.0005861264763612241, "loss": 1.9422, "step": 8763 }, { "epoch": 0.29, "grad_norm": 0.4391583204269409, "learning_rate": 0.0005861233344345144, "loss": 1.8895, "step": 8764 }, { "epoch": 0.29, "grad_norm": 0.4789659082889557, "learning_rate": 0.0005861201921604932, "loss": 1.8917, "step": 8765 }, { "epoch": 0.29, "grad_norm": 0.4731498658657074, "learning_rate": 0.0005861170495391642, "loss": 1.9143, "step": 8766 }, { "epoch": 0.29, "grad_norm": 0.4482530653476715, "learning_rate": 0.0005861139065705313, "loss": 1.8721, "step": 8767 }, { "epoch": 0.29, "grad_norm": 0.46571671962738037, "learning_rate": 0.0005861107632545981, "loss": 1.8206, "step": 8768 }, { "epoch": 0.29, "grad_norm": 0.4624188244342804, "learning_rate": 0.0005861076195913688, "loss": 2.025, "step": 8769 }, { "epoch": 0.29, "grad_norm": 0.4528389871120453, "learning_rate": 0.0005861044755808469, "loss": 1.8824, "step": 8770 }, { "epoch": 0.29, "grad_norm": 0.4627320170402527, "learning_rate": 0.0005861013312230363, "loss": 1.9689, "step": 8771 }, { "epoch": 0.29, "grad_norm": 0.4627532362937927, "learning_rate": 0.0005860981865179409, "loss": 1.9462, "step": 8772 }, { "epoch": 0.29, "grad_norm": 0.4484576880931854, "learning_rate": 0.0005860950414655645, "loss": 1.9393, "step": 8773 }, { "epoch": 0.29, "grad_norm": 0.4413413405418396, "learning_rate": 0.0005860918960659108, "loss": 1.9335, "step": 8774 }, { "epoch": 0.29, "grad_norm": 0.46259212493896484, "learning_rate": 0.0005860887503189837, "loss": 2.0068, "step": 8775 }, { "epoch": 0.29, "grad_norm": 0.46124374866485596, "learning_rate": 0.000586085604224787, "loss": 1.8946, "step": 8776 }, { "epoch": 0.29, "grad_norm": 0.44498762488365173, "learning_rate": 0.0005860824577833246, "loss": 1.9648, "step": 8777 }, { "epoch": 0.29, "grad_norm": 0.46310243010520935, "learning_rate": 0.0005860793109946002, "loss": 1.9478, "step": 8778 }, { "epoch": 0.29, "grad_norm": 0.46079495549201965, "learning_rate": 0.0005860761638586176, "loss": 1.929, "step": 8779 }, { "epoch": 0.29, "grad_norm": 0.4708676338195801, "learning_rate": 0.0005860730163753808, "loss": 1.9333, "step": 8780 }, { "epoch": 0.29, "grad_norm": 0.48322156071662903, "learning_rate": 0.0005860698685448934, "loss": 1.9915, "step": 8781 }, { "epoch": 0.29, "grad_norm": 0.4494864344596863, "learning_rate": 0.0005860667203671595, "loss": 1.8633, "step": 8782 }, { "epoch": 0.29, "grad_norm": 0.4679626226425171, "learning_rate": 0.0005860635718421826, "loss": 1.8735, "step": 8783 }, { "epoch": 0.29, "grad_norm": 0.45220649242401123, "learning_rate": 0.0005860604229699667, "loss": 1.9445, "step": 8784 }, { "epoch": 0.29, "grad_norm": 0.48397937417030334, "learning_rate": 0.0005860572737505156, "loss": 1.9383, "step": 8785 }, { "epoch": 0.29, "grad_norm": 0.4653684198856354, "learning_rate": 0.0005860541241838332, "loss": 1.8782, "step": 8786 }, { "epoch": 0.29, "grad_norm": 0.6589317917823792, "learning_rate": 0.000586050974269923, "loss": 2.0114, "step": 8787 }, { "epoch": 0.29, "grad_norm": 0.4405898153781891, "learning_rate": 0.0005860478240087894, "loss": 1.8647, "step": 8788 }, { "epoch": 0.29, "grad_norm": 0.45367589592933655, "learning_rate": 0.0005860446734004358, "loss": 1.845, "step": 8789 }, { "epoch": 0.29, "grad_norm": 0.45551806688308716, "learning_rate": 0.0005860415224448659, "loss": 1.9173, "step": 8790 }, { "epoch": 0.29, "grad_norm": 0.47964805364608765, "learning_rate": 0.0005860383711420839, "loss": 1.8593, "step": 8791 }, { "epoch": 0.29, "grad_norm": 0.4615478515625, "learning_rate": 0.0005860352194920934, "loss": 1.9313, "step": 8792 }, { "epoch": 0.29, "grad_norm": 0.44510820508003235, "learning_rate": 0.0005860320674948983, "loss": 1.8976, "step": 8793 }, { "epoch": 0.29, "grad_norm": 0.4434642493724823, "learning_rate": 0.0005860289151505025, "loss": 1.9942, "step": 8794 }, { "epoch": 0.29, "grad_norm": 0.4601273834705353, "learning_rate": 0.0005860257624589097, "loss": 1.9383, "step": 8795 }, { "epoch": 0.29, "grad_norm": 0.45037904381752014, "learning_rate": 0.0005860226094201237, "loss": 1.9313, "step": 8796 }, { "epoch": 0.29, "grad_norm": 0.4673239588737488, "learning_rate": 0.0005860194560341485, "loss": 1.9204, "step": 8797 }, { "epoch": 0.29, "grad_norm": 0.44305118918418884, "learning_rate": 0.0005860163023009878, "loss": 1.9348, "step": 8798 }, { "epoch": 0.29, "grad_norm": 0.4583164155483246, "learning_rate": 0.0005860131482206454, "loss": 1.9473, "step": 8799 }, { "epoch": 0.29, "grad_norm": 0.45042651891708374, "learning_rate": 0.0005860099937931252, "loss": 1.8566, "step": 8800 }, { "epoch": 0.29, "grad_norm": 0.4891083836555481, "learning_rate": 0.000586006839018431, "loss": 1.9473, "step": 8801 }, { "epoch": 0.29, "grad_norm": 0.4615400433540344, "learning_rate": 0.0005860036838965666, "loss": 1.9358, "step": 8802 }, { "epoch": 0.29, "grad_norm": 0.468401163816452, "learning_rate": 0.000586000528427536, "loss": 1.8918, "step": 8803 }, { "epoch": 0.29, "grad_norm": 0.4500492513179779, "learning_rate": 0.0005859973726113428, "loss": 1.874, "step": 8804 }, { "epoch": 0.29, "grad_norm": 0.44892242550849915, "learning_rate": 0.000585994216447991, "loss": 1.9153, "step": 8805 }, { "epoch": 0.29, "grad_norm": 0.43873435258865356, "learning_rate": 0.0005859910599374842, "loss": 1.8272, "step": 8806 }, { "epoch": 0.29, "grad_norm": 0.45955386757850647, "learning_rate": 0.0005859879030798266, "loss": 1.9215, "step": 8807 }, { "epoch": 0.29, "grad_norm": 0.46004724502563477, "learning_rate": 0.0005859847458750217, "loss": 1.933, "step": 8808 }, { "epoch": 0.29, "grad_norm": 0.46611878275871277, "learning_rate": 0.0005859815883230735, "loss": 1.8748, "step": 8809 }, { "epoch": 0.29, "grad_norm": 0.4634583592414856, "learning_rate": 0.0005859784304239858, "loss": 1.9618, "step": 8810 }, { "epoch": 0.29, "grad_norm": 0.4564606845378876, "learning_rate": 0.0005859752721777623, "loss": 1.906, "step": 8811 }, { "epoch": 0.29, "grad_norm": 0.4643600583076477, "learning_rate": 0.0005859721135844071, "loss": 1.9544, "step": 8812 }, { "epoch": 0.29, "grad_norm": 0.48736700415611267, "learning_rate": 0.0005859689546439239, "loss": 1.9821, "step": 8813 }, { "epoch": 0.29, "grad_norm": 0.44243931770324707, "learning_rate": 0.0005859657953563163, "loss": 1.8319, "step": 8814 }, { "epoch": 0.29, "grad_norm": 0.4348975419998169, "learning_rate": 0.0005859626357215886, "loss": 1.8543, "step": 8815 }, { "epoch": 0.29, "grad_norm": 0.45622488856315613, "learning_rate": 0.0005859594757397443, "loss": 1.9334, "step": 8816 }, { "epoch": 0.29, "grad_norm": 0.47502076625823975, "learning_rate": 0.0005859563154107873, "loss": 1.9343, "step": 8817 }, { "epoch": 0.29, "grad_norm": 0.44283851981163025, "learning_rate": 0.0005859531547347215, "loss": 1.9172, "step": 8818 }, { "epoch": 0.29, "grad_norm": 0.457438200712204, "learning_rate": 0.0005859499937115508, "loss": 1.9266, "step": 8819 }, { "epoch": 0.29, "grad_norm": 0.4665132462978363, "learning_rate": 0.0005859468323412789, "loss": 1.9384, "step": 8820 }, { "epoch": 0.29, "grad_norm": 0.4541235864162445, "learning_rate": 0.0005859436706239096, "loss": 1.9253, "step": 8821 }, { "epoch": 0.29, "grad_norm": 0.4677686095237732, "learning_rate": 0.0005859405085594469, "loss": 1.9221, "step": 8822 }, { "epoch": 0.29, "grad_norm": 0.4735881984233856, "learning_rate": 0.0005859373461478944, "loss": 1.9709, "step": 8823 }, { "epoch": 0.29, "grad_norm": 0.47020530700683594, "learning_rate": 0.0005859341833892562, "loss": 1.8854, "step": 8824 }, { "epoch": 0.29, "grad_norm": 0.47717469930648804, "learning_rate": 0.0005859310202835361, "loss": 1.9231, "step": 8825 }, { "epoch": 0.29, "grad_norm": 0.4472982883453369, "learning_rate": 0.0005859278568307377, "loss": 1.8325, "step": 8826 }, { "epoch": 0.29, "grad_norm": 0.48291030526161194, "learning_rate": 0.0005859246930308653, "loss": 1.8838, "step": 8827 }, { "epoch": 0.29, "grad_norm": 0.4919765293598175, "learning_rate": 0.0005859215288839222, "loss": 1.9509, "step": 8828 }, { "epoch": 0.29, "grad_norm": 0.4668363630771637, "learning_rate": 0.0005859183643899125, "loss": 1.901, "step": 8829 }, { "epoch": 0.29, "grad_norm": 0.4672375023365021, "learning_rate": 0.0005859151995488401, "loss": 1.9405, "step": 8830 }, { "epoch": 0.29, "grad_norm": 0.4701480567455292, "learning_rate": 0.0005859120343607088, "loss": 1.8883, "step": 8831 }, { "epoch": 0.29, "grad_norm": 0.471442312002182, "learning_rate": 0.0005859088688255224, "loss": 1.9866, "step": 8832 }, { "epoch": 0.29, "grad_norm": 0.4564523994922638, "learning_rate": 0.0005859057029432847, "loss": 1.9619, "step": 8833 }, { "epoch": 0.29, "grad_norm": 0.4615165889263153, "learning_rate": 0.0005859025367139996, "loss": 1.8825, "step": 8834 }, { "epoch": 0.29, "grad_norm": 0.46801233291625977, "learning_rate": 0.000585899370137671, "loss": 1.939, "step": 8835 }, { "epoch": 0.29, "grad_norm": 0.4592408239841461, "learning_rate": 0.0005858962032143027, "loss": 1.9527, "step": 8836 }, { "epoch": 0.29, "grad_norm": 0.47437557578086853, "learning_rate": 0.0005858930359438986, "loss": 1.9752, "step": 8837 }, { "epoch": 0.29, "grad_norm": 0.4752204120159149, "learning_rate": 0.0005858898683264624, "loss": 1.8762, "step": 8838 }, { "epoch": 0.29, "grad_norm": 0.4552459716796875, "learning_rate": 0.000585886700361998, "loss": 1.8603, "step": 8839 }, { "epoch": 0.29, "grad_norm": 0.4755858778953552, "learning_rate": 0.0005858835320505093, "loss": 1.9627, "step": 8840 }, { "epoch": 0.29, "grad_norm": 0.46314409375190735, "learning_rate": 0.0005858803633920001, "loss": 1.858, "step": 8841 }, { "epoch": 0.29, "grad_norm": 0.449535995721817, "learning_rate": 0.0005858771943864743, "loss": 1.8359, "step": 8842 }, { "epoch": 0.29, "grad_norm": 0.4697996973991394, "learning_rate": 0.0005858740250339355, "loss": 1.9514, "step": 8843 }, { "epoch": 0.29, "grad_norm": 0.4676492512226105, "learning_rate": 0.000585870855334388, "loss": 1.8882, "step": 8844 }, { "epoch": 0.29, "grad_norm": 0.4699428677558899, "learning_rate": 0.0005858676852878354, "loss": 1.88, "step": 8845 }, { "epoch": 0.29, "grad_norm": 0.47045227885246277, "learning_rate": 0.0005858645148942814, "loss": 1.97, "step": 8846 }, { "epoch": 0.29, "grad_norm": 0.48051783442497253, "learning_rate": 0.0005858613441537301, "loss": 1.9443, "step": 8847 }, { "epoch": 0.29, "grad_norm": 0.44868040084838867, "learning_rate": 0.0005858581730661853, "loss": 1.8936, "step": 8848 }, { "epoch": 0.29, "grad_norm": 0.4752119481563568, "learning_rate": 0.0005858550016316507, "loss": 1.906, "step": 8849 }, { "epoch": 0.29, "grad_norm": 0.45850276947021484, "learning_rate": 0.0005858518298501302, "loss": 1.8918, "step": 8850 }, { "epoch": 0.29, "grad_norm": 0.4702240824699402, "learning_rate": 0.0005858486577216277, "loss": 1.9715, "step": 8851 }, { "epoch": 0.29, "grad_norm": 0.45663949847221375, "learning_rate": 0.0005858454852461472, "loss": 2.014, "step": 8852 }, { "epoch": 0.29, "grad_norm": 0.4569999873638153, "learning_rate": 0.0005858423124236923, "loss": 2.0384, "step": 8853 }, { "epoch": 0.29, "grad_norm": 0.4437773525714874, "learning_rate": 0.000585839139254267, "loss": 1.9582, "step": 8854 }, { "epoch": 0.29, "grad_norm": 0.43996599316596985, "learning_rate": 0.0005858359657378751, "loss": 1.8829, "step": 8855 }, { "epoch": 0.29, "grad_norm": 0.4499654471874237, "learning_rate": 0.0005858327918745204, "loss": 1.8517, "step": 8856 }, { "epoch": 0.29, "grad_norm": 0.49125924706459045, "learning_rate": 0.0005858296176642068, "loss": 1.9448, "step": 8857 }, { "epoch": 0.29, "grad_norm": 0.4504234790802002, "learning_rate": 0.0005858264431069382, "loss": 1.8565, "step": 8858 }, { "epoch": 0.29, "grad_norm": 0.4426552653312683, "learning_rate": 0.0005858232682027184, "loss": 1.9108, "step": 8859 }, { "epoch": 0.29, "grad_norm": 0.47712409496307373, "learning_rate": 0.0005858200929515513, "loss": 1.9398, "step": 8860 }, { "epoch": 0.29, "grad_norm": 0.4506767690181732, "learning_rate": 0.0005858169173534407, "loss": 1.9334, "step": 8861 }, { "epoch": 0.29, "grad_norm": 0.4614158570766449, "learning_rate": 0.0005858137414083904, "loss": 1.8999, "step": 8862 }, { "epoch": 0.29, "grad_norm": 0.4546664357185364, "learning_rate": 0.0005858105651164045, "loss": 1.9097, "step": 8863 }, { "epoch": 0.29, "grad_norm": 0.45001935958862305, "learning_rate": 0.0005858073884774865, "loss": 1.8527, "step": 8864 }, { "epoch": 0.29, "grad_norm": 0.4550717771053314, "learning_rate": 0.0005858042114916405, "loss": 1.9701, "step": 8865 }, { "epoch": 0.29, "grad_norm": 0.46965450048446655, "learning_rate": 0.0005858010341588703, "loss": 1.9482, "step": 8866 }, { "epoch": 0.3, "grad_norm": 0.43669477105140686, "learning_rate": 0.0005857978564791798, "loss": 1.9116, "step": 8867 }, { "epoch": 0.3, "grad_norm": 0.4451254606246948, "learning_rate": 0.0005857946784525728, "loss": 1.9171, "step": 8868 }, { "epoch": 0.3, "grad_norm": 0.4502668082714081, "learning_rate": 0.0005857915000790531, "loss": 1.9011, "step": 8869 }, { "epoch": 0.3, "grad_norm": 0.4882832169532776, "learning_rate": 0.0005857883213586248, "loss": 1.9974, "step": 8870 }, { "epoch": 0.3, "grad_norm": 0.44461166858673096, "learning_rate": 0.0005857851422912914, "loss": 1.941, "step": 8871 }, { "epoch": 0.3, "grad_norm": 0.44597119092941284, "learning_rate": 0.000585781962877057, "loss": 1.8767, "step": 8872 }, { "epoch": 0.3, "grad_norm": 0.4533108174800873, "learning_rate": 0.0005857787831159255, "loss": 1.9101, "step": 8873 }, { "epoch": 0.3, "grad_norm": 0.4454435110092163, "learning_rate": 0.0005857756030079005, "loss": 1.9503, "step": 8874 }, { "epoch": 0.3, "grad_norm": 0.42781826853752136, "learning_rate": 0.0005857724225529861, "loss": 1.8386, "step": 8875 }, { "epoch": 0.3, "grad_norm": 1.690137267112732, "learning_rate": 0.0005857692417511861, "loss": 1.9354, "step": 8876 }, { "epoch": 0.3, "grad_norm": 0.46515166759490967, "learning_rate": 0.0005857660606025044, "loss": 1.9388, "step": 8877 }, { "epoch": 0.3, "grad_norm": 0.4610128104686737, "learning_rate": 0.0005857628791069448, "loss": 1.8239, "step": 8878 }, { "epoch": 0.3, "grad_norm": 0.45629602670669556, "learning_rate": 0.000585759697264511, "loss": 1.9777, "step": 8879 }, { "epoch": 0.3, "grad_norm": 0.4390534460544586, "learning_rate": 0.0005857565150752072, "loss": 1.9265, "step": 8880 }, { "epoch": 0.3, "grad_norm": 0.4483741819858551, "learning_rate": 0.000585753332539037, "loss": 1.917, "step": 8881 }, { "epoch": 0.3, "grad_norm": 0.48604273796081543, "learning_rate": 0.0005857501496560045, "loss": 1.9635, "step": 8882 }, { "epoch": 0.3, "grad_norm": 0.46852466464042664, "learning_rate": 0.0005857469664261133, "loss": 1.917, "step": 8883 }, { "epoch": 0.3, "grad_norm": 0.46402043104171753, "learning_rate": 0.0005857437828493674, "loss": 1.9119, "step": 8884 }, { "epoch": 0.3, "grad_norm": 0.4501681625843048, "learning_rate": 0.0005857405989257706, "loss": 1.9131, "step": 8885 }, { "epoch": 0.3, "grad_norm": 0.453583300113678, "learning_rate": 0.0005857374146553269, "loss": 1.981, "step": 8886 }, { "epoch": 0.3, "grad_norm": 0.48109766840934753, "learning_rate": 0.0005857342300380401, "loss": 1.8815, "step": 8887 }, { "epoch": 0.3, "grad_norm": 0.4752707779407501, "learning_rate": 0.000585731045073914, "loss": 1.9301, "step": 8888 }, { "epoch": 0.3, "grad_norm": 0.4675494432449341, "learning_rate": 0.0005857278597629525, "loss": 1.9403, "step": 8889 }, { "epoch": 0.3, "grad_norm": 0.4621437191963196, "learning_rate": 0.0005857246741051595, "loss": 1.9291, "step": 8890 }, { "epoch": 0.3, "grad_norm": 0.45801636576652527, "learning_rate": 0.0005857214881005388, "loss": 1.8828, "step": 8891 }, { "epoch": 0.3, "grad_norm": 0.48730120062828064, "learning_rate": 0.0005857183017490944, "loss": 1.9049, "step": 8892 }, { "epoch": 0.3, "grad_norm": 0.49426624178886414, "learning_rate": 0.0005857151150508301, "loss": 1.9508, "step": 8893 }, { "epoch": 0.3, "grad_norm": 0.4663447439670563, "learning_rate": 0.0005857119280057497, "loss": 1.8995, "step": 8894 }, { "epoch": 0.3, "grad_norm": 0.46604734659194946, "learning_rate": 0.000585708740613857, "loss": 1.9447, "step": 8895 }, { "epoch": 0.3, "grad_norm": 0.4465199112892151, "learning_rate": 0.0005857055528751562, "loss": 1.8632, "step": 8896 }, { "epoch": 0.3, "grad_norm": 0.46987393498420715, "learning_rate": 0.0005857023647896508, "loss": 1.9971, "step": 8897 }, { "epoch": 0.3, "grad_norm": 0.44898727536201477, "learning_rate": 0.0005856991763573448, "loss": 1.8817, "step": 8898 }, { "epoch": 0.3, "grad_norm": 0.45423734188079834, "learning_rate": 0.0005856959875782422, "loss": 1.9023, "step": 8899 }, { "epoch": 0.3, "grad_norm": 0.47245341539382935, "learning_rate": 0.0005856927984523467, "loss": 1.9102, "step": 8900 }, { "epoch": 0.3, "grad_norm": 0.46513935923576355, "learning_rate": 0.0005856896089796623, "loss": 1.9281, "step": 8901 }, { "epoch": 0.3, "grad_norm": 0.4385994076728821, "learning_rate": 0.0005856864191601928, "loss": 1.8994, "step": 8902 }, { "epoch": 0.3, "grad_norm": 0.47403448820114136, "learning_rate": 0.000585683228993942, "loss": 1.9577, "step": 8903 }, { "epoch": 0.3, "grad_norm": 0.4773758351802826, "learning_rate": 0.0005856800384809139, "loss": 1.9174, "step": 8904 }, { "epoch": 0.3, "grad_norm": 0.4666139781475067, "learning_rate": 0.0005856768476211124, "loss": 1.9172, "step": 8905 }, { "epoch": 0.3, "grad_norm": 0.4677506983280182, "learning_rate": 0.0005856736564145412, "loss": 1.9024, "step": 8906 }, { "epoch": 0.3, "grad_norm": 0.47532516717910767, "learning_rate": 0.0005856704648612044, "loss": 1.8636, "step": 8907 }, { "epoch": 0.3, "grad_norm": 0.46127504110336304, "learning_rate": 0.0005856672729611055, "loss": 1.9063, "step": 8908 }, { "epoch": 0.3, "grad_norm": 0.4808942675590515, "learning_rate": 0.0005856640807142488, "loss": 1.9175, "step": 8909 }, { "epoch": 0.3, "grad_norm": 0.45583492517471313, "learning_rate": 0.000585660888120638, "loss": 1.9157, "step": 8910 }, { "epoch": 0.3, "grad_norm": 0.47350525856018066, "learning_rate": 0.000585657695180277, "loss": 1.9016, "step": 8911 }, { "epoch": 0.3, "grad_norm": 0.45721927285194397, "learning_rate": 0.0005856545018931696, "loss": 1.9062, "step": 8912 }, { "epoch": 0.3, "grad_norm": 0.5050209164619446, "learning_rate": 0.0005856513082593196, "loss": 1.9426, "step": 8913 }, { "epoch": 0.3, "grad_norm": 0.45531797409057617, "learning_rate": 0.0005856481142787311, "loss": 1.8701, "step": 8914 }, { "epoch": 0.3, "grad_norm": 0.4632778465747833, "learning_rate": 0.000585644919951408, "loss": 1.8763, "step": 8915 }, { "epoch": 0.3, "grad_norm": 0.46097704768180847, "learning_rate": 0.0005856417252773539, "loss": 2.0001, "step": 8916 }, { "epoch": 0.3, "grad_norm": 0.4532124698162079, "learning_rate": 0.0005856385302565729, "loss": 1.8652, "step": 8917 }, { "epoch": 0.3, "grad_norm": 0.46538397669792175, "learning_rate": 0.0005856353348890688, "loss": 1.8661, "step": 8918 }, { "epoch": 0.3, "grad_norm": 0.467966228723526, "learning_rate": 0.0005856321391748456, "loss": 1.9612, "step": 8919 }, { "epoch": 0.3, "grad_norm": 0.448966920375824, "learning_rate": 0.0005856289431139068, "loss": 1.8792, "step": 8920 }, { "epoch": 0.3, "grad_norm": 0.44585075974464417, "learning_rate": 0.0005856257467062567, "loss": 1.835, "step": 8921 }, { "epoch": 0.3, "grad_norm": 0.4510129690170288, "learning_rate": 0.0005856225499518991, "loss": 1.8926, "step": 8922 }, { "epoch": 0.3, "grad_norm": 0.43426838517189026, "learning_rate": 0.0005856193528508376, "loss": 1.8528, "step": 8923 }, { "epoch": 0.3, "grad_norm": 0.4545687139034271, "learning_rate": 0.0005856161554030765, "loss": 1.9486, "step": 8924 }, { "epoch": 0.3, "grad_norm": 0.47048279643058777, "learning_rate": 0.0005856129576086195, "loss": 1.9217, "step": 8925 }, { "epoch": 0.3, "grad_norm": 0.46269890666007996, "learning_rate": 0.0005856097594674702, "loss": 1.9115, "step": 8926 }, { "epoch": 0.3, "grad_norm": 0.47482427954673767, "learning_rate": 0.000585606560979633, "loss": 1.9528, "step": 8927 }, { "epoch": 0.3, "grad_norm": 0.45775917172431946, "learning_rate": 0.0005856033621451114, "loss": 1.8985, "step": 8928 }, { "epoch": 0.3, "grad_norm": 0.4542609453201294, "learning_rate": 0.0005856001629639093, "loss": 1.9343, "step": 8929 }, { "epoch": 0.3, "grad_norm": 0.4790647029876709, "learning_rate": 0.0005855969634360309, "loss": 1.9995, "step": 8930 }, { "epoch": 0.3, "grad_norm": 0.4686971604824066, "learning_rate": 0.0005855937635614797, "loss": 1.8731, "step": 8931 }, { "epoch": 0.3, "grad_norm": 0.4545277953147888, "learning_rate": 0.0005855905633402597, "loss": 1.8943, "step": 8932 }, { "epoch": 0.3, "grad_norm": 0.5007434487342834, "learning_rate": 0.000585587362772375, "loss": 1.9762, "step": 8933 }, { "epoch": 0.3, "grad_norm": 0.46610769629478455, "learning_rate": 0.0005855841618578293, "loss": 1.963, "step": 8934 }, { "epoch": 0.3, "grad_norm": 0.4653041660785675, "learning_rate": 0.0005855809605966265, "loss": 1.8997, "step": 8935 }, { "epoch": 0.3, "grad_norm": 0.43784382939338684, "learning_rate": 0.0005855777589887704, "loss": 1.8756, "step": 8936 }, { "epoch": 0.3, "grad_norm": 0.46850651502609253, "learning_rate": 0.0005855745570342651, "loss": 1.9874, "step": 8937 }, { "epoch": 0.3, "grad_norm": 0.46544477343559265, "learning_rate": 0.0005855713547331143, "loss": 1.8969, "step": 8938 }, { "epoch": 0.3, "grad_norm": 0.46479353308677673, "learning_rate": 0.0005855681520853219, "loss": 1.8818, "step": 8939 }, { "epoch": 0.3, "grad_norm": 0.43280816078186035, "learning_rate": 0.0005855649490908919, "loss": 1.7752, "step": 8940 }, { "epoch": 0.3, "grad_norm": 0.4540841579437256, "learning_rate": 0.0005855617457498282, "loss": 1.9378, "step": 8941 }, { "epoch": 0.3, "grad_norm": 0.46272340416908264, "learning_rate": 0.0005855585420621345, "loss": 1.9558, "step": 8942 }, { "epoch": 0.3, "grad_norm": 0.4586603343486786, "learning_rate": 0.0005855553380278148, "loss": 1.8946, "step": 8943 }, { "epoch": 0.3, "grad_norm": 0.47344771027565, "learning_rate": 0.0005855521336468731, "loss": 1.9487, "step": 8944 }, { "epoch": 0.3, "grad_norm": 0.4730318784713745, "learning_rate": 0.0005855489289193131, "loss": 1.9804, "step": 8945 }, { "epoch": 0.3, "grad_norm": 0.44916847348213196, "learning_rate": 0.0005855457238451389, "loss": 1.883, "step": 8946 }, { "epoch": 0.3, "grad_norm": 0.47175875306129456, "learning_rate": 0.0005855425184243541, "loss": 1.8629, "step": 8947 }, { "epoch": 0.3, "grad_norm": 0.4567245841026306, "learning_rate": 0.0005855393126569628, "loss": 1.9245, "step": 8948 }, { "epoch": 0.3, "grad_norm": 0.4564710259437561, "learning_rate": 0.0005855361065429689, "loss": 1.9501, "step": 8949 }, { "epoch": 0.3, "grad_norm": 0.45373982191085815, "learning_rate": 0.0005855329000823761, "loss": 1.8064, "step": 8950 }, { "epoch": 0.3, "grad_norm": 0.45903170108795166, "learning_rate": 0.0005855296932751886, "loss": 1.8617, "step": 8951 }, { "epoch": 0.3, "grad_norm": 0.4415351152420044, "learning_rate": 0.00058552648612141, "loss": 1.8409, "step": 8952 }, { "epoch": 0.3, "grad_norm": 0.4505980610847473, "learning_rate": 0.0005855232786210444, "loss": 1.9121, "step": 8953 }, { "epoch": 0.3, "grad_norm": 0.454598069190979, "learning_rate": 0.0005855200707740956, "loss": 1.9208, "step": 8954 }, { "epoch": 0.3, "grad_norm": 0.4818348288536072, "learning_rate": 0.0005855168625805675, "loss": 1.9222, "step": 8955 }, { "epoch": 0.3, "grad_norm": 0.44294390082359314, "learning_rate": 0.000585513654040464, "loss": 1.8815, "step": 8956 }, { "epoch": 0.3, "grad_norm": 0.45686593651771545, "learning_rate": 0.000585510445153789, "loss": 1.8513, "step": 8957 }, { "epoch": 0.3, "grad_norm": 0.45311489701271057, "learning_rate": 0.0005855072359205463, "loss": 1.8754, "step": 8958 }, { "epoch": 0.3, "grad_norm": 0.4542310833930969, "learning_rate": 0.00058550402634074, "loss": 1.9729, "step": 8959 }, { "epoch": 0.3, "grad_norm": 0.4526590406894684, "learning_rate": 0.0005855008164143738, "loss": 1.905, "step": 8960 }, { "epoch": 0.3, "grad_norm": 0.44982945919036865, "learning_rate": 0.0005854976061414517, "loss": 1.8848, "step": 8961 }, { "epoch": 0.3, "grad_norm": 0.4554906189441681, "learning_rate": 0.0005854943955219776, "loss": 1.9276, "step": 8962 }, { "epoch": 0.3, "grad_norm": 0.45161837339401245, "learning_rate": 0.0005854911845559553, "loss": 1.9515, "step": 8963 }, { "epoch": 0.3, "grad_norm": 0.4443376362323761, "learning_rate": 0.0005854879732433889, "loss": 2.0068, "step": 8964 }, { "epoch": 0.3, "grad_norm": 0.44828078150749207, "learning_rate": 0.000585484761584282, "loss": 1.922, "step": 8965 }, { "epoch": 0.3, "grad_norm": 0.45317718386650085, "learning_rate": 0.0005854815495786388, "loss": 1.8451, "step": 8966 }, { "epoch": 0.3, "grad_norm": 0.45016995072364807, "learning_rate": 0.000585478337226463, "loss": 1.8045, "step": 8967 }, { "epoch": 0.3, "grad_norm": 0.45454421639442444, "learning_rate": 0.0005854751245277586, "loss": 1.9243, "step": 8968 }, { "epoch": 0.3, "grad_norm": 0.4449240267276764, "learning_rate": 0.0005854719114825294, "loss": 1.9157, "step": 8969 }, { "epoch": 0.3, "grad_norm": 0.4743838310241699, "learning_rate": 0.0005854686980907793, "loss": 1.9869, "step": 8970 }, { "epoch": 0.3, "grad_norm": 0.6564770936965942, "learning_rate": 0.0005854654843525124, "loss": 1.9249, "step": 8971 }, { "epoch": 0.3, "grad_norm": 0.44903504848480225, "learning_rate": 0.0005854622702677324, "loss": 1.9101, "step": 8972 }, { "epoch": 0.3, "grad_norm": 0.45645198225975037, "learning_rate": 0.0005854590558364433, "loss": 1.8716, "step": 8973 }, { "epoch": 0.3, "grad_norm": 0.4939405918121338, "learning_rate": 0.0005854558410586489, "loss": 1.9725, "step": 8974 }, { "epoch": 0.3, "grad_norm": 0.49449530243873596, "learning_rate": 0.0005854526259343532, "loss": 1.9029, "step": 8975 }, { "epoch": 0.3, "grad_norm": 0.4457530081272125, "learning_rate": 0.0005854494104635601, "loss": 1.9068, "step": 8976 }, { "epoch": 0.3, "grad_norm": 0.48124927282333374, "learning_rate": 0.0005854461946462735, "loss": 1.8928, "step": 8977 }, { "epoch": 0.3, "grad_norm": 0.4566872715950012, "learning_rate": 0.0005854429784824972, "loss": 1.8996, "step": 8978 }, { "epoch": 0.3, "grad_norm": 0.4489558935165405, "learning_rate": 0.0005854397619722353, "loss": 1.8841, "step": 8979 }, { "epoch": 0.3, "grad_norm": 0.44918298721313477, "learning_rate": 0.0005854365451154914, "loss": 1.9621, "step": 8980 }, { "epoch": 0.3, "grad_norm": 0.45021313428878784, "learning_rate": 0.0005854333279122697, "loss": 1.955, "step": 8981 }, { "epoch": 0.3, "grad_norm": 0.44240546226501465, "learning_rate": 0.0005854301103625741, "loss": 1.9511, "step": 8982 }, { "epoch": 0.3, "grad_norm": 0.4411778748035431, "learning_rate": 0.0005854268924664083, "loss": 1.8948, "step": 8983 }, { "epoch": 0.3, "grad_norm": 0.4598619043827057, "learning_rate": 0.0005854236742237763, "loss": 1.8543, "step": 8984 }, { "epoch": 0.3, "grad_norm": 0.43445074558258057, "learning_rate": 0.0005854204556346819, "loss": 1.9346, "step": 8985 }, { "epoch": 0.3, "grad_norm": 0.4419728219509125, "learning_rate": 0.0005854172366991293, "loss": 1.973, "step": 8986 }, { "epoch": 0.3, "grad_norm": 0.4330218434333801, "learning_rate": 0.0005854140174171221, "loss": 1.9247, "step": 8987 }, { "epoch": 0.3, "grad_norm": 0.4424241781234741, "learning_rate": 0.0005854107977886645, "loss": 1.8867, "step": 8988 }, { "epoch": 0.3, "grad_norm": 0.5114211440086365, "learning_rate": 0.0005854075778137601, "loss": 1.8497, "step": 8989 }, { "epoch": 0.3, "grad_norm": 0.45765143632888794, "learning_rate": 0.000585404357492413, "loss": 1.9646, "step": 8990 }, { "epoch": 0.3, "grad_norm": 0.45276907086372375, "learning_rate": 0.0005854011368246271, "loss": 1.9437, "step": 8991 }, { "epoch": 0.3, "grad_norm": 0.43076735734939575, "learning_rate": 0.0005853979158104062, "loss": 1.8949, "step": 8992 }, { "epoch": 0.3, "grad_norm": 0.4616854786872864, "learning_rate": 0.0005853946944497543, "loss": 1.8711, "step": 8993 }, { "epoch": 0.3, "grad_norm": 0.4527987241744995, "learning_rate": 0.0005853914727426753, "loss": 1.8712, "step": 8994 }, { "epoch": 0.3, "grad_norm": 0.465254545211792, "learning_rate": 0.0005853882506891731, "loss": 1.9342, "step": 8995 }, { "epoch": 0.3, "grad_norm": 0.44700321555137634, "learning_rate": 0.0005853850282892517, "loss": 1.8966, "step": 8996 }, { "epoch": 0.3, "grad_norm": 0.4462844431400299, "learning_rate": 0.0005853818055429149, "loss": 1.9511, "step": 8997 }, { "epoch": 0.3, "grad_norm": 0.45960065722465515, "learning_rate": 0.0005853785824501666, "loss": 1.9519, "step": 8998 }, { "epoch": 0.3, "grad_norm": 0.4598194360733032, "learning_rate": 0.0005853753590110107, "loss": 1.9157, "step": 8999 }, { "epoch": 0.3, "grad_norm": 0.44446709752082825, "learning_rate": 0.0005853721352254512, "loss": 1.8722, "step": 9000 }, { "epoch": 0.3, "grad_norm": 0.4440546929836273, "learning_rate": 0.000585368911093492, "loss": 1.896, "step": 9001 }, { "epoch": 0.3, "grad_norm": 0.4448349177837372, "learning_rate": 0.0005853656866151368, "loss": 1.9178, "step": 9002 }, { "epoch": 0.3, "grad_norm": 0.4701876640319824, "learning_rate": 0.00058536246179039, "loss": 1.9217, "step": 9003 }, { "epoch": 0.3, "grad_norm": 0.4534897208213806, "learning_rate": 0.000585359236619255, "loss": 1.956, "step": 9004 }, { "epoch": 0.3, "grad_norm": 0.4589197635650635, "learning_rate": 0.000585356011101736, "loss": 1.8774, "step": 9005 }, { "epoch": 0.3, "grad_norm": 0.4574263095855713, "learning_rate": 0.0005853527852378369, "loss": 1.9105, "step": 9006 }, { "epoch": 0.3, "grad_norm": 0.4513111710548401, "learning_rate": 0.0005853495590275616, "loss": 1.8494, "step": 9007 }, { "epoch": 0.3, "grad_norm": 0.4412168860435486, "learning_rate": 0.0005853463324709139, "loss": 1.9342, "step": 9008 }, { "epoch": 0.3, "grad_norm": 0.4719714820384979, "learning_rate": 0.0005853431055678978, "loss": 1.9418, "step": 9009 }, { "epoch": 0.3, "grad_norm": 0.509829044342041, "learning_rate": 0.0005853398783185172, "loss": 2.0074, "step": 9010 }, { "epoch": 0.3, "grad_norm": 0.4665428102016449, "learning_rate": 0.0005853366507227761, "loss": 1.9288, "step": 9011 }, { "epoch": 0.3, "grad_norm": 0.46068546175956726, "learning_rate": 0.0005853334227806783, "loss": 1.9485, "step": 9012 }, { "epoch": 0.3, "grad_norm": 0.458657443523407, "learning_rate": 0.0005853301944922277, "loss": 1.9484, "step": 9013 }, { "epoch": 0.3, "grad_norm": 0.43836626410484314, "learning_rate": 0.0005853269658574284, "loss": 1.9134, "step": 9014 }, { "epoch": 0.3, "grad_norm": 0.45993998646736145, "learning_rate": 0.0005853237368762841, "loss": 1.8923, "step": 9015 }, { "epoch": 0.3, "grad_norm": 0.44888243079185486, "learning_rate": 0.000585320507548799, "loss": 1.8646, "step": 9016 }, { "epoch": 0.3, "grad_norm": 0.4339750111103058, "learning_rate": 0.0005853172778749766, "loss": 1.8739, "step": 9017 }, { "epoch": 0.3, "grad_norm": 0.4633634090423584, "learning_rate": 0.0005853140478548212, "loss": 1.948, "step": 9018 }, { "epoch": 0.3, "grad_norm": 0.4951878488063812, "learning_rate": 0.0005853108174883366, "loss": 1.9053, "step": 9019 }, { "epoch": 0.3, "grad_norm": 0.451012521982193, "learning_rate": 0.0005853075867755267, "loss": 1.9555, "step": 9020 }, { "epoch": 0.3, "grad_norm": 0.47423771023750305, "learning_rate": 0.0005853043557163954, "loss": 1.914, "step": 9021 }, { "epoch": 0.3, "grad_norm": 0.8987909555435181, "learning_rate": 0.0005853011243109466, "loss": 1.9071, "step": 9022 }, { "epoch": 0.3, "grad_norm": 0.45003020763397217, "learning_rate": 0.0005852978925591844, "loss": 1.8665, "step": 9023 }, { "epoch": 0.3, "grad_norm": 0.44914546608924866, "learning_rate": 0.0005852946604611125, "loss": 1.8927, "step": 9024 }, { "epoch": 0.3, "grad_norm": 0.4859262704849243, "learning_rate": 0.000585291428016735, "loss": 1.9063, "step": 9025 }, { "epoch": 0.3, "grad_norm": 0.4975633919239044, "learning_rate": 0.0005852881952260556, "loss": 1.878, "step": 9026 }, { "epoch": 0.3, "grad_norm": 0.44976967573165894, "learning_rate": 0.0005852849620890785, "loss": 1.964, "step": 9027 }, { "epoch": 0.3, "grad_norm": 0.46641311049461365, "learning_rate": 0.0005852817286058074, "loss": 1.889, "step": 9028 }, { "epoch": 0.3, "grad_norm": 0.47409239411354065, "learning_rate": 0.0005852784947762463, "loss": 1.8897, "step": 9029 }, { "epoch": 0.3, "grad_norm": 0.5529802441596985, "learning_rate": 0.0005852752606003992, "loss": 1.918, "step": 9030 }, { "epoch": 0.3, "grad_norm": 0.48341134190559387, "learning_rate": 0.00058527202607827, "loss": 1.9182, "step": 9031 }, { "epoch": 0.3, "grad_norm": 0.4664570689201355, "learning_rate": 0.0005852687912098626, "loss": 1.9603, "step": 9032 }, { "epoch": 0.3, "grad_norm": 0.4694676697254181, "learning_rate": 0.0005852655559951808, "loss": 1.9263, "step": 9033 }, { "epoch": 0.3, "grad_norm": 0.4498826861381531, "learning_rate": 0.0005852623204342288, "loss": 1.9161, "step": 9034 }, { "epoch": 0.3, "grad_norm": 0.4433252513408661, "learning_rate": 0.0005852590845270103, "loss": 1.8585, "step": 9035 }, { "epoch": 0.3, "grad_norm": 0.4717244505882263, "learning_rate": 0.0005852558482735293, "loss": 1.9216, "step": 9036 }, { "epoch": 0.3, "grad_norm": 0.472238153219223, "learning_rate": 0.0005852526116737897, "loss": 1.9174, "step": 9037 }, { "epoch": 0.3, "grad_norm": 0.4808063805103302, "learning_rate": 0.0005852493747277955, "loss": 1.9738, "step": 9038 }, { "epoch": 0.3, "grad_norm": 0.4742281436920166, "learning_rate": 0.0005852461374355506, "loss": 1.9457, "step": 9039 }, { "epoch": 0.3, "grad_norm": 0.460936039686203, "learning_rate": 0.0005852428997970588, "loss": 1.8556, "step": 9040 }, { "epoch": 0.3, "grad_norm": 0.4530850052833557, "learning_rate": 0.0005852396618123243, "loss": 1.9153, "step": 9041 }, { "epoch": 0.3, "grad_norm": 0.43620845675468445, "learning_rate": 0.0005852364234813508, "loss": 1.873, "step": 9042 }, { "epoch": 0.3, "grad_norm": 0.46230050921440125, "learning_rate": 0.0005852331848041424, "loss": 1.8943, "step": 9043 }, { "epoch": 0.3, "grad_norm": 0.4570010304450989, "learning_rate": 0.0005852299457807028, "loss": 1.9294, "step": 9044 }, { "epoch": 0.3, "grad_norm": 0.46150603890419006, "learning_rate": 0.0005852267064110362, "loss": 1.953, "step": 9045 }, { "epoch": 0.3, "grad_norm": 0.4426819682121277, "learning_rate": 0.0005852234666951463, "loss": 1.9022, "step": 9046 }, { "epoch": 0.3, "grad_norm": 0.4392647445201874, "learning_rate": 0.0005852202266330372, "loss": 1.9312, "step": 9047 }, { "epoch": 0.3, "grad_norm": 0.431051105260849, "learning_rate": 0.0005852169862247127, "loss": 1.8, "step": 9048 }, { "epoch": 0.3, "grad_norm": 0.4552067816257477, "learning_rate": 0.000585213745470177, "loss": 1.9764, "step": 9049 }, { "epoch": 0.3, "grad_norm": 0.4366917908191681, "learning_rate": 0.0005852105043694337, "loss": 1.9201, "step": 9050 }, { "epoch": 0.3, "grad_norm": 0.4667671322822571, "learning_rate": 0.0005852072629224869, "loss": 1.9834, "step": 9051 }, { "epoch": 0.3, "grad_norm": 0.45709338784217834, "learning_rate": 0.0005852040211293404, "loss": 1.9768, "step": 9052 }, { "epoch": 0.3, "grad_norm": 0.44258129596710205, "learning_rate": 0.0005852007789899984, "loss": 1.8991, "step": 9053 }, { "epoch": 0.3, "grad_norm": 0.4640619456768036, "learning_rate": 0.0005851975365044645, "loss": 1.927, "step": 9054 }, { "epoch": 0.3, "grad_norm": 0.43933480978012085, "learning_rate": 0.000585194293672743, "loss": 1.9273, "step": 9055 }, { "epoch": 0.3, "grad_norm": 0.44266223907470703, "learning_rate": 0.0005851910504948375, "loss": 1.9019, "step": 9056 }, { "epoch": 0.3, "grad_norm": 0.4475027918815613, "learning_rate": 0.0005851878069707521, "loss": 1.8773, "step": 9057 }, { "epoch": 0.3, "grad_norm": 0.4457840621471405, "learning_rate": 0.0005851845631004908, "loss": 1.8696, "step": 9058 }, { "epoch": 0.3, "grad_norm": 0.44819435477256775, "learning_rate": 0.0005851813188840574, "loss": 1.9067, "step": 9059 }, { "epoch": 0.3, "grad_norm": 0.45034587383270264, "learning_rate": 0.000585178074321456, "loss": 1.8494, "step": 9060 }, { "epoch": 0.3, "grad_norm": 0.4543622136116028, "learning_rate": 0.0005851748294126904, "loss": 1.9504, "step": 9061 }, { "epoch": 0.3, "grad_norm": 0.4660375416278839, "learning_rate": 0.0005851715841577645, "loss": 1.9016, "step": 9062 }, { "epoch": 0.3, "grad_norm": 0.45951777696609497, "learning_rate": 0.0005851683385566823, "loss": 1.8558, "step": 9063 }, { "epoch": 0.3, "grad_norm": 0.47533172369003296, "learning_rate": 0.0005851650926094479, "loss": 1.8915, "step": 9064 }, { "epoch": 0.3, "grad_norm": 0.4516703188419342, "learning_rate": 0.0005851618463160649, "loss": 1.913, "step": 9065 }, { "epoch": 0.3, "grad_norm": 0.4552375078201294, "learning_rate": 0.0005851585996765376, "loss": 1.8872, "step": 9066 }, { "epoch": 0.3, "grad_norm": 0.4528781473636627, "learning_rate": 0.0005851553526908696, "loss": 1.8841, "step": 9067 }, { "epoch": 0.3, "grad_norm": 0.4523967206478119, "learning_rate": 0.0005851521053590652, "loss": 1.8974, "step": 9068 }, { "epoch": 0.3, "grad_norm": 0.46978694200515747, "learning_rate": 0.0005851488576811281, "loss": 1.8679, "step": 9069 }, { "epoch": 0.3, "grad_norm": 0.48002126812934875, "learning_rate": 0.0005851456096570623, "loss": 1.9408, "step": 9070 }, { "epoch": 0.3, "grad_norm": 0.4646340608596802, "learning_rate": 0.0005851423612868717, "loss": 1.9405, "step": 9071 }, { "epoch": 0.3, "grad_norm": 0.4617258906364441, "learning_rate": 0.0005851391125705603, "loss": 1.8204, "step": 9072 }, { "epoch": 0.3, "grad_norm": 0.47371622920036316, "learning_rate": 0.000585135863508132, "loss": 1.8882, "step": 9073 }, { "epoch": 0.3, "grad_norm": 0.460610955953598, "learning_rate": 0.0005851326140995909, "loss": 1.7954, "step": 9074 }, { "epoch": 0.3, "grad_norm": 0.43863746523857117, "learning_rate": 0.0005851293643449408, "loss": 1.9175, "step": 9075 }, { "epoch": 0.3, "grad_norm": 0.45960795879364014, "learning_rate": 0.0005851261142441856, "loss": 1.9142, "step": 9076 }, { "epoch": 0.3, "grad_norm": 0.44502660632133484, "learning_rate": 0.0005851228637973292, "loss": 1.8005, "step": 9077 }, { "epoch": 0.3, "grad_norm": 0.46965399384498596, "learning_rate": 0.0005851196130043758, "loss": 1.9228, "step": 9078 }, { "epoch": 0.3, "grad_norm": 0.4723943769931793, "learning_rate": 0.0005851163618653293, "loss": 1.9743, "step": 9079 }, { "epoch": 0.3, "grad_norm": 0.454291969537735, "learning_rate": 0.0005851131103801933, "loss": 1.9463, "step": 9080 }, { "epoch": 0.3, "grad_norm": 0.4532201886177063, "learning_rate": 0.0005851098585489721, "loss": 1.8646, "step": 9081 }, { "epoch": 0.3, "grad_norm": 0.44725731015205383, "learning_rate": 0.0005851066063716696, "loss": 1.9569, "step": 9082 }, { "epoch": 0.3, "grad_norm": 0.4621776342391968, "learning_rate": 0.0005851033538482897, "loss": 1.9414, "step": 9083 }, { "epoch": 0.3, "grad_norm": 0.4551756680011749, "learning_rate": 0.0005851001009788363, "loss": 1.9676, "step": 9084 }, { "epoch": 0.3, "grad_norm": 0.4466504454612732, "learning_rate": 0.0005850968477633132, "loss": 1.9538, "step": 9085 }, { "epoch": 0.3, "grad_norm": 0.4439307153224945, "learning_rate": 0.0005850935942017248, "loss": 1.9334, "step": 9086 }, { "epoch": 0.3, "grad_norm": 0.45719608664512634, "learning_rate": 0.0005850903402940747, "loss": 1.9553, "step": 9087 }, { "epoch": 0.3, "grad_norm": 0.46560192108154297, "learning_rate": 0.0005850870860403669, "loss": 1.8994, "step": 9088 }, { "epoch": 0.3, "grad_norm": 0.4629181921482086, "learning_rate": 0.0005850838314406054, "loss": 1.8589, "step": 9089 }, { "epoch": 0.3, "grad_norm": 0.47278472781181335, "learning_rate": 0.000585080576494794, "loss": 1.9047, "step": 9090 }, { "epoch": 0.3, "grad_norm": 0.4808503985404968, "learning_rate": 0.000585077321202937, "loss": 1.8773, "step": 9091 }, { "epoch": 0.3, "grad_norm": 0.46250584721565247, "learning_rate": 0.0005850740655650381, "loss": 1.8966, "step": 9092 }, { "epoch": 0.3, "grad_norm": 0.47513607144355774, "learning_rate": 0.0005850708095811012, "loss": 1.9726, "step": 9093 }, { "epoch": 0.3, "grad_norm": 0.4542402923107147, "learning_rate": 0.0005850675532511304, "loss": 1.9088, "step": 9094 }, { "epoch": 0.3, "grad_norm": 0.444983571767807, "learning_rate": 0.0005850642965751295, "loss": 1.9361, "step": 9095 }, { "epoch": 0.3, "grad_norm": 0.4449502229690552, "learning_rate": 0.0005850610395531027, "loss": 1.9569, "step": 9096 }, { "epoch": 0.3, "grad_norm": 0.4631815552711487, "learning_rate": 0.0005850577821850538, "loss": 1.8361, "step": 9097 }, { "epoch": 0.3, "grad_norm": 0.4685165286064148, "learning_rate": 0.0005850545244709866, "loss": 1.8478, "step": 9098 }, { "epoch": 0.3, "grad_norm": 0.45665889978408813, "learning_rate": 0.0005850512664109052, "loss": 1.8578, "step": 9099 }, { "epoch": 0.3, "grad_norm": 0.4473554790019989, "learning_rate": 0.0005850480080048136, "loss": 1.8771, "step": 9100 }, { "epoch": 0.3, "grad_norm": 0.47235462069511414, "learning_rate": 0.0005850447492527159, "loss": 1.9378, "step": 9101 }, { "epoch": 0.3, "grad_norm": 0.4720863699913025, "learning_rate": 0.0005850414901546157, "loss": 1.9076, "step": 9102 }, { "epoch": 0.3, "grad_norm": 0.4613185524940491, "learning_rate": 0.0005850382307105172, "loss": 1.9102, "step": 9103 }, { "epoch": 0.3, "grad_norm": 0.47502434253692627, "learning_rate": 0.0005850349709204243, "loss": 1.9416, "step": 9104 }, { "epoch": 0.3, "grad_norm": 0.4548913240432739, "learning_rate": 0.0005850317107843407, "loss": 1.8964, "step": 9105 }, { "epoch": 0.3, "grad_norm": 0.4329690933227539, "learning_rate": 0.0005850284503022709, "loss": 1.9312, "step": 9106 }, { "epoch": 0.3, "grad_norm": 0.4721789062023163, "learning_rate": 0.0005850251894742185, "loss": 1.9259, "step": 9107 }, { "epoch": 0.3, "grad_norm": 0.4686416685581207, "learning_rate": 0.0005850219283001874, "loss": 1.9668, "step": 9108 }, { "epoch": 0.3, "grad_norm": 0.4499720633029938, "learning_rate": 0.0005850186667801817, "loss": 1.8902, "step": 9109 }, { "epoch": 0.3, "grad_norm": 0.44216734170913696, "learning_rate": 0.0005850154049142055, "loss": 1.9074, "step": 9110 }, { "epoch": 0.3, "grad_norm": 0.4351814389228821, "learning_rate": 0.0005850121427022624, "loss": 1.9656, "step": 9111 }, { "epoch": 0.3, "grad_norm": 0.45739033818244934, "learning_rate": 0.0005850088801443566, "loss": 1.9585, "step": 9112 }, { "epoch": 0.3, "grad_norm": 0.5728788375854492, "learning_rate": 0.0005850056172404921, "loss": 1.9098, "step": 9113 }, { "epoch": 0.3, "grad_norm": 0.46096178889274597, "learning_rate": 0.0005850023539906727, "loss": 1.8703, "step": 9114 }, { "epoch": 0.3, "grad_norm": 0.4603641629219055, "learning_rate": 0.0005849990903949025, "loss": 1.9372, "step": 9115 }, { "epoch": 0.3, "grad_norm": 0.47293949127197266, "learning_rate": 0.0005849958264531853, "loss": 1.8904, "step": 9116 }, { "epoch": 0.3, "grad_norm": 0.47008976340293884, "learning_rate": 0.0005849925621655252, "loss": 1.9333, "step": 9117 }, { "epoch": 0.3, "grad_norm": 0.4451087713241577, "learning_rate": 0.0005849892975319261, "loss": 1.9305, "step": 9118 }, { "epoch": 0.3, "grad_norm": 0.45842546224594116, "learning_rate": 0.000584986032552392, "loss": 1.9176, "step": 9119 }, { "epoch": 0.3, "grad_norm": 0.4632191061973572, "learning_rate": 0.0005849827672269269, "loss": 1.9471, "step": 9120 }, { "epoch": 0.3, "grad_norm": 0.4700564742088318, "learning_rate": 0.0005849795015555347, "loss": 1.844, "step": 9121 }, { "epoch": 0.3, "grad_norm": 0.4553745687007904, "learning_rate": 0.0005849762355382193, "loss": 1.8773, "step": 9122 }, { "epoch": 0.3, "grad_norm": 0.45149561762809753, "learning_rate": 0.0005849729691749847, "loss": 1.9167, "step": 9123 }, { "epoch": 0.3, "grad_norm": 0.4627231955528259, "learning_rate": 0.000584969702465835, "loss": 1.8561, "step": 9124 }, { "epoch": 0.3, "grad_norm": 0.47226443886756897, "learning_rate": 0.0005849664354107742, "loss": 1.9668, "step": 9125 }, { "epoch": 0.3, "grad_norm": 0.44979894161224365, "learning_rate": 0.0005849631680098059, "loss": 1.9534, "step": 9126 }, { "epoch": 0.3, "grad_norm": 0.47881945967674255, "learning_rate": 0.0005849599002629344, "loss": 1.9325, "step": 9127 }, { "epoch": 0.3, "grad_norm": 0.4411131739616394, "learning_rate": 0.0005849566321701635, "loss": 1.9192, "step": 9128 }, { "epoch": 0.3, "grad_norm": 0.46039584279060364, "learning_rate": 0.0005849533637314973, "loss": 1.9587, "step": 9129 }, { "epoch": 0.3, "grad_norm": 0.46013695001602173, "learning_rate": 0.0005849500949469397, "loss": 1.9449, "step": 9130 }, { "epoch": 0.3, "grad_norm": 0.45767712593078613, "learning_rate": 0.0005849468258164946, "loss": 1.9805, "step": 9131 }, { "epoch": 0.3, "grad_norm": 0.4782964885234833, "learning_rate": 0.0005849435563401662, "loss": 1.9385, "step": 9132 }, { "epoch": 0.3, "grad_norm": 0.4581216275691986, "learning_rate": 0.0005849402865179583, "loss": 1.8567, "step": 9133 }, { "epoch": 0.3, "grad_norm": 0.4553969204425812, "learning_rate": 0.0005849370163498748, "loss": 2.0099, "step": 9134 }, { "epoch": 0.3, "grad_norm": 0.4398181140422821, "learning_rate": 0.0005849337458359197, "loss": 1.8904, "step": 9135 }, { "epoch": 0.3, "grad_norm": 0.4387677311897278, "learning_rate": 0.0005849304749760971, "loss": 1.8963, "step": 9136 }, { "epoch": 0.3, "grad_norm": 0.4486343264579773, "learning_rate": 0.0005849272037704108, "loss": 1.8447, "step": 9137 }, { "epoch": 0.3, "grad_norm": 0.5115742087364197, "learning_rate": 0.0005849239322188648, "loss": 1.9427, "step": 9138 }, { "epoch": 0.3, "grad_norm": 0.43098029494285583, "learning_rate": 0.0005849206603214633, "loss": 1.8395, "step": 9139 }, { "epoch": 0.3, "grad_norm": 0.5272759199142456, "learning_rate": 0.00058491738807821, "loss": 1.8968, "step": 9140 }, { "epoch": 0.3, "grad_norm": 0.4448729455471039, "learning_rate": 0.000584914115489109, "loss": 1.9275, "step": 9141 }, { "epoch": 0.3, "grad_norm": 0.442586213350296, "learning_rate": 0.0005849108425541643, "loss": 1.8916, "step": 9142 }, { "epoch": 0.3, "grad_norm": 0.46700435876846313, "learning_rate": 0.0005849075692733797, "loss": 1.9244, "step": 9143 }, { "epoch": 0.3, "grad_norm": 0.45863768458366394, "learning_rate": 0.0005849042956467593, "loss": 1.901, "step": 9144 }, { "epoch": 0.3, "grad_norm": 0.43377000093460083, "learning_rate": 0.0005849010216743072, "loss": 1.8425, "step": 9145 }, { "epoch": 0.3, "grad_norm": 0.4589882493019104, "learning_rate": 0.000584897747356027, "loss": 1.9664, "step": 9146 }, { "epoch": 0.3, "grad_norm": 0.45573076605796814, "learning_rate": 0.0005848944726919231, "loss": 1.8994, "step": 9147 }, { "epoch": 0.3, "grad_norm": 0.4396446645259857, "learning_rate": 0.0005848911976819993, "loss": 1.8956, "step": 9148 }, { "epoch": 0.3, "grad_norm": 0.4509133994579315, "learning_rate": 0.0005848879223262594, "loss": 1.9494, "step": 9149 }, { "epoch": 0.3, "grad_norm": 0.45060035586357117, "learning_rate": 0.0005848846466247077, "loss": 1.9076, "step": 9150 }, { "epoch": 0.3, "grad_norm": 0.4567869007587433, "learning_rate": 0.000584881370577348, "loss": 1.8715, "step": 9151 }, { "epoch": 0.3, "grad_norm": 0.4396345317363739, "learning_rate": 0.0005848780941841842, "loss": 1.8909, "step": 9152 }, { "epoch": 0.3, "grad_norm": 0.4611314535140991, "learning_rate": 0.0005848748174452204, "loss": 1.9166, "step": 9153 }, { "epoch": 0.3, "grad_norm": 0.4453980326652527, "learning_rate": 0.0005848715403604606, "loss": 1.9176, "step": 9154 }, { "epoch": 0.3, "grad_norm": 0.45097774267196655, "learning_rate": 0.0005848682629299087, "loss": 1.9835, "step": 9155 }, { "epoch": 0.3, "grad_norm": 0.45025819540023804, "learning_rate": 0.0005848649851535687, "loss": 1.8747, "step": 9156 }, { "epoch": 0.3, "grad_norm": 0.4608326256275177, "learning_rate": 0.0005848617070314446, "loss": 1.8901, "step": 9157 }, { "epoch": 0.3, "grad_norm": 0.4534934163093567, "learning_rate": 0.0005848584285635403, "loss": 1.8923, "step": 9158 }, { "epoch": 0.3, "grad_norm": 0.4385242760181427, "learning_rate": 0.0005848551497498599, "loss": 1.9208, "step": 9159 }, { "epoch": 0.3, "grad_norm": 0.4456809163093567, "learning_rate": 0.0005848518705904072, "loss": 1.946, "step": 9160 }, { "epoch": 0.3, "grad_norm": 0.4557061791419983, "learning_rate": 0.0005848485910851864, "loss": 1.9071, "step": 9161 }, { "epoch": 0.3, "grad_norm": 0.45790666341781616, "learning_rate": 0.0005848453112342013, "loss": 1.8667, "step": 9162 }, { "epoch": 0.3, "grad_norm": 0.46443209052085876, "learning_rate": 0.0005848420310374562, "loss": 1.9785, "step": 9163 }, { "epoch": 0.3, "grad_norm": 0.4379412829875946, "learning_rate": 0.0005848387504949547, "loss": 1.9106, "step": 9164 }, { "epoch": 0.3, "grad_norm": 0.46259114146232605, "learning_rate": 0.0005848354696067009, "loss": 1.9096, "step": 9165 }, { "epoch": 0.3, "grad_norm": 0.45993754267692566, "learning_rate": 0.0005848321883726987, "loss": 1.8831, "step": 9166 }, { "epoch": 0.3, "grad_norm": 0.45235195755958557, "learning_rate": 0.0005848289067929524, "loss": 1.7881, "step": 9167 }, { "epoch": 0.31, "grad_norm": 0.45682206749916077, "learning_rate": 0.0005848256248674657, "loss": 1.9282, "step": 9168 }, { "epoch": 0.31, "grad_norm": 0.4583154022693634, "learning_rate": 0.0005848223425962426, "loss": 1.8492, "step": 9169 }, { "epoch": 0.31, "grad_norm": 0.47468456625938416, "learning_rate": 0.0005848190599792871, "loss": 1.9054, "step": 9170 }, { "epoch": 0.31, "grad_norm": 0.4532589614391327, "learning_rate": 0.0005848157770166034, "loss": 1.8329, "step": 9171 }, { "epoch": 0.31, "grad_norm": 0.45221850275993347, "learning_rate": 0.0005848124937081952, "loss": 1.804, "step": 9172 }, { "epoch": 0.31, "grad_norm": 0.4502878189086914, "learning_rate": 0.0005848092100540666, "loss": 1.8458, "step": 9173 }, { "epoch": 0.31, "grad_norm": 0.4425504207611084, "learning_rate": 0.0005848059260542217, "loss": 1.9037, "step": 9174 }, { "epoch": 0.31, "grad_norm": 0.44395899772644043, "learning_rate": 0.0005848026417086642, "loss": 1.9674, "step": 9175 }, { "epoch": 0.31, "grad_norm": 0.4510253965854645, "learning_rate": 0.0005847993570173983, "loss": 1.9246, "step": 9176 }, { "epoch": 0.31, "grad_norm": 0.46321558952331543, "learning_rate": 0.000584796071980428, "loss": 1.896, "step": 9177 }, { "epoch": 0.31, "grad_norm": 0.4811834692955017, "learning_rate": 0.0005847927865977572, "loss": 1.9111, "step": 9178 }, { "epoch": 0.31, "grad_norm": 0.4476502239704132, "learning_rate": 0.00058478950086939, "loss": 1.8548, "step": 9179 }, { "epoch": 0.31, "grad_norm": 0.46070149540901184, "learning_rate": 0.0005847862147953301, "loss": 1.8893, "step": 9180 }, { "epoch": 0.31, "grad_norm": 0.46639201045036316, "learning_rate": 0.0005847829283755819, "loss": 1.8944, "step": 9181 }, { "epoch": 0.31, "grad_norm": 0.45262759923934937, "learning_rate": 0.0005847796416101491, "loss": 1.8483, "step": 9182 }, { "epoch": 0.31, "grad_norm": 0.4746364951133728, "learning_rate": 0.0005847763544990358, "loss": 1.8108, "step": 9183 }, { "epoch": 0.31, "grad_norm": 0.45602598786354065, "learning_rate": 0.0005847730670422459, "loss": 1.8908, "step": 9184 }, { "epoch": 0.31, "grad_norm": 0.47328245639801025, "learning_rate": 0.0005847697792397836, "loss": 1.9594, "step": 9185 }, { "epoch": 0.31, "grad_norm": 0.4627877175807953, "learning_rate": 0.0005847664910916526, "loss": 1.8847, "step": 9186 }, { "epoch": 0.31, "grad_norm": 0.4720726013183594, "learning_rate": 0.0005847632025978571, "loss": 1.9016, "step": 9187 }, { "epoch": 0.31, "grad_norm": 0.46296846866607666, "learning_rate": 0.000584759913758401, "loss": 1.9166, "step": 9188 }, { "epoch": 0.31, "grad_norm": 0.48538440465927124, "learning_rate": 0.0005847566245732885, "loss": 1.903, "step": 9189 }, { "epoch": 0.31, "grad_norm": 0.45352256298065186, "learning_rate": 0.0005847533350425232, "loss": 1.9174, "step": 9190 }, { "epoch": 0.31, "grad_norm": 0.4789685010910034, "learning_rate": 0.0005847500451661093, "loss": 1.9289, "step": 9191 }, { "epoch": 0.31, "grad_norm": 0.4737473130226135, "learning_rate": 0.0005847467549440509, "loss": 1.9702, "step": 9192 }, { "epoch": 0.31, "grad_norm": 0.4498308598995209, "learning_rate": 0.0005847434643763518, "loss": 1.914, "step": 9193 }, { "epoch": 0.31, "grad_norm": 0.4758317768573761, "learning_rate": 0.0005847401734630161, "loss": 1.9818, "step": 9194 }, { "epoch": 0.31, "grad_norm": 0.46648627519607544, "learning_rate": 0.000584736882204048, "loss": 1.8393, "step": 9195 }, { "epoch": 0.31, "grad_norm": 0.4611903429031372, "learning_rate": 0.0005847335905994511, "loss": 1.9267, "step": 9196 }, { "epoch": 0.31, "grad_norm": 0.48058176040649414, "learning_rate": 0.0005847302986492296, "loss": 1.8971, "step": 9197 }, { "epoch": 0.31, "grad_norm": 0.46630632877349854, "learning_rate": 0.0005847270063533874, "loss": 1.9519, "step": 9198 }, { "epoch": 0.31, "grad_norm": 0.46803468465805054, "learning_rate": 0.0005847237137119286, "loss": 1.9256, "step": 9199 }, { "epoch": 0.31, "grad_norm": 0.47180378437042236, "learning_rate": 0.0005847204207248572, "loss": 1.9642, "step": 9200 }, { "epoch": 0.31, "grad_norm": 0.4567388892173767, "learning_rate": 0.0005847171273921772, "loss": 1.9908, "step": 9201 }, { "epoch": 0.31, "grad_norm": 0.44253793358802795, "learning_rate": 0.0005847138337138925, "loss": 1.906, "step": 9202 }, { "epoch": 0.31, "grad_norm": 0.48406392335891724, "learning_rate": 0.0005847105396900072, "loss": 1.9038, "step": 9203 }, { "epoch": 0.31, "grad_norm": 0.45273321866989136, "learning_rate": 0.0005847072453205251, "loss": 1.917, "step": 9204 }, { "epoch": 0.31, "grad_norm": 0.47131064534187317, "learning_rate": 0.0005847039506054506, "loss": 1.9467, "step": 9205 }, { "epoch": 0.31, "grad_norm": 0.45788446068763733, "learning_rate": 0.0005847006555447872, "loss": 1.9182, "step": 9206 }, { "epoch": 0.31, "grad_norm": 0.45782890915870667, "learning_rate": 0.0005846973601385394, "loss": 1.8277, "step": 9207 }, { "epoch": 0.31, "grad_norm": 0.4489518702030182, "learning_rate": 0.0005846940643867108, "loss": 1.9677, "step": 9208 }, { "epoch": 0.31, "grad_norm": 0.4690609574317932, "learning_rate": 0.0005846907682893056, "loss": 1.9009, "step": 9209 }, { "epoch": 0.31, "grad_norm": 0.47118011116981506, "learning_rate": 0.0005846874718463277, "loss": 2.014, "step": 9210 }, { "epoch": 0.31, "grad_norm": 0.4554033875465393, "learning_rate": 0.0005846841750577812, "loss": 1.9043, "step": 9211 }, { "epoch": 0.31, "grad_norm": 0.475159227848053, "learning_rate": 0.00058468087792367, "loss": 1.8839, "step": 9212 }, { "epoch": 0.31, "grad_norm": 0.4440816342830658, "learning_rate": 0.0005846775804439983, "loss": 1.9525, "step": 9213 }, { "epoch": 0.31, "grad_norm": 0.459144651889801, "learning_rate": 0.00058467428261877, "loss": 1.9771, "step": 9214 }, { "epoch": 0.31, "grad_norm": 0.45033183693885803, "learning_rate": 0.0005846709844479889, "loss": 1.8979, "step": 9215 }, { "epoch": 0.31, "grad_norm": 0.44896310567855835, "learning_rate": 0.0005846676859316591, "loss": 1.9415, "step": 9216 }, { "epoch": 0.31, "grad_norm": 0.4586807191371918, "learning_rate": 0.0005846643870697848, "loss": 1.9184, "step": 9217 }, { "epoch": 0.31, "grad_norm": 0.46442437171936035, "learning_rate": 0.0005846610878623699, "loss": 1.8732, "step": 9218 }, { "epoch": 0.31, "grad_norm": 0.4505786895751953, "learning_rate": 0.0005846577883094183, "loss": 1.8917, "step": 9219 }, { "epoch": 0.31, "grad_norm": 0.4842357337474823, "learning_rate": 0.0005846544884109342, "loss": 1.9049, "step": 9220 }, { "epoch": 0.31, "grad_norm": 0.4707355797290802, "learning_rate": 0.0005846511881669214, "loss": 1.9496, "step": 9221 }, { "epoch": 0.31, "grad_norm": 0.4650059640407562, "learning_rate": 0.0005846478875773841, "loss": 1.8561, "step": 9222 }, { "epoch": 0.31, "grad_norm": 0.4691299498081207, "learning_rate": 0.0005846445866423261, "loss": 1.8113, "step": 9223 }, { "epoch": 0.31, "grad_norm": 0.4694889485836029, "learning_rate": 0.0005846412853617516, "loss": 1.9043, "step": 9224 }, { "epoch": 0.31, "grad_norm": 0.46628499031066895, "learning_rate": 0.0005846379837356644, "loss": 1.9052, "step": 9225 }, { "epoch": 0.31, "grad_norm": 0.4592144787311554, "learning_rate": 0.0005846346817640687, "loss": 1.9674, "step": 9226 }, { "epoch": 0.31, "grad_norm": 0.4618982672691345, "learning_rate": 0.0005846313794469685, "loss": 1.9264, "step": 9227 }, { "epoch": 0.31, "grad_norm": 0.4844622015953064, "learning_rate": 0.0005846280767843676, "loss": 1.8583, "step": 9228 }, { "epoch": 0.31, "grad_norm": 0.45716845989227295, "learning_rate": 0.0005846247737762702, "loss": 1.8747, "step": 9229 }, { "epoch": 0.31, "grad_norm": 0.4614831507205963, "learning_rate": 0.0005846214704226803, "loss": 1.9333, "step": 9230 }, { "epoch": 0.31, "grad_norm": 0.46728864312171936, "learning_rate": 0.0005846181667236019, "loss": 1.8647, "step": 9231 }, { "epoch": 0.31, "grad_norm": 0.45224714279174805, "learning_rate": 0.0005846148626790389, "loss": 1.8577, "step": 9232 }, { "epoch": 0.31, "grad_norm": 0.465547114610672, "learning_rate": 0.0005846115582889954, "loss": 1.8715, "step": 9233 }, { "epoch": 0.31, "grad_norm": 0.45445847511291504, "learning_rate": 0.0005846082535534755, "loss": 1.967, "step": 9234 }, { "epoch": 0.31, "grad_norm": 0.4524916410446167, "learning_rate": 0.000584604948472483, "loss": 1.8623, "step": 9235 }, { "epoch": 0.31, "grad_norm": 0.4650765359401703, "learning_rate": 0.000584601643046022, "loss": 1.958, "step": 9236 }, { "epoch": 0.31, "grad_norm": 0.4713483452796936, "learning_rate": 0.0005845983372740966, "loss": 1.879, "step": 9237 }, { "epoch": 0.31, "grad_norm": 0.4673597514629364, "learning_rate": 0.0005845950311567109, "loss": 1.9404, "step": 9238 }, { "epoch": 0.31, "grad_norm": 0.4979502558708191, "learning_rate": 0.0005845917246938686, "loss": 1.9449, "step": 9239 }, { "epoch": 0.31, "grad_norm": 0.4588625133037567, "learning_rate": 0.0005845884178855739, "loss": 1.8819, "step": 9240 }, { "epoch": 0.31, "grad_norm": 0.5136809945106506, "learning_rate": 0.0005845851107318308, "loss": 1.9373, "step": 9241 }, { "epoch": 0.31, "grad_norm": 0.4834054708480835, "learning_rate": 0.0005845818032326434, "loss": 1.9966, "step": 9242 }, { "epoch": 0.31, "grad_norm": 0.4657752811908722, "learning_rate": 0.0005845784953880155, "loss": 1.9371, "step": 9243 }, { "epoch": 0.31, "grad_norm": 0.4695526957511902, "learning_rate": 0.0005845751871979513, "loss": 1.9274, "step": 9244 }, { "epoch": 0.31, "grad_norm": 0.4445056617259979, "learning_rate": 0.0005845718786624547, "loss": 1.915, "step": 9245 }, { "epoch": 0.31, "grad_norm": 0.4451717734336853, "learning_rate": 0.0005845685697815298, "loss": 1.863, "step": 9246 }, { "epoch": 0.31, "grad_norm": 0.44185033440589905, "learning_rate": 0.0005845652605551806, "loss": 1.9368, "step": 9247 }, { "epoch": 0.31, "grad_norm": 0.4510974586009979, "learning_rate": 0.0005845619509834111, "loss": 1.9201, "step": 9248 }, { "epoch": 0.31, "grad_norm": 0.44501227140426636, "learning_rate": 0.0005845586410662253, "loss": 1.8436, "step": 9249 }, { "epoch": 0.31, "grad_norm": 0.4641610085964203, "learning_rate": 0.0005845553308036272, "loss": 1.9311, "step": 9250 }, { "epoch": 0.31, "grad_norm": 0.4450419843196869, "learning_rate": 0.0005845520201956209, "loss": 1.8492, "step": 9251 }, { "epoch": 0.31, "grad_norm": 0.45708051323890686, "learning_rate": 0.0005845487092422104, "loss": 1.9825, "step": 9252 }, { "epoch": 0.31, "grad_norm": 0.46131807565689087, "learning_rate": 0.0005845453979433997, "loss": 1.9046, "step": 9253 }, { "epoch": 0.31, "grad_norm": 0.485105961561203, "learning_rate": 0.0005845420862991927, "loss": 1.9322, "step": 9254 }, { "epoch": 0.31, "grad_norm": 0.45498013496398926, "learning_rate": 0.0005845387743095937, "loss": 1.88, "step": 9255 }, { "epoch": 0.31, "grad_norm": 0.46172699332237244, "learning_rate": 0.0005845354619746064, "loss": 1.9908, "step": 9256 }, { "epoch": 0.31, "grad_norm": 0.4574103057384491, "learning_rate": 0.000584532149294235, "loss": 1.9143, "step": 9257 }, { "epoch": 0.31, "grad_norm": 0.4872744679450989, "learning_rate": 0.0005845288362684835, "loss": 1.9656, "step": 9258 }, { "epoch": 0.31, "grad_norm": 0.46415844559669495, "learning_rate": 0.0005845255228973561, "loss": 1.8768, "step": 9259 }, { "epoch": 0.31, "grad_norm": 0.4565630853176117, "learning_rate": 0.0005845222091808564, "loss": 1.926, "step": 9260 }, { "epoch": 0.31, "grad_norm": 0.4884072244167328, "learning_rate": 0.0005845188951189887, "loss": 1.8883, "step": 9261 }, { "epoch": 0.31, "grad_norm": 0.47095760703086853, "learning_rate": 0.0005845155807117571, "loss": 1.9553, "step": 9262 }, { "epoch": 0.31, "grad_norm": 0.4768626391887665, "learning_rate": 0.0005845122659591653, "loss": 1.9513, "step": 9263 }, { "epoch": 0.31, "grad_norm": 0.5923148393630981, "learning_rate": 0.0005845089508612177, "loss": 1.9189, "step": 9264 }, { "epoch": 0.31, "grad_norm": 0.45640242099761963, "learning_rate": 0.0005845056354179182, "loss": 1.8373, "step": 9265 }, { "epoch": 0.31, "grad_norm": 0.4680017828941345, "learning_rate": 0.0005845023196292706, "loss": 1.9386, "step": 9266 }, { "epoch": 0.31, "grad_norm": 0.4693318009376526, "learning_rate": 0.0005844990034952793, "loss": 1.9029, "step": 9267 }, { "epoch": 0.31, "grad_norm": 0.46715211868286133, "learning_rate": 0.000584495687015948, "loss": 1.9561, "step": 9268 }, { "epoch": 0.31, "grad_norm": 0.4745497703552246, "learning_rate": 0.0005844923701912809, "loss": 1.9719, "step": 9269 }, { "epoch": 0.31, "grad_norm": 0.47112905979156494, "learning_rate": 0.0005844890530212819, "loss": 1.89, "step": 9270 }, { "epoch": 0.31, "grad_norm": 0.44664597511291504, "learning_rate": 0.0005844857355059552, "loss": 1.9645, "step": 9271 }, { "epoch": 0.31, "grad_norm": 0.44211527705192566, "learning_rate": 0.0005844824176453047, "loss": 1.9424, "step": 9272 }, { "epoch": 0.31, "grad_norm": 0.46708884835243225, "learning_rate": 0.0005844790994393345, "loss": 1.9015, "step": 9273 }, { "epoch": 0.31, "grad_norm": 0.46638810634613037, "learning_rate": 0.0005844757808880485, "loss": 1.9134, "step": 9274 }, { "epoch": 0.31, "grad_norm": 0.4493345022201538, "learning_rate": 0.000584472461991451, "loss": 1.8737, "step": 9275 }, { "epoch": 0.31, "grad_norm": 0.4392035901546478, "learning_rate": 0.0005844691427495456, "loss": 1.904, "step": 9276 }, { "epoch": 0.31, "grad_norm": 0.46709296107292175, "learning_rate": 0.0005844658231623367, "loss": 1.903, "step": 9277 }, { "epoch": 0.31, "grad_norm": 0.45775842666625977, "learning_rate": 0.0005844625032298283, "loss": 1.8124, "step": 9278 }, { "epoch": 0.31, "grad_norm": 0.46456053853034973, "learning_rate": 0.0005844591829520241, "loss": 1.9247, "step": 9279 }, { "epoch": 0.31, "grad_norm": 0.46625906229019165, "learning_rate": 0.0005844558623289285, "loss": 1.8938, "step": 9280 }, { "epoch": 0.31, "grad_norm": 0.44969090819358826, "learning_rate": 0.0005844525413605454, "loss": 1.8747, "step": 9281 }, { "epoch": 0.31, "grad_norm": 0.4636087715625763, "learning_rate": 0.0005844492200468788, "loss": 1.8791, "step": 9282 }, { "epoch": 0.31, "grad_norm": 0.46165648102760315, "learning_rate": 0.0005844458983879329, "loss": 1.8418, "step": 9283 }, { "epoch": 0.31, "grad_norm": 0.44885823130607605, "learning_rate": 0.0005844425763837114, "loss": 1.9406, "step": 9284 }, { "epoch": 0.31, "grad_norm": 0.45671433210372925, "learning_rate": 0.0005844392540342186, "loss": 1.868, "step": 9285 }, { "epoch": 0.31, "grad_norm": 0.47775858640670776, "learning_rate": 0.0005844359313394585, "loss": 1.896, "step": 9286 }, { "epoch": 0.31, "grad_norm": 0.46029600501060486, "learning_rate": 0.000584432608299435, "loss": 1.9135, "step": 9287 }, { "epoch": 0.31, "grad_norm": 0.46658989787101746, "learning_rate": 0.0005844292849141523, "loss": 1.9222, "step": 9288 }, { "epoch": 0.31, "grad_norm": 0.4560481309890747, "learning_rate": 0.0005844259611836142, "loss": 1.8739, "step": 9289 }, { "epoch": 0.31, "grad_norm": 0.502827525138855, "learning_rate": 0.0005844226371078251, "loss": 1.9627, "step": 9290 }, { "epoch": 0.31, "grad_norm": 0.4428703188896179, "learning_rate": 0.0005844193126867888, "loss": 1.8457, "step": 9291 }, { "epoch": 0.31, "grad_norm": 0.48013928532600403, "learning_rate": 0.0005844159879205093, "loss": 1.9251, "step": 9292 }, { "epoch": 0.31, "grad_norm": 0.49712565541267395, "learning_rate": 0.0005844126628089906, "loss": 1.9278, "step": 9293 }, { "epoch": 0.31, "grad_norm": 0.4447183907032013, "learning_rate": 0.0005844093373522369, "loss": 1.873, "step": 9294 }, { "epoch": 0.31, "grad_norm": 0.46626734733581543, "learning_rate": 0.0005844060115502523, "loss": 1.9761, "step": 9295 }, { "epoch": 0.31, "grad_norm": 0.4774651825428009, "learning_rate": 0.0005844026854030406, "loss": 1.8729, "step": 9296 }, { "epoch": 0.31, "grad_norm": 0.4482278823852539, "learning_rate": 0.000584399358910606, "loss": 1.8596, "step": 9297 }, { "epoch": 0.31, "grad_norm": 0.433525949716568, "learning_rate": 0.0005843960320729523, "loss": 1.9636, "step": 9298 }, { "epoch": 0.31, "grad_norm": 0.4534900486469269, "learning_rate": 0.0005843927048900839, "loss": 1.8692, "step": 9299 }, { "epoch": 0.31, "grad_norm": 0.4566926658153534, "learning_rate": 0.0005843893773620046, "loss": 1.9269, "step": 9300 }, { "epoch": 0.31, "grad_norm": 0.47166377305984497, "learning_rate": 0.0005843860494887186, "loss": 1.9595, "step": 9301 }, { "epoch": 0.31, "grad_norm": 0.45901501178741455, "learning_rate": 0.0005843827212702297, "loss": 1.9282, "step": 9302 }, { "epoch": 0.31, "grad_norm": 0.5130106210708618, "learning_rate": 0.0005843793927065422, "loss": 1.9051, "step": 9303 }, { "epoch": 0.31, "grad_norm": 0.4672677218914032, "learning_rate": 0.0005843760637976598, "loss": 2.0103, "step": 9304 }, { "epoch": 0.31, "grad_norm": 0.43392208218574524, "learning_rate": 0.000584372734543587, "loss": 1.9081, "step": 9305 }, { "epoch": 0.31, "grad_norm": 0.4581402838230133, "learning_rate": 0.0005843694049443276, "loss": 1.8812, "step": 9306 }, { "epoch": 0.31, "grad_norm": 0.44039952754974365, "learning_rate": 0.0005843660749998854, "loss": 1.9395, "step": 9307 }, { "epoch": 0.31, "grad_norm": 0.43762391805648804, "learning_rate": 0.0005843627447102649, "loss": 1.8708, "step": 9308 }, { "epoch": 0.31, "grad_norm": 0.4542752802371979, "learning_rate": 0.0005843594140754699, "loss": 1.9347, "step": 9309 }, { "epoch": 0.31, "grad_norm": 0.45138898491859436, "learning_rate": 0.0005843560830955044, "loss": 1.9038, "step": 9310 }, { "epoch": 0.31, "grad_norm": 0.4516770839691162, "learning_rate": 0.0005843527517703725, "loss": 1.8463, "step": 9311 }, { "epoch": 0.31, "grad_norm": 0.4525715708732605, "learning_rate": 0.0005843494201000783, "loss": 1.9407, "step": 9312 }, { "epoch": 0.31, "grad_norm": 0.44102683663368225, "learning_rate": 0.0005843460880846259, "loss": 1.8196, "step": 9313 }, { "epoch": 0.31, "grad_norm": 0.43515047430992126, "learning_rate": 0.000584342755724019, "loss": 1.8718, "step": 9314 }, { "epoch": 0.31, "grad_norm": 0.45322638750076294, "learning_rate": 0.000584339423018262, "loss": 1.8451, "step": 9315 }, { "epoch": 0.31, "grad_norm": 0.44568607211112976, "learning_rate": 0.0005843360899673589, "loss": 1.862, "step": 9316 }, { "epoch": 0.31, "grad_norm": 0.4588930606842041, "learning_rate": 0.0005843327565713138, "loss": 1.9261, "step": 9317 }, { "epoch": 0.31, "grad_norm": 0.44668498635292053, "learning_rate": 0.0005843294228301304, "loss": 1.9052, "step": 9318 }, { "epoch": 0.31, "grad_norm": 0.4594472348690033, "learning_rate": 0.0005843260887438131, "loss": 1.914, "step": 9319 }, { "epoch": 0.31, "grad_norm": 0.4540697932243347, "learning_rate": 0.0005843227543123657, "loss": 1.9415, "step": 9320 }, { "epoch": 0.31, "grad_norm": 0.45134270191192627, "learning_rate": 0.0005843194195357924, "loss": 1.864, "step": 9321 }, { "epoch": 0.31, "grad_norm": 0.4720765948295593, "learning_rate": 0.0005843160844140973, "loss": 1.8848, "step": 9322 }, { "epoch": 0.31, "grad_norm": 0.46145394444465637, "learning_rate": 0.0005843127489472843, "loss": 1.9502, "step": 9323 }, { "epoch": 0.31, "grad_norm": 0.4577859342098236, "learning_rate": 0.0005843094131353576, "loss": 1.8821, "step": 9324 }, { "epoch": 0.31, "grad_norm": 0.4383751451969147, "learning_rate": 0.0005843060769783212, "loss": 1.8328, "step": 9325 }, { "epoch": 0.31, "grad_norm": 0.44583287835121155, "learning_rate": 0.000584302740476179, "loss": 1.8869, "step": 9326 }, { "epoch": 0.31, "grad_norm": 0.4759216904640198, "learning_rate": 0.0005842994036289351, "loss": 1.8774, "step": 9327 }, { "epoch": 0.31, "grad_norm": 0.44155484437942505, "learning_rate": 0.0005842960664365937, "loss": 1.9234, "step": 9328 }, { "epoch": 0.31, "grad_norm": 0.4448118805885315, "learning_rate": 0.0005842927288991588, "loss": 1.8808, "step": 9329 }, { "epoch": 0.31, "grad_norm": 0.43932726979255676, "learning_rate": 0.0005842893910166344, "loss": 1.9178, "step": 9330 }, { "epoch": 0.31, "grad_norm": 0.4678642749786377, "learning_rate": 0.0005842860527890245, "loss": 1.9037, "step": 9331 }, { "epoch": 0.31, "grad_norm": 0.454990953207016, "learning_rate": 0.0005842827142163333, "loss": 2.0176, "step": 9332 }, { "epoch": 0.31, "grad_norm": 0.4498230516910553, "learning_rate": 0.0005842793752985647, "loss": 1.965, "step": 9333 }, { "epoch": 0.31, "grad_norm": 0.43762850761413574, "learning_rate": 0.0005842760360357229, "loss": 1.9415, "step": 9334 }, { "epoch": 0.31, "grad_norm": 0.46936196088790894, "learning_rate": 0.0005842726964278118, "loss": 1.8863, "step": 9335 }, { "epoch": 0.31, "grad_norm": 0.4545478820800781, "learning_rate": 0.0005842693564748357, "loss": 2.0479, "step": 9336 }, { "epoch": 0.31, "grad_norm": 0.4563594460487366, "learning_rate": 0.0005842660161767982, "loss": 1.9791, "step": 9337 }, { "epoch": 0.31, "grad_norm": 0.45151975750923157, "learning_rate": 0.0005842626755337038, "loss": 1.8995, "step": 9338 }, { "epoch": 0.31, "grad_norm": 0.4556940793991089, "learning_rate": 0.0005842593345455565, "loss": 1.8199, "step": 9339 }, { "epoch": 0.31, "grad_norm": 0.44919493794441223, "learning_rate": 0.0005842559932123601, "loss": 1.9231, "step": 9340 }, { "epoch": 0.31, "grad_norm": 0.45227792859077454, "learning_rate": 0.0005842526515341189, "loss": 1.9063, "step": 9341 }, { "epoch": 0.31, "grad_norm": 0.4708503782749176, "learning_rate": 0.0005842493095108369, "loss": 1.984, "step": 9342 }, { "epoch": 0.31, "grad_norm": 0.44734033942222595, "learning_rate": 0.0005842459671425179, "loss": 1.9353, "step": 9343 }, { "epoch": 0.31, "grad_norm": 0.4518055319786072, "learning_rate": 0.0005842426244291664, "loss": 1.8933, "step": 9344 }, { "epoch": 0.31, "grad_norm": 0.46923860907554626, "learning_rate": 0.0005842392813707861, "loss": 2.0489, "step": 9345 }, { "epoch": 0.31, "grad_norm": 0.4497665762901306, "learning_rate": 0.0005842359379673813, "loss": 1.8748, "step": 9346 }, { "epoch": 0.31, "grad_norm": 0.48680922389030457, "learning_rate": 0.0005842325942189558, "loss": 1.8914, "step": 9347 }, { "epoch": 0.31, "grad_norm": 0.45240816473960876, "learning_rate": 0.000584229250125514, "loss": 1.8301, "step": 9348 }, { "epoch": 0.31, "grad_norm": 0.46558958292007446, "learning_rate": 0.0005842259056870596, "loss": 1.9445, "step": 9349 }, { "epoch": 0.31, "grad_norm": 0.6814724802970886, "learning_rate": 0.0005842225609035969, "loss": 1.8811, "step": 9350 }, { "epoch": 0.31, "grad_norm": 0.4673477113246918, "learning_rate": 0.0005842192157751299, "loss": 1.8956, "step": 9351 }, { "epoch": 0.31, "grad_norm": 0.45137888193130493, "learning_rate": 0.0005842158703016627, "loss": 1.9336, "step": 9352 }, { "epoch": 0.31, "grad_norm": 0.46688997745513916, "learning_rate": 0.0005842125244831992, "loss": 1.9029, "step": 9353 }, { "epoch": 0.31, "grad_norm": 0.4532221257686615, "learning_rate": 0.0005842091783197437, "loss": 1.9295, "step": 9354 }, { "epoch": 0.31, "grad_norm": 0.4635567367076874, "learning_rate": 0.0005842058318113, "loss": 1.8642, "step": 9355 }, { "epoch": 0.31, "grad_norm": 0.45058295130729675, "learning_rate": 0.0005842024849578724, "loss": 1.956, "step": 9356 }, { "epoch": 0.31, "grad_norm": 0.4798228144645691, "learning_rate": 0.0005841991377594647, "loss": 1.9327, "step": 9357 }, { "epoch": 0.31, "grad_norm": 0.4613789916038513, "learning_rate": 0.0005841957902160813, "loss": 1.9422, "step": 9358 }, { "epoch": 0.31, "grad_norm": 0.4532305598258972, "learning_rate": 0.0005841924423277261, "loss": 1.8795, "step": 9359 }, { "epoch": 0.31, "grad_norm": 0.47261154651641846, "learning_rate": 0.000584189094094403, "loss": 1.7972, "step": 9360 }, { "epoch": 0.31, "grad_norm": 0.473554790019989, "learning_rate": 0.0005841857455161163, "loss": 1.9181, "step": 9361 }, { "epoch": 0.31, "grad_norm": 0.46473315358161926, "learning_rate": 0.00058418239659287, "loss": 1.9719, "step": 9362 }, { "epoch": 0.31, "grad_norm": 0.45782867074012756, "learning_rate": 0.000584179047324668, "loss": 1.9035, "step": 9363 }, { "epoch": 0.31, "grad_norm": 0.4558601677417755, "learning_rate": 0.0005841756977115147, "loss": 1.8935, "step": 9364 }, { "epoch": 0.31, "grad_norm": 0.4541167914867401, "learning_rate": 0.0005841723477534138, "loss": 1.9679, "step": 9365 }, { "epoch": 0.31, "grad_norm": 0.4429360330104828, "learning_rate": 0.0005841689974503697, "loss": 1.9158, "step": 9366 }, { "epoch": 0.31, "grad_norm": 0.4574277698993683, "learning_rate": 0.0005841656468023863, "loss": 1.8611, "step": 9367 }, { "epoch": 0.31, "grad_norm": 0.46318405866622925, "learning_rate": 0.0005841622958094676, "loss": 1.9213, "step": 9368 }, { "epoch": 0.31, "grad_norm": 0.4615457355976105, "learning_rate": 0.0005841589444716177, "loss": 1.908, "step": 9369 }, { "epoch": 0.31, "grad_norm": 0.4537729024887085, "learning_rate": 0.0005841555927888407, "loss": 1.9095, "step": 9370 }, { "epoch": 0.31, "grad_norm": 0.4709946811199188, "learning_rate": 0.0005841522407611408, "loss": 1.8833, "step": 9371 }, { "epoch": 0.31, "grad_norm": 0.4382437765598297, "learning_rate": 0.0005841488883885219, "loss": 1.8597, "step": 9372 }, { "epoch": 0.31, "grad_norm": 0.4515056610107422, "learning_rate": 0.0005841455356709881, "loss": 1.8817, "step": 9373 }, { "epoch": 0.31, "grad_norm": 0.4448402523994446, "learning_rate": 0.0005841421826085435, "loss": 1.8861, "step": 9374 }, { "epoch": 0.31, "grad_norm": 0.46519607305526733, "learning_rate": 0.0005841388292011922, "loss": 1.9041, "step": 9375 }, { "epoch": 0.31, "grad_norm": 0.47040629386901855, "learning_rate": 0.0005841354754489382, "loss": 1.9095, "step": 9376 }, { "epoch": 0.31, "grad_norm": 0.4286872148513794, "learning_rate": 0.0005841321213517855, "loss": 1.8445, "step": 9377 }, { "epoch": 0.31, "grad_norm": 0.4568713903427124, "learning_rate": 0.0005841287669097384, "loss": 1.8844, "step": 9378 }, { "epoch": 0.31, "grad_norm": 0.45843932032585144, "learning_rate": 0.0005841254121228007, "loss": 1.9082, "step": 9379 }, { "epoch": 0.31, "grad_norm": 0.4589671790599823, "learning_rate": 0.0005841220569909767, "loss": 1.9668, "step": 9380 }, { "epoch": 0.31, "grad_norm": 0.47812604904174805, "learning_rate": 0.0005841187015142704, "loss": 1.9077, "step": 9381 }, { "epoch": 0.31, "grad_norm": 0.4431799054145813, "learning_rate": 0.0005841153456926859, "loss": 1.8629, "step": 9382 }, { "epoch": 0.31, "grad_norm": 0.48376980423927307, "learning_rate": 0.0005841119895262271, "loss": 1.912, "step": 9383 }, { "epoch": 0.31, "grad_norm": 0.6056078672409058, "learning_rate": 0.0005841086330148983, "loss": 1.9016, "step": 9384 }, { "epoch": 0.31, "grad_norm": 0.44278550148010254, "learning_rate": 0.0005841052761587035, "loss": 1.8486, "step": 9385 }, { "epoch": 0.31, "grad_norm": 0.4970110356807709, "learning_rate": 0.0005841019189576468, "loss": 1.8183, "step": 9386 }, { "epoch": 0.31, "grad_norm": 0.5011756420135498, "learning_rate": 0.000584098561411732, "loss": 1.8954, "step": 9387 }, { "epoch": 0.31, "grad_norm": 0.45833826065063477, "learning_rate": 0.0005840952035209636, "loss": 1.9249, "step": 9388 }, { "epoch": 0.31, "grad_norm": 0.4776044189929962, "learning_rate": 0.0005840918452853455, "loss": 1.9847, "step": 9389 }, { "epoch": 0.31, "grad_norm": 0.4864490330219269, "learning_rate": 0.0005840884867048817, "loss": 1.8757, "step": 9390 }, { "epoch": 0.31, "grad_norm": 0.5027862787246704, "learning_rate": 0.0005840851277795762, "loss": 1.9207, "step": 9391 }, { "epoch": 0.31, "grad_norm": 0.44222989678382874, "learning_rate": 0.0005840817685094334, "loss": 1.849, "step": 9392 }, { "epoch": 0.31, "grad_norm": 0.4896373450756073, "learning_rate": 0.0005840784088944572, "loss": 1.9362, "step": 9393 }, { "epoch": 0.31, "grad_norm": 0.4702832102775574, "learning_rate": 0.0005840750489346516, "loss": 1.9499, "step": 9394 }, { "epoch": 0.31, "grad_norm": 0.48248526453971863, "learning_rate": 0.0005840716886300208, "loss": 2.0041, "step": 9395 }, { "epoch": 0.31, "grad_norm": 0.4736887812614441, "learning_rate": 0.0005840683279805687, "loss": 1.8185, "step": 9396 }, { "epoch": 0.31, "grad_norm": 0.4889930188655853, "learning_rate": 0.0005840649669862995, "loss": 1.89, "step": 9397 }, { "epoch": 0.31, "grad_norm": 0.4968601167201996, "learning_rate": 0.0005840616056472175, "loss": 1.8636, "step": 9398 }, { "epoch": 0.31, "grad_norm": 0.42615967988967896, "learning_rate": 0.0005840582439633265, "loss": 1.8359, "step": 9399 }, { "epoch": 0.31, "grad_norm": 0.49121931195259094, "learning_rate": 0.0005840548819346304, "loss": 1.9154, "step": 9400 }, { "epoch": 0.31, "grad_norm": 0.5516956448554993, "learning_rate": 0.0005840515195611337, "loss": 1.9855, "step": 9401 }, { "epoch": 0.31, "grad_norm": 0.4674080014228821, "learning_rate": 0.0005840481568428405, "loss": 1.9156, "step": 9402 }, { "epoch": 0.31, "grad_norm": 0.47511571645736694, "learning_rate": 0.0005840447937797544, "loss": 1.8675, "step": 9403 }, { "epoch": 0.31, "grad_norm": 0.47908133268356323, "learning_rate": 0.0005840414303718799, "loss": 1.8618, "step": 9404 }, { "epoch": 0.31, "grad_norm": 0.4837036728858948, "learning_rate": 0.0005840380666192209, "loss": 1.8792, "step": 9405 }, { "epoch": 0.31, "grad_norm": 0.455333411693573, "learning_rate": 0.0005840347025217816, "loss": 1.8531, "step": 9406 }, { "epoch": 0.31, "grad_norm": 0.4489746689796448, "learning_rate": 0.000584031338079566, "loss": 1.8807, "step": 9407 }, { "epoch": 0.31, "grad_norm": 0.49940675497055054, "learning_rate": 0.0005840279732925782, "loss": 1.9146, "step": 9408 }, { "epoch": 0.31, "grad_norm": 0.5447152256965637, "learning_rate": 0.0005840246081608223, "loss": 1.9528, "step": 9409 }, { "epoch": 0.31, "grad_norm": 0.4593675434589386, "learning_rate": 0.0005840212426843023, "loss": 1.9457, "step": 9410 }, { "epoch": 0.31, "grad_norm": 0.4852784276008606, "learning_rate": 0.0005840178768630224, "loss": 1.8505, "step": 9411 }, { "epoch": 0.31, "grad_norm": 0.45272988080978394, "learning_rate": 0.0005840145106969868, "loss": 1.9201, "step": 9412 }, { "epoch": 0.31, "grad_norm": 0.44409701228141785, "learning_rate": 0.0005840111441861993, "loss": 1.8739, "step": 9413 }, { "epoch": 0.31, "grad_norm": 0.4435029923915863, "learning_rate": 0.000584007777330664, "loss": 1.8098, "step": 9414 }, { "epoch": 0.31, "grad_norm": 0.4558218717575073, "learning_rate": 0.0005840044101303852, "loss": 1.905, "step": 9415 }, { "epoch": 0.31, "grad_norm": 0.4848504960536957, "learning_rate": 0.000584001042585367, "loss": 1.8333, "step": 9416 }, { "epoch": 0.31, "grad_norm": 0.45885127782821655, "learning_rate": 0.0005839976746956133, "loss": 1.9892, "step": 9417 }, { "epoch": 0.31, "grad_norm": 0.4402659237384796, "learning_rate": 0.0005839943064611283, "loss": 1.8825, "step": 9418 }, { "epoch": 0.31, "grad_norm": 0.4802590310573578, "learning_rate": 0.0005839909378819161, "loss": 1.8425, "step": 9419 }, { "epoch": 0.31, "grad_norm": 0.48648181557655334, "learning_rate": 0.0005839875689579806, "loss": 1.9497, "step": 9420 }, { "epoch": 0.31, "grad_norm": 0.46839800477027893, "learning_rate": 0.0005839841996893261, "loss": 1.8792, "step": 9421 }, { "epoch": 0.31, "grad_norm": 0.4863072633743286, "learning_rate": 0.0005839808300759566, "loss": 1.9168, "step": 9422 }, { "epoch": 0.31, "grad_norm": 0.46723660826683044, "learning_rate": 0.0005839774601178763, "loss": 1.9292, "step": 9423 }, { "epoch": 0.31, "grad_norm": 0.47071969509124756, "learning_rate": 0.0005839740898150891, "loss": 1.9167, "step": 9424 }, { "epoch": 0.31, "grad_norm": 0.4774700701236725, "learning_rate": 0.0005839707191675992, "loss": 1.8747, "step": 9425 }, { "epoch": 0.31, "grad_norm": 0.47889024019241333, "learning_rate": 0.0005839673481754108, "loss": 1.9356, "step": 9426 }, { "epoch": 0.31, "grad_norm": 0.469374418258667, "learning_rate": 0.0005839639768385278, "loss": 1.7845, "step": 9427 }, { "epoch": 0.31, "grad_norm": 0.4563741683959961, "learning_rate": 0.0005839606051569544, "loss": 1.8202, "step": 9428 }, { "epoch": 0.31, "grad_norm": 0.48141446709632874, "learning_rate": 0.0005839572331306946, "loss": 1.8626, "step": 9429 }, { "epoch": 0.31, "grad_norm": 0.47323668003082275, "learning_rate": 0.0005839538607597527, "loss": 1.8822, "step": 9430 }, { "epoch": 0.31, "grad_norm": 0.4606645405292511, "learning_rate": 0.0005839504880441325, "loss": 1.893, "step": 9431 }, { "epoch": 0.31, "grad_norm": 0.4481814503669739, "learning_rate": 0.0005839471149838382, "loss": 1.8877, "step": 9432 }, { "epoch": 0.31, "grad_norm": 0.4604969918727875, "learning_rate": 0.0005839437415788741, "loss": 1.9023, "step": 9433 }, { "epoch": 0.31, "grad_norm": 0.4738272428512573, "learning_rate": 0.0005839403678292441, "loss": 1.9158, "step": 9434 }, { "epoch": 0.31, "grad_norm": 0.457439661026001, "learning_rate": 0.0005839369937349523, "loss": 2.0018, "step": 9435 }, { "epoch": 0.31, "grad_norm": 0.5354843139648438, "learning_rate": 0.0005839336192960028, "loss": 1.9359, "step": 9436 }, { "epoch": 0.31, "grad_norm": 0.4631045460700989, "learning_rate": 0.0005839302445123998, "loss": 1.9133, "step": 9437 }, { "epoch": 0.31, "grad_norm": 0.4605620205402374, "learning_rate": 0.0005839268693841473, "loss": 1.9116, "step": 9438 }, { "epoch": 0.31, "grad_norm": 0.46214303374290466, "learning_rate": 0.0005839234939112493, "loss": 1.871, "step": 9439 }, { "epoch": 0.31, "grad_norm": 0.450421541929245, "learning_rate": 0.0005839201180937101, "loss": 1.9, "step": 9440 }, { "epoch": 0.31, "grad_norm": 0.4620516300201416, "learning_rate": 0.0005839167419315338, "loss": 1.938, "step": 9441 }, { "epoch": 0.31, "grad_norm": 0.4482634365558624, "learning_rate": 0.0005839133654247243, "loss": 1.9052, "step": 9442 }, { "epoch": 0.31, "grad_norm": 0.4616415798664093, "learning_rate": 0.0005839099885732858, "loss": 1.9685, "step": 9443 }, { "epoch": 0.31, "grad_norm": 0.4725007712841034, "learning_rate": 0.0005839066113772224, "loss": 1.884, "step": 9444 }, { "epoch": 0.31, "grad_norm": 0.5208030343055725, "learning_rate": 0.0005839032338365382, "loss": 1.949, "step": 9445 }, { "epoch": 0.31, "grad_norm": 0.4480094313621521, "learning_rate": 0.0005838998559512374, "loss": 1.8719, "step": 9446 }, { "epoch": 0.31, "grad_norm": 0.4616871178150177, "learning_rate": 0.000583896477721324, "loss": 1.9304, "step": 9447 }, { "epoch": 0.31, "grad_norm": 0.4576655626296997, "learning_rate": 0.000583893099146802, "loss": 1.8772, "step": 9448 }, { "epoch": 0.31, "grad_norm": 0.46784842014312744, "learning_rate": 0.0005838897202276757, "loss": 1.9013, "step": 9449 }, { "epoch": 0.31, "grad_norm": 0.45202627778053284, "learning_rate": 0.0005838863409639491, "loss": 1.881, "step": 9450 }, { "epoch": 0.31, "grad_norm": 0.46708670258522034, "learning_rate": 0.0005838829613556263, "loss": 1.9532, "step": 9451 }, { "epoch": 0.31, "grad_norm": 0.46239012479782104, "learning_rate": 0.0005838795814027114, "loss": 1.9544, "step": 9452 }, { "epoch": 0.31, "grad_norm": 0.45259496569633484, "learning_rate": 0.0005838762011052084, "loss": 1.9264, "step": 9453 }, { "epoch": 0.31, "grad_norm": 0.4647716283798218, "learning_rate": 0.0005838728204631217, "loss": 1.979, "step": 9454 }, { "epoch": 0.31, "grad_norm": 0.4423605501651764, "learning_rate": 0.0005838694394764551, "loss": 1.8872, "step": 9455 }, { "epoch": 0.31, "grad_norm": 0.44342708587646484, "learning_rate": 0.000583866058145213, "loss": 1.9751, "step": 9456 }, { "epoch": 0.31, "grad_norm": 0.45759567618370056, "learning_rate": 0.0005838626764693991, "loss": 1.8851, "step": 9457 }, { "epoch": 0.31, "grad_norm": 0.4372331500053406, "learning_rate": 0.000583859294449018, "loss": 1.9113, "step": 9458 }, { "epoch": 0.31, "grad_norm": 0.4477693438529968, "learning_rate": 0.0005838559120840733, "loss": 1.9416, "step": 9459 }, { "epoch": 0.31, "grad_norm": 0.4491868317127228, "learning_rate": 0.0005838525293745695, "loss": 1.8913, "step": 9460 }, { "epoch": 0.31, "grad_norm": 0.4807681441307068, "learning_rate": 0.0005838491463205104, "loss": 2.0475, "step": 9461 }, { "epoch": 0.31, "grad_norm": 0.4369106888771057, "learning_rate": 0.0005838457629219004, "loss": 1.9228, "step": 9462 }, { "epoch": 0.31, "grad_norm": 0.460062712430954, "learning_rate": 0.0005838423791787433, "loss": 1.9599, "step": 9463 }, { "epoch": 0.31, "grad_norm": 0.458060622215271, "learning_rate": 0.0005838389950910436, "loss": 1.9228, "step": 9464 }, { "epoch": 0.31, "grad_norm": 0.4389631152153015, "learning_rate": 0.0005838356106588051, "loss": 1.967, "step": 9465 }, { "epoch": 0.31, "grad_norm": 0.4433906078338623, "learning_rate": 0.000583832225882032, "loss": 1.9633, "step": 9466 }, { "epoch": 0.31, "grad_norm": 0.44356122612953186, "learning_rate": 0.0005838288407607283, "loss": 1.8506, "step": 9467 }, { "epoch": 0.32, "grad_norm": 0.4514838457107544, "learning_rate": 0.0005838254552948982, "loss": 1.8418, "step": 9468 }, { "epoch": 0.32, "grad_norm": 0.4568997323513031, "learning_rate": 0.000583822069484546, "loss": 1.8958, "step": 9469 }, { "epoch": 0.32, "grad_norm": 0.4470226764678955, "learning_rate": 0.0005838186833296754, "loss": 1.8758, "step": 9470 }, { "epoch": 0.32, "grad_norm": 0.4443962872028351, "learning_rate": 0.0005838152968302908, "loss": 1.8602, "step": 9471 }, { "epoch": 0.32, "grad_norm": 0.46012023091316223, "learning_rate": 0.0005838119099863964, "loss": 1.8785, "step": 9472 }, { "epoch": 0.32, "grad_norm": 0.44383856654167175, "learning_rate": 0.000583808522797996, "loss": 1.9591, "step": 9473 }, { "epoch": 0.32, "grad_norm": 0.4524124264717102, "learning_rate": 0.000583805135265094, "loss": 1.9251, "step": 9474 }, { "epoch": 0.32, "grad_norm": 0.4453065097332001, "learning_rate": 0.0005838017473876943, "loss": 1.9325, "step": 9475 }, { "epoch": 0.32, "grad_norm": 0.46361249685287476, "learning_rate": 0.0005837983591658011, "loss": 1.9654, "step": 9476 }, { "epoch": 0.32, "grad_norm": 0.44281116127967834, "learning_rate": 0.0005837949705994185, "loss": 1.9212, "step": 9477 }, { "epoch": 0.32, "grad_norm": 0.5619158744812012, "learning_rate": 0.0005837915816885506, "loss": 1.9611, "step": 9478 }, { "epoch": 0.32, "grad_norm": 0.45044708251953125, "learning_rate": 0.0005837881924332016, "loss": 1.9203, "step": 9479 }, { "epoch": 0.32, "grad_norm": 0.43655791878700256, "learning_rate": 0.0005837848028333756, "loss": 1.8566, "step": 9480 }, { "epoch": 0.32, "grad_norm": 0.445681631565094, "learning_rate": 0.0005837814128890765, "loss": 1.9192, "step": 9481 }, { "epoch": 0.32, "grad_norm": 0.4600815773010254, "learning_rate": 0.0005837780226003086, "loss": 2.0021, "step": 9482 }, { "epoch": 0.32, "grad_norm": 0.46586674451828003, "learning_rate": 0.0005837746319670761, "loss": 1.9734, "step": 9483 }, { "epoch": 0.32, "grad_norm": 0.46088898181915283, "learning_rate": 0.0005837712409893829, "loss": 1.9527, "step": 9484 }, { "epoch": 0.32, "grad_norm": 0.45053842663764954, "learning_rate": 0.0005837678496672333, "loss": 1.9241, "step": 9485 }, { "epoch": 0.32, "grad_norm": 0.4535103738307953, "learning_rate": 0.0005837644580006313, "loss": 2.0063, "step": 9486 }, { "epoch": 0.32, "grad_norm": 0.4793204963207245, "learning_rate": 0.0005837610659895811, "loss": 1.8403, "step": 9487 }, { "epoch": 0.32, "grad_norm": 0.4513554275035858, "learning_rate": 0.0005837576736340867, "loss": 1.9, "step": 9488 }, { "epoch": 0.32, "grad_norm": 0.4556797742843628, "learning_rate": 0.0005837542809341525, "loss": 1.937, "step": 9489 }, { "epoch": 0.32, "grad_norm": 0.45402440428733826, "learning_rate": 0.0005837508878897822, "loss": 1.888, "step": 9490 }, { "epoch": 0.32, "grad_norm": 0.4548763036727905, "learning_rate": 0.0005837474945009802, "loss": 1.8813, "step": 9491 }, { "epoch": 0.32, "grad_norm": 0.466521292924881, "learning_rate": 0.0005837441007677506, "loss": 1.9683, "step": 9492 }, { "epoch": 0.32, "grad_norm": 0.479230135679245, "learning_rate": 0.0005837407066900974, "loss": 1.9918, "step": 9493 }, { "epoch": 0.32, "grad_norm": 0.46772095561027527, "learning_rate": 0.0005837373122680248, "loss": 1.9004, "step": 9494 }, { "epoch": 0.32, "grad_norm": 0.4428817927837372, "learning_rate": 0.0005837339175015369, "loss": 1.9467, "step": 9495 }, { "epoch": 0.32, "grad_norm": 0.45154786109924316, "learning_rate": 0.0005837305223906378, "loss": 1.9231, "step": 9496 }, { "epoch": 0.32, "grad_norm": 0.49562835693359375, "learning_rate": 0.0005837271269353317, "loss": 1.8941, "step": 9497 }, { "epoch": 0.32, "grad_norm": 0.4611656069755554, "learning_rate": 0.0005837237311356227, "loss": 2.0176, "step": 9498 }, { "epoch": 0.32, "grad_norm": 0.4541435241699219, "learning_rate": 0.0005837203349915149, "loss": 1.9029, "step": 9499 }, { "epoch": 0.32, "grad_norm": 0.463408499956131, "learning_rate": 0.0005837169385030124, "loss": 1.9244, "step": 9500 }, { "epoch": 0.32, "grad_norm": 0.4812949597835541, "learning_rate": 0.0005837135416701193, "loss": 1.9542, "step": 9501 }, { "epoch": 0.32, "grad_norm": 0.4406740665435791, "learning_rate": 0.0005837101444928398, "loss": 1.8461, "step": 9502 }, { "epoch": 0.32, "grad_norm": 0.4501236081123352, "learning_rate": 0.0005837067469711779, "loss": 1.8673, "step": 9503 }, { "epoch": 0.32, "grad_norm": 0.47047120332717896, "learning_rate": 0.0005837033491051379, "loss": 1.9194, "step": 9504 }, { "epoch": 0.32, "grad_norm": 0.4552229046821594, "learning_rate": 0.0005836999508947239, "loss": 1.9435, "step": 9505 }, { "epoch": 0.32, "grad_norm": 0.45811787247657776, "learning_rate": 0.0005836965523399398, "loss": 1.9698, "step": 9506 }, { "epoch": 0.32, "grad_norm": 0.4631909430027008, "learning_rate": 0.00058369315344079, "loss": 1.8656, "step": 9507 }, { "epoch": 0.32, "grad_norm": 0.47970837354660034, "learning_rate": 0.0005836897541972784, "loss": 1.8756, "step": 9508 }, { "epoch": 0.32, "grad_norm": 0.45514386892318726, "learning_rate": 0.0005836863546094094, "loss": 1.9717, "step": 9509 }, { "epoch": 0.32, "grad_norm": 0.4666458070278168, "learning_rate": 0.0005836829546771869, "loss": 1.8882, "step": 9510 }, { "epoch": 0.32, "grad_norm": 0.47163480520248413, "learning_rate": 0.0005836795544006151, "loss": 1.9433, "step": 9511 }, { "epoch": 0.32, "grad_norm": 0.47101595997810364, "learning_rate": 0.0005836761537796981, "loss": 1.8996, "step": 9512 }, { "epoch": 0.32, "grad_norm": 0.4986518621444702, "learning_rate": 0.0005836727528144399, "loss": 1.9349, "step": 9513 }, { "epoch": 0.32, "grad_norm": 0.4614099860191345, "learning_rate": 0.0005836693515048451, "loss": 1.9603, "step": 9514 }, { "epoch": 0.32, "grad_norm": 0.46789270639419556, "learning_rate": 0.0005836659498509171, "loss": 1.8995, "step": 9515 }, { "epoch": 0.32, "grad_norm": 0.48489201068878174, "learning_rate": 0.0005836625478526607, "loss": 1.8858, "step": 9516 }, { "epoch": 0.32, "grad_norm": 0.4539143145084381, "learning_rate": 0.0005836591455100798, "loss": 1.9206, "step": 9517 }, { "epoch": 0.32, "grad_norm": 0.4582814574241638, "learning_rate": 0.0005836557428231784, "loss": 1.9874, "step": 9518 }, { "epoch": 0.32, "grad_norm": 0.5090247988700867, "learning_rate": 0.0005836523397919608, "loss": 1.9138, "step": 9519 }, { "epoch": 0.32, "grad_norm": 0.4701465666294098, "learning_rate": 0.0005836489364164309, "loss": 1.9626, "step": 9520 }, { "epoch": 0.32, "grad_norm": 0.4495469331741333, "learning_rate": 0.000583645532696593, "loss": 1.8878, "step": 9521 }, { "epoch": 0.32, "grad_norm": 0.44363340735435486, "learning_rate": 0.0005836421286324514, "loss": 1.8665, "step": 9522 }, { "epoch": 0.32, "grad_norm": 0.433781236410141, "learning_rate": 0.0005836387242240099, "loss": 1.9396, "step": 9523 }, { "epoch": 0.32, "grad_norm": 0.460726797580719, "learning_rate": 0.0005836353194712728, "loss": 1.9447, "step": 9524 }, { "epoch": 0.32, "grad_norm": 0.44615638256073, "learning_rate": 0.0005836319143742443, "loss": 1.8527, "step": 9525 }, { "epoch": 0.32, "grad_norm": 0.4456349015235901, "learning_rate": 0.0005836285089329283, "loss": 1.8917, "step": 9526 }, { "epoch": 0.32, "grad_norm": 0.44775938987731934, "learning_rate": 0.0005836251031473292, "loss": 1.8725, "step": 9527 }, { "epoch": 0.32, "grad_norm": 0.4521576464176178, "learning_rate": 0.000583621697017451, "loss": 1.9707, "step": 9528 }, { "epoch": 0.32, "grad_norm": 0.4522603750228882, "learning_rate": 0.0005836182905432978, "loss": 1.9127, "step": 9529 }, { "epoch": 0.32, "grad_norm": 0.4499242603778839, "learning_rate": 0.0005836148837248738, "loss": 1.8558, "step": 9530 }, { "epoch": 0.32, "grad_norm": 0.4529997408390045, "learning_rate": 0.0005836114765621831, "loss": 1.9983, "step": 9531 }, { "epoch": 0.32, "grad_norm": 0.44608214497566223, "learning_rate": 0.0005836080690552299, "loss": 1.8497, "step": 9532 }, { "epoch": 0.32, "grad_norm": 0.4709364175796509, "learning_rate": 0.0005836046612040182, "loss": 1.8719, "step": 9533 }, { "epoch": 0.32, "grad_norm": 0.4359217584133148, "learning_rate": 0.0005836012530085524, "loss": 1.8569, "step": 9534 }, { "epoch": 0.32, "grad_norm": 0.45769011974334717, "learning_rate": 0.0005835978444688364, "loss": 1.8646, "step": 9535 }, { "epoch": 0.32, "grad_norm": 0.4636549651622772, "learning_rate": 0.0005835944355848743, "loss": 1.9174, "step": 9536 }, { "epoch": 0.32, "grad_norm": 0.456684410572052, "learning_rate": 0.0005835910263566704, "loss": 1.9134, "step": 9537 }, { "epoch": 0.32, "grad_norm": 0.4595373868942261, "learning_rate": 0.0005835876167842288, "loss": 1.8608, "step": 9538 }, { "epoch": 0.32, "grad_norm": 0.43976113200187683, "learning_rate": 0.0005835842068675536, "loss": 1.9745, "step": 9539 }, { "epoch": 0.32, "grad_norm": 0.44218510389328003, "learning_rate": 0.000583580796606649, "loss": 1.9268, "step": 9540 }, { "epoch": 0.32, "grad_norm": 0.45336592197418213, "learning_rate": 0.000583577386001519, "loss": 1.9629, "step": 9541 }, { "epoch": 0.32, "grad_norm": 0.45605286955833435, "learning_rate": 0.0005835739750521679, "loss": 1.902, "step": 9542 }, { "epoch": 0.32, "grad_norm": 0.44449472427368164, "learning_rate": 0.0005835705637585999, "loss": 1.9677, "step": 9543 }, { "epoch": 0.32, "grad_norm": 0.4585656523704529, "learning_rate": 0.0005835671521208188, "loss": 1.8933, "step": 9544 }, { "epoch": 0.32, "grad_norm": 0.4410889744758606, "learning_rate": 0.0005835637401388291, "loss": 1.9215, "step": 9545 }, { "epoch": 0.32, "grad_norm": 0.4625079035758972, "learning_rate": 0.0005835603278126347, "loss": 1.8914, "step": 9546 }, { "epoch": 0.32, "grad_norm": 0.45383521914482117, "learning_rate": 0.0005835569151422399, "loss": 1.9016, "step": 9547 }, { "epoch": 0.32, "grad_norm": 0.4647195339202881, "learning_rate": 0.0005835535021276488, "loss": 1.7786, "step": 9548 }, { "epoch": 0.32, "grad_norm": 0.45893242955207825, "learning_rate": 0.0005835500887688656, "loss": 1.9928, "step": 9549 }, { "epoch": 0.32, "grad_norm": 0.46298104524612427, "learning_rate": 0.0005835466750658941, "loss": 1.9414, "step": 9550 }, { "epoch": 0.32, "grad_norm": 0.4932490587234497, "learning_rate": 0.000583543261018739, "loss": 1.887, "step": 9551 }, { "epoch": 0.32, "grad_norm": 0.4651266634464264, "learning_rate": 0.000583539846627404, "loss": 1.8956, "step": 9552 }, { "epoch": 0.32, "grad_norm": 0.4459232687950134, "learning_rate": 0.0005835364318918935, "loss": 1.874, "step": 9553 }, { "epoch": 0.32, "grad_norm": 0.45268917083740234, "learning_rate": 0.0005835330168122116, "loss": 1.9577, "step": 9554 }, { "epoch": 0.32, "grad_norm": 0.44735226035118103, "learning_rate": 0.0005835296013883622, "loss": 1.7934, "step": 9555 }, { "epoch": 0.32, "grad_norm": 0.45949846506118774, "learning_rate": 0.0005835261856203497, "loss": 1.8419, "step": 9556 }, { "epoch": 0.32, "grad_norm": 0.43794989585876465, "learning_rate": 0.0005835227695081783, "loss": 1.8613, "step": 9557 }, { "epoch": 0.32, "grad_norm": 0.4268173277378082, "learning_rate": 0.000583519353051852, "loss": 1.8242, "step": 9558 }, { "epoch": 0.32, "grad_norm": 0.43903231620788574, "learning_rate": 0.000583515936251375, "loss": 1.9132, "step": 9559 }, { "epoch": 0.32, "grad_norm": 0.46026697754859924, "learning_rate": 0.0005835125191067513, "loss": 1.8842, "step": 9560 }, { "epoch": 0.32, "grad_norm": 0.44722631573677063, "learning_rate": 0.0005835091016179851, "loss": 1.8296, "step": 9561 }, { "epoch": 0.32, "grad_norm": 0.6451650857925415, "learning_rate": 0.0005835056837850809, "loss": 1.8833, "step": 9562 }, { "epoch": 0.32, "grad_norm": 0.44711947441101074, "learning_rate": 0.0005835022656080424, "loss": 1.9474, "step": 9563 }, { "epoch": 0.32, "grad_norm": 0.47340521216392517, "learning_rate": 0.000583498847086874, "loss": 1.8977, "step": 9564 }, { "epoch": 0.32, "grad_norm": 0.47294631600379944, "learning_rate": 0.0005834954282215797, "loss": 1.9238, "step": 9565 }, { "epoch": 0.32, "grad_norm": 0.44946929812431335, "learning_rate": 0.0005834920090121638, "loss": 1.8591, "step": 9566 }, { "epoch": 0.32, "grad_norm": 0.4441746771335602, "learning_rate": 0.0005834885894586303, "loss": 1.8467, "step": 9567 }, { "epoch": 0.32, "grad_norm": 0.43542686104774475, "learning_rate": 0.0005834851695609834, "loss": 1.8564, "step": 9568 }, { "epoch": 0.32, "grad_norm": 0.4621998369693756, "learning_rate": 0.0005834817493192274, "loss": 1.9911, "step": 9569 }, { "epoch": 0.32, "grad_norm": 0.4472021758556366, "learning_rate": 0.0005834783287333662, "loss": 1.9266, "step": 9570 }, { "epoch": 0.32, "grad_norm": 0.44488683342933655, "learning_rate": 0.0005834749078034042, "loss": 1.8557, "step": 9571 }, { "epoch": 0.32, "grad_norm": 0.4521177411079407, "learning_rate": 0.0005834714865293454, "loss": 1.8657, "step": 9572 }, { "epoch": 0.32, "grad_norm": 0.47026535868644714, "learning_rate": 0.0005834680649111939, "loss": 1.879, "step": 9573 }, { "epoch": 0.32, "grad_norm": 0.4755408763885498, "learning_rate": 0.0005834646429489539, "loss": 1.9302, "step": 9574 }, { "epoch": 0.32, "grad_norm": 0.44119516015052795, "learning_rate": 0.0005834612206426297, "loss": 1.8956, "step": 9575 }, { "epoch": 0.32, "grad_norm": 0.4786408543586731, "learning_rate": 0.0005834577979922254, "loss": 1.8851, "step": 9576 }, { "epoch": 0.32, "grad_norm": 0.48887842893600464, "learning_rate": 0.000583454374997745, "loss": 1.9436, "step": 9577 }, { "epoch": 0.32, "grad_norm": 0.4530099332332611, "learning_rate": 0.0005834509516591926, "loss": 1.8603, "step": 9578 }, { "epoch": 0.32, "grad_norm": 0.47827306389808655, "learning_rate": 0.0005834475279765728, "loss": 1.8392, "step": 9579 }, { "epoch": 0.32, "grad_norm": 0.4770766496658325, "learning_rate": 0.0005834441039498893, "loss": 1.8976, "step": 9580 }, { "epoch": 0.32, "grad_norm": 0.45322996377944946, "learning_rate": 0.0005834406795791465, "loss": 1.836, "step": 9581 }, { "epoch": 0.32, "grad_norm": 0.45415058732032776, "learning_rate": 0.0005834372548643484, "loss": 1.8984, "step": 9582 }, { "epoch": 0.32, "grad_norm": 0.4607141315937042, "learning_rate": 0.0005834338298054992, "loss": 2.0209, "step": 9583 }, { "epoch": 0.32, "grad_norm": 0.4613504111766815, "learning_rate": 0.0005834304044026032, "loss": 1.8957, "step": 9584 }, { "epoch": 0.32, "grad_norm": 0.45889198780059814, "learning_rate": 0.0005834269786556645, "loss": 1.9341, "step": 9585 }, { "epoch": 0.32, "grad_norm": 0.4742400646209717, "learning_rate": 0.0005834235525646871, "loss": 1.8224, "step": 9586 }, { "epoch": 0.32, "grad_norm": 0.44524869322776794, "learning_rate": 0.0005834201261296752, "loss": 1.9469, "step": 9587 }, { "epoch": 0.32, "grad_norm": 0.4451008141040802, "learning_rate": 0.0005834166993506332, "loss": 1.8612, "step": 9588 }, { "epoch": 0.32, "grad_norm": 0.44219717383384705, "learning_rate": 0.000583413272227565, "loss": 1.8647, "step": 9589 }, { "epoch": 0.32, "grad_norm": 0.44300639629364014, "learning_rate": 0.0005834098447604748, "loss": 1.9662, "step": 9590 }, { "epoch": 0.32, "grad_norm": 0.48000678420066833, "learning_rate": 0.0005834064169493668, "loss": 1.9024, "step": 9591 }, { "epoch": 0.32, "grad_norm": 0.4652169942855835, "learning_rate": 0.0005834029887942453, "loss": 1.8663, "step": 9592 }, { "epoch": 0.32, "grad_norm": 0.45834627747535706, "learning_rate": 0.0005833995602951142, "loss": 1.9267, "step": 9593 }, { "epoch": 0.32, "grad_norm": 0.45052364468574524, "learning_rate": 0.0005833961314519779, "loss": 1.8577, "step": 9594 }, { "epoch": 0.32, "grad_norm": 0.4536914825439453, "learning_rate": 0.0005833927022648404, "loss": 1.8476, "step": 9595 }, { "epoch": 0.32, "grad_norm": 0.4666554033756256, "learning_rate": 0.0005833892727337059, "loss": 1.8809, "step": 9596 }, { "epoch": 0.32, "grad_norm": 0.4521452486515045, "learning_rate": 0.0005833858428585786, "loss": 1.9627, "step": 9597 }, { "epoch": 0.32, "grad_norm": 0.43932411074638367, "learning_rate": 0.0005833824126394627, "loss": 1.814, "step": 9598 }, { "epoch": 0.32, "grad_norm": 0.43950316309928894, "learning_rate": 0.0005833789820763621, "loss": 1.828, "step": 9599 }, { "epoch": 0.32, "grad_norm": 0.4518667459487915, "learning_rate": 0.0005833755511692813, "loss": 1.954, "step": 9600 }, { "epoch": 0.32, "grad_norm": 0.4410916268825531, "learning_rate": 0.0005833721199182244, "loss": 1.8161, "step": 9601 }, { "epoch": 0.32, "grad_norm": 0.4579460918903351, "learning_rate": 0.0005833686883231954, "loss": 1.9237, "step": 9602 }, { "epoch": 0.32, "grad_norm": 0.45112019777297974, "learning_rate": 0.0005833652563841987, "loss": 1.8618, "step": 9603 }, { "epoch": 0.32, "grad_norm": 0.45526719093322754, "learning_rate": 0.0005833618241012382, "loss": 1.8371, "step": 9604 }, { "epoch": 0.32, "grad_norm": 0.4626651406288147, "learning_rate": 0.0005833583914743182, "loss": 1.9364, "step": 9605 }, { "epoch": 0.32, "grad_norm": 0.4463573396205902, "learning_rate": 0.0005833549585034429, "loss": 1.9332, "step": 9606 }, { "epoch": 0.32, "grad_norm": 0.465392529964447, "learning_rate": 0.0005833515251886164, "loss": 1.8934, "step": 9607 }, { "epoch": 0.32, "grad_norm": 0.4631027579307556, "learning_rate": 0.000583348091529843, "loss": 1.9089, "step": 9608 }, { "epoch": 0.32, "grad_norm": 0.4424969255924225, "learning_rate": 0.0005833446575271267, "loss": 1.9191, "step": 9609 }, { "epoch": 0.32, "grad_norm": 0.45977964997291565, "learning_rate": 0.0005833412231804717, "loss": 1.8982, "step": 9610 }, { "epoch": 0.32, "grad_norm": 0.4775158762931824, "learning_rate": 0.0005833377884898822, "loss": 1.9047, "step": 9611 }, { "epoch": 0.32, "grad_norm": 0.4497112035751343, "learning_rate": 0.0005833343534553624, "loss": 1.9689, "step": 9612 }, { "epoch": 0.32, "grad_norm": 0.44905126094818115, "learning_rate": 0.0005833309180769164, "loss": 1.8976, "step": 9613 }, { "epoch": 0.32, "grad_norm": 0.6534368395805359, "learning_rate": 0.0005833274823545486, "loss": 1.9703, "step": 9614 }, { "epoch": 0.32, "grad_norm": 0.4557802379131317, "learning_rate": 0.0005833240462882628, "loss": 1.9056, "step": 9615 }, { "epoch": 0.32, "grad_norm": 0.4639701843261719, "learning_rate": 0.0005833206098780633, "loss": 1.8933, "step": 9616 }, { "epoch": 0.32, "grad_norm": 0.44103309512138367, "learning_rate": 0.0005833171731239545, "loss": 1.7796, "step": 9617 }, { "epoch": 0.32, "grad_norm": 0.44299668073654175, "learning_rate": 0.0005833137360259403, "loss": 1.8716, "step": 9618 }, { "epoch": 0.32, "grad_norm": 0.46556708216667175, "learning_rate": 0.000583310298584025, "loss": 1.9016, "step": 9619 }, { "epoch": 0.32, "grad_norm": 0.47110995650291443, "learning_rate": 0.0005833068607982128, "loss": 1.9702, "step": 9620 }, { "epoch": 0.32, "grad_norm": 0.44093653559684753, "learning_rate": 0.0005833034226685077, "loss": 1.9138, "step": 9621 }, { "epoch": 0.32, "grad_norm": 0.4604836404323578, "learning_rate": 0.0005832999841949141, "loss": 1.925, "step": 9622 }, { "epoch": 0.32, "grad_norm": 0.441563218832016, "learning_rate": 0.0005832965453774359, "loss": 1.8866, "step": 9623 }, { "epoch": 0.32, "grad_norm": 0.4640088677406311, "learning_rate": 0.0005832931062160775, "loss": 1.9321, "step": 9624 }, { "epoch": 0.32, "grad_norm": 0.45751869678497314, "learning_rate": 0.0005832896667108431, "loss": 1.8293, "step": 9625 }, { "epoch": 0.32, "grad_norm": 0.4412175714969635, "learning_rate": 0.0005832862268617367, "loss": 1.9665, "step": 9626 }, { "epoch": 0.32, "grad_norm": 0.4501083493232727, "learning_rate": 0.0005832827866687626, "loss": 1.8946, "step": 9627 }, { "epoch": 0.32, "grad_norm": 0.4441874921321869, "learning_rate": 0.0005832793461319249, "loss": 1.9258, "step": 9628 }, { "epoch": 0.32, "grad_norm": 0.4582533538341522, "learning_rate": 0.0005832759052512278, "loss": 1.876, "step": 9629 }, { "epoch": 0.32, "grad_norm": 0.4453626275062561, "learning_rate": 0.0005832724640266755, "loss": 1.9088, "step": 9630 }, { "epoch": 0.32, "grad_norm": 0.4603404700756073, "learning_rate": 0.0005832690224582722, "loss": 1.8555, "step": 9631 }, { "epoch": 0.32, "grad_norm": 0.4512964189052582, "learning_rate": 0.000583265580546022, "loss": 1.974, "step": 9632 }, { "epoch": 0.32, "grad_norm": 0.4520200192928314, "learning_rate": 0.0005832621382899292, "loss": 1.9748, "step": 9633 }, { "epoch": 0.32, "grad_norm": 0.4562576115131378, "learning_rate": 0.0005832586956899978, "loss": 1.9919, "step": 9634 }, { "epoch": 0.32, "grad_norm": 0.467233270406723, "learning_rate": 0.0005832552527462322, "loss": 1.8955, "step": 9635 }, { "epoch": 0.32, "grad_norm": 0.4649116098880768, "learning_rate": 0.0005832518094586364, "loss": 1.9304, "step": 9636 }, { "epoch": 0.32, "grad_norm": 0.45895418524742126, "learning_rate": 0.0005832483658272146, "loss": 1.9196, "step": 9637 }, { "epoch": 0.32, "grad_norm": 0.43543577194213867, "learning_rate": 0.0005832449218519711, "loss": 1.8229, "step": 9638 }, { "epoch": 0.32, "grad_norm": 0.47067680954933167, "learning_rate": 0.00058324147753291, "loss": 1.9271, "step": 9639 }, { "epoch": 0.32, "grad_norm": 0.45367738604545593, "learning_rate": 0.0005832380328700354, "loss": 1.9012, "step": 9640 }, { "epoch": 0.32, "grad_norm": 0.4416239261627197, "learning_rate": 0.0005832345878633516, "loss": 1.8567, "step": 9641 }, { "epoch": 0.32, "grad_norm": 0.4474196434020996, "learning_rate": 0.0005832311425128628, "loss": 1.8966, "step": 9642 }, { "epoch": 0.32, "grad_norm": 0.4631859064102173, "learning_rate": 0.000583227696818573, "loss": 1.9304, "step": 9643 }, { "epoch": 0.32, "grad_norm": 0.4647332727909088, "learning_rate": 0.0005832242507804865, "loss": 1.9073, "step": 9644 }, { "epoch": 0.32, "grad_norm": 0.4380158483982086, "learning_rate": 0.0005832208043986076, "loss": 1.9613, "step": 9645 }, { "epoch": 0.32, "grad_norm": 0.455916166305542, "learning_rate": 0.0005832173576729403, "loss": 1.9796, "step": 9646 }, { "epoch": 0.32, "grad_norm": 0.4490136504173279, "learning_rate": 0.000583213910603489, "loss": 1.9084, "step": 9647 }, { "epoch": 0.32, "grad_norm": 0.46156805753707886, "learning_rate": 0.0005832104631902575, "loss": 1.9444, "step": 9648 }, { "epoch": 0.32, "grad_norm": 0.44334569573402405, "learning_rate": 0.0005832070154332504, "loss": 1.9203, "step": 9649 }, { "epoch": 0.32, "grad_norm": 0.4706632196903229, "learning_rate": 0.0005832035673324716, "loss": 1.9665, "step": 9650 }, { "epoch": 0.32, "grad_norm": 0.4439384341239929, "learning_rate": 0.0005832001188879255, "loss": 1.9836, "step": 9651 }, { "epoch": 0.32, "grad_norm": 0.4400418996810913, "learning_rate": 0.0005831966700996162, "loss": 1.9174, "step": 9652 }, { "epoch": 0.32, "grad_norm": 0.45767152309417725, "learning_rate": 0.0005831932209675478, "loss": 1.9157, "step": 9653 }, { "epoch": 0.32, "grad_norm": 0.4502940773963928, "learning_rate": 0.0005831897714917246, "loss": 1.8866, "step": 9654 }, { "epoch": 0.32, "grad_norm": 0.5925095677375793, "learning_rate": 0.0005831863216721507, "loss": 1.9945, "step": 9655 }, { "epoch": 0.32, "grad_norm": 0.437705397605896, "learning_rate": 0.0005831828715088304, "loss": 1.939, "step": 9656 }, { "epoch": 0.32, "grad_norm": 0.454782634973526, "learning_rate": 0.0005831794210017678, "loss": 1.9005, "step": 9657 }, { "epoch": 0.32, "grad_norm": 0.4595246911048889, "learning_rate": 0.0005831759701509671, "loss": 1.9205, "step": 9658 }, { "epoch": 0.32, "grad_norm": 0.4509185254573822, "learning_rate": 0.0005831725189564324, "loss": 1.8836, "step": 9659 }, { "epoch": 0.32, "grad_norm": 0.45248258113861084, "learning_rate": 0.0005831690674181681, "loss": 1.91, "step": 9660 }, { "epoch": 0.32, "grad_norm": 0.4491012990474701, "learning_rate": 0.0005831656155361782, "loss": 1.988, "step": 9661 }, { "epoch": 0.32, "grad_norm": 0.45929116010665894, "learning_rate": 0.000583162163310467, "loss": 1.8824, "step": 9662 }, { "epoch": 0.32, "grad_norm": 0.46811169385910034, "learning_rate": 0.0005831587107410386, "loss": 1.923, "step": 9663 }, { "epoch": 0.32, "grad_norm": 0.4453485608100891, "learning_rate": 0.0005831552578278973, "loss": 1.8951, "step": 9664 }, { "epoch": 0.32, "grad_norm": 0.45339441299438477, "learning_rate": 0.0005831518045710472, "loss": 1.9134, "step": 9665 }, { "epoch": 0.32, "grad_norm": 0.4494154751300812, "learning_rate": 0.0005831483509704927, "loss": 1.8987, "step": 9666 }, { "epoch": 0.32, "grad_norm": 0.45022904872894287, "learning_rate": 0.0005831448970262376, "loss": 1.856, "step": 9667 }, { "epoch": 0.32, "grad_norm": 0.4787893295288086, "learning_rate": 0.0005831414427382865, "loss": 1.8691, "step": 9668 }, { "epoch": 0.32, "grad_norm": 0.4556688070297241, "learning_rate": 0.0005831379881066433, "loss": 1.9368, "step": 9669 }, { "epoch": 0.32, "grad_norm": 0.45114630460739136, "learning_rate": 0.0005831345331313123, "loss": 1.8428, "step": 9670 }, { "epoch": 0.32, "grad_norm": 0.4349932372570038, "learning_rate": 0.0005831310778122977, "loss": 1.9152, "step": 9671 }, { "epoch": 0.32, "grad_norm": 0.4754572808742523, "learning_rate": 0.0005831276221496037, "loss": 1.8703, "step": 9672 }, { "epoch": 0.32, "grad_norm": 0.46884235739707947, "learning_rate": 0.0005831241661432344, "loss": 1.9642, "step": 9673 }, { "epoch": 0.32, "grad_norm": 0.4456155002117157, "learning_rate": 0.0005831207097931943, "loss": 1.9105, "step": 9674 }, { "epoch": 0.32, "grad_norm": 0.4316208064556122, "learning_rate": 0.0005831172530994872, "loss": 1.877, "step": 9675 }, { "epoch": 0.32, "grad_norm": 0.4640134274959564, "learning_rate": 0.0005831137960621175, "loss": 1.9426, "step": 9676 }, { "epoch": 0.32, "grad_norm": 0.466621458530426, "learning_rate": 0.0005831103386810893, "loss": 1.9896, "step": 9677 }, { "epoch": 0.32, "grad_norm": 0.4536707103252411, "learning_rate": 0.000583106880956407, "loss": 2.0387, "step": 9678 }, { "epoch": 0.32, "grad_norm": 0.4763137400150299, "learning_rate": 0.0005831034228880746, "loss": 1.8977, "step": 9679 }, { "epoch": 0.32, "grad_norm": 0.4457247853279114, "learning_rate": 0.0005830999644760964, "loss": 1.8842, "step": 9680 }, { "epoch": 0.32, "grad_norm": 0.45780491828918457, "learning_rate": 0.0005830965057204765, "loss": 1.8522, "step": 9681 }, { "epoch": 0.32, "grad_norm": 0.44123131036758423, "learning_rate": 0.0005830930466212192, "loss": 1.8087, "step": 9682 }, { "epoch": 0.32, "grad_norm": 0.45822611451148987, "learning_rate": 0.0005830895871783285, "loss": 1.9154, "step": 9683 }, { "epoch": 0.32, "grad_norm": 0.45033812522888184, "learning_rate": 0.0005830861273918089, "loss": 1.7996, "step": 9684 }, { "epoch": 0.32, "grad_norm": 0.4395005702972412, "learning_rate": 0.0005830826672616644, "loss": 1.8268, "step": 9685 }, { "epoch": 0.32, "grad_norm": 0.4440217912197113, "learning_rate": 0.0005830792067878993, "loss": 1.9823, "step": 9686 }, { "epoch": 0.32, "grad_norm": 0.4355262219905853, "learning_rate": 0.0005830757459705177, "loss": 1.8855, "step": 9687 }, { "epoch": 0.32, "grad_norm": 0.45104753971099854, "learning_rate": 0.000583072284809524, "loss": 1.8463, "step": 9688 }, { "epoch": 0.32, "grad_norm": 0.45354703068733215, "learning_rate": 0.000583068823304922, "loss": 1.909, "step": 9689 }, { "epoch": 0.32, "grad_norm": 0.4449564218521118, "learning_rate": 0.0005830653614567164, "loss": 1.9041, "step": 9690 }, { "epoch": 0.32, "grad_norm": 0.4567611813545227, "learning_rate": 0.0005830618992649111, "loss": 1.8876, "step": 9691 }, { "epoch": 0.32, "grad_norm": 0.45420634746551514, "learning_rate": 0.0005830584367295102, "loss": 1.8617, "step": 9692 }, { "epoch": 0.32, "grad_norm": 0.47661083936691284, "learning_rate": 0.0005830549738505182, "loss": 1.9696, "step": 9693 }, { "epoch": 0.32, "grad_norm": 0.46839243173599243, "learning_rate": 0.0005830515106279391, "loss": 1.9858, "step": 9694 }, { "epoch": 0.32, "grad_norm": 0.4444237947463989, "learning_rate": 0.0005830480470617773, "loss": 1.9589, "step": 9695 }, { "epoch": 0.32, "grad_norm": 0.45713701844215393, "learning_rate": 0.0005830445831520368, "loss": 1.8366, "step": 9696 }, { "epoch": 0.32, "grad_norm": 0.4818638563156128, "learning_rate": 0.0005830411188987218, "loss": 1.9203, "step": 9697 }, { "epoch": 0.32, "grad_norm": 0.46816039085388184, "learning_rate": 0.0005830376543018367, "loss": 1.9085, "step": 9698 }, { "epoch": 0.32, "grad_norm": 0.43496257066726685, "learning_rate": 0.0005830341893613856, "loss": 1.8556, "step": 9699 }, { "epoch": 0.32, "grad_norm": 0.4726208448410034, "learning_rate": 0.0005830307240773726, "loss": 1.9214, "step": 9700 }, { "epoch": 0.32, "grad_norm": 0.4614158570766449, "learning_rate": 0.000583027258449802, "loss": 1.8673, "step": 9701 }, { "epoch": 0.32, "grad_norm": 0.451090008020401, "learning_rate": 0.0005830237924786782, "loss": 1.8733, "step": 9702 }, { "epoch": 0.32, "grad_norm": 0.45843738317489624, "learning_rate": 0.0005830203261640049, "loss": 1.8873, "step": 9703 }, { "epoch": 0.32, "grad_norm": 0.487753689289093, "learning_rate": 0.0005830168595057869, "loss": 1.8587, "step": 9704 }, { "epoch": 0.32, "grad_norm": 0.44590234756469727, "learning_rate": 0.000583013392504028, "loss": 1.9816, "step": 9705 }, { "epoch": 0.32, "grad_norm": 0.4373418390750885, "learning_rate": 0.0005830099251587326, "loss": 1.8899, "step": 9706 }, { "epoch": 0.32, "grad_norm": 0.46076837182044983, "learning_rate": 0.0005830064574699048, "loss": 1.8642, "step": 9707 }, { "epoch": 0.32, "grad_norm": 0.4618085026741028, "learning_rate": 0.0005830029894375489, "loss": 1.9012, "step": 9708 }, { "epoch": 0.32, "grad_norm": 0.464979887008667, "learning_rate": 0.0005829995210616691, "loss": 1.9866, "step": 9709 }, { "epoch": 0.32, "grad_norm": 0.45212385058403015, "learning_rate": 0.0005829960523422695, "loss": 1.7852, "step": 9710 }, { "epoch": 0.32, "grad_norm": 0.4565717577934265, "learning_rate": 0.0005829925832793545, "loss": 1.8983, "step": 9711 }, { "epoch": 0.32, "grad_norm": 0.44320446252822876, "learning_rate": 0.000582989113872928, "loss": 1.8482, "step": 9712 }, { "epoch": 0.32, "grad_norm": 0.6296229362487793, "learning_rate": 0.0005829856441229946, "loss": 1.9289, "step": 9713 }, { "epoch": 0.32, "grad_norm": 0.4724270701408386, "learning_rate": 0.0005829821740295582, "loss": 1.898, "step": 9714 }, { "epoch": 0.32, "grad_norm": 0.4476870000362396, "learning_rate": 0.0005829787035926233, "loss": 1.8807, "step": 9715 }, { "epoch": 0.32, "grad_norm": 0.4497743248939514, "learning_rate": 0.0005829752328121939, "loss": 1.8669, "step": 9716 }, { "epoch": 0.32, "grad_norm": 0.4702706038951874, "learning_rate": 0.0005829717616882741, "loss": 1.8387, "step": 9717 }, { "epoch": 0.32, "grad_norm": 0.46136030554771423, "learning_rate": 0.0005829682902208684, "loss": 1.8673, "step": 9718 }, { "epoch": 0.32, "grad_norm": 0.44808289408683777, "learning_rate": 0.0005829648184099809, "loss": 1.9583, "step": 9719 }, { "epoch": 0.32, "grad_norm": 0.4514402151107788, "learning_rate": 0.0005829613462556158, "loss": 1.889, "step": 9720 }, { "epoch": 0.32, "grad_norm": 0.44530558586120605, "learning_rate": 0.0005829578737577772, "loss": 1.9639, "step": 9721 }, { "epoch": 0.32, "grad_norm": 0.4677586257457733, "learning_rate": 0.0005829544009164696, "loss": 1.8673, "step": 9722 }, { "epoch": 0.32, "grad_norm": 0.4831028878688812, "learning_rate": 0.000582950927731697, "loss": 1.9002, "step": 9723 }, { "epoch": 0.32, "grad_norm": 0.444620817899704, "learning_rate": 0.0005829474542034637, "loss": 1.9271, "step": 9724 }, { "epoch": 0.32, "grad_norm": 0.44902312755584717, "learning_rate": 0.0005829439803317737, "loss": 1.8206, "step": 9725 }, { "epoch": 0.32, "grad_norm": 0.45135557651519775, "learning_rate": 0.0005829405061166317, "loss": 1.9021, "step": 9726 }, { "epoch": 0.32, "grad_norm": 0.44730815291404724, "learning_rate": 0.0005829370315580413, "loss": 1.9865, "step": 9727 }, { "epoch": 0.32, "grad_norm": 0.4545219838619232, "learning_rate": 0.0005829335566560072, "loss": 1.95, "step": 9728 }, { "epoch": 0.32, "grad_norm": 0.4394533038139343, "learning_rate": 0.0005829300814105334, "loss": 1.8418, "step": 9729 }, { "epoch": 0.32, "grad_norm": 0.44288960099220276, "learning_rate": 0.0005829266058216242, "loss": 1.9363, "step": 9730 }, { "epoch": 0.32, "grad_norm": 0.4499901533126831, "learning_rate": 0.0005829231298892837, "loss": 1.9, "step": 9731 }, { "epoch": 0.32, "grad_norm": 0.4625444710254669, "learning_rate": 0.0005829196536135164, "loss": 1.9173, "step": 9732 }, { "epoch": 0.32, "grad_norm": 0.45803576707839966, "learning_rate": 0.0005829161769943262, "loss": 1.899, "step": 9733 }, { "epoch": 0.32, "grad_norm": 0.43959659337997437, "learning_rate": 0.0005829127000317175, "loss": 1.8538, "step": 9734 }, { "epoch": 0.32, "grad_norm": 0.44759124517440796, "learning_rate": 0.0005829092227256944, "loss": 1.8927, "step": 9735 }, { "epoch": 0.32, "grad_norm": 0.4701021611690521, "learning_rate": 0.0005829057450762612, "loss": 1.9561, "step": 9736 }, { "epoch": 0.32, "grad_norm": 0.4507106840610504, "learning_rate": 0.000582902267083422, "loss": 1.9125, "step": 9737 }, { "epoch": 0.32, "grad_norm": 0.4551997184753418, "learning_rate": 0.0005828987887471814, "loss": 1.9097, "step": 9738 }, { "epoch": 0.32, "grad_norm": 0.436883807182312, "learning_rate": 0.0005828953100675432, "loss": 1.9107, "step": 9739 }, { "epoch": 0.32, "grad_norm": 0.4461759328842163, "learning_rate": 0.0005828918310445117, "loss": 1.8952, "step": 9740 }, { "epoch": 0.32, "grad_norm": 0.4535723030567169, "learning_rate": 0.0005828883516780913, "loss": 1.9571, "step": 9741 }, { "epoch": 0.32, "grad_norm": 0.4562837481498718, "learning_rate": 0.0005828848719682861, "loss": 1.9129, "step": 9742 }, { "epoch": 0.32, "grad_norm": 0.43826112151145935, "learning_rate": 0.0005828813919151005, "loss": 1.8436, "step": 9743 }, { "epoch": 0.32, "grad_norm": 0.4513704180717468, "learning_rate": 0.0005828779115185384, "loss": 1.939, "step": 9744 }, { "epoch": 0.32, "grad_norm": 0.4640747010707855, "learning_rate": 0.0005828744307786042, "loss": 1.934, "step": 9745 }, { "epoch": 0.32, "grad_norm": 0.4494034945964813, "learning_rate": 0.0005828709496953021, "loss": 1.9549, "step": 9746 }, { "epoch": 0.32, "grad_norm": 0.44801944494247437, "learning_rate": 0.0005828674682686364, "loss": 1.9418, "step": 9747 }, { "epoch": 0.32, "grad_norm": 0.4622930884361267, "learning_rate": 0.0005828639864986112, "loss": 1.91, "step": 9748 }, { "epoch": 0.32, "grad_norm": 0.43414032459259033, "learning_rate": 0.0005828605043852309, "loss": 1.9005, "step": 9749 }, { "epoch": 0.32, "grad_norm": 0.46223339438438416, "learning_rate": 0.0005828570219284996, "loss": 1.95, "step": 9750 }, { "epoch": 0.32, "grad_norm": 0.4268327057361603, "learning_rate": 0.0005828535391284215, "loss": 1.8612, "step": 9751 }, { "epoch": 0.32, "grad_norm": 0.43742308020591736, "learning_rate": 0.0005828500559850009, "loss": 1.8796, "step": 9752 }, { "epoch": 0.32, "grad_norm": 0.4465166926383972, "learning_rate": 0.000582846572498242, "loss": 1.8846, "step": 9753 }, { "epoch": 0.32, "grad_norm": 0.43252986669540405, "learning_rate": 0.0005828430886681491, "loss": 1.8302, "step": 9754 }, { "epoch": 0.32, "grad_norm": 0.46250343322753906, "learning_rate": 0.0005828396044947263, "loss": 1.9727, "step": 9755 }, { "epoch": 0.32, "grad_norm": 0.45250025391578674, "learning_rate": 0.000582836119977978, "loss": 1.9044, "step": 9756 }, { "epoch": 0.32, "grad_norm": 0.4523429870605469, "learning_rate": 0.0005828326351179082, "loss": 1.8984, "step": 9757 }, { "epoch": 0.32, "grad_norm": 0.46156856417655945, "learning_rate": 0.0005828291499145213, "loss": 1.8534, "step": 9758 }, { "epoch": 0.32, "grad_norm": 0.4447779953479767, "learning_rate": 0.0005828256643678214, "loss": 1.9201, "step": 9759 }, { "epoch": 0.32, "grad_norm": 0.4502623379230499, "learning_rate": 0.000582822178477813, "loss": 1.9859, "step": 9760 }, { "epoch": 0.32, "grad_norm": 0.43887150287628174, "learning_rate": 0.0005828186922445, "loss": 1.9749, "step": 9761 }, { "epoch": 0.32, "grad_norm": 0.45082682371139526, "learning_rate": 0.0005828152056678868, "loss": 1.9198, "step": 9762 }, { "epoch": 0.32, "grad_norm": 0.45459988713264465, "learning_rate": 0.0005828117187479777, "loss": 1.9543, "step": 9763 }, { "epoch": 0.32, "grad_norm": 0.42761778831481934, "learning_rate": 0.0005828082314847767, "loss": 1.867, "step": 9764 }, { "epoch": 0.32, "grad_norm": 0.45770564675331116, "learning_rate": 0.0005828047438782883, "loss": 1.9277, "step": 9765 }, { "epoch": 0.32, "grad_norm": 0.4507327973842621, "learning_rate": 0.0005828012559285166, "loss": 1.9286, "step": 9766 }, { "epoch": 0.32, "grad_norm": 0.4355168640613556, "learning_rate": 0.0005827977676354659, "loss": 1.795, "step": 9767 }, { "epoch": 0.32, "grad_norm": 0.45034047961235046, "learning_rate": 0.0005827942789991402, "loss": 1.8868, "step": 9768 }, { "epoch": 0.33, "grad_norm": 0.45816507935523987, "learning_rate": 0.000582790790019544, "loss": 1.8793, "step": 9769 }, { "epoch": 0.33, "grad_norm": 0.4490277171134949, "learning_rate": 0.0005827873006966814, "loss": 1.9531, "step": 9770 }, { "epoch": 0.33, "grad_norm": 0.4806554615497589, "learning_rate": 0.0005827838110305568, "loss": 1.8593, "step": 9771 }, { "epoch": 0.33, "grad_norm": 0.45151904225349426, "learning_rate": 0.0005827803210211742, "loss": 1.9739, "step": 9772 }, { "epoch": 0.33, "grad_norm": 0.4498142898082733, "learning_rate": 0.000582776830668538, "loss": 1.8545, "step": 9773 }, { "epoch": 0.33, "grad_norm": 0.4490583837032318, "learning_rate": 0.0005827733399726525, "loss": 1.9118, "step": 9774 }, { "epoch": 0.33, "grad_norm": 0.47482267022132874, "learning_rate": 0.0005827698489335217, "loss": 1.9989, "step": 9775 }, { "epoch": 0.33, "grad_norm": 0.4435735046863556, "learning_rate": 0.00058276635755115, "loss": 1.8785, "step": 9776 }, { "epoch": 0.33, "grad_norm": 0.449511855840683, "learning_rate": 0.0005827628658255416, "loss": 1.9149, "step": 9777 }, { "epoch": 0.33, "grad_norm": 0.4488619565963745, "learning_rate": 0.0005827593737567008, "loss": 1.9363, "step": 9778 }, { "epoch": 0.33, "grad_norm": 0.448802649974823, "learning_rate": 0.0005827558813446316, "loss": 1.8887, "step": 9779 }, { "epoch": 0.33, "grad_norm": 0.6346835494041443, "learning_rate": 0.0005827523885893386, "loss": 1.9246, "step": 9780 }, { "epoch": 0.33, "grad_norm": 0.4897308945655823, "learning_rate": 0.0005827488954908258, "loss": 1.8961, "step": 9781 }, { "epoch": 0.33, "grad_norm": 0.45861315727233887, "learning_rate": 0.0005827454020490975, "loss": 1.9485, "step": 9782 }, { "epoch": 0.33, "grad_norm": 0.455569326877594, "learning_rate": 0.000582741908264158, "loss": 1.8549, "step": 9783 }, { "epoch": 0.33, "grad_norm": 0.4603511393070221, "learning_rate": 0.0005827384141360115, "loss": 1.8695, "step": 9784 }, { "epoch": 0.33, "grad_norm": 0.46030858159065247, "learning_rate": 0.0005827349196646621, "loss": 1.8692, "step": 9785 }, { "epoch": 0.33, "grad_norm": 0.4592655301094055, "learning_rate": 0.0005827314248501143, "loss": 1.9502, "step": 9786 }, { "epoch": 0.33, "grad_norm": 0.47533705830574036, "learning_rate": 0.0005827279296923721, "loss": 1.9997, "step": 9787 }, { "epoch": 0.33, "grad_norm": 0.4518984854221344, "learning_rate": 0.0005827244341914399, "loss": 1.9467, "step": 9788 }, { "epoch": 0.33, "grad_norm": 0.45908811688423157, "learning_rate": 0.0005827209383473219, "loss": 1.8985, "step": 9789 }, { "epoch": 0.33, "grad_norm": 0.44997498393058777, "learning_rate": 0.0005827174421600223, "loss": 1.8514, "step": 9790 }, { "epoch": 0.33, "grad_norm": 0.4751375615596771, "learning_rate": 0.0005827139456295454, "loss": 1.9458, "step": 9791 }, { "epoch": 0.33, "grad_norm": 0.4368918836116791, "learning_rate": 0.0005827104487558955, "loss": 1.9189, "step": 9792 }, { "epoch": 0.33, "grad_norm": 0.436833918094635, "learning_rate": 0.0005827069515390767, "loss": 1.8931, "step": 9793 }, { "epoch": 0.33, "grad_norm": 0.4512230455875397, "learning_rate": 0.0005827034539790933, "loss": 1.8363, "step": 9794 }, { "epoch": 0.33, "grad_norm": 0.4538169503211975, "learning_rate": 0.0005826999560759496, "loss": 1.9718, "step": 9795 }, { "epoch": 0.33, "grad_norm": 0.4713433086872101, "learning_rate": 0.0005826964578296498, "loss": 1.9113, "step": 9796 }, { "epoch": 0.33, "grad_norm": 0.47511017322540283, "learning_rate": 0.0005826929592401981, "loss": 1.8527, "step": 9797 }, { "epoch": 0.33, "grad_norm": 0.4450313150882721, "learning_rate": 0.0005826894603075988, "loss": 1.9745, "step": 9798 }, { "epoch": 0.33, "grad_norm": 0.4499187767505646, "learning_rate": 0.0005826859610318563, "loss": 1.9335, "step": 9799 }, { "epoch": 0.33, "grad_norm": 0.4568063020706177, "learning_rate": 0.0005826824614129746, "loss": 1.9237, "step": 9800 }, { "epoch": 0.33, "grad_norm": 0.4881899654865265, "learning_rate": 0.000582678961450958, "loss": 1.9061, "step": 9801 }, { "epoch": 0.33, "grad_norm": 0.4358535408973694, "learning_rate": 0.0005826754611458108, "loss": 1.972, "step": 9802 }, { "epoch": 0.33, "grad_norm": 0.4448097050189972, "learning_rate": 0.0005826719604975373, "loss": 1.8561, "step": 9803 }, { "epoch": 0.33, "grad_norm": 0.4487856328487396, "learning_rate": 0.0005826684595061416, "loss": 1.8769, "step": 9804 }, { "epoch": 0.33, "grad_norm": 0.46040213108062744, "learning_rate": 0.0005826649581716281, "loss": 1.9223, "step": 9805 }, { "epoch": 0.33, "grad_norm": 0.4488394856452942, "learning_rate": 0.000582661456494001, "loss": 1.8626, "step": 9806 }, { "epoch": 0.33, "grad_norm": 0.4681415855884552, "learning_rate": 0.0005826579544732645, "loss": 1.9676, "step": 9807 }, { "epoch": 0.33, "grad_norm": 0.4571634829044342, "learning_rate": 0.000582654452109423, "loss": 1.8756, "step": 9808 }, { "epoch": 0.33, "grad_norm": 0.45067450404167175, "learning_rate": 0.0005826509494024804, "loss": 1.8492, "step": 9809 }, { "epoch": 0.33, "grad_norm": 0.4504879117012024, "learning_rate": 0.0005826474463524415, "loss": 1.8604, "step": 9810 }, { "epoch": 0.33, "grad_norm": 0.46324989199638367, "learning_rate": 0.00058264394295931, "loss": 1.8331, "step": 9811 }, { "epoch": 0.33, "grad_norm": 0.4663392901420593, "learning_rate": 0.0005826404392230906, "loss": 1.9067, "step": 9812 }, { "epoch": 0.33, "grad_norm": 0.4611254930496216, "learning_rate": 0.0005826369351437872, "loss": 1.9152, "step": 9813 }, { "epoch": 0.33, "grad_norm": 0.4368380010128021, "learning_rate": 0.0005826334307214042, "loss": 1.8543, "step": 9814 }, { "epoch": 0.33, "grad_norm": 0.4868839681148529, "learning_rate": 0.000582629925955946, "loss": 1.8974, "step": 9815 }, { "epoch": 0.33, "grad_norm": 0.44611915946006775, "learning_rate": 0.0005826264208474165, "loss": 1.8834, "step": 9816 }, { "epoch": 0.33, "grad_norm": 0.45455458760261536, "learning_rate": 0.0005826229153958203, "loss": 1.9048, "step": 9817 }, { "epoch": 0.33, "grad_norm": 0.4335471987724304, "learning_rate": 0.0005826194096011616, "loss": 1.8478, "step": 9818 }, { "epoch": 0.33, "grad_norm": 0.46671465039253235, "learning_rate": 0.0005826159034634444, "loss": 1.9704, "step": 9819 }, { "epoch": 0.33, "grad_norm": 0.45898300409317017, "learning_rate": 0.0005826123969826732, "loss": 1.914, "step": 9820 }, { "epoch": 0.33, "grad_norm": 0.4519806504249573, "learning_rate": 0.0005826088901588523, "loss": 1.9346, "step": 9821 }, { "epoch": 0.33, "grad_norm": 0.44865328073501587, "learning_rate": 0.0005826053829919857, "loss": 1.9797, "step": 9822 }, { "epoch": 0.33, "grad_norm": 0.44095033407211304, "learning_rate": 0.0005826018754820779, "loss": 1.9653, "step": 9823 }, { "epoch": 0.33, "grad_norm": 0.44191813468933105, "learning_rate": 0.0005825983676291331, "loss": 1.9414, "step": 9824 }, { "epoch": 0.33, "grad_norm": 0.45963263511657715, "learning_rate": 0.0005825948594331554, "loss": 1.9425, "step": 9825 }, { "epoch": 0.33, "grad_norm": 0.4515535235404968, "learning_rate": 0.0005825913508941493, "loss": 1.9137, "step": 9826 }, { "epoch": 0.33, "grad_norm": 0.4235773980617523, "learning_rate": 0.0005825878420121189, "loss": 1.8457, "step": 9827 }, { "epoch": 0.33, "grad_norm": 0.46136239171028137, "learning_rate": 0.0005825843327870684, "loss": 1.8355, "step": 9828 }, { "epoch": 0.33, "grad_norm": 0.46356070041656494, "learning_rate": 0.0005825808232190023, "loss": 1.8575, "step": 9829 }, { "epoch": 0.33, "grad_norm": 0.6359319686889648, "learning_rate": 0.0005825773133079246, "loss": 1.947, "step": 9830 }, { "epoch": 0.33, "grad_norm": 0.4548555016517639, "learning_rate": 0.0005825738030538398, "loss": 1.8872, "step": 9831 }, { "epoch": 0.33, "grad_norm": 0.46699121594429016, "learning_rate": 0.0005825702924567519, "loss": 1.9033, "step": 9832 }, { "epoch": 0.33, "grad_norm": 0.45395979285240173, "learning_rate": 0.0005825667815166653, "loss": 1.9449, "step": 9833 }, { "epoch": 0.33, "grad_norm": 0.4539110064506531, "learning_rate": 0.0005825632702335843, "loss": 1.9483, "step": 9834 }, { "epoch": 0.33, "grad_norm": 0.4573669135570526, "learning_rate": 0.0005825597586075133, "loss": 1.9345, "step": 9835 }, { "epoch": 0.33, "grad_norm": 0.4605359435081482, "learning_rate": 0.0005825562466384562, "loss": 1.852, "step": 9836 }, { "epoch": 0.33, "grad_norm": 0.4753141701221466, "learning_rate": 0.0005825527343264175, "loss": 1.8719, "step": 9837 }, { "epoch": 0.33, "grad_norm": 0.44251611828804016, "learning_rate": 0.0005825492216714013, "loss": 1.8619, "step": 9838 }, { "epoch": 0.33, "grad_norm": 0.45880627632141113, "learning_rate": 0.0005825457086734121, "loss": 1.9506, "step": 9839 }, { "epoch": 0.33, "grad_norm": 0.4671888053417206, "learning_rate": 0.000582542195332454, "loss": 1.9464, "step": 9840 }, { "epoch": 0.33, "grad_norm": 0.481917142868042, "learning_rate": 0.0005825386816485313, "loss": 1.9938, "step": 9841 }, { "epoch": 0.33, "grad_norm": 0.49336546659469604, "learning_rate": 0.0005825351676216482, "loss": 1.9891, "step": 9842 }, { "epoch": 0.33, "grad_norm": 0.43598005175590515, "learning_rate": 0.0005825316532518092, "loss": 1.8951, "step": 9843 }, { "epoch": 0.33, "grad_norm": 0.4625697135925293, "learning_rate": 0.0005825281385390183, "loss": 1.9424, "step": 9844 }, { "epoch": 0.33, "grad_norm": 0.4427541494369507, "learning_rate": 0.0005825246234832799, "loss": 1.8638, "step": 9845 }, { "epoch": 0.33, "grad_norm": 0.4558562934398651, "learning_rate": 0.0005825211080845983, "loss": 1.992, "step": 9846 }, { "epoch": 0.33, "grad_norm": 0.44355830550193787, "learning_rate": 0.0005825175923429776, "loss": 1.9201, "step": 9847 }, { "epoch": 0.33, "grad_norm": 0.434931218624115, "learning_rate": 0.0005825140762584222, "loss": 1.9422, "step": 9848 }, { "epoch": 0.33, "grad_norm": 0.44912827014923096, "learning_rate": 0.0005825105598309363, "loss": 1.8691, "step": 9849 }, { "epoch": 0.33, "grad_norm": 0.4508875608444214, "learning_rate": 0.0005825070430605242, "loss": 1.9207, "step": 9850 }, { "epoch": 0.33, "grad_norm": 0.4688495397567749, "learning_rate": 0.0005825035259471903, "loss": 1.8484, "step": 9851 }, { "epoch": 0.33, "grad_norm": 0.4525758624076843, "learning_rate": 0.0005825000084909386, "loss": 1.9465, "step": 9852 }, { "epoch": 0.33, "grad_norm": 0.43547672033309937, "learning_rate": 0.0005824964906917736, "loss": 1.8843, "step": 9853 }, { "epoch": 0.33, "grad_norm": 0.4867079555988312, "learning_rate": 0.0005824929725496995, "loss": 1.8553, "step": 9854 }, { "epoch": 0.33, "grad_norm": 0.4555369019508362, "learning_rate": 0.0005824894540647206, "loss": 1.9139, "step": 9855 }, { "epoch": 0.33, "grad_norm": 0.4556601643562317, "learning_rate": 0.000582485935236841, "loss": 1.931, "step": 9856 }, { "epoch": 0.33, "grad_norm": 0.46987059712409973, "learning_rate": 0.0005824824160660651, "loss": 1.8969, "step": 9857 }, { "epoch": 0.33, "grad_norm": 0.4537400007247925, "learning_rate": 0.0005824788965523972, "loss": 1.9394, "step": 9858 }, { "epoch": 0.33, "grad_norm": 0.4675944149494171, "learning_rate": 0.0005824753766958416, "loss": 1.9343, "step": 9859 }, { "epoch": 0.33, "grad_norm": 0.4506225883960724, "learning_rate": 0.0005824718564964024, "loss": 1.9013, "step": 9860 }, { "epoch": 0.33, "grad_norm": 0.44019919633865356, "learning_rate": 0.000582468335954084, "loss": 1.8558, "step": 9861 }, { "epoch": 0.33, "grad_norm": 0.5176698565483093, "learning_rate": 0.0005824648150688908, "loss": 1.8742, "step": 9862 }, { "epoch": 0.33, "grad_norm": 0.45017358660697937, "learning_rate": 0.0005824612938408268, "loss": 1.8689, "step": 9863 }, { "epoch": 0.33, "grad_norm": 0.47571438550949097, "learning_rate": 0.0005824577722698965, "loss": 1.9498, "step": 9864 }, { "epoch": 0.33, "grad_norm": 0.47673743963241577, "learning_rate": 0.000582454250356104, "loss": 1.9207, "step": 9865 }, { "epoch": 0.33, "grad_norm": 0.45632362365722656, "learning_rate": 0.0005824507280994536, "loss": 1.8808, "step": 9866 }, { "epoch": 0.33, "grad_norm": 0.47753873467445374, "learning_rate": 0.0005824472054999498, "loss": 1.9136, "step": 9867 }, { "epoch": 0.33, "grad_norm": 0.47611045837402344, "learning_rate": 0.0005824436825575966, "loss": 1.8688, "step": 9868 }, { "epoch": 0.33, "grad_norm": 0.4523378908634186, "learning_rate": 0.0005824401592723985, "loss": 1.8669, "step": 9869 }, { "epoch": 0.33, "grad_norm": 0.4513145089149475, "learning_rate": 0.0005824366356443595, "loss": 1.9477, "step": 9870 }, { "epoch": 0.33, "grad_norm": 0.4658864140510559, "learning_rate": 0.0005824331116734842, "loss": 1.8796, "step": 9871 }, { "epoch": 0.33, "grad_norm": 0.48231226205825806, "learning_rate": 0.0005824295873597767, "loss": 1.9344, "step": 9872 }, { "epoch": 0.33, "grad_norm": 0.4584903419017792, "learning_rate": 0.0005824260627032412, "loss": 1.8464, "step": 9873 }, { "epoch": 0.33, "grad_norm": 0.44407349824905396, "learning_rate": 0.000582422537703882, "loss": 1.8998, "step": 9874 }, { "epoch": 0.33, "grad_norm": 0.5034991502761841, "learning_rate": 0.0005824190123617036, "loss": 1.9216, "step": 9875 }, { "epoch": 0.33, "grad_norm": 0.45521819591522217, "learning_rate": 0.0005824154866767101, "loss": 1.8032, "step": 9876 }, { "epoch": 0.33, "grad_norm": 0.447476863861084, "learning_rate": 0.0005824119606489058, "loss": 1.8935, "step": 9877 }, { "epoch": 0.33, "grad_norm": 0.446066677570343, "learning_rate": 0.000582408434278295, "loss": 1.9169, "step": 9878 }, { "epoch": 0.33, "grad_norm": 0.4455070197582245, "learning_rate": 0.000582404907564882, "loss": 1.9118, "step": 9879 }, { "epoch": 0.33, "grad_norm": 0.43572697043418884, "learning_rate": 0.000582401380508671, "loss": 1.8466, "step": 9880 }, { "epoch": 0.33, "grad_norm": 0.44915342330932617, "learning_rate": 0.0005823978531096664, "loss": 1.8884, "step": 9881 }, { "epoch": 0.33, "grad_norm": 0.4574548602104187, "learning_rate": 0.0005823943253678723, "loss": 1.8969, "step": 9882 }, { "epoch": 0.33, "grad_norm": 0.4517902433872223, "learning_rate": 0.0005823907972832931, "loss": 1.867, "step": 9883 }, { "epoch": 0.33, "grad_norm": 0.4495895206928253, "learning_rate": 0.0005823872688559331, "loss": 1.9326, "step": 9884 }, { "epoch": 0.33, "grad_norm": 0.462229460477829, "learning_rate": 0.0005823837400857967, "loss": 1.9321, "step": 9885 }, { "epoch": 0.33, "grad_norm": 0.4675584137439728, "learning_rate": 0.000582380210972888, "loss": 1.9263, "step": 9886 }, { "epoch": 0.33, "grad_norm": 0.4661250710487366, "learning_rate": 0.0005823766815172113, "loss": 1.9031, "step": 9887 }, { "epoch": 0.33, "grad_norm": 0.45610272884368896, "learning_rate": 0.0005823731517187709, "loss": 1.9376, "step": 9888 }, { "epoch": 0.33, "grad_norm": 0.4610869884490967, "learning_rate": 0.0005823696215775711, "loss": 1.8467, "step": 9889 }, { "epoch": 0.33, "grad_norm": 0.4292586147785187, "learning_rate": 0.0005823660910936162, "loss": 1.8842, "step": 9890 }, { "epoch": 0.33, "grad_norm": 0.4344119727611542, "learning_rate": 0.0005823625602669103, "loss": 1.8932, "step": 9891 }, { "epoch": 0.33, "grad_norm": 0.47834333777427673, "learning_rate": 0.000582359029097458, "loss": 1.9902, "step": 9892 }, { "epoch": 0.33, "grad_norm": 0.46431154012680054, "learning_rate": 0.0005823554975852635, "loss": 1.9385, "step": 9893 }, { "epoch": 0.33, "grad_norm": 0.44499820470809937, "learning_rate": 0.000582351965730331, "loss": 1.9212, "step": 9894 }, { "epoch": 0.33, "grad_norm": 0.4411293864250183, "learning_rate": 0.0005823484335326648, "loss": 1.9106, "step": 9895 }, { "epoch": 0.33, "grad_norm": 0.4646552801132202, "learning_rate": 0.0005823449009922692, "loss": 1.8551, "step": 9896 }, { "epoch": 0.33, "grad_norm": 0.44130071997642517, "learning_rate": 0.0005823413681091485, "loss": 1.9064, "step": 9897 }, { "epoch": 0.33, "grad_norm": 0.45406588912010193, "learning_rate": 0.0005823378348833071, "loss": 1.9966, "step": 9898 }, { "epoch": 0.33, "grad_norm": 0.4504064619541168, "learning_rate": 0.000582334301314749, "loss": 1.8873, "step": 9899 }, { "epoch": 0.33, "grad_norm": 0.474309504032135, "learning_rate": 0.0005823307674034787, "loss": 1.8839, "step": 9900 }, { "epoch": 0.33, "grad_norm": 0.4695979356765747, "learning_rate": 0.0005823272331495005, "loss": 1.9184, "step": 9901 }, { "epoch": 0.33, "grad_norm": 0.44941312074661255, "learning_rate": 0.0005823236985528184, "loss": 1.9528, "step": 9902 }, { "epoch": 0.33, "grad_norm": 0.4488051235675812, "learning_rate": 0.0005823201636134372, "loss": 1.9802, "step": 9903 }, { "epoch": 0.33, "grad_norm": 0.446823388338089, "learning_rate": 0.0005823166283313608, "loss": 1.8605, "step": 9904 }, { "epoch": 0.33, "grad_norm": 0.4528343379497528, "learning_rate": 0.0005823130927065937, "loss": 1.9133, "step": 9905 }, { "epoch": 0.33, "grad_norm": 0.467813640832901, "learning_rate": 0.00058230955673914, "loss": 1.9146, "step": 9906 }, { "epoch": 0.33, "grad_norm": 0.4693092405796051, "learning_rate": 0.0005823060204290042, "loss": 1.8329, "step": 9907 }, { "epoch": 0.33, "grad_norm": 0.4478989541530609, "learning_rate": 0.0005823024837761904, "loss": 1.8665, "step": 9908 }, { "epoch": 0.33, "grad_norm": 0.4628079831600189, "learning_rate": 0.0005822989467807031, "loss": 1.833, "step": 9909 }, { "epoch": 0.33, "grad_norm": 0.45225343108177185, "learning_rate": 0.0005822954094425463, "loss": 1.8013, "step": 9910 }, { "epoch": 0.33, "grad_norm": 0.4568714499473572, "learning_rate": 0.0005822918717617245, "loss": 1.869, "step": 9911 }, { "epoch": 0.33, "grad_norm": 0.48846036195755005, "learning_rate": 0.000582288333738242, "loss": 1.9447, "step": 9912 }, { "epoch": 0.33, "grad_norm": 0.4611682891845703, "learning_rate": 0.0005822847953721031, "loss": 1.977, "step": 9913 }, { "epoch": 0.33, "grad_norm": 0.4490867555141449, "learning_rate": 0.0005822812566633121, "loss": 1.8616, "step": 9914 }, { "epoch": 0.33, "grad_norm": 0.43630069494247437, "learning_rate": 0.0005822777176118731, "loss": 1.8637, "step": 9915 }, { "epoch": 0.33, "grad_norm": 0.4466363191604614, "learning_rate": 0.0005822741782177907, "loss": 1.8904, "step": 9916 }, { "epoch": 0.33, "grad_norm": 0.4560262858867645, "learning_rate": 0.0005822706384810689, "loss": 1.9537, "step": 9917 }, { "epoch": 0.33, "grad_norm": 0.45566368103027344, "learning_rate": 0.0005822670984017122, "loss": 1.8886, "step": 9918 }, { "epoch": 0.33, "grad_norm": 0.47647804021835327, "learning_rate": 0.0005822635579797248, "loss": 1.7965, "step": 9919 }, { "epoch": 0.33, "grad_norm": 0.4395459294319153, "learning_rate": 0.000582260017215111, "loss": 1.8698, "step": 9920 }, { "epoch": 0.33, "grad_norm": 0.4491654634475708, "learning_rate": 0.0005822564761078751, "loss": 1.9431, "step": 9921 }, { "epoch": 0.33, "grad_norm": 0.4685838222503662, "learning_rate": 0.0005822529346580216, "loss": 1.9339, "step": 9922 }, { "epoch": 0.33, "grad_norm": 0.45672667026519775, "learning_rate": 0.0005822493928655545, "loss": 1.9405, "step": 9923 }, { "epoch": 0.33, "grad_norm": 0.47872304916381836, "learning_rate": 0.0005822458507304783, "loss": 1.9109, "step": 9924 }, { "epoch": 0.33, "grad_norm": 0.45184212923049927, "learning_rate": 0.0005822423082527972, "loss": 1.9679, "step": 9925 }, { "epoch": 0.33, "grad_norm": 0.4515874683856964, "learning_rate": 0.0005822387654325154, "loss": 1.8897, "step": 9926 }, { "epoch": 0.33, "grad_norm": 0.4521515369415283, "learning_rate": 0.0005822352222696374, "loss": 1.9759, "step": 9927 }, { "epoch": 0.33, "grad_norm": 0.4570430815219879, "learning_rate": 0.0005822316787641675, "loss": 1.9019, "step": 9928 }, { "epoch": 0.33, "grad_norm": 0.4351256787776947, "learning_rate": 0.0005822281349161098, "loss": 1.8846, "step": 9929 }, { "epoch": 0.33, "grad_norm": 0.43318191170692444, "learning_rate": 0.0005822245907254689, "loss": 1.8091, "step": 9930 }, { "epoch": 0.33, "grad_norm": 0.4518197476863861, "learning_rate": 0.0005822210461922488, "loss": 1.9474, "step": 9931 }, { "epoch": 0.33, "grad_norm": 0.4469647705554962, "learning_rate": 0.000582217501316454, "loss": 1.9725, "step": 9932 }, { "epoch": 0.33, "grad_norm": 0.4691145420074463, "learning_rate": 0.0005822139560980887, "loss": 1.86, "step": 9933 }, { "epoch": 0.33, "grad_norm": 0.46734604239463806, "learning_rate": 0.0005822104105371572, "loss": 1.8828, "step": 9934 }, { "epoch": 0.33, "grad_norm": 0.4453149437904358, "learning_rate": 0.000582206864633664, "loss": 1.8483, "step": 9935 }, { "epoch": 0.33, "grad_norm": 0.4541454613208771, "learning_rate": 0.000582203318387613, "loss": 1.9707, "step": 9936 }, { "epoch": 0.33, "grad_norm": 0.46994441747665405, "learning_rate": 0.0005821997717990089, "loss": 1.9789, "step": 9937 }, { "epoch": 0.33, "grad_norm": 0.4712381660938263, "learning_rate": 0.0005821962248678559, "loss": 1.9552, "step": 9938 }, { "epoch": 0.33, "grad_norm": 0.4475926160812378, "learning_rate": 0.0005821926775941581, "loss": 1.8918, "step": 9939 }, { "epoch": 0.33, "grad_norm": 0.4800138771533966, "learning_rate": 0.0005821891299779201, "loss": 1.8838, "step": 9940 }, { "epoch": 0.33, "grad_norm": 0.4547518193721771, "learning_rate": 0.0005821855820191461, "loss": 1.9134, "step": 9941 }, { "epoch": 0.33, "grad_norm": 0.6001172661781311, "learning_rate": 0.0005821820337178404, "loss": 1.9477, "step": 9942 }, { "epoch": 0.33, "grad_norm": 0.48410817980766296, "learning_rate": 0.0005821784850740071, "loss": 1.8242, "step": 9943 }, { "epoch": 0.33, "grad_norm": 0.47693702578544617, "learning_rate": 0.0005821749360876508, "loss": 1.9381, "step": 9944 }, { "epoch": 0.33, "grad_norm": 0.45211711525917053, "learning_rate": 0.0005821713867587758, "loss": 1.9691, "step": 9945 }, { "epoch": 0.33, "grad_norm": 0.45227938890457153, "learning_rate": 0.0005821678370873862, "loss": 1.8606, "step": 9946 }, { "epoch": 0.33, "grad_norm": 0.4612169563770294, "learning_rate": 0.0005821642870734864, "loss": 1.831, "step": 9947 }, { "epoch": 0.33, "grad_norm": 0.46649685502052307, "learning_rate": 0.0005821607367170809, "loss": 1.9864, "step": 9948 }, { "epoch": 0.33, "grad_norm": 0.4654768705368042, "learning_rate": 0.0005821571860181737, "loss": 1.8966, "step": 9949 }, { "epoch": 0.33, "grad_norm": 0.4712947607040405, "learning_rate": 0.0005821536349767693, "loss": 1.8871, "step": 9950 }, { "epoch": 0.33, "grad_norm": 0.45642298460006714, "learning_rate": 0.0005821500835928719, "loss": 1.8967, "step": 9951 }, { "epoch": 0.33, "grad_norm": 0.5002059936523438, "learning_rate": 0.0005821465318664858, "loss": 1.8986, "step": 9952 }, { "epoch": 0.33, "grad_norm": 0.44775381684303284, "learning_rate": 0.0005821429797976156, "loss": 1.9015, "step": 9953 }, { "epoch": 0.33, "grad_norm": 0.47311437129974365, "learning_rate": 0.0005821394273862652, "loss": 1.942, "step": 9954 }, { "epoch": 0.33, "grad_norm": 0.4471118152141571, "learning_rate": 0.0005821358746324393, "loss": 1.8723, "step": 9955 }, { "epoch": 0.33, "grad_norm": 0.4557652771472931, "learning_rate": 0.0005821323215361419, "loss": 1.9392, "step": 9956 }, { "epoch": 0.33, "grad_norm": 0.4491640627384186, "learning_rate": 0.0005821287680973774, "loss": 1.8883, "step": 9957 }, { "epoch": 0.33, "grad_norm": 0.4728121757507324, "learning_rate": 0.0005821252143161503, "loss": 1.8775, "step": 9958 }, { "epoch": 0.33, "grad_norm": 0.4480838179588318, "learning_rate": 0.0005821216601924645, "loss": 1.867, "step": 9959 }, { "epoch": 0.33, "grad_norm": 0.45837071537971497, "learning_rate": 0.0005821181057263248, "loss": 1.9064, "step": 9960 }, { "epoch": 0.33, "grad_norm": 0.4545062780380249, "learning_rate": 0.0005821145509177352, "loss": 1.8868, "step": 9961 }, { "epoch": 0.33, "grad_norm": 0.4960069954395294, "learning_rate": 0.0005821109957667, "loss": 1.9587, "step": 9962 }, { "epoch": 0.33, "grad_norm": 0.447314590215683, "learning_rate": 0.0005821074402732238, "loss": 1.9831, "step": 9963 }, { "epoch": 0.33, "grad_norm": 0.46499326825141907, "learning_rate": 0.0005821038844373107, "loss": 1.931, "step": 9964 }, { "epoch": 0.33, "grad_norm": 0.46970218420028687, "learning_rate": 0.000582100328258965, "loss": 1.868, "step": 9965 }, { "epoch": 0.33, "grad_norm": 0.4662726819515228, "learning_rate": 0.0005820967717381911, "loss": 1.8943, "step": 9966 }, { "epoch": 0.33, "grad_norm": 0.4491031765937805, "learning_rate": 0.0005820932148749932, "loss": 1.9031, "step": 9967 }, { "epoch": 0.33, "grad_norm": 0.4518706202507019, "learning_rate": 0.0005820896576693757, "loss": 1.8838, "step": 9968 }, { "epoch": 0.33, "grad_norm": 0.477322518825531, "learning_rate": 0.0005820861001213431, "loss": 1.87, "step": 9969 }, { "epoch": 0.33, "grad_norm": 0.44741910696029663, "learning_rate": 0.0005820825422308994, "loss": 1.8158, "step": 9970 }, { "epoch": 0.33, "grad_norm": 0.4674871861934662, "learning_rate": 0.000582078983998049, "loss": 1.8531, "step": 9971 }, { "epoch": 0.33, "grad_norm": 0.4652385115623474, "learning_rate": 0.0005820754254227964, "loss": 1.9361, "step": 9972 }, { "epoch": 0.33, "grad_norm": 0.47195160388946533, "learning_rate": 0.0005820718665051457, "loss": 2.0148, "step": 9973 }, { "epoch": 0.33, "grad_norm": 0.4560571610927582, "learning_rate": 0.0005820683072451015, "loss": 1.9385, "step": 9974 }, { "epoch": 0.33, "grad_norm": 0.4330275058746338, "learning_rate": 0.0005820647476426677, "loss": 1.8922, "step": 9975 }, { "epoch": 0.33, "grad_norm": 0.4322299063205719, "learning_rate": 0.0005820611876978489, "loss": 1.848, "step": 9976 }, { "epoch": 0.33, "grad_norm": 0.4403972327709198, "learning_rate": 0.0005820576274106494, "loss": 1.8754, "step": 9977 }, { "epoch": 0.33, "grad_norm": 0.4391494393348694, "learning_rate": 0.0005820540667810737, "loss": 1.8444, "step": 9978 }, { "epoch": 0.33, "grad_norm": 0.44736069440841675, "learning_rate": 0.0005820505058091257, "loss": 1.8879, "step": 9979 }, { "epoch": 0.33, "grad_norm": 0.4504871070384979, "learning_rate": 0.00058204694449481, "loss": 1.8759, "step": 9980 }, { "epoch": 0.33, "grad_norm": 0.4537411630153656, "learning_rate": 0.0005820433828381309, "loss": 1.8565, "step": 9981 }, { "epoch": 0.33, "grad_norm": 0.454662024974823, "learning_rate": 0.0005820398208390926, "loss": 1.9438, "step": 9982 }, { "epoch": 0.33, "grad_norm": 0.44206568598747253, "learning_rate": 0.0005820362584976997, "loss": 1.9115, "step": 9983 }, { "epoch": 0.33, "grad_norm": 0.45279499888420105, "learning_rate": 0.0005820326958139562, "loss": 1.9563, "step": 9984 }, { "epoch": 0.33, "grad_norm": 0.44774726033210754, "learning_rate": 0.0005820291327878665, "loss": 1.9227, "step": 9985 }, { "epoch": 0.33, "grad_norm": 0.45054662227630615, "learning_rate": 0.000582025569419435, "loss": 1.8683, "step": 9986 }, { "epoch": 0.33, "grad_norm": 0.45967617630958557, "learning_rate": 0.0005820220057086661, "loss": 1.8858, "step": 9987 }, { "epoch": 0.33, "grad_norm": 0.4419698119163513, "learning_rate": 0.000582018441655564, "loss": 1.8816, "step": 9988 }, { "epoch": 0.33, "grad_norm": 0.4522465467453003, "learning_rate": 0.0005820148772601331, "loss": 1.859, "step": 9989 }, { "epoch": 0.33, "grad_norm": 0.47368383407592773, "learning_rate": 0.0005820113125223777, "loss": 1.8715, "step": 9990 }, { "epoch": 0.33, "grad_norm": 0.46646222472190857, "learning_rate": 0.000582007747442302, "loss": 1.9441, "step": 9991 }, { "epoch": 0.33, "grad_norm": 0.4591224193572998, "learning_rate": 0.0005820041820199105, "loss": 1.9405, "step": 9992 }, { "epoch": 0.33, "grad_norm": 0.43970027565956116, "learning_rate": 0.0005820006162552076, "loss": 1.9209, "step": 9993 }, { "epoch": 0.33, "grad_norm": 0.4460254907608032, "learning_rate": 0.0005819970501481974, "loss": 1.963, "step": 9994 }, { "epoch": 0.33, "grad_norm": 0.4561026394367218, "learning_rate": 0.0005819934836988844, "loss": 1.9052, "step": 9995 }, { "epoch": 0.33, "grad_norm": 0.44871386885643005, "learning_rate": 0.0005819899169072728, "loss": 1.8962, "step": 9996 }, { "epoch": 0.33, "grad_norm": 0.4564613699913025, "learning_rate": 0.000581986349773367, "loss": 1.8963, "step": 9997 }, { "epoch": 0.33, "grad_norm": 0.4354461133480072, "learning_rate": 0.0005819827822971713, "loss": 1.8501, "step": 9998 }, { "epoch": 0.33, "grad_norm": 0.4538634419441223, "learning_rate": 0.0005819792144786901, "loss": 1.9592, "step": 9999 }, { "epoch": 0.33, "grad_norm": 0.45757004618644714, "learning_rate": 0.0005819756463179277, "loss": 1.8378, "step": 10000 }, { "epoch": 0.33, "grad_norm": 0.45339372754096985, "learning_rate": 0.0005819720778148883, "loss": 1.8499, "step": 10001 }, { "epoch": 0.33, "grad_norm": 0.442259281873703, "learning_rate": 0.0005819685089695763, "loss": 1.9493, "step": 10002 }, { "epoch": 0.33, "grad_norm": 0.47107067704200745, "learning_rate": 0.0005819649397819963, "loss": 1.9485, "step": 10003 }, { "epoch": 0.33, "grad_norm": 0.4585452675819397, "learning_rate": 0.0005819613702521523, "loss": 1.8972, "step": 10004 }, { "epoch": 0.33, "grad_norm": 0.4616461992263794, "learning_rate": 0.0005819578003800487, "loss": 1.9662, "step": 10005 }, { "epoch": 0.33, "grad_norm": 0.45700356364250183, "learning_rate": 0.0005819542301656899, "loss": 1.9137, "step": 10006 }, { "epoch": 0.33, "grad_norm": 0.466011106967926, "learning_rate": 0.0005819506596090802, "loss": 1.8964, "step": 10007 }, { "epoch": 0.33, "grad_norm": 0.486378937959671, "learning_rate": 0.0005819470887102239, "loss": 1.8954, "step": 10008 }, { "epoch": 0.33, "grad_norm": 0.44888219237327576, "learning_rate": 0.0005819435174691255, "loss": 1.8543, "step": 10009 }, { "epoch": 0.33, "grad_norm": 0.46870774030685425, "learning_rate": 0.0005819399458857891, "loss": 1.9273, "step": 10010 }, { "epoch": 0.33, "grad_norm": 0.4564020335674286, "learning_rate": 0.0005819363739602193, "loss": 1.8214, "step": 10011 }, { "epoch": 0.33, "grad_norm": 0.4418846070766449, "learning_rate": 0.0005819328016924201, "loss": 1.941, "step": 10012 }, { "epoch": 0.33, "grad_norm": 0.4638187289237976, "learning_rate": 0.0005819292290823961, "loss": 1.9619, "step": 10013 }, { "epoch": 0.33, "grad_norm": 0.4372834265232086, "learning_rate": 0.0005819256561301515, "loss": 1.8632, "step": 10014 }, { "epoch": 0.33, "grad_norm": 0.4478176534175873, "learning_rate": 0.0005819220828356908, "loss": 1.8728, "step": 10015 }, { "epoch": 0.33, "grad_norm": 0.4400782585144043, "learning_rate": 0.0005819185091990182, "loss": 1.8065, "step": 10016 }, { "epoch": 0.33, "grad_norm": 0.4355011582374573, "learning_rate": 0.000581914935220138, "loss": 1.837, "step": 10017 }, { "epoch": 0.33, "grad_norm": 0.44562894105911255, "learning_rate": 0.0005819113608990546, "loss": 1.8933, "step": 10018 }, { "epoch": 0.33, "grad_norm": 0.459598183631897, "learning_rate": 0.0005819077862357724, "loss": 1.9067, "step": 10019 }, { "epoch": 0.33, "grad_norm": 0.4377232789993286, "learning_rate": 0.0005819042112302958, "loss": 1.8377, "step": 10020 }, { "epoch": 0.33, "grad_norm": 0.4485138952732086, "learning_rate": 0.0005819006358826289, "loss": 1.8919, "step": 10021 }, { "epoch": 0.33, "grad_norm": 0.4639836847782135, "learning_rate": 0.0005818970601927762, "loss": 1.8575, "step": 10022 }, { "epoch": 0.33, "grad_norm": 0.4492226541042328, "learning_rate": 0.000581893484160742, "loss": 1.9542, "step": 10023 }, { "epoch": 0.33, "grad_norm": 0.4536761939525604, "learning_rate": 0.0005818899077865304, "loss": 2.0643, "step": 10024 }, { "epoch": 0.33, "grad_norm": 0.44108060002326965, "learning_rate": 0.0005818863310701463, "loss": 1.9507, "step": 10025 }, { "epoch": 0.33, "grad_norm": 0.45959797501564026, "learning_rate": 0.0005818827540115937, "loss": 1.9384, "step": 10026 }, { "epoch": 0.33, "grad_norm": 0.4609130024909973, "learning_rate": 0.0005818791766108768, "loss": 1.9807, "step": 10027 }, { "epoch": 0.33, "grad_norm": 0.45047199726104736, "learning_rate": 0.0005818755988680003, "loss": 1.9095, "step": 10028 }, { "epoch": 0.33, "grad_norm": 0.4490121006965637, "learning_rate": 0.0005818720207829683, "loss": 1.8771, "step": 10029 }, { "epoch": 0.33, "grad_norm": 0.45828187465667725, "learning_rate": 0.0005818684423557852, "loss": 1.8715, "step": 10030 }, { "epoch": 0.33, "grad_norm": 0.4811691343784332, "learning_rate": 0.0005818648635864552, "loss": 1.9973, "step": 10031 }, { "epoch": 0.33, "grad_norm": 0.4518969655036926, "learning_rate": 0.000581861284474983, "loss": 1.8818, "step": 10032 }, { "epoch": 0.33, "grad_norm": 0.43899795413017273, "learning_rate": 0.0005818577050213725, "loss": 1.9074, "step": 10033 }, { "epoch": 0.33, "grad_norm": 0.458217591047287, "learning_rate": 0.0005818541252256284, "loss": 1.9424, "step": 10034 }, { "epoch": 0.33, "grad_norm": 0.44805198907852173, "learning_rate": 0.0005818505450877551, "loss": 1.8702, "step": 10035 }, { "epoch": 0.33, "grad_norm": 0.4442024230957031, "learning_rate": 0.0005818469646077564, "loss": 1.8909, "step": 10036 }, { "epoch": 0.33, "grad_norm": 0.4564054310321808, "learning_rate": 0.0005818433837856373, "loss": 1.9218, "step": 10037 }, { "epoch": 0.33, "grad_norm": 0.4590730369091034, "learning_rate": 0.0005818398026214019, "loss": 1.891, "step": 10038 }, { "epoch": 0.33, "grad_norm": 0.43184903264045715, "learning_rate": 0.0005818362211150543, "loss": 1.8606, "step": 10039 }, { "epoch": 0.33, "grad_norm": 0.4388163387775421, "learning_rate": 0.0005818326392665991, "loss": 1.9037, "step": 10040 }, { "epoch": 0.33, "grad_norm": 0.44634196162223816, "learning_rate": 0.0005818290570760406, "loss": 1.8708, "step": 10041 }, { "epoch": 0.33, "grad_norm": 0.44989660382270813, "learning_rate": 0.0005818254745433832, "loss": 1.9415, "step": 10042 }, { "epoch": 0.33, "grad_norm": 0.4538361132144928, "learning_rate": 0.0005818218916686311, "loss": 1.9585, "step": 10043 }, { "epoch": 0.33, "grad_norm": 0.4284350574016571, "learning_rate": 0.000581818308451789, "loss": 1.8936, "step": 10044 }, { "epoch": 0.33, "grad_norm": 0.47194811701774597, "learning_rate": 0.0005818147248928607, "loss": 1.8607, "step": 10045 }, { "epoch": 0.33, "grad_norm": 0.4438270330429077, "learning_rate": 0.000581811140991851, "loss": 1.8972, "step": 10046 }, { "epoch": 0.33, "grad_norm": 0.45267656445503235, "learning_rate": 0.0005818075567487641, "loss": 1.9253, "step": 10047 }, { "epoch": 0.33, "grad_norm": 0.4428151845932007, "learning_rate": 0.0005818039721636043, "loss": 1.9544, "step": 10048 }, { "epoch": 0.33, "grad_norm": 0.44033730030059814, "learning_rate": 0.0005818003872363759, "loss": 1.9175, "step": 10049 }, { "epoch": 0.33, "grad_norm": 0.5192333459854126, "learning_rate": 0.0005817968019670837, "loss": 1.8958, "step": 10050 }, { "epoch": 0.33, "grad_norm": 0.46012961864471436, "learning_rate": 0.0005817932163557313, "loss": 1.8759, "step": 10051 }, { "epoch": 0.33, "grad_norm": 0.45393505692481995, "learning_rate": 0.0005817896304023236, "loss": 1.86, "step": 10052 }, { "epoch": 0.33, "grad_norm": 0.4551715552806854, "learning_rate": 0.0005817860441068649, "loss": 1.8655, "step": 10053 }, { "epoch": 0.33, "grad_norm": 0.47391456365585327, "learning_rate": 0.0005817824574693595, "loss": 1.938, "step": 10054 }, { "epoch": 0.33, "grad_norm": 0.43977221846580505, "learning_rate": 0.0005817788704898116, "loss": 1.889, "step": 10055 }, { "epoch": 0.33, "grad_norm": 0.46003448963165283, "learning_rate": 0.0005817752831682256, "loss": 1.8614, "step": 10056 }, { "epoch": 0.33, "grad_norm": 0.5224775075912476, "learning_rate": 0.0005817716955046061, "loss": 1.9841, "step": 10057 }, { "epoch": 0.33, "grad_norm": 0.46605920791625977, "learning_rate": 0.0005817681074989572, "loss": 1.9084, "step": 10058 }, { "epoch": 0.33, "grad_norm": 0.4446852207183838, "learning_rate": 0.0005817645191512833, "loss": 1.9639, "step": 10059 }, { "epoch": 0.33, "grad_norm": 0.515666127204895, "learning_rate": 0.0005817609304615888, "loss": 1.9479, "step": 10060 }, { "epoch": 0.33, "grad_norm": 0.47077977657318115, "learning_rate": 0.0005817573414298781, "loss": 1.7653, "step": 10061 }, { "epoch": 0.33, "grad_norm": 0.43214917182922363, "learning_rate": 0.0005817537520561554, "loss": 1.9529, "step": 10062 }, { "epoch": 0.33, "grad_norm": 0.46801283955574036, "learning_rate": 0.0005817501623404252, "loss": 1.9107, "step": 10063 }, { "epoch": 0.33, "grad_norm": 0.45616307854652405, "learning_rate": 0.0005817465722826918, "loss": 1.8207, "step": 10064 }, { "epoch": 0.33, "grad_norm": 0.46344584226608276, "learning_rate": 0.0005817429818829596, "loss": 1.8228, "step": 10065 }, { "epoch": 0.33, "grad_norm": 0.4873335659503937, "learning_rate": 0.000581739391141233, "loss": 1.8328, "step": 10066 }, { "epoch": 0.33, "grad_norm": 0.5033551454544067, "learning_rate": 0.0005817358000575162, "loss": 1.8299, "step": 10067 }, { "epoch": 0.33, "grad_norm": 0.4601900577545166, "learning_rate": 0.0005817322086318137, "loss": 1.8901, "step": 10068 }, { "epoch": 0.33, "grad_norm": 0.4572199285030365, "learning_rate": 0.0005817286168641298, "loss": 1.8583, "step": 10069 }, { "epoch": 0.34, "grad_norm": 0.5034294128417969, "learning_rate": 0.0005817250247544688, "loss": 1.9157, "step": 10070 }, { "epoch": 0.34, "grad_norm": 0.4458197057247162, "learning_rate": 0.0005817214323028352, "loss": 1.8894, "step": 10071 }, { "epoch": 0.34, "grad_norm": 0.44499754905700684, "learning_rate": 0.0005817178395092333, "loss": 1.8777, "step": 10072 }, { "epoch": 0.34, "grad_norm": 0.47027960419654846, "learning_rate": 0.0005817142463736676, "loss": 1.9205, "step": 10073 }, { "epoch": 0.34, "grad_norm": 0.47109928727149963, "learning_rate": 0.000581710652896142, "loss": 1.9531, "step": 10074 }, { "epoch": 0.34, "grad_norm": 0.47431501746177673, "learning_rate": 0.0005817070590766614, "loss": 1.8752, "step": 10075 }, { "epoch": 0.34, "grad_norm": 0.45153379440307617, "learning_rate": 0.0005817034649152298, "loss": 2.0187, "step": 10076 }, { "epoch": 0.34, "grad_norm": 0.494812548160553, "learning_rate": 0.0005816998704118517, "loss": 1.9182, "step": 10077 }, { "epoch": 0.34, "grad_norm": 0.46766847372055054, "learning_rate": 0.0005816962755665317, "loss": 1.8265, "step": 10078 }, { "epoch": 0.34, "grad_norm": 0.4379545748233795, "learning_rate": 0.0005816926803792737, "loss": 1.9505, "step": 10079 }, { "epoch": 0.34, "grad_norm": 0.4479653239250183, "learning_rate": 0.0005816890848500823, "loss": 1.9037, "step": 10080 }, { "epoch": 0.34, "grad_norm": 0.4732535481452942, "learning_rate": 0.000581685488978962, "loss": 1.9166, "step": 10081 }, { "epoch": 0.34, "grad_norm": 0.4755316376686096, "learning_rate": 0.0005816818927659169, "loss": 1.8817, "step": 10082 }, { "epoch": 0.34, "grad_norm": 0.4412815570831299, "learning_rate": 0.0005816782962109515, "loss": 1.8564, "step": 10083 }, { "epoch": 0.34, "grad_norm": 0.4357442259788513, "learning_rate": 0.0005816746993140702, "loss": 1.9025, "step": 10084 }, { "epoch": 0.34, "grad_norm": 0.46342262625694275, "learning_rate": 0.0005816711020752772, "loss": 1.9264, "step": 10085 }, { "epoch": 0.34, "grad_norm": 0.4430831968784332, "learning_rate": 0.0005816675044945771, "loss": 1.88, "step": 10086 }, { "epoch": 0.34, "grad_norm": 0.7079160213470459, "learning_rate": 0.0005816639065719741, "loss": 1.8922, "step": 10087 }, { "epoch": 0.34, "grad_norm": 0.5092609524726868, "learning_rate": 0.0005816603083074727, "loss": 1.8538, "step": 10088 }, { "epoch": 0.34, "grad_norm": 0.4612043797969818, "learning_rate": 0.000581656709701077, "loss": 1.9636, "step": 10089 }, { "epoch": 0.34, "grad_norm": 0.4452045261859894, "learning_rate": 0.0005816531107527917, "loss": 1.9616, "step": 10090 }, { "epoch": 0.34, "grad_norm": 0.47971221804618835, "learning_rate": 0.0005816495114626211, "loss": 1.9811, "step": 10091 }, { "epoch": 0.34, "grad_norm": 0.4572467505931854, "learning_rate": 0.0005816459118305694, "loss": 1.9409, "step": 10092 }, { "epoch": 0.34, "grad_norm": 0.4493407607078552, "learning_rate": 0.000581642311856641, "loss": 1.9802, "step": 10093 }, { "epoch": 0.34, "grad_norm": 0.5210016965866089, "learning_rate": 0.0005816387115408403, "loss": 1.8583, "step": 10094 }, { "epoch": 0.34, "grad_norm": 0.451966255903244, "learning_rate": 0.0005816351108831718, "loss": 1.8835, "step": 10095 }, { "epoch": 0.34, "grad_norm": 0.44362276792526245, "learning_rate": 0.0005816315098836399, "loss": 1.9465, "step": 10096 }, { "epoch": 0.34, "grad_norm": 0.45449358224868774, "learning_rate": 0.0005816279085422486, "loss": 1.8683, "step": 10097 }, { "epoch": 0.34, "grad_norm": 0.445045530796051, "learning_rate": 0.0005816243068590026, "loss": 1.926, "step": 10098 }, { "epoch": 0.34, "grad_norm": 0.4594564437866211, "learning_rate": 0.0005816207048339063, "loss": 1.8885, "step": 10099 }, { "epoch": 0.34, "grad_norm": 0.45286211371421814, "learning_rate": 0.0005816171024669638, "loss": 1.9599, "step": 10100 }, { "epoch": 0.34, "grad_norm": 0.4651879072189331, "learning_rate": 0.0005816134997581797, "loss": 1.9073, "step": 10101 }, { "epoch": 0.34, "grad_norm": 0.43735507130622864, "learning_rate": 0.0005816098967075583, "loss": 1.818, "step": 10102 }, { "epoch": 0.34, "grad_norm": 0.45115211606025696, "learning_rate": 0.000581606293315104, "loss": 1.8405, "step": 10103 }, { "epoch": 0.34, "grad_norm": 0.4461337625980377, "learning_rate": 0.0005816026895808211, "loss": 1.8823, "step": 10104 }, { "epoch": 0.34, "grad_norm": 0.4475882947444916, "learning_rate": 0.0005815990855047141, "loss": 1.9404, "step": 10105 }, { "epoch": 0.34, "grad_norm": 0.44740021228790283, "learning_rate": 0.0005815954810867872, "loss": 1.913, "step": 10106 }, { "epoch": 0.34, "grad_norm": 0.460762083530426, "learning_rate": 0.000581591876327045, "loss": 1.899, "step": 10107 }, { "epoch": 0.34, "grad_norm": 0.4700477123260498, "learning_rate": 0.0005815882712254917, "loss": 1.9298, "step": 10108 }, { "epoch": 0.34, "grad_norm": 0.4485539495944977, "learning_rate": 0.0005815846657821317, "loss": 1.9615, "step": 10109 }, { "epoch": 0.34, "grad_norm": 0.44968631863594055, "learning_rate": 0.0005815810599969694, "loss": 1.9123, "step": 10110 }, { "epoch": 0.34, "grad_norm": 0.44851717352867126, "learning_rate": 0.0005815774538700092, "loss": 1.9176, "step": 10111 }, { "epoch": 0.34, "grad_norm": 0.4438605308532715, "learning_rate": 0.0005815738474012554, "loss": 1.9487, "step": 10112 }, { "epoch": 0.34, "grad_norm": 0.43833717703819275, "learning_rate": 0.0005815702405907126, "loss": 1.8489, "step": 10113 }, { "epoch": 0.34, "grad_norm": 0.4476812779903412, "learning_rate": 0.0005815666334383849, "loss": 1.932, "step": 10114 }, { "epoch": 0.34, "grad_norm": 0.48106133937835693, "learning_rate": 0.0005815630259442768, "loss": 2.0115, "step": 10115 }, { "epoch": 0.34, "grad_norm": 0.45588672161102295, "learning_rate": 0.0005815594181083927, "loss": 1.931, "step": 10116 }, { "epoch": 0.34, "grad_norm": 0.4562884271144867, "learning_rate": 0.0005815558099307369, "loss": 1.9376, "step": 10117 }, { "epoch": 0.34, "grad_norm": 0.44750189781188965, "learning_rate": 0.000581552201411314, "loss": 1.9029, "step": 10118 }, { "epoch": 0.34, "grad_norm": 0.4365910589694977, "learning_rate": 0.000581548592550128, "loss": 1.9171, "step": 10119 }, { "epoch": 0.34, "grad_norm": 0.4438643455505371, "learning_rate": 0.0005815449833471835, "loss": 1.8713, "step": 10120 }, { "epoch": 0.34, "grad_norm": 0.4463149607181549, "learning_rate": 0.000581541373802485, "loss": 1.8895, "step": 10121 }, { "epoch": 0.34, "grad_norm": 0.4684307873249054, "learning_rate": 0.0005815377639160367, "loss": 1.8676, "step": 10122 }, { "epoch": 0.34, "grad_norm": 0.4618755877017975, "learning_rate": 0.000581534153687843, "loss": 1.9691, "step": 10123 }, { "epoch": 0.34, "grad_norm": 0.4361959397792816, "learning_rate": 0.0005815305431179084, "loss": 1.9478, "step": 10124 }, { "epoch": 0.34, "grad_norm": 0.4426918029785156, "learning_rate": 0.0005815269322062372, "loss": 1.7999, "step": 10125 }, { "epoch": 0.34, "grad_norm": 0.48657724261283875, "learning_rate": 0.0005815233209528338, "loss": 1.9045, "step": 10126 }, { "epoch": 0.34, "grad_norm": 0.47633910179138184, "learning_rate": 0.0005815197093577024, "loss": 1.9196, "step": 10127 }, { "epoch": 0.34, "grad_norm": 0.4850965142250061, "learning_rate": 0.0005815160974208478, "loss": 1.9103, "step": 10128 }, { "epoch": 0.34, "grad_norm": 0.45140913128852844, "learning_rate": 0.0005815124851422739, "loss": 1.887, "step": 10129 }, { "epoch": 0.34, "grad_norm": 0.4731918275356293, "learning_rate": 0.0005815088725219855, "loss": 1.8344, "step": 10130 }, { "epoch": 0.34, "grad_norm": 0.4641118347644806, "learning_rate": 0.0005815052595599867, "loss": 1.9276, "step": 10131 }, { "epoch": 0.34, "grad_norm": 0.4581310749053955, "learning_rate": 0.000581501646256282, "loss": 1.8692, "step": 10132 }, { "epoch": 0.34, "grad_norm": 0.4455200135707855, "learning_rate": 0.0005814980326108759, "loss": 1.9175, "step": 10133 }, { "epoch": 0.34, "grad_norm": 0.44821634888648987, "learning_rate": 0.0005814944186237726, "loss": 1.9452, "step": 10134 }, { "epoch": 0.34, "grad_norm": 0.4408460855484009, "learning_rate": 0.0005814908042949764, "loss": 1.9043, "step": 10135 }, { "epoch": 0.34, "grad_norm": 0.439605712890625, "learning_rate": 0.000581487189624492, "loss": 1.9382, "step": 10136 }, { "epoch": 0.34, "grad_norm": 0.43433821201324463, "learning_rate": 0.0005814835746123236, "loss": 1.9154, "step": 10137 }, { "epoch": 0.34, "grad_norm": 0.4553242027759552, "learning_rate": 0.0005814799592584756, "loss": 1.9024, "step": 10138 }, { "epoch": 0.34, "grad_norm": 0.4366094470024109, "learning_rate": 0.0005814763435629524, "loss": 1.8863, "step": 10139 }, { "epoch": 0.34, "grad_norm": 0.443826287984848, "learning_rate": 0.0005814727275257584, "loss": 1.8989, "step": 10140 }, { "epoch": 0.34, "grad_norm": 0.4653720557689667, "learning_rate": 0.000581469111146898, "loss": 1.8295, "step": 10141 }, { "epoch": 0.34, "grad_norm": 0.45463234186172485, "learning_rate": 0.0005814654944263756, "loss": 1.9365, "step": 10142 }, { "epoch": 0.34, "grad_norm": 0.467006117105484, "learning_rate": 0.0005814618773641954, "loss": 1.9887, "step": 10143 }, { "epoch": 0.34, "grad_norm": 0.4273982644081116, "learning_rate": 0.0005814582599603621, "loss": 1.8966, "step": 10144 }, { "epoch": 0.34, "grad_norm": 0.45018306374549866, "learning_rate": 0.0005814546422148799, "loss": 1.968, "step": 10145 }, { "epoch": 0.34, "grad_norm": 0.4601993262767792, "learning_rate": 0.0005814510241277534, "loss": 1.8792, "step": 10146 }, { "epoch": 0.34, "grad_norm": 0.4387359619140625, "learning_rate": 0.0005814474056989866, "loss": 1.9202, "step": 10147 }, { "epoch": 0.34, "grad_norm": 0.45381730794906616, "learning_rate": 0.0005814437869285842, "loss": 1.9757, "step": 10148 }, { "epoch": 0.34, "grad_norm": 0.46755748987197876, "learning_rate": 0.0005814401678165505, "loss": 1.8696, "step": 10149 }, { "epoch": 0.34, "grad_norm": 0.4472579061985016, "learning_rate": 0.0005814365483628899, "loss": 1.8645, "step": 10150 }, { "epoch": 0.34, "grad_norm": 0.46551328897476196, "learning_rate": 0.0005814329285676067, "loss": 1.8136, "step": 10151 }, { "epoch": 0.34, "grad_norm": 0.44756650924682617, "learning_rate": 0.0005814293084307055, "loss": 1.8418, "step": 10152 }, { "epoch": 0.34, "grad_norm": 0.45749136805534363, "learning_rate": 0.0005814256879521908, "loss": 1.9391, "step": 10153 }, { "epoch": 0.34, "grad_norm": 0.43946734070777893, "learning_rate": 0.0005814220671320665, "loss": 1.8696, "step": 10154 }, { "epoch": 0.34, "grad_norm": 0.45185914635658264, "learning_rate": 0.0005814184459703373, "loss": 1.9359, "step": 10155 }, { "epoch": 0.34, "grad_norm": 0.438222199678421, "learning_rate": 0.0005814148244670075, "loss": 1.9182, "step": 10156 }, { "epoch": 0.34, "grad_norm": 0.4398172199726105, "learning_rate": 0.0005814112026220818, "loss": 1.8801, "step": 10157 }, { "epoch": 0.34, "grad_norm": 0.4435931146144867, "learning_rate": 0.0005814075804355642, "loss": 1.8655, "step": 10158 }, { "epoch": 0.34, "grad_norm": 0.4552769958972931, "learning_rate": 0.0005814039579074594, "loss": 1.8697, "step": 10159 }, { "epoch": 0.34, "grad_norm": 0.44600915908813477, "learning_rate": 0.0005814003350377715, "loss": 1.8122, "step": 10160 }, { "epoch": 0.34, "grad_norm": 0.4533253014087677, "learning_rate": 0.0005813967118265052, "loss": 1.8574, "step": 10161 }, { "epoch": 0.34, "grad_norm": 0.44843626022338867, "learning_rate": 0.0005813930882736646, "loss": 1.9497, "step": 10162 }, { "epoch": 0.34, "grad_norm": 0.44474539160728455, "learning_rate": 0.0005813894643792543, "loss": 1.9308, "step": 10163 }, { "epoch": 0.34, "grad_norm": 0.4576430916786194, "learning_rate": 0.0005813858401432787, "loss": 1.9085, "step": 10164 }, { "epoch": 0.34, "grad_norm": 0.44902488589286804, "learning_rate": 0.0005813822155657421, "loss": 1.9717, "step": 10165 }, { "epoch": 0.34, "grad_norm": 0.4564112722873688, "learning_rate": 0.000581378590646649, "loss": 1.9219, "step": 10166 }, { "epoch": 0.34, "grad_norm": 0.42352133989334106, "learning_rate": 0.0005813749653860037, "loss": 1.8747, "step": 10167 }, { "epoch": 0.34, "grad_norm": 0.4471081495285034, "learning_rate": 0.0005813713397838108, "loss": 1.8506, "step": 10168 }, { "epoch": 0.34, "grad_norm": 0.4445807933807373, "learning_rate": 0.0005813677138400744, "loss": 1.8932, "step": 10169 }, { "epoch": 0.34, "grad_norm": 0.45520633459091187, "learning_rate": 0.000581364087554799, "loss": 1.9594, "step": 10170 }, { "epoch": 0.34, "grad_norm": 0.444214791059494, "learning_rate": 0.0005813604609279892, "loss": 1.8867, "step": 10171 }, { "epoch": 0.34, "grad_norm": 0.4425674378871918, "learning_rate": 0.0005813568339596492, "loss": 1.9024, "step": 10172 }, { "epoch": 0.34, "grad_norm": 0.464642733335495, "learning_rate": 0.0005813532066497834, "loss": 1.9961, "step": 10173 }, { "epoch": 0.34, "grad_norm": 0.4457239508628845, "learning_rate": 0.0005813495789983963, "loss": 1.8292, "step": 10174 }, { "epoch": 0.34, "grad_norm": 0.43826112151145935, "learning_rate": 0.0005813459510054923, "loss": 1.8677, "step": 10175 }, { "epoch": 0.34, "grad_norm": 0.4625500440597534, "learning_rate": 0.0005813423226710757, "loss": 1.8958, "step": 10176 }, { "epoch": 0.34, "grad_norm": 0.4526234269142151, "learning_rate": 0.0005813386939951511, "loss": 1.9231, "step": 10177 }, { "epoch": 0.34, "grad_norm": 0.5669804215431213, "learning_rate": 0.0005813350649777227, "loss": 1.9389, "step": 10178 }, { "epoch": 0.34, "grad_norm": 0.4667474329471588, "learning_rate": 0.000581331435618795, "loss": 1.9348, "step": 10179 }, { "epoch": 0.34, "grad_norm": 0.4322737455368042, "learning_rate": 0.0005813278059183724, "loss": 1.8926, "step": 10180 }, { "epoch": 0.34, "grad_norm": 0.437372624874115, "learning_rate": 0.0005813241758764593, "loss": 1.9222, "step": 10181 }, { "epoch": 0.34, "grad_norm": 0.45268934965133667, "learning_rate": 0.0005813205454930601, "loss": 1.955, "step": 10182 }, { "epoch": 0.34, "grad_norm": 0.44135862588882446, "learning_rate": 0.0005813169147681791, "loss": 1.9035, "step": 10183 }, { "epoch": 0.34, "grad_norm": 0.4560764729976654, "learning_rate": 0.0005813132837018209, "loss": 1.934, "step": 10184 }, { "epoch": 0.34, "grad_norm": 0.4477846026420593, "learning_rate": 0.0005813096522939898, "loss": 1.9761, "step": 10185 }, { "epoch": 0.34, "grad_norm": 0.45169273018836975, "learning_rate": 0.0005813060205446903, "loss": 1.9306, "step": 10186 }, { "epoch": 0.34, "grad_norm": 0.43350785970687866, "learning_rate": 0.0005813023884539267, "loss": 1.947, "step": 10187 }, { "epoch": 0.34, "grad_norm": 0.4723268151283264, "learning_rate": 0.0005812987560217034, "loss": 1.8574, "step": 10188 }, { "epoch": 0.34, "grad_norm": 0.45106571912765503, "learning_rate": 0.000581295123248025, "loss": 1.9454, "step": 10189 }, { "epoch": 0.34, "grad_norm": 0.43830010294914246, "learning_rate": 0.0005812914901328956, "loss": 1.8458, "step": 10190 }, { "epoch": 0.34, "grad_norm": 0.457549512386322, "learning_rate": 0.0005812878566763198, "loss": 1.939, "step": 10191 }, { "epoch": 0.34, "grad_norm": 0.4470074772834778, "learning_rate": 0.0005812842228783021, "loss": 1.7703, "step": 10192 }, { "epoch": 0.34, "grad_norm": 0.4861741065979004, "learning_rate": 0.0005812805887388467, "loss": 1.9463, "step": 10193 }, { "epoch": 0.34, "grad_norm": 0.4695436358451843, "learning_rate": 0.0005812769542579581, "loss": 1.7803, "step": 10194 }, { "epoch": 0.34, "grad_norm": 0.461584210395813, "learning_rate": 0.0005812733194356408, "loss": 1.9407, "step": 10195 }, { "epoch": 0.34, "grad_norm": 0.44807109236717224, "learning_rate": 0.000581269684271899, "loss": 1.8745, "step": 10196 }, { "epoch": 0.34, "grad_norm": 0.4453381597995758, "learning_rate": 0.0005812660487667375, "loss": 1.9332, "step": 10197 }, { "epoch": 0.34, "grad_norm": 0.4459324777126312, "learning_rate": 0.0005812624129201602, "loss": 1.8912, "step": 10198 }, { "epoch": 0.34, "grad_norm": 0.4508841335773468, "learning_rate": 0.0005812587767321719, "loss": 1.9144, "step": 10199 }, { "epoch": 0.34, "grad_norm": 0.42958715558052063, "learning_rate": 0.0005812551402027769, "loss": 1.8012, "step": 10200 }, { "epoch": 0.34, "grad_norm": 0.4494663178920746, "learning_rate": 0.0005812515033319795, "loss": 1.8616, "step": 10201 }, { "epoch": 0.34, "grad_norm": 0.442769855260849, "learning_rate": 0.0005812478661197843, "loss": 1.8402, "step": 10202 }, { "epoch": 0.34, "grad_norm": 0.44734472036361694, "learning_rate": 0.0005812442285661956, "loss": 1.8919, "step": 10203 }, { "epoch": 0.34, "grad_norm": 0.4483009874820709, "learning_rate": 0.0005812405906712179, "loss": 1.8861, "step": 10204 }, { "epoch": 0.34, "grad_norm": 0.4632641077041626, "learning_rate": 0.0005812369524348555, "loss": 1.8155, "step": 10205 }, { "epoch": 0.34, "grad_norm": 0.44437548518180847, "learning_rate": 0.0005812333138571129, "loss": 1.9286, "step": 10206 }, { "epoch": 0.34, "grad_norm": 0.6422631740570068, "learning_rate": 0.0005812296749379945, "loss": 1.8762, "step": 10207 }, { "epoch": 0.34, "grad_norm": 0.43670812249183655, "learning_rate": 0.0005812260356775048, "loss": 1.8586, "step": 10208 }, { "epoch": 0.34, "grad_norm": 0.4577082097530365, "learning_rate": 0.0005812223960756479, "loss": 1.9426, "step": 10209 }, { "epoch": 0.34, "grad_norm": 0.4651780426502228, "learning_rate": 0.0005812187561324286, "loss": 1.8622, "step": 10210 }, { "epoch": 0.34, "grad_norm": 0.44271761178970337, "learning_rate": 0.0005812151158478511, "loss": 1.8951, "step": 10211 }, { "epoch": 0.34, "grad_norm": 0.45672163367271423, "learning_rate": 0.00058121147522192, "loss": 1.9207, "step": 10212 }, { "epoch": 0.34, "grad_norm": 0.43800631165504456, "learning_rate": 0.0005812078342546395, "loss": 1.8562, "step": 10213 }, { "epoch": 0.34, "grad_norm": 0.46100056171417236, "learning_rate": 0.0005812041929460142, "loss": 1.8545, "step": 10214 }, { "epoch": 0.34, "grad_norm": 0.4452080726623535, "learning_rate": 0.0005812005512960483, "loss": 1.8676, "step": 10215 }, { "epoch": 0.34, "grad_norm": 0.4355587661266327, "learning_rate": 0.0005811969093047466, "loss": 1.8917, "step": 10216 }, { "epoch": 0.34, "grad_norm": 0.4430834650993347, "learning_rate": 0.0005811932669721131, "loss": 1.8634, "step": 10217 }, { "epoch": 0.34, "grad_norm": 0.45218244194984436, "learning_rate": 0.0005811896242981524, "loss": 1.8264, "step": 10218 }, { "epoch": 0.34, "grad_norm": 0.4579165577888489, "learning_rate": 0.000581185981282869, "loss": 1.9235, "step": 10219 }, { "epoch": 0.34, "grad_norm": 0.4558565020561218, "learning_rate": 0.0005811823379262672, "loss": 1.8741, "step": 10220 }, { "epoch": 0.34, "grad_norm": 0.47238749265670776, "learning_rate": 0.0005811786942283516, "loss": 1.8905, "step": 10221 }, { "epoch": 0.34, "grad_norm": 0.4459846615791321, "learning_rate": 0.0005811750501891264, "loss": 1.9171, "step": 10222 }, { "epoch": 0.34, "grad_norm": 0.4379828870296478, "learning_rate": 0.000581171405808596, "loss": 1.8951, "step": 10223 }, { "epoch": 0.34, "grad_norm": 0.44659873843193054, "learning_rate": 0.0005811677610867652, "loss": 1.8573, "step": 10224 }, { "epoch": 0.34, "grad_norm": 0.46214592456817627, "learning_rate": 0.000581164116023638, "loss": 1.8249, "step": 10225 }, { "epoch": 0.34, "grad_norm": 0.45701727271080017, "learning_rate": 0.000581160470619219, "loss": 1.8965, "step": 10226 }, { "epoch": 0.34, "grad_norm": 0.43752291798591614, "learning_rate": 0.0005811568248735126, "loss": 1.8834, "step": 10227 }, { "epoch": 0.34, "grad_norm": 0.45076796412467957, "learning_rate": 0.0005811531787865232, "loss": 1.7989, "step": 10228 }, { "epoch": 0.34, "grad_norm": 0.4490155577659607, "learning_rate": 0.0005811495323582553, "loss": 1.8644, "step": 10229 }, { "epoch": 0.34, "grad_norm": 0.47161880135536194, "learning_rate": 0.0005811458855887134, "loss": 1.9411, "step": 10230 }, { "epoch": 0.34, "grad_norm": 0.47467753291130066, "learning_rate": 0.0005811422384779017, "loss": 1.8785, "step": 10231 }, { "epoch": 0.34, "grad_norm": 0.46586039662361145, "learning_rate": 0.0005811385910258248, "loss": 1.9155, "step": 10232 }, { "epoch": 0.34, "grad_norm": 0.45451998710632324, "learning_rate": 0.0005811349432324872, "loss": 1.8752, "step": 10233 }, { "epoch": 0.34, "grad_norm": 0.4616340398788452, "learning_rate": 0.000581131295097893, "loss": 1.8972, "step": 10234 }, { "epoch": 0.34, "grad_norm": 0.4621526598930359, "learning_rate": 0.0005811276466220469, "loss": 1.8896, "step": 10235 }, { "epoch": 0.34, "grad_norm": 0.4539826810359955, "learning_rate": 0.0005811239978049533, "loss": 1.894, "step": 10236 }, { "epoch": 0.34, "grad_norm": 0.43916836380958557, "learning_rate": 0.0005811203486466165, "loss": 1.8503, "step": 10237 }, { "epoch": 0.34, "grad_norm": 0.4598793089389801, "learning_rate": 0.000581116699147041, "loss": 1.849, "step": 10238 }, { "epoch": 0.34, "grad_norm": 0.43029990792274475, "learning_rate": 0.0005811130493062314, "loss": 1.8542, "step": 10239 }, { "epoch": 0.34, "grad_norm": 0.4406251013278961, "learning_rate": 0.0005811093991241919, "loss": 1.8812, "step": 10240 }, { "epoch": 0.34, "grad_norm": 0.4404500424861908, "learning_rate": 0.0005811057486009271, "loss": 1.9191, "step": 10241 }, { "epoch": 0.34, "grad_norm": 0.4549616575241089, "learning_rate": 0.0005811020977364411, "loss": 1.9446, "step": 10242 }, { "epoch": 0.34, "grad_norm": 0.4659078121185303, "learning_rate": 0.0005810984465307388, "loss": 1.8642, "step": 10243 }, { "epoch": 0.34, "grad_norm": 0.4447058141231537, "learning_rate": 0.0005810947949838243, "loss": 1.8863, "step": 10244 }, { "epoch": 0.34, "grad_norm": 0.4507121741771698, "learning_rate": 0.0005810911430957022, "loss": 1.8257, "step": 10245 }, { "epoch": 0.34, "grad_norm": 0.4593120217323303, "learning_rate": 0.0005810874908663767, "loss": 1.8916, "step": 10246 }, { "epoch": 0.34, "grad_norm": 0.4642632305622101, "learning_rate": 0.0005810838382958527, "loss": 1.9405, "step": 10247 }, { "epoch": 0.34, "grad_norm": 0.44829061627388, "learning_rate": 0.000581080185384134, "loss": 1.882, "step": 10248 }, { "epoch": 0.34, "grad_norm": 0.5324304103851318, "learning_rate": 0.0005810765321312256, "loss": 1.7936, "step": 10249 }, { "epoch": 0.34, "grad_norm": 0.4681388735771179, "learning_rate": 0.0005810728785371317, "loss": 1.9278, "step": 10250 }, { "epoch": 0.34, "grad_norm": 0.48350533843040466, "learning_rate": 0.0005810692246018566, "loss": 1.8962, "step": 10251 }, { "epoch": 0.34, "grad_norm": 0.45251598954200745, "learning_rate": 0.0005810655703254049, "loss": 1.8234, "step": 10252 }, { "epoch": 0.34, "grad_norm": 0.44681429862976074, "learning_rate": 0.0005810619157077811, "loss": 1.9266, "step": 10253 }, { "epoch": 0.34, "grad_norm": 0.4439592957496643, "learning_rate": 0.0005810582607489895, "loss": 2.0284, "step": 10254 }, { "epoch": 0.34, "grad_norm": 0.45912057161331177, "learning_rate": 0.0005810546054490347, "loss": 1.8787, "step": 10255 }, { "epoch": 0.34, "grad_norm": 0.4524092376232147, "learning_rate": 0.0005810509498079208, "loss": 1.9279, "step": 10256 }, { "epoch": 0.34, "grad_norm": 0.4617561995983124, "learning_rate": 0.0005810472938256526, "loss": 1.9561, "step": 10257 }, { "epoch": 0.34, "grad_norm": 0.4516274333000183, "learning_rate": 0.0005810436375022344, "loss": 1.8554, "step": 10258 }, { "epoch": 0.34, "grad_norm": 0.4687780737876892, "learning_rate": 0.0005810399808376706, "loss": 1.9418, "step": 10259 }, { "epoch": 0.34, "grad_norm": 0.4397346079349518, "learning_rate": 0.0005810363238319656, "loss": 1.84, "step": 10260 }, { "epoch": 0.34, "grad_norm": 0.44744837284088135, "learning_rate": 0.000581032666485124, "loss": 1.8666, "step": 10261 }, { "epoch": 0.34, "grad_norm": 0.5116652250289917, "learning_rate": 0.0005810290087971501, "loss": 1.9141, "step": 10262 }, { "epoch": 0.34, "grad_norm": 0.4610491693019867, "learning_rate": 0.0005810253507680486, "loss": 1.942, "step": 10263 }, { "epoch": 0.34, "grad_norm": 0.43322300910949707, "learning_rate": 0.0005810216923978236, "loss": 1.8234, "step": 10264 }, { "epoch": 0.34, "grad_norm": 0.48434555530548096, "learning_rate": 0.0005810180336864795, "loss": 1.9346, "step": 10265 }, { "epoch": 0.34, "grad_norm": 0.48941725492477417, "learning_rate": 0.0005810143746340211, "loss": 1.9607, "step": 10266 }, { "epoch": 0.34, "grad_norm": 0.43626418709754944, "learning_rate": 0.0005810107152404526, "loss": 1.8863, "step": 10267 }, { "epoch": 0.34, "grad_norm": 0.46510931849479675, "learning_rate": 0.0005810070555057785, "loss": 1.9596, "step": 10268 }, { "epoch": 0.34, "grad_norm": 0.5026150345802307, "learning_rate": 0.0005810033954300032, "loss": 1.8945, "step": 10269 }, { "epoch": 0.34, "grad_norm": 0.45358437299728394, "learning_rate": 0.0005809997350131312, "loss": 1.8618, "step": 10270 }, { "epoch": 0.34, "grad_norm": 0.44990065693855286, "learning_rate": 0.000580996074255167, "loss": 1.9514, "step": 10271 }, { "epoch": 0.34, "grad_norm": 0.4799031913280487, "learning_rate": 0.0005809924131561149, "loss": 1.8855, "step": 10272 }, { "epoch": 0.34, "grad_norm": 0.4546235501766205, "learning_rate": 0.0005809887517159794, "loss": 1.8581, "step": 10273 }, { "epoch": 0.34, "grad_norm": 0.45903265476226807, "learning_rate": 0.0005809850899347649, "loss": 1.9222, "step": 10274 }, { "epoch": 0.34, "grad_norm": 0.46436333656311035, "learning_rate": 0.000580981427812476, "loss": 1.9061, "step": 10275 }, { "epoch": 0.34, "grad_norm": 0.44605353474617004, "learning_rate": 0.0005809777653491171, "loss": 1.9312, "step": 10276 }, { "epoch": 0.34, "grad_norm": 0.45090407133102417, "learning_rate": 0.0005809741025446925, "loss": 1.8998, "step": 10277 }, { "epoch": 0.34, "grad_norm": 0.4550614655017853, "learning_rate": 0.0005809704393992067, "loss": 1.9855, "step": 10278 }, { "epoch": 0.34, "grad_norm": 0.4545705318450928, "learning_rate": 0.0005809667759126643, "loss": 1.9052, "step": 10279 }, { "epoch": 0.34, "grad_norm": 0.45314866304397583, "learning_rate": 0.0005809631120850695, "loss": 1.908, "step": 10280 }, { "epoch": 0.34, "grad_norm": 0.44094255566596985, "learning_rate": 0.0005809594479164271, "loss": 1.8543, "step": 10281 }, { "epoch": 0.34, "grad_norm": 0.4518532454967499, "learning_rate": 0.0005809557834067411, "loss": 1.9634, "step": 10282 }, { "epoch": 0.34, "grad_norm": 0.4466623365879059, "learning_rate": 0.0005809521185560164, "loss": 1.8979, "step": 10283 }, { "epoch": 0.34, "grad_norm": 0.46082329750061035, "learning_rate": 0.000580948453364257, "loss": 1.8893, "step": 10284 }, { "epoch": 0.34, "grad_norm": 0.8214651346206665, "learning_rate": 0.0005809447878314677, "loss": 1.923, "step": 10285 }, { "epoch": 0.34, "grad_norm": 0.4485127329826355, "learning_rate": 0.0005809411219576528, "loss": 1.9443, "step": 10286 }, { "epoch": 0.34, "grad_norm": 0.4965035915374756, "learning_rate": 0.0005809374557428168, "loss": 1.9211, "step": 10287 }, { "epoch": 0.34, "grad_norm": 0.4382873773574829, "learning_rate": 0.000580933789186964, "loss": 1.8442, "step": 10288 }, { "epoch": 0.34, "grad_norm": 0.4587189555168152, "learning_rate": 0.0005809301222900991, "loss": 1.8441, "step": 10289 }, { "epoch": 0.34, "grad_norm": 0.45125797390937805, "learning_rate": 0.0005809264550522263, "loss": 1.8084, "step": 10290 }, { "epoch": 0.34, "grad_norm": 0.45286065340042114, "learning_rate": 0.0005809227874733503, "loss": 1.8494, "step": 10291 }, { "epoch": 0.34, "grad_norm": 0.45961639285087585, "learning_rate": 0.0005809191195534754, "loss": 1.9174, "step": 10292 }, { "epoch": 0.34, "grad_norm": 0.4524768590927124, "learning_rate": 0.000580915451292606, "loss": 1.8946, "step": 10293 }, { "epoch": 0.34, "grad_norm": 0.4557914435863495, "learning_rate": 0.0005809117826907468, "loss": 1.9225, "step": 10294 }, { "epoch": 0.34, "grad_norm": 0.4318476915359497, "learning_rate": 0.0005809081137479018, "loss": 1.8322, "step": 10295 }, { "epoch": 0.34, "grad_norm": 0.4389505982398987, "learning_rate": 0.000580904444464076, "loss": 1.9244, "step": 10296 }, { "epoch": 0.34, "grad_norm": 0.4475744366645813, "learning_rate": 0.0005809007748392736, "loss": 1.9276, "step": 10297 }, { "epoch": 0.34, "grad_norm": 0.4515851140022278, "learning_rate": 0.0005808971048734989, "loss": 1.9016, "step": 10298 }, { "epoch": 0.34, "grad_norm": 0.5116488337516785, "learning_rate": 0.0005808934345667566, "loss": 1.9253, "step": 10299 }, { "epoch": 0.34, "grad_norm": 0.4442738890647888, "learning_rate": 0.000580889763919051, "loss": 1.8514, "step": 10300 }, { "epoch": 0.34, "grad_norm": 0.4533365070819855, "learning_rate": 0.0005808860929303867, "loss": 1.9155, "step": 10301 }, { "epoch": 0.34, "grad_norm": 0.4463171064853668, "learning_rate": 0.000580882421600768, "loss": 1.8369, "step": 10302 }, { "epoch": 0.34, "grad_norm": 0.46039658784866333, "learning_rate": 0.0005808787499301994, "loss": 1.879, "step": 10303 }, { "epoch": 0.34, "grad_norm": 0.4448433518409729, "learning_rate": 0.0005808750779186854, "loss": 1.8449, "step": 10304 }, { "epoch": 0.34, "grad_norm": 0.43052780628204346, "learning_rate": 0.0005808714055662305, "loss": 1.8336, "step": 10305 }, { "epoch": 0.34, "grad_norm": 0.435634970664978, "learning_rate": 0.000580867732872839, "loss": 1.883, "step": 10306 }, { "epoch": 0.34, "grad_norm": 0.4575551152229309, "learning_rate": 0.0005808640598385156, "loss": 1.9683, "step": 10307 }, { "epoch": 0.34, "grad_norm": 0.4494682252407074, "learning_rate": 0.0005808603864632645, "loss": 1.8701, "step": 10308 }, { "epoch": 0.34, "grad_norm": 0.4621509909629822, "learning_rate": 0.0005808567127470903, "loss": 1.9304, "step": 10309 }, { "epoch": 0.34, "grad_norm": 0.4427582025527954, "learning_rate": 0.0005808530386899974, "loss": 1.9251, "step": 10310 }, { "epoch": 0.34, "grad_norm": 0.44726288318634033, "learning_rate": 0.0005808493642919903, "loss": 1.8296, "step": 10311 }, { "epoch": 0.34, "grad_norm": 0.43772706389427185, "learning_rate": 0.0005808456895530736, "loss": 1.8933, "step": 10312 }, { "epoch": 0.34, "grad_norm": 0.4463246464729309, "learning_rate": 0.0005808420144732514, "loss": 1.9511, "step": 10313 }, { "epoch": 0.34, "grad_norm": 0.44780731201171875, "learning_rate": 0.0005808383390525285, "loss": 1.8584, "step": 10314 }, { "epoch": 0.34, "grad_norm": 0.44726428389549255, "learning_rate": 0.0005808346632909092, "loss": 1.8654, "step": 10315 }, { "epoch": 0.34, "grad_norm": 0.4503406286239624, "learning_rate": 0.0005808309871883979, "loss": 1.9005, "step": 10316 }, { "epoch": 0.34, "grad_norm": 0.4523194432258606, "learning_rate": 0.0005808273107449993, "loss": 1.9415, "step": 10317 }, { "epoch": 0.34, "grad_norm": 0.4566049575805664, "learning_rate": 0.0005808236339607177, "loss": 1.9169, "step": 10318 }, { "epoch": 0.34, "grad_norm": 0.45655786991119385, "learning_rate": 0.0005808199568355576, "loss": 1.8947, "step": 10319 }, { "epoch": 0.34, "grad_norm": 0.4338790476322174, "learning_rate": 0.0005808162793695234, "loss": 1.9073, "step": 10320 }, { "epoch": 0.34, "grad_norm": 0.4630075991153717, "learning_rate": 0.0005808126015626196, "loss": 1.947, "step": 10321 }, { "epoch": 0.34, "grad_norm": 0.45904505252838135, "learning_rate": 0.0005808089234148508, "loss": 1.856, "step": 10322 }, { "epoch": 0.34, "grad_norm": 0.4564352333545685, "learning_rate": 0.0005808052449262212, "loss": 1.9064, "step": 10323 }, { "epoch": 0.34, "grad_norm": 0.4521164298057556, "learning_rate": 0.0005808015660967354, "loss": 1.9178, "step": 10324 }, { "epoch": 0.34, "grad_norm": 0.45870280265808105, "learning_rate": 0.000580797886926398, "loss": 2.0216, "step": 10325 }, { "epoch": 0.34, "grad_norm": 0.437370628118515, "learning_rate": 0.0005807942074152133, "loss": 1.8925, "step": 10326 }, { "epoch": 0.34, "grad_norm": 0.4435541331768036, "learning_rate": 0.0005807905275631857, "loss": 1.9391, "step": 10327 }, { "epoch": 0.34, "grad_norm": 0.46545639634132385, "learning_rate": 0.0005807868473703199, "loss": 1.9419, "step": 10328 }, { "epoch": 0.34, "grad_norm": 0.44744572043418884, "learning_rate": 0.0005807831668366202, "loss": 1.9253, "step": 10329 }, { "epoch": 0.34, "grad_norm": 0.4512884020805359, "learning_rate": 0.0005807794859620911, "loss": 1.9651, "step": 10330 }, { "epoch": 0.34, "grad_norm": 0.46083131432533264, "learning_rate": 0.000580775804746737, "loss": 1.9808, "step": 10331 }, { "epoch": 0.34, "grad_norm": 0.4402157962322235, "learning_rate": 0.0005807721231905626, "loss": 1.9153, "step": 10332 }, { "epoch": 0.34, "grad_norm": 0.4444756507873535, "learning_rate": 0.0005807684412935721, "loss": 1.9349, "step": 10333 }, { "epoch": 0.34, "grad_norm": 0.4541022479534149, "learning_rate": 0.0005807647590557701, "loss": 1.8797, "step": 10334 }, { "epoch": 0.34, "grad_norm": 0.44628432393074036, "learning_rate": 0.000580761076477161, "loss": 1.8892, "step": 10335 }, { "epoch": 0.34, "grad_norm": 0.44928401708602905, "learning_rate": 0.0005807573935577494, "loss": 1.9781, "step": 10336 }, { "epoch": 0.34, "grad_norm": 0.4402216076850891, "learning_rate": 0.0005807537102975398, "loss": 1.8631, "step": 10337 }, { "epoch": 0.34, "grad_norm": 0.4384002387523651, "learning_rate": 0.0005807500266965364, "loss": 1.8775, "step": 10338 }, { "epoch": 0.34, "grad_norm": 0.4427952468395233, "learning_rate": 0.0005807463427547438, "loss": 1.9011, "step": 10339 }, { "epoch": 0.34, "grad_norm": 0.46832460165023804, "learning_rate": 0.0005807426584721666, "loss": 1.9697, "step": 10340 }, { "epoch": 0.34, "grad_norm": 0.4649331867694855, "learning_rate": 0.0005807389738488091, "loss": 1.9378, "step": 10341 }, { "epoch": 0.34, "grad_norm": 0.46865004301071167, "learning_rate": 0.0005807352888846759, "loss": 1.9045, "step": 10342 }, { "epoch": 0.34, "grad_norm": 0.4446556568145752, "learning_rate": 0.0005807316035797715, "loss": 1.8629, "step": 10343 }, { "epoch": 0.34, "grad_norm": 0.45500314235687256, "learning_rate": 0.0005807279179341002, "loss": 1.8768, "step": 10344 }, { "epoch": 0.34, "grad_norm": 0.4495317041873932, "learning_rate": 0.0005807242319476666, "loss": 1.8836, "step": 10345 }, { "epoch": 0.34, "grad_norm": 0.45149847865104675, "learning_rate": 0.0005807205456204752, "loss": 1.8413, "step": 10346 }, { "epoch": 0.34, "grad_norm": 0.45478296279907227, "learning_rate": 0.0005807168589525303, "loss": 1.8965, "step": 10347 }, { "epoch": 0.34, "grad_norm": 0.460858017206192, "learning_rate": 0.0005807131719438365, "loss": 1.9438, "step": 10348 }, { "epoch": 0.34, "grad_norm": 0.43531227111816406, "learning_rate": 0.0005807094845943984, "loss": 1.9565, "step": 10349 }, { "epoch": 0.34, "grad_norm": 0.4395657181739807, "learning_rate": 0.0005807057969042203, "loss": 1.8739, "step": 10350 }, { "epoch": 0.34, "grad_norm": 0.45029664039611816, "learning_rate": 0.0005807021088733066, "loss": 1.9233, "step": 10351 }, { "epoch": 0.34, "grad_norm": 0.4539510905742645, "learning_rate": 0.0005806984205016621, "loss": 1.8758, "step": 10352 }, { "epoch": 0.34, "grad_norm": 0.44247180223464966, "learning_rate": 0.0005806947317892909, "loss": 1.8781, "step": 10353 }, { "epoch": 0.34, "grad_norm": 0.44313713908195496, "learning_rate": 0.0005806910427361978, "loss": 1.9094, "step": 10354 }, { "epoch": 0.34, "grad_norm": 0.4781705439090729, "learning_rate": 0.0005806873533423869, "loss": 1.8628, "step": 10355 }, { "epoch": 0.34, "grad_norm": 0.4604341983795166, "learning_rate": 0.0005806836636078631, "loss": 1.8505, "step": 10356 }, { "epoch": 0.34, "grad_norm": 0.43509697914123535, "learning_rate": 0.0005806799735326307, "loss": 1.8423, "step": 10357 }, { "epoch": 0.34, "grad_norm": 0.43951520323753357, "learning_rate": 0.0005806762831166942, "loss": 1.8717, "step": 10358 }, { "epoch": 0.34, "grad_norm": 0.46034711599349976, "learning_rate": 0.0005806725923600579, "loss": 1.9511, "step": 10359 }, { "epoch": 0.34, "grad_norm": 0.4691273272037506, "learning_rate": 0.0005806689012627266, "loss": 1.8996, "step": 10360 }, { "epoch": 0.34, "grad_norm": 0.4351593852043152, "learning_rate": 0.0005806652098247044, "loss": 1.8116, "step": 10361 }, { "epoch": 0.34, "grad_norm": 0.4448799788951874, "learning_rate": 0.0005806615180459961, "loss": 1.841, "step": 10362 }, { "epoch": 0.34, "grad_norm": 0.4702698886394501, "learning_rate": 0.0005806578259266061, "loss": 1.9405, "step": 10363 }, { "epoch": 0.34, "grad_norm": 0.4362969398498535, "learning_rate": 0.0005806541334665388, "loss": 1.8185, "step": 10364 }, { "epoch": 0.34, "grad_norm": 0.44472360610961914, "learning_rate": 0.0005806504406657988, "loss": 1.895, "step": 10365 }, { "epoch": 0.34, "grad_norm": 0.4305821657180786, "learning_rate": 0.0005806467475243905, "loss": 1.8445, "step": 10366 }, { "epoch": 0.34, "grad_norm": 0.4341156780719757, "learning_rate": 0.0005806430540423183, "loss": 1.8644, "step": 10367 }, { "epoch": 0.34, "grad_norm": 0.4446730613708496, "learning_rate": 0.0005806393602195869, "loss": 1.8914, "step": 10368 }, { "epoch": 0.34, "grad_norm": 0.44307446479797363, "learning_rate": 0.0005806356660562006, "loss": 1.8993, "step": 10369 }, { "epoch": 0.35, "grad_norm": 0.44979947805404663, "learning_rate": 0.000580631971552164, "loss": 1.9943, "step": 10370 }, { "epoch": 0.35, "grad_norm": 0.4718833565711975, "learning_rate": 0.0005806282767074815, "loss": 1.8897, "step": 10371 }, { "epoch": 0.35, "grad_norm": 0.4496142566204071, "learning_rate": 0.0005806245815221576, "loss": 1.9049, "step": 10372 }, { "epoch": 0.35, "grad_norm": 0.4557298421859741, "learning_rate": 0.0005806208859961969, "loss": 1.8717, "step": 10373 }, { "epoch": 0.35, "grad_norm": 0.48679861426353455, "learning_rate": 0.0005806171901296036, "loss": 1.919, "step": 10374 }, { "epoch": 0.35, "grad_norm": 0.4486253261566162, "learning_rate": 0.0005806134939223826, "loss": 1.9637, "step": 10375 }, { "epoch": 0.35, "grad_norm": 0.4445493221282959, "learning_rate": 0.000580609797374538, "loss": 1.9286, "step": 10376 }, { "epoch": 0.35, "grad_norm": 0.4756256937980652, "learning_rate": 0.0005806061004860745, "loss": 1.8688, "step": 10377 }, { "epoch": 0.35, "grad_norm": 0.45981886982917786, "learning_rate": 0.0005806024032569966, "loss": 1.9204, "step": 10378 }, { "epoch": 0.35, "grad_norm": 0.4496961236000061, "learning_rate": 0.0005805987056873087, "loss": 1.8696, "step": 10379 }, { "epoch": 0.35, "grad_norm": 0.45997706055641174, "learning_rate": 0.0005805950077770153, "loss": 1.9046, "step": 10380 }, { "epoch": 0.35, "grad_norm": 0.42887282371520996, "learning_rate": 0.0005805913095261209, "loss": 1.8405, "step": 10381 }, { "epoch": 0.35, "grad_norm": 0.4468030333518982, "learning_rate": 0.0005805876109346301, "loss": 1.8466, "step": 10382 }, { "epoch": 0.35, "grad_norm": 0.43543028831481934, "learning_rate": 0.0005805839120025471, "loss": 1.8428, "step": 10383 }, { "epoch": 0.35, "grad_norm": 0.454761803150177, "learning_rate": 0.0005805802127298767, "loss": 1.9359, "step": 10384 }, { "epoch": 0.35, "grad_norm": 0.43472474813461304, "learning_rate": 0.0005805765131166233, "loss": 1.8783, "step": 10385 }, { "epoch": 0.35, "grad_norm": 0.4634365439414978, "learning_rate": 0.0005805728131627911, "loss": 1.8517, "step": 10386 }, { "epoch": 0.35, "grad_norm": 0.4338432848453522, "learning_rate": 0.0005805691128683852, "loss": 1.8348, "step": 10387 }, { "epoch": 0.35, "grad_norm": 0.4298122227191925, "learning_rate": 0.0005805654122334094, "loss": 1.934, "step": 10388 }, { "epoch": 0.35, "grad_norm": 0.43909427523612976, "learning_rate": 0.0005805617112578687, "loss": 1.9034, "step": 10389 }, { "epoch": 0.35, "grad_norm": 0.4584158658981323, "learning_rate": 0.0005805580099417675, "loss": 1.8124, "step": 10390 }, { "epoch": 0.35, "grad_norm": 0.43124932050704956, "learning_rate": 0.0005805543082851101, "loss": 1.8828, "step": 10391 }, { "epoch": 0.35, "grad_norm": 0.4455670714378357, "learning_rate": 0.0005805506062879011, "loss": 1.9068, "step": 10392 }, { "epoch": 0.35, "grad_norm": 0.46626609563827515, "learning_rate": 0.000580546903950145, "loss": 1.9465, "step": 10393 }, { "epoch": 0.35, "grad_norm": 0.45540326833724976, "learning_rate": 0.0005805432012718464, "loss": 2.0664, "step": 10394 }, { "epoch": 0.35, "grad_norm": 0.4305706024169922, "learning_rate": 0.0005805394982530096, "loss": 1.861, "step": 10395 }, { "epoch": 0.35, "grad_norm": 0.4577319622039795, "learning_rate": 0.0005805357948936393, "loss": 1.8713, "step": 10396 }, { "epoch": 0.35, "grad_norm": 0.4525418281555176, "learning_rate": 0.0005805320911937397, "loss": 1.8543, "step": 10397 }, { "epoch": 0.35, "grad_norm": 0.4359629154205322, "learning_rate": 0.0005805283871533156, "loss": 1.8254, "step": 10398 }, { "epoch": 0.35, "grad_norm": 0.45118021965026855, "learning_rate": 0.0005805246827723713, "loss": 1.9211, "step": 10399 }, { "epoch": 0.35, "grad_norm": 0.46118229627609253, "learning_rate": 0.0005805209780509114, "loss": 1.8204, "step": 10400 }, { "epoch": 0.35, "grad_norm": 0.4494231343269348, "learning_rate": 0.0005805172729889403, "loss": 1.8913, "step": 10401 }, { "epoch": 0.35, "grad_norm": 0.4460749626159668, "learning_rate": 0.0005805135675864627, "loss": 1.9972, "step": 10402 }, { "epoch": 0.35, "grad_norm": 0.5141111016273499, "learning_rate": 0.0005805098618434829, "loss": 1.858, "step": 10403 }, { "epoch": 0.35, "grad_norm": 0.44322577118873596, "learning_rate": 0.0005805061557600054, "loss": 1.9072, "step": 10404 }, { "epoch": 0.35, "grad_norm": 0.4374915361404419, "learning_rate": 0.0005805024493360347, "loss": 1.8623, "step": 10405 }, { "epoch": 0.35, "grad_norm": 0.4648449420928955, "learning_rate": 0.0005804987425715756, "loss": 1.906, "step": 10406 }, { "epoch": 0.35, "grad_norm": 0.4423125088214874, "learning_rate": 0.0005804950354666321, "loss": 1.8854, "step": 10407 }, { "epoch": 0.35, "grad_norm": 0.45208442211151123, "learning_rate": 0.0005804913280212092, "loss": 1.9353, "step": 10408 }, { "epoch": 0.35, "grad_norm": 0.46405890583992004, "learning_rate": 0.000580487620235311, "loss": 1.843, "step": 10409 }, { "epoch": 0.35, "grad_norm": 0.4796990752220154, "learning_rate": 0.0005804839121089422, "loss": 1.8549, "step": 10410 }, { "epoch": 0.35, "grad_norm": 0.4599534869194031, "learning_rate": 0.0005804802036421072, "loss": 1.8653, "step": 10411 }, { "epoch": 0.35, "grad_norm": 0.4430409371852875, "learning_rate": 0.0005804764948348106, "loss": 1.856, "step": 10412 }, { "epoch": 0.35, "grad_norm": 0.476698100566864, "learning_rate": 0.0005804727856870568, "loss": 1.8477, "step": 10413 }, { "epoch": 0.35, "grad_norm": 0.4587137699127197, "learning_rate": 0.0005804690761988504, "loss": 1.8766, "step": 10414 }, { "epoch": 0.35, "grad_norm": 0.4510331451892853, "learning_rate": 0.000580465366370196, "loss": 1.8897, "step": 10415 }, { "epoch": 0.35, "grad_norm": 0.4536252021789551, "learning_rate": 0.0005804616562010978, "loss": 1.9133, "step": 10416 }, { "epoch": 0.35, "grad_norm": 0.4424380958080292, "learning_rate": 0.0005804579456915606, "loss": 1.9571, "step": 10417 }, { "epoch": 0.35, "grad_norm": 0.4435243606567383, "learning_rate": 0.0005804542348415887, "loss": 2.0096, "step": 10418 }, { "epoch": 0.35, "grad_norm": 0.43872129917144775, "learning_rate": 0.0005804505236511867, "loss": 1.908, "step": 10419 }, { "epoch": 0.35, "grad_norm": 0.4456300139427185, "learning_rate": 0.0005804468121203591, "loss": 1.8684, "step": 10420 }, { "epoch": 0.35, "grad_norm": 0.44609636068344116, "learning_rate": 0.0005804431002491104, "loss": 1.911, "step": 10421 }, { "epoch": 0.35, "grad_norm": 0.4284324645996094, "learning_rate": 0.000580439388037445, "loss": 1.8531, "step": 10422 }, { "epoch": 0.35, "grad_norm": 0.4492078423500061, "learning_rate": 0.0005804356754853675, "loss": 1.939, "step": 10423 }, { "epoch": 0.35, "grad_norm": 0.4454287886619568, "learning_rate": 0.0005804319625928826, "loss": 1.9462, "step": 10424 }, { "epoch": 0.35, "grad_norm": 0.4525514543056488, "learning_rate": 0.0005804282493599946, "loss": 1.9514, "step": 10425 }, { "epoch": 0.35, "grad_norm": 0.44272369146347046, "learning_rate": 0.0005804245357867078, "loss": 1.914, "step": 10426 }, { "epoch": 0.35, "grad_norm": 0.4438709020614624, "learning_rate": 0.0005804208218730271, "loss": 1.8096, "step": 10427 }, { "epoch": 0.35, "grad_norm": 0.45256343483924866, "learning_rate": 0.0005804171076189568, "loss": 1.8859, "step": 10428 }, { "epoch": 0.35, "grad_norm": 0.4451834261417389, "learning_rate": 0.0005804133930245014, "loss": 1.9156, "step": 10429 }, { "epoch": 0.35, "grad_norm": 0.4445129334926605, "learning_rate": 0.0005804096780896655, "loss": 1.879, "step": 10430 }, { "epoch": 0.35, "grad_norm": 0.44985195994377136, "learning_rate": 0.0005804059628144537, "loss": 1.8743, "step": 10431 }, { "epoch": 0.35, "grad_norm": 0.4327830970287323, "learning_rate": 0.0005804022471988701, "loss": 1.8561, "step": 10432 }, { "epoch": 0.35, "grad_norm": 0.44281652569770813, "learning_rate": 0.0005803985312429198, "loss": 1.9341, "step": 10433 }, { "epoch": 0.35, "grad_norm": 0.42574748396873474, "learning_rate": 0.0005803948149466068, "loss": 1.8674, "step": 10434 }, { "epoch": 0.35, "grad_norm": 0.43412497639656067, "learning_rate": 0.0005803910983099358, "loss": 1.894, "step": 10435 }, { "epoch": 0.35, "grad_norm": 0.43125805258750916, "learning_rate": 0.0005803873813329115, "loss": 1.85, "step": 10436 }, { "epoch": 0.35, "grad_norm": 0.4436001181602478, "learning_rate": 0.000580383664015538, "loss": 1.9118, "step": 10437 }, { "epoch": 0.35, "grad_norm": 0.4433128535747528, "learning_rate": 0.0005803799463578201, "loss": 1.9519, "step": 10438 }, { "epoch": 0.35, "grad_norm": 0.4655853509902954, "learning_rate": 0.0005803762283597623, "loss": 1.8718, "step": 10439 }, { "epoch": 0.35, "grad_norm": 0.44991815090179443, "learning_rate": 0.0005803725100213691, "loss": 1.9109, "step": 10440 }, { "epoch": 0.35, "grad_norm": 0.4468802809715271, "learning_rate": 0.0005803687913426449, "loss": 1.925, "step": 10441 }, { "epoch": 0.35, "grad_norm": 0.4383135139942169, "learning_rate": 0.0005803650723235944, "loss": 1.8395, "step": 10442 }, { "epoch": 0.35, "grad_norm": 0.451609343290329, "learning_rate": 0.0005803613529642219, "loss": 1.8587, "step": 10443 }, { "epoch": 0.35, "grad_norm": 0.45508185029029846, "learning_rate": 0.0005803576332645322, "loss": 1.9306, "step": 10444 }, { "epoch": 0.35, "grad_norm": 0.4416573941707611, "learning_rate": 0.0005803539132245296, "loss": 1.8756, "step": 10445 }, { "epoch": 0.35, "grad_norm": 0.60023033618927, "learning_rate": 0.0005803501928442186, "loss": 1.9159, "step": 10446 }, { "epoch": 0.35, "grad_norm": 0.43882203102111816, "learning_rate": 0.0005803464721236036, "loss": 1.934, "step": 10447 }, { "epoch": 0.35, "grad_norm": 0.4435760974884033, "learning_rate": 0.0005803427510626895, "loss": 1.846, "step": 10448 }, { "epoch": 0.35, "grad_norm": 0.46175000071525574, "learning_rate": 0.0005803390296614805, "loss": 1.8753, "step": 10449 }, { "epoch": 0.35, "grad_norm": 0.4786193370819092, "learning_rate": 0.0005803353079199814, "loss": 1.8793, "step": 10450 }, { "epoch": 0.35, "grad_norm": 0.4294966757297516, "learning_rate": 0.0005803315858381964, "loss": 1.8783, "step": 10451 }, { "epoch": 0.35, "grad_norm": 0.4615168869495392, "learning_rate": 0.0005803278634161302, "loss": 1.8784, "step": 10452 }, { "epoch": 0.35, "grad_norm": 0.4451347887516022, "learning_rate": 0.0005803241406537872, "loss": 1.8116, "step": 10453 }, { "epoch": 0.35, "grad_norm": 0.4322495758533478, "learning_rate": 0.0005803204175511721, "loss": 1.8823, "step": 10454 }, { "epoch": 0.35, "grad_norm": 0.44982725381851196, "learning_rate": 0.0005803166941082893, "loss": 1.8604, "step": 10455 }, { "epoch": 0.35, "grad_norm": 0.45867177844047546, "learning_rate": 0.0005803129703251432, "loss": 1.8414, "step": 10456 }, { "epoch": 0.35, "grad_norm": 0.4413527250289917, "learning_rate": 0.0005803092462017387, "loss": 1.8749, "step": 10457 }, { "epoch": 0.35, "grad_norm": 0.4627029001712799, "learning_rate": 0.0005803055217380799, "loss": 1.8864, "step": 10458 }, { "epoch": 0.35, "grad_norm": 0.45643800497055054, "learning_rate": 0.0005803017969341716, "loss": 1.9007, "step": 10459 }, { "epoch": 0.35, "grad_norm": 0.4506934583187103, "learning_rate": 0.0005802980717900181, "loss": 1.9074, "step": 10460 }, { "epoch": 0.35, "grad_norm": 0.47449925541877747, "learning_rate": 0.0005802943463056241, "loss": 1.9079, "step": 10461 }, { "epoch": 0.35, "grad_norm": 0.44755542278289795, "learning_rate": 0.0005802906204809941, "loss": 1.8855, "step": 10462 }, { "epoch": 0.35, "grad_norm": 0.44457876682281494, "learning_rate": 0.0005802868943161325, "loss": 1.8385, "step": 10463 }, { "epoch": 0.35, "grad_norm": 0.45471927523612976, "learning_rate": 0.000580283167811044, "loss": 1.855, "step": 10464 }, { "epoch": 0.35, "grad_norm": 0.4663692116737366, "learning_rate": 0.000580279440965733, "loss": 1.9213, "step": 10465 }, { "epoch": 0.35, "grad_norm": 0.4519268870353699, "learning_rate": 0.000580275713780204, "loss": 1.9448, "step": 10466 }, { "epoch": 0.35, "grad_norm": 0.4528610110282898, "learning_rate": 0.0005802719862544616, "loss": 1.8429, "step": 10467 }, { "epoch": 0.35, "grad_norm": 0.4493042826652527, "learning_rate": 0.0005802682583885103, "loss": 1.8835, "step": 10468 }, { "epoch": 0.35, "grad_norm": 0.4574086368083954, "learning_rate": 0.0005802645301823548, "loss": 1.8386, "step": 10469 }, { "epoch": 0.35, "grad_norm": 0.44901910424232483, "learning_rate": 0.0005802608016359993, "loss": 1.8994, "step": 10470 }, { "epoch": 0.35, "grad_norm": 0.44627001881599426, "learning_rate": 0.0005802570727494485, "loss": 1.8026, "step": 10471 }, { "epoch": 0.35, "grad_norm": 0.4388201832771301, "learning_rate": 0.0005802533435227069, "loss": 1.8403, "step": 10472 }, { "epoch": 0.35, "grad_norm": 0.450257807970047, "learning_rate": 0.000580249613955779, "loss": 1.9079, "step": 10473 }, { "epoch": 0.35, "grad_norm": 0.4543437063694, "learning_rate": 0.0005802458840486695, "loss": 1.8714, "step": 10474 }, { "epoch": 0.35, "grad_norm": 0.46875420212745667, "learning_rate": 0.0005802421538013827, "loss": 1.9076, "step": 10475 }, { "epoch": 0.35, "grad_norm": 0.4510056972503662, "learning_rate": 0.0005802384232139232, "loss": 1.8162, "step": 10476 }, { "epoch": 0.35, "grad_norm": 0.45413944125175476, "learning_rate": 0.0005802346922862955, "loss": 1.8818, "step": 10477 }, { "epoch": 0.35, "grad_norm": 0.4622085690498352, "learning_rate": 0.0005802309610185042, "loss": 1.8986, "step": 10478 }, { "epoch": 0.35, "grad_norm": 0.45059239864349365, "learning_rate": 0.0005802272294105539, "loss": 1.8622, "step": 10479 }, { "epoch": 0.35, "grad_norm": 0.4456273913383484, "learning_rate": 0.000580223497462449, "loss": 1.9155, "step": 10480 }, { "epoch": 0.35, "grad_norm": 0.4537796676158905, "learning_rate": 0.000580219765174194, "loss": 1.9341, "step": 10481 }, { "epoch": 0.35, "grad_norm": 0.44371306896209717, "learning_rate": 0.0005802160325457935, "loss": 1.8489, "step": 10482 }, { "epoch": 0.35, "grad_norm": 0.4483451843261719, "learning_rate": 0.000580212299577252, "loss": 1.8798, "step": 10483 }, { "epoch": 0.35, "grad_norm": 0.4449736475944519, "learning_rate": 0.0005802085662685742, "loss": 1.879, "step": 10484 }, { "epoch": 0.35, "grad_norm": 0.4432884752750397, "learning_rate": 0.0005802048326197643, "loss": 1.8007, "step": 10485 }, { "epoch": 0.35, "grad_norm": 0.4454466700553894, "learning_rate": 0.0005802010986308272, "loss": 1.9312, "step": 10486 }, { "epoch": 0.35, "grad_norm": 0.43771541118621826, "learning_rate": 0.0005801973643017671, "loss": 1.9213, "step": 10487 }, { "epoch": 0.35, "grad_norm": 0.43357133865356445, "learning_rate": 0.0005801936296325888, "loss": 1.8289, "step": 10488 }, { "epoch": 0.35, "grad_norm": 0.44732269644737244, "learning_rate": 0.0005801898946232966, "loss": 1.8546, "step": 10489 }, { "epoch": 0.35, "grad_norm": 0.45659154653549194, "learning_rate": 0.0005801861592738953, "loss": 1.9106, "step": 10490 }, { "epoch": 0.35, "grad_norm": 0.43738481402397156, "learning_rate": 0.0005801824235843892, "loss": 1.8525, "step": 10491 }, { "epoch": 0.35, "grad_norm": 0.44840431213378906, "learning_rate": 0.0005801786875547828, "loss": 1.899, "step": 10492 }, { "epoch": 0.35, "grad_norm": 0.4419037103652954, "learning_rate": 0.0005801749511850809, "loss": 1.8913, "step": 10493 }, { "epoch": 0.35, "grad_norm": 0.4588243067264557, "learning_rate": 0.0005801712144752879, "loss": 1.9019, "step": 10494 }, { "epoch": 0.35, "grad_norm": 0.44667303562164307, "learning_rate": 0.0005801674774254084, "loss": 1.8971, "step": 10495 }, { "epoch": 0.35, "grad_norm": 0.4583837687969208, "learning_rate": 0.0005801637400354466, "loss": 1.8736, "step": 10496 }, { "epoch": 0.35, "grad_norm": 0.4362625181674957, "learning_rate": 0.0005801600023054076, "loss": 1.9592, "step": 10497 }, { "epoch": 0.35, "grad_norm": 0.4379555284976959, "learning_rate": 0.0005801562642352955, "loss": 1.9769, "step": 10498 }, { "epoch": 0.35, "grad_norm": 0.44928932189941406, "learning_rate": 0.0005801525258251149, "loss": 1.8171, "step": 10499 }, { "epoch": 0.35, "grad_norm": 0.5092734098434448, "learning_rate": 0.0005801487870748705, "loss": 1.9595, "step": 10500 }, { "epoch": 0.35, "grad_norm": 0.44542673230171204, "learning_rate": 0.0005801450479845667, "loss": 1.912, "step": 10501 }, { "epoch": 0.35, "grad_norm": 0.4785294830799103, "learning_rate": 0.0005801413085542082, "loss": 1.9801, "step": 10502 }, { "epoch": 0.35, "grad_norm": 0.45194268226623535, "learning_rate": 0.0005801375687837993, "loss": 1.9312, "step": 10503 }, { "epoch": 0.35, "grad_norm": 0.4522903561592102, "learning_rate": 0.0005801338286733447, "loss": 1.9313, "step": 10504 }, { "epoch": 0.35, "grad_norm": 0.43756791949272156, "learning_rate": 0.0005801300882228489, "loss": 1.858, "step": 10505 }, { "epoch": 0.35, "grad_norm": 0.4626483917236328, "learning_rate": 0.0005801263474323166, "loss": 1.9154, "step": 10506 }, { "epoch": 0.35, "grad_norm": 0.4628337323665619, "learning_rate": 0.000580122606301752, "loss": 1.8907, "step": 10507 }, { "epoch": 0.35, "grad_norm": 0.4519302546977997, "learning_rate": 0.0005801188648311598, "loss": 1.8554, "step": 10508 }, { "epoch": 0.35, "grad_norm": 0.45246055722236633, "learning_rate": 0.0005801151230205448, "loss": 1.8524, "step": 10509 }, { "epoch": 0.35, "grad_norm": 0.47806650400161743, "learning_rate": 0.0005801113808699111, "loss": 1.9008, "step": 10510 }, { "epoch": 0.35, "grad_norm": 0.46785515546798706, "learning_rate": 0.0005801076383792637, "loss": 1.9653, "step": 10511 }, { "epoch": 0.35, "grad_norm": 0.45176827907562256, "learning_rate": 0.0005801038955486066, "loss": 1.8774, "step": 10512 }, { "epoch": 0.35, "grad_norm": 0.46869418025016785, "learning_rate": 0.0005801001523779448, "loss": 1.9473, "step": 10513 }, { "epoch": 0.35, "grad_norm": 0.4546690285205841, "learning_rate": 0.0005800964088672827, "loss": 1.8133, "step": 10514 }, { "epoch": 0.35, "grad_norm": 0.4327971339225769, "learning_rate": 0.0005800926650166248, "loss": 1.8857, "step": 10515 }, { "epoch": 0.35, "grad_norm": 0.4460791349411011, "learning_rate": 0.0005800889208259757, "loss": 1.9102, "step": 10516 }, { "epoch": 0.35, "grad_norm": 0.45281854271888733, "learning_rate": 0.0005800851762953398, "loss": 1.9093, "step": 10517 }, { "epoch": 0.35, "grad_norm": 0.4462309181690216, "learning_rate": 0.0005800814314247219, "loss": 1.9268, "step": 10518 }, { "epoch": 0.35, "grad_norm": 0.4311679005622864, "learning_rate": 0.0005800776862141263, "loss": 1.8291, "step": 10519 }, { "epoch": 0.35, "grad_norm": 0.4411875009536743, "learning_rate": 0.0005800739406635577, "loss": 1.9288, "step": 10520 }, { "epoch": 0.35, "grad_norm": 0.44875118136405945, "learning_rate": 0.0005800701947730208, "loss": 1.8659, "step": 10521 }, { "epoch": 0.35, "grad_norm": 0.44679710268974304, "learning_rate": 0.0005800664485425196, "loss": 1.8654, "step": 10522 }, { "epoch": 0.35, "grad_norm": 0.4475249648094177, "learning_rate": 0.0005800627019720592, "loss": 1.8508, "step": 10523 }, { "epoch": 0.35, "grad_norm": 0.4605700671672821, "learning_rate": 0.0005800589550616439, "loss": 1.888, "step": 10524 }, { "epoch": 0.35, "grad_norm": 0.4355217218399048, "learning_rate": 0.0005800552078112783, "loss": 1.9044, "step": 10525 }, { "epoch": 0.35, "grad_norm": 0.44337302446365356, "learning_rate": 0.0005800514602209671, "loss": 1.8804, "step": 10526 }, { "epoch": 0.35, "grad_norm": 0.4606461226940155, "learning_rate": 0.0005800477122907145, "loss": 1.8942, "step": 10527 }, { "epoch": 0.35, "grad_norm": 0.46744629740715027, "learning_rate": 0.0005800439640205252, "loss": 1.8731, "step": 10528 }, { "epoch": 0.35, "grad_norm": 0.45673081278800964, "learning_rate": 0.0005800402154104039, "loss": 1.9127, "step": 10529 }, { "epoch": 0.35, "grad_norm": 0.44470763206481934, "learning_rate": 0.000580036466460355, "loss": 1.9216, "step": 10530 }, { "epoch": 0.35, "grad_norm": 0.4440303444862366, "learning_rate": 0.0005800327171703832, "loss": 1.9109, "step": 10531 }, { "epoch": 0.35, "grad_norm": 0.4337868392467499, "learning_rate": 0.0005800289675404927, "loss": 1.8514, "step": 10532 }, { "epoch": 0.35, "grad_norm": 0.4537336528301239, "learning_rate": 0.0005800252175706884, "loss": 1.9391, "step": 10533 }, { "epoch": 0.35, "grad_norm": 0.43931740522384644, "learning_rate": 0.0005800214672609748, "loss": 1.8303, "step": 10534 }, { "epoch": 0.35, "grad_norm": 0.4472525715827942, "learning_rate": 0.0005800177166113565, "loss": 1.9429, "step": 10535 }, { "epoch": 0.35, "grad_norm": 0.4533638060092926, "learning_rate": 0.0005800139656218378, "loss": 1.8525, "step": 10536 }, { "epoch": 0.35, "grad_norm": 0.4703262448310852, "learning_rate": 0.0005800102142924234, "loss": 1.9395, "step": 10537 }, { "epoch": 0.35, "grad_norm": 0.4476909339427948, "learning_rate": 0.0005800064626231179, "loss": 1.9414, "step": 10538 }, { "epoch": 0.35, "grad_norm": 0.44326943159103394, "learning_rate": 0.0005800027106139258, "loss": 1.8846, "step": 10539 }, { "epoch": 0.35, "grad_norm": 0.4470882713794708, "learning_rate": 0.0005799989582648517, "loss": 1.9764, "step": 10540 }, { "epoch": 0.35, "grad_norm": 0.436612993478775, "learning_rate": 0.0005799952055759001, "loss": 1.9278, "step": 10541 }, { "epoch": 0.35, "grad_norm": 0.4542991518974304, "learning_rate": 0.0005799914525470755, "loss": 1.9086, "step": 10542 }, { "epoch": 0.35, "grad_norm": 0.45601537823677063, "learning_rate": 0.0005799876991783826, "loss": 1.9104, "step": 10543 }, { "epoch": 0.35, "grad_norm": 0.4432937204837799, "learning_rate": 0.000579983945469826, "loss": 1.893, "step": 10544 }, { "epoch": 0.35, "grad_norm": 0.4471195936203003, "learning_rate": 0.00057998019142141, "loss": 1.95, "step": 10545 }, { "epoch": 0.35, "grad_norm": 0.4504832923412323, "learning_rate": 0.0005799764370331392, "loss": 1.899, "step": 10546 }, { "epoch": 0.35, "grad_norm": 0.4489063620567322, "learning_rate": 0.0005799726823050185, "loss": 1.9363, "step": 10547 }, { "epoch": 0.35, "grad_norm": 0.4421290159225464, "learning_rate": 0.000579968927237052, "loss": 1.9224, "step": 10548 }, { "epoch": 0.35, "grad_norm": 0.4927099049091339, "learning_rate": 0.0005799651718292446, "loss": 1.8946, "step": 10549 }, { "epoch": 0.35, "grad_norm": 0.4637860059738159, "learning_rate": 0.0005799614160816009, "loss": 1.9124, "step": 10550 }, { "epoch": 0.35, "grad_norm": 0.4497062861919403, "learning_rate": 0.000579957659994125, "loss": 1.9579, "step": 10551 }, { "epoch": 0.35, "grad_norm": 0.4816158413887024, "learning_rate": 0.0005799539035668219, "loss": 1.9184, "step": 10552 }, { "epoch": 0.35, "grad_norm": 0.4772294759750366, "learning_rate": 0.0005799501467996959, "loss": 1.8072, "step": 10553 }, { "epoch": 0.35, "grad_norm": 0.44686365127563477, "learning_rate": 0.0005799463896927517, "loss": 1.897, "step": 10554 }, { "epoch": 0.35, "grad_norm": 0.4404882788658142, "learning_rate": 0.0005799426322459939, "loss": 1.8613, "step": 10555 }, { "epoch": 0.35, "grad_norm": 0.4942722022533417, "learning_rate": 0.000579938874459427, "loss": 1.8804, "step": 10556 }, { "epoch": 0.35, "grad_norm": 0.4625425338745117, "learning_rate": 0.0005799351163330554, "loss": 1.8494, "step": 10557 }, { "epoch": 0.35, "grad_norm": 0.44179269671440125, "learning_rate": 0.0005799313578668839, "loss": 1.8697, "step": 10558 }, { "epoch": 0.35, "grad_norm": 0.45922645926475525, "learning_rate": 0.0005799275990609171, "loss": 1.8045, "step": 10559 }, { "epoch": 0.35, "grad_norm": 0.4572364389896393, "learning_rate": 0.0005799238399151593, "loss": 1.8608, "step": 10560 }, { "epoch": 0.35, "grad_norm": 0.4551634192466736, "learning_rate": 0.0005799200804296153, "loss": 1.9021, "step": 10561 }, { "epoch": 0.35, "grad_norm": 0.4616933763027191, "learning_rate": 0.0005799163206042895, "loss": 1.9056, "step": 10562 }, { "epoch": 0.35, "grad_norm": 0.4566098153591156, "learning_rate": 0.0005799125604391865, "loss": 1.9089, "step": 10563 }, { "epoch": 0.35, "grad_norm": 0.4415302872657776, "learning_rate": 0.000579908799934311, "loss": 1.8767, "step": 10564 }, { "epoch": 0.35, "grad_norm": 0.45661067962646484, "learning_rate": 0.0005799050390896674, "loss": 1.9089, "step": 10565 }, { "epoch": 0.35, "grad_norm": 0.6001455187797546, "learning_rate": 0.0005799012779052603, "loss": 1.8779, "step": 10566 }, { "epoch": 0.35, "grad_norm": 0.4671235680580139, "learning_rate": 0.0005798975163810943, "loss": 1.9029, "step": 10567 }, { "epoch": 0.35, "grad_norm": 0.4578271210193634, "learning_rate": 0.0005798937545171739, "loss": 1.9207, "step": 10568 }, { "epoch": 0.35, "grad_norm": 0.44064950942993164, "learning_rate": 0.0005798899923135038, "loss": 2.0043, "step": 10569 }, { "epoch": 0.35, "grad_norm": 0.4451116919517517, "learning_rate": 0.0005798862297700885, "loss": 1.9168, "step": 10570 }, { "epoch": 0.35, "grad_norm": 0.43753042817115784, "learning_rate": 0.0005798824668869326, "loss": 1.936, "step": 10571 }, { "epoch": 0.35, "grad_norm": 0.4643974304199219, "learning_rate": 0.0005798787036640405, "loss": 1.9715, "step": 10572 }, { "epoch": 0.35, "grad_norm": 0.4478277862071991, "learning_rate": 0.0005798749401014169, "loss": 1.9509, "step": 10573 }, { "epoch": 0.35, "grad_norm": 0.44124630093574524, "learning_rate": 0.0005798711761990665, "loss": 1.8352, "step": 10574 }, { "epoch": 0.35, "grad_norm": 0.4414774477481842, "learning_rate": 0.0005798674119569937, "loss": 1.8861, "step": 10575 }, { "epoch": 0.35, "grad_norm": 0.4384428858757019, "learning_rate": 0.0005798636473752029, "loss": 1.9509, "step": 10576 }, { "epoch": 0.35, "grad_norm": 0.4283619523048401, "learning_rate": 0.000579859882453699, "loss": 1.9005, "step": 10577 }, { "epoch": 0.35, "grad_norm": 0.4697933793067932, "learning_rate": 0.0005798561171924865, "loss": 1.8891, "step": 10578 }, { "epoch": 0.35, "grad_norm": 0.4681495726108551, "learning_rate": 0.0005798523515915699, "loss": 1.9214, "step": 10579 }, { "epoch": 0.35, "grad_norm": 0.5102482438087463, "learning_rate": 0.0005798485856509537, "loss": 1.9055, "step": 10580 }, { "epoch": 0.35, "grad_norm": 0.47833627462387085, "learning_rate": 0.0005798448193706426, "loss": 1.9258, "step": 10581 }, { "epoch": 0.35, "grad_norm": 0.4612070322036743, "learning_rate": 0.0005798410527506411, "loss": 1.8993, "step": 10582 }, { "epoch": 0.35, "grad_norm": 0.4422159194946289, "learning_rate": 0.0005798372857909539, "loss": 1.8264, "step": 10583 }, { "epoch": 0.35, "grad_norm": 0.4504132866859436, "learning_rate": 0.0005798335184915853, "loss": 1.877, "step": 10584 }, { "epoch": 0.35, "grad_norm": 0.43739718198776245, "learning_rate": 0.0005798297508525401, "loss": 1.8724, "step": 10585 }, { "epoch": 0.35, "grad_norm": 0.4449903666973114, "learning_rate": 0.0005798259828738228, "loss": 1.8036, "step": 10586 }, { "epoch": 0.35, "grad_norm": 0.4563754200935364, "learning_rate": 0.000579822214555438, "loss": 1.9605, "step": 10587 }, { "epoch": 0.35, "grad_norm": 0.4666394293308258, "learning_rate": 0.0005798184458973903, "loss": 1.8982, "step": 10588 }, { "epoch": 0.35, "grad_norm": 0.4395657181739807, "learning_rate": 0.0005798146768996842, "loss": 1.9132, "step": 10589 }, { "epoch": 0.35, "grad_norm": 0.4257606565952301, "learning_rate": 0.0005798109075623244, "loss": 1.8873, "step": 10590 }, { "epoch": 0.35, "grad_norm": 0.43571484088897705, "learning_rate": 0.0005798071378853153, "loss": 1.8712, "step": 10591 }, { "epoch": 0.35, "grad_norm": 0.437760591506958, "learning_rate": 0.0005798033678686616, "loss": 1.8941, "step": 10592 }, { "epoch": 0.35, "grad_norm": 0.45536208152770996, "learning_rate": 0.0005797995975123678, "loss": 1.8411, "step": 10593 }, { "epoch": 0.35, "grad_norm": 0.4629306495189667, "learning_rate": 0.0005797958268164385, "loss": 1.8898, "step": 10594 }, { "epoch": 0.35, "grad_norm": 0.46129918098449707, "learning_rate": 0.0005797920557808783, "loss": 1.9527, "step": 10595 }, { "epoch": 0.35, "grad_norm": 0.44771715998649597, "learning_rate": 0.0005797882844056917, "loss": 1.9112, "step": 10596 }, { "epoch": 0.35, "grad_norm": 0.4611222743988037, "learning_rate": 0.0005797845126908834, "loss": 1.8886, "step": 10597 }, { "epoch": 0.35, "grad_norm": 0.43575698137283325, "learning_rate": 0.0005797807406364581, "loss": 1.7756, "step": 10598 }, { "epoch": 0.35, "grad_norm": 0.43253499269485474, "learning_rate": 0.00057977696824242, "loss": 1.893, "step": 10599 }, { "epoch": 0.35, "grad_norm": 0.4580724239349365, "learning_rate": 0.000579773195508774, "loss": 1.8794, "step": 10600 }, { "epoch": 0.35, "grad_norm": 0.4420745074748993, "learning_rate": 0.0005797694224355244, "loss": 1.88, "step": 10601 }, { "epoch": 0.35, "grad_norm": 0.453948438167572, "learning_rate": 0.0005797656490226761, "loss": 1.9347, "step": 10602 }, { "epoch": 0.35, "grad_norm": 0.4519580602645874, "learning_rate": 0.0005797618752702335, "loss": 1.9084, "step": 10603 }, { "epoch": 0.35, "grad_norm": 0.43343424797058105, "learning_rate": 0.0005797581011782012, "loss": 1.8736, "step": 10604 }, { "epoch": 0.35, "grad_norm": 0.4313114583492279, "learning_rate": 0.0005797543267465837, "loss": 1.9903, "step": 10605 }, { "epoch": 0.35, "grad_norm": 0.44771090149879456, "learning_rate": 0.0005797505519753858, "loss": 1.8694, "step": 10606 }, { "epoch": 0.35, "grad_norm": 0.4524343013763428, "learning_rate": 0.0005797467768646118, "loss": 1.9286, "step": 10607 }, { "epoch": 0.35, "grad_norm": 0.4453749656677246, "learning_rate": 0.0005797430014142665, "loss": 1.8291, "step": 10608 }, { "epoch": 0.35, "grad_norm": 0.4310527741909027, "learning_rate": 0.0005797392256243544, "loss": 1.848, "step": 10609 }, { "epoch": 0.35, "grad_norm": 0.44703590869903564, "learning_rate": 0.0005797354494948802, "loss": 1.9084, "step": 10610 }, { "epoch": 0.35, "grad_norm": 0.4518510103225708, "learning_rate": 0.0005797316730258483, "loss": 1.9004, "step": 10611 }, { "epoch": 0.35, "grad_norm": 0.44428935647010803, "learning_rate": 0.0005797278962172633, "loss": 1.7989, "step": 10612 }, { "epoch": 0.35, "grad_norm": 0.4459054172039032, "learning_rate": 0.00057972411906913, "loss": 1.9358, "step": 10613 }, { "epoch": 0.35, "grad_norm": 0.4321691393852234, "learning_rate": 0.0005797203415814527, "loss": 1.8969, "step": 10614 }, { "epoch": 0.35, "grad_norm": 0.4492385983467102, "learning_rate": 0.0005797165637542363, "loss": 1.8474, "step": 10615 }, { "epoch": 0.35, "grad_norm": 0.4574967622756958, "learning_rate": 0.000579712785587485, "loss": 1.9784, "step": 10616 }, { "epoch": 0.35, "grad_norm": 0.4444981813430786, "learning_rate": 0.0005797090070812038, "loss": 1.8934, "step": 10617 }, { "epoch": 0.35, "grad_norm": 0.44825446605682373, "learning_rate": 0.0005797052282353967, "loss": 1.7746, "step": 10618 }, { "epoch": 0.35, "grad_norm": 0.4706335663795471, "learning_rate": 0.000579701449050069, "loss": 1.9181, "step": 10619 }, { "epoch": 0.35, "grad_norm": 0.4384963810443878, "learning_rate": 0.0005796976695252249, "loss": 1.8806, "step": 10620 }, { "epoch": 0.35, "grad_norm": 0.44602733850479126, "learning_rate": 0.000579693889660869, "loss": 1.8817, "step": 10621 }, { "epoch": 0.35, "grad_norm": 0.4489690959453583, "learning_rate": 0.000579690109457006, "loss": 1.8955, "step": 10622 }, { "epoch": 0.35, "grad_norm": 0.4770117700099945, "learning_rate": 0.0005796863289136404, "loss": 1.8391, "step": 10623 }, { "epoch": 0.35, "grad_norm": 0.5127845406532288, "learning_rate": 0.0005796825480307767, "loss": 1.9428, "step": 10624 }, { "epoch": 0.35, "grad_norm": 0.4490415155887604, "learning_rate": 0.0005796787668084196, "loss": 1.8433, "step": 10625 }, { "epoch": 0.35, "grad_norm": 0.47943469882011414, "learning_rate": 0.0005796749852465739, "loss": 1.9337, "step": 10626 }, { "epoch": 0.35, "grad_norm": 0.47441741824150085, "learning_rate": 0.0005796712033452437, "loss": 1.9515, "step": 10627 }, { "epoch": 0.35, "grad_norm": 0.44843074679374695, "learning_rate": 0.0005796674211044341, "loss": 1.8975, "step": 10628 }, { "epoch": 0.35, "grad_norm": 0.5053712725639343, "learning_rate": 0.0005796636385241493, "loss": 1.9292, "step": 10629 }, { "epoch": 0.35, "grad_norm": 0.552543580532074, "learning_rate": 0.000579659855604394, "loss": 1.8648, "step": 10630 }, { "epoch": 0.35, "grad_norm": 0.46151524782180786, "learning_rate": 0.000579656072345173, "loss": 1.9349, "step": 10631 }, { "epoch": 0.35, "grad_norm": 0.4584703743457794, "learning_rate": 0.0005796522887464907, "loss": 1.9131, "step": 10632 }, { "epoch": 0.35, "grad_norm": 0.44859567284584045, "learning_rate": 0.0005796485048083517, "loss": 1.9373, "step": 10633 }, { "epoch": 0.35, "grad_norm": 0.4876210689544678, "learning_rate": 0.0005796447205307606, "loss": 1.8251, "step": 10634 }, { "epoch": 0.35, "grad_norm": 0.4702901840209961, "learning_rate": 0.000579640935913722, "loss": 1.877, "step": 10635 }, { "epoch": 0.35, "grad_norm": 0.4486705958843231, "learning_rate": 0.0005796371509572407, "loss": 1.9168, "step": 10636 }, { "epoch": 0.35, "grad_norm": 0.4696379005908966, "learning_rate": 0.0005796333656613209, "loss": 1.8873, "step": 10637 }, { "epoch": 0.35, "grad_norm": 0.5143442153930664, "learning_rate": 0.0005796295800259674, "loss": 1.9065, "step": 10638 }, { "epoch": 0.35, "grad_norm": 0.46998167037963867, "learning_rate": 0.0005796257940511848, "loss": 1.8645, "step": 10639 }, { "epoch": 0.35, "grad_norm": 0.4544202983379364, "learning_rate": 0.0005796220077369776, "loss": 1.8256, "step": 10640 }, { "epoch": 0.35, "grad_norm": 0.4689152240753174, "learning_rate": 0.0005796182210833507, "loss": 1.8915, "step": 10641 }, { "epoch": 0.35, "grad_norm": 0.5042526721954346, "learning_rate": 0.0005796144340903084, "loss": 1.9174, "step": 10642 }, { "epoch": 0.35, "grad_norm": 0.45675358176231384, "learning_rate": 0.0005796106467578553, "loss": 1.8588, "step": 10643 }, { "epoch": 0.35, "grad_norm": 0.452683687210083, "learning_rate": 0.0005796068590859962, "loss": 1.9142, "step": 10644 }, { "epoch": 0.35, "grad_norm": 0.49103105068206787, "learning_rate": 0.0005796030710747354, "loss": 1.8817, "step": 10645 }, { "epoch": 0.35, "grad_norm": 0.4920271933078766, "learning_rate": 0.0005795992827240779, "loss": 1.8768, "step": 10646 }, { "epoch": 0.35, "grad_norm": 0.42916205525398254, "learning_rate": 0.0005795954940340279, "loss": 1.9405, "step": 10647 }, { "epoch": 0.35, "grad_norm": 0.46487218141555786, "learning_rate": 0.0005795917050045901, "loss": 1.818, "step": 10648 }, { "epoch": 0.35, "grad_norm": 0.5305385589599609, "learning_rate": 0.0005795879156357694, "loss": 1.9003, "step": 10649 }, { "epoch": 0.35, "grad_norm": 0.45639654994010925, "learning_rate": 0.00057958412592757, "loss": 1.8936, "step": 10650 }, { "epoch": 0.35, "grad_norm": 0.446490079164505, "learning_rate": 0.0005795803358799967, "loss": 1.9153, "step": 10651 }, { "epoch": 0.35, "grad_norm": 0.47283878922462463, "learning_rate": 0.000579576545493054, "loss": 1.8941, "step": 10652 }, { "epoch": 0.35, "grad_norm": 0.47253045439720154, "learning_rate": 0.0005795727547667467, "loss": 1.8938, "step": 10653 }, { "epoch": 0.35, "grad_norm": 0.46204158663749695, "learning_rate": 0.0005795689637010791, "loss": 1.8487, "step": 10654 }, { "epoch": 0.35, "grad_norm": 0.4610157012939453, "learning_rate": 0.0005795651722960561, "loss": 1.8983, "step": 10655 }, { "epoch": 0.35, "grad_norm": 0.45226970314979553, "learning_rate": 0.0005795613805516822, "loss": 1.9238, "step": 10656 }, { "epoch": 0.35, "grad_norm": 0.4526614546775818, "learning_rate": 0.0005795575884679618, "loss": 1.8745, "step": 10657 }, { "epoch": 0.35, "grad_norm": 0.43890976905822754, "learning_rate": 0.0005795537960448999, "loss": 1.915, "step": 10658 }, { "epoch": 0.35, "grad_norm": 0.4419938623905182, "learning_rate": 0.0005795500032825008, "loss": 1.8786, "step": 10659 }, { "epoch": 0.35, "grad_norm": 0.4499439597129822, "learning_rate": 0.0005795462101807692, "loss": 1.9794, "step": 10660 }, { "epoch": 0.35, "grad_norm": 0.4484931528568268, "learning_rate": 0.0005795424167397096, "loss": 1.9074, "step": 10661 }, { "epoch": 0.35, "grad_norm": 0.44421228766441345, "learning_rate": 0.0005795386229593267, "loss": 1.943, "step": 10662 }, { "epoch": 0.35, "grad_norm": 0.4576515853404999, "learning_rate": 0.0005795348288396252, "loss": 1.9354, "step": 10663 }, { "epoch": 0.35, "grad_norm": 0.43641531467437744, "learning_rate": 0.0005795310343806096, "loss": 1.9007, "step": 10664 }, { "epoch": 0.35, "grad_norm": 0.4380103647708893, "learning_rate": 0.0005795272395822844, "loss": 1.8848, "step": 10665 }, { "epoch": 0.35, "grad_norm": 0.4401785433292389, "learning_rate": 0.0005795234444446543, "loss": 1.8833, "step": 10666 }, { "epoch": 0.35, "grad_norm": 0.44525423645973206, "learning_rate": 0.0005795196489677241, "loss": 1.8555, "step": 10667 }, { "epoch": 0.35, "grad_norm": 0.439413458108902, "learning_rate": 0.0005795158531514982, "loss": 1.8762, "step": 10668 }, { "epoch": 0.35, "grad_norm": 0.4438837170600891, "learning_rate": 0.0005795120569959812, "loss": 1.8954, "step": 10669 }, { "epoch": 0.35, "grad_norm": 0.4506697952747345, "learning_rate": 0.0005795082605011777, "loss": 1.8231, "step": 10670 }, { "epoch": 0.36, "grad_norm": 0.44253331422805786, "learning_rate": 0.0005795044636670922, "loss": 1.875, "step": 10671 }, { "epoch": 0.36, "grad_norm": 0.44651198387145996, "learning_rate": 0.0005795006664937297, "loss": 1.9237, "step": 10672 }, { "epoch": 0.36, "grad_norm": 0.4705500304698944, "learning_rate": 0.0005794968689810946, "loss": 1.8724, "step": 10673 }, { "epoch": 0.36, "grad_norm": 0.4337978661060333, "learning_rate": 0.0005794930711291913, "loss": 1.876, "step": 10674 }, { "epoch": 0.36, "grad_norm": 0.4838900864124298, "learning_rate": 0.0005794892729380248, "loss": 1.8941, "step": 10675 }, { "epoch": 0.36, "grad_norm": 0.45786747336387634, "learning_rate": 0.0005794854744075995, "loss": 1.9433, "step": 10676 }, { "epoch": 0.36, "grad_norm": 0.4693374037742615, "learning_rate": 0.0005794816755379198, "loss": 1.8408, "step": 10677 }, { "epoch": 0.36, "grad_norm": 0.4771139919757843, "learning_rate": 0.0005794778763289906, "loss": 2.0028, "step": 10678 }, { "epoch": 0.36, "grad_norm": 0.4673018753528595, "learning_rate": 0.0005794740767808166, "loss": 1.8617, "step": 10679 }, { "epoch": 0.36, "grad_norm": 0.4494476914405823, "learning_rate": 0.0005794702768934021, "loss": 1.8937, "step": 10680 }, { "epoch": 0.36, "grad_norm": 0.4380241930484772, "learning_rate": 0.0005794664766667518, "loss": 1.8483, "step": 10681 }, { "epoch": 0.36, "grad_norm": 0.46623075008392334, "learning_rate": 0.0005794626761008705, "loss": 1.841, "step": 10682 }, { "epoch": 0.36, "grad_norm": 0.45585042238235474, "learning_rate": 0.0005794588751957627, "loss": 1.8879, "step": 10683 }, { "epoch": 0.36, "grad_norm": 0.4489443302154541, "learning_rate": 0.0005794550739514329, "loss": 1.8653, "step": 10684 }, { "epoch": 0.36, "grad_norm": 0.4429108202457428, "learning_rate": 0.0005794512723678859, "loss": 1.8997, "step": 10685 }, { "epoch": 0.36, "grad_norm": 0.44574686884880066, "learning_rate": 0.0005794474704451262, "loss": 1.9559, "step": 10686 }, { "epoch": 0.36, "grad_norm": 0.469360888004303, "learning_rate": 0.0005794436681831584, "loss": 1.8078, "step": 10687 }, { "epoch": 0.36, "grad_norm": 0.7087701559066772, "learning_rate": 0.0005794398655819872, "loss": 1.8916, "step": 10688 }, { "epoch": 0.36, "grad_norm": 0.4479856789112091, "learning_rate": 0.0005794360626416172, "loss": 1.8683, "step": 10689 }, { "epoch": 0.36, "grad_norm": 0.4614717960357666, "learning_rate": 0.0005794322593620529, "loss": 1.846, "step": 10690 }, { "epoch": 0.36, "grad_norm": 0.44838112592697144, "learning_rate": 0.0005794284557432992, "loss": 1.8815, "step": 10691 }, { "epoch": 0.36, "grad_norm": 0.45600539445877075, "learning_rate": 0.0005794246517853604, "loss": 1.9335, "step": 10692 }, { "epoch": 0.36, "grad_norm": 0.4645812213420868, "learning_rate": 0.0005794208474882412, "loss": 1.7964, "step": 10693 }, { "epoch": 0.36, "grad_norm": 0.4664761424064636, "learning_rate": 0.0005794170428519463, "loss": 1.8611, "step": 10694 }, { "epoch": 0.36, "grad_norm": 0.48588764667510986, "learning_rate": 0.0005794132378764803, "loss": 1.898, "step": 10695 }, { "epoch": 0.36, "grad_norm": 0.4526086151599884, "learning_rate": 0.0005794094325618478, "loss": 1.8572, "step": 10696 }, { "epoch": 0.36, "grad_norm": 0.4509952664375305, "learning_rate": 0.0005794056269080534, "loss": 1.93, "step": 10697 }, { "epoch": 0.36, "grad_norm": 0.44618555903434753, "learning_rate": 0.0005794018209151019, "loss": 1.8388, "step": 10698 }, { "epoch": 0.36, "grad_norm": 0.48920443654060364, "learning_rate": 0.0005793980145829975, "loss": 1.9509, "step": 10699 }, { "epoch": 0.36, "grad_norm": 0.4762386381626129, "learning_rate": 0.0005793942079117452, "loss": 1.9539, "step": 10700 }, { "epoch": 0.36, "grad_norm": 0.4633607864379883, "learning_rate": 0.0005793904009013495, "loss": 1.8565, "step": 10701 }, { "epoch": 0.36, "grad_norm": 0.4418235421180725, "learning_rate": 0.000579386593551815, "loss": 1.8558, "step": 10702 }, { "epoch": 0.36, "grad_norm": 0.4700528085231781, "learning_rate": 0.0005793827858631464, "loss": 1.8602, "step": 10703 }, { "epoch": 0.36, "grad_norm": 0.4559670090675354, "learning_rate": 0.0005793789778353481, "loss": 1.8878, "step": 10704 }, { "epoch": 0.36, "grad_norm": 0.45215651392936707, "learning_rate": 0.0005793751694684251, "loss": 1.9759, "step": 10705 }, { "epoch": 0.36, "grad_norm": 0.4482315480709076, "learning_rate": 0.0005793713607623816, "loss": 1.9168, "step": 10706 }, { "epoch": 0.36, "grad_norm": 0.4546149969100952, "learning_rate": 0.0005793675517172226, "loss": 1.9503, "step": 10707 }, { "epoch": 0.36, "grad_norm": 0.46746864914894104, "learning_rate": 0.0005793637423329524, "loss": 1.9395, "step": 10708 }, { "epoch": 0.36, "grad_norm": 0.4352663457393646, "learning_rate": 0.0005793599326095759, "loss": 1.9718, "step": 10709 }, { "epoch": 0.36, "grad_norm": 0.47126367688179016, "learning_rate": 0.0005793561225470976, "loss": 1.9706, "step": 10710 }, { "epoch": 0.36, "grad_norm": 0.45880305767059326, "learning_rate": 0.0005793523121455221, "loss": 1.8735, "step": 10711 }, { "epoch": 0.36, "grad_norm": 0.44019925594329834, "learning_rate": 0.0005793485014048541, "loss": 1.8747, "step": 10712 }, { "epoch": 0.36, "grad_norm": 0.4351218640804291, "learning_rate": 0.0005793446903250982, "loss": 1.8372, "step": 10713 }, { "epoch": 0.36, "grad_norm": 0.439204603433609, "learning_rate": 0.0005793408789062588, "loss": 1.9126, "step": 10714 }, { "epoch": 0.36, "grad_norm": 0.4592209458351135, "learning_rate": 0.0005793370671483409, "loss": 1.9231, "step": 10715 }, { "epoch": 0.36, "grad_norm": 0.4403083324432373, "learning_rate": 0.000579333255051349, "loss": 1.8923, "step": 10716 }, { "epoch": 0.36, "grad_norm": 0.4474649727344513, "learning_rate": 0.0005793294426152875, "loss": 1.8993, "step": 10717 }, { "epoch": 0.36, "grad_norm": 0.45993486046791077, "learning_rate": 0.0005793256298401614, "loss": 1.983, "step": 10718 }, { "epoch": 0.36, "grad_norm": 0.4459322988986969, "learning_rate": 0.0005793218167259751, "loss": 1.9613, "step": 10719 }, { "epoch": 0.36, "grad_norm": 0.45671412348747253, "learning_rate": 0.0005793180032727332, "loss": 1.8727, "step": 10720 }, { "epoch": 0.36, "grad_norm": 0.45040732622146606, "learning_rate": 0.0005793141894804405, "loss": 1.8379, "step": 10721 }, { "epoch": 0.36, "grad_norm": 0.4391951560974121, "learning_rate": 0.0005793103753491014, "loss": 1.9068, "step": 10722 }, { "epoch": 0.36, "grad_norm": 0.4510685205459595, "learning_rate": 0.0005793065608787208, "loss": 1.8309, "step": 10723 }, { "epoch": 0.36, "grad_norm": 0.43038809299468994, "learning_rate": 0.0005793027460693031, "loss": 1.8996, "step": 10724 }, { "epoch": 0.36, "grad_norm": 0.4532757103443146, "learning_rate": 0.000579298930920853, "loss": 1.9376, "step": 10725 }, { "epoch": 0.36, "grad_norm": 0.4295446574687958, "learning_rate": 0.0005792951154333752, "loss": 1.8961, "step": 10726 }, { "epoch": 0.36, "grad_norm": 0.4466674327850342, "learning_rate": 0.0005792912996068743, "loss": 1.8796, "step": 10727 }, { "epoch": 0.36, "grad_norm": 0.45143720507621765, "learning_rate": 0.0005792874834413548, "loss": 1.9057, "step": 10728 }, { "epoch": 0.36, "grad_norm": 0.45126619935035706, "learning_rate": 0.0005792836669368215, "loss": 1.885, "step": 10729 }, { "epoch": 0.36, "grad_norm": 0.45025163888931274, "learning_rate": 0.0005792798500932791, "loss": 1.8902, "step": 10730 }, { "epoch": 0.36, "grad_norm": 0.43739789724349976, "learning_rate": 0.000579276032910732, "loss": 1.8826, "step": 10731 }, { "epoch": 0.36, "grad_norm": 0.45946863293647766, "learning_rate": 0.0005792722153891849, "loss": 1.8829, "step": 10732 }, { "epoch": 0.36, "grad_norm": 0.46203216910362244, "learning_rate": 0.0005792683975286427, "loss": 1.8668, "step": 10733 }, { "epoch": 0.36, "grad_norm": 0.44697606563568115, "learning_rate": 0.0005792645793291097, "loss": 1.9209, "step": 10734 }, { "epoch": 0.36, "grad_norm": 0.4478580057621002, "learning_rate": 0.0005792607607905905, "loss": 2.0125, "step": 10735 }, { "epoch": 0.36, "grad_norm": 0.45358431339263916, "learning_rate": 0.00057925694191309, "loss": 2.0102, "step": 10736 }, { "epoch": 0.36, "grad_norm": 0.44096025824546814, "learning_rate": 0.0005792531226966127, "loss": 1.8812, "step": 10737 }, { "epoch": 0.36, "grad_norm": 0.464722603559494, "learning_rate": 0.0005792493031411633, "loss": 1.9515, "step": 10738 }, { "epoch": 0.36, "grad_norm": 0.44495394825935364, "learning_rate": 0.0005792454832467464, "loss": 1.8798, "step": 10739 }, { "epoch": 0.36, "grad_norm": 0.4412825107574463, "learning_rate": 0.0005792416630133664, "loss": 1.9007, "step": 10740 }, { "epoch": 0.36, "grad_norm": 0.47942498326301575, "learning_rate": 0.0005792378424410285, "loss": 1.8994, "step": 10741 }, { "epoch": 0.36, "grad_norm": 0.4370187222957611, "learning_rate": 0.0005792340215297367, "loss": 1.8534, "step": 10742 }, { "epoch": 0.36, "grad_norm": 0.4481605887413025, "learning_rate": 0.0005792302002794962, "loss": 1.9258, "step": 10743 }, { "epoch": 0.36, "grad_norm": 0.4398043155670166, "learning_rate": 0.0005792263786903113, "loss": 1.8545, "step": 10744 }, { "epoch": 0.36, "grad_norm": 0.45319104194641113, "learning_rate": 0.0005792225567621867, "loss": 1.928, "step": 10745 }, { "epoch": 0.36, "grad_norm": 0.437173068523407, "learning_rate": 0.000579218734495127, "loss": 1.8351, "step": 10746 }, { "epoch": 0.36, "grad_norm": 0.45701834559440613, "learning_rate": 0.0005792149118891369, "loss": 1.9019, "step": 10747 }, { "epoch": 0.36, "grad_norm": 0.4481047987937927, "learning_rate": 0.0005792110889442211, "loss": 1.9554, "step": 10748 }, { "epoch": 0.36, "grad_norm": 0.46775808930397034, "learning_rate": 0.0005792072656603842, "loss": 1.8827, "step": 10749 }, { "epoch": 0.36, "grad_norm": 0.45188596844673157, "learning_rate": 0.0005792034420376308, "loss": 1.9679, "step": 10750 }, { "epoch": 0.36, "grad_norm": 0.44096890091896057, "learning_rate": 0.0005791996180759656, "loss": 1.911, "step": 10751 }, { "epoch": 0.36, "grad_norm": 0.45891568064689636, "learning_rate": 0.0005791957937753931, "loss": 1.8426, "step": 10752 }, { "epoch": 0.36, "grad_norm": 0.45381489396095276, "learning_rate": 0.0005791919691359181, "loss": 1.9312, "step": 10753 }, { "epoch": 0.36, "grad_norm": 0.48088714480400085, "learning_rate": 0.0005791881441575452, "loss": 1.905, "step": 10754 }, { "epoch": 0.36, "grad_norm": 0.43663865327835083, "learning_rate": 0.000579184318840279, "loss": 1.8789, "step": 10755 }, { "epoch": 0.36, "grad_norm": 0.4471091330051422, "learning_rate": 0.0005791804931841241, "loss": 1.861, "step": 10756 }, { "epoch": 0.36, "grad_norm": 0.46353432536125183, "learning_rate": 0.0005791766671890854, "loss": 2.0092, "step": 10757 }, { "epoch": 0.36, "grad_norm": 0.46839287877082825, "learning_rate": 0.0005791728408551672, "loss": 1.9758, "step": 10758 }, { "epoch": 0.36, "grad_norm": 0.44510334730148315, "learning_rate": 0.0005791690141823744, "loss": 1.8459, "step": 10759 }, { "epoch": 0.36, "grad_norm": 0.4490812122821808, "learning_rate": 0.0005791651871707115, "loss": 1.8813, "step": 10760 }, { "epoch": 0.36, "grad_norm": 0.4684702157974243, "learning_rate": 0.0005791613598201831, "loss": 1.8508, "step": 10761 }, { "epoch": 0.36, "grad_norm": 0.46234625577926636, "learning_rate": 0.000579157532130794, "loss": 1.9385, "step": 10762 }, { "epoch": 0.36, "grad_norm": 0.45116662979125977, "learning_rate": 0.000579153704102549, "loss": 1.9147, "step": 10763 }, { "epoch": 0.36, "grad_norm": 0.4567280411720276, "learning_rate": 0.0005791498757354523, "loss": 1.9241, "step": 10764 }, { "epoch": 0.36, "grad_norm": 0.44831106066703796, "learning_rate": 0.0005791460470295089, "loss": 1.9154, "step": 10765 }, { "epoch": 0.36, "grad_norm": 0.45073702931404114, "learning_rate": 0.0005791422179847232, "loss": 1.8786, "step": 10766 }, { "epoch": 0.36, "grad_norm": 0.4588198661804199, "learning_rate": 0.0005791383886011001, "loss": 1.8693, "step": 10767 }, { "epoch": 0.36, "grad_norm": 0.4426991045475006, "learning_rate": 0.0005791345588786441, "loss": 1.9128, "step": 10768 }, { "epoch": 0.36, "grad_norm": 0.4572407901287079, "learning_rate": 0.0005791307288173598, "loss": 1.9491, "step": 10769 }, { "epoch": 0.36, "grad_norm": 0.4571802318096161, "learning_rate": 0.0005791268984172521, "loss": 1.836, "step": 10770 }, { "epoch": 0.36, "grad_norm": 0.4426925778388977, "learning_rate": 0.0005791230676783253, "loss": 1.8993, "step": 10771 }, { "epoch": 0.36, "grad_norm": 0.4845992624759674, "learning_rate": 0.0005791192366005842, "loss": 1.8345, "step": 10772 }, { "epoch": 0.36, "grad_norm": 0.4405134618282318, "learning_rate": 0.0005791154051840336, "loss": 1.9247, "step": 10773 }, { "epoch": 0.36, "grad_norm": 0.437068372964859, "learning_rate": 0.0005791115734286779, "loss": 1.8528, "step": 10774 }, { "epoch": 0.36, "grad_norm": 0.4325276017189026, "learning_rate": 0.000579107741334522, "loss": 1.9486, "step": 10775 }, { "epoch": 0.36, "grad_norm": 0.4609341323375702, "learning_rate": 0.0005791039089015704, "loss": 1.9406, "step": 10776 }, { "epoch": 0.36, "grad_norm": 0.44146928191185, "learning_rate": 0.0005791000761298277, "loss": 1.9081, "step": 10777 }, { "epoch": 0.36, "grad_norm": 0.446544885635376, "learning_rate": 0.0005790962430192987, "loss": 1.9126, "step": 10778 }, { "epoch": 0.36, "grad_norm": 0.4254537522792816, "learning_rate": 0.000579092409569988, "loss": 1.9119, "step": 10779 }, { "epoch": 0.36, "grad_norm": 0.43366512656211853, "learning_rate": 0.0005790885757819002, "loss": 1.9162, "step": 10780 }, { "epoch": 0.36, "grad_norm": 0.43938687443733215, "learning_rate": 0.00057908474165504, "loss": 1.8926, "step": 10781 }, { "epoch": 0.36, "grad_norm": 0.4595763683319092, "learning_rate": 0.000579080907189412, "loss": 1.8231, "step": 10782 }, { "epoch": 0.36, "grad_norm": 0.43604570627212524, "learning_rate": 0.000579077072385021, "loss": 1.896, "step": 10783 }, { "epoch": 0.36, "grad_norm": 0.43689873814582825, "learning_rate": 0.0005790732372418714, "loss": 1.8633, "step": 10784 }, { "epoch": 0.36, "grad_norm": 0.4515834450721741, "learning_rate": 0.0005790694017599682, "loss": 1.8197, "step": 10785 }, { "epoch": 0.36, "grad_norm": 0.4475829601287842, "learning_rate": 0.0005790655659393157, "loss": 1.8891, "step": 10786 }, { "epoch": 0.36, "grad_norm": 0.4409138560295105, "learning_rate": 0.0005790617297799189, "loss": 1.8624, "step": 10787 }, { "epoch": 0.36, "grad_norm": 0.455031156539917, "learning_rate": 0.0005790578932817821, "loss": 1.865, "step": 10788 }, { "epoch": 0.36, "grad_norm": 0.43908900022506714, "learning_rate": 0.0005790540564449101, "loss": 1.9451, "step": 10789 }, { "epoch": 0.36, "grad_norm": 0.44365477561950684, "learning_rate": 0.0005790502192693078, "loss": 1.954, "step": 10790 }, { "epoch": 0.36, "grad_norm": 0.43726882338523865, "learning_rate": 0.0005790463817549796, "loss": 1.8787, "step": 10791 }, { "epoch": 0.36, "grad_norm": 0.4464501142501831, "learning_rate": 0.0005790425439019301, "loss": 1.9311, "step": 10792 }, { "epoch": 0.36, "grad_norm": 0.4583630859851837, "learning_rate": 0.0005790387057101642, "loss": 1.9162, "step": 10793 }, { "epoch": 0.36, "grad_norm": 0.4391700327396393, "learning_rate": 0.0005790348671796863, "loss": 1.9022, "step": 10794 }, { "epoch": 0.36, "grad_norm": 0.42726537585258484, "learning_rate": 0.0005790310283105012, "loss": 1.8748, "step": 10795 }, { "epoch": 0.36, "grad_norm": 0.4379071295261383, "learning_rate": 0.0005790271891026135, "loss": 1.9492, "step": 10796 }, { "epoch": 0.36, "grad_norm": 0.42830559611320496, "learning_rate": 0.000579023349556028, "loss": 1.8831, "step": 10797 }, { "epoch": 0.36, "grad_norm": 0.43989941477775574, "learning_rate": 0.0005790195096707494, "loss": 1.8906, "step": 10798 }, { "epoch": 0.36, "grad_norm": 0.46759602427482605, "learning_rate": 0.000579015669446782, "loss": 1.9344, "step": 10799 }, { "epoch": 0.36, "grad_norm": 0.44510942697525024, "learning_rate": 0.0005790118288841308, "loss": 1.9916, "step": 10800 }, { "epoch": 0.36, "grad_norm": 0.4521051347255707, "learning_rate": 0.0005790079879828003, "loss": 1.8655, "step": 10801 }, { "epoch": 0.36, "grad_norm": 0.4498908817768097, "learning_rate": 0.0005790041467427951, "loss": 1.8169, "step": 10802 }, { "epoch": 0.36, "grad_norm": 0.43974876403808594, "learning_rate": 0.0005790003051641201, "loss": 1.8693, "step": 10803 }, { "epoch": 0.36, "grad_norm": 0.4406810998916626, "learning_rate": 0.0005789964632467798, "loss": 1.9023, "step": 10804 }, { "epoch": 0.36, "grad_norm": 0.43244802951812744, "learning_rate": 0.000578992620990779, "loss": 1.8978, "step": 10805 }, { "epoch": 0.36, "grad_norm": 0.4421728551387787, "learning_rate": 0.0005789887783961223, "loss": 1.9207, "step": 10806 }, { "epoch": 0.36, "grad_norm": 0.467986136674881, "learning_rate": 0.0005789849354628141, "loss": 1.8732, "step": 10807 }, { "epoch": 0.36, "grad_norm": 0.5027390718460083, "learning_rate": 0.0005789810921908594, "loss": 1.8872, "step": 10808 }, { "epoch": 0.36, "grad_norm": 0.445892333984375, "learning_rate": 0.0005789772485802629, "loss": 1.7701, "step": 10809 }, { "epoch": 0.36, "grad_norm": 0.43090394139289856, "learning_rate": 0.0005789734046310291, "loss": 1.8634, "step": 10810 }, { "epoch": 0.36, "grad_norm": 0.4672454297542572, "learning_rate": 0.0005789695603431624, "loss": 1.9194, "step": 10811 }, { "epoch": 0.36, "grad_norm": 0.43528619408607483, "learning_rate": 0.0005789657157166681, "loss": 1.895, "step": 10812 }, { "epoch": 0.36, "grad_norm": 0.44022369384765625, "learning_rate": 0.0005789618707515503, "loss": 1.8958, "step": 10813 }, { "epoch": 0.36, "grad_norm": 0.4591531455516815, "learning_rate": 0.0005789580254478141, "loss": 1.8549, "step": 10814 }, { "epoch": 0.36, "grad_norm": 0.44654422998428345, "learning_rate": 0.0005789541798054638, "loss": 1.8931, "step": 10815 }, { "epoch": 0.36, "grad_norm": 0.42925602197647095, "learning_rate": 0.0005789503338245042, "loss": 1.8988, "step": 10816 }, { "epoch": 0.36, "grad_norm": 0.4419732391834259, "learning_rate": 0.0005789464875049402, "loss": 1.877, "step": 10817 }, { "epoch": 0.36, "grad_norm": 0.4496077001094818, "learning_rate": 0.0005789426408467761, "loss": 1.8201, "step": 10818 }, { "epoch": 0.36, "grad_norm": 0.4384830594062805, "learning_rate": 0.0005789387938500167, "loss": 1.8774, "step": 10819 }, { "epoch": 0.36, "grad_norm": 0.4466562867164612, "learning_rate": 0.0005789349465146668, "loss": 1.8789, "step": 10820 }, { "epoch": 0.36, "grad_norm": 0.4437323808670044, "learning_rate": 0.0005789310988407309, "loss": 1.8355, "step": 10821 }, { "epoch": 0.36, "grad_norm": 0.4364272654056549, "learning_rate": 0.0005789272508282138, "loss": 1.8953, "step": 10822 }, { "epoch": 0.36, "grad_norm": 0.44624063372612, "learning_rate": 0.0005789234024771202, "loss": 1.8917, "step": 10823 }, { "epoch": 0.36, "grad_norm": 0.46945732831954956, "learning_rate": 0.0005789195537874546, "loss": 1.9194, "step": 10824 }, { "epoch": 0.36, "grad_norm": 0.4594326615333557, "learning_rate": 0.0005789157047592216, "loss": 1.9722, "step": 10825 }, { "epoch": 0.36, "grad_norm": 0.4425339996814728, "learning_rate": 0.0005789118553924263, "loss": 1.8975, "step": 10826 }, { "epoch": 0.36, "grad_norm": 0.4273882508277893, "learning_rate": 0.0005789080056870728, "loss": 1.8549, "step": 10827 }, { "epoch": 0.36, "grad_norm": 0.4559010863304138, "learning_rate": 0.0005789041556431664, "loss": 1.9004, "step": 10828 }, { "epoch": 0.36, "grad_norm": 0.44762104749679565, "learning_rate": 0.0005789003052607113, "loss": 1.825, "step": 10829 }, { "epoch": 0.36, "grad_norm": 0.44187769293785095, "learning_rate": 0.0005788964545397124, "loss": 1.9355, "step": 10830 }, { "epoch": 0.36, "grad_norm": 0.4523533284664154, "learning_rate": 0.0005788926034801742, "loss": 1.8881, "step": 10831 }, { "epoch": 0.36, "grad_norm": 0.486559122800827, "learning_rate": 0.0005788887520821014, "loss": 1.9189, "step": 10832 }, { "epoch": 0.36, "grad_norm": 0.4623219072818756, "learning_rate": 0.0005788849003454989, "loss": 1.8952, "step": 10833 }, { "epoch": 0.36, "grad_norm": 0.4393022358417511, "learning_rate": 0.0005788810482703712, "loss": 1.9501, "step": 10834 }, { "epoch": 0.36, "grad_norm": 0.44922980666160583, "learning_rate": 0.000578877195856723, "loss": 1.8426, "step": 10835 }, { "epoch": 0.36, "grad_norm": 0.4683109223842621, "learning_rate": 0.0005788733431045589, "loss": 1.8368, "step": 10836 }, { "epoch": 0.36, "grad_norm": 0.4397587776184082, "learning_rate": 0.0005788694900138837, "loss": 1.9024, "step": 10837 }, { "epoch": 0.36, "grad_norm": 0.5002171397209167, "learning_rate": 0.000578865636584702, "loss": 1.9458, "step": 10838 }, { "epoch": 0.36, "grad_norm": 0.4444107413291931, "learning_rate": 0.0005788617828170185, "loss": 1.8972, "step": 10839 }, { "epoch": 0.36, "grad_norm": 0.4722050130367279, "learning_rate": 0.000578857928710838, "loss": 1.8236, "step": 10840 }, { "epoch": 0.36, "grad_norm": 0.453206330537796, "learning_rate": 0.0005788540742661649, "loss": 1.8734, "step": 10841 }, { "epoch": 0.36, "grad_norm": 0.4373050034046173, "learning_rate": 0.0005788502194830041, "loss": 1.8434, "step": 10842 }, { "epoch": 0.36, "grad_norm": 0.45296531915664673, "learning_rate": 0.0005788463643613604, "loss": 1.891, "step": 10843 }, { "epoch": 0.36, "grad_norm": 0.6757211089134216, "learning_rate": 0.0005788425089012381, "loss": 1.8948, "step": 10844 }, { "epoch": 0.36, "grad_norm": 0.44260329008102417, "learning_rate": 0.000578838653102642, "loss": 1.8061, "step": 10845 }, { "epoch": 0.36, "grad_norm": 0.43882083892822266, "learning_rate": 0.0005788347969655769, "loss": 1.8815, "step": 10846 }, { "epoch": 0.36, "grad_norm": 0.4507473409175873, "learning_rate": 0.0005788309404900476, "loss": 1.9204, "step": 10847 }, { "epoch": 0.36, "grad_norm": 0.46111366152763367, "learning_rate": 0.0005788270836760586, "loss": 1.9445, "step": 10848 }, { "epoch": 0.36, "grad_norm": 0.45985662937164307, "learning_rate": 0.0005788232265236145, "loss": 1.8335, "step": 10849 }, { "epoch": 0.36, "grad_norm": 0.43973109126091003, "learning_rate": 0.0005788193690327201, "loss": 1.8862, "step": 10850 }, { "epoch": 0.36, "grad_norm": 0.4678918719291687, "learning_rate": 0.0005788155112033802, "loss": 1.8665, "step": 10851 }, { "epoch": 0.36, "grad_norm": 0.4626932740211487, "learning_rate": 0.0005788116530355993, "loss": 1.8475, "step": 10852 }, { "epoch": 0.36, "grad_norm": 0.4811170995235443, "learning_rate": 0.0005788077945293819, "loss": 1.9686, "step": 10853 }, { "epoch": 0.36, "grad_norm": 0.4648742973804474, "learning_rate": 0.0005788039356847332, "loss": 1.8847, "step": 10854 }, { "epoch": 0.36, "grad_norm": 0.4856022000312805, "learning_rate": 0.0005788000765016575, "loss": 1.8639, "step": 10855 }, { "epoch": 0.36, "grad_norm": 0.4373324513435364, "learning_rate": 0.0005787962169801596, "loss": 1.9333, "step": 10856 }, { "epoch": 0.36, "grad_norm": 0.4491588771343231, "learning_rate": 0.0005787923571202441, "loss": 1.8955, "step": 10857 }, { "epoch": 0.36, "grad_norm": 0.49656322598457336, "learning_rate": 0.0005787884969219158, "loss": 1.8931, "step": 10858 }, { "epoch": 0.36, "grad_norm": 0.44483283162117004, "learning_rate": 0.0005787846363851794, "loss": 1.8572, "step": 10859 }, { "epoch": 0.36, "grad_norm": 0.44084739685058594, "learning_rate": 0.0005787807755100395, "loss": 1.9622, "step": 10860 }, { "epoch": 0.36, "grad_norm": 0.459310919046402, "learning_rate": 0.0005787769142965007, "loss": 1.9678, "step": 10861 }, { "epoch": 0.36, "grad_norm": 0.48791900277137756, "learning_rate": 0.000578773052744568, "loss": 1.9423, "step": 10862 }, { "epoch": 0.36, "grad_norm": 0.46762678027153015, "learning_rate": 0.0005787691908542458, "loss": 1.9179, "step": 10863 }, { "epoch": 0.36, "grad_norm": 0.4505501985549927, "learning_rate": 0.0005787653286255389, "loss": 1.8944, "step": 10864 }, { "epoch": 0.36, "grad_norm": 0.45658665895462036, "learning_rate": 0.0005787614660584519, "loss": 1.9329, "step": 10865 }, { "epoch": 0.36, "grad_norm": 0.46693211793899536, "learning_rate": 0.0005787576031529896, "loss": 1.895, "step": 10866 }, { "epoch": 0.36, "grad_norm": 0.44020766019821167, "learning_rate": 0.0005787537399091566, "loss": 1.8394, "step": 10867 }, { "epoch": 0.36, "grad_norm": 0.45051491260528564, "learning_rate": 0.0005787498763269578, "loss": 1.9545, "step": 10868 }, { "epoch": 0.36, "grad_norm": 0.45452287793159485, "learning_rate": 0.0005787460124063975, "loss": 1.9684, "step": 10869 }, { "epoch": 0.36, "grad_norm": 0.4424627125263214, "learning_rate": 0.0005787421481474808, "loss": 1.944, "step": 10870 }, { "epoch": 0.36, "grad_norm": 0.43142640590667725, "learning_rate": 0.000578738283550212, "loss": 1.8919, "step": 10871 }, { "epoch": 0.36, "grad_norm": 0.4478438198566437, "learning_rate": 0.0005787344186145961, "loss": 1.8984, "step": 10872 }, { "epoch": 0.36, "grad_norm": 0.4444599151611328, "learning_rate": 0.0005787305533406377, "loss": 1.9419, "step": 10873 }, { "epoch": 0.36, "grad_norm": 0.44737517833709717, "learning_rate": 0.0005787266877283414, "loss": 1.9267, "step": 10874 }, { "epoch": 0.36, "grad_norm": 0.4329347610473633, "learning_rate": 0.000578722821777712, "loss": 1.8782, "step": 10875 }, { "epoch": 0.36, "grad_norm": 0.45780062675476074, "learning_rate": 0.0005787189554887543, "loss": 1.9217, "step": 10876 }, { "epoch": 0.36, "grad_norm": 0.4518924057483673, "learning_rate": 0.0005787150888614727, "loss": 1.9126, "step": 10877 }, { "epoch": 0.36, "grad_norm": 0.45277294516563416, "learning_rate": 0.0005787112218958721, "loss": 1.8925, "step": 10878 }, { "epoch": 0.36, "grad_norm": 0.451306015253067, "learning_rate": 0.0005787073545919572, "loss": 1.869, "step": 10879 }, { "epoch": 0.36, "grad_norm": 0.4482312798500061, "learning_rate": 0.0005787034869497326, "loss": 1.9592, "step": 10880 }, { "epoch": 0.36, "grad_norm": 0.43843919038772583, "learning_rate": 0.0005786996189692029, "loss": 1.8578, "step": 10881 }, { "epoch": 0.36, "grad_norm": 0.4371994137763977, "learning_rate": 0.0005786957506503731, "loss": 1.896, "step": 10882 }, { "epoch": 0.36, "grad_norm": 0.45374366641044617, "learning_rate": 0.0005786918819932476, "loss": 1.9301, "step": 10883 }, { "epoch": 0.36, "grad_norm": 0.4532347619533539, "learning_rate": 0.0005786880129978313, "loss": 1.9349, "step": 10884 }, { "epoch": 0.36, "grad_norm": 0.4464150667190552, "learning_rate": 0.0005786841436641288, "loss": 1.8592, "step": 10885 }, { "epoch": 0.36, "grad_norm": 0.4406767785549164, "learning_rate": 0.0005786802739921448, "loss": 1.8911, "step": 10886 }, { "epoch": 0.36, "grad_norm": 0.4824250340461731, "learning_rate": 0.0005786764039818839, "loss": 1.9172, "step": 10887 }, { "epoch": 0.36, "grad_norm": 0.45173701643943787, "learning_rate": 0.0005786725336333511, "loss": 1.9128, "step": 10888 }, { "epoch": 0.36, "grad_norm": 0.4612303078174591, "learning_rate": 0.0005786686629465508, "loss": 1.9279, "step": 10889 }, { "epoch": 0.36, "grad_norm": 0.48045408725738525, "learning_rate": 0.0005786647919214879, "loss": 1.8597, "step": 10890 }, { "epoch": 0.36, "grad_norm": 0.44581902027130127, "learning_rate": 0.000578660920558167, "loss": 1.847, "step": 10891 }, { "epoch": 0.36, "grad_norm": 0.44266974925994873, "learning_rate": 0.0005786570488565926, "loss": 1.8622, "step": 10892 }, { "epoch": 0.36, "grad_norm": 0.4317820370197296, "learning_rate": 0.0005786531768167697, "loss": 1.8953, "step": 10893 }, { "epoch": 0.36, "grad_norm": 0.4523729085922241, "learning_rate": 0.000578649304438703, "loss": 1.8468, "step": 10894 }, { "epoch": 0.36, "grad_norm": 0.44289395213127136, "learning_rate": 0.000578645431722397, "loss": 1.8297, "step": 10895 }, { "epoch": 0.36, "grad_norm": 0.45067644119262695, "learning_rate": 0.0005786415586678565, "loss": 1.9119, "step": 10896 }, { "epoch": 0.36, "grad_norm": 0.432268887758255, "learning_rate": 0.0005786376852750863, "loss": 1.8613, "step": 10897 }, { "epoch": 0.36, "grad_norm": 0.44808217883110046, "learning_rate": 0.0005786338115440909, "loss": 1.8974, "step": 10898 }, { "epoch": 0.36, "grad_norm": 0.44806158542633057, "learning_rate": 0.0005786299374748752, "loss": 1.8957, "step": 10899 }, { "epoch": 0.36, "grad_norm": 0.46155545115470886, "learning_rate": 0.0005786260630674438, "loss": 1.824, "step": 10900 }, { "epoch": 0.36, "grad_norm": 0.4460800886154175, "learning_rate": 0.0005786221883218013, "loss": 1.8183, "step": 10901 }, { "epoch": 0.36, "grad_norm": 0.4794941544532776, "learning_rate": 0.0005786183132379526, "loss": 1.9136, "step": 10902 }, { "epoch": 0.36, "grad_norm": 0.4431464672088623, "learning_rate": 0.0005786144378159023, "loss": 1.852, "step": 10903 }, { "epoch": 0.36, "grad_norm": 0.44013339281082153, "learning_rate": 0.0005786105620556552, "loss": 1.8918, "step": 10904 }, { "epoch": 0.36, "grad_norm": 0.4480552673339844, "learning_rate": 0.0005786066859572158, "loss": 1.8893, "step": 10905 }, { "epoch": 0.36, "grad_norm": 0.4575522840023041, "learning_rate": 0.000578602809520589, "loss": 1.9762, "step": 10906 }, { "epoch": 0.36, "grad_norm": 0.45838528871536255, "learning_rate": 0.0005785989327457794, "loss": 1.9407, "step": 10907 }, { "epoch": 0.36, "grad_norm": 0.43173423409461975, "learning_rate": 0.0005785950556327919, "loss": 1.9229, "step": 10908 }, { "epoch": 0.36, "grad_norm": 0.43920278549194336, "learning_rate": 0.000578591178181631, "loss": 1.817, "step": 10909 }, { "epoch": 0.36, "grad_norm": 0.4528522789478302, "learning_rate": 0.0005785873003923013, "loss": 1.8853, "step": 10910 }, { "epoch": 0.36, "grad_norm": 0.4509690999984741, "learning_rate": 0.0005785834222648077, "loss": 1.9442, "step": 10911 }, { "epoch": 0.36, "grad_norm": 0.442791610956192, "learning_rate": 0.000578579543799155, "loss": 1.9018, "step": 10912 }, { "epoch": 0.36, "grad_norm": 0.44897788763046265, "learning_rate": 0.0005785756649953476, "loss": 1.8951, "step": 10913 }, { "epoch": 0.36, "grad_norm": 0.4400477111339569, "learning_rate": 0.0005785717858533906, "loss": 1.8744, "step": 10914 }, { "epoch": 0.36, "grad_norm": 0.4573187232017517, "learning_rate": 0.0005785679063732884, "loss": 1.8468, "step": 10915 }, { "epoch": 0.36, "grad_norm": 0.43039438128471375, "learning_rate": 0.0005785640265550458, "loss": 1.8537, "step": 10916 }, { "epoch": 0.36, "grad_norm": 0.45045599341392517, "learning_rate": 0.0005785601463986675, "loss": 1.8702, "step": 10917 }, { "epoch": 0.36, "grad_norm": 0.49067485332489014, "learning_rate": 0.0005785562659041582, "loss": 1.8706, "step": 10918 }, { "epoch": 0.36, "grad_norm": 0.43896016478538513, "learning_rate": 0.0005785523850715227, "loss": 1.8924, "step": 10919 }, { "epoch": 0.36, "grad_norm": 0.4482285976409912, "learning_rate": 0.0005785485039007656, "loss": 1.9394, "step": 10920 }, { "epoch": 0.36, "grad_norm": 0.4529537260532379, "learning_rate": 0.0005785446223918917, "loss": 1.8153, "step": 10921 }, { "epoch": 0.36, "grad_norm": 0.4504835605621338, "learning_rate": 0.0005785407405449057, "loss": 1.9081, "step": 10922 }, { "epoch": 0.36, "grad_norm": 0.46617546677589417, "learning_rate": 0.0005785368583598122, "loss": 1.876, "step": 10923 }, { "epoch": 0.36, "grad_norm": 0.44838282465934753, "learning_rate": 0.000578532975836616, "loss": 1.9349, "step": 10924 }, { "epoch": 0.36, "grad_norm": 0.490985631942749, "learning_rate": 0.0005785290929753219, "loss": 1.9359, "step": 10925 }, { "epoch": 0.36, "grad_norm": 0.4615025818347931, "learning_rate": 0.0005785252097759345, "loss": 1.9273, "step": 10926 }, { "epoch": 0.36, "grad_norm": 0.44967684149742126, "learning_rate": 0.0005785213262384585, "loss": 1.845, "step": 10927 }, { "epoch": 0.36, "grad_norm": 0.4563731253147125, "learning_rate": 0.0005785174423628987, "loss": 1.8854, "step": 10928 }, { "epoch": 0.36, "grad_norm": 0.4556560814380646, "learning_rate": 0.0005785135581492596, "loss": 1.9045, "step": 10929 }, { "epoch": 0.36, "grad_norm": 0.4474823474884033, "learning_rate": 0.0005785096735975463, "loss": 1.8834, "step": 10930 }, { "epoch": 0.36, "grad_norm": 0.4415002167224884, "learning_rate": 0.000578505788707763, "loss": 1.8653, "step": 10931 }, { "epoch": 0.36, "grad_norm": 0.44407424330711365, "learning_rate": 0.0005785019034799149, "loss": 1.8475, "step": 10932 }, { "epoch": 0.36, "grad_norm": 0.4569564759731293, "learning_rate": 0.0005784980179140065, "loss": 1.9725, "step": 10933 }, { "epoch": 0.36, "grad_norm": 0.4396464228630066, "learning_rate": 0.0005784941320100427, "loss": 1.8625, "step": 10934 }, { "epoch": 0.36, "grad_norm": 0.432928204536438, "learning_rate": 0.0005784902457680279, "loss": 1.8478, "step": 10935 }, { "epoch": 0.36, "grad_norm": 0.46831002831459045, "learning_rate": 0.0005784863591879669, "loss": 1.9183, "step": 10936 }, { "epoch": 0.36, "grad_norm": 0.44247955083847046, "learning_rate": 0.0005784824722698647, "loss": 1.7861, "step": 10937 }, { "epoch": 0.36, "grad_norm": 0.44608452916145325, "learning_rate": 0.0005784785850137256, "loss": 1.9255, "step": 10938 }, { "epoch": 0.36, "grad_norm": 0.4426894783973694, "learning_rate": 0.0005784746974195547, "loss": 1.8229, "step": 10939 }, { "epoch": 0.36, "grad_norm": 0.4759967625141144, "learning_rate": 0.0005784708094873564, "loss": 1.8753, "step": 10940 }, { "epoch": 0.36, "grad_norm": 0.4447648823261261, "learning_rate": 0.0005784669212171356, "loss": 1.9022, "step": 10941 }, { "epoch": 0.36, "grad_norm": 0.4606727361679077, "learning_rate": 0.000578463032608897, "loss": 1.9129, "step": 10942 }, { "epoch": 0.36, "grad_norm": 0.44646185636520386, "learning_rate": 0.0005784591436626454, "loss": 1.8441, "step": 10943 }, { "epoch": 0.36, "grad_norm": 0.44853806495666504, "learning_rate": 0.0005784552543783853, "loss": 1.8884, "step": 10944 }, { "epoch": 0.36, "grad_norm": 0.4481397569179535, "learning_rate": 0.0005784513647561217, "loss": 1.8963, "step": 10945 }, { "epoch": 0.36, "grad_norm": 0.45037174224853516, "learning_rate": 0.000578447474795859, "loss": 1.8595, "step": 10946 }, { "epoch": 0.36, "grad_norm": 0.44836723804473877, "learning_rate": 0.0005784435844976022, "loss": 1.9088, "step": 10947 }, { "epoch": 0.36, "grad_norm": 0.4460747539997101, "learning_rate": 0.0005784396938613559, "loss": 1.8416, "step": 10948 }, { "epoch": 0.36, "grad_norm": 0.4600425660610199, "learning_rate": 0.0005784358028871248, "loss": 1.9646, "step": 10949 }, { "epoch": 0.36, "grad_norm": 0.4417476952075958, "learning_rate": 0.0005784319115749137, "loss": 1.8698, "step": 10950 }, { "epoch": 0.36, "grad_norm": 0.4450165629386902, "learning_rate": 0.0005784280199247273, "loss": 1.9603, "step": 10951 }, { "epoch": 0.36, "grad_norm": 0.4471643269062042, "learning_rate": 0.0005784241279365702, "loss": 1.9023, "step": 10952 }, { "epoch": 0.36, "grad_norm": 0.4509333372116089, "learning_rate": 0.0005784202356104473, "loss": 1.8654, "step": 10953 }, { "epoch": 0.36, "grad_norm": 0.4477790296077728, "learning_rate": 0.0005784163429463634, "loss": 1.8782, "step": 10954 }, { "epoch": 0.36, "grad_norm": 0.4452369213104248, "learning_rate": 0.0005784124499443228, "loss": 1.8256, "step": 10955 }, { "epoch": 0.36, "grad_norm": 0.4333541691303253, "learning_rate": 0.0005784085566043308, "loss": 1.9123, "step": 10956 }, { "epoch": 0.36, "grad_norm": 0.4303109645843506, "learning_rate": 0.0005784046629263917, "loss": 1.9089, "step": 10957 }, { "epoch": 0.36, "grad_norm": 0.480827271938324, "learning_rate": 0.0005784007689105102, "loss": 1.898, "step": 10958 }, { "epoch": 0.36, "grad_norm": 0.4427747130393982, "learning_rate": 0.0005783968745566914, "loss": 1.9374, "step": 10959 }, { "epoch": 0.36, "grad_norm": 0.4223954975605011, "learning_rate": 0.0005783929798649398, "loss": 1.8885, "step": 10960 }, { "epoch": 0.36, "grad_norm": 0.4702553451061249, "learning_rate": 0.00057838908483526, "loss": 1.9288, "step": 10961 }, { "epoch": 0.36, "grad_norm": 0.462780624628067, "learning_rate": 0.0005783851894676571, "loss": 1.9369, "step": 10962 }, { "epoch": 0.36, "grad_norm": 0.45880669355392456, "learning_rate": 0.0005783812937621354, "loss": 1.8936, "step": 10963 }, { "epoch": 0.36, "grad_norm": 0.4421653747558594, "learning_rate": 0.0005783773977186999, "loss": 1.8593, "step": 10964 }, { "epoch": 0.36, "grad_norm": 0.4275628924369812, "learning_rate": 0.0005783735013373553, "loss": 1.9566, "step": 10965 }, { "epoch": 0.36, "grad_norm": 0.43768221139907837, "learning_rate": 0.0005783696046181063, "loss": 1.9184, "step": 10966 }, { "epoch": 0.36, "grad_norm": 0.43164902925491333, "learning_rate": 0.0005783657075609575, "loss": 1.8958, "step": 10967 }, { "epoch": 0.36, "grad_norm": 0.43013545870780945, "learning_rate": 0.0005783618101659137, "loss": 1.8249, "step": 10968 }, { "epoch": 0.36, "grad_norm": 0.4400789141654968, "learning_rate": 0.0005783579124329798, "loss": 1.8371, "step": 10969 }, { "epoch": 0.36, "grad_norm": 0.4257126748561859, "learning_rate": 0.0005783540143621604, "loss": 1.8781, "step": 10970 }, { "epoch": 0.37, "grad_norm": 0.46108806133270264, "learning_rate": 0.0005783501159534602, "loss": 1.8294, "step": 10971 }, { "epoch": 0.37, "grad_norm": 0.4374244511127472, "learning_rate": 0.0005783462172068841, "loss": 1.8613, "step": 10972 }, { "epoch": 0.37, "grad_norm": 0.4344736635684967, "learning_rate": 0.0005783423181224365, "loss": 1.9104, "step": 10973 }, { "epoch": 0.37, "grad_norm": 0.4429295063018799, "learning_rate": 0.0005783384187001224, "loss": 1.9095, "step": 10974 }, { "epoch": 0.37, "grad_norm": 0.452148973941803, "learning_rate": 0.0005783345189399465, "loss": 1.8999, "step": 10975 }, { "epoch": 0.37, "grad_norm": 0.4328334331512451, "learning_rate": 0.0005783306188419135, "loss": 1.8766, "step": 10976 }, { "epoch": 0.37, "grad_norm": 0.4393317699432373, "learning_rate": 0.0005783267184060282, "loss": 1.9018, "step": 10977 }, { "epoch": 0.37, "grad_norm": 0.44118091464042664, "learning_rate": 0.0005783228176322952, "loss": 1.8528, "step": 10978 }, { "epoch": 0.37, "grad_norm": 0.47855114936828613, "learning_rate": 0.0005783189165207194, "loss": 1.7561, "step": 10979 }, { "epoch": 0.37, "grad_norm": 0.44771453738212585, "learning_rate": 0.0005783150150713054, "loss": 1.8878, "step": 10980 }, { "epoch": 0.37, "grad_norm": 0.4579872190952301, "learning_rate": 0.0005783111132840579, "loss": 1.9556, "step": 10981 }, { "epoch": 0.37, "grad_norm": 0.431506872177124, "learning_rate": 0.0005783072111589817, "loss": 1.8027, "step": 10982 }, { "epoch": 0.37, "grad_norm": 0.44233521819114685, "learning_rate": 0.0005783033086960817, "loss": 1.8576, "step": 10983 }, { "epoch": 0.37, "grad_norm": 0.48086243867874146, "learning_rate": 0.0005782994058953625, "loss": 1.9122, "step": 10984 }, { "epoch": 0.37, "grad_norm": 0.45657479763031006, "learning_rate": 0.0005782955027568286, "loss": 1.8456, "step": 10985 }, { "epoch": 0.37, "grad_norm": 0.4530552327632904, "learning_rate": 0.0005782915992804851, "loss": 1.9137, "step": 10986 }, { "epoch": 0.37, "grad_norm": 0.4401099383831024, "learning_rate": 0.0005782876954663367, "loss": 1.8589, "step": 10987 }, { "epoch": 0.37, "grad_norm": 0.43345946073532104, "learning_rate": 0.0005782837913143879, "loss": 1.9732, "step": 10988 }, { "epoch": 0.37, "grad_norm": 0.456788569688797, "learning_rate": 0.0005782798868246437, "loss": 1.8928, "step": 10989 }, { "epoch": 0.37, "grad_norm": 0.466267466545105, "learning_rate": 0.0005782759819971086, "loss": 1.9833, "step": 10990 }, { "epoch": 0.37, "grad_norm": 0.45053112506866455, "learning_rate": 0.0005782720768317875, "loss": 1.8351, "step": 10991 }, { "epoch": 0.37, "grad_norm": 0.4633118808269501, "learning_rate": 0.0005782681713286851, "loss": 1.8497, "step": 10992 }, { "epoch": 0.37, "grad_norm": 0.44753894209861755, "learning_rate": 0.0005782642654878061, "loss": 1.9183, "step": 10993 }, { "epoch": 0.37, "grad_norm": 0.4414623975753784, "learning_rate": 0.0005782603593091554, "loss": 1.8998, "step": 10994 }, { "epoch": 0.37, "grad_norm": 0.44747424125671387, "learning_rate": 0.0005782564527927375, "loss": 1.9043, "step": 10995 }, { "epoch": 0.37, "grad_norm": 0.4581623077392578, "learning_rate": 0.0005782525459385573, "loss": 1.8567, "step": 10996 }, { "epoch": 0.37, "grad_norm": 0.44888392090797424, "learning_rate": 0.0005782486387466196, "loss": 1.8142, "step": 10997 }, { "epoch": 0.37, "grad_norm": 0.4518095552921295, "learning_rate": 0.000578244731216929, "loss": 1.8724, "step": 10998 }, { "epoch": 0.37, "grad_norm": 0.4576171338558197, "learning_rate": 0.0005782408233494903, "loss": 1.9361, "step": 10999 }, { "epoch": 0.37, "grad_norm": 0.45188361406326294, "learning_rate": 0.0005782369151443081, "loss": 1.89, "step": 11000 }, { "epoch": 0.37, "grad_norm": 0.46806129813194275, "learning_rate": 0.0005782330066013875, "loss": 1.8537, "step": 11001 }, { "epoch": 0.37, "grad_norm": 0.4591907262802124, "learning_rate": 0.0005782290977207329, "loss": 1.8767, "step": 11002 }, { "epoch": 0.37, "grad_norm": 0.44586706161499023, "learning_rate": 0.0005782251885023492, "loss": 1.8945, "step": 11003 }, { "epoch": 0.37, "grad_norm": 0.45619091391563416, "learning_rate": 0.0005782212789462411, "loss": 1.9275, "step": 11004 }, { "epoch": 0.37, "grad_norm": 0.43667933344841003, "learning_rate": 0.0005782173690524133, "loss": 1.8492, "step": 11005 }, { "epoch": 0.37, "grad_norm": 0.4551285207271576, "learning_rate": 0.0005782134588208707, "loss": 1.8403, "step": 11006 }, { "epoch": 0.37, "grad_norm": 0.4357225298881531, "learning_rate": 0.0005782095482516179, "loss": 1.7864, "step": 11007 }, { "epoch": 0.37, "grad_norm": 0.4207611382007599, "learning_rate": 0.0005782056373446597, "loss": 1.9295, "step": 11008 }, { "epoch": 0.37, "grad_norm": 0.4502459168434143, "learning_rate": 0.0005782017261000008, "loss": 1.8751, "step": 11009 }, { "epoch": 0.37, "grad_norm": 0.42730221152305603, "learning_rate": 0.0005781978145176462, "loss": 1.8781, "step": 11010 }, { "epoch": 0.37, "grad_norm": 0.4488740861415863, "learning_rate": 0.0005781939025976004, "loss": 1.9362, "step": 11011 }, { "epoch": 0.37, "grad_norm": 0.44234299659729004, "learning_rate": 0.000578189990339868, "loss": 1.8768, "step": 11012 }, { "epoch": 0.37, "grad_norm": 0.43657466769218445, "learning_rate": 0.0005781860777444541, "loss": 1.8114, "step": 11013 }, { "epoch": 0.37, "grad_norm": 0.42675188183784485, "learning_rate": 0.0005781821648113632, "loss": 1.8014, "step": 11014 }, { "epoch": 0.37, "grad_norm": 0.4387103021144867, "learning_rate": 0.0005781782515406001, "loss": 1.913, "step": 11015 }, { "epoch": 0.37, "grad_norm": 0.4382447302341461, "learning_rate": 0.0005781743379321698, "loss": 1.9269, "step": 11016 }, { "epoch": 0.37, "grad_norm": 0.4377336800098419, "learning_rate": 0.0005781704239860766, "loss": 1.8704, "step": 11017 }, { "epoch": 0.37, "grad_norm": 0.4356444180011749, "learning_rate": 0.0005781665097023257, "loss": 1.8887, "step": 11018 }, { "epoch": 0.37, "grad_norm": 0.4315357208251953, "learning_rate": 0.0005781625950809215, "loss": 1.8468, "step": 11019 }, { "epoch": 0.37, "grad_norm": 0.45297276973724365, "learning_rate": 0.0005781586801218689, "loss": 1.9406, "step": 11020 }, { "epoch": 0.37, "grad_norm": 0.43368834257125854, "learning_rate": 0.0005781547648251727, "loss": 1.8484, "step": 11021 }, { "epoch": 0.37, "grad_norm": 0.4443352222442627, "learning_rate": 0.0005781508491908376, "loss": 1.906, "step": 11022 }, { "epoch": 0.37, "grad_norm": 0.4383983910083771, "learning_rate": 0.0005781469332188683, "loss": 1.8403, "step": 11023 }, { "epoch": 0.37, "grad_norm": 0.4330531656742096, "learning_rate": 0.0005781430169092697, "loss": 1.8004, "step": 11024 }, { "epoch": 0.37, "grad_norm": 0.4389399290084839, "learning_rate": 0.0005781391002620464, "loss": 1.8911, "step": 11025 }, { "epoch": 0.37, "grad_norm": 0.43233343958854675, "learning_rate": 0.0005781351832772032, "loss": 1.8547, "step": 11026 }, { "epoch": 0.37, "grad_norm": 0.4440804421901703, "learning_rate": 0.000578131265954745, "loss": 1.9114, "step": 11027 }, { "epoch": 0.37, "grad_norm": 0.45335653424263, "learning_rate": 0.0005781273482946762, "loss": 1.8764, "step": 11028 }, { "epoch": 0.37, "grad_norm": 0.43704667687416077, "learning_rate": 0.0005781234302970019, "loss": 1.9082, "step": 11029 }, { "epoch": 0.37, "grad_norm": 0.44200658798217773, "learning_rate": 0.0005781195119617267, "loss": 1.9458, "step": 11030 }, { "epoch": 0.37, "grad_norm": 0.4322345554828644, "learning_rate": 0.0005781155932888555, "loss": 1.9357, "step": 11031 }, { "epoch": 0.37, "grad_norm": 0.44052812457084656, "learning_rate": 0.0005781116742783929, "loss": 1.8688, "step": 11032 }, { "epoch": 0.37, "grad_norm": 0.4533918797969818, "learning_rate": 0.0005781077549303436, "loss": 1.8763, "step": 11033 }, { "epoch": 0.37, "grad_norm": 0.43813836574554443, "learning_rate": 0.0005781038352447125, "loss": 1.8993, "step": 11034 }, { "epoch": 0.37, "grad_norm": 0.4374554753303528, "learning_rate": 0.0005780999152215044, "loss": 1.9148, "step": 11035 }, { "epoch": 0.37, "grad_norm": 0.4427371621131897, "learning_rate": 0.0005780959948607239, "loss": 1.9276, "step": 11036 }, { "epoch": 0.37, "grad_norm": 0.45503759384155273, "learning_rate": 0.0005780920741623759, "loss": 1.9555, "step": 11037 }, { "epoch": 0.37, "grad_norm": 0.43233370780944824, "learning_rate": 0.000578088153126465, "loss": 1.9074, "step": 11038 }, { "epoch": 0.37, "grad_norm": 0.43952447175979614, "learning_rate": 0.0005780842317529961, "loss": 1.9177, "step": 11039 }, { "epoch": 0.37, "grad_norm": 0.46367141604423523, "learning_rate": 0.000578080310041974, "loss": 1.9408, "step": 11040 }, { "epoch": 0.37, "grad_norm": 0.4641598165035248, "learning_rate": 0.0005780763879934033, "loss": 1.8818, "step": 11041 }, { "epoch": 0.37, "grad_norm": 0.4370865225791931, "learning_rate": 0.000578072465607289, "loss": 1.8571, "step": 11042 }, { "epoch": 0.37, "grad_norm": 0.4522401988506317, "learning_rate": 0.0005780685428836354, "loss": 1.809, "step": 11043 }, { "epoch": 0.37, "grad_norm": 0.45714521408081055, "learning_rate": 0.0005780646198224478, "loss": 1.9837, "step": 11044 }, { "epoch": 0.37, "grad_norm": 0.4512510597705841, "learning_rate": 0.0005780606964237306, "loss": 1.9588, "step": 11045 }, { "epoch": 0.37, "grad_norm": 0.48099485039711, "learning_rate": 0.0005780567726874887, "loss": 2.0204, "step": 11046 }, { "epoch": 0.37, "grad_norm": 0.4451432526111603, "learning_rate": 0.0005780528486137268, "loss": 1.8865, "step": 11047 }, { "epoch": 0.37, "grad_norm": 0.4436076283454895, "learning_rate": 0.0005780489242024499, "loss": 1.9078, "step": 11048 }, { "epoch": 0.37, "grad_norm": 0.42417991161346436, "learning_rate": 0.0005780449994536624, "loss": 1.8223, "step": 11049 }, { "epoch": 0.37, "grad_norm": 0.460802286863327, "learning_rate": 0.0005780410743673693, "loss": 1.9023, "step": 11050 }, { "epoch": 0.37, "grad_norm": 0.45326676964759827, "learning_rate": 0.0005780371489435752, "loss": 1.8349, "step": 11051 }, { "epoch": 0.37, "grad_norm": 0.44621312618255615, "learning_rate": 0.0005780332231822851, "loss": 1.8551, "step": 11052 }, { "epoch": 0.37, "grad_norm": 0.439434677362442, "learning_rate": 0.0005780292970835037, "loss": 1.888, "step": 11053 }, { "epoch": 0.37, "grad_norm": 0.46624982357025146, "learning_rate": 0.0005780253706472356, "loss": 1.8669, "step": 11054 }, { "epoch": 0.37, "grad_norm": 0.473842591047287, "learning_rate": 0.0005780214438734856, "loss": 1.8775, "step": 11055 }, { "epoch": 0.37, "grad_norm": 0.43097415566444397, "learning_rate": 0.0005780175167622586, "loss": 1.9136, "step": 11056 }, { "epoch": 0.37, "grad_norm": 0.4773615300655365, "learning_rate": 0.0005780135893135594, "loss": 1.9175, "step": 11057 }, { "epoch": 0.37, "grad_norm": 0.4826273024082184, "learning_rate": 0.0005780096615273925, "loss": 1.841, "step": 11058 }, { "epoch": 0.37, "grad_norm": 0.4445168375968933, "learning_rate": 0.0005780057334037629, "loss": 1.8112, "step": 11059 }, { "epoch": 0.37, "grad_norm": 0.4450525939464569, "learning_rate": 0.0005780018049426752, "loss": 1.8455, "step": 11060 }, { "epoch": 0.37, "grad_norm": 0.4824235439300537, "learning_rate": 0.0005779978761441344, "loss": 1.9556, "step": 11061 }, { "epoch": 0.37, "grad_norm": 0.4693088233470917, "learning_rate": 0.0005779939470081451, "loss": 1.8648, "step": 11062 }, { "epoch": 0.37, "grad_norm": 0.4570576846599579, "learning_rate": 0.0005779900175347121, "loss": 1.7926, "step": 11063 }, { "epoch": 0.37, "grad_norm": 0.45447850227355957, "learning_rate": 0.0005779860877238402, "loss": 1.844, "step": 11064 }, { "epoch": 0.37, "grad_norm": 0.46036747097969055, "learning_rate": 0.0005779821575755341, "loss": 1.9243, "step": 11065 }, { "epoch": 0.37, "grad_norm": 0.44357728958129883, "learning_rate": 0.0005779782270897987, "loss": 1.8471, "step": 11066 }, { "epoch": 0.37, "grad_norm": 0.44797489047050476, "learning_rate": 0.0005779742962666386, "loss": 1.8278, "step": 11067 }, { "epoch": 0.37, "grad_norm": 0.45418795943260193, "learning_rate": 0.0005779703651060588, "loss": 1.8766, "step": 11068 }, { "epoch": 0.37, "grad_norm": 0.4765958786010742, "learning_rate": 0.0005779664336080639, "loss": 1.8869, "step": 11069 }, { "epoch": 0.37, "grad_norm": 0.4429786801338196, "learning_rate": 0.0005779625017726586, "loss": 1.8355, "step": 11070 }, { "epoch": 0.37, "grad_norm": 0.4451795220375061, "learning_rate": 0.0005779585695998478, "loss": 1.9295, "step": 11071 }, { "epoch": 0.37, "grad_norm": 0.4773015081882477, "learning_rate": 0.0005779546370896364, "loss": 1.8867, "step": 11072 }, { "epoch": 0.37, "grad_norm": 0.4624050557613373, "learning_rate": 0.0005779507042420288, "loss": 1.9504, "step": 11073 }, { "epoch": 0.37, "grad_norm": 0.44640907645225525, "learning_rate": 0.0005779467710570302, "loss": 1.8847, "step": 11074 }, { "epoch": 0.37, "grad_norm": 0.4718053340911865, "learning_rate": 0.000577942837534645, "loss": 1.833, "step": 11075 }, { "epoch": 0.37, "grad_norm": 0.45833122730255127, "learning_rate": 0.0005779389036748783, "loss": 1.8669, "step": 11076 }, { "epoch": 0.37, "grad_norm": 0.48303064703941345, "learning_rate": 0.0005779349694777345, "loss": 2.0047, "step": 11077 }, { "epoch": 0.37, "grad_norm": 0.4554930031299591, "learning_rate": 0.0005779310349432187, "loss": 1.8355, "step": 11078 }, { "epoch": 0.37, "grad_norm": 0.4371699094772339, "learning_rate": 0.0005779271000713358, "loss": 1.9843, "step": 11079 }, { "epoch": 0.37, "grad_norm": 0.48000600934028625, "learning_rate": 0.0005779231648620902, "loss": 1.8571, "step": 11080 }, { "epoch": 0.37, "grad_norm": 0.45086023211479187, "learning_rate": 0.0005779192293154867, "loss": 1.884, "step": 11081 }, { "epoch": 0.37, "grad_norm": 0.4487181603908539, "learning_rate": 0.0005779152934315304, "loss": 1.9377, "step": 11082 }, { "epoch": 0.37, "grad_norm": 0.4394833743572235, "learning_rate": 0.0005779113572102258, "loss": 1.8434, "step": 11083 }, { "epoch": 0.37, "grad_norm": 0.4557562470436096, "learning_rate": 0.0005779074206515777, "loss": 1.8861, "step": 11084 }, { "epoch": 0.37, "grad_norm": 0.4507256746292114, "learning_rate": 0.000577903483755591, "loss": 1.971, "step": 11085 }, { "epoch": 0.37, "grad_norm": 0.4324779212474823, "learning_rate": 0.0005778995465222704, "loss": 1.871, "step": 11086 }, { "epoch": 0.37, "grad_norm": 0.46897539496421814, "learning_rate": 0.0005778956089516207, "loss": 1.8604, "step": 11087 }, { "epoch": 0.37, "grad_norm": 0.43933531641960144, "learning_rate": 0.0005778916710436467, "loss": 1.8776, "step": 11088 }, { "epoch": 0.37, "grad_norm": 0.44768884778022766, "learning_rate": 0.000577887732798353, "loss": 1.8805, "step": 11089 }, { "epoch": 0.37, "grad_norm": 0.4588521420955658, "learning_rate": 0.0005778837942157448, "loss": 1.9044, "step": 11090 }, { "epoch": 0.37, "grad_norm": 0.45338472723960876, "learning_rate": 0.0005778798552958265, "loss": 1.8708, "step": 11091 }, { "epoch": 0.37, "grad_norm": 0.46911463141441345, "learning_rate": 0.0005778759160386029, "loss": 1.847, "step": 11092 }, { "epoch": 0.37, "grad_norm": 0.47132179141044617, "learning_rate": 0.000577871976444079, "loss": 1.9846, "step": 11093 }, { "epoch": 0.37, "grad_norm": 0.4713456630706787, "learning_rate": 0.0005778680365122595, "loss": 1.9212, "step": 11094 }, { "epoch": 0.37, "grad_norm": 0.4813189208507538, "learning_rate": 0.0005778640962431491, "loss": 1.8364, "step": 11095 }, { "epoch": 0.37, "grad_norm": 0.43592989444732666, "learning_rate": 0.0005778601556367525, "loss": 1.8985, "step": 11096 }, { "epoch": 0.37, "grad_norm": 0.43280869722366333, "learning_rate": 0.0005778562146930748, "loss": 1.917, "step": 11097 }, { "epoch": 0.37, "grad_norm": 0.45891767740249634, "learning_rate": 0.0005778522734121204, "loss": 1.911, "step": 11098 }, { "epoch": 0.37, "grad_norm": 0.4603002667427063, "learning_rate": 0.0005778483317938945, "loss": 1.9719, "step": 11099 }, { "epoch": 0.37, "grad_norm": 0.44270801544189453, "learning_rate": 0.0005778443898384016, "loss": 1.9181, "step": 11100 }, { "epoch": 0.37, "grad_norm": 0.6707302331924438, "learning_rate": 0.0005778404475456465, "loss": 1.9945, "step": 11101 }, { "epoch": 0.37, "grad_norm": 0.4585721492767334, "learning_rate": 0.000577836504915634, "loss": 1.9557, "step": 11102 }, { "epoch": 0.37, "grad_norm": 0.5415447950363159, "learning_rate": 0.0005778325619483689, "loss": 1.8514, "step": 11103 }, { "epoch": 0.37, "grad_norm": 0.4480486810207367, "learning_rate": 0.0005778286186438562, "loss": 1.8445, "step": 11104 }, { "epoch": 0.37, "grad_norm": 0.4468555450439453, "learning_rate": 0.0005778246750021003, "loss": 1.8521, "step": 11105 }, { "epoch": 0.37, "grad_norm": 0.45615354180336, "learning_rate": 0.0005778207310231064, "loss": 1.843, "step": 11106 }, { "epoch": 0.37, "grad_norm": 0.45831114053726196, "learning_rate": 0.0005778167867068788, "loss": 1.9487, "step": 11107 }, { "epoch": 0.37, "grad_norm": 0.4539489448070526, "learning_rate": 0.0005778128420534227, "loss": 1.8049, "step": 11108 }, { "epoch": 0.37, "grad_norm": 0.44222456216812134, "learning_rate": 0.0005778088970627426, "loss": 1.8473, "step": 11109 }, { "epoch": 0.37, "grad_norm": 0.4534558653831482, "learning_rate": 0.0005778049517348436, "loss": 1.9502, "step": 11110 }, { "epoch": 0.37, "grad_norm": 0.445705771446228, "learning_rate": 0.0005778010060697303, "loss": 1.8204, "step": 11111 }, { "epoch": 0.37, "grad_norm": 0.4505317509174347, "learning_rate": 0.0005777970600674075, "loss": 1.8377, "step": 11112 }, { "epoch": 0.37, "grad_norm": 0.43295660614967346, "learning_rate": 0.00057779311372788, "loss": 1.8883, "step": 11113 }, { "epoch": 0.37, "grad_norm": 0.44419440627098083, "learning_rate": 0.0005777891670511525, "loss": 1.8906, "step": 11114 }, { "epoch": 0.37, "grad_norm": 0.45407944917678833, "learning_rate": 0.00057778522003723, "loss": 1.8781, "step": 11115 }, { "epoch": 0.37, "grad_norm": 0.4595178961753845, "learning_rate": 0.0005777812726861172, "loss": 1.9284, "step": 11116 }, { "epoch": 0.37, "grad_norm": 0.47158703207969666, "learning_rate": 0.0005777773249978187, "loss": 1.9012, "step": 11117 }, { "epoch": 0.37, "grad_norm": 0.4520000219345093, "learning_rate": 0.0005777733769723396, "loss": 1.8432, "step": 11118 }, { "epoch": 0.37, "grad_norm": 0.4512135982513428, "learning_rate": 0.0005777694286096844, "loss": 1.8877, "step": 11119 }, { "epoch": 0.37, "grad_norm": 0.43382933735847473, "learning_rate": 0.0005777654799098581, "loss": 1.8645, "step": 11120 }, { "epoch": 0.37, "grad_norm": 0.45089659094810486, "learning_rate": 0.0005777615308728655, "loss": 1.8938, "step": 11121 }, { "epoch": 0.37, "grad_norm": 0.4431844651699066, "learning_rate": 0.0005777575814987114, "loss": 1.9193, "step": 11122 }, { "epoch": 0.37, "grad_norm": 0.4462231397628784, "learning_rate": 0.0005777536317874004, "loss": 1.8638, "step": 11123 }, { "epoch": 0.37, "grad_norm": 0.4320586025714874, "learning_rate": 0.0005777496817389373, "loss": 1.8381, "step": 11124 }, { "epoch": 0.37, "grad_norm": 0.4551845192909241, "learning_rate": 0.0005777457313533272, "loss": 1.8731, "step": 11125 }, { "epoch": 0.37, "grad_norm": 0.45851415395736694, "learning_rate": 0.0005777417806305747, "loss": 1.8616, "step": 11126 }, { "epoch": 0.37, "grad_norm": 0.4508703947067261, "learning_rate": 0.0005777378295706845, "loss": 1.948, "step": 11127 }, { "epoch": 0.37, "grad_norm": 0.4194541871547699, "learning_rate": 0.0005777338781736616, "loss": 1.89, "step": 11128 }, { "epoch": 0.37, "grad_norm": 0.43154266476631165, "learning_rate": 0.0005777299264395106, "loss": 1.8516, "step": 11129 }, { "epoch": 0.37, "grad_norm": 0.4497614800930023, "learning_rate": 0.0005777259743682366, "loss": 1.9523, "step": 11130 }, { "epoch": 0.37, "grad_norm": 0.42855125665664673, "learning_rate": 0.0005777220219598439, "loss": 1.8857, "step": 11131 }, { "epoch": 0.37, "grad_norm": 0.4464229345321655, "learning_rate": 0.0005777180692143377, "loss": 1.8371, "step": 11132 }, { "epoch": 0.37, "grad_norm": 0.4252004027366638, "learning_rate": 0.0005777141161317227, "loss": 1.8405, "step": 11133 }, { "epoch": 0.37, "grad_norm": 0.4375405013561249, "learning_rate": 0.0005777101627120036, "loss": 1.9148, "step": 11134 }, { "epoch": 0.37, "grad_norm": 0.43501317501068115, "learning_rate": 0.0005777062089551855, "loss": 1.9758, "step": 11135 }, { "epoch": 0.37, "grad_norm": 0.4545919597148895, "learning_rate": 0.0005777022548612727, "loss": 1.9057, "step": 11136 }, { "epoch": 0.37, "grad_norm": 0.4307425618171692, "learning_rate": 0.0005776983004302705, "loss": 1.9217, "step": 11137 }, { "epoch": 0.37, "grad_norm": 0.44771504402160645, "learning_rate": 0.0005776943456621834, "loss": 1.9429, "step": 11138 }, { "epoch": 0.37, "grad_norm": 0.43819716572761536, "learning_rate": 0.0005776903905570163, "loss": 1.8303, "step": 11139 }, { "epoch": 0.37, "grad_norm": 0.4435611963272095, "learning_rate": 0.000577686435114774, "loss": 1.8082, "step": 11140 }, { "epoch": 0.37, "grad_norm": 0.4368378520011902, "learning_rate": 0.0005776824793354611, "loss": 1.8921, "step": 11141 }, { "epoch": 0.37, "grad_norm": 0.4461146295070648, "learning_rate": 0.0005776785232190828, "loss": 1.8541, "step": 11142 }, { "epoch": 0.37, "grad_norm": 0.5419483780860901, "learning_rate": 0.0005776745667656436, "loss": 1.9356, "step": 11143 }, { "epoch": 0.37, "grad_norm": 0.46334269642829895, "learning_rate": 0.0005776706099751484, "loss": 1.9434, "step": 11144 }, { "epoch": 0.37, "grad_norm": 0.43986496329307556, "learning_rate": 0.000577666652847602, "loss": 1.9354, "step": 11145 }, { "epoch": 0.37, "grad_norm": 0.4302509129047394, "learning_rate": 0.0005776626953830091, "loss": 1.8343, "step": 11146 }, { "epoch": 0.37, "grad_norm": 0.4493831396102905, "learning_rate": 0.0005776587375813748, "loss": 1.9717, "step": 11147 }, { "epoch": 0.37, "grad_norm": 0.4670679569244385, "learning_rate": 0.0005776547794427036, "loss": 1.8786, "step": 11148 }, { "epoch": 0.37, "grad_norm": 0.45189526677131653, "learning_rate": 0.0005776508209670003, "loss": 1.7892, "step": 11149 }, { "epoch": 0.37, "grad_norm": 0.43662935495376587, "learning_rate": 0.00057764686215427, "loss": 1.7701, "step": 11150 }, { "epoch": 0.37, "grad_norm": 0.45376667380332947, "learning_rate": 0.0005776429030045171, "loss": 1.8612, "step": 11151 }, { "epoch": 0.37, "grad_norm": 0.4571147859096527, "learning_rate": 0.0005776389435177469, "loss": 1.8753, "step": 11152 }, { "epoch": 0.37, "grad_norm": 0.45292508602142334, "learning_rate": 0.0005776349836939637, "loss": 1.8723, "step": 11153 }, { "epoch": 0.37, "grad_norm": 0.4615316390991211, "learning_rate": 0.0005776310235331727, "loss": 1.9403, "step": 11154 }, { "epoch": 0.37, "grad_norm": 0.43806612491607666, "learning_rate": 0.0005776270630353784, "loss": 1.8133, "step": 11155 }, { "epoch": 0.37, "grad_norm": 0.4406157433986664, "learning_rate": 0.0005776231022005859, "loss": 1.8257, "step": 11156 }, { "epoch": 0.37, "grad_norm": 0.45745453238487244, "learning_rate": 0.0005776191410287998, "loss": 1.8793, "step": 11157 }, { "epoch": 0.37, "grad_norm": 0.4333636164665222, "learning_rate": 0.000577615179520025, "loss": 1.8934, "step": 11158 }, { "epoch": 0.37, "grad_norm": 0.4407544434070587, "learning_rate": 0.0005776112176742662, "loss": 1.9023, "step": 11159 }, { "epoch": 0.37, "grad_norm": 0.4539138972759247, "learning_rate": 0.0005776072554915283, "loss": 1.8967, "step": 11160 }, { "epoch": 0.37, "grad_norm": 0.43104568123817444, "learning_rate": 0.0005776032929718161, "loss": 1.9506, "step": 11161 }, { "epoch": 0.37, "grad_norm": 0.46178552508354187, "learning_rate": 0.0005775993301151344, "loss": 1.9893, "step": 11162 }, { "epoch": 0.37, "grad_norm": 0.44220027327537537, "learning_rate": 0.000577595366921488, "loss": 1.9244, "step": 11163 }, { "epoch": 0.37, "grad_norm": 0.43023091554641724, "learning_rate": 0.0005775914033908818, "loss": 1.9091, "step": 11164 }, { "epoch": 0.37, "grad_norm": 0.4343217611312866, "learning_rate": 0.0005775874395233205, "loss": 1.8673, "step": 11165 }, { "epoch": 0.37, "grad_norm": 0.44176849722862244, "learning_rate": 0.0005775834753188088, "loss": 1.9252, "step": 11166 }, { "epoch": 0.37, "grad_norm": 0.4331042766571045, "learning_rate": 0.0005775795107773519, "loss": 1.9106, "step": 11167 }, { "epoch": 0.37, "grad_norm": 0.44026613235473633, "learning_rate": 0.0005775755458989543, "loss": 1.9261, "step": 11168 }, { "epoch": 0.37, "grad_norm": 0.447578489780426, "learning_rate": 0.0005775715806836209, "loss": 1.8915, "step": 11169 }, { "epoch": 0.37, "grad_norm": 0.42651036381721497, "learning_rate": 0.0005775676151313564, "loss": 1.8801, "step": 11170 }, { "epoch": 0.37, "grad_norm": 0.4491793215274811, "learning_rate": 0.0005775636492421658, "loss": 1.8411, "step": 11171 }, { "epoch": 0.37, "grad_norm": 0.4247024655342102, "learning_rate": 0.0005775596830160538, "loss": 1.8012, "step": 11172 }, { "epoch": 0.37, "grad_norm": 0.45869922637939453, "learning_rate": 0.0005775557164530253, "loss": 1.8325, "step": 11173 }, { "epoch": 0.37, "grad_norm": 0.46645763516426086, "learning_rate": 0.0005775517495530849, "loss": 1.7708, "step": 11174 }, { "epoch": 0.37, "grad_norm": 0.4562402367591858, "learning_rate": 0.0005775477823162377, "loss": 1.7991, "step": 11175 }, { "epoch": 0.37, "grad_norm": 0.4555603861808777, "learning_rate": 0.0005775438147424883, "loss": 1.9028, "step": 11176 }, { "epoch": 0.37, "grad_norm": 0.5637101531028748, "learning_rate": 0.0005775398468318417, "loss": 1.9526, "step": 11177 }, { "epoch": 0.37, "grad_norm": 0.4380151033401489, "learning_rate": 0.0005775358785843026, "loss": 1.9202, "step": 11178 }, { "epoch": 0.37, "grad_norm": 0.4604642987251282, "learning_rate": 0.0005775319099998759, "loss": 1.8086, "step": 11179 }, { "epoch": 0.37, "grad_norm": 0.46352797746658325, "learning_rate": 0.0005775279410785662, "loss": 1.9515, "step": 11180 }, { "epoch": 0.37, "grad_norm": 0.4480400085449219, "learning_rate": 0.0005775239718203784, "loss": 1.8972, "step": 11181 }, { "epoch": 0.37, "grad_norm": 0.4515686631202698, "learning_rate": 0.0005775200022253177, "loss": 1.9029, "step": 11182 }, { "epoch": 0.37, "grad_norm": 0.49594053626060486, "learning_rate": 0.0005775160322933884, "loss": 1.927, "step": 11183 }, { "epoch": 0.37, "grad_norm": 0.46207332611083984, "learning_rate": 0.0005775120620245956, "loss": 1.9563, "step": 11184 }, { "epoch": 0.37, "grad_norm": 0.4304943382740021, "learning_rate": 0.0005775080914189439, "loss": 1.8079, "step": 11185 }, { "epoch": 0.37, "grad_norm": 0.4351152777671814, "learning_rate": 0.0005775041204764385, "loss": 1.8645, "step": 11186 }, { "epoch": 0.37, "grad_norm": 0.4781375825405121, "learning_rate": 0.0005775001491970838, "loss": 1.9667, "step": 11187 }, { "epoch": 0.37, "grad_norm": 0.4500202536582947, "learning_rate": 0.0005774961775808849, "loss": 1.8605, "step": 11188 }, { "epoch": 0.37, "grad_norm": 0.425498366355896, "learning_rate": 0.0005774922056278464, "loss": 1.9109, "step": 11189 }, { "epoch": 0.37, "grad_norm": 0.4644903242588043, "learning_rate": 0.0005774882333379734, "loss": 1.9481, "step": 11190 }, { "epoch": 0.37, "grad_norm": 0.5066015124320984, "learning_rate": 0.0005774842607112706, "loss": 1.9364, "step": 11191 }, { "epoch": 0.37, "grad_norm": 0.48698750138282776, "learning_rate": 0.0005774802877477426, "loss": 1.9224, "step": 11192 }, { "epoch": 0.37, "grad_norm": 0.4568467438220978, "learning_rate": 0.0005774763144473946, "loss": 1.9206, "step": 11193 }, { "epoch": 0.37, "grad_norm": 0.4671323001384735, "learning_rate": 0.0005774723408102312, "loss": 1.9027, "step": 11194 }, { "epoch": 0.37, "grad_norm": 0.48893770575523376, "learning_rate": 0.0005774683668362572, "loss": 1.8786, "step": 11195 }, { "epoch": 0.37, "grad_norm": 0.4596973657608032, "learning_rate": 0.0005774643925254775, "loss": 1.9044, "step": 11196 }, { "epoch": 0.37, "grad_norm": 0.4705595076084137, "learning_rate": 0.0005774604178778969, "loss": 1.9106, "step": 11197 }, { "epoch": 0.37, "grad_norm": 0.5033197999000549, "learning_rate": 0.0005774564428935203, "loss": 1.9211, "step": 11198 }, { "epoch": 0.37, "grad_norm": 0.45720311999320984, "learning_rate": 0.0005774524675723524, "loss": 1.857, "step": 11199 }, { "epoch": 0.37, "grad_norm": 0.47572800517082214, "learning_rate": 0.0005774484919143981, "loss": 1.9298, "step": 11200 }, { "epoch": 0.37, "grad_norm": 0.4962940812110901, "learning_rate": 0.0005774445159196622, "loss": 1.9182, "step": 11201 }, { "epoch": 0.37, "grad_norm": 0.4728979468345642, "learning_rate": 0.0005774405395881496, "loss": 1.8631, "step": 11202 }, { "epoch": 0.37, "grad_norm": 0.4630140960216522, "learning_rate": 0.000577436562919865, "loss": 1.9081, "step": 11203 }, { "epoch": 0.37, "grad_norm": 0.48475804924964905, "learning_rate": 0.0005774325859148133, "loss": 1.8893, "step": 11204 }, { "epoch": 0.37, "grad_norm": 0.4586033225059509, "learning_rate": 0.0005774286085729992, "loss": 1.8572, "step": 11205 }, { "epoch": 0.37, "grad_norm": 0.4692288339138031, "learning_rate": 0.0005774246308944278, "loss": 1.9631, "step": 11206 }, { "epoch": 0.37, "grad_norm": 0.4443180561065674, "learning_rate": 0.0005774206528791038, "loss": 1.873, "step": 11207 }, { "epoch": 0.37, "grad_norm": 0.457893043756485, "learning_rate": 0.000577416674527032, "loss": 1.9315, "step": 11208 }, { "epoch": 0.37, "grad_norm": 0.48329365253448486, "learning_rate": 0.0005774126958382171, "loss": 1.9152, "step": 11209 }, { "epoch": 0.37, "grad_norm": 0.470525860786438, "learning_rate": 0.0005774087168126642, "loss": 1.8595, "step": 11210 }, { "epoch": 0.37, "grad_norm": 0.4594906270503998, "learning_rate": 0.0005774047374503779, "loss": 1.8475, "step": 11211 }, { "epoch": 0.37, "grad_norm": 0.45400315523147583, "learning_rate": 0.0005774007577513631, "loss": 1.9263, "step": 11212 }, { "epoch": 0.37, "grad_norm": 0.4849601686000824, "learning_rate": 0.0005773967777156247, "loss": 1.8699, "step": 11213 }, { "epoch": 0.37, "grad_norm": 0.45974019169807434, "learning_rate": 0.0005773927973431677, "loss": 1.8855, "step": 11214 }, { "epoch": 0.37, "grad_norm": 0.491477370262146, "learning_rate": 0.0005773888166339964, "loss": 1.9286, "step": 11215 }, { "epoch": 0.37, "grad_norm": 0.4610523283481598, "learning_rate": 0.0005773848355881161, "loss": 1.8643, "step": 11216 }, { "epoch": 0.37, "grad_norm": 0.45145678520202637, "learning_rate": 0.0005773808542055315, "loss": 1.8912, "step": 11217 }, { "epoch": 0.37, "grad_norm": 0.4457523226737976, "learning_rate": 0.0005773768724862473, "loss": 1.9478, "step": 11218 }, { "epoch": 0.37, "grad_norm": 0.44129428267478943, "learning_rate": 0.0005773728904302684, "loss": 1.9368, "step": 11219 }, { "epoch": 0.37, "grad_norm": 0.448223739862442, "learning_rate": 0.0005773689080375998, "loss": 1.8375, "step": 11220 }, { "epoch": 0.37, "grad_norm": 0.44060590863227844, "learning_rate": 0.0005773649253082463, "loss": 1.8743, "step": 11221 }, { "epoch": 0.37, "grad_norm": 0.4440174698829651, "learning_rate": 0.0005773609422422124, "loss": 1.9057, "step": 11222 }, { "epoch": 0.37, "grad_norm": 0.43564286828041077, "learning_rate": 0.0005773569588395034, "loss": 1.8608, "step": 11223 }, { "epoch": 0.37, "grad_norm": 0.45583996176719666, "learning_rate": 0.0005773529751001239, "loss": 1.8428, "step": 11224 }, { "epoch": 0.37, "grad_norm": 0.49645620584487915, "learning_rate": 0.0005773489910240787, "loss": 1.8739, "step": 11225 }, { "epoch": 0.37, "grad_norm": 0.44530829787254333, "learning_rate": 0.0005773450066113726, "loss": 1.9345, "step": 11226 }, { "epoch": 0.37, "grad_norm": 0.43410801887512207, "learning_rate": 0.0005773410218620106, "loss": 1.9188, "step": 11227 }, { "epoch": 0.37, "grad_norm": 0.4599825441837311, "learning_rate": 0.0005773370367759974, "loss": 1.8805, "step": 11228 }, { "epoch": 0.37, "grad_norm": 0.48276445269584656, "learning_rate": 0.000577333051353338, "loss": 1.8891, "step": 11229 }, { "epoch": 0.37, "grad_norm": 0.4283965528011322, "learning_rate": 0.0005773290655940372, "loss": 1.8763, "step": 11230 }, { "epoch": 0.37, "grad_norm": 0.4561840295791626, "learning_rate": 0.0005773250794980997, "loss": 1.9258, "step": 11231 }, { "epoch": 0.37, "grad_norm": 0.4404222369194031, "learning_rate": 0.0005773210930655304, "loss": 1.8542, "step": 11232 }, { "epoch": 0.37, "grad_norm": 0.4579925239086151, "learning_rate": 0.0005773171062963342, "loss": 1.9543, "step": 11233 }, { "epoch": 0.37, "grad_norm": 0.4339450001716614, "learning_rate": 0.0005773131191905158, "loss": 1.8027, "step": 11234 }, { "epoch": 0.37, "grad_norm": 0.44271019101142883, "learning_rate": 0.0005773091317480801, "loss": 1.8059, "step": 11235 }, { "epoch": 0.37, "grad_norm": 0.434490829706192, "learning_rate": 0.0005773051439690321, "loss": 1.8811, "step": 11236 }, { "epoch": 0.37, "grad_norm": 0.4430686831474304, "learning_rate": 0.0005773011558533765, "loss": 2.0003, "step": 11237 }, { "epoch": 0.37, "grad_norm": 0.4295811653137207, "learning_rate": 0.0005772971674011182, "loss": 1.9113, "step": 11238 }, { "epoch": 0.37, "grad_norm": 0.47994306683540344, "learning_rate": 0.0005772931786122619, "loss": 1.8767, "step": 11239 }, { "epoch": 0.37, "grad_norm": 0.4463880658149719, "learning_rate": 0.0005772891894868126, "loss": 1.8497, "step": 11240 }, { "epoch": 0.37, "grad_norm": 0.44124042987823486, "learning_rate": 0.000577285200024775, "loss": 1.9113, "step": 11241 }, { "epoch": 0.37, "grad_norm": 0.4713912308216095, "learning_rate": 0.0005772812102261541, "loss": 1.9796, "step": 11242 }, { "epoch": 0.37, "grad_norm": 0.4392540156841278, "learning_rate": 0.0005772772200909546, "loss": 1.9364, "step": 11243 }, { "epoch": 0.37, "grad_norm": 0.44387170672416687, "learning_rate": 0.0005772732296191815, "loss": 1.8652, "step": 11244 }, { "epoch": 0.37, "grad_norm": 0.4441976547241211, "learning_rate": 0.0005772692388108395, "loss": 1.8504, "step": 11245 }, { "epoch": 0.37, "grad_norm": 0.4611174762248993, "learning_rate": 0.0005772652476659334, "loss": 1.8859, "step": 11246 }, { "epoch": 0.37, "grad_norm": 0.4474222660064697, "learning_rate": 0.0005772612561844684, "loss": 1.812, "step": 11247 }, { "epoch": 0.37, "grad_norm": 0.4587530791759491, "learning_rate": 0.0005772572643664488, "loss": 1.9088, "step": 11248 }, { "epoch": 0.37, "grad_norm": 0.4457646906375885, "learning_rate": 0.0005772532722118799, "loss": 1.8367, "step": 11249 }, { "epoch": 0.37, "grad_norm": 0.43910127878189087, "learning_rate": 0.0005772492797207664, "loss": 1.8432, "step": 11250 }, { "epoch": 0.37, "grad_norm": 0.4404735863208771, "learning_rate": 0.000577245286893113, "loss": 1.8509, "step": 11251 }, { "epoch": 0.37, "grad_norm": 0.4423309564590454, "learning_rate": 0.0005772412937289247, "loss": 1.8992, "step": 11252 }, { "epoch": 0.37, "grad_norm": 0.46782752871513367, "learning_rate": 0.0005772373002282064, "loss": 1.9389, "step": 11253 }, { "epoch": 0.37, "grad_norm": 0.436343789100647, "learning_rate": 0.0005772333063909629, "loss": 1.9496, "step": 11254 }, { "epoch": 0.37, "grad_norm": 0.4686361849308014, "learning_rate": 0.0005772293122171989, "loss": 1.9428, "step": 11255 }, { "epoch": 0.37, "grad_norm": 0.45107123255729675, "learning_rate": 0.0005772253177069194, "loss": 1.866, "step": 11256 }, { "epoch": 0.37, "grad_norm": 0.4400733411312103, "learning_rate": 0.0005772213228601292, "loss": 1.9525, "step": 11257 }, { "epoch": 0.37, "grad_norm": 0.45641687512397766, "learning_rate": 0.0005772173276768331, "loss": 1.9664, "step": 11258 }, { "epoch": 0.37, "grad_norm": 0.4454222023487091, "learning_rate": 0.0005772133321570362, "loss": 1.8815, "step": 11259 }, { "epoch": 0.37, "grad_norm": 0.45197075605392456, "learning_rate": 0.000577209336300743, "loss": 1.8304, "step": 11260 }, { "epoch": 0.37, "grad_norm": 0.5078673362731934, "learning_rate": 0.0005772053401079585, "loss": 1.8921, "step": 11261 }, { "epoch": 0.37, "grad_norm": 0.4805164933204651, "learning_rate": 0.0005772013435786877, "loss": 1.9514, "step": 11262 }, { "epoch": 0.37, "grad_norm": 0.458188533782959, "learning_rate": 0.0005771973467129352, "loss": 1.8886, "step": 11263 }, { "epoch": 0.37, "grad_norm": 0.4544827342033386, "learning_rate": 0.000577193349510706, "loss": 1.86, "step": 11264 }, { "epoch": 0.37, "grad_norm": 0.4455869197845459, "learning_rate": 0.0005771893519720049, "loss": 1.7772, "step": 11265 }, { "epoch": 0.37, "grad_norm": 0.458018034696579, "learning_rate": 0.0005771853540968367, "loss": 1.897, "step": 11266 }, { "epoch": 0.37, "grad_norm": 0.49236345291137695, "learning_rate": 0.0005771813558852065, "loss": 1.9651, "step": 11267 }, { "epoch": 0.37, "grad_norm": 0.45669952034950256, "learning_rate": 0.0005771773573371189, "loss": 1.9484, "step": 11268 }, { "epoch": 0.37, "grad_norm": 0.4227290153503418, "learning_rate": 0.0005771733584525788, "loss": 1.8314, "step": 11269 }, { "epoch": 0.37, "grad_norm": 0.46052083373069763, "learning_rate": 0.000577169359231591, "loss": 1.9038, "step": 11270 }, { "epoch": 0.37, "grad_norm": 0.4357251226902008, "learning_rate": 0.0005771653596741606, "loss": 1.8294, "step": 11271 }, { "epoch": 0.38, "grad_norm": 0.4380595088005066, "learning_rate": 0.0005771613597802921, "loss": 1.8625, "step": 11272 }, { "epoch": 0.38, "grad_norm": 0.43558481335639954, "learning_rate": 0.0005771573595499907, "loss": 1.9595, "step": 11273 }, { "epoch": 0.38, "grad_norm": 0.43945059180259705, "learning_rate": 0.0005771533589832611, "loss": 1.9443, "step": 11274 }, { "epoch": 0.38, "grad_norm": 0.4266366958618164, "learning_rate": 0.0005771493580801081, "loss": 1.8864, "step": 11275 }, { "epoch": 0.38, "grad_norm": 0.4373677372932434, "learning_rate": 0.0005771453568405367, "loss": 1.8536, "step": 11276 }, { "epoch": 0.38, "grad_norm": 0.4334123730659485, "learning_rate": 0.0005771413552645516, "loss": 1.8191, "step": 11277 }, { "epoch": 0.38, "grad_norm": 0.42133209109306335, "learning_rate": 0.0005771373533521577, "loss": 1.8346, "step": 11278 }, { "epoch": 0.38, "grad_norm": 0.45072832703590393, "learning_rate": 0.0005771333511033599, "loss": 1.9014, "step": 11279 }, { "epoch": 0.38, "grad_norm": 0.4324306547641754, "learning_rate": 0.0005771293485181631, "loss": 1.813, "step": 11280 }, { "epoch": 0.38, "grad_norm": 0.44345736503601074, "learning_rate": 0.0005771253455965721, "loss": 1.9228, "step": 11281 }, { "epoch": 0.38, "grad_norm": 0.4805539548397064, "learning_rate": 0.0005771213423385917, "loss": 1.9195, "step": 11282 }, { "epoch": 0.38, "grad_norm": 0.47248417139053345, "learning_rate": 0.0005771173387442268, "loss": 1.9227, "step": 11283 }, { "epoch": 0.38, "grad_norm": 0.463984876871109, "learning_rate": 0.0005771133348134825, "loss": 1.8298, "step": 11284 }, { "epoch": 0.38, "grad_norm": 0.43486645817756653, "learning_rate": 0.0005771093305463632, "loss": 1.8869, "step": 11285 }, { "epoch": 0.38, "grad_norm": 0.4482112526893616, "learning_rate": 0.0005771053259428741, "loss": 1.8911, "step": 11286 }, { "epoch": 0.38, "grad_norm": 0.45238053798675537, "learning_rate": 0.00057710132100302, "loss": 1.8674, "step": 11287 }, { "epoch": 0.38, "grad_norm": 0.4831344485282898, "learning_rate": 0.0005770973157268056, "loss": 1.8776, "step": 11288 }, { "epoch": 0.38, "grad_norm": 0.44987785816192627, "learning_rate": 0.000577093310114236, "loss": 1.8932, "step": 11289 }, { "epoch": 0.38, "grad_norm": 0.4389132857322693, "learning_rate": 0.0005770893041653159, "loss": 1.8472, "step": 11290 }, { "epoch": 0.38, "grad_norm": 0.504761278629303, "learning_rate": 0.0005770852978800502, "loss": 1.88, "step": 11291 }, { "epoch": 0.38, "grad_norm": 0.45205244421958923, "learning_rate": 0.0005770812912584438, "loss": 1.9177, "step": 11292 }, { "epoch": 0.38, "grad_norm": 0.422624409198761, "learning_rate": 0.0005770772843005016, "loss": 1.8986, "step": 11293 }, { "epoch": 0.38, "grad_norm": 0.4487095773220062, "learning_rate": 0.0005770732770062283, "loss": 1.9266, "step": 11294 }, { "epoch": 0.38, "grad_norm": 0.45015034079551697, "learning_rate": 0.0005770692693756289, "loss": 1.8719, "step": 11295 }, { "epoch": 0.38, "grad_norm": 0.4459293782711029, "learning_rate": 0.0005770652614087082, "loss": 1.8881, "step": 11296 }, { "epoch": 0.38, "grad_norm": 0.452910453081131, "learning_rate": 0.0005770612531054712, "loss": 1.9895, "step": 11297 }, { "epoch": 0.38, "grad_norm": 0.4533137083053589, "learning_rate": 0.0005770572444659226, "loss": 1.8825, "step": 11298 }, { "epoch": 0.38, "grad_norm": 0.5593639612197876, "learning_rate": 0.0005770532354900673, "loss": 1.9044, "step": 11299 }, { "epoch": 0.38, "grad_norm": 0.44333580136299133, "learning_rate": 0.0005770492261779102, "loss": 1.8139, "step": 11300 }, { "epoch": 0.38, "grad_norm": 0.44717493653297424, "learning_rate": 0.0005770452165294562, "loss": 1.892, "step": 11301 }, { "epoch": 0.38, "grad_norm": 0.45393022894859314, "learning_rate": 0.0005770412065447102, "loss": 1.9069, "step": 11302 }, { "epoch": 0.38, "grad_norm": 0.4626711308956146, "learning_rate": 0.0005770371962236768, "loss": 2.0106, "step": 11303 }, { "epoch": 0.38, "grad_norm": 0.4385807514190674, "learning_rate": 0.0005770331855663612, "loss": 1.9329, "step": 11304 }, { "epoch": 0.38, "grad_norm": 0.46655839681625366, "learning_rate": 0.0005770291745727681, "loss": 1.8172, "step": 11305 }, { "epoch": 0.38, "grad_norm": 0.4602147340774536, "learning_rate": 0.0005770251632429024, "loss": 1.8698, "step": 11306 }, { "epoch": 0.38, "grad_norm": 0.429402619600296, "learning_rate": 0.000577021151576769, "loss": 1.8692, "step": 11307 }, { "epoch": 0.38, "grad_norm": 0.45849496126174927, "learning_rate": 0.0005770171395743727, "loss": 1.8412, "step": 11308 }, { "epoch": 0.38, "grad_norm": 0.47990989685058594, "learning_rate": 0.0005770131272357184, "loss": 1.938, "step": 11309 }, { "epoch": 0.38, "grad_norm": 0.44881099462509155, "learning_rate": 0.000577009114560811, "loss": 1.8769, "step": 11310 }, { "epoch": 0.38, "grad_norm": 0.4638720750808716, "learning_rate": 0.0005770051015496553, "loss": 1.9329, "step": 11311 }, { "epoch": 0.38, "grad_norm": 0.45371976494789124, "learning_rate": 0.0005770010882022563, "loss": 1.894, "step": 11312 }, { "epoch": 0.38, "grad_norm": 0.4457908272743225, "learning_rate": 0.0005769970745186188, "loss": 1.9229, "step": 11313 }, { "epoch": 0.38, "grad_norm": 0.4424837827682495, "learning_rate": 0.0005769930604987477, "loss": 1.91, "step": 11314 }, { "epoch": 0.38, "grad_norm": 0.44328227639198303, "learning_rate": 0.0005769890461426476, "loss": 1.847, "step": 11315 }, { "epoch": 0.38, "grad_norm": 0.4589977264404297, "learning_rate": 0.0005769850314503238, "loss": 1.8558, "step": 11316 }, { "epoch": 0.38, "grad_norm": 0.4964032769203186, "learning_rate": 0.000576981016421781, "loss": 1.9116, "step": 11317 }, { "epoch": 0.38, "grad_norm": 0.4550262987613678, "learning_rate": 0.000576977001057024, "loss": 2.0058, "step": 11318 }, { "epoch": 0.38, "grad_norm": 0.44470998644828796, "learning_rate": 0.0005769729853560577, "loss": 1.8942, "step": 11319 }, { "epoch": 0.38, "grad_norm": 0.4501494765281677, "learning_rate": 0.0005769689693188871, "loss": 1.939, "step": 11320 }, { "epoch": 0.38, "grad_norm": 0.45698896050453186, "learning_rate": 0.000576964952945517, "loss": 1.9126, "step": 11321 }, { "epoch": 0.38, "grad_norm": 0.4576871991157532, "learning_rate": 0.0005769609362359521, "loss": 1.9654, "step": 11322 }, { "epoch": 0.38, "grad_norm": 0.4373554289340973, "learning_rate": 0.0005769569191901976, "loss": 1.8919, "step": 11323 }, { "epoch": 0.38, "grad_norm": 0.4417315721511841, "learning_rate": 0.0005769529018082581, "loss": 1.9996, "step": 11324 }, { "epoch": 0.38, "grad_norm": 0.46378955245018005, "learning_rate": 0.0005769488840901385, "loss": 1.8142, "step": 11325 }, { "epoch": 0.38, "grad_norm": 0.4763987958431244, "learning_rate": 0.000576944866035844, "loss": 1.9232, "step": 11326 }, { "epoch": 0.38, "grad_norm": 0.42999690771102905, "learning_rate": 0.0005769408476453791, "loss": 1.8561, "step": 11327 }, { "epoch": 0.38, "grad_norm": 0.9046879410743713, "learning_rate": 0.0005769368289187488, "loss": 1.9385, "step": 11328 }, { "epoch": 0.38, "grad_norm": 0.45456719398498535, "learning_rate": 0.0005769328098559581, "loss": 1.8586, "step": 11329 }, { "epoch": 0.38, "grad_norm": 0.4262082874774933, "learning_rate": 0.0005769287904570116, "loss": 1.7964, "step": 11330 }, { "epoch": 0.38, "grad_norm": 0.43920135498046875, "learning_rate": 0.0005769247707219145, "loss": 1.8644, "step": 11331 }, { "epoch": 0.38, "grad_norm": 0.43843600153923035, "learning_rate": 0.0005769207506506715, "loss": 1.8796, "step": 11332 }, { "epoch": 0.38, "grad_norm": 0.45647284388542175, "learning_rate": 0.0005769167302432874, "loss": 1.9051, "step": 11333 }, { "epoch": 0.38, "grad_norm": 0.46718060970306396, "learning_rate": 0.0005769127094997673, "loss": 1.8873, "step": 11334 }, { "epoch": 0.38, "grad_norm": 0.444654643535614, "learning_rate": 0.0005769086884201159, "loss": 1.9075, "step": 11335 }, { "epoch": 0.38, "grad_norm": 0.46308672428131104, "learning_rate": 0.0005769046670043383, "loss": 1.8665, "step": 11336 }, { "epoch": 0.38, "grad_norm": 0.4554377794265747, "learning_rate": 0.0005769006452524391, "loss": 1.9494, "step": 11337 }, { "epoch": 0.38, "grad_norm": 0.4466182291507721, "learning_rate": 0.0005768966231644233, "loss": 1.8318, "step": 11338 }, { "epoch": 0.38, "grad_norm": 0.4277227520942688, "learning_rate": 0.0005768926007402959, "loss": 1.8096, "step": 11339 }, { "epoch": 0.38, "grad_norm": 0.46465641260147095, "learning_rate": 0.0005768885779800617, "loss": 1.9532, "step": 11340 }, { "epoch": 0.38, "grad_norm": 0.45165011286735535, "learning_rate": 0.0005768845548837255, "loss": 1.9037, "step": 11341 }, { "epoch": 0.38, "grad_norm": 0.45493850111961365, "learning_rate": 0.0005768805314512923, "loss": 1.911, "step": 11342 }, { "epoch": 0.38, "grad_norm": 0.46040499210357666, "learning_rate": 0.0005768765076827668, "loss": 1.8591, "step": 11343 }, { "epoch": 0.38, "grad_norm": 0.43637609481811523, "learning_rate": 0.0005768724835781541, "loss": 1.8571, "step": 11344 }, { "epoch": 0.38, "grad_norm": 0.43603819608688354, "learning_rate": 0.000576868459137459, "loss": 1.7739, "step": 11345 }, { "epoch": 0.38, "grad_norm": 0.44279688596725464, "learning_rate": 0.0005768644343606864, "loss": 1.8792, "step": 11346 }, { "epoch": 0.38, "grad_norm": 0.4445318281650543, "learning_rate": 0.0005768604092478411, "loss": 1.8751, "step": 11347 }, { "epoch": 0.38, "grad_norm": 0.4489021301269531, "learning_rate": 0.0005768563837989281, "loss": 1.9607, "step": 11348 }, { "epoch": 0.38, "grad_norm": 0.4613804817199707, "learning_rate": 0.0005768523580139522, "loss": 1.8541, "step": 11349 }, { "epoch": 0.38, "grad_norm": 0.4362277090549469, "learning_rate": 0.0005768483318929185, "loss": 1.8567, "step": 11350 }, { "epoch": 0.38, "grad_norm": 0.4414331912994385, "learning_rate": 0.0005768443054358315, "loss": 1.8703, "step": 11351 }, { "epoch": 0.38, "grad_norm": 0.4430979788303375, "learning_rate": 0.0005768402786426965, "loss": 1.8442, "step": 11352 }, { "epoch": 0.38, "grad_norm": 0.45378538966178894, "learning_rate": 0.000576836251513518, "loss": 1.8556, "step": 11353 }, { "epoch": 0.38, "grad_norm": 0.4482181966304779, "learning_rate": 0.0005768322240483012, "loss": 1.8768, "step": 11354 }, { "epoch": 0.38, "grad_norm": 0.43855658173561096, "learning_rate": 0.0005768281962470509, "loss": 1.8866, "step": 11355 }, { "epoch": 0.38, "grad_norm": 0.4595028758049011, "learning_rate": 0.0005768241681097718, "loss": 1.8646, "step": 11356 }, { "epoch": 0.38, "grad_norm": 0.4756932556629181, "learning_rate": 0.0005768201396364691, "loss": 1.8815, "step": 11357 }, { "epoch": 0.38, "grad_norm": 0.44768989086151123, "learning_rate": 0.0005768161108271476, "loss": 1.9102, "step": 11358 }, { "epoch": 0.38, "grad_norm": 0.47625458240509033, "learning_rate": 0.0005768120816818119, "loss": 1.7995, "step": 11359 }, { "epoch": 0.38, "grad_norm": 0.4691416323184967, "learning_rate": 0.0005768080522004672, "loss": 1.9002, "step": 11360 }, { "epoch": 0.38, "grad_norm": 0.4554780423641205, "learning_rate": 0.0005768040223831185, "loss": 1.8862, "step": 11361 }, { "epoch": 0.38, "grad_norm": 0.4540154039859772, "learning_rate": 0.0005767999922297702, "loss": 1.8919, "step": 11362 }, { "epoch": 0.38, "grad_norm": 0.45584636926651, "learning_rate": 0.0005767959617404276, "loss": 1.9346, "step": 11363 }, { "epoch": 0.38, "grad_norm": 0.48171091079711914, "learning_rate": 0.0005767919309150956, "loss": 1.9037, "step": 11364 }, { "epoch": 0.38, "grad_norm": 0.4476664960384369, "learning_rate": 0.0005767878997537789, "loss": 1.8169, "step": 11365 }, { "epoch": 0.38, "grad_norm": 0.4748559892177582, "learning_rate": 0.0005767838682564824, "loss": 1.9068, "step": 11366 }, { "epoch": 0.38, "grad_norm": 0.46429377794265747, "learning_rate": 0.0005767798364232112, "loss": 1.9803, "step": 11367 }, { "epoch": 0.38, "grad_norm": 0.4551648795604706, "learning_rate": 0.00057677580425397, "loss": 1.9377, "step": 11368 }, { "epoch": 0.38, "grad_norm": 0.453720360994339, "learning_rate": 0.0005767717717487638, "loss": 1.8836, "step": 11369 }, { "epoch": 0.38, "grad_norm": 0.4386711120605469, "learning_rate": 0.0005767677389075974, "loss": 1.8833, "step": 11370 }, { "epoch": 0.38, "grad_norm": 0.48406335711479187, "learning_rate": 0.0005767637057304758, "loss": 1.9145, "step": 11371 }, { "epoch": 0.38, "grad_norm": 0.43849125504493713, "learning_rate": 0.0005767596722174037, "loss": 1.948, "step": 11372 }, { "epoch": 0.38, "grad_norm": 0.43832290172576904, "learning_rate": 0.0005767556383683862, "loss": 1.9003, "step": 11373 }, { "epoch": 0.38, "grad_norm": 0.45171090960502625, "learning_rate": 0.0005767516041834282, "loss": 1.8568, "step": 11374 }, { "epoch": 0.38, "grad_norm": 0.4753950536251068, "learning_rate": 0.0005767475696625346, "loss": 1.9306, "step": 11375 }, { "epoch": 0.38, "grad_norm": 0.4494110941886902, "learning_rate": 0.00057674353480571, "loss": 1.8576, "step": 11376 }, { "epoch": 0.38, "grad_norm": 0.44463539123535156, "learning_rate": 0.0005767394996129597, "loss": 1.9191, "step": 11377 }, { "epoch": 0.38, "grad_norm": 0.44464510679244995, "learning_rate": 0.0005767354640842885, "loss": 1.8646, "step": 11378 }, { "epoch": 0.38, "grad_norm": 0.462783545255661, "learning_rate": 0.0005767314282197009, "loss": 1.9631, "step": 11379 }, { "epoch": 0.38, "grad_norm": 0.4420353174209595, "learning_rate": 0.0005767273920192023, "loss": 1.7846, "step": 11380 }, { "epoch": 0.38, "grad_norm": 0.42511433362960815, "learning_rate": 0.0005767233554827975, "loss": 1.8443, "step": 11381 }, { "epoch": 0.38, "grad_norm": 0.44003233313560486, "learning_rate": 0.0005767193186104913, "loss": 1.8836, "step": 11382 }, { "epoch": 0.38, "grad_norm": 0.4550352990627289, "learning_rate": 0.0005767152814022885, "loss": 1.953, "step": 11383 }, { "epoch": 0.38, "grad_norm": 0.44210994243621826, "learning_rate": 0.0005767112438581942, "loss": 1.8587, "step": 11384 }, { "epoch": 0.38, "grad_norm": 0.4449586570262909, "learning_rate": 0.0005767072059782132, "loss": 1.8848, "step": 11385 }, { "epoch": 0.38, "grad_norm": 0.4361249804496765, "learning_rate": 0.0005767031677623505, "loss": 1.9118, "step": 11386 }, { "epoch": 0.38, "grad_norm": 0.44297298789024353, "learning_rate": 0.0005766991292106108, "loss": 1.8637, "step": 11387 }, { "epoch": 0.38, "grad_norm": 0.4404667019844055, "learning_rate": 0.0005766950903229992, "loss": 1.8726, "step": 11388 }, { "epoch": 0.38, "grad_norm": 0.4348551332950592, "learning_rate": 0.0005766910510995205, "loss": 1.8678, "step": 11389 }, { "epoch": 0.38, "grad_norm": 0.4371618330478668, "learning_rate": 0.0005766870115401796, "loss": 1.846, "step": 11390 }, { "epoch": 0.38, "grad_norm": 0.45640829205513, "learning_rate": 0.0005766829716449815, "loss": 1.9232, "step": 11391 }, { "epoch": 0.38, "grad_norm": 0.4602469801902771, "learning_rate": 0.0005766789314139311, "loss": 1.9353, "step": 11392 }, { "epoch": 0.38, "grad_norm": 0.4345470666885376, "learning_rate": 0.0005766748908470332, "loss": 1.879, "step": 11393 }, { "epoch": 0.38, "grad_norm": 0.4322686493396759, "learning_rate": 0.0005766708499442927, "loss": 1.8038, "step": 11394 }, { "epoch": 0.38, "grad_norm": 0.45556116104125977, "learning_rate": 0.0005766668087057146, "loss": 1.9001, "step": 11395 }, { "epoch": 0.38, "grad_norm": 0.4423745572566986, "learning_rate": 0.0005766627671313037, "loss": 1.9731, "step": 11396 }, { "epoch": 0.38, "grad_norm": 0.44042444229125977, "learning_rate": 0.000576658725221065, "loss": 1.9011, "step": 11397 }, { "epoch": 0.38, "grad_norm": 0.46289992332458496, "learning_rate": 0.0005766546829750034, "loss": 1.9234, "step": 11398 }, { "epoch": 0.38, "grad_norm": 0.4300388693809509, "learning_rate": 0.0005766506403931237, "loss": 1.858, "step": 11399 }, { "epoch": 0.38, "grad_norm": 0.46206212043762207, "learning_rate": 0.0005766465974754309, "loss": 1.912, "step": 11400 }, { "epoch": 0.38, "grad_norm": 0.4547465443611145, "learning_rate": 0.00057664255422193, "loss": 1.8895, "step": 11401 }, { "epoch": 0.38, "grad_norm": 0.4505450427532196, "learning_rate": 0.0005766385106326257, "loss": 1.9254, "step": 11402 }, { "epoch": 0.38, "grad_norm": 0.45283254981040955, "learning_rate": 0.000576634466707523, "loss": 1.8673, "step": 11403 }, { "epoch": 0.38, "grad_norm": 0.44198086857795715, "learning_rate": 0.0005766304224466269, "loss": 1.8045, "step": 11404 }, { "epoch": 0.38, "grad_norm": 0.449983149766922, "learning_rate": 0.0005766263778499422, "loss": 1.9708, "step": 11405 }, { "epoch": 0.38, "grad_norm": 0.46075648069381714, "learning_rate": 0.0005766223329174737, "loss": 1.9909, "step": 11406 }, { "epoch": 0.38, "grad_norm": 0.4433037340641022, "learning_rate": 0.0005766182876492266, "loss": 1.9594, "step": 11407 }, { "epoch": 0.38, "grad_norm": 0.4820578694343567, "learning_rate": 0.0005766142420452056, "loss": 1.8806, "step": 11408 }, { "epoch": 0.38, "grad_norm": 0.4477119743824005, "learning_rate": 0.0005766101961054157, "loss": 1.867, "step": 11409 }, { "epoch": 0.38, "grad_norm": 0.45883721113204956, "learning_rate": 0.0005766061498298617, "loss": 1.9418, "step": 11410 }, { "epoch": 0.38, "grad_norm": 0.4669986069202423, "learning_rate": 0.0005766021032185487, "loss": 1.897, "step": 11411 }, { "epoch": 0.38, "grad_norm": 0.44190165400505066, "learning_rate": 0.0005765980562714815, "loss": 1.8927, "step": 11412 }, { "epoch": 0.38, "grad_norm": 0.46983134746551514, "learning_rate": 0.000576594008988665, "loss": 1.8967, "step": 11413 }, { "epoch": 0.38, "grad_norm": 0.7182090878486633, "learning_rate": 0.000576589961370104, "loss": 1.9367, "step": 11414 }, { "epoch": 0.38, "grad_norm": 0.4450952410697937, "learning_rate": 0.0005765859134158037, "loss": 1.8543, "step": 11415 }, { "epoch": 0.38, "grad_norm": 0.44398486614227295, "learning_rate": 0.0005765818651257687, "loss": 1.9053, "step": 11416 }, { "epoch": 0.38, "grad_norm": 0.4520682096481323, "learning_rate": 0.0005765778165000042, "loss": 1.8483, "step": 11417 }, { "epoch": 0.38, "grad_norm": 0.4490971565246582, "learning_rate": 0.000576573767538515, "loss": 1.8583, "step": 11418 }, { "epoch": 0.38, "grad_norm": 0.43985915184020996, "learning_rate": 0.0005765697182413059, "loss": 1.8584, "step": 11419 }, { "epoch": 0.38, "grad_norm": 0.4502047598361969, "learning_rate": 0.000576565668608382, "loss": 1.9564, "step": 11420 }, { "epoch": 0.38, "grad_norm": 0.43303847312927246, "learning_rate": 0.000576561618639748, "loss": 1.9323, "step": 11421 }, { "epoch": 0.38, "grad_norm": 0.45233356952667236, "learning_rate": 0.000576557568335409, "loss": 1.8737, "step": 11422 }, { "epoch": 0.38, "grad_norm": 0.4583299458026886, "learning_rate": 0.0005765535176953699, "loss": 1.8979, "step": 11423 }, { "epoch": 0.38, "grad_norm": 0.4462535083293915, "learning_rate": 0.0005765494667196356, "loss": 1.8445, "step": 11424 }, { "epoch": 0.38, "grad_norm": 0.4281574487686157, "learning_rate": 0.0005765454154082109, "loss": 1.9375, "step": 11425 }, { "epoch": 0.38, "grad_norm": 0.4409238398075104, "learning_rate": 0.000576541363761101, "loss": 1.8804, "step": 11426 }, { "epoch": 0.38, "grad_norm": 0.45859506726264954, "learning_rate": 0.0005765373117783105, "loss": 1.8283, "step": 11427 }, { "epoch": 0.38, "grad_norm": 0.4298381507396698, "learning_rate": 0.0005765332594598444, "loss": 1.9363, "step": 11428 }, { "epoch": 0.38, "grad_norm": 0.4415186643600464, "learning_rate": 0.0005765292068057076, "loss": 1.9399, "step": 11429 }, { "epoch": 0.38, "grad_norm": 0.45196282863616943, "learning_rate": 0.0005765251538159054, "loss": 1.9432, "step": 11430 }, { "epoch": 0.38, "grad_norm": 0.42953065037727356, "learning_rate": 0.0005765211004904421, "loss": 1.8006, "step": 11431 }, { "epoch": 0.38, "grad_norm": 0.5054654479026794, "learning_rate": 0.0005765170468293231, "loss": 1.9066, "step": 11432 }, { "epoch": 0.38, "grad_norm": 0.4336569905281067, "learning_rate": 0.0005765129928325531, "loss": 1.8585, "step": 11433 }, { "epoch": 0.38, "grad_norm": 0.43599095940589905, "learning_rate": 0.000576508938500137, "loss": 1.8926, "step": 11434 }, { "epoch": 0.38, "grad_norm": 0.44147810339927673, "learning_rate": 0.0005765048838320798, "loss": 1.8532, "step": 11435 }, { "epoch": 0.38, "grad_norm": 0.4363615810871124, "learning_rate": 0.0005765008288283865, "loss": 1.92, "step": 11436 }, { "epoch": 0.38, "grad_norm": 0.4366416335105896, "learning_rate": 0.0005764967734890619, "loss": 1.8375, "step": 11437 }, { "epoch": 0.38, "grad_norm": 0.44003573060035706, "learning_rate": 0.000576492717814111, "loss": 1.9309, "step": 11438 }, { "epoch": 0.38, "grad_norm": 0.4433805048465729, "learning_rate": 0.0005764886618035385, "loss": 1.9335, "step": 11439 }, { "epoch": 0.38, "grad_norm": 0.4459194540977478, "learning_rate": 0.0005764846054573498, "loss": 1.9988, "step": 11440 }, { "epoch": 0.38, "grad_norm": 0.459316611289978, "learning_rate": 0.0005764805487755493, "loss": 1.892, "step": 11441 }, { "epoch": 0.38, "grad_norm": 0.44902119040489197, "learning_rate": 0.0005764764917581422, "loss": 1.9744, "step": 11442 }, { "epoch": 0.38, "grad_norm": 0.44842466711997986, "learning_rate": 0.0005764724344051335, "loss": 1.9184, "step": 11443 }, { "epoch": 0.38, "grad_norm": 0.4276978373527527, "learning_rate": 0.0005764683767165278, "loss": 1.8712, "step": 11444 }, { "epoch": 0.38, "grad_norm": 0.44606900215148926, "learning_rate": 0.0005764643186923303, "loss": 1.8572, "step": 11445 }, { "epoch": 0.38, "grad_norm": 0.44654250144958496, "learning_rate": 0.0005764602603325459, "loss": 1.8337, "step": 11446 }, { "epoch": 0.38, "grad_norm": 0.4474298357963562, "learning_rate": 0.0005764562016371794, "loss": 1.9074, "step": 11447 }, { "epoch": 0.38, "grad_norm": 0.4393729865550995, "learning_rate": 0.0005764521426062358, "loss": 1.901, "step": 11448 }, { "epoch": 0.38, "grad_norm": 0.4642009437084198, "learning_rate": 0.0005764480832397202, "loss": 1.9399, "step": 11449 }, { "epoch": 0.38, "grad_norm": 0.43858522176742554, "learning_rate": 0.0005764440235376372, "loss": 1.9342, "step": 11450 }, { "epoch": 0.38, "grad_norm": 0.4533555805683136, "learning_rate": 0.0005764399634999919, "loss": 1.9528, "step": 11451 }, { "epoch": 0.38, "grad_norm": 0.45607542991638184, "learning_rate": 0.0005764359031267893, "loss": 1.8824, "step": 11452 }, { "epoch": 0.38, "grad_norm": 0.4360472559928894, "learning_rate": 0.0005764318424180342, "loss": 1.8759, "step": 11453 }, { "epoch": 0.38, "grad_norm": 0.44219323992729187, "learning_rate": 0.0005764277813737315, "loss": 1.9163, "step": 11454 }, { "epoch": 0.38, "grad_norm": 0.4361637234687805, "learning_rate": 0.0005764237199938863, "loss": 1.934, "step": 11455 }, { "epoch": 0.38, "grad_norm": 0.49498996138572693, "learning_rate": 0.0005764196582785034, "loss": 1.7902, "step": 11456 }, { "epoch": 0.38, "grad_norm": 0.4301678538322449, "learning_rate": 0.0005764155962275879, "loss": 1.8362, "step": 11457 }, { "epoch": 0.38, "grad_norm": 0.4539566934108734, "learning_rate": 0.0005764115338411443, "loss": 1.9225, "step": 11458 }, { "epoch": 0.38, "grad_norm": 0.4624456763267517, "learning_rate": 0.000576407471119178, "loss": 1.9037, "step": 11459 }, { "epoch": 0.38, "grad_norm": 0.4420298933982849, "learning_rate": 0.0005764034080616938, "loss": 1.8972, "step": 11460 }, { "epoch": 0.38, "grad_norm": 0.4308825135231018, "learning_rate": 0.0005763993446686965, "loss": 1.9027, "step": 11461 }, { "epoch": 0.38, "grad_norm": 0.47081610560417175, "learning_rate": 0.0005763952809401911, "loss": 1.8432, "step": 11462 }, { "epoch": 0.38, "grad_norm": 0.4906693696975708, "learning_rate": 0.0005763912168761826, "loss": 1.8302, "step": 11463 }, { "epoch": 0.38, "grad_norm": 0.4498898684978485, "learning_rate": 0.000576387152476676, "loss": 1.8293, "step": 11464 }, { "epoch": 0.38, "grad_norm": 0.4471476972103119, "learning_rate": 0.000576383087741676, "loss": 1.8905, "step": 11465 }, { "epoch": 0.38, "grad_norm": 0.42689287662506104, "learning_rate": 0.0005763790226711876, "loss": 1.8647, "step": 11466 }, { "epoch": 0.38, "grad_norm": 0.4327724575996399, "learning_rate": 0.0005763749572652159, "loss": 1.9001, "step": 11467 }, { "epoch": 0.38, "grad_norm": 0.44843778014183044, "learning_rate": 0.0005763708915237657, "loss": 1.9332, "step": 11468 }, { "epoch": 0.38, "grad_norm": 0.5356915593147278, "learning_rate": 0.0005763668254468419, "loss": 1.8748, "step": 11469 }, { "epoch": 0.38, "grad_norm": 0.443628191947937, "learning_rate": 0.0005763627590344495, "loss": 1.8094, "step": 11470 }, { "epoch": 0.38, "grad_norm": 0.4343782067298889, "learning_rate": 0.0005763586922865935, "loss": 1.9082, "step": 11471 }, { "epoch": 0.38, "grad_norm": 0.4707154333591461, "learning_rate": 0.0005763546252032787, "loss": 1.8991, "step": 11472 }, { "epoch": 0.38, "grad_norm": 0.4747583270072937, "learning_rate": 0.0005763505577845101, "loss": 1.8894, "step": 11473 }, { "epoch": 0.38, "grad_norm": 0.44785621762275696, "learning_rate": 0.0005763464900302928, "loss": 1.9985, "step": 11474 }, { "epoch": 0.38, "grad_norm": 0.4431658089160919, "learning_rate": 0.0005763424219406314, "loss": 1.9862, "step": 11475 }, { "epoch": 0.38, "grad_norm": 0.4841831922531128, "learning_rate": 0.0005763383535155311, "loss": 1.8676, "step": 11476 }, { "epoch": 0.38, "grad_norm": 0.4405969977378845, "learning_rate": 0.0005763342847549967, "loss": 1.9083, "step": 11477 }, { "epoch": 0.38, "grad_norm": 0.4442649781703949, "learning_rate": 0.0005763302156590333, "loss": 1.9649, "step": 11478 }, { "epoch": 0.38, "grad_norm": 0.4661254584789276, "learning_rate": 0.0005763261462276455, "loss": 1.9437, "step": 11479 }, { "epoch": 0.38, "grad_norm": 0.4330238103866577, "learning_rate": 0.0005763220764608387, "loss": 1.8771, "step": 11480 }, { "epoch": 0.38, "grad_norm": 0.4387906491756439, "learning_rate": 0.0005763180063586176, "loss": 1.8924, "step": 11481 }, { "epoch": 0.38, "grad_norm": 0.4342825710773468, "learning_rate": 0.000576313935920987, "loss": 1.9589, "step": 11482 }, { "epoch": 0.38, "grad_norm": 0.4380265772342682, "learning_rate": 0.0005763098651479522, "loss": 1.9283, "step": 11483 }, { "epoch": 0.38, "grad_norm": 0.432821661233902, "learning_rate": 0.0005763057940395178, "loss": 1.8894, "step": 11484 }, { "epoch": 0.38, "grad_norm": 0.4486836791038513, "learning_rate": 0.0005763017225956889, "loss": 1.8834, "step": 11485 }, { "epoch": 0.38, "grad_norm": 0.44400477409362793, "learning_rate": 0.0005762976508164704, "loss": 1.8781, "step": 11486 }, { "epoch": 0.38, "grad_norm": 0.4413818120956421, "learning_rate": 0.0005762935787018673, "loss": 1.8621, "step": 11487 }, { "epoch": 0.38, "grad_norm": 0.44318822026252747, "learning_rate": 0.0005762895062518845, "loss": 1.8845, "step": 11488 }, { "epoch": 0.38, "grad_norm": 0.436084121465683, "learning_rate": 0.0005762854334665268, "loss": 1.8527, "step": 11489 }, { "epoch": 0.38, "grad_norm": 0.4503399431705475, "learning_rate": 0.0005762813603457995, "loss": 1.9364, "step": 11490 }, { "epoch": 0.38, "grad_norm": 0.46038392186164856, "learning_rate": 0.0005762772868897073, "loss": 1.8783, "step": 11491 }, { "epoch": 0.38, "grad_norm": 0.43618419766426086, "learning_rate": 0.0005762732130982552, "loss": 1.9161, "step": 11492 }, { "epoch": 0.38, "grad_norm": 0.4460335969924927, "learning_rate": 0.0005762691389714481, "loss": 1.8983, "step": 11493 }, { "epoch": 0.38, "grad_norm": 0.4325183928012848, "learning_rate": 0.0005762650645092909, "loss": 1.8931, "step": 11494 }, { "epoch": 0.38, "grad_norm": 0.4515042304992676, "learning_rate": 0.0005762609897117886, "loss": 1.8548, "step": 11495 }, { "epoch": 0.38, "grad_norm": 0.4920239746570587, "learning_rate": 0.0005762569145789464, "loss": 1.9119, "step": 11496 }, { "epoch": 0.38, "grad_norm": 0.43964654207229614, "learning_rate": 0.0005762528391107687, "loss": 1.8997, "step": 11497 }, { "epoch": 0.38, "grad_norm": 0.45061299204826355, "learning_rate": 0.000576248763307261, "loss": 1.786, "step": 11498 }, { "epoch": 0.38, "grad_norm": 0.44460490345954895, "learning_rate": 0.0005762446871684278, "loss": 1.9066, "step": 11499 }, { "epoch": 0.38, "grad_norm": 0.44129982590675354, "learning_rate": 0.0005762406106942743, "loss": 1.9092, "step": 11500 }, { "epoch": 0.38, "grad_norm": 0.45526447892189026, "learning_rate": 0.0005762365338848055, "loss": 1.8522, "step": 11501 }, { "epoch": 0.38, "grad_norm": 0.4357515275478363, "learning_rate": 0.0005762324567400262, "loss": 1.9485, "step": 11502 }, { "epoch": 0.38, "grad_norm": 0.4427317678928375, "learning_rate": 0.0005762283792599414, "loss": 1.9011, "step": 11503 }, { "epoch": 0.38, "grad_norm": 0.44547513127326965, "learning_rate": 0.0005762243014445561, "loss": 1.9184, "step": 11504 }, { "epoch": 0.38, "grad_norm": 0.447211891412735, "learning_rate": 0.0005762202232938752, "loss": 1.8453, "step": 11505 }, { "epoch": 0.38, "grad_norm": 0.46253424882888794, "learning_rate": 0.0005762161448079036, "loss": 1.9099, "step": 11506 }, { "epoch": 0.38, "grad_norm": 0.44437023997306824, "learning_rate": 0.0005762120659866463, "loss": 1.9518, "step": 11507 }, { "epoch": 0.38, "grad_norm": 0.440624475479126, "learning_rate": 0.0005762079868301082, "loss": 1.8769, "step": 11508 }, { "epoch": 0.38, "grad_norm": 0.4418736696243286, "learning_rate": 0.0005762039073382943, "loss": 1.8594, "step": 11509 }, { "epoch": 0.38, "grad_norm": 0.46106067299842834, "learning_rate": 0.0005761998275112096, "loss": 1.8725, "step": 11510 }, { "epoch": 0.38, "grad_norm": 0.4458538889884949, "learning_rate": 0.0005761957473488591, "loss": 1.9557, "step": 11511 }, { "epoch": 0.38, "grad_norm": 0.4353950321674347, "learning_rate": 0.0005761916668512475, "loss": 1.8235, "step": 11512 }, { "epoch": 0.38, "grad_norm": 0.443571001291275, "learning_rate": 0.00057618758601838, "loss": 1.9041, "step": 11513 }, { "epoch": 0.38, "grad_norm": 0.4436916410923004, "learning_rate": 0.0005761835048502614, "loss": 1.9886, "step": 11514 }, { "epoch": 0.38, "grad_norm": 0.42075157165527344, "learning_rate": 0.0005761794233468967, "loss": 1.9269, "step": 11515 }, { "epoch": 0.38, "grad_norm": 0.4516148865222931, "learning_rate": 0.000576175341508291, "loss": 1.8618, "step": 11516 }, { "epoch": 0.38, "grad_norm": 0.4437978267669678, "learning_rate": 0.0005761712593344491, "loss": 1.8283, "step": 11517 }, { "epoch": 0.38, "grad_norm": 0.44365328550338745, "learning_rate": 0.0005761671768253759, "loss": 1.9234, "step": 11518 }, { "epoch": 0.38, "grad_norm": 0.4400249719619751, "learning_rate": 0.0005761630939810765, "loss": 1.878, "step": 11519 }, { "epoch": 0.38, "grad_norm": 0.47441697120666504, "learning_rate": 0.0005761590108015559, "loss": 1.9193, "step": 11520 }, { "epoch": 0.38, "grad_norm": 0.4361281394958496, "learning_rate": 0.0005761549272868187, "loss": 1.9705, "step": 11521 }, { "epoch": 0.38, "grad_norm": 0.45048755407333374, "learning_rate": 0.0005761508434368703, "loss": 1.8472, "step": 11522 }, { "epoch": 0.38, "grad_norm": 0.4520292580127716, "learning_rate": 0.0005761467592517154, "loss": 1.85, "step": 11523 }, { "epoch": 0.38, "grad_norm": 0.44539183378219604, "learning_rate": 0.000576142674731359, "loss": 1.8163, "step": 11524 }, { "epoch": 0.38, "grad_norm": 0.45004764199256897, "learning_rate": 0.0005761385898758061, "loss": 1.9323, "step": 11525 }, { "epoch": 0.38, "grad_norm": 0.4611121416091919, "learning_rate": 0.0005761345046850616, "loss": 1.9045, "step": 11526 }, { "epoch": 0.38, "grad_norm": 0.4353184700012207, "learning_rate": 0.0005761304191591306, "loss": 1.9245, "step": 11527 }, { "epoch": 0.38, "grad_norm": 0.45205846428871155, "learning_rate": 0.0005761263332980179, "loss": 1.8272, "step": 11528 }, { "epoch": 0.38, "grad_norm": 0.4516717791557312, "learning_rate": 0.0005761222471017285, "loss": 1.9302, "step": 11529 }, { "epoch": 0.38, "grad_norm": 0.44985461235046387, "learning_rate": 0.0005761181605702674, "loss": 1.9448, "step": 11530 }, { "epoch": 0.38, "grad_norm": 0.4416482448577881, "learning_rate": 0.0005761140737036395, "loss": 1.8383, "step": 11531 }, { "epoch": 0.38, "grad_norm": 0.4790647625923157, "learning_rate": 0.0005761099865018498, "loss": 1.8349, "step": 11532 }, { "epoch": 0.38, "grad_norm": 0.4536809027194977, "learning_rate": 0.0005761058989649033, "loss": 1.8577, "step": 11533 }, { "epoch": 0.38, "grad_norm": 0.4568406939506531, "learning_rate": 0.0005761018110928049, "loss": 1.8376, "step": 11534 }, { "epoch": 0.38, "grad_norm": 0.4423605501651764, "learning_rate": 0.0005760977228855596, "loss": 1.7887, "step": 11535 }, { "epoch": 0.38, "grad_norm": 0.4409538805484772, "learning_rate": 0.0005760936343431724, "loss": 1.8883, "step": 11536 }, { "epoch": 0.38, "grad_norm": 0.4426231384277344, "learning_rate": 0.0005760895454656481, "loss": 1.8755, "step": 11537 }, { "epoch": 0.38, "grad_norm": 0.4592907726764679, "learning_rate": 0.0005760854562529918, "loss": 1.9844, "step": 11538 }, { "epoch": 0.38, "grad_norm": 0.4524330496788025, "learning_rate": 0.0005760813667052085, "loss": 1.8404, "step": 11539 }, { "epoch": 0.38, "grad_norm": 0.45110073685646057, "learning_rate": 0.000576077276822303, "loss": 1.9153, "step": 11540 }, { "epoch": 0.38, "grad_norm": 0.4311915934085846, "learning_rate": 0.0005760731866042804, "loss": 1.9238, "step": 11541 }, { "epoch": 0.38, "grad_norm": 0.4354173243045807, "learning_rate": 0.0005760690960511457, "loss": 1.9461, "step": 11542 }, { "epoch": 0.38, "grad_norm": 0.46478867530822754, "learning_rate": 0.0005760650051629037, "loss": 1.8608, "step": 11543 }, { "epoch": 0.38, "grad_norm": 0.44784486293792725, "learning_rate": 0.0005760609139395595, "loss": 1.891, "step": 11544 }, { "epoch": 0.38, "grad_norm": 0.4345102906227112, "learning_rate": 0.000576056822381118, "loss": 1.898, "step": 11545 }, { "epoch": 0.38, "grad_norm": 0.41398540139198303, "learning_rate": 0.0005760527304875842, "loss": 1.8562, "step": 11546 }, { "epoch": 0.38, "grad_norm": 0.439500093460083, "learning_rate": 0.0005760486382589632, "loss": 1.8736, "step": 11547 }, { "epoch": 0.38, "grad_norm": 0.4399067163467407, "learning_rate": 0.0005760445456952595, "loss": 1.7951, "step": 11548 }, { "epoch": 0.38, "grad_norm": 0.4265322685241699, "learning_rate": 0.0005760404527964787, "loss": 1.9139, "step": 11549 }, { "epoch": 0.38, "grad_norm": 0.4492839574813843, "learning_rate": 0.0005760363595626254, "loss": 1.9139, "step": 11550 }, { "epoch": 0.38, "grad_norm": 0.45310595631599426, "learning_rate": 0.0005760322659937046, "loss": 1.9141, "step": 11551 }, { "epoch": 0.38, "grad_norm": 0.4301302134990692, "learning_rate": 0.0005760281720897213, "loss": 1.887, "step": 11552 }, { "epoch": 0.38, "grad_norm": 0.44687244296073914, "learning_rate": 0.0005760240778506804, "loss": 1.8516, "step": 11553 }, { "epoch": 0.38, "grad_norm": 0.4523480534553528, "learning_rate": 0.000576019983276587, "loss": 1.7747, "step": 11554 }, { "epoch": 0.38, "grad_norm": 0.43145638704299927, "learning_rate": 0.000576015888367446, "loss": 1.8793, "step": 11555 }, { "epoch": 0.38, "grad_norm": 0.4312300384044647, "learning_rate": 0.0005760117931232623, "loss": 1.8395, "step": 11556 }, { "epoch": 0.38, "grad_norm": 0.44031181931495667, "learning_rate": 0.0005760076975440412, "loss": 1.8724, "step": 11557 }, { "epoch": 0.38, "grad_norm": 0.43354475498199463, "learning_rate": 0.0005760036016297873, "loss": 1.8601, "step": 11558 }, { "epoch": 0.38, "grad_norm": 0.445318341255188, "learning_rate": 0.0005759995053805057, "loss": 1.8362, "step": 11559 }, { "epoch": 0.38, "grad_norm": 0.4410785436630249, "learning_rate": 0.0005759954087962011, "loss": 1.9174, "step": 11560 }, { "epoch": 0.38, "grad_norm": 0.44182780385017395, "learning_rate": 0.0005759913118768791, "loss": 1.8177, "step": 11561 }, { "epoch": 0.38, "grad_norm": 0.43212172389030457, "learning_rate": 0.0005759872146225442, "loss": 1.8433, "step": 11562 }, { "epoch": 0.38, "grad_norm": 0.4357154667377472, "learning_rate": 0.0005759831170332015, "loss": 1.8959, "step": 11563 }, { "epoch": 0.38, "grad_norm": 0.43711262941360474, "learning_rate": 0.0005759790191088559, "loss": 1.864, "step": 11564 }, { "epoch": 0.38, "grad_norm": 0.45115944743156433, "learning_rate": 0.0005759749208495123, "loss": 1.8367, "step": 11565 }, { "epoch": 0.38, "grad_norm": 0.43490085005760193, "learning_rate": 0.000575970822255176, "loss": 1.8698, "step": 11566 }, { "epoch": 0.38, "grad_norm": 0.43342748284339905, "learning_rate": 0.0005759667233258518, "loss": 1.8289, "step": 11567 }, { "epoch": 0.38, "grad_norm": 0.42834073305130005, "learning_rate": 0.0005759626240615447, "loss": 1.8627, "step": 11568 }, { "epoch": 0.38, "grad_norm": 0.43800681829452515, "learning_rate": 0.0005759585244622595, "loss": 1.988, "step": 11569 }, { "epoch": 0.38, "grad_norm": 0.4404410421848297, "learning_rate": 0.0005759544245280015, "loss": 1.8289, "step": 11570 }, { "epoch": 0.38, "grad_norm": 0.4363369047641754, "learning_rate": 0.0005759503242587753, "loss": 1.8911, "step": 11571 }, { "epoch": 0.39, "grad_norm": 0.43826842308044434, "learning_rate": 0.000575946223654586, "loss": 1.8823, "step": 11572 }, { "epoch": 0.39, "grad_norm": 0.42959052324295044, "learning_rate": 0.0005759421227154388, "loss": 1.8929, "step": 11573 }, { "epoch": 0.39, "grad_norm": 0.42809897661209106, "learning_rate": 0.0005759380214413385, "loss": 1.8326, "step": 11574 }, { "epoch": 0.39, "grad_norm": 0.4318525791168213, "learning_rate": 0.0005759339198322901, "loss": 1.7816, "step": 11575 }, { "epoch": 0.39, "grad_norm": 0.4443031847476959, "learning_rate": 0.0005759298178882985, "loss": 1.9176, "step": 11576 }, { "epoch": 0.39, "grad_norm": 0.43796366453170776, "learning_rate": 0.0005759257156093689, "loss": 1.9424, "step": 11577 }, { "epoch": 0.39, "grad_norm": 0.4436497390270233, "learning_rate": 0.000575921612995506, "loss": 1.8824, "step": 11578 }, { "epoch": 0.39, "grad_norm": 0.4425654113292694, "learning_rate": 0.000575917510046715, "loss": 1.9222, "step": 11579 }, { "epoch": 0.39, "grad_norm": 0.4573909342288971, "learning_rate": 0.0005759134067630007, "loss": 1.9189, "step": 11580 }, { "epoch": 0.39, "grad_norm": 0.43196341395378113, "learning_rate": 0.0005759093031443683, "loss": 1.821, "step": 11581 }, { "epoch": 0.39, "grad_norm": 0.43402206897735596, "learning_rate": 0.0005759051991908226, "loss": 1.845, "step": 11582 }, { "epoch": 0.39, "grad_norm": 0.4346536695957184, "learning_rate": 0.0005759010949023686, "loss": 1.9083, "step": 11583 }, { "epoch": 0.39, "grad_norm": 0.42702391743659973, "learning_rate": 0.0005758969902790115, "loss": 1.8021, "step": 11584 }, { "epoch": 0.39, "grad_norm": 0.44829943776130676, "learning_rate": 0.0005758928853207559, "loss": 1.8671, "step": 11585 }, { "epoch": 0.39, "grad_norm": 0.45264625549316406, "learning_rate": 0.000575888780027607, "loss": 1.8734, "step": 11586 }, { "epoch": 0.39, "grad_norm": 0.4213370680809021, "learning_rate": 0.0005758846743995699, "loss": 1.8483, "step": 11587 }, { "epoch": 0.39, "grad_norm": 0.42950475215911865, "learning_rate": 0.0005758805684366494, "loss": 1.8582, "step": 11588 }, { "epoch": 0.39, "grad_norm": 0.4511603116989136, "learning_rate": 0.0005758764621388505, "loss": 1.8477, "step": 11589 }, { "epoch": 0.39, "grad_norm": 0.4461718499660492, "learning_rate": 0.0005758723555061783, "loss": 1.8617, "step": 11590 }, { "epoch": 0.39, "grad_norm": 0.44214490056037903, "learning_rate": 0.0005758682485386377, "loss": 1.8671, "step": 11591 }, { "epoch": 0.39, "grad_norm": 0.43612414598464966, "learning_rate": 0.0005758641412362336, "loss": 1.879, "step": 11592 }, { "epoch": 0.39, "grad_norm": 0.448922723531723, "learning_rate": 0.0005758600335989711, "loss": 1.9485, "step": 11593 }, { "epoch": 0.39, "grad_norm": 0.44051727652549744, "learning_rate": 0.0005758559256268552, "loss": 1.9214, "step": 11594 }, { "epoch": 0.39, "grad_norm": 0.43662184476852417, "learning_rate": 0.0005758518173198909, "loss": 1.8234, "step": 11595 }, { "epoch": 0.39, "grad_norm": 0.4502681493759155, "learning_rate": 0.0005758477086780831, "loss": 1.8739, "step": 11596 }, { "epoch": 0.39, "grad_norm": 0.4383411109447479, "learning_rate": 0.0005758435997014368, "loss": 1.8777, "step": 11597 }, { "epoch": 0.39, "grad_norm": 0.4551936984062195, "learning_rate": 0.000575839490389957, "loss": 1.9617, "step": 11598 }, { "epoch": 0.39, "grad_norm": 0.4439038336277008, "learning_rate": 0.0005758353807436488, "loss": 1.8466, "step": 11599 }, { "epoch": 0.39, "grad_norm": 0.43548068404197693, "learning_rate": 0.000575831270762517, "loss": 1.8884, "step": 11600 }, { "epoch": 0.39, "grad_norm": 0.4368947148323059, "learning_rate": 0.0005758271604465667, "loss": 1.9506, "step": 11601 }, { "epoch": 0.39, "grad_norm": 0.44499412178993225, "learning_rate": 0.0005758230497958028, "loss": 1.9318, "step": 11602 }, { "epoch": 0.39, "grad_norm": 0.43103644251823425, "learning_rate": 0.0005758189388102304, "loss": 1.8882, "step": 11603 }, { "epoch": 0.39, "grad_norm": 0.4341530501842499, "learning_rate": 0.0005758148274898545, "loss": 1.9135, "step": 11604 }, { "epoch": 0.39, "grad_norm": 0.4217469096183777, "learning_rate": 0.0005758107158346801, "loss": 1.8628, "step": 11605 }, { "epoch": 0.39, "grad_norm": 0.4244824945926666, "learning_rate": 0.000575806603844712, "loss": 1.8994, "step": 11606 }, { "epoch": 0.39, "grad_norm": 0.4420197606086731, "learning_rate": 0.0005758024915199554, "loss": 1.8976, "step": 11607 }, { "epoch": 0.39, "grad_norm": 0.44734108448028564, "learning_rate": 0.0005757983788604151, "loss": 1.8587, "step": 11608 }, { "epoch": 0.39, "grad_norm": 0.4501672089099884, "learning_rate": 0.0005757942658660963, "loss": 1.9132, "step": 11609 }, { "epoch": 0.39, "grad_norm": 0.4433594346046448, "learning_rate": 0.0005757901525370038, "loss": 1.8626, "step": 11610 }, { "epoch": 0.39, "grad_norm": 0.456487238407135, "learning_rate": 0.0005757860388731429, "loss": 1.9153, "step": 11611 }, { "epoch": 0.39, "grad_norm": 0.4384755492210388, "learning_rate": 0.0005757819248745182, "loss": 1.939, "step": 11612 }, { "epoch": 0.39, "grad_norm": 0.4540714621543884, "learning_rate": 0.0005757778105411349, "loss": 1.8334, "step": 11613 }, { "epoch": 0.39, "grad_norm": 0.4449613392353058, "learning_rate": 0.000575773695872998, "loss": 1.9132, "step": 11614 }, { "epoch": 0.39, "grad_norm": 0.43635836243629456, "learning_rate": 0.0005757695808701126, "loss": 1.8915, "step": 11615 }, { "epoch": 0.39, "grad_norm": 0.44752055406570435, "learning_rate": 0.0005757654655324833, "loss": 1.8521, "step": 11616 }, { "epoch": 0.39, "grad_norm": 0.42591702938079834, "learning_rate": 0.0005757613498601155, "loss": 1.8658, "step": 11617 }, { "epoch": 0.39, "grad_norm": 0.4666849672794342, "learning_rate": 0.0005757572338530141, "loss": 1.8642, "step": 11618 }, { "epoch": 0.39, "grad_norm": 0.47610825300216675, "learning_rate": 0.000575753117511184, "loss": 1.956, "step": 11619 }, { "epoch": 0.39, "grad_norm": 0.4280093014240265, "learning_rate": 0.0005757490008346303, "loss": 1.8682, "step": 11620 }, { "epoch": 0.39, "grad_norm": 0.4337610900402069, "learning_rate": 0.0005757448838233578, "loss": 1.862, "step": 11621 }, { "epoch": 0.39, "grad_norm": 0.42224496603012085, "learning_rate": 0.0005757407664773716, "loss": 1.8926, "step": 11622 }, { "epoch": 0.39, "grad_norm": 0.4318697452545166, "learning_rate": 0.000575736648796677, "loss": 1.8512, "step": 11623 }, { "epoch": 0.39, "grad_norm": 0.42954543232917786, "learning_rate": 0.0005757325307812785, "loss": 1.8224, "step": 11624 }, { "epoch": 0.39, "grad_norm": 0.4569651782512665, "learning_rate": 0.0005757284124311815, "loss": 1.8764, "step": 11625 }, { "epoch": 0.39, "grad_norm": 0.4362797737121582, "learning_rate": 0.0005757242937463907, "loss": 1.9038, "step": 11626 }, { "epoch": 0.39, "grad_norm": 0.43821483850479126, "learning_rate": 0.0005757201747269113, "loss": 1.8897, "step": 11627 }, { "epoch": 0.39, "grad_norm": 0.47013840079307556, "learning_rate": 0.0005757160553727483, "loss": 1.9051, "step": 11628 }, { "epoch": 0.39, "grad_norm": 0.4510602653026581, "learning_rate": 0.0005757119356839066, "loss": 1.8045, "step": 11629 }, { "epoch": 0.39, "grad_norm": 0.4312387704849243, "learning_rate": 0.0005757078156603911, "loss": 1.9243, "step": 11630 }, { "epoch": 0.39, "grad_norm": 0.5658693909645081, "learning_rate": 0.0005757036953022071, "loss": 1.8905, "step": 11631 }, { "epoch": 0.39, "grad_norm": 0.4539166986942291, "learning_rate": 0.0005756995746093593, "loss": 1.8891, "step": 11632 }, { "epoch": 0.39, "grad_norm": 0.43677660822868347, "learning_rate": 0.0005756954535818529, "loss": 1.9028, "step": 11633 }, { "epoch": 0.39, "grad_norm": 0.4282999336719513, "learning_rate": 0.0005756913322196929, "loss": 1.8463, "step": 11634 }, { "epoch": 0.39, "grad_norm": 0.45388704538345337, "learning_rate": 0.0005756872105228842, "loss": 1.9114, "step": 11635 }, { "epoch": 0.39, "grad_norm": 0.432691365480423, "learning_rate": 0.0005756830884914319, "loss": 1.9338, "step": 11636 }, { "epoch": 0.39, "grad_norm": 0.46829208731651306, "learning_rate": 0.0005756789661253408, "loss": 1.7826, "step": 11637 }, { "epoch": 0.39, "grad_norm": 0.47905296087265015, "learning_rate": 0.0005756748434246161, "loss": 1.9219, "step": 11638 }, { "epoch": 0.39, "grad_norm": 0.43588748574256897, "learning_rate": 0.0005756707203892628, "loss": 1.939, "step": 11639 }, { "epoch": 0.39, "grad_norm": 0.4397253394126892, "learning_rate": 0.000575666597019286, "loss": 1.972, "step": 11640 }, { "epoch": 0.39, "grad_norm": 0.4627622663974762, "learning_rate": 0.0005756624733146904, "loss": 1.9535, "step": 11641 }, { "epoch": 0.39, "grad_norm": 0.4473165273666382, "learning_rate": 0.0005756583492754812, "loss": 1.901, "step": 11642 }, { "epoch": 0.39, "grad_norm": 0.46223896741867065, "learning_rate": 0.0005756542249016634, "loss": 1.8702, "step": 11643 }, { "epoch": 0.39, "grad_norm": 0.45357584953308105, "learning_rate": 0.000575650100193242, "loss": 1.873, "step": 11644 }, { "epoch": 0.39, "grad_norm": 0.4480050206184387, "learning_rate": 0.0005756459751502219, "loss": 1.817, "step": 11645 }, { "epoch": 0.39, "grad_norm": 0.45805805921554565, "learning_rate": 0.0005756418497726083, "loss": 1.899, "step": 11646 }, { "epoch": 0.39, "grad_norm": 0.4878591299057007, "learning_rate": 0.0005756377240604061, "loss": 1.8726, "step": 11647 }, { "epoch": 0.39, "grad_norm": 0.474089652299881, "learning_rate": 0.0005756335980136203, "loss": 1.8529, "step": 11648 }, { "epoch": 0.39, "grad_norm": 0.4538491368293762, "learning_rate": 0.0005756294716322558, "loss": 1.9639, "step": 11649 }, { "epoch": 0.39, "grad_norm": 0.46054500341415405, "learning_rate": 0.0005756253449163179, "loss": 1.9388, "step": 11650 }, { "epoch": 0.39, "grad_norm": 0.4421613812446594, "learning_rate": 0.0005756212178658114, "loss": 1.8652, "step": 11651 }, { "epoch": 0.39, "grad_norm": 0.4410538971424103, "learning_rate": 0.0005756170904807413, "loss": 1.8756, "step": 11652 }, { "epoch": 0.39, "grad_norm": 0.4698851704597473, "learning_rate": 0.0005756129627611128, "loss": 1.9123, "step": 11653 }, { "epoch": 0.39, "grad_norm": 0.43411263823509216, "learning_rate": 0.0005756088347069307, "loss": 1.9243, "step": 11654 }, { "epoch": 0.39, "grad_norm": 0.6110045909881592, "learning_rate": 0.0005756047063182001, "loss": 1.9534, "step": 11655 }, { "epoch": 0.39, "grad_norm": 0.4397624433040619, "learning_rate": 0.0005756005775949259, "loss": 1.8971, "step": 11656 }, { "epoch": 0.39, "grad_norm": 0.43198689818382263, "learning_rate": 0.0005755964485371133, "loss": 1.7959, "step": 11657 }, { "epoch": 0.39, "grad_norm": 0.4461789131164551, "learning_rate": 0.0005755923191447672, "loss": 1.847, "step": 11658 }, { "epoch": 0.39, "grad_norm": 0.43877893686294556, "learning_rate": 0.0005755881894178926, "loss": 1.9612, "step": 11659 }, { "epoch": 0.39, "grad_norm": 0.43218404054641724, "learning_rate": 0.0005755840593564946, "loss": 1.8547, "step": 11660 }, { "epoch": 0.39, "grad_norm": 0.45341965556144714, "learning_rate": 0.0005755799289605781, "loss": 1.8795, "step": 11661 }, { "epoch": 0.39, "grad_norm": 0.43860766291618347, "learning_rate": 0.0005755757982301481, "loss": 1.9551, "step": 11662 }, { "epoch": 0.39, "grad_norm": 0.4464047849178314, "learning_rate": 0.0005755716671652099, "loss": 1.7971, "step": 11663 }, { "epoch": 0.39, "grad_norm": 0.46494585275650024, "learning_rate": 0.0005755675357657681, "loss": 1.888, "step": 11664 }, { "epoch": 0.39, "grad_norm": 0.4467730224132538, "learning_rate": 0.000575563404031828, "loss": 1.8968, "step": 11665 }, { "epoch": 0.39, "grad_norm": 0.4337954819202423, "learning_rate": 0.0005755592719633945, "loss": 1.9585, "step": 11666 }, { "epoch": 0.39, "grad_norm": 0.4392259418964386, "learning_rate": 0.0005755551395604727, "loss": 1.8654, "step": 11667 }, { "epoch": 0.39, "grad_norm": 0.43506303429603577, "learning_rate": 0.0005755510068230676, "loss": 1.9468, "step": 11668 }, { "epoch": 0.39, "grad_norm": 0.4271187484264374, "learning_rate": 0.0005755468737511841, "loss": 1.8216, "step": 11669 }, { "epoch": 0.39, "grad_norm": 0.42206910252571106, "learning_rate": 0.0005755427403448273, "loss": 1.8766, "step": 11670 }, { "epoch": 0.39, "grad_norm": 0.451538622379303, "learning_rate": 0.0005755386066040022, "loss": 1.8175, "step": 11671 }, { "epoch": 0.39, "grad_norm": 0.43342188000679016, "learning_rate": 0.0005755344725287138, "loss": 1.8812, "step": 11672 }, { "epoch": 0.39, "grad_norm": 0.4306390583515167, "learning_rate": 0.0005755303381189672, "loss": 1.8002, "step": 11673 }, { "epoch": 0.39, "grad_norm": 0.44617724418640137, "learning_rate": 0.0005755262033747674, "loss": 1.9204, "step": 11674 }, { "epoch": 0.39, "grad_norm": 0.45690032839775085, "learning_rate": 0.0005755220682961193, "loss": 1.8427, "step": 11675 }, { "epoch": 0.39, "grad_norm": 0.4364267885684967, "learning_rate": 0.0005755179328830281, "loss": 1.9158, "step": 11676 }, { "epoch": 0.39, "grad_norm": 0.4506102502346039, "learning_rate": 0.0005755137971354986, "loss": 1.8928, "step": 11677 }, { "epoch": 0.39, "grad_norm": 0.4334155023097992, "learning_rate": 0.0005755096610535361, "loss": 1.9439, "step": 11678 }, { "epoch": 0.39, "grad_norm": 0.44100266695022583, "learning_rate": 0.0005755055246371454, "loss": 1.8612, "step": 11679 }, { "epoch": 0.39, "grad_norm": 0.4419138729572296, "learning_rate": 0.0005755013878863315, "loss": 1.8745, "step": 11680 }, { "epoch": 0.39, "grad_norm": 0.4280913770198822, "learning_rate": 0.0005754972508010996, "loss": 1.8071, "step": 11681 }, { "epoch": 0.39, "grad_norm": 0.7523778080940247, "learning_rate": 0.0005754931133814547, "loss": 1.9369, "step": 11682 }, { "epoch": 0.39, "grad_norm": 0.4371594488620758, "learning_rate": 0.0005754889756274015, "loss": 1.8183, "step": 11683 }, { "epoch": 0.39, "grad_norm": 0.43907472491264343, "learning_rate": 0.0005754848375389456, "loss": 1.9361, "step": 11684 }, { "epoch": 0.39, "grad_norm": 0.4445502460002899, "learning_rate": 0.0005754806991160915, "loss": 1.8309, "step": 11685 }, { "epoch": 0.39, "grad_norm": 0.4387425184249878, "learning_rate": 0.0005754765603588445, "loss": 1.8537, "step": 11686 }, { "epoch": 0.39, "grad_norm": 0.42971348762512207, "learning_rate": 0.0005754724212672094, "loss": 1.887, "step": 11687 }, { "epoch": 0.39, "grad_norm": 0.4333432614803314, "learning_rate": 0.0005754682818411915, "loss": 1.8802, "step": 11688 }, { "epoch": 0.39, "grad_norm": 0.43858954310417175, "learning_rate": 0.0005754641420807957, "loss": 1.8566, "step": 11689 }, { "epoch": 0.39, "grad_norm": 0.4271206855773926, "learning_rate": 0.000575460001986027, "loss": 1.9002, "step": 11690 }, { "epoch": 0.39, "grad_norm": 0.43764641880989075, "learning_rate": 0.0005754558615568905, "loss": 1.9146, "step": 11691 }, { "epoch": 0.39, "grad_norm": 0.45080217719078064, "learning_rate": 0.0005754517207933911, "loss": 1.8517, "step": 11692 }, { "epoch": 0.39, "grad_norm": 0.42648202180862427, "learning_rate": 0.0005754475796955339, "loss": 1.7818, "step": 11693 }, { "epoch": 0.39, "grad_norm": 0.41448432207107544, "learning_rate": 0.0005754434382633239, "loss": 1.7971, "step": 11694 }, { "epoch": 0.39, "grad_norm": 0.42980191111564636, "learning_rate": 0.0005754392964967661, "loss": 1.82, "step": 11695 }, { "epoch": 0.39, "grad_norm": 0.4170776307582855, "learning_rate": 0.0005754351543958657, "loss": 1.8376, "step": 11696 }, { "epoch": 0.39, "grad_norm": 0.4439554810523987, "learning_rate": 0.0005754310119606275, "loss": 1.8698, "step": 11697 }, { "epoch": 0.39, "grad_norm": 0.6243820190429688, "learning_rate": 0.0005754268691910568, "loss": 1.9343, "step": 11698 }, { "epoch": 0.39, "grad_norm": 0.434826523065567, "learning_rate": 0.0005754227260871583, "loss": 1.8939, "step": 11699 }, { "epoch": 0.39, "grad_norm": 0.44805240631103516, "learning_rate": 0.0005754185826489372, "loss": 1.9812, "step": 11700 }, { "epoch": 0.39, "grad_norm": 0.4319622218608856, "learning_rate": 0.0005754144388763987, "loss": 1.8165, "step": 11701 }, { "epoch": 0.39, "grad_norm": 0.43271517753601074, "learning_rate": 0.0005754102947695475, "loss": 1.885, "step": 11702 }, { "epoch": 0.39, "grad_norm": 0.4501895308494568, "learning_rate": 0.0005754061503283887, "loss": 1.9342, "step": 11703 }, { "epoch": 0.39, "grad_norm": 0.4384821355342865, "learning_rate": 0.0005754020055529276, "loss": 1.8701, "step": 11704 }, { "epoch": 0.39, "grad_norm": 0.434996634721756, "learning_rate": 0.0005753978604431689, "loss": 1.8677, "step": 11705 }, { "epoch": 0.39, "grad_norm": 0.43628886342048645, "learning_rate": 0.0005753937149991179, "loss": 1.8202, "step": 11706 }, { "epoch": 0.39, "grad_norm": 0.43689900636672974, "learning_rate": 0.0005753895692207794, "loss": 1.9102, "step": 11707 }, { "epoch": 0.39, "grad_norm": 0.4241548180580139, "learning_rate": 0.0005753854231081586, "loss": 1.8706, "step": 11708 }, { "epoch": 0.39, "grad_norm": 0.45790350437164307, "learning_rate": 0.0005753812766612604, "loss": 1.8526, "step": 11709 }, { "epoch": 0.39, "grad_norm": 0.4499433934688568, "learning_rate": 0.00057537712988009, "loss": 1.9002, "step": 11710 }, { "epoch": 0.39, "grad_norm": 0.43143704533576965, "learning_rate": 0.0005753729827646523, "loss": 1.8689, "step": 11711 }, { "epoch": 0.39, "grad_norm": 0.4460744261741638, "learning_rate": 0.0005753688353149524, "loss": 1.9213, "step": 11712 }, { "epoch": 0.39, "grad_norm": 0.4265111982822418, "learning_rate": 0.0005753646875309952, "loss": 1.8563, "step": 11713 }, { "epoch": 0.39, "grad_norm": 0.454071581363678, "learning_rate": 0.000575360539412786, "loss": 1.9303, "step": 11714 }, { "epoch": 0.39, "grad_norm": 0.4646734893321991, "learning_rate": 0.0005753563909603296, "loss": 1.8789, "step": 11715 }, { "epoch": 0.39, "grad_norm": 0.46268174052238464, "learning_rate": 0.0005753522421736311, "loss": 1.9078, "step": 11716 }, { "epoch": 0.39, "grad_norm": 0.456440806388855, "learning_rate": 0.0005753480930526956, "loss": 1.8658, "step": 11717 }, { "epoch": 0.39, "grad_norm": 0.43499886989593506, "learning_rate": 0.000575343943597528, "loss": 1.9203, "step": 11718 }, { "epoch": 0.39, "grad_norm": 0.43567976355552673, "learning_rate": 0.0005753397938081335, "loss": 1.9214, "step": 11719 }, { "epoch": 0.39, "grad_norm": 0.455171674489975, "learning_rate": 0.000575335643684517, "loss": 1.8853, "step": 11720 }, { "epoch": 0.39, "grad_norm": 0.4672301411628723, "learning_rate": 0.0005753314932266837, "loss": 1.9012, "step": 11721 }, { "epoch": 0.39, "grad_norm": 0.47995656728744507, "learning_rate": 0.0005753273424346384, "loss": 1.9487, "step": 11722 }, { "epoch": 0.39, "grad_norm": 0.45335808396339417, "learning_rate": 0.0005753231913083863, "loss": 1.9213, "step": 11723 }, { "epoch": 0.39, "grad_norm": 0.44712546467781067, "learning_rate": 0.0005753190398479326, "loss": 1.9398, "step": 11724 }, { "epoch": 0.39, "grad_norm": 0.43704113364219666, "learning_rate": 0.0005753148880532819, "loss": 1.9181, "step": 11725 }, { "epoch": 0.39, "grad_norm": 0.44052180647850037, "learning_rate": 0.0005753107359244395, "loss": 1.8611, "step": 11726 }, { "epoch": 0.39, "grad_norm": 0.45478159189224243, "learning_rate": 0.0005753065834614107, "loss": 1.8888, "step": 11727 }, { "epoch": 0.39, "grad_norm": 0.4495181441307068, "learning_rate": 0.0005753024306642, "loss": 1.8491, "step": 11728 }, { "epoch": 0.39, "grad_norm": 0.4429357051849365, "learning_rate": 0.0005752982775328128, "loss": 1.8549, "step": 11729 }, { "epoch": 0.39, "grad_norm": 0.4494568705558777, "learning_rate": 0.0005752941240672541, "loss": 1.9109, "step": 11730 }, { "epoch": 0.39, "grad_norm": 0.44764626026153564, "learning_rate": 0.000575289970267529, "loss": 1.919, "step": 11731 }, { "epoch": 0.39, "grad_norm": 0.43958771228790283, "learning_rate": 0.0005752858161336421, "loss": 1.9305, "step": 11732 }, { "epoch": 0.39, "grad_norm": 0.4586695730686188, "learning_rate": 0.0005752816616655991, "loss": 1.9657, "step": 11733 }, { "epoch": 0.39, "grad_norm": 0.43174564838409424, "learning_rate": 0.0005752775068634046, "loss": 1.8598, "step": 11734 }, { "epoch": 0.39, "grad_norm": 0.4256381690502167, "learning_rate": 0.0005752733517270638, "loss": 1.8275, "step": 11735 }, { "epoch": 0.39, "grad_norm": 0.4368966519832611, "learning_rate": 0.0005752691962565816, "loss": 1.8403, "step": 11736 }, { "epoch": 0.39, "grad_norm": 0.45411574840545654, "learning_rate": 0.0005752650404519633, "loss": 1.8855, "step": 11737 }, { "epoch": 0.39, "grad_norm": 0.43674352765083313, "learning_rate": 0.0005752608843132138, "loss": 1.8551, "step": 11738 }, { "epoch": 0.39, "grad_norm": 0.45013999938964844, "learning_rate": 0.000575256727840338, "loss": 1.8576, "step": 11739 }, { "epoch": 0.39, "grad_norm": 0.427521288394928, "learning_rate": 0.0005752525710333412, "loss": 1.835, "step": 11740 }, { "epoch": 0.39, "grad_norm": 0.4317208528518677, "learning_rate": 0.0005752484138922283, "loss": 1.9104, "step": 11741 }, { "epoch": 0.39, "grad_norm": 0.451491117477417, "learning_rate": 0.0005752442564170043, "loss": 1.8975, "step": 11742 }, { "epoch": 0.39, "grad_norm": 0.4840371012687683, "learning_rate": 0.0005752400986076744, "loss": 1.8689, "step": 11743 }, { "epoch": 0.39, "grad_norm": 0.4343492388725281, "learning_rate": 0.0005752359404642437, "loss": 1.893, "step": 11744 }, { "epoch": 0.39, "grad_norm": 0.4343034029006958, "learning_rate": 0.000575231781986717, "loss": 1.9015, "step": 11745 }, { "epoch": 0.39, "grad_norm": 0.43273788690567017, "learning_rate": 0.0005752276231750994, "loss": 1.8271, "step": 11746 }, { "epoch": 0.39, "grad_norm": 0.4588202238082886, "learning_rate": 0.0005752234640293961, "loss": 1.863, "step": 11747 }, { "epoch": 0.39, "grad_norm": 0.4276812970638275, "learning_rate": 0.0005752193045496122, "loss": 1.9025, "step": 11748 }, { "epoch": 0.39, "grad_norm": 0.4466277062892914, "learning_rate": 0.0005752151447357525, "loss": 1.9095, "step": 11749 }, { "epoch": 0.39, "grad_norm": 0.4421829283237457, "learning_rate": 0.000575210984587822, "loss": 1.9622, "step": 11750 }, { "epoch": 0.39, "grad_norm": 0.47168052196502686, "learning_rate": 0.0005752068241058262, "loss": 1.8999, "step": 11751 }, { "epoch": 0.39, "grad_norm": 0.45560288429260254, "learning_rate": 0.0005752026632897697, "loss": 1.8392, "step": 11752 }, { "epoch": 0.39, "grad_norm": 0.44126227498054504, "learning_rate": 0.0005751985021396577, "loss": 1.8465, "step": 11753 }, { "epoch": 0.39, "grad_norm": 0.44069725275039673, "learning_rate": 0.0005751943406554954, "loss": 1.8936, "step": 11754 }, { "epoch": 0.39, "grad_norm": 0.4834713339805603, "learning_rate": 0.0005751901788372876, "loss": 1.8078, "step": 11755 }, { "epoch": 0.39, "grad_norm": 0.4759591221809387, "learning_rate": 0.0005751860166850394, "loss": 1.9205, "step": 11756 }, { "epoch": 0.39, "grad_norm": 0.43438783288002014, "learning_rate": 0.0005751818541987561, "loss": 1.8056, "step": 11757 }, { "epoch": 0.39, "grad_norm": 0.43751752376556396, "learning_rate": 0.0005751776913784425, "loss": 1.9347, "step": 11758 }, { "epoch": 0.39, "grad_norm": 0.4510529935359955, "learning_rate": 0.0005751735282241037, "loss": 1.8785, "step": 11759 }, { "epoch": 0.39, "grad_norm": 0.4276916980743408, "learning_rate": 0.0005751693647357448, "loss": 1.8616, "step": 11760 }, { "epoch": 0.39, "grad_norm": 0.4424766004085541, "learning_rate": 0.0005751652009133709, "loss": 1.8238, "step": 11761 }, { "epoch": 0.39, "grad_norm": 0.4643259644508362, "learning_rate": 0.0005751610367569869, "loss": 1.8328, "step": 11762 }, { "epoch": 0.39, "grad_norm": 0.4645092189311981, "learning_rate": 0.000575156872266598, "loss": 1.933, "step": 11763 }, { "epoch": 0.39, "grad_norm": 0.44260719418525696, "learning_rate": 0.0005751527074422092, "loss": 1.9017, "step": 11764 }, { "epoch": 0.39, "grad_norm": 0.4536689221858978, "learning_rate": 0.0005751485422838255, "loss": 1.8362, "step": 11765 }, { "epoch": 0.39, "grad_norm": 0.4828643500804901, "learning_rate": 0.000575144376791452, "loss": 1.8648, "step": 11766 }, { "epoch": 0.39, "grad_norm": 0.46840953826904297, "learning_rate": 0.0005751402109650938, "loss": 1.923, "step": 11767 }, { "epoch": 0.39, "grad_norm": 0.4495162069797516, "learning_rate": 0.000575136044804756, "loss": 1.884, "step": 11768 }, { "epoch": 0.39, "grad_norm": 0.46084803342819214, "learning_rate": 0.0005751318783104436, "loss": 1.8624, "step": 11769 }, { "epoch": 0.39, "grad_norm": 0.456370085477829, "learning_rate": 0.0005751277114821614, "loss": 1.8862, "step": 11770 }, { "epoch": 0.39, "grad_norm": 0.4394741952419281, "learning_rate": 0.000575123544319915, "loss": 1.9418, "step": 11771 }, { "epoch": 0.39, "grad_norm": 0.44104844331741333, "learning_rate": 0.000575119376823709, "loss": 1.8752, "step": 11772 }, { "epoch": 0.39, "grad_norm": 0.44647297263145447, "learning_rate": 0.0005751152089935486, "loss": 1.8781, "step": 11773 }, { "epoch": 0.39, "grad_norm": 0.4642085134983063, "learning_rate": 0.0005751110408294389, "loss": 1.916, "step": 11774 }, { "epoch": 0.39, "grad_norm": 0.4313717484474182, "learning_rate": 0.0005751068723313849, "loss": 1.9338, "step": 11775 }, { "epoch": 0.39, "grad_norm": 0.4374627470970154, "learning_rate": 0.0005751027034993916, "loss": 1.8764, "step": 11776 }, { "epoch": 0.39, "grad_norm": 0.4597265124320984, "learning_rate": 0.0005750985343334643, "loss": 1.9435, "step": 11777 }, { "epoch": 0.39, "grad_norm": 0.41919076442718506, "learning_rate": 0.0005750943648336079, "loss": 1.8725, "step": 11778 }, { "epoch": 0.39, "grad_norm": 0.42729097604751587, "learning_rate": 0.0005750901949998275, "loss": 1.8976, "step": 11779 }, { "epoch": 0.39, "grad_norm": 0.4556572437286377, "learning_rate": 0.000575086024832128, "loss": 1.8419, "step": 11780 }, { "epoch": 0.39, "grad_norm": 0.48657241463661194, "learning_rate": 0.0005750818543305147, "loss": 1.9102, "step": 11781 }, { "epoch": 0.39, "grad_norm": 0.4302806258201599, "learning_rate": 0.0005750776834949926, "loss": 1.873, "step": 11782 }, { "epoch": 0.39, "grad_norm": 0.4361996352672577, "learning_rate": 0.0005750735123255667, "loss": 1.9452, "step": 11783 }, { "epoch": 0.39, "grad_norm": 0.4789985120296478, "learning_rate": 0.0005750693408222421, "loss": 1.9125, "step": 11784 }, { "epoch": 0.39, "grad_norm": 0.4518519937992096, "learning_rate": 0.0005750651689850238, "loss": 1.8404, "step": 11785 }, { "epoch": 0.39, "grad_norm": 0.4508364200592041, "learning_rate": 0.000575060996813917, "loss": 1.8925, "step": 11786 }, { "epoch": 0.39, "grad_norm": 0.44158655405044556, "learning_rate": 0.0005750568243089265, "loss": 1.8501, "step": 11787 }, { "epoch": 0.39, "grad_norm": 0.5357291102409363, "learning_rate": 0.0005750526514700576, "loss": 1.8842, "step": 11788 }, { "epoch": 0.39, "grad_norm": 0.4507107138633728, "learning_rate": 0.0005750484782973154, "loss": 1.7892, "step": 11789 }, { "epoch": 0.39, "grad_norm": 0.4815078377723694, "learning_rate": 0.0005750443047907048, "loss": 1.952, "step": 11790 }, { "epoch": 0.39, "grad_norm": 0.5090914964675903, "learning_rate": 0.000575040130950231, "loss": 1.841, "step": 11791 }, { "epoch": 0.39, "grad_norm": 0.4668079614639282, "learning_rate": 0.000575035956775899, "loss": 1.9285, "step": 11792 }, { "epoch": 0.39, "grad_norm": 0.43593084812164307, "learning_rate": 0.0005750317822677138, "loss": 1.8348, "step": 11793 }, { "epoch": 0.39, "grad_norm": 0.48006221652030945, "learning_rate": 0.0005750276074256806, "loss": 1.8712, "step": 11794 }, { "epoch": 0.39, "grad_norm": 0.4804866313934326, "learning_rate": 0.0005750234322498044, "loss": 1.9171, "step": 11795 }, { "epoch": 0.39, "grad_norm": 0.4444129168987274, "learning_rate": 0.0005750192567400903, "loss": 1.8797, "step": 11796 }, { "epoch": 0.39, "grad_norm": 0.43094855546951294, "learning_rate": 0.0005750150808965433, "loss": 1.8656, "step": 11797 }, { "epoch": 0.39, "grad_norm": 0.44759026169776917, "learning_rate": 0.0005750109047191684, "loss": 1.8427, "step": 11798 }, { "epoch": 0.39, "grad_norm": 0.4931064546108246, "learning_rate": 0.000575006728207971, "loss": 1.9158, "step": 11799 }, { "epoch": 0.39, "grad_norm": 0.4284937381744385, "learning_rate": 0.0005750025513629559, "loss": 1.8336, "step": 11800 }, { "epoch": 0.39, "grad_norm": 0.4295881986618042, "learning_rate": 0.0005749983741841281, "loss": 1.8607, "step": 11801 }, { "epoch": 0.39, "grad_norm": 0.4585285484790802, "learning_rate": 0.000574994196671493, "loss": 1.8918, "step": 11802 }, { "epoch": 0.39, "grad_norm": 0.4816322326660156, "learning_rate": 0.0005749900188250552, "loss": 1.8278, "step": 11803 }, { "epoch": 0.39, "grad_norm": 0.4285564124584198, "learning_rate": 0.0005749858406448202, "loss": 1.8775, "step": 11804 }, { "epoch": 0.39, "grad_norm": 0.4540838301181793, "learning_rate": 0.0005749816621307929, "loss": 1.8748, "step": 11805 }, { "epoch": 0.39, "grad_norm": 0.4574947655200958, "learning_rate": 0.0005749774832829784, "loss": 1.8241, "step": 11806 }, { "epoch": 0.39, "grad_norm": 0.45723336935043335, "learning_rate": 0.0005749733041013817, "loss": 1.9267, "step": 11807 }, { "epoch": 0.39, "grad_norm": 0.4346132278442383, "learning_rate": 0.0005749691245860079, "loss": 1.9274, "step": 11808 }, { "epoch": 0.39, "grad_norm": 0.4404711425304413, "learning_rate": 0.0005749649447368622, "loss": 1.9244, "step": 11809 }, { "epoch": 0.39, "grad_norm": 0.46771499514579773, "learning_rate": 0.0005749607645539496, "loss": 1.8715, "step": 11810 }, { "epoch": 0.39, "grad_norm": 0.45237961411476135, "learning_rate": 0.000574956584037275, "loss": 1.8189, "step": 11811 }, { "epoch": 0.39, "grad_norm": 0.45447760820388794, "learning_rate": 0.0005749524031868436, "loss": 1.9261, "step": 11812 }, { "epoch": 0.39, "grad_norm": 0.4500628113746643, "learning_rate": 0.0005749482220026607, "loss": 1.8796, "step": 11813 }, { "epoch": 0.39, "grad_norm": 0.44091182947158813, "learning_rate": 0.0005749440404847309, "loss": 1.9317, "step": 11814 }, { "epoch": 0.39, "grad_norm": 0.43819212913513184, "learning_rate": 0.0005749398586330597, "loss": 1.834, "step": 11815 }, { "epoch": 0.39, "grad_norm": 0.4643435478210449, "learning_rate": 0.0005749356764476521, "loss": 1.8867, "step": 11816 }, { "epoch": 0.39, "grad_norm": 0.4448716640472412, "learning_rate": 0.0005749314939285129, "loss": 1.914, "step": 11817 }, { "epoch": 0.39, "grad_norm": 0.44811370968818665, "learning_rate": 0.0005749273110756475, "loss": 1.8721, "step": 11818 }, { "epoch": 0.39, "grad_norm": 0.45023131370544434, "learning_rate": 0.0005749231278890609, "loss": 1.9263, "step": 11819 }, { "epoch": 0.39, "grad_norm": 0.4425116777420044, "learning_rate": 0.000574918944368758, "loss": 1.848, "step": 11820 }, { "epoch": 0.39, "grad_norm": 0.4388856291770935, "learning_rate": 0.0005749147605147441, "loss": 1.9017, "step": 11821 }, { "epoch": 0.39, "grad_norm": 0.4625338613986969, "learning_rate": 0.0005749105763270241, "loss": 1.8414, "step": 11822 }, { "epoch": 0.39, "grad_norm": 0.4334571659564972, "learning_rate": 0.0005749063918056033, "loss": 1.9436, "step": 11823 }, { "epoch": 0.39, "grad_norm": 0.43387141823768616, "learning_rate": 0.0005749022069504866, "loss": 1.8907, "step": 11824 }, { "epoch": 0.39, "grad_norm": 0.46109193563461304, "learning_rate": 0.0005748980217616791, "loss": 1.9467, "step": 11825 }, { "epoch": 0.39, "grad_norm": 0.4478473365306854, "learning_rate": 0.0005748938362391858, "loss": 1.9119, "step": 11826 }, { "epoch": 0.39, "grad_norm": 0.4311264455318451, "learning_rate": 0.000574889650383012, "loss": 1.9557, "step": 11827 }, { "epoch": 0.39, "grad_norm": 0.4255058765411377, "learning_rate": 0.0005748854641931627, "loss": 1.8659, "step": 11828 }, { "epoch": 0.39, "grad_norm": 0.44802993535995483, "learning_rate": 0.0005748812776696429, "loss": 1.9258, "step": 11829 }, { "epoch": 0.39, "grad_norm": 0.45072323083877563, "learning_rate": 0.0005748770908124577, "loss": 1.8871, "step": 11830 }, { "epoch": 0.39, "grad_norm": 0.4408174455165863, "learning_rate": 0.0005748729036216122, "loss": 1.9094, "step": 11831 }, { "epoch": 0.39, "grad_norm": 0.4604911804199219, "learning_rate": 0.0005748687160971116, "loss": 1.8651, "step": 11832 }, { "epoch": 0.39, "grad_norm": 0.4440896511077881, "learning_rate": 0.0005748645282389608, "loss": 1.8822, "step": 11833 }, { "epoch": 0.39, "grad_norm": 0.4452151358127594, "learning_rate": 0.000574860340047165, "loss": 1.8549, "step": 11834 }, { "epoch": 0.39, "grad_norm": 0.44205397367477417, "learning_rate": 0.0005748561515217293, "loss": 1.8793, "step": 11835 }, { "epoch": 0.39, "grad_norm": 0.4368695914745331, "learning_rate": 0.0005748519626626586, "loss": 1.8878, "step": 11836 }, { "epoch": 0.39, "grad_norm": 0.4325340688228607, "learning_rate": 0.0005748477734699582, "loss": 1.8916, "step": 11837 }, { "epoch": 0.39, "grad_norm": 0.44169193506240845, "learning_rate": 0.0005748435839436332, "loss": 1.8732, "step": 11838 }, { "epoch": 0.39, "grad_norm": 0.4318515360355377, "learning_rate": 0.0005748393940836884, "loss": 1.851, "step": 11839 }, { "epoch": 0.39, "grad_norm": 0.43809542059898376, "learning_rate": 0.0005748352038901291, "loss": 1.8791, "step": 11840 }, { "epoch": 0.39, "grad_norm": 0.432750940322876, "learning_rate": 0.0005748310133629603, "loss": 1.8812, "step": 11841 }, { "epoch": 0.39, "grad_norm": 0.4441269636154175, "learning_rate": 0.0005748268225021873, "loss": 1.9077, "step": 11842 }, { "epoch": 0.39, "grad_norm": 0.44836825132369995, "learning_rate": 0.000574822631307815, "loss": 1.8602, "step": 11843 }, { "epoch": 0.39, "grad_norm": 0.4487285315990448, "learning_rate": 0.0005748184397798484, "loss": 1.9131, "step": 11844 }, { "epoch": 0.39, "grad_norm": 0.44995561242103577, "learning_rate": 0.0005748142479182929, "loss": 1.8113, "step": 11845 }, { "epoch": 0.39, "grad_norm": 0.43503424525260925, "learning_rate": 0.0005748100557231532, "loss": 1.8738, "step": 11846 }, { "epoch": 0.39, "grad_norm": 0.43731340765953064, "learning_rate": 0.0005748058631944347, "loss": 1.8579, "step": 11847 }, { "epoch": 0.39, "grad_norm": 0.4190795123577118, "learning_rate": 0.0005748016703321424, "loss": 1.865, "step": 11848 }, { "epoch": 0.39, "grad_norm": 0.4372096061706543, "learning_rate": 0.0005747974771362813, "loss": 1.8655, "step": 11849 }, { "epoch": 0.39, "grad_norm": 0.4470100998878479, "learning_rate": 0.0005747932836068565, "loss": 1.9267, "step": 11850 }, { "epoch": 0.39, "grad_norm": 0.45751717686653137, "learning_rate": 0.0005747890897438733, "loss": 1.8689, "step": 11851 }, { "epoch": 0.39, "grad_norm": 0.43444183468818665, "learning_rate": 0.0005747848955473364, "loss": 1.8985, "step": 11852 }, { "epoch": 0.39, "grad_norm": 0.46734315156936646, "learning_rate": 0.0005747807010172513, "loss": 1.9166, "step": 11853 }, { "epoch": 0.39, "grad_norm": 0.45433276891708374, "learning_rate": 0.000574776506153623, "loss": 1.9662, "step": 11854 }, { "epoch": 0.39, "grad_norm": 0.4419472813606262, "learning_rate": 0.0005747723109564563, "loss": 1.8932, "step": 11855 }, { "epoch": 0.39, "grad_norm": 0.5929685235023499, "learning_rate": 0.0005747681154257565, "loss": 1.9231, "step": 11856 }, { "epoch": 0.39, "grad_norm": 0.4592683017253876, "learning_rate": 0.0005747639195615289, "loss": 1.8371, "step": 11857 }, { "epoch": 0.39, "grad_norm": 0.4406043291091919, "learning_rate": 0.0005747597233637782, "loss": 1.9247, "step": 11858 }, { "epoch": 0.39, "grad_norm": 0.44440749287605286, "learning_rate": 0.0005747555268325098, "loss": 1.8788, "step": 11859 }, { "epoch": 0.39, "grad_norm": 0.4401625990867615, "learning_rate": 0.0005747513299677286, "loss": 1.9283, "step": 11860 }, { "epoch": 0.39, "grad_norm": 0.42568451166152954, "learning_rate": 0.0005747471327694398, "loss": 1.9122, "step": 11861 }, { "epoch": 0.39, "grad_norm": 0.43845444917678833, "learning_rate": 0.0005747429352376484, "loss": 1.9117, "step": 11862 }, { "epoch": 0.39, "grad_norm": 0.4365510046482086, "learning_rate": 0.0005747387373723597, "loss": 1.864, "step": 11863 }, { "epoch": 0.39, "grad_norm": 0.6691522598266602, "learning_rate": 0.0005747345391735784, "loss": 1.9604, "step": 11864 }, { "epoch": 0.39, "grad_norm": 0.4422411024570465, "learning_rate": 0.00057473034064131, "loss": 1.9263, "step": 11865 }, { "epoch": 0.39, "grad_norm": 0.4466384947299957, "learning_rate": 0.0005747261417755595, "loss": 1.8656, "step": 11866 }, { "epoch": 0.39, "grad_norm": 0.43747854232788086, "learning_rate": 0.0005747219425763319, "loss": 1.9279, "step": 11867 }, { "epoch": 0.39, "grad_norm": 0.42396804690361023, "learning_rate": 0.0005747177430436324, "loss": 1.8577, "step": 11868 }, { "epoch": 0.39, "grad_norm": 0.45293307304382324, "learning_rate": 0.0005747135431774659, "loss": 1.7689, "step": 11869 }, { "epoch": 0.39, "grad_norm": 0.45281097292900085, "learning_rate": 0.0005747093429778377, "loss": 1.914, "step": 11870 }, { "epoch": 0.39, "grad_norm": 0.45161235332489014, "learning_rate": 0.0005747051424447529, "loss": 1.8689, "step": 11871 }, { "epoch": 0.39, "grad_norm": 0.4506734311580658, "learning_rate": 0.0005747009415782165, "loss": 1.8383, "step": 11872 }, { "epoch": 0.4, "grad_norm": 0.46290478110313416, "learning_rate": 0.0005746967403782335, "loss": 1.9079, "step": 11873 }, { "epoch": 0.4, "grad_norm": 0.5120341181755066, "learning_rate": 0.0005746925388448093, "loss": 1.851, "step": 11874 }, { "epoch": 0.4, "grad_norm": 0.4411928057670593, "learning_rate": 0.0005746883369779488, "loss": 1.8875, "step": 11875 }, { "epoch": 0.4, "grad_norm": 0.49030575156211853, "learning_rate": 0.0005746841347776571, "loss": 1.906, "step": 11876 }, { "epoch": 0.4, "grad_norm": 0.4409823715686798, "learning_rate": 0.0005746799322439393, "loss": 1.9052, "step": 11877 }, { "epoch": 0.4, "grad_norm": 0.4655502736568451, "learning_rate": 0.0005746757293768006, "loss": 1.9321, "step": 11878 }, { "epoch": 0.4, "grad_norm": 0.4496873617172241, "learning_rate": 0.000574671526176246, "loss": 1.9241, "step": 11879 }, { "epoch": 0.4, "grad_norm": 0.434730589389801, "learning_rate": 0.0005746673226422807, "loss": 1.9195, "step": 11880 }, { "epoch": 0.4, "grad_norm": 0.4377002716064453, "learning_rate": 0.0005746631187749097, "loss": 1.8924, "step": 11881 }, { "epoch": 0.4, "grad_norm": 0.4368895888328552, "learning_rate": 0.0005746589145741381, "loss": 1.9784, "step": 11882 }, { "epoch": 0.4, "grad_norm": 0.4446508586406708, "learning_rate": 0.0005746547100399711, "loss": 1.8168, "step": 11883 }, { "epoch": 0.4, "grad_norm": 0.4552851617336273, "learning_rate": 0.0005746505051724138, "loss": 1.9444, "step": 11884 }, { "epoch": 0.4, "grad_norm": 0.4422162175178528, "learning_rate": 0.0005746462999714711, "loss": 1.8705, "step": 11885 }, { "epoch": 0.4, "grad_norm": 0.42851272225379944, "learning_rate": 0.0005746420944371483, "loss": 1.7909, "step": 11886 }, { "epoch": 0.4, "grad_norm": 0.4336490035057068, "learning_rate": 0.0005746378885694506, "loss": 1.8569, "step": 11887 }, { "epoch": 0.4, "grad_norm": 0.44709160923957825, "learning_rate": 0.0005746336823683828, "loss": 1.9539, "step": 11888 }, { "epoch": 0.4, "grad_norm": 0.4468957185745239, "learning_rate": 0.0005746294758339503, "loss": 1.8563, "step": 11889 }, { "epoch": 0.4, "grad_norm": 0.448849618434906, "learning_rate": 0.000574625268966158, "loss": 1.9261, "step": 11890 }, { "epoch": 0.4, "grad_norm": 0.4432523846626282, "learning_rate": 0.0005746210617650112, "loss": 1.8345, "step": 11891 }, { "epoch": 0.4, "grad_norm": 0.44753801822662354, "learning_rate": 0.0005746168542305148, "loss": 1.8694, "step": 11892 }, { "epoch": 0.4, "grad_norm": 0.4543321132659912, "learning_rate": 0.000574612646362674, "loss": 1.8977, "step": 11893 }, { "epoch": 0.4, "grad_norm": 0.44955551624298096, "learning_rate": 0.0005746084381614938, "loss": 1.9218, "step": 11894 }, { "epoch": 0.4, "grad_norm": 0.431670218706131, "learning_rate": 0.0005746042296269797, "loss": 1.959, "step": 11895 }, { "epoch": 0.4, "grad_norm": 0.4392600357532501, "learning_rate": 0.0005746000207591364, "loss": 1.879, "step": 11896 }, { "epoch": 0.4, "grad_norm": 0.4584779143333435, "learning_rate": 0.0005745958115579691, "loss": 1.9097, "step": 11897 }, { "epoch": 0.4, "grad_norm": 0.44587600231170654, "learning_rate": 0.0005745916020234829, "loss": 1.8555, "step": 11898 }, { "epoch": 0.4, "grad_norm": 0.43993398547172546, "learning_rate": 0.000574587392155683, "loss": 1.8899, "step": 11899 }, { "epoch": 0.4, "grad_norm": 0.4408922791481018, "learning_rate": 0.0005745831819545745, "loss": 1.8574, "step": 11900 }, { "epoch": 0.4, "grad_norm": 0.44661572575569153, "learning_rate": 0.0005745789714201624, "loss": 1.8598, "step": 11901 }, { "epoch": 0.4, "grad_norm": 0.45215272903442383, "learning_rate": 0.000574574760552452, "loss": 1.9043, "step": 11902 }, { "epoch": 0.4, "grad_norm": 0.4476437568664551, "learning_rate": 0.0005745705493514482, "loss": 1.9232, "step": 11903 }, { "epoch": 0.4, "grad_norm": 0.42991673946380615, "learning_rate": 0.0005745663378171563, "loss": 1.887, "step": 11904 }, { "epoch": 0.4, "grad_norm": 0.45224347710609436, "learning_rate": 0.0005745621259495811, "loss": 1.8962, "step": 11905 }, { "epoch": 0.4, "grad_norm": 0.45892608165740967, "learning_rate": 0.0005745579137487281, "loss": 1.9603, "step": 11906 }, { "epoch": 0.4, "grad_norm": 0.44122424721717834, "learning_rate": 0.0005745537012146023, "loss": 1.9184, "step": 11907 }, { "epoch": 0.4, "grad_norm": 0.4410025477409363, "learning_rate": 0.0005745494883472086, "loss": 1.8854, "step": 11908 }, { "epoch": 0.4, "grad_norm": 0.45306581258773804, "learning_rate": 0.0005745452751465524, "loss": 1.8184, "step": 11909 }, { "epoch": 0.4, "grad_norm": 0.44077086448669434, "learning_rate": 0.0005745410616126387, "loss": 1.8574, "step": 11910 }, { "epoch": 0.4, "grad_norm": 0.43869587779045105, "learning_rate": 0.0005745368477454725, "loss": 1.8567, "step": 11911 }, { "epoch": 0.4, "grad_norm": 0.4486594498157501, "learning_rate": 0.0005745326335450592, "loss": 1.902, "step": 11912 }, { "epoch": 0.4, "grad_norm": 0.43957388401031494, "learning_rate": 0.0005745284190114036, "loss": 1.915, "step": 11913 }, { "epoch": 0.4, "grad_norm": 0.44306686520576477, "learning_rate": 0.0005745242041445111, "loss": 1.7946, "step": 11914 }, { "epoch": 0.4, "grad_norm": 0.44483089447021484, "learning_rate": 0.0005745199889443863, "loss": 1.8698, "step": 11915 }, { "epoch": 0.4, "grad_norm": 0.4413352608680725, "learning_rate": 0.000574515773411035, "loss": 1.9259, "step": 11916 }, { "epoch": 0.4, "grad_norm": 0.4482608139514923, "learning_rate": 0.000574511557544462, "loss": 1.8904, "step": 11917 }, { "epoch": 0.4, "grad_norm": 0.4316861629486084, "learning_rate": 0.0005745073413446723, "loss": 1.9297, "step": 11918 }, { "epoch": 0.4, "grad_norm": 0.4330880343914032, "learning_rate": 0.0005745031248116711, "loss": 1.8929, "step": 11919 }, { "epoch": 0.4, "grad_norm": 0.4281061291694641, "learning_rate": 0.0005744989079454636, "loss": 1.8264, "step": 11920 }, { "epoch": 0.4, "grad_norm": 0.5671999454498291, "learning_rate": 0.0005744946907460549, "loss": 1.9176, "step": 11921 }, { "epoch": 0.4, "grad_norm": 0.4388304352760315, "learning_rate": 0.0005744904732134501, "loss": 1.7996, "step": 11922 }, { "epoch": 0.4, "grad_norm": 0.41896069049835205, "learning_rate": 0.0005744862553476543, "loss": 1.8265, "step": 11923 }, { "epoch": 0.4, "grad_norm": 0.5038936138153076, "learning_rate": 0.0005744820371486726, "loss": 1.9109, "step": 11924 }, { "epoch": 0.4, "grad_norm": 0.43620267510414124, "learning_rate": 0.0005744778186165101, "loss": 1.8386, "step": 11925 }, { "epoch": 0.4, "grad_norm": 0.44380316138267517, "learning_rate": 0.0005744735997511722, "loss": 1.9708, "step": 11926 }, { "epoch": 0.4, "grad_norm": 0.45265626907348633, "learning_rate": 0.0005744693805526636, "loss": 1.9605, "step": 11927 }, { "epoch": 0.4, "grad_norm": 0.439957857131958, "learning_rate": 0.0005744651610209896, "loss": 1.9487, "step": 11928 }, { "epoch": 0.4, "grad_norm": 0.4379899799823761, "learning_rate": 0.0005744609411561553, "loss": 1.8743, "step": 11929 }, { "epoch": 0.4, "grad_norm": 0.43567872047424316, "learning_rate": 0.000574456720958166, "loss": 1.9537, "step": 11930 }, { "epoch": 0.4, "grad_norm": 0.4313720762729645, "learning_rate": 0.0005744525004270266, "loss": 1.9254, "step": 11931 }, { "epoch": 0.4, "grad_norm": 0.43504583835601807, "learning_rate": 0.0005744482795627423, "loss": 1.8543, "step": 11932 }, { "epoch": 0.4, "grad_norm": 0.4231075048446655, "learning_rate": 0.0005744440583653183, "loss": 1.8591, "step": 11933 }, { "epoch": 0.4, "grad_norm": 0.43634745478630066, "learning_rate": 0.0005744398368347596, "loss": 1.8602, "step": 11934 }, { "epoch": 0.4, "grad_norm": 0.437677264213562, "learning_rate": 0.0005744356149710713, "loss": 1.9109, "step": 11935 }, { "epoch": 0.4, "grad_norm": 0.4459607005119324, "learning_rate": 0.0005744313927742587, "loss": 1.8946, "step": 11936 }, { "epoch": 0.4, "grad_norm": 0.4594866931438446, "learning_rate": 0.0005744271702443267, "loss": 1.8916, "step": 11937 }, { "epoch": 0.4, "grad_norm": 0.44157925248146057, "learning_rate": 0.0005744229473812807, "loss": 1.8741, "step": 11938 }, { "epoch": 0.4, "grad_norm": 0.4416658878326416, "learning_rate": 0.0005744187241851256, "loss": 1.934, "step": 11939 }, { "epoch": 0.4, "grad_norm": 0.44413039088249207, "learning_rate": 0.0005744145006558667, "loss": 1.8367, "step": 11940 }, { "epoch": 0.4, "grad_norm": 0.45524513721466064, "learning_rate": 0.0005744102767935089, "loss": 1.7279, "step": 11941 }, { "epoch": 0.4, "grad_norm": 0.4326198697090149, "learning_rate": 0.0005744060525980575, "loss": 1.8353, "step": 11942 }, { "epoch": 0.4, "grad_norm": 0.43708619475364685, "learning_rate": 0.0005744018280695176, "loss": 1.8467, "step": 11943 }, { "epoch": 0.4, "grad_norm": 0.4504489004611969, "learning_rate": 0.0005743976032078944, "loss": 1.7833, "step": 11944 }, { "epoch": 0.4, "grad_norm": 0.4332549571990967, "learning_rate": 0.0005743933780131928, "loss": 1.827, "step": 11945 }, { "epoch": 0.4, "grad_norm": 0.4291582405567169, "learning_rate": 0.0005743891524854181, "loss": 1.9247, "step": 11946 }, { "epoch": 0.4, "grad_norm": 0.4370098412036896, "learning_rate": 0.0005743849266245754, "loss": 1.8617, "step": 11947 }, { "epoch": 0.4, "grad_norm": 0.433553010225296, "learning_rate": 0.0005743807004306698, "loss": 1.8868, "step": 11948 }, { "epoch": 0.4, "grad_norm": 0.4347041845321655, "learning_rate": 0.0005743764739037066, "loss": 1.8676, "step": 11949 }, { "epoch": 0.4, "grad_norm": 0.45820385217666626, "learning_rate": 0.0005743722470436906, "loss": 1.9367, "step": 11950 }, { "epoch": 0.4, "grad_norm": 0.4466920793056488, "learning_rate": 0.0005743680198506272, "loss": 1.8883, "step": 11951 }, { "epoch": 0.4, "grad_norm": 0.4547451138496399, "learning_rate": 0.0005743637923245215, "loss": 1.9015, "step": 11952 }, { "epoch": 0.4, "grad_norm": 0.4486975371837616, "learning_rate": 0.0005743595644653785, "loss": 1.9077, "step": 11953 }, { "epoch": 0.4, "grad_norm": 0.4371243715286255, "learning_rate": 0.0005743553362732035, "loss": 1.9346, "step": 11954 }, { "epoch": 0.4, "grad_norm": 0.440336674451828, "learning_rate": 0.0005743511077480015, "loss": 1.9238, "step": 11955 }, { "epoch": 0.4, "grad_norm": 0.4658084213733673, "learning_rate": 0.0005743468788897777, "loss": 1.9174, "step": 11956 }, { "epoch": 0.4, "grad_norm": 0.4435138404369354, "learning_rate": 0.0005743426496985372, "loss": 1.8381, "step": 11957 }, { "epoch": 0.4, "grad_norm": 0.4315485656261444, "learning_rate": 0.0005743384201742851, "loss": 1.9562, "step": 11958 }, { "epoch": 0.4, "grad_norm": 0.4404140114784241, "learning_rate": 0.0005743341903170266, "loss": 1.9181, "step": 11959 }, { "epoch": 0.4, "grad_norm": 0.4895190894603729, "learning_rate": 0.0005743299601267668, "loss": 1.8509, "step": 11960 }, { "epoch": 0.4, "grad_norm": 0.4570404887199402, "learning_rate": 0.0005743257296035108, "loss": 1.9209, "step": 11961 }, { "epoch": 0.4, "grad_norm": 0.47445806860923767, "learning_rate": 0.0005743214987472639, "loss": 1.881, "step": 11962 }, { "epoch": 0.4, "grad_norm": 0.4886467158794403, "learning_rate": 0.0005743172675580311, "loss": 1.9239, "step": 11963 }, { "epoch": 0.4, "grad_norm": 0.4488133490085602, "learning_rate": 0.0005743130360358176, "loss": 1.8512, "step": 11964 }, { "epoch": 0.4, "grad_norm": 0.4405922591686249, "learning_rate": 0.0005743088041806284, "loss": 1.8812, "step": 11965 }, { "epoch": 0.4, "grad_norm": 0.47567224502563477, "learning_rate": 0.0005743045719924688, "loss": 1.8331, "step": 11966 }, { "epoch": 0.4, "grad_norm": 0.49630007147789, "learning_rate": 0.0005743003394713439, "loss": 1.8614, "step": 11967 }, { "epoch": 0.4, "grad_norm": 0.44900405406951904, "learning_rate": 0.0005742961066172586, "loss": 1.9249, "step": 11968 }, { "epoch": 0.4, "grad_norm": 0.5029863715171814, "learning_rate": 0.0005742918734302183, "loss": 1.8515, "step": 11969 }, { "epoch": 0.4, "grad_norm": 0.4809436798095703, "learning_rate": 0.0005742876399102283, "loss": 1.8531, "step": 11970 }, { "epoch": 0.4, "grad_norm": 0.46755897998809814, "learning_rate": 0.0005742834060572933, "loss": 1.8368, "step": 11971 }, { "epoch": 0.4, "grad_norm": 0.4344409108161926, "learning_rate": 0.0005742791718714187, "loss": 1.8761, "step": 11972 }, { "epoch": 0.4, "grad_norm": 0.47813642024993896, "learning_rate": 0.0005742749373526096, "loss": 1.8861, "step": 11973 }, { "epoch": 0.4, "grad_norm": 0.4789331555366516, "learning_rate": 0.0005742707025008713, "loss": 1.8942, "step": 11974 }, { "epoch": 0.4, "grad_norm": 0.43151894211769104, "learning_rate": 0.0005742664673162086, "loss": 1.7726, "step": 11975 }, { "epoch": 0.4, "grad_norm": 0.44999122619628906, "learning_rate": 0.0005742622317986268, "loss": 1.855, "step": 11976 }, { "epoch": 0.4, "grad_norm": 0.4426162540912628, "learning_rate": 0.0005742579959481312, "loss": 1.8429, "step": 11977 }, { "epoch": 0.4, "grad_norm": 0.4507998526096344, "learning_rate": 0.0005742537597647267, "loss": 1.8836, "step": 11978 }, { "epoch": 0.4, "grad_norm": 0.4283570945262909, "learning_rate": 0.0005742495232484185, "loss": 1.8645, "step": 11979 }, { "epoch": 0.4, "grad_norm": 0.4645891487598419, "learning_rate": 0.000574245286399212, "loss": 1.8675, "step": 11980 }, { "epoch": 0.4, "grad_norm": 0.4581699073314667, "learning_rate": 0.000574241049217112, "loss": 1.9033, "step": 11981 }, { "epoch": 0.4, "grad_norm": 0.47104281187057495, "learning_rate": 0.0005742368117021237, "loss": 1.8963, "step": 11982 }, { "epoch": 0.4, "grad_norm": 0.44177842140197754, "learning_rate": 0.0005742325738542524, "loss": 1.8495, "step": 11983 }, { "epoch": 0.4, "grad_norm": 0.4321921467781067, "learning_rate": 0.0005742283356735032, "loss": 1.9598, "step": 11984 }, { "epoch": 0.4, "grad_norm": 0.4464617073535919, "learning_rate": 0.0005742240971598811, "loss": 1.8901, "step": 11985 }, { "epoch": 0.4, "grad_norm": 0.44615796208381653, "learning_rate": 0.0005742198583133914, "loss": 1.9209, "step": 11986 }, { "epoch": 0.4, "grad_norm": 0.4536688029766083, "learning_rate": 0.0005742156191340392, "loss": 1.8407, "step": 11987 }, { "epoch": 0.4, "grad_norm": 0.4361073672771454, "learning_rate": 0.0005742113796218297, "loss": 1.8817, "step": 11988 }, { "epoch": 0.4, "grad_norm": 0.45386576652526855, "learning_rate": 0.0005742071397767678, "loss": 1.8695, "step": 11989 }, { "epoch": 0.4, "grad_norm": 0.4380977153778076, "learning_rate": 0.0005742028995988591, "loss": 1.9409, "step": 11990 }, { "epoch": 0.4, "grad_norm": 0.4384751617908478, "learning_rate": 0.0005741986590881084, "loss": 1.9306, "step": 11991 }, { "epoch": 0.4, "grad_norm": 0.46614935994148254, "learning_rate": 0.0005741944182445208, "loss": 1.9853, "step": 11992 }, { "epoch": 0.4, "grad_norm": 0.42672199010849, "learning_rate": 0.0005741901770681015, "loss": 1.8282, "step": 11993 }, { "epoch": 0.4, "grad_norm": 0.44137370586395264, "learning_rate": 0.000574185935558856, "loss": 1.8403, "step": 11994 }, { "epoch": 0.4, "grad_norm": 0.46778035163879395, "learning_rate": 0.0005741816937167891, "loss": 1.9252, "step": 11995 }, { "epoch": 0.4, "grad_norm": 0.43828943371772766, "learning_rate": 0.0005741774515419059, "loss": 1.9133, "step": 11996 }, { "epoch": 0.4, "grad_norm": 0.4388963580131531, "learning_rate": 0.0005741732090342118, "loss": 1.8482, "step": 11997 }, { "epoch": 0.4, "grad_norm": 0.44614529609680176, "learning_rate": 0.0005741689661937117, "loss": 1.9784, "step": 11998 }, { "epoch": 0.4, "grad_norm": 0.42624780535697937, "learning_rate": 0.000574164723020411, "loss": 1.8884, "step": 11999 }, { "epoch": 0.4, "grad_norm": 0.47077277302742004, "learning_rate": 0.0005741604795143146, "loss": 1.8589, "step": 12000 }, { "epoch": 0.4, "grad_norm": 0.4310387969017029, "learning_rate": 0.0005741562356754279, "loss": 1.853, "step": 12001 }, { "epoch": 0.4, "grad_norm": 0.4459100663661957, "learning_rate": 0.0005741519915037558, "loss": 1.8195, "step": 12002 }, { "epoch": 0.4, "grad_norm": 0.4339960515499115, "learning_rate": 0.0005741477469993037, "loss": 1.8493, "step": 12003 }, { "epoch": 0.4, "grad_norm": 0.43860799074172974, "learning_rate": 0.0005741435021620765, "loss": 1.938, "step": 12004 }, { "epoch": 0.4, "grad_norm": 0.439406156539917, "learning_rate": 0.0005741392569920796, "loss": 1.8915, "step": 12005 }, { "epoch": 0.4, "grad_norm": 0.44525769352912903, "learning_rate": 0.000574135011489318, "loss": 1.8451, "step": 12006 }, { "epoch": 0.4, "grad_norm": 0.4344927966594696, "learning_rate": 0.0005741307656537968, "loss": 1.9207, "step": 12007 }, { "epoch": 0.4, "grad_norm": 0.6415059566497803, "learning_rate": 0.0005741265194855213, "loss": 1.9206, "step": 12008 }, { "epoch": 0.4, "grad_norm": 0.4764571487903595, "learning_rate": 0.0005741222729844966, "loss": 1.9816, "step": 12009 }, { "epoch": 0.4, "grad_norm": 0.4328537881374359, "learning_rate": 0.0005741180261507279, "loss": 1.8973, "step": 12010 }, { "epoch": 0.4, "grad_norm": 0.47022584080696106, "learning_rate": 0.0005741137789842202, "loss": 1.874, "step": 12011 }, { "epoch": 0.4, "grad_norm": 0.44512343406677246, "learning_rate": 0.0005741095314849789, "loss": 1.9008, "step": 12012 }, { "epoch": 0.4, "grad_norm": 0.44545280933380127, "learning_rate": 0.000574105283653009, "loss": 1.886, "step": 12013 }, { "epoch": 0.4, "grad_norm": 0.44972899556159973, "learning_rate": 0.0005741010354883156, "loss": 1.9566, "step": 12014 }, { "epoch": 0.4, "grad_norm": 0.4698401093482971, "learning_rate": 0.0005740967869909039, "loss": 1.9709, "step": 12015 }, { "epoch": 0.4, "grad_norm": 0.4539450407028198, "learning_rate": 0.0005740925381607792, "loss": 1.8337, "step": 12016 }, { "epoch": 0.4, "grad_norm": 0.4422764182090759, "learning_rate": 0.0005740882889979464, "loss": 1.956, "step": 12017 }, { "epoch": 0.4, "grad_norm": 0.4483695328235626, "learning_rate": 0.0005740840395024109, "loss": 1.8474, "step": 12018 }, { "epoch": 0.4, "grad_norm": 0.4375894069671631, "learning_rate": 0.0005740797896741777, "loss": 1.7712, "step": 12019 }, { "epoch": 0.4, "grad_norm": 0.4534991979598999, "learning_rate": 0.0005740755395132521, "loss": 1.9445, "step": 12020 }, { "epoch": 0.4, "grad_norm": 0.44042840600013733, "learning_rate": 0.0005740712890196391, "loss": 1.8742, "step": 12021 }, { "epoch": 0.4, "grad_norm": 0.4509821832180023, "learning_rate": 0.000574067038193344, "loss": 1.8541, "step": 12022 }, { "epoch": 0.4, "grad_norm": 0.45627084374427795, "learning_rate": 0.0005740627870343719, "loss": 1.9668, "step": 12023 }, { "epoch": 0.4, "grad_norm": 0.4443897306919098, "learning_rate": 0.0005740585355427279, "loss": 1.8393, "step": 12024 }, { "epoch": 0.4, "grad_norm": 0.4407682418823242, "learning_rate": 0.0005740542837184173, "loss": 1.8647, "step": 12025 }, { "epoch": 0.4, "grad_norm": 0.43188560009002686, "learning_rate": 0.0005740500315614452, "loss": 1.8206, "step": 12026 }, { "epoch": 0.4, "grad_norm": 0.43823373317718506, "learning_rate": 0.0005740457790718165, "loss": 1.9415, "step": 12027 }, { "epoch": 0.4, "grad_norm": 0.44518426060676575, "learning_rate": 0.0005740415262495369, "loss": 1.8294, "step": 12028 }, { "epoch": 0.4, "grad_norm": 0.4451533854007721, "learning_rate": 0.0005740372730946111, "loss": 1.8543, "step": 12029 }, { "epoch": 0.4, "grad_norm": 0.4502401649951935, "learning_rate": 0.0005740330196070444, "loss": 2.0026, "step": 12030 }, { "epoch": 0.4, "grad_norm": 0.44880977272987366, "learning_rate": 0.0005740287657868421, "loss": 1.9218, "step": 12031 }, { "epoch": 0.4, "grad_norm": 0.44322600960731506, "learning_rate": 0.0005740245116340092, "loss": 1.864, "step": 12032 }, { "epoch": 0.4, "grad_norm": 0.44906365871429443, "learning_rate": 0.0005740202571485509, "loss": 1.9035, "step": 12033 }, { "epoch": 0.4, "grad_norm": 0.44759494066238403, "learning_rate": 0.0005740160023304725, "loss": 1.8532, "step": 12034 }, { "epoch": 0.4, "grad_norm": 0.4856276214122772, "learning_rate": 0.0005740117471797789, "loss": 1.8296, "step": 12035 }, { "epoch": 0.4, "grad_norm": 0.43463221192359924, "learning_rate": 0.0005740074916964755, "loss": 1.9068, "step": 12036 }, { "epoch": 0.4, "grad_norm": 0.4315972924232483, "learning_rate": 0.0005740032358805673, "loss": 1.9078, "step": 12037 }, { "epoch": 0.4, "grad_norm": 0.479936420917511, "learning_rate": 0.0005739989797320596, "loss": 1.7691, "step": 12038 }, { "epoch": 0.4, "grad_norm": 0.4522128999233246, "learning_rate": 0.0005739947232509575, "loss": 1.9328, "step": 12039 }, { "epoch": 0.4, "grad_norm": 0.43879255652427673, "learning_rate": 0.0005739904664372662, "loss": 1.8474, "step": 12040 }, { "epoch": 0.4, "grad_norm": 0.4367835819721222, "learning_rate": 0.0005739862092909909, "loss": 1.9205, "step": 12041 }, { "epoch": 0.4, "grad_norm": 0.4570528566837311, "learning_rate": 0.0005739819518121366, "loss": 1.86, "step": 12042 }, { "epoch": 0.4, "grad_norm": 0.4316008687019348, "learning_rate": 0.0005739776940007086, "loss": 1.8987, "step": 12043 }, { "epoch": 0.4, "grad_norm": 0.4514690637588501, "learning_rate": 0.000573973435856712, "loss": 1.9423, "step": 12044 }, { "epoch": 0.4, "grad_norm": 0.5000973343849182, "learning_rate": 0.0005739691773801521, "loss": 1.9088, "step": 12045 }, { "epoch": 0.4, "grad_norm": 0.4482618272304535, "learning_rate": 0.000573964918571034, "loss": 1.8934, "step": 12046 }, { "epoch": 0.4, "grad_norm": 0.44028687477111816, "learning_rate": 0.0005739606594293629, "loss": 1.8875, "step": 12047 }, { "epoch": 0.4, "grad_norm": 0.437242716550827, "learning_rate": 0.0005739563999551438, "loss": 1.8835, "step": 12048 }, { "epoch": 0.4, "grad_norm": 0.47956618666648865, "learning_rate": 0.000573952140148382, "loss": 1.8694, "step": 12049 }, { "epoch": 0.4, "grad_norm": 0.45644545555114746, "learning_rate": 0.0005739478800090827, "loss": 1.8152, "step": 12050 }, { "epoch": 0.4, "grad_norm": 0.43180903792381287, "learning_rate": 0.0005739436195372511, "loss": 1.8848, "step": 12051 }, { "epoch": 0.4, "grad_norm": 0.4514903724193573, "learning_rate": 0.0005739393587328922, "loss": 1.8371, "step": 12052 }, { "epoch": 0.4, "grad_norm": 0.4455411434173584, "learning_rate": 0.0005739350975960114, "loss": 1.8821, "step": 12053 }, { "epoch": 0.4, "grad_norm": 0.4671303331851959, "learning_rate": 0.0005739308361266137, "loss": 1.9034, "step": 12054 }, { "epoch": 0.4, "grad_norm": 0.43708959221839905, "learning_rate": 0.0005739265743247043, "loss": 1.8339, "step": 12055 }, { "epoch": 0.4, "grad_norm": 0.4396675229072571, "learning_rate": 0.0005739223121902884, "loss": 1.8796, "step": 12056 }, { "epoch": 0.4, "grad_norm": 0.4684629440307617, "learning_rate": 0.0005739180497233712, "loss": 1.9158, "step": 12057 }, { "epoch": 0.4, "grad_norm": 0.42334577441215515, "learning_rate": 0.0005739137869239579, "loss": 1.834, "step": 12058 }, { "epoch": 0.4, "grad_norm": 0.4471665024757385, "learning_rate": 0.0005739095237920535, "loss": 1.9575, "step": 12059 }, { "epoch": 0.4, "grad_norm": 0.4467265009880066, "learning_rate": 0.0005739052603276634, "loss": 1.9377, "step": 12060 }, { "epoch": 0.4, "grad_norm": 0.4482952952384949, "learning_rate": 0.0005739009965307927, "loss": 1.9063, "step": 12061 }, { "epoch": 0.4, "grad_norm": 0.4489041268825531, "learning_rate": 0.0005738967324014464, "loss": 1.8337, "step": 12062 }, { "epoch": 0.4, "grad_norm": 0.42877206206321716, "learning_rate": 0.00057389246793963, "loss": 1.8592, "step": 12063 }, { "epoch": 0.4, "grad_norm": 0.44274184107780457, "learning_rate": 0.0005738882031453484, "loss": 1.8411, "step": 12064 }, { "epoch": 0.4, "grad_norm": 0.42570096254348755, "learning_rate": 0.0005738839380186069, "loss": 1.8659, "step": 12065 }, { "epoch": 0.4, "grad_norm": 0.43928250670433044, "learning_rate": 0.0005738796725594106, "loss": 1.9495, "step": 12066 }, { "epoch": 0.4, "grad_norm": 0.43141457438468933, "learning_rate": 0.0005738754067677647, "loss": 1.7938, "step": 12067 }, { "epoch": 0.4, "grad_norm": 0.44712451100349426, "learning_rate": 0.0005738711406436745, "loss": 1.8625, "step": 12068 }, { "epoch": 0.4, "grad_norm": 0.4322642385959625, "learning_rate": 0.0005738668741871451, "loss": 1.9359, "step": 12069 }, { "epoch": 0.4, "grad_norm": 0.43907639384269714, "learning_rate": 0.0005738626073981816, "loss": 1.896, "step": 12070 }, { "epoch": 0.4, "grad_norm": 0.4402408003807068, "learning_rate": 0.0005738583402767893, "loss": 1.91, "step": 12071 }, { "epoch": 0.4, "grad_norm": 0.435149222612381, "learning_rate": 0.0005738540728229733, "loss": 1.8138, "step": 12072 }, { "epoch": 0.4, "grad_norm": 0.4425782561302185, "learning_rate": 0.0005738498050367389, "loss": 1.8306, "step": 12073 }, { "epoch": 0.4, "grad_norm": 0.430512011051178, "learning_rate": 0.000573845536918091, "loss": 1.8512, "step": 12074 }, { "epoch": 0.4, "grad_norm": 0.43425413966178894, "learning_rate": 0.000573841268467035, "loss": 1.8549, "step": 12075 }, { "epoch": 0.4, "grad_norm": 0.4342021644115448, "learning_rate": 0.0005738369996835761, "loss": 1.7888, "step": 12076 }, { "epoch": 0.4, "grad_norm": 0.43545234203338623, "learning_rate": 0.0005738327305677195, "loss": 1.8403, "step": 12077 }, { "epoch": 0.4, "grad_norm": 0.456472784280777, "learning_rate": 0.0005738284611194703, "loss": 1.9094, "step": 12078 }, { "epoch": 0.4, "grad_norm": 0.42830246686935425, "learning_rate": 0.0005738241913388337, "loss": 1.8982, "step": 12079 }, { "epoch": 0.4, "grad_norm": 0.4503733217716217, "learning_rate": 0.0005738199212258148, "loss": 1.927, "step": 12080 }, { "epoch": 0.4, "grad_norm": 0.4312422573566437, "learning_rate": 0.0005738156507804189, "loss": 1.9152, "step": 12081 }, { "epoch": 0.4, "grad_norm": 0.44012513756752014, "learning_rate": 0.0005738113800026511, "loss": 1.8395, "step": 12082 }, { "epoch": 0.4, "grad_norm": 0.45308226346969604, "learning_rate": 0.0005738071088925167, "loss": 1.8411, "step": 12083 }, { "epoch": 0.4, "grad_norm": 0.4303596317768097, "learning_rate": 0.0005738028374500208, "loss": 1.8951, "step": 12084 }, { "epoch": 0.4, "grad_norm": 0.4484589993953705, "learning_rate": 0.0005737985656751685, "loss": 1.831, "step": 12085 }, { "epoch": 0.4, "grad_norm": 0.4398045241832733, "learning_rate": 0.0005737942935679652, "loss": 1.8637, "step": 12086 }, { "epoch": 0.4, "grad_norm": 0.4191463887691498, "learning_rate": 0.0005737900211284159, "loss": 1.8164, "step": 12087 }, { "epoch": 0.4, "grad_norm": 0.4412354528903961, "learning_rate": 0.0005737857483565259, "loss": 1.8546, "step": 12088 }, { "epoch": 0.4, "grad_norm": 0.43612295389175415, "learning_rate": 0.0005737814752523004, "loss": 1.8471, "step": 12089 }, { "epoch": 0.4, "grad_norm": 0.4429141879081726, "learning_rate": 0.0005737772018157444, "loss": 1.8923, "step": 12090 }, { "epoch": 0.4, "grad_norm": 0.44748061895370483, "learning_rate": 0.0005737729280468633, "loss": 1.9066, "step": 12091 }, { "epoch": 0.4, "grad_norm": 0.4359363615512848, "learning_rate": 0.0005737686539456622, "loss": 1.867, "step": 12092 }, { "epoch": 0.4, "grad_norm": 0.4692656099796295, "learning_rate": 0.0005737643795121463, "loss": 1.8704, "step": 12093 }, { "epoch": 0.4, "grad_norm": 0.5366610288619995, "learning_rate": 0.0005737601047463208, "loss": 1.834, "step": 12094 }, { "epoch": 0.4, "grad_norm": 0.4476853907108307, "learning_rate": 0.0005737558296481907, "loss": 1.9068, "step": 12095 }, { "epoch": 0.4, "grad_norm": 0.4370328485965729, "learning_rate": 0.0005737515542177615, "loss": 1.9031, "step": 12096 }, { "epoch": 0.4, "grad_norm": 0.445741206407547, "learning_rate": 0.0005737472784550382, "loss": 1.917, "step": 12097 }, { "epoch": 0.4, "grad_norm": 0.4621383547782898, "learning_rate": 0.0005737430023600261, "loss": 1.8824, "step": 12098 }, { "epoch": 0.4, "grad_norm": 0.4345915615558624, "learning_rate": 0.0005737387259327302, "loss": 1.8113, "step": 12099 }, { "epoch": 0.4, "grad_norm": 0.4479311406612396, "learning_rate": 0.000573734449173156, "loss": 1.8458, "step": 12100 }, { "epoch": 0.4, "grad_norm": 0.4541761577129364, "learning_rate": 0.0005737301720813083, "loss": 1.8583, "step": 12101 }, { "epoch": 0.4, "grad_norm": 0.4718378484249115, "learning_rate": 0.0005737258946571927, "loss": 1.9467, "step": 12102 }, { "epoch": 0.4, "grad_norm": 0.4182814061641693, "learning_rate": 0.0005737216169008142, "loss": 1.8763, "step": 12103 }, { "epoch": 0.4, "grad_norm": 0.43205735087394714, "learning_rate": 0.0005737173388121779, "loss": 1.8853, "step": 12104 }, { "epoch": 0.4, "grad_norm": 0.4317910075187683, "learning_rate": 0.0005737130603912891, "loss": 1.7711, "step": 12105 }, { "epoch": 0.4, "grad_norm": 0.44328945875167847, "learning_rate": 0.000573708781638153, "loss": 1.882, "step": 12106 }, { "epoch": 0.4, "grad_norm": 0.45331838726997375, "learning_rate": 0.0005737045025527746, "loss": 1.8707, "step": 12107 }, { "epoch": 0.4, "grad_norm": 0.4322451949119568, "learning_rate": 0.0005737002231351595, "loss": 1.9283, "step": 12108 }, { "epoch": 0.4, "grad_norm": 0.43415194749832153, "learning_rate": 0.0005736959433853125, "loss": 1.9002, "step": 12109 }, { "epoch": 0.4, "grad_norm": 0.45662182569503784, "learning_rate": 0.000573691663303239, "loss": 1.9178, "step": 12110 }, { "epoch": 0.4, "grad_norm": 0.4339269697666168, "learning_rate": 0.0005736873828889442, "loss": 1.8291, "step": 12111 }, { "epoch": 0.4, "grad_norm": 0.44023486971855164, "learning_rate": 0.0005736831021424331, "loss": 1.8578, "step": 12112 }, { "epoch": 0.4, "grad_norm": 0.4236035943031311, "learning_rate": 0.0005736788210637112, "loss": 1.8545, "step": 12113 }, { "epoch": 0.4, "grad_norm": 0.42626428604125977, "learning_rate": 0.0005736745396527835, "loss": 1.872, "step": 12114 }, { "epoch": 0.4, "grad_norm": 0.44025754928588867, "learning_rate": 0.0005736702579096552, "loss": 1.8449, "step": 12115 }, { "epoch": 0.4, "grad_norm": 0.4584340751171112, "learning_rate": 0.0005736659758343315, "loss": 1.9842, "step": 12116 }, { "epoch": 0.4, "grad_norm": 0.43880903720855713, "learning_rate": 0.0005736616934268178, "loss": 1.8899, "step": 12117 }, { "epoch": 0.4, "grad_norm": 0.42321595549583435, "learning_rate": 0.0005736574106871188, "loss": 1.8787, "step": 12118 }, { "epoch": 0.4, "grad_norm": 0.43248534202575684, "learning_rate": 0.0005736531276152403, "loss": 1.8655, "step": 12119 }, { "epoch": 0.4, "grad_norm": 0.4293038845062256, "learning_rate": 0.0005736488442111872, "loss": 1.8186, "step": 12120 }, { "epoch": 0.4, "grad_norm": 0.43549472093582153, "learning_rate": 0.0005736445604749646, "loss": 1.8583, "step": 12121 }, { "epoch": 0.4, "grad_norm": 0.4475117623806, "learning_rate": 0.0005736402764065779, "loss": 1.9545, "step": 12122 }, { "epoch": 0.4, "grad_norm": 0.4412913918495178, "learning_rate": 0.0005736359920060323, "loss": 1.9508, "step": 12123 }, { "epoch": 0.4, "grad_norm": 0.43781402707099915, "learning_rate": 0.0005736317072733328, "loss": 1.8685, "step": 12124 }, { "epoch": 0.4, "grad_norm": 0.434966117143631, "learning_rate": 0.0005736274222084847, "loss": 1.9186, "step": 12125 }, { "epoch": 0.4, "grad_norm": 0.4322581887245178, "learning_rate": 0.0005736231368114932, "loss": 1.8228, "step": 12126 }, { "epoch": 0.4, "grad_norm": 0.45625975728034973, "learning_rate": 0.0005736188510823637, "loss": 1.8816, "step": 12127 }, { "epoch": 0.4, "grad_norm": 0.4352129399776459, "learning_rate": 0.0005736145650211012, "loss": 1.9469, "step": 12128 }, { "epoch": 0.4, "grad_norm": 0.4512300193309784, "learning_rate": 0.0005736102786277109, "loss": 1.8821, "step": 12129 }, { "epoch": 0.4, "grad_norm": 0.46367523074150085, "learning_rate": 0.0005736059919021979, "loss": 1.8988, "step": 12130 }, { "epoch": 0.4, "grad_norm": 0.44486096501350403, "learning_rate": 0.0005736017048445677, "loss": 1.9057, "step": 12131 }, { "epoch": 0.4, "grad_norm": 0.44972434639930725, "learning_rate": 0.0005735974174548253, "loss": 1.8874, "step": 12132 }, { "epoch": 0.4, "grad_norm": 0.511296272277832, "learning_rate": 0.0005735931297329759, "loss": 1.845, "step": 12133 }, { "epoch": 0.4, "grad_norm": 0.4335121214389801, "learning_rate": 0.0005735888416790249, "loss": 1.8356, "step": 12134 }, { "epoch": 0.4, "grad_norm": 0.4454852342605591, "learning_rate": 0.0005735845532929772, "loss": 1.8993, "step": 12135 }, { "epoch": 0.4, "grad_norm": 0.4355069398880005, "learning_rate": 0.0005735802645748382, "loss": 1.9309, "step": 12136 }, { "epoch": 0.4, "grad_norm": 0.4289010167121887, "learning_rate": 0.0005735759755246131, "loss": 1.8302, "step": 12137 }, { "epoch": 0.4, "grad_norm": 0.44428253173828125, "learning_rate": 0.0005735716861423071, "loss": 1.8809, "step": 12138 }, { "epoch": 0.4, "grad_norm": 0.43089377880096436, "learning_rate": 0.0005735673964279253, "loss": 1.9117, "step": 12139 }, { "epoch": 0.4, "grad_norm": 0.43098801374435425, "learning_rate": 0.0005735631063814731, "loss": 1.8502, "step": 12140 }, { "epoch": 0.4, "grad_norm": 0.42963260412216187, "learning_rate": 0.0005735588160029555, "loss": 1.7961, "step": 12141 }, { "epoch": 0.4, "grad_norm": 0.43606674671173096, "learning_rate": 0.0005735545252923778, "loss": 1.797, "step": 12142 }, { "epoch": 0.4, "grad_norm": 0.4394797682762146, "learning_rate": 0.0005735502342497453, "loss": 1.8545, "step": 12143 }, { "epoch": 0.4, "grad_norm": 0.4394085705280304, "learning_rate": 0.000573545942875063, "loss": 1.8338, "step": 12144 }, { "epoch": 0.4, "grad_norm": 0.42118796706199646, "learning_rate": 0.0005735416511683364, "loss": 1.8253, "step": 12145 }, { "epoch": 0.4, "grad_norm": 0.439223051071167, "learning_rate": 0.0005735373591295704, "loss": 1.9885, "step": 12146 }, { "epoch": 0.4, "grad_norm": 0.44311004877090454, "learning_rate": 0.0005735330667587704, "loss": 1.9736, "step": 12147 }, { "epoch": 0.4, "grad_norm": 0.4429643452167511, "learning_rate": 0.0005735287740559415, "loss": 1.9076, "step": 12148 }, { "epoch": 0.4, "grad_norm": 0.42802491784095764, "learning_rate": 0.000573524481021089, "loss": 1.8437, "step": 12149 }, { "epoch": 0.4, "grad_norm": 0.445613831281662, "learning_rate": 0.0005735201876542181, "loss": 1.9712, "step": 12150 }, { "epoch": 0.4, "grad_norm": 0.5894531607627869, "learning_rate": 0.000573515893955334, "loss": 1.8723, "step": 12151 }, { "epoch": 0.4, "grad_norm": 0.4178468883037567, "learning_rate": 0.0005735115999244419, "loss": 1.8133, "step": 12152 }, { "epoch": 0.4, "grad_norm": 0.4349457025527954, "learning_rate": 0.0005735073055615471, "loss": 1.8143, "step": 12153 }, { "epoch": 0.4, "grad_norm": 0.5519276261329651, "learning_rate": 0.0005735030108666546, "loss": 1.9619, "step": 12154 }, { "epoch": 0.4, "grad_norm": 0.4448012113571167, "learning_rate": 0.0005734987158397698, "loss": 1.9106, "step": 12155 }, { "epoch": 0.4, "grad_norm": 0.44092774391174316, "learning_rate": 0.0005734944204808978, "loss": 1.8243, "step": 12156 }, { "epoch": 0.4, "grad_norm": 0.4363282322883606, "learning_rate": 0.0005734901247900439, "loss": 1.9139, "step": 12157 }, { "epoch": 0.4, "grad_norm": 0.43341800570487976, "learning_rate": 0.0005734858287672134, "loss": 1.8023, "step": 12158 }, { "epoch": 0.4, "grad_norm": 0.4504202902317047, "learning_rate": 0.0005734815324124113, "loss": 1.8547, "step": 12159 }, { "epoch": 0.4, "grad_norm": 0.43351027369499207, "learning_rate": 0.000573477235725643, "loss": 1.8478, "step": 12160 }, { "epoch": 0.4, "grad_norm": 0.43730518221855164, "learning_rate": 0.0005734729387069135, "loss": 1.9297, "step": 12161 }, { "epoch": 0.4, "grad_norm": 0.4232112467288971, "learning_rate": 0.0005734686413562282, "loss": 1.9209, "step": 12162 }, { "epoch": 0.4, "grad_norm": 0.439020037651062, "learning_rate": 0.0005734643436735922, "loss": 1.7888, "step": 12163 }, { "epoch": 0.4, "grad_norm": 0.4317943751811981, "learning_rate": 0.0005734600456590109, "loss": 1.9555, "step": 12164 }, { "epoch": 0.4, "grad_norm": 0.45169997215270996, "learning_rate": 0.0005734557473124893, "loss": 1.8393, "step": 12165 }, { "epoch": 0.4, "grad_norm": 0.4435790181159973, "learning_rate": 0.0005734514486340327, "loss": 1.9461, "step": 12166 }, { "epoch": 0.4, "grad_norm": 0.44646456837654114, "learning_rate": 0.0005734471496236464, "loss": 1.8885, "step": 12167 }, { "epoch": 0.4, "grad_norm": 0.443514883518219, "learning_rate": 0.0005734428502813355, "loss": 1.9215, "step": 12168 }, { "epoch": 0.4, "grad_norm": 0.4384801387786865, "learning_rate": 0.0005734385506071055, "loss": 1.9119, "step": 12169 }, { "epoch": 0.4, "grad_norm": 0.4447494447231293, "learning_rate": 0.000573434250600961, "loss": 1.9378, "step": 12170 }, { "epoch": 0.4, "grad_norm": 0.43249979615211487, "learning_rate": 0.0005734299502629079, "loss": 1.8528, "step": 12171 }, { "epoch": 0.4, "grad_norm": 0.4352436363697052, "learning_rate": 0.000573425649592951, "loss": 1.7963, "step": 12172 }, { "epoch": 0.4, "grad_norm": 0.4684124290943146, "learning_rate": 0.0005734213485910957, "loss": 1.8402, "step": 12173 }, { "epoch": 0.41, "grad_norm": 0.43235471844673157, "learning_rate": 0.0005734170472573471, "loss": 1.903, "step": 12174 }, { "epoch": 0.41, "grad_norm": 0.4459989070892334, "learning_rate": 0.0005734127455917107, "loss": 1.9061, "step": 12175 }, { "epoch": 0.41, "grad_norm": 0.42784005403518677, "learning_rate": 0.0005734084435941913, "loss": 1.9431, "step": 12176 }, { "epoch": 0.41, "grad_norm": 0.4458586871623993, "learning_rate": 0.0005734041412647944, "loss": 1.8692, "step": 12177 }, { "epoch": 0.41, "grad_norm": 0.43567410111427307, "learning_rate": 0.0005733998386035251, "loss": 1.8489, "step": 12178 }, { "epoch": 0.41, "grad_norm": 0.4429904818534851, "learning_rate": 0.0005733955356103887, "loss": 1.9144, "step": 12179 }, { "epoch": 0.41, "grad_norm": 0.4360181987285614, "learning_rate": 0.0005733912322853905, "loss": 1.8904, "step": 12180 }, { "epoch": 0.41, "grad_norm": 0.42749616503715515, "learning_rate": 0.0005733869286285356, "loss": 1.9192, "step": 12181 }, { "epoch": 0.41, "grad_norm": 0.4375655949115753, "learning_rate": 0.0005733826246398291, "loss": 1.8906, "step": 12182 }, { "epoch": 0.41, "grad_norm": 0.44476184248924255, "learning_rate": 0.0005733783203192765, "loss": 1.9253, "step": 12183 }, { "epoch": 0.41, "grad_norm": 0.4315057098865509, "learning_rate": 0.0005733740156668829, "loss": 1.8471, "step": 12184 }, { "epoch": 0.41, "grad_norm": 0.44028326869010925, "learning_rate": 0.0005733697106826535, "loss": 1.9346, "step": 12185 }, { "epoch": 0.41, "grad_norm": 0.42948272824287415, "learning_rate": 0.0005733654053665935, "loss": 1.8203, "step": 12186 }, { "epoch": 0.41, "grad_norm": 0.439418226480484, "learning_rate": 0.0005733610997187083, "loss": 1.87, "step": 12187 }, { "epoch": 0.41, "grad_norm": 0.467380166053772, "learning_rate": 0.0005733567937390028, "loss": 1.9351, "step": 12188 }, { "epoch": 0.41, "grad_norm": 0.45294585824012756, "learning_rate": 0.0005733524874274827, "loss": 1.9099, "step": 12189 }, { "epoch": 0.41, "grad_norm": 0.43407896161079407, "learning_rate": 0.0005733481807841528, "loss": 1.9075, "step": 12190 }, { "epoch": 0.41, "grad_norm": 0.4449014365673065, "learning_rate": 0.0005733438738090184, "loss": 1.8918, "step": 12191 }, { "epoch": 0.41, "grad_norm": 0.44965222477912903, "learning_rate": 0.000573339566502085, "loss": 1.8715, "step": 12192 }, { "epoch": 0.41, "grad_norm": 0.4534381628036499, "learning_rate": 0.0005733352588633576, "loss": 1.9344, "step": 12193 }, { "epoch": 0.41, "grad_norm": 0.44065067172050476, "learning_rate": 0.0005733309508928414, "loss": 1.8669, "step": 12194 }, { "epoch": 0.41, "grad_norm": 0.44479963183403015, "learning_rate": 0.0005733266425905417, "loss": 1.861, "step": 12195 }, { "epoch": 0.41, "grad_norm": 0.44466784596443176, "learning_rate": 0.0005733223339564637, "loss": 1.901, "step": 12196 }, { "epoch": 0.41, "grad_norm": 0.4380582869052887, "learning_rate": 0.0005733180249906128, "loss": 1.8309, "step": 12197 }, { "epoch": 0.41, "grad_norm": 0.4490942358970642, "learning_rate": 0.000573313715692994, "loss": 1.9543, "step": 12198 }, { "epoch": 0.41, "grad_norm": 0.43106570839881897, "learning_rate": 0.0005733094060636126, "loss": 1.8156, "step": 12199 }, { "epoch": 0.41, "grad_norm": 0.4704664349555969, "learning_rate": 0.0005733050961024739, "loss": 1.8463, "step": 12200 }, { "epoch": 0.41, "grad_norm": 0.44462233781814575, "learning_rate": 0.000573300785809583, "loss": 1.9103, "step": 12201 }, { "epoch": 0.41, "grad_norm": 0.4365735352039337, "learning_rate": 0.0005732964751849454, "loss": 1.8246, "step": 12202 }, { "epoch": 0.41, "grad_norm": 0.4436396658420563, "learning_rate": 0.000573292164228566, "loss": 1.7866, "step": 12203 }, { "epoch": 0.41, "grad_norm": 0.45255759358406067, "learning_rate": 0.0005732878529404502, "loss": 1.8619, "step": 12204 }, { "epoch": 0.41, "grad_norm": 0.45319750905036926, "learning_rate": 0.0005732835413206031, "loss": 1.8722, "step": 12205 }, { "epoch": 0.41, "grad_norm": 0.4235638380050659, "learning_rate": 0.0005732792293690303, "loss": 1.9036, "step": 12206 }, { "epoch": 0.41, "grad_norm": 0.4381618797779083, "learning_rate": 0.0005732749170857366, "loss": 1.8831, "step": 12207 }, { "epoch": 0.41, "grad_norm": 0.4419626295566559, "learning_rate": 0.0005732706044707273, "loss": 1.8327, "step": 12208 }, { "epoch": 0.41, "grad_norm": 0.43925994634628296, "learning_rate": 0.000573266291524008, "loss": 1.9708, "step": 12209 }, { "epoch": 0.41, "grad_norm": 0.4343622028827667, "learning_rate": 0.0005732619782455835, "loss": 1.9264, "step": 12210 }, { "epoch": 0.41, "grad_norm": 0.42844656109809875, "learning_rate": 0.0005732576646354594, "loss": 1.8529, "step": 12211 }, { "epoch": 0.41, "grad_norm": 0.4365466237068176, "learning_rate": 0.0005732533506936407, "loss": 1.9037, "step": 12212 }, { "epoch": 0.41, "grad_norm": 0.4399828612804413, "learning_rate": 0.0005732490364201327, "loss": 1.8488, "step": 12213 }, { "epoch": 0.41, "grad_norm": 0.4318384826183319, "learning_rate": 0.0005732447218149406, "loss": 1.8243, "step": 12214 }, { "epoch": 0.41, "grad_norm": 0.43416306376457214, "learning_rate": 0.0005732404068780696, "loss": 1.9431, "step": 12215 }, { "epoch": 0.41, "grad_norm": 0.44892409443855286, "learning_rate": 0.0005732360916095251, "loss": 1.9539, "step": 12216 }, { "epoch": 0.41, "grad_norm": 0.4478091895580292, "learning_rate": 0.0005732317760093123, "loss": 1.9017, "step": 12217 }, { "epoch": 0.41, "grad_norm": 0.43422943353652954, "learning_rate": 0.0005732274600774363, "loss": 1.8795, "step": 12218 }, { "epoch": 0.41, "grad_norm": 0.4606967568397522, "learning_rate": 0.0005732231438139024, "loss": 1.8316, "step": 12219 }, { "epoch": 0.41, "grad_norm": 0.4602341949939728, "learning_rate": 0.0005732188272187158, "loss": 1.9279, "step": 12220 }, { "epoch": 0.41, "grad_norm": 0.42220401763916016, "learning_rate": 0.0005732145102918819, "loss": 1.9217, "step": 12221 }, { "epoch": 0.41, "grad_norm": 0.4288710355758667, "learning_rate": 0.0005732101930334059, "loss": 1.8426, "step": 12222 }, { "epoch": 0.41, "grad_norm": 0.44535186886787415, "learning_rate": 0.0005732058754432929, "loss": 1.8921, "step": 12223 }, { "epoch": 0.41, "grad_norm": 0.4489661157131195, "learning_rate": 0.0005732015575215482, "loss": 1.795, "step": 12224 }, { "epoch": 0.41, "grad_norm": 0.4553002119064331, "learning_rate": 0.0005731972392681771, "loss": 1.8576, "step": 12225 }, { "epoch": 0.41, "grad_norm": 0.470111608505249, "learning_rate": 0.0005731929206831848, "loss": 1.8677, "step": 12226 }, { "epoch": 0.41, "grad_norm": 0.4294833838939667, "learning_rate": 0.0005731886017665765, "loss": 1.8742, "step": 12227 }, { "epoch": 0.41, "grad_norm": 0.43433433771133423, "learning_rate": 0.0005731842825183576, "loss": 1.8551, "step": 12228 }, { "epoch": 0.41, "grad_norm": 0.47655120491981506, "learning_rate": 0.0005731799629385331, "loss": 1.9044, "step": 12229 }, { "epoch": 0.41, "grad_norm": 0.4317702054977417, "learning_rate": 0.0005731756430271084, "loss": 1.8393, "step": 12230 }, { "epoch": 0.41, "grad_norm": 0.45111221075057983, "learning_rate": 0.0005731713227840889, "loss": 1.9085, "step": 12231 }, { "epoch": 0.41, "grad_norm": 0.4438559114933014, "learning_rate": 0.0005731670022094794, "loss": 1.9167, "step": 12232 }, { "epoch": 0.41, "grad_norm": 0.46183449029922485, "learning_rate": 0.0005731626813032856, "loss": 1.9169, "step": 12233 }, { "epoch": 0.41, "grad_norm": 0.45715221762657166, "learning_rate": 0.0005731583600655124, "loss": 1.9503, "step": 12234 }, { "epoch": 0.41, "grad_norm": 0.4424295425415039, "learning_rate": 0.0005731540384961654, "loss": 1.817, "step": 12235 }, { "epoch": 0.41, "grad_norm": 0.4341377317905426, "learning_rate": 0.0005731497165952495, "loss": 1.8427, "step": 12236 }, { "epoch": 0.41, "grad_norm": 0.4612026810646057, "learning_rate": 0.0005731453943627701, "loss": 1.8536, "step": 12237 }, { "epoch": 0.41, "grad_norm": 0.4502672851085663, "learning_rate": 0.0005731410717987324, "loss": 1.9136, "step": 12238 }, { "epoch": 0.41, "grad_norm": 0.43378740549087524, "learning_rate": 0.0005731367489031419, "loss": 1.8892, "step": 12239 }, { "epoch": 0.41, "grad_norm": 0.44683757424354553, "learning_rate": 0.0005731324256760034, "loss": 1.8446, "step": 12240 }, { "epoch": 0.41, "grad_norm": 0.46203410625457764, "learning_rate": 0.0005731281021173225, "loss": 1.8752, "step": 12241 }, { "epoch": 0.41, "grad_norm": 0.4356789290904999, "learning_rate": 0.0005731237782271042, "loss": 1.8175, "step": 12242 }, { "epoch": 0.41, "grad_norm": 0.4484662711620331, "learning_rate": 0.000573119454005354, "loss": 1.828, "step": 12243 }, { "epoch": 0.41, "grad_norm": 0.47054335474967957, "learning_rate": 0.000573115129452077, "loss": 1.8646, "step": 12244 }, { "epoch": 0.41, "grad_norm": 0.44643455743789673, "learning_rate": 0.0005731108045672784, "loss": 1.8424, "step": 12245 }, { "epoch": 0.41, "grad_norm": 0.44444987177848816, "learning_rate": 0.0005731064793509636, "loss": 1.8587, "step": 12246 }, { "epoch": 0.41, "grad_norm": 0.4609036445617676, "learning_rate": 0.0005731021538031378, "loss": 1.926, "step": 12247 }, { "epoch": 0.41, "grad_norm": 0.4466203451156616, "learning_rate": 0.0005730978279238062, "loss": 1.9287, "step": 12248 }, { "epoch": 0.41, "grad_norm": 0.4787379503250122, "learning_rate": 0.000573093501712974, "loss": 1.8593, "step": 12249 }, { "epoch": 0.41, "grad_norm": 0.44355079531669617, "learning_rate": 0.0005730891751706466, "loss": 1.8608, "step": 12250 }, { "epoch": 0.41, "grad_norm": 0.454817533493042, "learning_rate": 0.0005730848482968292, "loss": 1.8724, "step": 12251 }, { "epoch": 0.41, "grad_norm": 0.42816588282585144, "learning_rate": 0.0005730805210915269, "loss": 1.8839, "step": 12252 }, { "epoch": 0.41, "grad_norm": 0.42520514130592346, "learning_rate": 0.0005730761935547452, "loss": 1.8836, "step": 12253 }, { "epoch": 0.41, "grad_norm": 0.43320542573928833, "learning_rate": 0.0005730718656864893, "loss": 1.8869, "step": 12254 }, { "epoch": 0.41, "grad_norm": 0.4407373368740082, "learning_rate": 0.0005730675374867643, "loss": 1.8615, "step": 12255 }, { "epoch": 0.41, "grad_norm": 0.46337205171585083, "learning_rate": 0.0005730632089555756, "loss": 1.9759, "step": 12256 }, { "epoch": 0.41, "grad_norm": 0.4358447194099426, "learning_rate": 0.0005730588800929283, "loss": 1.8953, "step": 12257 }, { "epoch": 0.41, "grad_norm": 0.417740136384964, "learning_rate": 0.0005730545508988278, "loss": 1.9177, "step": 12258 }, { "epoch": 0.41, "grad_norm": 0.437509149312973, "learning_rate": 0.0005730502213732794, "loss": 1.888, "step": 12259 }, { "epoch": 0.41, "grad_norm": 0.4462796449661255, "learning_rate": 0.0005730458915162881, "loss": 1.9859, "step": 12260 }, { "epoch": 0.41, "grad_norm": 0.44506344199180603, "learning_rate": 0.0005730415613278593, "loss": 1.8907, "step": 12261 }, { "epoch": 0.41, "grad_norm": 0.4324875771999359, "learning_rate": 0.0005730372308079985, "loss": 1.8337, "step": 12262 }, { "epoch": 0.41, "grad_norm": 0.4383673369884491, "learning_rate": 0.0005730328999567105, "loss": 1.813, "step": 12263 }, { "epoch": 0.41, "grad_norm": 0.4468529224395752, "learning_rate": 0.000573028568774001, "loss": 1.8807, "step": 12264 }, { "epoch": 0.41, "grad_norm": 0.43542376160621643, "learning_rate": 0.0005730242372598749, "loss": 1.8765, "step": 12265 }, { "epoch": 0.41, "grad_norm": 0.47094908356666565, "learning_rate": 0.0005730199054143376, "loss": 1.8948, "step": 12266 }, { "epoch": 0.41, "grad_norm": 0.5821573734283447, "learning_rate": 0.0005730155732373945, "loss": 1.881, "step": 12267 }, { "epoch": 0.41, "grad_norm": 0.43268081545829773, "learning_rate": 0.0005730112407290505, "loss": 1.8622, "step": 12268 }, { "epoch": 0.41, "grad_norm": 0.4364655911922455, "learning_rate": 0.0005730069078893112, "loss": 1.8646, "step": 12269 }, { "epoch": 0.41, "grad_norm": 0.4411712884902954, "learning_rate": 0.0005730025747181819, "loss": 1.792, "step": 12270 }, { "epoch": 0.41, "grad_norm": 0.4295879006385803, "learning_rate": 0.0005729982412156675, "loss": 1.852, "step": 12271 }, { "epoch": 0.41, "grad_norm": 0.4302555024623871, "learning_rate": 0.0005729939073817735, "loss": 1.8142, "step": 12272 }, { "epoch": 0.41, "grad_norm": 0.47242358326911926, "learning_rate": 0.0005729895732165051, "loss": 1.8654, "step": 12273 }, { "epoch": 0.41, "grad_norm": 0.4532495141029358, "learning_rate": 0.0005729852387198676, "loss": 1.8492, "step": 12274 }, { "epoch": 0.41, "grad_norm": 0.4518159031867981, "learning_rate": 0.0005729809038918663, "loss": 1.8999, "step": 12275 }, { "epoch": 0.41, "grad_norm": 0.43328657746315, "learning_rate": 0.0005729765687325063, "loss": 1.8295, "step": 12276 }, { "epoch": 0.41, "grad_norm": 0.4297710955142975, "learning_rate": 0.000572972233241793, "loss": 1.9494, "step": 12277 }, { "epoch": 0.41, "grad_norm": 0.4414310157299042, "learning_rate": 0.0005729678974197316, "loss": 1.8401, "step": 12278 }, { "epoch": 0.41, "grad_norm": 0.4313057065010071, "learning_rate": 0.0005729635612663275, "loss": 1.8428, "step": 12279 }, { "epoch": 0.41, "grad_norm": 0.4406942129135132, "learning_rate": 0.0005729592247815858, "loss": 1.7915, "step": 12280 }, { "epoch": 0.41, "grad_norm": 0.4471684694290161, "learning_rate": 0.0005729548879655119, "loss": 1.8622, "step": 12281 }, { "epoch": 0.41, "grad_norm": 0.4290776252746582, "learning_rate": 0.0005729505508181108, "loss": 1.9097, "step": 12282 }, { "epoch": 0.41, "grad_norm": 0.45380860567092896, "learning_rate": 0.0005729462133393881, "loss": 1.9009, "step": 12283 }, { "epoch": 0.41, "grad_norm": 0.4416367709636688, "learning_rate": 0.0005729418755293489, "loss": 1.8205, "step": 12284 }, { "epoch": 0.41, "grad_norm": 0.43209314346313477, "learning_rate": 0.0005729375373879984, "loss": 1.8659, "step": 12285 }, { "epoch": 0.41, "grad_norm": 0.42954936623573303, "learning_rate": 0.000572933198915342, "loss": 1.874, "step": 12286 }, { "epoch": 0.41, "grad_norm": 0.4669335186481476, "learning_rate": 0.0005729288601113848, "loss": 1.9316, "step": 12287 }, { "epoch": 0.41, "grad_norm": 0.43229424953460693, "learning_rate": 0.0005729245209761323, "loss": 1.8849, "step": 12288 }, { "epoch": 0.41, "grad_norm": 0.4426420331001282, "learning_rate": 0.0005729201815095897, "loss": 1.8961, "step": 12289 }, { "epoch": 0.41, "grad_norm": 0.4380430281162262, "learning_rate": 0.000572915841711762, "loss": 1.8637, "step": 12290 }, { "epoch": 0.41, "grad_norm": 0.4368613362312317, "learning_rate": 0.0005729115015826549, "loss": 1.8816, "step": 12291 }, { "epoch": 0.41, "grad_norm": 0.4613552987575531, "learning_rate": 0.0005729071611222734, "loss": 1.9806, "step": 12292 }, { "epoch": 0.41, "grad_norm": 0.4305562973022461, "learning_rate": 0.0005729028203306228, "loss": 1.8509, "step": 12293 }, { "epoch": 0.41, "grad_norm": 0.43905362486839294, "learning_rate": 0.0005728984792077083, "loss": 1.9258, "step": 12294 }, { "epoch": 0.41, "grad_norm": 0.45326173305511475, "learning_rate": 0.0005728941377535354, "loss": 1.8627, "step": 12295 }, { "epoch": 0.41, "grad_norm": 0.44709423184394836, "learning_rate": 0.0005728897959681091, "loss": 1.8799, "step": 12296 }, { "epoch": 0.41, "grad_norm": 0.4479709565639496, "learning_rate": 0.0005728854538514349, "loss": 1.8796, "step": 12297 }, { "epoch": 0.41, "grad_norm": 0.4382953941822052, "learning_rate": 0.0005728811114035178, "loss": 1.9434, "step": 12298 }, { "epoch": 0.41, "grad_norm": 0.4534141719341278, "learning_rate": 0.0005728767686243633, "loss": 1.8798, "step": 12299 }, { "epoch": 0.41, "grad_norm": 0.44876450300216675, "learning_rate": 0.0005728724255139767, "loss": 1.9015, "step": 12300 }, { "epoch": 0.41, "grad_norm": 0.4350009560585022, "learning_rate": 0.0005728680820723633, "loss": 1.7488, "step": 12301 }, { "epoch": 0.41, "grad_norm": 0.44458475708961487, "learning_rate": 0.0005728637382995279, "loss": 1.9374, "step": 12302 }, { "epoch": 0.41, "grad_norm": 0.4360443949699402, "learning_rate": 0.0005728593941954764, "loss": 1.8449, "step": 12303 }, { "epoch": 0.41, "grad_norm": 0.4363599419593811, "learning_rate": 0.0005728550497602137, "loss": 1.8485, "step": 12304 }, { "epoch": 0.41, "grad_norm": 0.6759882569313049, "learning_rate": 0.0005728507049937452, "loss": 1.9092, "step": 12305 }, { "epoch": 0.41, "grad_norm": 0.4324794113636017, "learning_rate": 0.0005728463598960762, "loss": 1.938, "step": 12306 }, { "epoch": 0.41, "grad_norm": 0.4235023856163025, "learning_rate": 0.0005728420144672119, "loss": 1.8106, "step": 12307 }, { "epoch": 0.41, "grad_norm": 0.4442819654941559, "learning_rate": 0.0005728376687071575, "loss": 1.9122, "step": 12308 }, { "epoch": 0.41, "grad_norm": 0.4544711709022522, "learning_rate": 0.0005728333226159184, "loss": 1.9358, "step": 12309 }, { "epoch": 0.41, "grad_norm": 0.44730523228645325, "learning_rate": 0.0005728289761934998, "loss": 1.9868, "step": 12310 }, { "epoch": 0.41, "grad_norm": 0.42360180616378784, "learning_rate": 0.0005728246294399072, "loss": 1.8636, "step": 12311 }, { "epoch": 0.41, "grad_norm": 0.426606684923172, "learning_rate": 0.0005728202823551456, "loss": 1.8777, "step": 12312 }, { "epoch": 0.41, "grad_norm": 0.43691617250442505, "learning_rate": 0.0005728159349392203, "loss": 1.8432, "step": 12313 }, { "epoch": 0.41, "grad_norm": 0.4190920293331146, "learning_rate": 0.0005728115871921367, "loss": 1.8997, "step": 12314 }, { "epoch": 0.41, "grad_norm": 0.43665018677711487, "learning_rate": 0.0005728072391139001, "loss": 1.8663, "step": 12315 }, { "epoch": 0.41, "grad_norm": 0.45475101470947266, "learning_rate": 0.0005728028907045156, "loss": 1.9115, "step": 12316 }, { "epoch": 0.41, "grad_norm": 0.43405234813690186, "learning_rate": 0.0005727985419639887, "loss": 1.7708, "step": 12317 }, { "epoch": 0.41, "grad_norm": 0.42847776412963867, "learning_rate": 0.0005727941928923245, "loss": 1.781, "step": 12318 }, { "epoch": 0.41, "grad_norm": 0.4565171003341675, "learning_rate": 0.0005727898434895283, "loss": 1.8949, "step": 12319 }, { "epoch": 0.41, "grad_norm": 0.43343961238861084, "learning_rate": 0.0005727854937556055, "loss": 1.8434, "step": 12320 }, { "epoch": 0.41, "grad_norm": 0.4607372283935547, "learning_rate": 0.0005727811436905614, "loss": 1.879, "step": 12321 }, { "epoch": 0.41, "grad_norm": 0.4378821551799774, "learning_rate": 0.000572776793294401, "loss": 1.8913, "step": 12322 }, { "epoch": 0.41, "grad_norm": 0.42222756147384644, "learning_rate": 0.0005727724425671298, "loss": 1.8476, "step": 12323 }, { "epoch": 0.41, "grad_norm": 0.42343440651893616, "learning_rate": 0.0005727680915087531, "loss": 1.8247, "step": 12324 }, { "epoch": 0.41, "grad_norm": 0.4504584074020386, "learning_rate": 0.0005727637401192761, "loss": 1.8756, "step": 12325 }, { "epoch": 0.41, "grad_norm": 0.45273837447166443, "learning_rate": 0.0005727593883987041, "loss": 1.8408, "step": 12326 }, { "epoch": 0.41, "grad_norm": 0.4521074891090393, "learning_rate": 0.0005727550363470424, "loss": 1.8966, "step": 12327 }, { "epoch": 0.41, "grad_norm": 0.4470721185207367, "learning_rate": 0.0005727506839642963, "loss": 1.8532, "step": 12328 }, { "epoch": 0.41, "grad_norm": 0.4227227568626404, "learning_rate": 0.000572746331250471, "loss": 1.9126, "step": 12329 }, { "epoch": 0.41, "grad_norm": 0.4414573907852173, "learning_rate": 0.0005727419782055719, "loss": 1.9165, "step": 12330 }, { "epoch": 0.41, "grad_norm": 0.44624388217926025, "learning_rate": 0.0005727376248296042, "loss": 1.8918, "step": 12331 }, { "epoch": 0.41, "grad_norm": 0.43371328711509705, "learning_rate": 0.0005727332711225733, "loss": 1.8599, "step": 12332 }, { "epoch": 0.41, "grad_norm": 0.43405041098594666, "learning_rate": 0.0005727289170844843, "loss": 1.9863, "step": 12333 }, { "epoch": 0.41, "grad_norm": 0.4250347912311554, "learning_rate": 0.0005727245627153425, "loss": 1.8298, "step": 12334 }, { "epoch": 0.41, "grad_norm": 0.43278661370277405, "learning_rate": 0.0005727202080151534, "loss": 1.816, "step": 12335 }, { "epoch": 0.41, "grad_norm": 0.4397617280483246, "learning_rate": 0.0005727158529839221, "loss": 1.8471, "step": 12336 }, { "epoch": 0.41, "grad_norm": 0.4345618784427643, "learning_rate": 0.000572711497621654, "loss": 1.8516, "step": 12337 }, { "epoch": 0.41, "grad_norm": 0.43190404772758484, "learning_rate": 0.0005727071419283543, "loss": 1.8495, "step": 12338 }, { "epoch": 0.41, "grad_norm": 0.44894617795944214, "learning_rate": 0.0005727027859040282, "loss": 1.9074, "step": 12339 }, { "epoch": 0.41, "grad_norm": 0.45289623737335205, "learning_rate": 0.0005726984295486812, "loss": 1.9615, "step": 12340 }, { "epoch": 0.41, "grad_norm": 0.433178573846817, "learning_rate": 0.0005726940728623184, "loss": 1.9063, "step": 12341 }, { "epoch": 0.41, "grad_norm": 0.5203028321266174, "learning_rate": 0.0005726897158449453, "loss": 1.8276, "step": 12342 }, { "epoch": 0.41, "grad_norm": 0.4486790895462036, "learning_rate": 0.0005726853584965669, "loss": 1.8781, "step": 12343 }, { "epoch": 0.41, "grad_norm": 0.432544469833374, "learning_rate": 0.0005726810008171888, "loss": 1.8735, "step": 12344 }, { "epoch": 0.41, "grad_norm": 0.4365232288837433, "learning_rate": 0.0005726766428068161, "loss": 1.8736, "step": 12345 }, { "epoch": 0.41, "grad_norm": 0.431408554315567, "learning_rate": 0.0005726722844654541, "loss": 1.8025, "step": 12346 }, { "epoch": 0.41, "grad_norm": 0.4550953805446625, "learning_rate": 0.0005726679257931082, "loss": 1.9361, "step": 12347 }, { "epoch": 0.41, "grad_norm": 0.4360274076461792, "learning_rate": 0.0005726635667897836, "loss": 1.8907, "step": 12348 }, { "epoch": 0.41, "grad_norm": 0.4236355125904083, "learning_rate": 0.0005726592074554855, "loss": 1.8717, "step": 12349 }, { "epoch": 0.41, "grad_norm": 0.4428419768810272, "learning_rate": 0.0005726548477902193, "loss": 1.7996, "step": 12350 }, { "epoch": 0.41, "grad_norm": 0.4411514103412628, "learning_rate": 0.0005726504877939904, "loss": 1.8834, "step": 12351 }, { "epoch": 0.41, "grad_norm": 0.432619571685791, "learning_rate": 0.0005726461274668039, "loss": 1.9787, "step": 12352 }, { "epoch": 0.41, "grad_norm": 0.4248161017894745, "learning_rate": 0.0005726417668086653, "loss": 1.8481, "step": 12353 }, { "epoch": 0.41, "grad_norm": 0.4280874729156494, "learning_rate": 0.0005726374058195797, "loss": 1.8653, "step": 12354 }, { "epoch": 0.41, "grad_norm": 0.4379807710647583, "learning_rate": 0.0005726330444995524, "loss": 1.8697, "step": 12355 }, { "epoch": 0.41, "grad_norm": 0.42818886041641235, "learning_rate": 0.0005726286828485888, "loss": 1.9501, "step": 12356 }, { "epoch": 0.41, "grad_norm": 0.42583173513412476, "learning_rate": 0.0005726243208666941, "loss": 1.9134, "step": 12357 }, { "epoch": 0.41, "grad_norm": 0.4377453029155731, "learning_rate": 0.0005726199585538738, "loss": 1.9906, "step": 12358 }, { "epoch": 0.41, "grad_norm": 0.42640551924705505, "learning_rate": 0.0005726155959101328, "loss": 1.8046, "step": 12359 }, { "epoch": 0.41, "grad_norm": 0.5232694149017334, "learning_rate": 0.0005726112329354768, "loss": 1.9347, "step": 12360 }, { "epoch": 0.41, "grad_norm": 0.4434531331062317, "learning_rate": 0.0005726068696299109, "loss": 1.7787, "step": 12361 }, { "epoch": 0.41, "grad_norm": 0.4395577609539032, "learning_rate": 0.0005726025059934404, "loss": 1.8731, "step": 12362 }, { "epoch": 0.41, "grad_norm": 0.4310930669307709, "learning_rate": 0.0005725981420260707, "loss": 1.8497, "step": 12363 }, { "epoch": 0.41, "grad_norm": 0.442021906375885, "learning_rate": 0.000572593777727807, "loss": 1.8206, "step": 12364 }, { "epoch": 0.41, "grad_norm": 0.42701399326324463, "learning_rate": 0.0005725894130986545, "loss": 1.9155, "step": 12365 }, { "epoch": 0.41, "grad_norm": 0.43341851234436035, "learning_rate": 0.0005725850481386187, "loss": 1.8813, "step": 12366 }, { "epoch": 0.41, "grad_norm": 0.4500124454498291, "learning_rate": 0.0005725806828477049, "loss": 1.9138, "step": 12367 }, { "epoch": 0.41, "grad_norm": 0.45614346861839294, "learning_rate": 0.0005725763172259182, "loss": 1.9163, "step": 12368 }, { "epoch": 0.41, "grad_norm": 0.43632060289382935, "learning_rate": 0.000572571951273264, "loss": 1.8478, "step": 12369 }, { "epoch": 0.41, "grad_norm": 0.45689457654953003, "learning_rate": 0.0005725675849897477, "loss": 1.8918, "step": 12370 }, { "epoch": 0.41, "grad_norm": 0.516311764717102, "learning_rate": 0.0005725632183753744, "loss": 1.8667, "step": 12371 }, { "epoch": 0.41, "grad_norm": 0.4525716006755829, "learning_rate": 0.0005725588514301496, "loss": 1.914, "step": 12372 }, { "epoch": 0.41, "grad_norm": 0.4433175325393677, "learning_rate": 0.0005725544841540784, "loss": 1.8223, "step": 12373 }, { "epoch": 0.41, "grad_norm": 0.43016892671585083, "learning_rate": 0.0005725501165471664, "loss": 1.8332, "step": 12374 }, { "epoch": 0.41, "grad_norm": 0.4352821707725525, "learning_rate": 0.0005725457486094186, "loss": 1.9045, "step": 12375 }, { "epoch": 0.41, "grad_norm": 0.42638900876045227, "learning_rate": 0.0005725413803408405, "loss": 1.8457, "step": 12376 }, { "epoch": 0.41, "grad_norm": 0.4334017038345337, "learning_rate": 0.0005725370117414372, "loss": 1.8755, "step": 12377 }, { "epoch": 0.41, "grad_norm": 0.44064798951148987, "learning_rate": 0.0005725326428112141, "loss": 1.8334, "step": 12378 }, { "epoch": 0.41, "grad_norm": 0.42791324853897095, "learning_rate": 0.0005725282735501768, "loss": 1.9081, "step": 12379 }, { "epoch": 0.41, "grad_norm": 0.4312230348587036, "learning_rate": 0.0005725239039583301, "loss": 1.9115, "step": 12380 }, { "epoch": 0.41, "grad_norm": 0.43455320596694946, "learning_rate": 0.0005725195340356796, "loss": 1.9073, "step": 12381 }, { "epoch": 0.41, "grad_norm": 0.568779468536377, "learning_rate": 0.0005725151637822305, "loss": 1.8688, "step": 12382 }, { "epoch": 0.41, "grad_norm": 0.4259500801563263, "learning_rate": 0.0005725107931979881, "loss": 1.9216, "step": 12383 }, { "epoch": 0.41, "grad_norm": 0.4393669366836548, "learning_rate": 0.0005725064222829578, "loss": 1.8873, "step": 12384 }, { "epoch": 0.41, "grad_norm": 0.43398743867874146, "learning_rate": 0.0005725020510371449, "loss": 1.8989, "step": 12385 }, { "epoch": 0.41, "grad_norm": 0.43256500363349915, "learning_rate": 0.0005724976794605546, "loss": 1.8563, "step": 12386 }, { "epoch": 0.41, "grad_norm": 0.45711061358451843, "learning_rate": 0.0005724933075531924, "loss": 1.8997, "step": 12387 }, { "epoch": 0.41, "grad_norm": 0.4308937191963196, "learning_rate": 0.0005724889353150633, "loss": 1.9237, "step": 12388 }, { "epoch": 0.41, "grad_norm": 0.4291771948337555, "learning_rate": 0.0005724845627461729, "loss": 1.8165, "step": 12389 }, { "epoch": 0.41, "grad_norm": 0.4509933590888977, "learning_rate": 0.0005724801898465263, "loss": 1.8801, "step": 12390 }, { "epoch": 0.41, "grad_norm": 0.4400654137134552, "learning_rate": 0.0005724758166161289, "loss": 1.8145, "step": 12391 }, { "epoch": 0.41, "grad_norm": 0.4403259754180908, "learning_rate": 0.0005724714430549861, "loss": 1.8805, "step": 12392 }, { "epoch": 0.41, "grad_norm": 0.42673856019973755, "learning_rate": 0.0005724670691631031, "loss": 1.9056, "step": 12393 }, { "epoch": 0.41, "grad_norm": 0.45204105973243713, "learning_rate": 0.0005724626949404852, "loss": 1.9307, "step": 12394 }, { "epoch": 0.41, "grad_norm": 0.4515131115913391, "learning_rate": 0.0005724583203871377, "loss": 1.8757, "step": 12395 }, { "epoch": 0.41, "grad_norm": 0.45183196663856506, "learning_rate": 0.000572453945503066, "loss": 1.8437, "step": 12396 }, { "epoch": 0.41, "grad_norm": 0.5029451251029968, "learning_rate": 0.0005724495702882752, "loss": 1.9025, "step": 12397 }, { "epoch": 0.41, "grad_norm": 0.45480164885520935, "learning_rate": 0.0005724451947427709, "loss": 1.918, "step": 12398 }, { "epoch": 0.41, "grad_norm": 0.4284948408603668, "learning_rate": 0.0005724408188665583, "loss": 1.8623, "step": 12399 }, { "epoch": 0.41, "grad_norm": 0.4337185025215149, "learning_rate": 0.0005724364426596427, "loss": 1.8032, "step": 12400 }, { "epoch": 0.41, "grad_norm": 0.4524335265159607, "learning_rate": 0.0005724320661220293, "loss": 1.9313, "step": 12401 }, { "epoch": 0.41, "grad_norm": 0.45360496640205383, "learning_rate": 0.0005724276892537236, "loss": 1.8761, "step": 12402 }, { "epoch": 0.41, "grad_norm": 0.45500409603118896, "learning_rate": 0.0005724233120547308, "loss": 1.9366, "step": 12403 }, { "epoch": 0.41, "grad_norm": 0.45660334825515747, "learning_rate": 0.0005724189345250562, "loss": 1.9309, "step": 12404 }, { "epoch": 0.41, "grad_norm": 0.4483056366443634, "learning_rate": 0.0005724145566647051, "loss": 1.8864, "step": 12405 }, { "epoch": 0.41, "grad_norm": 0.4279900789260864, "learning_rate": 0.0005724101784736831, "loss": 1.8966, "step": 12406 }, { "epoch": 0.41, "grad_norm": 0.454275518655777, "learning_rate": 0.0005724057999519951, "loss": 1.8824, "step": 12407 }, { "epoch": 0.41, "grad_norm": 0.43724560737609863, "learning_rate": 0.0005724014210996465, "loss": 1.8268, "step": 12408 }, { "epoch": 0.41, "grad_norm": 0.4672050476074219, "learning_rate": 0.0005723970419166429, "loss": 1.8478, "step": 12409 }, { "epoch": 0.41, "grad_norm": 0.4441611170768738, "learning_rate": 0.0005723926624029894, "loss": 1.8707, "step": 12410 }, { "epoch": 0.41, "grad_norm": 0.4506472647190094, "learning_rate": 0.0005723882825586912, "loss": 1.8144, "step": 12411 }, { "epoch": 0.41, "grad_norm": 0.4647517502307892, "learning_rate": 0.000572383902383754, "loss": 1.8407, "step": 12412 }, { "epoch": 0.41, "grad_norm": 0.4634721577167511, "learning_rate": 0.0005723795218781827, "loss": 1.8669, "step": 12413 }, { "epoch": 0.41, "grad_norm": 0.4376712143421173, "learning_rate": 0.0005723751410419827, "loss": 1.8435, "step": 12414 }, { "epoch": 0.41, "grad_norm": 0.4530550241470337, "learning_rate": 0.0005723707598751596, "loss": 1.9065, "step": 12415 }, { "epoch": 0.41, "grad_norm": 0.4552222490310669, "learning_rate": 0.0005723663783777185, "loss": 1.9353, "step": 12416 }, { "epoch": 0.41, "grad_norm": 0.43721017241477966, "learning_rate": 0.0005723619965496647, "loss": 1.8919, "step": 12417 }, { "epoch": 0.41, "grad_norm": 0.4461984634399414, "learning_rate": 0.0005723576143910037, "loss": 1.8754, "step": 12418 }, { "epoch": 0.41, "grad_norm": 0.4645538628101349, "learning_rate": 0.0005723532319017405, "loss": 1.8583, "step": 12419 }, { "epoch": 0.41, "grad_norm": 0.4351171553134918, "learning_rate": 0.0005723488490818807, "loss": 1.9284, "step": 12420 }, { "epoch": 0.41, "grad_norm": 0.43675047159194946, "learning_rate": 0.0005723444659314295, "loss": 1.8796, "step": 12421 }, { "epoch": 0.41, "grad_norm": 0.4282635450363159, "learning_rate": 0.0005723400824503923, "loss": 1.9409, "step": 12422 }, { "epoch": 0.41, "grad_norm": 0.42840155959129333, "learning_rate": 0.0005723356986387743, "loss": 1.8475, "step": 12423 }, { "epoch": 0.41, "grad_norm": 0.43985554575920105, "learning_rate": 0.0005723313144965809, "loss": 1.8688, "step": 12424 }, { "epoch": 0.41, "grad_norm": 0.4420359432697296, "learning_rate": 0.0005723269300238174, "loss": 1.9275, "step": 12425 }, { "epoch": 0.41, "grad_norm": 0.4326217770576477, "learning_rate": 0.0005723225452204892, "loss": 1.8889, "step": 12426 }, { "epoch": 0.41, "grad_norm": 0.4403330385684967, "learning_rate": 0.0005723181600866015, "loss": 1.8848, "step": 12427 }, { "epoch": 0.41, "grad_norm": 0.4536798894405365, "learning_rate": 0.0005723137746221597, "loss": 1.8983, "step": 12428 }, { "epoch": 0.41, "grad_norm": 0.4538259208202362, "learning_rate": 0.000572309388827169, "loss": 1.9773, "step": 12429 }, { "epoch": 0.41, "grad_norm": 0.4354068636894226, "learning_rate": 0.000572305002701635, "loss": 1.8742, "step": 12430 }, { "epoch": 0.41, "grad_norm": 0.45288902521133423, "learning_rate": 0.0005723006162455628, "loss": 1.8839, "step": 12431 }, { "epoch": 0.41, "grad_norm": 0.44940781593322754, "learning_rate": 0.0005722962294589577, "loss": 1.9118, "step": 12432 }, { "epoch": 0.41, "grad_norm": 0.43592193722724915, "learning_rate": 0.0005722918423418251, "loss": 1.7989, "step": 12433 }, { "epoch": 0.41, "grad_norm": 0.4570172429084778, "learning_rate": 0.0005722874548941704, "loss": 1.861, "step": 12434 }, { "epoch": 0.41, "grad_norm": 0.4520760774612427, "learning_rate": 0.0005722830671159988, "loss": 1.8463, "step": 12435 }, { "epoch": 0.41, "grad_norm": 0.43585655093193054, "learning_rate": 0.0005722786790073156, "loss": 1.9306, "step": 12436 }, { "epoch": 0.41, "grad_norm": 0.45576897263526917, "learning_rate": 0.0005722742905681263, "loss": 1.8197, "step": 12437 }, { "epoch": 0.41, "grad_norm": 0.4581700563430786, "learning_rate": 0.0005722699017984362, "loss": 1.887, "step": 12438 }, { "epoch": 0.41, "grad_norm": 0.44379621744155884, "learning_rate": 0.0005722655126982505, "loss": 1.8243, "step": 12439 }, { "epoch": 0.41, "grad_norm": 0.43425872921943665, "learning_rate": 0.0005722611232675745, "loss": 1.8943, "step": 12440 }, { "epoch": 0.41, "grad_norm": 0.4408264756202698, "learning_rate": 0.0005722567335064136, "loss": 1.9303, "step": 12441 }, { "epoch": 0.41, "grad_norm": 0.43057936429977417, "learning_rate": 0.0005722523434147733, "loss": 1.8168, "step": 12442 }, { "epoch": 0.41, "grad_norm": 0.4500729739665985, "learning_rate": 0.0005722479529926586, "loss": 1.867, "step": 12443 }, { "epoch": 0.41, "grad_norm": 0.46486997604370117, "learning_rate": 0.0005722435622400751, "loss": 1.8621, "step": 12444 }, { "epoch": 0.41, "grad_norm": 0.4288049042224884, "learning_rate": 0.000572239171157028, "loss": 1.8175, "step": 12445 }, { "epoch": 0.41, "grad_norm": 0.43223002552986145, "learning_rate": 0.0005722347797435227, "loss": 1.8982, "step": 12446 }, { "epoch": 0.41, "grad_norm": 0.4271829128265381, "learning_rate": 0.0005722303879995644, "loss": 1.8471, "step": 12447 }, { "epoch": 0.41, "grad_norm": 0.4526298940181732, "learning_rate": 0.0005722259959251586, "loss": 1.8608, "step": 12448 }, { "epoch": 0.41, "grad_norm": 0.46311911940574646, "learning_rate": 0.0005722216035203105, "loss": 1.9521, "step": 12449 }, { "epoch": 0.41, "grad_norm": 0.44396665692329407, "learning_rate": 0.0005722172107850255, "loss": 1.8279, "step": 12450 }, { "epoch": 0.41, "grad_norm": 0.4262741804122925, "learning_rate": 0.0005722128177193089, "loss": 1.8814, "step": 12451 }, { "epoch": 0.41, "grad_norm": 0.4333778917789459, "learning_rate": 0.0005722084243231661, "loss": 1.8798, "step": 12452 }, { "epoch": 0.41, "grad_norm": 0.44952037930488586, "learning_rate": 0.0005722040305966024, "loss": 1.8494, "step": 12453 }, { "epoch": 0.41, "grad_norm": 0.46335622668266296, "learning_rate": 0.000572199636539623, "loss": 1.9717, "step": 12454 }, { "epoch": 0.41, "grad_norm": 0.43515726923942566, "learning_rate": 0.0005721952421522334, "loss": 1.8913, "step": 12455 }, { "epoch": 0.41, "grad_norm": 0.4461512565612793, "learning_rate": 0.0005721908474344389, "loss": 1.8324, "step": 12456 }, { "epoch": 0.41, "grad_norm": 0.43494492769241333, "learning_rate": 0.0005721864523862448, "loss": 1.8411, "step": 12457 }, { "epoch": 0.41, "grad_norm": 0.46278443932533264, "learning_rate": 0.0005721820570076566, "loss": 1.804, "step": 12458 }, { "epoch": 0.41, "grad_norm": 0.4300703704357147, "learning_rate": 0.0005721776612986792, "loss": 1.8939, "step": 12459 }, { "epoch": 0.41, "grad_norm": 0.43132296204566956, "learning_rate": 0.0005721732652593185, "loss": 1.8595, "step": 12460 }, { "epoch": 0.41, "grad_norm": 0.4278070628643036, "learning_rate": 0.0005721688688895794, "loss": 1.8747, "step": 12461 }, { "epoch": 0.41, "grad_norm": 0.4316784739494324, "learning_rate": 0.0005721644721894673, "loss": 1.9099, "step": 12462 }, { "epoch": 0.41, "grad_norm": 0.4290110170841217, "learning_rate": 0.0005721600751589878, "loss": 1.8685, "step": 12463 }, { "epoch": 0.41, "grad_norm": 0.4375900328159332, "learning_rate": 0.000572155677798146, "loss": 1.9081, "step": 12464 }, { "epoch": 0.41, "grad_norm": 0.47393798828125, "learning_rate": 0.0005721512801069474, "loss": 1.8795, "step": 12465 }, { "epoch": 0.41, "grad_norm": 0.43429848551750183, "learning_rate": 0.0005721468820853972, "loss": 1.8374, "step": 12466 }, { "epoch": 0.41, "grad_norm": 0.44344788789749146, "learning_rate": 0.0005721424837335007, "loss": 1.917, "step": 12467 }, { "epoch": 0.41, "grad_norm": 0.4504789710044861, "learning_rate": 0.0005721380850512635, "loss": 1.8956, "step": 12468 }, { "epoch": 0.41, "grad_norm": 0.4519597291946411, "learning_rate": 0.0005721336860386905, "loss": 1.8414, "step": 12469 }, { "epoch": 0.41, "grad_norm": 0.4420586824417114, "learning_rate": 0.0005721292866957875, "loss": 1.9046, "step": 12470 }, { "epoch": 0.41, "grad_norm": 0.4321914315223694, "learning_rate": 0.0005721248870225597, "loss": 1.8107, "step": 12471 }, { "epoch": 0.41, "grad_norm": 0.43325212597846985, "learning_rate": 0.0005721204870190123, "loss": 1.9041, "step": 12472 }, { "epoch": 0.41, "grad_norm": 0.44367316365242004, "learning_rate": 0.0005721160866851506, "loss": 1.86, "step": 12473 }, { "epoch": 0.42, "grad_norm": 0.47706156969070435, "learning_rate": 0.0005721116860209802, "loss": 1.7777, "step": 12474 }, { "epoch": 0.42, "grad_norm": 0.4501821994781494, "learning_rate": 0.0005721072850265063, "loss": 1.8174, "step": 12475 }, { "epoch": 0.42, "grad_norm": 0.428076833486557, "learning_rate": 0.0005721028837017342, "loss": 1.8011, "step": 12476 }, { "epoch": 0.42, "grad_norm": 0.4474433958530426, "learning_rate": 0.0005720984820466693, "loss": 1.8843, "step": 12477 }, { "epoch": 0.42, "grad_norm": 0.4503043293952942, "learning_rate": 0.0005720940800613169, "loss": 1.8448, "step": 12478 }, { "epoch": 0.42, "grad_norm": 0.4439680576324463, "learning_rate": 0.0005720896777456825, "loss": 1.91, "step": 12479 }, { "epoch": 0.42, "grad_norm": 0.44692063331604004, "learning_rate": 0.0005720852750997713, "loss": 1.8867, "step": 12480 }, { "epoch": 0.42, "grad_norm": 0.45625773072242737, "learning_rate": 0.0005720808721235886, "loss": 1.912, "step": 12481 }, { "epoch": 0.42, "grad_norm": 0.4384276270866394, "learning_rate": 0.0005720764688171398, "loss": 1.8718, "step": 12482 }, { "epoch": 0.42, "grad_norm": 0.43146735429763794, "learning_rate": 0.0005720720651804302, "loss": 1.8307, "step": 12483 }, { "epoch": 0.42, "grad_norm": 0.47237536311149597, "learning_rate": 0.0005720676612134653, "loss": 1.8832, "step": 12484 }, { "epoch": 0.42, "grad_norm": 0.42578810453414917, "learning_rate": 0.0005720632569162504, "loss": 1.9169, "step": 12485 }, { "epoch": 0.42, "grad_norm": 0.4523301124572754, "learning_rate": 0.0005720588522887907, "loss": 1.8121, "step": 12486 }, { "epoch": 0.42, "grad_norm": 0.4219302833080292, "learning_rate": 0.0005720544473310917, "loss": 1.909, "step": 12487 }, { "epoch": 0.42, "grad_norm": 0.45110130310058594, "learning_rate": 0.0005720500420431586, "loss": 1.777, "step": 12488 }, { "epoch": 0.42, "grad_norm": 0.44846856594085693, "learning_rate": 0.0005720456364249969, "loss": 1.8781, "step": 12489 }, { "epoch": 0.42, "grad_norm": 0.44241389632225037, "learning_rate": 0.0005720412304766119, "loss": 1.8561, "step": 12490 }, { "epoch": 0.42, "grad_norm": 0.44001126289367676, "learning_rate": 0.0005720368241980088, "loss": 1.8647, "step": 12491 }, { "epoch": 0.42, "grad_norm": 0.45782506465911865, "learning_rate": 0.0005720324175891932, "loss": 1.9023, "step": 12492 }, { "epoch": 0.42, "grad_norm": 0.411645770072937, "learning_rate": 0.0005720280106501703, "loss": 1.7747, "step": 12493 }, { "epoch": 0.42, "grad_norm": 0.4309186637401581, "learning_rate": 0.0005720236033809454, "loss": 1.9003, "step": 12494 }, { "epoch": 0.42, "grad_norm": 0.4595913887023926, "learning_rate": 0.0005720191957815241, "loss": 1.8184, "step": 12495 }, { "epoch": 0.42, "grad_norm": 0.43678903579711914, "learning_rate": 0.0005720147878519115, "loss": 1.8948, "step": 12496 }, { "epoch": 0.42, "grad_norm": 0.4348442852497101, "learning_rate": 0.000572010379592113, "loss": 1.904, "step": 12497 }, { "epoch": 0.42, "grad_norm": 0.44247809052467346, "learning_rate": 0.000572005971002134, "loss": 1.8089, "step": 12498 }, { "epoch": 0.42, "grad_norm": 0.4338119328022003, "learning_rate": 0.0005720015620819797, "loss": 1.9123, "step": 12499 }, { "epoch": 0.42, "grad_norm": 0.4226301908493042, "learning_rate": 0.0005719971528316557, "loss": 1.8813, "step": 12500 }, { "epoch": 0.42, "grad_norm": 0.448495477437973, "learning_rate": 0.0005719927432511673, "loss": 1.8791, "step": 12501 }, { "epoch": 0.42, "grad_norm": 0.4374564290046692, "learning_rate": 0.0005719883333405197, "loss": 1.8513, "step": 12502 }, { "epoch": 0.42, "grad_norm": 0.4325726628303528, "learning_rate": 0.0005719839230997184, "loss": 1.8185, "step": 12503 }, { "epoch": 0.42, "grad_norm": 0.4441010057926178, "learning_rate": 0.0005719795125287686, "loss": 1.7664, "step": 12504 }, { "epoch": 0.42, "grad_norm": 0.43832409381866455, "learning_rate": 0.0005719751016276759, "loss": 1.8998, "step": 12505 }, { "epoch": 0.42, "grad_norm": 0.43406611680984497, "learning_rate": 0.0005719706903964453, "loss": 1.8247, "step": 12506 }, { "epoch": 0.42, "grad_norm": 0.44942259788513184, "learning_rate": 0.0005719662788350823, "loss": 1.8866, "step": 12507 }, { "epoch": 0.42, "grad_norm": 0.4346320629119873, "learning_rate": 0.0005719618669435926, "loss": 1.8729, "step": 12508 }, { "epoch": 0.42, "grad_norm": 0.4446573853492737, "learning_rate": 0.0005719574547219811, "loss": 1.8283, "step": 12509 }, { "epoch": 0.42, "grad_norm": 0.4389627277851105, "learning_rate": 0.0005719530421702533, "loss": 1.8967, "step": 12510 }, { "epoch": 0.42, "grad_norm": 0.43069779872894287, "learning_rate": 0.0005719486292884146, "loss": 1.8373, "step": 12511 }, { "epoch": 0.42, "grad_norm": 0.4531410336494446, "learning_rate": 0.0005719442160764704, "loss": 1.9278, "step": 12512 }, { "epoch": 0.42, "grad_norm": 0.4431496560573578, "learning_rate": 0.000571939802534426, "loss": 1.9139, "step": 12513 }, { "epoch": 0.42, "grad_norm": 0.46177417039871216, "learning_rate": 0.0005719353886622866, "loss": 1.9743, "step": 12514 }, { "epoch": 0.42, "grad_norm": 0.42979303002357483, "learning_rate": 0.0005719309744600578, "loss": 1.8505, "step": 12515 }, { "epoch": 0.42, "grad_norm": 0.42368176579475403, "learning_rate": 0.0005719265599277448, "loss": 1.8657, "step": 12516 }, { "epoch": 0.42, "grad_norm": 0.4410388469696045, "learning_rate": 0.0005719221450653532, "loss": 1.8706, "step": 12517 }, { "epoch": 0.42, "grad_norm": 0.43370959162712097, "learning_rate": 0.0005719177298728881, "loss": 1.91, "step": 12518 }, { "epoch": 0.42, "grad_norm": 0.44283998012542725, "learning_rate": 0.0005719133143503548, "loss": 1.917, "step": 12519 }, { "epoch": 0.42, "grad_norm": 0.4315895438194275, "learning_rate": 0.0005719088984977589, "loss": 1.9107, "step": 12520 }, { "epoch": 0.42, "grad_norm": 0.4505466818809509, "learning_rate": 0.0005719044823151057, "loss": 1.8334, "step": 12521 }, { "epoch": 0.42, "grad_norm": 0.4399583041667938, "learning_rate": 0.0005719000658024005, "loss": 1.9317, "step": 12522 }, { "epoch": 0.42, "grad_norm": 0.43054425716400146, "learning_rate": 0.0005718956489596486, "loss": 1.9084, "step": 12523 }, { "epoch": 0.42, "grad_norm": 0.465565949678421, "learning_rate": 0.0005718912317868556, "loss": 1.9351, "step": 12524 }, { "epoch": 0.42, "grad_norm": 0.44534242153167725, "learning_rate": 0.0005718868142840267, "loss": 1.8554, "step": 12525 }, { "epoch": 0.42, "grad_norm": 0.45540326833724976, "learning_rate": 0.000571882396451167, "loss": 1.9246, "step": 12526 }, { "epoch": 0.42, "grad_norm": 0.46472588181495667, "learning_rate": 0.0005718779782882825, "loss": 1.8955, "step": 12527 }, { "epoch": 0.42, "grad_norm": 0.448975533246994, "learning_rate": 0.000571873559795378, "loss": 1.8387, "step": 12528 }, { "epoch": 0.42, "grad_norm": 0.41531887650489807, "learning_rate": 0.000571869140972459, "loss": 1.896, "step": 12529 }, { "epoch": 0.42, "grad_norm": 0.45402929186820984, "learning_rate": 0.0005718647218195311, "loss": 2.0025, "step": 12530 }, { "epoch": 0.42, "grad_norm": 0.4475129544734955, "learning_rate": 0.0005718603023365993, "loss": 1.8859, "step": 12531 }, { "epoch": 0.42, "grad_norm": 0.43210604786872864, "learning_rate": 0.0005718558825236692, "loss": 1.9195, "step": 12532 }, { "epoch": 0.42, "grad_norm": 0.4389192461967468, "learning_rate": 0.0005718514623807462, "loss": 1.8761, "step": 12533 }, { "epoch": 0.42, "grad_norm": 0.4416044056415558, "learning_rate": 0.0005718470419078355, "loss": 1.9138, "step": 12534 }, { "epoch": 0.42, "grad_norm": 0.449301153421402, "learning_rate": 0.0005718426211049426, "loss": 1.9352, "step": 12535 }, { "epoch": 0.42, "grad_norm": 0.4572160243988037, "learning_rate": 0.0005718381999720727, "loss": 1.88, "step": 12536 }, { "epoch": 0.42, "grad_norm": 0.46118637919425964, "learning_rate": 0.0005718337785092315, "loss": 1.8178, "step": 12537 }, { "epoch": 0.42, "grad_norm": 0.44468843936920166, "learning_rate": 0.0005718293567164239, "loss": 1.8592, "step": 12538 }, { "epoch": 0.42, "grad_norm": 0.42592853307724, "learning_rate": 0.0005718249345936556, "loss": 1.8461, "step": 12539 }, { "epoch": 0.42, "grad_norm": 0.44404298067092896, "learning_rate": 0.0005718205121409319, "loss": 1.8448, "step": 12540 }, { "epoch": 0.42, "grad_norm": 0.4542348384857178, "learning_rate": 0.0005718160893582582, "loss": 1.8959, "step": 12541 }, { "epoch": 0.42, "grad_norm": 0.42930376529693604, "learning_rate": 0.0005718116662456397, "loss": 1.843, "step": 12542 }, { "epoch": 0.42, "grad_norm": 0.4552595913410187, "learning_rate": 0.0005718072428030819, "loss": 1.9156, "step": 12543 }, { "epoch": 0.42, "grad_norm": 0.44560280442237854, "learning_rate": 0.0005718028190305903, "loss": 1.8053, "step": 12544 }, { "epoch": 0.42, "grad_norm": 0.4441968500614166, "learning_rate": 0.00057179839492817, "loss": 1.8998, "step": 12545 }, { "epoch": 0.42, "grad_norm": 0.434420108795166, "learning_rate": 0.0005717939704958266, "loss": 1.8309, "step": 12546 }, { "epoch": 0.42, "grad_norm": 0.44055870175361633, "learning_rate": 0.0005717895457335653, "loss": 1.797, "step": 12547 }, { "epoch": 0.42, "grad_norm": 0.4153890311717987, "learning_rate": 0.0005717851206413914, "loss": 1.8794, "step": 12548 }, { "epoch": 0.42, "grad_norm": 0.42861801385879517, "learning_rate": 0.0005717806952193106, "loss": 1.8502, "step": 12549 }, { "epoch": 0.42, "grad_norm": 0.4320647120475769, "learning_rate": 0.000571776269467328, "loss": 1.8146, "step": 12550 }, { "epoch": 0.42, "grad_norm": 0.4231082797050476, "learning_rate": 0.0005717718433854492, "loss": 1.8807, "step": 12551 }, { "epoch": 0.42, "grad_norm": 0.4402368366718292, "learning_rate": 0.0005717674169736793, "loss": 1.8928, "step": 12552 }, { "epoch": 0.42, "grad_norm": 0.44418710470199585, "learning_rate": 0.0005717629902320237, "loss": 1.8774, "step": 12553 }, { "epoch": 0.42, "grad_norm": 0.42330145835876465, "learning_rate": 0.000571758563160488, "loss": 1.8109, "step": 12554 }, { "epoch": 0.42, "grad_norm": 0.4360899031162262, "learning_rate": 0.0005717541357590774, "loss": 1.8617, "step": 12555 }, { "epoch": 0.42, "grad_norm": 0.4507215917110443, "learning_rate": 0.0005717497080277974, "loss": 1.9302, "step": 12556 }, { "epoch": 0.42, "grad_norm": 0.43174639344215393, "learning_rate": 0.0005717452799666532, "loss": 1.8605, "step": 12557 }, { "epoch": 0.42, "grad_norm": 0.4255821108818054, "learning_rate": 0.0005717408515756503, "loss": 1.9223, "step": 12558 }, { "epoch": 0.42, "grad_norm": 0.439459890127182, "learning_rate": 0.0005717364228547941, "loss": 1.9041, "step": 12559 }, { "epoch": 0.42, "grad_norm": 0.44380447268486023, "learning_rate": 0.0005717319938040899, "loss": 1.8778, "step": 12560 }, { "epoch": 0.42, "grad_norm": 0.4308900833129883, "learning_rate": 0.000571727564423543, "loss": 1.8806, "step": 12561 }, { "epoch": 0.42, "grad_norm": 0.505469799041748, "learning_rate": 0.000571723134713159, "loss": 1.94, "step": 12562 }, { "epoch": 0.42, "grad_norm": 0.4488708972930908, "learning_rate": 0.0005717187046729431, "loss": 1.8168, "step": 12563 }, { "epoch": 0.42, "grad_norm": 0.4273526668548584, "learning_rate": 0.0005717142743029007, "loss": 1.8865, "step": 12564 }, { "epoch": 0.42, "grad_norm": 0.42680662870407104, "learning_rate": 0.0005717098436030373, "loss": 1.9074, "step": 12565 }, { "epoch": 0.42, "grad_norm": 0.4343331754207611, "learning_rate": 0.000571705412573358, "loss": 1.7943, "step": 12566 }, { "epoch": 0.42, "grad_norm": 0.4457562267780304, "learning_rate": 0.0005717009812138686, "loss": 1.8622, "step": 12567 }, { "epoch": 0.42, "grad_norm": 0.443625807762146, "learning_rate": 0.0005716965495245741, "loss": 1.9301, "step": 12568 }, { "epoch": 0.42, "grad_norm": 0.44198185205459595, "learning_rate": 0.00057169211750548, "loss": 1.9126, "step": 12569 }, { "epoch": 0.42, "grad_norm": 0.4516175091266632, "learning_rate": 0.0005716876851565917, "loss": 1.9116, "step": 12570 }, { "epoch": 0.42, "grad_norm": 0.48540791869163513, "learning_rate": 0.0005716832524779146, "loss": 1.8274, "step": 12571 }, { "epoch": 0.42, "grad_norm": 0.4487248361110687, "learning_rate": 0.0005716788194694542, "loss": 1.8471, "step": 12572 }, { "epoch": 0.42, "grad_norm": 0.418871134519577, "learning_rate": 0.0005716743861312155, "loss": 1.8771, "step": 12573 }, { "epoch": 0.42, "grad_norm": 0.4452773928642273, "learning_rate": 0.0005716699524632044, "loss": 1.9505, "step": 12574 }, { "epoch": 0.42, "grad_norm": 0.45943623781204224, "learning_rate": 0.0005716655184654257, "loss": 1.8736, "step": 12575 }, { "epoch": 0.42, "grad_norm": 0.42846235632896423, "learning_rate": 0.0005716610841378854, "loss": 1.9839, "step": 12576 }, { "epoch": 0.42, "grad_norm": 0.4354405403137207, "learning_rate": 0.0005716566494805884, "loss": 1.8857, "step": 12577 }, { "epoch": 0.42, "grad_norm": 0.45038726925849915, "learning_rate": 0.0005716522144935402, "loss": 1.9038, "step": 12578 }, { "epoch": 0.42, "grad_norm": 0.4548532962799072, "learning_rate": 0.0005716477791767463, "loss": 1.9147, "step": 12579 }, { "epoch": 0.42, "grad_norm": 0.4333040714263916, "learning_rate": 0.000571643343530212, "loss": 1.8761, "step": 12580 }, { "epoch": 0.42, "grad_norm": 0.6983071565628052, "learning_rate": 0.0005716389075539428, "loss": 1.885, "step": 12581 }, { "epoch": 0.42, "grad_norm": 0.46556374430656433, "learning_rate": 0.000571634471247944, "loss": 1.8799, "step": 12582 }, { "epoch": 0.42, "grad_norm": 0.45313870906829834, "learning_rate": 0.0005716300346122208, "loss": 1.878, "step": 12583 }, { "epoch": 0.42, "grad_norm": 0.43639492988586426, "learning_rate": 0.0005716255976467789, "loss": 1.8961, "step": 12584 }, { "epoch": 0.42, "grad_norm": 0.4288875162601471, "learning_rate": 0.0005716211603516235, "loss": 1.9174, "step": 12585 }, { "epoch": 0.42, "grad_norm": 0.4478447735309601, "learning_rate": 0.00057161672272676, "loss": 1.8628, "step": 12586 }, { "epoch": 0.42, "grad_norm": 0.4544125497341156, "learning_rate": 0.0005716122847721939, "loss": 1.8121, "step": 12587 }, { "epoch": 0.42, "grad_norm": 0.43512123823165894, "learning_rate": 0.0005716078464879304, "loss": 1.9415, "step": 12588 }, { "epoch": 0.42, "grad_norm": 0.4324510097503662, "learning_rate": 0.0005716034078739751, "loss": 1.8907, "step": 12589 }, { "epoch": 0.42, "grad_norm": 0.4415685832500458, "learning_rate": 0.0005715989689303333, "loss": 1.922, "step": 12590 }, { "epoch": 0.42, "grad_norm": 0.4536440074443817, "learning_rate": 0.0005715945296570103, "loss": 1.9311, "step": 12591 }, { "epoch": 0.42, "grad_norm": 0.4534930884838104, "learning_rate": 0.0005715900900540116, "loss": 1.8749, "step": 12592 }, { "epoch": 0.42, "grad_norm": 0.43711066246032715, "learning_rate": 0.0005715856501213425, "loss": 1.8687, "step": 12593 }, { "epoch": 0.42, "grad_norm": 0.43683046102523804, "learning_rate": 0.0005715812098590085, "loss": 1.9098, "step": 12594 }, { "epoch": 0.42, "grad_norm": 0.4506862759590149, "learning_rate": 0.0005715767692670149, "loss": 1.9469, "step": 12595 }, { "epoch": 0.42, "grad_norm": 0.4200819432735443, "learning_rate": 0.0005715723283453671, "loss": 1.8486, "step": 12596 }, { "epoch": 0.42, "grad_norm": 0.43528497219085693, "learning_rate": 0.0005715678870940705, "loss": 1.8944, "step": 12597 }, { "epoch": 0.42, "grad_norm": 0.45242518186569214, "learning_rate": 0.0005715634455131305, "loss": 1.8587, "step": 12598 }, { "epoch": 0.42, "grad_norm": 0.45688730478286743, "learning_rate": 0.0005715590036025525, "loss": 1.9182, "step": 12599 }, { "epoch": 0.42, "grad_norm": 0.4425157308578491, "learning_rate": 0.000571554561362342, "loss": 1.8549, "step": 12600 }, { "epoch": 0.42, "grad_norm": 0.4447016417980194, "learning_rate": 0.000571550118792504, "loss": 1.8962, "step": 12601 }, { "epoch": 0.42, "grad_norm": 0.4382379949092865, "learning_rate": 0.0005715456758930443, "loss": 1.8942, "step": 12602 }, { "epoch": 0.42, "grad_norm": 0.4312298595905304, "learning_rate": 0.0005715412326639682, "loss": 1.8994, "step": 12603 }, { "epoch": 0.42, "grad_norm": 0.4387834668159485, "learning_rate": 0.0005715367891052811, "loss": 1.918, "step": 12604 }, { "epoch": 0.42, "grad_norm": 0.44486793875694275, "learning_rate": 0.0005715323452169883, "loss": 1.8967, "step": 12605 }, { "epoch": 0.42, "grad_norm": 0.4859866797924042, "learning_rate": 0.0005715279009990952, "loss": 1.8945, "step": 12606 }, { "epoch": 0.42, "grad_norm": 0.4387191832065582, "learning_rate": 0.0005715234564516072, "loss": 1.87, "step": 12607 }, { "epoch": 0.42, "grad_norm": 0.43614667654037476, "learning_rate": 0.0005715190115745299, "loss": 1.8437, "step": 12608 }, { "epoch": 0.42, "grad_norm": 0.4539276957511902, "learning_rate": 0.0005715145663678684, "loss": 1.944, "step": 12609 }, { "epoch": 0.42, "grad_norm": 0.4302893877029419, "learning_rate": 0.0005715101208316282, "loss": 1.8822, "step": 12610 }, { "epoch": 0.42, "grad_norm": 0.43607309460639954, "learning_rate": 0.0005715056749658147, "loss": 1.9129, "step": 12611 }, { "epoch": 0.42, "grad_norm": 0.46076473593711853, "learning_rate": 0.0005715012287704335, "loss": 1.9583, "step": 12612 }, { "epoch": 0.42, "grad_norm": 0.43781763315200806, "learning_rate": 0.0005714967822454896, "loss": 1.8939, "step": 12613 }, { "epoch": 0.42, "grad_norm": 0.4344386160373688, "learning_rate": 0.0005714923353909887, "loss": 1.8991, "step": 12614 }, { "epoch": 0.42, "grad_norm": 0.4615896940231323, "learning_rate": 0.0005714878882069361, "loss": 1.9521, "step": 12615 }, { "epoch": 0.42, "grad_norm": 0.44030845165252686, "learning_rate": 0.0005714834406933372, "loss": 1.8671, "step": 12616 }, { "epoch": 0.42, "grad_norm": 0.45403236150741577, "learning_rate": 0.0005714789928501975, "loss": 1.8539, "step": 12617 }, { "epoch": 0.42, "grad_norm": 0.4597156345844269, "learning_rate": 0.0005714745446775221, "loss": 1.9085, "step": 12618 }, { "epoch": 0.42, "grad_norm": 0.45887747406959534, "learning_rate": 0.0005714700961753168, "loss": 1.8329, "step": 12619 }, { "epoch": 0.42, "grad_norm": 0.4469269812107086, "learning_rate": 0.0005714656473435867, "loss": 1.8994, "step": 12620 }, { "epoch": 0.42, "grad_norm": 0.4415828287601471, "learning_rate": 0.0005714611981823374, "loss": 1.9359, "step": 12621 }, { "epoch": 0.42, "grad_norm": 0.4368564784526825, "learning_rate": 0.000571456748691574, "loss": 1.8494, "step": 12622 }, { "epoch": 0.42, "grad_norm": 0.43658575415611267, "learning_rate": 0.0005714522988713022, "loss": 1.8834, "step": 12623 }, { "epoch": 0.42, "grad_norm": 0.4410199522972107, "learning_rate": 0.0005714478487215274, "loss": 1.8245, "step": 12624 }, { "epoch": 0.42, "grad_norm": 0.4344688951969147, "learning_rate": 0.0005714433982422549, "loss": 1.8968, "step": 12625 }, { "epoch": 0.42, "grad_norm": 0.45357444882392883, "learning_rate": 0.0005714389474334901, "loss": 1.891, "step": 12626 }, { "epoch": 0.42, "grad_norm": 0.4994887113571167, "learning_rate": 0.0005714344962952383, "loss": 1.9073, "step": 12627 }, { "epoch": 0.42, "grad_norm": 0.4445725083351135, "learning_rate": 0.000571430044827505, "loss": 1.9235, "step": 12628 }, { "epoch": 0.42, "grad_norm": 0.434399276971817, "learning_rate": 0.0005714255930302957, "loss": 1.8485, "step": 12629 }, { "epoch": 0.42, "grad_norm": 0.47305169701576233, "learning_rate": 0.0005714211409036158, "loss": 1.9898, "step": 12630 }, { "epoch": 0.42, "grad_norm": 0.4322941303253174, "learning_rate": 0.0005714166884474705, "loss": 1.8854, "step": 12631 }, { "epoch": 0.42, "grad_norm": 0.4336974620819092, "learning_rate": 0.0005714122356618654, "loss": 1.841, "step": 12632 }, { "epoch": 0.42, "grad_norm": 0.4240562915802002, "learning_rate": 0.0005714077825468058, "loss": 1.8563, "step": 12633 }, { "epoch": 0.42, "grad_norm": 0.43070918321609497, "learning_rate": 0.0005714033291022972, "loss": 1.8419, "step": 12634 }, { "epoch": 0.42, "grad_norm": 0.4506954550743103, "learning_rate": 0.0005713988753283449, "loss": 1.8641, "step": 12635 }, { "epoch": 0.42, "grad_norm": 0.44341763854026794, "learning_rate": 0.0005713944212249544, "loss": 1.8863, "step": 12636 }, { "epoch": 0.42, "grad_norm": 0.46195197105407715, "learning_rate": 0.000571389966792131, "loss": 1.8871, "step": 12637 }, { "epoch": 0.42, "grad_norm": 0.44348761439323425, "learning_rate": 0.0005713855120298802, "loss": 1.8311, "step": 12638 }, { "epoch": 0.42, "grad_norm": 0.43754056096076965, "learning_rate": 0.0005713810569382074, "loss": 1.9214, "step": 12639 }, { "epoch": 0.42, "grad_norm": 0.43646177649497986, "learning_rate": 0.000571376601517118, "loss": 1.8748, "step": 12640 }, { "epoch": 0.42, "grad_norm": 0.42340973019599915, "learning_rate": 0.0005713721457666173, "loss": 1.8785, "step": 12641 }, { "epoch": 0.42, "grad_norm": 0.46234917640686035, "learning_rate": 0.0005713676896867109, "loss": 1.8879, "step": 12642 }, { "epoch": 0.42, "grad_norm": 0.44599294662475586, "learning_rate": 0.000571363233277404, "loss": 1.8882, "step": 12643 }, { "epoch": 0.42, "grad_norm": 0.42590832710266113, "learning_rate": 0.0005713587765387022, "loss": 1.8693, "step": 12644 }, { "epoch": 0.42, "grad_norm": 0.4313161075115204, "learning_rate": 0.0005713543194706108, "loss": 1.8413, "step": 12645 }, { "epoch": 0.42, "grad_norm": 0.4511878490447998, "learning_rate": 0.0005713498620731352, "loss": 1.8596, "step": 12646 }, { "epoch": 0.42, "grad_norm": 0.44178512692451477, "learning_rate": 0.0005713454043462809, "loss": 1.8502, "step": 12647 }, { "epoch": 0.42, "grad_norm": 0.4285600185394287, "learning_rate": 0.0005713409462900532, "loss": 1.8376, "step": 12648 }, { "epoch": 0.42, "grad_norm": 0.42770126461982727, "learning_rate": 0.0005713364879044577, "loss": 1.8912, "step": 12649 }, { "epoch": 0.42, "grad_norm": 0.43174368143081665, "learning_rate": 0.0005713320291894995, "loss": 1.7968, "step": 12650 }, { "epoch": 0.42, "grad_norm": 0.4361148178577423, "learning_rate": 0.0005713275701451844, "loss": 1.9257, "step": 12651 }, { "epoch": 0.42, "grad_norm": 0.45406582951545715, "learning_rate": 0.0005713231107715174, "loss": 1.9093, "step": 12652 }, { "epoch": 0.42, "grad_norm": 0.4316140413284302, "learning_rate": 0.0005713186510685043, "loss": 1.8348, "step": 12653 }, { "epoch": 0.42, "grad_norm": 0.4323173463344574, "learning_rate": 0.0005713141910361503, "loss": 1.9397, "step": 12654 }, { "epoch": 0.42, "grad_norm": 0.432605117559433, "learning_rate": 0.0005713097306744607, "loss": 1.8924, "step": 12655 }, { "epoch": 0.42, "grad_norm": 0.4354884922504425, "learning_rate": 0.0005713052699834411, "loss": 1.9172, "step": 12656 }, { "epoch": 0.42, "grad_norm": 0.44499334692955017, "learning_rate": 0.000571300808963097, "loss": 1.8718, "step": 12657 }, { "epoch": 0.42, "grad_norm": 0.42538800835609436, "learning_rate": 0.0005712963476134337, "loss": 1.775, "step": 12658 }, { "epoch": 0.42, "grad_norm": 0.42383044958114624, "learning_rate": 0.0005712918859344565, "loss": 1.7932, "step": 12659 }, { "epoch": 0.42, "grad_norm": 0.4612674415111542, "learning_rate": 0.0005712874239261709, "loss": 1.9928, "step": 12660 }, { "epoch": 0.42, "grad_norm": 0.4212932586669922, "learning_rate": 0.0005712829615885824, "loss": 1.8311, "step": 12661 }, { "epoch": 0.42, "grad_norm": 0.44004520773887634, "learning_rate": 0.0005712784989216964, "loss": 1.8415, "step": 12662 }, { "epoch": 0.42, "grad_norm": 0.44893983006477356, "learning_rate": 0.0005712740359255183, "loss": 1.9751, "step": 12663 }, { "epoch": 0.42, "grad_norm": 0.43611598014831543, "learning_rate": 0.0005712695726000534, "loss": 1.8642, "step": 12664 }, { "epoch": 0.42, "grad_norm": 0.44421103596687317, "learning_rate": 0.0005712651089453073, "loss": 1.9361, "step": 12665 }, { "epoch": 0.42, "grad_norm": 0.4505617320537567, "learning_rate": 0.0005712606449612852, "loss": 1.8863, "step": 12666 }, { "epoch": 0.42, "grad_norm": 0.4380294680595398, "learning_rate": 0.0005712561806479927, "loss": 1.8727, "step": 12667 }, { "epoch": 0.42, "grad_norm": 0.440644234418869, "learning_rate": 0.0005712517160054352, "loss": 1.8486, "step": 12668 }, { "epoch": 0.42, "grad_norm": 0.4390203654766083, "learning_rate": 0.000571247251033618, "loss": 1.8479, "step": 12669 }, { "epoch": 0.42, "grad_norm": 0.43387576937675476, "learning_rate": 0.0005712427857325466, "loss": 1.883, "step": 12670 }, { "epoch": 0.42, "grad_norm": 0.4316621720790863, "learning_rate": 0.0005712383201022266, "loss": 1.8611, "step": 12671 }, { "epoch": 0.42, "grad_norm": 0.4229664206504822, "learning_rate": 0.0005712338541426632, "loss": 1.9085, "step": 12672 }, { "epoch": 0.42, "grad_norm": 0.46152186393737793, "learning_rate": 0.0005712293878538618, "loss": 1.8929, "step": 12673 }, { "epoch": 0.42, "grad_norm": 0.43797266483306885, "learning_rate": 0.0005712249212358278, "loss": 1.845, "step": 12674 }, { "epoch": 0.42, "grad_norm": 0.43253305554389954, "learning_rate": 0.0005712204542885668, "loss": 1.8592, "step": 12675 }, { "epoch": 0.42, "grad_norm": 0.45566654205322266, "learning_rate": 0.0005712159870120841, "loss": 1.7928, "step": 12676 }, { "epoch": 0.42, "grad_norm": 0.45910730957984924, "learning_rate": 0.0005712115194063853, "loss": 1.8417, "step": 12677 }, { "epoch": 0.42, "grad_norm": 0.44360852241516113, "learning_rate": 0.0005712070514714754, "loss": 1.8297, "step": 12678 }, { "epoch": 0.42, "grad_norm": 0.44746944308280945, "learning_rate": 0.0005712025832073604, "loss": 1.8395, "step": 12679 }, { "epoch": 0.42, "grad_norm": 0.4452711045742035, "learning_rate": 0.0005711981146140451, "loss": 1.9701, "step": 12680 }, { "epoch": 0.42, "grad_norm": 0.4406963288784027, "learning_rate": 0.0005711936456915355, "loss": 1.8562, "step": 12681 }, { "epoch": 0.42, "grad_norm": 0.42643195390701294, "learning_rate": 0.0005711891764398367, "loss": 1.7854, "step": 12682 }, { "epoch": 0.42, "grad_norm": 0.43746218085289, "learning_rate": 0.0005711847068589542, "loss": 1.8669, "step": 12683 }, { "epoch": 0.42, "grad_norm": 0.4335951507091522, "learning_rate": 0.0005711802369488934, "loss": 1.8679, "step": 12684 }, { "epoch": 0.42, "grad_norm": 0.47231143712997437, "learning_rate": 0.0005711757667096598, "loss": 1.8586, "step": 12685 }, { "epoch": 0.42, "grad_norm": 0.4351755380630493, "learning_rate": 0.0005711712961412587, "loss": 1.9265, "step": 12686 }, { "epoch": 0.42, "grad_norm": 0.42944979667663574, "learning_rate": 0.0005711668252436957, "loss": 1.9235, "step": 12687 }, { "epoch": 0.42, "grad_norm": 0.45636555552482605, "learning_rate": 0.0005711623540169761, "loss": 1.8201, "step": 12688 }, { "epoch": 0.42, "grad_norm": 0.4511585533618927, "learning_rate": 0.0005711578824611053, "loss": 1.8423, "step": 12689 }, { "epoch": 0.42, "grad_norm": 0.4295502007007599, "learning_rate": 0.0005711534105760888, "loss": 1.8525, "step": 12690 }, { "epoch": 0.42, "grad_norm": 0.44109493494033813, "learning_rate": 0.000571148938361932, "loss": 1.8741, "step": 12691 }, { "epoch": 0.42, "grad_norm": 0.432065486907959, "learning_rate": 0.0005711444658186404, "loss": 1.8795, "step": 12692 }, { "epoch": 0.42, "grad_norm": 0.43739867210388184, "learning_rate": 0.0005711399929462193, "loss": 1.8131, "step": 12693 }, { "epoch": 0.42, "grad_norm": 0.4558362066745758, "learning_rate": 0.0005711355197446742, "loss": 1.8446, "step": 12694 }, { "epoch": 0.42, "grad_norm": 0.4516024589538574, "learning_rate": 0.0005711310462140106, "loss": 1.8746, "step": 12695 }, { "epoch": 0.42, "grad_norm": 0.43268170952796936, "learning_rate": 0.0005711265723542338, "loss": 1.8867, "step": 12696 }, { "epoch": 0.42, "grad_norm": 0.43016037344932556, "learning_rate": 0.0005711220981653493, "loss": 1.9336, "step": 12697 }, { "epoch": 0.42, "grad_norm": 0.4356989562511444, "learning_rate": 0.0005711176236473626, "loss": 1.8785, "step": 12698 }, { "epoch": 0.42, "grad_norm": 0.43463078141212463, "learning_rate": 0.0005711131488002789, "loss": 1.8788, "step": 12699 }, { "epoch": 0.42, "grad_norm": 0.43930384516716003, "learning_rate": 0.000571108673624104, "loss": 1.8873, "step": 12700 }, { "epoch": 0.42, "grad_norm": 0.4495985805988312, "learning_rate": 0.0005711041981188429, "loss": 1.9411, "step": 12701 }, { "epoch": 0.42, "grad_norm": 0.43608996272087097, "learning_rate": 0.0005710997222845013, "loss": 1.8123, "step": 12702 }, { "epoch": 0.42, "grad_norm": 0.43646425008773804, "learning_rate": 0.0005710952461210846, "loss": 1.8711, "step": 12703 }, { "epoch": 0.42, "grad_norm": 0.46624669432640076, "learning_rate": 0.0005710907696285983, "loss": 1.8952, "step": 12704 }, { "epoch": 0.42, "grad_norm": 0.42828503251075745, "learning_rate": 0.0005710862928070477, "loss": 1.8278, "step": 12705 }, { "epoch": 0.42, "grad_norm": 0.4344724416732788, "learning_rate": 0.0005710818156564382, "loss": 1.8793, "step": 12706 }, { "epoch": 0.42, "grad_norm": 0.44589683413505554, "learning_rate": 0.0005710773381767754, "loss": 1.8152, "step": 12707 }, { "epoch": 0.42, "grad_norm": 0.4536081552505493, "learning_rate": 0.0005710728603680647, "loss": 1.9109, "step": 12708 }, { "epoch": 0.42, "grad_norm": 0.4309053421020508, "learning_rate": 0.0005710683822303114, "loss": 1.9213, "step": 12709 }, { "epoch": 0.42, "grad_norm": 0.42976391315460205, "learning_rate": 0.0005710639037635211, "loss": 1.8798, "step": 12710 }, { "epoch": 0.42, "grad_norm": 0.43157055974006653, "learning_rate": 0.000571059424967699, "loss": 1.9054, "step": 12711 }, { "epoch": 0.42, "grad_norm": 0.45541366934776306, "learning_rate": 0.0005710549458428509, "loss": 1.8809, "step": 12712 }, { "epoch": 0.42, "grad_norm": 0.44159460067749023, "learning_rate": 0.0005710504663889818, "loss": 1.8902, "step": 12713 }, { "epoch": 0.42, "grad_norm": 0.4338376522064209, "learning_rate": 0.0005710459866060976, "loss": 1.889, "step": 12714 }, { "epoch": 0.42, "grad_norm": 0.42805901169776917, "learning_rate": 0.0005710415064942033, "loss": 1.8992, "step": 12715 }, { "epoch": 0.42, "grad_norm": 0.44634634256362915, "learning_rate": 0.0005710370260533047, "loss": 1.9465, "step": 12716 }, { "epoch": 0.42, "grad_norm": 0.439907044172287, "learning_rate": 0.0005710325452834071, "loss": 1.9271, "step": 12717 }, { "epoch": 0.42, "grad_norm": 0.426165372133255, "learning_rate": 0.0005710280641845159, "loss": 1.8143, "step": 12718 }, { "epoch": 0.42, "grad_norm": 0.4290578365325928, "learning_rate": 0.0005710235827566365, "loss": 1.8843, "step": 12719 }, { "epoch": 0.42, "grad_norm": 0.4399246871471405, "learning_rate": 0.0005710191009997745, "loss": 1.8671, "step": 12720 }, { "epoch": 0.42, "grad_norm": 0.4489581882953644, "learning_rate": 0.0005710146189139352, "loss": 1.8722, "step": 12721 }, { "epoch": 0.42, "grad_norm": 0.4492850601673126, "learning_rate": 0.000571010136499124, "loss": 1.8768, "step": 12722 }, { "epoch": 0.42, "grad_norm": 0.4498773515224457, "learning_rate": 0.0005710056537553465, "loss": 1.8632, "step": 12723 }, { "epoch": 0.42, "grad_norm": 0.4363008141517639, "learning_rate": 0.0005710011706826081, "loss": 1.8537, "step": 12724 }, { "epoch": 0.42, "grad_norm": 0.43679094314575195, "learning_rate": 0.0005709966872809142, "loss": 1.8722, "step": 12725 }, { "epoch": 0.42, "grad_norm": 0.44324758648872375, "learning_rate": 0.0005709922035502702, "loss": 1.8632, "step": 12726 }, { "epoch": 0.42, "grad_norm": 0.42678382992744446, "learning_rate": 0.0005709877194906816, "loss": 1.8847, "step": 12727 }, { "epoch": 0.42, "grad_norm": 0.4247625470161438, "learning_rate": 0.0005709832351021538, "loss": 1.844, "step": 12728 }, { "epoch": 0.42, "grad_norm": 0.45552733540534973, "learning_rate": 0.0005709787503846924, "loss": 1.8627, "step": 12729 }, { "epoch": 0.42, "grad_norm": 0.44034963846206665, "learning_rate": 0.0005709742653383027, "loss": 1.9119, "step": 12730 }, { "epoch": 0.42, "grad_norm": 0.42651310563087463, "learning_rate": 0.0005709697799629901, "loss": 1.8197, "step": 12731 }, { "epoch": 0.42, "grad_norm": 0.4294987618923187, "learning_rate": 0.0005709652942587601, "loss": 1.8399, "step": 12732 }, { "epoch": 0.42, "grad_norm": 0.4594533145427704, "learning_rate": 0.0005709608082256183, "loss": 1.8756, "step": 12733 }, { "epoch": 0.42, "grad_norm": 0.437119722366333, "learning_rate": 0.0005709563218635698, "loss": 1.845, "step": 12734 }, { "epoch": 0.42, "grad_norm": 0.4477529227733612, "learning_rate": 0.0005709518351726204, "loss": 1.9071, "step": 12735 }, { "epoch": 0.42, "grad_norm": 0.4256227910518646, "learning_rate": 0.0005709473481527753, "loss": 1.8192, "step": 12736 }, { "epoch": 0.42, "grad_norm": 0.4339819848537445, "learning_rate": 0.0005709428608040402, "loss": 1.8418, "step": 12737 }, { "epoch": 0.42, "grad_norm": 0.43606624007225037, "learning_rate": 0.0005709383731264202, "loss": 1.7921, "step": 12738 }, { "epoch": 0.42, "grad_norm": 0.4388420581817627, "learning_rate": 0.000570933885119921, "loss": 1.7905, "step": 12739 }, { "epoch": 0.42, "grad_norm": 0.4571019411087036, "learning_rate": 0.000570929396784548, "loss": 1.9243, "step": 12740 }, { "epoch": 0.42, "grad_norm": 0.43459606170654297, "learning_rate": 0.0005709249081203066, "loss": 1.8432, "step": 12741 }, { "epoch": 0.42, "grad_norm": 0.41781407594680786, "learning_rate": 0.0005709204191272023, "loss": 1.8766, "step": 12742 }, { "epoch": 0.42, "grad_norm": 0.45570096373558044, "learning_rate": 0.0005709159298052406, "loss": 1.8589, "step": 12743 }, { "epoch": 0.42, "grad_norm": 0.43883320689201355, "learning_rate": 0.0005709114401544268, "loss": 1.8363, "step": 12744 }, { "epoch": 0.42, "grad_norm": 0.4228617548942566, "learning_rate": 0.0005709069501747664, "loss": 1.8341, "step": 12745 }, { "epoch": 0.42, "grad_norm": 0.4549328684806824, "learning_rate": 0.0005709024598662649, "loss": 1.7667, "step": 12746 }, { "epoch": 0.42, "grad_norm": 0.4330790340900421, "learning_rate": 0.0005708979692289278, "loss": 1.8828, "step": 12747 }, { "epoch": 0.42, "grad_norm": 0.4424498975276947, "learning_rate": 0.0005708934782627604, "loss": 1.91, "step": 12748 }, { "epoch": 0.42, "grad_norm": 0.43743324279785156, "learning_rate": 0.0005708889869677683, "loss": 1.8816, "step": 12749 }, { "epoch": 0.42, "grad_norm": 0.4326022267341614, "learning_rate": 0.0005708844953439568, "loss": 1.822, "step": 12750 }, { "epoch": 0.42, "grad_norm": 0.43483325839042664, "learning_rate": 0.0005708800033913316, "loss": 1.9076, "step": 12751 }, { "epoch": 0.42, "grad_norm": 0.44115859270095825, "learning_rate": 0.0005708755111098978, "loss": 1.843, "step": 12752 }, { "epoch": 0.42, "grad_norm": 0.4310254156589508, "learning_rate": 0.0005708710184996611, "loss": 1.8502, "step": 12753 }, { "epoch": 0.42, "grad_norm": 0.4404786229133606, "learning_rate": 0.0005708665255606269, "loss": 1.924, "step": 12754 }, { "epoch": 0.42, "grad_norm": 0.4411952793598175, "learning_rate": 0.0005708620322928007, "loss": 1.831, "step": 12755 }, { "epoch": 0.42, "grad_norm": 0.43890583515167236, "learning_rate": 0.0005708575386961878, "loss": 1.8458, "step": 12756 }, { "epoch": 0.42, "grad_norm": 0.45652836561203003, "learning_rate": 0.0005708530447707938, "loss": 1.9502, "step": 12757 }, { "epoch": 0.42, "grad_norm": 0.4473678469657898, "learning_rate": 0.0005708485505166241, "loss": 1.814, "step": 12758 }, { "epoch": 0.42, "grad_norm": 0.4533615708351135, "learning_rate": 0.0005708440559336842, "loss": 1.8375, "step": 12759 }, { "epoch": 0.42, "grad_norm": 0.451559841632843, "learning_rate": 0.0005708395610219796, "loss": 1.8472, "step": 12760 }, { "epoch": 0.42, "grad_norm": 0.44772157073020935, "learning_rate": 0.0005708350657815155, "loss": 1.8812, "step": 12761 }, { "epoch": 0.42, "grad_norm": 0.4525042474269867, "learning_rate": 0.0005708305702122977, "loss": 1.8884, "step": 12762 }, { "epoch": 0.42, "grad_norm": 0.4458934962749481, "learning_rate": 0.0005708260743143315, "loss": 1.9196, "step": 12763 }, { "epoch": 0.42, "grad_norm": 0.428775817155838, "learning_rate": 0.0005708215780876223, "loss": 1.8716, "step": 12764 }, { "epoch": 0.42, "grad_norm": 0.4480418562889099, "learning_rate": 0.0005708170815321755, "loss": 1.8714, "step": 12765 }, { "epoch": 0.42, "grad_norm": 0.4603748619556427, "learning_rate": 0.0005708125846479968, "loss": 1.8392, "step": 12766 }, { "epoch": 0.42, "grad_norm": 0.44006672501564026, "learning_rate": 0.0005708080874350914, "loss": 1.9247, "step": 12767 }, { "epoch": 0.42, "grad_norm": 0.43911513686180115, "learning_rate": 0.000570803589893465, "loss": 1.892, "step": 12768 }, { "epoch": 0.42, "grad_norm": 0.4504711329936981, "learning_rate": 0.0005707990920231228, "loss": 1.918, "step": 12769 }, { "epoch": 0.42, "grad_norm": 0.4548419713973999, "learning_rate": 0.0005707945938240705, "loss": 1.827, "step": 12770 }, { "epoch": 0.42, "grad_norm": 0.438508003950119, "learning_rate": 0.0005707900952963135, "loss": 1.9004, "step": 12771 }, { "epoch": 0.42, "grad_norm": 0.422769159078598, "learning_rate": 0.0005707855964398572, "loss": 1.7673, "step": 12772 }, { "epoch": 0.42, "grad_norm": 0.4285120666027069, "learning_rate": 0.000570781097254707, "loss": 1.8794, "step": 12773 }, { "epoch": 0.42, "grad_norm": 0.4448935091495514, "learning_rate": 0.0005707765977408686, "loss": 1.8398, "step": 12774 }, { "epoch": 0.43, "grad_norm": 0.4434871971607208, "learning_rate": 0.0005707720978983472, "loss": 1.9364, "step": 12775 }, { "epoch": 0.43, "grad_norm": 0.4524316191673279, "learning_rate": 0.0005707675977271485, "loss": 1.8306, "step": 12776 }, { "epoch": 0.43, "grad_norm": 0.44018158316612244, "learning_rate": 0.0005707630972272777, "loss": 1.8494, "step": 12777 }, { "epoch": 0.43, "grad_norm": 0.4449819028377533, "learning_rate": 0.0005707585963987404, "loss": 1.8787, "step": 12778 }, { "epoch": 0.43, "grad_norm": 0.4499451816082001, "learning_rate": 0.000570754095241542, "loss": 1.8881, "step": 12779 }, { "epoch": 0.43, "grad_norm": 0.43037375807762146, "learning_rate": 0.0005707495937556881, "loss": 1.8751, "step": 12780 }, { "epoch": 0.43, "grad_norm": 0.4376903176307678, "learning_rate": 0.0005707450919411842, "loss": 1.8774, "step": 12781 }, { "epoch": 0.43, "grad_norm": 0.43555930256843567, "learning_rate": 0.0005707405897980354, "loss": 2.0075, "step": 12782 }, { "epoch": 0.43, "grad_norm": 0.4440693259239197, "learning_rate": 0.0005707360873262477, "loss": 1.8848, "step": 12783 }, { "epoch": 0.43, "grad_norm": 0.43251150846481323, "learning_rate": 0.0005707315845258261, "loss": 1.8161, "step": 12784 }, { "epoch": 0.43, "grad_norm": 0.4504711627960205, "learning_rate": 0.0005707270813967763, "loss": 1.8496, "step": 12785 }, { "epoch": 0.43, "grad_norm": 0.44045841693878174, "learning_rate": 0.0005707225779391037, "loss": 1.9429, "step": 12786 }, { "epoch": 0.43, "grad_norm": 0.43196749687194824, "learning_rate": 0.0005707180741528137, "loss": 1.8739, "step": 12787 }, { "epoch": 0.43, "grad_norm": 0.473351389169693, "learning_rate": 0.000570713570037912, "loss": 1.7428, "step": 12788 }, { "epoch": 0.43, "grad_norm": 0.4839686453342438, "learning_rate": 0.0005707090655944038, "loss": 1.9233, "step": 12789 }, { "epoch": 0.43, "grad_norm": 0.43769824504852295, "learning_rate": 0.0005707045608222947, "loss": 1.8555, "step": 12790 }, { "epoch": 0.43, "grad_norm": 0.45619940757751465, "learning_rate": 0.0005707000557215902, "loss": 1.8476, "step": 12791 }, { "epoch": 0.43, "grad_norm": 0.5003817677497864, "learning_rate": 0.0005706955502922956, "loss": 1.8466, "step": 12792 }, { "epoch": 0.43, "grad_norm": 0.4402182698249817, "learning_rate": 0.0005706910445344166, "loss": 1.7988, "step": 12793 }, { "epoch": 0.43, "grad_norm": 0.5886769890785217, "learning_rate": 0.0005706865384479586, "loss": 1.9289, "step": 12794 }, { "epoch": 0.43, "grad_norm": 0.5152260661125183, "learning_rate": 0.0005706820320329269, "loss": 1.9405, "step": 12795 }, { "epoch": 0.43, "grad_norm": 0.4696446657180786, "learning_rate": 0.0005706775252893272, "loss": 1.8457, "step": 12796 }, { "epoch": 0.43, "grad_norm": 0.43110623955726624, "learning_rate": 0.0005706730182171648, "loss": 1.8276, "step": 12797 }, { "epoch": 0.43, "grad_norm": 0.45174160599708557, "learning_rate": 0.0005706685108164452, "loss": 1.8723, "step": 12798 }, { "epoch": 0.43, "grad_norm": 0.463530957698822, "learning_rate": 0.000570664003087174, "loss": 1.8848, "step": 12799 }, { "epoch": 0.43, "grad_norm": 0.4454539120197296, "learning_rate": 0.0005706594950293565, "loss": 1.8731, "step": 12800 }, { "epoch": 0.43, "grad_norm": 0.4489666819572449, "learning_rate": 0.0005706549866429983, "loss": 1.8992, "step": 12801 }, { "epoch": 0.43, "grad_norm": 0.4407198429107666, "learning_rate": 0.0005706504779281049, "loss": 1.8745, "step": 12802 }, { "epoch": 0.43, "grad_norm": 0.4470939636230469, "learning_rate": 0.0005706459688846815, "loss": 1.8934, "step": 12803 }, { "epoch": 0.43, "grad_norm": 0.43935465812683105, "learning_rate": 0.000570641459512734, "loss": 1.7633, "step": 12804 }, { "epoch": 0.43, "grad_norm": 0.4406220018863678, "learning_rate": 0.0005706369498122675, "loss": 1.8839, "step": 12805 }, { "epoch": 0.43, "grad_norm": 0.4445708692073822, "learning_rate": 0.0005706324397832878, "loss": 1.9348, "step": 12806 }, { "epoch": 0.43, "grad_norm": 0.4231261909008026, "learning_rate": 0.0005706279294258, "loss": 1.8915, "step": 12807 }, { "epoch": 0.43, "grad_norm": 0.4408688545227051, "learning_rate": 0.0005706234187398098, "loss": 1.8345, "step": 12808 }, { "epoch": 0.43, "grad_norm": 0.4433721899986267, "learning_rate": 0.0005706189077253226, "loss": 1.9081, "step": 12809 }, { "epoch": 0.43, "grad_norm": 0.4763616919517517, "learning_rate": 0.0005706143963823439, "loss": 1.9211, "step": 12810 }, { "epoch": 0.43, "grad_norm": 0.458848774433136, "learning_rate": 0.0005706098847108795, "loss": 1.9305, "step": 12811 }, { "epoch": 0.43, "grad_norm": 0.43215861916542053, "learning_rate": 0.0005706053727109342, "loss": 1.8541, "step": 12812 }, { "epoch": 0.43, "grad_norm": 0.44384312629699707, "learning_rate": 0.000570600860382514, "loss": 1.8421, "step": 12813 }, { "epoch": 0.43, "grad_norm": 0.48259657621383667, "learning_rate": 0.0005705963477256243, "loss": 1.8529, "step": 12814 }, { "epoch": 0.43, "grad_norm": 0.4568178951740265, "learning_rate": 0.0005705918347402705, "loss": 1.8687, "step": 12815 }, { "epoch": 0.43, "grad_norm": 0.4454846680164337, "learning_rate": 0.0005705873214264579, "loss": 1.9138, "step": 12816 }, { "epoch": 0.43, "grad_norm": 0.475103497505188, "learning_rate": 0.0005705828077841923, "loss": 1.8959, "step": 12817 }, { "epoch": 0.43, "grad_norm": 0.4491928517818451, "learning_rate": 0.0005705782938134789, "loss": 1.8429, "step": 12818 }, { "epoch": 0.43, "grad_norm": 0.44375962018966675, "learning_rate": 0.0005705737795143235, "loss": 1.8442, "step": 12819 }, { "epoch": 0.43, "grad_norm": 0.4538913667201996, "learning_rate": 0.0005705692648867314, "loss": 1.8454, "step": 12820 }, { "epoch": 0.43, "grad_norm": 0.44427627325057983, "learning_rate": 0.000570564749930708, "loss": 1.8619, "step": 12821 }, { "epoch": 0.43, "grad_norm": 0.4475611448287964, "learning_rate": 0.0005705602346462589, "loss": 1.8814, "step": 12822 }, { "epoch": 0.43, "grad_norm": 0.4330610930919647, "learning_rate": 0.0005705557190333895, "loss": 1.8435, "step": 12823 }, { "epoch": 0.43, "grad_norm": 0.4342062175273895, "learning_rate": 0.0005705512030921053, "loss": 1.8651, "step": 12824 }, { "epoch": 0.43, "grad_norm": 0.4450695514678955, "learning_rate": 0.0005705466868224119, "loss": 1.8887, "step": 12825 }, { "epoch": 0.43, "grad_norm": 0.431983083486557, "learning_rate": 0.0005705421702243146, "loss": 1.8558, "step": 12826 }, { "epoch": 0.43, "grad_norm": 0.44061461091041565, "learning_rate": 0.000570537653297819, "loss": 1.843, "step": 12827 }, { "epoch": 0.43, "grad_norm": 0.43589654564857483, "learning_rate": 0.0005705331360429305, "loss": 1.8606, "step": 12828 }, { "epoch": 0.43, "grad_norm": 0.4516054093837738, "learning_rate": 0.0005705286184596547, "loss": 1.9024, "step": 12829 }, { "epoch": 0.43, "grad_norm": 0.4446257948875427, "learning_rate": 0.000570524100547997, "loss": 1.8584, "step": 12830 }, { "epoch": 0.43, "grad_norm": 0.4339485168457031, "learning_rate": 0.0005705195823079629, "loss": 1.8092, "step": 12831 }, { "epoch": 0.43, "grad_norm": 0.46646034717559814, "learning_rate": 0.0005705150637395579, "loss": 1.9178, "step": 12832 }, { "epoch": 0.43, "grad_norm": 0.4744178354740143, "learning_rate": 0.0005705105448427874, "loss": 1.8408, "step": 12833 }, { "epoch": 0.43, "grad_norm": 0.44051048159599304, "learning_rate": 0.000570506025617657, "loss": 1.8865, "step": 12834 }, { "epoch": 0.43, "grad_norm": 0.4544174373149872, "learning_rate": 0.0005705015060641722, "loss": 1.9403, "step": 12835 }, { "epoch": 0.43, "grad_norm": 0.4373350441455841, "learning_rate": 0.0005704969861823384, "loss": 1.9323, "step": 12836 }, { "epoch": 0.43, "grad_norm": 0.4635597765445709, "learning_rate": 0.0005704924659721611, "loss": 1.836, "step": 12837 }, { "epoch": 0.43, "grad_norm": 0.4546859860420227, "learning_rate": 0.0005704879454336457, "loss": 1.9618, "step": 12838 }, { "epoch": 0.43, "grad_norm": 0.43982401490211487, "learning_rate": 0.0005704834245667979, "loss": 1.882, "step": 12839 }, { "epoch": 0.43, "grad_norm": 0.4935661852359772, "learning_rate": 0.0005704789033716231, "loss": 1.8997, "step": 12840 }, { "epoch": 0.43, "grad_norm": 0.4262644946575165, "learning_rate": 0.0005704743818481266, "loss": 1.8721, "step": 12841 }, { "epoch": 0.43, "grad_norm": 0.4454575777053833, "learning_rate": 0.0005704698599963144, "loss": 1.9123, "step": 12842 }, { "epoch": 0.43, "grad_norm": 0.43245092034339905, "learning_rate": 0.0005704653378161913, "loss": 1.8227, "step": 12843 }, { "epoch": 0.43, "grad_norm": 0.46765372157096863, "learning_rate": 0.0005704608153077633, "loss": 1.8567, "step": 12844 }, { "epoch": 0.43, "grad_norm": 0.4392808675765991, "learning_rate": 0.0005704562924710357, "loss": 1.9557, "step": 12845 }, { "epoch": 0.43, "grad_norm": 0.44620999693870544, "learning_rate": 0.0005704517693060139, "loss": 1.8946, "step": 12846 }, { "epoch": 0.43, "grad_norm": 0.4528732895851135, "learning_rate": 0.0005704472458127036, "loss": 1.9179, "step": 12847 }, { "epoch": 0.43, "grad_norm": 0.4311681389808655, "learning_rate": 0.0005704427219911102, "loss": 1.8743, "step": 12848 }, { "epoch": 0.43, "grad_norm": 0.4275861978530884, "learning_rate": 0.0005704381978412391, "loss": 1.9313, "step": 12849 }, { "epoch": 0.43, "grad_norm": 0.6762837171554565, "learning_rate": 0.000570433673363096, "loss": 1.8648, "step": 12850 }, { "epoch": 0.43, "grad_norm": 0.42411482334136963, "learning_rate": 0.0005704291485566862, "loss": 1.9174, "step": 12851 }, { "epoch": 0.43, "grad_norm": 0.4539428949356079, "learning_rate": 0.0005704246234220152, "loss": 1.9247, "step": 12852 }, { "epoch": 0.43, "grad_norm": 0.4345037043094635, "learning_rate": 0.0005704200979590887, "loss": 1.871, "step": 12853 }, { "epoch": 0.43, "grad_norm": 0.43945059180259705, "learning_rate": 0.000570415572167912, "loss": 1.8658, "step": 12854 }, { "epoch": 0.43, "grad_norm": 0.44087401032447815, "learning_rate": 0.0005704110460484906, "loss": 1.8701, "step": 12855 }, { "epoch": 0.43, "grad_norm": 0.42587485909461975, "learning_rate": 0.00057040651960083, "loss": 1.9141, "step": 12856 }, { "epoch": 0.43, "grad_norm": 0.4433830976486206, "learning_rate": 0.0005704019928249358, "loss": 1.9327, "step": 12857 }, { "epoch": 0.43, "grad_norm": 0.43254876136779785, "learning_rate": 0.0005703974657208133, "loss": 1.8917, "step": 12858 }, { "epoch": 0.43, "grad_norm": 0.43762969970703125, "learning_rate": 0.0005703929382884683, "loss": 1.8343, "step": 12859 }, { "epoch": 0.43, "grad_norm": 0.44236117601394653, "learning_rate": 0.000570388410527906, "loss": 1.8846, "step": 12860 }, { "epoch": 0.43, "grad_norm": 0.4441395699977875, "learning_rate": 0.0005703838824391319, "loss": 1.9278, "step": 12861 }, { "epoch": 0.43, "grad_norm": 0.4301706850528717, "learning_rate": 0.0005703793540221517, "loss": 1.8865, "step": 12862 }, { "epoch": 0.43, "grad_norm": 0.43878868222236633, "learning_rate": 0.0005703748252769709, "loss": 1.924, "step": 12863 }, { "epoch": 0.43, "grad_norm": 0.4469741880893707, "learning_rate": 0.0005703702962035948, "loss": 1.8623, "step": 12864 }, { "epoch": 0.43, "grad_norm": 0.45979759097099304, "learning_rate": 0.0005703657668020291, "loss": 1.9049, "step": 12865 }, { "epoch": 0.43, "grad_norm": 0.4538968801498413, "learning_rate": 0.0005703612370722791, "loss": 1.8069, "step": 12866 }, { "epoch": 0.43, "grad_norm": 0.47959643602371216, "learning_rate": 0.0005703567070143504, "loss": 1.8595, "step": 12867 }, { "epoch": 0.43, "grad_norm": 0.4354152977466583, "learning_rate": 0.0005703521766282485, "loss": 1.8948, "step": 12868 }, { "epoch": 0.43, "grad_norm": 0.43947315216064453, "learning_rate": 0.0005703476459139789, "loss": 1.8306, "step": 12869 }, { "epoch": 0.43, "grad_norm": 0.44532495737075806, "learning_rate": 0.0005703431148715471, "loss": 1.8466, "step": 12870 }, { "epoch": 0.43, "grad_norm": 0.44845274090766907, "learning_rate": 0.0005703385835009586, "loss": 1.8667, "step": 12871 }, { "epoch": 0.43, "grad_norm": 0.43793681263923645, "learning_rate": 0.0005703340518022189, "loss": 1.8983, "step": 12872 }, { "epoch": 0.43, "grad_norm": 0.43921688199043274, "learning_rate": 0.0005703295197753334, "loss": 1.9131, "step": 12873 }, { "epoch": 0.43, "grad_norm": 0.44337189197540283, "learning_rate": 0.0005703249874203078, "loss": 1.8186, "step": 12874 }, { "epoch": 0.43, "grad_norm": 0.42032328248023987, "learning_rate": 0.0005703204547371475, "loss": 1.8628, "step": 12875 }, { "epoch": 0.43, "grad_norm": 0.43546491861343384, "learning_rate": 0.0005703159217258579, "loss": 1.8875, "step": 12876 }, { "epoch": 0.43, "grad_norm": 0.43244796991348267, "learning_rate": 0.0005703113883864447, "loss": 1.8989, "step": 12877 }, { "epoch": 0.43, "grad_norm": 0.4369845390319824, "learning_rate": 0.0005703068547189133, "loss": 1.937, "step": 12878 }, { "epoch": 0.43, "grad_norm": 0.4428592026233673, "learning_rate": 0.0005703023207232692, "loss": 1.8635, "step": 12879 }, { "epoch": 0.43, "grad_norm": 0.4344487190246582, "learning_rate": 0.0005702977863995179, "loss": 1.8502, "step": 12880 }, { "epoch": 0.43, "grad_norm": 0.42954087257385254, "learning_rate": 0.0005702932517476649, "loss": 1.8604, "step": 12881 }, { "epoch": 0.43, "grad_norm": 0.43333354592323303, "learning_rate": 0.0005702887167677157, "loss": 1.8706, "step": 12882 }, { "epoch": 0.43, "grad_norm": 0.4343886971473694, "learning_rate": 0.0005702841814596757, "loss": 1.8839, "step": 12883 }, { "epoch": 0.43, "grad_norm": 0.4236249327659607, "learning_rate": 0.0005702796458235507, "loss": 1.9431, "step": 12884 }, { "epoch": 0.43, "grad_norm": 0.4695264399051666, "learning_rate": 0.000570275109859346, "loss": 1.8947, "step": 12885 }, { "epoch": 0.43, "grad_norm": 0.45497599244117737, "learning_rate": 0.0005702705735670671, "loss": 1.9408, "step": 12886 }, { "epoch": 0.43, "grad_norm": 0.44434109330177307, "learning_rate": 0.0005702660369467196, "loss": 1.947, "step": 12887 }, { "epoch": 0.43, "grad_norm": 0.4372534155845642, "learning_rate": 0.000570261499998309, "loss": 1.8999, "step": 12888 }, { "epoch": 0.43, "grad_norm": 0.4339563250541687, "learning_rate": 0.0005702569627218406, "loss": 1.8356, "step": 12889 }, { "epoch": 0.43, "grad_norm": 0.43697062134742737, "learning_rate": 0.0005702524251173203, "loss": 1.8465, "step": 12890 }, { "epoch": 0.43, "grad_norm": 0.4480610191822052, "learning_rate": 0.0005702478871847533, "loss": 1.9174, "step": 12891 }, { "epoch": 0.43, "grad_norm": 0.4479084014892578, "learning_rate": 0.000570243348924145, "loss": 1.8262, "step": 12892 }, { "epoch": 0.43, "grad_norm": 0.4596481919288635, "learning_rate": 0.0005702388103355012, "loss": 1.8552, "step": 12893 }, { "epoch": 0.43, "grad_norm": 0.4316539764404297, "learning_rate": 0.0005702342714188274, "loss": 1.8507, "step": 12894 }, { "epoch": 0.43, "grad_norm": 0.43423646688461304, "learning_rate": 0.0005702297321741288, "loss": 1.8108, "step": 12895 }, { "epoch": 0.43, "grad_norm": 0.438444048166275, "learning_rate": 0.0005702251926014113, "loss": 1.8698, "step": 12896 }, { "epoch": 0.43, "grad_norm": 0.45270225405693054, "learning_rate": 0.0005702206527006802, "loss": 1.8028, "step": 12897 }, { "epoch": 0.43, "grad_norm": 0.43972936272621155, "learning_rate": 0.0005702161124719411, "loss": 1.8916, "step": 12898 }, { "epoch": 0.43, "grad_norm": 0.4215795397758484, "learning_rate": 0.0005702115719151993, "loss": 1.8497, "step": 12899 }, { "epoch": 0.43, "grad_norm": 0.42895209789276123, "learning_rate": 0.0005702070310304605, "loss": 1.8622, "step": 12900 }, { "epoch": 0.43, "grad_norm": 0.47572508454322815, "learning_rate": 0.0005702024898177303, "loss": 1.8566, "step": 12901 }, { "epoch": 0.43, "grad_norm": 0.4437578320503235, "learning_rate": 0.000570197948277014, "loss": 1.9195, "step": 12902 }, { "epoch": 0.43, "grad_norm": 0.4548014998435974, "learning_rate": 0.0005701934064083172, "loss": 1.9184, "step": 12903 }, { "epoch": 0.43, "grad_norm": 0.45963189005851746, "learning_rate": 0.0005701888642116454, "loss": 1.8152, "step": 12904 }, { "epoch": 0.43, "grad_norm": 0.4387136399745941, "learning_rate": 0.0005701843216870041, "loss": 1.8778, "step": 12905 }, { "epoch": 0.43, "grad_norm": 0.44144031405448914, "learning_rate": 0.000570179778834399, "loss": 1.8605, "step": 12906 }, { "epoch": 0.43, "grad_norm": 0.44112613797187805, "learning_rate": 0.0005701752356538353, "loss": 1.8942, "step": 12907 }, { "epoch": 0.43, "grad_norm": 0.44668665528297424, "learning_rate": 0.0005701706921453188, "loss": 1.879, "step": 12908 }, { "epoch": 0.43, "grad_norm": 0.4378131628036499, "learning_rate": 0.0005701661483088548, "loss": 1.8164, "step": 12909 }, { "epoch": 0.43, "grad_norm": 0.43472105264663696, "learning_rate": 0.0005701616041444489, "loss": 1.8256, "step": 12910 }, { "epoch": 0.43, "grad_norm": 0.44537389278411865, "learning_rate": 0.0005701570596521068, "loss": 1.9219, "step": 12911 }, { "epoch": 0.43, "grad_norm": 0.43725308775901794, "learning_rate": 0.0005701525148318335, "loss": 1.9181, "step": 12912 }, { "epoch": 0.43, "grad_norm": 0.4372587203979492, "learning_rate": 0.000570147969683635, "loss": 1.9143, "step": 12913 }, { "epoch": 0.43, "grad_norm": 0.4427453279495239, "learning_rate": 0.0005701434242075167, "loss": 1.8246, "step": 12914 }, { "epoch": 0.43, "grad_norm": 0.45283564925193787, "learning_rate": 0.0005701388784034842, "loss": 1.8868, "step": 12915 }, { "epoch": 0.43, "grad_norm": 0.4594733417034149, "learning_rate": 0.0005701343322715428, "loss": 1.9364, "step": 12916 }, { "epoch": 0.43, "grad_norm": 0.44124114513397217, "learning_rate": 0.0005701297858116981, "loss": 1.8865, "step": 12917 }, { "epoch": 0.43, "grad_norm": 0.44436851143836975, "learning_rate": 0.0005701252390239557, "loss": 1.8844, "step": 12918 }, { "epoch": 0.43, "grad_norm": 0.46447429060935974, "learning_rate": 0.0005701206919083209, "loss": 1.895, "step": 12919 }, { "epoch": 0.43, "grad_norm": 0.4399833679199219, "learning_rate": 0.0005701161444647996, "loss": 1.9234, "step": 12920 }, { "epoch": 0.43, "grad_norm": 0.4554622769355774, "learning_rate": 0.0005701115966933969, "loss": 1.907, "step": 12921 }, { "epoch": 0.43, "grad_norm": 0.43567371368408203, "learning_rate": 0.0005701070485941187, "loss": 1.8757, "step": 12922 }, { "epoch": 0.43, "grad_norm": 0.4307953417301178, "learning_rate": 0.0005701025001669702, "loss": 1.8651, "step": 12923 }, { "epoch": 0.43, "grad_norm": 0.4376070499420166, "learning_rate": 0.0005700979514119571, "loss": 1.885, "step": 12924 }, { "epoch": 0.43, "grad_norm": 0.4510735869407654, "learning_rate": 0.000570093402329085, "loss": 1.9642, "step": 12925 }, { "epoch": 0.43, "grad_norm": 0.44266074895858765, "learning_rate": 0.0005700888529183593, "loss": 1.9351, "step": 12926 }, { "epoch": 0.43, "grad_norm": 0.4740963280200958, "learning_rate": 0.0005700843031797853, "loss": 1.8863, "step": 12927 }, { "epoch": 0.43, "grad_norm": 0.4296816289424896, "learning_rate": 0.0005700797531133689, "loss": 1.9155, "step": 12928 }, { "epoch": 0.43, "grad_norm": 0.4655599594116211, "learning_rate": 0.0005700752027191155, "loss": 1.9267, "step": 12929 }, { "epoch": 0.43, "grad_norm": 0.4569566249847412, "learning_rate": 0.0005700706519970306, "loss": 1.8629, "step": 12930 }, { "epoch": 0.43, "grad_norm": 0.4599647521972656, "learning_rate": 0.0005700661009471197, "loss": 2.0017, "step": 12931 }, { "epoch": 0.43, "grad_norm": 0.48014870285987854, "learning_rate": 0.0005700615495693883, "loss": 1.9172, "step": 12932 }, { "epoch": 0.43, "grad_norm": 0.4365234076976776, "learning_rate": 0.000570056997863842, "loss": 1.911, "step": 12933 }, { "epoch": 0.43, "grad_norm": 0.4294091761112213, "learning_rate": 0.0005700524458304863, "loss": 1.8446, "step": 12934 }, { "epoch": 0.43, "grad_norm": 0.4504241347312927, "learning_rate": 0.0005700478934693267, "loss": 1.9576, "step": 12935 }, { "epoch": 0.43, "grad_norm": 0.44943666458129883, "learning_rate": 0.0005700433407803688, "loss": 1.9401, "step": 12936 }, { "epoch": 0.43, "grad_norm": 0.46129244565963745, "learning_rate": 0.0005700387877636179, "loss": 1.8296, "step": 12937 }, { "epoch": 0.43, "grad_norm": 0.42743927240371704, "learning_rate": 0.0005700342344190799, "loss": 1.8585, "step": 12938 }, { "epoch": 0.43, "grad_norm": 0.429455041885376, "learning_rate": 0.0005700296807467602, "loss": 1.8368, "step": 12939 }, { "epoch": 0.43, "grad_norm": 0.4527624845504761, "learning_rate": 0.0005700251267466641, "loss": 1.9075, "step": 12940 }, { "epoch": 0.43, "grad_norm": 0.460354745388031, "learning_rate": 0.0005700205724187972, "loss": 1.9012, "step": 12941 }, { "epoch": 0.43, "grad_norm": 0.432111531496048, "learning_rate": 0.0005700160177631652, "loss": 1.7829, "step": 12942 }, { "epoch": 0.43, "grad_norm": 0.4628280997276306, "learning_rate": 0.0005700114627797735, "loss": 1.8196, "step": 12943 }, { "epoch": 0.43, "grad_norm": 0.43772971630096436, "learning_rate": 0.0005700069074686278, "loss": 1.9084, "step": 12944 }, { "epoch": 0.43, "grad_norm": 0.512614905834198, "learning_rate": 0.0005700023518297334, "loss": 1.91, "step": 12945 }, { "epoch": 0.43, "grad_norm": 0.4357858896255493, "learning_rate": 0.000569997795863096, "loss": 1.8625, "step": 12946 }, { "epoch": 0.43, "grad_norm": 0.43233799934387207, "learning_rate": 0.000569993239568721, "loss": 1.8858, "step": 12947 }, { "epoch": 0.43, "grad_norm": 0.4388788640499115, "learning_rate": 0.0005699886829466141, "loss": 1.8509, "step": 12948 }, { "epoch": 0.43, "grad_norm": 0.4285619854927063, "learning_rate": 0.0005699841259967804, "loss": 1.8794, "step": 12949 }, { "epoch": 0.43, "grad_norm": 0.4278552532196045, "learning_rate": 0.000569979568719226, "loss": 1.8915, "step": 12950 }, { "epoch": 0.43, "grad_norm": 0.43898698687553406, "learning_rate": 0.0005699750111139562, "loss": 1.7997, "step": 12951 }, { "epoch": 0.43, "grad_norm": 0.4426198899745941, "learning_rate": 0.0005699704531809765, "loss": 1.8269, "step": 12952 }, { "epoch": 0.43, "grad_norm": 0.5329222679138184, "learning_rate": 0.0005699658949202924, "loss": 1.8857, "step": 12953 }, { "epoch": 0.43, "grad_norm": 0.45378315448760986, "learning_rate": 0.0005699613363319095, "loss": 1.8877, "step": 12954 }, { "epoch": 0.43, "grad_norm": 0.4517758786678314, "learning_rate": 0.0005699567774158334, "loss": 1.8907, "step": 12955 }, { "epoch": 0.43, "grad_norm": 0.4315699338912964, "learning_rate": 0.0005699522181720694, "loss": 1.8796, "step": 12956 }, { "epoch": 0.43, "grad_norm": 0.4191713333129883, "learning_rate": 0.0005699476586006233, "loss": 1.9279, "step": 12957 }, { "epoch": 0.43, "grad_norm": 0.43932250142097473, "learning_rate": 0.0005699430987015004, "loss": 1.8332, "step": 12958 }, { "epoch": 0.43, "grad_norm": 0.4492093622684479, "learning_rate": 0.0005699385384747063, "loss": 1.8119, "step": 12959 }, { "epoch": 0.43, "grad_norm": 0.4290931224822998, "learning_rate": 0.0005699339779202468, "loss": 1.8982, "step": 12960 }, { "epoch": 0.43, "grad_norm": 0.4289145767688751, "learning_rate": 0.0005699294170381271, "loss": 1.8484, "step": 12961 }, { "epoch": 0.43, "grad_norm": 0.44275301694869995, "learning_rate": 0.0005699248558283528, "loss": 1.8685, "step": 12962 }, { "epoch": 0.43, "grad_norm": 0.4392007887363434, "learning_rate": 0.0005699202942909296, "loss": 1.8212, "step": 12963 }, { "epoch": 0.43, "grad_norm": 0.43899381160736084, "learning_rate": 0.000569915732425863, "loss": 1.8213, "step": 12964 }, { "epoch": 0.43, "grad_norm": 0.4329622983932495, "learning_rate": 0.0005699111702331583, "loss": 1.817, "step": 12965 }, { "epoch": 0.43, "grad_norm": 0.4379563331604004, "learning_rate": 0.0005699066077128213, "loss": 1.844, "step": 12966 }, { "epoch": 0.43, "grad_norm": 0.41919630765914917, "learning_rate": 0.0005699020448648575, "loss": 1.8797, "step": 12967 }, { "epoch": 0.43, "grad_norm": 0.44693517684936523, "learning_rate": 0.0005698974816892722, "loss": 1.9296, "step": 12968 }, { "epoch": 0.43, "grad_norm": 0.43234702944755554, "learning_rate": 0.0005698929181860713, "loss": 1.8374, "step": 12969 }, { "epoch": 0.43, "grad_norm": 0.4210466742515564, "learning_rate": 0.0005698883543552601, "loss": 1.8403, "step": 12970 }, { "epoch": 0.43, "grad_norm": 0.43108949065208435, "learning_rate": 0.0005698837901968442, "loss": 1.9582, "step": 12971 }, { "epoch": 0.43, "grad_norm": 0.4352577328681946, "learning_rate": 0.0005698792257108292, "loss": 1.8698, "step": 12972 }, { "epoch": 0.43, "grad_norm": 0.4480637013912201, "learning_rate": 0.0005698746608972206, "loss": 1.8031, "step": 12973 }, { "epoch": 0.43, "grad_norm": 0.4324115216732025, "learning_rate": 0.0005698700957560239, "loss": 1.8266, "step": 12974 }, { "epoch": 0.43, "grad_norm": 0.4230906367301941, "learning_rate": 0.0005698655302872446, "loss": 1.8837, "step": 12975 }, { "epoch": 0.43, "grad_norm": 0.4358299970626831, "learning_rate": 0.0005698609644908882, "loss": 1.8994, "step": 12976 }, { "epoch": 0.43, "grad_norm": 0.43987125158309937, "learning_rate": 0.0005698563983669607, "loss": 1.8788, "step": 12977 }, { "epoch": 0.43, "grad_norm": 0.44655394554138184, "learning_rate": 0.0005698518319154671, "loss": 1.9224, "step": 12978 }, { "epoch": 0.43, "grad_norm": 0.42661771178245544, "learning_rate": 0.0005698472651364131, "loss": 1.9327, "step": 12979 }, { "epoch": 0.43, "grad_norm": 0.43681371212005615, "learning_rate": 0.0005698426980298043, "loss": 1.8116, "step": 12980 }, { "epoch": 0.43, "grad_norm": 0.4561691880226135, "learning_rate": 0.0005698381305956462, "loss": 1.8702, "step": 12981 }, { "epoch": 0.43, "grad_norm": 0.4463098347187042, "learning_rate": 0.0005698335628339444, "loss": 1.9776, "step": 12982 }, { "epoch": 0.43, "grad_norm": 0.42676761746406555, "learning_rate": 0.0005698289947447044, "loss": 1.78, "step": 12983 }, { "epoch": 0.43, "grad_norm": 0.42797547578811646, "learning_rate": 0.0005698244263279317, "loss": 1.8591, "step": 12984 }, { "epoch": 0.43, "grad_norm": 0.42383602261543274, "learning_rate": 0.0005698198575836321, "loss": 1.8322, "step": 12985 }, { "epoch": 0.43, "grad_norm": 0.42580872774124146, "learning_rate": 0.0005698152885118108, "loss": 1.8525, "step": 12986 }, { "epoch": 0.43, "grad_norm": 0.462041437625885, "learning_rate": 0.0005698107191124735, "loss": 1.983, "step": 12987 }, { "epoch": 0.43, "grad_norm": 0.4509715139865875, "learning_rate": 0.0005698061493856258, "loss": 1.8646, "step": 12988 }, { "epoch": 0.43, "grad_norm": 0.42998671531677246, "learning_rate": 0.0005698015793312732, "loss": 1.855, "step": 12989 }, { "epoch": 0.43, "grad_norm": 0.41819170117378235, "learning_rate": 0.0005697970089494211, "loss": 1.8603, "step": 12990 }, { "epoch": 0.43, "grad_norm": 0.4302747845649719, "learning_rate": 0.0005697924382400754, "loss": 1.916, "step": 12991 }, { "epoch": 0.43, "grad_norm": 0.45725470781326294, "learning_rate": 0.0005697878672032412, "loss": 1.8894, "step": 12992 }, { "epoch": 0.43, "grad_norm": 0.4568396508693695, "learning_rate": 0.0005697832958389244, "loss": 1.8853, "step": 12993 }, { "epoch": 0.43, "grad_norm": 0.44020363688468933, "learning_rate": 0.0005697787241471303, "loss": 1.8325, "step": 12994 }, { "epoch": 0.43, "grad_norm": 0.4837943911552429, "learning_rate": 0.0005697741521278648, "loss": 1.9235, "step": 12995 }, { "epoch": 0.43, "grad_norm": 0.41888904571533203, "learning_rate": 0.0005697695797811331, "loss": 1.8752, "step": 12996 }, { "epoch": 0.43, "grad_norm": 0.422806978225708, "learning_rate": 0.0005697650071069409, "loss": 1.8915, "step": 12997 }, { "epoch": 0.43, "grad_norm": 0.43523627519607544, "learning_rate": 0.0005697604341052937, "loss": 1.889, "step": 12998 }, { "epoch": 0.43, "grad_norm": 0.45294544100761414, "learning_rate": 0.0005697558607761972, "loss": 1.86, "step": 12999 }, { "epoch": 0.43, "grad_norm": 0.4537821412086487, "learning_rate": 0.0005697512871196567, "loss": 1.8933, "step": 13000 }, { "epoch": 0.43, "grad_norm": 0.44843974709510803, "learning_rate": 0.0005697467131356779, "loss": 1.8385, "step": 13001 }, { "epoch": 0.43, "grad_norm": 0.4504013657569885, "learning_rate": 0.0005697421388242664, "loss": 1.8644, "step": 13002 }, { "epoch": 0.43, "grad_norm": 0.42675191164016724, "learning_rate": 0.0005697375641854276, "loss": 1.8631, "step": 13003 }, { "epoch": 0.43, "grad_norm": 0.4480161964893341, "learning_rate": 0.0005697329892191672, "loss": 1.8971, "step": 13004 }, { "epoch": 0.43, "grad_norm": 0.4314371943473816, "learning_rate": 0.0005697284139254907, "loss": 1.9037, "step": 13005 }, { "epoch": 0.43, "grad_norm": 0.4489063322544098, "learning_rate": 0.0005697238383044036, "loss": 1.8467, "step": 13006 }, { "epoch": 0.43, "grad_norm": 0.4567691385746002, "learning_rate": 0.0005697192623559115, "loss": 1.8445, "step": 13007 }, { "epoch": 0.43, "grad_norm": 0.4399265646934509, "learning_rate": 0.0005697146860800201, "loss": 1.8086, "step": 13008 }, { "epoch": 0.43, "grad_norm": 0.4569454789161682, "learning_rate": 0.0005697101094767346, "loss": 1.8652, "step": 13009 }, { "epoch": 0.43, "grad_norm": 0.44094640016555786, "learning_rate": 0.000569705532546061, "loss": 1.8292, "step": 13010 }, { "epoch": 0.43, "grad_norm": 0.4555749297142029, "learning_rate": 0.0005697009552880044, "loss": 1.8669, "step": 13011 }, { "epoch": 0.43, "grad_norm": 0.4182819724082947, "learning_rate": 0.0005696963777025707, "loss": 1.7848, "step": 13012 }, { "epoch": 0.43, "grad_norm": 0.44983911514282227, "learning_rate": 0.0005696917997897652, "loss": 1.8498, "step": 13013 }, { "epoch": 0.43, "grad_norm": 0.448350727558136, "learning_rate": 0.0005696872215495937, "loss": 1.8482, "step": 13014 }, { "epoch": 0.43, "grad_norm": 0.4530375599861145, "learning_rate": 0.0005696826429820616, "loss": 1.9591, "step": 13015 }, { "epoch": 0.43, "grad_norm": 0.4206078350543976, "learning_rate": 0.0005696780640871747, "loss": 1.849, "step": 13016 }, { "epoch": 0.43, "grad_norm": 0.45168909430503845, "learning_rate": 0.0005696734848649382, "loss": 1.9036, "step": 13017 }, { "epoch": 0.43, "grad_norm": 0.8112386465072632, "learning_rate": 0.0005696689053153578, "loss": 1.8811, "step": 13018 }, { "epoch": 0.43, "grad_norm": 0.4404489994049072, "learning_rate": 0.0005696643254384391, "loss": 1.8675, "step": 13019 }, { "epoch": 0.43, "grad_norm": 0.46280068159103394, "learning_rate": 0.0005696597452341878, "loss": 1.8882, "step": 13020 }, { "epoch": 0.43, "grad_norm": 0.4306124746799469, "learning_rate": 0.000569655164702609, "loss": 1.8757, "step": 13021 }, { "epoch": 0.43, "grad_norm": 0.45386847853660583, "learning_rate": 0.0005696505838437088, "loss": 1.8584, "step": 13022 }, { "epoch": 0.43, "grad_norm": 0.46494460105895996, "learning_rate": 0.0005696460026574925, "loss": 1.8964, "step": 13023 }, { "epoch": 0.43, "grad_norm": 0.47475290298461914, "learning_rate": 0.0005696414211439657, "loss": 1.9295, "step": 13024 }, { "epoch": 0.43, "grad_norm": 0.45512035489082336, "learning_rate": 0.0005696368393031338, "loss": 1.932, "step": 13025 }, { "epoch": 0.43, "grad_norm": 0.4490619897842407, "learning_rate": 0.0005696322571350026, "loss": 1.9199, "step": 13026 }, { "epoch": 0.43, "grad_norm": 0.47581517696380615, "learning_rate": 0.0005696276746395776, "loss": 1.8214, "step": 13027 }, { "epoch": 0.43, "grad_norm": 0.44288671016693115, "learning_rate": 0.0005696230918168642, "loss": 1.8563, "step": 13028 }, { "epoch": 0.43, "grad_norm": 0.4541473686695099, "learning_rate": 0.0005696185086668682, "loss": 1.8511, "step": 13029 }, { "epoch": 0.43, "grad_norm": 0.4212682247161865, "learning_rate": 0.0005696139251895951, "loss": 1.8181, "step": 13030 }, { "epoch": 0.43, "grad_norm": 0.44901740550994873, "learning_rate": 0.0005696093413850503, "loss": 1.92, "step": 13031 }, { "epoch": 0.43, "grad_norm": 0.44394078850746155, "learning_rate": 0.0005696047572532395, "loss": 1.9034, "step": 13032 }, { "epoch": 0.43, "grad_norm": 0.4409829080104828, "learning_rate": 0.0005696001727941684, "loss": 1.8881, "step": 13033 }, { "epoch": 0.43, "grad_norm": 0.46553000807762146, "learning_rate": 0.0005695955880078423, "loss": 1.801, "step": 13034 }, { "epoch": 0.43, "grad_norm": 0.44798725843429565, "learning_rate": 0.0005695910028942669, "loss": 1.9025, "step": 13035 }, { "epoch": 0.43, "grad_norm": 0.43805164098739624, "learning_rate": 0.0005695864174534476, "loss": 1.866, "step": 13036 }, { "epoch": 0.43, "grad_norm": 0.42249470949172974, "learning_rate": 0.0005695818316853904, "loss": 1.9199, "step": 13037 }, { "epoch": 0.43, "grad_norm": 0.42727434635162354, "learning_rate": 0.0005695772455901004, "loss": 1.885, "step": 13038 }, { "epoch": 0.43, "grad_norm": 0.4406639337539673, "learning_rate": 0.0005695726591675833, "loss": 1.8377, "step": 13039 }, { "epoch": 0.43, "grad_norm": 0.42685818672180176, "learning_rate": 0.0005695680724178448, "loss": 1.8585, "step": 13040 }, { "epoch": 0.43, "grad_norm": 0.4448455274105072, "learning_rate": 0.0005695634853408903, "loss": 1.9164, "step": 13041 }, { "epoch": 0.43, "grad_norm": 0.4507567882537842, "learning_rate": 0.0005695588979367256, "loss": 1.8991, "step": 13042 }, { "epoch": 0.43, "grad_norm": 0.4384984076023102, "learning_rate": 0.000569554310205356, "loss": 1.9267, "step": 13043 }, { "epoch": 0.43, "grad_norm": 0.42639297246932983, "learning_rate": 0.0005695497221467872, "loss": 1.8623, "step": 13044 }, { "epoch": 0.43, "grad_norm": 0.41879522800445557, "learning_rate": 0.0005695451337610247, "loss": 1.8616, "step": 13045 }, { "epoch": 0.43, "grad_norm": 0.44006991386413574, "learning_rate": 0.0005695405450480743, "loss": 1.8225, "step": 13046 }, { "epoch": 0.43, "grad_norm": 0.4382479190826416, "learning_rate": 0.0005695359560079411, "loss": 1.8127, "step": 13047 }, { "epoch": 0.43, "grad_norm": 0.43748149275779724, "learning_rate": 0.0005695313666406312, "loss": 1.7641, "step": 13048 }, { "epoch": 0.43, "grad_norm": 0.44313156604766846, "learning_rate": 0.0005695267769461499, "loss": 1.8918, "step": 13049 }, { "epoch": 0.43, "grad_norm": 0.46193310618400574, "learning_rate": 0.0005695221869245028, "loss": 1.9747, "step": 13050 }, { "epoch": 0.43, "grad_norm": 0.43147343397140503, "learning_rate": 0.0005695175965756954, "loss": 1.8257, "step": 13051 }, { "epoch": 0.43, "grad_norm": 0.42964115738868713, "learning_rate": 0.0005695130058997334, "loss": 1.879, "step": 13052 }, { "epoch": 0.43, "grad_norm": 0.4359276294708252, "learning_rate": 0.0005695084148966223, "loss": 1.8218, "step": 13053 }, { "epoch": 0.43, "grad_norm": 0.45587673783302307, "learning_rate": 0.0005695038235663677, "loss": 1.8518, "step": 13054 }, { "epoch": 0.43, "grad_norm": 0.43316400051116943, "learning_rate": 0.0005694992319089752, "loss": 1.9219, "step": 13055 }, { "epoch": 0.43, "grad_norm": 0.4360576570034027, "learning_rate": 0.0005694946399244504, "loss": 1.8324, "step": 13056 }, { "epoch": 0.43, "grad_norm": 0.4529059827327728, "learning_rate": 0.0005694900476127987, "loss": 1.9, "step": 13057 }, { "epoch": 0.43, "grad_norm": 0.4572261571884155, "learning_rate": 0.0005694854549740258, "loss": 1.9314, "step": 13058 }, { "epoch": 0.43, "grad_norm": 0.43905988335609436, "learning_rate": 0.0005694808620081374, "loss": 1.8516, "step": 13059 }, { "epoch": 0.43, "grad_norm": 0.4475623369216919, "learning_rate": 0.0005694762687151388, "loss": 1.8193, "step": 13060 }, { "epoch": 0.43, "grad_norm": 0.45876529812812805, "learning_rate": 0.0005694716750950358, "loss": 1.8384, "step": 13061 }, { "epoch": 0.43, "grad_norm": 0.4275914430618286, "learning_rate": 0.0005694670811478337, "loss": 1.8402, "step": 13062 }, { "epoch": 0.43, "grad_norm": 0.45482978224754333, "learning_rate": 0.0005694624868735385, "loss": 1.8904, "step": 13063 }, { "epoch": 0.43, "grad_norm": 0.4335601329803467, "learning_rate": 0.0005694578922721555, "loss": 1.7818, "step": 13064 }, { "epoch": 0.43, "grad_norm": 0.4344848692417145, "learning_rate": 0.0005694532973436902, "loss": 1.8217, "step": 13065 }, { "epoch": 0.43, "grad_norm": 0.4242386221885681, "learning_rate": 0.0005694487020881484, "loss": 1.8517, "step": 13066 }, { "epoch": 0.43, "grad_norm": 0.439527690410614, "learning_rate": 0.0005694441065055355, "loss": 1.8311, "step": 13067 }, { "epoch": 0.43, "grad_norm": 0.436833918094635, "learning_rate": 0.0005694395105958572, "loss": 1.9676, "step": 13068 }, { "epoch": 0.43, "grad_norm": 0.5242483019828796, "learning_rate": 0.0005694349143591191, "loss": 1.8914, "step": 13069 }, { "epoch": 0.43, "grad_norm": 0.42854467034339905, "learning_rate": 0.0005694303177953266, "loss": 1.883, "step": 13070 }, { "epoch": 0.43, "grad_norm": 0.43444615602493286, "learning_rate": 0.0005694257209044854, "loss": 1.9129, "step": 13071 }, { "epoch": 0.43, "grad_norm": 0.4435056746006012, "learning_rate": 0.0005694211236866012, "loss": 1.8646, "step": 13072 }, { "epoch": 0.43, "grad_norm": 0.4197748005390167, "learning_rate": 0.0005694165261416794, "loss": 1.9071, "step": 13073 }, { "epoch": 0.43, "grad_norm": 0.42492854595184326, "learning_rate": 0.0005694119282697255, "loss": 1.8134, "step": 13074 }, { "epoch": 0.44, "grad_norm": 0.45121586322784424, "learning_rate": 0.0005694073300707454, "loss": 1.8577, "step": 13075 }, { "epoch": 0.44, "grad_norm": 0.4322241544723511, "learning_rate": 0.0005694027315447445, "loss": 1.7884, "step": 13076 }, { "epoch": 0.44, "grad_norm": 0.4306427836418152, "learning_rate": 0.0005693981326917282, "loss": 1.895, "step": 13077 }, { "epoch": 0.44, "grad_norm": 0.4308992326259613, "learning_rate": 0.0005693935335117023, "loss": 1.9218, "step": 13078 }, { "epoch": 0.44, "grad_norm": 0.42649132013320923, "learning_rate": 0.0005693889340046724, "loss": 1.8923, "step": 13079 }, { "epoch": 0.44, "grad_norm": 0.4258394241333008, "learning_rate": 0.0005693843341706441, "loss": 1.8547, "step": 13080 }, { "epoch": 0.44, "grad_norm": 0.433414101600647, "learning_rate": 0.0005693797340096229, "loss": 1.8691, "step": 13081 }, { "epoch": 0.44, "grad_norm": 0.430864155292511, "learning_rate": 0.0005693751335216142, "loss": 1.7482, "step": 13082 }, { "epoch": 0.44, "grad_norm": 0.43946927785873413, "learning_rate": 0.000569370532706624, "loss": 1.8279, "step": 13083 }, { "epoch": 0.44, "grad_norm": 0.426238477230072, "learning_rate": 0.0005693659315646576, "loss": 1.9459, "step": 13084 }, { "epoch": 0.44, "grad_norm": 0.4342706501483917, "learning_rate": 0.0005693613300957206, "loss": 1.9125, "step": 13085 }, { "epoch": 0.44, "grad_norm": 0.4286515414714813, "learning_rate": 0.0005693567282998187, "loss": 1.9101, "step": 13086 }, { "epoch": 0.44, "grad_norm": 0.43051350116729736, "learning_rate": 0.0005693521261769573, "loss": 1.8575, "step": 13087 }, { "epoch": 0.44, "grad_norm": 0.45163285732269287, "learning_rate": 0.0005693475237271423, "loss": 1.8612, "step": 13088 }, { "epoch": 0.44, "grad_norm": 0.44117167592048645, "learning_rate": 0.0005693429209503789, "loss": 1.9095, "step": 13089 }, { "epoch": 0.44, "grad_norm": 0.4495287537574768, "learning_rate": 0.000569338317846673, "loss": 1.9422, "step": 13090 }, { "epoch": 0.44, "grad_norm": 0.42503005266189575, "learning_rate": 0.00056933371441603, "loss": 1.8496, "step": 13091 }, { "epoch": 0.44, "grad_norm": 0.4311065673828125, "learning_rate": 0.0005693291106584555, "loss": 1.8487, "step": 13092 }, { "epoch": 0.44, "grad_norm": 0.43465089797973633, "learning_rate": 0.0005693245065739553, "loss": 1.846, "step": 13093 }, { "epoch": 0.44, "grad_norm": 0.429799348115921, "learning_rate": 0.0005693199021625348, "loss": 1.8739, "step": 13094 }, { "epoch": 0.44, "grad_norm": 0.44621381163597107, "learning_rate": 0.0005693152974241995, "loss": 1.9578, "step": 13095 }, { "epoch": 0.44, "grad_norm": 0.4215291440486908, "learning_rate": 0.0005693106923589552, "loss": 1.8481, "step": 13096 }, { "epoch": 0.44, "grad_norm": 0.4311397671699524, "learning_rate": 0.0005693060869668073, "loss": 1.8855, "step": 13097 }, { "epoch": 0.44, "grad_norm": 0.432790070772171, "learning_rate": 0.0005693014812477616, "loss": 1.8772, "step": 13098 }, { "epoch": 0.44, "grad_norm": 0.4415980577468872, "learning_rate": 0.0005692968752018236, "loss": 1.8967, "step": 13099 }, { "epoch": 0.44, "grad_norm": 0.42676231265068054, "learning_rate": 0.0005692922688289989, "loss": 1.8641, "step": 13100 }, { "epoch": 0.44, "grad_norm": 0.434996098279953, "learning_rate": 0.0005692876621292929, "loss": 1.8825, "step": 13101 }, { "epoch": 0.44, "grad_norm": 0.44723930954933167, "learning_rate": 0.0005692830551027115, "loss": 1.9358, "step": 13102 }, { "epoch": 0.44, "grad_norm": 0.44840070605278015, "learning_rate": 0.0005692784477492601, "loss": 1.8123, "step": 13103 }, { "epoch": 0.44, "grad_norm": 0.4412592351436615, "learning_rate": 0.0005692738400689443, "loss": 1.8735, "step": 13104 }, { "epoch": 0.44, "grad_norm": 0.44352105259895325, "learning_rate": 0.0005692692320617698, "loss": 1.922, "step": 13105 }, { "epoch": 0.44, "grad_norm": 0.4284074604511261, "learning_rate": 0.0005692646237277422, "loss": 1.8904, "step": 13106 }, { "epoch": 0.44, "grad_norm": 0.45031994581222534, "learning_rate": 0.0005692600150668668, "loss": 1.8288, "step": 13107 }, { "epoch": 0.44, "grad_norm": 0.42994454503059387, "learning_rate": 0.0005692554060791496, "loss": 1.8395, "step": 13108 }, { "epoch": 0.44, "grad_norm": 0.440167635679245, "learning_rate": 0.0005692507967645959, "loss": 1.9309, "step": 13109 }, { "epoch": 0.44, "grad_norm": 0.43958693742752075, "learning_rate": 0.0005692461871232115, "loss": 1.8834, "step": 13110 }, { "epoch": 0.44, "grad_norm": 0.4298063814640045, "learning_rate": 0.0005692415771550018, "loss": 1.8904, "step": 13111 }, { "epoch": 0.44, "grad_norm": 0.4567287564277649, "learning_rate": 0.0005692369668599726, "loss": 1.8524, "step": 13112 }, { "epoch": 0.44, "grad_norm": 0.4167679250240326, "learning_rate": 0.0005692323562381293, "loss": 1.8361, "step": 13113 }, { "epoch": 0.44, "grad_norm": 0.4361061453819275, "learning_rate": 0.0005692277452894777, "loss": 1.8511, "step": 13114 }, { "epoch": 0.44, "grad_norm": 0.45471033453941345, "learning_rate": 0.0005692231340140232, "loss": 1.952, "step": 13115 }, { "epoch": 0.44, "grad_norm": 0.4248030185699463, "learning_rate": 0.0005692185224117714, "loss": 1.8506, "step": 13116 }, { "epoch": 0.44, "grad_norm": 0.4254401624202728, "learning_rate": 0.0005692139104827281, "loss": 1.9228, "step": 13117 }, { "epoch": 0.44, "grad_norm": 0.4539600908756256, "learning_rate": 0.0005692092982268989, "loss": 1.8804, "step": 13118 }, { "epoch": 0.44, "grad_norm": 0.43980106711387634, "learning_rate": 0.0005692046856442891, "loss": 1.8824, "step": 13119 }, { "epoch": 0.44, "grad_norm": 0.4279392659664154, "learning_rate": 0.0005692000727349044, "loss": 1.8566, "step": 13120 }, { "epoch": 0.44, "grad_norm": 0.43817389011383057, "learning_rate": 0.0005691954594987507, "loss": 1.8682, "step": 13121 }, { "epoch": 0.44, "grad_norm": 0.4454922378063202, "learning_rate": 0.0005691908459358332, "loss": 1.7868, "step": 13122 }, { "epoch": 0.44, "grad_norm": 0.444642037153244, "learning_rate": 0.0005691862320461577, "loss": 1.8276, "step": 13123 }, { "epoch": 0.44, "grad_norm": 0.49642619490623474, "learning_rate": 0.00056918161782973, "loss": 1.9003, "step": 13124 }, { "epoch": 0.44, "grad_norm": 0.4287252128124237, "learning_rate": 0.0005691770032865552, "loss": 1.8466, "step": 13125 }, { "epoch": 0.44, "grad_norm": 0.45401036739349365, "learning_rate": 0.0005691723884166392, "loss": 1.8472, "step": 13126 }, { "epoch": 0.44, "grad_norm": 0.436269074678421, "learning_rate": 0.0005691677732199877, "loss": 1.9189, "step": 13127 }, { "epoch": 0.44, "grad_norm": 0.43546393513679504, "learning_rate": 0.0005691631576966061, "loss": 1.832, "step": 13128 }, { "epoch": 0.44, "grad_norm": 0.4275013506412506, "learning_rate": 0.0005691585418465002, "loss": 1.8991, "step": 13129 }, { "epoch": 0.44, "grad_norm": 0.43762534856796265, "learning_rate": 0.0005691539256696754, "loss": 1.8973, "step": 13130 }, { "epoch": 0.44, "grad_norm": 0.43770816922187805, "learning_rate": 0.0005691493091661375, "loss": 1.8264, "step": 13131 }, { "epoch": 0.44, "grad_norm": 0.4330645799636841, "learning_rate": 0.0005691446923358919, "loss": 1.8859, "step": 13132 }, { "epoch": 0.44, "grad_norm": 0.4324192702770233, "learning_rate": 0.0005691400751789442, "loss": 1.8597, "step": 13133 }, { "epoch": 0.44, "grad_norm": 0.44011008739471436, "learning_rate": 0.0005691354576953003, "loss": 1.8873, "step": 13134 }, { "epoch": 0.44, "grad_norm": 0.42646458745002747, "learning_rate": 0.0005691308398849654, "loss": 1.9156, "step": 13135 }, { "epoch": 0.44, "grad_norm": 0.46519145369529724, "learning_rate": 0.0005691262217479455, "loss": 1.9285, "step": 13136 }, { "epoch": 0.44, "grad_norm": 0.5382168292999268, "learning_rate": 0.0005691216032842459, "loss": 1.917, "step": 13137 }, { "epoch": 0.44, "grad_norm": 0.4214857816696167, "learning_rate": 0.0005691169844938724, "loss": 1.833, "step": 13138 }, { "epoch": 0.44, "grad_norm": 0.439492791891098, "learning_rate": 0.0005691123653768306, "loss": 1.8466, "step": 13139 }, { "epoch": 0.44, "grad_norm": 0.46472683548927307, "learning_rate": 0.0005691077459331259, "loss": 1.8985, "step": 13140 }, { "epoch": 0.44, "grad_norm": 0.466427743434906, "learning_rate": 0.0005691031261627641, "loss": 1.9219, "step": 13141 }, { "epoch": 0.44, "grad_norm": 0.44090530276298523, "learning_rate": 0.0005690985060657508, "loss": 1.8361, "step": 13142 }, { "epoch": 0.44, "grad_norm": 0.43634214997291565, "learning_rate": 0.0005690938856420915, "loss": 1.891, "step": 13143 }, { "epoch": 0.44, "grad_norm": 0.4380830228328705, "learning_rate": 0.0005690892648917919, "loss": 1.8792, "step": 13144 }, { "epoch": 0.44, "grad_norm": 0.47527268528938293, "learning_rate": 0.0005690846438148576, "loss": 1.8893, "step": 13145 }, { "epoch": 0.44, "grad_norm": 0.45245620608329773, "learning_rate": 0.0005690800224112942, "loss": 1.8716, "step": 13146 }, { "epoch": 0.44, "grad_norm": 0.43894481658935547, "learning_rate": 0.0005690754006811073, "loss": 1.8437, "step": 13147 }, { "epoch": 0.44, "grad_norm": 0.4606027603149414, "learning_rate": 0.0005690707786243024, "loss": 1.8764, "step": 13148 }, { "epoch": 0.44, "grad_norm": 0.4399966895580292, "learning_rate": 0.0005690661562408853, "loss": 1.9193, "step": 13149 }, { "epoch": 0.44, "grad_norm": 0.4385955333709717, "learning_rate": 0.0005690615335308616, "loss": 1.885, "step": 13150 }, { "epoch": 0.44, "grad_norm": 0.44093167781829834, "learning_rate": 0.0005690569104942367, "loss": 1.8297, "step": 13151 }, { "epoch": 0.44, "grad_norm": 0.4326784014701843, "learning_rate": 0.0005690522871310165, "loss": 1.8427, "step": 13152 }, { "epoch": 0.44, "grad_norm": 0.4427151083946228, "learning_rate": 0.0005690476634412063, "loss": 1.7666, "step": 13153 }, { "epoch": 0.44, "grad_norm": 0.44872647523880005, "learning_rate": 0.000569043039424812, "loss": 1.9229, "step": 13154 }, { "epoch": 0.44, "grad_norm": 0.4540719985961914, "learning_rate": 0.000569038415081839, "loss": 1.8319, "step": 13155 }, { "epoch": 0.44, "grad_norm": 0.4453115165233612, "learning_rate": 0.0005690337904122931, "loss": 1.8131, "step": 13156 }, { "epoch": 0.44, "grad_norm": 0.4483800232410431, "learning_rate": 0.0005690291654161798, "loss": 1.896, "step": 13157 }, { "epoch": 0.44, "grad_norm": 0.441217303276062, "learning_rate": 0.0005690245400935048, "loss": 1.8197, "step": 13158 }, { "epoch": 0.44, "grad_norm": 0.4464579224586487, "learning_rate": 0.0005690199144442735, "loss": 1.8763, "step": 13159 }, { "epoch": 0.44, "grad_norm": 0.4279177188873291, "learning_rate": 0.0005690152884684917, "loss": 1.9404, "step": 13160 }, { "epoch": 0.44, "grad_norm": 0.4410872757434845, "learning_rate": 0.000569010662166165, "loss": 1.9393, "step": 13161 }, { "epoch": 0.44, "grad_norm": 0.4679694175720215, "learning_rate": 0.0005690060355372991, "loss": 1.9025, "step": 13162 }, { "epoch": 0.44, "grad_norm": 0.44074490666389465, "learning_rate": 0.0005690014085818993, "loss": 1.85, "step": 13163 }, { "epoch": 0.44, "grad_norm": 0.443646103143692, "learning_rate": 0.0005689967812999715, "loss": 1.925, "step": 13164 }, { "epoch": 0.44, "grad_norm": 0.4567164182662964, "learning_rate": 0.0005689921536915214, "loss": 1.9009, "step": 13165 }, { "epoch": 0.44, "grad_norm": 0.45200565457344055, "learning_rate": 0.0005689875257565543, "loss": 1.8699, "step": 13166 }, { "epoch": 0.44, "grad_norm": 0.43086671829223633, "learning_rate": 0.000568982897495076, "loss": 1.8739, "step": 13167 }, { "epoch": 0.44, "grad_norm": 0.452670156955719, "learning_rate": 0.0005689782689070921, "loss": 1.9754, "step": 13168 }, { "epoch": 0.44, "grad_norm": 0.4306098520755768, "learning_rate": 0.0005689736399926082, "loss": 1.8353, "step": 13169 }, { "epoch": 0.44, "grad_norm": 0.4367850422859192, "learning_rate": 0.00056896901075163, "loss": 1.919, "step": 13170 }, { "epoch": 0.44, "grad_norm": 0.4333650469779968, "learning_rate": 0.0005689643811841629, "loss": 1.9027, "step": 13171 }, { "epoch": 0.44, "grad_norm": 0.448870986700058, "learning_rate": 0.0005689597512902128, "loss": 1.9165, "step": 13172 }, { "epoch": 0.44, "grad_norm": 0.4314959645271301, "learning_rate": 0.0005689551210697852, "loss": 1.921, "step": 13173 }, { "epoch": 0.44, "grad_norm": 0.4463594853878021, "learning_rate": 0.0005689504905228856, "loss": 1.9384, "step": 13174 }, { "epoch": 0.44, "grad_norm": 0.4382988512516022, "learning_rate": 0.0005689458596495199, "loss": 1.8938, "step": 13175 }, { "epoch": 0.44, "grad_norm": 0.4310757517814636, "learning_rate": 0.0005689412284496935, "loss": 1.7906, "step": 13176 }, { "epoch": 0.44, "grad_norm": 0.4421815276145935, "learning_rate": 0.000568936596923412, "loss": 1.9025, "step": 13177 }, { "epoch": 0.44, "grad_norm": 0.4370836317539215, "learning_rate": 0.0005689319650706811, "loss": 1.8617, "step": 13178 }, { "epoch": 0.44, "grad_norm": 0.4506334066390991, "learning_rate": 0.0005689273328915065, "loss": 1.8563, "step": 13179 }, { "epoch": 0.44, "grad_norm": 0.4559342861175537, "learning_rate": 0.0005689227003858936, "loss": 1.8798, "step": 13180 }, { "epoch": 0.44, "grad_norm": 0.4450032711029053, "learning_rate": 0.0005689180675538483, "loss": 1.8831, "step": 13181 }, { "epoch": 0.44, "grad_norm": 0.4334452450275421, "learning_rate": 0.0005689134343953761, "loss": 1.8945, "step": 13182 }, { "epoch": 0.44, "grad_norm": 0.4553660452365875, "learning_rate": 0.0005689088009104826, "loss": 1.8177, "step": 13183 }, { "epoch": 0.44, "grad_norm": 0.45239654183387756, "learning_rate": 0.0005689041670991733, "loss": 1.9221, "step": 13184 }, { "epoch": 0.44, "grad_norm": 0.4221680164337158, "learning_rate": 0.0005688995329614541, "loss": 1.8257, "step": 13185 }, { "epoch": 0.44, "grad_norm": 0.4296249747276306, "learning_rate": 0.0005688948984973306, "loss": 1.8467, "step": 13186 }, { "epoch": 0.44, "grad_norm": 0.4452972412109375, "learning_rate": 0.0005688902637068081, "loss": 1.8494, "step": 13187 }, { "epoch": 0.44, "grad_norm": 0.43367642164230347, "learning_rate": 0.0005688856285898925, "loss": 1.8584, "step": 13188 }, { "epoch": 0.44, "grad_norm": 0.42884737253189087, "learning_rate": 0.0005688809931465894, "loss": 1.8643, "step": 13189 }, { "epoch": 0.44, "grad_norm": 0.43034878373146057, "learning_rate": 0.0005688763573769044, "loss": 1.8361, "step": 13190 }, { "epoch": 0.44, "grad_norm": 0.4395538568496704, "learning_rate": 0.0005688717212808431, "loss": 1.8484, "step": 13191 }, { "epoch": 0.44, "grad_norm": 0.43993258476257324, "learning_rate": 0.000568867084858411, "loss": 1.8388, "step": 13192 }, { "epoch": 0.44, "grad_norm": 0.44793784618377686, "learning_rate": 0.000568862448109614, "loss": 1.8066, "step": 13193 }, { "epoch": 0.44, "grad_norm": 0.45244598388671875, "learning_rate": 0.0005688578110344577, "loss": 1.9066, "step": 13194 }, { "epoch": 0.44, "grad_norm": 0.43228083848953247, "learning_rate": 0.0005688531736329475, "loss": 1.8731, "step": 13195 }, { "epoch": 0.44, "grad_norm": 0.4559875726699829, "learning_rate": 0.0005688485359050892, "loss": 1.9196, "step": 13196 }, { "epoch": 0.44, "grad_norm": 0.48618510365486145, "learning_rate": 0.0005688438978508883, "loss": 1.9165, "step": 13197 }, { "epoch": 0.44, "grad_norm": 0.4461090564727783, "learning_rate": 0.0005688392594703507, "loss": 1.9037, "step": 13198 }, { "epoch": 0.44, "grad_norm": 0.4528179168701172, "learning_rate": 0.0005688346207634817, "loss": 1.8459, "step": 13199 }, { "epoch": 0.44, "grad_norm": 0.6014165282249451, "learning_rate": 0.0005688299817302872, "loss": 1.9139, "step": 13200 }, { "epoch": 0.44, "grad_norm": 0.4440067708492279, "learning_rate": 0.0005688253423707726, "loss": 1.905, "step": 13201 }, { "epoch": 0.44, "grad_norm": 0.4607631266117096, "learning_rate": 0.0005688207026849437, "loss": 1.9166, "step": 13202 }, { "epoch": 0.44, "grad_norm": 0.44068941473960876, "learning_rate": 0.0005688160626728061, "loss": 1.9134, "step": 13203 }, { "epoch": 0.44, "grad_norm": 0.4602486491203308, "learning_rate": 0.0005688114223343654, "loss": 1.8754, "step": 13204 }, { "epoch": 0.44, "grad_norm": 0.4738864600658417, "learning_rate": 0.0005688067816696273, "loss": 1.8896, "step": 13205 }, { "epoch": 0.44, "grad_norm": 0.449743390083313, "learning_rate": 0.0005688021406785972, "loss": 1.8147, "step": 13206 }, { "epoch": 0.44, "grad_norm": 0.4386293590068817, "learning_rate": 0.0005687974993612811, "loss": 1.8588, "step": 13207 }, { "epoch": 0.44, "grad_norm": 0.4345134198665619, "learning_rate": 0.0005687928577176842, "loss": 1.8273, "step": 13208 }, { "epoch": 0.44, "grad_norm": 0.4418821632862091, "learning_rate": 0.0005687882157478126, "loss": 1.8598, "step": 13209 }, { "epoch": 0.44, "grad_norm": 0.4345756769180298, "learning_rate": 0.0005687835734516717, "loss": 1.8603, "step": 13210 }, { "epoch": 0.44, "grad_norm": 0.43838176131248474, "learning_rate": 0.000568778930829267, "loss": 1.8758, "step": 13211 }, { "epoch": 0.44, "grad_norm": 0.4810771644115448, "learning_rate": 0.0005687742878806044, "loss": 1.9397, "step": 13212 }, { "epoch": 0.44, "grad_norm": 0.454789936542511, "learning_rate": 0.0005687696446056894, "loss": 1.7908, "step": 13213 }, { "epoch": 0.44, "grad_norm": 0.429538756608963, "learning_rate": 0.0005687650010045277, "loss": 1.8146, "step": 13214 }, { "epoch": 0.44, "grad_norm": 0.45715850591659546, "learning_rate": 0.0005687603570771248, "loss": 1.8915, "step": 13215 }, { "epoch": 0.44, "grad_norm": 0.4459233283996582, "learning_rate": 0.0005687557128234863, "loss": 1.933, "step": 13216 }, { "epoch": 0.44, "grad_norm": 0.44178131222724915, "learning_rate": 0.0005687510682436182, "loss": 1.856, "step": 13217 }, { "epoch": 0.44, "grad_norm": 0.441821426153183, "learning_rate": 0.0005687464233375258, "loss": 1.9069, "step": 13218 }, { "epoch": 0.44, "grad_norm": 0.43612590432167053, "learning_rate": 0.0005687417781052148, "loss": 1.8411, "step": 13219 }, { "epoch": 0.44, "grad_norm": 0.42369967699050903, "learning_rate": 0.0005687371325466909, "loss": 1.8969, "step": 13220 }, { "epoch": 0.44, "grad_norm": 0.4408564269542694, "learning_rate": 0.0005687324866619598, "loss": 1.9256, "step": 13221 }, { "epoch": 0.44, "grad_norm": 0.4453098475933075, "learning_rate": 0.000568727840451027, "loss": 1.8966, "step": 13222 }, { "epoch": 0.44, "grad_norm": 0.4221802353858948, "learning_rate": 0.0005687231939138982, "loss": 1.8556, "step": 13223 }, { "epoch": 0.44, "grad_norm": 0.45842111110687256, "learning_rate": 0.0005687185470505791, "loss": 1.8809, "step": 13224 }, { "epoch": 0.44, "grad_norm": 0.4302748739719391, "learning_rate": 0.0005687138998610752, "loss": 1.8983, "step": 13225 }, { "epoch": 0.44, "grad_norm": 0.44168782234191895, "learning_rate": 0.0005687092523453922, "loss": 1.8801, "step": 13226 }, { "epoch": 0.44, "grad_norm": 0.7932204604148865, "learning_rate": 0.0005687046045035357, "loss": 1.8766, "step": 13227 }, { "epoch": 0.44, "grad_norm": 0.45646506547927856, "learning_rate": 0.0005686999563355115, "loss": 1.8794, "step": 13228 }, { "epoch": 0.44, "grad_norm": 0.4311800003051758, "learning_rate": 0.0005686953078413252, "loss": 1.812, "step": 13229 }, { "epoch": 0.44, "grad_norm": 0.44408929347991943, "learning_rate": 0.0005686906590209823, "loss": 1.8761, "step": 13230 }, { "epoch": 0.44, "grad_norm": 0.46347859501838684, "learning_rate": 0.0005686860098744885, "loss": 1.9256, "step": 13231 }, { "epoch": 0.44, "grad_norm": 0.4381209909915924, "learning_rate": 0.0005686813604018497, "loss": 1.8705, "step": 13232 }, { "epoch": 0.44, "grad_norm": 0.41741666197776794, "learning_rate": 0.0005686767106030711, "loss": 1.8605, "step": 13233 }, { "epoch": 0.44, "grad_norm": 0.42595767974853516, "learning_rate": 0.0005686720604781586, "loss": 1.8866, "step": 13234 }, { "epoch": 0.44, "grad_norm": 0.4525131583213806, "learning_rate": 0.0005686674100271177, "loss": 1.7729, "step": 13235 }, { "epoch": 0.44, "grad_norm": 0.52106773853302, "learning_rate": 0.0005686627592499543, "loss": 1.8108, "step": 13236 }, { "epoch": 0.44, "grad_norm": 0.4620409607887268, "learning_rate": 0.0005686581081466739, "loss": 1.9375, "step": 13237 }, { "epoch": 0.44, "grad_norm": 0.4362714886665344, "learning_rate": 0.0005686534567172821, "loss": 1.8447, "step": 13238 }, { "epoch": 0.44, "grad_norm": 0.4379154145717621, "learning_rate": 0.0005686488049617846, "loss": 1.901, "step": 13239 }, { "epoch": 0.44, "grad_norm": 0.44423288106918335, "learning_rate": 0.000568644152880187, "loss": 1.8264, "step": 13240 }, { "epoch": 0.44, "grad_norm": 0.4450434446334839, "learning_rate": 0.000568639500472495, "loss": 1.8938, "step": 13241 }, { "epoch": 0.44, "grad_norm": 0.5742548704147339, "learning_rate": 0.0005686348477387142, "loss": 1.9142, "step": 13242 }, { "epoch": 0.44, "grad_norm": 0.4398478865623474, "learning_rate": 0.0005686301946788504, "loss": 1.8841, "step": 13243 }, { "epoch": 0.44, "grad_norm": 0.4408178925514221, "learning_rate": 0.000568625541292909, "loss": 1.8856, "step": 13244 }, { "epoch": 0.44, "grad_norm": 0.44151434302330017, "learning_rate": 0.0005686208875808958, "loss": 1.7932, "step": 13245 }, { "epoch": 0.44, "grad_norm": 0.4296901524066925, "learning_rate": 0.0005686162335428164, "loss": 1.8955, "step": 13246 }, { "epoch": 0.44, "grad_norm": 0.4326537549495697, "learning_rate": 0.0005686115791786765, "loss": 1.8399, "step": 13247 }, { "epoch": 0.44, "grad_norm": 0.43205496668815613, "learning_rate": 0.0005686069244884818, "loss": 1.8002, "step": 13248 }, { "epoch": 0.44, "grad_norm": 0.42365598678588867, "learning_rate": 0.0005686022694722377, "loss": 1.7695, "step": 13249 }, { "epoch": 0.44, "grad_norm": 0.4446738660335541, "learning_rate": 0.0005685976141299501, "loss": 1.875, "step": 13250 }, { "epoch": 0.44, "grad_norm": 0.4554240107536316, "learning_rate": 0.0005685929584616247, "loss": 1.8526, "step": 13251 }, { "epoch": 0.44, "grad_norm": 0.4457094073295593, "learning_rate": 0.0005685883024672668, "loss": 1.9177, "step": 13252 }, { "epoch": 0.44, "grad_norm": 0.45846396684646606, "learning_rate": 0.0005685836461468824, "loss": 1.9568, "step": 13253 }, { "epoch": 0.44, "grad_norm": 0.4494358003139496, "learning_rate": 0.000568578989500477, "loss": 1.9414, "step": 13254 }, { "epoch": 0.44, "grad_norm": 0.4235329329967499, "learning_rate": 0.0005685743325280564, "loss": 1.8972, "step": 13255 }, { "epoch": 0.44, "grad_norm": 0.4501817524433136, "learning_rate": 0.000568569675229626, "loss": 1.8534, "step": 13256 }, { "epoch": 0.44, "grad_norm": 0.43064969778060913, "learning_rate": 0.0005685650176051915, "loss": 1.8641, "step": 13257 }, { "epoch": 0.44, "grad_norm": 0.4276949167251587, "learning_rate": 0.0005685603596547587, "loss": 1.8927, "step": 13258 }, { "epoch": 0.44, "grad_norm": 0.4404529631137848, "learning_rate": 0.0005685557013783333, "loss": 1.8501, "step": 13259 }, { "epoch": 0.44, "grad_norm": 0.44372281432151794, "learning_rate": 0.0005685510427759209, "loss": 1.8387, "step": 13260 }, { "epoch": 0.44, "grad_norm": 0.41945603489875793, "learning_rate": 0.000568546383847527, "loss": 1.814, "step": 13261 }, { "epoch": 0.44, "grad_norm": 0.4243116080760956, "learning_rate": 0.0005685417245931573, "loss": 1.9008, "step": 13262 }, { "epoch": 0.44, "grad_norm": 0.438510000705719, "learning_rate": 0.0005685370650128176, "loss": 1.8518, "step": 13263 }, { "epoch": 0.44, "grad_norm": 0.45429372787475586, "learning_rate": 0.0005685324051065134, "loss": 1.8768, "step": 13264 }, { "epoch": 0.44, "grad_norm": 0.4306124746799469, "learning_rate": 0.0005685277448742504, "loss": 1.8598, "step": 13265 }, { "epoch": 0.44, "grad_norm": 0.4313898980617523, "learning_rate": 0.0005685230843160344, "loss": 1.8903, "step": 13266 }, { "epoch": 0.44, "grad_norm": 0.44353994727134705, "learning_rate": 0.0005685184234318709, "loss": 1.9283, "step": 13267 }, { "epoch": 0.44, "grad_norm": 0.4607488512992859, "learning_rate": 0.0005685137622217656, "loss": 2.0349, "step": 13268 }, { "epoch": 0.44, "grad_norm": 0.42418327927589417, "learning_rate": 0.0005685091006857242, "loss": 1.9597, "step": 13269 }, { "epoch": 0.44, "grad_norm": 0.4567415714263916, "learning_rate": 0.0005685044388237521, "loss": 1.8764, "step": 13270 }, { "epoch": 0.44, "grad_norm": 0.4471067190170288, "learning_rate": 0.0005684997766358554, "loss": 1.9656, "step": 13271 }, { "epoch": 0.44, "grad_norm": 0.4354635179042816, "learning_rate": 0.0005684951141220394, "loss": 1.9574, "step": 13272 }, { "epoch": 0.44, "grad_norm": 0.4416539669036865, "learning_rate": 0.0005684904512823101, "loss": 1.7748, "step": 13273 }, { "epoch": 0.44, "grad_norm": 0.4468379616737366, "learning_rate": 0.0005684857881166727, "loss": 1.8681, "step": 13274 }, { "epoch": 0.44, "grad_norm": 0.4223977029323578, "learning_rate": 0.0005684811246251333, "loss": 1.8882, "step": 13275 }, { "epoch": 0.44, "grad_norm": 0.4304521083831787, "learning_rate": 0.0005684764608076973, "loss": 1.8118, "step": 13276 }, { "epoch": 0.44, "grad_norm": 0.4384711682796478, "learning_rate": 0.0005684717966643704, "loss": 1.9122, "step": 13277 }, { "epoch": 0.44, "grad_norm": 0.4229024350643158, "learning_rate": 0.0005684671321951583, "loss": 1.7802, "step": 13278 }, { "epoch": 0.44, "grad_norm": 0.4297511577606201, "learning_rate": 0.0005684624674000667, "loss": 1.8627, "step": 13279 }, { "epoch": 0.44, "grad_norm": 0.43176034092903137, "learning_rate": 0.0005684578022791012, "loss": 1.9219, "step": 13280 }, { "epoch": 0.44, "grad_norm": 0.4238593876361847, "learning_rate": 0.0005684531368322675, "loss": 1.8279, "step": 13281 }, { "epoch": 0.44, "grad_norm": 0.4314653277397156, "learning_rate": 0.0005684484710595713, "loss": 1.768, "step": 13282 }, { "epoch": 0.44, "grad_norm": 0.44743672013282776, "learning_rate": 0.000568443804961018, "loss": 1.8573, "step": 13283 }, { "epoch": 0.44, "grad_norm": 0.4210391640663147, "learning_rate": 0.0005684391385366137, "loss": 1.923, "step": 13284 }, { "epoch": 0.44, "grad_norm": 0.44224268198013306, "learning_rate": 0.0005684344717863638, "loss": 1.8791, "step": 13285 }, { "epoch": 0.44, "grad_norm": 0.4522964060306549, "learning_rate": 0.0005684298047102739, "loss": 1.7942, "step": 13286 }, { "epoch": 0.44, "grad_norm": 0.4386475682258606, "learning_rate": 0.0005684251373083499, "loss": 1.867, "step": 13287 }, { "epoch": 0.44, "grad_norm": 0.41550278663635254, "learning_rate": 0.0005684204695805972, "loss": 1.8549, "step": 13288 }, { "epoch": 0.44, "grad_norm": 0.44796639680862427, "learning_rate": 0.0005684158015270217, "loss": 1.8482, "step": 13289 }, { "epoch": 0.44, "grad_norm": 0.44725629687309265, "learning_rate": 0.000568411133147629, "loss": 1.8911, "step": 13290 }, { "epoch": 0.44, "grad_norm": 0.42629867792129517, "learning_rate": 0.0005684064644424245, "loss": 1.8369, "step": 13291 }, { "epoch": 0.44, "grad_norm": 0.46574652194976807, "learning_rate": 0.0005684017954114143, "loss": 1.9649, "step": 13292 }, { "epoch": 0.44, "grad_norm": 0.4305368661880493, "learning_rate": 0.0005683971260546039, "loss": 1.8915, "step": 13293 }, { "epoch": 0.44, "grad_norm": 0.43311455845832825, "learning_rate": 0.0005683924563719988, "loss": 1.9133, "step": 13294 }, { "epoch": 0.44, "grad_norm": 0.42383360862731934, "learning_rate": 0.0005683877863636049, "loss": 1.9047, "step": 13295 }, { "epoch": 0.44, "grad_norm": 0.43009960651397705, "learning_rate": 0.0005683831160294278, "loss": 1.8253, "step": 13296 }, { "epoch": 0.44, "grad_norm": 0.4359441101551056, "learning_rate": 0.0005683784453694731, "loss": 1.7975, "step": 13297 }, { "epoch": 0.44, "grad_norm": 0.45345330238342285, "learning_rate": 0.0005683737743837464, "loss": 1.9012, "step": 13298 }, { "epoch": 0.44, "grad_norm": 0.43080008029937744, "learning_rate": 0.0005683691030722535, "loss": 1.7963, "step": 13299 }, { "epoch": 0.44, "grad_norm": 0.43433046340942383, "learning_rate": 0.0005683644314350003, "loss": 1.9274, "step": 13300 }, { "epoch": 0.44, "grad_norm": 0.4319503605365753, "learning_rate": 0.000568359759471992, "loss": 1.8981, "step": 13301 }, { "epoch": 0.44, "grad_norm": 0.44851621985435486, "learning_rate": 0.0005683550871832346, "loss": 1.8849, "step": 13302 }, { "epoch": 0.44, "grad_norm": 0.44733068346977234, "learning_rate": 0.0005683504145687335, "loss": 1.8471, "step": 13303 }, { "epoch": 0.44, "grad_norm": 0.43037253618240356, "learning_rate": 0.0005683457416284947, "loss": 1.7893, "step": 13304 }, { "epoch": 0.44, "grad_norm": 0.4458790719509125, "learning_rate": 0.0005683410683625236, "loss": 1.852, "step": 13305 }, { "epoch": 0.44, "grad_norm": 0.44426316022872925, "learning_rate": 0.0005683363947708261, "loss": 1.8803, "step": 13306 }, { "epoch": 0.44, "grad_norm": 0.43008294701576233, "learning_rate": 0.0005683317208534077, "loss": 1.862, "step": 13307 }, { "epoch": 0.44, "grad_norm": 0.4372454881668091, "learning_rate": 0.0005683270466102741, "loss": 1.786, "step": 13308 }, { "epoch": 0.44, "grad_norm": 0.45348840951919556, "learning_rate": 0.000568322372041431, "loss": 1.8551, "step": 13309 }, { "epoch": 0.44, "grad_norm": 0.459120512008667, "learning_rate": 0.000568317697146884, "loss": 1.8627, "step": 13310 }, { "epoch": 0.44, "grad_norm": 0.4361765682697296, "learning_rate": 0.000568313021926639, "loss": 1.8928, "step": 13311 }, { "epoch": 0.44, "grad_norm": 0.4321878254413605, "learning_rate": 0.0005683083463807015, "loss": 1.8139, "step": 13312 }, { "epoch": 0.44, "grad_norm": 0.44546958804130554, "learning_rate": 0.0005683036705090772, "loss": 1.7984, "step": 13313 }, { "epoch": 0.44, "grad_norm": 0.45226427912712097, "learning_rate": 0.0005682989943117717, "loss": 1.8194, "step": 13314 }, { "epoch": 0.44, "grad_norm": 0.44663968682289124, "learning_rate": 0.0005682943177887908, "loss": 1.8121, "step": 13315 }, { "epoch": 0.44, "grad_norm": 0.4464329481124878, "learning_rate": 0.0005682896409401401, "loss": 2.0149, "step": 13316 }, { "epoch": 0.44, "grad_norm": 0.4438328742980957, "learning_rate": 0.0005682849637658253, "loss": 1.8109, "step": 13317 }, { "epoch": 0.44, "grad_norm": 0.4300136864185333, "learning_rate": 0.0005682802862658522, "loss": 1.797, "step": 13318 }, { "epoch": 0.44, "grad_norm": 0.41441506147384644, "learning_rate": 0.0005682756084402263, "loss": 1.7907, "step": 13319 }, { "epoch": 0.44, "grad_norm": 0.4304235279560089, "learning_rate": 0.0005682709302889532, "loss": 1.8283, "step": 13320 }, { "epoch": 0.44, "grad_norm": 0.4664768874645233, "learning_rate": 0.0005682662518120389, "loss": 1.878, "step": 13321 }, { "epoch": 0.44, "grad_norm": 0.45588433742523193, "learning_rate": 0.0005682615730094888, "loss": 1.8407, "step": 13322 }, { "epoch": 0.44, "grad_norm": 0.4363836646080017, "learning_rate": 0.0005682568938813088, "loss": 1.8518, "step": 13323 }, { "epoch": 0.44, "grad_norm": 0.4523955285549164, "learning_rate": 0.0005682522144275044, "loss": 1.8854, "step": 13324 }, { "epoch": 0.44, "grad_norm": 0.4635237455368042, "learning_rate": 0.0005682475346480813, "loss": 1.8707, "step": 13325 }, { "epoch": 0.44, "grad_norm": 0.4351072311401367, "learning_rate": 0.0005682428545430451, "loss": 1.8258, "step": 13326 }, { "epoch": 0.44, "grad_norm": 0.44533899426460266, "learning_rate": 0.0005682381741124018, "loss": 1.7993, "step": 13327 }, { "epoch": 0.44, "grad_norm": 0.4455473721027374, "learning_rate": 0.0005682334933561568, "loss": 1.8251, "step": 13328 }, { "epoch": 0.44, "grad_norm": 0.48305121064186096, "learning_rate": 0.0005682288122743159, "loss": 1.894, "step": 13329 }, { "epoch": 0.44, "grad_norm": 0.4380761981010437, "learning_rate": 0.0005682241308668847, "loss": 1.9535, "step": 13330 }, { "epoch": 0.44, "grad_norm": 0.44316229224205017, "learning_rate": 0.0005682194491338689, "loss": 1.7822, "step": 13331 }, { "epoch": 0.44, "grad_norm": 0.4328879714012146, "learning_rate": 0.0005682147670752742, "loss": 1.8272, "step": 13332 }, { "epoch": 0.44, "grad_norm": 0.45203322172164917, "learning_rate": 0.0005682100846911063, "loss": 1.8044, "step": 13333 }, { "epoch": 0.44, "grad_norm": 0.44163691997528076, "learning_rate": 0.0005682054019813709, "loss": 2.0035, "step": 13334 }, { "epoch": 0.44, "grad_norm": 0.43226441740989685, "learning_rate": 0.0005682007189460737, "loss": 1.9359, "step": 13335 }, { "epoch": 0.44, "grad_norm": 0.4673289954662323, "learning_rate": 0.0005681960355852203, "loss": 1.8939, "step": 13336 }, { "epoch": 0.44, "grad_norm": 0.43446478247642517, "learning_rate": 0.0005681913518988165, "loss": 1.8176, "step": 13337 }, { "epoch": 0.44, "grad_norm": 0.45141705870628357, "learning_rate": 0.0005681866678868678, "loss": 1.832, "step": 13338 }, { "epoch": 0.44, "grad_norm": 0.430205762386322, "learning_rate": 0.00056818198354938, "loss": 1.8751, "step": 13339 }, { "epoch": 0.44, "grad_norm": 0.42210447788238525, "learning_rate": 0.0005681772988863589, "loss": 1.8836, "step": 13340 }, { "epoch": 0.44, "grad_norm": 0.4432579278945923, "learning_rate": 0.00056817261389781, "loss": 1.8632, "step": 13341 }, { "epoch": 0.44, "grad_norm": 0.4344666004180908, "learning_rate": 0.0005681679285837392, "loss": 1.8654, "step": 13342 }, { "epoch": 0.44, "grad_norm": 0.4305740296840668, "learning_rate": 0.0005681632429441519, "loss": 1.8657, "step": 13343 }, { "epoch": 0.44, "grad_norm": 0.4403694272041321, "learning_rate": 0.000568158556979054, "loss": 1.9026, "step": 13344 }, { "epoch": 0.44, "grad_norm": 0.43622103333473206, "learning_rate": 0.000568153870688451, "loss": 1.9521, "step": 13345 }, { "epoch": 0.44, "grad_norm": 0.42320170998573303, "learning_rate": 0.0005681491840723488, "loss": 1.8579, "step": 13346 }, { "epoch": 0.44, "grad_norm": 0.7088146805763245, "learning_rate": 0.0005681444971307529, "loss": 1.8719, "step": 13347 }, { "epoch": 0.44, "grad_norm": 0.43390142917633057, "learning_rate": 0.0005681398098636693, "loss": 1.8285, "step": 13348 }, { "epoch": 0.44, "grad_norm": 0.43607303500175476, "learning_rate": 0.0005681351222711033, "loss": 1.8858, "step": 13349 }, { "epoch": 0.44, "grad_norm": 0.4370451867580414, "learning_rate": 0.0005681304343530609, "loss": 1.8356, "step": 13350 }, { "epoch": 0.44, "grad_norm": 0.42484819889068604, "learning_rate": 0.0005681257461095477, "loss": 1.9015, "step": 13351 }, { "epoch": 0.44, "grad_norm": 0.43700462579727173, "learning_rate": 0.0005681210575405691, "loss": 1.8228, "step": 13352 }, { "epoch": 0.44, "grad_norm": 0.44633835554122925, "learning_rate": 0.0005681163686461312, "loss": 1.8282, "step": 13353 }, { "epoch": 0.44, "grad_norm": 0.4324343502521515, "learning_rate": 0.0005681116794262396, "loss": 1.8345, "step": 13354 }, { "epoch": 0.44, "grad_norm": 0.43213534355163574, "learning_rate": 0.0005681069898808998, "loss": 1.9006, "step": 13355 }, { "epoch": 0.44, "grad_norm": 0.4314223825931549, "learning_rate": 0.0005681023000101177, "loss": 1.8201, "step": 13356 }, { "epoch": 0.44, "grad_norm": 0.44864919781684875, "learning_rate": 0.0005680976098138989, "loss": 1.894, "step": 13357 }, { "epoch": 0.44, "grad_norm": 0.5181785225868225, "learning_rate": 0.0005680929192922491, "loss": 1.8388, "step": 13358 }, { "epoch": 0.44, "grad_norm": 0.44184228777885437, "learning_rate": 0.0005680882284451739, "loss": 1.9283, "step": 13359 }, { "epoch": 0.44, "grad_norm": 0.43083152174949646, "learning_rate": 0.0005680835372726792, "loss": 1.7819, "step": 13360 }, { "epoch": 0.44, "grad_norm": 0.4385834038257599, "learning_rate": 0.0005680788457747705, "loss": 1.8309, "step": 13361 }, { "epoch": 0.44, "grad_norm": 0.4354974031448364, "learning_rate": 0.0005680741539514537, "loss": 1.8519, "step": 13362 }, { "epoch": 0.44, "grad_norm": 0.42697712779045105, "learning_rate": 0.0005680694618027342, "loss": 1.8361, "step": 13363 }, { "epoch": 0.44, "grad_norm": 0.42186108231544495, "learning_rate": 0.000568064769328618, "loss": 1.8092, "step": 13364 }, { "epoch": 0.44, "grad_norm": 0.43930578231811523, "learning_rate": 0.0005680600765291107, "loss": 1.8318, "step": 13365 }, { "epoch": 0.44, "grad_norm": 0.42106565833091736, "learning_rate": 0.0005680553834042179, "loss": 1.8191, "step": 13366 }, { "epoch": 0.44, "grad_norm": 0.4258260130882263, "learning_rate": 0.0005680506899539454, "loss": 1.8833, "step": 13367 }, { "epoch": 0.44, "grad_norm": 0.4266171157360077, "learning_rate": 0.0005680459961782988, "loss": 1.9006, "step": 13368 }, { "epoch": 0.44, "grad_norm": 0.44170081615448, "learning_rate": 0.0005680413020772839, "loss": 1.8137, "step": 13369 }, { "epoch": 0.44, "grad_norm": 0.44170689582824707, "learning_rate": 0.0005680366076509063, "loss": 1.8158, "step": 13370 }, { "epoch": 0.44, "grad_norm": 0.4363887906074524, "learning_rate": 0.0005680319128991718, "loss": 1.8868, "step": 13371 }, { "epoch": 0.44, "grad_norm": 0.4403896927833557, "learning_rate": 0.0005680272178220861, "loss": 1.9001, "step": 13372 }, { "epoch": 0.44, "grad_norm": 0.4327399432659149, "learning_rate": 0.0005680225224196548, "loss": 1.8319, "step": 13373 }, { "epoch": 0.44, "grad_norm": 0.4218486547470093, "learning_rate": 0.0005680178266918837, "loss": 1.8231, "step": 13374 }, { "epoch": 0.44, "grad_norm": 0.4327847957611084, "learning_rate": 0.0005680131306387782, "loss": 1.8718, "step": 13375 }, { "epoch": 0.45, "grad_norm": 0.44861525297164917, "learning_rate": 0.0005680084342603445, "loss": 1.8762, "step": 13376 }, { "epoch": 0.45, "grad_norm": 0.5001319050788879, "learning_rate": 0.0005680037375565882, "loss": 1.9181, "step": 13377 }, { "epoch": 0.45, "grad_norm": 0.4333534836769104, "learning_rate": 0.0005679990405275145, "loss": 1.8606, "step": 13378 }, { "epoch": 0.45, "grad_norm": 0.44016221165657043, "learning_rate": 0.0005679943431731296, "loss": 1.8874, "step": 13379 }, { "epoch": 0.45, "grad_norm": 0.4321875274181366, "learning_rate": 0.000567989645493439, "loss": 1.8331, "step": 13380 }, { "epoch": 0.45, "grad_norm": 0.4404117166996002, "learning_rate": 0.0005679849474884485, "loss": 1.8959, "step": 13381 }, { "epoch": 0.45, "grad_norm": 0.4228862524032593, "learning_rate": 0.0005679802491581639, "loss": 1.8747, "step": 13382 }, { "epoch": 0.45, "grad_norm": 0.44634881615638733, "learning_rate": 0.0005679755505025905, "loss": 1.9033, "step": 13383 }, { "epoch": 0.45, "grad_norm": 0.42343586683273315, "learning_rate": 0.0005679708515217344, "loss": 1.9018, "step": 13384 }, { "epoch": 0.45, "grad_norm": 0.4592816233634949, "learning_rate": 0.0005679661522156013, "loss": 1.9148, "step": 13385 }, { "epoch": 0.45, "grad_norm": 0.45747897028923035, "learning_rate": 0.0005679614525841965, "loss": 1.9471, "step": 13386 }, { "epoch": 0.45, "grad_norm": 0.43811294436454773, "learning_rate": 0.0005679567526275262, "loss": 1.9006, "step": 13387 }, { "epoch": 0.45, "grad_norm": 0.4170537292957306, "learning_rate": 0.0005679520523455958, "loss": 1.8374, "step": 13388 }, { "epoch": 0.45, "grad_norm": 0.42961668968200684, "learning_rate": 0.000567947351738411, "loss": 1.8738, "step": 13389 }, { "epoch": 0.45, "grad_norm": 0.4672010838985443, "learning_rate": 0.0005679426508059778, "loss": 1.845, "step": 13390 }, { "epoch": 0.45, "grad_norm": 0.4446651339530945, "learning_rate": 0.0005679379495483016, "loss": 1.8704, "step": 13391 }, { "epoch": 0.45, "grad_norm": 0.4485386908054352, "learning_rate": 0.0005679332479653881, "loss": 1.9376, "step": 13392 }, { "epoch": 0.45, "grad_norm": 0.4563668370246887, "learning_rate": 0.0005679285460572432, "loss": 1.84, "step": 13393 }, { "epoch": 0.45, "grad_norm": 0.4959341585636139, "learning_rate": 0.0005679238438238726, "loss": 1.8512, "step": 13394 }, { "epoch": 0.45, "grad_norm": 0.4373377859592438, "learning_rate": 0.0005679191412652819, "loss": 1.8887, "step": 13395 }, { "epoch": 0.45, "grad_norm": 0.47214603424072266, "learning_rate": 0.0005679144383814767, "loss": 1.9422, "step": 13396 }, { "epoch": 0.45, "grad_norm": 0.4510188102722168, "learning_rate": 0.000567909735172463, "loss": 1.9602, "step": 13397 }, { "epoch": 0.45, "grad_norm": 0.46482139825820923, "learning_rate": 0.0005679050316382462, "loss": 1.9244, "step": 13398 }, { "epoch": 0.45, "grad_norm": 0.44653356075286865, "learning_rate": 0.0005679003277788324, "loss": 1.8553, "step": 13399 }, { "epoch": 0.45, "grad_norm": 0.4533618092536926, "learning_rate": 0.0005678956235942269, "loss": 1.8502, "step": 13400 }, { "epoch": 0.45, "grad_norm": 0.42472711205482483, "learning_rate": 0.0005678909190844357, "loss": 1.8792, "step": 13401 }, { "epoch": 0.45, "grad_norm": 0.4527431130409241, "learning_rate": 0.0005678862142494643, "loss": 1.8857, "step": 13402 }, { "epoch": 0.45, "grad_norm": 0.4470196068286896, "learning_rate": 0.0005678815090893185, "loss": 1.9576, "step": 13403 }, { "epoch": 0.45, "grad_norm": 0.437349408864975, "learning_rate": 0.0005678768036040041, "loss": 1.8664, "step": 13404 }, { "epoch": 0.45, "grad_norm": 0.42643916606903076, "learning_rate": 0.0005678720977935266, "loss": 1.824, "step": 13405 }, { "epoch": 0.45, "grad_norm": 0.43402695655822754, "learning_rate": 0.0005678673916578919, "loss": 1.8799, "step": 13406 }, { "epoch": 0.45, "grad_norm": 0.45868900418281555, "learning_rate": 0.0005678626851971058, "loss": 1.858, "step": 13407 }, { "epoch": 0.45, "grad_norm": 0.4415135681629181, "learning_rate": 0.0005678579784111736, "loss": 1.9319, "step": 13408 }, { "epoch": 0.45, "grad_norm": 0.43691474199295044, "learning_rate": 0.0005678532713001015, "loss": 1.8941, "step": 13409 }, { "epoch": 0.45, "grad_norm": 0.41876938939094543, "learning_rate": 0.000567848563863895, "loss": 1.8792, "step": 13410 }, { "epoch": 0.45, "grad_norm": 0.43886739015579224, "learning_rate": 0.0005678438561025597, "loss": 1.9496, "step": 13411 }, { "epoch": 0.45, "grad_norm": 0.4564549922943115, "learning_rate": 0.0005678391480161014, "loss": 1.8574, "step": 13412 }, { "epoch": 0.45, "grad_norm": 0.4610614776611328, "learning_rate": 0.000567834439604526, "loss": 1.9807, "step": 13413 }, { "epoch": 0.45, "grad_norm": 0.4388904571533203, "learning_rate": 0.0005678297308678389, "loss": 1.8896, "step": 13414 }, { "epoch": 0.45, "grad_norm": 0.4348747730255127, "learning_rate": 0.000567825021806046, "loss": 1.8963, "step": 13415 }, { "epoch": 0.45, "grad_norm": 0.4458061456680298, "learning_rate": 0.000567820312419153, "loss": 1.8825, "step": 13416 }, { "epoch": 0.45, "grad_norm": 0.4429440200328827, "learning_rate": 0.0005678156027071657, "loss": 1.829, "step": 13417 }, { "epoch": 0.45, "grad_norm": 0.4362001419067383, "learning_rate": 0.0005678108926700895, "loss": 1.7833, "step": 13418 }, { "epoch": 0.45, "grad_norm": 0.4484997093677521, "learning_rate": 0.0005678061823079305, "loss": 1.9006, "step": 13419 }, { "epoch": 0.45, "grad_norm": 0.43093863129615784, "learning_rate": 0.0005678014716206943, "loss": 1.8757, "step": 13420 }, { "epoch": 0.45, "grad_norm": 0.44389405846595764, "learning_rate": 0.0005677967606083865, "loss": 1.9602, "step": 13421 }, { "epoch": 0.45, "grad_norm": 0.4441358149051666, "learning_rate": 0.0005677920492710127, "loss": 1.8779, "step": 13422 }, { "epoch": 0.45, "grad_norm": 0.4458957016468048, "learning_rate": 0.000567787337608579, "loss": 1.8832, "step": 13423 }, { "epoch": 0.45, "grad_norm": 0.4484136998653412, "learning_rate": 0.000567782625621091, "loss": 1.8974, "step": 13424 }, { "epoch": 0.45, "grad_norm": 0.44978564977645874, "learning_rate": 0.0005677779133085542, "loss": 1.8624, "step": 13425 }, { "epoch": 0.45, "grad_norm": 0.5006571412086487, "learning_rate": 0.0005677732006709747, "loss": 1.8933, "step": 13426 }, { "epoch": 0.45, "grad_norm": 0.4331667721271515, "learning_rate": 0.0005677684877083577, "loss": 1.8872, "step": 13427 }, { "epoch": 0.45, "grad_norm": 0.4479987621307373, "learning_rate": 0.0005677637744207093, "loss": 1.825, "step": 13428 }, { "epoch": 0.45, "grad_norm": 0.4648306965827942, "learning_rate": 0.0005677590608080352, "loss": 1.8698, "step": 13429 }, { "epoch": 0.45, "grad_norm": 0.4666517972946167, "learning_rate": 0.000567754346870341, "loss": 1.9003, "step": 13430 }, { "epoch": 0.45, "grad_norm": 0.4272295832633972, "learning_rate": 0.0005677496326076325, "loss": 1.9006, "step": 13431 }, { "epoch": 0.45, "grad_norm": 0.46537333726882935, "learning_rate": 0.0005677449180199154, "loss": 1.9154, "step": 13432 }, { "epoch": 0.45, "grad_norm": 0.48048636317253113, "learning_rate": 0.0005677402031071954, "loss": 1.8802, "step": 13433 }, { "epoch": 0.45, "grad_norm": 0.4465155303478241, "learning_rate": 0.0005677354878694783, "loss": 1.8216, "step": 13434 }, { "epoch": 0.45, "grad_norm": 0.4422239065170288, "learning_rate": 0.0005677307723067697, "loss": 1.9209, "step": 13435 }, { "epoch": 0.45, "grad_norm": 0.4727148115634918, "learning_rate": 0.0005677260564190753, "loss": 1.7928, "step": 13436 }, { "epoch": 0.45, "grad_norm": 0.4639248549938202, "learning_rate": 0.0005677213402064011, "loss": 1.9513, "step": 13437 }, { "epoch": 0.45, "grad_norm": 0.4666011333465576, "learning_rate": 0.0005677166236687526, "loss": 1.8423, "step": 13438 }, { "epoch": 0.45, "grad_norm": 0.439751535654068, "learning_rate": 0.0005677119068061355, "loss": 1.8598, "step": 13439 }, { "epoch": 0.45, "grad_norm": 0.5064243674278259, "learning_rate": 0.0005677071896185556, "loss": 1.9865, "step": 13440 }, { "epoch": 0.45, "grad_norm": 0.4612678289413452, "learning_rate": 0.0005677024721060187, "loss": 1.8704, "step": 13441 }, { "epoch": 0.45, "grad_norm": 0.46959030628204346, "learning_rate": 0.0005676977542685304, "loss": 1.8397, "step": 13442 }, { "epoch": 0.45, "grad_norm": 0.43979397416114807, "learning_rate": 0.0005676930361060964, "loss": 1.8435, "step": 13443 }, { "epoch": 0.45, "grad_norm": 0.4804595410823822, "learning_rate": 0.0005676883176187225, "loss": 1.85, "step": 13444 }, { "epoch": 0.45, "grad_norm": 0.4640369117259979, "learning_rate": 0.0005676835988064145, "loss": 1.9172, "step": 13445 }, { "epoch": 0.45, "grad_norm": 0.4448428750038147, "learning_rate": 0.0005676788796691781, "loss": 1.9538, "step": 13446 }, { "epoch": 0.45, "grad_norm": 0.47202396392822266, "learning_rate": 0.0005676741602070189, "loss": 1.9374, "step": 13447 }, { "epoch": 0.45, "grad_norm": 0.4505334496498108, "learning_rate": 0.0005676694404199426, "loss": 1.8458, "step": 13448 }, { "epoch": 0.45, "grad_norm": 0.4351734519004822, "learning_rate": 0.0005676647203079553, "loss": 1.8695, "step": 13449 }, { "epoch": 0.45, "grad_norm": 0.4359995722770691, "learning_rate": 0.0005676599998710622, "loss": 1.9576, "step": 13450 }, { "epoch": 0.45, "grad_norm": 0.44376957416534424, "learning_rate": 0.0005676552791092695, "loss": 1.8813, "step": 13451 }, { "epoch": 0.45, "grad_norm": 0.41804975271224976, "learning_rate": 0.0005676505580225826, "loss": 1.845, "step": 13452 }, { "epoch": 0.45, "grad_norm": 0.4216258227825165, "learning_rate": 0.0005676458366110073, "loss": 1.9035, "step": 13453 }, { "epoch": 0.45, "grad_norm": 0.43144261837005615, "learning_rate": 0.0005676411148745496, "loss": 1.8756, "step": 13454 }, { "epoch": 0.45, "grad_norm": 0.4358855187892914, "learning_rate": 0.000567636392813215, "loss": 1.747, "step": 13455 }, { "epoch": 0.45, "grad_norm": 0.4303107261657715, "learning_rate": 0.0005676316704270091, "loss": 1.9067, "step": 13456 }, { "epoch": 0.45, "grad_norm": 0.4400901794433594, "learning_rate": 0.0005676269477159378, "loss": 1.9116, "step": 13457 }, { "epoch": 0.45, "grad_norm": 0.43766719102859497, "learning_rate": 0.000567622224680007, "loss": 1.8813, "step": 13458 }, { "epoch": 0.45, "grad_norm": 0.5033334493637085, "learning_rate": 0.000567617501319222, "loss": 1.8752, "step": 13459 }, { "epoch": 0.45, "grad_norm": 0.43207302689552307, "learning_rate": 0.000567612777633589, "loss": 1.8462, "step": 13460 }, { "epoch": 0.45, "grad_norm": 0.4338045120239258, "learning_rate": 0.0005676080536231134, "loss": 1.8869, "step": 13461 }, { "epoch": 0.45, "grad_norm": 0.44167715311050415, "learning_rate": 0.0005676033292878011, "loss": 1.8789, "step": 13462 }, { "epoch": 0.45, "grad_norm": 0.4371514320373535, "learning_rate": 0.0005675986046276577, "loss": 1.9057, "step": 13463 }, { "epoch": 0.45, "grad_norm": 0.4523946940898895, "learning_rate": 0.0005675938796426891, "loss": 1.8218, "step": 13464 }, { "epoch": 0.45, "grad_norm": 0.4233533442020416, "learning_rate": 0.000567589154332901, "loss": 1.8619, "step": 13465 }, { "epoch": 0.45, "grad_norm": 0.4512957036495209, "learning_rate": 0.000567584428698299, "loss": 1.908, "step": 13466 }, { "epoch": 0.45, "grad_norm": 0.44242042303085327, "learning_rate": 0.000567579702738889, "loss": 1.8852, "step": 13467 }, { "epoch": 0.45, "grad_norm": 0.4384365379810333, "learning_rate": 0.0005675749764546766, "loss": 1.8747, "step": 13468 }, { "epoch": 0.45, "grad_norm": 0.44034916162490845, "learning_rate": 0.0005675702498456677, "loss": 1.8516, "step": 13469 }, { "epoch": 0.45, "grad_norm": 0.4322262108325958, "learning_rate": 0.0005675655229118679, "loss": 1.8694, "step": 13470 }, { "epoch": 0.45, "grad_norm": 0.4443854093551636, "learning_rate": 0.0005675607956532829, "loss": 1.8791, "step": 13471 }, { "epoch": 0.45, "grad_norm": 0.42385703325271606, "learning_rate": 0.0005675560680699186, "loss": 1.9414, "step": 13472 }, { "epoch": 0.45, "grad_norm": 0.4419311285018921, "learning_rate": 0.0005675513401617806, "loss": 1.9015, "step": 13473 }, { "epoch": 0.45, "grad_norm": 0.4781678020954132, "learning_rate": 0.0005675466119288747, "loss": 1.8601, "step": 13474 }, { "epoch": 0.45, "grad_norm": 0.5495123863220215, "learning_rate": 0.0005675418833712068, "loss": 1.8867, "step": 13475 }, { "epoch": 0.45, "grad_norm": 0.4376988708972931, "learning_rate": 0.0005675371544887823, "loss": 1.8532, "step": 13476 }, { "epoch": 0.45, "grad_norm": 0.43464022874832153, "learning_rate": 0.0005675324252816072, "loss": 1.8207, "step": 13477 }, { "epoch": 0.45, "grad_norm": 0.45442792773246765, "learning_rate": 0.000567527695749687, "loss": 1.8675, "step": 13478 }, { "epoch": 0.45, "grad_norm": 0.45221054553985596, "learning_rate": 0.0005675229658930278, "loss": 1.8416, "step": 13479 }, { "epoch": 0.45, "grad_norm": 0.4455322027206421, "learning_rate": 0.000567518235711635, "loss": 1.8416, "step": 13480 }, { "epoch": 0.45, "grad_norm": 0.4604838192462921, "learning_rate": 0.0005675135052055145, "loss": 1.8493, "step": 13481 }, { "epoch": 0.45, "grad_norm": 0.4627312123775482, "learning_rate": 0.0005675087743746721, "loss": 1.8983, "step": 13482 }, { "epoch": 0.45, "grad_norm": 0.4465112090110779, "learning_rate": 0.0005675040432191134, "loss": 1.888, "step": 13483 }, { "epoch": 0.45, "grad_norm": 0.42779290676116943, "learning_rate": 0.0005674993117388442, "loss": 1.8112, "step": 13484 }, { "epoch": 0.45, "grad_norm": 0.43432146310806274, "learning_rate": 0.0005674945799338703, "loss": 1.8334, "step": 13485 }, { "epoch": 0.45, "grad_norm": 0.42641007900238037, "learning_rate": 0.0005674898478041974, "loss": 1.841, "step": 13486 }, { "epoch": 0.45, "grad_norm": 0.45388075709342957, "learning_rate": 0.0005674851153498311, "loss": 1.9791, "step": 13487 }, { "epoch": 0.45, "grad_norm": 0.4252285361289978, "learning_rate": 0.0005674803825707773, "loss": 1.8758, "step": 13488 }, { "epoch": 0.45, "grad_norm": 0.44187745451927185, "learning_rate": 0.0005674756494670419, "loss": 1.9643, "step": 13489 }, { "epoch": 0.45, "grad_norm": 0.43031904101371765, "learning_rate": 0.0005674709160386304, "loss": 1.8774, "step": 13490 }, { "epoch": 0.45, "grad_norm": 0.46944403648376465, "learning_rate": 0.0005674661822855486, "loss": 1.8888, "step": 13491 }, { "epoch": 0.45, "grad_norm": 0.4564390480518341, "learning_rate": 0.0005674614482078023, "loss": 1.7931, "step": 13492 }, { "epoch": 0.45, "grad_norm": 0.41593995690345764, "learning_rate": 0.0005674567138053971, "loss": 1.7698, "step": 13493 }, { "epoch": 0.45, "grad_norm": 0.4703630208969116, "learning_rate": 0.000567451979078339, "loss": 1.8982, "step": 13494 }, { "epoch": 0.45, "grad_norm": 0.45527416467666626, "learning_rate": 0.0005674472440266335, "loss": 1.8219, "step": 13495 }, { "epoch": 0.45, "grad_norm": 0.44180595874786377, "learning_rate": 0.0005674425086502864, "loss": 1.8999, "step": 13496 }, { "epoch": 0.45, "grad_norm": 0.44819405674934387, "learning_rate": 0.0005674377729493037, "loss": 1.8847, "step": 13497 }, { "epoch": 0.45, "grad_norm": 0.44467660784721375, "learning_rate": 0.0005674330369236908, "loss": 1.8028, "step": 13498 }, { "epoch": 0.45, "grad_norm": 0.45563292503356934, "learning_rate": 0.0005674283005734536, "loss": 1.8231, "step": 13499 }, { "epoch": 0.45, "grad_norm": 0.45449212193489075, "learning_rate": 0.0005674235638985979, "loss": 1.8304, "step": 13500 }, { "epoch": 0.45, "grad_norm": 0.43616393208503723, "learning_rate": 0.0005674188268991293, "loss": 1.8944, "step": 13501 }, { "epoch": 0.45, "grad_norm": 0.4393264353275299, "learning_rate": 0.0005674140895750538, "loss": 1.849, "step": 13502 }, { "epoch": 0.45, "grad_norm": 0.4573245048522949, "learning_rate": 0.0005674093519263769, "loss": 1.9003, "step": 13503 }, { "epoch": 0.45, "grad_norm": 0.4471043348312378, "learning_rate": 0.0005674046139531045, "loss": 1.8727, "step": 13504 }, { "epoch": 0.45, "grad_norm": 0.46217137575149536, "learning_rate": 0.0005673998756552422, "loss": 1.871, "step": 13505 }, { "epoch": 0.45, "grad_norm": 0.42876410484313965, "learning_rate": 0.000567395137032796, "loss": 1.8365, "step": 13506 }, { "epoch": 0.45, "grad_norm": 0.44132500886917114, "learning_rate": 0.0005673903980857712, "loss": 1.8253, "step": 13507 }, { "epoch": 0.45, "grad_norm": 0.43752235174179077, "learning_rate": 0.0005673856588141742, "loss": 1.8675, "step": 13508 }, { "epoch": 0.45, "grad_norm": 0.4641895294189453, "learning_rate": 0.0005673809192180103, "loss": 1.8828, "step": 13509 }, { "epoch": 0.45, "grad_norm": 0.4542735517024994, "learning_rate": 0.0005673761792972853, "loss": 1.8244, "step": 13510 }, { "epoch": 0.45, "grad_norm": 0.424772709608078, "learning_rate": 0.000567371439052005, "loss": 1.8828, "step": 13511 }, { "epoch": 0.45, "grad_norm": 0.4370574951171875, "learning_rate": 0.0005673666984821753, "loss": 1.9273, "step": 13512 }, { "epoch": 0.45, "grad_norm": 0.44106119871139526, "learning_rate": 0.0005673619575878018, "loss": 1.8577, "step": 13513 }, { "epoch": 0.45, "grad_norm": 0.4545760750770569, "learning_rate": 0.0005673572163688902, "loss": 1.9086, "step": 13514 }, { "epoch": 0.45, "grad_norm": 0.4417237341403961, "learning_rate": 0.0005673524748254463, "loss": 1.8682, "step": 13515 }, { "epoch": 0.45, "grad_norm": 0.47575831413269043, "learning_rate": 0.000567347732957476, "loss": 1.8571, "step": 13516 }, { "epoch": 0.45, "grad_norm": 0.4480891823768616, "learning_rate": 0.0005673429907649848, "loss": 1.9011, "step": 13517 }, { "epoch": 0.45, "grad_norm": 0.444324791431427, "learning_rate": 0.0005673382482479787, "loss": 1.8817, "step": 13518 }, { "epoch": 0.45, "grad_norm": 0.43732795119285583, "learning_rate": 0.0005673335054064634, "loss": 1.8851, "step": 13519 }, { "epoch": 0.45, "grad_norm": 0.44415557384490967, "learning_rate": 0.0005673287622404445, "loss": 1.8311, "step": 13520 }, { "epoch": 0.45, "grad_norm": 0.43637940287590027, "learning_rate": 0.000567324018749928, "loss": 1.8427, "step": 13521 }, { "epoch": 0.45, "grad_norm": 0.45400795340538025, "learning_rate": 0.0005673192749349195, "loss": 1.8459, "step": 13522 }, { "epoch": 0.45, "grad_norm": 0.43768513202667236, "learning_rate": 0.0005673145307954247, "loss": 1.8207, "step": 13523 }, { "epoch": 0.45, "grad_norm": 0.4258631765842438, "learning_rate": 0.0005673097863314494, "loss": 1.8851, "step": 13524 }, { "epoch": 0.45, "grad_norm": 0.43615835905075073, "learning_rate": 0.0005673050415429996, "loss": 1.8826, "step": 13525 }, { "epoch": 0.45, "grad_norm": 0.4545988142490387, "learning_rate": 0.0005673002964300808, "loss": 1.8736, "step": 13526 }, { "epoch": 0.45, "grad_norm": 0.43598058819770813, "learning_rate": 0.0005672955509926987, "loss": 1.8503, "step": 13527 }, { "epoch": 0.45, "grad_norm": 0.44450998306274414, "learning_rate": 0.0005672908052308593, "loss": 1.8099, "step": 13528 }, { "epoch": 0.45, "grad_norm": 0.42601659893989563, "learning_rate": 0.0005672860591445682, "loss": 1.8752, "step": 13529 }, { "epoch": 0.45, "grad_norm": 0.4427945613861084, "learning_rate": 0.0005672813127338312, "loss": 1.968, "step": 13530 }, { "epoch": 0.45, "grad_norm": 0.44388023018836975, "learning_rate": 0.0005672765659986541, "loss": 1.8356, "step": 13531 }, { "epoch": 0.45, "grad_norm": 0.43183383345603943, "learning_rate": 0.0005672718189390427, "loss": 1.9491, "step": 13532 }, { "epoch": 0.45, "grad_norm": 0.46239161491394043, "learning_rate": 0.0005672670715550026, "loss": 1.9338, "step": 13533 }, { "epoch": 0.45, "grad_norm": 0.5295270085334778, "learning_rate": 0.0005672623238465397, "loss": 1.8614, "step": 13534 }, { "epoch": 0.45, "grad_norm": 0.4673108160495758, "learning_rate": 0.0005672575758136596, "loss": 1.8265, "step": 13535 }, { "epoch": 0.45, "grad_norm": 0.4371955394744873, "learning_rate": 0.0005672528274563684, "loss": 1.9469, "step": 13536 }, { "epoch": 0.45, "grad_norm": 0.4312213957309723, "learning_rate": 0.0005672480787746715, "loss": 1.8761, "step": 13537 }, { "epoch": 0.45, "grad_norm": 0.4684627652168274, "learning_rate": 0.0005672433297685748, "loss": 1.7698, "step": 13538 }, { "epoch": 0.45, "grad_norm": 0.4595852792263031, "learning_rate": 0.0005672385804380841, "loss": 1.8427, "step": 13539 }, { "epoch": 0.45, "grad_norm": 0.4459454417228699, "learning_rate": 0.000567233830783205, "loss": 1.8787, "step": 13540 }, { "epoch": 0.45, "grad_norm": 0.4263923764228821, "learning_rate": 0.0005672290808039437, "loss": 1.9221, "step": 13541 }, { "epoch": 0.45, "grad_norm": 0.44981908798217773, "learning_rate": 0.0005672243305003055, "loss": 1.833, "step": 13542 }, { "epoch": 0.45, "grad_norm": 0.42463934421539307, "learning_rate": 0.0005672195798722963, "loss": 1.9383, "step": 13543 }, { "epoch": 0.45, "grad_norm": 0.44494110345840454, "learning_rate": 0.000567214828919922, "loss": 1.9366, "step": 13544 }, { "epoch": 0.45, "grad_norm": 0.4723748564720154, "learning_rate": 0.0005672100776431882, "loss": 1.8794, "step": 13545 }, { "epoch": 0.45, "grad_norm": 0.44936850666999817, "learning_rate": 0.0005672053260421007, "loss": 1.8, "step": 13546 }, { "epoch": 0.45, "grad_norm": 0.44417092204093933, "learning_rate": 0.0005672005741166655, "loss": 1.8102, "step": 13547 }, { "epoch": 0.45, "grad_norm": 0.4396296441555023, "learning_rate": 0.000567195821866888, "loss": 1.8943, "step": 13548 }, { "epoch": 0.45, "grad_norm": 0.4383077621459961, "learning_rate": 0.0005671910692927742, "loss": 1.9752, "step": 13549 }, { "epoch": 0.45, "grad_norm": 0.4337970018386841, "learning_rate": 0.0005671863163943298, "loss": 1.9086, "step": 13550 }, { "epoch": 0.45, "grad_norm": 0.4377383291721344, "learning_rate": 0.0005671815631715605, "loss": 1.7881, "step": 13551 }, { "epoch": 0.45, "grad_norm": 0.48722562193870544, "learning_rate": 0.0005671768096244722, "loss": 1.8837, "step": 13552 }, { "epoch": 0.45, "grad_norm": 0.4545830190181732, "learning_rate": 0.0005671720557530708, "loss": 1.904, "step": 13553 }, { "epoch": 0.45, "grad_norm": 0.44132837653160095, "learning_rate": 0.0005671673015573616, "loss": 1.9062, "step": 13554 }, { "epoch": 0.45, "grad_norm": 0.42410174012184143, "learning_rate": 0.0005671625470373508, "loss": 1.7899, "step": 13555 }, { "epoch": 0.45, "grad_norm": 0.48027005791664124, "learning_rate": 0.000567157792193044, "loss": 1.9105, "step": 13556 }, { "epoch": 0.45, "grad_norm": 0.45397448539733887, "learning_rate": 0.0005671530370244471, "loss": 1.8536, "step": 13557 }, { "epoch": 0.45, "grad_norm": 0.43177592754364014, "learning_rate": 0.0005671482815315657, "loss": 1.8304, "step": 13558 }, { "epoch": 0.45, "grad_norm": 0.4381555914878845, "learning_rate": 0.0005671435257144057, "loss": 1.8266, "step": 13559 }, { "epoch": 0.45, "grad_norm": 0.49419769644737244, "learning_rate": 0.0005671387695729727, "loss": 1.8686, "step": 13560 }, { "epoch": 0.45, "grad_norm": 0.4365255832672119, "learning_rate": 0.0005671340131072727, "loss": 1.8744, "step": 13561 }, { "epoch": 0.45, "grad_norm": 0.45116496086120605, "learning_rate": 0.0005671292563173113, "loss": 1.9578, "step": 13562 }, { "epoch": 0.45, "grad_norm": 0.4621995687484741, "learning_rate": 0.0005671244992030945, "loss": 1.8652, "step": 13563 }, { "epoch": 0.45, "grad_norm": 0.44873666763305664, "learning_rate": 0.0005671197417646277, "loss": 1.8747, "step": 13564 }, { "epoch": 0.45, "grad_norm": 0.46823179721832275, "learning_rate": 0.000567114984001917, "loss": 1.9156, "step": 13565 }, { "epoch": 0.45, "grad_norm": 0.447398841381073, "learning_rate": 0.0005671102259149681, "loss": 1.9614, "step": 13566 }, { "epoch": 0.45, "grad_norm": 0.42883893847465515, "learning_rate": 0.0005671054675037867, "loss": 1.9025, "step": 13567 }, { "epoch": 0.45, "grad_norm": 0.4426369071006775, "learning_rate": 0.0005671007087683786, "loss": 1.8351, "step": 13568 }, { "epoch": 0.45, "grad_norm": 0.5561906099319458, "learning_rate": 0.0005670959497087496, "loss": 1.856, "step": 13569 }, { "epoch": 0.45, "grad_norm": 0.4271356165409088, "learning_rate": 0.0005670911903249055, "loss": 1.8457, "step": 13570 }, { "epoch": 0.45, "grad_norm": 0.424719899892807, "learning_rate": 0.000567086430616852, "loss": 1.8443, "step": 13571 }, { "epoch": 0.45, "grad_norm": 0.4270842671394348, "learning_rate": 0.000567081670584595, "loss": 1.8493, "step": 13572 }, { "epoch": 0.45, "grad_norm": 0.4441086947917938, "learning_rate": 0.0005670769102281402, "loss": 1.888, "step": 13573 }, { "epoch": 0.45, "grad_norm": 0.42985618114471436, "learning_rate": 0.0005670721495474933, "loss": 1.8536, "step": 13574 }, { "epoch": 0.45, "grad_norm": 0.4369468688964844, "learning_rate": 0.0005670673885426602, "loss": 1.8974, "step": 13575 }, { "epoch": 0.45, "grad_norm": 0.44282448291778564, "learning_rate": 0.0005670626272136467, "loss": 1.8666, "step": 13576 }, { "epoch": 0.45, "grad_norm": 0.4281017780303955, "learning_rate": 0.0005670578655604584, "loss": 1.91, "step": 13577 }, { "epoch": 0.45, "grad_norm": 0.41746532917022705, "learning_rate": 0.0005670531035831014, "loss": 1.8352, "step": 13578 }, { "epoch": 0.45, "grad_norm": 0.4277816414833069, "learning_rate": 0.0005670483412815811, "loss": 1.8377, "step": 13579 }, { "epoch": 0.45, "grad_norm": 0.4401856064796448, "learning_rate": 0.0005670435786559035, "loss": 1.8741, "step": 13580 }, { "epoch": 0.45, "grad_norm": 0.4422750174999237, "learning_rate": 0.0005670388157060744, "loss": 1.9213, "step": 13581 }, { "epoch": 0.45, "grad_norm": 0.41529688239097595, "learning_rate": 0.0005670340524320994, "loss": 1.8623, "step": 13582 }, { "epoch": 0.45, "grad_norm": 0.5217421054840088, "learning_rate": 0.0005670292888339845, "loss": 1.8368, "step": 13583 }, { "epoch": 0.45, "grad_norm": 0.4205881357192993, "learning_rate": 0.0005670245249117355, "loss": 1.9162, "step": 13584 }, { "epoch": 0.45, "grad_norm": 0.43318232893943787, "learning_rate": 0.0005670197606653578, "loss": 1.8783, "step": 13585 }, { "epoch": 0.45, "grad_norm": 0.4336158335208893, "learning_rate": 0.0005670149960948577, "loss": 1.8842, "step": 13586 }, { "epoch": 0.45, "grad_norm": 0.42489922046661377, "learning_rate": 0.0005670102312002407, "loss": 1.8452, "step": 13587 }, { "epoch": 0.45, "grad_norm": 0.4302651882171631, "learning_rate": 0.0005670054659815125, "loss": 1.8649, "step": 13588 }, { "epoch": 0.45, "grad_norm": 0.4290916919708252, "learning_rate": 0.0005670007004386791, "loss": 1.7907, "step": 13589 }, { "epoch": 0.45, "grad_norm": 0.43836843967437744, "learning_rate": 0.0005669959345717461, "loss": 1.9114, "step": 13590 }, { "epoch": 0.45, "grad_norm": 0.4251664876937866, "learning_rate": 0.0005669911683807194, "loss": 1.7693, "step": 13591 }, { "epoch": 0.45, "grad_norm": 0.4275464117527008, "learning_rate": 0.0005669864018656048, "loss": 1.8249, "step": 13592 }, { "epoch": 0.45, "grad_norm": 0.43571358919143677, "learning_rate": 0.000566981635026408, "loss": 1.8475, "step": 13593 }, { "epoch": 0.45, "grad_norm": 0.4236605763435364, "learning_rate": 0.0005669768678631349, "loss": 1.815, "step": 13594 }, { "epoch": 0.45, "grad_norm": 0.41736260056495667, "learning_rate": 0.0005669721003757912, "loss": 1.8072, "step": 13595 }, { "epoch": 0.45, "grad_norm": 0.44704312086105347, "learning_rate": 0.0005669673325643826, "loss": 1.8635, "step": 13596 }, { "epoch": 0.45, "grad_norm": 0.466006875038147, "learning_rate": 0.0005669625644289152, "loss": 1.9114, "step": 13597 }, { "epoch": 0.45, "grad_norm": 0.42630842328071594, "learning_rate": 0.0005669577959693945, "loss": 1.8191, "step": 13598 }, { "epoch": 0.45, "grad_norm": 0.44399040937423706, "learning_rate": 0.0005669530271858263, "loss": 1.9303, "step": 13599 }, { "epoch": 0.45, "grad_norm": 0.4269774556159973, "learning_rate": 0.0005669482580782165, "loss": 1.8596, "step": 13600 }, { "epoch": 0.45, "grad_norm": 0.4313938319683075, "learning_rate": 0.0005669434886465708, "loss": 1.8589, "step": 13601 }, { "epoch": 0.45, "grad_norm": 0.43201377987861633, "learning_rate": 0.0005669387188908952, "loss": 1.8784, "step": 13602 }, { "epoch": 0.45, "grad_norm": 0.42057904601097107, "learning_rate": 0.0005669339488111951, "loss": 1.8146, "step": 13603 }, { "epoch": 0.45, "grad_norm": 0.44032391905784607, "learning_rate": 0.0005669291784074767, "loss": 1.8458, "step": 13604 }, { "epoch": 0.45, "grad_norm": 0.43669116497039795, "learning_rate": 0.0005669244076797455, "loss": 1.8665, "step": 13605 }, { "epoch": 0.45, "grad_norm": 0.4471583664417267, "learning_rate": 0.0005669196366280075, "loss": 1.9625, "step": 13606 }, { "epoch": 0.45, "grad_norm": 0.4217272996902466, "learning_rate": 0.0005669148652522683, "loss": 1.787, "step": 13607 }, { "epoch": 0.45, "grad_norm": 0.43205827474594116, "learning_rate": 0.0005669100935525338, "loss": 1.8589, "step": 13608 }, { "epoch": 0.45, "grad_norm": 0.46075230836868286, "learning_rate": 0.0005669053215288097, "loss": 1.8614, "step": 13609 }, { "epoch": 0.45, "grad_norm": 0.4338098466396332, "learning_rate": 0.000566900549181102, "loss": 1.8292, "step": 13610 }, { "epoch": 0.45, "grad_norm": 0.4425524175167084, "learning_rate": 0.0005668957765094163, "loss": 1.9245, "step": 13611 }, { "epoch": 0.45, "grad_norm": 0.4400666356086731, "learning_rate": 0.0005668910035137585, "loss": 1.8477, "step": 13612 }, { "epoch": 0.45, "grad_norm": 0.44255802035331726, "learning_rate": 0.0005668862301941343, "loss": 1.8999, "step": 13613 }, { "epoch": 0.45, "grad_norm": 0.42883771657943726, "learning_rate": 0.0005668814565505496, "loss": 1.8664, "step": 13614 }, { "epoch": 0.45, "grad_norm": 0.42362654209136963, "learning_rate": 0.0005668766825830101, "loss": 1.8757, "step": 13615 }, { "epoch": 0.45, "grad_norm": 0.4577087461948395, "learning_rate": 0.0005668719082915216, "loss": 1.9093, "step": 13616 }, { "epoch": 0.45, "grad_norm": 0.46304869651794434, "learning_rate": 0.0005668671336760898, "loss": 1.8604, "step": 13617 }, { "epoch": 0.45, "grad_norm": 0.42541995644569397, "learning_rate": 0.0005668623587367209, "loss": 1.8882, "step": 13618 }, { "epoch": 0.45, "grad_norm": 0.4327720105648041, "learning_rate": 0.0005668575834734202, "loss": 1.8606, "step": 13619 }, { "epoch": 0.45, "grad_norm": 0.4317842423915863, "learning_rate": 0.0005668528078861938, "loss": 1.9335, "step": 13620 }, { "epoch": 0.45, "grad_norm": 0.7608576416969299, "learning_rate": 0.0005668480319750474, "loss": 1.9196, "step": 13621 }, { "epoch": 0.45, "grad_norm": 0.42955490946769714, "learning_rate": 0.0005668432557399868, "loss": 1.8412, "step": 13622 }, { "epoch": 0.45, "grad_norm": 0.439976304769516, "learning_rate": 0.0005668384791810177, "loss": 1.8959, "step": 13623 }, { "epoch": 0.45, "grad_norm": 0.43704161047935486, "learning_rate": 0.0005668337022981463, "loss": 1.8837, "step": 13624 }, { "epoch": 0.45, "grad_norm": 0.42805394530296326, "learning_rate": 0.0005668289250913779, "loss": 1.8828, "step": 13625 }, { "epoch": 0.45, "grad_norm": 0.4248453974723816, "learning_rate": 0.0005668241475607185, "loss": 1.8396, "step": 13626 }, { "epoch": 0.45, "grad_norm": 0.4409228265285492, "learning_rate": 0.0005668193697061739, "loss": 1.841, "step": 13627 }, { "epoch": 0.45, "grad_norm": 0.4369001090526581, "learning_rate": 0.00056681459152775, "loss": 1.9157, "step": 13628 }, { "epoch": 0.45, "grad_norm": 0.4469546675682068, "learning_rate": 0.0005668098130254523, "loss": 1.8809, "step": 13629 }, { "epoch": 0.45, "grad_norm": 0.42220479249954224, "learning_rate": 0.0005668050341992869, "loss": 1.8872, "step": 13630 }, { "epoch": 0.45, "grad_norm": 0.4243960380554199, "learning_rate": 0.0005668002550492596, "loss": 1.8731, "step": 13631 }, { "epoch": 0.45, "grad_norm": 0.4474593997001648, "learning_rate": 0.000566795475575376, "loss": 1.7719, "step": 13632 }, { "epoch": 0.45, "grad_norm": 0.419863224029541, "learning_rate": 0.000566790695777642, "loss": 1.8877, "step": 13633 }, { "epoch": 0.45, "grad_norm": 0.43035998940467834, "learning_rate": 0.0005667859156560634, "loss": 1.8483, "step": 13634 }, { "epoch": 0.45, "grad_norm": 0.43868038058280945, "learning_rate": 0.000566781135210646, "loss": 1.8744, "step": 13635 }, { "epoch": 0.45, "grad_norm": 0.4315407872200012, "learning_rate": 0.0005667763544413957, "loss": 1.851, "step": 13636 }, { "epoch": 0.45, "grad_norm": 0.4288221001625061, "learning_rate": 0.0005667715733483181, "loss": 1.8698, "step": 13637 }, { "epoch": 0.45, "grad_norm": 0.44589313864707947, "learning_rate": 0.0005667667919314191, "loss": 1.9331, "step": 13638 }, { "epoch": 0.45, "grad_norm": 0.43609511852264404, "learning_rate": 0.0005667620101907047, "loss": 1.8355, "step": 13639 }, { "epoch": 0.45, "grad_norm": 0.4492764174938202, "learning_rate": 0.0005667572281261804, "loss": 1.9014, "step": 13640 }, { "epoch": 0.45, "grad_norm": 0.4277244508266449, "learning_rate": 0.0005667524457378522, "loss": 1.8441, "step": 13641 }, { "epoch": 0.45, "grad_norm": 0.4413965940475464, "learning_rate": 0.0005667476630257257, "loss": 1.8503, "step": 13642 }, { "epoch": 0.45, "grad_norm": 0.4331616759300232, "learning_rate": 0.0005667428799898069, "loss": 1.8441, "step": 13643 }, { "epoch": 0.45, "grad_norm": 0.45084306597709656, "learning_rate": 0.0005667380966301016, "loss": 1.8871, "step": 13644 }, { "epoch": 0.45, "grad_norm": 0.43722033500671387, "learning_rate": 0.0005667333129466155, "loss": 1.8242, "step": 13645 }, { "epoch": 0.45, "grad_norm": 0.44634687900543213, "learning_rate": 0.0005667285289393545, "loss": 1.9176, "step": 13646 }, { "epoch": 0.45, "grad_norm": 0.4312785863876343, "learning_rate": 0.0005667237446083244, "loss": 1.8285, "step": 13647 }, { "epoch": 0.45, "grad_norm": 0.44317951798439026, "learning_rate": 0.000566718959953531, "loss": 1.8877, "step": 13648 }, { "epoch": 0.45, "grad_norm": 0.4364674687385559, "learning_rate": 0.00056671417497498, "loss": 1.9612, "step": 13649 }, { "epoch": 0.45, "grad_norm": 0.42862263321876526, "learning_rate": 0.0005667093896726772, "loss": 1.9254, "step": 13650 }, { "epoch": 0.45, "grad_norm": 0.4254537522792816, "learning_rate": 0.0005667046040466286, "loss": 1.8168, "step": 13651 }, { "epoch": 0.45, "grad_norm": 0.43640273809432983, "learning_rate": 0.0005666998180968399, "loss": 1.7969, "step": 13652 }, { "epoch": 0.45, "grad_norm": 0.6562361717224121, "learning_rate": 0.0005666950318233169, "loss": 1.9377, "step": 13653 }, { "epoch": 0.45, "grad_norm": 0.46454450488090515, "learning_rate": 0.0005666902452260654, "loss": 1.894, "step": 13654 }, { "epoch": 0.45, "grad_norm": 0.4199541509151459, "learning_rate": 0.0005666854583050913, "loss": 1.8334, "step": 13655 }, { "epoch": 0.45, "grad_norm": 0.4323199987411499, "learning_rate": 0.0005666806710604004, "loss": 1.8495, "step": 13656 }, { "epoch": 0.45, "grad_norm": 0.4345650374889374, "learning_rate": 0.0005666758834919983, "loss": 1.9016, "step": 13657 }, { "epoch": 0.45, "grad_norm": 0.4506990909576416, "learning_rate": 0.0005666710955998911, "loss": 1.8751, "step": 13658 }, { "epoch": 0.45, "grad_norm": 0.44188421964645386, "learning_rate": 0.0005666663073840844, "loss": 1.8439, "step": 13659 }, { "epoch": 0.45, "grad_norm": 0.4458025395870209, "learning_rate": 0.0005666615188445841, "loss": 1.9626, "step": 13660 }, { "epoch": 0.45, "grad_norm": 0.43145647644996643, "learning_rate": 0.0005666567299813961, "loss": 1.8602, "step": 13661 }, { "epoch": 0.45, "grad_norm": 0.43205106258392334, "learning_rate": 0.0005666519407945261, "loss": 1.831, "step": 13662 }, { "epoch": 0.45, "grad_norm": 0.43370959162712097, "learning_rate": 0.0005666471512839799, "loss": 1.9042, "step": 13663 }, { "epoch": 0.45, "grad_norm": 0.4435887336730957, "learning_rate": 0.0005666423614497634, "loss": 1.8369, "step": 13664 }, { "epoch": 0.45, "grad_norm": 0.44017988443374634, "learning_rate": 0.0005666375712918822, "loss": 1.8618, "step": 13665 }, { "epoch": 0.45, "grad_norm": 0.440152645111084, "learning_rate": 0.0005666327808103423, "loss": 1.9027, "step": 13666 }, { "epoch": 0.45, "grad_norm": 0.48057618737220764, "learning_rate": 0.0005666279900051497, "loss": 1.8595, "step": 13667 }, { "epoch": 0.45, "grad_norm": 0.40928664803504944, "learning_rate": 0.0005666231988763099, "loss": 1.8384, "step": 13668 }, { "epoch": 0.45, "grad_norm": 0.44794973731040955, "learning_rate": 0.0005666184074238288, "loss": 1.8579, "step": 13669 }, { "epoch": 0.45, "grad_norm": 0.47384554147720337, "learning_rate": 0.0005666136156477122, "loss": 1.9277, "step": 13670 }, { "epoch": 0.45, "grad_norm": 0.43385154008865356, "learning_rate": 0.000566608823547966, "loss": 1.8731, "step": 13671 }, { "epoch": 0.45, "grad_norm": 0.4338647723197937, "learning_rate": 0.0005666040311245959, "loss": 1.877, "step": 13672 }, { "epoch": 0.45, "grad_norm": 0.4329063594341278, "learning_rate": 0.0005665992383776079, "loss": 1.8928, "step": 13673 }, { "epoch": 0.45, "grad_norm": 0.4509621560573578, "learning_rate": 0.0005665944453070076, "loss": 1.8948, "step": 13674 }, { "epoch": 0.45, "grad_norm": 0.466065913438797, "learning_rate": 0.000566589651912801, "loss": 1.8125, "step": 13675 }, { "epoch": 0.46, "grad_norm": 0.4296039044857025, "learning_rate": 0.0005665848581949938, "loss": 1.8429, "step": 13676 }, { "epoch": 0.46, "grad_norm": 0.43240296840667725, "learning_rate": 0.000566580064153592, "loss": 1.7759, "step": 13677 }, { "epoch": 0.46, "grad_norm": 0.45268070697784424, "learning_rate": 0.000566575269788601, "loss": 1.877, "step": 13678 }, { "epoch": 0.46, "grad_norm": 0.4435883164405823, "learning_rate": 0.0005665704751000271, "loss": 1.9678, "step": 13679 }, { "epoch": 0.46, "grad_norm": 0.4315870404243469, "learning_rate": 0.000566565680087876, "loss": 1.8555, "step": 13680 }, { "epoch": 0.46, "grad_norm": 0.44843369722366333, "learning_rate": 0.0005665608847521533, "loss": 1.8555, "step": 13681 }, { "epoch": 0.46, "grad_norm": 0.4298250079154968, "learning_rate": 0.000566556089092865, "loss": 1.8102, "step": 13682 }, { "epoch": 0.46, "grad_norm": 0.45077094435691833, "learning_rate": 0.0005665512931100167, "loss": 1.8466, "step": 13683 }, { "epoch": 0.46, "grad_norm": 0.427055299282074, "learning_rate": 0.0005665464968036148, "loss": 1.8559, "step": 13684 }, { "epoch": 0.46, "grad_norm": 0.4430537819862366, "learning_rate": 0.0005665417001736644, "loss": 1.9529, "step": 13685 }, { "epoch": 0.46, "grad_norm": 0.42842188477516174, "learning_rate": 0.0005665369032201717, "loss": 1.8666, "step": 13686 }, { "epoch": 0.46, "grad_norm": 0.4323198199272156, "learning_rate": 0.0005665321059431425, "loss": 1.8644, "step": 13687 }, { "epoch": 0.46, "grad_norm": 0.42554786801338196, "learning_rate": 0.0005665273083425826, "loss": 1.8309, "step": 13688 }, { "epoch": 0.46, "grad_norm": 0.43292152881622314, "learning_rate": 0.0005665225104184979, "loss": 1.8431, "step": 13689 }, { "epoch": 0.46, "grad_norm": 0.4435020387172699, "learning_rate": 0.0005665177121708942, "loss": 1.9352, "step": 13690 }, { "epoch": 0.46, "grad_norm": 0.4237324595451355, "learning_rate": 0.000566512913599777, "loss": 1.8582, "step": 13691 }, { "epoch": 0.46, "grad_norm": 0.43507325649261475, "learning_rate": 0.0005665081147051526, "loss": 1.8015, "step": 13692 }, { "epoch": 0.46, "grad_norm": 0.4254830479621887, "learning_rate": 0.0005665033154870266, "loss": 1.8628, "step": 13693 }, { "epoch": 0.46, "grad_norm": 0.4452296197414398, "learning_rate": 0.0005664985159454047, "loss": 1.9148, "step": 13694 }, { "epoch": 0.46, "grad_norm": 0.4264220893383026, "learning_rate": 0.000566493716080293, "loss": 1.9494, "step": 13695 }, { "epoch": 0.46, "grad_norm": 0.41920238733291626, "learning_rate": 0.0005664889158916971, "loss": 1.818, "step": 13696 }, { "epoch": 0.46, "grad_norm": 0.426101416349411, "learning_rate": 0.000566484115379623, "loss": 1.853, "step": 13697 }, { "epoch": 0.46, "grad_norm": 0.43765440583229065, "learning_rate": 0.0005664793145440764, "loss": 1.9046, "step": 13698 }, { "epoch": 0.46, "grad_norm": 0.417710542678833, "learning_rate": 0.0005664745133850633, "loss": 1.8626, "step": 13699 }, { "epoch": 0.46, "grad_norm": 0.44055959582328796, "learning_rate": 0.0005664697119025893, "loss": 1.8497, "step": 13700 }, { "epoch": 0.46, "grad_norm": 0.42369669675827026, "learning_rate": 0.0005664649100966604, "loss": 1.8886, "step": 13701 }, { "epoch": 0.46, "grad_norm": 0.4329646825790405, "learning_rate": 0.0005664601079672822, "loss": 1.8463, "step": 13702 }, { "epoch": 0.46, "grad_norm": 0.44531235098838806, "learning_rate": 0.0005664553055144608, "loss": 1.899, "step": 13703 }, { "epoch": 0.46, "grad_norm": 0.4276288151741028, "learning_rate": 0.000566450502738202, "loss": 1.8581, "step": 13704 }, { "epoch": 0.46, "grad_norm": 0.4275139570236206, "learning_rate": 0.0005664456996385115, "loss": 1.7927, "step": 13705 }, { "epoch": 0.46, "grad_norm": 0.4301585853099823, "learning_rate": 0.0005664408962153951, "loss": 1.8244, "step": 13706 }, { "epoch": 0.46, "grad_norm": 0.46105197072029114, "learning_rate": 0.0005664360924688588, "loss": 1.8006, "step": 13707 }, { "epoch": 0.46, "grad_norm": 0.5887088179588318, "learning_rate": 0.0005664312883989082, "loss": 1.9457, "step": 13708 }, { "epoch": 0.46, "grad_norm": 0.4521386921405792, "learning_rate": 0.0005664264840055494, "loss": 1.9188, "step": 13709 }, { "epoch": 0.46, "grad_norm": 0.4375631511211395, "learning_rate": 0.000566421679288788, "loss": 1.8849, "step": 13710 }, { "epoch": 0.46, "grad_norm": 0.44289785623550415, "learning_rate": 0.00056641687424863, "loss": 1.8803, "step": 13711 }, { "epoch": 0.46, "grad_norm": 0.4382629692554474, "learning_rate": 0.0005664120688850812, "loss": 1.8371, "step": 13712 }, { "epoch": 0.46, "grad_norm": 0.44123029708862305, "learning_rate": 0.0005664072631981474, "loss": 1.7637, "step": 13713 }, { "epoch": 0.46, "grad_norm": 0.44606953859329224, "learning_rate": 0.0005664024571878344, "loss": 1.9275, "step": 13714 }, { "epoch": 0.46, "grad_norm": 0.42789125442504883, "learning_rate": 0.0005663976508541481, "loss": 1.8987, "step": 13715 }, { "epoch": 0.46, "grad_norm": 0.44634050130844116, "learning_rate": 0.0005663928441970943, "loss": 1.8072, "step": 13716 }, { "epoch": 0.46, "grad_norm": 0.4503459930419922, "learning_rate": 0.0005663880372166788, "loss": 1.9615, "step": 13717 }, { "epoch": 0.46, "grad_norm": 0.42891207337379456, "learning_rate": 0.0005663832299129075, "loss": 1.8674, "step": 13718 }, { "epoch": 0.46, "grad_norm": 0.42411017417907715, "learning_rate": 0.0005663784222857863, "loss": 1.8642, "step": 13719 }, { "epoch": 0.46, "grad_norm": 0.42132532596588135, "learning_rate": 0.0005663736143353208, "loss": 1.829, "step": 13720 }, { "epoch": 0.46, "grad_norm": 0.4349176585674286, "learning_rate": 0.0005663688060615169, "loss": 1.7954, "step": 13721 }, { "epoch": 0.46, "grad_norm": 0.4489045739173889, "learning_rate": 0.0005663639974643808, "loss": 1.8066, "step": 13722 }, { "epoch": 0.46, "grad_norm": 0.4385152757167816, "learning_rate": 0.0005663591885439179, "loss": 1.8981, "step": 13723 }, { "epoch": 0.46, "grad_norm": 0.439984530210495, "learning_rate": 0.0005663543793001341, "loss": 1.8776, "step": 13724 }, { "epoch": 0.46, "grad_norm": 0.4401630461215973, "learning_rate": 0.0005663495697330354, "loss": 1.9032, "step": 13725 }, { "epoch": 0.46, "grad_norm": 0.44430428743362427, "learning_rate": 0.0005663447598426276, "loss": 1.78, "step": 13726 }, { "epoch": 0.46, "grad_norm": 0.438594251871109, "learning_rate": 0.0005663399496289165, "loss": 1.8681, "step": 13727 }, { "epoch": 0.46, "grad_norm": 0.45428961515426636, "learning_rate": 0.000566335139091908, "loss": 1.9473, "step": 13728 }, { "epoch": 0.46, "grad_norm": 0.44542863965034485, "learning_rate": 0.0005663303282316078, "loss": 1.9209, "step": 13729 }, { "epoch": 0.46, "grad_norm": 0.4311378598213196, "learning_rate": 0.0005663255170480218, "loss": 1.8789, "step": 13730 }, { "epoch": 0.46, "grad_norm": 0.4431859850883484, "learning_rate": 0.0005663207055411559, "loss": 1.9292, "step": 13731 }, { "epoch": 0.46, "grad_norm": 0.4267284870147705, "learning_rate": 0.0005663158937110161, "loss": 1.7969, "step": 13732 }, { "epoch": 0.46, "grad_norm": 0.4437761604785919, "learning_rate": 0.0005663110815576078, "loss": 1.9171, "step": 13733 }, { "epoch": 0.46, "grad_norm": 0.431530237197876, "learning_rate": 0.0005663062690809371, "loss": 1.9295, "step": 13734 }, { "epoch": 0.46, "grad_norm": 0.432645320892334, "learning_rate": 0.0005663014562810099, "loss": 1.8026, "step": 13735 }, { "epoch": 0.46, "grad_norm": 0.4446331858634949, "learning_rate": 0.000566296643157832, "loss": 1.8521, "step": 13736 }, { "epoch": 0.46, "grad_norm": 0.4338097870349884, "learning_rate": 0.0005662918297114091, "loss": 1.8684, "step": 13737 }, { "epoch": 0.46, "grad_norm": 0.4287196695804596, "learning_rate": 0.0005662870159417471, "loss": 1.8611, "step": 13738 }, { "epoch": 0.46, "grad_norm": 0.44821423292160034, "learning_rate": 0.0005662822018488521, "loss": 1.8381, "step": 13739 }, { "epoch": 0.46, "grad_norm": 0.4241686165332794, "learning_rate": 0.0005662773874327297, "loss": 1.7713, "step": 13740 }, { "epoch": 0.46, "grad_norm": 0.4404289722442627, "learning_rate": 0.0005662725726933857, "loss": 1.9137, "step": 13741 }, { "epoch": 0.46, "grad_norm": 0.42195001244544983, "learning_rate": 0.0005662677576308261, "loss": 1.8658, "step": 13742 }, { "epoch": 0.46, "grad_norm": 0.4359019994735718, "learning_rate": 0.0005662629422450566, "loss": 1.9009, "step": 13743 }, { "epoch": 0.46, "grad_norm": 0.43664461374282837, "learning_rate": 0.0005662581265360831, "loss": 1.8513, "step": 13744 }, { "epoch": 0.46, "grad_norm": 0.42159464955329895, "learning_rate": 0.0005662533105039115, "loss": 1.9209, "step": 13745 }, { "epoch": 0.46, "grad_norm": 0.4330490827560425, "learning_rate": 0.0005662484941485477, "loss": 1.8874, "step": 13746 }, { "epoch": 0.46, "grad_norm": 0.4333248436450958, "learning_rate": 0.0005662436774699974, "loss": 1.9646, "step": 13747 }, { "epoch": 0.46, "grad_norm": 0.4284161329269409, "learning_rate": 0.0005662388604682666, "loss": 1.8391, "step": 13748 }, { "epoch": 0.46, "grad_norm": 0.4345797300338745, "learning_rate": 0.0005662340431433609, "loss": 1.8411, "step": 13749 }, { "epoch": 0.46, "grad_norm": 0.4378755986690521, "learning_rate": 0.0005662292254952863, "loss": 1.8715, "step": 13750 }, { "epoch": 0.46, "grad_norm": 0.42174193263053894, "learning_rate": 0.0005662244075240487, "loss": 1.8969, "step": 13751 }, { "epoch": 0.46, "grad_norm": 0.44136425852775574, "learning_rate": 0.000566219589229654, "loss": 1.8616, "step": 13752 }, { "epoch": 0.46, "grad_norm": 0.44128063321113586, "learning_rate": 0.0005662147706121078, "loss": 1.901, "step": 13753 }, { "epoch": 0.46, "grad_norm": 0.4536518156528473, "learning_rate": 0.0005662099516714161, "loss": 1.8984, "step": 13754 }, { "epoch": 0.46, "grad_norm": 0.5613062977790833, "learning_rate": 0.0005662051324075848, "loss": 1.8894, "step": 13755 }, { "epoch": 0.46, "grad_norm": 0.42384403944015503, "learning_rate": 0.0005662003128206197, "loss": 2.0045, "step": 13756 }, { "epoch": 0.46, "grad_norm": 0.43608152866363525, "learning_rate": 0.0005661954929105265, "loss": 1.9168, "step": 13757 }, { "epoch": 0.46, "grad_norm": 0.447510302066803, "learning_rate": 0.0005661906726773114, "loss": 2.0051, "step": 13758 }, { "epoch": 0.46, "grad_norm": 0.4676464796066284, "learning_rate": 0.0005661858521209799, "loss": 1.846, "step": 13759 }, { "epoch": 0.46, "grad_norm": 0.4345147907733917, "learning_rate": 0.0005661810312415381, "loss": 1.8845, "step": 13760 }, { "epoch": 0.46, "grad_norm": 0.44949591159820557, "learning_rate": 0.0005661762100389917, "loss": 1.9125, "step": 13761 }, { "epoch": 0.46, "grad_norm": 0.4540688097476959, "learning_rate": 0.0005661713885133466, "loss": 1.8908, "step": 13762 }, { "epoch": 0.46, "grad_norm": 0.4323113262653351, "learning_rate": 0.0005661665666646086, "loss": 1.8395, "step": 13763 }, { "epoch": 0.46, "grad_norm": 0.45401859283447266, "learning_rate": 0.0005661617444927836, "loss": 1.8606, "step": 13764 }, { "epoch": 0.46, "grad_norm": 0.48116257786750793, "learning_rate": 0.0005661569219978775, "loss": 1.9198, "step": 13765 }, { "epoch": 0.46, "grad_norm": 0.44348016381263733, "learning_rate": 0.0005661520991798962, "loss": 1.8761, "step": 13766 }, { "epoch": 0.46, "grad_norm": 0.43088725209236145, "learning_rate": 0.0005661472760388453, "loss": 1.8527, "step": 13767 }, { "epoch": 0.46, "grad_norm": 0.4252980053424835, "learning_rate": 0.000566142452574731, "loss": 1.8231, "step": 13768 }, { "epoch": 0.46, "grad_norm": 0.4486372768878937, "learning_rate": 0.0005661376287875589, "loss": 1.8514, "step": 13769 }, { "epoch": 0.46, "grad_norm": 0.43845483660697937, "learning_rate": 0.000566132804677335, "loss": 1.8812, "step": 13770 }, { "epoch": 0.46, "grad_norm": 0.43918389081954956, "learning_rate": 0.000566127980244065, "loss": 1.9082, "step": 13771 }, { "epoch": 0.46, "grad_norm": 0.44436171650886536, "learning_rate": 0.0005661231554877548, "loss": 1.8844, "step": 13772 }, { "epoch": 0.46, "grad_norm": 0.4551357924938202, "learning_rate": 0.0005661183304084104, "loss": 1.86, "step": 13773 }, { "epoch": 0.46, "grad_norm": 0.4444476068019867, "learning_rate": 0.0005661135050060375, "loss": 1.844, "step": 13774 }, { "epoch": 0.46, "grad_norm": 0.4462061822414398, "learning_rate": 0.000566108679280642, "loss": 1.8505, "step": 13775 }, { "epoch": 0.46, "grad_norm": 0.4729178845882416, "learning_rate": 0.0005661038532322298, "loss": 1.8997, "step": 13776 }, { "epoch": 0.46, "grad_norm": 0.43411725759506226, "learning_rate": 0.0005660990268608067, "loss": 1.8642, "step": 13777 }, { "epoch": 0.46, "grad_norm": 0.4579581320285797, "learning_rate": 0.0005660942001663786, "loss": 1.926, "step": 13778 }, { "epoch": 0.46, "grad_norm": 0.444441556930542, "learning_rate": 0.0005660893731489514, "loss": 1.8388, "step": 13779 }, { "epoch": 0.46, "grad_norm": 0.4420185983181, "learning_rate": 0.0005660845458085309, "loss": 1.8662, "step": 13780 }, { "epoch": 0.46, "grad_norm": 0.4266103506088257, "learning_rate": 0.0005660797181451229, "loss": 1.8155, "step": 13781 }, { "epoch": 0.46, "grad_norm": 0.438424676656723, "learning_rate": 0.0005660748901587335, "loss": 1.8502, "step": 13782 }, { "epoch": 0.46, "grad_norm": 0.44905680418014526, "learning_rate": 0.0005660700618493682, "loss": 1.9852, "step": 13783 }, { "epoch": 0.46, "grad_norm": 0.42623111605644226, "learning_rate": 0.0005660652332170332, "loss": 1.8271, "step": 13784 }, { "epoch": 0.46, "grad_norm": 0.4835164248943329, "learning_rate": 0.0005660604042617341, "loss": 1.9482, "step": 13785 }, { "epoch": 0.46, "grad_norm": 0.4405401945114136, "learning_rate": 0.0005660555749834768, "loss": 1.8396, "step": 13786 }, { "epoch": 0.46, "grad_norm": 0.4484750032424927, "learning_rate": 0.0005660507453822673, "loss": 1.8697, "step": 13787 }, { "epoch": 0.46, "grad_norm": 0.4337281584739685, "learning_rate": 0.0005660459154581115, "loss": 1.8583, "step": 13788 }, { "epoch": 0.46, "grad_norm": 0.44458556175231934, "learning_rate": 0.000566041085211015, "loss": 1.8807, "step": 13789 }, { "epoch": 0.46, "grad_norm": 0.4463587999343872, "learning_rate": 0.000566036254640984, "loss": 1.8758, "step": 13790 }, { "epoch": 0.46, "grad_norm": 0.43478015065193176, "learning_rate": 0.000566031423748024, "loss": 1.8612, "step": 13791 }, { "epoch": 0.46, "grad_norm": 0.4268905520439148, "learning_rate": 0.0005660265925321412, "loss": 1.8988, "step": 13792 }, { "epoch": 0.46, "grad_norm": 0.45767876505851746, "learning_rate": 0.0005660217609933411, "loss": 1.9105, "step": 13793 }, { "epoch": 0.46, "grad_norm": 0.4346228837966919, "learning_rate": 0.0005660169291316301, "loss": 1.9023, "step": 13794 }, { "epoch": 0.46, "grad_norm": 0.4258778989315033, "learning_rate": 0.0005660120969470135, "loss": 1.8644, "step": 13795 }, { "epoch": 0.46, "grad_norm": 0.4265362024307251, "learning_rate": 0.0005660072644394975, "loss": 1.773, "step": 13796 }, { "epoch": 0.46, "grad_norm": 0.42601478099823, "learning_rate": 0.0005660024316090878, "loss": 1.8878, "step": 13797 }, { "epoch": 0.46, "grad_norm": 0.42458754777908325, "learning_rate": 0.0005659975984557905, "loss": 1.8904, "step": 13798 }, { "epoch": 0.46, "grad_norm": 0.4299163520336151, "learning_rate": 0.0005659927649796112, "loss": 1.866, "step": 13799 }, { "epoch": 0.46, "grad_norm": 0.43863606452941895, "learning_rate": 0.0005659879311805558, "loss": 1.9484, "step": 13800 }, { "epoch": 0.46, "grad_norm": 0.4225108027458191, "learning_rate": 0.0005659830970586303, "loss": 1.8116, "step": 13801 }, { "epoch": 0.46, "grad_norm": 0.4362483024597168, "learning_rate": 0.0005659782626138406, "loss": 1.8878, "step": 13802 }, { "epoch": 0.46, "grad_norm": 0.43848174810409546, "learning_rate": 0.0005659734278461924, "loss": 1.7915, "step": 13803 }, { "epoch": 0.46, "grad_norm": 0.42539310455322266, "learning_rate": 0.0005659685927556917, "loss": 1.8844, "step": 13804 }, { "epoch": 0.46, "grad_norm": 0.4254956543445587, "learning_rate": 0.0005659637573423443, "loss": 1.8446, "step": 13805 }, { "epoch": 0.46, "grad_norm": 0.4616682231426239, "learning_rate": 0.0005659589216061561, "loss": 1.8351, "step": 13806 }, { "epoch": 0.46, "grad_norm": 0.4183529317378998, "learning_rate": 0.0005659540855471328, "loss": 1.8872, "step": 13807 }, { "epoch": 0.46, "grad_norm": 0.44767138361930847, "learning_rate": 0.0005659492491652806, "loss": 1.896, "step": 13808 }, { "epoch": 0.46, "grad_norm": 0.419623464345932, "learning_rate": 0.0005659444124606051, "loss": 1.8806, "step": 13809 }, { "epoch": 0.46, "grad_norm": 0.43027517199516296, "learning_rate": 0.0005659395754331123, "loss": 1.8765, "step": 13810 }, { "epoch": 0.46, "grad_norm": 0.43362173438072205, "learning_rate": 0.0005659347380828081, "loss": 1.8083, "step": 13811 }, { "epoch": 0.46, "grad_norm": 0.45492175221443176, "learning_rate": 0.0005659299004096983, "loss": 1.8239, "step": 13812 }, { "epoch": 0.46, "grad_norm": 0.46598759293556213, "learning_rate": 0.0005659250624137887, "loss": 1.8394, "step": 13813 }, { "epoch": 0.46, "grad_norm": 0.45045191049575806, "learning_rate": 0.0005659202240950853, "loss": 1.8877, "step": 13814 }, { "epoch": 0.46, "grad_norm": 0.431962788105011, "learning_rate": 0.0005659153854535939, "loss": 1.8242, "step": 13815 }, { "epoch": 0.46, "grad_norm": 0.458603173494339, "learning_rate": 0.0005659105464893205, "loss": 1.8532, "step": 13816 }, { "epoch": 0.46, "grad_norm": 0.4580552279949188, "learning_rate": 0.0005659057072022707, "loss": 1.8875, "step": 13817 }, { "epoch": 0.46, "grad_norm": 0.4458078444004059, "learning_rate": 0.0005659008675924508, "loss": 1.9199, "step": 13818 }, { "epoch": 0.46, "grad_norm": 0.4491817057132721, "learning_rate": 0.0005658960276598662, "loss": 1.8813, "step": 13819 }, { "epoch": 0.46, "grad_norm": 0.43010324239730835, "learning_rate": 0.0005658911874045231, "loss": 1.8086, "step": 13820 }, { "epoch": 0.46, "grad_norm": 0.46506279706954956, "learning_rate": 0.0005658863468264273, "loss": 1.8663, "step": 13821 }, { "epoch": 0.46, "grad_norm": 0.43298786878585815, "learning_rate": 0.0005658815059255846, "loss": 1.8532, "step": 13822 }, { "epoch": 0.46, "grad_norm": 0.41590699553489685, "learning_rate": 0.000565876664702001, "loss": 1.7175, "step": 13823 }, { "epoch": 0.46, "grad_norm": 0.5084617733955383, "learning_rate": 0.0005658718231556822, "loss": 1.8683, "step": 13824 }, { "epoch": 0.46, "grad_norm": 0.4395957291126251, "learning_rate": 0.0005658669812866342, "loss": 1.7756, "step": 13825 }, { "epoch": 0.46, "grad_norm": 0.4236114025115967, "learning_rate": 0.000565862139094863, "loss": 1.9753, "step": 13826 }, { "epoch": 0.46, "grad_norm": 0.44357427954673767, "learning_rate": 0.0005658572965803742, "loss": 1.7746, "step": 13827 }, { "epoch": 0.46, "grad_norm": 0.4384070932865143, "learning_rate": 0.0005658524537431737, "loss": 1.907, "step": 13828 }, { "epoch": 0.46, "grad_norm": 0.4362400472164154, "learning_rate": 0.0005658476105832677, "loss": 1.794, "step": 13829 }, { "epoch": 0.46, "grad_norm": 0.4300287067890167, "learning_rate": 0.0005658427671006618, "loss": 1.8471, "step": 13830 }, { "epoch": 0.46, "grad_norm": 0.4354305863380432, "learning_rate": 0.000565837923295362, "loss": 1.8594, "step": 13831 }, { "epoch": 0.46, "grad_norm": 0.4287560284137726, "learning_rate": 0.000565833079167374, "loss": 1.8945, "step": 13832 }, { "epoch": 0.46, "grad_norm": 0.45667245984077454, "learning_rate": 0.000565828234716704, "loss": 1.9117, "step": 13833 }, { "epoch": 0.46, "grad_norm": 0.46661773324012756, "learning_rate": 0.0005658233899433576, "loss": 1.8914, "step": 13834 }, { "epoch": 0.46, "grad_norm": 0.44241970777511597, "learning_rate": 0.0005658185448473407, "loss": 1.8779, "step": 13835 }, { "epoch": 0.46, "grad_norm": 0.4346640110015869, "learning_rate": 0.0005658136994286593, "loss": 1.8237, "step": 13836 }, { "epoch": 0.46, "grad_norm": 0.43255531787872314, "learning_rate": 0.0005658088536873193, "loss": 1.8773, "step": 13837 }, { "epoch": 0.46, "grad_norm": 0.43124768137931824, "learning_rate": 0.0005658040076233265, "loss": 1.9162, "step": 13838 }, { "epoch": 0.46, "grad_norm": 0.4548931419849396, "learning_rate": 0.0005657991612366867, "loss": 1.9062, "step": 13839 }, { "epoch": 0.46, "grad_norm": 0.4424282908439636, "learning_rate": 0.000565794314527406, "loss": 1.8433, "step": 13840 }, { "epoch": 0.46, "grad_norm": 0.44640347361564636, "learning_rate": 0.0005657894674954902, "loss": 1.9488, "step": 13841 }, { "epoch": 0.46, "grad_norm": 0.4493321478366852, "learning_rate": 0.000565784620140945, "loss": 1.7944, "step": 13842 }, { "epoch": 0.46, "grad_norm": 0.45072901248931885, "learning_rate": 0.0005657797724637764, "loss": 1.9406, "step": 13843 }, { "epoch": 0.46, "grad_norm": 0.44692865014076233, "learning_rate": 0.0005657749244639904, "loss": 1.9726, "step": 13844 }, { "epoch": 0.46, "grad_norm": 0.4363904893398285, "learning_rate": 0.0005657700761415929, "loss": 1.8779, "step": 13845 }, { "epoch": 0.46, "grad_norm": 0.4334798753261566, "learning_rate": 0.0005657652274965896, "loss": 1.8547, "step": 13846 }, { "epoch": 0.46, "grad_norm": 0.4464852213859558, "learning_rate": 0.0005657603785289864, "loss": 1.8565, "step": 13847 }, { "epoch": 0.46, "grad_norm": 0.4425897002220154, "learning_rate": 0.0005657555292387893, "loss": 1.8235, "step": 13848 }, { "epoch": 0.46, "grad_norm": 0.4470697045326233, "learning_rate": 0.0005657506796260042, "loss": 1.9249, "step": 13849 }, { "epoch": 0.46, "grad_norm": 0.4359552562236786, "learning_rate": 0.0005657458296906369, "loss": 1.8644, "step": 13850 }, { "epoch": 0.46, "grad_norm": 0.4371429681777954, "learning_rate": 0.0005657409794326934, "loss": 1.9957, "step": 13851 }, { "epoch": 0.46, "grad_norm": 0.4391307532787323, "learning_rate": 0.0005657361288521793, "loss": 1.8066, "step": 13852 }, { "epoch": 0.46, "grad_norm": 0.43589603900909424, "learning_rate": 0.0005657312779491009, "loss": 1.7703, "step": 13853 }, { "epoch": 0.46, "grad_norm": 0.45779871940612793, "learning_rate": 0.0005657264267234638, "loss": 1.8656, "step": 13854 }, { "epoch": 0.46, "grad_norm": 0.44303977489471436, "learning_rate": 0.000565721575175274, "loss": 1.8567, "step": 13855 }, { "epoch": 0.46, "grad_norm": 0.4326765537261963, "learning_rate": 0.0005657167233045373, "loss": 1.8602, "step": 13856 }, { "epoch": 0.46, "grad_norm": 0.4288615584373474, "learning_rate": 0.0005657118711112597, "loss": 1.88, "step": 13857 }, { "epoch": 0.46, "grad_norm": 0.4406494200229645, "learning_rate": 0.0005657070185954469, "loss": 1.8612, "step": 13858 }, { "epoch": 0.46, "grad_norm": 0.4390321969985962, "learning_rate": 0.0005657021657571052, "loss": 1.8729, "step": 13859 }, { "epoch": 0.46, "grad_norm": 0.43262749910354614, "learning_rate": 0.0005656973125962401, "loss": 1.8404, "step": 13860 }, { "epoch": 0.46, "grad_norm": 0.44195556640625, "learning_rate": 0.0005656924591128576, "loss": 1.9029, "step": 13861 }, { "epoch": 0.46, "grad_norm": 0.4377523362636566, "learning_rate": 0.0005656876053069636, "loss": 1.9617, "step": 13862 }, { "epoch": 0.46, "grad_norm": 0.4256725311279297, "learning_rate": 0.0005656827511785639, "loss": 1.8313, "step": 13863 }, { "epoch": 0.46, "grad_norm": 0.48065274953842163, "learning_rate": 0.0005656778967276646, "loss": 1.9361, "step": 13864 }, { "epoch": 0.46, "grad_norm": 0.43092209100723267, "learning_rate": 0.0005656730419542714, "loss": 1.8972, "step": 13865 }, { "epoch": 0.46, "grad_norm": 0.4290481209754944, "learning_rate": 0.0005656681868583903, "loss": 1.8034, "step": 13866 }, { "epoch": 0.46, "grad_norm": 0.4222116768360138, "learning_rate": 0.0005656633314400272, "loss": 1.794, "step": 13867 }, { "epoch": 0.46, "grad_norm": 0.42243239283561707, "learning_rate": 0.000565658475699188, "loss": 1.7881, "step": 13868 }, { "epoch": 0.46, "grad_norm": 0.4449974596500397, "learning_rate": 0.0005656536196358785, "loss": 1.845, "step": 13869 }, { "epoch": 0.46, "grad_norm": 0.4264356791973114, "learning_rate": 0.0005656487632501047, "loss": 1.8455, "step": 13870 }, { "epoch": 0.46, "grad_norm": 0.42177602648735046, "learning_rate": 0.0005656439065418723, "loss": 1.9013, "step": 13871 }, { "epoch": 0.46, "grad_norm": 0.4240642786026001, "learning_rate": 0.0005656390495111874, "loss": 1.8916, "step": 13872 }, { "epoch": 0.46, "grad_norm": 0.4400467276573181, "learning_rate": 0.0005656341921580559, "loss": 1.8924, "step": 13873 }, { "epoch": 0.46, "grad_norm": 0.44341233372688293, "learning_rate": 0.0005656293344824836, "loss": 1.9021, "step": 13874 }, { "epoch": 0.46, "grad_norm": 0.44246697425842285, "learning_rate": 0.0005656244764844765, "loss": 1.8168, "step": 13875 }, { "epoch": 0.46, "grad_norm": 0.4230198264122009, "learning_rate": 0.0005656196181640402, "loss": 1.8249, "step": 13876 }, { "epoch": 0.46, "grad_norm": 0.42596501111984253, "learning_rate": 0.000565614759521181, "loss": 1.8614, "step": 13877 }, { "epoch": 0.46, "grad_norm": 0.4507182836532593, "learning_rate": 0.0005656099005559046, "loss": 1.8748, "step": 13878 }, { "epoch": 0.46, "grad_norm": 0.44717416167259216, "learning_rate": 0.0005656050412682169, "loss": 1.8912, "step": 13879 }, { "epoch": 0.46, "grad_norm": 0.4290752112865448, "learning_rate": 0.0005656001816581237, "loss": 1.918, "step": 13880 }, { "epoch": 0.46, "grad_norm": 0.42583462595939636, "learning_rate": 0.0005655953217256312, "loss": 1.8296, "step": 13881 }, { "epoch": 0.46, "grad_norm": 0.43845105171203613, "learning_rate": 0.000565590461470745, "loss": 1.8987, "step": 13882 }, { "epoch": 0.46, "grad_norm": 0.44530293345451355, "learning_rate": 0.0005655856008934711, "loss": 1.9848, "step": 13883 }, { "epoch": 0.46, "grad_norm": 0.4229009747505188, "learning_rate": 0.0005655807399938156, "loss": 1.7956, "step": 13884 }, { "epoch": 0.46, "grad_norm": 0.4413776993751526, "learning_rate": 0.0005655758787717841, "loss": 1.9758, "step": 13885 }, { "epoch": 0.46, "grad_norm": 0.42463093996047974, "learning_rate": 0.0005655710172273826, "loss": 1.841, "step": 13886 }, { "epoch": 0.46, "grad_norm": 0.45298030972480774, "learning_rate": 0.0005655661553606169, "loss": 1.8524, "step": 13887 }, { "epoch": 0.46, "grad_norm": 0.42815452814102173, "learning_rate": 0.0005655612931714932, "loss": 1.882, "step": 13888 }, { "epoch": 0.46, "grad_norm": 0.436344176530838, "learning_rate": 0.0005655564306600171, "loss": 1.7859, "step": 13889 }, { "epoch": 0.46, "grad_norm": 0.43701523542404175, "learning_rate": 0.0005655515678261947, "loss": 1.8522, "step": 13890 }, { "epoch": 0.46, "grad_norm": 0.4440441131591797, "learning_rate": 0.0005655467046700317, "loss": 1.8676, "step": 13891 }, { "epoch": 0.46, "grad_norm": 0.4355058968067169, "learning_rate": 0.0005655418411915343, "loss": 1.8397, "step": 13892 }, { "epoch": 0.46, "grad_norm": 0.4364931583404541, "learning_rate": 0.0005655369773907081, "loss": 1.863, "step": 13893 }, { "epoch": 0.46, "grad_norm": 0.436187744140625, "learning_rate": 0.0005655321132675592, "loss": 1.814, "step": 13894 }, { "epoch": 0.46, "grad_norm": 0.4452638626098633, "learning_rate": 0.0005655272488220934, "loss": 1.8868, "step": 13895 }, { "epoch": 0.46, "grad_norm": 0.4493991434574127, "learning_rate": 0.0005655223840543166, "loss": 1.9172, "step": 13896 }, { "epoch": 0.46, "grad_norm": 0.4393939971923828, "learning_rate": 0.000565517518964235, "loss": 1.8311, "step": 13897 }, { "epoch": 0.46, "grad_norm": 0.43706151843070984, "learning_rate": 0.0005655126535518539, "loss": 1.8734, "step": 13898 }, { "epoch": 0.46, "grad_norm": 0.4416479766368866, "learning_rate": 0.0005655077878171798, "loss": 1.8133, "step": 13899 }, { "epoch": 0.46, "grad_norm": 0.4358719289302826, "learning_rate": 0.0005655029217602182, "loss": 1.8364, "step": 13900 }, { "epoch": 0.46, "grad_norm": 0.4417820870876312, "learning_rate": 0.0005654980553809753, "loss": 1.9284, "step": 13901 }, { "epoch": 0.46, "grad_norm": 0.42957139015197754, "learning_rate": 0.0005654931886794569, "loss": 1.8781, "step": 13902 }, { "epoch": 0.46, "grad_norm": 0.45525768399238586, "learning_rate": 0.0005654883216556688, "loss": 1.8518, "step": 13903 }, { "epoch": 0.46, "grad_norm": 0.4479411542415619, "learning_rate": 0.000565483454309617, "loss": 1.9469, "step": 13904 }, { "epoch": 0.46, "grad_norm": 0.43846818804740906, "learning_rate": 0.0005654785866413075, "loss": 1.8838, "step": 13905 }, { "epoch": 0.46, "grad_norm": 0.44085511565208435, "learning_rate": 0.000565473718650746, "loss": 1.8975, "step": 13906 }, { "epoch": 0.46, "grad_norm": 0.4327712953090668, "learning_rate": 0.0005654688503379386, "loss": 1.8605, "step": 13907 }, { "epoch": 0.46, "grad_norm": 0.43781977891921997, "learning_rate": 0.0005654639817028911, "loss": 1.8034, "step": 13908 }, { "epoch": 0.46, "grad_norm": 0.4305483400821686, "learning_rate": 0.0005654591127456094, "loss": 1.8636, "step": 13909 }, { "epoch": 0.46, "grad_norm": 0.4529828727245331, "learning_rate": 0.0005654542434660995, "loss": 1.8306, "step": 13910 }, { "epoch": 0.46, "grad_norm": 0.4487036466598511, "learning_rate": 0.0005654493738643672, "loss": 1.8902, "step": 13911 }, { "epoch": 0.46, "grad_norm": 0.4387046992778778, "learning_rate": 0.0005654445039404185, "loss": 1.8675, "step": 13912 }, { "epoch": 0.46, "grad_norm": 0.4519699215888977, "learning_rate": 0.0005654396336942593, "loss": 1.8722, "step": 13913 }, { "epoch": 0.46, "grad_norm": 0.43376240134239197, "learning_rate": 0.0005654347631258956, "loss": 1.8222, "step": 13914 }, { "epoch": 0.46, "grad_norm": 0.4136113226413727, "learning_rate": 0.000565429892235333, "loss": 1.7996, "step": 13915 }, { "epoch": 0.46, "grad_norm": 0.4645480513572693, "learning_rate": 0.0005654250210225777, "loss": 1.8779, "step": 13916 }, { "epoch": 0.46, "grad_norm": 0.47972631454467773, "learning_rate": 0.0005654201494876357, "loss": 1.8572, "step": 13917 }, { "epoch": 0.46, "grad_norm": 0.4490312933921814, "learning_rate": 0.0005654152776305125, "loss": 1.8905, "step": 13918 }, { "epoch": 0.46, "grad_norm": 0.43708446621894836, "learning_rate": 0.0005654104054512144, "loss": 1.8948, "step": 13919 }, { "epoch": 0.46, "grad_norm": 0.4699840545654297, "learning_rate": 0.0005654055329497472, "loss": 1.8186, "step": 13920 }, { "epoch": 0.46, "grad_norm": 0.46402642130851746, "learning_rate": 0.0005654006601261167, "loss": 1.8448, "step": 13921 }, { "epoch": 0.46, "grad_norm": 0.4374054968357086, "learning_rate": 0.0005653957869803289, "loss": 1.8349, "step": 13922 }, { "epoch": 0.46, "grad_norm": 0.4484032094478607, "learning_rate": 0.0005653909135123898, "loss": 1.9678, "step": 13923 }, { "epoch": 0.46, "grad_norm": 0.47286656498908997, "learning_rate": 0.0005653860397223052, "loss": 1.8204, "step": 13924 }, { "epoch": 0.46, "grad_norm": 0.48230960965156555, "learning_rate": 0.000565381165610081, "loss": 1.8464, "step": 13925 }, { "epoch": 0.46, "grad_norm": 0.44370362162590027, "learning_rate": 0.0005653762911757231, "loss": 1.9469, "step": 13926 }, { "epoch": 0.46, "grad_norm": 0.43744564056396484, "learning_rate": 0.0005653714164192377, "loss": 1.8853, "step": 13927 }, { "epoch": 0.46, "grad_norm": 0.42947161197662354, "learning_rate": 0.0005653665413406304, "loss": 1.8607, "step": 13928 }, { "epoch": 0.46, "grad_norm": 0.45102953910827637, "learning_rate": 0.0005653616659399072, "loss": 1.8698, "step": 13929 }, { "epoch": 0.46, "grad_norm": 0.4352876842021942, "learning_rate": 0.000565356790217074, "loss": 1.8484, "step": 13930 }, { "epoch": 0.46, "grad_norm": 0.43140241503715515, "learning_rate": 0.0005653519141721367, "loss": 1.9563, "step": 13931 }, { "epoch": 0.46, "grad_norm": 0.45809832215309143, "learning_rate": 0.0005653470378051015, "loss": 1.9019, "step": 13932 }, { "epoch": 0.46, "grad_norm": 0.43830615282058716, "learning_rate": 0.0005653421611159739, "loss": 1.8847, "step": 13933 }, { "epoch": 0.46, "grad_norm": 0.43322864174842834, "learning_rate": 0.0005653372841047602, "loss": 1.8236, "step": 13934 }, { "epoch": 0.46, "grad_norm": 0.43212059140205383, "learning_rate": 0.000565332406771466, "loss": 1.8568, "step": 13935 }, { "epoch": 0.46, "grad_norm": 0.4269099533557892, "learning_rate": 0.0005653275291160973, "loss": 1.8395, "step": 13936 }, { "epoch": 0.46, "grad_norm": 0.4395138919353485, "learning_rate": 0.0005653226511386601, "loss": 1.9527, "step": 13937 }, { "epoch": 0.46, "grad_norm": 0.4364766776561737, "learning_rate": 0.0005653177728391605, "loss": 1.8973, "step": 13938 }, { "epoch": 0.46, "grad_norm": 0.4215669631958008, "learning_rate": 0.0005653128942176039, "loss": 1.8868, "step": 13939 }, { "epoch": 0.46, "grad_norm": 0.4273257851600647, "learning_rate": 0.0005653080152739967, "loss": 1.8948, "step": 13940 }, { "epoch": 0.46, "grad_norm": 0.420492947101593, "learning_rate": 0.0005653031360083446, "loss": 1.834, "step": 13941 }, { "epoch": 0.46, "grad_norm": 0.4301360845565796, "learning_rate": 0.0005652982564206537, "loss": 1.8766, "step": 13942 }, { "epoch": 0.46, "grad_norm": 0.43755000829696655, "learning_rate": 0.0005652933765109298, "loss": 1.8327, "step": 13943 }, { "epoch": 0.46, "grad_norm": 0.4345241189002991, "learning_rate": 0.0005652884962791786, "loss": 1.8917, "step": 13944 }, { "epoch": 0.46, "grad_norm": 0.43461504578590393, "learning_rate": 0.0005652836157254065, "loss": 1.8076, "step": 13945 }, { "epoch": 0.46, "grad_norm": 0.43491190671920776, "learning_rate": 0.0005652787348496191, "loss": 1.7857, "step": 13946 }, { "epoch": 0.46, "grad_norm": 0.42819154262542725, "learning_rate": 0.0005652738536518224, "loss": 1.9, "step": 13947 }, { "epoch": 0.46, "grad_norm": 0.43122968077659607, "learning_rate": 0.0005652689721320223, "loss": 1.8555, "step": 13948 }, { "epoch": 0.46, "grad_norm": 0.44073423743247986, "learning_rate": 0.0005652640902902248, "loss": 1.768, "step": 13949 }, { "epoch": 0.46, "grad_norm": 0.44539350271224976, "learning_rate": 0.0005652592081264358, "loss": 1.8727, "step": 13950 }, { "epoch": 0.46, "grad_norm": 0.42515355348587036, "learning_rate": 0.0005652543256406612, "loss": 1.9195, "step": 13951 }, { "epoch": 0.46, "grad_norm": 0.4357066750526428, "learning_rate": 0.0005652494428329068, "loss": 1.8496, "step": 13952 }, { "epoch": 0.46, "grad_norm": 0.43158140778541565, "learning_rate": 0.0005652445597031788, "loss": 1.8165, "step": 13953 }, { "epoch": 0.46, "grad_norm": 0.4310433864593506, "learning_rate": 0.000565239676251483, "loss": 1.8476, "step": 13954 }, { "epoch": 0.46, "grad_norm": 0.4236493408679962, "learning_rate": 0.0005652347924778252, "loss": 1.7348, "step": 13955 }, { "epoch": 0.46, "grad_norm": 0.590605616569519, "learning_rate": 0.0005652299083822116, "loss": 1.9493, "step": 13956 }, { "epoch": 0.46, "grad_norm": 0.4341003894805908, "learning_rate": 0.0005652250239646478, "loss": 1.8417, "step": 13957 }, { "epoch": 0.46, "grad_norm": 0.429913192987442, "learning_rate": 0.00056522013922514, "loss": 1.9223, "step": 13958 }, { "epoch": 0.46, "grad_norm": 0.45490896701812744, "learning_rate": 0.000565215254163694, "loss": 1.7856, "step": 13959 }, { "epoch": 0.46, "grad_norm": 0.42790257930755615, "learning_rate": 0.0005652103687803158, "loss": 1.9185, "step": 13960 }, { "epoch": 0.46, "grad_norm": 0.43044376373291016, "learning_rate": 0.0005652054830750112, "loss": 1.8049, "step": 13961 }, { "epoch": 0.46, "grad_norm": 0.4607248604297638, "learning_rate": 0.0005652005970477863, "loss": 1.9656, "step": 13962 }, { "epoch": 0.46, "grad_norm": 0.42695823311805725, "learning_rate": 0.0005651957106986469, "loss": 1.8357, "step": 13963 }, { "epoch": 0.46, "grad_norm": 0.4721237123012543, "learning_rate": 0.000565190824027599, "loss": 1.8912, "step": 13964 }, { "epoch": 0.46, "grad_norm": 0.45849114656448364, "learning_rate": 0.0005651859370346485, "loss": 1.9443, "step": 13965 }, { "epoch": 0.46, "grad_norm": 0.4467393159866333, "learning_rate": 0.0005651810497198014, "loss": 1.7885, "step": 13966 }, { "epoch": 0.46, "grad_norm": 0.6833686232566833, "learning_rate": 0.0005651761620830636, "loss": 1.895, "step": 13967 }, { "epoch": 0.46, "grad_norm": 0.443668931722641, "learning_rate": 0.0005651712741244409, "loss": 1.8786, "step": 13968 }, { "epoch": 0.46, "grad_norm": 0.43293821811676025, "learning_rate": 0.0005651663858439395, "loss": 1.9343, "step": 13969 }, { "epoch": 0.46, "grad_norm": 0.43072062730789185, "learning_rate": 0.000565161497241565, "loss": 1.8243, "step": 13970 }, { "epoch": 0.46, "grad_norm": 0.4213339388370514, "learning_rate": 0.0005651566083173235, "loss": 1.925, "step": 13971 }, { "epoch": 0.46, "grad_norm": 0.4338012635707855, "learning_rate": 0.0005651517190712211, "loss": 1.8974, "step": 13972 }, { "epoch": 0.46, "grad_norm": 0.41984811425209045, "learning_rate": 0.0005651468295032635, "loss": 1.845, "step": 13973 }, { "epoch": 0.46, "grad_norm": 0.43535345792770386, "learning_rate": 0.0005651419396134567, "loss": 1.8772, "step": 13974 }, { "epoch": 0.46, "grad_norm": 0.4224584698677063, "learning_rate": 0.0005651370494018067, "loss": 1.8754, "step": 13975 }, { "epoch": 0.46, "grad_norm": 0.41492143273353577, "learning_rate": 0.0005651321588683194, "loss": 1.9152, "step": 13976 }, { "epoch": 0.47, "grad_norm": 0.42794081568717957, "learning_rate": 0.0005651272680130007, "loss": 1.94, "step": 13977 }, { "epoch": 0.47, "grad_norm": 0.42294323444366455, "learning_rate": 0.0005651223768358566, "loss": 1.8522, "step": 13978 }, { "epoch": 0.47, "grad_norm": 0.424424409866333, "learning_rate": 0.0005651174853368929, "loss": 1.9035, "step": 13979 }, { "epoch": 0.47, "grad_norm": 0.4531412422657013, "learning_rate": 0.0005651125935161157, "loss": 1.7589, "step": 13980 }, { "epoch": 0.47, "grad_norm": 0.42745769023895264, "learning_rate": 0.0005651077013735308, "loss": 1.8945, "step": 13981 }, { "epoch": 0.47, "grad_norm": 0.4281792938709259, "learning_rate": 0.0005651028089091444, "loss": 1.8496, "step": 13982 }, { "epoch": 0.47, "grad_norm": 0.4392716884613037, "learning_rate": 0.0005650979161229621, "loss": 1.8935, "step": 13983 }, { "epoch": 0.47, "grad_norm": 0.4610500931739807, "learning_rate": 0.00056509302301499, "loss": 1.8552, "step": 13984 }, { "epoch": 0.47, "grad_norm": 0.4370451867580414, "learning_rate": 0.0005650881295852341, "loss": 1.896, "step": 13985 }, { "epoch": 0.47, "grad_norm": 0.4765273332595825, "learning_rate": 0.0005650832358337001, "loss": 1.8549, "step": 13986 }, { "epoch": 0.47, "grad_norm": 0.42587774991989136, "learning_rate": 0.0005650783417603943, "loss": 1.8643, "step": 13987 }, { "epoch": 0.47, "grad_norm": 0.43742337822914124, "learning_rate": 0.0005650734473653224, "loss": 1.8223, "step": 13988 }, { "epoch": 0.47, "grad_norm": 0.4245370328426361, "learning_rate": 0.0005650685526484904, "loss": 1.8077, "step": 13989 }, { "epoch": 0.47, "grad_norm": 0.45000165700912476, "learning_rate": 0.0005650636576099041, "loss": 1.8947, "step": 13990 }, { "epoch": 0.47, "grad_norm": 0.430625319480896, "learning_rate": 0.0005650587622495697, "loss": 1.8422, "step": 13991 }, { "epoch": 0.47, "grad_norm": 0.4235013723373413, "learning_rate": 0.0005650538665674929, "loss": 1.866, "step": 13992 }, { "epoch": 0.47, "grad_norm": 0.4245348870754242, "learning_rate": 0.00056504897056368, "loss": 1.8445, "step": 13993 }, { "epoch": 0.47, "grad_norm": 0.4513336718082428, "learning_rate": 0.0005650440742381366, "loss": 1.8447, "step": 13994 }, { "epoch": 0.47, "grad_norm": 0.45331284403800964, "learning_rate": 0.0005650391775908687, "loss": 1.934, "step": 13995 }, { "epoch": 0.47, "grad_norm": 0.45818030834198, "learning_rate": 0.0005650342806218822, "loss": 1.8631, "step": 13996 }, { "epoch": 0.47, "grad_norm": 0.41203412413597107, "learning_rate": 0.0005650293833311833, "loss": 1.8246, "step": 13997 }, { "epoch": 0.47, "grad_norm": 0.4607313275337219, "learning_rate": 0.0005650244857187777, "loss": 1.9119, "step": 13998 }, { "epoch": 0.47, "grad_norm": 0.4379007816314697, "learning_rate": 0.0005650195877846714, "loss": 1.8518, "step": 13999 }, { "epoch": 0.47, "grad_norm": 0.43688902258872986, "learning_rate": 0.0005650146895288704, "loss": 1.8986, "step": 14000 }, { "epoch": 0.47, "grad_norm": 0.44508227705955505, "learning_rate": 0.0005650097909513807, "loss": 1.8637, "step": 14001 }, { "epoch": 0.47, "grad_norm": 0.4779049754142761, "learning_rate": 0.0005650048920522081, "loss": 1.8711, "step": 14002 }, { "epoch": 0.47, "grad_norm": 0.4321175217628479, "learning_rate": 0.0005649999928313585, "loss": 1.9363, "step": 14003 }, { "epoch": 0.47, "grad_norm": 0.42652153968811035, "learning_rate": 0.000564995093288838, "loss": 1.8838, "step": 14004 }, { "epoch": 0.47, "grad_norm": 0.43119898438453674, "learning_rate": 0.0005649901934246526, "loss": 1.9157, "step": 14005 }, { "epoch": 0.47, "grad_norm": 0.4569961726665497, "learning_rate": 0.0005649852932388081, "loss": 1.8503, "step": 14006 }, { "epoch": 0.47, "grad_norm": 0.4191924035549164, "learning_rate": 0.0005649803927313104, "loss": 1.8274, "step": 14007 }, { "epoch": 0.47, "grad_norm": 0.428383469581604, "learning_rate": 0.0005649754919021657, "loss": 1.9314, "step": 14008 }, { "epoch": 0.47, "grad_norm": 0.4493815302848816, "learning_rate": 0.0005649705907513797, "loss": 1.8624, "step": 14009 }, { "epoch": 0.47, "grad_norm": 0.430061399936676, "learning_rate": 0.0005649656892789585, "loss": 1.7519, "step": 14010 }, { "epoch": 0.47, "grad_norm": 0.43586844205856323, "learning_rate": 0.000564960787484908, "loss": 1.7893, "step": 14011 }, { "epoch": 0.47, "grad_norm": 0.42015495896339417, "learning_rate": 0.0005649558853692341, "loss": 1.8893, "step": 14012 }, { "epoch": 0.47, "grad_norm": 0.44038957357406616, "learning_rate": 0.0005649509829319428, "loss": 1.8516, "step": 14013 }, { "epoch": 0.47, "grad_norm": 0.4205555021762848, "learning_rate": 0.0005649460801730401, "loss": 1.8945, "step": 14014 }, { "epoch": 0.47, "grad_norm": 0.43826207518577576, "learning_rate": 0.0005649411770925318, "loss": 1.9229, "step": 14015 }, { "epoch": 0.47, "grad_norm": 0.43750762939453125, "learning_rate": 0.000564936273690424, "loss": 1.8774, "step": 14016 }, { "epoch": 0.47, "grad_norm": 0.4396626949310303, "learning_rate": 0.0005649313699667226, "loss": 1.9193, "step": 14017 }, { "epoch": 0.47, "grad_norm": 0.42710649967193604, "learning_rate": 0.0005649264659214336, "loss": 1.9435, "step": 14018 }, { "epoch": 0.47, "grad_norm": 0.41676655411720276, "learning_rate": 0.0005649215615545629, "loss": 1.8801, "step": 14019 }, { "epoch": 0.47, "grad_norm": 0.5534006357192993, "learning_rate": 0.0005649166568661163, "loss": 1.9068, "step": 14020 }, { "epoch": 0.47, "grad_norm": 0.4424169957637787, "learning_rate": 0.0005649117518561, "loss": 1.8358, "step": 14021 }, { "epoch": 0.47, "grad_norm": 0.4310010075569153, "learning_rate": 0.00056490684652452, "loss": 1.9102, "step": 14022 }, { "epoch": 0.47, "grad_norm": 0.4264271557331085, "learning_rate": 0.0005649019408713819, "loss": 1.8568, "step": 14023 }, { "epoch": 0.47, "grad_norm": 0.41983872652053833, "learning_rate": 0.0005648970348966921, "loss": 1.8115, "step": 14024 }, { "epoch": 0.47, "grad_norm": 0.42793214321136475, "learning_rate": 0.0005648921286004562, "loss": 1.8465, "step": 14025 }, { "epoch": 0.47, "grad_norm": 0.43929243087768555, "learning_rate": 0.0005648872219826803, "loss": 1.9609, "step": 14026 }, { "epoch": 0.47, "grad_norm": 0.45427435636520386, "learning_rate": 0.0005648823150433704, "loss": 1.9497, "step": 14027 }, { "epoch": 0.47, "grad_norm": 0.44859904050827026, "learning_rate": 0.0005648774077825323, "loss": 1.9281, "step": 14028 }, { "epoch": 0.47, "grad_norm": 0.4247884750366211, "learning_rate": 0.0005648725002001721, "loss": 1.8832, "step": 14029 }, { "epoch": 0.47, "grad_norm": 0.7014753818511963, "learning_rate": 0.0005648675922962957, "loss": 2.0052, "step": 14030 }, { "epoch": 0.47, "grad_norm": 0.44163110852241516, "learning_rate": 0.000564862684070909, "loss": 1.8171, "step": 14031 }, { "epoch": 0.47, "grad_norm": 0.432045578956604, "learning_rate": 0.0005648577755240183, "loss": 1.9309, "step": 14032 }, { "epoch": 0.47, "grad_norm": 0.43204164505004883, "learning_rate": 0.000564852866655629, "loss": 1.9137, "step": 14033 }, { "epoch": 0.47, "grad_norm": 0.45376262068748474, "learning_rate": 0.0005648479574657475, "loss": 1.8287, "step": 14034 }, { "epoch": 0.47, "grad_norm": 0.4406147003173828, "learning_rate": 0.0005648430479543795, "loss": 1.8928, "step": 14035 }, { "epoch": 0.47, "grad_norm": 0.4316047132015228, "learning_rate": 0.000564838138121531, "loss": 1.8846, "step": 14036 }, { "epoch": 0.47, "grad_norm": 0.4424806833267212, "learning_rate": 0.0005648332279672082, "loss": 1.8628, "step": 14037 }, { "epoch": 0.47, "grad_norm": 0.442586749792099, "learning_rate": 0.0005648283174914168, "loss": 1.8707, "step": 14038 }, { "epoch": 0.47, "grad_norm": 0.44304683804512024, "learning_rate": 0.0005648234066941628, "loss": 1.8983, "step": 14039 }, { "epoch": 0.47, "grad_norm": 0.4421229660511017, "learning_rate": 0.0005648184955754523, "loss": 1.7416, "step": 14040 }, { "epoch": 0.47, "grad_norm": 0.43955084681510925, "learning_rate": 0.0005648135841352911, "loss": 1.9151, "step": 14041 }, { "epoch": 0.47, "grad_norm": 0.4375380575656891, "learning_rate": 0.0005648086723736852, "loss": 1.9005, "step": 14042 }, { "epoch": 0.47, "grad_norm": 0.41927409172058105, "learning_rate": 0.0005648037602906408, "loss": 1.8818, "step": 14043 }, { "epoch": 0.47, "grad_norm": 0.42254433035850525, "learning_rate": 0.0005647988478861636, "loss": 1.9175, "step": 14044 }, { "epoch": 0.47, "grad_norm": 0.4280683994293213, "learning_rate": 0.0005647939351602593, "loss": 1.8915, "step": 14045 }, { "epoch": 0.47, "grad_norm": 0.42800623178482056, "learning_rate": 0.0005647890221129344, "loss": 1.8679, "step": 14046 }, { "epoch": 0.47, "grad_norm": 0.813151478767395, "learning_rate": 0.0005647841087441947, "loss": 1.7706, "step": 14047 }, { "epoch": 0.47, "grad_norm": 0.4402431547641754, "learning_rate": 0.0005647791950540459, "loss": 1.924, "step": 14048 }, { "epoch": 0.47, "grad_norm": 0.4260830879211426, "learning_rate": 0.0005647742810424944, "loss": 1.9102, "step": 14049 }, { "epoch": 0.47, "grad_norm": 0.441265344619751, "learning_rate": 0.0005647693667095458, "loss": 1.8039, "step": 14050 }, { "epoch": 0.47, "grad_norm": 0.4320569932460785, "learning_rate": 0.0005647644520552062, "loss": 1.8507, "step": 14051 }, { "epoch": 0.47, "grad_norm": 0.4398382008075714, "learning_rate": 0.0005647595370794817, "loss": 1.9753, "step": 14052 }, { "epoch": 0.47, "grad_norm": 0.4421716630458832, "learning_rate": 0.0005647546217823781, "loss": 1.8566, "step": 14053 }, { "epoch": 0.47, "grad_norm": 0.4469760060310364, "learning_rate": 0.0005647497061639012, "loss": 1.8396, "step": 14054 }, { "epoch": 0.47, "grad_norm": 0.4133918881416321, "learning_rate": 0.0005647447902240574, "loss": 1.8097, "step": 14055 }, { "epoch": 0.47, "grad_norm": 0.4311749339103699, "learning_rate": 0.0005647398739628524, "loss": 1.8256, "step": 14056 }, { "epoch": 0.47, "grad_norm": 0.43619653582572937, "learning_rate": 0.000564734957380292, "loss": 1.7818, "step": 14057 }, { "epoch": 0.47, "grad_norm": 0.44047245383262634, "learning_rate": 0.0005647300404763826, "loss": 1.8775, "step": 14058 }, { "epoch": 0.47, "grad_norm": 0.43726399540901184, "learning_rate": 0.0005647251232511298, "loss": 1.8615, "step": 14059 }, { "epoch": 0.47, "grad_norm": 0.42123016715049744, "learning_rate": 0.0005647202057045398, "loss": 1.8988, "step": 14060 }, { "epoch": 0.47, "grad_norm": 0.4317522346973419, "learning_rate": 0.0005647152878366184, "loss": 1.8252, "step": 14061 }, { "epoch": 0.47, "grad_norm": 0.4570750892162323, "learning_rate": 0.0005647103696473716, "loss": 1.8479, "step": 14062 }, { "epoch": 0.47, "grad_norm": 0.43636971712112427, "learning_rate": 0.0005647054511368055, "loss": 1.9097, "step": 14063 }, { "epoch": 0.47, "grad_norm": 0.43062758445739746, "learning_rate": 0.000564700532304926, "loss": 1.9222, "step": 14064 }, { "epoch": 0.47, "grad_norm": 0.4308522939682007, "learning_rate": 0.000564695613151739, "loss": 1.8329, "step": 14065 }, { "epoch": 0.47, "grad_norm": 0.4497012794017792, "learning_rate": 0.0005646906936772506, "loss": 1.8434, "step": 14066 }, { "epoch": 0.47, "grad_norm": 0.45469731092453003, "learning_rate": 0.0005646857738814666, "loss": 1.8663, "step": 14067 }, { "epoch": 0.47, "grad_norm": 0.429463654756546, "learning_rate": 0.0005646808537643931, "loss": 1.8271, "step": 14068 }, { "epoch": 0.47, "grad_norm": 0.43773889541625977, "learning_rate": 0.0005646759333260361, "loss": 1.8254, "step": 14069 }, { "epoch": 0.47, "grad_norm": 0.41423991322517395, "learning_rate": 0.0005646710125664015, "loss": 1.8124, "step": 14070 }, { "epoch": 0.47, "grad_norm": 0.43778201937675476, "learning_rate": 0.0005646660914854952, "loss": 1.85, "step": 14071 }, { "epoch": 0.47, "grad_norm": 0.4572766423225403, "learning_rate": 0.0005646611700833233, "loss": 1.8571, "step": 14072 }, { "epoch": 0.47, "grad_norm": 0.4483269155025482, "learning_rate": 0.0005646562483598918, "loss": 1.8896, "step": 14073 }, { "epoch": 0.47, "grad_norm": 0.4138452708721161, "learning_rate": 0.0005646513263152066, "loss": 1.7562, "step": 14074 }, { "epoch": 0.47, "grad_norm": 0.4593677818775177, "learning_rate": 0.0005646464039492737, "loss": 1.974, "step": 14075 }, { "epoch": 0.47, "grad_norm": 0.4349339008331299, "learning_rate": 0.0005646414812620989, "loss": 1.8753, "step": 14076 }, { "epoch": 0.47, "grad_norm": 0.4269915223121643, "learning_rate": 0.0005646365582536885, "loss": 1.7902, "step": 14077 }, { "epoch": 0.47, "grad_norm": 0.424313485622406, "learning_rate": 0.0005646316349240482, "loss": 1.8026, "step": 14078 }, { "epoch": 0.47, "grad_norm": 0.45431217551231384, "learning_rate": 0.0005646267112731842, "loss": 1.7884, "step": 14079 }, { "epoch": 0.47, "grad_norm": 0.4302081763744354, "learning_rate": 0.0005646217873011023, "loss": 1.8993, "step": 14080 }, { "epoch": 0.47, "grad_norm": 0.43212786316871643, "learning_rate": 0.0005646168630078085, "loss": 1.8818, "step": 14081 }, { "epoch": 0.47, "grad_norm": 0.42453286051750183, "learning_rate": 0.0005646119383933089, "loss": 1.8345, "step": 14082 }, { "epoch": 0.47, "grad_norm": 0.43338730931282043, "learning_rate": 0.0005646070134576094, "loss": 1.8509, "step": 14083 }, { "epoch": 0.47, "grad_norm": 0.427827924489975, "learning_rate": 0.0005646020882007159, "loss": 1.8717, "step": 14084 }, { "epoch": 0.47, "grad_norm": 0.4437231123447418, "learning_rate": 0.0005645971626226346, "loss": 1.8303, "step": 14085 }, { "epoch": 0.47, "grad_norm": 0.4499370753765106, "learning_rate": 0.0005645922367233711, "loss": 1.9015, "step": 14086 }, { "epoch": 0.47, "grad_norm": 0.4520072638988495, "learning_rate": 0.0005645873105029318, "loss": 1.8688, "step": 14087 }, { "epoch": 0.47, "grad_norm": 0.4268980026245117, "learning_rate": 0.0005645823839613224, "loss": 1.8364, "step": 14088 }, { "epoch": 0.47, "grad_norm": 0.4245148301124573, "learning_rate": 0.000564577457098549, "loss": 1.8402, "step": 14089 }, { "epoch": 0.47, "grad_norm": 0.4342215955257416, "learning_rate": 0.0005645725299146176, "loss": 1.8804, "step": 14090 }, { "epoch": 0.47, "grad_norm": 0.4476023316383362, "learning_rate": 0.000564567602409534, "loss": 1.9137, "step": 14091 }, { "epoch": 0.47, "grad_norm": 0.44914644956588745, "learning_rate": 0.0005645626745833044, "loss": 1.8359, "step": 14092 }, { "epoch": 0.47, "grad_norm": 0.4468584954738617, "learning_rate": 0.0005645577464359346, "loss": 1.8491, "step": 14093 }, { "epoch": 0.47, "grad_norm": 0.4258638918399811, "learning_rate": 0.0005645528179674308, "loss": 1.9147, "step": 14094 }, { "epoch": 0.47, "grad_norm": 0.4404867887496948, "learning_rate": 0.0005645478891777989, "loss": 1.8829, "step": 14095 }, { "epoch": 0.47, "grad_norm": 0.44393274188041687, "learning_rate": 0.0005645429600670447, "loss": 1.8765, "step": 14096 }, { "epoch": 0.47, "grad_norm": 0.4361867904663086, "learning_rate": 0.0005645380306351744, "loss": 1.8847, "step": 14097 }, { "epoch": 0.47, "grad_norm": 0.4521434009075165, "learning_rate": 0.0005645331008821939, "loss": 1.8574, "step": 14098 }, { "epoch": 0.47, "grad_norm": 0.4305596649646759, "learning_rate": 0.0005645281708081092, "loss": 1.8666, "step": 14099 }, { "epoch": 0.47, "grad_norm": 0.44483810663223267, "learning_rate": 0.0005645232404129263, "loss": 1.8955, "step": 14100 }, { "epoch": 0.47, "grad_norm": 0.437082976102829, "learning_rate": 0.000564518309696651, "loss": 1.8574, "step": 14101 }, { "epoch": 0.47, "grad_norm": 0.4489174783229828, "learning_rate": 0.0005645133786592896, "loss": 1.8634, "step": 14102 }, { "epoch": 0.47, "grad_norm": 0.4417820870876312, "learning_rate": 0.0005645084473008479, "loss": 1.8951, "step": 14103 }, { "epoch": 0.47, "grad_norm": 0.44813287258148193, "learning_rate": 0.0005645035156213319, "loss": 1.8214, "step": 14104 }, { "epoch": 0.47, "grad_norm": 0.44035226106643677, "learning_rate": 0.0005644985836207477, "loss": 1.8516, "step": 14105 }, { "epoch": 0.47, "grad_norm": 0.4410640001296997, "learning_rate": 0.000564493651299101, "loss": 1.8785, "step": 14106 }, { "epoch": 0.47, "grad_norm": 0.44878798723220825, "learning_rate": 0.0005644887186563982, "loss": 1.9131, "step": 14107 }, { "epoch": 0.47, "grad_norm": 0.4521503448486328, "learning_rate": 0.000564483785692645, "loss": 1.8742, "step": 14108 }, { "epoch": 0.47, "grad_norm": 0.4829634726047516, "learning_rate": 0.0005644788524078473, "loss": 1.9427, "step": 14109 }, { "epoch": 0.47, "grad_norm": 0.42778831720352173, "learning_rate": 0.0005644739188020114, "loss": 1.8844, "step": 14110 }, { "epoch": 0.47, "grad_norm": 0.45997580885887146, "learning_rate": 0.0005644689848751431, "loss": 1.9052, "step": 14111 }, { "epoch": 0.47, "grad_norm": 0.4734705090522766, "learning_rate": 0.0005644640506272485, "loss": 1.8648, "step": 14112 }, { "epoch": 0.47, "grad_norm": 0.46567192673683167, "learning_rate": 0.0005644591160583333, "loss": 1.8317, "step": 14113 }, { "epoch": 0.47, "grad_norm": 0.43812933564186096, "learning_rate": 0.0005644541811684039, "loss": 1.9246, "step": 14114 }, { "epoch": 0.47, "grad_norm": 0.5962218642234802, "learning_rate": 0.0005644492459574659, "loss": 1.9011, "step": 14115 }, { "epoch": 0.47, "grad_norm": 0.4531645178794861, "learning_rate": 0.0005644443104255257, "loss": 1.7924, "step": 14116 }, { "epoch": 0.47, "grad_norm": 0.4442720413208008, "learning_rate": 0.0005644393745725889, "loss": 1.9188, "step": 14117 }, { "epoch": 0.47, "grad_norm": 0.4481465816497803, "learning_rate": 0.0005644344383986617, "loss": 1.8941, "step": 14118 }, { "epoch": 0.47, "grad_norm": 0.4806085526943207, "learning_rate": 0.0005644295019037501, "loss": 1.8594, "step": 14119 }, { "epoch": 0.47, "grad_norm": 0.4608224332332611, "learning_rate": 0.00056442456508786, "loss": 1.9337, "step": 14120 }, { "epoch": 0.47, "grad_norm": 0.48180514574050903, "learning_rate": 0.0005644196279509974, "loss": 1.9306, "step": 14121 }, { "epoch": 0.47, "grad_norm": 0.4339669644832611, "learning_rate": 0.0005644146904931685, "loss": 1.8453, "step": 14122 }, { "epoch": 0.47, "grad_norm": 0.48724502325057983, "learning_rate": 0.000564409752714379, "loss": 1.8558, "step": 14123 }, { "epoch": 0.47, "grad_norm": 0.4693931043148041, "learning_rate": 0.0005644048146146351, "loss": 1.9637, "step": 14124 }, { "epoch": 0.47, "grad_norm": 0.43010690808296204, "learning_rate": 0.0005643998761939426, "loss": 1.865, "step": 14125 }, { "epoch": 0.47, "grad_norm": 0.4610188901424408, "learning_rate": 0.0005643949374523077, "loss": 1.8363, "step": 14126 }, { "epoch": 0.47, "grad_norm": 0.46803849935531616, "learning_rate": 0.0005643899983897362, "loss": 1.8338, "step": 14127 }, { "epoch": 0.47, "grad_norm": 0.43654340505599976, "learning_rate": 0.0005643850590062343, "loss": 1.9218, "step": 14128 }, { "epoch": 0.47, "grad_norm": 0.42533642053604126, "learning_rate": 0.0005643801193018079, "loss": 1.8904, "step": 14129 }, { "epoch": 0.47, "grad_norm": 0.4484567940235138, "learning_rate": 0.0005643751792764628, "loss": 1.8185, "step": 14130 }, { "epoch": 0.47, "grad_norm": 0.4351798892021179, "learning_rate": 0.0005643702389302054, "loss": 1.8512, "step": 14131 }, { "epoch": 0.47, "grad_norm": 0.4369961619377136, "learning_rate": 0.0005643652982630415, "loss": 1.8789, "step": 14132 }, { "epoch": 0.47, "grad_norm": 0.4411516785621643, "learning_rate": 0.0005643603572749769, "loss": 1.8662, "step": 14133 }, { "epoch": 0.47, "grad_norm": 0.42606645822525024, "learning_rate": 0.0005643554159660179, "loss": 1.8307, "step": 14134 }, { "epoch": 0.47, "grad_norm": 0.4474496841430664, "learning_rate": 0.0005643504743361703, "loss": 1.9099, "step": 14135 }, { "epoch": 0.47, "grad_norm": 0.4290841221809387, "learning_rate": 0.0005643455323854403, "loss": 1.7969, "step": 14136 }, { "epoch": 0.47, "grad_norm": 0.42820683121681213, "learning_rate": 0.0005643405901138336, "loss": 1.8017, "step": 14137 }, { "epoch": 0.47, "grad_norm": 0.43359804153442383, "learning_rate": 0.0005643356475213565, "loss": 1.8452, "step": 14138 }, { "epoch": 0.47, "grad_norm": 0.6761061549186707, "learning_rate": 0.0005643307046080149, "loss": 1.8437, "step": 14139 }, { "epoch": 0.47, "grad_norm": 0.4404872953891754, "learning_rate": 0.0005643257613738148, "loss": 1.8645, "step": 14140 }, { "epoch": 0.47, "grad_norm": 0.44830551743507385, "learning_rate": 0.0005643208178187619, "loss": 1.9226, "step": 14141 }, { "epoch": 0.47, "grad_norm": 0.43816903233528137, "learning_rate": 0.0005643158739428628, "loss": 1.8104, "step": 14142 }, { "epoch": 0.47, "grad_norm": 0.42897820472717285, "learning_rate": 0.000564310929746123, "loss": 1.8039, "step": 14143 }, { "epoch": 0.47, "grad_norm": 0.43913453817367554, "learning_rate": 0.0005643059852285487, "loss": 1.8792, "step": 14144 }, { "epoch": 0.47, "grad_norm": 0.44977623224258423, "learning_rate": 0.0005643010403901459, "loss": 1.9088, "step": 14145 }, { "epoch": 0.47, "grad_norm": 0.44801783561706543, "learning_rate": 0.0005642960952309205, "loss": 1.888, "step": 14146 }, { "epoch": 0.47, "grad_norm": 0.42653876543045044, "learning_rate": 0.0005642911497508787, "loss": 1.9254, "step": 14147 }, { "epoch": 0.47, "grad_norm": 0.4277069866657257, "learning_rate": 0.0005642862039500262, "loss": 1.8434, "step": 14148 }, { "epoch": 0.47, "grad_norm": 0.44986581802368164, "learning_rate": 0.0005642812578283695, "loss": 1.836, "step": 14149 }, { "epoch": 0.47, "grad_norm": 0.6162676215171814, "learning_rate": 0.000564276311385914, "loss": 1.9338, "step": 14150 }, { "epoch": 0.47, "grad_norm": 0.42928215861320496, "learning_rate": 0.0005642713646226661, "loss": 1.8898, "step": 14151 }, { "epoch": 0.47, "grad_norm": 0.4388423264026642, "learning_rate": 0.0005642664175386317, "loss": 1.8355, "step": 14152 }, { "epoch": 0.47, "grad_norm": 0.4259265959262848, "learning_rate": 0.0005642614701338168, "loss": 1.7981, "step": 14153 }, { "epoch": 0.47, "grad_norm": 0.42400774359703064, "learning_rate": 0.0005642565224082274, "loss": 1.8472, "step": 14154 }, { "epoch": 0.47, "grad_norm": 0.4374535083770752, "learning_rate": 0.0005642515743618696, "loss": 1.9014, "step": 14155 }, { "epoch": 0.47, "grad_norm": 0.4274244010448456, "learning_rate": 0.0005642466259947493, "loss": 1.9141, "step": 14156 }, { "epoch": 0.47, "grad_norm": 0.42459115386009216, "learning_rate": 0.0005642416773068725, "loss": 1.8831, "step": 14157 }, { "epoch": 0.47, "grad_norm": 0.4386100769042969, "learning_rate": 0.0005642367282982454, "loss": 1.8679, "step": 14158 }, { "epoch": 0.47, "grad_norm": 0.4330752491950989, "learning_rate": 0.0005642317789688736, "loss": 1.9156, "step": 14159 }, { "epoch": 0.47, "grad_norm": 0.4492386281490326, "learning_rate": 0.0005642268293187634, "loss": 1.9301, "step": 14160 }, { "epoch": 0.47, "grad_norm": 0.42990872263908386, "learning_rate": 0.0005642218793479208, "loss": 1.8867, "step": 14161 }, { "epoch": 0.47, "grad_norm": 0.4288022816181183, "learning_rate": 0.0005642169290563519, "loss": 1.8779, "step": 14162 }, { "epoch": 0.47, "grad_norm": 0.43180209398269653, "learning_rate": 0.0005642119784440623, "loss": 1.8428, "step": 14163 }, { "epoch": 0.47, "grad_norm": 0.43038642406463623, "learning_rate": 0.0005642070275110585, "loss": 1.9165, "step": 14164 }, { "epoch": 0.47, "grad_norm": 0.42112505435943604, "learning_rate": 0.0005642020762573462, "loss": 1.8074, "step": 14165 }, { "epoch": 0.47, "grad_norm": 0.42138543725013733, "learning_rate": 0.0005641971246829316, "loss": 1.8125, "step": 14166 }, { "epoch": 0.47, "grad_norm": 0.4324081242084503, "learning_rate": 0.0005641921727878205, "loss": 1.8387, "step": 14167 }, { "epoch": 0.47, "grad_norm": 0.4400984048843384, "learning_rate": 0.0005641872205720192, "loss": 1.9244, "step": 14168 }, { "epoch": 0.47, "grad_norm": 0.4289780259132385, "learning_rate": 0.0005641822680355334, "loss": 1.8844, "step": 14169 }, { "epoch": 0.47, "grad_norm": 0.42146483063697815, "learning_rate": 0.0005641773151783693, "loss": 1.8583, "step": 14170 }, { "epoch": 0.47, "grad_norm": 0.43936794996261597, "learning_rate": 0.0005641723620005328, "loss": 1.8568, "step": 14171 }, { "epoch": 0.47, "grad_norm": 0.4188408851623535, "learning_rate": 0.00056416740850203, "loss": 1.8993, "step": 14172 }, { "epoch": 0.47, "grad_norm": 0.4185315668582916, "learning_rate": 0.000564162454682867, "loss": 1.8294, "step": 14173 }, { "epoch": 0.47, "grad_norm": 0.43019387125968933, "learning_rate": 0.0005641575005430495, "loss": 1.8899, "step": 14174 }, { "epoch": 0.47, "grad_norm": 0.4266960024833679, "learning_rate": 0.0005641525460825839, "loss": 1.9279, "step": 14175 }, { "epoch": 0.47, "grad_norm": 0.43933260440826416, "learning_rate": 0.000564147591301476, "loss": 1.8304, "step": 14176 }, { "epoch": 0.47, "grad_norm": 0.4319688081741333, "learning_rate": 0.0005641426361997318, "loss": 1.8478, "step": 14177 }, { "epoch": 0.47, "grad_norm": 0.42325443029403687, "learning_rate": 0.0005641376807773573, "loss": 1.8592, "step": 14178 }, { "epoch": 0.47, "grad_norm": 0.42927420139312744, "learning_rate": 0.0005641327250343587, "loss": 1.8814, "step": 14179 }, { "epoch": 0.47, "grad_norm": 0.4386705458164215, "learning_rate": 0.0005641277689707418, "loss": 1.8645, "step": 14180 }, { "epoch": 0.47, "grad_norm": 0.42821118235588074, "learning_rate": 0.0005641228125865127, "loss": 1.8839, "step": 14181 }, { "epoch": 0.47, "grad_norm": 0.4328339695930481, "learning_rate": 0.0005641178558816775, "loss": 1.7975, "step": 14182 }, { "epoch": 0.47, "grad_norm": 0.45394811034202576, "learning_rate": 0.0005641128988562421, "loss": 1.914, "step": 14183 }, { "epoch": 0.47, "grad_norm": 0.4458482563495636, "learning_rate": 0.0005641079415102126, "loss": 1.9424, "step": 14184 }, { "epoch": 0.47, "grad_norm": 0.4262450039386749, "learning_rate": 0.0005641029838435949, "loss": 1.8966, "step": 14185 }, { "epoch": 0.47, "grad_norm": 0.4267314076423645, "learning_rate": 0.0005640980258563951, "loss": 1.8384, "step": 14186 }, { "epoch": 0.47, "grad_norm": 0.41735053062438965, "learning_rate": 0.0005640930675486193, "loss": 1.8285, "step": 14187 }, { "epoch": 0.47, "grad_norm": 0.42856791615486145, "learning_rate": 0.0005640881089202733, "loss": 1.9039, "step": 14188 }, { "epoch": 0.47, "grad_norm": 0.43858063220977783, "learning_rate": 0.0005640831499713633, "loss": 1.8562, "step": 14189 }, { "epoch": 0.47, "grad_norm": 0.4152173399925232, "learning_rate": 0.0005640781907018952, "loss": 1.7757, "step": 14190 }, { "epoch": 0.47, "grad_norm": 0.44162896275520325, "learning_rate": 0.0005640732311118752, "loss": 1.8524, "step": 14191 }, { "epoch": 0.47, "grad_norm": 0.44284388422966003, "learning_rate": 0.0005640682712013093, "loss": 1.941, "step": 14192 }, { "epoch": 0.47, "grad_norm": 0.42437639832496643, "learning_rate": 0.0005640633109702032, "loss": 1.822, "step": 14193 }, { "epoch": 0.47, "grad_norm": 0.44384855031967163, "learning_rate": 0.0005640583504185633, "loss": 1.8644, "step": 14194 }, { "epoch": 0.47, "grad_norm": 0.44325676560401917, "learning_rate": 0.0005640533895463954, "loss": 1.7728, "step": 14195 }, { "epoch": 0.47, "grad_norm": 0.4276529848575592, "learning_rate": 0.0005640484283537055, "loss": 1.873, "step": 14196 }, { "epoch": 0.47, "grad_norm": 0.4428848922252655, "learning_rate": 0.0005640434668404999, "loss": 1.752, "step": 14197 }, { "epoch": 0.47, "grad_norm": 0.4467751085758209, "learning_rate": 0.0005640385050067843, "loss": 1.7761, "step": 14198 }, { "epoch": 0.47, "grad_norm": 0.42750445008277893, "learning_rate": 0.0005640335428525649, "loss": 1.8579, "step": 14199 }, { "epoch": 0.47, "grad_norm": 0.43788424134254456, "learning_rate": 0.0005640285803778477, "loss": 1.8619, "step": 14200 }, { "epoch": 0.47, "grad_norm": 0.4252724349498749, "learning_rate": 0.0005640236175826387, "loss": 1.8862, "step": 14201 }, { "epoch": 0.47, "grad_norm": 0.4695790708065033, "learning_rate": 0.000564018654466944, "loss": 1.9196, "step": 14202 }, { "epoch": 0.47, "grad_norm": 0.4372269809246063, "learning_rate": 0.0005640136910307694, "loss": 1.865, "step": 14203 }, { "epoch": 0.47, "grad_norm": 0.5314594507217407, "learning_rate": 0.0005640087272741212, "loss": 1.8782, "step": 14204 }, { "epoch": 0.47, "grad_norm": 0.41880473494529724, "learning_rate": 0.0005640037631970053, "loss": 1.9127, "step": 14205 }, { "epoch": 0.47, "grad_norm": 0.4474290907382965, "learning_rate": 0.0005639987987994277, "loss": 1.8247, "step": 14206 }, { "epoch": 0.47, "grad_norm": 0.4486980140209198, "learning_rate": 0.0005639938340813945, "loss": 1.8568, "step": 14207 }, { "epoch": 0.47, "grad_norm": 0.4345081150531769, "learning_rate": 0.0005639888690429116, "loss": 1.8987, "step": 14208 }, { "epoch": 0.47, "grad_norm": 0.43371444940567017, "learning_rate": 0.0005639839036839852, "loss": 1.8213, "step": 14209 }, { "epoch": 0.47, "grad_norm": 0.44154104590415955, "learning_rate": 0.000563978938004621, "loss": 1.8482, "step": 14210 }, { "epoch": 0.47, "grad_norm": 0.43679964542388916, "learning_rate": 0.0005639739720048255, "loss": 1.8635, "step": 14211 }, { "epoch": 0.47, "grad_norm": 0.4311158061027527, "learning_rate": 0.0005639690056846045, "loss": 1.8775, "step": 14212 }, { "epoch": 0.47, "grad_norm": 0.4331877827644348, "learning_rate": 0.000563964039043964, "loss": 1.784, "step": 14213 }, { "epoch": 0.47, "grad_norm": 0.4353877604007721, "learning_rate": 0.00056395907208291, "loss": 1.8605, "step": 14214 }, { "epoch": 0.47, "grad_norm": 0.4484020173549652, "learning_rate": 0.0005639541048014486, "loss": 1.8999, "step": 14215 }, { "epoch": 0.47, "grad_norm": 0.4290931522846222, "learning_rate": 0.0005639491371995858, "loss": 1.862, "step": 14216 }, { "epoch": 0.47, "grad_norm": 0.44122499227523804, "learning_rate": 0.0005639441692773278, "loss": 1.9494, "step": 14217 }, { "epoch": 0.47, "grad_norm": 0.43126827478408813, "learning_rate": 0.0005639392010346802, "loss": 1.8675, "step": 14218 }, { "epoch": 0.47, "grad_norm": 0.42748793959617615, "learning_rate": 0.0005639342324716494, "loss": 1.8198, "step": 14219 }, { "epoch": 0.47, "grad_norm": 0.4481889009475708, "learning_rate": 0.0005639292635882413, "loss": 1.8205, "step": 14220 }, { "epoch": 0.47, "grad_norm": 0.42051810026168823, "learning_rate": 0.0005639242943844621, "loss": 1.8542, "step": 14221 }, { "epoch": 0.47, "grad_norm": 0.43620970845222473, "learning_rate": 0.0005639193248603175, "loss": 1.9017, "step": 14222 }, { "epoch": 0.47, "grad_norm": 0.4311196208000183, "learning_rate": 0.0005639143550158138, "loss": 1.8243, "step": 14223 }, { "epoch": 0.47, "grad_norm": 0.44337108731269836, "learning_rate": 0.000563909384850957, "loss": 1.8965, "step": 14224 }, { "epoch": 0.47, "grad_norm": 0.4318910837173462, "learning_rate": 0.0005639044143657531, "loss": 1.8257, "step": 14225 }, { "epoch": 0.47, "grad_norm": 0.42810001969337463, "learning_rate": 0.0005638994435602081, "loss": 1.8869, "step": 14226 }, { "epoch": 0.47, "grad_norm": 0.4209406077861786, "learning_rate": 0.0005638944724343281, "loss": 1.8841, "step": 14227 }, { "epoch": 0.47, "grad_norm": 0.42679470777511597, "learning_rate": 0.000563889500988119, "loss": 1.8776, "step": 14228 }, { "epoch": 0.47, "grad_norm": 0.4206438958644867, "learning_rate": 0.000563884529221587, "loss": 1.9453, "step": 14229 }, { "epoch": 0.47, "grad_norm": 0.45245400071144104, "learning_rate": 0.000563879557134738, "loss": 1.9071, "step": 14230 }, { "epoch": 0.47, "grad_norm": 0.44373422861099243, "learning_rate": 0.0005638745847275782, "loss": 1.8765, "step": 14231 }, { "epoch": 0.47, "grad_norm": 0.453302264213562, "learning_rate": 0.0005638696120001134, "loss": 1.9005, "step": 14232 }, { "epoch": 0.47, "grad_norm": 0.4313761591911316, "learning_rate": 0.0005638646389523499, "loss": 1.8264, "step": 14233 }, { "epoch": 0.47, "grad_norm": 0.45522159337997437, "learning_rate": 0.0005638596655842935, "loss": 1.8958, "step": 14234 }, { "epoch": 0.47, "grad_norm": 0.4503890573978424, "learning_rate": 0.0005638546918959504, "loss": 1.8852, "step": 14235 }, { "epoch": 0.47, "grad_norm": 0.4525817632675171, "learning_rate": 0.0005638497178873267, "loss": 1.8465, "step": 14236 }, { "epoch": 0.47, "grad_norm": 0.43695497512817383, "learning_rate": 0.0005638447435584281, "loss": 1.876, "step": 14237 }, { "epoch": 0.47, "grad_norm": 0.43645179271698, "learning_rate": 0.000563839768909261, "loss": 1.8514, "step": 14238 }, { "epoch": 0.47, "grad_norm": 0.438848078250885, "learning_rate": 0.0005638347939398312, "loss": 1.8612, "step": 14239 }, { "epoch": 0.47, "grad_norm": 0.42987653613090515, "learning_rate": 0.0005638298186501449, "loss": 1.8126, "step": 14240 }, { "epoch": 0.47, "grad_norm": 0.43604177236557007, "learning_rate": 0.000563824843040208, "loss": 1.8635, "step": 14241 }, { "epoch": 0.47, "grad_norm": 0.44164159893989563, "learning_rate": 0.0005638198671100268, "loss": 1.8178, "step": 14242 }, { "epoch": 0.47, "grad_norm": 0.4283613860607147, "learning_rate": 0.0005638148908596069, "loss": 1.8908, "step": 14243 }, { "epoch": 0.47, "grad_norm": 0.43290209770202637, "learning_rate": 0.0005638099142889548, "loss": 1.8474, "step": 14244 }, { "epoch": 0.47, "grad_norm": 0.43565088510513306, "learning_rate": 0.0005638049373980763, "loss": 1.8141, "step": 14245 }, { "epoch": 0.47, "grad_norm": 0.45815181732177734, "learning_rate": 0.0005637999601869774, "loss": 1.8537, "step": 14246 }, { "epoch": 0.47, "grad_norm": 0.43581622838974, "learning_rate": 0.0005637949826556643, "loss": 1.8851, "step": 14247 }, { "epoch": 0.47, "grad_norm": 0.4702039361000061, "learning_rate": 0.0005637900048041429, "loss": 1.83, "step": 14248 }, { "epoch": 0.47, "grad_norm": 0.42236441373825073, "learning_rate": 0.0005637850266324193, "loss": 1.893, "step": 14249 }, { "epoch": 0.47, "grad_norm": 0.4431206285953522, "learning_rate": 0.0005637800481404996, "loss": 1.8659, "step": 14250 }, { "epoch": 0.47, "grad_norm": 0.43807703256607056, "learning_rate": 0.0005637750693283896, "loss": 1.8651, "step": 14251 }, { "epoch": 0.47, "grad_norm": 0.5463303327560425, "learning_rate": 0.0005637700901960957, "loss": 1.9011, "step": 14252 }, { "epoch": 0.47, "grad_norm": 0.4363308846950531, "learning_rate": 0.0005637651107436237, "loss": 1.8891, "step": 14253 }, { "epoch": 0.47, "grad_norm": 0.44053155183792114, "learning_rate": 0.0005637601309709799, "loss": 1.8486, "step": 14254 }, { "epoch": 0.47, "grad_norm": 0.4449859857559204, "learning_rate": 0.00056375515087817, "loss": 1.9002, "step": 14255 }, { "epoch": 0.47, "grad_norm": 0.4443415403366089, "learning_rate": 0.0005637501704652002, "loss": 1.879, "step": 14256 }, { "epoch": 0.47, "grad_norm": 0.44248494505882263, "learning_rate": 0.0005637451897320766, "loss": 1.8629, "step": 14257 }, { "epoch": 0.47, "grad_norm": 0.4295862317085266, "learning_rate": 0.0005637402086788052, "loss": 1.8159, "step": 14258 }, { "epoch": 0.47, "grad_norm": 0.4168693721294403, "learning_rate": 0.0005637352273053921, "loss": 1.7931, "step": 14259 }, { "epoch": 0.47, "grad_norm": 0.4343607425689697, "learning_rate": 0.0005637302456118432, "loss": 1.8765, "step": 14260 }, { "epoch": 0.47, "grad_norm": 0.4299023449420929, "learning_rate": 0.0005637252635981646, "loss": 1.8979, "step": 14261 }, { "epoch": 0.47, "grad_norm": 0.43021371960639954, "learning_rate": 0.0005637202812643626, "loss": 1.7265, "step": 14262 }, { "epoch": 0.47, "grad_norm": 0.43691712617874146, "learning_rate": 0.0005637152986104428, "loss": 1.8927, "step": 14263 }, { "epoch": 0.47, "grad_norm": 0.4282971918582916, "learning_rate": 0.0005637103156364116, "loss": 1.9061, "step": 14264 }, { "epoch": 0.47, "grad_norm": 0.42077574133872986, "learning_rate": 0.0005637053323422749, "loss": 1.8246, "step": 14265 }, { "epoch": 0.47, "grad_norm": 0.4430398643016815, "learning_rate": 0.0005637003487280388, "loss": 1.8833, "step": 14266 }, { "epoch": 0.47, "grad_norm": 0.4253855049610138, "learning_rate": 0.0005636953647937093, "loss": 1.8568, "step": 14267 }, { "epoch": 0.47, "grad_norm": 0.4351200461387634, "learning_rate": 0.0005636903805392926, "loss": 1.8879, "step": 14268 }, { "epoch": 0.47, "grad_norm": 0.5213649868965149, "learning_rate": 0.0005636853959647946, "loss": 1.9313, "step": 14269 }, { "epoch": 0.47, "grad_norm": 0.4207318425178528, "learning_rate": 0.0005636804110702213, "loss": 1.7194, "step": 14270 }, { "epoch": 0.47, "grad_norm": 0.43895456194877625, "learning_rate": 0.0005636754258555788, "loss": 1.916, "step": 14271 }, { "epoch": 0.47, "grad_norm": 0.44009682536125183, "learning_rate": 0.0005636704403208732, "loss": 1.8456, "step": 14272 }, { "epoch": 0.47, "grad_norm": 0.4270464777946472, "learning_rate": 0.0005636654544661106, "loss": 1.7977, "step": 14273 }, { "epoch": 0.47, "grad_norm": 0.4300869107246399, "learning_rate": 0.0005636604682912969, "loss": 1.8803, "step": 14274 }, { "epoch": 0.47, "grad_norm": 0.44270387291908264, "learning_rate": 0.0005636554817964384, "loss": 1.874, "step": 14275 }, { "epoch": 0.47, "grad_norm": 0.42799699306488037, "learning_rate": 0.0005636504949815408, "loss": 1.8708, "step": 14276 }, { "epoch": 0.47, "grad_norm": 0.4433816969394684, "learning_rate": 0.0005636455078466105, "loss": 1.8855, "step": 14277 }, { "epoch": 0.48, "grad_norm": 0.42728638648986816, "learning_rate": 0.0005636405203916533, "loss": 1.7749, "step": 14278 }, { "epoch": 0.48, "grad_norm": 0.43073955178260803, "learning_rate": 0.0005636355326166755, "loss": 1.8368, "step": 14279 }, { "epoch": 0.48, "grad_norm": 0.43920016288757324, "learning_rate": 0.0005636305445216828, "loss": 1.834, "step": 14280 }, { "epoch": 0.48, "grad_norm": 0.4423906207084656, "learning_rate": 0.0005636255561066816, "loss": 1.8964, "step": 14281 }, { "epoch": 0.48, "grad_norm": 0.42887988686561584, "learning_rate": 0.0005636205673716778, "loss": 1.8334, "step": 14282 }, { "epoch": 0.48, "grad_norm": 0.42977070808410645, "learning_rate": 0.0005636155783166775, "loss": 1.9105, "step": 14283 }, { "epoch": 0.48, "grad_norm": 0.43751025199890137, "learning_rate": 0.0005636105889416866, "loss": 1.8695, "step": 14284 }, { "epoch": 0.48, "grad_norm": 0.4513929486274719, "learning_rate": 0.0005636055992467114, "loss": 1.8209, "step": 14285 }, { "epoch": 0.48, "grad_norm": 0.43875205516815186, "learning_rate": 0.0005636006092317579, "loss": 1.9096, "step": 14286 }, { "epoch": 0.48, "grad_norm": 0.42845115065574646, "learning_rate": 0.000563595618896832, "loss": 1.8404, "step": 14287 }, { "epoch": 0.48, "grad_norm": 0.42241188883781433, "learning_rate": 0.0005635906282419398, "loss": 1.8649, "step": 14288 }, { "epoch": 0.48, "grad_norm": 0.4460711181163788, "learning_rate": 0.0005635856372670875, "loss": 1.8106, "step": 14289 }, { "epoch": 0.48, "grad_norm": 0.5159142017364502, "learning_rate": 0.0005635806459722811, "loss": 1.889, "step": 14290 }, { "epoch": 0.48, "grad_norm": 0.4397730231285095, "learning_rate": 0.0005635756543575266, "loss": 1.9134, "step": 14291 }, { "epoch": 0.48, "grad_norm": 0.4541187882423401, "learning_rate": 0.0005635706624228301, "loss": 1.8861, "step": 14292 }, { "epoch": 0.48, "grad_norm": 0.4382014572620392, "learning_rate": 0.0005635656701681975, "loss": 1.8639, "step": 14293 }, { "epoch": 0.48, "grad_norm": 0.43091416358947754, "learning_rate": 0.0005635606775936353, "loss": 1.8477, "step": 14294 }, { "epoch": 0.48, "grad_norm": 0.44998297095298767, "learning_rate": 0.000563555684699149, "loss": 1.8892, "step": 14295 }, { "epoch": 0.48, "grad_norm": 0.425388902425766, "learning_rate": 0.0005635506914847451, "loss": 1.9156, "step": 14296 }, { "epoch": 0.48, "grad_norm": 0.43460142612457275, "learning_rate": 0.0005635456979504295, "loss": 1.8251, "step": 14297 }, { "epoch": 0.48, "grad_norm": 0.45362356305122375, "learning_rate": 0.0005635407040962083, "loss": 1.8909, "step": 14298 }, { "epoch": 0.48, "grad_norm": 0.43065446615219116, "learning_rate": 0.0005635357099220874, "loss": 1.8593, "step": 14299 }, { "epoch": 0.48, "grad_norm": 0.4270760715007782, "learning_rate": 0.0005635307154280729, "loss": 1.8749, "step": 14300 }, { "epoch": 0.48, "grad_norm": 0.4312818646430969, "learning_rate": 0.000563525720614171, "loss": 1.9412, "step": 14301 }, { "epoch": 0.48, "grad_norm": 0.43390125036239624, "learning_rate": 0.0005635207254803878, "loss": 1.8295, "step": 14302 }, { "epoch": 0.48, "grad_norm": 0.45200029015541077, "learning_rate": 0.0005635157300267292, "loss": 1.8649, "step": 14303 }, { "epoch": 0.48, "grad_norm": 0.42788073420524597, "learning_rate": 0.0005635107342532013, "loss": 1.8938, "step": 14304 }, { "epoch": 0.48, "grad_norm": 0.4326246678829193, "learning_rate": 0.0005635057381598102, "loss": 1.8886, "step": 14305 }, { "epoch": 0.48, "grad_norm": 0.44363418221473694, "learning_rate": 0.000563500741746562, "loss": 1.8277, "step": 14306 }, { "epoch": 0.48, "grad_norm": 0.43814608454704285, "learning_rate": 0.0005634957450134628, "loss": 1.8911, "step": 14307 }, { "epoch": 0.48, "grad_norm": 0.43226325511932373, "learning_rate": 0.0005634907479605185, "loss": 1.8475, "step": 14308 }, { "epoch": 0.48, "grad_norm": 0.427710622549057, "learning_rate": 0.0005634857505877353, "loss": 1.8697, "step": 14309 }, { "epoch": 0.48, "grad_norm": 0.41934528946876526, "learning_rate": 0.0005634807528951192, "loss": 1.8232, "step": 14310 }, { "epoch": 0.48, "grad_norm": 0.4359901249408722, "learning_rate": 0.0005634757548826761, "loss": 1.8702, "step": 14311 }, { "epoch": 0.48, "grad_norm": 0.4231777787208557, "learning_rate": 0.0005634707565504125, "loss": 1.832, "step": 14312 }, { "epoch": 0.48, "grad_norm": 0.4601691961288452, "learning_rate": 0.0005634657578983342, "loss": 1.8822, "step": 14313 }, { "epoch": 0.48, "grad_norm": 0.42512181401252747, "learning_rate": 0.0005634607589264473, "loss": 1.8672, "step": 14314 }, { "epoch": 0.48, "grad_norm": 0.4066891372203827, "learning_rate": 0.0005634557596347577, "loss": 1.8459, "step": 14315 }, { "epoch": 0.48, "grad_norm": 0.4523407816886902, "learning_rate": 0.0005634507600232718, "loss": 1.8355, "step": 14316 }, { "epoch": 0.48, "grad_norm": 0.42061540484428406, "learning_rate": 0.0005634457600919954, "loss": 1.8958, "step": 14317 }, { "epoch": 0.48, "grad_norm": 0.4274432063102722, "learning_rate": 0.0005634407598409347, "loss": 1.8994, "step": 14318 }, { "epoch": 0.48, "grad_norm": 0.42869311571121216, "learning_rate": 0.0005634357592700956, "loss": 1.7685, "step": 14319 }, { "epoch": 0.48, "grad_norm": 0.4678325951099396, "learning_rate": 0.0005634307583794845, "loss": 1.7999, "step": 14320 }, { "epoch": 0.48, "grad_norm": 0.4652690291404724, "learning_rate": 0.0005634257571691071, "loss": 1.9076, "step": 14321 }, { "epoch": 0.48, "grad_norm": 0.4414815306663513, "learning_rate": 0.0005634207556389698, "loss": 1.8803, "step": 14322 }, { "epoch": 0.48, "grad_norm": 0.4293888509273529, "learning_rate": 0.0005634157537890785, "loss": 1.7982, "step": 14323 }, { "epoch": 0.48, "grad_norm": 0.4279218018054962, "learning_rate": 0.0005634107516194393, "loss": 1.8506, "step": 14324 }, { "epoch": 0.48, "grad_norm": 0.430795282125473, "learning_rate": 0.0005634057491300581, "loss": 1.8438, "step": 14325 }, { "epoch": 0.48, "grad_norm": 0.46976032853126526, "learning_rate": 0.0005634007463209413, "loss": 1.8748, "step": 14326 }, { "epoch": 0.48, "grad_norm": 0.44845065474510193, "learning_rate": 0.0005633957431920947, "loss": 1.903, "step": 14327 }, { "epoch": 0.48, "grad_norm": 0.4309356212615967, "learning_rate": 0.0005633907397435245, "loss": 1.8392, "step": 14328 }, { "epoch": 0.48, "grad_norm": 0.455689936876297, "learning_rate": 0.0005633857359752368, "loss": 1.7949, "step": 14329 }, { "epoch": 0.48, "grad_norm": 0.43731749057769775, "learning_rate": 0.0005633807318872376, "loss": 1.8621, "step": 14330 }, { "epoch": 0.48, "grad_norm": 0.4311199188232422, "learning_rate": 0.000563375727479533, "loss": 1.8819, "step": 14331 }, { "epoch": 0.48, "grad_norm": 0.4369524121284485, "learning_rate": 0.000563370722752129, "loss": 1.8547, "step": 14332 }, { "epoch": 0.48, "grad_norm": 0.45353978872299194, "learning_rate": 0.0005633657177050318, "loss": 1.8586, "step": 14333 }, { "epoch": 0.48, "grad_norm": 0.47112971544265747, "learning_rate": 0.0005633607123382475, "loss": 1.9635, "step": 14334 }, { "epoch": 0.48, "grad_norm": 0.43209409713745117, "learning_rate": 0.0005633557066517821, "loss": 1.8681, "step": 14335 }, { "epoch": 0.48, "grad_norm": 0.4692333936691284, "learning_rate": 0.0005633507006456416, "loss": 1.9291, "step": 14336 }, { "epoch": 0.48, "grad_norm": 0.4493054449558258, "learning_rate": 0.0005633456943198321, "loss": 1.9417, "step": 14337 }, { "epoch": 0.48, "grad_norm": 0.42937496304512024, "learning_rate": 0.0005633406876743598, "loss": 1.7943, "step": 14338 }, { "epoch": 0.48, "grad_norm": 0.4258347451686859, "learning_rate": 0.0005633356807092307, "loss": 1.7851, "step": 14339 }, { "epoch": 0.48, "grad_norm": 0.4606831669807434, "learning_rate": 0.0005633306734244509, "loss": 1.8058, "step": 14340 }, { "epoch": 0.48, "grad_norm": 0.4409938454627991, "learning_rate": 0.0005633256658200264, "loss": 1.7784, "step": 14341 }, { "epoch": 0.48, "grad_norm": 0.4214242994785309, "learning_rate": 0.0005633206578959635, "loss": 1.7551, "step": 14342 }, { "epoch": 0.48, "grad_norm": 0.4254874587059021, "learning_rate": 0.000563315649652268, "loss": 1.7583, "step": 14343 }, { "epoch": 0.48, "grad_norm": 0.4744369685649872, "learning_rate": 0.000563310641088946, "loss": 1.9317, "step": 14344 }, { "epoch": 0.48, "grad_norm": 0.44014972448349, "learning_rate": 0.0005633056322060039, "loss": 1.8443, "step": 14345 }, { "epoch": 0.48, "grad_norm": 0.44047093391418457, "learning_rate": 0.0005633006230034473, "loss": 1.8771, "step": 14346 }, { "epoch": 0.48, "grad_norm": 0.4259994626045227, "learning_rate": 0.0005632956134812827, "loss": 1.8447, "step": 14347 }, { "epoch": 0.48, "grad_norm": 0.43301939964294434, "learning_rate": 0.000563290603639516, "loss": 1.8531, "step": 14348 }, { "epoch": 0.48, "grad_norm": 0.42627713084220886, "learning_rate": 0.0005632855934781533, "loss": 1.8528, "step": 14349 }, { "epoch": 0.48, "grad_norm": 0.45033881068229675, "learning_rate": 0.0005632805829972007, "loss": 1.8423, "step": 14350 }, { "epoch": 0.48, "grad_norm": 0.452553927898407, "learning_rate": 0.0005632755721966643, "loss": 1.8982, "step": 14351 }, { "epoch": 0.48, "grad_norm": 0.44995686411857605, "learning_rate": 0.0005632705610765501, "loss": 1.9181, "step": 14352 }, { "epoch": 0.48, "grad_norm": 0.4680420458316803, "learning_rate": 0.0005632655496368642, "loss": 1.9132, "step": 14353 }, { "epoch": 0.48, "grad_norm": 0.4406866431236267, "learning_rate": 0.0005632605378776127, "loss": 1.7793, "step": 14354 }, { "epoch": 0.48, "grad_norm": 0.4366621673107147, "learning_rate": 0.0005632555257988018, "loss": 1.9308, "step": 14355 }, { "epoch": 0.48, "grad_norm": 0.4485659897327423, "learning_rate": 0.0005632505134004375, "loss": 1.8714, "step": 14356 }, { "epoch": 0.48, "grad_norm": 0.47418469190597534, "learning_rate": 0.0005632455006825257, "loss": 1.8954, "step": 14357 }, { "epoch": 0.48, "grad_norm": 0.4197419285774231, "learning_rate": 0.0005632404876450728, "loss": 1.8439, "step": 14358 }, { "epoch": 0.48, "grad_norm": 0.43696728348731995, "learning_rate": 0.0005632354742880845, "loss": 1.8633, "step": 14359 }, { "epoch": 0.48, "grad_norm": 0.46980348229408264, "learning_rate": 0.0005632304606115674, "loss": 1.9344, "step": 14360 }, { "epoch": 0.48, "grad_norm": 0.4345586597919464, "learning_rate": 0.0005632254466155271, "loss": 1.9203, "step": 14361 }, { "epoch": 0.48, "grad_norm": 0.42399561405181885, "learning_rate": 0.0005632204322999701, "loss": 1.8361, "step": 14362 }, { "epoch": 0.48, "grad_norm": 0.4509350657463074, "learning_rate": 0.0005632154176649021, "loss": 1.8723, "step": 14363 }, { "epoch": 0.48, "grad_norm": 0.436946302652359, "learning_rate": 0.0005632104027103295, "loss": 1.8602, "step": 14364 }, { "epoch": 0.48, "grad_norm": 0.4292728304862976, "learning_rate": 0.0005632053874362582, "loss": 1.9312, "step": 14365 }, { "epoch": 0.48, "grad_norm": 0.42721930146217346, "learning_rate": 0.0005632003718426943, "loss": 1.8719, "step": 14366 }, { "epoch": 0.48, "grad_norm": 0.43801867961883545, "learning_rate": 0.0005631953559296439, "loss": 1.7612, "step": 14367 }, { "epoch": 0.48, "grad_norm": 0.4459068179130554, "learning_rate": 0.0005631903396971132, "loss": 1.8879, "step": 14368 }, { "epoch": 0.48, "grad_norm": 0.4368239641189575, "learning_rate": 0.0005631853231451082, "loss": 1.7744, "step": 14369 }, { "epoch": 0.48, "grad_norm": 0.4202328324317932, "learning_rate": 0.000563180306273635, "loss": 1.8376, "step": 14370 }, { "epoch": 0.48, "grad_norm": 0.4741261303424835, "learning_rate": 0.0005631752890826996, "loss": 1.9737, "step": 14371 }, { "epoch": 0.48, "grad_norm": 0.4375554025173187, "learning_rate": 0.0005631702715723083, "loss": 1.8891, "step": 14372 }, { "epoch": 0.48, "grad_norm": 0.4406239688396454, "learning_rate": 0.0005631652537424669, "loss": 1.8902, "step": 14373 }, { "epoch": 0.48, "grad_norm": 0.4296265244483948, "learning_rate": 0.0005631602355931817, "loss": 1.877, "step": 14374 }, { "epoch": 0.48, "grad_norm": 0.44078120589256287, "learning_rate": 0.0005631552171244589, "loss": 1.8441, "step": 14375 }, { "epoch": 0.48, "grad_norm": 0.43605390191078186, "learning_rate": 0.0005631501983363044, "loss": 1.8458, "step": 14376 }, { "epoch": 0.48, "grad_norm": 0.4609103798866272, "learning_rate": 0.0005631451792287243, "loss": 1.8898, "step": 14377 }, { "epoch": 0.48, "grad_norm": 0.439841091632843, "learning_rate": 0.0005631401598017246, "loss": 1.7898, "step": 14378 }, { "epoch": 0.48, "grad_norm": 0.4167526662349701, "learning_rate": 0.0005631351400553117, "loss": 1.8691, "step": 14379 }, { "epoch": 0.48, "grad_norm": 0.44927650690078735, "learning_rate": 0.0005631301199894914, "loss": 1.9865, "step": 14380 }, { "epoch": 0.48, "grad_norm": 0.4279504716396332, "learning_rate": 0.00056312509960427, "loss": 1.8946, "step": 14381 }, { "epoch": 0.48, "grad_norm": 0.43269434571266174, "learning_rate": 0.0005631200788996534, "loss": 1.8326, "step": 14382 }, { "epoch": 0.48, "grad_norm": 0.434063583612442, "learning_rate": 0.0005631150578756478, "loss": 1.8936, "step": 14383 }, { "epoch": 0.48, "grad_norm": 0.43301790952682495, "learning_rate": 0.0005631100365322593, "loss": 1.8652, "step": 14384 }, { "epoch": 0.48, "grad_norm": 0.4195480942726135, "learning_rate": 0.000563105014869494, "loss": 1.8284, "step": 14385 }, { "epoch": 0.48, "grad_norm": 0.44064489006996155, "learning_rate": 0.0005630999928873581, "loss": 1.8342, "step": 14386 }, { "epoch": 0.48, "grad_norm": 0.44173377752304077, "learning_rate": 0.0005630949705858574, "loss": 1.8453, "step": 14387 }, { "epoch": 0.48, "grad_norm": 0.4117773473262787, "learning_rate": 0.0005630899479649983, "loss": 1.9061, "step": 14388 }, { "epoch": 0.48, "grad_norm": 0.43777787685394287, "learning_rate": 0.0005630849250247866, "loss": 1.8614, "step": 14389 }, { "epoch": 0.48, "grad_norm": 0.43480685353279114, "learning_rate": 0.0005630799017652288, "loss": 1.8521, "step": 14390 }, { "epoch": 0.48, "grad_norm": 0.454275518655777, "learning_rate": 0.0005630748781863306, "loss": 1.8772, "step": 14391 }, { "epoch": 0.48, "grad_norm": 0.4107423424720764, "learning_rate": 0.0005630698542880982, "loss": 1.8539, "step": 14392 }, { "epoch": 0.48, "grad_norm": 0.41868823766708374, "learning_rate": 0.0005630648300705379, "loss": 1.8835, "step": 14393 }, { "epoch": 0.48, "grad_norm": 0.43590545654296875, "learning_rate": 0.0005630598055336555, "loss": 1.9227, "step": 14394 }, { "epoch": 0.48, "grad_norm": 0.43576958775520325, "learning_rate": 0.0005630547806774575, "loss": 1.8269, "step": 14395 }, { "epoch": 0.48, "grad_norm": 0.4282964766025543, "learning_rate": 0.0005630497555019494, "loss": 1.8135, "step": 14396 }, { "epoch": 0.48, "grad_norm": 0.43877464532852173, "learning_rate": 0.000563044730007138, "loss": 1.8771, "step": 14397 }, { "epoch": 0.48, "grad_norm": 0.43247929215431213, "learning_rate": 0.000563039704193029, "loss": 1.8374, "step": 14398 }, { "epoch": 0.48, "grad_norm": 0.43014347553253174, "learning_rate": 0.0005630346780596284, "loss": 1.8833, "step": 14399 }, { "epoch": 0.48, "grad_norm": 0.43292370438575745, "learning_rate": 0.0005630296516069426, "loss": 1.8599, "step": 14400 }, { "epoch": 0.48, "grad_norm": 0.42466413974761963, "learning_rate": 0.0005630246248349775, "loss": 1.9055, "step": 14401 }, { "epoch": 0.48, "grad_norm": 0.44053903222084045, "learning_rate": 0.0005630195977437392, "loss": 1.8883, "step": 14402 }, { "epoch": 0.48, "grad_norm": 0.43178844451904297, "learning_rate": 0.0005630145703332339, "loss": 1.9296, "step": 14403 }, { "epoch": 0.48, "grad_norm": 0.4383983612060547, "learning_rate": 0.0005630095426034677, "loss": 1.8977, "step": 14404 }, { "epoch": 0.48, "grad_norm": 0.4260345995426178, "learning_rate": 0.0005630045145544467, "loss": 1.8617, "step": 14405 }, { "epoch": 0.48, "grad_norm": 0.42146003246307373, "learning_rate": 0.0005629994861861768, "loss": 1.7984, "step": 14406 }, { "epoch": 0.48, "grad_norm": 0.4314207434654236, "learning_rate": 0.0005629944574986644, "loss": 1.8456, "step": 14407 }, { "epoch": 0.48, "grad_norm": 0.42760327458381653, "learning_rate": 0.0005629894284919155, "loss": 1.8746, "step": 14408 }, { "epoch": 0.48, "grad_norm": 0.4284750819206238, "learning_rate": 0.0005629843991659362, "loss": 1.8763, "step": 14409 }, { "epoch": 0.48, "grad_norm": 0.4281964600086212, "learning_rate": 0.0005629793695207326, "loss": 1.8861, "step": 14410 }, { "epoch": 0.48, "grad_norm": 0.4245941936969757, "learning_rate": 0.0005629743395563106, "loss": 1.8011, "step": 14411 }, { "epoch": 0.48, "grad_norm": 0.43125995993614197, "learning_rate": 0.0005629693092726768, "loss": 1.9161, "step": 14412 }, { "epoch": 0.48, "grad_norm": 0.4240153133869171, "learning_rate": 0.0005629642786698368, "loss": 1.8792, "step": 14413 }, { "epoch": 0.48, "grad_norm": 0.4623980224132538, "learning_rate": 0.000562959247747797, "loss": 1.8834, "step": 14414 }, { "epoch": 0.48, "grad_norm": 0.4250747561454773, "learning_rate": 0.0005629542165065634, "loss": 1.7817, "step": 14415 }, { "epoch": 0.48, "grad_norm": 0.42625734210014343, "learning_rate": 0.0005629491849461419, "loss": 1.8777, "step": 14416 }, { "epoch": 0.48, "grad_norm": 0.44994792342185974, "learning_rate": 0.000562944153066539, "loss": 1.8932, "step": 14417 }, { "epoch": 0.48, "grad_norm": 0.43810567259788513, "learning_rate": 0.0005629391208677607, "loss": 1.9083, "step": 14418 }, { "epoch": 0.48, "grad_norm": 0.44181546568870544, "learning_rate": 0.0005629340883498131, "loss": 1.8997, "step": 14419 }, { "epoch": 0.48, "grad_norm": 0.43499135971069336, "learning_rate": 0.0005629290555127023, "loss": 1.8619, "step": 14420 }, { "epoch": 0.48, "grad_norm": 0.43814224004745483, "learning_rate": 0.0005629240223564342, "loss": 1.8394, "step": 14421 }, { "epoch": 0.48, "grad_norm": 0.45332232117652893, "learning_rate": 0.0005629189888810151, "loss": 1.9452, "step": 14422 }, { "epoch": 0.48, "grad_norm": 0.43421250581741333, "learning_rate": 0.0005629139550864511, "loss": 1.836, "step": 14423 }, { "epoch": 0.48, "grad_norm": 0.44276630878448486, "learning_rate": 0.0005629089209727482, "loss": 1.8648, "step": 14424 }, { "epoch": 0.48, "grad_norm": 0.42249950766563416, "learning_rate": 0.0005629038865399129, "loss": 1.9089, "step": 14425 }, { "epoch": 0.48, "grad_norm": 0.4882490932941437, "learning_rate": 0.0005628988517879508, "loss": 1.9446, "step": 14426 }, { "epoch": 0.48, "grad_norm": 0.4350004196166992, "learning_rate": 0.0005628938167168683, "loss": 1.8379, "step": 14427 }, { "epoch": 0.48, "grad_norm": 0.46627822518348694, "learning_rate": 0.0005628887813266713, "loss": 1.8499, "step": 14428 }, { "epoch": 0.48, "grad_norm": 0.43335455656051636, "learning_rate": 0.0005628837456173663, "loss": 1.877, "step": 14429 }, { "epoch": 0.48, "grad_norm": 0.42430049180984497, "learning_rate": 0.0005628787095889589, "loss": 1.9327, "step": 14430 }, { "epoch": 0.48, "grad_norm": 0.43227189779281616, "learning_rate": 0.0005628736732414556, "loss": 1.8475, "step": 14431 }, { "epoch": 0.48, "grad_norm": 0.4396941363811493, "learning_rate": 0.0005628686365748625, "loss": 1.8452, "step": 14432 }, { "epoch": 0.48, "grad_norm": 0.43514859676361084, "learning_rate": 0.0005628635995891855, "loss": 1.8126, "step": 14433 }, { "epoch": 0.48, "grad_norm": 0.4444495439529419, "learning_rate": 0.0005628585622844308, "loss": 1.9762, "step": 14434 }, { "epoch": 0.48, "grad_norm": 0.43468359112739563, "learning_rate": 0.0005628535246606046, "loss": 1.8451, "step": 14435 }, { "epoch": 0.48, "grad_norm": 0.43515703082084656, "learning_rate": 0.0005628484867177129, "loss": 1.8309, "step": 14436 }, { "epoch": 0.48, "grad_norm": 0.4270675778388977, "learning_rate": 0.0005628434484557619, "loss": 1.8282, "step": 14437 }, { "epoch": 0.48, "grad_norm": 0.4347407817840576, "learning_rate": 0.0005628384098747576, "loss": 1.8512, "step": 14438 }, { "epoch": 0.48, "grad_norm": 0.4157640039920807, "learning_rate": 0.0005628333709747062, "loss": 1.7791, "step": 14439 }, { "epoch": 0.48, "grad_norm": 0.4218878746032715, "learning_rate": 0.000562828331755614, "loss": 1.8421, "step": 14440 }, { "epoch": 0.48, "grad_norm": 0.4235675632953644, "learning_rate": 0.0005628232922174868, "loss": 1.8961, "step": 14441 }, { "epoch": 0.48, "grad_norm": 0.428041934967041, "learning_rate": 0.0005628182523603309, "loss": 1.8833, "step": 14442 }, { "epoch": 0.48, "grad_norm": 0.4393380880355835, "learning_rate": 0.0005628132121841522, "loss": 1.8486, "step": 14443 }, { "epoch": 0.48, "grad_norm": 0.4299579858779907, "learning_rate": 0.0005628081716889573, "loss": 1.8958, "step": 14444 }, { "epoch": 0.48, "grad_norm": 0.44744113087654114, "learning_rate": 0.0005628031308747517, "loss": 1.9026, "step": 14445 }, { "epoch": 0.48, "grad_norm": 0.41488972306251526, "learning_rate": 0.000562798089741542, "loss": 1.7946, "step": 14446 }, { "epoch": 0.48, "grad_norm": 0.4325491189956665, "learning_rate": 0.000562793048289334, "loss": 1.9384, "step": 14447 }, { "epoch": 0.48, "grad_norm": 0.41869890689849854, "learning_rate": 0.0005627880065181341, "loss": 1.8316, "step": 14448 }, { "epoch": 0.48, "grad_norm": 0.43115514516830444, "learning_rate": 0.0005627829644279482, "loss": 1.8663, "step": 14449 }, { "epoch": 0.48, "grad_norm": 0.446004182100296, "learning_rate": 0.0005627779220187825, "loss": 1.9025, "step": 14450 }, { "epoch": 0.48, "grad_norm": 0.43172410130500793, "learning_rate": 0.0005627728792906431, "loss": 1.8772, "step": 14451 }, { "epoch": 0.48, "grad_norm": 0.4315512776374817, "learning_rate": 0.0005627678362435363, "loss": 1.8925, "step": 14452 }, { "epoch": 0.48, "grad_norm": 0.4385579824447632, "learning_rate": 0.0005627627928774679, "loss": 1.8359, "step": 14453 }, { "epoch": 0.48, "grad_norm": 0.44681206345558167, "learning_rate": 0.0005627577491924442, "loss": 1.9348, "step": 14454 }, { "epoch": 0.48, "grad_norm": 0.4260104298591614, "learning_rate": 0.0005627527051884713, "loss": 1.9233, "step": 14455 }, { "epoch": 0.48, "grad_norm": 0.43157121539115906, "learning_rate": 0.0005627476608655554, "loss": 1.777, "step": 14456 }, { "epoch": 0.48, "grad_norm": 0.42432865500450134, "learning_rate": 0.0005627426162237026, "loss": 1.8037, "step": 14457 }, { "epoch": 0.48, "grad_norm": 0.42302006483078003, "learning_rate": 0.0005627375712629188, "loss": 1.8558, "step": 14458 }, { "epoch": 0.48, "grad_norm": 0.43561917543411255, "learning_rate": 0.0005627325259832104, "loss": 1.9243, "step": 14459 }, { "epoch": 0.48, "grad_norm": 0.4479922950267792, "learning_rate": 0.0005627274803845834, "loss": 1.8078, "step": 14460 }, { "epoch": 0.48, "grad_norm": 0.42737218737602234, "learning_rate": 0.000562722434467044, "loss": 1.87, "step": 14461 }, { "epoch": 0.48, "grad_norm": 0.4292556345462799, "learning_rate": 0.0005627173882305983, "loss": 1.8456, "step": 14462 }, { "epoch": 0.48, "grad_norm": 0.47367963194847107, "learning_rate": 0.0005627123416752523, "loss": 1.864, "step": 14463 }, { "epoch": 0.48, "grad_norm": 0.4387977719306946, "learning_rate": 0.0005627072948010122, "loss": 1.879, "step": 14464 }, { "epoch": 0.48, "grad_norm": 0.46189531683921814, "learning_rate": 0.0005627022476078843, "loss": 1.9121, "step": 14465 }, { "epoch": 0.48, "grad_norm": 0.44030359387397766, "learning_rate": 0.0005626972000958745, "loss": 1.8559, "step": 14466 }, { "epoch": 0.48, "grad_norm": 0.4526975750923157, "learning_rate": 0.000562692152264989, "loss": 1.9394, "step": 14467 }, { "epoch": 0.48, "grad_norm": 0.4353369176387787, "learning_rate": 0.000562687104115234, "loss": 1.848, "step": 14468 }, { "epoch": 0.48, "grad_norm": 0.4459981620311737, "learning_rate": 0.0005626820556466155, "loss": 1.7581, "step": 14469 }, { "epoch": 0.48, "grad_norm": 0.47620782256126404, "learning_rate": 0.0005626770068591397, "loss": 1.8751, "step": 14470 }, { "epoch": 0.48, "grad_norm": 0.43882185220718384, "learning_rate": 0.0005626719577528127, "loss": 1.8064, "step": 14471 }, { "epoch": 0.48, "grad_norm": 0.45251020789146423, "learning_rate": 0.0005626669083276407, "loss": 1.8678, "step": 14472 }, { "epoch": 0.48, "grad_norm": 0.457779198884964, "learning_rate": 0.0005626618585836296, "loss": 1.8632, "step": 14473 }, { "epoch": 0.48, "grad_norm": 0.452022910118103, "learning_rate": 0.0005626568085207859, "loss": 1.9441, "step": 14474 }, { "epoch": 0.48, "grad_norm": 0.4387318789958954, "learning_rate": 0.0005626517581391155, "loss": 1.9276, "step": 14475 }, { "epoch": 0.48, "grad_norm": 0.4511900246143341, "learning_rate": 0.0005626467074386245, "loss": 1.8858, "step": 14476 }, { "epoch": 0.48, "grad_norm": 0.45889341831207275, "learning_rate": 0.0005626416564193191, "loss": 1.8734, "step": 14477 }, { "epoch": 0.48, "grad_norm": 0.43643754720687866, "learning_rate": 0.0005626366050812056, "loss": 1.7569, "step": 14478 }, { "epoch": 0.48, "grad_norm": 0.4314347505569458, "learning_rate": 0.0005626315534242898, "loss": 1.8478, "step": 14479 }, { "epoch": 0.48, "grad_norm": 0.4625723659992218, "learning_rate": 0.000562626501448578, "loss": 1.8959, "step": 14480 }, { "epoch": 0.48, "grad_norm": 0.4533512592315674, "learning_rate": 0.0005626214491540763, "loss": 1.8671, "step": 14481 }, { "epoch": 0.48, "grad_norm": 0.44947996735572815, "learning_rate": 0.0005626163965407909, "loss": 1.8759, "step": 14482 }, { "epoch": 0.48, "grad_norm": 0.4318940043449402, "learning_rate": 0.0005626113436087279, "loss": 1.8306, "step": 14483 }, { "epoch": 0.48, "grad_norm": 0.43491512537002563, "learning_rate": 0.0005626062903578933, "loss": 1.9011, "step": 14484 }, { "epoch": 0.48, "grad_norm": 0.4611656963825226, "learning_rate": 0.0005626012367882935, "loss": 1.9047, "step": 14485 }, { "epoch": 0.48, "grad_norm": 0.43658173084259033, "learning_rate": 0.0005625961828999344, "loss": 1.8613, "step": 14486 }, { "epoch": 0.48, "grad_norm": 0.4368780255317688, "learning_rate": 0.0005625911286928223, "loss": 1.8583, "step": 14487 }, { "epoch": 0.48, "grad_norm": 0.4345438778400421, "learning_rate": 0.0005625860741669633, "loss": 1.911, "step": 14488 }, { "epoch": 0.48, "grad_norm": 0.43657585978507996, "learning_rate": 0.0005625810193223634, "loss": 1.9069, "step": 14489 }, { "epoch": 0.48, "grad_norm": 0.4522974193096161, "learning_rate": 0.0005625759641590289, "loss": 1.8786, "step": 14490 }, { "epoch": 0.48, "grad_norm": 0.42688342928886414, "learning_rate": 0.0005625709086769658, "loss": 1.9365, "step": 14491 }, { "epoch": 0.48, "grad_norm": 0.47532159090042114, "learning_rate": 0.0005625658528761804, "loss": 1.7994, "step": 14492 }, { "epoch": 0.48, "grad_norm": 0.44105497002601624, "learning_rate": 0.0005625607967566787, "loss": 1.8399, "step": 14493 }, { "epoch": 0.48, "grad_norm": 0.41821426153182983, "learning_rate": 0.0005625557403184669, "loss": 1.7508, "step": 14494 }, { "epoch": 0.48, "grad_norm": 0.43182501196861267, "learning_rate": 0.000562550683561551, "loss": 1.8532, "step": 14495 }, { "epoch": 0.48, "grad_norm": 0.4494696855545044, "learning_rate": 0.0005625456264859373, "loss": 1.8339, "step": 14496 }, { "epoch": 0.48, "grad_norm": 0.4530490040779114, "learning_rate": 0.0005625405690916319, "loss": 1.7797, "step": 14497 }, { "epoch": 0.48, "grad_norm": 0.4358515441417694, "learning_rate": 0.000562535511378641, "loss": 1.858, "step": 14498 }, { "epoch": 0.48, "grad_norm": 0.43266332149505615, "learning_rate": 0.0005625304533469706, "loss": 1.8575, "step": 14499 }, { "epoch": 0.48, "grad_norm": 0.4720059931278229, "learning_rate": 0.000562525394996627, "loss": 1.8636, "step": 14500 }, { "epoch": 0.48, "grad_norm": 0.44710975885391235, "learning_rate": 0.0005625203363276162, "loss": 1.9004, "step": 14501 }, { "epoch": 0.48, "grad_norm": 0.4267876148223877, "learning_rate": 0.0005625152773399444, "loss": 1.7647, "step": 14502 }, { "epoch": 0.48, "grad_norm": 0.4336038827896118, "learning_rate": 0.0005625102180336178, "loss": 1.8206, "step": 14503 }, { "epoch": 0.48, "grad_norm": 0.45261937379837036, "learning_rate": 0.0005625051584086424, "loss": 1.8464, "step": 14504 }, { "epoch": 0.48, "grad_norm": 0.42866218090057373, "learning_rate": 0.0005625000984650243, "loss": 1.847, "step": 14505 }, { "epoch": 0.48, "grad_norm": 0.44237020611763, "learning_rate": 0.0005624950382027698, "loss": 1.9062, "step": 14506 }, { "epoch": 0.48, "grad_norm": 0.4193964898586273, "learning_rate": 0.0005624899776218851, "loss": 1.7576, "step": 14507 }, { "epoch": 0.48, "grad_norm": 0.4205843210220337, "learning_rate": 0.0005624849167223762, "loss": 1.8128, "step": 14508 }, { "epoch": 0.48, "grad_norm": 0.4355217516422272, "learning_rate": 0.0005624798555042494, "loss": 1.947, "step": 14509 }, { "epoch": 0.48, "grad_norm": 0.43098366260528564, "learning_rate": 0.0005624747939675105, "loss": 1.9182, "step": 14510 }, { "epoch": 0.48, "grad_norm": 0.4343112111091614, "learning_rate": 0.0005624697321121661, "loss": 1.8626, "step": 14511 }, { "epoch": 0.48, "grad_norm": 0.4304189383983612, "learning_rate": 0.0005624646699382219, "loss": 1.9193, "step": 14512 }, { "epoch": 0.48, "grad_norm": 0.44661492109298706, "learning_rate": 0.0005624596074456845, "loss": 1.9032, "step": 14513 }, { "epoch": 0.48, "grad_norm": 0.4343455731868744, "learning_rate": 0.0005624545446345597, "loss": 1.7783, "step": 14514 }, { "epoch": 0.48, "grad_norm": 0.44268152117729187, "learning_rate": 0.0005624494815048537, "loss": 1.8531, "step": 14515 }, { "epoch": 0.48, "grad_norm": 0.42908626794815063, "learning_rate": 0.0005624444180565728, "loss": 1.8603, "step": 14516 }, { "epoch": 0.48, "grad_norm": 0.42348286509513855, "learning_rate": 0.000562439354289723, "loss": 1.8882, "step": 14517 }, { "epoch": 0.48, "grad_norm": 0.4466957747936249, "learning_rate": 0.0005624342902043105, "loss": 1.9099, "step": 14518 }, { "epoch": 0.48, "grad_norm": 0.4484303295612335, "learning_rate": 0.0005624292258003413, "loss": 1.8185, "step": 14519 }, { "epoch": 0.48, "grad_norm": 0.46039092540740967, "learning_rate": 0.0005624241610778218, "loss": 1.9328, "step": 14520 }, { "epoch": 0.48, "grad_norm": 0.4231398403644562, "learning_rate": 0.0005624190960367581, "loss": 1.8661, "step": 14521 }, { "epoch": 0.48, "grad_norm": 0.4255305528640747, "learning_rate": 0.0005624140306771562, "loss": 1.8814, "step": 14522 }, { "epoch": 0.48, "grad_norm": 0.42583101987838745, "learning_rate": 0.0005624089649990224, "loss": 1.8769, "step": 14523 }, { "epoch": 0.48, "grad_norm": 0.41976696252822876, "learning_rate": 0.0005624038990023628, "loss": 1.8931, "step": 14524 }, { "epoch": 0.48, "grad_norm": 0.43939661979675293, "learning_rate": 0.0005623988326871834, "loss": 1.9507, "step": 14525 }, { "epoch": 0.48, "grad_norm": 0.41956329345703125, "learning_rate": 0.0005623937660534905, "loss": 1.8315, "step": 14526 }, { "epoch": 0.48, "grad_norm": 0.4342198073863983, "learning_rate": 0.0005623886991012903, "loss": 1.8525, "step": 14527 }, { "epoch": 0.48, "grad_norm": 0.4292895495891571, "learning_rate": 0.0005623836318305889, "loss": 1.9026, "step": 14528 }, { "epoch": 0.48, "grad_norm": 0.432162880897522, "learning_rate": 0.0005623785642413923, "loss": 1.9009, "step": 14529 }, { "epoch": 0.48, "grad_norm": 0.4162173271179199, "learning_rate": 0.000562373496333707, "loss": 1.8109, "step": 14530 }, { "epoch": 0.48, "grad_norm": 0.4245227873325348, "learning_rate": 0.0005623684281075387, "loss": 1.8241, "step": 14531 }, { "epoch": 0.48, "grad_norm": 0.4336754381656647, "learning_rate": 0.000562363359562894, "loss": 1.8069, "step": 14532 }, { "epoch": 0.48, "grad_norm": 0.41892555356025696, "learning_rate": 0.0005623582906997787, "loss": 1.8568, "step": 14533 }, { "epoch": 0.48, "grad_norm": 0.4376344382762909, "learning_rate": 0.0005623532215181992, "loss": 1.871, "step": 14534 }, { "epoch": 0.48, "grad_norm": 0.433461993932724, "learning_rate": 0.0005623481520181615, "loss": 1.8663, "step": 14535 }, { "epoch": 0.48, "grad_norm": 0.4331231117248535, "learning_rate": 0.0005623430821996717, "loss": 1.9126, "step": 14536 }, { "epoch": 0.48, "grad_norm": 0.4355202913284302, "learning_rate": 0.0005623380120627361, "loss": 1.8647, "step": 14537 }, { "epoch": 0.48, "grad_norm": 0.441866934299469, "learning_rate": 0.0005623329416073609, "loss": 1.8875, "step": 14538 }, { "epoch": 0.48, "grad_norm": 0.4255392849445343, "learning_rate": 0.0005623278708335521, "loss": 1.8591, "step": 14539 }, { "epoch": 0.48, "grad_norm": 0.48238107562065125, "learning_rate": 0.0005623227997413158, "loss": 1.8317, "step": 14540 }, { "epoch": 0.48, "grad_norm": 0.4365127980709076, "learning_rate": 0.0005623177283306584, "loss": 1.825, "step": 14541 }, { "epoch": 0.48, "grad_norm": 0.4490816295146942, "learning_rate": 0.000562312656601586, "loss": 1.8864, "step": 14542 }, { "epoch": 0.48, "grad_norm": 0.44560396671295166, "learning_rate": 0.0005623075845541045, "loss": 1.7981, "step": 14543 }, { "epoch": 0.48, "grad_norm": 0.4291859269142151, "learning_rate": 0.0005623025121882203, "loss": 1.8979, "step": 14544 }, { "epoch": 0.48, "grad_norm": 0.4209861159324646, "learning_rate": 0.0005622974395039395, "loss": 1.9113, "step": 14545 }, { "epoch": 0.48, "grad_norm": 0.44664788246154785, "learning_rate": 0.0005622923665012684, "loss": 1.924, "step": 14546 }, { "epoch": 0.48, "grad_norm": 0.42901715636253357, "learning_rate": 0.0005622872931802129, "loss": 1.8424, "step": 14547 }, { "epoch": 0.48, "grad_norm": 0.43091121315956116, "learning_rate": 0.0005622822195407793, "loss": 1.9044, "step": 14548 }, { "epoch": 0.48, "grad_norm": 0.4299454092979431, "learning_rate": 0.0005622771455829737, "loss": 1.9135, "step": 14549 }, { "epoch": 0.48, "grad_norm": 0.4276728332042694, "learning_rate": 0.0005622720713068022, "loss": 1.8773, "step": 14550 }, { "epoch": 0.48, "grad_norm": 0.4344586730003357, "learning_rate": 0.0005622669967122713, "loss": 1.8341, "step": 14551 }, { "epoch": 0.48, "grad_norm": 0.41887232661247253, "learning_rate": 0.0005622619217993867, "loss": 1.8939, "step": 14552 }, { "epoch": 0.48, "grad_norm": 0.41077762842178345, "learning_rate": 0.0005622568465681548, "loss": 1.8323, "step": 14553 }, { "epoch": 0.48, "grad_norm": 0.43025463819503784, "learning_rate": 0.0005622517710185817, "loss": 1.8272, "step": 14554 }, { "epoch": 0.48, "grad_norm": 0.4367094933986664, "learning_rate": 0.0005622466951506738, "loss": 1.8148, "step": 14555 }, { "epoch": 0.48, "grad_norm": 0.4280675947666168, "learning_rate": 0.0005622416189644368, "loss": 1.9253, "step": 14556 }, { "epoch": 0.48, "grad_norm": 0.43448784947395325, "learning_rate": 0.0005622365424598773, "loss": 1.757, "step": 14557 }, { "epoch": 0.48, "grad_norm": 0.4188024401664734, "learning_rate": 0.0005622314656370011, "loss": 1.8398, "step": 14558 }, { "epoch": 0.48, "grad_norm": 0.424725204706192, "learning_rate": 0.0005622263884958146, "loss": 1.8235, "step": 14559 }, { "epoch": 0.48, "grad_norm": 0.4483807682991028, "learning_rate": 0.000562221311036324, "loss": 1.8235, "step": 14560 }, { "epoch": 0.48, "grad_norm": 0.44746702909469604, "learning_rate": 0.0005622162332585352, "loss": 1.9047, "step": 14561 }, { "epoch": 0.48, "grad_norm": 0.4450940787792206, "learning_rate": 0.0005622111551624547, "loss": 1.8796, "step": 14562 }, { "epoch": 0.48, "grad_norm": 0.4159209132194519, "learning_rate": 0.0005622060767480885, "loss": 1.8662, "step": 14563 }, { "epoch": 0.48, "grad_norm": 0.5764089822769165, "learning_rate": 0.0005622009980154426, "loss": 1.826, "step": 14564 }, { "epoch": 0.48, "grad_norm": 0.4408155381679535, "learning_rate": 0.0005621959189645233, "loss": 1.9022, "step": 14565 }, { "epoch": 0.48, "grad_norm": 0.4205373227596283, "learning_rate": 0.000562190839595337, "loss": 1.9112, "step": 14566 }, { "epoch": 0.48, "grad_norm": 0.41669636964797974, "learning_rate": 0.0005621857599078895, "loss": 1.8943, "step": 14567 }, { "epoch": 0.48, "grad_norm": 0.4309103190898895, "learning_rate": 0.0005621806799021872, "loss": 1.8776, "step": 14568 }, { "epoch": 0.48, "grad_norm": 0.4509873688220978, "learning_rate": 0.0005621755995782361, "loss": 1.8379, "step": 14569 }, { "epoch": 0.48, "grad_norm": 0.4269375503063202, "learning_rate": 0.0005621705189360424, "loss": 1.8464, "step": 14570 }, { "epoch": 0.48, "grad_norm": 0.4245465397834778, "learning_rate": 0.0005621654379756125, "loss": 1.8014, "step": 14571 }, { "epoch": 0.48, "grad_norm": 0.4325501322746277, "learning_rate": 0.0005621603566969523, "loss": 1.9113, "step": 14572 }, { "epoch": 0.48, "grad_norm": 0.4201217591762543, "learning_rate": 0.000562155275100068, "loss": 1.9037, "step": 14573 }, { "epoch": 0.48, "grad_norm": 0.4548444151878357, "learning_rate": 0.0005621501931849659, "loss": 1.8969, "step": 14574 }, { "epoch": 0.48, "grad_norm": 0.4303528666496277, "learning_rate": 0.0005621451109516521, "loss": 1.8731, "step": 14575 }, { "epoch": 0.48, "grad_norm": 0.4474640190601349, "learning_rate": 0.0005621400284001327, "loss": 1.8016, "step": 14576 }, { "epoch": 0.48, "grad_norm": 0.43532660603523254, "learning_rate": 0.000562134945530414, "loss": 1.8719, "step": 14577 }, { "epoch": 0.49, "grad_norm": 0.4406150281429291, "learning_rate": 0.000562129862342502, "loss": 1.9466, "step": 14578 }, { "epoch": 0.49, "grad_norm": 0.42898109555244446, "learning_rate": 0.000562124778836403, "loss": 1.7651, "step": 14579 }, { "epoch": 0.49, "grad_norm": 0.4317047894001007, "learning_rate": 0.0005621196950121232, "loss": 1.8719, "step": 14580 }, { "epoch": 0.49, "grad_norm": 0.44103795289993286, "learning_rate": 0.0005621146108696687, "loss": 1.9391, "step": 14581 }, { "epoch": 0.49, "grad_norm": 0.4490481913089752, "learning_rate": 0.0005621095264090456, "loss": 1.8853, "step": 14582 }, { "epoch": 0.49, "grad_norm": 0.43661928176879883, "learning_rate": 0.0005621044416302603, "loss": 1.8349, "step": 14583 }, { "epoch": 0.49, "grad_norm": 0.44588902592658997, "learning_rate": 0.0005620993565333188, "loss": 1.8787, "step": 14584 }, { "epoch": 0.49, "grad_norm": 0.4264417886734009, "learning_rate": 0.0005620942711182271, "loss": 1.9057, "step": 14585 }, { "epoch": 0.49, "grad_norm": 0.43093234300613403, "learning_rate": 0.0005620891853849919, "loss": 1.8686, "step": 14586 }, { "epoch": 0.49, "grad_norm": 0.43123164772987366, "learning_rate": 0.0005620840993336187, "loss": 1.7943, "step": 14587 }, { "epoch": 0.49, "grad_norm": 0.4289379417896271, "learning_rate": 0.0005620790129641142, "loss": 1.8322, "step": 14588 }, { "epoch": 0.49, "grad_norm": 0.4506388008594513, "learning_rate": 0.0005620739262764844, "loss": 1.8728, "step": 14589 }, { "epoch": 0.49, "grad_norm": 0.42932793498039246, "learning_rate": 0.0005620688392707355, "loss": 1.8716, "step": 14590 }, { "epoch": 0.49, "grad_norm": 0.42764198780059814, "learning_rate": 0.0005620637519468735, "loss": 1.7959, "step": 14591 }, { "epoch": 0.49, "grad_norm": 0.4378912150859833, "learning_rate": 0.0005620586643049048, "loss": 1.9069, "step": 14592 }, { "epoch": 0.49, "grad_norm": 0.42361652851104736, "learning_rate": 0.0005620535763448355, "loss": 1.8175, "step": 14593 }, { "epoch": 0.49, "grad_norm": 0.43966370820999146, "learning_rate": 0.0005620484880666718, "loss": 1.9434, "step": 14594 }, { "epoch": 0.49, "grad_norm": 0.42915698885917664, "learning_rate": 0.0005620433994704199, "loss": 1.8136, "step": 14595 }, { "epoch": 0.49, "grad_norm": 0.432585746049881, "learning_rate": 0.0005620383105560858, "loss": 1.8683, "step": 14596 }, { "epoch": 0.49, "grad_norm": 0.482414186000824, "learning_rate": 0.0005620332213236757, "loss": 1.7962, "step": 14597 }, { "epoch": 0.49, "grad_norm": 0.44892433285713196, "learning_rate": 0.0005620281317731961, "loss": 1.8705, "step": 14598 }, { "epoch": 0.49, "grad_norm": 0.435712069272995, "learning_rate": 0.0005620230419046529, "loss": 1.8616, "step": 14599 }, { "epoch": 0.49, "grad_norm": 0.4418545961380005, "learning_rate": 0.0005620179517180524, "loss": 1.8325, "step": 14600 }, { "epoch": 0.49, "grad_norm": 0.43391457200050354, "learning_rate": 0.0005620128612134006, "loss": 1.8176, "step": 14601 }, { "epoch": 0.49, "grad_norm": 0.44188815355300903, "learning_rate": 0.0005620077703907037, "loss": 1.8889, "step": 14602 }, { "epoch": 0.49, "grad_norm": 0.4292016923427582, "learning_rate": 0.0005620026792499682, "loss": 1.8186, "step": 14603 }, { "epoch": 0.49, "grad_norm": 0.4312640428543091, "learning_rate": 0.0005619975877911998, "loss": 1.8647, "step": 14604 }, { "epoch": 0.49, "grad_norm": 0.45610883831977844, "learning_rate": 0.0005619924960144051, "loss": 1.8527, "step": 14605 }, { "epoch": 0.49, "grad_norm": 0.44647496938705444, "learning_rate": 0.0005619874039195901, "loss": 1.8746, "step": 14606 }, { "epoch": 0.49, "grad_norm": 0.43417903780937195, "learning_rate": 0.000561982311506761, "loss": 1.8346, "step": 14607 }, { "epoch": 0.49, "grad_norm": 0.4295724034309387, "learning_rate": 0.0005619772187759239, "loss": 1.8896, "step": 14608 }, { "epoch": 0.49, "grad_norm": 0.43398240208625793, "learning_rate": 0.0005619721257270852, "loss": 1.8064, "step": 14609 }, { "epoch": 0.49, "grad_norm": 0.4554620385169983, "learning_rate": 0.0005619670323602508, "loss": 1.8412, "step": 14610 }, { "epoch": 0.49, "grad_norm": 0.4407874345779419, "learning_rate": 0.000561961938675427, "loss": 1.81, "step": 14611 }, { "epoch": 0.49, "grad_norm": 0.45172441005706787, "learning_rate": 0.0005619568446726201, "loss": 1.9057, "step": 14612 }, { "epoch": 0.49, "grad_norm": 0.4568668305873871, "learning_rate": 0.0005619517503518362, "loss": 1.8661, "step": 14613 }, { "epoch": 0.49, "grad_norm": 0.4370329976081848, "learning_rate": 0.0005619466557130814, "loss": 1.9082, "step": 14614 }, { "epoch": 0.49, "grad_norm": 0.4331998825073242, "learning_rate": 0.0005619415607563621, "loss": 1.8487, "step": 14615 }, { "epoch": 0.49, "grad_norm": 0.478781133890152, "learning_rate": 0.0005619364654816841, "loss": 2.0119, "step": 14616 }, { "epoch": 0.49, "grad_norm": 0.4239282011985779, "learning_rate": 0.000561931369889054, "loss": 1.8806, "step": 14617 }, { "epoch": 0.49, "grad_norm": 0.43010151386260986, "learning_rate": 0.0005619262739784777, "loss": 1.8814, "step": 14618 }, { "epoch": 0.49, "grad_norm": 0.43706390261650085, "learning_rate": 0.0005619211777499616, "loss": 1.8713, "step": 14619 }, { "epoch": 0.49, "grad_norm": 0.44033581018447876, "learning_rate": 0.0005619160812035118, "loss": 1.8576, "step": 14620 }, { "epoch": 0.49, "grad_norm": 0.4345799386501312, "learning_rate": 0.0005619109843391344, "loss": 1.9176, "step": 14621 }, { "epoch": 0.49, "grad_norm": 0.4352740943431854, "learning_rate": 0.0005619058871568356, "loss": 1.756, "step": 14622 }, { "epoch": 0.49, "grad_norm": 0.4364630877971649, "learning_rate": 0.0005619007896566218, "loss": 1.8426, "step": 14623 }, { "epoch": 0.49, "grad_norm": 0.44080376625061035, "learning_rate": 0.0005618956918384989, "loss": 1.8133, "step": 14624 }, { "epoch": 0.49, "grad_norm": 0.43766483664512634, "learning_rate": 0.0005618905937024733, "loss": 1.8695, "step": 14625 }, { "epoch": 0.49, "grad_norm": 0.4420400559902191, "learning_rate": 0.000561885495248551, "loss": 1.9498, "step": 14626 }, { "epoch": 0.49, "grad_norm": 0.444857120513916, "learning_rate": 0.0005618803964767384, "loss": 1.8974, "step": 14627 }, { "epoch": 0.49, "grad_norm": 0.4246895909309387, "learning_rate": 0.0005618752973870415, "loss": 1.8059, "step": 14628 }, { "epoch": 0.49, "grad_norm": 0.44884392619132996, "learning_rate": 0.0005618701979794666, "loss": 1.9216, "step": 14629 }, { "epoch": 0.49, "grad_norm": 0.43280595541000366, "learning_rate": 0.00056186509825402, "loss": 1.8359, "step": 14630 }, { "epoch": 0.49, "grad_norm": 0.4248908758163452, "learning_rate": 0.0005618599982107076, "loss": 1.8684, "step": 14631 }, { "epoch": 0.49, "grad_norm": 0.4374758005142212, "learning_rate": 0.0005618548978495358, "loss": 1.8837, "step": 14632 }, { "epoch": 0.49, "grad_norm": 0.9181144833564758, "learning_rate": 0.0005618497971705107, "loss": 1.9017, "step": 14633 }, { "epoch": 0.49, "grad_norm": 0.43462440371513367, "learning_rate": 0.0005618446961736386, "loss": 1.8915, "step": 14634 }, { "epoch": 0.49, "grad_norm": 0.43038561940193176, "learning_rate": 0.0005618395948589256, "loss": 1.8816, "step": 14635 }, { "epoch": 0.49, "grad_norm": 0.4238419830799103, "learning_rate": 0.0005618344932263778, "loss": 1.8759, "step": 14636 }, { "epoch": 0.49, "grad_norm": 0.42527326941490173, "learning_rate": 0.0005618293912760016, "loss": 1.8437, "step": 14637 }, { "epoch": 0.49, "grad_norm": 0.4429328441619873, "learning_rate": 0.0005618242890078031, "loss": 1.8678, "step": 14638 }, { "epoch": 0.49, "grad_norm": 0.42701441049575806, "learning_rate": 0.0005618191864217885, "loss": 1.8962, "step": 14639 }, { "epoch": 0.49, "grad_norm": 0.4187668263912201, "learning_rate": 0.000561814083517964, "loss": 1.8686, "step": 14640 }, { "epoch": 0.49, "grad_norm": 0.45190832018852234, "learning_rate": 0.0005618089802963358, "loss": 1.9168, "step": 14641 }, { "epoch": 0.49, "grad_norm": 0.42718586325645447, "learning_rate": 0.00056180387675691, "loss": 1.8727, "step": 14642 }, { "epoch": 0.49, "grad_norm": 0.43604230880737305, "learning_rate": 0.0005617987728996928, "loss": 1.9449, "step": 14643 }, { "epoch": 0.49, "grad_norm": 0.4420173764228821, "learning_rate": 0.0005617936687246905, "loss": 1.8612, "step": 14644 }, { "epoch": 0.49, "grad_norm": 0.4386898875236511, "learning_rate": 0.0005617885642319094, "loss": 1.8822, "step": 14645 }, { "epoch": 0.49, "grad_norm": 0.433020681142807, "learning_rate": 0.0005617834594213554, "loss": 1.9263, "step": 14646 }, { "epoch": 0.49, "grad_norm": 0.42090070247650146, "learning_rate": 0.0005617783542930349, "loss": 1.8591, "step": 14647 }, { "epoch": 0.49, "grad_norm": 0.4413405656814575, "learning_rate": 0.0005617732488469541, "loss": 1.7735, "step": 14648 }, { "epoch": 0.49, "grad_norm": 0.42103099822998047, "learning_rate": 0.0005617681430831192, "loss": 1.9035, "step": 14649 }, { "epoch": 0.49, "grad_norm": 0.4290846288204193, "learning_rate": 0.0005617630370015361, "loss": 1.9118, "step": 14650 }, { "epoch": 0.49, "grad_norm": 0.4363458454608917, "learning_rate": 0.0005617579306022115, "loss": 1.8686, "step": 14651 }, { "epoch": 0.49, "grad_norm": 0.4521642327308655, "learning_rate": 0.0005617528238851512, "loss": 1.8734, "step": 14652 }, { "epoch": 0.49, "grad_norm": 0.44734591245651245, "learning_rate": 0.0005617477168503615, "loss": 1.8787, "step": 14653 }, { "epoch": 0.49, "grad_norm": 0.43422847986221313, "learning_rate": 0.0005617426094978487, "loss": 1.7741, "step": 14654 }, { "epoch": 0.49, "grad_norm": 0.42381152510643005, "learning_rate": 0.000561737501827619, "loss": 1.8609, "step": 14655 }, { "epoch": 0.49, "grad_norm": 0.4413861036300659, "learning_rate": 0.0005617323938396785, "loss": 1.84, "step": 14656 }, { "epoch": 0.49, "grad_norm": 0.4406067430973053, "learning_rate": 0.0005617272855340335, "loss": 1.8223, "step": 14657 }, { "epoch": 0.49, "grad_norm": 0.4405076801776886, "learning_rate": 0.00056172217691069, "loss": 1.8862, "step": 14658 }, { "epoch": 0.49, "grad_norm": 0.43630874156951904, "learning_rate": 0.0005617170679696544, "loss": 1.9176, "step": 14659 }, { "epoch": 0.49, "grad_norm": 0.43592438101768494, "learning_rate": 0.0005617119587109329, "loss": 1.9108, "step": 14660 }, { "epoch": 0.49, "grad_norm": 0.4584639370441437, "learning_rate": 0.0005617068491345316, "loss": 1.9666, "step": 14661 }, { "epoch": 0.49, "grad_norm": 0.43644317984580994, "learning_rate": 0.0005617017392404568, "loss": 1.9276, "step": 14662 }, { "epoch": 0.49, "grad_norm": 0.4279785454273224, "learning_rate": 0.0005616966290287145, "loss": 1.9606, "step": 14663 }, { "epoch": 0.49, "grad_norm": 0.461138516664505, "learning_rate": 0.0005616915184993112, "loss": 1.8663, "step": 14664 }, { "epoch": 0.49, "grad_norm": 0.429207980632782, "learning_rate": 0.0005616864076522529, "loss": 1.839, "step": 14665 }, { "epoch": 0.49, "grad_norm": 0.45394089818000793, "learning_rate": 0.0005616812964875459, "loss": 1.8566, "step": 14666 }, { "epoch": 0.49, "grad_norm": 0.4395495057106018, "learning_rate": 0.0005616761850051963, "loss": 1.8778, "step": 14667 }, { "epoch": 0.49, "grad_norm": 0.43397995829582214, "learning_rate": 0.0005616710732052104, "loss": 1.9117, "step": 14668 }, { "epoch": 0.49, "grad_norm": 0.4187714755535126, "learning_rate": 0.0005616659610875942, "loss": 1.8293, "step": 14669 }, { "epoch": 0.49, "grad_norm": 0.418779581785202, "learning_rate": 0.0005616608486523543, "loss": 1.8748, "step": 14670 }, { "epoch": 0.49, "grad_norm": 0.4248424172401428, "learning_rate": 0.0005616557358994967, "loss": 1.8767, "step": 14671 }, { "epoch": 0.49, "grad_norm": 0.44263285398483276, "learning_rate": 0.0005616506228290274, "loss": 1.9635, "step": 14672 }, { "epoch": 0.49, "grad_norm": 0.4332755506038666, "learning_rate": 0.000561645509440953, "loss": 1.8905, "step": 14673 }, { "epoch": 0.49, "grad_norm": 0.43387371301651, "learning_rate": 0.0005616403957352794, "loss": 1.9364, "step": 14674 }, { "epoch": 0.49, "grad_norm": 0.4119519591331482, "learning_rate": 0.000561635281712013, "loss": 1.8614, "step": 14675 }, { "epoch": 0.49, "grad_norm": 0.44807955622673035, "learning_rate": 0.0005616301673711597, "loss": 1.8901, "step": 14676 }, { "epoch": 0.49, "grad_norm": 0.41995516419410706, "learning_rate": 0.0005616250527127262, "loss": 1.808, "step": 14677 }, { "epoch": 0.49, "grad_norm": 0.4350198805332184, "learning_rate": 0.0005616199377367182, "loss": 1.923, "step": 14678 }, { "epoch": 0.49, "grad_norm": 0.42670121788978577, "learning_rate": 0.0005616148224431422, "loss": 1.8636, "step": 14679 }, { "epoch": 0.49, "grad_norm": 0.41097086668014526, "learning_rate": 0.0005616097068320044, "loss": 1.9042, "step": 14680 }, { "epoch": 0.49, "grad_norm": 0.4171651303768158, "learning_rate": 0.000561604590903311, "loss": 1.8547, "step": 14681 }, { "epoch": 0.49, "grad_norm": 0.42149487137794495, "learning_rate": 0.0005615994746570681, "loss": 1.8612, "step": 14682 }, { "epoch": 0.49, "grad_norm": 0.4375353455543518, "learning_rate": 0.000561594358093282, "loss": 1.9064, "step": 14683 }, { "epoch": 0.49, "grad_norm": 0.44498616456985474, "learning_rate": 0.000561589241211959, "loss": 1.8034, "step": 14684 }, { "epoch": 0.49, "grad_norm": 0.45732954144477844, "learning_rate": 0.0005615841240131051, "loss": 1.8548, "step": 14685 }, { "epoch": 0.49, "grad_norm": 0.4214295744895935, "learning_rate": 0.0005615790064967265, "loss": 1.8175, "step": 14686 }, { "epoch": 0.49, "grad_norm": 0.432519793510437, "learning_rate": 0.0005615738886628297, "loss": 1.8021, "step": 14687 }, { "epoch": 0.49, "grad_norm": 0.4701445996761322, "learning_rate": 0.0005615687705114208, "loss": 1.9194, "step": 14688 }, { "epoch": 0.49, "grad_norm": 0.4292161762714386, "learning_rate": 0.0005615636520425058, "loss": 1.8283, "step": 14689 }, { "epoch": 0.49, "grad_norm": 0.42122703790664673, "learning_rate": 0.0005615585332560911, "loss": 1.9135, "step": 14690 }, { "epoch": 0.49, "grad_norm": 0.637664258480072, "learning_rate": 0.0005615534141521828, "loss": 1.9059, "step": 14691 }, { "epoch": 0.49, "grad_norm": 0.44203197956085205, "learning_rate": 0.0005615482947307873, "loss": 1.8552, "step": 14692 }, { "epoch": 0.49, "grad_norm": 0.4278738498687744, "learning_rate": 0.0005615431749919108, "loss": 1.8717, "step": 14693 }, { "epoch": 0.49, "grad_norm": 0.43780234456062317, "learning_rate": 0.0005615380549355592, "loss": 1.8516, "step": 14694 }, { "epoch": 0.49, "grad_norm": 0.4374159276485443, "learning_rate": 0.0005615329345617391, "loss": 1.9006, "step": 14695 }, { "epoch": 0.49, "grad_norm": 0.44462186098098755, "learning_rate": 0.0005615278138704565, "loss": 1.9128, "step": 14696 }, { "epoch": 0.49, "grad_norm": 0.44586512446403503, "learning_rate": 0.0005615226928617177, "loss": 1.8252, "step": 14697 }, { "epoch": 0.49, "grad_norm": 0.4358446002006531, "learning_rate": 0.0005615175715355288, "loss": 1.8589, "step": 14698 }, { "epoch": 0.49, "grad_norm": 0.432963490486145, "learning_rate": 0.0005615124498918962, "loss": 1.9042, "step": 14699 }, { "epoch": 0.49, "grad_norm": 0.4698122441768646, "learning_rate": 0.000561507327930826, "loss": 1.9355, "step": 14700 }, { "epoch": 0.49, "grad_norm": 0.43133366107940674, "learning_rate": 0.0005615022056523243, "loss": 1.8882, "step": 14701 }, { "epoch": 0.49, "grad_norm": 0.42680689692497253, "learning_rate": 0.0005614970830563977, "loss": 1.9482, "step": 14702 }, { "epoch": 0.49, "grad_norm": 0.45105770230293274, "learning_rate": 0.000561491960143052, "loss": 1.8267, "step": 14703 }, { "epoch": 0.49, "grad_norm": 0.4571240544319153, "learning_rate": 0.0005614868369122935, "loss": 1.8693, "step": 14704 }, { "epoch": 0.49, "grad_norm": 0.439375102519989, "learning_rate": 0.0005614817133641288, "loss": 1.8679, "step": 14705 }, { "epoch": 0.49, "grad_norm": 0.42928001284599304, "learning_rate": 0.0005614765894985636, "loss": 1.8556, "step": 14706 }, { "epoch": 0.49, "grad_norm": 0.42762812972068787, "learning_rate": 0.0005614714653156044, "loss": 1.8021, "step": 14707 }, { "epoch": 0.49, "grad_norm": 0.4220605492591858, "learning_rate": 0.0005614663408152574, "loss": 1.8656, "step": 14708 }, { "epoch": 0.49, "grad_norm": 0.4395214915275574, "learning_rate": 0.0005614612159975288, "loss": 1.8672, "step": 14709 }, { "epoch": 0.49, "grad_norm": 0.4685518741607666, "learning_rate": 0.0005614560908624248, "loss": 1.892, "step": 14710 }, { "epoch": 0.49, "grad_norm": 0.44218316674232483, "learning_rate": 0.0005614509654099515, "loss": 1.8714, "step": 14711 }, { "epoch": 0.49, "grad_norm": 0.4305333197116852, "learning_rate": 0.0005614458396401154, "loss": 1.907, "step": 14712 }, { "epoch": 0.49, "grad_norm": 0.42019620537757874, "learning_rate": 0.0005614407135529226, "loss": 1.8807, "step": 14713 }, { "epoch": 0.49, "grad_norm": 0.4294365346431732, "learning_rate": 0.0005614355871483792, "loss": 1.934, "step": 14714 }, { "epoch": 0.49, "grad_norm": 0.4230903089046478, "learning_rate": 0.0005614304604264914, "loss": 1.8961, "step": 14715 }, { "epoch": 0.49, "grad_norm": 0.4364529550075531, "learning_rate": 0.0005614253333872658, "loss": 1.8658, "step": 14716 }, { "epoch": 0.49, "grad_norm": 0.5439549088478088, "learning_rate": 0.0005614202060307082, "loss": 1.8841, "step": 14717 }, { "epoch": 0.49, "grad_norm": 0.44770991802215576, "learning_rate": 0.000561415078356825, "loss": 1.8293, "step": 14718 }, { "epoch": 0.49, "grad_norm": 0.43728315830230713, "learning_rate": 0.0005614099503656225, "loss": 1.8873, "step": 14719 }, { "epoch": 0.49, "grad_norm": 0.44570761919021606, "learning_rate": 0.0005614048220571068, "loss": 1.9206, "step": 14720 }, { "epoch": 0.49, "grad_norm": 0.42335739731788635, "learning_rate": 0.000561399693431284, "loss": 1.8677, "step": 14721 }, { "epoch": 0.49, "grad_norm": 0.44622519612312317, "learning_rate": 0.0005613945644881607, "loss": 1.8591, "step": 14722 }, { "epoch": 0.49, "grad_norm": 0.43609312176704407, "learning_rate": 0.0005613894352277429, "loss": 1.8418, "step": 14723 }, { "epoch": 0.49, "grad_norm": 0.43874499201774597, "learning_rate": 0.0005613843056500367, "loss": 1.8519, "step": 14724 }, { "epoch": 0.49, "grad_norm": 0.454166442155838, "learning_rate": 0.0005613791757550486, "loss": 1.9072, "step": 14725 }, { "epoch": 0.49, "grad_norm": 0.43555212020874023, "learning_rate": 0.0005613740455427846, "loss": 1.9081, "step": 14726 }, { "epoch": 0.49, "grad_norm": 0.42489662766456604, "learning_rate": 0.0005613689150132511, "loss": 1.8956, "step": 14727 }, { "epoch": 0.49, "grad_norm": 0.41990309953689575, "learning_rate": 0.0005613637841664542, "loss": 1.8442, "step": 14728 }, { "epoch": 0.49, "grad_norm": 0.4400225579738617, "learning_rate": 0.0005613586530024002, "loss": 1.8797, "step": 14729 }, { "epoch": 0.49, "grad_norm": 0.42373111844062805, "learning_rate": 0.0005613535215210953, "loss": 1.8593, "step": 14730 }, { "epoch": 0.49, "grad_norm": 0.42738720774650574, "learning_rate": 0.0005613483897225457, "loss": 1.815, "step": 14731 }, { "epoch": 0.49, "grad_norm": 0.42435580492019653, "learning_rate": 0.0005613432576067577, "loss": 1.9067, "step": 14732 }, { "epoch": 0.49, "grad_norm": 0.4270537495613098, "learning_rate": 0.0005613381251737375, "loss": 1.7831, "step": 14733 }, { "epoch": 0.49, "grad_norm": 0.44475263357162476, "learning_rate": 0.0005613329924234913, "loss": 1.8247, "step": 14734 }, { "epoch": 0.49, "grad_norm": 0.4436880946159363, "learning_rate": 0.0005613278593560254, "loss": 1.8619, "step": 14735 }, { "epoch": 0.49, "grad_norm": 0.42825832962989807, "learning_rate": 0.0005613227259713459, "loss": 1.8652, "step": 14736 }, { "epoch": 0.49, "grad_norm": 0.42736926674842834, "learning_rate": 0.0005613175922694592, "loss": 1.9277, "step": 14737 }, { "epoch": 0.49, "grad_norm": 0.4212489724159241, "learning_rate": 0.0005613124582503714, "loss": 1.8925, "step": 14738 }, { "epoch": 0.49, "grad_norm": 0.44632798433303833, "learning_rate": 0.0005613073239140887, "loss": 1.8582, "step": 14739 }, { "epoch": 0.49, "grad_norm": 0.4413312077522278, "learning_rate": 0.0005613021892606176, "loss": 1.8743, "step": 14740 }, { "epoch": 0.49, "grad_norm": 0.4362127184867859, "learning_rate": 0.0005612970542899639, "loss": 1.852, "step": 14741 }, { "epoch": 0.49, "grad_norm": 0.4550803005695343, "learning_rate": 0.0005612919190021343, "loss": 1.8312, "step": 14742 }, { "epoch": 0.49, "grad_norm": 0.4322059750556946, "learning_rate": 0.0005612867833971346, "loss": 1.8179, "step": 14743 }, { "epoch": 0.49, "grad_norm": 0.4311753809452057, "learning_rate": 0.0005612816474749715, "loss": 1.8574, "step": 14744 }, { "epoch": 0.49, "grad_norm": 0.43753233551979065, "learning_rate": 0.0005612765112356508, "loss": 1.9334, "step": 14745 }, { "epoch": 0.49, "grad_norm": 0.43036237359046936, "learning_rate": 0.0005612713746791789, "loss": 1.8157, "step": 14746 }, { "epoch": 0.49, "grad_norm": 0.4333076477050781, "learning_rate": 0.0005612662378055622, "loss": 1.8503, "step": 14747 }, { "epoch": 0.49, "grad_norm": 0.4399299621582031, "learning_rate": 0.0005612611006148066, "loss": 1.9571, "step": 14748 }, { "epoch": 0.49, "grad_norm": 0.4423615634441376, "learning_rate": 0.0005612559631069186, "loss": 1.965, "step": 14749 }, { "epoch": 0.49, "grad_norm": 0.4334219992160797, "learning_rate": 0.0005612508252819043, "loss": 1.8664, "step": 14750 }, { "epoch": 0.49, "grad_norm": 0.4333447515964508, "learning_rate": 0.0005612456871397701, "loss": 1.9462, "step": 14751 }, { "epoch": 0.49, "grad_norm": 0.44873854517936707, "learning_rate": 0.0005612405486805221, "loss": 1.8317, "step": 14752 }, { "epoch": 0.49, "grad_norm": 0.4243597686290741, "learning_rate": 0.0005612354099041665, "loss": 1.8493, "step": 14753 }, { "epoch": 0.49, "grad_norm": 0.4497939646244049, "learning_rate": 0.0005612302708107097, "loss": 1.8202, "step": 14754 }, { "epoch": 0.49, "grad_norm": 0.4312329888343811, "learning_rate": 0.0005612251314001578, "loss": 1.8645, "step": 14755 }, { "epoch": 0.49, "grad_norm": 0.44444501399993896, "learning_rate": 0.0005612199916725171, "loss": 1.9321, "step": 14756 }, { "epoch": 0.49, "grad_norm": 0.43484801054000854, "learning_rate": 0.0005612148516277938, "loss": 1.8837, "step": 14757 }, { "epoch": 0.49, "grad_norm": 0.4312177300453186, "learning_rate": 0.0005612097112659941, "loss": 1.805, "step": 14758 }, { "epoch": 0.49, "grad_norm": 0.4246552884578705, "learning_rate": 0.0005612045705871244, "loss": 1.8817, "step": 14759 }, { "epoch": 0.49, "grad_norm": 0.42360684275627136, "learning_rate": 0.0005611994295911907, "loss": 1.8599, "step": 14760 }, { "epoch": 0.49, "grad_norm": 0.4374852776527405, "learning_rate": 0.0005611942882781995, "loss": 1.8715, "step": 14761 }, { "epoch": 0.49, "grad_norm": 0.42393842339515686, "learning_rate": 0.0005611891466481569, "loss": 1.8928, "step": 14762 }, { "epoch": 0.49, "grad_norm": 0.4276489317417145, "learning_rate": 0.0005611840047010691, "loss": 1.8955, "step": 14763 }, { "epoch": 0.49, "grad_norm": 0.42803847789764404, "learning_rate": 0.0005611788624369425, "loss": 1.7382, "step": 14764 }, { "epoch": 0.49, "grad_norm": 0.42266228795051575, "learning_rate": 0.0005611737198557832, "loss": 1.8279, "step": 14765 }, { "epoch": 0.49, "grad_norm": 0.5046225786209106, "learning_rate": 0.0005611685769575975, "loss": 1.8872, "step": 14766 }, { "epoch": 0.49, "grad_norm": 0.42531171441078186, "learning_rate": 0.0005611634337423914, "loss": 1.8354, "step": 14767 }, { "epoch": 0.49, "grad_norm": 0.7471887469291687, "learning_rate": 0.0005611582902101717, "loss": 1.9335, "step": 14768 }, { "epoch": 0.49, "grad_norm": 0.45219576358795166, "learning_rate": 0.0005611531463609442, "loss": 1.9, "step": 14769 }, { "epoch": 0.49, "grad_norm": 0.4514072835445404, "learning_rate": 0.0005611480021947152, "loss": 1.9049, "step": 14770 }, { "epoch": 0.49, "grad_norm": 0.43027421832084656, "learning_rate": 0.0005611428577114911, "loss": 1.8176, "step": 14771 }, { "epoch": 0.49, "grad_norm": 0.4194771945476532, "learning_rate": 0.0005611377129112779, "loss": 1.8925, "step": 14772 }, { "epoch": 0.49, "grad_norm": 0.44130629301071167, "learning_rate": 0.0005611325677940823, "loss": 1.9287, "step": 14773 }, { "epoch": 0.49, "grad_norm": 0.4426662027835846, "learning_rate": 0.0005611274223599099, "loss": 1.8637, "step": 14774 }, { "epoch": 0.49, "grad_norm": 0.43627092242240906, "learning_rate": 0.0005611222766087674, "loss": 1.8026, "step": 14775 }, { "epoch": 0.49, "grad_norm": 0.43041545152664185, "learning_rate": 0.0005611171305406609, "loss": 1.8207, "step": 14776 }, { "epoch": 0.49, "grad_norm": 0.4391770660877228, "learning_rate": 0.0005611119841555967, "loss": 1.8871, "step": 14777 }, { "epoch": 0.49, "grad_norm": 0.4324227273464203, "learning_rate": 0.0005611068374535811, "loss": 1.9002, "step": 14778 }, { "epoch": 0.49, "grad_norm": 0.4694407880306244, "learning_rate": 0.0005611016904346202, "loss": 1.8491, "step": 14779 }, { "epoch": 0.49, "grad_norm": 0.44452571868896484, "learning_rate": 0.0005610965430987204, "loss": 1.9931, "step": 14780 }, { "epoch": 0.49, "grad_norm": 0.5023625493049622, "learning_rate": 0.0005610913954458878, "loss": 1.9976, "step": 14781 }, { "epoch": 0.49, "grad_norm": 0.46230649948120117, "learning_rate": 0.0005610862474761287, "loss": 1.8252, "step": 14782 }, { "epoch": 0.49, "grad_norm": 0.45237523317337036, "learning_rate": 0.0005610810991894495, "loss": 1.8428, "step": 14783 }, { "epoch": 0.49, "grad_norm": 0.4171660244464874, "learning_rate": 0.0005610759505858561, "loss": 1.814, "step": 14784 }, { "epoch": 0.49, "grad_norm": 0.4277496933937073, "learning_rate": 0.0005610708016653551, "loss": 1.8266, "step": 14785 }, { "epoch": 0.49, "grad_norm": 0.4537550210952759, "learning_rate": 0.0005610656524279526, "loss": 1.7881, "step": 14786 }, { "epoch": 0.49, "grad_norm": 0.4328032433986664, "learning_rate": 0.0005610605028736549, "loss": 1.8455, "step": 14787 }, { "epoch": 0.49, "grad_norm": 0.4381245970726013, "learning_rate": 0.0005610553530024682, "loss": 1.8621, "step": 14788 }, { "epoch": 0.49, "grad_norm": 0.4385814070701599, "learning_rate": 0.0005610502028143986, "loss": 1.9207, "step": 14789 }, { "epoch": 0.49, "grad_norm": 0.44583767652511597, "learning_rate": 0.0005610450523094527, "loss": 1.9248, "step": 14790 }, { "epoch": 0.49, "grad_norm": 0.43956947326660156, "learning_rate": 0.0005610399014876364, "loss": 1.8836, "step": 14791 }, { "epoch": 0.49, "grad_norm": 0.428190141916275, "learning_rate": 0.0005610347503489563, "loss": 1.8704, "step": 14792 }, { "epoch": 0.49, "grad_norm": 0.4366936981678009, "learning_rate": 0.0005610295988934184, "loss": 1.902, "step": 14793 }, { "epoch": 0.49, "grad_norm": 0.45919281244277954, "learning_rate": 0.000561024447121029, "loss": 1.8609, "step": 14794 }, { "epoch": 0.49, "grad_norm": 0.4293631315231323, "learning_rate": 0.0005610192950317944, "loss": 1.8952, "step": 14795 }, { "epoch": 0.49, "grad_norm": 0.43414315581321716, "learning_rate": 0.0005610141426257209, "loss": 1.9056, "step": 14796 }, { "epoch": 0.49, "grad_norm": 0.43102291226387024, "learning_rate": 0.0005610089899028147, "loss": 1.8794, "step": 14797 }, { "epoch": 0.49, "grad_norm": 0.46710410714149475, "learning_rate": 0.000561003836863082, "loss": 1.9538, "step": 14798 }, { "epoch": 0.49, "grad_norm": 0.4432336390018463, "learning_rate": 0.0005609986835065291, "loss": 1.7325, "step": 14799 }, { "epoch": 0.49, "grad_norm": 0.43124163150787354, "learning_rate": 0.0005609935298331622, "loss": 1.813, "step": 14800 }, { "epoch": 0.49, "grad_norm": 0.47314363718032837, "learning_rate": 0.0005609883758429876, "loss": 1.9009, "step": 14801 }, { "epoch": 0.49, "grad_norm": 0.44577106833457947, "learning_rate": 0.0005609832215360117, "loss": 1.8598, "step": 14802 }, { "epoch": 0.49, "grad_norm": 0.43908512592315674, "learning_rate": 0.0005609780669122406, "loss": 1.8287, "step": 14803 }, { "epoch": 0.49, "grad_norm": 0.4314795434474945, "learning_rate": 0.0005609729119716803, "loss": 1.8653, "step": 14804 }, { "epoch": 0.49, "grad_norm": 0.4327220916748047, "learning_rate": 0.0005609677567143377, "loss": 1.928, "step": 14805 }, { "epoch": 0.49, "grad_norm": 0.4093334376811981, "learning_rate": 0.0005609626011402186, "loss": 1.8708, "step": 14806 }, { "epoch": 0.49, "grad_norm": 0.42750781774520874, "learning_rate": 0.0005609574452493292, "loss": 1.8991, "step": 14807 }, { "epoch": 0.49, "grad_norm": 0.43606212735176086, "learning_rate": 0.0005609522890416761, "loss": 1.8716, "step": 14808 }, { "epoch": 0.49, "grad_norm": 0.43129903078079224, "learning_rate": 0.0005609471325172652, "loss": 1.8632, "step": 14809 }, { "epoch": 0.49, "grad_norm": 0.43959110975265503, "learning_rate": 0.0005609419756761031, "loss": 1.8556, "step": 14810 }, { "epoch": 0.49, "grad_norm": 0.4420414865016937, "learning_rate": 0.0005609368185181958, "loss": 1.9135, "step": 14811 }, { "epoch": 0.49, "grad_norm": 0.43819090723991394, "learning_rate": 0.0005609316610435497, "loss": 1.8935, "step": 14812 }, { "epoch": 0.49, "grad_norm": 0.4576403796672821, "learning_rate": 0.000560926503252171, "loss": 1.881, "step": 14813 }, { "epoch": 0.49, "grad_norm": 0.4349535405635834, "learning_rate": 0.0005609213451440658, "loss": 1.8933, "step": 14814 }, { "epoch": 0.49, "grad_norm": 0.4580141007900238, "learning_rate": 0.0005609161867192407, "loss": 1.8273, "step": 14815 }, { "epoch": 0.49, "grad_norm": 0.4451920986175537, "learning_rate": 0.0005609110279777018, "loss": 1.8466, "step": 14816 }, { "epoch": 0.49, "grad_norm": 0.4160878360271454, "learning_rate": 0.0005609058689194554, "loss": 1.7645, "step": 14817 }, { "epoch": 0.49, "grad_norm": 0.42830076813697815, "learning_rate": 0.0005609007095445076, "loss": 1.8262, "step": 14818 }, { "epoch": 0.49, "grad_norm": 0.44722312688827515, "learning_rate": 0.0005608955498528648, "loss": 1.8685, "step": 14819 }, { "epoch": 0.49, "grad_norm": 0.4419233798980713, "learning_rate": 0.0005608903898445333, "loss": 1.8469, "step": 14820 }, { "epoch": 0.49, "grad_norm": 0.43208977580070496, "learning_rate": 0.0005608852295195192, "loss": 1.8225, "step": 14821 }, { "epoch": 0.49, "grad_norm": 0.44081422686576843, "learning_rate": 0.0005608800688778289, "loss": 1.921, "step": 14822 }, { "epoch": 0.49, "grad_norm": 0.43444329500198364, "learning_rate": 0.0005608749079194687, "loss": 1.8826, "step": 14823 }, { "epoch": 0.49, "grad_norm": 0.4610070288181305, "learning_rate": 0.0005608697466444448, "loss": 1.8972, "step": 14824 }, { "epoch": 0.49, "grad_norm": 0.43061304092407227, "learning_rate": 0.0005608645850527635, "loss": 1.939, "step": 14825 }, { "epoch": 0.49, "grad_norm": 0.4331284463405609, "learning_rate": 0.000560859423144431, "loss": 1.8188, "step": 14826 }, { "epoch": 0.49, "grad_norm": 0.4412248432636261, "learning_rate": 0.0005608542609194534, "loss": 1.8778, "step": 14827 }, { "epoch": 0.49, "grad_norm": 0.42453518509864807, "learning_rate": 0.0005608490983778374, "loss": 1.803, "step": 14828 }, { "epoch": 0.49, "grad_norm": 0.4138389527797699, "learning_rate": 0.000560843935519589, "loss": 1.8287, "step": 14829 }, { "epoch": 0.49, "grad_norm": 0.4280092418193817, "learning_rate": 0.0005608387723447145, "loss": 1.8309, "step": 14830 }, { "epoch": 0.49, "grad_norm": 0.42567524313926697, "learning_rate": 0.0005608336088532201, "loss": 1.8352, "step": 14831 }, { "epoch": 0.49, "grad_norm": 0.4350458085536957, "learning_rate": 0.0005608284450451121, "loss": 1.856, "step": 14832 }, { "epoch": 0.49, "grad_norm": 0.4495469033718109, "learning_rate": 0.0005608232809203969, "loss": 1.8635, "step": 14833 }, { "epoch": 0.49, "grad_norm": 0.4180396497249603, "learning_rate": 0.0005608181164790805, "loss": 1.8204, "step": 14834 }, { "epoch": 0.49, "grad_norm": 0.4420452117919922, "learning_rate": 0.0005608129517211695, "loss": 1.8529, "step": 14835 }, { "epoch": 0.49, "grad_norm": 0.4297234117984772, "learning_rate": 0.00056080778664667, "loss": 1.8886, "step": 14836 }, { "epoch": 0.49, "grad_norm": 0.43374544382095337, "learning_rate": 0.0005608026212555882, "loss": 1.9045, "step": 14837 }, { "epoch": 0.49, "grad_norm": 0.41919827461242676, "learning_rate": 0.0005607974555479305, "loss": 1.8474, "step": 14838 }, { "epoch": 0.49, "grad_norm": 0.4494933784008026, "learning_rate": 0.0005607922895237031, "loss": 1.9139, "step": 14839 }, { "epoch": 0.49, "grad_norm": 0.4444448947906494, "learning_rate": 0.0005607871231829122, "loss": 1.8632, "step": 14840 }, { "epoch": 0.49, "grad_norm": 0.4356776177883148, "learning_rate": 0.0005607819565255643, "loss": 1.8366, "step": 14841 }, { "epoch": 0.49, "grad_norm": 0.4413878321647644, "learning_rate": 0.0005607767895516654, "loss": 1.8505, "step": 14842 }, { "epoch": 0.49, "grad_norm": 0.4193214476108551, "learning_rate": 0.000560771622261222, "loss": 1.8728, "step": 14843 }, { "epoch": 0.49, "grad_norm": 0.48078596591949463, "learning_rate": 0.0005607664546542402, "loss": 1.7611, "step": 14844 }, { "epoch": 0.49, "grad_norm": 0.42223846912384033, "learning_rate": 0.0005607612867307264, "loss": 1.8343, "step": 14845 }, { "epoch": 0.49, "grad_norm": 0.4452604651451111, "learning_rate": 0.0005607561184906868, "loss": 1.8577, "step": 14846 }, { "epoch": 0.49, "grad_norm": 0.4626481533050537, "learning_rate": 0.0005607509499341277, "loss": 1.8781, "step": 14847 }, { "epoch": 0.49, "grad_norm": 0.42682522535324097, "learning_rate": 0.0005607457810610554, "loss": 1.7678, "step": 14848 }, { "epoch": 0.49, "grad_norm": 0.42246103286743164, "learning_rate": 0.0005607406118714761, "loss": 1.8615, "step": 14849 }, { "epoch": 0.49, "grad_norm": 0.44666558504104614, "learning_rate": 0.0005607354423653962, "loss": 1.907, "step": 14850 }, { "epoch": 0.49, "grad_norm": 0.44862911105155945, "learning_rate": 0.0005607302725428217, "loss": 1.8476, "step": 14851 }, { "epoch": 0.49, "grad_norm": 0.43736881017684937, "learning_rate": 0.0005607251024037593, "loss": 1.8675, "step": 14852 }, { "epoch": 0.49, "grad_norm": 0.44278112053871155, "learning_rate": 0.0005607199319482149, "loss": 1.911, "step": 14853 }, { "epoch": 0.49, "grad_norm": 0.5753660798072815, "learning_rate": 0.0005607147611761948, "loss": 1.8949, "step": 14854 }, { "epoch": 0.49, "grad_norm": 0.4507203996181488, "learning_rate": 0.0005607095900877055, "loss": 1.9318, "step": 14855 }, { "epoch": 0.49, "grad_norm": 0.44597288966178894, "learning_rate": 0.0005607044186827533, "loss": 1.9011, "step": 14856 }, { "epoch": 0.49, "grad_norm": 0.43192633986473083, "learning_rate": 0.0005606992469613443, "loss": 1.8912, "step": 14857 }, { "epoch": 0.49, "grad_norm": 0.4544917941093445, "learning_rate": 0.0005606940749234848, "loss": 1.7442, "step": 14858 }, { "epoch": 0.49, "grad_norm": 0.4546368420124054, "learning_rate": 0.000560688902569181, "loss": 1.8935, "step": 14859 }, { "epoch": 0.49, "grad_norm": 0.450742244720459, "learning_rate": 0.0005606837298984395, "loss": 1.9526, "step": 14860 }, { "epoch": 0.49, "grad_norm": 0.44262686371803284, "learning_rate": 0.0005606785569112661, "loss": 1.9362, "step": 14861 }, { "epoch": 0.49, "grad_norm": 0.4340543746948242, "learning_rate": 0.0005606733836076675, "loss": 1.8147, "step": 14862 }, { "epoch": 0.49, "grad_norm": 0.4281103014945984, "learning_rate": 0.0005606682099876498, "loss": 1.8327, "step": 14863 }, { "epoch": 0.49, "grad_norm": 0.4556787312030792, "learning_rate": 0.0005606630360512193, "loss": 1.8937, "step": 14864 }, { "epoch": 0.49, "grad_norm": 0.4301683008670807, "learning_rate": 0.0005606578617983822, "loss": 1.8771, "step": 14865 }, { "epoch": 0.49, "grad_norm": 0.4409704804420471, "learning_rate": 0.000560652687229145, "loss": 1.798, "step": 14866 }, { "epoch": 0.49, "grad_norm": 0.4220051169395447, "learning_rate": 0.0005606475123435138, "loss": 1.8476, "step": 14867 }, { "epoch": 0.49, "grad_norm": 0.4360852539539337, "learning_rate": 0.0005606423371414949, "loss": 1.8329, "step": 14868 }, { "epoch": 0.49, "grad_norm": 0.45484691858291626, "learning_rate": 0.0005606371616230946, "loss": 1.8436, "step": 14869 }, { "epoch": 0.49, "grad_norm": 0.43855082988739014, "learning_rate": 0.0005606319857883192, "loss": 1.9201, "step": 14870 }, { "epoch": 0.49, "grad_norm": 0.43162304162979126, "learning_rate": 0.000560626809637175, "loss": 1.9313, "step": 14871 }, { "epoch": 0.49, "grad_norm": 0.4491789638996124, "learning_rate": 0.0005606216331696681, "loss": 1.9285, "step": 14872 }, { "epoch": 0.49, "grad_norm": 0.47915738821029663, "learning_rate": 0.0005606164563858051, "loss": 1.8691, "step": 14873 }, { "epoch": 0.49, "grad_norm": 0.4296128749847412, "learning_rate": 0.0005606112792855921, "loss": 1.8727, "step": 14874 }, { "epoch": 0.49, "grad_norm": 0.43948084115982056, "learning_rate": 0.0005606061018690354, "loss": 1.8249, "step": 14875 }, { "epoch": 0.49, "grad_norm": 0.44213974475860596, "learning_rate": 0.0005606009241361413, "loss": 1.8644, "step": 14876 }, { "epoch": 0.49, "grad_norm": 0.44855430722236633, "learning_rate": 0.0005605957460869161, "loss": 1.8613, "step": 14877 }, { "epoch": 0.49, "grad_norm": 0.5633888840675354, "learning_rate": 0.000560590567721366, "loss": 1.829, "step": 14878 }, { "epoch": 0.5, "grad_norm": 0.4411979913711548, "learning_rate": 0.0005605853890394974, "loss": 1.9256, "step": 14879 }, { "epoch": 0.5, "grad_norm": 0.4635494351387024, "learning_rate": 0.0005605802100413165, "loss": 1.9027, "step": 14880 }, { "epoch": 0.5, "grad_norm": 0.4351714849472046, "learning_rate": 0.0005605750307268295, "loss": 1.8451, "step": 14881 }, { "epoch": 0.5, "grad_norm": 0.4553126394748688, "learning_rate": 0.0005605698510960429, "loss": 1.8778, "step": 14882 }, { "epoch": 0.5, "grad_norm": 0.46522074937820435, "learning_rate": 0.000560564671148963, "loss": 1.8574, "step": 14883 }, { "epoch": 0.5, "grad_norm": 0.43705853819847107, "learning_rate": 0.0005605594908855959, "loss": 1.8363, "step": 14884 }, { "epoch": 0.5, "grad_norm": 0.453757643699646, "learning_rate": 0.000560554310305948, "loss": 1.8219, "step": 14885 }, { "epoch": 0.5, "grad_norm": 0.6109901070594788, "learning_rate": 0.0005605491294100255, "loss": 1.9396, "step": 14886 }, { "epoch": 0.5, "grad_norm": 0.4480382800102234, "learning_rate": 0.0005605439481978347, "loss": 1.8509, "step": 14887 }, { "epoch": 0.5, "grad_norm": 0.43545541167259216, "learning_rate": 0.000560538766669382, "loss": 1.879, "step": 14888 }, { "epoch": 0.5, "grad_norm": 0.43521255254745483, "learning_rate": 0.0005605335848246737, "loss": 1.8783, "step": 14889 }, { "epoch": 0.5, "grad_norm": 0.43454083800315857, "learning_rate": 0.0005605284026637159, "loss": 1.8214, "step": 14890 }, { "epoch": 0.5, "grad_norm": 0.45046162605285645, "learning_rate": 0.0005605232201865151, "loss": 1.8423, "step": 14891 }, { "epoch": 0.5, "grad_norm": 0.43214407563209534, "learning_rate": 0.0005605180373930774, "loss": 1.8614, "step": 14892 }, { "epoch": 0.5, "grad_norm": 0.4357210695743561, "learning_rate": 0.0005605128542834093, "loss": 1.8022, "step": 14893 }, { "epoch": 0.5, "grad_norm": 0.4415852427482605, "learning_rate": 0.0005605076708575169, "loss": 1.8315, "step": 14894 }, { "epoch": 0.5, "grad_norm": 0.4294370412826538, "learning_rate": 0.0005605024871154065, "loss": 1.8218, "step": 14895 }, { "epoch": 0.5, "grad_norm": 0.43046483397483826, "learning_rate": 0.0005604973030570847, "loss": 1.9806, "step": 14896 }, { "epoch": 0.5, "grad_norm": 0.45140132308006287, "learning_rate": 0.0005604921186825575, "loss": 1.8669, "step": 14897 }, { "epoch": 0.5, "grad_norm": 0.4446234703063965, "learning_rate": 0.000560486933991831, "loss": 1.9122, "step": 14898 }, { "epoch": 0.5, "grad_norm": 0.42243754863739014, "learning_rate": 0.000560481748984912, "loss": 1.9579, "step": 14899 }, { "epoch": 0.5, "grad_norm": 0.41879427433013916, "learning_rate": 0.0005604765636618064, "loss": 1.8385, "step": 14900 }, { "epoch": 0.5, "grad_norm": 0.44231244921684265, "learning_rate": 0.0005604713780225207, "loss": 1.9067, "step": 14901 }, { "epoch": 0.5, "grad_norm": 0.42233145236968994, "learning_rate": 0.0005604661920670612, "loss": 1.9073, "step": 14902 }, { "epoch": 0.5, "grad_norm": 0.4416600465774536, "learning_rate": 0.000560461005795434, "loss": 1.8264, "step": 14903 }, { "epoch": 0.5, "grad_norm": 0.426734983921051, "learning_rate": 0.0005604558192076455, "loss": 1.8455, "step": 14904 }, { "epoch": 0.5, "grad_norm": 0.42316681146621704, "learning_rate": 0.000560450632303702, "loss": 1.9118, "step": 14905 }, { "epoch": 0.5, "grad_norm": 0.4328683614730835, "learning_rate": 0.00056044544508361, "loss": 1.9003, "step": 14906 }, { "epoch": 0.5, "grad_norm": 0.4496576189994812, "learning_rate": 0.0005604402575473753, "loss": 1.7927, "step": 14907 }, { "epoch": 0.5, "grad_norm": 0.4315837323665619, "learning_rate": 0.0005604350696950047, "loss": 1.7617, "step": 14908 }, { "epoch": 0.5, "grad_norm": 0.4296397566795349, "learning_rate": 0.0005604298815265043, "loss": 1.9595, "step": 14909 }, { "epoch": 0.5, "grad_norm": 0.4496515989303589, "learning_rate": 0.0005604246930418804, "loss": 1.8434, "step": 14910 }, { "epoch": 0.5, "grad_norm": 0.42073604464530945, "learning_rate": 0.0005604195042411391, "loss": 1.8609, "step": 14911 }, { "epoch": 0.5, "grad_norm": 0.4259439706802368, "learning_rate": 0.0005604143151242871, "loss": 1.8598, "step": 14912 }, { "epoch": 0.5, "grad_norm": 0.42946258187294006, "learning_rate": 0.0005604091256913305, "loss": 1.8352, "step": 14913 }, { "epoch": 0.5, "grad_norm": 0.43834617733955383, "learning_rate": 0.0005604039359422755, "loss": 1.7288, "step": 14914 }, { "epoch": 0.5, "grad_norm": 0.4322258234024048, "learning_rate": 0.0005603987458771285, "loss": 1.9114, "step": 14915 }, { "epoch": 0.5, "grad_norm": 0.4156986474990845, "learning_rate": 0.0005603935554958958, "loss": 1.8851, "step": 14916 }, { "epoch": 0.5, "grad_norm": 0.43790438771247864, "learning_rate": 0.0005603883647985837, "loss": 1.8666, "step": 14917 }, { "epoch": 0.5, "grad_norm": 0.4437054991722107, "learning_rate": 0.0005603831737851986, "loss": 1.8494, "step": 14918 }, { "epoch": 0.5, "grad_norm": 0.4433327317237854, "learning_rate": 0.0005603779824557466, "loss": 1.8964, "step": 14919 }, { "epoch": 0.5, "grad_norm": 0.43407389521598816, "learning_rate": 0.000560372790810234, "loss": 1.8524, "step": 14920 }, { "epoch": 0.5, "grad_norm": 0.43457913398742676, "learning_rate": 0.0005603675988486673, "loss": 1.897, "step": 14921 }, { "epoch": 0.5, "grad_norm": 0.4390544295310974, "learning_rate": 0.0005603624065710527, "loss": 1.9148, "step": 14922 }, { "epoch": 0.5, "grad_norm": 0.42352795600891113, "learning_rate": 0.0005603572139773964, "loss": 1.789, "step": 14923 }, { "epoch": 0.5, "grad_norm": 0.4349653422832489, "learning_rate": 0.0005603520210677049, "loss": 1.9543, "step": 14924 }, { "epoch": 0.5, "grad_norm": 0.42086678743362427, "learning_rate": 0.0005603468278419844, "loss": 1.8163, "step": 14925 }, { "epoch": 0.5, "grad_norm": 0.44166499376296997, "learning_rate": 0.0005603416343002412, "loss": 1.8202, "step": 14926 }, { "epoch": 0.5, "grad_norm": 0.43842431902885437, "learning_rate": 0.0005603364404424816, "loss": 1.8233, "step": 14927 }, { "epoch": 0.5, "grad_norm": 0.43339237570762634, "learning_rate": 0.000560331246268712, "loss": 1.8205, "step": 14928 }, { "epoch": 0.5, "grad_norm": 0.612387478351593, "learning_rate": 0.0005603260517789385, "loss": 1.8822, "step": 14929 }, { "epoch": 0.5, "grad_norm": 0.4263496994972229, "learning_rate": 0.0005603208569731676, "loss": 1.828, "step": 14930 }, { "epoch": 0.5, "grad_norm": 0.42539340257644653, "learning_rate": 0.0005603156618514055, "loss": 1.8603, "step": 14931 }, { "epoch": 0.5, "grad_norm": 0.43560633063316345, "learning_rate": 0.0005603104664136586, "loss": 1.8726, "step": 14932 }, { "epoch": 0.5, "grad_norm": 0.4241787791252136, "learning_rate": 0.0005603052706599331, "loss": 1.878, "step": 14933 }, { "epoch": 0.5, "grad_norm": 0.42306211590766907, "learning_rate": 0.0005603000745902354, "loss": 1.8252, "step": 14934 }, { "epoch": 0.5, "grad_norm": 0.4367403984069824, "learning_rate": 0.0005602948782045717, "loss": 1.8544, "step": 14935 }, { "epoch": 0.5, "grad_norm": 0.4463269114494324, "learning_rate": 0.0005602896815029484, "loss": 1.9849, "step": 14936 }, { "epoch": 0.5, "grad_norm": 0.4218488335609436, "learning_rate": 0.0005602844844853718, "loss": 1.8563, "step": 14937 }, { "epoch": 0.5, "grad_norm": 0.43588757514953613, "learning_rate": 0.0005602792871518482, "loss": 1.7247, "step": 14938 }, { "epoch": 0.5, "grad_norm": 0.4293118715286255, "learning_rate": 0.0005602740895023838, "loss": 1.8455, "step": 14939 }, { "epoch": 0.5, "grad_norm": 0.43567201495170593, "learning_rate": 0.0005602688915369851, "loss": 1.8631, "step": 14940 }, { "epoch": 0.5, "grad_norm": 0.43138396739959717, "learning_rate": 0.0005602636932556583, "loss": 1.9482, "step": 14941 }, { "epoch": 0.5, "grad_norm": 0.42008745670318604, "learning_rate": 0.0005602584946584097, "loss": 1.902, "step": 14942 }, { "epoch": 0.5, "grad_norm": 0.4239841401576996, "learning_rate": 0.0005602532957452456, "loss": 1.9193, "step": 14943 }, { "epoch": 0.5, "grad_norm": 0.4198615849018097, "learning_rate": 0.0005602480965161724, "loss": 1.8225, "step": 14944 }, { "epoch": 0.5, "grad_norm": 0.41829004883766174, "learning_rate": 0.0005602428969711964, "loss": 1.8737, "step": 14945 }, { "epoch": 0.5, "grad_norm": 0.42924556136131287, "learning_rate": 0.0005602376971103238, "loss": 1.844, "step": 14946 }, { "epoch": 0.5, "grad_norm": 0.4409612715244293, "learning_rate": 0.000560232496933561, "loss": 1.8503, "step": 14947 }, { "epoch": 0.5, "grad_norm": 0.4537055790424347, "learning_rate": 0.0005602272964409143, "loss": 1.8313, "step": 14948 }, { "epoch": 0.5, "grad_norm": 0.4315667748451233, "learning_rate": 0.00056022209563239, "loss": 1.9231, "step": 14949 }, { "epoch": 0.5, "grad_norm": 0.4233337342739105, "learning_rate": 0.0005602168945079944, "loss": 1.7591, "step": 14950 }, { "epoch": 0.5, "grad_norm": 0.4385839104652405, "learning_rate": 0.0005602116930677339, "loss": 1.9209, "step": 14951 }, { "epoch": 0.5, "grad_norm": 0.4358862340450287, "learning_rate": 0.0005602064913116146, "loss": 1.8182, "step": 14952 }, { "epoch": 0.5, "grad_norm": 0.4453023076057434, "learning_rate": 0.0005602012892396431, "loss": 1.9047, "step": 14953 }, { "epoch": 0.5, "grad_norm": 0.4345929026603699, "learning_rate": 0.0005601960868518255, "loss": 1.8404, "step": 14954 }, { "epoch": 0.5, "grad_norm": 0.4505501985549927, "learning_rate": 0.0005601908841481681, "loss": 1.8866, "step": 14955 }, { "epoch": 0.5, "grad_norm": 0.4372144639492035, "learning_rate": 0.0005601856811286775, "loss": 1.8865, "step": 14956 }, { "epoch": 0.5, "grad_norm": 0.43220043182373047, "learning_rate": 0.0005601804777933597, "loss": 1.9009, "step": 14957 }, { "epoch": 0.5, "grad_norm": 0.4355146884918213, "learning_rate": 0.0005601752741422212, "loss": 1.9048, "step": 14958 }, { "epoch": 0.5, "grad_norm": 0.440570592880249, "learning_rate": 0.0005601700701752682, "loss": 1.7979, "step": 14959 }, { "epoch": 0.5, "grad_norm": 0.4263495206832886, "learning_rate": 0.0005601648658925071, "loss": 1.7691, "step": 14960 }, { "epoch": 0.5, "grad_norm": 0.4416339695453644, "learning_rate": 0.0005601596612939442, "loss": 1.9052, "step": 14961 }, { "epoch": 0.5, "grad_norm": 0.42757129669189453, "learning_rate": 0.0005601544563795858, "loss": 1.8955, "step": 14962 }, { "epoch": 0.5, "grad_norm": 0.42631494998931885, "learning_rate": 0.0005601492511494382, "loss": 1.8036, "step": 14963 }, { "epoch": 0.5, "grad_norm": 0.4153353273868561, "learning_rate": 0.0005601440456035077, "loss": 1.8769, "step": 14964 }, { "epoch": 0.5, "grad_norm": 0.4231327474117279, "learning_rate": 0.0005601388397418007, "loss": 1.8619, "step": 14965 }, { "epoch": 0.5, "grad_norm": 0.453818142414093, "learning_rate": 0.0005601336335643234, "loss": 1.7839, "step": 14966 }, { "epoch": 0.5, "grad_norm": 0.4469282329082489, "learning_rate": 0.0005601284270710824, "loss": 1.9071, "step": 14967 }, { "epoch": 0.5, "grad_norm": 0.4336506426334381, "learning_rate": 0.0005601232202620836, "loss": 1.8995, "step": 14968 }, { "epoch": 0.5, "grad_norm": 0.41271838545799255, "learning_rate": 0.0005601180131373336, "loss": 1.8141, "step": 14969 }, { "epoch": 0.5, "grad_norm": 0.4556907117366791, "learning_rate": 0.0005601128056968388, "loss": 1.881, "step": 14970 }, { "epoch": 0.5, "grad_norm": 0.4422508776187897, "learning_rate": 0.0005601075979406052, "loss": 1.886, "step": 14971 }, { "epoch": 0.5, "grad_norm": 0.43968167901039124, "learning_rate": 0.0005601023898686394, "loss": 1.8845, "step": 14972 }, { "epoch": 0.5, "grad_norm": 0.4517677426338196, "learning_rate": 0.0005600971814809476, "loss": 1.901, "step": 14973 }, { "epoch": 0.5, "grad_norm": 0.4296216666698456, "learning_rate": 0.0005600919727775361, "loss": 1.8553, "step": 14974 }, { "epoch": 0.5, "grad_norm": 0.4358520209789276, "learning_rate": 0.0005600867637584113, "loss": 1.8626, "step": 14975 }, { "epoch": 0.5, "grad_norm": 0.4458561837673187, "learning_rate": 0.0005600815544235796, "loss": 1.8547, "step": 14976 }, { "epoch": 0.5, "grad_norm": 0.43366628885269165, "learning_rate": 0.0005600763447730471, "loss": 1.9321, "step": 14977 }, { "epoch": 0.5, "grad_norm": 0.4212920665740967, "learning_rate": 0.0005600711348068202, "loss": 1.7989, "step": 14978 }, { "epoch": 0.5, "grad_norm": 0.45095232129096985, "learning_rate": 0.0005600659245249053, "loss": 1.884, "step": 14979 }, { "epoch": 0.5, "grad_norm": 0.43066665530204773, "learning_rate": 0.0005600607139273087, "loss": 1.8501, "step": 14980 }, { "epoch": 0.5, "grad_norm": 0.43293219804763794, "learning_rate": 0.0005600555030140367, "loss": 1.8795, "step": 14981 }, { "epoch": 0.5, "grad_norm": 0.4216371476650238, "learning_rate": 0.0005600502917850957, "loss": 1.8493, "step": 14982 }, { "epoch": 0.5, "grad_norm": 0.4381338953971863, "learning_rate": 0.0005600450802404919, "loss": 1.8179, "step": 14983 }, { "epoch": 0.5, "grad_norm": 0.4334680438041687, "learning_rate": 0.0005600398683802317, "loss": 1.8168, "step": 14984 }, { "epoch": 0.5, "grad_norm": 0.4474921226501465, "learning_rate": 0.0005600346562043214, "loss": 1.7943, "step": 14985 }, { "epoch": 0.5, "grad_norm": 0.4617338478565216, "learning_rate": 0.0005600294437127673, "loss": 1.9247, "step": 14986 }, { "epoch": 0.5, "grad_norm": 0.4306608736515045, "learning_rate": 0.0005600242309055758, "loss": 1.8295, "step": 14987 }, { "epoch": 0.5, "grad_norm": 0.4460364580154419, "learning_rate": 0.0005600190177827533, "loss": 1.9311, "step": 14988 }, { "epoch": 0.5, "grad_norm": 0.4415617883205414, "learning_rate": 0.0005600138043443059, "loss": 1.9298, "step": 14989 }, { "epoch": 0.5, "grad_norm": 0.44635137915611267, "learning_rate": 0.0005600085905902401, "loss": 1.8636, "step": 14990 }, { "epoch": 0.5, "grad_norm": 0.4266517162322998, "learning_rate": 0.0005600033765205621, "loss": 1.7945, "step": 14991 }, { "epoch": 0.5, "grad_norm": 0.46420010924339294, "learning_rate": 0.0005599981621352784, "loss": 1.9738, "step": 14992 }, { "epoch": 0.5, "grad_norm": 0.4253418445587158, "learning_rate": 0.0005599929474343954, "loss": 1.8106, "step": 14993 }, { "epoch": 0.5, "grad_norm": 0.42397698760032654, "learning_rate": 0.000559987732417919, "loss": 1.8345, "step": 14994 }, { "epoch": 0.5, "grad_norm": 0.42517778277397156, "learning_rate": 0.000559982517085856, "loss": 1.8784, "step": 14995 }, { "epoch": 0.5, "grad_norm": 0.4296269416809082, "learning_rate": 0.0005599773014382123, "loss": 1.8151, "step": 14996 }, { "epoch": 0.5, "grad_norm": 0.4459368884563446, "learning_rate": 0.0005599720854749946, "loss": 1.9242, "step": 14997 }, { "epoch": 0.5, "grad_norm": 0.4532007873058319, "learning_rate": 0.0005599668691962091, "loss": 1.8109, "step": 14998 }, { "epoch": 0.5, "grad_norm": 0.4353012144565582, "learning_rate": 0.0005599616526018621, "loss": 1.91, "step": 14999 }, { "epoch": 0.5, "grad_norm": 0.44110390543937683, "learning_rate": 0.0005599564356919601, "loss": 1.9462, "step": 15000 }, { "epoch": 0.5, "grad_norm": 0.4670344889163971, "learning_rate": 0.000559951218466509, "loss": 1.94, "step": 15001 }, { "epoch": 0.5, "grad_norm": 0.43729540705680847, "learning_rate": 0.0005599460009255157, "loss": 1.8219, "step": 15002 }, { "epoch": 0.5, "grad_norm": 0.45583197474479675, "learning_rate": 0.000559940783068986, "loss": 1.9289, "step": 15003 }, { "epoch": 0.5, "grad_norm": 0.4308597147464752, "learning_rate": 0.0005599355648969267, "loss": 1.8666, "step": 15004 }, { "epoch": 0.5, "grad_norm": 0.4243646562099457, "learning_rate": 0.000559930346409344, "loss": 1.9082, "step": 15005 }, { "epoch": 0.5, "grad_norm": 0.8327832221984863, "learning_rate": 0.0005599251276062439, "loss": 1.7665, "step": 15006 }, { "epoch": 0.5, "grad_norm": 0.4261598289012909, "learning_rate": 0.0005599199084876331, "loss": 1.8583, "step": 15007 }, { "epoch": 0.5, "grad_norm": 0.440876305103302, "learning_rate": 0.0005599146890535178, "loss": 1.8681, "step": 15008 }, { "epoch": 0.5, "grad_norm": 0.44306328892707825, "learning_rate": 0.0005599094693039045, "loss": 1.8865, "step": 15009 }, { "epoch": 0.5, "grad_norm": 0.4250422716140747, "learning_rate": 0.0005599042492387992, "loss": 1.8737, "step": 15010 }, { "epoch": 0.5, "grad_norm": 0.41733914613723755, "learning_rate": 0.0005598990288582086, "loss": 1.8228, "step": 15011 }, { "epoch": 0.5, "grad_norm": 0.4290452301502228, "learning_rate": 0.0005598938081621388, "loss": 1.8162, "step": 15012 }, { "epoch": 0.5, "grad_norm": 0.4377478063106537, "learning_rate": 0.0005598885871505962, "loss": 1.8839, "step": 15013 }, { "epoch": 0.5, "grad_norm": 0.44022560119628906, "learning_rate": 0.0005598833658235872, "loss": 1.8763, "step": 15014 }, { "epoch": 0.5, "grad_norm": 0.44716137647628784, "learning_rate": 0.000559878144181118, "loss": 1.9081, "step": 15015 }, { "epoch": 0.5, "grad_norm": 0.44020935893058777, "learning_rate": 0.000559872922223195, "loss": 1.9117, "step": 15016 }, { "epoch": 0.5, "grad_norm": 0.4428223669528961, "learning_rate": 0.0005598676999498247, "loss": 1.7978, "step": 15017 }, { "epoch": 0.5, "grad_norm": 0.47383373975753784, "learning_rate": 0.0005598624773610132, "loss": 1.8729, "step": 15018 }, { "epoch": 0.5, "grad_norm": 0.4252374470233917, "learning_rate": 0.0005598572544567669, "loss": 1.8843, "step": 15019 }, { "epoch": 0.5, "grad_norm": 0.4321538209915161, "learning_rate": 0.0005598520312370923, "loss": 1.7373, "step": 15020 }, { "epoch": 0.5, "grad_norm": 0.4212619960308075, "learning_rate": 0.0005598468077019956, "loss": 1.8516, "step": 15021 }, { "epoch": 0.5, "grad_norm": 0.44755348563194275, "learning_rate": 0.0005598415838514832, "loss": 1.8624, "step": 15022 }, { "epoch": 0.5, "grad_norm": 0.4465259611606598, "learning_rate": 0.0005598363596855613, "loss": 1.8723, "step": 15023 }, { "epoch": 0.5, "grad_norm": 0.42658764123916626, "learning_rate": 0.0005598311352042364, "loss": 1.9416, "step": 15024 }, { "epoch": 0.5, "grad_norm": 0.42918720841407776, "learning_rate": 0.0005598259104075147, "loss": 1.8275, "step": 15025 }, { "epoch": 0.5, "grad_norm": 0.4304429888725281, "learning_rate": 0.0005598206852954028, "loss": 1.8043, "step": 15026 }, { "epoch": 0.5, "grad_norm": 0.4639158248901367, "learning_rate": 0.0005598154598679067, "loss": 1.8795, "step": 15027 }, { "epoch": 0.5, "grad_norm": 0.4467909038066864, "learning_rate": 0.000559810234125033, "loss": 1.8355, "step": 15028 }, { "epoch": 0.5, "grad_norm": 0.4298335015773773, "learning_rate": 0.000559805008066788, "loss": 1.8316, "step": 15029 }, { "epoch": 0.5, "grad_norm": 0.4501792788505554, "learning_rate": 0.000559799781693178, "loss": 1.8829, "step": 15030 }, { "epoch": 0.5, "grad_norm": 0.4287175238132477, "learning_rate": 0.0005597945550042092, "loss": 1.838, "step": 15031 }, { "epoch": 0.5, "grad_norm": 0.41983693838119507, "learning_rate": 0.0005597893279998882, "loss": 1.8613, "step": 15032 }, { "epoch": 0.5, "grad_norm": 0.43404102325439453, "learning_rate": 0.0005597841006802212, "loss": 1.8749, "step": 15033 }, { "epoch": 0.5, "grad_norm": 0.4506663978099823, "learning_rate": 0.0005597788730452146, "loss": 1.8839, "step": 15034 }, { "epoch": 0.5, "grad_norm": 0.43468210101127625, "learning_rate": 0.0005597736450948748, "loss": 1.8819, "step": 15035 }, { "epoch": 0.5, "grad_norm": 0.45340022444725037, "learning_rate": 0.000559768416829208, "loss": 1.7848, "step": 15036 }, { "epoch": 0.5, "grad_norm": 0.42456918954849243, "learning_rate": 0.0005597631882482205, "loss": 1.9129, "step": 15037 }, { "epoch": 0.5, "grad_norm": 0.417261004447937, "learning_rate": 0.0005597579593519189, "loss": 1.7524, "step": 15038 }, { "epoch": 0.5, "grad_norm": 0.4322691261768341, "learning_rate": 0.0005597527301403094, "loss": 1.8739, "step": 15039 }, { "epoch": 0.5, "grad_norm": 0.4374901354312897, "learning_rate": 0.0005597475006133984, "loss": 1.852, "step": 15040 }, { "epoch": 0.5, "grad_norm": 0.4177898168563843, "learning_rate": 0.0005597422707711922, "loss": 1.8025, "step": 15041 }, { "epoch": 0.5, "grad_norm": 0.4492521286010742, "learning_rate": 0.000559737040613697, "loss": 1.8636, "step": 15042 }, { "epoch": 0.5, "grad_norm": 0.4276603162288666, "learning_rate": 0.0005597318101409194, "loss": 1.826, "step": 15043 }, { "epoch": 0.5, "grad_norm": 0.4141213595867157, "learning_rate": 0.0005597265793528657, "loss": 1.7657, "step": 15044 }, { "epoch": 0.5, "grad_norm": 0.5265322327613831, "learning_rate": 0.0005597213482495422, "loss": 1.9031, "step": 15045 }, { "epoch": 0.5, "grad_norm": 0.44943276047706604, "learning_rate": 0.0005597161168309551, "loss": 1.9064, "step": 15046 }, { "epoch": 0.5, "grad_norm": 0.4363011419773102, "learning_rate": 0.000559710885097111, "loss": 1.8538, "step": 15047 }, { "epoch": 0.5, "grad_norm": 0.43862903118133545, "learning_rate": 0.0005597056530480162, "loss": 1.8194, "step": 15048 }, { "epoch": 0.5, "grad_norm": 0.43147510290145874, "learning_rate": 0.0005597004206836769, "loss": 1.8567, "step": 15049 }, { "epoch": 0.5, "grad_norm": 0.4409583806991577, "learning_rate": 0.0005596951880040997, "loss": 1.8045, "step": 15050 }, { "epoch": 0.5, "grad_norm": 0.4249967932701111, "learning_rate": 0.0005596899550092907, "loss": 1.9624, "step": 15051 }, { "epoch": 0.5, "grad_norm": 0.428949236869812, "learning_rate": 0.0005596847216992564, "loss": 1.8368, "step": 15052 }, { "epoch": 0.5, "grad_norm": 0.4379378855228424, "learning_rate": 0.0005596794880740031, "loss": 1.8308, "step": 15053 }, { "epoch": 0.5, "grad_norm": 0.4378814697265625, "learning_rate": 0.0005596742541335372, "loss": 1.7956, "step": 15054 }, { "epoch": 0.5, "grad_norm": 0.42003902792930603, "learning_rate": 0.0005596690198778649, "loss": 1.8717, "step": 15055 }, { "epoch": 0.5, "grad_norm": 0.4364232122898102, "learning_rate": 0.0005596637853069928, "loss": 1.8739, "step": 15056 }, { "epoch": 0.5, "grad_norm": 0.4236905574798584, "learning_rate": 0.0005596585504209271, "loss": 1.8434, "step": 15057 }, { "epoch": 0.5, "grad_norm": 0.45843055844306946, "learning_rate": 0.0005596533152196741, "loss": 1.8388, "step": 15058 }, { "epoch": 0.5, "grad_norm": 0.44726070761680603, "learning_rate": 0.0005596480797032403, "loss": 1.8045, "step": 15059 }, { "epoch": 0.5, "grad_norm": 0.44105663895606995, "learning_rate": 0.0005596428438716319, "loss": 1.8688, "step": 15060 }, { "epoch": 0.5, "grad_norm": 0.43133261799812317, "learning_rate": 0.0005596376077248554, "loss": 1.8872, "step": 15061 }, { "epoch": 0.5, "grad_norm": 0.43355900049209595, "learning_rate": 0.0005596323712629171, "loss": 1.8779, "step": 15062 }, { "epoch": 0.5, "grad_norm": 0.45558249950408936, "learning_rate": 0.0005596271344858235, "loss": 1.8418, "step": 15063 }, { "epoch": 0.5, "grad_norm": 0.49869662523269653, "learning_rate": 0.0005596218973935806, "loss": 1.9041, "step": 15064 }, { "epoch": 0.5, "grad_norm": 0.43372881412506104, "learning_rate": 0.0005596166599861951, "loss": 1.8419, "step": 15065 }, { "epoch": 0.5, "grad_norm": 0.42052364349365234, "learning_rate": 0.0005596114222636731, "loss": 1.815, "step": 15066 }, { "epoch": 0.5, "grad_norm": 0.4983747601509094, "learning_rate": 0.0005596061842260213, "loss": 1.8866, "step": 15067 }, { "epoch": 0.5, "grad_norm": 0.46511316299438477, "learning_rate": 0.0005596009458732457, "loss": 1.9055, "step": 15068 }, { "epoch": 0.5, "grad_norm": 0.422981321811676, "learning_rate": 0.0005595957072053527, "loss": 1.8788, "step": 15069 }, { "epoch": 0.5, "grad_norm": 0.43086671829223633, "learning_rate": 0.0005595904682223489, "loss": 1.8789, "step": 15070 }, { "epoch": 0.5, "grad_norm": 0.44976118206977844, "learning_rate": 0.0005595852289242405, "loss": 1.8533, "step": 15071 }, { "epoch": 0.5, "grad_norm": 0.4387446343898773, "learning_rate": 0.0005595799893110339, "loss": 1.8692, "step": 15072 }, { "epoch": 0.5, "grad_norm": 0.4442801773548126, "learning_rate": 0.0005595747493827354, "loss": 1.8586, "step": 15073 }, { "epoch": 0.5, "grad_norm": 0.435515820980072, "learning_rate": 0.0005595695091393514, "loss": 1.8961, "step": 15074 }, { "epoch": 0.5, "grad_norm": 0.4256497025489807, "learning_rate": 0.0005595642685808883, "loss": 1.8606, "step": 15075 }, { "epoch": 0.5, "grad_norm": 0.4344136714935303, "learning_rate": 0.0005595590277073524, "loss": 1.9058, "step": 15076 }, { "epoch": 0.5, "grad_norm": 0.4327923655509949, "learning_rate": 0.00055955378651875, "loss": 1.8862, "step": 15077 }, { "epoch": 0.5, "grad_norm": 0.4431811273097992, "learning_rate": 0.0005595485450150877, "loss": 1.8886, "step": 15078 }, { "epoch": 0.5, "grad_norm": 0.42153850197792053, "learning_rate": 0.0005595433031963717, "loss": 1.9, "step": 15079 }, { "epoch": 0.5, "grad_norm": 0.43757444620132446, "learning_rate": 0.0005595380610626083, "loss": 1.8832, "step": 15080 }, { "epoch": 0.5, "grad_norm": 0.4445112347602844, "learning_rate": 0.0005595328186138039, "loss": 1.8779, "step": 15081 }, { "epoch": 0.5, "grad_norm": 0.4520442485809326, "learning_rate": 0.0005595275758499649, "loss": 1.8658, "step": 15082 }, { "epoch": 0.5, "grad_norm": 0.4215058386325836, "learning_rate": 0.0005595223327710978, "loss": 1.9016, "step": 15083 }, { "epoch": 0.5, "grad_norm": 0.4338340759277344, "learning_rate": 0.0005595170893772087, "loss": 1.8164, "step": 15084 }, { "epoch": 0.5, "grad_norm": 0.4332084059715271, "learning_rate": 0.0005595118456683041, "loss": 1.862, "step": 15085 }, { "epoch": 0.5, "grad_norm": 0.44830334186553955, "learning_rate": 0.0005595066016443904, "loss": 1.8624, "step": 15086 }, { "epoch": 0.5, "grad_norm": 0.439735472202301, "learning_rate": 0.0005595013573054738, "loss": 1.8578, "step": 15087 }, { "epoch": 0.5, "grad_norm": 0.43613526225090027, "learning_rate": 0.000559496112651561, "loss": 1.888, "step": 15088 }, { "epoch": 0.5, "grad_norm": 0.43939512968063354, "learning_rate": 0.0005594908676826581, "loss": 1.8246, "step": 15089 }, { "epoch": 0.5, "grad_norm": 0.4298981726169586, "learning_rate": 0.0005594856223987714, "loss": 1.7852, "step": 15090 }, { "epoch": 0.5, "grad_norm": 0.4193440079689026, "learning_rate": 0.0005594803767999075, "loss": 1.8497, "step": 15091 }, { "epoch": 0.5, "grad_norm": 0.4339043200016022, "learning_rate": 0.0005594751308860726, "loss": 1.8555, "step": 15092 }, { "epoch": 0.5, "grad_norm": 0.43279722332954407, "learning_rate": 0.0005594698846572732, "loss": 1.8861, "step": 15093 }, { "epoch": 0.5, "grad_norm": 0.4388607442378998, "learning_rate": 0.0005594646381135154, "loss": 1.9321, "step": 15094 }, { "epoch": 0.5, "grad_norm": 0.48580893874168396, "learning_rate": 0.000559459391254806, "loss": 1.8794, "step": 15095 }, { "epoch": 0.5, "grad_norm": 0.4385170638561249, "learning_rate": 0.000559454144081151, "loss": 1.8397, "step": 15096 }, { "epoch": 0.5, "grad_norm": 0.4517209231853485, "learning_rate": 0.0005594488965925569, "loss": 1.8444, "step": 15097 }, { "epoch": 0.5, "grad_norm": 0.4123697876930237, "learning_rate": 0.0005594436487890301, "loss": 1.8166, "step": 15098 }, { "epoch": 0.5, "grad_norm": 0.4280007481575012, "learning_rate": 0.0005594384006705769, "loss": 1.8517, "step": 15099 }, { "epoch": 0.5, "grad_norm": 0.45048314332962036, "learning_rate": 0.0005594331522372038, "loss": 1.8515, "step": 15100 }, { "epoch": 0.5, "grad_norm": 0.45410430431365967, "learning_rate": 0.000559427903488917, "loss": 1.8222, "step": 15101 }, { "epoch": 0.5, "grad_norm": 0.43695303797721863, "learning_rate": 0.0005594226544257229, "loss": 1.794, "step": 15102 }, { "epoch": 0.5, "grad_norm": 0.43905749917030334, "learning_rate": 0.000559417405047628, "loss": 1.8244, "step": 15103 }, { "epoch": 0.5, "grad_norm": 0.4444117546081543, "learning_rate": 0.0005594121553546385, "loss": 1.9075, "step": 15104 }, { "epoch": 0.5, "grad_norm": 0.46046239137649536, "learning_rate": 0.0005594069053467609, "loss": 1.7903, "step": 15105 }, { "epoch": 0.5, "grad_norm": 0.44481831789016724, "learning_rate": 0.0005594016550240016, "loss": 1.877, "step": 15106 }, { "epoch": 0.5, "grad_norm": 0.43957632780075073, "learning_rate": 0.0005593964043863669, "loss": 1.8956, "step": 15107 }, { "epoch": 0.5, "grad_norm": 0.42930495738983154, "learning_rate": 0.0005593911534338632, "loss": 1.9097, "step": 15108 }, { "epoch": 0.5, "grad_norm": 0.4384136497974396, "learning_rate": 0.0005593859021664968, "loss": 1.8662, "step": 15109 }, { "epoch": 0.5, "grad_norm": 0.4485313892364502, "learning_rate": 0.0005593806505842741, "loss": 1.9064, "step": 15110 }, { "epoch": 0.5, "grad_norm": 0.4295719265937805, "learning_rate": 0.0005593753986872016, "loss": 1.7767, "step": 15111 }, { "epoch": 0.5, "grad_norm": 0.4282493591308594, "learning_rate": 0.0005593701464752856, "loss": 1.8727, "step": 15112 }, { "epoch": 0.5, "grad_norm": 0.4367416799068451, "learning_rate": 0.0005593648939485324, "loss": 1.9007, "step": 15113 }, { "epoch": 0.5, "grad_norm": 0.412097692489624, "learning_rate": 0.0005593596411069485, "loss": 1.8877, "step": 15114 }, { "epoch": 0.5, "grad_norm": 0.43756258487701416, "learning_rate": 0.00055935438795054, "loss": 1.8944, "step": 15115 }, { "epoch": 0.5, "grad_norm": 0.423439085483551, "learning_rate": 0.0005593491344793138, "loss": 1.8161, "step": 15116 }, { "epoch": 0.5, "grad_norm": 0.4454144239425659, "learning_rate": 0.0005593438806932758, "loss": 1.92, "step": 15117 }, { "epoch": 0.5, "grad_norm": 0.45115599036216736, "learning_rate": 0.0005593386265924325, "loss": 1.9369, "step": 15118 }, { "epoch": 0.5, "grad_norm": 0.4466463029384613, "learning_rate": 0.0005593333721767903, "loss": 1.8649, "step": 15119 }, { "epoch": 0.5, "grad_norm": 0.4554729759693146, "learning_rate": 0.0005593281174463558, "loss": 1.8295, "step": 15120 }, { "epoch": 0.5, "grad_norm": 0.44628095626831055, "learning_rate": 0.000559322862401135, "loss": 1.8732, "step": 15121 }, { "epoch": 0.5, "grad_norm": 0.44654586911201477, "learning_rate": 0.0005593176070411345, "loss": 1.8425, "step": 15122 }, { "epoch": 0.5, "grad_norm": 0.4459069073200226, "learning_rate": 0.0005593123513663607, "loss": 1.8146, "step": 15123 }, { "epoch": 0.5, "grad_norm": 0.4896427392959595, "learning_rate": 0.0005593070953768198, "loss": 1.9426, "step": 15124 }, { "epoch": 0.5, "grad_norm": 0.4509585201740265, "learning_rate": 0.0005593018390725183, "loss": 1.845, "step": 15125 }, { "epoch": 0.5, "grad_norm": 0.4420691728591919, "learning_rate": 0.0005592965824534626, "loss": 1.9095, "step": 15126 }, { "epoch": 0.5, "grad_norm": 0.43412449955940247, "learning_rate": 0.000559291325519659, "loss": 1.8813, "step": 15127 }, { "epoch": 0.5, "grad_norm": 0.4230054020881653, "learning_rate": 0.000559286068271114, "loss": 1.8425, "step": 15128 }, { "epoch": 0.5, "grad_norm": 0.4396195113658905, "learning_rate": 0.000559280810707834, "loss": 1.8486, "step": 15129 }, { "epoch": 0.5, "grad_norm": 0.44751423597335815, "learning_rate": 0.0005592755528298252, "loss": 1.9222, "step": 15130 }, { "epoch": 0.5, "grad_norm": 0.4608575403690338, "learning_rate": 0.0005592702946370942, "loss": 1.8427, "step": 15131 }, { "epoch": 0.5, "grad_norm": 0.4438120126724243, "learning_rate": 0.0005592650361296471, "loss": 1.8598, "step": 15132 }, { "epoch": 0.5, "grad_norm": 0.436444491147995, "learning_rate": 0.0005592597773074905, "loss": 1.9405, "step": 15133 }, { "epoch": 0.5, "grad_norm": 0.4415549337863922, "learning_rate": 0.0005592545181706308, "loss": 1.8652, "step": 15134 }, { "epoch": 0.5, "grad_norm": 0.4273093044757843, "learning_rate": 0.0005592492587190742, "loss": 1.8584, "step": 15135 }, { "epoch": 0.5, "grad_norm": 0.4315299689769745, "learning_rate": 0.0005592439989528272, "loss": 1.8246, "step": 15136 }, { "epoch": 0.5, "grad_norm": 0.4951842427253723, "learning_rate": 0.0005592387388718963, "loss": 1.8112, "step": 15137 }, { "epoch": 0.5, "grad_norm": 0.4472070634365082, "learning_rate": 0.0005592334784762877, "loss": 1.8958, "step": 15138 }, { "epoch": 0.5, "grad_norm": 0.42591792345046997, "learning_rate": 0.0005592282177660079, "loss": 1.8743, "step": 15139 }, { "epoch": 0.5, "grad_norm": 0.44272419810295105, "learning_rate": 0.0005592229567410631, "loss": 1.7644, "step": 15140 }, { "epoch": 0.5, "grad_norm": 0.43366894125938416, "learning_rate": 0.0005592176954014599, "loss": 1.8187, "step": 15141 }, { "epoch": 0.5, "grad_norm": 0.4620536267757416, "learning_rate": 0.0005592124337472047, "loss": 1.852, "step": 15142 }, { "epoch": 0.5, "grad_norm": 0.422950804233551, "learning_rate": 0.0005592071717783038, "loss": 1.8033, "step": 15143 }, { "epoch": 0.5, "grad_norm": 0.4170534312725067, "learning_rate": 0.0005592019094947634, "loss": 1.8252, "step": 15144 }, { "epoch": 0.5, "grad_norm": 0.43911466002464294, "learning_rate": 0.0005591966468965902, "loss": 1.8116, "step": 15145 }, { "epoch": 0.5, "grad_norm": 0.44159144163131714, "learning_rate": 0.0005591913839837905, "loss": 1.7774, "step": 15146 }, { "epoch": 0.5, "grad_norm": 0.41224080324172974, "learning_rate": 0.0005591861207563705, "loss": 1.8558, "step": 15147 }, { "epoch": 0.5, "grad_norm": 0.45937198400497437, "learning_rate": 0.0005591808572143369, "loss": 1.8473, "step": 15148 }, { "epoch": 0.5, "grad_norm": 0.4497954249382019, "learning_rate": 0.000559175593357696, "loss": 1.8195, "step": 15149 }, { "epoch": 0.5, "grad_norm": 0.4146408438682556, "learning_rate": 0.0005591703291864539, "loss": 1.8706, "step": 15150 }, { "epoch": 0.5, "grad_norm": 0.424008309841156, "learning_rate": 0.0005591650647006173, "loss": 1.8692, "step": 15151 }, { "epoch": 0.5, "grad_norm": 0.4430982768535614, "learning_rate": 0.0005591597999001925, "loss": 1.8541, "step": 15152 }, { "epoch": 0.5, "grad_norm": 0.4705710709095001, "learning_rate": 0.0005591545347851858, "loss": 1.8316, "step": 15153 }, { "epoch": 0.5, "grad_norm": 0.43124404549598694, "learning_rate": 0.0005591492693556038, "loss": 1.8655, "step": 15154 }, { "epoch": 0.5, "grad_norm": 0.45654284954071045, "learning_rate": 0.0005591440036114527, "loss": 1.8771, "step": 15155 }, { "epoch": 0.5, "grad_norm": 0.4525067210197449, "learning_rate": 0.0005591387375527391, "loss": 1.9097, "step": 15156 }, { "epoch": 0.5, "grad_norm": 0.43177446722984314, "learning_rate": 0.0005591334711794691, "loss": 1.8145, "step": 15157 }, { "epoch": 0.5, "grad_norm": 0.41716307401657104, "learning_rate": 0.0005591282044916493, "loss": 1.8776, "step": 15158 }, { "epoch": 0.5, "grad_norm": 0.4332635998725891, "learning_rate": 0.000559122937489286, "loss": 1.9068, "step": 15159 }, { "epoch": 0.5, "grad_norm": 0.43236327171325684, "learning_rate": 0.0005591176701723857, "loss": 1.8256, "step": 15160 }, { "epoch": 0.5, "grad_norm": 0.4415542483329773, "learning_rate": 0.0005591124025409547, "loss": 1.8443, "step": 15161 }, { "epoch": 0.5, "grad_norm": 0.43111452460289, "learning_rate": 0.0005591071345949994, "loss": 1.8398, "step": 15162 }, { "epoch": 0.5, "grad_norm": 0.4311400055885315, "learning_rate": 0.0005591018663345263, "loss": 1.9237, "step": 15163 }, { "epoch": 0.5, "grad_norm": 0.43590298295021057, "learning_rate": 0.0005590965977595417, "loss": 1.8582, "step": 15164 }, { "epoch": 0.5, "grad_norm": 0.44001078605651855, "learning_rate": 0.0005590913288700519, "loss": 1.8978, "step": 15165 }, { "epoch": 0.5, "grad_norm": 0.4415788948535919, "learning_rate": 0.0005590860596660634, "loss": 1.8347, "step": 15166 }, { "epoch": 0.5, "grad_norm": 0.4249524772167206, "learning_rate": 0.0005590807901475827, "loss": 1.8728, "step": 15167 }, { "epoch": 0.5, "grad_norm": 0.4549696147441864, "learning_rate": 0.0005590755203146161, "loss": 1.9187, "step": 15168 }, { "epoch": 0.5, "grad_norm": 0.44415703415870667, "learning_rate": 0.0005590702501671698, "loss": 1.8209, "step": 15169 }, { "epoch": 0.5, "grad_norm": 0.42667868733406067, "learning_rate": 0.0005590649797052506, "loss": 1.8326, "step": 15170 }, { "epoch": 0.5, "grad_norm": 0.45749524235725403, "learning_rate": 0.0005590597089288646, "loss": 1.8323, "step": 15171 }, { "epoch": 0.5, "grad_norm": 0.4418098032474518, "learning_rate": 0.0005590544378380182, "loss": 1.9103, "step": 15172 }, { "epoch": 0.5, "grad_norm": 0.42595064640045166, "learning_rate": 0.000559049166432718, "loss": 1.9371, "step": 15173 }, { "epoch": 0.5, "grad_norm": 0.4714567959308624, "learning_rate": 0.0005590438947129703, "loss": 1.877, "step": 15174 }, { "epoch": 0.5, "grad_norm": 0.462999552488327, "learning_rate": 0.0005590386226787815, "loss": 1.8708, "step": 15175 }, { "epoch": 0.5, "grad_norm": 0.43481728434562683, "learning_rate": 0.0005590333503301578, "loss": 1.8758, "step": 15176 }, { "epoch": 0.5, "grad_norm": 0.42822280526161194, "learning_rate": 0.000559028077667106, "loss": 1.8593, "step": 15177 }, { "epoch": 0.5, "grad_norm": 0.4271271526813507, "learning_rate": 0.0005590228046896323, "loss": 1.7625, "step": 15178 }, { "epoch": 0.51, "grad_norm": 0.9993783831596375, "learning_rate": 0.0005590175313977428, "loss": 1.8253, "step": 15179 }, { "epoch": 0.51, "grad_norm": 0.4287355840206146, "learning_rate": 0.0005590122577914443, "loss": 1.8546, "step": 15180 }, { "epoch": 0.51, "grad_norm": 0.44800227880477905, "learning_rate": 0.0005590069838707431, "loss": 1.9243, "step": 15181 }, { "epoch": 0.51, "grad_norm": 0.44939154386520386, "learning_rate": 0.0005590017096356457, "loss": 1.778, "step": 15182 }, { "epoch": 0.51, "grad_norm": 0.4171617031097412, "learning_rate": 0.0005589964350861584, "loss": 1.8621, "step": 15183 }, { "epoch": 0.51, "grad_norm": 0.42388537526130676, "learning_rate": 0.0005589911602222874, "loss": 1.8711, "step": 15184 }, { "epoch": 0.51, "grad_norm": 0.43975967168807983, "learning_rate": 0.0005589858850440394, "loss": 1.8461, "step": 15185 }, { "epoch": 0.51, "grad_norm": 0.430540531873703, "learning_rate": 0.0005589806095514206, "loss": 1.8246, "step": 15186 }, { "epoch": 0.51, "grad_norm": 0.43109872937202454, "learning_rate": 0.0005589753337444375, "loss": 1.76, "step": 15187 }, { "epoch": 0.51, "grad_norm": 0.4315304756164551, "learning_rate": 0.0005589700576230967, "loss": 1.901, "step": 15188 }, { "epoch": 0.51, "grad_norm": 0.43196555972099304, "learning_rate": 0.0005589647811874043, "loss": 1.8721, "step": 15189 }, { "epoch": 0.51, "grad_norm": 0.4279762804508209, "learning_rate": 0.0005589595044373667, "loss": 1.9248, "step": 15190 }, { "epoch": 0.51, "grad_norm": 0.4411599040031433, "learning_rate": 0.0005589542273729906, "loss": 1.8483, "step": 15191 }, { "epoch": 0.51, "grad_norm": 0.4362255930900574, "learning_rate": 0.0005589489499942821, "loss": 1.8795, "step": 15192 }, { "epoch": 0.51, "grad_norm": 0.4323142468929291, "learning_rate": 0.0005589436723012478, "loss": 1.8686, "step": 15193 }, { "epoch": 0.51, "grad_norm": 0.43426313996315, "learning_rate": 0.000558938394293894, "loss": 1.8369, "step": 15194 }, { "epoch": 0.51, "grad_norm": 0.4290523827075958, "learning_rate": 0.0005589331159722271, "loss": 1.8713, "step": 15195 }, { "epoch": 0.51, "grad_norm": 0.46044018864631653, "learning_rate": 0.0005589278373362536, "loss": 1.7789, "step": 15196 }, { "epoch": 0.51, "grad_norm": 0.4273592531681061, "learning_rate": 0.0005589225583859798, "loss": 1.865, "step": 15197 }, { "epoch": 0.51, "grad_norm": 0.4613545835018158, "learning_rate": 0.0005589172791214122, "loss": 1.8729, "step": 15198 }, { "epoch": 0.51, "grad_norm": 0.44822394847869873, "learning_rate": 0.0005589119995425573, "loss": 1.8552, "step": 15199 }, { "epoch": 0.51, "grad_norm": 0.4534854590892792, "learning_rate": 0.0005589067196494213, "loss": 1.8696, "step": 15200 }, { "epoch": 0.51, "grad_norm": 0.44702765345573425, "learning_rate": 0.0005589014394420106, "loss": 1.9337, "step": 15201 }, { "epoch": 0.51, "grad_norm": 0.4401167333126068, "learning_rate": 0.0005588961589203318, "loss": 1.7743, "step": 15202 }, { "epoch": 0.51, "grad_norm": 0.44974663853645325, "learning_rate": 0.000558890878084391, "loss": 1.8481, "step": 15203 }, { "epoch": 0.51, "grad_norm": 0.4292556941509247, "learning_rate": 0.0005588855969341951, "loss": 1.8753, "step": 15204 }, { "epoch": 0.51, "grad_norm": 0.4191732704639435, "learning_rate": 0.00055888031546975, "loss": 1.8448, "step": 15205 }, { "epoch": 0.51, "grad_norm": 0.45702314376831055, "learning_rate": 0.0005588750336910625, "loss": 1.7872, "step": 15206 }, { "epoch": 0.51, "grad_norm": 0.4582860767841339, "learning_rate": 0.0005588697515981388, "loss": 1.8238, "step": 15207 }, { "epoch": 0.51, "grad_norm": 0.4381924569606781, "learning_rate": 0.0005588644691909854, "loss": 1.9599, "step": 15208 }, { "epoch": 0.51, "grad_norm": 0.4422686994075775, "learning_rate": 0.0005588591864696087, "loss": 1.9273, "step": 15209 }, { "epoch": 0.51, "grad_norm": 0.45270994305610657, "learning_rate": 0.000558853903434015, "loss": 1.8408, "step": 15210 }, { "epoch": 0.51, "grad_norm": 0.42318907380104065, "learning_rate": 0.0005588486200842109, "loss": 1.8919, "step": 15211 }, { "epoch": 0.51, "grad_norm": 0.43151238560676575, "learning_rate": 0.0005588433364202026, "loss": 1.9592, "step": 15212 }, { "epoch": 0.51, "grad_norm": 0.4558130204677582, "learning_rate": 0.0005588380524419967, "loss": 1.8117, "step": 15213 }, { "epoch": 0.51, "grad_norm": 0.4247613549232483, "learning_rate": 0.0005588327681495996, "loss": 1.8839, "step": 15214 }, { "epoch": 0.51, "grad_norm": 0.4309777617454529, "learning_rate": 0.0005588274835430175, "loss": 1.834, "step": 15215 }, { "epoch": 0.51, "grad_norm": 0.41575899720191956, "learning_rate": 0.000558822198622257, "loss": 1.8513, "step": 15216 }, { "epoch": 0.51, "grad_norm": 0.42225632071495056, "learning_rate": 0.0005588169133873246, "loss": 1.8166, "step": 15217 }, { "epoch": 0.51, "grad_norm": 0.4421314299106598, "learning_rate": 0.0005588116278382265, "loss": 1.7935, "step": 15218 }, { "epoch": 0.51, "grad_norm": 0.42978647351264954, "learning_rate": 0.0005588063419749693, "loss": 1.9005, "step": 15219 }, { "epoch": 0.51, "grad_norm": 0.42549264430999756, "learning_rate": 0.0005588010557975593, "loss": 1.8836, "step": 15220 }, { "epoch": 0.51, "grad_norm": 0.43133485317230225, "learning_rate": 0.000558795769306003, "loss": 1.8541, "step": 15221 }, { "epoch": 0.51, "grad_norm": 0.4313335120677948, "learning_rate": 0.0005587904825003066, "loss": 1.8732, "step": 15222 }, { "epoch": 0.51, "grad_norm": 0.43123558163642883, "learning_rate": 0.0005587851953804769, "loss": 1.9481, "step": 15223 }, { "epoch": 0.51, "grad_norm": 0.41266709566116333, "learning_rate": 0.00055877990794652, "loss": 1.7741, "step": 15224 }, { "epoch": 0.51, "grad_norm": 0.4470561146736145, "learning_rate": 0.0005587746201984424, "loss": 1.8712, "step": 15225 }, { "epoch": 0.51, "grad_norm": 0.4354751706123352, "learning_rate": 0.0005587693321362506, "loss": 1.853, "step": 15226 }, { "epoch": 0.51, "grad_norm": 0.41534385085105896, "learning_rate": 0.0005587640437599509, "loss": 1.8861, "step": 15227 }, { "epoch": 0.51, "grad_norm": 0.430882066488266, "learning_rate": 0.0005587587550695498, "loss": 1.8931, "step": 15228 }, { "epoch": 0.51, "grad_norm": 0.438632994890213, "learning_rate": 0.0005587534660650537, "loss": 1.881, "step": 15229 }, { "epoch": 0.51, "grad_norm": 0.44028493762016296, "learning_rate": 0.000558748176746469, "loss": 1.8085, "step": 15230 }, { "epoch": 0.51, "grad_norm": 0.4156191051006317, "learning_rate": 0.0005587428871138021, "loss": 1.8291, "step": 15231 }, { "epoch": 0.51, "grad_norm": 0.4223487973213196, "learning_rate": 0.0005587375971670595, "loss": 1.8474, "step": 15232 }, { "epoch": 0.51, "grad_norm": 0.4221535325050354, "learning_rate": 0.0005587323069062476, "loss": 1.8758, "step": 15233 }, { "epoch": 0.51, "grad_norm": 0.4211210310459137, "learning_rate": 0.0005587270163313729, "loss": 1.8138, "step": 15234 }, { "epoch": 0.51, "grad_norm": 0.43232306838035583, "learning_rate": 0.0005587217254424416, "loss": 1.848, "step": 15235 }, { "epoch": 0.51, "grad_norm": 0.45384055376052856, "learning_rate": 0.0005587164342394602, "loss": 1.856, "step": 15236 }, { "epoch": 0.51, "grad_norm": 0.422589510679245, "learning_rate": 0.0005587111427224353, "loss": 1.8624, "step": 15237 }, { "epoch": 0.51, "grad_norm": 0.43234559893608093, "learning_rate": 0.000558705850891373, "loss": 1.8779, "step": 15238 }, { "epoch": 0.51, "grad_norm": 0.4329051077365875, "learning_rate": 0.0005587005587462801, "loss": 1.8593, "step": 15239 }, { "epoch": 0.51, "grad_norm": 0.4377087354660034, "learning_rate": 0.0005586952662871628, "loss": 1.9136, "step": 15240 }, { "epoch": 0.51, "grad_norm": 0.4525473713874817, "learning_rate": 0.0005586899735140275, "loss": 1.8075, "step": 15241 }, { "epoch": 0.51, "grad_norm": 0.43117547035217285, "learning_rate": 0.0005586846804268807, "loss": 1.8603, "step": 15242 }, { "epoch": 0.51, "grad_norm": 0.42771896719932556, "learning_rate": 0.0005586793870257289, "loss": 1.8935, "step": 15243 }, { "epoch": 0.51, "grad_norm": 0.4140816032886505, "learning_rate": 0.0005586740933105784, "loss": 1.8516, "step": 15244 }, { "epoch": 0.51, "grad_norm": 0.4558410048484802, "learning_rate": 0.0005586687992814356, "loss": 1.8584, "step": 15245 }, { "epoch": 0.51, "grad_norm": 0.4324086606502533, "learning_rate": 0.000558663504938307, "loss": 1.8404, "step": 15246 }, { "epoch": 0.51, "grad_norm": 0.41757264733314514, "learning_rate": 0.0005586582102811991, "loss": 1.8744, "step": 15247 }, { "epoch": 0.51, "grad_norm": 0.4335598945617676, "learning_rate": 0.0005586529153101183, "loss": 1.8779, "step": 15248 }, { "epoch": 0.51, "grad_norm": 0.43365344405174255, "learning_rate": 0.0005586476200250708, "loss": 1.8356, "step": 15249 }, { "epoch": 0.51, "grad_norm": 0.4405105710029602, "learning_rate": 0.0005586423244260633, "loss": 1.9033, "step": 15250 }, { "epoch": 0.51, "grad_norm": 0.43196168541908264, "learning_rate": 0.0005586370285131021, "loss": 1.8514, "step": 15251 }, { "epoch": 0.51, "grad_norm": 0.43649807572364807, "learning_rate": 0.0005586317322861936, "loss": 1.8915, "step": 15252 }, { "epoch": 0.51, "grad_norm": 0.47087106108665466, "learning_rate": 0.0005586264357453443, "loss": 1.8524, "step": 15253 }, { "epoch": 0.51, "grad_norm": 0.43815311789512634, "learning_rate": 0.0005586211388905608, "loss": 1.8541, "step": 15254 }, { "epoch": 0.51, "grad_norm": 0.43451619148254395, "learning_rate": 0.0005586158417218491, "loss": 1.8792, "step": 15255 }, { "epoch": 0.51, "grad_norm": 0.43332287669181824, "learning_rate": 0.000558610544239216, "loss": 1.8525, "step": 15256 }, { "epoch": 0.51, "grad_norm": 0.43768882751464844, "learning_rate": 0.0005586052464426677, "loss": 1.8326, "step": 15257 }, { "epoch": 0.51, "grad_norm": 0.4500525891780853, "learning_rate": 0.0005585999483322108, "loss": 1.918, "step": 15258 }, { "epoch": 0.51, "grad_norm": 0.4398650825023651, "learning_rate": 0.0005585946499078517, "loss": 1.8901, "step": 15259 }, { "epoch": 0.51, "grad_norm": 0.42526498436927795, "learning_rate": 0.0005585893511695967, "loss": 1.9171, "step": 15260 }, { "epoch": 0.51, "grad_norm": 0.44714823365211487, "learning_rate": 0.0005585840521174525, "loss": 1.8775, "step": 15261 }, { "epoch": 0.51, "grad_norm": 0.4391997754573822, "learning_rate": 0.0005585787527514253, "loss": 1.8212, "step": 15262 }, { "epoch": 0.51, "grad_norm": 0.42634502053260803, "learning_rate": 0.0005585734530715215, "loss": 1.7666, "step": 15263 }, { "epoch": 0.51, "grad_norm": 0.43451127409935, "learning_rate": 0.0005585681530777477, "loss": 1.8644, "step": 15264 }, { "epoch": 0.51, "grad_norm": 0.4453391134738922, "learning_rate": 0.0005585628527701102, "loss": 1.8684, "step": 15265 }, { "epoch": 0.51, "grad_norm": 0.4395527243614197, "learning_rate": 0.0005585575521486156, "loss": 1.8752, "step": 15266 }, { "epoch": 0.51, "grad_norm": 0.4365784525871277, "learning_rate": 0.0005585522512132702, "loss": 1.8435, "step": 15267 }, { "epoch": 0.51, "grad_norm": 0.4261806905269623, "learning_rate": 0.0005585469499640803, "loss": 1.7764, "step": 15268 }, { "epoch": 0.51, "grad_norm": 0.4249812066555023, "learning_rate": 0.0005585416484010528, "loss": 1.8731, "step": 15269 }, { "epoch": 0.51, "grad_norm": 0.4330010712146759, "learning_rate": 0.0005585363465241936, "loss": 1.8658, "step": 15270 }, { "epoch": 0.51, "grad_norm": 0.42576462030410767, "learning_rate": 0.0005585310443335094, "loss": 1.848, "step": 15271 }, { "epoch": 0.51, "grad_norm": 0.4446004033088684, "learning_rate": 0.0005585257418290067, "loss": 1.8617, "step": 15272 }, { "epoch": 0.51, "grad_norm": 0.45461714267730713, "learning_rate": 0.0005585204390106917, "loss": 1.8633, "step": 15273 }, { "epoch": 0.51, "grad_norm": 0.44097477197647095, "learning_rate": 0.000558515135878571, "loss": 1.8957, "step": 15274 }, { "epoch": 0.51, "grad_norm": 0.4260860085487366, "learning_rate": 0.0005585098324326511, "loss": 1.8894, "step": 15275 }, { "epoch": 0.51, "grad_norm": 0.43622279167175293, "learning_rate": 0.0005585045286729384, "loss": 1.8549, "step": 15276 }, { "epoch": 0.51, "grad_norm": 0.4257858097553253, "learning_rate": 0.0005584992245994392, "loss": 1.8448, "step": 15277 }, { "epoch": 0.51, "grad_norm": 0.44078993797302246, "learning_rate": 0.00055849392021216, "loss": 1.787, "step": 15278 }, { "epoch": 0.51, "grad_norm": 0.4451853036880493, "learning_rate": 0.0005584886155111074, "loss": 1.8261, "step": 15279 }, { "epoch": 0.51, "grad_norm": 0.44394469261169434, "learning_rate": 0.0005584833104962875, "loss": 1.8132, "step": 15280 }, { "epoch": 0.51, "grad_norm": 0.43773117661476135, "learning_rate": 0.000558478005167707, "loss": 1.8544, "step": 15281 }, { "epoch": 0.51, "grad_norm": 0.44818976521492004, "learning_rate": 0.0005584726995253725, "loss": 1.8378, "step": 15282 }, { "epoch": 0.51, "grad_norm": 0.47478169202804565, "learning_rate": 0.00055846739356929, "loss": 1.9338, "step": 15283 }, { "epoch": 0.51, "grad_norm": 0.7596960663795471, "learning_rate": 0.0005584620872994662, "loss": 1.9161, "step": 15284 }, { "epoch": 0.51, "grad_norm": 0.45849844813346863, "learning_rate": 0.0005584567807159076, "loss": 1.8344, "step": 15285 }, { "epoch": 0.51, "grad_norm": 0.4523889422416687, "learning_rate": 0.0005584514738186206, "loss": 1.8422, "step": 15286 }, { "epoch": 0.51, "grad_norm": 0.43131956458091736, "learning_rate": 0.0005584461666076114, "loss": 1.8375, "step": 15287 }, { "epoch": 0.51, "grad_norm": 0.4451218545436859, "learning_rate": 0.0005584408590828866, "loss": 1.826, "step": 15288 }, { "epoch": 0.51, "grad_norm": 0.4208396077156067, "learning_rate": 0.0005584355512444529, "loss": 1.8716, "step": 15289 }, { "epoch": 0.51, "grad_norm": 0.42964592576026917, "learning_rate": 0.0005584302430923165, "loss": 1.7775, "step": 15290 }, { "epoch": 0.51, "grad_norm": 0.44381213188171387, "learning_rate": 0.0005584249346264838, "loss": 1.873, "step": 15291 }, { "epoch": 0.51, "grad_norm": 0.4274013340473175, "learning_rate": 0.0005584196258469613, "loss": 1.8644, "step": 15292 }, { "epoch": 0.51, "grad_norm": 0.43252941966056824, "learning_rate": 0.0005584143167537554, "loss": 1.8631, "step": 15293 }, { "epoch": 0.51, "grad_norm": 0.45195695757865906, "learning_rate": 0.0005584090073468726, "loss": 1.8374, "step": 15294 }, { "epoch": 0.51, "grad_norm": 0.42817631363868713, "learning_rate": 0.0005584036976263194, "loss": 1.7867, "step": 15295 }, { "epoch": 0.51, "grad_norm": 0.4370375871658325, "learning_rate": 0.0005583983875921022, "loss": 1.8235, "step": 15296 }, { "epoch": 0.51, "grad_norm": 0.4305312931537628, "learning_rate": 0.0005583930772442274, "loss": 1.8366, "step": 15297 }, { "epoch": 0.51, "grad_norm": 0.44402462244033813, "learning_rate": 0.0005583877665827014, "loss": 1.9032, "step": 15298 }, { "epoch": 0.51, "grad_norm": 0.4357481002807617, "learning_rate": 0.0005583824556075308, "loss": 1.7857, "step": 15299 }, { "epoch": 0.51, "grad_norm": 0.4387427866458893, "learning_rate": 0.000558377144318722, "loss": 1.89, "step": 15300 }, { "epoch": 0.51, "grad_norm": 0.42619937658309937, "learning_rate": 0.0005583718327162813, "loss": 1.9188, "step": 15301 }, { "epoch": 0.51, "grad_norm": 0.43951016664505005, "learning_rate": 0.0005583665208002154, "loss": 1.8155, "step": 15302 }, { "epoch": 0.51, "grad_norm": 0.42641159892082214, "learning_rate": 0.0005583612085705306, "loss": 1.9134, "step": 15303 }, { "epoch": 0.51, "grad_norm": 0.4233463406562805, "learning_rate": 0.0005583558960272333, "loss": 1.8702, "step": 15304 }, { "epoch": 0.51, "grad_norm": 0.41717860102653503, "learning_rate": 0.00055835058317033, "loss": 1.846, "step": 15305 }, { "epoch": 0.51, "grad_norm": 0.4609487056732178, "learning_rate": 0.0005583452699998272, "loss": 1.8489, "step": 15306 }, { "epoch": 0.51, "grad_norm": 0.4604682922363281, "learning_rate": 0.0005583399565157312, "loss": 1.897, "step": 15307 }, { "epoch": 0.51, "grad_norm": 0.4245699644088745, "learning_rate": 0.0005583346427180488, "loss": 1.8835, "step": 15308 }, { "epoch": 0.51, "grad_norm": 0.4310831129550934, "learning_rate": 0.000558329328606786, "loss": 1.891, "step": 15309 }, { "epoch": 0.51, "grad_norm": 0.44865018129348755, "learning_rate": 0.0005583240141819495, "loss": 1.8762, "step": 15310 }, { "epoch": 0.51, "grad_norm": 0.42910104990005493, "learning_rate": 0.0005583186994435458, "loss": 1.8735, "step": 15311 }, { "epoch": 0.51, "grad_norm": 0.6983257532119751, "learning_rate": 0.0005583133843915812, "loss": 1.8429, "step": 15312 }, { "epoch": 0.51, "grad_norm": 0.43024417757987976, "learning_rate": 0.0005583080690260621, "loss": 1.9153, "step": 15313 }, { "epoch": 0.51, "grad_norm": 0.45344555377960205, "learning_rate": 0.0005583027533469952, "loss": 1.9017, "step": 15314 }, { "epoch": 0.51, "grad_norm": 0.4309830069541931, "learning_rate": 0.0005582974373543868, "loss": 1.8699, "step": 15315 }, { "epoch": 0.51, "grad_norm": 0.4272823929786682, "learning_rate": 0.0005582921210482434, "loss": 1.8974, "step": 15316 }, { "epoch": 0.51, "grad_norm": 0.4389766454696655, "learning_rate": 0.0005582868044285712, "loss": 1.8692, "step": 15317 }, { "epoch": 0.51, "grad_norm": 0.433554470539093, "learning_rate": 0.0005582814874953772, "loss": 1.8663, "step": 15318 }, { "epoch": 0.51, "grad_norm": 0.43253985047340393, "learning_rate": 0.0005582761702486674, "loss": 1.7431, "step": 15319 }, { "epoch": 0.51, "grad_norm": 0.4309113025665283, "learning_rate": 0.0005582708526884483, "loss": 1.829, "step": 15320 }, { "epoch": 0.51, "grad_norm": 0.4530225098133087, "learning_rate": 0.0005582655348147265, "loss": 1.859, "step": 15321 }, { "epoch": 0.51, "grad_norm": 0.4433304965496063, "learning_rate": 0.0005582602166275085, "loss": 1.9078, "step": 15322 }, { "epoch": 0.51, "grad_norm": 0.45080214738845825, "learning_rate": 0.0005582548981268005, "loss": 1.8922, "step": 15323 }, { "epoch": 0.51, "grad_norm": 0.43711474537849426, "learning_rate": 0.0005582495793126091, "loss": 1.8924, "step": 15324 }, { "epoch": 0.51, "grad_norm": 0.4744264781475067, "learning_rate": 0.0005582442601849408, "loss": 1.878, "step": 15325 }, { "epoch": 0.51, "grad_norm": 0.44658392667770386, "learning_rate": 0.000558238940743802, "loss": 1.91, "step": 15326 }, { "epoch": 0.51, "grad_norm": 0.41640299558639526, "learning_rate": 0.0005582336209891992, "loss": 1.8725, "step": 15327 }, { "epoch": 0.51, "grad_norm": 0.44558459520339966, "learning_rate": 0.0005582283009211388, "loss": 1.8699, "step": 15328 }, { "epoch": 0.51, "grad_norm": 0.44666850566864014, "learning_rate": 0.0005582229805396274, "loss": 1.7877, "step": 15329 }, { "epoch": 0.51, "grad_norm": 0.4498753845691681, "learning_rate": 0.0005582176598446713, "loss": 1.8297, "step": 15330 }, { "epoch": 0.51, "grad_norm": 0.42604538798332214, "learning_rate": 0.0005582123388362771, "loss": 1.8929, "step": 15331 }, { "epoch": 0.51, "grad_norm": 0.4240584671497345, "learning_rate": 0.000558207017514451, "loss": 1.856, "step": 15332 }, { "epoch": 0.51, "grad_norm": 0.4476080536842346, "learning_rate": 0.0005582016958791997, "loss": 1.9017, "step": 15333 }, { "epoch": 0.51, "grad_norm": 0.46129146218299866, "learning_rate": 0.0005581963739305295, "loss": 1.8991, "step": 15334 }, { "epoch": 0.51, "grad_norm": 0.4459284842014313, "learning_rate": 0.000558191051668447, "loss": 1.9202, "step": 15335 }, { "epoch": 0.51, "grad_norm": 0.43842020630836487, "learning_rate": 0.0005581857290929586, "loss": 1.8982, "step": 15336 }, { "epoch": 0.51, "grad_norm": 0.4345002770423889, "learning_rate": 0.0005581804062040708, "loss": 1.9131, "step": 15337 }, { "epoch": 0.51, "grad_norm": 0.4646436870098114, "learning_rate": 0.00055817508300179, "loss": 1.8504, "step": 15338 }, { "epoch": 0.51, "grad_norm": 0.41895297169685364, "learning_rate": 0.0005581697594861227, "loss": 1.815, "step": 15339 }, { "epoch": 0.51, "grad_norm": 0.4441705644130707, "learning_rate": 0.0005581644356570754, "loss": 1.9354, "step": 15340 }, { "epoch": 0.51, "grad_norm": 0.44407668709754944, "learning_rate": 0.0005581591115146545, "loss": 1.833, "step": 15341 }, { "epoch": 0.51, "grad_norm": 0.4284895062446594, "learning_rate": 0.0005581537870588664, "loss": 1.8035, "step": 15342 }, { "epoch": 0.51, "grad_norm": 0.46122872829437256, "learning_rate": 0.0005581484622897176, "loss": 1.8903, "step": 15343 }, { "epoch": 0.51, "grad_norm": 0.43257051706314087, "learning_rate": 0.0005581431372072148, "loss": 1.7702, "step": 15344 }, { "epoch": 0.51, "grad_norm": 0.44172704219818115, "learning_rate": 0.0005581378118113641, "loss": 1.8119, "step": 15345 }, { "epoch": 0.51, "grad_norm": 0.42853882908821106, "learning_rate": 0.0005581324861021722, "loss": 1.8217, "step": 15346 }, { "epoch": 0.51, "grad_norm": 0.42624861001968384, "learning_rate": 0.0005581271600796455, "loss": 1.8765, "step": 15347 }, { "epoch": 0.51, "grad_norm": 0.42821645736694336, "learning_rate": 0.0005581218337437904, "loss": 1.8274, "step": 15348 }, { "epoch": 0.51, "grad_norm": 0.43481752276420593, "learning_rate": 0.0005581165070946135, "loss": 1.8253, "step": 15349 }, { "epoch": 0.51, "grad_norm": 0.43036818504333496, "learning_rate": 0.0005581111801321212, "loss": 1.8859, "step": 15350 }, { "epoch": 0.51, "grad_norm": 0.4264589846134186, "learning_rate": 0.0005581058528563199, "loss": 1.8342, "step": 15351 }, { "epoch": 0.51, "grad_norm": 0.4222944676876068, "learning_rate": 0.0005581005252672161, "loss": 1.8875, "step": 15352 }, { "epoch": 0.51, "grad_norm": 0.42787662148475647, "learning_rate": 0.0005580951973648163, "loss": 1.8277, "step": 15353 }, { "epoch": 0.51, "grad_norm": 0.44775843620300293, "learning_rate": 0.000558089869149127, "loss": 1.8044, "step": 15354 }, { "epoch": 0.51, "grad_norm": 0.4121188223361969, "learning_rate": 0.0005580845406201547, "loss": 1.7829, "step": 15355 }, { "epoch": 0.51, "grad_norm": 0.4312034249305725, "learning_rate": 0.0005580792117779056, "loss": 1.8542, "step": 15356 }, { "epoch": 0.51, "grad_norm": 0.42901405692100525, "learning_rate": 0.0005580738826223865, "loss": 1.8923, "step": 15357 }, { "epoch": 0.51, "grad_norm": 0.41589099168777466, "learning_rate": 0.0005580685531536038, "loss": 1.8051, "step": 15358 }, { "epoch": 0.51, "grad_norm": 0.42066091299057007, "learning_rate": 0.0005580632233715638, "loss": 1.8029, "step": 15359 }, { "epoch": 0.51, "grad_norm": 0.42304739356040955, "learning_rate": 0.0005580578932762731, "loss": 1.8397, "step": 15360 }, { "epoch": 0.51, "grad_norm": 0.41625213623046875, "learning_rate": 0.0005580525628677381, "loss": 1.9187, "step": 15361 }, { "epoch": 0.51, "grad_norm": 0.4223507344722748, "learning_rate": 0.0005580472321459653, "loss": 1.8548, "step": 15362 }, { "epoch": 0.51, "grad_norm": 0.41520848870277405, "learning_rate": 0.0005580419011109612, "loss": 1.7894, "step": 15363 }, { "epoch": 0.51, "grad_norm": 0.4267715513706207, "learning_rate": 0.0005580365697627323, "loss": 1.792, "step": 15364 }, { "epoch": 0.51, "grad_norm": 0.4318527579307556, "learning_rate": 0.000558031238101285, "loss": 1.8343, "step": 15365 }, { "epoch": 0.51, "grad_norm": 0.4456562101840973, "learning_rate": 0.0005580259061266259, "loss": 1.9181, "step": 15366 }, { "epoch": 0.51, "grad_norm": 0.4270296096801758, "learning_rate": 0.0005580205738387611, "loss": 1.9552, "step": 15367 }, { "epoch": 0.51, "grad_norm": 0.43205970525741577, "learning_rate": 0.0005580152412376975, "loss": 1.8658, "step": 15368 }, { "epoch": 0.51, "grad_norm": 0.4234994649887085, "learning_rate": 0.0005580099083234415, "loss": 1.8374, "step": 15369 }, { "epoch": 0.51, "grad_norm": 0.43547922372817993, "learning_rate": 0.0005580045750959993, "loss": 1.846, "step": 15370 }, { "epoch": 0.51, "grad_norm": 0.4107402563095093, "learning_rate": 0.0005579992415553777, "loss": 1.771, "step": 15371 }, { "epoch": 0.51, "grad_norm": 0.4375465214252472, "learning_rate": 0.000557993907701583, "loss": 1.862, "step": 15372 }, { "epoch": 0.51, "grad_norm": 0.4187200963497162, "learning_rate": 0.0005579885735346217, "loss": 1.895, "step": 15373 }, { "epoch": 0.51, "grad_norm": 0.42772290110588074, "learning_rate": 0.0005579832390545002, "loss": 1.8284, "step": 15374 }, { "epoch": 0.51, "grad_norm": 0.4284980893135071, "learning_rate": 0.0005579779042612252, "loss": 1.8932, "step": 15375 }, { "epoch": 0.51, "grad_norm": 0.4414166212081909, "learning_rate": 0.000557972569154803, "loss": 1.8717, "step": 15376 }, { "epoch": 0.51, "grad_norm": 0.41404062509536743, "learning_rate": 0.0005579672337352401, "loss": 1.8256, "step": 15377 }, { "epoch": 0.51, "grad_norm": 0.4214441478252411, "learning_rate": 0.000557961898002543, "loss": 1.802, "step": 15378 }, { "epoch": 0.51, "grad_norm": 0.4267174005508423, "learning_rate": 0.0005579565619567181, "loss": 1.8352, "step": 15379 }, { "epoch": 0.51, "grad_norm": 0.4247537851333618, "learning_rate": 0.000557951225597772, "loss": 1.7953, "step": 15380 }, { "epoch": 0.51, "grad_norm": 0.4376578629016876, "learning_rate": 0.0005579458889257112, "loss": 1.8499, "step": 15381 }, { "epoch": 0.51, "grad_norm": 0.4162425398826599, "learning_rate": 0.0005579405519405419, "loss": 1.8607, "step": 15382 }, { "epoch": 0.51, "grad_norm": 1.2944321632385254, "learning_rate": 0.0005579352146422709, "loss": 1.8639, "step": 15383 }, { "epoch": 0.51, "grad_norm": 0.44595038890838623, "learning_rate": 0.0005579298770309046, "loss": 1.8469, "step": 15384 }, { "epoch": 0.51, "grad_norm": 0.44586658477783203, "learning_rate": 0.0005579245391064493, "loss": 1.8634, "step": 15385 }, { "epoch": 0.51, "grad_norm": 0.4475589692592621, "learning_rate": 0.0005579192008689117, "loss": 1.882, "step": 15386 }, { "epoch": 0.51, "grad_norm": 0.4434645473957062, "learning_rate": 0.0005579138623182982, "loss": 1.9306, "step": 15387 }, { "epoch": 0.51, "grad_norm": 0.4446863830089569, "learning_rate": 0.0005579085234546152, "loss": 1.9017, "step": 15388 }, { "epoch": 0.51, "grad_norm": 0.4292435944080353, "learning_rate": 0.0005579031842778693, "loss": 1.8767, "step": 15389 }, { "epoch": 0.51, "grad_norm": 0.4330173134803772, "learning_rate": 0.000557897844788067, "loss": 1.7945, "step": 15390 }, { "epoch": 0.51, "grad_norm": 0.47172847390174866, "learning_rate": 0.0005578925049852146, "loss": 1.8717, "step": 15391 }, { "epoch": 0.51, "grad_norm": 0.41819021105766296, "learning_rate": 0.0005578871648693188, "loss": 1.8881, "step": 15392 }, { "epoch": 0.51, "grad_norm": 0.42810073494911194, "learning_rate": 0.0005578818244403858, "loss": 1.8898, "step": 15393 }, { "epoch": 0.51, "grad_norm": 0.41800767183303833, "learning_rate": 0.0005578764836984225, "loss": 1.86, "step": 15394 }, { "epoch": 0.51, "grad_norm": 0.46580302715301514, "learning_rate": 0.0005578711426434351, "loss": 1.858, "step": 15395 }, { "epoch": 0.51, "grad_norm": 0.4248735308647156, "learning_rate": 0.00055786580127543, "loss": 1.8238, "step": 15396 }, { "epoch": 0.51, "grad_norm": 0.42206552624702454, "learning_rate": 0.0005578604595944139, "loss": 1.8752, "step": 15397 }, { "epoch": 0.51, "grad_norm": 0.42659592628479004, "learning_rate": 0.0005578551176003932, "loss": 1.846, "step": 15398 }, { "epoch": 0.51, "grad_norm": 0.4278206527233124, "learning_rate": 0.0005578497752933743, "loss": 1.8267, "step": 15399 }, { "epoch": 0.51, "grad_norm": 0.4252208471298218, "learning_rate": 0.0005578444326733639, "loss": 1.8635, "step": 15400 }, { "epoch": 0.51, "grad_norm": 0.43366190791130066, "learning_rate": 0.0005578390897403682, "loss": 1.8694, "step": 15401 }, { "epoch": 0.51, "grad_norm": 0.42780694365501404, "learning_rate": 0.000557833746494394, "loss": 1.7942, "step": 15402 }, { "epoch": 0.51, "grad_norm": 0.4276171028614044, "learning_rate": 0.0005578284029354474, "loss": 1.7907, "step": 15403 }, { "epoch": 0.51, "grad_norm": 0.4396132230758667, "learning_rate": 0.0005578230590635352, "loss": 1.8875, "step": 15404 }, { "epoch": 0.51, "grad_norm": 0.4289200007915497, "learning_rate": 0.0005578177148786639, "loss": 1.8603, "step": 15405 }, { "epoch": 0.51, "grad_norm": 0.4220370650291443, "learning_rate": 0.0005578123703808398, "loss": 1.8281, "step": 15406 }, { "epoch": 0.51, "grad_norm": 0.4186567962169647, "learning_rate": 0.0005578070255700695, "loss": 1.8317, "step": 15407 }, { "epoch": 0.51, "grad_norm": 0.4233248233795166, "learning_rate": 0.0005578016804463594, "loss": 1.8497, "step": 15408 }, { "epoch": 0.51, "grad_norm": 0.42772242426872253, "learning_rate": 0.0005577963350097161, "loss": 1.9093, "step": 15409 }, { "epoch": 0.51, "grad_norm": 0.42275309562683105, "learning_rate": 0.000557790989260146, "loss": 1.8291, "step": 15410 }, { "epoch": 0.51, "grad_norm": 0.4204627573490143, "learning_rate": 0.0005577856431976556, "loss": 1.9195, "step": 15411 }, { "epoch": 0.51, "grad_norm": 0.4536705017089844, "learning_rate": 0.0005577802968222515, "loss": 1.7776, "step": 15412 }, { "epoch": 0.51, "grad_norm": 0.43009698390960693, "learning_rate": 0.0005577749501339401, "loss": 1.8267, "step": 15413 }, { "epoch": 0.51, "grad_norm": 0.42662033438682556, "learning_rate": 0.0005577696031327278, "loss": 1.8547, "step": 15414 }, { "epoch": 0.51, "grad_norm": 0.43345820903778076, "learning_rate": 0.0005577642558186213, "loss": 1.8624, "step": 15415 }, { "epoch": 0.51, "grad_norm": 0.42640677094459534, "learning_rate": 0.0005577589081916268, "loss": 1.8665, "step": 15416 }, { "epoch": 0.51, "grad_norm": 0.4236682951450348, "learning_rate": 0.0005577535602517512, "loss": 1.801, "step": 15417 }, { "epoch": 0.51, "grad_norm": 0.42677319049835205, "learning_rate": 0.0005577482119990004, "loss": 1.7868, "step": 15418 }, { "epoch": 0.51, "grad_norm": 0.43434110283851624, "learning_rate": 0.0005577428634333816, "loss": 1.9018, "step": 15419 }, { "epoch": 0.51, "grad_norm": 0.4195372760295868, "learning_rate": 0.0005577375145549008, "loss": 1.9271, "step": 15420 }, { "epoch": 0.51, "grad_norm": 0.43753379583358765, "learning_rate": 0.0005577321653635647, "loss": 1.8546, "step": 15421 }, { "epoch": 0.51, "grad_norm": 0.4324348568916321, "learning_rate": 0.0005577268158593797, "loss": 1.8707, "step": 15422 }, { "epoch": 0.51, "grad_norm": 0.42557379603385925, "learning_rate": 0.0005577214660423522, "loss": 1.8544, "step": 15423 }, { "epoch": 0.51, "grad_norm": 0.4276827871799469, "learning_rate": 0.0005577161159124889, "loss": 1.7135, "step": 15424 }, { "epoch": 0.51, "grad_norm": 0.43376243114471436, "learning_rate": 0.0005577107654697963, "loss": 1.802, "step": 15425 }, { "epoch": 0.51, "grad_norm": 0.45333972573280334, "learning_rate": 0.0005577054147142806, "loss": 1.8725, "step": 15426 }, { "epoch": 0.51, "grad_norm": 0.42583346366882324, "learning_rate": 0.0005577000636459487, "loss": 1.8778, "step": 15427 }, { "epoch": 0.51, "grad_norm": 0.42383649945259094, "learning_rate": 0.0005576947122648068, "loss": 1.8798, "step": 15428 }, { "epoch": 0.51, "grad_norm": 0.4337078332901001, "learning_rate": 0.0005576893605708615, "loss": 1.8637, "step": 15429 }, { "epoch": 0.51, "grad_norm": 0.42858967185020447, "learning_rate": 0.0005576840085641193, "loss": 1.7576, "step": 15430 }, { "epoch": 0.51, "grad_norm": 0.4428879916667938, "learning_rate": 0.0005576786562445866, "loss": 1.8737, "step": 15431 }, { "epoch": 0.51, "grad_norm": 0.42190539836883545, "learning_rate": 0.00055767330361227, "loss": 1.8897, "step": 15432 }, { "epoch": 0.51, "grad_norm": 0.41887596249580383, "learning_rate": 0.000557667950667176, "loss": 1.839, "step": 15433 }, { "epoch": 0.51, "grad_norm": 0.432050883769989, "learning_rate": 0.0005576625974093111, "loss": 1.9576, "step": 15434 }, { "epoch": 0.51, "grad_norm": 0.4346857964992523, "learning_rate": 0.0005576572438386818, "loss": 1.8812, "step": 15435 }, { "epoch": 0.51, "grad_norm": 0.42259204387664795, "learning_rate": 0.0005576518899552945, "loss": 1.9214, "step": 15436 }, { "epoch": 0.51, "grad_norm": 0.4415132403373718, "learning_rate": 0.0005576465357591559, "loss": 1.8776, "step": 15437 }, { "epoch": 0.51, "grad_norm": 0.42027100920677185, "learning_rate": 0.0005576411812502722, "loss": 1.8496, "step": 15438 }, { "epoch": 0.51, "grad_norm": 0.4369215667247772, "learning_rate": 0.0005576358264286501, "loss": 1.9352, "step": 15439 }, { "epoch": 0.51, "grad_norm": 0.4244058430194855, "learning_rate": 0.0005576304712942962, "loss": 1.9298, "step": 15440 }, { "epoch": 0.51, "grad_norm": 0.4287278652191162, "learning_rate": 0.0005576251158472167, "loss": 1.8067, "step": 15441 }, { "epoch": 0.51, "grad_norm": 0.42811983823776245, "learning_rate": 0.0005576197600874183, "loss": 1.8448, "step": 15442 }, { "epoch": 0.51, "grad_norm": 0.4314712882041931, "learning_rate": 0.0005576144040149076, "loss": 1.8967, "step": 15443 }, { "epoch": 0.51, "grad_norm": 0.4253188371658325, "learning_rate": 0.0005576090476296908, "loss": 1.8645, "step": 15444 }, { "epoch": 0.51, "grad_norm": 0.4379149079322815, "learning_rate": 0.0005576036909317747, "loss": 1.852, "step": 15445 }, { "epoch": 0.51, "grad_norm": 0.45010802149772644, "learning_rate": 0.0005575983339211656, "loss": 1.8199, "step": 15446 }, { "epoch": 0.51, "grad_norm": 0.42978233098983765, "learning_rate": 0.00055759297659787, "loss": 1.9315, "step": 15447 }, { "epoch": 0.51, "grad_norm": 0.42508405447006226, "learning_rate": 0.0005575876189618947, "loss": 1.8962, "step": 15448 }, { "epoch": 0.51, "grad_norm": 0.4193631708621979, "learning_rate": 0.0005575822610132459, "loss": 1.8651, "step": 15449 }, { "epoch": 0.51, "grad_norm": 0.43088284134864807, "learning_rate": 0.0005575769027519301, "loss": 1.829, "step": 15450 }, { "epoch": 0.51, "grad_norm": 0.4342752695083618, "learning_rate": 0.000557571544177954, "loss": 1.8447, "step": 15451 }, { "epoch": 0.51, "grad_norm": 0.4223009943962097, "learning_rate": 0.0005575661852913239, "loss": 1.8262, "step": 15452 }, { "epoch": 0.51, "grad_norm": 0.4299643635749817, "learning_rate": 0.0005575608260920465, "loss": 1.9183, "step": 15453 }, { "epoch": 0.51, "grad_norm": 0.43076592683792114, "learning_rate": 0.0005575554665801281, "loss": 1.9634, "step": 15454 }, { "epoch": 0.51, "grad_norm": 0.4186311960220337, "learning_rate": 0.0005575501067555754, "loss": 1.8527, "step": 15455 }, { "epoch": 0.51, "grad_norm": 0.43239420652389526, "learning_rate": 0.0005575447466183948, "loss": 1.7613, "step": 15456 }, { "epoch": 0.51, "grad_norm": 0.41500869393348694, "learning_rate": 0.0005575393861685928, "loss": 1.8519, "step": 15457 }, { "epoch": 0.51, "grad_norm": 0.4302661418914795, "learning_rate": 0.0005575340254061759, "loss": 1.8872, "step": 15458 }, { "epoch": 0.51, "grad_norm": 0.40597105026245117, "learning_rate": 0.0005575286643311507, "loss": 1.7974, "step": 15459 }, { "epoch": 0.51, "grad_norm": 0.4157026410102844, "learning_rate": 0.0005575233029435237, "loss": 1.8873, "step": 15460 }, { "epoch": 0.51, "grad_norm": 0.4297885298728943, "learning_rate": 0.0005575179412433013, "loss": 1.7975, "step": 15461 }, { "epoch": 0.51, "grad_norm": 0.43521541357040405, "learning_rate": 0.00055751257923049, "loss": 1.8278, "step": 15462 }, { "epoch": 0.51, "grad_norm": 0.4251174330711365, "learning_rate": 0.0005575072169050965, "loss": 1.7005, "step": 15463 }, { "epoch": 0.51, "grad_norm": 0.4438226819038391, "learning_rate": 0.000557501854267127, "loss": 1.8811, "step": 15464 }, { "epoch": 0.51, "grad_norm": 0.4508385956287384, "learning_rate": 0.0005574964913165883, "loss": 1.9224, "step": 15465 }, { "epoch": 0.51, "grad_norm": 0.44020533561706543, "learning_rate": 0.0005574911280534868, "loss": 1.8296, "step": 15466 }, { "epoch": 0.51, "grad_norm": 0.44606274366378784, "learning_rate": 0.000557485764477829, "loss": 1.9331, "step": 15467 }, { "epoch": 0.51, "grad_norm": 0.4355356991291046, "learning_rate": 0.0005574804005896214, "loss": 1.7639, "step": 15468 }, { "epoch": 0.51, "grad_norm": 0.4646397531032562, "learning_rate": 0.0005574750363888705, "loss": 1.8767, "step": 15469 }, { "epoch": 0.51, "grad_norm": 0.4398011267185211, "learning_rate": 0.0005574696718755829, "loss": 1.8974, "step": 15470 }, { "epoch": 0.51, "grad_norm": 0.41952425241470337, "learning_rate": 0.0005574643070497651, "loss": 1.8253, "step": 15471 }, { "epoch": 0.51, "grad_norm": 0.4099951982498169, "learning_rate": 0.0005574589419114235, "loss": 1.8751, "step": 15472 }, { "epoch": 0.51, "grad_norm": 0.5129289627075195, "learning_rate": 0.0005574535764605647, "loss": 1.9158, "step": 15473 }, { "epoch": 0.51, "grad_norm": 0.4445440471172333, "learning_rate": 0.0005574482106971951, "loss": 1.9082, "step": 15474 }, { "epoch": 0.51, "grad_norm": 0.4459320306777954, "learning_rate": 0.0005574428446213215, "loss": 1.8603, "step": 15475 }, { "epoch": 0.51, "grad_norm": 0.4216578006744385, "learning_rate": 0.0005574374782329501, "loss": 1.8326, "step": 15476 }, { "epoch": 0.51, "grad_norm": 0.47255176305770874, "learning_rate": 0.0005574321115320875, "loss": 1.8467, "step": 15477 }, { "epoch": 0.51, "grad_norm": 0.4449932873249054, "learning_rate": 0.0005574267445187404, "loss": 1.8523, "step": 15478 }, { "epoch": 0.51, "grad_norm": 0.42383646965026855, "learning_rate": 0.000557421377192915, "loss": 1.785, "step": 15479 }, { "epoch": 0.52, "grad_norm": 0.45349645614624023, "learning_rate": 0.000557416009554618, "loss": 1.8955, "step": 15480 }, { "epoch": 0.52, "grad_norm": 0.4306921660900116, "learning_rate": 0.0005574106416038559, "loss": 1.8092, "step": 15481 }, { "epoch": 0.52, "grad_norm": 0.4247809648513794, "learning_rate": 0.0005574052733406352, "loss": 1.9539, "step": 15482 }, { "epoch": 0.52, "grad_norm": 0.4463363289833069, "learning_rate": 0.0005573999047649625, "loss": 1.8551, "step": 15483 }, { "epoch": 0.52, "grad_norm": 0.4334027171134949, "learning_rate": 0.0005573945358768442, "loss": 1.7821, "step": 15484 }, { "epoch": 0.52, "grad_norm": 0.410921573638916, "learning_rate": 0.0005573891666762869, "loss": 1.7807, "step": 15485 }, { "epoch": 0.52, "grad_norm": 0.4356740415096283, "learning_rate": 0.000557383797163297, "loss": 1.8633, "step": 15486 }, { "epoch": 0.52, "grad_norm": 0.4248102903366089, "learning_rate": 0.0005573784273378812, "loss": 1.8407, "step": 15487 }, { "epoch": 0.52, "grad_norm": 0.4458681643009186, "learning_rate": 0.0005573730572000458, "loss": 1.9057, "step": 15488 }, { "epoch": 0.52, "grad_norm": 0.4345322847366333, "learning_rate": 0.0005573676867497975, "loss": 1.8379, "step": 15489 }, { "epoch": 0.52, "grad_norm": 0.4282851219177246, "learning_rate": 0.0005573623159871428, "loss": 1.8449, "step": 15490 }, { "epoch": 0.52, "grad_norm": 0.41822803020477295, "learning_rate": 0.000557356944912088, "loss": 1.8479, "step": 15491 }, { "epoch": 0.52, "grad_norm": 0.45201608538627625, "learning_rate": 0.0005573515735246399, "loss": 1.8677, "step": 15492 }, { "epoch": 0.52, "grad_norm": 0.42571765184402466, "learning_rate": 0.0005573462018248049, "loss": 1.8622, "step": 15493 }, { "epoch": 0.52, "grad_norm": 0.4344915449619293, "learning_rate": 0.0005573408298125894, "loss": 1.8794, "step": 15494 }, { "epoch": 0.52, "grad_norm": 0.42781832814216614, "learning_rate": 0.0005573354574880002, "loss": 1.8941, "step": 15495 }, { "epoch": 0.52, "grad_norm": 0.43309149146080017, "learning_rate": 0.0005573300848510437, "loss": 1.8735, "step": 15496 }, { "epoch": 0.52, "grad_norm": 0.43758636713027954, "learning_rate": 0.0005573247119017264, "loss": 1.8895, "step": 15497 }, { "epoch": 0.52, "grad_norm": 0.4596620798110962, "learning_rate": 0.0005573193386400546, "loss": 1.8618, "step": 15498 }, { "epoch": 0.52, "grad_norm": 0.4402751326560974, "learning_rate": 0.0005573139650660353, "loss": 1.8054, "step": 15499 }, { "epoch": 0.52, "grad_norm": 0.4205799996852875, "learning_rate": 0.0005573085911796746, "loss": 1.8802, "step": 15500 }, { "epoch": 0.52, "grad_norm": 0.4164286255836487, "learning_rate": 0.0005573032169809792, "loss": 1.8465, "step": 15501 }, { "epoch": 0.52, "grad_norm": 0.4471254348754883, "learning_rate": 0.0005572978424699557, "loss": 1.8718, "step": 15502 }, { "epoch": 0.52, "grad_norm": 0.4401874542236328, "learning_rate": 0.0005572924676466105, "loss": 1.8424, "step": 15503 }, { "epoch": 0.52, "grad_norm": 0.45982590317726135, "learning_rate": 0.00055728709251095, "loss": 1.8778, "step": 15504 }, { "epoch": 0.52, "grad_norm": 0.4316844344139099, "learning_rate": 0.000557281717062981, "loss": 1.7851, "step": 15505 }, { "epoch": 0.52, "grad_norm": 0.43945273756980896, "learning_rate": 0.0005572763413027099, "loss": 1.8082, "step": 15506 }, { "epoch": 0.52, "grad_norm": 0.44835811853408813, "learning_rate": 0.0005572709652301432, "loss": 1.7635, "step": 15507 }, { "epoch": 0.52, "grad_norm": 0.45185595750808716, "learning_rate": 0.0005572655888452875, "loss": 1.8616, "step": 15508 }, { "epoch": 0.52, "grad_norm": 0.4432307481765747, "learning_rate": 0.0005572602121481493, "loss": 1.8889, "step": 15509 }, { "epoch": 0.52, "grad_norm": 0.4580113887786865, "learning_rate": 0.000557254835138735, "loss": 1.9495, "step": 15510 }, { "epoch": 0.52, "grad_norm": 0.41965076327323914, "learning_rate": 0.0005572494578170514, "loss": 1.894, "step": 15511 }, { "epoch": 0.52, "grad_norm": 0.43338969349861145, "learning_rate": 0.0005572440801831048, "loss": 1.9278, "step": 15512 }, { "epoch": 0.52, "grad_norm": 0.4356643557548523, "learning_rate": 0.0005572387022369017, "loss": 1.8171, "step": 15513 }, { "epoch": 0.52, "grad_norm": 0.4495495855808258, "learning_rate": 0.0005572333239784489, "loss": 1.7752, "step": 15514 }, { "epoch": 0.52, "grad_norm": 0.43566277623176575, "learning_rate": 0.0005572279454077525, "loss": 1.8652, "step": 15515 }, { "epoch": 0.52, "grad_norm": 0.42244791984558105, "learning_rate": 0.0005572225665248195, "loss": 1.9161, "step": 15516 }, { "epoch": 0.52, "grad_norm": 0.42757105827331543, "learning_rate": 0.0005572171873296561, "loss": 1.7665, "step": 15517 }, { "epoch": 0.52, "grad_norm": 0.4319995939731598, "learning_rate": 0.0005572118078222689, "loss": 1.8812, "step": 15518 }, { "epoch": 0.52, "grad_norm": 0.43899795413017273, "learning_rate": 0.0005572064280026645, "loss": 1.8885, "step": 15519 }, { "epoch": 0.52, "grad_norm": 0.43888556957244873, "learning_rate": 0.0005572010478708493, "loss": 1.8486, "step": 15520 }, { "epoch": 0.52, "grad_norm": 0.42888596653938293, "learning_rate": 0.0005571956674268301, "loss": 1.8683, "step": 15521 }, { "epoch": 0.52, "grad_norm": 0.4312923550605774, "learning_rate": 0.0005571902866706132, "loss": 1.8976, "step": 15522 }, { "epoch": 0.52, "grad_norm": 0.43084225058555603, "learning_rate": 0.0005571849056022052, "loss": 1.8265, "step": 15523 }, { "epoch": 0.52, "grad_norm": 0.4118911325931549, "learning_rate": 0.0005571795242216125, "loss": 1.7676, "step": 15524 }, { "epoch": 0.52, "grad_norm": 0.48670002818107605, "learning_rate": 0.0005571741425288419, "loss": 1.8553, "step": 15525 }, { "epoch": 0.52, "grad_norm": 0.47106507420539856, "learning_rate": 0.0005571687605238996, "loss": 1.8313, "step": 15526 }, { "epoch": 0.52, "grad_norm": 0.4669274687767029, "learning_rate": 0.0005571633782067924, "loss": 1.9202, "step": 15527 }, { "epoch": 0.52, "grad_norm": 0.4559931457042694, "learning_rate": 0.0005571579955775269, "loss": 1.8612, "step": 15528 }, { "epoch": 0.52, "grad_norm": 0.427581787109375, "learning_rate": 0.0005571526126361094, "loss": 1.9053, "step": 15529 }, { "epoch": 0.52, "grad_norm": 0.4560372829437256, "learning_rate": 0.0005571472293825465, "loss": 1.763, "step": 15530 }, { "epoch": 0.52, "grad_norm": 0.4796617329120636, "learning_rate": 0.0005571418458168447, "loss": 1.8953, "step": 15531 }, { "epoch": 0.52, "grad_norm": 0.4385775923728943, "learning_rate": 0.0005571364619390107, "loss": 1.8306, "step": 15532 }, { "epoch": 0.52, "grad_norm": 0.4296518564224243, "learning_rate": 0.0005571310777490508, "loss": 1.8397, "step": 15533 }, { "epoch": 0.52, "grad_norm": 0.4634178876876831, "learning_rate": 0.0005571256932469717, "loss": 1.8236, "step": 15534 }, { "epoch": 0.52, "grad_norm": 0.4415607452392578, "learning_rate": 0.0005571203084327799, "loss": 1.8631, "step": 15535 }, { "epoch": 0.52, "grad_norm": 0.4353099465370178, "learning_rate": 0.0005571149233064819, "loss": 1.8725, "step": 15536 }, { "epoch": 0.52, "grad_norm": 0.44335833191871643, "learning_rate": 0.0005571095378680842, "loss": 1.8947, "step": 15537 }, { "epoch": 0.52, "grad_norm": 0.434659481048584, "learning_rate": 0.0005571041521175936, "loss": 1.8685, "step": 15538 }, { "epoch": 0.52, "grad_norm": 0.42361241579055786, "learning_rate": 0.0005570987660550163, "loss": 1.8253, "step": 15539 }, { "epoch": 0.52, "grad_norm": 0.4131951332092285, "learning_rate": 0.000557093379680359, "loss": 1.8203, "step": 15540 }, { "epoch": 0.52, "grad_norm": 0.41481924057006836, "learning_rate": 0.0005570879929936284, "loss": 1.8686, "step": 15541 }, { "epoch": 0.52, "grad_norm": 0.41934138536453247, "learning_rate": 0.0005570826059948306, "loss": 1.8037, "step": 15542 }, { "epoch": 0.52, "grad_norm": 0.44407373666763306, "learning_rate": 0.0005570772186839725, "loss": 1.8976, "step": 15543 }, { "epoch": 0.52, "grad_norm": 0.4383121430873871, "learning_rate": 0.0005570718310610605, "loss": 1.8969, "step": 15544 }, { "epoch": 0.52, "grad_norm": 0.4304121434688568, "learning_rate": 0.0005570664431261012, "loss": 1.8743, "step": 15545 }, { "epoch": 0.52, "grad_norm": 0.42284074425697327, "learning_rate": 0.0005570610548791011, "loss": 1.8409, "step": 15546 }, { "epoch": 0.52, "grad_norm": 0.45044395327568054, "learning_rate": 0.0005570556663200668, "loss": 1.809, "step": 15547 }, { "epoch": 0.52, "grad_norm": 0.4683808982372284, "learning_rate": 0.0005570502774490048, "loss": 1.8111, "step": 15548 }, { "epoch": 0.52, "grad_norm": 0.7726584672927856, "learning_rate": 0.0005570448882659217, "loss": 1.8313, "step": 15549 }, { "epoch": 0.52, "grad_norm": 0.4440973997116089, "learning_rate": 0.0005570394987708237, "loss": 1.8359, "step": 15550 }, { "epoch": 0.52, "grad_norm": 0.4322557747364044, "learning_rate": 0.0005570341089637179, "loss": 1.9321, "step": 15551 }, { "epoch": 0.52, "grad_norm": 0.4329167306423187, "learning_rate": 0.0005570287188446105, "loss": 1.8845, "step": 15552 }, { "epoch": 0.52, "grad_norm": 0.42498376965522766, "learning_rate": 0.000557023328413508, "loss": 1.9319, "step": 15553 }, { "epoch": 0.52, "grad_norm": 0.43012097477912903, "learning_rate": 0.0005570179376704172, "loss": 2.0065, "step": 15554 }, { "epoch": 0.52, "grad_norm": 0.44258666038513184, "learning_rate": 0.0005570125466153444, "loss": 1.8951, "step": 15555 }, { "epoch": 0.52, "grad_norm": 0.4434777796268463, "learning_rate": 0.0005570071552482963, "loss": 1.8654, "step": 15556 }, { "epoch": 0.52, "grad_norm": 0.4357534646987915, "learning_rate": 0.0005570017635692793, "loss": 1.8941, "step": 15557 }, { "epoch": 0.52, "grad_norm": 0.42304179072380066, "learning_rate": 0.0005569963715783002, "loss": 1.7566, "step": 15558 }, { "epoch": 0.52, "grad_norm": 0.42959603667259216, "learning_rate": 0.0005569909792753652, "loss": 1.8736, "step": 15559 }, { "epoch": 0.52, "grad_norm": 0.43074288964271545, "learning_rate": 0.000556985586660481, "loss": 1.9315, "step": 15560 }, { "epoch": 0.52, "grad_norm": 0.4169415235519409, "learning_rate": 0.0005569801937336542, "loss": 1.8474, "step": 15561 }, { "epoch": 0.52, "grad_norm": 0.4596831798553467, "learning_rate": 0.0005569748004948914, "loss": 1.9062, "step": 15562 }, { "epoch": 0.52, "grad_norm": 0.43840330839157104, "learning_rate": 0.000556969406944199, "loss": 1.8909, "step": 15563 }, { "epoch": 0.52, "grad_norm": 0.44676753878593445, "learning_rate": 0.0005569640130815835, "loss": 1.8306, "step": 15564 }, { "epoch": 0.52, "grad_norm": 0.43338698148727417, "learning_rate": 0.0005569586189070517, "loss": 1.8064, "step": 15565 }, { "epoch": 0.52, "grad_norm": 0.4224487841129303, "learning_rate": 0.0005569532244206099, "loss": 1.8275, "step": 15566 }, { "epoch": 0.52, "grad_norm": 0.430987685918808, "learning_rate": 0.0005569478296222648, "loss": 1.8724, "step": 15567 }, { "epoch": 0.52, "grad_norm": 0.4311853051185608, "learning_rate": 0.0005569424345120229, "loss": 1.8943, "step": 15568 }, { "epoch": 0.52, "grad_norm": 0.43675726652145386, "learning_rate": 0.0005569370390898907, "loss": 1.8362, "step": 15569 }, { "epoch": 0.52, "grad_norm": 0.42671167850494385, "learning_rate": 0.0005569316433558749, "loss": 1.8084, "step": 15570 }, { "epoch": 0.52, "grad_norm": 0.4266829490661621, "learning_rate": 0.0005569262473099818, "loss": 1.912, "step": 15571 }, { "epoch": 0.52, "grad_norm": 0.4337296187877655, "learning_rate": 0.000556920850952218, "loss": 1.8353, "step": 15572 }, { "epoch": 0.52, "grad_norm": 0.4265703856945038, "learning_rate": 0.0005569154542825903, "loss": 1.8636, "step": 15573 }, { "epoch": 0.52, "grad_norm": 0.4795566201210022, "learning_rate": 0.0005569100573011051, "loss": 1.8499, "step": 15574 }, { "epoch": 0.52, "grad_norm": 0.43474918603897095, "learning_rate": 0.000556904660007769, "loss": 1.8198, "step": 15575 }, { "epoch": 0.52, "grad_norm": 0.4296582043170929, "learning_rate": 0.0005568992624025884, "loss": 1.8141, "step": 15576 }, { "epoch": 0.52, "grad_norm": 0.43168842792510986, "learning_rate": 0.00055689386448557, "loss": 1.8748, "step": 15577 }, { "epoch": 0.52, "grad_norm": 0.448131799697876, "learning_rate": 0.0005568884662567203, "loss": 1.8654, "step": 15578 }, { "epoch": 0.52, "grad_norm": 0.43635404109954834, "learning_rate": 0.0005568830677160458, "loss": 1.9494, "step": 15579 }, { "epoch": 0.52, "grad_norm": 0.4497193396091461, "learning_rate": 0.0005568776688635531, "loss": 1.9145, "step": 15580 }, { "epoch": 0.52, "grad_norm": 0.42654696106910706, "learning_rate": 0.0005568722696992488, "loss": 1.8417, "step": 15581 }, { "epoch": 0.52, "grad_norm": 0.43250569701194763, "learning_rate": 0.0005568668702231394, "loss": 1.8873, "step": 15582 }, { "epoch": 0.52, "grad_norm": 0.43063125014305115, "learning_rate": 0.0005568614704352315, "loss": 1.8756, "step": 15583 }, { "epoch": 0.52, "grad_norm": 0.4302690327167511, "learning_rate": 0.0005568560703355316, "loss": 1.8952, "step": 15584 }, { "epoch": 0.52, "grad_norm": 0.45281922817230225, "learning_rate": 0.0005568506699240463, "loss": 1.8206, "step": 15585 }, { "epoch": 0.52, "grad_norm": 0.42970624566078186, "learning_rate": 0.000556845269200782, "loss": 1.8602, "step": 15586 }, { "epoch": 0.52, "grad_norm": 0.4409204423427582, "learning_rate": 0.0005568398681657455, "loss": 1.8401, "step": 15587 }, { "epoch": 0.52, "grad_norm": 0.4334104359149933, "learning_rate": 0.0005568344668189433, "loss": 1.9056, "step": 15588 }, { "epoch": 0.52, "grad_norm": 0.42814525961875916, "learning_rate": 0.0005568290651603818, "loss": 1.8539, "step": 15589 }, { "epoch": 0.52, "grad_norm": 0.4259592890739441, "learning_rate": 0.0005568236631900677, "loss": 1.8196, "step": 15590 }, { "epoch": 0.52, "grad_norm": 0.4302138388156891, "learning_rate": 0.0005568182609080075, "loss": 1.7934, "step": 15591 }, { "epoch": 0.52, "grad_norm": 0.4277327060699463, "learning_rate": 0.0005568128583142077, "loss": 1.9092, "step": 15592 }, { "epoch": 0.52, "grad_norm": 0.43354353308677673, "learning_rate": 0.000556807455408675, "loss": 1.8678, "step": 15593 }, { "epoch": 0.52, "grad_norm": 0.42721492052078247, "learning_rate": 0.0005568020521914159, "loss": 1.8737, "step": 15594 }, { "epoch": 0.52, "grad_norm": 0.4420888125896454, "learning_rate": 0.0005567966486624369, "loss": 1.8319, "step": 15595 }, { "epoch": 0.52, "grad_norm": 0.4382832944393158, "learning_rate": 0.0005567912448217447, "loss": 1.9944, "step": 15596 }, { "epoch": 0.52, "grad_norm": 0.43064895272254944, "learning_rate": 0.0005567858406693457, "loss": 1.834, "step": 15597 }, { "epoch": 0.52, "grad_norm": 0.4288688898086548, "learning_rate": 0.0005567804362052465, "loss": 1.7634, "step": 15598 }, { "epoch": 0.52, "grad_norm": 0.5457156300544739, "learning_rate": 0.0005567750314294537, "loss": 1.9075, "step": 15599 }, { "epoch": 0.52, "grad_norm": 0.44499704241752625, "learning_rate": 0.000556769626341974, "loss": 1.8405, "step": 15600 }, { "epoch": 0.52, "grad_norm": 0.4277693033218384, "learning_rate": 0.0005567642209428136, "loss": 1.8932, "step": 15601 }, { "epoch": 0.52, "grad_norm": 0.4414565861225128, "learning_rate": 0.0005567588152319793, "loss": 1.8624, "step": 15602 }, { "epoch": 0.52, "grad_norm": 0.4372525215148926, "learning_rate": 0.0005567534092094776, "loss": 1.8382, "step": 15603 }, { "epoch": 0.52, "grad_norm": 0.43338271975517273, "learning_rate": 0.0005567480028753153, "loss": 1.8614, "step": 15604 }, { "epoch": 0.52, "grad_norm": 0.414451003074646, "learning_rate": 0.0005567425962294985, "loss": 1.7693, "step": 15605 }, { "epoch": 0.52, "grad_norm": 0.43597710132598877, "learning_rate": 0.0005567371892720342, "loss": 1.84, "step": 15606 }, { "epoch": 0.52, "grad_norm": 0.44580501317977905, "learning_rate": 0.0005567317820029286, "loss": 1.8686, "step": 15607 }, { "epoch": 0.52, "grad_norm": 0.427560418844223, "learning_rate": 0.0005567263744221887, "loss": 1.9334, "step": 15608 }, { "epoch": 0.52, "grad_norm": 0.43040716648101807, "learning_rate": 0.0005567209665298206, "loss": 1.8383, "step": 15609 }, { "epoch": 0.52, "grad_norm": 0.4370681643486023, "learning_rate": 0.0005567155583258312, "loss": 1.8675, "step": 15610 }, { "epoch": 0.52, "grad_norm": 0.42913079261779785, "learning_rate": 0.0005567101498102269, "loss": 1.8291, "step": 15611 }, { "epoch": 0.52, "grad_norm": 0.4193223714828491, "learning_rate": 0.0005567047409830142, "loss": 1.8353, "step": 15612 }, { "epoch": 0.52, "grad_norm": 0.4304560720920563, "learning_rate": 0.0005566993318441999, "loss": 1.8345, "step": 15613 }, { "epoch": 0.52, "grad_norm": 0.4211953282356262, "learning_rate": 0.0005566939223937904, "loss": 1.8568, "step": 15614 }, { "epoch": 0.52, "grad_norm": 0.4204704165458679, "learning_rate": 0.0005566885126317922, "loss": 1.8477, "step": 15615 }, { "epoch": 0.52, "grad_norm": 0.4414638578891754, "learning_rate": 0.0005566831025582121, "loss": 1.8522, "step": 15616 }, { "epoch": 0.52, "grad_norm": 0.42603442072868347, "learning_rate": 0.0005566776921730565, "loss": 1.8942, "step": 15617 }, { "epoch": 0.52, "grad_norm": 0.42820584774017334, "learning_rate": 0.000556672281476332, "loss": 1.8227, "step": 15618 }, { "epoch": 0.52, "grad_norm": 0.43445631861686707, "learning_rate": 0.0005566668704680451, "loss": 1.8469, "step": 15619 }, { "epoch": 0.52, "grad_norm": 0.4415956735610962, "learning_rate": 0.0005566614591482025, "loss": 1.8245, "step": 15620 }, { "epoch": 0.52, "grad_norm": 0.4316364824771881, "learning_rate": 0.0005566560475168107, "loss": 1.8771, "step": 15621 }, { "epoch": 0.52, "grad_norm": 0.44313952326774597, "learning_rate": 0.0005566506355738763, "loss": 1.8144, "step": 15622 }, { "epoch": 0.52, "grad_norm": 0.4250243604183197, "learning_rate": 0.0005566452233194058, "loss": 1.8158, "step": 15623 }, { "epoch": 0.52, "grad_norm": 0.42637932300567627, "learning_rate": 0.0005566398107534058, "loss": 1.8177, "step": 15624 }, { "epoch": 0.52, "grad_norm": 0.4300452768802643, "learning_rate": 0.0005566343978758829, "loss": 1.8029, "step": 15625 }, { "epoch": 0.52, "grad_norm": 0.4620189368724823, "learning_rate": 0.0005566289846868437, "loss": 1.8661, "step": 15626 }, { "epoch": 0.52, "grad_norm": 0.4261973798274994, "learning_rate": 0.0005566235711862946, "loss": 1.8559, "step": 15627 }, { "epoch": 0.52, "grad_norm": 0.46316465735435486, "learning_rate": 0.0005566181573742424, "loss": 1.8664, "step": 15628 }, { "epoch": 0.52, "grad_norm": 0.43695345520973206, "learning_rate": 0.0005566127432506936, "loss": 1.8589, "step": 15629 }, { "epoch": 0.52, "grad_norm": 0.43461933732032776, "learning_rate": 0.0005566073288156547, "loss": 1.8462, "step": 15630 }, { "epoch": 0.52, "grad_norm": 0.42943018674850464, "learning_rate": 0.0005566019140691323, "loss": 1.9023, "step": 15631 }, { "epoch": 0.52, "grad_norm": 0.4336298108100891, "learning_rate": 0.000556596499011133, "loss": 1.8841, "step": 15632 }, { "epoch": 0.52, "grad_norm": 0.43655869364738464, "learning_rate": 0.0005565910836416633, "loss": 1.8785, "step": 15633 }, { "epoch": 0.52, "grad_norm": 0.4310000538825989, "learning_rate": 0.0005565856679607299, "loss": 1.8579, "step": 15634 }, { "epoch": 0.52, "grad_norm": 0.4431482255458832, "learning_rate": 0.0005565802519683393, "loss": 1.9621, "step": 15635 }, { "epoch": 0.52, "grad_norm": 0.4399522542953491, "learning_rate": 0.000556574835664498, "loss": 1.9162, "step": 15636 }, { "epoch": 0.52, "grad_norm": 0.4450700879096985, "learning_rate": 0.0005565694190492128, "loss": 1.8613, "step": 15637 }, { "epoch": 0.52, "grad_norm": 0.4351513981819153, "learning_rate": 0.0005565640021224901, "loss": 1.8542, "step": 15638 }, { "epoch": 0.52, "grad_norm": 0.42167073488235474, "learning_rate": 0.0005565585848843364, "loss": 1.8222, "step": 15639 }, { "epoch": 0.52, "grad_norm": 0.43800732493400574, "learning_rate": 0.0005565531673347585, "loss": 1.9379, "step": 15640 }, { "epoch": 0.52, "grad_norm": 0.4357518255710602, "learning_rate": 0.0005565477494737628, "loss": 1.8488, "step": 15641 }, { "epoch": 0.52, "grad_norm": 0.43307340145111084, "learning_rate": 0.0005565423313013559, "loss": 1.9307, "step": 15642 }, { "epoch": 0.52, "grad_norm": 0.42095044255256653, "learning_rate": 0.0005565369128175444, "loss": 1.867, "step": 15643 }, { "epoch": 0.52, "grad_norm": 0.4597533941268921, "learning_rate": 0.000556531494022335, "loss": 1.8768, "step": 15644 }, { "epoch": 0.52, "grad_norm": 0.43227192759513855, "learning_rate": 0.000556526074915734, "loss": 1.8734, "step": 15645 }, { "epoch": 0.52, "grad_norm": 0.4390966296195984, "learning_rate": 0.0005565206554977482, "loss": 1.9051, "step": 15646 }, { "epoch": 0.52, "grad_norm": 0.4260241389274597, "learning_rate": 0.0005565152357683842, "loss": 1.8556, "step": 15647 }, { "epoch": 0.52, "grad_norm": 0.45301732420921326, "learning_rate": 0.0005565098157276484, "loss": 1.8923, "step": 15648 }, { "epoch": 0.52, "grad_norm": 0.42640259861946106, "learning_rate": 0.0005565043953755475, "loss": 1.8789, "step": 15649 }, { "epoch": 0.52, "grad_norm": 0.4231777489185333, "learning_rate": 0.0005564989747120881, "loss": 1.7644, "step": 15650 }, { "epoch": 0.52, "grad_norm": 0.4351930320262909, "learning_rate": 0.0005564935537372768, "loss": 1.8622, "step": 15651 }, { "epoch": 0.52, "grad_norm": 0.43901336193084717, "learning_rate": 0.0005564881324511199, "loss": 1.8328, "step": 15652 }, { "epoch": 0.52, "grad_norm": 0.4306012690067291, "learning_rate": 0.0005564827108536244, "loss": 1.8835, "step": 15653 }, { "epoch": 0.52, "grad_norm": 0.43568432331085205, "learning_rate": 0.0005564772889447966, "loss": 1.8736, "step": 15654 }, { "epoch": 0.52, "grad_norm": 0.4134973883628845, "learning_rate": 0.0005564718667246432, "loss": 1.7883, "step": 15655 }, { "epoch": 0.52, "grad_norm": 0.46298134326934814, "learning_rate": 0.0005564664441931708, "loss": 1.8486, "step": 15656 }, { "epoch": 0.52, "grad_norm": 0.4289146661758423, "learning_rate": 0.0005564610213503858, "loss": 1.8055, "step": 15657 }, { "epoch": 0.52, "grad_norm": 0.43820077180862427, "learning_rate": 0.000556455598196295, "loss": 1.8568, "step": 15658 }, { "epoch": 0.52, "grad_norm": 0.4322446286678314, "learning_rate": 0.0005564501747309048, "loss": 1.8917, "step": 15659 }, { "epoch": 0.52, "grad_norm": 0.4526808261871338, "learning_rate": 0.0005564447509542219, "loss": 1.7901, "step": 15660 }, { "epoch": 0.52, "grad_norm": 0.47450074553489685, "learning_rate": 0.0005564393268662529, "loss": 1.8498, "step": 15661 }, { "epoch": 0.52, "grad_norm": 0.42476966977119446, "learning_rate": 0.0005564339024670043, "loss": 1.8401, "step": 15662 }, { "epoch": 0.52, "grad_norm": 0.42914119362831116, "learning_rate": 0.0005564284777564828, "loss": 1.8489, "step": 15663 }, { "epoch": 0.52, "grad_norm": 0.42744243144989014, "learning_rate": 0.0005564230527346948, "loss": 1.8359, "step": 15664 }, { "epoch": 0.52, "grad_norm": 0.4345782995223999, "learning_rate": 0.000556417627401647, "loss": 1.8532, "step": 15665 }, { "epoch": 0.52, "grad_norm": 0.42916879057884216, "learning_rate": 0.000556412201757346, "loss": 1.836, "step": 15666 }, { "epoch": 0.52, "grad_norm": 0.4378364086151123, "learning_rate": 0.0005564067758017985, "loss": 1.8906, "step": 15667 }, { "epoch": 0.52, "grad_norm": 0.42005202174186707, "learning_rate": 0.0005564013495350108, "loss": 1.8803, "step": 15668 }, { "epoch": 0.52, "grad_norm": 0.4391533136367798, "learning_rate": 0.0005563959229569898, "loss": 1.8456, "step": 15669 }, { "epoch": 0.52, "grad_norm": 0.43217435479164124, "learning_rate": 0.0005563904960677418, "loss": 1.8527, "step": 15670 }, { "epoch": 0.52, "grad_norm": 0.5178616642951965, "learning_rate": 0.0005563850688672735, "loss": 1.8575, "step": 15671 }, { "epoch": 0.52, "grad_norm": 0.4394051134586334, "learning_rate": 0.0005563796413555915, "loss": 1.8628, "step": 15672 }, { "epoch": 0.52, "grad_norm": 0.43629372119903564, "learning_rate": 0.0005563742135327025, "loss": 1.9207, "step": 15673 }, { "epoch": 0.52, "grad_norm": 0.5213029384613037, "learning_rate": 0.0005563687853986129, "loss": 1.8935, "step": 15674 }, { "epoch": 0.52, "grad_norm": 0.4326315224170685, "learning_rate": 0.0005563633569533295, "loss": 1.8782, "step": 15675 }, { "epoch": 0.52, "grad_norm": 0.4286222457885742, "learning_rate": 0.0005563579281968585, "loss": 1.8572, "step": 15676 }, { "epoch": 0.52, "grad_norm": 0.4278651773929596, "learning_rate": 0.0005563524991292069, "loss": 1.8833, "step": 15677 }, { "epoch": 0.52, "grad_norm": 0.4378994107246399, "learning_rate": 0.0005563470697503811, "loss": 1.8765, "step": 15678 }, { "epoch": 0.52, "grad_norm": 0.4406753182411194, "learning_rate": 0.0005563416400603878, "loss": 1.899, "step": 15679 }, { "epoch": 0.52, "grad_norm": 0.944401741027832, "learning_rate": 0.0005563362100592334, "loss": 1.8828, "step": 15680 }, { "epoch": 0.52, "grad_norm": 0.435452401638031, "learning_rate": 0.0005563307797469248, "loss": 1.892, "step": 15681 }, { "epoch": 0.52, "grad_norm": 0.43170034885406494, "learning_rate": 0.0005563253491234683, "loss": 1.789, "step": 15682 }, { "epoch": 0.52, "grad_norm": 0.43525779247283936, "learning_rate": 0.0005563199181888705, "loss": 1.8105, "step": 15683 }, { "epoch": 0.52, "grad_norm": 0.42769932746887207, "learning_rate": 0.0005563144869431382, "loss": 1.844, "step": 15684 }, { "epoch": 0.52, "grad_norm": 0.4230341613292694, "learning_rate": 0.0005563090553862779, "loss": 1.8795, "step": 15685 }, { "epoch": 0.52, "grad_norm": 0.4259849786758423, "learning_rate": 0.0005563036235182961, "loss": 1.807, "step": 15686 }, { "epoch": 0.52, "grad_norm": 0.44779208302497864, "learning_rate": 0.0005562981913391995, "loss": 1.9511, "step": 15687 }, { "epoch": 0.52, "grad_norm": 0.4405529797077179, "learning_rate": 0.0005562927588489946, "loss": 1.8487, "step": 15688 }, { "epoch": 0.52, "grad_norm": 0.42068156599998474, "learning_rate": 0.000556287326047688, "loss": 1.8262, "step": 15689 }, { "epoch": 0.52, "grad_norm": 0.4354000389575958, "learning_rate": 0.0005562818929352865, "loss": 1.9133, "step": 15690 }, { "epoch": 0.52, "grad_norm": 0.6950392723083496, "learning_rate": 0.0005562764595117965, "loss": 1.8454, "step": 15691 }, { "epoch": 0.52, "grad_norm": 0.42380279302597046, "learning_rate": 0.0005562710257772246, "loss": 1.7806, "step": 15692 }, { "epoch": 0.52, "grad_norm": 0.428314208984375, "learning_rate": 0.0005562655917315775, "loss": 1.8277, "step": 15693 }, { "epoch": 0.52, "grad_norm": 0.43941476941108704, "learning_rate": 0.0005562601573748617, "loss": 1.8931, "step": 15694 }, { "epoch": 0.52, "grad_norm": 0.42540088295936584, "learning_rate": 0.0005562547227070838, "loss": 1.8952, "step": 15695 }, { "epoch": 0.52, "grad_norm": 0.4273897409439087, "learning_rate": 0.0005562492877282505, "loss": 1.9077, "step": 15696 }, { "epoch": 0.52, "grad_norm": 0.4318036437034607, "learning_rate": 0.0005562438524383682, "loss": 1.8423, "step": 15697 }, { "epoch": 0.52, "grad_norm": 0.44267693161964417, "learning_rate": 0.0005562384168374437, "loss": 1.8466, "step": 15698 }, { "epoch": 0.52, "grad_norm": 0.45744481682777405, "learning_rate": 0.0005562329809254835, "loss": 1.8122, "step": 15699 }, { "epoch": 0.52, "grad_norm": 0.4393000602722168, "learning_rate": 0.0005562275447024942, "loss": 1.8443, "step": 15700 }, { "epoch": 0.52, "grad_norm": 0.44180038571357727, "learning_rate": 0.0005562221081684824, "loss": 1.7707, "step": 15701 }, { "epoch": 0.52, "grad_norm": 0.4233378469944, "learning_rate": 0.0005562166713234547, "loss": 1.8586, "step": 15702 }, { "epoch": 0.52, "grad_norm": 0.42988690733909607, "learning_rate": 0.0005562112341674178, "loss": 1.8244, "step": 15703 }, { "epoch": 0.52, "grad_norm": 0.462148517370224, "learning_rate": 0.0005562057967003782, "loss": 1.9261, "step": 15704 }, { "epoch": 0.52, "grad_norm": 0.4583287239074707, "learning_rate": 0.0005562003589223424, "loss": 1.8976, "step": 15705 }, { "epoch": 0.52, "grad_norm": 0.4393197298049927, "learning_rate": 0.0005561949208333172, "loss": 1.8136, "step": 15706 }, { "epoch": 0.52, "grad_norm": 0.43493229150772095, "learning_rate": 0.0005561894824333092, "loss": 1.8488, "step": 15707 }, { "epoch": 0.52, "grad_norm": 0.44390010833740234, "learning_rate": 0.0005561840437223248, "loss": 1.901, "step": 15708 }, { "epoch": 0.52, "grad_norm": 0.43067386746406555, "learning_rate": 0.0005561786047003708, "loss": 1.8133, "step": 15709 }, { "epoch": 0.52, "grad_norm": 0.4217122495174408, "learning_rate": 0.0005561731653674536, "loss": 1.8814, "step": 15710 }, { "epoch": 0.52, "grad_norm": 0.44817838072776794, "learning_rate": 0.00055616772572358, "loss": 1.8848, "step": 15711 }, { "epoch": 0.52, "grad_norm": 0.43601617217063904, "learning_rate": 0.0005561622857687566, "loss": 1.8305, "step": 15712 }, { "epoch": 0.52, "grad_norm": 0.4848988652229309, "learning_rate": 0.0005561568455029897, "loss": 1.9177, "step": 15713 }, { "epoch": 0.52, "grad_norm": 0.43182188272476196, "learning_rate": 0.0005561514049262862, "loss": 1.9392, "step": 15714 }, { "epoch": 0.52, "grad_norm": 0.42667242884635925, "learning_rate": 0.0005561459640386528, "loss": 1.872, "step": 15715 }, { "epoch": 0.52, "grad_norm": 0.4381334185600281, "learning_rate": 0.0005561405228400959, "loss": 1.793, "step": 15716 }, { "epoch": 0.52, "grad_norm": 0.4272977411746979, "learning_rate": 0.0005561350813306221, "loss": 1.8777, "step": 15717 }, { "epoch": 0.52, "grad_norm": 0.43192920088768005, "learning_rate": 0.000556129639510238, "loss": 1.7934, "step": 15718 }, { "epoch": 0.52, "grad_norm": 0.4382492005825043, "learning_rate": 0.0005561241973789503, "loss": 1.8478, "step": 15719 }, { "epoch": 0.52, "grad_norm": 0.44451045989990234, "learning_rate": 0.0005561187549367656, "loss": 1.9128, "step": 15720 }, { "epoch": 0.52, "grad_norm": 0.4182569980621338, "learning_rate": 0.0005561133121836905, "loss": 1.8774, "step": 15721 }, { "epoch": 0.52, "grad_norm": 0.43346521258354187, "learning_rate": 0.0005561078691197314, "loss": 1.8882, "step": 15722 }, { "epoch": 0.52, "grad_norm": 0.4385942816734314, "learning_rate": 0.0005561024257448952, "loss": 1.9035, "step": 15723 }, { "epoch": 0.52, "grad_norm": 0.42873725295066833, "learning_rate": 0.0005560969820591884, "loss": 1.9142, "step": 15724 }, { "epoch": 0.52, "grad_norm": 0.4424024224281311, "learning_rate": 0.0005560915380626176, "loss": 1.91, "step": 15725 }, { "epoch": 0.52, "grad_norm": 0.4336337745189667, "learning_rate": 0.0005560860937551894, "loss": 1.8741, "step": 15726 }, { "epoch": 0.52, "grad_norm": 0.4456844925880432, "learning_rate": 0.0005560806491369104, "loss": 1.8797, "step": 15727 }, { "epoch": 0.52, "grad_norm": 0.45395606756210327, "learning_rate": 0.0005560752042077871, "loss": 1.8437, "step": 15728 }, { "epoch": 0.52, "grad_norm": 0.42904049158096313, "learning_rate": 0.0005560697589678265, "loss": 1.8264, "step": 15729 }, { "epoch": 0.52, "grad_norm": 0.4338851571083069, "learning_rate": 0.0005560643134170347, "loss": 1.8675, "step": 15730 }, { "epoch": 0.52, "grad_norm": 0.4371485412120819, "learning_rate": 0.0005560588675554187, "loss": 1.7922, "step": 15731 }, { "epoch": 0.52, "grad_norm": 0.4509299099445343, "learning_rate": 0.000556053421382985, "loss": 1.8823, "step": 15732 }, { "epoch": 0.52, "grad_norm": 0.46434158086776733, "learning_rate": 0.0005560479748997401, "loss": 1.8871, "step": 15733 }, { "epoch": 0.52, "grad_norm": 0.4350427985191345, "learning_rate": 0.0005560425281056906, "loss": 1.7901, "step": 15734 }, { "epoch": 0.52, "grad_norm": 0.43430015444755554, "learning_rate": 0.0005560370810008433, "loss": 1.7742, "step": 15735 }, { "epoch": 0.52, "grad_norm": 0.4538322389125824, "learning_rate": 0.0005560316335852046, "loss": 1.8864, "step": 15736 }, { "epoch": 0.52, "grad_norm": 0.44988059997558594, "learning_rate": 0.0005560261858587813, "loss": 1.8408, "step": 15737 }, { "epoch": 0.52, "grad_norm": 0.45861154794692993, "learning_rate": 0.0005560207378215799, "loss": 1.8101, "step": 15738 }, { "epoch": 0.52, "grad_norm": 0.43198367953300476, "learning_rate": 0.0005560152894736072, "loss": 1.8526, "step": 15739 }, { "epoch": 0.52, "grad_norm": 0.45093798637390137, "learning_rate": 0.0005560098408148695, "loss": 1.8342, "step": 15740 }, { "epoch": 0.52, "grad_norm": 0.45093879103660583, "learning_rate": 0.0005560043918453737, "loss": 1.909, "step": 15741 }, { "epoch": 0.52, "grad_norm": 0.45416849851608276, "learning_rate": 0.0005559989425651262, "loss": 1.9383, "step": 15742 }, { "epoch": 0.52, "grad_norm": 0.44105643033981323, "learning_rate": 0.0005559934929741337, "loss": 1.8693, "step": 15743 }, { "epoch": 0.52, "grad_norm": 0.4329020380973816, "learning_rate": 0.0005559880430724027, "loss": 1.8847, "step": 15744 }, { "epoch": 0.52, "grad_norm": 0.4669928252696991, "learning_rate": 0.0005559825928599402, "loss": 1.8903, "step": 15745 }, { "epoch": 0.52, "grad_norm": 0.441087007522583, "learning_rate": 0.0005559771423367523, "loss": 1.8602, "step": 15746 }, { "epoch": 0.52, "grad_norm": 0.44653674960136414, "learning_rate": 0.000555971691502846, "loss": 1.8554, "step": 15747 }, { "epoch": 0.52, "grad_norm": 0.438068151473999, "learning_rate": 0.0005559662403582279, "loss": 1.8516, "step": 15748 }, { "epoch": 0.52, "grad_norm": 0.44963282346725464, "learning_rate": 0.0005559607889029042, "loss": 1.9031, "step": 15749 }, { "epoch": 0.52, "grad_norm": 0.43239468336105347, "learning_rate": 0.000555955337136882, "loss": 1.8256, "step": 15750 }, { "epoch": 0.52, "grad_norm": 0.4278714060783386, "learning_rate": 0.0005559498850601678, "loss": 1.8304, "step": 15751 }, { "epoch": 0.52, "grad_norm": 0.4566332995891571, "learning_rate": 0.000555944432672768, "loss": 1.89, "step": 15752 }, { "epoch": 0.52, "grad_norm": 0.47019335627555847, "learning_rate": 0.0005559389799746895, "loss": 1.7509, "step": 15753 }, { "epoch": 0.52, "grad_norm": 0.43492501974105835, "learning_rate": 0.0005559335269659388, "loss": 1.919, "step": 15754 }, { "epoch": 0.52, "grad_norm": 0.4360373914241791, "learning_rate": 0.0005559280736465223, "loss": 1.82, "step": 15755 }, { "epoch": 0.52, "grad_norm": 0.44294893741607666, "learning_rate": 0.000555922620016447, "loss": 1.8406, "step": 15756 }, { "epoch": 0.52, "grad_norm": 0.4250503480434418, "learning_rate": 0.0005559171660757193, "loss": 1.8692, "step": 15757 }, { "epoch": 0.52, "grad_norm": 0.4206579029560089, "learning_rate": 0.0005559117118243459, "loss": 1.8673, "step": 15758 }, { "epoch": 0.52, "grad_norm": 0.4429760277271271, "learning_rate": 0.0005559062572623334, "loss": 1.8988, "step": 15759 }, { "epoch": 0.52, "grad_norm": 0.4518090486526489, "learning_rate": 0.0005559008023896883, "loss": 1.8604, "step": 15760 }, { "epoch": 0.52, "grad_norm": 0.43505120277404785, "learning_rate": 0.0005558953472064173, "loss": 1.834, "step": 15761 }, { "epoch": 0.52, "grad_norm": 0.43221551179885864, "learning_rate": 0.0005558898917125272, "loss": 1.9196, "step": 15762 }, { "epoch": 0.52, "grad_norm": 0.429298460483551, "learning_rate": 0.0005558844359080245, "loss": 1.9107, "step": 15763 }, { "epoch": 0.52, "grad_norm": 0.43848374485969543, "learning_rate": 0.0005558789797929157, "loss": 1.8351, "step": 15764 }, { "epoch": 0.52, "grad_norm": 0.4187607169151306, "learning_rate": 0.0005558735233672076, "loss": 1.8118, "step": 15765 }, { "epoch": 0.52, "grad_norm": 0.43524169921875, "learning_rate": 0.0005558680666309067, "loss": 1.7969, "step": 15766 }, { "epoch": 0.52, "grad_norm": 0.4491107761859894, "learning_rate": 0.0005558626095840195, "loss": 1.8833, "step": 15767 }, { "epoch": 0.52, "grad_norm": 0.4418555498123169, "learning_rate": 0.0005558571522265531, "loss": 1.9596, "step": 15768 }, { "epoch": 0.52, "grad_norm": 0.4817604422569275, "learning_rate": 0.0005558516945585136, "loss": 1.9398, "step": 15769 }, { "epoch": 0.52, "grad_norm": 0.431306928396225, "learning_rate": 0.000555846236579908, "loss": 1.8359, "step": 15770 }, { "epoch": 0.52, "grad_norm": 0.44207507371902466, "learning_rate": 0.0005558407782907427, "loss": 1.8668, "step": 15771 }, { "epoch": 0.52, "grad_norm": 0.4374879002571106, "learning_rate": 0.0005558353196910243, "loss": 1.8692, "step": 15772 }, { "epoch": 0.52, "grad_norm": 0.44331005215644836, "learning_rate": 0.0005558298607807596, "loss": 1.8394, "step": 15773 }, { "epoch": 0.52, "grad_norm": 0.6208699941635132, "learning_rate": 0.0005558244015599551, "loss": 1.8074, "step": 15774 }, { "epoch": 0.52, "grad_norm": 0.42767947912216187, "learning_rate": 0.0005558189420286176, "loss": 1.93, "step": 15775 }, { "epoch": 0.52, "grad_norm": 0.43068623542785645, "learning_rate": 0.0005558134821867535, "loss": 1.9506, "step": 15776 }, { "epoch": 0.52, "grad_norm": 0.44881391525268555, "learning_rate": 0.0005558080220343694, "loss": 1.8445, "step": 15777 }, { "epoch": 0.52, "grad_norm": 0.44308459758758545, "learning_rate": 0.0005558025615714721, "loss": 1.8305, "step": 15778 }, { "epoch": 0.52, "grad_norm": 0.44311976432800293, "learning_rate": 0.0005557971007980684, "loss": 1.8078, "step": 15779 }, { "epoch": 0.53, "grad_norm": 0.4418066143989563, "learning_rate": 0.0005557916397141645, "loss": 1.7622, "step": 15780 }, { "epoch": 0.53, "grad_norm": 0.443991094827652, "learning_rate": 0.0005557861783197673, "loss": 1.8657, "step": 15781 }, { "epoch": 0.53, "grad_norm": 0.4225032925605774, "learning_rate": 0.0005557807166148834, "loss": 1.8351, "step": 15782 }, { "epoch": 0.53, "grad_norm": 0.42629337310791016, "learning_rate": 0.0005557752545995193, "loss": 1.8504, "step": 15783 }, { "epoch": 0.53, "grad_norm": 0.4304008185863495, "learning_rate": 0.0005557697922736818, "loss": 1.8922, "step": 15784 }, { "epoch": 0.53, "grad_norm": 0.44247257709503174, "learning_rate": 0.0005557643296373775, "loss": 1.9176, "step": 15785 }, { "epoch": 0.53, "grad_norm": 0.4169262647628784, "learning_rate": 0.000555758866690613, "loss": 1.8123, "step": 15786 }, { "epoch": 0.53, "grad_norm": 0.4239007532596588, "learning_rate": 0.0005557534034333948, "loss": 1.8693, "step": 15787 }, { "epoch": 0.53, "grad_norm": 0.4397769868373871, "learning_rate": 0.0005557479398657297, "loss": 1.8948, "step": 15788 }, { "epoch": 0.53, "grad_norm": 0.4485938847064972, "learning_rate": 0.0005557424759876243, "loss": 1.8435, "step": 15789 }, { "epoch": 0.53, "grad_norm": 0.43762436509132385, "learning_rate": 0.0005557370117990853, "loss": 1.9139, "step": 15790 }, { "epoch": 0.53, "grad_norm": 0.42977601289749146, "learning_rate": 0.000555731547300119, "loss": 1.9037, "step": 15791 }, { "epoch": 0.53, "grad_norm": 0.43989938497543335, "learning_rate": 0.0005557260824907325, "loss": 1.8142, "step": 15792 }, { "epoch": 0.53, "grad_norm": 0.4171659052371979, "learning_rate": 0.0005557206173709323, "loss": 1.8188, "step": 15793 }, { "epoch": 0.53, "grad_norm": 0.41399043798446655, "learning_rate": 0.0005557151519407248, "loss": 1.8268, "step": 15794 }, { "epoch": 0.53, "grad_norm": 0.4321027100086212, "learning_rate": 0.0005557096862001167, "loss": 1.8408, "step": 15795 }, { "epoch": 0.53, "grad_norm": 0.4325984716415405, "learning_rate": 0.0005557042201491148, "loss": 1.8853, "step": 15796 }, { "epoch": 0.53, "grad_norm": 0.43406641483306885, "learning_rate": 0.0005556987537877257, "loss": 1.844, "step": 15797 }, { "epoch": 0.53, "grad_norm": 0.46396681666374207, "learning_rate": 0.0005556932871159559, "loss": 1.8655, "step": 15798 }, { "epoch": 0.53, "grad_norm": 0.43003615736961365, "learning_rate": 0.0005556878201338122, "loss": 1.8181, "step": 15799 }, { "epoch": 0.53, "grad_norm": 0.4257280230522156, "learning_rate": 0.0005556823528413011, "loss": 1.8992, "step": 15800 }, { "epoch": 0.53, "grad_norm": 0.41807490587234497, "learning_rate": 0.0005556768852384294, "loss": 1.847, "step": 15801 }, { "epoch": 0.53, "grad_norm": 0.44769683480262756, "learning_rate": 0.0005556714173252036, "loss": 1.8593, "step": 15802 }, { "epoch": 0.53, "grad_norm": 0.4390793740749359, "learning_rate": 0.0005556659491016303, "loss": 1.8047, "step": 15803 }, { "epoch": 0.53, "grad_norm": 0.4672923684120178, "learning_rate": 0.0005556604805677163, "loss": 1.8602, "step": 15804 }, { "epoch": 0.53, "grad_norm": 0.4643327593803406, "learning_rate": 0.0005556550117234681, "loss": 1.9152, "step": 15805 }, { "epoch": 0.53, "grad_norm": 0.43774595856666565, "learning_rate": 0.0005556495425688923, "loss": 1.8619, "step": 15806 }, { "epoch": 0.53, "grad_norm": 0.4290864169597626, "learning_rate": 0.0005556440731039957, "loss": 1.8052, "step": 15807 }, { "epoch": 0.53, "grad_norm": 0.4628017246723175, "learning_rate": 0.0005556386033287848, "loss": 1.8147, "step": 15808 }, { "epoch": 0.53, "grad_norm": 0.44164538383483887, "learning_rate": 0.0005556331332432664, "loss": 1.8925, "step": 15809 }, { "epoch": 0.53, "grad_norm": 0.44239798188209534, "learning_rate": 0.000555627662847447, "loss": 1.8488, "step": 15810 }, { "epoch": 0.53, "grad_norm": 0.4156387448310852, "learning_rate": 0.0005556221921413333, "loss": 1.8321, "step": 15811 }, { "epoch": 0.53, "grad_norm": 0.4394665062427521, "learning_rate": 0.0005556167211249319, "loss": 1.8171, "step": 15812 }, { "epoch": 0.53, "grad_norm": 0.43206503987312317, "learning_rate": 0.0005556112497982495, "loss": 1.9198, "step": 15813 }, { "epoch": 0.53, "grad_norm": 0.43822309374809265, "learning_rate": 0.0005556057781612927, "loss": 1.8477, "step": 15814 }, { "epoch": 0.53, "grad_norm": 0.4384195804595947, "learning_rate": 0.0005556003062140682, "loss": 1.9567, "step": 15815 }, { "epoch": 0.53, "grad_norm": 0.45000648498535156, "learning_rate": 0.0005555948339565825, "loss": 1.8191, "step": 15816 }, { "epoch": 0.53, "grad_norm": 0.42922958731651306, "learning_rate": 0.0005555893613888422, "loss": 1.8517, "step": 15817 }, { "epoch": 0.53, "grad_norm": 0.42029446363449097, "learning_rate": 0.0005555838885108543, "loss": 1.8566, "step": 15818 }, { "epoch": 0.53, "grad_norm": 0.4713176488876343, "learning_rate": 0.0005555784153226252, "loss": 1.8171, "step": 15819 }, { "epoch": 0.53, "grad_norm": 0.43454205989837646, "learning_rate": 0.0005555729418241615, "loss": 1.9127, "step": 15820 }, { "epoch": 0.53, "grad_norm": 0.4346049129962921, "learning_rate": 0.0005555674680154699, "loss": 1.9038, "step": 15821 }, { "epoch": 0.53, "grad_norm": 0.4393024742603302, "learning_rate": 0.0005555619938965571, "loss": 1.858, "step": 15822 }, { "epoch": 0.53, "grad_norm": 0.4533761739730835, "learning_rate": 0.0005555565194674296, "loss": 1.8152, "step": 15823 }, { "epoch": 0.53, "grad_norm": 0.4372377097606659, "learning_rate": 0.0005555510447280942, "loss": 1.8984, "step": 15824 }, { "epoch": 0.53, "grad_norm": 0.4211137592792511, "learning_rate": 0.0005555455696785574, "loss": 1.7447, "step": 15825 }, { "epoch": 0.53, "grad_norm": 0.4414568245410919, "learning_rate": 0.0005555400943188262, "loss": 1.8839, "step": 15826 }, { "epoch": 0.53, "grad_norm": 0.4308834671974182, "learning_rate": 0.0005555346186489066, "loss": 1.8937, "step": 15827 }, { "epoch": 0.53, "grad_norm": 0.4519071578979492, "learning_rate": 0.0005555291426688058, "loss": 1.817, "step": 15828 }, { "epoch": 0.53, "grad_norm": 0.4113486409187317, "learning_rate": 0.0005555236663785304, "loss": 1.8389, "step": 15829 }, { "epoch": 0.53, "grad_norm": 0.4260171949863434, "learning_rate": 0.0005555181897780868, "loss": 1.8228, "step": 15830 }, { "epoch": 0.53, "grad_norm": 0.46530914306640625, "learning_rate": 0.0005555127128674817, "loss": 1.8014, "step": 15831 }, { "epoch": 0.53, "grad_norm": 0.42284679412841797, "learning_rate": 0.000555507235646722, "loss": 1.9024, "step": 15832 }, { "epoch": 0.53, "grad_norm": 0.42673927545547485, "learning_rate": 0.000555501758115814, "loss": 1.9026, "step": 15833 }, { "epoch": 0.53, "grad_norm": 0.458646297454834, "learning_rate": 0.0005554962802747646, "loss": 1.8535, "step": 15834 }, { "epoch": 0.53, "grad_norm": 0.4413968324661255, "learning_rate": 0.0005554908021235803, "loss": 1.8972, "step": 15835 }, { "epoch": 0.53, "grad_norm": 0.42414990067481995, "learning_rate": 0.0005554853236622678, "loss": 1.8136, "step": 15836 }, { "epoch": 0.53, "grad_norm": 0.4244992434978485, "learning_rate": 0.0005554798448908337, "loss": 1.8479, "step": 15837 }, { "epoch": 0.53, "grad_norm": 0.43550142645835876, "learning_rate": 0.0005554743658092849, "loss": 1.8134, "step": 15838 }, { "epoch": 0.53, "grad_norm": 0.41808590292930603, "learning_rate": 0.0005554688864176277, "loss": 1.934, "step": 15839 }, { "epoch": 0.53, "grad_norm": 0.4522295892238617, "learning_rate": 0.0005554634067158691, "loss": 1.8862, "step": 15840 }, { "epoch": 0.53, "grad_norm": 0.43238675594329834, "learning_rate": 0.0005554579267040155, "loss": 1.843, "step": 15841 }, { "epoch": 0.53, "grad_norm": 0.4411664307117462, "learning_rate": 0.0005554524463820735, "loss": 1.8843, "step": 15842 }, { "epoch": 0.53, "grad_norm": 0.4290134608745575, "learning_rate": 0.0005554469657500499, "loss": 1.8114, "step": 15843 }, { "epoch": 0.53, "grad_norm": 0.4179016351699829, "learning_rate": 0.0005554414848079513, "loss": 1.8617, "step": 15844 }, { "epoch": 0.53, "grad_norm": 0.4352210760116577, "learning_rate": 0.0005554360035557844, "loss": 1.8754, "step": 15845 }, { "epoch": 0.53, "grad_norm": 0.4437269866466522, "learning_rate": 0.0005554305219935559, "loss": 1.9056, "step": 15846 }, { "epoch": 0.53, "grad_norm": 0.43189460039138794, "learning_rate": 0.0005554250401212723, "loss": 1.8324, "step": 15847 }, { "epoch": 0.53, "grad_norm": 0.4261355996131897, "learning_rate": 0.0005554195579389404, "loss": 1.8584, "step": 15848 }, { "epoch": 0.53, "grad_norm": 0.4295946955680847, "learning_rate": 0.0005554140754465668, "loss": 1.9167, "step": 15849 }, { "epoch": 0.53, "grad_norm": 0.429695188999176, "learning_rate": 0.0005554085926441581, "loss": 1.7816, "step": 15850 }, { "epoch": 0.53, "grad_norm": 0.4365781545639038, "learning_rate": 0.0005554031095317209, "loss": 1.9138, "step": 15851 }, { "epoch": 0.53, "grad_norm": 0.4318675398826599, "learning_rate": 0.0005553976261092622, "loss": 1.8722, "step": 15852 }, { "epoch": 0.53, "grad_norm": 0.412116676568985, "learning_rate": 0.0005553921423767882, "loss": 1.8611, "step": 15853 }, { "epoch": 0.53, "grad_norm": 0.416535884141922, "learning_rate": 0.000555386658334306, "loss": 1.9161, "step": 15854 }, { "epoch": 0.53, "grad_norm": 0.4314701557159424, "learning_rate": 0.0005553811739818219, "loss": 1.8897, "step": 15855 }, { "epoch": 0.53, "grad_norm": 0.42135927081108093, "learning_rate": 0.0005553756893193426, "loss": 1.8266, "step": 15856 }, { "epoch": 0.53, "grad_norm": 0.42018741369247437, "learning_rate": 0.0005553702043468749, "loss": 1.8783, "step": 15857 }, { "epoch": 0.53, "grad_norm": 0.4509839415550232, "learning_rate": 0.0005553647190644253, "loss": 1.8345, "step": 15858 }, { "epoch": 0.53, "grad_norm": 0.43147751688957214, "learning_rate": 0.0005553592334720007, "loss": 1.8625, "step": 15859 }, { "epoch": 0.53, "grad_norm": 0.41453665494918823, "learning_rate": 0.0005553537475696075, "loss": 1.7471, "step": 15860 }, { "epoch": 0.53, "grad_norm": 0.4330436885356903, "learning_rate": 0.0005553482613572527, "loss": 1.8384, "step": 15861 }, { "epoch": 0.53, "grad_norm": 0.42532631754875183, "learning_rate": 0.0005553427748349425, "loss": 1.8439, "step": 15862 }, { "epoch": 0.53, "grad_norm": 0.8088883757591248, "learning_rate": 0.0005553372880026839, "loss": 1.9356, "step": 15863 }, { "epoch": 0.53, "grad_norm": 0.42850959300994873, "learning_rate": 0.0005553318008604835, "loss": 1.8634, "step": 15864 }, { "epoch": 0.53, "grad_norm": 0.4364238977432251, "learning_rate": 0.0005553263134083478, "loss": 1.7748, "step": 15865 }, { "epoch": 0.53, "grad_norm": 0.43573683500289917, "learning_rate": 0.0005553208256462836, "loss": 1.8441, "step": 15866 }, { "epoch": 0.53, "grad_norm": 0.42368558049201965, "learning_rate": 0.0005553153375742975, "loss": 1.8693, "step": 15867 }, { "epoch": 0.53, "grad_norm": 0.4227609932422638, "learning_rate": 0.0005553098491923963, "loss": 1.8567, "step": 15868 }, { "epoch": 0.53, "grad_norm": 0.42476826906204224, "learning_rate": 0.0005553043605005866, "loss": 1.8409, "step": 15869 }, { "epoch": 0.53, "grad_norm": 0.43517857789993286, "learning_rate": 0.0005552988714988749, "loss": 1.8421, "step": 15870 }, { "epoch": 0.53, "grad_norm": 0.46129903197288513, "learning_rate": 0.000555293382187268, "loss": 1.8745, "step": 15871 }, { "epoch": 0.53, "grad_norm": 0.5284821391105652, "learning_rate": 0.0005552878925657726, "loss": 1.8685, "step": 15872 }, { "epoch": 0.53, "grad_norm": 0.43494701385498047, "learning_rate": 0.0005552824026343953, "loss": 1.8786, "step": 15873 }, { "epoch": 0.53, "grad_norm": 0.4429868161678314, "learning_rate": 0.0005552769123931429, "loss": 1.8782, "step": 15874 }, { "epoch": 0.53, "grad_norm": 0.44244280457496643, "learning_rate": 0.0005552714218420217, "loss": 1.8534, "step": 15875 }, { "epoch": 0.53, "grad_norm": 0.44853389263153076, "learning_rate": 0.0005552659309810387, "loss": 1.8316, "step": 15876 }, { "epoch": 0.53, "grad_norm": 0.43433600664138794, "learning_rate": 0.0005552604398102004, "loss": 1.7311, "step": 15877 }, { "epoch": 0.53, "grad_norm": 0.43665483593940735, "learning_rate": 0.0005552549483295138, "loss": 1.7808, "step": 15878 }, { "epoch": 0.53, "grad_norm": 0.43517372012138367, "learning_rate": 0.0005552494565389851, "loss": 1.8879, "step": 15879 }, { "epoch": 0.53, "grad_norm": 0.42968985438346863, "learning_rate": 0.0005552439644386212, "loss": 1.8499, "step": 15880 }, { "epoch": 0.53, "grad_norm": 0.4408871829509735, "learning_rate": 0.0005552384720284287, "loss": 1.8568, "step": 15881 }, { "epoch": 0.53, "grad_norm": 0.4287249445915222, "learning_rate": 0.0005552329793084144, "loss": 1.852, "step": 15882 }, { "epoch": 0.53, "grad_norm": 0.4366148114204407, "learning_rate": 0.0005552274862785847, "loss": 1.905, "step": 15883 }, { "epoch": 0.53, "grad_norm": 0.42808812856674194, "learning_rate": 0.0005552219929389466, "loss": 1.8621, "step": 15884 }, { "epoch": 0.53, "grad_norm": 0.43895360827445984, "learning_rate": 0.0005552164992895066, "loss": 1.8258, "step": 15885 }, { "epoch": 0.53, "grad_norm": 0.4523483216762543, "learning_rate": 0.0005552110053302713, "loss": 1.8716, "step": 15886 }, { "epoch": 0.53, "grad_norm": 0.41394907236099243, "learning_rate": 0.0005552055110612475, "loss": 1.8684, "step": 15887 }, { "epoch": 0.53, "grad_norm": 0.43356794118881226, "learning_rate": 0.0005552000164824418, "loss": 1.8646, "step": 15888 }, { "epoch": 0.53, "grad_norm": 0.4630797803401947, "learning_rate": 0.0005551945215938608, "loss": 1.8142, "step": 15889 }, { "epoch": 0.53, "grad_norm": 0.4240177869796753, "learning_rate": 0.0005551890263955113, "loss": 1.8262, "step": 15890 }, { "epoch": 0.53, "grad_norm": 0.43544039130210876, "learning_rate": 0.0005551835308873999, "loss": 1.7852, "step": 15891 }, { "epoch": 0.53, "grad_norm": 0.4371162950992584, "learning_rate": 0.0005551780350695334, "loss": 1.871, "step": 15892 }, { "epoch": 0.53, "grad_norm": 0.4286912977695465, "learning_rate": 0.0005551725389419183, "loss": 1.8161, "step": 15893 }, { "epoch": 0.53, "grad_norm": 0.4323747158050537, "learning_rate": 0.0005551670425045614, "loss": 1.847, "step": 15894 }, { "epoch": 0.53, "grad_norm": 0.4249522387981415, "learning_rate": 0.0005551615457574691, "loss": 1.7953, "step": 15895 }, { "epoch": 0.53, "grad_norm": 0.4283609986305237, "learning_rate": 0.0005551560487006486, "loss": 1.8246, "step": 15896 }, { "epoch": 0.53, "grad_norm": 0.4304492175579071, "learning_rate": 0.000555150551334106, "loss": 1.8626, "step": 15897 }, { "epoch": 0.53, "grad_norm": 0.42765453457832336, "learning_rate": 0.0005551450536578482, "loss": 1.8944, "step": 15898 }, { "epoch": 0.53, "grad_norm": 0.43062567710876465, "learning_rate": 0.0005551395556718821, "loss": 1.9323, "step": 15899 }, { "epoch": 0.53, "grad_norm": 0.42587047815322876, "learning_rate": 0.000555134057376214, "loss": 1.8925, "step": 15900 }, { "epoch": 0.53, "grad_norm": 0.4584401845932007, "learning_rate": 0.0005551285587708508, "loss": 1.8888, "step": 15901 }, { "epoch": 0.53, "grad_norm": 0.4400385022163391, "learning_rate": 0.0005551230598557991, "loss": 1.9136, "step": 15902 }, { "epoch": 0.53, "grad_norm": 0.42372339963912964, "learning_rate": 0.0005551175606310656, "loss": 1.8528, "step": 15903 }, { "epoch": 0.53, "grad_norm": 0.4251723289489746, "learning_rate": 0.0005551120610966571, "loss": 1.8689, "step": 15904 }, { "epoch": 0.53, "grad_norm": 0.46795302629470825, "learning_rate": 0.00055510656125258, "loss": 1.8449, "step": 15905 }, { "epoch": 0.53, "grad_norm": 0.46684640645980835, "learning_rate": 0.0005551010610988413, "loss": 1.8266, "step": 15906 }, { "epoch": 0.53, "grad_norm": 0.42234212160110474, "learning_rate": 0.0005550955606354473, "loss": 1.9367, "step": 15907 }, { "epoch": 0.53, "grad_norm": 0.4354223608970642, "learning_rate": 0.0005550900598624049, "loss": 1.802, "step": 15908 }, { "epoch": 0.53, "grad_norm": 0.5048592686653137, "learning_rate": 0.0005550845587797208, "loss": 1.9303, "step": 15909 }, { "epoch": 0.53, "grad_norm": 0.4323314428329468, "learning_rate": 0.0005550790573874017, "loss": 1.9167, "step": 15910 }, { "epoch": 0.53, "grad_norm": 0.4178316593170166, "learning_rate": 0.0005550735556854541, "loss": 1.866, "step": 15911 }, { "epoch": 0.53, "grad_norm": 0.4329444169998169, "learning_rate": 0.0005550680536738848, "loss": 1.8052, "step": 15912 }, { "epoch": 0.53, "grad_norm": 0.4625871479511261, "learning_rate": 0.0005550625513527005, "loss": 1.8262, "step": 15913 }, { "epoch": 0.53, "grad_norm": 0.4444617033004761, "learning_rate": 0.0005550570487219079, "loss": 1.8369, "step": 15914 }, { "epoch": 0.53, "grad_norm": 0.44762760400772095, "learning_rate": 0.0005550515457815135, "loss": 1.8847, "step": 15915 }, { "epoch": 0.53, "grad_norm": 0.47771209478378296, "learning_rate": 0.0005550460425315242, "loss": 1.8531, "step": 15916 }, { "epoch": 0.53, "grad_norm": 0.4588567912578583, "learning_rate": 0.0005550405389719465, "loss": 1.826, "step": 15917 }, { "epoch": 0.53, "grad_norm": 0.43168866634368896, "learning_rate": 0.0005550350351027873, "loss": 1.8406, "step": 15918 }, { "epoch": 0.53, "grad_norm": 0.4516579508781433, "learning_rate": 0.0005550295309240531, "loss": 1.8064, "step": 15919 }, { "epoch": 0.53, "grad_norm": 0.446048766374588, "learning_rate": 0.0005550240264357505, "loss": 1.8251, "step": 15920 }, { "epoch": 0.53, "grad_norm": 0.4335116446018219, "learning_rate": 0.0005550185216378864, "loss": 1.8361, "step": 15921 }, { "epoch": 0.53, "grad_norm": 0.4458712339401245, "learning_rate": 0.0005550130165304674, "loss": 1.8389, "step": 15922 }, { "epoch": 0.53, "grad_norm": 0.4208754003047943, "learning_rate": 0.0005550075111135001, "loss": 1.8282, "step": 15923 }, { "epoch": 0.53, "grad_norm": 0.42007380723953247, "learning_rate": 0.0005550020053869913, "loss": 1.8461, "step": 15924 }, { "epoch": 0.53, "grad_norm": 0.43825700879096985, "learning_rate": 0.0005549964993509476, "loss": 1.825, "step": 15925 }, { "epoch": 0.53, "grad_norm": 0.44450342655181885, "learning_rate": 0.0005549909930053758, "loss": 1.9863, "step": 15926 }, { "epoch": 0.53, "grad_norm": 0.4254571497440338, "learning_rate": 0.0005549854863502824, "loss": 1.8976, "step": 15927 }, { "epoch": 0.53, "grad_norm": 0.4337835907936096, "learning_rate": 0.0005549799793856744, "loss": 1.7693, "step": 15928 }, { "epoch": 0.53, "grad_norm": 0.4759424328804016, "learning_rate": 0.000554974472111558, "loss": 1.9661, "step": 15929 }, { "epoch": 0.53, "grad_norm": 0.4442841112613678, "learning_rate": 0.0005549689645279403, "loss": 1.8303, "step": 15930 }, { "epoch": 0.53, "grad_norm": 0.43144258856773376, "learning_rate": 0.0005549634566348279, "loss": 1.8257, "step": 15931 }, { "epoch": 0.53, "grad_norm": 0.42260685563087463, "learning_rate": 0.0005549579484322274, "loss": 1.8198, "step": 15932 }, { "epoch": 0.53, "grad_norm": 0.45135560631752014, "learning_rate": 0.0005549524399201454, "loss": 1.9427, "step": 15933 }, { "epoch": 0.53, "grad_norm": 0.44237199425697327, "learning_rate": 0.0005549469310985889, "loss": 1.8959, "step": 15934 }, { "epoch": 0.53, "grad_norm": 0.44632646441459656, "learning_rate": 0.0005549414219675642, "loss": 1.9484, "step": 15935 }, { "epoch": 0.53, "grad_norm": 0.4432617723941803, "learning_rate": 0.0005549359125270783, "loss": 1.9044, "step": 15936 }, { "epoch": 0.53, "grad_norm": 0.4400237500667572, "learning_rate": 0.0005549304027771377, "loss": 1.8292, "step": 15937 }, { "epoch": 0.53, "grad_norm": 0.41883108019828796, "learning_rate": 0.0005549248927177492, "loss": 1.8912, "step": 15938 }, { "epoch": 0.53, "grad_norm": 0.4308113753795624, "learning_rate": 0.0005549193823489194, "loss": 1.8344, "step": 15939 }, { "epoch": 0.53, "grad_norm": 0.5002424716949463, "learning_rate": 0.000554913871670655, "loss": 1.8429, "step": 15940 }, { "epoch": 0.53, "grad_norm": 0.45203056931495667, "learning_rate": 0.0005549083606829629, "loss": 1.8667, "step": 15941 }, { "epoch": 0.53, "grad_norm": 0.41613420844078064, "learning_rate": 0.0005549028493858494, "loss": 1.8307, "step": 15942 }, { "epoch": 0.53, "grad_norm": 0.48012834787368774, "learning_rate": 0.0005548973377793216, "loss": 1.841, "step": 15943 }, { "epoch": 0.53, "grad_norm": 0.46401816606521606, "learning_rate": 0.0005548918258633858, "loss": 1.8562, "step": 15944 }, { "epoch": 0.53, "grad_norm": 0.4321932792663574, "learning_rate": 0.0005548863136380489, "loss": 1.8711, "step": 15945 }, { "epoch": 0.53, "grad_norm": 0.4370344281196594, "learning_rate": 0.0005548808011033177, "loss": 1.8456, "step": 15946 }, { "epoch": 0.53, "grad_norm": 0.42745187878608704, "learning_rate": 0.0005548752882591986, "loss": 1.7808, "step": 15947 }, { "epoch": 0.53, "grad_norm": 0.44015592336654663, "learning_rate": 0.0005548697751056986, "loss": 1.8949, "step": 15948 }, { "epoch": 0.53, "grad_norm": 0.43906253576278687, "learning_rate": 0.0005548642616428241, "loss": 1.813, "step": 15949 }, { "epoch": 0.53, "grad_norm": 0.42792245745658875, "learning_rate": 0.0005548587478705821, "loss": 1.7917, "step": 15950 }, { "epoch": 0.53, "grad_norm": 0.42587363719940186, "learning_rate": 0.0005548532337889791, "loss": 1.8436, "step": 15951 }, { "epoch": 0.53, "grad_norm": 0.4761868119239807, "learning_rate": 0.0005548477193980218, "loss": 1.9013, "step": 15952 }, { "epoch": 0.53, "grad_norm": 0.428811252117157, "learning_rate": 0.0005548422046977168, "loss": 1.9331, "step": 15953 }, { "epoch": 0.53, "grad_norm": 0.45229020714759827, "learning_rate": 0.000554836689688071, "loss": 1.9012, "step": 15954 }, { "epoch": 0.53, "grad_norm": 0.431995153427124, "learning_rate": 0.000554831174369091, "loss": 1.8547, "step": 15955 }, { "epoch": 0.53, "grad_norm": 0.6196042895317078, "learning_rate": 0.0005548256587407836, "loss": 1.8869, "step": 15956 }, { "epoch": 0.53, "grad_norm": 0.41504600644111633, "learning_rate": 0.0005548201428031553, "loss": 1.7996, "step": 15957 }, { "epoch": 0.53, "grad_norm": 0.43974900245666504, "learning_rate": 0.0005548146265562129, "loss": 1.8974, "step": 15958 }, { "epoch": 0.53, "grad_norm": 0.42704400420188904, "learning_rate": 0.0005548091099999631, "loss": 1.8368, "step": 15959 }, { "epoch": 0.53, "grad_norm": 0.4476533830165863, "learning_rate": 0.0005548035931344126, "loss": 1.8502, "step": 15960 }, { "epoch": 0.53, "grad_norm": 0.4361954629421234, "learning_rate": 0.0005547980759595681, "loss": 1.8594, "step": 15961 }, { "epoch": 0.53, "grad_norm": 0.4430727958679199, "learning_rate": 0.0005547925584754362, "loss": 1.8557, "step": 15962 }, { "epoch": 0.53, "grad_norm": 0.430182546377182, "learning_rate": 0.0005547870406820237, "loss": 1.8286, "step": 15963 }, { "epoch": 0.53, "grad_norm": 0.41938021779060364, "learning_rate": 0.0005547815225793373, "loss": 1.8615, "step": 15964 }, { "epoch": 0.53, "grad_norm": 0.4349631369113922, "learning_rate": 0.0005547760041673837, "loss": 1.7911, "step": 15965 }, { "epoch": 0.53, "grad_norm": 0.4360121488571167, "learning_rate": 0.0005547704854461695, "loss": 1.8314, "step": 15966 }, { "epoch": 0.53, "grad_norm": 0.436095267534256, "learning_rate": 0.0005547649664157015, "loss": 1.8563, "step": 15967 }, { "epoch": 0.53, "grad_norm": 0.44040369987487793, "learning_rate": 0.0005547594470759864, "loss": 1.7955, "step": 15968 }, { "epoch": 0.53, "grad_norm": 0.4406096339225769, "learning_rate": 0.0005547539274270308, "loss": 1.8292, "step": 15969 }, { "epoch": 0.53, "grad_norm": 0.42536646127700806, "learning_rate": 0.0005547484074688415, "loss": 1.7794, "step": 15970 }, { "epoch": 0.53, "grad_norm": 0.45172733068466187, "learning_rate": 0.0005547428872014252, "loss": 1.8684, "step": 15971 }, { "epoch": 0.53, "grad_norm": 0.432788610458374, "learning_rate": 0.0005547373666247885, "loss": 1.85, "step": 15972 }, { "epoch": 0.53, "grad_norm": 0.4304355978965759, "learning_rate": 0.0005547318457389383, "loss": 1.8006, "step": 15973 }, { "epoch": 0.53, "grad_norm": 0.5229274034500122, "learning_rate": 0.000554726324543881, "loss": 1.8604, "step": 15974 }, { "epoch": 0.53, "grad_norm": 0.4190932512283325, "learning_rate": 0.0005547208030396236, "loss": 1.8284, "step": 15975 }, { "epoch": 0.53, "grad_norm": 0.4238629937171936, "learning_rate": 0.0005547152812261727, "loss": 1.842, "step": 15976 }, { "epoch": 0.53, "grad_norm": 0.4404349625110626, "learning_rate": 0.0005547097591035349, "loss": 1.9415, "step": 15977 }, { "epoch": 0.53, "grad_norm": 0.4490164518356323, "learning_rate": 0.0005547042366717169, "loss": 1.8425, "step": 15978 }, { "epoch": 0.53, "grad_norm": 0.42240846157073975, "learning_rate": 0.0005546987139307256, "loss": 1.8544, "step": 15979 }, { "epoch": 0.53, "grad_norm": 0.43246322870254517, "learning_rate": 0.0005546931908805676, "loss": 1.8919, "step": 15980 }, { "epoch": 0.53, "grad_norm": 0.4262731373310089, "learning_rate": 0.0005546876675212495, "loss": 1.9177, "step": 15981 }, { "epoch": 0.53, "grad_norm": 0.4243626594543457, "learning_rate": 0.0005546821438527782, "loss": 1.8177, "step": 15982 }, { "epoch": 0.53, "grad_norm": 0.4272981882095337, "learning_rate": 0.0005546766198751604, "loss": 1.8604, "step": 15983 }, { "epoch": 0.53, "grad_norm": 0.41442933678627014, "learning_rate": 0.0005546710955884025, "loss": 1.8575, "step": 15984 }, { "epoch": 0.53, "grad_norm": 0.42307591438293457, "learning_rate": 0.0005546655709925115, "loss": 1.9009, "step": 15985 }, { "epoch": 0.53, "grad_norm": 0.4404158592224121, "learning_rate": 0.000554660046087494, "loss": 1.9279, "step": 15986 }, { "epoch": 0.53, "grad_norm": 0.4337528944015503, "learning_rate": 0.0005546545208733567, "loss": 1.8606, "step": 15987 }, { "epoch": 0.53, "grad_norm": 0.42922043800354004, "learning_rate": 0.0005546489953501064, "loss": 1.8571, "step": 15988 }, { "epoch": 0.53, "grad_norm": 0.42364799976348877, "learning_rate": 0.0005546434695177498, "loss": 1.854, "step": 15989 }, { "epoch": 0.53, "grad_norm": 0.4397987425327301, "learning_rate": 0.0005546379433762934, "loss": 1.8185, "step": 15990 }, { "epoch": 0.53, "grad_norm": 0.41494837403297424, "learning_rate": 0.0005546324169257441, "loss": 1.805, "step": 15991 }, { "epoch": 0.53, "grad_norm": 0.41641339659690857, "learning_rate": 0.0005546268901661086, "loss": 1.8493, "step": 15992 }, { "epoch": 0.53, "grad_norm": 0.5133998394012451, "learning_rate": 0.0005546213630973936, "loss": 1.8893, "step": 15993 }, { "epoch": 0.53, "grad_norm": 0.4468822479248047, "learning_rate": 0.0005546158357196057, "loss": 1.8564, "step": 15994 }, { "epoch": 0.53, "grad_norm": 0.4416508674621582, "learning_rate": 0.0005546103080327516, "loss": 1.8575, "step": 15995 }, { "epoch": 0.53, "grad_norm": 0.4446694850921631, "learning_rate": 0.0005546047800368384, "loss": 1.9361, "step": 15996 }, { "epoch": 0.53, "grad_norm": 0.4393264949321747, "learning_rate": 0.0005545992517318723, "loss": 1.8484, "step": 15997 }, { "epoch": 0.53, "grad_norm": 0.43412551283836365, "learning_rate": 0.0005545937231178602, "loss": 1.7677, "step": 15998 }, { "epoch": 0.53, "grad_norm": 0.4830847680568695, "learning_rate": 0.000554588194194809, "loss": 1.7777, "step": 15999 }, { "epoch": 0.53, "grad_norm": 0.42454439401626587, "learning_rate": 0.000554582664962725, "loss": 1.7219, "step": 16000 }, { "epoch": 0.53, "grad_norm": 0.4233909249305725, "learning_rate": 0.0005545771354216154, "loss": 1.9135, "step": 16001 }, { "epoch": 0.53, "grad_norm": 0.43845754861831665, "learning_rate": 0.0005545716055714865, "loss": 1.8075, "step": 16002 }, { "epoch": 0.53, "grad_norm": 0.4427163898944855, "learning_rate": 0.0005545660754123452, "loss": 1.8083, "step": 16003 }, { "epoch": 0.53, "grad_norm": 0.4307488799095154, "learning_rate": 0.0005545605449441983, "loss": 1.8417, "step": 16004 }, { "epoch": 0.53, "grad_norm": 0.44145843386650085, "learning_rate": 0.0005545550141670523, "loss": 1.8227, "step": 16005 }, { "epoch": 0.53, "grad_norm": 0.441542387008667, "learning_rate": 0.0005545494830809141, "loss": 1.8687, "step": 16006 }, { "epoch": 0.53, "grad_norm": 0.43354469537734985, "learning_rate": 0.0005545439516857904, "loss": 1.8298, "step": 16007 }, { "epoch": 0.53, "grad_norm": 0.4329485297203064, "learning_rate": 0.0005545384199816876, "loss": 1.8732, "step": 16008 }, { "epoch": 0.53, "grad_norm": 0.42828911542892456, "learning_rate": 0.0005545328879686129, "loss": 1.8419, "step": 16009 }, { "epoch": 0.53, "grad_norm": 0.4211021959781647, "learning_rate": 0.0005545273556465727, "loss": 1.8022, "step": 16010 }, { "epoch": 0.53, "grad_norm": 0.44177696108818054, "learning_rate": 0.0005545218230155738, "loss": 1.9249, "step": 16011 }, { "epoch": 0.53, "grad_norm": 0.4159303605556488, "learning_rate": 0.0005545162900756228, "loss": 1.912, "step": 16012 }, { "epoch": 0.53, "grad_norm": 0.4331721067428589, "learning_rate": 0.0005545107568267265, "loss": 1.8156, "step": 16013 }, { "epoch": 0.53, "grad_norm": 0.4385022521018982, "learning_rate": 0.0005545052232688918, "loss": 1.8742, "step": 16014 }, { "epoch": 0.53, "grad_norm": 0.4410957396030426, "learning_rate": 0.0005544996894021253, "loss": 1.7944, "step": 16015 }, { "epoch": 0.53, "grad_norm": 0.4363005459308624, "learning_rate": 0.0005544941552264336, "loss": 1.8456, "step": 16016 }, { "epoch": 0.53, "grad_norm": 0.450136661529541, "learning_rate": 0.0005544886207418234, "loss": 1.9065, "step": 16017 }, { "epoch": 0.53, "grad_norm": 0.4255457818508148, "learning_rate": 0.0005544830859483016, "loss": 1.8459, "step": 16018 }, { "epoch": 0.53, "grad_norm": 0.42839711904525757, "learning_rate": 0.0005544775508458748, "loss": 1.8615, "step": 16019 }, { "epoch": 0.53, "grad_norm": 0.43422409892082214, "learning_rate": 0.0005544720154345498, "loss": 1.8194, "step": 16020 }, { "epoch": 0.53, "grad_norm": 0.42883819341659546, "learning_rate": 0.0005544664797143332, "loss": 1.8481, "step": 16021 }, { "epoch": 0.53, "grad_norm": 0.4263112246990204, "learning_rate": 0.0005544609436852318, "loss": 1.9286, "step": 16022 }, { "epoch": 0.53, "grad_norm": 0.42420732975006104, "learning_rate": 0.0005544554073472524, "loss": 1.8077, "step": 16023 }, { "epoch": 0.53, "grad_norm": 0.4340900778770447, "learning_rate": 0.0005544498707004016, "loss": 1.8744, "step": 16024 }, { "epoch": 0.53, "grad_norm": 0.44190409779548645, "learning_rate": 0.000554444333744686, "loss": 1.8805, "step": 16025 }, { "epoch": 0.53, "grad_norm": 0.4264719784259796, "learning_rate": 0.0005544387964801126, "loss": 1.8421, "step": 16026 }, { "epoch": 0.53, "grad_norm": 0.4535799026489258, "learning_rate": 0.000554433258906688, "loss": 1.8599, "step": 16027 }, { "epoch": 0.53, "grad_norm": 0.42742350697517395, "learning_rate": 0.0005544277210244189, "loss": 1.7727, "step": 16028 }, { "epoch": 0.53, "grad_norm": 0.4262996315956116, "learning_rate": 0.000554422182833312, "loss": 1.7751, "step": 16029 }, { "epoch": 0.53, "grad_norm": 0.41904860734939575, "learning_rate": 0.0005544166443333742, "loss": 1.8464, "step": 16030 }, { "epoch": 0.53, "grad_norm": 0.4131215512752533, "learning_rate": 0.0005544111055246118, "loss": 1.7865, "step": 16031 }, { "epoch": 0.53, "grad_norm": 0.4470791220664978, "learning_rate": 0.0005544055664070321, "loss": 1.9034, "step": 16032 }, { "epoch": 0.53, "grad_norm": 0.44195592403411865, "learning_rate": 0.0005544000269806413, "loss": 1.9172, "step": 16033 }, { "epoch": 0.53, "grad_norm": 0.42672041058540344, "learning_rate": 0.0005543944872454464, "loss": 1.8584, "step": 16034 }, { "epoch": 0.53, "grad_norm": 0.45714181661605835, "learning_rate": 0.0005543889472014542, "loss": 1.818, "step": 16035 }, { "epoch": 0.53, "grad_norm": 0.45666828751564026, "learning_rate": 0.0005543834068486712, "loss": 1.8266, "step": 16036 }, { "epoch": 0.53, "grad_norm": 0.47310200333595276, "learning_rate": 0.0005543778661871042, "loss": 1.8107, "step": 16037 }, { "epoch": 0.53, "grad_norm": 0.41974857449531555, "learning_rate": 0.00055437232521676, "loss": 1.7644, "step": 16038 }, { "epoch": 0.53, "grad_norm": 0.4723680019378662, "learning_rate": 0.0005543667839376453, "loss": 1.8028, "step": 16039 }, { "epoch": 0.53, "grad_norm": 0.843696117401123, "learning_rate": 0.0005543612423497667, "loss": 1.8917, "step": 16040 }, { "epoch": 0.53, "grad_norm": 0.4336056113243103, "learning_rate": 0.0005543557004531311, "loss": 1.8416, "step": 16041 }, { "epoch": 0.53, "grad_norm": 0.43356800079345703, "learning_rate": 0.0005543501582477452, "loss": 1.775, "step": 16042 }, { "epoch": 0.53, "grad_norm": 0.4842187166213989, "learning_rate": 0.0005543446157336156, "loss": 1.8045, "step": 16043 }, { "epoch": 0.53, "grad_norm": 0.4468887150287628, "learning_rate": 0.0005543390729107492, "loss": 1.842, "step": 16044 }, { "epoch": 0.53, "grad_norm": 0.4457153379917145, "learning_rate": 0.0005543335297791526, "loss": 1.8022, "step": 16045 }, { "epoch": 0.53, "grad_norm": 0.4476679861545563, "learning_rate": 0.0005543279863388326, "loss": 1.9048, "step": 16046 }, { "epoch": 0.53, "grad_norm": 0.4608634114265442, "learning_rate": 0.0005543224425897959, "loss": 1.8189, "step": 16047 }, { "epoch": 0.53, "grad_norm": 0.5063771605491638, "learning_rate": 0.000554316898532049, "loss": 1.8417, "step": 16048 }, { "epoch": 0.53, "grad_norm": 0.434766948223114, "learning_rate": 0.0005543113541655991, "loss": 1.9418, "step": 16049 }, { "epoch": 0.53, "grad_norm": 0.43971261382102966, "learning_rate": 0.0005543058094904526, "loss": 1.8502, "step": 16050 }, { "epoch": 0.53, "grad_norm": 0.43866339325904846, "learning_rate": 0.0005543002645066163, "loss": 1.9112, "step": 16051 }, { "epoch": 0.53, "grad_norm": 0.44102945923805237, "learning_rate": 0.0005542947192140971, "loss": 1.8267, "step": 16052 }, { "epoch": 0.53, "grad_norm": 0.6171929240226746, "learning_rate": 0.0005542891736129015, "loss": 1.8833, "step": 16053 }, { "epoch": 0.53, "grad_norm": 0.4277820885181427, "learning_rate": 0.0005542836277030363, "loss": 1.8061, "step": 16054 }, { "epoch": 0.53, "grad_norm": 0.4284363389015198, "learning_rate": 0.0005542780814845082, "loss": 1.8906, "step": 16055 }, { "epoch": 0.53, "grad_norm": 0.43128693103790283, "learning_rate": 0.0005542725349573241, "loss": 1.9604, "step": 16056 }, { "epoch": 0.53, "grad_norm": 0.4385712444782257, "learning_rate": 0.0005542669881214904, "loss": 1.8834, "step": 16057 }, { "epoch": 0.53, "grad_norm": 0.4136795699596405, "learning_rate": 0.0005542614409770141, "loss": 1.8336, "step": 16058 }, { "epoch": 0.53, "grad_norm": 0.42994925379753113, "learning_rate": 0.0005542558935239021, "loss": 1.8757, "step": 16059 }, { "epoch": 0.53, "grad_norm": 0.41654321551322937, "learning_rate": 0.0005542503457621607, "loss": 1.8534, "step": 16060 }, { "epoch": 0.53, "grad_norm": 0.4275418221950531, "learning_rate": 0.0005542447976917968, "loss": 1.8486, "step": 16061 }, { "epoch": 0.53, "grad_norm": 0.44265273213386536, "learning_rate": 0.0005542392493128173, "loss": 1.8613, "step": 16062 }, { "epoch": 0.53, "grad_norm": 0.4273250699043274, "learning_rate": 0.0005542337006252287, "loss": 1.8537, "step": 16063 }, { "epoch": 0.53, "grad_norm": 0.4178941547870636, "learning_rate": 0.000554228151629038, "loss": 1.8894, "step": 16064 }, { "epoch": 0.53, "grad_norm": 0.44085267186164856, "learning_rate": 0.0005542226023242517, "loss": 1.8344, "step": 16065 }, { "epoch": 0.53, "grad_norm": 0.4273058772087097, "learning_rate": 0.0005542170527108766, "loss": 1.8699, "step": 16066 }, { "epoch": 0.53, "grad_norm": 0.4323921799659729, "learning_rate": 0.0005542115027889195, "loss": 1.8315, "step": 16067 }, { "epoch": 0.53, "grad_norm": 0.4436439573764801, "learning_rate": 0.0005542059525583871, "loss": 1.9509, "step": 16068 }, { "epoch": 0.53, "grad_norm": 0.5000198483467102, "learning_rate": 0.000554200402019286, "loss": 1.8637, "step": 16069 }, { "epoch": 0.53, "grad_norm": 0.43827342987060547, "learning_rate": 0.0005541948511716232, "loss": 1.8467, "step": 16070 }, { "epoch": 0.53, "grad_norm": 0.41593778133392334, "learning_rate": 0.0005541893000154053, "loss": 1.8367, "step": 16071 }, { "epoch": 0.53, "grad_norm": 0.43729785084724426, "learning_rate": 0.000554183748550639, "loss": 1.805, "step": 16072 }, { "epoch": 0.53, "grad_norm": 0.4470590651035309, "learning_rate": 0.0005541781967773311, "loss": 1.8013, "step": 16073 }, { "epoch": 0.53, "grad_norm": 0.43308576941490173, "learning_rate": 0.0005541726446954883, "loss": 1.8359, "step": 16074 }, { "epoch": 0.53, "grad_norm": 0.4387856423854828, "learning_rate": 0.0005541670923051174, "loss": 1.9591, "step": 16075 }, { "epoch": 0.53, "grad_norm": 0.4430141746997833, "learning_rate": 0.000554161539606225, "loss": 1.9254, "step": 16076 }, { "epoch": 0.53, "grad_norm": 0.438162624835968, "learning_rate": 0.000554155986598818, "loss": 1.9339, "step": 16077 }, { "epoch": 0.53, "grad_norm": 0.4275396764278412, "learning_rate": 0.0005541504332829032, "loss": 1.9332, "step": 16078 }, { "epoch": 0.53, "grad_norm": 0.4144039452075958, "learning_rate": 0.0005541448796584871, "loss": 1.8616, "step": 16079 }, { "epoch": 0.53, "grad_norm": 0.43727219104766846, "learning_rate": 0.0005541393257255765, "loss": 1.846, "step": 16080 }, { "epoch": 0.54, "grad_norm": 0.43369489908218384, "learning_rate": 0.0005541337714841784, "loss": 1.8504, "step": 16081 }, { "epoch": 0.54, "grad_norm": 0.4294780492782593, "learning_rate": 0.0005541282169342992, "loss": 1.9333, "step": 16082 }, { "epoch": 0.54, "grad_norm": 0.4165043532848358, "learning_rate": 0.0005541226620759457, "loss": 1.8594, "step": 16083 }, { "epoch": 0.54, "grad_norm": 0.44488778710365295, "learning_rate": 0.0005541171069091249, "loss": 1.8627, "step": 16084 }, { "epoch": 0.54, "grad_norm": 0.42492881417274475, "learning_rate": 0.0005541115514338433, "loss": 1.8598, "step": 16085 }, { "epoch": 0.54, "grad_norm": 0.42283686995506287, "learning_rate": 0.0005541059956501078, "loss": 1.888, "step": 16086 }, { "epoch": 0.54, "grad_norm": 0.4352979362010956, "learning_rate": 0.000554100439557925, "loss": 1.8814, "step": 16087 }, { "epoch": 0.54, "grad_norm": 0.4252767562866211, "learning_rate": 0.0005540948831573016, "loss": 1.8255, "step": 16088 }, { "epoch": 0.54, "grad_norm": 0.43019866943359375, "learning_rate": 0.0005540893264482445, "loss": 1.8315, "step": 16089 }, { "epoch": 0.54, "grad_norm": 0.4415489733219147, "learning_rate": 0.0005540837694307606, "loss": 1.8913, "step": 16090 }, { "epoch": 0.54, "grad_norm": 0.4326750934123993, "learning_rate": 0.0005540782121048563, "loss": 1.8456, "step": 16091 }, { "epoch": 0.54, "grad_norm": 0.4327315390110016, "learning_rate": 0.0005540726544705385, "loss": 1.8406, "step": 16092 }, { "epoch": 0.54, "grad_norm": 0.42946672439575195, "learning_rate": 0.0005540670965278138, "loss": 1.8698, "step": 16093 }, { "epoch": 0.54, "grad_norm": 0.45680367946624756, "learning_rate": 0.0005540615382766893, "loss": 1.8513, "step": 16094 }, { "epoch": 0.54, "grad_norm": 0.43767791986465454, "learning_rate": 0.0005540559797171713, "loss": 1.854, "step": 16095 }, { "epoch": 0.54, "grad_norm": 0.44127070903778076, "learning_rate": 0.0005540504208492669, "loss": 1.8819, "step": 16096 }, { "epoch": 0.54, "grad_norm": 0.42667317390441895, "learning_rate": 0.0005540448616729828, "loss": 1.8218, "step": 16097 }, { "epoch": 0.54, "grad_norm": 0.43506336212158203, "learning_rate": 0.0005540393021883256, "loss": 1.8554, "step": 16098 }, { "epoch": 0.54, "grad_norm": 0.43170225620269775, "learning_rate": 0.000554033742395302, "loss": 1.8902, "step": 16099 }, { "epoch": 0.54, "grad_norm": 0.4346538782119751, "learning_rate": 0.000554028182293919, "loss": 1.7694, "step": 16100 }, { "epoch": 0.54, "grad_norm": 0.4415642023086548, "learning_rate": 0.0005540226218841833, "loss": 1.8707, "step": 16101 }, { "epoch": 0.54, "grad_norm": 0.4509000778198242, "learning_rate": 0.0005540170611661013, "loss": 1.8451, "step": 16102 }, { "epoch": 0.54, "grad_norm": 0.43418997526168823, "learning_rate": 0.0005540115001396802, "loss": 1.7947, "step": 16103 }, { "epoch": 0.54, "grad_norm": 0.44093194603919983, "learning_rate": 0.0005540059388049266, "loss": 1.8488, "step": 16104 }, { "epoch": 0.54, "grad_norm": 0.4410015940666199, "learning_rate": 0.0005540003771618471, "loss": 1.888, "step": 16105 }, { "epoch": 0.54, "grad_norm": 0.44628384709358215, "learning_rate": 0.0005539948152104487, "loss": 1.8429, "step": 16106 }, { "epoch": 0.54, "grad_norm": 0.4474889636039734, "learning_rate": 0.0005539892529507379, "loss": 1.8859, "step": 16107 }, { "epoch": 0.54, "grad_norm": 0.42615121603012085, "learning_rate": 0.0005539836903827217, "loss": 1.8406, "step": 16108 }, { "epoch": 0.54, "grad_norm": 0.4368106722831726, "learning_rate": 0.0005539781275064066, "loss": 1.9356, "step": 16109 }, { "epoch": 0.54, "grad_norm": 0.43428316712379456, "learning_rate": 0.0005539725643217995, "loss": 1.8592, "step": 16110 }, { "epoch": 0.54, "grad_norm": 0.4266311526298523, "learning_rate": 0.0005539670008289071, "loss": 1.9026, "step": 16111 }, { "epoch": 0.54, "grad_norm": 0.4463833272457123, "learning_rate": 0.0005539614370277362, "loss": 1.8416, "step": 16112 }, { "epoch": 0.54, "grad_norm": 0.4393526315689087, "learning_rate": 0.0005539558729182936, "loss": 1.8804, "step": 16113 }, { "epoch": 0.54, "grad_norm": 0.4404776692390442, "learning_rate": 0.0005539503085005859, "loss": 1.8821, "step": 16114 }, { "epoch": 0.54, "grad_norm": 0.43337705731391907, "learning_rate": 0.00055394474377462, "loss": 1.8673, "step": 16115 }, { "epoch": 0.54, "grad_norm": 0.4237087070941925, "learning_rate": 0.0005539391787404026, "loss": 1.8512, "step": 16116 }, { "epoch": 0.54, "grad_norm": 0.41338905692100525, "learning_rate": 0.0005539336133979403, "loss": 1.8486, "step": 16117 }, { "epoch": 0.54, "grad_norm": 0.4109514355659485, "learning_rate": 0.0005539280477472402, "loss": 1.8101, "step": 16118 }, { "epoch": 0.54, "grad_norm": 0.4357595443725586, "learning_rate": 0.0005539224817883088, "loss": 1.8208, "step": 16119 }, { "epoch": 0.54, "grad_norm": 0.4374450445175171, "learning_rate": 0.0005539169155211529, "loss": 1.9244, "step": 16120 }, { "epoch": 0.54, "grad_norm": 0.42247340083122253, "learning_rate": 0.0005539113489457794, "loss": 1.8901, "step": 16121 }, { "epoch": 0.54, "grad_norm": 0.41334807872772217, "learning_rate": 0.0005539057820621948, "loss": 1.8815, "step": 16122 }, { "epoch": 0.54, "grad_norm": 0.43441906571388245, "learning_rate": 0.000553900214870406, "loss": 1.8788, "step": 16123 }, { "epoch": 0.54, "grad_norm": 0.441987544298172, "learning_rate": 0.0005538946473704198, "loss": 1.8662, "step": 16124 }, { "epoch": 0.54, "grad_norm": 0.43504923582077026, "learning_rate": 0.0005538890795622429, "loss": 1.7828, "step": 16125 }, { "epoch": 0.54, "grad_norm": 0.41710153222084045, "learning_rate": 0.000553883511445882, "loss": 1.8282, "step": 16126 }, { "epoch": 0.54, "grad_norm": 0.4239315390586853, "learning_rate": 0.000553877943021344, "loss": 1.8351, "step": 16127 }, { "epoch": 0.54, "grad_norm": 0.42884561419487, "learning_rate": 0.0005538723742886356, "loss": 1.9341, "step": 16128 }, { "epoch": 0.54, "grad_norm": 0.4309595823287964, "learning_rate": 0.0005538668052477635, "loss": 1.8421, "step": 16129 }, { "epoch": 0.54, "grad_norm": 0.44389763474464417, "learning_rate": 0.0005538612358987346, "loss": 1.8515, "step": 16130 }, { "epoch": 0.54, "grad_norm": 0.43573686480522156, "learning_rate": 0.0005538556662415554, "loss": 1.838, "step": 16131 }, { "epoch": 0.54, "grad_norm": 0.4502640664577484, "learning_rate": 0.0005538500962762329, "loss": 1.8229, "step": 16132 }, { "epoch": 0.54, "grad_norm": 0.475393146276474, "learning_rate": 0.0005538445260027737, "loss": 1.8583, "step": 16133 }, { "epoch": 0.54, "grad_norm": 0.432174950838089, "learning_rate": 0.0005538389554211848, "loss": 1.7905, "step": 16134 }, { "epoch": 0.54, "grad_norm": 0.45387932658195496, "learning_rate": 0.0005538333845314727, "loss": 1.8925, "step": 16135 }, { "epoch": 0.54, "grad_norm": 0.4303169548511505, "learning_rate": 0.0005538278133336444, "loss": 1.8061, "step": 16136 }, { "epoch": 0.54, "grad_norm": 0.4599308669567108, "learning_rate": 0.0005538222418277064, "loss": 1.8741, "step": 16137 }, { "epoch": 0.54, "grad_norm": 0.46316343545913696, "learning_rate": 0.0005538166700136657, "loss": 1.8836, "step": 16138 }, { "epoch": 0.54, "grad_norm": 0.44860905408859253, "learning_rate": 0.000553811097891529, "loss": 1.8091, "step": 16139 }, { "epoch": 0.54, "grad_norm": 0.4242016673088074, "learning_rate": 0.0005538055254613028, "loss": 1.8486, "step": 16140 }, { "epoch": 0.54, "grad_norm": 0.4552871882915497, "learning_rate": 0.0005537999527229942, "loss": 1.8876, "step": 16141 }, { "epoch": 0.54, "grad_norm": 0.5144280791282654, "learning_rate": 0.00055379437967661, "loss": 1.8947, "step": 16142 }, { "epoch": 0.54, "grad_norm": 0.46200692653656006, "learning_rate": 0.0005537888063221567, "loss": 1.9921, "step": 16143 }, { "epoch": 0.54, "grad_norm": 0.4505518972873688, "learning_rate": 0.0005537832326596411, "loss": 1.9878, "step": 16144 }, { "epoch": 0.54, "grad_norm": 0.512563169002533, "learning_rate": 0.0005537776586890701, "loss": 1.8184, "step": 16145 }, { "epoch": 0.54, "grad_norm": 0.6799076199531555, "learning_rate": 0.0005537720844104504, "loss": 1.814, "step": 16146 }, { "epoch": 0.54, "grad_norm": 0.44265854358673096, "learning_rate": 0.0005537665098237889, "loss": 1.8965, "step": 16147 }, { "epoch": 0.54, "grad_norm": 0.4542366564273834, "learning_rate": 0.0005537609349290921, "loss": 1.9244, "step": 16148 }, { "epoch": 0.54, "grad_norm": 0.4667331278324127, "learning_rate": 0.000553755359726367, "loss": 1.8248, "step": 16149 }, { "epoch": 0.54, "grad_norm": 0.457381933927536, "learning_rate": 0.0005537497842156203, "loss": 1.8488, "step": 16150 }, { "epoch": 0.54, "grad_norm": 0.455451101064682, "learning_rate": 0.0005537442083968587, "loss": 1.8997, "step": 16151 }, { "epoch": 0.54, "grad_norm": 0.424048513174057, "learning_rate": 0.0005537386322700891, "loss": 1.8403, "step": 16152 }, { "epoch": 0.54, "grad_norm": 0.4687793552875519, "learning_rate": 0.0005537330558353181, "loss": 1.8235, "step": 16153 }, { "epoch": 0.54, "grad_norm": 0.4543423652648926, "learning_rate": 0.0005537274790925525, "loss": 1.8645, "step": 16154 }, { "epoch": 0.54, "grad_norm": 0.41160622239112854, "learning_rate": 0.0005537219020417993, "loss": 1.8126, "step": 16155 }, { "epoch": 0.54, "grad_norm": 0.420565128326416, "learning_rate": 0.0005537163246830649, "loss": 1.8025, "step": 16156 }, { "epoch": 0.54, "grad_norm": 0.4223277270793915, "learning_rate": 0.0005537107470163564, "loss": 1.8809, "step": 16157 }, { "epoch": 0.54, "grad_norm": 0.43231239914894104, "learning_rate": 0.0005537051690416804, "loss": 1.8234, "step": 16158 }, { "epoch": 0.54, "grad_norm": 0.42436784505844116, "learning_rate": 0.0005536995907590436, "loss": 1.9237, "step": 16159 }, { "epoch": 0.54, "grad_norm": 0.4278196096420288, "learning_rate": 0.0005536940121684531, "loss": 1.8794, "step": 16160 }, { "epoch": 0.54, "grad_norm": 0.4274156987667084, "learning_rate": 0.0005536884332699153, "loss": 1.807, "step": 16161 }, { "epoch": 0.54, "grad_norm": 0.4410364329814911, "learning_rate": 0.0005536828540634372, "loss": 1.8176, "step": 16162 }, { "epoch": 0.54, "grad_norm": 0.4204736351966858, "learning_rate": 0.0005536772745490254, "loss": 1.8718, "step": 16163 }, { "epoch": 0.54, "grad_norm": 0.42719075083732605, "learning_rate": 0.0005536716947266868, "loss": 1.8449, "step": 16164 }, { "epoch": 0.54, "grad_norm": 0.41759559512138367, "learning_rate": 0.0005536661145964281, "loss": 1.7813, "step": 16165 }, { "epoch": 0.54, "grad_norm": 0.45509275794029236, "learning_rate": 0.0005536605341582562, "loss": 1.8009, "step": 16166 }, { "epoch": 0.54, "grad_norm": 0.4476545453071594, "learning_rate": 0.0005536549534121779, "loss": 1.8517, "step": 16167 }, { "epoch": 0.54, "grad_norm": 0.41887736320495605, "learning_rate": 0.0005536493723581996, "loss": 1.9075, "step": 16168 }, { "epoch": 0.54, "grad_norm": 0.4449443221092224, "learning_rate": 0.0005536437909963285, "loss": 1.8132, "step": 16169 }, { "epoch": 0.54, "grad_norm": 0.45743200182914734, "learning_rate": 0.0005536382093265711, "loss": 1.8239, "step": 16170 }, { "epoch": 0.54, "grad_norm": 0.4605640470981598, "learning_rate": 0.0005536326273489344, "loss": 1.8717, "step": 16171 }, { "epoch": 0.54, "grad_norm": 0.42916688323020935, "learning_rate": 0.000553627045063425, "loss": 1.8343, "step": 16172 }, { "epoch": 0.54, "grad_norm": 0.4223845303058624, "learning_rate": 0.0005536214624700497, "loss": 1.8619, "step": 16173 }, { "epoch": 0.54, "grad_norm": 0.4105224907398224, "learning_rate": 0.0005536158795688154, "loss": 1.7979, "step": 16174 }, { "epoch": 0.54, "grad_norm": 0.4508291184902191, "learning_rate": 0.0005536102963597287, "loss": 1.8444, "step": 16175 }, { "epoch": 0.54, "grad_norm": 0.4196099638938904, "learning_rate": 0.0005536047128427966, "loss": 1.7983, "step": 16176 }, { "epoch": 0.54, "grad_norm": 0.6161402463912964, "learning_rate": 0.0005535991290180255, "loss": 1.8936, "step": 16177 }, { "epoch": 0.54, "grad_norm": 0.4227447211742401, "learning_rate": 0.0005535935448854227, "loss": 1.8695, "step": 16178 }, { "epoch": 0.54, "grad_norm": 0.44355785846710205, "learning_rate": 0.0005535879604449945, "loss": 1.7762, "step": 16179 }, { "epoch": 0.54, "grad_norm": 0.45351678133010864, "learning_rate": 0.000553582375696748, "loss": 1.8778, "step": 16180 }, { "epoch": 0.54, "grad_norm": 0.41634613275527954, "learning_rate": 0.0005535767906406898, "loss": 1.903, "step": 16181 }, { "epoch": 0.54, "grad_norm": 0.4229961037635803, "learning_rate": 0.0005535712052768268, "loss": 1.8756, "step": 16182 }, { "epoch": 0.54, "grad_norm": 0.42759454250335693, "learning_rate": 0.0005535656196051657, "loss": 1.8515, "step": 16183 }, { "epoch": 0.54, "grad_norm": 0.42275774478912354, "learning_rate": 0.0005535600336257132, "loss": 1.8145, "step": 16184 }, { "epoch": 0.54, "grad_norm": 0.4229567050933838, "learning_rate": 0.0005535544473384763, "loss": 1.9181, "step": 16185 }, { "epoch": 0.54, "grad_norm": 0.4264688491821289, "learning_rate": 0.0005535488607434616, "loss": 1.7896, "step": 16186 }, { "epoch": 0.54, "grad_norm": 0.42741262912750244, "learning_rate": 0.000553543273840676, "loss": 1.919, "step": 16187 }, { "epoch": 0.54, "grad_norm": 0.42077872157096863, "learning_rate": 0.0005535376866301262, "loss": 1.8579, "step": 16188 }, { "epoch": 0.54, "grad_norm": 0.41853266954421997, "learning_rate": 0.0005535320991118189, "loss": 1.824, "step": 16189 }, { "epoch": 0.54, "grad_norm": 0.4184311628341675, "learning_rate": 0.0005535265112857613, "loss": 1.8278, "step": 16190 }, { "epoch": 0.54, "grad_norm": 0.4367064833641052, "learning_rate": 0.0005535209231519596, "loss": 1.9084, "step": 16191 }, { "epoch": 0.54, "grad_norm": 0.4238082468509674, "learning_rate": 0.0005535153347104208, "loss": 1.8081, "step": 16192 }, { "epoch": 0.54, "grad_norm": 0.5972676873207092, "learning_rate": 0.0005535097459611519, "loss": 1.8341, "step": 16193 }, { "epoch": 0.54, "grad_norm": 0.43049338459968567, "learning_rate": 0.0005535041569041594, "loss": 1.9209, "step": 16194 }, { "epoch": 0.54, "grad_norm": 0.43482592701911926, "learning_rate": 0.0005534985675394504, "loss": 1.8019, "step": 16195 }, { "epoch": 0.54, "grad_norm": 0.43254175782203674, "learning_rate": 0.0005534929778670313, "loss": 1.8053, "step": 16196 }, { "epoch": 0.54, "grad_norm": 0.4288894832134247, "learning_rate": 0.0005534873878869091, "loss": 1.7917, "step": 16197 }, { "epoch": 0.54, "grad_norm": 0.4404929578304291, "learning_rate": 0.0005534817975990907, "loss": 1.7388, "step": 16198 }, { "epoch": 0.54, "grad_norm": 0.4360426962375641, "learning_rate": 0.0005534762070035826, "loss": 1.8561, "step": 16199 }, { "epoch": 0.54, "grad_norm": 0.43743664026260376, "learning_rate": 0.0005534706161003918, "loss": 1.9429, "step": 16200 }, { "epoch": 0.54, "grad_norm": 0.43507468700408936, "learning_rate": 0.000553465024889525, "loss": 1.8744, "step": 16201 }, { "epoch": 0.54, "grad_norm": 0.43938374519348145, "learning_rate": 0.000553459433370989, "loss": 1.8336, "step": 16202 }, { "epoch": 0.54, "grad_norm": 0.41855981945991516, "learning_rate": 0.0005534538415447905, "loss": 1.822, "step": 16203 }, { "epoch": 0.54, "grad_norm": 0.42509958148002625, "learning_rate": 0.0005534482494109365, "loss": 1.8503, "step": 16204 }, { "epoch": 0.54, "grad_norm": 0.4463983178138733, "learning_rate": 0.0005534426569694337, "loss": 1.7386, "step": 16205 }, { "epoch": 0.54, "grad_norm": 0.4310453236103058, "learning_rate": 0.0005534370642202887, "loss": 1.8858, "step": 16206 }, { "epoch": 0.54, "grad_norm": 0.42794114351272583, "learning_rate": 0.0005534314711635086, "loss": 1.8646, "step": 16207 }, { "epoch": 0.54, "grad_norm": 0.42887037992477417, "learning_rate": 0.0005534258777991, "loss": 1.7727, "step": 16208 }, { "epoch": 0.54, "grad_norm": 0.42892277240753174, "learning_rate": 0.0005534202841270697, "loss": 1.8141, "step": 16209 }, { "epoch": 0.54, "grad_norm": 0.4323360323905945, "learning_rate": 0.0005534146901474246, "loss": 1.8657, "step": 16210 }, { "epoch": 0.54, "grad_norm": 0.42825692892074585, "learning_rate": 0.0005534090958601712, "loss": 1.9017, "step": 16211 }, { "epoch": 0.54, "grad_norm": 0.4291509687900543, "learning_rate": 0.0005534035012653167, "loss": 1.8572, "step": 16212 }, { "epoch": 0.54, "grad_norm": 0.44972530007362366, "learning_rate": 0.0005533979063628675, "loss": 1.9183, "step": 16213 }, { "epoch": 0.54, "grad_norm": 0.4198296368122101, "learning_rate": 0.0005533923111528308, "loss": 1.8534, "step": 16214 }, { "epoch": 0.54, "grad_norm": 0.4324590265750885, "learning_rate": 0.000553386715635213, "loss": 1.7697, "step": 16215 }, { "epoch": 0.54, "grad_norm": 0.426482617855072, "learning_rate": 0.0005533811198100211, "loss": 1.8875, "step": 16216 }, { "epoch": 0.54, "grad_norm": 0.4217372536659241, "learning_rate": 0.000553375523677262, "loss": 1.7851, "step": 16217 }, { "epoch": 0.54, "grad_norm": 0.43337684869766235, "learning_rate": 0.0005533699272369421, "loss": 1.8452, "step": 16218 }, { "epoch": 0.54, "grad_norm": 0.41735419631004333, "learning_rate": 0.0005533643304890686, "loss": 1.78, "step": 16219 }, { "epoch": 0.54, "grad_norm": 0.4223618507385254, "learning_rate": 0.0005533587334336481, "loss": 1.8734, "step": 16220 }, { "epoch": 0.54, "grad_norm": 0.44651761651039124, "learning_rate": 0.0005533531360706874, "loss": 1.9023, "step": 16221 }, { "epoch": 0.54, "grad_norm": 0.43050050735473633, "learning_rate": 0.0005533475384001934, "loss": 1.8813, "step": 16222 }, { "epoch": 0.54, "grad_norm": 0.41889336705207825, "learning_rate": 0.0005533419404221728, "loss": 1.9021, "step": 16223 }, { "epoch": 0.54, "grad_norm": 0.4131299555301666, "learning_rate": 0.0005533363421366324, "loss": 1.8591, "step": 16224 }, { "epoch": 0.54, "grad_norm": 0.44769179821014404, "learning_rate": 0.0005533307435435789, "loss": 1.7991, "step": 16225 }, { "epoch": 0.54, "grad_norm": 0.4250531792640686, "learning_rate": 0.0005533251446430194, "loss": 1.8459, "step": 16226 }, { "epoch": 0.54, "grad_norm": 0.4265344440937042, "learning_rate": 0.0005533195454349603, "loss": 1.8524, "step": 16227 }, { "epoch": 0.54, "grad_norm": 0.44514405727386475, "learning_rate": 0.0005533139459194086, "loss": 1.8322, "step": 16228 }, { "epoch": 0.54, "grad_norm": 0.4297863245010376, "learning_rate": 0.0005533083460963713, "loss": 1.8488, "step": 16229 }, { "epoch": 0.54, "grad_norm": 0.4239046275615692, "learning_rate": 0.0005533027459658548, "loss": 1.7982, "step": 16230 }, { "epoch": 0.54, "grad_norm": 0.45385587215423584, "learning_rate": 0.0005532971455278662, "loss": 1.8769, "step": 16231 }, { "epoch": 0.54, "grad_norm": 0.430410772562027, "learning_rate": 0.0005532915447824121, "loss": 1.8582, "step": 16232 }, { "epoch": 0.54, "grad_norm": 0.45193758606910706, "learning_rate": 0.0005532859437294995, "loss": 1.9376, "step": 16233 }, { "epoch": 0.54, "grad_norm": 0.42633819580078125, "learning_rate": 0.000553280342369135, "loss": 1.809, "step": 16234 }, { "epoch": 0.54, "grad_norm": 0.4321977198123932, "learning_rate": 0.0005532747407013254, "loss": 1.8564, "step": 16235 }, { "epoch": 0.54, "grad_norm": 0.44531872868537903, "learning_rate": 0.0005532691387260778, "loss": 1.8735, "step": 16236 }, { "epoch": 0.54, "grad_norm": 0.44671979546546936, "learning_rate": 0.0005532635364433987, "loss": 1.864, "step": 16237 }, { "epoch": 0.54, "grad_norm": 0.4283685088157654, "learning_rate": 0.0005532579338532949, "loss": 1.8781, "step": 16238 }, { "epoch": 0.54, "grad_norm": 0.45508620142936707, "learning_rate": 0.0005532523309557733, "loss": 1.871, "step": 16239 }, { "epoch": 0.54, "grad_norm": 0.44951391220092773, "learning_rate": 0.0005532467277508408, "loss": 1.9093, "step": 16240 }, { "epoch": 0.54, "grad_norm": 0.42454034090042114, "learning_rate": 0.0005532411242385039, "loss": 1.7767, "step": 16241 }, { "epoch": 0.54, "grad_norm": 0.4263724386692047, "learning_rate": 0.0005532355204187697, "loss": 1.8925, "step": 16242 }, { "epoch": 0.54, "grad_norm": 0.4385963976383209, "learning_rate": 0.0005532299162916449, "loss": 1.8533, "step": 16243 }, { "epoch": 0.54, "grad_norm": 0.45934179425239563, "learning_rate": 0.0005532243118571363, "loss": 1.9002, "step": 16244 }, { "epoch": 0.54, "grad_norm": 0.4153272807598114, "learning_rate": 0.0005532187071152506, "loss": 1.8546, "step": 16245 }, { "epoch": 0.54, "grad_norm": 0.42895379662513733, "learning_rate": 0.0005532131020659949, "loss": 1.7892, "step": 16246 }, { "epoch": 0.54, "grad_norm": 0.42936190962791443, "learning_rate": 0.0005532074967093757, "loss": 1.8657, "step": 16247 }, { "epoch": 0.54, "grad_norm": 0.44311121106147766, "learning_rate": 0.0005532018910453998, "loss": 1.8872, "step": 16248 }, { "epoch": 0.54, "grad_norm": 0.4359024167060852, "learning_rate": 0.0005531962850740742, "loss": 1.8532, "step": 16249 }, { "epoch": 0.54, "grad_norm": 0.428803414106369, "learning_rate": 0.0005531906787954056, "loss": 1.8125, "step": 16250 }, { "epoch": 0.54, "grad_norm": 0.43963298201560974, "learning_rate": 0.000553185072209401, "loss": 1.9263, "step": 16251 }, { "epoch": 0.54, "grad_norm": 0.4237551987171173, "learning_rate": 0.0005531794653160668, "loss": 1.8228, "step": 16252 }, { "epoch": 0.54, "grad_norm": 0.4189508855342865, "learning_rate": 0.00055317385811541, "loss": 1.9009, "step": 16253 }, { "epoch": 0.54, "grad_norm": 0.4296872317790985, "learning_rate": 0.0005531682506074376, "loss": 1.8641, "step": 16254 }, { "epoch": 0.54, "grad_norm": 0.41990217566490173, "learning_rate": 0.0005531626427921561, "loss": 1.8759, "step": 16255 }, { "epoch": 0.54, "grad_norm": 0.42125698924064636, "learning_rate": 0.0005531570346695725, "loss": 1.8559, "step": 16256 }, { "epoch": 0.54, "grad_norm": 0.42641353607177734, "learning_rate": 0.0005531514262396937, "loss": 1.8468, "step": 16257 }, { "epoch": 0.54, "grad_norm": 0.4493384063243866, "learning_rate": 0.0005531458175025263, "loss": 1.9042, "step": 16258 }, { "epoch": 0.54, "grad_norm": 0.43284377455711365, "learning_rate": 0.0005531402084580771, "loss": 1.8705, "step": 16259 }, { "epoch": 0.54, "grad_norm": 0.4431641399860382, "learning_rate": 0.000553134599106353, "loss": 1.8087, "step": 16260 }, { "epoch": 0.54, "grad_norm": 0.4226352274417877, "learning_rate": 0.0005531289894473608, "loss": 1.8407, "step": 16261 }, { "epoch": 0.54, "grad_norm": 0.42255476117134094, "learning_rate": 0.0005531233794811074, "loss": 1.8459, "step": 16262 }, { "epoch": 0.54, "grad_norm": 0.43620774149894714, "learning_rate": 0.0005531177692075994, "loss": 1.8775, "step": 16263 }, { "epoch": 0.54, "grad_norm": 0.44349405169487, "learning_rate": 0.0005531121586268438, "loss": 1.8365, "step": 16264 }, { "epoch": 0.54, "grad_norm": 0.4299011826515198, "learning_rate": 0.0005531065477388473, "loss": 1.9395, "step": 16265 }, { "epoch": 0.54, "grad_norm": 0.4246847629547119, "learning_rate": 0.0005531009365436167, "loss": 1.8815, "step": 16266 }, { "epoch": 0.54, "grad_norm": 0.5550016164779663, "learning_rate": 0.0005530953250411589, "loss": 1.8115, "step": 16267 }, { "epoch": 0.54, "grad_norm": 0.5559484362602234, "learning_rate": 0.0005530897132314807, "loss": 1.8782, "step": 16268 }, { "epoch": 0.54, "grad_norm": 0.4481920599937439, "learning_rate": 0.0005530841011145888, "loss": 1.9055, "step": 16269 }, { "epoch": 0.54, "grad_norm": 0.4384428858757019, "learning_rate": 0.0005530784886904901, "loss": 1.8356, "step": 16270 }, { "epoch": 0.54, "grad_norm": 0.44155868887901306, "learning_rate": 0.0005530728759591914, "loss": 1.8206, "step": 16271 }, { "epoch": 0.54, "grad_norm": 0.43569216132164, "learning_rate": 0.0005530672629206996, "loss": 1.8583, "step": 16272 }, { "epoch": 0.54, "grad_norm": 0.46995148062705994, "learning_rate": 0.0005530616495750215, "loss": 1.8649, "step": 16273 }, { "epoch": 0.54, "grad_norm": 0.4349820613861084, "learning_rate": 0.0005530560359221637, "loss": 1.8219, "step": 16274 }, { "epoch": 0.54, "grad_norm": 0.42252489924430847, "learning_rate": 0.0005530504219621331, "loss": 1.8797, "step": 16275 }, { "epoch": 0.54, "grad_norm": 0.42655065655708313, "learning_rate": 0.0005530448076949368, "loss": 1.8116, "step": 16276 }, { "epoch": 0.54, "grad_norm": 0.4508638083934784, "learning_rate": 0.0005530391931205812, "loss": 1.8555, "step": 16277 }, { "epoch": 0.54, "grad_norm": 0.432577908039093, "learning_rate": 0.0005530335782390734, "loss": 1.8554, "step": 16278 }, { "epoch": 0.54, "grad_norm": 0.4380548596382141, "learning_rate": 0.00055302796305042, "loss": 1.8679, "step": 16279 }, { "epoch": 0.54, "grad_norm": 0.4271186888217926, "learning_rate": 0.000553022347554628, "loss": 1.8541, "step": 16280 }, { "epoch": 0.54, "grad_norm": 0.42739036679267883, "learning_rate": 0.0005530167317517042, "loss": 1.8987, "step": 16281 }, { "epoch": 0.54, "grad_norm": 0.4271714389324188, "learning_rate": 0.0005530111156416553, "loss": 1.7817, "step": 16282 }, { "epoch": 0.54, "grad_norm": 0.4388083517551422, "learning_rate": 0.0005530054992244882, "loss": 1.9186, "step": 16283 }, { "epoch": 0.54, "grad_norm": 0.4393436312675476, "learning_rate": 0.0005529998825002096, "loss": 1.8578, "step": 16284 }, { "epoch": 0.54, "grad_norm": 0.4377504289150238, "learning_rate": 0.0005529942654688266, "loss": 1.864, "step": 16285 }, { "epoch": 0.54, "grad_norm": 0.4451330900192261, "learning_rate": 0.0005529886481303458, "loss": 1.8405, "step": 16286 }, { "epoch": 0.54, "grad_norm": 0.4220176637172699, "learning_rate": 0.0005529830304847739, "loss": 1.8781, "step": 16287 }, { "epoch": 0.54, "grad_norm": 0.4386489987373352, "learning_rate": 0.0005529774125321181, "loss": 1.9154, "step": 16288 }, { "epoch": 0.54, "grad_norm": 0.433247834444046, "learning_rate": 0.0005529717942723849, "loss": 1.8891, "step": 16289 }, { "epoch": 0.54, "grad_norm": 0.41525399684906006, "learning_rate": 0.0005529661757055813, "loss": 1.894, "step": 16290 }, { "epoch": 0.54, "grad_norm": 0.41918712854385376, "learning_rate": 0.0005529605568317139, "loss": 1.8267, "step": 16291 }, { "epoch": 0.54, "grad_norm": 0.4292393624782562, "learning_rate": 0.0005529549376507897, "loss": 1.883, "step": 16292 }, { "epoch": 0.54, "grad_norm": 0.4260270893573761, "learning_rate": 0.0005529493181628155, "loss": 1.8883, "step": 16293 }, { "epoch": 0.54, "grad_norm": 0.42390263080596924, "learning_rate": 0.0005529436983677981, "loss": 1.8258, "step": 16294 }, { "epoch": 0.54, "grad_norm": 0.4128952920436859, "learning_rate": 0.0005529380782657443, "loss": 1.858, "step": 16295 }, { "epoch": 0.54, "grad_norm": 0.4315881133079529, "learning_rate": 0.0005529324578566609, "loss": 1.8885, "step": 16296 }, { "epoch": 0.54, "grad_norm": 0.421239972114563, "learning_rate": 0.0005529268371405548, "loss": 1.8233, "step": 16297 }, { "epoch": 0.54, "grad_norm": 0.4277915954589844, "learning_rate": 0.0005529212161174328, "loss": 1.8593, "step": 16298 }, { "epoch": 0.54, "grad_norm": 0.42756062746047974, "learning_rate": 0.0005529155947873017, "loss": 1.8435, "step": 16299 }, { "epoch": 0.54, "grad_norm": 0.4262727200984955, "learning_rate": 0.0005529099731501683, "loss": 1.856, "step": 16300 }, { "epoch": 0.54, "grad_norm": 0.7648296356201172, "learning_rate": 0.0005529043512060395, "loss": 1.8638, "step": 16301 }, { "epoch": 0.54, "grad_norm": 0.44577881693840027, "learning_rate": 0.0005528987289549222, "loss": 1.8702, "step": 16302 }, { "epoch": 0.54, "grad_norm": 0.4605843126773834, "learning_rate": 0.0005528931063968229, "loss": 1.8142, "step": 16303 }, { "epoch": 0.54, "grad_norm": 0.4436933398246765, "learning_rate": 0.0005528874835317488, "loss": 1.7983, "step": 16304 }, { "epoch": 0.54, "grad_norm": 0.42162254452705383, "learning_rate": 0.0005528818603597064, "loss": 1.8295, "step": 16305 }, { "epoch": 0.54, "grad_norm": 0.4550192654132843, "learning_rate": 0.0005528762368807027, "loss": 1.9403, "step": 16306 }, { "epoch": 0.54, "grad_norm": 0.43457305431365967, "learning_rate": 0.0005528706130947446, "loss": 1.8464, "step": 16307 }, { "epoch": 0.54, "grad_norm": 0.4404931962490082, "learning_rate": 0.0005528649890018388, "loss": 1.7981, "step": 16308 }, { "epoch": 0.54, "grad_norm": 0.42796647548675537, "learning_rate": 0.0005528593646019921, "loss": 1.8586, "step": 16309 }, { "epoch": 0.54, "grad_norm": 0.44262030720710754, "learning_rate": 0.0005528537398952116, "loss": 1.8937, "step": 16310 }, { "epoch": 0.54, "grad_norm": 0.4249066710472107, "learning_rate": 0.0005528481148815036, "loss": 1.7587, "step": 16311 }, { "epoch": 0.54, "grad_norm": 0.42947760224342346, "learning_rate": 0.0005528424895608755, "loss": 1.86, "step": 16312 }, { "epoch": 0.54, "grad_norm": 0.43902432918548584, "learning_rate": 0.0005528368639333337, "loss": 1.8727, "step": 16313 }, { "epoch": 0.54, "grad_norm": 0.44768235087394714, "learning_rate": 0.0005528312379988853, "loss": 1.9099, "step": 16314 }, { "epoch": 0.54, "grad_norm": 0.42277300357818604, "learning_rate": 0.000552825611757537, "loss": 1.8949, "step": 16315 }, { "epoch": 0.54, "grad_norm": 0.4149271845817566, "learning_rate": 0.0005528199852092957, "loss": 1.8232, "step": 16316 }, { "epoch": 0.54, "grad_norm": 0.44148510694503784, "learning_rate": 0.0005528143583541682, "loss": 1.8695, "step": 16317 }, { "epoch": 0.54, "grad_norm": 0.43907904624938965, "learning_rate": 0.0005528087311921612, "loss": 1.8818, "step": 16318 }, { "epoch": 0.54, "grad_norm": 0.4772639572620392, "learning_rate": 0.0005528031037232817, "loss": 1.806, "step": 16319 }, { "epoch": 0.54, "grad_norm": 0.4584832489490509, "learning_rate": 0.0005527974759475365, "loss": 1.9361, "step": 16320 }, { "epoch": 0.54, "grad_norm": 0.4300011396408081, "learning_rate": 0.0005527918478649324, "loss": 1.8111, "step": 16321 }, { "epoch": 0.54, "grad_norm": 0.4244825839996338, "learning_rate": 0.0005527862194754763, "loss": 1.8616, "step": 16322 }, { "epoch": 0.54, "grad_norm": 0.442483514547348, "learning_rate": 0.000552780590779175, "loss": 1.877, "step": 16323 }, { "epoch": 0.54, "grad_norm": 0.42433154582977295, "learning_rate": 0.0005527749617760352, "loss": 1.8402, "step": 16324 }, { "epoch": 0.54, "grad_norm": 0.4275255799293518, "learning_rate": 0.000552769332466064, "loss": 1.7582, "step": 16325 }, { "epoch": 0.54, "grad_norm": 0.43267685174942017, "learning_rate": 0.0005527637028492678, "loss": 1.941, "step": 16326 }, { "epoch": 0.54, "grad_norm": 0.4209466576576233, "learning_rate": 0.0005527580729256539, "loss": 1.8955, "step": 16327 }, { "epoch": 0.54, "grad_norm": 0.4237968921661377, "learning_rate": 0.0005527524426952289, "loss": 1.8647, "step": 16328 }, { "epoch": 0.54, "grad_norm": 0.41957738995552063, "learning_rate": 0.0005527468121579997, "loss": 1.8827, "step": 16329 }, { "epoch": 0.54, "grad_norm": 0.4315931499004364, "learning_rate": 0.0005527411813139731, "loss": 1.7521, "step": 16330 }, { "epoch": 0.54, "grad_norm": 0.4191472828388214, "learning_rate": 0.000552735550163156, "loss": 1.9004, "step": 16331 }, { "epoch": 0.54, "grad_norm": 0.44324061274528503, "learning_rate": 0.0005527299187055552, "loss": 1.9073, "step": 16332 }, { "epoch": 0.54, "grad_norm": 0.4356357455253601, "learning_rate": 0.0005527242869411773, "loss": 1.8606, "step": 16333 }, { "epoch": 0.54, "grad_norm": 0.4411919116973877, "learning_rate": 0.0005527186548700295, "loss": 1.8429, "step": 16334 }, { "epoch": 0.54, "grad_norm": 0.4600454866886139, "learning_rate": 0.0005527130224921184, "loss": 1.8407, "step": 16335 }, { "epoch": 0.54, "grad_norm": 0.4685693383216858, "learning_rate": 0.0005527073898074511, "loss": 1.8521, "step": 16336 }, { "epoch": 0.54, "grad_norm": 0.4248737394809723, "learning_rate": 0.0005527017568160342, "loss": 1.8904, "step": 16337 }, { "epoch": 0.54, "grad_norm": 0.43237701058387756, "learning_rate": 0.0005526961235178745, "loss": 1.8565, "step": 16338 }, { "epoch": 0.54, "grad_norm": 0.41793179512023926, "learning_rate": 0.000552690489912979, "loss": 1.8097, "step": 16339 }, { "epoch": 0.54, "grad_norm": 0.446963369846344, "learning_rate": 0.0005526848560013546, "loss": 1.8462, "step": 16340 }, { "epoch": 0.54, "grad_norm": 0.43636783957481384, "learning_rate": 0.0005526792217830079, "loss": 1.8368, "step": 16341 }, { "epoch": 0.54, "grad_norm": 0.4163598418235779, "learning_rate": 0.0005526735872579458, "loss": 1.9073, "step": 16342 }, { "epoch": 0.54, "grad_norm": 0.43330779671669006, "learning_rate": 0.0005526679524261753, "loss": 1.7924, "step": 16343 }, { "epoch": 0.54, "grad_norm": 0.43289434909820557, "learning_rate": 0.000552662317287703, "loss": 1.807, "step": 16344 }, { "epoch": 0.54, "grad_norm": 0.4249846637248993, "learning_rate": 0.000552656681842536, "loss": 1.8697, "step": 16345 }, { "epoch": 0.54, "grad_norm": 0.4246293902397156, "learning_rate": 0.000552651046090681, "loss": 1.8557, "step": 16346 }, { "epoch": 0.54, "grad_norm": 0.43396562337875366, "learning_rate": 0.0005526454100321449, "loss": 1.8438, "step": 16347 }, { "epoch": 0.54, "grad_norm": 0.429343044757843, "learning_rate": 0.0005526397736669344, "loss": 1.8841, "step": 16348 }, { "epoch": 0.54, "grad_norm": 0.41826149821281433, "learning_rate": 0.0005526341369950564, "loss": 1.8563, "step": 16349 }, { "epoch": 0.54, "grad_norm": 0.4174599051475525, "learning_rate": 0.000552628500016518, "loss": 1.8183, "step": 16350 }, { "epoch": 0.54, "grad_norm": 0.4666560888290405, "learning_rate": 0.0005526228627313255, "loss": 1.7837, "step": 16351 }, { "epoch": 0.54, "grad_norm": 0.42485281825065613, "learning_rate": 0.0005526172251394863, "loss": 1.879, "step": 16352 }, { "epoch": 0.54, "grad_norm": 0.4298616647720337, "learning_rate": 0.000552611587241007, "loss": 1.8079, "step": 16353 }, { "epoch": 0.54, "grad_norm": 0.4510147273540497, "learning_rate": 0.0005526059490358944, "loss": 1.8938, "step": 16354 }, { "epoch": 0.54, "grad_norm": 0.4408362805843353, "learning_rate": 0.0005526003105241554, "loss": 1.9632, "step": 16355 }, { "epoch": 0.54, "grad_norm": 0.4302745759487152, "learning_rate": 0.0005525946717057969, "loss": 1.8642, "step": 16356 }, { "epoch": 0.54, "grad_norm": 0.4443663954734802, "learning_rate": 0.0005525890325808257, "loss": 1.8413, "step": 16357 }, { "epoch": 0.54, "grad_norm": 0.4330451190471649, "learning_rate": 0.0005525833931492484, "loss": 1.9028, "step": 16358 }, { "epoch": 0.54, "grad_norm": 0.7124127745628357, "learning_rate": 0.0005525777534110723, "loss": 1.8737, "step": 16359 }, { "epoch": 0.54, "grad_norm": 0.42859819531440735, "learning_rate": 0.000552572113366304, "loss": 1.7877, "step": 16360 }, { "epoch": 0.54, "grad_norm": 0.42146068811416626, "learning_rate": 0.0005525664730149503, "loss": 1.9072, "step": 16361 }, { "epoch": 0.54, "grad_norm": 0.4371890723705292, "learning_rate": 0.0005525608323570182, "loss": 1.8601, "step": 16362 }, { "epoch": 0.54, "grad_norm": 0.4399346709251404, "learning_rate": 0.0005525551913925144, "loss": 1.781, "step": 16363 }, { "epoch": 0.54, "grad_norm": 0.43113094568252563, "learning_rate": 0.0005525495501214458, "loss": 1.8307, "step": 16364 }, { "epoch": 0.54, "grad_norm": 0.41527870297431946, "learning_rate": 0.0005525439085438193, "loss": 1.8832, "step": 16365 }, { "epoch": 0.54, "grad_norm": 0.4220277667045593, "learning_rate": 0.0005525382666596416, "loss": 1.8921, "step": 16366 }, { "epoch": 0.54, "grad_norm": 0.4656798541545868, "learning_rate": 0.0005525326244689198, "loss": 1.888, "step": 16367 }, { "epoch": 0.54, "grad_norm": 0.4267946183681488, "learning_rate": 0.0005525269819716606, "loss": 1.8688, "step": 16368 }, { "epoch": 0.54, "grad_norm": 0.4253976345062256, "learning_rate": 0.0005525213391678708, "loss": 1.791, "step": 16369 }, { "epoch": 0.54, "grad_norm": 0.44493886828422546, "learning_rate": 0.0005525156960575572, "loss": 1.869, "step": 16370 }, { "epoch": 0.54, "grad_norm": 0.4675128757953644, "learning_rate": 0.0005525100526407269, "loss": 1.8769, "step": 16371 }, { "epoch": 0.54, "grad_norm": 0.4341355562210083, "learning_rate": 0.0005525044089173866, "loss": 1.8075, "step": 16372 }, { "epoch": 0.54, "grad_norm": 0.4492872953414917, "learning_rate": 0.0005524987648875432, "loss": 1.8261, "step": 16373 }, { "epoch": 0.54, "grad_norm": 0.4489341974258423, "learning_rate": 0.0005524931205512033, "loss": 1.8443, "step": 16374 }, { "epoch": 0.54, "grad_norm": 0.434299498796463, "learning_rate": 0.0005524874759083741, "loss": 1.8414, "step": 16375 }, { "epoch": 0.54, "grad_norm": 0.4113461375236511, "learning_rate": 0.0005524818309590623, "loss": 1.8439, "step": 16376 }, { "epoch": 0.54, "grad_norm": 0.445572167634964, "learning_rate": 0.0005524761857032747, "loss": 1.8939, "step": 16377 }, { "epoch": 0.54, "grad_norm": 0.42325419187545776, "learning_rate": 0.0005524705401410183, "loss": 1.8966, "step": 16378 }, { "epoch": 0.54, "grad_norm": 0.41716596484184265, "learning_rate": 0.0005524648942722997, "loss": 1.7668, "step": 16379 }, { "epoch": 0.54, "grad_norm": 0.4174812436103821, "learning_rate": 0.000552459248097126, "loss": 1.8909, "step": 16380 }, { "epoch": 0.54, "grad_norm": 0.43932268023490906, "learning_rate": 0.000552453601615504, "loss": 1.8369, "step": 16381 }, { "epoch": 0.55, "grad_norm": 0.4294159710407257, "learning_rate": 0.0005524479548274405, "loss": 1.8707, "step": 16382 }, { "epoch": 0.55, "grad_norm": 0.4251810908317566, "learning_rate": 0.0005524423077329424, "loss": 1.8724, "step": 16383 }, { "epoch": 0.55, "grad_norm": 0.41080713272094727, "learning_rate": 0.0005524366603320166, "loss": 1.7537, "step": 16384 }, { "epoch": 0.55, "grad_norm": 0.4504837095737457, "learning_rate": 0.0005524310126246698, "loss": 1.8576, "step": 16385 }, { "epoch": 0.55, "grad_norm": 0.4169269800186157, "learning_rate": 0.000552425364610909, "loss": 1.838, "step": 16386 }, { "epoch": 0.55, "grad_norm": 0.42548346519470215, "learning_rate": 0.0005524197162907409, "loss": 1.8673, "step": 16387 }, { "epoch": 0.55, "grad_norm": 0.43287965655326843, "learning_rate": 0.0005524140676641725, "loss": 1.8203, "step": 16388 }, { "epoch": 0.55, "grad_norm": 0.4389662444591522, "learning_rate": 0.0005524084187312107, "loss": 1.9151, "step": 16389 }, { "epoch": 0.55, "grad_norm": 0.449510782957077, "learning_rate": 0.0005524027694918622, "loss": 1.892, "step": 16390 }, { "epoch": 0.55, "grad_norm": 0.43729692697525024, "learning_rate": 0.0005523971199461339, "loss": 1.8761, "step": 16391 }, { "epoch": 0.55, "grad_norm": 0.4237785041332245, "learning_rate": 0.0005523914700940327, "loss": 1.9237, "step": 16392 }, { "epoch": 0.55, "grad_norm": 0.5481802821159363, "learning_rate": 0.0005523858199355655, "loss": 1.8493, "step": 16393 }, { "epoch": 0.55, "grad_norm": 0.4610329866409302, "learning_rate": 0.0005523801694707391, "loss": 1.927, "step": 16394 }, { "epoch": 0.55, "grad_norm": 0.4274066090583801, "learning_rate": 0.0005523745186995603, "loss": 1.791, "step": 16395 }, { "epoch": 0.55, "grad_norm": 0.4517856538295746, "learning_rate": 0.000552368867622036, "loss": 1.8773, "step": 16396 }, { "epoch": 0.55, "grad_norm": 0.43378975987434387, "learning_rate": 0.0005523632162381731, "loss": 1.9131, "step": 16397 }, { "epoch": 0.55, "grad_norm": 0.437078058719635, "learning_rate": 0.0005523575645479785, "loss": 1.8602, "step": 16398 }, { "epoch": 0.55, "grad_norm": 0.43307092785835266, "learning_rate": 0.000552351912551459, "loss": 1.8204, "step": 16399 }, { "epoch": 0.55, "grad_norm": 0.4259168207645416, "learning_rate": 0.0005523462602486214, "loss": 1.7557, "step": 16400 }, { "epoch": 0.55, "grad_norm": 0.43248289823532104, "learning_rate": 0.0005523406076394726, "loss": 1.8578, "step": 16401 }, { "epoch": 0.55, "grad_norm": 0.4335068166255951, "learning_rate": 0.0005523349547240195, "loss": 1.9147, "step": 16402 }, { "epoch": 0.55, "grad_norm": 0.45339125394821167, "learning_rate": 0.0005523293015022691, "loss": 1.8102, "step": 16403 }, { "epoch": 0.55, "grad_norm": 0.44239452481269836, "learning_rate": 0.000552323647974228, "loss": 1.7586, "step": 16404 }, { "epoch": 0.55, "grad_norm": 0.4278069734573364, "learning_rate": 0.0005523179941399031, "loss": 1.8299, "step": 16405 }, { "epoch": 0.55, "grad_norm": 0.43930673599243164, "learning_rate": 0.0005523123399993013, "loss": 1.7986, "step": 16406 }, { "epoch": 0.55, "grad_norm": 0.4296150207519531, "learning_rate": 0.0005523066855524295, "loss": 1.8968, "step": 16407 }, { "epoch": 0.55, "grad_norm": 0.4372771382331848, "learning_rate": 0.0005523010307992947, "loss": 1.8995, "step": 16408 }, { "epoch": 0.55, "grad_norm": 0.43054646253585815, "learning_rate": 0.0005522953757399036, "loss": 1.8637, "step": 16409 }, { "epoch": 0.55, "grad_norm": 0.5577795505523682, "learning_rate": 0.000552289720374263, "loss": 1.7904, "step": 16410 }, { "epoch": 0.55, "grad_norm": 0.4345437288284302, "learning_rate": 0.00055228406470238, "loss": 1.8116, "step": 16411 }, { "epoch": 0.55, "grad_norm": 0.43476125597953796, "learning_rate": 0.0005522784087242612, "loss": 1.8746, "step": 16412 }, { "epoch": 0.55, "grad_norm": 0.44301488995552063, "learning_rate": 0.0005522727524399136, "loss": 1.9346, "step": 16413 }, { "epoch": 0.55, "grad_norm": 0.4307972490787506, "learning_rate": 0.000552267095849344, "loss": 1.8441, "step": 16414 }, { "epoch": 0.55, "grad_norm": 0.41941338777542114, "learning_rate": 0.0005522614389525594, "loss": 1.8342, "step": 16415 }, { "epoch": 0.55, "grad_norm": 0.4448431432247162, "learning_rate": 0.0005522557817495665, "loss": 1.799, "step": 16416 }, { "epoch": 0.55, "grad_norm": 0.43157336115837097, "learning_rate": 0.0005522501242403723, "loss": 1.839, "step": 16417 }, { "epoch": 0.55, "grad_norm": 0.42184412479400635, "learning_rate": 0.0005522444664249836, "loss": 1.8926, "step": 16418 }, { "epoch": 0.55, "grad_norm": 0.4240396320819855, "learning_rate": 0.0005522388083034073, "loss": 1.8295, "step": 16419 }, { "epoch": 0.55, "grad_norm": 0.4230950176715851, "learning_rate": 0.0005522331498756503, "loss": 1.824, "step": 16420 }, { "epoch": 0.55, "grad_norm": 0.42803654074668884, "learning_rate": 0.0005522274911417194, "loss": 1.8322, "step": 16421 }, { "epoch": 0.55, "grad_norm": 0.43814200162887573, "learning_rate": 0.0005522218321016213, "loss": 1.8105, "step": 16422 }, { "epoch": 0.55, "grad_norm": 0.4218432307243347, "learning_rate": 0.0005522161727553633, "loss": 1.8015, "step": 16423 }, { "epoch": 0.55, "grad_norm": 0.43056464195251465, "learning_rate": 0.000552210513102952, "loss": 1.8808, "step": 16424 }, { "epoch": 0.55, "grad_norm": 0.4255605936050415, "learning_rate": 0.0005522048531443943, "loss": 1.9217, "step": 16425 }, { "epoch": 0.55, "grad_norm": 0.4162229895591736, "learning_rate": 0.0005521991928796969, "loss": 1.8156, "step": 16426 }, { "epoch": 0.55, "grad_norm": 0.4354376494884491, "learning_rate": 0.000552193532308867, "loss": 1.9411, "step": 16427 }, { "epoch": 0.55, "grad_norm": 0.4302966296672821, "learning_rate": 0.0005521878714319112, "loss": 1.8971, "step": 16428 }, { "epoch": 0.55, "grad_norm": 0.4469040036201477, "learning_rate": 0.0005521822102488366, "loss": 1.8697, "step": 16429 }, { "epoch": 0.55, "grad_norm": 0.4416543245315552, "learning_rate": 0.00055217654875965, "loss": 1.7377, "step": 16430 }, { "epoch": 0.55, "grad_norm": 0.4342544674873352, "learning_rate": 0.000552170886964358, "loss": 1.857, "step": 16431 }, { "epoch": 0.55, "grad_norm": 0.42696934938430786, "learning_rate": 0.0005521652248629678, "loss": 1.8967, "step": 16432 }, { "epoch": 0.55, "grad_norm": 0.45271211862564087, "learning_rate": 0.0005521595624554862, "loss": 1.9047, "step": 16433 }, { "epoch": 0.55, "grad_norm": 0.4205457866191864, "learning_rate": 0.0005521538997419202, "loss": 1.878, "step": 16434 }, { "epoch": 0.55, "grad_norm": 0.41591158509254456, "learning_rate": 0.0005521482367222762, "loss": 1.8057, "step": 16435 }, { "epoch": 0.55, "grad_norm": 0.43242135643959045, "learning_rate": 0.0005521425733965616, "loss": 1.7976, "step": 16436 }, { "epoch": 0.55, "grad_norm": 0.41742295026779175, "learning_rate": 0.0005521369097647831, "loss": 1.7776, "step": 16437 }, { "epoch": 0.55, "grad_norm": 0.41678473353385925, "learning_rate": 0.0005521312458269474, "loss": 1.8171, "step": 16438 }, { "epoch": 0.55, "grad_norm": 0.4392411410808563, "learning_rate": 0.0005521255815830616, "loss": 1.8373, "step": 16439 }, { "epoch": 0.55, "grad_norm": 0.4461928606033325, "learning_rate": 0.0005521199170331325, "loss": 1.842, "step": 16440 }, { "epoch": 0.55, "grad_norm": 0.42885082960128784, "learning_rate": 0.0005521142521771669, "loss": 1.7809, "step": 16441 }, { "epoch": 0.55, "grad_norm": 0.43994733691215515, "learning_rate": 0.0005521085870151719, "loss": 1.8798, "step": 16442 }, { "epoch": 0.55, "grad_norm": 0.43091312050819397, "learning_rate": 0.000552102921547154, "loss": 1.8489, "step": 16443 }, { "epoch": 0.55, "grad_norm": 0.4325411915779114, "learning_rate": 0.0005520972557731204, "loss": 1.8184, "step": 16444 }, { "epoch": 0.55, "grad_norm": 0.4281884431838989, "learning_rate": 0.0005520915896930779, "loss": 1.7548, "step": 16445 }, { "epoch": 0.55, "grad_norm": 0.4567030072212219, "learning_rate": 0.0005520859233070334, "loss": 1.8109, "step": 16446 }, { "epoch": 0.55, "grad_norm": 0.42920657992362976, "learning_rate": 0.0005520802566149936, "loss": 1.8313, "step": 16447 }, { "epoch": 0.55, "grad_norm": 0.4367266893386841, "learning_rate": 0.0005520745896169657, "loss": 1.8644, "step": 16448 }, { "epoch": 0.55, "grad_norm": 0.4436182677745819, "learning_rate": 0.0005520689223129562, "loss": 1.8881, "step": 16449 }, { "epoch": 0.55, "grad_norm": 0.4451470971107483, "learning_rate": 0.0005520632547029722, "loss": 1.8793, "step": 16450 }, { "epoch": 0.55, "grad_norm": 0.44765928387641907, "learning_rate": 0.0005520575867870206, "loss": 1.8916, "step": 16451 }, { "epoch": 0.55, "grad_norm": 0.44055435061454773, "learning_rate": 0.0005520519185651082, "loss": 1.7866, "step": 16452 }, { "epoch": 0.55, "grad_norm": 0.43081364035606384, "learning_rate": 0.0005520462500372419, "loss": 1.869, "step": 16453 }, { "epoch": 0.55, "grad_norm": 0.4238356053829193, "learning_rate": 0.0005520405812034287, "loss": 1.8501, "step": 16454 }, { "epoch": 0.55, "grad_norm": 0.4647158980369568, "learning_rate": 0.0005520349120636752, "loss": 1.8156, "step": 16455 }, { "epoch": 0.55, "grad_norm": 0.4173825681209564, "learning_rate": 0.0005520292426179886, "loss": 1.7886, "step": 16456 }, { "epoch": 0.55, "grad_norm": 0.4172569215297699, "learning_rate": 0.0005520235728663755, "loss": 1.8264, "step": 16457 }, { "epoch": 0.55, "grad_norm": 0.43524327874183655, "learning_rate": 0.000552017902808843, "loss": 1.9164, "step": 16458 }, { "epoch": 0.55, "grad_norm": 0.45802250504493713, "learning_rate": 0.0005520122324453979, "loss": 1.9566, "step": 16459 }, { "epoch": 0.55, "grad_norm": 0.4366022050380707, "learning_rate": 0.0005520065617760472, "loss": 1.8564, "step": 16460 }, { "epoch": 0.55, "grad_norm": 0.4214935600757599, "learning_rate": 0.0005520008908007975, "loss": 1.8894, "step": 16461 }, { "epoch": 0.55, "grad_norm": 0.4436858296394348, "learning_rate": 0.0005519952195196558, "loss": 1.8023, "step": 16462 }, { "epoch": 0.55, "grad_norm": 0.4383668005466461, "learning_rate": 0.0005519895479326292, "loss": 1.8706, "step": 16463 }, { "epoch": 0.55, "grad_norm": 0.48278915882110596, "learning_rate": 0.0005519838760397243, "loss": 1.861, "step": 16464 }, { "epoch": 0.55, "grad_norm": 0.44469165802001953, "learning_rate": 0.0005519782038409482, "loss": 1.9667, "step": 16465 }, { "epoch": 0.55, "grad_norm": 0.42665618658065796, "learning_rate": 0.0005519725313363076, "loss": 1.8786, "step": 16466 }, { "epoch": 0.55, "grad_norm": 0.43521246314048767, "learning_rate": 0.0005519668585258095, "loss": 1.9017, "step": 16467 }, { "epoch": 0.55, "grad_norm": 0.4445079267024994, "learning_rate": 0.0005519611854094608, "loss": 1.9197, "step": 16468 }, { "epoch": 0.55, "grad_norm": 0.46724438667297363, "learning_rate": 0.0005519555119872684, "loss": 1.8221, "step": 16469 }, { "epoch": 0.55, "grad_norm": 0.42627158761024475, "learning_rate": 0.000551949838259239, "loss": 1.915, "step": 16470 }, { "epoch": 0.55, "grad_norm": 0.43238699436187744, "learning_rate": 0.0005519441642253796, "loss": 1.8756, "step": 16471 }, { "epoch": 0.55, "grad_norm": 0.4618227481842041, "learning_rate": 0.0005519384898856971, "loss": 1.8608, "step": 16472 }, { "epoch": 0.55, "grad_norm": 0.4258711040019989, "learning_rate": 0.0005519328152401986, "loss": 1.7821, "step": 16473 }, { "epoch": 0.55, "grad_norm": 0.44008567929267883, "learning_rate": 0.0005519271402888907, "loss": 1.8825, "step": 16474 }, { "epoch": 0.55, "grad_norm": 0.4220612347126007, "learning_rate": 0.0005519214650317803, "loss": 1.8774, "step": 16475 }, { "epoch": 0.55, "grad_norm": 0.415277361869812, "learning_rate": 0.0005519157894688742, "loss": 1.7448, "step": 16476 }, { "epoch": 0.55, "grad_norm": 0.41990694403648376, "learning_rate": 0.0005519101136001797, "loss": 1.9012, "step": 16477 }, { "epoch": 0.55, "grad_norm": 0.4340789318084717, "learning_rate": 0.0005519044374257034, "loss": 1.8854, "step": 16478 }, { "epoch": 0.55, "grad_norm": 0.4307385981082916, "learning_rate": 0.000551898760945452, "loss": 1.8566, "step": 16479 }, { "epoch": 0.55, "grad_norm": 0.42426085472106934, "learning_rate": 0.0005518930841594329, "loss": 1.8673, "step": 16480 }, { "epoch": 0.55, "grad_norm": 0.43211865425109863, "learning_rate": 0.0005518874070676526, "loss": 1.8725, "step": 16481 }, { "epoch": 0.55, "grad_norm": 0.43904414772987366, "learning_rate": 0.0005518817296701182, "loss": 1.8653, "step": 16482 }, { "epoch": 0.55, "grad_norm": 0.41952112317085266, "learning_rate": 0.0005518760519668364, "loss": 1.8273, "step": 16483 }, { "epoch": 0.55, "grad_norm": 0.42324116826057434, "learning_rate": 0.0005518703739578141, "loss": 1.8121, "step": 16484 }, { "epoch": 0.55, "grad_norm": 0.4245705306529999, "learning_rate": 0.0005518646956430585, "loss": 1.83, "step": 16485 }, { "epoch": 0.55, "grad_norm": 0.43007752299308777, "learning_rate": 0.0005518590170225761, "loss": 1.8913, "step": 16486 }, { "epoch": 0.55, "grad_norm": 0.44436195492744446, "learning_rate": 0.0005518533380963741, "loss": 1.934, "step": 16487 }, { "epoch": 0.55, "grad_norm": 0.6142632961273193, "learning_rate": 0.0005518476588644591, "loss": 1.8943, "step": 16488 }, { "epoch": 0.55, "grad_norm": 0.43957507610321045, "learning_rate": 0.0005518419793268382, "loss": 1.8721, "step": 16489 }, { "epoch": 0.55, "grad_norm": 0.43514272570610046, "learning_rate": 0.0005518362994835183, "loss": 1.7827, "step": 16490 }, { "epoch": 0.55, "grad_norm": 0.43017321825027466, "learning_rate": 0.0005518306193345062, "loss": 1.8196, "step": 16491 }, { "epoch": 0.55, "grad_norm": 0.4455139935016632, "learning_rate": 0.0005518249388798089, "loss": 1.8561, "step": 16492 }, { "epoch": 0.55, "grad_norm": 0.43372321128845215, "learning_rate": 0.0005518192581194331, "loss": 1.8243, "step": 16493 }, { "epoch": 0.55, "grad_norm": 0.4435942769050598, "learning_rate": 0.0005518135770533859, "loss": 1.8731, "step": 16494 }, { "epoch": 0.55, "grad_norm": 0.4335109293460846, "learning_rate": 0.0005518078956816741, "loss": 1.7562, "step": 16495 }, { "epoch": 0.55, "grad_norm": 0.4210999608039856, "learning_rate": 0.0005518022140043046, "loss": 1.7934, "step": 16496 }, { "epoch": 0.55, "grad_norm": 0.441537469625473, "learning_rate": 0.0005517965320212844, "loss": 1.7232, "step": 16497 }, { "epoch": 0.55, "grad_norm": 0.44731852412223816, "learning_rate": 0.0005517908497326202, "loss": 1.8739, "step": 16498 }, { "epoch": 0.55, "grad_norm": 0.43733981251716614, "learning_rate": 0.0005517851671383191, "loss": 1.8284, "step": 16499 }, { "epoch": 0.55, "grad_norm": 0.4235129952430725, "learning_rate": 0.0005517794842383878, "loss": 1.8949, "step": 16500 }, { "epoch": 0.55, "grad_norm": 0.43889564275741577, "learning_rate": 0.0005517738010328335, "loss": 1.7579, "step": 16501 }, { "epoch": 0.55, "grad_norm": 0.44116660952568054, "learning_rate": 0.0005517681175216627, "loss": 1.9167, "step": 16502 }, { "epoch": 0.55, "grad_norm": 0.5024445056915283, "learning_rate": 0.0005517624337048826, "loss": 1.8752, "step": 16503 }, { "epoch": 0.55, "grad_norm": 0.4269320070743561, "learning_rate": 0.0005517567495825, "loss": 1.8422, "step": 16504 }, { "epoch": 0.55, "grad_norm": 0.44248849153518677, "learning_rate": 0.0005517510651545218, "loss": 1.825, "step": 16505 }, { "epoch": 0.55, "grad_norm": 0.4548918306827545, "learning_rate": 0.0005517453804209548, "loss": 1.911, "step": 16506 }, { "epoch": 0.55, "grad_norm": 0.4158638119697571, "learning_rate": 0.000551739695381806, "loss": 1.814, "step": 16507 }, { "epoch": 0.55, "grad_norm": 0.4213777780532837, "learning_rate": 0.0005517340100370825, "loss": 1.8459, "step": 16508 }, { "epoch": 0.55, "grad_norm": 0.4526965618133545, "learning_rate": 0.0005517283243867909, "loss": 1.8483, "step": 16509 }, { "epoch": 0.55, "grad_norm": 0.4584798514842987, "learning_rate": 0.0005517226384309381, "loss": 1.8498, "step": 16510 }, { "epoch": 0.55, "grad_norm": 0.4179145097732544, "learning_rate": 0.0005517169521695313, "loss": 1.8969, "step": 16511 }, { "epoch": 0.55, "grad_norm": 0.4536944329738617, "learning_rate": 0.000551711265602577, "loss": 1.9663, "step": 16512 }, { "epoch": 0.55, "grad_norm": 0.47180110216140747, "learning_rate": 0.0005517055787300825, "loss": 1.9073, "step": 16513 }, { "epoch": 0.55, "grad_norm": 0.41341081261634827, "learning_rate": 0.0005516998915520544, "loss": 1.8628, "step": 16514 }, { "epoch": 0.55, "grad_norm": 0.43343254923820496, "learning_rate": 0.0005516942040684998, "loss": 1.8848, "step": 16515 }, { "epoch": 0.55, "grad_norm": 0.45845597982406616, "learning_rate": 0.0005516885162794254, "loss": 1.9007, "step": 16516 }, { "epoch": 0.55, "grad_norm": 0.4906502664089203, "learning_rate": 0.0005516828281848383, "loss": 1.8171, "step": 16517 }, { "epoch": 0.55, "grad_norm": 0.43641456961631775, "learning_rate": 0.0005516771397847453, "loss": 1.8625, "step": 16518 }, { "epoch": 0.55, "grad_norm": 0.4210127294063568, "learning_rate": 0.0005516714510791534, "loss": 1.882, "step": 16519 }, { "epoch": 0.55, "grad_norm": 0.4465186297893524, "learning_rate": 0.0005516657620680694, "loss": 1.8858, "step": 16520 }, { "epoch": 0.55, "grad_norm": 0.4585237205028534, "learning_rate": 0.0005516600727515003, "loss": 1.8588, "step": 16521 }, { "epoch": 0.55, "grad_norm": 0.44129717350006104, "learning_rate": 0.0005516543831294528, "loss": 1.8925, "step": 16522 }, { "epoch": 0.55, "grad_norm": 0.4296738803386688, "learning_rate": 0.0005516486932019341, "loss": 1.7567, "step": 16523 }, { "epoch": 0.55, "grad_norm": 0.4745386838912964, "learning_rate": 0.0005516430029689509, "loss": 1.8426, "step": 16524 }, { "epoch": 0.55, "grad_norm": 0.42549556493759155, "learning_rate": 0.0005516373124305102, "loss": 1.8167, "step": 16525 }, { "epoch": 0.55, "grad_norm": 0.4373127818107605, "learning_rate": 0.0005516316215866189, "loss": 1.9349, "step": 16526 }, { "epoch": 0.55, "grad_norm": 0.42553284764289856, "learning_rate": 0.0005516259304372838, "loss": 1.8024, "step": 16527 }, { "epoch": 0.55, "grad_norm": 0.46394264698028564, "learning_rate": 0.000551620238982512, "loss": 1.8233, "step": 16528 }, { "epoch": 0.55, "grad_norm": 0.43459606170654297, "learning_rate": 0.0005516145472223103, "loss": 1.7642, "step": 16529 }, { "epoch": 0.55, "grad_norm": 0.43821513652801514, "learning_rate": 0.0005516088551566856, "loss": 1.8571, "step": 16530 }, { "epoch": 0.55, "grad_norm": 0.42572668194770813, "learning_rate": 0.0005516031627856447, "loss": 1.8856, "step": 16531 }, { "epoch": 0.55, "grad_norm": 0.4756072759628296, "learning_rate": 0.0005515974701091947, "loss": 1.9598, "step": 16532 }, { "epoch": 0.55, "grad_norm": 0.43683668971061707, "learning_rate": 0.0005515917771273426, "loss": 1.8614, "step": 16533 }, { "epoch": 0.55, "grad_norm": 0.4381572902202606, "learning_rate": 0.0005515860838400949, "loss": 1.884, "step": 16534 }, { "epoch": 0.55, "grad_norm": 0.4411894679069519, "learning_rate": 0.000551580390247459, "loss": 1.8494, "step": 16535 }, { "epoch": 0.55, "grad_norm": 0.4337320625782013, "learning_rate": 0.0005515746963494413, "loss": 1.8712, "step": 16536 }, { "epoch": 0.55, "grad_norm": 0.4158182144165039, "learning_rate": 0.0005515690021460492, "loss": 1.8515, "step": 16537 }, { "epoch": 0.55, "grad_norm": 0.43629634380340576, "learning_rate": 0.0005515633076372894, "loss": 1.7882, "step": 16538 }, { "epoch": 0.55, "grad_norm": 0.44509652256965637, "learning_rate": 0.0005515576128231688, "loss": 1.9775, "step": 16539 }, { "epoch": 0.55, "grad_norm": 0.431769996881485, "learning_rate": 0.0005515519177036942, "loss": 1.7813, "step": 16540 }, { "epoch": 0.55, "grad_norm": 0.41756847500801086, "learning_rate": 0.0005515462222788728, "loss": 1.852, "step": 16541 }, { "epoch": 0.55, "grad_norm": 0.42979058623313904, "learning_rate": 0.0005515405265487113, "loss": 1.7868, "step": 16542 }, { "epoch": 0.55, "grad_norm": 0.4367254078388214, "learning_rate": 0.0005515348305132165, "loss": 1.8877, "step": 16543 }, { "epoch": 0.55, "grad_norm": 0.4265404939651489, "learning_rate": 0.0005515291341723957, "loss": 1.8125, "step": 16544 }, { "epoch": 0.55, "grad_norm": 0.420063853263855, "learning_rate": 0.0005515234375262555, "loss": 1.8778, "step": 16545 }, { "epoch": 0.55, "grad_norm": 0.4100492298603058, "learning_rate": 0.0005515177405748029, "loss": 1.7709, "step": 16546 }, { "epoch": 0.55, "grad_norm": 0.4529290497303009, "learning_rate": 0.0005515120433180448, "loss": 1.8066, "step": 16547 }, { "epoch": 0.55, "grad_norm": 0.42064225673675537, "learning_rate": 0.0005515063457559882, "loss": 1.8405, "step": 16548 }, { "epoch": 0.55, "grad_norm": 0.4439014494419098, "learning_rate": 0.0005515006478886399, "loss": 1.9761, "step": 16549 }, { "epoch": 0.55, "grad_norm": 0.42130017280578613, "learning_rate": 0.0005514949497160069, "loss": 1.8124, "step": 16550 }, { "epoch": 0.55, "grad_norm": 0.42094719409942627, "learning_rate": 0.000551489251238096, "loss": 1.8925, "step": 16551 }, { "epoch": 0.55, "grad_norm": 0.43417003750801086, "learning_rate": 0.0005514835524549144, "loss": 1.8012, "step": 16552 }, { "epoch": 0.55, "grad_norm": 0.434204638004303, "learning_rate": 0.0005514778533664686, "loss": 1.9913, "step": 16553 }, { "epoch": 0.55, "grad_norm": 0.44038429856300354, "learning_rate": 0.000551472153972766, "loss": 1.8086, "step": 16554 }, { "epoch": 0.55, "grad_norm": 0.4303134083747864, "learning_rate": 0.0005514664542738129, "loss": 1.7967, "step": 16555 }, { "epoch": 0.55, "grad_norm": 0.44150975346565247, "learning_rate": 0.0005514607542696169, "loss": 1.7759, "step": 16556 }, { "epoch": 0.55, "grad_norm": 0.4357220530509949, "learning_rate": 0.0005514550539601845, "loss": 1.7951, "step": 16557 }, { "epoch": 0.55, "grad_norm": 0.4269140660762787, "learning_rate": 0.0005514493533455226, "loss": 1.8079, "step": 16558 }, { "epoch": 0.55, "grad_norm": 0.4321632385253906, "learning_rate": 0.0005514436524256385, "loss": 1.7972, "step": 16559 }, { "epoch": 0.55, "grad_norm": 0.43819108605384827, "learning_rate": 0.0005514379512005386, "loss": 1.7956, "step": 16560 }, { "epoch": 0.55, "grad_norm": 0.43023526668548584, "learning_rate": 0.0005514322496702302, "loss": 1.8609, "step": 16561 }, { "epoch": 0.55, "grad_norm": 0.43655839562416077, "learning_rate": 0.00055142654783472, "loss": 1.8198, "step": 16562 }, { "epoch": 0.55, "grad_norm": 0.42850446701049805, "learning_rate": 0.0005514208456940151, "loss": 1.7972, "step": 16563 }, { "epoch": 0.55, "grad_norm": 0.4152737855911255, "learning_rate": 0.0005514151432481222, "loss": 1.9101, "step": 16564 }, { "epoch": 0.55, "grad_norm": 0.4204031825065613, "learning_rate": 0.0005514094404970484, "loss": 1.7363, "step": 16565 }, { "epoch": 0.55, "grad_norm": 0.4330283999443054, "learning_rate": 0.0005514037374408007, "loss": 1.8711, "step": 16566 }, { "epoch": 0.55, "grad_norm": 0.4369858205318451, "learning_rate": 0.0005513980340793859, "loss": 1.8098, "step": 16567 }, { "epoch": 0.55, "grad_norm": 0.4465695023536682, "learning_rate": 0.0005513923304128109, "loss": 1.869, "step": 16568 }, { "epoch": 0.55, "grad_norm": 0.4312087297439575, "learning_rate": 0.0005513866264410827, "loss": 1.8355, "step": 16569 }, { "epoch": 0.55, "grad_norm": 0.42632144689559937, "learning_rate": 0.000551380922164208, "loss": 1.9249, "step": 16570 }, { "epoch": 0.55, "grad_norm": 0.4296555817127228, "learning_rate": 0.0005513752175821941, "loss": 1.931, "step": 16571 }, { "epoch": 0.55, "grad_norm": 0.5821378827095032, "learning_rate": 0.0005513695126950476, "loss": 1.9183, "step": 16572 }, { "epoch": 0.55, "grad_norm": 0.4574778378009796, "learning_rate": 0.0005513638075027756, "loss": 1.8372, "step": 16573 }, { "epoch": 0.55, "grad_norm": 0.44396185874938965, "learning_rate": 0.0005513581020053849, "loss": 1.8418, "step": 16574 }, { "epoch": 0.55, "grad_norm": 0.4317459762096405, "learning_rate": 0.0005513523962028825, "loss": 1.8469, "step": 16575 }, { "epoch": 0.55, "grad_norm": 0.4317496716976166, "learning_rate": 0.0005513466900952755, "loss": 1.8766, "step": 16576 }, { "epoch": 0.55, "grad_norm": 0.4398942291736603, "learning_rate": 0.0005513409836825705, "loss": 1.851, "step": 16577 }, { "epoch": 0.55, "grad_norm": 0.4340803623199463, "learning_rate": 0.0005513352769647746, "loss": 1.886, "step": 16578 }, { "epoch": 0.55, "grad_norm": 0.42392686009407043, "learning_rate": 0.0005513295699418947, "loss": 1.8446, "step": 16579 }, { "epoch": 0.55, "grad_norm": 0.4327491521835327, "learning_rate": 0.0005513238626139378, "loss": 1.8231, "step": 16580 }, { "epoch": 0.55, "grad_norm": 0.4283612370491028, "learning_rate": 0.0005513181549809107, "loss": 1.8873, "step": 16581 }, { "epoch": 0.55, "grad_norm": 0.48674893379211426, "learning_rate": 0.0005513124470428204, "loss": 1.796, "step": 16582 }, { "epoch": 0.55, "grad_norm": 0.4169231653213501, "learning_rate": 0.0005513067387996739, "loss": 1.8284, "step": 16583 }, { "epoch": 0.55, "grad_norm": 0.42013469338417053, "learning_rate": 0.0005513010302514779, "loss": 1.828, "step": 16584 }, { "epoch": 0.55, "grad_norm": 0.42789241671562195, "learning_rate": 0.0005512953213982396, "loss": 1.8104, "step": 16585 }, { "epoch": 0.55, "grad_norm": 0.4222569167613983, "learning_rate": 0.0005512896122399657, "loss": 1.7822, "step": 16586 }, { "epoch": 0.55, "grad_norm": 0.4292222261428833, "learning_rate": 0.0005512839027766633, "loss": 1.8283, "step": 16587 }, { "epoch": 0.55, "grad_norm": 0.4199346601963043, "learning_rate": 0.0005512781930083393, "loss": 1.8344, "step": 16588 }, { "epoch": 0.55, "grad_norm": 0.422233521938324, "learning_rate": 0.0005512724829350005, "loss": 1.8938, "step": 16589 }, { "epoch": 0.55, "grad_norm": 0.46155688166618347, "learning_rate": 0.000551266772556654, "loss": 1.8876, "step": 16590 }, { "epoch": 0.55, "grad_norm": 0.427524596452713, "learning_rate": 0.0005512610618733067, "loss": 1.8456, "step": 16591 }, { "epoch": 0.55, "grad_norm": 0.43755441904067993, "learning_rate": 0.0005512553508849654, "loss": 1.8537, "step": 16592 }, { "epoch": 0.55, "grad_norm": 0.42684775590896606, "learning_rate": 0.0005512496395916372, "loss": 1.8584, "step": 16593 }, { "epoch": 0.55, "grad_norm": 0.43401938676834106, "learning_rate": 0.0005512439279933289, "loss": 1.9056, "step": 16594 }, { "epoch": 0.55, "grad_norm": 0.42637544870376587, "learning_rate": 0.0005512382160900475, "loss": 1.8811, "step": 16595 }, { "epoch": 0.55, "grad_norm": 0.4161968529224396, "learning_rate": 0.0005512325038817999, "loss": 1.8788, "step": 16596 }, { "epoch": 0.55, "grad_norm": 0.42998161911964417, "learning_rate": 0.0005512267913685931, "loss": 1.7898, "step": 16597 }, { "epoch": 0.55, "grad_norm": 0.4311586618423462, "learning_rate": 0.000551221078550434, "loss": 1.8458, "step": 16598 }, { "epoch": 0.55, "grad_norm": 0.42431405186653137, "learning_rate": 0.0005512153654273295, "loss": 1.8428, "step": 16599 }, { "epoch": 0.55, "grad_norm": 0.4271078407764435, "learning_rate": 0.0005512096519992866, "loss": 1.9211, "step": 16600 }, { "epoch": 0.55, "grad_norm": 0.434070885181427, "learning_rate": 0.0005512039382663121, "loss": 1.8193, "step": 16601 }, { "epoch": 0.55, "grad_norm": 0.41949281096458435, "learning_rate": 0.0005511982242284131, "loss": 1.8139, "step": 16602 }, { "epoch": 0.55, "grad_norm": 0.4259703457355499, "learning_rate": 0.0005511925098855965, "loss": 1.8313, "step": 16603 }, { "epoch": 0.55, "grad_norm": 0.43431025743484497, "learning_rate": 0.0005511867952378692, "loss": 1.8951, "step": 16604 }, { "epoch": 0.55, "grad_norm": 0.43468084931373596, "learning_rate": 0.0005511810802852381, "loss": 1.858, "step": 16605 }, { "epoch": 0.55, "grad_norm": 0.4250795841217041, "learning_rate": 0.0005511753650277102, "loss": 1.8684, "step": 16606 }, { "epoch": 0.55, "grad_norm": 0.44574058055877686, "learning_rate": 0.0005511696494652924, "loss": 1.8221, "step": 16607 }, { "epoch": 0.55, "grad_norm": 0.4463304877281189, "learning_rate": 0.0005511639335979916, "loss": 1.8889, "step": 16608 }, { "epoch": 0.55, "grad_norm": 0.43620699644088745, "learning_rate": 0.0005511582174258149, "loss": 1.7909, "step": 16609 }, { "epoch": 0.55, "grad_norm": 0.4329310953617096, "learning_rate": 0.0005511525009487691, "loss": 1.832, "step": 16610 }, { "epoch": 0.55, "grad_norm": 0.4220219552516937, "learning_rate": 0.0005511467841668611, "loss": 1.8195, "step": 16611 }, { "epoch": 0.55, "grad_norm": 0.4637102484703064, "learning_rate": 0.000551141067080098, "loss": 1.8553, "step": 16612 }, { "epoch": 0.55, "grad_norm": 0.43018364906311035, "learning_rate": 0.0005511353496884866, "loss": 1.8811, "step": 16613 }, { "epoch": 0.55, "grad_norm": 0.4233319163322449, "learning_rate": 0.0005511296319920339, "loss": 1.8818, "step": 16614 }, { "epoch": 0.55, "grad_norm": 0.43330058455467224, "learning_rate": 0.0005511239139907468, "loss": 1.8802, "step": 16615 }, { "epoch": 0.55, "grad_norm": 0.4428277015686035, "learning_rate": 0.0005511181956846323, "loss": 1.8881, "step": 16616 }, { "epoch": 0.55, "grad_norm": 0.4244388937950134, "learning_rate": 0.0005511124770736974, "loss": 1.808, "step": 16617 }, { "epoch": 0.55, "grad_norm": 0.4232669472694397, "learning_rate": 0.0005511067581579488, "loss": 1.8115, "step": 16618 }, { "epoch": 0.55, "grad_norm": 0.42789793014526367, "learning_rate": 0.0005511010389373937, "loss": 1.8282, "step": 16619 }, { "epoch": 0.55, "grad_norm": 0.460441917181015, "learning_rate": 0.0005510953194120388, "loss": 1.9106, "step": 16620 }, { "epoch": 0.55, "grad_norm": 0.42783570289611816, "learning_rate": 0.0005510895995818913, "loss": 1.9028, "step": 16621 }, { "epoch": 0.55, "grad_norm": 0.4117794334888458, "learning_rate": 0.000551083879446958, "loss": 1.8827, "step": 16622 }, { "epoch": 0.55, "grad_norm": 0.42379623651504517, "learning_rate": 0.0005510781590072459, "loss": 1.8582, "step": 16623 }, { "epoch": 0.55, "grad_norm": 0.43609893321990967, "learning_rate": 0.0005510724382627618, "loss": 1.816, "step": 16624 }, { "epoch": 0.55, "grad_norm": 0.41849246621131897, "learning_rate": 0.0005510667172135129, "loss": 1.8322, "step": 16625 }, { "epoch": 0.55, "grad_norm": 0.5508193969726562, "learning_rate": 0.0005510609958595059, "loss": 1.9554, "step": 16626 }, { "epoch": 0.55, "grad_norm": 0.43116891384124756, "learning_rate": 0.0005510552742007478, "loss": 1.8541, "step": 16627 }, { "epoch": 0.55, "grad_norm": 0.4251875877380371, "learning_rate": 0.0005510495522372457, "loss": 1.8718, "step": 16628 }, { "epoch": 0.55, "grad_norm": 0.42899495363235474, "learning_rate": 0.0005510438299690065, "loss": 1.8658, "step": 16629 }, { "epoch": 0.55, "grad_norm": 0.43665069341659546, "learning_rate": 0.0005510381073960368, "loss": 1.9554, "step": 16630 }, { "epoch": 0.55, "grad_norm": 0.4170153737068176, "learning_rate": 0.000551032384518344, "loss": 1.8761, "step": 16631 }, { "epoch": 0.55, "grad_norm": 0.43079501390457153, "learning_rate": 0.0005510266613359349, "loss": 1.7787, "step": 16632 }, { "epoch": 0.55, "grad_norm": 0.4255819320678711, "learning_rate": 0.0005510209378488164, "loss": 1.8668, "step": 16633 }, { "epoch": 0.55, "grad_norm": 0.4152270555496216, "learning_rate": 0.0005510152140569954, "loss": 1.852, "step": 16634 }, { "epoch": 0.55, "grad_norm": 0.4275285005569458, "learning_rate": 0.0005510094899604791, "loss": 1.9107, "step": 16635 }, { "epoch": 0.55, "grad_norm": 0.4304133355617523, "learning_rate": 0.0005510037655592741, "loss": 1.8233, "step": 16636 }, { "epoch": 0.55, "grad_norm": 0.43672674894332886, "learning_rate": 0.0005509980408533874, "loss": 1.8327, "step": 16637 }, { "epoch": 0.55, "grad_norm": 0.4309988021850586, "learning_rate": 0.0005509923158428263, "loss": 1.9192, "step": 16638 }, { "epoch": 0.55, "grad_norm": 0.4325675070285797, "learning_rate": 0.0005509865905275974, "loss": 1.7902, "step": 16639 }, { "epoch": 0.55, "grad_norm": 0.4462299048900604, "learning_rate": 0.0005509808649077079, "loss": 1.904, "step": 16640 }, { "epoch": 0.55, "grad_norm": 0.46317026019096375, "learning_rate": 0.0005509751389831645, "loss": 1.9613, "step": 16641 }, { "epoch": 0.55, "grad_norm": 0.420835018157959, "learning_rate": 0.0005509694127539741, "loss": 1.8747, "step": 16642 }, { "epoch": 0.55, "grad_norm": 0.4215024709701538, "learning_rate": 0.000550963686220144, "loss": 1.8015, "step": 16643 }, { "epoch": 0.55, "grad_norm": 0.4370325207710266, "learning_rate": 0.0005509579593816809, "loss": 1.842, "step": 16644 }, { "epoch": 0.55, "grad_norm": 0.42438679933547974, "learning_rate": 0.0005509522322385919, "loss": 1.8213, "step": 16645 }, { "epoch": 0.55, "grad_norm": 0.42590609192848206, "learning_rate": 0.0005509465047908839, "loss": 1.7905, "step": 16646 }, { "epoch": 0.55, "grad_norm": 0.438007116317749, "learning_rate": 0.0005509407770385637, "loss": 1.7888, "step": 16647 }, { "epoch": 0.55, "grad_norm": 0.43031010031700134, "learning_rate": 0.0005509350489816383, "loss": 1.9022, "step": 16648 }, { "epoch": 0.55, "grad_norm": 0.44663891196250916, "learning_rate": 0.0005509293206201148, "loss": 1.9329, "step": 16649 }, { "epoch": 0.55, "grad_norm": 0.42924201488494873, "learning_rate": 0.0005509235919540002, "loss": 1.8198, "step": 16650 }, { "epoch": 0.55, "grad_norm": 0.43021321296691895, "learning_rate": 0.0005509178629833011, "loss": 1.8886, "step": 16651 }, { "epoch": 0.55, "grad_norm": 0.4422219693660736, "learning_rate": 0.0005509121337080249, "loss": 1.8786, "step": 16652 }, { "epoch": 0.55, "grad_norm": 0.4364023804664612, "learning_rate": 0.0005509064041281782, "loss": 1.7638, "step": 16653 }, { "epoch": 0.55, "grad_norm": 0.42746543884277344, "learning_rate": 0.0005509006742437681, "loss": 1.8046, "step": 16654 }, { "epoch": 0.55, "grad_norm": 0.4555107355117798, "learning_rate": 0.0005508949440548017, "loss": 1.8803, "step": 16655 }, { "epoch": 0.55, "grad_norm": 0.4423319399356842, "learning_rate": 0.0005508892135612857, "loss": 1.9183, "step": 16656 }, { "epoch": 0.55, "grad_norm": 0.42238420248031616, "learning_rate": 0.0005508834827632271, "loss": 1.8263, "step": 16657 }, { "epoch": 0.55, "grad_norm": 0.43049687147140503, "learning_rate": 0.0005508777516606329, "loss": 1.9081, "step": 16658 }, { "epoch": 0.55, "grad_norm": 0.4689251184463501, "learning_rate": 0.0005508720202535102, "loss": 1.8289, "step": 16659 }, { "epoch": 0.55, "grad_norm": 0.4361140727996826, "learning_rate": 0.0005508662885418658, "loss": 1.8663, "step": 16660 }, { "epoch": 0.55, "grad_norm": 0.42377975583076477, "learning_rate": 0.0005508605565257067, "loss": 1.8586, "step": 16661 }, { "epoch": 0.55, "grad_norm": 0.44501936435699463, "learning_rate": 0.0005508548242050399, "loss": 1.9462, "step": 16662 }, { "epoch": 0.55, "grad_norm": 0.4429769814014435, "learning_rate": 0.0005508490915798722, "loss": 1.8469, "step": 16663 }, { "epoch": 0.55, "grad_norm": 0.43356853723526, "learning_rate": 0.0005508433586502106, "loss": 1.9071, "step": 16664 }, { "epoch": 0.55, "grad_norm": 0.42459070682525635, "learning_rate": 0.0005508376254160622, "loss": 1.828, "step": 16665 }, { "epoch": 0.55, "grad_norm": 0.4289540648460388, "learning_rate": 0.000550831891877434, "loss": 1.8244, "step": 16666 }, { "epoch": 0.55, "grad_norm": 0.4267435073852539, "learning_rate": 0.0005508261580343327, "loss": 1.9718, "step": 16667 }, { "epoch": 0.55, "grad_norm": 0.41728079319000244, "learning_rate": 0.0005508204238867655, "loss": 1.8569, "step": 16668 }, { "epoch": 0.55, "grad_norm": 0.4305528998374939, "learning_rate": 0.0005508146894347392, "loss": 1.8568, "step": 16669 }, { "epoch": 0.55, "grad_norm": 0.42987027764320374, "learning_rate": 0.0005508089546782608, "loss": 1.8363, "step": 16670 }, { "epoch": 0.55, "grad_norm": 0.4314642548561096, "learning_rate": 0.0005508032196173373, "loss": 1.8412, "step": 16671 }, { "epoch": 0.55, "grad_norm": 0.4026009440422058, "learning_rate": 0.0005507974842519757, "loss": 1.8072, "step": 16672 }, { "epoch": 0.55, "grad_norm": 0.43016645312309265, "learning_rate": 0.0005507917485821828, "loss": 1.8223, "step": 16673 }, { "epoch": 0.55, "grad_norm": 0.4309269189834595, "learning_rate": 0.0005507860126079657, "loss": 1.8852, "step": 16674 }, { "epoch": 0.55, "grad_norm": 0.4438443183898926, "learning_rate": 0.0005507802763293314, "loss": 1.7859, "step": 16675 }, { "epoch": 0.55, "grad_norm": 0.42651593685150146, "learning_rate": 0.0005507745397462867, "loss": 1.8705, "step": 16676 }, { "epoch": 0.55, "grad_norm": 0.4561865031719208, "learning_rate": 0.0005507688028588387, "loss": 1.7793, "step": 16677 }, { "epoch": 0.55, "grad_norm": 0.4601884186267853, "learning_rate": 0.0005507630656669944, "loss": 1.8101, "step": 16678 }, { "epoch": 0.55, "grad_norm": 0.4457774758338928, "learning_rate": 0.0005507573281707607, "loss": 1.9167, "step": 16679 }, { "epoch": 0.55, "grad_norm": 0.4534588158130646, "learning_rate": 0.0005507515903701444, "loss": 1.8916, "step": 16680 }, { "epoch": 0.55, "grad_norm": 0.433160662651062, "learning_rate": 0.0005507458522651527, "loss": 1.8501, "step": 16681 }, { "epoch": 0.56, "grad_norm": 0.4250931143760681, "learning_rate": 0.0005507401138557924, "loss": 1.8797, "step": 16682 }, { "epoch": 0.56, "grad_norm": 0.42088940739631653, "learning_rate": 0.0005507343751420707, "loss": 1.8159, "step": 16683 }, { "epoch": 0.56, "grad_norm": 0.44118571281433105, "learning_rate": 0.0005507286361239944, "loss": 1.8423, "step": 16684 }, { "epoch": 0.56, "grad_norm": 0.4438532590866089, "learning_rate": 0.0005507228968015704, "loss": 1.892, "step": 16685 }, { "epoch": 0.56, "grad_norm": 0.4195805788040161, "learning_rate": 0.0005507171571748058, "loss": 1.8819, "step": 16686 }, { "epoch": 0.56, "grad_norm": 0.4451044201850891, "learning_rate": 0.0005507114172437074, "loss": 1.8633, "step": 16687 }, { "epoch": 0.56, "grad_norm": 0.4659269452095032, "learning_rate": 0.0005507056770082823, "loss": 1.8415, "step": 16688 }, { "epoch": 0.56, "grad_norm": 0.41787198185920715, "learning_rate": 0.0005506999364685376, "loss": 1.8934, "step": 16689 }, { "epoch": 0.56, "grad_norm": 0.43675699830055237, "learning_rate": 0.00055069419562448, "loss": 1.8673, "step": 16690 }, { "epoch": 0.56, "grad_norm": 0.47591444849967957, "learning_rate": 0.0005506884544761166, "loss": 1.8322, "step": 16691 }, { "epoch": 0.56, "grad_norm": 0.4547501504421234, "learning_rate": 0.0005506827130234542, "loss": 1.8144, "step": 16692 }, { "epoch": 0.56, "grad_norm": 0.4783356487751007, "learning_rate": 0.0005506769712665002, "loss": 1.8144, "step": 16693 }, { "epoch": 0.56, "grad_norm": 0.42139315605163574, "learning_rate": 0.0005506712292052612, "loss": 1.8569, "step": 16694 }, { "epoch": 0.56, "grad_norm": 0.43212199211120605, "learning_rate": 0.0005506654868397442, "loss": 1.7628, "step": 16695 }, { "epoch": 0.56, "grad_norm": 0.4485580325126648, "learning_rate": 0.0005506597441699562, "loss": 1.7742, "step": 16696 }, { "epoch": 0.56, "grad_norm": 0.4219341278076172, "learning_rate": 0.0005506540011959043, "loss": 1.7657, "step": 16697 }, { "epoch": 0.56, "grad_norm": 0.41288405656814575, "learning_rate": 0.0005506482579175953, "loss": 1.8746, "step": 16698 }, { "epoch": 0.56, "grad_norm": 0.4475197196006775, "learning_rate": 0.0005506425143350363, "loss": 1.8293, "step": 16699 }, { "epoch": 0.56, "grad_norm": 0.438100665807724, "learning_rate": 0.0005506367704482343, "loss": 1.852, "step": 16700 }, { "epoch": 0.56, "grad_norm": 0.4606945216655731, "learning_rate": 0.0005506310262571961, "loss": 1.7978, "step": 16701 }, { "epoch": 0.56, "grad_norm": 0.45698994398117065, "learning_rate": 0.0005506252817619287, "loss": 1.8566, "step": 16702 }, { "epoch": 0.56, "grad_norm": 0.43271398544311523, "learning_rate": 0.0005506195369624392, "loss": 1.8601, "step": 16703 }, { "epoch": 0.56, "grad_norm": 0.43771475553512573, "learning_rate": 0.0005506137918587344, "loss": 1.8334, "step": 16704 }, { "epoch": 0.56, "grad_norm": 0.4293350875377655, "learning_rate": 0.0005506080464508216, "loss": 1.8576, "step": 16705 }, { "epoch": 0.56, "grad_norm": 0.4129677414894104, "learning_rate": 0.0005506023007387075, "loss": 1.865, "step": 16706 }, { "epoch": 0.56, "grad_norm": 0.4344840347766876, "learning_rate": 0.000550596554722399, "loss": 1.8236, "step": 16707 }, { "epoch": 0.56, "grad_norm": 0.4189230799674988, "learning_rate": 0.0005505908084019033, "loss": 1.8015, "step": 16708 }, { "epoch": 0.56, "grad_norm": 0.43019217252731323, "learning_rate": 0.0005505850617772272, "loss": 1.8442, "step": 16709 }, { "epoch": 0.56, "grad_norm": 0.41385024785995483, "learning_rate": 0.0005505793148483779, "loss": 1.8209, "step": 16710 }, { "epoch": 0.56, "grad_norm": 0.4305790066719055, "learning_rate": 0.0005505735676153621, "loss": 1.8938, "step": 16711 }, { "epoch": 0.56, "grad_norm": 0.45616137981414795, "learning_rate": 0.000550567820078187, "loss": 1.8641, "step": 16712 }, { "epoch": 0.56, "grad_norm": 0.42416226863861084, "learning_rate": 0.0005505620722368594, "loss": 1.8093, "step": 16713 }, { "epoch": 0.56, "grad_norm": 0.4337279200553894, "learning_rate": 0.0005505563240913864, "loss": 1.8428, "step": 16714 }, { "epoch": 0.56, "grad_norm": 0.4338124096393585, "learning_rate": 0.0005505505756417749, "loss": 1.9037, "step": 16715 }, { "epoch": 0.56, "grad_norm": 0.44367194175720215, "learning_rate": 0.000550544826888032, "loss": 1.8607, "step": 16716 }, { "epoch": 0.56, "grad_norm": 0.4347403347492218, "learning_rate": 0.0005505390778301647, "loss": 1.9012, "step": 16717 }, { "epoch": 0.56, "grad_norm": 0.4207814931869507, "learning_rate": 0.0005505333284681796, "loss": 1.8208, "step": 16718 }, { "epoch": 0.56, "grad_norm": 0.44159960746765137, "learning_rate": 0.0005505275788020842, "loss": 1.8703, "step": 16719 }, { "epoch": 0.56, "grad_norm": 0.4243798851966858, "learning_rate": 0.0005505218288318851, "loss": 1.9305, "step": 16720 }, { "epoch": 0.56, "grad_norm": 0.42432528734207153, "learning_rate": 0.0005505160785575894, "loss": 1.8061, "step": 16721 }, { "epoch": 0.56, "grad_norm": 0.421964168548584, "learning_rate": 0.0005505103279792042, "loss": 1.8396, "step": 16722 }, { "epoch": 0.56, "grad_norm": 0.4270528554916382, "learning_rate": 0.0005505045770967362, "loss": 1.7981, "step": 16723 }, { "epoch": 0.56, "grad_norm": 0.43604278564453125, "learning_rate": 0.0005504988259101927, "loss": 1.8283, "step": 16724 }, { "epoch": 0.56, "grad_norm": 0.4314991235733032, "learning_rate": 0.0005504930744195805, "loss": 1.8921, "step": 16725 }, { "epoch": 0.56, "grad_norm": 0.4096953868865967, "learning_rate": 0.0005504873226249066, "loss": 1.8233, "step": 16726 }, { "epoch": 0.56, "grad_norm": 0.4384068250656128, "learning_rate": 0.000550481570526178, "loss": 1.8294, "step": 16727 }, { "epoch": 0.56, "grad_norm": 0.43752819299697876, "learning_rate": 0.0005504758181234016, "loss": 1.8555, "step": 16728 }, { "epoch": 0.56, "grad_norm": 0.42004042863845825, "learning_rate": 0.0005504700654165846, "loss": 1.7948, "step": 16729 }, { "epoch": 0.56, "grad_norm": 0.4151001274585724, "learning_rate": 0.0005504643124057337, "loss": 1.8126, "step": 16730 }, { "epoch": 0.56, "grad_norm": 0.4158431887626648, "learning_rate": 0.0005504585590908562, "loss": 1.8588, "step": 16731 }, { "epoch": 0.56, "grad_norm": 0.4277273118495941, "learning_rate": 0.0005504528054719589, "loss": 1.819, "step": 16732 }, { "epoch": 0.56, "grad_norm": 0.43607330322265625, "learning_rate": 0.0005504470515490487, "loss": 1.8652, "step": 16733 }, { "epoch": 0.56, "grad_norm": 0.43748748302459717, "learning_rate": 0.0005504412973221328, "loss": 1.8579, "step": 16734 }, { "epoch": 0.56, "grad_norm": 0.42872104048728943, "learning_rate": 0.0005504355427912178, "loss": 1.7932, "step": 16735 }, { "epoch": 0.56, "grad_norm": 0.42466503381729126, "learning_rate": 0.0005504297879563113, "loss": 1.8416, "step": 16736 }, { "epoch": 0.56, "grad_norm": 0.4306568205356598, "learning_rate": 0.0005504240328174197, "loss": 1.7117, "step": 16737 }, { "epoch": 0.56, "grad_norm": 0.43643221259117126, "learning_rate": 0.0005504182773745503, "loss": 1.8934, "step": 16738 }, { "epoch": 0.56, "grad_norm": 0.4562361240386963, "learning_rate": 0.00055041252162771, "loss": 1.8592, "step": 16739 }, { "epoch": 0.56, "grad_norm": 0.43136894702911377, "learning_rate": 0.0005504067655769058, "loss": 1.7689, "step": 16740 }, { "epoch": 0.56, "grad_norm": 0.41843780875205994, "learning_rate": 0.0005504010092221447, "loss": 1.8796, "step": 16741 }, { "epoch": 0.56, "grad_norm": 0.4331740736961365, "learning_rate": 0.0005503952525634336, "loss": 1.8713, "step": 16742 }, { "epoch": 0.56, "grad_norm": 0.4241769015789032, "learning_rate": 0.0005503894956007796, "loss": 1.8864, "step": 16743 }, { "epoch": 0.56, "grad_norm": 0.4468964636325836, "learning_rate": 0.0005503837383341897, "loss": 1.8674, "step": 16744 }, { "epoch": 0.56, "grad_norm": 0.4330957233905792, "learning_rate": 0.0005503779807636707, "loss": 1.7802, "step": 16745 }, { "epoch": 0.56, "grad_norm": 0.4369971752166748, "learning_rate": 0.0005503722228892299, "loss": 1.9215, "step": 16746 }, { "epoch": 0.56, "grad_norm": 0.44394993782043457, "learning_rate": 0.0005503664647108739, "loss": 1.8421, "step": 16747 }, { "epoch": 0.56, "grad_norm": 0.43153512477874756, "learning_rate": 0.00055036070622861, "loss": 1.8471, "step": 16748 }, { "epoch": 0.56, "grad_norm": 0.426850825548172, "learning_rate": 0.0005503549474424452, "loss": 1.8553, "step": 16749 }, { "epoch": 0.56, "grad_norm": 0.429520845413208, "learning_rate": 0.0005503491883523862, "loss": 1.8798, "step": 16750 }, { "epoch": 0.56, "grad_norm": 0.439982533454895, "learning_rate": 0.0005503434289584404, "loss": 1.8332, "step": 16751 }, { "epoch": 0.56, "grad_norm": 0.45916587114334106, "learning_rate": 0.0005503376692606143, "loss": 1.8457, "step": 16752 }, { "epoch": 0.56, "grad_norm": 0.42426833510398865, "learning_rate": 0.0005503319092589154, "loss": 1.9146, "step": 16753 }, { "epoch": 0.56, "grad_norm": 0.42890551686286926, "learning_rate": 0.0005503261489533503, "loss": 1.8228, "step": 16754 }, { "epoch": 0.56, "grad_norm": 0.4652020037174225, "learning_rate": 0.0005503203883439262, "loss": 1.8556, "step": 16755 }, { "epoch": 0.56, "grad_norm": 0.4338252544403076, "learning_rate": 0.00055031462743065, "loss": 1.7959, "step": 16756 }, { "epoch": 0.56, "grad_norm": 0.5013778805732727, "learning_rate": 0.0005503088662135286, "loss": 1.8954, "step": 16757 }, { "epoch": 0.56, "grad_norm": 0.4292178750038147, "learning_rate": 0.0005503031046925693, "loss": 1.8032, "step": 16758 }, { "epoch": 0.56, "grad_norm": 0.45069047808647156, "learning_rate": 0.0005502973428677789, "loss": 1.9006, "step": 16759 }, { "epoch": 0.56, "grad_norm": 0.42954933643341064, "learning_rate": 0.0005502915807391643, "loss": 1.9202, "step": 16760 }, { "epoch": 0.56, "grad_norm": 0.42532098293304443, "learning_rate": 0.0005502858183067327, "loss": 1.8328, "step": 16761 }, { "epoch": 0.56, "grad_norm": 0.42525941133499146, "learning_rate": 0.000550280055570491, "loss": 1.7849, "step": 16762 }, { "epoch": 0.56, "grad_norm": 0.4182347357273102, "learning_rate": 0.0005502742925304461, "loss": 1.8339, "step": 16763 }, { "epoch": 0.56, "grad_norm": 0.4410443902015686, "learning_rate": 0.000550268529186605, "loss": 1.8803, "step": 16764 }, { "epoch": 0.56, "grad_norm": 0.4273034334182739, "learning_rate": 0.000550262765538975, "loss": 1.8356, "step": 16765 }, { "epoch": 0.56, "grad_norm": 0.4283672869205475, "learning_rate": 0.0005502570015875628, "loss": 1.839, "step": 16766 }, { "epoch": 0.56, "grad_norm": 0.43062323331832886, "learning_rate": 0.0005502512373323755, "loss": 1.835, "step": 16767 }, { "epoch": 0.56, "grad_norm": 0.44717681407928467, "learning_rate": 0.00055024547277342, "loss": 1.8951, "step": 16768 }, { "epoch": 0.56, "grad_norm": 0.42478132247924805, "learning_rate": 0.0005502397079107034, "loss": 1.8221, "step": 16769 }, { "epoch": 0.56, "grad_norm": 0.45494192838668823, "learning_rate": 0.0005502339427442326, "loss": 1.8236, "step": 16770 }, { "epoch": 0.56, "grad_norm": 0.4384683668613434, "learning_rate": 0.0005502281772740148, "loss": 1.8877, "step": 16771 }, { "epoch": 0.56, "grad_norm": 0.4802318513393402, "learning_rate": 0.0005502224115000569, "loss": 1.857, "step": 16772 }, { "epoch": 0.56, "grad_norm": 0.4304984211921692, "learning_rate": 0.0005502166454223656, "loss": 1.8497, "step": 16773 }, { "epoch": 0.56, "grad_norm": 0.43058067560195923, "learning_rate": 0.0005502108790409483, "loss": 1.8245, "step": 16774 }, { "epoch": 0.56, "grad_norm": 0.542539119720459, "learning_rate": 0.0005502051123558118, "loss": 1.8679, "step": 16775 }, { "epoch": 0.56, "grad_norm": 0.42315688729286194, "learning_rate": 0.0005501993453669633, "loss": 1.8585, "step": 16776 }, { "epoch": 0.56, "grad_norm": 0.4282878637313843, "learning_rate": 0.0005501935780744096, "loss": 1.8918, "step": 16777 }, { "epoch": 0.56, "grad_norm": 0.4382306635379791, "learning_rate": 0.0005501878104781578, "loss": 1.8791, "step": 16778 }, { "epoch": 0.56, "grad_norm": 0.5393406748771667, "learning_rate": 0.0005501820425782148, "loss": 1.8346, "step": 16779 }, { "epoch": 0.56, "grad_norm": 0.4479967951774597, "learning_rate": 0.0005501762743745877, "loss": 1.9015, "step": 16780 }, { "epoch": 0.56, "grad_norm": 0.41960856318473816, "learning_rate": 0.0005501705058672834, "loss": 1.8085, "step": 16781 }, { "epoch": 0.56, "grad_norm": 0.445010244846344, "learning_rate": 0.000550164737056309, "loss": 1.8235, "step": 16782 }, { "epoch": 0.56, "grad_norm": 0.4246234595775604, "learning_rate": 0.0005501589679416714, "loss": 1.8004, "step": 16783 }, { "epoch": 0.56, "grad_norm": 0.43803784251213074, "learning_rate": 0.0005501531985233778, "loss": 1.9306, "step": 16784 }, { "epoch": 0.56, "grad_norm": 0.4224769175052643, "learning_rate": 0.0005501474288014351, "loss": 1.8085, "step": 16785 }, { "epoch": 0.56, "grad_norm": 0.4276638627052307, "learning_rate": 0.0005501416587758501, "loss": 1.7927, "step": 16786 }, { "epoch": 0.56, "grad_norm": 0.42223119735717773, "learning_rate": 0.0005501358884466302, "loss": 1.8663, "step": 16787 }, { "epoch": 0.56, "grad_norm": 0.4328366219997406, "learning_rate": 0.000550130117813782, "loss": 1.7851, "step": 16788 }, { "epoch": 0.56, "grad_norm": 0.9261441826820374, "learning_rate": 0.0005501243468773127, "loss": 1.8919, "step": 16789 }, { "epoch": 0.56, "grad_norm": 0.4476100206375122, "learning_rate": 0.0005501185756372294, "loss": 1.9433, "step": 16790 }, { "epoch": 0.56, "grad_norm": 0.43682345747947693, "learning_rate": 0.000550112804093539, "loss": 1.8603, "step": 16791 }, { "epoch": 0.56, "grad_norm": 0.4339708387851715, "learning_rate": 0.0005501070322462485, "loss": 1.8945, "step": 16792 }, { "epoch": 0.56, "grad_norm": 0.4306145906448364, "learning_rate": 0.0005501012600953648, "loss": 1.9107, "step": 16793 }, { "epoch": 0.56, "grad_norm": 0.428771436214447, "learning_rate": 0.0005500954876408952, "loss": 1.8131, "step": 16794 }, { "epoch": 0.56, "grad_norm": 0.4225485026836395, "learning_rate": 0.0005500897148828464, "loss": 1.8194, "step": 16795 }, { "epoch": 0.56, "grad_norm": 0.42698803544044495, "learning_rate": 0.0005500839418212255, "loss": 1.8999, "step": 16796 }, { "epoch": 0.56, "grad_norm": 0.4304558336734772, "learning_rate": 0.0005500781684560397, "loss": 1.9448, "step": 16797 }, { "epoch": 0.56, "grad_norm": 0.43158966302871704, "learning_rate": 0.0005500723947872957, "loss": 1.8562, "step": 16798 }, { "epoch": 0.56, "grad_norm": 0.4465050995349884, "learning_rate": 0.0005500666208150007, "loss": 1.8995, "step": 16799 }, { "epoch": 0.56, "grad_norm": 0.44106757640838623, "learning_rate": 0.0005500608465391618, "loss": 1.9274, "step": 16800 }, { "epoch": 0.56, "grad_norm": 0.44732528924942017, "learning_rate": 0.0005500550719597857, "loss": 1.8792, "step": 16801 }, { "epoch": 0.56, "grad_norm": 0.44768673181533813, "learning_rate": 0.0005500492970768796, "loss": 1.9268, "step": 16802 }, { "epoch": 0.56, "grad_norm": 0.4358614683151245, "learning_rate": 0.0005500435218904506, "loss": 1.8246, "step": 16803 }, { "epoch": 0.56, "grad_norm": 0.42916372418403625, "learning_rate": 0.0005500377464005056, "loss": 1.8618, "step": 16804 }, { "epoch": 0.56, "grad_norm": 0.42264118790626526, "learning_rate": 0.0005500319706070516, "loss": 1.8159, "step": 16805 }, { "epoch": 0.56, "grad_norm": 0.43375712633132935, "learning_rate": 0.0005500261945100955, "loss": 1.8765, "step": 16806 }, { "epoch": 0.56, "grad_norm": 0.43017491698265076, "learning_rate": 0.0005500204181096446, "loss": 1.8054, "step": 16807 }, { "epoch": 0.56, "grad_norm": 0.43603330850601196, "learning_rate": 0.0005500146414057057, "loss": 1.8515, "step": 16808 }, { "epoch": 0.56, "grad_norm": 0.44120338559150696, "learning_rate": 0.0005500088643982859, "loss": 1.8702, "step": 16809 }, { "epoch": 0.56, "grad_norm": 0.44527962803840637, "learning_rate": 0.0005500030870873921, "loss": 1.8536, "step": 16810 }, { "epoch": 0.56, "grad_norm": 0.4333982765674591, "learning_rate": 0.0005499973094730315, "loss": 1.9069, "step": 16811 }, { "epoch": 0.56, "grad_norm": 0.4083925485610962, "learning_rate": 0.0005499915315552109, "loss": 1.8064, "step": 16812 }, { "epoch": 0.56, "grad_norm": 0.4448046088218689, "learning_rate": 0.0005499857533339373, "loss": 1.9052, "step": 16813 }, { "epoch": 0.56, "grad_norm": 0.4338386356830597, "learning_rate": 0.000549979974809218, "loss": 1.8435, "step": 16814 }, { "epoch": 0.56, "grad_norm": 0.43823540210723877, "learning_rate": 0.0005499741959810599, "loss": 1.8471, "step": 16815 }, { "epoch": 0.56, "grad_norm": 0.43197447061538696, "learning_rate": 0.0005499684168494698, "loss": 1.8106, "step": 16816 }, { "epoch": 0.56, "grad_norm": 0.42113375663757324, "learning_rate": 0.000549962637414455, "loss": 1.8325, "step": 16817 }, { "epoch": 0.56, "grad_norm": 0.4213325083255768, "learning_rate": 0.0005499568576760223, "loss": 1.7997, "step": 16818 }, { "epoch": 0.56, "grad_norm": 0.46897441148757935, "learning_rate": 0.0005499510776341788, "loss": 1.8695, "step": 16819 }, { "epoch": 0.56, "grad_norm": 0.44494935870170593, "learning_rate": 0.0005499452972889317, "loss": 1.9548, "step": 16820 }, { "epoch": 0.56, "grad_norm": 0.42738890647888184, "learning_rate": 0.0005499395166402876, "loss": 1.8443, "step": 16821 }, { "epoch": 0.56, "grad_norm": 0.41755211353302, "learning_rate": 0.0005499337356882538, "loss": 1.8172, "step": 16822 }, { "epoch": 0.56, "grad_norm": 0.42547744512557983, "learning_rate": 0.0005499279544328373, "loss": 1.8247, "step": 16823 }, { "epoch": 0.56, "grad_norm": 0.42074310779571533, "learning_rate": 0.000549922172874045, "loss": 1.8494, "step": 16824 }, { "epoch": 0.56, "grad_norm": 0.4941447377204895, "learning_rate": 0.0005499163910118842, "loss": 1.9162, "step": 16825 }, { "epoch": 0.56, "grad_norm": 0.4489629566669464, "learning_rate": 0.0005499106088463617, "loss": 1.8145, "step": 16826 }, { "epoch": 0.56, "grad_norm": 0.4327927231788635, "learning_rate": 0.0005499048263774845, "loss": 1.8405, "step": 16827 }, { "epoch": 0.56, "grad_norm": 0.4310019314289093, "learning_rate": 0.0005498990436052596, "loss": 1.8417, "step": 16828 }, { "epoch": 0.56, "grad_norm": 0.41827207803726196, "learning_rate": 0.0005498932605296941, "loss": 1.8297, "step": 16829 }, { "epoch": 0.56, "grad_norm": 0.4334719777107239, "learning_rate": 0.0005498874771507951, "loss": 1.9342, "step": 16830 }, { "epoch": 0.56, "grad_norm": 0.43246111273765564, "learning_rate": 0.0005498816934685694, "loss": 1.7825, "step": 16831 }, { "epoch": 0.56, "grad_norm": 0.413908988237381, "learning_rate": 0.0005498759094830242, "loss": 1.8717, "step": 16832 }, { "epoch": 0.56, "grad_norm": 0.42564499378204346, "learning_rate": 0.0005498701251941665, "loss": 1.8833, "step": 16833 }, { "epoch": 0.56, "grad_norm": 0.4288875162601471, "learning_rate": 0.0005498643406020032, "loss": 1.8554, "step": 16834 }, { "epoch": 0.56, "grad_norm": 0.5271149277687073, "learning_rate": 0.0005498585557065414, "loss": 1.8489, "step": 16835 }, { "epoch": 0.56, "grad_norm": 0.43476998805999756, "learning_rate": 0.0005498527705077882, "loss": 1.8433, "step": 16836 }, { "epoch": 0.56, "grad_norm": 0.4191795289516449, "learning_rate": 0.0005498469850057505, "loss": 1.7654, "step": 16837 }, { "epoch": 0.56, "grad_norm": 0.42642390727996826, "learning_rate": 0.0005498411992004354, "loss": 1.8773, "step": 16838 }, { "epoch": 0.56, "grad_norm": 0.4340152144432068, "learning_rate": 0.0005498354130918499, "loss": 1.8487, "step": 16839 }, { "epoch": 0.56, "grad_norm": 0.41856321692466736, "learning_rate": 0.000549829626680001, "loss": 1.8653, "step": 16840 }, { "epoch": 0.56, "grad_norm": 0.42104220390319824, "learning_rate": 0.0005498238399648958, "loss": 1.7975, "step": 16841 }, { "epoch": 0.56, "grad_norm": 0.44656360149383545, "learning_rate": 0.0005498180529465412, "loss": 1.8892, "step": 16842 }, { "epoch": 0.56, "grad_norm": 0.43022987246513367, "learning_rate": 0.0005498122656249442, "loss": 1.8067, "step": 16843 }, { "epoch": 0.56, "grad_norm": 0.43706297874450684, "learning_rate": 0.0005498064780001122, "loss": 1.8731, "step": 16844 }, { "epoch": 0.56, "grad_norm": 0.4237925410270691, "learning_rate": 0.0005498006900720517, "loss": 1.833, "step": 16845 }, { "epoch": 0.56, "grad_norm": 0.41607367992401123, "learning_rate": 0.0005497949018407701, "loss": 1.8089, "step": 16846 }, { "epoch": 0.56, "grad_norm": 0.4216940701007843, "learning_rate": 0.0005497891133062742, "loss": 1.8404, "step": 16847 }, { "epoch": 0.56, "grad_norm": 0.4228236973285675, "learning_rate": 0.0005497833244685712, "loss": 1.857, "step": 16848 }, { "epoch": 0.56, "grad_norm": 0.4276042580604553, "learning_rate": 0.000549777535327668, "loss": 1.9108, "step": 16849 }, { "epoch": 0.56, "grad_norm": 0.4721314311027527, "learning_rate": 0.0005497717458835717, "loss": 1.8268, "step": 16850 }, { "epoch": 0.56, "grad_norm": 0.42415091395378113, "learning_rate": 0.0005497659561362893, "loss": 1.8019, "step": 16851 }, { "epoch": 0.56, "grad_norm": 0.42557576298713684, "learning_rate": 0.0005497601660858278, "loss": 1.849, "step": 16852 }, { "epoch": 0.56, "grad_norm": 0.45562049746513367, "learning_rate": 0.0005497543757321942, "loss": 1.827, "step": 16853 }, { "epoch": 0.56, "grad_norm": 0.4522586464881897, "learning_rate": 0.0005497485850753957, "loss": 1.8928, "step": 16854 }, { "epoch": 0.56, "grad_norm": 0.43005821108818054, "learning_rate": 0.0005497427941154392, "loss": 1.863, "step": 16855 }, { "epoch": 0.56, "grad_norm": 0.4260449707508087, "learning_rate": 0.0005497370028523317, "loss": 1.8618, "step": 16856 }, { "epoch": 0.56, "grad_norm": 0.4634857773780823, "learning_rate": 0.0005497312112860803, "loss": 1.8229, "step": 16857 }, { "epoch": 0.56, "grad_norm": 0.45122966170310974, "learning_rate": 0.0005497254194166921, "loss": 1.8857, "step": 16858 }, { "epoch": 0.56, "grad_norm": 0.4214053153991699, "learning_rate": 0.0005497196272441739, "loss": 1.8054, "step": 16859 }, { "epoch": 0.56, "grad_norm": 0.4425117075443268, "learning_rate": 0.0005497138347685329, "loss": 1.977, "step": 16860 }, { "epoch": 0.56, "grad_norm": 0.4239414930343628, "learning_rate": 0.0005497080419897761, "loss": 1.8203, "step": 16861 }, { "epoch": 0.56, "grad_norm": 0.4371362030506134, "learning_rate": 0.0005497022489079106, "loss": 1.9041, "step": 16862 }, { "epoch": 0.56, "grad_norm": 0.45737364888191223, "learning_rate": 0.0005496964555229433, "loss": 1.8453, "step": 16863 }, { "epoch": 0.56, "grad_norm": 0.4274306893348694, "learning_rate": 0.0005496906618348813, "loss": 1.8534, "step": 16864 }, { "epoch": 0.56, "grad_norm": 0.4134228229522705, "learning_rate": 0.0005496848678437316, "loss": 1.8725, "step": 16865 }, { "epoch": 0.56, "grad_norm": 0.4407852292060852, "learning_rate": 0.0005496790735495012, "loss": 1.8448, "step": 16866 }, { "epoch": 0.56, "grad_norm": 0.4254128634929657, "learning_rate": 0.0005496732789521974, "loss": 1.85, "step": 16867 }, { "epoch": 0.56, "grad_norm": 0.4433356821537018, "learning_rate": 0.0005496674840518268, "loss": 1.8188, "step": 16868 }, { "epoch": 0.56, "grad_norm": 0.4343172609806061, "learning_rate": 0.0005496616888483967, "loss": 1.7991, "step": 16869 }, { "epoch": 0.56, "grad_norm": 0.4343636929988861, "learning_rate": 0.0005496558933419142, "loss": 1.8361, "step": 16870 }, { "epoch": 0.56, "grad_norm": 0.45588940382003784, "learning_rate": 0.0005496500975323863, "loss": 1.7928, "step": 16871 }, { "epoch": 0.56, "grad_norm": 0.45744743943214417, "learning_rate": 0.0005496443014198198, "loss": 1.8709, "step": 16872 }, { "epoch": 0.56, "grad_norm": 0.4372616410255432, "learning_rate": 0.0005496385050042219, "loss": 1.8342, "step": 16873 }, { "epoch": 0.56, "grad_norm": 0.43272683024406433, "learning_rate": 0.0005496327082855998, "loss": 1.8676, "step": 16874 }, { "epoch": 0.56, "grad_norm": 0.4299810826778412, "learning_rate": 0.0005496269112639602, "loss": 1.8787, "step": 16875 }, { "epoch": 0.56, "grad_norm": 0.43498748540878296, "learning_rate": 0.0005496211139393104, "loss": 1.9002, "step": 16876 }, { "epoch": 0.56, "grad_norm": 0.42551347613334656, "learning_rate": 0.0005496153163116572, "loss": 1.7813, "step": 16877 }, { "epoch": 0.56, "grad_norm": 0.4511811137199402, "learning_rate": 0.000549609518381008, "loss": 1.8188, "step": 16878 }, { "epoch": 0.56, "grad_norm": 0.43882912397384644, "learning_rate": 0.0005496037201473695, "loss": 1.8677, "step": 16879 }, { "epoch": 0.56, "grad_norm": 0.4897972643375397, "learning_rate": 0.000549597921610749, "loss": 1.8946, "step": 16880 }, { "epoch": 0.56, "grad_norm": 0.43223968148231506, "learning_rate": 0.0005495921227711533, "loss": 1.8546, "step": 16881 }, { "epoch": 0.56, "grad_norm": 0.4327697455883026, "learning_rate": 0.0005495863236285894, "loss": 1.7777, "step": 16882 }, { "epoch": 0.56, "grad_norm": 0.44269993901252747, "learning_rate": 0.0005495805241830647, "loss": 1.9005, "step": 16883 }, { "epoch": 0.56, "grad_norm": 0.42388755083084106, "learning_rate": 0.0005495747244345859, "loss": 1.917, "step": 16884 }, { "epoch": 0.56, "grad_norm": 0.43453171849250793, "learning_rate": 0.0005495689243831602, "loss": 1.8084, "step": 16885 }, { "epoch": 0.56, "grad_norm": 0.4198623597621918, "learning_rate": 0.0005495631240287946, "loss": 1.8443, "step": 16886 }, { "epoch": 0.56, "grad_norm": 0.42379745841026306, "learning_rate": 0.0005495573233714961, "loss": 1.8835, "step": 16887 }, { "epoch": 0.56, "grad_norm": 0.4415871202945709, "learning_rate": 0.0005495515224112718, "loss": 1.7998, "step": 16888 }, { "epoch": 0.56, "grad_norm": 0.4230858385562897, "learning_rate": 0.0005495457211481288, "loss": 1.8137, "step": 16889 }, { "epoch": 0.56, "grad_norm": 0.426626592874527, "learning_rate": 0.0005495399195820739, "loss": 1.8225, "step": 16890 }, { "epoch": 0.56, "grad_norm": 0.4228813946247101, "learning_rate": 0.0005495341177131145, "loss": 1.8379, "step": 16891 }, { "epoch": 0.56, "grad_norm": 0.4273809492588043, "learning_rate": 0.0005495283155412573, "loss": 1.8432, "step": 16892 }, { "epoch": 0.56, "grad_norm": 0.43577322363853455, "learning_rate": 0.0005495225130665095, "loss": 1.8519, "step": 16893 }, { "epoch": 0.56, "grad_norm": 0.4352421462535858, "learning_rate": 0.0005495167102888782, "loss": 1.768, "step": 16894 }, { "epoch": 0.56, "grad_norm": 0.5046554207801819, "learning_rate": 0.0005495109072083703, "loss": 1.884, "step": 16895 }, { "epoch": 0.56, "grad_norm": 0.44629743695259094, "learning_rate": 0.0005495051038249929, "loss": 1.8421, "step": 16896 }, { "epoch": 0.56, "grad_norm": 0.44560226798057556, "learning_rate": 0.0005494993001387532, "loss": 1.9486, "step": 16897 }, { "epoch": 0.56, "grad_norm": 0.4377208352088928, "learning_rate": 0.000549493496149658, "loss": 1.9279, "step": 16898 }, { "epoch": 0.56, "grad_norm": 0.4297143518924713, "learning_rate": 0.0005494876918577145, "loss": 1.8631, "step": 16899 }, { "epoch": 0.56, "grad_norm": 0.4377898871898651, "learning_rate": 0.0005494818872629297, "loss": 1.7951, "step": 16900 }, { "epoch": 0.56, "grad_norm": 0.4257996678352356, "learning_rate": 0.0005494760823653108, "loss": 1.8346, "step": 16901 }, { "epoch": 0.56, "grad_norm": 0.4328583776950836, "learning_rate": 0.0005494702771648646, "loss": 1.8003, "step": 16902 }, { "epoch": 0.56, "grad_norm": 0.43284714221954346, "learning_rate": 0.0005494644716615982, "loss": 1.8332, "step": 16903 }, { "epoch": 0.56, "grad_norm": 0.4200490415096283, "learning_rate": 0.0005494586658555186, "loss": 1.861, "step": 16904 }, { "epoch": 0.56, "grad_norm": 0.44179433584213257, "learning_rate": 0.0005494528597466329, "loss": 1.9503, "step": 16905 }, { "epoch": 0.56, "grad_norm": 0.4319058954715729, "learning_rate": 0.0005494470533349483, "loss": 1.767, "step": 16906 }, { "epoch": 0.56, "grad_norm": 0.4239315390586853, "learning_rate": 0.0005494412466204719, "loss": 1.8409, "step": 16907 }, { "epoch": 0.56, "grad_norm": 0.433248907327652, "learning_rate": 0.0005494354396032103, "loss": 1.8529, "step": 16908 }, { "epoch": 0.56, "grad_norm": 0.4228888154029846, "learning_rate": 0.000549429632283171, "loss": 1.8289, "step": 16909 }, { "epoch": 0.56, "grad_norm": 0.42317986488342285, "learning_rate": 0.0005494238246603608, "loss": 1.7861, "step": 16910 }, { "epoch": 0.56, "grad_norm": 0.43654805421829224, "learning_rate": 0.0005494180167347868, "loss": 1.8534, "step": 16911 }, { "epoch": 0.56, "grad_norm": 0.4802464246749878, "learning_rate": 0.0005494122085064561, "loss": 1.9189, "step": 16912 }, { "epoch": 0.56, "grad_norm": 0.43152472376823425, "learning_rate": 0.0005494063999753757, "loss": 1.8231, "step": 16913 }, { "epoch": 0.56, "grad_norm": 0.4202324151992798, "learning_rate": 0.0005494005911415528, "loss": 1.8749, "step": 16914 }, { "epoch": 0.56, "grad_norm": 0.4408661723136902, "learning_rate": 0.0005493947820049942, "loss": 1.9226, "step": 16915 }, { "epoch": 0.56, "grad_norm": 0.41468948125839233, "learning_rate": 0.0005493889725657072, "loss": 1.8427, "step": 16916 }, { "epoch": 0.56, "grad_norm": 0.423174649477005, "learning_rate": 0.0005493831628236987, "loss": 1.8742, "step": 16917 }, { "epoch": 0.56, "grad_norm": 0.41216176748275757, "learning_rate": 0.0005493773527789757, "loss": 1.8687, "step": 16918 }, { "epoch": 0.56, "grad_norm": 0.43166786432266235, "learning_rate": 0.0005493715424315454, "loss": 1.795, "step": 16919 }, { "epoch": 0.56, "grad_norm": 0.4303443133831024, "learning_rate": 0.0005493657317814148, "loss": 1.8873, "step": 16920 }, { "epoch": 0.56, "grad_norm": 0.4520275294780731, "learning_rate": 0.0005493599208285909, "loss": 1.862, "step": 16921 }, { "epoch": 0.56, "grad_norm": 0.4337400197982788, "learning_rate": 0.0005493541095730809, "loss": 1.8108, "step": 16922 }, { "epoch": 0.56, "grad_norm": 0.43186599016189575, "learning_rate": 0.0005493482980148917, "loss": 1.8496, "step": 16923 }, { "epoch": 0.56, "grad_norm": 0.45160236954689026, "learning_rate": 0.0005493424861540303, "loss": 1.9226, "step": 16924 }, { "epoch": 0.56, "grad_norm": 0.4324498176574707, "learning_rate": 0.000549336673990504, "loss": 1.8851, "step": 16925 }, { "epoch": 0.56, "grad_norm": 0.4294401705265045, "learning_rate": 0.0005493308615243197, "loss": 1.8528, "step": 16926 }, { "epoch": 0.56, "grad_norm": 0.4376828074455261, "learning_rate": 0.0005493250487554844, "loss": 1.9014, "step": 16927 }, { "epoch": 0.56, "grad_norm": 0.4779600501060486, "learning_rate": 0.0005493192356840052, "loss": 1.8703, "step": 16928 }, { "epoch": 0.56, "grad_norm": 0.8616841435432434, "learning_rate": 0.0005493134223098893, "loss": 1.8898, "step": 16929 }, { "epoch": 0.56, "grad_norm": 0.4344339072704315, "learning_rate": 0.0005493076086331435, "loss": 1.8212, "step": 16930 }, { "epoch": 0.56, "grad_norm": 0.4331149756908417, "learning_rate": 0.0005493017946537752, "loss": 1.9453, "step": 16931 }, { "epoch": 0.56, "grad_norm": 0.439556360244751, "learning_rate": 0.000549295980371791, "loss": 1.8239, "step": 16932 }, { "epoch": 0.56, "grad_norm": 0.4213981032371521, "learning_rate": 0.0005492901657871983, "loss": 1.8449, "step": 16933 }, { "epoch": 0.56, "grad_norm": 0.4305882751941681, "learning_rate": 0.0005492843509000042, "loss": 1.8902, "step": 16934 }, { "epoch": 0.56, "grad_norm": 0.4258289337158203, "learning_rate": 0.0005492785357102156, "loss": 1.7444, "step": 16935 }, { "epoch": 0.56, "grad_norm": 0.4619946777820587, "learning_rate": 0.0005492727202178395, "loss": 1.8463, "step": 16936 }, { "epoch": 0.56, "grad_norm": 0.4256192743778229, "learning_rate": 0.0005492669044228831, "loss": 1.8933, "step": 16937 }, { "epoch": 0.56, "grad_norm": 0.4299631118774414, "learning_rate": 0.0005492610883253534, "loss": 1.8644, "step": 16938 }, { "epoch": 0.56, "grad_norm": 0.43791237473487854, "learning_rate": 0.0005492552719252574, "loss": 1.9229, "step": 16939 }, { "epoch": 0.56, "grad_norm": 0.41399580240249634, "learning_rate": 0.0005492494552226023, "loss": 1.8803, "step": 16940 }, { "epoch": 0.56, "grad_norm": 0.43500441312789917, "learning_rate": 0.000549243638217395, "loss": 1.7936, "step": 16941 }, { "epoch": 0.56, "grad_norm": 0.48836037516593933, "learning_rate": 0.0005492378209096428, "loss": 1.8251, "step": 16942 }, { "epoch": 0.56, "grad_norm": 0.42986634373664856, "learning_rate": 0.0005492320032993525, "loss": 1.8912, "step": 16943 }, { "epoch": 0.56, "grad_norm": 0.43946215510368347, "learning_rate": 0.0005492261853865313, "loss": 1.8902, "step": 16944 }, { "epoch": 0.56, "grad_norm": 0.44783201813697815, "learning_rate": 0.0005492203671711863, "loss": 1.858, "step": 16945 }, { "epoch": 0.56, "grad_norm": 0.4256703555583954, "learning_rate": 0.0005492145486533243, "loss": 1.8615, "step": 16946 }, { "epoch": 0.56, "grad_norm": 0.4606783390045166, "learning_rate": 0.0005492087298329527, "loss": 1.867, "step": 16947 }, { "epoch": 0.56, "grad_norm": 0.4509383738040924, "learning_rate": 0.0005492029107100784, "loss": 1.9215, "step": 16948 }, { "epoch": 0.56, "grad_norm": 0.4272952675819397, "learning_rate": 0.0005491970912847085, "loss": 1.8009, "step": 16949 }, { "epoch": 0.56, "grad_norm": 0.42853114008903503, "learning_rate": 0.00054919127155685, "loss": 1.8826, "step": 16950 }, { "epoch": 0.56, "grad_norm": 0.4294467270374298, "learning_rate": 0.0005491854515265101, "loss": 1.9246, "step": 16951 }, { "epoch": 0.56, "grad_norm": 0.4391745328903198, "learning_rate": 0.0005491796311936956, "loss": 1.8967, "step": 16952 }, { "epoch": 0.56, "grad_norm": 0.443143367767334, "learning_rate": 0.0005491738105584139, "loss": 1.8045, "step": 16953 }, { "epoch": 0.56, "grad_norm": 0.4295486509799957, "learning_rate": 0.0005491679896206719, "loss": 1.7726, "step": 16954 }, { "epoch": 0.56, "grad_norm": 0.45618563890457153, "learning_rate": 0.0005491621683804766, "loss": 1.7912, "step": 16955 }, { "epoch": 0.56, "grad_norm": 0.4226175844669342, "learning_rate": 0.0005491563468378352, "loss": 1.7695, "step": 16956 }, { "epoch": 0.56, "grad_norm": 0.4682912528514862, "learning_rate": 0.0005491505249927547, "loss": 1.8478, "step": 16957 }, { "epoch": 0.56, "grad_norm": 0.4477025270462036, "learning_rate": 0.0005491447028452422, "loss": 1.8461, "step": 16958 }, { "epoch": 0.56, "grad_norm": 0.41621848940849304, "learning_rate": 0.0005491388803953046, "loss": 1.8254, "step": 16959 }, { "epoch": 0.56, "grad_norm": 0.4389137625694275, "learning_rate": 0.0005491330576429493, "loss": 1.7917, "step": 16960 }, { "epoch": 0.56, "grad_norm": 0.430410236120224, "learning_rate": 0.000549127234588183, "loss": 1.8177, "step": 16961 }, { "epoch": 0.56, "grad_norm": 0.43510523438453674, "learning_rate": 0.0005491214112310131, "loss": 1.7893, "step": 16962 }, { "epoch": 0.56, "grad_norm": 0.426785945892334, "learning_rate": 0.0005491155875714464, "loss": 1.7961, "step": 16963 }, { "epoch": 0.56, "grad_norm": 0.4330722987651825, "learning_rate": 0.0005491097636094902, "loss": 1.823, "step": 16964 }, { "epoch": 0.56, "grad_norm": 0.4288458526134491, "learning_rate": 0.0005491039393451514, "loss": 1.8412, "step": 16965 }, { "epoch": 0.56, "grad_norm": 0.41989612579345703, "learning_rate": 0.0005490981147784371, "loss": 1.8525, "step": 16966 }, { "epoch": 0.56, "grad_norm": 0.43852075934410095, "learning_rate": 0.0005490922899093543, "loss": 1.8232, "step": 16967 }, { "epoch": 0.56, "grad_norm": 0.4310451149940491, "learning_rate": 0.0005490864647379103, "loss": 1.8844, "step": 16968 }, { "epoch": 0.56, "grad_norm": 0.44142234325408936, "learning_rate": 0.000549080639264112, "loss": 1.8683, "step": 16969 }, { "epoch": 0.56, "grad_norm": 0.4482875168323517, "learning_rate": 0.0005490748134879665, "loss": 1.9442, "step": 16970 }, { "epoch": 0.56, "grad_norm": 0.42439180612564087, "learning_rate": 0.0005490689874094809, "loss": 1.8763, "step": 16971 }, { "epoch": 0.56, "grad_norm": 0.42181068658828735, "learning_rate": 0.0005490631610286621, "loss": 1.9454, "step": 16972 }, { "epoch": 0.56, "grad_norm": 0.44278690218925476, "learning_rate": 0.0005490573343455175, "loss": 1.8754, "step": 16973 }, { "epoch": 0.56, "grad_norm": 0.4177592396736145, "learning_rate": 0.000549051507360054, "loss": 1.8951, "step": 16974 }, { "epoch": 0.56, "grad_norm": 0.41579777002334595, "learning_rate": 0.0005490456800722785, "loss": 1.7943, "step": 16975 }, { "epoch": 0.56, "grad_norm": 0.41696345806121826, "learning_rate": 0.0005490398524821984, "loss": 1.8354, "step": 16976 }, { "epoch": 0.56, "grad_norm": 0.4176197052001953, "learning_rate": 0.0005490340245898205, "loss": 1.8353, "step": 16977 }, { "epoch": 0.56, "grad_norm": 0.4460090696811676, "learning_rate": 0.000549028196395152, "loss": 1.8078, "step": 16978 }, { "epoch": 0.56, "grad_norm": 0.8291410207748413, "learning_rate": 0.0005490223678982, "loss": 1.9103, "step": 16979 }, { "epoch": 0.56, "grad_norm": 0.44727587699890137, "learning_rate": 0.0005490165390989716, "loss": 1.8979, "step": 16980 }, { "epoch": 0.56, "grad_norm": 0.4447486102581024, "learning_rate": 0.0005490107099974737, "loss": 1.8945, "step": 16981 }, { "epoch": 0.56, "grad_norm": 0.4467175006866455, "learning_rate": 0.0005490048805937136, "loss": 1.8537, "step": 16982 }, { "epoch": 0.57, "grad_norm": 0.42470279335975647, "learning_rate": 0.0005489990508876981, "loss": 1.861, "step": 16983 }, { "epoch": 0.57, "grad_norm": 0.4323107600212097, "learning_rate": 0.0005489932208794346, "loss": 1.7967, "step": 16984 }, { "epoch": 0.57, "grad_norm": 0.4457351267337799, "learning_rate": 0.00054898739056893, "loss": 1.7723, "step": 16985 }, { "epoch": 0.57, "grad_norm": 0.4736650288105011, "learning_rate": 0.0005489815599561913, "loss": 1.8929, "step": 16986 }, { "epoch": 0.57, "grad_norm": 0.42573514580726624, "learning_rate": 0.0005489757290412258, "loss": 1.8272, "step": 16987 }, { "epoch": 0.57, "grad_norm": 0.4486733376979828, "learning_rate": 0.0005489698978240403, "loss": 1.8856, "step": 16988 }, { "epoch": 0.57, "grad_norm": 0.4525403082370758, "learning_rate": 0.0005489640663046421, "loss": 1.8837, "step": 16989 }, { "epoch": 0.57, "grad_norm": 0.434757262468338, "learning_rate": 0.0005489582344830382, "loss": 1.8711, "step": 16990 }, { "epoch": 0.57, "grad_norm": 0.4387377202510834, "learning_rate": 0.0005489524023592357, "loss": 1.7988, "step": 16991 }, { "epoch": 0.57, "grad_norm": 0.4330480694770813, "learning_rate": 0.0005489465699332417, "loss": 1.8347, "step": 16992 }, { "epoch": 0.57, "grad_norm": 0.4441904127597809, "learning_rate": 0.0005489407372050632, "loss": 1.8037, "step": 16993 }, { "epoch": 0.57, "grad_norm": 0.4484635591506958, "learning_rate": 0.0005489349041747073, "loss": 1.9366, "step": 16994 }, { "epoch": 0.57, "grad_norm": 0.45050740242004395, "learning_rate": 0.0005489290708421811, "loss": 1.7667, "step": 16995 }, { "epoch": 0.57, "grad_norm": 0.4707121253013611, "learning_rate": 0.0005489232372074918, "loss": 1.9233, "step": 16996 }, { "epoch": 0.57, "grad_norm": 0.45690760016441345, "learning_rate": 0.0005489174032706463, "loss": 1.9126, "step": 16997 }, { "epoch": 0.57, "grad_norm": 0.4244072139263153, "learning_rate": 0.0005489115690316517, "loss": 1.8643, "step": 16998 }, { "epoch": 0.57, "grad_norm": 0.43367472290992737, "learning_rate": 0.0005489057344905151, "loss": 1.8299, "step": 16999 }, { "epoch": 0.57, "grad_norm": 0.45921099185943604, "learning_rate": 0.0005488998996472438, "loss": 1.8592, "step": 17000 }, { "epoch": 0.57, "grad_norm": 0.7421258091926575, "learning_rate": 0.0005488940645018445, "loss": 1.9162, "step": 17001 }, { "epoch": 0.57, "grad_norm": 0.43537217378616333, "learning_rate": 0.0005488882290543245, "loss": 1.8471, "step": 17002 }, { "epoch": 0.57, "grad_norm": 0.43182072043418884, "learning_rate": 0.000548882393304691, "loss": 1.9203, "step": 17003 }, { "epoch": 0.57, "grad_norm": 0.43439388275146484, "learning_rate": 0.0005488765572529508, "loss": 1.8098, "step": 17004 }, { "epoch": 0.57, "grad_norm": 0.4465843439102173, "learning_rate": 0.0005488707208991112, "loss": 1.907, "step": 17005 }, { "epoch": 0.57, "grad_norm": 0.4221879243850708, "learning_rate": 0.0005488648842431793, "loss": 1.8261, "step": 17006 }, { "epoch": 0.57, "grad_norm": 0.4416757822036743, "learning_rate": 0.0005488590472851619, "loss": 1.8539, "step": 17007 }, { "epoch": 0.57, "grad_norm": 0.4297625422477722, "learning_rate": 0.0005488532100250664, "loss": 1.8116, "step": 17008 }, { "epoch": 0.57, "grad_norm": 0.4256293773651123, "learning_rate": 0.0005488473724628998, "loss": 1.7867, "step": 17009 }, { "epoch": 0.57, "grad_norm": 0.43674832582473755, "learning_rate": 0.000548841534598669, "loss": 1.8197, "step": 17010 }, { "epoch": 0.57, "grad_norm": 0.4406280517578125, "learning_rate": 0.0005488356964323815, "loss": 1.8545, "step": 17011 }, { "epoch": 0.57, "grad_norm": 0.42699697613716125, "learning_rate": 0.0005488298579640439, "loss": 1.84, "step": 17012 }, { "epoch": 0.57, "grad_norm": 0.42715585231781006, "learning_rate": 0.0005488240191936636, "loss": 1.8439, "step": 17013 }, { "epoch": 0.57, "grad_norm": 0.43228697776794434, "learning_rate": 0.0005488181801212476, "loss": 1.8332, "step": 17014 }, { "epoch": 0.57, "grad_norm": 0.4237598776817322, "learning_rate": 0.0005488123407468029, "loss": 1.817, "step": 17015 }, { "epoch": 0.57, "grad_norm": 0.4263209402561188, "learning_rate": 0.0005488065010703369, "loss": 1.8269, "step": 17016 }, { "epoch": 0.57, "grad_norm": 0.41755783557891846, "learning_rate": 0.0005488006610918563, "loss": 1.9312, "step": 17017 }, { "epoch": 0.57, "grad_norm": 0.43273231387138367, "learning_rate": 0.0005487948208113684, "loss": 1.8382, "step": 17018 }, { "epoch": 0.57, "grad_norm": 0.4186711609363556, "learning_rate": 0.0005487889802288803, "loss": 1.8617, "step": 17019 }, { "epoch": 0.57, "grad_norm": 0.4218244254589081, "learning_rate": 0.0005487831393443989, "loss": 1.792, "step": 17020 }, { "epoch": 0.57, "grad_norm": 0.42452555894851685, "learning_rate": 0.0005487772981579315, "loss": 1.8678, "step": 17021 }, { "epoch": 0.57, "grad_norm": 0.44619977474212646, "learning_rate": 0.0005487714566694851, "loss": 1.8834, "step": 17022 }, { "epoch": 0.57, "grad_norm": 0.430058091878891, "learning_rate": 0.0005487656148790669, "loss": 1.8385, "step": 17023 }, { "epoch": 0.57, "grad_norm": 0.4330134391784668, "learning_rate": 0.0005487597727866838, "loss": 1.8787, "step": 17024 }, { "epoch": 0.57, "grad_norm": 0.4424980878829956, "learning_rate": 0.0005487539303923429, "loss": 1.8069, "step": 17025 }, { "epoch": 0.57, "grad_norm": 0.44620901346206665, "learning_rate": 0.0005487480876960515, "loss": 1.8479, "step": 17026 }, { "epoch": 0.57, "grad_norm": 0.4147549867630005, "learning_rate": 0.0005487422446978167, "loss": 1.8323, "step": 17027 }, { "epoch": 0.57, "grad_norm": 0.43622925877571106, "learning_rate": 0.0005487364013976453, "loss": 1.826, "step": 17028 }, { "epoch": 0.57, "grad_norm": 0.43453502655029297, "learning_rate": 0.0005487305577955447, "loss": 1.8679, "step": 17029 }, { "epoch": 0.57, "grad_norm": 0.41807547211647034, "learning_rate": 0.0005487247138915216, "loss": 1.8684, "step": 17030 }, { "epoch": 0.57, "grad_norm": 0.40986645221710205, "learning_rate": 0.0005487188696855835, "loss": 1.8117, "step": 17031 }, { "epoch": 0.57, "grad_norm": 0.4339440166950226, "learning_rate": 0.0005487130251777373, "loss": 1.83, "step": 17032 }, { "epoch": 0.57, "grad_norm": 0.4282039403915405, "learning_rate": 0.0005487071803679902, "loss": 1.8537, "step": 17033 }, { "epoch": 0.57, "grad_norm": 0.4355344772338867, "learning_rate": 0.0005487013352563491, "loss": 1.8933, "step": 17034 }, { "epoch": 0.57, "grad_norm": 0.4245086908340454, "learning_rate": 0.0005486954898428213, "loss": 1.812, "step": 17035 }, { "epoch": 0.57, "grad_norm": 0.43918028473854065, "learning_rate": 0.0005486896441274138, "loss": 1.8034, "step": 17036 }, { "epoch": 0.57, "grad_norm": 0.4309488534927368, "learning_rate": 0.0005486837981101339, "loss": 1.9241, "step": 17037 }, { "epoch": 0.57, "grad_norm": 0.42015916109085083, "learning_rate": 0.0005486779517909883, "loss": 1.8186, "step": 17038 }, { "epoch": 0.57, "grad_norm": 0.43865612149238586, "learning_rate": 0.0005486721051699844, "loss": 1.8897, "step": 17039 }, { "epoch": 0.57, "grad_norm": 0.43288281559944153, "learning_rate": 0.0005486662582471291, "loss": 1.7392, "step": 17040 }, { "epoch": 0.57, "grad_norm": 0.4241265654563904, "learning_rate": 0.0005486604110224297, "loss": 1.8382, "step": 17041 }, { "epoch": 0.57, "grad_norm": 0.42426156997680664, "learning_rate": 0.0005486545634958932, "loss": 1.8533, "step": 17042 }, { "epoch": 0.57, "grad_norm": 0.41926392912864685, "learning_rate": 0.0005486487156675266, "loss": 1.7606, "step": 17043 }, { "epoch": 0.57, "grad_norm": 0.4358319044113159, "learning_rate": 0.0005486428675373372, "loss": 1.8197, "step": 17044 }, { "epoch": 0.57, "grad_norm": 0.8293890357017517, "learning_rate": 0.0005486370191053319, "loss": 1.9441, "step": 17045 }, { "epoch": 0.57, "grad_norm": 0.4208797812461853, "learning_rate": 0.000548631170371518, "loss": 1.8151, "step": 17046 }, { "epoch": 0.57, "grad_norm": 0.42148008942604065, "learning_rate": 0.0005486253213359025, "loss": 1.8596, "step": 17047 }, { "epoch": 0.57, "grad_norm": 0.4253288805484772, "learning_rate": 0.0005486194719984923, "loss": 1.8175, "step": 17048 }, { "epoch": 0.57, "grad_norm": 0.4237837493419647, "learning_rate": 0.0005486136223592948, "loss": 1.7704, "step": 17049 }, { "epoch": 0.57, "grad_norm": 0.4229780435562134, "learning_rate": 0.0005486077724183171, "loss": 1.7983, "step": 17050 }, { "epoch": 0.57, "grad_norm": 0.4342804253101349, "learning_rate": 0.0005486019221755662, "loss": 1.9184, "step": 17051 }, { "epoch": 0.57, "grad_norm": 0.4382694959640503, "learning_rate": 0.0005485960716310491, "loss": 1.9604, "step": 17052 }, { "epoch": 0.57, "grad_norm": 0.4183763265609741, "learning_rate": 0.0005485902207847729, "loss": 1.9125, "step": 17053 }, { "epoch": 0.57, "grad_norm": 0.44150277972221375, "learning_rate": 0.000548584369636745, "loss": 1.898, "step": 17054 }, { "epoch": 0.57, "grad_norm": 0.6348189115524292, "learning_rate": 0.0005485785181869722, "loss": 1.8525, "step": 17055 }, { "epoch": 0.57, "grad_norm": 0.4370438754558563, "learning_rate": 0.0005485726664354617, "loss": 1.8132, "step": 17056 }, { "epoch": 0.57, "grad_norm": 0.43387162685394287, "learning_rate": 0.0005485668143822206, "loss": 1.837, "step": 17057 }, { "epoch": 0.57, "grad_norm": 0.43872377276420593, "learning_rate": 0.0005485609620272561, "loss": 1.9178, "step": 17058 }, { "epoch": 0.57, "grad_norm": 0.4362902343273163, "learning_rate": 0.0005485551093705751, "loss": 1.8787, "step": 17059 }, { "epoch": 0.57, "grad_norm": 0.42576369643211365, "learning_rate": 0.000548549256412185, "loss": 1.8921, "step": 17060 }, { "epoch": 0.57, "grad_norm": 0.4434097111225128, "learning_rate": 0.0005485434031520925, "loss": 1.8882, "step": 17061 }, { "epoch": 0.57, "grad_norm": 0.4396209418773651, "learning_rate": 0.000548537549590305, "loss": 1.8868, "step": 17062 }, { "epoch": 0.57, "grad_norm": 0.4376482665538788, "learning_rate": 0.0005485316957268296, "loss": 1.8151, "step": 17063 }, { "epoch": 0.57, "grad_norm": 0.42843520641326904, "learning_rate": 0.0005485258415616733, "loss": 1.8048, "step": 17064 }, { "epoch": 0.57, "grad_norm": 0.4288136065006256, "learning_rate": 0.0005485199870948433, "loss": 1.8259, "step": 17065 }, { "epoch": 0.57, "grad_norm": 0.4275403320789337, "learning_rate": 0.0005485141323263465, "loss": 1.8626, "step": 17066 }, { "epoch": 0.57, "grad_norm": 0.4339764714241028, "learning_rate": 0.0005485082772561904, "loss": 1.8705, "step": 17067 }, { "epoch": 0.57, "grad_norm": 0.4387620985507965, "learning_rate": 0.0005485024218843817, "loss": 1.8509, "step": 17068 }, { "epoch": 0.57, "grad_norm": 0.4125381112098694, "learning_rate": 0.0005484965662109277, "loss": 1.8591, "step": 17069 }, { "epoch": 0.57, "grad_norm": 0.4264558255672455, "learning_rate": 0.0005484907102358354, "loss": 1.8818, "step": 17070 }, { "epoch": 0.57, "grad_norm": 0.43094873428344727, "learning_rate": 0.0005484848539591121, "loss": 1.7851, "step": 17071 }, { "epoch": 0.57, "grad_norm": 0.4337371587753296, "learning_rate": 0.0005484789973807648, "loss": 1.8179, "step": 17072 }, { "epoch": 0.57, "grad_norm": 0.41983339190483093, "learning_rate": 0.0005484731405008006, "loss": 1.8218, "step": 17073 }, { "epoch": 0.57, "grad_norm": 0.44013267755508423, "learning_rate": 0.0005484672833192267, "loss": 1.8783, "step": 17074 }, { "epoch": 0.57, "grad_norm": 0.45036473870277405, "learning_rate": 0.00054846142583605, "loss": 1.7515, "step": 17075 }, { "epoch": 0.57, "grad_norm": 0.4827476143836975, "learning_rate": 0.0005484555680512778, "loss": 1.8996, "step": 17076 }, { "epoch": 0.57, "grad_norm": 0.44414523243904114, "learning_rate": 0.000548449709964917, "loss": 1.8792, "step": 17077 }, { "epoch": 0.57, "grad_norm": 0.4361928105354309, "learning_rate": 0.0005484438515769751, "loss": 1.8003, "step": 17078 }, { "epoch": 0.57, "grad_norm": 0.4118213951587677, "learning_rate": 0.0005484379928874588, "loss": 1.8367, "step": 17079 }, { "epoch": 0.57, "grad_norm": 0.44505369663238525, "learning_rate": 0.0005484321338963754, "loss": 1.8107, "step": 17080 }, { "epoch": 0.57, "grad_norm": 0.4354952573776245, "learning_rate": 0.0005484262746037321, "loss": 1.8639, "step": 17081 }, { "epoch": 0.57, "grad_norm": 0.44060927629470825, "learning_rate": 0.0005484204150095358, "loss": 1.9158, "step": 17082 }, { "epoch": 0.57, "grad_norm": 0.4298968017101288, "learning_rate": 0.0005484145551137938, "loss": 1.8473, "step": 17083 }, { "epoch": 0.57, "grad_norm": 0.4174984097480774, "learning_rate": 0.0005484086949165131, "loss": 1.8922, "step": 17084 }, { "epoch": 0.57, "grad_norm": 0.45973390340805054, "learning_rate": 0.0005484028344177009, "loss": 1.8964, "step": 17085 }, { "epoch": 0.57, "grad_norm": 0.44760218262672424, "learning_rate": 0.0005483969736173641, "loss": 1.8551, "step": 17086 }, { "epoch": 0.57, "grad_norm": 0.4261757731437683, "learning_rate": 0.0005483911125155101, "loss": 1.9231, "step": 17087 }, { "epoch": 0.57, "grad_norm": 0.4439890384674072, "learning_rate": 0.0005483852511121458, "loss": 1.8741, "step": 17088 }, { "epoch": 0.57, "grad_norm": 0.43798643350601196, "learning_rate": 0.0005483793894072784, "loss": 1.9037, "step": 17089 }, { "epoch": 0.57, "grad_norm": 0.4426339864730835, "learning_rate": 0.0005483735274009152, "loss": 1.8192, "step": 17090 }, { "epoch": 0.57, "grad_norm": 0.4378907084465027, "learning_rate": 0.000548367665093063, "loss": 1.8972, "step": 17091 }, { "epoch": 0.57, "grad_norm": 0.44357502460479736, "learning_rate": 0.000548361802483729, "loss": 1.848, "step": 17092 }, { "epoch": 0.57, "grad_norm": 0.4326915144920349, "learning_rate": 0.0005483559395729204, "loss": 1.8811, "step": 17093 }, { "epoch": 0.57, "grad_norm": 0.44264838099479675, "learning_rate": 0.0005483500763606443, "loss": 1.8584, "step": 17094 }, { "epoch": 0.57, "grad_norm": 0.4547170102596283, "learning_rate": 0.0005483442128469079, "loss": 1.8577, "step": 17095 }, { "epoch": 0.57, "grad_norm": 0.4328364431858063, "learning_rate": 0.0005483383490317181, "loss": 1.8429, "step": 17096 }, { "epoch": 0.57, "grad_norm": 0.43329918384552, "learning_rate": 0.0005483324849150822, "loss": 1.8241, "step": 17097 }, { "epoch": 0.57, "grad_norm": 0.44388383626937866, "learning_rate": 0.0005483266204970072, "loss": 1.8188, "step": 17098 }, { "epoch": 0.57, "grad_norm": 0.44773951172828674, "learning_rate": 0.0005483207557775002, "loss": 1.8536, "step": 17099 }, { "epoch": 0.57, "grad_norm": 0.44870543479919434, "learning_rate": 0.0005483148907565685, "loss": 1.8304, "step": 17100 }, { "epoch": 0.57, "grad_norm": 0.44997113943099976, "learning_rate": 0.0005483090254342192, "loss": 1.8166, "step": 17101 }, { "epoch": 0.57, "grad_norm": 0.430718332529068, "learning_rate": 0.0005483031598104592, "loss": 1.7761, "step": 17102 }, { "epoch": 0.57, "grad_norm": 0.43084362149238586, "learning_rate": 0.0005482972938852958, "loss": 1.7951, "step": 17103 }, { "epoch": 0.57, "grad_norm": 0.44389408826828003, "learning_rate": 0.000548291427658736, "loss": 1.9396, "step": 17104 }, { "epoch": 0.57, "grad_norm": 0.44915738701820374, "learning_rate": 0.0005482855611307869, "loss": 1.9184, "step": 17105 }, { "epoch": 0.57, "grad_norm": 0.41607537865638733, "learning_rate": 0.000548279694301456, "loss": 1.8388, "step": 17106 }, { "epoch": 0.57, "grad_norm": 0.4190097749233246, "learning_rate": 0.0005482738271707499, "loss": 1.941, "step": 17107 }, { "epoch": 0.57, "grad_norm": 0.43524476885795593, "learning_rate": 0.0005482679597386761, "loss": 1.8161, "step": 17108 }, { "epoch": 0.57, "grad_norm": 0.43499112129211426, "learning_rate": 0.0005482620920052414, "loss": 1.9428, "step": 17109 }, { "epoch": 0.57, "grad_norm": 0.4694141149520874, "learning_rate": 0.0005482562239704533, "loss": 1.8408, "step": 17110 }, { "epoch": 0.57, "grad_norm": 0.42330247163772583, "learning_rate": 0.0005482503556343186, "loss": 1.7832, "step": 17111 }, { "epoch": 0.57, "grad_norm": 0.434930682182312, "learning_rate": 0.0005482444869968446, "loss": 1.8376, "step": 17112 }, { "epoch": 0.57, "grad_norm": 0.4384979009628296, "learning_rate": 0.0005482386180580383, "loss": 1.8344, "step": 17113 }, { "epoch": 0.57, "grad_norm": 0.48378369212150574, "learning_rate": 0.0005482327488179069, "loss": 1.836, "step": 17114 }, { "epoch": 0.57, "grad_norm": 0.41952502727508545, "learning_rate": 0.0005482268792764576, "loss": 1.74, "step": 17115 }, { "epoch": 0.57, "grad_norm": 0.4398210048675537, "learning_rate": 0.0005482210094336974, "loss": 1.8012, "step": 17116 }, { "epoch": 0.57, "grad_norm": 0.43597790598869324, "learning_rate": 0.0005482151392896334, "loss": 1.8373, "step": 17117 }, { "epoch": 0.57, "grad_norm": 0.4058835208415985, "learning_rate": 0.0005482092688442729, "loss": 1.7923, "step": 17118 }, { "epoch": 0.57, "grad_norm": 0.433743417263031, "learning_rate": 0.0005482033980976229, "loss": 1.7972, "step": 17119 }, { "epoch": 0.57, "grad_norm": 0.45245638489723206, "learning_rate": 0.0005481975270496906, "loss": 1.8452, "step": 17120 }, { "epoch": 0.57, "grad_norm": 0.4464637041091919, "learning_rate": 0.000548191655700483, "loss": 1.8114, "step": 17121 }, { "epoch": 0.57, "grad_norm": 0.42523014545440674, "learning_rate": 0.0005481857840500073, "loss": 1.7448, "step": 17122 }, { "epoch": 0.57, "grad_norm": 0.436954140663147, "learning_rate": 0.0005481799120982706, "loss": 1.858, "step": 17123 }, { "epoch": 0.57, "grad_norm": 0.4309489130973816, "learning_rate": 0.0005481740398452801, "loss": 1.8299, "step": 17124 }, { "epoch": 0.57, "grad_norm": 0.4292290508747101, "learning_rate": 0.0005481681672910429, "loss": 1.8498, "step": 17125 }, { "epoch": 0.57, "grad_norm": 0.4326219856739044, "learning_rate": 0.000548162294435566, "loss": 1.8771, "step": 17126 }, { "epoch": 0.57, "grad_norm": 0.438396692276001, "learning_rate": 0.0005481564212788567, "loss": 1.8196, "step": 17127 }, { "epoch": 0.57, "grad_norm": 0.4389297366142273, "learning_rate": 0.000548150547820922, "loss": 1.7981, "step": 17128 }, { "epoch": 0.57, "grad_norm": 0.432637482881546, "learning_rate": 0.0005481446740617693, "loss": 1.8754, "step": 17129 }, { "epoch": 0.57, "grad_norm": 0.4340321123600006, "learning_rate": 0.0005481388000014055, "loss": 1.7817, "step": 17130 }, { "epoch": 0.57, "grad_norm": 0.4431383013725281, "learning_rate": 0.0005481329256398376, "loss": 1.8393, "step": 17131 }, { "epoch": 0.57, "grad_norm": 0.431903213262558, "learning_rate": 0.000548127050977073, "loss": 1.8434, "step": 17132 }, { "epoch": 0.57, "grad_norm": 0.46219030022621155, "learning_rate": 0.0005481211760131185, "loss": 1.8818, "step": 17133 }, { "epoch": 0.57, "grad_norm": 0.43857839703559875, "learning_rate": 0.0005481153007479818, "loss": 1.8297, "step": 17134 }, { "epoch": 0.57, "grad_norm": 0.4257875382900238, "learning_rate": 0.0005481094251816695, "loss": 1.8129, "step": 17135 }, { "epoch": 0.57, "grad_norm": 0.43018290400505066, "learning_rate": 0.0005481035493141888, "loss": 1.7987, "step": 17136 }, { "epoch": 0.57, "grad_norm": 0.4265097677707672, "learning_rate": 0.0005480976731455471, "loss": 1.9377, "step": 17137 }, { "epoch": 0.57, "grad_norm": 0.4238194525241852, "learning_rate": 0.0005480917966757514, "loss": 1.7781, "step": 17138 }, { "epoch": 0.57, "grad_norm": 0.42803794145584106, "learning_rate": 0.0005480859199048087, "loss": 1.821, "step": 17139 }, { "epoch": 0.57, "grad_norm": 0.4322625696659088, "learning_rate": 0.0005480800428327263, "loss": 1.9157, "step": 17140 }, { "epoch": 0.57, "grad_norm": 0.4215008616447449, "learning_rate": 0.0005480741654595113, "loss": 1.748, "step": 17141 }, { "epoch": 0.57, "grad_norm": 0.43263813853263855, "learning_rate": 0.0005480682877851709, "loss": 1.8098, "step": 17142 }, { "epoch": 0.57, "grad_norm": 0.42746612429618835, "learning_rate": 0.000548062409809712, "loss": 1.7867, "step": 17143 }, { "epoch": 0.57, "grad_norm": 0.44035229086875916, "learning_rate": 0.0005480565315331419, "loss": 1.8414, "step": 17144 }, { "epoch": 0.57, "grad_norm": 0.4286646246910095, "learning_rate": 0.0005480506529554678, "loss": 1.8704, "step": 17145 }, { "epoch": 0.57, "grad_norm": 0.44104453921318054, "learning_rate": 0.0005480447740766968, "loss": 1.942, "step": 17146 }, { "epoch": 0.57, "grad_norm": 0.42920613288879395, "learning_rate": 0.0005480388948968359, "loss": 1.8485, "step": 17147 }, { "epoch": 0.57, "grad_norm": 0.47808900475502014, "learning_rate": 0.0005480330154158924, "loss": 1.8479, "step": 17148 }, { "epoch": 0.57, "grad_norm": 0.4330281615257263, "learning_rate": 0.0005480271356338731, "loss": 1.836, "step": 17149 }, { "epoch": 0.57, "grad_norm": 0.41653990745544434, "learning_rate": 0.0005480212555507857, "loss": 1.8324, "step": 17150 }, { "epoch": 0.57, "grad_norm": 0.45295658707618713, "learning_rate": 0.000548015375166637, "loss": 1.8265, "step": 17151 }, { "epoch": 0.57, "grad_norm": 0.43638908863067627, "learning_rate": 0.0005480094944814341, "loss": 1.803, "step": 17152 }, { "epoch": 0.57, "grad_norm": 0.42695605754852295, "learning_rate": 0.0005480036134951843, "loss": 1.8032, "step": 17153 }, { "epoch": 0.57, "grad_norm": 0.42331430315971375, "learning_rate": 0.0005479977322078945, "loss": 1.8024, "step": 17154 }, { "epoch": 0.57, "grad_norm": 0.4408468008041382, "learning_rate": 0.0005479918506195721, "loss": 1.8945, "step": 17155 }, { "epoch": 0.57, "grad_norm": 0.44357284903526306, "learning_rate": 0.0005479859687302241, "loss": 1.8612, "step": 17156 }, { "epoch": 0.57, "grad_norm": 0.4286467134952545, "learning_rate": 0.0005479800865398577, "loss": 1.8663, "step": 17157 }, { "epoch": 0.57, "grad_norm": 0.4449898600578308, "learning_rate": 0.00054797420404848, "loss": 1.8785, "step": 17158 }, { "epoch": 0.57, "grad_norm": 0.4611419141292572, "learning_rate": 0.0005479683212560982, "loss": 1.8339, "step": 17159 }, { "epoch": 0.57, "grad_norm": 0.43756014108657837, "learning_rate": 0.0005479624381627194, "loss": 1.8345, "step": 17160 }, { "epoch": 0.57, "grad_norm": 0.42193639278411865, "learning_rate": 0.0005479565547683507, "loss": 1.89, "step": 17161 }, { "epoch": 0.57, "grad_norm": 0.43732669949531555, "learning_rate": 0.0005479506710729992, "loss": 1.8527, "step": 17162 }, { "epoch": 0.57, "grad_norm": 0.42117545008659363, "learning_rate": 0.0005479447870766723, "loss": 1.8039, "step": 17163 }, { "epoch": 0.57, "grad_norm": 0.4413728713989258, "learning_rate": 0.0005479389027793768, "loss": 1.8668, "step": 17164 }, { "epoch": 0.57, "grad_norm": 0.45086467266082764, "learning_rate": 0.00054793301818112, "loss": 1.9053, "step": 17165 }, { "epoch": 0.57, "grad_norm": 0.4245499074459076, "learning_rate": 0.0005479271332819091, "loss": 1.8327, "step": 17166 }, { "epoch": 0.57, "grad_norm": 0.43125391006469727, "learning_rate": 0.0005479212480817513, "loss": 1.9357, "step": 17167 }, { "epoch": 0.57, "grad_norm": 0.44819897413253784, "learning_rate": 0.0005479153625806534, "loss": 1.836, "step": 17168 }, { "epoch": 0.57, "grad_norm": 0.43027839064598083, "learning_rate": 0.000547909476778623, "loss": 1.7058, "step": 17169 }, { "epoch": 0.57, "grad_norm": 0.43293997645378113, "learning_rate": 0.0005479035906756669, "loss": 1.9019, "step": 17170 }, { "epoch": 0.57, "grad_norm": 0.435973197221756, "learning_rate": 0.0005478977042717925, "loss": 1.8243, "step": 17171 }, { "epoch": 0.57, "grad_norm": 0.43827831745147705, "learning_rate": 0.0005478918175670066, "loss": 1.8724, "step": 17172 }, { "epoch": 0.57, "grad_norm": 0.4168608784675598, "learning_rate": 0.0005478859305613167, "loss": 1.8787, "step": 17173 }, { "epoch": 0.57, "grad_norm": 0.4238969683647156, "learning_rate": 0.0005478800432547298, "loss": 1.8235, "step": 17174 }, { "epoch": 0.57, "grad_norm": 0.7354540824890137, "learning_rate": 0.000547874155647253, "loss": 1.8804, "step": 17175 }, { "epoch": 0.57, "grad_norm": 0.4196159541606903, "learning_rate": 0.0005478682677388935, "loss": 1.8327, "step": 17176 }, { "epoch": 0.57, "grad_norm": 0.4436001777648926, "learning_rate": 0.0005478623795296584, "loss": 1.8481, "step": 17177 }, { "epoch": 0.57, "grad_norm": 0.42833375930786133, "learning_rate": 0.000547856491019555, "loss": 1.8941, "step": 17178 }, { "epoch": 0.57, "grad_norm": 0.4265992343425751, "learning_rate": 0.0005478506022085904, "loss": 1.8426, "step": 17179 }, { "epoch": 0.57, "grad_norm": 0.4277384877204895, "learning_rate": 0.0005478447130967715, "loss": 1.8726, "step": 17180 }, { "epoch": 0.57, "grad_norm": 0.4311712384223938, "learning_rate": 0.0005478388236841058, "loss": 1.8427, "step": 17181 }, { "epoch": 0.57, "grad_norm": 0.6465120911598206, "learning_rate": 0.0005478329339706001, "loss": 1.8173, "step": 17182 }, { "epoch": 0.57, "grad_norm": 0.433535635471344, "learning_rate": 0.0005478270439562619, "loss": 1.9622, "step": 17183 }, { "epoch": 0.57, "grad_norm": 0.4367808401584625, "learning_rate": 0.0005478211536410981, "loss": 1.8385, "step": 17184 }, { "epoch": 0.57, "grad_norm": 0.42793163657188416, "learning_rate": 0.000547815263025116, "loss": 1.7684, "step": 17185 }, { "epoch": 0.57, "grad_norm": 0.441999226808548, "learning_rate": 0.0005478093721083226, "loss": 1.8477, "step": 17186 }, { "epoch": 0.57, "grad_norm": 0.46664512157440186, "learning_rate": 0.0005478034808907253, "loss": 1.893, "step": 17187 }, { "epoch": 0.57, "grad_norm": 0.4259668290615082, "learning_rate": 0.0005477975893723309, "loss": 1.818, "step": 17188 }, { "epoch": 0.57, "grad_norm": 0.4136134386062622, "learning_rate": 0.0005477916975531468, "loss": 1.8018, "step": 17189 }, { "epoch": 0.57, "grad_norm": 0.4383559226989746, "learning_rate": 0.0005477858054331801, "loss": 1.8759, "step": 17190 }, { "epoch": 0.57, "grad_norm": 0.46551719307899475, "learning_rate": 0.0005477799130124379, "loss": 1.8528, "step": 17191 }, { "epoch": 0.57, "grad_norm": 0.4340988099575043, "learning_rate": 0.0005477740202909274, "loss": 1.8165, "step": 17192 }, { "epoch": 0.57, "grad_norm": 0.41246628761291504, "learning_rate": 0.0005477681272686558, "loss": 1.8491, "step": 17193 }, { "epoch": 0.57, "grad_norm": 0.43836572766304016, "learning_rate": 0.0005477622339456302, "loss": 1.8952, "step": 17194 }, { "epoch": 0.57, "grad_norm": 0.4221886098384857, "learning_rate": 0.0005477563403218578, "loss": 1.7462, "step": 17195 }, { "epoch": 0.57, "grad_norm": 0.4123983681201935, "learning_rate": 0.0005477504463973456, "loss": 1.8687, "step": 17196 }, { "epoch": 0.57, "grad_norm": 0.4295193552970886, "learning_rate": 0.0005477445521721009, "loss": 1.8051, "step": 17197 }, { "epoch": 0.57, "grad_norm": 0.4115070104598999, "learning_rate": 0.0005477386576461308, "loss": 1.8005, "step": 17198 }, { "epoch": 0.57, "grad_norm": 0.43465355038642883, "learning_rate": 0.0005477327628194425, "loss": 1.8335, "step": 17199 }, { "epoch": 0.57, "grad_norm": 0.4147978723049164, "learning_rate": 0.0005477268676920431, "loss": 1.8177, "step": 17200 }, { "epoch": 0.57, "grad_norm": 0.477741539478302, "learning_rate": 0.0005477209722639398, "loss": 1.8286, "step": 17201 }, { "epoch": 0.57, "grad_norm": 0.4662248492240906, "learning_rate": 0.0005477150765351398, "loss": 1.8336, "step": 17202 }, { "epoch": 0.57, "grad_norm": 0.4368358850479126, "learning_rate": 0.0005477091805056501, "loss": 1.7889, "step": 17203 }, { "epoch": 0.57, "grad_norm": 0.4251212477684021, "learning_rate": 0.0005477032841754779, "loss": 1.7765, "step": 17204 }, { "epoch": 0.57, "grad_norm": 0.4349833130836487, "learning_rate": 0.0005476973875446306, "loss": 1.8766, "step": 17205 }, { "epoch": 0.57, "grad_norm": 0.4135032594203949, "learning_rate": 0.000547691490613115, "loss": 1.7986, "step": 17206 }, { "epoch": 0.57, "grad_norm": 0.4315882921218872, "learning_rate": 0.0005476855933809386, "loss": 1.8243, "step": 17207 }, { "epoch": 0.57, "grad_norm": 0.43895280361175537, "learning_rate": 0.0005476796958481082, "loss": 1.8646, "step": 17208 }, { "epoch": 0.57, "grad_norm": 0.45674312114715576, "learning_rate": 0.0005476737980146312, "loss": 1.8345, "step": 17209 }, { "epoch": 0.57, "grad_norm": 0.45368263125419617, "learning_rate": 0.0005476678998805148, "loss": 1.7872, "step": 17210 }, { "epoch": 0.57, "grad_norm": 0.4364786148071289, "learning_rate": 0.0005476620014457659, "loss": 1.849, "step": 17211 }, { "epoch": 0.57, "grad_norm": 0.42625197768211365, "learning_rate": 0.0005476561027103919, "loss": 1.8147, "step": 17212 }, { "epoch": 0.57, "grad_norm": 0.4438791871070862, "learning_rate": 0.0005476502036743999, "loss": 1.8965, "step": 17213 }, { "epoch": 0.57, "grad_norm": 0.4508223831653595, "learning_rate": 0.0005476443043377971, "loss": 1.8042, "step": 17214 }, { "epoch": 0.57, "grad_norm": 0.46519583463668823, "learning_rate": 0.0005476384047005905, "loss": 1.8631, "step": 17215 }, { "epoch": 0.57, "grad_norm": 0.42856067419052124, "learning_rate": 0.0005476325047627874, "loss": 1.8786, "step": 17216 }, { "epoch": 0.57, "grad_norm": 0.4473493993282318, "learning_rate": 0.000547626604524395, "loss": 1.8283, "step": 17217 }, { "epoch": 0.57, "grad_norm": 0.44278332591056824, "learning_rate": 0.0005476207039854203, "loss": 1.8536, "step": 17218 }, { "epoch": 0.57, "grad_norm": 0.4335629642009735, "learning_rate": 0.0005476148031458705, "loss": 1.7602, "step": 17219 }, { "epoch": 0.57, "grad_norm": 0.4249056875705719, "learning_rate": 0.000547608902005753, "loss": 1.8056, "step": 17220 }, { "epoch": 0.57, "grad_norm": 0.42492857575416565, "learning_rate": 0.0005476030005650745, "loss": 1.8055, "step": 17221 }, { "epoch": 0.57, "grad_norm": 0.4334864318370819, "learning_rate": 0.0005475970988238427, "loss": 1.8702, "step": 17222 }, { "epoch": 0.57, "grad_norm": 0.4354741871356964, "learning_rate": 0.0005475911967820643, "loss": 1.8345, "step": 17223 }, { "epoch": 0.57, "grad_norm": 0.4371764063835144, "learning_rate": 0.0005475852944397468, "loss": 1.8211, "step": 17224 }, { "epoch": 0.57, "grad_norm": 0.42623963952064514, "learning_rate": 0.0005475793917968972, "loss": 1.8087, "step": 17225 }, { "epoch": 0.57, "grad_norm": 0.44148755073547363, "learning_rate": 0.0005475734888535226, "loss": 1.7996, "step": 17226 }, { "epoch": 0.57, "grad_norm": 0.4487341046333313, "learning_rate": 0.0005475675856096304, "loss": 1.8054, "step": 17227 }, { "epoch": 0.57, "grad_norm": 0.46200481057167053, "learning_rate": 0.0005475616820652277, "loss": 1.8289, "step": 17228 }, { "epoch": 0.57, "grad_norm": 0.43347713351249695, "learning_rate": 0.0005475557782203214, "loss": 1.8678, "step": 17229 }, { "epoch": 0.57, "grad_norm": 0.44095414876937866, "learning_rate": 0.000547549874074919, "loss": 1.8375, "step": 17230 }, { "epoch": 0.57, "grad_norm": 0.4399895966053009, "learning_rate": 0.0005475439696290275, "loss": 1.7645, "step": 17231 }, { "epoch": 0.57, "grad_norm": 0.437315434217453, "learning_rate": 0.000547538064882654, "loss": 1.9357, "step": 17232 }, { "epoch": 0.57, "grad_norm": 0.4572124481201172, "learning_rate": 0.0005475321598358058, "loss": 1.8759, "step": 17233 }, { "epoch": 0.57, "grad_norm": 0.4581071436405182, "learning_rate": 0.0005475262544884901, "loss": 1.8272, "step": 17234 }, { "epoch": 0.57, "grad_norm": 0.4239978790283203, "learning_rate": 0.000547520348840714, "loss": 1.8348, "step": 17235 }, { "epoch": 0.57, "grad_norm": 0.4404447376728058, "learning_rate": 0.0005475144428924845, "loss": 1.8053, "step": 17236 }, { "epoch": 0.57, "grad_norm": 0.4342992603778839, "learning_rate": 0.0005475085366438092, "loss": 1.7686, "step": 17237 }, { "epoch": 0.57, "grad_norm": 0.4559510052204132, "learning_rate": 0.0005475026300946948, "loss": 1.7425, "step": 17238 }, { "epoch": 0.57, "grad_norm": 0.436812162399292, "learning_rate": 0.0005474967232451488, "loss": 1.859, "step": 17239 }, { "epoch": 0.57, "grad_norm": 0.47192975878715515, "learning_rate": 0.0005474908160951782, "loss": 1.8382, "step": 17240 }, { "epoch": 0.57, "grad_norm": 0.43824124336242676, "learning_rate": 0.0005474849086447902, "loss": 1.8453, "step": 17241 }, { "epoch": 0.57, "grad_norm": 0.45319169759750366, "learning_rate": 0.0005474790008939919, "loss": 1.8677, "step": 17242 }, { "epoch": 0.57, "grad_norm": 0.4474639296531677, "learning_rate": 0.0005474730928427907, "loss": 1.895, "step": 17243 }, { "epoch": 0.57, "grad_norm": 0.4617239534854889, "learning_rate": 0.0005474671844911935, "loss": 1.8669, "step": 17244 }, { "epoch": 0.57, "grad_norm": 0.44443565607070923, "learning_rate": 0.0005474612758392078, "loss": 1.8967, "step": 17245 }, { "epoch": 0.57, "grad_norm": 0.42849183082580566, "learning_rate": 0.0005474553668868403, "loss": 1.9113, "step": 17246 }, { "epoch": 0.57, "grad_norm": 0.42962756752967834, "learning_rate": 0.0005474494576340987, "loss": 1.7703, "step": 17247 }, { "epoch": 0.57, "grad_norm": 0.41465526819229126, "learning_rate": 0.0005474435480809899, "loss": 1.9198, "step": 17248 }, { "epoch": 0.57, "grad_norm": 0.4374309182167053, "learning_rate": 0.0005474376382275209, "loss": 1.8316, "step": 17249 }, { "epoch": 0.57, "grad_norm": 0.44120079278945923, "learning_rate": 0.0005474317280736991, "loss": 1.8756, "step": 17250 }, { "epoch": 0.57, "grad_norm": 0.4115590751171112, "learning_rate": 0.0005474258176195317, "loss": 1.7313, "step": 17251 }, { "epoch": 0.57, "grad_norm": 0.4185815751552582, "learning_rate": 0.0005474199068650258, "loss": 1.7982, "step": 17252 }, { "epoch": 0.57, "grad_norm": 0.44835641980171204, "learning_rate": 0.0005474139958101887, "loss": 1.8689, "step": 17253 }, { "epoch": 0.57, "grad_norm": 0.4321570098400116, "learning_rate": 0.0005474080844550274, "loss": 1.8296, "step": 17254 }, { "epoch": 0.57, "grad_norm": 0.4220450818538666, "learning_rate": 0.0005474021727995491, "loss": 1.8384, "step": 17255 }, { "epoch": 0.57, "grad_norm": 0.4420001804828644, "learning_rate": 0.000547396260843761, "loss": 1.8502, "step": 17256 }, { "epoch": 0.57, "grad_norm": 0.43047603964805603, "learning_rate": 0.0005473903485876703, "loss": 1.831, "step": 17257 }, { "epoch": 0.57, "grad_norm": 0.43328922986984253, "learning_rate": 0.0005473844360312841, "loss": 1.8878, "step": 17258 }, { "epoch": 0.57, "grad_norm": 0.42164289951324463, "learning_rate": 0.0005473785231746098, "loss": 1.8716, "step": 17259 }, { "epoch": 0.57, "grad_norm": 0.41680124402046204, "learning_rate": 0.0005473726100176544, "loss": 1.8765, "step": 17260 }, { "epoch": 0.57, "grad_norm": 0.43660274147987366, "learning_rate": 0.000547366696560425, "loss": 1.9072, "step": 17261 }, { "epoch": 0.57, "grad_norm": 0.4411434829235077, "learning_rate": 0.0005473607828029289, "loss": 1.7851, "step": 17262 }, { "epoch": 0.57, "grad_norm": 0.4678376019001007, "learning_rate": 0.0005473548687451734, "loss": 1.7827, "step": 17263 }, { "epoch": 0.57, "grad_norm": 0.46213722229003906, "learning_rate": 0.0005473489543871653, "loss": 1.8222, "step": 17264 }, { "epoch": 0.57, "grad_norm": 0.441353976726532, "learning_rate": 0.0005473430397289122, "loss": 1.8258, "step": 17265 }, { "epoch": 0.57, "grad_norm": 0.4262196123600006, "learning_rate": 0.000547337124770421, "loss": 1.8219, "step": 17266 }, { "epoch": 0.57, "grad_norm": 0.43617236614227295, "learning_rate": 0.000547331209511699, "loss": 1.8662, "step": 17267 }, { "epoch": 0.57, "grad_norm": 0.44177311658859253, "learning_rate": 0.0005473252939527534, "loss": 1.8184, "step": 17268 }, { "epoch": 0.57, "grad_norm": 0.453527569770813, "learning_rate": 0.0005473193780935914, "loss": 1.8917, "step": 17269 }, { "epoch": 0.57, "grad_norm": 0.41601964831352234, "learning_rate": 0.0005473134619342199, "loss": 1.8787, "step": 17270 }, { "epoch": 0.57, "grad_norm": 0.4168050289154053, "learning_rate": 0.0005473075454746465, "loss": 1.8488, "step": 17271 }, { "epoch": 0.57, "grad_norm": 0.43220871686935425, "learning_rate": 0.000547301628714878, "loss": 1.9416, "step": 17272 }, { "epoch": 0.57, "grad_norm": 0.42388486862182617, "learning_rate": 0.0005472957116549219, "loss": 1.8158, "step": 17273 }, { "epoch": 0.57, "grad_norm": 0.42610883712768555, "learning_rate": 0.0005472897942947852, "loss": 1.8528, "step": 17274 }, { "epoch": 0.57, "grad_norm": 0.4430513381958008, "learning_rate": 0.000547283876634475, "loss": 1.8575, "step": 17275 }, { "epoch": 0.57, "grad_norm": 0.41279879212379456, "learning_rate": 0.0005472779586739988, "loss": 1.8429, "step": 17276 }, { "epoch": 0.57, "grad_norm": 0.4190906882286072, "learning_rate": 0.0005472720404133635, "loss": 1.8649, "step": 17277 }, { "epoch": 0.57, "grad_norm": 0.430721640586853, "learning_rate": 0.0005472661218525765, "loss": 1.8207, "step": 17278 }, { "epoch": 0.57, "grad_norm": 0.43027254939079285, "learning_rate": 0.0005472602029916447, "loss": 1.7188, "step": 17279 }, { "epoch": 0.57, "grad_norm": 0.4357110261917114, "learning_rate": 0.0005472542838305754, "loss": 1.8303, "step": 17280 }, { "epoch": 0.57, "grad_norm": 0.4365999698638916, "learning_rate": 0.000547248364369376, "loss": 1.9496, "step": 17281 }, { "epoch": 0.57, "grad_norm": 0.4370296597480774, "learning_rate": 0.0005472424446080535, "loss": 1.8497, "step": 17282 }, { "epoch": 0.58, "grad_norm": 0.42554399371147156, "learning_rate": 0.000547236524546615, "loss": 1.875, "step": 17283 }, { "epoch": 0.58, "grad_norm": 0.4287129044532776, "learning_rate": 0.0005472306041850679, "loss": 1.88, "step": 17284 }, { "epoch": 0.58, "grad_norm": 0.46174356341362, "learning_rate": 0.0005472246835234192, "loss": 1.8208, "step": 17285 }, { "epoch": 0.58, "grad_norm": 0.45642390847206116, "learning_rate": 0.0005472187625616762, "loss": 1.7778, "step": 17286 }, { "epoch": 0.58, "grad_norm": 0.45064467191696167, "learning_rate": 0.000547212841299846, "loss": 1.8262, "step": 17287 }, { "epoch": 0.58, "grad_norm": 0.42840802669525146, "learning_rate": 0.0005472069197379358, "loss": 1.8126, "step": 17288 }, { "epoch": 0.58, "grad_norm": 0.43980011343955994, "learning_rate": 0.000547200997875953, "loss": 1.9043, "step": 17289 }, { "epoch": 0.58, "grad_norm": 0.4490205943584442, "learning_rate": 0.0005471950757139044, "loss": 1.8436, "step": 17290 }, { "epoch": 0.58, "grad_norm": 0.4576629400253296, "learning_rate": 0.0005471891532517974, "loss": 1.877, "step": 17291 }, { "epoch": 0.58, "grad_norm": 0.44090577960014343, "learning_rate": 0.0005471832304896394, "loss": 1.8239, "step": 17292 }, { "epoch": 0.58, "grad_norm": 0.4346437454223633, "learning_rate": 0.0005471773074274373, "loss": 1.8769, "step": 17293 }, { "epoch": 0.58, "grad_norm": 0.4193761646747589, "learning_rate": 0.0005471713840651983, "loss": 1.801, "step": 17294 }, { "epoch": 0.58, "grad_norm": 0.43376240134239197, "learning_rate": 0.0005471654604029297, "loss": 1.7855, "step": 17295 }, { "epoch": 0.58, "grad_norm": 0.535800576210022, "learning_rate": 0.0005471595364406386, "loss": 1.8466, "step": 17296 }, { "epoch": 0.58, "grad_norm": 0.4296000003814697, "learning_rate": 0.0005471536121783323, "loss": 1.8541, "step": 17297 }, { "epoch": 0.58, "grad_norm": 0.44583702087402344, "learning_rate": 0.0005471476876160179, "loss": 1.8734, "step": 17298 }, { "epoch": 0.58, "grad_norm": 0.4311850965023041, "learning_rate": 0.0005471417627537027, "loss": 1.8242, "step": 17299 }, { "epoch": 0.58, "grad_norm": 0.4287923574447632, "learning_rate": 0.0005471358375913937, "loss": 1.8428, "step": 17300 }, { "epoch": 0.58, "grad_norm": 0.4130536913871765, "learning_rate": 0.0005471299121290982, "loss": 1.855, "step": 17301 }, { "epoch": 0.58, "grad_norm": 0.4489728808403015, "learning_rate": 0.0005471239863668235, "loss": 1.8501, "step": 17302 }, { "epoch": 0.58, "grad_norm": 0.43372097611427307, "learning_rate": 0.0005471180603045767, "loss": 1.8404, "step": 17303 }, { "epoch": 0.58, "grad_norm": 0.43049508333206177, "learning_rate": 0.0005471121339423649, "loss": 1.8199, "step": 17304 }, { "epoch": 0.58, "grad_norm": 0.41348743438720703, "learning_rate": 0.0005471062072801954, "loss": 1.857, "step": 17305 }, { "epoch": 0.58, "grad_norm": 0.43211016058921814, "learning_rate": 0.0005471002803180753, "loss": 1.8223, "step": 17306 }, { "epoch": 0.58, "grad_norm": 0.4302797019481659, "learning_rate": 0.0005470943530560121, "loss": 1.9273, "step": 17307 }, { "epoch": 0.58, "grad_norm": 0.4222186505794525, "learning_rate": 0.0005470884254940126, "loss": 1.8042, "step": 17308 }, { "epoch": 0.58, "grad_norm": 0.4209614098072052, "learning_rate": 0.0005470824976320841, "loss": 1.8644, "step": 17309 }, { "epoch": 0.58, "grad_norm": 0.42003190517425537, "learning_rate": 0.000547076569470234, "loss": 1.8431, "step": 17310 }, { "epoch": 0.58, "grad_norm": 0.4382179081439972, "learning_rate": 0.0005470706410084693, "loss": 1.7987, "step": 17311 }, { "epoch": 0.58, "grad_norm": 0.42576494812965393, "learning_rate": 0.0005470647122467971, "loss": 1.8925, "step": 17312 }, { "epoch": 0.58, "grad_norm": 0.41424185037612915, "learning_rate": 0.000547058783185225, "loss": 1.8169, "step": 17313 }, { "epoch": 0.58, "grad_norm": 0.426537424325943, "learning_rate": 0.0005470528538237597, "loss": 1.8778, "step": 17314 }, { "epoch": 0.58, "grad_norm": 0.4311671257019043, "learning_rate": 0.0005470469241624089, "loss": 1.8169, "step": 17315 }, { "epoch": 0.58, "grad_norm": 0.4275170564651489, "learning_rate": 0.0005470409942011793, "loss": 1.9027, "step": 17316 }, { "epoch": 0.58, "grad_norm": 0.4215073883533478, "learning_rate": 0.0005470350639400784, "loss": 1.8344, "step": 17317 }, { "epoch": 0.58, "grad_norm": 0.41704681515693665, "learning_rate": 0.0005470291333791133, "loss": 1.8756, "step": 17318 }, { "epoch": 0.58, "grad_norm": 0.42963555455207825, "learning_rate": 0.0005470232025182913, "loss": 1.8592, "step": 17319 }, { "epoch": 0.58, "grad_norm": 0.4288923442363739, "learning_rate": 0.0005470172713576194, "loss": 1.8551, "step": 17320 }, { "epoch": 0.58, "grad_norm": 0.4246644675731659, "learning_rate": 0.0005470113398971052, "loss": 1.842, "step": 17321 }, { "epoch": 0.58, "grad_norm": 0.42442139983177185, "learning_rate": 0.0005470054081367554, "loss": 1.8254, "step": 17322 }, { "epoch": 0.58, "grad_norm": 0.4227149784564972, "learning_rate": 0.0005469994760765775, "loss": 1.8522, "step": 17323 }, { "epoch": 0.58, "grad_norm": 0.4211028814315796, "learning_rate": 0.0005469935437165786, "loss": 1.8874, "step": 17324 }, { "epoch": 0.58, "grad_norm": 0.4248273968696594, "learning_rate": 0.0005469876110567659, "loss": 1.8375, "step": 17325 }, { "epoch": 0.58, "grad_norm": 0.42907199263572693, "learning_rate": 0.0005469816780971468, "loss": 1.8875, "step": 17326 }, { "epoch": 0.58, "grad_norm": 0.4319453537464142, "learning_rate": 0.0005469757448377282, "loss": 1.7948, "step": 17327 }, { "epoch": 0.58, "grad_norm": 0.44766882061958313, "learning_rate": 0.0005469698112785175, "loss": 1.8555, "step": 17328 }, { "epoch": 0.58, "grad_norm": 0.4447594881057739, "learning_rate": 0.0005469638774195216, "loss": 1.8982, "step": 17329 }, { "epoch": 0.58, "grad_norm": 0.416053831577301, "learning_rate": 0.0005469579432607482, "loss": 1.8595, "step": 17330 }, { "epoch": 0.58, "grad_norm": 0.4186727702617645, "learning_rate": 0.0005469520088022042, "loss": 1.7737, "step": 17331 }, { "epoch": 0.58, "grad_norm": 0.43986955285072327, "learning_rate": 0.0005469460740438969, "loss": 1.8813, "step": 17332 }, { "epoch": 0.58, "grad_norm": 0.4574683904647827, "learning_rate": 0.0005469401389858334, "loss": 1.8589, "step": 17333 }, { "epoch": 0.58, "grad_norm": 0.4216669201850891, "learning_rate": 0.0005469342036280209, "loss": 1.8394, "step": 17334 }, { "epoch": 0.58, "grad_norm": 0.4336509108543396, "learning_rate": 0.0005469282679704666, "loss": 1.9047, "step": 17335 }, { "epoch": 0.58, "grad_norm": 0.4340974688529968, "learning_rate": 0.0005469223320131779, "loss": 1.865, "step": 17336 }, { "epoch": 0.58, "grad_norm": 0.4360249638557434, "learning_rate": 0.0005469163957561618, "loss": 1.885, "step": 17337 }, { "epoch": 0.58, "grad_norm": 0.5065973401069641, "learning_rate": 0.0005469104591994256, "loss": 1.8741, "step": 17338 }, { "epoch": 0.58, "grad_norm": 0.4386346638202667, "learning_rate": 0.0005469045223429765, "loss": 1.8982, "step": 17339 }, { "epoch": 0.58, "grad_norm": 0.4251817762851715, "learning_rate": 0.0005468985851868217, "loss": 1.8861, "step": 17340 }, { "epoch": 0.58, "grad_norm": 0.45064103603363037, "learning_rate": 0.0005468926477309684, "loss": 1.8873, "step": 17341 }, { "epoch": 0.58, "grad_norm": 0.41012370586395264, "learning_rate": 0.0005468867099754237, "loss": 1.9347, "step": 17342 }, { "epoch": 0.58, "grad_norm": 0.4278475344181061, "learning_rate": 0.000546880771920195, "loss": 1.9208, "step": 17343 }, { "epoch": 0.58, "grad_norm": 0.43150290846824646, "learning_rate": 0.0005468748335652895, "loss": 1.8762, "step": 17344 }, { "epoch": 0.58, "grad_norm": 0.4422946274280548, "learning_rate": 0.0005468688949107142, "loss": 1.8192, "step": 17345 }, { "epoch": 0.58, "grad_norm": 0.4261646866798401, "learning_rate": 0.0005468629559564765, "loss": 1.9083, "step": 17346 }, { "epoch": 0.58, "grad_norm": 0.4136403203010559, "learning_rate": 0.0005468570167025835, "loss": 1.7871, "step": 17347 }, { "epoch": 0.58, "grad_norm": 0.43762946128845215, "learning_rate": 0.0005468510771490425, "loss": 1.9526, "step": 17348 }, { "epoch": 0.58, "grad_norm": 0.43022117018699646, "learning_rate": 0.0005468451372958607, "loss": 1.8973, "step": 17349 }, { "epoch": 0.58, "grad_norm": 0.4379335045814514, "learning_rate": 0.0005468391971430452, "loss": 1.8463, "step": 17350 }, { "epoch": 0.58, "grad_norm": 0.41873040795326233, "learning_rate": 0.0005468332566906032, "loss": 1.8393, "step": 17351 }, { "epoch": 0.58, "grad_norm": 0.4222276210784912, "learning_rate": 0.0005468273159385421, "loss": 1.8141, "step": 17352 }, { "epoch": 0.58, "grad_norm": 0.4340069890022278, "learning_rate": 0.000546821374886869, "loss": 1.8834, "step": 17353 }, { "epoch": 0.58, "grad_norm": 0.42703887820243835, "learning_rate": 0.0005468154335355911, "loss": 1.8908, "step": 17354 }, { "epoch": 0.58, "grad_norm": 0.433403342962265, "learning_rate": 0.0005468094918847157, "loss": 1.9201, "step": 17355 }, { "epoch": 0.58, "grad_norm": 0.4392319917678833, "learning_rate": 0.0005468035499342498, "loss": 1.9279, "step": 17356 }, { "epoch": 0.58, "grad_norm": 0.42116519808769226, "learning_rate": 0.0005467976076842009, "loss": 1.8748, "step": 17357 }, { "epoch": 0.58, "grad_norm": 0.43042707443237305, "learning_rate": 0.0005467916651345759, "loss": 1.8808, "step": 17358 }, { "epoch": 0.58, "grad_norm": 0.4410739839076996, "learning_rate": 0.0005467857222853824, "loss": 1.809, "step": 17359 }, { "epoch": 0.58, "grad_norm": 0.45892927050590515, "learning_rate": 0.0005467797791366273, "loss": 1.9065, "step": 17360 }, { "epoch": 0.58, "grad_norm": 0.4261471927165985, "learning_rate": 0.0005467738356883179, "loss": 1.9169, "step": 17361 }, { "epoch": 0.58, "grad_norm": 0.43329983949661255, "learning_rate": 0.0005467678919404615, "loss": 1.871, "step": 17362 }, { "epoch": 0.58, "grad_norm": 0.46418869495391846, "learning_rate": 0.000546761947893065, "loss": 1.8825, "step": 17363 }, { "epoch": 0.58, "grad_norm": 0.4416811168193817, "learning_rate": 0.0005467560035461361, "loss": 1.8823, "step": 17364 }, { "epoch": 0.58, "grad_norm": 0.4410824775695801, "learning_rate": 0.0005467500588996817, "loss": 1.8943, "step": 17365 }, { "epoch": 0.58, "grad_norm": 0.4547141492366791, "learning_rate": 0.000546744113953709, "loss": 1.8315, "step": 17366 }, { "epoch": 0.58, "grad_norm": 0.4502398669719696, "learning_rate": 0.0005467381687082254, "loss": 1.8644, "step": 17367 }, { "epoch": 0.58, "grad_norm": 0.4496721625328064, "learning_rate": 0.000546732223163238, "loss": 1.8713, "step": 17368 }, { "epoch": 0.58, "grad_norm": 0.4146259129047394, "learning_rate": 0.000546726277318754, "loss": 1.85, "step": 17369 }, { "epoch": 0.58, "grad_norm": 0.4482557773590088, "learning_rate": 0.0005467203311747807, "loss": 1.9102, "step": 17370 }, { "epoch": 0.58, "grad_norm": 0.432277113199234, "learning_rate": 0.0005467143847313253, "loss": 1.7819, "step": 17371 }, { "epoch": 0.58, "grad_norm": 0.4353451132774353, "learning_rate": 0.0005467084379883949, "loss": 1.7915, "step": 17372 }, { "epoch": 0.58, "grad_norm": 0.42209696769714355, "learning_rate": 0.0005467024909459968, "loss": 1.8116, "step": 17373 }, { "epoch": 0.58, "grad_norm": 0.41639646887779236, "learning_rate": 0.0005466965436041383, "loss": 1.8375, "step": 17374 }, { "epoch": 0.58, "grad_norm": 0.42671141028404236, "learning_rate": 0.0005466905959628265, "loss": 1.8765, "step": 17375 }, { "epoch": 0.58, "grad_norm": 0.41824793815612793, "learning_rate": 0.0005466846480220687, "loss": 1.8836, "step": 17376 }, { "epoch": 0.58, "grad_norm": 0.4325384199619293, "learning_rate": 0.0005466786997818721, "loss": 1.8438, "step": 17377 }, { "epoch": 0.58, "grad_norm": 0.4143505096435547, "learning_rate": 0.0005466727512422439, "loss": 1.8092, "step": 17378 }, { "epoch": 0.58, "grad_norm": 0.41691669821739197, "learning_rate": 0.0005466668024031912, "loss": 1.8086, "step": 17379 }, { "epoch": 0.58, "grad_norm": 0.44449254870414734, "learning_rate": 0.0005466608532647215, "loss": 1.9371, "step": 17380 }, { "epoch": 0.58, "grad_norm": 0.4434865713119507, "learning_rate": 0.0005466549038268419, "loss": 1.862, "step": 17381 }, { "epoch": 0.58, "grad_norm": 0.42889532446861267, "learning_rate": 0.0005466489540895594, "loss": 1.8071, "step": 17382 }, { "epoch": 0.58, "grad_norm": 0.4215066730976105, "learning_rate": 0.0005466430040528815, "loss": 1.8602, "step": 17383 }, { "epoch": 0.58, "grad_norm": 0.442618727684021, "learning_rate": 0.0005466370537168154, "loss": 1.8429, "step": 17384 }, { "epoch": 0.58, "grad_norm": 0.4339880347251892, "learning_rate": 0.0005466311030813681, "loss": 1.792, "step": 17385 }, { "epoch": 0.58, "grad_norm": 0.43195539712905884, "learning_rate": 0.0005466251521465471, "loss": 1.8819, "step": 17386 }, { "epoch": 0.58, "grad_norm": 0.4369543492794037, "learning_rate": 0.0005466192009123595, "loss": 1.7948, "step": 17387 }, { "epoch": 0.58, "grad_norm": 0.44435805082321167, "learning_rate": 0.0005466132493788126, "loss": 1.8251, "step": 17388 }, { "epoch": 0.58, "grad_norm": 0.42729172110557556, "learning_rate": 0.0005466072975459134, "loss": 1.8664, "step": 17389 }, { "epoch": 0.58, "grad_norm": 0.4231511950492859, "learning_rate": 0.0005466013454136694, "loss": 1.7455, "step": 17390 }, { "epoch": 0.58, "grad_norm": 0.43956702947616577, "learning_rate": 0.0005465953929820876, "loss": 1.8316, "step": 17391 }, { "epoch": 0.58, "grad_norm": 0.4254951477050781, "learning_rate": 0.0005465894402511754, "loss": 1.7727, "step": 17392 }, { "epoch": 0.58, "grad_norm": 0.44154343008995056, "learning_rate": 0.0005465834872209398, "loss": 1.8205, "step": 17393 }, { "epoch": 0.58, "grad_norm": 0.4505404829978943, "learning_rate": 0.0005465775338913884, "loss": 1.9222, "step": 17394 }, { "epoch": 0.58, "grad_norm": 0.4194376468658447, "learning_rate": 0.0005465715802625281, "loss": 1.8239, "step": 17395 }, { "epoch": 0.58, "grad_norm": 0.4452316164970398, "learning_rate": 0.0005465656263343663, "loss": 1.8495, "step": 17396 }, { "epoch": 0.58, "grad_norm": 0.47643736004829407, "learning_rate": 0.00054655967210691, "loss": 1.8549, "step": 17397 }, { "epoch": 0.58, "grad_norm": 0.4436444044113159, "learning_rate": 0.0005465537175801667, "loss": 1.8411, "step": 17398 }, { "epoch": 0.58, "grad_norm": 0.4297867715358734, "learning_rate": 0.0005465477627541435, "loss": 1.8159, "step": 17399 }, { "epoch": 0.58, "grad_norm": 0.4300742447376251, "learning_rate": 0.0005465418076288477, "loss": 1.8481, "step": 17400 }, { "epoch": 0.58, "grad_norm": 0.4592542350292206, "learning_rate": 0.0005465358522042864, "loss": 1.8338, "step": 17401 }, { "epoch": 0.58, "grad_norm": 0.4586908221244812, "learning_rate": 0.0005465298964804669, "loss": 1.8632, "step": 17402 }, { "epoch": 0.58, "grad_norm": 0.4495760500431061, "learning_rate": 0.0005465239404573965, "loss": 1.8067, "step": 17403 }, { "epoch": 0.58, "grad_norm": 0.4523005485534668, "learning_rate": 0.0005465179841350823, "loss": 1.8004, "step": 17404 }, { "epoch": 0.58, "grad_norm": 0.4474194049835205, "learning_rate": 0.0005465120275135316, "loss": 1.8476, "step": 17405 }, { "epoch": 0.58, "grad_norm": 0.4487355947494507, "learning_rate": 0.0005465060705927516, "loss": 1.824, "step": 17406 }, { "epoch": 0.58, "grad_norm": 0.42719289660453796, "learning_rate": 0.0005465001133727496, "loss": 1.7663, "step": 17407 }, { "epoch": 0.58, "grad_norm": 0.4378451406955719, "learning_rate": 0.0005464941558535327, "loss": 1.8002, "step": 17408 }, { "epoch": 0.58, "grad_norm": 0.41673344373703003, "learning_rate": 0.0005464881980351083, "loss": 1.8419, "step": 17409 }, { "epoch": 0.58, "grad_norm": 0.431236207485199, "learning_rate": 0.0005464822399174835, "loss": 1.7945, "step": 17410 }, { "epoch": 0.58, "grad_norm": 0.4187890887260437, "learning_rate": 0.0005464762815006655, "loss": 1.8835, "step": 17411 }, { "epoch": 0.58, "grad_norm": 0.44508126378059387, "learning_rate": 0.0005464703227846618, "loss": 1.8963, "step": 17412 }, { "epoch": 0.58, "grad_norm": 0.4232822358608246, "learning_rate": 0.0005464643637694794, "loss": 1.8604, "step": 17413 }, { "epoch": 0.58, "grad_norm": 0.42388463020324707, "learning_rate": 0.0005464584044551254, "loss": 1.9378, "step": 17414 }, { "epoch": 0.58, "grad_norm": 0.42356207966804504, "learning_rate": 0.0005464524448416073, "loss": 1.8328, "step": 17415 }, { "epoch": 0.58, "grad_norm": 0.800632119178772, "learning_rate": 0.0005464464849289324, "loss": 1.8864, "step": 17416 }, { "epoch": 0.58, "grad_norm": 0.41644081473350525, "learning_rate": 0.0005464405247171076, "loss": 1.8053, "step": 17417 }, { "epoch": 0.58, "grad_norm": 0.42063406109809875, "learning_rate": 0.0005464345642061404, "loss": 1.872, "step": 17418 }, { "epoch": 0.58, "grad_norm": 0.4424031972885132, "learning_rate": 0.0005464286033960378, "loss": 1.8278, "step": 17419 }, { "epoch": 0.58, "grad_norm": 0.4237266182899475, "learning_rate": 0.0005464226422868074, "loss": 1.8017, "step": 17420 }, { "epoch": 0.58, "grad_norm": 0.4198475182056427, "learning_rate": 0.0005464166808784561, "loss": 1.8818, "step": 17421 }, { "epoch": 0.58, "grad_norm": 0.42679837346076965, "learning_rate": 0.0005464107191709913, "loss": 1.8478, "step": 17422 }, { "epoch": 0.58, "grad_norm": 0.433023065328598, "learning_rate": 0.0005464047571644201, "loss": 1.7611, "step": 17423 }, { "epoch": 0.58, "grad_norm": 0.42254963517189026, "learning_rate": 0.00054639879485875, "loss": 1.8717, "step": 17424 }, { "epoch": 0.58, "grad_norm": 0.43546438217163086, "learning_rate": 0.000546392832253988, "loss": 1.8353, "step": 17425 }, { "epoch": 0.58, "grad_norm": 0.43107399344444275, "learning_rate": 0.0005463868693501414, "loss": 1.8919, "step": 17426 }, { "epoch": 0.58, "grad_norm": 0.43577098846435547, "learning_rate": 0.0005463809061472174, "loss": 1.9281, "step": 17427 }, { "epoch": 0.58, "grad_norm": 0.4225974380970001, "learning_rate": 0.0005463749426452234, "loss": 1.8244, "step": 17428 }, { "epoch": 0.58, "grad_norm": 0.45964497327804565, "learning_rate": 0.0005463689788441663, "loss": 1.868, "step": 17429 }, { "epoch": 0.58, "grad_norm": 0.422724187374115, "learning_rate": 0.0005463630147440538, "loss": 1.8367, "step": 17430 }, { "epoch": 0.58, "grad_norm": 0.44798240065574646, "learning_rate": 0.0005463570503448927, "loss": 1.8133, "step": 17431 }, { "epoch": 0.58, "grad_norm": 0.4203415513038635, "learning_rate": 0.0005463510856466905, "loss": 1.8556, "step": 17432 }, { "epoch": 0.58, "grad_norm": 0.4154452383518219, "learning_rate": 0.0005463451206494545, "loss": 1.8133, "step": 17433 }, { "epoch": 0.58, "grad_norm": 0.44414666295051575, "learning_rate": 0.0005463391553531916, "loss": 1.8349, "step": 17434 }, { "epoch": 0.58, "grad_norm": 0.42021870613098145, "learning_rate": 0.0005463331897579094, "loss": 1.796, "step": 17435 }, { "epoch": 0.58, "grad_norm": 0.43787214159965515, "learning_rate": 0.0005463272238636151, "loss": 1.7848, "step": 17436 }, { "epoch": 0.58, "grad_norm": 0.4303097128868103, "learning_rate": 0.0005463212576703158, "loss": 1.8028, "step": 17437 }, { "epoch": 0.58, "grad_norm": 0.449002742767334, "learning_rate": 0.0005463152911780186, "loss": 1.8677, "step": 17438 }, { "epoch": 0.58, "grad_norm": 0.42697685956954956, "learning_rate": 0.0005463093243867312, "loss": 1.8519, "step": 17439 }, { "epoch": 0.58, "grad_norm": 0.42236027121543884, "learning_rate": 0.0005463033572964603, "loss": 1.86, "step": 17440 }, { "epoch": 0.58, "grad_norm": 0.45469343662261963, "learning_rate": 0.0005462973899072136, "loss": 1.8895, "step": 17441 }, { "epoch": 0.58, "grad_norm": 0.45170876383781433, "learning_rate": 0.0005462914222189981, "loss": 1.8404, "step": 17442 }, { "epoch": 0.58, "grad_norm": 0.42461949586868286, "learning_rate": 0.0005462854542318211, "loss": 1.8744, "step": 17443 }, { "epoch": 0.58, "grad_norm": 0.4401474595069885, "learning_rate": 0.0005462794859456898, "loss": 1.7921, "step": 17444 }, { "epoch": 0.58, "grad_norm": 0.44863834977149963, "learning_rate": 0.0005462735173606116, "loss": 1.9389, "step": 17445 }, { "epoch": 0.58, "grad_norm": 0.43066516518592834, "learning_rate": 0.0005462675484765935, "loss": 1.8357, "step": 17446 }, { "epoch": 0.58, "grad_norm": 0.42861539125442505, "learning_rate": 0.000546261579293643, "loss": 1.8536, "step": 17447 }, { "epoch": 0.58, "grad_norm": 0.4230435788631439, "learning_rate": 0.0005462556098117672, "loss": 1.8462, "step": 17448 }, { "epoch": 0.58, "grad_norm": 0.43823057413101196, "learning_rate": 0.0005462496400309734, "loss": 1.8249, "step": 17449 }, { "epoch": 0.58, "grad_norm": 0.45902055501937866, "learning_rate": 0.0005462436699512688, "loss": 1.9032, "step": 17450 }, { "epoch": 0.58, "grad_norm": 0.43211525678634644, "learning_rate": 0.0005462376995726606, "loss": 1.8558, "step": 17451 }, { "epoch": 0.58, "grad_norm": 0.4305119812488556, "learning_rate": 0.0005462317288951561, "loss": 1.8279, "step": 17452 }, { "epoch": 0.58, "grad_norm": 0.43165042996406555, "learning_rate": 0.0005462257579187627, "loss": 1.8446, "step": 17453 }, { "epoch": 0.58, "grad_norm": 0.4663831293582916, "learning_rate": 0.0005462197866434875, "loss": 1.8578, "step": 17454 }, { "epoch": 0.58, "grad_norm": 0.45499464869499207, "learning_rate": 0.0005462138150693377, "loss": 1.882, "step": 17455 }, { "epoch": 0.58, "grad_norm": 0.4239957630634308, "learning_rate": 0.0005462078431963205, "loss": 1.7812, "step": 17456 }, { "epoch": 0.58, "grad_norm": 0.4326235353946686, "learning_rate": 0.0005462018710244434, "loss": 1.768, "step": 17457 }, { "epoch": 0.58, "grad_norm": 0.470295786857605, "learning_rate": 0.0005461958985537135, "loss": 1.8524, "step": 17458 }, { "epoch": 0.58, "grad_norm": 0.4340074956417084, "learning_rate": 0.0005461899257841379, "loss": 1.7956, "step": 17459 }, { "epoch": 0.58, "grad_norm": 0.4345663785934448, "learning_rate": 0.0005461839527157242, "loss": 1.8725, "step": 17460 }, { "epoch": 0.58, "grad_norm": 0.4339330494403839, "learning_rate": 0.0005461779793484794, "loss": 1.8283, "step": 17461 }, { "epoch": 0.58, "grad_norm": 0.46538716554641724, "learning_rate": 0.0005461720056824107, "loss": 1.8214, "step": 17462 }, { "epoch": 0.58, "grad_norm": 0.44064995646476746, "learning_rate": 0.0005461660317175256, "loss": 1.9542, "step": 17463 }, { "epoch": 0.58, "grad_norm": 0.42479991912841797, "learning_rate": 0.0005461600574538312, "loss": 1.8524, "step": 17464 }, { "epoch": 0.58, "grad_norm": 0.4550491273403168, "learning_rate": 0.0005461540828913347, "loss": 1.822, "step": 17465 }, { "epoch": 0.58, "grad_norm": 0.4179175794124603, "learning_rate": 0.0005461481080300433, "loss": 1.8194, "step": 17466 }, { "epoch": 0.58, "grad_norm": 0.42447155714035034, "learning_rate": 0.0005461421328699646, "loss": 1.8735, "step": 17467 }, { "epoch": 0.58, "grad_norm": 0.4361807405948639, "learning_rate": 0.0005461361574111054, "loss": 1.8573, "step": 17468 }, { "epoch": 0.58, "grad_norm": 0.4321713149547577, "learning_rate": 0.0005461301816534733, "loss": 1.8082, "step": 17469 }, { "epoch": 0.58, "grad_norm": 0.4358696937561035, "learning_rate": 0.0005461242055970754, "loss": 1.8659, "step": 17470 }, { "epoch": 0.58, "grad_norm": 0.41939112544059753, "learning_rate": 0.000546118229241919, "loss": 1.7912, "step": 17471 }, { "epoch": 0.58, "grad_norm": 0.4314337968826294, "learning_rate": 0.0005461122525880115, "loss": 1.7715, "step": 17472 }, { "epoch": 0.58, "grad_norm": 0.4367202818393707, "learning_rate": 0.0005461062756353597, "loss": 1.834, "step": 17473 }, { "epoch": 0.58, "grad_norm": 0.4204953610897064, "learning_rate": 0.0005461002983839712, "loss": 1.8399, "step": 17474 }, { "epoch": 0.58, "grad_norm": 0.44444286823272705, "learning_rate": 0.0005460943208338532, "loss": 1.8818, "step": 17475 }, { "epoch": 0.58, "grad_norm": 0.4604310691356659, "learning_rate": 0.000546088342985013, "loss": 1.8682, "step": 17476 }, { "epoch": 0.58, "grad_norm": 0.44671469926834106, "learning_rate": 0.0005460823648374579, "loss": 1.8943, "step": 17477 }, { "epoch": 0.58, "grad_norm": 0.43582281470298767, "learning_rate": 0.0005460763863911949, "loss": 1.8239, "step": 17478 }, { "epoch": 0.58, "grad_norm": 0.4641169011592865, "learning_rate": 0.0005460704076462315, "loss": 1.8532, "step": 17479 }, { "epoch": 0.58, "grad_norm": 0.4553990662097931, "learning_rate": 0.000546064428602575, "loss": 1.8998, "step": 17480 }, { "epoch": 0.58, "grad_norm": 0.6125157475471497, "learning_rate": 0.0005460584492602324, "loss": 1.9091, "step": 17481 }, { "epoch": 0.58, "grad_norm": 0.4490720331668854, "learning_rate": 0.0005460524696192111, "loss": 1.7493, "step": 17482 }, { "epoch": 0.58, "grad_norm": 0.46985769271850586, "learning_rate": 0.0005460464896795183, "loss": 1.8676, "step": 17483 }, { "epoch": 0.58, "grad_norm": 0.4696594774723053, "learning_rate": 0.0005460405094411614, "loss": 1.9211, "step": 17484 }, { "epoch": 0.58, "grad_norm": 0.4458865821361542, "learning_rate": 0.0005460345289041475, "loss": 1.8259, "step": 17485 }, { "epoch": 0.58, "grad_norm": 0.44513896107673645, "learning_rate": 0.0005460285480684841, "loss": 1.7731, "step": 17486 }, { "epoch": 0.58, "grad_norm": 0.46356165409088135, "learning_rate": 0.0005460225669341782, "loss": 1.7968, "step": 17487 }, { "epoch": 0.58, "grad_norm": 0.5669059157371521, "learning_rate": 0.0005460165855012371, "loss": 1.8548, "step": 17488 }, { "epoch": 0.58, "grad_norm": 0.4330838620662689, "learning_rate": 0.0005460106037696681, "loss": 1.8473, "step": 17489 }, { "epoch": 0.58, "grad_norm": 0.4647662937641144, "learning_rate": 0.0005460046217394786, "loss": 1.7842, "step": 17490 }, { "epoch": 0.58, "grad_norm": 0.477664589881897, "learning_rate": 0.0005459986394106757, "loss": 1.8842, "step": 17491 }, { "epoch": 0.58, "grad_norm": 0.4131772220134735, "learning_rate": 0.0005459926567832667, "loss": 1.8099, "step": 17492 }, { "epoch": 0.58, "grad_norm": 0.42442071437835693, "learning_rate": 0.0005459866738572588, "loss": 1.875, "step": 17493 }, { "epoch": 0.58, "grad_norm": 0.4372476041316986, "learning_rate": 0.0005459806906326593, "loss": 1.8072, "step": 17494 }, { "epoch": 0.58, "grad_norm": 0.44259995222091675, "learning_rate": 0.0005459747071094755, "loss": 1.7254, "step": 17495 }, { "epoch": 0.58, "grad_norm": 0.44432899355888367, "learning_rate": 0.0005459687232877147, "loss": 1.8393, "step": 17496 }, { "epoch": 0.58, "grad_norm": 0.4327445328235626, "learning_rate": 0.0005459627391673842, "loss": 1.832, "step": 17497 }, { "epoch": 0.58, "grad_norm": 0.42927923798561096, "learning_rate": 0.000545956754748491, "loss": 1.7684, "step": 17498 }, { "epoch": 0.58, "grad_norm": 0.42773792147636414, "learning_rate": 0.0005459507700310426, "loss": 1.8455, "step": 17499 }, { "epoch": 0.58, "grad_norm": 0.41886818408966064, "learning_rate": 0.0005459447850150463, "loss": 1.8138, "step": 17500 }, { "epoch": 0.58, "grad_norm": 0.42209869623184204, "learning_rate": 0.0005459387997005091, "loss": 1.8237, "step": 17501 }, { "epoch": 0.58, "grad_norm": 0.43518027663230896, "learning_rate": 0.0005459328140874385, "loss": 1.8687, "step": 17502 }, { "epoch": 0.58, "grad_norm": 0.4224169850349426, "learning_rate": 0.0005459268281758417, "loss": 1.8395, "step": 17503 }, { "epoch": 0.58, "grad_norm": 0.4391484260559082, "learning_rate": 0.0005459208419657261, "loss": 1.8517, "step": 17504 }, { "epoch": 0.58, "grad_norm": 0.43398648500442505, "learning_rate": 0.0005459148554570986, "loss": 1.9201, "step": 17505 }, { "epoch": 0.58, "grad_norm": 0.41635316610336304, "learning_rate": 0.0005459088686499668, "loss": 1.8572, "step": 17506 }, { "epoch": 0.58, "grad_norm": 0.4132500886917114, "learning_rate": 0.000545902881544338, "loss": 1.8679, "step": 17507 }, { "epoch": 0.58, "grad_norm": 0.4305953085422516, "learning_rate": 0.0005458968941402193, "loss": 1.8422, "step": 17508 }, { "epoch": 0.58, "grad_norm": 0.43227094411849976, "learning_rate": 0.0005458909064376177, "loss": 1.8619, "step": 17509 }, { "epoch": 0.58, "grad_norm": 0.43003132939338684, "learning_rate": 0.0005458849184365411, "loss": 1.8671, "step": 17510 }, { "epoch": 0.58, "grad_norm": 0.45043814182281494, "learning_rate": 0.0005458789301369964, "loss": 1.908, "step": 17511 }, { "epoch": 0.58, "grad_norm": 0.4315791130065918, "learning_rate": 0.0005458729415389907, "loss": 1.8543, "step": 17512 }, { "epoch": 0.58, "grad_norm": 0.4420665502548218, "learning_rate": 0.0005458669526425317, "loss": 1.8333, "step": 17513 }, { "epoch": 0.58, "grad_norm": 0.4270801246166229, "learning_rate": 0.0005458609634476264, "loss": 1.852, "step": 17514 }, { "epoch": 0.58, "grad_norm": 0.4190695285797119, "learning_rate": 0.000545854973954282, "loss": 1.7509, "step": 17515 }, { "epoch": 0.58, "grad_norm": 0.43757161498069763, "learning_rate": 0.000545848984162506, "loss": 1.8009, "step": 17516 }, { "epoch": 0.58, "grad_norm": 0.43060168623924255, "learning_rate": 0.0005458429940723054, "loss": 1.8304, "step": 17517 }, { "epoch": 0.58, "grad_norm": 0.43802616000175476, "learning_rate": 0.0005458370036836878, "loss": 1.957, "step": 17518 }, { "epoch": 0.58, "grad_norm": 0.44939252734184265, "learning_rate": 0.0005458310129966603, "loss": 1.7824, "step": 17519 }, { "epoch": 0.58, "grad_norm": 0.44574013352394104, "learning_rate": 0.0005458250220112301, "loss": 1.8414, "step": 17520 }, { "epoch": 0.58, "grad_norm": 0.44898930191993713, "learning_rate": 0.0005458190307274046, "loss": 1.8725, "step": 17521 }, { "epoch": 0.58, "grad_norm": 0.44818297028541565, "learning_rate": 0.0005458130391451909, "loss": 1.8976, "step": 17522 }, { "epoch": 0.58, "grad_norm": 0.4380794167518616, "learning_rate": 0.0005458070472645965, "loss": 1.7333, "step": 17523 }, { "epoch": 0.58, "grad_norm": 0.43503132462501526, "learning_rate": 0.0005458010550856285, "loss": 1.7698, "step": 17524 }, { "epoch": 0.58, "grad_norm": 0.4255322217941284, "learning_rate": 0.0005457950626082943, "loss": 1.8358, "step": 17525 }, { "epoch": 0.58, "grad_norm": 0.42363038659095764, "learning_rate": 0.000545789069832601, "loss": 1.7989, "step": 17526 }, { "epoch": 0.58, "grad_norm": 0.4332757890224457, "learning_rate": 0.0005457830767585561, "loss": 1.8724, "step": 17527 }, { "epoch": 0.58, "grad_norm": 0.43676578998565674, "learning_rate": 0.0005457770833861668, "loss": 1.8494, "step": 17528 }, { "epoch": 0.58, "grad_norm": 0.41945695877075195, "learning_rate": 0.0005457710897154402, "loss": 1.8127, "step": 17529 }, { "epoch": 0.58, "grad_norm": 0.44089844822883606, "learning_rate": 0.0005457650957463838, "loss": 1.8995, "step": 17530 }, { "epoch": 0.58, "grad_norm": 0.42140093445777893, "learning_rate": 0.0005457591014790047, "loss": 1.8794, "step": 17531 }, { "epoch": 0.58, "grad_norm": 0.42164814472198486, "learning_rate": 0.0005457531069133104, "loss": 1.7891, "step": 17532 }, { "epoch": 0.58, "grad_norm": 0.4329441785812378, "learning_rate": 0.0005457471120493078, "loss": 1.8917, "step": 17533 }, { "epoch": 0.58, "grad_norm": 0.43084055185317993, "learning_rate": 0.0005457411168870047, "loss": 1.8469, "step": 17534 }, { "epoch": 0.58, "grad_norm": 0.445677250623703, "learning_rate": 0.0005457351214264079, "loss": 1.7152, "step": 17535 }, { "epoch": 0.58, "grad_norm": 0.418809711933136, "learning_rate": 0.0005457291256675249, "loss": 1.755, "step": 17536 }, { "epoch": 0.58, "grad_norm": 0.43911507725715637, "learning_rate": 0.000545723129610363, "loss": 1.9239, "step": 17537 }, { "epoch": 0.58, "grad_norm": 0.42166829109191895, "learning_rate": 0.0005457171332549294, "loss": 1.8486, "step": 17538 }, { "epoch": 0.58, "grad_norm": 0.42527663707733154, "learning_rate": 0.0005457111366012314, "loss": 1.8497, "step": 17539 }, { "epoch": 0.58, "grad_norm": 0.4487476348876953, "learning_rate": 0.0005457051396492764, "loss": 1.8525, "step": 17540 }, { "epoch": 0.58, "grad_norm": 0.4338861405849457, "learning_rate": 0.0005456991423990715, "loss": 1.8513, "step": 17541 }, { "epoch": 0.58, "grad_norm": 0.41711917519569397, "learning_rate": 0.0005456931448506239, "loss": 1.8592, "step": 17542 }, { "epoch": 0.58, "grad_norm": 0.4117943048477173, "learning_rate": 0.0005456871470039411, "loss": 1.8213, "step": 17543 }, { "epoch": 0.58, "grad_norm": 0.41056814789772034, "learning_rate": 0.0005456811488590304, "loss": 1.8282, "step": 17544 }, { "epoch": 0.58, "grad_norm": 0.42180004715919495, "learning_rate": 0.0005456751504158988, "loss": 1.7749, "step": 17545 }, { "epoch": 0.58, "grad_norm": 0.42881014943122864, "learning_rate": 0.0005456691516745538, "loss": 1.875, "step": 17546 }, { "epoch": 0.58, "grad_norm": 0.4280381202697754, "learning_rate": 0.0005456631526350029, "loss": 1.8582, "step": 17547 }, { "epoch": 0.58, "grad_norm": 0.4377425014972687, "learning_rate": 0.0005456571532972529, "loss": 1.8508, "step": 17548 }, { "epoch": 0.58, "grad_norm": 0.4273545444011688, "learning_rate": 0.0005456511536613113, "loss": 1.8276, "step": 17549 }, { "epoch": 0.58, "grad_norm": 0.43422195315361023, "learning_rate": 0.0005456451537271855, "loss": 1.876, "step": 17550 }, { "epoch": 0.58, "grad_norm": 0.4425750970840454, "learning_rate": 0.0005456391534948826, "loss": 1.8544, "step": 17551 }, { "epoch": 0.58, "grad_norm": 0.4365496039390564, "learning_rate": 0.00054563315296441, "loss": 1.8699, "step": 17552 }, { "epoch": 0.58, "grad_norm": 0.43073418736457825, "learning_rate": 0.0005456271521357749, "loss": 1.8345, "step": 17553 }, { "epoch": 0.58, "grad_norm": 0.4409838914871216, "learning_rate": 0.0005456211510089848, "loss": 1.8218, "step": 17554 }, { "epoch": 0.58, "grad_norm": 0.4253353476524353, "learning_rate": 0.0005456151495840467, "loss": 1.8602, "step": 17555 }, { "epoch": 0.58, "grad_norm": 0.443064421415329, "learning_rate": 0.0005456091478609679, "loss": 1.8534, "step": 17556 }, { "epoch": 0.58, "grad_norm": 0.7900946140289307, "learning_rate": 0.0005456031458397558, "loss": 1.9524, "step": 17557 }, { "epoch": 0.58, "grad_norm": 0.430448979139328, "learning_rate": 0.0005455971435204178, "loss": 1.8061, "step": 17558 }, { "epoch": 0.58, "grad_norm": 0.4197891056537628, "learning_rate": 0.000545591140902961, "loss": 1.9022, "step": 17559 }, { "epoch": 0.58, "grad_norm": 0.4333368241786957, "learning_rate": 0.0005455851379873927, "loss": 1.8269, "step": 17560 }, { "epoch": 0.58, "grad_norm": 0.4255889058113098, "learning_rate": 0.0005455791347737202, "loss": 1.8076, "step": 17561 }, { "epoch": 0.58, "grad_norm": 0.426866739988327, "learning_rate": 0.0005455731312619509, "loss": 1.8798, "step": 17562 }, { "epoch": 0.58, "grad_norm": 0.42847684025764465, "learning_rate": 0.000545567127452092, "loss": 1.8037, "step": 17563 }, { "epoch": 0.58, "grad_norm": 0.42663562297821045, "learning_rate": 0.0005455611233441508, "loss": 1.8303, "step": 17564 }, { "epoch": 0.58, "grad_norm": 0.4280451238155365, "learning_rate": 0.0005455551189381345, "loss": 1.9137, "step": 17565 }, { "epoch": 0.58, "grad_norm": 0.43307510018348694, "learning_rate": 0.0005455491142340505, "loss": 1.8786, "step": 17566 }, { "epoch": 0.58, "grad_norm": 0.43119707703590393, "learning_rate": 0.0005455431092319061, "loss": 1.8836, "step": 17567 }, { "epoch": 0.58, "grad_norm": 0.42052778601646423, "learning_rate": 0.0005455371039317085, "loss": 1.7798, "step": 17568 }, { "epoch": 0.58, "grad_norm": 0.4272928833961487, "learning_rate": 0.0005455310983334651, "loss": 1.8401, "step": 17569 }, { "epoch": 0.58, "grad_norm": 0.4378955364227295, "learning_rate": 0.0005455250924371831, "loss": 1.8395, "step": 17570 }, { "epoch": 0.58, "grad_norm": 0.4157077968120575, "learning_rate": 0.0005455190862428697, "loss": 1.829, "step": 17571 }, { "epoch": 0.58, "grad_norm": 0.44315004348754883, "learning_rate": 0.0005455130797505323, "loss": 1.8555, "step": 17572 }, { "epoch": 0.58, "grad_norm": 0.4318007230758667, "learning_rate": 0.0005455070729601784, "loss": 1.8813, "step": 17573 }, { "epoch": 0.58, "grad_norm": 0.429235577583313, "learning_rate": 0.0005455010658718149, "loss": 1.7071, "step": 17574 }, { "epoch": 0.58, "grad_norm": 0.4445580542087555, "learning_rate": 0.0005454950584854493, "loss": 1.8458, "step": 17575 }, { "epoch": 0.58, "grad_norm": 0.46763530373573303, "learning_rate": 0.0005454890508010889, "loss": 1.7779, "step": 17576 }, { "epoch": 0.58, "grad_norm": 0.4272938370704651, "learning_rate": 0.0005454830428187411, "loss": 1.8518, "step": 17577 }, { "epoch": 0.58, "grad_norm": 0.44228172302246094, "learning_rate": 0.000545477034538413, "loss": 1.8666, "step": 17578 }, { "epoch": 0.58, "grad_norm": 0.4503336548805237, "learning_rate": 0.0005454710259601118, "loss": 1.8858, "step": 17579 }, { "epoch": 0.58, "grad_norm": 0.43441906571388245, "learning_rate": 0.000545465017083845, "loss": 1.7578, "step": 17580 }, { "epoch": 0.58, "grad_norm": 0.4346061646938324, "learning_rate": 0.0005454590079096199, "loss": 1.8936, "step": 17581 }, { "epoch": 0.58, "grad_norm": 0.42495033144950867, "learning_rate": 0.0005454529984374437, "loss": 1.8638, "step": 17582 }, { "epoch": 0.58, "grad_norm": 0.43968895077705383, "learning_rate": 0.0005454469886673238, "loss": 1.8282, "step": 17583 }, { "epoch": 0.59, "grad_norm": 0.42673832178115845, "learning_rate": 0.0005454409785992673, "loss": 1.8538, "step": 17584 }, { "epoch": 0.59, "grad_norm": 0.4222044348716736, "learning_rate": 0.0005454349682332817, "loss": 1.8232, "step": 17585 }, { "epoch": 0.59, "grad_norm": 0.4312448501586914, "learning_rate": 0.0005454289575693741, "loss": 1.7957, "step": 17586 }, { "epoch": 0.59, "grad_norm": 0.43444058299064636, "learning_rate": 0.0005454229466075519, "loss": 1.8798, "step": 17587 }, { "epoch": 0.59, "grad_norm": 0.44308584928512573, "learning_rate": 0.0005454169353478226, "loss": 1.8628, "step": 17588 }, { "epoch": 0.59, "grad_norm": 0.43281957507133484, "learning_rate": 0.0005454109237901932, "loss": 1.9066, "step": 17589 }, { "epoch": 0.59, "grad_norm": 0.4266991913318634, "learning_rate": 0.000545404911934671, "loss": 1.9178, "step": 17590 }, { "epoch": 0.59, "grad_norm": 0.4235233962535858, "learning_rate": 0.0005453988997812635, "loss": 1.8328, "step": 17591 }, { "epoch": 0.59, "grad_norm": 0.4232182800769806, "learning_rate": 0.0005453928873299778, "loss": 1.8563, "step": 17592 }, { "epoch": 0.59, "grad_norm": 0.42137017846107483, "learning_rate": 0.0005453868745808215, "loss": 1.8268, "step": 17593 }, { "epoch": 0.59, "grad_norm": 0.43247997760772705, "learning_rate": 0.0005453808615338015, "loss": 1.8672, "step": 17594 }, { "epoch": 0.59, "grad_norm": 0.41959109902381897, "learning_rate": 0.0005453748481889254, "loss": 1.7732, "step": 17595 }, { "epoch": 0.59, "grad_norm": 0.43105125427246094, "learning_rate": 0.0005453688345462003, "loss": 1.7919, "step": 17596 }, { "epoch": 0.59, "grad_norm": 0.424925297498703, "learning_rate": 0.0005453628206056337, "loss": 1.8441, "step": 17597 }, { "epoch": 0.59, "grad_norm": 0.4201333224773407, "learning_rate": 0.0005453568063672326, "loss": 1.8917, "step": 17598 }, { "epoch": 0.59, "grad_norm": 0.42774778604507446, "learning_rate": 0.0005453507918310046, "loss": 1.8392, "step": 17599 }, { "epoch": 0.59, "grad_norm": 0.4304448366165161, "learning_rate": 0.0005453447769969569, "loss": 1.7734, "step": 17600 }, { "epoch": 0.59, "grad_norm": 0.4271390736103058, "learning_rate": 0.0005453387618650968, "loss": 1.8173, "step": 17601 }, { "epoch": 0.59, "grad_norm": 0.4233592450618744, "learning_rate": 0.0005453327464354315, "loss": 1.7831, "step": 17602 }, { "epoch": 0.59, "grad_norm": 0.4290076792240143, "learning_rate": 0.0005453267307079686, "loss": 1.7968, "step": 17603 }, { "epoch": 0.59, "grad_norm": 0.4343157708644867, "learning_rate": 0.000545320714682715, "loss": 1.8391, "step": 17604 }, { "epoch": 0.59, "grad_norm": 0.41978588700294495, "learning_rate": 0.0005453146983596783, "loss": 1.7426, "step": 17605 }, { "epoch": 0.59, "grad_norm": 0.4267764687538147, "learning_rate": 0.0005453086817388655, "loss": 1.761, "step": 17606 }, { "epoch": 0.59, "grad_norm": 0.438455730676651, "learning_rate": 0.0005453026648202843, "loss": 1.8811, "step": 17607 }, { "epoch": 0.59, "grad_norm": 0.4220263361930847, "learning_rate": 0.0005452966476039418, "loss": 1.8783, "step": 17608 }, { "epoch": 0.59, "grad_norm": 0.42219623923301697, "learning_rate": 0.0005452906300898452, "loss": 1.8582, "step": 17609 }, { "epoch": 0.59, "grad_norm": 0.43224817514419556, "learning_rate": 0.0005452846122780022, "loss": 1.8342, "step": 17610 }, { "epoch": 0.59, "grad_norm": 0.43959274888038635, "learning_rate": 0.0005452785941684195, "loss": 1.8258, "step": 17611 }, { "epoch": 0.59, "grad_norm": 0.4249471426010132, "learning_rate": 0.0005452725757611049, "loss": 1.8576, "step": 17612 }, { "epoch": 0.59, "grad_norm": 0.4202369153499603, "learning_rate": 0.0005452665570560655, "loss": 1.842, "step": 17613 }, { "epoch": 0.59, "grad_norm": 0.42868635058403015, "learning_rate": 0.0005452605380533086, "loss": 1.8121, "step": 17614 }, { "epoch": 0.59, "grad_norm": 0.41222453117370605, "learning_rate": 0.0005452545187528416, "loss": 1.8361, "step": 17615 }, { "epoch": 0.59, "grad_norm": 0.4275868237018585, "learning_rate": 0.0005452484991546717, "loss": 1.8519, "step": 17616 }, { "epoch": 0.59, "grad_norm": 0.4184007942676544, "learning_rate": 0.0005452424792588063, "loss": 1.7971, "step": 17617 }, { "epoch": 0.59, "grad_norm": 0.41798004508018494, "learning_rate": 0.0005452364590652525, "loss": 1.9091, "step": 17618 }, { "epoch": 0.59, "grad_norm": 0.4315205514431, "learning_rate": 0.000545230438574018, "loss": 1.873, "step": 17619 }, { "epoch": 0.59, "grad_norm": 0.4423676133155823, "learning_rate": 0.0005452244177851099, "loss": 1.8102, "step": 17620 }, { "epoch": 0.59, "grad_norm": 0.46572038531303406, "learning_rate": 0.0005452183966985353, "loss": 1.8672, "step": 17621 }, { "epoch": 0.59, "grad_norm": 0.43562161922454834, "learning_rate": 0.0005452123753143018, "loss": 1.8749, "step": 17622 }, { "epoch": 0.59, "grad_norm": 0.4582187533378601, "learning_rate": 0.0005452063536324165, "loss": 1.8876, "step": 17623 }, { "epoch": 0.59, "grad_norm": 0.4242297112941742, "learning_rate": 0.000545200331652887, "loss": 1.839, "step": 17624 }, { "epoch": 0.59, "grad_norm": 0.44559112191200256, "learning_rate": 0.0005451943093757203, "loss": 1.8923, "step": 17625 }, { "epoch": 0.59, "grad_norm": 0.4250849485397339, "learning_rate": 0.0005451882868009239, "loss": 1.8863, "step": 17626 }, { "epoch": 0.59, "grad_norm": 0.4359094500541687, "learning_rate": 0.0005451822639285049, "loss": 1.8931, "step": 17627 }, { "epoch": 0.59, "grad_norm": 0.44498100876808167, "learning_rate": 0.000545176240758471, "loss": 1.8635, "step": 17628 }, { "epoch": 0.59, "grad_norm": 0.4321901798248291, "learning_rate": 0.0005451702172908291, "loss": 1.854, "step": 17629 }, { "epoch": 0.59, "grad_norm": 0.4456159472465515, "learning_rate": 0.0005451641935255867, "loss": 1.8077, "step": 17630 }, { "epoch": 0.59, "grad_norm": 0.43807318806648254, "learning_rate": 0.0005451581694627511, "loss": 1.9164, "step": 17631 }, { "epoch": 0.59, "grad_norm": 0.42980024218559265, "learning_rate": 0.0005451521451023296, "loss": 1.8378, "step": 17632 }, { "epoch": 0.59, "grad_norm": 0.4317436218261719, "learning_rate": 0.0005451461204443296, "loss": 1.8943, "step": 17633 }, { "epoch": 0.59, "grad_norm": 0.4178318977355957, "learning_rate": 0.0005451400954887582, "loss": 1.8801, "step": 17634 }, { "epoch": 0.59, "grad_norm": 0.43221771717071533, "learning_rate": 0.0005451340702356228, "loss": 1.9338, "step": 17635 }, { "epoch": 0.59, "grad_norm": 0.4376005530357361, "learning_rate": 0.000545128044684931, "loss": 1.855, "step": 17636 }, { "epoch": 0.59, "grad_norm": 0.43123486638069153, "learning_rate": 0.0005451220188366897, "loss": 1.8098, "step": 17637 }, { "epoch": 0.59, "grad_norm": 0.4324275851249695, "learning_rate": 0.0005451159926909065, "loss": 1.8474, "step": 17638 }, { "epoch": 0.59, "grad_norm": 0.4224264323711395, "learning_rate": 0.0005451099662475885, "loss": 1.8575, "step": 17639 }, { "epoch": 0.59, "grad_norm": 0.433828204870224, "learning_rate": 0.0005451039395067431, "loss": 1.8347, "step": 17640 }, { "epoch": 0.59, "grad_norm": 0.4386619031429291, "learning_rate": 0.0005450979124683777, "loss": 1.8765, "step": 17641 }, { "epoch": 0.59, "grad_norm": 0.4240468144416809, "learning_rate": 0.0005450918851324995, "loss": 1.871, "step": 17642 }, { "epoch": 0.59, "grad_norm": 0.4329424798488617, "learning_rate": 0.0005450858574991159, "loss": 1.9038, "step": 17643 }, { "epoch": 0.59, "grad_norm": 0.4301093816757202, "learning_rate": 0.0005450798295682341, "loss": 1.9289, "step": 17644 }, { "epoch": 0.59, "grad_norm": 0.440266489982605, "learning_rate": 0.0005450738013398616, "loss": 1.8234, "step": 17645 }, { "epoch": 0.59, "grad_norm": 0.42428433895111084, "learning_rate": 0.0005450677728140056, "loss": 1.8547, "step": 17646 }, { "epoch": 0.59, "grad_norm": 0.43518003821372986, "learning_rate": 0.0005450617439906733, "loss": 1.89, "step": 17647 }, { "epoch": 0.59, "grad_norm": 0.4444316327571869, "learning_rate": 0.0005450557148698723, "loss": 1.9039, "step": 17648 }, { "epoch": 0.59, "grad_norm": 0.4281753599643707, "learning_rate": 0.0005450496854516098, "loss": 1.9318, "step": 17649 }, { "epoch": 0.59, "grad_norm": 0.42138898372650146, "learning_rate": 0.0005450436557358929, "loss": 1.8547, "step": 17650 }, { "epoch": 0.59, "grad_norm": 0.42264801263809204, "learning_rate": 0.0005450376257227293, "loss": 1.8341, "step": 17651 }, { "epoch": 0.59, "grad_norm": 0.4214225709438324, "learning_rate": 0.000545031595412126, "loss": 1.7911, "step": 17652 }, { "epoch": 0.59, "grad_norm": 0.42748114466667175, "learning_rate": 0.0005450255648040905, "loss": 1.8127, "step": 17653 }, { "epoch": 0.59, "grad_norm": 0.4250580966472626, "learning_rate": 0.0005450195338986302, "loss": 1.9054, "step": 17654 }, { "epoch": 0.59, "grad_norm": 0.42249593138694763, "learning_rate": 0.0005450135026957521, "loss": 1.8994, "step": 17655 }, { "epoch": 0.59, "grad_norm": 0.43631547689437866, "learning_rate": 0.0005450074711954637, "loss": 1.8714, "step": 17656 }, { "epoch": 0.59, "grad_norm": 0.4249836802482605, "learning_rate": 0.0005450014393977724, "loss": 1.8558, "step": 17657 }, { "epoch": 0.59, "grad_norm": 0.43035176396369934, "learning_rate": 0.0005449954073026854, "loss": 1.8755, "step": 17658 }, { "epoch": 0.59, "grad_norm": 0.4212206304073334, "learning_rate": 0.0005449893749102101, "loss": 1.7691, "step": 17659 }, { "epoch": 0.59, "grad_norm": 0.4509930908679962, "learning_rate": 0.0005449833422203539, "loss": 1.9263, "step": 17660 }, { "epoch": 0.59, "grad_norm": 0.4382088780403137, "learning_rate": 0.0005449773092331239, "loss": 1.839, "step": 17661 }, { "epoch": 0.59, "grad_norm": 0.4426965117454529, "learning_rate": 0.0005449712759485276, "loss": 1.8531, "step": 17662 }, { "epoch": 0.59, "grad_norm": 0.44413331151008606, "learning_rate": 0.0005449652423665722, "loss": 1.7841, "step": 17663 }, { "epoch": 0.59, "grad_norm": 0.44917672872543335, "learning_rate": 0.0005449592084872652, "loss": 1.9239, "step": 17664 }, { "epoch": 0.59, "grad_norm": 0.426592618227005, "learning_rate": 0.0005449531743106138, "loss": 1.825, "step": 17665 }, { "epoch": 0.59, "grad_norm": 0.42880305647850037, "learning_rate": 0.0005449471398366252, "loss": 1.8088, "step": 17666 }, { "epoch": 0.59, "grad_norm": 0.45085975527763367, "learning_rate": 0.000544941105065307, "loss": 1.9504, "step": 17667 }, { "epoch": 0.59, "grad_norm": 0.429086834192276, "learning_rate": 0.0005449350699966663, "loss": 1.8462, "step": 17668 }, { "epoch": 0.59, "grad_norm": 0.4519447088241577, "learning_rate": 0.0005449290346307106, "loss": 1.7547, "step": 17669 }, { "epoch": 0.59, "grad_norm": 0.42700254917144775, "learning_rate": 0.000544922998967447, "loss": 1.8837, "step": 17670 }, { "epoch": 0.59, "grad_norm": 0.4783603847026825, "learning_rate": 0.0005449169630068831, "loss": 2.0055, "step": 17671 }, { "epoch": 0.59, "grad_norm": 0.42216283082962036, "learning_rate": 0.0005449109267490261, "loss": 1.7892, "step": 17672 }, { "epoch": 0.59, "grad_norm": 0.41958174109458923, "learning_rate": 0.0005449048901938832, "loss": 1.7967, "step": 17673 }, { "epoch": 0.59, "grad_norm": 0.4293203055858612, "learning_rate": 0.0005448988533414619, "loss": 1.8745, "step": 17674 }, { "epoch": 0.59, "grad_norm": 0.43649449944496155, "learning_rate": 0.0005448928161917696, "loss": 1.7873, "step": 17675 }, { "epoch": 0.59, "grad_norm": 0.42807796597480774, "learning_rate": 0.0005448867787448134, "loss": 1.9142, "step": 17676 }, { "epoch": 0.59, "grad_norm": 0.4551611840724945, "learning_rate": 0.0005448807410006006, "loss": 1.8274, "step": 17677 }, { "epoch": 0.59, "grad_norm": 0.4365096688270569, "learning_rate": 0.0005448747029591389, "loss": 1.8797, "step": 17678 }, { "epoch": 0.59, "grad_norm": 0.43987390398979187, "learning_rate": 0.0005448686646204353, "loss": 1.91, "step": 17679 }, { "epoch": 0.59, "grad_norm": 0.45094674825668335, "learning_rate": 0.0005448626259844972, "loss": 1.8122, "step": 17680 }, { "epoch": 0.59, "grad_norm": 0.4718201756477356, "learning_rate": 0.000544856587051332, "loss": 1.8427, "step": 17681 }, { "epoch": 0.59, "grad_norm": 0.4268905520439148, "learning_rate": 0.0005448505478209469, "loss": 1.8152, "step": 17682 }, { "epoch": 0.59, "grad_norm": 0.42587047815322876, "learning_rate": 0.0005448445082933493, "loss": 1.9262, "step": 17683 }, { "epoch": 0.59, "grad_norm": 0.44428008794784546, "learning_rate": 0.0005448384684685467, "loss": 1.8999, "step": 17684 }, { "epoch": 0.59, "grad_norm": 0.45484837889671326, "learning_rate": 0.000544832428346546, "loss": 1.8407, "step": 17685 }, { "epoch": 0.59, "grad_norm": 0.46112701296806335, "learning_rate": 0.0005448263879273552, "loss": 1.9015, "step": 17686 }, { "epoch": 0.59, "grad_norm": 0.42566585540771484, "learning_rate": 0.0005448203472109809, "loss": 1.8037, "step": 17687 }, { "epoch": 0.59, "grad_norm": 0.43550291657447815, "learning_rate": 0.0005448143061974308, "loss": 1.8034, "step": 17688 }, { "epoch": 0.59, "grad_norm": 0.4585619568824768, "learning_rate": 0.0005448082648867124, "loss": 1.8393, "step": 17689 }, { "epoch": 0.59, "grad_norm": 0.45912685990333557, "learning_rate": 0.0005448022232788326, "loss": 1.8109, "step": 17690 }, { "epoch": 0.59, "grad_norm": 0.41065555810928345, "learning_rate": 0.0005447961813737992, "loss": 1.8043, "step": 17691 }, { "epoch": 0.59, "grad_norm": 0.4383697807788849, "learning_rate": 0.0005447901391716192, "loss": 1.8055, "step": 17692 }, { "epoch": 0.59, "grad_norm": 0.7212966680526733, "learning_rate": 0.0005447840966723, "loss": 1.8861, "step": 17693 }, { "epoch": 0.59, "grad_norm": 0.44787415862083435, "learning_rate": 0.000544778053875849, "loss": 1.7716, "step": 17694 }, { "epoch": 0.59, "grad_norm": 0.4240906834602356, "learning_rate": 0.0005447720107822736, "loss": 1.7959, "step": 17695 }, { "epoch": 0.59, "grad_norm": 0.4311298727989197, "learning_rate": 0.0005447659673915809, "loss": 1.8778, "step": 17696 }, { "epoch": 0.59, "grad_norm": 0.46561649441719055, "learning_rate": 0.0005447599237037785, "loss": 1.8889, "step": 17697 }, { "epoch": 0.59, "grad_norm": 0.4220055043697357, "learning_rate": 0.0005447538797188736, "loss": 1.841, "step": 17698 }, { "epoch": 0.59, "grad_norm": 0.4294928312301636, "learning_rate": 0.0005447478354368735, "loss": 1.8098, "step": 17699 }, { "epoch": 0.59, "grad_norm": 0.42515432834625244, "learning_rate": 0.0005447417908577856, "loss": 1.8631, "step": 17700 }, { "epoch": 0.59, "grad_norm": 0.4319549798965454, "learning_rate": 0.0005447357459816173, "loss": 1.7361, "step": 17701 }, { "epoch": 0.59, "grad_norm": 0.42947182059288025, "learning_rate": 0.0005447297008083757, "loss": 1.9031, "step": 17702 }, { "epoch": 0.59, "grad_norm": 0.41789790987968445, "learning_rate": 0.0005447236553380684, "loss": 1.8741, "step": 17703 }, { "epoch": 0.59, "grad_norm": 0.4206308126449585, "learning_rate": 0.0005447176095707026, "loss": 1.8809, "step": 17704 }, { "epoch": 0.59, "grad_norm": 0.45578938722610474, "learning_rate": 0.0005447115635062857, "loss": 1.8129, "step": 17705 }, { "epoch": 0.59, "grad_norm": 0.4269660711288452, "learning_rate": 0.0005447055171448252, "loss": 1.7744, "step": 17706 }, { "epoch": 0.59, "grad_norm": 0.4201751947402954, "learning_rate": 0.0005446994704863281, "loss": 1.8056, "step": 17707 }, { "epoch": 0.59, "grad_norm": 0.43958818912506104, "learning_rate": 0.0005446934235308019, "loss": 1.8418, "step": 17708 }, { "epoch": 0.59, "grad_norm": 0.4593752920627594, "learning_rate": 0.0005446873762782539, "loss": 1.8446, "step": 17709 }, { "epoch": 0.59, "grad_norm": 0.4316268265247345, "learning_rate": 0.0005446813287286915, "loss": 1.8644, "step": 17710 }, { "epoch": 0.59, "grad_norm": 0.4617275595664978, "learning_rate": 0.000544675280882122, "loss": 1.8122, "step": 17711 }, { "epoch": 0.59, "grad_norm": 0.4275481402873993, "learning_rate": 0.0005446692327385528, "loss": 1.7988, "step": 17712 }, { "epoch": 0.59, "grad_norm": 0.4255252182483673, "learning_rate": 0.0005446631842979912, "loss": 1.8781, "step": 17713 }, { "epoch": 0.59, "grad_norm": 0.4196436107158661, "learning_rate": 0.0005446571355604445, "loss": 1.9043, "step": 17714 }, { "epoch": 0.59, "grad_norm": 0.4409258961677551, "learning_rate": 0.0005446510865259202, "loss": 1.8258, "step": 17715 }, { "epoch": 0.59, "grad_norm": 0.42957603931427, "learning_rate": 0.0005446450371944255, "loss": 1.9138, "step": 17716 }, { "epoch": 0.59, "grad_norm": 0.42850276827812195, "learning_rate": 0.0005446389875659677, "loss": 1.804, "step": 17717 }, { "epoch": 0.59, "grad_norm": 0.4199739992618561, "learning_rate": 0.0005446329376405541, "loss": 1.8533, "step": 17718 }, { "epoch": 0.59, "grad_norm": 0.4614371359348297, "learning_rate": 0.0005446268874181924, "loss": 1.9548, "step": 17719 }, { "epoch": 0.59, "grad_norm": 0.44883888959884644, "learning_rate": 0.0005446208368988896, "loss": 1.8685, "step": 17720 }, { "epoch": 0.59, "grad_norm": 0.42580804228782654, "learning_rate": 0.0005446147860826531, "loss": 1.8013, "step": 17721 }, { "epoch": 0.59, "grad_norm": 0.45566636323928833, "learning_rate": 0.0005446087349694904, "loss": 1.786, "step": 17722 }, { "epoch": 0.59, "grad_norm": 0.4438585042953491, "learning_rate": 0.0005446026835594087, "loss": 1.8232, "step": 17723 }, { "epoch": 0.59, "grad_norm": 0.43018007278442383, "learning_rate": 0.0005445966318524153, "loss": 1.8502, "step": 17724 }, { "epoch": 0.59, "grad_norm": 0.4388073980808258, "learning_rate": 0.0005445905798485177, "loss": 1.8852, "step": 17725 }, { "epoch": 0.59, "grad_norm": 0.4517027735710144, "learning_rate": 0.0005445845275477231, "loss": 1.8311, "step": 17726 }, { "epoch": 0.59, "grad_norm": 0.4699263572692871, "learning_rate": 0.000544578474950039, "loss": 1.8205, "step": 17727 }, { "epoch": 0.59, "grad_norm": 0.429524302482605, "learning_rate": 0.0005445724220554726, "loss": 1.8543, "step": 17728 }, { "epoch": 0.59, "grad_norm": 0.4435075521469116, "learning_rate": 0.0005445663688640315, "loss": 1.8253, "step": 17729 }, { "epoch": 0.59, "grad_norm": 0.45053648948669434, "learning_rate": 0.0005445603153757226, "loss": 1.9132, "step": 17730 }, { "epoch": 0.59, "grad_norm": 0.42310598492622375, "learning_rate": 0.0005445542615905537, "loss": 1.8357, "step": 17731 }, { "epoch": 0.59, "grad_norm": 0.4215940237045288, "learning_rate": 0.0005445482075085319, "loss": 1.8889, "step": 17732 }, { "epoch": 0.59, "grad_norm": 0.4208211302757263, "learning_rate": 0.0005445421531296646, "loss": 1.8061, "step": 17733 }, { "epoch": 0.59, "grad_norm": 0.43540748953819275, "learning_rate": 0.0005445360984539592, "loss": 1.8292, "step": 17734 }, { "epoch": 0.59, "grad_norm": 0.4457186758518219, "learning_rate": 0.000544530043481423, "loss": 1.8834, "step": 17735 }, { "epoch": 0.59, "grad_norm": 0.43178099393844604, "learning_rate": 0.0005445239882120634, "loss": 1.7981, "step": 17736 }, { "epoch": 0.59, "grad_norm": 0.41647717356681824, "learning_rate": 0.0005445179326458876, "loss": 1.8127, "step": 17737 }, { "epoch": 0.59, "grad_norm": 0.42120644450187683, "learning_rate": 0.0005445118767829031, "loss": 1.8712, "step": 17738 }, { "epoch": 0.59, "grad_norm": 0.46574047207832336, "learning_rate": 0.0005445058206231171, "loss": 1.8249, "step": 17739 }, { "epoch": 0.59, "grad_norm": 0.4312596321105957, "learning_rate": 0.0005444997641665372, "loss": 1.8637, "step": 17740 }, { "epoch": 0.59, "grad_norm": 0.42675507068634033, "learning_rate": 0.0005444937074131706, "loss": 1.8058, "step": 17741 }, { "epoch": 0.59, "grad_norm": 0.4430937170982361, "learning_rate": 0.0005444876503630246, "loss": 1.8529, "step": 17742 }, { "epoch": 0.59, "grad_norm": 0.45485278964042664, "learning_rate": 0.0005444815930161067, "loss": 1.9324, "step": 17743 }, { "epoch": 0.59, "grad_norm": 0.42352885007858276, "learning_rate": 0.0005444755353724241, "loss": 1.8008, "step": 17744 }, { "epoch": 0.59, "grad_norm": 0.421398788690567, "learning_rate": 0.0005444694774319843, "loss": 1.8361, "step": 17745 }, { "epoch": 0.59, "grad_norm": 0.5688957571983337, "learning_rate": 0.0005444634191947945, "loss": 1.7657, "step": 17746 }, { "epoch": 0.59, "grad_norm": 0.43599051237106323, "learning_rate": 0.0005444573606608622, "loss": 1.9428, "step": 17747 }, { "epoch": 0.59, "grad_norm": 0.44842493534088135, "learning_rate": 0.0005444513018301946, "loss": 1.7765, "step": 17748 }, { "epoch": 0.59, "grad_norm": 0.41714969277381897, "learning_rate": 0.0005444452427027992, "loss": 1.7753, "step": 17749 }, { "epoch": 0.59, "grad_norm": 0.4409710168838501, "learning_rate": 0.0005444391832786832, "loss": 1.8772, "step": 17750 }, { "epoch": 0.59, "grad_norm": 0.4311201274394989, "learning_rate": 0.0005444331235578541, "loss": 1.8147, "step": 17751 }, { "epoch": 0.59, "grad_norm": 0.4280090630054474, "learning_rate": 0.0005444270635403193, "loss": 1.8005, "step": 17752 }, { "epoch": 0.59, "grad_norm": 0.42064592242240906, "learning_rate": 0.0005444210032260859, "loss": 1.7628, "step": 17753 }, { "epoch": 0.59, "grad_norm": 0.43724557757377625, "learning_rate": 0.0005444149426151616, "loss": 1.8696, "step": 17754 }, { "epoch": 0.59, "grad_norm": 0.44897156953811646, "learning_rate": 0.0005444088817075533, "loss": 1.8958, "step": 17755 }, { "epoch": 0.59, "grad_norm": 0.4305155873298645, "learning_rate": 0.0005444028205032689, "loss": 1.851, "step": 17756 }, { "epoch": 0.59, "grad_norm": 0.41977521777153015, "learning_rate": 0.0005443967590023154, "loss": 1.8208, "step": 17757 }, { "epoch": 0.59, "grad_norm": 0.4416859447956085, "learning_rate": 0.0005443906972047002, "loss": 1.872, "step": 17758 }, { "epoch": 0.59, "grad_norm": 0.44746509194374084, "learning_rate": 0.0005443846351104307, "loss": 1.8123, "step": 17759 }, { "epoch": 0.59, "grad_norm": 0.43407419323921204, "learning_rate": 0.0005443785727195143, "loss": 1.7979, "step": 17760 }, { "epoch": 0.59, "grad_norm": 0.42761650681495667, "learning_rate": 0.0005443725100319583, "loss": 1.9022, "step": 17761 }, { "epoch": 0.59, "grad_norm": 0.4272707402706146, "learning_rate": 0.0005443664470477701, "loss": 1.8048, "step": 17762 }, { "epoch": 0.59, "grad_norm": 0.4413277804851532, "learning_rate": 0.000544360383766957, "loss": 1.7993, "step": 17763 }, { "epoch": 0.59, "grad_norm": 0.4217207431793213, "learning_rate": 0.0005443543201895265, "loss": 1.8211, "step": 17764 }, { "epoch": 0.59, "grad_norm": 0.4154110848903656, "learning_rate": 0.0005443482563154858, "loss": 1.8108, "step": 17765 }, { "epoch": 0.59, "grad_norm": 0.4413139522075653, "learning_rate": 0.0005443421921448423, "loss": 1.7939, "step": 17766 }, { "epoch": 0.59, "grad_norm": 0.440807044506073, "learning_rate": 0.0005443361276776034, "loss": 1.8418, "step": 17767 }, { "epoch": 0.59, "grad_norm": 0.4276200532913208, "learning_rate": 0.0005443300629137764, "loss": 1.8674, "step": 17768 }, { "epoch": 0.59, "grad_norm": 0.4253212511539459, "learning_rate": 0.0005443239978533687, "loss": 1.9271, "step": 17769 }, { "epoch": 0.59, "grad_norm": 0.42514607310295105, "learning_rate": 0.0005443179324963877, "loss": 1.7741, "step": 17770 }, { "epoch": 0.59, "grad_norm": 0.46058210730552673, "learning_rate": 0.0005443118668428407, "loss": 1.8653, "step": 17771 }, { "epoch": 0.59, "grad_norm": 0.47156456112861633, "learning_rate": 0.0005443058008927352, "loss": 1.8182, "step": 17772 }, { "epoch": 0.59, "grad_norm": 0.4257657527923584, "learning_rate": 0.0005442997346460783, "loss": 1.8474, "step": 17773 }, { "epoch": 0.59, "grad_norm": 0.4354163706302643, "learning_rate": 0.0005442936681028775, "loss": 1.829, "step": 17774 }, { "epoch": 0.59, "grad_norm": 0.4349834620952606, "learning_rate": 0.0005442876012631404, "loss": 1.8065, "step": 17775 }, { "epoch": 0.59, "grad_norm": 0.46076643466949463, "learning_rate": 0.000544281534126874, "loss": 1.9389, "step": 17776 }, { "epoch": 0.59, "grad_norm": 0.4398781955242157, "learning_rate": 0.0005442754666940858, "loss": 1.8557, "step": 17777 }, { "epoch": 0.59, "grad_norm": 0.43984097242355347, "learning_rate": 0.0005442693989647832, "loss": 1.7957, "step": 17778 }, { "epoch": 0.59, "grad_norm": 0.43399980664253235, "learning_rate": 0.0005442633309389736, "loss": 1.8016, "step": 17779 }, { "epoch": 0.59, "grad_norm": 0.43110013008117676, "learning_rate": 0.0005442572626166642, "loss": 1.7729, "step": 17780 }, { "epoch": 0.59, "grad_norm": 0.4358002543449402, "learning_rate": 0.0005442511939978626, "loss": 1.8778, "step": 17781 }, { "epoch": 0.59, "grad_norm": 0.46243584156036377, "learning_rate": 0.000544245125082576, "loss": 1.9152, "step": 17782 }, { "epoch": 0.59, "grad_norm": 0.4331039786338806, "learning_rate": 0.0005442390558708117, "loss": 1.8409, "step": 17783 }, { "epoch": 0.59, "grad_norm": 0.4307841956615448, "learning_rate": 0.0005442329863625773, "loss": 1.9168, "step": 17784 }, { "epoch": 0.59, "grad_norm": 0.4220733344554901, "learning_rate": 0.00054422691655788, "loss": 1.8611, "step": 17785 }, { "epoch": 0.59, "grad_norm": 0.43046778440475464, "learning_rate": 0.0005442208464567272, "loss": 1.8332, "step": 17786 }, { "epoch": 0.59, "grad_norm": 0.4464053809642792, "learning_rate": 0.0005442147760591263, "loss": 1.7926, "step": 17787 }, { "epoch": 0.59, "grad_norm": 0.44929346442222595, "learning_rate": 0.0005442087053650846, "loss": 1.857, "step": 17788 }, { "epoch": 0.59, "grad_norm": 0.42490366101264954, "learning_rate": 0.0005442026343746095, "loss": 1.85, "step": 17789 }, { "epoch": 0.59, "grad_norm": 0.4331822991371155, "learning_rate": 0.0005441965630877085, "loss": 1.897, "step": 17790 }, { "epoch": 0.59, "grad_norm": 0.4293938875198364, "learning_rate": 0.0005441904915043887, "loss": 1.8472, "step": 17791 }, { "epoch": 0.59, "grad_norm": 0.4290270209312439, "learning_rate": 0.0005441844196246577, "loss": 1.8519, "step": 17792 }, { "epoch": 0.59, "grad_norm": 0.42899826169013977, "learning_rate": 0.0005441783474485227, "loss": 1.8238, "step": 17793 }, { "epoch": 0.59, "grad_norm": 0.4275190532207489, "learning_rate": 0.0005441722749759913, "loss": 1.8169, "step": 17794 }, { "epoch": 0.59, "grad_norm": 0.4434622526168823, "learning_rate": 0.0005441662022070706, "loss": 1.8755, "step": 17795 }, { "epoch": 0.59, "grad_norm": 0.43387123942375183, "learning_rate": 0.0005441601291417684, "loss": 1.9136, "step": 17796 }, { "epoch": 0.59, "grad_norm": 0.4316423237323761, "learning_rate": 0.0005441540557800914, "loss": 1.8579, "step": 17797 }, { "epoch": 0.59, "grad_norm": 0.43075132369995117, "learning_rate": 0.0005441479821220476, "loss": 1.8518, "step": 17798 }, { "epoch": 0.59, "grad_norm": 0.4418870210647583, "learning_rate": 0.000544141908167644, "loss": 1.8008, "step": 17799 }, { "epoch": 0.59, "grad_norm": 0.42846301198005676, "learning_rate": 0.0005441358339168882, "loss": 1.7941, "step": 17800 }, { "epoch": 0.59, "grad_norm": 0.4170700013637543, "learning_rate": 0.0005441297593697874, "loss": 1.8162, "step": 17801 }, { "epoch": 0.59, "grad_norm": 0.43449243903160095, "learning_rate": 0.0005441236845263491, "loss": 1.9399, "step": 17802 }, { "epoch": 0.59, "grad_norm": 0.420118123292923, "learning_rate": 0.0005441176093865805, "loss": 1.8128, "step": 17803 }, { "epoch": 0.59, "grad_norm": 0.4195474088191986, "learning_rate": 0.0005441115339504892, "loss": 1.8751, "step": 17804 }, { "epoch": 0.59, "grad_norm": 0.42745262384414673, "learning_rate": 0.0005441054582180823, "loss": 1.808, "step": 17805 }, { "epoch": 0.59, "grad_norm": 0.4245304465293884, "learning_rate": 0.0005440993821893675, "loss": 1.786, "step": 17806 }, { "epoch": 0.59, "grad_norm": 0.44723066687583923, "learning_rate": 0.000544093305864352, "loss": 1.8483, "step": 17807 }, { "epoch": 0.59, "grad_norm": 0.43840739130973816, "learning_rate": 0.0005440872292430432, "loss": 1.8748, "step": 17808 }, { "epoch": 0.59, "grad_norm": 0.4361031949520111, "learning_rate": 0.0005440811523254484, "loss": 1.857, "step": 17809 }, { "epoch": 0.59, "grad_norm": 0.427785187959671, "learning_rate": 0.0005440750751115751, "loss": 1.8538, "step": 17810 }, { "epoch": 0.59, "grad_norm": 0.43331271409988403, "learning_rate": 0.0005440689976014306, "loss": 1.8195, "step": 17811 }, { "epoch": 0.59, "grad_norm": 0.4290182590484619, "learning_rate": 0.0005440629197950224, "loss": 1.8752, "step": 17812 }, { "epoch": 0.59, "grad_norm": 0.4384291470050812, "learning_rate": 0.0005440568416923576, "loss": 1.9649, "step": 17813 }, { "epoch": 0.59, "grad_norm": 0.43159589171409607, "learning_rate": 0.0005440507632934439, "loss": 1.9311, "step": 17814 }, { "epoch": 0.59, "grad_norm": 0.4393189549446106, "learning_rate": 0.0005440446845982885, "loss": 1.8485, "step": 17815 }, { "epoch": 0.59, "grad_norm": 0.42331352829933167, "learning_rate": 0.0005440386056068988, "loss": 1.8878, "step": 17816 }, { "epoch": 0.59, "grad_norm": 0.43313130736351013, "learning_rate": 0.0005440325263192821, "loss": 1.8653, "step": 17817 }, { "epoch": 0.59, "grad_norm": 0.4135584235191345, "learning_rate": 0.0005440264467354459, "loss": 1.9138, "step": 17818 }, { "epoch": 0.59, "grad_norm": 0.42584434151649475, "learning_rate": 0.0005440203668553976, "loss": 1.919, "step": 17819 }, { "epoch": 0.59, "grad_norm": 0.43769046664237976, "learning_rate": 0.0005440142866791445, "loss": 1.8289, "step": 17820 }, { "epoch": 0.59, "grad_norm": 0.43565845489501953, "learning_rate": 0.000544008206206694, "loss": 1.8451, "step": 17821 }, { "epoch": 0.59, "grad_norm": 0.4214085340499878, "learning_rate": 0.0005440021254380536, "loss": 1.8273, "step": 17822 }, { "epoch": 0.59, "grad_norm": 0.4416976571083069, "learning_rate": 0.0005439960443732305, "loss": 1.8516, "step": 17823 }, { "epoch": 0.59, "grad_norm": 0.43714722990989685, "learning_rate": 0.0005439899630122322, "loss": 1.8565, "step": 17824 }, { "epoch": 0.59, "grad_norm": 0.422395795583725, "learning_rate": 0.0005439838813550659, "loss": 1.8173, "step": 17825 }, { "epoch": 0.59, "grad_norm": 0.4486069083213806, "learning_rate": 0.0005439777994017392, "loss": 1.781, "step": 17826 }, { "epoch": 0.59, "grad_norm": 0.42971086502075195, "learning_rate": 0.0005439717171522594, "loss": 1.8211, "step": 17827 }, { "epoch": 0.59, "grad_norm": 0.4288226068019867, "learning_rate": 0.0005439656346066339, "loss": 1.8213, "step": 17828 }, { "epoch": 0.59, "grad_norm": 0.4255200922489166, "learning_rate": 0.00054395955176487, "loss": 1.8498, "step": 17829 }, { "epoch": 0.59, "grad_norm": 0.42367252707481384, "learning_rate": 0.0005439534686269752, "loss": 1.8201, "step": 17830 }, { "epoch": 0.59, "grad_norm": 0.4262806475162506, "learning_rate": 0.0005439473851929569, "loss": 1.8329, "step": 17831 }, { "epoch": 0.59, "grad_norm": 0.43354472517967224, "learning_rate": 0.0005439413014628223, "loss": 1.8391, "step": 17832 }, { "epoch": 0.59, "grad_norm": 0.42961955070495605, "learning_rate": 0.000543935217436579, "loss": 1.8775, "step": 17833 }, { "epoch": 0.59, "grad_norm": 0.4454714059829712, "learning_rate": 0.0005439291331142342, "loss": 1.8584, "step": 17834 }, { "epoch": 0.59, "grad_norm": 0.42926156520843506, "learning_rate": 0.0005439230484957954, "loss": 1.8541, "step": 17835 }, { "epoch": 0.59, "grad_norm": 0.43602636456489563, "learning_rate": 0.00054391696358127, "loss": 1.8, "step": 17836 }, { "epoch": 0.59, "grad_norm": 0.4360736310482025, "learning_rate": 0.0005439108783706654, "loss": 1.8771, "step": 17837 }, { "epoch": 0.59, "grad_norm": 0.4231795072555542, "learning_rate": 0.0005439047928639888, "loss": 1.7734, "step": 17838 }, { "epoch": 0.59, "grad_norm": 0.44279682636260986, "learning_rate": 0.0005438987070612478, "loss": 1.8859, "step": 17839 }, { "epoch": 0.59, "grad_norm": 0.4679460823535919, "learning_rate": 0.0005438926209624497, "loss": 1.9377, "step": 17840 }, { "epoch": 0.59, "grad_norm": 0.5981004238128662, "learning_rate": 0.0005438865345676018, "loss": 1.8112, "step": 17841 }, { "epoch": 0.59, "grad_norm": 0.4360046088695526, "learning_rate": 0.0005438804478767117, "loss": 1.8645, "step": 17842 }, { "epoch": 0.59, "grad_norm": 0.4303043484687805, "learning_rate": 0.0005438743608897867, "loss": 1.8088, "step": 17843 }, { "epoch": 0.59, "grad_norm": 0.43994417786598206, "learning_rate": 0.000543868273606834, "loss": 1.8738, "step": 17844 }, { "epoch": 0.59, "grad_norm": 0.4317677915096283, "learning_rate": 0.0005438621860278612, "loss": 1.7895, "step": 17845 }, { "epoch": 0.59, "grad_norm": 0.40573740005493164, "learning_rate": 0.0005438560981528757, "loss": 1.7951, "step": 17846 }, { "epoch": 0.59, "grad_norm": 0.42666974663734436, "learning_rate": 0.0005438500099818848, "loss": 1.7842, "step": 17847 }, { "epoch": 0.59, "grad_norm": 0.443459153175354, "learning_rate": 0.0005438439215148959, "loss": 1.8091, "step": 17848 }, { "epoch": 0.59, "grad_norm": 0.45302459597587585, "learning_rate": 0.0005438378327519165, "loss": 1.8227, "step": 17849 }, { "epoch": 0.59, "grad_norm": 0.44104114174842834, "learning_rate": 0.0005438317436929537, "loss": 1.9135, "step": 17850 }, { "epoch": 0.59, "grad_norm": 0.44042879343032837, "learning_rate": 0.0005438256543380153, "loss": 1.8728, "step": 17851 }, { "epoch": 0.59, "grad_norm": 0.43800145387649536, "learning_rate": 0.0005438195646871083, "loss": 1.8017, "step": 17852 }, { "epoch": 0.59, "grad_norm": 0.43234533071517944, "learning_rate": 0.0005438134747402404, "loss": 1.8456, "step": 17853 }, { "epoch": 0.59, "grad_norm": 0.4163581132888794, "learning_rate": 0.0005438073844974189, "loss": 1.8195, "step": 17854 }, { "epoch": 0.59, "grad_norm": 0.43416810035705566, "learning_rate": 0.000543801293958651, "loss": 1.876, "step": 17855 }, { "epoch": 0.59, "grad_norm": 0.4203394651412964, "learning_rate": 0.0005437952031239443, "loss": 1.883, "step": 17856 }, { "epoch": 0.59, "grad_norm": 0.43563157320022583, "learning_rate": 0.0005437891119933063, "loss": 1.9197, "step": 17857 }, { "epoch": 0.59, "grad_norm": 0.4171801507472992, "learning_rate": 0.0005437830205667441, "loss": 1.8964, "step": 17858 }, { "epoch": 0.59, "grad_norm": 0.43258973956108093, "learning_rate": 0.0005437769288442652, "loss": 1.821, "step": 17859 }, { "epoch": 0.59, "grad_norm": 0.4399896562099457, "learning_rate": 0.0005437708368258771, "loss": 1.8762, "step": 17860 }, { "epoch": 0.59, "grad_norm": 0.42634865641593933, "learning_rate": 0.0005437647445115871, "loss": 1.8964, "step": 17861 }, { "epoch": 0.59, "grad_norm": 0.4270406663417816, "learning_rate": 0.0005437586519014027, "loss": 1.8131, "step": 17862 }, { "epoch": 0.59, "grad_norm": 0.4182854890823364, "learning_rate": 0.0005437525589953311, "loss": 1.8747, "step": 17863 }, { "epoch": 0.59, "grad_norm": 0.4281139075756073, "learning_rate": 0.0005437464657933799, "loss": 1.8303, "step": 17864 }, { "epoch": 0.59, "grad_norm": 0.4334900677204132, "learning_rate": 0.0005437403722955563, "loss": 1.839, "step": 17865 }, { "epoch": 0.59, "grad_norm": 0.42449620366096497, "learning_rate": 0.0005437342785018679, "loss": 1.8888, "step": 17866 }, { "epoch": 0.59, "grad_norm": 0.44749337434768677, "learning_rate": 0.0005437281844123218, "loss": 1.8347, "step": 17867 }, { "epoch": 0.59, "grad_norm": 0.4301488399505615, "learning_rate": 0.0005437220900269259, "loss": 1.8654, "step": 17868 }, { "epoch": 0.59, "grad_norm": 0.4405619204044342, "learning_rate": 0.000543715995345687, "loss": 1.7793, "step": 17869 }, { "epoch": 0.59, "grad_norm": 0.41671034693717957, "learning_rate": 0.0005437099003686131, "loss": 1.8302, "step": 17870 }, { "epoch": 0.59, "grad_norm": 0.4376845061779022, "learning_rate": 0.0005437038050957111, "loss": 1.9117, "step": 17871 }, { "epoch": 0.59, "grad_norm": 0.429375559091568, "learning_rate": 0.0005436977095269886, "loss": 1.8761, "step": 17872 }, { "epoch": 0.59, "grad_norm": 0.4254567325115204, "learning_rate": 0.000543691613662453, "loss": 1.8373, "step": 17873 }, { "epoch": 0.59, "grad_norm": 0.4247928857803345, "learning_rate": 0.0005436855175021116, "loss": 1.7973, "step": 17874 }, { "epoch": 0.59, "grad_norm": 0.4295167326927185, "learning_rate": 0.000543679421045972, "loss": 1.7854, "step": 17875 }, { "epoch": 0.59, "grad_norm": 0.43745800852775574, "learning_rate": 0.0005436733242940414, "loss": 1.9274, "step": 17876 }, { "epoch": 0.59, "grad_norm": 0.43218228220939636, "learning_rate": 0.0005436672272463274, "loss": 1.9054, "step": 17877 }, { "epoch": 0.59, "grad_norm": 0.43340277671813965, "learning_rate": 0.0005436611299028374, "loss": 1.8563, "step": 17878 }, { "epoch": 0.59, "grad_norm": 0.437537282705307, "learning_rate": 0.0005436550322635785, "loss": 1.8823, "step": 17879 }, { "epoch": 0.59, "grad_norm": 0.4274522662162781, "learning_rate": 0.0005436489343285582, "loss": 1.8624, "step": 17880 }, { "epoch": 0.59, "grad_norm": 0.44816920161247253, "learning_rate": 0.0005436428360977841, "loss": 1.7633, "step": 17881 }, { "epoch": 0.59, "grad_norm": 0.4337778687477112, "learning_rate": 0.0005436367375712635, "loss": 1.9149, "step": 17882 }, { "epoch": 0.59, "grad_norm": 0.42165952920913696, "learning_rate": 0.0005436306387490038, "loss": 1.8021, "step": 17883 }, { "epoch": 0.6, "grad_norm": 0.42993220686912537, "learning_rate": 0.0005436245396310124, "loss": 1.8407, "step": 17884 }, { "epoch": 0.6, "grad_norm": 0.4301559627056122, "learning_rate": 0.0005436184402172968, "loss": 1.8998, "step": 17885 }, { "epoch": 0.6, "grad_norm": 0.4140164256095886, "learning_rate": 0.0005436123405078643, "loss": 1.8022, "step": 17886 }, { "epoch": 0.6, "grad_norm": 0.41156524419784546, "learning_rate": 0.0005436062405027221, "loss": 1.7444, "step": 17887 }, { "epoch": 0.6, "grad_norm": 0.44353097677230835, "learning_rate": 0.000543600140201878, "loss": 1.8138, "step": 17888 }, { "epoch": 0.6, "grad_norm": 0.429646760225296, "learning_rate": 0.0005435940396053392, "loss": 1.8622, "step": 17889 }, { "epoch": 0.6, "grad_norm": 0.441781222820282, "learning_rate": 0.0005435879387131131, "loss": 1.8376, "step": 17890 }, { "epoch": 0.6, "grad_norm": 0.4230888783931732, "learning_rate": 0.0005435818375252071, "loss": 1.8001, "step": 17891 }, { "epoch": 0.6, "grad_norm": 0.436570405960083, "learning_rate": 0.0005435757360416287, "loss": 1.9233, "step": 17892 }, { "epoch": 0.6, "grad_norm": 0.42272430658340454, "learning_rate": 0.0005435696342623853, "loss": 1.8537, "step": 17893 }, { "epoch": 0.6, "grad_norm": 0.43632280826568604, "learning_rate": 0.0005435635321874841, "loss": 1.8446, "step": 17894 }, { "epoch": 0.6, "grad_norm": 0.41285616159439087, "learning_rate": 0.0005435574298169328, "loss": 1.8298, "step": 17895 }, { "epoch": 0.6, "grad_norm": 0.41598716378211975, "learning_rate": 0.0005435513271507385, "loss": 1.8309, "step": 17896 }, { "epoch": 0.6, "grad_norm": 0.44278478622436523, "learning_rate": 0.000543545224188909, "loss": 1.9088, "step": 17897 }, { "epoch": 0.6, "grad_norm": 0.43141406774520874, "learning_rate": 0.0005435391209314513, "loss": 1.8645, "step": 17898 }, { "epoch": 0.6, "grad_norm": 0.43065139651298523, "learning_rate": 0.000543533017378373, "loss": 1.8779, "step": 17899 }, { "epoch": 0.6, "grad_norm": 0.43604618310928345, "learning_rate": 0.0005435269135296816, "loss": 1.8211, "step": 17900 }, { "epoch": 0.6, "grad_norm": 0.4385765790939331, "learning_rate": 0.0005435208093853844, "loss": 1.8791, "step": 17901 }, { "epoch": 0.6, "grad_norm": 0.42855605483055115, "learning_rate": 0.0005435147049454887, "loss": 1.8596, "step": 17902 }, { "epoch": 0.6, "grad_norm": 0.44485121965408325, "learning_rate": 0.0005435086002100021, "loss": 1.832, "step": 17903 }, { "epoch": 0.6, "grad_norm": 0.4393715262413025, "learning_rate": 0.000543502495178932, "loss": 1.8059, "step": 17904 }, { "epoch": 0.6, "grad_norm": 0.43604278564453125, "learning_rate": 0.0005434963898522856, "loss": 1.8815, "step": 17905 }, { "epoch": 0.6, "grad_norm": 0.4407932460308075, "learning_rate": 0.0005434902842300706, "loss": 1.8389, "step": 17906 }, { "epoch": 0.6, "grad_norm": 0.462315171957016, "learning_rate": 0.0005434841783122942, "loss": 1.7803, "step": 17907 }, { "epoch": 0.6, "grad_norm": 0.42668741941452026, "learning_rate": 0.0005434780720989639, "loss": 1.8482, "step": 17908 }, { "epoch": 0.6, "grad_norm": 0.4277181923389435, "learning_rate": 0.0005434719655900872, "loss": 1.7839, "step": 17909 }, { "epoch": 0.6, "grad_norm": 0.4394077956676483, "learning_rate": 0.0005434658587856713, "loss": 1.8683, "step": 17910 }, { "epoch": 0.6, "grad_norm": 0.4325641095638275, "learning_rate": 0.0005434597516857237, "loss": 1.8514, "step": 17911 }, { "epoch": 0.6, "grad_norm": 0.4418916702270508, "learning_rate": 0.0005434536442902518, "loss": 1.8661, "step": 17912 }, { "epoch": 0.6, "grad_norm": 0.4213816523551941, "learning_rate": 0.0005434475365992631, "loss": 1.8829, "step": 17913 }, { "epoch": 0.6, "grad_norm": 0.4548610746860504, "learning_rate": 0.000543441428612765, "loss": 1.8783, "step": 17914 }, { "epoch": 0.6, "grad_norm": 0.4620591700077057, "learning_rate": 0.0005434353203307648, "loss": 1.87, "step": 17915 }, { "epoch": 0.6, "grad_norm": 0.4519790709018707, "learning_rate": 0.0005434292117532701, "loss": 1.8441, "step": 17916 }, { "epoch": 0.6, "grad_norm": 0.4240247905254364, "learning_rate": 0.0005434231028802882, "loss": 1.8138, "step": 17917 }, { "epoch": 0.6, "grad_norm": 0.48145151138305664, "learning_rate": 0.0005434169937118265, "loss": 1.829, "step": 17918 }, { "epoch": 0.6, "grad_norm": 0.45940038561820984, "learning_rate": 0.0005434108842478924, "loss": 1.843, "step": 17919 }, { "epoch": 0.6, "grad_norm": 0.41648033261299133, "learning_rate": 0.0005434047744884932, "loss": 1.8684, "step": 17920 }, { "epoch": 0.6, "grad_norm": 0.42788195610046387, "learning_rate": 0.0005433986644336366, "loss": 1.8644, "step": 17921 }, { "epoch": 0.6, "grad_norm": 0.45677900314331055, "learning_rate": 0.0005433925540833301, "loss": 1.83, "step": 17922 }, { "epoch": 0.6, "grad_norm": 0.46893584728240967, "learning_rate": 0.0005433864434375806, "loss": 1.9563, "step": 17923 }, { "epoch": 0.6, "grad_norm": 0.4297110140323639, "learning_rate": 0.000543380332496396, "loss": 1.938, "step": 17924 }, { "epoch": 0.6, "grad_norm": 0.41558608412742615, "learning_rate": 0.0005433742212597834, "loss": 1.8299, "step": 17925 }, { "epoch": 0.6, "grad_norm": 0.44794711470603943, "learning_rate": 0.0005433681097277505, "loss": 1.8997, "step": 17926 }, { "epoch": 0.6, "grad_norm": 0.4376979470252991, "learning_rate": 0.0005433619979003044, "loss": 1.7964, "step": 17927 }, { "epoch": 0.6, "grad_norm": 0.4261869788169861, "learning_rate": 0.0005433558857774527, "loss": 1.8688, "step": 17928 }, { "epoch": 0.6, "grad_norm": 0.4268263578414917, "learning_rate": 0.0005433497733592029, "loss": 1.8805, "step": 17929 }, { "epoch": 0.6, "grad_norm": 1.0784037113189697, "learning_rate": 0.0005433436606455623, "loss": 1.8156, "step": 17930 }, { "epoch": 0.6, "grad_norm": 0.42866453528404236, "learning_rate": 0.0005433375476365384, "loss": 1.8852, "step": 17931 }, { "epoch": 0.6, "grad_norm": 0.4043666422367096, "learning_rate": 0.0005433314343321387, "loss": 1.7955, "step": 17932 }, { "epoch": 0.6, "grad_norm": 0.4264467656612396, "learning_rate": 0.0005433253207323702, "loss": 1.8499, "step": 17933 }, { "epoch": 0.6, "grad_norm": 0.44908609986305237, "learning_rate": 0.0005433192068372408, "loss": 1.949, "step": 17934 }, { "epoch": 0.6, "grad_norm": 0.43291041254997253, "learning_rate": 0.0005433130926467577, "loss": 1.8881, "step": 17935 }, { "epoch": 0.6, "grad_norm": 0.4423317015171051, "learning_rate": 0.0005433069781609284, "loss": 1.8547, "step": 17936 }, { "epoch": 0.6, "grad_norm": 0.4259137213230133, "learning_rate": 0.0005433008633797602, "loss": 1.8786, "step": 17937 }, { "epoch": 0.6, "grad_norm": 0.41428032517433167, "learning_rate": 0.0005432947483032605, "loss": 1.804, "step": 17938 }, { "epoch": 0.6, "grad_norm": 0.4502376914024353, "learning_rate": 0.000543288632931437, "loss": 1.8068, "step": 17939 }, { "epoch": 0.6, "grad_norm": 0.42736074328422546, "learning_rate": 0.0005432825172642969, "loss": 1.8717, "step": 17940 }, { "epoch": 0.6, "grad_norm": 0.4309779703617096, "learning_rate": 0.0005432764013018476, "loss": 1.8327, "step": 17941 }, { "epoch": 0.6, "grad_norm": 0.4167243540287018, "learning_rate": 0.0005432702850440967, "loss": 1.8516, "step": 17942 }, { "epoch": 0.6, "grad_norm": 0.4385926425457001, "learning_rate": 0.0005432641684910515, "loss": 1.8747, "step": 17943 }, { "epoch": 0.6, "grad_norm": 0.42952626943588257, "learning_rate": 0.0005432580516427193, "loss": 1.8905, "step": 17944 }, { "epoch": 0.6, "grad_norm": 0.4134112000465393, "learning_rate": 0.0005432519344991079, "loss": 1.8173, "step": 17945 }, { "epoch": 0.6, "grad_norm": 0.4331250786781311, "learning_rate": 0.0005432458170602243, "loss": 1.8195, "step": 17946 }, { "epoch": 0.6, "grad_norm": 0.42493027448654175, "learning_rate": 0.0005432396993260762, "loss": 1.7983, "step": 17947 }, { "epoch": 0.6, "grad_norm": 0.4262878894805908, "learning_rate": 0.0005432335812966709, "loss": 1.8336, "step": 17948 }, { "epoch": 0.6, "grad_norm": 0.43142935633659363, "learning_rate": 0.0005432274629720159, "loss": 1.8982, "step": 17949 }, { "epoch": 0.6, "grad_norm": 0.42925480008125305, "learning_rate": 0.0005432213443521186, "loss": 1.8236, "step": 17950 }, { "epoch": 0.6, "grad_norm": 0.4326409101486206, "learning_rate": 0.0005432152254369866, "loss": 1.891, "step": 17951 }, { "epoch": 0.6, "grad_norm": 0.4179116487503052, "learning_rate": 0.0005432091062266269, "loss": 1.8358, "step": 17952 }, { "epoch": 0.6, "grad_norm": 0.4440641403198242, "learning_rate": 0.0005432029867210472, "loss": 1.7972, "step": 17953 }, { "epoch": 0.6, "grad_norm": 0.4414218068122864, "learning_rate": 0.000543196866920255, "loss": 1.8473, "step": 17954 }, { "epoch": 0.6, "grad_norm": 0.45744022727012634, "learning_rate": 0.0005431907468242576, "loss": 1.8296, "step": 17955 }, { "epoch": 0.6, "grad_norm": 0.444661945104599, "learning_rate": 0.0005431846264330625, "loss": 1.8607, "step": 17956 }, { "epoch": 0.6, "grad_norm": 0.43952083587646484, "learning_rate": 0.000543178505746677, "loss": 1.8658, "step": 17957 }, { "epoch": 0.6, "grad_norm": 0.44221028685569763, "learning_rate": 0.0005431723847651087, "loss": 1.8587, "step": 17958 }, { "epoch": 0.6, "grad_norm": 0.4410639703273773, "learning_rate": 0.000543166263488365, "loss": 1.8909, "step": 17959 }, { "epoch": 0.6, "grad_norm": 0.43131157755851746, "learning_rate": 0.0005431601419164533, "loss": 1.8082, "step": 17960 }, { "epoch": 0.6, "grad_norm": 0.4729379713535309, "learning_rate": 0.0005431540200493809, "loss": 1.8226, "step": 17961 }, { "epoch": 0.6, "grad_norm": 0.7572393417358398, "learning_rate": 0.0005431478978871555, "loss": 1.8078, "step": 17962 }, { "epoch": 0.6, "grad_norm": 0.4404008686542511, "learning_rate": 0.0005431417754297842, "loss": 1.9247, "step": 17963 }, { "epoch": 0.6, "grad_norm": 0.42709967494010925, "learning_rate": 0.0005431356526772747, "loss": 1.9128, "step": 17964 }, { "epoch": 0.6, "grad_norm": 0.43468496203422546, "learning_rate": 0.0005431295296296343, "loss": 1.8938, "step": 17965 }, { "epoch": 0.6, "grad_norm": 0.44627389311790466, "learning_rate": 0.0005431234062868705, "loss": 1.7618, "step": 17966 }, { "epoch": 0.6, "grad_norm": 0.4237014353275299, "learning_rate": 0.0005431172826489907, "loss": 1.8603, "step": 17967 }, { "epoch": 0.6, "grad_norm": 0.4296060800552368, "learning_rate": 0.0005431111587160023, "loss": 1.7918, "step": 17968 }, { "epoch": 0.6, "grad_norm": 0.4106874465942383, "learning_rate": 0.000543105034487913, "loss": 1.8672, "step": 17969 }, { "epoch": 0.6, "grad_norm": 0.4212205708026886, "learning_rate": 0.0005430989099647298, "loss": 1.808, "step": 17970 }, { "epoch": 0.6, "grad_norm": 0.41532525420188904, "learning_rate": 0.0005430927851464603, "loss": 1.9426, "step": 17971 }, { "epoch": 0.6, "grad_norm": 0.42066439986228943, "learning_rate": 0.0005430866600331121, "loss": 1.8992, "step": 17972 }, { "epoch": 0.6, "grad_norm": 0.42866337299346924, "learning_rate": 0.0005430805346246923, "loss": 1.8527, "step": 17973 }, { "epoch": 0.6, "grad_norm": 0.42361176013946533, "learning_rate": 0.0005430744089212088, "loss": 1.8237, "step": 17974 }, { "epoch": 0.6, "grad_norm": 0.4188612401485443, "learning_rate": 0.0005430682829226688, "loss": 1.8422, "step": 17975 }, { "epoch": 0.6, "grad_norm": 0.44541266560554504, "learning_rate": 0.0005430621566290796, "loss": 1.8364, "step": 17976 }, { "epoch": 0.6, "grad_norm": 0.4305615723133087, "learning_rate": 0.0005430560300404486, "loss": 1.9227, "step": 17977 }, { "epoch": 0.6, "grad_norm": 0.43043142557144165, "learning_rate": 0.0005430499031567836, "loss": 1.8646, "step": 17978 }, { "epoch": 0.6, "grad_norm": 0.41968730092048645, "learning_rate": 0.0005430437759780918, "loss": 1.7399, "step": 17979 }, { "epoch": 0.6, "grad_norm": 0.42626968026161194, "learning_rate": 0.0005430376485043806, "loss": 1.8379, "step": 17980 }, { "epoch": 0.6, "grad_norm": 0.4367285668849945, "learning_rate": 0.0005430315207356576, "loss": 1.8056, "step": 17981 }, { "epoch": 0.6, "grad_norm": 0.4318913519382477, "learning_rate": 0.0005430253926719302, "loss": 1.8107, "step": 17982 }, { "epoch": 0.6, "grad_norm": 0.43361222743988037, "learning_rate": 0.0005430192643132056, "loss": 1.8567, "step": 17983 }, { "epoch": 0.6, "grad_norm": 0.43789127469062805, "learning_rate": 0.0005430131356594915, "loss": 1.8309, "step": 17984 }, { "epoch": 0.6, "grad_norm": 0.5781199932098389, "learning_rate": 0.0005430070067107953, "loss": 1.9336, "step": 17985 }, { "epoch": 0.6, "grad_norm": 0.4334247410297394, "learning_rate": 0.0005430008774671243, "loss": 1.885, "step": 17986 }, { "epoch": 0.6, "grad_norm": 0.4335513114929199, "learning_rate": 0.0005429947479284862, "loss": 1.8167, "step": 17987 }, { "epoch": 0.6, "grad_norm": 0.42714351415634155, "learning_rate": 0.0005429886180948881, "loss": 1.8415, "step": 17988 }, { "epoch": 0.6, "grad_norm": 0.4170060157775879, "learning_rate": 0.0005429824879663377, "loss": 1.844, "step": 17989 }, { "epoch": 0.6, "grad_norm": 0.4340451657772064, "learning_rate": 0.0005429763575428424, "loss": 1.8594, "step": 17990 }, { "epoch": 0.6, "grad_norm": 0.4349386692047119, "learning_rate": 0.0005429702268244096, "loss": 1.834, "step": 17991 }, { "epoch": 0.6, "grad_norm": 0.42477184534072876, "learning_rate": 0.0005429640958110467, "loss": 1.8638, "step": 17992 }, { "epoch": 0.6, "grad_norm": 0.41698941588401794, "learning_rate": 0.0005429579645027611, "loss": 1.8031, "step": 17993 }, { "epoch": 0.6, "grad_norm": 0.44720321893692017, "learning_rate": 0.0005429518328995605, "loss": 1.8874, "step": 17994 }, { "epoch": 0.6, "grad_norm": 0.43446817994117737, "learning_rate": 0.0005429457010014521, "loss": 1.9188, "step": 17995 }, { "epoch": 0.6, "grad_norm": 0.4330277740955353, "learning_rate": 0.0005429395688084434, "loss": 1.9278, "step": 17996 }, { "epoch": 0.6, "grad_norm": 0.42954570055007935, "learning_rate": 0.0005429334363205419, "loss": 1.8985, "step": 17997 }, { "epoch": 0.6, "grad_norm": 0.41811424493789673, "learning_rate": 0.000542927303537755, "loss": 1.835, "step": 17998 }, { "epoch": 0.6, "grad_norm": 0.41732293367385864, "learning_rate": 0.00054292117046009, "loss": 1.8062, "step": 17999 }, { "epoch": 0.6, "grad_norm": 0.43884268403053284, "learning_rate": 0.0005429150370875547, "loss": 1.8455, "step": 18000 }, { "epoch": 0.6, "grad_norm": 0.4329114258289337, "learning_rate": 0.0005429089034201562, "loss": 1.7539, "step": 18001 }, { "epoch": 0.6, "grad_norm": 0.42790108919143677, "learning_rate": 0.0005429027694579022, "loss": 1.8549, "step": 18002 }, { "epoch": 0.6, "grad_norm": 0.5142188668251038, "learning_rate": 0.0005428966352007999, "loss": 1.9152, "step": 18003 }, { "epoch": 0.6, "grad_norm": 0.42245814204216003, "learning_rate": 0.000542890500648857, "loss": 1.8475, "step": 18004 }, { "epoch": 0.6, "grad_norm": 0.45636317133903503, "learning_rate": 0.0005428843658020808, "loss": 1.9123, "step": 18005 }, { "epoch": 0.6, "grad_norm": 0.4242183566093445, "learning_rate": 0.0005428782306604787, "loss": 1.8265, "step": 18006 }, { "epoch": 0.6, "grad_norm": 0.41121020913124084, "learning_rate": 0.0005428720952240581, "loss": 1.869, "step": 18007 }, { "epoch": 0.6, "grad_norm": 0.43597477674484253, "learning_rate": 0.0005428659594928268, "loss": 1.823, "step": 18008 }, { "epoch": 0.6, "grad_norm": 0.41871097683906555, "learning_rate": 0.000542859823466792, "loss": 1.8612, "step": 18009 }, { "epoch": 0.6, "grad_norm": 0.41173046827316284, "learning_rate": 0.000542853687145961, "loss": 1.8428, "step": 18010 }, { "epoch": 0.6, "grad_norm": 0.4248446524143219, "learning_rate": 0.0005428475505303415, "loss": 1.8805, "step": 18011 }, { "epoch": 0.6, "grad_norm": 0.4424401819705963, "learning_rate": 0.0005428414136199408, "loss": 1.9055, "step": 18012 }, { "epoch": 0.6, "grad_norm": 0.4158661663532257, "learning_rate": 0.0005428352764147664, "loss": 1.8222, "step": 18013 }, { "epoch": 0.6, "grad_norm": 0.4364006817340851, "learning_rate": 0.0005428291389148258, "loss": 1.8434, "step": 18014 }, { "epoch": 0.6, "grad_norm": 0.42867758870124817, "learning_rate": 0.0005428230011201263, "loss": 1.8016, "step": 18015 }, { "epoch": 0.6, "grad_norm": 0.4149106442928314, "learning_rate": 0.0005428168630306757, "loss": 1.8251, "step": 18016 }, { "epoch": 0.6, "grad_norm": 0.43282634019851685, "learning_rate": 0.000542810724646481, "loss": 1.8663, "step": 18017 }, { "epoch": 0.6, "grad_norm": 0.42299672961235046, "learning_rate": 0.00054280458596755, "loss": 1.8609, "step": 18018 }, { "epoch": 0.6, "grad_norm": 0.43296557664871216, "learning_rate": 0.0005427984469938899, "loss": 1.9125, "step": 18019 }, { "epoch": 0.6, "grad_norm": 0.4323115348815918, "learning_rate": 0.0005427923077255082, "loss": 1.8264, "step": 18020 }, { "epoch": 0.6, "grad_norm": 0.4139590859413147, "learning_rate": 0.0005427861681624126, "loss": 1.852, "step": 18021 }, { "epoch": 0.6, "grad_norm": 0.41751816868782043, "learning_rate": 0.0005427800283046102, "loss": 1.8029, "step": 18022 }, { "epoch": 0.6, "grad_norm": 0.4214443862438202, "learning_rate": 0.0005427738881521086, "loss": 1.7422, "step": 18023 }, { "epoch": 0.6, "grad_norm": 0.4267491400241852, "learning_rate": 0.0005427677477049153, "loss": 1.8702, "step": 18024 }, { "epoch": 0.6, "grad_norm": 0.41898858547210693, "learning_rate": 0.0005427616069630379, "loss": 1.8141, "step": 18025 }, { "epoch": 0.6, "grad_norm": 0.42879432439804077, "learning_rate": 0.0005427554659264835, "loss": 1.8775, "step": 18026 }, { "epoch": 0.6, "grad_norm": 0.4326145350933075, "learning_rate": 0.0005427493245952598, "loss": 1.8098, "step": 18027 }, { "epoch": 0.6, "grad_norm": 0.43102777004241943, "learning_rate": 0.0005427431829693742, "loss": 1.8703, "step": 18028 }, { "epoch": 0.6, "grad_norm": 0.4141283333301544, "learning_rate": 0.0005427370410488341, "loss": 1.8783, "step": 18029 }, { "epoch": 0.6, "grad_norm": 0.4260614812374115, "learning_rate": 0.000542730898833647, "loss": 1.7947, "step": 18030 }, { "epoch": 0.6, "grad_norm": 0.42235180735588074, "learning_rate": 0.0005427247563238204, "loss": 1.8636, "step": 18031 }, { "epoch": 0.6, "grad_norm": 0.4383959174156189, "learning_rate": 0.0005427186135193616, "loss": 1.8187, "step": 18032 }, { "epoch": 0.6, "grad_norm": 0.4129420518875122, "learning_rate": 0.0005427124704202782, "loss": 1.8124, "step": 18033 }, { "epoch": 0.6, "grad_norm": 0.42775532603263855, "learning_rate": 0.0005427063270265777, "loss": 1.8069, "step": 18034 }, { "epoch": 0.6, "grad_norm": 0.4187789857387543, "learning_rate": 0.0005427001833382674, "loss": 1.8587, "step": 18035 }, { "epoch": 0.6, "grad_norm": 0.42487889528274536, "learning_rate": 0.0005426940393553549, "loss": 1.9318, "step": 18036 }, { "epoch": 0.6, "grad_norm": 0.44294920563697815, "learning_rate": 0.0005426878950778476, "loss": 1.7474, "step": 18037 }, { "epoch": 0.6, "grad_norm": 0.420785516500473, "learning_rate": 0.0005426817505057529, "loss": 1.83, "step": 18038 }, { "epoch": 0.6, "grad_norm": 0.42719241976737976, "learning_rate": 0.0005426756056390783, "loss": 1.8725, "step": 18039 }, { "epoch": 0.6, "grad_norm": 0.41665568947792053, "learning_rate": 0.0005426694604778314, "loss": 1.8487, "step": 18040 }, { "epoch": 0.6, "grad_norm": 0.43106335401535034, "learning_rate": 0.0005426633150220194, "loss": 1.8255, "step": 18041 }, { "epoch": 0.6, "grad_norm": 0.43404775857925415, "learning_rate": 0.00054265716927165, "loss": 1.7976, "step": 18042 }, { "epoch": 0.6, "grad_norm": 0.5314675569534302, "learning_rate": 0.0005426510232267304, "loss": 1.9011, "step": 18043 }, { "epoch": 0.6, "grad_norm": 0.43467772006988525, "learning_rate": 0.0005426448768872683, "loss": 1.8161, "step": 18044 }, { "epoch": 0.6, "grad_norm": 0.4154885411262512, "learning_rate": 0.0005426387302532711, "loss": 1.9154, "step": 18045 }, { "epoch": 0.6, "grad_norm": 0.418344110250473, "learning_rate": 0.0005426325833247462, "loss": 1.8511, "step": 18046 }, { "epoch": 0.6, "grad_norm": 0.41560766100883484, "learning_rate": 0.000542626436101701, "loss": 1.7782, "step": 18047 }, { "epoch": 0.6, "grad_norm": 0.4178884029388428, "learning_rate": 0.0005426202885841432, "loss": 1.7983, "step": 18048 }, { "epoch": 0.6, "grad_norm": 0.4431256055831909, "learning_rate": 0.00054261414077208, "loss": 1.8399, "step": 18049 }, { "epoch": 0.6, "grad_norm": 0.44044414162635803, "learning_rate": 0.0005426079926655192, "loss": 1.7596, "step": 18050 }, { "epoch": 0.6, "grad_norm": 0.44504132866859436, "learning_rate": 0.0005426018442644678, "loss": 1.895, "step": 18051 }, { "epoch": 0.6, "grad_norm": 0.4456881880760193, "learning_rate": 0.0005425956955689336, "loss": 1.8668, "step": 18052 }, { "epoch": 0.6, "grad_norm": 0.4263615012168884, "learning_rate": 0.0005425895465789239, "loss": 1.8804, "step": 18053 }, { "epoch": 0.6, "grad_norm": 0.42888107895851135, "learning_rate": 0.0005425833972944464, "loss": 1.8875, "step": 18054 }, { "epoch": 0.6, "grad_norm": 0.4339417815208435, "learning_rate": 0.0005425772477155082, "loss": 1.8251, "step": 18055 }, { "epoch": 0.6, "grad_norm": 0.42391470074653625, "learning_rate": 0.0005425710978421172, "loss": 1.7796, "step": 18056 }, { "epoch": 0.6, "grad_norm": 0.4263664782047272, "learning_rate": 0.0005425649476742805, "loss": 1.815, "step": 18057 }, { "epoch": 0.6, "grad_norm": 0.41950729489326477, "learning_rate": 0.0005425587972120056, "loss": 1.8665, "step": 18058 }, { "epoch": 0.6, "grad_norm": 0.45021718740463257, "learning_rate": 0.0005425526464553002, "loss": 1.8636, "step": 18059 }, { "epoch": 0.6, "grad_norm": 0.42539650201797485, "learning_rate": 0.0005425464954041714, "loss": 1.8332, "step": 18060 }, { "epoch": 0.6, "grad_norm": 0.4159063398838043, "learning_rate": 0.0005425403440586272, "loss": 1.8912, "step": 18061 }, { "epoch": 0.6, "grad_norm": 0.43437260389328003, "learning_rate": 0.0005425341924186745, "loss": 1.8743, "step": 18062 }, { "epoch": 0.6, "grad_norm": 0.42242565751075745, "learning_rate": 0.0005425280404843213, "loss": 1.821, "step": 18063 }, { "epoch": 0.6, "grad_norm": 0.45761242508888245, "learning_rate": 0.0005425218882555746, "loss": 1.8923, "step": 18064 }, { "epoch": 0.6, "grad_norm": 0.43374139070510864, "learning_rate": 0.0005425157357324421, "loss": 1.8104, "step": 18065 }, { "epoch": 0.6, "grad_norm": 0.4327515959739685, "learning_rate": 0.0005425095829149312, "loss": 1.8485, "step": 18066 }, { "epoch": 0.6, "grad_norm": 0.43456515669822693, "learning_rate": 0.0005425034298030494, "loss": 1.8556, "step": 18067 }, { "epoch": 0.6, "grad_norm": 0.4362095296382904, "learning_rate": 0.0005424972763968043, "loss": 1.845, "step": 18068 }, { "epoch": 0.6, "grad_norm": 0.4185603857040405, "learning_rate": 0.0005424911226962031, "loss": 1.8019, "step": 18069 }, { "epoch": 0.6, "grad_norm": 0.4434072971343994, "learning_rate": 0.0005424849687012534, "loss": 1.7561, "step": 18070 }, { "epoch": 0.6, "grad_norm": 0.41919374465942383, "learning_rate": 0.0005424788144119628, "loss": 1.8351, "step": 18071 }, { "epoch": 0.6, "grad_norm": 0.4415231943130493, "learning_rate": 0.0005424726598283385, "loss": 1.9002, "step": 18072 }, { "epoch": 0.6, "grad_norm": 0.43497416377067566, "learning_rate": 0.0005424665049503883, "loss": 1.8483, "step": 18073 }, { "epoch": 0.6, "grad_norm": 0.42203831672668457, "learning_rate": 0.0005424603497781193, "loss": 1.8303, "step": 18074 }, { "epoch": 0.6, "grad_norm": 0.4366825819015503, "learning_rate": 0.0005424541943115392, "loss": 1.8051, "step": 18075 }, { "epoch": 0.6, "grad_norm": 0.42344269156455994, "learning_rate": 0.0005424480385506555, "loss": 1.8058, "step": 18076 }, { "epoch": 0.6, "grad_norm": 0.44146332144737244, "learning_rate": 0.0005424418824954755, "loss": 1.8679, "step": 18077 }, { "epoch": 0.6, "grad_norm": 0.43913349509239197, "learning_rate": 0.0005424357261460069, "loss": 1.8306, "step": 18078 }, { "epoch": 0.6, "grad_norm": 0.42574334144592285, "learning_rate": 0.000542429569502257, "loss": 1.8074, "step": 18079 }, { "epoch": 0.6, "grad_norm": 0.42717382311820984, "learning_rate": 0.0005424234125642333, "loss": 1.7857, "step": 18080 }, { "epoch": 0.6, "grad_norm": 0.4266504943370819, "learning_rate": 0.0005424172553319433, "loss": 1.8527, "step": 18081 }, { "epoch": 0.6, "grad_norm": 0.43936455249786377, "learning_rate": 0.0005424110978053944, "loss": 1.7943, "step": 18082 }, { "epoch": 0.6, "grad_norm": 0.4501200318336487, "learning_rate": 0.0005424049399845943, "loss": 1.8832, "step": 18083 }, { "epoch": 0.6, "grad_norm": 0.42112091183662415, "learning_rate": 0.0005423987818695503, "loss": 1.8201, "step": 18084 }, { "epoch": 0.6, "grad_norm": 0.4438951313495636, "learning_rate": 0.0005423926234602698, "loss": 1.8226, "step": 18085 }, { "epoch": 0.6, "grad_norm": 0.8213252425193787, "learning_rate": 0.0005423864647567604, "loss": 1.9194, "step": 18086 }, { "epoch": 0.6, "grad_norm": 0.4270336329936981, "learning_rate": 0.0005423803057590296, "loss": 1.8789, "step": 18087 }, { "epoch": 0.6, "grad_norm": 0.4392230212688446, "learning_rate": 0.0005423741464670848, "loss": 1.8363, "step": 18088 }, { "epoch": 0.6, "grad_norm": 0.4285270869731903, "learning_rate": 0.0005423679868809335, "loss": 1.9336, "step": 18089 }, { "epoch": 0.6, "grad_norm": 0.4255814552307129, "learning_rate": 0.0005423618270005831, "loss": 1.8144, "step": 18090 }, { "epoch": 0.6, "grad_norm": 0.4232911467552185, "learning_rate": 0.0005423556668260412, "loss": 1.7843, "step": 18091 }, { "epoch": 0.6, "grad_norm": 0.4118044376373291, "learning_rate": 0.0005423495063573153, "loss": 1.7474, "step": 18092 }, { "epoch": 0.6, "grad_norm": 0.42287740111351013, "learning_rate": 0.0005423433455944127, "loss": 1.7859, "step": 18093 }, { "epoch": 0.6, "grad_norm": 0.4227907657623291, "learning_rate": 0.0005423371845373411, "loss": 1.8076, "step": 18094 }, { "epoch": 0.6, "grad_norm": 0.43239787220954895, "learning_rate": 0.0005423310231861077, "loss": 1.8928, "step": 18095 }, { "epoch": 0.6, "grad_norm": 0.4253128170967102, "learning_rate": 0.0005423248615407203, "loss": 1.8038, "step": 18096 }, { "epoch": 0.6, "grad_norm": 0.45032355189323425, "learning_rate": 0.0005423186996011862, "loss": 1.879, "step": 18097 }, { "epoch": 0.6, "grad_norm": 0.44456255435943604, "learning_rate": 0.0005423125373675128, "loss": 1.839, "step": 18098 }, { "epoch": 0.6, "grad_norm": 0.44984322786331177, "learning_rate": 0.0005423063748397078, "loss": 1.884, "step": 18099 }, { "epoch": 0.6, "grad_norm": 0.4230586588382721, "learning_rate": 0.0005423002120177785, "loss": 1.8808, "step": 18100 }, { "epoch": 0.6, "grad_norm": 0.41483378410339355, "learning_rate": 0.0005422940489017324, "loss": 1.7652, "step": 18101 }, { "epoch": 0.6, "grad_norm": 0.4241340756416321, "learning_rate": 0.0005422878854915771, "loss": 1.8385, "step": 18102 }, { "epoch": 0.6, "grad_norm": 0.4406721591949463, "learning_rate": 0.0005422817217873199, "loss": 1.8227, "step": 18103 }, { "epoch": 0.6, "grad_norm": 0.4163239896297455, "learning_rate": 0.0005422755577889685, "loss": 1.857, "step": 18104 }, { "epoch": 0.6, "grad_norm": 0.4336931109428406, "learning_rate": 0.0005422693934965302, "loss": 1.8496, "step": 18105 }, { "epoch": 0.6, "grad_norm": 0.5078912377357483, "learning_rate": 0.0005422632289100127, "loss": 1.877, "step": 18106 }, { "epoch": 0.6, "grad_norm": 0.4139546751976013, "learning_rate": 0.0005422570640294232, "loss": 1.7879, "step": 18107 }, { "epoch": 0.6, "grad_norm": 0.424380898475647, "learning_rate": 0.0005422508988547694, "loss": 1.917, "step": 18108 }, { "epoch": 0.6, "grad_norm": 0.4229814112186432, "learning_rate": 0.0005422447333860586, "loss": 1.7662, "step": 18109 }, { "epoch": 0.6, "grad_norm": 0.4275428354740143, "learning_rate": 0.0005422385676232984, "loss": 1.8308, "step": 18110 }, { "epoch": 0.6, "grad_norm": 0.4177895188331604, "learning_rate": 0.0005422324015664964, "loss": 1.8392, "step": 18111 }, { "epoch": 0.6, "grad_norm": 0.41950708627700806, "learning_rate": 0.0005422262352156598, "loss": 1.8288, "step": 18112 }, { "epoch": 0.6, "grad_norm": 0.4317207336425781, "learning_rate": 0.0005422200685707963, "loss": 1.8827, "step": 18113 }, { "epoch": 0.6, "grad_norm": 0.4251399338245392, "learning_rate": 0.0005422139016319134, "loss": 1.7972, "step": 18114 }, { "epoch": 0.6, "grad_norm": 0.43107396364212036, "learning_rate": 0.0005422077343990184, "loss": 1.881, "step": 18115 }, { "epoch": 0.6, "grad_norm": 0.43013831973075867, "learning_rate": 0.000542201566872119, "loss": 1.838, "step": 18116 }, { "epoch": 0.6, "grad_norm": 0.4302443265914917, "learning_rate": 0.0005421953990512225, "loss": 1.8894, "step": 18117 }, { "epoch": 0.6, "grad_norm": 0.4328378736972809, "learning_rate": 0.0005421892309363365, "loss": 1.8922, "step": 18118 }, { "epoch": 0.6, "grad_norm": 0.4433540105819702, "learning_rate": 0.0005421830625274685, "loss": 1.853, "step": 18119 }, { "epoch": 0.6, "grad_norm": 0.42301157116889954, "learning_rate": 0.0005421768938246259, "loss": 1.8733, "step": 18120 }, { "epoch": 0.6, "grad_norm": 0.41635116934776306, "learning_rate": 0.0005421707248278163, "loss": 1.8757, "step": 18121 }, { "epoch": 0.6, "grad_norm": 0.4566482603549957, "learning_rate": 0.000542164555537047, "loss": 1.8089, "step": 18122 }, { "epoch": 0.6, "grad_norm": 0.4278963804244995, "learning_rate": 0.0005421583859523256, "loss": 1.8274, "step": 18123 }, { "epoch": 0.6, "grad_norm": 0.44934338331222534, "learning_rate": 0.0005421522160736598, "loss": 1.8148, "step": 18124 }, { "epoch": 0.6, "grad_norm": 0.4277570843696594, "learning_rate": 0.0005421460459010567, "loss": 1.8513, "step": 18125 }, { "epoch": 0.6, "grad_norm": 0.45763203501701355, "learning_rate": 0.000542139875434524, "loss": 1.8072, "step": 18126 }, { "epoch": 0.6, "grad_norm": 0.470450758934021, "learning_rate": 0.0005421337046740693, "loss": 1.8022, "step": 18127 }, { "epoch": 0.6, "grad_norm": 0.4325360059738159, "learning_rate": 0.0005421275336196999, "loss": 1.8749, "step": 18128 }, { "epoch": 0.6, "grad_norm": 0.4286392629146576, "learning_rate": 0.0005421213622714232, "loss": 1.8011, "step": 18129 }, { "epoch": 0.6, "grad_norm": 0.4344902038574219, "learning_rate": 0.000542115190629247, "loss": 1.8561, "step": 18130 }, { "epoch": 0.6, "grad_norm": 0.42534908652305603, "learning_rate": 0.0005421090186931786, "loss": 1.9139, "step": 18131 }, { "epoch": 0.6, "grad_norm": 0.4374818801879883, "learning_rate": 0.0005421028464632255, "loss": 1.8514, "step": 18132 }, { "epoch": 0.6, "grad_norm": 0.4306107759475708, "learning_rate": 0.0005420966739393952, "loss": 1.8303, "step": 18133 }, { "epoch": 0.6, "grad_norm": 0.4453926980495453, "learning_rate": 0.0005420905011216953, "loss": 1.7968, "step": 18134 }, { "epoch": 0.6, "grad_norm": 0.43522727489471436, "learning_rate": 0.0005420843280101331, "loss": 1.787, "step": 18135 }, { "epoch": 0.6, "grad_norm": 0.4368663430213928, "learning_rate": 0.0005420781546047163, "loss": 1.8983, "step": 18136 }, { "epoch": 0.6, "grad_norm": 0.4212523102760315, "learning_rate": 0.0005420719809054522, "loss": 1.8246, "step": 18137 }, { "epoch": 0.6, "grad_norm": 0.42914876341819763, "learning_rate": 0.0005420658069123483, "loss": 1.7888, "step": 18138 }, { "epoch": 0.6, "grad_norm": 0.41430631279945374, "learning_rate": 0.0005420596326254124, "loss": 1.7782, "step": 18139 }, { "epoch": 0.6, "grad_norm": 0.42247751355171204, "learning_rate": 0.0005420534580446517, "loss": 1.8501, "step": 18140 }, { "epoch": 0.6, "grad_norm": 0.42220255732536316, "learning_rate": 0.0005420472831700737, "loss": 1.7961, "step": 18141 }, { "epoch": 0.6, "grad_norm": 0.44921237230300903, "learning_rate": 0.000542041108001686, "loss": 1.9374, "step": 18142 }, { "epoch": 0.6, "grad_norm": 0.4358104467391968, "learning_rate": 0.0005420349325394961, "loss": 1.8445, "step": 18143 }, { "epoch": 0.6, "grad_norm": 0.43142813444137573, "learning_rate": 0.0005420287567835114, "loss": 1.8497, "step": 18144 }, { "epoch": 0.6, "grad_norm": 0.44341546297073364, "learning_rate": 0.0005420225807337394, "loss": 1.8662, "step": 18145 }, { "epoch": 0.6, "grad_norm": 0.41440197825431824, "learning_rate": 0.0005420164043901877, "loss": 1.902, "step": 18146 }, { "epoch": 0.6, "grad_norm": 0.4374389052391052, "learning_rate": 0.0005420102277528638, "loss": 1.8674, "step": 18147 }, { "epoch": 0.6, "grad_norm": 0.43260639905929565, "learning_rate": 0.0005420040508217751, "loss": 1.8855, "step": 18148 }, { "epoch": 0.6, "grad_norm": 0.4420101046562195, "learning_rate": 0.000541997873596929, "loss": 1.9349, "step": 18149 }, { "epoch": 0.6, "grad_norm": 0.43768200278282166, "learning_rate": 0.0005419916960783332, "loss": 1.842, "step": 18150 }, { "epoch": 0.6, "grad_norm": 0.41683563590049744, "learning_rate": 0.0005419855182659953, "loss": 1.9058, "step": 18151 }, { "epoch": 0.6, "grad_norm": 0.45804691314697266, "learning_rate": 0.0005419793401599225, "loss": 1.8239, "step": 18152 }, { "epoch": 0.6, "grad_norm": 0.43052470684051514, "learning_rate": 0.0005419731617601225, "loss": 1.8513, "step": 18153 }, { "epoch": 0.6, "grad_norm": 0.43038487434387207, "learning_rate": 0.0005419669830666027, "loss": 1.8551, "step": 18154 }, { "epoch": 0.6, "grad_norm": 0.4332825839519501, "learning_rate": 0.0005419608040793707, "loss": 1.84, "step": 18155 }, { "epoch": 0.6, "grad_norm": 0.42188265919685364, "learning_rate": 0.0005419546247984338, "loss": 1.7837, "step": 18156 }, { "epoch": 0.6, "grad_norm": 0.6642127633094788, "learning_rate": 0.0005419484452237998, "loss": 1.8335, "step": 18157 }, { "epoch": 0.6, "grad_norm": 0.4349176287651062, "learning_rate": 0.0005419422653554759, "loss": 1.8038, "step": 18158 }, { "epoch": 0.6, "grad_norm": 0.42532098293304443, "learning_rate": 0.0005419360851934698, "loss": 1.9274, "step": 18159 }, { "epoch": 0.6, "grad_norm": 0.43170493841171265, "learning_rate": 0.0005419299047377889, "loss": 1.8381, "step": 18160 }, { "epoch": 0.6, "grad_norm": 0.4449862837791443, "learning_rate": 0.0005419237239884407, "loss": 1.8667, "step": 18161 }, { "epoch": 0.6, "grad_norm": 0.42255187034606934, "learning_rate": 0.0005419175429454329, "loss": 1.8874, "step": 18162 }, { "epoch": 0.6, "grad_norm": 0.43407824635505676, "learning_rate": 0.0005419113616087727, "loss": 1.9116, "step": 18163 }, { "epoch": 0.6, "grad_norm": 0.4367678463459015, "learning_rate": 0.0005419051799784679, "loss": 1.859, "step": 18164 }, { "epoch": 0.6, "grad_norm": 0.4422072470188141, "learning_rate": 0.0005418989980545258, "loss": 1.8974, "step": 18165 }, { "epoch": 0.6, "grad_norm": 0.4479376971721649, "learning_rate": 0.0005418928158369539, "loss": 1.8139, "step": 18166 }, { "epoch": 0.6, "grad_norm": 0.4360787570476532, "learning_rate": 0.0005418866333257598, "loss": 1.8909, "step": 18167 }, { "epoch": 0.6, "grad_norm": 0.42229732871055603, "learning_rate": 0.0005418804505209508, "loss": 1.8382, "step": 18168 }, { "epoch": 0.6, "grad_norm": 0.42134276032447815, "learning_rate": 0.0005418742674225349, "loss": 1.8305, "step": 18169 }, { "epoch": 0.6, "grad_norm": 0.4185774624347687, "learning_rate": 0.0005418680840305191, "loss": 1.8225, "step": 18170 }, { "epoch": 0.6, "grad_norm": 0.4217720925807953, "learning_rate": 0.000541861900344911, "loss": 1.7652, "step": 18171 }, { "epoch": 0.6, "grad_norm": 0.43175578117370605, "learning_rate": 0.0005418557163657183, "loss": 1.728, "step": 18172 }, { "epoch": 0.6, "grad_norm": 0.4335349500179291, "learning_rate": 0.0005418495320929484, "loss": 1.8481, "step": 18173 }, { "epoch": 0.6, "grad_norm": 0.44412288069725037, "learning_rate": 0.0005418433475266088, "loss": 1.8478, "step": 18174 }, { "epoch": 0.6, "grad_norm": 0.41737836599349976, "learning_rate": 0.000541837162666707, "loss": 1.8287, "step": 18175 }, { "epoch": 0.6, "grad_norm": 0.42010143399238586, "learning_rate": 0.0005418309775132506, "loss": 1.7937, "step": 18176 }, { "epoch": 0.6, "grad_norm": 0.41926586627960205, "learning_rate": 0.0005418247920662469, "loss": 1.7616, "step": 18177 }, { "epoch": 0.6, "grad_norm": 0.4269782602787018, "learning_rate": 0.0005418186063257034, "loss": 1.7981, "step": 18178 }, { "epoch": 0.6, "grad_norm": 0.4330681562423706, "learning_rate": 0.0005418124202916279, "loss": 1.8606, "step": 18179 }, { "epoch": 0.6, "grad_norm": 0.4368734359741211, "learning_rate": 0.0005418062339640279, "loss": 1.8617, "step": 18180 }, { "epoch": 0.6, "grad_norm": 0.42748722434043884, "learning_rate": 0.0005418000473429106, "loss": 1.8337, "step": 18181 }, { "epoch": 0.6, "grad_norm": 0.4301142394542694, "learning_rate": 0.0005417938604282837, "loss": 1.7785, "step": 18182 }, { "epoch": 0.6, "grad_norm": 0.4164092540740967, "learning_rate": 0.0005417876732201546, "loss": 1.8274, "step": 18183 }, { "epoch": 0.6, "grad_norm": 0.42750027775764465, "learning_rate": 0.000541781485718531, "loss": 1.8699, "step": 18184 }, { "epoch": 0.61, "grad_norm": 0.4082266092300415, "learning_rate": 0.0005417752979234203, "loss": 1.7795, "step": 18185 }, { "epoch": 0.61, "grad_norm": 0.4366660714149475, "learning_rate": 0.0005417691098348299, "loss": 1.8994, "step": 18186 }, { "epoch": 0.61, "grad_norm": 0.4493752717971802, "learning_rate": 0.0005417629214527675, "loss": 1.8913, "step": 18187 }, { "epoch": 0.61, "grad_norm": 0.4362671971321106, "learning_rate": 0.0005417567327772406, "loss": 1.8823, "step": 18188 }, { "epoch": 0.61, "grad_norm": 0.949373185634613, "learning_rate": 0.0005417505438082566, "loss": 1.9448, "step": 18189 }, { "epoch": 0.61, "grad_norm": 0.47373074293136597, "learning_rate": 0.000541744354545823, "loss": 1.8405, "step": 18190 }, { "epoch": 0.61, "grad_norm": 0.4407157301902771, "learning_rate": 0.0005417381649899474, "loss": 1.8621, "step": 18191 }, { "epoch": 0.61, "grad_norm": 0.4435567259788513, "learning_rate": 0.0005417319751406373, "loss": 1.9086, "step": 18192 }, { "epoch": 0.61, "grad_norm": 0.4384385645389557, "learning_rate": 0.0005417257849979002, "loss": 1.8356, "step": 18193 }, { "epoch": 0.61, "grad_norm": 0.4354080557823181, "learning_rate": 0.0005417195945617436, "loss": 1.8081, "step": 18194 }, { "epoch": 0.61, "grad_norm": 0.4307553768157959, "learning_rate": 0.0005417134038321751, "loss": 1.8238, "step": 18195 }, { "epoch": 0.61, "grad_norm": 0.4350625276565552, "learning_rate": 0.000541707212809202, "loss": 1.8298, "step": 18196 }, { "epoch": 0.61, "grad_norm": 0.44393548369407654, "learning_rate": 0.000541701021492832, "loss": 1.876, "step": 18197 }, { "epoch": 0.61, "grad_norm": 0.43619778752326965, "learning_rate": 0.0005416948298830728, "loss": 1.8, "step": 18198 }, { "epoch": 0.61, "grad_norm": 0.451612651348114, "learning_rate": 0.0005416886379799315, "loss": 1.8736, "step": 18199 }, { "epoch": 0.61, "grad_norm": 0.4231310784816742, "learning_rate": 0.0005416824457834158, "loss": 1.819, "step": 18200 }, { "epoch": 0.61, "grad_norm": 0.4271835684776306, "learning_rate": 0.0005416762532935331, "loss": 1.8249, "step": 18201 }, { "epoch": 0.61, "grad_norm": 0.43834617733955383, "learning_rate": 0.0005416700605102912, "loss": 1.7796, "step": 18202 }, { "epoch": 0.61, "grad_norm": 0.4340671896934509, "learning_rate": 0.0005416638674336974, "loss": 1.9087, "step": 18203 }, { "epoch": 0.61, "grad_norm": 0.44188937544822693, "learning_rate": 0.0005416576740637593, "loss": 1.8492, "step": 18204 }, { "epoch": 0.61, "grad_norm": 0.4284335970878601, "learning_rate": 0.0005416514804004843, "loss": 1.8894, "step": 18205 }, { "epoch": 0.61, "grad_norm": 0.4165290296077728, "learning_rate": 0.0005416452864438802, "loss": 1.9381, "step": 18206 }, { "epoch": 0.61, "grad_norm": 0.4387080669403076, "learning_rate": 0.0005416390921939542, "loss": 1.9068, "step": 18207 }, { "epoch": 0.61, "grad_norm": 0.4325588643550873, "learning_rate": 0.000541632897650714, "loss": 1.9117, "step": 18208 }, { "epoch": 0.61, "grad_norm": 0.44127947092056274, "learning_rate": 0.000541626702814167, "loss": 1.8979, "step": 18209 }, { "epoch": 0.61, "grad_norm": 0.42873483896255493, "learning_rate": 0.0005416205076843208, "loss": 1.855, "step": 18210 }, { "epoch": 0.61, "grad_norm": 0.41528502106666565, "learning_rate": 0.0005416143122611829, "loss": 1.8261, "step": 18211 }, { "epoch": 0.61, "grad_norm": 0.4288574755191803, "learning_rate": 0.0005416081165447609, "loss": 1.8107, "step": 18212 }, { "epoch": 0.61, "grad_norm": 0.43061333894729614, "learning_rate": 0.0005416019205350622, "loss": 1.8678, "step": 18213 }, { "epoch": 0.61, "grad_norm": 0.41467469930648804, "learning_rate": 0.0005415957242320944, "loss": 1.9105, "step": 18214 }, { "epoch": 0.61, "grad_norm": 0.42704787850379944, "learning_rate": 0.0005415895276358649, "loss": 1.8599, "step": 18215 }, { "epoch": 0.61, "grad_norm": 0.4179653823375702, "learning_rate": 0.0005415833307463813, "loss": 1.848, "step": 18216 }, { "epoch": 0.61, "grad_norm": 0.42143312096595764, "learning_rate": 0.0005415771335636511, "loss": 1.8606, "step": 18217 }, { "epoch": 0.61, "grad_norm": 0.42457517981529236, "learning_rate": 0.0005415709360876821, "loss": 1.7694, "step": 18218 }, { "epoch": 0.61, "grad_norm": 0.4348103106021881, "learning_rate": 0.0005415647383184815, "loss": 1.9159, "step": 18219 }, { "epoch": 0.61, "grad_norm": 0.4372923672199249, "learning_rate": 0.0005415585402560567, "loss": 1.8681, "step": 18220 }, { "epoch": 0.61, "grad_norm": 0.4178422689437866, "learning_rate": 0.0005415523419004155, "loss": 1.8424, "step": 18221 }, { "epoch": 0.61, "grad_norm": 0.4187041223049164, "learning_rate": 0.0005415461432515654, "loss": 1.856, "step": 18222 }, { "epoch": 0.61, "grad_norm": 0.43419352173805237, "learning_rate": 0.0005415399443095139, "loss": 1.848, "step": 18223 }, { "epoch": 0.61, "grad_norm": 0.4466956853866577, "learning_rate": 0.0005415337450742685, "loss": 1.9127, "step": 18224 }, { "epoch": 0.61, "grad_norm": 0.41850772500038147, "learning_rate": 0.0005415275455458367, "loss": 1.8371, "step": 18225 }, { "epoch": 0.61, "grad_norm": 0.42338237166404724, "learning_rate": 0.0005415213457242259, "loss": 1.841, "step": 18226 }, { "epoch": 0.61, "grad_norm": 0.4390296936035156, "learning_rate": 0.000541515145609444, "loss": 1.8768, "step": 18227 }, { "epoch": 0.61, "grad_norm": 0.42534875869750977, "learning_rate": 0.0005415089452014982, "loss": 1.8244, "step": 18228 }, { "epoch": 0.61, "grad_norm": 0.43514853715896606, "learning_rate": 0.0005415027445003961, "loss": 1.8571, "step": 18229 }, { "epoch": 0.61, "grad_norm": 0.43772849440574646, "learning_rate": 0.0005414965435061453, "loss": 1.8935, "step": 18230 }, { "epoch": 0.61, "grad_norm": 0.4261346757411957, "learning_rate": 0.0005414903422187533, "loss": 1.8349, "step": 18231 }, { "epoch": 0.61, "grad_norm": 0.42570510506629944, "learning_rate": 0.0005414841406382275, "loss": 1.8029, "step": 18232 }, { "epoch": 0.61, "grad_norm": 0.4295739233493805, "learning_rate": 0.0005414779387645757, "loss": 1.8272, "step": 18233 }, { "epoch": 0.61, "grad_norm": 0.44024407863616943, "learning_rate": 0.0005414717365978051, "loss": 1.8702, "step": 18234 }, { "epoch": 0.61, "grad_norm": 0.4433479905128479, "learning_rate": 0.0005414655341379234, "loss": 1.8301, "step": 18235 }, { "epoch": 0.61, "grad_norm": 0.5179204344749451, "learning_rate": 0.0005414593313849383, "loss": 1.7941, "step": 18236 }, { "epoch": 0.61, "grad_norm": 0.4410650432109833, "learning_rate": 0.000541453128338857, "loss": 1.877, "step": 18237 }, { "epoch": 0.61, "grad_norm": 0.44072553515434265, "learning_rate": 0.0005414469249996872, "loss": 1.888, "step": 18238 }, { "epoch": 0.61, "grad_norm": 0.42231228947639465, "learning_rate": 0.0005414407213674363, "loss": 1.8204, "step": 18239 }, { "epoch": 0.61, "grad_norm": 0.43496689200401306, "learning_rate": 0.0005414345174421121, "loss": 1.884, "step": 18240 }, { "epoch": 0.61, "grad_norm": 0.4260518252849579, "learning_rate": 0.000541428313223722, "loss": 1.8848, "step": 18241 }, { "epoch": 0.61, "grad_norm": 0.44401657581329346, "learning_rate": 0.0005414221087122733, "loss": 1.973, "step": 18242 }, { "epoch": 0.61, "grad_norm": 0.43801143765449524, "learning_rate": 0.0005414159039077739, "loss": 1.8313, "step": 18243 }, { "epoch": 0.61, "grad_norm": 0.44096478819847107, "learning_rate": 0.0005414096988102311, "loss": 1.8665, "step": 18244 }, { "epoch": 0.61, "grad_norm": 0.4362410306930542, "learning_rate": 0.0005414034934196524, "loss": 1.8375, "step": 18245 }, { "epoch": 0.61, "grad_norm": 0.4303131699562073, "learning_rate": 0.0005413972877360456, "loss": 1.8028, "step": 18246 }, { "epoch": 0.61, "grad_norm": 0.42890891432762146, "learning_rate": 0.000541391081759418, "loss": 1.8333, "step": 18247 }, { "epoch": 0.61, "grad_norm": 0.43738487362861633, "learning_rate": 0.0005413848754897773, "loss": 1.8882, "step": 18248 }, { "epoch": 0.61, "grad_norm": 0.43946439027786255, "learning_rate": 0.0005413786689271307, "loss": 1.7828, "step": 18249 }, { "epoch": 0.61, "grad_norm": 0.44256728887557983, "learning_rate": 0.0005413724620714861, "loss": 1.8344, "step": 18250 }, { "epoch": 0.61, "grad_norm": 0.4515180289745331, "learning_rate": 0.0005413662549228509, "loss": 1.8785, "step": 18251 }, { "epoch": 0.61, "grad_norm": 0.42922109365463257, "learning_rate": 0.0005413600474812327, "loss": 1.7835, "step": 18252 }, { "epoch": 0.61, "grad_norm": 0.4237455427646637, "learning_rate": 0.0005413538397466388, "loss": 1.8199, "step": 18253 }, { "epoch": 0.61, "grad_norm": 0.45501136779785156, "learning_rate": 0.000541347631719077, "loss": 1.9272, "step": 18254 }, { "epoch": 0.61, "grad_norm": 0.42206791043281555, "learning_rate": 0.0005413414233985547, "loss": 1.8196, "step": 18255 }, { "epoch": 0.61, "grad_norm": 0.45392611622810364, "learning_rate": 0.0005413352147850795, "loss": 1.9004, "step": 18256 }, { "epoch": 0.61, "grad_norm": 0.45382535457611084, "learning_rate": 0.0005413290058786588, "loss": 1.864, "step": 18257 }, { "epoch": 0.61, "grad_norm": 0.43648627400398254, "learning_rate": 0.0005413227966793003, "loss": 1.8482, "step": 18258 }, { "epoch": 0.61, "grad_norm": 0.4313012659549713, "learning_rate": 0.0005413165871870116, "loss": 1.8783, "step": 18259 }, { "epoch": 0.61, "grad_norm": 0.4242250919342041, "learning_rate": 0.0005413103774018001, "loss": 1.8474, "step": 18260 }, { "epoch": 0.61, "grad_norm": 0.43497806787490845, "learning_rate": 0.0005413041673236732, "loss": 1.7352, "step": 18261 }, { "epoch": 0.61, "grad_norm": 0.4386000633239746, "learning_rate": 0.0005412979569526387, "loss": 1.8417, "step": 18262 }, { "epoch": 0.61, "grad_norm": 0.429080992937088, "learning_rate": 0.000541291746288704, "loss": 1.8402, "step": 18263 }, { "epoch": 0.61, "grad_norm": 0.41860565543174744, "learning_rate": 0.0005412855353318768, "loss": 1.8881, "step": 18264 }, { "epoch": 0.61, "grad_norm": 0.43461859226226807, "learning_rate": 0.0005412793240821643, "loss": 1.8733, "step": 18265 }, { "epoch": 0.61, "grad_norm": 0.4296170175075531, "learning_rate": 0.0005412731125395744, "loss": 1.8332, "step": 18266 }, { "epoch": 0.61, "grad_norm": 0.4246489107608795, "learning_rate": 0.0005412669007041145, "loss": 1.8861, "step": 18267 }, { "epoch": 0.61, "grad_norm": 0.42309483885765076, "learning_rate": 0.000541260688575792, "loss": 1.8831, "step": 18268 }, { "epoch": 0.61, "grad_norm": 0.43723830580711365, "learning_rate": 0.0005412544761546147, "loss": 1.9286, "step": 18269 }, { "epoch": 0.61, "grad_norm": 0.4343549609184265, "learning_rate": 0.0005412482634405899, "loss": 1.8038, "step": 18270 }, { "epoch": 0.61, "grad_norm": 0.4234847128391266, "learning_rate": 0.0005412420504337254, "loss": 1.8062, "step": 18271 }, { "epoch": 0.61, "grad_norm": 0.41805627942085266, "learning_rate": 0.0005412358371340285, "loss": 1.8535, "step": 18272 }, { "epoch": 0.61, "grad_norm": 0.42476245760917664, "learning_rate": 0.0005412296235415068, "loss": 1.8088, "step": 18273 }, { "epoch": 0.61, "grad_norm": 0.44197168946266174, "learning_rate": 0.000541223409656168, "loss": 1.9056, "step": 18274 }, { "epoch": 0.61, "grad_norm": 0.4195595681667328, "learning_rate": 0.0005412171954780194, "loss": 1.7871, "step": 18275 }, { "epoch": 0.61, "grad_norm": 0.4237626791000366, "learning_rate": 0.0005412109810070687, "loss": 1.8325, "step": 18276 }, { "epoch": 0.61, "grad_norm": 0.47660207748413086, "learning_rate": 0.0005412047662433236, "loss": 1.8647, "step": 18277 }, { "epoch": 0.61, "grad_norm": 0.41843128204345703, "learning_rate": 0.0005411985511867912, "loss": 1.8437, "step": 18278 }, { "epoch": 0.61, "grad_norm": 0.43246278166770935, "learning_rate": 0.0005411923358374794, "loss": 1.8973, "step": 18279 }, { "epoch": 0.61, "grad_norm": 0.4448755383491516, "learning_rate": 0.0005411861201953956, "loss": 1.8833, "step": 18280 }, { "epoch": 0.61, "grad_norm": 0.4269198775291443, "learning_rate": 0.0005411799042605474, "loss": 1.8837, "step": 18281 }, { "epoch": 0.61, "grad_norm": 0.44227975606918335, "learning_rate": 0.0005411736880329423, "loss": 1.8848, "step": 18282 }, { "epoch": 0.61, "grad_norm": 0.41722747683525085, "learning_rate": 0.000541167471512588, "loss": 1.7716, "step": 18283 }, { "epoch": 0.61, "grad_norm": 0.423836350440979, "learning_rate": 0.0005411612546994918, "loss": 1.8613, "step": 18284 }, { "epoch": 0.61, "grad_norm": 0.4454798400402069, "learning_rate": 0.0005411550375936615, "loss": 1.8919, "step": 18285 }, { "epoch": 0.61, "grad_norm": 0.4352746307849884, "learning_rate": 0.0005411488201951044, "loss": 1.8648, "step": 18286 }, { "epoch": 0.61, "grad_norm": 0.4352804720401764, "learning_rate": 0.0005411426025038282, "loss": 1.9069, "step": 18287 }, { "epoch": 0.61, "grad_norm": 0.4583914279937744, "learning_rate": 0.0005411363845198403, "loss": 1.8538, "step": 18288 }, { "epoch": 0.61, "grad_norm": 0.413102924823761, "learning_rate": 0.0005411301662431486, "loss": 1.7641, "step": 18289 }, { "epoch": 0.61, "grad_norm": 0.4260675013065338, "learning_rate": 0.0005411239476737602, "loss": 1.8777, "step": 18290 }, { "epoch": 0.61, "grad_norm": 0.43636274337768555, "learning_rate": 0.000541117728811683, "loss": 1.8041, "step": 18291 }, { "epoch": 0.61, "grad_norm": 0.4451645016670227, "learning_rate": 0.0005411115096569243, "loss": 1.9194, "step": 18292 }, { "epoch": 0.61, "grad_norm": 0.5404552221298218, "learning_rate": 0.0005411052902094918, "loss": 1.9156, "step": 18293 }, { "epoch": 0.61, "grad_norm": 0.44493648409843445, "learning_rate": 0.000541099070469393, "loss": 1.7698, "step": 18294 }, { "epoch": 0.61, "grad_norm": 0.42257803678512573, "learning_rate": 0.0005410928504366355, "loss": 1.9258, "step": 18295 }, { "epoch": 0.61, "grad_norm": 0.4325559139251709, "learning_rate": 0.0005410866301112267, "loss": 1.8004, "step": 18296 }, { "epoch": 0.61, "grad_norm": 0.4139001667499542, "learning_rate": 0.0005410804094931744, "loss": 1.8617, "step": 18297 }, { "epoch": 0.61, "grad_norm": 0.4238743484020233, "learning_rate": 0.0005410741885824858, "loss": 1.9314, "step": 18298 }, { "epoch": 0.61, "grad_norm": 0.4271613359451294, "learning_rate": 0.0005410679673791689, "loss": 1.8147, "step": 18299 }, { "epoch": 0.61, "grad_norm": 0.41945910453796387, "learning_rate": 0.0005410617458832307, "loss": 1.854, "step": 18300 }, { "epoch": 0.61, "grad_norm": 0.4236515462398529, "learning_rate": 0.0005410555240946793, "loss": 1.7758, "step": 18301 }, { "epoch": 0.61, "grad_norm": 0.4286212921142578, "learning_rate": 0.0005410493020135219, "loss": 1.863, "step": 18302 }, { "epoch": 0.61, "grad_norm": 0.46178728342056274, "learning_rate": 0.0005410430796397662, "loss": 1.8411, "step": 18303 }, { "epoch": 0.61, "grad_norm": 0.43149667978286743, "learning_rate": 0.0005410368569734197, "loss": 1.9211, "step": 18304 }, { "epoch": 0.61, "grad_norm": 0.4327675402164459, "learning_rate": 0.0005410306340144901, "loss": 1.8865, "step": 18305 }, { "epoch": 0.61, "grad_norm": 0.44040432572364807, "learning_rate": 0.0005410244107629847, "loss": 1.8785, "step": 18306 }, { "epoch": 0.61, "grad_norm": 0.4462635815143585, "learning_rate": 0.0005410181872189113, "loss": 1.8315, "step": 18307 }, { "epoch": 0.61, "grad_norm": 0.4150193929672241, "learning_rate": 0.0005410119633822772, "loss": 1.8313, "step": 18308 }, { "epoch": 0.61, "grad_norm": 0.4297008812427521, "learning_rate": 0.0005410057392530901, "loss": 1.8728, "step": 18309 }, { "epoch": 0.61, "grad_norm": 0.43448159098625183, "learning_rate": 0.0005409995148313575, "loss": 1.8666, "step": 18310 }, { "epoch": 0.61, "grad_norm": 0.4282359480857849, "learning_rate": 0.0005409932901170871, "loss": 1.8079, "step": 18311 }, { "epoch": 0.61, "grad_norm": 0.4214501678943634, "learning_rate": 0.0005409870651102862, "loss": 1.8563, "step": 18312 }, { "epoch": 0.61, "grad_norm": 0.4402005970478058, "learning_rate": 0.0005409808398109627, "loss": 1.9321, "step": 18313 }, { "epoch": 0.61, "grad_norm": 0.4553736746311188, "learning_rate": 0.0005409746142191239, "loss": 1.9306, "step": 18314 }, { "epoch": 0.61, "grad_norm": 0.4191625118255615, "learning_rate": 0.0005409683883347774, "loss": 1.8497, "step": 18315 }, { "epoch": 0.61, "grad_norm": 0.4311486482620239, "learning_rate": 0.0005409621621579307, "loss": 1.889, "step": 18316 }, { "epoch": 0.61, "grad_norm": 0.426749587059021, "learning_rate": 0.0005409559356885917, "loss": 1.8882, "step": 18317 }, { "epoch": 0.61, "grad_norm": 0.45368295907974243, "learning_rate": 0.0005409497089267674, "loss": 1.8863, "step": 18318 }, { "epoch": 0.61, "grad_norm": 0.45475804805755615, "learning_rate": 0.0005409434818724659, "loss": 1.8429, "step": 18319 }, { "epoch": 0.61, "grad_norm": 0.4246087670326233, "learning_rate": 0.0005409372545256944, "loss": 1.83, "step": 18320 }, { "epoch": 0.61, "grad_norm": 0.6265209317207336, "learning_rate": 0.0005409310268864605, "loss": 1.8385, "step": 18321 }, { "epoch": 0.61, "grad_norm": 0.45370325446128845, "learning_rate": 0.0005409247989547719, "loss": 1.8021, "step": 18322 }, { "epoch": 0.61, "grad_norm": 0.4402036964893341, "learning_rate": 0.0005409185707306362, "loss": 1.8697, "step": 18323 }, { "epoch": 0.61, "grad_norm": 0.4728141725063324, "learning_rate": 0.0005409123422140607, "loss": 1.865, "step": 18324 }, { "epoch": 0.61, "grad_norm": 0.41359588503837585, "learning_rate": 0.0005409061134050532, "loss": 1.7878, "step": 18325 }, { "epoch": 0.61, "grad_norm": 0.4334338307380676, "learning_rate": 0.0005408998843036212, "loss": 1.8426, "step": 18326 }, { "epoch": 0.61, "grad_norm": 0.4177301526069641, "learning_rate": 0.0005408936549097721, "loss": 1.7979, "step": 18327 }, { "epoch": 0.61, "grad_norm": 0.4379143714904785, "learning_rate": 0.0005408874252235137, "loss": 1.7858, "step": 18328 }, { "epoch": 0.61, "grad_norm": 0.43299025297164917, "learning_rate": 0.0005408811952448534, "loss": 1.8453, "step": 18329 }, { "epoch": 0.61, "grad_norm": 0.4292519986629486, "learning_rate": 0.0005408749649737989, "loss": 1.8664, "step": 18330 }, { "epoch": 0.61, "grad_norm": 0.421565443277359, "learning_rate": 0.0005408687344103576, "loss": 1.8469, "step": 18331 }, { "epoch": 0.61, "grad_norm": 0.41831740736961365, "learning_rate": 0.0005408625035545372, "loss": 1.7774, "step": 18332 }, { "epoch": 0.61, "grad_norm": 0.430968701839447, "learning_rate": 0.0005408562724063452, "loss": 1.815, "step": 18333 }, { "epoch": 0.61, "grad_norm": 0.4599558115005493, "learning_rate": 0.0005408500409657892, "loss": 1.8801, "step": 18334 }, { "epoch": 0.61, "grad_norm": 0.4246785044670105, "learning_rate": 0.0005408438092328766, "loss": 1.8281, "step": 18335 }, { "epoch": 0.61, "grad_norm": 0.43205130100250244, "learning_rate": 0.0005408375772076153, "loss": 1.8256, "step": 18336 }, { "epoch": 0.61, "grad_norm": 0.4294445216655731, "learning_rate": 0.0005408313448900126, "loss": 1.8101, "step": 18337 }, { "epoch": 0.61, "grad_norm": 0.44520407915115356, "learning_rate": 0.0005408251122800761, "loss": 1.8836, "step": 18338 }, { "epoch": 0.61, "grad_norm": 0.42920154333114624, "learning_rate": 0.0005408188793778134, "loss": 1.7906, "step": 18339 }, { "epoch": 0.61, "grad_norm": 0.4365840554237366, "learning_rate": 0.000540812646183232, "loss": 1.9195, "step": 18340 }, { "epoch": 0.61, "grad_norm": 0.45056629180908203, "learning_rate": 0.0005408064126963396, "loss": 1.9173, "step": 18341 }, { "epoch": 0.61, "grad_norm": 0.46050354838371277, "learning_rate": 0.0005408001789171437, "loss": 1.8145, "step": 18342 }, { "epoch": 0.61, "grad_norm": 0.4165479838848114, "learning_rate": 0.0005407939448456518, "loss": 1.7977, "step": 18343 }, { "epoch": 0.61, "grad_norm": 0.43452978134155273, "learning_rate": 0.0005407877104818717, "loss": 1.8587, "step": 18344 }, { "epoch": 0.61, "grad_norm": 0.4291161298751831, "learning_rate": 0.0005407814758258107, "loss": 1.8431, "step": 18345 }, { "epoch": 0.61, "grad_norm": 0.44024601578712463, "learning_rate": 0.0005407752408774765, "loss": 1.9088, "step": 18346 }, { "epoch": 0.61, "grad_norm": 0.4161055386066437, "learning_rate": 0.0005407690056368766, "loss": 1.8899, "step": 18347 }, { "epoch": 0.61, "grad_norm": 0.4288797974586487, "learning_rate": 0.0005407627701040185, "loss": 1.8878, "step": 18348 }, { "epoch": 0.61, "grad_norm": 0.4390718340873718, "learning_rate": 0.0005407565342789099, "loss": 1.8388, "step": 18349 }, { "epoch": 0.61, "grad_norm": 0.43982717394828796, "learning_rate": 0.0005407502981615585, "loss": 1.8719, "step": 18350 }, { "epoch": 0.61, "grad_norm": 0.43429699540138245, "learning_rate": 0.0005407440617519716, "loss": 2.0146, "step": 18351 }, { "epoch": 0.61, "grad_norm": 0.4396451413631439, "learning_rate": 0.0005407378250501569, "loss": 1.9012, "step": 18352 }, { "epoch": 0.61, "grad_norm": 0.44051602482795715, "learning_rate": 0.0005407315880561219, "loss": 1.8293, "step": 18353 }, { "epoch": 0.61, "grad_norm": 0.44050875306129456, "learning_rate": 0.0005407253507698743, "loss": 1.867, "step": 18354 }, { "epoch": 0.61, "grad_norm": 0.4281065762042999, "learning_rate": 0.0005407191131914216, "loss": 1.8247, "step": 18355 }, { "epoch": 0.61, "grad_norm": 0.42492932081222534, "learning_rate": 0.0005407128753207713, "loss": 1.8398, "step": 18356 }, { "epoch": 0.61, "grad_norm": 0.44532686471939087, "learning_rate": 0.0005407066371579311, "loss": 1.8737, "step": 18357 }, { "epoch": 0.61, "grad_norm": 0.4299968183040619, "learning_rate": 0.0005407003987029084, "loss": 1.8519, "step": 18358 }, { "epoch": 0.61, "grad_norm": 0.43759286403656006, "learning_rate": 0.000540694159955711, "loss": 1.8765, "step": 18359 }, { "epoch": 0.61, "grad_norm": 0.41909655928611755, "learning_rate": 0.0005406879209163462, "loss": 1.7644, "step": 18360 }, { "epoch": 0.61, "grad_norm": 0.42355358600616455, "learning_rate": 0.000540681681584822, "loss": 1.88, "step": 18361 }, { "epoch": 0.61, "grad_norm": 0.42866480350494385, "learning_rate": 0.0005406754419611454, "loss": 1.8686, "step": 18362 }, { "epoch": 0.61, "grad_norm": 0.4300248920917511, "learning_rate": 0.0005406692020453245, "loss": 1.8708, "step": 18363 }, { "epoch": 0.61, "grad_norm": 0.4270619750022888, "learning_rate": 0.0005406629618373665, "loss": 1.886, "step": 18364 }, { "epoch": 0.61, "grad_norm": 0.42524030804634094, "learning_rate": 0.0005406567213372792, "loss": 1.804, "step": 18365 }, { "epoch": 0.61, "grad_norm": 0.4154144525527954, "learning_rate": 0.0005406504805450701, "loss": 1.8295, "step": 18366 }, { "epoch": 0.61, "grad_norm": 0.4263312518596649, "learning_rate": 0.0005406442394607469, "loss": 1.8632, "step": 18367 }, { "epoch": 0.61, "grad_norm": 0.4286877512931824, "learning_rate": 0.0005406379980843168, "loss": 1.8641, "step": 18368 }, { "epoch": 0.61, "grad_norm": 0.4258524775505066, "learning_rate": 0.0005406317564157878, "loss": 1.8218, "step": 18369 }, { "epoch": 0.61, "grad_norm": 0.41134703159332275, "learning_rate": 0.0005406255144551672, "loss": 1.8905, "step": 18370 }, { "epoch": 0.61, "grad_norm": 0.4373665750026703, "learning_rate": 0.0005406192722024627, "loss": 1.8665, "step": 18371 }, { "epoch": 0.61, "grad_norm": 0.44545331597328186, "learning_rate": 0.0005406130296576819, "loss": 1.7593, "step": 18372 }, { "epoch": 0.61, "grad_norm": 0.4240911304950714, "learning_rate": 0.0005406067868208323, "loss": 1.804, "step": 18373 }, { "epoch": 0.61, "grad_norm": 0.5540503263473511, "learning_rate": 0.0005406005436919215, "loss": 1.9516, "step": 18374 }, { "epoch": 0.61, "grad_norm": 0.43010687828063965, "learning_rate": 0.0005405943002709571, "loss": 1.8624, "step": 18375 }, { "epoch": 0.61, "grad_norm": 0.4317372739315033, "learning_rate": 0.0005405880565579467, "loss": 1.8435, "step": 18376 }, { "epoch": 0.61, "grad_norm": 0.4321932792663574, "learning_rate": 0.0005405818125528978, "loss": 1.8417, "step": 18377 }, { "epoch": 0.61, "grad_norm": 0.42884349822998047, "learning_rate": 0.0005405755682558181, "loss": 1.8247, "step": 18378 }, { "epoch": 0.61, "grad_norm": 0.43941256403923035, "learning_rate": 0.0005405693236667151, "loss": 1.9054, "step": 18379 }, { "epoch": 0.61, "grad_norm": 0.41971805691719055, "learning_rate": 0.0005405630787855964, "loss": 1.8942, "step": 18380 }, { "epoch": 0.61, "grad_norm": 0.4317457973957062, "learning_rate": 0.0005405568336124695, "loss": 1.8081, "step": 18381 }, { "epoch": 0.61, "grad_norm": 0.43209317326545715, "learning_rate": 0.000540550588147342, "loss": 1.7552, "step": 18382 }, { "epoch": 0.61, "grad_norm": 0.40832358598709106, "learning_rate": 0.0005405443423902217, "loss": 1.8934, "step": 18383 }, { "epoch": 0.61, "grad_norm": 0.4152403175830841, "learning_rate": 0.0005405380963411159, "loss": 1.8384, "step": 18384 }, { "epoch": 0.61, "grad_norm": 0.42012253403663635, "learning_rate": 0.0005405318500000323, "loss": 1.7815, "step": 18385 }, { "epoch": 0.61, "grad_norm": 0.42757001519203186, "learning_rate": 0.0005405256033669785, "loss": 1.8447, "step": 18386 }, { "epoch": 0.61, "grad_norm": 0.42800524830818176, "learning_rate": 0.000540519356441962, "loss": 1.8462, "step": 18387 }, { "epoch": 0.61, "grad_norm": 0.41705313324928284, "learning_rate": 0.0005405131092249904, "loss": 1.8108, "step": 18388 }, { "epoch": 0.61, "grad_norm": 0.43150991201400757, "learning_rate": 0.0005405068617160715, "loss": 1.8162, "step": 18389 }, { "epoch": 0.61, "grad_norm": 0.41827619075775146, "learning_rate": 0.0005405006139152126, "loss": 1.7902, "step": 18390 }, { "epoch": 0.61, "grad_norm": 0.42774689197540283, "learning_rate": 0.0005404943658224214, "loss": 1.8486, "step": 18391 }, { "epoch": 0.61, "grad_norm": 0.41810595989227295, "learning_rate": 0.0005404881174377055, "loss": 1.761, "step": 18392 }, { "epoch": 0.61, "grad_norm": 0.42352885007858276, "learning_rate": 0.0005404818687610724, "loss": 1.8246, "step": 18393 }, { "epoch": 0.61, "grad_norm": 0.4582543671131134, "learning_rate": 0.0005404756197925298, "loss": 1.8336, "step": 18394 }, { "epoch": 0.61, "grad_norm": 0.41416987776756287, "learning_rate": 0.0005404693705320852, "loss": 1.8468, "step": 18395 }, { "epoch": 0.61, "grad_norm": 0.43412405252456665, "learning_rate": 0.0005404631209797462, "loss": 1.8761, "step": 18396 }, { "epoch": 0.61, "grad_norm": 0.42363739013671875, "learning_rate": 0.0005404568711355205, "loss": 1.8483, "step": 18397 }, { "epoch": 0.61, "grad_norm": 0.4200616180896759, "learning_rate": 0.0005404506209994155, "loss": 1.8822, "step": 18398 }, { "epoch": 0.61, "grad_norm": 0.43801382184028625, "learning_rate": 0.0005404443705714388, "loss": 1.9132, "step": 18399 }, { "epoch": 0.61, "grad_norm": 0.42437925934791565, "learning_rate": 0.0005404381198515982, "loss": 1.9034, "step": 18400 }, { "epoch": 0.61, "grad_norm": 0.4264315366744995, "learning_rate": 0.0005404318688399011, "loss": 1.7598, "step": 18401 }, { "epoch": 0.61, "grad_norm": 0.4119463264942169, "learning_rate": 0.0005404256175363551, "loss": 1.8638, "step": 18402 }, { "epoch": 0.61, "grad_norm": 0.42227888107299805, "learning_rate": 0.000540419365940968, "loss": 1.8509, "step": 18403 }, { "epoch": 0.61, "grad_norm": 0.41472089290618896, "learning_rate": 0.000540413114053747, "loss": 1.8354, "step": 18404 }, { "epoch": 0.61, "grad_norm": 0.4311777949333191, "learning_rate": 0.0005404068618747, "loss": 1.8904, "step": 18405 }, { "epoch": 0.61, "grad_norm": 0.41893458366394043, "learning_rate": 0.0005404006094038345, "loss": 1.8668, "step": 18406 }, { "epoch": 0.61, "grad_norm": 0.4249982237815857, "learning_rate": 0.000540394356641158, "loss": 1.8354, "step": 18407 }, { "epoch": 0.61, "grad_norm": 0.4989071190357208, "learning_rate": 0.0005403881035866782, "loss": 1.8879, "step": 18408 }, { "epoch": 0.61, "grad_norm": 0.4303048253059387, "learning_rate": 0.0005403818502404028, "loss": 1.8482, "step": 18409 }, { "epoch": 0.61, "grad_norm": 0.42401811480522156, "learning_rate": 0.0005403755966023391, "loss": 1.8252, "step": 18410 }, { "epoch": 0.61, "grad_norm": 0.4279913306236267, "learning_rate": 0.0005403693426724948, "loss": 1.7401, "step": 18411 }, { "epoch": 0.61, "grad_norm": 0.42229047417640686, "learning_rate": 0.0005403630884508777, "loss": 1.9105, "step": 18412 }, { "epoch": 0.61, "grad_norm": 0.42686617374420166, "learning_rate": 0.0005403568339374951, "loss": 1.8574, "step": 18413 }, { "epoch": 0.61, "grad_norm": 0.4379846751689911, "learning_rate": 0.0005403505791323547, "loss": 1.8179, "step": 18414 }, { "epoch": 0.61, "grad_norm": 0.4310215413570404, "learning_rate": 0.0005403443240354643, "loss": 1.766, "step": 18415 }, { "epoch": 0.61, "grad_norm": 0.45199471712112427, "learning_rate": 0.0005403380686468312, "loss": 1.8787, "step": 18416 }, { "epoch": 0.61, "grad_norm": 0.42566362023353577, "learning_rate": 0.000540331812966463, "loss": 1.8097, "step": 18417 }, { "epoch": 0.61, "grad_norm": 0.4307243525981903, "learning_rate": 0.0005403255569943674, "loss": 1.8399, "step": 18418 }, { "epoch": 0.61, "grad_norm": 0.43321657180786133, "learning_rate": 0.000540319300730552, "loss": 1.9013, "step": 18419 }, { "epoch": 0.61, "grad_norm": 0.4307057559490204, "learning_rate": 0.0005403130441750245, "loss": 1.7693, "step": 18420 }, { "epoch": 0.61, "grad_norm": 0.42801767587661743, "learning_rate": 0.0005403067873277923, "loss": 1.8792, "step": 18421 }, { "epoch": 0.61, "grad_norm": 0.4292723536491394, "learning_rate": 0.000540300530188863, "loss": 1.7544, "step": 18422 }, { "epoch": 0.61, "grad_norm": 0.4187231659889221, "learning_rate": 0.0005402942727582445, "loss": 1.7588, "step": 18423 }, { "epoch": 0.61, "grad_norm": 0.43554627895355225, "learning_rate": 0.0005402880150359438, "loss": 1.8813, "step": 18424 }, { "epoch": 0.61, "grad_norm": 0.41525933146476746, "learning_rate": 0.0005402817570219691, "loss": 1.8594, "step": 18425 }, { "epoch": 0.61, "grad_norm": 0.43279504776000977, "learning_rate": 0.0005402754987163278, "loss": 1.7786, "step": 18426 }, { "epoch": 0.61, "grad_norm": 0.4423847794532776, "learning_rate": 0.0005402692401190272, "loss": 1.8434, "step": 18427 }, { "epoch": 0.61, "grad_norm": 0.43053126335144043, "learning_rate": 0.0005402629812300754, "loss": 1.8451, "step": 18428 }, { "epoch": 0.61, "grad_norm": 0.45105230808258057, "learning_rate": 0.0005402567220494796, "loss": 1.8765, "step": 18429 }, { "epoch": 0.61, "grad_norm": 0.4350469410419464, "learning_rate": 0.0005402504625772475, "loss": 1.8156, "step": 18430 }, { "epoch": 0.61, "grad_norm": 0.4216930568218231, "learning_rate": 0.0005402442028133869, "loss": 1.851, "step": 18431 }, { "epoch": 0.61, "grad_norm": 0.4795244634151459, "learning_rate": 0.0005402379427579051, "loss": 1.7948, "step": 18432 }, { "epoch": 0.61, "grad_norm": 0.44853827357292175, "learning_rate": 0.0005402316824108099, "loss": 1.8042, "step": 18433 }, { "epoch": 0.61, "grad_norm": 0.41545844078063965, "learning_rate": 0.0005402254217721087, "loss": 1.8463, "step": 18434 }, { "epoch": 0.61, "grad_norm": 0.4460448622703552, "learning_rate": 0.0005402191608418095, "loss": 1.8441, "step": 18435 }, { "epoch": 0.61, "grad_norm": 0.41756653785705566, "learning_rate": 0.0005402128996199194, "loss": 1.8367, "step": 18436 }, { "epoch": 0.61, "grad_norm": 0.43722930550575256, "learning_rate": 0.0005402066381064464, "loss": 1.8631, "step": 18437 }, { "epoch": 0.61, "grad_norm": 0.437592476606369, "learning_rate": 0.0005402003763013978, "loss": 1.8544, "step": 18438 }, { "epoch": 0.61, "grad_norm": 0.4454730451107025, "learning_rate": 0.0005401941142047813, "loss": 1.8785, "step": 18439 }, { "epoch": 0.61, "grad_norm": 0.43762630224227905, "learning_rate": 0.0005401878518166047, "loss": 1.8341, "step": 18440 }, { "epoch": 0.61, "grad_norm": 0.4291559159755707, "learning_rate": 0.0005401815891368753, "loss": 1.7653, "step": 18441 }, { "epoch": 0.61, "grad_norm": 0.4406738877296448, "learning_rate": 0.0005401753261656009, "loss": 1.8686, "step": 18442 }, { "epoch": 0.61, "grad_norm": 0.4289017617702484, "learning_rate": 0.0005401690629027891, "loss": 1.8634, "step": 18443 }, { "epoch": 0.61, "grad_norm": 0.4325636029243469, "learning_rate": 0.0005401627993484473, "loss": 1.8979, "step": 18444 }, { "epoch": 0.61, "grad_norm": 0.4225277900695801, "learning_rate": 0.0005401565355025833, "loss": 1.8919, "step": 18445 }, { "epoch": 0.61, "grad_norm": 0.44141408801078796, "learning_rate": 0.0005401502713652047, "loss": 1.8511, "step": 18446 }, { "epoch": 0.61, "grad_norm": 0.4652163088321686, "learning_rate": 0.000540144006936319, "loss": 1.8395, "step": 18447 }, { "epoch": 0.61, "grad_norm": 0.4359642565250397, "learning_rate": 0.000540137742215934, "loss": 1.8954, "step": 18448 }, { "epoch": 0.61, "grad_norm": 0.4727412164211273, "learning_rate": 0.000540131477204057, "loss": 1.8964, "step": 18449 }, { "epoch": 0.61, "grad_norm": 0.4254462718963623, "learning_rate": 0.0005401252119006958, "loss": 1.8258, "step": 18450 }, { "epoch": 0.61, "grad_norm": 0.42252281308174133, "learning_rate": 0.0005401189463058581, "loss": 1.8468, "step": 18451 }, { "epoch": 0.61, "grad_norm": 0.4623674154281616, "learning_rate": 0.0005401126804195513, "loss": 1.8256, "step": 18452 }, { "epoch": 0.61, "grad_norm": 0.4262453615665436, "learning_rate": 0.000540106414241783, "loss": 1.8261, "step": 18453 }, { "epoch": 0.61, "grad_norm": 0.42406681180000305, "learning_rate": 0.000540100147772561, "loss": 1.7863, "step": 18454 }, { "epoch": 0.61, "grad_norm": 0.4239937365055084, "learning_rate": 0.0005400938810118927, "loss": 1.8276, "step": 18455 }, { "epoch": 0.61, "grad_norm": 0.4246024191379547, "learning_rate": 0.0005400876139597859, "loss": 1.741, "step": 18456 }, { "epoch": 0.61, "grad_norm": 0.42363399267196655, "learning_rate": 0.0005400813466162481, "loss": 1.8239, "step": 18457 }, { "epoch": 0.61, "grad_norm": 0.4415356516838074, "learning_rate": 0.0005400750789812868, "loss": 1.8392, "step": 18458 }, { "epoch": 0.61, "grad_norm": 0.4428830146789551, "learning_rate": 0.0005400688110549099, "loss": 1.8831, "step": 18459 }, { "epoch": 0.61, "grad_norm": 0.4196144640445709, "learning_rate": 0.0005400625428371247, "loss": 1.8562, "step": 18460 }, { "epoch": 0.61, "grad_norm": 0.4252186417579651, "learning_rate": 0.000540056274327939, "loss": 1.8177, "step": 18461 }, { "epoch": 0.61, "grad_norm": 0.4465648829936981, "learning_rate": 0.0005400500055273603, "loss": 1.8431, "step": 18462 }, { "epoch": 0.61, "grad_norm": 0.42594248056411743, "learning_rate": 0.0005400437364353963, "loss": 1.7984, "step": 18463 }, { "epoch": 0.61, "grad_norm": 0.4386802315711975, "learning_rate": 0.0005400374670520546, "loss": 1.8312, "step": 18464 }, { "epoch": 0.61, "grad_norm": 0.42476508021354675, "learning_rate": 0.0005400311973773428, "loss": 1.8663, "step": 18465 }, { "epoch": 0.61, "grad_norm": 0.42798563838005066, "learning_rate": 0.0005400249274112685, "loss": 1.8102, "step": 18466 }, { "epoch": 0.61, "grad_norm": 0.4120221436023712, "learning_rate": 0.0005400186571538392, "loss": 1.8441, "step": 18467 }, { "epoch": 0.61, "grad_norm": 0.434198260307312, "learning_rate": 0.0005400123866050627, "loss": 1.8409, "step": 18468 }, { "epoch": 0.61, "grad_norm": 0.445901483297348, "learning_rate": 0.0005400061157649465, "loss": 1.8452, "step": 18469 }, { "epoch": 0.61, "grad_norm": 0.4534487724304199, "learning_rate": 0.0005399998446334982, "loss": 1.9511, "step": 18470 }, { "epoch": 0.61, "grad_norm": 0.42520737648010254, "learning_rate": 0.0005399935732107255, "loss": 1.8701, "step": 18471 }, { "epoch": 0.61, "grad_norm": 0.44488513469696045, "learning_rate": 0.000539987301496636, "loss": 1.8879, "step": 18472 }, { "epoch": 0.61, "grad_norm": 0.5091753602027893, "learning_rate": 0.0005399810294912372, "loss": 1.8023, "step": 18473 }, { "epoch": 0.61, "grad_norm": 0.41915446519851685, "learning_rate": 0.0005399747571945369, "loss": 1.7887, "step": 18474 }, { "epoch": 0.61, "grad_norm": 1.0340713262557983, "learning_rate": 0.0005399684846065425, "loss": 1.8518, "step": 18475 }, { "epoch": 0.61, "grad_norm": 0.438053160905838, "learning_rate": 0.0005399622117272618, "loss": 1.8547, "step": 18476 }, { "epoch": 0.61, "grad_norm": 0.433725506067276, "learning_rate": 0.0005399559385567022, "loss": 1.7866, "step": 18477 }, { "epoch": 0.61, "grad_norm": 0.4400116801261902, "learning_rate": 0.0005399496650948715, "loss": 1.9253, "step": 18478 }, { "epoch": 0.61, "grad_norm": 0.43167465925216675, "learning_rate": 0.0005399433913417774, "loss": 1.8561, "step": 18479 }, { "epoch": 0.61, "grad_norm": 0.4314253330230713, "learning_rate": 0.0005399371172974273, "loss": 1.7851, "step": 18480 }, { "epoch": 0.61, "grad_norm": 0.42285534739494324, "learning_rate": 0.0005399308429618288, "loss": 1.8561, "step": 18481 }, { "epoch": 0.61, "grad_norm": 0.4249167740345001, "learning_rate": 0.0005399245683349896, "loss": 1.7861, "step": 18482 }, { "epoch": 0.61, "grad_norm": 0.42428305745124817, "learning_rate": 0.0005399182934169174, "loss": 1.7631, "step": 18483 }, { "epoch": 0.61, "grad_norm": 0.43261781334877014, "learning_rate": 0.0005399120182076198, "loss": 1.81, "step": 18484 }, { "epoch": 0.61, "grad_norm": 0.43424278497695923, "learning_rate": 0.0005399057427071043, "loss": 1.7823, "step": 18485 }, { "epoch": 0.62, "grad_norm": 0.43874096870422363, "learning_rate": 0.0005398994669153786, "loss": 1.8759, "step": 18486 }, { "epoch": 0.62, "grad_norm": 0.4271235466003418, "learning_rate": 0.0005398931908324503, "loss": 1.8207, "step": 18487 }, { "epoch": 0.62, "grad_norm": 0.4257873296737671, "learning_rate": 0.0005398869144583269, "loss": 1.875, "step": 18488 }, { "epoch": 0.62, "grad_norm": 0.4527156949043274, "learning_rate": 0.0005398806377930162, "loss": 1.8233, "step": 18489 }, { "epoch": 0.62, "grad_norm": 0.4173165261745453, "learning_rate": 0.0005398743608365258, "loss": 1.8054, "step": 18490 }, { "epoch": 0.62, "grad_norm": 0.4458087384700775, "learning_rate": 0.0005398680835888631, "loss": 1.8306, "step": 18491 }, { "epoch": 0.62, "grad_norm": 0.44007396697998047, "learning_rate": 0.0005398618060500361, "loss": 1.8298, "step": 18492 }, { "epoch": 0.62, "grad_norm": 0.4252033829689026, "learning_rate": 0.0005398555282200521, "loss": 1.9078, "step": 18493 }, { "epoch": 0.62, "grad_norm": 0.4648483395576477, "learning_rate": 0.0005398492500989189, "loss": 1.8255, "step": 18494 }, { "epoch": 0.62, "grad_norm": 0.41897088289260864, "learning_rate": 0.000539842971686644, "loss": 1.8637, "step": 18495 }, { "epoch": 0.62, "grad_norm": 0.42621898651123047, "learning_rate": 0.0005398366929832351, "loss": 1.8388, "step": 18496 }, { "epoch": 0.62, "grad_norm": 0.4335111677646637, "learning_rate": 0.0005398304139886997, "loss": 1.8933, "step": 18497 }, { "epoch": 0.62, "grad_norm": 0.4314165413379669, "learning_rate": 0.0005398241347030456, "loss": 1.8028, "step": 18498 }, { "epoch": 0.62, "grad_norm": 0.43656688928604126, "learning_rate": 0.0005398178551262804, "loss": 1.8978, "step": 18499 }, { "epoch": 0.62, "grad_norm": 0.42793482542037964, "learning_rate": 0.0005398115752584116, "loss": 1.8102, "step": 18500 }, { "epoch": 0.62, "grad_norm": 0.44285523891448975, "learning_rate": 0.0005398052950994468, "loss": 1.847, "step": 18501 }, { "epoch": 0.62, "grad_norm": 0.43536677956581116, "learning_rate": 0.0005397990146493938, "loss": 1.8289, "step": 18502 }, { "epoch": 0.62, "grad_norm": 0.4402660131454468, "learning_rate": 0.0005397927339082601, "loss": 1.9177, "step": 18503 }, { "epoch": 0.62, "grad_norm": 0.41808223724365234, "learning_rate": 0.0005397864528760534, "loss": 1.886, "step": 18504 }, { "epoch": 0.62, "grad_norm": 0.42941832542419434, "learning_rate": 0.0005397801715527812, "loss": 1.8267, "step": 18505 }, { "epoch": 0.62, "grad_norm": 0.4449635148048401, "learning_rate": 0.0005397738899384512, "loss": 1.8838, "step": 18506 }, { "epoch": 0.62, "grad_norm": 0.42519623041152954, "learning_rate": 0.0005397676080330711, "loss": 1.8318, "step": 18507 }, { "epoch": 0.62, "grad_norm": 0.43117809295654297, "learning_rate": 0.0005397613258366484, "loss": 1.8532, "step": 18508 }, { "epoch": 0.62, "grad_norm": 0.4142376780509949, "learning_rate": 0.0005397550433491908, "loss": 1.8464, "step": 18509 }, { "epoch": 0.62, "grad_norm": 0.4210285544395447, "learning_rate": 0.0005397487605707059, "loss": 1.813, "step": 18510 }, { "epoch": 0.62, "grad_norm": 0.43853920698165894, "learning_rate": 0.0005397424775012013, "loss": 1.8696, "step": 18511 }, { "epoch": 0.62, "grad_norm": 0.42648613452911377, "learning_rate": 0.0005397361941406846, "loss": 1.8712, "step": 18512 }, { "epoch": 0.62, "grad_norm": 0.44128498435020447, "learning_rate": 0.0005397299104891636, "loss": 1.886, "step": 18513 }, { "epoch": 0.62, "grad_norm": 0.4192463159561157, "learning_rate": 0.0005397236265466458, "loss": 1.8276, "step": 18514 }, { "epoch": 0.62, "grad_norm": 0.42106008529663086, "learning_rate": 0.0005397173423131388, "loss": 1.8628, "step": 18515 }, { "epoch": 0.62, "grad_norm": 0.4310165047645569, "learning_rate": 0.0005397110577886503, "loss": 1.9424, "step": 18516 }, { "epoch": 0.62, "grad_norm": 0.43135905265808105, "learning_rate": 0.0005397047729731879, "loss": 1.8335, "step": 18517 }, { "epoch": 0.62, "grad_norm": 0.42795923352241516, "learning_rate": 0.0005396984878667592, "loss": 1.8173, "step": 18518 }, { "epoch": 0.62, "grad_norm": 0.4489186108112335, "learning_rate": 0.0005396922024693718, "loss": 1.9292, "step": 18519 }, { "epoch": 0.62, "grad_norm": 0.562159538269043, "learning_rate": 0.0005396859167810335, "loss": 1.8544, "step": 18520 }, { "epoch": 0.62, "grad_norm": 0.43883487582206726, "learning_rate": 0.0005396796308017517, "loss": 1.8002, "step": 18521 }, { "epoch": 0.62, "grad_norm": 0.44501280784606934, "learning_rate": 0.0005396733445315342, "loss": 1.8533, "step": 18522 }, { "epoch": 0.62, "grad_norm": 0.44518694281578064, "learning_rate": 0.0005396670579703887, "loss": 1.8685, "step": 18523 }, { "epoch": 0.62, "grad_norm": 0.42780065536499023, "learning_rate": 0.0005396607711183225, "loss": 1.8868, "step": 18524 }, { "epoch": 0.62, "grad_norm": 0.4531894028186798, "learning_rate": 0.0005396544839753434, "loss": 1.8244, "step": 18525 }, { "epoch": 0.62, "grad_norm": 0.44991156458854675, "learning_rate": 0.0005396481965414592, "loss": 1.8918, "step": 18526 }, { "epoch": 0.62, "grad_norm": 0.42964762449264526, "learning_rate": 0.0005396419088166774, "loss": 1.8325, "step": 18527 }, { "epoch": 0.62, "grad_norm": 0.4282326400279999, "learning_rate": 0.0005396356208010056, "loss": 1.8894, "step": 18528 }, { "epoch": 0.62, "grad_norm": 0.6055902242660522, "learning_rate": 0.0005396293324944515, "loss": 1.8282, "step": 18529 }, { "epoch": 0.62, "grad_norm": 0.4151105284690857, "learning_rate": 0.0005396230438970227, "loss": 1.8092, "step": 18530 }, { "epoch": 0.62, "grad_norm": 0.43083062767982483, "learning_rate": 0.0005396167550087268, "loss": 1.9082, "step": 18531 }, { "epoch": 0.62, "grad_norm": 0.4317833483219147, "learning_rate": 0.0005396104658295715, "loss": 1.8139, "step": 18532 }, { "epoch": 0.62, "grad_norm": 0.4367656409740448, "learning_rate": 0.0005396041763595643, "loss": 1.859, "step": 18533 }, { "epoch": 0.62, "grad_norm": 0.4302249252796173, "learning_rate": 0.000539597886598713, "loss": 1.8705, "step": 18534 }, { "epoch": 0.62, "grad_norm": 0.4159892499446869, "learning_rate": 0.0005395915965470252, "loss": 1.8567, "step": 18535 }, { "epoch": 0.62, "grad_norm": 0.41113317012786865, "learning_rate": 0.0005395853062045083, "loss": 1.8315, "step": 18536 }, { "epoch": 0.62, "grad_norm": 0.45025864243507385, "learning_rate": 0.0005395790155711704, "loss": 1.8259, "step": 18537 }, { "epoch": 0.62, "grad_norm": 0.4265735447406769, "learning_rate": 0.0005395727246470188, "loss": 1.8872, "step": 18538 }, { "epoch": 0.62, "grad_norm": 0.42681261897087097, "learning_rate": 0.0005395664334320612, "loss": 1.8626, "step": 18539 }, { "epoch": 0.62, "grad_norm": 0.4245053827762604, "learning_rate": 0.0005395601419263054, "loss": 1.8846, "step": 18540 }, { "epoch": 0.62, "grad_norm": 0.4317849278450012, "learning_rate": 0.0005395538501297586, "loss": 1.78, "step": 18541 }, { "epoch": 0.62, "grad_norm": 0.42889603972435, "learning_rate": 0.0005395475580424289, "loss": 1.7175, "step": 18542 }, { "epoch": 0.62, "grad_norm": 0.4380738139152527, "learning_rate": 0.0005395412656643237, "loss": 1.8277, "step": 18543 }, { "epoch": 0.62, "grad_norm": 0.42793264985084534, "learning_rate": 0.0005395349729954507, "loss": 1.81, "step": 18544 }, { "epoch": 0.62, "grad_norm": 0.5594058632850647, "learning_rate": 0.0005395286800358175, "loss": 1.9487, "step": 18545 }, { "epoch": 0.62, "grad_norm": 0.4323098957538605, "learning_rate": 0.0005395223867854319, "loss": 1.8464, "step": 18546 }, { "epoch": 0.62, "grad_norm": 0.42976105213165283, "learning_rate": 0.0005395160932443014, "loss": 1.9277, "step": 18547 }, { "epoch": 0.62, "grad_norm": 0.4458369016647339, "learning_rate": 0.0005395097994124336, "loss": 1.838, "step": 18548 }, { "epoch": 0.62, "grad_norm": 0.42404884099960327, "learning_rate": 0.0005395035052898361, "loss": 1.8895, "step": 18549 }, { "epoch": 0.62, "grad_norm": 0.41488564014434814, "learning_rate": 0.0005394972108765167, "loss": 1.7721, "step": 18550 }, { "epoch": 0.62, "grad_norm": 0.43724098801612854, "learning_rate": 0.000539490916172483, "loss": 1.8228, "step": 18551 }, { "epoch": 0.62, "grad_norm": 0.44705772399902344, "learning_rate": 0.0005394846211777427, "loss": 1.8229, "step": 18552 }, { "epoch": 0.62, "grad_norm": 0.43701738119125366, "learning_rate": 0.0005394783258923032, "loss": 1.8351, "step": 18553 }, { "epoch": 0.62, "grad_norm": 0.4388968050479889, "learning_rate": 0.0005394720303161724, "loss": 1.8382, "step": 18554 }, { "epoch": 0.62, "grad_norm": 0.42458266019821167, "learning_rate": 0.0005394657344493578, "loss": 1.7395, "step": 18555 }, { "epoch": 0.62, "grad_norm": 0.45021674036979675, "learning_rate": 0.000539459438291867, "loss": 1.9179, "step": 18556 }, { "epoch": 0.62, "grad_norm": 0.42914509773254395, "learning_rate": 0.0005394531418437079, "loss": 1.8296, "step": 18557 }, { "epoch": 0.62, "grad_norm": 0.43184295296669006, "learning_rate": 0.0005394468451048878, "loss": 1.8586, "step": 18558 }, { "epoch": 0.62, "grad_norm": 0.414483904838562, "learning_rate": 0.0005394405480754147, "loss": 1.8164, "step": 18559 }, { "epoch": 0.62, "grad_norm": 0.4374593496322632, "learning_rate": 0.000539434250755296, "loss": 1.9155, "step": 18560 }, { "epoch": 0.62, "grad_norm": 0.44901013374328613, "learning_rate": 0.0005394279531445393, "loss": 1.9493, "step": 18561 }, { "epoch": 0.62, "grad_norm": 0.4289083480834961, "learning_rate": 0.0005394216552431524, "loss": 1.8603, "step": 18562 }, { "epoch": 0.62, "grad_norm": 0.4324427843093872, "learning_rate": 0.0005394153570511429, "loss": 1.8167, "step": 18563 }, { "epoch": 0.62, "grad_norm": 0.41829606890678406, "learning_rate": 0.0005394090585685183, "loss": 1.916, "step": 18564 }, { "epoch": 0.62, "grad_norm": 0.44947680830955505, "learning_rate": 0.0005394027597952865, "loss": 1.8019, "step": 18565 }, { "epoch": 0.62, "grad_norm": 0.4455040693283081, "learning_rate": 0.0005393964607314551, "loss": 1.7892, "step": 18566 }, { "epoch": 0.62, "grad_norm": 0.4449928104877472, "learning_rate": 0.0005393901613770315, "loss": 1.7499, "step": 18567 }, { "epoch": 0.62, "grad_norm": 0.4359288513660431, "learning_rate": 0.0005393838617320236, "loss": 1.9098, "step": 18568 }, { "epoch": 0.62, "grad_norm": 0.4486132264137268, "learning_rate": 0.000539377561796439, "loss": 1.9177, "step": 18569 }, { "epoch": 0.62, "grad_norm": 0.43557825684547424, "learning_rate": 0.0005393712615702853, "loss": 1.8431, "step": 18570 }, { "epoch": 0.62, "grad_norm": 0.44305670261383057, "learning_rate": 0.00053936496105357, "loss": 1.8137, "step": 18571 }, { "epoch": 0.62, "grad_norm": 0.41872337460517883, "learning_rate": 0.0005393586602463011, "loss": 1.7864, "step": 18572 }, { "epoch": 0.62, "grad_norm": 0.576416015625, "learning_rate": 0.0005393523591484859, "loss": 1.8319, "step": 18573 }, { "epoch": 0.62, "grad_norm": 0.4404011070728302, "learning_rate": 0.0005393460577601324, "loss": 1.8743, "step": 18574 }, { "epoch": 0.62, "grad_norm": 0.4507150650024414, "learning_rate": 0.000539339756081248, "loss": 1.7955, "step": 18575 }, { "epoch": 0.62, "grad_norm": 0.4214291274547577, "learning_rate": 0.0005393334541118403, "loss": 1.8149, "step": 18576 }, { "epoch": 0.62, "grad_norm": 0.4287148714065552, "learning_rate": 0.000539327151851917, "loss": 1.8261, "step": 18577 }, { "epoch": 0.62, "grad_norm": 0.4360913038253784, "learning_rate": 0.0005393208493014859, "loss": 1.8994, "step": 18578 }, { "epoch": 0.62, "grad_norm": 0.5214502811431885, "learning_rate": 0.0005393145464605546, "loss": 1.8318, "step": 18579 }, { "epoch": 0.62, "grad_norm": 0.4452712833881378, "learning_rate": 0.0005393082433291306, "loss": 1.8396, "step": 18580 }, { "epoch": 0.62, "grad_norm": 0.4297488331794739, "learning_rate": 0.0005393019399072218, "loss": 1.8084, "step": 18581 }, { "epoch": 0.62, "grad_norm": 0.43608558177948, "learning_rate": 0.0005392956361948356, "loss": 1.847, "step": 18582 }, { "epoch": 0.62, "grad_norm": 0.4330788850784302, "learning_rate": 0.0005392893321919797, "loss": 1.8608, "step": 18583 }, { "epoch": 0.62, "grad_norm": 0.414805144071579, "learning_rate": 0.0005392830278986619, "loss": 1.7956, "step": 18584 }, { "epoch": 0.62, "grad_norm": 0.42959386110305786, "learning_rate": 0.0005392767233148897, "loss": 1.9267, "step": 18585 }, { "epoch": 0.62, "grad_norm": 0.427354097366333, "learning_rate": 0.0005392704184406708, "loss": 1.8228, "step": 18586 }, { "epoch": 0.62, "grad_norm": 0.45091739296913147, "learning_rate": 0.0005392641132760129, "loss": 1.7849, "step": 18587 }, { "epoch": 0.62, "grad_norm": 0.41919249296188354, "learning_rate": 0.0005392578078209237, "loss": 1.7689, "step": 18588 }, { "epoch": 0.62, "grad_norm": 0.44945695996284485, "learning_rate": 0.0005392515020754108, "loss": 1.8843, "step": 18589 }, { "epoch": 0.62, "grad_norm": 0.42685163021087646, "learning_rate": 0.0005392451960394817, "loss": 1.7325, "step": 18590 }, { "epoch": 0.62, "grad_norm": 0.43863537907600403, "learning_rate": 0.0005392388897131442, "loss": 1.8628, "step": 18591 }, { "epoch": 0.62, "grad_norm": 0.43616175651550293, "learning_rate": 0.000539232583096406, "loss": 1.8694, "step": 18592 }, { "epoch": 0.62, "grad_norm": 0.48010730743408203, "learning_rate": 0.0005392262761892745, "loss": 1.9243, "step": 18593 }, { "epoch": 0.62, "grad_norm": 0.4245617985725403, "learning_rate": 0.0005392199689917578, "loss": 1.7867, "step": 18594 }, { "epoch": 0.62, "grad_norm": 0.42955800890922546, "learning_rate": 0.0005392136615038633, "loss": 1.8286, "step": 18595 }, { "epoch": 0.62, "grad_norm": 0.42618969082832336, "learning_rate": 0.0005392073537255985, "loss": 1.8759, "step": 18596 }, { "epoch": 0.62, "grad_norm": 0.45078033208847046, "learning_rate": 0.0005392010456569713, "loss": 1.8376, "step": 18597 }, { "epoch": 0.62, "grad_norm": 0.4358522891998291, "learning_rate": 0.0005391947372979892, "loss": 1.9618, "step": 18598 }, { "epoch": 0.62, "grad_norm": 0.4156564176082611, "learning_rate": 0.00053918842864866, "loss": 1.7665, "step": 18599 }, { "epoch": 0.62, "grad_norm": 0.43792814016342163, "learning_rate": 0.0005391821197089912, "loss": 1.9007, "step": 18600 }, { "epoch": 0.62, "grad_norm": 0.41456469893455505, "learning_rate": 0.0005391758104789907, "loss": 1.856, "step": 18601 }, { "epoch": 0.62, "grad_norm": 0.42650532722473145, "learning_rate": 0.000539169500958666, "loss": 1.8997, "step": 18602 }, { "epoch": 0.62, "grad_norm": 0.43620434403419495, "learning_rate": 0.0005391631911480247, "loss": 1.8914, "step": 18603 }, { "epoch": 0.62, "grad_norm": 0.42664432525634766, "learning_rate": 0.0005391568810470744, "loss": 1.9198, "step": 18604 }, { "epoch": 0.62, "grad_norm": 0.44140446186065674, "learning_rate": 0.0005391505706558231, "loss": 1.8807, "step": 18605 }, { "epoch": 0.62, "grad_norm": 0.4329710900783539, "learning_rate": 0.0005391442599742781, "loss": 1.878, "step": 18606 }, { "epoch": 0.62, "grad_norm": 0.43867063522338867, "learning_rate": 0.0005391379490024473, "loss": 1.8039, "step": 18607 }, { "epoch": 0.62, "grad_norm": 0.42429399490356445, "learning_rate": 0.0005391316377403382, "loss": 1.8634, "step": 18608 }, { "epoch": 0.62, "grad_norm": 0.4399555027484894, "learning_rate": 0.0005391253261879586, "loss": 1.7632, "step": 18609 }, { "epoch": 0.62, "grad_norm": 0.4571470618247986, "learning_rate": 0.0005391190143453161, "loss": 1.8349, "step": 18610 }, { "epoch": 0.62, "grad_norm": 0.4349307417869568, "learning_rate": 0.0005391127022124182, "loss": 1.8228, "step": 18611 }, { "epoch": 0.62, "grad_norm": 0.43100008368492126, "learning_rate": 0.0005391063897892729, "loss": 1.8919, "step": 18612 }, { "epoch": 0.62, "grad_norm": 0.4591788351535797, "learning_rate": 0.0005391000770758875, "loss": 1.8751, "step": 18613 }, { "epoch": 0.62, "grad_norm": 0.4343818128108978, "learning_rate": 0.0005390937640722699, "loss": 1.7908, "step": 18614 }, { "epoch": 0.62, "grad_norm": 0.4176282584667206, "learning_rate": 0.0005390874507784277, "loss": 1.8461, "step": 18615 }, { "epoch": 0.62, "grad_norm": 0.42132869362831116, "learning_rate": 0.0005390811371943686, "loss": 1.8373, "step": 18616 }, { "epoch": 0.62, "grad_norm": 0.425454318523407, "learning_rate": 0.0005390748233201002, "loss": 1.8931, "step": 18617 }, { "epoch": 0.62, "grad_norm": 0.4454372823238373, "learning_rate": 0.0005390685091556302, "loss": 1.8696, "step": 18618 }, { "epoch": 0.62, "grad_norm": 0.425121009349823, "learning_rate": 0.0005390621947009663, "loss": 1.7989, "step": 18619 }, { "epoch": 0.62, "grad_norm": 0.42515745759010315, "learning_rate": 0.0005390558799561161, "loss": 1.8936, "step": 18620 }, { "epoch": 0.62, "grad_norm": 0.4412250518798828, "learning_rate": 0.0005390495649210872, "loss": 1.8782, "step": 18621 }, { "epoch": 0.62, "grad_norm": 0.43437182903289795, "learning_rate": 0.0005390432495958874, "loss": 1.7765, "step": 18622 }, { "epoch": 0.62, "grad_norm": 0.47162437438964844, "learning_rate": 0.0005390369339805242, "loss": 1.8229, "step": 18623 }, { "epoch": 0.62, "grad_norm": 0.4292655289173126, "learning_rate": 0.0005390306180750056, "loss": 1.8135, "step": 18624 }, { "epoch": 0.62, "grad_norm": 0.43750905990600586, "learning_rate": 0.0005390243018793389, "loss": 1.8178, "step": 18625 }, { "epoch": 0.62, "grad_norm": 0.42629560828208923, "learning_rate": 0.000539017985393532, "loss": 1.7869, "step": 18626 }, { "epoch": 0.62, "grad_norm": 0.4322071373462677, "learning_rate": 0.0005390116686175924, "loss": 1.8478, "step": 18627 }, { "epoch": 0.62, "grad_norm": 0.4422281086444855, "learning_rate": 0.000539005351551528, "loss": 1.9161, "step": 18628 }, { "epoch": 0.62, "grad_norm": 0.42751482129096985, "learning_rate": 0.0005389990341953462, "loss": 1.8519, "step": 18629 }, { "epoch": 0.62, "grad_norm": 0.4116816818714142, "learning_rate": 0.0005389927165490548, "loss": 1.8224, "step": 18630 }, { "epoch": 0.62, "grad_norm": 0.4217243194580078, "learning_rate": 0.0005389863986126614, "loss": 1.8863, "step": 18631 }, { "epoch": 0.62, "grad_norm": 0.423726886510849, "learning_rate": 0.0005389800803861738, "loss": 1.7908, "step": 18632 }, { "epoch": 0.62, "grad_norm": 0.44621768593788147, "learning_rate": 0.0005389737618695996, "loss": 1.7963, "step": 18633 }, { "epoch": 0.62, "grad_norm": 0.434756338596344, "learning_rate": 0.0005389674430629464, "loss": 1.7979, "step": 18634 }, { "epoch": 0.62, "grad_norm": 0.4241786003112793, "learning_rate": 0.000538961123966222, "loss": 1.8848, "step": 18635 }, { "epoch": 0.62, "grad_norm": 0.4198082983493805, "learning_rate": 0.000538954804579434, "loss": 1.8694, "step": 18636 }, { "epoch": 0.62, "grad_norm": 0.4526844620704651, "learning_rate": 0.00053894848490259, "loss": 1.8762, "step": 18637 }, { "epoch": 0.62, "grad_norm": 0.42351987957954407, "learning_rate": 0.0005389421649356978, "loss": 1.8156, "step": 18638 }, { "epoch": 0.62, "grad_norm": 0.4385344684123993, "learning_rate": 0.000538935844678765, "loss": 1.8431, "step": 18639 }, { "epoch": 0.62, "grad_norm": 0.43732574582099915, "learning_rate": 0.0005389295241317993, "loss": 1.9088, "step": 18640 }, { "epoch": 0.62, "grad_norm": 0.4057128429412842, "learning_rate": 0.0005389232032948085, "loss": 1.8252, "step": 18641 }, { "epoch": 0.62, "grad_norm": 0.4326750636100769, "learning_rate": 0.0005389168821678, "loss": 1.8082, "step": 18642 }, { "epoch": 0.62, "grad_norm": 0.42591291666030884, "learning_rate": 0.0005389105607507816, "loss": 1.8054, "step": 18643 }, { "epoch": 0.62, "grad_norm": 0.43825554847717285, "learning_rate": 0.000538904239043761, "loss": 1.7636, "step": 18644 }, { "epoch": 0.62, "grad_norm": 0.43442294001579285, "learning_rate": 0.0005388979170467459, "loss": 1.8383, "step": 18645 }, { "epoch": 0.62, "grad_norm": 0.4623115658760071, "learning_rate": 0.0005388915947597438, "loss": 1.8426, "step": 18646 }, { "epoch": 0.62, "grad_norm": 0.42413845658302307, "learning_rate": 0.0005388852721827627, "loss": 1.7824, "step": 18647 }, { "epoch": 0.62, "grad_norm": 0.42681044340133667, "learning_rate": 0.0005388789493158099, "loss": 1.819, "step": 18648 }, { "epoch": 0.62, "grad_norm": 0.4187995195388794, "learning_rate": 0.0005388726261588933, "loss": 1.8253, "step": 18649 }, { "epoch": 0.62, "grad_norm": 0.4600253105163574, "learning_rate": 0.0005388663027120205, "loss": 1.8193, "step": 18650 }, { "epoch": 0.62, "grad_norm": 0.4719791114330292, "learning_rate": 0.0005388599789751993, "loss": 1.8257, "step": 18651 }, { "epoch": 0.62, "grad_norm": 0.42364686727523804, "learning_rate": 0.0005388536549484372, "loss": 1.8221, "step": 18652 }, { "epoch": 0.62, "grad_norm": 0.4365013837814331, "learning_rate": 0.0005388473306317421, "loss": 1.881, "step": 18653 }, { "epoch": 0.62, "grad_norm": 0.46204617619514465, "learning_rate": 0.0005388410060251213, "loss": 1.8743, "step": 18654 }, { "epoch": 0.62, "grad_norm": 0.4326980412006378, "learning_rate": 0.0005388346811285829, "loss": 1.7878, "step": 18655 }, { "epoch": 0.62, "grad_norm": 0.43199077248573303, "learning_rate": 0.0005388283559421344, "loss": 1.7908, "step": 18656 }, { "epoch": 0.62, "grad_norm": 0.43444859981536865, "learning_rate": 0.0005388220304657833, "loss": 1.8512, "step": 18657 }, { "epoch": 0.62, "grad_norm": 0.44809216260910034, "learning_rate": 0.0005388157046995375, "loss": 1.8276, "step": 18658 }, { "epoch": 0.62, "grad_norm": 0.4300098419189453, "learning_rate": 0.0005388093786434046, "loss": 1.879, "step": 18659 }, { "epoch": 0.62, "grad_norm": 0.4317711293697357, "learning_rate": 0.0005388030522973924, "loss": 1.8489, "step": 18660 }, { "epoch": 0.62, "grad_norm": 0.43282249569892883, "learning_rate": 0.0005387967256615084, "loss": 1.8412, "step": 18661 }, { "epoch": 0.62, "grad_norm": 0.4397609531879425, "learning_rate": 0.0005387903987357605, "loss": 1.8682, "step": 18662 }, { "epoch": 0.62, "grad_norm": 0.44037944078445435, "learning_rate": 0.0005387840715201561, "loss": 1.917, "step": 18663 }, { "epoch": 0.62, "grad_norm": 0.42385944724082947, "learning_rate": 0.000538777744014703, "loss": 1.7786, "step": 18664 }, { "epoch": 0.62, "grad_norm": 0.4292883574962616, "learning_rate": 0.0005387714162194089, "loss": 1.8235, "step": 18665 }, { "epoch": 0.62, "grad_norm": 0.44936051964759827, "learning_rate": 0.0005387650881342816, "loss": 1.8096, "step": 18666 }, { "epoch": 0.62, "grad_norm": 0.44388240575790405, "learning_rate": 0.0005387587597593287, "loss": 1.8396, "step": 18667 }, { "epoch": 0.62, "grad_norm": 0.4317530691623688, "learning_rate": 0.0005387524310945576, "loss": 1.7716, "step": 18668 }, { "epoch": 0.62, "grad_norm": 0.42867568135261536, "learning_rate": 0.0005387461021399764, "loss": 1.845, "step": 18669 }, { "epoch": 0.62, "grad_norm": 0.42978501319885254, "learning_rate": 0.0005387397728955926, "loss": 1.8245, "step": 18670 }, { "epoch": 0.62, "grad_norm": 0.4517759382724762, "learning_rate": 0.0005387334433614139, "loss": 1.8522, "step": 18671 }, { "epoch": 0.62, "grad_norm": 0.4302895665168762, "learning_rate": 0.0005387271135374479, "loss": 1.8436, "step": 18672 }, { "epoch": 0.62, "grad_norm": 0.4220993220806122, "learning_rate": 0.0005387207834237023, "loss": 1.8426, "step": 18673 }, { "epoch": 0.62, "grad_norm": 0.42009711265563965, "learning_rate": 0.0005387144530201851, "loss": 1.855, "step": 18674 }, { "epoch": 0.62, "grad_norm": 0.4550582766532898, "learning_rate": 0.0005387081223269035, "loss": 1.8808, "step": 18675 }, { "epoch": 0.62, "grad_norm": 0.4241742193698883, "learning_rate": 0.0005387017913438656, "loss": 1.8413, "step": 18676 }, { "epoch": 0.62, "grad_norm": 0.4390430450439453, "learning_rate": 0.0005386954600710787, "loss": 1.8096, "step": 18677 }, { "epoch": 0.62, "grad_norm": 0.4248911142349243, "learning_rate": 0.0005386891285085508, "loss": 1.8278, "step": 18678 }, { "epoch": 0.62, "grad_norm": 0.4353489577770233, "learning_rate": 0.0005386827966562894, "loss": 1.9034, "step": 18679 }, { "epoch": 0.62, "grad_norm": 0.42438673973083496, "learning_rate": 0.0005386764645143022, "loss": 1.8125, "step": 18680 }, { "epoch": 0.62, "grad_norm": 0.4391813278198242, "learning_rate": 0.0005386701320825971, "loss": 1.7203, "step": 18681 }, { "epoch": 0.62, "grad_norm": 0.41812482476234436, "learning_rate": 0.0005386637993611816, "loss": 1.8147, "step": 18682 }, { "epoch": 0.62, "grad_norm": 0.4170687198638916, "learning_rate": 0.0005386574663500633, "loss": 1.8223, "step": 18683 }, { "epoch": 0.62, "grad_norm": 0.43362903594970703, "learning_rate": 0.0005386511330492501, "loss": 1.8833, "step": 18684 }, { "epoch": 0.62, "grad_norm": 0.4295610189437866, "learning_rate": 0.0005386447994587495, "loss": 1.9099, "step": 18685 }, { "epoch": 0.62, "grad_norm": 0.42692500352859497, "learning_rate": 0.0005386384655785693, "loss": 1.8359, "step": 18686 }, { "epoch": 0.62, "grad_norm": 0.417220801115036, "learning_rate": 0.0005386321314087173, "loss": 1.8612, "step": 18687 }, { "epoch": 0.62, "grad_norm": 0.46214330196380615, "learning_rate": 0.0005386257969492009, "loss": 1.939, "step": 18688 }, { "epoch": 0.62, "grad_norm": 0.426595002412796, "learning_rate": 0.000538619462200028, "loss": 1.8796, "step": 18689 }, { "epoch": 0.62, "grad_norm": 0.4219334125518799, "learning_rate": 0.0005386131271612062, "loss": 1.8342, "step": 18690 }, { "epoch": 0.62, "grad_norm": 0.4526344835758209, "learning_rate": 0.0005386067918327432, "loss": 1.8977, "step": 18691 }, { "epoch": 0.62, "grad_norm": 0.4271283447742462, "learning_rate": 0.0005386004562146468, "loss": 1.8796, "step": 18692 }, { "epoch": 0.62, "grad_norm": 0.43726029992103577, "learning_rate": 0.0005385941203069246, "loss": 1.866, "step": 18693 }, { "epoch": 0.62, "grad_norm": 0.4241340756416321, "learning_rate": 0.0005385877841095842, "loss": 1.8428, "step": 18694 }, { "epoch": 0.62, "grad_norm": 0.4285070598125458, "learning_rate": 0.0005385814476226334, "loss": 1.8153, "step": 18695 }, { "epoch": 0.62, "grad_norm": 0.41734910011291504, "learning_rate": 0.0005385751108460799, "loss": 1.8688, "step": 18696 }, { "epoch": 0.62, "grad_norm": 0.4159299433231354, "learning_rate": 0.0005385687737799314, "loss": 1.8275, "step": 18697 }, { "epoch": 0.62, "grad_norm": 0.41884350776672363, "learning_rate": 0.0005385624364241956, "loss": 1.788, "step": 18698 }, { "epoch": 0.62, "grad_norm": 0.4373016655445099, "learning_rate": 0.0005385560987788801, "loss": 1.8539, "step": 18699 }, { "epoch": 0.62, "grad_norm": 0.4235939681529999, "learning_rate": 0.0005385497608439926, "loss": 1.8332, "step": 18700 }, { "epoch": 0.62, "grad_norm": 0.450085312128067, "learning_rate": 0.0005385434226195409, "loss": 1.816, "step": 18701 }, { "epoch": 0.62, "grad_norm": 0.42808952927589417, "learning_rate": 0.0005385370841055326, "loss": 1.814, "step": 18702 }, { "epoch": 0.62, "grad_norm": 0.44205594062805176, "learning_rate": 0.0005385307453019755, "loss": 1.7589, "step": 18703 }, { "epoch": 0.62, "grad_norm": 0.43717268109321594, "learning_rate": 0.000538524406208877, "loss": 1.8061, "step": 18704 }, { "epoch": 0.62, "grad_norm": 0.45116788148880005, "learning_rate": 0.0005385180668262453, "loss": 1.9116, "step": 18705 }, { "epoch": 0.62, "grad_norm": 0.44748029112815857, "learning_rate": 0.0005385117271540876, "loss": 1.8128, "step": 18706 }, { "epoch": 0.62, "grad_norm": 0.42066872119903564, "learning_rate": 0.0005385053871924119, "loss": 1.8346, "step": 18707 }, { "epoch": 0.62, "grad_norm": 0.42212313413619995, "learning_rate": 0.0005384990469412258, "loss": 1.8335, "step": 18708 }, { "epoch": 0.62, "grad_norm": 0.443340927362442, "learning_rate": 0.000538492706400537, "loss": 1.9333, "step": 18709 }, { "epoch": 0.62, "grad_norm": 0.4571857452392578, "learning_rate": 0.0005384863655703532, "loss": 1.8493, "step": 18710 }, { "epoch": 0.62, "grad_norm": 0.43780457973480225, "learning_rate": 0.0005384800244506821, "loss": 1.8672, "step": 18711 }, { "epoch": 0.62, "grad_norm": 0.4211026728153229, "learning_rate": 0.0005384736830415314, "loss": 1.8083, "step": 18712 }, { "epoch": 0.62, "grad_norm": 0.5270724892616272, "learning_rate": 0.0005384673413429087, "loss": 1.901, "step": 18713 }, { "epoch": 0.62, "grad_norm": 0.4599865674972534, "learning_rate": 0.0005384609993548218, "loss": 1.8194, "step": 18714 }, { "epoch": 0.62, "grad_norm": 0.43708670139312744, "learning_rate": 0.0005384546570772784, "loss": 1.8591, "step": 18715 }, { "epoch": 0.62, "grad_norm": 0.42856335639953613, "learning_rate": 0.0005384483145102862, "loss": 1.8308, "step": 18716 }, { "epoch": 0.62, "grad_norm": 0.43137326836586, "learning_rate": 0.0005384419716538529, "loss": 1.9156, "step": 18717 }, { "epoch": 0.62, "grad_norm": 0.46817001700401306, "learning_rate": 0.0005384356285079861, "loss": 1.8014, "step": 18718 }, { "epoch": 0.62, "grad_norm": 0.4316125512123108, "learning_rate": 0.0005384292850726937, "loss": 1.878, "step": 18719 }, { "epoch": 0.62, "grad_norm": 0.4289005398750305, "learning_rate": 0.0005384229413479832, "loss": 1.7854, "step": 18720 }, { "epoch": 0.62, "grad_norm": 0.4335613548755646, "learning_rate": 0.0005384165973338624, "loss": 1.8593, "step": 18721 }, { "epoch": 0.62, "grad_norm": 0.46097666025161743, "learning_rate": 0.000538410253030339, "loss": 1.8315, "step": 18722 }, { "epoch": 0.62, "grad_norm": 0.42354705929756165, "learning_rate": 0.0005384039084374206, "loss": 1.8558, "step": 18723 }, { "epoch": 0.62, "grad_norm": 0.4211243987083435, "learning_rate": 0.000538397563555115, "loss": 1.7616, "step": 18724 }, { "epoch": 0.62, "grad_norm": 0.4605497717857361, "learning_rate": 0.00053839121838343, "loss": 1.8634, "step": 18725 }, { "epoch": 0.62, "grad_norm": 0.4284732937812805, "learning_rate": 0.000538384872922373, "loss": 1.7747, "step": 18726 }, { "epoch": 0.62, "grad_norm": 0.4164714217185974, "learning_rate": 0.000538378527171952, "loss": 1.8572, "step": 18727 }, { "epoch": 0.62, "grad_norm": 0.4276811182498932, "learning_rate": 0.0005383721811321746, "loss": 1.8016, "step": 18728 }, { "epoch": 0.62, "grad_norm": 0.4340321123600006, "learning_rate": 0.0005383658348030486, "loss": 1.8902, "step": 18729 }, { "epoch": 0.62, "grad_norm": 0.4220091700553894, "learning_rate": 0.0005383594881845814, "loss": 1.8584, "step": 18730 }, { "epoch": 0.62, "grad_norm": 0.41318777203559875, "learning_rate": 0.000538353141276781, "loss": 1.7965, "step": 18731 }, { "epoch": 0.62, "grad_norm": 0.4101816713809967, "learning_rate": 0.000538346794079655, "loss": 1.8759, "step": 18732 }, { "epoch": 0.62, "grad_norm": 0.4420957565307617, "learning_rate": 0.0005383404465932111, "loss": 1.8205, "step": 18733 }, { "epoch": 0.62, "grad_norm": 0.41831713914871216, "learning_rate": 0.0005383340988174569, "loss": 1.7456, "step": 18734 }, { "epoch": 0.62, "grad_norm": 0.432974636554718, "learning_rate": 0.0005383277507524004, "loss": 1.8203, "step": 18735 }, { "epoch": 0.62, "grad_norm": 0.4230183959007263, "learning_rate": 0.0005383214023980492, "loss": 1.8297, "step": 18736 }, { "epoch": 0.62, "grad_norm": 0.43223482370376587, "learning_rate": 0.0005383150537544107, "loss": 1.8768, "step": 18737 }, { "epoch": 0.62, "grad_norm": 0.46755364537239075, "learning_rate": 0.0005383087048214929, "loss": 1.8257, "step": 18738 }, { "epoch": 0.62, "grad_norm": 0.4266452491283417, "learning_rate": 0.0005383023555993035, "loss": 1.8322, "step": 18739 }, { "epoch": 0.62, "grad_norm": 0.4237172603607178, "learning_rate": 0.0005382960060878501, "loss": 1.8122, "step": 18740 }, { "epoch": 0.62, "grad_norm": 0.43051040172576904, "learning_rate": 0.0005382896562871405, "loss": 1.8248, "step": 18741 }, { "epoch": 0.62, "grad_norm": 0.4417085349559784, "learning_rate": 0.0005382833061971824, "loss": 1.8795, "step": 18742 }, { "epoch": 0.62, "grad_norm": 0.42491117119789124, "learning_rate": 0.0005382769558179833, "loss": 1.8234, "step": 18743 }, { "epoch": 0.62, "grad_norm": 0.41868314146995544, "learning_rate": 0.0005382706051495513, "loss": 1.8835, "step": 18744 }, { "epoch": 0.62, "grad_norm": 0.41769301891326904, "learning_rate": 0.0005382642541918938, "loss": 1.9436, "step": 18745 }, { "epoch": 0.62, "grad_norm": 0.44290784001350403, "learning_rate": 0.0005382579029450187, "loss": 1.7944, "step": 18746 }, { "epoch": 0.62, "grad_norm": 0.43093955516815186, "learning_rate": 0.0005382515514089335, "loss": 1.8277, "step": 18747 }, { "epoch": 0.62, "grad_norm": 0.43195056915283203, "learning_rate": 0.000538245199583646, "loss": 1.8911, "step": 18748 }, { "epoch": 0.62, "grad_norm": 0.4478115737438202, "learning_rate": 0.000538238847469164, "loss": 1.7772, "step": 18749 }, { "epoch": 0.62, "grad_norm": 0.4623764157295227, "learning_rate": 0.0005382324950654951, "loss": 1.9036, "step": 18750 }, { "epoch": 0.62, "grad_norm": 0.43874943256378174, "learning_rate": 0.000538226142372647, "loss": 1.7625, "step": 18751 }, { "epoch": 0.62, "grad_norm": 0.42589500546455383, "learning_rate": 0.0005382197893906277, "loss": 1.8024, "step": 18752 }, { "epoch": 0.62, "grad_norm": 0.4328243136405945, "learning_rate": 0.0005382134361194444, "loss": 1.7875, "step": 18753 }, { "epoch": 0.62, "grad_norm": 0.47806137800216675, "learning_rate": 0.0005382070825591052, "loss": 1.8042, "step": 18754 }, { "epoch": 0.62, "grad_norm": 0.46350646018981934, "learning_rate": 0.0005382007287096177, "loss": 1.8531, "step": 18755 }, { "epoch": 0.62, "grad_norm": 0.41640833020210266, "learning_rate": 0.0005381943745709897, "loss": 1.8351, "step": 18756 }, { "epoch": 0.62, "grad_norm": 0.4549822509288788, "learning_rate": 0.0005381880201432287, "loss": 1.8103, "step": 18757 }, { "epoch": 0.62, "grad_norm": 0.4256558120250702, "learning_rate": 0.0005381816654263425, "loss": 1.8176, "step": 18758 }, { "epoch": 0.62, "grad_norm": 0.446798712015152, "learning_rate": 0.000538175310420339, "loss": 1.8421, "step": 18759 }, { "epoch": 0.62, "grad_norm": 0.4297889173030853, "learning_rate": 0.0005381689551252257, "loss": 1.8213, "step": 18760 }, { "epoch": 0.62, "grad_norm": 0.4371426999568939, "learning_rate": 0.0005381625995410104, "loss": 1.8119, "step": 18761 }, { "epoch": 0.62, "grad_norm": 0.43391740322113037, "learning_rate": 0.0005381562436677007, "loss": 1.8172, "step": 18762 }, { "epoch": 0.62, "grad_norm": 0.42990779876708984, "learning_rate": 0.0005381498875053045, "loss": 1.8861, "step": 18763 }, { "epoch": 0.62, "grad_norm": 0.4393913745880127, "learning_rate": 0.0005381435310538294, "loss": 1.8726, "step": 18764 }, { "epoch": 0.62, "grad_norm": 0.4327715337276459, "learning_rate": 0.0005381371743132832, "loss": 1.8692, "step": 18765 }, { "epoch": 0.62, "grad_norm": 0.4048803150653839, "learning_rate": 0.0005381308172836734, "loss": 1.8146, "step": 18766 }, { "epoch": 0.62, "grad_norm": 0.4306398630142212, "learning_rate": 0.0005381244599650081, "loss": 1.818, "step": 18767 }, { "epoch": 0.62, "grad_norm": 0.442061185836792, "learning_rate": 0.0005381181023572946, "loss": 1.8575, "step": 18768 }, { "epoch": 0.62, "grad_norm": 0.42191848158836365, "learning_rate": 0.0005381117444605409, "loss": 1.8591, "step": 18769 }, { "epoch": 0.62, "grad_norm": 0.43287381529808044, "learning_rate": 0.0005381053862747546, "loss": 1.8165, "step": 18770 }, { "epoch": 0.62, "grad_norm": 0.43811583518981934, "learning_rate": 0.0005380990277999436, "loss": 1.9006, "step": 18771 }, { "epoch": 0.62, "grad_norm": 0.42608365416526794, "learning_rate": 0.0005380926690361152, "loss": 1.8007, "step": 18772 }, { "epoch": 0.62, "grad_norm": 0.42895734310150146, "learning_rate": 0.0005380863099832774, "loss": 1.8237, "step": 18773 }, { "epoch": 0.62, "grad_norm": 1.0743513107299805, "learning_rate": 0.0005380799506414381, "loss": 1.8816, "step": 18774 }, { "epoch": 0.62, "grad_norm": 0.423580139875412, "learning_rate": 0.0005380735910106047, "loss": 1.7797, "step": 18775 }, { "epoch": 0.62, "grad_norm": 0.43027716875076294, "learning_rate": 0.0005380672310907851, "loss": 1.8145, "step": 18776 }, { "epoch": 0.62, "grad_norm": 0.43490445613861084, "learning_rate": 0.0005380608708819869, "loss": 1.8764, "step": 18777 }, { "epoch": 0.62, "grad_norm": 0.4456257224082947, "learning_rate": 0.0005380545103842179, "loss": 1.8127, "step": 18778 }, { "epoch": 0.62, "grad_norm": 0.4365766644477844, "learning_rate": 0.0005380481495974857, "loss": 1.8816, "step": 18779 }, { "epoch": 0.62, "grad_norm": 0.41941192746162415, "learning_rate": 0.0005380417885217983, "loss": 1.7882, "step": 18780 }, { "epoch": 0.62, "grad_norm": 0.43709197640419006, "learning_rate": 0.0005380354271571632, "loss": 1.8368, "step": 18781 }, { "epoch": 0.62, "grad_norm": 0.4401100277900696, "learning_rate": 0.0005380290655035881, "loss": 1.9068, "step": 18782 }, { "epoch": 0.62, "grad_norm": 0.425695538520813, "learning_rate": 0.0005380227035610808, "loss": 1.8149, "step": 18783 }, { "epoch": 0.62, "grad_norm": 0.413387268781662, "learning_rate": 0.000538016341329649, "loss": 1.795, "step": 18784 }, { "epoch": 0.62, "grad_norm": 0.44225507974624634, "learning_rate": 0.0005380099788093004, "loss": 1.836, "step": 18785 }, { "epoch": 0.63, "grad_norm": 0.406446248292923, "learning_rate": 0.0005380036160000429, "loss": 1.7381, "step": 18786 }, { "epoch": 0.63, "grad_norm": 0.4846126139163971, "learning_rate": 0.0005379972529018839, "loss": 1.8194, "step": 18787 }, { "epoch": 0.63, "grad_norm": 0.6004512906074524, "learning_rate": 0.0005379908895148315, "loss": 1.9144, "step": 18788 }, { "epoch": 0.63, "grad_norm": 0.43839406967163086, "learning_rate": 0.0005379845258388931, "loss": 1.8532, "step": 18789 }, { "epoch": 0.63, "grad_norm": 0.425556480884552, "learning_rate": 0.0005379781618740765, "loss": 1.8454, "step": 18790 }, { "epoch": 0.63, "grad_norm": 0.4332889914512634, "learning_rate": 0.0005379717976203895, "loss": 1.9114, "step": 18791 }, { "epoch": 0.63, "grad_norm": 0.4538551867008209, "learning_rate": 0.0005379654330778399, "loss": 1.8097, "step": 18792 }, { "epoch": 0.63, "grad_norm": 0.4530382752418518, "learning_rate": 0.0005379590682464353, "loss": 1.9028, "step": 18793 }, { "epoch": 0.63, "grad_norm": 0.42085185647010803, "learning_rate": 0.0005379527031261833, "loss": 1.807, "step": 18794 }, { "epoch": 0.63, "grad_norm": 0.459385484457016, "learning_rate": 0.000537946337717092, "loss": 1.8472, "step": 18795 }, { "epoch": 0.63, "grad_norm": 0.4335078299045563, "learning_rate": 0.0005379399720191687, "loss": 1.8741, "step": 18796 }, { "epoch": 0.63, "grad_norm": 0.44007548689842224, "learning_rate": 0.0005379336060324215, "loss": 1.8065, "step": 18797 }, { "epoch": 0.63, "grad_norm": 0.41190916299819946, "learning_rate": 0.0005379272397568579, "loss": 1.7924, "step": 18798 }, { "epoch": 0.63, "grad_norm": 0.43710702657699585, "learning_rate": 0.0005379208731924858, "loss": 1.786, "step": 18799 }, { "epoch": 0.63, "grad_norm": 0.43096476793289185, "learning_rate": 0.0005379145063393126, "loss": 1.8615, "step": 18800 }, { "epoch": 0.63, "grad_norm": 0.41873082518577576, "learning_rate": 0.0005379081391973464, "loss": 1.8041, "step": 18801 }, { "epoch": 0.63, "grad_norm": 0.42256802320480347, "learning_rate": 0.0005379017717665947, "loss": 1.7937, "step": 18802 }, { "epoch": 0.63, "grad_norm": 0.43930745124816895, "learning_rate": 0.0005378954040470653, "loss": 1.8564, "step": 18803 }, { "epoch": 0.63, "grad_norm": 0.4319706857204437, "learning_rate": 0.000537889036038766, "loss": 1.8615, "step": 18804 }, { "epoch": 0.63, "grad_norm": 0.415226548910141, "learning_rate": 0.0005378826677417043, "loss": 1.8851, "step": 18805 }, { "epoch": 0.63, "grad_norm": 0.42044055461883545, "learning_rate": 0.0005378762991558883, "loss": 1.8705, "step": 18806 }, { "epoch": 0.63, "grad_norm": 0.42343318462371826, "learning_rate": 0.0005378699302813253, "loss": 1.8038, "step": 18807 }, { "epoch": 0.63, "grad_norm": 0.4507865011692047, "learning_rate": 0.0005378635611180235, "loss": 1.8226, "step": 18808 }, { "epoch": 0.63, "grad_norm": 0.44787392020225525, "learning_rate": 0.0005378571916659902, "loss": 1.7793, "step": 18809 }, { "epoch": 0.63, "grad_norm": 0.436029851436615, "learning_rate": 0.0005378508219252334, "loss": 1.8134, "step": 18810 }, { "epoch": 0.63, "grad_norm": 0.41157180070877075, "learning_rate": 0.0005378444518957607, "loss": 1.8628, "step": 18811 }, { "epoch": 0.63, "grad_norm": 0.43815240263938904, "learning_rate": 0.00053783808157758, "loss": 1.8477, "step": 18812 }, { "epoch": 0.63, "grad_norm": 0.43308213353157043, "learning_rate": 0.0005378317109706988, "loss": 1.8096, "step": 18813 }, { "epoch": 0.63, "grad_norm": 0.45048728585243225, "learning_rate": 0.000537825340075125, "loss": 1.82, "step": 18814 }, { "epoch": 0.63, "grad_norm": 0.45355600118637085, "learning_rate": 0.0005378189688908662, "loss": 1.8826, "step": 18815 }, { "epoch": 0.63, "grad_norm": 0.4239637851715088, "learning_rate": 0.0005378125974179303, "loss": 1.8024, "step": 18816 }, { "epoch": 0.63, "grad_norm": 0.4320642352104187, "learning_rate": 0.0005378062256563248, "loss": 1.7783, "step": 18817 }, { "epoch": 0.63, "grad_norm": 0.4262620806694031, "learning_rate": 0.0005377998536060577, "loss": 1.778, "step": 18818 }, { "epoch": 0.63, "grad_norm": 0.4324806332588196, "learning_rate": 0.0005377934812671367, "loss": 1.8374, "step": 18819 }, { "epoch": 0.63, "grad_norm": 0.4327678978443146, "learning_rate": 0.0005377871086395693, "loss": 1.8085, "step": 18820 }, { "epoch": 0.63, "grad_norm": 0.4396694302558899, "learning_rate": 0.0005377807357233635, "loss": 1.8176, "step": 18821 }, { "epoch": 0.63, "grad_norm": 0.42875152826309204, "learning_rate": 0.0005377743625185268, "loss": 1.8049, "step": 18822 }, { "epoch": 0.63, "grad_norm": 0.4340842366218567, "learning_rate": 0.0005377679890250672, "loss": 1.8252, "step": 18823 }, { "epoch": 0.63, "grad_norm": 0.4267880618572235, "learning_rate": 0.0005377616152429922, "loss": 1.8479, "step": 18824 }, { "epoch": 0.63, "grad_norm": 0.44604945182800293, "learning_rate": 0.0005377552411723097, "loss": 1.8479, "step": 18825 }, { "epoch": 0.63, "grad_norm": 0.46330612897872925, "learning_rate": 0.0005377488668130272, "loss": 1.8362, "step": 18826 }, { "epoch": 0.63, "grad_norm": 0.43336471915245056, "learning_rate": 0.0005377424921651528, "loss": 1.8513, "step": 18827 }, { "epoch": 0.63, "grad_norm": 0.4360126256942749, "learning_rate": 0.0005377361172286939, "loss": 1.7605, "step": 18828 }, { "epoch": 0.63, "grad_norm": 0.43225544691085815, "learning_rate": 0.0005377297420036584, "loss": 1.8871, "step": 18829 }, { "epoch": 0.63, "grad_norm": 0.416424036026001, "learning_rate": 0.0005377233664900542, "loss": 1.8, "step": 18830 }, { "epoch": 0.63, "grad_norm": 0.43902596831321716, "learning_rate": 0.0005377169906878886, "loss": 1.8466, "step": 18831 }, { "epoch": 0.63, "grad_norm": 0.42492204904556274, "learning_rate": 0.0005377106145971698, "loss": 1.82, "step": 18832 }, { "epoch": 0.63, "grad_norm": 0.4537718892097473, "learning_rate": 0.0005377042382179053, "loss": 1.7571, "step": 18833 }, { "epoch": 0.63, "grad_norm": 0.41428911685943604, "learning_rate": 0.0005376978615501028, "loss": 1.8007, "step": 18834 }, { "epoch": 0.63, "grad_norm": 0.43177181482315063, "learning_rate": 0.0005376914845937702, "loss": 1.8377, "step": 18835 }, { "epoch": 0.63, "grad_norm": 0.4406563639640808, "learning_rate": 0.0005376851073489151, "loss": 1.8458, "step": 18836 }, { "epoch": 0.63, "grad_norm": 0.43450498580932617, "learning_rate": 0.0005376787298155453, "loss": 1.8389, "step": 18837 }, { "epoch": 0.63, "grad_norm": 0.44209903478622437, "learning_rate": 0.0005376723519936686, "loss": 1.8209, "step": 18838 }, { "epoch": 0.63, "grad_norm": 0.4266240894794464, "learning_rate": 0.0005376659738832927, "loss": 1.7775, "step": 18839 }, { "epoch": 0.63, "grad_norm": 0.42921486496925354, "learning_rate": 0.0005376595954844252, "loss": 1.8227, "step": 18840 }, { "epoch": 0.63, "grad_norm": 0.4320884644985199, "learning_rate": 0.0005376532167970741, "loss": 1.8695, "step": 18841 }, { "epoch": 0.63, "grad_norm": 0.49234023690223694, "learning_rate": 0.000537646837821247, "loss": 1.9078, "step": 18842 }, { "epoch": 0.63, "grad_norm": 0.4271017611026764, "learning_rate": 0.0005376404585569516, "loss": 1.7959, "step": 18843 }, { "epoch": 0.63, "grad_norm": 0.43362781405448914, "learning_rate": 0.0005376340790041957, "loss": 1.8163, "step": 18844 }, { "epoch": 0.63, "grad_norm": 0.4238341450691223, "learning_rate": 0.000537627699162987, "loss": 1.8259, "step": 18845 }, { "epoch": 0.63, "grad_norm": 0.4238992929458618, "learning_rate": 0.0005376213190333333, "loss": 1.7432, "step": 18846 }, { "epoch": 0.63, "grad_norm": 0.42691588401794434, "learning_rate": 0.0005376149386152424, "loss": 1.8635, "step": 18847 }, { "epoch": 0.63, "grad_norm": 0.43096280097961426, "learning_rate": 0.000537608557908722, "loss": 1.816, "step": 18848 }, { "epoch": 0.63, "grad_norm": 0.42940133810043335, "learning_rate": 0.0005376021769137798, "loss": 1.8035, "step": 18849 }, { "epoch": 0.63, "grad_norm": 0.43401235342025757, "learning_rate": 0.0005375957956304235, "loss": 1.8575, "step": 18850 }, { "epoch": 0.63, "grad_norm": 0.42275628447532654, "learning_rate": 0.0005375894140586609, "loss": 1.835, "step": 18851 }, { "epoch": 0.63, "grad_norm": 0.430408239364624, "learning_rate": 0.0005375830321984998, "loss": 1.8157, "step": 18852 }, { "epoch": 0.63, "grad_norm": 0.4464294910430908, "learning_rate": 0.000537576650049948, "loss": 1.8645, "step": 18853 }, { "epoch": 0.63, "grad_norm": 0.42501339316368103, "learning_rate": 0.000537570267613013, "loss": 1.8683, "step": 18854 }, { "epoch": 0.63, "grad_norm": 0.4155629873275757, "learning_rate": 0.0005375638848877028, "loss": 1.7679, "step": 18855 }, { "epoch": 0.63, "grad_norm": 0.43752023577690125, "learning_rate": 0.000537557501874025, "loss": 1.8551, "step": 18856 }, { "epoch": 0.63, "grad_norm": 0.4254901707172394, "learning_rate": 0.0005375511185719875, "loss": 1.8224, "step": 18857 }, { "epoch": 0.63, "grad_norm": 0.4309634566307068, "learning_rate": 0.0005375447349815978, "loss": 1.8532, "step": 18858 }, { "epoch": 0.63, "grad_norm": 0.4314379394054413, "learning_rate": 0.0005375383511028639, "loss": 1.8761, "step": 18859 }, { "epoch": 0.63, "grad_norm": 0.4179456830024719, "learning_rate": 0.0005375319669357933, "loss": 1.8341, "step": 18860 }, { "epoch": 0.63, "grad_norm": 0.4172208905220032, "learning_rate": 0.000537525582480394, "loss": 1.8649, "step": 18861 }, { "epoch": 0.63, "grad_norm": 0.44044360518455505, "learning_rate": 0.0005375191977366736, "loss": 1.7963, "step": 18862 }, { "epoch": 0.63, "grad_norm": 0.4170936346054077, "learning_rate": 0.0005375128127046399, "loss": 1.7897, "step": 18863 }, { "epoch": 0.63, "grad_norm": 0.42861777544021606, "learning_rate": 0.0005375064273843007, "loss": 1.7631, "step": 18864 }, { "epoch": 0.63, "grad_norm": 0.4198864996433258, "learning_rate": 0.0005375000417756635, "loss": 1.859, "step": 18865 }, { "epoch": 0.63, "grad_norm": 0.435428649187088, "learning_rate": 0.0005374936558787364, "loss": 1.8631, "step": 18866 }, { "epoch": 0.63, "grad_norm": 0.444722443819046, "learning_rate": 0.000537487269693527, "loss": 1.7957, "step": 18867 }, { "epoch": 0.63, "grad_norm": 0.4507623314857483, "learning_rate": 0.000537480883220043, "loss": 1.8083, "step": 18868 }, { "epoch": 0.63, "grad_norm": 0.4383471608161926, "learning_rate": 0.0005374744964582923, "loss": 1.8526, "step": 18869 }, { "epoch": 0.63, "grad_norm": 0.4269614517688751, "learning_rate": 0.0005374681094082824, "loss": 1.7775, "step": 18870 }, { "epoch": 0.63, "grad_norm": 0.43862470984458923, "learning_rate": 0.0005374617220700213, "loss": 1.8549, "step": 18871 }, { "epoch": 0.63, "grad_norm": 0.44317948818206787, "learning_rate": 0.0005374553344435167, "loss": 1.7841, "step": 18872 }, { "epoch": 0.63, "grad_norm": 0.4387820363044739, "learning_rate": 0.0005374489465287762, "loss": 1.9436, "step": 18873 }, { "epoch": 0.63, "grad_norm": 0.45249444246292114, "learning_rate": 0.0005374425583258077, "loss": 1.8177, "step": 18874 }, { "epoch": 0.63, "grad_norm": 0.41908830404281616, "learning_rate": 0.0005374361698346189, "loss": 1.8775, "step": 18875 }, { "epoch": 0.63, "grad_norm": 0.4226613938808441, "learning_rate": 0.0005374297810552176, "loss": 1.7965, "step": 18876 }, { "epoch": 0.63, "grad_norm": 0.4389953911304474, "learning_rate": 0.0005374233919876116, "loss": 1.7928, "step": 18877 }, { "epoch": 0.63, "grad_norm": 0.45197197794914246, "learning_rate": 0.0005374170026318085, "loss": 1.9141, "step": 18878 }, { "epoch": 0.63, "grad_norm": 0.42299944162368774, "learning_rate": 0.0005374106129878162, "loss": 1.781, "step": 18879 }, { "epoch": 0.63, "grad_norm": 0.43350499868392944, "learning_rate": 0.0005374042230556423, "loss": 1.8482, "step": 18880 }, { "epoch": 0.63, "grad_norm": 0.4298011064529419, "learning_rate": 0.0005373978328352948, "loss": 1.8095, "step": 18881 }, { "epoch": 0.63, "grad_norm": 0.4315776824951172, "learning_rate": 0.0005373914423267811, "loss": 1.8624, "step": 18882 }, { "epoch": 0.63, "grad_norm": 0.41884845495224, "learning_rate": 0.0005373850515301093, "loss": 1.8458, "step": 18883 }, { "epoch": 0.63, "grad_norm": 0.40934571623802185, "learning_rate": 0.000537378660445287, "loss": 1.8394, "step": 18884 }, { "epoch": 0.63, "grad_norm": 0.4420531988143921, "learning_rate": 0.000537372269072322, "loss": 1.88, "step": 18885 }, { "epoch": 0.63, "grad_norm": 0.4496673047542572, "learning_rate": 0.0005373658774112221, "loss": 1.8918, "step": 18886 }, { "epoch": 0.63, "grad_norm": 0.43486928939819336, "learning_rate": 0.0005373594854619949, "loss": 1.8467, "step": 18887 }, { "epoch": 0.63, "grad_norm": 0.43232640624046326, "learning_rate": 0.0005373530932246482, "loss": 1.8118, "step": 18888 }, { "epoch": 0.63, "grad_norm": 0.43112239241600037, "learning_rate": 0.0005373467006991899, "loss": 1.8917, "step": 18889 }, { "epoch": 0.63, "grad_norm": 0.44002342224121094, "learning_rate": 0.0005373403078856278, "loss": 1.9031, "step": 18890 }, { "epoch": 0.63, "grad_norm": 0.4328131675720215, "learning_rate": 0.0005373339147839694, "loss": 1.8535, "step": 18891 }, { "epoch": 0.63, "grad_norm": 0.44263604283332825, "learning_rate": 0.0005373275213942226, "loss": 1.8737, "step": 18892 }, { "epoch": 0.63, "grad_norm": 0.43162164092063904, "learning_rate": 0.0005373211277163952, "loss": 1.8484, "step": 18893 }, { "epoch": 0.63, "grad_norm": 0.45756059885025024, "learning_rate": 0.0005373147337504949, "loss": 1.793, "step": 18894 }, { "epoch": 0.63, "grad_norm": 0.420555055141449, "learning_rate": 0.0005373083394965294, "loss": 1.8086, "step": 18895 }, { "epoch": 0.63, "grad_norm": 0.4330153465270996, "learning_rate": 0.0005373019449545067, "loss": 1.8024, "step": 18896 }, { "epoch": 0.63, "grad_norm": 0.44170087575912476, "learning_rate": 0.0005372955501244343, "loss": 1.8659, "step": 18897 }, { "epoch": 0.63, "grad_norm": 0.4399036467075348, "learning_rate": 0.00053728915500632, "loss": 1.8929, "step": 18898 }, { "epoch": 0.63, "grad_norm": 0.4231940805912018, "learning_rate": 0.0005372827596001718, "loss": 1.8585, "step": 18899 }, { "epoch": 0.63, "grad_norm": 0.42996251583099365, "learning_rate": 0.0005372763639059973, "loss": 1.8509, "step": 18900 }, { "epoch": 0.63, "grad_norm": 0.4407115578651428, "learning_rate": 0.000537269967923804, "loss": 1.8347, "step": 18901 }, { "epoch": 0.63, "grad_norm": 0.433270126581192, "learning_rate": 0.0005372635716536002, "loss": 1.8722, "step": 18902 }, { "epoch": 0.63, "grad_norm": 0.4114929437637329, "learning_rate": 0.0005372571750953931, "loss": 1.8788, "step": 18903 }, { "epoch": 0.63, "grad_norm": 0.42615067958831787, "learning_rate": 0.000537250778249191, "loss": 1.8782, "step": 18904 }, { "epoch": 0.63, "grad_norm": 0.42991548776626587, "learning_rate": 0.0005372443811150014, "loss": 1.843, "step": 18905 }, { "epoch": 0.63, "grad_norm": 0.435109943151474, "learning_rate": 0.0005372379836928319, "loss": 1.9143, "step": 18906 }, { "epoch": 0.63, "grad_norm": 0.4294882118701935, "learning_rate": 0.0005372315859826905, "loss": 1.8397, "step": 18907 }, { "epoch": 0.63, "grad_norm": 0.42398086190223694, "learning_rate": 0.000537225187984585, "loss": 1.8378, "step": 18908 }, { "epoch": 0.63, "grad_norm": 0.4351353049278259, "learning_rate": 0.000537218789698523, "loss": 1.8248, "step": 18909 }, { "epoch": 0.63, "grad_norm": 0.44234398007392883, "learning_rate": 0.0005372123911245125, "loss": 1.867, "step": 18910 }, { "epoch": 0.63, "grad_norm": 0.42357656359672546, "learning_rate": 0.0005372059922625609, "loss": 1.8139, "step": 18911 }, { "epoch": 0.63, "grad_norm": 0.4399651885032654, "learning_rate": 0.0005371995931126762, "loss": 1.8492, "step": 18912 }, { "epoch": 0.63, "grad_norm": 0.44223445653915405, "learning_rate": 0.0005371931936748663, "loss": 1.8358, "step": 18913 }, { "epoch": 0.63, "grad_norm": 0.4451466500759125, "learning_rate": 0.0005371867939491387, "loss": 1.8414, "step": 18914 }, { "epoch": 0.63, "grad_norm": 0.4327448606491089, "learning_rate": 0.0005371803939355012, "loss": 1.8653, "step": 18915 }, { "epoch": 0.63, "grad_norm": 0.4342046082019806, "learning_rate": 0.0005371739936339618, "loss": 1.959, "step": 18916 }, { "epoch": 0.63, "grad_norm": 0.4242742955684662, "learning_rate": 0.000537167593044528, "loss": 1.9108, "step": 18917 }, { "epoch": 0.63, "grad_norm": 0.4519595503807068, "learning_rate": 0.0005371611921672078, "loss": 1.8388, "step": 18918 }, { "epoch": 0.63, "grad_norm": 0.43479955196380615, "learning_rate": 0.0005371547910020088, "loss": 1.8456, "step": 18919 }, { "epoch": 0.63, "grad_norm": 0.42220258712768555, "learning_rate": 0.0005371483895489389, "loss": 1.87, "step": 18920 }, { "epoch": 0.63, "grad_norm": 0.4277897775173187, "learning_rate": 0.0005371419878080057, "loss": 1.8437, "step": 18921 }, { "epoch": 0.63, "grad_norm": 0.4256991446018219, "learning_rate": 0.000537135585779217, "loss": 1.808, "step": 18922 }, { "epoch": 0.63, "grad_norm": 0.4368319511413574, "learning_rate": 0.0005371291834625808, "loss": 1.8172, "step": 18923 }, { "epoch": 0.63, "grad_norm": 0.4497651755809784, "learning_rate": 0.0005371227808581046, "loss": 1.8435, "step": 18924 }, { "epoch": 0.63, "grad_norm": 0.4223995804786682, "learning_rate": 0.0005371163779657964, "loss": 1.7267, "step": 18925 }, { "epoch": 0.63, "grad_norm": 0.42110323905944824, "learning_rate": 0.0005371099747856638, "loss": 1.7692, "step": 18926 }, { "epoch": 0.63, "grad_norm": 0.45099446177482605, "learning_rate": 0.0005371035713177145, "loss": 1.8594, "step": 18927 }, { "epoch": 0.63, "grad_norm": 0.4319709539413452, "learning_rate": 0.0005370971675619565, "loss": 1.8133, "step": 18928 }, { "epoch": 0.63, "grad_norm": 0.4509694576263428, "learning_rate": 0.0005370907635183975, "loss": 1.8678, "step": 18929 }, { "epoch": 0.63, "grad_norm": 0.46467289328575134, "learning_rate": 0.0005370843591870453, "loss": 1.8146, "step": 18930 }, { "epoch": 0.63, "grad_norm": 0.43386179208755493, "learning_rate": 0.0005370779545679075, "loss": 1.8823, "step": 18931 }, { "epoch": 0.63, "grad_norm": 0.4367173910140991, "learning_rate": 0.000537071549660992, "loss": 1.8105, "step": 18932 }, { "epoch": 0.63, "grad_norm": 0.43080002069473267, "learning_rate": 0.0005370651444663066, "loss": 1.8059, "step": 18933 }, { "epoch": 0.63, "grad_norm": 0.4890010356903076, "learning_rate": 0.0005370587389838591, "loss": 1.8108, "step": 18934 }, { "epoch": 0.63, "grad_norm": 0.43259337544441223, "learning_rate": 0.0005370523332136573, "loss": 1.8826, "step": 18935 }, { "epoch": 0.63, "grad_norm": 0.4298785328865051, "learning_rate": 0.0005370459271557086, "loss": 1.8934, "step": 18936 }, { "epoch": 0.63, "grad_norm": 0.4386887550354004, "learning_rate": 0.0005370395208100213, "loss": 1.8794, "step": 18937 }, { "epoch": 0.63, "grad_norm": 0.43853214383125305, "learning_rate": 0.0005370331141766029, "loss": 1.8005, "step": 18938 }, { "epoch": 0.63, "grad_norm": 0.4546217620372772, "learning_rate": 0.0005370267072554612, "loss": 1.8638, "step": 18939 }, { "epoch": 0.63, "grad_norm": 0.43762558698654175, "learning_rate": 0.000537020300046604, "loss": 1.9676, "step": 18940 }, { "epoch": 0.63, "grad_norm": 0.45391830801963806, "learning_rate": 0.0005370138925500391, "loss": 1.8133, "step": 18941 }, { "epoch": 0.63, "grad_norm": 0.45221880078315735, "learning_rate": 0.0005370074847657743, "loss": 1.7962, "step": 18942 }, { "epoch": 0.63, "grad_norm": 0.4247068166732788, "learning_rate": 0.0005370010766938173, "loss": 1.7919, "step": 18943 }, { "epoch": 0.63, "grad_norm": 0.4228276312351227, "learning_rate": 0.0005369946683341759, "loss": 1.8241, "step": 18944 }, { "epoch": 0.63, "grad_norm": 0.4291420578956604, "learning_rate": 0.0005369882596868579, "loss": 1.9349, "step": 18945 }, { "epoch": 0.63, "grad_norm": 0.44337302446365356, "learning_rate": 0.000536981850751871, "loss": 1.8471, "step": 18946 }, { "epoch": 0.63, "grad_norm": 0.4376850426197052, "learning_rate": 0.0005369754415292232, "loss": 1.8765, "step": 18947 }, { "epoch": 0.63, "grad_norm": 0.44385573267936707, "learning_rate": 0.000536969032018922, "loss": 1.8715, "step": 18948 }, { "epoch": 0.63, "grad_norm": 0.4205641746520996, "learning_rate": 0.0005369626222209754, "loss": 1.824, "step": 18949 }, { "epoch": 0.63, "grad_norm": 0.42254507541656494, "learning_rate": 0.0005369562121353911, "loss": 1.866, "step": 18950 }, { "epoch": 0.63, "grad_norm": 0.4342370927333832, "learning_rate": 0.0005369498017621767, "loss": 1.8648, "step": 18951 }, { "epoch": 0.63, "grad_norm": 0.4347129464149475, "learning_rate": 0.0005369433911013403, "loss": 1.8255, "step": 18952 }, { "epoch": 0.63, "grad_norm": 0.4233395755290985, "learning_rate": 0.0005369369801528895, "loss": 1.7825, "step": 18953 }, { "epoch": 0.63, "grad_norm": 0.4297527074813843, "learning_rate": 0.0005369305689168322, "loss": 1.8462, "step": 18954 }, { "epoch": 0.63, "grad_norm": 0.44858306646347046, "learning_rate": 0.000536924157393176, "loss": 1.8223, "step": 18955 }, { "epoch": 0.63, "grad_norm": 0.4433324337005615, "learning_rate": 0.0005369177455819288, "loss": 1.7641, "step": 18956 }, { "epoch": 0.63, "grad_norm": 0.42785733938217163, "learning_rate": 0.0005369113334830984, "loss": 1.8598, "step": 18957 }, { "epoch": 0.63, "grad_norm": 0.41892027854919434, "learning_rate": 0.0005369049210966926, "loss": 1.8955, "step": 18958 }, { "epoch": 0.63, "grad_norm": 0.43110141158103943, "learning_rate": 0.0005368985084227189, "loss": 1.7794, "step": 18959 }, { "epoch": 0.63, "grad_norm": 0.44099512696266174, "learning_rate": 0.0005368920954611856, "loss": 1.8913, "step": 18960 }, { "epoch": 0.63, "grad_norm": 0.42043429613113403, "learning_rate": 0.0005368856822121, "loss": 1.8047, "step": 18961 }, { "epoch": 0.63, "grad_norm": 0.46404922008514404, "learning_rate": 0.0005368792686754701, "loss": 1.8837, "step": 18962 }, { "epoch": 0.63, "grad_norm": 0.43066757917404175, "learning_rate": 0.0005368728548513038, "loss": 1.8499, "step": 18963 }, { "epoch": 0.63, "grad_norm": 0.4415172040462494, "learning_rate": 0.0005368664407396086, "loss": 1.8478, "step": 18964 }, { "epoch": 0.63, "grad_norm": 0.43107178807258606, "learning_rate": 0.0005368600263403925, "loss": 1.8908, "step": 18965 }, { "epoch": 0.63, "grad_norm": 0.4318891167640686, "learning_rate": 0.0005368536116536633, "loss": 1.7671, "step": 18966 }, { "epoch": 0.63, "grad_norm": 0.4385095536708832, "learning_rate": 0.0005368471966794287, "loss": 1.824, "step": 18967 }, { "epoch": 0.63, "grad_norm": 0.43365252017974854, "learning_rate": 0.0005368407814176965, "loss": 1.8776, "step": 18968 }, { "epoch": 0.63, "grad_norm": 0.43704551458358765, "learning_rate": 0.0005368343658684744, "loss": 1.9381, "step": 18969 }, { "epoch": 0.63, "grad_norm": 0.43465128540992737, "learning_rate": 0.0005368279500317703, "loss": 1.8297, "step": 18970 }, { "epoch": 0.63, "grad_norm": 0.4154488444328308, "learning_rate": 0.000536821533907592, "loss": 1.8879, "step": 18971 }, { "epoch": 0.63, "grad_norm": 0.43096187710762024, "learning_rate": 0.0005368151174959473, "loss": 1.8619, "step": 18972 }, { "epoch": 0.63, "grad_norm": 0.42447376251220703, "learning_rate": 0.0005368087007968439, "loss": 1.8695, "step": 18973 }, { "epoch": 0.63, "grad_norm": 0.4217119514942169, "learning_rate": 0.0005368022838102896, "loss": 1.8603, "step": 18974 }, { "epoch": 0.63, "grad_norm": 0.43283510208129883, "learning_rate": 0.0005367958665362922, "loss": 1.8927, "step": 18975 }, { "epoch": 0.63, "grad_norm": 0.44318345189094543, "learning_rate": 0.0005367894489748597, "loss": 1.924, "step": 18976 }, { "epoch": 0.63, "grad_norm": 0.45013707876205444, "learning_rate": 0.0005367830311259993, "loss": 1.8224, "step": 18977 }, { "epoch": 0.63, "grad_norm": 0.4307398200035095, "learning_rate": 0.0005367766129897195, "loss": 1.825, "step": 18978 }, { "epoch": 0.63, "grad_norm": 0.41291019320487976, "learning_rate": 0.0005367701945660278, "loss": 1.7759, "step": 18979 }, { "epoch": 0.63, "grad_norm": 0.42371565103530884, "learning_rate": 0.0005367637758549317, "loss": 1.8379, "step": 18980 }, { "epoch": 0.63, "grad_norm": 0.4184201657772064, "learning_rate": 0.0005367573568564396, "loss": 1.8497, "step": 18981 }, { "epoch": 0.63, "grad_norm": 0.4258304238319397, "learning_rate": 0.0005367509375705587, "loss": 1.8265, "step": 18982 }, { "epoch": 0.63, "grad_norm": 0.45763248205184937, "learning_rate": 0.0005367445179972972, "loss": 1.8352, "step": 18983 }, { "epoch": 0.63, "grad_norm": 0.44961315393447876, "learning_rate": 0.0005367380981366627, "loss": 1.8358, "step": 18984 }, { "epoch": 0.63, "grad_norm": 0.4407831132411957, "learning_rate": 0.000536731677988663, "loss": 1.8002, "step": 18985 }, { "epoch": 0.63, "grad_norm": 0.4411596655845642, "learning_rate": 0.0005367252575533059, "loss": 1.8528, "step": 18986 }, { "epoch": 0.63, "grad_norm": 0.4349578022956848, "learning_rate": 0.0005367188368305992, "loss": 1.8215, "step": 18987 }, { "epoch": 0.63, "grad_norm": 1.330849289894104, "learning_rate": 0.0005367124158205509, "loss": 1.9426, "step": 18988 }, { "epoch": 0.63, "grad_norm": 0.4334946274757385, "learning_rate": 0.0005367059945231684, "loss": 1.9082, "step": 18989 }, { "epoch": 0.63, "grad_norm": 0.4485098123550415, "learning_rate": 0.0005366995729384598, "loss": 1.7972, "step": 18990 }, { "epoch": 0.63, "grad_norm": 0.4214193820953369, "learning_rate": 0.0005366931510664327, "loss": 1.8539, "step": 18991 }, { "epoch": 0.63, "grad_norm": 0.437459796667099, "learning_rate": 0.000536686728907095, "loss": 1.8068, "step": 18992 }, { "epoch": 0.63, "grad_norm": 0.438760906457901, "learning_rate": 0.0005366803064604547, "loss": 1.9038, "step": 18993 }, { "epoch": 0.63, "grad_norm": 0.46063897013664246, "learning_rate": 0.0005366738837265191, "loss": 1.9024, "step": 18994 }, { "epoch": 0.63, "grad_norm": 0.4282126724720001, "learning_rate": 0.0005366674607052964, "loss": 1.8193, "step": 18995 }, { "epoch": 0.63, "grad_norm": 0.44200918078422546, "learning_rate": 0.0005366610373967942, "loss": 1.8373, "step": 18996 }, { "epoch": 0.63, "grad_norm": 0.45097512006759644, "learning_rate": 0.0005366546138010206, "loss": 1.8581, "step": 18997 }, { "epoch": 0.63, "grad_norm": 0.44163280725479126, "learning_rate": 0.0005366481899179828, "loss": 1.8551, "step": 18998 }, { "epoch": 0.63, "grad_norm": 0.41939008235931396, "learning_rate": 0.0005366417657476892, "loss": 1.784, "step": 18999 }, { "epoch": 0.63, "grad_norm": 0.438454270362854, "learning_rate": 0.0005366353412901474, "loss": 1.9422, "step": 19000 }, { "epoch": 0.63, "grad_norm": 0.4352928698062897, "learning_rate": 0.0005366289165453651, "loss": 1.7726, "step": 19001 }, { "epoch": 0.63, "grad_norm": 0.4521993100643158, "learning_rate": 0.0005366224915133501, "loss": 1.8675, "step": 19002 }, { "epoch": 0.63, "grad_norm": 0.42161786556243896, "learning_rate": 0.0005366160661941105, "loss": 1.7796, "step": 19003 }, { "epoch": 0.63, "grad_norm": 0.4581996500492096, "learning_rate": 0.0005366096405876536, "loss": 1.8567, "step": 19004 }, { "epoch": 0.63, "grad_norm": 0.5056584477424622, "learning_rate": 0.0005366032146939876, "loss": 1.8774, "step": 19005 }, { "epoch": 0.63, "grad_norm": 0.4346347451210022, "learning_rate": 0.00053659678851312, "loss": 1.7713, "step": 19006 }, { "epoch": 0.63, "grad_norm": 0.44757261872291565, "learning_rate": 0.0005365903620450588, "loss": 1.8506, "step": 19007 }, { "epoch": 0.63, "grad_norm": 0.4776240289211273, "learning_rate": 0.0005365839352898119, "loss": 1.8508, "step": 19008 }, { "epoch": 0.63, "grad_norm": 0.5151570439338684, "learning_rate": 0.0005365775082473869, "loss": 1.8312, "step": 19009 }, { "epoch": 0.63, "grad_norm": 0.4352738559246063, "learning_rate": 0.0005365710809177916, "loss": 1.8012, "step": 19010 }, { "epoch": 0.63, "grad_norm": 0.4351431429386139, "learning_rate": 0.0005365646533010339, "loss": 1.8405, "step": 19011 }, { "epoch": 0.63, "grad_norm": 0.4485569894313812, "learning_rate": 0.0005365582253971217, "loss": 1.8657, "step": 19012 }, { "epoch": 0.63, "grad_norm": 0.45429399609565735, "learning_rate": 0.0005365517972060625, "loss": 1.8643, "step": 19013 }, { "epoch": 0.63, "grad_norm": 0.41447100043296814, "learning_rate": 0.0005365453687278642, "loss": 1.7894, "step": 19014 }, { "epoch": 0.63, "grad_norm": 0.4262515604496002, "learning_rate": 0.0005365389399625348, "loss": 1.8107, "step": 19015 }, { "epoch": 0.63, "grad_norm": 0.4476872980594635, "learning_rate": 0.000536532510910082, "loss": 1.8907, "step": 19016 }, { "epoch": 0.63, "grad_norm": 0.42554837465286255, "learning_rate": 0.0005365260815705135, "loss": 1.778, "step": 19017 }, { "epoch": 0.63, "grad_norm": 0.4270292818546295, "learning_rate": 0.0005365196519438373, "loss": 1.8049, "step": 19018 }, { "epoch": 0.63, "grad_norm": 0.42963075637817383, "learning_rate": 0.0005365132220300611, "loss": 1.8012, "step": 19019 }, { "epoch": 0.63, "grad_norm": 0.4367339015007019, "learning_rate": 0.0005365067918291926, "loss": 1.8915, "step": 19020 }, { "epoch": 0.63, "grad_norm": 0.438660591840744, "learning_rate": 0.0005365003613412398, "loss": 1.8052, "step": 19021 }, { "epoch": 0.63, "grad_norm": 0.433184415102005, "learning_rate": 0.0005364939305662102, "loss": 1.8425, "step": 19022 }, { "epoch": 0.63, "grad_norm": 0.4316580593585968, "learning_rate": 0.000536487499504112, "loss": 1.8676, "step": 19023 }, { "epoch": 0.63, "grad_norm": 0.423929899930954, "learning_rate": 0.0005364810681549528, "loss": 1.8311, "step": 19024 }, { "epoch": 0.63, "grad_norm": 0.438547283411026, "learning_rate": 0.0005364746365187404, "loss": 1.866, "step": 19025 }, { "epoch": 0.63, "grad_norm": 0.43122750520706177, "learning_rate": 0.0005364682045954826, "loss": 1.8351, "step": 19026 }, { "epoch": 0.63, "grad_norm": 0.42539453506469727, "learning_rate": 0.0005364617723851873, "loss": 1.8507, "step": 19027 }, { "epoch": 0.63, "grad_norm": 0.44941020011901855, "learning_rate": 0.0005364553398878622, "loss": 1.8006, "step": 19028 }, { "epoch": 0.63, "grad_norm": 0.4252969026565552, "learning_rate": 0.0005364489071035152, "loss": 1.8695, "step": 19029 }, { "epoch": 0.63, "grad_norm": 0.43536946177482605, "learning_rate": 0.0005364424740321541, "loss": 1.8805, "step": 19030 }, { "epoch": 0.63, "grad_norm": 0.42803072929382324, "learning_rate": 0.0005364360406737865, "loss": 1.766, "step": 19031 }, { "epoch": 0.63, "grad_norm": 0.42711082100868225, "learning_rate": 0.0005364296070284205, "loss": 1.8856, "step": 19032 }, { "epoch": 0.63, "grad_norm": 0.41832679510116577, "learning_rate": 0.0005364231730960639, "loss": 1.8685, "step": 19033 }, { "epoch": 0.63, "grad_norm": 0.4218008816242218, "learning_rate": 0.0005364167388767243, "loss": 1.8226, "step": 19034 }, { "epoch": 0.63, "grad_norm": 0.4100753366947174, "learning_rate": 0.0005364103043704096, "loss": 1.8897, "step": 19035 }, { "epoch": 0.63, "grad_norm": 0.4204835593700409, "learning_rate": 0.0005364038695771276, "loss": 1.863, "step": 19036 }, { "epoch": 0.63, "grad_norm": 0.441018670797348, "learning_rate": 0.0005363974344968863, "loss": 1.9165, "step": 19037 }, { "epoch": 0.63, "grad_norm": 0.4388655722141266, "learning_rate": 0.0005363909991296931, "loss": 1.8257, "step": 19038 }, { "epoch": 0.63, "grad_norm": 0.4351770877838135, "learning_rate": 0.0005363845634755562, "loss": 1.7992, "step": 19039 }, { "epoch": 0.63, "grad_norm": 0.43563807010650635, "learning_rate": 0.0005363781275344833, "loss": 1.8954, "step": 19040 }, { "epoch": 0.63, "grad_norm": 0.43395739793777466, "learning_rate": 0.0005363716913064821, "loss": 1.8565, "step": 19041 }, { "epoch": 0.63, "grad_norm": 0.4477514326572418, "learning_rate": 0.0005363652547915606, "loss": 1.809, "step": 19042 }, { "epoch": 0.63, "grad_norm": 0.41812893748283386, "learning_rate": 0.0005363588179897264, "loss": 1.8928, "step": 19043 }, { "epoch": 0.63, "grad_norm": 0.4715697765350342, "learning_rate": 0.0005363523809009874, "loss": 1.8727, "step": 19044 }, { "epoch": 0.63, "grad_norm": 0.44741618633270264, "learning_rate": 0.0005363459435253515, "loss": 1.8674, "step": 19045 }, { "epoch": 0.63, "grad_norm": 0.41862502694129944, "learning_rate": 0.0005363395058628265, "loss": 1.7938, "step": 19046 }, { "epoch": 0.63, "grad_norm": 0.44457900524139404, "learning_rate": 0.0005363330679134201, "loss": 1.8835, "step": 19047 }, { "epoch": 0.63, "grad_norm": 0.45591646432876587, "learning_rate": 0.0005363266296771401, "loss": 1.9052, "step": 19048 }, { "epoch": 0.63, "grad_norm": 0.4164997935295105, "learning_rate": 0.0005363201911539945, "loss": 1.8382, "step": 19049 }, { "epoch": 0.63, "grad_norm": 0.4199081361293793, "learning_rate": 0.000536313752343991, "loss": 1.7747, "step": 19050 }, { "epoch": 0.63, "grad_norm": 0.4398405849933624, "learning_rate": 0.0005363073132471374, "loss": 1.8898, "step": 19051 }, { "epoch": 0.63, "grad_norm": 0.4314925968647003, "learning_rate": 0.0005363008738634415, "loss": 1.85, "step": 19052 }, { "epoch": 0.63, "grad_norm": 0.42696720361709595, "learning_rate": 0.0005362944341929112, "loss": 1.8261, "step": 19053 }, { "epoch": 0.63, "grad_norm": 0.4274708032608032, "learning_rate": 0.0005362879942355543, "loss": 1.824, "step": 19054 }, { "epoch": 0.63, "grad_norm": 0.42437124252319336, "learning_rate": 0.0005362815539913786, "loss": 1.8255, "step": 19055 }, { "epoch": 0.63, "grad_norm": 0.4256325662136078, "learning_rate": 0.0005362751134603918, "loss": 1.7602, "step": 19056 }, { "epoch": 0.63, "grad_norm": 0.4391017556190491, "learning_rate": 0.000536268672642602, "loss": 1.8825, "step": 19057 }, { "epoch": 0.63, "grad_norm": 0.42922210693359375, "learning_rate": 0.0005362622315380166, "loss": 1.8045, "step": 19058 }, { "epoch": 0.63, "grad_norm": 0.46192988753318787, "learning_rate": 0.0005362557901466439, "loss": 1.8056, "step": 19059 }, { "epoch": 0.63, "grad_norm": 0.4354894757270813, "learning_rate": 0.0005362493484684914, "loss": 1.828, "step": 19060 }, { "epoch": 0.63, "grad_norm": 0.42885157465934753, "learning_rate": 0.0005362429065035669, "loss": 1.9096, "step": 19061 }, { "epoch": 0.63, "grad_norm": 0.4266047477722168, "learning_rate": 0.0005362364642518784, "loss": 1.8577, "step": 19062 }, { "epoch": 0.63, "grad_norm": 0.6125790476799011, "learning_rate": 0.0005362300217134337, "loss": 1.8489, "step": 19063 }, { "epoch": 0.63, "grad_norm": 0.41105636954307556, "learning_rate": 0.0005362235788882405, "loss": 1.873, "step": 19064 }, { "epoch": 0.63, "grad_norm": 0.41614627838134766, "learning_rate": 0.0005362171357763067, "loss": 1.7777, "step": 19065 }, { "epoch": 0.63, "grad_norm": 0.42559242248535156, "learning_rate": 0.0005362106923776401, "loss": 1.8677, "step": 19066 }, { "epoch": 0.63, "grad_norm": 0.44589507579803467, "learning_rate": 0.0005362042486922485, "loss": 1.7867, "step": 19067 }, { "epoch": 0.63, "grad_norm": 0.4337685704231262, "learning_rate": 0.0005361978047201398, "loss": 1.8231, "step": 19068 }, { "epoch": 0.63, "grad_norm": 0.43100425601005554, "learning_rate": 0.0005361913604613217, "loss": 1.7697, "step": 19069 }, { "epoch": 0.63, "grad_norm": 0.4264790415763855, "learning_rate": 0.0005361849159158021, "loss": 1.8768, "step": 19070 }, { "epoch": 0.63, "grad_norm": 0.4365420937538147, "learning_rate": 0.0005361784710835888, "loss": 1.9106, "step": 19071 }, { "epoch": 0.63, "grad_norm": 0.4331054985523224, "learning_rate": 0.0005361720259646897, "loss": 1.8841, "step": 19072 }, { "epoch": 0.63, "grad_norm": 0.4162275791168213, "learning_rate": 0.0005361655805591125, "loss": 1.7773, "step": 19073 }, { "epoch": 0.63, "grad_norm": 0.44472578167915344, "learning_rate": 0.0005361591348668651, "loss": 1.8663, "step": 19074 }, { "epoch": 0.63, "grad_norm": 0.4169731140136719, "learning_rate": 0.0005361526888879554, "loss": 1.8833, "step": 19075 }, { "epoch": 0.63, "grad_norm": 0.4199233949184418, "learning_rate": 0.000536146242622391, "loss": 1.8823, "step": 19076 }, { "epoch": 0.63, "grad_norm": 1.2542393207550049, "learning_rate": 0.0005361397960701799, "loss": 1.8589, "step": 19077 }, { "epoch": 0.63, "grad_norm": 0.43731218576431274, "learning_rate": 0.0005361333492313299, "loss": 1.8396, "step": 19078 }, { "epoch": 0.63, "grad_norm": 0.4337296187877655, "learning_rate": 0.0005361269021058487, "loss": 1.836, "step": 19079 }, { "epoch": 0.63, "grad_norm": 0.7085175514221191, "learning_rate": 0.0005361204546937443, "loss": 1.8856, "step": 19080 }, { "epoch": 0.63, "grad_norm": 0.42409294843673706, "learning_rate": 0.0005361140069950245, "loss": 1.8522, "step": 19081 }, { "epoch": 0.63, "grad_norm": 0.44111546874046326, "learning_rate": 0.0005361075590096971, "loss": 1.8749, "step": 19082 }, { "epoch": 0.63, "grad_norm": 0.4537266194820404, "learning_rate": 0.0005361011107377698, "loss": 1.8701, "step": 19083 }, { "epoch": 0.63, "grad_norm": 0.4238896071910858, "learning_rate": 0.0005360946621792506, "loss": 1.8355, "step": 19084 }, { "epoch": 0.63, "grad_norm": 0.42415568232536316, "learning_rate": 0.0005360882133341474, "loss": 1.7967, "step": 19085 }, { "epoch": 0.63, "grad_norm": 0.4380486309528351, "learning_rate": 0.0005360817642024677, "loss": 1.7985, "step": 19086 }, { "epoch": 0.64, "grad_norm": 0.44146716594696045, "learning_rate": 0.0005360753147842196, "loss": 1.8051, "step": 19087 }, { "epoch": 0.64, "grad_norm": 0.4309752285480499, "learning_rate": 0.0005360688650794109, "loss": 1.8433, "step": 19088 }, { "epoch": 0.64, "grad_norm": 0.4303102195262909, "learning_rate": 0.0005360624150880493, "loss": 1.8052, "step": 19089 }, { "epoch": 0.64, "grad_norm": 0.42879536747932434, "learning_rate": 0.0005360559648101427, "loss": 1.7332, "step": 19090 }, { "epoch": 0.64, "grad_norm": 0.43003806471824646, "learning_rate": 0.000536049514245699, "loss": 1.7977, "step": 19091 }, { "epoch": 0.64, "grad_norm": 0.4341285228729248, "learning_rate": 0.0005360430633947259, "loss": 1.81, "step": 19092 }, { "epoch": 0.64, "grad_norm": 0.4296919107437134, "learning_rate": 0.0005360366122572315, "loss": 1.8799, "step": 19093 }, { "epoch": 0.64, "grad_norm": 0.44122618436813354, "learning_rate": 0.0005360301608332232, "loss": 1.7985, "step": 19094 }, { "epoch": 0.64, "grad_norm": 0.4250083565711975, "learning_rate": 0.0005360237091227092, "loss": 1.8851, "step": 19095 }, { "epoch": 0.64, "grad_norm": 0.4564439058303833, "learning_rate": 0.0005360172571256971, "loss": 1.8706, "step": 19096 }, { "epoch": 0.64, "grad_norm": 0.4369330406188965, "learning_rate": 0.0005360108048421949, "loss": 1.8081, "step": 19097 }, { "epoch": 0.64, "grad_norm": 0.41481804847717285, "learning_rate": 0.0005360043522722103, "loss": 1.8719, "step": 19098 }, { "epoch": 0.64, "grad_norm": 0.42609068751335144, "learning_rate": 0.0005359978994157513, "loss": 1.8199, "step": 19099 }, { "epoch": 0.64, "grad_norm": 0.4282873868942261, "learning_rate": 0.0005359914462728254, "loss": 1.8316, "step": 19100 }, { "epoch": 0.64, "grad_norm": 0.4377647340297699, "learning_rate": 0.0005359849928434408, "loss": 1.8192, "step": 19101 }, { "epoch": 0.64, "grad_norm": 0.44156813621520996, "learning_rate": 0.0005359785391276052, "loss": 1.8731, "step": 19102 }, { "epoch": 0.64, "grad_norm": 0.4394984841346741, "learning_rate": 0.0005359720851253264, "loss": 1.8221, "step": 19103 }, { "epoch": 0.64, "grad_norm": 0.4128917455673218, "learning_rate": 0.0005359656308366122, "loss": 1.7699, "step": 19104 }, { "epoch": 0.64, "grad_norm": 0.43712788820266724, "learning_rate": 0.0005359591762614707, "loss": 1.8726, "step": 19105 }, { "epoch": 0.64, "grad_norm": 0.42822226881980896, "learning_rate": 0.0005359527213999093, "loss": 1.8635, "step": 19106 }, { "epoch": 0.64, "grad_norm": 0.8428131937980652, "learning_rate": 0.0005359462662519361, "loss": 1.9151, "step": 19107 }, { "epoch": 0.64, "grad_norm": 0.4310707747936249, "learning_rate": 0.0005359398108175591, "loss": 1.8516, "step": 19108 }, { "epoch": 0.64, "grad_norm": 0.46661943197250366, "learning_rate": 0.0005359333550967857, "loss": 1.9639, "step": 19109 }, { "epoch": 0.64, "grad_norm": 0.4267575442790985, "learning_rate": 0.000535926899089624, "loss": 1.8075, "step": 19110 }, { "epoch": 0.64, "grad_norm": 0.4308350682258606, "learning_rate": 0.0005359204427960819, "loss": 1.9169, "step": 19111 }, { "epoch": 0.64, "grad_norm": 0.4191274642944336, "learning_rate": 0.0005359139862161671, "loss": 1.7743, "step": 19112 }, { "epoch": 0.64, "grad_norm": 0.43332186341285706, "learning_rate": 0.0005359075293498875, "loss": 1.8894, "step": 19113 }, { "epoch": 0.64, "grad_norm": 0.4246678352355957, "learning_rate": 0.0005359010721972508, "loss": 1.8389, "step": 19114 }, { "epoch": 0.64, "grad_norm": 0.42444419860839844, "learning_rate": 0.0005358946147582651, "loss": 1.8044, "step": 19115 }, { "epoch": 0.64, "grad_norm": 0.4249272644519806, "learning_rate": 0.000535888157032938, "loss": 1.9134, "step": 19116 }, { "epoch": 0.64, "grad_norm": 0.4340013861656189, "learning_rate": 0.0005358816990212775, "loss": 1.9137, "step": 19117 }, { "epoch": 0.64, "grad_norm": 0.4150760769844055, "learning_rate": 0.0005358752407232913, "loss": 1.8215, "step": 19118 }, { "epoch": 0.64, "grad_norm": 0.4219599664211273, "learning_rate": 0.0005358687821389875, "loss": 1.8088, "step": 19119 }, { "epoch": 0.64, "grad_norm": 0.43506887555122375, "learning_rate": 0.0005358623232683735, "loss": 1.8515, "step": 19120 }, { "epoch": 0.64, "grad_norm": 0.42658066749572754, "learning_rate": 0.0005358558641114575, "loss": 1.8067, "step": 19121 }, { "epoch": 0.64, "grad_norm": 0.44224822521209717, "learning_rate": 0.0005358494046682473, "loss": 1.7806, "step": 19122 }, { "epoch": 0.64, "grad_norm": 0.438812255859375, "learning_rate": 0.0005358429449387506, "loss": 1.7956, "step": 19123 }, { "epoch": 0.64, "grad_norm": 0.4231714606285095, "learning_rate": 0.0005358364849229753, "loss": 1.8175, "step": 19124 }, { "epoch": 0.64, "grad_norm": 0.4237048923969269, "learning_rate": 0.0005358300246209294, "loss": 1.8108, "step": 19125 }, { "epoch": 0.64, "grad_norm": 0.43221813440322876, "learning_rate": 0.0005358235640326205, "loss": 1.812, "step": 19126 }, { "epoch": 0.64, "grad_norm": 0.435178279876709, "learning_rate": 0.0005358171031580565, "loss": 1.8165, "step": 19127 }, { "epoch": 0.64, "grad_norm": 0.44356176257133484, "learning_rate": 0.0005358106419972452, "loss": 1.7842, "step": 19128 }, { "epoch": 0.64, "grad_norm": 0.4324283301830292, "learning_rate": 0.0005358041805501947, "loss": 1.8474, "step": 19129 }, { "epoch": 0.64, "grad_norm": 0.433836966753006, "learning_rate": 0.0005357977188169126, "loss": 1.8636, "step": 19130 }, { "epoch": 0.64, "grad_norm": 0.4555802345275879, "learning_rate": 0.0005357912567974068, "loss": 1.8514, "step": 19131 }, { "epoch": 0.64, "grad_norm": 0.4125802516937256, "learning_rate": 0.0005357847944916852, "loss": 1.8415, "step": 19132 }, { "epoch": 0.64, "grad_norm": 0.44909095764160156, "learning_rate": 0.0005357783318997555, "loss": 1.876, "step": 19133 }, { "epoch": 0.64, "grad_norm": 0.4106493890285492, "learning_rate": 0.0005357718690216258, "loss": 1.7942, "step": 19134 }, { "epoch": 0.64, "grad_norm": 0.45678359270095825, "learning_rate": 0.0005357654058573036, "loss": 1.8152, "step": 19135 }, { "epoch": 0.64, "grad_norm": 0.42961418628692627, "learning_rate": 0.0005357589424067971, "loss": 1.8499, "step": 19136 }, { "epoch": 0.64, "grad_norm": 0.4253241717815399, "learning_rate": 0.000535752478670114, "loss": 1.8985, "step": 19137 }, { "epoch": 0.64, "grad_norm": 0.4318019151687622, "learning_rate": 0.000535746014647262, "loss": 1.8273, "step": 19138 }, { "epoch": 0.64, "grad_norm": 0.43424397706985474, "learning_rate": 0.0005357395503382491, "loss": 1.8561, "step": 19139 }, { "epoch": 0.64, "grad_norm": 0.44931918382644653, "learning_rate": 0.0005357330857430831, "loss": 1.8702, "step": 19140 }, { "epoch": 0.64, "grad_norm": 0.42353326082229614, "learning_rate": 0.000535726620861772, "loss": 1.8192, "step": 19141 }, { "epoch": 0.64, "grad_norm": 0.42008471488952637, "learning_rate": 0.0005357201556943234, "loss": 1.9106, "step": 19142 }, { "epoch": 0.64, "grad_norm": 0.4173583388328552, "learning_rate": 0.0005357136902407453, "loss": 1.7762, "step": 19143 }, { "epoch": 0.64, "grad_norm": 0.43559861183166504, "learning_rate": 0.0005357072245010455, "loss": 1.9346, "step": 19144 }, { "epoch": 0.64, "grad_norm": 0.42984381318092346, "learning_rate": 0.0005357007584752319, "loss": 1.7126, "step": 19145 }, { "epoch": 0.64, "grad_norm": 0.40994352102279663, "learning_rate": 0.0005356942921633122, "loss": 1.859, "step": 19146 }, { "epoch": 0.64, "grad_norm": 0.44357627630233765, "learning_rate": 0.0005356878255652944, "loss": 1.8897, "step": 19147 }, { "epoch": 0.64, "grad_norm": 0.429650217294693, "learning_rate": 0.0005356813586811863, "loss": 1.8017, "step": 19148 }, { "epoch": 0.64, "grad_norm": 0.4403252601623535, "learning_rate": 0.0005356748915109959, "loss": 1.804, "step": 19149 }, { "epoch": 0.64, "grad_norm": 0.44097378849983215, "learning_rate": 0.0005356684240547308, "loss": 1.8191, "step": 19150 }, { "epoch": 0.64, "grad_norm": 0.42965948581695557, "learning_rate": 0.000535661956312399, "loss": 1.8532, "step": 19151 }, { "epoch": 0.64, "grad_norm": 0.42763110995292664, "learning_rate": 0.0005356554882840083, "loss": 1.8458, "step": 19152 }, { "epoch": 0.64, "grad_norm": 0.437694787979126, "learning_rate": 0.0005356490199695664, "loss": 1.9119, "step": 19153 }, { "epoch": 0.64, "grad_norm": 0.43960872292518616, "learning_rate": 0.0005356425513690815, "loss": 1.8202, "step": 19154 }, { "epoch": 0.64, "grad_norm": 0.4296695590019226, "learning_rate": 0.0005356360824825612, "loss": 1.8863, "step": 19155 }, { "epoch": 0.64, "grad_norm": 0.4308547377586365, "learning_rate": 0.0005356296133100134, "loss": 1.7722, "step": 19156 }, { "epoch": 0.64, "grad_norm": 0.42477041482925415, "learning_rate": 0.000535623143851446, "loss": 1.8875, "step": 19157 }, { "epoch": 0.64, "grad_norm": 0.43150654435157776, "learning_rate": 0.0005356166741068667, "loss": 1.8336, "step": 19158 }, { "epoch": 0.64, "grad_norm": 0.4627658426761627, "learning_rate": 0.0005356102040762837, "loss": 1.7806, "step": 19159 }, { "epoch": 0.64, "grad_norm": 0.4202214777469635, "learning_rate": 0.0005356037337597044, "loss": 1.8214, "step": 19160 }, { "epoch": 0.64, "grad_norm": 0.4250571131706238, "learning_rate": 0.000535597263157137, "loss": 1.8415, "step": 19161 }, { "epoch": 0.64, "grad_norm": 0.44076788425445557, "learning_rate": 0.0005355907922685892, "loss": 1.7894, "step": 19162 }, { "epoch": 0.64, "grad_norm": 0.440878689289093, "learning_rate": 0.000535584321094069, "loss": 1.8221, "step": 19163 }, { "epoch": 0.64, "grad_norm": 0.45145365595817566, "learning_rate": 0.0005355778496335839, "loss": 1.8477, "step": 19164 }, { "epoch": 0.64, "grad_norm": 0.43693265318870544, "learning_rate": 0.0005355713778871422, "loss": 1.8182, "step": 19165 }, { "epoch": 0.64, "grad_norm": 0.4217476546764374, "learning_rate": 0.0005355649058547515, "loss": 1.7764, "step": 19166 }, { "epoch": 0.64, "grad_norm": 0.4436999559402466, "learning_rate": 0.0005355584335364198, "loss": 1.8346, "step": 19167 }, { "epoch": 0.64, "grad_norm": 0.4397556483745575, "learning_rate": 0.0005355519609321548, "loss": 1.8474, "step": 19168 }, { "epoch": 0.64, "grad_norm": 0.43511685729026794, "learning_rate": 0.0005355454880419644, "loss": 1.871, "step": 19169 }, { "epoch": 0.64, "grad_norm": 0.44039830565452576, "learning_rate": 0.0005355390148658565, "loss": 1.8728, "step": 19170 }, { "epoch": 0.64, "grad_norm": 0.4287404716014862, "learning_rate": 0.000535532541403839, "loss": 1.8092, "step": 19171 }, { "epoch": 0.64, "grad_norm": 0.4254741072654724, "learning_rate": 0.0005355260676559195, "loss": 1.8547, "step": 19172 }, { "epoch": 0.64, "grad_norm": 0.45195019245147705, "learning_rate": 0.0005355195936221064, "loss": 1.9822, "step": 19173 }, { "epoch": 0.64, "grad_norm": 0.4356512129306793, "learning_rate": 0.000535513119302407, "loss": 1.8084, "step": 19174 }, { "epoch": 0.64, "grad_norm": 0.44626039266586304, "learning_rate": 0.0005355066446968293, "loss": 1.7971, "step": 19175 }, { "epoch": 0.64, "grad_norm": 0.45792901515960693, "learning_rate": 0.0005355001698053814, "loss": 1.8731, "step": 19176 }, { "epoch": 0.64, "grad_norm": 0.42912110686302185, "learning_rate": 0.0005354936946280708, "loss": 1.8072, "step": 19177 }, { "epoch": 0.64, "grad_norm": 0.4442174434661865, "learning_rate": 0.0005354872191649057, "loss": 1.8761, "step": 19178 }, { "epoch": 0.64, "grad_norm": 0.41988784074783325, "learning_rate": 0.0005354807434158938, "loss": 1.8537, "step": 19179 }, { "epoch": 0.64, "grad_norm": 0.433244913816452, "learning_rate": 0.000535474267381043, "loss": 1.9169, "step": 19180 }, { "epoch": 0.64, "grad_norm": 0.42772918939590454, "learning_rate": 0.0005354677910603612, "loss": 1.9032, "step": 19181 }, { "epoch": 0.64, "grad_norm": 0.42813989520072937, "learning_rate": 0.0005354613144538561, "loss": 1.8934, "step": 19182 }, { "epoch": 0.64, "grad_norm": 0.43343862891197205, "learning_rate": 0.0005354548375615355, "loss": 1.8811, "step": 19183 }, { "epoch": 0.64, "grad_norm": 0.44782522320747375, "learning_rate": 0.0005354483603834076, "loss": 1.8239, "step": 19184 }, { "epoch": 0.64, "grad_norm": 0.4305383861064911, "learning_rate": 0.00053544188291948, "loss": 1.8812, "step": 19185 }, { "epoch": 0.64, "grad_norm": 0.41144511103630066, "learning_rate": 0.0005354354051697608, "loss": 1.8663, "step": 19186 }, { "epoch": 0.64, "grad_norm": 0.42688924074172974, "learning_rate": 0.0005354289271342575, "loss": 1.8218, "step": 19187 }, { "epoch": 0.64, "grad_norm": 0.40893232822418213, "learning_rate": 0.0005354224488129783, "loss": 1.7939, "step": 19188 }, { "epoch": 0.64, "grad_norm": 0.4229283332824707, "learning_rate": 0.0005354159702059309, "loss": 1.7997, "step": 19189 }, { "epoch": 0.64, "grad_norm": 0.42594239115715027, "learning_rate": 0.0005354094913131234, "loss": 1.8435, "step": 19190 }, { "epoch": 0.64, "grad_norm": 0.42267125844955444, "learning_rate": 0.0005354030121345632, "loss": 1.8249, "step": 19191 }, { "epoch": 0.64, "grad_norm": 0.42312535643577576, "learning_rate": 0.0005353965326702584, "loss": 1.8767, "step": 19192 }, { "epoch": 0.64, "grad_norm": 0.405153751373291, "learning_rate": 0.000535390052920217, "loss": 1.8295, "step": 19193 }, { "epoch": 0.64, "grad_norm": 0.4231431782245636, "learning_rate": 0.0005353835728844468, "loss": 1.8911, "step": 19194 }, { "epoch": 0.64, "grad_norm": 0.43113839626312256, "learning_rate": 0.0005353770925629555, "loss": 1.8728, "step": 19195 }, { "epoch": 0.64, "grad_norm": 0.4309438467025757, "learning_rate": 0.0005353706119557512, "loss": 1.8443, "step": 19196 }, { "epoch": 0.64, "grad_norm": 0.42653390765190125, "learning_rate": 0.0005353641310628417, "loss": 1.8355, "step": 19197 }, { "epoch": 0.64, "grad_norm": 0.42763277888298035, "learning_rate": 0.0005353576498842347, "loss": 1.8536, "step": 19198 }, { "epoch": 0.64, "grad_norm": 0.43397819995880127, "learning_rate": 0.0005353511684199383, "loss": 1.9447, "step": 19199 }, { "epoch": 0.64, "grad_norm": 0.4474031925201416, "learning_rate": 0.0005353446866699601, "loss": 1.8241, "step": 19200 }, { "epoch": 0.64, "grad_norm": 0.41988497972488403, "learning_rate": 0.0005353382046343083, "loss": 1.9241, "step": 19201 }, { "epoch": 0.64, "grad_norm": 0.4416735768318176, "learning_rate": 0.0005353317223129905, "loss": 1.8183, "step": 19202 }, { "epoch": 0.64, "grad_norm": 0.42425811290740967, "learning_rate": 0.0005353252397060147, "loss": 1.8897, "step": 19203 }, { "epoch": 0.64, "grad_norm": 0.4234631359577179, "learning_rate": 0.0005353187568133888, "loss": 1.8558, "step": 19204 }, { "epoch": 0.64, "grad_norm": 0.4180057644844055, "learning_rate": 0.0005353122736351204, "loss": 1.8946, "step": 19205 }, { "epoch": 0.64, "grad_norm": 0.4306269884109497, "learning_rate": 0.0005353057901712177, "loss": 1.8933, "step": 19206 }, { "epoch": 0.64, "grad_norm": 0.4252813458442688, "learning_rate": 0.0005352993064216884, "loss": 1.7882, "step": 19207 }, { "epoch": 0.64, "grad_norm": 0.4463161528110504, "learning_rate": 0.0005352928223865405, "loss": 1.8372, "step": 19208 }, { "epoch": 0.64, "grad_norm": 0.4385070204734802, "learning_rate": 0.0005352863380657817, "loss": 1.8134, "step": 19209 }, { "epoch": 0.64, "grad_norm": 0.4188264012336731, "learning_rate": 0.00053527985345942, "loss": 1.8897, "step": 19210 }, { "epoch": 0.64, "grad_norm": 0.43819427490234375, "learning_rate": 0.0005352733685674632, "loss": 1.8783, "step": 19211 }, { "epoch": 0.64, "grad_norm": 0.44870442152023315, "learning_rate": 0.0005352668833899193, "loss": 1.7872, "step": 19212 }, { "epoch": 0.64, "grad_norm": 0.4302687346935272, "learning_rate": 0.000535260397926796, "loss": 1.9161, "step": 19213 }, { "epoch": 0.64, "grad_norm": 0.42553260922431946, "learning_rate": 0.0005352539121781011, "loss": 1.8535, "step": 19214 }, { "epoch": 0.64, "grad_norm": 0.4410904347896576, "learning_rate": 0.0005352474261438427, "loss": 1.8307, "step": 19215 }, { "epoch": 0.64, "grad_norm": 0.422315388917923, "learning_rate": 0.0005352409398240286, "loss": 1.8503, "step": 19216 }, { "epoch": 0.64, "grad_norm": 0.4212471842765808, "learning_rate": 0.0005352344532186667, "loss": 1.7802, "step": 19217 }, { "epoch": 0.64, "grad_norm": 0.4301930069923401, "learning_rate": 0.0005352279663277648, "loss": 1.7957, "step": 19218 }, { "epoch": 0.64, "grad_norm": 0.42399072647094727, "learning_rate": 0.0005352214791513308, "loss": 1.8448, "step": 19219 }, { "epoch": 0.64, "grad_norm": 0.42527320981025696, "learning_rate": 0.0005352149916893727, "loss": 1.8488, "step": 19220 }, { "epoch": 0.64, "grad_norm": 0.42274990677833557, "learning_rate": 0.0005352085039418982, "loss": 1.8378, "step": 19221 }, { "epoch": 0.64, "grad_norm": 0.4299333989620209, "learning_rate": 0.0005352020159089151, "loss": 1.8158, "step": 19222 }, { "epoch": 0.64, "grad_norm": 0.4459965229034424, "learning_rate": 0.0005351955275904314, "loss": 1.8524, "step": 19223 }, { "epoch": 0.64, "grad_norm": 0.4119117856025696, "learning_rate": 0.0005351890389864552, "loss": 1.827, "step": 19224 }, { "epoch": 0.64, "grad_norm": 0.42971277236938477, "learning_rate": 0.000535182550096994, "loss": 1.8325, "step": 19225 }, { "epoch": 0.64, "grad_norm": 0.4297665059566498, "learning_rate": 0.0005351760609220559, "loss": 1.8418, "step": 19226 }, { "epoch": 0.64, "grad_norm": 0.43724143505096436, "learning_rate": 0.0005351695714616487, "loss": 1.8237, "step": 19227 }, { "epoch": 0.64, "grad_norm": 0.4595589339733124, "learning_rate": 0.0005351630817157803, "loss": 1.8528, "step": 19228 }, { "epoch": 0.64, "grad_norm": 0.4163261353969574, "learning_rate": 0.0005351565916844585, "loss": 1.8723, "step": 19229 }, { "epoch": 0.64, "grad_norm": 0.46522271633148193, "learning_rate": 0.0005351501013676912, "loss": 1.8524, "step": 19230 }, { "epoch": 0.64, "grad_norm": 0.4326495826244354, "learning_rate": 0.0005351436107654865, "loss": 1.8257, "step": 19231 }, { "epoch": 0.64, "grad_norm": 0.43452632427215576, "learning_rate": 0.0005351371198778521, "loss": 1.7464, "step": 19232 }, { "epoch": 0.64, "grad_norm": 0.42149192094802856, "learning_rate": 0.0005351306287047958, "loss": 1.8103, "step": 19233 }, { "epoch": 0.64, "grad_norm": 0.44443705677986145, "learning_rate": 0.0005351241372463255, "loss": 1.8849, "step": 19234 }, { "epoch": 0.64, "grad_norm": 0.44328901171684265, "learning_rate": 0.0005351176455024492, "loss": 1.8491, "step": 19235 }, { "epoch": 0.64, "grad_norm": 0.42331674695014954, "learning_rate": 0.0005351111534731748, "loss": 1.9077, "step": 19236 }, { "epoch": 0.64, "grad_norm": 0.41543182730674744, "learning_rate": 0.00053510466115851, "loss": 1.8084, "step": 19237 }, { "epoch": 0.64, "grad_norm": 0.4440794885158539, "learning_rate": 0.0005350981685584628, "loss": 1.8627, "step": 19238 }, { "epoch": 0.64, "grad_norm": 0.4514015316963196, "learning_rate": 0.0005350916756730411, "loss": 1.8368, "step": 19239 }, { "epoch": 0.64, "grad_norm": 0.42103007435798645, "learning_rate": 0.0005350851825022527, "loss": 1.859, "step": 19240 }, { "epoch": 0.64, "grad_norm": 0.4267968237400055, "learning_rate": 0.0005350786890461056, "loss": 1.9289, "step": 19241 }, { "epoch": 0.64, "grad_norm": 0.4351463317871094, "learning_rate": 0.0005350721953046075, "loss": 1.8546, "step": 19242 }, { "epoch": 0.64, "grad_norm": 0.44617539644241333, "learning_rate": 0.0005350657012777665, "loss": 1.8542, "step": 19243 }, { "epoch": 0.64, "grad_norm": 0.4317740201950073, "learning_rate": 0.0005350592069655903, "loss": 1.8609, "step": 19244 }, { "epoch": 0.64, "grad_norm": 0.4429931044578552, "learning_rate": 0.0005350527123680869, "loss": 1.9136, "step": 19245 }, { "epoch": 0.64, "grad_norm": 0.4435403645038605, "learning_rate": 0.0005350462174852641, "loss": 1.8156, "step": 19246 }, { "epoch": 0.64, "grad_norm": 0.4607568681240082, "learning_rate": 0.0005350397223171299, "loss": 1.8201, "step": 19247 }, { "epoch": 0.64, "grad_norm": 0.4185953438282013, "learning_rate": 0.000535033226863692, "loss": 1.8419, "step": 19248 }, { "epoch": 0.64, "grad_norm": 0.42311158776283264, "learning_rate": 0.0005350267311249584, "loss": 1.857, "step": 19249 }, { "epoch": 0.64, "grad_norm": 0.44770529866218567, "learning_rate": 0.0005350202351009371, "loss": 1.8547, "step": 19250 }, { "epoch": 0.64, "grad_norm": 0.45818108320236206, "learning_rate": 0.0005350137387916357, "loss": 1.8513, "step": 19251 }, { "epoch": 0.64, "grad_norm": 0.4327790141105652, "learning_rate": 0.0005350072421970624, "loss": 1.8255, "step": 19252 }, { "epoch": 0.64, "grad_norm": 0.4224204123020172, "learning_rate": 0.0005350007453172248, "loss": 1.8409, "step": 19253 }, { "epoch": 0.64, "grad_norm": 0.4496234059333801, "learning_rate": 0.000534994248152131, "loss": 1.8755, "step": 19254 }, { "epoch": 0.64, "grad_norm": 0.42854538559913635, "learning_rate": 0.0005349877507017888, "loss": 1.8708, "step": 19255 }, { "epoch": 0.64, "grad_norm": 0.42034396529197693, "learning_rate": 0.0005349812529662062, "loss": 1.8516, "step": 19256 }, { "epoch": 0.64, "grad_norm": 0.4529024064540863, "learning_rate": 0.0005349747549453908, "loss": 1.8035, "step": 19257 }, { "epoch": 0.64, "grad_norm": 0.4645957350730896, "learning_rate": 0.0005349682566393508, "loss": 1.8134, "step": 19258 }, { "epoch": 0.64, "grad_norm": 0.432676762342453, "learning_rate": 0.0005349617580480938, "loss": 1.7983, "step": 19259 }, { "epoch": 0.64, "grad_norm": 0.4593525826931, "learning_rate": 0.000534955259171628, "loss": 1.8502, "step": 19260 }, { "epoch": 0.64, "grad_norm": 0.4331851899623871, "learning_rate": 0.000534948760009961, "loss": 1.7883, "step": 19261 }, { "epoch": 0.64, "grad_norm": 0.4564131498336792, "learning_rate": 0.000534942260563101, "loss": 1.853, "step": 19262 }, { "epoch": 0.64, "grad_norm": 0.41667303442955017, "learning_rate": 0.0005349357608310557, "loss": 1.9325, "step": 19263 }, { "epoch": 0.64, "grad_norm": 0.45711687207221985, "learning_rate": 0.0005349292608138328, "loss": 1.856, "step": 19264 }, { "epoch": 0.64, "grad_norm": 0.42783695459365845, "learning_rate": 0.0005349227605114406, "loss": 1.8801, "step": 19265 }, { "epoch": 0.64, "grad_norm": 0.4271656274795532, "learning_rate": 0.0005349162599238866, "loss": 1.8242, "step": 19266 }, { "epoch": 0.64, "grad_norm": 0.4432980418205261, "learning_rate": 0.000534909759051179, "loss": 1.8803, "step": 19267 }, { "epoch": 0.64, "grad_norm": 0.43308186531066895, "learning_rate": 0.0005349032578933256, "loss": 1.8443, "step": 19268 }, { "epoch": 0.64, "grad_norm": 0.4344536364078522, "learning_rate": 0.0005348967564503341, "loss": 1.8997, "step": 19269 }, { "epoch": 0.64, "grad_norm": 0.47402915358543396, "learning_rate": 0.0005348902547222126, "loss": 1.8208, "step": 19270 }, { "epoch": 0.64, "grad_norm": 0.4795972406864166, "learning_rate": 0.0005348837527089692, "loss": 1.8669, "step": 19271 }, { "epoch": 0.64, "grad_norm": 0.42554861307144165, "learning_rate": 0.0005348772504106112, "loss": 1.8471, "step": 19272 }, { "epoch": 0.64, "grad_norm": 0.4517807066440582, "learning_rate": 0.0005348707478271469, "loss": 1.8924, "step": 19273 }, { "epoch": 0.64, "grad_norm": 0.45776405930519104, "learning_rate": 0.0005348642449585841, "loss": 1.7845, "step": 19274 }, { "epoch": 0.64, "grad_norm": 0.4570860266685486, "learning_rate": 0.0005348577418049309, "loss": 1.7973, "step": 19275 }, { "epoch": 0.64, "grad_norm": 0.42088010907173157, "learning_rate": 0.0005348512383661949, "loss": 1.8417, "step": 19276 }, { "epoch": 0.64, "grad_norm": 0.44134849309921265, "learning_rate": 0.0005348447346423841, "loss": 1.847, "step": 19277 }, { "epoch": 0.64, "grad_norm": 0.4320332705974579, "learning_rate": 0.0005348382306335063, "loss": 1.7792, "step": 19278 }, { "epoch": 0.64, "grad_norm": 0.4590950012207031, "learning_rate": 0.0005348317263395696, "loss": 1.8747, "step": 19279 }, { "epoch": 0.64, "grad_norm": 0.4263657331466675, "learning_rate": 0.0005348252217605816, "loss": 1.7906, "step": 19280 }, { "epoch": 0.64, "grad_norm": 0.42778536677360535, "learning_rate": 0.0005348187168965505, "loss": 1.8522, "step": 19281 }, { "epoch": 0.64, "grad_norm": 0.4185959994792938, "learning_rate": 0.0005348122117474842, "loss": 1.8583, "step": 19282 }, { "epoch": 0.64, "grad_norm": 0.4312124252319336, "learning_rate": 0.0005348057063133904, "loss": 1.8939, "step": 19283 }, { "epoch": 0.64, "grad_norm": 0.4347156286239624, "learning_rate": 0.0005347992005942771, "loss": 1.8423, "step": 19284 }, { "epoch": 0.64, "grad_norm": 0.4405059516429901, "learning_rate": 0.0005347926945901521, "loss": 1.8129, "step": 19285 }, { "epoch": 0.64, "grad_norm": 0.4225256145000458, "learning_rate": 0.0005347861883010233, "loss": 1.8557, "step": 19286 }, { "epoch": 0.64, "grad_norm": 0.4399254322052002, "learning_rate": 0.0005347796817268988, "loss": 1.8333, "step": 19287 }, { "epoch": 0.64, "grad_norm": 0.47099727392196655, "learning_rate": 0.0005347731748677863, "loss": 1.8136, "step": 19288 }, { "epoch": 0.64, "grad_norm": 0.4426063597202301, "learning_rate": 0.0005347666677236937, "loss": 1.8406, "step": 19289 }, { "epoch": 0.64, "grad_norm": 0.4344702661037445, "learning_rate": 0.000534760160294629, "loss": 1.9009, "step": 19290 }, { "epoch": 0.64, "grad_norm": 0.44100940227508545, "learning_rate": 0.0005347536525806001, "loss": 1.8563, "step": 19291 }, { "epoch": 0.64, "grad_norm": 0.43364232778549194, "learning_rate": 0.0005347471445816148, "loss": 1.759, "step": 19292 }, { "epoch": 0.64, "grad_norm": 0.43449699878692627, "learning_rate": 0.0005347406362976811, "loss": 1.8156, "step": 19293 }, { "epoch": 0.64, "grad_norm": 0.43284687399864197, "learning_rate": 0.0005347341277288068, "loss": 1.7956, "step": 19294 }, { "epoch": 0.64, "grad_norm": 0.42344406247138977, "learning_rate": 0.0005347276188749999, "loss": 1.7622, "step": 19295 }, { "epoch": 0.64, "grad_norm": 0.43244269490242004, "learning_rate": 0.0005347211097362683, "loss": 1.8163, "step": 19296 }, { "epoch": 0.64, "grad_norm": 0.4242142140865326, "learning_rate": 0.0005347146003126198, "loss": 1.8197, "step": 19297 }, { "epoch": 0.64, "grad_norm": 0.461273193359375, "learning_rate": 0.0005347080906040623, "loss": 1.804, "step": 19298 }, { "epoch": 0.64, "grad_norm": 0.4394066035747528, "learning_rate": 0.0005347015806106038, "loss": 1.7951, "step": 19299 }, { "epoch": 0.64, "grad_norm": 0.4098702073097229, "learning_rate": 0.0005346950703322522, "loss": 1.8347, "step": 19300 }, { "epoch": 0.64, "grad_norm": 0.4304954707622528, "learning_rate": 0.0005346885597690154, "loss": 1.8177, "step": 19301 }, { "epoch": 0.64, "grad_norm": 0.4462234675884247, "learning_rate": 0.0005346820489209012, "loss": 1.9007, "step": 19302 }, { "epoch": 0.64, "grad_norm": 0.4410138428211212, "learning_rate": 0.0005346755377879176, "loss": 1.7534, "step": 19303 }, { "epoch": 0.64, "grad_norm": 0.4234982430934906, "learning_rate": 0.0005346690263700725, "loss": 1.7876, "step": 19304 }, { "epoch": 0.64, "grad_norm": 0.4190671741962433, "learning_rate": 0.0005346625146673737, "loss": 1.8799, "step": 19305 }, { "epoch": 0.64, "grad_norm": 0.4064694941043854, "learning_rate": 0.0005346560026798292, "loss": 1.8129, "step": 19306 }, { "epoch": 0.64, "grad_norm": 0.43724820017814636, "learning_rate": 0.000534649490407447, "loss": 1.8992, "step": 19307 }, { "epoch": 0.64, "grad_norm": 0.42320653796195984, "learning_rate": 0.0005346429778502348, "loss": 1.8381, "step": 19308 }, { "epoch": 0.64, "grad_norm": 0.41728031635284424, "learning_rate": 0.0005346364650082006, "loss": 1.8633, "step": 19309 }, { "epoch": 0.64, "grad_norm": 0.4255422055721283, "learning_rate": 0.0005346299518813522, "loss": 1.7857, "step": 19310 }, { "epoch": 0.64, "grad_norm": 0.4344603717327118, "learning_rate": 0.0005346234384696977, "loss": 1.8899, "step": 19311 }, { "epoch": 0.64, "grad_norm": 0.43770474195480347, "learning_rate": 0.000534616924773245, "loss": 1.8532, "step": 19312 }, { "epoch": 0.64, "grad_norm": 0.45096689462661743, "learning_rate": 0.0005346104107920018, "loss": 1.7985, "step": 19313 }, { "epoch": 0.64, "grad_norm": 0.419725239276886, "learning_rate": 0.0005346038965259762, "loss": 1.7543, "step": 19314 }, { "epoch": 0.64, "grad_norm": 0.4418698847293854, "learning_rate": 0.0005345973819751761, "loss": 1.8529, "step": 19315 }, { "epoch": 0.64, "grad_norm": 0.44122788310050964, "learning_rate": 0.0005345908671396092, "loss": 1.8294, "step": 19316 }, { "epoch": 0.64, "grad_norm": 0.4249097406864166, "learning_rate": 0.0005345843520192837, "loss": 1.8776, "step": 19317 }, { "epoch": 0.64, "grad_norm": 0.41795697808265686, "learning_rate": 0.0005345778366142072, "loss": 1.8844, "step": 19318 }, { "epoch": 0.64, "grad_norm": 0.4322197437286377, "learning_rate": 0.0005345713209243879, "loss": 1.8252, "step": 19319 }, { "epoch": 0.64, "grad_norm": 0.42842811346054077, "learning_rate": 0.0005345648049498334, "loss": 1.935, "step": 19320 }, { "epoch": 0.64, "grad_norm": 0.4280261695384979, "learning_rate": 0.000534558288690552, "loss": 1.8336, "step": 19321 }, { "epoch": 0.64, "grad_norm": 0.4355701506137848, "learning_rate": 0.0005345517721465513, "loss": 1.8028, "step": 19322 }, { "epoch": 0.64, "grad_norm": 0.43797263503074646, "learning_rate": 0.0005345452553178393, "loss": 1.8472, "step": 19323 }, { "epoch": 0.64, "grad_norm": 0.44545239210128784, "learning_rate": 0.0005345387382044239, "loss": 1.8267, "step": 19324 }, { "epoch": 0.64, "grad_norm": 0.4204387366771698, "learning_rate": 0.0005345322208063131, "loss": 1.7538, "step": 19325 }, { "epoch": 0.64, "grad_norm": 0.4209275245666504, "learning_rate": 0.0005345257031235147, "loss": 1.8837, "step": 19326 }, { "epoch": 0.64, "grad_norm": 0.42023468017578125, "learning_rate": 0.0005345191851560368, "loss": 1.8648, "step": 19327 }, { "epoch": 0.64, "grad_norm": 0.42943012714385986, "learning_rate": 0.0005345126669038869, "loss": 1.8205, "step": 19328 }, { "epoch": 0.64, "grad_norm": 0.43574661016464233, "learning_rate": 0.0005345061483670734, "loss": 1.7881, "step": 19329 }, { "epoch": 0.64, "grad_norm": 0.4269911050796509, "learning_rate": 0.0005344996295456039, "loss": 1.8539, "step": 19330 }, { "epoch": 0.64, "grad_norm": 0.43114930391311646, "learning_rate": 0.0005344931104394864, "loss": 1.8735, "step": 19331 }, { "epoch": 0.64, "grad_norm": 0.45226162672042847, "learning_rate": 0.0005344865910487288, "loss": 1.9197, "step": 19332 }, { "epoch": 0.64, "grad_norm": 0.406329482793808, "learning_rate": 0.000534480071373339, "loss": 1.7848, "step": 19333 }, { "epoch": 0.64, "grad_norm": 0.4322396218776703, "learning_rate": 0.0005344735514133249, "loss": 1.8474, "step": 19334 }, { "epoch": 0.64, "grad_norm": 0.4402585029602051, "learning_rate": 0.0005344670311686945, "loss": 1.8721, "step": 19335 }, { "epoch": 0.64, "grad_norm": 0.44857802987098694, "learning_rate": 0.0005344605106394558, "loss": 1.8377, "step": 19336 }, { "epoch": 0.64, "grad_norm": 0.41797158122062683, "learning_rate": 0.0005344539898256165, "loss": 1.8054, "step": 19337 }, { "epoch": 0.64, "grad_norm": 0.44292306900024414, "learning_rate": 0.0005344474687271846, "loss": 1.8893, "step": 19338 }, { "epoch": 0.64, "grad_norm": 0.4123746156692505, "learning_rate": 0.000534440947344168, "loss": 1.8076, "step": 19339 }, { "epoch": 0.64, "grad_norm": 0.4365834891796112, "learning_rate": 0.0005344344256765747, "loss": 1.8429, "step": 19340 }, { "epoch": 0.64, "grad_norm": 0.43331918120384216, "learning_rate": 0.0005344279037244124, "loss": 1.8722, "step": 19341 }, { "epoch": 0.64, "grad_norm": 0.41174495220184326, "learning_rate": 0.0005344213814876893, "loss": 1.8008, "step": 19342 }, { "epoch": 0.64, "grad_norm": 0.4253195822238922, "learning_rate": 0.0005344148589664132, "loss": 1.8227, "step": 19343 }, { "epoch": 0.64, "grad_norm": 0.42112496495246887, "learning_rate": 0.000534408336160592, "loss": 1.8877, "step": 19344 }, { "epoch": 0.64, "grad_norm": 0.41557416319847107, "learning_rate": 0.0005344018130702335, "loss": 1.883, "step": 19345 }, { "epoch": 0.64, "grad_norm": 0.42593273520469666, "learning_rate": 0.0005343952896953458, "loss": 1.7734, "step": 19346 }, { "epoch": 0.64, "grad_norm": 0.41568687558174133, "learning_rate": 0.0005343887660359367, "loss": 1.8204, "step": 19347 }, { "epoch": 0.64, "grad_norm": 0.42554736137390137, "learning_rate": 0.0005343822420920143, "loss": 1.7822, "step": 19348 }, { "epoch": 0.64, "grad_norm": 0.41481316089630127, "learning_rate": 0.0005343757178635863, "loss": 1.819, "step": 19349 }, { "epoch": 0.64, "grad_norm": 0.4252856373786926, "learning_rate": 0.0005343691933506607, "loss": 1.8386, "step": 19350 }, { "epoch": 0.64, "grad_norm": 0.4221270680427551, "learning_rate": 0.0005343626685532455, "loss": 1.7485, "step": 19351 }, { "epoch": 0.64, "grad_norm": 0.42990773916244507, "learning_rate": 0.0005343561434713485, "loss": 1.7836, "step": 19352 }, { "epoch": 0.64, "grad_norm": 0.433682918548584, "learning_rate": 0.0005343496181049777, "loss": 1.8247, "step": 19353 }, { "epoch": 0.64, "grad_norm": 0.43391406536102295, "learning_rate": 0.000534343092454141, "loss": 1.8544, "step": 19354 }, { "epoch": 0.64, "grad_norm": 0.44107598066329956, "learning_rate": 0.0005343365665188462, "loss": 1.8782, "step": 19355 }, { "epoch": 0.64, "grad_norm": 0.4318625330924988, "learning_rate": 0.0005343300402991015, "loss": 1.8233, "step": 19356 }, { "epoch": 0.64, "grad_norm": 0.42052608728408813, "learning_rate": 0.0005343235137949147, "loss": 1.9153, "step": 19357 }, { "epoch": 0.64, "grad_norm": 0.4453863203525543, "learning_rate": 0.0005343169870062935, "loss": 1.8993, "step": 19358 }, { "epoch": 0.64, "grad_norm": 0.4256196618080139, "learning_rate": 0.0005343104599332461, "loss": 1.8843, "step": 19359 }, { "epoch": 0.64, "grad_norm": 0.428317666053772, "learning_rate": 0.0005343039325757802, "loss": 1.8309, "step": 19360 }, { "epoch": 0.64, "grad_norm": 0.4292358458042145, "learning_rate": 0.000534297404933904, "loss": 1.818, "step": 19361 }, { "epoch": 0.64, "grad_norm": 0.43956688046455383, "learning_rate": 0.0005342908770076252, "loss": 1.8301, "step": 19362 }, { "epoch": 0.64, "grad_norm": 0.44647976756095886, "learning_rate": 0.0005342843487969519, "loss": 1.7944, "step": 19363 }, { "epoch": 0.64, "grad_norm": 0.4307345151901245, "learning_rate": 0.0005342778203018918, "loss": 1.8321, "step": 19364 }, { "epoch": 0.64, "grad_norm": 0.4339122772216797, "learning_rate": 0.0005342712915224529, "loss": 1.8615, "step": 19365 }, { "epoch": 0.64, "grad_norm": 0.45929381251335144, "learning_rate": 0.0005342647624586433, "loss": 1.8881, "step": 19366 }, { "epoch": 0.64, "grad_norm": 0.4401441812515259, "learning_rate": 0.0005342582331104707, "loss": 1.8583, "step": 19367 }, { "epoch": 0.64, "grad_norm": 0.45207107067108154, "learning_rate": 0.0005342517034779432, "loss": 1.8865, "step": 19368 }, { "epoch": 0.64, "grad_norm": 0.43554648756980896, "learning_rate": 0.0005342451735610687, "loss": 1.7885, "step": 19369 }, { "epoch": 0.64, "grad_norm": 0.4417756199836731, "learning_rate": 0.000534238643359855, "loss": 1.8736, "step": 19370 }, { "epoch": 0.64, "grad_norm": 0.44071635603904724, "learning_rate": 0.0005342321128743101, "loss": 1.8175, "step": 19371 }, { "epoch": 0.64, "grad_norm": 0.4202786684036255, "learning_rate": 0.0005342255821044418, "loss": 1.8154, "step": 19372 }, { "epoch": 0.64, "grad_norm": 0.45216861367225647, "learning_rate": 0.0005342190510502584, "loss": 1.8465, "step": 19373 }, { "epoch": 0.64, "grad_norm": 0.444429486989975, "learning_rate": 0.0005342125197117675, "loss": 1.8537, "step": 19374 }, { "epoch": 0.64, "grad_norm": 0.42956018447875977, "learning_rate": 0.0005342059880889769, "loss": 1.8088, "step": 19375 }, { "epoch": 0.64, "grad_norm": 0.43455591797828674, "learning_rate": 0.000534199456181895, "loss": 1.8449, "step": 19376 }, { "epoch": 0.64, "grad_norm": 0.43064749240875244, "learning_rate": 0.0005341929239905293, "loss": 1.8352, "step": 19377 }, { "epoch": 0.64, "grad_norm": 0.4319576919078827, "learning_rate": 0.000534186391514888, "loss": 1.8317, "step": 19378 }, { "epoch": 0.64, "grad_norm": 0.43508148193359375, "learning_rate": 0.0005341798587549789, "loss": 1.7918, "step": 19379 }, { "epoch": 0.64, "grad_norm": 0.43254420161247253, "learning_rate": 0.00053417332571081, "loss": 1.8189, "step": 19380 }, { "epoch": 0.64, "grad_norm": 0.4672873616218567, "learning_rate": 0.0005341667923823891, "loss": 1.8626, "step": 19381 }, { "epoch": 0.64, "grad_norm": 0.5027326941490173, "learning_rate": 0.0005341602587697243, "loss": 1.8919, "step": 19382 }, { "epoch": 0.64, "grad_norm": 0.43638935685157776, "learning_rate": 0.0005341537248728234, "loss": 1.8012, "step": 19383 }, { "epoch": 0.64, "grad_norm": 0.43407124280929565, "learning_rate": 0.0005341471906916943, "loss": 1.8317, "step": 19384 }, { "epoch": 0.64, "grad_norm": 0.4313732981681824, "learning_rate": 0.0005341406562263452, "loss": 1.8552, "step": 19385 }, { "epoch": 0.64, "grad_norm": 0.41145384311676025, "learning_rate": 0.0005341341214767836, "loss": 1.8246, "step": 19386 }, { "epoch": 0.65, "grad_norm": 0.4365402162075043, "learning_rate": 0.000534127586443018, "loss": 1.7945, "step": 19387 }, { "epoch": 0.65, "grad_norm": 0.44202128052711487, "learning_rate": 0.0005341210511250557, "loss": 1.8738, "step": 19388 }, { "epoch": 0.65, "grad_norm": 0.4293016791343689, "learning_rate": 0.0005341145155229051, "loss": 1.8094, "step": 19389 }, { "epoch": 0.65, "grad_norm": 0.42799055576324463, "learning_rate": 0.0005341079796365739, "loss": 1.8549, "step": 19390 }, { "epoch": 0.65, "grad_norm": 0.4264897108078003, "learning_rate": 0.0005341014434660701, "loss": 1.9386, "step": 19391 }, { "epoch": 0.65, "grad_norm": 0.43991556763648987, "learning_rate": 0.0005340949070114018, "loss": 1.9451, "step": 19392 }, { "epoch": 0.65, "grad_norm": 0.43218761682510376, "learning_rate": 0.0005340883702725767, "loss": 1.8375, "step": 19393 }, { "epoch": 0.65, "grad_norm": 0.4261605143547058, "learning_rate": 0.0005340818332496027, "loss": 1.9141, "step": 19394 }, { "epoch": 0.65, "grad_norm": 0.42252394556999207, "learning_rate": 0.0005340752959424879, "loss": 1.8655, "step": 19395 }, { "epoch": 0.65, "grad_norm": 0.43045759201049805, "learning_rate": 0.0005340687583512402, "loss": 1.7698, "step": 19396 }, { "epoch": 0.65, "grad_norm": 0.43114274740219116, "learning_rate": 0.0005340622204758675, "loss": 1.7687, "step": 19397 }, { "epoch": 0.65, "grad_norm": 0.4175599217414856, "learning_rate": 0.0005340556823163777, "loss": 1.8623, "step": 19398 }, { "epoch": 0.65, "grad_norm": 0.41500043869018555, "learning_rate": 0.0005340491438727788, "loss": 1.8434, "step": 19399 }, { "epoch": 0.65, "grad_norm": 0.4360157549381256, "learning_rate": 0.0005340426051450789, "loss": 1.8683, "step": 19400 }, { "epoch": 0.65, "grad_norm": 0.4419127106666565, "learning_rate": 0.0005340360661332857, "loss": 1.8754, "step": 19401 }, { "epoch": 0.65, "grad_norm": 0.42072564363479614, "learning_rate": 0.000534029526837407, "loss": 1.8167, "step": 19402 }, { "epoch": 0.65, "grad_norm": 0.4272651970386505, "learning_rate": 0.0005340229872574511, "loss": 1.8283, "step": 19403 }, { "epoch": 0.65, "grad_norm": 0.4268512427806854, "learning_rate": 0.0005340164473934258, "loss": 1.8183, "step": 19404 }, { "epoch": 0.65, "grad_norm": 0.43464624881744385, "learning_rate": 0.000534009907245339, "loss": 1.846, "step": 19405 }, { "epoch": 0.65, "grad_norm": 0.42097970843315125, "learning_rate": 0.0005340033668131985, "loss": 1.8271, "step": 19406 }, { "epoch": 0.65, "grad_norm": 0.4153079688549042, "learning_rate": 0.0005339968260970125, "loss": 1.8195, "step": 19407 }, { "epoch": 0.65, "grad_norm": 0.4402174651622772, "learning_rate": 0.0005339902850967889, "loss": 1.7273, "step": 19408 }, { "epoch": 0.65, "grad_norm": 0.42653605341911316, "learning_rate": 0.0005339837438125355, "loss": 1.7843, "step": 19409 }, { "epoch": 0.65, "grad_norm": 0.43213793635368347, "learning_rate": 0.0005339772022442603, "loss": 1.8674, "step": 19410 }, { "epoch": 0.65, "grad_norm": 0.42638036608695984, "learning_rate": 0.0005339706603919712, "loss": 1.9266, "step": 19411 }, { "epoch": 0.65, "grad_norm": 0.45245686173439026, "learning_rate": 0.0005339641182556762, "loss": 1.8613, "step": 19412 }, { "epoch": 0.65, "grad_norm": 0.42727336287498474, "learning_rate": 0.0005339575758353834, "loss": 1.8268, "step": 19413 }, { "epoch": 0.65, "grad_norm": 0.4272540807723999, "learning_rate": 0.0005339510331311005, "loss": 1.8693, "step": 19414 }, { "epoch": 0.65, "grad_norm": 0.44821876287460327, "learning_rate": 0.0005339444901428355, "loss": 1.8686, "step": 19415 }, { "epoch": 0.65, "grad_norm": 0.45935025811195374, "learning_rate": 0.0005339379468705963, "loss": 1.8985, "step": 19416 }, { "epoch": 0.65, "grad_norm": 0.43262460827827454, "learning_rate": 0.000533931403314391, "loss": 1.8463, "step": 19417 }, { "epoch": 0.65, "grad_norm": 0.4449905455112457, "learning_rate": 0.0005339248594742273, "loss": 1.9509, "step": 19418 }, { "epoch": 0.65, "grad_norm": 0.4342837333679199, "learning_rate": 0.0005339183153501134, "loss": 1.8316, "step": 19419 }, { "epoch": 0.65, "grad_norm": 0.43333929777145386, "learning_rate": 0.0005339117709420571, "loss": 1.8247, "step": 19420 }, { "epoch": 0.65, "grad_norm": 0.4628714323043823, "learning_rate": 0.0005339052262500663, "loss": 1.7912, "step": 19421 }, { "epoch": 0.65, "grad_norm": 0.452360600233078, "learning_rate": 0.0005338986812741492, "loss": 1.9201, "step": 19422 }, { "epoch": 0.65, "grad_norm": 0.5616388916969299, "learning_rate": 0.0005338921360143134, "loss": 1.866, "step": 19423 }, { "epoch": 0.65, "grad_norm": 0.42345765233039856, "learning_rate": 0.0005338855904705672, "loss": 1.8256, "step": 19424 }, { "epoch": 0.65, "grad_norm": 0.4368697702884674, "learning_rate": 0.0005338790446429182, "loss": 1.8795, "step": 19425 }, { "epoch": 0.65, "grad_norm": 0.4421482980251312, "learning_rate": 0.0005338724985313745, "loss": 1.7809, "step": 19426 }, { "epoch": 0.65, "grad_norm": 0.4253458082675934, "learning_rate": 0.000533865952135944, "loss": 1.8133, "step": 19427 }, { "epoch": 0.65, "grad_norm": 0.4319053888320923, "learning_rate": 0.0005338594054566349, "loss": 1.8342, "step": 19428 }, { "epoch": 0.65, "grad_norm": 0.4229772388935089, "learning_rate": 0.0005338528584934547, "loss": 1.8372, "step": 19429 }, { "epoch": 0.65, "grad_norm": 0.4234473705291748, "learning_rate": 0.0005338463112464118, "loss": 1.7874, "step": 19430 }, { "epoch": 0.65, "grad_norm": 0.8382336497306824, "learning_rate": 0.0005338397637155138, "loss": 1.8155, "step": 19431 }, { "epoch": 0.65, "grad_norm": 0.42110466957092285, "learning_rate": 0.0005338332159007688, "loss": 1.8282, "step": 19432 }, { "epoch": 0.65, "grad_norm": 0.4181232750415802, "learning_rate": 0.0005338266678021847, "loss": 1.814, "step": 19433 }, { "epoch": 0.65, "grad_norm": 0.44033852219581604, "learning_rate": 0.0005338201194197696, "loss": 1.8622, "step": 19434 }, { "epoch": 0.65, "grad_norm": 0.46413129568099976, "learning_rate": 0.0005338135707535312, "loss": 1.8262, "step": 19435 }, { "epoch": 0.65, "grad_norm": 0.4343981146812439, "learning_rate": 0.0005338070218034776, "loss": 1.7863, "step": 19436 }, { "epoch": 0.65, "grad_norm": 0.43570515513420105, "learning_rate": 0.0005338004725696168, "loss": 1.8473, "step": 19437 }, { "epoch": 0.65, "grad_norm": 0.4388354420661926, "learning_rate": 0.0005337939230519566, "loss": 1.8412, "step": 19438 }, { "epoch": 0.65, "grad_norm": 0.42487919330596924, "learning_rate": 0.000533787373250505, "loss": 1.8322, "step": 19439 }, { "epoch": 0.65, "grad_norm": 0.4393578767776489, "learning_rate": 0.0005337808231652701, "loss": 1.854, "step": 19440 }, { "epoch": 0.65, "grad_norm": 0.4529252052307129, "learning_rate": 0.0005337742727962596, "loss": 1.7389, "step": 19441 }, { "epoch": 0.65, "grad_norm": 0.4379308521747589, "learning_rate": 0.0005337677221434816, "loss": 1.7258, "step": 19442 }, { "epoch": 0.65, "grad_norm": 0.44832220673561096, "learning_rate": 0.0005337611712069441, "loss": 1.8065, "step": 19443 }, { "epoch": 0.65, "grad_norm": 0.4481778144836426, "learning_rate": 0.000533754619986655, "loss": 1.8646, "step": 19444 }, { "epoch": 0.65, "grad_norm": 0.4303385615348816, "learning_rate": 0.000533748068482622, "loss": 1.8306, "step": 19445 }, { "epoch": 0.65, "grad_norm": 0.4221721589565277, "learning_rate": 0.0005337415166948536, "loss": 1.8203, "step": 19446 }, { "epoch": 0.65, "grad_norm": 0.43257415294647217, "learning_rate": 0.0005337349646233572, "loss": 1.784, "step": 19447 }, { "epoch": 0.65, "grad_norm": 0.4548753798007965, "learning_rate": 0.0005337284122681412, "loss": 1.8892, "step": 19448 }, { "epoch": 0.65, "grad_norm": 0.4475536644458771, "learning_rate": 0.0005337218596292132, "loss": 1.9036, "step": 19449 }, { "epoch": 0.65, "grad_norm": 0.4553985893726349, "learning_rate": 0.0005337153067065814, "loss": 1.8799, "step": 19450 }, { "epoch": 0.65, "grad_norm": 0.4415610432624817, "learning_rate": 0.0005337087535002535, "loss": 1.8341, "step": 19451 }, { "epoch": 0.65, "grad_norm": 0.4212995767593384, "learning_rate": 0.0005337022000102377, "loss": 1.8554, "step": 19452 }, { "epoch": 0.65, "grad_norm": 0.4427182078361511, "learning_rate": 0.0005336956462365419, "loss": 1.8103, "step": 19453 }, { "epoch": 0.65, "grad_norm": 0.43832847476005554, "learning_rate": 0.000533689092179174, "loss": 1.8622, "step": 19454 }, { "epoch": 0.65, "grad_norm": 0.46878331899642944, "learning_rate": 0.0005336825378381419, "loss": 1.8148, "step": 19455 }, { "epoch": 0.65, "grad_norm": 0.4355866014957428, "learning_rate": 0.0005336759832134537, "loss": 1.8377, "step": 19456 }, { "epoch": 0.65, "grad_norm": 0.4484444558620453, "learning_rate": 0.0005336694283051172, "loss": 1.95, "step": 19457 }, { "epoch": 0.65, "grad_norm": 0.4368322193622589, "learning_rate": 0.0005336628731131406, "loss": 1.7619, "step": 19458 }, { "epoch": 0.65, "grad_norm": 0.4597588777542114, "learning_rate": 0.0005336563176375317, "loss": 1.9228, "step": 19459 }, { "epoch": 0.65, "grad_norm": 0.45083457231521606, "learning_rate": 0.0005336497618782983, "loss": 1.8447, "step": 19460 }, { "epoch": 0.65, "grad_norm": 0.42677611112594604, "learning_rate": 0.0005336432058354486, "loss": 1.8918, "step": 19461 }, { "epoch": 0.65, "grad_norm": 0.4455950856208801, "learning_rate": 0.0005336366495089904, "loss": 1.8317, "step": 19462 }, { "epoch": 0.65, "grad_norm": 0.4315015375614166, "learning_rate": 0.0005336300928989318, "loss": 1.8404, "step": 19463 }, { "epoch": 0.65, "grad_norm": 0.42597657442092896, "learning_rate": 0.0005336235360052807, "loss": 1.8453, "step": 19464 }, { "epoch": 0.65, "grad_norm": 0.4371647834777832, "learning_rate": 0.0005336169788280451, "loss": 1.8893, "step": 19465 }, { "epoch": 0.65, "grad_norm": 0.4231361746788025, "learning_rate": 0.0005336104213672329, "loss": 1.8025, "step": 19466 }, { "epoch": 0.65, "grad_norm": 0.4281184673309326, "learning_rate": 0.000533603863622852, "loss": 1.8583, "step": 19467 }, { "epoch": 0.65, "grad_norm": 0.43899431824684143, "learning_rate": 0.0005335973055949104, "loss": 1.9434, "step": 19468 }, { "epoch": 0.65, "grad_norm": 0.43917933106422424, "learning_rate": 0.000533590747283416, "loss": 1.816, "step": 19469 }, { "epoch": 0.65, "grad_norm": 0.4315093457698822, "learning_rate": 0.0005335841886883771, "loss": 1.7926, "step": 19470 }, { "epoch": 0.65, "grad_norm": 0.42631542682647705, "learning_rate": 0.0005335776298098013, "loss": 1.7857, "step": 19471 }, { "epoch": 0.65, "grad_norm": 0.41134509444236755, "learning_rate": 0.0005335710706476966, "loss": 1.7271, "step": 19472 }, { "epoch": 0.65, "grad_norm": 0.42371058464050293, "learning_rate": 0.0005335645112020712, "loss": 1.9499, "step": 19473 }, { "epoch": 0.65, "grad_norm": 0.42254504561424255, "learning_rate": 0.0005335579514729328, "loss": 1.7782, "step": 19474 }, { "epoch": 0.65, "grad_norm": 0.6649870276451111, "learning_rate": 0.0005335513914602894, "loss": 1.8786, "step": 19475 }, { "epoch": 0.65, "grad_norm": 0.4296513795852661, "learning_rate": 0.0005335448311641491, "loss": 1.8237, "step": 19476 }, { "epoch": 0.65, "grad_norm": 0.4301939904689789, "learning_rate": 0.0005335382705845198, "loss": 1.8533, "step": 19477 }, { "epoch": 0.65, "grad_norm": 0.44220298528671265, "learning_rate": 0.0005335317097214094, "loss": 1.8388, "step": 19478 }, { "epoch": 0.65, "grad_norm": 0.4338691234588623, "learning_rate": 0.000533525148574826, "loss": 1.8809, "step": 19479 }, { "epoch": 0.65, "grad_norm": 0.4270743727684021, "learning_rate": 0.0005335185871447774, "loss": 1.8156, "step": 19480 }, { "epoch": 0.65, "grad_norm": 0.4343222677707672, "learning_rate": 0.0005335120254312717, "loss": 1.8708, "step": 19481 }, { "epoch": 0.65, "grad_norm": 0.44088271260261536, "learning_rate": 0.0005335054634343167, "loss": 1.7817, "step": 19482 }, { "epoch": 0.65, "grad_norm": 0.4274631440639496, "learning_rate": 0.0005334989011539205, "loss": 1.8351, "step": 19483 }, { "epoch": 0.65, "grad_norm": 0.4388681650161743, "learning_rate": 0.0005334923385900912, "loss": 1.7878, "step": 19484 }, { "epoch": 0.65, "grad_norm": 0.42890506982803345, "learning_rate": 0.0005334857757428364, "loss": 1.8774, "step": 19485 }, { "epoch": 0.65, "grad_norm": 0.428315132856369, "learning_rate": 0.0005334792126121643, "loss": 1.7769, "step": 19486 }, { "epoch": 0.65, "grad_norm": 0.42159849405288696, "learning_rate": 0.0005334726491980829, "loss": 1.845, "step": 19487 }, { "epoch": 0.65, "grad_norm": 0.46066051721572876, "learning_rate": 0.0005334660855006001, "loss": 1.7856, "step": 19488 }, { "epoch": 0.65, "grad_norm": 0.4257484972476959, "learning_rate": 0.0005334595215197239, "loss": 1.79, "step": 19489 }, { "epoch": 0.65, "grad_norm": 0.4142511785030365, "learning_rate": 0.0005334529572554622, "loss": 1.8298, "step": 19490 }, { "epoch": 0.65, "grad_norm": 0.42978203296661377, "learning_rate": 0.0005334463927078231, "loss": 1.7961, "step": 19491 }, { "epoch": 0.65, "grad_norm": 0.46165576577186584, "learning_rate": 0.0005334398278768142, "loss": 1.8551, "step": 19492 }, { "epoch": 0.65, "grad_norm": 0.43257492780685425, "learning_rate": 0.000533433262762444, "loss": 1.8427, "step": 19493 }, { "epoch": 0.65, "grad_norm": 0.4289698004722595, "learning_rate": 0.0005334266973647203, "loss": 1.8389, "step": 19494 }, { "epoch": 0.65, "grad_norm": 0.4204951822757721, "learning_rate": 0.0005334201316836507, "loss": 1.7725, "step": 19495 }, { "epoch": 0.65, "grad_norm": 0.4398709535598755, "learning_rate": 0.0005334135657192436, "loss": 1.8314, "step": 19496 }, { "epoch": 0.65, "grad_norm": 0.42329201102256775, "learning_rate": 0.0005334069994715068, "loss": 1.849, "step": 19497 }, { "epoch": 0.65, "grad_norm": 0.44513580203056335, "learning_rate": 0.0005334004329404482, "loss": 1.8974, "step": 19498 }, { "epoch": 0.65, "grad_norm": 0.41442587971687317, "learning_rate": 0.0005333938661260761, "loss": 1.8473, "step": 19499 }, { "epoch": 0.65, "grad_norm": 0.4391283690929413, "learning_rate": 0.000533387299028398, "loss": 1.8332, "step": 19500 }, { "epoch": 0.65, "grad_norm": 0.46521249413490295, "learning_rate": 0.0005333807316474223, "loss": 1.8543, "step": 19501 }, { "epoch": 0.65, "grad_norm": 0.44015756249427795, "learning_rate": 0.0005333741639831567, "loss": 1.8, "step": 19502 }, { "epoch": 0.65, "grad_norm": 0.43898189067840576, "learning_rate": 0.0005333675960356092, "loss": 1.9506, "step": 19503 }, { "epoch": 0.65, "grad_norm": 0.4585893154144287, "learning_rate": 0.0005333610278047879, "loss": 1.8265, "step": 19504 }, { "epoch": 0.65, "grad_norm": 0.4381260871887207, "learning_rate": 0.0005333544592907006, "loss": 1.8228, "step": 19505 }, { "epoch": 0.65, "grad_norm": 0.4314812123775482, "learning_rate": 0.0005333478904933555, "loss": 1.808, "step": 19506 }, { "epoch": 0.65, "grad_norm": 0.44784247875213623, "learning_rate": 0.0005333413214127603, "loss": 1.8695, "step": 19507 }, { "epoch": 0.65, "grad_norm": 0.4510577321052551, "learning_rate": 0.0005333347520489232, "loss": 1.8707, "step": 19508 }, { "epoch": 0.65, "grad_norm": 0.4461531341075897, "learning_rate": 0.000533328182401852, "loss": 1.911, "step": 19509 }, { "epoch": 0.65, "grad_norm": 0.42294830083847046, "learning_rate": 0.0005333216124715549, "loss": 1.7205, "step": 19510 }, { "epoch": 0.65, "grad_norm": 0.44546735286712646, "learning_rate": 0.0005333150422580396, "loss": 1.8112, "step": 19511 }, { "epoch": 0.65, "grad_norm": 0.4362192451953888, "learning_rate": 0.0005333084717613144, "loss": 1.8495, "step": 19512 }, { "epoch": 0.65, "grad_norm": 0.4308621287345886, "learning_rate": 0.000533301900981387, "loss": 1.7618, "step": 19513 }, { "epoch": 0.65, "grad_norm": 0.4583785831928253, "learning_rate": 0.0005332953299182655, "loss": 1.8227, "step": 19514 }, { "epoch": 0.65, "grad_norm": 0.43565014004707336, "learning_rate": 0.0005332887585719578, "loss": 1.8872, "step": 19515 }, { "epoch": 0.65, "grad_norm": 0.4312088191509247, "learning_rate": 0.0005332821869424719, "loss": 1.8246, "step": 19516 }, { "epoch": 0.65, "grad_norm": 0.4370109736919403, "learning_rate": 0.0005332756150298159, "loss": 1.8497, "step": 19517 }, { "epoch": 0.65, "grad_norm": 0.436163067817688, "learning_rate": 0.0005332690428339975, "loss": 1.8868, "step": 19518 }, { "epoch": 0.65, "grad_norm": 0.4399884045124054, "learning_rate": 0.000533262470355025, "loss": 1.8548, "step": 19519 }, { "epoch": 0.65, "grad_norm": 0.4190957248210907, "learning_rate": 0.0005332558975929061, "loss": 1.8777, "step": 19520 }, { "epoch": 0.65, "grad_norm": 0.4343631863594055, "learning_rate": 0.000533249324547649, "loss": 1.8192, "step": 19521 }, { "epoch": 0.65, "grad_norm": 0.45312991738319397, "learning_rate": 0.0005332427512192616, "loss": 1.856, "step": 19522 }, { "epoch": 0.65, "grad_norm": 0.44617587327957153, "learning_rate": 0.0005332361776077518, "loss": 1.9184, "step": 19523 }, { "epoch": 0.65, "grad_norm": 0.4430382251739502, "learning_rate": 0.0005332296037131277, "loss": 1.786, "step": 19524 }, { "epoch": 0.65, "grad_norm": 0.4370080828666687, "learning_rate": 0.0005332230295353972, "loss": 1.9597, "step": 19525 }, { "epoch": 0.65, "grad_norm": 0.4463804066181183, "learning_rate": 0.0005332164550745684, "loss": 1.8612, "step": 19526 }, { "epoch": 0.65, "grad_norm": 0.44233402609825134, "learning_rate": 0.0005332098803306491, "loss": 1.8649, "step": 19527 }, { "epoch": 0.65, "grad_norm": 0.4242982864379883, "learning_rate": 0.0005332033053036473, "loss": 1.8167, "step": 19528 }, { "epoch": 0.65, "grad_norm": 0.5135170221328735, "learning_rate": 0.0005331967299935711, "loss": 1.9115, "step": 19529 }, { "epoch": 0.65, "grad_norm": 0.4540247321128845, "learning_rate": 0.0005331901544004285, "loss": 1.8356, "step": 19530 }, { "epoch": 0.65, "grad_norm": 0.44050779938697815, "learning_rate": 0.0005331835785242273, "loss": 1.8512, "step": 19531 }, { "epoch": 0.65, "grad_norm": 0.4247351288795471, "learning_rate": 0.0005331770023649757, "loss": 1.7848, "step": 19532 }, { "epoch": 0.65, "grad_norm": 0.4385499656200409, "learning_rate": 0.0005331704259226816, "loss": 1.7948, "step": 19533 }, { "epoch": 0.65, "grad_norm": 0.44105419516563416, "learning_rate": 0.0005331638491973529, "loss": 1.7944, "step": 19534 }, { "epoch": 0.65, "grad_norm": 0.43727245926856995, "learning_rate": 0.0005331572721889976, "loss": 1.832, "step": 19535 }, { "epoch": 0.65, "grad_norm": 0.44425761699676514, "learning_rate": 0.0005331506948976237, "loss": 1.9132, "step": 19536 }, { "epoch": 0.65, "grad_norm": 0.45165693759918213, "learning_rate": 0.0005331441173232394, "loss": 1.7774, "step": 19537 }, { "epoch": 0.65, "grad_norm": 0.9316374063491821, "learning_rate": 0.0005331375394658522, "loss": 1.8335, "step": 19538 }, { "epoch": 0.65, "grad_norm": 0.4468662142753601, "learning_rate": 0.0005331309613254707, "loss": 1.8712, "step": 19539 }, { "epoch": 0.65, "grad_norm": 0.47612297534942627, "learning_rate": 0.0005331243829021023, "loss": 1.8765, "step": 19540 }, { "epoch": 0.65, "grad_norm": 0.4264213442802429, "learning_rate": 0.0005331178041957554, "loss": 1.8374, "step": 19541 }, { "epoch": 0.65, "grad_norm": 0.45294708013534546, "learning_rate": 0.0005331112252064379, "loss": 1.8419, "step": 19542 }, { "epoch": 0.65, "grad_norm": 0.4228575825691223, "learning_rate": 0.0005331046459341575, "loss": 1.8488, "step": 19543 }, { "epoch": 0.65, "grad_norm": 0.4283124506473541, "learning_rate": 0.0005330980663789225, "loss": 1.8254, "step": 19544 }, { "epoch": 0.65, "grad_norm": 0.4317983090877533, "learning_rate": 0.0005330914865407408, "loss": 1.7833, "step": 19545 }, { "epoch": 0.65, "grad_norm": 0.4390207529067993, "learning_rate": 0.0005330849064196204, "loss": 1.7709, "step": 19546 }, { "epoch": 0.65, "grad_norm": 0.4411962330341339, "learning_rate": 0.0005330783260155692, "loss": 1.8626, "step": 19547 }, { "epoch": 0.65, "grad_norm": 0.4279806613922119, "learning_rate": 0.0005330717453285954, "loss": 1.8251, "step": 19548 }, { "epoch": 0.65, "grad_norm": 0.42247435450553894, "learning_rate": 0.0005330651643587067, "loss": 1.8202, "step": 19549 }, { "epoch": 0.65, "grad_norm": 0.45975369215011597, "learning_rate": 0.0005330585831059113, "loss": 1.9272, "step": 19550 }, { "epoch": 0.65, "grad_norm": 0.4142126441001892, "learning_rate": 0.000533052001570217, "loss": 1.8858, "step": 19551 }, { "epoch": 0.65, "grad_norm": 0.42964208126068115, "learning_rate": 0.0005330454197516321, "loss": 1.8274, "step": 19552 }, { "epoch": 0.65, "grad_norm": 0.4241228997707367, "learning_rate": 0.0005330388376501643, "loss": 1.8668, "step": 19553 }, { "epoch": 0.65, "grad_norm": 0.4292185306549072, "learning_rate": 0.0005330322552658217, "loss": 1.8281, "step": 19554 }, { "epoch": 0.65, "grad_norm": 0.4389492869377136, "learning_rate": 0.0005330256725986123, "loss": 1.8347, "step": 19555 }, { "epoch": 0.65, "grad_norm": 0.42508894205093384, "learning_rate": 0.000533019089648544, "loss": 1.7915, "step": 19556 }, { "epoch": 0.65, "grad_norm": 0.4103180766105652, "learning_rate": 0.000533012506415625, "loss": 1.8405, "step": 19557 }, { "epoch": 0.65, "grad_norm": 0.4149577021598816, "learning_rate": 0.000533005922899863, "loss": 1.8034, "step": 19558 }, { "epoch": 0.65, "grad_norm": 0.42321521043777466, "learning_rate": 0.0005329993391012661, "loss": 1.8767, "step": 19559 }, { "epoch": 0.65, "grad_norm": 0.5258448719978333, "learning_rate": 0.0005329927550198425, "loss": 1.851, "step": 19560 }, { "epoch": 0.65, "grad_norm": 0.42350074648857117, "learning_rate": 0.0005329861706555999, "loss": 1.8806, "step": 19561 }, { "epoch": 0.65, "grad_norm": 0.4418090283870697, "learning_rate": 0.0005329795860085465, "loss": 1.8395, "step": 19562 }, { "epoch": 0.65, "grad_norm": 0.433206170797348, "learning_rate": 0.0005329730010786902, "loss": 1.8256, "step": 19563 }, { "epoch": 0.65, "grad_norm": 0.4175008237361908, "learning_rate": 0.000532966415866039, "loss": 1.8666, "step": 19564 }, { "epoch": 0.65, "grad_norm": 0.41248244047164917, "learning_rate": 0.0005329598303706009, "loss": 1.8159, "step": 19565 }, { "epoch": 0.65, "grad_norm": 0.42843034863471985, "learning_rate": 0.0005329532445923839, "loss": 1.8182, "step": 19566 }, { "epoch": 0.65, "grad_norm": 0.43576836585998535, "learning_rate": 0.0005329466585313958, "loss": 1.9074, "step": 19567 }, { "epoch": 0.65, "grad_norm": 0.42354297637939453, "learning_rate": 0.000532940072187645, "loss": 1.8922, "step": 19568 }, { "epoch": 0.65, "grad_norm": 0.4629043638706207, "learning_rate": 0.0005329334855611392, "loss": 1.8571, "step": 19569 }, { "epoch": 0.65, "grad_norm": 0.41857340931892395, "learning_rate": 0.0005329268986518865, "loss": 1.8293, "step": 19570 }, { "epoch": 0.65, "grad_norm": 0.4440211057662964, "learning_rate": 0.0005329203114598949, "loss": 1.8703, "step": 19571 }, { "epoch": 0.65, "grad_norm": 0.42473235726356506, "learning_rate": 0.0005329137239851724, "loss": 1.8039, "step": 19572 }, { "epoch": 0.65, "grad_norm": 0.4306718707084656, "learning_rate": 0.0005329071362277268, "loss": 1.8368, "step": 19573 }, { "epoch": 0.65, "grad_norm": 0.4130488634109497, "learning_rate": 0.0005329005481875662, "loss": 1.8055, "step": 19574 }, { "epoch": 0.65, "grad_norm": 0.4334789216518402, "learning_rate": 0.0005328939598646988, "loss": 1.8238, "step": 19575 }, { "epoch": 0.65, "grad_norm": 0.42656248807907104, "learning_rate": 0.0005328873712591324, "loss": 1.8545, "step": 19576 }, { "epoch": 0.65, "grad_norm": 0.42533206939697266, "learning_rate": 0.000532880782370875, "loss": 1.8646, "step": 19577 }, { "epoch": 0.65, "grad_norm": 0.4434411823749542, "learning_rate": 0.0005328741931999348, "loss": 1.9068, "step": 19578 }, { "epoch": 0.65, "grad_norm": 0.44149887561798096, "learning_rate": 0.0005328676037463196, "loss": 1.8315, "step": 19579 }, { "epoch": 0.65, "grad_norm": 0.427921324968338, "learning_rate": 0.0005328610140100373, "loss": 1.7948, "step": 19580 }, { "epoch": 0.65, "grad_norm": 0.4544288218021393, "learning_rate": 0.0005328544239910962, "loss": 1.8142, "step": 19581 }, { "epoch": 0.65, "grad_norm": 0.41813772916793823, "learning_rate": 0.0005328478336895041, "loss": 1.8224, "step": 19582 }, { "epoch": 0.65, "grad_norm": 0.4331945776939392, "learning_rate": 0.0005328412431052689, "loss": 1.8437, "step": 19583 }, { "epoch": 0.65, "grad_norm": 0.43296411633491516, "learning_rate": 0.0005328346522383989, "loss": 1.8277, "step": 19584 }, { "epoch": 0.65, "grad_norm": 0.4477292597293854, "learning_rate": 0.0005328280610889018, "loss": 1.7941, "step": 19585 }, { "epoch": 0.65, "grad_norm": 0.4443099796772003, "learning_rate": 0.0005328214696567858, "loss": 1.8695, "step": 19586 }, { "epoch": 0.65, "grad_norm": 0.44092845916748047, "learning_rate": 0.000532814877942059, "loss": 1.8256, "step": 19587 }, { "epoch": 0.65, "grad_norm": 0.4373556077480316, "learning_rate": 0.000532808285944729, "loss": 1.8426, "step": 19588 }, { "epoch": 0.65, "grad_norm": 0.47729814052581787, "learning_rate": 0.0005328016936648042, "loss": 1.8446, "step": 19589 }, { "epoch": 0.65, "grad_norm": 0.43358245491981506, "learning_rate": 0.0005327951011022924, "loss": 1.8205, "step": 19590 }, { "epoch": 0.65, "grad_norm": 0.417424738407135, "learning_rate": 0.0005327885082572016, "loss": 1.7764, "step": 19591 }, { "epoch": 0.65, "grad_norm": 0.4216088056564331, "learning_rate": 0.0005327819151295399, "loss": 1.8526, "step": 19592 }, { "epoch": 0.65, "grad_norm": 0.44988176226615906, "learning_rate": 0.0005327753217193152, "loss": 1.9108, "step": 19593 }, { "epoch": 0.65, "grad_norm": 0.44599202275276184, "learning_rate": 0.0005327687280265356, "loss": 1.8917, "step": 19594 }, { "epoch": 0.65, "grad_norm": 0.4270685017108917, "learning_rate": 0.0005327621340512091, "loss": 1.8181, "step": 19595 }, { "epoch": 0.65, "grad_norm": 0.4182131886482239, "learning_rate": 0.0005327555397933436, "loss": 1.8101, "step": 19596 }, { "epoch": 0.65, "grad_norm": 0.4279341995716095, "learning_rate": 0.0005327489452529472, "loss": 1.8344, "step": 19597 }, { "epoch": 0.65, "grad_norm": 0.425597608089447, "learning_rate": 0.0005327423504300279, "loss": 1.8869, "step": 19598 }, { "epoch": 0.65, "grad_norm": 0.42465120553970337, "learning_rate": 0.0005327357553245937, "loss": 1.8225, "step": 19599 }, { "epoch": 0.65, "grad_norm": 0.4789349436759949, "learning_rate": 0.0005327291599366525, "loss": 1.8102, "step": 19600 }, { "epoch": 0.65, "grad_norm": 0.4305516183376312, "learning_rate": 0.0005327225642662124, "loss": 1.9033, "step": 19601 }, { "epoch": 0.65, "grad_norm": 0.4367537498474121, "learning_rate": 0.0005327159683132815, "loss": 1.9089, "step": 19602 }, { "epoch": 0.65, "grad_norm": 0.4283991754055023, "learning_rate": 0.0005327093720778676, "loss": 1.8457, "step": 19603 }, { "epoch": 0.65, "grad_norm": 0.42350655794143677, "learning_rate": 0.0005327027755599788, "loss": 1.8029, "step": 19604 }, { "epoch": 0.65, "grad_norm": 0.42599278688430786, "learning_rate": 0.0005326961787596231, "loss": 1.806, "step": 19605 }, { "epoch": 0.65, "grad_norm": 0.41277769207954407, "learning_rate": 0.0005326895816768086, "loss": 1.8434, "step": 19606 }, { "epoch": 0.65, "grad_norm": 0.42875784635543823, "learning_rate": 0.0005326829843115432, "loss": 1.8273, "step": 19607 }, { "epoch": 0.65, "grad_norm": 0.44355008006095886, "learning_rate": 0.000532676386663835, "loss": 1.8411, "step": 19608 }, { "epoch": 0.65, "grad_norm": 0.45021599531173706, "learning_rate": 0.0005326697887336919, "loss": 1.8986, "step": 19609 }, { "epoch": 0.65, "grad_norm": 0.424139142036438, "learning_rate": 0.000532663190521122, "loss": 1.8292, "step": 19610 }, { "epoch": 0.65, "grad_norm": 0.45287081599235535, "learning_rate": 0.0005326565920261333, "loss": 1.8594, "step": 19611 }, { "epoch": 0.65, "grad_norm": 0.4583272933959961, "learning_rate": 0.0005326499932487336, "loss": 1.9492, "step": 19612 }, { "epoch": 0.65, "grad_norm": 0.42710793018341064, "learning_rate": 0.0005326433941889312, "loss": 1.8189, "step": 19613 }, { "epoch": 0.65, "grad_norm": 0.4323318600654602, "learning_rate": 0.0005326367948467341, "loss": 1.8876, "step": 19614 }, { "epoch": 0.65, "grad_norm": 0.44751182198524475, "learning_rate": 0.00053263019522215, "loss": 1.9437, "step": 19615 }, { "epoch": 0.65, "grad_norm": 0.4512227475643158, "learning_rate": 0.0005326235953151872, "loss": 1.8204, "step": 19616 }, { "epoch": 0.65, "grad_norm": 0.42389219999313354, "learning_rate": 0.0005326169951258538, "loss": 1.8977, "step": 19617 }, { "epoch": 0.65, "grad_norm": 0.4377213418483734, "learning_rate": 0.0005326103946541574, "loss": 1.9187, "step": 19618 }, { "epoch": 0.65, "grad_norm": 0.44164326786994934, "learning_rate": 0.0005326037939001063, "loss": 1.8636, "step": 19619 }, { "epoch": 0.65, "grad_norm": 0.4340358376502991, "learning_rate": 0.0005325971928637086, "loss": 1.8595, "step": 19620 }, { "epoch": 0.65, "grad_norm": 0.4171273112297058, "learning_rate": 0.0005325905915449721, "loss": 1.8082, "step": 19621 }, { "epoch": 0.65, "grad_norm": 0.4137687683105469, "learning_rate": 0.0005325839899439048, "loss": 1.7698, "step": 19622 }, { "epoch": 0.65, "grad_norm": 0.44294676184654236, "learning_rate": 0.0005325773880605149, "loss": 1.8023, "step": 19623 }, { "epoch": 0.65, "grad_norm": 0.4144689440727234, "learning_rate": 0.0005325707858948104, "loss": 1.7692, "step": 19624 }, { "epoch": 0.65, "grad_norm": 0.42377978563308716, "learning_rate": 0.0005325641834467991, "loss": 1.8525, "step": 19625 }, { "epoch": 0.65, "grad_norm": 0.4293731451034546, "learning_rate": 0.0005325575807164892, "loss": 1.8357, "step": 19626 }, { "epoch": 0.65, "grad_norm": 0.4425990581512451, "learning_rate": 0.0005325509777038887, "loss": 1.8579, "step": 19627 }, { "epoch": 0.65, "grad_norm": 0.42708176374435425, "learning_rate": 0.0005325443744090055, "loss": 1.8495, "step": 19628 }, { "epoch": 0.65, "grad_norm": 0.43503981828689575, "learning_rate": 0.0005325377708318477, "loss": 1.9131, "step": 19629 }, { "epoch": 0.65, "grad_norm": 0.4247001111507416, "learning_rate": 0.0005325311669724233, "loss": 1.8436, "step": 19630 }, { "epoch": 0.65, "grad_norm": 0.4372502565383911, "learning_rate": 0.0005325245628307404, "loss": 1.8582, "step": 19631 }, { "epoch": 0.65, "grad_norm": 0.40852344036102295, "learning_rate": 0.000532517958406807, "loss": 1.7848, "step": 19632 }, { "epoch": 0.65, "grad_norm": 0.44875437021255493, "learning_rate": 0.0005325113537006309, "loss": 1.7745, "step": 19633 }, { "epoch": 0.65, "grad_norm": 0.4312431216239929, "learning_rate": 0.0005325047487122204, "loss": 1.8531, "step": 19634 }, { "epoch": 0.65, "grad_norm": 0.4249407947063446, "learning_rate": 0.0005324981434415833, "loss": 1.8332, "step": 19635 }, { "epoch": 0.65, "grad_norm": 0.4391557276248932, "learning_rate": 0.0005324915378887277, "loss": 1.8519, "step": 19636 }, { "epoch": 0.65, "grad_norm": 0.4201493263244629, "learning_rate": 0.0005324849320536616, "loss": 1.8545, "step": 19637 }, { "epoch": 0.65, "grad_norm": 0.42054831981658936, "learning_rate": 0.0005324783259363933, "loss": 1.8274, "step": 19638 }, { "epoch": 0.65, "grad_norm": 0.439046174287796, "learning_rate": 0.0005324717195369303, "loss": 1.8587, "step": 19639 }, { "epoch": 0.65, "grad_norm": 0.4417918920516968, "learning_rate": 0.000532465112855281, "loss": 1.9331, "step": 19640 }, { "epoch": 0.65, "grad_norm": 0.4280169904232025, "learning_rate": 0.0005324585058914533, "loss": 1.8339, "step": 19641 }, { "epoch": 0.65, "grad_norm": 0.4167465567588806, "learning_rate": 0.0005324518986454552, "loss": 1.9137, "step": 19642 }, { "epoch": 0.65, "grad_norm": 0.4393085837364197, "learning_rate": 0.0005324452911172948, "loss": 1.8724, "step": 19643 }, { "epoch": 0.65, "grad_norm": 0.43380051851272583, "learning_rate": 0.00053243868330698, "loss": 1.8516, "step": 19644 }, { "epoch": 0.65, "grad_norm": 0.42056670784950256, "learning_rate": 0.0005324320752145189, "loss": 1.8512, "step": 19645 }, { "epoch": 0.65, "grad_norm": 0.4314101040363312, "learning_rate": 0.0005324254668399195, "loss": 1.876, "step": 19646 }, { "epoch": 0.65, "grad_norm": 0.4571521282196045, "learning_rate": 0.0005324188581831898, "loss": 1.8896, "step": 19647 }, { "epoch": 0.65, "grad_norm": 0.43212535977363586, "learning_rate": 0.0005324122492443379, "loss": 1.8567, "step": 19648 }, { "epoch": 0.65, "grad_norm": 0.44179055094718933, "learning_rate": 0.0005324056400233718, "loss": 1.7912, "step": 19649 }, { "epoch": 0.65, "grad_norm": 0.4222862124443054, "learning_rate": 0.0005323990305202994, "loss": 1.8659, "step": 19650 }, { "epoch": 0.65, "grad_norm": 0.4350671172142029, "learning_rate": 0.0005323924207351289, "loss": 1.7453, "step": 19651 }, { "epoch": 0.65, "grad_norm": 0.437487930059433, "learning_rate": 0.0005323858106678682, "loss": 1.8122, "step": 19652 }, { "epoch": 0.65, "grad_norm": 0.43491891026496887, "learning_rate": 0.0005323792003185255, "loss": 1.8384, "step": 19653 }, { "epoch": 0.65, "grad_norm": 0.437113493680954, "learning_rate": 0.0005323725896871085, "loss": 1.8364, "step": 19654 }, { "epoch": 0.65, "grad_norm": 0.45726966857910156, "learning_rate": 0.0005323659787736254, "loss": 1.88, "step": 19655 }, { "epoch": 0.65, "grad_norm": 0.44236499071121216, "learning_rate": 0.0005323593675780843, "loss": 1.8288, "step": 19656 }, { "epoch": 0.65, "grad_norm": 0.4253287613391876, "learning_rate": 0.0005323527561004933, "loss": 1.8576, "step": 19657 }, { "epoch": 0.65, "grad_norm": 0.4379783868789673, "learning_rate": 0.0005323461443408602, "loss": 1.8388, "step": 19658 }, { "epoch": 0.65, "grad_norm": 0.4305705726146698, "learning_rate": 0.0005323395322991931, "loss": 1.8334, "step": 19659 }, { "epoch": 0.65, "grad_norm": 0.4226277768611908, "learning_rate": 0.0005323329199755, "loss": 1.7692, "step": 19660 }, { "epoch": 0.65, "grad_norm": 0.4189615547657013, "learning_rate": 0.000532326307369789, "loss": 1.8139, "step": 19661 }, { "epoch": 0.65, "grad_norm": 0.4330776631832123, "learning_rate": 0.0005323196944820682, "loss": 1.8223, "step": 19662 }, { "epoch": 0.65, "grad_norm": 0.43508902192115784, "learning_rate": 0.0005323130813123454, "loss": 1.8954, "step": 19663 }, { "epoch": 0.65, "grad_norm": 0.5377509593963623, "learning_rate": 0.0005323064678606288, "loss": 1.8258, "step": 19664 }, { "epoch": 0.65, "grad_norm": 0.4215301275253296, "learning_rate": 0.0005322998541269264, "loss": 1.7694, "step": 19665 }, { "epoch": 0.65, "grad_norm": 0.4456365704536438, "learning_rate": 0.0005322932401112461, "loss": 1.8278, "step": 19666 }, { "epoch": 0.65, "grad_norm": 0.42694810032844543, "learning_rate": 0.000532286625813596, "loss": 1.8993, "step": 19667 }, { "epoch": 0.65, "grad_norm": 0.4214436113834381, "learning_rate": 0.0005322800112339843, "loss": 1.858, "step": 19668 }, { "epoch": 0.65, "grad_norm": 0.45692723989486694, "learning_rate": 0.0005322733963724188, "loss": 1.8457, "step": 19669 }, { "epoch": 0.65, "grad_norm": 0.4204116761684418, "learning_rate": 0.0005322667812289077, "loss": 1.8891, "step": 19670 }, { "epoch": 0.65, "grad_norm": 0.4357639253139496, "learning_rate": 0.000532260165803459, "loss": 1.7434, "step": 19671 }, { "epoch": 0.65, "grad_norm": 0.4358767867088318, "learning_rate": 0.0005322535500960805, "loss": 1.8314, "step": 19672 }, { "epoch": 0.65, "grad_norm": 0.4276045262813568, "learning_rate": 0.0005322469341067805, "loss": 1.8386, "step": 19673 }, { "epoch": 0.65, "grad_norm": 0.4270603358745575, "learning_rate": 0.0005322403178355669, "loss": 1.8315, "step": 19674 }, { "epoch": 0.65, "grad_norm": 0.4213237166404724, "learning_rate": 0.0005322337012824479, "loss": 1.7601, "step": 19675 }, { "epoch": 0.65, "grad_norm": 0.42469364404678345, "learning_rate": 0.0005322270844474313, "loss": 1.8996, "step": 19676 }, { "epoch": 0.65, "grad_norm": 0.42630958557128906, "learning_rate": 0.0005322204673305253, "loss": 1.7723, "step": 19677 }, { "epoch": 0.65, "grad_norm": 0.4263550639152527, "learning_rate": 0.0005322138499317379, "loss": 1.8155, "step": 19678 }, { "epoch": 0.65, "grad_norm": 0.4430106282234192, "learning_rate": 0.0005322072322510769, "loss": 1.8721, "step": 19679 }, { "epoch": 0.65, "grad_norm": 0.4354186952114105, "learning_rate": 0.0005322006142885506, "loss": 1.8137, "step": 19680 }, { "epoch": 0.65, "grad_norm": 0.4201551079750061, "learning_rate": 0.0005321939960441672, "loss": 1.8121, "step": 19681 }, { "epoch": 0.65, "grad_norm": 0.4227009117603302, "learning_rate": 0.0005321873775179343, "loss": 1.8386, "step": 19682 }, { "epoch": 0.65, "grad_norm": 0.4187747538089752, "learning_rate": 0.0005321807587098602, "loss": 1.8199, "step": 19683 }, { "epoch": 0.65, "grad_norm": 0.4429970681667328, "learning_rate": 0.0005321741396199528, "loss": 1.8382, "step": 19684 }, { "epoch": 0.65, "grad_norm": 0.4442330598831177, "learning_rate": 0.0005321675202482202, "loss": 1.8917, "step": 19685 }, { "epoch": 0.65, "grad_norm": 0.4226309061050415, "learning_rate": 0.0005321609005946705, "loss": 1.7714, "step": 19686 }, { "epoch": 0.65, "grad_norm": 0.43465563654899597, "learning_rate": 0.0005321542806593117, "loss": 1.8894, "step": 19687 }, { "epoch": 0.66, "grad_norm": 0.9426162838935852, "learning_rate": 0.0005321476604421518, "loss": 1.831, "step": 19688 }, { "epoch": 0.66, "grad_norm": 0.43643996119499207, "learning_rate": 0.0005321410399431987, "loss": 1.8636, "step": 19689 }, { "epoch": 0.66, "grad_norm": 0.4405168294906616, "learning_rate": 0.0005321344191624608, "loss": 1.8724, "step": 19690 }, { "epoch": 0.66, "grad_norm": 0.43412813544273376, "learning_rate": 0.0005321277980999457, "loss": 1.8019, "step": 19691 }, { "epoch": 0.66, "grad_norm": 0.44772714376449585, "learning_rate": 0.0005321211767556618, "loss": 1.904, "step": 19692 }, { "epoch": 0.66, "grad_norm": 0.42029914259910583, "learning_rate": 0.000532114555129617, "loss": 1.871, "step": 19693 }, { "epoch": 0.66, "grad_norm": 0.44051289558410645, "learning_rate": 0.0005321079332218194, "loss": 1.8178, "step": 19694 }, { "epoch": 0.66, "grad_norm": 0.4245150089263916, "learning_rate": 0.0005321013110322768, "loss": 1.8622, "step": 19695 }, { "epoch": 0.66, "grad_norm": 0.44101324677467346, "learning_rate": 0.0005320946885609974, "loss": 1.8359, "step": 19696 }, { "epoch": 0.66, "grad_norm": 0.4291737675666809, "learning_rate": 0.0005320880658079894, "loss": 1.8377, "step": 19697 }, { "epoch": 0.66, "grad_norm": 0.4242168962955475, "learning_rate": 0.0005320814427732606, "loss": 1.852, "step": 19698 }, { "epoch": 0.66, "grad_norm": 0.4291514754295349, "learning_rate": 0.000532074819456819, "loss": 1.8508, "step": 19699 }, { "epoch": 0.66, "grad_norm": 0.4314379096031189, "learning_rate": 0.0005320681958586729, "loss": 1.8845, "step": 19700 }, { "epoch": 0.66, "grad_norm": 0.433429479598999, "learning_rate": 0.0005320615719788302, "loss": 1.8275, "step": 19701 }, { "epoch": 0.66, "grad_norm": 0.5649835467338562, "learning_rate": 0.0005320549478172989, "loss": 1.8455, "step": 19702 }, { "epoch": 0.66, "grad_norm": 0.442220002412796, "learning_rate": 0.0005320483233740872, "loss": 1.8469, "step": 19703 }, { "epoch": 0.66, "grad_norm": 0.4562045931816101, "learning_rate": 0.0005320416986492029, "loss": 1.95, "step": 19704 }, { "epoch": 0.66, "grad_norm": 0.43140748143196106, "learning_rate": 0.0005320350736426542, "loss": 1.7685, "step": 19705 }, { "epoch": 0.66, "grad_norm": 0.4367330074310303, "learning_rate": 0.0005320284483544491, "loss": 1.7913, "step": 19706 }, { "epoch": 0.66, "grad_norm": 0.43418484926223755, "learning_rate": 0.0005320218227845957, "loss": 1.8865, "step": 19707 }, { "epoch": 0.66, "grad_norm": 0.4373633563518524, "learning_rate": 0.0005320151969331019, "loss": 1.828, "step": 19708 }, { "epoch": 0.66, "grad_norm": 0.43025144934654236, "learning_rate": 0.000532008570799976, "loss": 1.8127, "step": 19709 }, { "epoch": 0.66, "grad_norm": 0.4312182366847992, "learning_rate": 0.0005320019443852258, "loss": 1.8612, "step": 19710 }, { "epoch": 0.66, "grad_norm": 0.4353571832180023, "learning_rate": 0.0005319953176888593, "loss": 1.8459, "step": 19711 }, { "epoch": 0.66, "grad_norm": 0.44416606426239014, "learning_rate": 0.0005319886907108848, "loss": 1.8552, "step": 19712 }, { "epoch": 0.66, "grad_norm": 0.43616628646850586, "learning_rate": 0.0005319820634513102, "loss": 1.8585, "step": 19713 }, { "epoch": 0.66, "grad_norm": 0.4298590421676636, "learning_rate": 0.0005319754359101436, "loss": 1.8354, "step": 19714 }, { "epoch": 0.66, "grad_norm": 0.4313850402832031, "learning_rate": 0.0005319688080873929, "loss": 1.8944, "step": 19715 }, { "epoch": 0.66, "grad_norm": 0.4251362979412079, "learning_rate": 0.0005319621799830663, "loss": 1.7812, "step": 19716 }, { "epoch": 0.66, "grad_norm": 0.45413604378700256, "learning_rate": 0.0005319555515971718, "loss": 1.8023, "step": 19717 }, { "epoch": 0.66, "grad_norm": 0.4151758849620819, "learning_rate": 0.0005319489229297173, "loss": 1.8442, "step": 19718 }, { "epoch": 0.66, "grad_norm": 0.4279152452945709, "learning_rate": 0.0005319422939807112, "loss": 1.901, "step": 19719 }, { "epoch": 0.66, "grad_norm": 0.4617117941379547, "learning_rate": 0.0005319356647501613, "loss": 1.8899, "step": 19720 }, { "epoch": 0.66, "grad_norm": 0.4417676627635956, "learning_rate": 0.0005319290352380756, "loss": 1.7842, "step": 19721 }, { "epoch": 0.66, "grad_norm": 0.4407649636268616, "learning_rate": 0.0005319224054444622, "loss": 1.7783, "step": 19722 }, { "epoch": 0.66, "grad_norm": 0.41988804936408997, "learning_rate": 0.0005319157753693292, "loss": 1.8053, "step": 19723 }, { "epoch": 0.66, "grad_norm": 0.45606136322021484, "learning_rate": 0.0005319091450126846, "loss": 1.8197, "step": 19724 }, { "epoch": 0.66, "grad_norm": 0.4346429407596588, "learning_rate": 0.0005319025143745365, "loss": 1.8247, "step": 19725 }, { "epoch": 0.66, "grad_norm": 0.4220471680164337, "learning_rate": 0.0005318958834548929, "loss": 1.7991, "step": 19726 }, { "epoch": 0.66, "grad_norm": 0.41870275139808655, "learning_rate": 0.0005318892522537618, "loss": 1.8202, "step": 19727 }, { "epoch": 0.66, "grad_norm": 0.4482274651527405, "learning_rate": 0.0005318826207711514, "loss": 1.8965, "step": 19728 }, { "epoch": 0.66, "grad_norm": 0.4370990991592407, "learning_rate": 0.0005318759890070697, "loss": 1.9278, "step": 19729 }, { "epoch": 0.66, "grad_norm": 0.44796255230903625, "learning_rate": 0.0005318693569615247, "loss": 1.7546, "step": 19730 }, { "epoch": 0.66, "grad_norm": 0.4486229419708252, "learning_rate": 0.0005318627246345244, "loss": 1.8477, "step": 19731 }, { "epoch": 0.66, "grad_norm": 0.4369545578956604, "learning_rate": 0.0005318560920260769, "loss": 1.9399, "step": 19732 }, { "epoch": 0.66, "grad_norm": 0.42860618233680725, "learning_rate": 0.0005318494591361904, "loss": 1.8591, "step": 19733 }, { "epoch": 0.66, "grad_norm": 0.4320754110813141, "learning_rate": 0.0005318428259648727, "loss": 1.8347, "step": 19734 }, { "epoch": 0.66, "grad_norm": 0.4358687400817871, "learning_rate": 0.000531836192512132, "loss": 1.8248, "step": 19735 }, { "epoch": 0.66, "grad_norm": 0.4535898268222809, "learning_rate": 0.0005318295587779763, "loss": 1.8224, "step": 19736 }, { "epoch": 0.66, "grad_norm": 0.43211430311203003, "learning_rate": 0.0005318229247624137, "loss": 1.8851, "step": 19737 }, { "epoch": 0.66, "grad_norm": 0.43086111545562744, "learning_rate": 0.0005318162904654523, "loss": 1.8918, "step": 19738 }, { "epoch": 0.66, "grad_norm": 0.44794797897338867, "learning_rate": 0.0005318096558871, "loss": 1.8554, "step": 19739 }, { "epoch": 0.66, "grad_norm": 0.43134716153144836, "learning_rate": 0.0005318030210273649, "loss": 1.9118, "step": 19740 }, { "epoch": 0.66, "grad_norm": 0.4466423988342285, "learning_rate": 0.0005317963858862551, "loss": 1.8566, "step": 19741 }, { "epoch": 0.66, "grad_norm": 0.42322811484336853, "learning_rate": 0.0005317897504637786, "loss": 1.7669, "step": 19742 }, { "epoch": 0.66, "grad_norm": 0.4383533000946045, "learning_rate": 0.0005317831147599437, "loss": 1.85, "step": 19743 }, { "epoch": 0.66, "grad_norm": 0.42663833498954773, "learning_rate": 0.0005317764787747581, "loss": 1.9354, "step": 19744 }, { "epoch": 0.66, "grad_norm": 0.4303894639015198, "learning_rate": 0.00053176984250823, "loss": 1.8479, "step": 19745 }, { "epoch": 0.66, "grad_norm": 0.4356992542743683, "learning_rate": 0.0005317632059603675, "loss": 1.8397, "step": 19746 }, { "epoch": 0.66, "grad_norm": 0.43619197607040405, "learning_rate": 0.0005317565691311786, "loss": 1.8721, "step": 19747 }, { "epoch": 0.66, "grad_norm": 0.448464572429657, "learning_rate": 0.0005317499320206713, "loss": 1.7873, "step": 19748 }, { "epoch": 0.66, "grad_norm": 0.41833797097206116, "learning_rate": 0.0005317432946288539, "loss": 1.7892, "step": 19749 }, { "epoch": 0.66, "grad_norm": 0.46350643038749695, "learning_rate": 0.0005317366569557342, "loss": 1.7476, "step": 19750 }, { "epoch": 0.66, "grad_norm": 0.43767040967941284, "learning_rate": 0.0005317300190013203, "loss": 1.9152, "step": 19751 }, { "epoch": 0.66, "grad_norm": 0.4359123706817627, "learning_rate": 0.0005317233807656203, "loss": 1.8966, "step": 19752 }, { "epoch": 0.66, "grad_norm": 0.43897342681884766, "learning_rate": 0.0005317167422486422, "loss": 1.8276, "step": 19753 }, { "epoch": 0.66, "grad_norm": 0.45030656456947327, "learning_rate": 0.0005317101034503943, "loss": 1.9031, "step": 19754 }, { "epoch": 0.66, "grad_norm": 0.4699050784111023, "learning_rate": 0.0005317034643708843, "loss": 1.8472, "step": 19755 }, { "epoch": 0.66, "grad_norm": 0.4236946403980255, "learning_rate": 0.0005316968250101206, "loss": 1.7447, "step": 19756 }, { "epoch": 0.66, "grad_norm": 0.44567155838012695, "learning_rate": 0.000531690185368111, "loss": 1.7802, "step": 19757 }, { "epoch": 0.66, "grad_norm": 0.5757372975349426, "learning_rate": 0.0005316835454448637, "loss": 1.8285, "step": 19758 }, { "epoch": 0.66, "grad_norm": 0.4284535348415375, "learning_rate": 0.0005316769052403867, "loss": 1.8843, "step": 19759 }, { "epoch": 0.66, "grad_norm": 0.41932711005210876, "learning_rate": 0.0005316702647546879, "loss": 1.9202, "step": 19760 }, { "epoch": 0.66, "grad_norm": 0.4655548334121704, "learning_rate": 0.0005316636239877758, "loss": 1.9358, "step": 19761 }, { "epoch": 0.66, "grad_norm": 0.4219432771205902, "learning_rate": 0.000531656982939658, "loss": 1.833, "step": 19762 }, { "epoch": 0.66, "grad_norm": 0.42911016941070557, "learning_rate": 0.0005316503416103428, "loss": 1.8886, "step": 19763 }, { "epoch": 0.66, "grad_norm": 0.42804232239723206, "learning_rate": 0.0005316436999998383, "loss": 1.8327, "step": 19764 }, { "epoch": 0.66, "grad_norm": 0.4373232424259186, "learning_rate": 0.0005316370581081525, "loss": 1.8764, "step": 19765 }, { "epoch": 0.66, "grad_norm": 0.4216512441635132, "learning_rate": 0.0005316304159352932, "loss": 1.8224, "step": 19766 }, { "epoch": 0.66, "grad_norm": 0.4192383885383606, "learning_rate": 0.0005316237734812689, "loss": 1.8627, "step": 19767 }, { "epoch": 0.66, "grad_norm": 0.4243147075176239, "learning_rate": 0.0005316171307460875, "loss": 1.7874, "step": 19768 }, { "epoch": 0.66, "grad_norm": 0.4412315785884857, "learning_rate": 0.0005316104877297569, "loss": 1.8082, "step": 19769 }, { "epoch": 0.66, "grad_norm": 0.42488813400268555, "learning_rate": 0.0005316038444322854, "loss": 1.8715, "step": 19770 }, { "epoch": 0.66, "grad_norm": 0.446987509727478, "learning_rate": 0.000531597200853681, "loss": 1.7666, "step": 19771 }, { "epoch": 0.66, "grad_norm": 0.4242463707923889, "learning_rate": 0.0005315905569939516, "loss": 1.8534, "step": 19772 }, { "epoch": 0.66, "grad_norm": 0.4168100953102112, "learning_rate": 0.0005315839128531055, "loss": 1.8287, "step": 19773 }, { "epoch": 0.66, "grad_norm": 0.42267778515815735, "learning_rate": 0.0005315772684311506, "loss": 1.8332, "step": 19774 }, { "epoch": 0.66, "grad_norm": 0.4254748821258545, "learning_rate": 0.000531570623728095, "loss": 1.7623, "step": 19775 }, { "epoch": 0.66, "grad_norm": 0.4480523467063904, "learning_rate": 0.0005315639787439468, "loss": 1.8117, "step": 19776 }, { "epoch": 0.66, "grad_norm": 0.4294802248477936, "learning_rate": 0.0005315573334787139, "loss": 1.877, "step": 19777 }, { "epoch": 0.66, "grad_norm": 0.4248204827308655, "learning_rate": 0.0005315506879324047, "loss": 1.8016, "step": 19778 }, { "epoch": 0.66, "grad_norm": 0.42135196924209595, "learning_rate": 0.000531544042105027, "loss": 1.7513, "step": 19779 }, { "epoch": 0.66, "grad_norm": 0.4229028820991516, "learning_rate": 0.000531537395996589, "loss": 1.7446, "step": 19780 }, { "epoch": 0.66, "grad_norm": 0.4290418326854706, "learning_rate": 0.0005315307496070987, "loss": 1.7948, "step": 19781 }, { "epoch": 0.66, "grad_norm": 0.43329882621765137, "learning_rate": 0.0005315241029365641, "loss": 1.8239, "step": 19782 }, { "epoch": 0.66, "grad_norm": 0.4320010244846344, "learning_rate": 0.0005315174559849935, "loss": 1.8907, "step": 19783 }, { "epoch": 0.66, "grad_norm": 0.42421844601631165, "learning_rate": 0.0005315108087523947, "loss": 1.7781, "step": 19784 }, { "epoch": 0.66, "grad_norm": 0.43254050612449646, "learning_rate": 0.000531504161238776, "loss": 1.9206, "step": 19785 }, { "epoch": 0.66, "grad_norm": 0.4495536983013153, "learning_rate": 0.0005314975134441453, "loss": 1.8674, "step": 19786 }, { "epoch": 0.66, "grad_norm": 0.43360039591789246, "learning_rate": 0.0005314908653685107, "loss": 1.8515, "step": 19787 }, { "epoch": 0.66, "grad_norm": 0.4220738708972931, "learning_rate": 0.0005314842170118804, "loss": 1.7491, "step": 19788 }, { "epoch": 0.66, "grad_norm": 0.4311569929122925, "learning_rate": 0.0005314775683742623, "loss": 1.9805, "step": 19789 }, { "epoch": 0.66, "grad_norm": 0.4359046518802643, "learning_rate": 0.0005314709194556645, "loss": 1.8397, "step": 19790 }, { "epoch": 0.66, "grad_norm": 0.42891162633895874, "learning_rate": 0.0005314642702560952, "loss": 1.8602, "step": 19791 }, { "epoch": 0.66, "grad_norm": 0.4317976236343384, "learning_rate": 0.0005314576207755623, "loss": 1.9022, "step": 19792 }, { "epoch": 0.66, "grad_norm": 0.42904096841812134, "learning_rate": 0.0005314509710140739, "loss": 1.8432, "step": 19793 }, { "epoch": 0.66, "grad_norm": 0.43066227436065674, "learning_rate": 0.0005314443209716383, "loss": 1.8054, "step": 19794 }, { "epoch": 0.66, "grad_norm": 0.4385503828525543, "learning_rate": 0.0005314376706482632, "loss": 1.8384, "step": 19795 }, { "epoch": 0.66, "grad_norm": 0.4128795266151428, "learning_rate": 0.000531431020043957, "loss": 1.8132, "step": 19796 }, { "epoch": 0.66, "grad_norm": 0.4175747036933899, "learning_rate": 0.0005314243691587276, "loss": 1.8338, "step": 19797 }, { "epoch": 0.66, "grad_norm": 0.4348542094230652, "learning_rate": 0.0005314177179925831, "loss": 1.8124, "step": 19798 }, { "epoch": 0.66, "grad_norm": 0.4177962839603424, "learning_rate": 0.0005314110665455316, "loss": 1.8629, "step": 19799 }, { "epoch": 0.66, "grad_norm": 0.43155092000961304, "learning_rate": 0.0005314044148175813, "loss": 1.8545, "step": 19800 }, { "epoch": 0.66, "grad_norm": 0.4193941652774811, "learning_rate": 0.00053139776280874, "loss": 1.8006, "step": 19801 }, { "epoch": 0.66, "grad_norm": 0.4422592520713806, "learning_rate": 0.0005313911105190159, "loss": 1.8608, "step": 19802 }, { "epoch": 0.66, "grad_norm": 0.43297815322875977, "learning_rate": 0.000531384457948417, "loss": 1.7916, "step": 19803 }, { "epoch": 0.66, "grad_norm": 0.43527480959892273, "learning_rate": 0.0005313778050969516, "loss": 1.8352, "step": 19804 }, { "epoch": 0.66, "grad_norm": 0.4220321774482727, "learning_rate": 0.0005313711519646277, "loss": 1.8186, "step": 19805 }, { "epoch": 0.66, "grad_norm": 0.44824379682540894, "learning_rate": 0.0005313644985514531, "loss": 1.8387, "step": 19806 }, { "epoch": 0.66, "grad_norm": 0.6913121938705444, "learning_rate": 0.0005313578448574363, "loss": 1.8473, "step": 19807 }, { "epoch": 0.66, "grad_norm": 0.4150426983833313, "learning_rate": 0.000531351190882585, "loss": 1.8039, "step": 19808 }, { "epoch": 0.66, "grad_norm": 0.4248576760292053, "learning_rate": 0.0005313445366269075, "loss": 1.839, "step": 19809 }, { "epoch": 0.66, "grad_norm": 0.4190714955329895, "learning_rate": 0.0005313378820904119, "loss": 1.8031, "step": 19810 }, { "epoch": 0.66, "grad_norm": 0.4273129105567932, "learning_rate": 0.0005313312272731061, "loss": 1.8849, "step": 19811 }, { "epoch": 0.66, "grad_norm": 0.4107787013053894, "learning_rate": 0.0005313245721749983, "loss": 1.8439, "step": 19812 }, { "epoch": 0.66, "grad_norm": 0.4166185259819031, "learning_rate": 0.0005313179167960965, "loss": 1.8632, "step": 19813 }, { "epoch": 0.66, "grad_norm": 0.4203939139842987, "learning_rate": 0.000531311261136409, "loss": 1.8094, "step": 19814 }, { "epoch": 0.66, "grad_norm": 0.43552160263061523, "learning_rate": 0.0005313046051959436, "loss": 1.7409, "step": 19815 }, { "epoch": 0.66, "grad_norm": 0.43184083700180054, "learning_rate": 0.0005312979489747084, "loss": 1.8789, "step": 19816 }, { "epoch": 0.66, "grad_norm": 0.4288916289806366, "learning_rate": 0.0005312912924727117, "loss": 1.8642, "step": 19817 }, { "epoch": 0.66, "grad_norm": 0.4307668209075928, "learning_rate": 0.0005312846356899614, "loss": 1.822, "step": 19818 }, { "epoch": 0.66, "grad_norm": 0.46066567301750183, "learning_rate": 0.0005312779786264656, "loss": 1.9119, "step": 19819 }, { "epoch": 0.66, "grad_norm": 0.44186872243881226, "learning_rate": 0.0005312713212822325, "loss": 1.8461, "step": 19820 }, { "epoch": 0.66, "grad_norm": 0.4396805465221405, "learning_rate": 0.0005312646636572701, "loss": 1.8618, "step": 19821 }, { "epoch": 0.66, "grad_norm": 0.45690059661865234, "learning_rate": 0.0005312580057515864, "loss": 1.7981, "step": 19822 }, { "epoch": 0.66, "grad_norm": 0.4723489284515381, "learning_rate": 0.0005312513475651896, "loss": 1.8094, "step": 19823 }, { "epoch": 0.66, "grad_norm": 0.4415833353996277, "learning_rate": 0.0005312446890980876, "loss": 1.8512, "step": 19824 }, { "epoch": 0.66, "grad_norm": 0.4462795853614807, "learning_rate": 0.0005312380303502888, "loss": 1.8023, "step": 19825 }, { "epoch": 0.66, "grad_norm": 0.4386537969112396, "learning_rate": 0.0005312313713218011, "loss": 1.9047, "step": 19826 }, { "epoch": 0.66, "grad_norm": 0.43417608737945557, "learning_rate": 0.0005312247120126324, "loss": 1.8395, "step": 19827 }, { "epoch": 0.66, "grad_norm": 0.42824703454971313, "learning_rate": 0.0005312180524227911, "loss": 1.8672, "step": 19828 }, { "epoch": 0.66, "grad_norm": 0.446564257144928, "learning_rate": 0.0005312113925522852, "loss": 1.8459, "step": 19829 }, { "epoch": 0.66, "grad_norm": 0.45862340927124023, "learning_rate": 0.0005312047324011227, "loss": 1.8184, "step": 19830 }, { "epoch": 0.66, "grad_norm": 0.4383390545845032, "learning_rate": 0.0005311980719693117, "loss": 1.8987, "step": 19831 }, { "epoch": 0.66, "grad_norm": 0.42078274488449097, "learning_rate": 0.0005311914112568604, "loss": 1.765, "step": 19832 }, { "epoch": 0.66, "grad_norm": 0.4417283535003662, "learning_rate": 0.0005311847502637765, "loss": 1.8688, "step": 19833 }, { "epoch": 0.66, "grad_norm": 0.4343259036540985, "learning_rate": 0.0005311780889900686, "loss": 1.82, "step": 19834 }, { "epoch": 0.66, "grad_norm": 0.43865731358528137, "learning_rate": 0.0005311714274357446, "loss": 1.7798, "step": 19835 }, { "epoch": 0.66, "grad_norm": 0.4262656569480896, "learning_rate": 0.0005311647656008124, "loss": 1.8297, "step": 19836 }, { "epoch": 0.66, "grad_norm": 0.457891583442688, "learning_rate": 0.0005311581034852805, "loss": 1.9464, "step": 19837 }, { "epoch": 0.66, "grad_norm": 0.4438548982143402, "learning_rate": 0.0005311514410891564, "loss": 1.8838, "step": 19838 }, { "epoch": 0.66, "grad_norm": 0.43334129452705383, "learning_rate": 0.0005311447784124486, "loss": 1.8485, "step": 19839 }, { "epoch": 0.66, "grad_norm": 0.42491522431373596, "learning_rate": 0.0005311381154551652, "loss": 1.9097, "step": 19840 }, { "epoch": 0.66, "grad_norm": 0.4381813108921051, "learning_rate": 0.0005311314522173142, "loss": 1.8073, "step": 19841 }, { "epoch": 0.66, "grad_norm": 0.45040056109428406, "learning_rate": 0.0005311247886989036, "loss": 1.853, "step": 19842 }, { "epoch": 0.66, "grad_norm": 0.4387620985507965, "learning_rate": 0.0005311181248999415, "loss": 1.8189, "step": 19843 }, { "epoch": 0.66, "grad_norm": 0.42004117369651794, "learning_rate": 0.0005311114608204361, "loss": 1.7563, "step": 19844 }, { "epoch": 0.66, "grad_norm": 0.42298585176467896, "learning_rate": 0.0005311047964603955, "loss": 1.8408, "step": 19845 }, { "epoch": 0.66, "grad_norm": 0.4158727824687958, "learning_rate": 0.0005310981318198277, "loss": 1.8085, "step": 19846 }, { "epoch": 0.66, "grad_norm": 0.4323653280735016, "learning_rate": 0.0005310914668987407, "loss": 1.8882, "step": 19847 }, { "epoch": 0.66, "grad_norm": 0.43316319584846497, "learning_rate": 0.0005310848016971429, "loss": 1.8499, "step": 19848 }, { "epoch": 0.66, "grad_norm": 0.4186382591724396, "learning_rate": 0.0005310781362150421, "loss": 1.7554, "step": 19849 }, { "epoch": 0.66, "grad_norm": 0.4250483810901642, "learning_rate": 0.0005310714704524465, "loss": 1.8026, "step": 19850 }, { "epoch": 0.66, "grad_norm": 0.42005762457847595, "learning_rate": 0.0005310648044093641, "loss": 1.7726, "step": 19851 }, { "epoch": 0.66, "grad_norm": 0.4391716420650482, "learning_rate": 0.0005310581380858032, "loss": 1.9345, "step": 19852 }, { "epoch": 0.66, "grad_norm": 0.4356389343738556, "learning_rate": 0.0005310514714817718, "loss": 1.8661, "step": 19853 }, { "epoch": 0.66, "grad_norm": 0.42155179381370544, "learning_rate": 0.0005310448045972778, "loss": 1.8564, "step": 19854 }, { "epoch": 0.66, "grad_norm": 0.42152372002601624, "learning_rate": 0.0005310381374323296, "loss": 1.8098, "step": 19855 }, { "epoch": 0.66, "grad_norm": 0.4241859018802643, "learning_rate": 0.000531031469986935, "loss": 1.876, "step": 19856 }, { "epoch": 0.66, "grad_norm": 0.4275669455528259, "learning_rate": 0.0005310248022611023, "loss": 1.9059, "step": 19857 }, { "epoch": 0.66, "grad_norm": 0.44118279218673706, "learning_rate": 0.0005310181342548396, "loss": 1.845, "step": 19858 }, { "epoch": 0.66, "grad_norm": 0.4307081997394562, "learning_rate": 0.0005310114659681549, "loss": 1.8278, "step": 19859 }, { "epoch": 0.66, "grad_norm": 0.443179726600647, "learning_rate": 0.0005310047974010563, "loss": 1.8479, "step": 19860 }, { "epoch": 0.66, "grad_norm": 0.43348240852355957, "learning_rate": 0.0005309981285535518, "loss": 1.8541, "step": 19861 }, { "epoch": 0.66, "grad_norm": 0.42361772060394287, "learning_rate": 0.0005309914594256497, "loss": 1.8458, "step": 19862 }, { "epoch": 0.66, "grad_norm": 0.4203508794307709, "learning_rate": 0.000530984790017358, "loss": 1.8008, "step": 19863 }, { "epoch": 0.66, "grad_norm": 0.4923954904079437, "learning_rate": 0.0005309781203286848, "loss": 1.8608, "step": 19864 }, { "epoch": 0.66, "grad_norm": 0.43041208386421204, "learning_rate": 0.0005309714503596382, "loss": 1.8335, "step": 19865 }, { "epoch": 0.66, "grad_norm": 0.4399539530277252, "learning_rate": 0.0005309647801102263, "loss": 1.8466, "step": 19866 }, { "epoch": 0.66, "grad_norm": 0.49101656675338745, "learning_rate": 0.0005309581095804572, "loss": 1.8433, "step": 19867 }, { "epoch": 0.66, "grad_norm": 0.41657406091690063, "learning_rate": 0.0005309514387703388, "loss": 1.8537, "step": 19868 }, { "epoch": 0.66, "grad_norm": 0.44860848784446716, "learning_rate": 0.0005309447676798796, "loss": 1.8728, "step": 19869 }, { "epoch": 0.66, "grad_norm": 0.4424850344657898, "learning_rate": 0.0005309380963090876, "loss": 1.805, "step": 19870 }, { "epoch": 0.66, "grad_norm": 0.4199628233909607, "learning_rate": 0.0005309314246579705, "loss": 1.7979, "step": 19871 }, { "epoch": 0.66, "grad_norm": 0.43367066979408264, "learning_rate": 0.0005309247527265368, "loss": 1.8331, "step": 19872 }, { "epoch": 0.66, "grad_norm": 0.45125988125801086, "learning_rate": 0.0005309180805147944, "loss": 1.8643, "step": 19873 }, { "epoch": 0.66, "grad_norm": 0.4400729238986969, "learning_rate": 0.0005309114080227516, "loss": 1.8275, "step": 19874 }, { "epoch": 0.66, "grad_norm": 0.42216625809669495, "learning_rate": 0.0005309047352504162, "loss": 1.8495, "step": 19875 }, { "epoch": 0.66, "grad_norm": 0.43331676721572876, "learning_rate": 0.0005308980621977966, "loss": 1.8535, "step": 19876 }, { "epoch": 0.66, "grad_norm": 0.4504327178001404, "learning_rate": 0.0005308913888649008, "loss": 1.8429, "step": 19877 }, { "epoch": 0.66, "grad_norm": 0.43919581174850464, "learning_rate": 0.0005308847152517369, "loss": 1.882, "step": 19878 }, { "epoch": 0.66, "grad_norm": 0.4413069486618042, "learning_rate": 0.0005308780413583128, "loss": 1.8475, "step": 19879 }, { "epoch": 0.66, "grad_norm": 0.4171585440635681, "learning_rate": 0.0005308713671846369, "loss": 1.7449, "step": 19880 }, { "epoch": 0.66, "grad_norm": 0.416007936000824, "learning_rate": 0.0005308646927307172, "loss": 1.8049, "step": 19881 }, { "epoch": 0.66, "grad_norm": 0.4185517132282257, "learning_rate": 0.0005308580179965616, "loss": 1.8409, "step": 19882 }, { "epoch": 0.66, "grad_norm": 0.43677809834480286, "learning_rate": 0.0005308513429821786, "loss": 1.8619, "step": 19883 }, { "epoch": 0.66, "grad_norm": 0.41449838876724243, "learning_rate": 0.0005308446676875761, "loss": 1.7936, "step": 19884 }, { "epoch": 0.66, "grad_norm": 0.42690643668174744, "learning_rate": 0.000530837992112762, "loss": 1.8602, "step": 19885 }, { "epoch": 0.66, "grad_norm": 0.43412965536117554, "learning_rate": 0.0005308313162577447, "loss": 1.8596, "step": 19886 }, { "epoch": 0.66, "grad_norm": 0.42311549186706543, "learning_rate": 0.0005308246401225321, "loss": 1.8683, "step": 19887 }, { "epoch": 0.66, "grad_norm": 0.4522187411785126, "learning_rate": 0.0005308179637071325, "loss": 1.8392, "step": 19888 }, { "epoch": 0.66, "grad_norm": 0.4409796893596649, "learning_rate": 0.0005308112870115538, "loss": 1.8257, "step": 19889 }, { "epoch": 0.66, "grad_norm": 0.4396944046020508, "learning_rate": 0.0005308046100358043, "loss": 1.8602, "step": 19890 }, { "epoch": 0.66, "grad_norm": 0.42343005537986755, "learning_rate": 0.000530797932779892, "loss": 1.7638, "step": 19891 }, { "epoch": 0.66, "grad_norm": 0.4634813964366913, "learning_rate": 0.0005307912552438251, "loss": 1.8484, "step": 19892 }, { "epoch": 0.66, "grad_norm": 0.44654494524002075, "learning_rate": 0.0005307845774276114, "loss": 1.8371, "step": 19893 }, { "epoch": 0.66, "grad_norm": 0.42840924859046936, "learning_rate": 0.0005307778993312594, "loss": 1.8883, "step": 19894 }, { "epoch": 0.66, "grad_norm": 0.4318998456001282, "learning_rate": 0.0005307712209547769, "loss": 1.8255, "step": 19895 }, { "epoch": 0.66, "grad_norm": 0.42689085006713867, "learning_rate": 0.0005307645422981723, "loss": 1.8071, "step": 19896 }, { "epoch": 0.66, "grad_norm": 0.4561961889266968, "learning_rate": 0.0005307578633614535, "loss": 1.8865, "step": 19897 }, { "epoch": 0.66, "grad_norm": 0.4419161379337311, "learning_rate": 0.0005307511841446286, "loss": 1.8218, "step": 19898 }, { "epoch": 0.66, "grad_norm": 0.4153722822666168, "learning_rate": 0.0005307445046477058, "loss": 1.7872, "step": 19899 }, { "epoch": 0.66, "grad_norm": 0.4212765395641327, "learning_rate": 0.0005307378248706931, "loss": 1.8341, "step": 19900 }, { "epoch": 0.66, "grad_norm": 0.4494006037712097, "learning_rate": 0.0005307311448135988, "loss": 1.7487, "step": 19901 }, { "epoch": 0.66, "grad_norm": 0.42835089564323425, "learning_rate": 0.0005307244644764308, "loss": 1.9313, "step": 19902 }, { "epoch": 0.66, "grad_norm": 0.42244720458984375, "learning_rate": 0.0005307177838591973, "loss": 1.8797, "step": 19903 }, { "epoch": 0.66, "grad_norm": 0.43852749466896057, "learning_rate": 0.0005307111029619064, "loss": 1.868, "step": 19904 }, { "epoch": 0.66, "grad_norm": 0.4293987452983856, "learning_rate": 0.0005307044217845663, "loss": 1.8316, "step": 19905 }, { "epoch": 0.66, "grad_norm": 0.4185284972190857, "learning_rate": 0.000530697740327185, "loss": 1.8364, "step": 19906 }, { "epoch": 0.66, "grad_norm": 0.40557000041007996, "learning_rate": 0.0005306910585897705, "loss": 1.8493, "step": 19907 }, { "epoch": 0.66, "grad_norm": 0.4285920262336731, "learning_rate": 0.0005306843765723312, "loss": 1.8783, "step": 19908 }, { "epoch": 0.66, "grad_norm": 0.4274823069572449, "learning_rate": 0.000530677694274875, "loss": 1.8024, "step": 19909 }, { "epoch": 0.66, "grad_norm": 0.6352213621139526, "learning_rate": 0.00053067101169741, "loss": 1.8518, "step": 19910 }, { "epoch": 0.66, "grad_norm": 0.4215436577796936, "learning_rate": 0.0005306643288399445, "loss": 1.7942, "step": 19911 }, { "epoch": 0.66, "grad_norm": 0.4332997798919678, "learning_rate": 0.0005306576457024864, "loss": 1.7673, "step": 19912 }, { "epoch": 0.66, "grad_norm": 0.43523693084716797, "learning_rate": 0.000530650962285044, "loss": 1.8835, "step": 19913 }, { "epoch": 0.66, "grad_norm": 0.4371711015701294, "learning_rate": 0.0005306442785876252, "loss": 1.8936, "step": 19914 }, { "epoch": 0.66, "grad_norm": 0.4156649112701416, "learning_rate": 0.0005306375946102383, "loss": 1.8347, "step": 19915 }, { "epoch": 0.66, "grad_norm": 0.4372937083244324, "learning_rate": 0.0005306309103528913, "loss": 1.8643, "step": 19916 }, { "epoch": 0.66, "grad_norm": 0.4319177567958832, "learning_rate": 0.0005306242258155924, "loss": 1.8896, "step": 19917 }, { "epoch": 0.66, "grad_norm": 0.4213450253009796, "learning_rate": 0.0005306175409983497, "loss": 1.8238, "step": 19918 }, { "epoch": 0.66, "grad_norm": 0.430059552192688, "learning_rate": 0.0005306108559011712, "loss": 1.8275, "step": 19919 }, { "epoch": 0.66, "grad_norm": 0.43621405959129333, "learning_rate": 0.0005306041705240652, "loss": 1.8491, "step": 19920 }, { "epoch": 0.66, "grad_norm": 0.4150322675704956, "learning_rate": 0.0005305974848670396, "loss": 1.806, "step": 19921 }, { "epoch": 0.66, "grad_norm": 0.4368019700050354, "learning_rate": 0.0005305907989301027, "loss": 1.7759, "step": 19922 }, { "epoch": 0.66, "grad_norm": 0.43099716305732727, "learning_rate": 0.0005305841127132625, "loss": 1.8754, "step": 19923 }, { "epoch": 0.66, "grad_norm": 0.4269660711288452, "learning_rate": 0.0005305774262165273, "loss": 1.8089, "step": 19924 }, { "epoch": 0.66, "grad_norm": 0.4376527965068817, "learning_rate": 0.0005305707394399049, "loss": 1.9481, "step": 19925 }, { "epoch": 0.66, "grad_norm": 0.44622740149497986, "learning_rate": 0.0005305640523834037, "loss": 1.8932, "step": 19926 }, { "epoch": 0.66, "grad_norm": 0.4420362710952759, "learning_rate": 0.0005305573650470316, "loss": 1.8175, "step": 19927 }, { "epoch": 0.66, "grad_norm": 0.4193359613418579, "learning_rate": 0.000530550677430797, "loss": 1.8151, "step": 19928 }, { "epoch": 0.66, "grad_norm": 0.42885807156562805, "learning_rate": 0.0005305439895347077, "loss": 1.8725, "step": 19929 }, { "epoch": 0.66, "grad_norm": 0.423747718334198, "learning_rate": 0.000530537301358772, "loss": 1.8137, "step": 19930 }, { "epoch": 0.66, "grad_norm": 0.42189791798591614, "learning_rate": 0.0005305306129029981, "loss": 1.8289, "step": 19931 }, { "epoch": 0.66, "grad_norm": 0.4150775372982025, "learning_rate": 0.000530523924167394, "loss": 1.8249, "step": 19932 }, { "epoch": 0.66, "grad_norm": 0.43220680952072144, "learning_rate": 0.0005305172351519677, "loss": 1.8545, "step": 19933 }, { "epoch": 0.66, "grad_norm": 0.4378082752227783, "learning_rate": 0.0005305105458567273, "loss": 1.8353, "step": 19934 }, { "epoch": 0.66, "grad_norm": 0.4283750653266907, "learning_rate": 0.0005305038562816813, "loss": 1.7869, "step": 19935 }, { "epoch": 0.66, "grad_norm": 0.4356663227081299, "learning_rate": 0.0005304971664268376, "loss": 1.7791, "step": 19936 }, { "epoch": 0.66, "grad_norm": 0.42895951867103577, "learning_rate": 0.0005304904762922041, "loss": 1.8599, "step": 19937 }, { "epoch": 0.66, "grad_norm": 0.4439663589000702, "learning_rate": 0.0005304837858777894, "loss": 1.9223, "step": 19938 }, { "epoch": 0.66, "grad_norm": 0.4251537024974823, "learning_rate": 0.0005304770951836011, "loss": 1.8145, "step": 19939 }, { "epoch": 0.66, "grad_norm": 0.4308672547340393, "learning_rate": 0.0005304704042096477, "loss": 1.8705, "step": 19940 }, { "epoch": 0.66, "grad_norm": 0.430350124835968, "learning_rate": 0.0005304637129559371, "loss": 1.7815, "step": 19941 }, { "epoch": 0.66, "grad_norm": 0.4266277253627777, "learning_rate": 0.0005304570214224776, "loss": 1.8417, "step": 19942 }, { "epoch": 0.66, "grad_norm": 0.4262464940547943, "learning_rate": 0.0005304503296092772, "loss": 1.8869, "step": 19943 }, { "epoch": 0.66, "grad_norm": 0.4352900981903076, "learning_rate": 0.000530443637516344, "loss": 1.7462, "step": 19944 }, { "epoch": 0.66, "grad_norm": 0.4394214451313019, "learning_rate": 0.0005304369451436862, "loss": 1.8498, "step": 19945 }, { "epoch": 0.66, "grad_norm": 0.4310915768146515, "learning_rate": 0.000530430252491312, "loss": 1.8587, "step": 19946 }, { "epoch": 0.66, "grad_norm": 0.42672014236450195, "learning_rate": 0.0005304235595592293, "loss": 1.7996, "step": 19947 }, { "epoch": 0.66, "grad_norm": 0.4256514012813568, "learning_rate": 0.0005304168663474465, "loss": 1.8183, "step": 19948 }, { "epoch": 0.66, "grad_norm": 0.43902289867401123, "learning_rate": 0.0005304101728559713, "loss": 1.8121, "step": 19949 }, { "epoch": 0.66, "grad_norm": 0.4357716143131256, "learning_rate": 0.0005304034790848123, "loss": 1.847, "step": 19950 }, { "epoch": 0.66, "grad_norm": 0.4340459406375885, "learning_rate": 0.0005303967850339774, "loss": 1.7683, "step": 19951 }, { "epoch": 0.66, "grad_norm": 0.4220580756664276, "learning_rate": 0.0005303900907034748, "loss": 1.878, "step": 19952 }, { "epoch": 0.66, "grad_norm": 0.4238223731517792, "learning_rate": 0.0005303833960933125, "loss": 1.838, "step": 19953 }, { "epoch": 0.66, "grad_norm": 0.4312550723552704, "learning_rate": 0.0005303767012034987, "loss": 1.8489, "step": 19954 }, { "epoch": 0.66, "grad_norm": 0.4445832073688507, "learning_rate": 0.0005303700060340416, "loss": 1.7339, "step": 19955 }, { "epoch": 0.66, "grad_norm": 0.4379589855670929, "learning_rate": 0.0005303633105849491, "loss": 1.8685, "step": 19956 }, { "epoch": 0.66, "grad_norm": 0.42815741896629333, "learning_rate": 0.0005303566148562297, "loss": 1.8396, "step": 19957 }, { "epoch": 0.66, "grad_norm": 0.4401731491088867, "learning_rate": 0.0005303499188478912, "loss": 1.8231, "step": 19958 }, { "epoch": 0.66, "grad_norm": 0.4308299124240875, "learning_rate": 0.0005303432225599419, "loss": 1.9161, "step": 19959 }, { "epoch": 0.66, "grad_norm": 0.4252532720565796, "learning_rate": 0.0005303365259923897, "loss": 1.8246, "step": 19960 }, { "epoch": 0.66, "grad_norm": 0.44251230359077454, "learning_rate": 0.0005303298291452431, "loss": 1.8376, "step": 19961 }, { "epoch": 0.66, "grad_norm": 0.44683030247688293, "learning_rate": 0.0005303231320185099, "loss": 1.8291, "step": 19962 }, { "epoch": 0.66, "grad_norm": 0.42409202456474304, "learning_rate": 0.0005303164346121984, "loss": 1.9069, "step": 19963 }, { "epoch": 0.66, "grad_norm": 0.41940367221832275, "learning_rate": 0.0005303097369263166, "loss": 1.877, "step": 19964 }, { "epoch": 0.66, "grad_norm": 0.42194709181785583, "learning_rate": 0.0005303030389608728, "loss": 1.8295, "step": 19965 }, { "epoch": 0.66, "grad_norm": 0.4578731060028076, "learning_rate": 0.000530296340715875, "loss": 1.8644, "step": 19966 }, { "epoch": 0.66, "grad_norm": 0.4267294704914093, "learning_rate": 0.0005302896421913314, "loss": 1.8096, "step": 19967 }, { "epoch": 0.66, "grad_norm": 0.4371381998062134, "learning_rate": 0.0005302829433872501, "loss": 1.8922, "step": 19968 }, { "epoch": 0.66, "grad_norm": 0.43523111939430237, "learning_rate": 0.0005302762443036394, "loss": 1.8422, "step": 19969 }, { "epoch": 0.66, "grad_norm": 0.4504503011703491, "learning_rate": 0.000530269544940507, "loss": 1.9168, "step": 19970 }, { "epoch": 0.66, "grad_norm": 0.4478406608104706, "learning_rate": 0.0005302628452978614, "loss": 1.9104, "step": 19971 }, { "epoch": 0.66, "grad_norm": 0.434939980506897, "learning_rate": 0.0005302561453757107, "loss": 1.8685, "step": 19972 }, { "epoch": 0.66, "grad_norm": 0.4552784562110901, "learning_rate": 0.0005302494451740629, "loss": 1.8414, "step": 19973 }, { "epoch": 0.66, "grad_norm": 0.4267880618572235, "learning_rate": 0.0005302427446929261, "loss": 1.8742, "step": 19974 }, { "epoch": 0.66, "grad_norm": 0.42489081621170044, "learning_rate": 0.0005302360439323088, "loss": 1.8425, "step": 19975 }, { "epoch": 0.66, "grad_norm": 0.456549733877182, "learning_rate": 0.0005302293428922186, "loss": 1.8718, "step": 19976 }, { "epoch": 0.66, "grad_norm": 0.4450586140155792, "learning_rate": 0.0005302226415726641, "loss": 1.8521, "step": 19977 }, { "epoch": 0.66, "grad_norm": 0.42683088779449463, "learning_rate": 0.0005302159399736532, "loss": 1.7642, "step": 19978 }, { "epoch": 0.66, "grad_norm": 0.4462655484676361, "learning_rate": 0.000530209238095194, "loss": 1.845, "step": 19979 }, { "epoch": 0.66, "grad_norm": 0.43956026434898376, "learning_rate": 0.0005302025359372947, "loss": 1.8863, "step": 19980 }, { "epoch": 0.66, "grad_norm": 0.44435685873031616, "learning_rate": 0.0005301958334999635, "loss": 1.8523, "step": 19981 }, { "epoch": 0.66, "grad_norm": 0.4330778121948242, "learning_rate": 0.0005301891307832084, "loss": 1.9178, "step": 19982 }, { "epoch": 0.66, "grad_norm": 0.43012601137161255, "learning_rate": 0.0005301824277870376, "loss": 1.7744, "step": 19983 }, { "epoch": 0.66, "grad_norm": 0.42632851004600525, "learning_rate": 0.0005301757245114593, "loss": 1.8863, "step": 19984 }, { "epoch": 0.66, "grad_norm": 0.4455721378326416, "learning_rate": 0.0005301690209564816, "loss": 1.8349, "step": 19985 }, { "epoch": 0.66, "grad_norm": 0.42617371678352356, "learning_rate": 0.0005301623171221127, "loss": 1.7993, "step": 19986 }, { "epoch": 0.66, "grad_norm": 0.414480060338974, "learning_rate": 0.0005301556130083605, "loss": 1.7298, "step": 19987 }, { "epoch": 0.67, "grad_norm": 0.4443800151348114, "learning_rate": 0.0005301489086152333, "loss": 1.8126, "step": 19988 }, { "epoch": 0.67, "grad_norm": 0.43969422578811646, "learning_rate": 0.0005301422039427393, "loss": 1.9065, "step": 19989 }, { "epoch": 0.67, "grad_norm": 0.48031824827194214, "learning_rate": 0.0005301354989908866, "loss": 1.8804, "step": 19990 }, { "epoch": 0.67, "grad_norm": 0.42631447315216064, "learning_rate": 0.0005301287937596832, "loss": 1.8781, "step": 19991 }, { "epoch": 0.67, "grad_norm": 0.4372301399707794, "learning_rate": 0.0005301220882491375, "loss": 1.8297, "step": 19992 }, { "epoch": 0.67, "grad_norm": 0.8597722053527832, "learning_rate": 0.0005301153824592575, "loss": 1.8707, "step": 19993 }, { "epoch": 0.67, "grad_norm": 0.7902175188064575, "learning_rate": 0.0005301086763900512, "loss": 1.8227, "step": 19994 }, { "epoch": 0.67, "grad_norm": 0.4231184124946594, "learning_rate": 0.000530101970041527, "loss": 1.8337, "step": 19995 }, { "epoch": 0.67, "grad_norm": 0.45718878507614136, "learning_rate": 0.0005300952634136928, "loss": 1.9184, "step": 19996 }, { "epoch": 0.67, "grad_norm": 0.47773388028144836, "learning_rate": 0.0005300885565065569, "loss": 1.8309, "step": 19997 }, { "epoch": 0.67, "grad_norm": 0.4325644075870514, "learning_rate": 0.0005300818493201274, "loss": 1.8662, "step": 19998 }, { "epoch": 0.67, "grad_norm": 0.427704393863678, "learning_rate": 0.0005300751418544124, "loss": 1.7913, "step": 19999 }, { "epoch": 0.67, "grad_norm": 0.4346369206905365, "learning_rate": 0.0005300684341094201, "loss": 1.8585, "step": 20000 }, { "epoch": 0.67, "grad_norm": 0.4343297481536865, "learning_rate": 0.0005300617260851588, "loss": 1.8488, "step": 20001 }, { "epoch": 0.67, "grad_norm": 0.4434277415275574, "learning_rate": 0.0005300550177816362, "loss": 1.8292, "step": 20002 }, { "epoch": 0.67, "grad_norm": 0.43838682770729065, "learning_rate": 0.0005300483091988607, "loss": 1.8874, "step": 20003 }, { "epoch": 0.67, "grad_norm": 0.41588732600212097, "learning_rate": 0.0005300416003368405, "loss": 1.812, "step": 20004 }, { "epoch": 0.67, "grad_norm": 0.44572511315345764, "learning_rate": 0.0005300348911955839, "loss": 1.8824, "step": 20005 }, { "epoch": 0.67, "grad_norm": 0.4462253749370575, "learning_rate": 0.0005300281817750987, "loss": 1.8083, "step": 20006 }, { "epoch": 0.67, "grad_norm": 0.45450299978256226, "learning_rate": 0.0005300214720753931, "loss": 1.8111, "step": 20007 }, { "epoch": 0.67, "grad_norm": 0.4400970935821533, "learning_rate": 0.0005300147620964753, "loss": 1.7998, "step": 20008 }, { "epoch": 0.67, "grad_norm": 0.44293051958084106, "learning_rate": 0.0005300080518383536, "loss": 1.869, "step": 20009 }, { "epoch": 0.67, "grad_norm": 0.4523765444755554, "learning_rate": 0.0005300013413010359, "loss": 1.8351, "step": 20010 }, { "epoch": 0.67, "grad_norm": 0.4329696595668793, "learning_rate": 0.0005299946304845306, "loss": 1.7864, "step": 20011 }, { "epoch": 0.67, "grad_norm": 0.4269461929798126, "learning_rate": 0.0005299879193888456, "loss": 1.8283, "step": 20012 }, { "epoch": 0.67, "grad_norm": 0.4306386411190033, "learning_rate": 0.0005299812080139892, "loss": 1.8161, "step": 20013 }, { "epoch": 0.67, "grad_norm": 0.4378893971443176, "learning_rate": 0.0005299744963599695, "loss": 1.8613, "step": 20014 }, { "epoch": 0.67, "grad_norm": 0.45122647285461426, "learning_rate": 0.0005299677844267947, "loss": 1.8471, "step": 20015 }, { "epoch": 0.67, "grad_norm": 0.45157063007354736, "learning_rate": 0.0005299610722144727, "loss": 1.8205, "step": 20016 }, { "epoch": 0.67, "grad_norm": 0.44386935234069824, "learning_rate": 0.000529954359723012, "loss": 1.865, "step": 20017 }, { "epoch": 0.67, "grad_norm": 0.4621644616127014, "learning_rate": 0.0005299476469524206, "loss": 1.8707, "step": 20018 }, { "epoch": 0.67, "grad_norm": 0.44217735528945923, "learning_rate": 0.0005299409339027066, "loss": 1.861, "step": 20019 }, { "epoch": 0.67, "grad_norm": 0.4449761211872101, "learning_rate": 0.0005299342205738781, "loss": 1.7939, "step": 20020 }, { "epoch": 0.67, "grad_norm": 0.4100760817527771, "learning_rate": 0.0005299275069659435, "loss": 1.8772, "step": 20021 }, { "epoch": 0.67, "grad_norm": 0.428879976272583, "learning_rate": 0.0005299207930789106, "loss": 1.892, "step": 20022 }, { "epoch": 0.67, "grad_norm": 0.4272291362285614, "learning_rate": 0.0005299140789127879, "loss": 1.7814, "step": 20023 }, { "epoch": 0.67, "grad_norm": 0.42772969603538513, "learning_rate": 0.0005299073644675833, "loss": 1.8574, "step": 20024 }, { "epoch": 0.67, "grad_norm": 0.4112016260623932, "learning_rate": 0.0005299006497433051, "loss": 1.8614, "step": 20025 }, { "epoch": 0.67, "grad_norm": 0.4101989269256592, "learning_rate": 0.0005298939347399614, "loss": 1.7743, "step": 20026 }, { "epoch": 0.67, "grad_norm": 0.4182378351688385, "learning_rate": 0.0005298872194575601, "loss": 1.8094, "step": 20027 }, { "epoch": 0.67, "grad_norm": 0.4182056784629822, "learning_rate": 0.0005298805038961099, "loss": 1.7969, "step": 20028 }, { "epoch": 0.67, "grad_norm": 0.4241759777069092, "learning_rate": 0.0005298737880556184, "loss": 1.827, "step": 20029 }, { "epoch": 0.67, "grad_norm": 0.4236237406730652, "learning_rate": 0.0005298670719360942, "loss": 1.8149, "step": 20030 }, { "epoch": 0.67, "grad_norm": 0.43753543496131897, "learning_rate": 0.0005298603555375451, "loss": 1.9407, "step": 20031 }, { "epoch": 0.67, "grad_norm": 0.4291974902153015, "learning_rate": 0.0005298536388599794, "loss": 1.9045, "step": 20032 }, { "epoch": 0.67, "grad_norm": 0.42273974418640137, "learning_rate": 0.0005298469219034053, "loss": 1.8024, "step": 20033 }, { "epoch": 0.67, "grad_norm": 0.4404544532299042, "learning_rate": 0.0005298402046678309, "loss": 1.8934, "step": 20034 }, { "epoch": 0.67, "grad_norm": 0.4279402494430542, "learning_rate": 0.0005298334871532643, "loss": 1.8819, "step": 20035 }, { "epoch": 0.67, "grad_norm": 0.4268794357776642, "learning_rate": 0.0005298267693597138, "loss": 1.849, "step": 20036 }, { "epoch": 0.67, "grad_norm": 0.41070088744163513, "learning_rate": 0.0005298200512871874, "loss": 1.8078, "step": 20037 }, { "epoch": 0.67, "grad_norm": 0.4232877194881439, "learning_rate": 0.0005298133329356933, "loss": 1.8415, "step": 20038 }, { "epoch": 0.67, "grad_norm": 0.4236524701118469, "learning_rate": 0.0005298066143052398, "loss": 1.8437, "step": 20039 }, { "epoch": 0.67, "grad_norm": 0.5230655074119568, "learning_rate": 0.0005297998953958348, "loss": 1.8669, "step": 20040 }, { "epoch": 0.67, "grad_norm": 0.44313523173332214, "learning_rate": 0.0005297931762074866, "loss": 1.9071, "step": 20041 }, { "epoch": 0.67, "grad_norm": 0.4390505850315094, "learning_rate": 0.0005297864567402034, "loss": 1.8163, "step": 20042 }, { "epoch": 0.67, "grad_norm": 0.43289220333099365, "learning_rate": 0.0005297797369939934, "loss": 1.7787, "step": 20043 }, { "epoch": 0.67, "grad_norm": 0.4184402823448181, "learning_rate": 0.0005297730169688644, "loss": 1.8148, "step": 20044 }, { "epoch": 0.67, "grad_norm": 0.4255923330783844, "learning_rate": 0.000529766296664825, "loss": 1.8647, "step": 20045 }, { "epoch": 0.67, "grad_norm": 0.4304946959018707, "learning_rate": 0.0005297595760818831, "loss": 1.8142, "step": 20046 }, { "epoch": 0.67, "grad_norm": 0.41534584760665894, "learning_rate": 0.000529752855220047, "loss": 1.7625, "step": 20047 }, { "epoch": 0.67, "grad_norm": 0.42368289828300476, "learning_rate": 0.0005297461340793247, "loss": 1.8435, "step": 20048 }, { "epoch": 0.67, "grad_norm": 0.43344855308532715, "learning_rate": 0.0005297394126597245, "loss": 1.8341, "step": 20049 }, { "epoch": 0.67, "grad_norm": 0.4202341139316559, "learning_rate": 0.0005297326909612543, "loss": 1.7989, "step": 20050 }, { "epoch": 0.67, "grad_norm": 0.4457783102989197, "learning_rate": 0.0005297259689839228, "loss": 1.837, "step": 20051 }, { "epoch": 0.67, "grad_norm": 0.44744598865509033, "learning_rate": 0.0005297192467277376, "loss": 1.788, "step": 20052 }, { "epoch": 0.67, "grad_norm": 0.4331846833229065, "learning_rate": 0.0005297125241927071, "loss": 1.8454, "step": 20053 }, { "epoch": 0.67, "grad_norm": 0.4203116297721863, "learning_rate": 0.0005297058013788396, "loss": 1.8475, "step": 20054 }, { "epoch": 0.67, "grad_norm": 0.43700915575027466, "learning_rate": 0.0005296990782861429, "loss": 1.9348, "step": 20055 }, { "epoch": 0.67, "grad_norm": 0.4263926148414612, "learning_rate": 0.0005296923549146255, "loss": 1.8115, "step": 20056 }, { "epoch": 0.67, "grad_norm": 0.4247751235961914, "learning_rate": 0.0005296856312642954, "loss": 1.8448, "step": 20057 }, { "epoch": 0.67, "grad_norm": 0.4183782637119293, "learning_rate": 0.0005296789073351607, "loss": 1.8806, "step": 20058 }, { "epoch": 0.67, "grad_norm": 0.4285242557525635, "learning_rate": 0.0005296721831272298, "loss": 1.9116, "step": 20059 }, { "epoch": 0.67, "grad_norm": 0.4335113763809204, "learning_rate": 0.0005296654586405106, "loss": 1.7726, "step": 20060 }, { "epoch": 0.67, "grad_norm": 0.4227138161659241, "learning_rate": 0.0005296587338750113, "loss": 1.8458, "step": 20061 }, { "epoch": 0.67, "grad_norm": 0.45494717359542847, "learning_rate": 0.0005296520088307403, "loss": 1.8865, "step": 20062 }, { "epoch": 0.67, "grad_norm": 0.43791061639785767, "learning_rate": 0.0005296452835077055, "loss": 1.8618, "step": 20063 }, { "epoch": 0.67, "grad_norm": 0.4444701373577118, "learning_rate": 0.0005296385579059152, "loss": 1.839, "step": 20064 }, { "epoch": 0.67, "grad_norm": 0.4488934874534607, "learning_rate": 0.0005296318320253775, "loss": 1.8091, "step": 20065 }, { "epoch": 0.67, "grad_norm": 0.445033997297287, "learning_rate": 0.0005296251058661006, "loss": 1.8118, "step": 20066 }, { "epoch": 0.67, "grad_norm": 0.4324631094932556, "learning_rate": 0.0005296183794280927, "loss": 1.8589, "step": 20067 }, { "epoch": 0.67, "grad_norm": 0.4228070080280304, "learning_rate": 0.0005296116527113619, "loss": 1.8596, "step": 20068 }, { "epoch": 0.67, "grad_norm": 0.4442294239997864, "learning_rate": 0.0005296049257159163, "loss": 1.7524, "step": 20069 }, { "epoch": 0.67, "grad_norm": 0.4445967376232147, "learning_rate": 0.0005295981984417644, "loss": 1.8269, "step": 20070 }, { "epoch": 0.67, "grad_norm": 0.438772052526474, "learning_rate": 0.0005295914708889138, "loss": 1.9242, "step": 20071 }, { "epoch": 0.67, "grad_norm": 0.4483848810195923, "learning_rate": 0.0005295847430573731, "loss": 1.8245, "step": 20072 }, { "epoch": 0.67, "grad_norm": 0.4339457154273987, "learning_rate": 0.0005295780149471505, "loss": 1.8793, "step": 20073 }, { "epoch": 0.67, "grad_norm": 0.42195364832878113, "learning_rate": 0.0005295712865582539, "loss": 1.7692, "step": 20074 }, { "epoch": 0.67, "grad_norm": 0.44779515266418457, "learning_rate": 0.0005295645578906915, "loss": 1.8458, "step": 20075 }, { "epoch": 0.67, "grad_norm": 0.4641708433628082, "learning_rate": 0.0005295578289444716, "loss": 1.8245, "step": 20076 }, { "epoch": 0.67, "grad_norm": 0.4357823431491852, "learning_rate": 0.0005295510997196023, "loss": 1.8507, "step": 20077 }, { "epoch": 0.67, "grad_norm": 0.44007447361946106, "learning_rate": 0.0005295443702160919, "loss": 1.8118, "step": 20078 }, { "epoch": 0.67, "grad_norm": 0.43470555543899536, "learning_rate": 0.0005295376404339483, "loss": 1.8561, "step": 20079 }, { "epoch": 0.67, "grad_norm": 0.45328211784362793, "learning_rate": 0.00052953091037318, "loss": 1.8025, "step": 20080 }, { "epoch": 0.67, "grad_norm": 0.4369814991950989, "learning_rate": 0.0005295241800337948, "loss": 1.9139, "step": 20081 }, { "epoch": 0.67, "grad_norm": 0.43976834416389465, "learning_rate": 0.0005295174494158012, "loss": 1.8162, "step": 20082 }, { "epoch": 0.67, "grad_norm": 0.4354977011680603, "learning_rate": 0.0005295107185192072, "loss": 1.8288, "step": 20083 }, { "epoch": 0.67, "grad_norm": 0.4455438256263733, "learning_rate": 0.0005295039873440211, "loss": 1.8188, "step": 20084 }, { "epoch": 0.67, "grad_norm": 0.4131985008716583, "learning_rate": 0.0005294972558902508, "loss": 1.8156, "step": 20085 }, { "epoch": 0.67, "grad_norm": 0.41888993978500366, "learning_rate": 0.0005294905241579048, "loss": 1.8482, "step": 20086 }, { "epoch": 0.67, "grad_norm": 0.45874685049057007, "learning_rate": 0.000529483792146991, "loss": 1.7903, "step": 20087 }, { "epoch": 0.67, "grad_norm": 0.41713786125183105, "learning_rate": 0.0005294770598575177, "loss": 1.8576, "step": 20088 }, { "epoch": 0.67, "grad_norm": 0.4412778913974762, "learning_rate": 0.0005294703272894931, "loss": 1.8105, "step": 20089 }, { "epoch": 0.67, "grad_norm": 0.43757468461990356, "learning_rate": 0.0005294635944429253, "loss": 1.787, "step": 20090 }, { "epoch": 0.67, "grad_norm": 0.4301016926765442, "learning_rate": 0.0005294568613178226, "loss": 1.8791, "step": 20091 }, { "epoch": 0.67, "grad_norm": 0.41841575503349304, "learning_rate": 0.000529450127914193, "loss": 1.877, "step": 20092 }, { "epoch": 0.67, "grad_norm": 0.43132713437080383, "learning_rate": 0.0005294433942320448, "loss": 1.9149, "step": 20093 }, { "epoch": 0.67, "grad_norm": 0.43201926350593567, "learning_rate": 0.0005294366602713861, "loss": 1.8338, "step": 20094 }, { "epoch": 0.67, "grad_norm": 0.43742015957832336, "learning_rate": 0.0005294299260322251, "loss": 1.8522, "step": 20095 }, { "epoch": 0.67, "grad_norm": 0.4328155815601349, "learning_rate": 0.0005294231915145699, "loss": 1.8114, "step": 20096 }, { "epoch": 0.67, "grad_norm": 0.44338494539260864, "learning_rate": 0.0005294164567184289, "loss": 1.7646, "step": 20097 }, { "epoch": 0.67, "grad_norm": 0.4299219846725464, "learning_rate": 0.0005294097216438099, "loss": 1.7992, "step": 20098 }, { "epoch": 0.67, "grad_norm": 0.4359605610370636, "learning_rate": 0.0005294029862907216, "loss": 1.8705, "step": 20099 }, { "epoch": 0.67, "grad_norm": 0.4686601459980011, "learning_rate": 0.0005293962506591717, "loss": 1.9095, "step": 20100 }, { "epoch": 0.67, "grad_norm": 0.42390498518943787, "learning_rate": 0.0005293895147491685, "loss": 1.8625, "step": 20101 }, { "epoch": 0.67, "grad_norm": 0.42185208201408386, "learning_rate": 0.0005293827785607204, "loss": 1.8115, "step": 20102 }, { "epoch": 0.67, "grad_norm": 0.43583038449287415, "learning_rate": 0.0005293760420938353, "loss": 1.7841, "step": 20103 }, { "epoch": 0.67, "grad_norm": 0.4297700822353363, "learning_rate": 0.0005293693053485214, "loss": 1.8959, "step": 20104 }, { "epoch": 0.67, "grad_norm": 0.43825435638427734, "learning_rate": 0.0005293625683247871, "loss": 1.8143, "step": 20105 }, { "epoch": 0.67, "grad_norm": 0.43368393182754517, "learning_rate": 0.0005293558310226404, "loss": 1.8166, "step": 20106 }, { "epoch": 0.67, "grad_norm": 0.4430084526538849, "learning_rate": 0.0005293490934420895, "loss": 1.8568, "step": 20107 }, { "epoch": 0.67, "grad_norm": 0.4509200155735016, "learning_rate": 0.0005293423555831426, "loss": 1.8131, "step": 20108 }, { "epoch": 0.67, "grad_norm": 0.4485143721103668, "learning_rate": 0.0005293356174458078, "loss": 1.821, "step": 20109 }, { "epoch": 0.67, "grad_norm": 0.44075798988342285, "learning_rate": 0.0005293288790300934, "loss": 1.8826, "step": 20110 }, { "epoch": 0.67, "grad_norm": 0.44061189889907837, "learning_rate": 0.0005293221403360076, "loss": 1.8902, "step": 20111 }, { "epoch": 0.67, "grad_norm": 0.4330475330352783, "learning_rate": 0.0005293154013635584, "loss": 1.8522, "step": 20112 }, { "epoch": 0.67, "grad_norm": 0.4367847144603729, "learning_rate": 0.0005293086621127542, "loss": 1.8894, "step": 20113 }, { "epoch": 0.67, "grad_norm": 0.4201911687850952, "learning_rate": 0.000529301922583603, "loss": 1.8181, "step": 20114 }, { "epoch": 0.67, "grad_norm": 0.4264540672302246, "learning_rate": 0.000529295182776113, "loss": 1.7997, "step": 20115 }, { "epoch": 0.67, "grad_norm": 0.4459370970726013, "learning_rate": 0.0005292884426902926, "loss": 1.8695, "step": 20116 }, { "epoch": 0.67, "grad_norm": 0.4398198127746582, "learning_rate": 0.0005292817023261496, "loss": 1.9133, "step": 20117 }, { "epoch": 0.67, "grad_norm": 0.42050397396087646, "learning_rate": 0.0005292749616836926, "loss": 1.798, "step": 20118 }, { "epoch": 0.67, "grad_norm": 0.4236479699611664, "learning_rate": 0.0005292682207629295, "loss": 1.765, "step": 20119 }, { "epoch": 0.67, "grad_norm": 0.889875054359436, "learning_rate": 0.0005292614795638685, "loss": 1.7864, "step": 20120 }, { "epoch": 0.67, "grad_norm": 0.42472732067108154, "learning_rate": 0.000529254738086518, "loss": 1.8495, "step": 20121 }, { "epoch": 0.67, "grad_norm": 0.4231374263763428, "learning_rate": 0.0005292479963308859, "loss": 1.8815, "step": 20122 }, { "epoch": 0.67, "grad_norm": 0.4323965609073639, "learning_rate": 0.0005292412542969805, "loss": 1.8187, "step": 20123 }, { "epoch": 0.67, "grad_norm": 0.44272056221961975, "learning_rate": 0.00052923451198481, "loss": 1.8503, "step": 20124 }, { "epoch": 0.67, "grad_norm": 0.45304974913597107, "learning_rate": 0.0005292277693943827, "loss": 1.7948, "step": 20125 }, { "epoch": 0.67, "grad_norm": 0.4207344949245453, "learning_rate": 0.0005292210265257065, "loss": 1.7934, "step": 20126 }, { "epoch": 0.67, "grad_norm": 0.4344432055950165, "learning_rate": 0.0005292142833787899, "loss": 1.9113, "step": 20127 }, { "epoch": 0.67, "grad_norm": 0.4177221953868866, "learning_rate": 0.0005292075399536408, "loss": 1.8329, "step": 20128 }, { "epoch": 0.67, "grad_norm": 0.43193843960762024, "learning_rate": 0.0005292007962502675, "loss": 1.7863, "step": 20129 }, { "epoch": 0.67, "grad_norm": 0.42680826783180237, "learning_rate": 0.0005291940522686783, "loss": 1.8462, "step": 20130 }, { "epoch": 0.67, "grad_norm": 0.4472271800041199, "learning_rate": 0.0005291873080088813, "loss": 1.8606, "step": 20131 }, { "epoch": 0.67, "grad_norm": 0.4371057450771332, "learning_rate": 0.0005291805634708846, "loss": 1.8286, "step": 20132 }, { "epoch": 0.67, "grad_norm": 0.8565987944602966, "learning_rate": 0.0005291738186546965, "loss": 1.8239, "step": 20133 }, { "epoch": 0.67, "grad_norm": 0.41854584217071533, "learning_rate": 0.0005291670735603252, "loss": 1.767, "step": 20134 }, { "epoch": 0.67, "grad_norm": 0.427817702293396, "learning_rate": 0.0005291603281877788, "loss": 1.7832, "step": 20135 }, { "epoch": 0.67, "grad_norm": 0.43972042202949524, "learning_rate": 0.0005291535825370655, "loss": 1.8393, "step": 20136 }, { "epoch": 0.67, "grad_norm": 0.44550326466560364, "learning_rate": 0.0005291468366081936, "loss": 1.8424, "step": 20137 }, { "epoch": 0.67, "grad_norm": 0.44280484318733215, "learning_rate": 0.0005291400904011711, "loss": 1.8771, "step": 20138 }, { "epoch": 0.67, "grad_norm": 0.4375941753387451, "learning_rate": 0.0005291333439160063, "loss": 1.8873, "step": 20139 }, { "epoch": 0.67, "grad_norm": 0.42940667271614075, "learning_rate": 0.0005291265971527075, "loss": 1.9759, "step": 20140 }, { "epoch": 0.67, "grad_norm": 0.45110639929771423, "learning_rate": 0.0005291198501112827, "loss": 1.7659, "step": 20141 }, { "epoch": 0.67, "grad_norm": 0.4591543674468994, "learning_rate": 0.0005291131027917401, "loss": 1.883, "step": 20142 }, { "epoch": 0.67, "grad_norm": 0.4338165521621704, "learning_rate": 0.000529106355194088, "loss": 1.871, "step": 20143 }, { "epoch": 0.67, "grad_norm": 0.43365344405174255, "learning_rate": 0.0005290996073183346, "loss": 1.8591, "step": 20144 }, { "epoch": 0.67, "grad_norm": 0.44088152050971985, "learning_rate": 0.0005290928591644879, "loss": 1.7427, "step": 20145 }, { "epoch": 0.67, "grad_norm": 0.41278913617134094, "learning_rate": 0.0005290861107325564, "loss": 1.7437, "step": 20146 }, { "epoch": 0.67, "grad_norm": 0.4325486123561859, "learning_rate": 0.000529079362022548, "loss": 1.8029, "step": 20147 }, { "epoch": 0.67, "grad_norm": 0.44656267762184143, "learning_rate": 0.000529072613034471, "loss": 1.7725, "step": 20148 }, { "epoch": 0.67, "grad_norm": 0.4368663728237152, "learning_rate": 0.0005290658637683336, "loss": 1.9293, "step": 20149 }, { "epoch": 0.67, "grad_norm": 0.45144417881965637, "learning_rate": 0.000529059114224144, "loss": 1.856, "step": 20150 }, { "epoch": 0.67, "grad_norm": 0.43920832872390747, "learning_rate": 0.0005290523644019105, "loss": 1.8735, "step": 20151 }, { "epoch": 0.67, "grad_norm": 0.4186136722564697, "learning_rate": 0.0005290456143016409, "loss": 1.8208, "step": 20152 }, { "epoch": 0.67, "grad_norm": 0.432517409324646, "learning_rate": 0.000529038863923344, "loss": 1.8411, "step": 20153 }, { "epoch": 0.67, "grad_norm": 0.44002765417099, "learning_rate": 0.0005290321132670275, "loss": 1.8331, "step": 20154 }, { "epoch": 0.67, "grad_norm": 0.4542735517024994, "learning_rate": 0.0005290253623326998, "loss": 1.8657, "step": 20155 }, { "epoch": 0.67, "grad_norm": 0.42194563150405884, "learning_rate": 0.000529018611120369, "loss": 1.8645, "step": 20156 }, { "epoch": 0.67, "grad_norm": 0.4313092529773712, "learning_rate": 0.0005290118596300434, "loss": 1.8974, "step": 20157 }, { "epoch": 0.67, "grad_norm": 0.4148652255535126, "learning_rate": 0.0005290051078617311, "loss": 1.7958, "step": 20158 }, { "epoch": 0.67, "grad_norm": 0.4111582040786743, "learning_rate": 0.0005289983558154403, "loss": 1.801, "step": 20159 }, { "epoch": 0.67, "grad_norm": 0.4338917136192322, "learning_rate": 0.0005289916034911794, "loss": 1.8349, "step": 20160 }, { "epoch": 0.67, "grad_norm": 0.4367062449455261, "learning_rate": 0.0005289848508889563, "loss": 1.8783, "step": 20161 }, { "epoch": 0.67, "grad_norm": 0.41140034794807434, "learning_rate": 0.0005289780980087794, "loss": 1.7958, "step": 20162 }, { "epoch": 0.67, "grad_norm": 0.4193744659423828, "learning_rate": 0.0005289713448506569, "loss": 1.8542, "step": 20163 }, { "epoch": 0.67, "grad_norm": 0.4127149283885956, "learning_rate": 0.0005289645914145967, "loss": 1.7379, "step": 20164 }, { "epoch": 0.67, "grad_norm": 0.425417959690094, "learning_rate": 0.0005289578377006075, "loss": 1.8413, "step": 20165 }, { "epoch": 0.67, "grad_norm": 0.4438565969467163, "learning_rate": 0.0005289510837086969, "loss": 1.8391, "step": 20166 }, { "epoch": 0.67, "grad_norm": 0.4274519383907318, "learning_rate": 0.0005289443294388737, "loss": 1.8366, "step": 20167 }, { "epoch": 0.67, "grad_norm": 0.43694552779197693, "learning_rate": 0.0005289375748911457, "loss": 1.8763, "step": 20168 }, { "epoch": 0.67, "grad_norm": 0.4445979595184326, "learning_rate": 0.0005289308200655212, "loss": 1.8228, "step": 20169 }, { "epoch": 0.67, "grad_norm": 0.43696221709251404, "learning_rate": 0.0005289240649620084, "loss": 1.8223, "step": 20170 }, { "epoch": 0.67, "grad_norm": 0.4398118555545807, "learning_rate": 0.0005289173095806155, "loss": 1.9137, "step": 20171 }, { "epoch": 0.67, "grad_norm": 0.4314940571784973, "learning_rate": 0.0005289105539213508, "loss": 1.7999, "step": 20172 }, { "epoch": 0.67, "grad_norm": 0.42276138067245483, "learning_rate": 0.0005289037979842225, "loss": 1.7775, "step": 20173 }, { "epoch": 0.67, "grad_norm": 0.4227626919746399, "learning_rate": 0.0005288970417692385, "loss": 1.7178, "step": 20174 }, { "epoch": 0.67, "grad_norm": 0.4344136416912079, "learning_rate": 0.0005288902852764074, "loss": 1.8461, "step": 20175 }, { "epoch": 0.67, "grad_norm": 0.42762288451194763, "learning_rate": 0.0005288835285057371, "loss": 1.8333, "step": 20176 }, { "epoch": 0.67, "grad_norm": 0.43539291620254517, "learning_rate": 0.0005288767714572359, "loss": 1.8269, "step": 20177 }, { "epoch": 0.67, "grad_norm": 0.436440110206604, "learning_rate": 0.0005288700141309121, "loss": 1.8928, "step": 20178 }, { "epoch": 0.67, "grad_norm": 0.4370252192020416, "learning_rate": 0.0005288632565267739, "loss": 1.8764, "step": 20179 }, { "epoch": 0.67, "grad_norm": 0.44284555315971375, "learning_rate": 0.0005288564986448293, "loss": 1.7857, "step": 20180 }, { "epoch": 0.67, "grad_norm": 0.43190687894821167, "learning_rate": 0.0005288497404850867, "loss": 1.8188, "step": 20181 }, { "epoch": 0.67, "grad_norm": 0.4283178150653839, "learning_rate": 0.0005288429820475541, "loss": 1.8382, "step": 20182 }, { "epoch": 0.67, "grad_norm": 0.4300995469093323, "learning_rate": 0.00052883622333224, "loss": 1.9014, "step": 20183 }, { "epoch": 0.67, "grad_norm": 0.447912335395813, "learning_rate": 0.0005288294643391523, "loss": 1.8595, "step": 20184 }, { "epoch": 0.67, "grad_norm": 0.4553677439689636, "learning_rate": 0.0005288227050682995, "loss": 1.8483, "step": 20185 }, { "epoch": 0.67, "grad_norm": 0.4298684895038605, "learning_rate": 0.0005288159455196895, "loss": 1.9055, "step": 20186 }, { "epoch": 0.67, "grad_norm": 0.455623596906662, "learning_rate": 0.0005288091856933308, "loss": 1.8601, "step": 20187 }, { "epoch": 0.67, "grad_norm": 0.4083084166049957, "learning_rate": 0.0005288024255892314, "loss": 1.8915, "step": 20188 }, { "epoch": 0.67, "grad_norm": 0.45305827260017395, "learning_rate": 0.0005287956652073996, "loss": 1.8223, "step": 20189 }, { "epoch": 0.67, "grad_norm": 0.4323377311229706, "learning_rate": 0.0005287889045478435, "loss": 1.8702, "step": 20190 }, { "epoch": 0.67, "grad_norm": 0.42555850744247437, "learning_rate": 0.0005287821436105715, "loss": 1.8001, "step": 20191 }, { "epoch": 0.67, "grad_norm": 0.44225358963012695, "learning_rate": 0.0005287753823955915, "loss": 1.8166, "step": 20192 }, { "epoch": 0.67, "grad_norm": 0.43894797563552856, "learning_rate": 0.000528768620902912, "loss": 1.7914, "step": 20193 }, { "epoch": 0.67, "grad_norm": 0.4336179494857788, "learning_rate": 0.0005287618591325411, "loss": 1.8795, "step": 20194 }, { "epoch": 0.67, "grad_norm": 0.4338674247264862, "learning_rate": 0.000528755097084487, "loss": 1.8041, "step": 20195 }, { "epoch": 0.67, "grad_norm": 0.4344214200973511, "learning_rate": 0.000528748334758758, "loss": 1.9323, "step": 20196 }, { "epoch": 0.67, "grad_norm": 0.4503941535949707, "learning_rate": 0.0005287415721553621, "loss": 1.8305, "step": 20197 }, { "epoch": 0.67, "grad_norm": 0.4366878867149353, "learning_rate": 0.0005287348092743077, "loss": 1.786, "step": 20198 }, { "epoch": 0.67, "grad_norm": 0.43148449063301086, "learning_rate": 0.000528728046115603, "loss": 1.8195, "step": 20199 }, { "epoch": 0.67, "grad_norm": 0.4205608069896698, "learning_rate": 0.0005287212826792561, "loss": 1.8576, "step": 20200 }, { "epoch": 0.67, "grad_norm": 0.4464477598667145, "learning_rate": 0.0005287145189652752, "loss": 1.9444, "step": 20201 }, { "epoch": 0.67, "grad_norm": 0.4591667056083679, "learning_rate": 0.0005287077549736687, "loss": 1.7884, "step": 20202 }, { "epoch": 0.67, "grad_norm": 0.42220163345336914, "learning_rate": 0.0005287009907044446, "loss": 1.8318, "step": 20203 }, { "epoch": 0.67, "grad_norm": 0.4272004961967468, "learning_rate": 0.0005286942261576112, "loss": 1.8881, "step": 20204 }, { "epoch": 0.67, "grad_norm": 0.4410209655761719, "learning_rate": 0.0005286874613331768, "loss": 1.8428, "step": 20205 }, { "epoch": 0.67, "grad_norm": 0.42317700386047363, "learning_rate": 0.0005286806962311494, "loss": 1.8698, "step": 20206 }, { "epoch": 0.67, "grad_norm": 0.43226489424705505, "learning_rate": 0.0005286739308515372, "loss": 1.7985, "step": 20207 }, { "epoch": 0.67, "grad_norm": 0.4317435920238495, "learning_rate": 0.0005286671651943487, "loss": 1.7743, "step": 20208 }, { "epoch": 0.67, "grad_norm": 0.46606630086898804, "learning_rate": 0.000528660399259592, "loss": 1.9162, "step": 20209 }, { "epoch": 0.67, "grad_norm": 0.4271900951862335, "learning_rate": 0.0005286536330472752, "loss": 1.9165, "step": 20210 }, { "epoch": 0.67, "grad_norm": 0.4409591257572174, "learning_rate": 0.0005286468665574066, "loss": 1.7952, "step": 20211 }, { "epoch": 0.67, "grad_norm": 0.42725569009780884, "learning_rate": 0.0005286400997899944, "loss": 1.8106, "step": 20212 }, { "epoch": 0.67, "grad_norm": 0.4200964570045471, "learning_rate": 0.0005286333327450469, "loss": 1.7973, "step": 20213 }, { "epoch": 0.67, "grad_norm": 0.4452890455722809, "learning_rate": 0.0005286265654225721, "loss": 1.8553, "step": 20214 }, { "epoch": 0.67, "grad_norm": 0.4172634184360504, "learning_rate": 0.0005286197978225783, "loss": 1.8377, "step": 20215 }, { "epoch": 0.67, "grad_norm": 0.42013582587242126, "learning_rate": 0.0005286130299450738, "loss": 1.8049, "step": 20216 }, { "epoch": 0.67, "grad_norm": 0.4253743290901184, "learning_rate": 0.0005286062617900668, "loss": 1.8285, "step": 20217 }, { "epoch": 0.67, "grad_norm": 0.44476836919784546, "learning_rate": 0.0005285994933575655, "loss": 1.89, "step": 20218 }, { "epoch": 0.67, "grad_norm": 0.43420785665512085, "learning_rate": 0.0005285927246475781, "loss": 1.8199, "step": 20219 }, { "epoch": 0.67, "grad_norm": 0.4291638135910034, "learning_rate": 0.0005285859556601127, "loss": 1.8295, "step": 20220 }, { "epoch": 0.67, "grad_norm": 0.43738171458244324, "learning_rate": 0.0005285791863951777, "loss": 1.8548, "step": 20221 }, { "epoch": 0.67, "grad_norm": 0.43232616782188416, "learning_rate": 0.0005285724168527813, "loss": 1.7946, "step": 20222 }, { "epoch": 0.67, "grad_norm": 0.43164199590682983, "learning_rate": 0.0005285656470329315, "loss": 1.8578, "step": 20223 }, { "epoch": 0.67, "grad_norm": 0.4266836941242218, "learning_rate": 0.0005285588769356369, "loss": 1.8157, "step": 20224 }, { "epoch": 0.67, "grad_norm": 0.4495663642883301, "learning_rate": 0.0005285521065609053, "loss": 1.8804, "step": 20225 }, { "epoch": 0.67, "grad_norm": 0.4270748794078827, "learning_rate": 0.0005285453359087452, "loss": 1.7491, "step": 20226 }, { "epoch": 0.67, "grad_norm": 0.4149811863899231, "learning_rate": 0.0005285385649791649, "loss": 1.8683, "step": 20227 }, { "epoch": 0.67, "grad_norm": 0.4272349178791046, "learning_rate": 0.0005285317937721722, "loss": 1.8193, "step": 20228 }, { "epoch": 0.67, "grad_norm": 0.4264763593673706, "learning_rate": 0.0005285250222877756, "loss": 1.85, "step": 20229 }, { "epoch": 0.67, "grad_norm": 0.4391981363296509, "learning_rate": 0.0005285182505259835, "loss": 1.8166, "step": 20230 }, { "epoch": 0.67, "grad_norm": 0.4433019459247589, "learning_rate": 0.0005285114784868037, "loss": 1.8413, "step": 20231 }, { "epoch": 0.67, "grad_norm": 0.4224751591682434, "learning_rate": 0.0005285047061702448, "loss": 1.8747, "step": 20232 }, { "epoch": 0.67, "grad_norm": 0.44612517952919006, "learning_rate": 0.0005284979335763147, "loss": 1.8028, "step": 20233 }, { "epoch": 0.67, "grad_norm": 0.43920719623565674, "learning_rate": 0.0005284911607050218, "loss": 1.8482, "step": 20234 }, { "epoch": 0.67, "grad_norm": 0.43377166986465454, "learning_rate": 0.0005284843875563744, "loss": 1.7712, "step": 20235 }, { "epoch": 0.67, "grad_norm": 0.4351351857185364, "learning_rate": 0.0005284776141303806, "loss": 1.8251, "step": 20236 }, { "epoch": 0.67, "grad_norm": 0.4419845938682556, "learning_rate": 0.0005284708404270487, "loss": 1.9043, "step": 20237 }, { "epoch": 0.67, "grad_norm": 0.42851343750953674, "learning_rate": 0.0005284640664463868, "loss": 1.9286, "step": 20238 }, { "epoch": 0.67, "grad_norm": 0.4480953514575958, "learning_rate": 0.000528457292188403, "loss": 1.7643, "step": 20239 }, { "epoch": 0.67, "grad_norm": 0.4273625910282135, "learning_rate": 0.0005284505176531059, "loss": 1.778, "step": 20240 }, { "epoch": 0.67, "grad_norm": 0.4508304297924042, "learning_rate": 0.0005284437428405035, "loss": 1.8123, "step": 20241 }, { "epoch": 0.67, "grad_norm": 0.42663365602493286, "learning_rate": 0.0005284369677506041, "loss": 1.7812, "step": 20242 }, { "epoch": 0.67, "grad_norm": 0.43466395139694214, "learning_rate": 0.0005284301923834158, "loss": 1.8896, "step": 20243 }, { "epoch": 0.67, "grad_norm": 0.43883442878723145, "learning_rate": 0.0005284234167389469, "loss": 1.8238, "step": 20244 }, { "epoch": 0.67, "grad_norm": 0.4356479048728943, "learning_rate": 0.0005284166408172058, "loss": 1.8605, "step": 20245 }, { "epoch": 0.67, "grad_norm": 0.47693929076194763, "learning_rate": 0.0005284098646182004, "loss": 1.8801, "step": 20246 }, { "epoch": 0.67, "grad_norm": 0.42340558767318726, "learning_rate": 0.000528403088141939, "loss": 1.7817, "step": 20247 }, { "epoch": 0.67, "grad_norm": 0.4298819303512573, "learning_rate": 0.0005283963113884301, "loss": 1.7751, "step": 20248 }, { "epoch": 0.67, "grad_norm": 0.4199047088623047, "learning_rate": 0.0005283895343576816, "loss": 1.8703, "step": 20249 }, { "epoch": 0.67, "grad_norm": 0.4329882562160492, "learning_rate": 0.0005283827570497019, "loss": 1.802, "step": 20250 }, { "epoch": 0.67, "grad_norm": 0.4389045834541321, "learning_rate": 0.0005283759794644992, "loss": 1.8105, "step": 20251 }, { "epoch": 0.67, "grad_norm": 0.43854403495788574, "learning_rate": 0.0005283692016020817, "loss": 1.8974, "step": 20252 }, { "epoch": 0.67, "grad_norm": 0.4410320222377777, "learning_rate": 0.0005283624234624576, "loss": 1.8346, "step": 20253 }, { "epoch": 0.67, "grad_norm": 0.4212051331996918, "learning_rate": 0.0005283556450456353, "loss": 1.895, "step": 20254 }, { "epoch": 0.67, "grad_norm": 0.43108829855918884, "learning_rate": 0.0005283488663516227, "loss": 1.8203, "step": 20255 }, { "epoch": 0.67, "grad_norm": 0.42228513956069946, "learning_rate": 0.0005283420873804283, "loss": 1.8157, "step": 20256 }, { "epoch": 0.67, "grad_norm": 0.4213704764842987, "learning_rate": 0.0005283353081320602, "loss": 1.8146, "step": 20257 }, { "epoch": 0.67, "grad_norm": 0.4236072599887848, "learning_rate": 0.0005283285286065268, "loss": 1.7861, "step": 20258 }, { "epoch": 0.67, "grad_norm": 0.4255898594856262, "learning_rate": 0.0005283217488038361, "loss": 1.8734, "step": 20259 }, { "epoch": 0.67, "grad_norm": 0.4228956401348114, "learning_rate": 0.0005283149687239965, "loss": 1.8523, "step": 20260 }, { "epoch": 0.67, "grad_norm": 0.42469409108161926, "learning_rate": 0.0005283081883670161, "loss": 1.8347, "step": 20261 }, { "epoch": 0.67, "grad_norm": 0.42180126905441284, "learning_rate": 0.0005283014077329033, "loss": 1.7812, "step": 20262 }, { "epoch": 0.67, "grad_norm": 0.4324059784412384, "learning_rate": 0.0005282946268216663, "loss": 1.8325, "step": 20263 }, { "epoch": 0.67, "grad_norm": 0.42322343587875366, "learning_rate": 0.000528287845633313, "loss": 1.8862, "step": 20264 }, { "epoch": 0.67, "grad_norm": 0.43319040536880493, "learning_rate": 0.0005282810641678521, "loss": 1.848, "step": 20265 }, { "epoch": 0.67, "grad_norm": 0.4485810101032257, "learning_rate": 0.0005282742824252916, "loss": 1.7907, "step": 20266 }, { "epoch": 0.67, "grad_norm": 0.4299144148826599, "learning_rate": 0.0005282675004056397, "loss": 1.7991, "step": 20267 }, { "epoch": 0.67, "grad_norm": 0.43630120158195496, "learning_rate": 0.0005282607181089047, "loss": 1.8026, "step": 20268 }, { "epoch": 0.67, "grad_norm": 0.43144240975379944, "learning_rate": 0.0005282539355350948, "loss": 1.9085, "step": 20269 }, { "epoch": 0.67, "grad_norm": 0.4370034337043762, "learning_rate": 0.0005282471526842183, "loss": 1.882, "step": 20270 }, { "epoch": 0.67, "grad_norm": 0.43051522970199585, "learning_rate": 0.0005282403695562833, "loss": 1.8215, "step": 20271 }, { "epoch": 0.67, "grad_norm": 0.4147031903266907, "learning_rate": 0.0005282335861512982, "loss": 1.7921, "step": 20272 }, { "epoch": 0.67, "grad_norm": 0.45046287775039673, "learning_rate": 0.0005282268024692712, "loss": 1.8943, "step": 20273 }, { "epoch": 0.67, "grad_norm": 0.4314925968647003, "learning_rate": 0.0005282200185102104, "loss": 1.7792, "step": 20274 }, { "epoch": 0.67, "grad_norm": 0.4303025007247925, "learning_rate": 0.0005282132342741242, "loss": 1.802, "step": 20275 }, { "epoch": 0.67, "grad_norm": 0.43419694900512695, "learning_rate": 0.0005282064497610207, "loss": 1.8357, "step": 20276 }, { "epoch": 0.67, "grad_norm": 0.43043676018714905, "learning_rate": 0.0005281996649709082, "loss": 1.7973, "step": 20277 }, { "epoch": 0.67, "grad_norm": 0.4258967936038971, "learning_rate": 0.0005281928799037949, "loss": 1.8852, "step": 20278 }, { "epoch": 0.67, "grad_norm": 0.4223340153694153, "learning_rate": 0.0005281860945596891, "loss": 1.8203, "step": 20279 }, { "epoch": 0.67, "grad_norm": 0.4155799448490143, "learning_rate": 0.000528179308938599, "loss": 1.7673, "step": 20280 }, { "epoch": 0.67, "grad_norm": 0.4318402409553528, "learning_rate": 0.0005281725230405329, "loss": 1.887, "step": 20281 }, { "epoch": 0.67, "grad_norm": 0.43028971552848816, "learning_rate": 0.0005281657368654989, "loss": 1.7842, "step": 20282 }, { "epoch": 0.67, "grad_norm": 0.4665820002555847, "learning_rate": 0.0005281589504135053, "loss": 1.7789, "step": 20283 }, { "epoch": 0.67, "grad_norm": 0.4328245222568512, "learning_rate": 0.0005281521636845604, "loss": 1.887, "step": 20284 }, { "epoch": 0.67, "grad_norm": 0.5293883085250854, "learning_rate": 0.0005281453766786725, "loss": 1.7808, "step": 20285 }, { "epoch": 0.67, "grad_norm": 0.43313518166542053, "learning_rate": 0.0005281385893958496, "loss": 1.8292, "step": 20286 }, { "epoch": 0.67, "grad_norm": 0.45224931836128235, "learning_rate": 0.0005281318018361, "loss": 1.8687, "step": 20287 }, { "epoch": 0.67, "grad_norm": 0.4222164452075958, "learning_rate": 0.0005281250139994321, "loss": 1.854, "step": 20288 }, { "epoch": 0.68, "grad_norm": 0.42499396204948425, "learning_rate": 0.000528118225885854, "loss": 1.8579, "step": 20289 }, { "epoch": 0.68, "grad_norm": 0.45575976371765137, "learning_rate": 0.000528111437495374, "loss": 1.8907, "step": 20290 }, { "epoch": 0.68, "grad_norm": 0.43921685218811035, "learning_rate": 0.0005281046488280004, "loss": 1.8562, "step": 20291 }, { "epoch": 0.68, "grad_norm": 0.4372541010379791, "learning_rate": 0.0005280978598837413, "loss": 1.8597, "step": 20292 }, { "epoch": 0.68, "grad_norm": 0.4188487231731415, "learning_rate": 0.000528091070662605, "loss": 1.8897, "step": 20293 }, { "epoch": 0.68, "grad_norm": 0.4385310709476471, "learning_rate": 0.0005280842811645998, "loss": 1.8643, "step": 20294 }, { "epoch": 0.68, "grad_norm": 0.43928608298301697, "learning_rate": 0.0005280774913897339, "loss": 1.8687, "step": 20295 }, { "epoch": 0.68, "grad_norm": 1.3363044261932373, "learning_rate": 0.0005280707013380154, "loss": 1.762, "step": 20296 }, { "epoch": 0.68, "grad_norm": 0.44897693395614624, "learning_rate": 0.0005280639110094527, "loss": 1.8421, "step": 20297 }, { "epoch": 0.68, "grad_norm": 0.4397863745689392, "learning_rate": 0.0005280571204040541, "loss": 1.8549, "step": 20298 }, { "epoch": 0.68, "grad_norm": 0.42747703194618225, "learning_rate": 0.0005280503295218278, "loss": 1.9299, "step": 20299 }, { "epoch": 0.68, "grad_norm": 0.4498867094516754, "learning_rate": 0.0005280435383627818, "loss": 1.8526, "step": 20300 }, { "epoch": 0.68, "grad_norm": 0.4235362410545349, "learning_rate": 0.0005280367469269247, "loss": 1.8356, "step": 20301 }, { "epoch": 0.68, "grad_norm": 0.41951143741607666, "learning_rate": 0.0005280299552142645, "loss": 1.8829, "step": 20302 }, { "epoch": 0.68, "grad_norm": 0.44739413261413574, "learning_rate": 0.0005280231632248097, "loss": 1.8093, "step": 20303 }, { "epoch": 0.68, "grad_norm": 0.42692312598228455, "learning_rate": 0.0005280163709585683, "loss": 1.7453, "step": 20304 }, { "epoch": 0.68, "grad_norm": 0.42278701066970825, "learning_rate": 0.0005280095784155486, "loss": 1.8114, "step": 20305 }, { "epoch": 0.68, "grad_norm": 0.4419078230857849, "learning_rate": 0.0005280027855957588, "loss": 1.8499, "step": 20306 }, { "epoch": 0.68, "grad_norm": 0.42743274569511414, "learning_rate": 0.0005279959924992073, "loss": 1.8299, "step": 20307 }, { "epoch": 0.68, "grad_norm": 0.4414307773113251, "learning_rate": 0.0005279891991259023, "loss": 1.8766, "step": 20308 }, { "epoch": 0.68, "grad_norm": 0.4374157190322876, "learning_rate": 0.0005279824054758519, "loss": 1.7918, "step": 20309 }, { "epoch": 0.68, "grad_norm": 0.4451872408390045, "learning_rate": 0.0005279756115490644, "loss": 1.813, "step": 20310 }, { "epoch": 0.68, "grad_norm": 0.42524847388267517, "learning_rate": 0.0005279688173455483, "loss": 1.8137, "step": 20311 }, { "epoch": 0.68, "grad_norm": 0.4471561908721924, "learning_rate": 0.0005279620228653115, "loss": 1.8338, "step": 20312 }, { "epoch": 0.68, "grad_norm": 0.44536685943603516, "learning_rate": 0.0005279552281083625, "loss": 1.8911, "step": 20313 }, { "epoch": 0.68, "grad_norm": 0.4522720277309418, "learning_rate": 0.0005279484330747093, "loss": 1.8817, "step": 20314 }, { "epoch": 0.68, "grad_norm": 0.4480644464492798, "learning_rate": 0.0005279416377643604, "loss": 1.8481, "step": 20315 }, { "epoch": 0.68, "grad_norm": 0.42720353603363037, "learning_rate": 0.0005279348421773238, "loss": 1.849, "step": 20316 }, { "epoch": 0.68, "grad_norm": 0.4315430223941803, "learning_rate": 0.000527928046313608, "loss": 1.8398, "step": 20317 }, { "epoch": 0.68, "grad_norm": 0.4263695776462555, "learning_rate": 0.0005279212501732211, "loss": 1.8874, "step": 20318 }, { "epoch": 0.68, "grad_norm": 0.7406672239303589, "learning_rate": 0.0005279144537561714, "loss": 1.9036, "step": 20319 }, { "epoch": 0.68, "grad_norm": 0.42313459515571594, "learning_rate": 0.0005279076570624671, "loss": 1.891, "step": 20320 }, { "epoch": 0.68, "grad_norm": 0.44088876247406006, "learning_rate": 0.0005279008600921167, "loss": 1.8069, "step": 20321 }, { "epoch": 0.68, "grad_norm": 0.44097229838371277, "learning_rate": 0.0005278940628451279, "loss": 1.8886, "step": 20322 }, { "epoch": 0.68, "grad_norm": 0.4309934675693512, "learning_rate": 0.0005278872653215096, "loss": 1.7993, "step": 20323 }, { "epoch": 0.68, "grad_norm": 0.4313264787197113, "learning_rate": 0.0005278804675212694, "loss": 1.7568, "step": 20324 }, { "epoch": 0.68, "grad_norm": 0.4317576587200165, "learning_rate": 0.0005278736694444161, "loss": 1.7961, "step": 20325 }, { "epoch": 0.68, "grad_norm": 0.583773136138916, "learning_rate": 0.0005278668710909577, "loss": 1.8479, "step": 20326 }, { "epoch": 0.68, "grad_norm": 0.43089520931243896, "learning_rate": 0.0005278600724609025, "loss": 1.9808, "step": 20327 }, { "epoch": 0.68, "grad_norm": 0.43019452691078186, "learning_rate": 0.0005278532735542588, "loss": 1.8186, "step": 20328 }, { "epoch": 0.68, "grad_norm": 0.4279603660106659, "learning_rate": 0.0005278464743710348, "loss": 1.8322, "step": 20329 }, { "epoch": 0.68, "grad_norm": 0.4330015182495117, "learning_rate": 0.0005278396749112387, "loss": 1.8724, "step": 20330 }, { "epoch": 0.68, "grad_norm": 0.4334743618965149, "learning_rate": 0.0005278328751748787, "loss": 1.8331, "step": 20331 }, { "epoch": 0.68, "grad_norm": 0.4324412941932678, "learning_rate": 0.0005278260751619633, "loss": 1.9399, "step": 20332 }, { "epoch": 0.68, "grad_norm": 0.4308492839336395, "learning_rate": 0.0005278192748725006, "loss": 1.8479, "step": 20333 }, { "epoch": 0.68, "grad_norm": 0.4305492639541626, "learning_rate": 0.0005278124743064988, "loss": 1.8041, "step": 20334 }, { "epoch": 0.68, "grad_norm": 0.4404592514038086, "learning_rate": 0.0005278056734639662, "loss": 1.8276, "step": 20335 }, { "epoch": 0.68, "grad_norm": 0.4369315803050995, "learning_rate": 0.0005277988723449113, "loss": 1.8143, "step": 20336 }, { "epoch": 0.68, "grad_norm": 0.43130937218666077, "learning_rate": 0.0005277920709493419, "loss": 1.8547, "step": 20337 }, { "epoch": 0.68, "grad_norm": 0.43281298875808716, "learning_rate": 0.0005277852692772665, "loss": 1.8679, "step": 20338 }, { "epoch": 0.68, "grad_norm": 0.42437297105789185, "learning_rate": 0.0005277784673286936, "loss": 1.8196, "step": 20339 }, { "epoch": 0.68, "grad_norm": 0.4320785105228424, "learning_rate": 0.0005277716651036309, "loss": 1.7678, "step": 20340 }, { "epoch": 0.68, "grad_norm": 0.43026819825172424, "learning_rate": 0.0005277648626020871, "loss": 1.8976, "step": 20341 }, { "epoch": 0.68, "grad_norm": 0.4342110753059387, "learning_rate": 0.0005277580598240703, "loss": 1.9097, "step": 20342 }, { "epoch": 0.68, "grad_norm": 0.426174521446228, "learning_rate": 0.0005277512567695888, "loss": 1.7814, "step": 20343 }, { "epoch": 0.68, "grad_norm": 0.42209604382514954, "learning_rate": 0.0005277444534386507, "loss": 1.7652, "step": 20344 }, { "epoch": 0.68, "grad_norm": 0.4117288291454315, "learning_rate": 0.0005277376498312646, "loss": 1.777, "step": 20345 }, { "epoch": 0.68, "grad_norm": 0.4186009168624878, "learning_rate": 0.0005277308459474383, "loss": 1.7708, "step": 20346 }, { "epoch": 0.68, "grad_norm": 0.4330538213253021, "learning_rate": 0.0005277240417871805, "loss": 1.8897, "step": 20347 }, { "epoch": 0.68, "grad_norm": 0.4537063539028168, "learning_rate": 0.0005277172373504993, "loss": 1.8144, "step": 20348 }, { "epoch": 0.68, "grad_norm": 0.43248245120048523, "learning_rate": 0.0005277104326374027, "loss": 1.8166, "step": 20349 }, { "epoch": 0.68, "grad_norm": 0.4377444088459015, "learning_rate": 0.0005277036276478994, "loss": 1.842, "step": 20350 }, { "epoch": 0.68, "grad_norm": 0.43313494324684143, "learning_rate": 0.0005276968223819974, "loss": 1.7743, "step": 20351 }, { "epoch": 0.68, "grad_norm": 0.4205879867076874, "learning_rate": 0.0005276900168397049, "loss": 1.8552, "step": 20352 }, { "epoch": 0.68, "grad_norm": 0.4390937387943268, "learning_rate": 0.0005276832110210304, "loss": 1.8437, "step": 20353 }, { "epoch": 0.68, "grad_norm": 0.42753955721855164, "learning_rate": 0.000527676404925982, "loss": 1.8209, "step": 20354 }, { "epoch": 0.68, "grad_norm": 0.4208548963069916, "learning_rate": 0.0005276695985545679, "loss": 1.8607, "step": 20355 }, { "epoch": 0.68, "grad_norm": 0.4155499339103699, "learning_rate": 0.0005276627919067966, "loss": 1.7926, "step": 20356 }, { "epoch": 0.68, "grad_norm": 0.41945067048072815, "learning_rate": 0.0005276559849826761, "loss": 1.8201, "step": 20357 }, { "epoch": 0.68, "grad_norm": 0.4376509487628937, "learning_rate": 0.0005276491777822148, "loss": 1.9027, "step": 20358 }, { "epoch": 0.68, "grad_norm": 0.4270709156990051, "learning_rate": 0.0005276423703054209, "loss": 1.8641, "step": 20359 }, { "epoch": 0.68, "grad_norm": 0.43205544352531433, "learning_rate": 0.0005276355625523027, "loss": 1.8239, "step": 20360 }, { "epoch": 0.68, "grad_norm": 0.4312463700771332, "learning_rate": 0.0005276287545228686, "loss": 1.8814, "step": 20361 }, { "epoch": 0.68, "grad_norm": 0.4254394769668579, "learning_rate": 0.0005276219462171266, "loss": 1.8059, "step": 20362 }, { "epoch": 0.68, "grad_norm": 0.41973453760147095, "learning_rate": 0.0005276151376350852, "loss": 1.8959, "step": 20363 }, { "epoch": 0.68, "grad_norm": 0.4379764199256897, "learning_rate": 0.0005276083287767525, "loss": 1.7362, "step": 20364 }, { "epoch": 0.68, "grad_norm": 0.41930946707725525, "learning_rate": 0.0005276015196421369, "loss": 1.7788, "step": 20365 }, { "epoch": 0.68, "grad_norm": 0.4295048117637634, "learning_rate": 0.0005275947102312465, "loss": 1.8916, "step": 20366 }, { "epoch": 0.68, "grad_norm": 0.43311798572540283, "learning_rate": 0.0005275879005440896, "loss": 1.8185, "step": 20367 }, { "epoch": 0.68, "grad_norm": 0.42324206233024597, "learning_rate": 0.0005275810905806747, "loss": 1.7882, "step": 20368 }, { "epoch": 0.68, "grad_norm": 0.4473230540752411, "learning_rate": 0.0005275742803410098, "loss": 1.8499, "step": 20369 }, { "epoch": 0.68, "grad_norm": 0.44501161575317383, "learning_rate": 0.0005275674698251032, "loss": 1.7802, "step": 20370 }, { "epoch": 0.68, "grad_norm": 0.4462064802646637, "learning_rate": 0.0005275606590329633, "loss": 1.8502, "step": 20371 }, { "epoch": 0.68, "grad_norm": 0.4229672849178314, "learning_rate": 0.0005275538479645983, "loss": 1.7909, "step": 20372 }, { "epoch": 0.68, "grad_norm": 0.4197074770927429, "learning_rate": 0.0005275470366200163, "loss": 1.8724, "step": 20373 }, { "epoch": 0.68, "grad_norm": 0.4403885006904602, "learning_rate": 0.0005275402249992259, "loss": 1.8268, "step": 20374 }, { "epoch": 0.68, "grad_norm": 0.4352027177810669, "learning_rate": 0.0005275334131022351, "loss": 1.807, "step": 20375 }, { "epoch": 0.68, "grad_norm": 0.44190940260887146, "learning_rate": 0.0005275266009290523, "loss": 1.8444, "step": 20376 }, { "epoch": 0.68, "grad_norm": 0.4211944043636322, "learning_rate": 0.0005275197884796858, "loss": 1.8579, "step": 20377 }, { "epoch": 0.68, "grad_norm": 0.4531008303165436, "learning_rate": 0.0005275129757541437, "loss": 1.8494, "step": 20378 }, { "epoch": 0.68, "grad_norm": 0.4267720878124237, "learning_rate": 0.0005275061627524344, "loss": 1.8401, "step": 20379 }, { "epoch": 0.68, "grad_norm": 0.42020630836486816, "learning_rate": 0.0005274993494745661, "loss": 1.8112, "step": 20380 }, { "epoch": 0.68, "grad_norm": 0.4288039803504944, "learning_rate": 0.0005274925359205472, "loss": 1.8546, "step": 20381 }, { "epoch": 0.68, "grad_norm": 0.42806488275527954, "learning_rate": 0.0005274857220903858, "loss": 1.842, "step": 20382 }, { "epoch": 0.68, "grad_norm": 0.414824515581131, "learning_rate": 0.0005274789079840903, "loss": 1.8358, "step": 20383 }, { "epoch": 0.68, "grad_norm": 0.4358390271663666, "learning_rate": 0.000527472093601669, "loss": 1.8459, "step": 20384 }, { "epoch": 0.68, "grad_norm": 0.43803706765174866, "learning_rate": 0.00052746527894313, "loss": 1.7496, "step": 20385 }, { "epoch": 0.68, "grad_norm": 0.43567371368408203, "learning_rate": 0.0005274584640084816, "loss": 1.917, "step": 20386 }, { "epoch": 0.68, "grad_norm": 0.4351249933242798, "learning_rate": 0.0005274516487977322, "loss": 1.8014, "step": 20387 }, { "epoch": 0.68, "grad_norm": 0.4304196536540985, "learning_rate": 0.00052744483331089, "loss": 1.8446, "step": 20388 }, { "epoch": 0.68, "grad_norm": 0.4365759789943695, "learning_rate": 0.0005274380175479633, "loss": 1.8326, "step": 20389 }, { "epoch": 0.68, "grad_norm": 0.41970935463905334, "learning_rate": 0.0005274312015089603, "loss": 1.7964, "step": 20390 }, { "epoch": 0.68, "grad_norm": 0.42633283138275146, "learning_rate": 0.0005274243851938895, "loss": 1.7642, "step": 20391 }, { "epoch": 0.68, "grad_norm": 0.4422265887260437, "learning_rate": 0.0005274175686027589, "loss": 1.8559, "step": 20392 }, { "epoch": 0.68, "grad_norm": 0.442388653755188, "learning_rate": 0.0005274107517355769, "loss": 1.9042, "step": 20393 }, { "epoch": 0.68, "grad_norm": 0.4978560507297516, "learning_rate": 0.0005274039345923517, "loss": 1.8174, "step": 20394 }, { "epoch": 0.68, "grad_norm": 0.4436311721801758, "learning_rate": 0.0005273971171730917, "loss": 1.8476, "step": 20395 }, { "epoch": 0.68, "grad_norm": 0.43093129992485046, "learning_rate": 0.0005273902994778049, "loss": 1.8322, "step": 20396 }, { "epoch": 0.68, "grad_norm": 0.43479955196380615, "learning_rate": 0.0005273834815065, "loss": 1.8822, "step": 20397 }, { "epoch": 0.68, "grad_norm": 0.4229435920715332, "learning_rate": 0.0005273766632591851, "loss": 1.8497, "step": 20398 }, { "epoch": 0.68, "grad_norm": 0.43668705224990845, "learning_rate": 0.0005273698447358684, "loss": 1.8883, "step": 20399 }, { "epoch": 0.68, "grad_norm": 0.4267177879810333, "learning_rate": 0.0005273630259365581, "loss": 1.7775, "step": 20400 }, { "epoch": 0.68, "grad_norm": 0.42692041397094727, "learning_rate": 0.0005273562068612626, "loss": 1.839, "step": 20401 }, { "epoch": 0.68, "grad_norm": 0.42885997891426086, "learning_rate": 0.0005273493875099902, "loss": 1.7426, "step": 20402 }, { "epoch": 0.68, "grad_norm": 0.41649356484413147, "learning_rate": 0.0005273425678827492, "loss": 1.8607, "step": 20403 }, { "epoch": 0.68, "grad_norm": 0.43118953704833984, "learning_rate": 0.0005273357479795477, "loss": 1.9077, "step": 20404 }, { "epoch": 0.68, "grad_norm": 0.4302089512348175, "learning_rate": 0.0005273289278003941, "loss": 1.8051, "step": 20405 }, { "epoch": 0.68, "grad_norm": 0.4249623715877533, "learning_rate": 0.0005273221073452968, "loss": 1.8577, "step": 20406 }, { "epoch": 0.68, "grad_norm": 0.4242691099643707, "learning_rate": 0.0005273152866142639, "loss": 1.8674, "step": 20407 }, { "epoch": 0.68, "grad_norm": 0.42119988799095154, "learning_rate": 0.0005273084656073038, "loss": 1.8699, "step": 20408 }, { "epoch": 0.68, "grad_norm": 0.41484615206718445, "learning_rate": 0.0005273016443244246, "loss": 1.8508, "step": 20409 }, { "epoch": 0.68, "grad_norm": 0.43391868472099304, "learning_rate": 0.0005272948227656348, "loss": 1.7522, "step": 20410 }, { "epoch": 0.68, "grad_norm": 0.42878562211990356, "learning_rate": 0.0005272880009309424, "loss": 1.8402, "step": 20411 }, { "epoch": 0.68, "grad_norm": 0.43090951442718506, "learning_rate": 0.000527281178820356, "loss": 1.8191, "step": 20412 }, { "epoch": 0.68, "grad_norm": 0.4266485869884491, "learning_rate": 0.0005272743564338837, "loss": 1.8458, "step": 20413 }, { "epoch": 0.68, "grad_norm": 0.4384427070617676, "learning_rate": 0.0005272675337715338, "loss": 1.7518, "step": 20414 }, { "epoch": 0.68, "grad_norm": 0.46173566579818726, "learning_rate": 0.0005272607108333147, "loss": 1.8706, "step": 20415 }, { "epoch": 0.68, "grad_norm": 0.41546276211738586, "learning_rate": 0.0005272538876192344, "loss": 1.8358, "step": 20416 }, { "epoch": 0.68, "grad_norm": 0.45620283484458923, "learning_rate": 0.0005272470641293016, "loss": 1.8642, "step": 20417 }, { "epoch": 0.68, "grad_norm": 0.6842698454856873, "learning_rate": 0.0005272402403635241, "loss": 1.8439, "step": 20418 }, { "epoch": 0.68, "grad_norm": 0.42437946796417236, "learning_rate": 0.0005272334163219106, "loss": 1.8385, "step": 20419 }, { "epoch": 0.68, "grad_norm": 0.4341936409473419, "learning_rate": 0.0005272265920044692, "loss": 1.7661, "step": 20420 }, { "epoch": 0.68, "grad_norm": 0.4369426965713501, "learning_rate": 0.0005272197674112081, "loss": 1.8599, "step": 20421 }, { "epoch": 0.68, "grad_norm": 0.42879801988601685, "learning_rate": 0.0005272129425421358, "loss": 1.8601, "step": 20422 }, { "epoch": 0.68, "grad_norm": 0.4551779329776764, "learning_rate": 0.0005272061173972604, "loss": 1.8613, "step": 20423 }, { "epoch": 0.68, "grad_norm": 0.4146762788295746, "learning_rate": 0.0005271992919765903, "loss": 1.8029, "step": 20424 }, { "epoch": 0.68, "grad_norm": 0.43403762578964233, "learning_rate": 0.0005271924662801336, "loss": 1.8104, "step": 20425 }, { "epoch": 0.68, "grad_norm": 0.4471675157546997, "learning_rate": 0.0005271856403078988, "loss": 1.8712, "step": 20426 }, { "epoch": 0.68, "grad_norm": 0.4349175989627838, "learning_rate": 0.0005271788140598941, "loss": 1.8409, "step": 20427 }, { "epoch": 0.68, "grad_norm": 0.4246508479118347, "learning_rate": 0.000527171987536128, "loss": 1.8041, "step": 20428 }, { "epoch": 0.68, "grad_norm": 0.4356014132499695, "learning_rate": 0.0005271651607366083, "loss": 1.8108, "step": 20429 }, { "epoch": 0.68, "grad_norm": 0.42504292726516724, "learning_rate": 0.0005271583336613437, "loss": 1.7823, "step": 20430 }, { "epoch": 0.68, "grad_norm": 0.419523686170578, "learning_rate": 0.0005271515063103423, "loss": 1.8136, "step": 20431 }, { "epoch": 0.68, "grad_norm": 0.4496827721595764, "learning_rate": 0.0005271446786836125, "loss": 1.7996, "step": 20432 }, { "epoch": 0.68, "grad_norm": 0.4237605929374695, "learning_rate": 0.0005271378507811624, "loss": 1.8444, "step": 20433 }, { "epoch": 0.68, "grad_norm": 0.42176660895347595, "learning_rate": 0.0005271310226030005, "loss": 1.844, "step": 20434 }, { "epoch": 0.68, "grad_norm": 0.4199618399143219, "learning_rate": 0.000527124194149135, "loss": 1.8432, "step": 20435 }, { "epoch": 0.68, "grad_norm": 0.42937466502189636, "learning_rate": 0.0005271173654195743, "loss": 1.833, "step": 20436 }, { "epoch": 0.68, "grad_norm": 0.4378226697444916, "learning_rate": 0.0005271105364143264, "loss": 1.8073, "step": 20437 }, { "epoch": 0.68, "grad_norm": 0.42360469698905945, "learning_rate": 0.0005271037071333999, "loss": 1.8196, "step": 20438 }, { "epoch": 0.68, "grad_norm": 0.42624950408935547, "learning_rate": 0.0005270968775768029, "loss": 1.7976, "step": 20439 }, { "epoch": 0.68, "grad_norm": 0.42907464504241943, "learning_rate": 0.0005270900477445438, "loss": 1.9626, "step": 20440 }, { "epoch": 0.68, "grad_norm": 0.42252662777900696, "learning_rate": 0.0005270832176366307, "loss": 1.825, "step": 20441 }, { "epoch": 0.68, "grad_norm": 0.4336209297180176, "learning_rate": 0.0005270763872530723, "loss": 1.8919, "step": 20442 }, { "epoch": 0.68, "grad_norm": 0.42989471554756165, "learning_rate": 0.0005270695565938764, "loss": 1.8731, "step": 20443 }, { "epoch": 0.68, "grad_norm": 0.4253782331943512, "learning_rate": 0.0005270627256590515, "loss": 1.7994, "step": 20444 }, { "epoch": 0.68, "grad_norm": 0.4160114824771881, "learning_rate": 0.0005270558944486061, "loss": 1.726, "step": 20445 }, { "epoch": 0.68, "grad_norm": 0.4435305893421173, "learning_rate": 0.0005270490629625482, "loss": 1.8902, "step": 20446 }, { "epoch": 0.68, "grad_norm": 0.4327394962310791, "learning_rate": 0.0005270422312008862, "loss": 1.8613, "step": 20447 }, { "epoch": 0.68, "grad_norm": 0.42638444900512695, "learning_rate": 0.0005270353991636283, "loss": 1.9175, "step": 20448 }, { "epoch": 0.68, "grad_norm": 0.43767499923706055, "learning_rate": 0.0005270285668507829, "loss": 1.8488, "step": 20449 }, { "epoch": 0.68, "grad_norm": 0.4213142395019531, "learning_rate": 0.0005270217342623584, "loss": 1.8142, "step": 20450 }, { "epoch": 0.68, "grad_norm": 0.44610416889190674, "learning_rate": 0.0005270149013983629, "loss": 1.7501, "step": 20451 }, { "epoch": 0.68, "grad_norm": 0.41589707136154175, "learning_rate": 0.0005270080682588047, "loss": 1.8226, "step": 20452 }, { "epoch": 0.68, "grad_norm": 0.4274088144302368, "learning_rate": 0.0005270012348436922, "loss": 1.8007, "step": 20453 }, { "epoch": 0.68, "grad_norm": 0.4274974763393402, "learning_rate": 0.0005269944011530336, "loss": 1.8713, "step": 20454 }, { "epoch": 0.68, "grad_norm": 0.42015162110328674, "learning_rate": 0.0005269875671868372, "loss": 1.8799, "step": 20455 }, { "epoch": 0.68, "grad_norm": 0.425801157951355, "learning_rate": 0.0005269807329451115, "loss": 1.9005, "step": 20456 }, { "epoch": 0.68, "grad_norm": 0.45086875557899475, "learning_rate": 0.0005269738984278646, "loss": 1.818, "step": 20457 }, { "epoch": 0.68, "grad_norm": 0.41479936242103577, "learning_rate": 0.0005269670636351047, "loss": 1.8548, "step": 20458 }, { "epoch": 0.68, "grad_norm": 0.42750197649002075, "learning_rate": 0.0005269602285668404, "loss": 1.791, "step": 20459 }, { "epoch": 0.68, "grad_norm": 0.43052008748054504, "learning_rate": 0.0005269533932230797, "loss": 1.799, "step": 20460 }, { "epoch": 0.68, "grad_norm": 0.4304203391075134, "learning_rate": 0.000526946557603831, "loss": 1.8495, "step": 20461 }, { "epoch": 0.68, "grad_norm": 0.43047019839286804, "learning_rate": 0.0005269397217091028, "loss": 1.8536, "step": 20462 }, { "epoch": 0.68, "grad_norm": 0.4380532503128052, "learning_rate": 0.000526932885538903, "loss": 1.8128, "step": 20463 }, { "epoch": 0.68, "grad_norm": 0.4272099733352661, "learning_rate": 0.0005269260490932402, "loss": 1.8553, "step": 20464 }, { "epoch": 0.68, "grad_norm": 0.4249746799468994, "learning_rate": 0.0005269192123721226, "loss": 1.7507, "step": 20465 }, { "epoch": 0.68, "grad_norm": 0.43733879923820496, "learning_rate": 0.0005269123753755586, "loss": 1.8659, "step": 20466 }, { "epoch": 0.68, "grad_norm": 0.42919060587882996, "learning_rate": 0.0005269055381035563, "loss": 1.8256, "step": 20467 }, { "epoch": 0.68, "grad_norm": 0.4125317931175232, "learning_rate": 0.0005268987005561242, "loss": 1.8448, "step": 20468 }, { "epoch": 0.68, "grad_norm": 0.42735230922698975, "learning_rate": 0.0005268918627332704, "loss": 1.8417, "step": 20469 }, { "epoch": 0.68, "grad_norm": 0.4240467846393585, "learning_rate": 0.0005268850246350034, "loss": 1.8077, "step": 20470 }, { "epoch": 0.68, "grad_norm": 0.42431390285491943, "learning_rate": 0.0005268781862613314, "loss": 1.8278, "step": 20471 }, { "epoch": 0.68, "grad_norm": 0.4249415695667267, "learning_rate": 0.0005268713476122627, "loss": 1.8742, "step": 20472 }, { "epoch": 0.68, "grad_norm": 0.43075305223464966, "learning_rate": 0.0005268645086878056, "loss": 1.762, "step": 20473 }, { "epoch": 0.68, "grad_norm": 0.4392284154891968, "learning_rate": 0.0005268576694879684, "loss": 1.8124, "step": 20474 }, { "epoch": 0.68, "grad_norm": 0.43718183040618896, "learning_rate": 0.0005268508300127594, "loss": 1.8636, "step": 20475 }, { "epoch": 0.68, "grad_norm": 0.4211423099040985, "learning_rate": 0.000526843990262187, "loss": 1.7424, "step": 20476 }, { "epoch": 0.68, "grad_norm": 0.4341590702533722, "learning_rate": 0.0005268371502362592, "loss": 1.8032, "step": 20477 }, { "epoch": 0.68, "grad_norm": 0.4434497356414795, "learning_rate": 0.0005268303099349847, "loss": 1.873, "step": 20478 }, { "epoch": 0.68, "grad_norm": 0.42289721965789795, "learning_rate": 0.0005268234693583717, "loss": 1.8579, "step": 20479 }, { "epoch": 0.68, "grad_norm": 0.44074341654777527, "learning_rate": 0.0005268166285064283, "loss": 1.8042, "step": 20480 }, { "epoch": 0.68, "grad_norm": 0.4372251629829407, "learning_rate": 0.000526809787379163, "loss": 1.8685, "step": 20481 }, { "epoch": 0.68, "grad_norm": 0.444282591342926, "learning_rate": 0.000526802945976584, "loss": 1.8422, "step": 20482 }, { "epoch": 0.68, "grad_norm": 0.42057496309280396, "learning_rate": 0.0005267961042986997, "loss": 1.8787, "step": 20483 }, { "epoch": 0.68, "grad_norm": 0.438313364982605, "learning_rate": 0.0005267892623455183, "loss": 1.8832, "step": 20484 }, { "epoch": 0.68, "grad_norm": 0.5913727283477783, "learning_rate": 0.0005267824201170481, "loss": 1.8744, "step": 20485 }, { "epoch": 0.68, "grad_norm": 0.42746832966804504, "learning_rate": 0.0005267755776132975, "loss": 1.8016, "step": 20486 }, { "epoch": 0.68, "grad_norm": 0.4315783977508545, "learning_rate": 0.0005267687348342748, "loss": 1.8831, "step": 20487 }, { "epoch": 0.68, "grad_norm": 0.4437658190727234, "learning_rate": 0.0005267618917799882, "loss": 1.9403, "step": 20488 }, { "epoch": 0.68, "grad_norm": 0.4420064687728882, "learning_rate": 0.0005267550484504461, "loss": 1.8795, "step": 20489 }, { "epoch": 0.68, "grad_norm": 0.4299720227718353, "learning_rate": 0.0005267482048456568, "loss": 1.752, "step": 20490 }, { "epoch": 0.68, "grad_norm": 0.42948052287101746, "learning_rate": 0.0005267413609656286, "loss": 1.8957, "step": 20491 }, { "epoch": 0.68, "grad_norm": 0.4387151896953583, "learning_rate": 0.0005267345168103698, "loss": 1.8273, "step": 20492 }, { "epoch": 0.68, "grad_norm": 0.43870052695274353, "learning_rate": 0.0005267276723798887, "loss": 1.8722, "step": 20493 }, { "epoch": 0.68, "grad_norm": 0.4336914122104645, "learning_rate": 0.0005267208276741937, "loss": 1.9062, "step": 20494 }, { "epoch": 0.68, "grad_norm": 0.41999560594558716, "learning_rate": 0.0005267139826932928, "loss": 1.7889, "step": 20495 }, { "epoch": 0.68, "grad_norm": 0.4442950487136841, "learning_rate": 0.0005267071374371946, "loss": 1.8617, "step": 20496 }, { "epoch": 0.68, "grad_norm": 0.4360891580581665, "learning_rate": 0.0005267002919059075, "loss": 1.9245, "step": 20497 }, { "epoch": 0.68, "grad_norm": 0.4319659173488617, "learning_rate": 0.0005266934460994395, "loss": 1.847, "step": 20498 }, { "epoch": 0.68, "grad_norm": 0.41788342595100403, "learning_rate": 0.0005266866000177991, "loss": 1.8631, "step": 20499 }, { "epoch": 0.68, "grad_norm": 0.44361430406570435, "learning_rate": 0.0005266797536609946, "loss": 1.8639, "step": 20500 }, { "epoch": 0.68, "grad_norm": 0.4430995285511017, "learning_rate": 0.0005266729070290342, "loss": 1.7526, "step": 20501 }, { "epoch": 0.68, "grad_norm": 0.4400624632835388, "learning_rate": 0.0005266660601219263, "loss": 1.854, "step": 20502 }, { "epoch": 0.68, "grad_norm": 0.4241693615913391, "learning_rate": 0.0005266592129396793, "loss": 1.793, "step": 20503 }, { "epoch": 0.68, "grad_norm": 0.5402628183364868, "learning_rate": 0.0005266523654823013, "loss": 1.8357, "step": 20504 }, { "epoch": 0.68, "grad_norm": 0.4270091652870178, "learning_rate": 0.0005266455177498007, "loss": 1.7854, "step": 20505 }, { "epoch": 0.68, "grad_norm": 0.43477189540863037, "learning_rate": 0.0005266386697421859, "loss": 1.8631, "step": 20506 }, { "epoch": 0.68, "grad_norm": 0.4326498210430145, "learning_rate": 0.0005266318214594652, "loss": 1.8595, "step": 20507 }, { "epoch": 0.68, "grad_norm": 0.43508216738700867, "learning_rate": 0.0005266249729016468, "loss": 1.8601, "step": 20508 }, { "epoch": 0.68, "grad_norm": 0.44390901923179626, "learning_rate": 0.0005266181240687391, "loss": 1.884, "step": 20509 }, { "epoch": 0.68, "grad_norm": 0.4749302566051483, "learning_rate": 0.0005266112749607504, "loss": 1.7928, "step": 20510 }, { "epoch": 0.68, "grad_norm": 0.4310998320579529, "learning_rate": 0.0005266044255776889, "loss": 1.8889, "step": 20511 }, { "epoch": 0.68, "grad_norm": 0.4302184581756592, "learning_rate": 0.0005265975759195631, "loss": 1.8157, "step": 20512 }, { "epoch": 0.68, "grad_norm": 0.42550966143608093, "learning_rate": 0.0005265907259863812, "loss": 1.7499, "step": 20513 }, { "epoch": 0.68, "grad_norm": 0.4492860436439514, "learning_rate": 0.0005265838757781517, "loss": 1.8523, "step": 20514 }, { "epoch": 0.68, "grad_norm": 0.42614516615867615, "learning_rate": 0.0005265770252948826, "loss": 1.8786, "step": 20515 }, { "epoch": 0.68, "grad_norm": 0.44091445207595825, "learning_rate": 0.0005265701745365824, "loss": 1.8626, "step": 20516 }, { "epoch": 0.68, "grad_norm": 0.43666887283325195, "learning_rate": 0.0005265633235032594, "loss": 1.8267, "step": 20517 }, { "epoch": 0.68, "grad_norm": 0.43895572423934937, "learning_rate": 0.0005265564721949218, "loss": 1.8521, "step": 20518 }, { "epoch": 0.68, "grad_norm": 0.4289247989654541, "learning_rate": 0.0005265496206115783, "loss": 1.8334, "step": 20519 }, { "epoch": 0.68, "grad_norm": 0.4280698299407959, "learning_rate": 0.0005265427687532367, "loss": 1.7807, "step": 20520 }, { "epoch": 0.68, "grad_norm": 0.43971961736679077, "learning_rate": 0.0005265359166199056, "loss": 1.8589, "step": 20521 }, { "epoch": 0.68, "grad_norm": 0.44468948245048523, "learning_rate": 0.0005265290642115934, "loss": 1.8236, "step": 20522 }, { "epoch": 0.68, "grad_norm": 0.4259647727012634, "learning_rate": 0.0005265222115283082, "loss": 1.8258, "step": 20523 }, { "epoch": 0.68, "grad_norm": 0.43383634090423584, "learning_rate": 0.0005265153585700584, "loss": 1.779, "step": 20524 }, { "epoch": 0.68, "grad_norm": 0.454573392868042, "learning_rate": 0.0005265085053368523, "loss": 1.817, "step": 20525 }, { "epoch": 0.68, "grad_norm": 0.4334258735179901, "learning_rate": 0.0005265016518286983, "loss": 1.8633, "step": 20526 }, { "epoch": 0.68, "grad_norm": 0.4383736848831177, "learning_rate": 0.0005264947980456046, "loss": 1.8519, "step": 20527 }, { "epoch": 0.68, "grad_norm": 0.46132707595825195, "learning_rate": 0.0005264879439875797, "loss": 1.7677, "step": 20528 }, { "epoch": 0.68, "grad_norm": 0.4914330840110779, "learning_rate": 0.0005264810896546317, "loss": 1.8525, "step": 20529 }, { "epoch": 0.68, "grad_norm": 0.4204322099685669, "learning_rate": 0.000526474235046769, "loss": 1.8495, "step": 20530 }, { "epoch": 0.68, "grad_norm": 0.4430014193058014, "learning_rate": 0.0005264673801640001, "loss": 1.8491, "step": 20531 }, { "epoch": 0.68, "grad_norm": 0.43647894263267517, "learning_rate": 0.0005264605250063331, "loss": 1.8865, "step": 20532 }, { "epoch": 0.68, "grad_norm": 0.44268015027046204, "learning_rate": 0.0005264536695737764, "loss": 1.8817, "step": 20533 }, { "epoch": 0.68, "grad_norm": 0.4240880012512207, "learning_rate": 0.0005264468138663383, "loss": 1.8723, "step": 20534 }, { "epoch": 0.68, "grad_norm": 0.42847222089767456, "learning_rate": 0.000526439957884027, "loss": 1.8431, "step": 20535 }, { "epoch": 0.68, "grad_norm": 0.42988792061805725, "learning_rate": 0.0005264331016268512, "loss": 1.8716, "step": 20536 }, { "epoch": 0.68, "grad_norm": 0.43624448776245117, "learning_rate": 0.0005264262450948189, "loss": 1.8775, "step": 20537 }, { "epoch": 0.68, "grad_norm": 0.43234241008758545, "learning_rate": 0.0005264193882879383, "loss": 1.8922, "step": 20538 }, { "epoch": 0.68, "grad_norm": 0.4436122477054596, "learning_rate": 0.0005264125312062182, "loss": 1.8859, "step": 20539 }, { "epoch": 0.68, "grad_norm": 0.42694908380508423, "learning_rate": 0.0005264056738496666, "loss": 1.9074, "step": 20540 }, { "epoch": 0.68, "grad_norm": 0.4211306571960449, "learning_rate": 0.0005263988162182918, "loss": 1.8566, "step": 20541 }, { "epoch": 0.68, "grad_norm": 0.42632097005844116, "learning_rate": 0.0005263919583121022, "loss": 1.8548, "step": 20542 }, { "epoch": 0.68, "grad_norm": 0.43242985010147095, "learning_rate": 0.0005263851001311061, "loss": 1.8595, "step": 20543 }, { "epoch": 0.68, "grad_norm": 0.41518762707710266, "learning_rate": 0.0005263782416753118, "loss": 1.7987, "step": 20544 }, { "epoch": 0.68, "grad_norm": 0.43775877356529236, "learning_rate": 0.0005263713829447278, "loss": 1.7824, "step": 20545 }, { "epoch": 0.68, "grad_norm": 0.43159979581832886, "learning_rate": 0.0005263645239393623, "loss": 1.8873, "step": 20546 }, { "epoch": 0.68, "grad_norm": 0.4467698037624359, "learning_rate": 0.0005263576646592235, "loss": 1.759, "step": 20547 }, { "epoch": 0.68, "grad_norm": 0.4253159761428833, "learning_rate": 0.0005263508051043199, "loss": 1.8481, "step": 20548 }, { "epoch": 0.68, "grad_norm": 0.44082191586494446, "learning_rate": 0.0005263439452746597, "loss": 1.8075, "step": 20549 }, { "epoch": 0.68, "grad_norm": 0.4210634231567383, "learning_rate": 0.0005263370851702515, "loss": 1.8611, "step": 20550 }, { "epoch": 0.68, "grad_norm": 0.587344229221344, "learning_rate": 0.0005263302247911032, "loss": 1.7998, "step": 20551 }, { "epoch": 0.68, "grad_norm": 0.439858615398407, "learning_rate": 0.0005263233641372235, "loss": 1.8419, "step": 20552 }, { "epoch": 0.68, "grad_norm": 0.43736469745635986, "learning_rate": 0.0005263165032086206, "loss": 1.832, "step": 20553 }, { "epoch": 0.68, "grad_norm": 0.43568864464759827, "learning_rate": 0.0005263096420053026, "loss": 1.7898, "step": 20554 }, { "epoch": 0.68, "grad_norm": 0.4398568272590637, "learning_rate": 0.0005263027805272783, "loss": 1.8382, "step": 20555 }, { "epoch": 0.68, "grad_norm": 0.4491257071495056, "learning_rate": 0.0005262959187745557, "loss": 1.8397, "step": 20556 }, { "epoch": 0.68, "grad_norm": 0.4368950128555298, "learning_rate": 0.0005262890567471431, "loss": 1.8797, "step": 20557 }, { "epoch": 0.68, "grad_norm": 0.43729233741760254, "learning_rate": 0.000526282194445049, "loss": 1.784, "step": 20558 }, { "epoch": 0.68, "grad_norm": 0.43642181158065796, "learning_rate": 0.0005262753318682818, "loss": 1.8406, "step": 20559 }, { "epoch": 0.68, "grad_norm": 0.42357337474823, "learning_rate": 0.0005262684690168493, "loss": 1.8024, "step": 20560 }, { "epoch": 0.68, "grad_norm": 0.4363960027694702, "learning_rate": 0.0005262616058907605, "loss": 1.883, "step": 20561 }, { "epoch": 0.68, "grad_norm": 0.4206409156322479, "learning_rate": 0.0005262547424900234, "loss": 1.8836, "step": 20562 }, { "epoch": 0.68, "grad_norm": 0.4177224934101105, "learning_rate": 0.0005262478788146464, "loss": 1.8983, "step": 20563 }, { "epoch": 0.68, "grad_norm": 0.45101428031921387, "learning_rate": 0.0005262410148646378, "loss": 1.826, "step": 20564 }, { "epoch": 0.68, "grad_norm": 0.43062135577201843, "learning_rate": 0.000526234150640006, "loss": 1.8303, "step": 20565 }, { "epoch": 0.68, "grad_norm": 0.44355303049087524, "learning_rate": 0.0005262272861407592, "loss": 1.858, "step": 20566 }, { "epoch": 0.68, "grad_norm": 0.4157021939754486, "learning_rate": 0.0005262204213669058, "loss": 1.8294, "step": 20567 }, { "epoch": 0.68, "grad_norm": 0.420736163854599, "learning_rate": 0.0005262135563184542, "loss": 1.7917, "step": 20568 }, { "epoch": 0.68, "grad_norm": 0.4230799376964569, "learning_rate": 0.0005262066909954126, "loss": 1.7578, "step": 20569 }, { "epoch": 0.68, "grad_norm": 0.44223159551620483, "learning_rate": 0.0005261998253977894, "loss": 1.8941, "step": 20570 }, { "epoch": 0.68, "grad_norm": 0.43939536809921265, "learning_rate": 0.000526192959525593, "loss": 1.8262, "step": 20571 }, { "epoch": 0.68, "grad_norm": 0.42818793654441833, "learning_rate": 0.0005261860933788316, "loss": 1.85, "step": 20572 }, { "epoch": 0.68, "grad_norm": 0.4699842929840088, "learning_rate": 0.0005261792269575136, "loss": 1.8426, "step": 20573 }, { "epoch": 0.68, "grad_norm": 0.44807669520378113, "learning_rate": 0.0005261723602616474, "loss": 1.7791, "step": 20574 }, { "epoch": 0.68, "grad_norm": 0.42202913761138916, "learning_rate": 0.0005261654932912413, "loss": 1.8674, "step": 20575 }, { "epoch": 0.68, "grad_norm": 0.43505004048347473, "learning_rate": 0.0005261586260463037, "loss": 1.8306, "step": 20576 }, { "epoch": 0.68, "grad_norm": 0.4416085183620453, "learning_rate": 0.0005261517585268427, "loss": 1.8452, "step": 20577 }, { "epoch": 0.68, "grad_norm": 0.43529120087623596, "learning_rate": 0.0005261448907328669, "loss": 1.8664, "step": 20578 }, { "epoch": 0.68, "grad_norm": 0.47118911147117615, "learning_rate": 0.0005261380226643845, "loss": 1.8363, "step": 20579 }, { "epoch": 0.68, "grad_norm": 0.4304916262626648, "learning_rate": 0.0005261311543214037, "loss": 1.7988, "step": 20580 }, { "epoch": 0.68, "grad_norm": 0.429275780916214, "learning_rate": 0.0005261242857039332, "loss": 1.8734, "step": 20581 }, { "epoch": 0.68, "grad_norm": 0.44796261191368103, "learning_rate": 0.000526117416811981, "loss": 1.8028, "step": 20582 }, { "epoch": 0.68, "grad_norm": 0.4622417092323303, "learning_rate": 0.0005261105476455556, "loss": 1.8146, "step": 20583 }, { "epoch": 0.68, "grad_norm": 0.4222731292247772, "learning_rate": 0.0005261036782046654, "loss": 1.7983, "step": 20584 }, { "epoch": 0.68, "grad_norm": 0.42162176966667175, "learning_rate": 0.0005260968084893186, "loss": 1.7822, "step": 20585 }, { "epoch": 0.68, "grad_norm": 0.48423531651496887, "learning_rate": 0.0005260899384995236, "loss": 1.8333, "step": 20586 }, { "epoch": 0.68, "grad_norm": 0.4799596965312958, "learning_rate": 0.0005260830682352887, "loss": 1.8609, "step": 20587 }, { "epoch": 0.68, "grad_norm": 0.435831755399704, "learning_rate": 0.0005260761976966222, "loss": 1.9032, "step": 20588 }, { "epoch": 0.68, "grad_norm": 0.4543958902359009, "learning_rate": 0.0005260693268835326, "loss": 1.8681, "step": 20589 }, { "epoch": 0.69, "grad_norm": 0.46805283427238464, "learning_rate": 0.0005260624557960281, "loss": 1.792, "step": 20590 }, { "epoch": 0.69, "grad_norm": 0.4474555552005768, "learning_rate": 0.0005260555844341172, "loss": 1.9047, "step": 20591 }, { "epoch": 0.69, "grad_norm": 0.42379030585289, "learning_rate": 0.000526048712797808, "loss": 1.8396, "step": 20592 }, { "epoch": 0.69, "grad_norm": 0.4797168970108032, "learning_rate": 0.000526041840887109, "loss": 1.7991, "step": 20593 }, { "epoch": 0.69, "grad_norm": 0.4980211853981018, "learning_rate": 0.0005260349687020285, "loss": 1.8768, "step": 20594 }, { "epoch": 0.69, "grad_norm": 0.4482768177986145, "learning_rate": 0.0005260280962425749, "loss": 1.8491, "step": 20595 }, { "epoch": 0.69, "grad_norm": 0.4201333522796631, "learning_rate": 0.0005260212235087564, "loss": 1.8429, "step": 20596 }, { "epoch": 0.69, "grad_norm": 0.44646331667900085, "learning_rate": 0.0005260143505005816, "loss": 1.8937, "step": 20597 }, { "epoch": 0.69, "grad_norm": 0.49480903148651123, "learning_rate": 0.0005260074772180586, "loss": 1.8423, "step": 20598 }, { "epoch": 0.69, "grad_norm": 0.46134862303733826, "learning_rate": 0.0005260006036611958, "loss": 1.8826, "step": 20599 }, { "epoch": 0.69, "grad_norm": 0.42474111914634705, "learning_rate": 0.0005259937298300015, "loss": 1.8706, "step": 20600 }, { "epoch": 0.69, "grad_norm": 0.4813714027404785, "learning_rate": 0.0005259868557244842, "loss": 1.89, "step": 20601 }, { "epoch": 0.69, "grad_norm": 0.6701968312263489, "learning_rate": 0.0005259799813446522, "loss": 1.931, "step": 20602 }, { "epoch": 0.69, "grad_norm": 0.4354065954685211, "learning_rate": 0.0005259731066905137, "loss": 1.7839, "step": 20603 }, { "epoch": 0.69, "grad_norm": 0.4318124055862427, "learning_rate": 0.0005259662317620772, "loss": 1.8536, "step": 20604 }, { "epoch": 0.69, "grad_norm": 0.4420067071914673, "learning_rate": 0.0005259593565593509, "loss": 1.8533, "step": 20605 }, { "epoch": 0.69, "grad_norm": 0.48130711913108826, "learning_rate": 0.0005259524810823433, "loss": 1.7626, "step": 20606 }, { "epoch": 0.69, "grad_norm": 0.4354901611804962, "learning_rate": 0.0005259456053310627, "loss": 1.9043, "step": 20607 }, { "epoch": 0.69, "grad_norm": 0.4851897656917572, "learning_rate": 0.0005259387293055174, "loss": 1.8453, "step": 20608 }, { "epoch": 0.69, "grad_norm": 0.45246198773384094, "learning_rate": 0.0005259318530057159, "loss": 1.754, "step": 20609 }, { "epoch": 0.69, "grad_norm": 0.4728808104991913, "learning_rate": 0.0005259249764316663, "loss": 1.7855, "step": 20610 }, { "epoch": 0.69, "grad_norm": 0.424896240234375, "learning_rate": 0.000525918099583377, "loss": 1.7815, "step": 20611 }, { "epoch": 0.69, "grad_norm": 0.42109474539756775, "learning_rate": 0.0005259112224608565, "loss": 1.7939, "step": 20612 }, { "epoch": 0.69, "grad_norm": 0.4474724531173706, "learning_rate": 0.000525904345064113, "loss": 1.8128, "step": 20613 }, { "epoch": 0.69, "grad_norm": 0.45135191082954407, "learning_rate": 0.0005258974673931551, "loss": 1.8221, "step": 20614 }, { "epoch": 0.69, "grad_norm": 0.44782131910324097, "learning_rate": 0.0005258905894479907, "loss": 1.7866, "step": 20615 }, { "epoch": 0.69, "grad_norm": 0.44579100608825684, "learning_rate": 0.0005258837112286285, "loss": 1.805, "step": 20616 }, { "epoch": 0.69, "grad_norm": 0.41848817467689514, "learning_rate": 0.0005258768327350768, "loss": 1.8178, "step": 20617 }, { "epoch": 0.69, "grad_norm": 0.4611976146697998, "learning_rate": 0.000525869953967344, "loss": 1.8445, "step": 20618 }, { "epoch": 0.69, "grad_norm": 0.4584443271160126, "learning_rate": 0.0005258630749254383, "loss": 1.8366, "step": 20619 }, { "epoch": 0.69, "grad_norm": 0.43012067675590515, "learning_rate": 0.000525856195609368, "loss": 1.8055, "step": 20620 }, { "epoch": 0.69, "grad_norm": 0.42912834882736206, "learning_rate": 0.0005258493160191416, "loss": 1.7615, "step": 20621 }, { "epoch": 0.69, "grad_norm": 0.4418506324291229, "learning_rate": 0.0005258424361547674, "loss": 1.9052, "step": 20622 }, { "epoch": 0.69, "grad_norm": 0.44152355194091797, "learning_rate": 0.0005258355560162538, "loss": 1.876, "step": 20623 }, { "epoch": 0.69, "grad_norm": 0.4276822507381439, "learning_rate": 0.0005258286756036091, "loss": 1.854, "step": 20624 }, { "epoch": 0.69, "grad_norm": 0.4324226975440979, "learning_rate": 0.0005258217949168417, "loss": 1.7778, "step": 20625 }, { "epoch": 0.69, "grad_norm": 0.42650479078292847, "learning_rate": 0.0005258149139559598, "loss": 1.779, "step": 20626 }, { "epoch": 0.69, "grad_norm": 0.6039311289787292, "learning_rate": 0.0005258080327209719, "loss": 1.8793, "step": 20627 }, { "epoch": 0.69, "grad_norm": 0.43435800075531006, "learning_rate": 0.0005258011512118863, "loss": 1.8501, "step": 20628 }, { "epoch": 0.69, "grad_norm": 0.4330693185329437, "learning_rate": 0.0005257942694287115, "loss": 1.7639, "step": 20629 }, { "epoch": 0.69, "grad_norm": 0.4346480667591095, "learning_rate": 0.0005257873873714557, "loss": 1.8896, "step": 20630 }, { "epoch": 0.69, "grad_norm": 0.4669162929058075, "learning_rate": 0.0005257805050401272, "loss": 1.8241, "step": 20631 }, { "epoch": 0.69, "grad_norm": 0.4391760528087616, "learning_rate": 0.0005257736224347344, "loss": 1.7914, "step": 20632 }, { "epoch": 0.69, "grad_norm": 0.4117818772792816, "learning_rate": 0.0005257667395552858, "loss": 1.9098, "step": 20633 }, { "epoch": 0.69, "grad_norm": 0.42599907517433167, "learning_rate": 0.0005257598564017896, "loss": 1.7953, "step": 20634 }, { "epoch": 0.69, "grad_norm": 0.4143058657646179, "learning_rate": 0.0005257529729742542, "loss": 1.838, "step": 20635 }, { "epoch": 0.69, "grad_norm": 0.43025898933410645, "learning_rate": 0.000525746089272688, "loss": 1.7983, "step": 20636 }, { "epoch": 0.69, "grad_norm": 0.4243263304233551, "learning_rate": 0.0005257392052970992, "loss": 1.8321, "step": 20637 }, { "epoch": 0.69, "grad_norm": 0.4229939877986908, "learning_rate": 0.0005257323210474963, "loss": 1.7685, "step": 20638 }, { "epoch": 0.69, "grad_norm": 0.43967685103416443, "learning_rate": 0.0005257254365238877, "loss": 1.884, "step": 20639 }, { "epoch": 0.69, "grad_norm": 0.42240801453590393, "learning_rate": 0.0005257185517262816, "loss": 1.786, "step": 20640 }, { "epoch": 0.69, "grad_norm": 0.4139336049556732, "learning_rate": 0.0005257116666546864, "loss": 1.7839, "step": 20641 }, { "epoch": 0.69, "grad_norm": 0.4420436918735504, "learning_rate": 0.0005257047813091106, "loss": 1.8517, "step": 20642 }, { "epoch": 0.69, "grad_norm": 0.432681143283844, "learning_rate": 0.0005256978956895624, "loss": 1.874, "step": 20643 }, { "epoch": 0.69, "grad_norm": 0.4179816246032715, "learning_rate": 0.0005256910097960503, "loss": 1.7796, "step": 20644 }, { "epoch": 0.69, "grad_norm": 0.4515169858932495, "learning_rate": 0.0005256841236285824, "loss": 1.9006, "step": 20645 }, { "epoch": 0.69, "grad_norm": 0.435591459274292, "learning_rate": 0.0005256772371871674, "loss": 1.8106, "step": 20646 }, { "epoch": 0.69, "grad_norm": 0.4247695207595825, "learning_rate": 0.0005256703504718134, "loss": 1.8267, "step": 20647 }, { "epoch": 0.69, "grad_norm": 0.4399816393852234, "learning_rate": 0.0005256634634825288, "loss": 1.8943, "step": 20648 }, { "epoch": 0.69, "grad_norm": 0.4244173467159271, "learning_rate": 0.000525656576219322, "loss": 1.8307, "step": 20649 }, { "epoch": 0.69, "grad_norm": 0.435508131980896, "learning_rate": 0.0005256496886822014, "loss": 1.8785, "step": 20650 }, { "epoch": 0.69, "grad_norm": 0.4159301817417145, "learning_rate": 0.0005256428008711754, "loss": 1.791, "step": 20651 }, { "epoch": 0.69, "grad_norm": 0.4228020906448364, "learning_rate": 0.0005256359127862522, "loss": 1.8756, "step": 20652 }, { "epoch": 0.69, "grad_norm": 0.4163612127304077, "learning_rate": 0.0005256290244274403, "loss": 1.9343, "step": 20653 }, { "epoch": 0.69, "grad_norm": 0.43445298075675964, "learning_rate": 0.0005256221357947481, "loss": 1.8156, "step": 20654 }, { "epoch": 0.69, "grad_norm": 0.43698835372924805, "learning_rate": 0.0005256152468881836, "loss": 1.8531, "step": 20655 }, { "epoch": 0.69, "grad_norm": 0.43558621406555176, "learning_rate": 0.0005256083577077557, "loss": 1.9147, "step": 20656 }, { "epoch": 0.69, "grad_norm": 0.42934250831604004, "learning_rate": 0.0005256014682534723, "loss": 1.8932, "step": 20657 }, { "epoch": 0.69, "grad_norm": 0.432788610458374, "learning_rate": 0.000525594578525342, "loss": 1.8339, "step": 20658 }, { "epoch": 0.69, "grad_norm": 0.4510256350040436, "learning_rate": 0.0005255876885233732, "loss": 1.864, "step": 20659 }, { "epoch": 0.69, "grad_norm": 0.44384515285491943, "learning_rate": 0.0005255807982475742, "loss": 1.9122, "step": 20660 }, { "epoch": 0.69, "grad_norm": 0.43015211820602417, "learning_rate": 0.0005255739076979532, "loss": 1.8279, "step": 20661 }, { "epoch": 0.69, "grad_norm": 0.44613176584243774, "learning_rate": 0.0005255670168745188, "loss": 1.8209, "step": 20662 }, { "epoch": 0.69, "grad_norm": 0.4211727976799011, "learning_rate": 0.0005255601257772793, "loss": 1.7789, "step": 20663 }, { "epoch": 0.69, "grad_norm": 0.4246387779712677, "learning_rate": 0.0005255532344062431, "loss": 1.8471, "step": 20664 }, { "epoch": 0.69, "grad_norm": 0.42367300391197205, "learning_rate": 0.0005255463427614184, "loss": 1.805, "step": 20665 }, { "epoch": 0.69, "grad_norm": 0.4153004288673401, "learning_rate": 0.0005255394508428138, "loss": 1.8237, "step": 20666 }, { "epoch": 0.69, "grad_norm": 0.42405804991722107, "learning_rate": 0.0005255325586504373, "loss": 1.8141, "step": 20667 }, { "epoch": 0.69, "grad_norm": 0.4245099723339081, "learning_rate": 0.0005255256661842977, "loss": 1.8773, "step": 20668 }, { "epoch": 0.69, "grad_norm": 0.4607090353965759, "learning_rate": 0.0005255187734444031, "loss": 1.824, "step": 20669 }, { "epoch": 0.69, "grad_norm": 0.4288371503353119, "learning_rate": 0.000525511880430762, "loss": 1.7761, "step": 20670 }, { "epoch": 0.69, "grad_norm": 0.4257955551147461, "learning_rate": 0.0005255049871433826, "loss": 1.7775, "step": 20671 }, { "epoch": 0.69, "grad_norm": 0.454009473323822, "learning_rate": 0.0005254980935822736, "loss": 1.8377, "step": 20672 }, { "epoch": 0.69, "grad_norm": 0.4381121098995209, "learning_rate": 0.0005254911997474429, "loss": 1.8478, "step": 20673 }, { "epoch": 0.69, "grad_norm": 0.43673086166381836, "learning_rate": 0.0005254843056388992, "loss": 1.8346, "step": 20674 }, { "epoch": 0.69, "grad_norm": 0.4373895525932312, "learning_rate": 0.0005254774112566508, "loss": 1.8479, "step": 20675 }, { "epoch": 0.69, "grad_norm": 0.43838393688201904, "learning_rate": 0.0005254705166007059, "loss": 1.8908, "step": 20676 }, { "epoch": 0.69, "grad_norm": 0.4374212920665741, "learning_rate": 0.0005254636216710731, "loss": 1.7653, "step": 20677 }, { "epoch": 0.69, "grad_norm": 0.44776734709739685, "learning_rate": 0.0005254567264677607, "loss": 1.7962, "step": 20678 }, { "epoch": 0.69, "grad_norm": 0.4228378236293793, "learning_rate": 0.000525449830990777, "loss": 1.8373, "step": 20679 }, { "epoch": 0.69, "grad_norm": 0.47341224551200867, "learning_rate": 0.0005254429352401306, "loss": 1.8435, "step": 20680 }, { "epoch": 0.69, "grad_norm": 0.41909652948379517, "learning_rate": 0.0005254360392158296, "loss": 1.733, "step": 20681 }, { "epoch": 0.69, "grad_norm": 0.4266570210456848, "learning_rate": 0.0005254291429178824, "loss": 1.7399, "step": 20682 }, { "epoch": 0.69, "grad_norm": 0.43661364912986755, "learning_rate": 0.0005254222463462975, "loss": 1.8047, "step": 20683 }, { "epoch": 0.69, "grad_norm": 0.4696623384952545, "learning_rate": 0.0005254153495010832, "loss": 1.8361, "step": 20684 }, { "epoch": 0.69, "grad_norm": 0.4244747459888458, "learning_rate": 0.0005254084523822479, "loss": 1.8197, "step": 20685 }, { "epoch": 0.69, "grad_norm": 0.45441409945487976, "learning_rate": 0.0005254015549897999, "loss": 1.8357, "step": 20686 }, { "epoch": 0.69, "grad_norm": 0.4504636228084564, "learning_rate": 0.0005253946573237476, "loss": 1.8549, "step": 20687 }, { "epoch": 0.69, "grad_norm": 0.4235895872116089, "learning_rate": 0.0005253877593840994, "loss": 1.8165, "step": 20688 }, { "epoch": 0.69, "grad_norm": 0.448547899723053, "learning_rate": 0.0005253808611708638, "loss": 1.806, "step": 20689 }, { "epoch": 0.69, "grad_norm": 0.44210001826286316, "learning_rate": 0.0005253739626840489, "loss": 1.8686, "step": 20690 }, { "epoch": 0.69, "grad_norm": 0.4312763810157776, "learning_rate": 0.0005253670639236632, "loss": 1.8337, "step": 20691 }, { "epoch": 0.69, "grad_norm": 0.4464505910873413, "learning_rate": 0.0005253601648897152, "loss": 1.8531, "step": 20692 }, { "epoch": 0.69, "grad_norm": 0.41918355226516724, "learning_rate": 0.0005253532655822131, "loss": 1.8438, "step": 20693 }, { "epoch": 0.69, "grad_norm": 0.43162527680397034, "learning_rate": 0.0005253463660011654, "loss": 1.8136, "step": 20694 }, { "epoch": 0.69, "grad_norm": 0.43941786885261536, "learning_rate": 0.0005253394661465804, "loss": 1.7967, "step": 20695 }, { "epoch": 0.69, "grad_norm": 0.44518569111824036, "learning_rate": 0.0005253325660184665, "loss": 1.8508, "step": 20696 }, { "epoch": 0.69, "grad_norm": 0.4434564411640167, "learning_rate": 0.000525325665616832, "loss": 1.8806, "step": 20697 }, { "epoch": 0.69, "grad_norm": 0.458625465631485, "learning_rate": 0.0005253187649416854, "loss": 1.8827, "step": 20698 }, { "epoch": 0.69, "grad_norm": 0.4264802038669586, "learning_rate": 0.000525311863993035, "loss": 1.8911, "step": 20699 }, { "epoch": 0.69, "grad_norm": 0.43062713742256165, "learning_rate": 0.0005253049627708891, "loss": 1.8468, "step": 20700 }, { "epoch": 0.69, "grad_norm": 0.42831259965896606, "learning_rate": 0.0005252980612752562, "loss": 1.814, "step": 20701 }, { "epoch": 0.69, "grad_norm": 0.42305612564086914, "learning_rate": 0.0005252911595061448, "loss": 1.8662, "step": 20702 }, { "epoch": 0.69, "grad_norm": 0.45187175273895264, "learning_rate": 0.000525284257463563, "loss": 1.9006, "step": 20703 }, { "epoch": 0.69, "grad_norm": 0.4523293375968933, "learning_rate": 0.0005252773551475195, "loss": 1.811, "step": 20704 }, { "epoch": 0.69, "grad_norm": 0.4209915101528168, "learning_rate": 0.0005252704525580223, "loss": 1.8459, "step": 20705 }, { "epoch": 0.69, "grad_norm": 0.4288921654224396, "learning_rate": 0.0005252635496950799, "loss": 1.8277, "step": 20706 }, { "epoch": 0.69, "grad_norm": 0.4447813928127289, "learning_rate": 0.0005252566465587008, "loss": 1.773, "step": 20707 }, { "epoch": 0.69, "grad_norm": 0.4441981315612793, "learning_rate": 0.0005252497431488933, "loss": 1.7455, "step": 20708 }, { "epoch": 0.69, "grad_norm": 0.40889355540275574, "learning_rate": 0.0005252428394656659, "loss": 1.7977, "step": 20709 }, { "epoch": 0.69, "grad_norm": 0.44874176383018494, "learning_rate": 0.0005252359355090268, "loss": 1.8288, "step": 20710 }, { "epoch": 0.69, "grad_norm": 0.4375860095024109, "learning_rate": 0.0005252290312789846, "loss": 1.8203, "step": 20711 }, { "epoch": 0.69, "grad_norm": 0.44999250769615173, "learning_rate": 0.0005252221267755473, "loss": 1.7662, "step": 20712 }, { "epoch": 0.69, "grad_norm": 0.4419907033443451, "learning_rate": 0.0005252152219987236, "loss": 1.8191, "step": 20713 }, { "epoch": 0.69, "grad_norm": 0.4576215445995331, "learning_rate": 0.000525208316948522, "loss": 1.8401, "step": 20714 }, { "epoch": 0.69, "grad_norm": 0.45204290747642517, "learning_rate": 0.0005252014116249506, "loss": 1.8355, "step": 20715 }, { "epoch": 0.69, "grad_norm": 0.4469241201877594, "learning_rate": 0.0005251945060280178, "loss": 1.8091, "step": 20716 }, { "epoch": 0.69, "grad_norm": 0.4534398317337036, "learning_rate": 0.000525187600157732, "loss": 1.8229, "step": 20717 }, { "epoch": 0.69, "grad_norm": 0.4575905501842499, "learning_rate": 0.0005251806940141017, "loss": 1.7992, "step": 20718 }, { "epoch": 0.69, "grad_norm": 0.4620194435119629, "learning_rate": 0.0005251737875971353, "loss": 1.8347, "step": 20719 }, { "epoch": 0.69, "grad_norm": 0.426873117685318, "learning_rate": 0.0005251668809068409, "loss": 1.8161, "step": 20720 }, { "epoch": 0.69, "grad_norm": 0.45561304688453674, "learning_rate": 0.0005251599739432273, "loss": 1.7827, "step": 20721 }, { "epoch": 0.69, "grad_norm": 0.4319440722465515, "learning_rate": 0.0005251530667063025, "loss": 1.7344, "step": 20722 }, { "epoch": 0.69, "grad_norm": 0.44148918986320496, "learning_rate": 0.0005251461591960751, "loss": 1.8544, "step": 20723 }, { "epoch": 0.69, "grad_norm": 0.42857083678245544, "learning_rate": 0.0005251392514125535, "loss": 1.799, "step": 20724 }, { "epoch": 0.69, "grad_norm": 0.4322882890701294, "learning_rate": 0.000525132343355746, "loss": 1.8512, "step": 20725 }, { "epoch": 0.69, "grad_norm": 0.42592838406562805, "learning_rate": 0.000525125435025661, "loss": 1.844, "step": 20726 }, { "epoch": 0.69, "grad_norm": 0.4238996207714081, "learning_rate": 0.0005251185264223069, "loss": 1.7952, "step": 20727 }, { "epoch": 0.69, "grad_norm": 0.42812320590019226, "learning_rate": 0.000525111617545692, "loss": 1.9131, "step": 20728 }, { "epoch": 0.69, "grad_norm": 0.42371439933776855, "learning_rate": 0.0005251047083958248, "loss": 1.8526, "step": 20729 }, { "epoch": 0.69, "grad_norm": 0.4447241425514221, "learning_rate": 0.0005250977989727137, "loss": 1.8282, "step": 20730 }, { "epoch": 0.69, "grad_norm": 0.4328014850616455, "learning_rate": 0.000525090889276367, "loss": 1.8485, "step": 20731 }, { "epoch": 0.69, "grad_norm": 0.42676275968551636, "learning_rate": 0.0005250839793067932, "loss": 1.7813, "step": 20732 }, { "epoch": 0.69, "grad_norm": 0.4352942407131195, "learning_rate": 0.0005250770690640005, "loss": 1.8625, "step": 20733 }, { "epoch": 0.69, "grad_norm": 0.4505216181278229, "learning_rate": 0.0005250701585479974, "loss": 1.8202, "step": 20734 }, { "epoch": 0.69, "grad_norm": 0.43627920746803284, "learning_rate": 0.0005250632477587924, "loss": 1.8703, "step": 20735 }, { "epoch": 0.69, "grad_norm": 0.42050525546073914, "learning_rate": 0.0005250563366963937, "loss": 1.7405, "step": 20736 }, { "epoch": 0.69, "grad_norm": 0.4342898428440094, "learning_rate": 0.0005250494253608098, "loss": 1.7281, "step": 20737 }, { "epoch": 0.69, "grad_norm": 0.46266666054725647, "learning_rate": 0.000525042513752049, "loss": 1.8291, "step": 20738 }, { "epoch": 0.69, "grad_norm": 0.4267745018005371, "learning_rate": 0.0005250356018701198, "loss": 1.7729, "step": 20739 }, { "epoch": 0.69, "grad_norm": 0.44185173511505127, "learning_rate": 0.0005250286897150305, "loss": 1.8696, "step": 20740 }, { "epoch": 0.69, "grad_norm": 0.48610731959342957, "learning_rate": 0.0005250217772867896, "loss": 1.8293, "step": 20741 }, { "epoch": 0.69, "grad_norm": 0.4273633062839508, "learning_rate": 0.0005250148645854054, "loss": 1.783, "step": 20742 }, { "epoch": 0.69, "grad_norm": 0.4268427789211273, "learning_rate": 0.0005250079516108863, "loss": 1.8376, "step": 20743 }, { "epoch": 0.69, "grad_norm": 0.43083456158638, "learning_rate": 0.0005250010383632406, "loss": 1.7736, "step": 20744 }, { "epoch": 0.69, "grad_norm": 0.42869070172309875, "learning_rate": 0.0005249941248424769, "loss": 1.8295, "step": 20745 }, { "epoch": 0.69, "grad_norm": 0.43322914838790894, "learning_rate": 0.0005249872110486035, "loss": 1.8407, "step": 20746 }, { "epoch": 0.69, "grad_norm": 0.43176960945129395, "learning_rate": 0.0005249802969816287, "loss": 1.8022, "step": 20747 }, { "epoch": 0.69, "grad_norm": 0.4329923987388611, "learning_rate": 0.000524973382641561, "loss": 1.8408, "step": 20748 }, { "epoch": 0.69, "grad_norm": 0.4401164650917053, "learning_rate": 0.0005249664680284089, "loss": 1.8993, "step": 20749 }, { "epoch": 0.69, "grad_norm": 0.4344277083873749, "learning_rate": 0.0005249595531421804, "loss": 1.688, "step": 20750 }, { "epoch": 0.69, "grad_norm": 0.4410598576068878, "learning_rate": 0.0005249526379828843, "loss": 1.8684, "step": 20751 }, { "epoch": 0.69, "grad_norm": 0.42997488379478455, "learning_rate": 0.0005249457225505288, "loss": 1.8108, "step": 20752 }, { "epoch": 0.69, "grad_norm": 0.4699892997741699, "learning_rate": 0.0005249388068451224, "loss": 1.8822, "step": 20753 }, { "epoch": 0.69, "grad_norm": 0.44758495688438416, "learning_rate": 0.0005249318908666734, "loss": 1.8349, "step": 20754 }, { "epoch": 0.69, "grad_norm": 0.44093358516693115, "learning_rate": 0.0005249249746151902, "loss": 1.8899, "step": 20755 }, { "epoch": 0.69, "grad_norm": 0.4735203981399536, "learning_rate": 0.0005249180580906811, "loss": 1.8321, "step": 20756 }, { "epoch": 0.69, "grad_norm": 0.4668571352958679, "learning_rate": 0.0005249111412931548, "loss": 1.832, "step": 20757 }, { "epoch": 0.69, "grad_norm": 0.4421306550502777, "learning_rate": 0.0005249042242226195, "loss": 1.732, "step": 20758 }, { "epoch": 0.69, "grad_norm": 0.4200541377067566, "learning_rate": 0.0005248973068790835, "loss": 1.8733, "step": 20759 }, { "epoch": 0.69, "grad_norm": 0.45085322856903076, "learning_rate": 0.0005248903892625554, "loss": 1.8056, "step": 20760 }, { "epoch": 0.69, "grad_norm": 0.4408382475376129, "learning_rate": 0.0005248834713730435, "loss": 1.9143, "step": 20761 }, { "epoch": 0.69, "grad_norm": 0.7132011651992798, "learning_rate": 0.0005248765532105562, "loss": 1.7619, "step": 20762 }, { "epoch": 0.69, "grad_norm": 0.4309537410736084, "learning_rate": 0.0005248696347751018, "loss": 1.8038, "step": 20763 }, { "epoch": 0.69, "grad_norm": 0.4278548061847687, "learning_rate": 0.000524862716066689, "loss": 1.77, "step": 20764 }, { "epoch": 0.69, "grad_norm": 0.414384663105011, "learning_rate": 0.0005248557970853258, "loss": 1.8223, "step": 20765 }, { "epoch": 0.69, "grad_norm": 0.42379230260849, "learning_rate": 0.0005248488778310209, "loss": 1.8613, "step": 20766 }, { "epoch": 0.69, "grad_norm": 0.42369791865348816, "learning_rate": 0.0005248419583037825, "loss": 1.7398, "step": 20767 }, { "epoch": 0.69, "grad_norm": 0.4207488000392914, "learning_rate": 0.0005248350385036192, "loss": 1.8002, "step": 20768 }, { "epoch": 0.69, "grad_norm": 0.41844648122787476, "learning_rate": 0.0005248281184305392, "loss": 1.8158, "step": 20769 }, { "epoch": 0.69, "grad_norm": 0.43718859553337097, "learning_rate": 0.0005248211980845511, "loss": 1.7597, "step": 20770 }, { "epoch": 0.69, "grad_norm": 0.4255359172821045, "learning_rate": 0.000524814277465663, "loss": 1.8841, "step": 20771 }, { "epoch": 0.69, "grad_norm": 0.42537233233451843, "learning_rate": 0.0005248073565738837, "loss": 1.8317, "step": 20772 }, { "epoch": 0.69, "grad_norm": 0.43344631791114807, "learning_rate": 0.0005248004354092213, "loss": 1.7834, "step": 20773 }, { "epoch": 0.69, "grad_norm": 0.5197214484214783, "learning_rate": 0.0005247935139716843, "loss": 1.815, "step": 20774 }, { "epoch": 0.69, "grad_norm": 0.4310862720012665, "learning_rate": 0.0005247865922612811, "loss": 1.8661, "step": 20775 }, { "epoch": 0.69, "grad_norm": 0.4550793766975403, "learning_rate": 0.0005247796702780201, "loss": 1.9615, "step": 20776 }, { "epoch": 0.69, "grad_norm": 0.4178146421909332, "learning_rate": 0.0005247727480219097, "loss": 1.8424, "step": 20777 }, { "epoch": 0.69, "grad_norm": 0.43854406476020813, "learning_rate": 0.0005247658254929582, "loss": 1.8164, "step": 20778 }, { "epoch": 0.69, "grad_norm": 0.41786327958106995, "learning_rate": 0.0005247589026911743, "loss": 1.8935, "step": 20779 }, { "epoch": 0.69, "grad_norm": 1.498464822769165, "learning_rate": 0.0005247519796165661, "loss": 1.847, "step": 20780 }, { "epoch": 0.69, "grad_norm": 0.44363802671432495, "learning_rate": 0.0005247450562691422, "loss": 1.8416, "step": 20781 }, { "epoch": 0.69, "grad_norm": 0.43906736373901367, "learning_rate": 0.0005247381326489108, "loss": 1.925, "step": 20782 }, { "epoch": 0.69, "grad_norm": 0.42986637353897095, "learning_rate": 0.0005247312087558804, "loss": 1.8629, "step": 20783 }, { "epoch": 0.69, "grad_norm": 0.4199444651603699, "learning_rate": 0.0005247242845900595, "loss": 1.8141, "step": 20784 }, { "epoch": 0.69, "grad_norm": 0.4332495629787445, "learning_rate": 0.0005247173601514565, "loss": 1.8125, "step": 20785 }, { "epoch": 0.69, "grad_norm": 0.43948298692703247, "learning_rate": 0.0005247104354400796, "loss": 1.8315, "step": 20786 }, { "epoch": 0.69, "grad_norm": 0.43430233001708984, "learning_rate": 0.0005247035104559373, "loss": 1.7765, "step": 20787 }, { "epoch": 0.69, "grad_norm": 0.43643879890441895, "learning_rate": 0.0005246965851990382, "loss": 1.8581, "step": 20788 }, { "epoch": 0.69, "grad_norm": 0.42492857575416565, "learning_rate": 0.0005246896596693905, "loss": 1.8663, "step": 20789 }, { "epoch": 0.69, "grad_norm": 0.4609282612800598, "learning_rate": 0.0005246827338670026, "loss": 1.8165, "step": 20790 }, { "epoch": 0.69, "grad_norm": 0.44973841309547424, "learning_rate": 0.000524675807791883, "loss": 1.7703, "step": 20791 }, { "epoch": 0.69, "grad_norm": 0.42816901206970215, "learning_rate": 0.0005246688814440402, "loss": 1.7399, "step": 20792 }, { "epoch": 0.69, "grad_norm": 0.46198078989982605, "learning_rate": 0.0005246619548234823, "loss": 1.8102, "step": 20793 }, { "epoch": 0.69, "grad_norm": 0.44468212127685547, "learning_rate": 0.000524655027930218, "loss": 1.8622, "step": 20794 }, { "epoch": 0.69, "grad_norm": 0.45706596970558167, "learning_rate": 0.0005246481007642555, "loss": 1.7872, "step": 20795 }, { "epoch": 0.69, "grad_norm": 0.42912372946739197, "learning_rate": 0.0005246411733256034, "loss": 1.8756, "step": 20796 }, { "epoch": 0.69, "grad_norm": 0.46060124039649963, "learning_rate": 0.00052463424561427, "loss": 1.8581, "step": 20797 }, { "epoch": 0.69, "grad_norm": 0.4426920711994171, "learning_rate": 0.0005246273176302637, "loss": 1.8011, "step": 20798 }, { "epoch": 0.69, "grad_norm": 0.4568139910697937, "learning_rate": 0.000524620389373593, "loss": 1.8228, "step": 20799 }, { "epoch": 0.69, "grad_norm": 0.4329000413417816, "learning_rate": 0.0005246134608442661, "loss": 1.8204, "step": 20800 }, { "epoch": 0.69, "grad_norm": 0.45109081268310547, "learning_rate": 0.0005246065320422917, "loss": 1.7581, "step": 20801 }, { "epoch": 0.69, "grad_norm": 0.45761746168136597, "learning_rate": 0.0005245996029676779, "loss": 1.822, "step": 20802 }, { "epoch": 0.69, "grad_norm": 0.4492633640766144, "learning_rate": 0.0005245926736204334, "loss": 1.8275, "step": 20803 }, { "epoch": 0.69, "grad_norm": 0.4541639983654022, "learning_rate": 0.0005245857440005665, "loss": 1.836, "step": 20804 }, { "epoch": 0.69, "grad_norm": 0.47617480158805847, "learning_rate": 0.0005245788141080856, "loss": 1.7992, "step": 20805 }, { "epoch": 0.69, "grad_norm": 0.5388520956039429, "learning_rate": 0.000524571883942999, "loss": 1.9015, "step": 20806 }, { "epoch": 0.69, "grad_norm": 0.4352852702140808, "learning_rate": 0.0005245649535053152, "loss": 1.8921, "step": 20807 }, { "epoch": 0.69, "grad_norm": 0.44997438788414, "learning_rate": 0.0005245580227950428, "loss": 1.8262, "step": 20808 }, { "epoch": 0.69, "grad_norm": 0.45584535598754883, "learning_rate": 0.00052455109181219, "loss": 1.9028, "step": 20809 }, { "epoch": 0.69, "grad_norm": 0.4554724097251892, "learning_rate": 0.0005245441605567652, "loss": 1.8216, "step": 20810 }, { "epoch": 0.69, "grad_norm": 0.46637505292892456, "learning_rate": 0.000524537229028777, "loss": 1.8583, "step": 20811 }, { "epoch": 0.69, "grad_norm": 0.42270082235336304, "learning_rate": 0.0005245302972282336, "loss": 1.8319, "step": 20812 }, { "epoch": 0.69, "grad_norm": 0.43491390347480774, "learning_rate": 0.0005245233651551435, "loss": 1.8115, "step": 20813 }, { "epoch": 0.69, "grad_norm": 0.4760540723800659, "learning_rate": 0.000524516432809515, "loss": 1.855, "step": 20814 }, { "epoch": 0.69, "grad_norm": 0.4520161747932434, "learning_rate": 0.0005245095001913568, "loss": 1.7814, "step": 20815 }, { "epoch": 0.69, "grad_norm": 0.4451102614402771, "learning_rate": 0.0005245025673006771, "loss": 1.8639, "step": 20816 }, { "epoch": 0.69, "grad_norm": 0.43143460154533386, "learning_rate": 0.0005244956341374843, "loss": 1.8708, "step": 20817 }, { "epoch": 0.69, "grad_norm": 0.4200075566768646, "learning_rate": 0.000524488700701787, "loss": 1.9026, "step": 20818 }, { "epoch": 0.69, "grad_norm": 0.47760701179504395, "learning_rate": 0.0005244817669935933, "loss": 1.7753, "step": 20819 }, { "epoch": 0.69, "grad_norm": 0.4375128149986267, "learning_rate": 0.000524474833012912, "loss": 1.8039, "step": 20820 }, { "epoch": 0.69, "grad_norm": 0.44804251194000244, "learning_rate": 0.0005244678987597513, "loss": 1.8888, "step": 20821 }, { "epoch": 0.69, "grad_norm": 0.4335691034793854, "learning_rate": 0.0005244609642341197, "loss": 1.878, "step": 20822 }, { "epoch": 0.69, "grad_norm": 0.4383352994918823, "learning_rate": 0.0005244540294360254, "loss": 1.8333, "step": 20823 }, { "epoch": 0.69, "grad_norm": 0.4492672383785248, "learning_rate": 0.0005244470943654771, "loss": 1.782, "step": 20824 }, { "epoch": 0.69, "grad_norm": 0.4208466112613678, "learning_rate": 0.000524440159022483, "loss": 1.8113, "step": 20825 }, { "epoch": 0.69, "grad_norm": 0.45152634382247925, "learning_rate": 0.0005244332234070516, "loss": 1.8242, "step": 20826 }, { "epoch": 0.69, "grad_norm": 0.4282470941543579, "learning_rate": 0.0005244262875191914, "loss": 1.8031, "step": 20827 }, { "epoch": 0.69, "grad_norm": 0.44290557503700256, "learning_rate": 0.0005244193513589109, "loss": 1.8641, "step": 20828 }, { "epoch": 0.69, "grad_norm": 0.42892125248908997, "learning_rate": 0.0005244124149262182, "loss": 1.8201, "step": 20829 }, { "epoch": 0.69, "grad_norm": 0.4379004240036011, "learning_rate": 0.0005244054782211219, "loss": 1.7528, "step": 20830 }, { "epoch": 0.69, "grad_norm": 0.4408252537250519, "learning_rate": 0.0005243985412436305, "loss": 1.8961, "step": 20831 }, { "epoch": 0.69, "grad_norm": 0.44693514704704285, "learning_rate": 0.0005243916039937522, "loss": 1.8463, "step": 20832 }, { "epoch": 0.69, "grad_norm": 0.4311571419239044, "learning_rate": 0.0005243846664714956, "loss": 1.8215, "step": 20833 }, { "epoch": 0.69, "grad_norm": 0.4468173682689667, "learning_rate": 0.0005243777286768691, "loss": 1.8052, "step": 20834 }, { "epoch": 0.69, "grad_norm": 0.44130921363830566, "learning_rate": 0.0005243707906098811, "loss": 1.8595, "step": 20835 }, { "epoch": 0.69, "grad_norm": 0.4558037519454956, "learning_rate": 0.0005243638522705401, "loss": 1.7693, "step": 20836 }, { "epoch": 0.69, "grad_norm": 0.43966144323349, "learning_rate": 0.0005243569136588543, "loss": 1.7504, "step": 20837 }, { "epoch": 0.69, "grad_norm": 0.4414212703704834, "learning_rate": 0.0005243499747748324, "loss": 1.8289, "step": 20838 }, { "epoch": 0.69, "grad_norm": 0.44138768315315247, "learning_rate": 0.0005243430356184826, "loss": 1.7781, "step": 20839 }, { "epoch": 0.69, "grad_norm": 0.4412766695022583, "learning_rate": 0.0005243360961898134, "loss": 1.8904, "step": 20840 }, { "epoch": 0.69, "grad_norm": 0.4608716666698456, "learning_rate": 0.0005243291564888332, "loss": 1.914, "step": 20841 }, { "epoch": 0.69, "grad_norm": 0.42323869466781616, "learning_rate": 0.0005243222165155506, "loss": 1.7421, "step": 20842 }, { "epoch": 0.69, "grad_norm": 0.4335727095603943, "learning_rate": 0.0005243152762699737, "loss": 1.7962, "step": 20843 }, { "epoch": 0.69, "grad_norm": 0.4585666358470917, "learning_rate": 0.0005243083357521112, "loss": 1.8074, "step": 20844 }, { "epoch": 0.69, "grad_norm": 0.44319984316825867, "learning_rate": 0.0005243013949619715, "loss": 1.7871, "step": 20845 }, { "epoch": 0.69, "grad_norm": 0.41384100914001465, "learning_rate": 0.0005242944538995628, "loss": 1.8154, "step": 20846 }, { "epoch": 0.69, "grad_norm": 0.4219360947608948, "learning_rate": 0.0005242875125648939, "loss": 1.8337, "step": 20847 }, { "epoch": 0.69, "grad_norm": 0.41944268345832825, "learning_rate": 0.0005242805709579729, "loss": 1.8614, "step": 20848 }, { "epoch": 0.69, "grad_norm": 0.4265478551387787, "learning_rate": 0.0005242736290788083, "loss": 1.8173, "step": 20849 }, { "epoch": 0.69, "grad_norm": 0.44904983043670654, "learning_rate": 0.0005242666869274085, "loss": 1.8792, "step": 20850 }, { "epoch": 0.69, "grad_norm": 0.4290856719017029, "learning_rate": 0.0005242597445037821, "loss": 1.8683, "step": 20851 }, { "epoch": 0.69, "grad_norm": 0.4462382197380066, "learning_rate": 0.0005242528018079373, "loss": 1.8535, "step": 20852 }, { "epoch": 0.69, "grad_norm": 0.4264793395996094, "learning_rate": 0.0005242458588398828, "loss": 1.7829, "step": 20853 }, { "epoch": 0.69, "grad_norm": 0.4305579662322998, "learning_rate": 0.0005242389155996268, "loss": 1.8033, "step": 20854 }, { "epoch": 0.69, "grad_norm": 0.44016173481941223, "learning_rate": 0.0005242319720871777, "loss": 1.7885, "step": 20855 }, { "epoch": 0.69, "grad_norm": 0.43691495060920715, "learning_rate": 0.0005242250283025441, "loss": 1.8083, "step": 20856 }, { "epoch": 0.69, "grad_norm": 0.41358429193496704, "learning_rate": 0.0005242180842457344, "loss": 1.7882, "step": 20857 }, { "epoch": 0.69, "grad_norm": 0.4136999845504761, "learning_rate": 0.000524211139916757, "loss": 1.7824, "step": 20858 }, { "epoch": 0.69, "grad_norm": 0.4510633647441864, "learning_rate": 0.0005242041953156201, "loss": 1.8455, "step": 20859 }, { "epoch": 0.69, "grad_norm": 0.44325074553489685, "learning_rate": 0.0005241972504423326, "loss": 1.8183, "step": 20860 }, { "epoch": 0.69, "grad_norm": 0.4331861436367035, "learning_rate": 0.0005241903052969026, "loss": 1.865, "step": 20861 }, { "epoch": 0.69, "grad_norm": 0.4090026021003723, "learning_rate": 0.0005241833598793386, "loss": 1.7771, "step": 20862 }, { "epoch": 0.69, "grad_norm": 0.4331410825252533, "learning_rate": 0.0005241764141896488, "loss": 1.801, "step": 20863 }, { "epoch": 0.69, "grad_norm": 0.43613967299461365, "learning_rate": 0.0005241694682278422, "loss": 1.8447, "step": 20864 }, { "epoch": 0.69, "grad_norm": 0.43097618222236633, "learning_rate": 0.0005241625219939267, "loss": 1.8551, "step": 20865 }, { "epoch": 0.69, "grad_norm": 0.41887640953063965, "learning_rate": 0.000524155575487911, "loss": 1.7695, "step": 20866 }, { "epoch": 0.69, "grad_norm": 0.42343804240226746, "learning_rate": 0.0005241486287098034, "loss": 1.8481, "step": 20867 }, { "epoch": 0.69, "grad_norm": 0.4499536156654358, "learning_rate": 0.0005241416816596125, "loss": 1.88, "step": 20868 }, { "epoch": 0.69, "grad_norm": 0.41434842348098755, "learning_rate": 0.0005241347343373466, "loss": 1.8216, "step": 20869 }, { "epoch": 0.69, "grad_norm": 0.4287601113319397, "learning_rate": 0.000524127786743014, "loss": 1.7677, "step": 20870 }, { "epoch": 0.69, "grad_norm": 0.4347923994064331, "learning_rate": 0.0005241208388766235, "loss": 1.785, "step": 20871 }, { "epoch": 0.69, "grad_norm": 0.42275017499923706, "learning_rate": 0.0005241138907381832, "loss": 1.8332, "step": 20872 }, { "epoch": 0.69, "grad_norm": 0.42883622646331787, "learning_rate": 0.0005241069423277017, "loss": 1.8403, "step": 20873 }, { "epoch": 0.69, "grad_norm": 0.4264111816883087, "learning_rate": 0.0005240999936451874, "loss": 1.8432, "step": 20874 }, { "epoch": 0.69, "grad_norm": 0.4368421733379364, "learning_rate": 0.0005240930446906486, "loss": 1.814, "step": 20875 }, { "epoch": 0.69, "grad_norm": 0.41807112097740173, "learning_rate": 0.0005240860954640941, "loss": 1.8249, "step": 20876 }, { "epoch": 0.69, "grad_norm": 0.4517713189125061, "learning_rate": 0.000524079145965532, "loss": 1.9179, "step": 20877 }, { "epoch": 0.69, "grad_norm": 0.7712112665176392, "learning_rate": 0.0005240721961949707, "loss": 1.8319, "step": 20878 }, { "epoch": 0.69, "grad_norm": 0.4304616153240204, "learning_rate": 0.000524065246152419, "loss": 1.9151, "step": 20879 }, { "epoch": 0.69, "grad_norm": 0.4338337182998657, "learning_rate": 0.0005240582958378849, "loss": 1.7946, "step": 20880 }, { "epoch": 0.69, "grad_norm": 0.44139665365219116, "learning_rate": 0.0005240513452513771, "loss": 1.9104, "step": 20881 }, { "epoch": 0.69, "grad_norm": 0.43761134147644043, "learning_rate": 0.0005240443943929041, "loss": 1.762, "step": 20882 }, { "epoch": 0.69, "grad_norm": 0.41862091422080994, "learning_rate": 0.0005240374432624741, "loss": 1.8972, "step": 20883 }, { "epoch": 0.69, "grad_norm": 0.4490426182746887, "learning_rate": 0.0005240304918600956, "loss": 1.799, "step": 20884 }, { "epoch": 0.69, "grad_norm": 0.45579010248184204, "learning_rate": 0.0005240235401857772, "loss": 1.856, "step": 20885 }, { "epoch": 0.69, "grad_norm": 0.4344565272331238, "learning_rate": 0.0005240165882395272, "loss": 1.7834, "step": 20886 }, { "epoch": 0.69, "grad_norm": 0.4490876793861389, "learning_rate": 0.000524009636021354, "loss": 1.9398, "step": 20887 }, { "epoch": 0.69, "grad_norm": 0.45292770862579346, "learning_rate": 0.0005240026835312662, "loss": 1.8455, "step": 20888 }, { "epoch": 0.69, "grad_norm": 0.42116162180900574, "learning_rate": 0.0005239957307692721, "loss": 1.8518, "step": 20889 }, { "epoch": 0.7, "grad_norm": 0.43288516998291016, "learning_rate": 0.0005239887777353802, "loss": 1.8293, "step": 20890 }, { "epoch": 0.7, "grad_norm": 0.4374936819076538, "learning_rate": 0.0005239818244295989, "loss": 1.7802, "step": 20891 }, { "epoch": 0.7, "grad_norm": 0.45007801055908203, "learning_rate": 0.0005239748708519368, "loss": 1.7866, "step": 20892 }, { "epoch": 0.7, "grad_norm": 0.43873879313468933, "learning_rate": 0.000523967917002402, "loss": 1.7984, "step": 20893 }, { "epoch": 0.7, "grad_norm": 0.43798643350601196, "learning_rate": 0.0005239609628810033, "loss": 1.8937, "step": 20894 }, { "epoch": 0.7, "grad_norm": 0.43875110149383545, "learning_rate": 0.0005239540084877489, "loss": 1.8449, "step": 20895 }, { "epoch": 0.7, "grad_norm": 0.4669423997402191, "learning_rate": 0.0005239470538226474, "loss": 1.7728, "step": 20896 }, { "epoch": 0.7, "grad_norm": 0.43695077300071716, "learning_rate": 0.0005239400988857072, "loss": 1.8424, "step": 20897 }, { "epoch": 0.7, "grad_norm": 0.46607229113578796, "learning_rate": 0.0005239331436769366, "loss": 1.854, "step": 20898 }, { "epoch": 0.7, "grad_norm": 0.443914532661438, "learning_rate": 0.0005239261881963443, "loss": 1.8397, "step": 20899 }, { "epoch": 0.7, "grad_norm": 0.439535528421402, "learning_rate": 0.0005239192324439385, "loss": 1.8313, "step": 20900 }, { "epoch": 0.7, "grad_norm": 0.44152528047561646, "learning_rate": 0.0005239122764197278, "loss": 1.7454, "step": 20901 }, { "epoch": 0.7, "grad_norm": 0.4436757564544678, "learning_rate": 0.0005239053201237206, "loss": 1.8533, "step": 20902 }, { "epoch": 0.7, "grad_norm": 0.43825891613960266, "learning_rate": 0.0005238983635559253, "loss": 1.7923, "step": 20903 }, { "epoch": 0.7, "grad_norm": 0.4254399240016937, "learning_rate": 0.0005238914067163503, "loss": 1.796, "step": 20904 }, { "epoch": 0.7, "grad_norm": 0.42940443754196167, "learning_rate": 0.0005238844496050042, "loss": 1.9064, "step": 20905 }, { "epoch": 0.7, "grad_norm": 0.43193966150283813, "learning_rate": 0.0005238774922218954, "loss": 1.8314, "step": 20906 }, { "epoch": 0.7, "grad_norm": 0.4097893238067627, "learning_rate": 0.0005238705345670322, "loss": 1.8225, "step": 20907 }, { "epoch": 0.7, "grad_norm": 0.425069659948349, "learning_rate": 0.0005238635766404232, "loss": 1.8115, "step": 20908 }, { "epoch": 0.7, "grad_norm": 0.4266899824142456, "learning_rate": 0.0005238566184420769, "loss": 1.8307, "step": 20909 }, { "epoch": 0.7, "grad_norm": 0.43558958172798157, "learning_rate": 0.0005238496599720016, "loss": 1.8504, "step": 20910 }, { "epoch": 0.7, "grad_norm": 0.4273589551448822, "learning_rate": 0.0005238427012302057, "loss": 1.8461, "step": 20911 }, { "epoch": 0.7, "grad_norm": 0.415147989988327, "learning_rate": 0.0005238357422166979, "loss": 1.677, "step": 20912 }, { "epoch": 0.7, "grad_norm": 0.44124433398246765, "learning_rate": 0.0005238287829314864, "loss": 1.8061, "step": 20913 }, { "epoch": 0.7, "grad_norm": 0.42068636417388916, "learning_rate": 0.0005238218233745797, "loss": 1.7143, "step": 20914 }, { "epoch": 0.7, "grad_norm": 0.4253475069999695, "learning_rate": 0.0005238148635459864, "loss": 1.8552, "step": 20915 }, { "epoch": 0.7, "grad_norm": 0.41328173875808716, "learning_rate": 0.0005238079034457147, "loss": 1.8015, "step": 20916 }, { "epoch": 0.7, "grad_norm": 0.42546796798706055, "learning_rate": 0.0005238009430737732, "loss": 1.8987, "step": 20917 }, { "epoch": 0.7, "grad_norm": 0.4175966680049896, "learning_rate": 0.0005237939824301705, "loss": 1.7859, "step": 20918 }, { "epoch": 0.7, "grad_norm": 0.42516782879829407, "learning_rate": 0.0005237870215149147, "loss": 1.8308, "step": 20919 }, { "epoch": 0.7, "grad_norm": 0.42879927158355713, "learning_rate": 0.0005237800603280145, "loss": 1.792, "step": 20920 }, { "epoch": 0.7, "grad_norm": 0.418391615152359, "learning_rate": 0.0005237730988694782, "loss": 1.8479, "step": 20921 }, { "epoch": 0.7, "grad_norm": 0.43712201714515686, "learning_rate": 0.0005237661371393144, "loss": 1.8747, "step": 20922 }, { "epoch": 0.7, "grad_norm": 0.4183669090270996, "learning_rate": 0.0005237591751375315, "loss": 1.8, "step": 20923 }, { "epoch": 0.7, "grad_norm": 0.4163545072078705, "learning_rate": 0.0005237522128641379, "loss": 1.8746, "step": 20924 }, { "epoch": 0.7, "grad_norm": 0.428964227437973, "learning_rate": 0.0005237452503191421, "loss": 1.7633, "step": 20925 }, { "epoch": 0.7, "grad_norm": 0.6973416209220886, "learning_rate": 0.0005237382875025525, "loss": 1.8319, "step": 20926 }, { "epoch": 0.7, "grad_norm": 0.4146261513233185, "learning_rate": 0.0005237313244143776, "loss": 1.7683, "step": 20927 }, { "epoch": 0.7, "grad_norm": 0.4252983033657074, "learning_rate": 0.0005237243610546258, "loss": 1.8009, "step": 20928 }, { "epoch": 0.7, "grad_norm": 0.43831586837768555, "learning_rate": 0.0005237173974233057, "loss": 1.8829, "step": 20929 }, { "epoch": 0.7, "grad_norm": 0.44110411405563354, "learning_rate": 0.0005237104335204255, "loss": 1.8073, "step": 20930 }, { "epoch": 0.7, "grad_norm": 0.4440554976463318, "learning_rate": 0.000523703469345994, "loss": 1.8499, "step": 20931 }, { "epoch": 0.7, "grad_norm": 0.4404781758785248, "learning_rate": 0.0005236965049000192, "loss": 1.8372, "step": 20932 }, { "epoch": 0.7, "grad_norm": 0.42576441168785095, "learning_rate": 0.00052368954018251, "loss": 1.9048, "step": 20933 }, { "epoch": 0.7, "grad_norm": 0.43114978075027466, "learning_rate": 0.0005236825751934745, "loss": 1.8775, "step": 20934 }, { "epoch": 0.7, "grad_norm": 0.4235895872116089, "learning_rate": 0.0005236756099329215, "loss": 1.88, "step": 20935 }, { "epoch": 0.7, "grad_norm": 0.4350973069667816, "learning_rate": 0.000523668644400859, "loss": 1.8666, "step": 20936 }, { "epoch": 0.7, "grad_norm": 0.4210470914840698, "learning_rate": 0.0005236616785972959, "loss": 1.7886, "step": 20937 }, { "epoch": 0.7, "grad_norm": 0.425699919462204, "learning_rate": 0.0005236547125222405, "loss": 1.7779, "step": 20938 }, { "epoch": 0.7, "grad_norm": 0.4642001688480377, "learning_rate": 0.0005236477461757012, "loss": 1.8304, "step": 20939 }, { "epoch": 0.7, "grad_norm": 0.431959867477417, "learning_rate": 0.0005236407795576864, "loss": 1.8126, "step": 20940 }, { "epoch": 0.7, "grad_norm": 0.4196149408817291, "learning_rate": 0.0005236338126682048, "loss": 1.7894, "step": 20941 }, { "epoch": 0.7, "grad_norm": 0.4501287043094635, "learning_rate": 0.0005236268455072646, "loss": 1.8876, "step": 20942 }, { "epoch": 0.7, "grad_norm": 0.4410650432109833, "learning_rate": 0.0005236198780748743, "loss": 1.8059, "step": 20943 }, { "epoch": 0.7, "grad_norm": 0.42992347478866577, "learning_rate": 0.0005236129103710426, "loss": 1.8207, "step": 20944 }, { "epoch": 0.7, "grad_norm": 0.43536072969436646, "learning_rate": 0.0005236059423957776, "loss": 1.8638, "step": 20945 }, { "epoch": 0.7, "grad_norm": 0.4237358868122101, "learning_rate": 0.000523598974149088, "loss": 1.7902, "step": 20946 }, { "epoch": 0.7, "grad_norm": 0.4294927716255188, "learning_rate": 0.0005235920056309823, "loss": 1.8287, "step": 20947 }, { "epoch": 0.7, "grad_norm": 0.440850168466568, "learning_rate": 0.0005235850368414687, "loss": 1.7938, "step": 20948 }, { "epoch": 0.7, "grad_norm": 0.4471141993999481, "learning_rate": 0.0005235780677805558, "loss": 1.8067, "step": 20949 }, { "epoch": 0.7, "grad_norm": 0.4319598078727722, "learning_rate": 0.0005235710984482522, "loss": 1.7687, "step": 20950 }, { "epoch": 0.7, "grad_norm": 0.4179530739784241, "learning_rate": 0.0005235641288445661, "loss": 1.8362, "step": 20951 }, { "epoch": 0.7, "grad_norm": 0.44051507115364075, "learning_rate": 0.0005235571589695062, "loss": 1.8867, "step": 20952 }, { "epoch": 0.7, "grad_norm": 0.44472017884254456, "learning_rate": 0.0005235501888230808, "loss": 1.7531, "step": 20953 }, { "epoch": 0.7, "grad_norm": 0.43221330642700195, "learning_rate": 0.0005235432184052984, "loss": 1.8595, "step": 20954 }, { "epoch": 0.7, "grad_norm": 0.41062983870506287, "learning_rate": 0.0005235362477161675, "loss": 1.8181, "step": 20955 }, { "epoch": 0.7, "grad_norm": 0.42148950695991516, "learning_rate": 0.0005235292767556965, "loss": 1.7799, "step": 20956 }, { "epoch": 0.7, "grad_norm": 0.44586095213890076, "learning_rate": 0.0005235223055238938, "loss": 1.8782, "step": 20957 }, { "epoch": 0.7, "grad_norm": 0.4371674954891205, "learning_rate": 0.0005235153340207681, "loss": 1.8476, "step": 20958 }, { "epoch": 0.7, "grad_norm": 0.42885953187942505, "learning_rate": 0.0005235083622463277, "loss": 1.7852, "step": 20959 }, { "epoch": 0.7, "grad_norm": 0.4430962800979614, "learning_rate": 0.000523501390200581, "loss": 1.8814, "step": 20960 }, { "epoch": 0.7, "grad_norm": 0.4717482626438141, "learning_rate": 0.0005234944178835367, "loss": 1.8554, "step": 20961 }, { "epoch": 0.7, "grad_norm": 0.45529401302337646, "learning_rate": 0.0005234874452952029, "loss": 1.8793, "step": 20962 }, { "epoch": 0.7, "grad_norm": 0.43755221366882324, "learning_rate": 0.0005234804724355885, "loss": 1.8618, "step": 20963 }, { "epoch": 0.7, "grad_norm": 0.4473780393600464, "learning_rate": 0.0005234734993047015, "loss": 1.8054, "step": 20964 }, { "epoch": 0.7, "grad_norm": 0.43120718002319336, "learning_rate": 0.0005234665259025508, "loss": 1.7784, "step": 20965 }, { "epoch": 0.7, "grad_norm": 0.46008041501045227, "learning_rate": 0.0005234595522291446, "loss": 1.9059, "step": 20966 }, { "epoch": 0.7, "grad_norm": 0.4508095681667328, "learning_rate": 0.0005234525782844912, "loss": 1.9068, "step": 20967 }, { "epoch": 0.7, "grad_norm": 0.4318203926086426, "learning_rate": 0.0005234456040685996, "loss": 1.8113, "step": 20968 }, { "epoch": 0.7, "grad_norm": 0.43216124176979065, "learning_rate": 0.0005234386295814779, "loss": 1.8273, "step": 20969 }, { "epoch": 0.7, "grad_norm": 0.45735007524490356, "learning_rate": 0.0005234316548231345, "loss": 1.8905, "step": 20970 }, { "epoch": 0.7, "grad_norm": 0.4436899423599243, "learning_rate": 0.0005234246797935781, "loss": 1.8111, "step": 20971 }, { "epoch": 0.7, "grad_norm": 0.4309805631637573, "learning_rate": 0.000523417704492817, "loss": 1.7998, "step": 20972 }, { "epoch": 0.7, "grad_norm": 0.424011766910553, "learning_rate": 0.0005234107289208597, "loss": 1.8367, "step": 20973 }, { "epoch": 0.7, "grad_norm": 0.43144676089286804, "learning_rate": 0.0005234037530777149, "loss": 1.8241, "step": 20974 }, { "epoch": 0.7, "grad_norm": 0.440015584230423, "learning_rate": 0.0005233967769633906, "loss": 1.7332, "step": 20975 }, { "epoch": 0.7, "grad_norm": 0.4222286641597748, "learning_rate": 0.0005233898005778956, "loss": 1.8976, "step": 20976 }, { "epoch": 0.7, "grad_norm": 0.42448535561561584, "learning_rate": 0.0005233828239212385, "loss": 1.718, "step": 20977 }, { "epoch": 0.7, "grad_norm": 0.43670743703842163, "learning_rate": 0.0005233758469934273, "loss": 1.9099, "step": 20978 }, { "epoch": 0.7, "grad_norm": 0.43690699338912964, "learning_rate": 0.0005233688697944708, "loss": 1.8257, "step": 20979 }, { "epoch": 0.7, "grad_norm": 0.43013995885849, "learning_rate": 0.0005233618923243774, "loss": 1.8209, "step": 20980 }, { "epoch": 0.7, "grad_norm": 0.42018017172813416, "learning_rate": 0.0005233549145831557, "loss": 1.8737, "step": 20981 }, { "epoch": 0.7, "grad_norm": 0.4074753522872925, "learning_rate": 0.0005233479365708139, "loss": 1.8203, "step": 20982 }, { "epoch": 0.7, "grad_norm": 0.42902666330337524, "learning_rate": 0.0005233409582873608, "loss": 1.8697, "step": 20983 }, { "epoch": 0.7, "grad_norm": 0.43045586347579956, "learning_rate": 0.0005233339797328044, "loss": 1.865, "step": 20984 }, { "epoch": 0.7, "grad_norm": 0.41507938504219055, "learning_rate": 0.0005233270009071538, "loss": 1.7967, "step": 20985 }, { "epoch": 0.7, "grad_norm": 0.4352715015411377, "learning_rate": 0.0005233200218104168, "loss": 1.8484, "step": 20986 }, { "epoch": 0.7, "grad_norm": 0.4182316064834595, "learning_rate": 0.0005233130424426024, "loss": 1.8154, "step": 20987 }, { "epoch": 0.7, "grad_norm": 0.445128858089447, "learning_rate": 0.0005233060628037188, "loss": 1.8118, "step": 20988 }, { "epoch": 0.7, "grad_norm": 0.44186198711395264, "learning_rate": 0.0005232990828937745, "loss": 1.7821, "step": 20989 }, { "epoch": 0.7, "grad_norm": 0.44141653180122375, "learning_rate": 0.0005232921027127781, "loss": 1.9545, "step": 20990 }, { "epoch": 0.7, "grad_norm": 0.42097049951553345, "learning_rate": 0.000523285122260738, "loss": 1.8424, "step": 20991 }, { "epoch": 0.7, "grad_norm": 0.4498615264892578, "learning_rate": 0.0005232781415376625, "loss": 1.8269, "step": 20992 }, { "epoch": 0.7, "grad_norm": 0.43019816279411316, "learning_rate": 0.0005232711605435604, "loss": 1.8943, "step": 20993 }, { "epoch": 0.7, "grad_norm": 0.4362512528896332, "learning_rate": 0.0005232641792784399, "loss": 1.7412, "step": 20994 }, { "epoch": 0.7, "grad_norm": 0.4445171356201172, "learning_rate": 0.0005232571977423097, "loss": 1.8832, "step": 20995 }, { "epoch": 0.7, "grad_norm": 0.4550311267375946, "learning_rate": 0.0005232502159351781, "loss": 1.8427, "step": 20996 }, { "epoch": 0.7, "grad_norm": 0.4220826327800751, "learning_rate": 0.0005232432338570536, "loss": 1.7937, "step": 20997 }, { "epoch": 0.7, "grad_norm": 0.4249114394187927, "learning_rate": 0.0005232362515079447, "loss": 1.811, "step": 20998 }, { "epoch": 0.7, "grad_norm": 0.4354275166988373, "learning_rate": 0.00052322926888786, "loss": 1.7994, "step": 20999 }, { "epoch": 0.7, "grad_norm": 0.4292207956314087, "learning_rate": 0.0005232222859968078, "loss": 1.8437, "step": 21000 }, { "epoch": 0.7, "grad_norm": 0.4358406066894531, "learning_rate": 0.0005232153028347966, "loss": 1.7524, "step": 21001 }, { "epoch": 0.7, "grad_norm": 0.44510501623153687, "learning_rate": 0.0005232083194018349, "loss": 1.8143, "step": 21002 }, { "epoch": 0.7, "grad_norm": 0.4282318353652954, "learning_rate": 0.0005232013356979312, "loss": 1.8198, "step": 21003 }, { "epoch": 0.7, "grad_norm": 0.42486709356307983, "learning_rate": 0.000523194351723094, "loss": 1.7894, "step": 21004 }, { "epoch": 0.7, "grad_norm": 0.4327680468559265, "learning_rate": 0.0005231873674773317, "loss": 1.892, "step": 21005 }, { "epoch": 0.7, "grad_norm": 0.4275698661804199, "learning_rate": 0.0005231803829606528, "loss": 1.8316, "step": 21006 }, { "epoch": 0.7, "grad_norm": 0.4207042157649994, "learning_rate": 0.0005231733981730658, "loss": 1.8134, "step": 21007 }, { "epoch": 0.7, "grad_norm": 0.4361725151538849, "learning_rate": 0.0005231664131145793, "loss": 1.8064, "step": 21008 }, { "epoch": 0.7, "grad_norm": 0.4184924364089966, "learning_rate": 0.0005231594277852015, "loss": 1.855, "step": 21009 }, { "epoch": 0.7, "grad_norm": 0.4218734800815582, "learning_rate": 0.0005231524421849412, "loss": 1.7913, "step": 21010 }, { "epoch": 0.7, "grad_norm": 0.4200286865234375, "learning_rate": 0.0005231454563138065, "loss": 1.7598, "step": 21011 }, { "epoch": 0.7, "grad_norm": 0.4294576942920685, "learning_rate": 0.0005231384701718063, "loss": 1.851, "step": 21012 }, { "epoch": 0.7, "grad_norm": 0.43964457511901855, "learning_rate": 0.0005231314837589487, "loss": 1.7607, "step": 21013 }, { "epoch": 0.7, "grad_norm": 0.4495927691459656, "learning_rate": 0.0005231244970752425, "loss": 1.8716, "step": 21014 }, { "epoch": 0.7, "grad_norm": 0.4381231963634491, "learning_rate": 0.000523117510120696, "loss": 1.8065, "step": 21015 }, { "epoch": 0.7, "grad_norm": 0.43698349595069885, "learning_rate": 0.0005231105228953178, "loss": 1.8638, "step": 21016 }, { "epoch": 0.7, "grad_norm": 0.4600284695625305, "learning_rate": 0.0005231035353991161, "loss": 1.7553, "step": 21017 }, { "epoch": 0.7, "grad_norm": 0.43467164039611816, "learning_rate": 0.0005230965476320996, "loss": 1.8717, "step": 21018 }, { "epoch": 0.7, "grad_norm": 0.432849645614624, "learning_rate": 0.000523089559594277, "loss": 1.9194, "step": 21019 }, { "epoch": 0.7, "grad_norm": 0.44919353723526, "learning_rate": 0.0005230825712856564, "loss": 1.8862, "step": 21020 }, { "epoch": 0.7, "grad_norm": 0.42281827330589294, "learning_rate": 0.0005230755827062463, "loss": 1.8163, "step": 21021 }, { "epoch": 0.7, "grad_norm": 0.42275771498680115, "learning_rate": 0.0005230685938560555, "loss": 1.8399, "step": 21022 }, { "epoch": 0.7, "grad_norm": 0.4307354688644409, "learning_rate": 0.0005230616047350922, "loss": 1.8787, "step": 21023 }, { "epoch": 0.7, "grad_norm": 0.44228318333625793, "learning_rate": 0.000523054615343365, "loss": 1.8014, "step": 21024 }, { "epoch": 0.7, "grad_norm": 0.43331119418144226, "learning_rate": 0.0005230476256808824, "loss": 1.7391, "step": 21025 }, { "epoch": 0.7, "grad_norm": 0.43937912583351135, "learning_rate": 0.0005230406357476529, "loss": 1.8491, "step": 21026 }, { "epoch": 0.7, "grad_norm": 0.428132563829422, "learning_rate": 0.0005230336455436848, "loss": 1.8422, "step": 21027 }, { "epoch": 0.7, "grad_norm": 0.4328852891921997, "learning_rate": 0.0005230266550689867, "loss": 1.9016, "step": 21028 }, { "epoch": 0.7, "grad_norm": 0.4241923391819, "learning_rate": 0.0005230196643235672, "loss": 1.7757, "step": 21029 }, { "epoch": 0.7, "grad_norm": 0.43883055448532104, "learning_rate": 0.0005230126733074347, "loss": 1.8286, "step": 21030 }, { "epoch": 0.7, "grad_norm": 0.4407213628292084, "learning_rate": 0.0005230056820205977, "loss": 1.7518, "step": 21031 }, { "epoch": 0.7, "grad_norm": 0.4223296642303467, "learning_rate": 0.0005229986904630644, "loss": 1.836, "step": 21032 }, { "epoch": 0.7, "grad_norm": 0.45683029294013977, "learning_rate": 0.0005229916986348439, "loss": 1.8606, "step": 21033 }, { "epoch": 0.7, "grad_norm": 0.4611934721469879, "learning_rate": 0.0005229847065359441, "loss": 1.8514, "step": 21034 }, { "epoch": 0.7, "grad_norm": 0.4171944260597229, "learning_rate": 0.0005229777141663738, "loss": 1.8332, "step": 21035 }, { "epoch": 0.7, "grad_norm": 0.4422403872013092, "learning_rate": 0.0005229707215261413, "loss": 1.8658, "step": 21036 }, { "epoch": 0.7, "grad_norm": 0.43102511763572693, "learning_rate": 0.0005229637286152554, "loss": 1.8458, "step": 21037 }, { "epoch": 0.7, "grad_norm": 0.44886714220046997, "learning_rate": 0.0005229567354337242, "loss": 1.7731, "step": 21038 }, { "epoch": 0.7, "grad_norm": 0.4309975504875183, "learning_rate": 0.0005229497419815565, "loss": 1.8002, "step": 21039 }, { "epoch": 0.7, "grad_norm": 0.429439514875412, "learning_rate": 0.0005229427482587606, "loss": 1.9034, "step": 21040 }, { "epoch": 0.7, "grad_norm": 0.43144601583480835, "learning_rate": 0.0005229357542653451, "loss": 1.8225, "step": 21041 }, { "epoch": 0.7, "grad_norm": 0.4334797263145447, "learning_rate": 0.0005229287600013184, "loss": 1.8706, "step": 21042 }, { "epoch": 0.7, "grad_norm": 0.45339271426200867, "learning_rate": 0.000522921765466689, "loss": 1.8469, "step": 21043 }, { "epoch": 0.7, "grad_norm": 0.4467748999595642, "learning_rate": 0.0005229147706614655, "loss": 1.7829, "step": 21044 }, { "epoch": 0.7, "grad_norm": 0.4475078284740448, "learning_rate": 0.0005229077755856563, "loss": 1.7867, "step": 21045 }, { "epoch": 0.7, "grad_norm": 0.42906519770622253, "learning_rate": 0.0005229007802392699, "loss": 1.8106, "step": 21046 }, { "epoch": 0.7, "grad_norm": 0.42301902174949646, "learning_rate": 0.0005228937846223148, "loss": 1.778, "step": 21047 }, { "epoch": 0.7, "grad_norm": 0.4338705837726593, "learning_rate": 0.0005228867887347994, "loss": 1.9022, "step": 21048 }, { "epoch": 0.7, "grad_norm": 0.4354555904865265, "learning_rate": 0.0005228797925767324, "loss": 1.7961, "step": 21049 }, { "epoch": 0.7, "grad_norm": 0.4462803304195404, "learning_rate": 0.0005228727961481221, "loss": 1.8631, "step": 21050 }, { "epoch": 0.7, "grad_norm": 0.4342404901981354, "learning_rate": 0.0005228657994489771, "loss": 1.8122, "step": 21051 }, { "epoch": 0.7, "grad_norm": 0.44866159558296204, "learning_rate": 0.0005228588024793058, "loss": 1.8349, "step": 21052 }, { "epoch": 0.7, "grad_norm": 0.4343024492263794, "learning_rate": 0.0005228518052391169, "loss": 1.8674, "step": 21053 }, { "epoch": 0.7, "grad_norm": 0.43155714869499207, "learning_rate": 0.0005228448077284186, "loss": 1.8989, "step": 21054 }, { "epoch": 0.7, "grad_norm": 0.43528494238853455, "learning_rate": 0.0005228378099472196, "loss": 1.8551, "step": 21055 }, { "epoch": 0.7, "grad_norm": 0.4214203357696533, "learning_rate": 0.0005228308118955283, "loss": 1.7097, "step": 21056 }, { "epoch": 0.7, "grad_norm": 0.42380329966545105, "learning_rate": 0.0005228238135733532, "loss": 1.8402, "step": 21057 }, { "epoch": 0.7, "grad_norm": 0.4267920255661011, "learning_rate": 0.0005228168149807028, "loss": 1.8394, "step": 21058 }, { "epoch": 0.7, "grad_norm": 0.4327879548072815, "learning_rate": 0.0005228098161175858, "loss": 1.8737, "step": 21059 }, { "epoch": 0.7, "grad_norm": 0.435737669467926, "learning_rate": 0.0005228028169840103, "loss": 1.8604, "step": 21060 }, { "epoch": 0.7, "grad_norm": 0.4433765709400177, "learning_rate": 0.0005227958175799851, "loss": 1.8354, "step": 21061 }, { "epoch": 0.7, "grad_norm": 0.41526004672050476, "learning_rate": 0.0005227888179055186, "loss": 1.7608, "step": 21062 }, { "epoch": 0.7, "grad_norm": 0.43563154339790344, "learning_rate": 0.0005227818179606193, "loss": 1.8078, "step": 21063 }, { "epoch": 0.7, "grad_norm": 0.42470815777778625, "learning_rate": 0.0005227748177452957, "loss": 1.8488, "step": 21064 }, { "epoch": 0.7, "grad_norm": 0.6954278349876404, "learning_rate": 0.0005227678172595563, "loss": 1.7429, "step": 21065 }, { "epoch": 0.7, "grad_norm": 0.4323836863040924, "learning_rate": 0.0005227608165034096, "loss": 1.8636, "step": 21066 }, { "epoch": 0.7, "grad_norm": 0.4551701843738556, "learning_rate": 0.000522753815476864, "loss": 1.8191, "step": 21067 }, { "epoch": 0.7, "grad_norm": 0.43223968148231506, "learning_rate": 0.0005227468141799283, "loss": 1.7991, "step": 21068 }, { "epoch": 0.7, "grad_norm": 0.46164730191230774, "learning_rate": 0.0005227398126126106, "loss": 1.9407, "step": 21069 }, { "epoch": 0.7, "grad_norm": 0.4159463047981262, "learning_rate": 0.0005227328107749196, "loss": 1.8829, "step": 21070 }, { "epoch": 0.7, "grad_norm": 0.44438520073890686, "learning_rate": 0.0005227258086668639, "loss": 1.7726, "step": 21071 }, { "epoch": 0.7, "grad_norm": 0.43280044198036194, "learning_rate": 0.0005227188062884518, "loss": 1.8489, "step": 21072 }, { "epoch": 0.7, "grad_norm": 0.4523535668849945, "learning_rate": 0.0005227118036396918, "loss": 1.8926, "step": 21073 }, { "epoch": 0.7, "grad_norm": 0.4385790228843689, "learning_rate": 0.0005227048007205926, "loss": 1.8282, "step": 21074 }, { "epoch": 0.7, "grad_norm": 0.42271092534065247, "learning_rate": 0.0005226977975311627, "loss": 1.7952, "step": 21075 }, { "epoch": 0.7, "grad_norm": 0.4281412363052368, "learning_rate": 0.0005226907940714103, "loss": 1.8292, "step": 21076 }, { "epoch": 0.7, "grad_norm": 0.4277830421924591, "learning_rate": 0.0005226837903413442, "loss": 1.8641, "step": 21077 }, { "epoch": 0.7, "grad_norm": 0.4476010799407959, "learning_rate": 0.0005226767863409727, "loss": 1.8501, "step": 21078 }, { "epoch": 0.7, "grad_norm": 0.4514416754245758, "learning_rate": 0.0005226697820703044, "loss": 1.8383, "step": 21079 }, { "epoch": 0.7, "grad_norm": 0.43597540259361267, "learning_rate": 0.0005226627775293479, "loss": 1.8652, "step": 21080 }, { "epoch": 0.7, "grad_norm": 0.43873268365859985, "learning_rate": 0.0005226557727181115, "loss": 1.9422, "step": 21081 }, { "epoch": 0.7, "grad_norm": 0.4570566415786743, "learning_rate": 0.0005226487676366037, "loss": 1.7762, "step": 21082 }, { "epoch": 0.7, "grad_norm": 0.44322243332862854, "learning_rate": 0.0005226417622848333, "loss": 1.8134, "step": 21083 }, { "epoch": 0.7, "grad_norm": 0.42365872859954834, "learning_rate": 0.0005226347566628085, "loss": 1.8402, "step": 21084 }, { "epoch": 0.7, "grad_norm": 0.4597077965736389, "learning_rate": 0.000522627750770538, "loss": 1.8219, "step": 21085 }, { "epoch": 0.7, "grad_norm": 0.4358597993850708, "learning_rate": 0.0005226207446080301, "loss": 1.8349, "step": 21086 }, { "epoch": 0.7, "grad_norm": 0.4555552899837494, "learning_rate": 0.0005226137381752935, "loss": 1.8595, "step": 21087 }, { "epoch": 0.7, "grad_norm": 0.4303693473339081, "learning_rate": 0.0005226067314723366, "loss": 1.8183, "step": 21088 }, { "epoch": 0.7, "grad_norm": 0.434574156999588, "learning_rate": 0.0005225997244991678, "loss": 1.8224, "step": 21089 }, { "epoch": 0.7, "grad_norm": 0.4288797378540039, "learning_rate": 0.0005225927172557959, "loss": 1.885, "step": 21090 }, { "epoch": 0.7, "grad_norm": 0.4247240126132965, "learning_rate": 0.0005225857097422292, "loss": 1.7849, "step": 21091 }, { "epoch": 0.7, "grad_norm": 0.4214761257171631, "learning_rate": 0.0005225787019584763, "loss": 1.7538, "step": 21092 }, { "epoch": 0.7, "grad_norm": 0.4211898148059845, "learning_rate": 0.0005225716939045455, "loss": 1.8628, "step": 21093 }, { "epoch": 0.7, "grad_norm": 0.41697537899017334, "learning_rate": 0.0005225646855804455, "loss": 1.8802, "step": 21094 }, { "epoch": 0.7, "grad_norm": 0.4628288447856903, "learning_rate": 0.0005225576769861848, "loss": 1.7961, "step": 21095 }, { "epoch": 0.7, "grad_norm": 0.42987221479415894, "learning_rate": 0.000522550668121772, "loss": 1.8304, "step": 21096 }, { "epoch": 0.7, "grad_norm": 0.43677595257759094, "learning_rate": 0.0005225436589872153, "loss": 1.7709, "step": 21097 }, { "epoch": 0.7, "grad_norm": 0.45339593291282654, "learning_rate": 0.0005225366495825234, "loss": 1.7605, "step": 21098 }, { "epoch": 0.7, "grad_norm": 0.4383971393108368, "learning_rate": 0.0005225296399077048, "loss": 1.7884, "step": 21099 }, { "epoch": 0.7, "grad_norm": 0.43537241220474243, "learning_rate": 0.000522522629962768, "loss": 1.7936, "step": 21100 }, { "epoch": 0.7, "grad_norm": 0.4420267939567566, "learning_rate": 0.0005225156197477217, "loss": 1.8547, "step": 21101 }, { "epoch": 0.7, "grad_norm": 0.4381304383277893, "learning_rate": 0.000522508609262574, "loss": 1.793, "step": 21102 }, { "epoch": 0.7, "grad_norm": 0.4392670691013336, "learning_rate": 0.0005225015985073338, "loss": 1.8587, "step": 21103 }, { "epoch": 0.7, "grad_norm": 0.43069130182266235, "learning_rate": 0.0005224945874820094, "loss": 1.8037, "step": 21104 }, { "epoch": 0.7, "grad_norm": 0.4324231445789337, "learning_rate": 0.0005224875761866092, "loss": 1.8397, "step": 21105 }, { "epoch": 0.7, "grad_norm": 0.430867075920105, "learning_rate": 0.000522480564621142, "loss": 1.8114, "step": 21106 }, { "epoch": 0.7, "grad_norm": 0.444636732339859, "learning_rate": 0.0005224735527856162, "loss": 1.7805, "step": 21107 }, { "epoch": 0.7, "grad_norm": 0.45549505949020386, "learning_rate": 0.0005224665406800402, "loss": 1.88, "step": 21108 }, { "epoch": 0.7, "grad_norm": 0.4383436441421509, "learning_rate": 0.0005224595283044227, "loss": 1.7959, "step": 21109 }, { "epoch": 0.7, "grad_norm": 0.44986969232559204, "learning_rate": 0.000522452515658772, "loss": 1.8494, "step": 21110 }, { "epoch": 0.7, "grad_norm": 0.43307772278785706, "learning_rate": 0.0005224455027430968, "loss": 1.8141, "step": 21111 }, { "epoch": 0.7, "grad_norm": 0.43383869528770447, "learning_rate": 0.0005224384895574055, "loss": 1.8605, "step": 21112 }, { "epoch": 0.7, "grad_norm": 0.43419164419174194, "learning_rate": 0.0005224314761017066, "loss": 1.8332, "step": 21113 }, { "epoch": 0.7, "grad_norm": 0.4339519143104553, "learning_rate": 0.0005224244623760087, "loss": 1.8211, "step": 21114 }, { "epoch": 0.7, "grad_norm": 0.4357932209968567, "learning_rate": 0.0005224174483803202, "loss": 1.7478, "step": 21115 }, { "epoch": 0.7, "grad_norm": 0.42638638615608215, "learning_rate": 0.0005224104341146499, "loss": 1.8292, "step": 21116 }, { "epoch": 0.7, "grad_norm": 0.43040409684181213, "learning_rate": 0.000522403419579006, "loss": 1.8682, "step": 21117 }, { "epoch": 0.7, "grad_norm": 0.44112032651901245, "learning_rate": 0.0005223964047733972, "loss": 1.816, "step": 21118 }, { "epoch": 0.7, "grad_norm": 0.43808281421661377, "learning_rate": 0.0005223893896978318, "loss": 1.7669, "step": 21119 }, { "epoch": 0.7, "grad_norm": 0.4416479468345642, "learning_rate": 0.0005223823743523185, "loss": 1.8158, "step": 21120 }, { "epoch": 0.7, "grad_norm": 0.4810755252838135, "learning_rate": 0.0005223753587368658, "loss": 1.9233, "step": 21121 }, { "epoch": 0.7, "grad_norm": 0.4504704475402832, "learning_rate": 0.0005223683428514821, "loss": 1.851, "step": 21122 }, { "epoch": 0.7, "grad_norm": 0.4237769544124603, "learning_rate": 0.0005223613266961759, "loss": 1.8353, "step": 21123 }, { "epoch": 0.7, "grad_norm": 0.4436888098716736, "learning_rate": 0.0005223543102709561, "loss": 1.9259, "step": 21124 }, { "epoch": 0.7, "grad_norm": 0.4508984684944153, "learning_rate": 0.0005223472935758307, "loss": 1.8412, "step": 21125 }, { "epoch": 0.7, "grad_norm": 0.47759661078453064, "learning_rate": 0.0005223402766108085, "loss": 1.8716, "step": 21126 }, { "epoch": 0.7, "grad_norm": 0.4387320578098297, "learning_rate": 0.0005223332593758981, "loss": 1.9203, "step": 21127 }, { "epoch": 0.7, "grad_norm": 0.4306657016277313, "learning_rate": 0.0005223262418711077, "loss": 1.8341, "step": 21128 }, { "epoch": 0.7, "grad_norm": 0.42727550864219666, "learning_rate": 0.0005223192240964461, "loss": 1.868, "step": 21129 }, { "epoch": 0.7, "grad_norm": 0.4875178337097168, "learning_rate": 0.0005223122060519215, "loss": 1.8544, "step": 21130 }, { "epoch": 0.7, "grad_norm": 0.4559398889541626, "learning_rate": 0.0005223051877375429, "loss": 1.8281, "step": 21131 }, { "epoch": 0.7, "grad_norm": 0.4360824525356293, "learning_rate": 0.0005222981691533186, "loss": 1.8725, "step": 21132 }, { "epoch": 0.7, "grad_norm": 0.4287302494049072, "learning_rate": 0.0005222911502992568, "loss": 1.9261, "step": 21133 }, { "epoch": 0.7, "grad_norm": 0.4531969726085663, "learning_rate": 0.0005222841311753665, "loss": 1.8207, "step": 21134 }, { "epoch": 0.7, "grad_norm": 0.4458872973918915, "learning_rate": 0.000522277111781656, "loss": 1.8471, "step": 21135 }, { "epoch": 0.7, "grad_norm": 0.4428602159023285, "learning_rate": 0.0005222700921181338, "loss": 1.8863, "step": 21136 }, { "epoch": 0.7, "grad_norm": 0.4130476117134094, "learning_rate": 0.0005222630721848084, "loss": 1.7641, "step": 21137 }, { "epoch": 0.7, "grad_norm": 0.4291084408760071, "learning_rate": 0.0005222560519816885, "loss": 1.8463, "step": 21138 }, { "epoch": 0.7, "grad_norm": 0.43095171451568604, "learning_rate": 0.0005222490315087825, "loss": 1.806, "step": 21139 }, { "epoch": 0.7, "grad_norm": 0.4519530236721039, "learning_rate": 0.0005222420107660989, "loss": 1.8035, "step": 21140 }, { "epoch": 0.7, "grad_norm": 0.4228016138076782, "learning_rate": 0.0005222349897536461, "loss": 1.8315, "step": 21141 }, { "epoch": 0.7, "grad_norm": 1.471983551979065, "learning_rate": 0.0005222279684714329, "loss": 1.8299, "step": 21142 }, { "epoch": 0.7, "grad_norm": 0.45140692591667175, "learning_rate": 0.0005222209469194676, "loss": 1.7741, "step": 21143 }, { "epoch": 0.7, "grad_norm": 0.47398850321769714, "learning_rate": 0.0005222139250977589, "loss": 1.8771, "step": 21144 }, { "epoch": 0.7, "grad_norm": 0.44430050253868103, "learning_rate": 0.0005222069030063152, "loss": 1.912, "step": 21145 }, { "epoch": 0.7, "grad_norm": 0.449444055557251, "learning_rate": 0.0005221998806451452, "loss": 1.8708, "step": 21146 }, { "epoch": 0.7, "grad_norm": 0.4664631485939026, "learning_rate": 0.0005221928580142571, "loss": 1.8579, "step": 21147 }, { "epoch": 0.7, "grad_norm": 0.422035813331604, "learning_rate": 0.0005221858351136597, "loss": 1.8825, "step": 21148 }, { "epoch": 0.7, "grad_norm": 0.43659207224845886, "learning_rate": 0.0005221788119433614, "loss": 1.85, "step": 21149 }, { "epoch": 0.7, "grad_norm": 0.4334861636161804, "learning_rate": 0.0005221717885033708, "loss": 1.8521, "step": 21150 }, { "epoch": 0.7, "grad_norm": 0.4382447898387909, "learning_rate": 0.0005221647647936962, "loss": 1.8478, "step": 21151 }, { "epoch": 0.7, "grad_norm": 0.4375614821910858, "learning_rate": 0.0005221577408143466, "loss": 1.7908, "step": 21152 }, { "epoch": 0.7, "grad_norm": 0.44129621982574463, "learning_rate": 0.0005221507165653301, "loss": 1.8306, "step": 21153 }, { "epoch": 0.7, "grad_norm": 0.4339844882488251, "learning_rate": 0.0005221436920466553, "loss": 1.764, "step": 21154 }, { "epoch": 0.7, "grad_norm": 0.42398104071617126, "learning_rate": 0.0005221366672583308, "loss": 1.8481, "step": 21155 }, { "epoch": 0.7, "grad_norm": 0.41870346665382385, "learning_rate": 0.0005221296422003652, "loss": 1.826, "step": 21156 }, { "epoch": 0.7, "grad_norm": 0.449174165725708, "learning_rate": 0.0005221226168727669, "loss": 1.9357, "step": 21157 }, { "epoch": 0.7, "grad_norm": 0.43329891562461853, "learning_rate": 0.0005221155912755444, "loss": 1.8671, "step": 21158 }, { "epoch": 0.7, "grad_norm": 0.46009400486946106, "learning_rate": 0.0005221085654087065, "loss": 1.8193, "step": 21159 }, { "epoch": 0.7, "grad_norm": 0.4212043583393097, "learning_rate": 0.0005221015392722614, "loss": 1.7924, "step": 21160 }, { "epoch": 0.7, "grad_norm": 1.1150983572006226, "learning_rate": 0.0005220945128662178, "loss": 1.8713, "step": 21161 }, { "epoch": 0.7, "grad_norm": 0.42696306109428406, "learning_rate": 0.0005220874861905841, "loss": 1.9055, "step": 21162 }, { "epoch": 0.7, "grad_norm": 0.4273970127105713, "learning_rate": 0.0005220804592453689, "loss": 1.8355, "step": 21163 }, { "epoch": 0.7, "grad_norm": 0.44392526149749756, "learning_rate": 0.0005220734320305809, "loss": 1.8809, "step": 21164 }, { "epoch": 0.7, "grad_norm": 0.4515552520751953, "learning_rate": 0.0005220664045462284, "loss": 1.9046, "step": 21165 }, { "epoch": 0.7, "grad_norm": 0.43362829089164734, "learning_rate": 0.00052205937679232, "loss": 1.8156, "step": 21166 }, { "epoch": 0.7, "grad_norm": 0.43622562289237976, "learning_rate": 0.0005220523487688641, "loss": 1.8657, "step": 21167 }, { "epoch": 0.7, "grad_norm": 0.4314477741718292, "learning_rate": 0.0005220453204758695, "loss": 1.8145, "step": 21168 }, { "epoch": 0.7, "grad_norm": 0.43770700693130493, "learning_rate": 0.0005220382919133447, "loss": 1.8913, "step": 21169 }, { "epoch": 0.7, "grad_norm": 0.426920086145401, "learning_rate": 0.0005220312630812979, "loss": 1.8888, "step": 21170 }, { "epoch": 0.7, "grad_norm": 0.4430129826068878, "learning_rate": 0.000522024233979738, "loss": 1.8173, "step": 21171 }, { "epoch": 0.7, "grad_norm": 0.43022486567497253, "learning_rate": 0.0005220172046086735, "loss": 1.8567, "step": 21172 }, { "epoch": 0.7, "grad_norm": 0.43761974573135376, "learning_rate": 0.0005220101749681126, "loss": 1.8328, "step": 21173 }, { "epoch": 0.7, "grad_norm": 0.43280738592147827, "learning_rate": 0.0005220031450580641, "loss": 1.8321, "step": 21174 }, { "epoch": 0.7, "grad_norm": 0.42319655418395996, "learning_rate": 0.0005219961148785366, "loss": 1.7892, "step": 21175 }, { "epoch": 0.7, "grad_norm": 0.440804123878479, "learning_rate": 0.0005219890844295385, "loss": 1.8918, "step": 21176 }, { "epoch": 0.7, "grad_norm": 0.4278748333454132, "learning_rate": 0.0005219820537110783, "loss": 1.8596, "step": 21177 }, { "epoch": 0.7, "grad_norm": 0.435960590839386, "learning_rate": 0.0005219750227231646, "loss": 1.8173, "step": 21178 }, { "epoch": 0.7, "grad_norm": 0.4299274981021881, "learning_rate": 0.0005219679914658061, "loss": 1.8805, "step": 21179 }, { "epoch": 0.7, "grad_norm": 0.4272708296775818, "learning_rate": 0.000521960959939011, "loss": 1.855, "step": 21180 }, { "epoch": 0.7, "grad_norm": 0.43853625655174255, "learning_rate": 0.0005219539281427879, "loss": 1.764, "step": 21181 }, { "epoch": 0.7, "grad_norm": 0.4395977258682251, "learning_rate": 0.0005219468960771457, "loss": 1.8495, "step": 21182 }, { "epoch": 0.7, "grad_norm": 0.4282273054122925, "learning_rate": 0.0005219398637420924, "loss": 1.8932, "step": 21183 }, { "epoch": 0.7, "grad_norm": 0.4496726095676422, "learning_rate": 0.0005219328311376369, "loss": 1.8796, "step": 21184 }, { "epoch": 0.7, "grad_norm": 0.42759764194488525, "learning_rate": 0.0005219257982637878, "loss": 1.8978, "step": 21185 }, { "epoch": 0.7, "grad_norm": 0.42504119873046875, "learning_rate": 0.0005219187651205533, "loss": 1.8833, "step": 21186 }, { "epoch": 0.7, "grad_norm": 0.42466023564338684, "learning_rate": 0.0005219117317079422, "loss": 1.7998, "step": 21187 }, { "epoch": 0.7, "grad_norm": 0.5489749908447266, "learning_rate": 0.000521904698025963, "loss": 1.8716, "step": 21188 }, { "epoch": 0.7, "grad_norm": 0.41473543643951416, "learning_rate": 0.0005218976640746241, "loss": 1.7551, "step": 21189 }, { "epoch": 0.7, "grad_norm": 0.4214373826980591, "learning_rate": 0.000521890629853934, "loss": 1.8899, "step": 21190 }, { "epoch": 0.71, "grad_norm": 0.43166643381118774, "learning_rate": 0.0005218835953639015, "loss": 1.8372, "step": 21191 }, { "epoch": 0.71, "grad_norm": 0.4265960454940796, "learning_rate": 0.0005218765606045351, "loss": 1.8463, "step": 21192 }, { "epoch": 0.71, "grad_norm": 0.42583560943603516, "learning_rate": 0.0005218695255758432, "loss": 1.8378, "step": 21193 }, { "epoch": 0.71, "grad_norm": 0.4346538782119751, "learning_rate": 0.0005218624902778344, "loss": 1.8155, "step": 21194 }, { "epoch": 0.71, "grad_norm": 0.42571309208869934, "learning_rate": 0.0005218554547105172, "loss": 1.7944, "step": 21195 }, { "epoch": 0.71, "grad_norm": 0.4325740337371826, "learning_rate": 0.0005218484188739001, "loss": 1.8552, "step": 21196 }, { "epoch": 0.71, "grad_norm": 0.4442281126976013, "learning_rate": 0.0005218413827679918, "loss": 1.8252, "step": 21197 }, { "epoch": 0.71, "grad_norm": 0.44698116183280945, "learning_rate": 0.0005218343463928007, "loss": 1.8563, "step": 21198 }, { "epoch": 0.71, "grad_norm": 0.4206990599632263, "learning_rate": 0.0005218273097483354, "loss": 1.8295, "step": 21199 }, { "epoch": 0.71, "grad_norm": 0.4307212233543396, "learning_rate": 0.0005218202728346044, "loss": 1.8538, "step": 21200 }, { "epoch": 0.71, "grad_norm": 0.42706623673439026, "learning_rate": 0.0005218132356516164, "loss": 1.8324, "step": 21201 }, { "epoch": 0.71, "grad_norm": 0.4262615740299225, "learning_rate": 0.0005218061981993796, "loss": 1.8132, "step": 21202 }, { "epoch": 0.71, "grad_norm": 0.4301571846008301, "learning_rate": 0.000521799160477903, "loss": 1.9268, "step": 21203 }, { "epoch": 0.71, "grad_norm": 0.4361459016799927, "learning_rate": 0.0005217921224871947, "loss": 1.8146, "step": 21204 }, { "epoch": 0.71, "grad_norm": 0.438353955745697, "learning_rate": 0.0005217850842272635, "loss": 1.8131, "step": 21205 }, { "epoch": 0.71, "grad_norm": 0.42093613743782043, "learning_rate": 0.0005217780456981179, "loss": 1.7607, "step": 21206 }, { "epoch": 0.71, "grad_norm": 0.4260038733482361, "learning_rate": 0.0005217710068997664, "loss": 1.777, "step": 21207 }, { "epoch": 0.71, "grad_norm": 0.43573635816574097, "learning_rate": 0.0005217639678322176, "loss": 1.7903, "step": 21208 }, { "epoch": 0.71, "grad_norm": 0.429385244846344, "learning_rate": 0.0005217569284954801, "loss": 1.8012, "step": 21209 }, { "epoch": 0.71, "grad_norm": 0.4475518465042114, "learning_rate": 0.0005217498888895623, "loss": 1.8837, "step": 21210 }, { "epoch": 0.71, "grad_norm": 0.4220882058143616, "learning_rate": 0.0005217428490144728, "loss": 1.7852, "step": 21211 }, { "epoch": 0.71, "grad_norm": 0.4255184531211853, "learning_rate": 0.0005217358088702201, "loss": 1.83, "step": 21212 }, { "epoch": 0.71, "grad_norm": 0.42181411385536194, "learning_rate": 0.000521728768456813, "loss": 1.8333, "step": 21213 }, { "epoch": 0.71, "grad_norm": 0.4584621489048004, "learning_rate": 0.0005217217277742595, "loss": 1.8617, "step": 21214 }, { "epoch": 0.71, "grad_norm": 0.4535532593727112, "learning_rate": 0.0005217146868225688, "loss": 1.7737, "step": 21215 }, { "epoch": 0.71, "grad_norm": 0.4426797330379486, "learning_rate": 0.000521707645601749, "loss": 1.8734, "step": 21216 }, { "epoch": 0.71, "grad_norm": 0.417672336101532, "learning_rate": 0.0005217006041118088, "loss": 1.7951, "step": 21217 }, { "epoch": 0.71, "grad_norm": 0.4348212480545044, "learning_rate": 0.0005216935623527567, "loss": 1.8689, "step": 21218 }, { "epoch": 0.71, "grad_norm": 0.4326713979244232, "learning_rate": 0.0005216865203246014, "loss": 1.8724, "step": 21219 }, { "epoch": 0.71, "grad_norm": 0.45296043157577515, "learning_rate": 0.0005216794780273513, "loss": 1.7934, "step": 21220 }, { "epoch": 0.71, "grad_norm": 0.42130622267723083, "learning_rate": 0.0005216724354610148, "loss": 1.8337, "step": 21221 }, { "epoch": 0.71, "grad_norm": 0.44172587990760803, "learning_rate": 0.0005216653926256008, "loss": 1.8343, "step": 21222 }, { "epoch": 0.71, "grad_norm": 0.44361600279808044, "learning_rate": 0.0005216583495211177, "loss": 1.8383, "step": 21223 }, { "epoch": 0.71, "grad_norm": 0.4439113438129425, "learning_rate": 0.0005216513061475739, "loss": 1.8214, "step": 21224 }, { "epoch": 0.71, "grad_norm": 0.44024455547332764, "learning_rate": 0.0005216442625049781, "loss": 1.8835, "step": 21225 }, { "epoch": 0.71, "grad_norm": 0.44172006845474243, "learning_rate": 0.0005216372185933389, "loss": 1.8039, "step": 21226 }, { "epoch": 0.71, "grad_norm": 0.4282498359680176, "learning_rate": 0.0005216301744126647, "loss": 1.7703, "step": 21227 }, { "epoch": 0.71, "grad_norm": 0.4356370270252228, "learning_rate": 0.0005216231299629642, "loss": 1.814, "step": 21228 }, { "epoch": 0.71, "grad_norm": 0.4380134642124176, "learning_rate": 0.0005216160852442458, "loss": 1.7913, "step": 21229 }, { "epoch": 0.71, "grad_norm": 0.41952553391456604, "learning_rate": 0.0005216090402565181, "loss": 1.7825, "step": 21230 }, { "epoch": 0.71, "grad_norm": 0.43808677792549133, "learning_rate": 0.0005216019949997897, "loss": 1.8897, "step": 21231 }, { "epoch": 0.71, "grad_norm": 0.4263239800930023, "learning_rate": 0.0005215949494740693, "loss": 1.8244, "step": 21232 }, { "epoch": 0.71, "grad_norm": 0.43712517619132996, "learning_rate": 0.0005215879036793651, "loss": 1.7651, "step": 21233 }, { "epoch": 0.71, "grad_norm": 0.4092442989349365, "learning_rate": 0.0005215808576156858, "loss": 1.8087, "step": 21234 }, { "epoch": 0.71, "grad_norm": 0.4553133249282837, "learning_rate": 0.0005215738112830401, "loss": 1.8956, "step": 21235 }, { "epoch": 0.71, "grad_norm": 0.42086732387542725, "learning_rate": 0.0005215667646814364, "loss": 1.8433, "step": 21236 }, { "epoch": 0.71, "grad_norm": 0.43961137533187866, "learning_rate": 0.0005215597178108833, "loss": 1.9149, "step": 21237 }, { "epoch": 0.71, "grad_norm": 0.42943820357322693, "learning_rate": 0.0005215526706713894, "loss": 1.8966, "step": 21238 }, { "epoch": 0.71, "grad_norm": 0.4371103048324585, "learning_rate": 0.000521545623262963, "loss": 1.8525, "step": 21239 }, { "epoch": 0.71, "grad_norm": 0.4273782968521118, "learning_rate": 0.0005215385755856131, "loss": 1.7461, "step": 21240 }, { "epoch": 0.71, "grad_norm": 0.42601707577705383, "learning_rate": 0.0005215315276393479, "loss": 1.8523, "step": 21241 }, { "epoch": 0.71, "grad_norm": 0.43302327394485474, "learning_rate": 0.000521524479424176, "loss": 1.8449, "step": 21242 }, { "epoch": 0.71, "grad_norm": 0.41418057680130005, "learning_rate": 0.0005215174309401062, "loss": 1.7747, "step": 21243 }, { "epoch": 0.71, "grad_norm": 0.4185602366924286, "learning_rate": 0.0005215103821871467, "loss": 1.8067, "step": 21244 }, { "epoch": 0.71, "grad_norm": 0.43710610270500183, "learning_rate": 0.0005215033331653064, "loss": 1.8676, "step": 21245 }, { "epoch": 0.71, "grad_norm": 0.42215076088905334, "learning_rate": 0.0005214962838745935, "loss": 1.8755, "step": 21246 }, { "epoch": 0.71, "grad_norm": 0.440894216299057, "learning_rate": 0.0005214892343150168, "loss": 1.8564, "step": 21247 }, { "epoch": 0.71, "grad_norm": 0.41662362217903137, "learning_rate": 0.000521482184486585, "loss": 1.8386, "step": 21248 }, { "epoch": 0.71, "grad_norm": 0.4360628128051758, "learning_rate": 0.0005214751343893063, "loss": 1.7671, "step": 21249 }, { "epoch": 0.71, "grad_norm": 0.44479724764823914, "learning_rate": 0.0005214680840231894, "loss": 1.8257, "step": 21250 }, { "epoch": 0.71, "grad_norm": 0.4172266125679016, "learning_rate": 0.0005214610333882429, "loss": 1.7711, "step": 21251 }, { "epoch": 0.71, "grad_norm": 0.41898027062416077, "learning_rate": 0.0005214539824844755, "loss": 1.8245, "step": 21252 }, { "epoch": 0.71, "grad_norm": 0.41950348019599915, "learning_rate": 0.0005214469313118953, "loss": 1.8445, "step": 21253 }, { "epoch": 0.71, "grad_norm": 0.45028674602508545, "learning_rate": 0.0005214398798705114, "loss": 1.8532, "step": 21254 }, { "epoch": 0.71, "grad_norm": 0.41939544677734375, "learning_rate": 0.0005214328281603319, "loss": 1.7735, "step": 21255 }, { "epoch": 0.71, "grad_norm": 0.45967036485671997, "learning_rate": 0.0005214257761813657, "loss": 1.9861, "step": 21256 }, { "epoch": 0.71, "grad_norm": 0.438526451587677, "learning_rate": 0.0005214187239336212, "loss": 1.8398, "step": 21257 }, { "epoch": 0.71, "grad_norm": 0.4300481975078583, "learning_rate": 0.0005214116714171071, "loss": 1.8238, "step": 21258 }, { "epoch": 0.71, "grad_norm": 0.44773900508880615, "learning_rate": 0.0005214046186318317, "loss": 1.8538, "step": 21259 }, { "epoch": 0.71, "grad_norm": 0.437459260225296, "learning_rate": 0.0005213975655778038, "loss": 1.8382, "step": 21260 }, { "epoch": 0.71, "grad_norm": 0.4496385455131531, "learning_rate": 0.0005213905122550318, "loss": 1.8475, "step": 21261 }, { "epoch": 0.71, "grad_norm": 0.4270399808883667, "learning_rate": 0.0005213834586635244, "loss": 1.8603, "step": 21262 }, { "epoch": 0.71, "grad_norm": 0.4249850809574127, "learning_rate": 0.0005213764048032901, "loss": 1.8188, "step": 21263 }, { "epoch": 0.71, "grad_norm": 0.43056076765060425, "learning_rate": 0.0005213693506743375, "loss": 1.743, "step": 21264 }, { "epoch": 0.71, "grad_norm": 0.4417617917060852, "learning_rate": 0.0005213622962766751, "loss": 1.8095, "step": 21265 }, { "epoch": 0.71, "grad_norm": 0.44923433661460876, "learning_rate": 0.0005213552416103114, "loss": 1.802, "step": 21266 }, { "epoch": 0.71, "grad_norm": 0.4249516427516937, "learning_rate": 0.0005213481866752552, "loss": 1.8672, "step": 21267 }, { "epoch": 0.71, "grad_norm": 0.4301784038543701, "learning_rate": 0.0005213411314715148, "loss": 1.8246, "step": 21268 }, { "epoch": 0.71, "grad_norm": 0.42129087448120117, "learning_rate": 0.000521334075999099, "loss": 1.8048, "step": 21269 }, { "epoch": 0.71, "grad_norm": 0.4355948269367218, "learning_rate": 0.0005213270202580161, "loss": 1.8884, "step": 21270 }, { "epoch": 0.71, "grad_norm": 0.42800459265708923, "learning_rate": 0.0005213199642482749, "loss": 1.8464, "step": 21271 }, { "epoch": 0.71, "grad_norm": 0.43005603551864624, "learning_rate": 0.0005213129079698839, "loss": 1.7797, "step": 21272 }, { "epoch": 0.71, "grad_norm": 0.43718576431274414, "learning_rate": 0.0005213058514228515, "loss": 1.7724, "step": 21273 }, { "epoch": 0.71, "grad_norm": 0.4493166506290436, "learning_rate": 0.0005212987946071865, "loss": 1.7878, "step": 21274 }, { "epoch": 0.71, "grad_norm": 0.41817769408226013, "learning_rate": 0.0005212917375228974, "loss": 1.8254, "step": 21275 }, { "epoch": 0.71, "grad_norm": 0.43689292669296265, "learning_rate": 0.0005212846801699927, "loss": 1.8524, "step": 21276 }, { "epoch": 0.71, "grad_norm": 0.4382191300392151, "learning_rate": 0.000521277622548481, "loss": 1.7992, "step": 21277 }, { "epoch": 0.71, "grad_norm": 0.4173564314842224, "learning_rate": 0.0005212705646583708, "loss": 1.8257, "step": 21278 }, { "epoch": 0.71, "grad_norm": 0.7039594650268555, "learning_rate": 0.0005212635064996709, "loss": 1.8655, "step": 21279 }, { "epoch": 0.71, "grad_norm": 0.44122985005378723, "learning_rate": 0.0005212564480723895, "loss": 1.8887, "step": 21280 }, { "epoch": 0.71, "grad_norm": 0.41987812519073486, "learning_rate": 0.0005212493893765356, "loss": 1.8128, "step": 21281 }, { "epoch": 0.71, "grad_norm": 0.4328310191631317, "learning_rate": 0.0005212423304121174, "loss": 1.7646, "step": 21282 }, { "epoch": 0.71, "grad_norm": 0.4585478603839874, "learning_rate": 0.0005212352711791436, "loss": 1.8612, "step": 21283 }, { "epoch": 0.71, "grad_norm": 0.44601064920425415, "learning_rate": 0.0005212282116776229, "loss": 1.8026, "step": 21284 }, { "epoch": 0.71, "grad_norm": 0.4465829133987427, "learning_rate": 0.0005212211519075636, "loss": 1.8402, "step": 21285 }, { "epoch": 0.71, "grad_norm": 0.4337792992591858, "learning_rate": 0.0005212140918689745, "loss": 1.8024, "step": 21286 }, { "epoch": 0.71, "grad_norm": 0.45708146691322327, "learning_rate": 0.000521207031561864, "loss": 1.9303, "step": 21287 }, { "epoch": 0.71, "grad_norm": 0.4348754584789276, "learning_rate": 0.0005211999709862408, "loss": 1.8789, "step": 21288 }, { "epoch": 0.71, "grad_norm": 0.42101040482521057, "learning_rate": 0.0005211929101421136, "loss": 1.8325, "step": 21289 }, { "epoch": 0.71, "grad_norm": 0.47593915462493896, "learning_rate": 0.0005211858490294906, "loss": 1.8639, "step": 21290 }, { "epoch": 0.71, "grad_norm": 0.4428612291812897, "learning_rate": 0.0005211787876483806, "loss": 1.8492, "step": 21291 }, { "epoch": 0.71, "grad_norm": 0.4371277987957001, "learning_rate": 0.0005211717259987922, "loss": 1.8521, "step": 21292 }, { "epoch": 0.71, "grad_norm": 0.4243661165237427, "learning_rate": 0.0005211646640807337, "loss": 1.7871, "step": 21293 }, { "epoch": 0.71, "grad_norm": 0.45639923214912415, "learning_rate": 0.0005211576018942141, "loss": 1.8936, "step": 21294 }, { "epoch": 0.71, "grad_norm": 0.4293202757835388, "learning_rate": 0.0005211505394392417, "loss": 1.7878, "step": 21295 }, { "epoch": 0.71, "grad_norm": 0.4232805371284485, "learning_rate": 0.0005211434767158252, "loss": 1.8368, "step": 21296 }, { "epoch": 0.71, "grad_norm": 0.4320579767227173, "learning_rate": 0.000521136413723973, "loss": 1.8611, "step": 21297 }, { "epoch": 0.71, "grad_norm": 0.46501633524894714, "learning_rate": 0.0005211293504636938, "loss": 1.8739, "step": 21298 }, { "epoch": 0.71, "grad_norm": 0.42474663257598877, "learning_rate": 0.0005211222869349963, "loss": 1.884, "step": 21299 }, { "epoch": 0.71, "grad_norm": 0.4286298155784607, "learning_rate": 0.0005211152231378888, "loss": 1.8363, "step": 21300 }, { "epoch": 0.71, "grad_norm": 0.4250563085079193, "learning_rate": 0.00052110815907238, "loss": 1.8302, "step": 21301 }, { "epoch": 0.71, "grad_norm": 0.4398568570613861, "learning_rate": 0.0005211010947384784, "loss": 1.7931, "step": 21302 }, { "epoch": 0.71, "grad_norm": 0.4212889075279236, "learning_rate": 0.0005210940301361926, "loss": 1.834, "step": 21303 }, { "epoch": 0.71, "grad_norm": 0.43157243728637695, "learning_rate": 0.0005210869652655315, "loss": 1.8462, "step": 21304 }, { "epoch": 0.71, "grad_norm": 0.4292054772377014, "learning_rate": 0.0005210799001265032, "loss": 1.9062, "step": 21305 }, { "epoch": 0.71, "grad_norm": 0.43378546833992004, "learning_rate": 0.0005210728347191166, "loss": 1.7607, "step": 21306 }, { "epoch": 0.71, "grad_norm": 0.4284727871417999, "learning_rate": 0.00052106576904338, "loss": 1.7788, "step": 21307 }, { "epoch": 0.71, "grad_norm": 0.43820470571517944, "learning_rate": 0.0005210587030993022, "loss": 1.8881, "step": 21308 }, { "epoch": 0.71, "grad_norm": 0.43605491518974304, "learning_rate": 0.0005210516368868917, "loss": 1.8202, "step": 21309 }, { "epoch": 0.71, "grad_norm": 0.4244251549243927, "learning_rate": 0.0005210445704061571, "loss": 1.8186, "step": 21310 }, { "epoch": 0.71, "grad_norm": 0.4264986515045166, "learning_rate": 0.0005210375036571069, "loss": 1.8262, "step": 21311 }, { "epoch": 0.71, "grad_norm": 0.43424808979034424, "learning_rate": 0.0005210304366397499, "loss": 1.8576, "step": 21312 }, { "epoch": 0.71, "grad_norm": 0.43938419222831726, "learning_rate": 0.0005210233693540944, "loss": 1.8875, "step": 21313 }, { "epoch": 0.71, "grad_norm": 0.4410247206687927, "learning_rate": 0.0005210163018001491, "loss": 1.8084, "step": 21314 }, { "epoch": 0.71, "grad_norm": 0.43537065386772156, "learning_rate": 0.0005210092339779226, "loss": 1.7982, "step": 21315 }, { "epoch": 0.71, "grad_norm": 0.42774781584739685, "learning_rate": 0.0005210021658874234, "loss": 1.8367, "step": 21316 }, { "epoch": 0.71, "grad_norm": 0.4381715953350067, "learning_rate": 0.0005209950975286601, "loss": 1.8417, "step": 21317 }, { "epoch": 0.71, "grad_norm": 0.4320298731327057, "learning_rate": 0.0005209880289016414, "loss": 1.8459, "step": 21318 }, { "epoch": 0.71, "grad_norm": 0.42873793840408325, "learning_rate": 0.0005209809600063757, "loss": 1.8444, "step": 21319 }, { "epoch": 0.71, "grad_norm": 0.42995163798332214, "learning_rate": 0.0005209738908428718, "loss": 1.7772, "step": 21320 }, { "epoch": 0.71, "grad_norm": 0.42881250381469727, "learning_rate": 0.0005209668214111381, "loss": 1.8341, "step": 21321 }, { "epoch": 0.71, "grad_norm": 0.4409390687942505, "learning_rate": 0.0005209597517111833, "loss": 1.8161, "step": 21322 }, { "epoch": 0.71, "grad_norm": 0.4256380796432495, "learning_rate": 0.0005209526817430159, "loss": 1.8076, "step": 21323 }, { "epoch": 0.71, "grad_norm": 0.42087873816490173, "learning_rate": 0.0005209456115066445, "loss": 1.8121, "step": 21324 }, { "epoch": 0.71, "grad_norm": 0.4617651402950287, "learning_rate": 0.0005209385410020775, "loss": 1.8426, "step": 21325 }, { "epoch": 0.71, "grad_norm": 0.42335644364356995, "learning_rate": 0.0005209314702293238, "loss": 1.8764, "step": 21326 }, { "epoch": 0.71, "grad_norm": 0.445252001285553, "learning_rate": 0.0005209243991883918, "loss": 1.8793, "step": 21327 }, { "epoch": 0.71, "grad_norm": 0.7164362072944641, "learning_rate": 0.0005209173278792902, "loss": 1.9184, "step": 21328 }, { "epoch": 0.71, "grad_norm": 0.43465420603752136, "learning_rate": 0.0005209102563020275, "loss": 1.8504, "step": 21329 }, { "epoch": 0.71, "grad_norm": 0.430216521024704, "learning_rate": 0.0005209031844566124, "loss": 1.8213, "step": 21330 }, { "epoch": 0.71, "grad_norm": 0.4440605342388153, "learning_rate": 0.0005208961123430532, "loss": 1.8682, "step": 21331 }, { "epoch": 0.71, "grad_norm": 0.4430389106273651, "learning_rate": 0.0005208890399613587, "loss": 1.8114, "step": 21332 }, { "epoch": 0.71, "grad_norm": 0.43108853697776794, "learning_rate": 0.0005208819673115376, "loss": 1.793, "step": 21333 }, { "epoch": 0.71, "grad_norm": 0.44037777185440063, "learning_rate": 0.0005208748943935982, "loss": 1.9361, "step": 21334 }, { "epoch": 0.71, "grad_norm": 0.4232087731361389, "learning_rate": 0.0005208678212075492, "loss": 1.8397, "step": 21335 }, { "epoch": 0.71, "grad_norm": 0.4246220588684082, "learning_rate": 0.0005208607477533993, "loss": 1.7803, "step": 21336 }, { "epoch": 0.71, "grad_norm": 0.4278148412704468, "learning_rate": 0.000520853674031157, "loss": 1.8252, "step": 21337 }, { "epoch": 0.71, "grad_norm": 0.42941319942474365, "learning_rate": 0.0005208466000408308, "loss": 1.8858, "step": 21338 }, { "epoch": 0.71, "grad_norm": 0.4237855076789856, "learning_rate": 0.0005208395257824295, "loss": 1.8856, "step": 21339 }, { "epoch": 0.71, "grad_norm": 0.43418052792549133, "learning_rate": 0.0005208324512559615, "loss": 1.8171, "step": 21340 }, { "epoch": 0.71, "grad_norm": 0.44410189986228943, "learning_rate": 0.0005208253764614353, "loss": 1.9057, "step": 21341 }, { "epoch": 0.71, "grad_norm": 0.429337739944458, "learning_rate": 0.0005208183013988597, "loss": 1.8746, "step": 21342 }, { "epoch": 0.71, "grad_norm": 0.430098295211792, "learning_rate": 0.0005208112260682433, "loss": 1.8391, "step": 21343 }, { "epoch": 0.71, "grad_norm": 0.43548470735549927, "learning_rate": 0.0005208041504695946, "loss": 1.81, "step": 21344 }, { "epoch": 0.71, "grad_norm": 0.425619900226593, "learning_rate": 0.0005207970746029221, "loss": 1.794, "step": 21345 }, { "epoch": 0.71, "grad_norm": 0.43410131335258484, "learning_rate": 0.0005207899984682345, "loss": 1.9117, "step": 21346 }, { "epoch": 0.71, "grad_norm": 0.4283220171928406, "learning_rate": 0.0005207829220655405, "loss": 1.8189, "step": 21347 }, { "epoch": 0.71, "grad_norm": 0.43426352739334106, "learning_rate": 0.0005207758453948484, "loss": 1.8444, "step": 21348 }, { "epoch": 0.71, "grad_norm": 0.4142709970474243, "learning_rate": 0.0005207687684561671, "loss": 1.7478, "step": 21349 }, { "epoch": 0.71, "grad_norm": 0.43318668007850647, "learning_rate": 0.000520761691249505, "loss": 1.8649, "step": 21350 }, { "epoch": 0.71, "grad_norm": 0.42693057656288147, "learning_rate": 0.0005207546137748707, "loss": 1.8144, "step": 21351 }, { "epoch": 0.71, "grad_norm": 0.4499768912792206, "learning_rate": 0.0005207475360322728, "loss": 1.8624, "step": 21352 }, { "epoch": 0.71, "grad_norm": 0.4525132477283478, "learning_rate": 0.00052074045802172, "loss": 1.9372, "step": 21353 }, { "epoch": 0.71, "grad_norm": 0.4388931691646576, "learning_rate": 0.0005207333797432207, "loss": 1.7693, "step": 21354 }, { "epoch": 0.71, "grad_norm": 0.4364243149757385, "learning_rate": 0.0005207263011967837, "loss": 1.824, "step": 21355 }, { "epoch": 0.71, "grad_norm": 0.42889517545700073, "learning_rate": 0.0005207192223824174, "loss": 1.8411, "step": 21356 }, { "epoch": 0.71, "grad_norm": 0.44859060645103455, "learning_rate": 0.0005207121433001305, "loss": 1.7408, "step": 21357 }, { "epoch": 0.71, "grad_norm": 0.6276658177375793, "learning_rate": 0.0005207050639499317, "loss": 1.9138, "step": 21358 }, { "epoch": 0.71, "grad_norm": 0.4254819452762604, "learning_rate": 0.0005206979843318293, "loss": 1.7676, "step": 21359 }, { "epoch": 0.71, "grad_norm": 0.4825432002544403, "learning_rate": 0.0005206909044458323, "loss": 1.8782, "step": 21360 }, { "epoch": 0.71, "grad_norm": 0.4175907373428345, "learning_rate": 0.0005206838242919489, "loss": 1.7981, "step": 21361 }, { "epoch": 0.71, "grad_norm": 0.4372648596763611, "learning_rate": 0.0005206767438701879, "loss": 1.9462, "step": 21362 }, { "epoch": 0.71, "grad_norm": 0.4217284619808197, "learning_rate": 0.0005206696631805577, "loss": 1.7991, "step": 21363 }, { "epoch": 0.71, "grad_norm": 0.4448961019515991, "learning_rate": 0.0005206625822230673, "loss": 1.7911, "step": 21364 }, { "epoch": 0.71, "grad_norm": 0.4308907985687256, "learning_rate": 0.0005206555009977248, "loss": 1.794, "step": 21365 }, { "epoch": 0.71, "grad_norm": 0.4447050094604492, "learning_rate": 0.0005206484195045391, "loss": 1.8276, "step": 21366 }, { "epoch": 0.71, "grad_norm": 0.45202383399009705, "learning_rate": 0.0005206413377435188, "loss": 1.829, "step": 21367 }, { "epoch": 0.71, "grad_norm": 0.42396771907806396, "learning_rate": 0.0005206342557146724, "loss": 1.7524, "step": 21368 }, { "epoch": 0.71, "grad_norm": 0.4403601288795471, "learning_rate": 0.0005206271734180085, "loss": 1.8408, "step": 21369 }, { "epoch": 0.71, "grad_norm": 0.4486795663833618, "learning_rate": 0.0005206200908535358, "loss": 1.8543, "step": 21370 }, { "epoch": 0.71, "grad_norm": 0.44277510046958923, "learning_rate": 0.0005206130080212628, "loss": 1.9098, "step": 21371 }, { "epoch": 0.71, "grad_norm": 0.4478139281272888, "learning_rate": 0.000520605924921198, "loss": 1.8751, "step": 21372 }, { "epoch": 0.71, "grad_norm": 0.43251368403434753, "learning_rate": 0.0005205988415533501, "loss": 1.8401, "step": 21373 }, { "epoch": 0.71, "grad_norm": 0.4103361666202545, "learning_rate": 0.0005205917579177278, "loss": 1.8193, "step": 21374 }, { "epoch": 0.71, "grad_norm": 0.4329988360404968, "learning_rate": 0.0005205846740143396, "loss": 1.8304, "step": 21375 }, { "epoch": 0.71, "grad_norm": 0.44825631380081177, "learning_rate": 0.0005205775898431941, "loss": 1.816, "step": 21376 }, { "epoch": 0.71, "grad_norm": 0.4339866042137146, "learning_rate": 0.0005205705054042998, "loss": 1.8772, "step": 21377 }, { "epoch": 0.71, "grad_norm": 0.4275878667831421, "learning_rate": 0.0005205634206976655, "loss": 1.7824, "step": 21378 }, { "epoch": 0.71, "grad_norm": 0.42602625489234924, "learning_rate": 0.0005205563357232999, "loss": 1.8365, "step": 21379 }, { "epoch": 0.71, "grad_norm": 0.41906386613845825, "learning_rate": 0.0005205492504812111, "loss": 1.7702, "step": 21380 }, { "epoch": 0.71, "grad_norm": 0.45133164525032043, "learning_rate": 0.0005205421649714082, "loss": 1.867, "step": 21381 }, { "epoch": 0.71, "grad_norm": 0.4569014012813568, "learning_rate": 0.0005205350791938995, "loss": 1.8425, "step": 21382 }, { "epoch": 0.71, "grad_norm": 0.4377654194831848, "learning_rate": 0.0005205279931486938, "loss": 1.8504, "step": 21383 }, { "epoch": 0.71, "grad_norm": 0.43534302711486816, "learning_rate": 0.0005205209068357995, "loss": 1.7507, "step": 21384 }, { "epoch": 0.71, "grad_norm": 0.4355045557022095, "learning_rate": 0.0005205138202552253, "loss": 1.7793, "step": 21385 }, { "epoch": 0.71, "grad_norm": 0.4451741874217987, "learning_rate": 0.00052050673340698, "loss": 1.8071, "step": 21386 }, { "epoch": 0.71, "grad_norm": 0.441599041223526, "learning_rate": 0.0005204996462910719, "loss": 1.7881, "step": 21387 }, { "epoch": 0.71, "grad_norm": 0.439567506313324, "learning_rate": 0.0005204925589075096, "loss": 1.862, "step": 21388 }, { "epoch": 0.71, "grad_norm": 0.46468591690063477, "learning_rate": 0.0005204854712563019, "loss": 1.7859, "step": 21389 }, { "epoch": 0.71, "grad_norm": 0.49333620071411133, "learning_rate": 0.0005204783833374574, "loss": 1.8423, "step": 21390 }, { "epoch": 0.71, "grad_norm": 0.48130032420158386, "learning_rate": 0.0005204712951509846, "loss": 1.8666, "step": 21391 }, { "epoch": 0.71, "grad_norm": 0.44127050042152405, "learning_rate": 0.0005204642066968921, "loss": 1.8382, "step": 21392 }, { "epoch": 0.71, "grad_norm": 0.4393925964832306, "learning_rate": 0.0005204571179751886, "loss": 1.8718, "step": 21393 }, { "epoch": 0.71, "grad_norm": 0.41980865597724915, "learning_rate": 0.0005204500289858826, "loss": 1.8113, "step": 21394 }, { "epoch": 0.71, "grad_norm": 0.432186096906662, "learning_rate": 0.0005204429397289826, "loss": 1.7719, "step": 21395 }, { "epoch": 0.71, "grad_norm": 0.42527589201927185, "learning_rate": 0.0005204358502044976, "loss": 1.7767, "step": 21396 }, { "epoch": 0.71, "grad_norm": 0.4195697009563446, "learning_rate": 0.0005204287604124359, "loss": 1.7474, "step": 21397 }, { "epoch": 0.71, "grad_norm": 0.41899198293685913, "learning_rate": 0.0005204216703528061, "loss": 1.8528, "step": 21398 }, { "epoch": 0.71, "grad_norm": 0.4120182693004608, "learning_rate": 0.0005204145800256169, "loss": 1.8184, "step": 21399 }, { "epoch": 0.71, "grad_norm": 0.4421871304512024, "learning_rate": 0.0005204074894308769, "loss": 1.9019, "step": 21400 }, { "epoch": 0.71, "grad_norm": 0.41910552978515625, "learning_rate": 0.0005204003985685945, "loss": 1.689, "step": 21401 }, { "epoch": 0.71, "grad_norm": 0.4288325309753418, "learning_rate": 0.0005203933074387788, "loss": 1.8623, "step": 21402 }, { "epoch": 0.71, "grad_norm": 0.4275190830230713, "learning_rate": 0.000520386216041438, "loss": 1.7824, "step": 21403 }, { "epoch": 0.71, "grad_norm": 0.4281691610813141, "learning_rate": 0.0005203791243765806, "loss": 1.9091, "step": 21404 }, { "epoch": 0.71, "grad_norm": 0.4226861894130707, "learning_rate": 0.0005203720324442156, "loss": 1.8204, "step": 21405 }, { "epoch": 0.71, "grad_norm": 0.43685922026634216, "learning_rate": 0.0005203649402443514, "loss": 1.7579, "step": 21406 }, { "epoch": 0.71, "grad_norm": 0.4342169761657715, "learning_rate": 0.0005203578477769967, "loss": 1.8286, "step": 21407 }, { "epoch": 0.71, "grad_norm": 0.4533976912498474, "learning_rate": 0.0005203507550421599, "loss": 1.8466, "step": 21408 }, { "epoch": 0.71, "grad_norm": 0.432521253824234, "learning_rate": 0.0005203436620398497, "loss": 1.8318, "step": 21409 }, { "epoch": 0.71, "grad_norm": 0.4557022154331207, "learning_rate": 0.0005203365687700749, "loss": 1.8862, "step": 21410 }, { "epoch": 0.71, "grad_norm": 0.43032968044281006, "learning_rate": 0.0005203294752328439, "loss": 1.8154, "step": 21411 }, { "epoch": 0.71, "grad_norm": 0.43894511461257935, "learning_rate": 0.0005203223814281655, "loss": 1.8405, "step": 21412 }, { "epoch": 0.71, "grad_norm": 0.44395825266838074, "learning_rate": 0.0005203152873560481, "loss": 1.9333, "step": 21413 }, { "epoch": 0.71, "grad_norm": 0.4343821406364441, "learning_rate": 0.0005203081930165004, "loss": 1.8599, "step": 21414 }, { "epoch": 0.71, "grad_norm": 0.43803951144218445, "learning_rate": 0.000520301098409531, "loss": 1.8615, "step": 21415 }, { "epoch": 0.71, "grad_norm": 0.44652289152145386, "learning_rate": 0.0005202940035351487, "loss": 1.7655, "step": 21416 }, { "epoch": 0.71, "grad_norm": 0.4495299756526947, "learning_rate": 0.0005202869083933616, "loss": 1.8515, "step": 21417 }, { "epoch": 0.71, "grad_norm": 0.4356241822242737, "learning_rate": 0.0005202798129841788, "loss": 1.8159, "step": 21418 }, { "epoch": 0.71, "grad_norm": 0.42098990082740784, "learning_rate": 0.0005202727173076089, "loss": 1.8422, "step": 21419 }, { "epoch": 0.71, "grad_norm": 0.42192474007606506, "learning_rate": 0.0005202656213636602, "loss": 1.7557, "step": 21420 }, { "epoch": 0.71, "grad_norm": 0.44136250019073486, "learning_rate": 0.0005202585251523416, "loss": 1.8576, "step": 21421 }, { "epoch": 0.71, "grad_norm": 0.43369585275650024, "learning_rate": 0.0005202514286736617, "loss": 1.8494, "step": 21422 }, { "epoch": 0.71, "grad_norm": 0.4233431816101074, "learning_rate": 0.0005202443319276288, "loss": 1.8591, "step": 21423 }, { "epoch": 0.71, "grad_norm": 0.4197064936161041, "learning_rate": 0.0005202372349142519, "loss": 1.8078, "step": 21424 }, { "epoch": 0.71, "grad_norm": 0.41315656900405884, "learning_rate": 0.0005202301376335393, "loss": 1.8119, "step": 21425 }, { "epoch": 0.71, "grad_norm": 0.4362715482711792, "learning_rate": 0.0005202230400854998, "loss": 1.8478, "step": 21426 }, { "epoch": 0.71, "grad_norm": 0.42600181698799133, "learning_rate": 0.0005202159422701421, "loss": 1.8066, "step": 21427 }, { "epoch": 0.71, "grad_norm": 0.4564136862754822, "learning_rate": 0.0005202088441874746, "loss": 1.8446, "step": 21428 }, { "epoch": 0.71, "grad_norm": 0.41880345344543457, "learning_rate": 0.000520201745837506, "loss": 1.7616, "step": 21429 }, { "epoch": 0.71, "grad_norm": 0.43826767802238464, "learning_rate": 0.0005201946472202449, "loss": 1.9215, "step": 21430 }, { "epoch": 0.71, "grad_norm": 0.43067461252212524, "learning_rate": 0.0005201875483357001, "loss": 1.8411, "step": 21431 }, { "epoch": 0.71, "grad_norm": 0.6960236430168152, "learning_rate": 0.00052018044918388, "loss": 1.866, "step": 21432 }, { "epoch": 0.71, "grad_norm": 0.43831923604011536, "learning_rate": 0.000520173349764793, "loss": 1.808, "step": 21433 }, { "epoch": 0.71, "grad_norm": 0.41983041167259216, "learning_rate": 0.0005201662500784483, "loss": 1.8913, "step": 21434 }, { "epoch": 0.71, "grad_norm": 0.417825847864151, "learning_rate": 0.0005201591501248541, "loss": 1.7961, "step": 21435 }, { "epoch": 0.71, "grad_norm": 0.4240630865097046, "learning_rate": 0.0005201520499040192, "loss": 1.7554, "step": 21436 }, { "epoch": 0.71, "grad_norm": 0.4427826702594757, "learning_rate": 0.0005201449494159522, "loss": 1.8508, "step": 21437 }, { "epoch": 0.71, "grad_norm": 0.43358469009399414, "learning_rate": 0.0005201378486606614, "loss": 1.8428, "step": 21438 }, { "epoch": 0.71, "grad_norm": 0.4322483539581299, "learning_rate": 0.000520130747638156, "loss": 1.9255, "step": 21439 }, { "epoch": 0.71, "grad_norm": 0.4589824974536896, "learning_rate": 0.0005201236463484441, "loss": 1.8538, "step": 21440 }, { "epoch": 0.71, "grad_norm": 0.425698459148407, "learning_rate": 0.0005201165447915346, "loss": 1.7579, "step": 21441 }, { "epoch": 0.71, "grad_norm": 0.43017882108688354, "learning_rate": 0.0005201094429674361, "loss": 1.8166, "step": 21442 }, { "epoch": 0.71, "grad_norm": 0.45398178696632385, "learning_rate": 0.000520102340876157, "loss": 1.8389, "step": 21443 }, { "epoch": 0.71, "grad_norm": 0.42307138442993164, "learning_rate": 0.0005200952385177063, "loss": 1.8228, "step": 21444 }, { "epoch": 0.71, "grad_norm": 0.43670961260795593, "learning_rate": 0.0005200881358920922, "loss": 1.7199, "step": 21445 }, { "epoch": 0.71, "grad_norm": 0.4323011636734009, "learning_rate": 0.0005200810329993238, "loss": 1.7972, "step": 21446 }, { "epoch": 0.71, "grad_norm": 0.43362686038017273, "learning_rate": 0.0005200739298394093, "loss": 1.8492, "step": 21447 }, { "epoch": 0.71, "grad_norm": 0.44581139087677, "learning_rate": 0.0005200668264123574, "loss": 1.8524, "step": 21448 }, { "epoch": 0.71, "grad_norm": 0.44461315870285034, "learning_rate": 0.0005200597227181769, "loss": 1.8438, "step": 21449 }, { "epoch": 0.71, "grad_norm": 0.4379030466079712, "learning_rate": 0.0005200526187568762, "loss": 1.8453, "step": 21450 }, { "epoch": 0.71, "grad_norm": 0.44429221749305725, "learning_rate": 0.0005200455145284641, "loss": 1.9041, "step": 21451 }, { "epoch": 0.71, "grad_norm": 0.4538288712501526, "learning_rate": 0.0005200384100329492, "loss": 1.8648, "step": 21452 }, { "epoch": 0.71, "grad_norm": 0.43646106123924255, "learning_rate": 0.0005200313052703402, "loss": 1.8072, "step": 21453 }, { "epoch": 0.71, "grad_norm": 0.4306378662586212, "learning_rate": 0.0005200242002406456, "loss": 1.8041, "step": 21454 }, { "epoch": 0.71, "grad_norm": 0.4297950267791748, "learning_rate": 0.0005200170949438737, "loss": 1.8418, "step": 21455 }, { "epoch": 0.71, "grad_norm": 0.4485045373439789, "learning_rate": 0.0005200099893800338, "loss": 1.805, "step": 21456 }, { "epoch": 0.71, "grad_norm": 0.431192547082901, "learning_rate": 0.0005200028835491341, "loss": 1.7789, "step": 21457 }, { "epoch": 0.71, "grad_norm": 0.42803338170051575, "learning_rate": 0.0005199957774511834, "loss": 1.8419, "step": 21458 }, { "epoch": 0.71, "grad_norm": 0.4373224377632141, "learning_rate": 0.0005199886710861901, "loss": 1.7974, "step": 21459 }, { "epoch": 0.71, "grad_norm": 0.45886945724487305, "learning_rate": 0.0005199815644541631, "loss": 1.8439, "step": 21460 }, { "epoch": 0.71, "grad_norm": 0.4328558146953583, "learning_rate": 0.0005199744575551107, "loss": 1.8464, "step": 21461 }, { "epoch": 0.71, "grad_norm": 0.42715784907341003, "learning_rate": 0.000519967350389042, "loss": 1.8415, "step": 21462 }, { "epoch": 0.71, "grad_norm": 0.44203251600265503, "learning_rate": 0.0005199602429559651, "loss": 1.8366, "step": 21463 }, { "epoch": 0.71, "grad_norm": 0.44048115611076355, "learning_rate": 0.0005199531352558889, "loss": 1.8634, "step": 21464 }, { "epoch": 0.71, "grad_norm": 0.4382447600364685, "learning_rate": 0.0005199460272888221, "loss": 1.7902, "step": 21465 }, { "epoch": 0.71, "grad_norm": 0.43749698996543884, "learning_rate": 0.0005199389190547732, "loss": 1.8966, "step": 21466 }, { "epoch": 0.71, "grad_norm": 0.44833138585090637, "learning_rate": 0.0005199318105537508, "loss": 1.8587, "step": 21467 }, { "epoch": 0.71, "grad_norm": 0.41448214650154114, "learning_rate": 0.0005199247017857638, "loss": 1.7963, "step": 21468 }, { "epoch": 0.71, "grad_norm": 0.44613394141197205, "learning_rate": 0.0005199175927508204, "loss": 1.7603, "step": 21469 }, { "epoch": 0.71, "grad_norm": 0.42743629217147827, "learning_rate": 0.0005199104834489295, "loss": 1.7553, "step": 21470 }, { "epoch": 0.71, "grad_norm": 0.42920881509780884, "learning_rate": 0.0005199033738800997, "loss": 1.7846, "step": 21471 }, { "epoch": 0.71, "grad_norm": 0.44136324524879456, "learning_rate": 0.0005198962640443395, "loss": 1.8277, "step": 21472 }, { "epoch": 0.71, "grad_norm": 0.46991732716560364, "learning_rate": 0.0005198891539416578, "loss": 1.7985, "step": 21473 }, { "epoch": 0.71, "grad_norm": 0.4525049328804016, "learning_rate": 0.000519882043572063, "loss": 1.8369, "step": 21474 }, { "epoch": 0.71, "grad_norm": 0.4165741801261902, "learning_rate": 0.0005198749329355637, "loss": 1.8287, "step": 21475 }, { "epoch": 0.71, "grad_norm": 0.41979485750198364, "learning_rate": 0.0005198678220321688, "loss": 1.8716, "step": 21476 }, { "epoch": 0.71, "grad_norm": 0.42912018299102783, "learning_rate": 0.0005198607108618866, "loss": 1.7645, "step": 21477 }, { "epoch": 0.71, "grad_norm": 0.4438766837120056, "learning_rate": 0.000519853599424726, "loss": 1.8327, "step": 21478 }, { "epoch": 0.71, "grad_norm": 0.4468691647052765, "learning_rate": 0.0005198464877206955, "loss": 1.8388, "step": 21479 }, { "epoch": 0.71, "grad_norm": 0.4362303912639618, "learning_rate": 0.0005198393757498037, "loss": 1.9118, "step": 21480 }, { "epoch": 0.71, "grad_norm": 0.4271066188812256, "learning_rate": 0.0005198322635120593, "loss": 1.8361, "step": 21481 }, { "epoch": 0.71, "grad_norm": 0.4594763517379761, "learning_rate": 0.000519825151007471, "loss": 1.925, "step": 21482 }, { "epoch": 0.71, "grad_norm": 0.4350511133670807, "learning_rate": 0.0005198180382360473, "loss": 1.7841, "step": 21483 }, { "epoch": 0.71, "grad_norm": 0.430423766374588, "learning_rate": 0.000519810925197797, "loss": 1.8141, "step": 21484 }, { "epoch": 0.71, "grad_norm": 0.43302449584007263, "learning_rate": 0.0005198038118927285, "loss": 1.8145, "step": 21485 }, { "epoch": 0.71, "grad_norm": 0.43282413482666016, "learning_rate": 0.0005197966983208506, "loss": 1.8051, "step": 21486 }, { "epoch": 0.71, "grad_norm": 0.4499875009059906, "learning_rate": 0.0005197895844821717, "loss": 1.8419, "step": 21487 }, { "epoch": 0.71, "grad_norm": 0.4323539733886719, "learning_rate": 0.0005197824703767009, "loss": 1.8186, "step": 21488 }, { "epoch": 0.71, "grad_norm": 0.4259810447692871, "learning_rate": 0.0005197753560044465, "loss": 1.8985, "step": 21489 }, { "epoch": 0.71, "grad_norm": 0.4304448068141937, "learning_rate": 0.0005197682413654172, "loss": 1.8544, "step": 21490 }, { "epoch": 0.72, "grad_norm": 0.44753074645996094, "learning_rate": 0.0005197611264596216, "loss": 1.9387, "step": 21491 }, { "epoch": 0.72, "grad_norm": 0.4358997642993927, "learning_rate": 0.0005197540112870685, "loss": 1.8525, "step": 21492 }, { "epoch": 0.72, "grad_norm": 0.44120579957962036, "learning_rate": 0.0005197468958477662, "loss": 1.785, "step": 21493 }, { "epoch": 0.72, "grad_norm": 0.42805367708206177, "learning_rate": 0.0005197397801417236, "loss": 1.8281, "step": 21494 }, { "epoch": 0.72, "grad_norm": 0.42995092272758484, "learning_rate": 0.0005197326641689493, "loss": 1.8564, "step": 21495 }, { "epoch": 0.72, "grad_norm": 0.4292137622833252, "learning_rate": 0.000519725547929452, "loss": 1.819, "step": 21496 }, { "epoch": 0.72, "grad_norm": 0.4235672950744629, "learning_rate": 0.0005197184314232402, "loss": 1.8679, "step": 21497 }, { "epoch": 0.72, "grad_norm": 0.4318234622478485, "learning_rate": 0.0005197113146503226, "loss": 1.8366, "step": 21498 }, { "epoch": 0.72, "grad_norm": 0.42567673325538635, "learning_rate": 0.0005197041976107078, "loss": 1.7348, "step": 21499 }, { "epoch": 0.72, "grad_norm": 0.41432294249534607, "learning_rate": 0.0005196970803044046, "loss": 1.8868, "step": 21500 }, { "epoch": 0.72, "grad_norm": 0.4388326108455658, "learning_rate": 0.0005196899627314214, "loss": 1.799, "step": 21501 }, { "epoch": 0.72, "grad_norm": 0.4236922860145569, "learning_rate": 0.0005196828448917669, "loss": 1.7993, "step": 21502 }, { "epoch": 0.72, "grad_norm": 0.43578994274139404, "learning_rate": 0.0005196757267854499, "loss": 1.889, "step": 21503 }, { "epoch": 0.72, "grad_norm": 0.42403462529182434, "learning_rate": 0.000519668608412479, "loss": 1.8782, "step": 21504 }, { "epoch": 0.72, "grad_norm": 0.43661925196647644, "learning_rate": 0.0005196614897728627, "loss": 1.8025, "step": 21505 }, { "epoch": 0.72, "grad_norm": 0.42373666167259216, "learning_rate": 0.0005196543708666097, "loss": 1.7993, "step": 21506 }, { "epoch": 0.72, "grad_norm": 0.4229566752910614, "learning_rate": 0.0005196472516937286, "loss": 1.8045, "step": 21507 }, { "epoch": 0.72, "grad_norm": 0.4374980926513672, "learning_rate": 0.0005196401322542282, "loss": 1.8286, "step": 21508 }, { "epoch": 0.72, "grad_norm": 0.46573472023010254, "learning_rate": 0.0005196330125481171, "loss": 1.8913, "step": 21509 }, { "epoch": 0.72, "grad_norm": 0.4304506778717041, "learning_rate": 0.0005196258925754037, "loss": 1.8154, "step": 21510 }, { "epoch": 0.72, "grad_norm": 0.4409709572792053, "learning_rate": 0.000519618772336097, "loss": 1.7665, "step": 21511 }, { "epoch": 0.72, "grad_norm": 0.4342154860496521, "learning_rate": 0.0005196116518302053, "loss": 1.8634, "step": 21512 }, { "epoch": 0.72, "grad_norm": 0.4209083020687103, "learning_rate": 0.0005196045310577376, "loss": 1.8523, "step": 21513 }, { "epoch": 0.72, "grad_norm": 0.4355504810810089, "learning_rate": 0.0005195974100187022, "loss": 1.8351, "step": 21514 }, { "epoch": 0.72, "grad_norm": 0.42861899733543396, "learning_rate": 0.000519590288713108, "loss": 1.9198, "step": 21515 }, { "epoch": 0.72, "grad_norm": 0.4391408860683441, "learning_rate": 0.0005195831671409635, "loss": 1.7706, "step": 21516 }, { "epoch": 0.72, "grad_norm": 0.43854984641075134, "learning_rate": 0.0005195760453022773, "loss": 1.8347, "step": 21517 }, { "epoch": 0.72, "grad_norm": 0.4332773983478546, "learning_rate": 0.0005195689231970584, "loss": 1.8213, "step": 21518 }, { "epoch": 0.72, "grad_norm": 0.4371435344219208, "learning_rate": 0.0005195618008253149, "loss": 1.8241, "step": 21519 }, { "epoch": 0.72, "grad_norm": 0.4300730526447296, "learning_rate": 0.0005195546781870559, "loss": 1.7955, "step": 21520 }, { "epoch": 0.72, "grad_norm": 0.4427627921104431, "learning_rate": 0.0005195475552822896, "loss": 1.8775, "step": 21521 }, { "epoch": 0.72, "grad_norm": 0.46582549810409546, "learning_rate": 0.0005195404321110252, "loss": 1.8574, "step": 21522 }, { "epoch": 0.72, "grad_norm": 0.404090940952301, "learning_rate": 0.000519533308673271, "loss": 1.7365, "step": 21523 }, { "epoch": 0.72, "grad_norm": 0.4509526789188385, "learning_rate": 0.0005195261849690357, "loss": 1.7567, "step": 21524 }, { "epoch": 0.72, "grad_norm": 0.43454840779304504, "learning_rate": 0.000519519060998328, "loss": 1.7796, "step": 21525 }, { "epoch": 0.72, "grad_norm": 0.449826717376709, "learning_rate": 0.0005195119367611564, "loss": 1.8026, "step": 21526 }, { "epoch": 0.72, "grad_norm": 0.41862502694129944, "learning_rate": 0.0005195048122575297, "loss": 1.8497, "step": 21527 }, { "epoch": 0.72, "grad_norm": 0.4427494406700134, "learning_rate": 0.0005194976874874566, "loss": 1.8621, "step": 21528 }, { "epoch": 0.72, "grad_norm": 0.4602842628955841, "learning_rate": 0.0005194905624509455, "loss": 1.8503, "step": 21529 }, { "epoch": 0.72, "grad_norm": 0.4396617114543915, "learning_rate": 0.0005194834371480053, "loss": 1.8562, "step": 21530 }, { "epoch": 0.72, "grad_norm": 0.413078248500824, "learning_rate": 0.0005194763115786444, "loss": 1.79, "step": 21531 }, { "epoch": 0.72, "grad_norm": 0.4276827573776245, "learning_rate": 0.0005194691857428717, "loss": 1.8522, "step": 21532 }, { "epoch": 0.72, "grad_norm": 0.4364811182022095, "learning_rate": 0.0005194620596406959, "loss": 1.8881, "step": 21533 }, { "epoch": 0.72, "grad_norm": 0.44884660840034485, "learning_rate": 0.0005194549332721252, "loss": 1.8363, "step": 21534 }, { "epoch": 0.72, "grad_norm": 0.43983379006385803, "learning_rate": 0.0005194478066371688, "loss": 1.8502, "step": 21535 }, { "epoch": 0.72, "grad_norm": 0.44922590255737305, "learning_rate": 0.0005194406797358348, "loss": 1.8865, "step": 21536 }, { "epoch": 0.72, "grad_norm": 0.4274628460407257, "learning_rate": 0.0005194335525681324, "loss": 1.8655, "step": 21537 }, { "epoch": 0.72, "grad_norm": 0.43387532234191895, "learning_rate": 0.00051942642513407, "loss": 1.838, "step": 21538 }, { "epoch": 0.72, "grad_norm": 0.44221270084381104, "learning_rate": 0.0005194192974336562, "loss": 1.8629, "step": 21539 }, { "epoch": 0.72, "grad_norm": 0.45109856128692627, "learning_rate": 0.0005194121694668996, "loss": 1.814, "step": 21540 }, { "epoch": 0.72, "grad_norm": 0.42851540446281433, "learning_rate": 0.0005194050412338091, "loss": 1.8625, "step": 21541 }, { "epoch": 0.72, "grad_norm": 0.45379048585891724, "learning_rate": 0.0005193979127343932, "loss": 1.8344, "step": 21542 }, { "epoch": 0.72, "grad_norm": 0.4314580261707306, "learning_rate": 0.0005193907839686604, "loss": 1.758, "step": 21543 }, { "epoch": 0.72, "grad_norm": 0.4193069636821747, "learning_rate": 0.0005193836549366197, "loss": 1.8285, "step": 21544 }, { "epoch": 0.72, "grad_norm": 0.43168047070503235, "learning_rate": 0.0005193765256382794, "loss": 1.8218, "step": 21545 }, { "epoch": 0.72, "grad_norm": 0.4419393837451935, "learning_rate": 0.0005193693960736485, "loss": 1.828, "step": 21546 }, { "epoch": 0.72, "grad_norm": 0.4519319534301758, "learning_rate": 0.0005193622662427353, "loss": 1.853, "step": 21547 }, { "epoch": 0.72, "grad_norm": 0.4316153824329376, "learning_rate": 0.0005193551361455487, "loss": 1.7671, "step": 21548 }, { "epoch": 0.72, "grad_norm": 0.44822439551353455, "learning_rate": 0.0005193480057820973, "loss": 1.8565, "step": 21549 }, { "epoch": 0.72, "grad_norm": 0.43999791145324707, "learning_rate": 0.0005193408751523898, "loss": 1.8367, "step": 21550 }, { "epoch": 0.72, "grad_norm": 0.4334600567817688, "learning_rate": 0.0005193337442564348, "loss": 1.897, "step": 21551 }, { "epoch": 0.72, "grad_norm": 0.43560612201690674, "learning_rate": 0.0005193266130942408, "loss": 1.7971, "step": 21552 }, { "epoch": 0.72, "grad_norm": 0.4818150997161865, "learning_rate": 0.0005193194816658168, "loss": 1.8524, "step": 21553 }, { "epoch": 0.72, "grad_norm": 0.43797439336776733, "learning_rate": 0.0005193123499711711, "loss": 1.8306, "step": 21554 }, { "epoch": 0.72, "grad_norm": 0.4306866228580475, "learning_rate": 0.0005193052180103126, "loss": 1.8366, "step": 21555 }, { "epoch": 0.72, "grad_norm": 0.4294807016849518, "learning_rate": 0.0005192980857832498, "loss": 1.8647, "step": 21556 }, { "epoch": 0.72, "grad_norm": 0.43076959252357483, "learning_rate": 0.0005192909532899915, "loss": 1.8865, "step": 21557 }, { "epoch": 0.72, "grad_norm": 0.43239930272102356, "learning_rate": 0.0005192838205305462, "loss": 1.846, "step": 21558 }, { "epoch": 0.72, "grad_norm": 0.44185590744018555, "learning_rate": 0.0005192766875049228, "loss": 1.8225, "step": 21559 }, { "epoch": 0.72, "grad_norm": 0.43001848459243774, "learning_rate": 0.0005192695542131298, "loss": 1.8305, "step": 21560 }, { "epoch": 0.72, "grad_norm": 0.4501085579395294, "learning_rate": 0.0005192624206551758, "loss": 1.7544, "step": 21561 }, { "epoch": 0.72, "grad_norm": 0.4431197941303253, "learning_rate": 0.0005192552868310696, "loss": 1.8669, "step": 21562 }, { "epoch": 0.72, "grad_norm": 0.4608260989189148, "learning_rate": 0.0005192481527408197, "loss": 1.8108, "step": 21563 }, { "epoch": 0.72, "grad_norm": 0.4317283630371094, "learning_rate": 0.0005192410183844349, "loss": 1.836, "step": 21564 }, { "epoch": 0.72, "grad_norm": 0.4207867980003357, "learning_rate": 0.0005192338837619238, "loss": 1.817, "step": 21565 }, { "epoch": 0.72, "grad_norm": 0.4495191276073456, "learning_rate": 0.000519226748873295, "loss": 1.8466, "step": 21566 }, { "epoch": 0.72, "grad_norm": 0.4424930214881897, "learning_rate": 0.0005192196137185573, "loss": 1.8564, "step": 21567 }, { "epoch": 0.72, "grad_norm": 0.4334566295146942, "learning_rate": 0.0005192124782977193, "loss": 1.782, "step": 21568 }, { "epoch": 0.72, "grad_norm": 0.4575729966163635, "learning_rate": 0.0005192053426107896, "loss": 1.8534, "step": 21569 }, { "epoch": 0.72, "grad_norm": 0.4109201729297638, "learning_rate": 0.000519198206657777, "loss": 1.7877, "step": 21570 }, { "epoch": 0.72, "grad_norm": 0.4418041408061981, "learning_rate": 0.0005191910704386899, "loss": 1.8446, "step": 21571 }, { "epoch": 0.72, "grad_norm": 0.42079445719718933, "learning_rate": 0.0005191839339535373, "loss": 1.764, "step": 21572 }, { "epoch": 0.72, "grad_norm": 0.4294041693210602, "learning_rate": 0.0005191767972023277, "loss": 1.8573, "step": 21573 }, { "epoch": 0.72, "grad_norm": 0.42489564418792725, "learning_rate": 0.0005191696601850697, "loss": 1.8591, "step": 21574 }, { "epoch": 0.72, "grad_norm": 0.435336172580719, "learning_rate": 0.000519162522901772, "loss": 1.8443, "step": 21575 }, { "epoch": 0.72, "grad_norm": 0.4319619834423065, "learning_rate": 0.0005191553853524433, "loss": 1.9048, "step": 21576 }, { "epoch": 0.72, "grad_norm": 0.4229182302951813, "learning_rate": 0.0005191482475370924, "loss": 1.8485, "step": 21577 }, { "epoch": 0.72, "grad_norm": 0.43462470173835754, "learning_rate": 0.0005191411094557277, "loss": 1.8395, "step": 21578 }, { "epoch": 0.72, "grad_norm": 0.4191819727420807, "learning_rate": 0.0005191339711083579, "loss": 1.8298, "step": 21579 }, { "epoch": 0.72, "grad_norm": 0.43795815110206604, "learning_rate": 0.0005191268324949919, "loss": 1.8932, "step": 21580 }, { "epoch": 0.72, "grad_norm": 0.43700656294822693, "learning_rate": 0.0005191196936156381, "loss": 1.8427, "step": 21581 }, { "epoch": 0.72, "grad_norm": 0.42906612157821655, "learning_rate": 0.0005191125544703052, "loss": 1.833, "step": 21582 }, { "epoch": 0.72, "grad_norm": 0.4333629608154297, "learning_rate": 0.0005191054150590021, "loss": 1.8701, "step": 21583 }, { "epoch": 0.72, "grad_norm": 0.42930707335472107, "learning_rate": 0.0005190982753817373, "loss": 1.7799, "step": 21584 }, { "epoch": 0.72, "grad_norm": 0.43808093667030334, "learning_rate": 0.0005190911354385195, "loss": 1.8083, "step": 21585 }, { "epoch": 0.72, "grad_norm": 0.41312670707702637, "learning_rate": 0.0005190839952293573, "loss": 1.8881, "step": 21586 }, { "epoch": 0.72, "grad_norm": 0.4245738089084625, "learning_rate": 0.0005190768547542594, "loss": 1.8347, "step": 21587 }, { "epoch": 0.72, "grad_norm": 0.44216012954711914, "learning_rate": 0.0005190697140132344, "loss": 1.7841, "step": 21588 }, { "epoch": 0.72, "grad_norm": 0.45949819684028625, "learning_rate": 0.0005190625730062912, "loss": 1.8065, "step": 21589 }, { "epoch": 0.72, "grad_norm": 0.42366963624954224, "learning_rate": 0.0005190554317334382, "loss": 1.8394, "step": 21590 }, { "epoch": 0.72, "grad_norm": 0.4423048198223114, "learning_rate": 0.0005190482901946843, "loss": 1.8303, "step": 21591 }, { "epoch": 0.72, "grad_norm": 0.43342986702919006, "learning_rate": 0.0005190411483900379, "loss": 1.811, "step": 21592 }, { "epoch": 0.72, "grad_norm": 0.4158911406993866, "learning_rate": 0.0005190340063195079, "loss": 1.7672, "step": 21593 }, { "epoch": 0.72, "grad_norm": 0.42988407611846924, "learning_rate": 0.000519026863983103, "loss": 1.8247, "step": 21594 }, { "epoch": 0.72, "grad_norm": 0.42741307616233826, "learning_rate": 0.0005190197213808315, "loss": 1.8485, "step": 21595 }, { "epoch": 0.72, "grad_norm": 0.41891783475875854, "learning_rate": 0.0005190125785127025, "loss": 1.8171, "step": 21596 }, { "epoch": 0.72, "grad_norm": 0.4320553243160248, "learning_rate": 0.0005190054353787244, "loss": 1.8039, "step": 21597 }, { "epoch": 0.72, "grad_norm": 0.419048547744751, "learning_rate": 0.000518998291978906, "loss": 1.8518, "step": 21598 }, { "epoch": 0.72, "grad_norm": 0.4254513382911682, "learning_rate": 0.0005189911483132561, "loss": 1.8342, "step": 21599 }, { "epoch": 0.72, "grad_norm": 0.41255491971969604, "learning_rate": 0.000518984004381783, "loss": 1.8402, "step": 21600 }, { "epoch": 0.72, "grad_norm": 0.4110663831233978, "learning_rate": 0.0005189768601844958, "loss": 1.8047, "step": 21601 }, { "epoch": 0.72, "grad_norm": 0.42609426379203796, "learning_rate": 0.0005189697157214028, "loss": 1.9011, "step": 21602 }, { "epoch": 0.72, "grad_norm": 0.41490641236305237, "learning_rate": 0.0005189625709925128, "loss": 1.7985, "step": 21603 }, { "epoch": 0.72, "grad_norm": 0.4293304979801178, "learning_rate": 0.0005189554259978346, "loss": 1.8634, "step": 21604 }, { "epoch": 0.72, "grad_norm": 0.44630396366119385, "learning_rate": 0.0005189482807373768, "loss": 1.7939, "step": 21605 }, { "epoch": 0.72, "grad_norm": 0.4272245466709137, "learning_rate": 0.000518941135211148, "loss": 1.8149, "step": 21606 }, { "epoch": 0.72, "grad_norm": 0.4267536997795105, "learning_rate": 0.0005189339894191568, "loss": 1.8738, "step": 21607 }, { "epoch": 0.72, "grad_norm": 0.4572129249572754, "learning_rate": 0.0005189268433614121, "loss": 1.8931, "step": 21608 }, { "epoch": 0.72, "grad_norm": 0.44397369027137756, "learning_rate": 0.0005189196970379226, "loss": 1.8013, "step": 21609 }, { "epoch": 0.72, "grad_norm": 0.4374764561653137, "learning_rate": 0.0005189125504486967, "loss": 1.8214, "step": 21610 }, { "epoch": 0.72, "grad_norm": 0.42875605821609497, "learning_rate": 0.0005189054035937432, "loss": 1.7413, "step": 21611 }, { "epoch": 0.72, "grad_norm": 0.43581563234329224, "learning_rate": 0.0005188982564730708, "loss": 1.8593, "step": 21612 }, { "epoch": 0.72, "grad_norm": 0.43352562189102173, "learning_rate": 0.0005188911090866882, "loss": 1.8387, "step": 21613 }, { "epoch": 0.72, "grad_norm": 0.42464250326156616, "learning_rate": 0.0005188839614346041, "loss": 1.8176, "step": 21614 }, { "epoch": 0.72, "grad_norm": 0.42819318175315857, "learning_rate": 0.0005188768135168271, "loss": 1.881, "step": 21615 }, { "epoch": 0.72, "grad_norm": 0.43628576397895813, "learning_rate": 0.0005188696653333658, "loss": 1.831, "step": 21616 }, { "epoch": 0.72, "grad_norm": 0.44500380754470825, "learning_rate": 0.000518862516884229, "loss": 1.8649, "step": 21617 }, { "epoch": 0.72, "grad_norm": 0.4448901414871216, "learning_rate": 0.0005188553681694254, "loss": 1.8325, "step": 21618 }, { "epoch": 0.72, "grad_norm": 0.4343060255050659, "learning_rate": 0.0005188482191889637, "loss": 1.7722, "step": 21619 }, { "epoch": 0.72, "grad_norm": 0.5049884915351868, "learning_rate": 0.0005188410699428524, "loss": 1.8463, "step": 21620 }, { "epoch": 0.72, "grad_norm": 0.4288758635520935, "learning_rate": 0.0005188339204311003, "loss": 1.8157, "step": 21621 }, { "epoch": 0.72, "grad_norm": 0.4594808518886566, "learning_rate": 0.0005188267706537161, "loss": 1.8406, "step": 21622 }, { "epoch": 0.72, "grad_norm": 0.4338279366493225, "learning_rate": 0.0005188196206107083, "loss": 1.8261, "step": 21623 }, { "epoch": 0.72, "grad_norm": 0.43386343121528625, "learning_rate": 0.000518812470302086, "loss": 1.7533, "step": 21624 }, { "epoch": 0.72, "grad_norm": 0.41480669379234314, "learning_rate": 0.0005188053197278574, "loss": 1.7797, "step": 21625 }, { "epoch": 0.72, "grad_norm": 0.4345320761203766, "learning_rate": 0.0005187981688880314, "loss": 1.7709, "step": 21626 }, { "epoch": 0.72, "grad_norm": 0.4501725137233734, "learning_rate": 0.0005187910177826167, "loss": 1.848, "step": 21627 }, { "epoch": 0.72, "grad_norm": 0.43771809339523315, "learning_rate": 0.0005187838664116219, "loss": 1.8551, "step": 21628 }, { "epoch": 0.72, "grad_norm": 0.44503235816955566, "learning_rate": 0.0005187767147750559, "loss": 1.8748, "step": 21629 }, { "epoch": 0.72, "grad_norm": 0.4298803210258484, "learning_rate": 0.000518769562872927, "loss": 1.8973, "step": 21630 }, { "epoch": 0.72, "grad_norm": 0.4238227903842926, "learning_rate": 0.0005187624107052441, "loss": 1.7763, "step": 21631 }, { "epoch": 0.72, "grad_norm": 0.44320785999298096, "learning_rate": 0.0005187552582720159, "loss": 1.8331, "step": 21632 }, { "epoch": 0.72, "grad_norm": 0.4344122111797333, "learning_rate": 0.0005187481055732511, "loss": 1.856, "step": 21633 }, { "epoch": 0.72, "grad_norm": 0.4584481418132782, "learning_rate": 0.0005187409526089583, "loss": 1.7862, "step": 21634 }, { "epoch": 0.72, "grad_norm": 0.6788888573646545, "learning_rate": 0.0005187337993791462, "loss": 1.9938, "step": 21635 }, { "epoch": 0.72, "grad_norm": 0.4237127900123596, "learning_rate": 0.0005187266458838235, "loss": 1.8019, "step": 21636 }, { "epoch": 0.72, "grad_norm": 0.432204931974411, "learning_rate": 0.0005187194921229989, "loss": 1.8363, "step": 21637 }, { "epoch": 0.72, "grad_norm": 0.4296506941318512, "learning_rate": 0.000518712338096681, "loss": 1.8244, "step": 21638 }, { "epoch": 0.72, "grad_norm": 0.4250642657279968, "learning_rate": 0.0005187051838048785, "loss": 1.8446, "step": 21639 }, { "epoch": 0.72, "grad_norm": 0.4330917298793793, "learning_rate": 0.0005186980292476003, "loss": 1.8633, "step": 21640 }, { "epoch": 0.72, "grad_norm": 0.5056847333908081, "learning_rate": 0.0005186908744248548, "loss": 1.8783, "step": 21641 }, { "epoch": 0.72, "grad_norm": 0.43074867129325867, "learning_rate": 0.0005186837193366509, "loss": 1.7944, "step": 21642 }, { "epoch": 0.72, "grad_norm": 0.4467333257198334, "learning_rate": 0.0005186765639829971, "loss": 1.866, "step": 21643 }, { "epoch": 0.72, "grad_norm": 0.41446587443351746, "learning_rate": 0.0005186694083639022, "loss": 1.7371, "step": 21644 }, { "epoch": 0.72, "grad_norm": 0.43024736642837524, "learning_rate": 0.0005186622524793749, "loss": 1.8295, "step": 21645 }, { "epoch": 0.72, "grad_norm": 0.5418362617492676, "learning_rate": 0.0005186550963294237, "loss": 1.882, "step": 21646 }, { "epoch": 0.72, "grad_norm": 0.4373325705528259, "learning_rate": 0.0005186479399140575, "loss": 1.7088, "step": 21647 }, { "epoch": 0.72, "grad_norm": 0.4335334002971649, "learning_rate": 0.000518640783233285, "loss": 1.7496, "step": 21648 }, { "epoch": 0.72, "grad_norm": 0.43799734115600586, "learning_rate": 0.0005186336262871147, "loss": 1.885, "step": 21649 }, { "epoch": 0.72, "grad_norm": 0.4264332354068756, "learning_rate": 0.0005186264690755555, "loss": 1.8347, "step": 21650 }, { "epoch": 0.72, "grad_norm": 0.43632379174232483, "learning_rate": 0.0005186193115986159, "loss": 1.8937, "step": 21651 }, { "epoch": 0.72, "grad_norm": 0.44124987721443176, "learning_rate": 0.0005186121538563047, "loss": 1.7769, "step": 21652 }, { "epoch": 0.72, "grad_norm": 0.4340810775756836, "learning_rate": 0.0005186049958486305, "loss": 1.8924, "step": 21653 }, { "epoch": 0.72, "grad_norm": 0.42337074875831604, "learning_rate": 0.0005185978375756022, "loss": 1.883, "step": 21654 }, { "epoch": 0.72, "grad_norm": 0.4235166609287262, "learning_rate": 0.0005185906790372282, "loss": 1.8236, "step": 21655 }, { "epoch": 0.72, "grad_norm": 0.43082907795906067, "learning_rate": 0.0005185835202335174, "loss": 1.851, "step": 21656 }, { "epoch": 0.72, "grad_norm": 0.4313778877258301, "learning_rate": 0.0005185763611644783, "loss": 1.9134, "step": 21657 }, { "epoch": 0.72, "grad_norm": 0.4259680509567261, "learning_rate": 0.0005185692018301198, "loss": 1.766, "step": 21658 }, { "epoch": 0.72, "grad_norm": 0.4177028238773346, "learning_rate": 0.0005185620422304503, "loss": 1.7859, "step": 21659 }, { "epoch": 0.72, "grad_norm": 0.4322143495082855, "learning_rate": 0.0005185548823654789, "loss": 1.8361, "step": 21660 }, { "epoch": 0.72, "grad_norm": 0.4391261637210846, "learning_rate": 0.0005185477222352141, "loss": 1.8282, "step": 21661 }, { "epoch": 0.72, "grad_norm": 0.5173368453979492, "learning_rate": 0.0005185405618396644, "loss": 1.8531, "step": 21662 }, { "epoch": 0.72, "grad_norm": 0.4099453389644623, "learning_rate": 0.0005185334011788388, "loss": 1.8724, "step": 21663 }, { "epoch": 0.72, "grad_norm": 0.4424230754375458, "learning_rate": 0.0005185262402527457, "loss": 1.8392, "step": 21664 }, { "epoch": 0.72, "grad_norm": 0.437144011259079, "learning_rate": 0.0005185190790613941, "loss": 1.7918, "step": 21665 }, { "epoch": 0.72, "grad_norm": 0.4355512857437134, "learning_rate": 0.0005185119176047925, "loss": 1.8156, "step": 21666 }, { "epoch": 0.72, "grad_norm": 0.4299149215221405, "learning_rate": 0.0005185047558829496, "loss": 1.8255, "step": 21667 }, { "epoch": 0.72, "grad_norm": 0.4799412488937378, "learning_rate": 0.000518497593895874, "loss": 1.8549, "step": 21668 }, { "epoch": 0.72, "grad_norm": 0.4616527557373047, "learning_rate": 0.0005184904316435746, "loss": 1.9024, "step": 21669 }, { "epoch": 0.72, "grad_norm": 0.4356035590171814, "learning_rate": 0.0005184832691260602, "loss": 1.8762, "step": 21670 }, { "epoch": 0.72, "grad_norm": 0.42713579535484314, "learning_rate": 0.0005184761063433392, "loss": 1.848, "step": 21671 }, { "epoch": 0.72, "grad_norm": 0.43859800696372986, "learning_rate": 0.0005184689432954203, "loss": 1.7648, "step": 21672 }, { "epoch": 0.72, "grad_norm": 0.4542383551597595, "learning_rate": 0.0005184617799823124, "loss": 1.809, "step": 21673 }, { "epoch": 0.72, "grad_norm": 0.42758041620254517, "learning_rate": 0.000518454616404024, "loss": 1.8476, "step": 21674 }, { "epoch": 0.72, "grad_norm": 0.4358416199684143, "learning_rate": 0.0005184474525605639, "loss": 1.8091, "step": 21675 }, { "epoch": 0.72, "grad_norm": 0.42620381712913513, "learning_rate": 0.0005184402884519407, "loss": 1.8044, "step": 21676 }, { "epoch": 0.72, "grad_norm": 0.426490843296051, "learning_rate": 0.0005184331240781633, "loss": 1.8747, "step": 21677 }, { "epoch": 0.72, "grad_norm": 0.4221903383731842, "learning_rate": 0.0005184259594392403, "loss": 1.801, "step": 21678 }, { "epoch": 0.72, "grad_norm": 0.4353925287723541, "learning_rate": 0.0005184187945351803, "loss": 1.8736, "step": 21679 }, { "epoch": 0.72, "grad_norm": 0.44731998443603516, "learning_rate": 0.000518411629365992, "loss": 1.8427, "step": 21680 }, { "epoch": 0.72, "grad_norm": 0.43698516488075256, "learning_rate": 0.0005184044639316843, "loss": 1.8212, "step": 21681 }, { "epoch": 0.72, "grad_norm": 0.42907607555389404, "learning_rate": 0.0005183972982322657, "loss": 1.778, "step": 21682 }, { "epoch": 0.72, "grad_norm": 0.44614189863204956, "learning_rate": 0.0005183901322677449, "loss": 1.8608, "step": 21683 }, { "epoch": 0.72, "grad_norm": 0.43164220452308655, "learning_rate": 0.0005183829660381307, "loss": 1.7417, "step": 21684 }, { "epoch": 0.72, "grad_norm": 0.4395604431629181, "learning_rate": 0.0005183757995434319, "loss": 1.8105, "step": 21685 }, { "epoch": 0.72, "grad_norm": 0.42319926619529724, "learning_rate": 0.0005183686327836568, "loss": 1.7784, "step": 21686 }, { "epoch": 0.72, "grad_norm": 0.4682675302028656, "learning_rate": 0.0005183614657588145, "loss": 1.8051, "step": 21687 }, { "epoch": 0.72, "grad_norm": 0.45733189582824707, "learning_rate": 0.0005183542984689136, "loss": 1.7857, "step": 21688 }, { "epoch": 0.72, "grad_norm": 0.4356701076030731, "learning_rate": 0.0005183471309139627, "loss": 1.7455, "step": 21689 }, { "epoch": 0.72, "grad_norm": 0.4527234435081482, "learning_rate": 0.0005183399630939705, "loss": 1.9001, "step": 21690 }, { "epoch": 0.72, "grad_norm": 0.4482981264591217, "learning_rate": 0.0005183327950089458, "loss": 1.9009, "step": 21691 }, { "epoch": 0.72, "grad_norm": 0.4403637647628784, "learning_rate": 0.0005183256266588972, "loss": 1.7968, "step": 21692 }, { "epoch": 0.72, "grad_norm": 0.4516003131866455, "learning_rate": 0.0005183184580438336, "loss": 1.8737, "step": 21693 }, { "epoch": 0.72, "grad_norm": 0.44147157669067383, "learning_rate": 0.0005183112891637634, "loss": 1.8761, "step": 21694 }, { "epoch": 0.72, "grad_norm": 0.4338710606098175, "learning_rate": 0.0005183041200186955, "loss": 1.812, "step": 21695 }, { "epoch": 0.72, "grad_norm": 0.4428546130657196, "learning_rate": 0.0005182969506086387, "loss": 1.9112, "step": 21696 }, { "epoch": 0.72, "grad_norm": 0.4475468695163727, "learning_rate": 0.0005182897809336014, "loss": 1.7957, "step": 21697 }, { "epoch": 0.72, "grad_norm": 0.4283246397972107, "learning_rate": 0.0005182826109935925, "loss": 1.8413, "step": 21698 }, { "epoch": 0.72, "grad_norm": 0.4363339841365814, "learning_rate": 0.0005182754407886208, "loss": 1.8892, "step": 21699 }, { "epoch": 0.72, "grad_norm": 0.43571236729621887, "learning_rate": 0.0005182682703186947, "loss": 1.8898, "step": 21700 }, { "epoch": 0.72, "grad_norm": 0.4134293794631958, "learning_rate": 0.0005182610995838232, "loss": 1.8802, "step": 21701 }, { "epoch": 0.72, "grad_norm": 0.4210169017314911, "learning_rate": 0.0005182539285840149, "loss": 1.8735, "step": 21702 }, { "epoch": 0.72, "grad_norm": 0.43445730209350586, "learning_rate": 0.0005182467573192785, "loss": 1.8463, "step": 21703 }, { "epoch": 0.72, "grad_norm": 0.42513301968574524, "learning_rate": 0.0005182395857896225, "loss": 1.8277, "step": 21704 }, { "epoch": 0.72, "grad_norm": 0.4312775433063507, "learning_rate": 0.000518232413995056, "loss": 1.82, "step": 21705 }, { "epoch": 0.72, "grad_norm": 0.4147627651691437, "learning_rate": 0.0005182252419355873, "loss": 1.7599, "step": 21706 }, { "epoch": 0.72, "grad_norm": 0.4246074855327606, "learning_rate": 0.0005182180696112255, "loss": 1.8277, "step": 21707 }, { "epoch": 0.72, "grad_norm": 0.4356321692466736, "learning_rate": 0.0005182108970219791, "loss": 1.875, "step": 21708 }, { "epoch": 0.72, "grad_norm": 0.4608955383300781, "learning_rate": 0.0005182037241678568, "loss": 1.7999, "step": 21709 }, { "epoch": 0.72, "grad_norm": 0.4274962842464447, "learning_rate": 0.0005181965510488673, "loss": 1.8438, "step": 21710 }, { "epoch": 0.72, "grad_norm": 0.49983811378479004, "learning_rate": 0.0005181893776650193, "loss": 1.8441, "step": 21711 }, { "epoch": 0.72, "grad_norm": 0.4574068784713745, "learning_rate": 0.0005181822040163216, "loss": 1.801, "step": 21712 }, { "epoch": 0.72, "grad_norm": 0.45583340525627136, "learning_rate": 0.000518175030102783, "loss": 1.8675, "step": 21713 }, { "epoch": 0.72, "grad_norm": 0.447760671377182, "learning_rate": 0.0005181678559244118, "loss": 1.8779, "step": 21714 }, { "epoch": 0.72, "grad_norm": 0.4130687415599823, "learning_rate": 0.0005181606814812171, "loss": 1.7948, "step": 21715 }, { "epoch": 0.72, "grad_norm": 0.5860568284988403, "learning_rate": 0.0005181535067732074, "loss": 1.8653, "step": 21716 }, { "epoch": 0.72, "grad_norm": 0.43951642513275146, "learning_rate": 0.0005181463318003916, "loss": 1.8088, "step": 21717 }, { "epoch": 0.72, "grad_norm": 0.43353185057640076, "learning_rate": 0.0005181391565627783, "loss": 1.8158, "step": 21718 }, { "epoch": 0.72, "grad_norm": 0.47018054127693176, "learning_rate": 0.0005181319810603761, "loss": 1.8042, "step": 21719 }, { "epoch": 0.72, "grad_norm": 0.45137298107147217, "learning_rate": 0.0005181248052931938, "loss": 1.9154, "step": 21720 }, { "epoch": 0.72, "grad_norm": 0.4344518482685089, "learning_rate": 0.0005181176292612402, "loss": 1.8237, "step": 21721 }, { "epoch": 0.72, "grad_norm": 0.43761128187179565, "learning_rate": 0.0005181104529645239, "loss": 1.8019, "step": 21722 }, { "epoch": 0.72, "grad_norm": 0.435030072927475, "learning_rate": 0.0005181032764030537, "loss": 1.849, "step": 21723 }, { "epoch": 0.72, "grad_norm": 0.4414038360118866, "learning_rate": 0.0005180960995768381, "loss": 1.8325, "step": 21724 }, { "epoch": 0.72, "grad_norm": 0.43038731813430786, "learning_rate": 0.0005180889224858862, "loss": 1.8892, "step": 21725 }, { "epoch": 0.72, "grad_norm": 0.43373024463653564, "learning_rate": 0.0005180817451302064, "loss": 1.8376, "step": 21726 }, { "epoch": 0.72, "grad_norm": 0.42170023918151855, "learning_rate": 0.0005180745675098075, "loss": 1.76, "step": 21727 }, { "epoch": 0.72, "grad_norm": 0.4452226459980011, "learning_rate": 0.000518067389624698, "loss": 1.7632, "step": 21728 }, { "epoch": 0.72, "grad_norm": 0.4343259036540985, "learning_rate": 0.0005180602114748872, "loss": 1.7386, "step": 21729 }, { "epoch": 0.72, "grad_norm": 0.4376579821109772, "learning_rate": 0.0005180530330603831, "loss": 1.7584, "step": 21730 }, { "epoch": 0.72, "grad_norm": 0.43215566873550415, "learning_rate": 0.000518045854381195, "loss": 1.8599, "step": 21731 }, { "epoch": 0.72, "grad_norm": 0.4341966509819031, "learning_rate": 0.0005180386754373311, "loss": 1.7667, "step": 21732 }, { "epoch": 0.72, "grad_norm": 0.42800119519233704, "learning_rate": 0.0005180314962288006, "loss": 1.7552, "step": 21733 }, { "epoch": 0.72, "grad_norm": 0.42972785234451294, "learning_rate": 0.0005180243167556118, "loss": 1.889, "step": 21734 }, { "epoch": 0.72, "grad_norm": 0.4531661570072174, "learning_rate": 0.0005180171370177737, "loss": 1.8569, "step": 21735 }, { "epoch": 0.72, "grad_norm": 0.4388469457626343, "learning_rate": 0.000518009957015295, "loss": 1.8021, "step": 21736 }, { "epoch": 0.72, "grad_norm": 0.43195661902427673, "learning_rate": 0.0005180027767481843, "loss": 1.7821, "step": 21737 }, { "epoch": 0.72, "grad_norm": 0.4449574649333954, "learning_rate": 0.0005179955962164503, "loss": 1.7818, "step": 21738 }, { "epoch": 0.72, "grad_norm": 0.4426048994064331, "learning_rate": 0.0005179884154201017, "loss": 1.7924, "step": 21739 }, { "epoch": 0.72, "grad_norm": 0.434927374124527, "learning_rate": 0.0005179812343591474, "loss": 1.8556, "step": 21740 }, { "epoch": 0.72, "grad_norm": 0.45036786794662476, "learning_rate": 0.0005179740530335959, "loss": 1.8803, "step": 21741 }, { "epoch": 0.72, "grad_norm": 0.4474732279777527, "learning_rate": 0.000517966871443456, "loss": 1.8161, "step": 21742 }, { "epoch": 0.72, "grad_norm": 0.4639265239238739, "learning_rate": 0.0005179596895887366, "loss": 1.8817, "step": 21743 }, { "epoch": 0.72, "grad_norm": 0.4334072470664978, "learning_rate": 0.0005179525074694461, "loss": 1.8173, "step": 21744 }, { "epoch": 0.72, "grad_norm": 0.46875521540641785, "learning_rate": 0.0005179453250855934, "loss": 1.8136, "step": 21745 }, { "epoch": 0.72, "grad_norm": 0.44786694645881653, "learning_rate": 0.0005179381424371873, "loss": 1.8412, "step": 21746 }, { "epoch": 0.72, "grad_norm": 0.4369478225708008, "learning_rate": 0.0005179309595242363, "loss": 1.7928, "step": 21747 }, { "epoch": 0.72, "grad_norm": 0.4365237355232239, "learning_rate": 0.0005179237763467493, "loss": 1.7915, "step": 21748 }, { "epoch": 0.72, "grad_norm": 0.43401214480400085, "learning_rate": 0.0005179165929047348, "loss": 1.8187, "step": 21749 }, { "epoch": 0.72, "grad_norm": 0.446250855922699, "learning_rate": 0.0005179094091982019, "loss": 1.8221, "step": 21750 }, { "epoch": 0.72, "grad_norm": 0.4508137106895447, "learning_rate": 0.0005179022252271589, "loss": 1.8014, "step": 21751 }, { "epoch": 0.72, "grad_norm": 0.4176098108291626, "learning_rate": 0.0005178950409916147, "loss": 1.7892, "step": 21752 }, { "epoch": 0.72, "grad_norm": 0.43086501955986023, "learning_rate": 0.000517887856491578, "loss": 1.7853, "step": 21753 }, { "epoch": 0.72, "grad_norm": 0.446670800447464, "learning_rate": 0.0005178806717270576, "loss": 1.7956, "step": 21754 }, { "epoch": 0.72, "grad_norm": 0.42234015464782715, "learning_rate": 0.0005178734866980622, "loss": 1.7759, "step": 21755 }, { "epoch": 0.72, "grad_norm": 0.43395841121673584, "learning_rate": 0.0005178663014046006, "loss": 1.7736, "step": 21756 }, { "epoch": 0.72, "grad_norm": 0.43108606338500977, "learning_rate": 0.0005178591158466813, "loss": 1.8663, "step": 21757 }, { "epoch": 0.72, "grad_norm": 0.43748512864112854, "learning_rate": 0.000517851930024313, "loss": 1.8443, "step": 21758 }, { "epoch": 0.72, "grad_norm": 0.47329825162887573, "learning_rate": 0.0005178447439375047, "loss": 1.8929, "step": 21759 }, { "epoch": 0.72, "grad_norm": 0.43828293681144714, "learning_rate": 0.000517837557586265, "loss": 1.8166, "step": 21760 }, { "epoch": 0.72, "grad_norm": 0.43890610337257385, "learning_rate": 0.0005178303709706025, "loss": 1.8807, "step": 21761 }, { "epoch": 0.72, "grad_norm": 0.44799143075942993, "learning_rate": 0.0005178231840905261, "loss": 1.7824, "step": 21762 }, { "epoch": 0.72, "grad_norm": 0.42364364862442017, "learning_rate": 0.0005178159969460444, "loss": 1.7941, "step": 21763 }, { "epoch": 0.72, "grad_norm": 0.431962788105011, "learning_rate": 0.0005178088095371663, "loss": 1.8571, "step": 21764 }, { "epoch": 0.72, "grad_norm": 0.43697842955589294, "learning_rate": 0.0005178016218639002, "loss": 1.9403, "step": 21765 }, { "epoch": 0.72, "grad_norm": 0.45038357377052307, "learning_rate": 0.0005177944339262552, "loss": 1.8197, "step": 21766 }, { "epoch": 0.72, "grad_norm": 0.4457504451274872, "learning_rate": 0.0005177872457242398, "loss": 1.8694, "step": 21767 }, { "epoch": 0.72, "grad_norm": 0.45071178674697876, "learning_rate": 0.0005177800572578626, "loss": 1.9345, "step": 21768 }, { "epoch": 0.72, "grad_norm": 0.45914119482040405, "learning_rate": 0.0005177728685271328, "loss": 1.945, "step": 21769 }, { "epoch": 0.72, "grad_norm": 0.4253150522708893, "learning_rate": 0.0005177656795320587, "loss": 1.8536, "step": 21770 }, { "epoch": 0.72, "grad_norm": 0.4301612675189972, "learning_rate": 0.000517758490272649, "loss": 1.8302, "step": 21771 }, { "epoch": 0.72, "grad_norm": 0.43868622183799744, "learning_rate": 0.0005177513007489127, "loss": 1.8077, "step": 21772 }, { "epoch": 0.72, "grad_norm": 0.43476781249046326, "learning_rate": 0.0005177441109608584, "loss": 1.7597, "step": 21773 }, { "epoch": 0.72, "grad_norm": 0.44347137212753296, "learning_rate": 0.0005177369209084948, "loss": 1.8829, "step": 21774 }, { "epoch": 0.72, "grad_norm": 0.44829732179641724, "learning_rate": 0.0005177297305918308, "loss": 1.8005, "step": 21775 }, { "epoch": 0.72, "grad_norm": 0.7656357288360596, "learning_rate": 0.0005177225400108749, "loss": 1.8368, "step": 21776 }, { "epoch": 0.72, "grad_norm": 0.4299716055393219, "learning_rate": 0.0005177153491656358, "loss": 1.7836, "step": 21777 }, { "epoch": 0.72, "grad_norm": 0.43222105503082275, "learning_rate": 0.0005177081580561225, "loss": 1.8746, "step": 21778 }, { "epoch": 0.72, "grad_norm": 0.43589910864830017, "learning_rate": 0.0005177009666823435, "loss": 1.7629, "step": 21779 }, { "epoch": 0.72, "grad_norm": 0.4308894872665405, "learning_rate": 0.0005176937750443078, "loss": 1.8679, "step": 21780 }, { "epoch": 0.72, "grad_norm": 0.4411970376968384, "learning_rate": 0.0005176865831420236, "loss": 1.8279, "step": 21781 }, { "epoch": 0.72, "grad_norm": 0.4204850494861603, "learning_rate": 0.0005176793909755002, "loss": 1.7875, "step": 21782 }, { "epoch": 0.72, "grad_norm": 0.4394345283508301, "learning_rate": 0.0005176721985447459, "loss": 1.8447, "step": 21783 }, { "epoch": 0.72, "grad_norm": 0.43362095952033997, "learning_rate": 0.0005176650058497698, "loss": 1.8819, "step": 21784 }, { "epoch": 0.72, "grad_norm": 0.42932450771331787, "learning_rate": 0.0005176578128905804, "loss": 1.8246, "step": 21785 }, { "epoch": 0.72, "grad_norm": 0.42815712094306946, "learning_rate": 0.0005176506196671864, "loss": 1.8792, "step": 21786 }, { "epoch": 0.72, "grad_norm": 0.418861985206604, "learning_rate": 0.0005176434261795967, "loss": 1.7661, "step": 21787 }, { "epoch": 0.72, "grad_norm": 0.4302656948566437, "learning_rate": 0.00051763623242782, "loss": 1.8814, "step": 21788 }, { "epoch": 0.72, "grad_norm": 0.4365486800670624, "learning_rate": 0.0005176290384118649, "loss": 1.7952, "step": 21789 }, { "epoch": 0.72, "grad_norm": 0.4434650242328644, "learning_rate": 0.0005176218441317402, "loss": 1.8742, "step": 21790 }, { "epoch": 0.72, "grad_norm": 0.4426707923412323, "learning_rate": 0.0005176146495874547, "loss": 1.8403, "step": 21791 }, { "epoch": 0.73, "grad_norm": 0.4286908805370331, "learning_rate": 0.0005176074547790171, "loss": 1.8263, "step": 21792 }, { "epoch": 0.73, "grad_norm": 0.43214064836502075, "learning_rate": 0.000517600259706436, "loss": 1.853, "step": 21793 }, { "epoch": 0.73, "grad_norm": 0.43893224000930786, "learning_rate": 0.0005175930643697204, "loss": 1.8293, "step": 21794 }, { "epoch": 0.73, "grad_norm": 0.42809224128723145, "learning_rate": 0.0005175858687688788, "loss": 1.7999, "step": 21795 }, { "epoch": 0.73, "grad_norm": 0.4428248405456543, "learning_rate": 0.00051757867290392, "loss": 1.8319, "step": 21796 }, { "epoch": 0.73, "grad_norm": 0.4405048191547394, "learning_rate": 0.0005175714767748528, "loss": 1.8671, "step": 21797 }, { "epoch": 0.73, "grad_norm": 0.43474164605140686, "learning_rate": 0.0005175642803816858, "loss": 1.8991, "step": 21798 }, { "epoch": 0.73, "grad_norm": 0.44436657428741455, "learning_rate": 0.0005175570837244279, "loss": 1.8386, "step": 21799 }, { "epoch": 0.73, "grad_norm": 0.4213885962963104, "learning_rate": 0.0005175498868030876, "loss": 1.8265, "step": 21800 }, { "epoch": 0.73, "grad_norm": 0.4457772970199585, "learning_rate": 0.000517542689617674, "loss": 1.8074, "step": 21801 }, { "epoch": 0.73, "grad_norm": 0.42230722308158875, "learning_rate": 0.0005175354921681956, "loss": 1.7947, "step": 21802 }, { "epoch": 0.73, "grad_norm": 0.433265745639801, "learning_rate": 0.0005175282944546611, "loss": 1.8128, "step": 21803 }, { "epoch": 0.73, "grad_norm": 0.43377241492271423, "learning_rate": 0.0005175210964770793, "loss": 1.7761, "step": 21804 }, { "epoch": 0.73, "grad_norm": 0.43908798694610596, "learning_rate": 0.0005175138982354589, "loss": 1.9096, "step": 21805 }, { "epoch": 0.73, "grad_norm": 0.43557849526405334, "learning_rate": 0.0005175066997298088, "loss": 1.8262, "step": 21806 }, { "epoch": 0.73, "grad_norm": 0.4276167154312134, "learning_rate": 0.0005174995009601376, "loss": 1.7941, "step": 21807 }, { "epoch": 0.73, "grad_norm": 0.44604918360710144, "learning_rate": 0.0005174923019264539, "loss": 1.8723, "step": 21808 }, { "epoch": 0.73, "grad_norm": 0.43340781331062317, "learning_rate": 0.0005174851026287667, "loss": 1.8304, "step": 21809 }, { "epoch": 0.73, "grad_norm": 0.41720259189605713, "learning_rate": 0.0005174779030670847, "loss": 1.8374, "step": 21810 }, { "epoch": 0.73, "grad_norm": 0.44357478618621826, "learning_rate": 0.0005174707032414164, "loss": 1.7787, "step": 21811 }, { "epoch": 0.73, "grad_norm": 0.42712029814720154, "learning_rate": 0.0005174635031517708, "loss": 1.8381, "step": 21812 }, { "epoch": 0.73, "grad_norm": 0.4344443678855896, "learning_rate": 0.0005174563027981566, "loss": 1.8427, "step": 21813 }, { "epoch": 0.73, "grad_norm": 0.4221883714199066, "learning_rate": 0.0005174491021805825, "loss": 1.8709, "step": 21814 }, { "epoch": 0.73, "grad_norm": 0.43273288011550903, "learning_rate": 0.0005174419012990571, "loss": 1.7498, "step": 21815 }, { "epoch": 0.73, "grad_norm": 0.4271000623703003, "learning_rate": 0.0005174347001535893, "loss": 1.7994, "step": 21816 }, { "epoch": 0.73, "grad_norm": 0.4358873665332794, "learning_rate": 0.0005174274987441879, "loss": 1.8148, "step": 21817 }, { "epoch": 0.73, "grad_norm": 0.42542076110839844, "learning_rate": 0.0005174202970708615, "loss": 1.8451, "step": 21818 }, { "epoch": 0.73, "grad_norm": 0.4387345314025879, "learning_rate": 0.0005174130951336187, "loss": 1.7844, "step": 21819 }, { "epoch": 0.73, "grad_norm": 0.42665985226631165, "learning_rate": 0.0005174058929324688, "loss": 1.8417, "step": 21820 }, { "epoch": 0.73, "grad_norm": 0.45739495754241943, "learning_rate": 0.00051739869046742, "loss": 1.7981, "step": 21821 }, { "epoch": 0.73, "grad_norm": 0.5010709166526794, "learning_rate": 0.0005173914877384812, "loss": 1.8082, "step": 21822 }, { "epoch": 0.73, "grad_norm": 0.4249192178249359, "learning_rate": 0.0005173842847456612, "loss": 1.7796, "step": 21823 }, { "epoch": 0.73, "grad_norm": 0.437040239572525, "learning_rate": 0.0005173770814889687, "loss": 1.8314, "step": 21824 }, { "epoch": 0.73, "grad_norm": 0.4591888189315796, "learning_rate": 0.0005173698779684125, "loss": 1.935, "step": 21825 }, { "epoch": 0.73, "grad_norm": 0.4551887512207031, "learning_rate": 0.0005173626741840012, "loss": 1.8163, "step": 21826 }, { "epoch": 0.73, "grad_norm": 0.4171202480792999, "learning_rate": 0.0005173554701357438, "loss": 1.8177, "step": 21827 }, { "epoch": 0.73, "grad_norm": 0.4231874346733093, "learning_rate": 0.0005173482658236487, "loss": 1.8959, "step": 21828 }, { "epoch": 0.73, "grad_norm": 0.42821794748306274, "learning_rate": 0.0005173410612477249, "loss": 1.8268, "step": 21829 }, { "epoch": 0.73, "grad_norm": 0.4393225610256195, "learning_rate": 0.0005173338564079811, "loss": 1.7725, "step": 21830 }, { "epoch": 0.73, "grad_norm": 0.4455973505973816, "learning_rate": 0.000517326651304426, "loss": 1.8473, "step": 21831 }, { "epoch": 0.73, "grad_norm": 0.42550307512283325, "learning_rate": 0.0005173194459370683, "loss": 1.8582, "step": 21832 }, { "epoch": 0.73, "grad_norm": 0.4245850443840027, "learning_rate": 0.000517312240305917, "loss": 1.9165, "step": 21833 }, { "epoch": 0.73, "grad_norm": 0.44312751293182373, "learning_rate": 0.0005173050344109805, "loss": 1.7181, "step": 21834 }, { "epoch": 0.73, "grad_norm": 0.4424593150615692, "learning_rate": 0.0005172978282522678, "loss": 1.8358, "step": 21835 }, { "epoch": 0.73, "grad_norm": 0.4298592209815979, "learning_rate": 0.0005172906218297874, "loss": 1.7925, "step": 21836 }, { "epoch": 0.73, "grad_norm": 0.4285754859447479, "learning_rate": 0.0005172834151435483, "loss": 1.8247, "step": 21837 }, { "epoch": 0.73, "grad_norm": 0.4429777264595032, "learning_rate": 0.0005172762081935593, "loss": 1.9182, "step": 21838 }, { "epoch": 0.73, "grad_norm": 0.5113509893417358, "learning_rate": 0.0005172690009798288, "loss": 1.8749, "step": 21839 }, { "epoch": 0.73, "grad_norm": 0.4286421537399292, "learning_rate": 0.0005172617935023658, "loss": 1.8349, "step": 21840 }, { "epoch": 0.73, "grad_norm": 0.46037399768829346, "learning_rate": 0.000517254585761179, "loss": 1.8051, "step": 21841 }, { "epoch": 0.73, "grad_norm": 0.4412227272987366, "learning_rate": 0.0005172473777562771, "loss": 1.8104, "step": 21842 }, { "epoch": 0.73, "grad_norm": 0.43624791502952576, "learning_rate": 0.000517240169487669, "loss": 1.8454, "step": 21843 }, { "epoch": 0.73, "grad_norm": 0.4371053874492645, "learning_rate": 0.0005172329609553632, "loss": 1.8878, "step": 21844 }, { "epoch": 0.73, "grad_norm": 0.42666739225387573, "learning_rate": 0.0005172257521593687, "loss": 1.785, "step": 21845 }, { "epoch": 0.73, "grad_norm": 0.433574914932251, "learning_rate": 0.0005172185430996942, "loss": 1.8333, "step": 21846 }, { "epoch": 0.73, "grad_norm": 0.44306480884552, "learning_rate": 0.0005172113337763482, "loss": 1.8601, "step": 21847 }, { "epoch": 0.73, "grad_norm": 0.4464474320411682, "learning_rate": 0.0005172041241893398, "loss": 1.8249, "step": 21848 }, { "epoch": 0.73, "grad_norm": 0.42646417021751404, "learning_rate": 0.0005171969143386775, "loss": 1.8034, "step": 21849 }, { "epoch": 0.73, "grad_norm": 0.42594218254089355, "learning_rate": 0.0005171897042243703, "loss": 1.8335, "step": 21850 }, { "epoch": 0.73, "grad_norm": 0.4550638496875763, "learning_rate": 0.0005171824938464267, "loss": 1.9181, "step": 21851 }, { "epoch": 0.73, "grad_norm": 0.45130205154418945, "learning_rate": 0.0005171752832048554, "loss": 1.8227, "step": 21852 }, { "epoch": 0.73, "grad_norm": 0.42792457342147827, "learning_rate": 0.0005171680722996655, "loss": 1.812, "step": 21853 }, { "epoch": 0.73, "grad_norm": 0.4531056880950928, "learning_rate": 0.0005171608611308655, "loss": 1.8713, "step": 21854 }, { "epoch": 0.73, "grad_norm": 0.4235857129096985, "learning_rate": 0.0005171536496984643, "loss": 1.8198, "step": 21855 }, { "epoch": 0.73, "grad_norm": 0.454805850982666, "learning_rate": 0.0005171464380024705, "loss": 1.8456, "step": 21856 }, { "epoch": 0.73, "grad_norm": 0.432285338640213, "learning_rate": 0.0005171392260428929, "loss": 1.8169, "step": 21857 }, { "epoch": 0.73, "grad_norm": 0.4398694336414337, "learning_rate": 0.0005171320138197403, "loss": 1.8252, "step": 21858 }, { "epoch": 0.73, "grad_norm": 0.45757725834846497, "learning_rate": 0.0005171248013330214, "loss": 1.7616, "step": 21859 }, { "epoch": 0.73, "grad_norm": 0.44099095463752747, "learning_rate": 0.0005171175885827449, "loss": 1.7633, "step": 21860 }, { "epoch": 0.73, "grad_norm": 0.4313531816005707, "learning_rate": 0.0005171103755689198, "loss": 1.7605, "step": 21861 }, { "epoch": 0.73, "grad_norm": 0.4653591811656952, "learning_rate": 0.0005171031622915546, "loss": 1.8044, "step": 21862 }, { "epoch": 0.73, "grad_norm": 0.492756187915802, "learning_rate": 0.0005170959487506582, "loss": 1.7936, "step": 21863 }, { "epoch": 0.73, "grad_norm": 0.8512386679649353, "learning_rate": 0.0005170887349462392, "loss": 1.9109, "step": 21864 }, { "epoch": 0.73, "grad_norm": 0.4213694930076599, "learning_rate": 0.0005170815208783067, "loss": 1.8014, "step": 21865 }, { "epoch": 0.73, "grad_norm": 0.4272633492946625, "learning_rate": 0.000517074306546869, "loss": 1.763, "step": 21866 }, { "epoch": 0.73, "grad_norm": 0.4532919228076935, "learning_rate": 0.0005170670919519352, "loss": 1.8476, "step": 21867 }, { "epoch": 0.73, "grad_norm": 0.45231351256370544, "learning_rate": 0.0005170598770935139, "loss": 1.7218, "step": 21868 }, { "epoch": 0.73, "grad_norm": 0.42153239250183105, "learning_rate": 0.0005170526619716138, "loss": 1.8272, "step": 21869 }, { "epoch": 0.73, "grad_norm": 0.4383576810359955, "learning_rate": 0.0005170454465862438, "loss": 1.7928, "step": 21870 }, { "epoch": 0.73, "grad_norm": 0.4713294804096222, "learning_rate": 0.0005170382309374126, "loss": 1.7566, "step": 21871 }, { "epoch": 0.73, "grad_norm": 0.4451095163822174, "learning_rate": 0.000517031015025129, "loss": 1.8709, "step": 21872 }, { "epoch": 0.73, "grad_norm": 0.4430522918701172, "learning_rate": 0.0005170237988494018, "loss": 1.8367, "step": 21873 }, { "epoch": 0.73, "grad_norm": 0.4219326972961426, "learning_rate": 0.0005170165824102395, "loss": 1.8818, "step": 21874 }, { "epoch": 0.73, "grad_norm": 0.42985329031944275, "learning_rate": 0.0005170093657076511, "loss": 1.8803, "step": 21875 }, { "epoch": 0.73, "grad_norm": 0.44460996985435486, "learning_rate": 0.0005170021487416454, "loss": 1.746, "step": 21876 }, { "epoch": 0.73, "grad_norm": 0.42145007848739624, "learning_rate": 0.0005169949315122311, "loss": 1.8514, "step": 21877 }, { "epoch": 0.73, "grad_norm": 0.42517217993736267, "learning_rate": 0.0005169877140194167, "loss": 1.798, "step": 21878 }, { "epoch": 0.73, "grad_norm": 0.43558889627456665, "learning_rate": 0.0005169804962632114, "loss": 1.8281, "step": 21879 }, { "epoch": 0.73, "grad_norm": 0.42459261417388916, "learning_rate": 0.0005169732782436238, "loss": 1.7848, "step": 21880 }, { "epoch": 0.73, "grad_norm": 0.4204125702381134, "learning_rate": 0.0005169660599606624, "loss": 1.799, "step": 21881 }, { "epoch": 0.73, "grad_norm": 0.4466122090816498, "learning_rate": 0.0005169588414143362, "loss": 1.9174, "step": 21882 }, { "epoch": 0.73, "grad_norm": 0.45455309748649597, "learning_rate": 0.000516951622604654, "loss": 1.8382, "step": 21883 }, { "epoch": 0.73, "grad_norm": 0.43407508730888367, "learning_rate": 0.0005169444035316245, "loss": 1.7718, "step": 21884 }, { "epoch": 0.73, "grad_norm": 0.4227302372455597, "learning_rate": 0.0005169371841952565, "loss": 1.8139, "step": 21885 }, { "epoch": 0.73, "grad_norm": 0.434763103723526, "learning_rate": 0.0005169299645955585, "loss": 1.828, "step": 21886 }, { "epoch": 0.73, "grad_norm": 0.4529958665370941, "learning_rate": 0.0005169227447325398, "loss": 1.8262, "step": 21887 }, { "epoch": 0.73, "grad_norm": 0.4484739601612091, "learning_rate": 0.0005169155246062087, "loss": 1.8155, "step": 21888 }, { "epoch": 0.73, "grad_norm": 0.4115532636642456, "learning_rate": 0.0005169083042165741, "loss": 1.7857, "step": 21889 }, { "epoch": 0.73, "grad_norm": 0.4288921356201172, "learning_rate": 0.0005169010835636449, "loss": 1.859, "step": 21890 }, { "epoch": 0.73, "grad_norm": 0.43863800168037415, "learning_rate": 0.0005168938626474295, "loss": 1.8812, "step": 21891 }, { "epoch": 0.73, "grad_norm": 0.43410760164260864, "learning_rate": 0.0005168866414679371, "loss": 1.8406, "step": 21892 }, { "epoch": 0.73, "grad_norm": 0.4160363972187042, "learning_rate": 0.0005168794200251762, "loss": 1.7956, "step": 21893 }, { "epoch": 0.73, "grad_norm": 0.43412256240844727, "learning_rate": 0.0005168721983191557, "loss": 1.7832, "step": 21894 }, { "epoch": 0.73, "grad_norm": 0.4316442608833313, "learning_rate": 0.0005168649763498842, "loss": 1.855, "step": 21895 }, { "epoch": 0.73, "grad_norm": 0.43374794721603394, "learning_rate": 0.0005168577541173707, "loss": 1.7453, "step": 21896 }, { "epoch": 0.73, "grad_norm": 0.434969037771225, "learning_rate": 0.0005168505316216238, "loss": 1.8012, "step": 21897 }, { "epoch": 0.73, "grad_norm": 0.42668500542640686, "learning_rate": 0.0005168433088626524, "loss": 1.8117, "step": 21898 }, { "epoch": 0.73, "grad_norm": 0.4370132088661194, "learning_rate": 0.000516836085840465, "loss": 1.8349, "step": 21899 }, { "epoch": 0.73, "grad_norm": 0.42217332124710083, "learning_rate": 0.0005168288625550705, "loss": 1.8281, "step": 21900 }, { "epoch": 0.73, "grad_norm": 0.42196500301361084, "learning_rate": 0.0005168216390064778, "loss": 1.8538, "step": 21901 }, { "epoch": 0.73, "grad_norm": 0.42465940117836, "learning_rate": 0.0005168144151946957, "loss": 1.8661, "step": 21902 }, { "epoch": 0.73, "grad_norm": 0.4225635230541229, "learning_rate": 0.0005168071911197327, "loss": 1.9254, "step": 21903 }, { "epoch": 0.73, "grad_norm": 0.45386335253715515, "learning_rate": 0.0005167999667815978, "loss": 1.854, "step": 21904 }, { "epoch": 0.73, "grad_norm": 0.4354526102542877, "learning_rate": 0.0005167927421802996, "loss": 1.7991, "step": 21905 }, { "epoch": 0.73, "grad_norm": 0.4455648362636566, "learning_rate": 0.0005167855173158469, "loss": 1.8384, "step": 21906 }, { "epoch": 0.73, "grad_norm": 0.4339281916618347, "learning_rate": 0.0005167782921882487, "loss": 1.9325, "step": 21907 }, { "epoch": 0.73, "grad_norm": 0.4411880075931549, "learning_rate": 0.0005167710667975134, "loss": 1.875, "step": 21908 }, { "epoch": 0.73, "grad_norm": 0.4502103626728058, "learning_rate": 0.0005167638411436501, "loss": 1.8451, "step": 21909 }, { "epoch": 0.73, "grad_norm": 0.43922802805900574, "learning_rate": 0.0005167566152266674, "loss": 1.8124, "step": 21910 }, { "epoch": 0.73, "grad_norm": 0.423276424407959, "learning_rate": 0.0005167493890465742, "loss": 1.8538, "step": 21911 }, { "epoch": 0.73, "grad_norm": 0.4447159171104431, "learning_rate": 0.000516742162603379, "loss": 1.7786, "step": 21912 }, { "epoch": 0.73, "grad_norm": 0.4460090398788452, "learning_rate": 0.0005167349358970909, "loss": 1.7683, "step": 21913 }, { "epoch": 0.73, "grad_norm": 0.43879029154777527, "learning_rate": 0.0005167277089277185, "loss": 1.8139, "step": 21914 }, { "epoch": 0.73, "grad_norm": 0.43327265977859497, "learning_rate": 0.0005167204816952705, "loss": 1.8174, "step": 21915 }, { "epoch": 0.73, "grad_norm": 0.4401167333126068, "learning_rate": 0.0005167132541997558, "loss": 1.8497, "step": 21916 }, { "epoch": 0.73, "grad_norm": 0.43183913826942444, "learning_rate": 0.0005167060264411833, "loss": 1.7916, "step": 21917 }, { "epoch": 0.73, "grad_norm": 0.4298411011695862, "learning_rate": 0.0005166987984195614, "loss": 1.81, "step": 21918 }, { "epoch": 0.73, "grad_norm": 0.4335043728351593, "learning_rate": 0.0005166915701348991, "loss": 1.7872, "step": 21919 }, { "epoch": 0.73, "grad_norm": 0.42248770594596863, "learning_rate": 0.0005166843415872052, "loss": 1.8763, "step": 21920 }, { "epoch": 0.73, "grad_norm": 0.41511470079421997, "learning_rate": 0.0005166771127764886, "loss": 1.7759, "step": 21921 }, { "epoch": 0.73, "grad_norm": 0.46862104535102844, "learning_rate": 0.0005166698837027578, "loss": 1.7902, "step": 21922 }, { "epoch": 0.73, "grad_norm": 0.4468306005001068, "learning_rate": 0.0005166626543660216, "loss": 1.8688, "step": 21923 }, { "epoch": 0.73, "grad_norm": 0.43521755933761597, "learning_rate": 0.000516655424766289, "loss": 1.8358, "step": 21924 }, { "epoch": 0.73, "grad_norm": 0.447326123714447, "learning_rate": 0.0005166481949035685, "loss": 1.8147, "step": 21925 }, { "epoch": 0.73, "grad_norm": 0.44273850321769714, "learning_rate": 0.000516640964777869, "loss": 1.846, "step": 21926 }, { "epoch": 0.73, "grad_norm": 0.4286278188228607, "learning_rate": 0.0005166337343891994, "loss": 1.7842, "step": 21927 }, { "epoch": 0.73, "grad_norm": 0.43410539627075195, "learning_rate": 0.0005166265037375683, "loss": 1.8, "step": 21928 }, { "epoch": 0.73, "grad_norm": 0.4292392432689667, "learning_rate": 0.0005166192728229846, "loss": 1.8464, "step": 21929 }, { "epoch": 0.73, "grad_norm": 0.42767074704170227, "learning_rate": 0.000516612041645457, "loss": 1.8364, "step": 21930 }, { "epoch": 0.73, "grad_norm": 0.43119940161705017, "learning_rate": 0.0005166048102049943, "loss": 1.9133, "step": 21931 }, { "epoch": 0.73, "grad_norm": 0.437435120344162, "learning_rate": 0.0005165975785016053, "loss": 1.828, "step": 21932 }, { "epoch": 0.73, "grad_norm": 0.43472954630851746, "learning_rate": 0.0005165903465352987, "loss": 1.8567, "step": 21933 }, { "epoch": 0.73, "grad_norm": 0.44040149450302124, "learning_rate": 0.0005165831143060834, "loss": 1.791, "step": 21934 }, { "epoch": 0.73, "grad_norm": 0.42586082220077515, "learning_rate": 0.000516575881813968, "loss": 1.7841, "step": 21935 }, { "epoch": 0.73, "grad_norm": 0.4214228689670563, "learning_rate": 0.0005165686490589614, "loss": 1.8068, "step": 21936 }, { "epoch": 0.73, "grad_norm": 0.4277007281780243, "learning_rate": 0.0005165614160410725, "loss": 1.7902, "step": 21937 }, { "epoch": 0.73, "grad_norm": 0.4327252507209778, "learning_rate": 0.0005165541827603098, "loss": 1.8447, "step": 21938 }, { "epoch": 0.73, "grad_norm": 0.4481295049190521, "learning_rate": 0.0005165469492166824, "loss": 1.7738, "step": 21939 }, { "epoch": 0.73, "grad_norm": 0.4422396719455719, "learning_rate": 0.0005165397154101987, "loss": 1.8154, "step": 21940 }, { "epoch": 0.73, "grad_norm": 0.42041489481925964, "learning_rate": 0.0005165324813408679, "loss": 1.8045, "step": 21941 }, { "epoch": 0.73, "grad_norm": 0.43594732880592346, "learning_rate": 0.0005165252470086984, "loss": 1.7936, "step": 21942 }, { "epoch": 0.73, "grad_norm": 0.4204171299934387, "learning_rate": 0.0005165180124136993, "loss": 1.7737, "step": 21943 }, { "epoch": 0.73, "grad_norm": 0.4412636458873749, "learning_rate": 0.0005165107775558791, "loss": 1.7928, "step": 21944 }, { "epoch": 0.73, "grad_norm": 0.4237067699432373, "learning_rate": 0.0005165035424352469, "loss": 1.7475, "step": 21945 }, { "epoch": 0.73, "grad_norm": 0.4399079382419586, "learning_rate": 0.0005164963070518111, "loss": 1.796, "step": 21946 }, { "epoch": 0.73, "grad_norm": 0.425914466381073, "learning_rate": 0.0005164890714055809, "loss": 1.8447, "step": 21947 }, { "epoch": 0.73, "grad_norm": 0.4352530241012573, "learning_rate": 0.0005164818354965646, "loss": 1.9311, "step": 21948 }, { "epoch": 0.73, "grad_norm": 0.43077221512794495, "learning_rate": 0.0005164745993247714, "loss": 1.8445, "step": 21949 }, { "epoch": 0.73, "grad_norm": 0.44237202405929565, "learning_rate": 0.0005164673628902099, "loss": 1.8789, "step": 21950 }, { "epoch": 0.73, "grad_norm": 0.4564652144908905, "learning_rate": 0.0005164601261928889, "loss": 1.849, "step": 21951 }, { "epoch": 0.73, "grad_norm": 0.42040354013442993, "learning_rate": 0.0005164528892328173, "loss": 1.8203, "step": 21952 }, { "epoch": 0.73, "grad_norm": 0.435030996799469, "learning_rate": 0.0005164456520100037, "loss": 1.8342, "step": 21953 }, { "epoch": 0.73, "grad_norm": 0.41359031200408936, "learning_rate": 0.000516438414524457, "loss": 1.8375, "step": 21954 }, { "epoch": 0.73, "grad_norm": 0.46150466799736023, "learning_rate": 0.000516431176776186, "loss": 1.8303, "step": 21955 }, { "epoch": 0.73, "grad_norm": 0.4389123320579529, "learning_rate": 0.0005164239387651993, "loss": 1.8297, "step": 21956 }, { "epoch": 0.73, "grad_norm": 0.42125236988067627, "learning_rate": 0.000516416700491506, "loss": 1.849, "step": 21957 }, { "epoch": 0.73, "grad_norm": 0.42795756459236145, "learning_rate": 0.0005164094619551145, "loss": 1.8544, "step": 21958 }, { "epoch": 0.73, "grad_norm": 0.41605833172798157, "learning_rate": 0.0005164022231560339, "loss": 1.7785, "step": 21959 }, { "epoch": 0.73, "grad_norm": 0.445089191198349, "learning_rate": 0.000516394984094273, "loss": 1.7824, "step": 21960 }, { "epoch": 0.73, "grad_norm": 0.42300036549568176, "learning_rate": 0.0005163877447698403, "loss": 1.8284, "step": 21961 }, { "epoch": 0.73, "grad_norm": 0.4109185039997101, "learning_rate": 0.0005163805051827449, "loss": 1.847, "step": 21962 }, { "epoch": 0.73, "grad_norm": 0.4271949827671051, "learning_rate": 0.0005163732653329954, "loss": 1.7444, "step": 21963 }, { "epoch": 0.73, "grad_norm": 0.4161792993545532, "learning_rate": 0.0005163660252206005, "loss": 1.808, "step": 21964 }, { "epoch": 0.73, "grad_norm": 0.43706902861595154, "learning_rate": 0.0005163587848455694, "loss": 1.9425, "step": 21965 }, { "epoch": 0.73, "grad_norm": 0.42613494396209717, "learning_rate": 0.0005163515442079104, "loss": 1.8471, "step": 21966 }, { "epoch": 0.73, "grad_norm": 0.4367574155330658, "learning_rate": 0.0005163443033076327, "loss": 1.8105, "step": 21967 }, { "epoch": 0.73, "grad_norm": 0.4566901922225952, "learning_rate": 0.0005163370621447446, "loss": 1.8599, "step": 21968 }, { "epoch": 0.73, "grad_norm": 0.4398126006126404, "learning_rate": 0.0005163298207192554, "loss": 1.8589, "step": 21969 }, { "epoch": 0.73, "grad_norm": 0.43108001351356506, "learning_rate": 0.0005163225790311736, "loss": 1.8515, "step": 21970 }, { "epoch": 0.73, "grad_norm": 0.6801282167434692, "learning_rate": 0.0005163153370805081, "loss": 1.8958, "step": 21971 }, { "epoch": 0.73, "grad_norm": 0.44249072670936584, "learning_rate": 0.0005163080948672676, "loss": 1.8328, "step": 21972 }, { "epoch": 0.73, "grad_norm": 0.4256688952445984, "learning_rate": 0.000516300852391461, "loss": 1.8097, "step": 21973 }, { "epoch": 0.73, "grad_norm": 0.44229310750961304, "learning_rate": 0.000516293609653097, "loss": 1.9038, "step": 21974 }, { "epoch": 0.73, "grad_norm": 0.47172608971595764, "learning_rate": 0.0005162863666521845, "loss": 1.858, "step": 21975 }, { "epoch": 0.73, "grad_norm": 0.4342520833015442, "learning_rate": 0.0005162791233887321, "loss": 1.878, "step": 21976 }, { "epoch": 0.73, "grad_norm": 0.46983569860458374, "learning_rate": 0.0005162718798627489, "loss": 1.8163, "step": 21977 }, { "epoch": 0.73, "grad_norm": 0.4225779175758362, "learning_rate": 0.0005162646360742433, "loss": 1.7636, "step": 21978 }, { "epoch": 0.73, "grad_norm": 0.4327314794063568, "learning_rate": 0.0005162573920232244, "loss": 1.8376, "step": 21979 }, { "epoch": 0.73, "grad_norm": 0.45709890127182007, "learning_rate": 0.0005162501477097008, "loss": 1.7715, "step": 21980 }, { "epoch": 0.73, "grad_norm": 0.46191421151161194, "learning_rate": 0.0005162429031336814, "loss": 1.888, "step": 21981 }, { "epoch": 0.73, "grad_norm": 0.443290650844574, "learning_rate": 0.0005162356582951751, "loss": 1.887, "step": 21982 }, { "epoch": 0.73, "grad_norm": 0.4418594241142273, "learning_rate": 0.0005162284131941905, "loss": 1.8651, "step": 21983 }, { "epoch": 0.73, "grad_norm": 0.45945802330970764, "learning_rate": 0.0005162211678307365, "loss": 1.8278, "step": 21984 }, { "epoch": 0.73, "grad_norm": 0.43834978342056274, "learning_rate": 0.0005162139222048218, "loss": 1.8224, "step": 21985 }, { "epoch": 0.73, "grad_norm": 0.4180302917957306, "learning_rate": 0.0005162066763164553, "loss": 1.8224, "step": 21986 }, { "epoch": 0.73, "grad_norm": 0.44579118490219116, "learning_rate": 0.0005161994301656457, "loss": 1.8572, "step": 21987 }, { "epoch": 0.73, "grad_norm": 0.4310036301612854, "learning_rate": 0.0005161921837524019, "loss": 1.8238, "step": 21988 }, { "epoch": 0.73, "grad_norm": 0.4236229360103607, "learning_rate": 0.0005161849370767326, "loss": 1.8007, "step": 21989 }, { "epoch": 0.73, "grad_norm": 0.42366907000541687, "learning_rate": 0.0005161776901386466, "loss": 1.7808, "step": 21990 }, { "epoch": 0.73, "grad_norm": 0.4387992024421692, "learning_rate": 0.0005161704429381529, "loss": 1.882, "step": 21991 }, { "epoch": 0.73, "grad_norm": 0.43368715047836304, "learning_rate": 0.00051616319547526, "loss": 1.7803, "step": 21992 }, { "epoch": 0.73, "grad_norm": 0.44666987657546997, "learning_rate": 0.0005161559477499768, "loss": 1.8079, "step": 21993 }, { "epoch": 0.73, "grad_norm": 0.4142008423805237, "learning_rate": 0.0005161486997623122, "loss": 1.7689, "step": 21994 }, { "epoch": 0.73, "grad_norm": 0.4252571761608124, "learning_rate": 0.000516141451512275, "loss": 1.8006, "step": 21995 }, { "epoch": 0.73, "grad_norm": 0.42406168580055237, "learning_rate": 0.0005161342029998738, "loss": 1.8351, "step": 21996 }, { "epoch": 0.73, "grad_norm": 0.44200798869132996, "learning_rate": 0.0005161269542251175, "loss": 1.8759, "step": 21997 }, { "epoch": 0.73, "grad_norm": 0.4355272948741913, "learning_rate": 0.0005161197051880151, "loss": 1.8071, "step": 21998 }, { "epoch": 0.73, "grad_norm": 0.44177812337875366, "learning_rate": 0.0005161124558885751, "loss": 1.8192, "step": 21999 }, { "epoch": 0.73, "grad_norm": 0.4102894067764282, "learning_rate": 0.0005161052063268063, "loss": 1.7946, "step": 22000 }, { "epoch": 0.73, "grad_norm": 0.41666215658187866, "learning_rate": 0.0005160979565027178, "loss": 1.8209, "step": 22001 }, { "epoch": 0.73, "grad_norm": 0.42744436860084534, "learning_rate": 0.0005160907064163182, "loss": 1.8901, "step": 22002 }, { "epoch": 0.73, "grad_norm": 0.4411560595035553, "learning_rate": 0.0005160834560676163, "loss": 1.8476, "step": 22003 }, { "epoch": 0.73, "grad_norm": 0.6154453754425049, "learning_rate": 0.0005160762054566209, "loss": 1.8372, "step": 22004 }, { "epoch": 0.73, "grad_norm": 0.4269779920578003, "learning_rate": 0.0005160689545833408, "loss": 1.8885, "step": 22005 }, { "epoch": 0.73, "grad_norm": 0.42429783940315247, "learning_rate": 0.000516061703447785, "loss": 1.9281, "step": 22006 }, { "epoch": 0.73, "grad_norm": 0.4174947440624237, "learning_rate": 0.000516054452049962, "loss": 1.7845, "step": 22007 }, { "epoch": 0.73, "grad_norm": 0.45036306977272034, "learning_rate": 0.0005160472003898806, "loss": 1.8637, "step": 22008 }, { "epoch": 0.73, "grad_norm": 0.41943618655204773, "learning_rate": 0.0005160399484675499, "loss": 1.9062, "step": 22009 }, { "epoch": 0.73, "grad_norm": 0.41372397541999817, "learning_rate": 0.0005160326962829785, "loss": 1.7877, "step": 22010 }, { "epoch": 0.73, "grad_norm": 0.4209025800228119, "learning_rate": 0.0005160254438361752, "loss": 1.8042, "step": 22011 }, { "epoch": 0.73, "grad_norm": 0.426014244556427, "learning_rate": 0.0005160181911271489, "loss": 1.7651, "step": 22012 }, { "epoch": 0.73, "grad_norm": 0.4295835494995117, "learning_rate": 0.0005160109381559083, "loss": 1.8938, "step": 22013 }, { "epoch": 0.73, "grad_norm": 0.42648181319236755, "learning_rate": 0.0005160036849224623, "loss": 1.7686, "step": 22014 }, { "epoch": 0.73, "grad_norm": 0.4566446542739868, "learning_rate": 0.0005159964314268196, "loss": 1.8777, "step": 22015 }, { "epoch": 0.73, "grad_norm": 0.42643871903419495, "learning_rate": 0.000515989177668989, "loss": 1.8221, "step": 22016 }, { "epoch": 0.73, "grad_norm": 0.4686088263988495, "learning_rate": 0.0005159819236489794, "loss": 1.8234, "step": 22017 }, { "epoch": 0.73, "grad_norm": 0.42888134717941284, "learning_rate": 0.0005159746693667997, "loss": 1.8464, "step": 22018 }, { "epoch": 0.73, "grad_norm": 0.4345413148403168, "learning_rate": 0.0005159674148224585, "loss": 1.8341, "step": 22019 }, { "epoch": 0.73, "grad_norm": 0.4385340213775635, "learning_rate": 0.0005159601600159647, "loss": 1.8899, "step": 22020 }, { "epoch": 0.73, "grad_norm": 0.43515288829803467, "learning_rate": 0.000515952904947327, "loss": 1.7959, "step": 22021 }, { "epoch": 0.73, "grad_norm": 0.42964041233062744, "learning_rate": 0.0005159456496165543, "loss": 1.8039, "step": 22022 }, { "epoch": 0.73, "grad_norm": 0.43538570404052734, "learning_rate": 0.0005159383940236555, "loss": 1.8099, "step": 22023 }, { "epoch": 0.73, "grad_norm": 0.426191508769989, "learning_rate": 0.0005159311381686391, "loss": 1.8242, "step": 22024 }, { "epoch": 0.73, "grad_norm": 0.45052000880241394, "learning_rate": 0.0005159238820515143, "loss": 1.7814, "step": 22025 }, { "epoch": 0.73, "grad_norm": 0.4403986930847168, "learning_rate": 0.0005159166256722897, "loss": 1.8354, "step": 22026 }, { "epoch": 0.73, "grad_norm": 0.45375657081604004, "learning_rate": 0.0005159093690309741, "loss": 1.8046, "step": 22027 }, { "epoch": 0.73, "grad_norm": 0.43008580803871155, "learning_rate": 0.0005159021121275764, "loss": 1.8459, "step": 22028 }, { "epoch": 0.73, "grad_norm": 0.4476415514945984, "learning_rate": 0.0005158948549621053, "loss": 1.8415, "step": 22029 }, { "epoch": 0.73, "grad_norm": 0.44123536348342896, "learning_rate": 0.0005158875975345696, "loss": 1.7566, "step": 22030 }, { "epoch": 0.73, "grad_norm": 0.44577640295028687, "learning_rate": 0.0005158803398449782, "loss": 1.8796, "step": 22031 }, { "epoch": 0.73, "grad_norm": 0.4321536421775818, "learning_rate": 0.00051587308189334, "loss": 1.8106, "step": 22032 }, { "epoch": 0.73, "grad_norm": 0.446213960647583, "learning_rate": 0.0005158658236796635, "loss": 1.8417, "step": 22033 }, { "epoch": 0.73, "grad_norm": 0.44583913683891296, "learning_rate": 0.0005158585652039578, "loss": 1.8388, "step": 22034 }, { "epoch": 0.73, "grad_norm": 0.43125978112220764, "learning_rate": 0.0005158513064662315, "loss": 1.8491, "step": 22035 }, { "epoch": 0.73, "grad_norm": 0.46267759799957275, "learning_rate": 0.0005158440474664937, "loss": 1.9152, "step": 22036 }, { "epoch": 0.73, "grad_norm": 0.43758711218833923, "learning_rate": 0.000515836788204753, "loss": 1.8454, "step": 22037 }, { "epoch": 0.73, "grad_norm": 0.44005706906318665, "learning_rate": 0.0005158295286810181, "loss": 1.8279, "step": 22038 }, { "epoch": 0.73, "grad_norm": 0.42237553000450134, "learning_rate": 0.000515822268895298, "loss": 1.8452, "step": 22039 }, { "epoch": 0.73, "grad_norm": 0.4432384669780731, "learning_rate": 0.0005158150088476015, "loss": 1.8902, "step": 22040 }, { "epoch": 0.73, "grad_norm": 0.4368032217025757, "learning_rate": 0.0005158077485379374, "loss": 1.8148, "step": 22041 }, { "epoch": 0.73, "grad_norm": 0.42406928539276123, "learning_rate": 0.0005158004879663145, "loss": 1.9095, "step": 22042 }, { "epoch": 0.73, "grad_norm": 0.44756564497947693, "learning_rate": 0.0005157932271327415, "loss": 1.8186, "step": 22043 }, { "epoch": 0.73, "grad_norm": 0.4506782591342926, "learning_rate": 0.0005157859660372274, "loss": 1.8841, "step": 22044 }, { "epoch": 0.73, "grad_norm": 0.45817241072654724, "learning_rate": 0.0005157787046797809, "loss": 1.7122, "step": 22045 }, { "epoch": 0.73, "grad_norm": 0.4282384216785431, "learning_rate": 0.0005157714430604109, "loss": 1.7942, "step": 22046 }, { "epoch": 0.73, "grad_norm": 0.42623603343963623, "learning_rate": 0.0005157641811791261, "loss": 1.8041, "step": 22047 }, { "epoch": 0.73, "grad_norm": 0.4398767650127411, "learning_rate": 0.0005157569190359354, "loss": 1.8165, "step": 22048 }, { "epoch": 0.73, "grad_norm": 0.4597671926021576, "learning_rate": 0.0005157496566308475, "loss": 1.8319, "step": 22049 }, { "epoch": 0.73, "grad_norm": 0.42548176646232605, "learning_rate": 0.0005157423939638714, "loss": 1.8606, "step": 22050 }, { "epoch": 0.73, "grad_norm": 0.43454188108444214, "learning_rate": 0.0005157351310350159, "loss": 1.8341, "step": 22051 }, { "epoch": 0.73, "grad_norm": 0.43901556730270386, "learning_rate": 0.0005157278678442896, "loss": 1.9126, "step": 22052 }, { "epoch": 0.73, "grad_norm": 0.4440282881259918, "learning_rate": 0.0005157206043917015, "loss": 1.902, "step": 22053 }, { "epoch": 0.73, "grad_norm": 0.44157135486602783, "learning_rate": 0.0005157133406772604, "loss": 1.814, "step": 22054 }, { "epoch": 0.73, "grad_norm": 0.4508439004421234, "learning_rate": 0.0005157060767009751, "loss": 1.8627, "step": 22055 }, { "epoch": 0.73, "grad_norm": 0.46592825651168823, "learning_rate": 0.0005156988124628544, "loss": 1.8864, "step": 22056 }, { "epoch": 0.73, "grad_norm": 0.4329180121421814, "learning_rate": 0.000515691547962907, "loss": 1.8909, "step": 22057 }, { "epoch": 0.73, "grad_norm": 0.5063648223876953, "learning_rate": 0.000515684283201142, "loss": 1.781, "step": 22058 }, { "epoch": 0.73, "grad_norm": 0.46194693446159363, "learning_rate": 0.0005156770181775679, "loss": 1.78, "step": 22059 }, { "epoch": 0.73, "grad_norm": 0.42289796471595764, "learning_rate": 0.0005156697528921939, "loss": 1.7399, "step": 22060 }, { "epoch": 0.73, "grad_norm": 0.4467894732952118, "learning_rate": 0.0005156624873450284, "loss": 1.8774, "step": 22061 }, { "epoch": 0.73, "grad_norm": 0.4537132978439331, "learning_rate": 0.0005156552215360806, "loss": 1.8493, "step": 22062 }, { "epoch": 0.73, "grad_norm": 0.46264833211898804, "learning_rate": 0.0005156479554653589, "loss": 1.8639, "step": 22063 }, { "epoch": 0.73, "grad_norm": 0.43217048048973083, "learning_rate": 0.0005156406891328726, "loss": 1.8258, "step": 22064 }, { "epoch": 0.73, "grad_norm": 0.47090408205986023, "learning_rate": 0.0005156334225386301, "loss": 1.8341, "step": 22065 }, { "epoch": 0.73, "grad_norm": 0.4301377236843109, "learning_rate": 0.0005156261556826404, "loss": 1.9081, "step": 22066 }, { "epoch": 0.73, "grad_norm": 0.43584901094436646, "learning_rate": 0.0005156188885649123, "loss": 1.9107, "step": 22067 }, { "epoch": 0.73, "grad_norm": 0.4149693548679352, "learning_rate": 0.0005156116211854548, "loss": 1.8534, "step": 22068 }, { "epoch": 0.73, "grad_norm": 0.4562765955924988, "learning_rate": 0.0005156043535442764, "loss": 1.7294, "step": 22069 }, { "epoch": 0.73, "grad_norm": 0.4342767596244812, "learning_rate": 0.0005155970856413863, "loss": 1.8853, "step": 22070 }, { "epoch": 0.73, "grad_norm": 0.4297507703304291, "learning_rate": 0.000515589817476793, "loss": 1.8388, "step": 22071 }, { "epoch": 0.73, "grad_norm": 0.4286862909793854, "learning_rate": 0.0005155825490505053, "loss": 1.8107, "step": 22072 }, { "epoch": 0.73, "grad_norm": 0.4758458435535431, "learning_rate": 0.0005155752803625322, "loss": 1.849, "step": 22073 }, { "epoch": 0.73, "grad_norm": 0.4398660659790039, "learning_rate": 0.0005155680114128826, "loss": 1.855, "step": 22074 }, { "epoch": 0.73, "grad_norm": 0.4638945758342743, "learning_rate": 0.000515560742201565, "loss": 1.8391, "step": 22075 }, { "epoch": 0.73, "grad_norm": 0.44848543405532837, "learning_rate": 0.0005155534727285886, "loss": 1.8708, "step": 22076 }, { "epoch": 0.73, "grad_norm": 0.4581908583641052, "learning_rate": 0.0005155462029939619, "loss": 1.8861, "step": 22077 }, { "epoch": 0.73, "grad_norm": 0.41429877281188965, "learning_rate": 0.000515538932997694, "loss": 1.8278, "step": 22078 }, { "epoch": 0.73, "grad_norm": 0.4368226230144501, "learning_rate": 0.0005155316627397936, "loss": 1.9146, "step": 22079 }, { "epoch": 0.73, "grad_norm": 0.4474126100540161, "learning_rate": 0.0005155243922202695, "loss": 1.88, "step": 22080 }, { "epoch": 0.73, "grad_norm": 0.4175715446472168, "learning_rate": 0.0005155171214391304, "loss": 1.8129, "step": 22081 }, { "epoch": 0.73, "grad_norm": 0.4396110773086548, "learning_rate": 0.0005155098503963854, "loss": 1.8081, "step": 22082 }, { "epoch": 0.73, "grad_norm": 0.441844642162323, "learning_rate": 0.000515502579092043, "loss": 1.7787, "step": 22083 }, { "epoch": 0.73, "grad_norm": 0.42588016390800476, "learning_rate": 0.0005154953075261125, "loss": 1.8243, "step": 22084 }, { "epoch": 0.73, "grad_norm": 0.43173980712890625, "learning_rate": 0.0005154880356986023, "loss": 1.844, "step": 22085 }, { "epoch": 0.73, "grad_norm": 0.460803747177124, "learning_rate": 0.0005154807636095214, "loss": 1.8792, "step": 22086 }, { "epoch": 0.73, "grad_norm": 0.4202677309513092, "learning_rate": 0.0005154734912588786, "loss": 1.7931, "step": 22087 }, { "epoch": 0.73, "grad_norm": 0.4425472915172577, "learning_rate": 0.0005154662186466828, "loss": 1.841, "step": 22088 }, { "epoch": 0.73, "grad_norm": 0.42943981289863586, "learning_rate": 0.0005154589457729427, "loss": 1.8783, "step": 22089 }, { "epoch": 0.73, "grad_norm": 0.4364860951900482, "learning_rate": 0.0005154516726376671, "loss": 1.8355, "step": 22090 }, { "epoch": 0.73, "grad_norm": 0.4463038146495819, "learning_rate": 0.0005154443992408651, "loss": 1.8181, "step": 22091 }, { "epoch": 0.74, "grad_norm": 0.4336657226085663, "learning_rate": 0.0005154371255825452, "loss": 1.8392, "step": 22092 }, { "epoch": 0.74, "grad_norm": 0.4446767568588257, "learning_rate": 0.0005154298516627165, "loss": 1.8237, "step": 22093 }, { "epoch": 0.74, "grad_norm": 0.41559797525405884, "learning_rate": 0.0005154225774813876, "loss": 1.8665, "step": 22094 }, { "epoch": 0.74, "grad_norm": 0.4364315867424011, "learning_rate": 0.0005154153030385674, "loss": 1.7804, "step": 22095 }, { "epoch": 0.74, "grad_norm": 0.43409159779548645, "learning_rate": 0.0005154080283342648, "loss": 1.8157, "step": 22096 }, { "epoch": 0.74, "grad_norm": 0.44430622458457947, "learning_rate": 0.0005154007533684886, "loss": 1.825, "step": 22097 }, { "epoch": 0.74, "grad_norm": 0.42912763357162476, "learning_rate": 0.0005153934781412477, "loss": 1.7747, "step": 22098 }, { "epoch": 0.74, "grad_norm": 0.42017197608947754, "learning_rate": 0.0005153862026525507, "loss": 1.8022, "step": 22099 }, { "epoch": 0.74, "grad_norm": 0.42134225368499756, "learning_rate": 0.0005153789269024066, "loss": 1.8045, "step": 22100 }, { "epoch": 0.74, "grad_norm": 0.4271484911441803, "learning_rate": 0.0005153716508908243, "loss": 1.8287, "step": 22101 }, { "epoch": 0.74, "grad_norm": 0.4313153624534607, "learning_rate": 0.0005153643746178126, "loss": 1.8592, "step": 22102 }, { "epoch": 0.74, "grad_norm": 0.4337535500526428, "learning_rate": 0.0005153570980833803, "loss": 1.8197, "step": 22103 }, { "epoch": 0.74, "grad_norm": 0.43244263529777527, "learning_rate": 0.000515349821287536, "loss": 1.8587, "step": 22104 }, { "epoch": 0.74, "grad_norm": 0.43423864245414734, "learning_rate": 0.0005153425442302889, "loss": 1.7961, "step": 22105 }, { "epoch": 0.74, "grad_norm": 0.411288857460022, "learning_rate": 0.0005153352669116477, "loss": 1.8568, "step": 22106 }, { "epoch": 0.74, "grad_norm": 0.42645594477653503, "learning_rate": 0.0005153279893316212, "loss": 1.8323, "step": 22107 }, { "epoch": 0.74, "grad_norm": 0.43200746178627014, "learning_rate": 0.0005153207114902182, "loss": 1.8543, "step": 22108 }, { "epoch": 0.74, "grad_norm": 0.439937949180603, "learning_rate": 0.0005153134333874476, "loss": 1.8352, "step": 22109 }, { "epoch": 0.74, "grad_norm": 0.4259151220321655, "learning_rate": 0.0005153061550233182, "loss": 1.7931, "step": 22110 }, { "epoch": 0.74, "grad_norm": 0.4353710114955902, "learning_rate": 0.0005152988763978389, "loss": 1.746, "step": 22111 }, { "epoch": 0.74, "grad_norm": 0.43517783284187317, "learning_rate": 0.0005152915975110184, "loss": 1.7343, "step": 22112 }, { "epoch": 0.74, "grad_norm": 0.4279780089855194, "learning_rate": 0.0005152843183628658, "loss": 1.7405, "step": 22113 }, { "epoch": 0.74, "grad_norm": 0.44182848930358887, "learning_rate": 0.0005152770389533895, "loss": 1.8469, "step": 22114 }, { "epoch": 0.74, "grad_norm": 0.4908692240715027, "learning_rate": 0.0005152697592825988, "loss": 1.9146, "step": 22115 }, { "epoch": 0.74, "grad_norm": 0.42638030648231506, "learning_rate": 0.0005152624793505023, "loss": 1.8046, "step": 22116 }, { "epoch": 0.74, "grad_norm": 0.4415668845176697, "learning_rate": 0.0005152551991571088, "loss": 1.8589, "step": 22117 }, { "epoch": 0.74, "grad_norm": 0.42526382207870483, "learning_rate": 0.0005152479187024272, "loss": 1.8263, "step": 22118 }, { "epoch": 0.74, "grad_norm": 0.4263375997543335, "learning_rate": 0.0005152406379864664, "loss": 1.8935, "step": 22119 }, { "epoch": 0.74, "grad_norm": 0.4393347501754761, "learning_rate": 0.0005152333570092351, "loss": 1.7672, "step": 22120 }, { "epoch": 0.74, "grad_norm": 0.4247581958770752, "learning_rate": 0.0005152260757707424, "loss": 1.8193, "step": 22121 }, { "epoch": 0.74, "grad_norm": 0.439954936504364, "learning_rate": 0.0005152187942709968, "loss": 1.8534, "step": 22122 }, { "epoch": 0.74, "grad_norm": 0.42600947618484497, "learning_rate": 0.0005152115125100073, "loss": 1.8584, "step": 22123 }, { "epoch": 0.74, "grad_norm": 0.43349242210388184, "learning_rate": 0.0005152042304877828, "loss": 1.877, "step": 22124 }, { "epoch": 0.74, "grad_norm": 0.41974106431007385, "learning_rate": 0.000515196948204332, "loss": 1.7558, "step": 22125 }, { "epoch": 0.74, "grad_norm": 0.4453602731227875, "learning_rate": 0.0005151896656596638, "loss": 1.8212, "step": 22126 }, { "epoch": 0.74, "grad_norm": 0.4224245548248291, "learning_rate": 0.0005151823828537871, "loss": 1.7895, "step": 22127 }, { "epoch": 0.74, "grad_norm": 0.42718857526779175, "learning_rate": 0.0005151750997867107, "loss": 1.8253, "step": 22128 }, { "epoch": 0.74, "grad_norm": 0.41824576258659363, "learning_rate": 0.0005151678164584435, "loss": 1.8608, "step": 22129 }, { "epoch": 0.74, "grad_norm": 0.4372731149196625, "learning_rate": 0.0005151605328689941, "loss": 1.8776, "step": 22130 }, { "epoch": 0.74, "grad_norm": 0.7213465571403503, "learning_rate": 0.0005151532490183718, "loss": 1.9084, "step": 22131 }, { "epoch": 0.74, "grad_norm": 0.4409559965133667, "learning_rate": 0.0005151459649065848, "loss": 1.862, "step": 22132 }, { "epoch": 0.74, "grad_norm": 0.42347070574760437, "learning_rate": 0.0005151386805336425, "loss": 1.7932, "step": 22133 }, { "epoch": 0.74, "grad_norm": 0.42522764205932617, "learning_rate": 0.0005151313958995535, "loss": 1.821, "step": 22134 }, { "epoch": 0.74, "grad_norm": 0.4233776330947876, "learning_rate": 0.0005151241110043268, "loss": 1.8653, "step": 22135 }, { "epoch": 0.74, "grad_norm": 0.4293110966682434, "learning_rate": 0.0005151168258479709, "loss": 1.8194, "step": 22136 }, { "epoch": 0.74, "grad_norm": 0.4130534827709198, "learning_rate": 0.0005151095404304951, "loss": 1.7887, "step": 22137 }, { "epoch": 0.74, "grad_norm": 0.42952674627304077, "learning_rate": 0.0005151022547519079, "loss": 1.7935, "step": 22138 }, { "epoch": 0.74, "grad_norm": 0.42496588826179504, "learning_rate": 0.0005150949688122182, "loss": 1.8367, "step": 22139 }, { "epoch": 0.74, "grad_norm": 0.4326506555080414, "learning_rate": 0.000515087682611435, "loss": 1.7791, "step": 22140 }, { "epoch": 0.74, "grad_norm": 0.41856956481933594, "learning_rate": 0.000515080396149567, "loss": 1.783, "step": 22141 }, { "epoch": 0.74, "grad_norm": 0.4236949384212494, "learning_rate": 0.000515073109426623, "loss": 1.8511, "step": 22142 }, { "epoch": 0.74, "grad_norm": 0.4276489019393921, "learning_rate": 0.000515065822442612, "loss": 1.9199, "step": 22143 }, { "epoch": 0.74, "grad_norm": 0.43134763836860657, "learning_rate": 0.000515058535197543, "loss": 1.8289, "step": 22144 }, { "epoch": 0.74, "grad_norm": 0.4246567487716675, "learning_rate": 0.0005150512476914243, "loss": 1.9106, "step": 22145 }, { "epoch": 0.74, "grad_norm": 0.4247914254665375, "learning_rate": 0.0005150439599242653, "loss": 1.8666, "step": 22146 }, { "epoch": 0.74, "grad_norm": 0.4316551685333252, "learning_rate": 0.0005150366718960744, "loss": 1.8338, "step": 22147 }, { "epoch": 0.74, "grad_norm": 0.43838903307914734, "learning_rate": 0.0005150293836068609, "loss": 1.8681, "step": 22148 }, { "epoch": 0.74, "grad_norm": 0.43425002694129944, "learning_rate": 0.0005150220950566332, "loss": 1.7552, "step": 22149 }, { "epoch": 0.74, "grad_norm": 0.4191170930862427, "learning_rate": 0.0005150148062454006, "loss": 1.8516, "step": 22150 }, { "epoch": 0.74, "grad_norm": 0.4249037802219391, "learning_rate": 0.0005150075171731714, "loss": 1.8608, "step": 22151 }, { "epoch": 0.74, "grad_norm": 0.42647767066955566, "learning_rate": 0.0005150002278399551, "loss": 1.8393, "step": 22152 }, { "epoch": 0.74, "grad_norm": 0.4210183918476105, "learning_rate": 0.00051499293824576, "loss": 1.8193, "step": 22153 }, { "epoch": 0.74, "grad_norm": 0.42922767996788025, "learning_rate": 0.0005149856483905952, "loss": 1.9, "step": 22154 }, { "epoch": 0.74, "grad_norm": 0.47147712111473083, "learning_rate": 0.0005149783582744694, "loss": 1.8003, "step": 22155 }, { "epoch": 0.74, "grad_norm": 0.4442039430141449, "learning_rate": 0.0005149710678973916, "loss": 1.841, "step": 22156 }, { "epoch": 0.74, "grad_norm": 0.4486211836338043, "learning_rate": 0.0005149637772593706, "loss": 1.8692, "step": 22157 }, { "epoch": 0.74, "grad_norm": 0.44706541299819946, "learning_rate": 0.0005149564863604153, "loss": 1.8238, "step": 22158 }, { "epoch": 0.74, "grad_norm": 0.4264580309391022, "learning_rate": 0.0005149491952005345, "loss": 1.862, "step": 22159 }, { "epoch": 0.74, "grad_norm": 0.4368310868740082, "learning_rate": 0.0005149419037797369, "loss": 1.8441, "step": 22160 }, { "epoch": 0.74, "grad_norm": 0.4453659653663635, "learning_rate": 0.0005149346120980316, "loss": 1.8657, "step": 22161 }, { "epoch": 0.74, "grad_norm": 0.4313133955001831, "learning_rate": 0.0005149273201554274, "loss": 1.8394, "step": 22162 }, { "epoch": 0.74, "grad_norm": 0.43780457973480225, "learning_rate": 0.0005149200279519331, "loss": 1.7947, "step": 22163 }, { "epoch": 0.74, "grad_norm": 0.4276627004146576, "learning_rate": 0.0005149127354875574, "loss": 1.7855, "step": 22164 }, { "epoch": 0.74, "grad_norm": 0.42713311314582825, "learning_rate": 0.0005149054427623095, "loss": 1.8325, "step": 22165 }, { "epoch": 0.74, "grad_norm": 0.4265596866607666, "learning_rate": 0.0005148981497761979, "loss": 1.8139, "step": 22166 }, { "epoch": 0.74, "grad_norm": 0.44624197483062744, "learning_rate": 0.0005148908565292317, "loss": 1.8378, "step": 22167 }, { "epoch": 0.74, "grad_norm": 0.5128772258758545, "learning_rate": 0.0005148835630214198, "loss": 1.8456, "step": 22168 }, { "epoch": 0.74, "grad_norm": 0.44765424728393555, "learning_rate": 0.0005148762692527707, "loss": 1.7761, "step": 22169 }, { "epoch": 0.74, "grad_norm": 2.1179566383361816, "learning_rate": 0.0005148689752232935, "loss": 1.891, "step": 22170 }, { "epoch": 0.74, "grad_norm": 0.4501163363456726, "learning_rate": 0.0005148616809329971, "loss": 1.8638, "step": 22171 }, { "epoch": 0.74, "grad_norm": 0.47268423438072205, "learning_rate": 0.0005148543863818903, "loss": 1.7999, "step": 22172 }, { "epoch": 0.74, "grad_norm": 0.4498361051082611, "learning_rate": 0.0005148470915699818, "loss": 1.8431, "step": 22173 }, { "epoch": 0.74, "grad_norm": 0.4996034801006317, "learning_rate": 0.0005148397964972807, "loss": 1.8543, "step": 22174 }, { "epoch": 0.74, "grad_norm": 0.4314349889755249, "learning_rate": 0.0005148325011637957, "loss": 1.8164, "step": 22175 }, { "epoch": 0.74, "grad_norm": 0.4269539713859558, "learning_rate": 0.0005148252055695357, "loss": 1.7665, "step": 22176 }, { "epoch": 0.74, "grad_norm": 0.4432578682899475, "learning_rate": 0.0005148179097145095, "loss": 1.8608, "step": 22177 }, { "epoch": 0.74, "grad_norm": 0.42120060324668884, "learning_rate": 0.0005148106135987261, "loss": 1.8332, "step": 22178 }, { "epoch": 0.74, "grad_norm": 0.43583133816719055, "learning_rate": 0.0005148033172221942, "loss": 1.8032, "step": 22179 }, { "epoch": 0.74, "grad_norm": 0.4466792643070221, "learning_rate": 0.0005147960205849227, "loss": 1.9015, "step": 22180 }, { "epoch": 0.74, "grad_norm": 0.443339079618454, "learning_rate": 0.0005147887236869206, "loss": 1.9037, "step": 22181 }, { "epoch": 0.74, "grad_norm": 0.4295099973678589, "learning_rate": 0.0005147814265281965, "loss": 1.8662, "step": 22182 }, { "epoch": 0.74, "grad_norm": 0.43842756748199463, "learning_rate": 0.0005147741291087596, "loss": 1.9076, "step": 22183 }, { "epoch": 0.74, "grad_norm": 0.4507172703742981, "learning_rate": 0.0005147668314286184, "loss": 1.8678, "step": 22184 }, { "epoch": 0.74, "grad_norm": 0.41430139541625977, "learning_rate": 0.0005147595334877818, "loss": 1.9166, "step": 22185 }, { "epoch": 0.74, "grad_norm": 0.4412980377674103, "learning_rate": 0.000514752235286259, "loss": 1.8476, "step": 22186 }, { "epoch": 0.74, "grad_norm": 0.4261827766895294, "learning_rate": 0.0005147449368240585, "loss": 1.8742, "step": 22187 }, { "epoch": 0.74, "grad_norm": 0.4678417444229126, "learning_rate": 0.0005147376381011893, "loss": 1.7998, "step": 22188 }, { "epoch": 0.74, "grad_norm": 0.42305487394332886, "learning_rate": 0.0005147303391176602, "loss": 1.8378, "step": 22189 }, { "epoch": 0.74, "grad_norm": 0.4411981701850891, "learning_rate": 0.0005147230398734802, "loss": 1.8158, "step": 22190 }, { "epoch": 0.74, "grad_norm": 0.4389565885066986, "learning_rate": 0.000514715740368658, "loss": 1.8432, "step": 22191 }, { "epoch": 0.74, "grad_norm": 0.43063750863075256, "learning_rate": 0.0005147084406032026, "loss": 1.8699, "step": 22192 }, { "epoch": 0.74, "grad_norm": 0.423504114151001, "learning_rate": 0.0005147011405771228, "loss": 1.7491, "step": 22193 }, { "epoch": 0.74, "grad_norm": 0.4306027591228485, "learning_rate": 0.0005146938402904274, "loss": 1.745, "step": 22194 }, { "epoch": 0.74, "grad_norm": 0.4448348581790924, "learning_rate": 0.0005146865397431253, "loss": 1.7774, "step": 22195 }, { "epoch": 0.74, "grad_norm": 0.4220469892024994, "learning_rate": 0.0005146792389352253, "loss": 1.8253, "step": 22196 }, { "epoch": 0.74, "grad_norm": 0.4153817296028137, "learning_rate": 0.0005146719378667365, "loss": 1.8337, "step": 22197 }, { "epoch": 0.74, "grad_norm": 0.42677873373031616, "learning_rate": 0.0005146646365376675, "loss": 1.8016, "step": 22198 }, { "epoch": 0.74, "grad_norm": 0.4608430862426758, "learning_rate": 0.0005146573349480273, "loss": 1.8553, "step": 22199 }, { "epoch": 0.74, "grad_norm": 0.4208042323589325, "learning_rate": 0.0005146500330978247, "loss": 1.7653, "step": 22200 }, { "epoch": 0.74, "grad_norm": 0.4301561713218689, "learning_rate": 0.0005146427309870686, "loss": 1.8945, "step": 22201 }, { "epoch": 0.74, "grad_norm": 0.4589823782444, "learning_rate": 0.0005146354286157678, "loss": 1.7783, "step": 22202 }, { "epoch": 0.74, "grad_norm": 0.43388205766677856, "learning_rate": 0.0005146281259839312, "loss": 1.8547, "step": 22203 }, { "epoch": 0.74, "grad_norm": 0.43519559502601624, "learning_rate": 0.0005146208230915678, "loss": 1.8551, "step": 22204 }, { "epoch": 0.74, "grad_norm": 0.4310184717178345, "learning_rate": 0.0005146135199386862, "loss": 1.766, "step": 22205 }, { "epoch": 0.74, "grad_norm": 0.43927833437919617, "learning_rate": 0.0005146062165252955, "loss": 1.8944, "step": 22206 }, { "epoch": 0.74, "grad_norm": 0.44556036591529846, "learning_rate": 0.0005145989128514045, "loss": 1.8066, "step": 22207 }, { "epoch": 0.74, "grad_norm": 0.43245264887809753, "learning_rate": 0.000514591608917022, "loss": 1.8795, "step": 22208 }, { "epoch": 0.74, "grad_norm": 0.4375206530094147, "learning_rate": 0.0005145843047221569, "loss": 1.8699, "step": 22209 }, { "epoch": 0.74, "grad_norm": 0.4297807812690735, "learning_rate": 0.0005145770002668181, "loss": 1.8034, "step": 22210 }, { "epoch": 0.74, "grad_norm": 0.42649054527282715, "learning_rate": 0.0005145696955510144, "loss": 1.8281, "step": 22211 }, { "epoch": 0.74, "grad_norm": 0.42858487367630005, "learning_rate": 0.0005145623905747547, "loss": 1.7818, "step": 22212 }, { "epoch": 0.74, "grad_norm": 0.42473188042640686, "learning_rate": 0.0005145550853380479, "loss": 1.7691, "step": 22213 }, { "epoch": 0.74, "grad_norm": 0.4457378089427948, "learning_rate": 0.0005145477798409029, "loss": 1.8646, "step": 22214 }, { "epoch": 0.74, "grad_norm": 0.4109339714050293, "learning_rate": 0.0005145404740833283, "loss": 1.7378, "step": 22215 }, { "epoch": 0.74, "grad_norm": 0.44527697563171387, "learning_rate": 0.0005145331680653334, "loss": 1.9071, "step": 22216 }, { "epoch": 0.74, "grad_norm": 0.44141435623168945, "learning_rate": 0.0005145258617869268, "loss": 1.7919, "step": 22217 }, { "epoch": 0.74, "grad_norm": 0.4250872731208801, "learning_rate": 0.0005145185552481174, "loss": 1.8722, "step": 22218 }, { "epoch": 0.74, "grad_norm": 0.43187031149864197, "learning_rate": 0.0005145112484489139, "loss": 1.8446, "step": 22219 }, { "epoch": 0.74, "grad_norm": 0.4376360774040222, "learning_rate": 0.0005145039413893256, "loss": 1.8548, "step": 22220 }, { "epoch": 0.74, "grad_norm": 0.4469243586063385, "learning_rate": 0.0005144966340693611, "loss": 1.7894, "step": 22221 }, { "epoch": 0.74, "grad_norm": 0.43389078974723816, "learning_rate": 0.0005144893264890291, "loss": 1.8071, "step": 22222 }, { "epoch": 0.74, "grad_norm": 0.4327983856201172, "learning_rate": 0.0005144820186483388, "loss": 1.7815, "step": 22223 }, { "epoch": 0.74, "grad_norm": 0.44056442379951477, "learning_rate": 0.0005144747105472988, "loss": 1.8138, "step": 22224 }, { "epoch": 0.74, "grad_norm": 0.4527975618839264, "learning_rate": 0.0005144674021859184, "loss": 1.7948, "step": 22225 }, { "epoch": 0.74, "grad_norm": 0.43605896830558777, "learning_rate": 0.0005144600935642059, "loss": 1.8257, "step": 22226 }, { "epoch": 0.74, "grad_norm": 0.4199567437171936, "learning_rate": 0.0005144527846821706, "loss": 1.7889, "step": 22227 }, { "epoch": 0.74, "grad_norm": 0.43689119815826416, "learning_rate": 0.000514445475539821, "loss": 1.8646, "step": 22228 }, { "epoch": 0.74, "grad_norm": 0.46381351351737976, "learning_rate": 0.0005144381661371665, "loss": 1.8285, "step": 22229 }, { "epoch": 0.74, "grad_norm": 0.4475833475589752, "learning_rate": 0.0005144308564742154, "loss": 1.8954, "step": 22230 }, { "epoch": 0.74, "grad_norm": 0.4315040111541748, "learning_rate": 0.000514423546550977, "loss": 1.7227, "step": 22231 }, { "epoch": 0.74, "grad_norm": 0.4627726376056671, "learning_rate": 0.0005144162363674599, "loss": 1.817, "step": 22232 }, { "epoch": 0.74, "grad_norm": 0.4483693838119507, "learning_rate": 0.0005144089259236732, "loss": 1.799, "step": 22233 }, { "epoch": 0.74, "grad_norm": 0.438444584608078, "learning_rate": 0.0005144016152196256, "loss": 1.7861, "step": 22234 }, { "epoch": 0.74, "grad_norm": 0.4175858795642853, "learning_rate": 0.000514394304255326, "loss": 1.8327, "step": 22235 }, { "epoch": 0.74, "grad_norm": 0.45263218879699707, "learning_rate": 0.0005143869930307833, "loss": 1.8474, "step": 22236 }, { "epoch": 0.74, "grad_norm": 0.44803211092948914, "learning_rate": 0.0005143796815460064, "loss": 1.903, "step": 22237 }, { "epoch": 0.74, "grad_norm": 0.42539700865745544, "learning_rate": 0.0005143723698010041, "loss": 1.8176, "step": 22238 }, { "epoch": 0.74, "grad_norm": 0.43542036414146423, "learning_rate": 0.0005143650577957853, "loss": 1.8041, "step": 22239 }, { "epoch": 0.74, "grad_norm": 0.43257373571395874, "learning_rate": 0.000514357745530359, "loss": 1.8442, "step": 22240 }, { "epoch": 0.74, "grad_norm": 0.42300719022750854, "learning_rate": 0.0005143504330047341, "loss": 1.7984, "step": 22241 }, { "epoch": 0.74, "grad_norm": 0.42346814274787903, "learning_rate": 0.0005143431202189191, "loss": 1.7441, "step": 22242 }, { "epoch": 0.74, "grad_norm": 0.43358245491981506, "learning_rate": 0.0005143358071729232, "loss": 1.8053, "step": 22243 }, { "epoch": 0.74, "grad_norm": 0.42716169357299805, "learning_rate": 0.0005143284938667553, "loss": 1.802, "step": 22244 }, { "epoch": 0.74, "grad_norm": 0.42274829745292664, "learning_rate": 0.0005143211803004242, "loss": 1.8315, "step": 22245 }, { "epoch": 0.74, "grad_norm": 0.4606064260005951, "learning_rate": 0.0005143138664739388, "loss": 1.7885, "step": 22246 }, { "epoch": 0.74, "grad_norm": 0.42612767219543457, "learning_rate": 0.0005143065523873077, "loss": 1.7938, "step": 22247 }, { "epoch": 0.74, "grad_norm": 0.42382046580314636, "learning_rate": 0.0005142992380405403, "loss": 1.8364, "step": 22248 }, { "epoch": 0.74, "grad_norm": 0.4176531434059143, "learning_rate": 0.000514291923433645, "loss": 1.8248, "step": 22249 }, { "epoch": 0.74, "grad_norm": 0.43976616859436035, "learning_rate": 0.0005142846085666311, "loss": 1.7872, "step": 22250 }, { "epoch": 0.74, "grad_norm": 0.43365195393562317, "learning_rate": 0.000514277293439507, "loss": 1.8375, "step": 22251 }, { "epoch": 0.74, "grad_norm": 0.4277319610118866, "learning_rate": 0.000514269978052282, "loss": 1.7308, "step": 22252 }, { "epoch": 0.74, "grad_norm": 0.4272157847881317, "learning_rate": 0.0005142626624049648, "loss": 1.7985, "step": 22253 }, { "epoch": 0.74, "grad_norm": 0.43233951926231384, "learning_rate": 0.0005142553464975643, "loss": 1.7889, "step": 22254 }, { "epoch": 0.74, "grad_norm": 0.43041497468948364, "learning_rate": 0.0005142480303300894, "loss": 1.8385, "step": 22255 }, { "epoch": 0.74, "grad_norm": 0.4430597722530365, "learning_rate": 0.0005142407139025488, "loss": 1.8329, "step": 22256 }, { "epoch": 0.74, "grad_norm": 0.4346576929092407, "learning_rate": 0.0005142333972149517, "loss": 1.8296, "step": 22257 }, { "epoch": 0.74, "grad_norm": 0.4339996576309204, "learning_rate": 0.0005142260802673068, "loss": 1.8663, "step": 22258 }, { "epoch": 0.74, "grad_norm": 0.4401446580886841, "learning_rate": 0.000514218763059623, "loss": 1.8087, "step": 22259 }, { "epoch": 0.74, "grad_norm": 0.4394190311431885, "learning_rate": 0.0005142114455919092, "loss": 1.841, "step": 22260 }, { "epoch": 0.74, "grad_norm": 0.41960015892982483, "learning_rate": 0.0005142041278641742, "loss": 1.7742, "step": 22261 }, { "epoch": 0.74, "grad_norm": 0.4355633556842804, "learning_rate": 0.000514196809876427, "loss": 1.8739, "step": 22262 }, { "epoch": 0.74, "grad_norm": 0.5806989073753357, "learning_rate": 0.0005141894916286763, "loss": 1.8294, "step": 22263 }, { "epoch": 0.74, "grad_norm": 0.42281392216682434, "learning_rate": 0.0005141821731209313, "loss": 1.8607, "step": 22264 }, { "epoch": 0.74, "grad_norm": 0.4339330196380615, "learning_rate": 0.0005141748543532007, "loss": 1.8506, "step": 22265 }, { "epoch": 0.74, "grad_norm": 0.4229794144630432, "learning_rate": 0.0005141675353254933, "loss": 1.8776, "step": 22266 }, { "epoch": 0.74, "grad_norm": 0.4250817596912384, "learning_rate": 0.000514160216037818, "loss": 1.8064, "step": 22267 }, { "epoch": 0.74, "grad_norm": 0.42579445242881775, "learning_rate": 0.0005141528964901839, "loss": 1.8646, "step": 22268 }, { "epoch": 0.74, "grad_norm": 0.4333672821521759, "learning_rate": 0.0005141455766825997, "loss": 1.864, "step": 22269 }, { "epoch": 0.74, "grad_norm": 0.46838271617889404, "learning_rate": 0.0005141382566150742, "loss": 1.8358, "step": 22270 }, { "epoch": 0.74, "grad_norm": 0.4387168288230896, "learning_rate": 0.0005141309362876166, "loss": 1.8978, "step": 22271 }, { "epoch": 0.74, "grad_norm": 0.43768250942230225, "learning_rate": 0.0005141236157002355, "loss": 1.7958, "step": 22272 }, { "epoch": 0.74, "grad_norm": 0.4220837354660034, "learning_rate": 0.0005141162948529398, "loss": 1.7187, "step": 22273 }, { "epoch": 0.74, "grad_norm": 0.45280811190605164, "learning_rate": 0.0005141089737457385, "loss": 1.8913, "step": 22274 }, { "epoch": 0.74, "grad_norm": 0.43416446447372437, "learning_rate": 0.0005141016523786406, "loss": 1.8288, "step": 22275 }, { "epoch": 0.74, "grad_norm": 0.434525728225708, "learning_rate": 0.0005140943307516547, "loss": 1.7614, "step": 22276 }, { "epoch": 0.74, "grad_norm": 0.46431615948677063, "learning_rate": 0.0005140870088647899, "loss": 1.8426, "step": 22277 }, { "epoch": 0.74, "grad_norm": 0.4345826208591461, "learning_rate": 0.000514079686718055, "loss": 1.7244, "step": 22278 }, { "epoch": 0.74, "grad_norm": 0.42461469769477844, "learning_rate": 0.0005140723643114589, "loss": 1.7886, "step": 22279 }, { "epoch": 0.74, "grad_norm": 0.4271133244037628, "learning_rate": 0.0005140650416450104, "loss": 1.8575, "step": 22280 }, { "epoch": 0.74, "grad_norm": 0.44611504673957825, "learning_rate": 0.0005140577187187186, "loss": 1.7873, "step": 22281 }, { "epoch": 0.74, "grad_norm": 0.42390841245651245, "learning_rate": 0.0005140503955325922, "loss": 1.838, "step": 22282 }, { "epoch": 0.74, "grad_norm": 0.43459564447402954, "learning_rate": 0.0005140430720866402, "loss": 1.811, "step": 22283 }, { "epoch": 0.74, "grad_norm": 0.4281041622161865, "learning_rate": 0.0005140357483808715, "loss": 1.7971, "step": 22284 }, { "epoch": 0.74, "grad_norm": 0.45865440368652344, "learning_rate": 0.0005140284244152948, "loss": 1.9232, "step": 22285 }, { "epoch": 0.74, "grad_norm": 0.4239373803138733, "learning_rate": 0.0005140211001899192, "loss": 1.8304, "step": 22286 }, { "epoch": 0.74, "grad_norm": 0.4525326192378998, "learning_rate": 0.0005140137757047536, "loss": 1.8103, "step": 22287 }, { "epoch": 0.74, "grad_norm": 0.42786505818367004, "learning_rate": 0.0005140064509598067, "loss": 1.859, "step": 22288 }, { "epoch": 0.74, "grad_norm": 0.4221165180206299, "learning_rate": 0.0005139991259550875, "loss": 1.8154, "step": 22289 }, { "epoch": 0.74, "grad_norm": 0.4262162148952484, "learning_rate": 0.000513991800690605, "loss": 1.8262, "step": 22290 }, { "epoch": 0.74, "grad_norm": 0.4282175302505493, "learning_rate": 0.0005139844751663678, "loss": 1.8933, "step": 22291 }, { "epoch": 0.74, "grad_norm": 0.41741862893104553, "learning_rate": 0.0005139771493823851, "loss": 1.7751, "step": 22292 }, { "epoch": 0.74, "grad_norm": 0.4320457875728607, "learning_rate": 0.0005139698233386657, "loss": 1.8274, "step": 22293 }, { "epoch": 0.74, "grad_norm": 0.4257673919200897, "learning_rate": 0.0005139624970352184, "loss": 1.8193, "step": 22294 }, { "epoch": 0.74, "grad_norm": 0.4357946813106537, "learning_rate": 0.0005139551704720521, "loss": 1.832, "step": 22295 }, { "epoch": 0.74, "grad_norm": 0.43414056301116943, "learning_rate": 0.000513947843649176, "loss": 1.8427, "step": 22296 }, { "epoch": 0.74, "grad_norm": 0.4414157569408417, "learning_rate": 0.0005139405165665985, "loss": 1.8349, "step": 22297 }, { "epoch": 0.74, "grad_norm": 0.5673916339874268, "learning_rate": 0.0005139331892243288, "loss": 1.8475, "step": 22298 }, { "epoch": 0.74, "grad_norm": 0.4297020733356476, "learning_rate": 0.0005139258616223757, "loss": 1.8403, "step": 22299 }, { "epoch": 0.74, "grad_norm": 0.43296724557876587, "learning_rate": 0.0005139185337607482, "loss": 1.8669, "step": 22300 }, { "epoch": 0.74, "grad_norm": 0.44526463747024536, "learning_rate": 0.0005139112056394549, "loss": 1.8455, "step": 22301 }, { "epoch": 0.74, "grad_norm": 0.43852826952934265, "learning_rate": 0.0005139038772585052, "loss": 1.8301, "step": 22302 }, { "epoch": 0.74, "grad_norm": 0.4290858507156372, "learning_rate": 0.0005138965486179076, "loss": 1.8284, "step": 22303 }, { "epoch": 0.74, "grad_norm": 0.4374184310436249, "learning_rate": 0.000513889219717671, "loss": 1.8235, "step": 22304 }, { "epoch": 0.74, "grad_norm": 0.4218876361846924, "learning_rate": 0.0005138818905578046, "loss": 1.8145, "step": 22305 }, { "epoch": 0.74, "grad_norm": 0.4388258159160614, "learning_rate": 0.000513874561138317, "loss": 1.8447, "step": 22306 }, { "epoch": 0.74, "grad_norm": 0.4331088662147522, "learning_rate": 0.0005138672314592171, "loss": 1.858, "step": 22307 }, { "epoch": 0.74, "grad_norm": 0.44264742732048035, "learning_rate": 0.000513859901520514, "loss": 1.9202, "step": 22308 }, { "epoch": 0.74, "grad_norm": 0.43339186906814575, "learning_rate": 0.0005138525713222165, "loss": 1.8017, "step": 22309 }, { "epoch": 0.74, "grad_norm": 0.44025281071662903, "learning_rate": 0.0005138452408643335, "loss": 1.7641, "step": 22310 }, { "epoch": 0.74, "grad_norm": 0.44319355487823486, "learning_rate": 0.0005138379101468739, "loss": 1.7913, "step": 22311 }, { "epoch": 0.74, "grad_norm": 0.42945346236228943, "learning_rate": 0.0005138305791698466, "loss": 1.8738, "step": 22312 }, { "epoch": 0.74, "grad_norm": 0.4314228892326355, "learning_rate": 0.0005138232479332604, "loss": 1.8207, "step": 22313 }, { "epoch": 0.74, "grad_norm": 0.42434725165367126, "learning_rate": 0.0005138159164371243, "loss": 1.7865, "step": 22314 }, { "epoch": 0.74, "grad_norm": 0.42779475450515747, "learning_rate": 0.0005138085846814472, "loss": 1.8609, "step": 22315 }, { "epoch": 0.74, "grad_norm": 0.4638085663318634, "learning_rate": 0.0005138012526662379, "loss": 1.8655, "step": 22316 }, { "epoch": 0.74, "grad_norm": 0.43004512786865234, "learning_rate": 0.0005137939203915054, "loss": 1.8638, "step": 22317 }, { "epoch": 0.74, "grad_norm": 0.42607754468917847, "learning_rate": 0.0005137865878572587, "loss": 1.745, "step": 22318 }, { "epoch": 0.74, "grad_norm": 0.48909062147140503, "learning_rate": 0.0005137792550635065, "loss": 1.879, "step": 22319 }, { "epoch": 0.74, "grad_norm": 0.42327040433883667, "learning_rate": 0.0005137719220102579, "loss": 1.7973, "step": 22320 }, { "epoch": 0.74, "grad_norm": 0.44030120968818665, "learning_rate": 0.0005137645886975215, "loss": 1.8802, "step": 22321 }, { "epoch": 0.74, "grad_norm": 0.44346490502357483, "learning_rate": 0.0005137572551253065, "loss": 1.8297, "step": 22322 }, { "epoch": 0.74, "grad_norm": 0.4398421347141266, "learning_rate": 0.0005137499212936216, "loss": 1.8923, "step": 22323 }, { "epoch": 0.74, "grad_norm": 0.42588719725608826, "learning_rate": 0.0005137425872024759, "loss": 1.8392, "step": 22324 }, { "epoch": 0.74, "grad_norm": 0.4214766025543213, "learning_rate": 0.0005137352528518781, "loss": 1.8605, "step": 22325 }, { "epoch": 0.74, "grad_norm": 0.4550679922103882, "learning_rate": 0.0005137279182418372, "loss": 1.853, "step": 22326 }, { "epoch": 0.74, "grad_norm": 0.42873615026474, "learning_rate": 0.0005137205833723622, "loss": 1.8343, "step": 22327 }, { "epoch": 0.74, "grad_norm": 0.4371400773525238, "learning_rate": 0.0005137132482434618, "loss": 1.8881, "step": 22328 }, { "epoch": 0.74, "grad_norm": 0.4328497648239136, "learning_rate": 0.000513705912855145, "loss": 1.8083, "step": 22329 }, { "epoch": 0.74, "grad_norm": 0.4287586212158203, "learning_rate": 0.0005136985772074208, "loss": 1.7882, "step": 22330 }, { "epoch": 0.74, "grad_norm": 0.4381542205810547, "learning_rate": 0.0005136912413002979, "loss": 1.8228, "step": 22331 }, { "epoch": 0.74, "grad_norm": 0.44104206562042236, "learning_rate": 0.0005136839051337854, "loss": 1.8226, "step": 22332 }, { "epoch": 0.74, "grad_norm": 0.4238381087779999, "learning_rate": 0.0005136765687078921, "loss": 1.7757, "step": 22333 }, { "epoch": 0.74, "grad_norm": 0.4256150722503662, "learning_rate": 0.0005136692320226269, "loss": 1.8397, "step": 22334 }, { "epoch": 0.74, "grad_norm": 0.4436739981174469, "learning_rate": 0.0005136618950779989, "loss": 1.8596, "step": 22335 }, { "epoch": 0.74, "grad_norm": 0.4440900385379791, "learning_rate": 0.0005136545578740167, "loss": 1.9181, "step": 22336 }, { "epoch": 0.74, "grad_norm": 0.43477925658226013, "learning_rate": 0.0005136472204106894, "loss": 1.8015, "step": 22337 }, { "epoch": 0.74, "grad_norm": 0.432034969329834, "learning_rate": 0.0005136398826880257, "loss": 1.7834, "step": 22338 }, { "epoch": 0.74, "grad_norm": 0.5390058159828186, "learning_rate": 0.0005136325447060348, "loss": 1.8629, "step": 22339 }, { "epoch": 0.74, "grad_norm": 0.4804958403110504, "learning_rate": 0.0005136252064647255, "loss": 1.7975, "step": 22340 }, { "epoch": 0.74, "grad_norm": 0.43481966853141785, "learning_rate": 0.0005136178679641066, "loss": 1.8703, "step": 22341 }, { "epoch": 0.74, "grad_norm": 0.4647406041622162, "learning_rate": 0.000513610529204187, "loss": 1.8004, "step": 22342 }, { "epoch": 0.74, "grad_norm": 0.4277257025241852, "learning_rate": 0.0005136031901849759, "loss": 1.8575, "step": 22343 }, { "epoch": 0.74, "grad_norm": 0.43762341141700745, "learning_rate": 0.0005135958509064818, "loss": 1.8678, "step": 22344 }, { "epoch": 0.74, "grad_norm": 0.4297579526901245, "learning_rate": 0.000513588511368714, "loss": 1.8498, "step": 22345 }, { "epoch": 0.74, "grad_norm": 0.42248499393463135, "learning_rate": 0.0005135811715716811, "loss": 1.7942, "step": 22346 }, { "epoch": 0.74, "grad_norm": 0.43455928564071655, "learning_rate": 0.0005135738315153922, "loss": 1.7847, "step": 22347 }, { "epoch": 0.74, "grad_norm": 0.4342697858810425, "learning_rate": 0.0005135664911998561, "loss": 1.9153, "step": 22348 }, { "epoch": 0.74, "grad_norm": 0.4411275088787079, "learning_rate": 0.0005135591506250817, "loss": 1.8661, "step": 22349 }, { "epoch": 0.74, "grad_norm": 0.4176816940307617, "learning_rate": 0.000513551809791078, "loss": 1.8174, "step": 22350 }, { "epoch": 0.74, "grad_norm": 0.42418769001960754, "learning_rate": 0.0005135444686978538, "loss": 1.8735, "step": 22351 }, { "epoch": 0.74, "grad_norm": 0.42770543694496155, "learning_rate": 0.0005135371273454182, "loss": 1.8515, "step": 22352 }, { "epoch": 0.74, "grad_norm": 0.4298376441001892, "learning_rate": 0.0005135297857337799, "loss": 1.8321, "step": 22353 }, { "epoch": 0.74, "grad_norm": 0.4375046193599701, "learning_rate": 0.0005135224438629479, "loss": 1.8045, "step": 22354 }, { "epoch": 0.74, "grad_norm": 0.4228458106517792, "learning_rate": 0.0005135151017329311, "loss": 1.7662, "step": 22355 }, { "epoch": 0.74, "grad_norm": 0.434829980134964, "learning_rate": 0.0005135077593437385, "loss": 1.7894, "step": 22356 }, { "epoch": 0.74, "grad_norm": 0.4197445511817932, "learning_rate": 0.0005135004166953789, "loss": 1.8333, "step": 22357 }, { "epoch": 0.74, "grad_norm": 0.4359777867794037, "learning_rate": 0.0005134930737878613, "loss": 1.7589, "step": 22358 }, { "epoch": 0.74, "grad_norm": 0.5167267322540283, "learning_rate": 0.0005134857306211946, "loss": 1.8374, "step": 22359 }, { "epoch": 0.74, "grad_norm": 0.44144266843795776, "learning_rate": 0.0005134783871953875, "loss": 1.8214, "step": 22360 }, { "epoch": 0.74, "grad_norm": 0.4307234585285187, "learning_rate": 0.0005134710435104492, "loss": 1.8076, "step": 22361 }, { "epoch": 0.74, "grad_norm": 0.4437943398952484, "learning_rate": 0.0005134636995663886, "loss": 1.902, "step": 22362 }, { "epoch": 0.74, "grad_norm": 0.42885738611221313, "learning_rate": 0.0005134563553632144, "loss": 1.7598, "step": 22363 }, { "epoch": 0.74, "grad_norm": 0.4242725968360901, "learning_rate": 0.0005134490109009356, "loss": 1.7977, "step": 22364 }, { "epoch": 0.74, "grad_norm": 0.4496989846229553, "learning_rate": 0.0005134416661795612, "loss": 1.8748, "step": 22365 }, { "epoch": 0.74, "grad_norm": 0.45105743408203125, "learning_rate": 0.0005134343211991001, "loss": 1.7244, "step": 22366 }, { "epoch": 0.74, "grad_norm": 0.45508915185928345, "learning_rate": 0.0005134269759595612, "loss": 1.9548, "step": 22367 }, { "epoch": 0.74, "grad_norm": 0.4370826780796051, "learning_rate": 0.0005134196304609533, "loss": 1.8348, "step": 22368 }, { "epoch": 0.74, "grad_norm": 0.4393792152404785, "learning_rate": 0.0005134122847032855, "loss": 1.8761, "step": 22369 }, { "epoch": 0.74, "grad_norm": 0.4416160583496094, "learning_rate": 0.0005134049386865666, "loss": 1.8626, "step": 22370 }, { "epoch": 0.74, "grad_norm": 0.42709922790527344, "learning_rate": 0.0005133975924108055, "loss": 1.8483, "step": 22371 }, { "epoch": 0.74, "grad_norm": 0.4391805827617645, "learning_rate": 0.0005133902458760112, "loss": 1.8848, "step": 22372 }, { "epoch": 0.74, "grad_norm": 0.45906567573547363, "learning_rate": 0.0005133828990821926, "loss": 1.8241, "step": 22373 }, { "epoch": 0.74, "grad_norm": 0.44686296582221985, "learning_rate": 0.0005133755520293587, "loss": 1.8885, "step": 22374 }, { "epoch": 0.74, "grad_norm": 0.424824059009552, "learning_rate": 0.0005133682047175183, "loss": 1.8522, "step": 22375 }, { "epoch": 0.74, "grad_norm": 0.4354795813560486, "learning_rate": 0.0005133608571466802, "loss": 1.7495, "step": 22376 }, { "epoch": 0.74, "grad_norm": 0.4401378929615021, "learning_rate": 0.0005133535093168535, "loss": 1.8118, "step": 22377 }, { "epoch": 0.74, "grad_norm": 0.4470144212245941, "learning_rate": 0.0005133461612280471, "loss": 1.7457, "step": 22378 }, { "epoch": 0.74, "grad_norm": 0.4455999732017517, "learning_rate": 0.0005133388128802698, "loss": 1.9044, "step": 22379 }, { "epoch": 0.74, "grad_norm": 0.43996161222457886, "learning_rate": 0.0005133314642735308, "loss": 1.8639, "step": 22380 }, { "epoch": 0.74, "grad_norm": 0.42400631308555603, "learning_rate": 0.0005133241154078387, "loss": 1.7394, "step": 22381 }, { "epoch": 0.74, "grad_norm": 0.42274606227874756, "learning_rate": 0.0005133167662832027, "loss": 1.7877, "step": 22382 }, { "epoch": 0.74, "grad_norm": 0.4199800193309784, "learning_rate": 0.0005133094168996313, "loss": 1.8483, "step": 22383 }, { "epoch": 0.74, "grad_norm": 0.4608990252017975, "learning_rate": 0.0005133020672571339, "loss": 1.8139, "step": 22384 }, { "epoch": 0.74, "grad_norm": 0.44961434602737427, "learning_rate": 0.0005132947173557193, "loss": 1.7794, "step": 22385 }, { "epoch": 0.74, "grad_norm": 0.4234723746776581, "learning_rate": 0.0005132873671953961, "loss": 1.7727, "step": 22386 }, { "epoch": 0.74, "grad_norm": 0.4466524124145508, "learning_rate": 0.0005132800167761736, "loss": 1.869, "step": 22387 }, { "epoch": 0.74, "grad_norm": 0.432088166475296, "learning_rate": 0.0005132726660980606, "loss": 1.8183, "step": 22388 }, { "epoch": 0.74, "grad_norm": 0.4291554093360901, "learning_rate": 0.000513265315161066, "loss": 1.8148, "step": 22389 }, { "epoch": 0.74, "grad_norm": 0.4438250958919525, "learning_rate": 0.0005132579639651987, "loss": 1.7813, "step": 22390 }, { "epoch": 0.74, "grad_norm": 0.43017303943634033, "learning_rate": 0.0005132506125104676, "loss": 1.8929, "step": 22391 }, { "epoch": 0.74, "grad_norm": 0.4488685429096222, "learning_rate": 0.0005132432607968818, "loss": 1.876, "step": 22392 }, { "epoch": 0.75, "grad_norm": 0.4400176703929901, "learning_rate": 0.0005132359088244501, "loss": 1.8168, "step": 22393 }, { "epoch": 0.75, "grad_norm": 0.4355785846710205, "learning_rate": 0.0005132285565931813, "loss": 1.8366, "step": 22394 }, { "epoch": 0.75, "grad_norm": 0.44832664728164673, "learning_rate": 0.0005132212041030845, "loss": 1.8437, "step": 22395 }, { "epoch": 0.75, "grad_norm": 0.45164790749549866, "learning_rate": 0.0005132138513541687, "loss": 1.8926, "step": 22396 }, { "epoch": 0.75, "grad_norm": 0.43586739897727966, "learning_rate": 0.0005132064983464426, "loss": 1.878, "step": 22397 }, { "epoch": 0.75, "grad_norm": 0.4346707761287689, "learning_rate": 0.0005131991450799152, "loss": 1.8483, "step": 22398 }, { "epoch": 0.75, "grad_norm": 0.4431897699832916, "learning_rate": 0.0005131917915545955, "loss": 1.8332, "step": 22399 }, { "epoch": 0.75, "grad_norm": 0.4441259801387787, "learning_rate": 0.0005131844377704922, "loss": 1.8007, "step": 22400 }, { "epoch": 0.75, "grad_norm": 0.4296083450317383, "learning_rate": 0.0005131770837276147, "loss": 1.7538, "step": 22401 }, { "epoch": 0.75, "grad_norm": 0.4457133710384369, "learning_rate": 0.0005131697294259715, "loss": 1.9011, "step": 22402 }, { "epoch": 0.75, "grad_norm": 0.45258426666259766, "learning_rate": 0.0005131623748655716, "loss": 1.8709, "step": 22403 }, { "epoch": 0.75, "grad_norm": 0.4347307085990906, "learning_rate": 0.000513155020046424, "loss": 1.8341, "step": 22404 }, { "epoch": 0.75, "grad_norm": 0.43623653054237366, "learning_rate": 0.0005131476649685376, "loss": 1.9162, "step": 22405 }, { "epoch": 0.75, "grad_norm": 0.42818379402160645, "learning_rate": 0.0005131403096319215, "loss": 1.8875, "step": 22406 }, { "epoch": 0.75, "grad_norm": 0.46994104981422424, "learning_rate": 0.0005131329540365842, "loss": 1.7949, "step": 22407 }, { "epoch": 0.75, "grad_norm": 0.4650861322879791, "learning_rate": 0.0005131255981825351, "loss": 1.8173, "step": 22408 }, { "epoch": 0.75, "grad_norm": 0.4415408670902252, "learning_rate": 0.000513118242069783, "loss": 1.7758, "step": 22409 }, { "epoch": 0.75, "grad_norm": 0.4479466676712036, "learning_rate": 0.0005131108856983365, "loss": 1.7041, "step": 22410 }, { "epoch": 0.75, "grad_norm": 0.4354216456413269, "learning_rate": 0.0005131035290682049, "loss": 1.8064, "step": 22411 }, { "epoch": 0.75, "grad_norm": 0.4368559420108795, "learning_rate": 0.000513096172179397, "loss": 1.7493, "step": 22412 }, { "epoch": 0.75, "grad_norm": 0.4388484060764313, "learning_rate": 0.0005130888150319218, "loss": 1.8359, "step": 22413 }, { "epoch": 0.75, "grad_norm": 0.44483980536460876, "learning_rate": 0.0005130814576257883, "loss": 1.92, "step": 22414 }, { "epoch": 0.75, "grad_norm": 0.4331752061843872, "learning_rate": 0.000513074099961005, "loss": 1.7676, "step": 22415 }, { "epoch": 0.75, "grad_norm": 0.43857333064079285, "learning_rate": 0.0005130667420375814, "loss": 1.8056, "step": 22416 }, { "epoch": 0.75, "grad_norm": 0.4391634464263916, "learning_rate": 0.000513059383855526, "loss": 1.9145, "step": 22417 }, { "epoch": 0.75, "grad_norm": 0.4224874675273895, "learning_rate": 0.000513052025414848, "loss": 1.8304, "step": 22418 }, { "epoch": 0.75, "grad_norm": 0.43573233485221863, "learning_rate": 0.0005130446667155562, "loss": 1.8095, "step": 22419 }, { "epoch": 0.75, "grad_norm": 0.43952566385269165, "learning_rate": 0.0005130373077576596, "loss": 1.7818, "step": 22420 }, { "epoch": 0.75, "grad_norm": 0.43152499198913574, "learning_rate": 0.000513029948541167, "loss": 1.8196, "step": 22421 }, { "epoch": 0.75, "grad_norm": 0.4243873655796051, "learning_rate": 0.0005130225890660875, "loss": 1.7747, "step": 22422 }, { "epoch": 0.75, "grad_norm": 0.43918001651763916, "learning_rate": 0.0005130152293324299, "loss": 1.908, "step": 22423 }, { "epoch": 0.75, "grad_norm": 0.4319082498550415, "learning_rate": 0.0005130078693402032, "loss": 1.8602, "step": 22424 }, { "epoch": 0.75, "grad_norm": 0.4287106990814209, "learning_rate": 0.0005130005090894165, "loss": 1.7791, "step": 22425 }, { "epoch": 0.75, "grad_norm": 0.4326300621032715, "learning_rate": 0.0005129931485800784, "loss": 1.8631, "step": 22426 }, { "epoch": 0.75, "grad_norm": 0.4163632094860077, "learning_rate": 0.0005129857878121982, "loss": 1.7579, "step": 22427 }, { "epoch": 0.75, "grad_norm": 0.4366891086101532, "learning_rate": 0.0005129784267857844, "loss": 1.7881, "step": 22428 }, { "epoch": 0.75, "grad_norm": 0.6014719009399414, "learning_rate": 0.0005129710655008463, "loss": 1.864, "step": 22429 }, { "epoch": 0.75, "grad_norm": 0.41861802339553833, "learning_rate": 0.0005129637039573927, "loss": 1.8199, "step": 22430 }, { "epoch": 0.75, "grad_norm": 0.42933616042137146, "learning_rate": 0.0005129563421554324, "loss": 1.9036, "step": 22431 }, { "epoch": 0.75, "grad_norm": 0.46301156282424927, "learning_rate": 0.0005129489800949746, "loss": 1.8589, "step": 22432 }, { "epoch": 0.75, "grad_norm": 0.4246731698513031, "learning_rate": 0.0005129416177760281, "loss": 1.8003, "step": 22433 }, { "epoch": 0.75, "grad_norm": 0.4214409291744232, "learning_rate": 0.0005129342551986019, "loss": 1.8085, "step": 22434 }, { "epoch": 0.75, "grad_norm": 0.43331247568130493, "learning_rate": 0.0005129268923627048, "loss": 1.795, "step": 22435 }, { "epoch": 0.75, "grad_norm": 0.4256919026374817, "learning_rate": 0.0005129195292683459, "loss": 1.8702, "step": 22436 }, { "epoch": 0.75, "grad_norm": 0.43138402700424194, "learning_rate": 0.0005129121659155339, "loss": 1.8774, "step": 22437 }, { "epoch": 0.75, "grad_norm": 0.43005675077438354, "learning_rate": 0.000512904802304278, "loss": 1.7698, "step": 22438 }, { "epoch": 0.75, "grad_norm": 0.42285946011543274, "learning_rate": 0.0005128974384345871, "loss": 1.8298, "step": 22439 }, { "epoch": 0.75, "grad_norm": 0.44471806287765503, "learning_rate": 0.00051289007430647, "loss": 1.7905, "step": 22440 }, { "epoch": 0.75, "grad_norm": 0.42927300930023193, "learning_rate": 0.0005128827099199359, "loss": 1.7816, "step": 22441 }, { "epoch": 0.75, "grad_norm": 0.43309271335601807, "learning_rate": 0.0005128753452749934, "loss": 1.8678, "step": 22442 }, { "epoch": 0.75, "grad_norm": 0.447601854801178, "learning_rate": 0.0005128679803716516, "loss": 1.8401, "step": 22443 }, { "epoch": 0.75, "grad_norm": 0.4276639521121979, "learning_rate": 0.0005128606152099195, "loss": 1.8183, "step": 22444 }, { "epoch": 0.75, "grad_norm": 0.43904727697372437, "learning_rate": 0.000512853249789806, "loss": 1.8088, "step": 22445 }, { "epoch": 0.75, "grad_norm": 0.4328107535839081, "learning_rate": 0.0005128458841113199, "loss": 1.766, "step": 22446 }, { "epoch": 0.75, "grad_norm": 0.43961063027381897, "learning_rate": 0.0005128385181744703, "loss": 1.7956, "step": 22447 }, { "epoch": 0.75, "grad_norm": 0.42806315422058105, "learning_rate": 0.0005128311519792661, "loss": 1.7244, "step": 22448 }, { "epoch": 0.75, "grad_norm": 0.4272380769252777, "learning_rate": 0.0005128237855257162, "loss": 1.7858, "step": 22449 }, { "epoch": 0.75, "grad_norm": 0.4249558448791504, "learning_rate": 0.0005128164188138297, "loss": 1.7281, "step": 22450 }, { "epoch": 0.75, "grad_norm": 0.5924388766288757, "learning_rate": 0.0005128090518436153, "loss": 1.8479, "step": 22451 }, { "epoch": 0.75, "grad_norm": 0.4532424509525299, "learning_rate": 0.0005128016846150822, "loss": 1.8158, "step": 22452 }, { "epoch": 0.75, "grad_norm": 0.42722976207733154, "learning_rate": 0.000512794317128239, "loss": 1.7524, "step": 22453 }, { "epoch": 0.75, "grad_norm": 0.4325593113899231, "learning_rate": 0.000512786949383095, "loss": 1.8019, "step": 22454 }, { "epoch": 0.75, "grad_norm": 0.44217199087142944, "learning_rate": 0.000512779581379659, "loss": 1.7809, "step": 22455 }, { "epoch": 0.75, "grad_norm": 0.4341357946395874, "learning_rate": 0.0005127722131179401, "loss": 1.8004, "step": 22456 }, { "epoch": 0.75, "grad_norm": 0.442221999168396, "learning_rate": 0.0005127648445979468, "loss": 1.7296, "step": 22457 }, { "epoch": 0.75, "grad_norm": 0.4365571439266205, "learning_rate": 0.0005127574758196884, "loss": 1.8251, "step": 22458 }, { "epoch": 0.75, "grad_norm": 0.43655356764793396, "learning_rate": 0.0005127501067831739, "loss": 1.8295, "step": 22459 }, { "epoch": 0.75, "grad_norm": 0.4327436685562134, "learning_rate": 0.000512742737488412, "loss": 1.8491, "step": 22460 }, { "epoch": 0.75, "grad_norm": 0.4375174343585968, "learning_rate": 0.0005127353679354118, "loss": 1.815, "step": 22461 }, { "epoch": 0.75, "grad_norm": 0.44043272733688354, "learning_rate": 0.0005127279981241821, "loss": 1.9033, "step": 22462 }, { "epoch": 0.75, "grad_norm": 0.4208037555217743, "learning_rate": 0.0005127206280547321, "loss": 1.7477, "step": 22463 }, { "epoch": 0.75, "grad_norm": 0.45498034358024597, "learning_rate": 0.0005127132577270705, "loss": 1.9522, "step": 22464 }, { "epoch": 0.75, "grad_norm": 0.4361720085144043, "learning_rate": 0.0005127058871412064, "loss": 1.7946, "step": 22465 }, { "epoch": 0.75, "grad_norm": 0.4545924663543701, "learning_rate": 0.0005126985162971487, "loss": 1.8055, "step": 22466 }, { "epoch": 0.75, "grad_norm": 0.4385960102081299, "learning_rate": 0.0005126911451949064, "loss": 1.8379, "step": 22467 }, { "epoch": 0.75, "grad_norm": 0.4366908371448517, "learning_rate": 0.0005126837738344882, "loss": 1.757, "step": 22468 }, { "epoch": 0.75, "grad_norm": 0.438846230506897, "learning_rate": 0.0005126764022159035, "loss": 1.8313, "step": 22469 }, { "epoch": 0.75, "grad_norm": 0.4371049404144287, "learning_rate": 0.0005126690303391608, "loss": 1.7699, "step": 22470 }, { "epoch": 0.75, "grad_norm": 0.44923192262649536, "learning_rate": 0.0005126616582042692, "loss": 1.8327, "step": 22471 }, { "epoch": 0.75, "grad_norm": 0.4421432614326477, "learning_rate": 0.0005126542858112378, "loss": 1.8455, "step": 22472 }, { "epoch": 0.75, "grad_norm": 0.5456640124320984, "learning_rate": 0.0005126469131600754, "loss": 1.8372, "step": 22473 }, { "epoch": 0.75, "grad_norm": 0.4423084259033203, "learning_rate": 0.000512639540250791, "loss": 1.8302, "step": 22474 }, { "epoch": 0.75, "grad_norm": 0.45648491382598877, "learning_rate": 0.0005126321670833934, "loss": 1.7967, "step": 22475 }, { "epoch": 0.75, "grad_norm": 0.4340287744998932, "learning_rate": 0.0005126247936578918, "loss": 1.7673, "step": 22476 }, { "epoch": 0.75, "grad_norm": 0.43602949380874634, "learning_rate": 0.0005126174199742951, "loss": 1.722, "step": 22477 }, { "epoch": 0.75, "grad_norm": 0.4729960858821869, "learning_rate": 0.000512610046032612, "loss": 1.8003, "step": 22478 }, { "epoch": 0.75, "grad_norm": 0.4510505497455597, "learning_rate": 0.0005126026718328517, "loss": 1.8249, "step": 22479 }, { "epoch": 0.75, "grad_norm": 0.43934327363967896, "learning_rate": 0.0005125952973750231, "loss": 1.8469, "step": 22480 }, { "epoch": 0.75, "grad_norm": 0.43681880831718445, "learning_rate": 0.0005125879226591351, "loss": 1.8209, "step": 22481 }, { "epoch": 0.75, "grad_norm": 0.451539546251297, "learning_rate": 0.0005125805476851968, "loss": 1.8418, "step": 22482 }, { "epoch": 0.75, "grad_norm": 0.4346647262573242, "learning_rate": 0.0005125731724532169, "loss": 1.8538, "step": 22483 }, { "epoch": 0.75, "grad_norm": 0.43307632207870483, "learning_rate": 0.0005125657969632045, "loss": 1.8794, "step": 22484 }, { "epoch": 0.75, "grad_norm": 0.4390501379966736, "learning_rate": 0.0005125584212151686, "loss": 1.7777, "step": 22485 }, { "epoch": 0.75, "grad_norm": 0.44999638199806213, "learning_rate": 0.0005125510452091181, "loss": 1.8765, "step": 22486 }, { "epoch": 0.75, "grad_norm": 0.46390560269355774, "learning_rate": 0.0005125436689450618, "loss": 1.8707, "step": 22487 }, { "epoch": 0.75, "grad_norm": 0.4388706684112549, "learning_rate": 0.0005125362924230089, "loss": 1.8456, "step": 22488 }, { "epoch": 0.75, "grad_norm": 0.4498712122440338, "learning_rate": 0.0005125289156429684, "loss": 1.8056, "step": 22489 }, { "epoch": 0.75, "grad_norm": 0.45822328329086304, "learning_rate": 0.000512521538604949, "loss": 1.8198, "step": 22490 }, { "epoch": 0.75, "grad_norm": 0.46206411719322205, "learning_rate": 0.0005125141613089597, "loss": 1.8196, "step": 22491 }, { "epoch": 0.75, "grad_norm": 0.4336298406124115, "learning_rate": 0.0005125067837550097, "loss": 1.8093, "step": 22492 }, { "epoch": 0.75, "grad_norm": 0.45406466722488403, "learning_rate": 0.0005124994059431076, "loss": 1.8015, "step": 22493 }, { "epoch": 0.75, "grad_norm": 0.4515228271484375, "learning_rate": 0.0005124920278732625, "loss": 1.7726, "step": 22494 }, { "epoch": 0.75, "grad_norm": 0.4560481011867523, "learning_rate": 0.0005124846495454836, "loss": 1.8647, "step": 22495 }, { "epoch": 0.75, "grad_norm": 0.4299792945384979, "learning_rate": 0.0005124772709597796, "loss": 1.8423, "step": 22496 }, { "epoch": 0.75, "grad_norm": 0.47211238741874695, "learning_rate": 0.0005124698921161594, "loss": 1.7999, "step": 22497 }, { "epoch": 0.75, "grad_norm": 0.5292174816131592, "learning_rate": 0.000512462513014632, "loss": 1.8761, "step": 22498 }, { "epoch": 0.75, "grad_norm": 0.4406203031539917, "learning_rate": 0.0005124551336552066, "loss": 1.8571, "step": 22499 }, { "epoch": 0.75, "grad_norm": 0.4405563175678253, "learning_rate": 0.0005124477540378918, "loss": 1.776, "step": 22500 }, { "epoch": 0.75, "grad_norm": 0.4531126916408539, "learning_rate": 0.0005124403741626968, "loss": 1.84, "step": 22501 }, { "epoch": 0.75, "grad_norm": 0.4530159831047058, "learning_rate": 0.0005124329940296305, "loss": 1.8759, "step": 22502 }, { "epoch": 0.75, "grad_norm": 0.4245409369468689, "learning_rate": 0.0005124256136387018, "loss": 1.8247, "step": 22503 }, { "epoch": 0.75, "grad_norm": 0.4492343068122864, "learning_rate": 0.0005124182329899197, "loss": 1.8466, "step": 22504 }, { "epoch": 0.75, "grad_norm": 0.4211380183696747, "learning_rate": 0.0005124108520832932, "loss": 1.8531, "step": 22505 }, { "epoch": 0.75, "grad_norm": 0.4540805518627167, "learning_rate": 0.0005124034709188312, "loss": 1.8628, "step": 22506 }, { "epoch": 0.75, "grad_norm": 0.44084060192108154, "learning_rate": 0.0005123960894965427, "loss": 1.8919, "step": 22507 }, { "epoch": 0.75, "grad_norm": 0.43717435002326965, "learning_rate": 0.0005123887078164367, "loss": 1.8512, "step": 22508 }, { "epoch": 0.75, "grad_norm": 0.5305008888244629, "learning_rate": 0.0005123813258785221, "loss": 1.9213, "step": 22509 }, { "epoch": 0.75, "grad_norm": 0.4494874179363251, "learning_rate": 0.0005123739436828078, "loss": 1.856, "step": 22510 }, { "epoch": 0.75, "grad_norm": 0.42710980772972107, "learning_rate": 0.0005123665612293027, "loss": 1.7859, "step": 22511 }, { "epoch": 0.75, "grad_norm": 0.42302244901657104, "learning_rate": 0.0005123591785180161, "loss": 1.8054, "step": 22512 }, { "epoch": 0.75, "grad_norm": 0.450284868478775, "learning_rate": 0.0005123517955489567, "loss": 1.9243, "step": 22513 }, { "epoch": 0.75, "grad_norm": 0.43856900930404663, "learning_rate": 0.0005123444123221334, "loss": 1.8633, "step": 22514 }, { "epoch": 0.75, "grad_norm": 0.4361434280872345, "learning_rate": 0.0005123370288375554, "loss": 1.9125, "step": 22515 }, { "epoch": 0.75, "grad_norm": 0.4233710765838623, "learning_rate": 0.0005123296450952314, "loss": 1.844, "step": 22516 }, { "epoch": 0.75, "grad_norm": 0.4377411901950836, "learning_rate": 0.0005123222610951706, "loss": 1.8255, "step": 22517 }, { "epoch": 0.75, "grad_norm": 0.4361121654510498, "learning_rate": 0.0005123148768373819, "loss": 1.8964, "step": 22518 }, { "epoch": 0.75, "grad_norm": 0.4265752136707306, "learning_rate": 0.0005123074923218742, "loss": 1.8723, "step": 22519 }, { "epoch": 0.75, "grad_norm": 0.4384409487247467, "learning_rate": 0.0005123001075486563, "loss": 1.8248, "step": 22520 }, { "epoch": 0.75, "grad_norm": 0.4324226677417755, "learning_rate": 0.0005122927225177375, "loss": 1.8487, "step": 22521 }, { "epoch": 0.75, "grad_norm": 0.4253929555416107, "learning_rate": 0.0005122853372291267, "loss": 1.88, "step": 22522 }, { "epoch": 0.75, "grad_norm": 0.43852201104164124, "learning_rate": 0.0005122779516828327, "loss": 1.8283, "step": 22523 }, { "epoch": 0.75, "grad_norm": 0.4394186735153198, "learning_rate": 0.0005122705658788645, "loss": 1.9283, "step": 22524 }, { "epoch": 0.75, "grad_norm": 0.42524027824401855, "learning_rate": 0.0005122631798172312, "loss": 1.7768, "step": 22525 }, { "epoch": 0.75, "grad_norm": 0.4278486371040344, "learning_rate": 0.0005122557934979415, "loss": 1.8123, "step": 22526 }, { "epoch": 0.75, "grad_norm": 0.42883244156837463, "learning_rate": 0.0005122484069210047, "loss": 1.8468, "step": 22527 }, { "epoch": 0.75, "grad_norm": 0.43196526169776917, "learning_rate": 0.0005122410200864295, "loss": 1.8561, "step": 22528 }, { "epoch": 0.75, "grad_norm": 0.4485495090484619, "learning_rate": 0.0005122336329942251, "loss": 1.8743, "step": 22529 }, { "epoch": 0.75, "grad_norm": 0.44147008657455444, "learning_rate": 0.0005122262456444003, "loss": 1.8671, "step": 22530 }, { "epoch": 0.75, "grad_norm": 0.4664837419986725, "learning_rate": 0.000512218858036964, "loss": 1.8936, "step": 22531 }, { "epoch": 0.75, "grad_norm": 0.4480106830596924, "learning_rate": 0.0005122114701719254, "loss": 1.9275, "step": 22532 }, { "epoch": 0.75, "grad_norm": 0.4365065097808838, "learning_rate": 0.0005122040820492933, "loss": 1.8891, "step": 22533 }, { "epoch": 0.75, "grad_norm": 0.42679181694984436, "learning_rate": 0.0005121966936690767, "loss": 1.9373, "step": 22534 }, { "epoch": 0.75, "grad_norm": 0.43025973439216614, "learning_rate": 0.0005121893050312845, "loss": 1.842, "step": 22535 }, { "epoch": 0.75, "grad_norm": 0.4295489490032196, "learning_rate": 0.0005121819161359259, "loss": 1.8169, "step": 22536 }, { "epoch": 0.75, "grad_norm": 0.42940250039100647, "learning_rate": 0.0005121745269830098, "loss": 1.8357, "step": 22537 }, { "epoch": 0.75, "grad_norm": 0.44527971744537354, "learning_rate": 0.0005121671375725449, "loss": 1.7792, "step": 22538 }, { "epoch": 0.75, "grad_norm": 0.43394163250923157, "learning_rate": 0.0005121597479045405, "loss": 1.9163, "step": 22539 }, { "epoch": 0.75, "grad_norm": 0.4286365509033203, "learning_rate": 0.0005121523579790053, "loss": 1.8746, "step": 22540 }, { "epoch": 0.75, "grad_norm": 0.43444421887397766, "learning_rate": 0.0005121449677959484, "loss": 1.7966, "step": 22541 }, { "epoch": 0.75, "grad_norm": 0.44696149230003357, "learning_rate": 0.0005121375773553787, "loss": 1.8501, "step": 22542 }, { "epoch": 0.75, "grad_norm": 0.4633139669895172, "learning_rate": 0.0005121301866573054, "loss": 1.8904, "step": 22543 }, { "epoch": 0.75, "grad_norm": 0.4346594512462616, "learning_rate": 0.0005121227957017372, "loss": 1.8378, "step": 22544 }, { "epoch": 0.75, "grad_norm": 0.45092257857322693, "learning_rate": 0.0005121154044886833, "loss": 1.8031, "step": 22545 }, { "epoch": 0.75, "grad_norm": 0.44084957242012024, "learning_rate": 0.0005121080130181524, "loss": 1.7659, "step": 22546 }, { "epoch": 0.75, "grad_norm": 0.44223636388778687, "learning_rate": 0.0005121006212901538, "loss": 1.844, "step": 22547 }, { "epoch": 0.75, "grad_norm": 0.43556463718414307, "learning_rate": 0.0005120932293046961, "loss": 1.7961, "step": 22548 }, { "epoch": 0.75, "grad_norm": 0.45964741706848145, "learning_rate": 0.0005120858370617886, "loss": 1.7868, "step": 22549 }, { "epoch": 0.75, "grad_norm": 0.44045254588127136, "learning_rate": 0.0005120784445614401, "loss": 1.8165, "step": 22550 }, { "epoch": 0.75, "grad_norm": 0.4417882561683655, "learning_rate": 0.0005120710518036596, "loss": 1.8432, "step": 22551 }, { "epoch": 0.75, "grad_norm": 0.4639577269554138, "learning_rate": 0.0005120636587884561, "loss": 1.8059, "step": 22552 }, { "epoch": 0.75, "grad_norm": 0.42376479506492615, "learning_rate": 0.0005120562655158387, "loss": 1.7702, "step": 22553 }, { "epoch": 0.75, "grad_norm": 0.43040212988853455, "learning_rate": 0.0005120488719858161, "loss": 1.7626, "step": 22554 }, { "epoch": 0.75, "grad_norm": 0.456290066242218, "learning_rate": 0.0005120414781983974, "loss": 1.8624, "step": 22555 }, { "epoch": 0.75, "grad_norm": 0.4276905357837677, "learning_rate": 0.0005120340841535916, "loss": 1.8142, "step": 22556 }, { "epoch": 0.75, "grad_norm": 0.4354969561100006, "learning_rate": 0.0005120266898514077, "loss": 1.909, "step": 22557 }, { "epoch": 0.75, "grad_norm": 0.4333457946777344, "learning_rate": 0.0005120192952918546, "loss": 1.868, "step": 22558 }, { "epoch": 0.75, "grad_norm": 0.43400469422340393, "learning_rate": 0.0005120119004749414, "loss": 1.7577, "step": 22559 }, { "epoch": 0.75, "grad_norm": 0.42789918184280396, "learning_rate": 0.000512004505400677, "loss": 1.8302, "step": 22560 }, { "epoch": 0.75, "grad_norm": 0.4417583644390106, "learning_rate": 0.0005119971100690701, "loss": 1.7895, "step": 22561 }, { "epoch": 0.75, "grad_norm": 0.44204995036125183, "learning_rate": 0.0005119897144801302, "loss": 1.7894, "step": 22562 }, { "epoch": 0.75, "grad_norm": 0.4505833387374878, "learning_rate": 0.0005119823186338661, "loss": 1.7988, "step": 22563 }, { "epoch": 0.75, "grad_norm": 0.4401875138282776, "learning_rate": 0.0005119749225302864, "loss": 1.8542, "step": 22564 }, { "epoch": 0.75, "grad_norm": 0.4327358901500702, "learning_rate": 0.0005119675261694007, "loss": 1.7865, "step": 22565 }, { "epoch": 0.75, "grad_norm": 0.43062782287597656, "learning_rate": 0.0005119601295512174, "loss": 1.8617, "step": 22566 }, { "epoch": 0.75, "grad_norm": 0.4689706265926361, "learning_rate": 0.0005119527326757458, "loss": 1.8185, "step": 22567 }, { "epoch": 0.75, "grad_norm": 0.435083270072937, "learning_rate": 0.000511945335542995, "loss": 1.8121, "step": 22568 }, { "epoch": 0.75, "grad_norm": 0.45200908184051514, "learning_rate": 0.0005119379381529736, "loss": 1.8766, "step": 22569 }, { "epoch": 0.75, "grad_norm": 0.4421970844268799, "learning_rate": 0.0005119305405056907, "loss": 1.8457, "step": 22570 }, { "epoch": 0.75, "grad_norm": 0.4746045768260956, "learning_rate": 0.0005119231426011555, "loss": 1.8523, "step": 22571 }, { "epoch": 0.75, "grad_norm": 0.4342169165611267, "learning_rate": 0.0005119157444393768, "loss": 1.8075, "step": 22572 }, { "epoch": 0.75, "grad_norm": 0.450827956199646, "learning_rate": 0.0005119083460203637, "loss": 1.9341, "step": 22573 }, { "epoch": 0.75, "grad_norm": 0.4906819760799408, "learning_rate": 0.0005119009473441248, "loss": 1.8869, "step": 22574 }, { "epoch": 0.75, "grad_norm": 0.43372008204460144, "learning_rate": 0.0005118935484106697, "loss": 1.868, "step": 22575 }, { "epoch": 0.75, "grad_norm": 0.4456728994846344, "learning_rate": 0.0005118861492200068, "loss": 1.7568, "step": 22576 }, { "epoch": 0.75, "grad_norm": 0.4538787305355072, "learning_rate": 0.0005118787497721455, "loss": 1.7908, "step": 22577 }, { "epoch": 0.75, "grad_norm": 0.43152859807014465, "learning_rate": 0.0005118713500670946, "loss": 1.877, "step": 22578 }, { "epoch": 0.75, "grad_norm": 0.4217429757118225, "learning_rate": 0.000511863950104863, "loss": 1.8102, "step": 22579 }, { "epoch": 0.75, "grad_norm": 0.4520169496536255, "learning_rate": 0.0005118565498854599, "loss": 1.8783, "step": 22580 }, { "epoch": 0.75, "grad_norm": 0.44848981499671936, "learning_rate": 0.0005118491494088941, "loss": 1.8263, "step": 22581 }, { "epoch": 0.75, "grad_norm": 0.4201241433620453, "learning_rate": 0.0005118417486751746, "loss": 1.7539, "step": 22582 }, { "epoch": 0.75, "grad_norm": 0.4867587685585022, "learning_rate": 0.0005118343476843106, "loss": 1.7548, "step": 22583 }, { "epoch": 0.75, "grad_norm": 0.44432249665260315, "learning_rate": 0.0005118269464363108, "loss": 1.8025, "step": 22584 }, { "epoch": 0.75, "grad_norm": 0.4305360019207001, "learning_rate": 0.0005118195449311842, "loss": 1.869, "step": 22585 }, { "epoch": 0.75, "grad_norm": 0.4412381947040558, "learning_rate": 0.0005118121431689401, "loss": 1.8711, "step": 22586 }, { "epoch": 0.75, "grad_norm": 0.6690993905067444, "learning_rate": 0.000511804741149587, "loss": 1.8391, "step": 22587 }, { "epoch": 0.75, "grad_norm": 0.44326692819595337, "learning_rate": 0.0005117973388731344, "loss": 1.7764, "step": 22588 }, { "epoch": 0.75, "grad_norm": 0.4383382499217987, "learning_rate": 0.000511789936339591, "loss": 1.8769, "step": 22589 }, { "epoch": 0.75, "grad_norm": 0.4544498324394226, "learning_rate": 0.0005117825335489657, "loss": 1.7976, "step": 22590 }, { "epoch": 0.75, "grad_norm": 0.4430884122848511, "learning_rate": 0.0005117751305012677, "loss": 1.843, "step": 22591 }, { "epoch": 0.75, "grad_norm": 0.4432608187198639, "learning_rate": 0.0005117677271965058, "loss": 1.8343, "step": 22592 }, { "epoch": 0.75, "grad_norm": 0.45917198061943054, "learning_rate": 0.0005117603236346892, "loss": 1.8126, "step": 22593 }, { "epoch": 0.75, "grad_norm": 0.4354691803455353, "learning_rate": 0.0005117529198158267, "loss": 1.8377, "step": 22594 }, { "epoch": 0.75, "grad_norm": 0.5320915579795837, "learning_rate": 0.0005117455157399274, "loss": 1.795, "step": 22595 }, { "epoch": 0.75, "grad_norm": 0.4620078504085541, "learning_rate": 0.0005117381114070001, "loss": 1.795, "step": 22596 }, { "epoch": 0.75, "grad_norm": 0.45462360978126526, "learning_rate": 0.0005117307068170541, "loss": 1.8414, "step": 22597 }, { "epoch": 0.75, "grad_norm": 0.4325051009654999, "learning_rate": 0.0005117233019700982, "loss": 1.8043, "step": 22598 }, { "epoch": 0.75, "grad_norm": 0.4463983178138733, "learning_rate": 0.0005117158968661414, "loss": 1.8467, "step": 22599 }, { "epoch": 0.75, "grad_norm": 0.445602685213089, "learning_rate": 0.0005117084915051926, "loss": 1.8171, "step": 22600 }, { "epoch": 0.75, "grad_norm": 0.4155844449996948, "learning_rate": 0.000511701085887261, "loss": 1.8458, "step": 22601 }, { "epoch": 0.75, "grad_norm": 0.4151807129383087, "learning_rate": 0.0005116936800123554, "loss": 1.8102, "step": 22602 }, { "epoch": 0.75, "grad_norm": 0.4232000708580017, "learning_rate": 0.0005116862738804849, "loss": 1.8011, "step": 22603 }, { "epoch": 0.75, "grad_norm": 0.4275300204753876, "learning_rate": 0.0005116788674916585, "loss": 1.8234, "step": 22604 }, { "epoch": 0.75, "grad_norm": 0.4312078058719635, "learning_rate": 0.0005116714608458851, "loss": 1.8364, "step": 22605 }, { "epoch": 0.75, "grad_norm": 0.43514513969421387, "learning_rate": 0.0005116640539431736, "loss": 1.8497, "step": 22606 }, { "epoch": 0.75, "grad_norm": 0.4282759428024292, "learning_rate": 0.0005116566467835333, "loss": 1.7739, "step": 22607 }, { "epoch": 0.75, "grad_norm": 0.4313425123691559, "learning_rate": 0.0005116492393669729, "loss": 1.8933, "step": 22608 }, { "epoch": 0.75, "grad_norm": 0.4322846829891205, "learning_rate": 0.0005116418316935017, "loss": 1.7991, "step": 22609 }, { "epoch": 0.75, "grad_norm": 0.4294861853122711, "learning_rate": 0.0005116344237631284, "loss": 1.8441, "step": 22610 }, { "epoch": 0.75, "grad_norm": 0.4379913806915283, "learning_rate": 0.000511627015575862, "loss": 1.8455, "step": 22611 }, { "epoch": 0.75, "grad_norm": 0.4326377213001251, "learning_rate": 0.0005116196071317117, "loss": 1.7122, "step": 22612 }, { "epoch": 0.75, "grad_norm": 0.4305272102355957, "learning_rate": 0.0005116121984306863, "loss": 1.788, "step": 22613 }, { "epoch": 0.75, "grad_norm": 0.4429979920387268, "learning_rate": 0.0005116047894727949, "loss": 1.8121, "step": 22614 }, { "epoch": 0.75, "grad_norm": 0.44416743516921997, "learning_rate": 0.0005115973802580464, "loss": 1.9204, "step": 22615 }, { "epoch": 0.75, "grad_norm": 0.4345872700214386, "learning_rate": 0.0005115899707864499, "loss": 1.8671, "step": 22616 }, { "epoch": 0.75, "grad_norm": 0.44463488459587097, "learning_rate": 0.0005115825610580144, "loss": 1.8338, "step": 22617 }, { "epoch": 0.75, "grad_norm": 0.4340740144252777, "learning_rate": 0.0005115751510727489, "loss": 1.8257, "step": 22618 }, { "epoch": 0.75, "grad_norm": 0.4239250719547272, "learning_rate": 0.0005115677408306622, "loss": 1.9263, "step": 22619 }, { "epoch": 0.75, "grad_norm": 0.43375951051712036, "learning_rate": 0.0005115603303317636, "loss": 1.7987, "step": 22620 }, { "epoch": 0.75, "grad_norm": 0.4536539912223816, "learning_rate": 0.0005115529195760618, "loss": 1.854, "step": 22621 }, { "epoch": 0.75, "grad_norm": 0.4281671643257141, "learning_rate": 0.0005115455085635659, "loss": 1.8519, "step": 22622 }, { "epoch": 0.75, "grad_norm": 0.4356977641582489, "learning_rate": 0.000511538097294285, "loss": 1.8327, "step": 22623 }, { "epoch": 0.75, "grad_norm": 0.4575395882129669, "learning_rate": 0.000511530685768228, "loss": 1.8011, "step": 22624 }, { "epoch": 0.75, "grad_norm": 0.4326643943786621, "learning_rate": 0.000511523273985404, "loss": 1.868, "step": 22625 }, { "epoch": 0.75, "grad_norm": 0.4280822277069092, "learning_rate": 0.0005115158619458217, "loss": 1.815, "step": 22626 }, { "epoch": 0.75, "grad_norm": 0.43841370940208435, "learning_rate": 0.0005115084496494904, "loss": 1.8087, "step": 22627 }, { "epoch": 0.75, "grad_norm": 0.509223997592926, "learning_rate": 0.0005115010370964191, "loss": 1.8338, "step": 22628 }, { "epoch": 0.75, "grad_norm": 0.4324682652950287, "learning_rate": 0.0005114936242866166, "loss": 1.8257, "step": 22629 }, { "epoch": 0.75, "grad_norm": 0.4375236928462982, "learning_rate": 0.0005114862112200921, "loss": 1.8882, "step": 22630 }, { "epoch": 0.75, "grad_norm": 0.44005173444747925, "learning_rate": 0.0005114787978968544, "loss": 1.801, "step": 22631 }, { "epoch": 0.75, "grad_norm": 0.4194662868976593, "learning_rate": 0.0005114713843169127, "loss": 1.7708, "step": 22632 }, { "epoch": 0.75, "grad_norm": 0.42510005831718445, "learning_rate": 0.0005114639704802759, "loss": 1.734, "step": 22633 }, { "epoch": 0.75, "grad_norm": 0.4347379803657532, "learning_rate": 0.0005114565563869529, "loss": 1.8233, "step": 22634 }, { "epoch": 0.75, "grad_norm": 0.45698556303977966, "learning_rate": 0.0005114491420369528, "loss": 1.8447, "step": 22635 }, { "epoch": 0.75, "grad_norm": 0.4384004771709442, "learning_rate": 0.0005114417274302847, "loss": 1.7937, "step": 22636 }, { "epoch": 0.75, "grad_norm": 0.44024136662483215, "learning_rate": 0.0005114343125669574, "loss": 1.7983, "step": 22637 }, { "epoch": 0.75, "grad_norm": 0.4222566783428192, "learning_rate": 0.0005114268974469801, "loss": 1.7685, "step": 22638 }, { "epoch": 0.75, "grad_norm": 0.43704941868782043, "learning_rate": 0.0005114194820703616, "loss": 1.8144, "step": 22639 }, { "epoch": 0.75, "grad_norm": 0.4543927013874054, "learning_rate": 0.0005114120664371111, "loss": 1.8323, "step": 22640 }, { "epoch": 0.75, "grad_norm": 0.45757052302360535, "learning_rate": 0.0005114046505472373, "loss": 1.8298, "step": 22641 }, { "epoch": 0.75, "grad_norm": 0.44930726289749146, "learning_rate": 0.0005113972344007496, "loss": 1.8748, "step": 22642 }, { "epoch": 0.75, "grad_norm": 0.4391022324562073, "learning_rate": 0.0005113898179976567, "loss": 1.9014, "step": 22643 }, { "epoch": 0.75, "grad_norm": 0.4224289059638977, "learning_rate": 0.0005113824013379677, "loss": 1.8392, "step": 22644 }, { "epoch": 0.75, "grad_norm": 0.4565059542655945, "learning_rate": 0.0005113749844216917, "loss": 1.7778, "step": 22645 }, { "epoch": 0.75, "grad_norm": 0.4374092221260071, "learning_rate": 0.0005113675672488375, "loss": 1.7887, "step": 22646 }, { "epoch": 0.75, "grad_norm": 0.43290919065475464, "learning_rate": 0.0005113601498194143, "loss": 1.9016, "step": 22647 }, { "epoch": 0.75, "grad_norm": 0.440750390291214, "learning_rate": 0.000511352732133431, "loss": 1.8397, "step": 22648 }, { "epoch": 0.75, "grad_norm": 0.42821812629699707, "learning_rate": 0.0005113453141908967, "loss": 1.918, "step": 22649 }, { "epoch": 0.75, "grad_norm": 0.42720258235931396, "learning_rate": 0.0005113378959918203, "loss": 1.7939, "step": 22650 }, { "epoch": 0.75, "grad_norm": 0.424278199672699, "learning_rate": 0.0005113304775362108, "loss": 1.73, "step": 22651 }, { "epoch": 0.75, "grad_norm": 0.46552300453186035, "learning_rate": 0.0005113230588240773, "loss": 1.8162, "step": 22652 }, { "epoch": 0.75, "grad_norm": 0.44434845447540283, "learning_rate": 0.0005113156398554287, "loss": 1.7851, "step": 22653 }, { "epoch": 0.75, "grad_norm": 0.4318709075450897, "learning_rate": 0.0005113082206302738, "loss": 1.8797, "step": 22654 }, { "epoch": 0.75, "grad_norm": 0.4379248023033142, "learning_rate": 0.0005113008011486222, "loss": 1.8512, "step": 22655 }, { "epoch": 0.75, "grad_norm": 0.46559324860572815, "learning_rate": 0.0005112933814104824, "loss": 1.8251, "step": 22656 }, { "epoch": 0.75, "grad_norm": 0.4417264461517334, "learning_rate": 0.0005112859614158636, "loss": 1.7721, "step": 22657 }, { "epoch": 0.75, "grad_norm": 0.4201953411102295, "learning_rate": 0.0005112785411647749, "loss": 1.8352, "step": 22658 }, { "epoch": 0.75, "grad_norm": 0.4324376583099365, "learning_rate": 0.000511271120657225, "loss": 1.819, "step": 22659 }, { "epoch": 0.75, "grad_norm": 0.435325562953949, "learning_rate": 0.0005112636998932231, "loss": 1.7858, "step": 22660 }, { "epoch": 0.75, "grad_norm": 0.5323565006256104, "learning_rate": 0.0005112562788727784, "loss": 1.9645, "step": 22661 }, { "epoch": 0.75, "grad_norm": 0.4424019753932953, "learning_rate": 0.0005112488575958994, "loss": 1.9133, "step": 22662 }, { "epoch": 0.75, "grad_norm": 0.452755868434906, "learning_rate": 0.0005112414360625956, "loss": 1.8184, "step": 22663 }, { "epoch": 0.75, "grad_norm": 0.4368119239807129, "learning_rate": 0.0005112340142728758, "loss": 1.8786, "step": 22664 }, { "epoch": 0.75, "grad_norm": 0.45259353518486023, "learning_rate": 0.000511226592226749, "loss": 1.8143, "step": 22665 }, { "epoch": 0.75, "grad_norm": 0.44305118918418884, "learning_rate": 0.0005112191699242243, "loss": 1.8569, "step": 22666 }, { "epoch": 0.75, "grad_norm": 0.413608580827713, "learning_rate": 0.0005112117473653106, "loss": 1.7768, "step": 22667 }, { "epoch": 0.75, "grad_norm": 0.43071675300598145, "learning_rate": 0.000511204324550017, "loss": 1.8049, "step": 22668 }, { "epoch": 0.75, "grad_norm": 0.428137868642807, "learning_rate": 0.0005111969014783524, "loss": 1.8669, "step": 22669 }, { "epoch": 0.75, "grad_norm": 0.43202143907546997, "learning_rate": 0.000511189478150326, "loss": 1.8135, "step": 22670 }, { "epoch": 0.75, "grad_norm": 0.44179633259773254, "learning_rate": 0.0005111820545659466, "loss": 1.8471, "step": 22671 }, { "epoch": 0.75, "grad_norm": 0.4368229806423187, "learning_rate": 0.0005111746307252233, "loss": 1.8068, "step": 22672 }, { "epoch": 0.75, "grad_norm": 0.4261348843574524, "learning_rate": 0.0005111672066281651, "loss": 1.83, "step": 22673 }, { "epoch": 0.75, "grad_norm": 0.4274984300136566, "learning_rate": 0.000511159782274781, "loss": 1.8051, "step": 22674 }, { "epoch": 0.75, "grad_norm": 0.44243302941322327, "learning_rate": 0.0005111523576650801, "loss": 1.8429, "step": 22675 }, { "epoch": 0.75, "grad_norm": 0.4315737187862396, "learning_rate": 0.0005111449327990713, "loss": 1.8344, "step": 22676 }, { "epoch": 0.75, "grad_norm": 0.4593862295150757, "learning_rate": 0.0005111375076767637, "loss": 1.8131, "step": 22677 }, { "epoch": 0.75, "grad_norm": 0.47839924693107605, "learning_rate": 0.0005111300822981664, "loss": 1.8786, "step": 22678 }, { "epoch": 0.75, "grad_norm": 0.4334715008735657, "learning_rate": 0.0005111226566632881, "loss": 1.7959, "step": 22679 }, { "epoch": 0.75, "grad_norm": 0.45316281914711, "learning_rate": 0.0005111152307721381, "loss": 1.7745, "step": 22680 }, { "epoch": 0.75, "grad_norm": 0.4651864767074585, "learning_rate": 0.0005111078046247253, "loss": 1.8318, "step": 22681 }, { "epoch": 0.75, "grad_norm": 0.4525780975818634, "learning_rate": 0.0005111003782210586, "loss": 1.8291, "step": 22682 }, { "epoch": 0.75, "grad_norm": 0.43008068203926086, "learning_rate": 0.0005110929515611474, "loss": 1.8254, "step": 22683 }, { "epoch": 0.75, "grad_norm": 0.4512609839439392, "learning_rate": 0.0005110855246450003, "loss": 1.8421, "step": 22684 }, { "epoch": 0.75, "grad_norm": 0.44940122961997986, "learning_rate": 0.0005110780974726264, "loss": 1.8284, "step": 22685 }, { "epoch": 0.75, "grad_norm": 0.464542955160141, "learning_rate": 0.000511070670044035, "loss": 1.7824, "step": 22686 }, { "epoch": 0.75, "grad_norm": 0.4258626699447632, "learning_rate": 0.0005110632423592348, "loss": 1.825, "step": 22687 }, { "epoch": 0.75, "grad_norm": 0.4171628952026367, "learning_rate": 0.0005110558144182348, "loss": 1.7956, "step": 22688 }, { "epoch": 0.75, "grad_norm": 0.45499059557914734, "learning_rate": 0.0005110483862210442, "loss": 1.8382, "step": 22689 }, { "epoch": 0.75, "grad_norm": 0.4465486407279968, "learning_rate": 0.0005110409577676722, "loss": 1.7917, "step": 22690 }, { "epoch": 0.75, "grad_norm": 0.43945208191871643, "learning_rate": 0.0005110335290581273, "loss": 1.7731, "step": 22691 }, { "epoch": 0.75, "grad_norm": 0.4367699921131134, "learning_rate": 0.0005110261000924189, "loss": 1.765, "step": 22692 }, { "epoch": 0.75, "grad_norm": 0.44463270902633667, "learning_rate": 0.0005110186708705557, "loss": 1.9044, "step": 22693 }, { "epoch": 0.76, "grad_norm": 0.4230807423591614, "learning_rate": 0.0005110112413925472, "loss": 1.8609, "step": 22694 }, { "epoch": 0.76, "grad_norm": 0.45175328850746155, "learning_rate": 0.0005110038116584022, "loss": 1.794, "step": 22695 }, { "epoch": 0.76, "grad_norm": 0.4277242124080658, "learning_rate": 0.0005109963816681294, "loss": 1.8205, "step": 22696 }, { "epoch": 0.76, "grad_norm": 0.4697290062904358, "learning_rate": 0.0005109889514217381, "loss": 1.7969, "step": 22697 }, { "epoch": 0.76, "grad_norm": 0.4672442376613617, "learning_rate": 0.0005109815209192374, "loss": 1.848, "step": 22698 }, { "epoch": 0.76, "grad_norm": 0.44273141026496887, "learning_rate": 0.0005109740901606363, "loss": 1.8163, "step": 22699 }, { "epoch": 0.76, "grad_norm": 0.4235283136367798, "learning_rate": 0.0005109666591459436, "loss": 1.8781, "step": 22700 }, { "epoch": 0.76, "grad_norm": 0.4323073923587799, "learning_rate": 0.0005109592278751686, "loss": 1.904, "step": 22701 }, { "epoch": 0.76, "grad_norm": 0.42692995071411133, "learning_rate": 0.00051095179634832, "loss": 1.8984, "step": 22702 }, { "epoch": 0.76, "grad_norm": 0.43886706233024597, "learning_rate": 0.0005109443645654071, "loss": 1.8412, "step": 22703 }, { "epoch": 0.76, "grad_norm": 0.4449685215950012, "learning_rate": 0.0005109369325264389, "loss": 1.7823, "step": 22704 }, { "epoch": 0.76, "grad_norm": 0.45158669352531433, "learning_rate": 0.0005109295002314242, "loss": 1.8457, "step": 22705 }, { "epoch": 0.76, "grad_norm": 0.4246525168418884, "learning_rate": 0.0005109220676803722, "loss": 1.84, "step": 22706 }, { "epoch": 0.76, "grad_norm": 0.45548921823501587, "learning_rate": 0.000510914634873292, "loss": 1.8607, "step": 22707 }, { "epoch": 0.76, "grad_norm": 0.4360966384410858, "learning_rate": 0.0005109072018101924, "loss": 1.7499, "step": 22708 }, { "epoch": 0.76, "grad_norm": 0.4445458948612213, "learning_rate": 0.0005108997684910825, "loss": 1.9112, "step": 22709 }, { "epoch": 0.76, "grad_norm": 0.43990418314933777, "learning_rate": 0.0005108923349159715, "loss": 1.8184, "step": 22710 }, { "epoch": 0.76, "grad_norm": 0.4495408236980438, "learning_rate": 0.0005108849010848681, "loss": 1.8436, "step": 22711 }, { "epoch": 0.76, "grad_norm": 0.430088073015213, "learning_rate": 0.0005108774669977817, "loss": 1.816, "step": 22712 }, { "epoch": 0.76, "grad_norm": 0.4425995349884033, "learning_rate": 0.0005108700326547209, "loss": 1.9123, "step": 22713 }, { "epoch": 0.76, "grad_norm": 0.4411042034626007, "learning_rate": 0.0005108625980556952, "loss": 1.7625, "step": 22714 }, { "epoch": 0.76, "grad_norm": 0.6657302379608154, "learning_rate": 0.0005108551632007132, "loss": 1.8046, "step": 22715 }, { "epoch": 0.76, "grad_norm": 0.44571927189826965, "learning_rate": 0.0005108477280897841, "loss": 1.8188, "step": 22716 }, { "epoch": 0.76, "grad_norm": 0.4234514534473419, "learning_rate": 0.000510840292722917, "loss": 1.8782, "step": 22717 }, { "epoch": 0.76, "grad_norm": 0.45206061005592346, "learning_rate": 0.0005108328571001208, "loss": 1.907, "step": 22718 }, { "epoch": 0.76, "grad_norm": 0.42653611302375793, "learning_rate": 0.0005108254212214047, "loss": 1.8259, "step": 22719 }, { "epoch": 0.76, "grad_norm": 0.46692654490470886, "learning_rate": 0.0005108179850867774, "loss": 1.8719, "step": 22720 }, { "epoch": 0.76, "grad_norm": 0.44907882809638977, "learning_rate": 0.0005108105486962483, "loss": 1.7972, "step": 22721 }, { "epoch": 0.76, "grad_norm": 0.4271600842475891, "learning_rate": 0.0005108031120498261, "loss": 1.8591, "step": 22722 }, { "epoch": 0.76, "grad_norm": 0.4528951644897461, "learning_rate": 0.0005107956751475201, "loss": 1.7991, "step": 22723 }, { "epoch": 0.76, "grad_norm": 0.4233626127243042, "learning_rate": 0.0005107882379893391, "loss": 1.7909, "step": 22724 }, { "epoch": 0.76, "grad_norm": 0.44094160199165344, "learning_rate": 0.0005107808005752924, "loss": 1.8756, "step": 22725 }, { "epoch": 0.76, "grad_norm": 0.43503159284591675, "learning_rate": 0.0005107733629053889, "loss": 1.8093, "step": 22726 }, { "epoch": 0.76, "grad_norm": 0.42646270990371704, "learning_rate": 0.0005107659249796374, "loss": 1.7689, "step": 22727 }, { "epoch": 0.76, "grad_norm": 0.43710076808929443, "learning_rate": 0.0005107584867980471, "loss": 1.7953, "step": 22728 }, { "epoch": 0.76, "grad_norm": 0.4251137375831604, "learning_rate": 0.0005107510483606273, "loss": 1.815, "step": 22729 }, { "epoch": 0.76, "grad_norm": 0.5206286907196045, "learning_rate": 0.0005107436096673865, "loss": 1.802, "step": 22730 }, { "epoch": 0.76, "grad_norm": 0.4374914765357971, "learning_rate": 0.0005107361707183341, "loss": 1.8059, "step": 22731 }, { "epoch": 0.76, "grad_norm": 0.43029096722602844, "learning_rate": 0.0005107287315134791, "loss": 1.8332, "step": 22732 }, { "epoch": 0.76, "grad_norm": 0.42646732926368713, "learning_rate": 0.0005107212920528305, "loss": 1.7188, "step": 22733 }, { "epoch": 0.76, "grad_norm": 0.4264260530471802, "learning_rate": 0.0005107138523363972, "loss": 1.7913, "step": 22734 }, { "epoch": 0.76, "grad_norm": 0.44036975502967834, "learning_rate": 0.0005107064123641885, "loss": 1.8542, "step": 22735 }, { "epoch": 0.76, "grad_norm": 0.4370866119861603, "learning_rate": 0.0005106989721362131, "loss": 1.8201, "step": 22736 }, { "epoch": 0.76, "grad_norm": 0.41353100538253784, "learning_rate": 0.0005106915316524801, "loss": 1.8015, "step": 22737 }, { "epoch": 0.76, "grad_norm": 0.42808449268341064, "learning_rate": 0.0005106840909129988, "loss": 1.8104, "step": 22738 }, { "epoch": 0.76, "grad_norm": 0.43688395619392395, "learning_rate": 0.0005106766499177781, "loss": 1.8491, "step": 22739 }, { "epoch": 0.76, "grad_norm": 0.4285764694213867, "learning_rate": 0.0005106692086668269, "loss": 1.821, "step": 22740 }, { "epoch": 0.76, "grad_norm": 0.43042656779289246, "learning_rate": 0.0005106617671601542, "loss": 1.8469, "step": 22741 }, { "epoch": 0.76, "grad_norm": 0.4379929304122925, "learning_rate": 0.0005106543253977693, "loss": 1.9258, "step": 22742 }, { "epoch": 0.76, "grad_norm": 0.4482623040676117, "learning_rate": 0.0005106468833796812, "loss": 1.8175, "step": 22743 }, { "epoch": 0.76, "grad_norm": 0.43816521763801575, "learning_rate": 0.0005106394411058986, "loss": 1.8618, "step": 22744 }, { "epoch": 0.76, "grad_norm": 0.4396289587020874, "learning_rate": 0.0005106319985764309, "loss": 1.8149, "step": 22745 }, { "epoch": 0.76, "grad_norm": 0.4442294239997864, "learning_rate": 0.0005106245557912868, "loss": 1.8301, "step": 22746 }, { "epoch": 0.76, "grad_norm": 0.4271582067012787, "learning_rate": 0.0005106171127504757, "loss": 1.7577, "step": 22747 }, { "epoch": 0.76, "grad_norm": 0.4266779124736786, "learning_rate": 0.0005106096694540063, "loss": 1.8121, "step": 22748 }, { "epoch": 0.76, "grad_norm": 0.44390520453453064, "learning_rate": 0.000510602225901888, "loss": 1.697, "step": 22749 }, { "epoch": 0.76, "grad_norm": 0.4400770962238312, "learning_rate": 0.0005105947820941295, "loss": 1.9109, "step": 22750 }, { "epoch": 0.76, "grad_norm": 0.4438555836677551, "learning_rate": 0.0005105873380307401, "loss": 1.7919, "step": 22751 }, { "epoch": 0.76, "grad_norm": 0.4326059818267822, "learning_rate": 0.0005105798937117286, "loss": 1.8619, "step": 22752 }, { "epoch": 0.76, "grad_norm": 0.4392111599445343, "learning_rate": 0.0005105724491371041, "loss": 1.8383, "step": 22753 }, { "epoch": 0.76, "grad_norm": 0.4178561866283417, "learning_rate": 0.0005105650043068756, "loss": 1.8102, "step": 22754 }, { "epoch": 0.76, "grad_norm": 1.5312676429748535, "learning_rate": 0.0005105575592210524, "loss": 1.8734, "step": 22755 }, { "epoch": 0.76, "grad_norm": 0.4345892369747162, "learning_rate": 0.0005105501138796432, "loss": 1.8034, "step": 22756 }, { "epoch": 0.76, "grad_norm": 0.4297325015068054, "learning_rate": 0.0005105426682826573, "loss": 1.8165, "step": 22757 }, { "epoch": 0.76, "grad_norm": 0.4326631426811218, "learning_rate": 0.0005105352224301035, "loss": 1.8205, "step": 22758 }, { "epoch": 0.76, "grad_norm": 0.42462480068206787, "learning_rate": 0.000510527776321991, "loss": 1.8089, "step": 22759 }, { "epoch": 0.76, "grad_norm": 0.42356041073799133, "learning_rate": 0.0005105203299583289, "loss": 1.7984, "step": 22760 }, { "epoch": 0.76, "grad_norm": 0.431498259305954, "learning_rate": 0.000510512883339126, "loss": 1.8256, "step": 22761 }, { "epoch": 0.76, "grad_norm": 0.4460766315460205, "learning_rate": 0.0005105054364643916, "loss": 1.7758, "step": 22762 }, { "epoch": 0.76, "grad_norm": 0.4296690821647644, "learning_rate": 0.0005104979893341345, "loss": 1.7817, "step": 22763 }, { "epoch": 0.76, "grad_norm": 0.42395952343940735, "learning_rate": 0.0005104905419483641, "loss": 1.8224, "step": 22764 }, { "epoch": 0.76, "grad_norm": 0.4316265881061554, "learning_rate": 0.0005104830943070889, "loss": 1.8736, "step": 22765 }, { "epoch": 0.76, "grad_norm": 0.4439958930015564, "learning_rate": 0.0005104756464103184, "loss": 1.9143, "step": 22766 }, { "epoch": 0.76, "grad_norm": 0.41264909505844116, "learning_rate": 0.0005104681982580615, "loss": 1.844, "step": 22767 }, { "epoch": 0.76, "grad_norm": 0.4647796154022217, "learning_rate": 0.000510460749850327, "loss": 1.7571, "step": 22768 }, { "epoch": 0.76, "grad_norm": 0.43286752700805664, "learning_rate": 0.0005104533011871243, "loss": 1.8553, "step": 22769 }, { "epoch": 0.76, "grad_norm": 0.43660569190979004, "learning_rate": 0.0005104458522684623, "loss": 1.8831, "step": 22770 }, { "epoch": 0.76, "grad_norm": 0.4223141372203827, "learning_rate": 0.0005104384030943501, "loss": 1.8277, "step": 22771 }, { "epoch": 0.76, "grad_norm": 0.44857242703437805, "learning_rate": 0.0005104309536647966, "loss": 1.8759, "step": 22772 }, { "epoch": 0.76, "grad_norm": 0.43857860565185547, "learning_rate": 0.000510423503979811, "loss": 1.8837, "step": 22773 }, { "epoch": 0.76, "grad_norm": 0.4303925633430481, "learning_rate": 0.0005104160540394023, "loss": 1.7642, "step": 22774 }, { "epoch": 0.76, "grad_norm": 0.4277864694595337, "learning_rate": 0.0005104086038435794, "loss": 1.87, "step": 22775 }, { "epoch": 0.76, "grad_norm": 0.42676568031311035, "learning_rate": 0.0005104011533923515, "loss": 1.8112, "step": 22776 }, { "epoch": 0.76, "grad_norm": 0.4354487657546997, "learning_rate": 0.0005103937026857276, "loss": 1.7582, "step": 22777 }, { "epoch": 0.76, "grad_norm": 0.43208053708076477, "learning_rate": 0.0005103862517237168, "loss": 1.7641, "step": 22778 }, { "epoch": 0.76, "grad_norm": 0.44345808029174805, "learning_rate": 0.000510378800506328, "loss": 1.7735, "step": 22779 }, { "epoch": 0.76, "grad_norm": 0.45190203189849854, "learning_rate": 0.0005103713490335704, "loss": 1.8471, "step": 22780 }, { "epoch": 0.76, "grad_norm": 0.4710352420806885, "learning_rate": 0.0005103638973054528, "loss": 1.8365, "step": 22781 }, { "epoch": 0.76, "grad_norm": 0.4283009469509125, "learning_rate": 0.0005103564453219847, "loss": 1.8038, "step": 22782 }, { "epoch": 0.76, "grad_norm": 0.43421652913093567, "learning_rate": 0.0005103489930831747, "loss": 1.8551, "step": 22783 }, { "epoch": 0.76, "grad_norm": 0.4599844217300415, "learning_rate": 0.0005103415405890321, "loss": 1.7905, "step": 22784 }, { "epoch": 0.76, "grad_norm": 0.4326696991920471, "learning_rate": 0.0005103340878395658, "loss": 1.8082, "step": 22785 }, { "epoch": 0.76, "grad_norm": 0.4264735281467438, "learning_rate": 0.0005103266348347849, "loss": 1.9174, "step": 22786 }, { "epoch": 0.76, "grad_norm": 0.4328160285949707, "learning_rate": 0.0005103191815746986, "loss": 1.8849, "step": 22787 }, { "epoch": 0.76, "grad_norm": 0.45222336053848267, "learning_rate": 0.0005103117280593156, "loss": 1.8238, "step": 22788 }, { "epoch": 0.76, "grad_norm": 0.43962106108665466, "learning_rate": 0.0005103042742886452, "loss": 1.8603, "step": 22789 }, { "epoch": 0.76, "grad_norm": 0.46206337213516235, "learning_rate": 0.0005102968202626966, "loss": 1.8853, "step": 22790 }, { "epoch": 0.76, "grad_norm": 0.4158799350261688, "learning_rate": 0.0005102893659814783, "loss": 1.8316, "step": 22791 }, { "epoch": 0.76, "grad_norm": 0.5971035957336426, "learning_rate": 0.000510281911445, "loss": 1.7922, "step": 22792 }, { "epoch": 0.76, "grad_norm": 0.42335060238838196, "learning_rate": 0.0005102744566532702, "loss": 1.8185, "step": 22793 }, { "epoch": 0.76, "grad_norm": 0.4253753125667572, "learning_rate": 0.0005102670016062984, "loss": 1.8068, "step": 22794 }, { "epoch": 0.76, "grad_norm": 0.444085955619812, "learning_rate": 0.0005102595463040934, "loss": 1.8354, "step": 22795 }, { "epoch": 0.76, "grad_norm": 0.4304662346839905, "learning_rate": 0.0005102520907466642, "loss": 1.772, "step": 22796 }, { "epoch": 0.76, "grad_norm": 0.4290556311607361, "learning_rate": 0.00051024463493402, "loss": 1.8064, "step": 22797 }, { "epoch": 0.76, "grad_norm": 0.4307916760444641, "learning_rate": 0.0005102371788661698, "loss": 1.7688, "step": 22798 }, { "epoch": 0.76, "grad_norm": 0.4452212154865265, "learning_rate": 0.0005102297225431226, "loss": 1.7725, "step": 22799 }, { "epoch": 0.76, "grad_norm": 0.433148592710495, "learning_rate": 0.0005102222659648875, "loss": 1.7674, "step": 22800 }, { "epoch": 0.76, "grad_norm": 0.43626755475997925, "learning_rate": 0.0005102148091314736, "loss": 1.7865, "step": 22801 }, { "epoch": 0.76, "grad_norm": 0.43460193276405334, "learning_rate": 0.0005102073520428898, "loss": 1.8111, "step": 22802 }, { "epoch": 0.76, "grad_norm": 0.4488176107406616, "learning_rate": 0.0005101998946991453, "loss": 1.8258, "step": 22803 }, { "epoch": 0.76, "grad_norm": 0.4434794485569, "learning_rate": 0.0005101924371002491, "loss": 1.7878, "step": 22804 }, { "epoch": 0.76, "grad_norm": 0.4113618731498718, "learning_rate": 0.0005101849792462103, "loss": 1.8844, "step": 22805 }, { "epoch": 0.76, "grad_norm": 0.4543786346912384, "learning_rate": 0.0005101775211370379, "loss": 1.8283, "step": 22806 }, { "epoch": 0.76, "grad_norm": 0.43713560700416565, "learning_rate": 0.0005101700627727409, "loss": 1.822, "step": 22807 }, { "epoch": 0.76, "grad_norm": 0.42134585976600647, "learning_rate": 0.0005101626041533284, "loss": 1.8188, "step": 22808 }, { "epoch": 0.76, "grad_norm": 0.44545844197273254, "learning_rate": 0.0005101551452788095, "loss": 1.8215, "step": 22809 }, { "epoch": 0.76, "grad_norm": 0.44646692276000977, "learning_rate": 0.0005101476861491932, "loss": 1.7759, "step": 22810 }, { "epoch": 0.76, "grad_norm": 0.43503236770629883, "learning_rate": 0.0005101402267644886, "loss": 1.8693, "step": 22811 }, { "epoch": 0.76, "grad_norm": 0.4360944330692291, "learning_rate": 0.0005101327671247047, "loss": 1.8016, "step": 22812 }, { "epoch": 0.76, "grad_norm": 0.4344715178012848, "learning_rate": 0.0005101253072298506, "loss": 1.8265, "step": 22813 }, { "epoch": 0.76, "grad_norm": 0.45311686396598816, "learning_rate": 0.0005101178470799353, "loss": 1.8529, "step": 22814 }, { "epoch": 0.76, "grad_norm": 0.4408239424228668, "learning_rate": 0.0005101103866749679, "loss": 1.7638, "step": 22815 }, { "epoch": 0.76, "grad_norm": 0.4317665994167328, "learning_rate": 0.0005101029260149575, "loss": 1.8221, "step": 22816 }, { "epoch": 0.76, "grad_norm": 0.4303569197654724, "learning_rate": 0.0005100954650999131, "loss": 1.8505, "step": 22817 }, { "epoch": 0.76, "grad_norm": 0.4455883800983429, "learning_rate": 0.0005100880039298437, "loss": 1.88, "step": 22818 }, { "epoch": 0.76, "grad_norm": 0.4289839565753937, "learning_rate": 0.0005100805425047585, "loss": 1.7727, "step": 22819 }, { "epoch": 0.76, "grad_norm": 0.43617528676986694, "learning_rate": 0.0005100730808246664, "loss": 1.8195, "step": 22820 }, { "epoch": 0.76, "grad_norm": 0.4535270929336548, "learning_rate": 0.0005100656188895766, "loss": 1.8177, "step": 22821 }, { "epoch": 0.76, "grad_norm": 0.464851438999176, "learning_rate": 0.0005100581566994981, "loss": 1.859, "step": 22822 }, { "epoch": 0.76, "grad_norm": 0.8498680591583252, "learning_rate": 0.0005100506942544398, "loss": 1.8756, "step": 22823 }, { "epoch": 0.76, "grad_norm": 0.43126675486564636, "learning_rate": 0.000510043231554411, "loss": 1.8261, "step": 22824 }, { "epoch": 0.76, "grad_norm": 0.43875738978385925, "learning_rate": 0.0005100357685994207, "loss": 1.787, "step": 22825 }, { "epoch": 0.76, "grad_norm": 0.44876378774642944, "learning_rate": 0.0005100283053894779, "loss": 1.8407, "step": 22826 }, { "epoch": 0.76, "grad_norm": 0.4511433243751526, "learning_rate": 0.0005100208419245917, "loss": 1.822, "step": 22827 }, { "epoch": 0.76, "grad_norm": 0.4356812536716461, "learning_rate": 0.000510013378204771, "loss": 1.8578, "step": 22828 }, { "epoch": 0.76, "grad_norm": 0.42515498399734497, "learning_rate": 0.0005100059142300252, "loss": 1.7808, "step": 22829 }, { "epoch": 0.76, "grad_norm": 0.42551663517951965, "learning_rate": 0.0005099984500003631, "loss": 1.8395, "step": 22830 }, { "epoch": 0.76, "grad_norm": 0.4445522725582123, "learning_rate": 0.0005099909855157939, "loss": 1.891, "step": 22831 }, { "epoch": 0.76, "grad_norm": 0.4366131126880646, "learning_rate": 0.0005099835207763265, "loss": 1.7599, "step": 22832 }, { "epoch": 0.76, "grad_norm": 0.42566606402397156, "learning_rate": 0.00050997605578197, "loss": 1.7901, "step": 22833 }, { "epoch": 0.76, "grad_norm": 0.43687593936920166, "learning_rate": 0.0005099685905327335, "loss": 1.8523, "step": 22834 }, { "epoch": 0.76, "grad_norm": 0.43835458159446716, "learning_rate": 0.0005099611250286261, "loss": 1.8164, "step": 22835 }, { "epoch": 0.76, "grad_norm": 0.4192153215408325, "learning_rate": 0.0005099536592696569, "loss": 1.7796, "step": 22836 }, { "epoch": 0.76, "grad_norm": 0.45956704020500183, "learning_rate": 0.0005099461932558349, "loss": 1.8649, "step": 22837 }, { "epoch": 0.76, "grad_norm": 0.5886701345443726, "learning_rate": 0.000509938726987169, "loss": 1.849, "step": 22838 }, { "epoch": 0.76, "grad_norm": 0.4552409052848816, "learning_rate": 0.0005099312604636686, "loss": 1.8231, "step": 22839 }, { "epoch": 0.76, "grad_norm": 0.45260000228881836, "learning_rate": 0.0005099237936853424, "loss": 1.8373, "step": 22840 }, { "epoch": 0.76, "grad_norm": 0.45612654089927673, "learning_rate": 0.0005099163266521998, "loss": 1.851, "step": 22841 }, { "epoch": 0.76, "grad_norm": 0.4464075565338135, "learning_rate": 0.0005099088593642497, "loss": 1.8548, "step": 22842 }, { "epoch": 0.76, "grad_norm": 0.42996543645858765, "learning_rate": 0.000509901391821501, "loss": 1.8154, "step": 22843 }, { "epoch": 0.76, "grad_norm": 0.42888343334198, "learning_rate": 0.0005098939240239632, "loss": 1.8574, "step": 22844 }, { "epoch": 0.76, "grad_norm": 0.4457641541957855, "learning_rate": 0.0005098864559716449, "loss": 1.8874, "step": 22845 }, { "epoch": 0.76, "grad_norm": 0.4658178687095642, "learning_rate": 0.0005098789876645555, "loss": 1.8992, "step": 22846 }, { "epoch": 0.76, "grad_norm": 0.43364453315734863, "learning_rate": 0.0005098715191027038, "loss": 1.8382, "step": 22847 }, { "epoch": 0.76, "grad_norm": 0.4181368052959442, "learning_rate": 0.000509864050286099, "loss": 1.9257, "step": 22848 }, { "epoch": 0.76, "grad_norm": 0.42600852251052856, "learning_rate": 0.0005098565812147503, "loss": 1.7796, "step": 22849 }, { "epoch": 0.76, "grad_norm": 0.4416165351867676, "learning_rate": 0.0005098491118886666, "loss": 1.8692, "step": 22850 }, { "epoch": 0.76, "grad_norm": 0.4460667371749878, "learning_rate": 0.0005098416423078569, "loss": 1.9115, "step": 22851 }, { "epoch": 0.76, "grad_norm": 0.4259670078754425, "learning_rate": 0.0005098341724723305, "loss": 1.8326, "step": 22852 }, { "epoch": 0.76, "grad_norm": 0.43783530592918396, "learning_rate": 0.0005098267023820962, "loss": 1.9221, "step": 22853 }, { "epoch": 0.76, "grad_norm": 0.430914044380188, "learning_rate": 0.0005098192320371633, "loss": 1.8225, "step": 22854 }, { "epoch": 0.76, "grad_norm": 0.4210014343261719, "learning_rate": 0.0005098117614375407, "loss": 1.802, "step": 22855 }, { "epoch": 0.76, "grad_norm": 0.42540243268013, "learning_rate": 0.0005098042905832376, "loss": 1.8137, "step": 22856 }, { "epoch": 0.76, "grad_norm": 0.4443187117576599, "learning_rate": 0.0005097968194742629, "loss": 1.9151, "step": 22857 }, { "epoch": 0.76, "grad_norm": 0.4421814978122711, "learning_rate": 0.0005097893481106259, "loss": 1.8021, "step": 22858 }, { "epoch": 0.76, "grad_norm": 0.4338543713092804, "learning_rate": 0.0005097818764923355, "loss": 1.869, "step": 22859 }, { "epoch": 0.76, "grad_norm": 0.4239920377731323, "learning_rate": 0.0005097744046194008, "loss": 1.8435, "step": 22860 }, { "epoch": 0.76, "grad_norm": 0.449648916721344, "learning_rate": 0.0005097669324918309, "loss": 1.8046, "step": 22861 }, { "epoch": 0.76, "grad_norm": 0.42898955941200256, "learning_rate": 0.0005097594601096348, "loss": 1.8103, "step": 22862 }, { "epoch": 0.76, "grad_norm": 0.4296468496322632, "learning_rate": 0.0005097519874728216, "loss": 1.8785, "step": 22863 }, { "epoch": 0.76, "grad_norm": 0.41926026344299316, "learning_rate": 0.0005097445145814005, "loss": 1.7717, "step": 22864 }, { "epoch": 0.76, "grad_norm": 0.4362160265445709, "learning_rate": 0.0005097370414353803, "loss": 1.8338, "step": 22865 }, { "epoch": 0.76, "grad_norm": 0.4516400098800659, "learning_rate": 0.0005097295680347704, "loss": 1.7941, "step": 22866 }, { "epoch": 0.76, "grad_norm": 0.43252724409103394, "learning_rate": 0.0005097220943795797, "loss": 1.8468, "step": 22867 }, { "epoch": 0.76, "grad_norm": 0.446111798286438, "learning_rate": 0.0005097146204698171, "loss": 1.8019, "step": 22868 }, { "epoch": 0.76, "grad_norm": 0.43674734234809875, "learning_rate": 0.0005097071463054921, "loss": 1.8164, "step": 22869 }, { "epoch": 0.76, "grad_norm": 0.4469844400882721, "learning_rate": 0.0005096996718866134, "loss": 1.8572, "step": 22870 }, { "epoch": 0.76, "grad_norm": 0.42485159635543823, "learning_rate": 0.0005096921972131902, "loss": 1.8308, "step": 22871 }, { "epoch": 0.76, "grad_norm": 0.4259839653968811, "learning_rate": 0.0005096847222852315, "loss": 1.8266, "step": 22872 }, { "epoch": 0.76, "grad_norm": 0.457349956035614, "learning_rate": 0.0005096772471027465, "loss": 1.801, "step": 22873 }, { "epoch": 0.76, "grad_norm": 0.43699151277542114, "learning_rate": 0.0005096697716657442, "loss": 1.8496, "step": 22874 }, { "epoch": 0.76, "grad_norm": 0.9366758465766907, "learning_rate": 0.0005096622959742338, "loss": 1.8022, "step": 22875 }, { "epoch": 0.76, "grad_norm": 0.4244013726711273, "learning_rate": 0.0005096548200282242, "loss": 1.7713, "step": 22876 }, { "epoch": 0.76, "grad_norm": 0.4254143238067627, "learning_rate": 0.0005096473438277244, "loss": 1.7889, "step": 22877 }, { "epoch": 0.76, "grad_norm": 0.4285137355327606, "learning_rate": 0.0005096398673727438, "loss": 1.8708, "step": 22878 }, { "epoch": 0.76, "grad_norm": 0.43658798933029175, "learning_rate": 0.0005096323906632912, "loss": 1.88, "step": 22879 }, { "epoch": 0.76, "grad_norm": 0.4355491101741791, "learning_rate": 0.0005096249136993758, "loss": 1.8797, "step": 22880 }, { "epoch": 0.76, "grad_norm": 0.4144207835197449, "learning_rate": 0.0005096174364810067, "loss": 1.7638, "step": 22881 }, { "epoch": 0.76, "grad_norm": 0.442532479763031, "learning_rate": 0.0005096099590081927, "loss": 1.758, "step": 22882 }, { "epoch": 0.76, "grad_norm": 0.43398064374923706, "learning_rate": 0.0005096024812809433, "loss": 1.8574, "step": 22883 }, { "epoch": 0.76, "grad_norm": 0.42188194394111633, "learning_rate": 0.0005095950032992674, "loss": 1.8699, "step": 22884 }, { "epoch": 0.76, "grad_norm": 0.43583038449287415, "learning_rate": 0.0005095875250631739, "loss": 1.8378, "step": 22885 }, { "epoch": 0.76, "grad_norm": 0.422509104013443, "learning_rate": 0.0005095800465726721, "loss": 1.8143, "step": 22886 }, { "epoch": 0.76, "grad_norm": 0.43326878547668457, "learning_rate": 0.0005095725678277709, "loss": 1.7864, "step": 22887 }, { "epoch": 0.76, "grad_norm": 0.4490135908126831, "learning_rate": 0.0005095650888284796, "loss": 1.922, "step": 22888 }, { "epoch": 0.76, "grad_norm": 0.43256962299346924, "learning_rate": 0.0005095576095748072, "loss": 1.7751, "step": 22889 }, { "epoch": 0.76, "grad_norm": 0.4298590421676636, "learning_rate": 0.0005095501300667627, "loss": 1.8204, "step": 22890 }, { "epoch": 0.76, "grad_norm": 0.44350987672805786, "learning_rate": 0.000509542650304355, "loss": 1.8842, "step": 22891 }, { "epoch": 0.76, "grad_norm": 0.4703059196472168, "learning_rate": 0.0005095351702875937, "loss": 1.8352, "step": 22892 }, { "epoch": 0.76, "grad_norm": 0.4600505232810974, "learning_rate": 0.0005095276900164875, "loss": 1.8734, "step": 22893 }, { "epoch": 0.76, "grad_norm": 0.45226818323135376, "learning_rate": 0.0005095202094910455, "loss": 1.894, "step": 22894 }, { "epoch": 0.76, "grad_norm": 0.4580889642238617, "learning_rate": 0.0005095127287112768, "loss": 1.819, "step": 22895 }, { "epoch": 0.76, "grad_norm": 0.4490048289299011, "learning_rate": 0.0005095052476771907, "loss": 1.8028, "step": 22896 }, { "epoch": 0.76, "grad_norm": 0.4372166097164154, "learning_rate": 0.000509497766388796, "loss": 1.8424, "step": 22897 }, { "epoch": 0.76, "grad_norm": 0.4723593592643738, "learning_rate": 0.0005094902848461018, "loss": 1.9033, "step": 22898 }, { "epoch": 0.76, "grad_norm": 0.4378105700016022, "learning_rate": 0.0005094828030491172, "loss": 1.8487, "step": 22899 }, { "epoch": 0.76, "grad_norm": 0.4567359685897827, "learning_rate": 0.0005094753209978516, "loss": 1.799, "step": 22900 }, { "epoch": 0.76, "grad_norm": 0.453317791223526, "learning_rate": 0.0005094678386923136, "loss": 1.8228, "step": 22901 }, { "epoch": 0.76, "grad_norm": 0.4310309886932373, "learning_rate": 0.0005094603561325126, "loss": 1.8357, "step": 22902 }, { "epoch": 0.76, "grad_norm": 0.4549374580383301, "learning_rate": 0.0005094528733184576, "loss": 1.8837, "step": 22903 }, { "epoch": 0.76, "grad_norm": 0.4673365652561188, "learning_rate": 0.0005094453902501576, "loss": 1.7357, "step": 22904 }, { "epoch": 0.76, "grad_norm": 0.45886439085006714, "learning_rate": 0.0005094379069276218, "loss": 1.8271, "step": 22905 }, { "epoch": 0.76, "grad_norm": 0.41940003633499146, "learning_rate": 0.0005094304233508593, "loss": 1.8168, "step": 22906 }, { "epoch": 0.76, "grad_norm": 0.46147438883781433, "learning_rate": 0.000509422939519879, "loss": 1.8299, "step": 22907 }, { "epoch": 0.76, "grad_norm": 0.4277230203151703, "learning_rate": 0.0005094154554346902, "loss": 1.8549, "step": 22908 }, { "epoch": 0.76, "grad_norm": 0.448036253452301, "learning_rate": 0.000509407971095302, "loss": 1.8177, "step": 22909 }, { "epoch": 0.76, "grad_norm": 0.42377084493637085, "learning_rate": 0.0005094004865017232, "loss": 1.8262, "step": 22910 }, { "epoch": 0.76, "grad_norm": 0.4406408369541168, "learning_rate": 0.0005093930016539631, "loss": 1.8011, "step": 22911 }, { "epoch": 0.76, "grad_norm": 0.46783512830734253, "learning_rate": 0.0005093855165520308, "loss": 1.8623, "step": 22912 }, { "epoch": 0.76, "grad_norm": 0.4602794945240021, "learning_rate": 0.0005093780311959353, "loss": 1.8036, "step": 22913 }, { "epoch": 0.76, "grad_norm": 0.43561726808547974, "learning_rate": 0.0005093705455856857, "loss": 1.9003, "step": 22914 }, { "epoch": 0.76, "grad_norm": 0.4275458753108978, "learning_rate": 0.0005093630597212912, "loss": 1.8157, "step": 22915 }, { "epoch": 0.76, "grad_norm": 0.5139406323432922, "learning_rate": 0.0005093555736027606, "loss": 1.8855, "step": 22916 }, { "epoch": 0.76, "grad_norm": 0.44359347224235535, "learning_rate": 0.0005093480872301033, "loss": 1.803, "step": 22917 }, { "epoch": 0.76, "grad_norm": 0.43934980034828186, "learning_rate": 0.0005093406006033283, "loss": 1.8385, "step": 22918 }, { "epoch": 0.76, "grad_norm": 0.46853122115135193, "learning_rate": 0.0005093331137224447, "loss": 1.8453, "step": 22919 }, { "epoch": 0.76, "grad_norm": 0.450796514749527, "learning_rate": 0.0005093256265874614, "loss": 1.8149, "step": 22920 }, { "epoch": 0.76, "grad_norm": 0.47156089544296265, "learning_rate": 0.0005093181391983878, "loss": 1.8405, "step": 22921 }, { "epoch": 0.76, "grad_norm": 0.45944738388061523, "learning_rate": 0.0005093106515552326, "loss": 1.8383, "step": 22922 }, { "epoch": 0.76, "grad_norm": 0.44379130005836487, "learning_rate": 0.0005093031636580052, "loss": 1.7841, "step": 22923 }, { "epoch": 0.76, "grad_norm": 0.8359838724136353, "learning_rate": 0.0005092956755067148, "loss": 1.854, "step": 22924 }, { "epoch": 0.76, "grad_norm": 0.46147143840789795, "learning_rate": 0.00050928818710137, "loss": 1.7937, "step": 22925 }, { "epoch": 0.76, "grad_norm": 0.41508588194847107, "learning_rate": 0.0005092806984419803, "loss": 1.8468, "step": 22926 }, { "epoch": 0.76, "grad_norm": 0.4342302680015564, "learning_rate": 0.0005092732095285546, "loss": 1.8419, "step": 22927 }, { "epoch": 0.76, "grad_norm": 0.44271767139434814, "learning_rate": 0.0005092657203611022, "loss": 1.801, "step": 22928 }, { "epoch": 0.76, "grad_norm": 0.42764878273010254, "learning_rate": 0.0005092582309396321, "loss": 1.7285, "step": 22929 }, { "epoch": 0.76, "grad_norm": 0.4378083050251007, "learning_rate": 0.0005092507412641532, "loss": 1.797, "step": 22930 }, { "epoch": 0.76, "grad_norm": 0.43388697504997253, "learning_rate": 0.0005092432513346747, "loss": 1.7823, "step": 22931 }, { "epoch": 0.76, "grad_norm": 0.4393235445022583, "learning_rate": 0.0005092357611512057, "loss": 1.854, "step": 22932 }, { "epoch": 0.76, "grad_norm": 0.4443531036376953, "learning_rate": 0.0005092282707137555, "loss": 1.7958, "step": 22933 }, { "epoch": 0.76, "grad_norm": 0.4356309175491333, "learning_rate": 0.0005092207800223329, "loss": 1.8649, "step": 22934 }, { "epoch": 0.76, "grad_norm": 0.4259154200553894, "learning_rate": 0.000509213289076947, "loss": 1.8104, "step": 22935 }, { "epoch": 0.76, "grad_norm": 0.4466305077075958, "learning_rate": 0.0005092057978776071, "loss": 1.7888, "step": 22936 }, { "epoch": 0.76, "grad_norm": 0.4457896649837494, "learning_rate": 0.0005091983064243221, "loss": 1.8289, "step": 22937 }, { "epoch": 0.76, "grad_norm": 0.4535825252532959, "learning_rate": 0.0005091908147171013, "loss": 1.829, "step": 22938 }, { "epoch": 0.76, "grad_norm": 0.42617589235305786, "learning_rate": 0.0005091833227559537, "loss": 1.89, "step": 22939 }, { "epoch": 0.76, "grad_norm": 0.415250688791275, "learning_rate": 0.0005091758305408883, "loss": 1.8332, "step": 22940 }, { "epoch": 0.76, "grad_norm": 0.4445681571960449, "learning_rate": 0.0005091683380719143, "loss": 1.8585, "step": 22941 }, { "epoch": 0.76, "grad_norm": 0.4593457579612732, "learning_rate": 0.0005091608453490406, "loss": 1.9163, "step": 22942 }, { "epoch": 0.76, "grad_norm": 0.4617462754249573, "learning_rate": 0.0005091533523722766, "loss": 1.7735, "step": 22943 }, { "epoch": 0.76, "grad_norm": 0.4305301606655121, "learning_rate": 0.0005091458591416311, "loss": 1.8296, "step": 22944 }, { "epoch": 0.76, "grad_norm": 0.45035865902900696, "learning_rate": 0.0005091383656571135, "loss": 1.8464, "step": 22945 }, { "epoch": 0.76, "grad_norm": 0.43755030632019043, "learning_rate": 0.0005091308719187327, "loss": 1.8809, "step": 22946 }, { "epoch": 0.76, "grad_norm": 0.4359147846698761, "learning_rate": 0.0005091233779264978, "loss": 1.8942, "step": 22947 }, { "epoch": 0.76, "grad_norm": 0.42432984709739685, "learning_rate": 0.000509115883680418, "loss": 1.8084, "step": 22948 }, { "epoch": 0.76, "grad_norm": 0.45199868083000183, "learning_rate": 0.0005091083891805024, "loss": 1.8652, "step": 22949 }, { "epoch": 0.76, "grad_norm": 0.42546722292900085, "learning_rate": 0.0005091008944267598, "loss": 1.8426, "step": 22950 }, { "epoch": 0.76, "grad_norm": 0.42260897159576416, "learning_rate": 0.0005090933994191996, "loss": 1.8396, "step": 22951 }, { "epoch": 0.76, "grad_norm": 0.46127957105636597, "learning_rate": 0.0005090859041578309, "loss": 1.7989, "step": 22952 }, { "epoch": 0.76, "grad_norm": 0.4304232597351074, "learning_rate": 0.0005090784086426627, "loss": 1.842, "step": 22953 }, { "epoch": 0.76, "grad_norm": 0.44543105363845825, "learning_rate": 0.0005090709128737042, "loss": 1.8073, "step": 22954 }, { "epoch": 0.76, "grad_norm": 0.6021592020988464, "learning_rate": 0.0005090634168509643, "loss": 1.8775, "step": 22955 }, { "epoch": 0.76, "grad_norm": 0.4627974331378937, "learning_rate": 0.0005090559205744522, "loss": 1.7733, "step": 22956 }, { "epoch": 0.76, "grad_norm": 0.44077667593955994, "learning_rate": 0.0005090484240441771, "loss": 1.8407, "step": 22957 }, { "epoch": 0.76, "grad_norm": 0.43166986107826233, "learning_rate": 0.0005090409272601479, "loss": 1.8502, "step": 22958 }, { "epoch": 0.76, "grad_norm": 0.44019070267677307, "learning_rate": 0.0005090334302223739, "loss": 1.889, "step": 22959 }, { "epoch": 0.76, "grad_norm": 0.4299216866493225, "learning_rate": 0.000509025932930864, "loss": 1.8263, "step": 22960 }, { "epoch": 0.76, "grad_norm": 0.4349110424518585, "learning_rate": 0.0005090184353856276, "loss": 1.8, "step": 22961 }, { "epoch": 0.76, "grad_norm": 0.4211193025112152, "learning_rate": 0.0005090109375866735, "loss": 1.8045, "step": 22962 }, { "epoch": 0.76, "grad_norm": 0.4364239275455475, "learning_rate": 0.0005090034395340109, "loss": 1.8041, "step": 22963 }, { "epoch": 0.76, "grad_norm": 0.4359591007232666, "learning_rate": 0.000508995941227649, "loss": 1.8545, "step": 22964 }, { "epoch": 0.76, "grad_norm": 0.42595958709716797, "learning_rate": 0.0005089884426675967, "loss": 1.7379, "step": 22965 }, { "epoch": 0.76, "grad_norm": 0.4575723111629486, "learning_rate": 0.0005089809438538635, "loss": 1.8957, "step": 22966 }, { "epoch": 0.76, "grad_norm": 0.434736967086792, "learning_rate": 0.0005089734447864579, "loss": 1.8417, "step": 22967 }, { "epoch": 0.76, "grad_norm": 0.4310111403465271, "learning_rate": 0.0005089659454653896, "loss": 1.8391, "step": 22968 }, { "epoch": 0.76, "grad_norm": 0.4408726394176483, "learning_rate": 0.0005089584458906673, "loss": 1.8491, "step": 22969 }, { "epoch": 0.76, "grad_norm": 0.4569953978061676, "learning_rate": 0.0005089509460623002, "loss": 1.9051, "step": 22970 }, { "epoch": 0.76, "grad_norm": 0.47112491726875305, "learning_rate": 0.0005089434459802975, "loss": 1.8516, "step": 22971 }, { "epoch": 0.76, "grad_norm": 0.47203171253204346, "learning_rate": 0.0005089359456446681, "loss": 1.7985, "step": 22972 }, { "epoch": 0.76, "grad_norm": 0.4291154444217682, "learning_rate": 0.0005089284450554214, "loss": 1.7741, "step": 22973 }, { "epoch": 0.76, "grad_norm": 0.43784424662590027, "learning_rate": 0.0005089209442125664, "loss": 1.8487, "step": 22974 }, { "epoch": 0.76, "grad_norm": 0.4350718557834625, "learning_rate": 0.000508913443116112, "loss": 1.8602, "step": 22975 }, { "epoch": 0.76, "grad_norm": 0.44192567467689514, "learning_rate": 0.0005089059417660675, "loss": 1.8572, "step": 22976 }, { "epoch": 0.76, "grad_norm": 0.45421624183654785, "learning_rate": 0.0005088984401624419, "loss": 1.8116, "step": 22977 }, { "epoch": 0.76, "grad_norm": 0.4282580614089966, "learning_rate": 0.0005088909383052446, "loss": 1.7393, "step": 22978 }, { "epoch": 0.76, "grad_norm": 0.42147016525268555, "learning_rate": 0.0005088834361944843, "loss": 1.8195, "step": 22979 }, { "epoch": 0.76, "grad_norm": 0.44950446486473083, "learning_rate": 0.0005088759338301703, "loss": 1.9025, "step": 22980 }, { "epoch": 0.76, "grad_norm": 0.4509216547012329, "learning_rate": 0.0005088684312123117, "loss": 1.8233, "step": 22981 }, { "epoch": 0.76, "grad_norm": 0.4340876042842865, "learning_rate": 0.0005088609283409178, "loss": 1.9106, "step": 22982 }, { "epoch": 0.76, "grad_norm": 0.4322662055492401, "learning_rate": 0.0005088534252159973, "loss": 1.8171, "step": 22983 }, { "epoch": 0.76, "grad_norm": 0.43597885966300964, "learning_rate": 0.0005088459218375595, "loss": 1.8308, "step": 22984 }, { "epoch": 0.76, "grad_norm": 0.4442203938961029, "learning_rate": 0.0005088384182056136, "loss": 1.829, "step": 22985 }, { "epoch": 0.76, "grad_norm": 0.42653214931488037, "learning_rate": 0.0005088309143201686, "loss": 1.7595, "step": 22986 }, { "epoch": 0.76, "grad_norm": 0.42972415685653687, "learning_rate": 0.0005088234101812336, "loss": 1.8033, "step": 22987 }, { "epoch": 0.76, "grad_norm": 0.4151160418987274, "learning_rate": 0.0005088159057888179, "loss": 1.7812, "step": 22988 }, { "epoch": 0.76, "grad_norm": 0.4331655502319336, "learning_rate": 0.0005088084011429303, "loss": 1.8222, "step": 22989 }, { "epoch": 0.76, "grad_norm": 0.43444621562957764, "learning_rate": 0.0005088008962435801, "loss": 1.8003, "step": 22990 }, { "epoch": 0.76, "grad_norm": 0.44435304403305054, "learning_rate": 0.0005087933910907764, "loss": 1.8298, "step": 22991 }, { "epoch": 0.76, "grad_norm": 0.42809173464775085, "learning_rate": 0.0005087858856845283, "loss": 1.741, "step": 22992 }, { "epoch": 0.76, "grad_norm": 0.5084367394447327, "learning_rate": 0.0005087783800248449, "loss": 1.8584, "step": 22993 }, { "epoch": 0.77, "grad_norm": 0.5829678177833557, "learning_rate": 0.0005087708741117353, "loss": 1.7487, "step": 22994 }, { "epoch": 0.77, "grad_norm": 0.4365997016429901, "learning_rate": 0.0005087633679452086, "loss": 1.8467, "step": 22995 }, { "epoch": 0.77, "grad_norm": 0.43544724583625793, "learning_rate": 0.0005087558615252739, "loss": 1.8266, "step": 22996 }, { "epoch": 0.77, "grad_norm": 0.42500704526901245, "learning_rate": 0.0005087483548519404, "loss": 1.7507, "step": 22997 }, { "epoch": 0.77, "grad_norm": 0.44338735938072205, "learning_rate": 0.0005087408479252172, "loss": 1.7454, "step": 22998 }, { "epoch": 0.77, "grad_norm": 0.437826007604599, "learning_rate": 0.0005087333407451133, "loss": 1.8003, "step": 22999 }, { "epoch": 0.77, "grad_norm": 0.4284641444683075, "learning_rate": 0.0005087258333116379, "loss": 1.8588, "step": 23000 }, { "epoch": 0.77, "grad_norm": 0.446140319108963, "learning_rate": 0.0005087183256248, "loss": 1.9004, "step": 23001 }, { "epoch": 0.77, "grad_norm": 0.42195653915405273, "learning_rate": 0.0005087108176846089, "loss": 1.7399, "step": 23002 }, { "epoch": 0.77, "grad_norm": 0.448613703250885, "learning_rate": 0.0005087033094910736, "loss": 1.8543, "step": 23003 }, { "epoch": 0.77, "grad_norm": 0.4459913969039917, "learning_rate": 0.0005086958010442033, "loss": 1.8221, "step": 23004 }, { "epoch": 0.77, "grad_norm": 0.5311578512191772, "learning_rate": 0.0005086882923440069, "loss": 1.8812, "step": 23005 }, { "epoch": 0.77, "grad_norm": 0.434566468000412, "learning_rate": 0.0005086807833904938, "loss": 1.7264, "step": 23006 }, { "epoch": 0.77, "grad_norm": 0.4441468417644501, "learning_rate": 0.0005086732741836729, "loss": 1.8662, "step": 23007 }, { "epoch": 0.77, "grad_norm": 0.4421376883983612, "learning_rate": 0.0005086657647235535, "loss": 1.756, "step": 23008 }, { "epoch": 0.77, "grad_norm": 0.43717584013938904, "learning_rate": 0.0005086582550101445, "loss": 1.8147, "step": 23009 }, { "epoch": 0.77, "grad_norm": 0.4346950054168701, "learning_rate": 0.0005086507450434551, "loss": 1.8091, "step": 23010 }, { "epoch": 0.77, "grad_norm": 0.43897292017936707, "learning_rate": 0.0005086432348234946, "loss": 1.8055, "step": 23011 }, { "epoch": 0.77, "grad_norm": 0.4290336072444916, "learning_rate": 0.0005086357243502717, "loss": 1.8623, "step": 23012 }, { "epoch": 0.77, "grad_norm": 0.47633978724479675, "learning_rate": 0.000508628213623796, "loss": 1.8785, "step": 23013 }, { "epoch": 0.77, "grad_norm": 0.4466772675514221, "learning_rate": 0.0005086207026440763, "loss": 1.8336, "step": 23014 }, { "epoch": 0.77, "grad_norm": 0.4343824088573456, "learning_rate": 0.0005086131914111218, "loss": 1.8856, "step": 23015 }, { "epoch": 0.77, "grad_norm": 0.5368759036064148, "learning_rate": 0.0005086056799249416, "loss": 1.8976, "step": 23016 }, { "epoch": 0.77, "grad_norm": 0.44087353348731995, "learning_rate": 0.0005085981681855449, "loss": 1.8877, "step": 23017 }, { "epoch": 0.77, "grad_norm": 0.4383585751056671, "learning_rate": 0.0005085906561929408, "loss": 1.8642, "step": 23018 }, { "epoch": 0.77, "grad_norm": 0.4479564428329468, "learning_rate": 0.0005085831439471382, "loss": 1.8455, "step": 23019 }, { "epoch": 0.77, "grad_norm": 0.430462121963501, "learning_rate": 0.0005085756314481465, "loss": 1.7915, "step": 23020 }, { "epoch": 0.77, "grad_norm": 0.43590232729911804, "learning_rate": 0.0005085681186959747, "loss": 1.8935, "step": 23021 }, { "epoch": 0.77, "grad_norm": 0.4496060311794281, "learning_rate": 0.000508560605690632, "loss": 1.7381, "step": 23022 }, { "epoch": 0.77, "grad_norm": 0.44334807991981506, "learning_rate": 0.0005085530924321274, "loss": 1.8547, "step": 23023 }, { "epoch": 0.77, "grad_norm": 0.4400150775909424, "learning_rate": 0.00050854557892047, "loss": 1.873, "step": 23024 }, { "epoch": 0.77, "grad_norm": 0.44326305389404297, "learning_rate": 0.0005085380651556692, "loss": 1.8723, "step": 23025 }, { "epoch": 0.77, "grad_norm": 0.4518696963787079, "learning_rate": 0.0005085305511377339, "loss": 1.7836, "step": 23026 }, { "epoch": 0.77, "grad_norm": 0.4470706880092621, "learning_rate": 0.000508523036866673, "loss": 1.8519, "step": 23027 }, { "epoch": 0.77, "grad_norm": 0.43310797214508057, "learning_rate": 0.000508515522342496, "loss": 1.8709, "step": 23028 }, { "epoch": 0.77, "grad_norm": 0.4276578426361084, "learning_rate": 0.0005085080075652119, "loss": 1.7708, "step": 23029 }, { "epoch": 0.77, "grad_norm": 0.4235503077507019, "learning_rate": 0.0005085004925348298, "loss": 1.7235, "step": 23030 }, { "epoch": 0.77, "grad_norm": 0.44574037194252014, "learning_rate": 0.0005084929772513588, "loss": 1.848, "step": 23031 }, { "epoch": 0.77, "grad_norm": 0.4308740198612213, "learning_rate": 0.0005084854617148079, "loss": 1.8278, "step": 23032 }, { "epoch": 0.77, "grad_norm": 0.4324786067008972, "learning_rate": 0.0005084779459251866, "loss": 1.8695, "step": 23033 }, { "epoch": 0.77, "grad_norm": 0.4462031126022339, "learning_rate": 0.0005084704298825036, "loss": 1.7798, "step": 23034 }, { "epoch": 0.77, "grad_norm": 0.444877028465271, "learning_rate": 0.0005084629135867684, "loss": 1.8733, "step": 23035 }, { "epoch": 0.77, "grad_norm": 0.42262646555900574, "learning_rate": 0.0005084553970379898, "loss": 1.7866, "step": 23036 }, { "epoch": 0.77, "grad_norm": 0.43628832697868347, "learning_rate": 0.0005084478802361771, "loss": 1.8343, "step": 23037 }, { "epoch": 0.77, "grad_norm": 0.4456733465194702, "learning_rate": 0.0005084403631813395, "loss": 1.7885, "step": 23038 }, { "epoch": 0.77, "grad_norm": 0.4343055784702301, "learning_rate": 0.0005084328458734858, "loss": 1.8613, "step": 23039 }, { "epoch": 0.77, "grad_norm": 0.42991864681243896, "learning_rate": 0.0005084253283126255, "loss": 1.7933, "step": 23040 }, { "epoch": 0.77, "grad_norm": 0.44177505373954773, "learning_rate": 0.0005084178104987675, "loss": 1.836, "step": 23041 }, { "epoch": 0.77, "grad_norm": 0.6230074763298035, "learning_rate": 0.000508410292431921, "loss": 1.8437, "step": 23042 }, { "epoch": 0.77, "grad_norm": 0.4409521818161011, "learning_rate": 0.0005084027741120952, "loss": 1.8825, "step": 23043 }, { "epoch": 0.77, "grad_norm": 0.4403631389141083, "learning_rate": 0.000508395255539299, "loss": 1.9078, "step": 23044 }, { "epoch": 0.77, "grad_norm": 0.434349924325943, "learning_rate": 0.0005083877367135417, "loss": 1.8765, "step": 23045 }, { "epoch": 0.77, "grad_norm": 0.4301992654800415, "learning_rate": 0.0005083802176348324, "loss": 1.8949, "step": 23046 }, { "epoch": 0.77, "grad_norm": 0.43526825308799744, "learning_rate": 0.0005083726983031802, "loss": 1.8225, "step": 23047 }, { "epoch": 0.77, "grad_norm": 0.43230941891670227, "learning_rate": 0.0005083651787185943, "loss": 1.8706, "step": 23048 }, { "epoch": 0.77, "grad_norm": 0.42249831557273865, "learning_rate": 0.0005083576588810838, "loss": 1.8739, "step": 23049 }, { "epoch": 0.77, "grad_norm": 1.9168325662612915, "learning_rate": 0.0005083501387906576, "loss": 1.9648, "step": 23050 }, { "epoch": 0.77, "grad_norm": 0.4603408873081207, "learning_rate": 0.0005083426184473253, "loss": 1.8661, "step": 23051 }, { "epoch": 0.77, "grad_norm": 0.4244069755077362, "learning_rate": 0.0005083350978510956, "loss": 1.7979, "step": 23052 }, { "epoch": 0.77, "grad_norm": 0.43665146827697754, "learning_rate": 0.0005083275770019778, "loss": 1.8034, "step": 23053 }, { "epoch": 0.77, "grad_norm": 0.4328423738479614, "learning_rate": 0.0005083200558999811, "loss": 1.7969, "step": 23054 }, { "epoch": 0.77, "grad_norm": 0.42299866676330566, "learning_rate": 0.0005083125345451145, "loss": 1.7921, "step": 23055 }, { "epoch": 0.77, "grad_norm": 0.44276562333106995, "learning_rate": 0.0005083050129373871, "loss": 1.844, "step": 23056 }, { "epoch": 0.77, "grad_norm": 0.4359835088253021, "learning_rate": 0.0005082974910768082, "loss": 1.8677, "step": 23057 }, { "epoch": 0.77, "grad_norm": 0.7133337259292603, "learning_rate": 0.0005082899689633868, "loss": 1.787, "step": 23058 }, { "epoch": 0.77, "grad_norm": 0.4444441795349121, "learning_rate": 0.0005082824465971321, "loss": 1.8465, "step": 23059 }, { "epoch": 0.77, "grad_norm": 0.44144195318222046, "learning_rate": 0.0005082749239780532, "loss": 1.8655, "step": 23060 }, { "epoch": 0.77, "grad_norm": 0.47403520345687866, "learning_rate": 0.0005082674011061592, "loss": 1.7457, "step": 23061 }, { "epoch": 0.77, "grad_norm": 0.43121692538261414, "learning_rate": 0.0005082598779814592, "loss": 1.818, "step": 23062 }, { "epoch": 0.77, "grad_norm": 0.4430334270000458, "learning_rate": 0.0005082523546039625, "loss": 1.8563, "step": 23063 }, { "epoch": 0.77, "grad_norm": 0.43855026364326477, "learning_rate": 0.0005082448309736781, "loss": 1.8375, "step": 23064 }, { "epoch": 0.77, "grad_norm": 0.42297327518463135, "learning_rate": 0.0005082373070906151, "loss": 1.7862, "step": 23065 }, { "epoch": 0.77, "grad_norm": 0.448785662651062, "learning_rate": 0.0005082297829547827, "loss": 1.7854, "step": 23066 }, { "epoch": 0.77, "grad_norm": 0.4334597885608673, "learning_rate": 0.0005082222585661902, "loss": 1.7011, "step": 23067 }, { "epoch": 0.77, "grad_norm": 0.4384314715862274, "learning_rate": 0.0005082147339248463, "loss": 1.8312, "step": 23068 }, { "epoch": 0.77, "grad_norm": 0.4557705223560333, "learning_rate": 0.0005082072090307606, "loss": 1.8254, "step": 23069 }, { "epoch": 0.77, "grad_norm": 0.44184479117393494, "learning_rate": 0.000508199683883942, "loss": 1.7917, "step": 23070 }, { "epoch": 0.77, "grad_norm": 0.4176601469516754, "learning_rate": 0.0005081921584843995, "loss": 1.8508, "step": 23071 }, { "epoch": 0.77, "grad_norm": 0.4358190894126892, "learning_rate": 0.0005081846328321425, "loss": 1.7851, "step": 23072 }, { "epoch": 0.77, "grad_norm": 0.43666258454322815, "learning_rate": 0.0005081771069271801, "loss": 1.7842, "step": 23073 }, { "epoch": 0.77, "grad_norm": 0.4158976078033447, "learning_rate": 0.0005081695807695212, "loss": 1.8265, "step": 23074 }, { "epoch": 0.77, "grad_norm": 0.4393189251422882, "learning_rate": 0.0005081620543591752, "loss": 1.7001, "step": 23075 }, { "epoch": 0.77, "grad_norm": 0.43245184421539307, "learning_rate": 0.0005081545276961511, "loss": 1.7923, "step": 23076 }, { "epoch": 0.77, "grad_norm": 0.44199490547180176, "learning_rate": 0.0005081470007804581, "loss": 1.9168, "step": 23077 }, { "epoch": 0.77, "grad_norm": 0.44645678997039795, "learning_rate": 0.0005081394736121054, "loss": 1.8623, "step": 23078 }, { "epoch": 0.77, "grad_norm": 0.4130510091781616, "learning_rate": 0.0005081319461911019, "loss": 1.8216, "step": 23079 }, { "epoch": 0.77, "grad_norm": 0.4516286253929138, "learning_rate": 0.0005081244185174569, "loss": 1.8671, "step": 23080 }, { "epoch": 0.77, "grad_norm": 0.4408939778804779, "learning_rate": 0.0005081168905911797, "loss": 1.8403, "step": 23081 }, { "epoch": 0.77, "grad_norm": 0.41061750054359436, "learning_rate": 0.000508109362412279, "loss": 1.7778, "step": 23082 }, { "epoch": 0.77, "grad_norm": 0.4214402139186859, "learning_rate": 0.0005081018339807644, "loss": 1.8303, "step": 23083 }, { "epoch": 0.77, "grad_norm": 0.42361244559288025, "learning_rate": 0.0005080943052966447, "loss": 1.8346, "step": 23084 }, { "epoch": 0.77, "grad_norm": 0.41405239701271057, "learning_rate": 0.0005080867763599293, "loss": 1.7731, "step": 23085 }, { "epoch": 0.77, "grad_norm": 0.4292665421962738, "learning_rate": 0.0005080792471706271, "loss": 1.9019, "step": 23086 }, { "epoch": 0.77, "grad_norm": 0.42615142464637756, "learning_rate": 0.0005080717177287474, "loss": 1.8459, "step": 23087 }, { "epoch": 0.77, "grad_norm": 0.4345541298389435, "learning_rate": 0.0005080641880342993, "loss": 1.8939, "step": 23088 }, { "epoch": 0.77, "grad_norm": 0.42043444514274597, "learning_rate": 0.0005080566580872919, "loss": 1.8446, "step": 23089 }, { "epoch": 0.77, "grad_norm": 0.4330170452594757, "learning_rate": 0.0005080491278877344, "loss": 1.7813, "step": 23090 }, { "epoch": 0.77, "grad_norm": 0.44392943382263184, "learning_rate": 0.0005080415974356359, "loss": 1.8641, "step": 23091 }, { "epoch": 0.77, "grad_norm": 0.43106746673583984, "learning_rate": 0.0005080340667310056, "loss": 1.8465, "step": 23092 }, { "epoch": 0.77, "grad_norm": 0.4715515375137329, "learning_rate": 0.0005080265357738527, "loss": 1.8075, "step": 23093 }, { "epoch": 0.77, "grad_norm": 0.422072172164917, "learning_rate": 0.0005080190045641859, "loss": 1.8167, "step": 23094 }, { "epoch": 0.77, "grad_norm": 0.43443161249160767, "learning_rate": 0.000508011473102015, "loss": 1.827, "step": 23095 }, { "epoch": 0.77, "grad_norm": 0.4485305845737457, "learning_rate": 0.0005080039413873486, "loss": 1.8359, "step": 23096 }, { "epoch": 0.77, "grad_norm": 0.44191044569015503, "learning_rate": 0.0005079964094201962, "loss": 1.7884, "step": 23097 }, { "epoch": 0.77, "grad_norm": 0.4224311113357544, "learning_rate": 0.0005079888772005668, "loss": 1.8101, "step": 23098 }, { "epoch": 0.77, "grad_norm": 0.43596819043159485, "learning_rate": 0.0005079813447284695, "loss": 1.826, "step": 23099 }, { "epoch": 0.77, "grad_norm": 0.43088605999946594, "learning_rate": 0.0005079738120039135, "loss": 1.7807, "step": 23100 }, { "epoch": 0.77, "grad_norm": 0.4401952922344208, "learning_rate": 0.000507966279026908, "loss": 1.8372, "step": 23101 }, { "epoch": 0.77, "grad_norm": 0.4266150891780853, "learning_rate": 0.000507958745797462, "loss": 1.8866, "step": 23102 }, { "epoch": 0.77, "grad_norm": 0.4393031895160675, "learning_rate": 0.0005079512123155847, "loss": 1.7792, "step": 23103 }, { "epoch": 0.77, "grad_norm": 0.4361589550971985, "learning_rate": 0.0005079436785812853, "loss": 1.8399, "step": 23104 }, { "epoch": 0.77, "grad_norm": 0.45277976989746094, "learning_rate": 0.0005079361445945729, "loss": 1.7901, "step": 23105 }, { "epoch": 0.77, "grad_norm": 0.45644497871398926, "learning_rate": 0.0005079286103554568, "loss": 1.8379, "step": 23106 }, { "epoch": 0.77, "grad_norm": 0.42835569381713867, "learning_rate": 0.0005079210758639458, "loss": 1.8242, "step": 23107 }, { "epoch": 0.77, "grad_norm": 0.45144742727279663, "learning_rate": 0.0005079135411200494, "loss": 1.8218, "step": 23108 }, { "epoch": 0.77, "grad_norm": 0.42708268761634827, "learning_rate": 0.0005079060061237764, "loss": 1.8184, "step": 23109 }, { "epoch": 0.77, "grad_norm": 0.44343388080596924, "learning_rate": 0.0005078984708751363, "loss": 1.9047, "step": 23110 }, { "epoch": 0.77, "grad_norm": 0.4227116107940674, "learning_rate": 0.0005078909353741382, "loss": 1.8511, "step": 23111 }, { "epoch": 0.77, "grad_norm": 0.41499194502830505, "learning_rate": 0.0005078833996207909, "loss": 1.8682, "step": 23112 }, { "epoch": 0.77, "grad_norm": 0.4381977319717407, "learning_rate": 0.0005078758636151039, "loss": 1.8491, "step": 23113 }, { "epoch": 0.77, "grad_norm": 0.4525242745876312, "learning_rate": 0.0005078683273570862, "loss": 1.7883, "step": 23114 }, { "epoch": 0.77, "grad_norm": 0.44463732838630676, "learning_rate": 0.000507860790846747, "loss": 1.8659, "step": 23115 }, { "epoch": 0.77, "grad_norm": 0.42432069778442383, "learning_rate": 0.0005078532540840954, "loss": 1.9047, "step": 23116 }, { "epoch": 0.77, "grad_norm": 0.5110430121421814, "learning_rate": 0.0005078457170691407, "loss": 1.8393, "step": 23117 }, { "epoch": 0.77, "grad_norm": 0.4225422143936157, "learning_rate": 0.0005078381798018917, "loss": 1.7679, "step": 23118 }, { "epoch": 0.77, "grad_norm": 0.44056257605552673, "learning_rate": 0.0005078306422823579, "loss": 1.8526, "step": 23119 }, { "epoch": 0.77, "grad_norm": 0.4355851113796234, "learning_rate": 0.0005078231045105483, "loss": 1.8103, "step": 23120 }, { "epoch": 0.77, "grad_norm": 0.407321572303772, "learning_rate": 0.0005078155664864721, "loss": 1.8203, "step": 23121 }, { "epoch": 0.77, "grad_norm": 0.4333434998989105, "learning_rate": 0.0005078080282101384, "loss": 1.8633, "step": 23122 }, { "epoch": 0.77, "grad_norm": 0.43171820044517517, "learning_rate": 0.0005078004896815565, "loss": 1.7906, "step": 23123 }, { "epoch": 0.77, "grad_norm": 0.42229726910591125, "learning_rate": 0.0005077929509007353, "loss": 1.8194, "step": 23124 }, { "epoch": 0.77, "grad_norm": 0.43789058923721313, "learning_rate": 0.000507785411867684, "loss": 1.7123, "step": 23125 }, { "epoch": 0.77, "grad_norm": 0.42562979459762573, "learning_rate": 0.0005077778725824119, "loss": 1.8311, "step": 23126 }, { "epoch": 0.77, "grad_norm": 0.4244256317615509, "learning_rate": 0.0005077703330449281, "loss": 1.9072, "step": 23127 }, { "epoch": 0.77, "grad_norm": 0.43991395831108093, "learning_rate": 0.0005077627932552418, "loss": 1.8252, "step": 23128 }, { "epoch": 0.77, "grad_norm": 0.4341139793395996, "learning_rate": 0.0005077552532133619, "loss": 1.8116, "step": 23129 }, { "epoch": 0.77, "grad_norm": 0.42927947640419006, "learning_rate": 0.0005077477129192979, "loss": 1.7756, "step": 23130 }, { "epoch": 0.77, "grad_norm": 0.4280872941017151, "learning_rate": 0.0005077401723730587, "loss": 1.7791, "step": 23131 }, { "epoch": 0.77, "grad_norm": 0.43433573842048645, "learning_rate": 0.0005077326315746535, "loss": 1.8762, "step": 23132 }, { "epoch": 0.77, "grad_norm": 0.4370672404766083, "learning_rate": 0.0005077250905240916, "loss": 1.8532, "step": 23133 }, { "epoch": 0.77, "grad_norm": 0.4528353214263916, "learning_rate": 0.000507717549221382, "loss": 1.8984, "step": 23134 }, { "epoch": 0.77, "grad_norm": 0.45041370391845703, "learning_rate": 0.0005077100076665338, "loss": 1.8162, "step": 23135 }, { "epoch": 0.77, "grad_norm": 0.42071759700775146, "learning_rate": 0.0005077024658595564, "loss": 1.7675, "step": 23136 }, { "epoch": 0.77, "grad_norm": 0.4386065602302551, "learning_rate": 0.0005076949238004587, "loss": 1.8172, "step": 23137 }, { "epoch": 0.77, "grad_norm": 0.4367527365684509, "learning_rate": 0.0005076873814892501, "loss": 1.8758, "step": 23138 }, { "epoch": 0.77, "grad_norm": 0.451709508895874, "learning_rate": 0.0005076798389259395, "loss": 1.8572, "step": 23139 }, { "epoch": 0.77, "grad_norm": 0.4423274099826813, "learning_rate": 0.0005076722961105363, "loss": 1.7963, "step": 23140 }, { "epoch": 0.77, "grad_norm": 0.4306148886680603, "learning_rate": 0.0005076647530430494, "loss": 1.8167, "step": 23141 }, { "epoch": 0.77, "grad_norm": 0.4598701298236847, "learning_rate": 0.0005076572097234882, "loss": 1.84, "step": 23142 }, { "epoch": 0.77, "grad_norm": 0.4274905323982239, "learning_rate": 0.0005076496661518616, "loss": 1.7334, "step": 23143 }, { "epoch": 0.77, "grad_norm": 0.452119380235672, "learning_rate": 0.000507642122328179, "loss": 1.8912, "step": 23144 }, { "epoch": 0.77, "grad_norm": 0.43407338857650757, "learning_rate": 0.0005076345782524495, "loss": 1.8348, "step": 23145 }, { "epoch": 0.77, "grad_norm": 0.4518958032131195, "learning_rate": 0.0005076270339246822, "loss": 1.8355, "step": 23146 }, { "epoch": 0.77, "grad_norm": 0.5263976454734802, "learning_rate": 0.0005076194893448862, "loss": 1.8512, "step": 23147 }, { "epoch": 0.77, "grad_norm": 0.4659641981124878, "learning_rate": 0.0005076119445130708, "loss": 1.8281, "step": 23148 }, { "epoch": 0.77, "grad_norm": 0.4559883773326874, "learning_rate": 0.000507604399429245, "loss": 1.9374, "step": 23149 }, { "epoch": 0.77, "grad_norm": 0.42808496952056885, "learning_rate": 0.0005075968540934181, "loss": 1.8402, "step": 23150 }, { "epoch": 0.77, "grad_norm": 0.5259381532669067, "learning_rate": 0.0005075893085055992, "loss": 1.7985, "step": 23151 }, { "epoch": 0.77, "grad_norm": 0.45261019468307495, "learning_rate": 0.0005075817626657975, "loss": 1.7982, "step": 23152 }, { "epoch": 0.77, "grad_norm": 0.4294240176677704, "learning_rate": 0.0005075742165740222, "loss": 1.8527, "step": 23153 }, { "epoch": 0.77, "grad_norm": 0.440970778465271, "learning_rate": 0.0005075666702302822, "loss": 1.8407, "step": 23154 }, { "epoch": 0.77, "grad_norm": 0.4543685019016266, "learning_rate": 0.0005075591236345869, "loss": 1.7978, "step": 23155 }, { "epoch": 0.77, "grad_norm": 0.46214646100997925, "learning_rate": 0.0005075515767869455, "loss": 1.8144, "step": 23156 }, { "epoch": 0.77, "grad_norm": 0.4268014430999756, "learning_rate": 0.000507544029687367, "loss": 1.8661, "step": 23157 }, { "epoch": 0.77, "grad_norm": 0.42534762620925903, "learning_rate": 0.0005075364823358607, "loss": 1.7714, "step": 23158 }, { "epoch": 0.77, "grad_norm": 0.4326792061328888, "learning_rate": 0.0005075289347324355, "loss": 1.8538, "step": 23159 }, { "epoch": 0.77, "grad_norm": 0.44912955164909363, "learning_rate": 0.0005075213868771009, "loss": 1.8434, "step": 23160 }, { "epoch": 0.77, "grad_norm": 0.45790788531303406, "learning_rate": 0.000507513838769866, "loss": 1.8773, "step": 23161 }, { "epoch": 0.77, "grad_norm": 0.43797942996025085, "learning_rate": 0.0005075062904107396, "loss": 1.8159, "step": 23162 }, { "epoch": 0.77, "grad_norm": 0.4310346245765686, "learning_rate": 0.0005074987417997313, "loss": 1.8561, "step": 23163 }, { "epoch": 0.77, "grad_norm": 0.43448883295059204, "learning_rate": 0.0005074911929368502, "loss": 1.8843, "step": 23164 }, { "epoch": 0.77, "grad_norm": 0.44974738359451294, "learning_rate": 0.0005074836438221053, "loss": 1.768, "step": 23165 }, { "epoch": 0.77, "grad_norm": 0.415823757648468, "learning_rate": 0.0005074760944555057, "loss": 1.8135, "step": 23166 }, { "epoch": 0.77, "grad_norm": 0.4354568421840668, "learning_rate": 0.0005074685448370608, "loss": 1.8972, "step": 23167 }, { "epoch": 0.77, "grad_norm": 0.444145143032074, "learning_rate": 0.0005074609949667797, "loss": 1.8633, "step": 23168 }, { "epoch": 0.77, "grad_norm": 0.44089001417160034, "learning_rate": 0.0005074534448446715, "loss": 1.8837, "step": 23169 }, { "epoch": 0.77, "grad_norm": 0.4320952892303467, "learning_rate": 0.0005074458944707452, "loss": 1.7578, "step": 23170 }, { "epoch": 0.77, "grad_norm": 0.44017842411994934, "learning_rate": 0.0005074383438450103, "loss": 1.7925, "step": 23171 }, { "epoch": 0.77, "grad_norm": 0.44614022970199585, "learning_rate": 0.0005074307929674759, "loss": 1.8549, "step": 23172 }, { "epoch": 0.77, "grad_norm": 0.43415507674217224, "learning_rate": 0.000507423241838151, "loss": 1.8641, "step": 23173 }, { "epoch": 0.77, "grad_norm": 0.4430251717567444, "learning_rate": 0.0005074156904570448, "loss": 1.8262, "step": 23174 }, { "epoch": 0.77, "grad_norm": 0.43518897891044617, "learning_rate": 0.0005074081388241666, "loss": 1.7689, "step": 23175 }, { "epoch": 0.77, "grad_norm": 0.42886900901794434, "learning_rate": 0.0005074005869395254, "loss": 1.86, "step": 23176 }, { "epoch": 0.77, "grad_norm": 0.4283304214477539, "learning_rate": 0.0005073930348031305, "loss": 1.8688, "step": 23177 }, { "epoch": 0.77, "grad_norm": 0.424348384141922, "learning_rate": 0.0005073854824149909, "loss": 1.8169, "step": 23178 }, { "epoch": 0.77, "grad_norm": 0.44221004843711853, "learning_rate": 0.0005073779297751161, "loss": 1.8473, "step": 23179 }, { "epoch": 0.77, "grad_norm": 0.43365931510925293, "learning_rate": 0.0005073703768835149, "loss": 1.8509, "step": 23180 }, { "epoch": 0.77, "grad_norm": 0.4361015260219574, "learning_rate": 0.0005073628237401966, "loss": 1.7818, "step": 23181 }, { "epoch": 0.77, "grad_norm": 0.42548108100891113, "learning_rate": 0.0005073552703451704, "loss": 1.8137, "step": 23182 }, { "epoch": 0.77, "grad_norm": 0.42488616704940796, "learning_rate": 0.0005073477166984455, "loss": 1.797, "step": 23183 }, { "epoch": 0.77, "grad_norm": 0.4165554940700531, "learning_rate": 0.0005073401628000311, "loss": 1.7992, "step": 23184 }, { "epoch": 0.77, "grad_norm": 0.43947678804397583, "learning_rate": 0.0005073326086499362, "loss": 1.7961, "step": 23185 }, { "epoch": 0.77, "grad_norm": 0.4304405152797699, "learning_rate": 0.0005073250542481701, "loss": 1.8594, "step": 23186 }, { "epoch": 0.77, "grad_norm": 0.41859808564186096, "learning_rate": 0.0005073174995947418, "loss": 1.8092, "step": 23187 }, { "epoch": 0.77, "grad_norm": 0.43294379115104675, "learning_rate": 0.0005073099446896608, "loss": 1.8035, "step": 23188 }, { "epoch": 0.77, "grad_norm": 0.433883935213089, "learning_rate": 0.0005073023895329358, "loss": 1.868, "step": 23189 }, { "epoch": 0.77, "grad_norm": 0.4321000576019287, "learning_rate": 0.0005072948341245765, "loss": 1.7642, "step": 23190 }, { "epoch": 0.77, "grad_norm": 0.43792951107025146, "learning_rate": 0.0005072872784645916, "loss": 1.8163, "step": 23191 }, { "epoch": 0.77, "grad_norm": 0.42128342390060425, "learning_rate": 0.0005072797225529907, "loss": 1.845, "step": 23192 }, { "epoch": 0.77, "grad_norm": 0.43169546127319336, "learning_rate": 0.0005072721663897827, "loss": 1.7995, "step": 23193 }, { "epoch": 0.77, "grad_norm": 0.41474831104278564, "learning_rate": 0.0005072646099749768, "loss": 1.8055, "step": 23194 }, { "epoch": 0.77, "grad_norm": 0.43514707684516907, "learning_rate": 0.0005072570533085822, "loss": 1.7913, "step": 23195 }, { "epoch": 0.77, "grad_norm": 0.4392206370830536, "learning_rate": 0.0005072494963906081, "loss": 1.7762, "step": 23196 }, { "epoch": 0.77, "grad_norm": 0.433461457490921, "learning_rate": 0.0005072419392210635, "loss": 1.8254, "step": 23197 }, { "epoch": 0.77, "grad_norm": 0.5679571032524109, "learning_rate": 0.0005072343817999579, "loss": 1.841, "step": 23198 }, { "epoch": 0.77, "grad_norm": 0.42085322737693787, "learning_rate": 0.0005072268241273002, "loss": 1.8238, "step": 23199 }, { "epoch": 0.77, "grad_norm": 0.43835076689720154, "learning_rate": 0.0005072192662030998, "loss": 1.8247, "step": 23200 }, { "epoch": 0.77, "grad_norm": 0.43014442920684814, "learning_rate": 0.0005072117080273656, "loss": 1.8209, "step": 23201 }, { "epoch": 0.77, "grad_norm": 0.4344819188117981, "learning_rate": 0.000507204149600107, "loss": 1.827, "step": 23202 }, { "epoch": 0.77, "grad_norm": 0.42175188660621643, "learning_rate": 0.000507196590921333, "loss": 1.8786, "step": 23203 }, { "epoch": 0.77, "grad_norm": 0.4485098421573639, "learning_rate": 0.0005071890319910529, "loss": 1.7872, "step": 23204 }, { "epoch": 0.77, "grad_norm": 0.4475528597831726, "learning_rate": 0.0005071814728092759, "loss": 1.7188, "step": 23205 }, { "epoch": 0.77, "grad_norm": 0.42923644185066223, "learning_rate": 0.0005071739133760111, "loss": 1.7449, "step": 23206 }, { "epoch": 0.77, "grad_norm": 0.43404051661491394, "learning_rate": 0.0005071663536912677, "loss": 1.7324, "step": 23207 }, { "epoch": 0.77, "grad_norm": 0.4214796721935272, "learning_rate": 0.0005071587937550549, "loss": 1.7706, "step": 23208 }, { "epoch": 0.77, "grad_norm": 0.4481857120990753, "learning_rate": 0.0005071512335673818, "loss": 1.8534, "step": 23209 }, { "epoch": 0.77, "grad_norm": 0.44070106744766235, "learning_rate": 0.0005071436731282576, "loss": 1.894, "step": 23210 }, { "epoch": 0.77, "grad_norm": 0.4356177747249603, "learning_rate": 0.0005071361124376915, "loss": 1.7848, "step": 23211 }, { "epoch": 0.77, "grad_norm": 0.44184669852256775, "learning_rate": 0.0005071285514956928, "loss": 1.8043, "step": 23212 }, { "epoch": 0.77, "grad_norm": 0.43119654059410095, "learning_rate": 0.0005071209903022704, "loss": 1.8093, "step": 23213 }, { "epoch": 0.77, "grad_norm": 0.43703144788742065, "learning_rate": 0.0005071134288574337, "loss": 1.8654, "step": 23214 }, { "epoch": 0.77, "grad_norm": 0.4301903247833252, "learning_rate": 0.0005071058671611919, "loss": 1.8544, "step": 23215 }, { "epoch": 0.77, "grad_norm": 0.4301823377609253, "learning_rate": 0.000507098305213554, "loss": 1.8472, "step": 23216 }, { "epoch": 0.77, "grad_norm": 0.4397888481616974, "learning_rate": 0.0005070907430145292, "loss": 1.8179, "step": 23217 }, { "epoch": 0.77, "grad_norm": 0.4289150536060333, "learning_rate": 0.000507083180564127, "loss": 1.8312, "step": 23218 }, { "epoch": 0.77, "grad_norm": 0.42932453751564026, "learning_rate": 0.0005070756178623561, "loss": 1.8006, "step": 23219 }, { "epoch": 0.77, "grad_norm": 0.433408260345459, "learning_rate": 0.000507068054909226, "loss": 1.7812, "step": 23220 }, { "epoch": 0.77, "grad_norm": 0.4272615313529968, "learning_rate": 0.0005070604917047456, "loss": 1.8374, "step": 23221 }, { "epoch": 0.77, "grad_norm": 0.42898598313331604, "learning_rate": 0.0005070529282489246, "loss": 1.8526, "step": 23222 }, { "epoch": 0.77, "grad_norm": 0.4259156286716461, "learning_rate": 0.0005070453645417717, "loss": 1.8315, "step": 23223 }, { "epoch": 0.77, "grad_norm": 0.4231148958206177, "learning_rate": 0.0005070378005832962, "loss": 1.7684, "step": 23224 }, { "epoch": 0.77, "grad_norm": 0.42738667130470276, "learning_rate": 0.0005070302363735072, "loss": 1.7888, "step": 23225 }, { "epoch": 0.77, "grad_norm": 0.43771618604660034, "learning_rate": 0.0005070226719124141, "loss": 1.8008, "step": 23226 }, { "epoch": 0.77, "grad_norm": 0.4331016540527344, "learning_rate": 0.0005070151072000262, "loss": 1.8857, "step": 23227 }, { "epoch": 0.77, "grad_norm": 0.44968217611312866, "learning_rate": 0.0005070075422363522, "loss": 1.866, "step": 23228 }, { "epoch": 0.77, "grad_norm": 0.5017082691192627, "learning_rate": 0.0005069999770214017, "loss": 1.7884, "step": 23229 }, { "epoch": 0.77, "grad_norm": 0.45499497652053833, "learning_rate": 0.0005069924115551837, "loss": 1.7964, "step": 23230 }, { "epoch": 0.77, "grad_norm": 0.43004220724105835, "learning_rate": 0.0005069848458377073, "loss": 1.7955, "step": 23231 }, { "epoch": 0.77, "grad_norm": 0.4306838810443878, "learning_rate": 0.0005069772798689819, "loss": 1.8744, "step": 23232 }, { "epoch": 0.77, "grad_norm": 0.45572397112846375, "learning_rate": 0.0005069697136490165, "loss": 1.7996, "step": 23233 }, { "epoch": 0.77, "grad_norm": 0.4510442614555359, "learning_rate": 0.0005069621471778204, "loss": 1.7843, "step": 23234 }, { "epoch": 0.77, "grad_norm": 0.424605131149292, "learning_rate": 0.0005069545804554028, "loss": 1.8065, "step": 23235 }, { "epoch": 0.77, "grad_norm": 0.44164952635765076, "learning_rate": 0.0005069470134817728, "loss": 1.8131, "step": 23236 }, { "epoch": 0.77, "grad_norm": 0.4314112663269043, "learning_rate": 0.0005069394462569396, "loss": 1.7884, "step": 23237 }, { "epoch": 0.77, "grad_norm": 0.4407646059989929, "learning_rate": 0.0005069318787809124, "loss": 1.8395, "step": 23238 }, { "epoch": 0.77, "grad_norm": 0.4284727871417999, "learning_rate": 0.0005069243110537003, "loss": 1.7839, "step": 23239 }, { "epoch": 0.77, "grad_norm": 0.4344116449356079, "learning_rate": 0.0005069167430753127, "loss": 1.8918, "step": 23240 }, { "epoch": 0.77, "grad_norm": 0.43501487374305725, "learning_rate": 0.0005069091748457587, "loss": 1.8043, "step": 23241 }, { "epoch": 0.77, "grad_norm": 0.44347822666168213, "learning_rate": 0.0005069016063650473, "loss": 1.856, "step": 23242 }, { "epoch": 0.77, "grad_norm": 0.44323253631591797, "learning_rate": 0.0005068940376331879, "loss": 1.9249, "step": 23243 }, { "epoch": 0.77, "grad_norm": 0.43416664004325867, "learning_rate": 0.0005068864686501897, "loss": 1.8649, "step": 23244 }, { "epoch": 0.77, "grad_norm": 0.44455376267433167, "learning_rate": 0.0005068788994160617, "loss": 1.838, "step": 23245 }, { "epoch": 0.77, "grad_norm": 0.4479841887950897, "learning_rate": 0.0005068713299308132, "loss": 1.7082, "step": 23246 }, { "epoch": 0.77, "grad_norm": 0.4154607951641083, "learning_rate": 0.0005068637601944535, "loss": 1.8018, "step": 23247 }, { "epoch": 0.77, "grad_norm": 0.44378286600112915, "learning_rate": 0.0005068561902069916, "loss": 1.8152, "step": 23248 }, { "epoch": 0.77, "grad_norm": 0.4561261534690857, "learning_rate": 0.0005068486199684368, "loss": 1.7971, "step": 23249 }, { "epoch": 0.77, "grad_norm": 0.46246573328971863, "learning_rate": 0.0005068410494787982, "loss": 1.8408, "step": 23250 }, { "epoch": 0.77, "grad_norm": 0.41953045129776, "learning_rate": 0.0005068334787380851, "loss": 1.7995, "step": 23251 }, { "epoch": 0.77, "grad_norm": 0.45461341738700867, "learning_rate": 0.0005068259077463065, "loss": 1.7952, "step": 23252 }, { "epoch": 0.77, "grad_norm": 0.4474612772464752, "learning_rate": 0.0005068183365034718, "loss": 1.8187, "step": 23253 }, { "epoch": 0.77, "grad_norm": 0.430269330739975, "learning_rate": 0.0005068107650095902, "loss": 1.8369, "step": 23254 }, { "epoch": 0.77, "grad_norm": 0.44091856479644775, "learning_rate": 0.0005068031932646708, "loss": 1.7209, "step": 23255 }, { "epoch": 0.77, "grad_norm": 0.4275251030921936, "learning_rate": 0.0005067956212687228, "loss": 1.8038, "step": 23256 }, { "epoch": 0.77, "grad_norm": 0.45128124952316284, "learning_rate": 0.0005067880490217552, "loss": 1.8099, "step": 23257 }, { "epoch": 0.77, "grad_norm": 0.4264534115791321, "learning_rate": 0.0005067804765237774, "loss": 1.8234, "step": 23258 }, { "epoch": 0.77, "grad_norm": 0.44040724635124207, "learning_rate": 0.0005067729037747988, "loss": 1.8908, "step": 23259 }, { "epoch": 0.77, "grad_norm": 0.4437360167503357, "learning_rate": 0.0005067653307748282, "loss": 1.7494, "step": 23260 }, { "epoch": 0.77, "grad_norm": 0.4252959191799164, "learning_rate": 0.0005067577575238749, "loss": 1.8239, "step": 23261 }, { "epoch": 0.77, "grad_norm": 0.4200991690158844, "learning_rate": 0.0005067501840219483, "loss": 1.7913, "step": 23262 }, { "epoch": 0.77, "grad_norm": 0.4292871654033661, "learning_rate": 0.0005067426102690574, "loss": 1.8134, "step": 23263 }, { "epoch": 0.77, "grad_norm": 0.42460158467292786, "learning_rate": 0.0005067350362652114, "loss": 1.8271, "step": 23264 }, { "epoch": 0.77, "grad_norm": 0.4360608756542206, "learning_rate": 0.0005067274620104195, "loss": 1.797, "step": 23265 }, { "epoch": 0.77, "grad_norm": 0.43098461627960205, "learning_rate": 0.0005067198875046909, "loss": 1.8518, "step": 23266 }, { "epoch": 0.77, "grad_norm": 0.4375718832015991, "learning_rate": 0.000506712312748035, "loss": 1.9119, "step": 23267 }, { "epoch": 0.77, "grad_norm": 0.4221698045730591, "learning_rate": 0.0005067047377404606, "loss": 1.8391, "step": 23268 }, { "epoch": 0.77, "grad_norm": 0.41821029782295227, "learning_rate": 0.0005066971624819772, "loss": 1.8105, "step": 23269 }, { "epoch": 0.77, "grad_norm": 0.4378514289855957, "learning_rate": 0.0005066895869725938, "loss": 1.8303, "step": 23270 }, { "epoch": 0.77, "grad_norm": 0.4412345588207245, "learning_rate": 0.0005066820112123199, "loss": 1.8941, "step": 23271 }, { "epoch": 0.77, "grad_norm": 0.43108856678009033, "learning_rate": 0.0005066744352011644, "loss": 1.8393, "step": 23272 }, { "epoch": 0.77, "grad_norm": 0.42713892459869385, "learning_rate": 0.0005066668589391364, "loss": 1.8001, "step": 23273 }, { "epoch": 0.77, "grad_norm": 0.43528079986572266, "learning_rate": 0.0005066592824262455, "loss": 1.7952, "step": 23274 }, { "epoch": 0.77, "grad_norm": 0.4459848701953888, "learning_rate": 0.0005066517056625007, "loss": 1.7953, "step": 23275 }, { "epoch": 0.77, "grad_norm": 0.4344334602355957, "learning_rate": 0.000506644128647911, "loss": 1.7755, "step": 23276 }, { "epoch": 0.77, "grad_norm": 0.4642731547355652, "learning_rate": 0.0005066365513824859, "loss": 1.8418, "step": 23277 }, { "epoch": 0.77, "grad_norm": 0.4259066879749298, "learning_rate": 0.0005066289738662345, "loss": 1.8409, "step": 23278 }, { "epoch": 0.77, "grad_norm": 0.4335508644580841, "learning_rate": 0.0005066213960991659, "loss": 1.804, "step": 23279 }, { "epoch": 0.77, "grad_norm": 0.4469907879829407, "learning_rate": 0.0005066138180812893, "loss": 1.8196, "step": 23280 }, { "epoch": 0.77, "grad_norm": 0.45025479793548584, "learning_rate": 0.000506606239812614, "loss": 1.8154, "step": 23281 }, { "epoch": 0.77, "grad_norm": 0.45371076464653015, "learning_rate": 0.0005065986612931493, "loss": 1.8978, "step": 23282 }, { "epoch": 0.77, "grad_norm": 0.4354526102542877, "learning_rate": 0.0005065910825229041, "loss": 1.79, "step": 23283 }, { "epoch": 0.77, "grad_norm": 0.806358277797699, "learning_rate": 0.0005065835035018879, "loss": 1.9106, "step": 23284 }, { "epoch": 0.77, "grad_norm": 0.44318172335624695, "learning_rate": 0.0005065759242301098, "loss": 1.8172, "step": 23285 }, { "epoch": 0.77, "grad_norm": 0.45660561323165894, "learning_rate": 0.0005065683447075789, "loss": 1.8055, "step": 23286 }, { "epoch": 0.77, "grad_norm": 0.45026206970214844, "learning_rate": 0.0005065607649343045, "loss": 1.8736, "step": 23287 }, { "epoch": 0.77, "grad_norm": 0.4671342074871063, "learning_rate": 0.0005065531849102956, "loss": 1.803, "step": 23288 }, { "epoch": 0.77, "grad_norm": 0.42758312821388245, "learning_rate": 0.0005065456046355617, "loss": 1.8218, "step": 23289 }, { "epoch": 0.77, "grad_norm": 0.4545923173427582, "learning_rate": 0.0005065380241101119, "loss": 1.8917, "step": 23290 }, { "epoch": 0.77, "grad_norm": 0.5364032983779907, "learning_rate": 0.0005065304433339553, "loss": 1.8793, "step": 23291 }, { "epoch": 0.77, "grad_norm": 0.4348003566265106, "learning_rate": 0.0005065228623071012, "loss": 1.7853, "step": 23292 }, { "epoch": 0.77, "grad_norm": 0.4394172430038452, "learning_rate": 0.0005065152810295588, "loss": 1.8443, "step": 23293 }, { "epoch": 0.77, "grad_norm": 0.4465126097202301, "learning_rate": 0.0005065076995013372, "loss": 1.8038, "step": 23294 }, { "epoch": 0.78, "grad_norm": 0.4421328604221344, "learning_rate": 0.0005065001177224457, "loss": 1.8337, "step": 23295 }, { "epoch": 0.78, "grad_norm": 0.45039504766464233, "learning_rate": 0.0005064925356928936, "loss": 1.7778, "step": 23296 }, { "epoch": 0.78, "grad_norm": 0.42181915044784546, "learning_rate": 0.0005064849534126899, "loss": 1.839, "step": 23297 }, { "epoch": 0.78, "grad_norm": 0.42529138922691345, "learning_rate": 0.0005064773708818438, "loss": 1.8035, "step": 23298 }, { "epoch": 0.78, "grad_norm": 0.4395824670791626, "learning_rate": 0.0005064697881003647, "loss": 1.781, "step": 23299 }, { "epoch": 0.78, "grad_norm": 0.44333064556121826, "learning_rate": 0.0005064622050682617, "loss": 1.8155, "step": 23300 }, { "epoch": 0.78, "grad_norm": 0.42547607421875, "learning_rate": 0.000506454621785544, "loss": 1.814, "step": 23301 }, { "epoch": 0.78, "grad_norm": 0.4287360906600952, "learning_rate": 0.0005064470382522208, "loss": 1.833, "step": 23302 }, { "epoch": 0.78, "grad_norm": 0.425509512424469, "learning_rate": 0.0005064394544683012, "loss": 1.7671, "step": 23303 }, { "epoch": 0.78, "grad_norm": 0.44594478607177734, "learning_rate": 0.0005064318704337947, "loss": 1.8854, "step": 23304 }, { "epoch": 0.78, "grad_norm": 0.42609530687332153, "learning_rate": 0.0005064242861487103, "loss": 1.7875, "step": 23305 }, { "epoch": 0.78, "grad_norm": 0.4345800280570984, "learning_rate": 0.0005064167016130571, "loss": 1.8474, "step": 23306 }, { "epoch": 0.78, "grad_norm": 0.42007607221603394, "learning_rate": 0.0005064091168268446, "loss": 1.7951, "step": 23307 }, { "epoch": 0.78, "grad_norm": 0.4188355803489685, "learning_rate": 0.0005064015317900817, "loss": 1.7788, "step": 23308 }, { "epoch": 0.78, "grad_norm": 0.4351705014705658, "learning_rate": 0.0005063939465027778, "loss": 1.827, "step": 23309 }, { "epoch": 0.78, "grad_norm": 0.4510401785373688, "learning_rate": 0.0005063863609649421, "loss": 1.718, "step": 23310 }, { "epoch": 0.78, "grad_norm": 0.44021767377853394, "learning_rate": 0.0005063787751765839, "loss": 1.9066, "step": 23311 }, { "epoch": 0.78, "grad_norm": 0.4248812794685364, "learning_rate": 0.000506371189137712, "loss": 1.8513, "step": 23312 }, { "epoch": 0.78, "grad_norm": 0.43700289726257324, "learning_rate": 0.000506363602848336, "loss": 1.7956, "step": 23313 }, { "epoch": 0.78, "grad_norm": 0.4527130424976349, "learning_rate": 0.000506356016308465, "loss": 1.8213, "step": 23314 }, { "epoch": 0.78, "grad_norm": 0.4326907992362976, "learning_rate": 0.0005063484295181083, "loss": 1.8194, "step": 23315 }, { "epoch": 0.78, "grad_norm": 0.428329735994339, "learning_rate": 0.0005063408424772749, "loss": 1.7854, "step": 23316 }, { "epoch": 0.78, "grad_norm": 0.4328565299510956, "learning_rate": 0.000506333255185974, "loss": 1.8068, "step": 23317 }, { "epoch": 0.78, "grad_norm": 0.4289585053920746, "learning_rate": 0.0005063256676442152, "loss": 1.8525, "step": 23318 }, { "epoch": 0.78, "grad_norm": 0.44250717759132385, "learning_rate": 0.0005063180798520072, "loss": 1.8158, "step": 23319 }, { "epoch": 0.78, "grad_norm": 0.4206402897834778, "learning_rate": 0.0005063104918093596, "loss": 1.8423, "step": 23320 }, { "epoch": 0.78, "grad_norm": 0.4220009446144104, "learning_rate": 0.0005063029035162815, "loss": 1.7908, "step": 23321 }, { "epoch": 0.78, "grad_norm": 0.43447232246398926, "learning_rate": 0.0005062953149727819, "loss": 1.8549, "step": 23322 }, { "epoch": 0.78, "grad_norm": 0.4297981560230255, "learning_rate": 0.0005062877261788703, "loss": 1.8878, "step": 23323 }, { "epoch": 0.78, "grad_norm": 0.4168260991573334, "learning_rate": 0.0005062801371345558, "loss": 1.8664, "step": 23324 }, { "epoch": 0.78, "grad_norm": 0.4431629776954651, "learning_rate": 0.0005062725478398476, "loss": 1.8492, "step": 23325 }, { "epoch": 0.78, "grad_norm": 0.43729689717292786, "learning_rate": 0.0005062649582947548, "loss": 1.8578, "step": 23326 }, { "epoch": 0.78, "grad_norm": 0.4540271759033203, "learning_rate": 0.0005062573684992868, "loss": 1.8621, "step": 23327 }, { "epoch": 0.78, "grad_norm": 0.42372438311576843, "learning_rate": 0.0005062497784534528, "loss": 1.8974, "step": 23328 }, { "epoch": 0.78, "grad_norm": 0.44131624698638916, "learning_rate": 0.000506242188157262, "loss": 1.771, "step": 23329 }, { "epoch": 0.78, "grad_norm": 0.4226696193218231, "learning_rate": 0.0005062345976107234, "loss": 1.7992, "step": 23330 }, { "epoch": 0.78, "grad_norm": 0.4477006793022156, "learning_rate": 0.0005062270068138465, "loss": 1.7759, "step": 23331 }, { "epoch": 0.78, "grad_norm": 0.4262274205684662, "learning_rate": 0.0005062194157666404, "loss": 1.761, "step": 23332 }, { "epoch": 0.78, "grad_norm": 0.4284304678440094, "learning_rate": 0.0005062118244691143, "loss": 1.7614, "step": 23333 }, { "epoch": 0.78, "grad_norm": 0.43950405716896057, "learning_rate": 0.0005062042329212775, "loss": 1.8365, "step": 23334 }, { "epoch": 0.78, "grad_norm": 0.453144371509552, "learning_rate": 0.0005061966411231392, "loss": 1.805, "step": 23335 }, { "epoch": 0.78, "grad_norm": 0.4367642402648926, "learning_rate": 0.0005061890490747083, "loss": 1.8309, "step": 23336 }, { "epoch": 0.78, "grad_norm": 0.45226937532424927, "learning_rate": 0.0005061814567759944, "loss": 1.8306, "step": 23337 }, { "epoch": 0.78, "grad_norm": 0.4422827959060669, "learning_rate": 0.0005061738642270066, "loss": 1.7661, "step": 23338 }, { "epoch": 0.78, "grad_norm": 0.4397495985031128, "learning_rate": 0.000506166271427754, "loss": 1.6896, "step": 23339 }, { "epoch": 0.78, "grad_norm": 0.4358983635902405, "learning_rate": 0.0005061586783782461, "loss": 1.8537, "step": 23340 }, { "epoch": 0.78, "grad_norm": 0.4433443546295166, "learning_rate": 0.0005061510850784919, "loss": 1.7948, "step": 23341 }, { "epoch": 0.78, "grad_norm": 0.44027262926101685, "learning_rate": 0.0005061434915285006, "loss": 1.8304, "step": 23342 }, { "epoch": 0.78, "grad_norm": 0.4368825852870941, "learning_rate": 0.0005061358977282815, "loss": 1.8151, "step": 23343 }, { "epoch": 0.78, "grad_norm": 0.41764357686042786, "learning_rate": 0.0005061283036778438, "loss": 1.8367, "step": 23344 }, { "epoch": 0.78, "grad_norm": 0.42542123794555664, "learning_rate": 0.0005061207093771967, "loss": 1.8681, "step": 23345 }, { "epoch": 0.78, "grad_norm": 0.43595775961875916, "learning_rate": 0.0005061131148263495, "loss": 1.825, "step": 23346 }, { "epoch": 0.78, "grad_norm": 0.4451887607574463, "learning_rate": 0.0005061055200253113, "loss": 1.7781, "step": 23347 }, { "epoch": 0.78, "grad_norm": 0.4407825767993927, "learning_rate": 0.0005060979249740913, "loss": 1.8204, "step": 23348 }, { "epoch": 0.78, "grad_norm": 0.42976102232933044, "learning_rate": 0.0005060903296726988, "loss": 1.8162, "step": 23349 }, { "epoch": 0.78, "grad_norm": 0.42985010147094727, "learning_rate": 0.0005060827341211431, "loss": 1.9005, "step": 23350 }, { "epoch": 0.78, "grad_norm": 0.4337475895881653, "learning_rate": 0.0005060751383194332, "loss": 1.8596, "step": 23351 }, { "epoch": 0.78, "grad_norm": 0.4352869987487793, "learning_rate": 0.0005060675422675786, "loss": 1.7852, "step": 23352 }, { "epoch": 0.78, "grad_norm": 0.4418196678161621, "learning_rate": 0.0005060599459655882, "loss": 1.8702, "step": 23353 }, { "epoch": 0.78, "grad_norm": 0.4291747212409973, "learning_rate": 0.0005060523494134716, "loss": 1.7925, "step": 23354 }, { "epoch": 0.78, "grad_norm": 0.4401260018348694, "learning_rate": 0.0005060447526112376, "loss": 1.8148, "step": 23355 }, { "epoch": 0.78, "grad_norm": 0.4382261633872986, "learning_rate": 0.0005060371555588956, "loss": 1.7876, "step": 23356 }, { "epoch": 0.78, "grad_norm": 0.42939460277557373, "learning_rate": 0.000506029558256455, "loss": 1.8275, "step": 23357 }, { "epoch": 0.78, "grad_norm": 0.4310045540332794, "learning_rate": 0.0005060219607039249, "loss": 1.8956, "step": 23358 }, { "epoch": 0.78, "grad_norm": 0.453042209148407, "learning_rate": 0.0005060143629013145, "loss": 1.8402, "step": 23359 }, { "epoch": 0.78, "grad_norm": 0.44001299142837524, "learning_rate": 0.0005060067648486329, "loss": 1.8141, "step": 23360 }, { "epoch": 0.78, "grad_norm": 0.449733167886734, "learning_rate": 0.0005059991665458894, "loss": 1.7924, "step": 23361 }, { "epoch": 0.78, "grad_norm": 0.44184595346450806, "learning_rate": 0.0005059915679930934, "loss": 1.8367, "step": 23362 }, { "epoch": 0.78, "grad_norm": 0.4403466582298279, "learning_rate": 0.0005059839691902539, "loss": 1.705, "step": 23363 }, { "epoch": 0.78, "grad_norm": 0.4476379454135895, "learning_rate": 0.0005059763701373803, "loss": 1.7593, "step": 23364 }, { "epoch": 0.78, "grad_norm": 0.44240760803222656, "learning_rate": 0.0005059687708344817, "loss": 1.7951, "step": 23365 }, { "epoch": 0.78, "grad_norm": 0.4314841628074646, "learning_rate": 0.0005059611712815674, "loss": 1.7957, "step": 23366 }, { "epoch": 0.78, "grad_norm": 0.43210679292678833, "learning_rate": 0.0005059535714786465, "loss": 1.786, "step": 23367 }, { "epoch": 0.78, "grad_norm": 0.43867769837379456, "learning_rate": 0.0005059459714257283, "loss": 1.7669, "step": 23368 }, { "epoch": 0.78, "grad_norm": 0.4392300844192505, "learning_rate": 0.0005059383711228221, "loss": 1.8168, "step": 23369 }, { "epoch": 0.78, "grad_norm": 0.4299815893173218, "learning_rate": 0.0005059307705699371, "loss": 1.8538, "step": 23370 }, { "epoch": 0.78, "grad_norm": 0.4415084719657898, "learning_rate": 0.0005059231697670824, "loss": 1.8216, "step": 23371 }, { "epoch": 0.78, "grad_norm": 0.4331328570842743, "learning_rate": 0.0005059155687142673, "loss": 1.8867, "step": 23372 }, { "epoch": 0.78, "grad_norm": 0.43055278062820435, "learning_rate": 0.000505907967411501, "loss": 1.842, "step": 23373 }, { "epoch": 0.78, "grad_norm": 0.4484366476535797, "learning_rate": 0.0005059003658587929, "loss": 1.8141, "step": 23374 }, { "epoch": 0.78, "grad_norm": 0.44453221559524536, "learning_rate": 0.000505892764056152, "loss": 1.8504, "step": 23375 }, { "epoch": 0.78, "grad_norm": 0.4382486045360565, "learning_rate": 0.0005058851620035877, "loss": 1.8212, "step": 23376 }, { "epoch": 0.78, "grad_norm": 0.4300523102283478, "learning_rate": 0.0005058775597011091, "loss": 1.8284, "step": 23377 }, { "epoch": 0.78, "grad_norm": 0.4276939630508423, "learning_rate": 0.0005058699571487254, "loss": 1.8147, "step": 23378 }, { "epoch": 0.78, "grad_norm": 0.419116348028183, "learning_rate": 0.0005058623543464461, "loss": 1.8546, "step": 23379 }, { "epoch": 0.78, "grad_norm": 0.43449315428733826, "learning_rate": 0.0005058547512942802, "loss": 1.7747, "step": 23380 }, { "epoch": 0.78, "grad_norm": 0.4259836971759796, "learning_rate": 0.0005058471479922369, "loss": 1.8491, "step": 23381 }, { "epoch": 0.78, "grad_norm": 0.4315144717693329, "learning_rate": 0.0005058395444403254, "loss": 1.7966, "step": 23382 }, { "epoch": 0.78, "grad_norm": 0.43055108189582825, "learning_rate": 0.0005058319406385551, "loss": 1.7963, "step": 23383 }, { "epoch": 0.78, "grad_norm": 0.4256037175655365, "learning_rate": 0.0005058243365869352, "loss": 1.8396, "step": 23384 }, { "epoch": 0.78, "grad_norm": 0.45383214950561523, "learning_rate": 0.0005058167322854747, "loss": 1.8637, "step": 23385 }, { "epoch": 0.78, "grad_norm": 0.4288366138935089, "learning_rate": 0.0005058091277341832, "loss": 1.754, "step": 23386 }, { "epoch": 0.78, "grad_norm": 0.7864153981208801, "learning_rate": 0.0005058015229330697, "loss": 1.8666, "step": 23387 }, { "epoch": 0.78, "grad_norm": 0.4549192786216736, "learning_rate": 0.0005057939178821435, "loss": 1.8687, "step": 23388 }, { "epoch": 0.78, "grad_norm": 0.4433930218219757, "learning_rate": 0.0005057863125814138, "loss": 1.8334, "step": 23389 }, { "epoch": 0.78, "grad_norm": 0.431569904088974, "learning_rate": 0.0005057787070308898, "loss": 1.8592, "step": 23390 }, { "epoch": 0.78, "grad_norm": 0.4329860508441925, "learning_rate": 0.0005057711012305807, "loss": 1.7366, "step": 23391 }, { "epoch": 0.78, "grad_norm": 0.44576209783554077, "learning_rate": 0.000505763495180496, "loss": 1.9014, "step": 23392 }, { "epoch": 0.78, "grad_norm": 0.4384591281414032, "learning_rate": 0.0005057558888806446, "loss": 1.735, "step": 23393 }, { "epoch": 0.78, "grad_norm": 0.42787373065948486, "learning_rate": 0.0005057482823310359, "loss": 1.8555, "step": 23394 }, { "epoch": 0.78, "grad_norm": 0.42695778608322144, "learning_rate": 0.0005057406755316791, "loss": 1.8236, "step": 23395 }, { "epoch": 0.78, "grad_norm": 0.4432990252971649, "learning_rate": 0.0005057330684825834, "loss": 1.8756, "step": 23396 }, { "epoch": 0.78, "grad_norm": 0.4522722661495209, "learning_rate": 0.0005057254611837581, "loss": 1.6983, "step": 23397 }, { "epoch": 0.78, "grad_norm": 0.41630589962005615, "learning_rate": 0.0005057178536352124, "loss": 1.7186, "step": 23398 }, { "epoch": 0.78, "grad_norm": 0.44049033522605896, "learning_rate": 0.0005057102458369554, "loss": 1.8496, "step": 23399 }, { "epoch": 0.78, "grad_norm": 0.45194074511528015, "learning_rate": 0.0005057026377889967, "loss": 1.8987, "step": 23400 }, { "epoch": 0.78, "grad_norm": 0.6551506519317627, "learning_rate": 0.0005056950294913451, "loss": 1.8732, "step": 23401 }, { "epoch": 0.78, "grad_norm": 0.42889949679374695, "learning_rate": 0.0005056874209440102, "loss": 1.787, "step": 23402 }, { "epoch": 0.78, "grad_norm": 0.42427515983581543, "learning_rate": 0.0005056798121470011, "loss": 1.8243, "step": 23403 }, { "epoch": 0.78, "grad_norm": 0.43531280755996704, "learning_rate": 0.0005056722031003269, "loss": 1.8291, "step": 23404 }, { "epoch": 0.78, "grad_norm": 0.4295330047607422, "learning_rate": 0.000505664593803997, "loss": 1.8651, "step": 23405 }, { "epoch": 0.78, "grad_norm": 0.4402754306793213, "learning_rate": 0.0005056569842580206, "loss": 1.8266, "step": 23406 }, { "epoch": 0.78, "grad_norm": 0.4271703064441681, "learning_rate": 0.0005056493744624069, "loss": 1.8869, "step": 23407 }, { "epoch": 0.78, "grad_norm": 0.43971508741378784, "learning_rate": 0.0005056417644171651, "loss": 1.8727, "step": 23408 }, { "epoch": 0.78, "grad_norm": 0.4673473536968231, "learning_rate": 0.0005056341541223046, "loss": 1.814, "step": 23409 }, { "epoch": 0.78, "grad_norm": 0.44007301330566406, "learning_rate": 0.0005056265435778345, "loss": 1.7526, "step": 23410 }, { "epoch": 0.78, "grad_norm": 0.43551385402679443, "learning_rate": 0.0005056189327837641, "loss": 1.8122, "step": 23411 }, { "epoch": 0.78, "grad_norm": 0.4264789819717407, "learning_rate": 0.0005056113217401025, "loss": 1.8424, "step": 23412 }, { "epoch": 0.78, "grad_norm": 0.467805951833725, "learning_rate": 0.0005056037104468592, "loss": 1.8478, "step": 23413 }, { "epoch": 0.78, "grad_norm": 0.4630458950996399, "learning_rate": 0.0005055960989040432, "loss": 1.8086, "step": 23414 }, { "epoch": 0.78, "grad_norm": 0.43722158670425415, "learning_rate": 0.0005055884871116638, "loss": 1.8509, "step": 23415 }, { "epoch": 0.78, "grad_norm": 0.448184996843338, "learning_rate": 0.0005055808750697304, "loss": 1.8592, "step": 23416 }, { "epoch": 0.78, "grad_norm": 0.5950466990470886, "learning_rate": 0.000505573262778252, "loss": 1.8488, "step": 23417 }, { "epoch": 0.78, "grad_norm": 0.44152212142944336, "learning_rate": 0.0005055656502372379, "loss": 1.7618, "step": 23418 }, { "epoch": 0.78, "grad_norm": 0.43124839663505554, "learning_rate": 0.0005055580374466976, "loss": 1.8759, "step": 23419 }, { "epoch": 0.78, "grad_norm": 0.42953383922576904, "learning_rate": 0.0005055504244066399, "loss": 1.8687, "step": 23420 }, { "epoch": 0.78, "grad_norm": 0.46026411652565, "learning_rate": 0.0005055428111170744, "loss": 1.8402, "step": 23421 }, { "epoch": 0.78, "grad_norm": 0.4476511776447296, "learning_rate": 0.0005055351975780101, "loss": 1.9063, "step": 23422 }, { "epoch": 0.78, "grad_norm": 0.45043233036994934, "learning_rate": 0.0005055275837894565, "loss": 1.7037, "step": 23423 }, { "epoch": 0.78, "grad_norm": 0.4466686546802521, "learning_rate": 0.0005055199697514226, "loss": 1.8179, "step": 23424 }, { "epoch": 0.78, "grad_norm": 0.443103164434433, "learning_rate": 0.0005055123554639178, "loss": 1.8367, "step": 23425 }, { "epoch": 0.78, "grad_norm": 0.4665660858154297, "learning_rate": 0.0005055047409269512, "loss": 1.864, "step": 23426 }, { "epoch": 0.78, "grad_norm": 0.4226124584674835, "learning_rate": 0.0005054971261405322, "loss": 1.7919, "step": 23427 }, { "epoch": 0.78, "grad_norm": 0.4395389258861542, "learning_rate": 0.00050548951110467, "loss": 1.8539, "step": 23428 }, { "epoch": 0.78, "grad_norm": 0.45431098341941833, "learning_rate": 0.0005054818958193738, "loss": 1.865, "step": 23429 }, { "epoch": 0.78, "grad_norm": 0.4345165193080902, "learning_rate": 0.0005054742802846528, "loss": 1.8784, "step": 23430 }, { "epoch": 0.78, "grad_norm": 0.42162054777145386, "learning_rate": 0.0005054666645005162, "loss": 1.8027, "step": 23431 }, { "epoch": 0.78, "grad_norm": 0.4409880042076111, "learning_rate": 0.0005054590484669735, "loss": 1.8233, "step": 23432 }, { "epoch": 0.78, "grad_norm": 0.433320552110672, "learning_rate": 0.0005054514321840336, "loss": 1.7291, "step": 23433 }, { "epoch": 0.78, "grad_norm": 0.4325742721557617, "learning_rate": 0.0005054438156517062, "loss": 1.8068, "step": 23434 }, { "epoch": 0.78, "grad_norm": 0.42798924446105957, "learning_rate": 0.0005054361988700002, "loss": 1.7768, "step": 23435 }, { "epoch": 0.78, "grad_norm": 0.43018102645874023, "learning_rate": 0.0005054285818389248, "loss": 1.7999, "step": 23436 }, { "epoch": 0.78, "grad_norm": 0.4432487487792969, "learning_rate": 0.0005054209645584894, "loss": 1.8874, "step": 23437 }, { "epoch": 0.78, "grad_norm": 0.4378264248371124, "learning_rate": 0.0005054133470287033, "loss": 1.8178, "step": 23438 }, { "epoch": 0.78, "grad_norm": 0.44022661447525024, "learning_rate": 0.0005054057292495757, "loss": 1.8444, "step": 23439 }, { "epoch": 0.78, "grad_norm": 0.4298475384712219, "learning_rate": 0.0005053981112211157, "loss": 1.8457, "step": 23440 }, { "epoch": 0.78, "grad_norm": 0.42987895011901855, "learning_rate": 0.0005053904929433326, "loss": 1.8364, "step": 23441 }, { "epoch": 0.78, "grad_norm": 0.4209143817424774, "learning_rate": 0.0005053828744162358, "loss": 1.8263, "step": 23442 }, { "epoch": 0.78, "grad_norm": 0.4342063069343567, "learning_rate": 0.0005053752556398345, "loss": 1.8548, "step": 23443 }, { "epoch": 0.78, "grad_norm": 0.43915891647338867, "learning_rate": 0.0005053676366141379, "loss": 1.7992, "step": 23444 }, { "epoch": 0.78, "grad_norm": 0.45416805148124695, "learning_rate": 0.0005053600173391551, "loss": 1.8187, "step": 23445 }, { "epoch": 0.78, "grad_norm": 0.4397936463356018, "learning_rate": 0.0005053523978148956, "loss": 1.8512, "step": 23446 }, { "epoch": 0.78, "grad_norm": 0.44065821170806885, "learning_rate": 0.0005053447780413685, "loss": 1.8623, "step": 23447 }, { "epoch": 0.78, "grad_norm": 0.4366947114467621, "learning_rate": 0.0005053371580185832, "loss": 1.8285, "step": 23448 }, { "epoch": 0.78, "grad_norm": 0.4499412775039673, "learning_rate": 0.0005053295377465488, "loss": 1.9623, "step": 23449 }, { "epoch": 0.78, "grad_norm": 0.43273890018463135, "learning_rate": 0.0005053219172252746, "loss": 1.8741, "step": 23450 }, { "epoch": 0.78, "grad_norm": 0.43477126955986023, "learning_rate": 0.0005053142964547698, "loss": 1.8563, "step": 23451 }, { "epoch": 0.78, "grad_norm": 0.43613362312316895, "learning_rate": 0.0005053066754350437, "loss": 1.8077, "step": 23452 }, { "epoch": 0.78, "grad_norm": 0.43699613213539124, "learning_rate": 0.0005052990541661056, "loss": 1.7967, "step": 23453 }, { "epoch": 0.78, "grad_norm": 0.4286363422870636, "learning_rate": 0.0005052914326479646, "loss": 1.8015, "step": 23454 }, { "epoch": 0.78, "grad_norm": 0.420427531003952, "learning_rate": 0.0005052838108806301, "loss": 1.7536, "step": 23455 }, { "epoch": 0.78, "grad_norm": 0.41381263732910156, "learning_rate": 0.0005052761888641113, "loss": 1.7937, "step": 23456 }, { "epoch": 0.78, "grad_norm": 0.43542540073394775, "learning_rate": 0.0005052685665984175, "loss": 1.8428, "step": 23457 }, { "epoch": 0.78, "grad_norm": 0.447773814201355, "learning_rate": 0.0005052609440835579, "loss": 1.8185, "step": 23458 }, { "epoch": 0.78, "grad_norm": 0.435412734746933, "learning_rate": 0.0005052533213195417, "loss": 1.8458, "step": 23459 }, { "epoch": 0.78, "grad_norm": 0.45591506361961365, "learning_rate": 0.0005052456983063782, "loss": 1.8814, "step": 23460 }, { "epoch": 0.78, "grad_norm": 0.42740845680236816, "learning_rate": 0.0005052380750440767, "loss": 1.8317, "step": 23461 }, { "epoch": 0.78, "grad_norm": 0.8879558444023132, "learning_rate": 0.0005052304515326464, "loss": 1.8707, "step": 23462 }, { "epoch": 0.78, "grad_norm": 0.4382641017436981, "learning_rate": 0.0005052228277720966, "loss": 1.8671, "step": 23463 }, { "epoch": 0.78, "grad_norm": 0.4193400740623474, "learning_rate": 0.0005052152037624364, "loss": 1.8708, "step": 23464 }, { "epoch": 0.78, "grad_norm": 0.44488295912742615, "learning_rate": 0.0005052075795036754, "loss": 1.878, "step": 23465 }, { "epoch": 0.78, "grad_norm": 0.43535536527633667, "learning_rate": 0.0005051999549958226, "loss": 1.7874, "step": 23466 }, { "epoch": 0.78, "grad_norm": 0.43797680735588074, "learning_rate": 0.0005051923302388871, "loss": 1.861, "step": 23467 }, { "epoch": 0.78, "grad_norm": 0.4268811345100403, "learning_rate": 0.0005051847052328785, "loss": 1.8592, "step": 23468 }, { "epoch": 0.78, "grad_norm": 0.44284406304359436, "learning_rate": 0.0005051770799778059, "loss": 1.7758, "step": 23469 }, { "epoch": 0.78, "grad_norm": 0.4623674750328064, "learning_rate": 0.0005051694544736785, "loss": 1.7906, "step": 23470 }, { "epoch": 0.78, "grad_norm": 0.43743956089019775, "learning_rate": 0.0005051618287205056, "loss": 1.7788, "step": 23471 }, { "epoch": 0.78, "grad_norm": 0.43466681241989136, "learning_rate": 0.0005051542027182965, "loss": 1.9502, "step": 23472 }, { "epoch": 0.78, "grad_norm": 0.4611086845397949, "learning_rate": 0.0005051465764670604, "loss": 1.8572, "step": 23473 }, { "epoch": 0.78, "grad_norm": 0.4427364766597748, "learning_rate": 0.0005051389499668066, "loss": 1.7884, "step": 23474 }, { "epoch": 0.78, "grad_norm": 0.4372059404850006, "learning_rate": 0.0005051313232175444, "loss": 1.819, "step": 23475 }, { "epoch": 0.78, "grad_norm": 0.4300118088722229, "learning_rate": 0.000505123696219283, "loss": 1.8015, "step": 23476 }, { "epoch": 0.78, "grad_norm": 0.4376213848590851, "learning_rate": 0.0005051160689720317, "loss": 1.8017, "step": 23477 }, { "epoch": 0.78, "grad_norm": 0.44048985838890076, "learning_rate": 0.0005051084414757995, "loss": 1.9111, "step": 23478 }, { "epoch": 0.78, "grad_norm": 0.42843541502952576, "learning_rate": 0.000505100813730596, "loss": 1.8146, "step": 23479 }, { "epoch": 0.78, "grad_norm": 0.44083133339881897, "learning_rate": 0.0005050931857364302, "loss": 1.7517, "step": 23480 }, { "epoch": 0.78, "grad_norm": 0.4383629262447357, "learning_rate": 0.0005050855574933117, "loss": 1.9132, "step": 23481 }, { "epoch": 0.78, "grad_norm": 0.45609965920448303, "learning_rate": 0.0005050779290012496, "loss": 1.7683, "step": 23482 }, { "epoch": 0.78, "grad_norm": 0.4401324689388275, "learning_rate": 0.0005050703002602529, "loss": 1.876, "step": 23483 }, { "epoch": 0.78, "grad_norm": 0.4238940477371216, "learning_rate": 0.0005050626712703311, "loss": 1.8208, "step": 23484 }, { "epoch": 0.78, "grad_norm": 0.4322647452354431, "learning_rate": 0.0005050550420314934, "loss": 1.7856, "step": 23485 }, { "epoch": 0.78, "grad_norm": 0.4357215166091919, "learning_rate": 0.0005050474125437493, "loss": 1.7863, "step": 23486 }, { "epoch": 0.78, "grad_norm": 0.4279349148273468, "learning_rate": 0.0005050397828071076, "loss": 1.7945, "step": 23487 }, { "epoch": 0.78, "grad_norm": 0.532750129699707, "learning_rate": 0.0005050321528215781, "loss": 1.883, "step": 23488 }, { "epoch": 0.78, "grad_norm": 0.43092310428619385, "learning_rate": 0.0005050245225871696, "loss": 1.8722, "step": 23489 }, { "epoch": 0.78, "grad_norm": 0.43298113346099854, "learning_rate": 0.0005050168921038916, "loss": 1.8577, "step": 23490 }, { "epoch": 0.78, "grad_norm": 0.44570493698120117, "learning_rate": 0.0005050092613717532, "loss": 1.8074, "step": 23491 }, { "epoch": 0.78, "grad_norm": 0.45594751834869385, "learning_rate": 0.0005050016303907637, "loss": 1.8601, "step": 23492 }, { "epoch": 0.78, "grad_norm": 0.4309964179992676, "learning_rate": 0.0005049939991609326, "loss": 1.8361, "step": 23493 }, { "epoch": 0.78, "grad_norm": 0.4306660592556, "learning_rate": 0.000504986367682269, "loss": 1.8515, "step": 23494 }, { "epoch": 0.78, "grad_norm": 0.43709251284599304, "learning_rate": 0.000504978735954782, "loss": 1.7507, "step": 23495 }, { "epoch": 0.78, "grad_norm": 0.4578756093978882, "learning_rate": 0.0005049711039784811, "loss": 1.8368, "step": 23496 }, { "epoch": 0.78, "grad_norm": 0.4325082302093506, "learning_rate": 0.0005049634717533755, "loss": 1.8108, "step": 23497 }, { "epoch": 0.78, "grad_norm": 0.4372955560684204, "learning_rate": 0.0005049558392794744, "loss": 1.7576, "step": 23498 }, { "epoch": 0.78, "grad_norm": 0.44402655959129333, "learning_rate": 0.0005049482065567873, "loss": 1.8544, "step": 23499 }, { "epoch": 0.78, "grad_norm": 0.46503588557243347, "learning_rate": 0.000504940573585323, "loss": 1.7923, "step": 23500 }, { "epoch": 0.78, "grad_norm": 0.4497211277484894, "learning_rate": 0.0005049329403650913, "loss": 1.8511, "step": 23501 }, { "epoch": 0.78, "grad_norm": 0.4471878409385681, "learning_rate": 0.0005049253068961009, "loss": 1.8534, "step": 23502 }, { "epoch": 0.78, "grad_norm": 0.43934595584869385, "learning_rate": 0.0005049176731783616, "loss": 1.8499, "step": 23503 }, { "epoch": 0.78, "grad_norm": 0.40582191944122314, "learning_rate": 0.0005049100392118823, "loss": 1.7972, "step": 23504 }, { "epoch": 0.78, "grad_norm": 0.42214539647102356, "learning_rate": 0.0005049024049966725, "loss": 1.8619, "step": 23505 }, { "epoch": 0.78, "grad_norm": 0.43928220868110657, "learning_rate": 0.0005048947705327414, "loss": 1.8572, "step": 23506 }, { "epoch": 0.78, "grad_norm": 0.43544304370880127, "learning_rate": 0.0005048871358200982, "loss": 1.8321, "step": 23507 }, { "epoch": 0.78, "grad_norm": 0.43221354484558105, "learning_rate": 0.0005048795008587522, "loss": 1.9116, "step": 23508 }, { "epoch": 0.78, "grad_norm": 0.44491147994995117, "learning_rate": 0.0005048718656487127, "loss": 1.7431, "step": 23509 }, { "epoch": 0.78, "grad_norm": 0.4363889992237091, "learning_rate": 0.0005048642301899889, "loss": 1.8441, "step": 23510 }, { "epoch": 0.78, "grad_norm": 0.42332589626312256, "learning_rate": 0.0005048565944825901, "loss": 1.7882, "step": 23511 }, { "epoch": 0.78, "grad_norm": 0.4382871687412262, "learning_rate": 0.0005048489585265255, "loss": 1.8403, "step": 23512 }, { "epoch": 0.78, "grad_norm": 0.4145738482475281, "learning_rate": 0.0005048413223218047, "loss": 1.7604, "step": 23513 }, { "epoch": 0.78, "grad_norm": 0.44033849239349365, "learning_rate": 0.0005048336858684365, "loss": 1.8365, "step": 23514 }, { "epoch": 0.78, "grad_norm": 0.4152756631374359, "learning_rate": 0.0005048260491664304, "loss": 1.8434, "step": 23515 }, { "epoch": 0.78, "grad_norm": 0.4330012798309326, "learning_rate": 0.0005048184122157958, "loss": 1.8199, "step": 23516 }, { "epoch": 0.78, "grad_norm": 0.4330046474933624, "learning_rate": 0.0005048107750165416, "loss": 1.8197, "step": 23517 }, { "epoch": 0.78, "grad_norm": 0.45101162791252136, "learning_rate": 0.0005048031375686774, "loss": 1.8111, "step": 23518 }, { "epoch": 0.78, "grad_norm": 0.4273817241191864, "learning_rate": 0.0005047954998722125, "loss": 1.8481, "step": 23519 }, { "epoch": 0.78, "grad_norm": 0.41893747448921204, "learning_rate": 0.0005047878619271559, "loss": 1.8095, "step": 23520 }, { "epoch": 0.78, "grad_norm": 0.4382643699645996, "learning_rate": 0.0005047802237335171, "loss": 1.7573, "step": 23521 }, { "epoch": 0.78, "grad_norm": 0.44343292713165283, "learning_rate": 0.0005047725852913051, "loss": 1.7755, "step": 23522 }, { "epoch": 0.78, "grad_norm": 0.41867804527282715, "learning_rate": 0.0005047649466005295, "loss": 1.8, "step": 23523 }, { "epoch": 0.78, "grad_norm": 0.44029107689857483, "learning_rate": 0.0005047573076611995, "loss": 1.807, "step": 23524 }, { "epoch": 0.78, "grad_norm": 0.42551282048225403, "learning_rate": 0.0005047496684733242, "loss": 1.7486, "step": 23525 }, { "epoch": 0.78, "grad_norm": 0.43735405802726746, "learning_rate": 0.0005047420290369129, "loss": 1.77, "step": 23526 }, { "epoch": 0.78, "grad_norm": 0.4342438280582428, "learning_rate": 0.000504734389351975, "loss": 1.7546, "step": 23527 }, { "epoch": 0.78, "grad_norm": 0.4320845901966095, "learning_rate": 0.0005047267494185197, "loss": 1.8172, "step": 23528 }, { "epoch": 0.78, "grad_norm": 0.4346599578857422, "learning_rate": 0.0005047191092365564, "loss": 1.8701, "step": 23529 }, { "epoch": 0.78, "grad_norm": 0.4127534329891205, "learning_rate": 0.0005047114688060941, "loss": 1.7875, "step": 23530 }, { "epoch": 0.78, "grad_norm": 0.43208611011505127, "learning_rate": 0.0005047038281271424, "loss": 1.8178, "step": 23531 }, { "epoch": 0.78, "grad_norm": 0.4261557161808014, "learning_rate": 0.0005046961871997103, "loss": 1.7978, "step": 23532 }, { "epoch": 0.78, "grad_norm": 1.0272691249847412, "learning_rate": 0.0005046885460238072, "loss": 1.8015, "step": 23533 }, { "epoch": 0.78, "grad_norm": 0.43561115860939026, "learning_rate": 0.0005046809045994425, "loss": 1.7848, "step": 23534 }, { "epoch": 0.78, "grad_norm": 0.45550429821014404, "learning_rate": 0.0005046732629266251, "loss": 1.8547, "step": 23535 }, { "epoch": 0.78, "grad_norm": 0.43760862946510315, "learning_rate": 0.0005046656210053648, "loss": 1.8181, "step": 23536 }, { "epoch": 0.78, "grad_norm": 0.43561631441116333, "learning_rate": 0.0005046579788356704, "loss": 1.8372, "step": 23537 }, { "epoch": 0.78, "grad_norm": 0.42032769322395325, "learning_rate": 0.0005046503364175514, "loss": 1.7922, "step": 23538 }, { "epoch": 0.78, "grad_norm": 0.4415300190448761, "learning_rate": 0.0005046426937510172, "loss": 1.7973, "step": 23539 }, { "epoch": 0.78, "grad_norm": 0.4513537585735321, "learning_rate": 0.0005046350508360767, "loss": 1.777, "step": 23540 }, { "epoch": 0.78, "grad_norm": 0.44224458932876587, "learning_rate": 0.0005046274076727394, "loss": 1.8329, "step": 23541 }, { "epoch": 0.78, "grad_norm": 0.43807533383369446, "learning_rate": 0.0005046197642610147, "loss": 1.9334, "step": 23542 }, { "epoch": 0.78, "grad_norm": 0.44342944025993347, "learning_rate": 0.0005046121206009116, "loss": 1.9347, "step": 23543 }, { "epoch": 0.78, "grad_norm": 0.4333099126815796, "learning_rate": 0.0005046044766924397, "loss": 1.7662, "step": 23544 }, { "epoch": 0.78, "grad_norm": 0.44802945852279663, "learning_rate": 0.0005045968325356081, "loss": 1.9247, "step": 23545 }, { "epoch": 0.78, "grad_norm": 0.43717584013938904, "learning_rate": 0.000504589188130426, "loss": 1.8199, "step": 23546 }, { "epoch": 0.78, "grad_norm": 0.4524165987968445, "learning_rate": 0.0005045815434769028, "loss": 1.8678, "step": 23547 }, { "epoch": 0.78, "grad_norm": 0.4278081953525543, "learning_rate": 0.0005045738985750478, "loss": 1.8464, "step": 23548 }, { "epoch": 0.78, "grad_norm": 0.42924928665161133, "learning_rate": 0.0005045662534248703, "loss": 1.8141, "step": 23549 }, { "epoch": 0.78, "grad_norm": 0.4362591505050659, "learning_rate": 0.0005045586080263794, "loss": 1.8925, "step": 23550 }, { "epoch": 0.78, "grad_norm": 0.43865638971328735, "learning_rate": 0.0005045509623795844, "loss": 1.8267, "step": 23551 }, { "epoch": 0.78, "grad_norm": 0.45739760994911194, "learning_rate": 0.0005045433164844948, "loss": 1.7587, "step": 23552 }, { "epoch": 0.78, "grad_norm": 0.44745001196861267, "learning_rate": 0.0005045356703411197, "loss": 1.8043, "step": 23553 }, { "epoch": 0.78, "grad_norm": 0.4400891661643982, "learning_rate": 0.0005045280239494686, "loss": 1.8366, "step": 23554 }, { "epoch": 0.78, "grad_norm": 0.429321825504303, "learning_rate": 0.0005045203773095503, "loss": 1.861, "step": 23555 }, { "epoch": 0.78, "grad_norm": 0.42544078826904297, "learning_rate": 0.0005045127304213746, "loss": 1.8174, "step": 23556 }, { "epoch": 0.78, "grad_norm": 0.44584032893180847, "learning_rate": 0.0005045050832849506, "loss": 1.8173, "step": 23557 }, { "epoch": 0.78, "grad_norm": 0.44592440128326416, "learning_rate": 0.0005044974359002876, "loss": 1.8369, "step": 23558 }, { "epoch": 0.78, "grad_norm": 0.44519326090812683, "learning_rate": 0.0005044897882673947, "loss": 1.8222, "step": 23559 }, { "epoch": 0.78, "grad_norm": 0.44475874304771423, "learning_rate": 0.0005044821403862814, "loss": 1.834, "step": 23560 }, { "epoch": 0.78, "grad_norm": 0.42595216631889343, "learning_rate": 0.0005044744922569569, "loss": 1.813, "step": 23561 }, { "epoch": 0.78, "grad_norm": 0.4390806257724762, "learning_rate": 0.0005044668438794304, "loss": 1.7936, "step": 23562 }, { "epoch": 0.78, "grad_norm": 0.43698883056640625, "learning_rate": 0.0005044591952537114, "loss": 1.8059, "step": 23563 }, { "epoch": 0.78, "grad_norm": 0.4570315480232239, "learning_rate": 0.0005044515463798091, "loss": 1.8553, "step": 23564 }, { "epoch": 0.78, "grad_norm": 0.4417436420917511, "learning_rate": 0.0005044438972577326, "loss": 1.8748, "step": 23565 }, { "epoch": 0.78, "grad_norm": 0.44640064239501953, "learning_rate": 0.0005044362478874914, "loss": 1.8555, "step": 23566 }, { "epoch": 0.78, "grad_norm": 0.42312365770339966, "learning_rate": 0.0005044285982690947, "loss": 1.8072, "step": 23567 }, { "epoch": 0.78, "grad_norm": 0.4335636794567108, "learning_rate": 0.0005044209484025518, "loss": 1.8547, "step": 23568 }, { "epoch": 0.78, "grad_norm": 0.42469748854637146, "learning_rate": 0.000504413298287872, "loss": 1.831, "step": 23569 }, { "epoch": 0.78, "grad_norm": 0.43633443117141724, "learning_rate": 0.0005044056479250647, "loss": 1.8511, "step": 23570 }, { "epoch": 0.78, "grad_norm": 0.4252776801586151, "learning_rate": 0.0005043979973141389, "loss": 1.847, "step": 23571 }, { "epoch": 0.78, "grad_norm": 0.44186192750930786, "learning_rate": 0.0005043903464551041, "loss": 1.8017, "step": 23572 }, { "epoch": 0.78, "grad_norm": 0.434933602809906, "learning_rate": 0.0005043826953479695, "loss": 1.8182, "step": 23573 }, { "epoch": 0.78, "grad_norm": 0.426757276058197, "learning_rate": 0.0005043750439927444, "loss": 1.7509, "step": 23574 }, { "epoch": 0.78, "grad_norm": 0.4647325575351715, "learning_rate": 0.0005043673923894382, "loss": 1.8832, "step": 23575 }, { "epoch": 0.78, "grad_norm": 0.45417627692222595, "learning_rate": 0.0005043597405380601, "loss": 1.8827, "step": 23576 }, { "epoch": 0.78, "grad_norm": 0.4528251588344574, "learning_rate": 0.0005043520884386193, "loss": 1.8339, "step": 23577 }, { "epoch": 0.78, "grad_norm": 0.4232577383518219, "learning_rate": 0.0005043444360911252, "loss": 1.8044, "step": 23578 }, { "epoch": 0.78, "grad_norm": 0.4350047707557678, "learning_rate": 0.0005043367834955872, "loss": 1.7866, "step": 23579 }, { "epoch": 0.78, "grad_norm": 0.44101986289024353, "learning_rate": 0.0005043291306520142, "loss": 1.827, "step": 23580 }, { "epoch": 0.78, "grad_norm": 0.42759954929351807, "learning_rate": 0.000504321477560416, "loss": 1.8359, "step": 23581 }, { "epoch": 0.78, "grad_norm": 0.4506329894065857, "learning_rate": 0.0005043138242208015, "loss": 1.8121, "step": 23582 }, { "epoch": 0.78, "grad_norm": 0.4362208843231201, "learning_rate": 0.0005043061706331802, "loss": 1.8582, "step": 23583 }, { "epoch": 0.78, "grad_norm": 0.44020506739616394, "learning_rate": 0.0005042985167975612, "loss": 1.8762, "step": 23584 }, { "epoch": 0.78, "grad_norm": 0.4345492124557495, "learning_rate": 0.0005042908627139539, "loss": 1.8446, "step": 23585 }, { "epoch": 0.78, "grad_norm": 0.42256850004196167, "learning_rate": 0.0005042832083823676, "loss": 1.7902, "step": 23586 }, { "epoch": 0.78, "grad_norm": 0.4274963438510895, "learning_rate": 0.0005042755538028117, "loss": 1.6871, "step": 23587 }, { "epoch": 0.78, "grad_norm": 0.44593432545661926, "learning_rate": 0.0005042678989752953, "loss": 1.7878, "step": 23588 }, { "epoch": 0.78, "grad_norm": 0.45381438732147217, "learning_rate": 0.0005042602438998278, "loss": 1.8254, "step": 23589 }, { "epoch": 0.78, "grad_norm": 0.4323441684246063, "learning_rate": 0.0005042525885764184, "loss": 1.8143, "step": 23590 }, { "epoch": 0.78, "grad_norm": 0.4304811358451843, "learning_rate": 0.0005042449330050763, "loss": 1.84, "step": 23591 }, { "epoch": 0.78, "grad_norm": 0.44488275051116943, "learning_rate": 0.0005042372771858112, "loss": 1.8341, "step": 23592 }, { "epoch": 0.78, "grad_norm": 0.40918949246406555, "learning_rate": 0.0005042296211186321, "loss": 1.7694, "step": 23593 }, { "epoch": 0.78, "grad_norm": 0.4386848509311676, "learning_rate": 0.0005042219648035484, "loss": 1.8865, "step": 23594 }, { "epoch": 0.79, "grad_norm": 0.4383961260318756, "learning_rate": 0.0005042143082405691, "loss": 1.8223, "step": 23595 }, { "epoch": 0.79, "grad_norm": 0.4296896755695343, "learning_rate": 0.0005042066514297039, "loss": 1.8701, "step": 23596 }, { "epoch": 0.79, "grad_norm": 0.4478153884410858, "learning_rate": 0.0005041989943709618, "loss": 1.856, "step": 23597 }, { "epoch": 0.79, "grad_norm": 0.47394007444381714, "learning_rate": 0.0005041913370643523, "loss": 1.849, "step": 23598 }, { "epoch": 0.79, "grad_norm": 0.44616109132766724, "learning_rate": 0.0005041836795098846, "loss": 1.7667, "step": 23599 }, { "epoch": 0.79, "grad_norm": 0.43640291690826416, "learning_rate": 0.000504176021707568, "loss": 1.8242, "step": 23600 }, { "epoch": 0.79, "grad_norm": 0.45690444111824036, "learning_rate": 0.0005041683636574117, "loss": 1.8764, "step": 23601 }, { "epoch": 0.79, "grad_norm": 0.430739164352417, "learning_rate": 0.0005041607053594251, "loss": 1.7765, "step": 23602 }, { "epoch": 0.79, "grad_norm": 0.4318269193172455, "learning_rate": 0.0005041530468136176, "loss": 1.8835, "step": 23603 }, { "epoch": 0.79, "grad_norm": 0.42232516407966614, "learning_rate": 0.0005041453880199983, "loss": 1.8108, "step": 23604 }, { "epoch": 0.79, "grad_norm": 0.41954129934310913, "learning_rate": 0.0005041377289785766, "loss": 1.7705, "step": 23605 }, { "epoch": 0.79, "grad_norm": 0.44449517130851746, "learning_rate": 0.0005041300696893618, "loss": 1.7645, "step": 23606 }, { "epoch": 0.79, "grad_norm": 0.4264243543148041, "learning_rate": 0.0005041224101523631, "loss": 1.8569, "step": 23607 }, { "epoch": 0.79, "grad_norm": 0.42635247111320496, "learning_rate": 0.00050411475036759, "loss": 1.8415, "step": 23608 }, { "epoch": 0.79, "grad_norm": 0.45240476727485657, "learning_rate": 0.0005041070903350515, "loss": 1.8161, "step": 23609 }, { "epoch": 0.79, "grad_norm": 0.4246039390563965, "learning_rate": 0.0005040994300547571, "loss": 1.8404, "step": 23610 }, { "epoch": 0.79, "grad_norm": 0.4376929998397827, "learning_rate": 0.0005040917695267162, "loss": 1.8744, "step": 23611 }, { "epoch": 0.79, "grad_norm": 0.43773332238197327, "learning_rate": 0.0005040841087509379, "loss": 1.8013, "step": 23612 }, { "epoch": 0.79, "grad_norm": 0.4356006383895874, "learning_rate": 0.0005040764477274315, "loss": 1.8686, "step": 23613 }, { "epoch": 0.79, "grad_norm": 0.4383465051651001, "learning_rate": 0.0005040687864562064, "loss": 1.7763, "step": 23614 }, { "epoch": 0.79, "grad_norm": 0.4375729560852051, "learning_rate": 0.0005040611249372719, "loss": 1.8328, "step": 23615 }, { "epoch": 0.79, "grad_norm": 0.434509813785553, "learning_rate": 0.0005040534631706372, "loss": 1.8219, "step": 23616 }, { "epoch": 0.79, "grad_norm": 0.43744921684265137, "learning_rate": 0.0005040458011563117, "loss": 1.817, "step": 23617 }, { "epoch": 0.79, "grad_norm": 0.4437158405780792, "learning_rate": 0.0005040381388943045, "loss": 1.8098, "step": 23618 }, { "epoch": 0.79, "grad_norm": 0.43364545702934265, "learning_rate": 0.0005040304763846253, "loss": 1.8754, "step": 23619 }, { "epoch": 0.79, "grad_norm": 0.42912977933883667, "learning_rate": 0.0005040228136272831, "loss": 1.8161, "step": 23620 }, { "epoch": 0.79, "grad_norm": 0.42078664898872375, "learning_rate": 0.0005040151506222872, "loss": 1.8308, "step": 23621 }, { "epoch": 0.79, "grad_norm": 0.4235309362411499, "learning_rate": 0.000504007487369647, "loss": 1.7624, "step": 23622 }, { "epoch": 0.79, "grad_norm": 0.4445638656616211, "learning_rate": 0.0005039998238693718, "loss": 1.8736, "step": 23623 }, { "epoch": 0.79, "grad_norm": 0.44210782647132874, "learning_rate": 0.0005039921601214709, "loss": 1.7953, "step": 23624 }, { "epoch": 0.79, "grad_norm": 0.43291157484054565, "learning_rate": 0.0005039844961259534, "loss": 1.8428, "step": 23625 }, { "epoch": 0.79, "grad_norm": 0.4252338111400604, "learning_rate": 0.000503976831882829, "loss": 1.7959, "step": 23626 }, { "epoch": 0.79, "grad_norm": 0.43810492753982544, "learning_rate": 0.0005039691673921066, "loss": 1.8026, "step": 23627 }, { "epoch": 0.79, "grad_norm": 0.426733136177063, "learning_rate": 0.0005039615026537958, "loss": 1.7946, "step": 23628 }, { "epoch": 0.79, "grad_norm": 0.4471312463283539, "learning_rate": 0.0005039538376679058, "loss": 1.8129, "step": 23629 }, { "epoch": 0.79, "grad_norm": 0.42990460991859436, "learning_rate": 0.0005039461724344459, "loss": 1.8013, "step": 23630 }, { "epoch": 0.79, "grad_norm": 0.4202376902103424, "learning_rate": 0.0005039385069534252, "loss": 1.8109, "step": 23631 }, { "epoch": 0.79, "grad_norm": 0.4460245668888092, "learning_rate": 0.0005039308412248534, "loss": 1.7859, "step": 23632 }, { "epoch": 0.79, "grad_norm": 0.4415507912635803, "learning_rate": 0.0005039231752487395, "loss": 1.8216, "step": 23633 }, { "epoch": 0.79, "grad_norm": 0.42385604977607727, "learning_rate": 0.000503915509025093, "loss": 1.853, "step": 23634 }, { "epoch": 0.79, "grad_norm": 0.4530341923236847, "learning_rate": 0.0005039078425539231, "loss": 1.8396, "step": 23635 }, { "epoch": 0.79, "grad_norm": 0.44416147470474243, "learning_rate": 0.000503900175835239, "loss": 1.8264, "step": 23636 }, { "epoch": 0.79, "grad_norm": 0.44025367498397827, "learning_rate": 0.0005038925088690503, "loss": 1.8006, "step": 23637 }, { "epoch": 0.79, "grad_norm": 0.4295850396156311, "learning_rate": 0.0005038848416553661, "loss": 1.8259, "step": 23638 }, { "epoch": 0.79, "grad_norm": 0.45036378502845764, "learning_rate": 0.0005038771741941956, "loss": 1.8708, "step": 23639 }, { "epoch": 0.79, "grad_norm": 0.43352967500686646, "learning_rate": 0.0005038695064855485, "loss": 1.8031, "step": 23640 }, { "epoch": 0.79, "grad_norm": 0.44193756580352783, "learning_rate": 0.0005038618385294337, "loss": 1.8762, "step": 23641 }, { "epoch": 0.79, "grad_norm": 0.4201461374759674, "learning_rate": 0.0005038541703258606, "loss": 1.817, "step": 23642 }, { "epoch": 0.79, "grad_norm": 0.44597765803337097, "learning_rate": 0.0005038465018748386, "loss": 1.8718, "step": 23643 }, { "epoch": 0.79, "grad_norm": 0.4374120235443115, "learning_rate": 0.0005038388331763771, "loss": 1.7671, "step": 23644 }, { "epoch": 0.79, "grad_norm": 0.42375850677490234, "learning_rate": 0.0005038311642304853, "loss": 1.7617, "step": 23645 }, { "epoch": 0.79, "grad_norm": 0.45679691433906555, "learning_rate": 0.0005038234950371724, "loss": 1.7885, "step": 23646 }, { "epoch": 0.79, "grad_norm": 0.4126129448413849, "learning_rate": 0.0005038158255964479, "loss": 1.8141, "step": 23647 }, { "epoch": 0.79, "grad_norm": 0.44728702306747437, "learning_rate": 0.0005038081559083209, "loss": 1.7918, "step": 23648 }, { "epoch": 0.79, "grad_norm": 0.4436434209346771, "learning_rate": 0.0005038004859728008, "loss": 1.8224, "step": 23649 }, { "epoch": 0.79, "grad_norm": 0.46345841884613037, "learning_rate": 0.0005037928157898971, "loss": 1.8148, "step": 23650 }, { "epoch": 0.79, "grad_norm": 0.4331457018852234, "learning_rate": 0.000503785145359619, "loss": 1.9124, "step": 23651 }, { "epoch": 0.79, "grad_norm": 0.43083277344703674, "learning_rate": 0.0005037774746819755, "loss": 1.8052, "step": 23652 }, { "epoch": 0.79, "grad_norm": 0.4405635595321655, "learning_rate": 0.0005037698037569763, "loss": 1.8136, "step": 23653 }, { "epoch": 0.79, "grad_norm": 0.4454600214958191, "learning_rate": 0.0005037621325846306, "loss": 1.7678, "step": 23654 }, { "epoch": 0.79, "grad_norm": 0.4470149874687195, "learning_rate": 0.0005037544611649477, "loss": 1.8416, "step": 23655 }, { "epoch": 0.79, "grad_norm": 0.44198301434516907, "learning_rate": 0.000503746789497937, "loss": 1.8212, "step": 23656 }, { "epoch": 0.79, "grad_norm": 0.4429284334182739, "learning_rate": 0.0005037391175836076, "loss": 1.8293, "step": 23657 }, { "epoch": 0.79, "grad_norm": 0.4355638325214386, "learning_rate": 0.0005037314454219688, "loss": 1.8673, "step": 23658 }, { "epoch": 0.79, "grad_norm": 0.4581352472305298, "learning_rate": 0.0005037237730130302, "loss": 1.7894, "step": 23659 }, { "epoch": 0.79, "grad_norm": 0.43616870045661926, "learning_rate": 0.000503716100356801, "loss": 1.8469, "step": 23660 }, { "epoch": 0.79, "grad_norm": 0.43377819657325745, "learning_rate": 0.0005037084274532904, "loss": 1.8514, "step": 23661 }, { "epoch": 0.79, "grad_norm": 0.43438270688056946, "learning_rate": 0.0005037007543025078, "loss": 1.9032, "step": 23662 }, { "epoch": 0.79, "grad_norm": 0.47876113653182983, "learning_rate": 0.0005036930809044624, "loss": 1.8212, "step": 23663 }, { "epoch": 0.79, "grad_norm": 0.43592599034309387, "learning_rate": 0.0005036854072591637, "loss": 1.7714, "step": 23664 }, { "epoch": 0.79, "grad_norm": 0.46205055713653564, "learning_rate": 0.000503677733366621, "loss": 1.8003, "step": 23665 }, { "epoch": 0.79, "grad_norm": 0.43765825033187866, "learning_rate": 0.0005036700592268435, "loss": 1.8136, "step": 23666 }, { "epoch": 0.79, "grad_norm": 0.41782158613204956, "learning_rate": 0.0005036623848398404, "loss": 1.892, "step": 23667 }, { "epoch": 0.79, "grad_norm": 0.43167218565940857, "learning_rate": 0.0005036547102056214, "loss": 1.8098, "step": 23668 }, { "epoch": 0.79, "grad_norm": 0.43905726075172424, "learning_rate": 0.0005036470353241955, "loss": 1.7916, "step": 23669 }, { "epoch": 0.79, "grad_norm": 0.45202386379241943, "learning_rate": 0.0005036393601955721, "loss": 1.8137, "step": 23670 }, { "epoch": 0.79, "grad_norm": 0.4387354254722595, "learning_rate": 0.0005036316848197605, "loss": 1.7707, "step": 23671 }, { "epoch": 0.79, "grad_norm": 0.43895426392555237, "learning_rate": 0.0005036240091967701, "loss": 1.9647, "step": 23672 }, { "epoch": 0.79, "grad_norm": 0.4312973618507385, "learning_rate": 0.00050361633332661, "loss": 1.7903, "step": 23673 }, { "epoch": 0.79, "grad_norm": 0.44526997208595276, "learning_rate": 0.0005036086572092898, "loss": 1.9063, "step": 23674 }, { "epoch": 0.79, "grad_norm": 0.4204085171222687, "learning_rate": 0.0005036009808448188, "loss": 1.8081, "step": 23675 }, { "epoch": 0.79, "grad_norm": 0.42301037907600403, "learning_rate": 0.000503593304233206, "loss": 1.7586, "step": 23676 }, { "epoch": 0.79, "grad_norm": 0.6636647582054138, "learning_rate": 0.000503585627374461, "loss": 1.7556, "step": 23677 }, { "epoch": 0.79, "grad_norm": 0.4390425682067871, "learning_rate": 0.0005035779502685932, "loss": 1.7866, "step": 23678 }, { "epoch": 0.79, "grad_norm": 0.43692150712013245, "learning_rate": 0.0005035702729156115, "loss": 1.8582, "step": 23679 }, { "epoch": 0.79, "grad_norm": 0.434419721364975, "learning_rate": 0.0005035625953155256, "loss": 1.7952, "step": 23680 }, { "epoch": 0.79, "grad_norm": 0.43066564202308655, "learning_rate": 0.0005035549174683448, "loss": 1.69, "step": 23681 }, { "epoch": 0.79, "grad_norm": 0.4367597699165344, "learning_rate": 0.0005035472393740781, "loss": 1.7876, "step": 23682 }, { "epoch": 0.79, "grad_norm": 0.4344409108161926, "learning_rate": 0.0005035395610327352, "loss": 1.752, "step": 23683 }, { "epoch": 0.79, "grad_norm": 0.4265711009502411, "learning_rate": 0.0005035318824443252, "loss": 1.8288, "step": 23684 }, { "epoch": 0.79, "grad_norm": 0.42591315507888794, "learning_rate": 0.0005035242036088576, "loss": 1.8145, "step": 23685 }, { "epoch": 0.79, "grad_norm": 0.425334095954895, "learning_rate": 0.0005035165245263415, "loss": 1.8175, "step": 23686 }, { "epoch": 0.79, "grad_norm": 0.42562633752822876, "learning_rate": 0.0005035088451967863, "loss": 1.859, "step": 23687 }, { "epoch": 0.79, "grad_norm": 0.4353838860988617, "learning_rate": 0.0005035011656202015, "loss": 1.8125, "step": 23688 }, { "epoch": 0.79, "grad_norm": 0.4308969974517822, "learning_rate": 0.000503493485796596, "loss": 1.816, "step": 23689 }, { "epoch": 0.79, "grad_norm": 0.43968138098716736, "learning_rate": 0.0005034858057259797, "loss": 1.7577, "step": 23690 }, { "epoch": 0.79, "grad_norm": 0.4332529902458191, "learning_rate": 0.0005034781254083615, "loss": 1.9012, "step": 23691 }, { "epoch": 0.79, "grad_norm": 0.42215609550476074, "learning_rate": 0.0005034704448437508, "loss": 1.8501, "step": 23692 }, { "epoch": 0.79, "grad_norm": 0.5173299908638, "learning_rate": 0.000503462764032157, "loss": 1.855, "step": 23693 }, { "epoch": 0.79, "grad_norm": 0.4430263936519623, "learning_rate": 0.0005034550829735892, "loss": 1.867, "step": 23694 }, { "epoch": 0.79, "grad_norm": 0.43974629044532776, "learning_rate": 0.0005034474016680572, "loss": 1.8295, "step": 23695 }, { "epoch": 0.79, "grad_norm": 0.4499370753765106, "learning_rate": 0.0005034397201155699, "loss": 1.8683, "step": 23696 }, { "epoch": 0.79, "grad_norm": 0.4383259415626526, "learning_rate": 0.0005034320383161367, "loss": 1.8605, "step": 23697 }, { "epoch": 0.79, "grad_norm": 0.4470852315425873, "learning_rate": 0.000503424356269767, "loss": 1.7917, "step": 23698 }, { "epoch": 0.79, "grad_norm": 0.46130216121673584, "learning_rate": 0.0005034166739764701, "loss": 1.8577, "step": 23699 }, { "epoch": 0.79, "grad_norm": 0.4207781255245209, "learning_rate": 0.0005034089914362554, "loss": 1.8442, "step": 23700 }, { "epoch": 0.79, "grad_norm": 0.4443817734718323, "learning_rate": 0.0005034013086491321, "loss": 1.8419, "step": 23701 }, { "epoch": 0.79, "grad_norm": 0.42289555072784424, "learning_rate": 0.0005033936256151096, "loss": 1.8781, "step": 23702 }, { "epoch": 0.79, "grad_norm": 0.43510302901268005, "learning_rate": 0.0005033859423341972, "loss": 1.86, "step": 23703 }, { "epoch": 0.79, "grad_norm": 0.42394956946372986, "learning_rate": 0.0005033782588064042, "loss": 1.8367, "step": 23704 }, { "epoch": 0.79, "grad_norm": 0.4332229197025299, "learning_rate": 0.0005033705750317399, "loss": 1.8359, "step": 23705 }, { "epoch": 0.79, "grad_norm": 0.44358137249946594, "learning_rate": 0.0005033628910102139, "loss": 1.917, "step": 23706 }, { "epoch": 0.79, "grad_norm": 0.4441671073436737, "learning_rate": 0.0005033552067418352, "loss": 1.8274, "step": 23707 }, { "epoch": 0.79, "grad_norm": 0.4248586893081665, "learning_rate": 0.0005033475222266132, "loss": 1.7894, "step": 23708 }, { "epoch": 0.79, "grad_norm": 0.45146626234054565, "learning_rate": 0.0005033398374645573, "loss": 1.8403, "step": 23709 }, { "epoch": 0.79, "grad_norm": 0.44357937574386597, "learning_rate": 0.0005033321524556769, "loss": 1.8147, "step": 23710 }, { "epoch": 0.79, "grad_norm": 0.41526487469673157, "learning_rate": 0.000503324467199981, "loss": 1.8121, "step": 23711 }, { "epoch": 0.79, "grad_norm": 0.4304639995098114, "learning_rate": 0.0005033167816974793, "loss": 1.8761, "step": 23712 }, { "epoch": 0.79, "grad_norm": 0.43975186347961426, "learning_rate": 0.000503309095948181, "loss": 1.8564, "step": 23713 }, { "epoch": 0.79, "grad_norm": 0.44887402653694153, "learning_rate": 0.0005033014099520954, "loss": 1.8077, "step": 23714 }, { "epoch": 0.79, "grad_norm": 0.43250772356987, "learning_rate": 0.0005032937237092318, "loss": 1.8188, "step": 23715 }, { "epoch": 0.79, "grad_norm": 0.4330293834209442, "learning_rate": 0.0005032860372195996, "loss": 1.772, "step": 23716 }, { "epoch": 0.79, "grad_norm": 0.4497438669204712, "learning_rate": 0.0005032783504832081, "loss": 1.8579, "step": 23717 }, { "epoch": 0.79, "grad_norm": 0.4455534517765045, "learning_rate": 0.0005032706635000667, "loss": 1.8237, "step": 23718 }, { "epoch": 0.79, "grad_norm": 0.44006237387657166, "learning_rate": 0.0005032629762701846, "loss": 1.7507, "step": 23719 }, { "epoch": 0.79, "grad_norm": 0.43492087721824646, "learning_rate": 0.0005032552887935713, "loss": 1.8652, "step": 23720 }, { "epoch": 0.79, "grad_norm": 0.4537920355796814, "learning_rate": 0.0005032476010702359, "loss": 1.8012, "step": 23721 }, { "epoch": 0.79, "grad_norm": 0.443086713552475, "learning_rate": 0.0005032399131001879, "loss": 1.8483, "step": 23722 }, { "epoch": 0.79, "grad_norm": 0.4398415982723236, "learning_rate": 0.0005032322248834366, "loss": 1.7288, "step": 23723 }, { "epoch": 0.79, "grad_norm": 0.4342430830001831, "learning_rate": 0.0005032245364199912, "loss": 1.8702, "step": 23724 }, { "epoch": 0.79, "grad_norm": 0.43060243129730225, "learning_rate": 0.0005032168477098613, "loss": 1.8561, "step": 23725 }, { "epoch": 0.79, "grad_norm": 0.4520089328289032, "learning_rate": 0.0005032091587530561, "loss": 1.8206, "step": 23726 }, { "epoch": 0.79, "grad_norm": 0.4329392910003662, "learning_rate": 0.0005032014695495848, "loss": 1.8388, "step": 23727 }, { "epoch": 0.79, "grad_norm": 0.43080320954322815, "learning_rate": 0.0005031937800994569, "loss": 1.8534, "step": 23728 }, { "epoch": 0.79, "grad_norm": 0.4269883930683136, "learning_rate": 0.0005031860904026817, "loss": 1.798, "step": 23729 }, { "epoch": 0.79, "grad_norm": 0.4459506571292877, "learning_rate": 0.0005031784004592687, "loss": 1.7827, "step": 23730 }, { "epoch": 0.79, "grad_norm": 0.4417954683303833, "learning_rate": 0.0005031707102692268, "loss": 1.8387, "step": 23731 }, { "epoch": 0.79, "grad_norm": 0.43387824296951294, "learning_rate": 0.0005031630198325658, "loss": 1.7892, "step": 23732 }, { "epoch": 0.79, "grad_norm": 0.43254178762435913, "learning_rate": 0.0005031553291492947, "loss": 1.812, "step": 23733 }, { "epoch": 0.79, "grad_norm": 0.4432801604270935, "learning_rate": 0.000503147638219423, "loss": 1.8274, "step": 23734 }, { "epoch": 0.79, "grad_norm": 0.45316609740257263, "learning_rate": 0.0005031399470429599, "loss": 1.8352, "step": 23735 }, { "epoch": 0.79, "grad_norm": 0.4347156286239624, "learning_rate": 0.0005031322556199149, "loss": 1.7956, "step": 23736 }, { "epoch": 0.79, "grad_norm": 0.44168803095817566, "learning_rate": 0.0005031245639502974, "loss": 1.7958, "step": 23737 }, { "epoch": 0.79, "grad_norm": 0.4430702030658722, "learning_rate": 0.0005031168720341164, "loss": 1.8517, "step": 23738 }, { "epoch": 0.79, "grad_norm": 0.45638710260391235, "learning_rate": 0.0005031091798713815, "loss": 1.8003, "step": 23739 }, { "epoch": 0.79, "grad_norm": 0.4333113729953766, "learning_rate": 0.0005031014874621021, "loss": 1.7495, "step": 23740 }, { "epoch": 0.79, "grad_norm": 0.4238041937351227, "learning_rate": 0.0005030937948062873, "loss": 1.7884, "step": 23741 }, { "epoch": 0.79, "grad_norm": 0.43635496497154236, "learning_rate": 0.0005030861019039467, "loss": 1.8525, "step": 23742 }, { "epoch": 0.79, "grad_norm": 0.4341946244239807, "learning_rate": 0.0005030784087550894, "loss": 1.8787, "step": 23743 }, { "epoch": 0.79, "grad_norm": 0.44159647822380066, "learning_rate": 0.0005030707153597248, "loss": 1.8871, "step": 23744 }, { "epoch": 0.79, "grad_norm": 0.47082844376564026, "learning_rate": 0.0005030630217178624, "loss": 1.8334, "step": 23745 }, { "epoch": 0.79, "grad_norm": 0.4292353689670563, "learning_rate": 0.0005030553278295112, "loss": 1.7641, "step": 23746 }, { "epoch": 0.79, "grad_norm": 0.44404172897338867, "learning_rate": 0.000503047633694681, "loss": 1.8018, "step": 23747 }, { "epoch": 0.79, "grad_norm": 0.4259513020515442, "learning_rate": 0.0005030399393133808, "loss": 1.7891, "step": 23748 }, { "epoch": 0.79, "grad_norm": 0.43808436393737793, "learning_rate": 0.0005030322446856199, "loss": 1.8514, "step": 23749 }, { "epoch": 0.79, "grad_norm": 0.43532201647758484, "learning_rate": 0.0005030245498114079, "loss": 1.7626, "step": 23750 }, { "epoch": 0.79, "grad_norm": 0.4229345917701721, "learning_rate": 0.0005030168546907539, "loss": 1.7609, "step": 23751 }, { "epoch": 0.79, "grad_norm": 0.43843814730644226, "learning_rate": 0.0005030091593236675, "loss": 1.8388, "step": 23752 }, { "epoch": 0.79, "grad_norm": 0.44854217767715454, "learning_rate": 0.0005030014637101578, "loss": 1.8587, "step": 23753 }, { "epoch": 0.79, "grad_norm": 0.4177141487598419, "learning_rate": 0.0005029937678502342, "loss": 1.801, "step": 23754 }, { "epoch": 0.79, "grad_norm": 0.4319051206111908, "learning_rate": 0.0005029860717439061, "loss": 1.7385, "step": 23755 }, { "epoch": 0.79, "grad_norm": 0.4348786175251007, "learning_rate": 0.0005029783753911829, "loss": 1.8842, "step": 23756 }, { "epoch": 0.79, "grad_norm": 0.4617583751678467, "learning_rate": 0.0005029706787920738, "loss": 1.8985, "step": 23757 }, { "epoch": 0.79, "grad_norm": 0.4265451729297638, "learning_rate": 0.0005029629819465883, "loss": 1.8483, "step": 23758 }, { "epoch": 0.79, "grad_norm": 0.44100722670555115, "learning_rate": 0.0005029552848547355, "loss": 1.8599, "step": 23759 }, { "epoch": 0.79, "grad_norm": 0.4468626379966736, "learning_rate": 0.0005029475875165249, "loss": 1.8543, "step": 23760 }, { "epoch": 0.79, "grad_norm": 0.4324725866317749, "learning_rate": 0.000502939889931966, "loss": 1.8038, "step": 23761 }, { "epoch": 0.79, "grad_norm": 0.41349315643310547, "learning_rate": 0.0005029321921010678, "loss": 1.8187, "step": 23762 }, { "epoch": 0.79, "grad_norm": 0.44783833622932434, "learning_rate": 0.0005029244940238398, "loss": 1.8328, "step": 23763 }, { "epoch": 0.79, "grad_norm": 0.46155568957328796, "learning_rate": 0.0005029167957002915, "loss": 1.8351, "step": 23764 }, { "epoch": 0.79, "grad_norm": 0.4733055531978607, "learning_rate": 0.000502909097130432, "loss": 1.8121, "step": 23765 }, { "epoch": 0.79, "grad_norm": 0.4517907500267029, "learning_rate": 0.0005029013983142707, "loss": 1.8857, "step": 23766 }, { "epoch": 0.79, "grad_norm": 0.422344446182251, "learning_rate": 0.0005028936992518172, "loss": 1.8391, "step": 23767 }, { "epoch": 0.79, "grad_norm": 0.4338931441307068, "learning_rate": 0.0005028859999430806, "loss": 1.768, "step": 23768 }, { "epoch": 0.79, "grad_norm": 0.43581581115722656, "learning_rate": 0.0005028783003880701, "loss": 1.9473, "step": 23769 }, { "epoch": 0.79, "grad_norm": 0.43785977363586426, "learning_rate": 0.0005028706005867955, "loss": 1.8702, "step": 23770 }, { "epoch": 0.79, "grad_norm": 0.42043349146842957, "learning_rate": 0.0005028629005392657, "loss": 1.8232, "step": 23771 }, { "epoch": 0.79, "grad_norm": 0.42691570520401, "learning_rate": 0.0005028552002454903, "loss": 1.8263, "step": 23772 }, { "epoch": 0.79, "grad_norm": 0.4322200119495392, "learning_rate": 0.0005028474997054786, "loss": 1.8594, "step": 23773 }, { "epoch": 0.79, "grad_norm": 0.43189290165901184, "learning_rate": 0.0005028397989192398, "loss": 1.8749, "step": 23774 }, { "epoch": 0.79, "grad_norm": 0.4239073395729065, "learning_rate": 0.0005028320978867835, "loss": 1.8794, "step": 23775 }, { "epoch": 0.79, "grad_norm": 0.43042662739753723, "learning_rate": 0.0005028243966081188, "loss": 1.8294, "step": 23776 }, { "epoch": 0.79, "grad_norm": 0.42170315980911255, "learning_rate": 0.0005028166950832552, "loss": 1.8464, "step": 23777 }, { "epoch": 0.79, "grad_norm": 0.44700297713279724, "learning_rate": 0.0005028089933122021, "loss": 1.8664, "step": 23778 }, { "epoch": 0.79, "grad_norm": 0.44434022903442383, "learning_rate": 0.0005028012912949687, "loss": 1.8294, "step": 23779 }, { "epoch": 0.79, "grad_norm": 0.4400434195995331, "learning_rate": 0.0005027935890315644, "loss": 1.8038, "step": 23780 }, { "epoch": 0.79, "grad_norm": 0.45959508419036865, "learning_rate": 0.0005027858865219986, "loss": 1.8313, "step": 23781 }, { "epoch": 0.79, "grad_norm": 0.4329383969306946, "learning_rate": 0.0005027781837662806, "loss": 1.7663, "step": 23782 }, { "epoch": 0.79, "grad_norm": 0.4411253333091736, "learning_rate": 0.0005027704807644199, "loss": 1.7582, "step": 23783 }, { "epoch": 0.79, "grad_norm": 0.44177836179733276, "learning_rate": 0.0005027627775164254, "loss": 1.8105, "step": 23784 }, { "epoch": 0.79, "grad_norm": 0.44945934414863586, "learning_rate": 0.000502755074022307, "loss": 1.7985, "step": 23785 }, { "epoch": 0.79, "grad_norm": 0.43993058800697327, "learning_rate": 0.0005027473702820738, "loss": 1.8194, "step": 23786 }, { "epoch": 0.79, "grad_norm": 0.4450151026248932, "learning_rate": 0.0005027396662957352, "loss": 1.7876, "step": 23787 }, { "epoch": 0.79, "grad_norm": 0.4303300678730011, "learning_rate": 0.0005027319620633003, "loss": 1.774, "step": 23788 }, { "epoch": 0.79, "grad_norm": 0.4357203245162964, "learning_rate": 0.0005027242575847788, "loss": 1.8436, "step": 23789 }, { "epoch": 0.79, "grad_norm": 0.4466763436794281, "learning_rate": 0.00050271655286018, "loss": 1.7879, "step": 23790 }, { "epoch": 0.79, "grad_norm": 0.631205141544342, "learning_rate": 0.0005027088478895131, "loss": 1.8582, "step": 23791 }, { "epoch": 0.79, "grad_norm": 0.7294052243232727, "learning_rate": 0.0005027011426727875, "loss": 1.8255, "step": 23792 }, { "epoch": 0.79, "grad_norm": 0.4428750276565552, "learning_rate": 0.0005026934372100127, "loss": 1.8427, "step": 23793 }, { "epoch": 0.79, "grad_norm": 0.4483124017715454, "learning_rate": 0.0005026857315011978, "loss": 1.7988, "step": 23794 }, { "epoch": 0.79, "grad_norm": 0.43602466583251953, "learning_rate": 0.0005026780255463523, "loss": 1.8305, "step": 23795 }, { "epoch": 0.79, "grad_norm": 0.4462722837924957, "learning_rate": 0.0005026703193454856, "loss": 1.8332, "step": 23796 }, { "epoch": 0.79, "grad_norm": 0.8362200856208801, "learning_rate": 0.0005026626128986069, "loss": 1.8202, "step": 23797 }, { "epoch": 0.79, "grad_norm": 0.4211427569389343, "learning_rate": 0.0005026549062057258, "loss": 1.8819, "step": 23798 }, { "epoch": 0.79, "grad_norm": 0.4375818073749542, "learning_rate": 0.0005026471992668513, "loss": 1.868, "step": 23799 }, { "epoch": 0.79, "grad_norm": 0.4341084063053131, "learning_rate": 0.0005026394920819931, "loss": 1.817, "step": 23800 }, { "epoch": 0.79, "grad_norm": 0.4415566921234131, "learning_rate": 0.0005026317846511604, "loss": 1.7905, "step": 23801 }, { "epoch": 0.79, "grad_norm": 0.44221723079681396, "learning_rate": 0.0005026240769743626, "loss": 1.8203, "step": 23802 }, { "epoch": 0.79, "grad_norm": 0.43682602047920227, "learning_rate": 0.000502616369051609, "loss": 1.8554, "step": 23803 }, { "epoch": 0.79, "grad_norm": 0.42559629678726196, "learning_rate": 0.0005026086608829089, "loss": 1.8488, "step": 23804 }, { "epoch": 0.79, "grad_norm": 0.4305066764354706, "learning_rate": 0.0005026009524682719, "loss": 1.8268, "step": 23805 }, { "epoch": 0.79, "grad_norm": 0.4291721284389496, "learning_rate": 0.000502593243807707, "loss": 1.8429, "step": 23806 }, { "epoch": 0.79, "grad_norm": 0.4259933531284332, "learning_rate": 0.000502585534901224, "loss": 1.8152, "step": 23807 }, { "epoch": 0.79, "grad_norm": 0.4205470383167267, "learning_rate": 0.0005025778257488318, "loss": 1.7818, "step": 23808 }, { "epoch": 0.79, "grad_norm": 0.44146737456321716, "learning_rate": 0.0005025701163505399, "loss": 1.8213, "step": 23809 }, { "epoch": 0.79, "grad_norm": 0.43181103467941284, "learning_rate": 0.000502562406706358, "loss": 1.786, "step": 23810 }, { "epoch": 0.79, "grad_norm": 0.44600915908813477, "learning_rate": 0.000502554696816295, "loss": 1.872, "step": 23811 }, { "epoch": 0.79, "grad_norm": 0.43354514241218567, "learning_rate": 0.0005025469866803604, "loss": 1.751, "step": 23812 }, { "epoch": 0.79, "grad_norm": 0.46139803528785706, "learning_rate": 0.0005025392762985638, "loss": 1.7755, "step": 23813 }, { "epoch": 0.79, "grad_norm": 0.44576624035835266, "learning_rate": 0.0005025315656709143, "loss": 1.8314, "step": 23814 }, { "epoch": 0.79, "grad_norm": 0.43735700845718384, "learning_rate": 0.0005025238547974212, "loss": 1.8052, "step": 23815 }, { "epoch": 0.79, "grad_norm": 0.4409002363681793, "learning_rate": 0.000502516143678094, "loss": 1.8802, "step": 23816 }, { "epoch": 0.79, "grad_norm": 0.5041043758392334, "learning_rate": 0.0005025084323129421, "loss": 1.8322, "step": 23817 }, { "epoch": 0.79, "grad_norm": 0.4448186457157135, "learning_rate": 0.0005025007207019748, "loss": 1.8227, "step": 23818 }, { "epoch": 0.79, "grad_norm": 0.433871865272522, "learning_rate": 0.0005024930088452015, "loss": 1.7613, "step": 23819 }, { "epoch": 0.79, "grad_norm": 0.4326378405094147, "learning_rate": 0.0005024852967426314, "loss": 1.8369, "step": 23820 }, { "epoch": 0.79, "grad_norm": 0.42336535453796387, "learning_rate": 0.0005024775843942741, "loss": 1.8317, "step": 23821 }, { "epoch": 0.79, "grad_norm": 0.42355531454086304, "learning_rate": 0.0005024698718001389, "loss": 1.8048, "step": 23822 }, { "epoch": 0.79, "grad_norm": 0.4340417981147766, "learning_rate": 0.0005024621589602349, "loss": 1.7714, "step": 23823 }, { "epoch": 0.79, "grad_norm": 0.45481184124946594, "learning_rate": 0.0005024544458745718, "loss": 1.8413, "step": 23824 }, { "epoch": 0.79, "grad_norm": 0.4379604160785675, "learning_rate": 0.0005024467325431589, "loss": 1.7635, "step": 23825 }, { "epoch": 0.79, "grad_norm": 0.4382072687149048, "learning_rate": 0.0005024390189660053, "loss": 1.8856, "step": 23826 }, { "epoch": 0.79, "grad_norm": 0.5361348390579224, "learning_rate": 0.0005024313051431207, "loss": 1.8302, "step": 23827 }, { "epoch": 0.79, "grad_norm": 0.47145405411720276, "learning_rate": 0.0005024235910745143, "loss": 1.8193, "step": 23828 }, { "epoch": 0.79, "grad_norm": 0.4238201379776001, "learning_rate": 0.0005024158767601956, "loss": 1.782, "step": 23829 }, { "epoch": 0.79, "grad_norm": 0.4432904124259949, "learning_rate": 0.0005024081622001736, "loss": 1.8609, "step": 23830 }, { "epoch": 0.79, "grad_norm": 0.4314914047718048, "learning_rate": 0.000502400447394458, "loss": 1.7934, "step": 23831 }, { "epoch": 0.79, "grad_norm": 0.4690689444541931, "learning_rate": 0.0005023927323430582, "loss": 1.8238, "step": 23832 }, { "epoch": 0.79, "grad_norm": 0.435678631067276, "learning_rate": 0.0005023850170459834, "loss": 1.8557, "step": 23833 }, { "epoch": 0.79, "grad_norm": 0.4366663694381714, "learning_rate": 0.000502377301503243, "loss": 1.8271, "step": 23834 }, { "epoch": 0.79, "grad_norm": 0.4400956630706787, "learning_rate": 0.0005023695857148463, "loss": 1.8114, "step": 23835 }, { "epoch": 0.79, "grad_norm": 0.43661683797836304, "learning_rate": 0.0005023618696808028, "loss": 1.8635, "step": 23836 }, { "epoch": 0.79, "grad_norm": 0.4497276544570923, "learning_rate": 0.0005023541534011218, "loss": 1.843, "step": 23837 }, { "epoch": 0.79, "grad_norm": 0.4361473321914673, "learning_rate": 0.0005023464368758127, "loss": 1.7786, "step": 23838 }, { "epoch": 0.79, "grad_norm": 0.4213569760322571, "learning_rate": 0.0005023387201048849, "loss": 1.823, "step": 23839 }, { "epoch": 0.79, "grad_norm": 0.42029890418052673, "learning_rate": 0.0005023310030883476, "loss": 1.8215, "step": 23840 }, { "epoch": 0.79, "grad_norm": 0.7395110726356506, "learning_rate": 0.0005023232858262102, "loss": 1.8158, "step": 23841 }, { "epoch": 0.79, "grad_norm": 0.43307721614837646, "learning_rate": 0.0005023155683184823, "loss": 1.8119, "step": 23842 }, { "epoch": 0.79, "grad_norm": 0.4428878426551819, "learning_rate": 0.0005023078505651731, "loss": 1.8017, "step": 23843 }, { "epoch": 0.79, "grad_norm": 0.43568915128707886, "learning_rate": 0.0005023001325662918, "loss": 1.818, "step": 23844 }, { "epoch": 0.79, "grad_norm": 0.42800551652908325, "learning_rate": 0.0005022924143218482, "loss": 1.7274, "step": 23845 }, { "epoch": 0.79, "grad_norm": 0.4274759888648987, "learning_rate": 0.0005022846958318512, "loss": 1.7954, "step": 23846 }, { "epoch": 0.79, "grad_norm": 0.43160679936408997, "learning_rate": 0.0005022769770963106, "loss": 1.8203, "step": 23847 }, { "epoch": 0.79, "grad_norm": 0.4233206510543823, "learning_rate": 0.0005022692581152353, "loss": 1.8235, "step": 23848 }, { "epoch": 0.79, "grad_norm": 0.42987748980522156, "learning_rate": 0.0005022615388886352, "loss": 1.7421, "step": 23849 }, { "epoch": 0.79, "grad_norm": 0.43572452664375305, "learning_rate": 0.0005022538194165192, "loss": 1.7767, "step": 23850 }, { "epoch": 0.79, "grad_norm": 0.4284224510192871, "learning_rate": 0.0005022460996988968, "loss": 1.7998, "step": 23851 }, { "epoch": 0.79, "grad_norm": 0.42807164788246155, "learning_rate": 0.0005022383797357776, "loss": 1.7985, "step": 23852 }, { "epoch": 0.79, "grad_norm": 0.4506995677947998, "learning_rate": 0.0005022306595271707, "loss": 1.7898, "step": 23853 }, { "epoch": 0.79, "grad_norm": 0.42406564950942993, "learning_rate": 0.0005022229390730857, "loss": 1.8163, "step": 23854 }, { "epoch": 0.79, "grad_norm": 0.4400666356086731, "learning_rate": 0.0005022152183735318, "loss": 1.7962, "step": 23855 }, { "epoch": 0.79, "grad_norm": 0.43098345398902893, "learning_rate": 0.0005022074974285184, "loss": 1.8947, "step": 23856 }, { "epoch": 0.79, "grad_norm": 0.42913711071014404, "learning_rate": 0.0005021997762380548, "loss": 1.8387, "step": 23857 }, { "epoch": 0.79, "grad_norm": 0.43577590584754944, "learning_rate": 0.0005021920548021506, "loss": 1.8557, "step": 23858 }, { "epoch": 0.79, "grad_norm": 0.41942572593688965, "learning_rate": 0.000502184333120815, "loss": 1.7957, "step": 23859 }, { "epoch": 0.79, "grad_norm": 0.4284871816635132, "learning_rate": 0.0005021766111940574, "loss": 1.7637, "step": 23860 }, { "epoch": 0.79, "grad_norm": 0.45056501030921936, "learning_rate": 0.0005021688890218871, "loss": 1.8378, "step": 23861 }, { "epoch": 0.79, "grad_norm": 0.44309288263320923, "learning_rate": 0.0005021611666043137, "loss": 1.7934, "step": 23862 }, { "epoch": 0.79, "grad_norm": 0.4407106935977936, "learning_rate": 0.0005021534439413463, "loss": 1.7671, "step": 23863 }, { "epoch": 0.79, "grad_norm": 0.4256008267402649, "learning_rate": 0.0005021457210329946, "loss": 1.7826, "step": 23864 }, { "epoch": 0.79, "grad_norm": 0.44153550267219543, "learning_rate": 0.0005021379978792677, "loss": 1.8534, "step": 23865 }, { "epoch": 0.79, "grad_norm": 0.44540390372276306, "learning_rate": 0.0005021302744801749, "loss": 1.8781, "step": 23866 }, { "epoch": 0.79, "grad_norm": 0.4394663870334625, "learning_rate": 0.0005021225508357259, "loss": 1.8413, "step": 23867 }, { "epoch": 0.79, "grad_norm": 0.4190162420272827, "learning_rate": 0.0005021148269459298, "loss": 1.7771, "step": 23868 }, { "epoch": 0.79, "grad_norm": 0.49725770950317383, "learning_rate": 0.0005021071028107961, "loss": 1.8519, "step": 23869 }, { "epoch": 0.79, "grad_norm": 0.42740464210510254, "learning_rate": 0.0005020993784303343, "loss": 1.8454, "step": 23870 }, { "epoch": 0.79, "grad_norm": 0.4328887164592743, "learning_rate": 0.0005020916538045534, "loss": 1.8235, "step": 23871 }, { "epoch": 0.79, "grad_norm": 0.4360197186470032, "learning_rate": 0.0005020839289334631, "loss": 1.8305, "step": 23872 }, { "epoch": 0.79, "grad_norm": 0.4348331093788147, "learning_rate": 0.0005020762038170728, "loss": 1.7109, "step": 23873 }, { "epoch": 0.79, "grad_norm": 0.4404865801334381, "learning_rate": 0.0005020684784553916, "loss": 1.86, "step": 23874 }, { "epoch": 0.79, "grad_norm": 0.4262631833553314, "learning_rate": 0.0005020607528484291, "loss": 1.8345, "step": 23875 }, { "epoch": 0.79, "grad_norm": 0.4259594678878784, "learning_rate": 0.0005020530269961947, "loss": 1.8331, "step": 23876 }, { "epoch": 0.79, "grad_norm": 0.4344017207622528, "learning_rate": 0.0005020453008986976, "loss": 1.8224, "step": 23877 }, { "epoch": 0.79, "grad_norm": 0.4341888427734375, "learning_rate": 0.0005020375745559473, "loss": 1.8732, "step": 23878 }, { "epoch": 0.79, "grad_norm": 0.4415111243724823, "learning_rate": 0.0005020298479679531, "loss": 1.9105, "step": 23879 }, { "epoch": 0.79, "grad_norm": 0.4676530659198761, "learning_rate": 0.0005020221211347245, "loss": 1.8452, "step": 23880 }, { "epoch": 0.79, "grad_norm": 0.4271795153617859, "learning_rate": 0.0005020143940562707, "loss": 1.883, "step": 23881 }, { "epoch": 0.79, "grad_norm": 0.425608366727829, "learning_rate": 0.0005020066667326014, "loss": 1.8026, "step": 23882 }, { "epoch": 0.79, "grad_norm": 0.44640517234802246, "learning_rate": 0.0005019989391637257, "loss": 1.8635, "step": 23883 }, { "epoch": 0.79, "grad_norm": 0.43795591592788696, "learning_rate": 0.000501991211349653, "loss": 1.7749, "step": 23884 }, { "epoch": 0.79, "grad_norm": 0.44135481119155884, "learning_rate": 0.0005019834832903926, "loss": 1.8135, "step": 23885 }, { "epoch": 0.79, "grad_norm": 0.43772098422050476, "learning_rate": 0.0005019757549859542, "loss": 1.8017, "step": 23886 }, { "epoch": 0.79, "grad_norm": 0.42911434173583984, "learning_rate": 0.000501968026436347, "loss": 1.7541, "step": 23887 }, { "epoch": 0.79, "grad_norm": 0.43094366788864136, "learning_rate": 0.0005019602976415803, "loss": 1.8013, "step": 23888 }, { "epoch": 0.79, "grad_norm": 0.41228941082954407, "learning_rate": 0.0005019525686016636, "loss": 1.798, "step": 23889 }, { "epoch": 0.79, "grad_norm": 0.42224907875061035, "learning_rate": 0.0005019448393166062, "loss": 1.8295, "step": 23890 }, { "epoch": 0.79, "grad_norm": 0.4374091923236847, "learning_rate": 0.0005019371097864175, "loss": 1.8262, "step": 23891 }, { "epoch": 0.79, "grad_norm": 0.43246859312057495, "learning_rate": 0.000501929380011107, "loss": 1.8642, "step": 23892 }, { "epoch": 0.79, "grad_norm": 0.41937056183815, "learning_rate": 0.0005019216499906839, "loss": 1.7792, "step": 23893 }, { "epoch": 0.79, "grad_norm": 0.4291311502456665, "learning_rate": 0.0005019139197251577, "loss": 1.8084, "step": 23894 }, { "epoch": 0.79, "grad_norm": 0.43890392780303955, "learning_rate": 0.0005019061892145378, "loss": 1.766, "step": 23895 }, { "epoch": 0.8, "grad_norm": 0.43247202038764954, "learning_rate": 0.0005018984584588334, "loss": 1.7869, "step": 23896 }, { "epoch": 0.8, "grad_norm": 0.43336591124534607, "learning_rate": 0.0005018907274580541, "loss": 1.753, "step": 23897 }, { "epoch": 0.8, "grad_norm": 0.43361783027648926, "learning_rate": 0.0005018829962122092, "loss": 1.7618, "step": 23898 }, { "epoch": 0.8, "grad_norm": 0.4402778446674347, "learning_rate": 0.0005018752647213082, "loss": 1.8836, "step": 23899 }, { "epoch": 0.8, "grad_norm": 0.419001966714859, "learning_rate": 0.0005018675329853602, "loss": 1.8847, "step": 23900 }, { "epoch": 0.8, "grad_norm": 0.4424550533294678, "learning_rate": 0.0005018598010043748, "loss": 1.7554, "step": 23901 }, { "epoch": 0.8, "grad_norm": 0.45254233479499817, "learning_rate": 0.0005018520687783614, "loss": 1.844, "step": 23902 }, { "epoch": 0.8, "grad_norm": 0.4340200126171112, "learning_rate": 0.0005018443363073293, "loss": 1.8853, "step": 23903 }, { "epoch": 0.8, "grad_norm": 0.4260116517543793, "learning_rate": 0.0005018366035912879, "loss": 1.8359, "step": 23904 }, { "epoch": 0.8, "grad_norm": 0.45438429713249207, "learning_rate": 0.0005018288706302466, "loss": 1.8611, "step": 23905 }, { "epoch": 0.8, "grad_norm": 0.447639137506485, "learning_rate": 0.0005018211374242148, "loss": 1.8509, "step": 23906 }, { "epoch": 0.8, "grad_norm": 0.5341251492500305, "learning_rate": 0.0005018134039732019, "loss": 1.7958, "step": 23907 }, { "epoch": 0.8, "grad_norm": 0.4291900098323822, "learning_rate": 0.0005018056702772172, "loss": 1.7842, "step": 23908 }, { "epoch": 0.8, "grad_norm": 0.7362930178642273, "learning_rate": 0.0005017979363362702, "loss": 1.8395, "step": 23909 }, { "epoch": 0.8, "grad_norm": 0.42406561970710754, "learning_rate": 0.0005017902021503702, "loss": 1.7785, "step": 23910 }, { "epoch": 0.8, "grad_norm": 0.42877212166786194, "learning_rate": 0.0005017824677195267, "loss": 1.869, "step": 23911 }, { "epoch": 0.8, "grad_norm": 0.4336351156234741, "learning_rate": 0.0005017747330437489, "loss": 1.8722, "step": 23912 }, { "epoch": 0.8, "grad_norm": 0.45689815282821655, "learning_rate": 0.0005017669981230465, "loss": 1.8326, "step": 23913 }, { "epoch": 0.8, "grad_norm": 0.4430589973926544, "learning_rate": 0.0005017592629574286, "loss": 1.8431, "step": 23914 }, { "epoch": 0.8, "grad_norm": 0.42276531457901, "learning_rate": 0.0005017515275469046, "loss": 1.8256, "step": 23915 }, { "epoch": 0.8, "grad_norm": 0.4228343963623047, "learning_rate": 0.0005017437918914839, "loss": 1.7949, "step": 23916 }, { "epoch": 0.8, "grad_norm": 0.4418308734893799, "learning_rate": 0.0005017360559911762, "loss": 1.8304, "step": 23917 }, { "epoch": 0.8, "grad_norm": 0.44385820627212524, "learning_rate": 0.0005017283198459905, "loss": 1.7617, "step": 23918 }, { "epoch": 0.8, "grad_norm": 0.42914044857025146, "learning_rate": 0.0005017205834559364, "loss": 1.8177, "step": 23919 }, { "epoch": 0.8, "grad_norm": 0.4559824466705322, "learning_rate": 0.0005017128468210233, "loss": 1.8291, "step": 23920 }, { "epoch": 0.8, "grad_norm": 0.4323534667491913, "learning_rate": 0.0005017051099412605, "loss": 1.7668, "step": 23921 }, { "epoch": 0.8, "grad_norm": 0.44766509532928467, "learning_rate": 0.0005016973728166573, "loss": 1.9015, "step": 23922 }, { "epoch": 0.8, "grad_norm": 0.4476070702075958, "learning_rate": 0.0005016896354472233, "loss": 1.7332, "step": 23923 }, { "epoch": 0.8, "grad_norm": 0.44108039140701294, "learning_rate": 0.0005016818978329677, "loss": 1.7868, "step": 23924 }, { "epoch": 0.8, "grad_norm": 0.44096839427948, "learning_rate": 0.0005016741599739, "loss": 1.8333, "step": 23925 }, { "epoch": 0.8, "grad_norm": 0.4401371479034424, "learning_rate": 0.0005016664218700297, "loss": 1.8159, "step": 23926 }, { "epoch": 0.8, "grad_norm": 0.44512832164764404, "learning_rate": 0.0005016586835213661, "loss": 1.7995, "step": 23927 }, { "epoch": 0.8, "grad_norm": 0.4308231472969055, "learning_rate": 0.0005016509449279185, "loss": 1.8255, "step": 23928 }, { "epoch": 0.8, "grad_norm": 0.4287693500518799, "learning_rate": 0.0005016432060896964, "loss": 1.7894, "step": 23929 }, { "epoch": 0.8, "grad_norm": 0.4249029755592346, "learning_rate": 0.0005016354670067092, "loss": 1.7827, "step": 23930 }, { "epoch": 0.8, "grad_norm": 0.4591481387615204, "learning_rate": 0.0005016277276789661, "loss": 1.7656, "step": 23931 }, { "epoch": 0.8, "grad_norm": 0.42510223388671875, "learning_rate": 0.0005016199881064767, "loss": 1.9071, "step": 23932 }, { "epoch": 0.8, "grad_norm": 0.6440587639808655, "learning_rate": 0.0005016122482892503, "loss": 1.8976, "step": 23933 }, { "epoch": 0.8, "grad_norm": 0.44664570689201355, "learning_rate": 0.0005016045082272964, "loss": 1.8005, "step": 23934 }, { "epoch": 0.8, "grad_norm": 0.4444105625152588, "learning_rate": 0.0005015967679206244, "loss": 1.8473, "step": 23935 }, { "epoch": 0.8, "grad_norm": 0.422516405582428, "learning_rate": 0.0005015890273692435, "loss": 1.8401, "step": 23936 }, { "epoch": 0.8, "grad_norm": 0.4261603057384491, "learning_rate": 0.0005015812865731634, "loss": 1.7866, "step": 23937 }, { "epoch": 0.8, "grad_norm": 0.45042672753334045, "learning_rate": 0.0005015735455323931, "loss": 1.8401, "step": 23938 }, { "epoch": 0.8, "grad_norm": 0.44126397371292114, "learning_rate": 0.0005015658042469423, "loss": 1.7789, "step": 23939 }, { "epoch": 0.8, "grad_norm": 0.43638181686401367, "learning_rate": 0.0005015580627168204, "loss": 1.8657, "step": 23940 }, { "epoch": 0.8, "grad_norm": 0.44579753279685974, "learning_rate": 0.0005015503209420366, "loss": 1.8742, "step": 23941 }, { "epoch": 0.8, "grad_norm": 0.47549325227737427, "learning_rate": 0.0005015425789226004, "loss": 1.7829, "step": 23942 }, { "epoch": 0.8, "grad_norm": 0.44494134187698364, "learning_rate": 0.0005015348366585213, "loss": 1.7936, "step": 23943 }, { "epoch": 0.8, "grad_norm": 0.44163820147514343, "learning_rate": 0.0005015270941498086, "loss": 1.7877, "step": 23944 }, { "epoch": 0.8, "grad_norm": 0.4345747232437134, "learning_rate": 0.0005015193513964717, "loss": 1.8448, "step": 23945 }, { "epoch": 0.8, "grad_norm": 0.4626671075820923, "learning_rate": 0.0005015116083985199, "loss": 1.7412, "step": 23946 }, { "epoch": 0.8, "grad_norm": 0.4583011567592621, "learning_rate": 0.0005015038651559628, "loss": 1.7304, "step": 23947 }, { "epoch": 0.8, "grad_norm": 0.4396069645881653, "learning_rate": 0.0005014961216688095, "loss": 1.8035, "step": 23948 }, { "epoch": 0.8, "grad_norm": 0.4213295578956604, "learning_rate": 0.0005014883779370698, "loss": 1.7557, "step": 23949 }, { "epoch": 0.8, "grad_norm": 0.43689265847206116, "learning_rate": 0.0005014806339607529, "loss": 1.8269, "step": 23950 }, { "epoch": 0.8, "grad_norm": 0.43822914361953735, "learning_rate": 0.0005014728897398681, "loss": 1.8026, "step": 23951 }, { "epoch": 0.8, "grad_norm": 0.42900562286376953, "learning_rate": 0.000501465145274425, "loss": 1.8505, "step": 23952 }, { "epoch": 0.8, "grad_norm": 0.43136903643608093, "learning_rate": 0.0005014574005644328, "loss": 1.8414, "step": 23953 }, { "epoch": 0.8, "grad_norm": 0.4421447515487671, "learning_rate": 0.0005014496556099011, "loss": 1.7758, "step": 23954 }, { "epoch": 0.8, "grad_norm": 0.41396158933639526, "learning_rate": 0.0005014419104108391, "loss": 1.772, "step": 23955 }, { "epoch": 0.8, "grad_norm": 0.43420112133026123, "learning_rate": 0.0005014341649672564, "loss": 1.8015, "step": 23956 }, { "epoch": 0.8, "grad_norm": 0.4202629327774048, "learning_rate": 0.0005014264192791622, "loss": 1.8634, "step": 23957 }, { "epoch": 0.8, "grad_norm": 0.44181787967681885, "learning_rate": 0.000501418673346566, "loss": 1.8277, "step": 23958 }, { "epoch": 0.8, "grad_norm": 0.44965365529060364, "learning_rate": 0.0005014109271694772, "loss": 1.7957, "step": 23959 }, { "epoch": 0.8, "grad_norm": 0.42574045062065125, "learning_rate": 0.0005014031807479054, "loss": 1.7598, "step": 23960 }, { "epoch": 0.8, "grad_norm": 0.43047717213630676, "learning_rate": 0.0005013954340818596, "loss": 1.8661, "step": 23961 }, { "epoch": 0.8, "grad_norm": 0.4305060803890228, "learning_rate": 0.0005013876871713496, "loss": 1.7728, "step": 23962 }, { "epoch": 0.8, "grad_norm": 0.46234628558158875, "learning_rate": 0.0005013799400163844, "loss": 1.8622, "step": 23963 }, { "epoch": 0.8, "grad_norm": 0.42942366003990173, "learning_rate": 0.0005013721926169738, "loss": 1.8339, "step": 23964 }, { "epoch": 0.8, "grad_norm": 0.42351970076560974, "learning_rate": 0.000501364444973127, "loss": 1.8, "step": 23965 }, { "epoch": 0.8, "grad_norm": 0.43220165371894836, "learning_rate": 0.0005013566970848535, "loss": 1.8258, "step": 23966 }, { "epoch": 0.8, "grad_norm": 0.43429309129714966, "learning_rate": 0.0005013489489521625, "loss": 1.8188, "step": 23967 }, { "epoch": 0.8, "grad_norm": 0.4516466557979584, "learning_rate": 0.0005013412005750635, "loss": 1.8148, "step": 23968 }, { "epoch": 0.8, "grad_norm": 0.4273706078529358, "learning_rate": 0.0005013334519535661, "loss": 1.8391, "step": 23969 }, { "epoch": 0.8, "grad_norm": 0.41952434182167053, "learning_rate": 0.0005013257030876795, "loss": 1.8266, "step": 23970 }, { "epoch": 0.8, "grad_norm": 0.43170273303985596, "learning_rate": 0.0005013179539774132, "loss": 1.8485, "step": 23971 }, { "epoch": 0.8, "grad_norm": 0.4325435757637024, "learning_rate": 0.0005013102046227764, "loss": 1.8214, "step": 23972 }, { "epoch": 0.8, "grad_norm": 0.4458538889884949, "learning_rate": 0.0005013024550237789, "loss": 1.7955, "step": 23973 }, { "epoch": 0.8, "grad_norm": 0.4406857192516327, "learning_rate": 0.0005012947051804298, "loss": 1.8945, "step": 23974 }, { "epoch": 0.8, "grad_norm": 0.43607667088508606, "learning_rate": 0.0005012869550927385, "loss": 1.8037, "step": 23975 }, { "epoch": 0.8, "grad_norm": 0.4326898157596588, "learning_rate": 0.0005012792047607146, "loss": 1.8902, "step": 23976 }, { "epoch": 0.8, "grad_norm": 0.42322808504104614, "learning_rate": 0.0005012714541843674, "loss": 1.7506, "step": 23977 }, { "epoch": 0.8, "grad_norm": 0.43861496448516846, "learning_rate": 0.0005012637033637062, "loss": 1.861, "step": 23978 }, { "epoch": 0.8, "grad_norm": 0.4353376626968384, "learning_rate": 0.0005012559522987406, "loss": 1.7941, "step": 23979 }, { "epoch": 0.8, "grad_norm": 0.4341481626033783, "learning_rate": 0.00050124820098948, "loss": 1.8229, "step": 23980 }, { "epoch": 0.8, "grad_norm": 0.46014419198036194, "learning_rate": 0.0005012404494359336, "loss": 1.8278, "step": 23981 }, { "epoch": 0.8, "grad_norm": 0.4347953200340271, "learning_rate": 0.000501232697638111, "loss": 1.8771, "step": 23982 }, { "epoch": 0.8, "grad_norm": 0.4461366534233093, "learning_rate": 0.0005012249455960216, "loss": 1.746, "step": 23983 }, { "epoch": 0.8, "grad_norm": 0.43777719140052795, "learning_rate": 0.0005012171933096747, "loss": 1.8028, "step": 23984 }, { "epoch": 0.8, "grad_norm": 0.43156030774116516, "learning_rate": 0.0005012094407790798, "loss": 1.8156, "step": 23985 }, { "epoch": 0.8, "grad_norm": 0.43273818492889404, "learning_rate": 0.0005012016880042462, "loss": 1.8447, "step": 23986 }, { "epoch": 0.8, "grad_norm": 0.44390401244163513, "learning_rate": 0.0005011939349851835, "loss": 1.8614, "step": 23987 }, { "epoch": 0.8, "grad_norm": 0.4345172047615051, "learning_rate": 0.000501186181721901, "loss": 1.7396, "step": 23988 }, { "epoch": 0.8, "grad_norm": 0.4255566895008087, "learning_rate": 0.0005011784282144081, "loss": 1.8111, "step": 23989 }, { "epoch": 0.8, "grad_norm": 0.4375223219394684, "learning_rate": 0.0005011706744627142, "loss": 1.8416, "step": 23990 }, { "epoch": 0.8, "grad_norm": 0.4542793333530426, "learning_rate": 0.0005011629204668288, "loss": 1.7782, "step": 23991 }, { "epoch": 0.8, "grad_norm": 0.4242735207080841, "learning_rate": 0.0005011551662267613, "loss": 1.7783, "step": 23992 }, { "epoch": 0.8, "grad_norm": 0.4317241311073303, "learning_rate": 0.000501147411742521, "loss": 1.7912, "step": 23993 }, { "epoch": 0.8, "grad_norm": 0.44586557149887085, "learning_rate": 0.0005011396570141173, "loss": 1.9209, "step": 23994 }, { "epoch": 0.8, "grad_norm": 0.42293184995651245, "learning_rate": 0.0005011319020415599, "loss": 1.8126, "step": 23995 }, { "epoch": 0.8, "grad_norm": 0.4343424141407013, "learning_rate": 0.0005011241468248578, "loss": 1.81, "step": 23996 }, { "epoch": 0.8, "grad_norm": 0.432091623544693, "learning_rate": 0.0005011163913640206, "loss": 1.8545, "step": 23997 }, { "epoch": 0.8, "grad_norm": 0.43613484501838684, "learning_rate": 0.0005011086356590579, "loss": 1.7752, "step": 23998 }, { "epoch": 0.8, "grad_norm": 0.43863993883132935, "learning_rate": 0.0005011008797099789, "loss": 1.822, "step": 23999 }, { "epoch": 0.8, "grad_norm": 0.4288416802883148, "learning_rate": 0.000501093123516793, "loss": 1.68, "step": 24000 }, { "epoch": 0.8, "grad_norm": 0.4406014680862427, "learning_rate": 0.0005010853670795098, "loss": 1.7906, "step": 24001 }, { "epoch": 0.8, "grad_norm": 0.4285587966442108, "learning_rate": 0.0005010776103981384, "loss": 1.8296, "step": 24002 }, { "epoch": 0.8, "grad_norm": 0.44205188751220703, "learning_rate": 0.0005010698534726886, "loss": 1.7626, "step": 24003 }, { "epoch": 0.8, "grad_norm": 0.4739205837249756, "learning_rate": 0.0005010620963031695, "loss": 1.8628, "step": 24004 }, { "epoch": 0.8, "grad_norm": 0.4430874288082123, "learning_rate": 0.0005010543388895907, "loss": 1.8646, "step": 24005 }, { "epoch": 0.8, "grad_norm": 0.43356600403785706, "learning_rate": 0.0005010465812319616, "loss": 1.8188, "step": 24006 }, { "epoch": 0.8, "grad_norm": 0.43583670258522034, "learning_rate": 0.0005010388233302915, "loss": 1.8675, "step": 24007 }, { "epoch": 0.8, "grad_norm": 0.4263737201690674, "learning_rate": 0.0005010310651845899, "loss": 1.8818, "step": 24008 }, { "epoch": 0.8, "grad_norm": 0.4304726719856262, "learning_rate": 0.0005010233067948662, "loss": 1.8303, "step": 24009 }, { "epoch": 0.8, "grad_norm": 0.4412420988082886, "learning_rate": 0.0005010155481611298, "loss": 1.8457, "step": 24010 }, { "epoch": 0.8, "grad_norm": 0.41977035999298096, "learning_rate": 0.0005010077892833902, "loss": 1.8292, "step": 24011 }, { "epoch": 0.8, "grad_norm": 0.4277260899543762, "learning_rate": 0.0005010000301616567, "loss": 1.7829, "step": 24012 }, { "epoch": 0.8, "grad_norm": 0.43359142541885376, "learning_rate": 0.0005009922707959389, "loss": 1.816, "step": 24013 }, { "epoch": 0.8, "grad_norm": 0.4370150864124298, "learning_rate": 0.000500984511186246, "loss": 1.7929, "step": 24014 }, { "epoch": 0.8, "grad_norm": 0.43152865767478943, "learning_rate": 0.0005009767513325875, "loss": 1.8237, "step": 24015 }, { "epoch": 0.8, "grad_norm": 0.444894015789032, "learning_rate": 0.0005009689912349729, "loss": 1.8223, "step": 24016 }, { "epoch": 0.8, "grad_norm": 0.4600767493247986, "learning_rate": 0.0005009612308934116, "loss": 1.8962, "step": 24017 }, { "epoch": 0.8, "grad_norm": 0.4336899518966675, "learning_rate": 0.0005009534703079129, "loss": 1.8965, "step": 24018 }, { "epoch": 0.8, "grad_norm": 0.43250706791877747, "learning_rate": 0.0005009457094784862, "loss": 1.8065, "step": 24019 }, { "epoch": 0.8, "grad_norm": 0.430194616317749, "learning_rate": 0.0005009379484051411, "loss": 1.8749, "step": 24020 }, { "epoch": 0.8, "grad_norm": 0.4311935007572174, "learning_rate": 0.0005009301870878871, "loss": 1.7504, "step": 24021 }, { "epoch": 0.8, "grad_norm": 0.4329831600189209, "learning_rate": 0.0005009224255267333, "loss": 1.6938, "step": 24022 }, { "epoch": 0.8, "grad_norm": 0.425824910402298, "learning_rate": 0.0005009146637216892, "loss": 1.7966, "step": 24023 }, { "epoch": 0.8, "grad_norm": 0.4364316761493683, "learning_rate": 0.0005009069016727644, "loss": 1.9004, "step": 24024 }, { "epoch": 0.8, "grad_norm": 0.4253937005996704, "learning_rate": 0.0005008991393799682, "loss": 1.7767, "step": 24025 }, { "epoch": 0.8, "grad_norm": 0.4428001642227173, "learning_rate": 0.0005008913768433101, "loss": 1.8263, "step": 24026 }, { "epoch": 0.8, "grad_norm": 0.44042620062828064, "learning_rate": 0.0005008836140627995, "loss": 1.8084, "step": 24027 }, { "epoch": 0.8, "grad_norm": 0.4382290244102478, "learning_rate": 0.0005008758510384457, "loss": 1.8813, "step": 24028 }, { "epoch": 0.8, "grad_norm": 0.446983277797699, "learning_rate": 0.0005008680877702583, "loss": 1.7129, "step": 24029 }, { "epoch": 0.8, "grad_norm": 0.4517742693424225, "learning_rate": 0.0005008603242582464, "loss": 1.8649, "step": 24030 }, { "epoch": 0.8, "grad_norm": 0.47011756896972656, "learning_rate": 0.00050085256050242, "loss": 1.7636, "step": 24031 }, { "epoch": 0.8, "grad_norm": 0.4387260973453522, "learning_rate": 0.000500844796502788, "loss": 1.84, "step": 24032 }, { "epoch": 0.8, "grad_norm": 0.45109036564826965, "learning_rate": 0.00050083703225936, "loss": 1.7847, "step": 24033 }, { "epoch": 0.8, "grad_norm": 0.4623962938785553, "learning_rate": 0.0005008292677721454, "loss": 1.7528, "step": 24034 }, { "epoch": 0.8, "grad_norm": 0.4484393000602722, "learning_rate": 0.0005008215030411538, "loss": 1.8504, "step": 24035 }, { "epoch": 0.8, "grad_norm": 0.42597129940986633, "learning_rate": 0.0005008137380663945, "loss": 1.7364, "step": 24036 }, { "epoch": 0.8, "grad_norm": 0.42345014214515686, "learning_rate": 0.0005008059728478768, "loss": 1.7603, "step": 24037 }, { "epoch": 0.8, "grad_norm": 0.45511046051979065, "learning_rate": 0.0005007982073856102, "loss": 1.7923, "step": 24038 }, { "epoch": 0.8, "grad_norm": 0.4558877646923065, "learning_rate": 0.0005007904416796042, "loss": 1.8274, "step": 24039 }, { "epoch": 0.8, "grad_norm": 0.43410736322402954, "learning_rate": 0.0005007826757298682, "loss": 1.8253, "step": 24040 }, { "epoch": 0.8, "grad_norm": 0.4389081299304962, "learning_rate": 0.0005007749095364116, "loss": 1.8636, "step": 24041 }, { "epoch": 0.8, "grad_norm": 0.4589079022407532, "learning_rate": 0.0005007671430992439, "loss": 1.8118, "step": 24042 }, { "epoch": 0.8, "grad_norm": 0.43881258368492126, "learning_rate": 0.0005007593764183745, "loss": 1.7606, "step": 24043 }, { "epoch": 0.8, "grad_norm": 0.42965665459632874, "learning_rate": 0.0005007516094938126, "loss": 1.8051, "step": 24044 }, { "epoch": 0.8, "grad_norm": 0.4304415285587311, "learning_rate": 0.0005007438423255681, "loss": 1.7635, "step": 24045 }, { "epoch": 0.8, "grad_norm": 0.4478853940963745, "learning_rate": 0.00050073607491365, "loss": 1.8395, "step": 24046 }, { "epoch": 0.8, "grad_norm": 0.4228461980819702, "learning_rate": 0.0005007283072580679, "loss": 1.7738, "step": 24047 }, { "epoch": 0.8, "grad_norm": 0.41143423318862915, "learning_rate": 0.0005007205393588312, "loss": 1.8202, "step": 24048 }, { "epoch": 0.8, "grad_norm": 0.4266625940799713, "learning_rate": 0.0005007127712159493, "loss": 1.791, "step": 24049 }, { "epoch": 0.8, "grad_norm": 0.42287343740463257, "learning_rate": 0.0005007050028294317, "loss": 1.8223, "step": 24050 }, { "epoch": 0.8, "grad_norm": 0.4443584084510803, "learning_rate": 0.0005006972341992879, "loss": 1.8136, "step": 24051 }, { "epoch": 0.8, "grad_norm": 0.41706228256225586, "learning_rate": 0.0005006894653255272, "loss": 1.8243, "step": 24052 }, { "epoch": 0.8, "grad_norm": 0.4552173614501953, "learning_rate": 0.0005006816962081589, "loss": 1.813, "step": 24053 }, { "epoch": 0.8, "grad_norm": 0.6340399980545044, "learning_rate": 0.0005006739268471928, "loss": 1.8305, "step": 24054 }, { "epoch": 0.8, "grad_norm": 0.4196164608001709, "learning_rate": 0.0005006661572426379, "loss": 1.826, "step": 24055 }, { "epoch": 0.8, "grad_norm": 0.421812504529953, "learning_rate": 0.000500658387394504, "loss": 1.9043, "step": 24056 }, { "epoch": 0.8, "grad_norm": 0.4206804931163788, "learning_rate": 0.0005006506173028004, "loss": 1.8391, "step": 24057 }, { "epoch": 0.8, "grad_norm": 0.4408244490623474, "learning_rate": 0.0005006428469675364, "loss": 1.8438, "step": 24058 }, { "epoch": 0.8, "grad_norm": 0.4505203664302826, "learning_rate": 0.0005006350763887217, "loss": 1.8304, "step": 24059 }, { "epoch": 0.8, "grad_norm": 0.47369110584259033, "learning_rate": 0.0005006273055663654, "loss": 1.8263, "step": 24060 }, { "epoch": 0.8, "grad_norm": 0.4462306499481201, "learning_rate": 0.0005006195345004773, "loss": 1.8406, "step": 24061 }, { "epoch": 0.8, "grad_norm": 0.42966559529304504, "learning_rate": 0.0005006117631910665, "loss": 1.8337, "step": 24062 }, { "epoch": 0.8, "grad_norm": 0.45061272382736206, "learning_rate": 0.0005006039916381427, "loss": 1.8416, "step": 24063 }, { "epoch": 0.8, "grad_norm": 0.450867623090744, "learning_rate": 0.000500596219841715, "loss": 1.796, "step": 24064 }, { "epoch": 0.8, "grad_norm": 0.4528796374797821, "learning_rate": 0.0005005884478017933, "loss": 1.8253, "step": 24065 }, { "epoch": 0.8, "grad_norm": 0.42567524313926697, "learning_rate": 0.0005005806755183867, "loss": 1.7921, "step": 24066 }, { "epoch": 0.8, "grad_norm": 0.45847752690315247, "learning_rate": 0.0005005729029915046, "loss": 1.8285, "step": 24067 }, { "epoch": 0.8, "grad_norm": 0.4287249743938446, "learning_rate": 0.0005005651302211567, "loss": 1.8643, "step": 24068 }, { "epoch": 0.8, "grad_norm": 0.47231972217559814, "learning_rate": 0.0005005573572073522, "loss": 1.7784, "step": 24069 }, { "epoch": 0.8, "grad_norm": 0.4327874779701233, "learning_rate": 0.0005005495839501006, "loss": 1.7491, "step": 24070 }, { "epoch": 0.8, "grad_norm": 0.4379255771636963, "learning_rate": 0.0005005418104494113, "loss": 1.8184, "step": 24071 }, { "epoch": 0.8, "grad_norm": 0.4447363615036011, "learning_rate": 0.0005005340367052939, "loss": 1.8271, "step": 24072 }, { "epoch": 0.8, "grad_norm": 0.4496482312679291, "learning_rate": 0.0005005262627177578, "loss": 1.8886, "step": 24073 }, { "epoch": 0.8, "grad_norm": 0.42423972487449646, "learning_rate": 0.0005005184884868122, "loss": 1.755, "step": 24074 }, { "epoch": 0.8, "grad_norm": 0.4379657208919525, "learning_rate": 0.0005005107140124668, "loss": 1.7597, "step": 24075 }, { "epoch": 0.8, "grad_norm": 0.45129284262657166, "learning_rate": 0.000500502939294731, "loss": 1.8537, "step": 24076 }, { "epoch": 0.8, "grad_norm": 0.4350079596042633, "learning_rate": 0.000500495164333614, "loss": 1.792, "step": 24077 }, { "epoch": 0.8, "grad_norm": 0.4365828335285187, "learning_rate": 0.0005004873891291255, "loss": 1.7466, "step": 24078 }, { "epoch": 0.8, "grad_norm": 0.4337666928768158, "learning_rate": 0.0005004796136812748, "loss": 1.8382, "step": 24079 }, { "epoch": 0.8, "grad_norm": 0.4566630721092224, "learning_rate": 0.0005004718379900714, "loss": 1.8901, "step": 24080 }, { "epoch": 0.8, "grad_norm": 0.43554195761680603, "learning_rate": 0.0005004640620555248, "loss": 1.8414, "step": 24081 }, { "epoch": 0.8, "grad_norm": 0.442999929189682, "learning_rate": 0.0005004562858776443, "loss": 1.8275, "step": 24082 }, { "epoch": 0.8, "grad_norm": 0.4404575526714325, "learning_rate": 0.0005004485094564395, "loss": 1.8546, "step": 24083 }, { "epoch": 0.8, "grad_norm": 0.4422624409198761, "learning_rate": 0.0005004407327919197, "loss": 1.7906, "step": 24084 }, { "epoch": 0.8, "grad_norm": 0.43000349402427673, "learning_rate": 0.0005004329558840943, "loss": 1.8448, "step": 24085 }, { "epoch": 0.8, "grad_norm": 0.9041035175323486, "learning_rate": 0.0005004251787329728, "loss": 1.9443, "step": 24086 }, { "epoch": 0.8, "grad_norm": 0.4374547004699707, "learning_rate": 0.0005004174013385648, "loss": 1.8278, "step": 24087 }, { "epoch": 0.8, "grad_norm": 0.4345872402191162, "learning_rate": 0.0005004096237008795, "loss": 1.8542, "step": 24088 }, { "epoch": 0.8, "grad_norm": 0.4396754503250122, "learning_rate": 0.0005004018458199266, "loss": 1.8512, "step": 24089 }, { "epoch": 0.8, "grad_norm": 0.46501269936561584, "learning_rate": 0.0005003940676957153, "loss": 1.8394, "step": 24090 }, { "epoch": 0.8, "grad_norm": 0.42536085844039917, "learning_rate": 0.0005003862893282551, "loss": 1.8292, "step": 24091 }, { "epoch": 0.8, "grad_norm": 0.4437344968318939, "learning_rate": 0.0005003785107175555, "loss": 1.8727, "step": 24092 }, { "epoch": 0.8, "grad_norm": 0.45687052607536316, "learning_rate": 0.0005003707318636258, "loss": 1.877, "step": 24093 }, { "epoch": 0.8, "grad_norm": 0.4703294336795807, "learning_rate": 0.0005003629527664757, "loss": 1.8879, "step": 24094 }, { "epoch": 0.8, "grad_norm": 0.44960203766822815, "learning_rate": 0.0005003551734261144, "loss": 1.7979, "step": 24095 }, { "epoch": 0.8, "grad_norm": 0.4384036362171173, "learning_rate": 0.0005003473938425515, "loss": 1.7992, "step": 24096 }, { "epoch": 0.8, "grad_norm": 0.4697202444076538, "learning_rate": 0.0005003396140157964, "loss": 1.7667, "step": 24097 }, { "epoch": 0.8, "grad_norm": 0.4543507695198059, "learning_rate": 0.0005003318339458585, "loss": 1.89, "step": 24098 }, { "epoch": 0.8, "grad_norm": 0.4545838534832001, "learning_rate": 0.0005003240536327472, "loss": 1.842, "step": 24099 }, { "epoch": 0.8, "grad_norm": 0.43466052412986755, "learning_rate": 0.0005003162730764721, "loss": 1.8388, "step": 24100 }, { "epoch": 0.8, "grad_norm": 0.4851348102092743, "learning_rate": 0.0005003084922770426, "loss": 1.849, "step": 24101 }, { "epoch": 0.8, "grad_norm": 0.4378919005393982, "learning_rate": 0.000500300711234468, "loss": 1.8149, "step": 24102 }, { "epoch": 0.8, "grad_norm": 0.43555986881256104, "learning_rate": 0.0005002929299487579, "loss": 1.8268, "step": 24103 }, { "epoch": 0.8, "grad_norm": 0.447782963514328, "learning_rate": 0.0005002851484199217, "loss": 1.8497, "step": 24104 }, { "epoch": 0.8, "grad_norm": 0.4376147389411926, "learning_rate": 0.0005002773666479687, "loss": 1.8192, "step": 24105 }, { "epoch": 0.8, "grad_norm": 0.4242609143257141, "learning_rate": 0.0005002695846329087, "loss": 1.8469, "step": 24106 }, { "epoch": 0.8, "grad_norm": 0.4220837652683258, "learning_rate": 0.0005002618023747508, "loss": 1.7435, "step": 24107 }, { "epoch": 0.8, "grad_norm": 0.4291026294231415, "learning_rate": 0.0005002540198735046, "loss": 1.7562, "step": 24108 }, { "epoch": 0.8, "grad_norm": 0.43412747979164124, "learning_rate": 0.0005002462371291797, "loss": 1.8392, "step": 24109 }, { "epoch": 0.8, "grad_norm": 0.4319693446159363, "learning_rate": 0.0005002384541417852, "loss": 1.816, "step": 24110 }, { "epoch": 0.8, "grad_norm": 0.42448312044143677, "learning_rate": 0.0005002306709113307, "loss": 1.857, "step": 24111 }, { "epoch": 0.8, "grad_norm": 0.45035383105278015, "learning_rate": 0.0005002228874378258, "loss": 1.8168, "step": 24112 }, { "epoch": 0.8, "grad_norm": 0.42666518688201904, "learning_rate": 0.0005002151037212796, "loss": 1.8159, "step": 24113 }, { "epoch": 0.8, "grad_norm": 0.45563119649887085, "learning_rate": 0.0005002073197617019, "loss": 1.8134, "step": 24114 }, { "epoch": 0.8, "grad_norm": 0.4304100573062897, "learning_rate": 0.0005001995355591021, "loss": 1.8595, "step": 24115 }, { "epoch": 0.8, "grad_norm": 0.4333271086215973, "learning_rate": 0.0005001917511134895, "loss": 1.8109, "step": 24116 }, { "epoch": 0.8, "grad_norm": 0.435766339302063, "learning_rate": 0.0005001839664248735, "loss": 1.755, "step": 24117 }, { "epoch": 0.8, "grad_norm": 0.43714818358421326, "learning_rate": 0.0005001761814932637, "loss": 1.8137, "step": 24118 }, { "epoch": 0.8, "grad_norm": 0.41915643215179443, "learning_rate": 0.0005001683963186697, "loss": 1.8409, "step": 24119 }, { "epoch": 0.8, "grad_norm": 0.42989516258239746, "learning_rate": 0.0005001606109011006, "loss": 1.8801, "step": 24120 }, { "epoch": 0.8, "grad_norm": 0.44346901774406433, "learning_rate": 0.000500152825240566, "loss": 1.7999, "step": 24121 }, { "epoch": 0.8, "grad_norm": 0.44684189558029175, "learning_rate": 0.0005001450393370754, "loss": 1.8434, "step": 24122 }, { "epoch": 0.8, "grad_norm": 0.43807879090309143, "learning_rate": 0.0005001372531906382, "loss": 1.7936, "step": 24123 }, { "epoch": 0.8, "grad_norm": 0.4496234655380249, "learning_rate": 0.0005001294668012639, "loss": 1.907, "step": 24124 }, { "epoch": 0.8, "grad_norm": 0.4253339469432831, "learning_rate": 0.0005001216801689618, "loss": 1.7993, "step": 24125 }, { "epoch": 0.8, "grad_norm": 0.4346015155315399, "learning_rate": 0.0005001138932937415, "loss": 1.8308, "step": 24126 }, { "epoch": 0.8, "grad_norm": 0.43205565214157104, "learning_rate": 0.0005001061061756125, "loss": 1.732, "step": 24127 }, { "epoch": 0.8, "grad_norm": 0.4330992102622986, "learning_rate": 0.0005000983188145841, "loss": 1.8086, "step": 24128 }, { "epoch": 0.8, "grad_norm": 0.44126102328300476, "learning_rate": 0.000500090531210666, "loss": 1.8757, "step": 24129 }, { "epoch": 0.8, "grad_norm": 0.4486740231513977, "learning_rate": 0.0005000827433638672, "loss": 1.8635, "step": 24130 }, { "epoch": 0.8, "grad_norm": 0.41807031631469727, "learning_rate": 0.0005000749552741976, "loss": 1.7705, "step": 24131 }, { "epoch": 0.8, "grad_norm": 0.44873982667922974, "learning_rate": 0.0005000671669416664, "loss": 1.885, "step": 24132 }, { "epoch": 0.8, "grad_norm": 0.4367630183696747, "learning_rate": 0.0005000593783662833, "loss": 1.8189, "step": 24133 }, { "epoch": 0.8, "grad_norm": 0.43980568647384644, "learning_rate": 0.0005000515895480575, "loss": 1.8133, "step": 24134 }, { "epoch": 0.8, "grad_norm": 0.4234495162963867, "learning_rate": 0.0005000438004869984, "loss": 1.7406, "step": 24135 }, { "epoch": 0.8, "grad_norm": 0.4367624819278717, "learning_rate": 0.0005000360111831158, "loss": 1.885, "step": 24136 }, { "epoch": 0.8, "grad_norm": 0.43032360076904297, "learning_rate": 0.0005000282216364189, "loss": 1.808, "step": 24137 }, { "epoch": 0.8, "grad_norm": 0.43582671880722046, "learning_rate": 0.0005000204318469172, "loss": 1.8478, "step": 24138 }, { "epoch": 0.8, "grad_norm": 0.4362103343009949, "learning_rate": 0.0005000126418146201, "loss": 1.7764, "step": 24139 }, { "epoch": 0.8, "grad_norm": 0.4341312348842621, "learning_rate": 0.0005000048515395372, "loss": 1.8327, "step": 24140 }, { "epoch": 0.8, "grad_norm": 0.4256095588207245, "learning_rate": 0.0004999970610216779, "loss": 1.8058, "step": 24141 }, { "epoch": 0.8, "grad_norm": 0.4324401319026947, "learning_rate": 0.0004999892702610515, "loss": 1.8192, "step": 24142 }, { "epoch": 0.8, "grad_norm": 0.4286469519138336, "learning_rate": 0.0004999814792576678, "loss": 1.8092, "step": 24143 }, { "epoch": 0.8, "grad_norm": 0.43598097562789917, "learning_rate": 0.0004999736880115359, "loss": 1.8905, "step": 24144 }, { "epoch": 0.8, "grad_norm": 0.42916131019592285, "learning_rate": 0.0004999658965226655, "loss": 1.9379, "step": 24145 }, { "epoch": 0.8, "grad_norm": 0.43084266781806946, "learning_rate": 0.0004999581047910659, "loss": 1.7999, "step": 24146 }, { "epoch": 0.8, "grad_norm": 0.4557206928730011, "learning_rate": 0.0004999503128167467, "loss": 1.8285, "step": 24147 }, { "epoch": 0.8, "grad_norm": 0.43544676899909973, "learning_rate": 0.0004999425205997172, "loss": 1.8443, "step": 24148 }, { "epoch": 0.8, "grad_norm": 0.4349460303783417, "learning_rate": 0.0004999347281399869, "loss": 1.9187, "step": 24149 }, { "epoch": 0.8, "grad_norm": 0.44849449396133423, "learning_rate": 0.0004999269354375653, "loss": 1.8237, "step": 24150 }, { "epoch": 0.8, "grad_norm": 0.43441876769065857, "learning_rate": 0.000499919142492462, "loss": 1.843, "step": 24151 }, { "epoch": 0.8, "grad_norm": 0.4540807902812958, "learning_rate": 0.0004999113493046862, "loss": 1.8316, "step": 24152 }, { "epoch": 0.8, "grad_norm": 0.42606258392333984, "learning_rate": 0.0004999035558742475, "loss": 1.8012, "step": 24153 }, { "epoch": 0.8, "grad_norm": 0.44819387793540955, "learning_rate": 0.0004998957622011554, "loss": 1.847, "step": 24154 }, { "epoch": 0.8, "grad_norm": 0.45489969849586487, "learning_rate": 0.0004998879682854193, "loss": 1.7652, "step": 24155 }, { "epoch": 0.8, "grad_norm": 0.4451962411403656, "learning_rate": 0.0004998801741270486, "loss": 1.839, "step": 24156 }, { "epoch": 0.8, "grad_norm": 0.45782455801963806, "learning_rate": 0.0004998723797260527, "loss": 1.8143, "step": 24157 }, { "epoch": 0.8, "grad_norm": 0.4328131079673767, "learning_rate": 0.0004998645850824414, "loss": 1.7503, "step": 24158 }, { "epoch": 0.8, "grad_norm": 0.4368000328540802, "learning_rate": 0.0004998567901962238, "loss": 1.8108, "step": 24159 }, { "epoch": 0.8, "grad_norm": 0.4305550754070282, "learning_rate": 0.0004998489950674095, "loss": 1.8218, "step": 24160 }, { "epoch": 0.8, "grad_norm": 0.4666666090488434, "learning_rate": 0.0004998411996960081, "loss": 1.8627, "step": 24161 }, { "epoch": 0.8, "grad_norm": 0.44446197152137756, "learning_rate": 0.0004998334040820289, "loss": 1.8248, "step": 24162 }, { "epoch": 0.8, "grad_norm": 0.4612729847431183, "learning_rate": 0.0004998256082254812, "loss": 1.8128, "step": 24163 }, { "epoch": 0.8, "grad_norm": 0.43870094418525696, "learning_rate": 0.0004998178121263749, "loss": 1.8262, "step": 24164 }, { "epoch": 0.8, "grad_norm": 0.4260777533054352, "learning_rate": 0.0004998100157847192, "loss": 1.8985, "step": 24165 }, { "epoch": 0.8, "grad_norm": 0.4405929446220398, "learning_rate": 0.0004998022192005234, "loss": 1.8301, "step": 24166 }, { "epoch": 0.8, "grad_norm": 0.43439584970474243, "learning_rate": 0.0004997944223737973, "loss": 1.8513, "step": 24167 }, { "epoch": 0.8, "grad_norm": 0.4398666024208069, "learning_rate": 0.0004997866253045501, "loss": 1.8109, "step": 24168 }, { "epoch": 0.8, "grad_norm": 0.44281089305877686, "learning_rate": 0.0004997788279927915, "loss": 1.903, "step": 24169 }, { "epoch": 0.8, "grad_norm": 0.442427396774292, "learning_rate": 0.0004997710304385307, "loss": 1.8712, "step": 24170 }, { "epoch": 0.8, "grad_norm": 0.43866512179374695, "learning_rate": 0.0004997632326417774, "loss": 1.8419, "step": 24171 }, { "epoch": 0.8, "grad_norm": 0.44170722365379333, "learning_rate": 0.000499755434602541, "loss": 1.7601, "step": 24172 }, { "epoch": 0.8, "grad_norm": 0.4388373792171478, "learning_rate": 0.0004997476363208308, "loss": 1.9488, "step": 24173 }, { "epoch": 0.8, "grad_norm": 0.4301609992980957, "learning_rate": 0.0004997398377966564, "loss": 1.7781, "step": 24174 }, { "epoch": 0.8, "grad_norm": 0.43735837936401367, "learning_rate": 0.0004997320390300274, "loss": 1.8098, "step": 24175 }, { "epoch": 0.8, "grad_norm": 0.4378575086593628, "learning_rate": 0.0004997242400209531, "loss": 1.8777, "step": 24176 }, { "epoch": 0.8, "grad_norm": 0.43187472224235535, "learning_rate": 0.000499716440769443, "loss": 1.8308, "step": 24177 }, { "epoch": 0.8, "grad_norm": 0.42840856313705444, "learning_rate": 0.0004997086412755065, "loss": 1.8238, "step": 24178 }, { "epoch": 0.8, "grad_norm": 0.43382737040519714, "learning_rate": 0.0004997008415391532, "loss": 1.7779, "step": 24179 }, { "epoch": 0.8, "grad_norm": 0.4358721375465393, "learning_rate": 0.0004996930415603924, "loss": 1.8438, "step": 24180 }, { "epoch": 0.8, "grad_norm": 0.423862099647522, "learning_rate": 0.0004996852413392338, "loss": 1.8826, "step": 24181 }, { "epoch": 0.8, "grad_norm": 0.42476657032966614, "learning_rate": 0.0004996774408756867, "loss": 1.8877, "step": 24182 }, { "epoch": 0.8, "grad_norm": 0.43409264087677, "learning_rate": 0.0004996696401697605, "loss": 1.7818, "step": 24183 }, { "epoch": 0.8, "grad_norm": 0.443661093711853, "learning_rate": 0.000499661839221465, "loss": 1.789, "step": 24184 }, { "epoch": 0.8, "grad_norm": 0.45789673924446106, "learning_rate": 0.0004996540380308093, "loss": 1.8045, "step": 24185 }, { "epoch": 0.8, "grad_norm": 0.4269183576107025, "learning_rate": 0.0004996462365978031, "loss": 1.789, "step": 24186 }, { "epoch": 0.8, "grad_norm": 0.42692625522613525, "learning_rate": 0.0004996384349224556, "loss": 1.8657, "step": 24187 }, { "epoch": 0.8, "grad_norm": 0.4396437406539917, "learning_rate": 0.0004996306330047766, "loss": 1.8447, "step": 24188 }, { "epoch": 0.8, "grad_norm": 0.44469597935676575, "learning_rate": 0.0004996228308447755, "loss": 1.8527, "step": 24189 }, { "epoch": 0.8, "grad_norm": 0.4352304935455322, "learning_rate": 0.0004996150284424615, "loss": 1.8332, "step": 24190 }, { "epoch": 0.8, "grad_norm": 0.45642033219337463, "learning_rate": 0.0004996072257978444, "loss": 1.8725, "step": 24191 }, { "epoch": 0.8, "grad_norm": 0.4534394443035126, "learning_rate": 0.0004995994229109335, "loss": 1.7418, "step": 24192 }, { "epoch": 0.8, "grad_norm": 0.44586440920829773, "learning_rate": 0.0004995916197817384, "loss": 1.8546, "step": 24193 }, { "epoch": 0.8, "grad_norm": 0.44573935866355896, "learning_rate": 0.0004995838164102683, "loss": 1.7251, "step": 24194 }, { "epoch": 0.8, "grad_norm": 0.4363340139389038, "learning_rate": 0.000499576012796533, "loss": 1.8194, "step": 24195 }, { "epoch": 0.81, "grad_norm": 0.4379567503929138, "learning_rate": 0.0004995682089405418, "loss": 1.8369, "step": 24196 }, { "epoch": 0.81, "grad_norm": 0.4453180730342865, "learning_rate": 0.000499560404842304, "loss": 1.787, "step": 24197 }, { "epoch": 0.81, "grad_norm": 0.4296753704547882, "learning_rate": 0.0004995526005018296, "loss": 1.7967, "step": 24198 }, { "epoch": 0.81, "grad_norm": 0.4367721378803253, "learning_rate": 0.0004995447959191276, "loss": 1.8713, "step": 24199 }, { "epoch": 0.81, "grad_norm": 0.43839797377586365, "learning_rate": 0.0004995369910942075, "loss": 1.7882, "step": 24200 }, { "epoch": 0.81, "grad_norm": 0.45951345562934875, "learning_rate": 0.0004995291860270791, "loss": 1.7856, "step": 24201 }, { "epoch": 0.81, "grad_norm": 0.44370710849761963, "learning_rate": 0.0004995213807177515, "loss": 1.8049, "step": 24202 }, { "epoch": 0.81, "grad_norm": 0.4184986352920532, "learning_rate": 0.0004995135751662345, "loss": 1.8443, "step": 24203 }, { "epoch": 0.81, "grad_norm": 0.42473042011260986, "learning_rate": 0.0004995057693725373, "loss": 1.8496, "step": 24204 }, { "epoch": 0.81, "grad_norm": 0.4303884506225586, "learning_rate": 0.0004994979633366696, "loss": 1.8246, "step": 24205 }, { "epoch": 0.81, "grad_norm": 0.42532068490982056, "learning_rate": 0.0004994901570586406, "loss": 1.7995, "step": 24206 }, { "epoch": 0.81, "grad_norm": 0.42408841848373413, "learning_rate": 0.0004994823505384601, "loss": 1.7461, "step": 24207 }, { "epoch": 0.81, "grad_norm": 0.43596482276916504, "learning_rate": 0.0004994745437761374, "loss": 1.7641, "step": 24208 }, { "epoch": 0.81, "grad_norm": 0.4389725923538208, "learning_rate": 0.0004994667367716819, "loss": 1.8634, "step": 24209 }, { "epoch": 0.81, "grad_norm": 0.4533979892730713, "learning_rate": 0.0004994589295251032, "loss": 1.8106, "step": 24210 }, { "epoch": 0.81, "grad_norm": 0.43503183126449585, "learning_rate": 0.0004994511220364107, "loss": 1.8181, "step": 24211 }, { "epoch": 0.81, "grad_norm": 0.41779860854148865, "learning_rate": 0.0004994433143056141, "loss": 1.775, "step": 24212 }, { "epoch": 0.81, "grad_norm": 0.441087007522583, "learning_rate": 0.0004994355063327226, "loss": 1.833, "step": 24213 }, { "epoch": 0.81, "grad_norm": 0.4251638948917389, "learning_rate": 0.0004994276981177458, "loss": 1.7265, "step": 24214 }, { "epoch": 0.81, "grad_norm": 0.4298308789730072, "learning_rate": 0.0004994198896606931, "loss": 1.7647, "step": 24215 }, { "epoch": 0.81, "grad_norm": 0.4404739737510681, "learning_rate": 0.000499412080961574, "loss": 1.8487, "step": 24216 }, { "epoch": 0.81, "grad_norm": 0.477922260761261, "learning_rate": 0.0004994042720203981, "loss": 1.8155, "step": 24217 }, { "epoch": 0.81, "grad_norm": 0.43462949991226196, "learning_rate": 0.0004993964628371748, "loss": 1.7615, "step": 24218 }, { "epoch": 0.81, "grad_norm": 0.4473750591278076, "learning_rate": 0.0004993886534119136, "loss": 1.8763, "step": 24219 }, { "epoch": 0.81, "grad_norm": 0.43925487995147705, "learning_rate": 0.0004993808437446239, "loss": 1.7839, "step": 24220 }, { "epoch": 0.81, "grad_norm": 0.43545156717300415, "learning_rate": 0.0004993730338353153, "loss": 1.8715, "step": 24221 }, { "epoch": 0.81, "grad_norm": 0.4229479432106018, "learning_rate": 0.0004993652236839971, "loss": 1.7916, "step": 24222 }, { "epoch": 0.81, "grad_norm": 0.41869404911994934, "learning_rate": 0.000499357413290679, "loss": 1.8371, "step": 24223 }, { "epoch": 0.81, "grad_norm": 0.44376882910728455, "learning_rate": 0.0004993496026553702, "loss": 1.7169, "step": 24224 }, { "epoch": 0.81, "grad_norm": 0.45653221011161804, "learning_rate": 0.0004993417917780805, "loss": 1.7827, "step": 24225 }, { "epoch": 0.81, "grad_norm": 0.4211428463459015, "learning_rate": 0.0004993339806588193, "loss": 1.7788, "step": 24226 }, { "epoch": 0.81, "grad_norm": 0.42920634150505066, "learning_rate": 0.0004993261692975959, "loss": 1.8193, "step": 24227 }, { "epoch": 0.81, "grad_norm": 0.4520058035850525, "learning_rate": 0.0004993183576944199, "loss": 1.7785, "step": 24228 }, { "epoch": 0.81, "grad_norm": 0.44351816177368164, "learning_rate": 0.0004993105458493009, "loss": 1.7728, "step": 24229 }, { "epoch": 0.81, "grad_norm": 0.4432716369628906, "learning_rate": 0.0004993027337622482, "loss": 1.8275, "step": 24230 }, { "epoch": 0.81, "grad_norm": 0.41896694898605347, "learning_rate": 0.0004992949214332712, "loss": 1.7957, "step": 24231 }, { "epoch": 0.81, "grad_norm": 0.44847333431243896, "learning_rate": 0.0004992871088623798, "loss": 1.8025, "step": 24232 }, { "epoch": 0.81, "grad_norm": 0.43212732672691345, "learning_rate": 0.000499279296049583, "loss": 1.7866, "step": 24233 }, { "epoch": 0.81, "grad_norm": 0.4284989833831787, "learning_rate": 0.0004992714829948905, "loss": 1.7672, "step": 24234 }, { "epoch": 0.81, "grad_norm": 0.43458008766174316, "learning_rate": 0.0004992636696983119, "loss": 1.8014, "step": 24235 }, { "epoch": 0.81, "grad_norm": 0.43654853105545044, "learning_rate": 0.0004992558561598565, "loss": 1.8978, "step": 24236 }, { "epoch": 0.81, "grad_norm": 0.43481916189193726, "learning_rate": 0.0004992480423795339, "loss": 1.8066, "step": 24237 }, { "epoch": 0.81, "grad_norm": 0.43691521883010864, "learning_rate": 0.0004992402283573535, "loss": 1.7683, "step": 24238 }, { "epoch": 0.81, "grad_norm": 0.4297814667224884, "learning_rate": 0.0004992324140933248, "loss": 1.7949, "step": 24239 }, { "epoch": 0.81, "grad_norm": 0.4381592869758606, "learning_rate": 0.0004992245995874574, "loss": 1.8564, "step": 24240 }, { "epoch": 0.81, "grad_norm": 0.4583911597728729, "learning_rate": 0.0004992167848397606, "loss": 1.9419, "step": 24241 }, { "epoch": 0.81, "grad_norm": 0.43235963582992554, "learning_rate": 0.000499208969850244, "loss": 1.8163, "step": 24242 }, { "epoch": 0.81, "grad_norm": 0.4200843870639801, "learning_rate": 0.000499201154618917, "loss": 1.82, "step": 24243 }, { "epoch": 0.81, "grad_norm": 0.4286198019981384, "learning_rate": 0.0004991933391457892, "loss": 1.8682, "step": 24244 }, { "epoch": 0.81, "grad_norm": 0.43521276116371155, "learning_rate": 0.00049918552343087, "loss": 1.8128, "step": 24245 }, { "epoch": 0.81, "grad_norm": 0.4336794316768646, "learning_rate": 0.000499177707474169, "loss": 1.775, "step": 24246 }, { "epoch": 0.81, "grad_norm": 0.44950807094573975, "learning_rate": 0.0004991698912756955, "loss": 1.7592, "step": 24247 }, { "epoch": 0.81, "grad_norm": 0.4338010251522064, "learning_rate": 0.0004991620748354593, "loss": 1.9187, "step": 24248 }, { "epoch": 0.81, "grad_norm": 0.42584580183029175, "learning_rate": 0.0004991542581534695, "loss": 1.8224, "step": 24249 }, { "epoch": 0.81, "grad_norm": 0.43446260690689087, "learning_rate": 0.0004991464412297358, "loss": 1.8151, "step": 24250 }, { "epoch": 0.81, "grad_norm": 0.4485401213169098, "learning_rate": 0.0004991386240642677, "loss": 1.8822, "step": 24251 }, { "epoch": 0.81, "grad_norm": 0.43705013394355774, "learning_rate": 0.0004991308066570747, "loss": 1.8091, "step": 24252 }, { "epoch": 0.81, "grad_norm": 0.43992823362350464, "learning_rate": 0.0004991229890081662, "loss": 1.7905, "step": 24253 }, { "epoch": 0.81, "grad_norm": 0.4174157977104187, "learning_rate": 0.0004991151711175517, "loss": 1.8163, "step": 24254 }, { "epoch": 0.81, "grad_norm": 0.4327375590801239, "learning_rate": 0.0004991073529852408, "loss": 1.7626, "step": 24255 }, { "epoch": 0.81, "grad_norm": 0.46971365809440613, "learning_rate": 0.0004990995346112428, "loss": 1.8853, "step": 24256 }, { "epoch": 0.81, "grad_norm": 0.46314114332199097, "learning_rate": 0.0004990917159955673, "loss": 1.8496, "step": 24257 }, { "epoch": 0.81, "grad_norm": 0.9755862951278687, "learning_rate": 0.0004990838971382238, "loss": 1.8469, "step": 24258 }, { "epoch": 0.81, "grad_norm": 0.4634054899215698, "learning_rate": 0.0004990760780392219, "loss": 1.916, "step": 24259 }, { "epoch": 0.81, "grad_norm": 0.4654880166053772, "learning_rate": 0.0004990682586985709, "loss": 1.7723, "step": 24260 }, { "epoch": 0.81, "grad_norm": 0.44907334446907043, "learning_rate": 0.0004990604391162804, "loss": 1.769, "step": 24261 }, { "epoch": 0.81, "grad_norm": 0.45074161887168884, "learning_rate": 0.0004990526192923597, "loss": 1.8229, "step": 24262 }, { "epoch": 0.81, "grad_norm": 0.43997132778167725, "learning_rate": 0.0004990447992268187, "loss": 1.7851, "step": 24263 }, { "epoch": 0.81, "grad_norm": 0.43458935618400574, "learning_rate": 0.0004990369789196665, "loss": 1.8432, "step": 24264 }, { "epoch": 0.81, "grad_norm": 0.44052034616470337, "learning_rate": 0.0004990291583709127, "loss": 1.9093, "step": 24265 }, { "epoch": 0.81, "grad_norm": 0.42570972442626953, "learning_rate": 0.0004990213375805669, "loss": 1.8752, "step": 24266 }, { "epoch": 0.81, "grad_norm": 0.44641441106796265, "learning_rate": 0.0004990135165486385, "loss": 1.8851, "step": 24267 }, { "epoch": 0.81, "grad_norm": 0.43737664818763733, "learning_rate": 0.000499005695275137, "loss": 1.8403, "step": 24268 }, { "epoch": 0.81, "grad_norm": 0.43071243166923523, "learning_rate": 0.0004989978737600719, "loss": 1.7818, "step": 24269 }, { "epoch": 0.81, "grad_norm": 0.4452369809150696, "learning_rate": 0.0004989900520034527, "loss": 1.8866, "step": 24270 }, { "epoch": 0.81, "grad_norm": 0.46723777055740356, "learning_rate": 0.0004989822300052889, "loss": 1.864, "step": 24271 }, { "epoch": 0.81, "grad_norm": 0.4395403563976288, "learning_rate": 0.0004989744077655901, "loss": 1.8503, "step": 24272 }, { "epoch": 0.81, "grad_norm": 0.4217536151409149, "learning_rate": 0.0004989665852843656, "loss": 1.8159, "step": 24273 }, { "epoch": 0.81, "grad_norm": 0.43396738171577454, "learning_rate": 0.0004989587625616249, "loss": 1.7339, "step": 24274 }, { "epoch": 0.81, "grad_norm": 0.446281373500824, "learning_rate": 0.0004989509395973776, "loss": 1.8398, "step": 24275 }, { "epoch": 0.81, "grad_norm": 0.4274408519268036, "learning_rate": 0.0004989431163916333, "loss": 1.8219, "step": 24276 }, { "epoch": 0.81, "grad_norm": 0.44028910994529724, "learning_rate": 0.0004989352929444013, "loss": 1.8094, "step": 24277 }, { "epoch": 0.81, "grad_norm": 0.4400925636291504, "learning_rate": 0.0004989274692556912, "loss": 1.8291, "step": 24278 }, { "epoch": 0.81, "grad_norm": 0.4267245829105377, "learning_rate": 0.0004989196453255123, "loss": 1.8331, "step": 24279 }, { "epoch": 0.81, "grad_norm": 0.44162318110466003, "learning_rate": 0.0004989118211538745, "loss": 1.8107, "step": 24280 }, { "epoch": 0.81, "grad_norm": 0.4427354633808136, "learning_rate": 0.0004989039967407869, "loss": 1.8114, "step": 24281 }, { "epoch": 0.81, "grad_norm": 0.4264518916606903, "learning_rate": 0.0004988961720862593, "loss": 1.7776, "step": 24282 }, { "epoch": 0.81, "grad_norm": 0.4334586262702942, "learning_rate": 0.0004988883471903009, "loss": 1.8052, "step": 24283 }, { "epoch": 0.81, "grad_norm": 0.43473100662231445, "learning_rate": 0.0004988805220529213, "loss": 1.8485, "step": 24284 }, { "epoch": 0.81, "grad_norm": 0.43439698219299316, "learning_rate": 0.0004988726966741302, "loss": 1.8198, "step": 24285 }, { "epoch": 0.81, "grad_norm": 0.43552443385124207, "learning_rate": 0.0004988648710539369, "loss": 1.8177, "step": 24286 }, { "epoch": 0.81, "grad_norm": 0.43505555391311646, "learning_rate": 0.000498857045192351, "loss": 1.8022, "step": 24287 }, { "epoch": 0.81, "grad_norm": 0.4342060983181, "learning_rate": 0.0004988492190893818, "loss": 1.8317, "step": 24288 }, { "epoch": 0.81, "grad_norm": 0.436268150806427, "learning_rate": 0.000498841392745039, "loss": 1.9414, "step": 24289 }, { "epoch": 0.81, "grad_norm": 0.4419306814670563, "learning_rate": 0.0004988335661593322, "loss": 1.8154, "step": 24290 }, { "epoch": 0.81, "grad_norm": 0.4548443853855133, "learning_rate": 0.0004988257393322707, "loss": 1.8238, "step": 24291 }, { "epoch": 0.81, "grad_norm": 0.4343705177307129, "learning_rate": 0.000498817912263864, "loss": 1.8353, "step": 24292 }, { "epoch": 0.81, "grad_norm": 0.4438428580760956, "learning_rate": 0.0004988100849541216, "loss": 1.8072, "step": 24293 }, { "epoch": 0.81, "grad_norm": 0.43712320923805237, "learning_rate": 0.0004988022574030531, "loss": 1.8375, "step": 24294 }, { "epoch": 0.81, "grad_norm": 0.4490646421909332, "learning_rate": 0.0004987944296106679, "loss": 1.7845, "step": 24295 }, { "epoch": 0.81, "grad_norm": 0.4262683689594269, "learning_rate": 0.0004987866015769756, "loss": 1.8383, "step": 24296 }, { "epoch": 0.81, "grad_norm": 0.43857309222221375, "learning_rate": 0.0004987787733019857, "loss": 1.7703, "step": 24297 }, { "epoch": 0.81, "grad_norm": 0.44397562742233276, "learning_rate": 0.0004987709447857075, "loss": 1.8264, "step": 24298 }, { "epoch": 0.81, "grad_norm": 0.433786541223526, "learning_rate": 0.0004987631160281508, "loss": 1.8125, "step": 24299 }, { "epoch": 0.81, "grad_norm": 0.4375405013561249, "learning_rate": 0.0004987552870293248, "loss": 1.8077, "step": 24300 }, { "epoch": 0.81, "grad_norm": 0.4377029538154602, "learning_rate": 0.0004987474577892394, "loss": 1.8478, "step": 24301 }, { "epoch": 0.81, "grad_norm": 0.42412135004997253, "learning_rate": 0.0004987396283079037, "loss": 1.7796, "step": 24302 }, { "epoch": 0.81, "grad_norm": 0.427945613861084, "learning_rate": 0.0004987317985853275, "loss": 1.822, "step": 24303 }, { "epoch": 0.81, "grad_norm": 0.44231152534484863, "learning_rate": 0.0004987239686215201, "loss": 1.8057, "step": 24304 }, { "epoch": 0.81, "grad_norm": 0.431115061044693, "learning_rate": 0.0004987161384164911, "loss": 1.7857, "step": 24305 }, { "epoch": 0.81, "grad_norm": 0.4199386239051819, "learning_rate": 0.00049870830797025, "loss": 1.8141, "step": 24306 }, { "epoch": 0.81, "grad_norm": 0.46119827032089233, "learning_rate": 0.0004987004772828063, "loss": 1.8418, "step": 24307 }, { "epoch": 0.81, "grad_norm": 0.416233628988266, "learning_rate": 0.0004986926463541694, "loss": 1.8689, "step": 24308 }, { "epoch": 0.81, "grad_norm": 0.41412553191185, "learning_rate": 0.0004986848151843491, "loss": 1.8593, "step": 24309 }, { "epoch": 0.81, "grad_norm": 0.4503810703754425, "learning_rate": 0.0004986769837733545, "loss": 1.8565, "step": 24310 }, { "epoch": 0.81, "grad_norm": 0.4406494200229645, "learning_rate": 0.0004986691521211955, "loss": 1.7763, "step": 24311 }, { "epoch": 0.81, "grad_norm": 0.4266161322593689, "learning_rate": 0.0004986613202278812, "loss": 1.8315, "step": 24312 }, { "epoch": 0.81, "grad_norm": 0.4307381510734558, "learning_rate": 0.0004986534880934215, "loss": 1.7817, "step": 24313 }, { "epoch": 0.81, "grad_norm": 0.4276275038719177, "learning_rate": 0.0004986456557178257, "loss": 1.7634, "step": 24314 }, { "epoch": 0.81, "grad_norm": 0.4454764723777771, "learning_rate": 0.0004986378231011034, "loss": 1.8332, "step": 24315 }, { "epoch": 0.81, "grad_norm": 0.45888713002204895, "learning_rate": 0.0004986299902432639, "loss": 1.8962, "step": 24316 }, { "epoch": 0.81, "grad_norm": 0.4308088421821594, "learning_rate": 0.000498622157144317, "loss": 1.8022, "step": 24317 }, { "epoch": 0.81, "grad_norm": 0.4372214376926422, "learning_rate": 0.000498614323804272, "loss": 1.8689, "step": 24318 }, { "epoch": 0.81, "grad_norm": 0.43149399757385254, "learning_rate": 0.0004986064902231384, "loss": 1.7672, "step": 24319 }, { "epoch": 0.81, "grad_norm": 0.43147894740104675, "learning_rate": 0.000498598656400926, "loss": 1.8318, "step": 24320 }, { "epoch": 0.81, "grad_norm": 0.42686641216278076, "learning_rate": 0.0004985908223376439, "loss": 1.8544, "step": 24321 }, { "epoch": 0.81, "grad_norm": 0.4310588538646698, "learning_rate": 0.0004985829880333018, "loss": 1.7642, "step": 24322 }, { "epoch": 0.81, "grad_norm": 0.4345501959323883, "learning_rate": 0.0004985751534879094, "loss": 1.8496, "step": 24323 }, { "epoch": 0.81, "grad_norm": 0.4454340636730194, "learning_rate": 0.0004985673187014759, "loss": 1.8522, "step": 24324 }, { "epoch": 0.81, "grad_norm": 0.41724592447280884, "learning_rate": 0.0004985594836740109, "loss": 1.8108, "step": 24325 }, { "epoch": 0.81, "grad_norm": 0.4368675649166107, "learning_rate": 0.0004985516484055239, "loss": 1.9287, "step": 24326 }, { "epoch": 0.81, "grad_norm": 0.4514259696006775, "learning_rate": 0.0004985438128960246, "loss": 1.7608, "step": 24327 }, { "epoch": 0.81, "grad_norm": 0.4157147705554962, "learning_rate": 0.0004985359771455223, "loss": 1.7998, "step": 24328 }, { "epoch": 0.81, "grad_norm": 0.4474613070487976, "learning_rate": 0.0004985281411540264, "loss": 1.9478, "step": 24329 }, { "epoch": 0.81, "grad_norm": 0.43392854928970337, "learning_rate": 0.0004985203049215469, "loss": 1.8137, "step": 24330 }, { "epoch": 0.81, "grad_norm": 0.44566836953163147, "learning_rate": 0.0004985124684480929, "loss": 1.8453, "step": 24331 }, { "epoch": 0.81, "grad_norm": 0.43146780133247375, "learning_rate": 0.0004985046317336739, "loss": 1.9193, "step": 24332 }, { "epoch": 0.81, "grad_norm": 0.43123191595077515, "learning_rate": 0.0004984967947782997, "loss": 1.7852, "step": 24333 }, { "epoch": 0.81, "grad_norm": 0.4241643249988556, "learning_rate": 0.0004984889575819795, "loss": 1.8092, "step": 24334 }, { "epoch": 0.81, "grad_norm": 0.44013336300849915, "learning_rate": 0.0004984811201447229, "loss": 1.8171, "step": 24335 }, { "epoch": 0.81, "grad_norm": 0.4332360029220581, "learning_rate": 0.0004984732824665396, "loss": 1.8277, "step": 24336 }, { "epoch": 0.81, "grad_norm": 0.42620864510536194, "learning_rate": 0.0004984654445474389, "loss": 1.8329, "step": 24337 }, { "epoch": 0.81, "grad_norm": 0.43289482593536377, "learning_rate": 0.0004984576063874305, "loss": 1.8612, "step": 24338 }, { "epoch": 0.81, "grad_norm": 0.43431657552719116, "learning_rate": 0.0004984497679865238, "loss": 1.8344, "step": 24339 }, { "epoch": 0.81, "grad_norm": 0.44184452295303345, "learning_rate": 0.0004984419293447281, "loss": 1.8162, "step": 24340 }, { "epoch": 0.81, "grad_norm": 0.4492318332195282, "learning_rate": 0.0004984340904620534, "loss": 1.8509, "step": 24341 }, { "epoch": 0.81, "grad_norm": 0.45002269744873047, "learning_rate": 0.0004984262513385088, "loss": 1.8345, "step": 24342 }, { "epoch": 0.81, "grad_norm": 0.4361887276172638, "learning_rate": 0.000498418411974104, "loss": 1.7526, "step": 24343 }, { "epoch": 0.81, "grad_norm": 0.45376890897750854, "learning_rate": 0.0004984105723688485, "loss": 1.862, "step": 24344 }, { "epoch": 0.81, "grad_norm": 1.5949833393096924, "learning_rate": 0.0004984027325227518, "loss": 1.9083, "step": 24345 }, { "epoch": 0.81, "grad_norm": 0.4554360806941986, "learning_rate": 0.0004983948924358234, "loss": 1.7713, "step": 24346 }, { "epoch": 0.81, "grad_norm": 0.45524081587791443, "learning_rate": 0.0004983870521080728, "loss": 1.7857, "step": 24347 }, { "epoch": 0.81, "grad_norm": 0.46917426586151123, "learning_rate": 0.0004983792115395096, "loss": 1.8111, "step": 24348 }, { "epoch": 0.81, "grad_norm": 0.5248304009437561, "learning_rate": 0.0004983713707301433, "loss": 1.8073, "step": 24349 }, { "epoch": 0.81, "grad_norm": 0.459561824798584, "learning_rate": 0.0004983635296799833, "loss": 1.8099, "step": 24350 }, { "epoch": 0.81, "grad_norm": 0.43883588910102844, "learning_rate": 0.0004983556883890393, "loss": 1.8555, "step": 24351 }, { "epoch": 0.81, "grad_norm": 0.43800801038742065, "learning_rate": 0.0004983478468573206, "loss": 1.8083, "step": 24352 }, { "epoch": 0.81, "grad_norm": 0.48858851194381714, "learning_rate": 0.000498340005084837, "loss": 1.8687, "step": 24353 }, { "epoch": 0.81, "grad_norm": 0.46211206912994385, "learning_rate": 0.0004983321630715977, "loss": 1.8686, "step": 24354 }, { "epoch": 0.81, "grad_norm": 0.42241060733795166, "learning_rate": 0.0004983243208176125, "loss": 1.8081, "step": 24355 }, { "epoch": 0.81, "grad_norm": 0.45770028233528137, "learning_rate": 0.0004983164783228907, "loss": 1.771, "step": 24356 }, { "epoch": 0.81, "grad_norm": 0.4467722475528717, "learning_rate": 0.0004983086355874419, "loss": 1.8428, "step": 24357 }, { "epoch": 0.81, "grad_norm": 0.4444597363471985, "learning_rate": 0.0004983007926112758, "loss": 1.8563, "step": 24358 }, { "epoch": 0.81, "grad_norm": 0.4321969449520111, "learning_rate": 0.0004982929493944015, "loss": 1.7886, "step": 24359 }, { "epoch": 0.81, "grad_norm": 0.4664849638938904, "learning_rate": 0.000498285105936829, "loss": 1.8696, "step": 24360 }, { "epoch": 0.81, "grad_norm": 0.45296555757522583, "learning_rate": 0.0004982772622385674, "loss": 1.8484, "step": 24361 }, { "epoch": 0.81, "grad_norm": 0.44054871797561646, "learning_rate": 0.0004982694182996267, "loss": 1.76, "step": 24362 }, { "epoch": 0.81, "grad_norm": 0.5323629379272461, "learning_rate": 0.0004982615741200159, "loss": 1.8346, "step": 24363 }, { "epoch": 0.81, "grad_norm": 0.42956966161727905, "learning_rate": 0.0004982537296997448, "loss": 1.8281, "step": 24364 }, { "epoch": 0.81, "grad_norm": 0.709787905216217, "learning_rate": 0.000498245885038823, "loss": 1.7785, "step": 24365 }, { "epoch": 0.81, "grad_norm": 0.4272986650466919, "learning_rate": 0.0004982380401372597, "loss": 1.8158, "step": 24366 }, { "epoch": 0.81, "grad_norm": 0.4253421723842621, "learning_rate": 0.0004982301949950648, "loss": 1.7738, "step": 24367 }, { "epoch": 0.81, "grad_norm": 0.43539556860923767, "learning_rate": 0.0004982223496122477, "loss": 1.8004, "step": 24368 }, { "epoch": 0.81, "grad_norm": 0.4394170045852661, "learning_rate": 0.0004982145039888177, "loss": 1.8072, "step": 24369 }, { "epoch": 0.81, "grad_norm": 0.43666741251945496, "learning_rate": 0.0004982066581247847, "loss": 1.8056, "step": 24370 }, { "epoch": 0.81, "grad_norm": 0.4333725869655609, "learning_rate": 0.0004981988120201579, "loss": 1.8639, "step": 24371 }, { "epoch": 0.81, "grad_norm": 0.44707703590393066, "learning_rate": 0.0004981909656749468, "loss": 1.8364, "step": 24372 }, { "epoch": 0.81, "grad_norm": 0.4458482563495636, "learning_rate": 0.0004981831190891613, "loss": 1.8411, "step": 24373 }, { "epoch": 0.81, "grad_norm": 0.44281458854675293, "learning_rate": 0.0004981752722628106, "loss": 1.857, "step": 24374 }, { "epoch": 0.81, "grad_norm": 0.4465867578983307, "learning_rate": 0.0004981674251959043, "loss": 1.8122, "step": 24375 }, { "epoch": 0.81, "grad_norm": 0.4252590537071228, "learning_rate": 0.000498159577888452, "loss": 1.7892, "step": 24376 }, { "epoch": 0.81, "grad_norm": 0.4410470128059387, "learning_rate": 0.0004981517303404631, "loss": 1.8076, "step": 24377 }, { "epoch": 0.81, "grad_norm": 0.43318644165992737, "learning_rate": 0.0004981438825519473, "loss": 1.7829, "step": 24378 }, { "epoch": 0.81, "grad_norm": 0.4363451302051544, "learning_rate": 0.000498136034522914, "loss": 1.8097, "step": 24379 }, { "epoch": 0.81, "grad_norm": 0.46472740173339844, "learning_rate": 0.0004981281862533727, "loss": 1.8611, "step": 24380 }, { "epoch": 0.81, "grad_norm": 0.4379580318927765, "learning_rate": 0.0004981203377433329, "loss": 1.7834, "step": 24381 }, { "epoch": 0.81, "grad_norm": 0.42192256450653076, "learning_rate": 0.0004981124889928044, "loss": 1.8161, "step": 24382 }, { "epoch": 0.81, "grad_norm": 0.43208861351013184, "learning_rate": 0.0004981046400017963, "loss": 1.8381, "step": 24383 }, { "epoch": 0.81, "grad_norm": 0.43817469477653503, "learning_rate": 0.0004980967907703184, "loss": 1.8308, "step": 24384 }, { "epoch": 0.81, "grad_norm": 0.43775853514671326, "learning_rate": 0.0004980889412983803, "loss": 1.8509, "step": 24385 }, { "epoch": 0.81, "grad_norm": 0.43689286708831787, "learning_rate": 0.0004980810915859914, "loss": 1.8474, "step": 24386 }, { "epoch": 0.81, "grad_norm": 0.4335862994194031, "learning_rate": 0.0004980732416331612, "loss": 1.827, "step": 24387 }, { "epoch": 0.81, "grad_norm": 0.42765629291534424, "learning_rate": 0.0004980653914398993, "loss": 1.7912, "step": 24388 }, { "epoch": 0.81, "grad_norm": 0.42970868945121765, "learning_rate": 0.0004980575410062151, "loss": 1.8317, "step": 24389 }, { "epoch": 0.81, "grad_norm": 0.44418859481811523, "learning_rate": 0.0004980496903321184, "loss": 1.7501, "step": 24390 }, { "epoch": 0.81, "grad_norm": 0.45949098467826843, "learning_rate": 0.0004980418394176184, "loss": 1.8048, "step": 24391 }, { "epoch": 0.81, "grad_norm": 0.4445611536502838, "learning_rate": 0.0004980339882627248, "loss": 1.8611, "step": 24392 }, { "epoch": 0.81, "grad_norm": 0.42907413840293884, "learning_rate": 0.0004980261368674473, "loss": 1.8101, "step": 24393 }, { "epoch": 0.81, "grad_norm": 0.42729437351226807, "learning_rate": 0.000498018285231795, "loss": 1.8751, "step": 24394 }, { "epoch": 0.81, "grad_norm": 0.4141031801700592, "learning_rate": 0.0004980104333557778, "loss": 1.8411, "step": 24395 }, { "epoch": 0.81, "grad_norm": 0.4295829236507416, "learning_rate": 0.0004980025812394051, "loss": 1.8492, "step": 24396 }, { "epoch": 0.81, "grad_norm": 0.43088269233703613, "learning_rate": 0.0004979947288826864, "loss": 1.8179, "step": 24397 }, { "epoch": 0.81, "grad_norm": 0.42838001251220703, "learning_rate": 0.0004979868762856315, "loss": 1.7389, "step": 24398 }, { "epoch": 0.81, "grad_norm": 0.4395155608654022, "learning_rate": 0.0004979790234482494, "loss": 1.7702, "step": 24399 }, { "epoch": 0.81, "grad_norm": 0.42888081073760986, "learning_rate": 0.00049797117037055, "loss": 1.7837, "step": 24400 }, { "epoch": 0.81, "grad_norm": 0.4297400414943695, "learning_rate": 0.0004979633170525429, "loss": 1.8673, "step": 24401 }, { "epoch": 0.81, "grad_norm": 0.4405592381954193, "learning_rate": 0.0004979554634942374, "loss": 1.8493, "step": 24402 }, { "epoch": 0.81, "grad_norm": 0.4429483413696289, "learning_rate": 0.0004979476096956431, "loss": 1.8234, "step": 24403 }, { "epoch": 0.81, "grad_norm": 0.4287540316581726, "learning_rate": 0.0004979397556567697, "loss": 1.8289, "step": 24404 }, { "epoch": 0.81, "grad_norm": 0.4320976436138153, "learning_rate": 0.0004979319013776265, "loss": 1.7854, "step": 24405 }, { "epoch": 0.81, "grad_norm": 0.4278927743434906, "learning_rate": 0.0004979240468582232, "loss": 1.8052, "step": 24406 }, { "epoch": 0.81, "grad_norm": 0.44675007462501526, "learning_rate": 0.0004979161920985692, "loss": 1.8723, "step": 24407 }, { "epoch": 0.81, "grad_norm": 0.4266309142112732, "learning_rate": 0.0004979083370986742, "loss": 1.8111, "step": 24408 }, { "epoch": 0.81, "grad_norm": 0.4268650710582733, "learning_rate": 0.0004979004818585474, "loss": 1.7367, "step": 24409 }, { "epoch": 0.81, "grad_norm": 0.42898333072662354, "learning_rate": 0.0004978926263781989, "loss": 1.8089, "step": 24410 }, { "epoch": 0.81, "grad_norm": 0.42508652806282043, "learning_rate": 0.0004978847706576377, "loss": 1.822, "step": 24411 }, { "epoch": 0.81, "grad_norm": 0.4335891306400299, "learning_rate": 0.0004978769146968737, "loss": 1.8983, "step": 24412 }, { "epoch": 0.81, "grad_norm": 0.44173920154571533, "learning_rate": 0.0004978690584959162, "loss": 1.8288, "step": 24413 }, { "epoch": 0.81, "grad_norm": 0.44195041060447693, "learning_rate": 0.0004978612020547748, "loss": 1.8276, "step": 24414 }, { "epoch": 0.81, "grad_norm": 0.4339602291584015, "learning_rate": 0.000497853345373459, "loss": 1.8025, "step": 24415 }, { "epoch": 0.81, "grad_norm": 0.43337780237197876, "learning_rate": 0.0004978454884519784, "loss": 1.8375, "step": 24416 }, { "epoch": 0.81, "grad_norm": 0.43740612268447876, "learning_rate": 0.0004978376312903426, "loss": 1.7662, "step": 24417 }, { "epoch": 0.81, "grad_norm": 0.4341183602809906, "learning_rate": 0.0004978297738885611, "loss": 1.8037, "step": 24418 }, { "epoch": 0.81, "grad_norm": 0.442432165145874, "learning_rate": 0.0004978219162466433, "loss": 1.7626, "step": 24419 }, { "epoch": 0.81, "grad_norm": 0.4362519681453705, "learning_rate": 0.0004978140583645988, "loss": 1.7153, "step": 24420 }, { "epoch": 0.81, "grad_norm": 0.4338573217391968, "learning_rate": 0.0004978062002424374, "loss": 1.8331, "step": 24421 }, { "epoch": 0.81, "grad_norm": 0.42576420307159424, "learning_rate": 0.0004977983418801682, "loss": 1.7421, "step": 24422 }, { "epoch": 0.81, "grad_norm": 0.4327487349510193, "learning_rate": 0.000497790483277801, "loss": 1.9245, "step": 24423 }, { "epoch": 0.81, "grad_norm": 0.4421479105949402, "learning_rate": 0.0004977826244353454, "loss": 1.7607, "step": 24424 }, { "epoch": 0.81, "grad_norm": 0.45799967646598816, "learning_rate": 0.0004977747653528108, "loss": 1.8493, "step": 24425 }, { "epoch": 0.81, "grad_norm": 0.4522039294242859, "learning_rate": 0.0004977669060302068, "loss": 1.814, "step": 24426 }, { "epoch": 0.81, "grad_norm": 0.4348258376121521, "learning_rate": 0.0004977590464675429, "loss": 1.7681, "step": 24427 }, { "epoch": 0.81, "grad_norm": 0.4378761649131775, "learning_rate": 0.0004977511866648286, "loss": 1.8744, "step": 24428 }, { "epoch": 0.81, "grad_norm": 0.47290703654289246, "learning_rate": 0.0004977433266220734, "loss": 1.7435, "step": 24429 }, { "epoch": 0.81, "grad_norm": 0.4374184310436249, "learning_rate": 0.0004977354663392872, "loss": 1.8128, "step": 24430 }, { "epoch": 0.81, "grad_norm": 0.43159955739974976, "learning_rate": 0.000497727605816479, "loss": 1.798, "step": 24431 }, { "epoch": 0.81, "grad_norm": 0.4503467082977295, "learning_rate": 0.0004977197450536588, "loss": 1.8109, "step": 24432 }, { "epoch": 0.81, "grad_norm": 0.43874961137771606, "learning_rate": 0.000497711884050836, "loss": 1.7843, "step": 24433 }, { "epoch": 0.81, "grad_norm": 0.43906092643737793, "learning_rate": 0.00049770402280802, "loss": 1.8729, "step": 24434 }, { "epoch": 0.81, "grad_norm": 0.44863855838775635, "learning_rate": 0.0004976961613252205, "loss": 1.8485, "step": 24435 }, { "epoch": 0.81, "grad_norm": 0.425512433052063, "learning_rate": 0.0004976882996024468, "loss": 1.7838, "step": 24436 }, { "epoch": 0.81, "grad_norm": 0.4467480778694153, "learning_rate": 0.0004976804376397089, "loss": 1.8904, "step": 24437 }, { "epoch": 0.81, "grad_norm": 0.4658227562904358, "learning_rate": 0.0004976725754370161, "loss": 1.8757, "step": 24438 }, { "epoch": 0.81, "grad_norm": 0.4335552155971527, "learning_rate": 0.0004976647129943776, "loss": 1.8032, "step": 24439 }, { "epoch": 0.81, "grad_norm": 0.43873733282089233, "learning_rate": 0.0004976568503118035, "loss": 1.8291, "step": 24440 }, { "epoch": 0.81, "grad_norm": 0.4707601070404053, "learning_rate": 0.0004976489873893031, "loss": 1.8399, "step": 24441 }, { "epoch": 0.81, "grad_norm": 0.4574353098869324, "learning_rate": 0.0004976411242268859, "loss": 1.8848, "step": 24442 }, { "epoch": 0.81, "grad_norm": 0.45317956805229187, "learning_rate": 0.0004976332608245614, "loss": 1.8053, "step": 24443 }, { "epoch": 0.81, "grad_norm": 0.4321722388267517, "learning_rate": 0.0004976253971823393, "loss": 1.8743, "step": 24444 }, { "epoch": 0.81, "grad_norm": 0.43893733620643616, "learning_rate": 0.0004976175333002292, "loss": 1.8323, "step": 24445 }, { "epoch": 0.81, "grad_norm": 0.4472908079624176, "learning_rate": 0.0004976096691782405, "loss": 1.9132, "step": 24446 }, { "epoch": 0.81, "grad_norm": 0.45226338505744934, "learning_rate": 0.0004976018048163827, "loss": 1.838, "step": 24447 }, { "epoch": 0.81, "grad_norm": 0.4350663423538208, "learning_rate": 0.0004975939402146655, "loss": 1.7867, "step": 24448 }, { "epoch": 0.81, "grad_norm": 0.4338635802268982, "learning_rate": 0.0004975860753730983, "loss": 1.7367, "step": 24449 }, { "epoch": 0.81, "grad_norm": 0.47255390882492065, "learning_rate": 0.0004975782102916908, "loss": 1.7522, "step": 24450 }, { "epoch": 0.81, "grad_norm": 0.4824554920196533, "learning_rate": 0.0004975703449704524, "loss": 1.9637, "step": 24451 }, { "epoch": 0.81, "grad_norm": 0.4600900411605835, "learning_rate": 0.0004975624794093928, "loss": 1.8823, "step": 24452 }, { "epoch": 0.81, "grad_norm": 0.47352248430252075, "learning_rate": 0.0004975546136085213, "loss": 1.8185, "step": 24453 }, { "epoch": 0.81, "grad_norm": 0.43676742911338806, "learning_rate": 0.0004975467475678478, "loss": 1.9, "step": 24454 }, { "epoch": 0.81, "grad_norm": 0.43555912375450134, "learning_rate": 0.0004975388812873815, "loss": 1.8791, "step": 24455 }, { "epoch": 0.81, "grad_norm": 0.44374123215675354, "learning_rate": 0.0004975310147671321, "loss": 1.8301, "step": 24456 }, { "epoch": 0.81, "grad_norm": 0.4322371780872345, "learning_rate": 0.0004975231480071092, "loss": 1.7746, "step": 24457 }, { "epoch": 0.81, "grad_norm": 0.4409717321395874, "learning_rate": 0.0004975152810073224, "loss": 1.8793, "step": 24458 }, { "epoch": 0.81, "grad_norm": 0.4372721314430237, "learning_rate": 0.000497507413767781, "loss": 1.7617, "step": 24459 }, { "epoch": 0.81, "grad_norm": 0.42920228838920593, "learning_rate": 0.0004974995462884948, "loss": 1.849, "step": 24460 }, { "epoch": 0.81, "grad_norm": 0.4347635805606842, "learning_rate": 0.0004974916785694732, "loss": 1.8441, "step": 24461 }, { "epoch": 0.81, "grad_norm": 0.4331646263599396, "learning_rate": 0.0004974838106107258, "loss": 1.8523, "step": 24462 }, { "epoch": 0.81, "grad_norm": 0.4805443286895752, "learning_rate": 0.0004974759424122621, "loss": 1.7771, "step": 24463 }, { "epoch": 0.81, "grad_norm": 0.44466984272003174, "learning_rate": 0.0004974680739740919, "loss": 1.8461, "step": 24464 }, { "epoch": 0.81, "grad_norm": 0.42742300033569336, "learning_rate": 0.0004974602052962244, "loss": 1.8486, "step": 24465 }, { "epoch": 0.81, "grad_norm": 0.4405030906200409, "learning_rate": 0.0004974523363786693, "loss": 1.9164, "step": 24466 }, { "epoch": 0.81, "grad_norm": 0.42960742115974426, "learning_rate": 0.0004974444672214361, "loss": 1.8479, "step": 24467 }, { "epoch": 0.81, "grad_norm": 0.4445459544658661, "learning_rate": 0.0004974365978245345, "loss": 1.8187, "step": 24468 }, { "epoch": 0.81, "grad_norm": 0.4466681480407715, "learning_rate": 0.0004974287281879741, "loss": 1.8562, "step": 24469 }, { "epoch": 0.81, "grad_norm": 0.44260841608047485, "learning_rate": 0.0004974208583117642, "loss": 1.8261, "step": 24470 }, { "epoch": 0.81, "grad_norm": 0.4305431842803955, "learning_rate": 0.0004974129881959143, "loss": 1.8066, "step": 24471 }, { "epoch": 0.81, "grad_norm": 0.43271586298942566, "learning_rate": 0.0004974051178404343, "loss": 1.7845, "step": 24472 }, { "epoch": 0.81, "grad_norm": 0.4277677834033966, "learning_rate": 0.0004973972472453335, "loss": 1.8064, "step": 24473 }, { "epoch": 0.81, "grad_norm": 0.45140230655670166, "learning_rate": 0.0004973893764106216, "loss": 1.7904, "step": 24474 }, { "epoch": 0.81, "grad_norm": 0.4513813257217407, "learning_rate": 0.000497381505336308, "loss": 1.8471, "step": 24475 }, { "epoch": 0.81, "grad_norm": 0.4241681396961212, "learning_rate": 0.0004973736340224024, "loss": 1.7948, "step": 24476 }, { "epoch": 0.81, "grad_norm": 0.4350779354572296, "learning_rate": 0.0004973657624689142, "loss": 1.8399, "step": 24477 }, { "epoch": 0.81, "grad_norm": 0.44153866171836853, "learning_rate": 0.0004973578906758531, "loss": 1.8088, "step": 24478 }, { "epoch": 0.81, "grad_norm": 0.4481884241104126, "learning_rate": 0.0004973500186432287, "loss": 1.8238, "step": 24479 }, { "epoch": 0.81, "grad_norm": 1.1205918788909912, "learning_rate": 0.0004973421463710504, "loss": 1.8236, "step": 24480 }, { "epoch": 0.81, "grad_norm": 0.42700913548469543, "learning_rate": 0.0004973342738593277, "loss": 1.8145, "step": 24481 }, { "epoch": 0.81, "grad_norm": 0.42822059988975525, "learning_rate": 0.0004973264011080703, "loss": 1.7988, "step": 24482 }, { "epoch": 0.81, "grad_norm": 0.4550127387046814, "learning_rate": 0.0004973185281172878, "loss": 1.7747, "step": 24483 }, { "epoch": 0.81, "grad_norm": 0.42416492104530334, "learning_rate": 0.0004973106548869897, "loss": 1.8142, "step": 24484 }, { "epoch": 0.81, "grad_norm": 0.4378978908061981, "learning_rate": 0.0004973027814171854, "loss": 1.7759, "step": 24485 }, { "epoch": 0.81, "grad_norm": 0.43456676602363586, "learning_rate": 0.0004972949077078848, "loss": 1.7301, "step": 24486 }, { "epoch": 0.81, "grad_norm": 0.4429720342159271, "learning_rate": 0.0004972870337590971, "loss": 1.8163, "step": 24487 }, { "epoch": 0.81, "grad_norm": 0.4352876543998718, "learning_rate": 0.000497279159570832, "loss": 1.753, "step": 24488 }, { "epoch": 0.81, "grad_norm": 0.4417855739593506, "learning_rate": 0.0004972712851430991, "loss": 1.8701, "step": 24489 }, { "epoch": 0.81, "grad_norm": 0.45527079701423645, "learning_rate": 0.000497263410475908, "loss": 1.8298, "step": 24490 }, { "epoch": 0.81, "grad_norm": 0.4476654529571533, "learning_rate": 0.000497255535569268, "loss": 1.741, "step": 24491 }, { "epoch": 0.81, "grad_norm": 0.4359451234340668, "learning_rate": 0.000497247660423189, "loss": 1.7767, "step": 24492 }, { "epoch": 0.81, "grad_norm": 0.43278515338897705, "learning_rate": 0.0004972397850376804, "loss": 1.8304, "step": 24493 }, { "epoch": 0.81, "grad_norm": 0.4479126036167145, "learning_rate": 0.0004972319094127517, "loss": 1.7842, "step": 24494 }, { "epoch": 0.81, "grad_norm": 0.450283020734787, "learning_rate": 0.0004972240335484125, "loss": 1.719, "step": 24495 }, { "epoch": 0.81, "grad_norm": 0.45723050832748413, "learning_rate": 0.0004972161574446725, "loss": 1.8281, "step": 24496 }, { "epoch": 0.82, "grad_norm": 0.4344497323036194, "learning_rate": 0.0004972082811015411, "loss": 1.8151, "step": 24497 }, { "epoch": 0.82, "grad_norm": 0.44189220666885376, "learning_rate": 0.0004972004045190278, "loss": 1.8615, "step": 24498 }, { "epoch": 0.82, "grad_norm": 0.4363963305950165, "learning_rate": 0.0004971925276971424, "loss": 1.8245, "step": 24499 }, { "epoch": 0.82, "grad_norm": 0.42881903052330017, "learning_rate": 0.0004971846506358944, "loss": 1.8439, "step": 24500 }, { "epoch": 0.82, "grad_norm": 0.4312807321548462, "learning_rate": 0.000497176773335293, "loss": 1.7478, "step": 24501 }, { "epoch": 0.82, "grad_norm": 0.4287278354167938, "learning_rate": 0.0004971688957953484, "loss": 1.8841, "step": 24502 }, { "epoch": 0.82, "grad_norm": 0.4282469153404236, "learning_rate": 0.0004971610180160695, "loss": 1.8407, "step": 24503 }, { "epoch": 0.82, "grad_norm": 0.45118191838264465, "learning_rate": 0.0004971531399974664, "loss": 1.8469, "step": 24504 }, { "epoch": 0.82, "grad_norm": 0.4196934401988983, "learning_rate": 0.0004971452617395483, "loss": 1.8565, "step": 24505 }, { "epoch": 0.82, "grad_norm": 0.4379214644432068, "learning_rate": 0.0004971373832423248, "loss": 1.9093, "step": 24506 }, { "epoch": 0.82, "grad_norm": 0.43419787287712097, "learning_rate": 0.0004971295045058059, "loss": 1.7361, "step": 24507 }, { "epoch": 0.82, "grad_norm": 0.4434046745300293, "learning_rate": 0.0004971216255300004, "loss": 1.853, "step": 24508 }, { "epoch": 0.82, "grad_norm": 0.4375298321247101, "learning_rate": 0.0004971137463149186, "loss": 1.8325, "step": 24509 }, { "epoch": 0.82, "grad_norm": 0.4422430098056793, "learning_rate": 0.0004971058668605697, "loss": 1.837, "step": 24510 }, { "epoch": 0.82, "grad_norm": 0.436348021030426, "learning_rate": 0.0004970979871669634, "loss": 1.8361, "step": 24511 }, { "epoch": 0.82, "grad_norm": 0.4535137414932251, "learning_rate": 0.0004970901072341089, "loss": 1.8527, "step": 24512 }, { "epoch": 0.82, "grad_norm": 0.4319559335708618, "learning_rate": 0.0004970822270620163, "loss": 1.7541, "step": 24513 }, { "epoch": 0.82, "grad_norm": 0.44414278864860535, "learning_rate": 0.0004970743466506949, "loss": 1.7833, "step": 24514 }, { "epoch": 0.82, "grad_norm": 0.43246930837631226, "learning_rate": 0.0004970664660001542, "loss": 1.8021, "step": 24515 }, { "epoch": 0.82, "grad_norm": 0.4292377829551697, "learning_rate": 0.0004970585851104038, "loss": 1.836, "step": 24516 }, { "epoch": 0.82, "grad_norm": 0.42853686213493347, "learning_rate": 0.0004970507039814534, "loss": 1.8327, "step": 24517 }, { "epoch": 0.82, "grad_norm": 0.4418899416923523, "learning_rate": 0.0004970428226133125, "loss": 1.9058, "step": 24518 }, { "epoch": 0.82, "grad_norm": 0.44123613834381104, "learning_rate": 0.0004970349410059905, "loss": 1.7014, "step": 24519 }, { "epoch": 0.82, "grad_norm": 0.4438697397708893, "learning_rate": 0.0004970270591594973, "loss": 1.8897, "step": 24520 }, { "epoch": 0.82, "grad_norm": 0.4266742765903473, "learning_rate": 0.0004970191770738423, "loss": 1.8554, "step": 24521 }, { "epoch": 0.82, "grad_norm": 0.4301162660121918, "learning_rate": 0.0004970112947490349, "loss": 1.7795, "step": 24522 }, { "epoch": 0.82, "grad_norm": 0.45092442631721497, "learning_rate": 0.0004970034121850848, "loss": 1.735, "step": 24523 }, { "epoch": 0.82, "grad_norm": 0.4406643807888031, "learning_rate": 0.0004969955293820016, "loss": 1.8504, "step": 24524 }, { "epoch": 0.82, "grad_norm": 0.4354780614376068, "learning_rate": 0.000496987646339795, "loss": 1.815, "step": 24525 }, { "epoch": 0.82, "grad_norm": 0.4480184316635132, "learning_rate": 0.0004969797630584742, "loss": 1.841, "step": 24526 }, { "epoch": 0.82, "grad_norm": 0.44681909680366516, "learning_rate": 0.0004969718795380493, "loss": 1.8519, "step": 24527 }, { "epoch": 0.82, "grad_norm": 0.4324387311935425, "learning_rate": 0.0004969639957785293, "loss": 1.7883, "step": 24528 }, { "epoch": 0.82, "grad_norm": 0.4308302402496338, "learning_rate": 0.0004969561117799241, "loss": 1.8209, "step": 24529 }, { "epoch": 0.82, "grad_norm": 0.44060224294662476, "learning_rate": 0.0004969482275422432, "loss": 1.866, "step": 24530 }, { "epoch": 0.82, "grad_norm": 0.4468841850757599, "learning_rate": 0.0004969403430654962, "loss": 1.8487, "step": 24531 }, { "epoch": 0.82, "grad_norm": 0.43902352452278137, "learning_rate": 0.0004969324583496927, "loss": 1.8462, "step": 24532 }, { "epoch": 0.82, "grad_norm": 0.44610095024108887, "learning_rate": 0.0004969245733948421, "loss": 1.8575, "step": 24533 }, { "epoch": 0.82, "grad_norm": 0.44658634066581726, "learning_rate": 0.000496916688200954, "loss": 1.8201, "step": 24534 }, { "epoch": 0.82, "grad_norm": 0.44565871357917786, "learning_rate": 0.0004969088027680382, "loss": 1.9399, "step": 24535 }, { "epoch": 0.82, "grad_norm": 0.4368402659893036, "learning_rate": 0.0004969009170961041, "loss": 1.8688, "step": 24536 }, { "epoch": 0.82, "grad_norm": 0.4333760738372803, "learning_rate": 0.0004968930311851613, "loss": 1.7896, "step": 24537 }, { "epoch": 0.82, "grad_norm": 0.43243443965911865, "learning_rate": 0.0004968851450352194, "loss": 1.8285, "step": 24538 }, { "epoch": 0.82, "grad_norm": 0.45017850399017334, "learning_rate": 0.0004968772586462879, "loss": 1.8526, "step": 24539 }, { "epoch": 0.82, "grad_norm": 0.4691626727581024, "learning_rate": 0.0004968693720183764, "loss": 1.8138, "step": 24540 }, { "epoch": 0.82, "grad_norm": 0.44825100898742676, "learning_rate": 0.0004968614851514945, "loss": 1.7978, "step": 24541 }, { "epoch": 0.82, "grad_norm": 0.4269465208053589, "learning_rate": 0.0004968535980456519, "loss": 1.7944, "step": 24542 }, { "epoch": 0.82, "grad_norm": 0.43386372923851013, "learning_rate": 0.0004968457107008579, "loss": 1.7722, "step": 24543 }, { "epoch": 0.82, "grad_norm": 0.43190908432006836, "learning_rate": 0.0004968378231171222, "loss": 1.797, "step": 24544 }, { "epoch": 0.82, "grad_norm": 0.4330817461013794, "learning_rate": 0.0004968299352944545, "loss": 1.7605, "step": 24545 }, { "epoch": 0.82, "grad_norm": 0.43402454257011414, "learning_rate": 0.0004968220472328641, "loss": 1.7904, "step": 24546 }, { "epoch": 0.82, "grad_norm": 0.45297014713287354, "learning_rate": 0.0004968141589323608, "loss": 1.8885, "step": 24547 }, { "epoch": 0.82, "grad_norm": 0.4448694884777069, "learning_rate": 0.000496806270392954, "loss": 1.8356, "step": 24548 }, { "epoch": 0.82, "grad_norm": 0.44398534297943115, "learning_rate": 0.0004967983816146536, "loss": 1.7528, "step": 24549 }, { "epoch": 0.82, "grad_norm": 0.4370169937610626, "learning_rate": 0.0004967904925974689, "loss": 1.7964, "step": 24550 }, { "epoch": 0.82, "grad_norm": 0.4434519410133362, "learning_rate": 0.0004967826033414095, "loss": 1.8407, "step": 24551 }, { "epoch": 0.82, "grad_norm": 0.46228647232055664, "learning_rate": 0.0004967747138464851, "loss": 1.7761, "step": 24552 }, { "epoch": 0.82, "grad_norm": 0.44015589356422424, "learning_rate": 0.0004967668241127051, "loss": 1.7814, "step": 24553 }, { "epoch": 0.82, "grad_norm": 0.4228600859642029, "learning_rate": 0.0004967589341400791, "loss": 1.8508, "step": 24554 }, { "epoch": 0.82, "grad_norm": 0.46137523651123047, "learning_rate": 0.0004967510439286168, "loss": 1.7771, "step": 24555 }, { "epoch": 0.82, "grad_norm": 0.4445226490497589, "learning_rate": 0.0004967431534783278, "loss": 1.8895, "step": 24556 }, { "epoch": 0.82, "grad_norm": 0.45146307349205017, "learning_rate": 0.0004967352627892216, "loss": 1.8605, "step": 24557 }, { "epoch": 0.82, "grad_norm": 0.43799889087677, "learning_rate": 0.0004967273718613077, "loss": 1.8398, "step": 24558 }, { "epoch": 0.82, "grad_norm": 0.48797446489334106, "learning_rate": 0.0004967194806945957, "loss": 1.8371, "step": 24559 }, { "epoch": 0.82, "grad_norm": 0.43910863995552063, "learning_rate": 0.0004967115892890953, "loss": 1.8615, "step": 24560 }, { "epoch": 0.82, "grad_norm": 0.4369131922721863, "learning_rate": 0.0004967036976448161, "loss": 1.7963, "step": 24561 }, { "epoch": 0.82, "grad_norm": 0.4425639510154724, "learning_rate": 0.0004966958057617675, "loss": 1.8171, "step": 24562 }, { "epoch": 0.82, "grad_norm": 0.45224469900131226, "learning_rate": 0.0004966879136399593, "loss": 1.7899, "step": 24563 }, { "epoch": 0.82, "grad_norm": 0.440646767616272, "learning_rate": 0.0004966800212794008, "loss": 1.8625, "step": 24564 }, { "epoch": 0.82, "grad_norm": 0.4360401928424835, "learning_rate": 0.0004966721286801017, "loss": 1.8752, "step": 24565 }, { "epoch": 0.82, "grad_norm": 0.44400548934936523, "learning_rate": 0.0004966642358420717, "loss": 1.8443, "step": 24566 }, { "epoch": 0.82, "grad_norm": 0.4579903185367584, "learning_rate": 0.0004966563427653204, "loss": 1.7984, "step": 24567 }, { "epoch": 0.82, "grad_norm": 0.4299785792827606, "learning_rate": 0.0004966484494498571, "loss": 1.8232, "step": 24568 }, { "epoch": 0.82, "grad_norm": 0.43833938241004944, "learning_rate": 0.0004966405558956916, "loss": 1.8024, "step": 24569 }, { "epoch": 0.82, "grad_norm": 0.46909692883491516, "learning_rate": 0.0004966326621028335, "loss": 1.8069, "step": 24570 }, { "epoch": 0.82, "grad_norm": 0.4355703890323639, "learning_rate": 0.0004966247680712923, "loss": 1.7672, "step": 24571 }, { "epoch": 0.82, "grad_norm": 0.4321970045566559, "learning_rate": 0.0004966168738010776, "loss": 1.7642, "step": 24572 }, { "epoch": 0.82, "grad_norm": 0.4527561366558075, "learning_rate": 0.000496608979292199, "loss": 1.8139, "step": 24573 }, { "epoch": 0.82, "grad_norm": 0.43065208196640015, "learning_rate": 0.0004966010845446659, "loss": 1.8087, "step": 24574 }, { "epoch": 0.82, "grad_norm": 0.44403547048568726, "learning_rate": 0.0004965931895584883, "loss": 1.8153, "step": 24575 }, { "epoch": 0.82, "grad_norm": 0.44956064224243164, "learning_rate": 0.0004965852943336754, "loss": 1.8689, "step": 24576 }, { "epoch": 0.82, "grad_norm": 0.45515504479408264, "learning_rate": 0.000496577398870237, "loss": 1.768, "step": 24577 }, { "epoch": 0.82, "grad_norm": 0.4373031556606293, "learning_rate": 0.0004965695031681826, "loss": 1.9067, "step": 24578 }, { "epoch": 0.82, "grad_norm": 0.45951130986213684, "learning_rate": 0.0004965616072275216, "loss": 1.8371, "step": 24579 }, { "epoch": 0.82, "grad_norm": 0.44101682305336, "learning_rate": 0.0004965537110482639, "loss": 1.8063, "step": 24580 }, { "epoch": 0.82, "grad_norm": 0.4496728479862213, "learning_rate": 0.000496545814630419, "loss": 1.8899, "step": 24581 }, { "epoch": 0.82, "grad_norm": 0.4479188323020935, "learning_rate": 0.0004965379179739964, "loss": 1.8151, "step": 24582 }, { "epoch": 0.82, "grad_norm": 0.4295373857021332, "learning_rate": 0.0004965300210790057, "loss": 1.8529, "step": 24583 }, { "epoch": 0.82, "grad_norm": 0.4345338046550751, "learning_rate": 0.0004965221239454567, "loss": 1.8892, "step": 24584 }, { "epoch": 0.82, "grad_norm": 0.4345232844352722, "learning_rate": 0.0004965142265733586, "loss": 1.794, "step": 24585 }, { "epoch": 0.82, "grad_norm": 0.4386454224586487, "learning_rate": 0.0004965063289627212, "loss": 1.8352, "step": 24586 }, { "epoch": 0.82, "grad_norm": 0.4504382908344269, "learning_rate": 0.0004964984311135541, "loss": 1.8536, "step": 24587 }, { "epoch": 0.82, "grad_norm": 0.4394088685512543, "learning_rate": 0.0004964905330258668, "loss": 1.7922, "step": 24588 }, { "epoch": 0.82, "grad_norm": 0.43315792083740234, "learning_rate": 0.000496482634699669, "loss": 1.8047, "step": 24589 }, { "epoch": 0.82, "grad_norm": 0.45070695877075195, "learning_rate": 0.0004964747361349703, "loss": 1.8171, "step": 24590 }, { "epoch": 0.82, "grad_norm": 0.5108566880226135, "learning_rate": 0.0004964668373317801, "loss": 1.8768, "step": 24591 }, { "epoch": 0.82, "grad_norm": 0.43301790952682495, "learning_rate": 0.0004964589382901081, "loss": 1.804, "step": 24592 }, { "epoch": 0.82, "grad_norm": 0.440922349691391, "learning_rate": 0.000496451039009964, "loss": 1.807, "step": 24593 }, { "epoch": 0.82, "grad_norm": 0.44110777974128723, "learning_rate": 0.0004964431394913572, "loss": 1.8039, "step": 24594 }, { "epoch": 0.82, "grad_norm": 0.44484367966651917, "learning_rate": 0.0004964352397342974, "loss": 1.8687, "step": 24595 }, { "epoch": 0.82, "grad_norm": 0.43780985474586487, "learning_rate": 0.0004964273397387942, "loss": 1.835, "step": 24596 }, { "epoch": 0.82, "grad_norm": 0.4512471854686737, "learning_rate": 0.0004964194395048571, "loss": 1.7806, "step": 24597 }, { "epoch": 0.82, "grad_norm": 0.43144431710243225, "learning_rate": 0.0004964115390324958, "loss": 1.7881, "step": 24598 }, { "epoch": 0.82, "grad_norm": 0.44594913721084595, "learning_rate": 0.0004964036383217198, "loss": 1.8049, "step": 24599 }, { "epoch": 0.82, "grad_norm": 0.42259106040000916, "learning_rate": 0.0004963957373725386, "loss": 1.7893, "step": 24600 }, { "epoch": 0.82, "grad_norm": 0.4536319971084595, "learning_rate": 0.0004963878361849621, "loss": 1.8691, "step": 24601 }, { "epoch": 0.82, "grad_norm": 0.43655306100845337, "learning_rate": 0.0004963799347589997, "loss": 1.878, "step": 24602 }, { "epoch": 0.82, "grad_norm": 0.4522727131843567, "learning_rate": 0.0004963720330946608, "loss": 1.8973, "step": 24603 }, { "epoch": 0.82, "grad_norm": 0.43035414814949036, "learning_rate": 0.0004963641311919554, "loss": 1.8011, "step": 24604 }, { "epoch": 0.82, "grad_norm": 0.44864824414253235, "learning_rate": 0.0004963562290508928, "loss": 1.8077, "step": 24605 }, { "epoch": 0.82, "grad_norm": 0.4347574710845947, "learning_rate": 0.0004963483266714826, "loss": 1.8297, "step": 24606 }, { "epoch": 0.82, "grad_norm": 0.4203509986400604, "learning_rate": 0.0004963404240537346, "loss": 1.8482, "step": 24607 }, { "epoch": 0.82, "grad_norm": 0.44812992215156555, "learning_rate": 0.0004963325211976581, "loss": 1.7661, "step": 24608 }, { "epoch": 0.82, "grad_norm": 0.4462190866470337, "learning_rate": 0.0004963246181032629, "loss": 1.8216, "step": 24609 }, { "epoch": 0.82, "grad_norm": 0.42732998728752136, "learning_rate": 0.0004963167147705586, "loss": 1.8844, "step": 24610 }, { "epoch": 0.82, "grad_norm": 0.4506043791770935, "learning_rate": 0.0004963088111995547, "loss": 1.814, "step": 24611 }, { "epoch": 0.82, "grad_norm": 0.44671303033828735, "learning_rate": 0.0004963009073902609, "loss": 1.8024, "step": 24612 }, { "epoch": 0.82, "grad_norm": 0.44540271162986755, "learning_rate": 0.0004962930033426866, "loss": 1.7921, "step": 24613 }, { "epoch": 0.82, "grad_norm": 0.43972843885421753, "learning_rate": 0.0004962850990568416, "loss": 1.8237, "step": 24614 }, { "epoch": 0.82, "grad_norm": 0.4479525089263916, "learning_rate": 0.0004962771945327352, "loss": 1.7414, "step": 24615 }, { "epoch": 0.82, "grad_norm": 0.44747665524482727, "learning_rate": 0.0004962692897703775, "loss": 1.7883, "step": 24616 }, { "epoch": 0.82, "grad_norm": 0.4580002725124359, "learning_rate": 0.0004962613847697777, "loss": 1.7945, "step": 24617 }, { "epoch": 0.82, "grad_norm": 0.4471871256828308, "learning_rate": 0.0004962534795309455, "loss": 1.7539, "step": 24618 }, { "epoch": 0.82, "grad_norm": 0.4276464283466339, "learning_rate": 0.0004962455740538906, "loss": 1.812, "step": 24619 }, { "epoch": 0.82, "grad_norm": 0.4554916322231293, "learning_rate": 0.0004962376683386223, "loss": 1.8289, "step": 24620 }, { "epoch": 0.82, "grad_norm": 0.4607551097869873, "learning_rate": 0.0004962297623851504, "loss": 1.8372, "step": 24621 }, { "epoch": 0.82, "grad_norm": 0.45143529772758484, "learning_rate": 0.0004962218561934845, "loss": 1.7763, "step": 24622 }, { "epoch": 0.82, "grad_norm": 0.4352880120277405, "learning_rate": 0.0004962139497636342, "loss": 1.7672, "step": 24623 }, { "epoch": 0.82, "grad_norm": 0.4602271616458893, "learning_rate": 0.0004962060430956091, "loss": 1.8806, "step": 24624 }, { "epoch": 0.82, "grad_norm": 0.436312198638916, "learning_rate": 0.0004961981361894187, "loss": 1.7606, "step": 24625 }, { "epoch": 0.82, "grad_norm": 0.4327167570590973, "learning_rate": 0.0004961902290450728, "loss": 1.8019, "step": 24626 }, { "epoch": 0.82, "grad_norm": 0.448063462972641, "learning_rate": 0.0004961823216625808, "loss": 1.8235, "step": 24627 }, { "epoch": 0.82, "grad_norm": 0.4519064426422119, "learning_rate": 0.0004961744140419524, "loss": 1.8541, "step": 24628 }, { "epoch": 0.82, "grad_norm": 0.43154528737068176, "learning_rate": 0.0004961665061831971, "loss": 1.7876, "step": 24629 }, { "epoch": 0.82, "grad_norm": 0.4523225724697113, "learning_rate": 0.0004961585980863245, "loss": 1.845, "step": 24630 }, { "epoch": 0.82, "grad_norm": 0.42093509435653687, "learning_rate": 0.0004961506897513445, "loss": 1.7807, "step": 24631 }, { "epoch": 0.82, "grad_norm": 0.4593132734298706, "learning_rate": 0.0004961427811782663, "loss": 1.8675, "step": 24632 }, { "epoch": 0.82, "grad_norm": 0.45218032598495483, "learning_rate": 0.0004961348723670996, "loss": 1.8043, "step": 24633 }, { "epoch": 0.82, "grad_norm": 0.47504380345344543, "learning_rate": 0.0004961269633178541, "loss": 1.7713, "step": 24634 }, { "epoch": 0.82, "grad_norm": 0.43914803862571716, "learning_rate": 0.0004961190540305394, "loss": 1.7371, "step": 24635 }, { "epoch": 0.82, "grad_norm": 0.4375460147857666, "learning_rate": 0.0004961111445051651, "loss": 1.8628, "step": 24636 }, { "epoch": 0.82, "grad_norm": 0.4491770267486572, "learning_rate": 0.0004961032347417408, "loss": 1.8686, "step": 24637 }, { "epoch": 0.82, "grad_norm": 0.4512919783592224, "learning_rate": 0.000496095324740276, "loss": 1.8574, "step": 24638 }, { "epoch": 0.82, "grad_norm": 0.4697633683681488, "learning_rate": 0.0004960874145007805, "loss": 1.7897, "step": 24639 }, { "epoch": 0.82, "grad_norm": 0.4664505124092102, "learning_rate": 0.0004960795040232635, "loss": 1.8394, "step": 24640 }, { "epoch": 0.82, "grad_norm": 0.43690815567970276, "learning_rate": 0.0004960715933077351, "loss": 1.8724, "step": 24641 }, { "epoch": 0.82, "grad_norm": 0.43015703558921814, "learning_rate": 0.0004960636823542045, "loss": 1.8363, "step": 24642 }, { "epoch": 0.82, "grad_norm": 0.4637601971626282, "learning_rate": 0.0004960557711626816, "loss": 1.8705, "step": 24643 }, { "epoch": 0.82, "grad_norm": 0.46390122175216675, "learning_rate": 0.0004960478597331759, "loss": 1.8427, "step": 24644 }, { "epoch": 0.82, "grad_norm": 0.4424407184123993, "learning_rate": 0.000496039948065697, "loss": 1.8082, "step": 24645 }, { "epoch": 0.82, "grad_norm": 1.7646880149841309, "learning_rate": 0.0004960320361602545, "loss": 1.8501, "step": 24646 }, { "epoch": 0.82, "grad_norm": 0.451031893491745, "learning_rate": 0.0004960241240168578, "loss": 1.8102, "step": 24647 }, { "epoch": 0.82, "grad_norm": 0.4666483998298645, "learning_rate": 0.0004960162116355169, "loss": 1.7835, "step": 24648 }, { "epoch": 0.82, "grad_norm": 0.43492770195007324, "learning_rate": 0.0004960082990162411, "loss": 1.8028, "step": 24649 }, { "epoch": 0.82, "grad_norm": 0.42928779125213623, "learning_rate": 0.0004960003861590401, "loss": 1.9255, "step": 24650 }, { "epoch": 0.82, "grad_norm": 0.43990403413772583, "learning_rate": 0.0004959924730639235, "loss": 1.8693, "step": 24651 }, { "epoch": 0.82, "grad_norm": 0.43272164463996887, "learning_rate": 0.000495984559730901, "loss": 1.8233, "step": 24652 }, { "epoch": 0.82, "grad_norm": 0.44348230957984924, "learning_rate": 0.0004959766461599822, "loss": 1.8441, "step": 24653 }, { "epoch": 0.82, "grad_norm": 0.48514804244041443, "learning_rate": 0.0004959687323511764, "loss": 1.8381, "step": 24654 }, { "epoch": 0.82, "grad_norm": 0.4486778676509857, "learning_rate": 0.0004959608183044935, "loss": 1.8577, "step": 24655 }, { "epoch": 0.82, "grad_norm": 0.44031497836112976, "learning_rate": 0.000495952904019943, "loss": 1.8711, "step": 24656 }, { "epoch": 0.82, "grad_norm": 0.44278252124786377, "learning_rate": 0.0004959449894975348, "loss": 1.8511, "step": 24657 }, { "epoch": 0.82, "grad_norm": 0.4590941369533539, "learning_rate": 0.000495937074737278, "loss": 1.7834, "step": 24658 }, { "epoch": 0.82, "grad_norm": 0.4382309913635254, "learning_rate": 0.0004959291597391826, "loss": 1.8274, "step": 24659 }, { "epoch": 0.82, "grad_norm": 0.42742058634757996, "learning_rate": 0.000495921244503258, "loss": 1.8649, "step": 24660 }, { "epoch": 0.82, "grad_norm": 0.4263596534729004, "learning_rate": 0.0004959133290295138, "loss": 1.8379, "step": 24661 }, { "epoch": 0.82, "grad_norm": 0.4296037554740906, "learning_rate": 0.0004959054133179599, "loss": 1.7652, "step": 24662 }, { "epoch": 0.82, "grad_norm": 0.4359447658061981, "learning_rate": 0.0004958974973686054, "loss": 1.8254, "step": 24663 }, { "epoch": 0.82, "grad_norm": 0.4633456766605377, "learning_rate": 0.0004958895811814605, "loss": 1.8227, "step": 24664 }, { "epoch": 0.82, "grad_norm": 0.4397364854812622, "learning_rate": 0.0004958816647565343, "loss": 1.8278, "step": 24665 }, { "epoch": 0.82, "grad_norm": 0.4509977400302887, "learning_rate": 0.0004958737480938367, "loss": 1.8713, "step": 24666 }, { "epoch": 0.82, "grad_norm": 0.41996970772743225, "learning_rate": 0.0004958658311933772, "loss": 1.792, "step": 24667 }, { "epoch": 0.82, "grad_norm": 0.4561668038368225, "learning_rate": 0.0004958579140551655, "loss": 1.7532, "step": 24668 }, { "epoch": 0.82, "grad_norm": 0.4337112009525299, "learning_rate": 0.0004958499966792112, "loss": 1.8285, "step": 24669 }, { "epoch": 0.82, "grad_norm": 0.4369237422943115, "learning_rate": 0.0004958420790655237, "loss": 1.8311, "step": 24670 }, { "epoch": 0.82, "grad_norm": 0.44481757283210754, "learning_rate": 0.0004958341612141129, "loss": 1.799, "step": 24671 }, { "epoch": 0.82, "grad_norm": 0.43690261244773865, "learning_rate": 0.0004958262431249882, "loss": 1.7504, "step": 24672 }, { "epoch": 0.82, "grad_norm": 0.44171378016471863, "learning_rate": 0.0004958183247981594, "loss": 1.848, "step": 24673 }, { "epoch": 0.82, "grad_norm": 0.4584585130214691, "learning_rate": 0.0004958104062336359, "loss": 1.8432, "step": 24674 }, { "epoch": 0.82, "grad_norm": 0.4430198669433594, "learning_rate": 0.0004958024874314275, "loss": 1.7999, "step": 24675 }, { "epoch": 0.82, "grad_norm": 0.43415915966033936, "learning_rate": 0.0004957945683915436, "loss": 1.8012, "step": 24676 }, { "epoch": 0.82, "grad_norm": 0.44012561440467834, "learning_rate": 0.000495786649113994, "loss": 1.8277, "step": 24677 }, { "epoch": 0.82, "grad_norm": 0.43092867732048035, "learning_rate": 0.0004957787295987884, "loss": 1.8203, "step": 24678 }, { "epoch": 0.82, "grad_norm": 0.4332020878791809, "learning_rate": 0.0004957708098459361, "loss": 1.8345, "step": 24679 }, { "epoch": 0.82, "grad_norm": 0.4422680139541626, "learning_rate": 0.0004957628898554469, "loss": 1.833, "step": 24680 }, { "epoch": 0.82, "grad_norm": 0.4352484941482544, "learning_rate": 0.0004957549696273305, "loss": 1.7975, "step": 24681 }, { "epoch": 0.82, "grad_norm": 0.4229072332382202, "learning_rate": 0.0004957470491615963, "loss": 1.8448, "step": 24682 }, { "epoch": 0.82, "grad_norm": 0.4460401237010956, "learning_rate": 0.000495739128458254, "loss": 1.8664, "step": 24683 }, { "epoch": 0.82, "grad_norm": 0.4541158974170685, "learning_rate": 0.0004957312075173134, "loss": 1.8437, "step": 24684 }, { "epoch": 0.82, "grad_norm": 0.42642274498939514, "learning_rate": 0.0004957232863387838, "loss": 1.8644, "step": 24685 }, { "epoch": 0.82, "grad_norm": 0.45153722167015076, "learning_rate": 0.0004957153649226751, "loss": 1.8506, "step": 24686 }, { "epoch": 0.82, "grad_norm": 0.4542739987373352, "learning_rate": 0.0004957074432689968, "loss": 1.9376, "step": 24687 }, { "epoch": 0.82, "grad_norm": 0.5037698149681091, "learning_rate": 0.0004956995213777583, "loss": 1.9098, "step": 24688 }, { "epoch": 0.82, "grad_norm": 0.4697358310222626, "learning_rate": 0.0004956915992489696, "loss": 1.8596, "step": 24689 }, { "epoch": 0.82, "grad_norm": 0.45674458146095276, "learning_rate": 0.0004956836768826399, "loss": 1.9117, "step": 24690 }, { "epoch": 0.82, "grad_norm": 0.48065274953842163, "learning_rate": 0.0004956757542787793, "loss": 1.8303, "step": 24691 }, { "epoch": 0.82, "grad_norm": 0.47094857692718506, "learning_rate": 0.0004956678314373971, "loss": 1.8434, "step": 24692 }, { "epoch": 0.82, "grad_norm": 0.45884430408477783, "learning_rate": 0.000495659908358503, "loss": 1.7901, "step": 24693 }, { "epoch": 0.82, "grad_norm": 0.4305378496646881, "learning_rate": 0.0004956519850421065, "loss": 1.784, "step": 24694 }, { "epoch": 0.82, "grad_norm": 0.4661400318145752, "learning_rate": 0.0004956440614882173, "loss": 1.7713, "step": 24695 }, { "epoch": 0.82, "grad_norm": 0.4548858106136322, "learning_rate": 0.0004956361376968452, "loss": 1.8206, "step": 24696 }, { "epoch": 0.82, "grad_norm": 0.4408542811870575, "learning_rate": 0.0004956282136679994, "loss": 1.8395, "step": 24697 }, { "epoch": 0.82, "grad_norm": 0.4435741603374481, "learning_rate": 0.0004956202894016901, "loss": 1.8034, "step": 24698 }, { "epoch": 0.82, "grad_norm": 0.47349268198013306, "learning_rate": 0.0004956123648979264, "loss": 1.8718, "step": 24699 }, { "epoch": 0.82, "grad_norm": 0.4564029574394226, "learning_rate": 0.0004956044401567181, "loss": 1.9135, "step": 24700 }, { "epoch": 0.82, "grad_norm": 0.42988842725753784, "learning_rate": 0.000495596515178075, "loss": 1.7495, "step": 24701 }, { "epoch": 0.82, "grad_norm": 0.4428999722003937, "learning_rate": 0.0004955885899620064, "loss": 1.8292, "step": 24702 }, { "epoch": 0.82, "grad_norm": 0.43893346190452576, "learning_rate": 0.0004955806645085221, "loss": 1.8703, "step": 24703 }, { "epoch": 0.82, "grad_norm": 0.4202237129211426, "learning_rate": 0.0004955727388176316, "loss": 1.7437, "step": 24704 }, { "epoch": 0.82, "grad_norm": 0.42639195919036865, "learning_rate": 0.0004955648128893447, "loss": 1.8456, "step": 24705 }, { "epoch": 0.82, "grad_norm": 0.4369575083255768, "learning_rate": 0.000495556886723671, "loss": 1.8479, "step": 24706 }, { "epoch": 0.82, "grad_norm": 0.44795700907707214, "learning_rate": 0.0004955489603206199, "loss": 1.8055, "step": 24707 }, { "epoch": 0.82, "grad_norm": 0.43398478627204895, "learning_rate": 0.0004955410336802013, "loss": 1.8458, "step": 24708 }, { "epoch": 0.82, "grad_norm": 0.44668591022491455, "learning_rate": 0.0004955331068024247, "loss": 1.8458, "step": 24709 }, { "epoch": 0.82, "grad_norm": 0.4239940047264099, "learning_rate": 0.0004955251796872996, "loss": 1.7789, "step": 24710 }, { "epoch": 0.82, "grad_norm": 0.42288437485694885, "learning_rate": 0.0004955172523348359, "loss": 1.8453, "step": 24711 }, { "epoch": 0.82, "grad_norm": 0.4458483159542084, "learning_rate": 0.0004955093247450429, "loss": 1.875, "step": 24712 }, { "epoch": 0.82, "grad_norm": 0.4422883093357086, "learning_rate": 0.0004955013969179305, "loss": 1.7988, "step": 24713 }, { "epoch": 0.82, "grad_norm": 0.43211328983306885, "learning_rate": 0.0004954934688535082, "loss": 1.7823, "step": 24714 }, { "epoch": 0.82, "grad_norm": 0.4402126669883728, "learning_rate": 0.0004954855405517856, "loss": 1.8751, "step": 24715 }, { "epoch": 0.82, "grad_norm": 0.43980172276496887, "learning_rate": 0.0004954776120127723, "loss": 1.8463, "step": 24716 }, { "epoch": 0.82, "grad_norm": 0.44663310050964355, "learning_rate": 0.000495469683236478, "loss": 1.845, "step": 24717 }, { "epoch": 0.82, "grad_norm": 0.42848852276802063, "learning_rate": 0.0004954617542229124, "loss": 1.8548, "step": 24718 }, { "epoch": 0.82, "grad_norm": 0.4435386657714844, "learning_rate": 0.000495453824972085, "loss": 1.8354, "step": 24719 }, { "epoch": 0.82, "grad_norm": 0.4476247727870941, "learning_rate": 0.0004954458954840054, "loss": 1.8189, "step": 24720 }, { "epoch": 0.82, "grad_norm": 0.46327105164527893, "learning_rate": 0.0004954379657586834, "loss": 1.8192, "step": 24721 }, { "epoch": 0.82, "grad_norm": 0.4222187399864197, "learning_rate": 0.0004954300357961284, "loss": 1.7543, "step": 24722 }, { "epoch": 0.82, "grad_norm": 0.43596869707107544, "learning_rate": 0.0004954221055963502, "loss": 1.8035, "step": 24723 }, { "epoch": 0.82, "grad_norm": 0.4609581232070923, "learning_rate": 0.0004954141751593584, "loss": 1.7856, "step": 24724 }, { "epoch": 0.82, "grad_norm": 0.6438840627670288, "learning_rate": 0.0004954062444851624, "loss": 1.8591, "step": 24725 }, { "epoch": 0.82, "grad_norm": 0.4521346390247345, "learning_rate": 0.0004953983135737721, "loss": 1.8709, "step": 24726 }, { "epoch": 0.82, "grad_norm": 0.4371442496776581, "learning_rate": 0.000495390382425197, "loss": 1.7741, "step": 24727 }, { "epoch": 0.82, "grad_norm": 0.4252527356147766, "learning_rate": 0.0004953824510394468, "loss": 1.7923, "step": 24728 }, { "epoch": 0.82, "grad_norm": 0.44373929500579834, "learning_rate": 0.0004953745194165311, "loss": 1.8031, "step": 24729 }, { "epoch": 0.82, "grad_norm": 0.44395220279693604, "learning_rate": 0.0004953665875564596, "loss": 1.7921, "step": 24730 }, { "epoch": 0.82, "grad_norm": 0.4355523884296417, "learning_rate": 0.0004953586554592418, "loss": 1.8733, "step": 24731 }, { "epoch": 0.82, "grad_norm": 0.44826579093933105, "learning_rate": 0.0004953507231248873, "loss": 1.814, "step": 24732 }, { "epoch": 0.82, "grad_norm": 0.4489199221134186, "learning_rate": 0.0004953427905534058, "loss": 1.831, "step": 24733 }, { "epoch": 0.82, "grad_norm": 0.4402827322483063, "learning_rate": 0.000495334857744807, "loss": 1.812, "step": 24734 }, { "epoch": 0.82, "grad_norm": 0.433677613735199, "learning_rate": 0.0004953269246991004, "loss": 1.778, "step": 24735 }, { "epoch": 0.82, "grad_norm": 0.444342702627182, "learning_rate": 0.0004953189914162959, "loss": 1.9171, "step": 24736 }, { "epoch": 0.82, "grad_norm": 0.4356880187988281, "learning_rate": 0.0004953110578964027, "loss": 1.815, "step": 24737 }, { "epoch": 0.82, "grad_norm": 0.47071027755737305, "learning_rate": 0.0004953031241394308, "loss": 1.8769, "step": 24738 }, { "epoch": 0.82, "grad_norm": 0.4391278028488159, "learning_rate": 0.0004952951901453895, "loss": 1.824, "step": 24739 }, { "epoch": 0.82, "grad_norm": 0.45631107687950134, "learning_rate": 0.0004952872559142887, "loss": 1.866, "step": 24740 }, { "epoch": 0.82, "grad_norm": 0.44817864894866943, "learning_rate": 0.000495279321446138, "loss": 1.7842, "step": 24741 }, { "epoch": 0.82, "grad_norm": 0.4464716911315918, "learning_rate": 0.0004952713867409469, "loss": 1.8308, "step": 24742 }, { "epoch": 0.82, "grad_norm": 0.4305228292942047, "learning_rate": 0.0004952634517987252, "loss": 1.8033, "step": 24743 }, { "epoch": 0.82, "grad_norm": 0.4516773521900177, "learning_rate": 0.0004952555166194824, "loss": 1.8212, "step": 24744 }, { "epoch": 0.82, "grad_norm": 0.44001641869544983, "learning_rate": 0.0004952475812032282, "loss": 1.8005, "step": 24745 }, { "epoch": 0.82, "grad_norm": 0.44840216636657715, "learning_rate": 0.0004952396455499722, "loss": 1.78, "step": 24746 }, { "epoch": 0.82, "grad_norm": 0.4517490863800049, "learning_rate": 0.0004952317096597239, "loss": 1.7949, "step": 24747 }, { "epoch": 0.82, "grad_norm": 0.45274797081947327, "learning_rate": 0.0004952237735324932, "loss": 1.8066, "step": 24748 }, { "epoch": 0.82, "grad_norm": 0.42698243260383606, "learning_rate": 0.0004952158371682897, "loss": 1.843, "step": 24749 }, { "epoch": 0.82, "grad_norm": 0.4472223222255707, "learning_rate": 0.0004952079005671227, "loss": 1.8776, "step": 24750 }, { "epoch": 0.82, "grad_norm": 0.44220659136772156, "learning_rate": 0.0004951999637290022, "loss": 1.7836, "step": 24751 }, { "epoch": 0.82, "grad_norm": 0.4392073452472687, "learning_rate": 0.0004951920266539377, "loss": 1.812, "step": 24752 }, { "epoch": 0.82, "grad_norm": 0.5101681351661682, "learning_rate": 0.0004951840893419389, "loss": 1.8253, "step": 24753 }, { "epoch": 0.82, "grad_norm": 0.4276731014251709, "learning_rate": 0.0004951761517930153, "loss": 1.7915, "step": 24754 }, { "epoch": 0.82, "grad_norm": 0.43901491165161133, "learning_rate": 0.0004951682140071766, "loss": 1.8047, "step": 24755 }, { "epoch": 0.82, "grad_norm": 0.44382327795028687, "learning_rate": 0.0004951602759844326, "loss": 1.8502, "step": 24756 }, { "epoch": 0.82, "grad_norm": 0.428021639585495, "learning_rate": 0.0004951523377247925, "loss": 1.8659, "step": 24757 }, { "epoch": 0.82, "grad_norm": 0.4510030746459961, "learning_rate": 0.0004951443992282664, "loss": 1.8475, "step": 24758 }, { "epoch": 0.82, "grad_norm": 0.437611848115921, "learning_rate": 0.0004951364604948638, "loss": 1.7845, "step": 24759 }, { "epoch": 0.82, "grad_norm": 0.4452652037143707, "learning_rate": 0.0004951285215245942, "loss": 1.801, "step": 24760 }, { "epoch": 0.82, "grad_norm": 0.4576543867588043, "learning_rate": 0.0004951205823174672, "loss": 1.8375, "step": 24761 }, { "epoch": 0.82, "grad_norm": 0.43345898389816284, "learning_rate": 0.0004951126428734927, "loss": 1.7996, "step": 24762 }, { "epoch": 0.82, "grad_norm": 0.43971166014671326, "learning_rate": 0.0004951047031926803, "loss": 1.8243, "step": 24763 }, { "epoch": 0.82, "grad_norm": 0.4321542978286743, "learning_rate": 0.0004950967632750394, "loss": 1.8147, "step": 24764 }, { "epoch": 0.82, "grad_norm": 0.451104998588562, "learning_rate": 0.0004950888231205798, "loss": 1.9573, "step": 24765 }, { "epoch": 0.82, "grad_norm": 0.44968292117118835, "learning_rate": 0.0004950808827293112, "loss": 1.7535, "step": 24766 }, { "epoch": 0.82, "grad_norm": 0.4467078745365143, "learning_rate": 0.000495072942101243, "loss": 1.8797, "step": 24767 }, { "epoch": 0.82, "grad_norm": 0.4342060387134552, "learning_rate": 0.0004950650012363851, "loss": 1.8775, "step": 24768 }, { "epoch": 0.82, "grad_norm": 0.42103439569473267, "learning_rate": 0.0004950570601347469, "loss": 1.8266, "step": 24769 }, { "epoch": 0.82, "grad_norm": 0.44741395115852356, "learning_rate": 0.0004950491187963383, "loss": 1.8652, "step": 24770 }, { "epoch": 0.82, "grad_norm": 0.4402265250682831, "learning_rate": 0.0004950411772211687, "loss": 1.8163, "step": 24771 }, { "epoch": 0.82, "grad_norm": 0.4461316466331482, "learning_rate": 0.000495033235409248, "loss": 1.8159, "step": 24772 }, { "epoch": 0.82, "grad_norm": 0.44913309812545776, "learning_rate": 0.0004950252933605856, "loss": 1.8428, "step": 24773 }, { "epoch": 0.82, "grad_norm": 0.424532026052475, "learning_rate": 0.0004950173510751912, "loss": 1.8445, "step": 24774 }, { "epoch": 0.82, "grad_norm": 0.4390873312950134, "learning_rate": 0.0004950094085530745, "loss": 1.8023, "step": 24775 }, { "epoch": 0.82, "grad_norm": 0.4450169503688812, "learning_rate": 0.0004950014657942452, "loss": 1.8462, "step": 24776 }, { "epoch": 0.82, "grad_norm": 0.4474535584449768, "learning_rate": 0.0004949935227987127, "loss": 1.8063, "step": 24777 }, { "epoch": 0.82, "grad_norm": 0.4306367039680481, "learning_rate": 0.0004949855795664869, "loss": 1.7916, "step": 24778 }, { "epoch": 0.82, "grad_norm": 0.4462798535823822, "learning_rate": 0.0004949776360975773, "loss": 1.8397, "step": 24779 }, { "epoch": 0.82, "grad_norm": 0.43324777483940125, "learning_rate": 0.0004949696923919936, "loss": 1.7029, "step": 24780 }, { "epoch": 0.82, "grad_norm": 0.460266649723053, "learning_rate": 0.0004949617484497455, "loss": 1.8887, "step": 24781 }, { "epoch": 0.82, "grad_norm": 0.43355295062065125, "learning_rate": 0.0004949538042708423, "loss": 1.8683, "step": 24782 }, { "epoch": 0.82, "grad_norm": 0.42407840490341187, "learning_rate": 0.0004949458598552942, "loss": 1.8016, "step": 24783 }, { "epoch": 0.82, "grad_norm": 0.44250842928886414, "learning_rate": 0.0004949379152031104, "loss": 1.7585, "step": 24784 }, { "epoch": 0.82, "grad_norm": 0.43533140420913696, "learning_rate": 0.0004949299703143007, "loss": 1.7546, "step": 24785 }, { "epoch": 0.82, "grad_norm": 0.43254825472831726, "learning_rate": 0.0004949220251888748, "loss": 1.7758, "step": 24786 }, { "epoch": 0.82, "grad_norm": 0.42882558703422546, "learning_rate": 0.0004949140798268421, "loss": 1.8759, "step": 24787 }, { "epoch": 0.82, "grad_norm": 0.4193824529647827, "learning_rate": 0.0004949061342282127, "loss": 1.7814, "step": 24788 }, { "epoch": 0.82, "grad_norm": 0.445220947265625, "learning_rate": 0.0004948981883929958, "loss": 1.9044, "step": 24789 }, { "epoch": 0.82, "grad_norm": 0.4394852817058563, "learning_rate": 0.0004948902423212013, "loss": 1.8221, "step": 24790 }, { "epoch": 0.82, "grad_norm": 0.4298219680786133, "learning_rate": 0.0004948822960128386, "loss": 1.7493, "step": 24791 }, { "epoch": 0.82, "grad_norm": 0.44545918703079224, "learning_rate": 0.0004948743494679177, "loss": 1.8808, "step": 24792 }, { "epoch": 0.82, "grad_norm": 1.3241100311279297, "learning_rate": 0.0004948664026864479, "loss": 1.8723, "step": 24793 }, { "epoch": 0.82, "grad_norm": 0.440079003572464, "learning_rate": 0.0004948584556684391, "loss": 1.8502, "step": 24794 }, { "epoch": 0.82, "grad_norm": 0.4536376893520355, "learning_rate": 0.0004948505084139008, "loss": 1.7608, "step": 24795 }, { "epoch": 0.82, "grad_norm": 0.4165564477443695, "learning_rate": 0.0004948425609228427, "loss": 1.8246, "step": 24796 }, { "epoch": 0.82, "grad_norm": 0.4592825174331665, "learning_rate": 0.0004948346131952745, "loss": 1.7691, "step": 24797 }, { "epoch": 0.83, "grad_norm": 0.4425928592681885, "learning_rate": 0.0004948266652312057, "loss": 1.7392, "step": 24798 }, { "epoch": 0.83, "grad_norm": 0.4273589551448822, "learning_rate": 0.0004948187170306462, "loss": 1.8341, "step": 24799 }, { "epoch": 0.83, "grad_norm": 0.4189026653766632, "learning_rate": 0.0004948107685936053, "loss": 1.777, "step": 24800 }, { "epoch": 0.83, "grad_norm": 0.42680609226226807, "learning_rate": 0.0004948028199200929, "loss": 1.8331, "step": 24801 }, { "epoch": 0.83, "grad_norm": 0.4414292275905609, "learning_rate": 0.0004947948710101185, "loss": 1.8401, "step": 24802 }, { "epoch": 0.83, "grad_norm": 0.44166478514671326, "learning_rate": 0.0004947869218636919, "loss": 1.7814, "step": 24803 }, { "epoch": 0.83, "grad_norm": 0.45275017619132996, "learning_rate": 0.0004947789724808227, "loss": 1.8231, "step": 24804 }, { "epoch": 0.83, "grad_norm": 0.4249511957168579, "learning_rate": 0.0004947710228615205, "loss": 1.7996, "step": 24805 }, { "epoch": 0.83, "grad_norm": 0.4510747194290161, "learning_rate": 0.0004947630730057949, "loss": 1.8858, "step": 24806 }, { "epoch": 0.83, "grad_norm": 0.47826212644577026, "learning_rate": 0.0004947551229136558, "loss": 1.8622, "step": 24807 }, { "epoch": 0.83, "grad_norm": 0.4278881251811981, "learning_rate": 0.0004947471725851125, "loss": 1.8247, "step": 24808 }, { "epoch": 0.83, "grad_norm": 0.4431394040584564, "learning_rate": 0.0004947392220201748, "loss": 1.8672, "step": 24809 }, { "epoch": 0.83, "grad_norm": 0.45338138937950134, "learning_rate": 0.0004947312712188526, "loss": 1.7691, "step": 24810 }, { "epoch": 0.83, "grad_norm": 0.4582565426826477, "learning_rate": 0.0004947233201811552, "loss": 1.821, "step": 24811 }, { "epoch": 0.83, "grad_norm": 0.4398147463798523, "learning_rate": 0.0004947153689070924, "loss": 1.8061, "step": 24812 }, { "epoch": 0.83, "grad_norm": 0.45370960235595703, "learning_rate": 0.0004947074173966738, "loss": 1.8591, "step": 24813 }, { "epoch": 0.83, "grad_norm": 0.4292025566101074, "learning_rate": 0.0004946994656499091, "loss": 1.8071, "step": 24814 }, { "epoch": 0.83, "grad_norm": 0.44662895798683167, "learning_rate": 0.0004946915136668079, "loss": 1.8467, "step": 24815 }, { "epoch": 0.83, "grad_norm": 0.44179585576057434, "learning_rate": 0.00049468356144738, "loss": 1.8225, "step": 24816 }, { "epoch": 0.83, "grad_norm": 0.47969770431518555, "learning_rate": 0.0004946756089916349, "loss": 1.7474, "step": 24817 }, { "epoch": 0.83, "grad_norm": 0.4266946017742157, "learning_rate": 0.0004946676562995822, "loss": 1.8229, "step": 24818 }, { "epoch": 0.83, "grad_norm": 0.4345346689224243, "learning_rate": 0.0004946597033712318, "loss": 1.8565, "step": 24819 }, { "epoch": 0.83, "grad_norm": 0.44395554065704346, "learning_rate": 0.0004946517502065931, "loss": 1.7939, "step": 24820 }, { "epoch": 0.83, "grad_norm": 0.458050400018692, "learning_rate": 0.0004946437968056758, "loss": 1.8932, "step": 24821 }, { "epoch": 0.83, "grad_norm": 0.4266519546508789, "learning_rate": 0.0004946358431684897, "loss": 1.7909, "step": 24822 }, { "epoch": 0.83, "grad_norm": 0.43565428256988525, "learning_rate": 0.0004946278892950444, "loss": 1.7881, "step": 24823 }, { "epoch": 0.83, "grad_norm": 0.44551166892051697, "learning_rate": 0.0004946199351853494, "loss": 1.8672, "step": 24824 }, { "epoch": 0.83, "grad_norm": 0.4464551508426666, "learning_rate": 0.0004946119808394146, "loss": 1.8667, "step": 24825 }, { "epoch": 0.83, "grad_norm": 0.4276275336742401, "learning_rate": 0.0004946040262572495, "loss": 1.9202, "step": 24826 }, { "epoch": 0.83, "grad_norm": 0.42789483070373535, "learning_rate": 0.0004945960714388638, "loss": 1.7774, "step": 24827 }, { "epoch": 0.83, "grad_norm": 0.4312894344329834, "learning_rate": 0.000494588116384267, "loss": 1.7717, "step": 24828 }, { "epoch": 0.83, "grad_norm": 0.44275176525115967, "learning_rate": 0.000494580161093469, "loss": 1.8178, "step": 24829 }, { "epoch": 0.83, "grad_norm": 0.44527196884155273, "learning_rate": 0.0004945722055664794, "loss": 1.724, "step": 24830 }, { "epoch": 0.83, "grad_norm": 0.4399394094944, "learning_rate": 0.0004945642498033078, "loss": 1.8479, "step": 24831 }, { "epoch": 0.83, "grad_norm": 0.4648835361003876, "learning_rate": 0.0004945562938039637, "loss": 1.8096, "step": 24832 }, { "epoch": 0.83, "grad_norm": 0.4361305832862854, "learning_rate": 0.0004945483375684572, "loss": 1.8788, "step": 24833 }, { "epoch": 0.83, "grad_norm": 0.4331774413585663, "learning_rate": 0.0004945403810967975, "loss": 1.7488, "step": 24834 }, { "epoch": 0.83, "grad_norm": 0.43744975328445435, "learning_rate": 0.0004945324243889944, "loss": 1.7479, "step": 24835 }, { "epoch": 0.83, "grad_norm": 0.477274090051651, "learning_rate": 0.0004945244674450577, "loss": 1.9136, "step": 24836 }, { "epoch": 0.83, "grad_norm": 0.4453704059123993, "learning_rate": 0.000494516510264997, "loss": 1.8399, "step": 24837 }, { "epoch": 0.83, "grad_norm": 0.4250186085700989, "learning_rate": 0.0004945085528488218, "loss": 1.7371, "step": 24838 }, { "epoch": 0.83, "grad_norm": 0.4408642649650574, "learning_rate": 0.0004945005951965419, "loss": 1.8039, "step": 24839 }, { "epoch": 0.83, "grad_norm": 0.44800323247909546, "learning_rate": 0.0004944926373081671, "loss": 1.7973, "step": 24840 }, { "epoch": 0.83, "grad_norm": 0.4316841661930084, "learning_rate": 0.0004944846791837068, "loss": 1.8326, "step": 24841 }, { "epoch": 0.83, "grad_norm": 0.4294845163822174, "learning_rate": 0.0004944767208231707, "loss": 1.7791, "step": 24842 }, { "epoch": 0.83, "grad_norm": 0.41735130548477173, "learning_rate": 0.0004944687622265684, "loss": 1.7766, "step": 24843 }, { "epoch": 0.83, "grad_norm": 0.44618821144104004, "learning_rate": 0.0004944608033939098, "loss": 1.816, "step": 24844 }, { "epoch": 0.83, "grad_norm": 0.4372856020927429, "learning_rate": 0.0004944528443252045, "loss": 1.7289, "step": 24845 }, { "epoch": 0.83, "grad_norm": 0.4480106234550476, "learning_rate": 0.0004944448850204621, "loss": 1.7729, "step": 24846 }, { "epoch": 0.83, "grad_norm": 0.4582880735397339, "learning_rate": 0.0004944369254796921, "loss": 1.8569, "step": 24847 }, { "epoch": 0.83, "grad_norm": 0.47271355986595154, "learning_rate": 0.0004944289657029046, "loss": 1.8124, "step": 24848 }, { "epoch": 0.83, "grad_norm": 0.44460752606391907, "learning_rate": 0.0004944210056901087, "loss": 1.8009, "step": 24849 }, { "epoch": 0.83, "grad_norm": 0.432187020778656, "learning_rate": 0.0004944130454413145, "loss": 1.8095, "step": 24850 }, { "epoch": 0.83, "grad_norm": 0.4431699514389038, "learning_rate": 0.0004944050849565315, "loss": 1.8215, "step": 24851 }, { "epoch": 0.83, "grad_norm": 0.44349202513694763, "learning_rate": 0.0004943971242357693, "loss": 1.8873, "step": 24852 }, { "epoch": 0.83, "grad_norm": 0.4465171694755554, "learning_rate": 0.0004943891632790377, "loss": 1.7915, "step": 24853 }, { "epoch": 0.83, "grad_norm": 0.4324565529823303, "learning_rate": 0.0004943812020863463, "loss": 1.7597, "step": 24854 }, { "epoch": 0.83, "grad_norm": 0.4341706335544586, "learning_rate": 0.0004943732406577047, "loss": 1.8143, "step": 24855 }, { "epoch": 0.83, "grad_norm": 0.4369414150714874, "learning_rate": 0.0004943652789931226, "loss": 1.8396, "step": 24856 }, { "epoch": 0.83, "grad_norm": 0.4183739125728607, "learning_rate": 0.0004943573170926097, "loss": 1.7672, "step": 24857 }, { "epoch": 0.83, "grad_norm": 0.4251086711883545, "learning_rate": 0.0004943493549561758, "loss": 1.7567, "step": 24858 }, { "epoch": 0.83, "grad_norm": 0.43269065022468567, "learning_rate": 0.0004943413925838303, "loss": 1.7947, "step": 24859 }, { "epoch": 0.83, "grad_norm": 0.45218172669410706, "learning_rate": 0.000494333429975583, "loss": 1.7858, "step": 24860 }, { "epoch": 0.83, "grad_norm": 0.4564662575721741, "learning_rate": 0.0004943254671314435, "loss": 1.8749, "step": 24861 }, { "epoch": 0.83, "grad_norm": 0.43918371200561523, "learning_rate": 0.0004943175040514217, "loss": 1.6905, "step": 24862 }, { "epoch": 0.83, "grad_norm": 0.5870794057846069, "learning_rate": 0.0004943095407355268, "loss": 1.8298, "step": 24863 }, { "epoch": 0.83, "grad_norm": 0.44418659806251526, "learning_rate": 0.000494301577183769, "loss": 1.8231, "step": 24864 }, { "epoch": 0.83, "grad_norm": 0.44256287813186646, "learning_rate": 0.0004942936133961576, "loss": 1.7669, "step": 24865 }, { "epoch": 0.83, "grad_norm": 0.42146381735801697, "learning_rate": 0.0004942856493727024, "loss": 1.7626, "step": 24866 }, { "epoch": 0.83, "grad_norm": 0.44411778450012207, "learning_rate": 0.000494277685113413, "loss": 1.7507, "step": 24867 }, { "epoch": 0.83, "grad_norm": 0.4349193274974823, "learning_rate": 0.0004942697206182993, "loss": 1.803, "step": 24868 }, { "epoch": 0.83, "grad_norm": 0.43636906147003174, "learning_rate": 0.0004942617558873706, "loss": 1.7771, "step": 24869 }, { "epoch": 0.83, "grad_norm": 0.4443204998970032, "learning_rate": 0.0004942537909206368, "loss": 1.8121, "step": 24870 }, { "epoch": 0.83, "grad_norm": 0.4413522183895111, "learning_rate": 0.0004942458257181074, "loss": 1.7855, "step": 24871 }, { "epoch": 0.83, "grad_norm": 0.4319510757923126, "learning_rate": 0.0004942378602797923, "loss": 1.7688, "step": 24872 }, { "epoch": 0.83, "grad_norm": 0.44061926007270813, "learning_rate": 0.0004942298946057012, "loss": 1.7966, "step": 24873 }, { "epoch": 0.83, "grad_norm": 0.4443017840385437, "learning_rate": 0.0004942219286958434, "loss": 1.7913, "step": 24874 }, { "epoch": 0.83, "grad_norm": 0.43152695894241333, "learning_rate": 0.0004942139625502289, "loss": 1.8393, "step": 24875 }, { "epoch": 0.83, "grad_norm": 0.4304906129837036, "learning_rate": 0.0004942059961688673, "loss": 1.8896, "step": 24876 }, { "epoch": 0.83, "grad_norm": 0.49982619285583496, "learning_rate": 0.0004941980295517681, "loss": 1.887, "step": 24877 }, { "epoch": 0.83, "grad_norm": 0.436947226524353, "learning_rate": 0.0004941900626989412, "loss": 1.782, "step": 24878 }, { "epoch": 0.83, "grad_norm": 0.4289610981941223, "learning_rate": 0.0004941820956103961, "loss": 1.8101, "step": 24879 }, { "epoch": 0.83, "grad_norm": 0.43868762254714966, "learning_rate": 0.0004941741282861427, "loss": 1.793, "step": 24880 }, { "epoch": 0.83, "grad_norm": 0.45517265796661377, "learning_rate": 0.0004941661607261904, "loss": 1.898, "step": 24881 }, { "epoch": 0.83, "grad_norm": 0.4435786008834839, "learning_rate": 0.000494158192930549, "loss": 1.8263, "step": 24882 }, { "epoch": 0.83, "grad_norm": 0.44996902346611023, "learning_rate": 0.0004941502248992283, "loss": 1.7389, "step": 24883 }, { "epoch": 0.83, "grad_norm": 0.4525693953037262, "learning_rate": 0.0004941422566322376, "loss": 1.8683, "step": 24884 }, { "epoch": 0.83, "grad_norm": 0.4562804102897644, "learning_rate": 0.0004941342881295869, "loss": 1.8728, "step": 24885 }, { "epoch": 0.83, "grad_norm": 0.4556276798248291, "learning_rate": 0.0004941263193912859, "loss": 1.7889, "step": 24886 }, { "epoch": 0.83, "grad_norm": 0.45349738001823425, "learning_rate": 0.000494118350417344, "loss": 1.8477, "step": 24887 }, { "epoch": 0.83, "grad_norm": 0.44106635451316833, "learning_rate": 0.0004941103812077712, "loss": 1.7941, "step": 24888 }, { "epoch": 0.83, "grad_norm": 0.43524765968322754, "learning_rate": 0.0004941024117625768, "loss": 1.7988, "step": 24889 }, { "epoch": 0.83, "grad_norm": 0.47034913301467896, "learning_rate": 0.0004940944420817708, "loss": 1.7922, "step": 24890 }, { "epoch": 0.83, "grad_norm": 0.43861448764801025, "learning_rate": 0.0004940864721653626, "loss": 1.8008, "step": 24891 }, { "epoch": 0.83, "grad_norm": 0.4372107982635498, "learning_rate": 0.0004940785020133621, "loss": 1.8859, "step": 24892 }, { "epoch": 0.83, "grad_norm": 0.4489884376525879, "learning_rate": 0.000494070531625779, "loss": 1.8829, "step": 24893 }, { "epoch": 0.83, "grad_norm": 0.44630420207977295, "learning_rate": 0.0004940625610026227, "loss": 1.875, "step": 24894 }, { "epoch": 0.83, "grad_norm": 0.4439793527126312, "learning_rate": 0.0004940545901439031, "loss": 1.8311, "step": 24895 }, { "epoch": 0.83, "grad_norm": 0.4430796802043915, "learning_rate": 0.0004940466190496299, "loss": 1.8538, "step": 24896 }, { "epoch": 0.83, "grad_norm": 0.46192261576652527, "learning_rate": 0.0004940386477198126, "loss": 1.8537, "step": 24897 }, { "epoch": 0.83, "grad_norm": 0.4306585192680359, "learning_rate": 0.0004940306761544611, "loss": 1.7505, "step": 24898 }, { "epoch": 0.83, "grad_norm": 0.4732915163040161, "learning_rate": 0.0004940227043535847, "loss": 1.7842, "step": 24899 }, { "epoch": 0.83, "grad_norm": 0.4426063895225525, "learning_rate": 0.0004940147323171935, "loss": 1.8556, "step": 24900 }, { "epoch": 0.83, "grad_norm": 0.4298054575920105, "learning_rate": 0.0004940067600452971, "loss": 1.79, "step": 24901 }, { "epoch": 0.83, "grad_norm": 0.43096673488616943, "learning_rate": 0.000493998787537905, "loss": 1.8781, "step": 24902 }, { "epoch": 0.83, "grad_norm": 0.44309359788894653, "learning_rate": 0.0004939908147950268, "loss": 1.7906, "step": 24903 }, { "epoch": 0.83, "grad_norm": 0.47355514764785767, "learning_rate": 0.0004939828418166724, "loss": 1.8528, "step": 24904 }, { "epoch": 0.83, "grad_norm": 0.4451797902584076, "learning_rate": 0.0004939748686028515, "loss": 1.8202, "step": 24905 }, { "epoch": 0.83, "grad_norm": 0.43665391206741333, "learning_rate": 0.0004939668951535737, "loss": 1.8008, "step": 24906 }, { "epoch": 0.83, "grad_norm": 0.46986809372901917, "learning_rate": 0.0004939589214688486, "loss": 1.7989, "step": 24907 }, { "epoch": 0.83, "grad_norm": 0.4237216114997864, "learning_rate": 0.000493950947548686, "loss": 1.8277, "step": 24908 }, { "epoch": 0.83, "grad_norm": 0.44312459230422974, "learning_rate": 0.0004939429733930955, "loss": 1.8204, "step": 24909 }, { "epoch": 0.83, "grad_norm": 0.44613969326019287, "learning_rate": 0.0004939349990020868, "loss": 1.8308, "step": 24910 }, { "epoch": 0.83, "grad_norm": 0.5081579685211182, "learning_rate": 0.0004939270243756696, "loss": 1.85, "step": 24911 }, { "epoch": 0.83, "grad_norm": 0.4275520443916321, "learning_rate": 0.0004939190495138535, "loss": 1.8157, "step": 24912 }, { "epoch": 0.83, "grad_norm": 0.43096035718917847, "learning_rate": 0.0004939110744166484, "loss": 1.9624, "step": 24913 }, { "epoch": 0.83, "grad_norm": 0.45604753494262695, "learning_rate": 0.0004939030990840637, "loss": 1.7834, "step": 24914 }, { "epoch": 0.83, "grad_norm": 0.453340619802475, "learning_rate": 0.0004938951235161092, "loss": 1.8211, "step": 24915 }, { "epoch": 0.83, "grad_norm": 0.4541306495666504, "learning_rate": 0.0004938871477127946, "loss": 1.757, "step": 24916 }, { "epoch": 0.83, "grad_norm": 0.4341704547405243, "learning_rate": 0.0004938791716741296, "loss": 1.7565, "step": 24917 }, { "epoch": 0.83, "grad_norm": 0.4597186744213104, "learning_rate": 0.0004938711954001239, "loss": 1.7484, "step": 24918 }, { "epoch": 0.83, "grad_norm": 0.4883681833744049, "learning_rate": 0.000493863218890787, "loss": 1.8578, "step": 24919 }, { "epoch": 0.83, "grad_norm": 0.45185673236846924, "learning_rate": 0.0004938552421461289, "loss": 1.825, "step": 24920 }, { "epoch": 0.83, "grad_norm": 0.4338739514350891, "learning_rate": 0.000493847265166159, "loss": 1.8181, "step": 24921 }, { "epoch": 0.83, "grad_norm": 0.45454472303390503, "learning_rate": 0.000493839287950887, "loss": 1.8081, "step": 24922 }, { "epoch": 0.83, "grad_norm": 0.49339696764945984, "learning_rate": 0.0004938313105003227, "loss": 1.802, "step": 24923 }, { "epoch": 0.83, "grad_norm": 0.44075676798820496, "learning_rate": 0.000493823332814476, "loss": 1.8525, "step": 24924 }, { "epoch": 0.83, "grad_norm": 0.8578864932060242, "learning_rate": 0.000493815354893356, "loss": 1.93, "step": 24925 }, { "epoch": 0.83, "grad_norm": 0.43892771005630493, "learning_rate": 0.000493807376736973, "loss": 1.8363, "step": 24926 }, { "epoch": 0.83, "grad_norm": 0.45468324422836304, "learning_rate": 0.000493799398345336, "loss": 1.8416, "step": 24927 }, { "epoch": 0.83, "grad_norm": 0.7790617346763611, "learning_rate": 0.0004937914197184555, "loss": 1.8106, "step": 24928 }, { "epoch": 0.83, "grad_norm": 0.45524054765701294, "learning_rate": 0.0004937834408563406, "loss": 1.7679, "step": 24929 }, { "epoch": 0.83, "grad_norm": 0.44315555691719055, "learning_rate": 0.0004937754617590012, "loss": 1.8495, "step": 24930 }, { "epoch": 0.83, "grad_norm": 0.466139018535614, "learning_rate": 0.0004937674824264469, "loss": 1.7885, "step": 24931 }, { "epoch": 0.83, "grad_norm": 0.449493944644928, "learning_rate": 0.0004937595028586874, "loss": 1.8049, "step": 24932 }, { "epoch": 0.83, "grad_norm": 0.43924254179000854, "learning_rate": 0.0004937515230557324, "loss": 1.8446, "step": 24933 }, { "epoch": 0.83, "grad_norm": 0.4384383261203766, "learning_rate": 0.0004937435430175917, "loss": 1.8807, "step": 24934 }, { "epoch": 0.83, "grad_norm": 0.4384116232395172, "learning_rate": 0.0004937355627442748, "loss": 1.8526, "step": 24935 }, { "epoch": 0.83, "grad_norm": 0.4622862935066223, "learning_rate": 0.0004937275822357915, "loss": 1.8202, "step": 24936 }, { "epoch": 0.83, "grad_norm": 0.44600075483322144, "learning_rate": 0.0004937196014921514, "loss": 1.8382, "step": 24937 }, { "epoch": 0.83, "grad_norm": 0.4348732531070709, "learning_rate": 0.0004937116205133643, "loss": 1.8306, "step": 24938 }, { "epoch": 0.83, "grad_norm": 0.4464181363582611, "learning_rate": 0.0004937036392994398, "loss": 1.8303, "step": 24939 }, { "epoch": 0.83, "grad_norm": 0.4199923574924469, "learning_rate": 0.0004936956578503877, "loss": 1.7378, "step": 24940 }, { "epoch": 0.83, "grad_norm": 0.45323020219802856, "learning_rate": 0.0004936876761662176, "loss": 1.7782, "step": 24941 }, { "epoch": 0.83, "grad_norm": 0.451505571603775, "learning_rate": 0.0004936796942469392, "loss": 1.8313, "step": 24942 }, { "epoch": 0.83, "grad_norm": 0.4516597092151642, "learning_rate": 0.0004936717120925621, "loss": 1.839, "step": 24943 }, { "epoch": 0.83, "grad_norm": 0.43350493907928467, "learning_rate": 0.0004936637297030962, "loss": 1.7975, "step": 24944 }, { "epoch": 0.83, "grad_norm": 0.43386906385421753, "learning_rate": 0.0004936557470785509, "loss": 1.7854, "step": 24945 }, { "epoch": 0.83, "grad_norm": 0.6412615180015564, "learning_rate": 0.0004936477642189362, "loss": 1.856, "step": 24946 }, { "epoch": 0.83, "grad_norm": 0.44075751304626465, "learning_rate": 0.0004936397811242617, "loss": 1.8069, "step": 24947 }, { "epoch": 0.83, "grad_norm": 0.4363747537136078, "learning_rate": 0.0004936317977945368, "loss": 1.8367, "step": 24948 }, { "epoch": 0.83, "grad_norm": 0.43342146277427673, "learning_rate": 0.0004936238142297716, "loss": 1.8067, "step": 24949 }, { "epoch": 0.83, "grad_norm": 0.44200628995895386, "learning_rate": 0.0004936158304299756, "loss": 1.9003, "step": 24950 }, { "epoch": 0.83, "grad_norm": 0.44432270526885986, "learning_rate": 0.0004936078463951585, "loss": 1.8222, "step": 24951 }, { "epoch": 0.83, "grad_norm": 0.42051953077316284, "learning_rate": 0.0004935998621253301, "loss": 1.8396, "step": 24952 }, { "epoch": 0.83, "grad_norm": 0.44685301184654236, "learning_rate": 0.0004935918776204999, "loss": 1.7729, "step": 24953 }, { "epoch": 0.83, "grad_norm": 0.4251781404018402, "learning_rate": 0.0004935838928806776, "loss": 1.7497, "step": 24954 }, { "epoch": 0.83, "grad_norm": 0.4228835999965668, "learning_rate": 0.0004935759079058731, "loss": 1.8033, "step": 24955 }, { "epoch": 0.83, "grad_norm": 0.4267730116844177, "learning_rate": 0.000493567922696096, "loss": 1.7958, "step": 24956 }, { "epoch": 0.83, "grad_norm": 0.4384485185146332, "learning_rate": 0.0004935599372513558, "loss": 1.7904, "step": 24957 }, { "epoch": 0.83, "grad_norm": 0.4308636784553528, "learning_rate": 0.0004935519515716625, "loss": 1.833, "step": 24958 }, { "epoch": 0.83, "grad_norm": 0.4786173403263092, "learning_rate": 0.0004935439656570255, "loss": 1.847, "step": 24959 }, { "epoch": 0.83, "grad_norm": 0.45850086212158203, "learning_rate": 0.0004935359795074548, "loss": 1.845, "step": 24960 }, { "epoch": 0.83, "grad_norm": 0.4574769139289856, "learning_rate": 0.0004935279931229599, "loss": 1.7515, "step": 24961 }, { "epoch": 0.83, "grad_norm": 0.4412347674369812, "learning_rate": 0.0004935200065035504, "loss": 1.78, "step": 24962 }, { "epoch": 0.83, "grad_norm": 0.458894282579422, "learning_rate": 0.0004935120196492363, "loss": 1.8039, "step": 24963 }, { "epoch": 0.83, "grad_norm": 0.44372719526290894, "learning_rate": 0.000493504032560027, "loss": 1.8191, "step": 24964 }, { "epoch": 0.83, "grad_norm": 0.437166690826416, "learning_rate": 0.0004934960452359325, "loss": 1.8083, "step": 24965 }, { "epoch": 0.83, "grad_norm": 0.4761161506175995, "learning_rate": 0.0004934880576769621, "loss": 1.84, "step": 24966 }, { "epoch": 0.83, "grad_norm": 0.42260491847991943, "learning_rate": 0.0004934800698831258, "loss": 1.8131, "step": 24967 }, { "epoch": 0.83, "grad_norm": 0.44281208515167236, "learning_rate": 0.0004934720818544332, "loss": 1.833, "step": 24968 }, { "epoch": 0.83, "grad_norm": 0.44787248969078064, "learning_rate": 0.0004934640935908939, "loss": 1.766, "step": 24969 }, { "epoch": 0.83, "grad_norm": 0.45400574803352356, "learning_rate": 0.0004934561050925179, "loss": 1.82, "step": 24970 }, { "epoch": 0.83, "grad_norm": 0.4446202218532562, "learning_rate": 0.0004934481163593146, "loss": 1.806, "step": 24971 }, { "epoch": 0.83, "grad_norm": 0.4144167900085449, "learning_rate": 0.0004934401273912937, "loss": 1.8083, "step": 24972 }, { "epoch": 0.83, "grad_norm": 0.4286596179008484, "learning_rate": 0.0004934321381884651, "loss": 1.7972, "step": 24973 }, { "epoch": 0.83, "grad_norm": 0.43705713748931885, "learning_rate": 0.0004934241487508383, "loss": 1.794, "step": 24974 }, { "epoch": 0.83, "grad_norm": 0.4355934262275696, "learning_rate": 0.0004934161590784232, "loss": 1.8911, "step": 24975 }, { "epoch": 0.83, "grad_norm": 0.4493962526321411, "learning_rate": 0.0004934081691712293, "loss": 1.8004, "step": 24976 }, { "epoch": 0.83, "grad_norm": 0.4317586421966553, "learning_rate": 0.0004934001790292663, "loss": 1.8206, "step": 24977 }, { "epoch": 0.83, "grad_norm": 0.4645940959453583, "learning_rate": 0.0004933921886525442, "loss": 1.7876, "step": 24978 }, { "epoch": 0.83, "grad_norm": 0.4277937114238739, "learning_rate": 0.0004933841980410722, "loss": 1.7623, "step": 24979 }, { "epoch": 0.83, "grad_norm": 0.4427727460861206, "learning_rate": 0.0004933762071948605, "loss": 1.8174, "step": 24980 }, { "epoch": 0.83, "grad_norm": 0.442208856344223, "learning_rate": 0.0004933682161139184, "loss": 1.8405, "step": 24981 }, { "epoch": 0.83, "grad_norm": 0.42208895087242126, "learning_rate": 0.0004933602247982559, "loss": 1.7755, "step": 24982 }, { "epoch": 0.83, "grad_norm": 0.5026349425315857, "learning_rate": 0.0004933522332478827, "loss": 1.8327, "step": 24983 }, { "epoch": 0.83, "grad_norm": 0.42452287673950195, "learning_rate": 0.0004933442414628081, "loss": 1.7956, "step": 24984 }, { "epoch": 0.83, "grad_norm": 0.43644484877586365, "learning_rate": 0.0004933362494430423, "loss": 1.7799, "step": 24985 }, { "epoch": 0.83, "grad_norm": 0.43334683775901794, "learning_rate": 0.0004933282571885945, "loss": 1.8353, "step": 24986 }, { "epoch": 0.83, "grad_norm": 0.45391878485679626, "learning_rate": 0.000493320264699475, "loss": 1.8309, "step": 24987 }, { "epoch": 0.83, "grad_norm": 0.4501679837703705, "learning_rate": 0.0004933122719756931, "loss": 1.7912, "step": 24988 }, { "epoch": 0.83, "grad_norm": 0.42911168932914734, "learning_rate": 0.0004933042790172585, "loss": 1.8625, "step": 24989 }, { "epoch": 0.83, "grad_norm": 0.42551785707473755, "learning_rate": 0.0004932962858241811, "loss": 1.8181, "step": 24990 }, { "epoch": 0.83, "grad_norm": 0.4379696846008301, "learning_rate": 0.0004932882923964704, "loss": 1.8344, "step": 24991 }, { "epoch": 0.83, "grad_norm": 0.42167899012565613, "learning_rate": 0.0004932802987341362, "loss": 1.8561, "step": 24992 }, { "epoch": 0.83, "grad_norm": 0.443372517824173, "learning_rate": 0.0004932723048371883, "loss": 1.8301, "step": 24993 }, { "epoch": 0.83, "grad_norm": 0.459323912858963, "learning_rate": 0.000493264310705636, "loss": 1.8542, "step": 24994 }, { "epoch": 0.83, "grad_norm": 0.42713943123817444, "learning_rate": 0.0004932563163394896, "loss": 1.8601, "step": 24995 }, { "epoch": 0.83, "grad_norm": 0.5238545536994934, "learning_rate": 0.0004932483217387583, "loss": 1.877, "step": 24996 }, { "epoch": 0.83, "grad_norm": 0.4545424282550812, "learning_rate": 0.0004932403269034523, "loss": 1.8372, "step": 24997 }, { "epoch": 0.83, "grad_norm": 0.44181352853775024, "learning_rate": 0.0004932323318335808, "loss": 1.7692, "step": 24998 }, { "epoch": 0.83, "grad_norm": 0.4343973696231842, "learning_rate": 0.0004932243365291537, "loss": 1.894, "step": 24999 }, { "epoch": 0.83, "grad_norm": 0.4398014545440674, "learning_rate": 0.0004932163409901809, "loss": 1.8419, "step": 25000 }, { "epoch": 0.83, "grad_norm": 0.44421660900115967, "learning_rate": 0.0004932083452166717, "loss": 1.7991, "step": 25001 }, { "epoch": 0.83, "grad_norm": 0.45838773250579834, "learning_rate": 0.0004932003492086361, "loss": 1.8456, "step": 25002 }, { "epoch": 0.83, "grad_norm": 0.4385281503200531, "learning_rate": 0.0004931923529660839, "loss": 1.8849, "step": 25003 }, { "epoch": 0.83, "grad_norm": 0.44164755940437317, "learning_rate": 0.0004931843564890245, "loss": 1.8103, "step": 25004 }, { "epoch": 0.83, "grad_norm": 0.4763500392436981, "learning_rate": 0.0004931763597774678, "loss": 1.8697, "step": 25005 }, { "epoch": 0.83, "grad_norm": 0.4323580265045166, "learning_rate": 0.0004931683628314235, "loss": 1.8166, "step": 25006 }, { "epoch": 0.83, "grad_norm": 0.43198660016059875, "learning_rate": 0.0004931603656509012, "loss": 1.8226, "step": 25007 }, { "epoch": 0.83, "grad_norm": 0.43641701340675354, "learning_rate": 0.0004931523682359108, "loss": 1.8174, "step": 25008 }, { "epoch": 0.83, "grad_norm": 0.4435802698135376, "learning_rate": 0.0004931443705864616, "loss": 1.7458, "step": 25009 }, { "epoch": 0.83, "grad_norm": 0.43931278586387634, "learning_rate": 0.0004931363727025639, "loss": 1.8714, "step": 25010 }, { "epoch": 0.83, "grad_norm": 0.43616601824760437, "learning_rate": 0.000493128374584227, "loss": 1.8524, "step": 25011 }, { "epoch": 0.83, "grad_norm": 0.43289923667907715, "learning_rate": 0.0004931203762314607, "loss": 1.7836, "step": 25012 }, { "epoch": 0.83, "grad_norm": 0.43154656887054443, "learning_rate": 0.0004931123776442748, "loss": 1.8216, "step": 25013 }, { "epoch": 0.83, "grad_norm": 0.4448075294494629, "learning_rate": 0.0004931043788226788, "loss": 1.8396, "step": 25014 }, { "epoch": 0.83, "grad_norm": 0.4520721137523651, "learning_rate": 0.0004930963797666826, "loss": 1.8152, "step": 25015 }, { "epoch": 0.83, "grad_norm": 0.42405927181243896, "learning_rate": 0.0004930883804762957, "loss": 1.7719, "step": 25016 }, { "epoch": 0.83, "grad_norm": 0.45558395981788635, "learning_rate": 0.0004930803809515282, "loss": 1.7981, "step": 25017 }, { "epoch": 0.83, "grad_norm": 0.45302867889404297, "learning_rate": 0.0004930723811923895, "loss": 1.8632, "step": 25018 }, { "epoch": 0.83, "grad_norm": 0.4465799629688263, "learning_rate": 0.0004930643811988893, "loss": 1.8014, "step": 25019 }, { "epoch": 0.83, "grad_norm": 0.43816328048706055, "learning_rate": 0.0004930563809710373, "loss": 1.8711, "step": 25020 }, { "epoch": 0.83, "grad_norm": 0.42616006731987, "learning_rate": 0.0004930483805088435, "loss": 1.8499, "step": 25021 }, { "epoch": 0.83, "grad_norm": 0.44019708037376404, "learning_rate": 0.0004930403798123174, "loss": 1.8991, "step": 25022 }, { "epoch": 0.83, "grad_norm": 0.4559054374694824, "learning_rate": 0.0004930323788814685, "loss": 1.8017, "step": 25023 }, { "epoch": 0.83, "grad_norm": 0.4419316053390503, "learning_rate": 0.0004930243777163069, "loss": 1.8178, "step": 25024 }, { "epoch": 0.83, "grad_norm": 0.4355422258377075, "learning_rate": 0.0004930163763168423, "loss": 1.7816, "step": 25025 }, { "epoch": 0.83, "grad_norm": 0.4443648159503937, "learning_rate": 0.000493008374683084, "loss": 1.7734, "step": 25026 }, { "epoch": 0.83, "grad_norm": 0.42674142122268677, "learning_rate": 0.000493000372815042, "loss": 1.8193, "step": 25027 }, { "epoch": 0.83, "grad_norm": 0.4251997172832489, "learning_rate": 0.0004929923707127261, "loss": 1.7899, "step": 25028 }, { "epoch": 0.83, "grad_norm": 0.4242856800556183, "learning_rate": 0.0004929843683761458, "loss": 1.8673, "step": 25029 }, { "epoch": 0.83, "grad_norm": 0.43017295002937317, "learning_rate": 0.000492976365805311, "loss": 1.8567, "step": 25030 }, { "epoch": 0.83, "grad_norm": 0.4366452097892761, "learning_rate": 0.0004929683630002312, "loss": 1.8478, "step": 25031 }, { "epoch": 0.83, "grad_norm": 0.43384185433387756, "learning_rate": 0.0004929603599609164, "loss": 1.8066, "step": 25032 }, { "epoch": 0.83, "grad_norm": 0.44265979528427124, "learning_rate": 0.000492952356687376, "loss": 1.8458, "step": 25033 }, { "epoch": 0.83, "grad_norm": 0.4233665466308594, "learning_rate": 0.0004929443531796199, "loss": 1.8068, "step": 25034 }, { "epoch": 0.83, "grad_norm": 0.42819663882255554, "learning_rate": 0.0004929363494376579, "loss": 1.8002, "step": 25035 }, { "epoch": 0.83, "grad_norm": 0.4276278018951416, "learning_rate": 0.0004929283454614995, "loss": 1.8441, "step": 25036 }, { "epoch": 0.83, "grad_norm": 0.44573670625686646, "learning_rate": 0.0004929203412511546, "loss": 1.8361, "step": 25037 }, { "epoch": 0.83, "grad_norm": 0.43007558584213257, "learning_rate": 0.0004929123368066328, "loss": 1.7602, "step": 25038 }, { "epoch": 0.83, "grad_norm": 0.43967002630233765, "learning_rate": 0.0004929043321279437, "loss": 1.8459, "step": 25039 }, { "epoch": 0.83, "grad_norm": 0.44616928696632385, "learning_rate": 0.0004928963272150974, "loss": 1.7727, "step": 25040 }, { "epoch": 0.83, "grad_norm": 0.4365844428539276, "learning_rate": 0.0004928883220681032, "loss": 1.8193, "step": 25041 }, { "epoch": 0.83, "grad_norm": 0.42676571011543274, "learning_rate": 0.000492880316686971, "loss": 1.8386, "step": 25042 }, { "epoch": 0.83, "grad_norm": 0.4396607577800751, "learning_rate": 0.0004928723110717106, "loss": 1.7701, "step": 25043 }, { "epoch": 0.83, "grad_norm": 0.44305849075317383, "learning_rate": 0.0004928643052223317, "loss": 1.8231, "step": 25044 }, { "epoch": 0.83, "grad_norm": 0.4514467120170593, "learning_rate": 0.0004928562991388439, "loss": 1.7842, "step": 25045 }, { "epoch": 0.83, "grad_norm": 0.45814210176467896, "learning_rate": 0.0004928482928212568, "loss": 1.7955, "step": 25046 }, { "epoch": 0.83, "grad_norm": 0.44841268658638, "learning_rate": 0.0004928402862695804, "loss": 1.7721, "step": 25047 }, { "epoch": 0.83, "grad_norm": 0.4630608856678009, "learning_rate": 0.0004928322794838244, "loss": 1.7868, "step": 25048 }, { "epoch": 0.83, "grad_norm": 0.45370906591415405, "learning_rate": 0.0004928242724639983, "loss": 1.7374, "step": 25049 }, { "epoch": 0.83, "grad_norm": 0.43860000371932983, "learning_rate": 0.000492816265210112, "loss": 1.7818, "step": 25050 }, { "epoch": 0.83, "grad_norm": 0.4280592203140259, "learning_rate": 0.0004928082577221752, "loss": 1.7936, "step": 25051 }, { "epoch": 0.83, "grad_norm": 0.4541260600090027, "learning_rate": 0.0004928002500001975, "loss": 1.7917, "step": 25052 }, { "epoch": 0.83, "grad_norm": 0.46432721614837646, "learning_rate": 0.0004927922420441888, "loss": 1.8973, "step": 25053 }, { "epoch": 0.83, "grad_norm": 0.4555749297142029, "learning_rate": 0.0004927842338541586, "loss": 1.8804, "step": 25054 }, { "epoch": 0.83, "grad_norm": 0.4434456527233124, "learning_rate": 0.0004927762254301168, "loss": 1.7856, "step": 25055 }, { "epoch": 0.83, "grad_norm": 0.4527880847454071, "learning_rate": 0.000492768216772073, "loss": 1.87, "step": 25056 }, { "epoch": 0.83, "grad_norm": 0.47550296783447266, "learning_rate": 0.0004927602078800373, "loss": 1.8357, "step": 25057 }, { "epoch": 0.83, "grad_norm": 0.45290353894233704, "learning_rate": 0.0004927521987540187, "loss": 1.8596, "step": 25058 }, { "epoch": 0.83, "grad_norm": 0.575006902217865, "learning_rate": 0.0004927441893940276, "loss": 1.8052, "step": 25059 }, { "epoch": 0.83, "grad_norm": 0.4431101083755493, "learning_rate": 0.0004927361798000734, "loss": 1.855, "step": 25060 }, { "epoch": 0.83, "grad_norm": 0.45811042189598083, "learning_rate": 0.0004927281699721658, "loss": 1.8154, "step": 25061 }, { "epoch": 0.83, "grad_norm": 0.4375636875629425, "learning_rate": 0.0004927201599103147, "loss": 1.7507, "step": 25062 }, { "epoch": 0.83, "grad_norm": 0.4562259912490845, "learning_rate": 0.0004927121496145296, "loss": 1.8413, "step": 25063 }, { "epoch": 0.83, "grad_norm": 0.4438076615333557, "learning_rate": 0.0004927041390848204, "loss": 1.794, "step": 25064 }, { "epoch": 0.83, "grad_norm": 0.45678091049194336, "learning_rate": 0.0004926961283211968, "loss": 1.8251, "step": 25065 }, { "epoch": 0.83, "grad_norm": 0.44738492369651794, "learning_rate": 0.0004926881173236684, "loss": 1.8361, "step": 25066 }, { "epoch": 0.83, "grad_norm": 0.47423961758613586, "learning_rate": 0.0004926801060922451, "loss": 1.8298, "step": 25067 }, { "epoch": 0.83, "grad_norm": 0.47029703855514526, "learning_rate": 0.0004926720946269366, "loss": 1.7796, "step": 25068 }, { "epoch": 0.83, "grad_norm": 0.4319598078727722, "learning_rate": 0.0004926640829277525, "loss": 1.8154, "step": 25069 }, { "epoch": 0.83, "grad_norm": 0.4377363324165344, "learning_rate": 0.0004926560709947026, "loss": 1.8558, "step": 25070 }, { "epoch": 0.83, "grad_norm": 0.44302070140838623, "learning_rate": 0.0004926480588277965, "loss": 1.8076, "step": 25071 }, { "epoch": 0.83, "grad_norm": 0.4460959732532501, "learning_rate": 0.0004926400464270441, "loss": 1.7412, "step": 25072 }, { "epoch": 0.83, "grad_norm": 0.43111321330070496, "learning_rate": 0.0004926320337924552, "loss": 1.8013, "step": 25073 }, { "epoch": 0.83, "grad_norm": 0.45350155234336853, "learning_rate": 0.0004926240209240393, "loss": 1.9036, "step": 25074 }, { "epoch": 0.83, "grad_norm": 0.47077903151512146, "learning_rate": 0.0004926160078218062, "loss": 1.8476, "step": 25075 }, { "epoch": 0.83, "grad_norm": 0.43634894490242004, "learning_rate": 0.0004926079944857656, "loss": 1.803, "step": 25076 }, { "epoch": 0.83, "grad_norm": 0.43273797631263733, "learning_rate": 0.0004925999809159274, "loss": 1.8249, "step": 25077 }, { "epoch": 0.83, "grad_norm": 0.4340868592262268, "learning_rate": 0.0004925919671123012, "loss": 1.7876, "step": 25078 }, { "epoch": 0.83, "grad_norm": 0.4455646872520447, "learning_rate": 0.0004925839530748967, "loss": 1.8274, "step": 25079 }, { "epoch": 0.83, "grad_norm": 0.4640588164329529, "learning_rate": 0.0004925759388037235, "loss": 1.8977, "step": 25080 }, { "epoch": 0.83, "grad_norm": 0.4414648413658142, "learning_rate": 0.0004925679242987917, "loss": 1.89, "step": 25081 }, { "epoch": 0.83, "grad_norm": 0.4368596374988556, "learning_rate": 0.0004925599095601108, "loss": 1.805, "step": 25082 }, { "epoch": 0.83, "grad_norm": 0.43615615367889404, "learning_rate": 0.0004925518945876903, "loss": 1.7775, "step": 25083 }, { "epoch": 0.83, "grad_norm": 0.4502648711204529, "learning_rate": 0.0004925438793815405, "loss": 1.9519, "step": 25084 }, { "epoch": 0.83, "grad_norm": 0.4323579668998718, "learning_rate": 0.0004925358639416706, "loss": 1.7952, "step": 25085 }, { "epoch": 0.83, "grad_norm": 0.45839783549308777, "learning_rate": 0.0004925278482680905, "loss": 1.7918, "step": 25086 }, { "epoch": 0.83, "grad_norm": 0.45104628801345825, "learning_rate": 0.0004925198323608101, "loss": 1.7985, "step": 25087 }, { "epoch": 0.83, "grad_norm": 0.4585670828819275, "learning_rate": 0.000492511816219839, "loss": 1.8074, "step": 25088 }, { "epoch": 0.83, "grad_norm": 0.44837313890457153, "learning_rate": 0.0004925037998451868, "loss": 1.7879, "step": 25089 }, { "epoch": 0.83, "grad_norm": 0.4461536109447479, "learning_rate": 0.0004924957832368633, "loss": 1.7994, "step": 25090 }, { "epoch": 0.83, "grad_norm": 0.43731504678726196, "learning_rate": 0.0004924877663948784, "loss": 1.7447, "step": 25091 }, { "epoch": 0.83, "grad_norm": 0.4409432113170624, "learning_rate": 0.0004924797493192417, "loss": 1.8256, "step": 25092 }, { "epoch": 0.83, "grad_norm": 0.43243980407714844, "learning_rate": 0.0004924717320099629, "loss": 1.8214, "step": 25093 }, { "epoch": 0.83, "grad_norm": 0.43984153866767883, "learning_rate": 0.0004924637144670519, "loss": 1.8225, "step": 25094 }, { "epoch": 0.83, "grad_norm": 0.4541149139404297, "learning_rate": 0.0004924556966905181, "loss": 1.8539, "step": 25095 }, { "epoch": 0.83, "grad_norm": 0.44656282663345337, "learning_rate": 0.0004924476786803716, "loss": 1.8232, "step": 25096 }, { "epoch": 0.83, "grad_norm": 0.4369962811470032, "learning_rate": 0.000492439660436622, "loss": 1.8245, "step": 25097 }, { "epoch": 0.84, "grad_norm": 0.43792232871055603, "learning_rate": 0.0004924316419592789, "loss": 1.8439, "step": 25098 }, { "epoch": 0.84, "grad_norm": 0.43533629179000854, "learning_rate": 0.0004924236232483522, "loss": 1.7923, "step": 25099 }, { "epoch": 0.84, "grad_norm": 0.4372943341732025, "learning_rate": 0.0004924156043038515, "loss": 1.8437, "step": 25100 }, { "epoch": 0.84, "grad_norm": 0.4524759352207184, "learning_rate": 0.0004924075851257867, "loss": 1.84, "step": 25101 }, { "epoch": 0.84, "grad_norm": 0.44057637453079224, "learning_rate": 0.0004923995657141675, "loss": 1.72, "step": 25102 }, { "epoch": 0.84, "grad_norm": 0.41889873147010803, "learning_rate": 0.0004923915460690034, "loss": 1.8261, "step": 25103 }, { "epoch": 0.84, "grad_norm": 0.4404805600643158, "learning_rate": 0.0004923835261903043, "loss": 1.8212, "step": 25104 }, { "epoch": 0.84, "grad_norm": 0.4474680423736572, "learning_rate": 0.0004923755060780801, "loss": 1.8427, "step": 25105 }, { "epoch": 0.84, "grad_norm": 0.4238976836204529, "learning_rate": 0.0004923674857323404, "loss": 1.8063, "step": 25106 }, { "epoch": 0.84, "grad_norm": 0.4357374608516693, "learning_rate": 0.0004923594651530948, "loss": 1.7664, "step": 25107 }, { "epoch": 0.84, "grad_norm": 0.42263954877853394, "learning_rate": 0.0004923514443403532, "loss": 1.7699, "step": 25108 }, { "epoch": 0.84, "grad_norm": 0.4409908056259155, "learning_rate": 0.0004923434232941252, "loss": 1.7598, "step": 25109 }, { "epoch": 0.84, "grad_norm": 0.4471517503261566, "learning_rate": 0.0004923354020144208, "loss": 1.8093, "step": 25110 }, { "epoch": 0.84, "grad_norm": 0.44038063287734985, "learning_rate": 0.0004923273805012494, "loss": 1.8013, "step": 25111 }, { "epoch": 0.84, "grad_norm": 0.43873366713523865, "learning_rate": 0.000492319358754621, "loss": 1.8302, "step": 25112 }, { "epoch": 0.84, "grad_norm": 0.44644537568092346, "learning_rate": 0.0004923113367745453, "loss": 1.9299, "step": 25113 }, { "epoch": 0.84, "grad_norm": 0.4426291882991791, "learning_rate": 0.0004923033145610318, "loss": 1.8466, "step": 25114 }, { "epoch": 0.84, "grad_norm": 0.43622496724128723, "learning_rate": 0.0004922952921140904, "loss": 1.7591, "step": 25115 }, { "epoch": 0.84, "grad_norm": 0.4403994679450989, "learning_rate": 0.000492287269433731, "loss": 1.8202, "step": 25116 }, { "epoch": 0.84, "grad_norm": 0.44364193081855774, "learning_rate": 0.0004922792465199631, "loss": 1.8313, "step": 25117 }, { "epoch": 0.84, "grad_norm": 0.4303661584854126, "learning_rate": 0.0004922712233727965, "loss": 1.8001, "step": 25118 }, { "epoch": 0.84, "grad_norm": 0.425564169883728, "learning_rate": 0.000492263199992241, "loss": 1.746, "step": 25119 }, { "epoch": 0.84, "grad_norm": 0.43448057770729065, "learning_rate": 0.0004922551763783063, "loss": 1.8369, "step": 25120 }, { "epoch": 0.84, "grad_norm": 0.447177916765213, "learning_rate": 0.0004922471525310022, "loss": 1.8308, "step": 25121 }, { "epoch": 0.84, "grad_norm": 0.4223690927028656, "learning_rate": 0.0004922391284503384, "loss": 1.7526, "step": 25122 }, { "epoch": 0.84, "grad_norm": 0.4435596466064453, "learning_rate": 0.0004922311041363244, "loss": 1.7818, "step": 25123 }, { "epoch": 0.84, "grad_norm": 0.44464099407196045, "learning_rate": 0.0004922230795889704, "loss": 1.8249, "step": 25124 }, { "epoch": 0.84, "grad_norm": 0.47437167167663574, "learning_rate": 0.0004922150548082859, "loss": 1.869, "step": 25125 }, { "epoch": 0.84, "grad_norm": 0.43391457200050354, "learning_rate": 0.0004922070297942806, "loss": 1.9377, "step": 25126 }, { "epoch": 0.84, "grad_norm": 0.43419310450553894, "learning_rate": 0.0004921990045469642, "loss": 1.8421, "step": 25127 }, { "epoch": 0.84, "grad_norm": 0.43323764204978943, "learning_rate": 0.0004921909790663465, "loss": 1.8817, "step": 25128 }, { "epoch": 0.84, "grad_norm": 0.42385783791542053, "learning_rate": 0.0004921829533524373, "loss": 1.7987, "step": 25129 }, { "epoch": 0.84, "grad_norm": 0.44207096099853516, "learning_rate": 0.0004921749274052465, "loss": 1.8607, "step": 25130 }, { "epoch": 0.84, "grad_norm": 0.4602227210998535, "learning_rate": 0.0004921669012247834, "loss": 1.8152, "step": 25131 }, { "epoch": 0.84, "grad_norm": 0.4275493621826172, "learning_rate": 0.0004921588748110583, "loss": 1.84, "step": 25132 }, { "epoch": 0.84, "grad_norm": 0.4422134459018707, "learning_rate": 0.0004921508481640803, "loss": 1.8473, "step": 25133 }, { "epoch": 0.84, "grad_norm": 0.4913092255592346, "learning_rate": 0.0004921428212838597, "loss": 1.832, "step": 25134 }, { "epoch": 0.84, "grad_norm": 0.4565618932247162, "learning_rate": 0.000492134794170406, "loss": 1.8196, "step": 25135 }, { "epoch": 0.84, "grad_norm": 0.4303886890411377, "learning_rate": 0.0004921267668237289, "loss": 1.8104, "step": 25136 }, { "epoch": 0.84, "grad_norm": 0.44650664925575256, "learning_rate": 0.0004921187392438383, "loss": 1.8495, "step": 25137 }, { "epoch": 0.84, "grad_norm": 0.4610515832901001, "learning_rate": 0.0004921107114307437, "loss": 1.8258, "step": 25138 }, { "epoch": 0.84, "grad_norm": 0.4739922285079956, "learning_rate": 0.0004921026833844552, "loss": 1.8475, "step": 25139 }, { "epoch": 0.84, "grad_norm": 0.43297311663627625, "learning_rate": 0.0004920946551049822, "loss": 1.8611, "step": 25140 }, { "epoch": 0.84, "grad_norm": 0.464102566242218, "learning_rate": 0.0004920866265923346, "loss": 1.7842, "step": 25141 }, { "epoch": 0.84, "grad_norm": 0.4426890015602112, "learning_rate": 0.0004920785978465222, "loss": 1.8489, "step": 25142 }, { "epoch": 0.84, "grad_norm": 0.5977101922035217, "learning_rate": 0.0004920705688675547, "loss": 1.7462, "step": 25143 }, { "epoch": 0.84, "grad_norm": 0.4445802867412567, "learning_rate": 0.0004920625396554418, "loss": 1.877, "step": 25144 }, { "epoch": 0.84, "grad_norm": 0.45288801193237305, "learning_rate": 0.0004920545102101932, "loss": 1.7371, "step": 25145 }, { "epoch": 0.84, "grad_norm": 0.4632208049297333, "learning_rate": 0.0004920464805318189, "loss": 1.846, "step": 25146 }, { "epoch": 0.84, "grad_norm": 0.46254801750183105, "learning_rate": 0.0004920384506203284, "loss": 1.8046, "step": 25147 }, { "epoch": 0.84, "grad_norm": 0.4253154397010803, "learning_rate": 0.0004920304204757313, "loss": 1.8041, "step": 25148 }, { "epoch": 0.84, "grad_norm": 0.44746795296669006, "learning_rate": 0.0004920223900980379, "loss": 1.7449, "step": 25149 }, { "epoch": 0.84, "grad_norm": 0.47401848435401917, "learning_rate": 0.0004920143594872575, "loss": 1.8725, "step": 25150 }, { "epoch": 0.84, "grad_norm": 0.45201027393341064, "learning_rate": 0.0004920063286433998, "loss": 1.845, "step": 25151 }, { "epoch": 0.84, "grad_norm": 0.45231813192367554, "learning_rate": 0.0004919982975664749, "loss": 1.8226, "step": 25152 }, { "epoch": 0.84, "grad_norm": 0.4592888057231903, "learning_rate": 0.0004919902662564922, "loss": 1.8137, "step": 25153 }, { "epoch": 0.84, "grad_norm": 0.48757290840148926, "learning_rate": 0.0004919822347134617, "loss": 1.7926, "step": 25154 }, { "epoch": 0.84, "grad_norm": 0.4516923427581787, "learning_rate": 0.0004919742029373931, "loss": 1.7596, "step": 25155 }, { "epoch": 0.84, "grad_norm": 0.43268972635269165, "learning_rate": 0.000491966170928296, "loss": 1.8802, "step": 25156 }, { "epoch": 0.84, "grad_norm": 0.4405357837677002, "learning_rate": 0.0004919581386861803, "loss": 1.8364, "step": 25157 }, { "epoch": 0.84, "grad_norm": 0.4624278247356415, "learning_rate": 0.0004919501062110558, "loss": 1.9363, "step": 25158 }, { "epoch": 0.84, "grad_norm": 0.44896066188812256, "learning_rate": 0.000491942073502932, "loss": 1.8398, "step": 25159 }, { "epoch": 0.84, "grad_norm": 0.4305794835090637, "learning_rate": 0.0004919340405618188, "loss": 1.8186, "step": 25160 }, { "epoch": 0.84, "grad_norm": 0.4254397749900818, "learning_rate": 0.0004919260073877261, "loss": 1.8336, "step": 25161 }, { "epoch": 0.84, "grad_norm": 0.9627796411514282, "learning_rate": 0.0004919179739806634, "loss": 1.9064, "step": 25162 }, { "epoch": 0.84, "grad_norm": 0.44098854064941406, "learning_rate": 0.0004919099403406405, "loss": 1.8241, "step": 25163 }, { "epoch": 0.84, "grad_norm": 0.43581944704055786, "learning_rate": 0.0004919019064676673, "loss": 1.743, "step": 25164 }, { "epoch": 0.84, "grad_norm": 0.4542880654335022, "learning_rate": 0.0004918938723617536, "loss": 1.7945, "step": 25165 }, { "epoch": 0.84, "grad_norm": 0.42797788977622986, "learning_rate": 0.0004918858380229087, "loss": 1.7612, "step": 25166 }, { "epoch": 0.84, "grad_norm": 0.4246059060096741, "learning_rate": 0.0004918778034511429, "loss": 1.8332, "step": 25167 }, { "epoch": 0.84, "grad_norm": 0.43612101674079895, "learning_rate": 0.0004918697686464656, "loss": 1.7437, "step": 25168 }, { "epoch": 0.84, "grad_norm": 0.4497697651386261, "learning_rate": 0.0004918617336088868, "loss": 1.8168, "step": 25169 }, { "epoch": 0.84, "grad_norm": 0.43187087774276733, "learning_rate": 0.000491853698338416, "loss": 1.811, "step": 25170 }, { "epoch": 0.84, "grad_norm": 0.42735886573791504, "learning_rate": 0.0004918456628350632, "loss": 1.8537, "step": 25171 }, { "epoch": 0.84, "grad_norm": 0.4501238465309143, "learning_rate": 0.0004918376270988379, "loss": 1.9779, "step": 25172 }, { "epoch": 0.84, "grad_norm": 0.4538530707359314, "learning_rate": 0.00049182959112975, "loss": 1.8646, "step": 25173 }, { "epoch": 0.84, "grad_norm": 0.4561004638671875, "learning_rate": 0.0004918215549278094, "loss": 1.7655, "step": 25174 }, { "epoch": 0.84, "grad_norm": 0.44345879554748535, "learning_rate": 0.0004918135184930254, "loss": 1.7668, "step": 25175 }, { "epoch": 0.84, "grad_norm": 0.4269472658634186, "learning_rate": 0.0004918054818254082, "loss": 1.8705, "step": 25176 }, { "epoch": 0.84, "grad_norm": 0.43423891067504883, "learning_rate": 0.0004917974449249675, "loss": 1.9101, "step": 25177 }, { "epoch": 0.84, "grad_norm": 0.4606133997440338, "learning_rate": 0.0004917894077917129, "loss": 1.819, "step": 25178 }, { "epoch": 0.84, "grad_norm": 0.43163052201271057, "learning_rate": 0.0004917813704256543, "loss": 1.8178, "step": 25179 }, { "epoch": 0.84, "grad_norm": 0.4290473461151123, "learning_rate": 0.0004917733328268012, "loss": 1.7962, "step": 25180 }, { "epoch": 0.84, "grad_norm": 0.44651803374290466, "learning_rate": 0.0004917652949951636, "loss": 1.7786, "step": 25181 }, { "epoch": 0.84, "grad_norm": 0.43715527653694153, "learning_rate": 0.0004917572569307512, "loss": 1.7683, "step": 25182 }, { "epoch": 0.84, "grad_norm": 0.4450876712799072, "learning_rate": 0.0004917492186335738, "loss": 1.7657, "step": 25183 }, { "epoch": 0.84, "grad_norm": 0.576281726360321, "learning_rate": 0.0004917411801036411, "loss": 1.904, "step": 25184 }, { "epoch": 0.84, "grad_norm": 0.4327838718891144, "learning_rate": 0.0004917331413409628, "loss": 1.844, "step": 25185 }, { "epoch": 0.84, "grad_norm": 0.421955406665802, "learning_rate": 0.0004917251023455486, "loss": 1.844, "step": 25186 }, { "epoch": 0.84, "grad_norm": 0.43760794401168823, "learning_rate": 0.0004917170631174087, "loss": 1.7521, "step": 25187 }, { "epoch": 0.84, "grad_norm": 0.4324500262737274, "learning_rate": 0.0004917090236565522, "loss": 1.8624, "step": 25188 }, { "epoch": 0.84, "grad_norm": 0.4281361699104309, "learning_rate": 0.0004917009839629894, "loss": 1.8033, "step": 25189 }, { "epoch": 0.84, "grad_norm": 0.4380891025066376, "learning_rate": 0.0004916929440367297, "loss": 1.7831, "step": 25190 }, { "epoch": 0.84, "grad_norm": 0.43402352929115295, "learning_rate": 0.0004916849038777831, "loss": 1.8397, "step": 25191 }, { "epoch": 0.84, "grad_norm": 0.4541707932949066, "learning_rate": 0.0004916768634861592, "loss": 1.773, "step": 25192 }, { "epoch": 0.84, "grad_norm": 0.43833622336387634, "learning_rate": 0.000491668822861868, "loss": 1.7988, "step": 25193 }, { "epoch": 0.84, "grad_norm": 0.422518789768219, "learning_rate": 0.000491660782004919, "loss": 1.7673, "step": 25194 }, { "epoch": 0.84, "grad_norm": 0.45302391052246094, "learning_rate": 0.0004916527409153219, "loss": 1.8716, "step": 25195 }, { "epoch": 0.84, "grad_norm": 0.4465115964412689, "learning_rate": 0.0004916446995930868, "loss": 1.8149, "step": 25196 }, { "epoch": 0.84, "grad_norm": 0.4299549162387848, "learning_rate": 0.0004916366580382232, "loss": 1.7513, "step": 25197 }, { "epoch": 0.84, "grad_norm": 0.4371398687362671, "learning_rate": 0.0004916286162507409, "loss": 1.8433, "step": 25198 }, { "epoch": 0.84, "grad_norm": 0.4295656979084015, "learning_rate": 0.0004916205742306499, "loss": 1.7584, "step": 25199 }, { "epoch": 0.84, "grad_norm": 0.43213197588920593, "learning_rate": 0.0004916125319779595, "loss": 1.8129, "step": 25200 }, { "epoch": 0.84, "grad_norm": 0.43327704071998596, "learning_rate": 0.0004916044894926798, "loss": 1.8589, "step": 25201 }, { "epoch": 0.84, "grad_norm": 0.4488297998905182, "learning_rate": 0.0004915964467748206, "loss": 1.8126, "step": 25202 }, { "epoch": 0.84, "grad_norm": 0.42394328117370605, "learning_rate": 0.0004915884038243914, "loss": 1.7544, "step": 25203 }, { "epoch": 0.84, "grad_norm": 0.4546111524105072, "learning_rate": 0.0004915803606414021, "loss": 1.8562, "step": 25204 }, { "epoch": 0.84, "grad_norm": 0.4362952411174774, "learning_rate": 0.0004915723172258625, "loss": 1.8265, "step": 25205 }, { "epoch": 0.84, "grad_norm": 0.42419299483299255, "learning_rate": 0.0004915642735777824, "loss": 1.8343, "step": 25206 }, { "epoch": 0.84, "grad_norm": 0.44161972403526306, "learning_rate": 0.0004915562296971714, "loss": 1.824, "step": 25207 }, { "epoch": 0.84, "grad_norm": 0.4438364803791046, "learning_rate": 0.0004915481855840394, "loss": 1.8177, "step": 25208 }, { "epoch": 0.84, "grad_norm": 0.44167208671569824, "learning_rate": 0.0004915401412383962, "loss": 1.8266, "step": 25209 }, { "epoch": 0.84, "grad_norm": 0.43879446387290955, "learning_rate": 0.0004915320966602513, "loss": 1.8874, "step": 25210 }, { "epoch": 0.84, "grad_norm": 0.43731164932250977, "learning_rate": 0.0004915240518496149, "loss": 1.9071, "step": 25211 }, { "epoch": 0.84, "grad_norm": 0.4347710609436035, "learning_rate": 0.0004915160068064964, "loss": 1.7435, "step": 25212 }, { "epoch": 0.84, "grad_norm": 0.44430673122406006, "learning_rate": 0.0004915079615309056, "loss": 1.7633, "step": 25213 }, { "epoch": 0.84, "grad_norm": 0.4154895842075348, "learning_rate": 0.0004914999160228526, "loss": 1.8485, "step": 25214 }, { "epoch": 0.84, "grad_norm": 0.44591426849365234, "learning_rate": 0.0004914918702823467, "loss": 1.8503, "step": 25215 }, { "epoch": 0.84, "grad_norm": 0.4587739408016205, "learning_rate": 0.000491483824309398, "loss": 1.798, "step": 25216 }, { "epoch": 0.84, "grad_norm": 0.4560215175151825, "learning_rate": 0.0004914757781040161, "loss": 1.7175, "step": 25217 }, { "epoch": 0.84, "grad_norm": 0.4401452839374542, "learning_rate": 0.0004914677316662108, "loss": 1.7926, "step": 25218 }, { "epoch": 0.84, "grad_norm": 0.4346867799758911, "learning_rate": 0.0004914596849959919, "loss": 1.9445, "step": 25219 }, { "epoch": 0.84, "grad_norm": 0.43204912543296814, "learning_rate": 0.0004914516380933691, "loss": 1.8558, "step": 25220 }, { "epoch": 0.84, "grad_norm": 0.44605323672294617, "learning_rate": 0.0004914435909583523, "loss": 1.7856, "step": 25221 }, { "epoch": 0.84, "grad_norm": 0.43593835830688477, "learning_rate": 0.0004914355435909513, "loss": 1.7842, "step": 25222 }, { "epoch": 0.84, "grad_norm": 0.4329543709754944, "learning_rate": 0.0004914274959911755, "loss": 1.8176, "step": 25223 }, { "epoch": 0.84, "grad_norm": 0.455221563577652, "learning_rate": 0.000491419448159035, "loss": 1.8713, "step": 25224 }, { "epoch": 0.84, "grad_norm": 0.4393480122089386, "learning_rate": 0.0004914114000945396, "loss": 1.7518, "step": 25225 }, { "epoch": 0.84, "grad_norm": 0.4436478316783905, "learning_rate": 0.0004914033517976989, "loss": 1.8553, "step": 25226 }, { "epoch": 0.84, "grad_norm": 0.45452961325645447, "learning_rate": 0.0004913953032685228, "loss": 1.8231, "step": 25227 }, { "epoch": 0.84, "grad_norm": 0.4631213843822479, "learning_rate": 0.0004913872545070208, "loss": 1.8774, "step": 25228 }, { "epoch": 0.84, "grad_norm": 0.4248303472995758, "learning_rate": 0.000491379205513203, "loss": 1.727, "step": 25229 }, { "epoch": 0.84, "grad_norm": 0.43683263659477234, "learning_rate": 0.0004913711562870792, "loss": 1.8351, "step": 25230 }, { "epoch": 0.84, "grad_norm": 0.4249191880226135, "learning_rate": 0.0004913631068286589, "loss": 1.8138, "step": 25231 }, { "epoch": 0.84, "grad_norm": 0.4501473903656006, "learning_rate": 0.0004913550571379519, "loss": 1.8276, "step": 25232 }, { "epoch": 0.84, "grad_norm": 0.43292436003685, "learning_rate": 0.000491347007214968, "loss": 1.8467, "step": 25233 }, { "epoch": 0.84, "grad_norm": 0.4342300593852997, "learning_rate": 0.0004913389570597172, "loss": 1.7939, "step": 25234 }, { "epoch": 0.84, "grad_norm": 0.46308237314224243, "learning_rate": 0.0004913309066722091, "loss": 1.7508, "step": 25235 }, { "epoch": 0.84, "grad_norm": 0.4204292595386505, "learning_rate": 0.0004913228560524533, "loss": 1.8056, "step": 25236 }, { "epoch": 0.84, "grad_norm": 0.4314628541469574, "learning_rate": 0.00049131480520046, "loss": 1.8268, "step": 25237 }, { "epoch": 0.84, "grad_norm": 0.43788576126098633, "learning_rate": 0.0004913067541162385, "loss": 1.9016, "step": 25238 }, { "epoch": 0.84, "grad_norm": 0.4691116511821747, "learning_rate": 0.0004912987027997989, "loss": 1.8972, "step": 25239 }, { "epoch": 0.84, "grad_norm": 0.4449295103549957, "learning_rate": 0.0004912906512511507, "loss": 1.7512, "step": 25240 }, { "epoch": 0.84, "grad_norm": 0.4364625811576843, "learning_rate": 0.0004912825994703039, "loss": 1.8639, "step": 25241 }, { "epoch": 0.84, "grad_norm": 0.431728720664978, "learning_rate": 0.0004912745474572683, "loss": 1.8641, "step": 25242 }, { "epoch": 0.84, "grad_norm": 0.4519456923007965, "learning_rate": 0.0004912664952120535, "loss": 1.838, "step": 25243 }, { "epoch": 0.84, "grad_norm": 0.4399673640727997, "learning_rate": 0.0004912584427346694, "loss": 1.7817, "step": 25244 }, { "epoch": 0.84, "grad_norm": 0.46040773391723633, "learning_rate": 0.0004912503900251258, "loss": 1.8677, "step": 25245 }, { "epoch": 0.84, "grad_norm": 0.4549959599971771, "learning_rate": 0.0004912423370834324, "loss": 1.8634, "step": 25246 }, { "epoch": 0.84, "grad_norm": 0.4448481798171997, "learning_rate": 0.0004912342839095989, "loss": 1.8921, "step": 25247 }, { "epoch": 0.84, "grad_norm": 0.4310232102870941, "learning_rate": 0.000491226230503635, "loss": 1.814, "step": 25248 }, { "epoch": 0.84, "grad_norm": 0.4244750440120697, "learning_rate": 0.0004912181768655508, "loss": 1.84, "step": 25249 }, { "epoch": 0.84, "grad_norm": 0.43133002519607544, "learning_rate": 0.0004912101229953559, "loss": 1.8813, "step": 25250 }, { "epoch": 0.84, "grad_norm": 0.42022690176963806, "learning_rate": 0.0004912020688930601, "loss": 1.8413, "step": 25251 }, { "epoch": 0.84, "grad_norm": 0.4257275462150574, "learning_rate": 0.0004911940145586732, "loss": 1.734, "step": 25252 }, { "epoch": 0.84, "grad_norm": 0.44496122002601624, "learning_rate": 0.0004911859599922049, "loss": 1.8768, "step": 25253 }, { "epoch": 0.84, "grad_norm": 0.44539546966552734, "learning_rate": 0.000491177905193665, "loss": 1.7927, "step": 25254 }, { "epoch": 0.84, "grad_norm": 0.4288448095321655, "learning_rate": 0.0004911698501630633, "loss": 1.7791, "step": 25255 }, { "epoch": 0.84, "grad_norm": 0.4311659038066864, "learning_rate": 0.0004911617949004095, "loss": 1.8286, "step": 25256 }, { "epoch": 0.84, "grad_norm": 0.6405590772628784, "learning_rate": 0.0004911537394057137, "loss": 1.8627, "step": 25257 }, { "epoch": 0.84, "grad_norm": 0.4381638765335083, "learning_rate": 0.0004911456836789852, "loss": 1.9142, "step": 25258 }, { "epoch": 0.84, "grad_norm": 0.4568091928958893, "learning_rate": 0.000491137627720234, "loss": 1.8335, "step": 25259 }, { "epoch": 0.84, "grad_norm": 0.4363097846508026, "learning_rate": 0.0004911295715294699, "loss": 1.9086, "step": 25260 }, { "epoch": 0.84, "grad_norm": 0.44758620858192444, "learning_rate": 0.0004911215151067027, "loss": 1.8275, "step": 25261 }, { "epoch": 0.84, "grad_norm": 0.4352617859840393, "learning_rate": 0.0004911134584519422, "loss": 1.839, "step": 25262 }, { "epoch": 0.84, "grad_norm": 0.44853565096855164, "learning_rate": 0.000491105401565198, "loss": 1.809, "step": 25263 }, { "epoch": 0.84, "grad_norm": 0.4348893463611603, "learning_rate": 0.00049109734444648, "loss": 1.8383, "step": 25264 }, { "epoch": 0.84, "grad_norm": 0.4319959580898285, "learning_rate": 0.000491089287095798, "loss": 1.8244, "step": 25265 }, { "epoch": 0.84, "grad_norm": 0.45430535078048706, "learning_rate": 0.0004910812295131618, "loss": 1.841, "step": 25266 }, { "epoch": 0.84, "grad_norm": 0.4667578339576721, "learning_rate": 0.0004910731716985812, "loss": 1.8164, "step": 25267 }, { "epoch": 0.84, "grad_norm": 0.44779202342033386, "learning_rate": 0.0004910651136520658, "loss": 1.7345, "step": 25268 }, { "epoch": 0.84, "grad_norm": 0.45934736728668213, "learning_rate": 0.0004910570553736256, "loss": 1.8746, "step": 25269 }, { "epoch": 0.84, "grad_norm": 0.4471322298049927, "learning_rate": 0.0004910489968632702, "loss": 1.7516, "step": 25270 }, { "epoch": 0.84, "grad_norm": 0.444816529750824, "learning_rate": 0.0004910409381210096, "loss": 1.8181, "step": 25271 }, { "epoch": 0.84, "grad_norm": 0.45039862394332886, "learning_rate": 0.0004910328791468534, "loss": 1.8136, "step": 25272 }, { "epoch": 0.84, "grad_norm": 0.4508827328681946, "learning_rate": 0.0004910248199408113, "loss": 1.773, "step": 25273 }, { "epoch": 0.84, "grad_norm": 0.4366098940372467, "learning_rate": 0.0004910167605028933, "loss": 1.8372, "step": 25274 }, { "epoch": 0.84, "grad_norm": 0.4409109950065613, "learning_rate": 0.0004910087008331091, "loss": 1.756, "step": 25275 }, { "epoch": 0.84, "grad_norm": 0.42783036828041077, "learning_rate": 0.0004910006409314685, "loss": 1.8171, "step": 25276 }, { "epoch": 0.84, "grad_norm": 0.4287678897380829, "learning_rate": 0.0004909925807979813, "loss": 1.7645, "step": 25277 }, { "epoch": 0.84, "grad_norm": 0.42779970169067383, "learning_rate": 0.0004909845204326572, "loss": 1.792, "step": 25278 }, { "epoch": 0.84, "grad_norm": 0.4513307213783264, "learning_rate": 0.000490976459835506, "loss": 1.7681, "step": 25279 }, { "epoch": 0.84, "grad_norm": 0.41996631026268005, "learning_rate": 0.0004909683990065376, "loss": 1.7619, "step": 25280 }, { "epoch": 0.84, "grad_norm": 0.4364532232284546, "learning_rate": 0.0004909603379457616, "loss": 1.8627, "step": 25281 }, { "epoch": 0.84, "grad_norm": 0.41775181889533997, "learning_rate": 0.000490952276653188, "loss": 1.7576, "step": 25282 }, { "epoch": 0.84, "grad_norm": 0.42847225069999695, "learning_rate": 0.0004909442151288263, "loss": 1.7709, "step": 25283 }, { "epoch": 0.84, "grad_norm": 0.4171796143054962, "learning_rate": 0.0004909361533726866, "loss": 1.7525, "step": 25284 }, { "epoch": 0.84, "grad_norm": 0.4456580579280853, "learning_rate": 0.0004909280913847786, "loss": 1.8271, "step": 25285 }, { "epoch": 0.84, "grad_norm": 0.4354417622089386, "learning_rate": 0.0004909200291651119, "loss": 1.7783, "step": 25286 }, { "epoch": 0.84, "grad_norm": 0.433594286441803, "learning_rate": 0.0004909119667136965, "loss": 1.8336, "step": 25287 }, { "epoch": 0.84, "grad_norm": 0.41590356826782227, "learning_rate": 0.000490903904030542, "loss": 1.8485, "step": 25288 }, { "epoch": 0.84, "grad_norm": 0.4462050199508667, "learning_rate": 0.0004908958411156584, "loss": 1.8688, "step": 25289 }, { "epoch": 0.84, "grad_norm": 0.4410248398780823, "learning_rate": 0.0004908877779690552, "loss": 1.7907, "step": 25290 }, { "epoch": 0.84, "grad_norm": 0.4181244969367981, "learning_rate": 0.0004908797145907425, "loss": 1.8058, "step": 25291 }, { "epoch": 0.84, "grad_norm": 0.42825397849082947, "learning_rate": 0.0004908716509807301, "loss": 1.7922, "step": 25292 }, { "epoch": 0.84, "grad_norm": 0.4523417055606842, "learning_rate": 0.0004908635871390274, "loss": 1.7864, "step": 25293 }, { "epoch": 0.84, "grad_norm": 0.4354794919490814, "learning_rate": 0.0004908555230656445, "loss": 1.8175, "step": 25294 }, { "epoch": 0.84, "grad_norm": 0.45751869678497314, "learning_rate": 0.0004908474587605911, "loss": 1.8717, "step": 25295 }, { "epoch": 0.84, "grad_norm": 0.4565066397190094, "learning_rate": 0.000490839394223877, "loss": 1.773, "step": 25296 }, { "epoch": 0.84, "grad_norm": 0.4205923080444336, "learning_rate": 0.0004908313294555121, "loss": 1.7639, "step": 25297 }, { "epoch": 0.84, "grad_norm": 0.437206506729126, "learning_rate": 0.0004908232644555059, "loss": 1.8108, "step": 25298 }, { "epoch": 0.84, "grad_norm": 0.43911013007164, "learning_rate": 0.0004908151992238685, "loss": 1.8455, "step": 25299 }, { "epoch": 0.84, "grad_norm": 0.44220513105392456, "learning_rate": 0.0004908071337606096, "loss": 1.7708, "step": 25300 }, { "epoch": 0.84, "grad_norm": 0.4410582184791565, "learning_rate": 0.0004907990680657389, "loss": 1.7302, "step": 25301 }, { "epoch": 0.84, "grad_norm": 0.4442160427570343, "learning_rate": 0.0004907910021392663, "loss": 1.7492, "step": 25302 }, { "epoch": 0.84, "grad_norm": 0.46233972907066345, "learning_rate": 0.0004907829359812013, "loss": 1.8537, "step": 25303 }, { "epoch": 0.84, "grad_norm": 0.4905976355075836, "learning_rate": 0.0004907748695915542, "loss": 1.8112, "step": 25304 }, { "epoch": 0.84, "grad_norm": 0.4439905881881714, "learning_rate": 0.0004907668029703344, "loss": 1.8475, "step": 25305 }, { "epoch": 0.84, "grad_norm": 0.4512534737586975, "learning_rate": 0.0004907587361175518, "loss": 1.6833, "step": 25306 }, { "epoch": 0.84, "grad_norm": 1.3535891771316528, "learning_rate": 0.0004907506690332162, "loss": 1.8718, "step": 25307 }, { "epoch": 0.84, "grad_norm": 0.43582895398139954, "learning_rate": 0.0004907426017173375, "loss": 1.7762, "step": 25308 }, { "epoch": 0.84, "grad_norm": 0.44543132185935974, "learning_rate": 0.0004907345341699254, "loss": 1.805, "step": 25309 }, { "epoch": 0.84, "grad_norm": 0.4356473386287689, "learning_rate": 0.0004907264663909894, "loss": 1.8905, "step": 25310 }, { "epoch": 0.84, "grad_norm": 0.4451379179954529, "learning_rate": 0.0004907183983805398, "loss": 1.7826, "step": 25311 }, { "epoch": 0.84, "grad_norm": 0.461479127407074, "learning_rate": 0.0004907103301385862, "loss": 1.8013, "step": 25312 }, { "epoch": 0.84, "grad_norm": 0.4349531829357147, "learning_rate": 0.0004907022616651382, "loss": 1.8045, "step": 25313 }, { "epoch": 0.84, "grad_norm": 0.43267470598220825, "learning_rate": 0.0004906941929602059, "loss": 1.8422, "step": 25314 }, { "epoch": 0.84, "grad_norm": 0.44955456256866455, "learning_rate": 0.0004906861240237989, "loss": 1.7686, "step": 25315 }, { "epoch": 0.84, "grad_norm": 0.4454166293144226, "learning_rate": 0.000490678054855927, "loss": 1.781, "step": 25316 }, { "epoch": 0.84, "grad_norm": 0.4274531304836273, "learning_rate": 0.0004906699854566, "loss": 1.7721, "step": 25317 }, { "epoch": 0.84, "grad_norm": 0.4782451093196869, "learning_rate": 0.0004906619158258278, "loss": 1.8739, "step": 25318 }, { "epoch": 0.84, "grad_norm": 0.46050071716308594, "learning_rate": 0.0004906538459636202, "loss": 1.8423, "step": 25319 }, { "epoch": 0.84, "grad_norm": 0.44374528527259827, "learning_rate": 0.0004906457758699868, "loss": 1.8718, "step": 25320 }, { "epoch": 0.84, "grad_norm": 0.4316265881061554, "learning_rate": 0.0004906377055449375, "loss": 1.728, "step": 25321 }, { "epoch": 0.84, "grad_norm": 0.4433348774909973, "learning_rate": 0.0004906296349884823, "loss": 1.8264, "step": 25322 }, { "epoch": 0.84, "grad_norm": 0.4589647650718689, "learning_rate": 0.0004906215642006307, "loss": 1.8871, "step": 25323 }, { "epoch": 0.84, "grad_norm": 0.44670382142066956, "learning_rate": 0.0004906134931813925, "loss": 1.7424, "step": 25324 }, { "epoch": 0.84, "grad_norm": 0.4191669225692749, "learning_rate": 0.0004906054219307777, "loss": 1.8284, "step": 25325 }, { "epoch": 0.84, "grad_norm": 0.4379432201385498, "learning_rate": 0.000490597350448796, "loss": 1.772, "step": 25326 }, { "epoch": 0.84, "grad_norm": 0.44113239645957947, "learning_rate": 0.0004905892787354572, "loss": 1.7917, "step": 25327 }, { "epoch": 0.84, "grad_norm": 0.43266162276268005, "learning_rate": 0.0004905812067907711, "loss": 1.7829, "step": 25328 }, { "epoch": 0.84, "grad_norm": 0.43454739451408386, "learning_rate": 0.0004905731346147475, "loss": 1.7559, "step": 25329 }, { "epoch": 0.84, "grad_norm": 0.4242534041404724, "learning_rate": 0.0004905650622073962, "loss": 1.8457, "step": 25330 }, { "epoch": 0.84, "grad_norm": 0.427259236574173, "learning_rate": 0.0004905569895687269, "loss": 1.8563, "step": 25331 }, { "epoch": 0.84, "grad_norm": 0.4556480050086975, "learning_rate": 0.0004905489166987496, "loss": 1.8914, "step": 25332 }, { "epoch": 0.84, "grad_norm": 0.44512251019477844, "learning_rate": 0.000490540843597474, "loss": 1.874, "step": 25333 }, { "epoch": 0.84, "grad_norm": 0.441257119178772, "learning_rate": 0.0004905327702649099, "loss": 1.888, "step": 25334 }, { "epoch": 0.84, "grad_norm": 0.4331640303134918, "learning_rate": 0.0004905246967010671, "loss": 1.8264, "step": 25335 }, { "epoch": 0.84, "grad_norm": 0.44239771366119385, "learning_rate": 0.0004905166229059552, "loss": 1.7561, "step": 25336 }, { "epoch": 0.84, "grad_norm": 0.44650745391845703, "learning_rate": 0.0004905085488795844, "loss": 1.9117, "step": 25337 }, { "epoch": 0.84, "grad_norm": 0.4418601095676422, "learning_rate": 0.0004905004746219642, "loss": 1.7787, "step": 25338 }, { "epoch": 0.84, "grad_norm": 0.42940980195999146, "learning_rate": 0.0004904924001331045, "loss": 1.8364, "step": 25339 }, { "epoch": 0.84, "grad_norm": 0.4524956941604614, "learning_rate": 0.0004904843254130151, "loss": 1.7993, "step": 25340 }, { "epoch": 0.84, "grad_norm": 0.43734613060951233, "learning_rate": 0.0004904762504617058, "loss": 1.8089, "step": 25341 }, { "epoch": 0.84, "grad_norm": 0.4263930916786194, "learning_rate": 0.0004904681752791864, "loss": 1.7132, "step": 25342 }, { "epoch": 0.84, "grad_norm": 0.459477037191391, "learning_rate": 0.0004904600998654668, "loss": 1.814, "step": 25343 }, { "epoch": 0.84, "grad_norm": 0.43820202350616455, "learning_rate": 0.0004904520242205565, "loss": 1.715, "step": 25344 }, { "epoch": 0.84, "grad_norm": 0.43212249875068665, "learning_rate": 0.0004904439483444656, "loss": 1.7989, "step": 25345 }, { "epoch": 0.84, "grad_norm": 0.43114882707595825, "learning_rate": 0.0004904358722372038, "loss": 1.8263, "step": 25346 }, { "epoch": 0.84, "grad_norm": 0.46675363183021545, "learning_rate": 0.000490427795898781, "loss": 1.8203, "step": 25347 }, { "epoch": 0.84, "grad_norm": 0.44644221663475037, "learning_rate": 0.0004904197193292067, "loss": 1.8403, "step": 25348 }, { "epoch": 0.84, "grad_norm": 0.44512203335762024, "learning_rate": 0.0004904116425284912, "loss": 1.8251, "step": 25349 }, { "epoch": 0.84, "grad_norm": 0.450198769569397, "learning_rate": 0.0004904035654966438, "loss": 1.7881, "step": 25350 }, { "epoch": 0.84, "grad_norm": 0.43194109201431274, "learning_rate": 0.0004903954882336746, "loss": 1.7413, "step": 25351 }, { "epoch": 0.84, "grad_norm": 0.4552714228630066, "learning_rate": 0.0004903874107395934, "loss": 1.7962, "step": 25352 }, { "epoch": 0.84, "grad_norm": 0.42558154463768005, "learning_rate": 0.0004903793330144098, "loss": 1.7794, "step": 25353 }, { "epoch": 0.84, "grad_norm": 0.4455709755420685, "learning_rate": 0.0004903712550581339, "loss": 1.6912, "step": 25354 }, { "epoch": 0.84, "grad_norm": 0.43143388628959656, "learning_rate": 0.0004903631768707751, "loss": 1.7882, "step": 25355 }, { "epoch": 0.84, "grad_norm": 0.442855566740036, "learning_rate": 0.0004903550984523438, "loss": 1.8028, "step": 25356 }, { "epoch": 0.84, "grad_norm": 0.44047918915748596, "learning_rate": 0.0004903470198028492, "loss": 1.8315, "step": 25357 }, { "epoch": 0.84, "grad_norm": 0.44521838426589966, "learning_rate": 0.0004903389409223014, "loss": 1.7783, "step": 25358 }, { "epoch": 0.84, "grad_norm": 0.4322309195995331, "learning_rate": 0.0004903308618107102, "loss": 1.8434, "step": 25359 }, { "epoch": 0.84, "grad_norm": 0.4466182291507721, "learning_rate": 0.0004903227824680854, "loss": 1.8018, "step": 25360 }, { "epoch": 0.84, "grad_norm": 0.43964603543281555, "learning_rate": 0.0004903147028944368, "loss": 1.7067, "step": 25361 }, { "epoch": 0.84, "grad_norm": 0.44476741552352905, "learning_rate": 0.0004903066230897741, "loss": 1.758, "step": 25362 }, { "epoch": 0.84, "grad_norm": 0.4453060030937195, "learning_rate": 0.0004902985430541073, "loss": 1.8166, "step": 25363 }, { "epoch": 0.84, "grad_norm": 0.4448882043361664, "learning_rate": 0.0004902904627874461, "loss": 1.7974, "step": 25364 }, { "epoch": 0.84, "grad_norm": 0.43945273756980896, "learning_rate": 0.0004902823822898002, "loss": 1.8437, "step": 25365 }, { "epoch": 0.84, "grad_norm": 0.43096935749053955, "learning_rate": 0.0004902743015611796, "loss": 1.756, "step": 25366 }, { "epoch": 0.84, "grad_norm": 0.4558437466621399, "learning_rate": 0.000490266220601594, "loss": 1.824, "step": 25367 }, { "epoch": 0.84, "grad_norm": 0.4595387578010559, "learning_rate": 0.0004902581394110533, "loss": 1.9094, "step": 25368 }, { "epoch": 0.84, "grad_norm": 0.45557069778442383, "learning_rate": 0.0004902500579895673, "loss": 1.9136, "step": 25369 }, { "epoch": 0.84, "grad_norm": 0.45711827278137207, "learning_rate": 0.0004902419763371457, "loss": 1.7279, "step": 25370 }, { "epoch": 0.84, "grad_norm": 0.4333178400993347, "learning_rate": 0.0004902338944537983, "loss": 1.8631, "step": 25371 }, { "epoch": 0.84, "grad_norm": 0.43863359093666077, "learning_rate": 0.0004902258123395351, "loss": 1.7273, "step": 25372 }, { "epoch": 0.84, "grad_norm": 0.438295841217041, "learning_rate": 0.0004902177299943658, "loss": 1.8939, "step": 25373 }, { "epoch": 0.84, "grad_norm": 0.4447036683559418, "learning_rate": 0.0004902096474183, "loss": 1.8001, "step": 25374 }, { "epoch": 0.84, "grad_norm": 0.42096588015556335, "learning_rate": 0.0004902015646113479, "loss": 1.8705, "step": 25375 }, { "epoch": 0.84, "grad_norm": 0.4354279339313507, "learning_rate": 0.0004901934815735191, "loss": 1.8467, "step": 25376 }, { "epoch": 0.84, "grad_norm": 0.43187007308006287, "learning_rate": 0.0004901853983048234, "loss": 1.828, "step": 25377 }, { "epoch": 0.84, "grad_norm": 0.4285748600959778, "learning_rate": 0.0004901773148052707, "loss": 1.7964, "step": 25378 }, { "epoch": 0.84, "grad_norm": 0.45211562514305115, "learning_rate": 0.0004901692310748707, "loss": 1.8148, "step": 25379 }, { "epoch": 0.84, "grad_norm": 0.43140909075737, "learning_rate": 0.0004901611471136334, "loss": 1.7631, "step": 25380 }, { "epoch": 0.84, "grad_norm": 0.430834025144577, "learning_rate": 0.0004901530629215684, "loss": 1.7974, "step": 25381 }, { "epoch": 0.84, "grad_norm": 0.43143483996391296, "learning_rate": 0.0004901449784986855, "loss": 1.8353, "step": 25382 }, { "epoch": 0.84, "grad_norm": 0.45661860704421997, "learning_rate": 0.0004901368938449947, "loss": 1.7962, "step": 25383 }, { "epoch": 0.84, "grad_norm": 0.4381757974624634, "learning_rate": 0.0004901288089605057, "loss": 1.7069, "step": 25384 }, { "epoch": 0.84, "grad_norm": 0.4277966618537903, "learning_rate": 0.0004901207238452284, "loss": 1.8421, "step": 25385 }, { "epoch": 0.84, "grad_norm": 0.43077099323272705, "learning_rate": 0.0004901126384991725, "loss": 1.8217, "step": 25386 }, { "epoch": 0.84, "grad_norm": 0.4600427746772766, "learning_rate": 0.0004901045529223479, "loss": 1.8824, "step": 25387 }, { "epoch": 0.84, "grad_norm": 0.4376477301120758, "learning_rate": 0.0004900964671147644, "loss": 1.8367, "step": 25388 }, { "epoch": 0.84, "grad_norm": 0.4375241696834564, "learning_rate": 0.0004900883810764318, "loss": 1.901, "step": 25389 }, { "epoch": 0.84, "grad_norm": 0.44189906120300293, "learning_rate": 0.0004900802948073598, "loss": 1.8685, "step": 25390 }, { "epoch": 0.84, "grad_norm": 0.4427129030227661, "learning_rate": 0.0004900722083075584, "loss": 1.8535, "step": 25391 }, { "epoch": 0.84, "grad_norm": 0.4323488175868988, "learning_rate": 0.0004900641215770373, "loss": 1.781, "step": 25392 }, { "epoch": 0.84, "grad_norm": 0.4350859224796295, "learning_rate": 0.0004900560346158065, "loss": 1.7802, "step": 25393 }, { "epoch": 0.84, "grad_norm": 0.43910595774650574, "learning_rate": 0.0004900479474238756, "loss": 1.8057, "step": 25394 }, { "epoch": 0.84, "grad_norm": 0.4576228857040405, "learning_rate": 0.0004900398600012545, "loss": 1.781, "step": 25395 }, { "epoch": 0.84, "grad_norm": 0.44165828824043274, "learning_rate": 0.0004900317723479531, "loss": 1.7893, "step": 25396 }, { "epoch": 0.84, "grad_norm": 0.4391113221645355, "learning_rate": 0.000490023684463981, "loss": 1.9148, "step": 25397 }, { "epoch": 0.84, "grad_norm": 0.44402018189430237, "learning_rate": 0.000490015596349348, "loss": 1.8463, "step": 25398 }, { "epoch": 0.85, "grad_norm": 0.4363758862018585, "learning_rate": 0.0004900075080040644, "loss": 1.8791, "step": 25399 }, { "epoch": 0.85, "grad_norm": 0.4625270664691925, "learning_rate": 0.0004899994194281395, "loss": 1.7945, "step": 25400 }, { "epoch": 0.85, "grad_norm": 0.4366454482078552, "learning_rate": 0.0004899913306215833, "loss": 1.8011, "step": 25401 }, { "epoch": 0.85, "grad_norm": 0.4570983350276947, "learning_rate": 0.0004899832415844056, "loss": 1.9057, "step": 25402 }, { "epoch": 0.85, "grad_norm": 0.4432145357131958, "learning_rate": 0.0004899751523166163, "loss": 1.7727, "step": 25403 }, { "epoch": 0.85, "grad_norm": 0.4686509072780609, "learning_rate": 0.0004899670628182251, "loss": 1.8565, "step": 25404 }, { "epoch": 0.85, "grad_norm": 0.43642154335975647, "learning_rate": 0.000489958973089242, "loss": 1.8239, "step": 25405 }, { "epoch": 0.85, "grad_norm": 0.44501012563705444, "learning_rate": 0.0004899508831296767, "loss": 1.81, "step": 25406 }, { "epoch": 0.85, "grad_norm": 0.4404071271419525, "learning_rate": 0.0004899427929395388, "loss": 1.8511, "step": 25407 }, { "epoch": 0.85, "grad_norm": 0.43030697107315063, "learning_rate": 0.0004899347025188385, "loss": 1.8419, "step": 25408 }, { "epoch": 0.85, "grad_norm": 0.44177088141441345, "learning_rate": 0.0004899266118675854, "loss": 1.8173, "step": 25409 }, { "epoch": 0.85, "grad_norm": 0.4386305809020996, "learning_rate": 0.0004899185209857893, "loss": 1.8466, "step": 25410 }, { "epoch": 0.85, "grad_norm": 0.4523477554321289, "learning_rate": 0.0004899104298734604, "loss": 1.7737, "step": 25411 }, { "epoch": 0.85, "grad_norm": 0.4374633729457855, "learning_rate": 0.0004899023385306079, "loss": 1.8133, "step": 25412 }, { "epoch": 0.85, "grad_norm": 0.4286957383155823, "learning_rate": 0.000489894246957242, "loss": 1.8397, "step": 25413 }, { "epoch": 0.85, "grad_norm": 0.4458913505077362, "learning_rate": 0.0004898861551533726, "loss": 1.8291, "step": 25414 }, { "epoch": 0.85, "grad_norm": 0.4604819416999817, "learning_rate": 0.0004898780631190093, "loss": 1.8723, "step": 25415 }, { "epoch": 0.85, "grad_norm": 0.43484529852867126, "learning_rate": 0.000489869970854162, "loss": 1.9052, "step": 25416 }, { "epoch": 0.85, "grad_norm": 0.4316100478172302, "learning_rate": 0.0004898618783588405, "loss": 1.7607, "step": 25417 }, { "epoch": 0.85, "grad_norm": 0.43741747736930847, "learning_rate": 0.0004898537856330548, "loss": 1.8259, "step": 25418 }, { "epoch": 0.85, "grad_norm": 0.4455810785293579, "learning_rate": 0.0004898456926768145, "loss": 1.7899, "step": 25419 }, { "epoch": 0.85, "grad_norm": 0.437582403421402, "learning_rate": 0.0004898375994901293, "loss": 1.7777, "step": 25420 }, { "epoch": 0.85, "grad_norm": 0.4453144073486328, "learning_rate": 0.0004898295060730096, "loss": 1.8444, "step": 25421 }, { "epoch": 0.85, "grad_norm": 0.4417768716812134, "learning_rate": 0.0004898214124254645, "loss": 1.7303, "step": 25422 }, { "epoch": 0.85, "grad_norm": 0.4591033458709717, "learning_rate": 0.0004898133185475043, "loss": 1.8147, "step": 25423 }, { "epoch": 0.85, "grad_norm": 0.45639508962631226, "learning_rate": 0.0004898052244391388, "loss": 1.8811, "step": 25424 }, { "epoch": 0.85, "grad_norm": 0.5433292984962463, "learning_rate": 0.0004897971301003777, "loss": 1.7983, "step": 25425 }, { "epoch": 0.85, "grad_norm": 0.4538750946521759, "learning_rate": 0.0004897890355312308, "loss": 1.7927, "step": 25426 }, { "epoch": 0.85, "grad_norm": 0.43187424540519714, "learning_rate": 0.0004897809407317079, "loss": 1.7944, "step": 25427 }, { "epoch": 0.85, "grad_norm": 0.4301108717918396, "learning_rate": 0.000489772845701819, "loss": 1.7722, "step": 25428 }, { "epoch": 0.85, "grad_norm": 0.4315330684185028, "learning_rate": 0.0004897647504415737, "loss": 1.8367, "step": 25429 }, { "epoch": 0.85, "grad_norm": 0.43591368198394775, "learning_rate": 0.0004897566549509822, "loss": 1.799, "step": 25430 }, { "epoch": 0.85, "grad_norm": 0.4519297778606415, "learning_rate": 0.0004897485592300539, "loss": 1.7968, "step": 25431 }, { "epoch": 0.85, "grad_norm": 0.4509645700454712, "learning_rate": 0.0004897404632787988, "loss": 1.853, "step": 25432 }, { "epoch": 0.85, "grad_norm": 0.4390939176082611, "learning_rate": 0.0004897323670972268, "loss": 1.8592, "step": 25433 }, { "epoch": 0.85, "grad_norm": 0.4277443587779999, "learning_rate": 0.0004897242706853476, "loss": 1.8474, "step": 25434 }, { "epoch": 0.85, "grad_norm": 0.4492104947566986, "learning_rate": 0.0004897161740431711, "loss": 1.7799, "step": 25435 }, { "epoch": 0.85, "grad_norm": 0.4401208162307739, "learning_rate": 0.0004897080771707072, "loss": 1.8211, "step": 25436 }, { "epoch": 0.85, "grad_norm": 0.47727638483047485, "learning_rate": 0.0004896999800679656, "loss": 1.8709, "step": 25437 }, { "epoch": 0.85, "grad_norm": 0.4251078963279724, "learning_rate": 0.0004896918827349562, "loss": 1.839, "step": 25438 }, { "epoch": 0.85, "grad_norm": 0.438657283782959, "learning_rate": 0.0004896837851716887, "loss": 1.7988, "step": 25439 }, { "epoch": 0.85, "grad_norm": 0.4258216321468353, "learning_rate": 0.0004896756873781731, "loss": 1.8496, "step": 25440 }, { "epoch": 0.85, "grad_norm": 0.4613244831562042, "learning_rate": 0.0004896675893544193, "loss": 1.8418, "step": 25441 }, { "epoch": 0.85, "grad_norm": 0.4456126093864441, "learning_rate": 0.0004896594911004367, "loss": 1.7599, "step": 25442 }, { "epoch": 0.85, "grad_norm": 0.4374763071537018, "learning_rate": 0.0004896513926162358, "loss": 1.7421, "step": 25443 }, { "epoch": 0.85, "grad_norm": 0.4536445438861847, "learning_rate": 0.0004896432939018257, "loss": 1.7745, "step": 25444 }, { "epoch": 0.85, "grad_norm": 0.44559499621391296, "learning_rate": 0.0004896351949572167, "loss": 1.8618, "step": 25445 }, { "epoch": 0.85, "grad_norm": 0.4524059593677521, "learning_rate": 0.0004896270957824185, "loss": 1.867, "step": 25446 }, { "epoch": 0.85, "grad_norm": 0.4419872462749481, "learning_rate": 0.0004896189963774411, "loss": 1.8127, "step": 25447 }, { "epoch": 0.85, "grad_norm": 0.4565487802028656, "learning_rate": 0.000489610896742294, "loss": 1.8872, "step": 25448 }, { "epoch": 0.85, "grad_norm": 0.460079163312912, "learning_rate": 0.0004896027968769873, "loss": 1.8578, "step": 25449 }, { "epoch": 0.85, "grad_norm": 0.4352262616157532, "learning_rate": 0.0004895946967815307, "loss": 1.7979, "step": 25450 }, { "epoch": 0.85, "grad_norm": 0.43622565269470215, "learning_rate": 0.0004895865964559341, "loss": 1.8597, "step": 25451 }, { "epoch": 0.85, "grad_norm": 0.44468924403190613, "learning_rate": 0.0004895784959002072, "loss": 1.8127, "step": 25452 }, { "epoch": 0.85, "grad_norm": 0.44468945264816284, "learning_rate": 0.0004895703951143601, "loss": 1.8303, "step": 25453 }, { "epoch": 0.85, "grad_norm": 0.46894246339797974, "learning_rate": 0.0004895622940984023, "loss": 1.917, "step": 25454 }, { "epoch": 0.85, "grad_norm": 0.4493049681186676, "learning_rate": 0.000489554192852344, "loss": 1.8527, "step": 25455 }, { "epoch": 0.85, "grad_norm": 0.44312524795532227, "learning_rate": 0.0004895460913761948, "loss": 1.8065, "step": 25456 }, { "epoch": 0.85, "grad_norm": 0.4375711977481842, "learning_rate": 0.0004895379896699645, "loss": 1.8687, "step": 25457 }, { "epoch": 0.85, "grad_norm": 0.443386971950531, "learning_rate": 0.000489529887733663, "loss": 1.8662, "step": 25458 }, { "epoch": 0.85, "grad_norm": 0.4544534385204315, "learning_rate": 0.0004895217855673002, "loss": 1.8486, "step": 25459 }, { "epoch": 0.85, "grad_norm": 0.44259291887283325, "learning_rate": 0.0004895136831708859, "loss": 1.8039, "step": 25460 }, { "epoch": 0.85, "grad_norm": 0.4491897225379944, "learning_rate": 0.0004895055805444298, "loss": 1.7372, "step": 25461 }, { "epoch": 0.85, "grad_norm": 0.45913955569267273, "learning_rate": 0.0004894974776879418, "loss": 1.8387, "step": 25462 }, { "epoch": 0.85, "grad_norm": 0.5107418298721313, "learning_rate": 0.000489489374601432, "loss": 1.7946, "step": 25463 }, { "epoch": 0.85, "grad_norm": 0.42977631092071533, "learning_rate": 0.0004894812712849098, "loss": 1.8495, "step": 25464 }, { "epoch": 0.85, "grad_norm": 0.4283623695373535, "learning_rate": 0.0004894731677383855, "loss": 1.9001, "step": 25465 }, { "epoch": 0.85, "grad_norm": 0.4491872191429138, "learning_rate": 0.0004894650639618685, "loss": 1.7945, "step": 25466 }, { "epoch": 0.85, "grad_norm": 0.4299043118953705, "learning_rate": 0.0004894569599553689, "loss": 1.8757, "step": 25467 }, { "epoch": 0.85, "grad_norm": 0.4323311746120453, "learning_rate": 0.0004894488557188964, "loss": 1.7849, "step": 25468 }, { "epoch": 0.85, "grad_norm": 0.46346983313560486, "learning_rate": 0.000489440751252461, "loss": 1.8126, "step": 25469 }, { "epoch": 0.85, "grad_norm": 1.0970267057418823, "learning_rate": 0.0004894326465560724, "loss": 1.8964, "step": 25470 }, { "epoch": 0.85, "grad_norm": 0.43650513887405396, "learning_rate": 0.0004894245416297404, "loss": 1.7684, "step": 25471 }, { "epoch": 0.85, "grad_norm": 0.42367854714393616, "learning_rate": 0.0004894164364734751, "loss": 1.7785, "step": 25472 }, { "epoch": 0.85, "grad_norm": 0.43820664286613464, "learning_rate": 0.000489408331087286, "loss": 1.8331, "step": 25473 }, { "epoch": 0.85, "grad_norm": 0.4165359139442444, "learning_rate": 0.0004894002254711831, "loss": 1.8072, "step": 25474 }, { "epoch": 0.85, "grad_norm": 0.448866069316864, "learning_rate": 0.0004893921196251764, "loss": 1.853, "step": 25475 }, { "epoch": 0.85, "grad_norm": 0.42718350887298584, "learning_rate": 0.0004893840135492754, "loss": 1.796, "step": 25476 }, { "epoch": 0.85, "grad_norm": 0.44162967801094055, "learning_rate": 0.0004893759072434901, "loss": 1.8018, "step": 25477 }, { "epoch": 0.85, "grad_norm": 0.4175942540168762, "learning_rate": 0.0004893678007078304, "loss": 1.8226, "step": 25478 }, { "epoch": 0.85, "grad_norm": 0.4397214353084564, "learning_rate": 0.0004893596939423062, "loss": 1.8202, "step": 25479 }, { "epoch": 0.85, "grad_norm": 0.42761924862861633, "learning_rate": 0.0004893515869469271, "loss": 1.7799, "step": 25480 }, { "epoch": 0.85, "grad_norm": 0.43291914463043213, "learning_rate": 0.0004893434797217031, "loss": 1.8518, "step": 25481 }, { "epoch": 0.85, "grad_norm": 0.43275049328804016, "learning_rate": 0.0004893353722666441, "loss": 1.8201, "step": 25482 }, { "epoch": 0.85, "grad_norm": 0.44443830847740173, "learning_rate": 0.0004893272645817596, "loss": 1.822, "step": 25483 }, { "epoch": 0.85, "grad_norm": 0.4354822039604187, "learning_rate": 0.0004893191566670599, "loss": 1.7328, "step": 25484 }, { "epoch": 0.85, "grad_norm": 0.4343986213207245, "learning_rate": 0.0004893110485225547, "loss": 1.7874, "step": 25485 }, { "epoch": 0.85, "grad_norm": 0.4334000051021576, "learning_rate": 0.0004893029401482537, "loss": 1.8808, "step": 25486 }, { "epoch": 0.85, "grad_norm": 0.4355141520500183, "learning_rate": 0.0004892948315441668, "loss": 1.8213, "step": 25487 }, { "epoch": 0.85, "grad_norm": 0.4166695773601532, "learning_rate": 0.0004892867227103038, "loss": 1.6836, "step": 25488 }, { "epoch": 0.85, "grad_norm": 0.4545496702194214, "learning_rate": 0.0004892786136466747, "loss": 1.8147, "step": 25489 }, { "epoch": 0.85, "grad_norm": 0.4395888149738312, "learning_rate": 0.0004892705043532893, "loss": 1.8241, "step": 25490 }, { "epoch": 0.85, "grad_norm": 0.43092089891433716, "learning_rate": 0.0004892623948301574, "loss": 1.8061, "step": 25491 }, { "epoch": 0.85, "grad_norm": 0.4468435049057007, "learning_rate": 0.0004892542850772887, "loss": 1.8401, "step": 25492 }, { "epoch": 0.85, "grad_norm": 0.4662918746471405, "learning_rate": 0.0004892461750946932, "loss": 1.7562, "step": 25493 }, { "epoch": 0.85, "grad_norm": 0.452053040266037, "learning_rate": 0.0004892380648823808, "loss": 1.8442, "step": 25494 }, { "epoch": 0.85, "grad_norm": 0.43129536509513855, "learning_rate": 0.0004892299544403613, "loss": 1.7981, "step": 25495 }, { "epoch": 0.85, "grad_norm": 0.4612747132778168, "learning_rate": 0.0004892218437686444, "loss": 1.8592, "step": 25496 }, { "epoch": 0.85, "grad_norm": 0.43979698419570923, "learning_rate": 0.0004892137328672403, "loss": 1.8584, "step": 25497 }, { "epoch": 0.85, "grad_norm": 0.46361684799194336, "learning_rate": 0.0004892056217361584, "loss": 1.917, "step": 25498 }, { "epoch": 0.85, "grad_norm": 0.4290042519569397, "learning_rate": 0.0004891975103754087, "loss": 1.7912, "step": 25499 }, { "epoch": 0.85, "grad_norm": 0.4343264102935791, "learning_rate": 0.0004891893987850012, "loss": 1.8014, "step": 25500 }, { "epoch": 0.85, "grad_norm": 0.4319233000278473, "learning_rate": 0.0004891812869649457, "loss": 1.7728, "step": 25501 }, { "epoch": 0.85, "grad_norm": 0.4271835684776306, "learning_rate": 0.000489173174915252, "loss": 1.7927, "step": 25502 }, { "epoch": 0.85, "grad_norm": 0.4764362573623657, "learning_rate": 0.0004891650626359298, "loss": 1.8356, "step": 25503 }, { "epoch": 0.85, "grad_norm": 0.44219428300857544, "learning_rate": 0.0004891569501269892, "loss": 1.8117, "step": 25504 }, { "epoch": 0.85, "grad_norm": 0.46274757385253906, "learning_rate": 0.0004891488373884399, "loss": 1.7969, "step": 25505 }, { "epoch": 0.85, "grad_norm": 0.4593639075756073, "learning_rate": 0.0004891407244202918, "loss": 1.8781, "step": 25506 }, { "epoch": 0.85, "grad_norm": 0.43996143341064453, "learning_rate": 0.0004891326112225546, "loss": 1.8049, "step": 25507 }, { "epoch": 0.85, "grad_norm": 0.4545309245586395, "learning_rate": 0.0004891244977952385, "loss": 1.7913, "step": 25508 }, { "epoch": 0.85, "grad_norm": 0.4363746643066406, "learning_rate": 0.0004891163841383529, "loss": 1.7366, "step": 25509 }, { "epoch": 0.85, "grad_norm": 0.43900367617607117, "learning_rate": 0.000489108270251908, "loss": 1.7169, "step": 25510 }, { "epoch": 0.85, "grad_norm": 0.4236401915550232, "learning_rate": 0.0004891001561359136, "loss": 1.744, "step": 25511 }, { "epoch": 0.85, "grad_norm": 0.45605379343032837, "learning_rate": 0.0004890920417903793, "loss": 1.8044, "step": 25512 }, { "epoch": 0.85, "grad_norm": 0.4257887005805969, "learning_rate": 0.0004890839272153152, "loss": 1.82, "step": 25513 }, { "epoch": 0.85, "grad_norm": 0.4231697916984558, "learning_rate": 0.0004890758124107311, "loss": 1.8057, "step": 25514 }, { "epoch": 0.85, "grad_norm": 0.4368627965450287, "learning_rate": 0.0004890676973766368, "loss": 1.8187, "step": 25515 }, { "epoch": 0.85, "grad_norm": 0.4316833019256592, "learning_rate": 0.0004890595821130423, "loss": 1.7936, "step": 25516 }, { "epoch": 0.85, "grad_norm": 0.417642742395401, "learning_rate": 0.0004890514666199571, "loss": 1.8099, "step": 25517 }, { "epoch": 0.85, "grad_norm": 0.43079712986946106, "learning_rate": 0.0004890433508973913, "loss": 1.8361, "step": 25518 }, { "epoch": 0.85, "grad_norm": 0.4284680485725403, "learning_rate": 0.0004890352349453548, "loss": 1.8354, "step": 25519 }, { "epoch": 0.85, "grad_norm": 0.6667837500572205, "learning_rate": 0.0004890271187638574, "loss": 1.8273, "step": 25520 }, { "epoch": 0.85, "grad_norm": 0.43500587344169617, "learning_rate": 0.000489019002352909, "loss": 1.8195, "step": 25521 }, { "epoch": 0.85, "grad_norm": 0.4237455725669861, "learning_rate": 0.0004890108857125192, "loss": 1.8524, "step": 25522 }, { "epoch": 0.85, "grad_norm": 0.4337769150733948, "learning_rate": 0.0004890027688426982, "loss": 1.8067, "step": 25523 }, { "epoch": 0.85, "grad_norm": 0.4370041787624359, "learning_rate": 0.0004889946517434555, "loss": 1.8142, "step": 25524 }, { "epoch": 0.85, "grad_norm": 0.42505544424057007, "learning_rate": 0.0004889865344148013, "loss": 1.8158, "step": 25525 }, { "epoch": 0.85, "grad_norm": 0.46154847741127014, "learning_rate": 0.0004889784168567452, "loss": 1.8723, "step": 25526 }, { "epoch": 0.85, "grad_norm": 0.4274677634239197, "learning_rate": 0.0004889702990692972, "loss": 1.763, "step": 25527 }, { "epoch": 0.85, "grad_norm": 0.4245186448097229, "learning_rate": 0.0004889621810524671, "loss": 1.8926, "step": 25528 }, { "epoch": 0.85, "grad_norm": 0.4368140399456024, "learning_rate": 0.0004889540628062647, "loss": 1.8991, "step": 25529 }, { "epoch": 0.85, "grad_norm": 0.4250204861164093, "learning_rate": 0.0004889459443307, "loss": 1.8573, "step": 25530 }, { "epoch": 0.85, "grad_norm": 0.43055492639541626, "learning_rate": 0.0004889378256257827, "loss": 1.8469, "step": 25531 }, { "epoch": 0.85, "grad_norm": 0.42137208580970764, "learning_rate": 0.0004889297066915227, "loss": 1.8043, "step": 25532 }, { "epoch": 0.85, "grad_norm": 0.44846343994140625, "learning_rate": 0.00048892158752793, "loss": 1.7928, "step": 25533 }, { "epoch": 0.85, "grad_norm": 0.4207574725151062, "learning_rate": 0.0004889134681350142, "loss": 1.8511, "step": 25534 }, { "epoch": 0.85, "grad_norm": 0.45206090807914734, "learning_rate": 0.0004889053485127852, "loss": 1.8088, "step": 25535 }, { "epoch": 0.85, "grad_norm": 0.4391728639602661, "learning_rate": 0.0004888972286612532, "loss": 1.7581, "step": 25536 }, { "epoch": 0.85, "grad_norm": 0.4399160146713257, "learning_rate": 0.0004888891085804276, "loss": 1.7753, "step": 25537 }, { "epoch": 0.85, "grad_norm": 0.4563206434249878, "learning_rate": 0.0004888809882703186, "loss": 1.7903, "step": 25538 }, { "epoch": 0.85, "grad_norm": 0.42943280935287476, "learning_rate": 0.0004888728677309357, "loss": 1.7831, "step": 25539 }, { "epoch": 0.85, "grad_norm": 0.4478228688240051, "learning_rate": 0.0004888647469622891, "loss": 1.846, "step": 25540 }, { "epoch": 0.85, "grad_norm": 0.5934393405914307, "learning_rate": 0.0004888566259643886, "loss": 1.8494, "step": 25541 }, { "epoch": 0.85, "grad_norm": 0.45311856269836426, "learning_rate": 0.0004888485047372438, "loss": 1.8125, "step": 25542 }, { "epoch": 0.85, "grad_norm": 0.42531082034111023, "learning_rate": 0.0004888403832808649, "loss": 1.8348, "step": 25543 }, { "epoch": 0.85, "grad_norm": 0.4388347566127777, "learning_rate": 0.0004888322615952614, "loss": 1.917, "step": 25544 }, { "epoch": 0.85, "grad_norm": 0.43762126564979553, "learning_rate": 0.0004888241396804435, "loss": 1.7964, "step": 25545 }, { "epoch": 0.85, "grad_norm": 0.457385390996933, "learning_rate": 0.000488816017536421, "loss": 1.8786, "step": 25546 }, { "epoch": 0.85, "grad_norm": 0.4381371736526489, "learning_rate": 0.0004888078951632036, "loss": 1.7411, "step": 25547 }, { "epoch": 0.85, "grad_norm": 0.4409239888191223, "learning_rate": 0.0004887997725608013, "loss": 1.7752, "step": 25548 }, { "epoch": 0.85, "grad_norm": 0.42944803833961487, "learning_rate": 0.0004887916497292238, "loss": 1.8043, "step": 25549 }, { "epoch": 0.85, "grad_norm": 0.4468300938606262, "learning_rate": 0.0004887835266684811, "loss": 1.8355, "step": 25550 }, { "epoch": 0.85, "grad_norm": 0.4395400881767273, "learning_rate": 0.0004887754033785828, "loss": 1.7721, "step": 25551 }, { "epoch": 0.85, "grad_norm": 0.45417797565460205, "learning_rate": 0.0004887672798595393, "loss": 1.7594, "step": 25552 }, { "epoch": 0.85, "grad_norm": 0.43041688203811646, "learning_rate": 0.00048875915611136, "loss": 1.8433, "step": 25553 }, { "epoch": 0.85, "grad_norm": 0.4464009702205658, "learning_rate": 0.0004887510321340548, "loss": 1.7783, "step": 25554 }, { "epoch": 0.85, "grad_norm": 0.4431675970554352, "learning_rate": 0.0004887429079276338, "loss": 1.9113, "step": 25555 }, { "epoch": 0.85, "grad_norm": 0.45522138476371765, "learning_rate": 0.0004887347834921067, "loss": 1.8458, "step": 25556 }, { "epoch": 0.85, "grad_norm": 0.47693368792533875, "learning_rate": 0.0004887266588274833, "loss": 1.7481, "step": 25557 }, { "epoch": 0.85, "grad_norm": 0.4527865946292877, "learning_rate": 0.0004887185339337737, "loss": 1.9455, "step": 25558 }, { "epoch": 0.85, "grad_norm": 0.43754634261131287, "learning_rate": 0.0004887104088109874, "loss": 1.8638, "step": 25559 }, { "epoch": 0.85, "grad_norm": 0.4346218705177307, "learning_rate": 0.0004887022834591346, "loss": 1.7701, "step": 25560 }, { "epoch": 0.85, "grad_norm": 0.5569607019424438, "learning_rate": 0.0004886941578782249, "loss": 1.8207, "step": 25561 }, { "epoch": 0.85, "grad_norm": 0.43983933329582214, "learning_rate": 0.0004886860320682685, "loss": 1.8749, "step": 25562 }, { "epoch": 0.85, "grad_norm": 0.43365606665611267, "learning_rate": 0.000488677906029275, "loss": 1.8292, "step": 25563 }, { "epoch": 0.85, "grad_norm": 0.4375458359718323, "learning_rate": 0.0004886697797612542, "loss": 1.7949, "step": 25564 }, { "epoch": 0.85, "grad_norm": 0.42471203207969666, "learning_rate": 0.0004886616532642162, "loss": 1.7924, "step": 25565 }, { "epoch": 0.85, "grad_norm": 0.4302068054676056, "learning_rate": 0.0004886535265381707, "loss": 1.7274, "step": 25566 }, { "epoch": 0.85, "grad_norm": 0.4561426341533661, "learning_rate": 0.0004886453995831277, "loss": 1.8113, "step": 25567 }, { "epoch": 0.85, "grad_norm": 0.46094945073127747, "learning_rate": 0.0004886372723990969, "loss": 1.7982, "step": 25568 }, { "epoch": 0.85, "grad_norm": 0.42924025654792786, "learning_rate": 0.0004886291449860882, "loss": 1.7852, "step": 25569 }, { "epoch": 0.85, "grad_norm": 0.43956243991851807, "learning_rate": 0.0004886210173441116, "loss": 1.7308, "step": 25570 }, { "epoch": 0.85, "grad_norm": 0.43206146359443665, "learning_rate": 0.0004886128894731768, "loss": 1.7389, "step": 25571 }, { "epoch": 0.85, "grad_norm": 0.42590636014938354, "learning_rate": 0.0004886047613732939, "loss": 1.795, "step": 25572 }, { "epoch": 0.85, "grad_norm": 0.4447662830352783, "learning_rate": 0.0004885966330444724, "loss": 1.7521, "step": 25573 }, { "epoch": 0.85, "grad_norm": 0.4504168927669525, "learning_rate": 0.0004885885044867224, "loss": 1.8021, "step": 25574 }, { "epoch": 0.85, "grad_norm": 0.43430012464523315, "learning_rate": 0.0004885803757000539, "loss": 1.82, "step": 25575 }, { "epoch": 0.85, "grad_norm": 0.4704938530921936, "learning_rate": 0.0004885722466844765, "loss": 1.843, "step": 25576 }, { "epoch": 0.85, "grad_norm": 0.4365924000740051, "learning_rate": 0.0004885641174400002, "loss": 1.815, "step": 25577 }, { "epoch": 0.85, "grad_norm": 0.4453883767127991, "learning_rate": 0.0004885559879666349, "loss": 1.8433, "step": 25578 }, { "epoch": 0.85, "grad_norm": 0.4371846914291382, "learning_rate": 0.0004885478582643904, "loss": 1.7925, "step": 25579 }, { "epoch": 0.85, "grad_norm": 0.4437720775604248, "learning_rate": 0.0004885397283332765, "loss": 1.8365, "step": 25580 }, { "epoch": 0.85, "grad_norm": 0.45076224207878113, "learning_rate": 0.0004885315981733032, "loss": 1.7836, "step": 25581 }, { "epoch": 0.85, "grad_norm": 0.440891295671463, "learning_rate": 0.0004885234677844804, "loss": 1.799, "step": 25582 }, { "epoch": 0.85, "grad_norm": 0.44634830951690674, "learning_rate": 0.0004885153371668177, "loss": 1.8114, "step": 25583 }, { "epoch": 0.85, "grad_norm": 0.4314451217651367, "learning_rate": 0.0004885072063203253, "loss": 1.7967, "step": 25584 }, { "epoch": 0.85, "grad_norm": 0.4255025386810303, "learning_rate": 0.0004884990752450129, "loss": 1.7905, "step": 25585 }, { "epoch": 0.85, "grad_norm": 0.4302091896533966, "learning_rate": 0.0004884909439408902, "loss": 1.7492, "step": 25586 }, { "epoch": 0.85, "grad_norm": 0.4363962411880493, "learning_rate": 0.0004884828124079675, "loss": 1.697, "step": 25587 }, { "epoch": 0.85, "grad_norm": 0.44824934005737305, "learning_rate": 0.0004884746806462546, "loss": 1.736, "step": 25588 }, { "epoch": 0.85, "grad_norm": 0.4493132531642914, "learning_rate": 0.0004884665486557608, "loss": 1.8472, "step": 25589 }, { "epoch": 0.85, "grad_norm": 0.46931251883506775, "learning_rate": 0.0004884584164364966, "loss": 1.8447, "step": 25590 }, { "epoch": 0.85, "grad_norm": 0.42202991247177124, "learning_rate": 0.0004884502839884716, "loss": 1.8383, "step": 25591 }, { "epoch": 0.85, "grad_norm": 0.43789294362068176, "learning_rate": 0.0004884421513116959, "loss": 1.8054, "step": 25592 }, { "epoch": 0.85, "grad_norm": 0.4346106946468353, "learning_rate": 0.000488434018406179, "loss": 1.824, "step": 25593 }, { "epoch": 0.85, "grad_norm": 0.4414513409137726, "learning_rate": 0.000488425885271931, "loss": 1.8667, "step": 25594 }, { "epoch": 0.85, "grad_norm": 0.4364352226257324, "learning_rate": 0.0004884177519089617, "loss": 1.8722, "step": 25595 }, { "epoch": 0.85, "grad_norm": 0.43481752276420593, "learning_rate": 0.000488409618317281, "loss": 1.8378, "step": 25596 }, { "epoch": 0.85, "grad_norm": 0.4496579170227051, "learning_rate": 0.0004884014844968988, "loss": 1.7651, "step": 25597 }, { "epoch": 0.85, "grad_norm": 0.4324153959751129, "learning_rate": 0.0004883933504478251, "loss": 1.8543, "step": 25598 }, { "epoch": 0.85, "grad_norm": 0.43055805563926697, "learning_rate": 0.0004883852161700695, "loss": 1.8529, "step": 25599 }, { "epoch": 0.85, "grad_norm": 0.43976497650146484, "learning_rate": 0.0004883770816636421, "loss": 1.8465, "step": 25600 }, { "epoch": 0.85, "grad_norm": 0.4383642077445984, "learning_rate": 0.0004883689469285526, "loss": 1.8688, "step": 25601 }, { "epoch": 0.85, "grad_norm": 0.4231463372707367, "learning_rate": 0.0004883608119648109, "loss": 1.8418, "step": 25602 }, { "epoch": 0.85, "grad_norm": 0.43109843134880066, "learning_rate": 0.000488352676772427, "loss": 1.8154, "step": 25603 }, { "epoch": 0.85, "grad_norm": 0.420968234539032, "learning_rate": 0.0004883445413514107, "loss": 1.7761, "step": 25604 }, { "epoch": 0.85, "grad_norm": 0.4327365458011627, "learning_rate": 0.0004883364057017719, "loss": 1.7443, "step": 25605 }, { "epoch": 0.85, "grad_norm": 0.438877135515213, "learning_rate": 0.0004883282698235204, "loss": 1.8153, "step": 25606 }, { "epoch": 0.85, "grad_norm": 0.4270020127296448, "learning_rate": 0.0004883201337166661, "loss": 1.8177, "step": 25607 }, { "epoch": 0.85, "grad_norm": 0.4144987165927887, "learning_rate": 0.000488311997381219, "loss": 1.7476, "step": 25608 }, { "epoch": 0.85, "grad_norm": 0.44242870807647705, "learning_rate": 0.0004883038608171888, "loss": 1.7973, "step": 25609 }, { "epoch": 0.85, "grad_norm": 0.4196506142616272, "learning_rate": 0.0004882957240245855, "loss": 1.7566, "step": 25610 }, { "epoch": 0.85, "grad_norm": 0.4423029124736786, "learning_rate": 0.000488287587003419, "loss": 1.808, "step": 25611 }, { "epoch": 0.85, "grad_norm": 0.4410160183906555, "learning_rate": 0.000488279449753699, "loss": 1.809, "step": 25612 }, { "epoch": 0.85, "grad_norm": 0.42970356345176697, "learning_rate": 0.0004882713122754356, "loss": 1.8316, "step": 25613 }, { "epoch": 0.85, "grad_norm": 0.4283231794834137, "learning_rate": 0.0004882631745686385, "loss": 1.8072, "step": 25614 }, { "epoch": 0.85, "grad_norm": 0.44569772481918335, "learning_rate": 0.0004882550366333177, "loss": 1.8269, "step": 25615 }, { "epoch": 0.85, "grad_norm": 0.43745002150535583, "learning_rate": 0.00048824689846948285, "loss": 1.7813, "step": 25616 }, { "epoch": 0.85, "grad_norm": 0.4441753327846527, "learning_rate": 0.0004882387600771442, "loss": 1.8008, "step": 25617 }, { "epoch": 0.85, "grad_norm": 0.4484531879425049, "learning_rate": 0.00048823062145631136, "loss": 1.8119, "step": 25618 }, { "epoch": 0.85, "grad_norm": 0.44392409920692444, "learning_rate": 0.00048822248260699426, "loss": 1.8164, "step": 25619 }, { "epoch": 0.85, "grad_norm": 0.45213741064071655, "learning_rate": 0.0004882143435292028, "loss": 1.8594, "step": 25620 }, { "epoch": 0.85, "grad_norm": 0.4528000056743622, "learning_rate": 0.00048820620422294686, "loss": 1.8783, "step": 25621 }, { "epoch": 0.85, "grad_norm": 0.4533548951148987, "learning_rate": 0.0004881980646882364, "loss": 1.7684, "step": 25622 }, { "epoch": 0.85, "grad_norm": 0.42671045660972595, "learning_rate": 0.0004881899249250811, "loss": 1.8085, "step": 25623 }, { "epoch": 0.85, "grad_norm": 0.42812579870224, "learning_rate": 0.000488181784933491, "loss": 1.7891, "step": 25624 }, { "epoch": 0.85, "grad_norm": 0.43039411306381226, "learning_rate": 0.000488173644713476, "loss": 1.827, "step": 25625 }, { "epoch": 0.85, "grad_norm": 0.43249353766441345, "learning_rate": 0.00048816550426504583, "loss": 1.8141, "step": 25626 }, { "epoch": 0.85, "grad_norm": 0.43190300464630127, "learning_rate": 0.00048815736358821056, "loss": 1.7627, "step": 25627 }, { "epoch": 0.85, "grad_norm": 0.42760324478149414, "learning_rate": 0.0004881492226829799, "loss": 1.8428, "step": 25628 }, { "epoch": 0.85, "grad_norm": 0.4434289336204529, "learning_rate": 0.0004881410815493638, "loss": 1.8618, "step": 25629 }, { "epoch": 0.85, "grad_norm": 0.4373418390750885, "learning_rate": 0.0004881329401873722, "loss": 1.8689, "step": 25630 }, { "epoch": 0.85, "grad_norm": 0.4364174008369446, "learning_rate": 0.0004881247985970149, "loss": 1.784, "step": 25631 }, { "epoch": 0.85, "grad_norm": 0.45380085706710815, "learning_rate": 0.00048811665677830186, "loss": 1.7972, "step": 25632 }, { "epoch": 0.85, "grad_norm": 0.4452737867832184, "learning_rate": 0.0004881085147312429, "loss": 1.85, "step": 25633 }, { "epoch": 0.85, "grad_norm": 0.4628554582595825, "learning_rate": 0.00048810037245584784, "loss": 1.8061, "step": 25634 }, { "epoch": 0.85, "grad_norm": 0.4300835132598877, "learning_rate": 0.0004880922299521267, "loss": 1.8376, "step": 25635 }, { "epoch": 0.85, "grad_norm": 0.43349552154541016, "learning_rate": 0.00048808408722008934, "loss": 1.8186, "step": 25636 }, { "epoch": 0.85, "grad_norm": 0.4316382110118866, "learning_rate": 0.00048807594425974557, "loss": 1.796, "step": 25637 }, { "epoch": 0.85, "grad_norm": 0.4395952820777893, "learning_rate": 0.0004880678010711054, "loss": 1.7806, "step": 25638 }, { "epoch": 0.85, "grad_norm": 0.42648419737815857, "learning_rate": 0.00048805965765417845, "loss": 1.7571, "step": 25639 }, { "epoch": 0.85, "grad_norm": 0.45375093817710876, "learning_rate": 0.00048805151400897496, "loss": 1.7883, "step": 25640 }, { "epoch": 0.85, "grad_norm": 0.4605046808719635, "learning_rate": 0.0004880433701355045, "loss": 1.8826, "step": 25641 }, { "epoch": 0.85, "grad_norm": 0.4464687705039978, "learning_rate": 0.00048803522603377714, "loss": 1.8831, "step": 25642 }, { "epoch": 0.85, "grad_norm": 0.45815029740333557, "learning_rate": 0.0004880270817038028, "loss": 1.8655, "step": 25643 }, { "epoch": 0.85, "grad_norm": 0.45374414324760437, "learning_rate": 0.0004880189371455912, "loss": 1.7185, "step": 25644 }, { "epoch": 0.85, "grad_norm": 0.46030139923095703, "learning_rate": 0.0004880107923591524, "loss": 1.9423, "step": 25645 }, { "epoch": 0.85, "grad_norm": 0.6451436877250671, "learning_rate": 0.00048800264734449606, "loss": 1.8231, "step": 25646 }, { "epoch": 0.85, "grad_norm": 0.4314894676208496, "learning_rate": 0.0004879945021016324, "loss": 1.8842, "step": 25647 }, { "epoch": 0.85, "grad_norm": 0.43175655603408813, "learning_rate": 0.00048798635663057104, "loss": 1.8269, "step": 25648 }, { "epoch": 0.85, "grad_norm": 0.4386526644229889, "learning_rate": 0.00048797821093132175, "loss": 1.8229, "step": 25649 }, { "epoch": 0.85, "grad_norm": 0.7009567022323608, "learning_rate": 0.00048797006500389485, "loss": 1.841, "step": 25650 }, { "epoch": 0.85, "grad_norm": 0.43691286444664, "learning_rate": 0.0004879619188482999, "loss": 1.766, "step": 25651 }, { "epoch": 0.85, "grad_norm": 0.4425886273384094, "learning_rate": 0.00048795377246454687, "loss": 1.7591, "step": 25652 }, { "epoch": 0.85, "grad_norm": 0.4403477609157562, "learning_rate": 0.00048794562585264574, "loss": 1.7948, "step": 25653 }, { "epoch": 0.85, "grad_norm": 0.42761221528053284, "learning_rate": 0.00048793747901260614, "loss": 1.8816, "step": 25654 }, { "epoch": 0.85, "grad_norm": 0.45264431834220886, "learning_rate": 0.0004879293319444383, "loss": 1.7791, "step": 25655 }, { "epoch": 0.85, "grad_norm": 0.43144121766090393, "learning_rate": 0.0004879211846481518, "loss": 1.788, "step": 25656 }, { "epoch": 0.85, "grad_norm": 0.42603081464767456, "learning_rate": 0.00048791303712375673, "loss": 1.8729, "step": 25657 }, { "epoch": 0.85, "grad_norm": 0.4373166859149933, "learning_rate": 0.000487904889371263, "loss": 1.8567, "step": 25658 }, { "epoch": 0.85, "grad_norm": 0.53965824842453, "learning_rate": 0.0004878967413906803, "loss": 1.868, "step": 25659 }, { "epoch": 0.85, "grad_norm": 0.4338054955005646, "learning_rate": 0.00048788859318201876, "loss": 1.7674, "step": 25660 }, { "epoch": 0.85, "grad_norm": 0.43129441142082214, "learning_rate": 0.00048788044474528807, "loss": 1.7946, "step": 25661 }, { "epoch": 0.85, "grad_norm": 0.4443644881248474, "learning_rate": 0.00048787229608049826, "loss": 1.7686, "step": 25662 }, { "epoch": 0.85, "grad_norm": 0.4339055120944977, "learning_rate": 0.00048786414718765905, "loss": 1.8373, "step": 25663 }, { "epoch": 0.85, "grad_norm": 0.42651626467704773, "learning_rate": 0.0004878559980667805, "loss": 1.7782, "step": 25664 }, { "epoch": 0.85, "grad_norm": 0.45049849152565, "learning_rate": 0.0004878478487178725, "loss": 1.815, "step": 25665 }, { "epoch": 0.85, "grad_norm": 0.42715027928352356, "learning_rate": 0.00048783969914094485, "loss": 1.804, "step": 25666 }, { "epoch": 0.85, "grad_norm": 0.4324836730957031, "learning_rate": 0.0004878315493360075, "loss": 1.7562, "step": 25667 }, { "epoch": 0.85, "grad_norm": 0.4424462914466858, "learning_rate": 0.0004878233993030703, "loss": 1.9307, "step": 25668 }, { "epoch": 0.85, "grad_norm": 0.43218183517456055, "learning_rate": 0.0004878152490421432, "loss": 1.7638, "step": 25669 }, { "epoch": 0.85, "grad_norm": 0.4366948902606964, "learning_rate": 0.0004878070985532361, "loss": 1.8441, "step": 25670 }, { "epoch": 0.85, "grad_norm": 0.42229360342025757, "learning_rate": 0.0004877989478363587, "loss": 1.8321, "step": 25671 }, { "epoch": 0.85, "grad_norm": 0.4395754635334015, "learning_rate": 0.0004877907968915212, "loss": 1.8153, "step": 25672 }, { "epoch": 0.85, "grad_norm": 0.44089433550834656, "learning_rate": 0.00048778264571873323, "loss": 1.87, "step": 25673 }, { "epoch": 0.85, "grad_norm": 0.4665074050426483, "learning_rate": 0.00048777449431800483, "loss": 1.8078, "step": 25674 }, { "epoch": 0.85, "grad_norm": 0.4220038950443268, "learning_rate": 0.0004877663426893459, "loss": 1.8232, "step": 25675 }, { "epoch": 0.85, "grad_norm": 0.4332762658596039, "learning_rate": 0.0004877581908327663, "loss": 1.7696, "step": 25676 }, { "epoch": 0.85, "grad_norm": 0.44926831126213074, "learning_rate": 0.0004877500387482759, "loss": 1.8138, "step": 25677 }, { "epoch": 0.85, "grad_norm": 0.4316149353981018, "learning_rate": 0.00048774188643588463, "loss": 1.7921, "step": 25678 }, { "epoch": 0.85, "grad_norm": 0.4363933801651001, "learning_rate": 0.0004877337338956023, "loss": 1.8769, "step": 25679 }, { "epoch": 0.85, "grad_norm": 0.44067075848579407, "learning_rate": 0.000487725581127439, "loss": 1.7901, "step": 25680 }, { "epoch": 0.85, "grad_norm": 0.44318604469299316, "learning_rate": 0.0004877174281314044, "loss": 1.8497, "step": 25681 }, { "epoch": 0.85, "grad_norm": 0.4341988265514374, "learning_rate": 0.0004877092749075086, "loss": 1.8515, "step": 25682 }, { "epoch": 0.85, "grad_norm": 0.4418644607067108, "learning_rate": 0.0004877011214557613, "loss": 1.8681, "step": 25683 }, { "epoch": 0.85, "grad_norm": 0.44518306851387024, "learning_rate": 0.00048769296777617253, "loss": 1.7506, "step": 25684 }, { "epoch": 0.85, "grad_norm": 0.4499245882034302, "learning_rate": 0.00048768481386875223, "loss": 1.8015, "step": 25685 }, { "epoch": 0.85, "grad_norm": 0.4525834619998932, "learning_rate": 0.00048767665973351016, "loss": 1.7503, "step": 25686 }, { "epoch": 0.85, "grad_norm": 0.44449201226234436, "learning_rate": 0.0004876685053704563, "loss": 1.8345, "step": 25687 }, { "epoch": 0.85, "grad_norm": 0.43243545293807983, "learning_rate": 0.00048766035077960036, "loss": 1.8607, "step": 25688 }, { "epoch": 0.85, "grad_norm": 0.45361700654029846, "learning_rate": 0.00048765219596095253, "loss": 1.8196, "step": 25689 }, { "epoch": 0.85, "grad_norm": 0.44898882508277893, "learning_rate": 0.00048764404091452266, "loss": 1.7972, "step": 25690 }, { "epoch": 0.85, "grad_norm": 0.4423837661743164, "learning_rate": 0.00048763588564032046, "loss": 1.7942, "step": 25691 }, { "epoch": 0.85, "grad_norm": 0.43521571159362793, "learning_rate": 0.00048762773013835605, "loss": 1.7701, "step": 25692 }, { "epoch": 0.85, "grad_norm": 0.4447399377822876, "learning_rate": 0.00048761957440863915, "loss": 1.8415, "step": 25693 }, { "epoch": 0.85, "grad_norm": 1.1268657445907593, "learning_rate": 0.00048761141845117973, "loss": 1.786, "step": 25694 }, { "epoch": 0.85, "grad_norm": 0.44588738679885864, "learning_rate": 0.0004876032622659877, "loss": 1.8159, "step": 25695 }, { "epoch": 0.85, "grad_norm": 0.44032207131385803, "learning_rate": 0.0004875951058530729, "loss": 1.8572, "step": 25696 }, { "epoch": 0.85, "grad_norm": 0.49863481521606445, "learning_rate": 0.0004875869492124454, "loss": 1.8664, "step": 25697 }, { "epoch": 0.85, "grad_norm": 0.4535183012485504, "learning_rate": 0.0004875787923441149, "loss": 1.8695, "step": 25698 }, { "epoch": 0.86, "grad_norm": 0.4484650194644928, "learning_rate": 0.00048757063524809143, "loss": 1.8583, "step": 25699 }, { "epoch": 0.86, "grad_norm": 0.4201699495315552, "learning_rate": 0.00048756247792438483, "loss": 1.8585, "step": 25700 }, { "epoch": 0.86, "grad_norm": 0.42986705899238586, "learning_rate": 0.000487554320373005, "loss": 1.776, "step": 25701 }, { "epoch": 0.86, "grad_norm": 0.42388710379600525, "learning_rate": 0.00048754616259396184, "loss": 1.7042, "step": 25702 }, { "epoch": 0.86, "grad_norm": 0.43501800298690796, "learning_rate": 0.0004875380045872652, "loss": 1.8526, "step": 25703 }, { "epoch": 0.86, "grad_norm": 0.45007064938545227, "learning_rate": 0.00048752984635292524, "loss": 1.8403, "step": 25704 }, { "epoch": 0.86, "grad_norm": 0.43970388174057007, "learning_rate": 0.00048752168789095165, "loss": 1.797, "step": 25705 }, { "epoch": 0.86, "grad_norm": 0.4273022413253784, "learning_rate": 0.00048751352920135425, "loss": 1.764, "step": 25706 }, { "epoch": 0.86, "grad_norm": 0.42681804299354553, "learning_rate": 0.0004875053702841431, "loss": 1.7961, "step": 25707 }, { "epoch": 0.86, "grad_norm": 0.42505523562431335, "learning_rate": 0.000487497211139328, "loss": 1.8257, "step": 25708 }, { "epoch": 0.86, "grad_norm": 0.4412840008735657, "learning_rate": 0.00048748905176691906, "loss": 1.7359, "step": 25709 }, { "epoch": 0.86, "grad_norm": 0.43412092328071594, "learning_rate": 0.0004874808921669259, "loss": 1.8312, "step": 25710 }, { "epoch": 0.86, "grad_norm": 0.4261191487312317, "learning_rate": 0.00048747273233935867, "loss": 1.8042, "step": 25711 }, { "epoch": 0.86, "grad_norm": 0.42516863346099854, "learning_rate": 0.00048746457228422707, "loss": 1.8144, "step": 25712 }, { "epoch": 0.86, "grad_norm": 0.44220611453056335, "learning_rate": 0.0004874564120015411, "loss": 1.8443, "step": 25713 }, { "epoch": 0.86, "grad_norm": 0.43997716903686523, "learning_rate": 0.0004874482514913107, "loss": 1.8364, "step": 25714 }, { "epoch": 0.86, "grad_norm": 0.45700377225875854, "learning_rate": 0.00048744009075354585, "loss": 1.7834, "step": 25715 }, { "epoch": 0.86, "grad_norm": 0.4388585090637207, "learning_rate": 0.0004874319297882562, "loss": 1.8087, "step": 25716 }, { "epoch": 0.86, "grad_norm": 0.4587392508983612, "learning_rate": 0.0004874237685954518, "loss": 1.7314, "step": 25717 }, { "epoch": 0.86, "grad_norm": 0.45352691411972046, "learning_rate": 0.0004874156071751426, "loss": 1.8768, "step": 25718 }, { "epoch": 0.86, "grad_norm": 0.44948941469192505, "learning_rate": 0.0004874074455273385, "loss": 1.7762, "step": 25719 }, { "epoch": 0.86, "grad_norm": 0.43894827365875244, "learning_rate": 0.0004873992836520494, "loss": 1.7535, "step": 25720 }, { "epoch": 0.86, "grad_norm": 0.4299634099006653, "learning_rate": 0.00048739112154928506, "loss": 1.8913, "step": 25721 }, { "epoch": 0.86, "grad_norm": 0.43790775537490845, "learning_rate": 0.00048738295921905566, "loss": 1.7672, "step": 25722 }, { "epoch": 0.86, "grad_norm": 0.4423880875110626, "learning_rate": 0.0004873747966613708, "loss": 1.8474, "step": 25723 }, { "epoch": 0.86, "grad_norm": 0.4416954517364502, "learning_rate": 0.0004873666338762407, "loss": 1.7847, "step": 25724 }, { "epoch": 0.86, "grad_norm": 0.4434182345867157, "learning_rate": 0.00048735847086367505, "loss": 1.858, "step": 25725 }, { "epoch": 0.86, "grad_norm": 0.4354768693447113, "learning_rate": 0.00048735030762368374, "loss": 1.7358, "step": 25726 }, { "epoch": 0.86, "grad_norm": 0.44749191403388977, "learning_rate": 0.0004873421441562769, "loss": 1.7794, "step": 25727 }, { "epoch": 0.86, "grad_norm": 0.4267094135284424, "learning_rate": 0.0004873339804614642, "loss": 1.807, "step": 25728 }, { "epoch": 0.86, "grad_norm": 0.44823160767555237, "learning_rate": 0.0004873258165392557, "loss": 1.8492, "step": 25729 }, { "epoch": 0.86, "grad_norm": 0.4462004005908966, "learning_rate": 0.0004873176523896612, "loss": 1.76, "step": 25730 }, { "epoch": 0.86, "grad_norm": 0.426852822303772, "learning_rate": 0.0004873094880126907, "loss": 1.8778, "step": 25731 }, { "epoch": 0.86, "grad_norm": 0.43839240074157715, "learning_rate": 0.0004873013234083542, "loss": 1.7747, "step": 25732 }, { "epoch": 0.86, "grad_norm": 0.44508665800094604, "learning_rate": 0.0004872931585766613, "loss": 1.8534, "step": 25733 }, { "epoch": 0.86, "grad_norm": 0.42452535033226013, "learning_rate": 0.00048728499351762217, "loss": 1.7783, "step": 25734 }, { "epoch": 0.86, "grad_norm": 0.4356999695301056, "learning_rate": 0.00048727682823124674, "loss": 1.8993, "step": 25735 }, { "epoch": 0.86, "grad_norm": 0.43894290924072266, "learning_rate": 0.00048726866271754475, "loss": 1.8186, "step": 25736 }, { "epoch": 0.86, "grad_norm": 0.4307611286640167, "learning_rate": 0.00048726049697652617, "loss": 1.8488, "step": 25737 }, { "epoch": 0.86, "grad_norm": 0.43440496921539307, "learning_rate": 0.00048725233100820097, "loss": 1.8654, "step": 25738 }, { "epoch": 0.86, "grad_norm": 0.8164501786231995, "learning_rate": 0.00048724416481257907, "loss": 1.8515, "step": 25739 }, { "epoch": 0.86, "grad_norm": 0.4642089605331421, "learning_rate": 0.00048723599838967034, "loss": 1.8527, "step": 25740 }, { "epoch": 0.86, "grad_norm": 0.43235400319099426, "learning_rate": 0.00048722783173948465, "loss": 1.7762, "step": 25741 }, { "epoch": 0.86, "grad_norm": 0.42715176939964294, "learning_rate": 0.00048721966486203196, "loss": 1.8342, "step": 25742 }, { "epoch": 0.86, "grad_norm": 0.43705815076828003, "learning_rate": 0.00048721149775732216, "loss": 1.8665, "step": 25743 }, { "epoch": 0.86, "grad_norm": 0.4357961416244507, "learning_rate": 0.0004872033304253652, "loss": 1.8164, "step": 25744 }, { "epoch": 0.86, "grad_norm": 0.43299680948257446, "learning_rate": 0.000487195162866171, "loss": 1.8256, "step": 25745 }, { "epoch": 0.86, "grad_norm": 0.4320961833000183, "learning_rate": 0.0004871869950797495, "loss": 1.7602, "step": 25746 }, { "epoch": 0.86, "grad_norm": 0.4488106369972229, "learning_rate": 0.00048717882706611053, "loss": 1.8402, "step": 25747 }, { "epoch": 0.86, "grad_norm": 0.4216763377189636, "learning_rate": 0.000487170658825264, "loss": 1.7917, "step": 25748 }, { "epoch": 0.86, "grad_norm": 0.43897008895874023, "learning_rate": 0.00048716249035721997, "loss": 1.8275, "step": 25749 }, { "epoch": 0.86, "grad_norm": 0.4506593346595764, "learning_rate": 0.0004871543216619882, "loss": 1.7455, "step": 25750 }, { "epoch": 0.86, "grad_norm": 0.43219512701034546, "learning_rate": 0.0004871461527395786, "loss": 1.7755, "step": 25751 }, { "epoch": 0.86, "grad_norm": 0.43046456575393677, "learning_rate": 0.00048713798359000123, "loss": 1.8254, "step": 25752 }, { "epoch": 0.86, "grad_norm": 0.44887107610702515, "learning_rate": 0.0004871298142132659, "loss": 1.8284, "step": 25753 }, { "epoch": 0.86, "grad_norm": 0.4342319667339325, "learning_rate": 0.00048712164460938256, "loss": 1.8004, "step": 25754 }, { "epoch": 0.86, "grad_norm": 0.4392184019088745, "learning_rate": 0.0004871134747783611, "loss": 1.802, "step": 25755 }, { "epoch": 0.86, "grad_norm": 0.4306236803531647, "learning_rate": 0.00048710530472021135, "loss": 1.784, "step": 25756 }, { "epoch": 0.86, "grad_norm": 0.43022415041923523, "learning_rate": 0.0004870971344349435, "loss": 1.8094, "step": 25757 }, { "epoch": 0.86, "grad_norm": 0.45522594451904297, "learning_rate": 0.00048708896392256714, "loss": 1.806, "step": 25758 }, { "epoch": 0.86, "grad_norm": 0.4393322467803955, "learning_rate": 0.0004870807931830925, "loss": 1.8112, "step": 25759 }, { "epoch": 0.86, "grad_norm": 0.449812650680542, "learning_rate": 0.00048707262221652924, "loss": 1.7727, "step": 25760 }, { "epoch": 0.86, "grad_norm": 0.44221654534339905, "learning_rate": 0.0004870644510228874, "loss": 1.7882, "step": 25761 }, { "epoch": 0.86, "grad_norm": 0.4287816286087036, "learning_rate": 0.00048705627960217695, "loss": 1.807, "step": 25762 }, { "epoch": 0.86, "grad_norm": 0.4212794303894043, "learning_rate": 0.0004870481079544076, "loss": 1.8261, "step": 25763 }, { "epoch": 0.86, "grad_norm": 0.5683095455169678, "learning_rate": 0.0004870399360795894, "loss": 1.8173, "step": 25764 }, { "epoch": 0.86, "grad_norm": 0.47732672095298767, "learning_rate": 0.0004870317639777325, "loss": 1.7944, "step": 25765 }, { "epoch": 0.86, "grad_norm": 0.4361465573310852, "learning_rate": 0.00048702359164884635, "loss": 1.8383, "step": 25766 }, { "epoch": 0.86, "grad_norm": 0.4362763464450836, "learning_rate": 0.00048701541909294127, "loss": 1.784, "step": 25767 }, { "epoch": 0.86, "grad_norm": 0.9379336833953857, "learning_rate": 0.000487007246310027, "loss": 1.8431, "step": 25768 }, { "epoch": 0.86, "grad_norm": 0.44858884811401367, "learning_rate": 0.00048699907330011343, "loss": 1.7366, "step": 25769 }, { "epoch": 0.86, "grad_norm": 0.4315776824951172, "learning_rate": 0.00048699090006321057, "loss": 1.8077, "step": 25770 }, { "epoch": 0.86, "grad_norm": 0.45361387729644775, "learning_rate": 0.00048698272659932835, "loss": 1.8355, "step": 25771 }, { "epoch": 0.86, "grad_norm": 0.4438372850418091, "learning_rate": 0.0004869745529084766, "loss": 1.7417, "step": 25772 }, { "epoch": 0.86, "grad_norm": 0.4368602931499481, "learning_rate": 0.00048696637899066527, "loss": 1.7801, "step": 25773 }, { "epoch": 0.86, "grad_norm": 0.45637059211730957, "learning_rate": 0.00048695820484590435, "loss": 1.7627, "step": 25774 }, { "epoch": 0.86, "grad_norm": 0.44679906964302063, "learning_rate": 0.0004869500304742037, "loss": 1.7752, "step": 25775 }, { "epoch": 0.86, "grad_norm": 0.42887452244758606, "learning_rate": 0.0004869418558755732, "loss": 1.7863, "step": 25776 }, { "epoch": 0.86, "grad_norm": 0.43929460644721985, "learning_rate": 0.000486933681050023, "loss": 1.8487, "step": 25777 }, { "epoch": 0.86, "grad_norm": 0.4523552656173706, "learning_rate": 0.00048692550599756273, "loss": 1.8613, "step": 25778 }, { "epoch": 0.86, "grad_norm": 0.4379238486289978, "learning_rate": 0.0004869173307182024, "loss": 1.8485, "step": 25779 }, { "epoch": 0.86, "grad_norm": 0.452196329832077, "learning_rate": 0.00048690915521195207, "loss": 1.7941, "step": 25780 }, { "epoch": 0.86, "grad_norm": 0.43349069356918335, "learning_rate": 0.00048690097947882147, "loss": 1.8092, "step": 25781 }, { "epoch": 0.86, "grad_norm": 0.4305303394794464, "learning_rate": 0.0004868928035188207, "loss": 1.8093, "step": 25782 }, { "epoch": 0.86, "grad_norm": 0.44014987349510193, "learning_rate": 0.0004868846273319596, "loss": 1.8619, "step": 25783 }, { "epoch": 0.86, "grad_norm": 0.44547975063323975, "learning_rate": 0.00048687645091824807, "loss": 1.8182, "step": 25784 }, { "epoch": 0.86, "grad_norm": 0.44280874729156494, "learning_rate": 0.00048686827427769604, "loss": 1.861, "step": 25785 }, { "epoch": 0.86, "grad_norm": 0.42955443263053894, "learning_rate": 0.00048686009741031347, "loss": 1.7869, "step": 25786 }, { "epoch": 0.86, "grad_norm": 0.42475035786628723, "learning_rate": 0.0004868519203161104, "loss": 1.7757, "step": 25787 }, { "epoch": 0.86, "grad_norm": 0.41936635971069336, "learning_rate": 0.0004868437429950964, "loss": 1.7831, "step": 25788 }, { "epoch": 0.86, "grad_norm": 0.4590628147125244, "learning_rate": 0.0004868355654472818, "loss": 1.8173, "step": 25789 }, { "epoch": 0.86, "grad_norm": 0.44231921434402466, "learning_rate": 0.0004868273876726763, "loss": 1.7995, "step": 25790 }, { "epoch": 0.86, "grad_norm": 0.44324791431427, "learning_rate": 0.0004868192096712898, "loss": 1.7836, "step": 25791 }, { "epoch": 0.86, "grad_norm": 0.44048652052879333, "learning_rate": 0.00048681103144313247, "loss": 1.816, "step": 25792 }, { "epoch": 0.86, "grad_norm": 0.45108407735824585, "learning_rate": 0.00048680285298821396, "loss": 1.8093, "step": 25793 }, { "epoch": 0.86, "grad_norm": 0.4872123897075653, "learning_rate": 0.00048679467430654433, "loss": 1.8725, "step": 25794 }, { "epoch": 0.86, "grad_norm": 0.46285074949264526, "learning_rate": 0.00048678649539813355, "loss": 1.8144, "step": 25795 }, { "epoch": 0.86, "grad_norm": 0.4614902138710022, "learning_rate": 0.00048677831626299143, "loss": 1.8457, "step": 25796 }, { "epoch": 0.86, "grad_norm": 0.44739845395088196, "learning_rate": 0.00048677013690112794, "loss": 1.8335, "step": 25797 }, { "epoch": 0.86, "grad_norm": 0.4857974350452423, "learning_rate": 0.000486761957312553, "loss": 1.838, "step": 25798 }, { "epoch": 0.86, "grad_norm": 0.4505813717842102, "learning_rate": 0.0004867537774972766, "loss": 1.791, "step": 25799 }, { "epoch": 0.86, "grad_norm": 0.4684358537197113, "learning_rate": 0.00048674559745530866, "loss": 1.7723, "step": 25800 }, { "epoch": 0.86, "grad_norm": 0.47067776322364807, "learning_rate": 0.000486737417186659, "loss": 1.7375, "step": 25801 }, { "epoch": 0.86, "grad_norm": 0.4524713456630707, "learning_rate": 0.0004867292366913377, "loss": 1.7817, "step": 25802 }, { "epoch": 0.86, "grad_norm": 0.4408080577850342, "learning_rate": 0.0004867210559693546, "loss": 1.8047, "step": 25803 }, { "epoch": 0.86, "grad_norm": 0.45548784732818604, "learning_rate": 0.00048671287502071966, "loss": 1.8141, "step": 25804 }, { "epoch": 0.86, "grad_norm": 0.452452689409256, "learning_rate": 0.00048670469384544276, "loss": 1.8556, "step": 25805 }, { "epoch": 0.86, "grad_norm": 0.46239519119262695, "learning_rate": 0.00048669651244353395, "loss": 1.7547, "step": 25806 }, { "epoch": 0.86, "grad_norm": 0.4511682391166687, "learning_rate": 0.0004866883308150031, "loss": 1.8158, "step": 25807 }, { "epoch": 0.86, "grad_norm": 0.44131460785865784, "learning_rate": 0.00048668014895986, "loss": 1.8153, "step": 25808 }, { "epoch": 0.86, "grad_norm": 0.4508591890335083, "learning_rate": 0.0004866719668781148, "loss": 1.7935, "step": 25809 }, { "epoch": 0.86, "grad_norm": 0.4365653693675995, "learning_rate": 0.00048666378456977723, "loss": 1.8304, "step": 25810 }, { "epoch": 0.86, "grad_norm": 0.4215202331542969, "learning_rate": 0.00048665560203485747, "loss": 1.7938, "step": 25811 }, { "epoch": 0.86, "grad_norm": 0.45035597681999207, "learning_rate": 0.00048664741927336526, "loss": 1.8772, "step": 25812 }, { "epoch": 0.86, "grad_norm": 0.4445611238479614, "learning_rate": 0.00048663923628531057, "loss": 1.8319, "step": 25813 }, { "epoch": 0.86, "grad_norm": 0.4459492564201355, "learning_rate": 0.0004866310530707033, "loss": 1.7849, "step": 25814 }, { "epoch": 0.86, "grad_norm": 0.4555118978023529, "learning_rate": 0.0004866228696295535, "loss": 1.7084, "step": 25815 }, { "epoch": 0.86, "grad_norm": 0.4594189524650574, "learning_rate": 0.000486614685961871, "loss": 1.848, "step": 25816 }, { "epoch": 0.86, "grad_norm": 0.4187951683998108, "learning_rate": 0.0004866065020676658, "loss": 1.6907, "step": 25817 }, { "epoch": 0.86, "grad_norm": 0.43650904297828674, "learning_rate": 0.0004865983179469477, "loss": 1.7986, "step": 25818 }, { "epoch": 0.86, "grad_norm": 0.4724572002887726, "learning_rate": 0.0004865901335997269, "loss": 1.8787, "step": 25819 }, { "epoch": 0.86, "grad_norm": 0.45773380994796753, "learning_rate": 0.00048658194902601303, "loss": 1.8165, "step": 25820 }, { "epoch": 0.86, "grad_norm": 0.44399502873420715, "learning_rate": 0.0004865737642258162, "loss": 1.8189, "step": 25821 }, { "epoch": 0.86, "grad_norm": 0.4528772830963135, "learning_rate": 0.00048656557919914633, "loss": 1.8449, "step": 25822 }, { "epoch": 0.86, "grad_norm": 0.4300044775009155, "learning_rate": 0.0004865573939460134, "loss": 1.7938, "step": 25823 }, { "epoch": 0.86, "grad_norm": 0.446022629737854, "learning_rate": 0.0004865492084664272, "loss": 1.8663, "step": 25824 }, { "epoch": 0.86, "grad_norm": 0.4354165494441986, "learning_rate": 0.0004865410227603978, "loss": 1.836, "step": 25825 }, { "epoch": 0.86, "grad_norm": 0.449045866727829, "learning_rate": 0.00048653283682793504, "loss": 1.8727, "step": 25826 }, { "epoch": 0.86, "grad_norm": 0.452465683221817, "learning_rate": 0.00048652465066904887, "loss": 1.785, "step": 25827 }, { "epoch": 0.86, "grad_norm": 0.44103100895881653, "learning_rate": 0.00048651646428374935, "loss": 1.7846, "step": 25828 }, { "epoch": 0.86, "grad_norm": 0.4366849362850189, "learning_rate": 0.0004865082776720463, "loss": 1.8142, "step": 25829 }, { "epoch": 0.86, "grad_norm": 0.43788617849349976, "learning_rate": 0.00048650009083394964, "loss": 1.8546, "step": 25830 }, { "epoch": 0.86, "grad_norm": 0.4551827907562256, "learning_rate": 0.00048649190376946934, "loss": 1.8784, "step": 25831 }, { "epoch": 0.86, "grad_norm": 0.4468560516834259, "learning_rate": 0.0004864837164786154, "loss": 1.695, "step": 25832 }, { "epoch": 0.86, "grad_norm": 0.42809808254241943, "learning_rate": 0.0004864755289613977, "loss": 1.8552, "step": 25833 }, { "epoch": 0.86, "grad_norm": 0.45757466554641724, "learning_rate": 0.0004864673412178261, "loss": 1.8514, "step": 25834 }, { "epoch": 0.86, "grad_norm": 0.43803519010543823, "learning_rate": 0.0004864591532479107, "loss": 1.715, "step": 25835 }, { "epoch": 0.86, "grad_norm": 0.45930323004722595, "learning_rate": 0.00048645096505166134, "loss": 1.7724, "step": 25836 }, { "epoch": 0.86, "grad_norm": 0.4292343556880951, "learning_rate": 0.00048644277662908793, "loss": 1.8154, "step": 25837 }, { "epoch": 0.86, "grad_norm": 0.46762657165527344, "learning_rate": 0.00048643458798020055, "loss": 1.8988, "step": 25838 }, { "epoch": 0.86, "grad_norm": 0.4483950436115265, "learning_rate": 0.000486426399105009, "loss": 1.8853, "step": 25839 }, { "epoch": 0.86, "grad_norm": 0.448561429977417, "learning_rate": 0.0004864182100035233, "loss": 1.7901, "step": 25840 }, { "epoch": 0.86, "grad_norm": 0.44043710827827454, "learning_rate": 0.00048641002067575337, "loss": 1.8346, "step": 25841 }, { "epoch": 0.86, "grad_norm": 0.4409852623939514, "learning_rate": 0.00048640183112170913, "loss": 1.8674, "step": 25842 }, { "epoch": 0.86, "grad_norm": 0.4433537423610687, "learning_rate": 0.0004863936413414004, "loss": 1.8455, "step": 25843 }, { "epoch": 0.86, "grad_norm": 0.44263705611228943, "learning_rate": 0.0004863854513348375, "loss": 1.8688, "step": 25844 }, { "epoch": 0.86, "grad_norm": 0.4375222623348236, "learning_rate": 0.00048637726110202994, "loss": 1.8049, "step": 25845 }, { "epoch": 0.86, "grad_norm": 0.4301969110965729, "learning_rate": 0.0004863690706429879, "loss": 1.8061, "step": 25846 }, { "epoch": 0.86, "grad_norm": 0.4359246492385864, "learning_rate": 0.0004863608799577213, "loss": 1.8229, "step": 25847 }, { "epoch": 0.86, "grad_norm": 0.4356940686702728, "learning_rate": 0.00048635268904623994, "loss": 1.7712, "step": 25848 }, { "epoch": 0.86, "grad_norm": 0.4406740069389343, "learning_rate": 0.00048634449790855406, "loss": 1.8547, "step": 25849 }, { "epoch": 0.86, "grad_norm": 0.4361541271209717, "learning_rate": 0.00048633630654467324, "loss": 1.6751, "step": 25850 }, { "epoch": 0.86, "grad_norm": 0.45049238204956055, "learning_rate": 0.0004863281149546077, "loss": 1.9371, "step": 25851 }, { "epoch": 0.86, "grad_norm": 0.44126126170158386, "learning_rate": 0.0004863199231383673, "loss": 1.9197, "step": 25852 }, { "epoch": 0.86, "grad_norm": 0.43808144330978394, "learning_rate": 0.0004863117310959619, "loss": 1.8695, "step": 25853 }, { "epoch": 0.86, "grad_norm": 0.42597562074661255, "learning_rate": 0.0004863035388274015, "loss": 1.798, "step": 25854 }, { "epoch": 0.86, "grad_norm": 0.4374656081199646, "learning_rate": 0.00048629534633269606, "loss": 1.8055, "step": 25855 }, { "epoch": 0.86, "grad_norm": 0.4465380311012268, "learning_rate": 0.00048628715361185556, "loss": 1.913, "step": 25856 }, { "epoch": 0.86, "grad_norm": 0.4528435468673706, "learning_rate": 0.0004862789606648899, "loss": 1.775, "step": 25857 }, { "epoch": 0.86, "grad_norm": 0.43965578079223633, "learning_rate": 0.000486270767491809, "loss": 1.7291, "step": 25858 }, { "epoch": 0.86, "grad_norm": 0.4335963726043701, "learning_rate": 0.0004862625740926229, "loss": 1.7921, "step": 25859 }, { "epoch": 0.86, "grad_norm": 0.43411096930503845, "learning_rate": 0.00048625438046734137, "loss": 1.7788, "step": 25860 }, { "epoch": 0.86, "grad_norm": 0.45079025626182556, "learning_rate": 0.00048624618661597454, "loss": 1.7863, "step": 25861 }, { "epoch": 0.86, "grad_norm": 0.43479254841804504, "learning_rate": 0.00048623799253853225, "loss": 1.8142, "step": 25862 }, { "epoch": 0.86, "grad_norm": 0.4291550815105438, "learning_rate": 0.0004862297982350245, "loss": 1.7329, "step": 25863 }, { "epoch": 0.86, "grad_norm": 0.4472705125808716, "learning_rate": 0.00048622160370546117, "loss": 1.8347, "step": 25864 }, { "epoch": 0.86, "grad_norm": 0.44363051652908325, "learning_rate": 0.0004862134089498523, "loss": 1.8407, "step": 25865 }, { "epoch": 0.86, "grad_norm": 0.43629857897758484, "learning_rate": 0.0004862052139682078, "loss": 1.7824, "step": 25866 }, { "epoch": 0.86, "grad_norm": 0.4618991017341614, "learning_rate": 0.0004861970187605376, "loss": 1.7765, "step": 25867 }, { "epoch": 0.86, "grad_norm": 0.44563716650009155, "learning_rate": 0.00048618882332685166, "loss": 1.8982, "step": 25868 }, { "epoch": 0.86, "grad_norm": 0.42672795057296753, "learning_rate": 0.0004861806276671599, "loss": 1.7401, "step": 25869 }, { "epoch": 0.86, "grad_norm": 0.4539278745651245, "learning_rate": 0.0004861724317814722, "loss": 1.766, "step": 25870 }, { "epoch": 0.86, "grad_norm": 0.4323694407939911, "learning_rate": 0.00048616423566979876, "loss": 1.7952, "step": 25871 }, { "epoch": 0.86, "grad_norm": 0.4365463852882385, "learning_rate": 0.00048615603933214937, "loss": 1.8201, "step": 25872 }, { "epoch": 0.86, "grad_norm": 0.4837484657764435, "learning_rate": 0.00048614784276853376, "loss": 1.8517, "step": 25873 }, { "epoch": 0.86, "grad_norm": 0.4365789592266083, "learning_rate": 0.0004861396459789623, "loss": 1.784, "step": 25874 }, { "epoch": 0.86, "grad_norm": 0.4309643805027008, "learning_rate": 0.00048613144896344465, "loss": 1.8381, "step": 25875 }, { "epoch": 0.86, "grad_norm": 0.43199148774147034, "learning_rate": 0.0004861232517219909, "loss": 1.7992, "step": 25876 }, { "epoch": 0.86, "grad_norm": 0.44394150376319885, "learning_rate": 0.00048611505425461097, "loss": 1.833, "step": 25877 }, { "epoch": 0.86, "grad_norm": 0.4307672083377838, "learning_rate": 0.0004861068565613147, "loss": 1.8768, "step": 25878 }, { "epoch": 0.86, "grad_norm": 0.41473153233528137, "learning_rate": 0.00048609865864211217, "loss": 1.7037, "step": 25879 }, { "epoch": 0.86, "grad_norm": 0.45388710498809814, "learning_rate": 0.00048609046049701325, "loss": 1.8297, "step": 25880 }, { "epoch": 0.86, "grad_norm": 0.44110608100891113, "learning_rate": 0.000486082262126028, "loss": 1.7931, "step": 25881 }, { "epoch": 0.86, "grad_norm": 0.4395435154438019, "learning_rate": 0.00048607406352916627, "loss": 1.8936, "step": 25882 }, { "epoch": 0.86, "grad_norm": 0.42156732082366943, "learning_rate": 0.00048606586470643806, "loss": 1.793, "step": 25883 }, { "epoch": 0.86, "grad_norm": 0.44778576493263245, "learning_rate": 0.00048605766565785325, "loss": 1.8073, "step": 25884 }, { "epoch": 0.86, "grad_norm": 0.4690403342247009, "learning_rate": 0.0004860494663834219, "loss": 1.7842, "step": 25885 }, { "epoch": 0.86, "grad_norm": 0.4279298484325409, "learning_rate": 0.0004860412668831539, "loss": 1.818, "step": 25886 }, { "epoch": 0.86, "grad_norm": 0.437592476606369, "learning_rate": 0.0004860330671570592, "loss": 1.8192, "step": 25887 }, { "epoch": 0.86, "grad_norm": 0.44046837091445923, "learning_rate": 0.00048602486720514775, "loss": 1.8463, "step": 25888 }, { "epoch": 0.86, "grad_norm": 0.4481890797615051, "learning_rate": 0.0004860166670274296, "loss": 1.7025, "step": 25889 }, { "epoch": 0.86, "grad_norm": 0.43809816241264343, "learning_rate": 0.0004860084666239145, "loss": 1.7944, "step": 25890 }, { "epoch": 0.86, "grad_norm": 0.9695746302604675, "learning_rate": 0.0004860002659946126, "loss": 1.8906, "step": 25891 }, { "epoch": 0.86, "grad_norm": 0.4288143813610077, "learning_rate": 0.00048599206513953383, "loss": 1.7977, "step": 25892 }, { "epoch": 0.86, "grad_norm": 0.45575663447380066, "learning_rate": 0.0004859838640586881, "loss": 1.8271, "step": 25893 }, { "epoch": 0.86, "grad_norm": 0.4684399962425232, "learning_rate": 0.0004859756627520853, "loss": 1.8607, "step": 25894 }, { "epoch": 0.86, "grad_norm": 0.4520913064479828, "learning_rate": 0.0004859674612197354, "loss": 1.7994, "step": 25895 }, { "epoch": 0.86, "grad_norm": 0.43040359020233154, "learning_rate": 0.0004859592594616485, "loss": 1.8256, "step": 25896 }, { "epoch": 0.86, "grad_norm": 0.42764291167259216, "learning_rate": 0.00048595105747783443, "loss": 1.8442, "step": 25897 }, { "epoch": 0.86, "grad_norm": 0.44739437103271484, "learning_rate": 0.0004859428552683031, "loss": 1.8386, "step": 25898 }, { "epoch": 0.86, "grad_norm": 0.44728323817253113, "learning_rate": 0.0004859346528330646, "loss": 1.7908, "step": 25899 }, { "epoch": 0.86, "grad_norm": 0.46128278970718384, "learning_rate": 0.00048592645017212887, "loss": 1.8496, "step": 25900 }, { "epoch": 0.86, "grad_norm": 0.43664371967315674, "learning_rate": 0.00048591824728550577, "loss": 1.8177, "step": 25901 }, { "epoch": 0.86, "grad_norm": 0.4484237730503082, "learning_rate": 0.00048591004417320534, "loss": 1.8103, "step": 25902 }, { "epoch": 0.86, "grad_norm": 0.4404715597629547, "learning_rate": 0.0004859018408352375, "loss": 1.7874, "step": 25903 }, { "epoch": 0.86, "grad_norm": 0.4308423697948456, "learning_rate": 0.00048589363727161226, "loss": 1.835, "step": 25904 }, { "epoch": 0.86, "grad_norm": 0.44203123450279236, "learning_rate": 0.00048588543348233946, "loss": 1.8311, "step": 25905 }, { "epoch": 0.86, "grad_norm": 0.45832282304763794, "learning_rate": 0.00048587722946742917, "loss": 1.9132, "step": 25906 }, { "epoch": 0.86, "grad_norm": 0.44657477736473083, "learning_rate": 0.0004858690252268913, "loss": 1.8333, "step": 25907 }, { "epoch": 0.86, "grad_norm": 0.45082345604896545, "learning_rate": 0.00048586082076073576, "loss": 1.7605, "step": 25908 }, { "epoch": 0.86, "grad_norm": 0.435170441865921, "learning_rate": 0.00048585261606897274, "loss": 1.9261, "step": 25909 }, { "epoch": 0.86, "grad_norm": 0.4294825792312622, "learning_rate": 0.00048584441115161187, "loss": 1.7413, "step": 25910 }, { "epoch": 0.86, "grad_norm": 0.4461282193660736, "learning_rate": 0.0004858362060086633, "loss": 1.8035, "step": 25911 }, { "epoch": 0.86, "grad_norm": 0.46240106225013733, "learning_rate": 0.0004858280006401369, "loss": 1.8421, "step": 25912 }, { "epoch": 0.86, "grad_norm": 0.438938707113266, "learning_rate": 0.00048581979504604285, "loss": 1.8748, "step": 25913 }, { "epoch": 0.86, "grad_norm": 0.4334305226802826, "learning_rate": 0.0004858115892263909, "loss": 1.8575, "step": 25914 }, { "epoch": 0.86, "grad_norm": 0.44385409355163574, "learning_rate": 0.0004858033831811909, "loss": 1.7988, "step": 25915 }, { "epoch": 0.86, "grad_norm": 0.43416884541511536, "learning_rate": 0.0004857951769104532, "loss": 1.7958, "step": 25916 }, { "epoch": 0.86, "grad_norm": 0.443240225315094, "learning_rate": 0.00048578697041418747, "loss": 1.8514, "step": 25917 }, { "epoch": 0.86, "grad_norm": 0.47105109691619873, "learning_rate": 0.00048577876369240366, "loss": 1.8766, "step": 25918 }, { "epoch": 0.86, "grad_norm": 0.427229642868042, "learning_rate": 0.0004857705567451118, "loss": 1.8139, "step": 25919 }, { "epoch": 0.86, "grad_norm": 0.455149382352829, "learning_rate": 0.0004857623495723219, "loss": 1.8032, "step": 25920 }, { "epoch": 0.86, "grad_norm": 0.42281919717788696, "learning_rate": 0.00048575414217404383, "loss": 1.835, "step": 25921 }, { "epoch": 0.86, "grad_norm": 0.4452032744884491, "learning_rate": 0.0004857459345502876, "loss": 1.7995, "step": 25922 }, { "epoch": 0.86, "grad_norm": 0.44848865270614624, "learning_rate": 0.00048573772670106323, "loss": 1.8262, "step": 25923 }, { "epoch": 0.86, "grad_norm": 0.43207472562789917, "learning_rate": 0.0004857295186263807, "loss": 1.7792, "step": 25924 }, { "epoch": 0.86, "grad_norm": 0.4290884733200073, "learning_rate": 0.00048572131032624977, "loss": 1.7696, "step": 25925 }, { "epoch": 0.86, "grad_norm": 0.4195111393928528, "learning_rate": 0.00048571310180068055, "loss": 1.8168, "step": 25926 }, { "epoch": 0.86, "grad_norm": 0.4421432912349701, "learning_rate": 0.000485704893049683, "loss": 1.7521, "step": 25927 }, { "epoch": 0.86, "grad_norm": 0.45624691247940063, "learning_rate": 0.0004856966840732671, "loss": 1.854, "step": 25928 }, { "epoch": 0.86, "grad_norm": 0.445448100566864, "learning_rate": 0.0004856884748714428, "loss": 1.8805, "step": 25929 }, { "epoch": 0.86, "grad_norm": 0.43946096301078796, "learning_rate": 0.00048568026544422, "loss": 1.8264, "step": 25930 }, { "epoch": 0.86, "grad_norm": 0.4426909387111664, "learning_rate": 0.0004856720557916087, "loss": 1.8859, "step": 25931 }, { "epoch": 0.86, "grad_norm": 0.4263834059238434, "learning_rate": 0.00048566384591361894, "loss": 1.7372, "step": 25932 }, { "epoch": 0.86, "grad_norm": 0.4231536090373993, "learning_rate": 0.00048565563581026054, "loss": 1.8427, "step": 25933 }, { "epoch": 0.86, "grad_norm": 0.4443724453449249, "learning_rate": 0.0004856474254815437, "loss": 1.8399, "step": 25934 }, { "epoch": 0.86, "grad_norm": 0.43434831500053406, "learning_rate": 0.0004856392149274781, "loss": 1.7431, "step": 25935 }, { "epoch": 0.86, "grad_norm": 0.4587557315826416, "learning_rate": 0.0004856310041480739, "loss": 1.8586, "step": 25936 }, { "epoch": 0.86, "grad_norm": 0.43740081787109375, "learning_rate": 0.00048562279314334105, "loss": 1.8956, "step": 25937 }, { "epoch": 0.86, "grad_norm": 0.4465261697769165, "learning_rate": 0.0004856145819132894, "loss": 1.8093, "step": 25938 }, { "epoch": 0.86, "grad_norm": 0.43459516763687134, "learning_rate": 0.00048560637045792906, "loss": 1.732, "step": 25939 }, { "epoch": 0.86, "grad_norm": 0.4484010338783264, "learning_rate": 0.0004855981587772699, "loss": 1.8396, "step": 25940 }, { "epoch": 0.86, "grad_norm": 0.4378553032875061, "learning_rate": 0.000485589946871322, "loss": 1.8194, "step": 25941 }, { "epoch": 0.86, "grad_norm": 0.46446314454078674, "learning_rate": 0.00048558173474009506, "loss": 1.8808, "step": 25942 }, { "epoch": 0.86, "grad_norm": 0.4303663969039917, "learning_rate": 0.0004855735223835994, "loss": 1.8339, "step": 25943 }, { "epoch": 0.86, "grad_norm": 0.4365304410457611, "learning_rate": 0.00048556530980184485, "loss": 1.8838, "step": 25944 }, { "epoch": 0.86, "grad_norm": 0.4383019506931305, "learning_rate": 0.00048555709699484126, "loss": 1.774, "step": 25945 }, { "epoch": 0.86, "grad_norm": 0.44968634843826294, "learning_rate": 0.00048554888396259874, "loss": 1.8412, "step": 25946 }, { "epoch": 0.86, "grad_norm": 0.4334714114665985, "learning_rate": 0.0004855406707051272, "loss": 1.81, "step": 25947 }, { "epoch": 0.86, "grad_norm": 0.43923884630203247, "learning_rate": 0.0004855324572224367, "loss": 1.7881, "step": 25948 }, { "epoch": 0.86, "grad_norm": 0.4293295741081238, "learning_rate": 0.000485524243514537, "loss": 1.841, "step": 25949 }, { "epoch": 0.86, "grad_norm": 0.43380069732666016, "learning_rate": 0.00048551602958143823, "loss": 1.8627, "step": 25950 }, { "epoch": 0.86, "grad_norm": 0.42311030626296997, "learning_rate": 0.00048550781542315043, "loss": 1.7688, "step": 25951 }, { "epoch": 0.86, "grad_norm": 0.4301133155822754, "learning_rate": 0.0004854996010396834, "loss": 1.7457, "step": 25952 }, { "epoch": 0.86, "grad_norm": 0.42843058705329895, "learning_rate": 0.0004854913864310472, "loss": 1.7607, "step": 25953 }, { "epoch": 0.86, "grad_norm": 0.4332679808139801, "learning_rate": 0.0004854831715972518, "loss": 1.8214, "step": 25954 }, { "epoch": 0.86, "grad_norm": 0.42717429995536804, "learning_rate": 0.0004854749565383072, "loss": 1.8166, "step": 25955 }, { "epoch": 0.86, "grad_norm": 0.453652024269104, "learning_rate": 0.00048546674125422325, "loss": 1.7521, "step": 25956 }, { "epoch": 0.86, "grad_norm": 0.45560422539711, "learning_rate": 0.00048545852574500995, "loss": 1.8412, "step": 25957 }, { "epoch": 0.86, "grad_norm": 0.43930697441101074, "learning_rate": 0.0004854503100106775, "loss": 1.8394, "step": 25958 }, { "epoch": 0.86, "grad_norm": 0.44482311606407166, "learning_rate": 0.0004854420940512356, "loss": 1.7272, "step": 25959 }, { "epoch": 0.86, "grad_norm": 0.4254416823387146, "learning_rate": 0.0004854338778666943, "loss": 1.812, "step": 25960 }, { "epoch": 0.86, "grad_norm": 0.4253443479537964, "learning_rate": 0.0004854256614570636, "loss": 1.8272, "step": 25961 }, { "epoch": 0.86, "grad_norm": 0.42349815368652344, "learning_rate": 0.0004854174448223535, "loss": 1.7812, "step": 25962 }, { "epoch": 0.86, "grad_norm": 0.43420177698135376, "learning_rate": 0.000485409227962574, "loss": 1.8141, "step": 25963 }, { "epoch": 0.86, "grad_norm": 0.42956334352493286, "learning_rate": 0.0004854010108777349, "loss": 1.8201, "step": 25964 }, { "epoch": 0.86, "grad_norm": 0.4395885467529297, "learning_rate": 0.0004853927935678464, "loss": 1.781, "step": 25965 }, { "epoch": 0.86, "grad_norm": 0.5307454466819763, "learning_rate": 0.00048538457603291825, "loss": 1.8726, "step": 25966 }, { "epoch": 0.86, "grad_norm": 0.4505947530269623, "learning_rate": 0.0004853763582729606, "loss": 1.8449, "step": 25967 }, { "epoch": 0.86, "grad_norm": 0.456528902053833, "learning_rate": 0.00048536814028798334, "loss": 1.788, "step": 25968 }, { "epoch": 0.86, "grad_norm": 0.4554533362388611, "learning_rate": 0.00048535992207799657, "loss": 1.808, "step": 25969 }, { "epoch": 0.86, "grad_norm": 0.44259458780288696, "learning_rate": 0.00048535170364301, "loss": 1.7571, "step": 25970 }, { "epoch": 0.86, "grad_norm": 0.43371424078941345, "learning_rate": 0.00048534348498303393, "loss": 1.8404, "step": 25971 }, { "epoch": 0.86, "grad_norm": 0.4659477472305298, "learning_rate": 0.00048533526609807813, "loss": 1.8361, "step": 25972 }, { "epoch": 0.86, "grad_norm": 0.4748413860797882, "learning_rate": 0.00048532704698815254, "loss": 1.8498, "step": 25973 }, { "epoch": 0.86, "grad_norm": 0.43529289960861206, "learning_rate": 0.0004853188276532673, "loss": 1.7672, "step": 25974 }, { "epoch": 0.86, "grad_norm": 0.43673887848854065, "learning_rate": 0.0004853106080934323, "loss": 1.824, "step": 25975 }, { "epoch": 0.86, "grad_norm": 0.46796664595603943, "learning_rate": 0.0004853023883086576, "loss": 1.8626, "step": 25976 }, { "epoch": 0.86, "grad_norm": 0.45820853114128113, "learning_rate": 0.00048529416829895295, "loss": 1.8253, "step": 25977 }, { "epoch": 0.86, "grad_norm": 0.42688778042793274, "learning_rate": 0.00048528594806432855, "loss": 1.7594, "step": 25978 }, { "epoch": 0.86, "grad_norm": 0.4445326328277588, "learning_rate": 0.00048527772760479434, "loss": 1.8091, "step": 25979 }, { "epoch": 0.86, "grad_norm": 0.43253958225250244, "learning_rate": 0.0004852695069203602, "loss": 1.8185, "step": 25980 }, { "epoch": 0.86, "grad_norm": 0.43790796399116516, "learning_rate": 0.00048526128601103627, "loss": 1.8372, "step": 25981 }, { "epoch": 0.86, "grad_norm": 0.428691029548645, "learning_rate": 0.0004852530648768323, "loss": 1.8267, "step": 25982 }, { "epoch": 0.86, "grad_norm": 0.46832919120788574, "learning_rate": 0.00048524484351775854, "loss": 1.8333, "step": 25983 }, { "epoch": 0.86, "grad_norm": 0.46047690510749817, "learning_rate": 0.0004852366219338248, "loss": 1.8673, "step": 25984 }, { "epoch": 0.86, "grad_norm": 0.4302060604095459, "learning_rate": 0.0004852284001250411, "loss": 1.7986, "step": 25985 }, { "epoch": 0.86, "grad_norm": 0.46252191066741943, "learning_rate": 0.00048522017809141734, "loss": 1.7766, "step": 25986 }, { "epoch": 0.86, "grad_norm": 0.44904786348342896, "learning_rate": 0.00048521195583296365, "loss": 1.9117, "step": 25987 }, { "epoch": 0.86, "grad_norm": 0.4601677358150482, "learning_rate": 0.0004852037333496899, "loss": 1.8274, "step": 25988 }, { "epoch": 0.86, "grad_norm": 0.4405321776866913, "learning_rate": 0.00048519551064160615, "loss": 1.7947, "step": 25989 }, { "epoch": 0.86, "grad_norm": 0.4314698576927185, "learning_rate": 0.0004851872877087223, "loss": 1.9, "step": 25990 }, { "epoch": 0.86, "grad_norm": 0.4687666893005371, "learning_rate": 0.0004851790645510484, "loss": 1.7693, "step": 25991 }, { "epoch": 0.86, "grad_norm": 0.444366991519928, "learning_rate": 0.00048517084116859435, "loss": 1.8326, "step": 25992 }, { "epoch": 0.86, "grad_norm": 0.45139002799987793, "learning_rate": 0.0004851626175613702, "loss": 1.7859, "step": 25993 }, { "epoch": 0.86, "grad_norm": 0.4389939308166504, "learning_rate": 0.0004851543937293859, "loss": 1.759, "step": 25994 }, { "epoch": 0.86, "grad_norm": 0.4275757670402527, "learning_rate": 0.0004851461696726515, "loss": 1.7492, "step": 25995 }, { "epoch": 0.86, "grad_norm": 0.4291573464870453, "learning_rate": 0.00048513794539117687, "loss": 1.8427, "step": 25996 }, { "epoch": 0.86, "grad_norm": 0.4530574679374695, "learning_rate": 0.00048512972088497216, "loss": 1.8569, "step": 25997 }, { "epoch": 0.86, "grad_norm": 0.4714909791946411, "learning_rate": 0.00048512149615404713, "loss": 1.8426, "step": 25998 }, { "epoch": 0.86, "grad_norm": 0.47476431727409363, "learning_rate": 0.0004851132711984119, "loss": 1.7582, "step": 25999 }, { "epoch": 0.87, "grad_norm": 0.46193981170654297, "learning_rate": 0.00048510504601807645, "loss": 1.7891, "step": 26000 }, { "epoch": 0.87, "grad_norm": 0.4855336546897888, "learning_rate": 0.0004850968206130508, "loss": 1.7741, "step": 26001 }, { "epoch": 0.87, "grad_norm": 0.4543500244617462, "learning_rate": 0.0004850885949833448, "loss": 1.7921, "step": 26002 }, { "epoch": 0.87, "grad_norm": 0.4402443766593933, "learning_rate": 0.00048508036912896853, "loss": 1.776, "step": 26003 }, { "epoch": 0.87, "grad_norm": 0.4302396774291992, "learning_rate": 0.00048507214304993205, "loss": 1.8368, "step": 26004 }, { "epoch": 0.87, "grad_norm": 0.44350823760032654, "learning_rate": 0.00048506391674624515, "loss": 1.8245, "step": 26005 }, { "epoch": 0.87, "grad_norm": 0.453859806060791, "learning_rate": 0.00048505569021791795, "loss": 1.8621, "step": 26006 }, { "epoch": 0.87, "grad_norm": 0.45697224140167236, "learning_rate": 0.0004850474634649604, "loss": 1.8255, "step": 26007 }, { "epoch": 0.87, "grad_norm": 0.45217788219451904, "learning_rate": 0.00048503923648738256, "loss": 1.7853, "step": 26008 }, { "epoch": 0.87, "grad_norm": 0.4407828450202942, "learning_rate": 0.00048503100928519433, "loss": 1.8231, "step": 26009 }, { "epoch": 0.87, "grad_norm": 0.4654260575771332, "learning_rate": 0.00048502278185840576, "loss": 1.7904, "step": 26010 }, { "epoch": 0.87, "grad_norm": 0.4598495662212372, "learning_rate": 0.00048501455420702665, "loss": 1.8399, "step": 26011 }, { "epoch": 0.87, "grad_norm": 0.4358230233192444, "learning_rate": 0.0004850063263310672, "loss": 1.8387, "step": 26012 }, { "epoch": 0.87, "grad_norm": 0.4259006679058075, "learning_rate": 0.0004849980982305374, "loss": 1.7726, "step": 26013 }, { "epoch": 0.87, "grad_norm": 0.42852783203125, "learning_rate": 0.0004849898699054471, "loss": 1.7706, "step": 26014 }, { "epoch": 0.87, "grad_norm": 0.45541587471961975, "learning_rate": 0.0004849816413558063, "loss": 1.8076, "step": 26015 }, { "epoch": 0.87, "grad_norm": 0.4315658509731293, "learning_rate": 0.0004849734125816252, "loss": 1.8098, "step": 26016 }, { "epoch": 0.87, "grad_norm": 0.44623681902885437, "learning_rate": 0.0004849651835829135, "loss": 1.819, "step": 26017 }, { "epoch": 0.87, "grad_norm": 0.44359153509140015, "learning_rate": 0.0004849569543596814, "loss": 1.8595, "step": 26018 }, { "epoch": 0.87, "grad_norm": 0.4311264157295227, "learning_rate": 0.0004849487249119387, "loss": 1.8976, "step": 26019 }, { "epoch": 0.87, "grad_norm": 0.6835291385650635, "learning_rate": 0.0004849404952396956, "loss": 1.8935, "step": 26020 }, { "epoch": 0.87, "grad_norm": 1.6666195392608643, "learning_rate": 0.00048493226534296194, "loss": 1.8442, "step": 26021 }, { "epoch": 0.87, "grad_norm": 0.4501406252384186, "learning_rate": 0.0004849240352217478, "loss": 1.8556, "step": 26022 }, { "epoch": 0.87, "grad_norm": 0.4391489028930664, "learning_rate": 0.0004849158048760631, "loss": 1.8433, "step": 26023 }, { "epoch": 0.87, "grad_norm": 0.4390332102775574, "learning_rate": 0.00048490757430591784, "loss": 1.865, "step": 26024 }, { "epoch": 0.87, "grad_norm": 0.4740823209285736, "learning_rate": 0.00048489934351132204, "loss": 1.7624, "step": 26025 }, { "epoch": 0.87, "grad_norm": 0.42961767315864563, "learning_rate": 0.00048489111249228566, "loss": 1.8084, "step": 26026 }, { "epoch": 0.87, "grad_norm": 0.4349232614040375, "learning_rate": 0.0004848828812488188, "loss": 1.7818, "step": 26027 }, { "epoch": 0.87, "grad_norm": 0.42739737033843994, "learning_rate": 0.00048487464978093126, "loss": 1.8525, "step": 26028 }, { "epoch": 0.87, "grad_norm": 0.4411567449569702, "learning_rate": 0.0004848664180886332, "loss": 1.8194, "step": 26029 }, { "epoch": 0.87, "grad_norm": 0.43643125891685486, "learning_rate": 0.0004848581861719345, "loss": 1.8192, "step": 26030 }, { "epoch": 0.87, "grad_norm": 0.4645470678806305, "learning_rate": 0.0004848499540308453, "loss": 1.7827, "step": 26031 }, { "epoch": 0.87, "grad_norm": 0.4554360806941986, "learning_rate": 0.00048484172166537537, "loss": 1.8503, "step": 26032 }, { "epoch": 0.87, "grad_norm": 0.44951605796813965, "learning_rate": 0.00048483348907553495, "loss": 1.8323, "step": 26033 }, { "epoch": 0.87, "grad_norm": 0.4309833347797394, "learning_rate": 0.00048482525626133375, "loss": 1.7434, "step": 26034 }, { "epoch": 0.87, "grad_norm": 0.418497771024704, "learning_rate": 0.00048481702322278206, "loss": 1.8497, "step": 26035 }, { "epoch": 0.87, "grad_norm": 0.4882243573665619, "learning_rate": 0.0004848087899598897, "loss": 1.798, "step": 26036 }, { "epoch": 0.87, "grad_norm": 0.42655959725379944, "learning_rate": 0.0004848005564726666, "loss": 1.7601, "step": 26037 }, { "epoch": 0.87, "grad_norm": 0.4428052008152008, "learning_rate": 0.00048479232276112295, "loss": 1.8136, "step": 26038 }, { "epoch": 0.87, "grad_norm": 0.4311542809009552, "learning_rate": 0.00048478408882526863, "loss": 1.8498, "step": 26039 }, { "epoch": 0.87, "grad_norm": 0.43692710995674133, "learning_rate": 0.0004847758546651137, "loss": 1.7755, "step": 26040 }, { "epoch": 0.87, "grad_norm": 0.4410949945449829, "learning_rate": 0.00048476762028066807, "loss": 1.8553, "step": 26041 }, { "epoch": 0.87, "grad_norm": 0.4198378622531891, "learning_rate": 0.0004847593856719418, "loss": 1.7511, "step": 26042 }, { "epoch": 0.87, "grad_norm": 0.4306871294975281, "learning_rate": 0.0004847511508389448, "loss": 1.7746, "step": 26043 }, { "epoch": 0.87, "grad_norm": 0.4208303391933441, "learning_rate": 0.00048474291578168717, "loss": 1.7727, "step": 26044 }, { "epoch": 0.87, "grad_norm": 0.43521174788475037, "learning_rate": 0.00048473468050017884, "loss": 1.8415, "step": 26045 }, { "epoch": 0.87, "grad_norm": 0.4415619671344757, "learning_rate": 0.0004847264449944298, "loss": 1.9082, "step": 26046 }, { "epoch": 0.87, "grad_norm": 0.44890454411506653, "learning_rate": 0.0004847182092644501, "loss": 1.9357, "step": 26047 }, { "epoch": 0.87, "grad_norm": 0.44166573882102966, "learning_rate": 0.0004847099733102497, "loss": 1.8762, "step": 26048 }, { "epoch": 0.87, "grad_norm": 0.4425487220287323, "learning_rate": 0.0004847017371318386, "loss": 1.7115, "step": 26049 }, { "epoch": 0.87, "grad_norm": 0.43097200989723206, "learning_rate": 0.0004846935007292268, "loss": 1.7698, "step": 26050 }, { "epoch": 0.87, "grad_norm": 0.4380260109901428, "learning_rate": 0.00048468526410242444, "loss": 1.807, "step": 26051 }, { "epoch": 0.87, "grad_norm": 0.42708462476730347, "learning_rate": 0.0004846770272514412, "loss": 1.8194, "step": 26052 }, { "epoch": 0.87, "grad_norm": 0.43741512298583984, "learning_rate": 0.0004846687901762874, "loss": 1.7695, "step": 26053 }, { "epoch": 0.87, "grad_norm": 0.43620920181274414, "learning_rate": 0.00048466055287697276, "loss": 1.8886, "step": 26054 }, { "epoch": 0.87, "grad_norm": 0.43313392996788025, "learning_rate": 0.00048465231535350754, "loss": 1.7632, "step": 26055 }, { "epoch": 0.87, "grad_norm": 0.42676976323127747, "learning_rate": 0.0004846440776059015, "loss": 1.7998, "step": 26056 }, { "epoch": 0.87, "grad_norm": 0.43932223320007324, "learning_rate": 0.00048463583963416485, "loss": 1.7761, "step": 26057 }, { "epoch": 0.87, "grad_norm": 0.4306768476963043, "learning_rate": 0.0004846276014383075, "loss": 1.8485, "step": 26058 }, { "epoch": 0.87, "grad_norm": 0.4394403398036957, "learning_rate": 0.0004846193630183394, "loss": 1.8042, "step": 26059 }, { "epoch": 0.87, "grad_norm": 0.41804978251457214, "learning_rate": 0.00048461112437427057, "loss": 1.8008, "step": 26060 }, { "epoch": 0.87, "grad_norm": 0.4413248598575592, "learning_rate": 0.00048460288550611114, "loss": 1.7674, "step": 26061 }, { "epoch": 0.87, "grad_norm": 0.439510315656662, "learning_rate": 0.00048459464641387087, "loss": 1.8062, "step": 26062 }, { "epoch": 0.87, "grad_norm": 0.4278261661529541, "learning_rate": 0.00048458640709756, "loss": 1.8721, "step": 26063 }, { "epoch": 0.87, "grad_norm": 0.44188424944877625, "learning_rate": 0.0004845781675571884, "loss": 1.8452, "step": 26064 }, { "epoch": 0.87, "grad_norm": 0.4230632781982422, "learning_rate": 0.0004845699277927661, "loss": 1.7636, "step": 26065 }, { "epoch": 0.87, "grad_norm": 0.45394665002822876, "learning_rate": 0.00048456168780430315, "loss": 1.8591, "step": 26066 }, { "epoch": 0.87, "grad_norm": 0.4379732012748718, "learning_rate": 0.0004845534475918094, "loss": 1.8009, "step": 26067 }, { "epoch": 0.87, "grad_norm": 0.44080743193626404, "learning_rate": 0.00048454520715529505, "loss": 1.8223, "step": 26068 }, { "epoch": 0.87, "grad_norm": 0.4303552210330963, "learning_rate": 0.00048453696649476994, "loss": 1.7513, "step": 26069 }, { "epoch": 0.87, "grad_norm": 0.4501967430114746, "learning_rate": 0.00048452872561024417, "loss": 1.7188, "step": 26070 }, { "epoch": 0.87, "grad_norm": 0.4280370771884918, "learning_rate": 0.0004845204845017277, "loss": 1.8407, "step": 26071 }, { "epoch": 0.87, "grad_norm": 0.4576468765735626, "learning_rate": 0.00048451224316923056, "loss": 1.8409, "step": 26072 }, { "epoch": 0.87, "grad_norm": 0.6264359951019287, "learning_rate": 0.0004845040016127628, "loss": 1.8495, "step": 26073 }, { "epoch": 0.87, "grad_norm": 0.47118908166885376, "learning_rate": 0.0004844957598323343, "loss": 1.76, "step": 26074 }, { "epoch": 0.87, "grad_norm": 0.42454802989959717, "learning_rate": 0.00048448751782795513, "loss": 1.7903, "step": 26075 }, { "epoch": 0.87, "grad_norm": 0.44507893919944763, "learning_rate": 0.0004844792755996354, "loss": 1.8632, "step": 26076 }, { "epoch": 0.87, "grad_norm": 0.4384206235408783, "learning_rate": 0.0004844710331473849, "loss": 1.7322, "step": 26077 }, { "epoch": 0.87, "grad_norm": 0.42768529057502747, "learning_rate": 0.0004844627904712137, "loss": 1.7834, "step": 26078 }, { "epoch": 0.87, "grad_norm": 0.4353151023387909, "learning_rate": 0.00048445454757113204, "loss": 1.7409, "step": 26079 }, { "epoch": 0.87, "grad_norm": 0.4312092959880829, "learning_rate": 0.00048444630444714954, "loss": 1.8128, "step": 26080 }, { "epoch": 0.87, "grad_norm": 0.44048619270324707, "learning_rate": 0.00048443806109927653, "loss": 1.7796, "step": 26081 }, { "epoch": 0.87, "grad_norm": 0.4354037344455719, "learning_rate": 0.0004844298175275228, "loss": 1.7487, "step": 26082 }, { "epoch": 0.87, "grad_norm": 0.43293169140815735, "learning_rate": 0.00048442157373189854, "loss": 1.8374, "step": 26083 }, { "epoch": 0.87, "grad_norm": 0.43592336773872375, "learning_rate": 0.0004844133297124136, "loss": 1.8043, "step": 26084 }, { "epoch": 0.87, "grad_norm": 0.4401995837688446, "learning_rate": 0.0004844050854690781, "loss": 1.7267, "step": 26085 }, { "epoch": 0.87, "grad_norm": 0.4243881404399872, "learning_rate": 0.000484396841001902, "loss": 1.8038, "step": 26086 }, { "epoch": 0.87, "grad_norm": 0.46135181188583374, "learning_rate": 0.00048438859631089523, "loss": 1.9109, "step": 26087 }, { "epoch": 0.87, "grad_norm": 0.4412115216255188, "learning_rate": 0.00048438035139606786, "loss": 1.8111, "step": 26088 }, { "epoch": 0.87, "grad_norm": 0.4369305968284607, "learning_rate": 0.00048437210625743005, "loss": 1.8402, "step": 26089 }, { "epoch": 0.87, "grad_norm": 0.5329371690750122, "learning_rate": 0.00048436386089499155, "loss": 1.84, "step": 26090 }, { "epoch": 0.87, "grad_norm": 0.4716613292694092, "learning_rate": 0.00048435561530876255, "loss": 1.8633, "step": 26091 }, { "epoch": 0.87, "grad_norm": 0.4651484489440918, "learning_rate": 0.0004843473694987529, "loss": 1.8039, "step": 26092 }, { "epoch": 0.87, "grad_norm": 0.446539968252182, "learning_rate": 0.0004843391234649728, "loss": 1.7822, "step": 26093 }, { "epoch": 0.87, "grad_norm": 0.42606449127197266, "learning_rate": 0.0004843308772074321, "loss": 1.823, "step": 26094 }, { "epoch": 0.87, "grad_norm": 0.44452744722366333, "learning_rate": 0.000484322630726141, "loss": 1.7577, "step": 26095 }, { "epoch": 0.87, "grad_norm": 0.4717489182949066, "learning_rate": 0.00048431438402110925, "loss": 1.731, "step": 26096 }, { "epoch": 0.87, "grad_norm": 0.44849923253059387, "learning_rate": 0.000484306137092347, "loss": 1.7863, "step": 26097 }, { "epoch": 0.87, "grad_norm": 0.43643718957901, "learning_rate": 0.0004842978899398644, "loss": 1.8007, "step": 26098 }, { "epoch": 0.87, "grad_norm": 0.7092569470405579, "learning_rate": 0.00048428964256367116, "loss": 1.9115, "step": 26099 }, { "epoch": 0.87, "grad_norm": 0.4405379593372345, "learning_rate": 0.0004842813949637775, "loss": 1.8287, "step": 26100 }, { "epoch": 0.87, "grad_norm": 0.44775670766830444, "learning_rate": 0.0004842731471401934, "loss": 1.8494, "step": 26101 }, { "epoch": 0.87, "grad_norm": 0.4456306993961334, "learning_rate": 0.0004842648990929288, "loss": 1.7741, "step": 26102 }, { "epoch": 0.87, "grad_norm": 0.4463133215904236, "learning_rate": 0.0004842566508219938, "loss": 1.8074, "step": 26103 }, { "epoch": 0.87, "grad_norm": 0.43989303708076477, "learning_rate": 0.00048424840232739834, "loss": 1.7773, "step": 26104 }, { "epoch": 0.87, "grad_norm": 0.45390138030052185, "learning_rate": 0.0004842401536091525, "loss": 1.8129, "step": 26105 }, { "epoch": 0.87, "grad_norm": 0.45721420645713806, "learning_rate": 0.0004842319046672663, "loss": 1.858, "step": 26106 }, { "epoch": 0.87, "grad_norm": 0.44452613592147827, "learning_rate": 0.0004842236555017496, "loss": 1.7875, "step": 26107 }, { "epoch": 0.87, "grad_norm": 0.43878015875816345, "learning_rate": 0.00048421540611261264, "loss": 1.7681, "step": 26108 }, { "epoch": 0.87, "grad_norm": 0.4732429087162018, "learning_rate": 0.0004842071564998653, "loss": 1.7298, "step": 26109 }, { "epoch": 0.87, "grad_norm": 0.9087619781494141, "learning_rate": 0.00048419890666351757, "loss": 1.8202, "step": 26110 }, { "epoch": 0.87, "grad_norm": 0.4416663646697998, "learning_rate": 0.00048419065660357957, "loss": 1.8204, "step": 26111 }, { "epoch": 0.87, "grad_norm": 0.46058741211891174, "learning_rate": 0.00048418240632006114, "loss": 1.8687, "step": 26112 }, { "epoch": 0.87, "grad_norm": 0.4475310146808624, "learning_rate": 0.0004841741558129725, "loss": 1.7492, "step": 26113 }, { "epoch": 0.87, "grad_norm": 0.45352646708488464, "learning_rate": 0.0004841659050823236, "loss": 1.8228, "step": 26114 }, { "epoch": 0.87, "grad_norm": 0.44555747509002686, "learning_rate": 0.00048415765412812437, "loss": 1.7992, "step": 26115 }, { "epoch": 0.87, "grad_norm": 0.46022170782089233, "learning_rate": 0.00048414940295038493, "loss": 1.7905, "step": 26116 }, { "epoch": 0.87, "grad_norm": 0.46374961733818054, "learning_rate": 0.0004841411515491151, "loss": 1.7983, "step": 26117 }, { "epoch": 0.87, "grad_norm": 0.7941417694091797, "learning_rate": 0.0004841328999243252, "loss": 1.897, "step": 26118 }, { "epoch": 0.87, "grad_norm": 0.43758800625801086, "learning_rate": 0.000484124648076025, "loss": 1.7644, "step": 26119 }, { "epoch": 0.87, "grad_norm": 0.43632861971855164, "learning_rate": 0.0004841163960042247, "loss": 1.783, "step": 26120 }, { "epoch": 0.87, "grad_norm": 0.4566737115383148, "learning_rate": 0.0004841081437089342, "loss": 1.858, "step": 26121 }, { "epoch": 0.87, "grad_norm": 0.43245166540145874, "learning_rate": 0.0004840998911901635, "loss": 1.732, "step": 26122 }, { "epoch": 0.87, "grad_norm": 0.4465698003768921, "learning_rate": 0.00048409163844792265, "loss": 1.8554, "step": 26123 }, { "epoch": 0.87, "grad_norm": 0.44796472787857056, "learning_rate": 0.0004840833854822216, "loss": 1.8233, "step": 26124 }, { "epoch": 0.87, "grad_norm": 0.4300970733165741, "learning_rate": 0.0004840751322930706, "loss": 1.7891, "step": 26125 }, { "epoch": 0.87, "grad_norm": 0.4276273846626282, "learning_rate": 0.0004840668788804794, "loss": 1.777, "step": 26126 }, { "epoch": 0.87, "grad_norm": 0.4404635429382324, "learning_rate": 0.00048405862524445825, "loss": 1.7213, "step": 26127 }, { "epoch": 0.87, "grad_norm": 0.43514835834503174, "learning_rate": 0.00048405037138501695, "loss": 1.8335, "step": 26128 }, { "epoch": 0.87, "grad_norm": 0.45098814368247986, "learning_rate": 0.00048404211730216566, "loss": 1.711, "step": 26129 }, { "epoch": 0.87, "grad_norm": 0.4326532781124115, "learning_rate": 0.00048403386299591435, "loss": 1.7955, "step": 26130 }, { "epoch": 0.87, "grad_norm": 0.4524489939212799, "learning_rate": 0.0004840256084662731, "loss": 1.7708, "step": 26131 }, { "epoch": 0.87, "grad_norm": 0.43683305382728577, "learning_rate": 0.00048401735371325167, "loss": 1.8721, "step": 26132 }, { "epoch": 0.87, "grad_norm": 0.43382370471954346, "learning_rate": 0.00048400909873686046, "loss": 1.8155, "step": 26133 }, { "epoch": 0.87, "grad_norm": 0.43794959783554077, "learning_rate": 0.0004840008435371093, "loss": 1.8337, "step": 26134 }, { "epoch": 0.87, "grad_norm": 0.4333614110946655, "learning_rate": 0.00048399258811400816, "loss": 1.818, "step": 26135 }, { "epoch": 0.87, "grad_norm": 0.4237213730812073, "learning_rate": 0.0004839843324675672, "loss": 1.8255, "step": 26136 }, { "epoch": 0.87, "grad_norm": 0.46480685472488403, "learning_rate": 0.0004839760765977964, "loss": 1.8341, "step": 26137 }, { "epoch": 0.87, "grad_norm": 0.42828911542892456, "learning_rate": 0.00048396782050470576, "loss": 1.8259, "step": 26138 }, { "epoch": 0.87, "grad_norm": 0.4531932771205902, "learning_rate": 0.00048395956418830513, "loss": 1.8235, "step": 26139 }, { "epoch": 0.87, "grad_norm": 0.4488420784473419, "learning_rate": 0.0004839513076486049, "loss": 1.8665, "step": 26140 }, { "epoch": 0.87, "grad_norm": 0.4436931908130646, "learning_rate": 0.00048394305088561476, "loss": 1.8774, "step": 26141 }, { "epoch": 0.87, "grad_norm": 0.4224188029766083, "learning_rate": 0.00048393479389934486, "loss": 1.8127, "step": 26142 }, { "epoch": 0.87, "grad_norm": 0.43980205059051514, "learning_rate": 0.00048392653668980526, "loss": 1.7203, "step": 26143 }, { "epoch": 0.87, "grad_norm": 0.43229272961616516, "learning_rate": 0.00048391827925700593, "loss": 1.7744, "step": 26144 }, { "epoch": 0.87, "grad_norm": 0.434708833694458, "learning_rate": 0.0004839100216009569, "loss": 1.7684, "step": 26145 }, { "epoch": 0.87, "grad_norm": 0.43437719345092773, "learning_rate": 0.0004839017637216683, "loss": 1.8402, "step": 26146 }, { "epoch": 0.87, "grad_norm": 0.4293424189090729, "learning_rate": 0.00048389350561914984, "loss": 1.7979, "step": 26147 }, { "epoch": 0.87, "grad_norm": 0.43992605805397034, "learning_rate": 0.00048388524729341196, "loss": 1.8215, "step": 26148 }, { "epoch": 0.87, "grad_norm": 0.4310612678527832, "learning_rate": 0.00048387698874446434, "loss": 1.8495, "step": 26149 }, { "epoch": 0.87, "grad_norm": 0.43683284521102905, "learning_rate": 0.0004838687299723173, "loss": 1.8491, "step": 26150 }, { "epoch": 0.87, "grad_norm": 0.4341604709625244, "learning_rate": 0.0004838604709769806, "loss": 1.8108, "step": 26151 }, { "epoch": 0.87, "grad_norm": 0.4345417022705078, "learning_rate": 0.0004838522117584644, "loss": 1.8027, "step": 26152 }, { "epoch": 0.87, "grad_norm": 0.4301111102104187, "learning_rate": 0.00048384395231677873, "loss": 1.7687, "step": 26153 }, { "epoch": 0.87, "grad_norm": 0.4426972270011902, "learning_rate": 0.00048383569265193354, "loss": 1.8045, "step": 26154 }, { "epoch": 0.87, "grad_norm": 0.42894020676612854, "learning_rate": 0.000483827432763939, "loss": 1.7547, "step": 26155 }, { "epoch": 0.87, "grad_norm": 0.44961491227149963, "learning_rate": 0.00048381917265280495, "loss": 1.8497, "step": 26156 }, { "epoch": 0.87, "grad_norm": 0.4325878322124481, "learning_rate": 0.00048381091231854156, "loss": 1.7833, "step": 26157 }, { "epoch": 0.87, "grad_norm": 0.4401657283306122, "learning_rate": 0.00048380265176115874, "loss": 1.8527, "step": 26158 }, { "epoch": 0.87, "grad_norm": 0.4314722716808319, "learning_rate": 0.00048379439098066664, "loss": 1.7806, "step": 26159 }, { "epoch": 0.87, "grad_norm": 0.44029587507247925, "learning_rate": 0.00048378612997707526, "loss": 1.7563, "step": 26160 }, { "epoch": 0.87, "grad_norm": 0.44551557302474976, "learning_rate": 0.0004837778687503945, "loss": 1.8184, "step": 26161 }, { "epoch": 0.87, "grad_norm": 0.4307284951210022, "learning_rate": 0.00048376960730063464, "loss": 1.8264, "step": 26162 }, { "epoch": 0.87, "grad_norm": 0.44756945967674255, "learning_rate": 0.00048376134562780543, "loss": 1.8516, "step": 26163 }, { "epoch": 0.87, "grad_norm": 0.4433180093765259, "learning_rate": 0.000483753083731917, "loss": 1.8508, "step": 26164 }, { "epoch": 0.87, "grad_norm": 0.4327514171600342, "learning_rate": 0.00048374482161297946, "loss": 1.7795, "step": 26165 }, { "epoch": 0.87, "grad_norm": 0.441677451133728, "learning_rate": 0.0004837365592710028, "loss": 1.7935, "step": 26166 }, { "epoch": 0.87, "grad_norm": 0.4424203336238861, "learning_rate": 0.000483728296705997, "loss": 1.7985, "step": 26167 }, { "epoch": 0.87, "grad_norm": 0.42704474925994873, "learning_rate": 0.00048372003391797215, "loss": 1.7965, "step": 26168 }, { "epoch": 0.87, "grad_norm": 0.4522201716899872, "learning_rate": 0.0004837117709069382, "loss": 1.7646, "step": 26169 }, { "epoch": 0.87, "grad_norm": 0.4357951879501343, "learning_rate": 0.00048370350767290517, "loss": 1.8198, "step": 26170 }, { "epoch": 0.87, "grad_norm": 0.44806674122810364, "learning_rate": 0.00048369524421588327, "loss": 1.834, "step": 26171 }, { "epoch": 0.87, "grad_norm": 0.44158101081848145, "learning_rate": 0.00048368698053588235, "loss": 1.7904, "step": 26172 }, { "epoch": 0.87, "grad_norm": 0.42000049352645874, "learning_rate": 0.0004836787166329126, "loss": 1.7678, "step": 26173 }, { "epoch": 0.87, "grad_norm": 0.4413134455680847, "learning_rate": 0.00048367045250698377, "loss": 1.846, "step": 26174 }, { "epoch": 0.87, "grad_norm": 0.45572522282600403, "learning_rate": 0.0004836621881581062, "loss": 1.7869, "step": 26175 }, { "epoch": 0.87, "grad_norm": 0.4384426474571228, "learning_rate": 0.00048365392358628976, "loss": 1.7845, "step": 26176 }, { "epoch": 0.87, "grad_norm": 0.45692867040634155, "learning_rate": 0.00048364565879154453, "loss": 1.7839, "step": 26177 }, { "epoch": 0.87, "grad_norm": 0.44471579790115356, "learning_rate": 0.00048363739377388056, "loss": 1.814, "step": 26178 }, { "epoch": 0.87, "grad_norm": 0.4344428777694702, "learning_rate": 0.00048362912853330776, "loss": 1.7975, "step": 26179 }, { "epoch": 0.87, "grad_norm": 0.44214436411857605, "learning_rate": 0.0004836208630698364, "loss": 1.7704, "step": 26180 }, { "epoch": 0.87, "grad_norm": 0.43457359075546265, "learning_rate": 0.0004836125973834762, "loss": 1.7528, "step": 26181 }, { "epoch": 0.87, "grad_norm": 0.43448448181152344, "learning_rate": 0.0004836043314742375, "loss": 1.82, "step": 26182 }, { "epoch": 0.87, "grad_norm": 0.4380146861076355, "learning_rate": 0.00048359606534213007, "loss": 1.733, "step": 26183 }, { "epoch": 0.87, "grad_norm": 0.44880935549736023, "learning_rate": 0.00048358779898716414, "loss": 1.8862, "step": 26184 }, { "epoch": 0.87, "grad_norm": 0.451101154088974, "learning_rate": 0.00048357953240934967, "loss": 1.8476, "step": 26185 }, { "epoch": 0.87, "grad_norm": 0.4670338034629822, "learning_rate": 0.0004835712656086967, "loss": 1.8324, "step": 26186 }, { "epoch": 0.87, "grad_norm": 0.46179503202438354, "learning_rate": 0.0004835629985852152, "loss": 1.8059, "step": 26187 }, { "epoch": 0.87, "grad_norm": 0.4315637946128845, "learning_rate": 0.0004835547313389154, "loss": 1.7753, "step": 26188 }, { "epoch": 0.87, "grad_norm": 0.43913763761520386, "learning_rate": 0.00048354646386980705, "loss": 1.8022, "step": 26189 }, { "epoch": 0.87, "grad_norm": 0.5439850687980652, "learning_rate": 0.0004835381961779004, "loss": 1.7842, "step": 26190 }, { "epoch": 0.87, "grad_norm": 0.4248959720134735, "learning_rate": 0.0004835299282632054, "loss": 1.8265, "step": 26191 }, { "epoch": 0.87, "grad_norm": 0.4577462375164032, "learning_rate": 0.00048352166012573223, "loss": 1.7525, "step": 26192 }, { "epoch": 0.87, "grad_norm": 0.44956302642822266, "learning_rate": 0.0004835133917654907, "loss": 1.8534, "step": 26193 }, { "epoch": 0.87, "grad_norm": 0.4291062355041504, "learning_rate": 0.0004835051231824909, "loss": 1.865, "step": 26194 }, { "epoch": 0.87, "grad_norm": 0.42700713872909546, "learning_rate": 0.00048349685437674305, "loss": 1.8068, "step": 26195 }, { "epoch": 0.87, "grad_norm": 0.4354592263698578, "learning_rate": 0.00048348858534825697, "loss": 1.8385, "step": 26196 }, { "epoch": 0.87, "grad_norm": 0.42377033829689026, "learning_rate": 0.0004834803160970428, "loss": 1.8137, "step": 26197 }, { "epoch": 0.87, "grad_norm": 0.4256226420402527, "learning_rate": 0.0004834720466231106, "loss": 1.8061, "step": 26198 }, { "epoch": 0.87, "grad_norm": 0.44342973828315735, "learning_rate": 0.0004834637769264703, "loss": 1.7833, "step": 26199 }, { "epoch": 0.87, "grad_norm": 0.43004971742630005, "learning_rate": 0.00048345550700713206, "loss": 1.81, "step": 26200 }, { "epoch": 0.87, "grad_norm": 0.4469892084598541, "learning_rate": 0.00048344723686510584, "loss": 1.8036, "step": 26201 }, { "epoch": 0.87, "grad_norm": 0.46264222264289856, "learning_rate": 0.0004834389665004017, "loss": 1.7943, "step": 26202 }, { "epoch": 0.87, "grad_norm": 0.4246823787689209, "learning_rate": 0.0004834306959130297, "loss": 1.7748, "step": 26203 }, { "epoch": 0.87, "grad_norm": 0.4267469048500061, "learning_rate": 0.00048342242510299985, "loss": 1.788, "step": 26204 }, { "epoch": 0.87, "grad_norm": 0.41904017329216003, "learning_rate": 0.0004834141540703222, "loss": 1.8029, "step": 26205 }, { "epoch": 0.87, "grad_norm": 0.44356077909469604, "learning_rate": 0.0004834058828150068, "loss": 1.8649, "step": 26206 }, { "epoch": 0.87, "grad_norm": 0.4482700228691101, "learning_rate": 0.0004833976113370637, "loss": 1.733, "step": 26207 }, { "epoch": 0.87, "grad_norm": 0.4283629357814789, "learning_rate": 0.0004833893396365029, "loss": 1.8271, "step": 26208 }, { "epoch": 0.87, "grad_norm": 0.449526846408844, "learning_rate": 0.0004833810677133344, "loss": 1.819, "step": 26209 }, { "epoch": 0.87, "grad_norm": 0.4386094808578491, "learning_rate": 0.00048337279556756845, "loss": 1.794, "step": 26210 }, { "epoch": 0.87, "grad_norm": 0.4444522559642792, "learning_rate": 0.0004833645231992148, "loss": 1.8368, "step": 26211 }, { "epoch": 0.87, "grad_norm": 0.44581252336502075, "learning_rate": 0.0004833562506082837, "loss": 1.8662, "step": 26212 }, { "epoch": 0.87, "grad_norm": 0.4485180675983429, "learning_rate": 0.0004833479777947852, "loss": 1.7911, "step": 26213 }, { "epoch": 0.87, "grad_norm": 0.4388943314552307, "learning_rate": 0.00048333970475872913, "loss": 1.7356, "step": 26214 }, { "epoch": 0.87, "grad_norm": 0.44560718536376953, "learning_rate": 0.00048333143150012576, "loss": 1.7279, "step": 26215 }, { "epoch": 0.87, "grad_norm": 0.46279653906822205, "learning_rate": 0.000483323158018985, "loss": 1.831, "step": 26216 }, { "epoch": 0.87, "grad_norm": 0.4621562659740448, "learning_rate": 0.000483314884315317, "loss": 1.8643, "step": 26217 }, { "epoch": 0.87, "grad_norm": 0.47226619720458984, "learning_rate": 0.0004833066103891317, "loss": 1.8234, "step": 26218 }, { "epoch": 0.87, "grad_norm": 0.43517544865608215, "learning_rate": 0.00048329833624043914, "loss": 1.7918, "step": 26219 }, { "epoch": 0.87, "grad_norm": 0.44109728932380676, "learning_rate": 0.00048329006186924957, "loss": 1.8078, "step": 26220 }, { "epoch": 0.87, "grad_norm": 0.43591421842575073, "learning_rate": 0.0004832817872755727, "loss": 1.8432, "step": 26221 }, { "epoch": 0.87, "grad_norm": 0.46709930896759033, "learning_rate": 0.0004832735124594188, "loss": 1.8193, "step": 26222 }, { "epoch": 0.87, "grad_norm": 0.45733460783958435, "learning_rate": 0.0004832652374207979, "loss": 1.7917, "step": 26223 }, { "epoch": 0.87, "grad_norm": 0.43674027919769287, "learning_rate": 0.0004832569621597199, "loss": 1.843, "step": 26224 }, { "epoch": 0.87, "grad_norm": 0.45396316051483154, "learning_rate": 0.00048324868667619505, "loss": 1.8797, "step": 26225 }, { "epoch": 0.87, "grad_norm": 0.4456064999103546, "learning_rate": 0.00048324041097023323, "loss": 1.8178, "step": 26226 }, { "epoch": 0.87, "grad_norm": 0.43565085530281067, "learning_rate": 0.0004832321350418446, "loss": 1.8813, "step": 26227 }, { "epoch": 0.87, "grad_norm": 0.4596591889858246, "learning_rate": 0.00048322385889103915, "loss": 1.8333, "step": 26228 }, { "epoch": 0.87, "grad_norm": 0.4410904049873352, "learning_rate": 0.0004832155825178269, "loss": 1.7145, "step": 26229 }, { "epoch": 0.87, "grad_norm": 0.448759526014328, "learning_rate": 0.000483207305922218, "loss": 1.817, "step": 26230 }, { "epoch": 0.87, "grad_norm": 0.4370960295200348, "learning_rate": 0.0004831990291042223, "loss": 1.8621, "step": 26231 }, { "epoch": 0.87, "grad_norm": 0.48406755924224854, "learning_rate": 0.0004831907520638501, "loss": 1.8248, "step": 26232 }, { "epoch": 0.87, "grad_norm": 0.43791893124580383, "learning_rate": 0.00048318247480111127, "loss": 1.782, "step": 26233 }, { "epoch": 0.87, "grad_norm": 0.44150441884994507, "learning_rate": 0.00048317419731601585, "loss": 1.8117, "step": 26234 }, { "epoch": 0.87, "grad_norm": 0.4517729580402374, "learning_rate": 0.00048316591960857405, "loss": 1.8041, "step": 26235 }, { "epoch": 0.87, "grad_norm": 0.4419459104537964, "learning_rate": 0.0004831576416787957, "loss": 1.744, "step": 26236 }, { "epoch": 0.87, "grad_norm": 0.42860841751098633, "learning_rate": 0.00048314936352669105, "loss": 1.764, "step": 26237 }, { "epoch": 0.87, "grad_norm": 0.43976104259490967, "learning_rate": 0.0004831410851522701, "loss": 1.8055, "step": 26238 }, { "epoch": 0.87, "grad_norm": 0.44261860847473145, "learning_rate": 0.00048313280655554267, "loss": 1.7791, "step": 26239 }, { "epoch": 0.87, "grad_norm": 0.46662265062332153, "learning_rate": 0.00048312452773651907, "loss": 1.8319, "step": 26240 }, { "epoch": 0.87, "grad_norm": 0.43815532326698303, "learning_rate": 0.00048311624869520935, "loss": 1.7983, "step": 26241 }, { "epoch": 0.87, "grad_norm": 0.46344393491744995, "learning_rate": 0.00048310796943162344, "loss": 1.824, "step": 26242 }, { "epoch": 0.87, "grad_norm": 0.4946453273296356, "learning_rate": 0.0004830996899457715, "loss": 1.9348, "step": 26243 }, { "epoch": 0.87, "grad_norm": 0.45914825797080994, "learning_rate": 0.00048309141023766345, "loss": 1.7854, "step": 26244 }, { "epoch": 0.87, "grad_norm": 0.45516496896743774, "learning_rate": 0.0004830831303073094, "loss": 1.7853, "step": 26245 }, { "epoch": 0.87, "grad_norm": 0.4465012848377228, "learning_rate": 0.0004830748501547194, "loss": 1.8675, "step": 26246 }, { "epoch": 0.87, "grad_norm": 0.43968597054481506, "learning_rate": 0.00048306656977990353, "loss": 1.7867, "step": 26247 }, { "epoch": 0.87, "grad_norm": 0.4479396939277649, "learning_rate": 0.0004830582891828718, "loss": 1.8321, "step": 26248 }, { "epoch": 0.87, "grad_norm": 0.4289889335632324, "learning_rate": 0.0004830500083636343, "loss": 1.8841, "step": 26249 }, { "epoch": 0.87, "grad_norm": 0.42557141184806824, "learning_rate": 0.0004830417273222011, "loss": 1.8522, "step": 26250 }, { "epoch": 0.87, "grad_norm": 0.4503977596759796, "learning_rate": 0.00048303344605858206, "loss": 1.8356, "step": 26251 }, { "epoch": 0.87, "grad_norm": 0.4371325969696045, "learning_rate": 0.00048302516457278757, "loss": 1.8498, "step": 26252 }, { "epoch": 0.87, "grad_norm": 0.4341447651386261, "learning_rate": 0.0004830168828648274, "loss": 1.8431, "step": 26253 }, { "epoch": 0.87, "grad_norm": 0.4642527401447296, "learning_rate": 0.0004830086009347117, "loss": 1.8063, "step": 26254 }, { "epoch": 0.87, "grad_norm": 0.4325704574584961, "learning_rate": 0.0004830003187824506, "loss": 1.8799, "step": 26255 }, { "epoch": 0.87, "grad_norm": 0.43312814831733704, "learning_rate": 0.000482992036408054, "loss": 1.8478, "step": 26256 }, { "epoch": 0.87, "grad_norm": 0.47974249720573425, "learning_rate": 0.00048298375381153206, "loss": 1.7416, "step": 26257 }, { "epoch": 0.87, "grad_norm": 0.43319636583328247, "learning_rate": 0.00048297547099289477, "loss": 1.7936, "step": 26258 }, { "epoch": 0.87, "grad_norm": 0.4365485608577728, "learning_rate": 0.0004829671879521522, "loss": 1.769, "step": 26259 }, { "epoch": 0.87, "grad_norm": 0.47107648849487305, "learning_rate": 0.00048295890468931453, "loss": 1.7676, "step": 26260 }, { "epoch": 0.87, "grad_norm": 0.4433414340019226, "learning_rate": 0.00048295062120439164, "loss": 1.7963, "step": 26261 }, { "epoch": 0.87, "grad_norm": 0.44294437766075134, "learning_rate": 0.0004829423374973936, "loss": 1.8203, "step": 26262 }, { "epoch": 0.87, "grad_norm": 0.43803057074546814, "learning_rate": 0.00048293405356833057, "loss": 1.7929, "step": 26263 }, { "epoch": 0.87, "grad_norm": 0.6523570418357849, "learning_rate": 0.00048292576941721255, "loss": 1.8527, "step": 26264 }, { "epoch": 0.87, "grad_norm": 0.445722758769989, "learning_rate": 0.00048291748504404966, "loss": 1.805, "step": 26265 }, { "epoch": 0.87, "grad_norm": 0.46694663166999817, "learning_rate": 0.00048290920044885175, "loss": 1.8392, "step": 26266 }, { "epoch": 0.87, "grad_norm": 0.44925349950790405, "learning_rate": 0.0004829009156316292, "loss": 1.8337, "step": 26267 }, { "epoch": 0.87, "grad_norm": 0.4453401267528534, "learning_rate": 0.0004828926305923918, "loss": 1.7998, "step": 26268 }, { "epoch": 0.87, "grad_norm": 0.46506503224372864, "learning_rate": 0.0004828843453311497, "loss": 1.8772, "step": 26269 }, { "epoch": 0.87, "grad_norm": 0.46185383200645447, "learning_rate": 0.00048287605984791295, "loss": 1.8315, "step": 26270 }, { "epoch": 0.87, "grad_norm": 0.4409047067165375, "learning_rate": 0.0004828677741426915, "loss": 1.8352, "step": 26271 }, { "epoch": 0.87, "grad_norm": 0.47008609771728516, "learning_rate": 0.0004828594882154957, "loss": 1.7921, "step": 26272 }, { "epoch": 0.87, "grad_norm": 0.44074931740760803, "learning_rate": 0.0004828512020663354, "loss": 1.7976, "step": 26273 }, { "epoch": 0.87, "grad_norm": 0.6318185329437256, "learning_rate": 0.00048284291569522053, "loss": 1.7965, "step": 26274 }, { "epoch": 0.87, "grad_norm": 0.45119139552116394, "learning_rate": 0.00048283462910216144, "loss": 1.7891, "step": 26275 }, { "epoch": 0.87, "grad_norm": 0.4606214761734009, "learning_rate": 0.000482826342287168, "loss": 1.7941, "step": 26276 }, { "epoch": 0.87, "grad_norm": 0.45095255970954895, "learning_rate": 0.0004828180552502503, "loss": 1.8454, "step": 26277 }, { "epoch": 0.87, "grad_norm": 0.4295823574066162, "learning_rate": 0.00048280976799141846, "loss": 1.7438, "step": 26278 }, { "epoch": 0.87, "grad_norm": 0.46109580993652344, "learning_rate": 0.0004828014805106825, "loss": 1.8418, "step": 26279 }, { "epoch": 0.87, "grad_norm": 0.43764355778694153, "learning_rate": 0.0004827931928080525, "loss": 1.7917, "step": 26280 }, { "epoch": 0.87, "grad_norm": 0.445537269115448, "learning_rate": 0.0004827849048835384, "loss": 1.8799, "step": 26281 }, { "epoch": 0.87, "grad_norm": 0.43767040967941284, "learning_rate": 0.00048277661673715047, "loss": 1.868, "step": 26282 }, { "epoch": 0.87, "grad_norm": 0.47310858964920044, "learning_rate": 0.00048276832836889854, "loss": 1.7874, "step": 26283 }, { "epoch": 0.87, "grad_norm": 0.46707218885421753, "learning_rate": 0.0004827600397787929, "loss": 1.7977, "step": 26284 }, { "epoch": 0.87, "grad_norm": 0.43737247586250305, "learning_rate": 0.0004827517509668434, "loss": 1.8603, "step": 26285 }, { "epoch": 0.87, "grad_norm": 0.4329366087913513, "learning_rate": 0.0004827434619330603, "loss": 1.7698, "step": 26286 }, { "epoch": 0.87, "grad_norm": 0.4263293445110321, "learning_rate": 0.0004827351726774535, "loss": 1.807, "step": 26287 }, { "epoch": 0.87, "grad_norm": 0.4553930461406708, "learning_rate": 0.00048272688320003307, "loss": 1.7485, "step": 26288 }, { "epoch": 0.87, "grad_norm": 0.45569661259651184, "learning_rate": 0.00048271859350080924, "loss": 1.8827, "step": 26289 }, { "epoch": 0.87, "grad_norm": 0.44069674611091614, "learning_rate": 0.0004827103035797919, "loss": 1.8298, "step": 26290 }, { "epoch": 0.87, "grad_norm": 0.44642457365989685, "learning_rate": 0.0004827020134369912, "loss": 1.8594, "step": 26291 }, { "epoch": 0.87, "grad_norm": 0.43444421887397766, "learning_rate": 0.0004826937230724172, "loss": 1.8577, "step": 26292 }, { "epoch": 0.87, "grad_norm": 0.4337841868400574, "learning_rate": 0.0004826854324860799, "loss": 1.8854, "step": 26293 }, { "epoch": 0.87, "grad_norm": 0.4193272888660431, "learning_rate": 0.0004826771416779894, "loss": 1.7943, "step": 26294 }, { "epoch": 0.87, "grad_norm": 0.42779141664505005, "learning_rate": 0.0004826688506481558, "loss": 1.7277, "step": 26295 }, { "epoch": 0.87, "grad_norm": 0.44062545895576477, "learning_rate": 0.0004826605593965891, "loss": 1.8492, "step": 26296 }, { "epoch": 0.87, "grad_norm": 0.4334719777107239, "learning_rate": 0.0004826522679232994, "loss": 1.8313, "step": 26297 }, { "epoch": 0.87, "grad_norm": 0.4270193576812744, "learning_rate": 0.0004826439762282967, "loss": 1.7909, "step": 26298 }, { "epoch": 0.87, "grad_norm": 0.4430638551712036, "learning_rate": 0.0004826356843115912, "loss": 1.7523, "step": 26299 }, { "epoch": 0.88, "grad_norm": 0.43800270557403564, "learning_rate": 0.00048262739217319286, "loss": 1.8331, "step": 26300 }, { "epoch": 0.88, "grad_norm": 0.43686404824256897, "learning_rate": 0.0004826190998131118, "loss": 1.8479, "step": 26301 }, { "epoch": 0.88, "grad_norm": 0.4812026619911194, "learning_rate": 0.00048261080723135805, "loss": 1.8861, "step": 26302 }, { "epoch": 0.88, "grad_norm": 0.42934292554855347, "learning_rate": 0.0004826025144279417, "loss": 1.7516, "step": 26303 }, { "epoch": 0.88, "grad_norm": 0.4344969391822815, "learning_rate": 0.00048259422140287274, "loss": 1.7875, "step": 26304 }, { "epoch": 0.88, "grad_norm": 0.4735506474971771, "learning_rate": 0.0004825859281561614, "loss": 1.8327, "step": 26305 }, { "epoch": 0.88, "grad_norm": 0.4698278605937958, "learning_rate": 0.0004825776346878176, "loss": 1.8954, "step": 26306 }, { "epoch": 0.88, "grad_norm": 0.4510119557380676, "learning_rate": 0.00048256934099785145, "loss": 1.8637, "step": 26307 }, { "epoch": 0.88, "grad_norm": 0.4632022976875305, "learning_rate": 0.000482561047086273, "loss": 1.8333, "step": 26308 }, { "epoch": 0.88, "grad_norm": 0.4431914687156677, "learning_rate": 0.0004825527529530923, "loss": 1.8622, "step": 26309 }, { "epoch": 0.88, "grad_norm": 0.4501257836818695, "learning_rate": 0.0004825444585983196, "loss": 1.7799, "step": 26310 }, { "epoch": 0.88, "grad_norm": 0.4712890088558197, "learning_rate": 0.0004825361640219646, "loss": 1.7035, "step": 26311 }, { "epoch": 0.88, "grad_norm": 0.43393635749816895, "learning_rate": 0.0004825278692240378, "loss": 1.8691, "step": 26312 }, { "epoch": 0.88, "grad_norm": 0.4437945783138275, "learning_rate": 0.0004825195742045489, "loss": 1.8295, "step": 26313 }, { "epoch": 0.88, "grad_norm": 0.4573616087436676, "learning_rate": 0.0004825112789635083, "loss": 1.7605, "step": 26314 }, { "epoch": 0.88, "grad_norm": 0.4314330518245697, "learning_rate": 0.0004825029835009258, "loss": 1.7614, "step": 26315 }, { "epoch": 0.88, "grad_norm": 0.45485228300094604, "learning_rate": 0.00048249468781681157, "loss": 1.8171, "step": 26316 }, { "epoch": 0.88, "grad_norm": 0.4374728798866272, "learning_rate": 0.00048248639191117573, "loss": 1.776, "step": 26317 }, { "epoch": 0.88, "grad_norm": 0.44867590069770813, "learning_rate": 0.0004824780957840282, "loss": 1.7965, "step": 26318 }, { "epoch": 0.88, "grad_norm": 0.4454669952392578, "learning_rate": 0.00048246979943537924, "loss": 1.883, "step": 26319 }, { "epoch": 0.88, "grad_norm": 0.4298781156539917, "learning_rate": 0.0004824615028652388, "loss": 1.8551, "step": 26320 }, { "epoch": 0.88, "grad_norm": 0.43623560667037964, "learning_rate": 0.000482453206073617, "loss": 1.8614, "step": 26321 }, { "epoch": 0.88, "grad_norm": 0.4410524368286133, "learning_rate": 0.0004824449090605238, "loss": 1.8645, "step": 26322 }, { "epoch": 0.88, "grad_norm": 0.4476930797100067, "learning_rate": 0.00048243661182596943, "loss": 1.742, "step": 26323 }, { "epoch": 0.88, "grad_norm": 0.42834722995758057, "learning_rate": 0.00048242831436996395, "loss": 1.762, "step": 26324 }, { "epoch": 0.88, "grad_norm": 0.4351707994937897, "learning_rate": 0.00048242001669251733, "loss": 1.7993, "step": 26325 }, { "epoch": 0.88, "grad_norm": 0.43121907114982605, "learning_rate": 0.00048241171879363965, "loss": 1.7609, "step": 26326 }, { "epoch": 0.88, "grad_norm": 0.4327845275402069, "learning_rate": 0.0004824034206733411, "loss": 1.7955, "step": 26327 }, { "epoch": 0.88, "grad_norm": 0.44458892941474915, "learning_rate": 0.0004823951223316316, "loss": 1.8139, "step": 26328 }, { "epoch": 0.88, "grad_norm": 0.46145227551460266, "learning_rate": 0.0004823868237685213, "loss": 1.8534, "step": 26329 }, { "epoch": 0.88, "grad_norm": 0.4585248529911041, "learning_rate": 0.0004823785249840203, "loss": 1.8921, "step": 26330 }, { "epoch": 0.88, "grad_norm": 0.45361557602882385, "learning_rate": 0.00048237022597813863, "loss": 1.8363, "step": 26331 }, { "epoch": 0.88, "grad_norm": 0.4375653862953186, "learning_rate": 0.0004823619267508864, "loss": 1.799, "step": 26332 }, { "epoch": 0.88, "grad_norm": 0.4536852240562439, "learning_rate": 0.0004823536273022736, "loss": 1.8271, "step": 26333 }, { "epoch": 0.88, "grad_norm": 0.4389244318008423, "learning_rate": 0.00048234532763231035, "loss": 1.8635, "step": 26334 }, { "epoch": 0.88, "grad_norm": 0.4438253343105316, "learning_rate": 0.00048233702774100687, "loss": 1.7704, "step": 26335 }, { "epoch": 0.88, "grad_norm": 0.4576173722743988, "learning_rate": 0.000482328727628373, "loss": 1.8848, "step": 26336 }, { "epoch": 0.88, "grad_norm": 0.4625169038772583, "learning_rate": 0.0004823204272944189, "loss": 1.7677, "step": 26337 }, { "epoch": 0.88, "grad_norm": 0.4414019286632538, "learning_rate": 0.0004823121267391547, "loss": 1.8701, "step": 26338 }, { "epoch": 0.88, "grad_norm": 0.4378298223018646, "learning_rate": 0.00048230382596259054, "loss": 1.8871, "step": 26339 }, { "epoch": 0.88, "grad_norm": 0.4323764443397522, "learning_rate": 0.00048229552496473633, "loss": 1.8841, "step": 26340 }, { "epoch": 0.88, "grad_norm": 0.4238418638706207, "learning_rate": 0.0004822872237456021, "loss": 1.8448, "step": 26341 }, { "epoch": 0.88, "grad_norm": 0.43051618337631226, "learning_rate": 0.0004822789223051982, "loss": 1.8637, "step": 26342 }, { "epoch": 0.88, "grad_norm": 0.4636939465999603, "learning_rate": 0.0004822706206435344, "loss": 1.8526, "step": 26343 }, { "epoch": 0.88, "grad_norm": 0.442720502614975, "learning_rate": 0.00048226231876062105, "loss": 1.8645, "step": 26344 }, { "epoch": 0.88, "grad_norm": 0.4270060956478119, "learning_rate": 0.00048225401665646803, "loss": 1.7682, "step": 26345 }, { "epoch": 0.88, "grad_norm": 0.4365774393081665, "learning_rate": 0.0004822457143310855, "loss": 1.7872, "step": 26346 }, { "epoch": 0.88, "grad_norm": 0.4498539865016937, "learning_rate": 0.00048223741178448357, "loss": 1.8313, "step": 26347 }, { "epoch": 0.88, "grad_norm": 0.43725457787513733, "learning_rate": 0.00048222910901667224, "loss": 1.8653, "step": 26348 }, { "epoch": 0.88, "grad_norm": 0.42425888776779175, "learning_rate": 0.0004822208060276616, "loss": 1.7808, "step": 26349 }, { "epoch": 0.88, "grad_norm": 0.4336625933647156, "learning_rate": 0.00048221250281746175, "loss": 1.7671, "step": 26350 }, { "epoch": 0.88, "grad_norm": 0.4314812421798706, "learning_rate": 0.00048220419938608275, "loss": 1.7794, "step": 26351 }, { "epoch": 0.88, "grad_norm": 0.4520663917064667, "learning_rate": 0.0004821958957335348, "loss": 1.7726, "step": 26352 }, { "epoch": 0.88, "grad_norm": 0.46085047721862793, "learning_rate": 0.0004821875918598278, "loss": 1.7727, "step": 26353 }, { "epoch": 0.88, "grad_norm": 0.44529569149017334, "learning_rate": 0.000482179287764972, "loss": 1.7748, "step": 26354 }, { "epoch": 0.88, "grad_norm": 0.46503838896751404, "learning_rate": 0.0004821709834489773, "loss": 1.8159, "step": 26355 }, { "epoch": 0.88, "grad_norm": 0.4204358458518982, "learning_rate": 0.00048216267891185383, "loss": 1.8022, "step": 26356 }, { "epoch": 0.88, "grad_norm": 0.44485390186309814, "learning_rate": 0.0004821543741536118, "loss": 1.786, "step": 26357 }, { "epoch": 0.88, "grad_norm": 0.45308631658554077, "learning_rate": 0.000482146069174261, "loss": 1.8351, "step": 26358 }, { "epoch": 0.88, "grad_norm": 0.43999016284942627, "learning_rate": 0.00048213776397381194, "loss": 1.8456, "step": 26359 }, { "epoch": 0.88, "grad_norm": 0.458575963973999, "learning_rate": 0.0004821294585522744, "loss": 1.7841, "step": 26360 }, { "epoch": 0.88, "grad_norm": 0.44586828351020813, "learning_rate": 0.0004821211529096586, "loss": 1.8162, "step": 26361 }, { "epoch": 0.88, "grad_norm": 0.42935076355934143, "learning_rate": 0.0004821128470459744, "loss": 1.8295, "step": 26362 }, { "epoch": 0.88, "grad_norm": 0.46169513463974, "learning_rate": 0.0004821045409612321, "loss": 1.8666, "step": 26363 }, { "epoch": 0.88, "grad_norm": 0.4411247968673706, "learning_rate": 0.0004820962346554417, "loss": 1.8571, "step": 26364 }, { "epoch": 0.88, "grad_norm": 0.4503658711910248, "learning_rate": 0.00048208792812861343, "loss": 1.8187, "step": 26365 }, { "epoch": 0.88, "grad_norm": 0.45407959818840027, "learning_rate": 0.00048207962138075713, "loss": 1.8215, "step": 26366 }, { "epoch": 0.88, "grad_norm": 0.4671412706375122, "learning_rate": 0.000482071314411883, "loss": 1.8196, "step": 26367 }, { "epoch": 0.88, "grad_norm": 0.4414776563644409, "learning_rate": 0.0004820630072220012, "loss": 1.7452, "step": 26368 }, { "epoch": 0.88, "grad_norm": 0.44453418254852295, "learning_rate": 0.00048205469981112164, "loss": 1.7993, "step": 26369 }, { "epoch": 0.88, "grad_norm": 0.4358352720737457, "learning_rate": 0.0004820463921792546, "loss": 1.7795, "step": 26370 }, { "epoch": 0.88, "grad_norm": 0.4443524479866028, "learning_rate": 0.00048203808432641, "loss": 1.9297, "step": 26371 }, { "epoch": 0.88, "grad_norm": 0.45718318223953247, "learning_rate": 0.00048202977625259793, "loss": 1.7991, "step": 26372 }, { "epoch": 0.88, "grad_norm": 0.4244239330291748, "learning_rate": 0.0004820214679578286, "loss": 1.7476, "step": 26373 }, { "epoch": 0.88, "grad_norm": 0.42850351333618164, "learning_rate": 0.000482013159442112, "loss": 1.7979, "step": 26374 }, { "epoch": 0.88, "grad_norm": 0.42765024304389954, "learning_rate": 0.00048200485070545824, "loss": 1.7856, "step": 26375 }, { "epoch": 0.88, "grad_norm": 0.43552812933921814, "learning_rate": 0.0004819965417478775, "loss": 1.7426, "step": 26376 }, { "epoch": 0.88, "grad_norm": 0.43307220935821533, "learning_rate": 0.0004819882325693797, "loss": 1.7573, "step": 26377 }, { "epoch": 0.88, "grad_norm": 0.45114952325820923, "learning_rate": 0.000481979923169975, "loss": 1.8481, "step": 26378 }, { "epoch": 0.88, "grad_norm": 0.44340357184410095, "learning_rate": 0.00048197161354967345, "loss": 1.7943, "step": 26379 }, { "epoch": 0.88, "grad_norm": 0.44901999831199646, "learning_rate": 0.0004819633037084852, "loss": 1.7934, "step": 26380 }, { "epoch": 0.88, "grad_norm": 0.4420660734176636, "learning_rate": 0.00048195499364642034, "loss": 1.7405, "step": 26381 }, { "epoch": 0.88, "grad_norm": 0.4416554868221283, "learning_rate": 0.0004819466833634889, "loss": 1.8714, "step": 26382 }, { "epoch": 0.88, "grad_norm": 0.4557141363620758, "learning_rate": 0.000481938372859701, "loss": 1.8541, "step": 26383 }, { "epoch": 0.88, "grad_norm": 0.45319250226020813, "learning_rate": 0.0004819300621350668, "loss": 1.829, "step": 26384 }, { "epoch": 0.88, "grad_norm": 0.46119117736816406, "learning_rate": 0.0004819217511895962, "loss": 1.7969, "step": 26385 }, { "epoch": 0.88, "grad_norm": 0.44883450865745544, "learning_rate": 0.0004819134400232995, "loss": 1.8218, "step": 26386 }, { "epoch": 0.88, "grad_norm": 0.42951980233192444, "learning_rate": 0.0004819051286361866, "loss": 1.7902, "step": 26387 }, { "epoch": 0.88, "grad_norm": 0.44852837920188904, "learning_rate": 0.0004818968170282677, "loss": 1.8146, "step": 26388 }, { "epoch": 0.88, "grad_norm": 0.46008220314979553, "learning_rate": 0.0004818885051995528, "loss": 1.8517, "step": 26389 }, { "epoch": 0.88, "grad_norm": 0.4514389932155609, "learning_rate": 0.0004818801931500522, "loss": 1.7994, "step": 26390 }, { "epoch": 0.88, "grad_norm": 0.4357602596282959, "learning_rate": 0.0004818718808797758, "loss": 1.7308, "step": 26391 }, { "epoch": 0.88, "grad_norm": 0.43287956714630127, "learning_rate": 0.0004818635683887336, "loss": 1.8098, "step": 26392 }, { "epoch": 0.88, "grad_norm": 0.43991604447364807, "learning_rate": 0.00048185525567693595, "loss": 1.756, "step": 26393 }, { "epoch": 0.88, "grad_norm": 0.4444347023963928, "learning_rate": 0.0004818469427443928, "loss": 1.8113, "step": 26394 }, { "epoch": 0.88, "grad_norm": 0.48444563150405884, "learning_rate": 0.00048183862959111427, "loss": 1.8277, "step": 26395 }, { "epoch": 0.88, "grad_norm": 0.4625689685344696, "learning_rate": 0.00048183031621711037, "loss": 1.7553, "step": 26396 }, { "epoch": 0.88, "grad_norm": 0.4328466057777405, "learning_rate": 0.0004818220026223913, "loss": 1.8359, "step": 26397 }, { "epoch": 0.88, "grad_norm": 0.43357259035110474, "learning_rate": 0.0004818136888069671, "loss": 1.7967, "step": 26398 }, { "epoch": 0.88, "grad_norm": 0.45012232661247253, "learning_rate": 0.0004818053747708479, "loss": 1.9042, "step": 26399 }, { "epoch": 0.88, "grad_norm": 0.43685808777809143, "learning_rate": 0.00048179706051404376, "loss": 1.8201, "step": 26400 }, { "epoch": 0.88, "grad_norm": 0.4359135627746582, "learning_rate": 0.00048178874603656475, "loss": 1.8005, "step": 26401 }, { "epoch": 0.88, "grad_norm": 0.436851441860199, "learning_rate": 0.00048178043133842105, "loss": 1.8892, "step": 26402 }, { "epoch": 0.88, "grad_norm": 0.46677166223526, "learning_rate": 0.00048177211641962263, "loss": 1.8178, "step": 26403 }, { "epoch": 0.88, "grad_norm": 0.46272554993629456, "learning_rate": 0.0004817638012801796, "loss": 1.744, "step": 26404 }, { "epoch": 0.88, "grad_norm": 0.4371448755264282, "learning_rate": 0.0004817554859201021, "loss": 1.7938, "step": 26405 }, { "epoch": 0.88, "grad_norm": 0.4351428151130676, "learning_rate": 0.0004817471703394003, "loss": 1.8607, "step": 26406 }, { "epoch": 0.88, "grad_norm": 0.44788017868995667, "learning_rate": 0.00048173885453808423, "loss": 1.8176, "step": 26407 }, { "epoch": 0.88, "grad_norm": 0.4485759735107422, "learning_rate": 0.00048173053851616394, "loss": 1.8284, "step": 26408 }, { "epoch": 0.88, "grad_norm": 0.43505164980888367, "learning_rate": 0.0004817222222736495, "loss": 1.8638, "step": 26409 }, { "epoch": 0.88, "grad_norm": 0.44155633449554443, "learning_rate": 0.00048171390581055107, "loss": 1.7345, "step": 26410 }, { "epoch": 0.88, "grad_norm": 0.5123941898345947, "learning_rate": 0.00048170558912687876, "loss": 1.8194, "step": 26411 }, { "epoch": 0.88, "grad_norm": 0.45874372124671936, "learning_rate": 0.0004816972722226426, "loss": 1.8496, "step": 26412 }, { "epoch": 0.88, "grad_norm": 0.4594474136829376, "learning_rate": 0.0004816889550978528, "loss": 1.866, "step": 26413 }, { "epoch": 0.88, "grad_norm": 0.42350536584854126, "learning_rate": 0.0004816806377525193, "loss": 1.8065, "step": 26414 }, { "epoch": 0.88, "grad_norm": 0.4464280903339386, "learning_rate": 0.00048167232018665225, "loss": 1.8687, "step": 26415 }, { "epoch": 0.88, "grad_norm": 0.6703423857688904, "learning_rate": 0.00048166400240026193, "loss": 1.8649, "step": 26416 }, { "epoch": 0.88, "grad_norm": 0.4440426230430603, "learning_rate": 0.00048165568439335817, "loss": 1.8281, "step": 26417 }, { "epoch": 0.88, "grad_norm": 0.4523119032382965, "learning_rate": 0.00048164736616595116, "loss": 1.8337, "step": 26418 }, { "epoch": 0.88, "grad_norm": 0.44350406527519226, "learning_rate": 0.00048163904771805104, "loss": 1.8066, "step": 26419 }, { "epoch": 0.88, "grad_norm": 0.4462971091270447, "learning_rate": 0.00048163072904966784, "loss": 1.755, "step": 26420 }, { "epoch": 0.88, "grad_norm": 0.44588419795036316, "learning_rate": 0.0004816224101608118, "loss": 1.7715, "step": 26421 }, { "epoch": 0.88, "grad_norm": 0.4533478915691376, "learning_rate": 0.0004816140910514928, "loss": 1.7994, "step": 26422 }, { "epoch": 0.88, "grad_norm": 0.4341123104095459, "learning_rate": 0.00048160577172172116, "loss": 1.7892, "step": 26423 }, { "epoch": 0.88, "grad_norm": 0.43407025933265686, "learning_rate": 0.0004815974521715068, "loss": 1.7835, "step": 26424 }, { "epoch": 0.88, "grad_norm": 0.4613899290561676, "learning_rate": 0.0004815891324008599, "loss": 1.838, "step": 26425 }, { "epoch": 0.88, "grad_norm": 0.4656774401664734, "learning_rate": 0.00048158081240979054, "loss": 1.8188, "step": 26426 }, { "epoch": 0.88, "grad_norm": 0.4589739739894867, "learning_rate": 0.0004815724921983088, "loss": 1.8516, "step": 26427 }, { "epoch": 0.88, "grad_norm": 0.4397297203540802, "learning_rate": 0.00048156417176642484, "loss": 1.7245, "step": 26428 }, { "epoch": 0.88, "grad_norm": 0.43087857961654663, "learning_rate": 0.0004815558511141488, "loss": 1.7461, "step": 26429 }, { "epoch": 0.88, "grad_norm": 0.45615172386169434, "learning_rate": 0.00048154753024149056, "loss": 1.8086, "step": 26430 }, { "epoch": 0.88, "grad_norm": 0.45242300629615784, "learning_rate": 0.0004815392091484605, "loss": 1.7885, "step": 26431 }, { "epoch": 0.88, "grad_norm": 0.45285627245903015, "learning_rate": 0.00048153088783506857, "loss": 1.8129, "step": 26432 }, { "epoch": 0.88, "grad_norm": 0.4424329400062561, "learning_rate": 0.0004815225663013248, "loss": 1.7869, "step": 26433 }, { "epoch": 0.88, "grad_norm": 0.5350369811058044, "learning_rate": 0.00048151424454723944, "loss": 1.8513, "step": 26434 }, { "epoch": 0.88, "grad_norm": 0.46514904499053955, "learning_rate": 0.0004815059225728225, "loss": 1.8276, "step": 26435 }, { "epoch": 0.88, "grad_norm": 0.44664570689201355, "learning_rate": 0.00048149760037808416, "loss": 1.7727, "step": 26436 }, { "epoch": 0.88, "grad_norm": 0.4379934072494507, "learning_rate": 0.00048148927796303446, "loss": 1.9108, "step": 26437 }, { "epoch": 0.88, "grad_norm": 0.43219202756881714, "learning_rate": 0.0004814809553276835, "loss": 1.7962, "step": 26438 }, { "epoch": 0.88, "grad_norm": 0.4399142265319824, "learning_rate": 0.0004814726324720414, "loss": 1.7747, "step": 26439 }, { "epoch": 0.88, "grad_norm": 0.4210122525691986, "learning_rate": 0.0004814643093961183, "loss": 1.8306, "step": 26440 }, { "epoch": 0.88, "grad_norm": 0.4291445314884186, "learning_rate": 0.0004814559860999243, "loss": 1.8633, "step": 26441 }, { "epoch": 0.88, "grad_norm": 0.4331284761428833, "learning_rate": 0.00048144766258346945, "loss": 1.7528, "step": 26442 }, { "epoch": 0.88, "grad_norm": 0.4274725914001465, "learning_rate": 0.0004814393388467638, "loss": 1.8055, "step": 26443 }, { "epoch": 0.88, "grad_norm": 0.4424445927143097, "learning_rate": 0.00048143101488981756, "loss": 1.7697, "step": 26444 }, { "epoch": 0.88, "grad_norm": 0.4450584948062897, "learning_rate": 0.0004814226907126408, "loss": 1.8249, "step": 26445 }, { "epoch": 0.88, "grad_norm": 0.4473164677619934, "learning_rate": 0.00048141436631524365, "loss": 1.7509, "step": 26446 }, { "epoch": 0.88, "grad_norm": 0.4229053556919098, "learning_rate": 0.00048140604169763615, "loss": 1.7734, "step": 26447 }, { "epoch": 0.88, "grad_norm": 0.45985090732574463, "learning_rate": 0.0004813977168598285, "loss": 1.7973, "step": 26448 }, { "epoch": 0.88, "grad_norm": 0.44160377979278564, "learning_rate": 0.00048138939180183076, "loss": 1.9184, "step": 26449 }, { "epoch": 0.88, "grad_norm": 0.45010483264923096, "learning_rate": 0.00048138106652365287, "loss": 1.7846, "step": 26450 }, { "epoch": 0.88, "grad_norm": 0.4976924657821655, "learning_rate": 0.00048137274102530523, "loss": 1.7237, "step": 26451 }, { "epoch": 0.88, "grad_norm": 0.42434701323509216, "learning_rate": 0.00048136441530679776, "loss": 1.787, "step": 26452 }, { "epoch": 0.88, "grad_norm": 0.4312267303466797, "learning_rate": 0.0004813560893681406, "loss": 1.8508, "step": 26453 }, { "epoch": 0.88, "grad_norm": 0.4383566975593567, "learning_rate": 0.00048134776320934395, "loss": 1.8636, "step": 26454 }, { "epoch": 0.88, "grad_norm": 0.4732006788253784, "learning_rate": 0.0004813394368304178, "loss": 1.7983, "step": 26455 }, { "epoch": 0.88, "grad_norm": 0.42787450551986694, "learning_rate": 0.0004813311102313722, "loss": 1.8264, "step": 26456 }, { "epoch": 0.88, "grad_norm": 0.4556671380996704, "learning_rate": 0.0004813227834122175, "loss": 1.835, "step": 26457 }, { "epoch": 0.88, "grad_norm": 0.44121432304382324, "learning_rate": 0.00048131445637296356, "loss": 1.7633, "step": 26458 }, { "epoch": 0.88, "grad_norm": 0.4430854320526123, "learning_rate": 0.0004813061291136206, "loss": 1.9122, "step": 26459 }, { "epoch": 0.88, "grad_norm": 0.4506312310695648, "learning_rate": 0.0004812978016341987, "loss": 1.8357, "step": 26460 }, { "epoch": 0.88, "grad_norm": 0.44943955540657043, "learning_rate": 0.000481289473934708, "loss": 1.8348, "step": 26461 }, { "epoch": 0.88, "grad_norm": 0.4407726526260376, "learning_rate": 0.00048128114601515864, "loss": 1.7881, "step": 26462 }, { "epoch": 0.88, "grad_norm": 0.4442219138145447, "learning_rate": 0.0004812728178755606, "loss": 1.7851, "step": 26463 }, { "epoch": 0.88, "grad_norm": 0.44172975420951843, "learning_rate": 0.0004812644895159241, "loss": 1.8221, "step": 26464 }, { "epoch": 0.88, "grad_norm": 0.44210371375083923, "learning_rate": 0.0004812561609362592, "loss": 1.8303, "step": 26465 }, { "epoch": 0.88, "grad_norm": 0.4450414776802063, "learning_rate": 0.0004812478321365761, "loss": 1.7984, "step": 26466 }, { "epoch": 0.88, "grad_norm": 0.44223278760910034, "learning_rate": 0.0004812395031168848, "loss": 1.7755, "step": 26467 }, { "epoch": 0.88, "grad_norm": 0.46225273609161377, "learning_rate": 0.00048123117387719534, "loss": 1.8769, "step": 26468 }, { "epoch": 0.88, "grad_norm": 0.4270075857639313, "learning_rate": 0.0004812228444175181, "loss": 1.7533, "step": 26469 }, { "epoch": 0.88, "grad_norm": 0.45819807052612305, "learning_rate": 0.00048121451473786295, "loss": 1.8839, "step": 26470 }, { "epoch": 0.88, "grad_norm": 0.4506440758705139, "learning_rate": 0.00048120618483824006, "loss": 1.784, "step": 26471 }, { "epoch": 0.88, "grad_norm": 0.4368366003036499, "learning_rate": 0.0004811978547186596, "loss": 1.7916, "step": 26472 }, { "epoch": 0.88, "grad_norm": 0.4369104504585266, "learning_rate": 0.0004811895243791316, "loss": 1.7488, "step": 26473 }, { "epoch": 0.88, "grad_norm": 0.4616599380970001, "learning_rate": 0.00048118119381966633, "loss": 1.8815, "step": 26474 }, { "epoch": 0.88, "grad_norm": 0.44449132680892944, "learning_rate": 0.00048117286304027367, "loss": 1.8116, "step": 26475 }, { "epoch": 0.88, "grad_norm": 0.4509097933769226, "learning_rate": 0.0004811645320409638, "loss": 1.8732, "step": 26476 }, { "epoch": 0.88, "grad_norm": 0.4579077661037445, "learning_rate": 0.0004811562008217471, "loss": 1.7561, "step": 26477 }, { "epoch": 0.88, "grad_norm": 0.4355235695838928, "learning_rate": 0.00048114786938263323, "loss": 1.7937, "step": 26478 }, { "epoch": 0.88, "grad_norm": 0.475509911775589, "learning_rate": 0.00048113953772363266, "loss": 1.7619, "step": 26479 }, { "epoch": 0.88, "grad_norm": 0.4531579315662384, "learning_rate": 0.0004811312058447554, "loss": 1.8306, "step": 26480 }, { "epoch": 0.88, "grad_norm": 0.4373552203178406, "learning_rate": 0.0004811228737460115, "loss": 1.7652, "step": 26481 }, { "epoch": 0.88, "grad_norm": 0.4450632631778717, "learning_rate": 0.00048111454142741114, "loss": 1.8686, "step": 26482 }, { "epoch": 0.88, "grad_norm": 0.44688519835472107, "learning_rate": 0.0004811062088889643, "loss": 1.8322, "step": 26483 }, { "epoch": 0.88, "grad_norm": 0.44785863161087036, "learning_rate": 0.0004810978761306813, "loss": 1.8565, "step": 26484 }, { "epoch": 0.88, "grad_norm": 0.4284331500530243, "learning_rate": 0.00048108954315257216, "loss": 1.7868, "step": 26485 }, { "epoch": 0.88, "grad_norm": 0.43296241760253906, "learning_rate": 0.000481081209954647, "loss": 1.8668, "step": 26486 }, { "epoch": 0.88, "grad_norm": 0.4415656328201294, "learning_rate": 0.00048107287653691594, "loss": 1.8002, "step": 26487 }, { "epoch": 0.88, "grad_norm": 0.46451881527900696, "learning_rate": 0.0004810645428993891, "loss": 1.8006, "step": 26488 }, { "epoch": 0.88, "grad_norm": 0.4405229687690735, "learning_rate": 0.00048105620904207656, "loss": 1.8566, "step": 26489 }, { "epoch": 0.88, "grad_norm": 0.4495680034160614, "learning_rate": 0.00048104787496498845, "loss": 1.804, "step": 26490 }, { "epoch": 0.88, "grad_norm": 0.4357013404369354, "learning_rate": 0.00048103954066813494, "loss": 1.9019, "step": 26491 }, { "epoch": 0.88, "grad_norm": 0.43502524495124817, "learning_rate": 0.00048103120615152606, "loss": 1.8266, "step": 26492 }, { "epoch": 0.88, "grad_norm": 0.4554826021194458, "learning_rate": 0.00048102287141517196, "loss": 1.8548, "step": 26493 }, { "epoch": 0.88, "grad_norm": 0.4140656888484955, "learning_rate": 0.00048101453645908276, "loss": 1.8333, "step": 26494 }, { "epoch": 0.88, "grad_norm": 0.43904992938041687, "learning_rate": 0.0004810062012832686, "loss": 1.8976, "step": 26495 }, { "epoch": 0.88, "grad_norm": 0.43450501561164856, "learning_rate": 0.00048099786588773965, "loss": 1.7727, "step": 26496 }, { "epoch": 0.88, "grad_norm": 0.43089842796325684, "learning_rate": 0.0004809895302725059, "loss": 1.7389, "step": 26497 }, { "epoch": 0.88, "grad_norm": 0.43968161940574646, "learning_rate": 0.00048098119443757754, "loss": 1.8125, "step": 26498 }, { "epoch": 0.88, "grad_norm": 0.44950881600379944, "learning_rate": 0.00048097285838296467, "loss": 1.7245, "step": 26499 }, { "epoch": 0.88, "grad_norm": 0.43864163756370544, "learning_rate": 0.0004809645221086774, "loss": 1.8068, "step": 26500 }, { "epoch": 0.88, "grad_norm": 0.4429487884044647, "learning_rate": 0.00048095618561472584, "loss": 1.7909, "step": 26501 }, { "epoch": 0.88, "grad_norm": 0.44994837045669556, "learning_rate": 0.0004809478489011202, "loss": 1.8873, "step": 26502 }, { "epoch": 0.88, "grad_norm": 0.42888104915618896, "learning_rate": 0.0004809395119678705, "loss": 1.7734, "step": 26503 }, { "epoch": 0.88, "grad_norm": 0.4675138592720032, "learning_rate": 0.0004809311748149869, "loss": 1.902, "step": 26504 }, { "epoch": 0.88, "grad_norm": 0.44174718856811523, "learning_rate": 0.00048092283744247946, "loss": 1.8136, "step": 26505 }, { "epoch": 0.88, "grad_norm": 0.44035860896110535, "learning_rate": 0.0004809144998503584, "loss": 1.7876, "step": 26506 }, { "epoch": 0.88, "grad_norm": 0.45130547881126404, "learning_rate": 0.00048090616203863383, "loss": 1.86, "step": 26507 }, { "epoch": 0.88, "grad_norm": 0.4467754364013672, "learning_rate": 0.00048089782400731576, "loss": 1.8075, "step": 26508 }, { "epoch": 0.88, "grad_norm": 0.4546850025653839, "learning_rate": 0.00048088948575641445, "loss": 1.7582, "step": 26509 }, { "epoch": 0.88, "grad_norm": 0.45148664712905884, "learning_rate": 0.00048088114728593985, "loss": 1.7839, "step": 26510 }, { "epoch": 0.88, "grad_norm": 0.4399157166481018, "learning_rate": 0.00048087280859590234, "loss": 1.7524, "step": 26511 }, { "epoch": 0.88, "grad_norm": 0.43918511271476746, "learning_rate": 0.0004808644696863119, "loss": 1.8458, "step": 26512 }, { "epoch": 0.88, "grad_norm": 0.4473557770252228, "learning_rate": 0.00048085613055717845, "loss": 1.8408, "step": 26513 }, { "epoch": 0.88, "grad_norm": 0.45871254801750183, "learning_rate": 0.0004808477912085125, "loss": 1.8154, "step": 26514 }, { "epoch": 0.88, "grad_norm": 0.4467200040817261, "learning_rate": 0.0004808394516403238, "loss": 1.7835, "step": 26515 }, { "epoch": 0.88, "grad_norm": 0.430130273103714, "learning_rate": 0.0004808311118526228, "loss": 1.7347, "step": 26516 }, { "epoch": 0.88, "grad_norm": 0.4377129077911377, "learning_rate": 0.00048082277184541945, "loss": 1.8332, "step": 26517 }, { "epoch": 0.88, "grad_norm": 0.44515496492385864, "learning_rate": 0.0004808144316187238, "loss": 1.8897, "step": 26518 }, { "epoch": 0.88, "grad_norm": 0.4322246313095093, "learning_rate": 0.0004808060911725462, "loss": 1.9057, "step": 26519 }, { "epoch": 0.88, "grad_norm": 0.47229424118995667, "learning_rate": 0.0004807977505068965, "loss": 1.7911, "step": 26520 }, { "epoch": 0.88, "grad_norm": 0.45736655592918396, "learning_rate": 0.00048078940962178513, "loss": 1.8282, "step": 26521 }, { "epoch": 0.88, "grad_norm": 0.4461302161216736, "learning_rate": 0.00048078106851722196, "loss": 1.7556, "step": 26522 }, { "epoch": 0.88, "grad_norm": 0.45538628101348877, "learning_rate": 0.00048077272719321723, "loss": 1.8077, "step": 26523 }, { "epoch": 0.88, "grad_norm": 0.4618186950683594, "learning_rate": 0.0004807643856497811, "loss": 1.8295, "step": 26524 }, { "epoch": 0.88, "grad_norm": 0.4429645538330078, "learning_rate": 0.00048075604388692354, "loss": 1.7897, "step": 26525 }, { "epoch": 0.88, "grad_norm": 0.4545377194881439, "learning_rate": 0.0004807477019046549, "loss": 1.9019, "step": 26526 }, { "epoch": 0.88, "grad_norm": 0.44216662645339966, "learning_rate": 0.000480739359702985, "loss": 1.7764, "step": 26527 }, { "epoch": 0.88, "grad_norm": 0.43752068281173706, "learning_rate": 0.0004807310172819243, "loss": 1.8133, "step": 26528 }, { "epoch": 0.88, "grad_norm": 0.44167226552963257, "learning_rate": 0.00048072267464148276, "loss": 1.7522, "step": 26529 }, { "epoch": 0.88, "grad_norm": 0.4305095672607422, "learning_rate": 0.0004807143317816704, "loss": 1.7882, "step": 26530 }, { "epoch": 0.88, "grad_norm": 0.4376193881034851, "learning_rate": 0.00048070598870249753, "loss": 1.7263, "step": 26531 }, { "epoch": 0.88, "grad_norm": 0.44023042917251587, "learning_rate": 0.0004806976454039743, "loss": 1.8374, "step": 26532 }, { "epoch": 0.88, "grad_norm": 0.4698522090911865, "learning_rate": 0.00048068930188611075, "loss": 1.7667, "step": 26533 }, { "epoch": 0.88, "grad_norm": 0.4642871618270874, "learning_rate": 0.0004806809581489169, "loss": 1.798, "step": 26534 }, { "epoch": 0.88, "grad_norm": 0.4535048007965088, "learning_rate": 0.00048067261419240307, "loss": 1.849, "step": 26535 }, { "epoch": 0.88, "grad_norm": 0.4368032217025757, "learning_rate": 0.0004806642700165793, "loss": 1.7717, "step": 26536 }, { "epoch": 0.88, "grad_norm": 0.4584721028804779, "learning_rate": 0.00048065592562145565, "loss": 1.816, "step": 26537 }, { "epoch": 0.88, "grad_norm": 0.4364345967769623, "learning_rate": 0.00048064758100704234, "loss": 1.8491, "step": 26538 }, { "epoch": 0.88, "grad_norm": 0.44063621759414673, "learning_rate": 0.0004806392361733496, "loss": 1.9341, "step": 26539 }, { "epoch": 0.88, "grad_norm": 0.425656259059906, "learning_rate": 0.00048063089112038734, "loss": 1.8714, "step": 26540 }, { "epoch": 0.88, "grad_norm": 0.44623875617980957, "learning_rate": 0.0004806225458481658, "loss": 1.8389, "step": 26541 }, { "epoch": 0.88, "grad_norm": 0.4528612494468689, "learning_rate": 0.00048061420035669513, "loss": 1.8283, "step": 26542 }, { "epoch": 0.88, "grad_norm": 0.44303104281425476, "learning_rate": 0.0004806058546459855, "loss": 1.8447, "step": 26543 }, { "epoch": 0.88, "grad_norm": 0.5054281949996948, "learning_rate": 0.0004805975087160469, "loss": 1.9535, "step": 26544 }, { "epoch": 0.88, "grad_norm": 0.43024715781211853, "learning_rate": 0.00048058916256688944, "loss": 1.7369, "step": 26545 }, { "epoch": 0.88, "grad_norm": 0.42381441593170166, "learning_rate": 0.00048058081619852346, "loss": 1.7563, "step": 26546 }, { "epoch": 0.88, "grad_norm": 0.4429234564304352, "learning_rate": 0.0004805724696109589, "loss": 1.7773, "step": 26547 }, { "epoch": 0.88, "grad_norm": 0.43463724851608276, "learning_rate": 0.0004805641228042061, "loss": 1.8549, "step": 26548 }, { "epoch": 0.88, "grad_norm": 0.46803393959999084, "learning_rate": 0.000480555775778275, "loss": 1.7394, "step": 26549 }, { "epoch": 0.88, "grad_norm": 0.45568224787712097, "learning_rate": 0.0004805474285331757, "loss": 1.9698, "step": 26550 }, { "epoch": 0.88, "grad_norm": 0.4372873604297638, "learning_rate": 0.00048053908106891855, "loss": 1.7795, "step": 26551 }, { "epoch": 0.88, "grad_norm": 0.44163474440574646, "learning_rate": 0.0004805307333855135, "loss": 1.8294, "step": 26552 }, { "epoch": 0.88, "grad_norm": 0.43050915002822876, "learning_rate": 0.00048052238548297084, "loss": 1.791, "step": 26553 }, { "epoch": 0.88, "grad_norm": 0.4510123133659363, "learning_rate": 0.0004805140373613005, "loss": 1.7646, "step": 26554 }, { "epoch": 0.88, "grad_norm": 0.4373263418674469, "learning_rate": 0.0004805056890205126, "loss": 1.8299, "step": 26555 }, { "epoch": 0.88, "grad_norm": 0.4388352930545807, "learning_rate": 0.00048049734046061763, "loss": 1.6852, "step": 26556 }, { "epoch": 0.88, "grad_norm": 0.4246712923049927, "learning_rate": 0.0004804889916816253, "loss": 1.8328, "step": 26557 }, { "epoch": 0.88, "grad_norm": 0.44515836238861084, "learning_rate": 0.00048048064268354606, "loss": 1.831, "step": 26558 }, { "epoch": 0.88, "grad_norm": 0.4274343252182007, "learning_rate": 0.0004804722934663899, "loss": 1.7689, "step": 26559 }, { "epoch": 0.88, "grad_norm": 0.45004987716674805, "learning_rate": 0.0004804639440301668, "loss": 1.8057, "step": 26560 }, { "epoch": 0.88, "grad_norm": 0.42749953269958496, "learning_rate": 0.0004804555943748872, "loss": 1.8267, "step": 26561 }, { "epoch": 0.88, "grad_norm": 0.4373549520969391, "learning_rate": 0.000480447244500561, "loss": 1.8068, "step": 26562 }, { "epoch": 0.88, "grad_norm": 0.43012651801109314, "learning_rate": 0.0004804388944071986, "loss": 1.9232, "step": 26563 }, { "epoch": 0.88, "grad_norm": 0.4936973750591278, "learning_rate": 0.0004804305440948099, "loss": 1.8449, "step": 26564 }, { "epoch": 0.88, "grad_norm": 0.4434642195701599, "learning_rate": 0.00048042219356340504, "loss": 1.7199, "step": 26565 }, { "epoch": 0.88, "grad_norm": 0.42512017488479614, "learning_rate": 0.00048041384281299426, "loss": 1.7842, "step": 26566 }, { "epoch": 0.88, "grad_norm": 0.4317081868648529, "learning_rate": 0.0004804054918435877, "loss": 1.7735, "step": 26567 }, { "epoch": 0.88, "grad_norm": 0.4430503249168396, "learning_rate": 0.0004803971406551954, "loss": 1.8896, "step": 26568 }, { "epoch": 0.88, "grad_norm": 0.43643689155578613, "learning_rate": 0.0004803887892478276, "loss": 1.8679, "step": 26569 }, { "epoch": 0.88, "grad_norm": 0.44367125630378723, "learning_rate": 0.0004803804376214943, "loss": 1.9087, "step": 26570 }, { "epoch": 0.88, "grad_norm": 0.4396233558654785, "learning_rate": 0.00048037208577620576, "loss": 1.8261, "step": 26571 }, { "epoch": 0.88, "grad_norm": 0.426656037569046, "learning_rate": 0.00048036373371197213, "loss": 1.8707, "step": 26572 }, { "epoch": 0.88, "grad_norm": 0.44888654351234436, "learning_rate": 0.00048035538142880354, "loss": 1.7494, "step": 26573 }, { "epoch": 0.88, "grad_norm": 0.43203261494636536, "learning_rate": 0.00048034702892671004, "loss": 1.818, "step": 26574 }, { "epoch": 0.88, "grad_norm": 0.4338369071483612, "learning_rate": 0.00048033867620570175, "loss": 1.8641, "step": 26575 }, { "epoch": 0.88, "grad_norm": 0.43095308542251587, "learning_rate": 0.000480330323265789, "loss": 1.8595, "step": 26576 }, { "epoch": 0.88, "grad_norm": 0.43134605884552, "learning_rate": 0.00048032197010698164, "loss": 1.8061, "step": 26577 }, { "epoch": 0.88, "grad_norm": 0.44084686040878296, "learning_rate": 0.00048031361672929015, "loss": 1.7464, "step": 26578 }, { "epoch": 0.88, "grad_norm": 0.4450518786907196, "learning_rate": 0.0004803052631327244, "loss": 1.8477, "step": 26579 }, { "epoch": 0.88, "grad_norm": 0.4125818610191345, "learning_rate": 0.0004802969093172947, "loss": 1.7203, "step": 26580 }, { "epoch": 0.88, "grad_norm": 0.45058193802833557, "learning_rate": 0.000480288555283011, "loss": 1.8367, "step": 26581 }, { "epoch": 0.88, "grad_norm": 0.42767390608787537, "learning_rate": 0.00048028020102988363, "loss": 1.9206, "step": 26582 }, { "epoch": 0.88, "grad_norm": 0.44144120812416077, "learning_rate": 0.0004802718465579227, "loss": 1.8301, "step": 26583 }, { "epoch": 0.88, "grad_norm": 0.43683114647865295, "learning_rate": 0.0004802634918671383, "loss": 1.8319, "step": 26584 }, { "epoch": 0.88, "grad_norm": 0.44561925530433655, "learning_rate": 0.00048025513695754054, "loss": 1.8157, "step": 26585 }, { "epoch": 0.88, "grad_norm": 0.4411816895008087, "learning_rate": 0.0004802467818291396, "loss": 1.8458, "step": 26586 }, { "epoch": 0.88, "grad_norm": 0.4401671886444092, "learning_rate": 0.0004802384264819457, "loss": 1.8296, "step": 26587 }, { "epoch": 0.88, "grad_norm": 0.46094810962677, "learning_rate": 0.00048023007091596885, "loss": 1.8317, "step": 26588 }, { "epoch": 0.88, "grad_norm": 0.4639122784137726, "learning_rate": 0.0004802217151312192, "loss": 1.8261, "step": 26589 }, { "epoch": 0.88, "grad_norm": 0.428184449672699, "learning_rate": 0.000480213359127707, "loss": 1.7869, "step": 26590 }, { "epoch": 0.88, "grad_norm": 0.4429823160171509, "learning_rate": 0.00048020500290544237, "loss": 1.8661, "step": 26591 }, { "epoch": 0.88, "grad_norm": 0.44301843643188477, "learning_rate": 0.0004801966464644353, "loss": 1.811, "step": 26592 }, { "epoch": 0.88, "grad_norm": 0.45994797348976135, "learning_rate": 0.00048018828980469613, "loss": 1.8783, "step": 26593 }, { "epoch": 0.88, "grad_norm": 0.43242791295051575, "learning_rate": 0.00048017993292623503, "loss": 1.7648, "step": 26594 }, { "epoch": 0.88, "grad_norm": 0.42262130975723267, "learning_rate": 0.0004801715758290619, "loss": 1.7656, "step": 26595 }, { "epoch": 0.88, "grad_norm": 0.43308067321777344, "learning_rate": 0.000480163218513187, "loss": 1.7566, "step": 26596 }, { "epoch": 0.88, "grad_norm": 0.4315798878669739, "learning_rate": 0.0004801548609786206, "loss": 1.8025, "step": 26597 }, { "epoch": 0.88, "grad_norm": 0.44415590167045593, "learning_rate": 0.0004801465032253727, "loss": 1.8614, "step": 26598 }, { "epoch": 0.88, "grad_norm": 0.46892210841178894, "learning_rate": 0.0004801381452534535, "loss": 1.8167, "step": 26599 }, { "epoch": 0.88, "grad_norm": 0.437749981880188, "learning_rate": 0.00048012978706287303, "loss": 1.7739, "step": 26600 }, { "epoch": 0.89, "grad_norm": 0.45410311222076416, "learning_rate": 0.0004801214286536417, "loss": 1.7401, "step": 26601 }, { "epoch": 0.89, "grad_norm": 0.45184585452079773, "learning_rate": 0.00048011307002576937, "loss": 1.8287, "step": 26602 }, { "epoch": 0.89, "grad_norm": 0.43658342957496643, "learning_rate": 0.00048010471117926643, "loss": 1.8375, "step": 26603 }, { "epoch": 0.89, "grad_norm": 0.5100129246711731, "learning_rate": 0.00048009635211414286, "loss": 1.9004, "step": 26604 }, { "epoch": 0.89, "grad_norm": 0.43652665615081787, "learning_rate": 0.00048008799283040884, "loss": 1.8032, "step": 26605 }, { "epoch": 0.89, "grad_norm": 0.4374767243862152, "learning_rate": 0.0004800796333280745, "loss": 1.7703, "step": 26606 }, { "epoch": 0.89, "grad_norm": 0.4322933554649353, "learning_rate": 0.00048007127360715006, "loss": 1.7738, "step": 26607 }, { "epoch": 0.89, "grad_norm": 0.47003868222236633, "learning_rate": 0.0004800629136676456, "loss": 1.7667, "step": 26608 }, { "epoch": 0.89, "grad_norm": 0.44325652718544006, "learning_rate": 0.00048005455350957134, "loss": 1.8026, "step": 26609 }, { "epoch": 0.89, "grad_norm": 0.43304964900016785, "learning_rate": 0.00048004619313293725, "loss": 1.8199, "step": 26610 }, { "epoch": 0.89, "grad_norm": 0.44187211990356445, "learning_rate": 0.00048003783253775374, "loss": 1.8655, "step": 26611 }, { "epoch": 0.89, "grad_norm": 0.4355425536632538, "learning_rate": 0.0004800294717240308, "loss": 1.8253, "step": 26612 }, { "epoch": 0.89, "grad_norm": 0.4285072088241577, "learning_rate": 0.0004800211106917787, "loss": 1.8655, "step": 26613 }, { "epoch": 0.89, "grad_norm": 0.4518575668334961, "learning_rate": 0.0004800127494410074, "loss": 1.8406, "step": 26614 }, { "epoch": 0.89, "grad_norm": 0.4524848759174347, "learning_rate": 0.0004800043879717271, "loss": 1.8442, "step": 26615 }, { "epoch": 0.89, "grad_norm": 0.4426687955856323, "learning_rate": 0.00047999602628394806, "loss": 1.8277, "step": 26616 }, { "epoch": 0.89, "grad_norm": 0.4325076639652252, "learning_rate": 0.0004799876643776803, "loss": 1.8527, "step": 26617 }, { "epoch": 0.89, "grad_norm": 0.4388611614704132, "learning_rate": 0.0004799793022529341, "loss": 1.7737, "step": 26618 }, { "epoch": 0.89, "grad_norm": 0.45164626836776733, "learning_rate": 0.0004799709399097196, "loss": 1.8786, "step": 26619 }, { "epoch": 0.89, "grad_norm": 0.4330810606479645, "learning_rate": 0.0004799625773480468, "loss": 1.928, "step": 26620 }, { "epoch": 0.89, "grad_norm": 0.440022736787796, "learning_rate": 0.000479954214567926, "loss": 1.8016, "step": 26621 }, { "epoch": 0.89, "grad_norm": 0.43764108419418335, "learning_rate": 0.0004799458515693672, "loss": 1.8459, "step": 26622 }, { "epoch": 0.89, "grad_norm": 0.44073545932769775, "learning_rate": 0.0004799374883523808, "loss": 1.7738, "step": 26623 }, { "epoch": 0.89, "grad_norm": 0.45117703080177307, "learning_rate": 0.0004799291249169767, "loss": 1.7602, "step": 26624 }, { "epoch": 0.89, "grad_norm": 0.4376648962497711, "learning_rate": 0.0004799207612631652, "loss": 1.8076, "step": 26625 }, { "epoch": 0.89, "grad_norm": 0.4393918216228485, "learning_rate": 0.00047991239739095633, "loss": 1.822, "step": 26626 }, { "epoch": 0.89, "grad_norm": 0.42686158418655396, "learning_rate": 0.00047990403330036037, "loss": 1.7504, "step": 26627 }, { "epoch": 0.89, "grad_norm": 0.4552176296710968, "learning_rate": 0.00047989566899138745, "loss": 1.8353, "step": 26628 }, { "epoch": 0.89, "grad_norm": 0.4464297890663147, "learning_rate": 0.0004798873044640477, "loss": 1.8088, "step": 26629 }, { "epoch": 0.89, "grad_norm": 0.4412165880203247, "learning_rate": 0.0004798789397183512, "loss": 1.7675, "step": 26630 }, { "epoch": 0.89, "grad_norm": 0.4411177635192871, "learning_rate": 0.0004798705747543082, "loss": 1.8289, "step": 26631 }, { "epoch": 0.89, "grad_norm": 0.4576404392719269, "learning_rate": 0.00047986220957192883, "loss": 1.7445, "step": 26632 }, { "epoch": 0.89, "grad_norm": 0.4473850727081299, "learning_rate": 0.0004798538441712233, "loss": 1.7298, "step": 26633 }, { "epoch": 0.89, "grad_norm": 0.44268110394477844, "learning_rate": 0.0004798454785522016, "loss": 1.8632, "step": 26634 }, { "epoch": 0.89, "grad_norm": 0.4511420428752899, "learning_rate": 0.0004798371127148741, "loss": 1.7802, "step": 26635 }, { "epoch": 0.89, "grad_norm": 0.4344395399093628, "learning_rate": 0.00047982874665925077, "loss": 1.8665, "step": 26636 }, { "epoch": 0.89, "grad_norm": 0.43791472911834717, "learning_rate": 0.00047982038038534177, "loss": 1.7255, "step": 26637 }, { "epoch": 0.89, "grad_norm": 0.4366655945777893, "learning_rate": 0.0004798120138931574, "loss": 1.9007, "step": 26638 }, { "epoch": 0.89, "grad_norm": 0.45587655901908875, "learning_rate": 0.0004798036471827077, "loss": 1.8413, "step": 26639 }, { "epoch": 0.89, "grad_norm": 0.4292041063308716, "learning_rate": 0.00047979528025400293, "loss": 1.8648, "step": 26640 }, { "epoch": 0.89, "grad_norm": 0.43505460023880005, "learning_rate": 0.00047978691310705324, "loss": 1.764, "step": 26641 }, { "epoch": 0.89, "grad_norm": 0.43397051095962524, "learning_rate": 0.00047977854574186857, "loss": 1.7927, "step": 26642 }, { "epoch": 0.89, "grad_norm": 0.4242728054523468, "learning_rate": 0.0004797701781584593, "loss": 1.8022, "step": 26643 }, { "epoch": 0.89, "grad_norm": 0.4405987560749054, "learning_rate": 0.0004797618103568355, "loss": 1.8219, "step": 26644 }, { "epoch": 0.89, "grad_norm": 0.44321954250335693, "learning_rate": 0.0004797534423370074, "loss": 1.8038, "step": 26645 }, { "epoch": 0.89, "grad_norm": 0.4388796091079712, "learning_rate": 0.00047974507409898515, "loss": 1.7764, "step": 26646 }, { "epoch": 0.89, "grad_norm": 0.46712926030158997, "learning_rate": 0.0004797367056427787, "loss": 1.8454, "step": 26647 }, { "epoch": 0.89, "grad_norm": 0.4424569308757782, "learning_rate": 0.0004797283369683985, "loss": 1.7657, "step": 26648 }, { "epoch": 0.89, "grad_norm": 0.4391908347606659, "learning_rate": 0.00047971996807585454, "loss": 1.7807, "step": 26649 }, { "epoch": 0.89, "grad_norm": 0.42988121509552, "learning_rate": 0.000479711598965157, "loss": 1.7709, "step": 26650 }, { "epoch": 0.89, "grad_norm": 0.4600745141506195, "learning_rate": 0.00047970322963631615, "loss": 1.8147, "step": 26651 }, { "epoch": 0.89, "grad_norm": 0.4483238160610199, "learning_rate": 0.000479694860089342, "loss": 1.7906, "step": 26652 }, { "epoch": 0.89, "grad_norm": 0.44953954219818115, "learning_rate": 0.00047968649032424475, "loss": 1.7986, "step": 26653 }, { "epoch": 0.89, "grad_norm": 0.4425472319126129, "learning_rate": 0.00047967812034103454, "loss": 1.8309, "step": 26654 }, { "epoch": 0.89, "grad_norm": 0.45341217517852783, "learning_rate": 0.0004796697501397216, "loss": 1.8431, "step": 26655 }, { "epoch": 0.89, "grad_norm": 0.47778892517089844, "learning_rate": 0.00047966137972031616, "loss": 1.7809, "step": 26656 }, { "epoch": 0.89, "grad_norm": 0.44916602969169617, "learning_rate": 0.00047965300908282815, "loss": 1.8228, "step": 26657 }, { "epoch": 0.89, "grad_norm": 0.5219032764434814, "learning_rate": 0.0004796446382272679, "loss": 1.8058, "step": 26658 }, { "epoch": 0.89, "grad_norm": 0.44502517580986023, "learning_rate": 0.0004796362671536455, "loss": 1.795, "step": 26659 }, { "epoch": 0.89, "grad_norm": 0.46299606561660767, "learning_rate": 0.0004796278958619712, "loss": 1.8233, "step": 26660 }, { "epoch": 0.89, "grad_norm": 0.4558273255825043, "learning_rate": 0.0004796195243522551, "loss": 1.8276, "step": 26661 }, { "epoch": 0.89, "grad_norm": 0.4471973478794098, "learning_rate": 0.0004796111526245073, "loss": 1.7447, "step": 26662 }, { "epoch": 0.89, "grad_norm": 0.4530647397041321, "learning_rate": 0.00047960278067873805, "loss": 1.8475, "step": 26663 }, { "epoch": 0.89, "grad_norm": 0.4529920220375061, "learning_rate": 0.00047959440851495745, "loss": 1.7876, "step": 26664 }, { "epoch": 0.89, "grad_norm": 0.45585641264915466, "learning_rate": 0.0004795860361331757, "loss": 1.8137, "step": 26665 }, { "epoch": 0.89, "grad_norm": 0.4306319057941437, "learning_rate": 0.00047957766353340305, "loss": 1.8548, "step": 26666 }, { "epoch": 0.89, "grad_norm": 0.46566537022590637, "learning_rate": 0.0004795692907156495, "loss": 1.749, "step": 26667 }, { "epoch": 0.89, "grad_norm": 0.4492367208003998, "learning_rate": 0.00047956091767992535, "loss": 1.7684, "step": 26668 }, { "epoch": 0.89, "grad_norm": 0.43996334075927734, "learning_rate": 0.0004795525444262406, "loss": 1.7598, "step": 26669 }, { "epoch": 0.89, "grad_norm": 0.44661709666252136, "learning_rate": 0.00047954417095460566, "loss": 1.8734, "step": 26670 }, { "epoch": 0.89, "grad_norm": 0.4410497844219208, "learning_rate": 0.00047953579726503053, "loss": 1.8217, "step": 26671 }, { "epoch": 0.89, "grad_norm": 0.45475855469703674, "learning_rate": 0.00047952742335752524, "loss": 1.8064, "step": 26672 }, { "epoch": 0.89, "grad_norm": 0.44186872243881226, "learning_rate": 0.0004795190492321003, "loss": 1.8113, "step": 26673 }, { "epoch": 0.89, "grad_norm": 0.442343533039093, "learning_rate": 0.00047951067488876553, "loss": 1.7996, "step": 26674 }, { "epoch": 0.89, "grad_norm": 0.435125470161438, "learning_rate": 0.0004795023003275314, "loss": 1.8663, "step": 26675 }, { "epoch": 0.89, "grad_norm": 0.44058138132095337, "learning_rate": 0.0004794939255484078, "loss": 1.8502, "step": 26676 }, { "epoch": 0.89, "grad_norm": 0.4271170198917389, "learning_rate": 0.00047948555055140506, "loss": 1.7956, "step": 26677 }, { "epoch": 0.89, "grad_norm": 0.4338887929916382, "learning_rate": 0.00047947717533653334, "loss": 1.7965, "step": 26678 }, { "epoch": 0.89, "grad_norm": 0.4377916157245636, "learning_rate": 0.00047946879990380274, "loss": 1.8099, "step": 26679 }, { "epoch": 0.89, "grad_norm": 0.44189175963401794, "learning_rate": 0.00047946042425322345, "loss": 1.8229, "step": 26680 }, { "epoch": 0.89, "grad_norm": 0.42012253403663635, "learning_rate": 0.0004794520483848057, "loss": 1.8903, "step": 26681 }, { "epoch": 0.89, "grad_norm": 0.44129714369773865, "learning_rate": 0.00047944367229855957, "loss": 1.7986, "step": 26682 }, { "epoch": 0.89, "grad_norm": 0.4252164363861084, "learning_rate": 0.00047943529599449525, "loss": 1.7809, "step": 26683 }, { "epoch": 0.89, "grad_norm": 0.44900909066200256, "learning_rate": 0.0004794269194726229, "loss": 1.926, "step": 26684 }, { "epoch": 0.89, "grad_norm": 0.434765100479126, "learning_rate": 0.0004794185427329528, "loss": 1.8139, "step": 26685 }, { "epoch": 0.89, "grad_norm": 0.43832865357398987, "learning_rate": 0.00047941016577549494, "loss": 1.7779, "step": 26686 }, { "epoch": 0.89, "grad_norm": 0.4388805329799652, "learning_rate": 0.00047940178860025954, "loss": 1.85, "step": 26687 }, { "epoch": 0.89, "grad_norm": 0.418411523103714, "learning_rate": 0.00047939341120725693, "loss": 1.828, "step": 26688 }, { "epoch": 0.89, "grad_norm": 0.4372020363807678, "learning_rate": 0.000479385033596497, "loss": 1.8552, "step": 26689 }, { "epoch": 0.89, "grad_norm": 0.4399034082889557, "learning_rate": 0.0004793766557679902, "loss": 1.7896, "step": 26690 }, { "epoch": 0.89, "grad_norm": 0.4276951849460602, "learning_rate": 0.00047936827772174657, "loss": 1.786, "step": 26691 }, { "epoch": 0.89, "grad_norm": 0.43744784593582153, "learning_rate": 0.00047935989945777617, "loss": 1.8368, "step": 26692 }, { "epoch": 0.89, "grad_norm": 0.44576743245124817, "learning_rate": 0.00047935152097608935, "loss": 1.8263, "step": 26693 }, { "epoch": 0.89, "grad_norm": 0.4297703504562378, "learning_rate": 0.00047934314227669625, "loss": 1.8613, "step": 26694 }, { "epoch": 0.89, "grad_norm": 0.44622424244880676, "learning_rate": 0.00047933476335960685, "loss": 1.8464, "step": 26695 }, { "epoch": 0.89, "grad_norm": 0.4453962743282318, "learning_rate": 0.00047932638422483166, "loss": 1.6979, "step": 26696 }, { "epoch": 0.89, "grad_norm": 0.44482186436653137, "learning_rate": 0.00047931800487238056, "loss": 1.7332, "step": 26697 }, { "epoch": 0.89, "grad_norm": 0.4489883780479431, "learning_rate": 0.0004793096253022638, "loss": 1.8496, "step": 26698 }, { "epoch": 0.89, "grad_norm": 0.4557076692581177, "learning_rate": 0.0004793012455144916, "loss": 1.9241, "step": 26699 }, { "epoch": 0.89, "grad_norm": 0.4327602684497833, "learning_rate": 0.00047929286550907416, "loss": 1.8202, "step": 26700 }, { "epoch": 0.89, "grad_norm": 0.4350545406341553, "learning_rate": 0.0004792844852860216, "loss": 1.8267, "step": 26701 }, { "epoch": 0.89, "grad_norm": 0.42919740080833435, "learning_rate": 0.000479276104845344, "loss": 1.8444, "step": 26702 }, { "epoch": 0.89, "grad_norm": 0.4426129460334778, "learning_rate": 0.00047926772418705165, "loss": 1.8612, "step": 26703 }, { "epoch": 0.89, "grad_norm": 0.4270148277282715, "learning_rate": 0.0004792593433111548, "loss": 1.7673, "step": 26704 }, { "epoch": 0.89, "grad_norm": 0.49584531784057617, "learning_rate": 0.0004792509622176634, "loss": 1.8518, "step": 26705 }, { "epoch": 0.89, "grad_norm": 0.4563324451446533, "learning_rate": 0.00047924258090658776, "loss": 1.8121, "step": 26706 }, { "epoch": 0.89, "grad_norm": 0.43413934111595154, "learning_rate": 0.0004792341993779381, "loss": 1.7684, "step": 26707 }, { "epoch": 0.89, "grad_norm": 0.42688554525375366, "learning_rate": 0.00047922581763172443, "loss": 1.8118, "step": 26708 }, { "epoch": 0.89, "grad_norm": 0.4675951898097992, "learning_rate": 0.0004792174356679571, "loss": 1.781, "step": 26709 }, { "epoch": 0.89, "grad_norm": 0.44956597685813904, "learning_rate": 0.0004792090534866462, "loss": 1.9032, "step": 26710 }, { "epoch": 0.89, "grad_norm": 0.4358872175216675, "learning_rate": 0.0004792006710878019, "loss": 1.8638, "step": 26711 }, { "epoch": 0.89, "grad_norm": 0.47219789028167725, "learning_rate": 0.00047919228847143443, "loss": 1.7717, "step": 26712 }, { "epoch": 0.89, "grad_norm": 0.4467558264732361, "learning_rate": 0.0004791839056375538, "loss": 1.7697, "step": 26713 }, { "epoch": 0.89, "grad_norm": 0.4441588521003723, "learning_rate": 0.0004791755225861705, "loss": 1.8095, "step": 26714 }, { "epoch": 0.89, "grad_norm": 0.4529072940349579, "learning_rate": 0.00047916713931729437, "loss": 1.77, "step": 26715 }, { "epoch": 0.89, "grad_norm": 0.44975271821022034, "learning_rate": 0.00047915875583093585, "loss": 1.8097, "step": 26716 }, { "epoch": 0.89, "grad_norm": 0.4409194588661194, "learning_rate": 0.00047915037212710487, "loss": 1.7822, "step": 26717 }, { "epoch": 0.89, "grad_norm": 0.43455883860588074, "learning_rate": 0.0004791419882058118, "loss": 1.8102, "step": 26718 }, { "epoch": 0.89, "grad_norm": 0.44566184282302856, "learning_rate": 0.00047913360406706673, "loss": 1.818, "step": 26719 }, { "epoch": 0.89, "grad_norm": 0.454807311296463, "learning_rate": 0.00047912521971087987, "loss": 1.8258, "step": 26720 }, { "epoch": 0.89, "grad_norm": 0.4389328062534332, "learning_rate": 0.0004791168351372614, "loss": 1.83, "step": 26721 }, { "epoch": 0.89, "grad_norm": 0.4316144585609436, "learning_rate": 0.00047910845034622153, "loss": 1.8813, "step": 26722 }, { "epoch": 0.89, "grad_norm": 0.46081119775772095, "learning_rate": 0.00047910006533777034, "loss": 1.8366, "step": 26723 }, { "epoch": 0.89, "grad_norm": 0.4414791762828827, "learning_rate": 0.000479091680111918, "loss": 1.8656, "step": 26724 }, { "epoch": 0.89, "grad_norm": 0.44611501693725586, "learning_rate": 0.00047908329466867476, "loss": 1.7635, "step": 26725 }, { "epoch": 0.89, "grad_norm": 0.4547581672668457, "learning_rate": 0.00047907490900805087, "loss": 1.8687, "step": 26726 }, { "epoch": 0.89, "grad_norm": 0.6596531271934509, "learning_rate": 0.00047906652313005636, "loss": 1.9216, "step": 26727 }, { "epoch": 0.89, "grad_norm": 0.42610296607017517, "learning_rate": 0.00047905813703470155, "loss": 1.8392, "step": 26728 }, { "epoch": 0.89, "grad_norm": 0.44053640961647034, "learning_rate": 0.00047904975072199645, "loss": 1.8754, "step": 26729 }, { "epoch": 0.89, "grad_norm": 0.4312760531902313, "learning_rate": 0.00047904136419195143, "loss": 1.8577, "step": 26730 }, { "epoch": 0.89, "grad_norm": 0.4389629065990448, "learning_rate": 0.0004790329774445765, "loss": 1.8348, "step": 26731 }, { "epoch": 0.89, "grad_norm": 0.4265470802783966, "learning_rate": 0.00047902459047988194, "loss": 1.8418, "step": 26732 }, { "epoch": 0.89, "grad_norm": 0.43471619486808777, "learning_rate": 0.00047901620329787796, "loss": 1.7787, "step": 26733 }, { "epoch": 0.89, "grad_norm": 0.4297139346599579, "learning_rate": 0.0004790078158985746, "loss": 1.819, "step": 26734 }, { "epoch": 0.89, "grad_norm": 0.4371306896209717, "learning_rate": 0.0004789994282819821, "loss": 1.8509, "step": 26735 }, { "epoch": 0.89, "grad_norm": 0.4386572539806366, "learning_rate": 0.00047899104044811074, "loss": 1.846, "step": 26736 }, { "epoch": 0.89, "grad_norm": 0.5555775761604309, "learning_rate": 0.0004789826523969707, "loss": 1.8278, "step": 26737 }, { "epoch": 0.89, "grad_norm": 0.4370417594909668, "learning_rate": 0.000478974264128572, "loss": 1.8502, "step": 26738 }, { "epoch": 0.89, "grad_norm": 0.44869014620780945, "learning_rate": 0.00047896587564292485, "loss": 1.8441, "step": 26739 }, { "epoch": 0.89, "grad_norm": 0.4850061237812042, "learning_rate": 0.0004789574869400396, "loss": 1.8425, "step": 26740 }, { "epoch": 0.89, "grad_norm": 0.4487929344177246, "learning_rate": 0.0004789490980199263, "loss": 1.8191, "step": 26741 }, { "epoch": 0.89, "grad_norm": 0.4350433051586151, "learning_rate": 0.0004789407088825951, "loss": 1.8518, "step": 26742 }, { "epoch": 0.89, "grad_norm": 0.4271666407585144, "learning_rate": 0.0004789323195280563, "loss": 1.7866, "step": 26743 }, { "epoch": 0.89, "grad_norm": 0.4371262788772583, "learning_rate": 0.0004789239299563201, "loss": 1.7814, "step": 26744 }, { "epoch": 0.89, "grad_norm": 0.4364780783653259, "learning_rate": 0.00047891554016739655, "loss": 1.7986, "step": 26745 }, { "epoch": 0.89, "grad_norm": 0.4437553584575653, "learning_rate": 0.00047890715016129593, "loss": 1.7803, "step": 26746 }, { "epoch": 0.89, "grad_norm": 0.4851686358451843, "learning_rate": 0.0004788987599380283, "loss": 1.8098, "step": 26747 }, { "epoch": 0.89, "grad_norm": 0.44142550230026245, "learning_rate": 0.000478890369497604, "loss": 1.7733, "step": 26748 }, { "epoch": 0.89, "grad_norm": 0.43053141236305237, "learning_rate": 0.00047888197884003313, "loss": 1.8329, "step": 26749 }, { "epoch": 0.89, "grad_norm": 0.8043121099472046, "learning_rate": 0.0004788735879653259, "loss": 1.8062, "step": 26750 }, { "epoch": 0.89, "grad_norm": 0.44233858585357666, "learning_rate": 0.0004788651968734925, "loss": 1.7522, "step": 26751 }, { "epoch": 0.89, "grad_norm": 0.4640621244907379, "learning_rate": 0.0004788568055645431, "loss": 1.8465, "step": 26752 }, { "epoch": 0.89, "grad_norm": 0.46330881118774414, "learning_rate": 0.00047884841403848794, "loss": 1.8618, "step": 26753 }, { "epoch": 0.89, "grad_norm": 0.4625629782676697, "learning_rate": 0.00047884002229533703, "loss": 1.8632, "step": 26754 }, { "epoch": 0.89, "grad_norm": 0.44488832354545593, "learning_rate": 0.0004788316303351008, "loss": 1.8234, "step": 26755 }, { "epoch": 0.89, "grad_norm": 0.44067826867103577, "learning_rate": 0.00047882323815778925, "loss": 1.8113, "step": 26756 }, { "epoch": 0.89, "grad_norm": 0.435841828584671, "learning_rate": 0.00047881484576341263, "loss": 1.7157, "step": 26757 }, { "epoch": 0.89, "grad_norm": 0.4480893015861511, "learning_rate": 0.0004788064531519812, "loss": 1.7853, "step": 26758 }, { "epoch": 0.89, "grad_norm": 0.43353813886642456, "learning_rate": 0.0004787980603235051, "loss": 1.8053, "step": 26759 }, { "epoch": 0.89, "grad_norm": 0.4405795633792877, "learning_rate": 0.00047878966727799444, "loss": 1.7632, "step": 26760 }, { "epoch": 0.89, "grad_norm": 0.4457012116909027, "learning_rate": 0.00047878127401545937, "loss": 1.7856, "step": 26761 }, { "epoch": 0.89, "grad_norm": 0.44523316621780396, "learning_rate": 0.0004787728805359104, "loss": 1.8811, "step": 26762 }, { "epoch": 0.89, "grad_norm": 0.43530380725860596, "learning_rate": 0.00047876448683935736, "loss": 1.8325, "step": 26763 }, { "epoch": 0.89, "grad_norm": 0.43359944224357605, "learning_rate": 0.00047875609292581054, "loss": 1.8664, "step": 26764 }, { "epoch": 0.89, "grad_norm": 0.4325665235519409, "learning_rate": 0.00047874769879528023, "loss": 1.7959, "step": 26765 }, { "epoch": 0.89, "grad_norm": 0.44029340147972107, "learning_rate": 0.00047873930444777656, "loss": 1.8539, "step": 26766 }, { "epoch": 0.89, "grad_norm": 0.43877169489860535, "learning_rate": 0.00047873090988330967, "loss": 1.8267, "step": 26767 }, { "epoch": 0.89, "grad_norm": 0.43695855140686035, "learning_rate": 0.0004787225151018898, "loss": 1.7895, "step": 26768 }, { "epoch": 0.89, "grad_norm": 0.4264110028743744, "learning_rate": 0.00047871412010352715, "loss": 1.8193, "step": 26769 }, { "epoch": 0.89, "grad_norm": 0.45898354053497314, "learning_rate": 0.00047870572488823186, "loss": 1.8479, "step": 26770 }, { "epoch": 0.89, "grad_norm": 0.44243472814559937, "learning_rate": 0.00047869732945601416, "loss": 1.8274, "step": 26771 }, { "epoch": 0.89, "grad_norm": 0.4305476248264313, "learning_rate": 0.00047868893380688426, "loss": 1.7524, "step": 26772 }, { "epoch": 0.89, "grad_norm": 0.46000605821609497, "learning_rate": 0.0004786805379408523, "loss": 1.7613, "step": 26773 }, { "epoch": 0.89, "grad_norm": 0.4574476480484009, "learning_rate": 0.0004786721418579285, "loss": 1.8724, "step": 26774 }, { "epoch": 0.89, "grad_norm": 0.4384383261203766, "learning_rate": 0.000478663745558123, "loss": 1.787, "step": 26775 }, { "epoch": 0.89, "grad_norm": 0.43341198563575745, "learning_rate": 0.0004786553490414461, "loss": 1.8293, "step": 26776 }, { "epoch": 0.89, "grad_norm": 0.45371875166893005, "learning_rate": 0.00047864695230790793, "loss": 1.8845, "step": 26777 }, { "epoch": 0.89, "grad_norm": 0.4414224624633789, "learning_rate": 0.00047863855535751863, "loss": 1.8038, "step": 26778 }, { "epoch": 0.89, "grad_norm": 0.4402958154678345, "learning_rate": 0.00047863015819028847, "loss": 1.7985, "step": 26779 }, { "epoch": 0.89, "grad_norm": 0.44549185037612915, "learning_rate": 0.0004786217608062276, "loss": 1.8063, "step": 26780 }, { "epoch": 0.89, "grad_norm": 0.4504345655441284, "learning_rate": 0.0004786133632053463, "loss": 1.7637, "step": 26781 }, { "epoch": 0.89, "grad_norm": 0.4486202299594879, "learning_rate": 0.0004786049653876546, "loss": 1.872, "step": 26782 }, { "epoch": 0.89, "grad_norm": 0.44294843077659607, "learning_rate": 0.0004785965673531629, "loss": 1.8185, "step": 26783 }, { "epoch": 0.89, "grad_norm": 0.43749383091926575, "learning_rate": 0.0004785881691018812, "loss": 1.7419, "step": 26784 }, { "epoch": 0.89, "grad_norm": 0.42975056171417236, "learning_rate": 0.0004785797706338198, "loss": 1.847, "step": 26785 }, { "epoch": 0.89, "grad_norm": 0.43016645312309265, "learning_rate": 0.00047857137194898887, "loss": 1.8059, "step": 26786 }, { "epoch": 0.89, "grad_norm": 0.4296480715274811, "learning_rate": 0.0004785629730473986, "loss": 1.852, "step": 26787 }, { "epoch": 0.89, "grad_norm": 0.4430585205554962, "learning_rate": 0.0004785545739290592, "loss": 1.7774, "step": 26788 }, { "epoch": 0.89, "grad_norm": 0.43855854868888855, "learning_rate": 0.0004785461745939808, "loss": 1.8517, "step": 26789 }, { "epoch": 0.89, "grad_norm": 0.45018985867500305, "learning_rate": 0.00047853777504217374, "loss": 1.8896, "step": 26790 }, { "epoch": 0.89, "grad_norm": 0.4516930878162384, "learning_rate": 0.0004785293752736481, "loss": 1.8766, "step": 26791 }, { "epoch": 0.89, "grad_norm": 0.4485935568809509, "learning_rate": 0.0004785209752884141, "loss": 1.8088, "step": 26792 }, { "epoch": 0.89, "grad_norm": 0.43603959679603577, "learning_rate": 0.000478512575086482, "loss": 1.7855, "step": 26793 }, { "epoch": 0.89, "grad_norm": 0.45243144035339355, "learning_rate": 0.0004785041746678618, "loss": 1.7549, "step": 26794 }, { "epoch": 0.89, "grad_norm": 0.4277302026748657, "learning_rate": 0.0004784957740325639, "loss": 1.883, "step": 26795 }, { "epoch": 0.89, "grad_norm": 0.4448453187942505, "learning_rate": 0.00047848737318059844, "loss": 1.8311, "step": 26796 }, { "epoch": 0.89, "grad_norm": 0.4432659447193146, "learning_rate": 0.0004784789721119756, "loss": 1.8072, "step": 26797 }, { "epoch": 0.89, "grad_norm": 0.4367745816707611, "learning_rate": 0.0004784705708267056, "loss": 1.8582, "step": 26798 }, { "epoch": 0.89, "grad_norm": 0.45411258935928345, "learning_rate": 0.0004784621693247986, "loss": 1.793, "step": 26799 }, { "epoch": 0.89, "grad_norm": 0.46894848346710205, "learning_rate": 0.00047845376760626484, "loss": 1.7948, "step": 26800 }, { "epoch": 0.89, "grad_norm": 0.44319531321525574, "learning_rate": 0.0004784453656711145, "loss": 1.8121, "step": 26801 }, { "epoch": 0.89, "grad_norm": 0.4497741162776947, "learning_rate": 0.0004784369635193578, "loss": 1.8226, "step": 26802 }, { "epoch": 0.89, "grad_norm": 0.4334663450717926, "learning_rate": 0.0004784285611510048, "loss": 1.821, "step": 26803 }, { "epoch": 0.89, "grad_norm": 0.44118958711624146, "learning_rate": 0.0004784201585660659, "loss": 1.841, "step": 26804 }, { "epoch": 0.89, "grad_norm": 0.44524407386779785, "learning_rate": 0.0004784117557645513, "loss": 1.8069, "step": 26805 }, { "epoch": 0.89, "grad_norm": 0.454512357711792, "learning_rate": 0.000478403352746471, "loss": 1.8674, "step": 26806 }, { "epoch": 0.89, "grad_norm": 0.6066774725914001, "learning_rate": 0.0004783949495118354, "loss": 1.8287, "step": 26807 }, { "epoch": 0.89, "grad_norm": 0.438273549079895, "learning_rate": 0.00047838654606065456, "loss": 1.8192, "step": 26808 }, { "epoch": 0.89, "grad_norm": 0.4460885524749756, "learning_rate": 0.00047837814239293865, "loss": 1.8427, "step": 26809 }, { "epoch": 0.89, "grad_norm": 0.46842432022094727, "learning_rate": 0.0004783697385086982, "loss": 1.77, "step": 26810 }, { "epoch": 0.89, "grad_norm": 0.4573850631713867, "learning_rate": 0.0004783613344079429, "loss": 1.8371, "step": 26811 }, { "epoch": 0.89, "grad_norm": 0.4372670650482178, "learning_rate": 0.0004783529300906834, "loss": 1.8413, "step": 26812 }, { "epoch": 0.89, "grad_norm": 0.4535665214061737, "learning_rate": 0.00047834452555692964, "loss": 1.8136, "step": 26813 }, { "epoch": 0.89, "grad_norm": 0.440975546836853, "learning_rate": 0.000478336120806692, "loss": 1.8272, "step": 26814 }, { "epoch": 0.89, "grad_norm": 0.45109716057777405, "learning_rate": 0.00047832771583998046, "loss": 1.8145, "step": 26815 }, { "epoch": 0.89, "grad_norm": 0.6604424118995667, "learning_rate": 0.0004783193106568054, "loss": 1.8874, "step": 26816 }, { "epoch": 0.89, "grad_norm": 0.4342693090438843, "learning_rate": 0.00047831090525717697, "loss": 1.827, "step": 26817 }, { "epoch": 0.89, "grad_norm": 0.4465113580226898, "learning_rate": 0.0004783024996411054, "loss": 1.7847, "step": 26818 }, { "epoch": 0.89, "grad_norm": 0.44012781977653503, "learning_rate": 0.00047829409380860085, "loss": 1.772, "step": 26819 }, { "epoch": 0.89, "grad_norm": 0.4468850791454315, "learning_rate": 0.00047828568775967355, "loss": 1.8957, "step": 26820 }, { "epoch": 0.89, "grad_norm": 0.44107359647750854, "learning_rate": 0.0004782772814943337, "loss": 1.8165, "step": 26821 }, { "epoch": 0.89, "grad_norm": 0.4311504364013672, "learning_rate": 0.0004782688750125914, "loss": 1.8916, "step": 26822 }, { "epoch": 0.89, "grad_norm": 0.4502493143081665, "learning_rate": 0.0004782604683144571, "loss": 1.8214, "step": 26823 }, { "epoch": 0.89, "grad_norm": 0.44646579027175903, "learning_rate": 0.0004782520613999407, "loss": 1.8049, "step": 26824 }, { "epoch": 0.89, "grad_norm": 0.44550982117652893, "learning_rate": 0.0004782436542690527, "loss": 1.8642, "step": 26825 }, { "epoch": 0.89, "grad_norm": 0.45508697628974915, "learning_rate": 0.00047823524692180313, "loss": 1.7559, "step": 26826 }, { "epoch": 0.89, "grad_norm": 0.44149142503738403, "learning_rate": 0.00047822683935820224, "loss": 1.7583, "step": 26827 }, { "epoch": 0.89, "grad_norm": 0.4554368555545807, "learning_rate": 0.00047821843157826025, "loss": 1.7808, "step": 26828 }, { "epoch": 0.89, "grad_norm": 0.447364866733551, "learning_rate": 0.00047821002358198724, "loss": 1.7662, "step": 26829 }, { "epoch": 0.89, "grad_norm": 0.9984838366508484, "learning_rate": 0.00047820161536939356, "loss": 1.787, "step": 26830 }, { "epoch": 0.89, "grad_norm": 0.45436203479766846, "learning_rate": 0.0004781932069404894, "loss": 1.7663, "step": 26831 }, { "epoch": 0.89, "grad_norm": 0.4376698136329651, "learning_rate": 0.00047818479829528495, "loss": 1.8445, "step": 26832 }, { "epoch": 0.89, "grad_norm": 0.4357442259788513, "learning_rate": 0.0004781763894337904, "loss": 1.8131, "step": 26833 }, { "epoch": 0.89, "grad_norm": 0.45194220542907715, "learning_rate": 0.0004781679803560159, "loss": 1.7752, "step": 26834 }, { "epoch": 0.89, "grad_norm": 0.4525424838066101, "learning_rate": 0.0004781595710619719, "loss": 1.7461, "step": 26835 }, { "epoch": 0.89, "grad_norm": 0.4399425685405731, "learning_rate": 0.0004781511615516682, "loss": 1.8386, "step": 26836 }, { "epoch": 0.89, "grad_norm": 0.4368995726108551, "learning_rate": 0.00047814275182511543, "loss": 1.8482, "step": 26837 }, { "epoch": 0.89, "grad_norm": 0.45126986503601074, "learning_rate": 0.0004781343418823235, "loss": 1.8745, "step": 26838 }, { "epoch": 0.89, "grad_norm": 0.42644721269607544, "learning_rate": 0.0004781259317233027, "loss": 1.7879, "step": 26839 }, { "epoch": 0.89, "grad_norm": 0.44550377130508423, "learning_rate": 0.0004781175213480634, "loss": 1.8867, "step": 26840 }, { "epoch": 0.89, "grad_norm": 0.4363739788532257, "learning_rate": 0.0004781091107566156, "loss": 1.8541, "step": 26841 }, { "epoch": 0.89, "grad_norm": 0.4446074664592743, "learning_rate": 0.0004781006999489696, "loss": 1.8391, "step": 26842 }, { "epoch": 0.89, "grad_norm": 0.44800955057144165, "learning_rate": 0.00047809228892513556, "loss": 1.8357, "step": 26843 }, { "epoch": 0.89, "grad_norm": 0.4492518901824951, "learning_rate": 0.0004780838776851237, "loss": 1.8743, "step": 26844 }, { "epoch": 0.89, "grad_norm": 0.4421235918998718, "learning_rate": 0.0004780754662289443, "loss": 1.7266, "step": 26845 }, { "epoch": 0.89, "grad_norm": 0.500495433807373, "learning_rate": 0.0004780670545566075, "loss": 1.8977, "step": 26846 }, { "epoch": 0.89, "grad_norm": 0.4444039463996887, "learning_rate": 0.0004780586426681236, "loss": 1.822, "step": 26847 }, { "epoch": 0.89, "grad_norm": 0.4316568970680237, "learning_rate": 0.0004780502305635027, "loss": 1.7651, "step": 26848 }, { "epoch": 0.89, "grad_norm": 0.4663044512271881, "learning_rate": 0.000478041818242755, "loss": 1.8033, "step": 26849 }, { "epoch": 0.89, "grad_norm": 0.45668458938598633, "learning_rate": 0.00047803340570589075, "loss": 1.8816, "step": 26850 }, { "epoch": 0.89, "grad_norm": 0.4383331835269928, "learning_rate": 0.00047802499295292026, "loss": 1.8012, "step": 26851 }, { "epoch": 0.89, "grad_norm": 0.43360501527786255, "learning_rate": 0.00047801657998385364, "loss": 1.7693, "step": 26852 }, { "epoch": 0.89, "grad_norm": 0.4436742961406708, "learning_rate": 0.00047800816679870113, "loss": 1.824, "step": 26853 }, { "epoch": 0.89, "grad_norm": 0.43423283100128174, "learning_rate": 0.0004779997533974729, "loss": 1.8129, "step": 26854 }, { "epoch": 0.89, "grad_norm": 0.4406912326812744, "learning_rate": 0.0004779913397801792, "loss": 1.6563, "step": 26855 }, { "epoch": 0.89, "grad_norm": 0.46926814317703247, "learning_rate": 0.00047798292594683023, "loss": 1.8501, "step": 26856 }, { "epoch": 0.89, "grad_norm": 0.47615426778793335, "learning_rate": 0.0004779745118974363, "loss": 1.7688, "step": 26857 }, { "epoch": 0.89, "grad_norm": 0.42942532896995544, "learning_rate": 0.00047796609763200754, "loss": 1.7572, "step": 26858 }, { "epoch": 0.89, "grad_norm": 0.42854970693588257, "learning_rate": 0.00047795768315055397, "loss": 1.7228, "step": 26859 }, { "epoch": 0.89, "grad_norm": 0.4342982769012451, "learning_rate": 0.0004779492684530862, "loss": 1.8538, "step": 26860 }, { "epoch": 0.89, "grad_norm": 0.4665488004684448, "learning_rate": 0.00047794085353961414, "loss": 1.8108, "step": 26861 }, { "epoch": 0.89, "grad_norm": 0.47121894359588623, "learning_rate": 0.00047793243841014806, "loss": 1.8018, "step": 26862 }, { "epoch": 0.89, "grad_norm": 0.4296117424964905, "learning_rate": 0.00047792402306469833, "loss": 1.8316, "step": 26863 }, { "epoch": 0.89, "grad_norm": 0.42911484837532043, "learning_rate": 0.00047791560750327493, "loss": 1.8587, "step": 26864 }, { "epoch": 0.89, "grad_norm": 0.45904481410980225, "learning_rate": 0.0004779071917258883, "loss": 1.7944, "step": 26865 }, { "epoch": 0.89, "grad_norm": 0.445949524641037, "learning_rate": 0.0004778987757325485, "loss": 1.8045, "step": 26866 }, { "epoch": 0.89, "grad_norm": 0.4497191309928894, "learning_rate": 0.00047789035952326583, "loss": 1.8447, "step": 26867 }, { "epoch": 0.89, "grad_norm": 0.4416491389274597, "learning_rate": 0.00047788194309805043, "loss": 1.8192, "step": 26868 }, { "epoch": 0.89, "grad_norm": 0.4796496629714966, "learning_rate": 0.00047787352645691265, "loss": 1.8192, "step": 26869 }, { "epoch": 0.89, "grad_norm": 0.4467095136642456, "learning_rate": 0.0004778651095998625, "loss": 1.8335, "step": 26870 }, { "epoch": 0.89, "grad_norm": 0.4274492859840393, "learning_rate": 0.00047785669252691036, "loss": 1.8443, "step": 26871 }, { "epoch": 0.89, "grad_norm": 0.4564315676689148, "learning_rate": 0.0004778482752380664, "loss": 1.861, "step": 26872 }, { "epoch": 0.89, "grad_norm": 0.4458807408809662, "learning_rate": 0.0004778398577333409, "loss": 1.8418, "step": 26873 }, { "epoch": 0.89, "grad_norm": 0.4377767741680145, "learning_rate": 0.00047783144001274386, "loss": 1.8538, "step": 26874 }, { "epoch": 0.89, "grad_norm": 0.4264487624168396, "learning_rate": 0.00047782302207628584, "loss": 1.8152, "step": 26875 }, { "epoch": 0.89, "grad_norm": 0.4328078329563141, "learning_rate": 0.00047781460392397676, "loss": 1.7912, "step": 26876 }, { "epoch": 0.89, "grad_norm": 0.46352776885032654, "learning_rate": 0.000477806185555827, "loss": 1.7562, "step": 26877 }, { "epoch": 0.89, "grad_norm": 0.44148924946784973, "learning_rate": 0.00047779776697184673, "loss": 1.8036, "step": 26878 }, { "epoch": 0.89, "grad_norm": 0.4351658821105957, "learning_rate": 0.00047778934817204616, "loss": 1.7729, "step": 26879 }, { "epoch": 0.89, "grad_norm": 0.43425023555755615, "learning_rate": 0.00047778092915643546, "loss": 1.8691, "step": 26880 }, { "epoch": 0.89, "grad_norm": 0.43315890431404114, "learning_rate": 0.0004777725099250249, "loss": 1.7641, "step": 26881 }, { "epoch": 0.89, "grad_norm": 0.5055766105651855, "learning_rate": 0.0004777640904778248, "loss": 1.8207, "step": 26882 }, { "epoch": 0.89, "grad_norm": 0.4558619260787964, "learning_rate": 0.00047775567081484525, "loss": 1.8628, "step": 26883 }, { "epoch": 0.89, "grad_norm": 0.4513257145881653, "learning_rate": 0.0004777472509360966, "loss": 1.8128, "step": 26884 }, { "epoch": 0.89, "grad_norm": 0.4243708848953247, "learning_rate": 0.0004777388308415888, "loss": 1.7123, "step": 26885 }, { "epoch": 0.89, "grad_norm": 0.4768785834312439, "learning_rate": 0.0004777304105313323, "loss": 1.8552, "step": 26886 }, { "epoch": 0.89, "grad_norm": 0.4618303179740906, "learning_rate": 0.00047772199000533733, "loss": 1.8359, "step": 26887 }, { "epoch": 0.89, "grad_norm": 0.4401632249355316, "learning_rate": 0.00047771356926361395, "loss": 1.8428, "step": 26888 }, { "epoch": 0.89, "grad_norm": 0.43167296051979065, "learning_rate": 0.0004777051483061726, "loss": 1.8848, "step": 26889 }, { "epoch": 0.89, "grad_norm": 0.46421679854393005, "learning_rate": 0.00047769672713302334, "loss": 1.8017, "step": 26890 }, { "epoch": 0.89, "grad_norm": 0.45180171728134155, "learning_rate": 0.0004776883057441764, "loss": 1.8086, "step": 26891 }, { "epoch": 0.89, "grad_norm": 0.4618227481842041, "learning_rate": 0.00047767988413964215, "loss": 1.8879, "step": 26892 }, { "epoch": 0.89, "grad_norm": 0.4398973882198334, "learning_rate": 0.0004776714623194305, "loss": 1.9106, "step": 26893 }, { "epoch": 0.89, "grad_norm": 0.703470766544342, "learning_rate": 0.00047766304028355207, "loss": 1.8175, "step": 26894 }, { "epoch": 0.89, "grad_norm": 0.44883453845977783, "learning_rate": 0.00047765461803201683, "loss": 1.8341, "step": 26895 }, { "epoch": 0.89, "grad_norm": 0.43355849385261536, "learning_rate": 0.000477646195564835, "loss": 1.8341, "step": 26896 }, { "epoch": 0.89, "grad_norm": 0.44382980465888977, "learning_rate": 0.0004776377728820169, "loss": 1.8808, "step": 26897 }, { "epoch": 0.89, "grad_norm": 0.4360786974430084, "learning_rate": 0.0004776293499835727, "loss": 1.7781, "step": 26898 }, { "epoch": 0.89, "grad_norm": 0.44008055329322815, "learning_rate": 0.00047762092686951267, "loss": 1.8219, "step": 26899 }, { "epoch": 0.89, "grad_norm": 0.4507735073566437, "learning_rate": 0.000477612503539847, "loss": 1.7255, "step": 26900 }, { "epoch": 0.89, "grad_norm": 0.4778282642364502, "learning_rate": 0.0004776040799945859, "loss": 1.8043, "step": 26901 }, { "epoch": 0.9, "grad_norm": 0.43504098057746887, "learning_rate": 0.0004775956562337397, "loss": 1.8254, "step": 26902 }, { "epoch": 0.9, "grad_norm": 0.43555325269699097, "learning_rate": 0.0004775872322573183, "loss": 1.816, "step": 26903 }, { "epoch": 0.9, "grad_norm": 0.44358935952186584, "learning_rate": 0.00047757880806533246, "loss": 1.7987, "step": 26904 }, { "epoch": 0.9, "grad_norm": 0.45957764983177185, "learning_rate": 0.000477570383657792, "loss": 1.7992, "step": 26905 }, { "epoch": 0.9, "grad_norm": 0.44032394886016846, "learning_rate": 0.0004775619590347072, "loss": 1.7677, "step": 26906 }, { "epoch": 0.9, "grad_norm": 0.4640718996524811, "learning_rate": 0.0004775535341960884, "loss": 1.883, "step": 26907 }, { "epoch": 0.9, "grad_norm": 0.4406888484954834, "learning_rate": 0.00047754510914194576, "loss": 1.8098, "step": 26908 }, { "epoch": 0.9, "grad_norm": 0.45453134179115295, "learning_rate": 0.00047753668387228956, "loss": 1.7689, "step": 26909 }, { "epoch": 0.9, "grad_norm": 0.45047762989997864, "learning_rate": 0.00047752825838712996, "loss": 1.9023, "step": 26910 }, { "epoch": 0.9, "grad_norm": 0.4477294087409973, "learning_rate": 0.00047751983268647715, "loss": 1.8149, "step": 26911 }, { "epoch": 0.9, "grad_norm": 0.4498569369316101, "learning_rate": 0.0004775114067703415, "loss": 1.77, "step": 26912 }, { "epoch": 0.9, "grad_norm": 0.43856653571128845, "learning_rate": 0.00047750298063873307, "loss": 1.7404, "step": 26913 }, { "epoch": 0.9, "grad_norm": 0.45727118849754333, "learning_rate": 0.00047749455429166223, "loss": 1.8094, "step": 26914 }, { "epoch": 0.9, "grad_norm": 0.4515964984893799, "learning_rate": 0.00047748612772913917, "loss": 1.82, "step": 26915 }, { "epoch": 0.9, "grad_norm": 0.44322317838668823, "learning_rate": 0.00047747770095117413, "loss": 1.7994, "step": 26916 }, { "epoch": 0.9, "grad_norm": 0.4491058886051178, "learning_rate": 0.00047746927395777726, "loss": 1.8661, "step": 26917 }, { "epoch": 0.9, "grad_norm": 0.4718421697616577, "learning_rate": 0.0004774608467489588, "loss": 1.8016, "step": 26918 }, { "epoch": 0.9, "grad_norm": 0.4400438666343689, "learning_rate": 0.0004774524193247291, "loss": 1.8766, "step": 26919 }, { "epoch": 0.9, "grad_norm": 0.41278284788131714, "learning_rate": 0.0004774439916850983, "loss": 1.7905, "step": 26920 }, { "epoch": 0.9, "grad_norm": 0.4370683431625366, "learning_rate": 0.0004774355638300766, "loss": 1.7317, "step": 26921 }, { "epoch": 0.9, "grad_norm": 0.4652985632419586, "learning_rate": 0.0004774271357596742, "loss": 1.8998, "step": 26922 }, { "epoch": 0.9, "grad_norm": 0.4554325342178345, "learning_rate": 0.0004774187074739016, "loss": 1.7433, "step": 26923 }, { "epoch": 0.9, "grad_norm": 0.43935826420783997, "learning_rate": 0.0004774102789727687, "loss": 1.8026, "step": 26924 }, { "epoch": 0.9, "grad_norm": 0.4332732856273651, "learning_rate": 0.00047740185025628586, "loss": 1.8491, "step": 26925 }, { "epoch": 0.9, "grad_norm": 0.456540584564209, "learning_rate": 0.0004773934213244633, "loss": 1.8816, "step": 26926 }, { "epoch": 0.9, "grad_norm": 0.44262605905532837, "learning_rate": 0.00047738499217731125, "loss": 1.7403, "step": 26927 }, { "epoch": 0.9, "grad_norm": 0.4459784924983978, "learning_rate": 0.00047737656281484004, "loss": 1.826, "step": 26928 }, { "epoch": 0.9, "grad_norm": 0.4481988251209259, "learning_rate": 0.0004773681332370598, "loss": 1.779, "step": 26929 }, { "epoch": 0.9, "grad_norm": 0.4530024528503418, "learning_rate": 0.0004773597034439808, "loss": 1.7203, "step": 26930 }, { "epoch": 0.9, "grad_norm": 0.7656044363975525, "learning_rate": 0.00047735127343561317, "loss": 1.8188, "step": 26931 }, { "epoch": 0.9, "grad_norm": 0.4292765259742737, "learning_rate": 0.0004773428432119673, "loss": 1.7949, "step": 26932 }, { "epoch": 0.9, "grad_norm": 0.4372327923774719, "learning_rate": 0.00047733441277305325, "loss": 1.7892, "step": 26933 }, { "epoch": 0.9, "grad_norm": 0.4238860011100769, "learning_rate": 0.00047732598211888146, "loss": 1.7749, "step": 26934 }, { "epoch": 0.9, "grad_norm": 0.4239007532596588, "learning_rate": 0.000477317551249462, "loss": 1.8149, "step": 26935 }, { "epoch": 0.9, "grad_norm": 0.459073543548584, "learning_rate": 0.0004773091201648051, "loss": 1.8495, "step": 26936 }, { "epoch": 0.9, "grad_norm": 0.46505069732666016, "learning_rate": 0.00047730068886492116, "loss": 1.7786, "step": 26937 }, { "epoch": 0.9, "grad_norm": 0.4338739514350891, "learning_rate": 0.0004772922573498203, "loss": 1.7745, "step": 26938 }, { "epoch": 0.9, "grad_norm": 0.431196004152298, "learning_rate": 0.0004772838256195127, "loss": 1.842, "step": 26939 }, { "epoch": 0.9, "grad_norm": 0.4395065903663635, "learning_rate": 0.0004772753936740087, "loss": 1.8021, "step": 26940 }, { "epoch": 0.9, "grad_norm": 0.45909664034843445, "learning_rate": 0.0004772669615133185, "loss": 1.8243, "step": 26941 }, { "epoch": 0.9, "grad_norm": 0.449103981256485, "learning_rate": 0.0004772585291374523, "loss": 1.8557, "step": 26942 }, { "epoch": 0.9, "grad_norm": 0.4409690797328949, "learning_rate": 0.00047725009654642035, "loss": 1.7016, "step": 26943 }, { "epoch": 0.9, "grad_norm": 0.45276308059692383, "learning_rate": 0.00047724166374023296, "loss": 1.7866, "step": 26944 }, { "epoch": 0.9, "grad_norm": 0.44768303632736206, "learning_rate": 0.00047723323071890023, "loss": 1.8294, "step": 26945 }, { "epoch": 0.9, "grad_norm": 0.45741188526153564, "learning_rate": 0.0004772247974824325, "loss": 1.8012, "step": 26946 }, { "epoch": 0.9, "grad_norm": 0.4462010860443115, "learning_rate": 0.00047721636403084, "loss": 1.7462, "step": 26947 }, { "epoch": 0.9, "grad_norm": 0.4494573771953583, "learning_rate": 0.000477207930364133, "loss": 1.7556, "step": 26948 }, { "epoch": 0.9, "grad_norm": 0.4779706299304962, "learning_rate": 0.0004771994964823216, "loss": 1.8125, "step": 26949 }, { "epoch": 0.9, "grad_norm": 0.4844100773334503, "learning_rate": 0.00047719106238541613, "loss": 1.8533, "step": 26950 }, { "epoch": 0.9, "grad_norm": 0.45378246903419495, "learning_rate": 0.00047718262807342675, "loss": 1.7807, "step": 26951 }, { "epoch": 0.9, "grad_norm": 0.45080316066741943, "learning_rate": 0.0004771741935463639, "loss": 1.7849, "step": 26952 }, { "epoch": 0.9, "grad_norm": 0.4487479329109192, "learning_rate": 0.00047716575880423764, "loss": 1.8247, "step": 26953 }, { "epoch": 0.9, "grad_norm": 0.43899911642074585, "learning_rate": 0.00047715732384705825, "loss": 1.8223, "step": 26954 }, { "epoch": 0.9, "grad_norm": 0.46498799324035645, "learning_rate": 0.0004771488886748359, "loss": 1.8587, "step": 26955 }, { "epoch": 0.9, "grad_norm": 0.44677218794822693, "learning_rate": 0.00047714045328758106, "loss": 1.8649, "step": 26956 }, { "epoch": 0.9, "grad_norm": 0.4240577518939972, "learning_rate": 0.00047713201768530375, "loss": 1.8592, "step": 26957 }, { "epoch": 0.9, "grad_norm": 0.431924045085907, "learning_rate": 0.00047712358186801414, "loss": 1.8232, "step": 26958 }, { "epoch": 0.9, "grad_norm": 0.4605801999568939, "learning_rate": 0.0004771151458357227, "loss": 1.7213, "step": 26959 }, { "epoch": 0.9, "grad_norm": 0.4386376142501831, "learning_rate": 0.00047710670958843953, "loss": 1.8202, "step": 26960 }, { "epoch": 0.9, "grad_norm": 0.4339390993118286, "learning_rate": 0.00047709827312617496, "loss": 1.8387, "step": 26961 }, { "epoch": 0.9, "grad_norm": 0.4433883726596832, "learning_rate": 0.00047708983644893923, "loss": 1.7772, "step": 26962 }, { "epoch": 0.9, "grad_norm": 0.46444863080978394, "learning_rate": 0.0004770813995567424, "loss": 1.7747, "step": 26963 }, { "epoch": 0.9, "grad_norm": 0.43777137994766235, "learning_rate": 0.00047707296244959494, "loss": 1.8009, "step": 26964 }, { "epoch": 0.9, "grad_norm": 0.4395953416824341, "learning_rate": 0.000477064525127507, "loss": 1.9327, "step": 26965 }, { "epoch": 0.9, "grad_norm": 0.43298590183258057, "learning_rate": 0.00047705608759048874, "loss": 1.8242, "step": 26966 }, { "epoch": 0.9, "grad_norm": 0.44673261046409607, "learning_rate": 0.0004770476498385505, "loss": 1.8662, "step": 26967 }, { "epoch": 0.9, "grad_norm": 0.49360379576683044, "learning_rate": 0.0004770392118717025, "loss": 1.8817, "step": 26968 }, { "epoch": 0.9, "grad_norm": 0.4756205081939697, "learning_rate": 0.000477030773689955, "loss": 1.8523, "step": 26969 }, { "epoch": 0.9, "grad_norm": 0.4584525525569916, "learning_rate": 0.00047702233529331824, "loss": 1.8132, "step": 26970 }, { "epoch": 0.9, "grad_norm": 0.4644396901130676, "learning_rate": 0.0004770138966818024, "loss": 1.6782, "step": 26971 }, { "epoch": 0.9, "grad_norm": 0.43852245807647705, "learning_rate": 0.0004770054578554179, "loss": 1.8392, "step": 26972 }, { "epoch": 0.9, "grad_norm": 0.9872649312019348, "learning_rate": 0.00047699701881417467, "loss": 1.8359, "step": 26973 }, { "epoch": 0.9, "grad_norm": 0.42439544200897217, "learning_rate": 0.00047698857955808324, "loss": 1.7574, "step": 26974 }, { "epoch": 0.9, "grad_norm": 0.4331943988800049, "learning_rate": 0.0004769801400871538, "loss": 1.7965, "step": 26975 }, { "epoch": 0.9, "grad_norm": 0.4601749777793884, "learning_rate": 0.0004769717004013964, "loss": 1.813, "step": 26976 }, { "epoch": 0.9, "grad_norm": 0.4532168507575989, "learning_rate": 0.0004769632605008215, "loss": 1.758, "step": 26977 }, { "epoch": 0.9, "grad_norm": 0.43762943148612976, "learning_rate": 0.00047695482038543935, "loss": 1.8766, "step": 26978 }, { "epoch": 0.9, "grad_norm": 0.4341092109680176, "learning_rate": 0.00047694638005526, "loss": 1.767, "step": 26979 }, { "epoch": 0.9, "grad_norm": 0.4306184947490692, "learning_rate": 0.000476937939510294, "loss": 1.7543, "step": 26980 }, { "epoch": 0.9, "grad_norm": 0.4453778862953186, "learning_rate": 0.00047692949875055124, "loss": 1.777, "step": 26981 }, { "epoch": 0.9, "grad_norm": 0.4380783140659332, "learning_rate": 0.00047692105777604214, "loss": 1.8379, "step": 26982 }, { "epoch": 0.9, "grad_norm": 0.4541111886501312, "learning_rate": 0.000476912616586777, "loss": 1.7445, "step": 26983 }, { "epoch": 0.9, "grad_norm": 0.46451109647750854, "learning_rate": 0.00047690417518276597, "loss": 1.8128, "step": 26984 }, { "epoch": 0.9, "grad_norm": 0.44411981105804443, "learning_rate": 0.00047689573356401944, "loss": 1.7537, "step": 26985 }, { "epoch": 0.9, "grad_norm": 0.4652353823184967, "learning_rate": 0.00047688729173054744, "loss": 1.7761, "step": 26986 }, { "epoch": 0.9, "grad_norm": 0.44903984665870667, "learning_rate": 0.0004768788496823604, "loss": 1.7288, "step": 26987 }, { "epoch": 0.9, "grad_norm": 0.4464606046676636, "learning_rate": 0.00047687040741946845, "loss": 1.7855, "step": 26988 }, { "epoch": 0.9, "grad_norm": 0.44261065125465393, "learning_rate": 0.00047686196494188196, "loss": 1.7772, "step": 26989 }, { "epoch": 0.9, "grad_norm": 0.4365536868572235, "learning_rate": 0.000476853522249611, "loss": 1.8074, "step": 26990 }, { "epoch": 0.9, "grad_norm": 0.43013185262680054, "learning_rate": 0.000476845079342666, "loss": 1.8091, "step": 26991 }, { "epoch": 0.9, "grad_norm": 0.4347594082355499, "learning_rate": 0.0004768366362210571, "loss": 1.7595, "step": 26992 }, { "epoch": 0.9, "grad_norm": 0.44103121757507324, "learning_rate": 0.0004768281928847946, "loss": 1.8228, "step": 26993 }, { "epoch": 0.9, "grad_norm": 0.45402681827545166, "learning_rate": 0.00047681974933388874, "loss": 1.8189, "step": 26994 }, { "epoch": 0.9, "grad_norm": 0.44044163823127747, "learning_rate": 0.0004768113055683497, "loss": 1.7509, "step": 26995 }, { "epoch": 0.9, "grad_norm": 0.435086727142334, "learning_rate": 0.0004768028615881878, "loss": 1.7717, "step": 26996 }, { "epoch": 0.9, "grad_norm": 0.44679731130599976, "learning_rate": 0.00047679441739341335, "loss": 1.8083, "step": 26997 }, { "epoch": 0.9, "grad_norm": 0.4493691623210907, "learning_rate": 0.00047678597298403635, "loss": 1.8742, "step": 26998 }, { "epoch": 0.9, "grad_norm": 0.441866934299469, "learning_rate": 0.0004767775283600674, "loss": 1.8335, "step": 26999 }, { "epoch": 0.9, "grad_norm": 0.4400682747364044, "learning_rate": 0.00047676908352151657, "loss": 1.7765, "step": 27000 }, { "epoch": 0.9, "grad_norm": 0.4319046139717102, "learning_rate": 0.00047676063846839405, "loss": 1.8443, "step": 27001 }, { "epoch": 0.9, "grad_norm": 0.4399415850639343, "learning_rate": 0.0004767521932007102, "loss": 1.8289, "step": 27002 }, { "epoch": 0.9, "grad_norm": 0.43887603282928467, "learning_rate": 0.00047674374771847526, "loss": 1.8231, "step": 27003 }, { "epoch": 0.9, "grad_norm": 0.44925278425216675, "learning_rate": 0.00047673530202169937, "loss": 1.8719, "step": 27004 }, { "epoch": 0.9, "grad_norm": 0.45036202669143677, "learning_rate": 0.0004767268561103928, "loss": 1.7912, "step": 27005 }, { "epoch": 0.9, "grad_norm": 0.4477227032184601, "learning_rate": 0.00047671840998456604, "loss": 1.8384, "step": 27006 }, { "epoch": 0.9, "grad_norm": 0.43947383761405945, "learning_rate": 0.0004767099636442291, "loss": 1.7938, "step": 27007 }, { "epoch": 0.9, "grad_norm": 0.44525614380836487, "learning_rate": 0.00047670151708939225, "loss": 1.8463, "step": 27008 }, { "epoch": 0.9, "grad_norm": 0.44163477420806885, "learning_rate": 0.0004766930703200659, "loss": 1.7728, "step": 27009 }, { "epoch": 0.9, "grad_norm": 0.44801467657089233, "learning_rate": 0.00047668462333626005, "loss": 1.7964, "step": 27010 }, { "epoch": 0.9, "grad_norm": 0.44593408703804016, "learning_rate": 0.00047667617613798526, "loss": 1.7976, "step": 27011 }, { "epoch": 0.9, "grad_norm": 0.4218394160270691, "learning_rate": 0.0004766677287252516, "loss": 1.8078, "step": 27012 }, { "epoch": 0.9, "grad_norm": 0.45555374026298523, "learning_rate": 0.00047665928109806924, "loss": 1.8236, "step": 27013 }, { "epoch": 0.9, "grad_norm": 0.44169488549232483, "learning_rate": 0.0004766508332564486, "loss": 1.8664, "step": 27014 }, { "epoch": 0.9, "grad_norm": 0.42393481731414795, "learning_rate": 0.00047664238520039984, "loss": 1.8083, "step": 27015 }, { "epoch": 0.9, "grad_norm": 0.43096455931663513, "learning_rate": 0.0004766339369299333, "loss": 1.7153, "step": 27016 }, { "epoch": 0.9, "grad_norm": 0.4260355830192566, "learning_rate": 0.0004766254884450592, "loss": 1.8883, "step": 27017 }, { "epoch": 0.9, "grad_norm": 0.4311201274394989, "learning_rate": 0.0004766170397457878, "loss": 1.7905, "step": 27018 }, { "epoch": 0.9, "grad_norm": 0.44411808252334595, "learning_rate": 0.0004766085908321292, "loss": 1.8656, "step": 27019 }, { "epoch": 0.9, "grad_norm": 0.4778737723827362, "learning_rate": 0.00047660014170409386, "loss": 1.8494, "step": 27020 }, { "epoch": 0.9, "grad_norm": 0.4406152367591858, "learning_rate": 0.00047659169236169207, "loss": 1.8646, "step": 27021 }, { "epoch": 0.9, "grad_norm": 0.4297514855861664, "learning_rate": 0.0004765832428049339, "loss": 1.8099, "step": 27022 }, { "epoch": 0.9, "grad_norm": 0.4274750351905823, "learning_rate": 0.00047657479303382975, "loss": 1.8178, "step": 27023 }, { "epoch": 0.9, "grad_norm": 0.43034303188323975, "learning_rate": 0.0004765663430483897, "loss": 1.7728, "step": 27024 }, { "epoch": 0.9, "grad_norm": 0.46128958463668823, "learning_rate": 0.00047655789284862426, "loss": 1.7915, "step": 27025 }, { "epoch": 0.9, "grad_norm": 0.4382883608341217, "learning_rate": 0.0004765494424345435, "loss": 1.8072, "step": 27026 }, { "epoch": 0.9, "grad_norm": 0.4373353123664856, "learning_rate": 0.0004765409918061577, "loss": 1.7254, "step": 27027 }, { "epoch": 0.9, "grad_norm": 0.4637100398540497, "learning_rate": 0.00047653254096347714, "loss": 1.8286, "step": 27028 }, { "epoch": 0.9, "grad_norm": 0.4514220654964447, "learning_rate": 0.0004765240899065121, "loss": 1.8176, "step": 27029 }, { "epoch": 0.9, "grad_norm": 0.44655299186706543, "learning_rate": 0.00047651563863527287, "loss": 1.8068, "step": 27030 }, { "epoch": 0.9, "grad_norm": 0.4404121935367584, "learning_rate": 0.0004765071871497696, "loss": 1.7611, "step": 27031 }, { "epoch": 0.9, "grad_norm": 0.46888604760169983, "learning_rate": 0.0004764987354500127, "loss": 1.903, "step": 27032 }, { "epoch": 0.9, "grad_norm": 0.43493807315826416, "learning_rate": 0.0004764902835360123, "loss": 1.8248, "step": 27033 }, { "epoch": 0.9, "grad_norm": 0.4849605858325958, "learning_rate": 0.0004764818314077787, "loss": 1.8106, "step": 27034 }, { "epoch": 0.9, "grad_norm": 0.5361146330833435, "learning_rate": 0.00047647337906532204, "loss": 1.7609, "step": 27035 }, { "epoch": 0.9, "grad_norm": 0.45268774032592773, "learning_rate": 0.00047646492650865285, "loss": 1.8119, "step": 27036 }, { "epoch": 0.9, "grad_norm": 0.43404048681259155, "learning_rate": 0.0004764564737377812, "loss": 1.7941, "step": 27037 }, { "epoch": 0.9, "grad_norm": 0.44000244140625, "learning_rate": 0.0004764480207527174, "loss": 1.8386, "step": 27038 }, { "epoch": 0.9, "grad_norm": 0.45616456866264343, "learning_rate": 0.00047643956755347166, "loss": 1.7547, "step": 27039 }, { "epoch": 0.9, "grad_norm": 0.42863157391548157, "learning_rate": 0.0004764311141400543, "loss": 1.7963, "step": 27040 }, { "epoch": 0.9, "grad_norm": 0.4453134834766388, "learning_rate": 0.00047642266051247566, "loss": 1.8388, "step": 27041 }, { "epoch": 0.9, "grad_norm": 0.44875258207321167, "learning_rate": 0.00047641420667074577, "loss": 1.8425, "step": 27042 }, { "epoch": 0.9, "grad_norm": 0.44173142313957214, "learning_rate": 0.00047640575261487505, "loss": 1.9156, "step": 27043 }, { "epoch": 0.9, "grad_norm": 0.4543093740940094, "learning_rate": 0.00047639729834487375, "loss": 1.7921, "step": 27044 }, { "epoch": 0.9, "grad_norm": 0.43663641810417175, "learning_rate": 0.0004763888438607521, "loss": 1.8427, "step": 27045 }, { "epoch": 0.9, "grad_norm": 0.43661826848983765, "learning_rate": 0.00047638038916252045, "loss": 1.8364, "step": 27046 }, { "epoch": 0.9, "grad_norm": 0.4374297559261322, "learning_rate": 0.000476371934250189, "loss": 1.8994, "step": 27047 }, { "epoch": 0.9, "grad_norm": 0.6648964881896973, "learning_rate": 0.000476363479123768, "loss": 1.8779, "step": 27048 }, { "epoch": 0.9, "grad_norm": 0.45061877369880676, "learning_rate": 0.0004763550237832677, "loss": 1.8758, "step": 27049 }, { "epoch": 0.9, "grad_norm": 0.42880120873451233, "learning_rate": 0.00047634656822869825, "loss": 1.8226, "step": 27050 }, { "epoch": 0.9, "grad_norm": 0.45652177929878235, "learning_rate": 0.0004763381124600703, "loss": 1.8142, "step": 27051 }, { "epoch": 0.9, "grad_norm": 0.4499659836292267, "learning_rate": 0.0004763296564773937, "loss": 1.7423, "step": 27052 }, { "epoch": 0.9, "grad_norm": 0.4468036890029907, "learning_rate": 0.0004763212002806789, "loss": 1.8296, "step": 27053 }, { "epoch": 0.9, "grad_norm": 0.4471704065799713, "learning_rate": 0.0004763127438699362, "loss": 1.8185, "step": 27054 }, { "epoch": 0.9, "grad_norm": 0.4536815583705902, "learning_rate": 0.0004763042872451757, "loss": 1.8154, "step": 27055 }, { "epoch": 0.9, "grad_norm": 0.4361201524734497, "learning_rate": 0.00047629583040640787, "loss": 1.8356, "step": 27056 }, { "epoch": 0.9, "grad_norm": 0.45192718505859375, "learning_rate": 0.00047628737335364286, "loss": 1.7717, "step": 27057 }, { "epoch": 0.9, "grad_norm": 0.45133456587791443, "learning_rate": 0.000476278916086891, "loss": 1.7702, "step": 27058 }, { "epoch": 0.9, "grad_norm": 0.4494185745716095, "learning_rate": 0.0004762704586061624, "loss": 1.9434, "step": 27059 }, { "epoch": 0.9, "grad_norm": 0.44675758481025696, "learning_rate": 0.00047626200091146744, "loss": 1.8225, "step": 27060 }, { "epoch": 0.9, "grad_norm": 0.4312960207462311, "learning_rate": 0.00047625354300281653, "loss": 1.77, "step": 27061 }, { "epoch": 0.9, "grad_norm": 0.44104528427124023, "learning_rate": 0.0004762450848802197, "loss": 1.8431, "step": 27062 }, { "epoch": 0.9, "grad_norm": 0.46821436285972595, "learning_rate": 0.00047623662654368716, "loss": 1.7916, "step": 27063 }, { "epoch": 0.9, "grad_norm": 0.42755189538002014, "learning_rate": 0.0004762281679932295, "loss": 1.851, "step": 27064 }, { "epoch": 0.9, "grad_norm": 0.43946048617362976, "learning_rate": 0.00047621970922885685, "loss": 1.8372, "step": 27065 }, { "epoch": 0.9, "grad_norm": 0.5124071836471558, "learning_rate": 0.00047621125025057925, "loss": 1.8372, "step": 27066 }, { "epoch": 0.9, "grad_norm": 0.4454263150691986, "learning_rate": 0.0004762027910584073, "loss": 1.8083, "step": 27067 }, { "epoch": 0.9, "grad_norm": 0.6074842214584351, "learning_rate": 0.0004761943316523511, "loss": 1.8378, "step": 27068 }, { "epoch": 0.9, "grad_norm": 0.4449879229068756, "learning_rate": 0.000476185872032421, "loss": 1.7341, "step": 27069 }, { "epoch": 0.9, "grad_norm": 0.4451087415218353, "learning_rate": 0.0004761774121986271, "loss": 1.8123, "step": 27070 }, { "epoch": 0.9, "grad_norm": 0.42788511514663696, "learning_rate": 0.00047616895215097976, "loss": 1.7368, "step": 27071 }, { "epoch": 0.9, "grad_norm": 0.4587486982345581, "learning_rate": 0.00047616049188948937, "loss": 1.8192, "step": 27072 }, { "epoch": 0.9, "grad_norm": 0.44708722829818726, "learning_rate": 0.000476152031414166, "loss": 1.8155, "step": 27073 }, { "epoch": 0.9, "grad_norm": 0.4648095965385437, "learning_rate": 0.00047614357072502014, "loss": 1.7854, "step": 27074 }, { "epoch": 0.9, "grad_norm": 0.43961694836616516, "learning_rate": 0.0004761351098220618, "loss": 1.8451, "step": 27075 }, { "epoch": 0.9, "grad_norm": 0.4412182569503784, "learning_rate": 0.00047612664870530155, "loss": 1.8155, "step": 27076 }, { "epoch": 0.9, "grad_norm": 0.46168267726898193, "learning_rate": 0.0004761181873747494, "loss": 1.8465, "step": 27077 }, { "epoch": 0.9, "grad_norm": 0.43433815240859985, "learning_rate": 0.0004761097258304157, "loss": 1.7791, "step": 27078 }, { "epoch": 0.9, "grad_norm": 0.4527719020843506, "learning_rate": 0.00047610126407231077, "loss": 1.8343, "step": 27079 }, { "epoch": 0.9, "grad_norm": 0.4412299692630768, "learning_rate": 0.00047609280210044483, "loss": 1.7872, "step": 27080 }, { "epoch": 0.9, "grad_norm": 0.4330297112464905, "learning_rate": 0.00047608433991482826, "loss": 1.7487, "step": 27081 }, { "epoch": 0.9, "grad_norm": 0.4378931522369385, "learning_rate": 0.0004760758775154712, "loss": 1.7966, "step": 27082 }, { "epoch": 0.9, "grad_norm": 0.4607894718647003, "learning_rate": 0.00047606741490238393, "loss": 1.7903, "step": 27083 }, { "epoch": 0.9, "grad_norm": 0.44863489270210266, "learning_rate": 0.00047605895207557676, "loss": 1.7936, "step": 27084 }, { "epoch": 0.9, "grad_norm": 0.44307735562324524, "learning_rate": 0.00047605048903506, "loss": 1.8308, "step": 27085 }, { "epoch": 0.9, "grad_norm": 0.43392375111579895, "learning_rate": 0.0004760420257808439, "loss": 1.8115, "step": 27086 }, { "epoch": 0.9, "grad_norm": 0.4293842017650604, "learning_rate": 0.0004760335623129387, "loss": 1.7909, "step": 27087 }, { "epoch": 0.9, "grad_norm": 0.47110527753829956, "learning_rate": 0.0004760250986313547, "loss": 1.7374, "step": 27088 }, { "epoch": 0.9, "grad_norm": 0.4524575173854828, "learning_rate": 0.0004760166347361021, "loss": 1.8249, "step": 27089 }, { "epoch": 0.9, "grad_norm": 0.4394429922103882, "learning_rate": 0.00047600817062719124, "loss": 1.8666, "step": 27090 }, { "epoch": 0.9, "grad_norm": 0.45968154072761536, "learning_rate": 0.0004759997063046325, "loss": 1.7764, "step": 27091 }, { "epoch": 0.9, "grad_norm": 0.4385060966014862, "learning_rate": 0.00047599124176843604, "loss": 1.8542, "step": 27092 }, { "epoch": 0.9, "grad_norm": 0.4267526865005493, "learning_rate": 0.000475982777018612, "loss": 1.7373, "step": 27093 }, { "epoch": 0.9, "grad_norm": 0.44741496443748474, "learning_rate": 0.00047597431205517103, "loss": 1.7823, "step": 27094 }, { "epoch": 0.9, "grad_norm": 0.43077361583709717, "learning_rate": 0.00047596584687812304, "loss": 1.7618, "step": 27095 }, { "epoch": 0.9, "grad_norm": 0.4378472566604614, "learning_rate": 0.00047595738148747843, "loss": 1.7611, "step": 27096 }, { "epoch": 0.9, "grad_norm": 0.44722843170166016, "learning_rate": 0.00047594891588324746, "loss": 1.7695, "step": 27097 }, { "epoch": 0.9, "grad_norm": 0.44199010729789734, "learning_rate": 0.0004759404500654405, "loss": 1.8037, "step": 27098 }, { "epoch": 0.9, "grad_norm": 0.48064112663269043, "learning_rate": 0.0004759319840340677, "loss": 1.7864, "step": 27099 }, { "epoch": 0.9, "grad_norm": 0.4436090886592865, "learning_rate": 0.00047592351778913937, "loss": 1.7901, "step": 27100 }, { "epoch": 0.9, "grad_norm": 0.45145025849342346, "learning_rate": 0.00047591505133066594, "loss": 1.7549, "step": 27101 }, { "epoch": 0.9, "grad_norm": 0.46410325169563293, "learning_rate": 0.00047590658465865743, "loss": 1.826, "step": 27102 }, { "epoch": 0.9, "grad_norm": 0.4453669488430023, "learning_rate": 0.00047589811777312436, "loss": 1.8881, "step": 27103 }, { "epoch": 0.9, "grad_norm": 0.4675082564353943, "learning_rate": 0.0004758896506740768, "loss": 1.8983, "step": 27104 }, { "epoch": 0.9, "grad_norm": 0.4427019953727722, "learning_rate": 0.00047588118336152507, "loss": 1.8064, "step": 27105 }, { "epoch": 0.9, "grad_norm": 0.4491610527038574, "learning_rate": 0.00047587271583547966, "loss": 1.9058, "step": 27106 }, { "epoch": 0.9, "grad_norm": 0.46014294028282166, "learning_rate": 0.00047586424809595057, "loss": 1.8485, "step": 27107 }, { "epoch": 0.9, "grad_norm": 0.4428707957267761, "learning_rate": 0.00047585578014294825, "loss": 1.8118, "step": 27108 }, { "epoch": 0.9, "grad_norm": 0.4608764052391052, "learning_rate": 0.0004758473119764829, "loss": 1.8003, "step": 27109 }, { "epoch": 0.9, "grad_norm": 0.43592143058776855, "learning_rate": 0.00047583884359656475, "loss": 1.7691, "step": 27110 }, { "epoch": 0.9, "grad_norm": 0.44808244705200195, "learning_rate": 0.0004758303750032043, "loss": 1.803, "step": 27111 }, { "epoch": 0.9, "grad_norm": 0.43768930435180664, "learning_rate": 0.00047582190619641155, "loss": 1.8434, "step": 27112 }, { "epoch": 0.9, "grad_norm": 0.4479902684688568, "learning_rate": 0.000475813437176197, "loss": 1.7891, "step": 27113 }, { "epoch": 0.9, "grad_norm": 0.4392714500427246, "learning_rate": 0.00047580496794257085, "loss": 1.8209, "step": 27114 }, { "epoch": 0.9, "grad_norm": 0.43522635102272034, "learning_rate": 0.00047579649849554325, "loss": 1.856, "step": 27115 }, { "epoch": 0.9, "grad_norm": 0.4427100121974945, "learning_rate": 0.0004757880288351247, "loss": 1.8535, "step": 27116 }, { "epoch": 0.9, "grad_norm": 0.4402705729007721, "learning_rate": 0.0004757795589613254, "loss": 1.8301, "step": 27117 }, { "epoch": 0.9, "grad_norm": 0.45284798741340637, "learning_rate": 0.00047577108887415553, "loss": 1.829, "step": 27118 }, { "epoch": 0.9, "grad_norm": 0.4546472728252411, "learning_rate": 0.0004757626185736256, "loss": 1.7832, "step": 27119 }, { "epoch": 0.9, "grad_norm": 0.4368292987346649, "learning_rate": 0.0004757541480597456, "loss": 1.8471, "step": 27120 }, { "epoch": 0.9, "grad_norm": 0.4455997943878174, "learning_rate": 0.000475745677332526, "loss": 1.8686, "step": 27121 }, { "epoch": 0.9, "grad_norm": 0.4390876591205597, "learning_rate": 0.00047573720639197713, "loss": 1.8425, "step": 27122 }, { "epoch": 0.9, "grad_norm": 0.4615291953086853, "learning_rate": 0.0004757287352381091, "loss": 1.9009, "step": 27123 }, { "epoch": 0.9, "grad_norm": 0.4341494143009186, "learning_rate": 0.00047572026387093227, "loss": 1.8345, "step": 27124 }, { "epoch": 0.9, "grad_norm": 0.45812752842903137, "learning_rate": 0.00047571179229045695, "loss": 1.8391, "step": 27125 }, { "epoch": 0.9, "grad_norm": 0.45327502489089966, "learning_rate": 0.0004757033204966934, "loss": 1.7574, "step": 27126 }, { "epoch": 0.9, "grad_norm": 0.4538569748401642, "learning_rate": 0.00047569484848965194, "loss": 1.8561, "step": 27127 }, { "epoch": 0.9, "grad_norm": 0.4566488564014435, "learning_rate": 0.00047568637626934284, "loss": 1.833, "step": 27128 }, { "epoch": 0.9, "grad_norm": 0.44828981161117554, "learning_rate": 0.00047567790383577633, "loss": 1.7964, "step": 27129 }, { "epoch": 0.9, "grad_norm": 0.4232930839061737, "learning_rate": 0.00047566943118896264, "loss": 1.775, "step": 27130 }, { "epoch": 0.9, "grad_norm": 0.46387627720832825, "learning_rate": 0.0004756609583289123, "loss": 1.863, "step": 27131 }, { "epoch": 0.9, "grad_norm": 0.4520050883293152, "learning_rate": 0.00047565248525563534, "loss": 1.8513, "step": 27132 }, { "epoch": 0.9, "grad_norm": 0.44197002053260803, "learning_rate": 0.00047564401196914215, "loss": 1.7448, "step": 27133 }, { "epoch": 0.9, "grad_norm": 0.4417734146118164, "learning_rate": 0.00047563553846944305, "loss": 1.8297, "step": 27134 }, { "epoch": 0.9, "grad_norm": 0.4305324852466583, "learning_rate": 0.00047562706475654833, "loss": 1.8086, "step": 27135 }, { "epoch": 0.9, "grad_norm": 0.45729634165763855, "learning_rate": 0.0004756185908304682, "loss": 1.8287, "step": 27136 }, { "epoch": 0.9, "grad_norm": 0.4460291266441345, "learning_rate": 0.00047561011669121285, "loss": 1.8152, "step": 27137 }, { "epoch": 0.9, "grad_norm": 0.44299405813217163, "learning_rate": 0.0004756016423387928, "loss": 1.8163, "step": 27138 }, { "epoch": 0.9, "grad_norm": 0.42766422033309937, "learning_rate": 0.0004755931677732183, "loss": 1.7804, "step": 27139 }, { "epoch": 0.9, "grad_norm": 0.44134148955345154, "learning_rate": 0.00047558469299449944, "loss": 1.7738, "step": 27140 }, { "epoch": 0.9, "grad_norm": 0.4426897466182709, "learning_rate": 0.00047557621800264666, "loss": 1.8848, "step": 27141 }, { "epoch": 0.9, "grad_norm": 0.4460923373699188, "learning_rate": 0.0004755677427976703, "loss": 1.8605, "step": 27142 }, { "epoch": 0.9, "grad_norm": 0.4444785714149475, "learning_rate": 0.00047555926737958046, "loss": 1.8349, "step": 27143 }, { "epoch": 0.9, "grad_norm": 0.44692182540893555, "learning_rate": 0.00047555079174838765, "loss": 1.8437, "step": 27144 }, { "epoch": 0.9, "grad_norm": 0.4273863434791565, "learning_rate": 0.0004755423159041019, "loss": 1.862, "step": 27145 }, { "epoch": 0.9, "grad_norm": 0.43729937076568604, "learning_rate": 0.0004755338398467338, "loss": 1.7403, "step": 27146 }, { "epoch": 0.9, "grad_norm": 0.4298543334007263, "learning_rate": 0.00047552536357629334, "loss": 1.7933, "step": 27147 }, { "epoch": 0.9, "grad_norm": 0.4468386173248291, "learning_rate": 0.00047551688709279103, "loss": 1.8683, "step": 27148 }, { "epoch": 0.9, "grad_norm": 0.4367855191230774, "learning_rate": 0.00047550841039623715, "loss": 1.8078, "step": 27149 }, { "epoch": 0.9, "grad_norm": 0.46254780888557434, "learning_rate": 0.00047549993348664177, "loss": 1.7657, "step": 27150 }, { "epoch": 0.9, "grad_norm": 0.4364294707775116, "learning_rate": 0.0004754914563640154, "loss": 1.7516, "step": 27151 }, { "epoch": 0.9, "grad_norm": 0.43897634744644165, "learning_rate": 0.0004754829790283682, "loss": 1.8775, "step": 27152 }, { "epoch": 0.9, "grad_norm": 0.4735982120037079, "learning_rate": 0.0004754745014797106, "loss": 1.812, "step": 27153 }, { "epoch": 0.9, "grad_norm": 0.4511459171772003, "learning_rate": 0.0004754660237180528, "loss": 1.841, "step": 27154 }, { "epoch": 0.9, "grad_norm": 0.4302525818347931, "learning_rate": 0.00047545754574340504, "loss": 1.8066, "step": 27155 }, { "epoch": 0.9, "grad_norm": 0.4434243142604828, "learning_rate": 0.0004754490675557778, "loss": 1.8139, "step": 27156 }, { "epoch": 0.9, "grad_norm": 0.43050989508628845, "learning_rate": 0.0004754405891551811, "loss": 1.8005, "step": 27157 }, { "epoch": 0.9, "grad_norm": 0.4432223439216614, "learning_rate": 0.00047543211054162547, "loss": 1.7613, "step": 27158 }, { "epoch": 0.9, "grad_norm": 0.43894636631011963, "learning_rate": 0.0004754236317151211, "loss": 1.7744, "step": 27159 }, { "epoch": 0.9, "grad_norm": 0.43463653326034546, "learning_rate": 0.0004754151526756782, "loss": 1.7874, "step": 27160 }, { "epoch": 0.9, "grad_norm": 0.432695597410202, "learning_rate": 0.00047540667342330724, "loss": 1.7642, "step": 27161 }, { "epoch": 0.9, "grad_norm": 0.4608476758003235, "learning_rate": 0.0004753981939580184, "loss": 1.7964, "step": 27162 }, { "epoch": 0.9, "grad_norm": 0.4687761962413788, "learning_rate": 0.000475389714279822, "loss": 1.7338, "step": 27163 }, { "epoch": 0.9, "grad_norm": 0.46960732340812683, "learning_rate": 0.0004753812343887284, "loss": 1.8364, "step": 27164 }, { "epoch": 0.9, "grad_norm": 0.4439689517021179, "learning_rate": 0.0004753727542847476, "loss": 1.8572, "step": 27165 }, { "epoch": 0.9, "grad_norm": 0.4573192596435547, "learning_rate": 0.00047536427396789035, "loss": 1.8564, "step": 27166 }, { "epoch": 0.9, "grad_norm": 0.4538741707801819, "learning_rate": 0.0004753557934381666, "loss": 1.8491, "step": 27167 }, { "epoch": 0.9, "grad_norm": 0.43818408250808716, "learning_rate": 0.00047534731269558675, "loss": 1.8309, "step": 27168 }, { "epoch": 0.9, "grad_norm": 0.43785151839256287, "learning_rate": 0.00047533883174016124, "loss": 1.8706, "step": 27169 }, { "epoch": 0.9, "grad_norm": 0.44675949215888977, "learning_rate": 0.0004753303505719, "loss": 1.8116, "step": 27170 }, { "epoch": 0.9, "grad_norm": 0.42528825998306274, "learning_rate": 0.0004753218691908137, "loss": 1.8159, "step": 27171 }, { "epoch": 0.9, "grad_norm": 0.42696309089660645, "learning_rate": 0.0004753133875969124, "loss": 1.8546, "step": 27172 }, { "epoch": 0.9, "grad_norm": 0.4325152635574341, "learning_rate": 0.0004753049057902066, "loss": 1.8187, "step": 27173 }, { "epoch": 0.9, "grad_norm": 0.4403928220272064, "learning_rate": 0.00047529642377070635, "loss": 1.8719, "step": 27174 }, { "epoch": 0.9, "grad_norm": 0.4442511200904846, "learning_rate": 0.0004752879415384221, "loss": 1.7946, "step": 27175 }, { "epoch": 0.9, "grad_norm": 0.43641024827957153, "learning_rate": 0.0004752794590933642, "loss": 1.7829, "step": 27176 }, { "epoch": 0.9, "grad_norm": 0.4421849250793457, "learning_rate": 0.00047527097643554274, "loss": 1.8168, "step": 27177 }, { "epoch": 0.9, "grad_norm": 0.44608184695243835, "learning_rate": 0.0004752624935649682, "loss": 1.8559, "step": 27178 }, { "epoch": 0.9, "grad_norm": 0.4249175488948822, "learning_rate": 0.0004752540104816508, "loss": 1.7394, "step": 27179 }, { "epoch": 0.9, "grad_norm": 0.45167115330696106, "learning_rate": 0.0004752455271856008, "loss": 1.7466, "step": 27180 }, { "epoch": 0.9, "grad_norm": 0.45562827587127686, "learning_rate": 0.00047523704367682864, "loss": 1.9006, "step": 27181 }, { "epoch": 0.9, "grad_norm": 0.46376025676727295, "learning_rate": 0.00047522855995534453, "loss": 1.8896, "step": 27182 }, { "epoch": 0.9, "grad_norm": 0.4201977252960205, "learning_rate": 0.00047522007602115876, "loss": 1.8294, "step": 27183 }, { "epoch": 0.9, "grad_norm": 0.4498387575149536, "learning_rate": 0.00047521159187428164, "loss": 1.8344, "step": 27184 }, { "epoch": 0.9, "grad_norm": 0.45334064960479736, "learning_rate": 0.0004752031075147234, "loss": 1.8464, "step": 27185 }, { "epoch": 0.9, "grad_norm": 0.4460013806819916, "learning_rate": 0.0004751946229424945, "loss": 1.8193, "step": 27186 }, { "epoch": 0.9, "grad_norm": 0.447822242975235, "learning_rate": 0.000475186138157605, "loss": 1.8559, "step": 27187 }, { "epoch": 0.9, "grad_norm": 0.4532719850540161, "learning_rate": 0.0004751776531600654, "loss": 1.8272, "step": 27188 }, { "epoch": 0.9, "grad_norm": 0.43307751417160034, "learning_rate": 0.000475169167949886, "loss": 1.8615, "step": 27189 }, { "epoch": 0.9, "grad_norm": 0.4561024010181427, "learning_rate": 0.000475160682527077, "loss": 1.8201, "step": 27190 }, { "epoch": 0.9, "grad_norm": 0.4540785849094391, "learning_rate": 0.00047515219689164877, "loss": 1.8098, "step": 27191 }, { "epoch": 0.9, "grad_norm": 0.446698933839798, "learning_rate": 0.00047514371104361144, "loss": 1.809, "step": 27192 }, { "epoch": 0.9, "grad_norm": 0.4446384012699127, "learning_rate": 0.0004751352249829757, "loss": 1.7727, "step": 27193 }, { "epoch": 0.9, "grad_norm": 0.441128134727478, "learning_rate": 0.0004751267387097514, "loss": 1.729, "step": 27194 }, { "epoch": 0.9, "grad_norm": 0.45208871364593506, "learning_rate": 0.0004751182522239491, "loss": 1.782, "step": 27195 }, { "epoch": 0.9, "grad_norm": 0.4474763870239258, "learning_rate": 0.0004751097655255791, "loss": 1.8791, "step": 27196 }, { "epoch": 0.9, "grad_norm": 0.4404069483280182, "learning_rate": 0.00047510127861465155, "loss": 1.8486, "step": 27197 }, { "epoch": 0.9, "grad_norm": 0.45263320207595825, "learning_rate": 0.00047509279149117695, "loss": 1.7743, "step": 27198 }, { "epoch": 0.9, "grad_norm": 0.4578206539154053, "learning_rate": 0.0004750843041551655, "loss": 1.8255, "step": 27199 }, { "epoch": 0.9, "grad_norm": 0.4327208995819092, "learning_rate": 0.00047507581660662736, "loss": 1.8435, "step": 27200 }, { "epoch": 0.9, "grad_norm": 0.4599144756793976, "learning_rate": 0.0004750673288455731, "loss": 1.8092, "step": 27201 }, { "epoch": 0.91, "grad_norm": 0.43465253710746765, "learning_rate": 0.0004750588408720128, "loss": 1.8492, "step": 27202 }, { "epoch": 0.91, "grad_norm": 0.4461325407028198, "learning_rate": 0.0004750503526859569, "loss": 1.7841, "step": 27203 }, { "epoch": 0.91, "grad_norm": 0.44858524203300476, "learning_rate": 0.0004750418642874158, "loss": 1.7888, "step": 27204 }, { "epoch": 0.91, "grad_norm": 0.6518182158470154, "learning_rate": 0.00047503337567639947, "loss": 1.8226, "step": 27205 }, { "epoch": 0.91, "grad_norm": 0.4512263834476471, "learning_rate": 0.0004750248868529185, "loss": 1.7714, "step": 27206 }, { "epoch": 0.91, "grad_norm": 0.4346071183681488, "learning_rate": 0.00047501639781698306, "loss": 1.828, "step": 27207 }, { "epoch": 0.91, "grad_norm": 0.43149229884147644, "learning_rate": 0.0004750079085686036, "loss": 1.8319, "step": 27208 }, { "epoch": 0.91, "grad_norm": 0.4396573007106781, "learning_rate": 0.0004749994191077903, "loss": 1.7729, "step": 27209 }, { "epoch": 0.91, "grad_norm": 0.4380577504634857, "learning_rate": 0.0004749909294345534, "loss": 1.8286, "step": 27210 }, { "epoch": 0.91, "grad_norm": 0.4351489245891571, "learning_rate": 0.00047498243954890345, "loss": 1.8079, "step": 27211 }, { "epoch": 0.91, "grad_norm": 0.447338342666626, "learning_rate": 0.0004749739494508504, "loss": 1.7413, "step": 27212 }, { "epoch": 0.91, "grad_norm": 0.4517301321029663, "learning_rate": 0.00047496545914040494, "loss": 1.8223, "step": 27213 }, { "epoch": 0.91, "grad_norm": 0.4401039183139801, "learning_rate": 0.00047495696861757705, "loss": 1.8017, "step": 27214 }, { "epoch": 0.91, "grad_norm": 0.4537113308906555, "learning_rate": 0.00047494847788237727, "loss": 1.8948, "step": 27215 }, { "epoch": 0.91, "grad_norm": 0.4311901926994324, "learning_rate": 0.00047493998693481573, "loss": 1.8304, "step": 27216 }, { "epoch": 0.91, "grad_norm": 0.43352609872817993, "learning_rate": 0.00047493149577490295, "loss": 1.7818, "step": 27217 }, { "epoch": 0.91, "grad_norm": 0.4397827684879303, "learning_rate": 0.000474923004402649, "loss": 1.8194, "step": 27218 }, { "epoch": 0.91, "grad_norm": 0.4391848146915436, "learning_rate": 0.0004749145128180644, "loss": 1.8574, "step": 27219 }, { "epoch": 0.91, "grad_norm": 0.43183737993240356, "learning_rate": 0.00047490602102115924, "loss": 1.7484, "step": 27220 }, { "epoch": 0.91, "grad_norm": 0.44813433289527893, "learning_rate": 0.00047489752901194395, "loss": 1.8597, "step": 27221 }, { "epoch": 0.91, "grad_norm": 0.4297390282154083, "learning_rate": 0.0004748890367904289, "loss": 1.8652, "step": 27222 }, { "epoch": 0.91, "grad_norm": 0.5034350752830505, "learning_rate": 0.00047488054435662435, "loss": 1.8799, "step": 27223 }, { "epoch": 0.91, "grad_norm": 0.4354007840156555, "learning_rate": 0.00047487205171054046, "loss": 1.7953, "step": 27224 }, { "epoch": 0.91, "grad_norm": 0.4293987452983856, "learning_rate": 0.0004748635588521878, "loss": 1.8742, "step": 27225 }, { "epoch": 0.91, "grad_norm": 0.4402201771736145, "learning_rate": 0.0004748550657815764, "loss": 1.8432, "step": 27226 }, { "epoch": 0.91, "grad_norm": 0.449200302362442, "learning_rate": 0.0004748465724987169, "loss": 1.7806, "step": 27227 }, { "epoch": 0.91, "grad_norm": 0.4347531795501709, "learning_rate": 0.00047483807900361924, "loss": 1.7999, "step": 27228 }, { "epoch": 0.91, "grad_norm": 0.4555707275867462, "learning_rate": 0.00047482958529629403, "loss": 1.8942, "step": 27229 }, { "epoch": 0.91, "grad_norm": 0.42530834674835205, "learning_rate": 0.0004748210913767514, "loss": 1.7722, "step": 27230 }, { "epoch": 0.91, "grad_norm": 0.4554974436759949, "learning_rate": 0.00047481259724500185, "loss": 1.8432, "step": 27231 }, { "epoch": 0.91, "grad_norm": 0.4417150914669037, "learning_rate": 0.0004748041029010554, "loss": 1.8111, "step": 27232 }, { "epoch": 0.91, "grad_norm": 0.45225095748901367, "learning_rate": 0.0004747956083449226, "loss": 1.7677, "step": 27233 }, { "epoch": 0.91, "grad_norm": 0.44672298431396484, "learning_rate": 0.00047478711357661367, "loss": 1.8223, "step": 27234 }, { "epoch": 0.91, "grad_norm": 0.4414311349391937, "learning_rate": 0.0004747786185961389, "loss": 1.8891, "step": 27235 }, { "epoch": 0.91, "grad_norm": 0.4358684718608856, "learning_rate": 0.0004747701234035087, "loss": 1.8052, "step": 27236 }, { "epoch": 0.91, "grad_norm": 0.46383655071258545, "learning_rate": 0.00047476162799873334, "loss": 1.7927, "step": 27237 }, { "epoch": 0.91, "grad_norm": 0.4411894977092743, "learning_rate": 0.0004747531323818231, "loss": 1.7782, "step": 27238 }, { "epoch": 0.91, "grad_norm": 0.44912758469581604, "learning_rate": 0.00047474463655278827, "loss": 1.8954, "step": 27239 }, { "epoch": 0.91, "grad_norm": 0.4462510645389557, "learning_rate": 0.0004747361405116392, "loss": 1.7599, "step": 27240 }, { "epoch": 0.91, "grad_norm": 0.44593504071235657, "learning_rate": 0.00047472764425838626, "loss": 1.7863, "step": 27241 }, { "epoch": 0.91, "grad_norm": 0.43674352765083313, "learning_rate": 0.00047471914779303955, "loss": 1.8691, "step": 27242 }, { "epoch": 0.91, "grad_norm": 0.43866828083992004, "learning_rate": 0.0004747106511156097, "loss": 1.8154, "step": 27243 }, { "epoch": 0.91, "grad_norm": 0.45190519094467163, "learning_rate": 0.00047470215422610684, "loss": 1.7039, "step": 27244 }, { "epoch": 0.91, "grad_norm": 0.44331520795822144, "learning_rate": 0.0004746936571245413, "loss": 1.881, "step": 27245 }, { "epoch": 0.91, "grad_norm": 0.4851352870464325, "learning_rate": 0.00047468515981092336, "loss": 1.8736, "step": 27246 }, { "epoch": 0.91, "grad_norm": 0.44851571321487427, "learning_rate": 0.00047467666228526335, "loss": 1.7636, "step": 27247 }, { "epoch": 0.91, "grad_norm": 0.44787073135375977, "learning_rate": 0.00047466816454757167, "loss": 1.8449, "step": 27248 }, { "epoch": 0.91, "grad_norm": 0.44719240069389343, "learning_rate": 0.00047465966659785856, "loss": 1.8449, "step": 27249 }, { "epoch": 0.91, "grad_norm": 0.44320744276046753, "learning_rate": 0.0004746511684361344, "loss": 1.8412, "step": 27250 }, { "epoch": 0.91, "grad_norm": 0.4507997930049896, "learning_rate": 0.00047464267006240937, "loss": 1.7439, "step": 27251 }, { "epoch": 0.91, "grad_norm": 0.4353083670139313, "learning_rate": 0.00047463417147669394, "loss": 1.8021, "step": 27252 }, { "epoch": 0.91, "grad_norm": 0.45016175508499146, "learning_rate": 0.00047462567267899834, "loss": 1.8147, "step": 27253 }, { "epoch": 0.91, "grad_norm": 0.4676283299922943, "learning_rate": 0.0004746171736693328, "loss": 1.8337, "step": 27254 }, { "epoch": 0.91, "grad_norm": 0.4409206807613373, "learning_rate": 0.00047460867444770787, "loss": 1.7565, "step": 27255 }, { "epoch": 0.91, "grad_norm": 0.4410281777381897, "learning_rate": 0.0004746001750141337, "loss": 1.8432, "step": 27256 }, { "epoch": 0.91, "grad_norm": 0.4375411868095398, "learning_rate": 0.00047459167536862063, "loss": 1.7327, "step": 27257 }, { "epoch": 0.91, "grad_norm": 0.4480586349964142, "learning_rate": 0.000474583175511179, "loss": 1.8427, "step": 27258 }, { "epoch": 0.91, "grad_norm": 0.44050857424736023, "learning_rate": 0.000474574675441819, "loss": 1.838, "step": 27259 }, { "epoch": 0.91, "grad_norm": 0.4214952290058136, "learning_rate": 0.0004745661751605512, "loss": 1.782, "step": 27260 }, { "epoch": 0.91, "grad_norm": 0.4341009557247162, "learning_rate": 0.00047455767466738583, "loss": 1.8297, "step": 27261 }, { "epoch": 0.91, "grad_norm": 0.4564341604709625, "learning_rate": 0.00047454917396233304, "loss": 1.8115, "step": 27262 }, { "epoch": 0.91, "grad_norm": 0.42480653524398804, "learning_rate": 0.00047454067304540334, "loss": 1.8136, "step": 27263 }, { "epoch": 0.91, "grad_norm": 0.4398958086967468, "learning_rate": 0.00047453217191660693, "loss": 1.8008, "step": 27264 }, { "epoch": 0.91, "grad_norm": 0.44306039810180664, "learning_rate": 0.0004745236705759543, "loss": 1.7837, "step": 27265 }, { "epoch": 0.91, "grad_norm": 0.4516107738018036, "learning_rate": 0.0004745151690234555, "loss": 1.8802, "step": 27266 }, { "epoch": 0.91, "grad_norm": 0.45652541518211365, "learning_rate": 0.00047450666725912114, "loss": 1.73, "step": 27267 }, { "epoch": 0.91, "grad_norm": 0.4450187385082245, "learning_rate": 0.00047449816528296137, "loss": 1.8265, "step": 27268 }, { "epoch": 0.91, "grad_norm": 0.437200129032135, "learning_rate": 0.0004744896630949864, "loss": 1.8246, "step": 27269 }, { "epoch": 0.91, "grad_norm": 0.44323471188545227, "learning_rate": 0.00047448116069520683, "loss": 1.6987, "step": 27270 }, { "epoch": 0.91, "grad_norm": 0.4540184736251831, "learning_rate": 0.0004744726580836328, "loss": 1.8083, "step": 27271 }, { "epoch": 0.91, "grad_norm": 0.45216429233551025, "learning_rate": 0.0004744641552602746, "loss": 1.8775, "step": 27272 }, { "epoch": 0.91, "grad_norm": 0.43664783239364624, "learning_rate": 0.00047445565222514275, "loss": 1.7973, "step": 27273 }, { "epoch": 0.91, "grad_norm": 0.4406900107860565, "learning_rate": 0.0004744471489782473, "loss": 1.8437, "step": 27274 }, { "epoch": 0.91, "grad_norm": 0.45465990900993347, "learning_rate": 0.0004744386455195988, "loss": 1.7531, "step": 27275 }, { "epoch": 0.91, "grad_norm": 0.43640273809432983, "learning_rate": 0.00047443014184920753, "loss": 1.8934, "step": 27276 }, { "epoch": 0.91, "grad_norm": 0.44386643171310425, "learning_rate": 0.0004744216379670836, "loss": 1.8796, "step": 27277 }, { "epoch": 0.91, "grad_norm": 0.4327455461025238, "learning_rate": 0.0004744131338732377, "loss": 1.773, "step": 27278 }, { "epoch": 0.91, "grad_norm": 0.45280832052230835, "learning_rate": 0.00047440462956767983, "loss": 1.8587, "step": 27279 }, { "epoch": 0.91, "grad_norm": 0.46664318442344666, "learning_rate": 0.00047439612505042046, "loss": 1.9073, "step": 27280 }, { "epoch": 0.91, "grad_norm": 0.42298200726509094, "learning_rate": 0.00047438762032146997, "loss": 1.8207, "step": 27281 }, { "epoch": 0.91, "grad_norm": 0.43812769651412964, "learning_rate": 0.00047437911538083854, "loss": 1.8195, "step": 27282 }, { "epoch": 0.91, "grad_norm": 0.44641855359077454, "learning_rate": 0.0004743706102285366, "loss": 1.8651, "step": 27283 }, { "epoch": 0.91, "grad_norm": 0.44301554560661316, "learning_rate": 0.0004743621048645743, "loss": 1.8445, "step": 27284 }, { "epoch": 0.91, "grad_norm": 0.42359036207199097, "learning_rate": 0.0004743535992889622, "loss": 1.8298, "step": 27285 }, { "epoch": 0.91, "grad_norm": 0.43843257427215576, "learning_rate": 0.00047434509350171054, "loss": 1.815, "step": 27286 }, { "epoch": 0.91, "grad_norm": 0.4351370632648468, "learning_rate": 0.00047433658750282954, "loss": 1.7931, "step": 27287 }, { "epoch": 0.91, "grad_norm": 0.4443551301956177, "learning_rate": 0.0004743280812923297, "loss": 1.7297, "step": 27288 }, { "epoch": 0.91, "grad_norm": 0.42749103903770447, "learning_rate": 0.0004743195748702212, "loss": 1.7846, "step": 27289 }, { "epoch": 0.91, "grad_norm": 0.4417877495288849, "learning_rate": 0.00047431106823651447, "loss": 1.7898, "step": 27290 }, { "epoch": 0.91, "grad_norm": 0.43794798851013184, "learning_rate": 0.0004743025613912197, "loss": 1.8991, "step": 27291 }, { "epoch": 0.91, "grad_norm": 0.4316733181476593, "learning_rate": 0.0004742940543343474, "loss": 1.7693, "step": 27292 }, { "epoch": 0.91, "grad_norm": 0.42740198969841003, "learning_rate": 0.0004742855470659078, "loss": 1.8121, "step": 27293 }, { "epoch": 0.91, "grad_norm": 0.42073431611061096, "learning_rate": 0.00047427703958591115, "loss": 1.7837, "step": 27294 }, { "epoch": 0.91, "grad_norm": 0.43466538190841675, "learning_rate": 0.00047426853189436787, "loss": 1.7912, "step": 27295 }, { "epoch": 0.91, "grad_norm": 0.48003581166267395, "learning_rate": 0.00047426002399128823, "loss": 1.7966, "step": 27296 }, { "epoch": 0.91, "grad_norm": 0.44567176699638367, "learning_rate": 0.00047425151587668265, "loss": 1.8107, "step": 27297 }, { "epoch": 0.91, "grad_norm": 0.4258919954299927, "learning_rate": 0.0004742430075505615, "loss": 1.7916, "step": 27298 }, { "epoch": 0.91, "grad_norm": 0.42627251148223877, "learning_rate": 0.0004742344990129349, "loss": 1.8591, "step": 27299 }, { "epoch": 0.91, "grad_norm": 0.4589358866214752, "learning_rate": 0.00047422599026381327, "loss": 1.8393, "step": 27300 }, { "epoch": 0.91, "grad_norm": 0.43016213178634644, "learning_rate": 0.000474217481303207, "loss": 1.8655, "step": 27301 }, { "epoch": 0.91, "grad_norm": 0.4402444362640381, "learning_rate": 0.00047420897213112635, "loss": 1.7606, "step": 27302 }, { "epoch": 0.91, "grad_norm": 0.446218878030777, "learning_rate": 0.00047420046274758166, "loss": 1.7875, "step": 27303 }, { "epoch": 0.91, "grad_norm": 0.46307533979415894, "learning_rate": 0.00047419195315258334, "loss": 1.7925, "step": 27304 }, { "epoch": 0.91, "grad_norm": 0.4319702386856079, "learning_rate": 0.00047418344334614163, "loss": 1.8214, "step": 27305 }, { "epoch": 0.91, "grad_norm": 0.4345853924751282, "learning_rate": 0.00047417493332826694, "loss": 1.8712, "step": 27306 }, { "epoch": 0.91, "grad_norm": 0.44320148229599, "learning_rate": 0.0004741664230989695, "loss": 1.7724, "step": 27307 }, { "epoch": 0.91, "grad_norm": 0.42326173186302185, "learning_rate": 0.00047415791265825963, "loss": 1.8767, "step": 27308 }, { "epoch": 0.91, "grad_norm": 0.45963093638420105, "learning_rate": 0.0004741494020061477, "loss": 1.8241, "step": 27309 }, { "epoch": 0.91, "grad_norm": 0.436592161655426, "learning_rate": 0.00047414089114264417, "loss": 1.7483, "step": 27310 }, { "epoch": 0.91, "grad_norm": 0.4302235245704651, "learning_rate": 0.00047413238006775927, "loss": 1.7908, "step": 27311 }, { "epoch": 0.91, "grad_norm": 0.4612369239330292, "learning_rate": 0.0004741238687815032, "loss": 1.787, "step": 27312 }, { "epoch": 0.91, "grad_norm": 0.4322700798511505, "learning_rate": 0.0004741153572838864, "loss": 1.7829, "step": 27313 }, { "epoch": 0.91, "grad_norm": 0.4391280710697174, "learning_rate": 0.0004741068455749193, "loss": 1.8478, "step": 27314 }, { "epoch": 0.91, "grad_norm": 0.4257166087627411, "learning_rate": 0.0004740983336546122, "loss": 1.7862, "step": 27315 }, { "epoch": 0.91, "grad_norm": 0.4551815986633301, "learning_rate": 0.00047408982152297526, "loss": 1.8837, "step": 27316 }, { "epoch": 0.91, "grad_norm": 0.44820812344551086, "learning_rate": 0.0004740813091800189, "loss": 1.7744, "step": 27317 }, { "epoch": 0.91, "grad_norm": 0.4333645701408386, "learning_rate": 0.00047407279662575356, "loss": 1.8629, "step": 27318 }, { "epoch": 0.91, "grad_norm": 0.4424683749675751, "learning_rate": 0.00047406428386018943, "loss": 1.8585, "step": 27319 }, { "epoch": 0.91, "grad_norm": 0.4456420838832855, "learning_rate": 0.00047405577088333696, "loss": 1.8166, "step": 27320 }, { "epoch": 0.91, "grad_norm": 0.4585796892642975, "learning_rate": 0.0004740472576952064, "loss": 1.8647, "step": 27321 }, { "epoch": 0.91, "grad_norm": 0.451301246881485, "learning_rate": 0.0004740387442958081, "loss": 1.7939, "step": 27322 }, { "epoch": 0.91, "grad_norm": 0.47860801219940186, "learning_rate": 0.00047403023068515247, "loss": 1.7519, "step": 27323 }, { "epoch": 0.91, "grad_norm": 0.4416878819465637, "learning_rate": 0.00047402171686324973, "loss": 1.8367, "step": 27324 }, { "epoch": 0.91, "grad_norm": 0.44659778475761414, "learning_rate": 0.00047401320283011036, "loss": 1.8381, "step": 27325 }, { "epoch": 0.91, "grad_norm": 0.4484756588935852, "learning_rate": 0.00047400468858574456, "loss": 1.8784, "step": 27326 }, { "epoch": 0.91, "grad_norm": 0.45720672607421875, "learning_rate": 0.00047399617413016257, "loss": 1.8448, "step": 27327 }, { "epoch": 0.91, "grad_norm": 0.429046630859375, "learning_rate": 0.000473987659463375, "loss": 1.8096, "step": 27328 }, { "epoch": 0.91, "grad_norm": 0.44319412112236023, "learning_rate": 0.000473979144585392, "loss": 1.7816, "step": 27329 }, { "epoch": 0.91, "grad_norm": 0.5709507465362549, "learning_rate": 0.000473970629496224, "loss": 1.9205, "step": 27330 }, { "epoch": 0.91, "grad_norm": 0.49374356865882874, "learning_rate": 0.0004739621141958813, "loss": 1.8883, "step": 27331 }, { "epoch": 0.91, "grad_norm": 0.44729530811309814, "learning_rate": 0.00047395359868437415, "loss": 1.8492, "step": 27332 }, { "epoch": 0.91, "grad_norm": 0.4673098027706146, "learning_rate": 0.000473945082961713, "loss": 1.8491, "step": 27333 }, { "epoch": 0.91, "grad_norm": 0.4269605875015259, "learning_rate": 0.0004739365670279081, "loss": 1.7333, "step": 27334 }, { "epoch": 0.91, "grad_norm": 0.4747609794139862, "learning_rate": 0.00047392805088296985, "loss": 1.8193, "step": 27335 }, { "epoch": 0.91, "grad_norm": 0.4561755955219269, "learning_rate": 0.00047391953452690863, "loss": 1.9103, "step": 27336 }, { "epoch": 0.91, "grad_norm": 0.447843074798584, "learning_rate": 0.0004739110179597347, "loss": 1.7748, "step": 27337 }, { "epoch": 0.91, "grad_norm": 0.45755189657211304, "learning_rate": 0.0004739025011814584, "loss": 1.8476, "step": 27338 }, { "epoch": 0.91, "grad_norm": 0.4796486794948578, "learning_rate": 0.00047389398419209005, "loss": 1.8532, "step": 27339 }, { "epoch": 0.91, "grad_norm": 0.4347333610057831, "learning_rate": 0.00047388546699164014, "loss": 1.7629, "step": 27340 }, { "epoch": 0.91, "grad_norm": 0.41301679611206055, "learning_rate": 0.00047387694958011883, "loss": 1.7754, "step": 27341 }, { "epoch": 0.91, "grad_norm": 0.4460969567298889, "learning_rate": 0.00047386843195753643, "loss": 1.7943, "step": 27342 }, { "epoch": 0.91, "grad_norm": 0.43819814920425415, "learning_rate": 0.0004738599141239035, "loss": 1.8433, "step": 27343 }, { "epoch": 0.91, "grad_norm": 0.4648332893848419, "learning_rate": 0.0004738513960792302, "loss": 1.7835, "step": 27344 }, { "epoch": 0.91, "grad_norm": 0.46685758233070374, "learning_rate": 0.00047384287782352696, "loss": 1.8682, "step": 27345 }, { "epoch": 0.91, "grad_norm": 0.4394686818122864, "learning_rate": 0.0004738343593568041, "loss": 1.7903, "step": 27346 }, { "epoch": 0.91, "grad_norm": 0.43214288353919983, "learning_rate": 0.00047382584067907186, "loss": 1.836, "step": 27347 }, { "epoch": 0.91, "grad_norm": 0.4373135268688202, "learning_rate": 0.00047381732179034076, "loss": 1.7604, "step": 27348 }, { "epoch": 0.91, "grad_norm": 0.42987510561943054, "learning_rate": 0.0004738088026906209, "loss": 1.7217, "step": 27349 }, { "epoch": 0.91, "grad_norm": 0.4407368302345276, "learning_rate": 0.00047380028337992285, "loss": 1.8119, "step": 27350 }, { "epoch": 0.91, "grad_norm": 0.46225887537002563, "learning_rate": 0.00047379176385825686, "loss": 1.883, "step": 27351 }, { "epoch": 0.91, "grad_norm": 0.46260344982147217, "learning_rate": 0.0004737832441256332, "loss": 1.7892, "step": 27352 }, { "epoch": 0.91, "grad_norm": 0.4600381553173065, "learning_rate": 0.0004737747241820624, "loss": 1.8819, "step": 27353 }, { "epoch": 0.91, "grad_norm": 0.446840763092041, "learning_rate": 0.00047376620402755465, "loss": 1.8145, "step": 27354 }, { "epoch": 0.91, "grad_norm": 0.4614937901496887, "learning_rate": 0.00047375768366212035, "loss": 1.7543, "step": 27355 }, { "epoch": 0.91, "grad_norm": 0.4350526034832001, "learning_rate": 0.0004737491630857698, "loss": 1.8081, "step": 27356 }, { "epoch": 0.91, "grad_norm": 0.41875818371772766, "learning_rate": 0.0004737406422985133, "loss": 1.8387, "step": 27357 }, { "epoch": 0.91, "grad_norm": 0.43804866075515747, "learning_rate": 0.00047373212130036145, "loss": 1.8363, "step": 27358 }, { "epoch": 0.91, "grad_norm": 0.43685969710350037, "learning_rate": 0.0004737236000913242, "loss": 1.856, "step": 27359 }, { "epoch": 0.91, "grad_norm": 0.4598372280597687, "learning_rate": 0.00047371507867141215, "loss": 1.8289, "step": 27360 }, { "epoch": 0.91, "grad_norm": 0.43740642070770264, "learning_rate": 0.0004737065570406357, "loss": 1.7643, "step": 27361 }, { "epoch": 0.91, "grad_norm": 0.43674182891845703, "learning_rate": 0.000473698035199005, "loss": 1.8091, "step": 27362 }, { "epoch": 0.91, "grad_norm": 0.4684450626373291, "learning_rate": 0.00047368951314653054, "loss": 1.8378, "step": 27363 }, { "epoch": 0.91, "grad_norm": 0.46300071477890015, "learning_rate": 0.0004736809908832224, "loss": 1.8321, "step": 27364 }, { "epoch": 0.91, "grad_norm": 0.45074763894081116, "learning_rate": 0.00047367246840909137, "loss": 1.8429, "step": 27365 }, { "epoch": 0.91, "grad_norm": 0.4452001452445984, "learning_rate": 0.0004736639457241474, "loss": 1.9117, "step": 27366 }, { "epoch": 0.91, "grad_norm": 0.49194562435150146, "learning_rate": 0.00047365542282840105, "loss": 1.7744, "step": 27367 }, { "epoch": 0.91, "grad_norm": 0.4498251676559448, "learning_rate": 0.0004736468997218627, "loss": 1.9025, "step": 27368 }, { "epoch": 0.91, "grad_norm": 0.4346463978290558, "learning_rate": 0.00047363837640454243, "loss": 1.8515, "step": 27369 }, { "epoch": 0.91, "grad_norm": 0.44873231649398804, "learning_rate": 0.0004736298528764509, "loss": 1.8118, "step": 27370 }, { "epoch": 0.91, "grad_norm": 0.4553053379058838, "learning_rate": 0.0004736213291375981, "loss": 1.7948, "step": 27371 }, { "epoch": 0.91, "grad_norm": 0.45476001501083374, "learning_rate": 0.0004736128051879948, "loss": 1.8236, "step": 27372 }, { "epoch": 0.91, "grad_norm": 0.44060376286506653, "learning_rate": 0.00047360428102765105, "loss": 1.8484, "step": 27373 }, { "epoch": 0.91, "grad_norm": 0.46245595812797546, "learning_rate": 0.00047359575665657723, "loss": 1.8382, "step": 27374 }, { "epoch": 0.91, "grad_norm": 0.48035579919815063, "learning_rate": 0.00047358723207478385, "loss": 1.8122, "step": 27375 }, { "epoch": 0.91, "grad_norm": 0.4524131119251251, "learning_rate": 0.0004735787072822811, "loss": 1.7721, "step": 27376 }, { "epoch": 0.91, "grad_norm": 0.44882017374038696, "learning_rate": 0.0004735701822790794, "loss": 1.8114, "step": 27377 }, { "epoch": 0.91, "grad_norm": 0.49793797731399536, "learning_rate": 0.00047356165706518904, "loss": 1.8603, "step": 27378 }, { "epoch": 0.91, "grad_norm": 0.460479736328125, "learning_rate": 0.00047355313164062037, "loss": 1.7807, "step": 27379 }, { "epoch": 0.91, "grad_norm": 0.46405091881752014, "learning_rate": 0.0004735446060053838, "loss": 1.827, "step": 27380 }, { "epoch": 0.91, "grad_norm": 0.48798149824142456, "learning_rate": 0.00047353608015948966, "loss": 1.7831, "step": 27381 }, { "epoch": 0.91, "grad_norm": 0.4365366995334625, "learning_rate": 0.00047352755410294827, "loss": 1.8282, "step": 27382 }, { "epoch": 0.91, "grad_norm": 0.4328298568725586, "learning_rate": 0.00047351902783577003, "loss": 1.8095, "step": 27383 }, { "epoch": 0.91, "grad_norm": 0.4434807002544403, "learning_rate": 0.00047351050135796525, "loss": 1.7643, "step": 27384 }, { "epoch": 0.91, "grad_norm": 0.4563648998737335, "learning_rate": 0.0004735019746695443, "loss": 1.8513, "step": 27385 }, { "epoch": 0.91, "grad_norm": 0.4492552578449249, "learning_rate": 0.00047349344777051746, "loss": 1.7278, "step": 27386 }, { "epoch": 0.91, "grad_norm": 0.45203354954719543, "learning_rate": 0.00047348492066089516, "loss": 1.7949, "step": 27387 }, { "epoch": 0.91, "grad_norm": 0.4516521692276001, "learning_rate": 0.0004734763933406878, "loss": 1.9059, "step": 27388 }, { "epoch": 0.91, "grad_norm": 0.44494888186454773, "learning_rate": 0.0004734678658099055, "loss": 1.7639, "step": 27389 }, { "epoch": 0.91, "grad_norm": 0.44006016850471497, "learning_rate": 0.0004734593380685589, "loss": 1.7871, "step": 27390 }, { "epoch": 0.91, "grad_norm": 0.4388403594493866, "learning_rate": 0.00047345081011665815, "loss": 1.851, "step": 27391 }, { "epoch": 0.91, "grad_norm": 0.4640754461288452, "learning_rate": 0.00047344228195421375, "loss": 1.8047, "step": 27392 }, { "epoch": 0.91, "grad_norm": 0.45977386832237244, "learning_rate": 0.0004734337535812359, "loss": 1.8457, "step": 27393 }, { "epoch": 0.91, "grad_norm": 0.4540032148361206, "learning_rate": 0.00047342522499773504, "loss": 1.7948, "step": 27394 }, { "epoch": 0.91, "grad_norm": 0.4364240765571594, "learning_rate": 0.0004734166962037216, "loss": 1.8194, "step": 27395 }, { "epoch": 0.91, "grad_norm": 0.4552086889743805, "learning_rate": 0.0004734081671992058, "loss": 1.8258, "step": 27396 }, { "epoch": 0.91, "grad_norm": 0.4398881494998932, "learning_rate": 0.00047339963798419797, "loss": 1.8735, "step": 27397 }, { "epoch": 0.91, "grad_norm": 0.43018490076065063, "learning_rate": 0.0004733911085587086, "loss": 1.7942, "step": 27398 }, { "epoch": 0.91, "grad_norm": 0.4457494914531708, "learning_rate": 0.00047338257892274796, "loss": 1.8519, "step": 27399 }, { "epoch": 0.91, "grad_norm": 0.44739216566085815, "learning_rate": 0.0004733740490763263, "loss": 1.8266, "step": 27400 }, { "epoch": 0.91, "grad_norm": 0.46177777647972107, "learning_rate": 0.0004733655190194542, "loss": 1.8361, "step": 27401 }, { "epoch": 0.91, "grad_norm": 0.4547877013683319, "learning_rate": 0.00047335698875214193, "loss": 1.7814, "step": 27402 }, { "epoch": 0.91, "grad_norm": 0.4479218125343323, "learning_rate": 0.0004733484582743999, "loss": 1.8632, "step": 27403 }, { "epoch": 0.91, "grad_norm": 0.4723103940486908, "learning_rate": 0.00047333992758623816, "loss": 1.7569, "step": 27404 }, { "epoch": 0.91, "grad_norm": 0.4418630301952362, "learning_rate": 0.00047333139668766737, "loss": 1.8896, "step": 27405 }, { "epoch": 0.91, "grad_norm": 0.4476034939289093, "learning_rate": 0.00047332286557869783, "loss": 1.7603, "step": 27406 }, { "epoch": 0.91, "grad_norm": 0.45042791962623596, "learning_rate": 0.0004733143342593399, "loss": 1.799, "step": 27407 }, { "epoch": 0.91, "grad_norm": 0.4356898367404938, "learning_rate": 0.00047330580272960385, "loss": 1.7877, "step": 27408 }, { "epoch": 0.91, "grad_norm": 0.4417265057563782, "learning_rate": 0.00047329727098950006, "loss": 1.8866, "step": 27409 }, { "epoch": 0.91, "grad_norm": 0.4367101788520813, "learning_rate": 0.000473288739039039, "loss": 1.8585, "step": 27410 }, { "epoch": 0.91, "grad_norm": 0.44132018089294434, "learning_rate": 0.0004732802068782309, "loss": 1.8201, "step": 27411 }, { "epoch": 0.91, "grad_norm": 0.44483861327171326, "learning_rate": 0.0004732716745070862, "loss": 1.8454, "step": 27412 }, { "epoch": 0.91, "grad_norm": 0.6013368368148804, "learning_rate": 0.0004732631419256152, "loss": 1.8762, "step": 27413 }, { "epoch": 0.91, "grad_norm": 0.45374172925949097, "learning_rate": 0.0004732546091338282, "loss": 1.7805, "step": 27414 }, { "epoch": 0.91, "grad_norm": 0.44462457299232483, "learning_rate": 0.00047324607613173576, "loss": 1.8553, "step": 27415 }, { "epoch": 0.91, "grad_norm": 0.44596248865127563, "learning_rate": 0.00047323754291934804, "loss": 1.8079, "step": 27416 }, { "epoch": 0.91, "grad_norm": 0.43880945444107056, "learning_rate": 0.0004732290094966755, "loss": 1.7726, "step": 27417 }, { "epoch": 0.91, "grad_norm": 0.44205307960510254, "learning_rate": 0.00047322047586372847, "loss": 1.8513, "step": 27418 }, { "epoch": 0.91, "grad_norm": 0.4349220395088196, "learning_rate": 0.0004732119420205172, "loss": 1.8563, "step": 27419 }, { "epoch": 0.91, "grad_norm": 0.445824533700943, "learning_rate": 0.0004732034079670523, "loss": 1.8022, "step": 27420 }, { "epoch": 0.91, "grad_norm": 0.45180976390838623, "learning_rate": 0.00047319487370334385, "loss": 1.878, "step": 27421 }, { "epoch": 0.91, "grad_norm": 0.4315565228462219, "learning_rate": 0.0004731863392294024, "loss": 1.7701, "step": 27422 }, { "epoch": 0.91, "grad_norm": 0.4310998320579529, "learning_rate": 0.00047317780454523835, "loss": 1.7635, "step": 27423 }, { "epoch": 0.91, "grad_norm": 0.4472504258155823, "learning_rate": 0.0004731692696508618, "loss": 1.8241, "step": 27424 }, { "epoch": 0.91, "grad_norm": 0.4400936961174011, "learning_rate": 0.0004731607345462833, "loss": 1.8245, "step": 27425 }, { "epoch": 0.91, "grad_norm": 0.4535253942012787, "learning_rate": 0.0004731521992315133, "loss": 1.8198, "step": 27426 }, { "epoch": 0.91, "grad_norm": 0.4221574366092682, "learning_rate": 0.0004731436637065619, "loss": 1.7135, "step": 27427 }, { "epoch": 0.91, "grad_norm": 0.43247681856155396, "learning_rate": 0.0004731351279714398, "loss": 1.7201, "step": 27428 }, { "epoch": 0.91, "grad_norm": 0.4288645088672638, "learning_rate": 0.0004731265920261569, "loss": 1.8203, "step": 27429 }, { "epoch": 0.91, "grad_norm": 0.442739337682724, "learning_rate": 0.00047311805587072403, "loss": 1.8155, "step": 27430 }, { "epoch": 0.91, "grad_norm": 0.449056476354599, "learning_rate": 0.0004731095195051513, "loss": 1.8282, "step": 27431 }, { "epoch": 0.91, "grad_norm": 0.43490439653396606, "learning_rate": 0.0004731009829294491, "loss": 1.8206, "step": 27432 }, { "epoch": 0.91, "grad_norm": 0.4380156397819519, "learning_rate": 0.0004730924461436278, "loss": 1.7531, "step": 27433 }, { "epoch": 0.91, "grad_norm": 0.42703527212142944, "learning_rate": 0.0004730839091476978, "loss": 1.7276, "step": 27434 }, { "epoch": 0.91, "grad_norm": 0.436109334230423, "learning_rate": 0.0004730753719416694, "loss": 1.7828, "step": 27435 }, { "epoch": 0.91, "grad_norm": 0.4278712570667267, "learning_rate": 0.00047306683452555306, "loss": 1.7833, "step": 27436 }, { "epoch": 0.91, "grad_norm": 0.4375562071800232, "learning_rate": 0.00047305829689935904, "loss": 1.8342, "step": 27437 }, { "epoch": 0.91, "grad_norm": 0.4318154752254486, "learning_rate": 0.0004730497590630977, "loss": 1.7889, "step": 27438 }, { "epoch": 0.91, "grad_norm": 0.4420096278190613, "learning_rate": 0.00047304122101677953, "loss": 1.7969, "step": 27439 }, { "epoch": 0.91, "grad_norm": 0.4451524317264557, "learning_rate": 0.0004730326827604148, "loss": 1.7754, "step": 27440 }, { "epoch": 0.91, "grad_norm": 0.4457367956638336, "learning_rate": 0.00047302414429401386, "loss": 1.8565, "step": 27441 }, { "epoch": 0.91, "grad_norm": 0.43317076563835144, "learning_rate": 0.00047301560561758714, "loss": 1.8244, "step": 27442 }, { "epoch": 0.91, "grad_norm": 0.4395432472229004, "learning_rate": 0.0004730070667311449, "loss": 1.7628, "step": 27443 }, { "epoch": 0.91, "grad_norm": 0.45175114274024963, "learning_rate": 0.0004729985276346976, "loss": 1.8109, "step": 27444 }, { "epoch": 0.91, "grad_norm": 0.4352603852748871, "learning_rate": 0.00047298998832825566, "loss": 1.8044, "step": 27445 }, { "epoch": 0.91, "grad_norm": 0.41679099202156067, "learning_rate": 0.00047298144881182926, "loss": 1.7668, "step": 27446 }, { "epoch": 0.91, "grad_norm": 0.44791021943092346, "learning_rate": 0.00047297290908542895, "loss": 1.8456, "step": 27447 }, { "epoch": 0.91, "grad_norm": 0.4325731098651886, "learning_rate": 0.00047296436914906503, "loss": 1.8344, "step": 27448 }, { "epoch": 0.91, "grad_norm": 0.4552772641181946, "learning_rate": 0.0004729558290027477, "loss": 1.827, "step": 27449 }, { "epoch": 0.91, "grad_norm": 0.42393091320991516, "learning_rate": 0.00047294728864648766, "loss": 1.8036, "step": 27450 }, { "epoch": 0.91, "grad_norm": 0.4418502151966095, "learning_rate": 0.0004729387480802949, "loss": 1.8673, "step": 27451 }, { "epoch": 0.91, "grad_norm": 0.4390285909175873, "learning_rate": 0.0004729302073041801, "loss": 1.8189, "step": 27452 }, { "epoch": 0.91, "grad_norm": 0.4343942105770111, "learning_rate": 0.00047292166631815355, "loss": 1.7842, "step": 27453 }, { "epoch": 0.91, "grad_norm": 0.4343574345111847, "learning_rate": 0.0004729131251222255, "loss": 1.7825, "step": 27454 }, { "epoch": 0.91, "grad_norm": 0.4421992003917694, "learning_rate": 0.0004729045837164064, "loss": 1.8108, "step": 27455 }, { "epoch": 0.91, "grad_norm": 0.44096019864082336, "learning_rate": 0.0004728960421007066, "loss": 1.7854, "step": 27456 }, { "epoch": 0.91, "grad_norm": 0.4355487525463104, "learning_rate": 0.00047288750027513654, "loss": 1.8358, "step": 27457 }, { "epoch": 0.91, "grad_norm": 0.4402320683002472, "learning_rate": 0.0004728789582397065, "loss": 1.8502, "step": 27458 }, { "epoch": 0.91, "grad_norm": 0.43421006202697754, "learning_rate": 0.0004728704159944268, "loss": 1.8738, "step": 27459 }, { "epoch": 0.91, "grad_norm": 0.437490850687027, "learning_rate": 0.0004728618735393081, "loss": 1.8019, "step": 27460 }, { "epoch": 0.91, "grad_norm": 0.46383652091026306, "learning_rate": 0.0004728533308743604, "loss": 1.7724, "step": 27461 }, { "epoch": 0.91, "grad_norm": 0.4367334246635437, "learning_rate": 0.00047284478799959417, "loss": 1.7643, "step": 27462 }, { "epoch": 0.91, "grad_norm": 0.45936885476112366, "learning_rate": 0.00047283624491501993, "loss": 1.8255, "step": 27463 }, { "epoch": 0.91, "grad_norm": 0.45015087723731995, "learning_rate": 0.00047282770162064786, "loss": 1.8456, "step": 27464 }, { "epoch": 0.91, "grad_norm": 0.4453216791152954, "learning_rate": 0.0004728191581164885, "loss": 1.8026, "step": 27465 }, { "epoch": 0.91, "grad_norm": 0.4313845634460449, "learning_rate": 0.0004728106144025522, "loss": 1.8001, "step": 27466 }, { "epoch": 0.91, "grad_norm": 0.430849552154541, "learning_rate": 0.00047280207047884924, "loss": 1.8348, "step": 27467 }, { "epoch": 0.91, "grad_norm": 0.45374470949172974, "learning_rate": 0.00047279352634539, "loss": 1.7509, "step": 27468 }, { "epoch": 0.91, "grad_norm": 0.4701623022556305, "learning_rate": 0.00047278498200218483, "loss": 1.6971, "step": 27469 }, { "epoch": 0.91, "grad_norm": 0.4571149945259094, "learning_rate": 0.00047277643744924426, "loss": 1.8135, "step": 27470 }, { "epoch": 0.91, "grad_norm": 0.43564724922180176, "learning_rate": 0.00047276789268657843, "loss": 1.8183, "step": 27471 }, { "epoch": 0.91, "grad_norm": 0.45962437987327576, "learning_rate": 0.00047275934771419793, "loss": 1.8141, "step": 27472 }, { "epoch": 0.91, "grad_norm": 0.4601849615573883, "learning_rate": 0.000472750802532113, "loss": 1.8179, "step": 27473 }, { "epoch": 0.91, "grad_norm": 0.4491167366504669, "learning_rate": 0.0004727422571403341, "loss": 1.8362, "step": 27474 }, { "epoch": 0.91, "grad_norm": 0.4637705087661743, "learning_rate": 0.00047273371153887147, "loss": 1.7689, "step": 27475 }, { "epoch": 0.91, "grad_norm": 0.44366922974586487, "learning_rate": 0.00047272516572773553, "loss": 1.7435, "step": 27476 }, { "epoch": 0.91, "grad_norm": 0.4339607357978821, "learning_rate": 0.0004727166197069368, "loss": 1.7585, "step": 27477 }, { "epoch": 0.91, "grad_norm": 0.43578827381134033, "learning_rate": 0.00047270807347648554, "loss": 1.845, "step": 27478 }, { "epoch": 0.91, "grad_norm": 0.452965646982193, "learning_rate": 0.0004726995270363921, "loss": 1.7497, "step": 27479 }, { "epoch": 0.91, "grad_norm": 0.4438954293727875, "learning_rate": 0.0004726909803866668, "loss": 1.8412, "step": 27480 }, { "epoch": 0.91, "grad_norm": 0.45387354493141174, "learning_rate": 0.0004726824335273202, "loss": 1.8269, "step": 27481 }, { "epoch": 0.91, "grad_norm": 0.4433766007423401, "learning_rate": 0.0004726738864583625, "loss": 1.8592, "step": 27482 }, { "epoch": 0.91, "grad_norm": 0.43083450198173523, "learning_rate": 0.0004726653391798043, "loss": 1.8074, "step": 27483 }, { "epoch": 0.91, "grad_norm": 0.4451567530632019, "learning_rate": 0.00047265679169165566, "loss": 1.8258, "step": 27484 }, { "epoch": 0.91, "grad_norm": 0.4498934745788574, "learning_rate": 0.0004726482439939271, "loss": 1.7842, "step": 27485 }, { "epoch": 0.91, "grad_norm": 0.45809853076934814, "learning_rate": 0.0004726396960866291, "loss": 1.8015, "step": 27486 }, { "epoch": 0.91, "grad_norm": 0.43970388174057007, "learning_rate": 0.0004726311479697719, "loss": 1.8049, "step": 27487 }, { "epoch": 0.91, "grad_norm": 0.44463491439819336, "learning_rate": 0.00047262259964336596, "loss": 1.7727, "step": 27488 }, { "epoch": 0.91, "grad_norm": 0.43954238295555115, "learning_rate": 0.00047261405110742157, "loss": 1.7831, "step": 27489 }, { "epoch": 0.91, "grad_norm": 0.4462328553199768, "learning_rate": 0.00047260550236194915, "loss": 1.8237, "step": 27490 }, { "epoch": 0.91, "grad_norm": 0.473134845495224, "learning_rate": 0.0004725969534069591, "loss": 1.7895, "step": 27491 }, { "epoch": 0.91, "grad_norm": 0.442656546831131, "learning_rate": 0.00047258840424246185, "loss": 1.784, "step": 27492 }, { "epoch": 0.91, "grad_norm": 0.4550504684448242, "learning_rate": 0.0004725798548684676, "loss": 1.7935, "step": 27493 }, { "epoch": 0.91, "grad_norm": 0.4766344428062439, "learning_rate": 0.0004725713052849869, "loss": 1.7714, "step": 27494 }, { "epoch": 0.91, "grad_norm": 0.46408817172050476, "learning_rate": 0.00047256275549203004, "loss": 1.8146, "step": 27495 }, { "epoch": 0.91, "grad_norm": 0.43048596382141113, "learning_rate": 0.00047255420548960746, "loss": 1.7945, "step": 27496 }, { "epoch": 0.91, "grad_norm": 0.4442826807498932, "learning_rate": 0.0004725456552777294, "loss": 1.85, "step": 27497 }, { "epoch": 0.91, "grad_norm": 0.4642082750797272, "learning_rate": 0.00047253710485640636, "loss": 1.8251, "step": 27498 }, { "epoch": 0.91, "grad_norm": 0.45052969455718994, "learning_rate": 0.00047252855422564877, "loss": 1.8187, "step": 27499 }, { "epoch": 0.91, "grad_norm": 0.44674378633499146, "learning_rate": 0.0004725200033854669, "loss": 1.8582, "step": 27500 }, { "epoch": 0.91, "grad_norm": 0.44832366704940796, "learning_rate": 0.0004725114523358712, "loss": 1.7962, "step": 27501 }, { "epoch": 0.91, "grad_norm": 0.4350398778915405, "learning_rate": 0.000472502901076872, "loss": 1.8188, "step": 27502 }, { "epoch": 0.92, "grad_norm": 0.45666876435279846, "learning_rate": 0.0004724943496084796, "loss": 1.7672, "step": 27503 }, { "epoch": 0.92, "grad_norm": 0.46784496307373047, "learning_rate": 0.0004724857979307046, "loss": 1.792, "step": 27504 }, { "epoch": 0.92, "grad_norm": 0.4495500326156616, "learning_rate": 0.0004724772460435573, "loss": 1.8228, "step": 27505 }, { "epoch": 0.92, "grad_norm": 0.4457826018333435, "learning_rate": 0.0004724686939470479, "loss": 1.862, "step": 27506 }, { "epoch": 0.92, "grad_norm": 0.46283501386642456, "learning_rate": 0.0004724601416411869, "loss": 1.7295, "step": 27507 }, { "epoch": 0.92, "grad_norm": 0.4765051305294037, "learning_rate": 0.00047245158912598474, "loss": 1.8262, "step": 27508 }, { "epoch": 0.92, "grad_norm": 0.45040541887283325, "learning_rate": 0.0004724430364014519, "loss": 1.8871, "step": 27509 }, { "epoch": 0.92, "grad_norm": 0.4619083106517792, "learning_rate": 0.00047243448346759846, "loss": 1.8148, "step": 27510 }, { "epoch": 0.92, "grad_norm": 0.45637744665145874, "learning_rate": 0.000472425930324435, "loss": 1.8688, "step": 27511 }, { "epoch": 0.92, "grad_norm": 0.48370978236198425, "learning_rate": 0.0004724173769719719, "loss": 1.7645, "step": 27512 }, { "epoch": 0.92, "grad_norm": 0.44928625226020813, "learning_rate": 0.0004724088234102194, "loss": 1.7751, "step": 27513 }, { "epoch": 0.92, "grad_norm": 0.4715096354484558, "learning_rate": 0.00047240026963918814, "loss": 1.7969, "step": 27514 }, { "epoch": 0.92, "grad_norm": 0.4517342150211334, "learning_rate": 0.00047239171565888823, "loss": 1.849, "step": 27515 }, { "epoch": 0.92, "grad_norm": 0.4808870553970337, "learning_rate": 0.0004723831614693303, "loss": 1.8365, "step": 27516 }, { "epoch": 0.92, "grad_norm": 0.43479448556900024, "learning_rate": 0.00047237460707052456, "loss": 1.7482, "step": 27517 }, { "epoch": 0.92, "grad_norm": 0.4312341809272766, "learning_rate": 0.00047236605246248135, "loss": 1.6827, "step": 27518 }, { "epoch": 0.92, "grad_norm": 0.4544160068035126, "learning_rate": 0.0004723574976452113, "loss": 1.7349, "step": 27519 }, { "epoch": 0.92, "grad_norm": 0.4789049029350281, "learning_rate": 0.00047234894261872465, "loss": 1.8014, "step": 27520 }, { "epoch": 0.92, "grad_norm": 0.44221043586730957, "learning_rate": 0.0004723403873830316, "loss": 1.7827, "step": 27521 }, { "epoch": 0.92, "grad_norm": 0.4817808270454407, "learning_rate": 0.0004723318319381429, "loss": 1.7646, "step": 27522 }, { "epoch": 0.92, "grad_norm": 0.4486343264579773, "learning_rate": 0.0004723232762840686, "loss": 1.8972, "step": 27523 }, { "epoch": 0.92, "grad_norm": 0.47104114294052124, "learning_rate": 0.00047231472042081935, "loss": 1.7934, "step": 27524 }, { "epoch": 0.92, "grad_norm": 0.4415656626224518, "learning_rate": 0.00047230616434840536, "loss": 1.738, "step": 27525 }, { "epoch": 0.92, "grad_norm": 0.4587612748146057, "learning_rate": 0.00047229760806683706, "loss": 1.8504, "step": 27526 }, { "epoch": 0.92, "grad_norm": 0.45347151160240173, "learning_rate": 0.0004722890515761249, "loss": 1.7834, "step": 27527 }, { "epoch": 0.92, "grad_norm": 0.4946524202823639, "learning_rate": 0.00047228049487627917, "loss": 1.7855, "step": 27528 }, { "epoch": 0.92, "grad_norm": 0.44832074642181396, "learning_rate": 0.00047227193796731034, "loss": 1.733, "step": 27529 }, { "epoch": 0.92, "grad_norm": 0.4369390904903412, "learning_rate": 0.00047226338084922873, "loss": 1.8443, "step": 27530 }, { "epoch": 0.92, "grad_norm": 0.4526861011981964, "learning_rate": 0.00047225482352204474, "loss": 1.8764, "step": 27531 }, { "epoch": 0.92, "grad_norm": 0.4535277783870697, "learning_rate": 0.00047224626598576884, "loss": 1.8405, "step": 27532 }, { "epoch": 0.92, "grad_norm": 0.44996657967567444, "learning_rate": 0.0004722377082404113, "loss": 1.7987, "step": 27533 }, { "epoch": 0.92, "grad_norm": 0.44265303015708923, "learning_rate": 0.0004722291502859826, "loss": 1.8076, "step": 27534 }, { "epoch": 0.92, "grad_norm": 0.46844956278800964, "learning_rate": 0.0004722205921224931, "loss": 1.8116, "step": 27535 }, { "epoch": 0.92, "grad_norm": 0.4659402370452881, "learning_rate": 0.0004722120337499531, "loss": 1.8347, "step": 27536 }, { "epoch": 0.92, "grad_norm": 0.4589744508266449, "learning_rate": 0.0004722034751683731, "loss": 1.8823, "step": 27537 }, { "epoch": 0.92, "grad_norm": 0.5253421664237976, "learning_rate": 0.0004721949163777635, "loss": 1.8531, "step": 27538 }, { "epoch": 0.92, "grad_norm": 0.46503007411956787, "learning_rate": 0.00047218635737813466, "loss": 1.7841, "step": 27539 }, { "epoch": 0.92, "grad_norm": 0.46857261657714844, "learning_rate": 0.00047217779816949686, "loss": 1.8193, "step": 27540 }, { "epoch": 0.92, "grad_norm": 0.4411887228488922, "learning_rate": 0.00047216923875186066, "loss": 1.9177, "step": 27541 }, { "epoch": 0.92, "grad_norm": 0.4393579959869385, "learning_rate": 0.0004721606791252363, "loss": 1.8776, "step": 27542 }, { "epoch": 0.92, "grad_norm": 0.4610505998134613, "learning_rate": 0.00047215211928963426, "loss": 1.8686, "step": 27543 }, { "epoch": 0.92, "grad_norm": 0.4644280970096588, "learning_rate": 0.0004721435592450649, "loss": 1.8359, "step": 27544 }, { "epoch": 0.92, "grad_norm": 0.4421806037425995, "learning_rate": 0.0004721349989915387, "loss": 1.7784, "step": 27545 }, { "epoch": 0.92, "grad_norm": 0.4472068250179291, "learning_rate": 0.0004721264385290659, "loss": 1.8202, "step": 27546 }, { "epoch": 0.92, "grad_norm": 0.45006898045539856, "learning_rate": 0.000472117877857657, "loss": 1.8352, "step": 27547 }, { "epoch": 0.92, "grad_norm": 0.44334137439727783, "learning_rate": 0.0004721093169773224, "loss": 1.8136, "step": 27548 }, { "epoch": 0.92, "grad_norm": 0.46016809344291687, "learning_rate": 0.00047210075588807233, "loss": 1.7707, "step": 27549 }, { "epoch": 0.92, "grad_norm": 0.4525930881500244, "learning_rate": 0.0004720921945899174, "loss": 1.7412, "step": 27550 }, { "epoch": 0.92, "grad_norm": 0.4436749219894409, "learning_rate": 0.0004720836330828678, "loss": 1.8071, "step": 27551 }, { "epoch": 0.92, "grad_norm": 0.45019975304603577, "learning_rate": 0.00047207507136693414, "loss": 1.8487, "step": 27552 }, { "epoch": 0.92, "grad_norm": 0.45492884516716003, "learning_rate": 0.00047206650944212666, "loss": 1.7988, "step": 27553 }, { "epoch": 0.92, "grad_norm": 0.45860108733177185, "learning_rate": 0.0004720579473084558, "loss": 1.7592, "step": 27554 }, { "epoch": 0.92, "grad_norm": 0.4404025077819824, "learning_rate": 0.0004720493849659319, "loss": 1.7534, "step": 27555 }, { "epoch": 0.92, "grad_norm": 0.4542981684207916, "learning_rate": 0.00047204082241456545, "loss": 1.8216, "step": 27556 }, { "epoch": 0.92, "grad_norm": 0.4357961118221283, "learning_rate": 0.0004720322596543668, "loss": 1.8197, "step": 27557 }, { "epoch": 0.92, "grad_norm": 0.45201343297958374, "learning_rate": 0.00047202369668534626, "loss": 1.8356, "step": 27558 }, { "epoch": 0.92, "grad_norm": 0.499592125415802, "learning_rate": 0.00047201513350751434, "loss": 1.864, "step": 27559 }, { "epoch": 0.92, "grad_norm": 0.43924447894096375, "learning_rate": 0.0004720065701208814, "loss": 1.7162, "step": 27560 }, { "epoch": 0.92, "grad_norm": 0.448451966047287, "learning_rate": 0.0004719980065254578, "loss": 1.8585, "step": 27561 }, { "epoch": 0.92, "grad_norm": 0.42818474769592285, "learning_rate": 0.000471989442721254, "loss": 1.7209, "step": 27562 }, { "epoch": 0.92, "grad_norm": 0.7419688701629639, "learning_rate": 0.00047198087870828034, "loss": 1.8032, "step": 27563 }, { "epoch": 0.92, "grad_norm": 0.4451271593570709, "learning_rate": 0.00047197231448654724, "loss": 1.8275, "step": 27564 }, { "epoch": 0.92, "grad_norm": 0.42828959226608276, "learning_rate": 0.0004719637500560651, "loss": 1.8141, "step": 27565 }, { "epoch": 0.92, "grad_norm": 0.43551599979400635, "learning_rate": 0.00047195518541684424, "loss": 1.7609, "step": 27566 }, { "epoch": 0.92, "grad_norm": 0.4471823275089264, "learning_rate": 0.0004719466205688952, "loss": 1.854, "step": 27567 }, { "epoch": 0.92, "grad_norm": 0.45124784111976624, "learning_rate": 0.0004719380555122282, "loss": 1.736, "step": 27568 }, { "epoch": 0.92, "grad_norm": 0.45395272970199585, "learning_rate": 0.0004719294902468538, "loss": 1.8255, "step": 27569 }, { "epoch": 0.92, "grad_norm": 0.44350746273994446, "learning_rate": 0.0004719209247727824, "loss": 1.8607, "step": 27570 }, { "epoch": 0.92, "grad_norm": 0.433864951133728, "learning_rate": 0.00047191235909002427, "loss": 1.8771, "step": 27571 }, { "epoch": 0.92, "grad_norm": 0.45255038142204285, "learning_rate": 0.00047190379319858984, "loss": 1.7909, "step": 27572 }, { "epoch": 0.92, "grad_norm": 0.43795400857925415, "learning_rate": 0.00047189522709848954, "loss": 1.9068, "step": 27573 }, { "epoch": 0.92, "grad_norm": 0.4517349898815155, "learning_rate": 0.0004718866607897337, "loss": 1.7952, "step": 27574 }, { "epoch": 0.92, "grad_norm": 0.4443908631801605, "learning_rate": 0.00047187809427233295, "loss": 1.8569, "step": 27575 }, { "epoch": 0.92, "grad_norm": 0.45237982273101807, "learning_rate": 0.0004718695275462973, "loss": 1.814, "step": 27576 }, { "epoch": 0.92, "grad_norm": 0.4620562195777893, "learning_rate": 0.0004718609606116376, "loss": 1.829, "step": 27577 }, { "epoch": 0.92, "grad_norm": 0.4332107603549957, "learning_rate": 0.00047185239346836383, "loss": 1.7851, "step": 27578 }, { "epoch": 0.92, "grad_norm": 0.4421320855617523, "learning_rate": 0.00047184382611648664, "loss": 1.7733, "step": 27579 }, { "epoch": 0.92, "grad_norm": 1.2693583965301514, "learning_rate": 0.0004718352585560164, "loss": 1.852, "step": 27580 }, { "epoch": 0.92, "grad_norm": 0.460300087928772, "learning_rate": 0.0004718266907869635, "loss": 1.8039, "step": 27581 }, { "epoch": 0.92, "grad_norm": 0.43757113814353943, "learning_rate": 0.00047181812280933826, "loss": 1.8422, "step": 27582 }, { "epoch": 0.92, "grad_norm": 0.43031203746795654, "learning_rate": 0.00047180955462315107, "loss": 1.778, "step": 27583 }, { "epoch": 0.92, "grad_norm": 0.4470132887363434, "learning_rate": 0.0004718009862284125, "loss": 1.8531, "step": 27584 }, { "epoch": 0.92, "grad_norm": 0.42587754130363464, "learning_rate": 0.00047179241762513284, "loss": 1.8298, "step": 27585 }, { "epoch": 0.92, "grad_norm": 0.4607589542865753, "learning_rate": 0.0004717838488133224, "loss": 1.8303, "step": 27586 }, { "epoch": 0.92, "grad_norm": 0.4443047344684601, "learning_rate": 0.00047177527979299174, "loss": 1.7663, "step": 27587 }, { "epoch": 0.92, "grad_norm": 0.436393678188324, "learning_rate": 0.00047176671056415123, "loss": 1.8196, "step": 27588 }, { "epoch": 0.92, "grad_norm": 0.4536803960800171, "learning_rate": 0.00047175814112681127, "loss": 1.8007, "step": 27589 }, { "epoch": 0.92, "grad_norm": 0.4466821551322937, "learning_rate": 0.00047174957148098215, "loss": 1.7421, "step": 27590 }, { "epoch": 0.92, "grad_norm": 0.47059279680252075, "learning_rate": 0.00047174100162667435, "loss": 1.864, "step": 27591 }, { "epoch": 0.92, "grad_norm": 0.45139020681381226, "learning_rate": 0.0004717324315638984, "loss": 1.8273, "step": 27592 }, { "epoch": 0.92, "grad_norm": 0.43987447023391724, "learning_rate": 0.00047172386129266453, "loss": 1.8102, "step": 27593 }, { "epoch": 0.92, "grad_norm": 0.4522555470466614, "learning_rate": 0.0004717152908129831, "loss": 1.7717, "step": 27594 }, { "epoch": 0.92, "grad_norm": 0.4618624448776245, "learning_rate": 0.0004717067201248647, "loss": 1.7904, "step": 27595 }, { "epoch": 0.92, "grad_norm": 0.44603919982910156, "learning_rate": 0.00047169814922831964, "loss": 1.862, "step": 27596 }, { "epoch": 0.92, "grad_norm": 0.4557816684246063, "learning_rate": 0.0004716895781233583, "loss": 1.8335, "step": 27597 }, { "epoch": 0.92, "grad_norm": 0.44193580746650696, "learning_rate": 0.0004716810068099911, "loss": 1.7733, "step": 27598 }, { "epoch": 0.92, "grad_norm": 0.43182268738746643, "learning_rate": 0.00047167243528822844, "loss": 1.7654, "step": 27599 }, { "epoch": 0.92, "grad_norm": 0.4514349400997162, "learning_rate": 0.00047166386355808077, "loss": 1.8691, "step": 27600 }, { "epoch": 0.92, "grad_norm": 0.44616031646728516, "learning_rate": 0.00047165529161955844, "loss": 1.8406, "step": 27601 }, { "epoch": 0.92, "grad_norm": 0.45518800616264343, "learning_rate": 0.0004716467194726719, "loss": 1.8215, "step": 27602 }, { "epoch": 0.92, "grad_norm": 0.45369529724121094, "learning_rate": 0.00047163814711743155, "loss": 1.8376, "step": 27603 }, { "epoch": 0.92, "grad_norm": 0.4852621555328369, "learning_rate": 0.00047162957455384774, "loss": 1.8218, "step": 27604 }, { "epoch": 0.92, "grad_norm": 0.47930261492729187, "learning_rate": 0.00047162100178193097, "loss": 1.8235, "step": 27605 }, { "epoch": 0.92, "grad_norm": 0.4735867381095886, "learning_rate": 0.0004716124288016915, "loss": 1.826, "step": 27606 }, { "epoch": 0.92, "grad_norm": 0.47045227885246277, "learning_rate": 0.0004716038556131399, "loss": 1.8222, "step": 27607 }, { "epoch": 0.92, "grad_norm": 0.4374988377094269, "learning_rate": 0.0004715952822162864, "loss": 1.8079, "step": 27608 }, { "epoch": 0.92, "grad_norm": 0.45118221640586853, "learning_rate": 0.00047158670861114163, "loss": 1.7832, "step": 27609 }, { "epoch": 0.92, "grad_norm": 0.4257645606994629, "learning_rate": 0.0004715781347977159, "loss": 1.8567, "step": 27610 }, { "epoch": 0.92, "grad_norm": 0.4673272371292114, "learning_rate": 0.0004715695607760195, "loss": 1.7413, "step": 27611 }, { "epoch": 0.92, "grad_norm": 0.43746402859687805, "learning_rate": 0.00047156098654606304, "loss": 1.8322, "step": 27612 }, { "epoch": 0.92, "grad_norm": 0.43301644921302795, "learning_rate": 0.0004715524121078567, "loss": 1.7689, "step": 27613 }, { "epoch": 0.92, "grad_norm": 0.45038893818855286, "learning_rate": 0.0004715438374614111, "loss": 1.7056, "step": 27614 }, { "epoch": 0.92, "grad_norm": 0.45321714878082275, "learning_rate": 0.00047153526260673646, "loss": 1.8176, "step": 27615 }, { "epoch": 0.92, "grad_norm": 0.47905030846595764, "learning_rate": 0.00047152668754384336, "loss": 1.8818, "step": 27616 }, { "epoch": 0.92, "grad_norm": 0.45351743698120117, "learning_rate": 0.00047151811227274214, "loss": 1.8214, "step": 27617 }, { "epoch": 0.92, "grad_norm": 0.4534255266189575, "learning_rate": 0.0004715095367934432, "loss": 1.9013, "step": 27618 }, { "epoch": 0.92, "grad_norm": 0.4684545397758484, "learning_rate": 0.00047150096110595694, "loss": 1.8003, "step": 27619 }, { "epoch": 0.92, "grad_norm": 0.4444659948348999, "learning_rate": 0.00047149238521029374, "loss": 1.7925, "step": 27620 }, { "epoch": 0.92, "grad_norm": 0.4372120797634125, "learning_rate": 0.0004714838091064641, "loss": 1.8013, "step": 27621 }, { "epoch": 0.92, "grad_norm": 0.4204489588737488, "learning_rate": 0.00047147523279447836, "loss": 1.8171, "step": 27622 }, { "epoch": 0.92, "grad_norm": 0.45716649293899536, "learning_rate": 0.0004714666562743469, "loss": 1.8421, "step": 27623 }, { "epoch": 0.92, "grad_norm": 0.4560309648513794, "learning_rate": 0.0004714580795460803, "loss": 1.836, "step": 27624 }, { "epoch": 0.92, "grad_norm": 0.44965487718582153, "learning_rate": 0.00047144950260968887, "loss": 1.8491, "step": 27625 }, { "epoch": 0.92, "grad_norm": 0.4312535524368286, "learning_rate": 0.0004714409254651829, "loss": 1.8513, "step": 27626 }, { "epoch": 0.92, "grad_norm": 0.4485624432563782, "learning_rate": 0.000471432348112573, "loss": 1.8779, "step": 27627 }, { "epoch": 0.92, "grad_norm": 0.4744463264942169, "learning_rate": 0.0004714237705518694, "loss": 1.8708, "step": 27628 }, { "epoch": 0.92, "grad_norm": 0.4601460099220276, "learning_rate": 0.00047141519278308267, "loss": 1.8012, "step": 27629 }, { "epoch": 0.92, "grad_norm": 0.4429534375667572, "learning_rate": 0.00047140661480622303, "loss": 1.8107, "step": 27630 }, { "epoch": 0.92, "grad_norm": 0.45239779353141785, "learning_rate": 0.0004713980366213011, "loss": 1.9014, "step": 27631 }, { "epoch": 0.92, "grad_norm": 0.44570761919021606, "learning_rate": 0.0004713894582283272, "loss": 1.7588, "step": 27632 }, { "epoch": 0.92, "grad_norm": 0.4520561099052429, "learning_rate": 0.0004713808796273118, "loss": 1.8062, "step": 27633 }, { "epoch": 0.92, "grad_norm": 0.4541552662849426, "learning_rate": 0.00047137230081826517, "loss": 1.9186, "step": 27634 }, { "epoch": 0.92, "grad_norm": 0.4364100992679596, "learning_rate": 0.0004713637218011979, "loss": 1.8373, "step": 27635 }, { "epoch": 0.92, "grad_norm": 0.47088125348091125, "learning_rate": 0.00047135514257612036, "loss": 1.821, "step": 27636 }, { "epoch": 0.92, "grad_norm": 0.47843873500823975, "learning_rate": 0.00047134656314304287, "loss": 1.8151, "step": 27637 }, { "epoch": 0.92, "grad_norm": 0.43714404106140137, "learning_rate": 0.0004713379835019758, "loss": 1.7619, "step": 27638 }, { "epoch": 0.92, "grad_norm": 0.4453097879886627, "learning_rate": 0.0004713294036529298, "loss": 1.8193, "step": 27639 }, { "epoch": 0.92, "grad_norm": 0.4910084903240204, "learning_rate": 0.00047132082359591505, "loss": 1.7944, "step": 27640 }, { "epoch": 0.92, "grad_norm": 0.45385634899139404, "learning_rate": 0.00047131224333094214, "loss": 1.8032, "step": 27641 }, { "epoch": 0.92, "grad_norm": 0.4308607280254364, "learning_rate": 0.0004713036628580214, "loss": 1.7846, "step": 27642 }, { "epoch": 0.92, "grad_norm": 0.4427434206008911, "learning_rate": 0.0004712950821771632, "loss": 1.8207, "step": 27643 }, { "epoch": 0.92, "grad_norm": 0.4535565674304962, "learning_rate": 0.00047128650128837806, "loss": 1.845, "step": 27644 }, { "epoch": 0.92, "grad_norm": 0.45364251732826233, "learning_rate": 0.0004712779201916763, "loss": 1.8868, "step": 27645 }, { "epoch": 0.92, "grad_norm": 0.43690401315689087, "learning_rate": 0.0004712693388870685, "loss": 1.8012, "step": 27646 }, { "epoch": 0.92, "grad_norm": 0.4379546642303467, "learning_rate": 0.0004712607573745648, "loss": 1.7431, "step": 27647 }, { "epoch": 0.92, "grad_norm": 0.4665084183216095, "learning_rate": 0.00047125217565417585, "loss": 1.7839, "step": 27648 }, { "epoch": 0.92, "grad_norm": 0.4394289553165436, "learning_rate": 0.000471243593725912, "loss": 1.7795, "step": 27649 }, { "epoch": 0.92, "grad_norm": 0.44721484184265137, "learning_rate": 0.00047123501158978363, "loss": 1.7958, "step": 27650 }, { "epoch": 0.92, "grad_norm": 0.4458836317062378, "learning_rate": 0.0004712264292458012, "loss": 1.7928, "step": 27651 }, { "epoch": 0.92, "grad_norm": 0.44528624415397644, "learning_rate": 0.0004712178466939752, "loss": 1.7935, "step": 27652 }, { "epoch": 0.92, "grad_norm": 0.4405827224254608, "learning_rate": 0.0004712092639343158, "loss": 1.7147, "step": 27653 }, { "epoch": 0.92, "grad_norm": 0.4405817687511444, "learning_rate": 0.00047120068096683366, "loss": 1.8598, "step": 27654 }, { "epoch": 0.92, "grad_norm": 0.4328070282936096, "learning_rate": 0.0004711920977915391, "loss": 1.7984, "step": 27655 }, { "epoch": 0.92, "grad_norm": 0.4468723237514496, "learning_rate": 0.00047118351440844256, "loss": 1.7624, "step": 27656 }, { "epoch": 0.92, "grad_norm": 0.45296967029571533, "learning_rate": 0.0004711749308175545, "loss": 1.7392, "step": 27657 }, { "epoch": 0.92, "grad_norm": 0.45095962285995483, "learning_rate": 0.0004711663470188853, "loss": 1.7715, "step": 27658 }, { "epoch": 0.92, "grad_norm": 0.441822350025177, "learning_rate": 0.00047115776301244537, "loss": 1.8079, "step": 27659 }, { "epoch": 0.92, "grad_norm": 0.447041779756546, "learning_rate": 0.000471149178798245, "loss": 1.8662, "step": 27660 }, { "epoch": 0.92, "grad_norm": 0.4303204417228699, "learning_rate": 0.00047114059437629486, "loss": 1.7645, "step": 27661 }, { "epoch": 0.92, "grad_norm": 0.4377672076225281, "learning_rate": 0.0004711320097466053, "loss": 1.7685, "step": 27662 }, { "epoch": 0.92, "grad_norm": 0.4457862377166748, "learning_rate": 0.00047112342490918655, "loss": 1.8294, "step": 27663 }, { "epoch": 0.92, "grad_norm": 0.44006162881851196, "learning_rate": 0.00047111483986404936, "loss": 1.8392, "step": 27664 }, { "epoch": 0.92, "grad_norm": 0.4259766638278961, "learning_rate": 0.0004711062546112038, "loss": 1.7983, "step": 27665 }, { "epoch": 0.92, "grad_norm": 0.45328575372695923, "learning_rate": 0.0004710976691506606, "loss": 1.7622, "step": 27666 }, { "epoch": 0.92, "grad_norm": 0.4478530287742615, "learning_rate": 0.00047108908348243, "loss": 1.8282, "step": 27667 }, { "epoch": 0.92, "grad_norm": 0.5061973333358765, "learning_rate": 0.00047108049760652236, "loss": 1.8232, "step": 27668 }, { "epoch": 0.92, "grad_norm": 0.44129040837287903, "learning_rate": 0.0004710719115229483, "loss": 1.8555, "step": 27669 }, { "epoch": 0.92, "grad_norm": 0.4368009567260742, "learning_rate": 0.0004710633252317181, "loss": 1.7265, "step": 27670 }, { "epoch": 0.92, "grad_norm": 0.45944732427597046, "learning_rate": 0.00047105473873284225, "loss": 1.8444, "step": 27671 }, { "epoch": 0.92, "grad_norm": 0.45195120573043823, "learning_rate": 0.0004710461520263312, "loss": 1.8896, "step": 27672 }, { "epoch": 0.92, "grad_norm": 0.4447997510433197, "learning_rate": 0.0004710375651121952, "loss": 1.8043, "step": 27673 }, { "epoch": 0.92, "grad_norm": 0.4588976502418518, "learning_rate": 0.00047102897799044494, "loss": 1.7774, "step": 27674 }, { "epoch": 0.92, "grad_norm": 0.43727725744247437, "learning_rate": 0.0004710203906610906, "loss": 1.8139, "step": 27675 }, { "epoch": 0.92, "grad_norm": 0.45139122009277344, "learning_rate": 0.00047101180312414273, "loss": 1.813, "step": 27676 }, { "epoch": 0.92, "grad_norm": 0.445474237203598, "learning_rate": 0.0004710032153796117, "loss": 1.8211, "step": 27677 }, { "epoch": 0.92, "grad_norm": 0.42432889342308044, "learning_rate": 0.000470994627427508, "loss": 1.7282, "step": 27678 }, { "epoch": 0.92, "grad_norm": 0.43000084161758423, "learning_rate": 0.000470986039267842, "loss": 1.7656, "step": 27679 }, { "epoch": 0.92, "grad_norm": 0.43764960765838623, "learning_rate": 0.0004709774509006242, "loss": 1.8394, "step": 27680 }, { "epoch": 0.92, "grad_norm": 0.461116224527359, "learning_rate": 0.00047096886232586495, "loss": 1.7742, "step": 27681 }, { "epoch": 0.92, "grad_norm": 0.44459033012390137, "learning_rate": 0.00047096027354357465, "loss": 1.8495, "step": 27682 }, { "epoch": 0.92, "grad_norm": 0.4386002719402313, "learning_rate": 0.0004709516845537637, "loss": 1.8013, "step": 27683 }, { "epoch": 0.92, "grad_norm": 0.46119576692581177, "learning_rate": 0.0004709430953564427, "loss": 1.8738, "step": 27684 }, { "epoch": 0.92, "grad_norm": 0.4603477716445923, "learning_rate": 0.00047093450595162196, "loss": 1.7672, "step": 27685 }, { "epoch": 0.92, "grad_norm": 0.44903287291526794, "learning_rate": 0.0004709259163393119, "loss": 1.8294, "step": 27686 }, { "epoch": 0.92, "grad_norm": 0.4403688311576843, "learning_rate": 0.000470917326519523, "loss": 1.7965, "step": 27687 }, { "epoch": 0.92, "grad_norm": 0.4483875632286072, "learning_rate": 0.00047090873649226553, "loss": 1.8536, "step": 27688 }, { "epoch": 0.92, "grad_norm": 0.4665222465991974, "learning_rate": 0.0004709001462575502, "loss": 1.8841, "step": 27689 }, { "epoch": 0.92, "grad_norm": 0.4389505982398987, "learning_rate": 0.0004708915558153872, "loss": 1.861, "step": 27690 }, { "epoch": 0.92, "grad_norm": 0.4447246193885803, "learning_rate": 0.00047088296516578697, "loss": 1.8422, "step": 27691 }, { "epoch": 0.92, "grad_norm": 0.44488444924354553, "learning_rate": 0.0004708743743087601, "loss": 1.8755, "step": 27692 }, { "epoch": 0.92, "grad_norm": 0.42718616127967834, "learning_rate": 0.00047086578324431684, "loss": 1.7374, "step": 27693 }, { "epoch": 0.92, "grad_norm": 0.43698814511299133, "learning_rate": 0.0004708571919724677, "loss": 1.8294, "step": 27694 }, { "epoch": 0.92, "grad_norm": 0.42021265625953674, "learning_rate": 0.00047084860049322313, "loss": 1.8441, "step": 27695 }, { "epoch": 0.92, "grad_norm": 0.43840524554252625, "learning_rate": 0.0004708400088065935, "loss": 1.862, "step": 27696 }, { "epoch": 0.92, "grad_norm": 0.44552841782569885, "learning_rate": 0.00047083141691258936, "loss": 1.7542, "step": 27697 }, { "epoch": 0.92, "grad_norm": 0.43690845370292664, "learning_rate": 0.000470822824811221, "loss": 1.8268, "step": 27698 }, { "epoch": 0.92, "grad_norm": 0.42907026410102844, "learning_rate": 0.00047081423250249887, "loss": 1.7806, "step": 27699 }, { "epoch": 0.92, "grad_norm": 0.4588072597980499, "learning_rate": 0.0004708056399864334, "loss": 1.8619, "step": 27700 }, { "epoch": 0.92, "grad_norm": 0.43325749039649963, "learning_rate": 0.00047079704726303513, "loss": 1.7849, "step": 27701 }, { "epoch": 0.92, "grad_norm": 0.43876439332962036, "learning_rate": 0.0004707884543323144, "loss": 1.8035, "step": 27702 }, { "epoch": 0.92, "grad_norm": 0.44607311487197876, "learning_rate": 0.00047077986119428163, "loss": 1.8542, "step": 27703 }, { "epoch": 0.92, "grad_norm": 0.45694664120674133, "learning_rate": 0.0004707712678489473, "loss": 1.7913, "step": 27704 }, { "epoch": 0.92, "grad_norm": 0.44340232014656067, "learning_rate": 0.0004707626742963218, "loss": 1.7156, "step": 27705 }, { "epoch": 0.92, "grad_norm": 0.42677369713783264, "learning_rate": 0.0004707540805364156, "loss": 1.7673, "step": 27706 }, { "epoch": 0.92, "grad_norm": 0.44046470522880554, "learning_rate": 0.000470745486569239, "loss": 1.8717, "step": 27707 }, { "epoch": 0.92, "grad_norm": 0.441216379404068, "learning_rate": 0.0004707368923948026, "loss": 1.8848, "step": 27708 }, { "epoch": 0.92, "grad_norm": 0.4380630552768707, "learning_rate": 0.0004707282980131168, "loss": 1.7682, "step": 27709 }, { "epoch": 0.92, "grad_norm": 0.46044686436653137, "learning_rate": 0.0004707197034241919, "loss": 1.8222, "step": 27710 }, { "epoch": 0.92, "grad_norm": 0.4389689564704895, "learning_rate": 0.0004707111086280386, "loss": 1.8023, "step": 27711 }, { "epoch": 0.92, "grad_norm": 0.6503840088844299, "learning_rate": 0.00047070251362466703, "loss": 1.9071, "step": 27712 }, { "epoch": 0.92, "grad_norm": 0.4367492198944092, "learning_rate": 0.00047069391841408784, "loss": 1.8384, "step": 27713 }, { "epoch": 0.92, "grad_norm": 0.45703014731407166, "learning_rate": 0.0004706853229963113, "loss": 1.8344, "step": 27714 }, { "epoch": 0.92, "grad_norm": 0.4496360421180725, "learning_rate": 0.000470676727371348, "loss": 1.796, "step": 27715 }, { "epoch": 0.92, "grad_norm": 0.4402986764907837, "learning_rate": 0.00047066813153920833, "loss": 1.7059, "step": 27716 }, { "epoch": 0.92, "grad_norm": 0.46120014786720276, "learning_rate": 0.0004706595354999026, "loss": 1.8029, "step": 27717 }, { "epoch": 0.92, "grad_norm": 0.4584437608718872, "learning_rate": 0.0004706509392534414, "loss": 1.794, "step": 27718 }, { "epoch": 0.92, "grad_norm": 0.4278905987739563, "learning_rate": 0.0004706423427998351, "loss": 1.8063, "step": 27719 }, { "epoch": 0.92, "grad_norm": 0.45993781089782715, "learning_rate": 0.00047063374613909406, "loss": 1.858, "step": 27720 }, { "epoch": 0.92, "grad_norm": 0.45828914642333984, "learning_rate": 0.0004706251492712289, "loss": 1.7581, "step": 27721 }, { "epoch": 0.92, "grad_norm": 0.449355810880661, "learning_rate": 0.00047061655219624997, "loss": 1.7709, "step": 27722 }, { "epoch": 0.92, "grad_norm": 0.43590718507766724, "learning_rate": 0.00047060795491416754, "loss": 1.783, "step": 27723 }, { "epoch": 0.92, "grad_norm": 0.42439961433410645, "learning_rate": 0.00047059935742499237, "loss": 1.7389, "step": 27724 }, { "epoch": 0.92, "grad_norm": 0.43667295575141907, "learning_rate": 0.00047059075972873446, "loss": 1.8007, "step": 27725 }, { "epoch": 0.92, "grad_norm": 0.439654141664505, "learning_rate": 0.0004705821618254048, "loss": 1.8533, "step": 27726 }, { "epoch": 0.92, "grad_norm": 0.42063337564468384, "learning_rate": 0.00047057356371501337, "loss": 1.8165, "step": 27727 }, { "epoch": 0.92, "grad_norm": 0.44098398089408875, "learning_rate": 0.0004705649653975708, "loss": 1.7955, "step": 27728 }, { "epoch": 0.92, "grad_norm": 0.4523351192474365, "learning_rate": 0.0004705563668730874, "loss": 1.7994, "step": 27729 }, { "epoch": 0.92, "grad_norm": 0.44016048312187195, "learning_rate": 0.0004705477681415738, "loss": 1.8492, "step": 27730 }, { "epoch": 0.92, "grad_norm": 0.44076767563819885, "learning_rate": 0.0004705391692030403, "loss": 1.8565, "step": 27731 }, { "epoch": 0.92, "grad_norm": 0.4392998218536377, "learning_rate": 0.00047053057005749745, "loss": 1.8882, "step": 27732 }, { "epoch": 0.92, "grad_norm": 0.4427395164966583, "learning_rate": 0.0004705219707049556, "loss": 1.7793, "step": 27733 }, { "epoch": 0.92, "grad_norm": 0.44079434871673584, "learning_rate": 0.0004705133711454251, "loss": 1.7742, "step": 27734 }, { "epoch": 0.92, "grad_norm": 0.44986557960510254, "learning_rate": 0.0004705047713789166, "loss": 1.8786, "step": 27735 }, { "epoch": 0.92, "grad_norm": 0.46070966124534607, "learning_rate": 0.00047049617140544045, "loss": 1.7736, "step": 27736 }, { "epoch": 0.92, "grad_norm": 0.4301452338695526, "learning_rate": 0.0004704875712250069, "loss": 1.7914, "step": 27737 }, { "epoch": 0.92, "grad_norm": 0.4421081840991974, "learning_rate": 0.00047047897083762674, "loss": 1.8114, "step": 27738 }, { "epoch": 0.92, "grad_norm": 0.4515670835971832, "learning_rate": 0.0004704703702433102, "loss": 1.8507, "step": 27739 }, { "epoch": 0.92, "grad_norm": 0.44081345200538635, "learning_rate": 0.00047046176944206766, "loss": 1.727, "step": 27740 }, { "epoch": 0.92, "grad_norm": 0.4475371837615967, "learning_rate": 0.0004704531684339097, "loss": 1.8618, "step": 27741 }, { "epoch": 0.92, "grad_norm": 0.44903528690338135, "learning_rate": 0.0004704445672188467, "loss": 1.7973, "step": 27742 }, { "epoch": 0.92, "grad_norm": 0.4459749758243561, "learning_rate": 0.0004704359657968891, "loss": 1.8209, "step": 27743 }, { "epoch": 0.92, "grad_norm": 0.4407024383544922, "learning_rate": 0.0004704273641680473, "loss": 1.8341, "step": 27744 }, { "epoch": 0.92, "grad_norm": 0.4392329752445221, "learning_rate": 0.00047041876233233187, "loss": 1.823, "step": 27745 }, { "epoch": 0.92, "grad_norm": 0.4470050036907196, "learning_rate": 0.0004704101602897532, "loss": 1.8685, "step": 27746 }, { "epoch": 0.92, "grad_norm": 0.44250428676605225, "learning_rate": 0.0004704015580403216, "loss": 1.7893, "step": 27747 }, { "epoch": 0.92, "grad_norm": 0.4290996789932251, "learning_rate": 0.0004703929555840477, "loss": 1.7664, "step": 27748 }, { "epoch": 0.92, "grad_norm": 0.4314243197441101, "learning_rate": 0.00047038435292094183, "loss": 1.8648, "step": 27749 }, { "epoch": 0.92, "grad_norm": 0.4379443824291229, "learning_rate": 0.00047037575005101444, "loss": 1.778, "step": 27750 }, { "epoch": 0.92, "grad_norm": 0.44333675503730774, "learning_rate": 0.00047036714697427603, "loss": 1.855, "step": 27751 }, { "epoch": 0.92, "grad_norm": 0.4293282628059387, "learning_rate": 0.000470358543690737, "loss": 1.8406, "step": 27752 }, { "epoch": 0.92, "grad_norm": 0.443954199552536, "learning_rate": 0.0004703499402004078, "loss": 1.7963, "step": 27753 }, { "epoch": 0.92, "grad_norm": 0.445539653301239, "learning_rate": 0.0004703413365032989, "loss": 1.8153, "step": 27754 }, { "epoch": 0.92, "grad_norm": 0.441540002822876, "learning_rate": 0.00047033273259942064, "loss": 1.8066, "step": 27755 }, { "epoch": 0.92, "grad_norm": 0.4331226050853729, "learning_rate": 0.0004703241284887836, "loss": 1.8234, "step": 27756 }, { "epoch": 0.92, "grad_norm": 0.44593438506126404, "learning_rate": 0.0004703155241713981, "loss": 1.8275, "step": 27757 }, { "epoch": 0.92, "grad_norm": 0.445404589176178, "learning_rate": 0.00047030691964727476, "loss": 1.8297, "step": 27758 }, { "epoch": 0.92, "grad_norm": 0.43672171235084534, "learning_rate": 0.00047029831491642385, "loss": 1.7436, "step": 27759 }, { "epoch": 0.92, "grad_norm": 0.43864861130714417, "learning_rate": 0.0004702897099788558, "loss": 1.7421, "step": 27760 }, { "epoch": 0.92, "grad_norm": 0.4263788163661957, "learning_rate": 0.0004702811048345813, "loss": 1.8056, "step": 27761 }, { "epoch": 0.92, "grad_norm": 0.4373972713947296, "learning_rate": 0.00047027249948361044, "loss": 1.748, "step": 27762 }, { "epoch": 0.92, "grad_norm": 0.4376152753829956, "learning_rate": 0.00047026389392595394, "loss": 1.8777, "step": 27763 }, { "epoch": 0.92, "grad_norm": 0.44462913274765015, "learning_rate": 0.00047025528816162227, "loss": 1.8212, "step": 27764 }, { "epoch": 0.92, "grad_norm": 0.4530623257160187, "learning_rate": 0.0004702466821906256, "loss": 1.8402, "step": 27765 }, { "epoch": 0.92, "grad_norm": 0.466539204120636, "learning_rate": 0.0004702380760129746, "loss": 1.7389, "step": 27766 }, { "epoch": 0.92, "grad_norm": 0.4501221776008606, "learning_rate": 0.00047022946962867965, "loss": 1.8026, "step": 27767 }, { "epoch": 0.92, "grad_norm": 0.45582687854766846, "learning_rate": 0.00047022086303775127, "loss": 1.8514, "step": 27768 }, { "epoch": 0.92, "grad_norm": 0.4469551146030426, "learning_rate": 0.0004702122562401998, "loss": 1.7559, "step": 27769 }, { "epoch": 0.92, "grad_norm": 0.43067142367362976, "learning_rate": 0.0004702036492360357, "loss": 1.7712, "step": 27770 }, { "epoch": 0.92, "grad_norm": 0.4315348267555237, "learning_rate": 0.0004701950420252695, "loss": 1.8249, "step": 27771 }, { "epoch": 0.92, "grad_norm": 0.44444236159324646, "learning_rate": 0.00047018643460791155, "loss": 1.8227, "step": 27772 }, { "epoch": 0.92, "grad_norm": 0.4433130621910095, "learning_rate": 0.00047017782698397236, "loss": 1.7829, "step": 27773 }, { "epoch": 0.92, "grad_norm": 0.5128054618835449, "learning_rate": 0.00047016921915346235, "loss": 1.8707, "step": 27774 }, { "epoch": 0.92, "grad_norm": 0.4633607268333435, "learning_rate": 0.00047016061111639204, "loss": 1.8361, "step": 27775 }, { "epoch": 0.92, "grad_norm": 0.4427328407764435, "learning_rate": 0.0004701520028727718, "loss": 1.8251, "step": 27776 }, { "epoch": 0.92, "grad_norm": 0.43593281507492065, "learning_rate": 0.00047014339442261204, "loss": 1.7734, "step": 27777 }, { "epoch": 0.92, "grad_norm": 0.4395131468772888, "learning_rate": 0.0004701347857659233, "loss": 1.8052, "step": 27778 }, { "epoch": 0.92, "grad_norm": 0.44291114807128906, "learning_rate": 0.00047012617690271596, "loss": 1.7855, "step": 27779 }, { "epoch": 0.92, "grad_norm": 0.43952521681785583, "learning_rate": 0.0004701175678330006, "loss": 1.7747, "step": 27780 }, { "epoch": 0.92, "grad_norm": 0.4351917803287506, "learning_rate": 0.00047010895855678755, "loss": 1.7344, "step": 27781 }, { "epoch": 0.92, "grad_norm": 0.44357845187187195, "learning_rate": 0.0004701003490740872, "loss": 1.7996, "step": 27782 }, { "epoch": 0.92, "grad_norm": 0.4727832078933716, "learning_rate": 0.0004700917393849102, "loss": 1.7468, "step": 27783 }, { "epoch": 0.92, "grad_norm": 0.4518943130970001, "learning_rate": 0.00047008312948926696, "loss": 1.8226, "step": 27784 }, { "epoch": 0.92, "grad_norm": 0.458333820104599, "learning_rate": 0.0004700745193871677, "loss": 1.8226, "step": 27785 }, { "epoch": 0.92, "grad_norm": 0.4416341483592987, "learning_rate": 0.00047006590907862307, "loss": 1.8437, "step": 27786 }, { "epoch": 0.92, "grad_norm": 0.4365389049053192, "learning_rate": 0.0004700572985636434, "loss": 1.7925, "step": 27787 }, { "epoch": 0.92, "grad_norm": 0.4373452067375183, "learning_rate": 0.00047004868784223937, "loss": 1.7728, "step": 27788 }, { "epoch": 0.92, "grad_norm": 0.4412149488925934, "learning_rate": 0.00047004007691442125, "loss": 1.7852, "step": 27789 }, { "epoch": 0.92, "grad_norm": 0.42337727546691895, "learning_rate": 0.00047003146578019955, "loss": 1.7587, "step": 27790 }, { "epoch": 0.92, "grad_norm": 0.4567004442214966, "learning_rate": 0.00047002285443958474, "loss": 1.8512, "step": 27791 }, { "epoch": 0.92, "grad_norm": 0.45176005363464355, "learning_rate": 0.0004700142428925871, "loss": 1.8169, "step": 27792 }, { "epoch": 0.92, "grad_norm": 0.44488680362701416, "learning_rate": 0.0004700056311392173, "loss": 1.8483, "step": 27793 }, { "epoch": 0.92, "grad_norm": 0.45201587677001953, "learning_rate": 0.00046999701917948574, "loss": 1.8067, "step": 27794 }, { "epoch": 0.92, "grad_norm": 0.4397723376750946, "learning_rate": 0.0004699884070134028, "loss": 1.7991, "step": 27795 }, { "epoch": 0.92, "grad_norm": 0.44509071111679077, "learning_rate": 0.000469979794640979, "loss": 1.7152, "step": 27796 }, { "epoch": 0.92, "grad_norm": 0.4521224796772003, "learning_rate": 0.00046997118206222474, "loss": 1.9025, "step": 27797 }, { "epoch": 0.92, "grad_norm": 0.4528667628765106, "learning_rate": 0.0004699625692771505, "loss": 1.81, "step": 27798 }, { "epoch": 0.92, "grad_norm": 0.4322017431259155, "learning_rate": 0.00046995395628576683, "loss": 1.8062, "step": 27799 }, { "epoch": 0.92, "grad_norm": 0.4536382853984833, "learning_rate": 0.000469945343088084, "loss": 1.7574, "step": 27800 }, { "epoch": 0.92, "grad_norm": 0.4440145194530487, "learning_rate": 0.0004699367296841127, "loss": 1.7614, "step": 27801 }, { "epoch": 0.92, "grad_norm": 0.45121994614601135, "learning_rate": 0.0004699281160738631, "loss": 1.8185, "step": 27802 }, { "epoch": 0.93, "grad_norm": 0.4384230673313141, "learning_rate": 0.00046991950225734586, "loss": 1.866, "step": 27803 }, { "epoch": 0.93, "grad_norm": 0.43840840458869934, "learning_rate": 0.0004699108882345714, "loss": 1.8504, "step": 27804 }, { "epoch": 0.93, "grad_norm": 0.46502697467803955, "learning_rate": 0.00046990227400555014, "loss": 1.7445, "step": 27805 }, { "epoch": 0.93, "grad_norm": 0.4439348876476288, "learning_rate": 0.00046989365957029257, "loss": 1.7732, "step": 27806 }, { "epoch": 0.93, "grad_norm": 0.4508901536464691, "learning_rate": 0.0004698850449288091, "loss": 1.9106, "step": 27807 }, { "epoch": 0.93, "grad_norm": 0.4349164366722107, "learning_rate": 0.0004698764300811102, "loss": 1.8887, "step": 27808 }, { "epoch": 0.93, "grad_norm": 0.4302099049091339, "learning_rate": 0.0004698678150272065, "loss": 1.7023, "step": 27809 }, { "epoch": 0.93, "grad_norm": 0.4548639953136444, "learning_rate": 0.00046985919976710806, "loss": 1.876, "step": 27810 }, { "epoch": 0.93, "grad_norm": 0.4314782917499542, "learning_rate": 0.00046985058430082567, "loss": 1.7765, "step": 27811 }, { "epoch": 0.93, "grad_norm": 0.44215285778045654, "learning_rate": 0.0004698419686283698, "loss": 1.8352, "step": 27812 }, { "epoch": 0.93, "grad_norm": 0.44195857644081116, "learning_rate": 0.00046983335274975075, "loss": 1.8593, "step": 27813 }, { "epoch": 0.93, "grad_norm": 0.42847588658332825, "learning_rate": 0.00046982473666497894, "loss": 1.872, "step": 27814 }, { "epoch": 0.93, "grad_norm": 0.4415786564350128, "learning_rate": 0.000469816120374065, "loss": 1.7229, "step": 27815 }, { "epoch": 0.93, "grad_norm": 0.6185406446456909, "learning_rate": 0.0004698075038770194, "loss": 1.8399, "step": 27816 }, { "epoch": 0.93, "grad_norm": 0.44753146171569824, "learning_rate": 0.00046979888717385234, "loss": 1.8369, "step": 27817 }, { "epoch": 0.93, "grad_norm": 0.4720342755317688, "learning_rate": 0.00046979027026457454, "loss": 1.8814, "step": 27818 }, { "epoch": 0.93, "grad_norm": 0.444189190864563, "learning_rate": 0.00046978165314919636, "loss": 1.7635, "step": 27819 }, { "epoch": 0.93, "grad_norm": 0.4406803846359253, "learning_rate": 0.00046977303582772825, "loss": 1.767, "step": 27820 }, { "epoch": 0.93, "grad_norm": 0.44415590167045593, "learning_rate": 0.0004697644183001807, "loss": 1.806, "step": 27821 }, { "epoch": 0.93, "grad_norm": 0.42810460925102234, "learning_rate": 0.0004697558005665642, "loss": 1.7358, "step": 27822 }, { "epoch": 0.93, "grad_norm": 0.434615820646286, "learning_rate": 0.00046974718262688916, "loss": 1.7714, "step": 27823 }, { "epoch": 0.93, "grad_norm": 0.4371577799320221, "learning_rate": 0.00046973856448116605, "loss": 1.7705, "step": 27824 }, { "epoch": 0.93, "grad_norm": 0.4389844238758087, "learning_rate": 0.0004697299461294053, "loss": 1.8306, "step": 27825 }, { "epoch": 0.93, "grad_norm": 0.44861093163490295, "learning_rate": 0.00046972132757161733, "loss": 1.7437, "step": 27826 }, { "epoch": 0.93, "grad_norm": 0.44151368737220764, "learning_rate": 0.0004697127088078128, "loss": 1.7757, "step": 27827 }, { "epoch": 0.93, "grad_norm": 0.4495043158531189, "learning_rate": 0.00046970408983800204, "loss": 1.8624, "step": 27828 }, { "epoch": 0.93, "grad_norm": 0.44489794969558716, "learning_rate": 0.00046969547066219553, "loss": 1.7687, "step": 27829 }, { "epoch": 0.93, "grad_norm": 0.43542712926864624, "learning_rate": 0.0004696868512804036, "loss": 1.8316, "step": 27830 }, { "epoch": 0.93, "grad_norm": 0.4575903117656708, "learning_rate": 0.000469678231692637, "loss": 1.8196, "step": 27831 }, { "epoch": 0.93, "grad_norm": 0.4644445776939392, "learning_rate": 0.00046966961189890587, "loss": 1.8485, "step": 27832 }, { "epoch": 0.93, "grad_norm": 0.4749692678451538, "learning_rate": 0.0004696609918992209, "loss": 1.865, "step": 27833 }, { "epoch": 0.93, "grad_norm": 0.43804651498794556, "learning_rate": 0.0004696523716935925, "loss": 1.8077, "step": 27834 }, { "epoch": 0.93, "grad_norm": 0.459476500749588, "learning_rate": 0.00046964375128203106, "loss": 1.7753, "step": 27835 }, { "epoch": 0.93, "grad_norm": 0.456211656332016, "learning_rate": 0.0004696351306645472, "loss": 1.8706, "step": 27836 }, { "epoch": 0.93, "grad_norm": 0.4426436424255371, "learning_rate": 0.0004696265098411512, "loss": 1.7214, "step": 27837 }, { "epoch": 0.93, "grad_norm": 0.4593407213687897, "learning_rate": 0.0004696178888118536, "loss": 1.8217, "step": 27838 }, { "epoch": 0.93, "grad_norm": 0.4498389959335327, "learning_rate": 0.00046960926757666494, "loss": 1.769, "step": 27839 }, { "epoch": 0.93, "grad_norm": 0.4387049078941345, "learning_rate": 0.00046960064613559563, "loss": 1.7607, "step": 27840 }, { "epoch": 0.93, "grad_norm": 0.4356575310230255, "learning_rate": 0.00046959202448865607, "loss": 1.795, "step": 27841 }, { "epoch": 0.93, "grad_norm": 0.43234336376190186, "learning_rate": 0.00046958340263585676, "loss": 1.831, "step": 27842 }, { "epoch": 0.93, "grad_norm": 0.43933552503585815, "learning_rate": 0.00046957478057720823, "loss": 1.8227, "step": 27843 }, { "epoch": 0.93, "grad_norm": 0.43601834774017334, "learning_rate": 0.000469566158312721, "loss": 1.7863, "step": 27844 }, { "epoch": 0.93, "grad_norm": 0.9211806058883667, "learning_rate": 0.0004695575358424053, "loss": 1.8658, "step": 27845 }, { "epoch": 0.93, "grad_norm": 0.43840497732162476, "learning_rate": 0.0004695489131662718, "loss": 1.8418, "step": 27846 }, { "epoch": 0.93, "grad_norm": 0.4469466805458069, "learning_rate": 0.00046954029028433076, "loss": 1.8575, "step": 27847 }, { "epoch": 0.93, "grad_norm": 0.44296586513519287, "learning_rate": 0.000469531667196593, "loss": 1.8077, "step": 27848 }, { "epoch": 0.93, "grad_norm": 0.4842923879623413, "learning_rate": 0.0004695230439030687, "loss": 1.8048, "step": 27849 }, { "epoch": 0.93, "grad_norm": 0.5067299604415894, "learning_rate": 0.00046951442040376836, "loss": 1.8343, "step": 27850 }, { "epoch": 0.93, "grad_norm": 0.44334518909454346, "learning_rate": 0.00046950579669870245, "loss": 1.7953, "step": 27851 }, { "epoch": 0.93, "grad_norm": 0.43881529569625854, "learning_rate": 0.0004694971727878816, "loss": 1.762, "step": 27852 }, { "epoch": 0.93, "grad_norm": 0.44786858558654785, "learning_rate": 0.0004694885486713161, "loss": 1.8298, "step": 27853 }, { "epoch": 0.93, "grad_norm": 0.45911404490470886, "learning_rate": 0.00046947992434901646, "loss": 1.7739, "step": 27854 }, { "epoch": 0.93, "grad_norm": 0.47059300541877747, "learning_rate": 0.0004694712998209931, "loss": 1.7833, "step": 27855 }, { "epoch": 0.93, "grad_norm": 0.45513832569122314, "learning_rate": 0.00046946267508725674, "loss": 1.8771, "step": 27856 }, { "epoch": 0.93, "grad_norm": 0.4456493854522705, "learning_rate": 0.0004694540501478175, "loss": 1.8159, "step": 27857 }, { "epoch": 0.93, "grad_norm": 0.4549523591995239, "learning_rate": 0.00046944542500268607, "loss": 1.862, "step": 27858 }, { "epoch": 0.93, "grad_norm": 0.44338589906692505, "learning_rate": 0.0004694367996518728, "loss": 1.9209, "step": 27859 }, { "epoch": 0.93, "grad_norm": 0.4594642221927643, "learning_rate": 0.0004694281740953883, "loss": 1.837, "step": 27860 }, { "epoch": 0.93, "grad_norm": 0.46423617005348206, "learning_rate": 0.00046941954833324296, "loss": 1.8339, "step": 27861 }, { "epoch": 0.93, "grad_norm": 0.4685281217098236, "learning_rate": 0.0004694109223654472, "loss": 1.7575, "step": 27862 }, { "epoch": 0.93, "grad_norm": 0.4453926086425781, "learning_rate": 0.00046940229619201157, "loss": 1.7673, "step": 27863 }, { "epoch": 0.93, "grad_norm": 0.44212013483047485, "learning_rate": 0.0004693936698129465, "loss": 1.8232, "step": 27864 }, { "epoch": 0.93, "grad_norm": 0.4374937415122986, "learning_rate": 0.0004693850432282625, "loss": 1.8066, "step": 27865 }, { "epoch": 0.93, "grad_norm": 0.44077983498573303, "learning_rate": 0.00046937641643797, "loss": 1.7805, "step": 27866 }, { "epoch": 0.93, "grad_norm": 0.44503793120384216, "learning_rate": 0.00046936778944207946, "loss": 1.7258, "step": 27867 }, { "epoch": 0.93, "grad_norm": 0.44184133410453796, "learning_rate": 0.00046935916224060146, "loss": 1.7818, "step": 27868 }, { "epoch": 0.93, "grad_norm": 0.4399777352809906, "learning_rate": 0.00046935053483354626, "loss": 1.8736, "step": 27869 }, { "epoch": 0.93, "grad_norm": 0.4463692605495453, "learning_rate": 0.00046934190722092454, "loss": 1.7959, "step": 27870 }, { "epoch": 0.93, "grad_norm": 0.44067293405532837, "learning_rate": 0.0004693332794027467, "loss": 1.8434, "step": 27871 }, { "epoch": 0.93, "grad_norm": 0.446504682302475, "learning_rate": 0.0004693246513790232, "loss": 1.8631, "step": 27872 }, { "epoch": 0.93, "grad_norm": 0.434799462556839, "learning_rate": 0.00046931602314976455, "loss": 1.7521, "step": 27873 }, { "epoch": 0.93, "grad_norm": 0.4578373432159424, "learning_rate": 0.0004693073947149811, "loss": 1.8115, "step": 27874 }, { "epoch": 0.93, "grad_norm": 0.473164439201355, "learning_rate": 0.0004692987660746835, "loss": 1.7551, "step": 27875 }, { "epoch": 0.93, "grad_norm": 0.444352388381958, "learning_rate": 0.00046929013722888215, "loss": 1.8073, "step": 27876 }, { "epoch": 0.93, "grad_norm": 0.4372487962245941, "learning_rate": 0.0004692815081775875, "loss": 1.7339, "step": 27877 }, { "epoch": 0.93, "grad_norm": 0.4504885971546173, "learning_rate": 0.00046927287892081006, "loss": 1.8361, "step": 27878 }, { "epoch": 0.93, "grad_norm": 0.4376381039619446, "learning_rate": 0.0004692642494585602, "loss": 1.8396, "step": 27879 }, { "epoch": 0.93, "grad_norm": 0.448026567697525, "learning_rate": 0.0004692556197908486, "loss": 1.7909, "step": 27880 }, { "epoch": 0.93, "grad_norm": 0.44366663694381714, "learning_rate": 0.00046924698991768557, "loss": 1.7756, "step": 27881 }, { "epoch": 0.93, "grad_norm": 0.4503158628940582, "learning_rate": 0.00046923835983908157, "loss": 1.7572, "step": 27882 }, { "epoch": 0.93, "grad_norm": 0.46115776896476746, "learning_rate": 0.0004692297295550472, "loss": 1.8355, "step": 27883 }, { "epoch": 0.93, "grad_norm": 0.44432416558265686, "learning_rate": 0.0004692210990655929, "loss": 1.8428, "step": 27884 }, { "epoch": 0.93, "grad_norm": 0.4431540369987488, "learning_rate": 0.0004692124683707291, "loss": 1.8207, "step": 27885 }, { "epoch": 0.93, "grad_norm": 0.4575018584728241, "learning_rate": 0.0004692038374704663, "loss": 1.7621, "step": 27886 }, { "epoch": 0.93, "grad_norm": 0.46375608444213867, "learning_rate": 0.00046919520636481494, "loss": 1.7783, "step": 27887 }, { "epoch": 0.93, "grad_norm": 0.425089955329895, "learning_rate": 0.00046918657505378553, "loss": 1.7889, "step": 27888 }, { "epoch": 0.93, "grad_norm": 0.4471346139907837, "learning_rate": 0.0004691779435373885, "loss": 1.7335, "step": 27889 }, { "epoch": 0.93, "grad_norm": 0.423335999250412, "learning_rate": 0.0004691693118156345, "loss": 1.7244, "step": 27890 }, { "epoch": 0.93, "grad_norm": 0.46537303924560547, "learning_rate": 0.00046916067988853386, "loss": 1.8403, "step": 27891 }, { "epoch": 0.93, "grad_norm": 0.43454235792160034, "learning_rate": 0.000469152047756097, "loss": 1.814, "step": 27892 }, { "epoch": 0.93, "grad_norm": 0.42958202958106995, "learning_rate": 0.00046914341541833454, "loss": 1.7614, "step": 27893 }, { "epoch": 0.93, "grad_norm": 0.455700159072876, "learning_rate": 0.0004691347828752568, "loss": 1.7725, "step": 27894 }, { "epoch": 0.93, "grad_norm": 0.4399247169494629, "learning_rate": 0.0004691261501268744, "loss": 1.838, "step": 27895 }, { "epoch": 0.93, "grad_norm": 0.4418005645275116, "learning_rate": 0.00046911751717319784, "loss": 1.8192, "step": 27896 }, { "epoch": 0.93, "grad_norm": 0.44792336225509644, "learning_rate": 0.00046910888401423744, "loss": 1.8299, "step": 27897 }, { "epoch": 0.93, "grad_norm": 0.4693615138530731, "learning_rate": 0.0004691002506500038, "loss": 1.8604, "step": 27898 }, { "epoch": 0.93, "grad_norm": 0.4422086775302887, "learning_rate": 0.00046909161708050745, "loss": 1.679, "step": 27899 }, { "epoch": 0.93, "grad_norm": 0.44453302025794983, "learning_rate": 0.0004690829833057587, "loss": 1.8035, "step": 27900 }, { "epoch": 0.93, "grad_norm": 0.4577445387840271, "learning_rate": 0.00046907434932576817, "loss": 1.9006, "step": 27901 }, { "epoch": 0.93, "grad_norm": 0.4584108591079712, "learning_rate": 0.0004690657151405463, "loss": 1.8042, "step": 27902 }, { "epoch": 0.93, "grad_norm": 0.4302605986595154, "learning_rate": 0.00046905708075010354, "loss": 1.7954, "step": 27903 }, { "epoch": 0.93, "grad_norm": 0.45626550912857056, "learning_rate": 0.00046904844615445037, "loss": 1.8694, "step": 27904 }, { "epoch": 0.93, "grad_norm": 0.446866512298584, "learning_rate": 0.0004690398113535973, "loss": 1.7831, "step": 27905 }, { "epoch": 0.93, "grad_norm": 0.4333076775074005, "learning_rate": 0.00046903117634755485, "loss": 1.7792, "step": 27906 }, { "epoch": 0.93, "grad_norm": 0.44291216135025024, "learning_rate": 0.00046902254113633337, "loss": 1.8218, "step": 27907 }, { "epoch": 0.93, "grad_norm": 0.44572699069976807, "learning_rate": 0.00046901390571994355, "loss": 1.8701, "step": 27908 }, { "epoch": 0.93, "grad_norm": 0.4380042850971222, "learning_rate": 0.0004690052700983956, "loss": 1.7331, "step": 27909 }, { "epoch": 0.93, "grad_norm": 0.44845375418663025, "learning_rate": 0.0004689966342717003, "loss": 1.8775, "step": 27910 }, { "epoch": 0.93, "grad_norm": 0.4257533550262451, "learning_rate": 0.00046898799823986796, "loss": 1.7629, "step": 27911 }, { "epoch": 0.93, "grad_norm": 0.44361618161201477, "learning_rate": 0.000468979362002909, "loss": 1.775, "step": 27912 }, { "epoch": 0.93, "grad_norm": 0.4502768814563751, "learning_rate": 0.000468970725560834, "loss": 1.8619, "step": 27913 }, { "epoch": 0.93, "grad_norm": 0.44988399744033813, "learning_rate": 0.0004689620889136535, "loss": 1.8325, "step": 27914 }, { "epoch": 0.93, "grad_norm": 0.44081535935401917, "learning_rate": 0.00046895345206137795, "loss": 1.8007, "step": 27915 }, { "epoch": 0.93, "grad_norm": 0.43591195344924927, "learning_rate": 0.0004689448150040178, "loss": 1.729, "step": 27916 }, { "epoch": 0.93, "grad_norm": 0.44131115078926086, "learning_rate": 0.0004689361777415834, "loss": 1.7764, "step": 27917 }, { "epoch": 0.93, "grad_norm": 0.4371678829193115, "learning_rate": 0.0004689275402740855, "loss": 1.7845, "step": 27918 }, { "epoch": 0.93, "grad_norm": 0.4431018531322479, "learning_rate": 0.00046891890260153445, "loss": 1.8239, "step": 27919 }, { "epoch": 0.93, "grad_norm": 0.44362178444862366, "learning_rate": 0.0004689102647239407, "loss": 1.7663, "step": 27920 }, { "epoch": 0.93, "grad_norm": 0.4468610882759094, "learning_rate": 0.0004689016266413149, "loss": 1.7899, "step": 27921 }, { "epoch": 0.93, "grad_norm": 0.45787662267684937, "learning_rate": 0.00046889298835366724, "loss": 1.8545, "step": 27922 }, { "epoch": 0.93, "grad_norm": 0.46341559290885925, "learning_rate": 0.0004688843498610084, "loss": 1.802, "step": 27923 }, { "epoch": 0.93, "grad_norm": 0.4239508807659149, "learning_rate": 0.00046887571116334886, "loss": 1.7962, "step": 27924 }, { "epoch": 0.93, "grad_norm": 0.46054884791374207, "learning_rate": 0.00046886707226069914, "loss": 1.7564, "step": 27925 }, { "epoch": 0.93, "grad_norm": 0.45146724581718445, "learning_rate": 0.0004688584331530697, "loss": 1.8863, "step": 27926 }, { "epoch": 0.93, "grad_norm": 0.44920387864112854, "learning_rate": 0.0004688497938404709, "loss": 1.7443, "step": 27927 }, { "epoch": 0.93, "grad_norm": 0.4485826790332794, "learning_rate": 0.0004688411543229134, "loss": 1.8286, "step": 27928 }, { "epoch": 0.93, "grad_norm": 0.43336328864097595, "learning_rate": 0.00046883251460040756, "loss": 1.7961, "step": 27929 }, { "epoch": 0.93, "grad_norm": 0.43898189067840576, "learning_rate": 0.000468823874672964, "loss": 1.85, "step": 27930 }, { "epoch": 0.93, "grad_norm": 0.45955535769462585, "learning_rate": 0.0004688152345405931, "loss": 1.9137, "step": 27931 }, { "epoch": 0.93, "grad_norm": 0.44616034626960754, "learning_rate": 0.00046880659420330537, "loss": 1.8042, "step": 27932 }, { "epoch": 0.93, "grad_norm": 0.43694743514060974, "learning_rate": 0.0004687979536611113, "loss": 1.8219, "step": 27933 }, { "epoch": 0.93, "grad_norm": 0.44076231122016907, "learning_rate": 0.00046878931291402135, "loss": 1.8274, "step": 27934 }, { "epoch": 0.93, "grad_norm": 0.42881831526756287, "learning_rate": 0.00046878067196204614, "loss": 1.8305, "step": 27935 }, { "epoch": 0.93, "grad_norm": 0.4414447546005249, "learning_rate": 0.00046877203080519596, "loss": 1.7848, "step": 27936 }, { "epoch": 0.93, "grad_norm": 0.42201605439186096, "learning_rate": 0.0004687633894434815, "loss": 1.7854, "step": 27937 }, { "epoch": 0.93, "grad_norm": 0.47106674313545227, "learning_rate": 0.00046875474787691306, "loss": 1.8433, "step": 27938 }, { "epoch": 0.93, "grad_norm": 0.4310446083545685, "learning_rate": 0.00046874610610550126, "loss": 1.8646, "step": 27939 }, { "epoch": 0.93, "grad_norm": 0.4533037543296814, "learning_rate": 0.0004687374641292566, "loss": 1.8229, "step": 27940 }, { "epoch": 0.93, "grad_norm": 0.42377233505249023, "learning_rate": 0.0004687288219481895, "loss": 1.8142, "step": 27941 }, { "epoch": 0.93, "grad_norm": 0.43677017092704773, "learning_rate": 0.0004687201795623104, "loss": 1.7191, "step": 27942 }, { "epoch": 0.93, "grad_norm": 0.4282892644405365, "learning_rate": 0.00046871153697162994, "loss": 1.7787, "step": 27943 }, { "epoch": 0.93, "grad_norm": 0.4603341221809387, "learning_rate": 0.0004687028941761585, "loss": 1.7654, "step": 27944 }, { "epoch": 0.93, "grad_norm": 0.44205087423324585, "learning_rate": 0.0004686942511759066, "loss": 1.8259, "step": 27945 }, { "epoch": 0.93, "grad_norm": 0.45743951201438904, "learning_rate": 0.0004686856079708848, "loss": 1.8716, "step": 27946 }, { "epoch": 0.93, "grad_norm": 0.4337700307369232, "learning_rate": 0.0004686769645611034, "loss": 1.8224, "step": 27947 }, { "epoch": 0.93, "grad_norm": 0.4522906243801117, "learning_rate": 0.00046866832094657313, "loss": 1.7949, "step": 27948 }, { "epoch": 0.93, "grad_norm": 0.4466181695461273, "learning_rate": 0.0004686596771273043, "loss": 1.8264, "step": 27949 }, { "epoch": 0.93, "grad_norm": 0.44333797693252563, "learning_rate": 0.00046865103310330747, "loss": 1.8489, "step": 27950 }, { "epoch": 0.93, "grad_norm": 0.4390847682952881, "learning_rate": 0.0004686423888745932, "loss": 1.8067, "step": 27951 }, { "epoch": 0.93, "grad_norm": 0.4543869197368622, "learning_rate": 0.00046863374444117183, "loss": 1.7952, "step": 27952 }, { "epoch": 0.93, "grad_norm": 0.44327446818351746, "learning_rate": 0.000468625099803054, "loss": 1.7385, "step": 27953 }, { "epoch": 0.93, "grad_norm": 0.42723211646080017, "learning_rate": 0.0004686164549602502, "loss": 1.7318, "step": 27954 }, { "epoch": 0.93, "grad_norm": 0.46502694487571716, "learning_rate": 0.0004686078099127708, "loss": 1.8667, "step": 27955 }, { "epoch": 0.93, "grad_norm": 0.4301239252090454, "learning_rate": 0.0004685991646606264, "loss": 1.8557, "step": 27956 }, { "epoch": 0.93, "grad_norm": 0.44098037481307983, "learning_rate": 0.00046859051920382735, "loss": 1.7655, "step": 27957 }, { "epoch": 0.93, "grad_norm": 0.42743802070617676, "learning_rate": 0.00046858187354238435, "loss": 1.7832, "step": 27958 }, { "epoch": 0.93, "grad_norm": 0.4334859848022461, "learning_rate": 0.00046857322767630767, "loss": 1.7672, "step": 27959 }, { "epoch": 0.93, "grad_norm": 0.44060206413269043, "learning_rate": 0.00046856458160560806, "loss": 1.8768, "step": 27960 }, { "epoch": 0.93, "grad_norm": 0.4693971574306488, "learning_rate": 0.0004685559353302959, "loss": 1.7649, "step": 27961 }, { "epoch": 0.93, "grad_norm": 0.4340618848800659, "learning_rate": 0.0004685472888503816, "loss": 1.7685, "step": 27962 }, { "epoch": 0.93, "grad_norm": 0.4230760931968689, "learning_rate": 0.00046853864216587576, "loss": 1.7303, "step": 27963 }, { "epoch": 0.93, "grad_norm": 0.4433367848396301, "learning_rate": 0.00046852999527678885, "loss": 1.8284, "step": 27964 }, { "epoch": 0.93, "grad_norm": 0.44696247577667236, "learning_rate": 0.0004685213481831313, "loss": 1.8391, "step": 27965 }, { "epoch": 0.93, "grad_norm": 0.4486302137374878, "learning_rate": 0.0004685127008849137, "loss": 1.752, "step": 27966 }, { "epoch": 0.93, "grad_norm": 0.45033395290374756, "learning_rate": 0.00046850405338214653, "loss": 1.8637, "step": 27967 }, { "epoch": 0.93, "grad_norm": 0.43720847368240356, "learning_rate": 0.00046849540567484027, "loss": 1.8432, "step": 27968 }, { "epoch": 0.93, "grad_norm": 0.43379756808280945, "learning_rate": 0.0004684867577630054, "loss": 1.7329, "step": 27969 }, { "epoch": 0.93, "grad_norm": 0.44842973351478577, "learning_rate": 0.00046847810964665247, "loss": 1.815, "step": 27970 }, { "epoch": 0.93, "grad_norm": 0.4436868727207184, "learning_rate": 0.0004684694613257918, "loss": 1.8075, "step": 27971 }, { "epoch": 0.93, "grad_norm": 0.44583839178085327, "learning_rate": 0.00046846081280043413, "loss": 1.7587, "step": 27972 }, { "epoch": 0.93, "grad_norm": 0.4518927335739136, "learning_rate": 0.0004684521640705899, "loss": 1.909, "step": 27973 }, { "epoch": 0.93, "grad_norm": 0.4410552978515625, "learning_rate": 0.00046844351513626944, "loss": 1.8293, "step": 27974 }, { "epoch": 0.93, "grad_norm": 0.4722188413143158, "learning_rate": 0.00046843486599748346, "loss": 1.7745, "step": 27975 }, { "epoch": 0.93, "grad_norm": 0.46361735463142395, "learning_rate": 0.0004684262166542424, "loss": 1.8323, "step": 27976 }, { "epoch": 0.93, "grad_norm": 0.43179425597190857, "learning_rate": 0.0004684175671065567, "loss": 1.7489, "step": 27977 }, { "epoch": 0.93, "grad_norm": 0.4379844069480896, "learning_rate": 0.0004684089173544368, "loss": 1.8157, "step": 27978 }, { "epoch": 0.93, "grad_norm": 0.45053574442863464, "learning_rate": 0.0004684002673978934, "loss": 1.8292, "step": 27979 }, { "epoch": 0.93, "grad_norm": 0.43450793623924255, "learning_rate": 0.0004683916172369368, "loss": 1.8105, "step": 27980 }, { "epoch": 0.93, "grad_norm": 0.4546165466308594, "learning_rate": 0.00046838296687157764, "loss": 1.7976, "step": 27981 }, { "epoch": 0.93, "grad_norm": 0.46574240922927856, "learning_rate": 0.00046837431630182634, "loss": 1.8106, "step": 27982 }, { "epoch": 0.93, "grad_norm": 0.44653552770614624, "learning_rate": 0.00046836566552769344, "loss": 1.7333, "step": 27983 }, { "epoch": 0.93, "grad_norm": 0.4547365605831146, "learning_rate": 0.0004683570145491895, "loss": 1.7916, "step": 27984 }, { "epoch": 0.93, "grad_norm": 0.44322869181632996, "learning_rate": 0.00046834836336632487, "loss": 1.7656, "step": 27985 }, { "epoch": 0.93, "grad_norm": 0.45637983083724976, "learning_rate": 0.0004683397119791101, "loss": 1.7861, "step": 27986 }, { "epoch": 0.93, "grad_norm": 0.4652480483055115, "learning_rate": 0.00046833106038755585, "loss": 1.7361, "step": 27987 }, { "epoch": 0.93, "grad_norm": 0.44835370779037476, "learning_rate": 0.00046832240859167245, "loss": 1.76, "step": 27988 }, { "epoch": 0.93, "grad_norm": 0.4563165307044983, "learning_rate": 0.0004683137565914703, "loss": 1.7972, "step": 27989 }, { "epoch": 0.93, "grad_norm": 0.451761931180954, "learning_rate": 0.00046830510438696025, "loss": 1.7908, "step": 27990 }, { "epoch": 0.93, "grad_norm": 0.48485204577445984, "learning_rate": 0.00046829645197815243, "loss": 1.8089, "step": 27991 }, { "epoch": 0.93, "grad_norm": 0.46696773171424866, "learning_rate": 0.00046828779936505766, "loss": 1.8654, "step": 27992 }, { "epoch": 0.93, "grad_norm": 0.466426819562912, "learning_rate": 0.00046827914654768625, "loss": 1.81, "step": 27993 }, { "epoch": 0.93, "grad_norm": 0.4565938413143158, "learning_rate": 0.0004682704935260487, "loss": 1.8214, "step": 27994 }, { "epoch": 0.93, "grad_norm": 0.4336896538734436, "learning_rate": 0.00046826184030015567, "loss": 1.7712, "step": 27995 }, { "epoch": 0.93, "grad_norm": 0.44996246695518494, "learning_rate": 0.00046825318687001747, "loss": 1.7953, "step": 27996 }, { "epoch": 0.93, "grad_norm": 0.47721898555755615, "learning_rate": 0.0004682445332356447, "loss": 1.8826, "step": 27997 }, { "epoch": 0.93, "grad_norm": 0.43908774852752686, "learning_rate": 0.0004682358793970479, "loss": 1.7688, "step": 27998 }, { "epoch": 0.93, "grad_norm": 0.44253116846084595, "learning_rate": 0.0004682272253542375, "loss": 1.6993, "step": 27999 }, { "epoch": 0.93, "grad_norm": 0.4397438168525696, "learning_rate": 0.0004682185711072241, "loss": 1.875, "step": 28000 }, { "epoch": 0.93, "grad_norm": 0.4420122802257538, "learning_rate": 0.00046820991665601805, "loss": 1.8667, "step": 28001 }, { "epoch": 0.93, "grad_norm": 0.4652542471885681, "learning_rate": 0.00046820126200062997, "loss": 1.8186, "step": 28002 }, { "epoch": 0.93, "grad_norm": 0.45168495178222656, "learning_rate": 0.00046819260714107036, "loss": 1.6918, "step": 28003 }, { "epoch": 0.93, "grad_norm": 0.465535968542099, "learning_rate": 0.0004681839520773496, "loss": 1.8596, "step": 28004 }, { "epoch": 0.93, "grad_norm": 0.48126211762428284, "learning_rate": 0.00046817529680947845, "loss": 1.783, "step": 28005 }, { "epoch": 0.93, "grad_norm": 0.4544122517108917, "learning_rate": 0.0004681666413374672, "loss": 1.8243, "step": 28006 }, { "epoch": 0.93, "grad_norm": 0.4526095688343048, "learning_rate": 0.0004681579856613265, "loss": 1.7514, "step": 28007 }, { "epoch": 0.93, "grad_norm": 0.4488140344619751, "learning_rate": 0.0004681493297810667, "loss": 1.7574, "step": 28008 }, { "epoch": 0.93, "grad_norm": 0.4565425217151642, "learning_rate": 0.0004681406736966984, "loss": 1.8275, "step": 28009 }, { "epoch": 0.93, "grad_norm": 0.42640450596809387, "learning_rate": 0.0004681320174082321, "loss": 1.7448, "step": 28010 }, { "epoch": 0.93, "grad_norm": 0.45309340953826904, "learning_rate": 0.00046812336091567823, "loss": 1.8036, "step": 28011 }, { "epoch": 0.93, "grad_norm": 0.4516144394874573, "learning_rate": 0.00046811470421904755, "loss": 1.7656, "step": 28012 }, { "epoch": 0.93, "grad_norm": 0.4343400299549103, "learning_rate": 0.0004681060473183503, "loss": 1.8157, "step": 28013 }, { "epoch": 0.93, "grad_norm": 0.4451492726802826, "learning_rate": 0.00046809739021359706, "loss": 1.7991, "step": 28014 }, { "epoch": 0.93, "grad_norm": 0.43579432368278503, "learning_rate": 0.00046808873290479834, "loss": 1.8096, "step": 28015 }, { "epoch": 0.93, "grad_norm": 0.445269912481308, "learning_rate": 0.0004680800753919647, "loss": 1.7633, "step": 28016 }, { "epoch": 0.93, "grad_norm": 0.4736759662628174, "learning_rate": 0.00046807141767510666, "loss": 1.7996, "step": 28017 }, { "epoch": 0.93, "grad_norm": 0.4569043815135956, "learning_rate": 0.0004680627597542346, "loss": 1.8427, "step": 28018 }, { "epoch": 0.93, "grad_norm": 0.4329790472984314, "learning_rate": 0.00046805410162935915, "loss": 1.7639, "step": 28019 }, { "epoch": 0.93, "grad_norm": 0.45666220784187317, "learning_rate": 0.00046804544330049077, "loss": 1.8183, "step": 28020 }, { "epoch": 0.93, "grad_norm": 0.4587291181087494, "learning_rate": 0.00046803678476763994, "loss": 1.8734, "step": 28021 }, { "epoch": 0.93, "grad_norm": 0.4572478234767914, "learning_rate": 0.0004680281260308173, "loss": 1.8139, "step": 28022 }, { "epoch": 0.93, "grad_norm": 0.45594558119773865, "learning_rate": 0.0004680194670900333, "loss": 1.8553, "step": 28023 }, { "epoch": 0.93, "grad_norm": 0.4489108622074127, "learning_rate": 0.0004680108079452983, "loss": 1.9184, "step": 28024 }, { "epoch": 0.93, "grad_norm": 0.44832491874694824, "learning_rate": 0.000468002148596623, "loss": 1.7625, "step": 28025 }, { "epoch": 0.93, "grad_norm": 0.4513950049877167, "learning_rate": 0.00046799348904401784, "loss": 1.8083, "step": 28026 }, { "epoch": 0.93, "grad_norm": 0.45813286304473877, "learning_rate": 0.00046798482928749334, "loss": 1.8143, "step": 28027 }, { "epoch": 0.93, "grad_norm": 0.45539721846580505, "learning_rate": 0.00046797616932706, "loss": 1.8468, "step": 28028 }, { "epoch": 0.93, "grad_norm": 0.44155973196029663, "learning_rate": 0.00046796750916272827, "loss": 1.8332, "step": 28029 }, { "epoch": 0.93, "grad_norm": 0.4418506622314453, "learning_rate": 0.00046795884879450883, "loss": 1.7889, "step": 28030 }, { "epoch": 0.93, "grad_norm": 0.48364749550819397, "learning_rate": 0.0004679501882224121, "loss": 1.8388, "step": 28031 }, { "epoch": 0.93, "grad_norm": 0.47797176241874695, "learning_rate": 0.0004679415274464486, "loss": 1.8287, "step": 28032 }, { "epoch": 0.93, "grad_norm": 0.4466972053050995, "learning_rate": 0.00046793286646662876, "loss": 1.8226, "step": 28033 }, { "epoch": 0.93, "grad_norm": 0.4563498795032501, "learning_rate": 0.0004679242052829632, "loss": 1.7677, "step": 28034 }, { "epoch": 0.93, "grad_norm": 0.4542303681373596, "learning_rate": 0.00046791554389546237, "loss": 1.819, "step": 28035 }, { "epoch": 0.93, "grad_norm": 0.44516584277153015, "learning_rate": 0.00046790688230413684, "loss": 1.7463, "step": 28036 }, { "epoch": 0.93, "grad_norm": 0.4596160054206848, "learning_rate": 0.00046789822050899703, "loss": 1.8096, "step": 28037 }, { "epoch": 0.93, "grad_norm": 0.4612495005130768, "learning_rate": 0.00046788955851005356, "loss": 1.8313, "step": 28038 }, { "epoch": 0.93, "grad_norm": 0.4782370924949646, "learning_rate": 0.000467880896307317, "loss": 1.7953, "step": 28039 }, { "epoch": 0.93, "grad_norm": 0.4443044364452362, "learning_rate": 0.0004678722339007977, "loss": 1.7603, "step": 28040 }, { "epoch": 0.93, "grad_norm": 0.4387694001197815, "learning_rate": 0.00046786357129050617, "loss": 1.7892, "step": 28041 }, { "epoch": 0.93, "grad_norm": 0.4619719386100769, "learning_rate": 0.0004678549084764531, "loss": 1.8046, "step": 28042 }, { "epoch": 0.93, "grad_norm": 0.4459229111671448, "learning_rate": 0.0004678462454586489, "loss": 1.7701, "step": 28043 }, { "epoch": 0.93, "grad_norm": 0.4656536877155304, "learning_rate": 0.000467837582237104, "loss": 1.8202, "step": 28044 }, { "epoch": 0.93, "grad_norm": 0.44277051091194153, "learning_rate": 0.0004678289188118291, "loss": 1.8742, "step": 28045 }, { "epoch": 0.93, "grad_norm": 0.44571101665496826, "learning_rate": 0.0004678202551828345, "loss": 1.8157, "step": 28046 }, { "epoch": 0.93, "grad_norm": 0.44719189405441284, "learning_rate": 0.0004678115913501309, "loss": 1.8408, "step": 28047 }, { "epoch": 0.93, "grad_norm": 0.4603489935398102, "learning_rate": 0.00046780292731372875, "loss": 1.9021, "step": 28048 }, { "epoch": 0.93, "grad_norm": 0.4362226724624634, "learning_rate": 0.0004677942630736386, "loss": 1.7702, "step": 28049 }, { "epoch": 0.93, "grad_norm": 0.44794711470603943, "learning_rate": 0.00046778559862987096, "loss": 1.7419, "step": 28050 }, { "epoch": 0.93, "grad_norm": 0.4494786858558655, "learning_rate": 0.0004677769339824363, "loss": 1.8158, "step": 28051 }, { "epoch": 0.93, "grad_norm": 0.4496382176876068, "learning_rate": 0.0004677682691313451, "loss": 1.812, "step": 28052 }, { "epoch": 0.93, "grad_norm": 0.4513223469257355, "learning_rate": 0.00046775960407660806, "loss": 1.8691, "step": 28053 }, { "epoch": 0.93, "grad_norm": 0.45039984583854675, "learning_rate": 0.0004677509388182355, "loss": 1.8795, "step": 28054 }, { "epoch": 0.93, "grad_norm": 0.4477742314338684, "learning_rate": 0.000467742273356238, "loss": 1.8156, "step": 28055 }, { "epoch": 0.93, "grad_norm": 0.43594256043434143, "learning_rate": 0.00046773360769062613, "loss": 1.7757, "step": 28056 }, { "epoch": 0.93, "grad_norm": 0.4631170630455017, "learning_rate": 0.00046772494182141034, "loss": 1.8056, "step": 28057 }, { "epoch": 0.93, "grad_norm": 0.43982866406440735, "learning_rate": 0.00046771627574860127, "loss": 1.8304, "step": 28058 }, { "epoch": 0.93, "grad_norm": 0.46963992714881897, "learning_rate": 0.0004677076094722093, "loss": 1.8257, "step": 28059 }, { "epoch": 0.93, "grad_norm": 0.4391995668411255, "learning_rate": 0.00046769894299224495, "loss": 1.8062, "step": 28060 }, { "epoch": 0.93, "grad_norm": 0.45838916301727295, "learning_rate": 0.00046769027630871884, "loss": 1.8161, "step": 28061 }, { "epoch": 0.93, "grad_norm": 0.43504443764686584, "learning_rate": 0.0004676816094216414, "loss": 1.8055, "step": 28062 }, { "epoch": 0.93, "grad_norm": 0.8917441368103027, "learning_rate": 0.0004676729423310233, "loss": 1.7877, "step": 28063 }, { "epoch": 0.93, "grad_norm": 0.47239020466804504, "learning_rate": 0.0004676642750368749, "loss": 1.8546, "step": 28064 }, { "epoch": 0.93, "grad_norm": 0.4493321180343628, "learning_rate": 0.0004676556075392068, "loss": 1.7877, "step": 28065 }, { "epoch": 0.93, "grad_norm": 0.4843519926071167, "learning_rate": 0.00046764693983802945, "loss": 1.8579, "step": 28066 }, { "epoch": 0.93, "grad_norm": 0.479475736618042, "learning_rate": 0.0004676382719333534, "loss": 1.8277, "step": 28067 }, { "epoch": 0.93, "grad_norm": 0.44399183988571167, "learning_rate": 0.00046762960382518924, "loss": 1.8204, "step": 28068 }, { "epoch": 0.93, "grad_norm": 0.4549630880355835, "learning_rate": 0.0004676209355135474, "loss": 1.7966, "step": 28069 }, { "epoch": 0.93, "grad_norm": 0.4702288806438446, "learning_rate": 0.00046761226699843845, "loss": 1.7063, "step": 28070 }, { "epoch": 0.93, "grad_norm": 0.45562267303466797, "learning_rate": 0.0004676035982798729, "loss": 1.8963, "step": 28071 }, { "epoch": 0.93, "grad_norm": 0.47056353092193604, "learning_rate": 0.00046759492935786133, "loss": 1.7897, "step": 28072 }, { "epoch": 0.93, "grad_norm": 0.43143922090530396, "learning_rate": 0.0004675862602324142, "loss": 1.8785, "step": 28073 }, { "epoch": 0.93, "grad_norm": 0.4345517158508301, "learning_rate": 0.00046757759090354197, "loss": 1.8862, "step": 28074 }, { "epoch": 0.93, "grad_norm": 0.44482389092445374, "learning_rate": 0.00046756892137125524, "loss": 1.7688, "step": 28075 }, { "epoch": 0.93, "grad_norm": 0.4485365152359009, "learning_rate": 0.00046756025163556455, "loss": 1.8573, "step": 28076 }, { "epoch": 0.93, "grad_norm": 0.4472087621688843, "learning_rate": 0.0004675515816964804, "loss": 1.7713, "step": 28077 }, { "epoch": 0.93, "grad_norm": 0.426740437746048, "learning_rate": 0.00046754291155401336, "loss": 1.7415, "step": 28078 }, { "epoch": 0.93, "grad_norm": 0.43856433033943176, "learning_rate": 0.00046753424120817396, "loss": 1.8145, "step": 28079 }, { "epoch": 0.93, "grad_norm": 0.44694456458091736, "learning_rate": 0.0004675255706589726, "loss": 1.8338, "step": 28080 }, { "epoch": 0.93, "grad_norm": 0.4514142870903015, "learning_rate": 0.00046751689990641985, "loss": 1.7047, "step": 28081 }, { "epoch": 0.93, "grad_norm": 0.4369055926799774, "learning_rate": 0.00046750822895052634, "loss": 1.8223, "step": 28082 }, { "epoch": 0.93, "grad_norm": 0.44150108098983765, "learning_rate": 0.0004674995577913025, "loss": 1.7868, "step": 28083 }, { "epoch": 0.93, "grad_norm": 0.4605068862438202, "learning_rate": 0.00046749088642875883, "loss": 1.7777, "step": 28084 }, { "epoch": 0.93, "grad_norm": 0.4415743947029114, "learning_rate": 0.0004674822148629059, "loss": 1.8757, "step": 28085 }, { "epoch": 0.93, "grad_norm": 0.4439027011394501, "learning_rate": 0.0004674735430937543, "loss": 1.8503, "step": 28086 }, { "epoch": 0.93, "grad_norm": 0.45870473980903625, "learning_rate": 0.00046746487112131447, "loss": 1.8456, "step": 28087 }, { "epoch": 0.93, "grad_norm": 0.4393174946308136, "learning_rate": 0.000467456198945597, "loss": 1.8992, "step": 28088 }, { "epoch": 0.93, "grad_norm": 0.4297998547554016, "learning_rate": 0.00046744752656661227, "loss": 1.7765, "step": 28089 }, { "epoch": 0.93, "grad_norm": 0.47934719920158386, "learning_rate": 0.000467438853984371, "loss": 1.8585, "step": 28090 }, { "epoch": 0.93, "grad_norm": 0.4516562819480896, "learning_rate": 0.0004674301811988836, "loss": 1.7481, "step": 28091 }, { "epoch": 0.93, "grad_norm": 0.4599097669124603, "learning_rate": 0.0004674215082101607, "loss": 1.8642, "step": 28092 }, { "epoch": 0.93, "grad_norm": 0.44799289107322693, "learning_rate": 0.00046741283501821267, "loss": 1.8363, "step": 28093 }, { "epoch": 0.93, "grad_norm": 0.4495583772659302, "learning_rate": 0.00046740416162305016, "loss": 1.8057, "step": 28094 }, { "epoch": 0.93, "grad_norm": 0.42871084809303284, "learning_rate": 0.0004673954880246836, "loss": 1.7552, "step": 28095 }, { "epoch": 0.93, "grad_norm": 0.43895843625068665, "learning_rate": 0.0004673868142231237, "loss": 1.8287, "step": 28096 }, { "epoch": 0.93, "grad_norm": 0.45278269052505493, "learning_rate": 0.0004673781402183808, "loss": 1.8682, "step": 28097 }, { "epoch": 0.93, "grad_norm": 0.44730088114738464, "learning_rate": 0.00046736946601046554, "loss": 1.8659, "step": 28098 }, { "epoch": 0.93, "grad_norm": 0.4419984221458435, "learning_rate": 0.00046736079159938836, "loss": 1.7828, "step": 28099 }, { "epoch": 0.93, "grad_norm": 0.46967318654060364, "learning_rate": 0.0004673521169851598, "loss": 1.7948, "step": 28100 }, { "epoch": 0.93, "grad_norm": 0.4491645097732544, "learning_rate": 0.00046734344216779056, "loss": 1.8129, "step": 28101 }, { "epoch": 0.93, "grad_norm": 0.49059221148490906, "learning_rate": 0.0004673347671472909, "loss": 1.9149, "step": 28102 }, { "epoch": 0.93, "grad_norm": 0.4798949062824249, "learning_rate": 0.0004673260919236716, "loss": 1.8462, "step": 28103 }, { "epoch": 0.94, "grad_norm": 0.4474318325519562, "learning_rate": 0.000467317416496943, "loss": 1.6878, "step": 28104 }, { "epoch": 0.94, "grad_norm": 0.4531402587890625, "learning_rate": 0.00046730874086711575, "loss": 1.7764, "step": 28105 }, { "epoch": 0.94, "grad_norm": 0.49651235342025757, "learning_rate": 0.00046730006503420033, "loss": 1.7761, "step": 28106 }, { "epoch": 0.94, "grad_norm": 0.4605851173400879, "learning_rate": 0.0004672913889982073, "loss": 1.8056, "step": 28107 }, { "epoch": 0.94, "grad_norm": 0.45106202363967896, "learning_rate": 0.00046728271275914713, "loss": 1.7547, "step": 28108 }, { "epoch": 0.94, "grad_norm": 0.45115500688552856, "learning_rate": 0.0004672740363170305, "loss": 1.766, "step": 28109 }, { "epoch": 0.94, "grad_norm": 0.5074249505996704, "learning_rate": 0.0004672653596718677, "loss": 1.8696, "step": 28110 }, { "epoch": 0.94, "grad_norm": 0.4903315603733063, "learning_rate": 0.0004672566828236695, "loss": 1.8668, "step": 28111 }, { "epoch": 0.94, "grad_norm": 0.44007426500320435, "learning_rate": 0.00046724800577244624, "loss": 1.7516, "step": 28112 }, { "epoch": 0.94, "grad_norm": 0.4515703022480011, "learning_rate": 0.00046723932851820856, "loss": 1.8692, "step": 28113 }, { "epoch": 0.94, "grad_norm": 0.5558609962463379, "learning_rate": 0.0004672306510609671, "loss": 1.8278, "step": 28114 }, { "epoch": 0.94, "grad_norm": 0.4752393364906311, "learning_rate": 0.0004672219734007322, "loss": 1.8906, "step": 28115 }, { "epoch": 0.94, "grad_norm": 0.46418094635009766, "learning_rate": 0.00046721329553751434, "loss": 1.8676, "step": 28116 }, { "epoch": 0.94, "grad_norm": 0.4463059604167938, "learning_rate": 0.00046720461747132427, "loss": 1.8423, "step": 28117 }, { "epoch": 0.94, "grad_norm": 0.44159096479415894, "learning_rate": 0.00046719593920217247, "loss": 1.8029, "step": 28118 }, { "epoch": 0.94, "grad_norm": 0.4746321439743042, "learning_rate": 0.00046718726073006935, "loss": 1.8308, "step": 28119 }, { "epoch": 0.94, "grad_norm": 0.46317583322525024, "learning_rate": 0.0004671785820550257, "loss": 1.7302, "step": 28120 }, { "epoch": 0.94, "grad_norm": 0.44091540575027466, "learning_rate": 0.0004671699031770517, "loss": 1.7844, "step": 28121 }, { "epoch": 0.94, "grad_norm": 0.47068679332733154, "learning_rate": 0.0004671612240961582, "loss": 1.7919, "step": 28122 }, { "epoch": 0.94, "grad_norm": 0.4703826904296875, "learning_rate": 0.0004671525448123555, "loss": 1.7915, "step": 28123 }, { "epoch": 0.94, "grad_norm": 0.4616009294986725, "learning_rate": 0.0004671438653256543, "loss": 1.8532, "step": 28124 }, { "epoch": 0.94, "grad_norm": 0.4425412118434906, "learning_rate": 0.0004671351856360651, "loss": 1.8007, "step": 28125 }, { "epoch": 0.94, "grad_norm": 0.4413659870624542, "learning_rate": 0.0004671265057435983, "loss": 1.7739, "step": 28126 }, { "epoch": 0.94, "grad_norm": 0.4508052170276642, "learning_rate": 0.0004671178256482646, "loss": 1.7439, "step": 28127 }, { "epoch": 0.94, "grad_norm": 0.44297781586647034, "learning_rate": 0.00046710914535007454, "loss": 1.8895, "step": 28128 }, { "epoch": 0.94, "grad_norm": 0.43699926137924194, "learning_rate": 0.0004671004648490385, "loss": 1.8192, "step": 28129 }, { "epoch": 0.94, "grad_norm": 0.45622241497039795, "learning_rate": 0.0004670917841451672, "loss": 1.8559, "step": 28130 }, { "epoch": 0.94, "grad_norm": 0.44310852885246277, "learning_rate": 0.0004670831032384711, "loss": 1.8153, "step": 28131 }, { "epoch": 0.94, "grad_norm": 0.4656412899494171, "learning_rate": 0.00046707442212896063, "loss": 1.7523, "step": 28132 }, { "epoch": 0.94, "grad_norm": 0.42617031931877136, "learning_rate": 0.00046706574081664654, "loss": 1.8085, "step": 28133 }, { "epoch": 0.94, "grad_norm": 0.43797504901885986, "learning_rate": 0.0004670570593015392, "loss": 1.7534, "step": 28134 }, { "epoch": 0.94, "grad_norm": 0.4319426119327545, "learning_rate": 0.00046704837758364926, "loss": 1.8427, "step": 28135 }, { "epoch": 0.94, "grad_norm": 0.4404886066913605, "learning_rate": 0.00046703969566298716, "loss": 1.8335, "step": 28136 }, { "epoch": 0.94, "grad_norm": 0.8010161519050598, "learning_rate": 0.0004670310135395635, "loss": 1.756, "step": 28137 }, { "epoch": 0.94, "grad_norm": 0.45719873905181885, "learning_rate": 0.0004670223312133887, "loss": 1.8152, "step": 28138 }, { "epoch": 0.94, "grad_norm": 0.43154922127723694, "learning_rate": 0.0004670136486844735, "loss": 1.7836, "step": 28139 }, { "epoch": 0.94, "grad_norm": 0.46297329664230347, "learning_rate": 0.0004670049659528284, "loss": 1.8221, "step": 28140 }, { "epoch": 0.94, "grad_norm": 0.4403260350227356, "learning_rate": 0.0004669962830184637, "loss": 1.8115, "step": 28141 }, { "epoch": 0.94, "grad_norm": 0.45078060030937195, "learning_rate": 0.0004669875998813903, "loss": 1.8573, "step": 28142 }, { "epoch": 0.94, "grad_norm": 0.4478679895401001, "learning_rate": 0.0004669789165416184, "loss": 1.7257, "step": 28143 }, { "epoch": 0.94, "grad_norm": 0.4399380385875702, "learning_rate": 0.0004669702329991588, "loss": 1.8104, "step": 28144 }, { "epoch": 0.94, "grad_norm": 0.44998207688331604, "learning_rate": 0.00046696154925402193, "loss": 1.8174, "step": 28145 }, { "epoch": 0.94, "grad_norm": 0.4532524347305298, "learning_rate": 0.0004669528653062183, "loss": 1.7343, "step": 28146 }, { "epoch": 0.94, "grad_norm": 0.4460430443286896, "learning_rate": 0.00046694418115575853, "loss": 1.8474, "step": 28147 }, { "epoch": 0.94, "grad_norm": 0.4453500807285309, "learning_rate": 0.0004669354968026531, "loss": 1.7681, "step": 28148 }, { "epoch": 0.94, "grad_norm": 0.444659024477005, "learning_rate": 0.0004669268122469126, "loss": 1.8341, "step": 28149 }, { "epoch": 0.94, "grad_norm": 0.4454329013824463, "learning_rate": 0.00046691812748854753, "loss": 1.8313, "step": 28150 }, { "epoch": 0.94, "grad_norm": 0.44098198413848877, "learning_rate": 0.0004669094425275684, "loss": 1.7735, "step": 28151 }, { "epoch": 0.94, "grad_norm": 0.43819740414619446, "learning_rate": 0.0004669007573639859, "loss": 1.8245, "step": 28152 }, { "epoch": 0.94, "grad_norm": 0.4251238703727722, "learning_rate": 0.0004668920719978104, "loss": 1.7785, "step": 28153 }, { "epoch": 0.94, "grad_norm": 0.4584183692932129, "learning_rate": 0.00046688338642905246, "loss": 1.7772, "step": 28154 }, { "epoch": 0.94, "grad_norm": 0.43579408526420593, "learning_rate": 0.00046687470065772286, "loss": 1.8508, "step": 28155 }, { "epoch": 0.94, "grad_norm": 0.43132707476615906, "learning_rate": 0.00046686601468383176, "loss": 1.8612, "step": 28156 }, { "epoch": 0.94, "grad_norm": 0.45447275042533875, "learning_rate": 0.00046685732850739, "loss": 1.8244, "step": 28157 }, { "epoch": 0.94, "grad_norm": 0.44936487078666687, "learning_rate": 0.0004668486421284081, "loss": 1.8207, "step": 28158 }, { "epoch": 0.94, "grad_norm": 0.4433845579624176, "learning_rate": 0.00046683995554689636, "loss": 1.8483, "step": 28159 }, { "epoch": 0.94, "grad_norm": 0.4434881806373596, "learning_rate": 0.00046683126876286566, "loss": 1.8033, "step": 28160 }, { "epoch": 0.94, "grad_norm": 0.4496859014034271, "learning_rate": 0.0004668225817763263, "loss": 1.7983, "step": 28161 }, { "epoch": 0.94, "grad_norm": 0.4559189975261688, "learning_rate": 0.0004668138945872889, "loss": 1.8733, "step": 28162 }, { "epoch": 0.94, "grad_norm": 0.44527238607406616, "learning_rate": 0.000466805207195764, "loss": 1.8517, "step": 28163 }, { "epoch": 0.94, "grad_norm": 0.5899680852890015, "learning_rate": 0.00046679651960176223, "loss": 1.8452, "step": 28164 }, { "epoch": 0.94, "grad_norm": 0.43439751863479614, "learning_rate": 0.000466787831805294, "loss": 1.8133, "step": 28165 }, { "epoch": 0.94, "grad_norm": 0.4455762803554535, "learning_rate": 0.00046677914380637, "loss": 1.8215, "step": 28166 }, { "epoch": 0.94, "grad_norm": 0.43460503220558167, "learning_rate": 0.0004667704556050006, "loss": 1.7907, "step": 28167 }, { "epoch": 0.94, "grad_norm": 0.43794938921928406, "learning_rate": 0.0004667617672011964, "loss": 1.8565, "step": 28168 }, { "epoch": 0.94, "grad_norm": 0.4471185803413391, "learning_rate": 0.0004667530785949681, "loss": 1.8077, "step": 28169 }, { "epoch": 0.94, "grad_norm": 0.43251386284828186, "learning_rate": 0.0004667443897863261, "loss": 1.8643, "step": 28170 }, { "epoch": 0.94, "grad_norm": 0.45252570509910583, "learning_rate": 0.0004667357007752809, "loss": 1.8015, "step": 28171 }, { "epoch": 0.94, "grad_norm": 0.44420331716537476, "learning_rate": 0.00046672701156184323, "loss": 1.8005, "step": 28172 }, { "epoch": 0.94, "grad_norm": 0.43723344802856445, "learning_rate": 0.0004667183221460235, "loss": 1.8133, "step": 28173 }, { "epoch": 0.94, "grad_norm": 0.4485436677932739, "learning_rate": 0.0004667096325278323, "loss": 1.938, "step": 28174 }, { "epoch": 0.94, "grad_norm": 0.4477422833442688, "learning_rate": 0.00046670094270728015, "loss": 1.8332, "step": 28175 }, { "epoch": 0.94, "grad_norm": 0.44758427143096924, "learning_rate": 0.0004666922526843776, "loss": 1.8395, "step": 28176 }, { "epoch": 0.94, "grad_norm": 0.43232929706573486, "learning_rate": 0.0004666835624591353, "loss": 1.8654, "step": 28177 }, { "epoch": 0.94, "grad_norm": 0.4955160915851593, "learning_rate": 0.0004666748720315636, "loss": 1.8711, "step": 28178 }, { "epoch": 0.94, "grad_norm": 0.44205933809280396, "learning_rate": 0.00046666618140167316, "loss": 1.7726, "step": 28179 }, { "epoch": 0.94, "grad_norm": 0.4644559323787689, "learning_rate": 0.0004666574905694747, "loss": 1.774, "step": 28180 }, { "epoch": 0.94, "grad_norm": 0.45152291655540466, "learning_rate": 0.0004666487995349785, "loss": 1.832, "step": 28181 }, { "epoch": 0.94, "grad_norm": 0.43130823969841003, "learning_rate": 0.0004666401082981952, "loss": 1.8469, "step": 28182 }, { "epoch": 0.94, "grad_norm": 0.4403029978275299, "learning_rate": 0.0004666314168591353, "loss": 1.798, "step": 28183 }, { "epoch": 0.94, "grad_norm": 0.4804562032222748, "learning_rate": 0.00046662272521780956, "loss": 1.7793, "step": 28184 }, { "epoch": 0.94, "grad_norm": 0.465169757604599, "learning_rate": 0.00046661403337422826, "loss": 1.8265, "step": 28185 }, { "epoch": 0.94, "grad_norm": 0.4443988502025604, "learning_rate": 0.0004666053413284021, "loss": 1.7966, "step": 28186 }, { "epoch": 0.94, "grad_norm": 0.4647101163864136, "learning_rate": 0.00046659664908034167, "loss": 1.8353, "step": 28187 }, { "epoch": 0.94, "grad_norm": 0.43751615285873413, "learning_rate": 0.00046658795663005737, "loss": 1.8241, "step": 28188 }, { "epoch": 0.94, "grad_norm": 0.4404989778995514, "learning_rate": 0.00046657926397755987, "loss": 1.8521, "step": 28189 }, { "epoch": 0.94, "grad_norm": 0.44427525997161865, "learning_rate": 0.00046657057112285975, "loss": 1.6881, "step": 28190 }, { "epoch": 0.94, "grad_norm": 0.46829432249069214, "learning_rate": 0.0004665618780659674, "loss": 1.8698, "step": 28191 }, { "epoch": 0.94, "grad_norm": 0.4703257977962494, "learning_rate": 0.00046655318480689345, "loss": 1.7566, "step": 28192 }, { "epoch": 0.94, "grad_norm": 0.45887091755867004, "learning_rate": 0.0004665444913456485, "loss": 1.8426, "step": 28193 }, { "epoch": 0.94, "grad_norm": 0.44904905557632446, "learning_rate": 0.00046653579768224314, "loss": 1.7722, "step": 28194 }, { "epoch": 0.94, "grad_norm": 0.43654531240463257, "learning_rate": 0.00046652710381668783, "loss": 1.8033, "step": 28195 }, { "epoch": 0.94, "grad_norm": 0.43838781118392944, "learning_rate": 0.0004665184097489932, "loss": 1.8622, "step": 28196 }, { "epoch": 0.94, "grad_norm": 0.47565099596977234, "learning_rate": 0.0004665097154791697, "loss": 1.7744, "step": 28197 }, { "epoch": 0.94, "grad_norm": 0.43509215116500854, "learning_rate": 0.000466501021007228, "loss": 1.7795, "step": 28198 }, { "epoch": 0.94, "grad_norm": 0.4459516704082489, "learning_rate": 0.0004664923263331785, "loss": 1.7712, "step": 28199 }, { "epoch": 0.94, "grad_norm": 0.45194312930107117, "learning_rate": 0.0004664836314570319, "loss": 1.803, "step": 28200 }, { "epoch": 0.94, "grad_norm": 0.734507143497467, "learning_rate": 0.0004664749363787986, "loss": 1.8748, "step": 28201 }, { "epoch": 0.94, "grad_norm": 0.45505738258361816, "learning_rate": 0.00046646624109848944, "loss": 1.7512, "step": 28202 }, { "epoch": 0.94, "grad_norm": 0.44770875573158264, "learning_rate": 0.00046645754561611466, "loss": 1.8059, "step": 28203 }, { "epoch": 0.94, "grad_norm": 0.4460600018501282, "learning_rate": 0.000466448849931685, "loss": 1.8332, "step": 28204 }, { "epoch": 0.94, "grad_norm": 0.4312814772129059, "learning_rate": 0.000466440154045211, "loss": 1.8176, "step": 28205 }, { "epoch": 0.94, "grad_norm": 0.44239020347595215, "learning_rate": 0.00046643145795670306, "loss": 1.7275, "step": 28206 }, { "epoch": 0.94, "grad_norm": 0.43931156396865845, "learning_rate": 0.0004664227616661719, "loss": 1.7584, "step": 28207 }, { "epoch": 0.94, "grad_norm": 0.43920937180519104, "learning_rate": 0.00046641406517362806, "loss": 1.8469, "step": 28208 }, { "epoch": 0.94, "grad_norm": 0.4396354556083679, "learning_rate": 0.000466405368479082, "loss": 1.7423, "step": 28209 }, { "epoch": 0.94, "grad_norm": 0.4464516341686249, "learning_rate": 0.00046639667158254446, "loss": 1.7729, "step": 28210 }, { "epoch": 0.94, "grad_norm": 0.4351593255996704, "learning_rate": 0.0004663879744840258, "loss": 1.7892, "step": 28211 }, { "epoch": 0.94, "grad_norm": 0.4579361379146576, "learning_rate": 0.00046637927718353665, "loss": 1.8109, "step": 28212 }, { "epoch": 0.94, "grad_norm": 0.44103607535362244, "learning_rate": 0.00046637057968108755, "loss": 1.8372, "step": 28213 }, { "epoch": 0.94, "grad_norm": 0.4436509907245636, "learning_rate": 0.00046636188197668915, "loss": 1.8289, "step": 28214 }, { "epoch": 0.94, "grad_norm": 0.45517003536224365, "learning_rate": 0.00046635318407035187, "loss": 1.8399, "step": 28215 }, { "epoch": 0.94, "grad_norm": 0.452742338180542, "learning_rate": 0.0004663444859620864, "loss": 1.7644, "step": 28216 }, { "epoch": 0.94, "grad_norm": 0.4209504723548889, "learning_rate": 0.00046633578765190313, "loss": 1.7263, "step": 28217 }, { "epoch": 0.94, "grad_norm": 0.4688539206981659, "learning_rate": 0.0004663270891398128, "loss": 1.8377, "step": 28218 }, { "epoch": 0.94, "grad_norm": 0.46560850739479065, "learning_rate": 0.00046631839042582596, "loss": 1.7918, "step": 28219 }, { "epoch": 0.94, "grad_norm": 0.43548861145973206, "learning_rate": 0.00046630969150995296, "loss": 1.8218, "step": 28220 }, { "epoch": 0.94, "grad_norm": 0.43802836537361145, "learning_rate": 0.00046630099239220455, "loss": 1.8431, "step": 28221 }, { "epoch": 0.94, "grad_norm": 0.4335874319076538, "learning_rate": 0.0004662922930725913, "loss": 1.8288, "step": 28222 }, { "epoch": 0.94, "grad_norm": 0.43661102652549744, "learning_rate": 0.0004662835935511236, "loss": 1.7912, "step": 28223 }, { "epoch": 0.94, "grad_norm": 0.4606301784515381, "learning_rate": 0.00046627489382781225, "loss": 1.8146, "step": 28224 }, { "epoch": 0.94, "grad_norm": 0.43763065338134766, "learning_rate": 0.0004662661939026676, "loss": 1.8503, "step": 28225 }, { "epoch": 0.94, "grad_norm": 0.44740423560142517, "learning_rate": 0.00046625749377570026, "loss": 1.8278, "step": 28226 }, { "epoch": 0.94, "grad_norm": 0.4367569386959076, "learning_rate": 0.0004662487934469209, "loss": 1.8987, "step": 28227 }, { "epoch": 0.94, "grad_norm": 0.44120895862579346, "learning_rate": 0.0004662400929163399, "loss": 1.7758, "step": 28228 }, { "epoch": 0.94, "grad_norm": 0.4589146077632904, "learning_rate": 0.00046623139218396803, "loss": 1.7644, "step": 28229 }, { "epoch": 0.94, "grad_norm": 0.4564305543899536, "learning_rate": 0.00046622269124981566, "loss": 1.7231, "step": 28230 }, { "epoch": 0.94, "grad_norm": 0.4503135681152344, "learning_rate": 0.0004662139901138935, "loss": 1.8954, "step": 28231 }, { "epoch": 0.94, "grad_norm": 0.4444657564163208, "learning_rate": 0.00046620528877621195, "loss": 1.8318, "step": 28232 }, { "epoch": 0.94, "grad_norm": 0.45879682898521423, "learning_rate": 0.00046619658723678176, "loss": 1.7886, "step": 28233 }, { "epoch": 0.94, "grad_norm": 0.4548373520374298, "learning_rate": 0.0004661878854956133, "loss": 1.7582, "step": 28234 }, { "epoch": 0.94, "grad_norm": 0.4597245454788208, "learning_rate": 0.00046617918355271735, "loss": 1.7977, "step": 28235 }, { "epoch": 0.94, "grad_norm": 0.44158998131752014, "learning_rate": 0.0004661704814081043, "loss": 1.7382, "step": 28236 }, { "epoch": 0.94, "grad_norm": 0.5366333723068237, "learning_rate": 0.00046616177906178485, "loss": 1.8008, "step": 28237 }, { "epoch": 0.94, "grad_norm": 0.4784603416919708, "learning_rate": 0.0004661530765137693, "loss": 1.7806, "step": 28238 }, { "epoch": 0.94, "grad_norm": 0.47033947706222534, "learning_rate": 0.00046614437376406857, "loss": 1.8854, "step": 28239 }, { "epoch": 0.94, "grad_norm": 0.4532328248023987, "learning_rate": 0.000466135670812693, "loss": 1.7139, "step": 28240 }, { "epoch": 0.94, "grad_norm": 0.44435247778892517, "learning_rate": 0.0004661269676596532, "loss": 1.8223, "step": 28241 }, { "epoch": 0.94, "grad_norm": 0.4615688920021057, "learning_rate": 0.0004661182643049597, "loss": 1.6811, "step": 28242 }, { "epoch": 0.94, "grad_norm": 0.45651528239250183, "learning_rate": 0.00046610956074862313, "loss": 1.8067, "step": 28243 }, { "epoch": 0.94, "grad_norm": 0.4401882290840149, "learning_rate": 0.0004661008569906541, "loss": 1.8773, "step": 28244 }, { "epoch": 0.94, "grad_norm": 0.4800111949443817, "learning_rate": 0.000466092153031063, "loss": 1.7484, "step": 28245 }, { "epoch": 0.94, "grad_norm": 2.251413583755493, "learning_rate": 0.00046608344886986055, "loss": 1.8242, "step": 28246 }, { "epoch": 0.94, "grad_norm": 0.4435417056083679, "learning_rate": 0.0004660747445070572, "loss": 1.7665, "step": 28247 }, { "epoch": 0.94, "grad_norm": 0.46327704191207886, "learning_rate": 0.00046606603994266366, "loss": 1.7602, "step": 28248 }, { "epoch": 0.94, "grad_norm": 0.44387516379356384, "learning_rate": 0.0004660573351766904, "loss": 1.7605, "step": 28249 }, { "epoch": 0.94, "grad_norm": 0.4561741054058075, "learning_rate": 0.000466048630209148, "loss": 1.8341, "step": 28250 }, { "epoch": 0.94, "grad_norm": 0.47061920166015625, "learning_rate": 0.000466039925040047, "loss": 1.891, "step": 28251 }, { "epoch": 0.94, "grad_norm": 0.45863935351371765, "learning_rate": 0.00046603121966939807, "loss": 1.7628, "step": 28252 }, { "epoch": 0.94, "grad_norm": 0.4609649181365967, "learning_rate": 0.0004660225140972116, "loss": 1.8321, "step": 28253 }, { "epoch": 0.94, "grad_norm": 0.45388323068618774, "learning_rate": 0.0004660138083234983, "loss": 1.8416, "step": 28254 }, { "epoch": 0.94, "grad_norm": 1.520178198814392, "learning_rate": 0.00046600510234826866, "loss": 1.8559, "step": 28255 }, { "epoch": 0.94, "grad_norm": 0.47591736912727356, "learning_rate": 0.00046599639617153334, "loss": 1.7785, "step": 28256 }, { "epoch": 0.94, "grad_norm": 0.4602627754211426, "learning_rate": 0.00046598768979330294, "loss": 1.8238, "step": 28257 }, { "epoch": 0.94, "grad_norm": 0.45085904002189636, "learning_rate": 0.0004659789832135878, "loss": 1.8254, "step": 28258 }, { "epoch": 0.94, "grad_norm": 0.45331546664237976, "learning_rate": 0.0004659702764323987, "loss": 1.8261, "step": 28259 }, { "epoch": 0.94, "grad_norm": 0.45545676350593567, "learning_rate": 0.00046596156944974607, "loss": 1.8678, "step": 28260 }, { "epoch": 0.94, "grad_norm": 0.45108523964881897, "learning_rate": 0.0004659528622656406, "loss": 1.832, "step": 28261 }, { "epoch": 0.94, "grad_norm": 0.44496238231658936, "learning_rate": 0.00046594415488009284, "loss": 1.793, "step": 28262 }, { "epoch": 0.94, "grad_norm": 0.4392068684101105, "learning_rate": 0.00046593544729311325, "loss": 1.7766, "step": 28263 }, { "epoch": 0.94, "grad_norm": 0.4565451443195343, "learning_rate": 0.00046592673950471254, "loss": 1.8481, "step": 28264 }, { "epoch": 0.94, "grad_norm": 0.4530864357948303, "learning_rate": 0.00046591803151490116, "loss": 1.8239, "step": 28265 }, { "epoch": 0.94, "grad_norm": 0.430270254611969, "learning_rate": 0.00046590932332368984, "loss": 1.8044, "step": 28266 }, { "epoch": 0.94, "grad_norm": 0.45602160692214966, "learning_rate": 0.0004659006149310889, "loss": 1.8503, "step": 28267 }, { "epoch": 0.94, "grad_norm": 0.43031296133995056, "learning_rate": 0.0004658919063371091, "loss": 1.7941, "step": 28268 }, { "epoch": 0.94, "grad_norm": 0.4382520616054535, "learning_rate": 0.00046588319754176105, "loss": 1.8139, "step": 28269 }, { "epoch": 0.94, "grad_norm": 0.4524068832397461, "learning_rate": 0.0004658744885450552, "loss": 1.8129, "step": 28270 }, { "epoch": 0.94, "grad_norm": 0.4286462664604187, "learning_rate": 0.0004658657793470021, "loss": 1.8143, "step": 28271 }, { "epoch": 0.94, "grad_norm": 0.4467252492904663, "learning_rate": 0.0004658570699476125, "loss": 1.7012, "step": 28272 }, { "epoch": 0.94, "grad_norm": 0.4426460266113281, "learning_rate": 0.0004658483603468968, "loss": 1.7845, "step": 28273 }, { "epoch": 0.94, "grad_norm": 0.4854460656642914, "learning_rate": 0.0004658396505448656, "loss": 1.8006, "step": 28274 }, { "epoch": 0.94, "grad_norm": 0.46517038345336914, "learning_rate": 0.00046583094054152946, "loss": 1.8478, "step": 28275 }, { "epoch": 0.94, "grad_norm": 0.4325859248638153, "learning_rate": 0.0004658222303368991, "loss": 1.7925, "step": 28276 }, { "epoch": 0.94, "grad_norm": 0.4537352919578552, "learning_rate": 0.00046581351993098496, "loss": 1.8014, "step": 28277 }, { "epoch": 0.94, "grad_norm": 0.45224499702453613, "learning_rate": 0.00046580480932379754, "loss": 1.6831, "step": 28278 }, { "epoch": 0.94, "grad_norm": 0.44881075620651245, "learning_rate": 0.00046579609851534766, "loss": 1.7402, "step": 28279 }, { "epoch": 0.94, "grad_norm": 0.4297826886177063, "learning_rate": 0.0004657873875056456, "loss": 1.7535, "step": 28280 }, { "epoch": 0.94, "grad_norm": 0.483632355928421, "learning_rate": 0.0004657786762947022, "loss": 1.8468, "step": 28281 }, { "epoch": 0.94, "grad_norm": 0.44120585918426514, "learning_rate": 0.00046576996488252787, "loss": 1.7698, "step": 28282 }, { "epoch": 0.94, "grad_norm": 0.42849957942962646, "learning_rate": 0.00046576125326913323, "loss": 1.8087, "step": 28283 }, { "epoch": 0.94, "grad_norm": 0.44109174609184265, "learning_rate": 0.00046575254145452883, "loss": 1.7517, "step": 28284 }, { "epoch": 0.94, "grad_norm": 0.434378445148468, "learning_rate": 0.0004657438294387253, "loss": 1.7851, "step": 28285 }, { "epoch": 0.94, "grad_norm": 0.44918620586395264, "learning_rate": 0.00046573511722173313, "loss": 1.7384, "step": 28286 }, { "epoch": 0.94, "grad_norm": 0.43931660056114197, "learning_rate": 0.0004657264048035629, "loss": 1.878, "step": 28287 }, { "epoch": 0.94, "grad_norm": 0.4489850401878357, "learning_rate": 0.00046571769218422534, "loss": 1.872, "step": 28288 }, { "epoch": 0.94, "grad_norm": 0.4362908899784088, "learning_rate": 0.0004657089793637309, "loss": 1.7803, "step": 28289 }, { "epoch": 0.94, "grad_norm": 0.4546552002429962, "learning_rate": 0.0004657002663420902, "loss": 1.8084, "step": 28290 }, { "epoch": 0.94, "grad_norm": 0.4523751437664032, "learning_rate": 0.0004656915531193137, "loss": 1.8725, "step": 28291 }, { "epoch": 0.94, "grad_norm": 0.4316725432872772, "learning_rate": 0.00046568283969541225, "loss": 1.8224, "step": 28292 }, { "epoch": 0.94, "grad_norm": 0.44762876629829407, "learning_rate": 0.00046567412607039605, "loss": 1.8137, "step": 28293 }, { "epoch": 0.94, "grad_norm": 0.44557860493659973, "learning_rate": 0.0004656654122442759, "loss": 1.7888, "step": 28294 }, { "epoch": 0.94, "grad_norm": 0.4352512061595917, "learning_rate": 0.00046565669821706235, "loss": 1.8667, "step": 28295 }, { "epoch": 0.94, "grad_norm": 0.4450342059135437, "learning_rate": 0.0004656479839887661, "loss": 1.7838, "step": 28296 }, { "epoch": 0.94, "grad_norm": 0.44629260897636414, "learning_rate": 0.00046563926955939754, "loss": 1.8127, "step": 28297 }, { "epoch": 0.94, "grad_norm": 0.4415011405944824, "learning_rate": 0.0004656305549289673, "loss": 1.8171, "step": 28298 }, { "epoch": 0.94, "grad_norm": 0.4403744041919708, "learning_rate": 0.00046562184009748593, "loss": 1.7498, "step": 28299 }, { "epoch": 0.94, "grad_norm": 0.4399351179599762, "learning_rate": 0.00046561312506496406, "loss": 1.7993, "step": 28300 }, { "epoch": 0.94, "grad_norm": 0.45560163259506226, "learning_rate": 0.00046560440983141235, "loss": 1.7329, "step": 28301 }, { "epoch": 0.94, "grad_norm": 0.45031440258026123, "learning_rate": 0.00046559569439684126, "loss": 1.8189, "step": 28302 }, { "epoch": 0.94, "grad_norm": 0.4470956325531006, "learning_rate": 0.00046558697876126126, "loss": 1.8429, "step": 28303 }, { "epoch": 0.94, "grad_norm": 0.4469098448753357, "learning_rate": 0.0004655782629246832, "loss": 1.8793, "step": 28304 }, { "epoch": 0.94, "grad_norm": 0.44782546162605286, "learning_rate": 0.0004655695468871175, "loss": 1.7848, "step": 28305 }, { "epoch": 0.94, "grad_norm": 0.43349313735961914, "learning_rate": 0.0004655608306485748, "loss": 1.7031, "step": 28306 }, { "epoch": 0.94, "grad_norm": 0.4446530342102051, "learning_rate": 0.0004655521142090656, "loss": 1.7649, "step": 28307 }, { "epoch": 0.94, "grad_norm": 0.4382476508617401, "learning_rate": 0.00046554339756860045, "loss": 1.8439, "step": 28308 }, { "epoch": 0.94, "grad_norm": 0.4430280029773712, "learning_rate": 0.00046553468072719014, "loss": 1.7801, "step": 28309 }, { "epoch": 0.94, "grad_norm": 0.44934016466140747, "learning_rate": 0.00046552596368484503, "loss": 1.8444, "step": 28310 }, { "epoch": 0.94, "grad_norm": 0.4334498941898346, "learning_rate": 0.0004655172464415758, "loss": 1.7915, "step": 28311 }, { "epoch": 0.94, "grad_norm": 0.45340994000434875, "learning_rate": 0.00046550852899739304, "loss": 1.8167, "step": 28312 }, { "epoch": 0.94, "grad_norm": 0.43345755338668823, "learning_rate": 0.0004654998113523073, "loss": 1.8209, "step": 28313 }, { "epoch": 0.94, "grad_norm": 0.4684607982635498, "learning_rate": 0.00046549109350632926, "loss": 1.8201, "step": 28314 }, { "epoch": 0.94, "grad_norm": 0.4406213164329529, "learning_rate": 0.00046548237545946926, "loss": 1.7863, "step": 28315 }, { "epoch": 0.94, "grad_norm": 0.4402071535587311, "learning_rate": 0.00046547365721173815, "loss": 1.8548, "step": 28316 }, { "epoch": 0.94, "grad_norm": 0.4537450075149536, "learning_rate": 0.00046546493876314644, "loss": 1.7902, "step": 28317 }, { "epoch": 0.94, "grad_norm": 0.4371420443058014, "learning_rate": 0.00046545622011370455, "loss": 1.7492, "step": 28318 }, { "epoch": 0.94, "grad_norm": 0.44339579343795776, "learning_rate": 0.00046544750126342326, "loss": 1.7408, "step": 28319 }, { "epoch": 0.94, "grad_norm": 0.4483349621295929, "learning_rate": 0.000465438782212313, "loss": 1.8699, "step": 28320 }, { "epoch": 0.94, "grad_norm": 0.43488407135009766, "learning_rate": 0.00046543006296038454, "loss": 1.7669, "step": 28321 }, { "epoch": 0.94, "grad_norm": 0.4555993974208832, "learning_rate": 0.00046542134350764837, "loss": 1.791, "step": 28322 }, { "epoch": 0.94, "grad_norm": 0.45426520705223083, "learning_rate": 0.00046541262385411497, "loss": 1.8744, "step": 28323 }, { "epoch": 0.94, "grad_norm": 0.4514646828174591, "learning_rate": 0.0004654039039997951, "loss": 1.8431, "step": 28324 }, { "epoch": 0.94, "grad_norm": 0.4222477972507477, "learning_rate": 0.00046539518394469917, "loss": 1.7842, "step": 28325 }, { "epoch": 0.94, "grad_norm": 0.4464114010334015, "learning_rate": 0.0004653864636888379, "loss": 1.7739, "step": 28326 }, { "epoch": 0.94, "grad_norm": 0.45566126704216003, "learning_rate": 0.00046537774323222186, "loss": 1.8051, "step": 28327 }, { "epoch": 0.94, "grad_norm": 0.4696142375469208, "learning_rate": 0.0004653690225748616, "loss": 1.8123, "step": 28328 }, { "epoch": 0.94, "grad_norm": 0.43927001953125, "learning_rate": 0.00046536030171676773, "loss": 1.7685, "step": 28329 }, { "epoch": 0.94, "grad_norm": 0.43935295939445496, "learning_rate": 0.00046535158065795073, "loss": 1.7956, "step": 28330 }, { "epoch": 0.94, "grad_norm": 0.4535520076751709, "learning_rate": 0.0004653428593984214, "loss": 1.7843, "step": 28331 }, { "epoch": 0.94, "grad_norm": 0.4472462832927704, "learning_rate": 0.00046533413793819014, "loss": 1.8114, "step": 28332 }, { "epoch": 0.94, "grad_norm": 0.4676629602909088, "learning_rate": 0.00046532541627726753, "loss": 1.715, "step": 28333 }, { "epoch": 0.94, "grad_norm": 0.4352910816669464, "learning_rate": 0.0004653166944156643, "loss": 1.8513, "step": 28334 }, { "epoch": 0.94, "grad_norm": 0.45399317145347595, "learning_rate": 0.00046530797235339093, "loss": 1.8778, "step": 28335 }, { "epoch": 0.94, "grad_norm": 0.48411455750465393, "learning_rate": 0.00046529925009045807, "loss": 1.8273, "step": 28336 }, { "epoch": 0.94, "grad_norm": 0.45194515585899353, "learning_rate": 0.00046529052762687626, "loss": 1.8098, "step": 28337 }, { "epoch": 0.94, "grad_norm": 0.43490099906921387, "learning_rate": 0.00046528180496265616, "loss": 1.7616, "step": 28338 }, { "epoch": 0.94, "grad_norm": 0.43909183144569397, "learning_rate": 0.00046527308209780825, "loss": 1.7562, "step": 28339 }, { "epoch": 0.94, "grad_norm": 0.4417584240436554, "learning_rate": 0.0004652643590323431, "loss": 1.8535, "step": 28340 }, { "epoch": 0.94, "grad_norm": 0.4454772472381592, "learning_rate": 0.0004652556357662715, "loss": 1.8109, "step": 28341 }, { "epoch": 0.94, "grad_norm": 0.44403085112571716, "learning_rate": 0.0004652469122996038, "loss": 1.8003, "step": 28342 }, { "epoch": 0.94, "grad_norm": 0.44760996103286743, "learning_rate": 0.0004652381886323507, "loss": 1.8007, "step": 28343 }, { "epoch": 0.94, "grad_norm": 0.4565575122833252, "learning_rate": 0.0004652294647645229, "loss": 1.844, "step": 28344 }, { "epoch": 0.94, "grad_norm": 0.4378373324871063, "learning_rate": 0.00046522074069613075, "loss": 1.7532, "step": 28345 }, { "epoch": 0.94, "grad_norm": 0.4376079738140106, "learning_rate": 0.00046521201642718503, "loss": 1.8456, "step": 28346 }, { "epoch": 0.94, "grad_norm": 0.427827924489975, "learning_rate": 0.00046520329195769616, "loss": 1.7732, "step": 28347 }, { "epoch": 0.94, "grad_norm": 0.45054176449775696, "learning_rate": 0.000465194567287675, "loss": 1.8816, "step": 28348 }, { "epoch": 0.94, "grad_norm": 0.45824408531188965, "learning_rate": 0.000465185842417132, "loss": 1.8381, "step": 28349 }, { "epoch": 0.94, "grad_norm": 0.4542330801486969, "learning_rate": 0.0004651771173460775, "loss": 1.8517, "step": 28350 }, { "epoch": 0.94, "grad_norm": 0.4494955837726593, "learning_rate": 0.0004651683920745225, "loss": 1.822, "step": 28351 }, { "epoch": 0.94, "grad_norm": 0.43145492672920227, "learning_rate": 0.00046515966660247736, "loss": 1.77, "step": 28352 }, { "epoch": 0.94, "grad_norm": 0.4305254817008972, "learning_rate": 0.0004651509409299527, "loss": 1.8553, "step": 28353 }, { "epoch": 0.94, "grad_norm": 0.46086838841438293, "learning_rate": 0.0004651422150569592, "loss": 1.8743, "step": 28354 }, { "epoch": 0.94, "grad_norm": 0.4746468961238861, "learning_rate": 0.0004651334889835073, "loss": 1.8761, "step": 28355 }, { "epoch": 0.94, "grad_norm": 0.43484416604042053, "learning_rate": 0.00046512476270960763, "loss": 1.6877, "step": 28356 }, { "epoch": 0.94, "grad_norm": 0.4538218379020691, "learning_rate": 0.0004651160362352709, "loss": 1.7794, "step": 28357 }, { "epoch": 0.94, "grad_norm": 0.4658327102661133, "learning_rate": 0.0004651073095605077, "loss": 1.8094, "step": 28358 }, { "epoch": 0.94, "grad_norm": 0.43924400210380554, "learning_rate": 0.0004650985826853285, "loss": 1.8709, "step": 28359 }, { "epoch": 0.94, "grad_norm": 0.45101460814476013, "learning_rate": 0.00046508985560974394, "loss": 1.8624, "step": 28360 }, { "epoch": 0.94, "grad_norm": 0.4426104426383972, "learning_rate": 0.00046508112833376457, "loss": 1.8568, "step": 28361 }, { "epoch": 0.94, "grad_norm": 0.4647197723388672, "learning_rate": 0.00046507240085740106, "loss": 1.8726, "step": 28362 }, { "epoch": 0.94, "grad_norm": 0.44306764006614685, "learning_rate": 0.0004650636731806641, "loss": 1.8141, "step": 28363 }, { "epoch": 0.94, "grad_norm": 0.44784700870513916, "learning_rate": 0.00046505494530356395, "loss": 1.8067, "step": 28364 }, { "epoch": 0.94, "grad_norm": 0.4288611114025116, "learning_rate": 0.00046504621722611155, "loss": 1.8161, "step": 28365 }, { "epoch": 0.94, "grad_norm": 0.4438290596008301, "learning_rate": 0.00046503748894831734, "loss": 1.7728, "step": 28366 }, { "epoch": 0.94, "grad_norm": 0.4263259768486023, "learning_rate": 0.0004650287604701919, "loss": 1.837, "step": 28367 }, { "epoch": 0.94, "grad_norm": 0.4477059543132782, "learning_rate": 0.00046502003179174596, "loss": 1.8805, "step": 28368 }, { "epoch": 0.94, "grad_norm": 0.4425036609172821, "learning_rate": 0.0004650113029129899, "loss": 1.7639, "step": 28369 }, { "epoch": 0.94, "grad_norm": 0.45666584372520447, "learning_rate": 0.0004650025738339344, "loss": 1.7648, "step": 28370 }, { "epoch": 0.94, "grad_norm": 0.45872762799263, "learning_rate": 0.00046499384455459024, "loss": 1.8377, "step": 28371 }, { "epoch": 0.94, "grad_norm": 0.44864600896835327, "learning_rate": 0.00046498511507496774, "loss": 1.7222, "step": 28372 }, { "epoch": 0.94, "grad_norm": 0.4528391361236572, "learning_rate": 0.00046497638539507775, "loss": 1.7722, "step": 28373 }, { "epoch": 0.94, "grad_norm": 0.4447011947631836, "learning_rate": 0.00046496765551493063, "loss": 1.8294, "step": 28374 }, { "epoch": 0.94, "grad_norm": 0.43032851815223694, "learning_rate": 0.0004649589254345371, "loss": 1.8474, "step": 28375 }, { "epoch": 0.94, "grad_norm": 0.4464184045791626, "learning_rate": 0.00046495019515390774, "loss": 1.8638, "step": 28376 }, { "epoch": 0.94, "grad_norm": 0.4538905918598175, "learning_rate": 0.00046494146467305315, "loss": 1.832, "step": 28377 }, { "epoch": 0.94, "grad_norm": 0.4550546407699585, "learning_rate": 0.00046493273399198393, "loss": 1.7585, "step": 28378 }, { "epoch": 0.94, "grad_norm": 0.4375440776348114, "learning_rate": 0.0004649240031107107, "loss": 1.8289, "step": 28379 }, { "epoch": 0.94, "grad_norm": 0.4460999667644501, "learning_rate": 0.00046491527202924396, "loss": 1.7802, "step": 28380 }, { "epoch": 0.94, "grad_norm": 1.1228386163711548, "learning_rate": 0.0004649065407475944, "loss": 1.8504, "step": 28381 }, { "epoch": 0.94, "grad_norm": 0.45279043912887573, "learning_rate": 0.0004648978092657727, "loss": 1.7668, "step": 28382 }, { "epoch": 0.94, "grad_norm": 0.44024062156677246, "learning_rate": 0.00046488907758378923, "loss": 1.7889, "step": 28383 }, { "epoch": 0.94, "grad_norm": 0.44502219557762146, "learning_rate": 0.00046488034570165473, "loss": 1.7778, "step": 28384 }, { "epoch": 0.94, "grad_norm": 0.44889208674430847, "learning_rate": 0.0004648716136193798, "loss": 1.8414, "step": 28385 }, { "epoch": 0.94, "grad_norm": 0.4450642764568329, "learning_rate": 0.00046486288133697505, "loss": 1.7863, "step": 28386 }, { "epoch": 0.94, "grad_norm": 0.4332922101020813, "learning_rate": 0.00046485414885445107, "loss": 1.8296, "step": 28387 }, { "epoch": 0.94, "grad_norm": 0.4557476341724396, "learning_rate": 0.00046484541617181844, "loss": 1.8196, "step": 28388 }, { "epoch": 0.94, "grad_norm": 0.46396365761756897, "learning_rate": 0.00046483668328908774, "loss": 1.7875, "step": 28389 }, { "epoch": 0.94, "grad_norm": 0.46503913402557373, "learning_rate": 0.0004648279502062696, "loss": 1.7969, "step": 28390 }, { "epoch": 0.94, "grad_norm": 0.4459628760814667, "learning_rate": 0.0004648192169233746, "loss": 1.8224, "step": 28391 }, { "epoch": 0.94, "grad_norm": 0.43977901339530945, "learning_rate": 0.0004648104834404133, "loss": 1.809, "step": 28392 }, { "epoch": 0.94, "grad_norm": 0.4688189923763275, "learning_rate": 0.0004648017497573965, "loss": 1.7479, "step": 28393 }, { "epoch": 0.94, "grad_norm": 0.44726288318634033, "learning_rate": 0.00046479301587433453, "loss": 1.7905, "step": 28394 }, { "epoch": 0.94, "grad_norm": 0.44680362939834595, "learning_rate": 0.00046478428179123817, "loss": 1.8446, "step": 28395 }, { "epoch": 0.94, "grad_norm": 0.4578985571861267, "learning_rate": 0.00046477554750811795, "loss": 1.7711, "step": 28396 }, { "epoch": 0.94, "grad_norm": 0.4658774137496948, "learning_rate": 0.0004647668130249845, "loss": 1.8258, "step": 28397 }, { "epoch": 0.94, "grad_norm": 0.44534072279930115, "learning_rate": 0.0004647580783418485, "loss": 1.8436, "step": 28398 }, { "epoch": 0.94, "grad_norm": 0.4482823312282562, "learning_rate": 0.00046474934345872037, "loss": 1.7239, "step": 28399 }, { "epoch": 0.94, "grad_norm": 0.45664116740226746, "learning_rate": 0.00046474060837561085, "loss": 1.8278, "step": 28400 }, { "epoch": 0.94, "grad_norm": 0.4478006064891815, "learning_rate": 0.0004647318730925305, "loss": 1.8317, "step": 28401 }, { "epoch": 0.94, "grad_norm": 0.45252057909965515, "learning_rate": 0.0004647231376094899, "loss": 1.8042, "step": 28402 }, { "epoch": 0.94, "grad_norm": 0.4890940189361572, "learning_rate": 0.0004647144019264997, "loss": 1.7852, "step": 28403 }, { "epoch": 0.95, "grad_norm": 0.44926154613494873, "learning_rate": 0.00046470566604357055, "loss": 1.8732, "step": 28404 }, { "epoch": 0.95, "grad_norm": 0.4391855001449585, "learning_rate": 0.0004646969299607129, "loss": 1.8676, "step": 28405 }, { "epoch": 0.95, "grad_norm": 0.4317395091056824, "learning_rate": 0.0004646881936779374, "loss": 1.856, "step": 28406 }, { "epoch": 0.95, "grad_norm": 0.44229796528816223, "learning_rate": 0.0004646794571952548, "loss": 1.754, "step": 28407 }, { "epoch": 0.95, "grad_norm": 0.4526292383670807, "learning_rate": 0.0004646707205126756, "loss": 1.8044, "step": 28408 }, { "epoch": 0.95, "grad_norm": 0.44626691937446594, "learning_rate": 0.00046466198363021043, "loss": 1.8134, "step": 28409 }, { "epoch": 0.95, "grad_norm": 0.4545932710170746, "learning_rate": 0.0004646532465478697, "loss": 1.788, "step": 28410 }, { "epoch": 0.95, "grad_norm": 0.46896618604660034, "learning_rate": 0.0004646445092656644, "loss": 1.737, "step": 28411 }, { "epoch": 0.95, "grad_norm": 0.4545701742172241, "learning_rate": 0.0004646357717836048, "loss": 1.8377, "step": 28412 }, { "epoch": 0.95, "grad_norm": 0.45083221793174744, "learning_rate": 0.0004646270341017017, "loss": 1.79, "step": 28413 }, { "epoch": 0.95, "grad_norm": 0.4477103054523468, "learning_rate": 0.00046461829621996563, "loss": 1.8772, "step": 28414 }, { "epoch": 0.95, "grad_norm": 0.4368576109409332, "learning_rate": 0.0004646095581384072, "loss": 1.811, "step": 28415 }, { "epoch": 0.95, "grad_norm": 0.455182820558548, "learning_rate": 0.000464600819857037, "loss": 1.8086, "step": 28416 }, { "epoch": 0.95, "grad_norm": 0.4605770707130432, "learning_rate": 0.0004645920813758656, "loss": 1.7631, "step": 28417 }, { "epoch": 0.95, "grad_norm": 0.46338915824890137, "learning_rate": 0.0004645833426949038, "loss": 1.794, "step": 28418 }, { "epoch": 0.95, "grad_norm": 0.4612075090408325, "learning_rate": 0.00046457460381416196, "loss": 1.8512, "step": 28419 }, { "epoch": 0.95, "grad_norm": 0.453450471162796, "learning_rate": 0.0004645658647336508, "loss": 1.7327, "step": 28420 }, { "epoch": 0.95, "grad_norm": 0.4481830894947052, "learning_rate": 0.000464557125453381, "loss": 1.8032, "step": 28421 }, { "epoch": 0.95, "grad_norm": 0.47066956758499146, "learning_rate": 0.0004645483859733631, "loss": 1.8043, "step": 28422 }, { "epoch": 0.95, "grad_norm": 0.4615931808948517, "learning_rate": 0.00046453964629360764, "loss": 1.8021, "step": 28423 }, { "epoch": 0.95, "grad_norm": 0.9121187329292297, "learning_rate": 0.0004645309064141254, "loss": 1.8052, "step": 28424 }, { "epoch": 0.95, "grad_norm": 0.4502009153366089, "learning_rate": 0.0004645221663349267, "loss": 1.8549, "step": 28425 }, { "epoch": 0.95, "grad_norm": 0.44752708077430725, "learning_rate": 0.0004645134260560225, "loss": 1.8518, "step": 28426 }, { "epoch": 0.95, "grad_norm": 0.4608526825904846, "learning_rate": 0.0004645046855774231, "loss": 1.8216, "step": 28427 }, { "epoch": 0.95, "grad_norm": 0.45111122727394104, "learning_rate": 0.0004644959448991394, "loss": 1.8455, "step": 28428 }, { "epoch": 0.95, "grad_norm": 0.44564756751060486, "learning_rate": 0.00046448720402118176, "loss": 1.7885, "step": 28429 }, { "epoch": 0.95, "grad_norm": 0.4500691294670105, "learning_rate": 0.0004644784629435609, "loss": 1.8476, "step": 28430 }, { "epoch": 0.95, "grad_norm": 0.4835246801376343, "learning_rate": 0.0004644697216662874, "loss": 1.8911, "step": 28431 }, { "epoch": 0.95, "grad_norm": 0.45260587334632874, "learning_rate": 0.0004644609801893719, "loss": 1.7738, "step": 28432 }, { "epoch": 0.95, "grad_norm": 0.45327627658843994, "learning_rate": 0.0004644522385128251, "loss": 1.8675, "step": 28433 }, { "epoch": 0.95, "grad_norm": 0.4457603096961975, "learning_rate": 0.00046444349663665746, "loss": 1.7862, "step": 28434 }, { "epoch": 0.95, "grad_norm": 0.4445309340953827, "learning_rate": 0.00046443475456087955, "loss": 1.7418, "step": 28435 }, { "epoch": 0.95, "grad_norm": 0.44472458958625793, "learning_rate": 0.00046442601228550225, "loss": 1.7865, "step": 28436 }, { "epoch": 0.95, "grad_norm": 0.45529690384864807, "learning_rate": 0.00046441726981053585, "loss": 1.8539, "step": 28437 }, { "epoch": 0.95, "grad_norm": 0.46256980299949646, "learning_rate": 0.00046440852713599114, "loss": 1.763, "step": 28438 }, { "epoch": 0.95, "grad_norm": 0.4684387743473053, "learning_rate": 0.00046439978426187874, "loss": 1.7966, "step": 28439 }, { "epoch": 0.95, "grad_norm": 0.45392754673957825, "learning_rate": 0.00046439104118820924, "loss": 1.8002, "step": 28440 }, { "epoch": 0.95, "grad_norm": 0.4514789283275604, "learning_rate": 0.0004643822979149932, "loss": 1.7702, "step": 28441 }, { "epoch": 0.95, "grad_norm": 0.44581338763237, "learning_rate": 0.00046437355444224127, "loss": 1.7825, "step": 28442 }, { "epoch": 0.95, "grad_norm": 0.42986243963241577, "learning_rate": 0.0004643648107699641, "loss": 1.7947, "step": 28443 }, { "epoch": 0.95, "grad_norm": 0.4452988803386688, "learning_rate": 0.0004643560668981722, "loss": 1.7695, "step": 28444 }, { "epoch": 0.95, "grad_norm": 0.44155552983283997, "learning_rate": 0.00046434732282687633, "loss": 1.7405, "step": 28445 }, { "epoch": 0.95, "grad_norm": 0.4611814022064209, "learning_rate": 0.000464338578556087, "loss": 1.8842, "step": 28446 }, { "epoch": 0.95, "grad_norm": 0.4492760896682739, "learning_rate": 0.00046432983408581477, "loss": 1.8089, "step": 28447 }, { "epoch": 0.95, "grad_norm": 0.4449496865272522, "learning_rate": 0.0004643210894160704, "loss": 1.7825, "step": 28448 }, { "epoch": 0.95, "grad_norm": 0.4411669969558716, "learning_rate": 0.0004643123445468645, "loss": 1.8671, "step": 28449 }, { "epoch": 0.95, "grad_norm": 0.48047107458114624, "learning_rate": 0.0004643035994782075, "loss": 1.8563, "step": 28450 }, { "epoch": 0.95, "grad_norm": 0.4569263160228729, "learning_rate": 0.0004642948542101103, "loss": 1.8012, "step": 28451 }, { "epoch": 0.95, "grad_norm": 0.4499436020851135, "learning_rate": 0.00046428610874258314, "loss": 1.8254, "step": 28452 }, { "epoch": 0.95, "grad_norm": 0.4531414806842804, "learning_rate": 0.00046427736307563697, "loss": 1.8182, "step": 28453 }, { "epoch": 0.95, "grad_norm": 0.4428139626979828, "learning_rate": 0.0004642686172092824, "loss": 1.8637, "step": 28454 }, { "epoch": 0.95, "grad_norm": 0.44509947299957275, "learning_rate": 0.0004642598711435298, "loss": 1.7041, "step": 28455 }, { "epoch": 0.95, "grad_norm": 0.4487028419971466, "learning_rate": 0.00046425112487838996, "loss": 1.7665, "step": 28456 }, { "epoch": 0.95, "grad_norm": 0.43848901987075806, "learning_rate": 0.0004642423784138733, "loss": 1.7851, "step": 28457 }, { "epoch": 0.95, "grad_norm": 0.4402002692222595, "learning_rate": 0.0004642336317499908, "loss": 1.8704, "step": 28458 }, { "epoch": 0.95, "grad_norm": 0.7612974643707275, "learning_rate": 0.0004642248848867528, "loss": 1.8098, "step": 28459 }, { "epoch": 0.95, "grad_norm": 0.44766131043434143, "learning_rate": 0.0004642161378241699, "loss": 1.8076, "step": 28460 }, { "epoch": 0.95, "grad_norm": 0.42845815420150757, "learning_rate": 0.00046420739056225296, "loss": 1.715, "step": 28461 }, { "epoch": 0.95, "grad_norm": 0.6589167714118958, "learning_rate": 0.00046419864310101236, "loss": 1.8523, "step": 28462 }, { "epoch": 0.95, "grad_norm": 0.45975732803344727, "learning_rate": 0.0004641898954404588, "loss": 1.7703, "step": 28463 }, { "epoch": 0.95, "grad_norm": 0.4501466155052185, "learning_rate": 0.00046418114758060294, "loss": 1.7751, "step": 28464 }, { "epoch": 0.95, "grad_norm": 0.47145015001296997, "learning_rate": 0.0004641723995214553, "loss": 1.8218, "step": 28465 }, { "epoch": 0.95, "grad_norm": 0.45739617943763733, "learning_rate": 0.0004641636512630266, "loss": 1.9041, "step": 28466 }, { "epoch": 0.95, "grad_norm": 0.4529128968715668, "learning_rate": 0.0004641549028053274, "loss": 1.8641, "step": 28467 }, { "epoch": 0.95, "grad_norm": 0.43861597776412964, "learning_rate": 0.00046414615414836836, "loss": 1.7802, "step": 28468 }, { "epoch": 0.95, "grad_norm": 0.4741699993610382, "learning_rate": 0.00046413740529216, "loss": 1.8719, "step": 28469 }, { "epoch": 0.95, "grad_norm": 0.46942034363746643, "learning_rate": 0.0004641286562367131, "loss": 1.8412, "step": 28470 }, { "epoch": 0.95, "grad_norm": 0.45232653617858887, "learning_rate": 0.0004641199069820381, "loss": 1.7912, "step": 28471 }, { "epoch": 0.95, "grad_norm": 0.43155157566070557, "learning_rate": 0.0004641111575281458, "loss": 1.8399, "step": 28472 }, { "epoch": 0.95, "grad_norm": 0.4570663571357727, "learning_rate": 0.0004641024078750467, "loss": 1.8938, "step": 28473 }, { "epoch": 0.95, "grad_norm": 0.4473462402820587, "learning_rate": 0.00046409365802275147, "loss": 1.8188, "step": 28474 }, { "epoch": 0.95, "grad_norm": 0.4476037323474884, "learning_rate": 0.00046408490797127073, "loss": 1.8546, "step": 28475 }, { "epoch": 0.95, "grad_norm": 0.4477149248123169, "learning_rate": 0.00046407615772061506, "loss": 1.7763, "step": 28476 }, { "epoch": 0.95, "grad_norm": 0.4336787760257721, "learning_rate": 0.00046406740727079513, "loss": 1.7474, "step": 28477 }, { "epoch": 0.95, "grad_norm": 0.4394075572490692, "learning_rate": 0.0004640586566218216, "loss": 1.7655, "step": 28478 }, { "epoch": 0.95, "grad_norm": 0.460196316242218, "learning_rate": 0.0004640499057737049, "loss": 1.7903, "step": 28479 }, { "epoch": 0.95, "grad_norm": 0.4542502462863922, "learning_rate": 0.0004640411547264559, "loss": 1.8719, "step": 28480 }, { "epoch": 0.95, "grad_norm": 0.43982529640197754, "learning_rate": 0.00046403240348008503, "loss": 1.7889, "step": 28481 }, { "epoch": 0.95, "grad_norm": 0.44226160645484924, "learning_rate": 0.000464023652034603, "loss": 1.8044, "step": 28482 }, { "epoch": 0.95, "grad_norm": 0.44813796877861023, "learning_rate": 0.00046401490039002047, "loss": 1.7765, "step": 28483 }, { "epoch": 0.95, "grad_norm": 0.45969024300575256, "learning_rate": 0.00046400614854634795, "loss": 1.7216, "step": 28484 }, { "epoch": 0.95, "grad_norm": 0.43847641348838806, "learning_rate": 0.00046399739650359624, "loss": 1.821, "step": 28485 }, { "epoch": 0.95, "grad_norm": 0.4351917505264282, "learning_rate": 0.0004639886442617758, "loss": 1.8325, "step": 28486 }, { "epoch": 0.95, "grad_norm": 0.4433066248893738, "learning_rate": 0.0004639798918208973, "loss": 1.816, "step": 28487 }, { "epoch": 0.95, "grad_norm": 0.45509323477745056, "learning_rate": 0.00046397113918097136, "loss": 1.7884, "step": 28488 }, { "epoch": 0.95, "grad_norm": 0.44124746322631836, "learning_rate": 0.00046396238634200857, "loss": 1.7717, "step": 28489 }, { "epoch": 0.95, "grad_norm": 0.45844629406929016, "learning_rate": 0.00046395363330401965, "loss": 1.825, "step": 28490 }, { "epoch": 0.95, "grad_norm": 0.45097580552101135, "learning_rate": 0.0004639448800670152, "loss": 1.8219, "step": 28491 }, { "epoch": 0.95, "grad_norm": 0.46026086807250977, "learning_rate": 0.00046393612663100584, "loss": 1.8471, "step": 28492 }, { "epoch": 0.95, "grad_norm": 0.4684792757034302, "learning_rate": 0.00046392737299600213, "loss": 1.7559, "step": 28493 }, { "epoch": 0.95, "grad_norm": 0.4563921391963959, "learning_rate": 0.00046391861916201475, "loss": 1.849, "step": 28494 }, { "epoch": 0.95, "grad_norm": 0.45973408222198486, "learning_rate": 0.00046390986512905437, "loss": 1.836, "step": 28495 }, { "epoch": 0.95, "grad_norm": 0.4362281858921051, "learning_rate": 0.0004639011108971315, "loss": 1.8207, "step": 28496 }, { "epoch": 0.95, "grad_norm": 0.4656851589679718, "learning_rate": 0.00046389235646625683, "loss": 1.814, "step": 28497 }, { "epoch": 0.95, "grad_norm": 0.4536243677139282, "learning_rate": 0.000463883601836441, "loss": 1.8458, "step": 28498 }, { "epoch": 0.95, "grad_norm": 0.4671936333179474, "learning_rate": 0.00046387484700769464, "loss": 1.8584, "step": 28499 }, { "epoch": 0.95, "grad_norm": 0.4484650194644928, "learning_rate": 0.0004638660919800283, "loss": 1.8114, "step": 28500 }, { "epoch": 0.95, "grad_norm": 0.4744412899017334, "learning_rate": 0.0004638573367534528, "loss": 1.732, "step": 28501 }, { "epoch": 0.95, "grad_norm": 0.4858507215976715, "learning_rate": 0.00046384858132797853, "loss": 1.7604, "step": 28502 }, { "epoch": 0.95, "grad_norm": 0.4409831762313843, "learning_rate": 0.0004638398257036163, "loss": 1.7992, "step": 28503 }, { "epoch": 0.95, "grad_norm": 0.46071985363960266, "learning_rate": 0.00046383106988037655, "loss": 1.7591, "step": 28504 }, { "epoch": 0.95, "grad_norm": 0.48577433824539185, "learning_rate": 0.00046382231385827007, "loss": 1.8619, "step": 28505 }, { "epoch": 0.95, "grad_norm": 0.47691261768341064, "learning_rate": 0.0004638135576373075, "loss": 1.7827, "step": 28506 }, { "epoch": 0.95, "grad_norm": 0.45258229970932007, "learning_rate": 0.00046380480121749936, "loss": 1.8094, "step": 28507 }, { "epoch": 0.95, "grad_norm": 0.4833167493343353, "learning_rate": 0.00046379604459885634, "loss": 1.8159, "step": 28508 }, { "epoch": 0.95, "grad_norm": 0.4638422727584839, "learning_rate": 0.00046378728778138906, "loss": 1.8314, "step": 28509 }, { "epoch": 0.95, "grad_norm": 0.4522559642791748, "learning_rate": 0.00046377853076510816, "loss": 1.8083, "step": 28510 }, { "epoch": 0.95, "grad_norm": 0.44246241450309753, "learning_rate": 0.0004637697735500242, "loss": 1.7613, "step": 28511 }, { "epoch": 0.95, "grad_norm": 0.44044777750968933, "learning_rate": 0.00046376101613614785, "loss": 1.7749, "step": 28512 }, { "epoch": 0.95, "grad_norm": 0.46037307381629944, "learning_rate": 0.0004637522585234898, "loss": 1.8812, "step": 28513 }, { "epoch": 0.95, "grad_norm": 0.44524091482162476, "learning_rate": 0.0004637435007120607, "loss": 1.8414, "step": 28514 }, { "epoch": 0.95, "grad_norm": 0.4474724233150482, "learning_rate": 0.000463734742701871, "loss": 1.8267, "step": 28515 }, { "epoch": 0.95, "grad_norm": 0.441607803106308, "learning_rate": 0.0004637259844929315, "loss": 1.8552, "step": 28516 }, { "epoch": 0.95, "grad_norm": 0.4405869245529175, "learning_rate": 0.00046371722608525283, "loss": 1.7524, "step": 28517 }, { "epoch": 0.95, "grad_norm": 0.43467622995376587, "learning_rate": 0.00046370846747884547, "loss": 1.7625, "step": 28518 }, { "epoch": 0.95, "grad_norm": 0.4418903887271881, "learning_rate": 0.00046369970867372016, "loss": 1.7863, "step": 28519 }, { "epoch": 0.95, "grad_norm": 0.449487566947937, "learning_rate": 0.00046369094966988765, "loss": 1.8136, "step": 28520 }, { "epoch": 0.95, "grad_norm": 0.4532792866230011, "learning_rate": 0.00046368219046735834, "loss": 1.7837, "step": 28521 }, { "epoch": 0.95, "grad_norm": 0.4796382188796997, "learning_rate": 0.0004636734310661429, "loss": 1.8288, "step": 28522 }, { "epoch": 0.95, "grad_norm": 0.43990108370780945, "learning_rate": 0.00046366467146625215, "loss": 1.8664, "step": 28523 }, { "epoch": 0.95, "grad_norm": 0.4564536213874817, "learning_rate": 0.00046365591166769657, "loss": 1.8712, "step": 28524 }, { "epoch": 0.95, "grad_norm": 0.448917031288147, "learning_rate": 0.00046364715167048685, "loss": 1.8016, "step": 28525 }, { "epoch": 0.95, "grad_norm": 0.4533385932445526, "learning_rate": 0.00046363839147463363, "loss": 1.8195, "step": 28526 }, { "epoch": 0.95, "grad_norm": 0.45116302371025085, "learning_rate": 0.00046362963108014747, "loss": 1.788, "step": 28527 }, { "epoch": 0.95, "grad_norm": 0.44027116894721985, "learning_rate": 0.00046362087048703904, "loss": 1.7953, "step": 28528 }, { "epoch": 0.95, "grad_norm": 0.45066988468170166, "learning_rate": 0.0004636121096953189, "loss": 1.8125, "step": 28529 }, { "epoch": 0.95, "grad_norm": 0.45460909605026245, "learning_rate": 0.0004636033487049979, "loss": 1.8372, "step": 28530 }, { "epoch": 0.95, "grad_norm": 0.44424840807914734, "learning_rate": 0.00046359458751608646, "loss": 1.8046, "step": 28531 }, { "epoch": 0.95, "grad_norm": 0.437806099653244, "learning_rate": 0.00046358582612859536, "loss": 1.7827, "step": 28532 }, { "epoch": 0.95, "grad_norm": 0.4636746048927307, "learning_rate": 0.0004635770645425351, "loss": 1.8368, "step": 28533 }, { "epoch": 0.95, "grad_norm": 0.4574367105960846, "learning_rate": 0.0004635683027579165, "loss": 1.8266, "step": 28534 }, { "epoch": 0.95, "grad_norm": 0.4460678696632385, "learning_rate": 0.00046355954077474996, "loss": 1.7653, "step": 28535 }, { "epoch": 0.95, "grad_norm": 0.4377668797969818, "learning_rate": 0.0004635507785930463, "loss": 1.7883, "step": 28536 }, { "epoch": 0.95, "grad_norm": 0.4564015865325928, "learning_rate": 0.000463542016212816, "loss": 1.8332, "step": 28537 }, { "epoch": 0.95, "grad_norm": 0.45142075419425964, "learning_rate": 0.0004635332536340699, "loss": 1.841, "step": 28538 }, { "epoch": 0.95, "grad_norm": 0.452990859746933, "learning_rate": 0.0004635244908568185, "loss": 1.7406, "step": 28539 }, { "epoch": 0.95, "grad_norm": 0.44270285964012146, "learning_rate": 0.0004635157278810724, "loss": 1.8337, "step": 28540 }, { "epoch": 0.95, "grad_norm": 0.43550196290016174, "learning_rate": 0.0004635069647068424, "loss": 1.8213, "step": 28541 }, { "epoch": 0.95, "grad_norm": 0.42936983704566956, "learning_rate": 0.00046349820133413894, "loss": 1.8109, "step": 28542 }, { "epoch": 0.95, "grad_norm": 0.4338492155075073, "learning_rate": 0.0004634894377629728, "loss": 1.7945, "step": 28543 }, { "epoch": 0.95, "grad_norm": 0.4316522479057312, "learning_rate": 0.0004634806739933545, "loss": 1.8317, "step": 28544 }, { "epoch": 0.95, "grad_norm": 0.6957375407218933, "learning_rate": 0.0004634719100252949, "loss": 1.7926, "step": 28545 }, { "epoch": 0.95, "grad_norm": 0.44796329736709595, "learning_rate": 0.0004634631458588044, "loss": 1.829, "step": 28546 }, { "epoch": 0.95, "grad_norm": 0.4424152076244354, "learning_rate": 0.00046345438149389374, "loss": 1.8325, "step": 28547 }, { "epoch": 0.95, "grad_norm": 0.44261372089385986, "learning_rate": 0.0004634456169305735, "loss": 1.7873, "step": 28548 }, { "epoch": 0.95, "grad_norm": 0.4681927263736725, "learning_rate": 0.0004634368521688544, "loss": 1.813, "step": 28549 }, { "epoch": 0.95, "grad_norm": 0.45785409212112427, "learning_rate": 0.000463428087208747, "loss": 1.8172, "step": 28550 }, { "epoch": 0.95, "grad_norm": 0.45433464646339417, "learning_rate": 0.0004634193220502621, "loss": 1.7989, "step": 28551 }, { "epoch": 0.95, "grad_norm": 0.4517315626144409, "learning_rate": 0.0004634105566934101, "loss": 1.8566, "step": 28552 }, { "epoch": 0.95, "grad_norm": 0.4447685778141022, "learning_rate": 0.00046340179113820183, "loss": 1.8389, "step": 28553 }, { "epoch": 0.95, "grad_norm": 0.4424985647201538, "learning_rate": 0.0004633930253846479, "loss": 1.8214, "step": 28554 }, { "epoch": 0.95, "grad_norm": 0.4503297805786133, "learning_rate": 0.00046338425943275887, "loss": 1.8558, "step": 28555 }, { "epoch": 0.95, "grad_norm": 0.44459453225135803, "learning_rate": 0.0004633754932825453, "loss": 1.7794, "step": 28556 }, { "epoch": 0.95, "grad_norm": 0.4452786147594452, "learning_rate": 0.00046336672693401813, "loss": 1.723, "step": 28557 }, { "epoch": 0.95, "grad_norm": 0.4623948931694031, "learning_rate": 0.0004633579603871877, "loss": 1.852, "step": 28558 }, { "epoch": 0.95, "grad_norm": 0.4456362724304199, "learning_rate": 0.0004633491936420648, "loss": 1.7766, "step": 28559 }, { "epoch": 0.95, "grad_norm": 0.4730474352836609, "learning_rate": 0.0004633404266986601, "loss": 1.8137, "step": 28560 }, { "epoch": 0.95, "grad_norm": 0.43836715817451477, "learning_rate": 0.0004633316595569842, "loss": 1.8197, "step": 28561 }, { "epoch": 0.95, "grad_norm": 0.44321832060813904, "learning_rate": 0.0004633228922170477, "loss": 1.7765, "step": 28562 }, { "epoch": 0.95, "grad_norm": 0.4498564898967743, "learning_rate": 0.0004633141246788613, "loss": 1.8037, "step": 28563 }, { "epoch": 0.95, "grad_norm": 0.4497714340686798, "learning_rate": 0.0004633053569424355, "loss": 1.8724, "step": 28564 }, { "epoch": 0.95, "grad_norm": 0.46758460998535156, "learning_rate": 0.00046329658900778117, "loss": 1.8535, "step": 28565 }, { "epoch": 0.95, "grad_norm": 0.4345329999923706, "learning_rate": 0.00046328782087490884, "loss": 1.7888, "step": 28566 }, { "epoch": 0.95, "grad_norm": 0.4417097568511963, "learning_rate": 0.00046327905254382913, "loss": 1.8576, "step": 28567 }, { "epoch": 0.95, "grad_norm": 0.43150436878204346, "learning_rate": 0.0004632702840145527, "loss": 1.7776, "step": 28568 }, { "epoch": 0.95, "grad_norm": 0.4508639872074127, "learning_rate": 0.00046326151528709015, "loss": 1.7931, "step": 28569 }, { "epoch": 0.95, "grad_norm": 0.4683275520801544, "learning_rate": 0.0004632527463614523, "loss": 1.8441, "step": 28570 }, { "epoch": 0.95, "grad_norm": 0.4484712481498718, "learning_rate": 0.0004632439772376496, "loss": 1.8083, "step": 28571 }, { "epoch": 0.95, "grad_norm": 0.4295184314250946, "learning_rate": 0.00046323520791569274, "loss": 1.8439, "step": 28572 }, { "epoch": 0.95, "grad_norm": 0.43747153878211975, "learning_rate": 0.0004632264383955925, "loss": 1.7991, "step": 28573 }, { "epoch": 0.95, "grad_norm": 0.44104573130607605, "learning_rate": 0.0004632176686773593, "loss": 1.7405, "step": 28574 }, { "epoch": 0.95, "grad_norm": 0.4354964792728424, "learning_rate": 0.00046320889876100393, "loss": 1.7738, "step": 28575 }, { "epoch": 0.95, "grad_norm": 0.4922078251838684, "learning_rate": 0.000463200128646537, "loss": 1.7277, "step": 28576 }, { "epoch": 0.95, "grad_norm": 0.42889824509620667, "learning_rate": 0.00046319135833396916, "loss": 1.7438, "step": 28577 }, { "epoch": 0.95, "grad_norm": 0.45411810278892517, "learning_rate": 0.0004631825878233111, "loss": 1.7906, "step": 28578 }, { "epoch": 0.95, "grad_norm": 0.4624031186103821, "learning_rate": 0.0004631738171145733, "loss": 1.874, "step": 28579 }, { "epoch": 0.95, "grad_norm": 0.4587686359882355, "learning_rate": 0.0004631650462077667, "loss": 1.7754, "step": 28580 }, { "epoch": 0.95, "grad_norm": 0.4425140619277954, "learning_rate": 0.0004631562751029016, "loss": 1.7337, "step": 28581 }, { "epoch": 0.95, "grad_norm": 0.4563971161842346, "learning_rate": 0.00046314750379998894, "loss": 1.7996, "step": 28582 }, { "epoch": 0.95, "grad_norm": 0.5199394226074219, "learning_rate": 0.00046313873229903926, "loss": 1.8096, "step": 28583 }, { "epoch": 0.95, "grad_norm": 0.46404770016670227, "learning_rate": 0.00046312996060006315, "loss": 1.8504, "step": 28584 }, { "epoch": 0.95, "grad_norm": 0.4520576000213623, "learning_rate": 0.0004631211887030713, "loss": 1.8037, "step": 28585 }, { "epoch": 0.95, "grad_norm": 0.43956008553504944, "learning_rate": 0.0004631124166080744, "loss": 1.8211, "step": 28586 }, { "epoch": 0.95, "grad_norm": 0.483309268951416, "learning_rate": 0.00046310364431508303, "loss": 1.8239, "step": 28587 }, { "epoch": 0.95, "grad_norm": 0.45392489433288574, "learning_rate": 0.00046309487182410786, "loss": 1.8151, "step": 28588 }, { "epoch": 0.95, "grad_norm": 0.4433682858943939, "learning_rate": 0.0004630860991351596, "loss": 1.8168, "step": 28589 }, { "epoch": 0.95, "grad_norm": 0.44802072644233704, "learning_rate": 0.0004630773262482488, "loss": 1.7855, "step": 28590 }, { "epoch": 0.95, "grad_norm": 0.44945091009140015, "learning_rate": 0.00046306855316338605, "loss": 1.779, "step": 28591 }, { "epoch": 0.95, "grad_norm": 0.45661312341690063, "learning_rate": 0.00046305977988058225, "loss": 1.8088, "step": 28592 }, { "epoch": 0.95, "grad_norm": 0.44172000885009766, "learning_rate": 0.0004630510063998479, "loss": 1.8242, "step": 28593 }, { "epoch": 0.95, "grad_norm": 0.46130892634391785, "learning_rate": 0.0004630422327211936, "loss": 1.8135, "step": 28594 }, { "epoch": 0.95, "grad_norm": 0.4463003873825073, "learning_rate": 0.0004630334588446301, "loss": 1.7984, "step": 28595 }, { "epoch": 0.95, "grad_norm": 0.43317854404449463, "learning_rate": 0.0004630246847701679, "loss": 1.8153, "step": 28596 }, { "epoch": 0.95, "grad_norm": 0.44447430968284607, "learning_rate": 0.0004630159104978178, "loss": 1.8283, "step": 28597 }, { "epoch": 0.95, "grad_norm": 0.4611714482307434, "learning_rate": 0.0004630071360275905, "loss": 1.8191, "step": 28598 }, { "epoch": 0.95, "grad_norm": 0.44667187333106995, "learning_rate": 0.0004629983613594964, "loss": 1.9088, "step": 28599 }, { "epoch": 0.95, "grad_norm": 0.44778499007225037, "learning_rate": 0.00046298958649354646, "loss": 1.8186, "step": 28600 }, { "epoch": 0.95, "grad_norm": 0.45013269782066345, "learning_rate": 0.0004629808114297511, "loss": 1.7584, "step": 28601 }, { "epoch": 0.95, "grad_norm": 0.44668886065483093, "learning_rate": 0.0004629720361681211, "loss": 1.8651, "step": 28602 }, { "epoch": 0.95, "grad_norm": 0.4371859133243561, "learning_rate": 0.000462963260708667, "loss": 1.7891, "step": 28603 }, { "epoch": 0.95, "grad_norm": 0.44170886278152466, "learning_rate": 0.00046295448505139945, "loss": 1.7893, "step": 28604 }, { "epoch": 0.95, "grad_norm": 0.44658419489860535, "learning_rate": 0.0004629457091963293, "loss": 1.7021, "step": 28605 }, { "epoch": 0.95, "grad_norm": 0.4524664282798767, "learning_rate": 0.000462936933143467, "loss": 1.8074, "step": 28606 }, { "epoch": 0.95, "grad_norm": 0.43058979511260986, "learning_rate": 0.0004629281568928233, "loss": 1.805, "step": 28607 }, { "epoch": 0.95, "grad_norm": 0.46121302247047424, "learning_rate": 0.0004629193804444088, "loss": 1.7583, "step": 28608 }, { "epoch": 0.95, "grad_norm": 0.43486469984054565, "learning_rate": 0.00046291060379823415, "loss": 1.8193, "step": 28609 }, { "epoch": 0.95, "grad_norm": 0.45410633087158203, "learning_rate": 0.0004629018269543101, "loss": 1.8675, "step": 28610 }, { "epoch": 0.95, "grad_norm": 0.4370676577091217, "learning_rate": 0.0004628930499126472, "loss": 1.8041, "step": 28611 }, { "epoch": 0.95, "grad_norm": 0.4511241018772125, "learning_rate": 0.00046288427267325615, "loss": 1.8409, "step": 28612 }, { "epoch": 0.95, "grad_norm": 0.44966059923171997, "learning_rate": 0.0004628754952361475, "loss": 1.723, "step": 28613 }, { "epoch": 0.95, "grad_norm": 0.4380344748497009, "learning_rate": 0.0004628667176013321, "loss": 1.8332, "step": 28614 }, { "epoch": 0.95, "grad_norm": 0.4432787299156189, "learning_rate": 0.0004628579397688205, "loss": 1.8616, "step": 28615 }, { "epoch": 0.95, "grad_norm": 0.4407143294811249, "learning_rate": 0.00046284916173862335, "loss": 1.797, "step": 28616 }, { "epoch": 0.95, "grad_norm": 0.45032259821891785, "learning_rate": 0.0004628403835107513, "loss": 1.7125, "step": 28617 }, { "epoch": 0.95, "grad_norm": 0.43977901339530945, "learning_rate": 0.00046283160508521503, "loss": 1.8114, "step": 28618 }, { "epoch": 0.95, "grad_norm": 0.4432476758956909, "learning_rate": 0.00046282282646202517, "loss": 1.8144, "step": 28619 }, { "epoch": 0.95, "grad_norm": 0.4296768307685852, "learning_rate": 0.00046281404764119245, "loss": 1.8119, "step": 28620 }, { "epoch": 0.95, "grad_norm": 0.4502732753753662, "learning_rate": 0.0004628052686227273, "loss": 1.8408, "step": 28621 }, { "epoch": 0.95, "grad_norm": 0.4391629695892334, "learning_rate": 0.00046279648940664067, "loss": 1.8339, "step": 28622 }, { "epoch": 0.95, "grad_norm": 0.4721110165119171, "learning_rate": 0.0004627877099929431, "loss": 1.8114, "step": 28623 }, { "epoch": 0.95, "grad_norm": 0.4648159444332123, "learning_rate": 0.0004627789303816452, "loss": 1.8239, "step": 28624 }, { "epoch": 0.95, "grad_norm": 0.4433823823928833, "learning_rate": 0.0004627701505727577, "loss": 1.7921, "step": 28625 }, { "epoch": 0.95, "grad_norm": 0.45905575156211853, "learning_rate": 0.00046276137056629115, "loss": 1.8853, "step": 28626 }, { "epoch": 0.95, "grad_norm": 0.45356234908103943, "learning_rate": 0.0004627525903622563, "loss": 1.8142, "step": 28627 }, { "epoch": 0.95, "grad_norm": 0.44039803743362427, "learning_rate": 0.00046274380996066386, "loss": 1.8667, "step": 28628 }, { "epoch": 0.95, "grad_norm": 0.4584936797618866, "learning_rate": 0.00046273502936152426, "loss": 1.6884, "step": 28629 }, { "epoch": 0.95, "grad_norm": 0.4509528577327728, "learning_rate": 0.0004627262485648485, "loss": 1.8766, "step": 28630 }, { "epoch": 0.95, "grad_norm": 0.44959837198257446, "learning_rate": 0.0004627174675706469, "loss": 1.8126, "step": 28631 }, { "epoch": 0.95, "grad_norm": 0.43778425455093384, "learning_rate": 0.0004627086863789303, "loss": 1.816, "step": 28632 }, { "epoch": 0.95, "grad_norm": 0.4292214512825012, "learning_rate": 0.0004626999049897094, "loss": 1.7392, "step": 28633 }, { "epoch": 0.95, "grad_norm": 0.44965627789497375, "learning_rate": 0.00046269112340299474, "loss": 1.8144, "step": 28634 }, { "epoch": 0.95, "grad_norm": 0.4317922592163086, "learning_rate": 0.000462682341618797, "loss": 1.7461, "step": 28635 }, { "epoch": 0.95, "grad_norm": 0.4400472640991211, "learning_rate": 0.0004626735596371269, "loss": 1.8452, "step": 28636 }, { "epoch": 0.95, "grad_norm": 0.4518580138683319, "learning_rate": 0.0004626647774579951, "loss": 1.7547, "step": 28637 }, { "epoch": 0.95, "grad_norm": 0.4345944821834564, "learning_rate": 0.0004626559950814122, "loss": 1.8274, "step": 28638 }, { "epoch": 0.95, "grad_norm": 0.4541236460208893, "learning_rate": 0.0004626472125073888, "loss": 1.7654, "step": 28639 }, { "epoch": 0.95, "grad_norm": 0.46220099925994873, "learning_rate": 0.00046263842973593577, "loss": 1.7974, "step": 28640 }, { "epoch": 0.95, "grad_norm": 0.45322129130363464, "learning_rate": 0.0004626296467670636, "loss": 1.8175, "step": 28641 }, { "epoch": 0.95, "grad_norm": 0.45279085636138916, "learning_rate": 0.0004626208636007831, "loss": 1.8159, "step": 28642 }, { "epoch": 0.95, "grad_norm": 0.46269410848617554, "learning_rate": 0.00046261208023710466, "loss": 1.7651, "step": 28643 }, { "epoch": 0.95, "grad_norm": 0.451900452375412, "learning_rate": 0.0004626032966760393, "loss": 1.8365, "step": 28644 }, { "epoch": 0.95, "grad_norm": 0.45772629976272583, "learning_rate": 0.0004625945129175973, "loss": 1.8542, "step": 28645 }, { "epoch": 0.95, "grad_norm": 0.4503958523273468, "learning_rate": 0.0004625857289617896, "loss": 1.8188, "step": 28646 }, { "epoch": 0.95, "grad_norm": 0.45839861035346985, "learning_rate": 0.0004625769448086268, "loss": 1.8027, "step": 28647 }, { "epoch": 0.95, "grad_norm": 0.44779932498931885, "learning_rate": 0.00046256816045811956, "loss": 1.8395, "step": 28648 }, { "epoch": 0.95, "grad_norm": 0.44593358039855957, "learning_rate": 0.0004625593759102785, "loss": 1.7768, "step": 28649 }, { "epoch": 0.95, "grad_norm": 0.4468258023262024, "learning_rate": 0.0004625505911651143, "loss": 1.7631, "step": 28650 }, { "epoch": 0.95, "grad_norm": 0.4302178621292114, "learning_rate": 0.00046254180622263766, "loss": 1.8025, "step": 28651 }, { "epoch": 0.95, "grad_norm": 0.4393078684806824, "learning_rate": 0.0004625330210828592, "loss": 1.7737, "step": 28652 }, { "epoch": 0.95, "grad_norm": 0.4621368646621704, "learning_rate": 0.0004625242357457896, "loss": 1.8262, "step": 28653 }, { "epoch": 0.95, "grad_norm": 0.44390711188316345, "learning_rate": 0.00046251545021143957, "loss": 1.7273, "step": 28654 }, { "epoch": 0.95, "grad_norm": 0.46511951088905334, "learning_rate": 0.00046250666447981964, "loss": 1.8599, "step": 28655 }, { "epoch": 0.95, "grad_norm": 0.45742276310920715, "learning_rate": 0.00046249787855094063, "loss": 1.7799, "step": 28656 }, { "epoch": 0.95, "grad_norm": 0.42362233996391296, "learning_rate": 0.0004624890924248131, "loss": 1.8116, "step": 28657 }, { "epoch": 0.95, "grad_norm": 0.4503290355205536, "learning_rate": 0.0004624803061014478, "loss": 1.8295, "step": 28658 }, { "epoch": 0.95, "grad_norm": 0.4458523392677307, "learning_rate": 0.00046247151958085533, "loss": 1.7929, "step": 28659 }, { "epoch": 0.95, "grad_norm": 0.4450478255748749, "learning_rate": 0.00046246273286304636, "loss": 1.8215, "step": 28660 }, { "epoch": 0.95, "grad_norm": 0.4404256045818329, "learning_rate": 0.00046245394594803153, "loss": 1.7321, "step": 28661 }, { "epoch": 0.95, "grad_norm": 0.436657577753067, "learning_rate": 0.00046244515883582163, "loss": 1.8343, "step": 28662 }, { "epoch": 0.95, "grad_norm": 0.45374158024787903, "learning_rate": 0.00046243637152642723, "loss": 1.9116, "step": 28663 }, { "epoch": 0.95, "grad_norm": 0.45095592737197876, "learning_rate": 0.00046242758401985896, "loss": 1.8177, "step": 28664 }, { "epoch": 0.95, "grad_norm": 0.457634299993515, "learning_rate": 0.0004624187963161276, "loss": 1.8009, "step": 28665 }, { "epoch": 0.95, "grad_norm": 0.4410248398780823, "learning_rate": 0.00046241000841524367, "loss": 1.78, "step": 28666 }, { "epoch": 0.95, "grad_norm": 0.45063191652297974, "learning_rate": 0.000462401220317218, "loss": 1.8299, "step": 28667 }, { "epoch": 0.95, "grad_norm": 0.44215846061706543, "learning_rate": 0.00046239243202206114, "loss": 1.7999, "step": 28668 }, { "epoch": 0.95, "grad_norm": 0.4392792880535126, "learning_rate": 0.00046238364352978374, "loss": 1.7815, "step": 28669 }, { "epoch": 0.95, "grad_norm": 0.438119113445282, "learning_rate": 0.00046237485484039665, "loss": 1.8389, "step": 28670 }, { "epoch": 0.95, "grad_norm": 0.4498209059238434, "learning_rate": 0.0004623660659539103, "loss": 1.816, "step": 28671 }, { "epoch": 0.95, "grad_norm": 0.4503471553325653, "learning_rate": 0.0004623572768703355, "loss": 1.7732, "step": 28672 }, { "epoch": 0.95, "grad_norm": 0.43640732765197754, "learning_rate": 0.0004623484875896829, "loss": 1.7707, "step": 28673 }, { "epoch": 0.95, "grad_norm": 0.440501868724823, "learning_rate": 0.0004623396981119631, "loss": 1.7991, "step": 28674 }, { "epoch": 0.95, "grad_norm": 0.4436219334602356, "learning_rate": 0.00046233090843718697, "loss": 1.8092, "step": 28675 }, { "epoch": 0.95, "grad_norm": 0.4338824152946472, "learning_rate": 0.00046232211856536487, "loss": 1.7996, "step": 28676 }, { "epoch": 0.95, "grad_norm": 0.45714128017425537, "learning_rate": 0.0004623133284965078, "loss": 1.7552, "step": 28677 }, { "epoch": 0.95, "grad_norm": 0.45245808362960815, "learning_rate": 0.00046230453823062616, "loss": 1.712, "step": 28678 }, { "epoch": 0.95, "grad_norm": 0.445660263299942, "learning_rate": 0.0004622957477677307, "loss": 1.8189, "step": 28679 }, { "epoch": 0.95, "grad_norm": 0.44713711738586426, "learning_rate": 0.0004622869571078322, "loss": 1.8606, "step": 28680 }, { "epoch": 0.95, "grad_norm": 0.4400215148925781, "learning_rate": 0.0004622781662509411, "loss": 1.7365, "step": 28681 }, { "epoch": 0.95, "grad_norm": 0.47539642453193665, "learning_rate": 0.00046226937519706836, "loss": 1.8998, "step": 28682 }, { "epoch": 0.95, "grad_norm": 0.4349195063114166, "learning_rate": 0.00046226058394622453, "loss": 1.7607, "step": 28683 }, { "epoch": 0.95, "grad_norm": 0.4555199444293976, "learning_rate": 0.0004622517924984201, "loss": 1.8366, "step": 28684 }, { "epoch": 0.95, "grad_norm": 0.4595523178577423, "learning_rate": 0.00046224300085366603, "loss": 1.7579, "step": 28685 }, { "epoch": 0.95, "grad_norm": 0.44054877758026123, "learning_rate": 0.00046223420901197283, "loss": 1.7975, "step": 28686 }, { "epoch": 0.95, "grad_norm": 0.44484901428222656, "learning_rate": 0.00046222541697335117, "loss": 1.8317, "step": 28687 }, { "epoch": 0.95, "grad_norm": 0.4710093140602112, "learning_rate": 0.0004622166247378118, "loss": 1.7929, "step": 28688 }, { "epoch": 0.95, "grad_norm": 0.45111367106437683, "learning_rate": 0.0004622078323053654, "loss": 1.8355, "step": 28689 }, { "epoch": 0.95, "grad_norm": 0.451556921005249, "learning_rate": 0.00046219903967602256, "loss": 1.9023, "step": 28690 }, { "epoch": 0.95, "grad_norm": 0.45571446418762207, "learning_rate": 0.0004621902468497938, "loss": 1.7568, "step": 28691 }, { "epoch": 0.95, "grad_norm": 0.4421679377555847, "learning_rate": 0.0004621814538266902, "loss": 1.7158, "step": 28692 }, { "epoch": 0.95, "grad_norm": 0.44736525416374207, "learning_rate": 0.00046217266060672216, "loss": 1.7981, "step": 28693 }, { "epoch": 0.95, "grad_norm": 0.4467276334762573, "learning_rate": 0.0004621638671899004, "loss": 1.8455, "step": 28694 }, { "epoch": 0.95, "grad_norm": 0.4340720772743225, "learning_rate": 0.00046215507357623555, "loss": 1.8472, "step": 28695 }, { "epoch": 0.95, "grad_norm": 0.4459182322025299, "learning_rate": 0.0004621462797657384, "loss": 1.7794, "step": 28696 }, { "epoch": 0.95, "grad_norm": 0.4568767547607422, "learning_rate": 0.00046213748575841954, "loss": 1.7539, "step": 28697 }, { "epoch": 0.95, "grad_norm": 0.44048959016799927, "learning_rate": 0.00046212869155428965, "loss": 1.8842, "step": 28698 }, { "epoch": 0.95, "grad_norm": 0.455941379070282, "learning_rate": 0.0004621198971533594, "loss": 1.8636, "step": 28699 }, { "epoch": 0.95, "grad_norm": 0.42427289485931396, "learning_rate": 0.00046211110255563946, "loss": 1.7913, "step": 28700 }, { "epoch": 0.95, "grad_norm": 0.43724414706230164, "learning_rate": 0.00046210230776114056, "loss": 1.7089, "step": 28701 }, { "epoch": 0.95, "grad_norm": 0.9277005791664124, "learning_rate": 0.0004620935127698734, "loss": 1.8446, "step": 28702 }, { "epoch": 0.95, "grad_norm": 0.4607757031917572, "learning_rate": 0.00046208471758184846, "loss": 1.7952, "step": 28703 }, { "epoch": 0.95, "grad_norm": 0.4671807587146759, "learning_rate": 0.0004620759221970767, "loss": 1.8297, "step": 28704 }, { "epoch": 0.96, "grad_norm": 0.4438658058643341, "learning_rate": 0.0004620671266155686, "loss": 1.808, "step": 28705 }, { "epoch": 0.96, "grad_norm": 0.43758323788642883, "learning_rate": 0.0004620583308373349, "loss": 1.7638, "step": 28706 }, { "epoch": 0.96, "grad_norm": 0.4519045054912567, "learning_rate": 0.00046204953486238623, "loss": 1.8764, "step": 28707 }, { "epoch": 0.96, "grad_norm": 0.42371076345443726, "learning_rate": 0.0004620407386907333, "loss": 1.7982, "step": 28708 }, { "epoch": 0.96, "grad_norm": 0.4521027207374573, "learning_rate": 0.00046203194232238685, "loss": 1.8142, "step": 28709 }, { "epoch": 0.96, "grad_norm": 0.43387728929519653, "learning_rate": 0.00046202314575735746, "loss": 1.7949, "step": 28710 }, { "epoch": 0.96, "grad_norm": 0.4403988718986511, "learning_rate": 0.0004620143489956558, "loss": 1.8522, "step": 28711 }, { "epoch": 0.96, "grad_norm": 0.44796454906463623, "learning_rate": 0.0004620055520372926, "loss": 1.8286, "step": 28712 }, { "epoch": 0.96, "grad_norm": 0.43758663535118103, "learning_rate": 0.00046199675488227854, "loss": 1.7697, "step": 28713 }, { "epoch": 0.96, "grad_norm": 0.449614942073822, "learning_rate": 0.00046198795753062426, "loss": 1.8314, "step": 28714 }, { "epoch": 0.96, "grad_norm": 0.4458310008049011, "learning_rate": 0.0004619791599823406, "loss": 1.8446, "step": 28715 }, { "epoch": 0.96, "grad_norm": 0.4356626272201538, "learning_rate": 0.0004619703622374381, "loss": 1.8279, "step": 28716 }, { "epoch": 0.96, "grad_norm": 0.48268574476242065, "learning_rate": 0.0004619615642959273, "loss": 1.8402, "step": 28717 }, { "epoch": 0.96, "grad_norm": 0.47985607385635376, "learning_rate": 0.0004619527661578191, "loss": 1.8463, "step": 28718 }, { "epoch": 0.96, "grad_norm": 0.46686822175979614, "learning_rate": 0.00046194396782312403, "loss": 1.9015, "step": 28719 }, { "epoch": 0.96, "grad_norm": 0.44514334201812744, "learning_rate": 0.000461935169291853, "loss": 1.8316, "step": 28720 }, { "epoch": 0.96, "grad_norm": 0.4578138589859009, "learning_rate": 0.00046192637056401636, "loss": 1.8116, "step": 28721 }, { "epoch": 0.96, "grad_norm": 0.4817831814289093, "learning_rate": 0.00046191757163962507, "loss": 1.8779, "step": 28722 }, { "epoch": 0.96, "grad_norm": 0.47148898243904114, "learning_rate": 0.0004619087725186897, "loss": 1.8596, "step": 28723 }, { "epoch": 0.96, "grad_norm": 0.45505619049072266, "learning_rate": 0.00046189997320122094, "loss": 1.9001, "step": 28724 }, { "epoch": 0.96, "grad_norm": 0.46999484300613403, "learning_rate": 0.00046189117368722945, "loss": 1.8023, "step": 28725 }, { "epoch": 0.96, "grad_norm": 0.4920167922973633, "learning_rate": 0.00046188237397672583, "loss": 1.8447, "step": 28726 }, { "epoch": 0.96, "grad_norm": 0.46475496888160706, "learning_rate": 0.00046187357406972103, "loss": 1.8283, "step": 28727 }, { "epoch": 0.96, "grad_norm": 0.4576191306114197, "learning_rate": 0.00046186477396622545, "loss": 1.7802, "step": 28728 }, { "epoch": 0.96, "grad_norm": 0.44446587562561035, "learning_rate": 0.00046185597366625, "loss": 1.8024, "step": 28729 }, { "epoch": 0.96, "grad_norm": 0.4572041928768158, "learning_rate": 0.0004618471731698052, "loss": 1.8221, "step": 28730 }, { "epoch": 0.96, "grad_norm": 0.4752424955368042, "learning_rate": 0.0004618383724769017, "loss": 1.7795, "step": 28731 }, { "epoch": 0.96, "grad_norm": 0.4333813190460205, "learning_rate": 0.0004618295715875503, "loss": 1.7782, "step": 28732 }, { "epoch": 0.96, "grad_norm": 0.47176411747932434, "learning_rate": 0.00046182077050176167, "loss": 1.8412, "step": 28733 }, { "epoch": 0.96, "grad_norm": 0.4447663724422455, "learning_rate": 0.00046181196921954643, "loss": 1.8059, "step": 28734 }, { "epoch": 0.96, "grad_norm": 0.4500431716442108, "learning_rate": 0.0004618031677409153, "loss": 1.866, "step": 28735 }, { "epoch": 0.96, "grad_norm": 0.44872698187828064, "learning_rate": 0.000461794366065879, "loss": 1.8052, "step": 28736 }, { "epoch": 0.96, "grad_norm": 0.4386778175830841, "learning_rate": 0.0004617855641944482, "loss": 1.7967, "step": 28737 }, { "epoch": 0.96, "grad_norm": 0.4516908526420593, "learning_rate": 0.0004617767621266335, "loss": 1.8443, "step": 28738 }, { "epoch": 0.96, "grad_norm": 0.5209915041923523, "learning_rate": 0.0004617679598624457, "loss": 1.7599, "step": 28739 }, { "epoch": 0.96, "grad_norm": 0.4279245436191559, "learning_rate": 0.00046175915740189545, "loss": 1.7878, "step": 28740 }, { "epoch": 0.96, "grad_norm": 0.44924047589302063, "learning_rate": 0.00046175035474499334, "loss": 1.8262, "step": 28741 }, { "epoch": 0.96, "grad_norm": 0.42848822474479675, "learning_rate": 0.0004617415518917502, "loss": 1.7983, "step": 28742 }, { "epoch": 0.96, "grad_norm": 0.4553772807121277, "learning_rate": 0.0004617327488421766, "loss": 1.8618, "step": 28743 }, { "epoch": 0.96, "grad_norm": 0.44058963656425476, "learning_rate": 0.0004617239455962833, "loss": 1.8312, "step": 28744 }, { "epoch": 0.96, "grad_norm": 0.45040035247802734, "learning_rate": 0.000461715142154081, "loss": 1.8488, "step": 28745 }, { "epoch": 0.96, "grad_norm": 0.45954838395118713, "learning_rate": 0.0004617063385155803, "loss": 1.8268, "step": 28746 }, { "epoch": 0.96, "grad_norm": 0.44384363293647766, "learning_rate": 0.0004616975346807919, "loss": 1.7657, "step": 28747 }, { "epoch": 0.96, "grad_norm": 0.4287279546260834, "learning_rate": 0.0004616887306497265, "loss": 1.7788, "step": 28748 }, { "epoch": 0.96, "grad_norm": 0.45166900753974915, "learning_rate": 0.0004616799264223948, "loss": 1.8187, "step": 28749 }, { "epoch": 0.96, "grad_norm": 0.4500963091850281, "learning_rate": 0.0004616711219988077, "loss": 1.8383, "step": 28750 }, { "epoch": 0.96, "grad_norm": 0.45524969696998596, "learning_rate": 0.00046166231737897537, "loss": 1.7429, "step": 28751 }, { "epoch": 0.96, "grad_norm": 0.4459637999534607, "learning_rate": 0.000461653512562909, "loss": 1.7924, "step": 28752 }, { "epoch": 0.96, "grad_norm": 0.45904433727264404, "learning_rate": 0.00046164470755061905, "loss": 1.7223, "step": 28753 }, { "epoch": 0.96, "grad_norm": 0.4723026156425476, "learning_rate": 0.0004616359023421162, "loss": 1.8007, "step": 28754 }, { "epoch": 0.96, "grad_norm": 0.4738388657569885, "learning_rate": 0.00046162709693741127, "loss": 1.7717, "step": 28755 }, { "epoch": 0.96, "grad_norm": 0.44496896862983704, "learning_rate": 0.00046161829133651477, "loss": 1.8363, "step": 28756 }, { "epoch": 0.96, "grad_norm": 0.4563046991825104, "learning_rate": 0.0004616094855394375, "loss": 1.772, "step": 28757 }, { "epoch": 0.96, "grad_norm": 0.47858473658561707, "learning_rate": 0.00046160067954619015, "loss": 1.8231, "step": 28758 }, { "epoch": 0.96, "grad_norm": 0.4604736566543579, "learning_rate": 0.0004615918733567834, "loss": 1.763, "step": 28759 }, { "epoch": 0.96, "grad_norm": 0.4511682391166687, "learning_rate": 0.0004615830669712279, "loss": 1.8272, "step": 28760 }, { "epoch": 0.96, "grad_norm": 0.45404598116874695, "learning_rate": 0.0004615742603895343, "loss": 1.7608, "step": 28761 }, { "epoch": 0.96, "grad_norm": 0.49008819460868835, "learning_rate": 0.00046156545361171345, "loss": 1.7708, "step": 28762 }, { "epoch": 0.96, "grad_norm": 0.4517754912376404, "learning_rate": 0.00046155664663777583, "loss": 1.7994, "step": 28763 }, { "epoch": 0.96, "grad_norm": 0.43754103779792786, "learning_rate": 0.00046154783946773245, "loss": 1.8154, "step": 28764 }, { "epoch": 0.96, "grad_norm": 0.448515385389328, "learning_rate": 0.00046153903210159364, "loss": 1.7578, "step": 28765 }, { "epoch": 0.96, "grad_norm": 0.44567927718162537, "learning_rate": 0.00046153022453937026, "loss": 1.8309, "step": 28766 }, { "epoch": 0.96, "grad_norm": 0.4492760896682739, "learning_rate": 0.00046152141678107303, "loss": 1.8547, "step": 28767 }, { "epoch": 0.96, "grad_norm": 0.4576590359210968, "learning_rate": 0.00046151260882671254, "loss": 1.7532, "step": 28768 }, { "epoch": 0.96, "grad_norm": 0.44114232063293457, "learning_rate": 0.00046150380067629956, "loss": 1.8561, "step": 28769 }, { "epoch": 0.96, "grad_norm": 0.4497603178024292, "learning_rate": 0.0004614949923298448, "loss": 1.7925, "step": 28770 }, { "epoch": 0.96, "grad_norm": 0.4674181342124939, "learning_rate": 0.0004614861837873589, "loss": 1.8651, "step": 28771 }, { "epoch": 0.96, "grad_norm": 1.0219159126281738, "learning_rate": 0.00046147737504885256, "loss": 1.8083, "step": 28772 }, { "epoch": 0.96, "grad_norm": 0.45982250571250916, "learning_rate": 0.0004614685661143365, "loss": 1.9013, "step": 28773 }, { "epoch": 0.96, "grad_norm": 0.4501051604747772, "learning_rate": 0.0004614597569838214, "loss": 1.9045, "step": 28774 }, { "epoch": 0.96, "grad_norm": 0.47202423214912415, "learning_rate": 0.00046145094765731793, "loss": 1.8763, "step": 28775 }, { "epoch": 0.96, "grad_norm": 0.4360429048538208, "learning_rate": 0.0004614421381348367, "loss": 1.8462, "step": 28776 }, { "epoch": 0.96, "grad_norm": 0.4566743075847626, "learning_rate": 0.0004614333284163886, "loss": 1.8063, "step": 28777 }, { "epoch": 0.96, "grad_norm": 0.4445491135120392, "learning_rate": 0.00046142451850198423, "loss": 1.7893, "step": 28778 }, { "epoch": 0.96, "grad_norm": 0.4335763156414032, "learning_rate": 0.00046141570839163426, "loss": 1.8579, "step": 28779 }, { "epoch": 0.96, "grad_norm": 0.43679794669151306, "learning_rate": 0.0004614068980853495, "loss": 1.8202, "step": 28780 }, { "epoch": 0.96, "grad_norm": 0.4407341778278351, "learning_rate": 0.00046139808758314036, "loss": 1.8884, "step": 28781 }, { "epoch": 0.96, "grad_norm": 0.4434197247028351, "learning_rate": 0.0004613892768850178, "loss": 1.777, "step": 28782 }, { "epoch": 0.96, "grad_norm": 0.43938836455345154, "learning_rate": 0.00046138046599099243, "loss": 1.8165, "step": 28783 }, { "epoch": 0.96, "grad_norm": 0.4426005184650421, "learning_rate": 0.000461371654901075, "loss": 1.7987, "step": 28784 }, { "epoch": 0.96, "grad_norm": 0.4449683129787445, "learning_rate": 0.00046136284361527615, "loss": 1.8228, "step": 28785 }, { "epoch": 0.96, "grad_norm": 0.43927112221717834, "learning_rate": 0.00046135403213360657, "loss": 1.7864, "step": 28786 }, { "epoch": 0.96, "grad_norm": 0.4445255398750305, "learning_rate": 0.000461345220456077, "loss": 1.7453, "step": 28787 }, { "epoch": 0.96, "grad_norm": 0.4417443871498108, "learning_rate": 0.00046133640858269796, "loss": 1.7916, "step": 28788 }, { "epoch": 0.96, "grad_norm": 0.4506852328777313, "learning_rate": 0.0004613275965134805, "loss": 1.8828, "step": 28789 }, { "epoch": 0.96, "grad_norm": 0.4389221668243408, "learning_rate": 0.000461318784248435, "loss": 1.7756, "step": 28790 }, { "epoch": 0.96, "grad_norm": 0.43820831179618835, "learning_rate": 0.00046130997178757226, "loss": 1.7989, "step": 28791 }, { "epoch": 0.96, "grad_norm": 0.444926381111145, "learning_rate": 0.00046130115913090296, "loss": 1.8176, "step": 28792 }, { "epoch": 0.96, "grad_norm": 0.4440613090991974, "learning_rate": 0.0004612923462784379, "loss": 1.7857, "step": 28793 }, { "epoch": 0.96, "grad_norm": 0.45443007349967957, "learning_rate": 0.00046128353323018765, "loss": 1.8343, "step": 28794 }, { "epoch": 0.96, "grad_norm": 0.44317948818206787, "learning_rate": 0.000461274719986163, "loss": 1.8778, "step": 28795 }, { "epoch": 0.96, "grad_norm": 0.4552336633205414, "learning_rate": 0.00046126590654637457, "loss": 1.7959, "step": 28796 }, { "epoch": 0.96, "grad_norm": 0.4420199692249298, "learning_rate": 0.00046125709291083307, "loss": 1.7715, "step": 28797 }, { "epoch": 0.96, "grad_norm": 0.44440898299217224, "learning_rate": 0.00046124827907954917, "loss": 1.8345, "step": 28798 }, { "epoch": 0.96, "grad_norm": 0.4400535523891449, "learning_rate": 0.0004612394650525337, "loss": 1.7875, "step": 28799 }, { "epoch": 0.96, "grad_norm": 0.4512125253677368, "learning_rate": 0.0004612306508297973, "loss": 1.7643, "step": 28800 }, { "epoch": 0.96, "grad_norm": 0.4369589388370514, "learning_rate": 0.00046122183641135066, "loss": 1.8032, "step": 28801 }, { "epoch": 0.96, "grad_norm": 0.4633082449436188, "learning_rate": 0.0004612130217972043, "loss": 1.795, "step": 28802 }, { "epoch": 0.96, "grad_norm": 0.42707571387290955, "learning_rate": 0.0004612042069873692, "loss": 1.7472, "step": 28803 }, { "epoch": 0.96, "grad_norm": 0.4476886987686157, "learning_rate": 0.00046119539198185596, "loss": 1.8436, "step": 28804 }, { "epoch": 0.96, "grad_norm": 0.4488101601600647, "learning_rate": 0.00046118657678067534, "loss": 1.8147, "step": 28805 }, { "epoch": 0.96, "grad_norm": 0.4636702835559845, "learning_rate": 0.00046117776138383777, "loss": 1.8105, "step": 28806 }, { "epoch": 0.96, "grad_norm": 0.45131900906562805, "learning_rate": 0.0004611689457913543, "loss": 1.863, "step": 28807 }, { "epoch": 0.96, "grad_norm": 0.4592798054218292, "learning_rate": 0.00046116013000323545, "loss": 1.8588, "step": 28808 }, { "epoch": 0.96, "grad_norm": 0.44826340675354004, "learning_rate": 0.00046115131401949186, "loss": 1.8718, "step": 28809 }, { "epoch": 0.96, "grad_norm": 0.45240986347198486, "learning_rate": 0.0004611424978401344, "loss": 1.8671, "step": 28810 }, { "epoch": 0.96, "grad_norm": 0.44919493794441223, "learning_rate": 0.00046113368146517364, "loss": 1.7779, "step": 28811 }, { "epoch": 0.96, "grad_norm": 0.47376009821891785, "learning_rate": 0.0004611248648946204, "loss": 1.7836, "step": 28812 }, { "epoch": 0.96, "grad_norm": 0.4637908935546875, "learning_rate": 0.0004611160481284852, "loss": 1.8209, "step": 28813 }, { "epoch": 0.96, "grad_norm": 0.4239393174648285, "learning_rate": 0.000461107231166779, "loss": 1.7708, "step": 28814 }, { "epoch": 0.96, "grad_norm": 0.4464113414287567, "learning_rate": 0.00046109841400951234, "loss": 1.8356, "step": 28815 }, { "epoch": 0.96, "grad_norm": 0.4517214894294739, "learning_rate": 0.00046108959665669587, "loss": 1.8215, "step": 28816 }, { "epoch": 0.96, "grad_norm": 0.49576231837272644, "learning_rate": 0.00046108077910834045, "loss": 1.7975, "step": 28817 }, { "epoch": 0.96, "grad_norm": 0.4660411775112152, "learning_rate": 0.00046107196136445654, "loss": 1.8688, "step": 28818 }, { "epoch": 0.96, "grad_norm": 0.4601931869983673, "learning_rate": 0.00046106314342505514, "loss": 1.8067, "step": 28819 }, { "epoch": 0.96, "grad_norm": 0.4482658803462982, "learning_rate": 0.00046105432529014673, "loss": 1.9069, "step": 28820 }, { "epoch": 0.96, "grad_norm": 0.46290984749794006, "learning_rate": 0.0004610455069597422, "loss": 1.8038, "step": 28821 }, { "epoch": 0.96, "grad_norm": 0.502999484539032, "learning_rate": 0.0004610366884338521, "loss": 1.833, "step": 28822 }, { "epoch": 0.96, "grad_norm": 0.48452579975128174, "learning_rate": 0.00046102786971248717, "loss": 1.9472, "step": 28823 }, { "epoch": 0.96, "grad_norm": 0.4332911968231201, "learning_rate": 0.00046101905079565816, "loss": 1.8531, "step": 28824 }, { "epoch": 0.96, "grad_norm": 0.45818567276000977, "learning_rate": 0.0004610102316833757, "loss": 1.855, "step": 28825 }, { "epoch": 0.96, "grad_norm": 0.45736566185951233, "learning_rate": 0.00046100141237565066, "loss": 1.8691, "step": 28826 }, { "epoch": 0.96, "grad_norm": 0.44998645782470703, "learning_rate": 0.0004609925928724935, "loss": 1.7964, "step": 28827 }, { "epoch": 0.96, "grad_norm": 0.45410069823265076, "learning_rate": 0.0004609837731739151, "loss": 1.792, "step": 28828 }, { "epoch": 0.96, "grad_norm": 0.43431395292282104, "learning_rate": 0.0004609749532799261, "loss": 1.8612, "step": 28829 }, { "epoch": 0.96, "grad_norm": 0.4719969630241394, "learning_rate": 0.00046096613319053726, "loss": 1.8354, "step": 28830 }, { "epoch": 0.96, "grad_norm": 0.46573498845100403, "learning_rate": 0.00046095731290575926, "loss": 1.8596, "step": 28831 }, { "epoch": 0.96, "grad_norm": 0.4711655080318451, "learning_rate": 0.00046094849242560277, "loss": 1.8339, "step": 28832 }, { "epoch": 0.96, "grad_norm": 0.44811195135116577, "learning_rate": 0.0004609396717500785, "loss": 1.8395, "step": 28833 }, { "epoch": 0.96, "grad_norm": 0.45619237422943115, "learning_rate": 0.00046093085087919725, "loss": 1.7924, "step": 28834 }, { "epoch": 0.96, "grad_norm": 0.46277162432670593, "learning_rate": 0.0004609220298129696, "loss": 1.8814, "step": 28835 }, { "epoch": 0.96, "grad_norm": 0.45805567502975464, "learning_rate": 0.0004609132085514063, "loss": 1.7693, "step": 28836 }, { "epoch": 0.96, "grad_norm": 0.4602866768836975, "learning_rate": 0.00046090438709451816, "loss": 1.8034, "step": 28837 }, { "epoch": 0.96, "grad_norm": 0.4528927803039551, "learning_rate": 0.0004608955654423157, "loss": 1.807, "step": 28838 }, { "epoch": 0.96, "grad_norm": 0.4529561698436737, "learning_rate": 0.00046088674359480984, "loss": 1.8237, "step": 28839 }, { "epoch": 0.96, "grad_norm": 1.3702912330627441, "learning_rate": 0.00046087792155201113, "loss": 1.8546, "step": 28840 }, { "epoch": 0.96, "grad_norm": 0.46034637093544006, "learning_rate": 0.0004608690993139303, "loss": 1.7618, "step": 28841 }, { "epoch": 0.96, "grad_norm": 0.4431861937046051, "learning_rate": 0.00046086027688057815, "loss": 1.7673, "step": 28842 }, { "epoch": 0.96, "grad_norm": 0.4348476231098175, "learning_rate": 0.0004608514542519652, "loss": 1.7879, "step": 28843 }, { "epoch": 0.96, "grad_norm": 0.45529863238334656, "learning_rate": 0.00046084263142810245, "loss": 1.814, "step": 28844 }, { "epoch": 0.96, "grad_norm": 0.46353206038475037, "learning_rate": 0.00046083380840900034, "loss": 1.8204, "step": 28845 }, { "epoch": 0.96, "grad_norm": 0.42677077651023865, "learning_rate": 0.0004608249851946697, "loss": 1.7542, "step": 28846 }, { "epoch": 0.96, "grad_norm": 0.4517234265804291, "learning_rate": 0.00046081616178512123, "loss": 1.8052, "step": 28847 }, { "epoch": 0.96, "grad_norm": 0.45422351360321045, "learning_rate": 0.0004608073381803656, "loss": 1.8185, "step": 28848 }, { "epoch": 0.96, "grad_norm": 0.46134153008461, "learning_rate": 0.0004607985143804137, "loss": 1.7757, "step": 28849 }, { "epoch": 0.96, "grad_norm": 0.44095486402511597, "learning_rate": 0.00046078969038527593, "loss": 1.8501, "step": 28850 }, { "epoch": 0.96, "grad_norm": 0.4422442615032196, "learning_rate": 0.0004607808661949632, "loss": 1.9054, "step": 28851 }, { "epoch": 0.96, "grad_norm": 0.44759997725486755, "learning_rate": 0.0004607720418094862, "loss": 1.8357, "step": 28852 }, { "epoch": 0.96, "grad_norm": 0.4666880667209625, "learning_rate": 0.0004607632172288557, "loss": 1.8052, "step": 28853 }, { "epoch": 0.96, "grad_norm": 0.44474121928215027, "learning_rate": 0.00046075439245308225, "loss": 1.8114, "step": 28854 }, { "epoch": 0.96, "grad_norm": 0.44272249937057495, "learning_rate": 0.0004607455674821767, "loss": 1.7817, "step": 28855 }, { "epoch": 0.96, "grad_norm": 0.45050469040870667, "learning_rate": 0.0004607367423161497, "loss": 1.6921, "step": 28856 }, { "epoch": 0.96, "grad_norm": 0.47570130228996277, "learning_rate": 0.000460727916955012, "loss": 1.7939, "step": 28857 }, { "epoch": 0.96, "grad_norm": 0.43280214071273804, "learning_rate": 0.00046071909139877424, "loss": 1.7827, "step": 28858 }, { "epoch": 0.96, "grad_norm": 0.4596281945705414, "learning_rate": 0.0004607102656474472, "loss": 1.7339, "step": 28859 }, { "epoch": 0.96, "grad_norm": 0.45883750915527344, "learning_rate": 0.00046070143970104163, "loss": 1.8365, "step": 28860 }, { "epoch": 0.96, "grad_norm": 0.44088616967201233, "learning_rate": 0.00046069261355956813, "loss": 1.7364, "step": 28861 }, { "epoch": 0.96, "grad_norm": 0.4541932940483093, "learning_rate": 0.00046068378722303755, "loss": 1.7725, "step": 28862 }, { "epoch": 0.96, "grad_norm": 0.463426798582077, "learning_rate": 0.00046067496069146047, "loss": 1.7776, "step": 28863 }, { "epoch": 0.96, "grad_norm": 0.45970919728279114, "learning_rate": 0.00046066613396484766, "loss": 1.7967, "step": 28864 }, { "epoch": 0.96, "grad_norm": 0.45077770948410034, "learning_rate": 0.0004606573070432098, "loss": 1.8063, "step": 28865 }, { "epoch": 0.96, "grad_norm": 0.44769611954689026, "learning_rate": 0.0004606484799265577, "loss": 1.8541, "step": 28866 }, { "epoch": 0.96, "grad_norm": 0.47954022884368896, "learning_rate": 0.0004606396526149021, "loss": 1.8387, "step": 28867 }, { "epoch": 0.96, "grad_norm": 0.47004538774490356, "learning_rate": 0.00046063082510825344, "loss": 1.8429, "step": 28868 }, { "epoch": 0.96, "grad_norm": 0.4493981897830963, "learning_rate": 0.00046062199740662277, "loss": 1.8726, "step": 28869 }, { "epoch": 0.96, "grad_norm": 0.4444775879383087, "learning_rate": 0.0004606131695100206, "loss": 1.8394, "step": 28870 }, { "epoch": 0.96, "grad_norm": 0.5296658277511597, "learning_rate": 0.00046060434141845767, "loss": 1.7923, "step": 28871 }, { "epoch": 0.96, "grad_norm": 0.4629746079444885, "learning_rate": 0.00046059551313194487, "loss": 1.7893, "step": 28872 }, { "epoch": 0.96, "grad_norm": 0.4426187574863434, "learning_rate": 0.0004605866846504926, "loss": 1.7601, "step": 28873 }, { "epoch": 0.96, "grad_norm": 0.4456852078437805, "learning_rate": 0.0004605778559741119, "loss": 1.7177, "step": 28874 }, { "epoch": 0.96, "grad_norm": 0.47196656465530396, "learning_rate": 0.00046056902710281324, "loss": 1.7781, "step": 28875 }, { "epoch": 0.96, "grad_norm": 0.4532467722892761, "learning_rate": 0.0004605601980366075, "loss": 1.8436, "step": 28876 }, { "epoch": 0.96, "grad_norm": 0.4564567804336548, "learning_rate": 0.0004605513687755053, "loss": 1.7707, "step": 28877 }, { "epoch": 0.96, "grad_norm": 0.44840916991233826, "learning_rate": 0.00046054253931951747, "loss": 1.7983, "step": 28878 }, { "epoch": 0.96, "grad_norm": 0.45726442337036133, "learning_rate": 0.0004605337096686546, "loss": 1.8531, "step": 28879 }, { "epoch": 0.96, "grad_norm": 0.471979022026062, "learning_rate": 0.0004605248798229275, "loss": 1.7922, "step": 28880 }, { "epoch": 0.96, "grad_norm": 0.4689830243587494, "learning_rate": 0.0004605160497823468, "loss": 1.8411, "step": 28881 }, { "epoch": 0.96, "grad_norm": 0.44287243485450745, "learning_rate": 0.0004605072195469232, "loss": 1.8148, "step": 28882 }, { "epoch": 0.96, "grad_norm": 0.4404311180114746, "learning_rate": 0.0004604983891166677, "loss": 1.7974, "step": 28883 }, { "epoch": 0.96, "grad_norm": 0.43537282943725586, "learning_rate": 0.00046048955849159055, "loss": 1.8003, "step": 28884 }, { "epoch": 0.96, "grad_norm": 0.4510868787765503, "learning_rate": 0.0004604807276717029, "loss": 1.7994, "step": 28885 }, { "epoch": 0.96, "grad_norm": 0.4707152545452118, "learning_rate": 0.0004604718966570152, "loss": 1.7743, "step": 28886 }, { "epoch": 0.96, "grad_norm": 0.4492764174938202, "learning_rate": 0.0004604630654475383, "loss": 1.8441, "step": 28887 }, { "epoch": 0.96, "grad_norm": 0.4566868543624878, "learning_rate": 0.00046045423404328286, "loss": 1.8342, "step": 28888 }, { "epoch": 0.96, "grad_norm": 0.43798500299453735, "learning_rate": 0.0004604454024442597, "loss": 1.8156, "step": 28889 }, { "epoch": 0.96, "grad_norm": 0.477976530790329, "learning_rate": 0.0004604365706504794, "loss": 1.7436, "step": 28890 }, { "epoch": 0.96, "grad_norm": 0.445831835269928, "learning_rate": 0.00046042773866195275, "loss": 1.8236, "step": 28891 }, { "epoch": 0.96, "grad_norm": 0.452063649892807, "learning_rate": 0.00046041890647869043, "loss": 1.7373, "step": 28892 }, { "epoch": 0.96, "grad_norm": 0.45135676860809326, "learning_rate": 0.0004604100741007032, "loss": 1.8144, "step": 28893 }, { "epoch": 0.96, "grad_norm": 0.48817160725593567, "learning_rate": 0.0004604012415280018, "loss": 1.8194, "step": 28894 }, { "epoch": 0.96, "grad_norm": 0.4500655233860016, "learning_rate": 0.00046039240876059694, "loss": 1.8751, "step": 28895 }, { "epoch": 0.96, "grad_norm": 0.4329052269458771, "learning_rate": 0.0004603835757984993, "loss": 1.8271, "step": 28896 }, { "epoch": 0.96, "grad_norm": 0.4900113046169281, "learning_rate": 0.0004603747426417197, "loss": 1.8135, "step": 28897 }, { "epoch": 0.96, "grad_norm": 0.46722760796546936, "learning_rate": 0.00046036590929026864, "loss": 1.776, "step": 28898 }, { "epoch": 0.96, "grad_norm": 0.43139320611953735, "learning_rate": 0.00046035707574415715, "loss": 1.7918, "step": 28899 }, { "epoch": 0.96, "grad_norm": 0.4367241859436035, "learning_rate": 0.00046034824200339564, "loss": 1.7871, "step": 28900 }, { "epoch": 0.96, "grad_norm": 0.44849762320518494, "learning_rate": 0.0004603394080679951, "loss": 1.7271, "step": 28901 }, { "epoch": 0.96, "grad_norm": 0.44499483704566956, "learning_rate": 0.00046033057393796617, "loss": 1.7519, "step": 28902 }, { "epoch": 0.96, "grad_norm": 0.4434888958930969, "learning_rate": 0.00046032173961331956, "loss": 1.8311, "step": 28903 }, { "epoch": 0.96, "grad_norm": 0.44555631279945374, "learning_rate": 0.00046031290509406593, "loss": 1.7207, "step": 28904 }, { "epoch": 0.96, "grad_norm": 0.4564618468284607, "learning_rate": 0.00046030407038021595, "loss": 1.7944, "step": 28905 }, { "epoch": 0.96, "grad_norm": 0.4361833333969116, "learning_rate": 0.0004602952354717806, "loss": 1.8115, "step": 28906 }, { "epoch": 0.96, "grad_norm": 0.46709156036376953, "learning_rate": 0.0004602864003687705, "loss": 1.8034, "step": 28907 }, { "epoch": 0.96, "grad_norm": 0.43144747614860535, "learning_rate": 0.0004602775650711961, "loss": 1.8544, "step": 28908 }, { "epoch": 0.96, "grad_norm": 0.44285863637924194, "learning_rate": 0.0004602687295790685, "loss": 1.8455, "step": 28909 }, { "epoch": 0.96, "grad_norm": 0.42777130007743835, "learning_rate": 0.0004602598938923983, "loss": 1.7842, "step": 28910 }, { "epoch": 0.96, "grad_norm": 0.44556814432144165, "learning_rate": 0.0004602510580111962, "loss": 1.7998, "step": 28911 }, { "epoch": 0.96, "grad_norm": 0.4560120701789856, "learning_rate": 0.00046024222193547285, "loss": 1.778, "step": 28912 }, { "epoch": 0.96, "grad_norm": 0.6294575333595276, "learning_rate": 0.00046023338566523905, "loss": 1.7957, "step": 28913 }, { "epoch": 0.96, "grad_norm": 0.4369116425514221, "learning_rate": 0.0004602245492005056, "loss": 1.7783, "step": 28914 }, { "epoch": 0.96, "grad_norm": 0.466320663690567, "learning_rate": 0.0004602157125412831, "loss": 1.8058, "step": 28915 }, { "epoch": 0.96, "grad_norm": 0.4580272138118744, "learning_rate": 0.0004602068756875824, "loss": 1.7816, "step": 28916 }, { "epoch": 0.96, "grad_norm": 0.4600016176700592, "learning_rate": 0.0004601980386394141, "loss": 1.7921, "step": 28917 }, { "epoch": 0.96, "grad_norm": 0.4487718641757965, "learning_rate": 0.000460189201396789, "loss": 1.884, "step": 28918 }, { "epoch": 0.96, "grad_norm": 0.4596138894557953, "learning_rate": 0.0004601803639597178, "loss": 1.841, "step": 28919 }, { "epoch": 0.96, "grad_norm": 0.4579266607761383, "learning_rate": 0.00046017152632821126, "loss": 1.8647, "step": 28920 }, { "epoch": 0.96, "grad_norm": 0.44246649742126465, "learning_rate": 0.00046016268850228005, "loss": 1.789, "step": 28921 }, { "epoch": 0.96, "grad_norm": 0.44642093777656555, "learning_rate": 0.000460153850481935, "loss": 1.805, "step": 28922 }, { "epoch": 0.96, "grad_norm": 0.46025562286376953, "learning_rate": 0.0004601450122671867, "loss": 1.839, "step": 28923 }, { "epoch": 0.96, "grad_norm": 0.4561454653739929, "learning_rate": 0.00046013617385804606, "loss": 1.8566, "step": 28924 }, { "epoch": 0.96, "grad_norm": 0.47015300393104553, "learning_rate": 0.0004601273352545236, "loss": 1.8273, "step": 28925 }, { "epoch": 0.96, "grad_norm": 0.43647998571395874, "learning_rate": 0.0004601184964566302, "loss": 1.7061, "step": 28926 }, { "epoch": 0.96, "grad_norm": 0.44238927960395813, "learning_rate": 0.0004601096574643765, "loss": 1.8187, "step": 28927 }, { "epoch": 0.96, "grad_norm": 0.45804524421691895, "learning_rate": 0.00046010081827777325, "loss": 1.8481, "step": 28928 }, { "epoch": 0.96, "grad_norm": 0.4517088830471039, "learning_rate": 0.00046009197889683125, "loss": 1.797, "step": 28929 }, { "epoch": 0.96, "grad_norm": 0.460649311542511, "learning_rate": 0.0004600831393215611, "loss": 1.8207, "step": 28930 }, { "epoch": 0.96, "grad_norm": 0.45665955543518066, "learning_rate": 0.0004600742995519737, "loss": 1.7965, "step": 28931 }, { "epoch": 0.96, "grad_norm": 0.4366024136543274, "learning_rate": 0.0004600654595880797, "loss": 1.8364, "step": 28932 }, { "epoch": 0.96, "grad_norm": 0.5271813273429871, "learning_rate": 0.0004600566194298897, "loss": 1.7331, "step": 28933 }, { "epoch": 0.96, "grad_norm": 0.44692304730415344, "learning_rate": 0.0004600477790774146, "loss": 1.8033, "step": 28934 }, { "epoch": 0.96, "grad_norm": 0.4441262483596802, "learning_rate": 0.0004600389385306651, "loss": 1.8046, "step": 28935 }, { "epoch": 0.96, "grad_norm": 0.44797253608703613, "learning_rate": 0.00046003009778965197, "loss": 1.8068, "step": 28936 }, { "epoch": 0.96, "grad_norm": 0.4405076205730438, "learning_rate": 0.00046002125685438576, "loss": 1.7454, "step": 28937 }, { "epoch": 0.96, "grad_norm": 0.44572991132736206, "learning_rate": 0.0004600124157248773, "loss": 1.7868, "step": 28938 }, { "epoch": 0.96, "grad_norm": 0.44396522641181946, "learning_rate": 0.00046000357440113745, "loss": 1.802, "step": 28939 }, { "epoch": 0.96, "grad_norm": 0.4382980465888977, "learning_rate": 0.0004599947328831768, "loss": 1.7874, "step": 28940 }, { "epoch": 0.96, "grad_norm": 0.4333558976650238, "learning_rate": 0.0004599858911710061, "loss": 1.7565, "step": 28941 }, { "epoch": 0.96, "grad_norm": 0.42917510867118835, "learning_rate": 0.00045997704926463613, "loss": 1.729, "step": 28942 }, { "epoch": 0.96, "grad_norm": 0.4778095483779907, "learning_rate": 0.0004599682071640776, "loss": 1.8558, "step": 28943 }, { "epoch": 0.96, "grad_norm": 0.46361497044563293, "learning_rate": 0.0004599593648693412, "loss": 1.8109, "step": 28944 }, { "epoch": 0.96, "grad_norm": 0.4438549876213074, "learning_rate": 0.0004599505223804377, "loss": 1.8001, "step": 28945 }, { "epoch": 0.96, "grad_norm": 0.43589723110198975, "learning_rate": 0.0004599416796973779, "loss": 1.868, "step": 28946 }, { "epoch": 0.96, "grad_norm": 0.4506895840167999, "learning_rate": 0.0004599328368201724, "loss": 1.7183, "step": 28947 }, { "epoch": 0.96, "grad_norm": 1.7920526266098022, "learning_rate": 0.00045992399374883197, "loss": 1.9466, "step": 28948 }, { "epoch": 0.96, "grad_norm": 0.46257343888282776, "learning_rate": 0.00045991515048336755, "loss": 1.8405, "step": 28949 }, { "epoch": 0.96, "grad_norm": 0.43714413046836853, "learning_rate": 0.0004599063070237896, "loss": 1.7764, "step": 28950 }, { "epoch": 0.96, "grad_norm": 0.4412393569946289, "learning_rate": 0.00045989746337010885, "loss": 1.8074, "step": 28951 }, { "epoch": 0.96, "grad_norm": 0.4487318694591522, "learning_rate": 0.0004598886195223362, "loss": 1.8602, "step": 28952 }, { "epoch": 0.96, "grad_norm": 0.4330638647079468, "learning_rate": 0.00045987977548048235, "loss": 1.7831, "step": 28953 }, { "epoch": 0.96, "grad_norm": 0.46101266145706177, "learning_rate": 0.00045987093124455806, "loss": 1.772, "step": 28954 }, { "epoch": 0.96, "grad_norm": 0.4615112245082855, "learning_rate": 0.0004598620868145739, "loss": 1.7877, "step": 28955 }, { "epoch": 0.96, "grad_norm": 0.4494149088859558, "learning_rate": 0.0004598532421905409, "loss": 1.8486, "step": 28956 }, { "epoch": 0.96, "grad_norm": 0.42446133494377136, "learning_rate": 0.00045984439737246945, "loss": 1.8163, "step": 28957 }, { "epoch": 0.96, "grad_norm": 0.4614197313785553, "learning_rate": 0.0004598355523603704, "loss": 1.7924, "step": 28958 }, { "epoch": 0.96, "grad_norm": 0.4453183114528656, "learning_rate": 0.00045982670715425473, "loss": 1.7438, "step": 28959 }, { "epoch": 0.96, "grad_norm": 0.4550878703594208, "learning_rate": 0.00045981786175413285, "loss": 1.7498, "step": 28960 }, { "epoch": 0.96, "grad_norm": 0.45940840244293213, "learning_rate": 0.00045980901616001575, "loss": 1.7763, "step": 28961 }, { "epoch": 0.96, "grad_norm": 0.4571327269077301, "learning_rate": 0.00045980017037191385, "loss": 1.7732, "step": 28962 }, { "epoch": 0.96, "grad_norm": 0.44134923815727234, "learning_rate": 0.00045979132438983837, "loss": 1.847, "step": 28963 }, { "epoch": 0.96, "grad_norm": 0.42990991473197937, "learning_rate": 0.0004597824782137996, "loss": 1.7636, "step": 28964 }, { "epoch": 0.96, "grad_norm": 0.4353790283203125, "learning_rate": 0.00045977363184380836, "loss": 1.7907, "step": 28965 }, { "epoch": 0.96, "grad_norm": 0.4598134160041809, "learning_rate": 0.0004597647852798756, "loss": 1.817, "step": 28966 }, { "epoch": 0.96, "grad_norm": 0.448869913816452, "learning_rate": 0.0004597559385220119, "loss": 1.9159, "step": 28967 }, { "epoch": 0.96, "grad_norm": 0.44336599111557007, "learning_rate": 0.00045974709157022806, "loss": 1.8135, "step": 28968 }, { "epoch": 0.96, "grad_norm": 0.43464726209640503, "learning_rate": 0.0004597382444245348, "loss": 1.783, "step": 28969 }, { "epoch": 0.96, "grad_norm": 0.44162699580192566, "learning_rate": 0.0004597293970849428, "loss": 1.8375, "step": 28970 }, { "epoch": 0.96, "grad_norm": 0.44219970703125, "learning_rate": 0.0004597205495514628, "loss": 1.8218, "step": 28971 }, { "epoch": 0.96, "grad_norm": 0.4564480483531952, "learning_rate": 0.0004597117018241057, "loss": 1.7926, "step": 28972 }, { "epoch": 0.96, "grad_norm": 0.4717313349246979, "learning_rate": 0.0004597028539028821, "loss": 1.8931, "step": 28973 }, { "epoch": 0.96, "grad_norm": 0.5639367699623108, "learning_rate": 0.0004596940057878027, "loss": 1.8274, "step": 28974 }, { "epoch": 0.96, "grad_norm": 0.42979586124420166, "learning_rate": 0.00045968515747887834, "loss": 1.8268, "step": 28975 }, { "epoch": 0.96, "grad_norm": 0.4523683786392212, "learning_rate": 0.00045967630897611976, "loss": 1.8259, "step": 28976 }, { "epoch": 0.96, "grad_norm": 0.441967248916626, "learning_rate": 0.0004596674602795376, "loss": 1.7646, "step": 28977 }, { "epoch": 0.96, "grad_norm": 0.44477617740631104, "learning_rate": 0.0004596586113891427, "loss": 1.7805, "step": 28978 }, { "epoch": 0.96, "grad_norm": 0.44544804096221924, "learning_rate": 0.00045964976230494585, "loss": 1.7991, "step": 28979 }, { "epoch": 0.96, "grad_norm": 0.4662410318851471, "learning_rate": 0.0004596409130269576, "loss": 1.7731, "step": 28980 }, { "epoch": 0.96, "grad_norm": 0.4544956684112549, "learning_rate": 0.0004596320635551889, "loss": 1.7557, "step": 28981 }, { "epoch": 0.96, "grad_norm": 0.44861459732055664, "learning_rate": 0.00045962321388965034, "loss": 1.7756, "step": 28982 }, { "epoch": 0.96, "grad_norm": 0.4468103349208832, "learning_rate": 0.00045961436403035274, "loss": 1.8367, "step": 28983 }, { "epoch": 0.96, "grad_norm": 0.4706222712993622, "learning_rate": 0.00045960551397730676, "loss": 1.7689, "step": 28984 }, { "epoch": 0.96, "grad_norm": 0.4469781219959259, "learning_rate": 0.0004595966637305232, "loss": 1.8229, "step": 28985 }, { "epoch": 0.96, "grad_norm": 0.4708239436149597, "learning_rate": 0.0004595878132900129, "loss": 1.7623, "step": 28986 }, { "epoch": 0.96, "grad_norm": 0.4377118945121765, "learning_rate": 0.00045957896265578645, "loss": 1.7699, "step": 28987 }, { "epoch": 0.96, "grad_norm": 0.4326587915420532, "learning_rate": 0.0004595701118278546, "loss": 1.8487, "step": 28988 }, { "epoch": 0.96, "grad_norm": 0.4414699375629425, "learning_rate": 0.0004595612608062283, "loss": 1.7662, "step": 28989 }, { "epoch": 0.96, "grad_norm": 0.495625764131546, "learning_rate": 0.000459552409590918, "loss": 1.784, "step": 28990 }, { "epoch": 0.96, "grad_norm": 0.44751983880996704, "learning_rate": 0.0004595435581819347, "loss": 1.8368, "step": 28991 }, { "epoch": 0.96, "grad_norm": 0.44473472237586975, "learning_rate": 0.0004595347065792889, "loss": 1.7588, "step": 28992 }, { "epoch": 0.96, "grad_norm": 0.4557173252105713, "learning_rate": 0.0004595258547829915, "loss": 1.8048, "step": 28993 }, { "epoch": 0.96, "grad_norm": 0.4458679258823395, "learning_rate": 0.0004595170027930532, "loss": 1.8536, "step": 28994 }, { "epoch": 0.96, "grad_norm": 0.461901992559433, "learning_rate": 0.00045950815060948486, "loss": 1.8371, "step": 28995 }, { "epoch": 0.96, "grad_norm": 0.4351021349430084, "learning_rate": 0.0004594992982322971, "loss": 1.7025, "step": 28996 }, { "epoch": 0.96, "grad_norm": 0.4582234025001526, "learning_rate": 0.0004594904456615006, "loss": 1.7651, "step": 28997 }, { "epoch": 0.96, "grad_norm": 0.4462772309780121, "learning_rate": 0.00045948159289710633, "loss": 1.8081, "step": 28998 }, { "epoch": 0.96, "grad_norm": 0.4538542330265045, "learning_rate": 0.00045947273993912483, "loss": 1.7387, "step": 28999 }, { "epoch": 0.96, "grad_norm": 0.44161155819892883, "learning_rate": 0.0004594638867875669, "loss": 1.8395, "step": 29000 }, { "epoch": 0.96, "grad_norm": 0.4487752616405487, "learning_rate": 0.00045945503344244337, "loss": 1.8464, "step": 29001 }, { "epoch": 0.96, "grad_norm": 0.44258323311805725, "learning_rate": 0.00045944617990376486, "loss": 1.7637, "step": 29002 }, { "epoch": 0.96, "grad_norm": 0.4389348328113556, "learning_rate": 0.00045943732617154223, "loss": 1.7889, "step": 29003 }, { "epoch": 0.96, "grad_norm": 0.44421952962875366, "learning_rate": 0.00045942847224578616, "loss": 1.8754, "step": 29004 }, { "epoch": 0.96, "grad_norm": 0.43857690691947937, "learning_rate": 0.0004594196181265074, "loss": 1.843, "step": 29005 }, { "epoch": 0.97, "grad_norm": 0.4351924955844879, "learning_rate": 0.00045941076381371675, "loss": 1.7961, "step": 29006 }, { "epoch": 0.97, "grad_norm": 0.44628944993019104, "learning_rate": 0.00045940190930742494, "loss": 1.8804, "step": 29007 }, { "epoch": 0.97, "grad_norm": 0.4844115674495697, "learning_rate": 0.0004593930546076426, "loss": 1.859, "step": 29008 }, { "epoch": 0.97, "grad_norm": 0.45127183198928833, "learning_rate": 0.00045938419971438074, "loss": 1.7519, "step": 29009 }, { "epoch": 0.97, "grad_norm": 0.4529608190059662, "learning_rate": 0.0004593753446276497, "loss": 1.8172, "step": 29010 }, { "epoch": 0.97, "grad_norm": 0.44484391808509827, "learning_rate": 0.0004593664893474607, "loss": 1.7683, "step": 29011 }, { "epoch": 0.97, "grad_norm": 0.46044906973838806, "learning_rate": 0.0004593576338738242, "loss": 1.8227, "step": 29012 }, { "epoch": 0.97, "grad_norm": 0.44696173071861267, "learning_rate": 0.00045934877820675093, "loss": 1.8672, "step": 29013 }, { "epoch": 0.97, "grad_norm": 0.45962974429130554, "learning_rate": 0.0004593399223462519, "loss": 1.7913, "step": 29014 }, { "epoch": 0.97, "grad_norm": 0.45739859342575073, "learning_rate": 0.0004593310662923375, "loss": 1.8037, "step": 29015 }, { "epoch": 0.97, "grad_norm": 0.4310673177242279, "learning_rate": 0.0004593222100450187, "loss": 1.8077, "step": 29016 }, { "epoch": 0.97, "grad_norm": 0.45387232303619385, "learning_rate": 0.0004593133536043062, "loss": 1.729, "step": 29017 }, { "epoch": 0.97, "grad_norm": 0.43409714102745056, "learning_rate": 0.00045930449697021087, "loss": 1.8353, "step": 29018 }, { "epoch": 0.97, "grad_norm": 0.4563089907169342, "learning_rate": 0.0004592956401427433, "loss": 1.7745, "step": 29019 }, { "epoch": 0.97, "grad_norm": 0.43090197443962097, "learning_rate": 0.0004592867831219143, "loss": 1.8551, "step": 29020 }, { "epoch": 0.97, "grad_norm": 0.4314652383327484, "learning_rate": 0.0004592779259077346, "loss": 1.7088, "step": 29021 }, { "epoch": 0.97, "grad_norm": 0.4461835026741028, "learning_rate": 0.0004592690685002149, "loss": 1.8217, "step": 29022 }, { "epoch": 0.97, "grad_norm": 0.4603424668312073, "learning_rate": 0.00045926021089936613, "loss": 1.8877, "step": 29023 }, { "epoch": 0.97, "grad_norm": 0.4457383155822754, "learning_rate": 0.0004592513531051989, "loss": 1.8088, "step": 29024 }, { "epoch": 0.97, "grad_norm": 0.45345473289489746, "learning_rate": 0.0004592424951177239, "loss": 1.8069, "step": 29025 }, { "epoch": 0.97, "grad_norm": 0.45118585228919983, "learning_rate": 0.0004592336369369521, "loss": 1.8262, "step": 29026 }, { "epoch": 0.97, "grad_norm": 0.43957456946372986, "learning_rate": 0.00045922477856289405, "loss": 1.7528, "step": 29027 }, { "epoch": 0.97, "grad_norm": 0.4392201900482178, "learning_rate": 0.00045921591999556065, "loss": 1.778, "step": 29028 }, { "epoch": 0.97, "grad_norm": 0.4492070972919464, "learning_rate": 0.0004592070612349625, "loss": 1.8329, "step": 29029 }, { "epoch": 0.97, "grad_norm": 0.43854621052742004, "learning_rate": 0.00045919820228111055, "loss": 1.8139, "step": 29030 }, { "epoch": 0.97, "grad_norm": 0.5830960273742676, "learning_rate": 0.00045918934313401527, "loss": 1.841, "step": 29031 }, { "epoch": 0.97, "grad_norm": 0.4415137469768524, "learning_rate": 0.0004591804837936877, "loss": 1.7893, "step": 29032 }, { "epoch": 0.97, "grad_norm": 0.44585901498794556, "learning_rate": 0.00045917162426013845, "loss": 1.8591, "step": 29033 }, { "epoch": 0.97, "grad_norm": 0.4578116834163666, "learning_rate": 0.0004591627645333784, "loss": 1.853, "step": 29034 }, { "epoch": 0.97, "grad_norm": 0.4657333791255951, "learning_rate": 0.000459153904613418, "loss": 1.7631, "step": 29035 }, { "epoch": 0.97, "grad_norm": 0.45075181126594543, "learning_rate": 0.0004591450445002683, "loss": 1.7896, "step": 29036 }, { "epoch": 0.97, "grad_norm": 0.4437636733055115, "learning_rate": 0.00045913618419394, "loss": 1.7959, "step": 29037 }, { "epoch": 0.97, "grad_norm": 0.4385814964771271, "learning_rate": 0.00045912732369444385, "loss": 1.8049, "step": 29038 }, { "epoch": 0.97, "grad_norm": 0.46267110109329224, "learning_rate": 0.0004591184630017905, "loss": 1.8463, "step": 29039 }, { "epoch": 0.97, "grad_norm": 0.46603697538375854, "learning_rate": 0.00045910960211599077, "loss": 1.8768, "step": 29040 }, { "epoch": 0.97, "grad_norm": 0.44971275329589844, "learning_rate": 0.0004591007410370555, "loss": 1.8397, "step": 29041 }, { "epoch": 0.97, "grad_norm": 0.45919275283813477, "learning_rate": 0.00045909187976499535, "loss": 1.8275, "step": 29042 }, { "epoch": 0.97, "grad_norm": 0.47247979044914246, "learning_rate": 0.000459083018299821, "loss": 1.7755, "step": 29043 }, { "epoch": 0.97, "grad_norm": 0.44274911284446716, "learning_rate": 0.0004590741566415435, "loss": 1.8153, "step": 29044 }, { "epoch": 0.97, "grad_norm": 0.4536496698856354, "learning_rate": 0.00045906529479017327, "loss": 1.8117, "step": 29045 }, { "epoch": 0.97, "grad_norm": 0.463256299495697, "learning_rate": 0.00045905643274572126, "loss": 1.8121, "step": 29046 }, { "epoch": 0.97, "grad_norm": 0.45165959000587463, "learning_rate": 0.0004590475705081981, "loss": 1.8159, "step": 29047 }, { "epoch": 0.97, "grad_norm": 0.4498838186264038, "learning_rate": 0.00045903870807761476, "loss": 1.8128, "step": 29048 }, { "epoch": 0.97, "grad_norm": 0.44991108775138855, "learning_rate": 0.0004590298454539818, "loss": 1.8084, "step": 29049 }, { "epoch": 0.97, "grad_norm": 0.4407051205635071, "learning_rate": 0.00045902098263730993, "loss": 1.8161, "step": 29050 }, { "epoch": 0.97, "grad_norm": 0.44465577602386475, "learning_rate": 0.0004590121196276102, "loss": 1.821, "step": 29051 }, { "epoch": 0.97, "grad_norm": 0.4405517280101776, "learning_rate": 0.000459003256424893, "loss": 1.8166, "step": 29052 }, { "epoch": 0.97, "grad_norm": 0.44296717643737793, "learning_rate": 0.0004589943930291695, "loss": 1.7394, "step": 29053 }, { "epoch": 0.97, "grad_norm": 0.46566176414489746, "learning_rate": 0.0004589855294404501, "loss": 1.8049, "step": 29054 }, { "epoch": 0.97, "grad_norm": 0.44367772340774536, "learning_rate": 0.00045897666565874567, "loss": 1.7246, "step": 29055 }, { "epoch": 0.97, "grad_norm": 0.45305734872817993, "learning_rate": 0.000458967801684067, "loss": 1.841, "step": 29056 }, { "epoch": 0.97, "grad_norm": 0.4572519063949585, "learning_rate": 0.0004589589375164249, "loss": 1.7922, "step": 29057 }, { "epoch": 0.97, "grad_norm": 0.45243239402770996, "learning_rate": 0.00045895007315583004, "loss": 1.8183, "step": 29058 }, { "epoch": 0.97, "grad_norm": 0.4316883683204651, "learning_rate": 0.00045894120860229324, "loss": 1.777, "step": 29059 }, { "epoch": 0.97, "grad_norm": 0.44095754623413086, "learning_rate": 0.0004589323438558251, "loss": 1.7784, "step": 29060 }, { "epoch": 0.97, "grad_norm": 0.5584191679954529, "learning_rate": 0.00045892347891643664, "loss": 1.7833, "step": 29061 }, { "epoch": 0.97, "grad_norm": 0.4833942949771881, "learning_rate": 0.00045891461378413847, "loss": 1.8064, "step": 29062 }, { "epoch": 0.97, "grad_norm": 0.4423753321170807, "learning_rate": 0.0004589057484589414, "loss": 1.7893, "step": 29063 }, { "epoch": 0.97, "grad_norm": 0.4476962089538574, "learning_rate": 0.0004588968829408562, "loss": 1.822, "step": 29064 }, { "epoch": 0.97, "grad_norm": 0.4523157477378845, "learning_rate": 0.0004588880172298934, "loss": 1.7787, "step": 29065 }, { "epoch": 0.97, "grad_norm": 0.49687743186950684, "learning_rate": 0.00045887915132606413, "loss": 1.8988, "step": 29066 }, { "epoch": 0.97, "grad_norm": 0.44235801696777344, "learning_rate": 0.0004588702852293789, "loss": 1.765, "step": 29067 }, { "epoch": 0.97, "grad_norm": 1.2249821424484253, "learning_rate": 0.0004588614189398486, "loss": 1.8653, "step": 29068 }, { "epoch": 0.97, "grad_norm": 0.4639039933681488, "learning_rate": 0.0004588525524574839, "loss": 1.8475, "step": 29069 }, { "epoch": 0.97, "grad_norm": 0.46514999866485596, "learning_rate": 0.0004588436857822957, "loss": 1.8124, "step": 29070 }, { "epoch": 0.97, "grad_norm": 0.4720768332481384, "learning_rate": 0.00045883481891429464, "loss": 1.898, "step": 29071 }, { "epoch": 0.97, "grad_norm": 0.45124801993370056, "learning_rate": 0.0004588259518534914, "loss": 1.8355, "step": 29072 }, { "epoch": 0.97, "grad_norm": 0.44867005944252014, "learning_rate": 0.000458817084599897, "loss": 1.7486, "step": 29073 }, { "epoch": 0.97, "grad_norm": 0.43826037645339966, "learning_rate": 0.00045880821715352195, "loss": 1.8404, "step": 29074 }, { "epoch": 0.97, "grad_norm": 0.45863622426986694, "learning_rate": 0.0004587993495143772, "loss": 1.7677, "step": 29075 }, { "epoch": 0.97, "grad_norm": 0.4642792344093323, "learning_rate": 0.0004587904816824735, "loss": 1.8377, "step": 29076 }, { "epoch": 0.97, "grad_norm": 0.4353622496128082, "learning_rate": 0.00045878161365782135, "loss": 1.7728, "step": 29077 }, { "epoch": 0.97, "grad_norm": 0.4438364803791046, "learning_rate": 0.0004587727454404319, "loss": 1.7484, "step": 29078 }, { "epoch": 0.97, "grad_norm": 0.4813902676105499, "learning_rate": 0.0004587638770303156, "loss": 1.9328, "step": 29079 }, { "epoch": 0.97, "grad_norm": 0.4609367549419403, "learning_rate": 0.0004587550084274834, "loss": 1.7464, "step": 29080 }, { "epoch": 0.97, "grad_norm": 0.44798266887664795, "learning_rate": 0.00045874613963194604, "loss": 1.8559, "step": 29081 }, { "epoch": 0.97, "grad_norm": 0.4275124669075012, "learning_rate": 0.0004587372706437143, "loss": 1.8006, "step": 29082 }, { "epoch": 0.97, "grad_norm": 0.46421441435813904, "learning_rate": 0.00045872840146279887, "loss": 1.8843, "step": 29083 }, { "epoch": 0.97, "grad_norm": 0.4453797936439514, "learning_rate": 0.00045871953208921043, "loss": 1.7849, "step": 29084 }, { "epoch": 0.97, "grad_norm": 0.44880205392837524, "learning_rate": 0.00045871066252296, "loss": 1.8236, "step": 29085 }, { "epoch": 0.97, "grad_norm": 0.44552305340766907, "learning_rate": 0.00045870179276405807, "loss": 1.807, "step": 29086 }, { "epoch": 0.97, "grad_norm": 0.4670029282569885, "learning_rate": 0.00045869292281251566, "loss": 1.8209, "step": 29087 }, { "epoch": 0.97, "grad_norm": 0.4455377459526062, "learning_rate": 0.0004586840526683434, "loss": 1.86, "step": 29088 }, { "epoch": 0.97, "grad_norm": 0.4590107500553131, "learning_rate": 0.0004586751823315521, "loss": 1.8071, "step": 29089 }, { "epoch": 0.97, "grad_norm": 0.4464051127433777, "learning_rate": 0.0004586663118021525, "loss": 1.869, "step": 29090 }, { "epoch": 0.97, "grad_norm": 0.45234087109565735, "learning_rate": 0.0004586574410801554, "loss": 1.8128, "step": 29091 }, { "epoch": 0.97, "grad_norm": 0.4453487992286682, "learning_rate": 0.00045864857016557147, "loss": 1.8051, "step": 29092 }, { "epoch": 0.97, "grad_norm": 0.45248398184776306, "learning_rate": 0.00045863969905841163, "loss": 1.8259, "step": 29093 }, { "epoch": 0.97, "grad_norm": 0.45240283012390137, "learning_rate": 0.00045863082775868654, "loss": 1.8619, "step": 29094 }, { "epoch": 0.97, "grad_norm": 0.4491201341152191, "learning_rate": 0.0004586219562664069, "loss": 1.8233, "step": 29095 }, { "epoch": 0.97, "grad_norm": 0.44274580478668213, "learning_rate": 0.0004586130845815838, "loss": 1.8237, "step": 29096 }, { "epoch": 0.97, "grad_norm": 0.4346216320991516, "learning_rate": 0.0004586042127042276, "loss": 1.7743, "step": 29097 }, { "epoch": 0.97, "grad_norm": 0.43238842487335205, "learning_rate": 0.0004585953406343493, "loss": 1.765, "step": 29098 }, { "epoch": 0.97, "grad_norm": 0.4570201337337494, "learning_rate": 0.00045858646837195963, "loss": 1.7872, "step": 29099 }, { "epoch": 0.97, "grad_norm": 0.45139941573143005, "learning_rate": 0.0004585775959170694, "loss": 1.8217, "step": 29100 }, { "epoch": 0.97, "grad_norm": 0.6433231234550476, "learning_rate": 0.0004585687232696893, "loss": 1.8235, "step": 29101 }, { "epoch": 0.97, "grad_norm": 0.44651278853416443, "learning_rate": 0.00045855985042983006, "loss": 1.7858, "step": 29102 }, { "epoch": 0.97, "grad_norm": 0.47983384132385254, "learning_rate": 0.0004585509773975026, "loss": 1.9385, "step": 29103 }, { "epoch": 0.97, "grad_norm": 0.4663298428058624, "learning_rate": 0.00045854210417271764, "loss": 1.7712, "step": 29104 }, { "epoch": 0.97, "grad_norm": 0.4649513065814972, "learning_rate": 0.0004585332307554858, "loss": 1.7793, "step": 29105 }, { "epoch": 0.97, "grad_norm": 0.46026721596717834, "learning_rate": 0.0004585243571458181, "loss": 1.85, "step": 29106 }, { "epoch": 0.97, "grad_norm": 0.4618982672691345, "learning_rate": 0.0004585154833437251, "loss": 1.7744, "step": 29107 }, { "epoch": 0.97, "grad_norm": 0.455009400844574, "learning_rate": 0.0004585066093492178, "loss": 1.8262, "step": 29108 }, { "epoch": 0.97, "grad_norm": 0.46270161867141724, "learning_rate": 0.0004584977351623067, "loss": 1.7767, "step": 29109 }, { "epoch": 0.97, "grad_norm": 0.4500337541103363, "learning_rate": 0.00045848886078300277, "loss": 1.7534, "step": 29110 }, { "epoch": 0.97, "grad_norm": 0.4541413187980652, "learning_rate": 0.0004584799862113167, "loss": 1.7414, "step": 29111 }, { "epoch": 0.97, "grad_norm": 0.47365275025367737, "learning_rate": 0.00045847111144725925, "loss": 1.8704, "step": 29112 }, { "epoch": 0.97, "grad_norm": 0.433994859457016, "learning_rate": 0.0004584622364908412, "loss": 1.7572, "step": 29113 }, { "epoch": 0.97, "grad_norm": 0.451454758644104, "learning_rate": 0.0004584533613420734, "loss": 1.8223, "step": 29114 }, { "epoch": 0.97, "grad_norm": 0.4536222219467163, "learning_rate": 0.0004584444860009666, "loss": 1.8766, "step": 29115 }, { "epoch": 0.97, "grad_norm": 0.45374980568885803, "learning_rate": 0.00045843561046753154, "loss": 1.8496, "step": 29116 }, { "epoch": 0.97, "grad_norm": 0.4543422758579254, "learning_rate": 0.00045842673474177887, "loss": 1.7997, "step": 29117 }, { "epoch": 0.97, "grad_norm": 0.44383883476257324, "learning_rate": 0.0004584178588237196, "loss": 1.7754, "step": 29118 }, { "epoch": 0.97, "grad_norm": 0.4658145308494568, "learning_rate": 0.0004584089827133643, "loss": 1.7853, "step": 29119 }, { "epoch": 0.97, "grad_norm": 0.44728589057922363, "learning_rate": 0.00045840010641072385, "loss": 1.8025, "step": 29120 }, { "epoch": 0.97, "grad_norm": 0.4386349320411682, "learning_rate": 0.0004583912299158091, "loss": 1.7944, "step": 29121 }, { "epoch": 0.97, "grad_norm": 0.4521963596343994, "learning_rate": 0.0004583823532286307, "loss": 1.7264, "step": 29122 }, { "epoch": 0.97, "grad_norm": 0.4470376968383789, "learning_rate": 0.00045837347634919944, "loss": 1.7999, "step": 29123 }, { "epoch": 0.97, "grad_norm": 0.44367730617523193, "learning_rate": 0.00045836459927752615, "loss": 1.7983, "step": 29124 }, { "epoch": 0.97, "grad_norm": 0.4470272362232208, "learning_rate": 0.0004583557220136215, "loss": 1.7949, "step": 29125 }, { "epoch": 0.97, "grad_norm": 0.44532379508018494, "learning_rate": 0.00045834684455749636, "loss": 1.8471, "step": 29126 }, { "epoch": 0.97, "grad_norm": 0.4418599307537079, "learning_rate": 0.0004583379669091615, "loss": 1.8049, "step": 29127 }, { "epoch": 0.97, "grad_norm": 0.45212501287460327, "learning_rate": 0.0004583290890686277, "loss": 1.7444, "step": 29128 }, { "epoch": 0.97, "grad_norm": 0.4502033293247223, "learning_rate": 0.00045832021103590567, "loss": 1.8528, "step": 29129 }, { "epoch": 0.97, "grad_norm": 0.43022945523262024, "learning_rate": 0.0004583113328110062, "loss": 1.7896, "step": 29130 }, { "epoch": 0.97, "grad_norm": 0.44777578115463257, "learning_rate": 0.0004583024543939402, "loss": 1.8534, "step": 29131 }, { "epoch": 0.97, "grad_norm": 0.4435313940048218, "learning_rate": 0.0004582935757847182, "loss": 1.7916, "step": 29132 }, { "epoch": 0.97, "grad_norm": 0.43613293766975403, "learning_rate": 0.0004582846969833513, "loss": 1.8593, "step": 29133 }, { "epoch": 0.97, "grad_norm": 0.4508908987045288, "learning_rate": 0.0004582758179898499, "loss": 1.7792, "step": 29134 }, { "epoch": 0.97, "grad_norm": 0.45877471566200256, "learning_rate": 0.0004582669388042252, "loss": 1.8041, "step": 29135 }, { "epoch": 0.97, "grad_norm": 0.4391632378101349, "learning_rate": 0.0004582580594264876, "loss": 1.7926, "step": 29136 }, { "epoch": 0.97, "grad_norm": 0.46043020486831665, "learning_rate": 0.0004582491798566481, "loss": 1.7991, "step": 29137 }, { "epoch": 0.97, "grad_norm": 0.44459182024002075, "learning_rate": 0.00045824030009471747, "loss": 1.8362, "step": 29138 }, { "epoch": 0.97, "grad_norm": 0.4522154927253723, "learning_rate": 0.00045823142014070627, "loss": 1.8784, "step": 29139 }, { "epoch": 0.97, "grad_norm": 0.4509173631668091, "learning_rate": 0.00045822253999462556, "loss": 1.7538, "step": 29140 }, { "epoch": 0.97, "grad_norm": 0.45161834359169006, "learning_rate": 0.00045821365965648597, "loss": 1.8402, "step": 29141 }, { "epoch": 0.97, "grad_norm": 0.460633784532547, "learning_rate": 0.0004582047791262983, "loss": 1.8456, "step": 29142 }, { "epoch": 0.97, "grad_norm": 0.44642192125320435, "learning_rate": 0.00045819589840407336, "loss": 1.7974, "step": 29143 }, { "epoch": 0.97, "grad_norm": 0.44560715556144714, "learning_rate": 0.0004581870174898218, "loss": 1.8196, "step": 29144 }, { "epoch": 0.97, "grad_norm": 0.4628669023513794, "learning_rate": 0.0004581781363835547, "loss": 1.8085, "step": 29145 }, { "epoch": 0.97, "grad_norm": 0.45757412910461426, "learning_rate": 0.00045816925508528256, "loss": 1.8263, "step": 29146 }, { "epoch": 0.97, "grad_norm": 0.44569575786590576, "learning_rate": 0.0004581603735950162, "loss": 1.8607, "step": 29147 }, { "epoch": 0.97, "grad_norm": 0.4580426812171936, "learning_rate": 0.00045815149191276645, "loss": 1.919, "step": 29148 }, { "epoch": 0.97, "grad_norm": 0.4410432279109955, "learning_rate": 0.0004581426100385442, "loss": 1.7738, "step": 29149 }, { "epoch": 0.97, "grad_norm": 0.45019739866256714, "learning_rate": 0.00045813372797235996, "loss": 1.8241, "step": 29150 }, { "epoch": 0.97, "grad_norm": 0.45410823822021484, "learning_rate": 0.0004581248457142248, "loss": 1.7846, "step": 29151 }, { "epoch": 0.97, "grad_norm": 0.44774553179740906, "learning_rate": 0.00045811596326414937, "loss": 1.8982, "step": 29152 }, { "epoch": 0.97, "grad_norm": 0.429991215467453, "learning_rate": 0.00045810708062214446, "loss": 1.8023, "step": 29153 }, { "epoch": 0.97, "grad_norm": 0.446017861366272, "learning_rate": 0.0004580981977882208, "loss": 1.7574, "step": 29154 }, { "epoch": 0.97, "grad_norm": 0.45691248774528503, "learning_rate": 0.00045808931476238923, "loss": 1.815, "step": 29155 }, { "epoch": 0.97, "grad_norm": 0.44929295778274536, "learning_rate": 0.00045808043154466055, "loss": 1.8021, "step": 29156 }, { "epoch": 0.97, "grad_norm": 0.44154536724090576, "learning_rate": 0.00045807154813504544, "loss": 1.8146, "step": 29157 }, { "epoch": 0.97, "grad_norm": 0.4539095163345337, "learning_rate": 0.0004580626645335549, "loss": 1.8254, "step": 29158 }, { "epoch": 0.97, "grad_norm": 0.4490291178226471, "learning_rate": 0.00045805378074019953, "loss": 1.7753, "step": 29159 }, { "epoch": 0.97, "grad_norm": 0.4582272469997406, "learning_rate": 0.00045804489675499004, "loss": 1.8074, "step": 29160 }, { "epoch": 0.97, "grad_norm": 0.43251362442970276, "learning_rate": 0.0004580360125779375, "loss": 1.7641, "step": 29161 }, { "epoch": 0.97, "grad_norm": 0.4670669734477997, "learning_rate": 0.0004580271282090524, "loss": 1.769, "step": 29162 }, { "epoch": 0.97, "grad_norm": 0.44760891795158386, "learning_rate": 0.0004580182436483458, "loss": 1.8581, "step": 29163 }, { "epoch": 0.97, "grad_norm": 0.4646676182746887, "learning_rate": 0.00045800935889582815, "loss": 1.7635, "step": 29164 }, { "epoch": 0.97, "grad_norm": 0.449462354183197, "learning_rate": 0.0004580004739515105, "loss": 1.8723, "step": 29165 }, { "epoch": 0.97, "grad_norm": 0.45990678668022156, "learning_rate": 0.0004579915888154036, "loss": 1.9224, "step": 29166 }, { "epoch": 0.97, "grad_norm": 0.43420737981796265, "learning_rate": 0.00045798270348751814, "loss": 1.8288, "step": 29167 }, { "epoch": 0.97, "grad_norm": 0.47101491689682007, "learning_rate": 0.00045797381796786486, "loss": 1.838, "step": 29168 }, { "epoch": 0.97, "grad_norm": 0.45405226945877075, "learning_rate": 0.0004579649322564548, "loss": 1.7628, "step": 29169 }, { "epoch": 0.97, "grad_norm": 0.43159884214401245, "learning_rate": 0.00045795604635329856, "loss": 1.8004, "step": 29170 }, { "epoch": 0.97, "grad_norm": 0.4570610821247101, "learning_rate": 0.00045794716025840697, "loss": 1.8198, "step": 29171 }, { "epoch": 0.97, "grad_norm": 0.44785076379776, "learning_rate": 0.00045793827397179075, "loss": 1.7233, "step": 29172 }, { "epoch": 0.97, "grad_norm": 0.44186151027679443, "learning_rate": 0.00045792938749346075, "loss": 1.8061, "step": 29173 }, { "epoch": 0.97, "grad_norm": 0.44715479016304016, "learning_rate": 0.00045792050082342766, "loss": 1.7694, "step": 29174 }, { "epoch": 0.97, "grad_norm": 0.43392032384872437, "learning_rate": 0.0004579116139617024, "loss": 1.772, "step": 29175 }, { "epoch": 0.97, "grad_norm": 0.44232508540153503, "learning_rate": 0.00045790272690829587, "loss": 1.844, "step": 29176 }, { "epoch": 0.97, "grad_norm": 0.5083451867103577, "learning_rate": 0.00045789383966321853, "loss": 1.8292, "step": 29177 }, { "epoch": 0.97, "grad_norm": 0.4287561774253845, "learning_rate": 0.0004578849522264813, "loss": 1.8373, "step": 29178 }, { "epoch": 0.97, "grad_norm": 0.427437424659729, "learning_rate": 0.00045787606459809507, "loss": 1.7665, "step": 29179 }, { "epoch": 0.97, "grad_norm": 0.8296029567718506, "learning_rate": 0.0004578671767780705, "loss": 1.7765, "step": 29180 }, { "epoch": 0.97, "grad_norm": 0.45557236671447754, "learning_rate": 0.0004578582887664186, "loss": 1.7899, "step": 29181 }, { "epoch": 0.97, "grad_norm": 1.1991320848464966, "learning_rate": 0.0004578494005631498, "loss": 1.8698, "step": 29182 }, { "epoch": 0.97, "grad_norm": 0.4424827992916107, "learning_rate": 0.0004578405121682753, "loss": 1.8679, "step": 29183 }, { "epoch": 0.97, "grad_norm": 0.4663943946361542, "learning_rate": 0.0004578316235818055, "loss": 1.7798, "step": 29184 }, { "epoch": 0.97, "grad_norm": 0.4499356150627136, "learning_rate": 0.00045782273480375136, "loss": 1.7402, "step": 29185 }, { "epoch": 0.97, "grad_norm": 0.4549562633037567, "learning_rate": 0.00045781384583412377, "loss": 1.893, "step": 29186 }, { "epoch": 0.97, "grad_norm": 0.4501345753669739, "learning_rate": 0.00045780495667293333, "loss": 1.7751, "step": 29187 }, { "epoch": 0.97, "grad_norm": 0.4462020695209503, "learning_rate": 0.00045779606732019104, "loss": 1.8103, "step": 29188 }, { "epoch": 0.97, "grad_norm": 0.43839213252067566, "learning_rate": 0.0004577871777759074, "loss": 1.8163, "step": 29189 }, { "epoch": 0.97, "grad_norm": 0.4450785517692566, "learning_rate": 0.0004577782880400936, "loss": 1.818, "step": 29190 }, { "epoch": 0.97, "grad_norm": 0.4369942545890808, "learning_rate": 0.00045776939811276006, "loss": 1.8438, "step": 29191 }, { "epoch": 0.97, "grad_norm": 0.4383920431137085, "learning_rate": 0.00045776050799391775, "loss": 1.834, "step": 29192 }, { "epoch": 0.97, "grad_norm": 0.46686699986457825, "learning_rate": 0.00045775161768357747, "loss": 1.796, "step": 29193 }, { "epoch": 0.97, "grad_norm": 0.43820247054100037, "learning_rate": 0.00045774272718174984, "loss": 1.8339, "step": 29194 }, { "epoch": 0.97, "grad_norm": 0.4452209770679474, "learning_rate": 0.000457733836488446, "loss": 1.7914, "step": 29195 }, { "epoch": 0.97, "grad_norm": 0.45112845301628113, "learning_rate": 0.0004577249456036762, "loss": 1.7982, "step": 29196 }, { "epoch": 0.97, "grad_norm": 0.4466117024421692, "learning_rate": 0.0004577160545274519, "loss": 1.7952, "step": 29197 }, { "epoch": 0.97, "grad_norm": 0.43245992064476013, "learning_rate": 0.00045770716325978336, "loss": 1.8251, "step": 29198 }, { "epoch": 0.97, "grad_norm": 0.4648151397705078, "learning_rate": 0.0004576982718006816, "loss": 1.6792, "step": 29199 }, { "epoch": 0.97, "grad_norm": 0.45887696743011475, "learning_rate": 0.0004576893801501574, "loss": 1.8378, "step": 29200 }, { "epoch": 0.97, "grad_norm": 0.44474899768829346, "learning_rate": 0.00045768048830822146, "loss": 1.8247, "step": 29201 }, { "epoch": 0.97, "grad_norm": 0.44200682640075684, "learning_rate": 0.00045767159627488465, "loss": 1.804, "step": 29202 }, { "epoch": 0.97, "grad_norm": 0.45381832122802734, "learning_rate": 0.0004576627040501578, "loss": 1.8003, "step": 29203 }, { "epoch": 0.97, "grad_norm": 0.4575648903846741, "learning_rate": 0.0004576538116340516, "loss": 1.7944, "step": 29204 }, { "epoch": 0.97, "grad_norm": 0.44651830196380615, "learning_rate": 0.000457644919026577, "loss": 1.8544, "step": 29205 }, { "epoch": 0.97, "grad_norm": 0.4622005224227905, "learning_rate": 0.0004576360262277446, "loss": 1.7478, "step": 29206 }, { "epoch": 0.97, "grad_norm": 0.48014551401138306, "learning_rate": 0.00045762713323756535, "loss": 1.8279, "step": 29207 }, { "epoch": 0.97, "grad_norm": 0.4701796770095825, "learning_rate": 0.00045761824005605004, "loss": 1.869, "step": 29208 }, { "epoch": 0.97, "grad_norm": 0.44757598638534546, "learning_rate": 0.0004576093466832093, "loss": 1.8357, "step": 29209 }, { "epoch": 0.97, "grad_norm": 0.4319651424884796, "learning_rate": 0.00045760045311905406, "loss": 1.8122, "step": 29210 }, { "epoch": 0.97, "grad_norm": 0.45000922679901123, "learning_rate": 0.000457591559363595, "loss": 1.76, "step": 29211 }, { "epoch": 0.97, "grad_norm": 0.4654279947280884, "learning_rate": 0.00045758266541684315, "loss": 1.7357, "step": 29212 }, { "epoch": 0.97, "grad_norm": 0.4814634323120117, "learning_rate": 0.00045757377127880923, "loss": 1.8397, "step": 29213 }, { "epoch": 0.97, "grad_norm": 0.44184285402297974, "learning_rate": 0.0004575648769495038, "loss": 1.7821, "step": 29214 }, { "epoch": 0.97, "grad_norm": 0.4525999426841736, "learning_rate": 0.0004575559824289379, "loss": 1.7664, "step": 29215 }, { "epoch": 0.97, "grad_norm": 0.45395222306251526, "learning_rate": 0.0004575470877171222, "loss": 1.7959, "step": 29216 }, { "epoch": 0.97, "grad_norm": 0.4350382685661316, "learning_rate": 0.00045753819281406756, "loss": 1.8278, "step": 29217 }, { "epoch": 0.97, "grad_norm": 0.4612733721733093, "learning_rate": 0.0004575292977197849, "loss": 1.759, "step": 29218 }, { "epoch": 0.97, "grad_norm": 1.831843614578247, "learning_rate": 0.0004575204024342847, "loss": 1.8802, "step": 29219 }, { "epoch": 0.97, "grad_norm": 0.45739439129829407, "learning_rate": 0.00045751150695757804, "loss": 1.7566, "step": 29220 }, { "epoch": 0.97, "grad_norm": 0.45758408308029175, "learning_rate": 0.0004575026112896756, "loss": 1.7606, "step": 29221 }, { "epoch": 0.97, "grad_norm": 0.44523441791534424, "learning_rate": 0.00045749371543058815, "loss": 1.8369, "step": 29222 }, { "epoch": 0.97, "grad_norm": 0.4481377899646759, "learning_rate": 0.00045748481938032664, "loss": 1.766, "step": 29223 }, { "epoch": 0.97, "grad_norm": 0.4396619498729706, "learning_rate": 0.00045747592313890163, "loss": 1.7881, "step": 29224 }, { "epoch": 0.97, "grad_norm": 1.086501955986023, "learning_rate": 0.0004574670267063242, "loss": 1.8026, "step": 29225 }, { "epoch": 0.97, "grad_norm": 0.44407546520233154, "learning_rate": 0.0004574581300826049, "loss": 1.7857, "step": 29226 }, { "epoch": 0.97, "grad_norm": 0.4489985406398773, "learning_rate": 0.00045744923326775463, "loss": 1.7597, "step": 29227 }, { "epoch": 0.97, "grad_norm": 0.45091092586517334, "learning_rate": 0.00045744033626178424, "loss": 1.7789, "step": 29228 }, { "epoch": 0.97, "grad_norm": 0.43166378140449524, "learning_rate": 0.00045743143906470447, "loss": 1.7904, "step": 29229 }, { "epoch": 0.97, "grad_norm": 0.440554678440094, "learning_rate": 0.00045742254167652603, "loss": 1.8455, "step": 29230 }, { "epoch": 0.97, "grad_norm": 0.44648581743240356, "learning_rate": 0.00045741364409725995, "loss": 1.7916, "step": 29231 }, { "epoch": 0.97, "grad_norm": 0.4515179693698883, "learning_rate": 0.0004574047463269169, "loss": 1.772, "step": 29232 }, { "epoch": 0.97, "grad_norm": 0.4876210391521454, "learning_rate": 0.0004573958483655076, "loss": 1.8881, "step": 29233 }, { "epoch": 0.97, "grad_norm": 0.4565568268299103, "learning_rate": 0.00045738695021304296, "loss": 1.7901, "step": 29234 }, { "epoch": 0.97, "grad_norm": 0.4513402283191681, "learning_rate": 0.0004573780518695338, "loss": 1.9217, "step": 29235 }, { "epoch": 0.97, "grad_norm": 0.45863252878189087, "learning_rate": 0.00045736915333499076, "loss": 1.7491, "step": 29236 }, { "epoch": 0.97, "grad_norm": 0.4590761959552765, "learning_rate": 0.0004573602546094248, "loss": 1.8392, "step": 29237 }, { "epoch": 0.97, "grad_norm": 0.4871664047241211, "learning_rate": 0.0004573513556928468, "loss": 1.8071, "step": 29238 }, { "epoch": 0.97, "grad_norm": 0.4898844361305237, "learning_rate": 0.0004573424565852673, "loss": 1.828, "step": 29239 }, { "epoch": 0.97, "grad_norm": 0.4462573230266571, "learning_rate": 0.0004573335572866973, "loss": 1.8473, "step": 29240 }, { "epoch": 0.97, "grad_norm": 0.447801798582077, "learning_rate": 0.00045732465779714754, "loss": 1.7519, "step": 29241 }, { "epoch": 0.97, "grad_norm": 0.45520564913749695, "learning_rate": 0.00045731575811662874, "loss": 1.8413, "step": 29242 }, { "epoch": 0.97, "grad_norm": 0.4823193848133087, "learning_rate": 0.00045730685824515196, "loss": 1.8358, "step": 29243 }, { "epoch": 0.97, "grad_norm": 0.53779137134552, "learning_rate": 0.0004572979581827276, "loss": 1.792, "step": 29244 }, { "epoch": 0.97, "grad_norm": 0.4461408257484436, "learning_rate": 0.0004572890579293669, "loss": 1.8375, "step": 29245 }, { "epoch": 0.97, "grad_norm": 0.46009954810142517, "learning_rate": 0.0004572801574850804, "loss": 1.7266, "step": 29246 }, { "epoch": 0.97, "grad_norm": 0.4547843933105469, "learning_rate": 0.000457271256849879, "loss": 1.7437, "step": 29247 }, { "epoch": 0.97, "grad_norm": 0.4437868893146515, "learning_rate": 0.00045726235602377334, "loss": 1.8368, "step": 29248 }, { "epoch": 0.97, "grad_norm": 0.44601473212242126, "learning_rate": 0.0004572534550067744, "loss": 1.8218, "step": 29249 }, { "epoch": 0.97, "grad_norm": 0.45755714178085327, "learning_rate": 0.00045724455379889307, "loss": 1.8076, "step": 29250 }, { "epoch": 0.97, "grad_norm": 0.5050771236419678, "learning_rate": 0.00045723565240013983, "loss": 1.8826, "step": 29251 }, { "epoch": 0.97, "grad_norm": 0.4407270550727844, "learning_rate": 0.0004572267508105258, "loss": 1.8682, "step": 29252 }, { "epoch": 0.97, "grad_norm": 0.4618583917617798, "learning_rate": 0.0004572178490300617, "loss": 1.8471, "step": 29253 }, { "epoch": 0.97, "grad_norm": 0.45525428652763367, "learning_rate": 0.0004572089470587581, "loss": 1.7615, "step": 29254 }, { "epoch": 0.97, "grad_norm": 0.4529743194580078, "learning_rate": 0.00045720004489662624, "loss": 1.8057, "step": 29255 }, { "epoch": 0.97, "grad_norm": 0.4452528655529022, "learning_rate": 0.0004571911425436765, "loss": 1.8608, "step": 29256 }, { "epoch": 0.97, "grad_norm": 0.4502761662006378, "learning_rate": 0.00045718223999991994, "loss": 1.8361, "step": 29257 }, { "epoch": 0.97, "grad_norm": 0.45372992753982544, "learning_rate": 0.0004571733372653673, "loss": 1.7587, "step": 29258 }, { "epoch": 0.97, "grad_norm": 0.4474521279335022, "learning_rate": 0.0004571644343400294, "loss": 1.6985, "step": 29259 }, { "epoch": 0.97, "grad_norm": 0.44885510206222534, "learning_rate": 0.000457155531223917, "loss": 1.7625, "step": 29260 }, { "epoch": 0.97, "grad_norm": 0.4681784212589264, "learning_rate": 0.00045714662791704097, "loss": 1.777, "step": 29261 }, { "epoch": 0.97, "grad_norm": 0.4511364996433258, "learning_rate": 0.00045713772441941214, "loss": 1.7876, "step": 29262 }, { "epoch": 0.97, "grad_norm": 0.4438832700252533, "learning_rate": 0.00045712882073104113, "loss": 1.8262, "step": 29263 }, { "epoch": 0.97, "grad_norm": 0.4535025358200073, "learning_rate": 0.000457119916851939, "loss": 1.857, "step": 29264 }, { "epoch": 0.97, "grad_norm": 0.4524464011192322, "learning_rate": 0.0004571110127821164, "loss": 1.8282, "step": 29265 }, { "epoch": 0.97, "grad_norm": 0.4528611898422241, "learning_rate": 0.00045710210852158416, "loss": 1.7906, "step": 29266 }, { "epoch": 0.97, "grad_norm": 0.4309624135494232, "learning_rate": 0.00045709320407035317, "loss": 1.8007, "step": 29267 }, { "epoch": 0.97, "grad_norm": 0.4456897974014282, "learning_rate": 0.0004570842994284342, "loss": 1.7507, "step": 29268 }, { "epoch": 0.97, "grad_norm": 0.45699816942214966, "learning_rate": 0.000457075394595838, "loss": 1.8071, "step": 29269 }, { "epoch": 0.97, "grad_norm": 0.5036427974700928, "learning_rate": 0.0004570664895725753, "loss": 1.7519, "step": 29270 }, { "epoch": 0.97, "grad_norm": 0.4430878460407257, "learning_rate": 0.0004570575843586571, "loss": 1.8077, "step": 29271 }, { "epoch": 0.97, "grad_norm": 0.45665234327316284, "learning_rate": 0.00045704867895409416, "loss": 1.771, "step": 29272 }, { "epoch": 0.97, "grad_norm": 0.4541318714618683, "learning_rate": 0.0004570397733588973, "loss": 1.7721, "step": 29273 }, { "epoch": 0.97, "grad_norm": 0.4561944901943207, "learning_rate": 0.00045703086757307716, "loss": 1.8086, "step": 29274 }, { "epoch": 0.97, "grad_norm": 0.438453733921051, "learning_rate": 0.0004570219615966448, "loss": 1.8046, "step": 29275 }, { "epoch": 0.97, "grad_norm": 0.44607171416282654, "learning_rate": 0.00045701305542961086, "loss": 1.7934, "step": 29276 }, { "epoch": 0.97, "grad_norm": 0.47288811206817627, "learning_rate": 0.00045700414907198626, "loss": 1.7865, "step": 29277 }, { "epoch": 0.97, "grad_norm": 0.43846365809440613, "learning_rate": 0.0004569952425237817, "loss": 1.7736, "step": 29278 }, { "epoch": 0.97, "grad_norm": 0.4452596604824066, "learning_rate": 0.0004569863357850081, "loss": 1.7983, "step": 29279 }, { "epoch": 0.97, "grad_norm": 0.46873167157173157, "learning_rate": 0.0004569774288556762, "loss": 1.7925, "step": 29280 }, { "epoch": 0.97, "grad_norm": 0.46209031343460083, "learning_rate": 0.00045696852173579674, "loss": 1.7883, "step": 29281 }, { "epoch": 0.97, "grad_norm": 0.4510500729084015, "learning_rate": 0.0004569596144253808, "loss": 1.8743, "step": 29282 }, { "epoch": 0.97, "grad_norm": 0.44278714060783386, "learning_rate": 0.00045695070692443893, "loss": 1.7207, "step": 29283 }, { "epoch": 0.97, "grad_norm": 0.45161065459251404, "learning_rate": 0.0004569417992329819, "loss": 1.8517, "step": 29284 }, { "epoch": 0.97, "grad_norm": 0.44467082619667053, "learning_rate": 0.0004569328913510208, "loss": 1.8046, "step": 29285 }, { "epoch": 0.97, "grad_norm": 0.4606279730796814, "learning_rate": 0.00045692398327856635, "loss": 1.7898, "step": 29286 }, { "epoch": 0.97, "grad_norm": 0.4676840901374817, "learning_rate": 0.0004569150750156292, "loss": 1.8774, "step": 29287 }, { "epoch": 0.97, "grad_norm": 0.44544053077697754, "learning_rate": 0.0004569061665622203, "loss": 1.7941, "step": 29288 }, { "epoch": 0.97, "grad_norm": 0.4499050974845886, "learning_rate": 0.0004568972579183504, "loss": 1.8063, "step": 29289 }, { "epoch": 0.97, "grad_norm": 0.46194225549697876, "learning_rate": 0.0004568883490840303, "loss": 1.75, "step": 29290 }, { "epoch": 0.97, "grad_norm": 0.46043938398361206, "learning_rate": 0.00045687944005927097, "loss": 1.8535, "step": 29291 }, { "epoch": 0.97, "grad_norm": 0.43429306149482727, "learning_rate": 0.000456870530844083, "loss": 1.8016, "step": 29292 }, { "epoch": 0.97, "grad_norm": 0.4496784806251526, "learning_rate": 0.0004568616214384775, "loss": 1.773, "step": 29293 }, { "epoch": 0.97, "grad_norm": 0.4448557496070862, "learning_rate": 0.000456852711842465, "loss": 1.7822, "step": 29294 }, { "epoch": 0.97, "grad_norm": 0.43566542863845825, "learning_rate": 0.0004568438020560564, "loss": 1.8072, "step": 29295 }, { "epoch": 0.97, "grad_norm": 1.0110280513763428, "learning_rate": 0.00045683489207926255, "loss": 1.8227, "step": 29296 }, { "epoch": 0.97, "grad_norm": 0.45685291290283203, "learning_rate": 0.0004568259819120942, "loss": 1.7935, "step": 29297 }, { "epoch": 0.97, "grad_norm": 0.4557406008243561, "learning_rate": 0.00045681707155456234, "loss": 1.8027, "step": 29298 }, { "epoch": 0.97, "grad_norm": 0.44285354018211365, "learning_rate": 0.0004568081610066775, "loss": 1.7794, "step": 29299 }, { "epoch": 0.97, "grad_norm": 0.4355418384075165, "learning_rate": 0.00045679925026845074, "loss": 1.9015, "step": 29300 }, { "epoch": 0.97, "grad_norm": 1.601361870765686, "learning_rate": 0.0004567903393398928, "loss": 1.8337, "step": 29301 }, { "epoch": 0.97, "grad_norm": 0.43335607647895813, "learning_rate": 0.00045678142822101455, "loss": 1.8081, "step": 29302 }, { "epoch": 0.97, "grad_norm": 0.4493477940559387, "learning_rate": 0.0004567725169118267, "loss": 1.8766, "step": 29303 }, { "epoch": 0.97, "grad_norm": 0.4540473520755768, "learning_rate": 0.00045676360541234, "loss": 1.813, "step": 29304 }, { "epoch": 0.97, "grad_norm": 0.4539690315723419, "learning_rate": 0.0004567546937225656, "loss": 1.7807, "step": 29305 }, { "epoch": 0.98, "grad_norm": 0.441662073135376, "learning_rate": 0.00045674578184251385, "loss": 1.7397, "step": 29306 }, { "epoch": 0.98, "grad_norm": 0.43122342228889465, "learning_rate": 0.000456736869772196, "loss": 1.795, "step": 29307 }, { "epoch": 0.98, "grad_norm": 0.4346948564052582, "learning_rate": 0.0004567279575116226, "loss": 1.7372, "step": 29308 }, { "epoch": 0.98, "grad_norm": 0.4459832012653351, "learning_rate": 0.00045671904506080455, "loss": 1.7816, "step": 29309 }, { "epoch": 0.98, "grad_norm": 0.45527806878089905, "learning_rate": 0.00045671013241975277, "loss": 1.796, "step": 29310 }, { "epoch": 0.98, "grad_norm": 0.4500543177127838, "learning_rate": 0.00045670121958847783, "loss": 1.8038, "step": 29311 }, { "epoch": 0.98, "grad_norm": 0.4760829210281372, "learning_rate": 0.00045669230656699084, "loss": 1.9894, "step": 29312 }, { "epoch": 0.98, "grad_norm": 0.44878673553466797, "learning_rate": 0.00045668339335530237, "loss": 1.7908, "step": 29313 }, { "epoch": 0.98, "grad_norm": 0.43328288197517395, "learning_rate": 0.00045667447995342337, "loss": 1.8061, "step": 29314 }, { "epoch": 0.98, "grad_norm": 0.46101629734039307, "learning_rate": 0.0004566655663613647, "loss": 1.7696, "step": 29315 }, { "epoch": 0.98, "grad_norm": 0.4466623365879059, "learning_rate": 0.00045665665257913705, "loss": 1.7163, "step": 29316 }, { "epoch": 0.98, "grad_norm": 0.46155980229377747, "learning_rate": 0.0004566477386067514, "loss": 1.8623, "step": 29317 }, { "epoch": 0.98, "grad_norm": 0.4537089169025421, "learning_rate": 0.0004566388244442184, "loss": 1.8048, "step": 29318 }, { "epoch": 0.98, "grad_norm": 0.4519702196121216, "learning_rate": 0.0004566299100915489, "loss": 1.7983, "step": 29319 }, { "epoch": 0.98, "grad_norm": 1.23231840133667, "learning_rate": 0.00045662099554875387, "loss": 1.9298, "step": 29320 }, { "epoch": 0.98, "grad_norm": 0.4767434895038605, "learning_rate": 0.00045661208081584394, "loss": 1.7761, "step": 29321 }, { "epoch": 0.98, "grad_norm": 0.4663589298725128, "learning_rate": 0.00045660316589283005, "loss": 1.8451, "step": 29322 }, { "epoch": 0.98, "grad_norm": 0.46458616852760315, "learning_rate": 0.000456594250779723, "loss": 1.8052, "step": 29323 }, { "epoch": 0.98, "grad_norm": 0.43573471903800964, "learning_rate": 0.0004565853354765336, "loss": 1.8197, "step": 29324 }, { "epoch": 0.98, "grad_norm": 0.4662858843803406, "learning_rate": 0.0004565764199832727, "loss": 1.7983, "step": 29325 }, { "epoch": 0.98, "grad_norm": 0.45073428750038147, "learning_rate": 0.0004565675042999511, "loss": 1.8713, "step": 29326 }, { "epoch": 0.98, "grad_norm": 0.43869468569755554, "learning_rate": 0.00045655858842657957, "loss": 1.8034, "step": 29327 }, { "epoch": 0.98, "grad_norm": 0.468469500541687, "learning_rate": 0.000456549672363169, "loss": 1.8064, "step": 29328 }, { "epoch": 0.98, "grad_norm": 0.5839221477508545, "learning_rate": 0.0004565407561097302, "loss": 1.8061, "step": 29329 }, { "epoch": 0.98, "grad_norm": 0.45087701082229614, "learning_rate": 0.00045653183966627394, "loss": 1.8547, "step": 29330 }, { "epoch": 0.98, "grad_norm": 0.44352447986602783, "learning_rate": 0.00045652292303281116, "loss": 1.8001, "step": 29331 }, { "epoch": 0.98, "grad_norm": 0.45388728380203247, "learning_rate": 0.0004565140062093526, "loss": 1.8016, "step": 29332 }, { "epoch": 0.98, "grad_norm": 0.4392505884170532, "learning_rate": 0.0004565050891959091, "loss": 1.8345, "step": 29333 }, { "epoch": 0.98, "grad_norm": 0.4532850980758667, "learning_rate": 0.00045649617199249146, "loss": 1.7707, "step": 29334 }, { "epoch": 0.98, "grad_norm": 0.45020848512649536, "learning_rate": 0.00045648725459911056, "loss": 1.8715, "step": 29335 }, { "epoch": 0.98, "grad_norm": 0.4460783302783966, "learning_rate": 0.00045647833701577706, "loss": 1.8442, "step": 29336 }, { "epoch": 0.98, "grad_norm": 0.4601934552192688, "learning_rate": 0.0004564694192425021, "loss": 1.7813, "step": 29337 }, { "epoch": 0.98, "grad_norm": 0.45586884021759033, "learning_rate": 0.00045646050127929623, "loss": 1.8302, "step": 29338 }, { "epoch": 0.98, "grad_norm": 0.4514666497707367, "learning_rate": 0.0004564515831261703, "loss": 1.7797, "step": 29339 }, { "epoch": 0.98, "grad_norm": 0.4455971121788025, "learning_rate": 0.00045644266478313527, "loss": 1.7351, "step": 29340 }, { "epoch": 0.98, "grad_norm": 0.462302029132843, "learning_rate": 0.0004564337462502019, "loss": 1.8144, "step": 29341 }, { "epoch": 0.98, "grad_norm": 0.4585895836353302, "learning_rate": 0.00045642482752738107, "loss": 1.8767, "step": 29342 }, { "epoch": 0.98, "grad_norm": 0.4541938900947571, "learning_rate": 0.00045641590861468344, "loss": 1.8613, "step": 29343 }, { "epoch": 0.98, "grad_norm": 0.452449232339859, "learning_rate": 0.00045640698951212005, "loss": 1.6998, "step": 29344 }, { "epoch": 0.98, "grad_norm": 0.435716837644577, "learning_rate": 0.0004563980702197016, "loss": 1.8714, "step": 29345 }, { "epoch": 0.98, "grad_norm": 0.433694988489151, "learning_rate": 0.00045638915073743883, "loss": 1.7818, "step": 29346 }, { "epoch": 0.98, "grad_norm": 0.45134615898132324, "learning_rate": 0.0004563802310653428, "loss": 1.8695, "step": 29347 }, { "epoch": 0.98, "grad_norm": 0.465252161026001, "learning_rate": 0.0004563713112034241, "loss": 1.8749, "step": 29348 }, { "epoch": 0.98, "grad_norm": 0.44130149483680725, "learning_rate": 0.00045636239115169375, "loss": 1.8647, "step": 29349 }, { "epoch": 0.98, "grad_norm": 0.445421040058136, "learning_rate": 0.0004563534709101625, "loss": 1.8621, "step": 29350 }, { "epoch": 0.98, "grad_norm": 0.44810786843299866, "learning_rate": 0.00045634455047884113, "loss": 1.7963, "step": 29351 }, { "epoch": 0.98, "grad_norm": 0.4712579846382141, "learning_rate": 0.00045633562985774055, "loss": 1.8324, "step": 29352 }, { "epoch": 0.98, "grad_norm": 0.45900529623031616, "learning_rate": 0.00045632670904687146, "loss": 1.8116, "step": 29353 }, { "epoch": 0.98, "grad_norm": 0.4926716983318329, "learning_rate": 0.0004563177880462448, "loss": 1.8074, "step": 29354 }, { "epoch": 0.98, "grad_norm": 0.4875257909297943, "learning_rate": 0.0004563088668558715, "loss": 1.7882, "step": 29355 }, { "epoch": 0.98, "grad_norm": 0.46017715334892273, "learning_rate": 0.00045629994547576215, "loss": 1.8199, "step": 29356 }, { "epoch": 0.98, "grad_norm": 0.44725537300109863, "learning_rate": 0.0004562910239059278, "loss": 1.7493, "step": 29357 }, { "epoch": 0.98, "grad_norm": 0.43846866488456726, "learning_rate": 0.00045628210214637905, "loss": 1.8024, "step": 29358 }, { "epoch": 0.98, "grad_norm": 0.4486939013004303, "learning_rate": 0.0004562731801971269, "loss": 1.7425, "step": 29359 }, { "epoch": 0.98, "grad_norm": 0.4578193426132202, "learning_rate": 0.0004562642580581822, "loss": 1.7993, "step": 29360 }, { "epoch": 0.98, "grad_norm": 0.46669137477874756, "learning_rate": 0.00045625533572955557, "loss": 1.8698, "step": 29361 }, { "epoch": 0.98, "grad_norm": 0.45569491386413574, "learning_rate": 0.0004562464132112581, "loss": 1.8149, "step": 29362 }, { "epoch": 0.98, "grad_norm": 0.4656463861465454, "learning_rate": 0.00045623749050330054, "loss": 1.9028, "step": 29363 }, { "epoch": 0.98, "grad_norm": 0.47469234466552734, "learning_rate": 0.00045622856760569366, "loss": 1.8559, "step": 29364 }, { "epoch": 0.98, "grad_norm": 0.44694995880126953, "learning_rate": 0.00045621964451844827, "loss": 1.755, "step": 29365 }, { "epoch": 0.98, "grad_norm": 0.5101040005683899, "learning_rate": 0.0004562107212415753, "loss": 1.782, "step": 29366 }, { "epoch": 0.98, "grad_norm": 0.4638957977294922, "learning_rate": 0.00045620179777508546, "loss": 1.8187, "step": 29367 }, { "epoch": 0.98, "grad_norm": 0.44141051173210144, "learning_rate": 0.0004561928741189896, "loss": 1.7446, "step": 29368 }, { "epoch": 0.98, "grad_norm": 0.43873950839042664, "learning_rate": 0.00045618395027329883, "loss": 1.8122, "step": 29369 }, { "epoch": 0.98, "grad_norm": 0.4470594525337219, "learning_rate": 0.00045617502623802354, "loss": 1.7714, "step": 29370 }, { "epoch": 0.98, "grad_norm": 0.45466458797454834, "learning_rate": 0.0004561661020131749, "loss": 1.7868, "step": 29371 }, { "epoch": 0.98, "grad_norm": 0.4569208025932312, "learning_rate": 0.0004561571775987636, "loss": 1.8764, "step": 29372 }, { "epoch": 0.98, "grad_norm": 0.4538482427597046, "learning_rate": 0.0004561482529948004, "loss": 1.8157, "step": 29373 }, { "epoch": 0.98, "grad_norm": 0.4397507309913635, "learning_rate": 0.0004561393282012964, "loss": 1.8144, "step": 29374 }, { "epoch": 0.98, "grad_norm": 0.4606230556964874, "learning_rate": 0.0004561304032182621, "loss": 1.7971, "step": 29375 }, { "epoch": 0.98, "grad_norm": 0.45199429988861084, "learning_rate": 0.00045612147804570863, "loss": 1.7692, "step": 29376 }, { "epoch": 0.98, "grad_norm": 0.4323793649673462, "learning_rate": 0.0004561125526836466, "loss": 1.773, "step": 29377 }, { "epoch": 0.98, "grad_norm": 0.4470973312854767, "learning_rate": 0.00045610362713208695, "loss": 1.7946, "step": 29378 }, { "epoch": 0.98, "grad_norm": 0.4344964921474457, "learning_rate": 0.00045609470139104056, "loss": 1.883, "step": 29379 }, { "epoch": 0.98, "grad_norm": 0.4290001094341278, "learning_rate": 0.0004560857754605182, "loss": 1.7586, "step": 29380 }, { "epoch": 0.98, "grad_norm": 0.44447392225265503, "learning_rate": 0.00045607684934053066, "loss": 1.7896, "step": 29381 }, { "epoch": 0.98, "grad_norm": 0.44504067301750183, "learning_rate": 0.0004560679230310888, "loss": 1.7804, "step": 29382 }, { "epoch": 0.98, "grad_norm": 0.45331984758377075, "learning_rate": 0.00045605899653220345, "loss": 1.7591, "step": 29383 }, { "epoch": 0.98, "grad_norm": 0.4512485861778259, "learning_rate": 0.0004560500698438855, "loss": 1.8465, "step": 29384 }, { "epoch": 0.98, "grad_norm": 0.4475187063217163, "learning_rate": 0.00045604114296614583, "loss": 1.8194, "step": 29385 }, { "epoch": 0.98, "grad_norm": 0.4434901773929596, "learning_rate": 0.0004560322158989951, "loss": 1.8126, "step": 29386 }, { "epoch": 0.98, "grad_norm": 0.4403788149356842, "learning_rate": 0.0004560232886424443, "loss": 1.8021, "step": 29387 }, { "epoch": 0.98, "grad_norm": 0.4660447835922241, "learning_rate": 0.0004560143611965042, "loss": 1.8853, "step": 29388 }, { "epoch": 0.98, "grad_norm": 0.645095705986023, "learning_rate": 0.0004560054335611857, "loss": 1.8919, "step": 29389 }, { "epoch": 0.98, "grad_norm": 0.4353046417236328, "learning_rate": 0.00045599650573649957, "loss": 1.7681, "step": 29390 }, { "epoch": 0.98, "grad_norm": 0.45176059007644653, "learning_rate": 0.0004559875777224566, "loss": 1.7797, "step": 29391 }, { "epoch": 0.98, "grad_norm": 0.4526508152484894, "learning_rate": 0.0004559786495190678, "loss": 1.7865, "step": 29392 }, { "epoch": 0.98, "grad_norm": 0.4501591920852661, "learning_rate": 0.0004559697211263439, "loss": 1.7665, "step": 29393 }, { "epoch": 0.98, "grad_norm": 0.44674980640411377, "learning_rate": 0.00045596079254429565, "loss": 1.807, "step": 29394 }, { "epoch": 0.98, "grad_norm": 0.4466778635978699, "learning_rate": 0.000455951863772934, "loss": 1.7764, "step": 29395 }, { "epoch": 0.98, "grad_norm": 0.44961243867874146, "learning_rate": 0.0004559429348122698, "loss": 1.7319, "step": 29396 }, { "epoch": 0.98, "grad_norm": 0.48977163434028625, "learning_rate": 0.0004559340056623139, "loss": 1.8316, "step": 29397 }, { "epoch": 0.98, "grad_norm": 0.45435526967048645, "learning_rate": 0.0004559250763230769, "loss": 1.8513, "step": 29398 }, { "epoch": 0.98, "grad_norm": 0.44411391019821167, "learning_rate": 0.00045591614679457005, "loss": 1.8464, "step": 29399 }, { "epoch": 0.98, "grad_norm": 0.44774866104125977, "learning_rate": 0.0004559072170768039, "loss": 1.8394, "step": 29400 }, { "epoch": 0.98, "grad_norm": 0.4438106119632721, "learning_rate": 0.0004558982871697893, "loss": 1.7993, "step": 29401 }, { "epoch": 0.98, "grad_norm": 0.45289522409439087, "learning_rate": 0.0004558893570735372, "loss": 1.8106, "step": 29402 }, { "epoch": 0.98, "grad_norm": 0.45751839876174927, "learning_rate": 0.0004558804267880583, "loss": 1.8575, "step": 29403 }, { "epoch": 0.98, "grad_norm": 0.4486309289932251, "learning_rate": 0.0004558714963133637, "loss": 1.8249, "step": 29404 }, { "epoch": 0.98, "grad_norm": 0.47019127011299133, "learning_rate": 0.0004558625656494641, "loss": 1.866, "step": 29405 }, { "epoch": 0.98, "grad_norm": 0.4572546184062958, "learning_rate": 0.0004558536347963701, "loss": 1.817, "step": 29406 }, { "epoch": 0.98, "grad_norm": 0.4578937590122223, "learning_rate": 0.0004558447037540929, "loss": 1.76, "step": 29407 }, { "epoch": 0.98, "grad_norm": 0.45664581656455994, "learning_rate": 0.00045583577252264305, "loss": 1.7094, "step": 29408 }, { "epoch": 0.98, "grad_norm": 0.42871853709220886, "learning_rate": 0.00045582684110203166, "loss": 1.806, "step": 29409 }, { "epoch": 0.98, "grad_norm": 0.45318910479545593, "learning_rate": 0.00045581790949226944, "loss": 1.8301, "step": 29410 }, { "epoch": 0.98, "grad_norm": 0.538669228553772, "learning_rate": 0.00045580897769336717, "loss": 1.8174, "step": 29411 }, { "epoch": 0.98, "grad_norm": 0.4709874093532562, "learning_rate": 0.00045580004570533575, "loss": 1.7736, "step": 29412 }, { "epoch": 0.98, "grad_norm": 0.4546288847923279, "learning_rate": 0.00045579111352818613, "loss": 1.8301, "step": 29413 }, { "epoch": 0.98, "grad_norm": 0.4484154284000397, "learning_rate": 0.00045578218116192894, "loss": 1.8128, "step": 29414 }, { "epoch": 0.98, "grad_norm": 0.47931233048439026, "learning_rate": 0.0004557732486065753, "loss": 1.735, "step": 29415 }, { "epoch": 0.98, "grad_norm": 0.45645156502723694, "learning_rate": 0.00045576431586213565, "loss": 1.7398, "step": 29416 }, { "epoch": 0.98, "grad_norm": 0.4590300917625427, "learning_rate": 0.0004557553829286213, "loss": 1.7219, "step": 29417 }, { "epoch": 0.98, "grad_norm": 0.4737662076950073, "learning_rate": 0.0004557464498060427, "loss": 1.8065, "step": 29418 }, { "epoch": 0.98, "grad_norm": 0.4460119605064392, "learning_rate": 0.00045573751649441094, "loss": 1.7671, "step": 29419 }, { "epoch": 0.98, "grad_norm": 0.45063596963882446, "learning_rate": 0.0004557285829937368, "loss": 1.809, "step": 29420 }, { "epoch": 0.98, "grad_norm": 0.45962363481521606, "learning_rate": 0.00045571964930403105, "loss": 1.8265, "step": 29421 }, { "epoch": 0.98, "grad_norm": 0.4560146629810333, "learning_rate": 0.0004557107154253047, "loss": 1.8465, "step": 29422 }, { "epoch": 0.98, "grad_norm": 0.4507629871368408, "learning_rate": 0.00045570178135756825, "loss": 1.8249, "step": 29423 }, { "epoch": 0.98, "grad_norm": 0.44368603825569153, "learning_rate": 0.000455692847100833, "loss": 1.7562, "step": 29424 }, { "epoch": 0.98, "grad_norm": 0.46704116463661194, "learning_rate": 0.0004556839126551095, "loss": 1.714, "step": 29425 }, { "epoch": 0.98, "grad_norm": 0.4532977044582367, "learning_rate": 0.0004556749780204086, "loss": 1.8079, "step": 29426 }, { "epoch": 0.98, "grad_norm": 0.45824456214904785, "learning_rate": 0.0004556660431967414, "loss": 1.8482, "step": 29427 }, { "epoch": 0.98, "grad_norm": 0.4616895914077759, "learning_rate": 0.00045565710818411834, "loss": 1.8241, "step": 29428 }, { "epoch": 0.98, "grad_norm": 0.44427943229675293, "learning_rate": 0.0004556481729825506, "loss": 1.7894, "step": 29429 }, { "epoch": 0.98, "grad_norm": 0.44176673889160156, "learning_rate": 0.0004556392375920488, "loss": 1.7912, "step": 29430 }, { "epoch": 0.98, "grad_norm": 0.444796621799469, "learning_rate": 0.0004556303020126241, "loss": 1.8316, "step": 29431 }, { "epoch": 0.98, "grad_norm": 0.44897598028182983, "learning_rate": 0.00045562136624428703, "loss": 1.809, "step": 29432 }, { "epoch": 0.98, "grad_norm": 0.44377458095550537, "learning_rate": 0.0004556124302870486, "loss": 1.777, "step": 29433 }, { "epoch": 0.98, "grad_norm": 0.4489571750164032, "learning_rate": 0.0004556034941409196, "loss": 1.8021, "step": 29434 }, { "epoch": 0.98, "grad_norm": 0.455317884683609, "learning_rate": 0.00045559455780591076, "loss": 1.8398, "step": 29435 }, { "epoch": 0.98, "grad_norm": 0.44221535325050354, "learning_rate": 0.0004555856212820332, "loss": 1.8164, "step": 29436 }, { "epoch": 0.98, "grad_norm": 0.44111356139183044, "learning_rate": 0.00045557668456929764, "loss": 1.8112, "step": 29437 }, { "epoch": 0.98, "grad_norm": 0.4501510560512543, "learning_rate": 0.0004555677476677147, "loss": 1.7901, "step": 29438 }, { "epoch": 0.98, "grad_norm": 0.47056007385253906, "learning_rate": 0.00045555881057729566, "loss": 1.8132, "step": 29439 }, { "epoch": 0.98, "grad_norm": 0.46439114212989807, "learning_rate": 0.00045554987329805103, "loss": 1.852, "step": 29440 }, { "epoch": 0.98, "grad_norm": 0.43869549036026, "learning_rate": 0.00045554093582999183, "loss": 1.7332, "step": 29441 }, { "epoch": 0.98, "grad_norm": 0.4416508376598358, "learning_rate": 0.00045553199817312886, "loss": 1.8358, "step": 29442 }, { "epoch": 0.98, "grad_norm": 0.45164594054222107, "learning_rate": 0.00045552306032747294, "loss": 1.7967, "step": 29443 }, { "epoch": 0.98, "grad_norm": 0.46323511004447937, "learning_rate": 0.0004555141222930349, "loss": 1.7125, "step": 29444 }, { "epoch": 0.98, "grad_norm": 0.43790942430496216, "learning_rate": 0.00045550518406982555, "loss": 1.7821, "step": 29445 }, { "epoch": 0.98, "grad_norm": 0.4558887183666229, "learning_rate": 0.000455496245657856, "loss": 1.8301, "step": 29446 }, { "epoch": 0.98, "grad_norm": 0.4837619364261627, "learning_rate": 0.00045548730705713685, "loss": 1.7856, "step": 29447 }, { "epoch": 0.98, "grad_norm": 0.4636678695678711, "learning_rate": 0.00045547836826767904, "loss": 1.8456, "step": 29448 }, { "epoch": 0.98, "grad_norm": 0.4523804485797882, "learning_rate": 0.0004554694292894934, "loss": 1.7988, "step": 29449 }, { "epoch": 0.98, "grad_norm": 0.44047456979751587, "learning_rate": 0.00045546049012259066, "loss": 1.7911, "step": 29450 }, { "epoch": 0.98, "grad_norm": 0.4490441083908081, "learning_rate": 0.0004554515507669819, "loss": 1.8214, "step": 29451 }, { "epoch": 0.98, "grad_norm": 0.4623280465602875, "learning_rate": 0.00045544261122267785, "loss": 1.8038, "step": 29452 }, { "epoch": 0.98, "grad_norm": 0.4688985347747803, "learning_rate": 0.0004554336714896893, "loss": 1.7167, "step": 29453 }, { "epoch": 0.98, "grad_norm": 0.4499496519565582, "learning_rate": 0.0004554247315680273, "loss": 1.73, "step": 29454 }, { "epoch": 0.98, "grad_norm": 0.5231660008430481, "learning_rate": 0.0004554157914577026, "loss": 1.7931, "step": 29455 }, { "epoch": 0.98, "grad_norm": 0.44250282645225525, "learning_rate": 0.00045540685115872585, "loss": 1.761, "step": 29456 }, { "epoch": 0.98, "grad_norm": 0.4433037340641022, "learning_rate": 0.00045539791067110815, "loss": 1.7941, "step": 29457 }, { "epoch": 0.98, "grad_norm": 0.4601694345474243, "learning_rate": 0.0004553889699948603, "loss": 1.833, "step": 29458 }, { "epoch": 0.98, "grad_norm": 0.4349599778652191, "learning_rate": 0.00045538002912999325, "loss": 1.7916, "step": 29459 }, { "epoch": 0.98, "grad_norm": 0.43785133957862854, "learning_rate": 0.00045537108807651753, "loss": 1.7646, "step": 29460 }, { "epoch": 0.98, "grad_norm": 0.531609296798706, "learning_rate": 0.0004553621468344444, "loss": 1.7964, "step": 29461 }, { "epoch": 0.98, "grad_norm": 0.43929845094680786, "learning_rate": 0.0004553532054037844, "loss": 1.7756, "step": 29462 }, { "epoch": 0.98, "grad_norm": 0.4613898992538452, "learning_rate": 0.00045534426378454856, "loss": 1.7725, "step": 29463 }, { "epoch": 0.98, "grad_norm": 0.4426165223121643, "learning_rate": 0.0004553353219767475, "loss": 1.7916, "step": 29464 }, { "epoch": 0.98, "grad_norm": 0.4232194721698761, "learning_rate": 0.0004553263799803924, "loss": 1.8007, "step": 29465 }, { "epoch": 0.98, "grad_norm": 0.4465310573577881, "learning_rate": 0.00045531743779549403, "loss": 1.8554, "step": 29466 }, { "epoch": 0.98, "grad_norm": 0.4533053934574127, "learning_rate": 0.00045530849542206305, "loss": 1.8361, "step": 29467 }, { "epoch": 0.98, "grad_norm": 0.45136144757270813, "learning_rate": 0.00045529955286011055, "loss": 1.8114, "step": 29468 }, { "epoch": 0.98, "grad_norm": 0.4538434147834778, "learning_rate": 0.00045529061010964717, "loss": 1.9145, "step": 29469 }, { "epoch": 0.98, "grad_norm": 0.45087555050849915, "learning_rate": 0.0004552816671706838, "loss": 1.897, "step": 29470 }, { "epoch": 0.98, "grad_norm": 0.4553696811199188, "learning_rate": 0.0004552727240432315, "loss": 1.781, "step": 29471 }, { "epoch": 0.98, "grad_norm": 0.44408512115478516, "learning_rate": 0.000455263780727301, "loss": 1.7299, "step": 29472 }, { "epoch": 0.98, "grad_norm": 0.45758992433547974, "learning_rate": 0.00045525483722290314, "loss": 1.8266, "step": 29473 }, { "epoch": 0.98, "grad_norm": 0.44610831141471863, "learning_rate": 0.0004552458935300487, "loss": 1.7645, "step": 29474 }, { "epoch": 0.98, "grad_norm": 0.45384278893470764, "learning_rate": 0.0004552369496487487, "loss": 1.7441, "step": 29475 }, { "epoch": 0.98, "grad_norm": 0.4602940082550049, "learning_rate": 0.0004552280055790139, "loss": 1.8536, "step": 29476 }, { "epoch": 0.98, "grad_norm": 0.4384421706199646, "learning_rate": 0.00045521906132085517, "loss": 1.7731, "step": 29477 }, { "epoch": 0.98, "grad_norm": 0.45415815711021423, "learning_rate": 0.0004552101168742833, "loss": 1.8331, "step": 29478 }, { "epoch": 0.98, "grad_norm": 0.4470931589603424, "learning_rate": 0.00045520117223930926, "loss": 1.8094, "step": 29479 }, { "epoch": 0.98, "grad_norm": 0.46451249718666077, "learning_rate": 0.0004551922274159439, "loss": 1.849, "step": 29480 }, { "epoch": 0.98, "grad_norm": 0.46190714836120605, "learning_rate": 0.000455183282404198, "loss": 1.8058, "step": 29481 }, { "epoch": 0.98, "grad_norm": 0.4472101926803589, "learning_rate": 0.00045517433720408256, "loss": 1.7786, "step": 29482 }, { "epoch": 0.98, "grad_norm": 0.4654540717601776, "learning_rate": 0.00045516539181560817, "loss": 1.8274, "step": 29483 }, { "epoch": 0.98, "grad_norm": 0.45614495873451233, "learning_rate": 0.00045515644623878607, "loss": 1.777, "step": 29484 }, { "epoch": 0.98, "grad_norm": 0.4457802474498749, "learning_rate": 0.00045514750047362666, "loss": 1.8439, "step": 29485 }, { "epoch": 0.98, "grad_norm": 0.4514268934726715, "learning_rate": 0.00045513855452014124, "loss": 1.7894, "step": 29486 }, { "epoch": 0.98, "grad_norm": 0.45206156373023987, "learning_rate": 0.00045512960837834035, "loss": 1.7678, "step": 29487 }, { "epoch": 0.98, "grad_norm": 0.462185800075531, "learning_rate": 0.0004551206620482351, "loss": 1.8362, "step": 29488 }, { "epoch": 0.98, "grad_norm": 0.4389978349208832, "learning_rate": 0.0004551117155298362, "loss": 1.8043, "step": 29489 }, { "epoch": 0.98, "grad_norm": 0.6417705416679382, "learning_rate": 0.0004551027688231544, "loss": 1.7814, "step": 29490 }, { "epoch": 0.98, "grad_norm": 0.4718402624130249, "learning_rate": 0.0004550938219282008, "loss": 1.8598, "step": 29491 }, { "epoch": 0.98, "grad_norm": 0.44606655836105347, "learning_rate": 0.00045508487484498614, "loss": 1.7796, "step": 29492 }, { "epoch": 0.98, "grad_norm": 0.45043304562568665, "learning_rate": 0.00045507592757352124, "loss": 1.8224, "step": 29493 }, { "epoch": 0.98, "grad_norm": 0.4542801082134247, "learning_rate": 0.000455066980113817, "loss": 1.8194, "step": 29494 }, { "epoch": 0.98, "grad_norm": 0.4459483027458191, "learning_rate": 0.00045505803246588436, "loss": 1.8627, "step": 29495 }, { "epoch": 0.98, "grad_norm": 0.4455430209636688, "learning_rate": 0.00045504908462973417, "loss": 1.7954, "step": 29496 }, { "epoch": 0.98, "grad_norm": 0.44938352704048157, "learning_rate": 0.00045504013660537716, "loss": 1.7882, "step": 29497 }, { "epoch": 0.98, "grad_norm": 0.443013459444046, "learning_rate": 0.0004550311883928243, "loss": 1.7939, "step": 29498 }, { "epoch": 0.98, "grad_norm": 0.4408108592033386, "learning_rate": 0.00045502223999208634, "loss": 1.7818, "step": 29499 }, { "epoch": 0.98, "grad_norm": 0.4486662447452545, "learning_rate": 0.0004550132914031743, "loss": 1.7548, "step": 29500 }, { "epoch": 0.98, "grad_norm": 0.46920427680015564, "learning_rate": 0.00045500434262609894, "loss": 1.8374, "step": 29501 }, { "epoch": 0.98, "grad_norm": 0.42401278018951416, "learning_rate": 0.00045499539366087113, "loss": 1.8229, "step": 29502 }, { "epoch": 0.98, "grad_norm": 0.4450114965438843, "learning_rate": 0.00045498644450750177, "loss": 1.8268, "step": 29503 }, { "epoch": 0.98, "grad_norm": 0.43944457173347473, "learning_rate": 0.00045497749516600175, "loss": 1.7394, "step": 29504 }, { "epoch": 0.98, "grad_norm": 0.45876720547676086, "learning_rate": 0.0004549685456363818, "loss": 1.7918, "step": 29505 }, { "epoch": 0.98, "grad_norm": 0.4506036341190338, "learning_rate": 0.0004549595959186529, "loss": 1.7742, "step": 29506 }, { "epoch": 0.98, "grad_norm": 0.4463972747325897, "learning_rate": 0.0004549506460128259, "loss": 1.8371, "step": 29507 }, { "epoch": 0.98, "grad_norm": 0.45756828784942627, "learning_rate": 0.0004549416959189116, "loss": 1.8518, "step": 29508 }, { "epoch": 0.98, "grad_norm": 0.44816991686820984, "learning_rate": 0.000454932745636921, "loss": 1.8185, "step": 29509 }, { "epoch": 0.98, "grad_norm": 0.4707910120487213, "learning_rate": 0.0004549237951668648, "loss": 1.8068, "step": 29510 }, { "epoch": 0.98, "grad_norm": 0.8522399663925171, "learning_rate": 0.0004549148445087539, "loss": 1.8811, "step": 29511 }, { "epoch": 0.98, "grad_norm": 1.4196900129318237, "learning_rate": 0.0004549058936625993, "loss": 1.9507, "step": 29512 }, { "epoch": 0.98, "grad_norm": 0.4503522217273712, "learning_rate": 0.00045489694262841173, "loss": 1.8412, "step": 29513 }, { "epoch": 0.98, "grad_norm": 0.4388046860694885, "learning_rate": 0.00045488799140620215, "loss": 1.7986, "step": 29514 }, { "epoch": 0.98, "grad_norm": 0.4550090730190277, "learning_rate": 0.0004548790399959813, "loss": 1.7384, "step": 29515 }, { "epoch": 0.98, "grad_norm": 0.44103363156318665, "learning_rate": 0.00045487008839776005, "loss": 1.8646, "step": 29516 }, { "epoch": 0.98, "grad_norm": 0.46134260296821594, "learning_rate": 0.00045486113661154953, "loss": 1.6845, "step": 29517 }, { "epoch": 0.98, "grad_norm": 0.4220552146434784, "learning_rate": 0.00045485218463736025, "loss": 1.8053, "step": 29518 }, { "epoch": 0.98, "grad_norm": 0.4382162392139435, "learning_rate": 0.00045484323247520323, "loss": 1.7959, "step": 29519 }, { "epoch": 0.98, "grad_norm": 0.43653252720832825, "learning_rate": 0.00045483428012508945, "loss": 1.7434, "step": 29520 }, { "epoch": 0.98, "grad_norm": 0.4616120457649231, "learning_rate": 0.0004548253275870296, "loss": 1.8432, "step": 29521 }, { "epoch": 0.98, "grad_norm": 0.49116015434265137, "learning_rate": 0.0004548163748610346, "loss": 1.869, "step": 29522 }, { "epoch": 0.98, "grad_norm": 0.4516546428203583, "learning_rate": 0.0004548074219471154, "loss": 1.7612, "step": 29523 }, { "epoch": 0.98, "grad_norm": 0.4487350881099701, "learning_rate": 0.00045479846884528266, "loss": 1.7606, "step": 29524 }, { "epoch": 0.98, "grad_norm": 0.4561789333820343, "learning_rate": 0.00045478951555554746, "loss": 1.8199, "step": 29525 }, { "epoch": 0.98, "grad_norm": 0.464622437953949, "learning_rate": 0.0004547805620779207, "loss": 1.805, "step": 29526 }, { "epoch": 0.98, "grad_norm": 0.4433201849460602, "learning_rate": 0.00045477160841241303, "loss": 1.7724, "step": 29527 }, { "epoch": 0.98, "grad_norm": 0.43540331721305847, "learning_rate": 0.0004547626545590355, "loss": 1.8545, "step": 29528 }, { "epoch": 0.98, "grad_norm": 0.45658642053604126, "learning_rate": 0.0004547537005177989, "loss": 1.8905, "step": 29529 }, { "epoch": 0.98, "grad_norm": 0.4607885777950287, "learning_rate": 0.000454744746288714, "loss": 1.764, "step": 29530 }, { "epoch": 0.98, "grad_norm": 0.4391157627105713, "learning_rate": 0.0004547357918717919, "loss": 1.7858, "step": 29531 }, { "epoch": 0.98, "grad_norm": 0.4447687268257141, "learning_rate": 0.0004547268372670433, "loss": 1.7981, "step": 29532 }, { "epoch": 0.98, "grad_norm": 0.4671028256416321, "learning_rate": 0.00045471788247447906, "loss": 1.7763, "step": 29533 }, { "epoch": 0.98, "grad_norm": 0.4530639946460724, "learning_rate": 0.0004547089274941102, "loss": 1.8613, "step": 29534 }, { "epoch": 0.98, "grad_norm": 0.45116302371025085, "learning_rate": 0.00045469997232594744, "loss": 1.8055, "step": 29535 }, { "epoch": 0.98, "grad_norm": 0.4680338203907013, "learning_rate": 0.0004546910169700018, "loss": 1.8491, "step": 29536 }, { "epoch": 0.98, "grad_norm": 0.44660165905952454, "learning_rate": 0.00045468206142628407, "loss": 1.8346, "step": 29537 }, { "epoch": 0.98, "grad_norm": 0.45680102705955505, "learning_rate": 0.00045467310569480496, "loss": 1.8318, "step": 29538 }, { "epoch": 0.98, "grad_norm": 0.4869769215583801, "learning_rate": 0.00045466414977557554, "loss": 1.7582, "step": 29539 }, { "epoch": 0.98, "grad_norm": 0.4450622797012329, "learning_rate": 0.00045465519366860664, "loss": 1.8295, "step": 29540 }, { "epoch": 0.98, "grad_norm": 0.452729195356369, "learning_rate": 0.0004546462373739092, "loss": 1.884, "step": 29541 }, { "epoch": 0.98, "grad_norm": 0.4358969032764435, "learning_rate": 0.0004546372808914939, "loss": 1.8079, "step": 29542 }, { "epoch": 0.98, "grad_norm": 0.4628795087337494, "learning_rate": 0.0004546283242213718, "loss": 1.7912, "step": 29543 }, { "epoch": 0.98, "grad_norm": 0.44830769300460815, "learning_rate": 0.0004546193673635537, "loss": 1.9016, "step": 29544 }, { "epoch": 0.98, "grad_norm": 0.45659926533699036, "learning_rate": 0.0004546104103180504, "loss": 1.7727, "step": 29545 }, { "epoch": 0.98, "grad_norm": 0.46434536576271057, "learning_rate": 0.00045460145308487296, "loss": 1.7925, "step": 29546 }, { "epoch": 0.98, "grad_norm": 0.49778932332992554, "learning_rate": 0.00045459249566403204, "loss": 1.7693, "step": 29547 }, { "epoch": 0.98, "grad_norm": 0.44301244616508484, "learning_rate": 0.0004545835380555386, "loss": 1.8252, "step": 29548 }, { "epoch": 0.98, "grad_norm": 0.4481489658355713, "learning_rate": 0.00045457458025940353, "loss": 1.7924, "step": 29549 }, { "epoch": 0.98, "grad_norm": 0.46992844343185425, "learning_rate": 0.0004545656222756377, "loss": 1.7834, "step": 29550 }, { "epoch": 0.98, "grad_norm": 0.4672147035598755, "learning_rate": 0.00045455666410425206, "loss": 1.8486, "step": 29551 }, { "epoch": 0.98, "grad_norm": 0.4726889133453369, "learning_rate": 0.0004545477057452573, "loss": 1.7696, "step": 29552 }, { "epoch": 0.98, "grad_norm": 0.4542383551597595, "learning_rate": 0.0004545387471986645, "loss": 1.793, "step": 29553 }, { "epoch": 0.98, "grad_norm": 0.4643782079219818, "learning_rate": 0.0004545297884644843, "loss": 1.7956, "step": 29554 }, { "epoch": 0.98, "grad_norm": 0.46646881103515625, "learning_rate": 0.0004545208295427278, "loss": 1.7381, "step": 29555 }, { "epoch": 0.98, "grad_norm": 0.4607292711734772, "learning_rate": 0.0004545118704334057, "loss": 1.8546, "step": 29556 }, { "epoch": 0.98, "grad_norm": 0.4442656934261322, "learning_rate": 0.00045450291113652904, "loss": 1.7986, "step": 29557 }, { "epoch": 0.98, "grad_norm": 0.4860931932926178, "learning_rate": 0.0004544939516521086, "loss": 1.8203, "step": 29558 }, { "epoch": 0.98, "grad_norm": 0.45367079973220825, "learning_rate": 0.00045448499198015525, "loss": 1.7362, "step": 29559 }, { "epoch": 0.98, "grad_norm": 0.4604632556438446, "learning_rate": 0.0004544760321206798, "loss": 1.8333, "step": 29560 }, { "epoch": 0.98, "grad_norm": 0.4511682689189911, "learning_rate": 0.0004544670720736933, "loss": 1.7949, "step": 29561 }, { "epoch": 0.98, "grad_norm": 0.44435232877731323, "learning_rate": 0.00045445811183920655, "loss": 1.8097, "step": 29562 }, { "epoch": 0.98, "grad_norm": 0.4501168131828308, "learning_rate": 0.00045444915141723037, "loss": 1.8191, "step": 29563 }, { "epoch": 0.98, "grad_norm": 0.4552648365497589, "learning_rate": 0.00045444019080777574, "loss": 1.8353, "step": 29564 }, { "epoch": 0.98, "grad_norm": 0.4392910599708557, "learning_rate": 0.0004544312300108534, "loss": 1.7041, "step": 29565 }, { "epoch": 0.98, "grad_norm": 0.4567071497440338, "learning_rate": 0.00045442226902647434, "loss": 1.7785, "step": 29566 }, { "epoch": 0.98, "grad_norm": 0.4438157379627228, "learning_rate": 0.00045441330785464935, "loss": 1.7955, "step": 29567 }, { "epoch": 0.98, "grad_norm": 0.4466065764427185, "learning_rate": 0.00045440434649538934, "loss": 1.8864, "step": 29568 }, { "epoch": 0.98, "grad_norm": 0.4478532671928406, "learning_rate": 0.0004543953849487053, "loss": 1.7736, "step": 29569 }, { "epoch": 0.98, "grad_norm": 0.45987674593925476, "learning_rate": 0.0004543864232146079, "loss": 1.8446, "step": 29570 }, { "epoch": 0.98, "grad_norm": 0.45033615827560425, "learning_rate": 0.00045437746129310826, "loss": 1.809, "step": 29571 }, { "epoch": 0.98, "grad_norm": 0.45775550603866577, "learning_rate": 0.0004543684991842171, "loss": 1.878, "step": 29572 }, { "epoch": 0.98, "grad_norm": 0.44469955563545227, "learning_rate": 0.0004543595368879452, "loss": 1.7697, "step": 29573 }, { "epoch": 0.98, "grad_norm": 0.4471694231033325, "learning_rate": 0.00045435057440430365, "loss": 1.7876, "step": 29574 }, { "epoch": 0.98, "grad_norm": 0.4611969292163849, "learning_rate": 0.00045434161173330326, "loss": 1.721, "step": 29575 }, { "epoch": 0.98, "grad_norm": 0.4678970277309418, "learning_rate": 0.0004543326488749549, "loss": 1.893, "step": 29576 }, { "epoch": 0.98, "grad_norm": 0.461324542760849, "learning_rate": 0.00045432368582926943, "loss": 1.8424, "step": 29577 }, { "epoch": 0.98, "grad_norm": 0.44381284713745117, "learning_rate": 0.0004543147225962577, "loss": 1.7179, "step": 29578 }, { "epoch": 0.98, "grad_norm": 0.4447699189186096, "learning_rate": 0.0004543057591759307, "loss": 1.7756, "step": 29579 }, { "epoch": 0.98, "grad_norm": 0.46448954939842224, "learning_rate": 0.00045429679556829926, "loss": 1.8022, "step": 29580 }, { "epoch": 0.98, "grad_norm": 0.4517310559749603, "learning_rate": 0.0004542878317733742, "loss": 1.8309, "step": 29581 }, { "epoch": 0.98, "grad_norm": 0.45090243220329285, "learning_rate": 0.0004542788677911664, "loss": 1.8042, "step": 29582 }, { "epoch": 0.98, "grad_norm": 0.44644173979759216, "learning_rate": 0.0004542699036216869, "loss": 1.8167, "step": 29583 }, { "epoch": 0.98, "grad_norm": 0.45315423607826233, "learning_rate": 0.00045426093926494637, "loss": 1.7974, "step": 29584 }, { "epoch": 0.98, "grad_norm": 0.4375012218952179, "learning_rate": 0.00045425197472095583, "loss": 1.8524, "step": 29585 }, { "epoch": 0.98, "grad_norm": 0.4484083652496338, "learning_rate": 0.0004542430099897262, "loss": 1.8615, "step": 29586 }, { "epoch": 0.98, "grad_norm": 0.4382498562335968, "learning_rate": 0.0004542340450712681, "loss": 1.779, "step": 29587 }, { "epoch": 0.98, "grad_norm": 0.4540705382823944, "learning_rate": 0.00045422507996559274, "loss": 1.7468, "step": 29588 }, { "epoch": 0.98, "grad_norm": 0.45426055788993835, "learning_rate": 0.0004542161146727108, "loss": 1.7578, "step": 29589 }, { "epoch": 0.98, "grad_norm": 0.4443879723548889, "learning_rate": 0.0004542071491926333, "loss": 1.7391, "step": 29590 }, { "epoch": 0.98, "grad_norm": 0.45028156042099, "learning_rate": 0.0004541981835253709, "loss": 1.8572, "step": 29591 }, { "epoch": 0.98, "grad_norm": 0.4407826066017151, "learning_rate": 0.0004541892176709347, "loss": 1.8078, "step": 29592 }, { "epoch": 0.98, "grad_norm": 0.45613107085227966, "learning_rate": 0.00045418025162933547, "loss": 1.7606, "step": 29593 }, { "epoch": 0.98, "grad_norm": 0.4566732347011566, "learning_rate": 0.0004541712854005842, "loss": 1.8082, "step": 29594 }, { "epoch": 0.98, "grad_norm": 0.45974892377853394, "learning_rate": 0.00045416231898469167, "loss": 1.8644, "step": 29595 }, { "epoch": 0.98, "grad_norm": 0.4460458755493164, "learning_rate": 0.00045415335238166877, "loss": 1.7928, "step": 29596 }, { "epoch": 0.98, "grad_norm": 0.4578416645526886, "learning_rate": 0.0004541443855915265, "loss": 1.8398, "step": 29597 }, { "epoch": 0.98, "grad_norm": 0.45202773809432983, "learning_rate": 0.0004541354186142755, "loss": 1.8756, "step": 29598 }, { "epoch": 0.98, "grad_norm": 0.43556562066078186, "learning_rate": 0.000454126451449927, "loss": 1.7365, "step": 29599 }, { "epoch": 0.98, "grad_norm": 0.43643611669540405, "learning_rate": 0.0004541174840984916, "loss": 1.81, "step": 29600 }, { "epoch": 0.98, "grad_norm": 0.4420241117477417, "learning_rate": 0.0004541085165599803, "loss": 1.7178, "step": 29601 }, { "epoch": 0.98, "grad_norm": 0.46570345759391785, "learning_rate": 0.0004540995488344039, "loss": 1.8423, "step": 29602 }, { "epoch": 0.98, "grad_norm": 0.4364038109779358, "learning_rate": 0.00045409058092177345, "loss": 1.7404, "step": 29603 }, { "epoch": 0.98, "grad_norm": 0.4484257996082306, "learning_rate": 0.0004540816128220997, "loss": 1.7692, "step": 29604 }, { "epoch": 0.98, "grad_norm": 0.46684181690216064, "learning_rate": 0.00045407264453539356, "loss": 1.7836, "step": 29605 }, { "epoch": 0.98, "grad_norm": 0.47870340943336487, "learning_rate": 0.00045406367606166606, "loss": 1.7774, "step": 29606 }, { "epoch": 0.99, "grad_norm": 0.4671614170074463, "learning_rate": 0.0004540547074009277, "loss": 1.8065, "step": 29607 }, { "epoch": 0.99, "grad_norm": 0.43548208475112915, "learning_rate": 0.0004540457385531899, "loss": 1.685, "step": 29608 }, { "epoch": 0.99, "grad_norm": 0.4725225567817688, "learning_rate": 0.0004540367695184631, "loss": 1.8536, "step": 29609 }, { "epoch": 0.99, "grad_norm": 0.4517793655395508, "learning_rate": 0.0004540278002967584, "loss": 1.8279, "step": 29610 }, { "epoch": 0.99, "grad_norm": 0.44545602798461914, "learning_rate": 0.0004540188308880866, "loss": 1.7827, "step": 29611 }, { "epoch": 0.99, "grad_norm": 0.43860816955566406, "learning_rate": 0.00045400986129245864, "loss": 1.8478, "step": 29612 }, { "epoch": 0.99, "grad_norm": 0.4540325403213501, "learning_rate": 0.0004540008915098856, "loss": 1.7893, "step": 29613 }, { "epoch": 0.99, "grad_norm": 0.4527161121368408, "learning_rate": 0.0004539919215403779, "loss": 1.8208, "step": 29614 }, { "epoch": 0.99, "grad_norm": 0.44871264696121216, "learning_rate": 0.00045398295138394685, "loss": 1.8758, "step": 29615 }, { "epoch": 0.99, "grad_norm": 0.4309612810611725, "learning_rate": 0.0004539739810406031, "loss": 1.8224, "step": 29616 }, { "epoch": 0.99, "grad_norm": 0.4618963897228241, "learning_rate": 0.0004539650105103577, "loss": 1.7572, "step": 29617 }, { "epoch": 0.99, "grad_norm": 0.45031923055648804, "learning_rate": 0.00045395603979322144, "loss": 1.7568, "step": 29618 }, { "epoch": 0.99, "grad_norm": 0.8973989486694336, "learning_rate": 0.0004539470688892052, "loss": 1.8915, "step": 29619 }, { "epoch": 0.99, "grad_norm": 0.4428872764110565, "learning_rate": 0.0004539380977983199, "loss": 1.853, "step": 29620 }, { "epoch": 0.99, "grad_norm": 0.4373674988746643, "learning_rate": 0.00045392912652057646, "loss": 1.7776, "step": 29621 }, { "epoch": 0.99, "grad_norm": 0.4539867639541626, "learning_rate": 0.0004539201550559857, "loss": 1.8264, "step": 29622 }, { "epoch": 0.99, "grad_norm": 0.45125100016593933, "learning_rate": 0.0004539111834045586, "loss": 1.8219, "step": 29623 }, { "epoch": 0.99, "grad_norm": 0.4668910503387451, "learning_rate": 0.00045390221156630604, "loss": 1.8351, "step": 29624 }, { "epoch": 0.99, "grad_norm": 0.433188796043396, "learning_rate": 0.0004538932395412387, "loss": 1.8086, "step": 29625 }, { "epoch": 0.99, "grad_norm": 0.4581879675388336, "learning_rate": 0.0004538842673293679, "loss": 1.8526, "step": 29626 }, { "epoch": 0.99, "grad_norm": 0.46365225315093994, "learning_rate": 0.0004538752949307041, "loss": 1.7193, "step": 29627 }, { "epoch": 0.99, "grad_norm": 0.4587680995464325, "learning_rate": 0.0004538663223452583, "loss": 1.7938, "step": 29628 }, { "epoch": 0.99, "grad_norm": 0.4416976869106293, "learning_rate": 0.00045385734957304163, "loss": 1.8171, "step": 29629 }, { "epoch": 0.99, "grad_norm": 0.46751198172569275, "learning_rate": 0.0004538483766140647, "loss": 1.8678, "step": 29630 }, { "epoch": 0.99, "grad_norm": 0.47167813777923584, "learning_rate": 0.0004538394034683386, "loss": 1.8054, "step": 29631 }, { "epoch": 0.99, "grad_norm": 0.45616763830184937, "learning_rate": 0.000453830430135874, "loss": 1.8275, "step": 29632 }, { "epoch": 0.99, "grad_norm": 0.45086559653282166, "learning_rate": 0.000453821456616682, "loss": 1.7867, "step": 29633 }, { "epoch": 0.99, "grad_norm": 0.45520085096359253, "learning_rate": 0.0004538124829107734, "loss": 1.8092, "step": 29634 }, { "epoch": 0.99, "grad_norm": 0.4897831976413727, "learning_rate": 0.0004538035090181592, "loss": 1.7455, "step": 29635 }, { "epoch": 0.99, "grad_norm": 0.4576951861381531, "learning_rate": 0.00045379453493885004, "loss": 1.849, "step": 29636 }, { "epoch": 0.99, "grad_norm": 0.44762811064720154, "learning_rate": 0.00045378556067285705, "loss": 1.7986, "step": 29637 }, { "epoch": 0.99, "grad_norm": 0.4432031214237213, "learning_rate": 0.0004537765862201911, "loss": 1.7554, "step": 29638 }, { "epoch": 0.99, "grad_norm": 0.45127934217453003, "learning_rate": 0.000453767611580863, "loss": 1.7919, "step": 29639 }, { "epoch": 0.99, "grad_norm": 0.4495859146118164, "learning_rate": 0.0004537586367548837, "loss": 1.7947, "step": 29640 }, { "epoch": 0.99, "grad_norm": 0.43251997232437134, "learning_rate": 0.00045374966174226394, "loss": 1.7527, "step": 29641 }, { "epoch": 0.99, "grad_norm": 0.4368961751461029, "learning_rate": 0.0004537406865430149, "loss": 1.7731, "step": 29642 }, { "epoch": 0.99, "grad_norm": 0.4461139440536499, "learning_rate": 0.0004537317111571472, "loss": 1.8069, "step": 29643 }, { "epoch": 0.99, "grad_norm": 0.4577791690826416, "learning_rate": 0.00045372273558467194, "loss": 1.8445, "step": 29644 }, { "epoch": 0.99, "grad_norm": 0.45450347661972046, "learning_rate": 0.00045371375982559997, "loss": 1.8252, "step": 29645 }, { "epoch": 0.99, "grad_norm": 0.46496036648750305, "learning_rate": 0.000453704783879942, "loss": 1.8469, "step": 29646 }, { "epoch": 0.99, "grad_norm": 0.4472672939300537, "learning_rate": 0.00045369580774770916, "loss": 1.8035, "step": 29647 }, { "epoch": 0.99, "grad_norm": 0.45226019620895386, "learning_rate": 0.00045368683142891223, "loss": 1.883, "step": 29648 }, { "epoch": 0.99, "grad_norm": 0.4649673104286194, "learning_rate": 0.00045367785492356216, "loss": 1.7993, "step": 29649 }, { "epoch": 0.99, "grad_norm": 0.46747028827667236, "learning_rate": 0.0004536688782316698, "loss": 1.7796, "step": 29650 }, { "epoch": 0.99, "grad_norm": 0.45267191529273987, "learning_rate": 0.0004536599013532461, "loss": 1.7794, "step": 29651 }, { "epoch": 0.99, "grad_norm": 0.44763436913490295, "learning_rate": 0.00045365092428830176, "loss": 1.8088, "step": 29652 }, { "epoch": 0.99, "grad_norm": 0.4317234754562378, "learning_rate": 0.000453641947036848, "loss": 1.8588, "step": 29653 }, { "epoch": 0.99, "grad_norm": 0.43947649002075195, "learning_rate": 0.0004536329695988956, "loss": 1.7659, "step": 29654 }, { "epoch": 0.99, "grad_norm": 0.4615561068058014, "learning_rate": 0.0004536239919744552, "loss": 1.8071, "step": 29655 }, { "epoch": 0.99, "grad_norm": 0.44216951727867126, "learning_rate": 0.0004536150141635381, "loss": 1.8203, "step": 29656 }, { "epoch": 0.99, "grad_norm": 0.460332989692688, "learning_rate": 0.0004536060361661549, "loss": 1.7453, "step": 29657 }, { "epoch": 0.99, "grad_norm": 0.4343295991420746, "learning_rate": 0.0004535970579823167, "loss": 1.7955, "step": 29658 }, { "epoch": 0.99, "grad_norm": 0.43950363993644714, "learning_rate": 0.00045358807961203424, "loss": 1.7485, "step": 29659 }, { "epoch": 0.99, "grad_norm": 0.4483816921710968, "learning_rate": 0.0004535791010553185, "loss": 1.7808, "step": 29660 }, { "epoch": 0.99, "grad_norm": 0.4638853967189789, "learning_rate": 0.00045357012231218044, "loss": 1.8197, "step": 29661 }, { "epoch": 0.99, "grad_norm": 0.4725438952445984, "learning_rate": 0.0004535611433826307, "loss": 1.8052, "step": 29662 }, { "epoch": 0.99, "grad_norm": 0.4535047709941864, "learning_rate": 0.0004535521642666804, "loss": 1.861, "step": 29663 }, { "epoch": 0.99, "grad_norm": 0.45828187465667725, "learning_rate": 0.0004535431849643405, "loss": 1.8551, "step": 29664 }, { "epoch": 0.99, "grad_norm": 0.46038973331451416, "learning_rate": 0.00045353420547562177, "loss": 1.8256, "step": 29665 }, { "epoch": 0.99, "grad_norm": 0.4673827290534973, "learning_rate": 0.0004535252258005351, "loss": 1.7955, "step": 29666 }, { "epoch": 0.99, "grad_norm": 0.447823166847229, "learning_rate": 0.0004535162459390914, "loss": 1.8275, "step": 29667 }, { "epoch": 0.99, "grad_norm": 0.45070409774780273, "learning_rate": 0.00045350726589130167, "loss": 1.7841, "step": 29668 }, { "epoch": 0.99, "grad_norm": 0.4481474459171295, "learning_rate": 0.00045349828565717667, "loss": 1.8214, "step": 29669 }, { "epoch": 0.99, "grad_norm": 0.4345066249370575, "learning_rate": 0.0004534893052367274, "loss": 1.7688, "step": 29670 }, { "epoch": 0.99, "grad_norm": 0.4683457911014557, "learning_rate": 0.00045348032462996476, "loss": 1.8404, "step": 29671 }, { "epoch": 0.99, "grad_norm": 0.4489046335220337, "learning_rate": 0.00045347134383689954, "loss": 1.8293, "step": 29672 }, { "epoch": 0.99, "grad_norm": 0.44355934858322144, "learning_rate": 0.0004534623628575428, "loss": 1.8083, "step": 29673 }, { "epoch": 0.99, "grad_norm": 0.46529489755630493, "learning_rate": 0.00045345338169190533, "loss": 1.8667, "step": 29674 }, { "epoch": 0.99, "grad_norm": 0.4409695863723755, "learning_rate": 0.0004534444003399981, "loss": 1.7959, "step": 29675 }, { "epoch": 0.99, "grad_norm": 0.4649120271205902, "learning_rate": 0.00045343541880183194, "loss": 1.7914, "step": 29676 }, { "epoch": 0.99, "grad_norm": 0.43773630261421204, "learning_rate": 0.00045342643707741776, "loss": 1.7476, "step": 29677 }, { "epoch": 0.99, "grad_norm": 0.4637868404388428, "learning_rate": 0.0004534174551667665, "loss": 1.8399, "step": 29678 }, { "epoch": 0.99, "grad_norm": 0.4449467658996582, "learning_rate": 0.0004534084730698891, "loss": 1.7966, "step": 29679 }, { "epoch": 0.99, "grad_norm": 0.43908780813217163, "learning_rate": 0.0004533994907867964, "loss": 1.8269, "step": 29680 }, { "epoch": 0.99, "grad_norm": 0.4520626366138458, "learning_rate": 0.00045339050831749933, "loss": 1.7501, "step": 29681 }, { "epoch": 0.99, "grad_norm": 0.4503156542778015, "learning_rate": 0.00045338152566200876, "loss": 1.8303, "step": 29682 }, { "epoch": 0.99, "grad_norm": 0.461151659488678, "learning_rate": 0.0004533725428203357, "loss": 1.7801, "step": 29683 }, { "epoch": 0.99, "grad_norm": 0.442329466342926, "learning_rate": 0.0004533635597924908, "loss": 1.8552, "step": 29684 }, { "epoch": 0.99, "grad_norm": 0.44888582825660706, "learning_rate": 0.0004533545765784853, "loss": 1.8726, "step": 29685 }, { "epoch": 0.99, "grad_norm": 0.43406590819358826, "learning_rate": 0.0004533455931783299, "loss": 1.7559, "step": 29686 }, { "epoch": 0.99, "grad_norm": 0.46274086833000183, "learning_rate": 0.00045333660959203547, "loss": 1.8563, "step": 29687 }, { "epoch": 0.99, "grad_norm": 0.46847325563430786, "learning_rate": 0.0004533276258196131, "loss": 1.7988, "step": 29688 }, { "epoch": 0.99, "grad_norm": 0.45276904106140137, "learning_rate": 0.0004533186418610735, "loss": 1.7972, "step": 29689 }, { "epoch": 0.99, "grad_norm": 0.43048155307769775, "learning_rate": 0.00045330965771642765, "loss": 1.7735, "step": 29690 }, { "epoch": 0.99, "grad_norm": 0.4840872287750244, "learning_rate": 0.00045330067338568647, "loss": 1.7896, "step": 29691 }, { "epoch": 0.99, "grad_norm": 0.4495795667171478, "learning_rate": 0.0004532916888688609, "loss": 1.8491, "step": 29692 }, { "epoch": 0.99, "grad_norm": 0.4468412697315216, "learning_rate": 0.0004532827041659618, "loss": 1.8003, "step": 29693 }, { "epoch": 0.99, "grad_norm": 0.46233487129211426, "learning_rate": 0.00045327371927700007, "loss": 1.8321, "step": 29694 }, { "epoch": 0.99, "grad_norm": 0.45374706387519836, "learning_rate": 0.0004532647342019867, "loss": 1.8364, "step": 29695 }, { "epoch": 0.99, "grad_norm": 0.46196743845939636, "learning_rate": 0.00045325574894093245, "loss": 1.8425, "step": 29696 }, { "epoch": 0.99, "grad_norm": 0.44434869289398193, "learning_rate": 0.00045324676349384825, "loss": 1.8675, "step": 29697 }, { "epoch": 0.99, "grad_norm": 0.4577754735946655, "learning_rate": 0.0004532377778607451, "loss": 1.8155, "step": 29698 }, { "epoch": 0.99, "grad_norm": 0.8900316953659058, "learning_rate": 0.00045322879204163396, "loss": 1.8193, "step": 29699 }, { "epoch": 0.99, "grad_norm": 0.4840048849582672, "learning_rate": 0.00045321980603652554, "loss": 1.8153, "step": 29700 }, { "epoch": 0.99, "grad_norm": 0.45050615072250366, "learning_rate": 0.00045321081984543086, "loss": 1.7887, "step": 29701 }, { "epoch": 0.99, "grad_norm": 0.4478190541267395, "learning_rate": 0.00045320183346836087, "loss": 1.8208, "step": 29702 }, { "epoch": 0.99, "grad_norm": 0.4439690113067627, "learning_rate": 0.00045319284690532635, "loss": 1.8256, "step": 29703 }, { "epoch": 0.99, "grad_norm": 0.4440688490867615, "learning_rate": 0.0004531838601563383, "loss": 1.7716, "step": 29704 }, { "epoch": 0.99, "grad_norm": 0.4438987970352173, "learning_rate": 0.0004531748732214077, "loss": 1.7802, "step": 29705 }, { "epoch": 0.99, "grad_norm": 0.4350576102733612, "learning_rate": 0.00045316588610054537, "loss": 1.7925, "step": 29706 }, { "epoch": 0.99, "grad_norm": 0.4575529396533966, "learning_rate": 0.0004531568987937622, "loss": 1.8018, "step": 29707 }, { "epoch": 0.99, "grad_norm": 0.4370596706867218, "learning_rate": 0.00045314791130106905, "loss": 1.7392, "step": 29708 }, { "epoch": 0.99, "grad_norm": 0.45865651965141296, "learning_rate": 0.0004531389236224769, "loss": 1.8587, "step": 29709 }, { "epoch": 0.99, "grad_norm": 0.4322330951690674, "learning_rate": 0.0004531299357579968, "loss": 1.799, "step": 29710 }, { "epoch": 0.99, "grad_norm": 0.45982837677001953, "learning_rate": 0.00045312094770763947, "loss": 1.8572, "step": 29711 }, { "epoch": 0.99, "grad_norm": 0.4429190456867218, "learning_rate": 0.00045311195947141575, "loss": 1.7868, "step": 29712 }, { "epoch": 0.99, "grad_norm": 0.45056280493736267, "learning_rate": 0.0004531029710493368, "loss": 1.7465, "step": 29713 }, { "epoch": 0.99, "grad_norm": 0.4565482437610626, "learning_rate": 0.0004530939824414133, "loss": 1.8188, "step": 29714 }, { "epoch": 0.99, "grad_norm": 0.46199023723602295, "learning_rate": 0.0004530849936476563, "loss": 1.8066, "step": 29715 }, { "epoch": 0.99, "grad_norm": 0.4548037052154541, "learning_rate": 0.0004530760046680767, "loss": 1.8492, "step": 29716 }, { "epoch": 0.99, "grad_norm": 0.441659539937973, "learning_rate": 0.0004530670155026853, "loss": 1.7844, "step": 29717 }, { "epoch": 0.99, "grad_norm": 0.44366639852523804, "learning_rate": 0.00045305802615149324, "loss": 1.7945, "step": 29718 }, { "epoch": 0.99, "grad_norm": 0.4478062689304352, "learning_rate": 0.0004530490366145112, "loss": 1.8343, "step": 29719 }, { "epoch": 0.99, "grad_norm": 0.45611807703971863, "learning_rate": 0.00045304004689175023, "loss": 1.8665, "step": 29720 }, { "epoch": 0.99, "grad_norm": 0.45369839668273926, "learning_rate": 0.00045303105698322115, "loss": 1.7873, "step": 29721 }, { "epoch": 0.99, "grad_norm": 0.4375801980495453, "learning_rate": 0.0004530220668889349, "loss": 1.8027, "step": 29722 }, { "epoch": 0.99, "grad_norm": 0.4428004026412964, "learning_rate": 0.00045301307660890254, "loss": 1.7837, "step": 29723 }, { "epoch": 0.99, "grad_norm": 0.4436103105545044, "learning_rate": 0.0004530040861431347, "loss": 1.8177, "step": 29724 }, { "epoch": 0.99, "grad_norm": 0.4457666575908661, "learning_rate": 0.00045299509549164246, "loss": 1.8478, "step": 29725 }, { "epoch": 0.99, "grad_norm": 0.45088404417037964, "learning_rate": 0.0004529861046544368, "loss": 1.8894, "step": 29726 }, { "epoch": 0.99, "grad_norm": 0.43880417943000793, "learning_rate": 0.00045297711363152847, "loss": 1.792, "step": 29727 }, { "epoch": 0.99, "grad_norm": 0.43583136796951294, "learning_rate": 0.0004529681224229284, "loss": 1.8417, "step": 29728 }, { "epoch": 0.99, "grad_norm": 0.4460192620754242, "learning_rate": 0.0004529591310286477, "loss": 1.8246, "step": 29729 }, { "epoch": 0.99, "grad_norm": 0.4512922167778015, "learning_rate": 0.00045295013944869714, "loss": 1.869, "step": 29730 }, { "epoch": 0.99, "grad_norm": 0.46062248945236206, "learning_rate": 0.0004529411476830876, "loss": 1.8698, "step": 29731 }, { "epoch": 0.99, "grad_norm": 0.4528956711292267, "learning_rate": 0.00045293215573183007, "loss": 1.7175, "step": 29732 }, { "epoch": 0.99, "grad_norm": 0.46230068802833557, "learning_rate": 0.0004529231635949354, "loss": 1.8093, "step": 29733 }, { "epoch": 0.99, "grad_norm": 0.4416408836841583, "learning_rate": 0.0004529141712724145, "loss": 1.8019, "step": 29734 }, { "epoch": 0.99, "grad_norm": 0.4450050890445709, "learning_rate": 0.0004529051787642785, "loss": 1.789, "step": 29735 }, { "epoch": 0.99, "grad_norm": 0.44661611318588257, "learning_rate": 0.00045289618607053804, "loss": 1.8137, "step": 29736 }, { "epoch": 0.99, "grad_norm": 0.4435730576515198, "learning_rate": 0.0004528871931912041, "loss": 1.8197, "step": 29737 }, { "epoch": 0.99, "grad_norm": 0.4547422528266907, "learning_rate": 0.0004528782001262877, "loss": 1.8437, "step": 29738 }, { "epoch": 0.99, "grad_norm": 0.45010456442832947, "learning_rate": 0.00045286920687579964, "loss": 1.802, "step": 29739 }, { "epoch": 0.99, "grad_norm": 0.4444938004016876, "learning_rate": 0.00045286021343975087, "loss": 1.805, "step": 29740 }, { "epoch": 0.99, "grad_norm": 0.43356868624687195, "learning_rate": 0.0004528512198181524, "loss": 1.7866, "step": 29741 }, { "epoch": 0.99, "grad_norm": 0.4370516538619995, "learning_rate": 0.000452842226011015, "loss": 1.7445, "step": 29742 }, { "epoch": 0.99, "grad_norm": 0.45447516441345215, "learning_rate": 0.0004528332320183497, "loss": 1.8021, "step": 29743 }, { "epoch": 0.99, "grad_norm": 0.4558578133583069, "learning_rate": 0.0004528242378401674, "loss": 1.7286, "step": 29744 }, { "epoch": 0.99, "grad_norm": 0.4584032893180847, "learning_rate": 0.00045281524347647885, "loss": 1.7978, "step": 29745 }, { "epoch": 0.99, "grad_norm": 0.450061172246933, "learning_rate": 0.00045280624892729527, "loss": 1.8461, "step": 29746 }, { "epoch": 0.99, "grad_norm": 0.4594930112361908, "learning_rate": 0.0004527972541926274, "loss": 1.857, "step": 29747 }, { "epoch": 0.99, "grad_norm": 0.4554876387119293, "learning_rate": 0.0004527882592724861, "loss": 1.823, "step": 29748 }, { "epoch": 0.99, "grad_norm": 0.44908544421195984, "learning_rate": 0.00045277926416688243, "loss": 1.8194, "step": 29749 }, { "epoch": 0.99, "grad_norm": 0.4655761420726776, "learning_rate": 0.00045277026887582716, "loss": 1.7425, "step": 29750 }, { "epoch": 0.99, "grad_norm": 0.4522351622581482, "learning_rate": 0.00045276127339933137, "loss": 1.7912, "step": 29751 }, { "epoch": 0.99, "grad_norm": 0.447903037071228, "learning_rate": 0.0004527522777374059, "loss": 1.7613, "step": 29752 }, { "epoch": 0.99, "grad_norm": 0.46480292081832886, "learning_rate": 0.0004527432818900616, "loss": 1.8281, "step": 29753 }, { "epoch": 0.99, "grad_norm": 0.4592609107494354, "learning_rate": 0.00045273428585730955, "loss": 1.8231, "step": 29754 }, { "epoch": 0.99, "grad_norm": 0.43236586451530457, "learning_rate": 0.0004527252896391606, "loss": 1.8508, "step": 29755 }, { "epoch": 0.99, "grad_norm": 0.44907668232917786, "learning_rate": 0.0004527162932356256, "loss": 1.7632, "step": 29756 }, { "epoch": 0.99, "grad_norm": 0.43880611658096313, "learning_rate": 0.00045270729664671555, "loss": 1.8524, "step": 29757 }, { "epoch": 0.99, "grad_norm": 0.4593333601951599, "learning_rate": 0.00045269829987244135, "loss": 1.7638, "step": 29758 }, { "epoch": 0.99, "grad_norm": 0.44045472145080566, "learning_rate": 0.0004526893029128138, "loss": 1.7776, "step": 29759 }, { "epoch": 0.99, "grad_norm": 0.453603595495224, "learning_rate": 0.00045268030576784405, "loss": 1.7688, "step": 29760 }, { "epoch": 0.99, "grad_norm": 0.4494662582874298, "learning_rate": 0.0004526713084375429, "loss": 1.814, "step": 29761 }, { "epoch": 0.99, "grad_norm": 0.45290607213974, "learning_rate": 0.00045266231092192124, "loss": 1.8465, "step": 29762 }, { "epoch": 0.99, "grad_norm": 0.4416837692260742, "learning_rate": 0.00045265331322099, "loss": 1.8083, "step": 29763 }, { "epoch": 0.99, "grad_norm": 0.43872204422950745, "learning_rate": 0.00045264431533476013, "loss": 1.8041, "step": 29764 }, { "epoch": 0.99, "grad_norm": 0.47030913829803467, "learning_rate": 0.00045263531726324265, "loss": 1.7493, "step": 29765 }, { "epoch": 0.99, "grad_norm": 0.4445836842060089, "learning_rate": 0.0004526263190064483, "loss": 1.8321, "step": 29766 }, { "epoch": 0.99, "grad_norm": 0.4595089554786682, "learning_rate": 0.00045261732056438807, "loss": 1.787, "step": 29767 }, { "epoch": 0.99, "grad_norm": 0.46039843559265137, "learning_rate": 0.000452608321937073, "loss": 1.8641, "step": 29768 }, { "epoch": 0.99, "grad_norm": 0.45410796999931335, "learning_rate": 0.0004525993231245139, "loss": 1.766, "step": 29769 }, { "epoch": 0.99, "grad_norm": 0.4610823690891266, "learning_rate": 0.0004525903241267216, "loss": 1.7991, "step": 29770 }, { "epoch": 0.99, "grad_norm": 0.4514979124069214, "learning_rate": 0.0004525813249437072, "loss": 1.7813, "step": 29771 }, { "epoch": 0.99, "grad_norm": 0.4435185492038727, "learning_rate": 0.00045257232557548155, "loss": 1.8059, "step": 29772 }, { "epoch": 0.99, "grad_norm": 0.44664105772972107, "learning_rate": 0.0004525633260220556, "loss": 1.7775, "step": 29773 }, { "epoch": 0.99, "grad_norm": 0.4617365002632141, "learning_rate": 0.0004525543262834402, "loss": 1.6685, "step": 29774 }, { "epoch": 0.99, "grad_norm": 0.4594988226890564, "learning_rate": 0.00045254532635964634, "loss": 1.8568, "step": 29775 }, { "epoch": 0.99, "grad_norm": 0.43582627177238464, "learning_rate": 0.0004525363262506849, "loss": 1.7765, "step": 29776 }, { "epoch": 0.99, "grad_norm": 0.4601321220397949, "learning_rate": 0.00045252732595656685, "loss": 1.7816, "step": 29777 }, { "epoch": 0.99, "grad_norm": 0.434128999710083, "learning_rate": 0.00045251832547730316, "loss": 1.7775, "step": 29778 }, { "epoch": 0.99, "grad_norm": 0.45717954635620117, "learning_rate": 0.0004525093248129046, "loss": 1.8673, "step": 29779 }, { "epoch": 0.99, "grad_norm": 0.45456480979919434, "learning_rate": 0.0004525003239633823, "loss": 1.8395, "step": 29780 }, { "epoch": 0.99, "grad_norm": 0.45125412940979004, "learning_rate": 0.000452491322928747, "loss": 1.8552, "step": 29781 }, { "epoch": 0.99, "grad_norm": 0.4430375397205353, "learning_rate": 0.00045248232170900973, "loss": 1.8624, "step": 29782 }, { "epoch": 0.99, "grad_norm": 0.4713295102119446, "learning_rate": 0.00045247332030418135, "loss": 1.806, "step": 29783 }, { "epoch": 0.99, "grad_norm": 0.48636680841445923, "learning_rate": 0.0004524643187142729, "loss": 1.8229, "step": 29784 }, { "epoch": 0.99, "grad_norm": 0.43826377391815186, "learning_rate": 0.0004524553169392953, "loss": 1.7918, "step": 29785 }, { "epoch": 0.99, "grad_norm": 0.43556490540504456, "learning_rate": 0.0004524463149792592, "loss": 1.7659, "step": 29786 }, { "epoch": 0.99, "grad_norm": 0.4617234766483307, "learning_rate": 0.00045243731283417586, "loss": 1.8568, "step": 29787 }, { "epoch": 0.99, "grad_norm": 0.48935964703559875, "learning_rate": 0.00045242831050405606, "loss": 1.726, "step": 29788 }, { "epoch": 0.99, "grad_norm": 0.44053739309310913, "learning_rate": 0.0004524193079889107, "loss": 1.7806, "step": 29789 }, { "epoch": 0.99, "grad_norm": 0.45971524715423584, "learning_rate": 0.00045241030528875086, "loss": 1.8267, "step": 29790 }, { "epoch": 0.99, "grad_norm": 0.43985715508461, "learning_rate": 0.0004524013024035873, "loss": 1.8557, "step": 29791 }, { "epoch": 0.99, "grad_norm": 0.45187267661094666, "learning_rate": 0.000452392299333431, "loss": 1.8477, "step": 29792 }, { "epoch": 0.99, "grad_norm": 0.465106338262558, "learning_rate": 0.00045238329607829295, "loss": 1.7486, "step": 29793 }, { "epoch": 0.99, "grad_norm": 1.1748666763305664, "learning_rate": 0.000452374292638184, "loss": 1.8429, "step": 29794 }, { "epoch": 0.99, "grad_norm": 0.4559951722621918, "learning_rate": 0.0004523652890131151, "loss": 1.7628, "step": 29795 }, { "epoch": 0.99, "grad_norm": 0.4581992030143738, "learning_rate": 0.0004523562852030972, "loss": 1.7495, "step": 29796 }, { "epoch": 0.99, "grad_norm": 0.5222730040550232, "learning_rate": 0.00045234728120814116, "loss": 1.7767, "step": 29797 }, { "epoch": 0.99, "grad_norm": 0.47488483786582947, "learning_rate": 0.0004523382770282582, "loss": 1.835, "step": 29798 }, { "epoch": 0.99, "grad_norm": 0.4463372528553009, "learning_rate": 0.00045232927266345877, "loss": 1.8173, "step": 29799 }, { "epoch": 0.99, "grad_norm": 0.4430933892726898, "learning_rate": 0.0004523202681137541, "loss": 1.886, "step": 29800 }, { "epoch": 0.99, "grad_norm": 0.49753713607788086, "learning_rate": 0.00045231126337915513, "loss": 1.7735, "step": 29801 }, { "epoch": 0.99, "grad_norm": 0.4933299720287323, "learning_rate": 0.00045230225845967274, "loss": 1.7719, "step": 29802 }, { "epoch": 0.99, "grad_norm": 0.4495234787464142, "learning_rate": 0.00045229325335531783, "loss": 1.8249, "step": 29803 }, { "epoch": 0.99, "grad_norm": 0.45985764265060425, "learning_rate": 0.00045228424806610125, "loss": 1.744, "step": 29804 }, { "epoch": 0.99, "grad_norm": 0.448615163564682, "learning_rate": 0.00045227524259203416, "loss": 1.6788, "step": 29805 }, { "epoch": 0.99, "grad_norm": 0.4653986692428589, "learning_rate": 0.00045226623693312725, "loss": 1.7965, "step": 29806 }, { "epoch": 0.99, "grad_norm": 0.45384180545806885, "learning_rate": 0.00045225723108939167, "loss": 1.8112, "step": 29807 }, { "epoch": 0.99, "grad_norm": 0.4440668523311615, "learning_rate": 0.0004522482250608381, "loss": 1.8162, "step": 29808 }, { "epoch": 0.99, "grad_norm": 0.4699316918849945, "learning_rate": 0.0004522392188474778, "loss": 1.822, "step": 29809 }, { "epoch": 0.99, "grad_norm": 0.4845268428325653, "learning_rate": 0.00045223021244932145, "loss": 1.823, "step": 29810 }, { "epoch": 0.99, "grad_norm": 0.462098091840744, "learning_rate": 0.0004522212058663799, "loss": 1.8613, "step": 29811 }, { "epoch": 0.99, "grad_norm": 0.443561851978302, "learning_rate": 0.00045221219909866454, "loss": 1.796, "step": 29812 }, { "epoch": 0.99, "grad_norm": 0.48000961542129517, "learning_rate": 0.00045220319214618577, "loss": 1.7469, "step": 29813 }, { "epoch": 0.99, "grad_norm": 0.45660072565078735, "learning_rate": 0.00045219418500895477, "loss": 1.8187, "step": 29814 }, { "epoch": 0.99, "grad_norm": 0.4360172748565674, "learning_rate": 0.00045218517768698255, "loss": 1.786, "step": 29815 }, { "epoch": 0.99, "grad_norm": 0.45731574296951294, "learning_rate": 0.00045217617018027985, "loss": 1.7424, "step": 29816 }, { "epoch": 0.99, "grad_norm": 0.4701174199581146, "learning_rate": 0.00045216716248885783, "loss": 1.7877, "step": 29817 }, { "epoch": 0.99, "grad_norm": 0.4518541097640991, "learning_rate": 0.0004521581546127271, "loss": 1.8246, "step": 29818 }, { "epoch": 0.99, "grad_norm": 0.42673876881599426, "learning_rate": 0.0004521491465518989, "loss": 1.8072, "step": 29819 }, { "epoch": 0.99, "grad_norm": 0.4558984935283661, "learning_rate": 0.00045214013830638405, "loss": 1.7671, "step": 29820 }, { "epoch": 0.99, "grad_norm": 0.4421125054359436, "learning_rate": 0.00045213112987619355, "loss": 1.7644, "step": 29821 }, { "epoch": 0.99, "grad_norm": 0.4587043225765228, "learning_rate": 0.00045212212126133816, "loss": 1.8061, "step": 29822 }, { "epoch": 0.99, "grad_norm": 0.4457819163799286, "learning_rate": 0.00045211311246182905, "loss": 1.75, "step": 29823 }, { "epoch": 0.99, "grad_norm": 0.448024719953537, "learning_rate": 0.000452104103477677, "loss": 1.846, "step": 29824 }, { "epoch": 0.99, "grad_norm": 0.4646964371204376, "learning_rate": 0.0004520950943088929, "loss": 1.7897, "step": 29825 }, { "epoch": 0.99, "grad_norm": 0.46351858973503113, "learning_rate": 0.0004520860849554879, "loss": 1.8242, "step": 29826 }, { "epoch": 0.99, "grad_norm": 0.4619501829147339, "learning_rate": 0.00045207707541747267, "loss": 1.811, "step": 29827 }, { "epoch": 0.99, "grad_norm": 0.44586312770843506, "learning_rate": 0.0004520680656948583, "loss": 1.7874, "step": 29828 }, { "epoch": 0.99, "grad_norm": 0.44710949063301086, "learning_rate": 0.0004520590557876557, "loss": 1.846, "step": 29829 }, { "epoch": 0.99, "grad_norm": 0.46619269251823425, "learning_rate": 0.00045205004569587586, "loss": 1.8038, "step": 29830 }, { "epoch": 0.99, "grad_norm": 0.4443572759628296, "learning_rate": 0.0004520410354195297, "loss": 1.7968, "step": 29831 }, { "epoch": 0.99, "grad_norm": 0.44378578662872314, "learning_rate": 0.00045203202495862804, "loss": 1.73, "step": 29832 }, { "epoch": 0.99, "grad_norm": 0.4638858735561371, "learning_rate": 0.000452023014313182, "loss": 1.7745, "step": 29833 }, { "epoch": 0.99, "grad_norm": 0.4417385458946228, "learning_rate": 0.00045201400348320226, "loss": 1.802, "step": 29834 }, { "epoch": 0.99, "grad_norm": 0.488709032535553, "learning_rate": 0.00045200499246870004, "loss": 1.7862, "step": 29835 }, { "epoch": 0.99, "grad_norm": 0.45485883951187134, "learning_rate": 0.0004519959812696861, "loss": 1.7879, "step": 29836 }, { "epoch": 0.99, "grad_norm": 0.4753173589706421, "learning_rate": 0.00045198696988617157, "loss": 1.8199, "step": 29837 }, { "epoch": 0.99, "grad_norm": 0.4638660252094269, "learning_rate": 0.00045197795831816706, "loss": 1.7073, "step": 29838 }, { "epoch": 0.99, "grad_norm": 0.4566975235939026, "learning_rate": 0.00045196894656568377, "loss": 1.8356, "step": 29839 }, { "epoch": 0.99, "grad_norm": 0.4705175459384918, "learning_rate": 0.0004519599346287327, "loss": 1.8703, "step": 29840 }, { "epoch": 0.99, "grad_norm": 0.45244133472442627, "learning_rate": 0.0004519509225073245, "loss": 1.766, "step": 29841 }, { "epoch": 0.99, "grad_norm": 0.4536316692829132, "learning_rate": 0.0004519419102014703, "loss": 1.781, "step": 29842 }, { "epoch": 0.99, "grad_norm": 0.4625411629676819, "learning_rate": 0.0004519328977111811, "loss": 1.8189, "step": 29843 }, { "epoch": 0.99, "grad_norm": 0.4592433571815491, "learning_rate": 0.00045192388503646763, "loss": 1.8406, "step": 29844 }, { "epoch": 0.99, "grad_norm": 0.47266140580177307, "learning_rate": 0.00045191487217734097, "loss": 1.7507, "step": 29845 }, { "epoch": 0.99, "grad_norm": 0.4698992669582367, "learning_rate": 0.000451905859133812, "loss": 1.8314, "step": 29846 }, { "epoch": 0.99, "grad_norm": 0.4585622251033783, "learning_rate": 0.00045189684590589185, "loss": 1.8459, "step": 29847 }, { "epoch": 0.99, "grad_norm": 0.5219648480415344, "learning_rate": 0.0004518878324935912, "loss": 1.882, "step": 29848 }, { "epoch": 0.99, "grad_norm": 0.4662918746471405, "learning_rate": 0.00045187881889692115, "loss": 1.7687, "step": 29849 }, { "epoch": 0.99, "grad_norm": 0.45688188076019287, "learning_rate": 0.00045186980511589255, "loss": 1.8147, "step": 29850 }, { "epoch": 0.99, "grad_norm": 0.45302438735961914, "learning_rate": 0.0004518607911505164, "loss": 1.7736, "step": 29851 }, { "epoch": 0.99, "grad_norm": 0.4425979256629944, "learning_rate": 0.0004518517770008036, "loss": 1.7825, "step": 29852 }, { "epoch": 0.99, "grad_norm": 0.45110684633255005, "learning_rate": 0.0004518427626667652, "loss": 1.7229, "step": 29853 }, { "epoch": 0.99, "grad_norm": 0.4591529667377472, "learning_rate": 0.000451833748148412, "loss": 1.8047, "step": 29854 }, { "epoch": 0.99, "grad_norm": 0.4454211890697479, "learning_rate": 0.000451824733445755, "loss": 1.7663, "step": 29855 }, { "epoch": 0.99, "grad_norm": 0.45177581906318665, "learning_rate": 0.0004518157185588051, "loss": 1.744, "step": 29856 }, { "epoch": 0.99, "grad_norm": 0.4463038444519043, "learning_rate": 0.00045180670348757333, "loss": 1.8011, "step": 29857 }, { "epoch": 0.99, "grad_norm": 0.44262269139289856, "learning_rate": 0.00045179768823207066, "loss": 1.8245, "step": 29858 }, { "epoch": 0.99, "grad_norm": 0.4578552842140198, "learning_rate": 0.0004517886727923078, "loss": 1.8946, "step": 29859 }, { "epoch": 0.99, "grad_norm": 0.46468472480773926, "learning_rate": 0.000451779657168296, "loss": 1.7288, "step": 29860 }, { "epoch": 0.99, "grad_norm": 0.4508829116821289, "learning_rate": 0.00045177064136004596, "loss": 1.7854, "step": 29861 }, { "epoch": 0.99, "grad_norm": 0.43883174657821655, "learning_rate": 0.00045176162536756883, "loss": 1.793, "step": 29862 }, { "epoch": 0.99, "grad_norm": 0.4615434408187866, "learning_rate": 0.00045175260919087535, "loss": 1.7477, "step": 29863 }, { "epoch": 0.99, "grad_norm": 0.45549681782722473, "learning_rate": 0.0004517435928299766, "loss": 1.7697, "step": 29864 }, { "epoch": 0.99, "grad_norm": 0.4692070186138153, "learning_rate": 0.00045173457628488355, "loss": 1.8368, "step": 29865 }, { "epoch": 0.99, "grad_norm": 0.4637458920478821, "learning_rate": 0.00045172555955560697, "loss": 1.8321, "step": 29866 }, { "epoch": 0.99, "grad_norm": 0.48327845335006714, "learning_rate": 0.000451716542642158, "loss": 1.8217, "step": 29867 }, { "epoch": 0.99, "grad_norm": 0.4605095386505127, "learning_rate": 0.0004517075255445474, "loss": 1.7824, "step": 29868 }, { "epoch": 0.99, "grad_norm": 0.46096566319465637, "learning_rate": 0.0004516985082627863, "loss": 1.8016, "step": 29869 }, { "epoch": 0.99, "grad_norm": 0.43635207414627075, "learning_rate": 0.0004516894907968856, "loss": 1.7601, "step": 29870 }, { "epoch": 0.99, "grad_norm": 0.46796470880508423, "learning_rate": 0.00045168047314685615, "loss": 1.7246, "step": 29871 }, { "epoch": 0.99, "grad_norm": 0.4483172595500946, "learning_rate": 0.000451671455312709, "loss": 1.8557, "step": 29872 }, { "epoch": 0.99, "grad_norm": 0.453064888715744, "learning_rate": 0.00045166243729445496, "loss": 1.7681, "step": 29873 }, { "epoch": 0.99, "grad_norm": 0.4695983827114105, "learning_rate": 0.00045165341909210514, "loss": 1.7866, "step": 29874 }, { "epoch": 0.99, "grad_norm": 0.4606401324272156, "learning_rate": 0.0004516444007056704, "loss": 1.7448, "step": 29875 }, { "epoch": 0.99, "grad_norm": 0.45125171542167664, "learning_rate": 0.00045163538213516166, "loss": 1.8189, "step": 29876 }, { "epoch": 0.99, "grad_norm": 0.443946897983551, "learning_rate": 0.00045162636338059, "loss": 1.7784, "step": 29877 }, { "epoch": 0.99, "grad_norm": 0.4480014145374298, "learning_rate": 0.0004516173444419662, "loss": 1.8233, "step": 29878 }, { "epoch": 0.99, "grad_norm": 0.44878247380256653, "learning_rate": 0.0004516083253193013, "loss": 1.7219, "step": 29879 }, { "epoch": 0.99, "grad_norm": 0.4408862292766571, "learning_rate": 0.0004515993060126062, "loss": 1.7883, "step": 29880 }, { "epoch": 0.99, "grad_norm": 0.4539048373699188, "learning_rate": 0.0004515902865218919, "loss": 1.7602, "step": 29881 }, { "epoch": 0.99, "grad_norm": 0.44696974754333496, "learning_rate": 0.00045158126684716936, "loss": 1.7605, "step": 29882 }, { "epoch": 0.99, "grad_norm": 0.4687346816062927, "learning_rate": 0.00045157224698844957, "loss": 1.7533, "step": 29883 }, { "epoch": 0.99, "grad_norm": 0.4449346363544464, "learning_rate": 0.0004515632269457432, "loss": 1.7654, "step": 29884 }, { "epoch": 0.99, "grad_norm": 0.47662484645843506, "learning_rate": 0.00045155420671906163, "loss": 1.8105, "step": 29885 }, { "epoch": 0.99, "grad_norm": 0.4432274401187897, "learning_rate": 0.00045154518630841545, "loss": 1.7179, "step": 29886 }, { "epoch": 0.99, "grad_norm": 0.45920810103416443, "learning_rate": 0.00045153616571381577, "loss": 1.773, "step": 29887 }, { "epoch": 0.99, "grad_norm": 0.4560462534427643, "learning_rate": 0.00045152714493527354, "loss": 1.7601, "step": 29888 }, { "epoch": 0.99, "grad_norm": 0.44828832149505615, "learning_rate": 0.00045151812397279956, "loss": 1.8415, "step": 29889 }, { "epoch": 0.99, "grad_norm": 0.44443580508232117, "learning_rate": 0.0004515091028264051, "loss": 1.8286, "step": 29890 }, { "epoch": 0.99, "grad_norm": 0.43986034393310547, "learning_rate": 0.00045150008149610074, "loss": 1.7506, "step": 29891 }, { "epoch": 0.99, "grad_norm": 0.44598835706710815, "learning_rate": 0.0004514910599818977, "loss": 1.8014, "step": 29892 }, { "epoch": 0.99, "grad_norm": 0.4643569588661194, "learning_rate": 0.00045148203828380675, "loss": 1.7802, "step": 29893 }, { "epoch": 0.99, "grad_norm": 0.43752360343933105, "learning_rate": 0.00045147301640183896, "loss": 1.8165, "step": 29894 }, { "epoch": 0.99, "grad_norm": 0.4428183436393738, "learning_rate": 0.00045146399433600534, "loss": 1.7742, "step": 29895 }, { "epoch": 0.99, "grad_norm": 0.4503491222858429, "learning_rate": 0.00045145497208631667, "loss": 1.8132, "step": 29896 }, { "epoch": 0.99, "grad_norm": 0.4441768527030945, "learning_rate": 0.00045144594965278395, "loss": 1.8431, "step": 29897 }, { "epoch": 0.99, "grad_norm": 0.4496068060398102, "learning_rate": 0.0004514369270354182, "loss": 1.8461, "step": 29898 }, { "epoch": 0.99, "grad_norm": 0.4462291896343231, "learning_rate": 0.00045142790423423037, "loss": 1.7423, "step": 29899 }, { "epoch": 0.99, "grad_norm": 0.45066213607788086, "learning_rate": 0.00045141888124923134, "loss": 1.7964, "step": 29900 }, { "epoch": 0.99, "grad_norm": 0.45447981357574463, "learning_rate": 0.00045140985808043207, "loss": 1.8234, "step": 29901 }, { "epoch": 0.99, "grad_norm": 0.45648428797721863, "learning_rate": 0.00045140083472784356, "loss": 1.7614, "step": 29902 }, { "epoch": 0.99, "grad_norm": 0.44113051891326904, "learning_rate": 0.0004513918111914768, "loss": 1.7434, "step": 29903 }, { "epoch": 0.99, "grad_norm": 0.4961722195148468, "learning_rate": 0.00045138278747134265, "loss": 1.8666, "step": 29904 }, { "epoch": 0.99, "grad_norm": 0.4568222761154175, "learning_rate": 0.0004513737635674521, "loss": 1.8888, "step": 29905 }, { "epoch": 0.99, "grad_norm": 0.45409470796585083, "learning_rate": 0.00045136473947981603, "loss": 1.8287, "step": 29906 }, { "epoch": 1.0, "grad_norm": 0.43856728076934814, "learning_rate": 0.0004513557152084456, "loss": 1.7818, "step": 29907 }, { "epoch": 1.0, "grad_norm": 0.45422738790512085, "learning_rate": 0.0004513466907533516, "loss": 1.7313, "step": 29908 }, { "epoch": 1.0, "grad_norm": 0.46207356452941895, "learning_rate": 0.00045133766611454493, "loss": 1.7861, "step": 29909 }, { "epoch": 1.0, "grad_norm": 0.43828362226486206, "learning_rate": 0.0004513286412920367, "loss": 1.8202, "step": 29910 }, { "epoch": 1.0, "grad_norm": 0.4325147867202759, "learning_rate": 0.0004513196162858378, "loss": 1.8008, "step": 29911 }, { "epoch": 1.0, "grad_norm": 0.45007240772247314, "learning_rate": 0.00045131059109595914, "loss": 1.8755, "step": 29912 }, { "epoch": 1.0, "grad_norm": 0.44920575618743896, "learning_rate": 0.0004513015657224117, "loss": 1.7646, "step": 29913 }, { "epoch": 1.0, "grad_norm": 0.4377771317958832, "learning_rate": 0.00045129254016520646, "loss": 1.8026, "step": 29914 }, { "epoch": 1.0, "grad_norm": 0.44138848781585693, "learning_rate": 0.00045128351442435443, "loss": 1.736, "step": 29915 }, { "epoch": 1.0, "grad_norm": 0.45479968190193176, "learning_rate": 0.00045127448849986647, "loss": 1.8211, "step": 29916 }, { "epoch": 1.0, "grad_norm": 0.4513203501701355, "learning_rate": 0.0004512654623917536, "loss": 1.8378, "step": 29917 }, { "epoch": 1.0, "grad_norm": 0.4458170235157013, "learning_rate": 0.0004512564361000267, "loss": 1.8182, "step": 29918 }, { "epoch": 1.0, "grad_norm": 0.44597819447517395, "learning_rate": 0.00045124740962469684, "loss": 1.9258, "step": 29919 }, { "epoch": 1.0, "grad_norm": 0.44315600395202637, "learning_rate": 0.0004512383829657749, "loss": 1.7562, "step": 29920 }, { "epoch": 1.0, "grad_norm": 0.4540952146053314, "learning_rate": 0.0004512293561232717, "loss": 1.8006, "step": 29921 }, { "epoch": 1.0, "grad_norm": 0.4436182379722595, "learning_rate": 0.0004512203290971985, "loss": 1.7911, "step": 29922 }, { "epoch": 1.0, "grad_norm": 0.4412074685096741, "learning_rate": 0.000451211301887566, "loss": 1.81, "step": 29923 }, { "epoch": 1.0, "grad_norm": 0.4645288586616516, "learning_rate": 0.0004512022744943854, "loss": 1.7568, "step": 29924 }, { "epoch": 1.0, "grad_norm": 0.4433791935443878, "learning_rate": 0.0004511932469176674, "loss": 1.8134, "step": 29925 }, { "epoch": 1.0, "grad_norm": 0.4501058757305145, "learning_rate": 0.00045118421915742315, "loss": 1.8247, "step": 29926 }, { "epoch": 1.0, "grad_norm": 0.4791383445262909, "learning_rate": 0.0004511751912136635, "loss": 1.853, "step": 29927 }, { "epoch": 1.0, "grad_norm": 0.44124773144721985, "learning_rate": 0.0004511661630863994, "loss": 1.8097, "step": 29928 }, { "epoch": 1.0, "grad_norm": 0.449995756149292, "learning_rate": 0.0004511571347756419, "loss": 1.8148, "step": 29929 }, { "epoch": 1.0, "grad_norm": 0.4819650650024414, "learning_rate": 0.0004511481062814019, "loss": 1.8099, "step": 29930 }, { "epoch": 1.0, "grad_norm": 0.47229260206222534, "learning_rate": 0.0004511390776036904, "loss": 1.766, "step": 29931 }, { "epoch": 1.0, "grad_norm": 0.4490996301174164, "learning_rate": 0.00045113004874251827, "loss": 1.7879, "step": 29932 }, { "epoch": 1.0, "grad_norm": 0.44860777258872986, "learning_rate": 0.0004511210196978965, "loss": 1.7829, "step": 29933 }, { "epoch": 1.0, "grad_norm": 0.4593188762664795, "learning_rate": 0.0004511119904698362, "loss": 1.8878, "step": 29934 }, { "epoch": 1.0, "grad_norm": 0.44395703077316284, "learning_rate": 0.00045110296105834813, "loss": 1.772, "step": 29935 }, { "epoch": 1.0, "grad_norm": 0.45495882630348206, "learning_rate": 0.0004510939314634434, "loss": 1.8115, "step": 29936 }, { "epoch": 1.0, "grad_norm": 0.44244542717933655, "learning_rate": 0.00045108490168513284, "loss": 1.7812, "step": 29937 }, { "epoch": 1.0, "grad_norm": 0.4469728469848633, "learning_rate": 0.0004510758717234274, "loss": 1.7427, "step": 29938 }, { "epoch": 1.0, "grad_norm": 0.4610331356525421, "learning_rate": 0.00045106684157833823, "loss": 1.816, "step": 29939 }, { "epoch": 1.0, "grad_norm": 0.4388534724712372, "learning_rate": 0.0004510578112498762, "loss": 1.8405, "step": 29940 }, { "epoch": 1.0, "grad_norm": 0.438732385635376, "learning_rate": 0.0004510487807380521, "loss": 1.8286, "step": 29941 }, { "epoch": 1.0, "grad_norm": 0.45053544640541077, "learning_rate": 0.0004510397500428771, "loss": 1.7917, "step": 29942 }, { "epoch": 1.0, "grad_norm": 0.454255610704422, "learning_rate": 0.0004510307191643621, "loss": 1.8556, "step": 29943 }, { "epoch": 1.0, "grad_norm": 0.4283366799354553, "learning_rate": 0.00045102168810251807, "loss": 1.7452, "step": 29944 }, { "epoch": 1.0, "grad_norm": 0.44321075081825256, "learning_rate": 0.00045101265685735603, "loss": 1.8345, "step": 29945 }, { "epoch": 1.0, "grad_norm": 0.4408641457557678, "learning_rate": 0.0004510036254288867, "loss": 1.7687, "step": 29946 }, { "epoch": 1.0, "grad_norm": 0.4384567439556122, "learning_rate": 0.0004509945938171214, "loss": 1.8204, "step": 29947 }, { "epoch": 1.0, "grad_norm": 0.44330161809921265, "learning_rate": 0.00045098556202207086, "loss": 1.8589, "step": 29948 }, { "epoch": 1.0, "grad_norm": 0.4532533884048462, "learning_rate": 0.00045097653004374605, "loss": 1.7834, "step": 29949 }, { "epoch": 1.0, "grad_norm": 0.4403708875179291, "learning_rate": 0.000450967497882158, "loss": 1.7558, "step": 29950 }, { "epoch": 1.0, "grad_norm": 0.4326280951499939, "learning_rate": 0.0004509584655373176, "loss": 1.7522, "step": 29951 }, { "epoch": 1.0, "grad_norm": 0.4319304823875427, "learning_rate": 0.00045094943300923597, "loss": 1.7504, "step": 29952 }, { "epoch": 1.0, "grad_norm": 0.4640534818172455, "learning_rate": 0.00045094040029792387, "loss": 1.7787, "step": 29953 }, { "epoch": 1.0, "grad_norm": 0.45798763632774353, "learning_rate": 0.00045093136740339244, "loss": 1.8676, "step": 29954 }, { "epoch": 1.0, "grad_norm": 0.45332515239715576, "learning_rate": 0.0004509223343256525, "loss": 1.7931, "step": 29955 }, { "epoch": 1.0, "grad_norm": 0.44555357098579407, "learning_rate": 0.0004509133010647151, "loss": 1.8449, "step": 29956 }, { "epoch": 1.0, "grad_norm": 0.4415055513381958, "learning_rate": 0.0004509042676205913, "loss": 1.8399, "step": 29957 }, { "epoch": 1.0, "grad_norm": 0.448808878660202, "learning_rate": 0.0004508952339932918, "loss": 1.7861, "step": 29958 }, { "epoch": 1.0, "grad_norm": 0.4458048939704895, "learning_rate": 0.00045088620018282784, "loss": 1.7409, "step": 29959 }, { "epoch": 1.0, "grad_norm": 0.5832394361495972, "learning_rate": 0.00045087716618921015, "loss": 1.8236, "step": 29960 }, { "epoch": 1.0, "grad_norm": 0.4529125988483429, "learning_rate": 0.0004508681320124499, "loss": 1.7465, "step": 29961 }, { "epoch": 1.0, "grad_norm": 0.44223272800445557, "learning_rate": 0.0004508590976525579, "loss": 1.7922, "step": 29962 }, { "epoch": 1.0, "grad_norm": 0.6943223476409912, "learning_rate": 0.00045085006310954524, "loss": 1.8411, "step": 29963 }, { "epoch": 1.0, "grad_norm": 0.4537705183029175, "learning_rate": 0.00045084102838342284, "loss": 1.7997, "step": 29964 }, { "epoch": 1.0, "grad_norm": 0.4471350312232971, "learning_rate": 0.0004508319934742016, "loss": 1.7763, "step": 29965 }, { "epoch": 1.0, "grad_norm": 0.4471602141857147, "learning_rate": 0.00045082295838189254, "loss": 1.7201, "step": 29966 }, { "epoch": 1.0, "grad_norm": 0.4773421287536621, "learning_rate": 0.00045081392310650665, "loss": 1.7845, "step": 29967 }, { "epoch": 1.0, "grad_norm": 0.4920378625392914, "learning_rate": 0.00045080488764805496, "loss": 1.7975, "step": 29968 }, { "epoch": 1.0, "grad_norm": 0.46266818046569824, "learning_rate": 0.0004507958520065482, "loss": 1.7897, "step": 29969 }, { "epoch": 1.0, "grad_norm": 0.4411969482898712, "learning_rate": 0.00045078681618199764, "loss": 1.7247, "step": 29970 }, { "epoch": 1.0, "grad_norm": 0.4464759826660156, "learning_rate": 0.0004507777801744141, "loss": 1.7952, "step": 29971 }, { "epoch": 1.0, "grad_norm": 0.47237977385520935, "learning_rate": 0.0004507687439838084, "loss": 1.7868, "step": 29972 }, { "epoch": 1.0, "grad_norm": 0.46406328678131104, "learning_rate": 0.00045075970761019175, "loss": 1.8356, "step": 29973 }, { "epoch": 1.0, "grad_norm": 0.45010384917259216, "learning_rate": 0.00045075067105357505, "loss": 1.7734, "step": 29974 }, { "epoch": 1.0, "grad_norm": 0.44283154606819153, "learning_rate": 0.00045074163431396924, "loss": 1.8002, "step": 29975 }, { "epoch": 1.0, "grad_norm": 0.4569568932056427, "learning_rate": 0.00045073259739138524, "loss": 1.8462, "step": 29976 }, { "epoch": 1.0, "grad_norm": 0.4455631375312805, "learning_rate": 0.00045072356028583415, "loss": 1.7589, "step": 29977 }, { "epoch": 1.0, "grad_norm": 0.44889283180236816, "learning_rate": 0.0004507145229973269, "loss": 1.8793, "step": 29978 }, { "epoch": 1.0, "grad_norm": 0.42832785844802856, "learning_rate": 0.00045070548552587436, "loss": 1.8085, "step": 29979 }, { "epoch": 1.0, "grad_norm": 0.45286503434181213, "learning_rate": 0.0004506964478714875, "loss": 1.7991, "step": 29980 }, { "epoch": 1.0, "grad_norm": 0.4712728261947632, "learning_rate": 0.00045068741003417747, "loss": 1.8629, "step": 29981 }, { "epoch": 1.0, "grad_norm": 0.49335870146751404, "learning_rate": 0.0004506783720139551, "loss": 1.8319, "step": 29982 }, { "epoch": 1.0, "grad_norm": 0.43179360032081604, "learning_rate": 0.0004506693338108313, "loss": 1.8577, "step": 29983 }, { "epoch": 1.0, "grad_norm": 0.44812944531440735, "learning_rate": 0.0004506602954248173, "loss": 1.8432, "step": 29984 }, { "epoch": 1.0, "grad_norm": 0.4469533860683441, "learning_rate": 0.0004506512568559238, "loss": 1.8116, "step": 29985 }, { "epoch": 1.0, "grad_norm": 0.48298317193984985, "learning_rate": 0.00045064221810416186, "loss": 1.805, "step": 29986 }, { "epoch": 1.0, "grad_norm": 0.4786292016506195, "learning_rate": 0.00045063317916954236, "loss": 1.8916, "step": 29987 }, { "epoch": 1.0, "grad_norm": 0.4487088620662689, "learning_rate": 0.00045062414005207654, "loss": 1.8644, "step": 29988 }, { "epoch": 1.0, "grad_norm": 0.4665190875530243, "learning_rate": 0.00045061510075177516, "loss": 1.7657, "step": 29989 }, { "epoch": 1.0, "grad_norm": 0.44926926493644714, "learning_rate": 0.00045060606126864927, "loss": 1.8449, "step": 29990 }, { "epoch": 1.0, "grad_norm": 0.4892897307872772, "learning_rate": 0.00045059702160270976, "loss": 1.9265, "step": 29991 }, { "epoch": 1.0, "grad_norm": 0.4431319534778595, "learning_rate": 0.0004505879817539677, "loss": 1.8304, "step": 29992 }, { "epoch": 1.0, "grad_norm": 0.44461408257484436, "learning_rate": 0.0004505789417224339, "loss": 1.7837, "step": 29993 }, { "epoch": 1.0, "grad_norm": 0.4758574664592743, "learning_rate": 0.00045056990150811957, "loss": 1.8205, "step": 29994 }, { "epoch": 1.0, "grad_norm": 0.4796036183834076, "learning_rate": 0.0004505608611110356, "loss": 1.8003, "step": 29995 }, { "epoch": 1.0, "grad_norm": 0.4517258107662201, "learning_rate": 0.00045055182053119284, "loss": 1.8093, "step": 29996 }, { "epoch": 1.0, "grad_norm": 0.4648866355419159, "learning_rate": 0.0004505427797686024, "loss": 1.858, "step": 29997 }, { "epoch": 1.0, "grad_norm": 0.4653850495815277, "learning_rate": 0.0004505337388232752, "loss": 1.78, "step": 29998 }, { "epoch": 1.0, "grad_norm": 0.4457913339138031, "learning_rate": 0.0004505246976952222, "loss": 1.7714, "step": 29999 }, { "epoch": 1.0, "grad_norm": 0.4450417160987854, "learning_rate": 0.00045051565638445446, "loss": 1.844, "step": 30000 }, { "epoch": 1.0, "grad_norm": 0.4355718791484833, "learning_rate": 0.00045050661489098273, "loss": 1.7638, "step": 30001 }, { "epoch": 1.0, "grad_norm": 0.4702327251434326, "learning_rate": 0.0004504975732148183, "loss": 1.806, "step": 30002 }, { "epoch": 1.0, "grad_norm": 0.6781954169273376, "learning_rate": 0.000450488531355972, "loss": 1.8934, "step": 30003 }, { "epoch": 1.0, "grad_norm": 0.4448036551475525, "learning_rate": 0.0004504794893144547, "loss": 1.8193, "step": 30004 }, { "epoch": 1.0, "grad_norm": 0.45006412267684937, "learning_rate": 0.0004504704470902776, "loss": 1.8102, "step": 30005 }, { "epoch": 1.0, "grad_norm": 0.4751993715763092, "learning_rate": 0.00045046140468345134, "loss": 1.7906, "step": 30006 }, { "epoch": 1.0, "grad_norm": 0.455409973859787, "learning_rate": 0.00045045236209398723, "loss": 1.8451, "step": 30007 }, { "epoch": 1.0, "grad_norm": 0.45836660265922546, "learning_rate": 0.00045044331932189604, "loss": 1.8179, "step": 30008 }, { "epoch": 1.0, "grad_norm": 0.46001026034355164, "learning_rate": 0.000450434276367189, "loss": 1.846, "step": 30009 }, { "epoch": 1.0, "grad_norm": 0.4765583872795105, "learning_rate": 0.00045042523322987683, "loss": 1.7922, "step": 30010 }, { "epoch": 1.0, "grad_norm": 1.1119316816329956, "learning_rate": 0.0004504161899099706, "loss": 1.7881, "step": 30011 }, { "epoch": 1.0, "grad_norm": 0.8139200806617737, "learning_rate": 0.00045040714640748124, "loss": 1.7622, "step": 30012 }, { "epoch": 1.0, "grad_norm": 0.48935946822166443, "learning_rate": 0.0004503981027224197, "loss": 1.8163, "step": 30013 }, { "epoch": 1.0, "grad_norm": 0.45989489555358887, "learning_rate": 0.00045038905885479716, "loss": 1.851, "step": 30014 }, { "epoch": 1.0, "grad_norm": 0.44691336154937744, "learning_rate": 0.0004503800148046244, "loss": 1.8268, "step": 30015 }, { "epoch": 1.0, "grad_norm": 0.4479992091655731, "learning_rate": 0.0004503709705719124, "loss": 1.7861, "step": 30016 }, { "epoch": 1.0, "grad_norm": 0.47481244802474976, "learning_rate": 0.00045036192615667233, "loss": 1.7451, "step": 30017 }, { "epoch": 1.0, "grad_norm": 0.4504100978374481, "learning_rate": 0.00045035288155891495, "loss": 1.7895, "step": 30018 }, { "epoch": 1.0, "grad_norm": 0.4454241394996643, "learning_rate": 0.0004503438367786514, "loss": 1.8012, "step": 30019 }, { "epoch": 1.0, "grad_norm": 0.45069101452827454, "learning_rate": 0.00045033479181589246, "loss": 1.8194, "step": 30020 }, { "epoch": 1.0, "grad_norm": 0.46604347229003906, "learning_rate": 0.0004503257466706493, "loss": 1.8248, "step": 30021 }, { "epoch": 1.0, "grad_norm": 0.44740724563598633, "learning_rate": 0.0004503167013429328, "loss": 1.8286, "step": 30022 }, { "epoch": 1.0, "grad_norm": 0.4684000015258789, "learning_rate": 0.0004503076558327541, "loss": 1.7829, "step": 30023 }, { "epoch": 1.0, "grad_norm": 0.4537754952907562, "learning_rate": 0.00045029861014012393, "loss": 1.7976, "step": 30024 }, { "epoch": 1.0, "grad_norm": 0.4505603611469269, "learning_rate": 0.00045028956426505345, "loss": 1.7919, "step": 30025 }, { "epoch": 1.0, "grad_norm": 0.445205420255661, "learning_rate": 0.00045028051820755353, "loss": 1.7711, "step": 30026 }, { "epoch": 1.0, "grad_norm": 0.4598079323768616, "learning_rate": 0.00045027147196763525, "loss": 1.684, "step": 30027 }, { "epoch": 1.0, "grad_norm": 0.4425392150878906, "learning_rate": 0.0004502624255453095, "loss": 1.7184, "step": 30028 }, { "epoch": 1.0, "grad_norm": 0.45474332571029663, "learning_rate": 0.0004502533789405873, "loss": 1.838, "step": 30029 }, { "epoch": 1.0, "grad_norm": 0.44693639874458313, "learning_rate": 0.0004502443321534797, "loss": 1.7157, "step": 30030 }, { "epoch": 1.0, "grad_norm": 0.43725842237472534, "learning_rate": 0.0004502352851839975, "loss": 1.8108, "step": 30031 }, { "epoch": 1.0, "grad_norm": 0.4475249946117401, "learning_rate": 0.00045022623803215195, "loss": 1.7513, "step": 30032 }, { "epoch": 1.0, "grad_norm": 0.4556970000267029, "learning_rate": 0.00045021719069795384, "loss": 1.7408, "step": 30033 }, { "epoch": 1.0, "grad_norm": 0.4429209530353546, "learning_rate": 0.00045020814318141416, "loss": 1.7242, "step": 30034 }, { "epoch": 1.0, "grad_norm": 0.4362216293811798, "learning_rate": 0.00045019909548254394, "loss": 1.8737, "step": 30035 }, { "epoch": 1.0, "grad_norm": 0.45713406801223755, "learning_rate": 0.00045019004760135406, "loss": 1.8521, "step": 30036 }, { "epoch": 1.0, "grad_norm": 0.46580809354782104, "learning_rate": 0.00045018099953785575, "loss": 1.8104, "step": 30037 }, { "epoch": 1.0, "grad_norm": 0.45606306195259094, "learning_rate": 0.0004501719512920597, "loss": 1.829, "step": 30038 }, { "epoch": 1.0, "grad_norm": 0.43756672739982605, "learning_rate": 0.00045016290286397715, "loss": 1.8584, "step": 30039 }, { "epoch": 1.0, "grad_norm": 0.4443945288658142, "learning_rate": 0.00045015385425361886, "loss": 1.8179, "step": 30040 }, { "epoch": 1.0, "grad_norm": 0.4569583535194397, "learning_rate": 0.000450144805460996, "loss": 1.7205, "step": 30041 }, { "epoch": 1.0, "grad_norm": 0.4492349326610565, "learning_rate": 0.0004501357564861194, "loss": 1.8486, "step": 30042 }, { "epoch": 1.0, "grad_norm": 0.46618524193763733, "learning_rate": 0.00045012670732900013, "loss": 1.8121, "step": 30043 }, { "epoch": 1.0, "grad_norm": 0.4530992805957794, "learning_rate": 0.00045011765798964924, "loss": 1.7969, "step": 30044 }, { "epoch": 1.0, "grad_norm": 0.44523441791534424, "learning_rate": 0.00045010860846807747, "loss": 1.7953, "step": 30045 }, { "epoch": 1.0, "grad_norm": 0.45593011379241943, "learning_rate": 0.0004500995587642961, "loss": 1.8177, "step": 30046 }, { "epoch": 1.0, "grad_norm": 0.4545198678970337, "learning_rate": 0.0004500905088783159, "loss": 1.8406, "step": 30047 }, { "epoch": 1.0, "grad_norm": 0.5361781716346741, "learning_rate": 0.000450081458810148, "loss": 1.8396, "step": 30048 }, { "epoch": 1.0, "grad_norm": 0.4506010413169861, "learning_rate": 0.0004500724085598032, "loss": 1.7665, "step": 30049 }, { "epoch": 1.0, "grad_norm": 0.5030941963195801, "learning_rate": 0.00045006335812729274, "loss": 1.8043, "step": 30050 }, { "epoch": 1.0, "grad_norm": 0.4529331922531128, "learning_rate": 0.00045005430751262753, "loss": 1.8576, "step": 30051 }, { "epoch": 1.0, "grad_norm": 0.44358834624290466, "learning_rate": 0.0004500452567158183, "loss": 1.8689, "step": 30052 }, { "epoch": 1.0, "grad_norm": 0.43755418062210083, "learning_rate": 0.0004500362057368764, "loss": 1.8907, "step": 30053 }, { "epoch": 1.0, "grad_norm": 0.44089341163635254, "learning_rate": 0.0004500271545758126, "loss": 1.7614, "step": 30054 }, { "epoch": 1.0, "grad_norm": 0.4633164703845978, "learning_rate": 0.00045001810323263793, "loss": 1.8178, "step": 30055 }, { "epoch": 1.0, "grad_norm": 0.44740405678749084, "learning_rate": 0.00045000905170736335, "loss": 1.7668, "step": 30056 }, { "epoch": 1.0, "grad_norm": 0.9024176597595215, "learning_rate": 0.00045, "loss": 1.7662, "step": 30057 }, { "epoch": 1.0, "grad_norm": 0.4525233805179596, "learning_rate": 0.00044999094811055864, "loss": 1.8019, "step": 30058 }, { "epoch": 1.0, "grad_norm": 0.43851807713508606, "learning_rate": 0.00044998189603905037, "loss": 1.7624, "step": 30059 }, { "epoch": 1.0, "grad_norm": 0.4622822403907776, "learning_rate": 0.00044997284378548623, "loss": 1.7678, "step": 30060 }, { "epoch": 1.0, "grad_norm": 0.47650042176246643, "learning_rate": 0.0004499637913498771, "loss": 1.7571, "step": 30061 }, { "epoch": 1.0, "grad_norm": 0.47549259662628174, "learning_rate": 0.00044995473873223415, "loss": 1.7118, "step": 30062 }, { "epoch": 1.0, "grad_norm": 0.4576050639152527, "learning_rate": 0.00044994568593256806, "loss": 1.8521, "step": 30063 }, { "epoch": 1.0, "grad_norm": 0.4408736228942871, "learning_rate": 0.00044993663295089017, "loss": 1.7617, "step": 30064 }, { "epoch": 1.0, "grad_norm": 0.4564814865589142, "learning_rate": 0.0004499275797872113, "loss": 1.7473, "step": 30065 }, { "epoch": 1.0, "grad_norm": 0.4509340822696686, "learning_rate": 0.0004499185264415423, "loss": 1.7622, "step": 30066 }, { "epoch": 1.0, "grad_norm": 0.4621680974960327, "learning_rate": 0.0004499094729138944, "loss": 1.8314, "step": 30067 }, { "epoch": 1.0, "grad_norm": 0.4463602304458618, "learning_rate": 0.00044990041920427837, "loss": 1.7501, "step": 30068 }, { "epoch": 1.0, "grad_norm": 0.4463523030281067, "learning_rate": 0.00044989136531270544, "loss": 1.8006, "step": 30069 }, { "epoch": 1.0, "grad_norm": 0.45936334133148193, "learning_rate": 0.0004498823112391864, "loss": 1.8013, "step": 30070 }, { "epoch": 1.0, "grad_norm": 0.4481481909751892, "learning_rate": 0.0004498732569837324, "loss": 1.8552, "step": 30071 }, { "epoch": 1.0, "grad_norm": 0.4671659469604492, "learning_rate": 0.00044986420254635427, "loss": 1.7528, "step": 30072 }, { "epoch": 1.0, "grad_norm": 0.4605269134044647, "learning_rate": 0.00044985514792706316, "loss": 1.8562, "step": 30073 }, { "epoch": 1.0, "grad_norm": 0.4558902382850647, "learning_rate": 0.00044984609312587, "loss": 1.7438, "step": 30074 }, { "epoch": 1.0, "grad_norm": 0.44286295771598816, "learning_rate": 0.00044983703814278556, "loss": 1.8208, "step": 30075 }, { "epoch": 1.0, "grad_norm": 0.4574962556362152, "learning_rate": 0.00044982798297782124, "loss": 1.7459, "step": 30076 }, { "epoch": 1.0, "grad_norm": 0.4534043073654175, "learning_rate": 0.0004498189276309877, "loss": 1.8287, "step": 30077 }, { "epoch": 1.0, "grad_norm": 0.4766234755516052, "learning_rate": 0.00044980987210229616, "loss": 1.8072, "step": 30078 }, { "epoch": 1.0, "grad_norm": 0.45334893465042114, "learning_rate": 0.00044980081639175746, "loss": 1.7729, "step": 30079 }, { "epoch": 1.0, "grad_norm": 0.4539332687854767, "learning_rate": 0.0004497917604993826, "loss": 1.7977, "step": 30080 }, { "epoch": 1.0, "grad_norm": 0.4490179717540741, "learning_rate": 0.0004497827044251827, "loss": 1.7579, "step": 30081 }, { "epoch": 1.0, "grad_norm": 0.4387413263320923, "learning_rate": 0.0004497736481691686, "loss": 1.7812, "step": 30082 }, { "epoch": 1.0, "grad_norm": 0.43693315982818604, "learning_rate": 0.0004497645917313514, "loss": 1.7704, "step": 30083 }, { "epoch": 1.0, "grad_norm": 0.4532642066478729, "learning_rate": 0.000449755535111742, "loss": 1.8464, "step": 30084 }, { "epoch": 1.0, "grad_norm": 0.4348626732826233, "learning_rate": 0.0004497464783103515, "loss": 1.8036, "step": 30085 }, { "epoch": 1.0, "grad_norm": 0.4654596745967865, "learning_rate": 0.00044973742132719077, "loss": 1.8373, "step": 30086 }, { "epoch": 1.0, "grad_norm": 0.4494546949863434, "learning_rate": 0.00044972836416227096, "loss": 1.7591, "step": 30087 }, { "epoch": 1.0, "grad_norm": 0.44738101959228516, "learning_rate": 0.000449719306815603, "loss": 1.8006, "step": 30088 }, { "epoch": 1.0, "grad_norm": 0.4446021020412445, "learning_rate": 0.00044971024928719774, "loss": 1.8151, "step": 30089 }, { "epoch": 1.0, "grad_norm": 0.4375282824039459, "learning_rate": 0.00044970119157706633, "loss": 1.7439, "step": 30090 }, { "epoch": 1.0, "grad_norm": 0.47196832299232483, "learning_rate": 0.00044969213368521976, "loss": 1.8305, "step": 30091 }, { "epoch": 1.0, "grad_norm": 0.45863038301467896, "learning_rate": 0.0004496830756116691, "loss": 1.7936, "step": 30092 }, { "epoch": 1.0, "grad_norm": 0.4417484402656555, "learning_rate": 0.000449674017356425, "loss": 1.7559, "step": 30093 }, { "epoch": 1.0, "grad_norm": 0.4359076917171478, "learning_rate": 0.0004496649589194989, "loss": 1.8532, "step": 30094 }, { "epoch": 1.0, "grad_norm": 0.4726281464099884, "learning_rate": 0.0004496559003009015, "loss": 1.7925, "step": 30095 }, { "epoch": 1.0, "grad_norm": 0.4560554027557373, "learning_rate": 0.00044964684150064385, "loss": 1.7887, "step": 30096 }, { "epoch": 1.0, "grad_norm": 0.42835673689842224, "learning_rate": 0.0004496377825187371, "loss": 1.755, "step": 30097 }, { "epoch": 1.0, "grad_norm": 0.4600165784358978, "learning_rate": 0.0004496287233551921, "loss": 1.8079, "step": 30098 }, { "epoch": 1.0, "grad_norm": 0.4393376410007477, "learning_rate": 0.0004496196640100198, "loss": 1.8152, "step": 30099 }, { "epoch": 1.0, "grad_norm": 0.44069766998291016, "learning_rate": 0.0004496106044832313, "loss": 1.7723, "step": 30100 }, { "epoch": 1.0, "grad_norm": 0.46057218313217163, "learning_rate": 0.00044960154477483765, "loss": 1.7796, "step": 30101 }, { "epoch": 1.0, "grad_norm": 0.4303821921348572, "learning_rate": 0.0004495924848848497, "loss": 1.7728, "step": 30102 }, { "epoch": 1.0, "grad_norm": 0.43950194120407104, "learning_rate": 0.0004495834248132785, "loss": 1.8382, "step": 30103 }, { "epoch": 1.0, "grad_norm": 0.4335435628890991, "learning_rate": 0.0004495743645601351, "loss": 1.7417, "step": 30104 }, { "epoch": 1.0, "grad_norm": 0.425340473651886, "learning_rate": 0.00044956530412543043, "loss": 1.7957, "step": 30105 }, { "epoch": 1.0, "grad_norm": 0.45836102962493896, "learning_rate": 0.0004495562435091756, "loss": 1.8284, "step": 30106 }, { "epoch": 1.0, "grad_norm": 0.47923749685287476, "learning_rate": 0.0004495471827113814, "loss": 1.7963, "step": 30107 }, { "epoch": 1.0, "grad_norm": 0.4433835446834564, "learning_rate": 0.00044953812173205904, "loss": 1.827, "step": 30108 }, { "epoch": 1.0, "grad_norm": 0.4367544651031494, "learning_rate": 0.0004495290605712194, "loss": 1.8164, "step": 30109 }, { "epoch": 1.0, "grad_norm": 0.4542847275733948, "learning_rate": 0.00044951999922887353, "loss": 1.8586, "step": 30110 }, { "epoch": 1.0, "grad_norm": 0.4855015277862549, "learning_rate": 0.00044951093770503235, "loss": 1.7716, "step": 30111 }, { "epoch": 1.0, "grad_norm": 0.4569479525089264, "learning_rate": 0.00044950187599970706, "loss": 1.8991, "step": 30112 }, { "epoch": 1.0, "grad_norm": 0.46702101826667786, "learning_rate": 0.00044949281411290845, "loss": 1.823, "step": 30113 }, { "epoch": 1.0, "grad_norm": 0.44343626499176025, "learning_rate": 0.0004494837520446475, "loss": 1.7809, "step": 30114 }, { "epoch": 1.0, "grad_norm": 0.4594000577926636, "learning_rate": 0.0004494746897949354, "loss": 1.8503, "step": 30115 }, { "epoch": 1.0, "grad_norm": 0.4474114179611206, "learning_rate": 0.00044946562736378307, "loss": 1.7866, "step": 30116 }, { "epoch": 1.0, "grad_norm": 0.44679418206214905, "learning_rate": 0.00044945656475120147, "loss": 1.8286, "step": 30117 }, { "epoch": 1.0, "grad_norm": 0.44988909363746643, "learning_rate": 0.00044944750195720156, "loss": 1.7776, "step": 30118 }, { "epoch": 1.0, "grad_norm": 0.44781869649887085, "learning_rate": 0.00044943843898179447, "loss": 1.7203, "step": 30119 }, { "epoch": 1.0, "grad_norm": 0.4410647749900818, "learning_rate": 0.0004494293758249911, "loss": 1.7676, "step": 30120 }, { "epoch": 1.0, "grad_norm": 0.4530312418937683, "learning_rate": 0.00044942031248680246, "loss": 1.7853, "step": 30121 }, { "epoch": 1.0, "grad_norm": 0.4522021412849426, "learning_rate": 0.00044941124896723966, "loss": 1.8334, "step": 30122 }, { "epoch": 1.0, "grad_norm": 0.4487746059894562, "learning_rate": 0.0004494021852663135, "loss": 1.8068, "step": 30123 }, { "epoch": 1.0, "grad_norm": 0.4617500305175781, "learning_rate": 0.00044939312138403523, "loss": 1.7736, "step": 30124 }, { "epoch": 1.0, "grad_norm": 0.455025315284729, "learning_rate": 0.0004493840573204156, "loss": 1.8132, "step": 30125 }, { "epoch": 1.0, "grad_norm": 0.4343944191932678, "learning_rate": 0.0004493749930754658, "loss": 1.7694, "step": 30126 }, { "epoch": 1.0, "grad_norm": 0.4782688021659851, "learning_rate": 0.0004493659286491968, "loss": 1.8147, "step": 30127 }, { "epoch": 1.0, "grad_norm": 0.4599522352218628, "learning_rate": 0.00044935686404161954, "loss": 1.7708, "step": 30128 }, { "epoch": 1.0, "grad_norm": 0.4452339708805084, "learning_rate": 0.00044934779925274505, "loss": 1.8289, "step": 30129 }, { "epoch": 1.0, "grad_norm": 0.45372188091278076, "learning_rate": 0.0004493387342825843, "loss": 1.8082, "step": 30130 }, { "epoch": 1.0, "grad_norm": 0.44523313641548157, "learning_rate": 0.0004493296691311484, "loss": 1.8065, "step": 30131 }, { "epoch": 1.0, "grad_norm": 0.4494840204715729, "learning_rate": 0.0004493206037984482, "loss": 1.8281, "step": 30132 }, { "epoch": 1.0, "grad_norm": 0.4598236382007599, "learning_rate": 0.00044931153828449487, "loss": 1.8238, "step": 30133 }, { "epoch": 1.0, "grad_norm": 0.455534964799881, "learning_rate": 0.0004493024725892993, "loss": 1.7553, "step": 30134 }, { "epoch": 1.0, "grad_norm": 0.43596550822257996, "learning_rate": 0.0004492934067128725, "loss": 1.743, "step": 30135 }, { "epoch": 1.0, "grad_norm": 0.47003811597824097, "learning_rate": 0.00044928434065522553, "loss": 1.769, "step": 30136 }, { "epoch": 1.0, "grad_norm": 0.4689624011516571, "learning_rate": 0.0004492752744163694, "loss": 1.7931, "step": 30137 }, { "epoch": 1.0, "grad_norm": 0.4547103941440582, "learning_rate": 0.00044926620799631503, "loss": 1.84, "step": 30138 }, { "epoch": 1.0, "grad_norm": 0.4390075206756592, "learning_rate": 0.00044925714139507355, "loss": 1.7423, "step": 30139 }, { "epoch": 1.0, "grad_norm": 0.44422274827957153, "learning_rate": 0.0004492480746126558, "loss": 1.8387, "step": 30140 }, { "epoch": 1.0, "grad_norm": 0.4688376486301422, "learning_rate": 0.0004492390076490729, "loss": 1.7692, "step": 30141 }, { "epoch": 1.0, "grad_norm": 0.44969847798347473, "learning_rate": 0.0004492299405043359, "loss": 1.7617, "step": 30142 }, { "epoch": 1.0, "grad_norm": 0.4750267267227173, "learning_rate": 0.0004492208731784557, "loss": 1.8177, "step": 30143 }, { "epoch": 1.0, "grad_norm": 0.4537758529186249, "learning_rate": 0.0004492118056714433, "loss": 1.8077, "step": 30144 }, { "epoch": 1.0, "grad_norm": 0.4398914873600006, "learning_rate": 0.00044920273798330977, "loss": 1.8211, "step": 30145 }, { "epoch": 1.0, "grad_norm": 0.4701233208179474, "learning_rate": 0.0004491936701140661, "loss": 1.8381, "step": 30146 }, { "epoch": 1.0, "grad_norm": 0.47552841901779175, "learning_rate": 0.0004491846020637234, "loss": 1.8888, "step": 30147 }, { "epoch": 1.0, "grad_norm": 0.46247828006744385, "learning_rate": 0.0004491755338322924, "loss": 1.7907, "step": 30148 }, { "epoch": 1.0, "grad_norm": 0.4422050714492798, "learning_rate": 0.0004491664654197844, "loss": 1.835, "step": 30149 }, { "epoch": 1.0, "grad_norm": 0.46971797943115234, "learning_rate": 0.00044915739682621034, "loss": 1.8004, "step": 30150 }, { "epoch": 1.0, "grad_norm": 0.46454185247421265, "learning_rate": 0.0004491483280515811, "loss": 1.8161, "step": 30151 }, { "epoch": 1.0, "grad_norm": 0.453258752822876, "learning_rate": 0.0004491392590959077, "loss": 1.7994, "step": 30152 }, { "epoch": 1.0, "grad_norm": 0.4501749873161316, "learning_rate": 0.0004491301899592013, "loss": 1.7912, "step": 30153 }, { "epoch": 1.0, "grad_norm": 0.45655861496925354, "learning_rate": 0.0004491211206414728, "loss": 1.8324, "step": 30154 }, { "epoch": 1.0, "grad_norm": 0.46388232707977295, "learning_rate": 0.00044911205114273316, "loss": 1.8716, "step": 30155 }, { "epoch": 1.0, "grad_norm": 0.46603813767433167, "learning_rate": 0.0004491029814629936, "loss": 1.8793, "step": 30156 }, { "epoch": 1.0, "grad_norm": 0.48935064673423767, "learning_rate": 0.0004490939116022649, "loss": 1.7534, "step": 30157 }, { "epoch": 1.0, "grad_norm": 0.49041375517845154, "learning_rate": 0.0004490848415605582, "loss": 1.8489, "step": 30158 }, { "epoch": 1.0, "grad_norm": 0.44970566034317017, "learning_rate": 0.00044907577133788447, "loss": 1.8717, "step": 30159 }, { "epoch": 1.0, "grad_norm": 0.45604872703552246, "learning_rate": 0.0004490667009342547, "loss": 1.8057, "step": 30160 }, { "epoch": 1.0, "grad_norm": 0.45289498567581177, "learning_rate": 0.00044905763034967994, "loss": 1.7977, "step": 30161 }, { "epoch": 1.0, "grad_norm": 0.461739182472229, "learning_rate": 0.0004490485595841711, "loss": 1.753, "step": 30162 }, { "epoch": 1.0, "grad_norm": 0.4681533873081207, "learning_rate": 0.0004490394886377394, "loss": 1.7502, "step": 30163 }, { "epoch": 1.0, "grad_norm": 0.45216238498687744, "learning_rate": 0.0004490304175103956, "loss": 1.9221, "step": 30164 }, { "epoch": 1.0, "grad_norm": 0.459877073764801, "learning_rate": 0.0004490213462021509, "loss": 1.8474, "step": 30165 }, { "epoch": 1.0, "grad_norm": 0.4690258800983429, "learning_rate": 0.0004490122747130162, "loss": 1.7879, "step": 30166 }, { "epoch": 1.0, "grad_norm": 0.4647027850151062, "learning_rate": 0.00044900320304300256, "loss": 1.8252, "step": 30167 }, { "epoch": 1.0, "grad_norm": 0.46402376890182495, "learning_rate": 0.00044899413119212105, "loss": 1.779, "step": 30168 }, { "epoch": 1.0, "grad_norm": 0.4419451355934143, "learning_rate": 0.0004489850591603826, "loss": 1.7994, "step": 30169 }, { "epoch": 1.0, "grad_norm": 0.45969879627227783, "learning_rate": 0.0004489759869477982, "loss": 1.8635, "step": 30170 }, { "epoch": 1.0, "grad_norm": 0.513897180557251, "learning_rate": 0.00044896691455437887, "loss": 1.7854, "step": 30171 }, { "epoch": 1.0, "grad_norm": 0.4522269070148468, "learning_rate": 0.0004489578419801357, "loss": 1.793, "step": 30172 }, { "epoch": 1.0, "grad_norm": 0.4662427306175232, "learning_rate": 0.00044894876922507964, "loss": 1.8249, "step": 30173 }, { "epoch": 1.0, "grad_norm": 0.44784122705459595, "learning_rate": 0.00044893969628922184, "loss": 1.885, "step": 30174 }, { "epoch": 1.0, "grad_norm": 0.47057831287384033, "learning_rate": 0.000448930623172573, "loss": 1.7951, "step": 30175 }, { "epoch": 1.0, "grad_norm": 0.4335075914859772, "learning_rate": 0.00044892154987514446, "loss": 1.7863, "step": 30176 }, { "epoch": 1.0, "grad_norm": 0.47279080748558044, "learning_rate": 0.00044891247639694705, "loss": 1.8778, "step": 30177 }, { "epoch": 1.0, "grad_norm": 0.4526110589504242, "learning_rate": 0.00044890340273799184, "loss": 1.849, "step": 30178 }, { "epoch": 1.0, "grad_norm": 0.4429343640804291, "learning_rate": 0.0004488943288982899, "loss": 1.7698, "step": 30179 }, { "epoch": 1.0, "grad_norm": 0.4412432312965393, "learning_rate": 0.00044888525487785206, "loss": 1.8027, "step": 30180 }, { "epoch": 1.0, "grad_norm": 0.45210275053977966, "learning_rate": 0.0004488761806766896, "loss": 1.8168, "step": 30181 }, { "epoch": 1.0, "grad_norm": 0.43389880657196045, "learning_rate": 0.00044886710629481327, "loss": 1.7525, "step": 30182 }, { "epoch": 1.0, "grad_norm": 0.43217164278030396, "learning_rate": 0.00044885803173223425, "loss": 1.7422, "step": 30183 }, { "epoch": 1.0, "grad_norm": 0.44785842299461365, "learning_rate": 0.00044884895698896356, "loss": 1.7936, "step": 30184 }, { "epoch": 1.0, "grad_norm": 0.4647200107574463, "learning_rate": 0.0004488398820650121, "loss": 1.8089, "step": 30185 }, { "epoch": 1.0, "grad_norm": 0.4439831078052521, "learning_rate": 0.000448830806960391, "loss": 1.7445, "step": 30186 }, { "epoch": 1.0, "grad_norm": 0.4472336173057556, "learning_rate": 0.00044882173167511114, "loss": 1.7863, "step": 30187 }, { "epoch": 1.0, "grad_norm": 0.4476572871208191, "learning_rate": 0.00044881265620918376, "loss": 1.8051, "step": 30188 }, { "epoch": 1.0, "grad_norm": 0.44995632767677307, "learning_rate": 0.0004488035805626197, "loss": 1.7794, "step": 30189 }, { "epoch": 1.0, "grad_norm": 0.4383465349674225, "learning_rate": 0.00044879450473542996, "loss": 1.8285, "step": 30190 }, { "epoch": 1.0, "grad_norm": 0.4604828655719757, "learning_rate": 0.0004487854287276257, "loss": 1.7395, "step": 30191 }, { "epoch": 1.0, "grad_norm": 0.4510531723499298, "learning_rate": 0.0004487763525392178, "loss": 1.7586, "step": 30192 }, { "epoch": 1.0, "grad_norm": 0.46638983488082886, "learning_rate": 0.0004487672761702173, "loss": 1.8624, "step": 30193 }, { "epoch": 1.0, "grad_norm": 0.4425796866416931, "learning_rate": 0.0004487581996206353, "loss": 1.7671, "step": 30194 }, { "epoch": 1.0, "grad_norm": 0.4310835599899292, "learning_rate": 0.00044874912289048275, "loss": 1.8512, "step": 30195 }, { "epoch": 1.0, "grad_norm": 0.47591355443000793, "learning_rate": 0.0004487400459797706, "loss": 1.7812, "step": 30196 }, { "epoch": 1.0, "grad_norm": 0.4502575099468231, "learning_rate": 0.00044873096888851005, "loss": 1.7649, "step": 30197 }, { "epoch": 1.0, "grad_norm": 0.446856826543808, "learning_rate": 0.00044872189161671206, "loss": 1.7767, "step": 30198 }, { "epoch": 1.0, "grad_norm": 0.4617276191711426, "learning_rate": 0.0004487128141643876, "loss": 1.7929, "step": 30199 }, { "epoch": 1.0, "grad_norm": 0.45475584268569946, "learning_rate": 0.00044870373653154756, "loss": 1.8671, "step": 30200 }, { "epoch": 1.0, "grad_norm": 0.4535752236843109, "learning_rate": 0.00044869465871820316, "loss": 1.8035, "step": 30201 }, { "epoch": 1.0, "grad_norm": 0.5398163199424744, "learning_rate": 0.00044868558072436534, "loss": 1.7786, "step": 30202 }, { "epoch": 1.0, "grad_norm": 0.46189942955970764, "learning_rate": 0.0004486765025500451, "loss": 1.7883, "step": 30203 }, { "epoch": 1.0, "grad_norm": 0.4478014409542084, "learning_rate": 0.00044866742419525366, "loss": 1.8592, "step": 30204 }, { "epoch": 1.0, "grad_norm": 0.46295320987701416, "learning_rate": 0.00044865834566000176, "loss": 1.7701, "step": 30205 }, { "epoch": 1.0, "grad_norm": 0.43851912021636963, "learning_rate": 0.0004486492669443005, "loss": 1.7757, "step": 30206 }, { "epoch": 1.0, "grad_norm": 0.4425675570964813, "learning_rate": 0.00044864018804816096, "loss": 1.8373, "step": 30207 }, { "epoch": 1.01, "grad_norm": 0.4521172344684601, "learning_rate": 0.0004486311089715941, "loss": 1.8397, "step": 30208 }, { "epoch": 1.01, "grad_norm": 0.44251811504364014, "learning_rate": 0.0004486220297146111, "loss": 1.7882, "step": 30209 }, { "epoch": 1.01, "grad_norm": 0.4637913107872009, "learning_rate": 0.0004486129502772227, "loss": 1.7843, "step": 30210 }, { "epoch": 1.01, "grad_norm": 0.46559691429138184, "learning_rate": 0.0004486038706594402, "loss": 1.8026, "step": 30211 }, { "epoch": 1.01, "grad_norm": 0.47538167238235474, "learning_rate": 0.0004485947908612744, "loss": 1.8318, "step": 30212 }, { "epoch": 1.01, "grad_norm": 0.4589388966560364, "learning_rate": 0.0004485857108827365, "loss": 1.7724, "step": 30213 }, { "epoch": 1.01, "grad_norm": 0.4542109966278076, "learning_rate": 0.00044857663072383737, "loss": 1.7938, "step": 30214 }, { "epoch": 1.01, "grad_norm": 0.4514022171497345, "learning_rate": 0.0004485675503845881, "loss": 1.7387, "step": 30215 }, { "epoch": 1.01, "grad_norm": 0.444096177816391, "learning_rate": 0.0004485584698649998, "loss": 1.7599, "step": 30216 }, { "epoch": 1.01, "grad_norm": 0.4564606249332428, "learning_rate": 0.00044854938916508326, "loss": 1.7687, "step": 30217 }, { "epoch": 1.01, "grad_norm": 0.459566205739975, "learning_rate": 0.0004485403082848498, "loss": 1.8162, "step": 30218 }, { "epoch": 1.01, "grad_norm": 0.47090432047843933, "learning_rate": 0.00044853122722431023, "loss": 1.8131, "step": 30219 }, { "epoch": 1.01, "grad_norm": 0.4605351984500885, "learning_rate": 0.00044852214598347566, "loss": 1.7375, "step": 30220 }, { "epoch": 1.01, "grad_norm": 0.45192089676856995, "learning_rate": 0.00044851306456235703, "loss": 1.8165, "step": 30221 }, { "epoch": 1.01, "grad_norm": 0.4776020348072052, "learning_rate": 0.00044850398296096544, "loss": 1.8105, "step": 30222 }, { "epoch": 1.01, "grad_norm": 0.462911456823349, "learning_rate": 0.0004484949011793119, "loss": 1.7503, "step": 30223 }, { "epoch": 1.01, "grad_norm": 0.44807788729667664, "learning_rate": 0.00044848581921740755, "loss": 1.7086, "step": 30224 }, { "epoch": 1.01, "grad_norm": 0.4757889211177826, "learning_rate": 0.00044847673707526314, "loss": 1.7515, "step": 30225 }, { "epoch": 1.01, "grad_norm": 0.456136554479599, "learning_rate": 0.0004484676547528899, "loss": 1.8049, "step": 30226 }, { "epoch": 1.01, "grad_norm": 0.440952867269516, "learning_rate": 0.00044845857225029874, "loss": 1.7536, "step": 30227 }, { "epoch": 1.01, "grad_norm": 0.47034451365470886, "learning_rate": 0.0004484494895675008, "loss": 1.7718, "step": 30228 }, { "epoch": 1.01, "grad_norm": 0.481126993894577, "learning_rate": 0.00044844040670450713, "loss": 1.7909, "step": 30229 }, { "epoch": 1.01, "grad_norm": 0.4448912441730499, "learning_rate": 0.0004484313236613286, "loss": 1.8114, "step": 30230 }, { "epoch": 1.01, "grad_norm": 0.49449169635772705, "learning_rate": 0.0004484222404379763, "loss": 1.8135, "step": 30231 }, { "epoch": 1.01, "grad_norm": 0.4540727734565735, "learning_rate": 0.00044841315703446127, "loss": 1.7719, "step": 30232 }, { "epoch": 1.01, "grad_norm": 0.4564971327781677, "learning_rate": 0.00044840407345079456, "loss": 1.8309, "step": 30233 }, { "epoch": 1.01, "grad_norm": 0.4502199590206146, "learning_rate": 0.0004483949896869872, "loss": 1.7959, "step": 30234 }, { "epoch": 1.01, "grad_norm": 0.47360771894454956, "learning_rate": 0.00044838590574305004, "loss": 1.7866, "step": 30235 }, { "epoch": 1.01, "grad_norm": 0.4567389488220215, "learning_rate": 0.0004483768216189944, "loss": 1.7785, "step": 30236 }, { "epoch": 1.01, "grad_norm": 0.4698869585990906, "learning_rate": 0.0004483677373148312, "loss": 1.844, "step": 30237 }, { "epoch": 1.01, "grad_norm": 0.4576212465763092, "learning_rate": 0.00044835865283057133, "loss": 1.7807, "step": 30238 }, { "epoch": 1.01, "grad_norm": 0.4777758717536926, "learning_rate": 0.000448349568166226, "loss": 1.747, "step": 30239 }, { "epoch": 1.01, "grad_norm": 0.4626762866973877, "learning_rate": 0.000448340483321806, "loss": 1.8062, "step": 30240 }, { "epoch": 1.01, "grad_norm": 0.4433399438858032, "learning_rate": 0.0004483313982973227, "loss": 1.8311, "step": 30241 }, { "epoch": 1.01, "grad_norm": 0.4586908221244812, "learning_rate": 0.00044832231309278674, "loss": 1.7673, "step": 30242 }, { "epoch": 1.01, "grad_norm": 0.4653065502643585, "learning_rate": 0.0004483132277082095, "loss": 1.8128, "step": 30243 }, { "epoch": 1.01, "grad_norm": 0.47779911756515503, "learning_rate": 0.0004483041421436018, "loss": 1.8812, "step": 30244 }, { "epoch": 1.01, "grad_norm": 0.4677182734012604, "learning_rate": 0.00044829505639897466, "loss": 1.8373, "step": 30245 }, { "epoch": 1.01, "grad_norm": 0.45845919847488403, "learning_rate": 0.0004482859704743393, "loss": 1.8141, "step": 30246 }, { "epoch": 1.01, "grad_norm": 0.43880078196525574, "learning_rate": 0.00044827688436970654, "loss": 1.7496, "step": 30247 }, { "epoch": 1.01, "grad_norm": 0.4403999447822571, "learning_rate": 0.00044826779808508757, "loss": 1.7642, "step": 30248 }, { "epoch": 1.01, "grad_norm": 0.43388932943344116, "learning_rate": 0.0004482587116204932, "loss": 1.8118, "step": 30249 }, { "epoch": 1.01, "grad_norm": 0.46354353427886963, "learning_rate": 0.0004482496249759347, "loss": 1.7593, "step": 30250 }, { "epoch": 1.01, "grad_norm": 0.4623255133628845, "learning_rate": 0.0004482405381514229, "loss": 1.8213, "step": 30251 }, { "epoch": 1.01, "grad_norm": 0.44920554757118225, "learning_rate": 0.00044823145114696906, "loss": 1.8326, "step": 30252 }, { "epoch": 1.01, "grad_norm": 0.44472160935401917, "learning_rate": 0.00044822236396258405, "loss": 1.7239, "step": 30253 }, { "epoch": 1.01, "grad_norm": 0.46342647075653076, "learning_rate": 0.00044821327659827893, "loss": 1.8226, "step": 30254 }, { "epoch": 1.01, "grad_norm": 0.45459550619125366, "learning_rate": 0.0004482041890540647, "loss": 1.8394, "step": 30255 }, { "epoch": 1.01, "grad_norm": 0.464018851518631, "learning_rate": 0.0004481951013299524, "loss": 1.8077, "step": 30256 }, { "epoch": 1.01, "grad_norm": 0.44503194093704224, "learning_rate": 0.0004481860134259531, "loss": 1.7777, "step": 30257 }, { "epoch": 1.01, "grad_norm": 0.4570225477218628, "learning_rate": 0.00044817692534207776, "loss": 1.8267, "step": 30258 }, { "epoch": 1.01, "grad_norm": 0.458610475063324, "learning_rate": 0.0004481678370783376, "loss": 1.7752, "step": 30259 }, { "epoch": 1.01, "grad_norm": 0.4548271894454956, "learning_rate": 0.00044815874863474343, "loss": 1.8136, "step": 30260 }, { "epoch": 1.01, "grad_norm": 0.4801504611968994, "learning_rate": 0.00044814966001130636, "loss": 1.8668, "step": 30261 }, { "epoch": 1.01, "grad_norm": 0.4490499198436737, "learning_rate": 0.00044814057120803744, "loss": 1.7862, "step": 30262 }, { "epoch": 1.01, "grad_norm": 0.45587852597236633, "learning_rate": 0.0004481314822249477, "loss": 1.7972, "step": 30263 }, { "epoch": 1.01, "grad_norm": 0.45275193452835083, "learning_rate": 0.0004481223930620482, "loss": 1.7825, "step": 30264 }, { "epoch": 1.01, "grad_norm": 0.45608222484588623, "learning_rate": 0.00044811330371934984, "loss": 1.7818, "step": 30265 }, { "epoch": 1.01, "grad_norm": 0.4510328769683838, "learning_rate": 0.0004481042141968639, "loss": 1.7921, "step": 30266 }, { "epoch": 1.01, "grad_norm": 0.47191253304481506, "learning_rate": 0.0004480951244946012, "loss": 1.7918, "step": 30267 }, { "epoch": 1.01, "grad_norm": 0.44638577103614807, "learning_rate": 0.00044808603461257284, "loss": 1.8237, "step": 30268 }, { "epoch": 1.01, "grad_norm": 0.9186777472496033, "learning_rate": 0.0004480769445507898, "loss": 1.8071, "step": 30269 }, { "epoch": 1.01, "grad_norm": 0.4399745762348175, "learning_rate": 0.0004480678543092632, "loss": 1.7531, "step": 30270 }, { "epoch": 1.01, "grad_norm": 0.45217716693878174, "learning_rate": 0.00044805876388800416, "loss": 1.8234, "step": 30271 }, { "epoch": 1.01, "grad_norm": 0.44821566343307495, "learning_rate": 0.00044804967328702336, "loss": 1.737, "step": 30272 }, { "epoch": 1.01, "grad_norm": 0.44413018226623535, "learning_rate": 0.00044804058250633226, "loss": 1.7634, "step": 30273 }, { "epoch": 1.01, "grad_norm": 0.44399622082710266, "learning_rate": 0.0004480314915459417, "loss": 1.7717, "step": 30274 }, { "epoch": 1.01, "grad_norm": 0.4506438076496124, "learning_rate": 0.0004480224004058626, "loss": 1.815, "step": 30275 }, { "epoch": 1.01, "grad_norm": 0.44710350036621094, "learning_rate": 0.0004480133090861062, "loss": 1.8287, "step": 30276 }, { "epoch": 1.01, "grad_norm": 0.45094746351242065, "learning_rate": 0.00044800421758668343, "loss": 1.7889, "step": 30277 }, { "epoch": 1.01, "grad_norm": 0.43843257427215576, "learning_rate": 0.00044799512590760544, "loss": 1.7788, "step": 30278 }, { "epoch": 1.01, "grad_norm": 0.4495313763618469, "learning_rate": 0.000447986034048883, "loss": 1.8785, "step": 30279 }, { "epoch": 1.01, "grad_norm": 0.4595257043838501, "learning_rate": 0.0004479769420105274, "loss": 1.824, "step": 30280 }, { "epoch": 1.01, "grad_norm": 0.44726359844207764, "learning_rate": 0.0004479678497925496, "loss": 1.7904, "step": 30281 }, { "epoch": 1.01, "grad_norm": 0.4594670832157135, "learning_rate": 0.00044795875739496064, "loss": 1.7806, "step": 30282 }, { "epoch": 1.01, "grad_norm": 0.44977977871894836, "learning_rate": 0.00044794966481777153, "loss": 1.7404, "step": 30283 }, { "epoch": 1.01, "grad_norm": 0.4807228147983551, "learning_rate": 0.00044794057206099327, "loss": 1.8408, "step": 30284 }, { "epoch": 1.01, "grad_norm": 0.46605023741722107, "learning_rate": 0.00044793147912463715, "loss": 1.7978, "step": 30285 }, { "epoch": 1.01, "grad_norm": 0.4674888253211975, "learning_rate": 0.0004479223860087139, "loss": 1.8554, "step": 30286 }, { "epoch": 1.01, "grad_norm": 0.4603331983089447, "learning_rate": 0.0004479132927132346, "loss": 1.802, "step": 30287 }, { "epoch": 1.01, "grad_norm": 0.47315889596939087, "learning_rate": 0.0004479041992382104, "loss": 1.8147, "step": 30288 }, { "epoch": 1.01, "grad_norm": 0.47349631786346436, "learning_rate": 0.0004478951055836523, "loss": 1.8209, "step": 30289 }, { "epoch": 1.01, "grad_norm": 0.4435560703277588, "learning_rate": 0.0004478860117495713, "loss": 1.8654, "step": 30290 }, { "epoch": 1.01, "grad_norm": 0.45427823066711426, "learning_rate": 0.00044787691773597857, "loss": 1.7727, "step": 30291 }, { "epoch": 1.01, "grad_norm": 0.47780460119247437, "learning_rate": 0.000447867823542885, "loss": 1.7313, "step": 30292 }, { "epoch": 1.01, "grad_norm": 0.4584837853908539, "learning_rate": 0.00044785872917030165, "loss": 1.6615, "step": 30293 }, { "epoch": 1.01, "grad_norm": 0.44921576976776123, "learning_rate": 0.0004478496346182395, "loss": 1.6772, "step": 30294 }, { "epoch": 1.01, "grad_norm": 0.4675382375717163, "learning_rate": 0.00044784053988670985, "loss": 1.8284, "step": 30295 }, { "epoch": 1.01, "grad_norm": 0.5588744282722473, "learning_rate": 0.00044783144497572363, "loss": 1.9162, "step": 30296 }, { "epoch": 1.01, "grad_norm": 0.4589841365814209, "learning_rate": 0.00044782234988529154, "loss": 1.8334, "step": 30297 }, { "epoch": 1.01, "grad_norm": 0.467898428440094, "learning_rate": 0.0004478132546154251, "loss": 1.7572, "step": 30298 }, { "epoch": 1.01, "grad_norm": 0.4585064947605133, "learning_rate": 0.0004478041591661351, "loss": 1.7703, "step": 30299 }, { "epoch": 1.01, "grad_norm": 0.4552549719810486, "learning_rate": 0.0004477950635374326, "loss": 1.7843, "step": 30300 }, { "epoch": 1.01, "grad_norm": 0.48267990350723267, "learning_rate": 0.00044778596772932877, "loss": 1.8655, "step": 30301 }, { "epoch": 1.01, "grad_norm": 0.45654305815696716, "learning_rate": 0.0004477768717418344, "loss": 1.8253, "step": 30302 }, { "epoch": 1.01, "grad_norm": 0.4471537172794342, "learning_rate": 0.00044776777557496076, "loss": 1.7995, "step": 30303 }, { "epoch": 1.01, "grad_norm": 0.4424845576286316, "learning_rate": 0.0004477586792287188, "loss": 1.8834, "step": 30304 }, { "epoch": 1.01, "grad_norm": 0.4574891924858093, "learning_rate": 0.00044774958270311964, "loss": 1.775, "step": 30305 }, { "epoch": 1.01, "grad_norm": 0.4639163613319397, "learning_rate": 0.0004477404859981742, "loss": 1.7813, "step": 30306 }, { "epoch": 1.01, "grad_norm": 0.4754658639431, "learning_rate": 0.00044773138911389354, "loss": 1.8026, "step": 30307 }, { "epoch": 1.01, "grad_norm": 0.46339964866638184, "learning_rate": 0.00044772229205028885, "loss": 1.7213, "step": 30308 }, { "epoch": 1.01, "grad_norm": 0.4604983627796173, "learning_rate": 0.00044771319480737095, "loss": 1.7344, "step": 30309 }, { "epoch": 1.01, "grad_norm": 0.45410555601119995, "learning_rate": 0.00044770409738515104, "loss": 1.7713, "step": 30310 }, { "epoch": 1.01, "grad_norm": 0.45891547203063965, "learning_rate": 0.0004476949997836401, "loss": 1.7477, "step": 30311 }, { "epoch": 1.01, "grad_norm": 0.44215190410614014, "learning_rate": 0.00044768590200284917, "loss": 1.7461, "step": 30312 }, { "epoch": 1.01, "grad_norm": 0.45746034383773804, "learning_rate": 0.0004476768040427894, "loss": 1.7507, "step": 30313 }, { "epoch": 1.01, "grad_norm": 0.4740951657295227, "learning_rate": 0.0004476677059034717, "loss": 1.8058, "step": 30314 }, { "epoch": 1.01, "grad_norm": 0.45903515815734863, "learning_rate": 0.0004476586075849073, "loss": 1.8945, "step": 30315 }, { "epoch": 1.01, "grad_norm": 0.4423268437385559, "learning_rate": 0.0004476495090871069, "loss": 1.7312, "step": 30316 }, { "epoch": 1.01, "grad_norm": 0.44934797286987305, "learning_rate": 0.0004476404104100818, "loss": 1.775, "step": 30317 }, { "epoch": 1.01, "grad_norm": 0.46514376997947693, "learning_rate": 0.00044763131155384304, "loss": 1.8123, "step": 30318 }, { "epoch": 1.01, "grad_norm": 0.44838887453079224, "learning_rate": 0.0004476222125184016, "loss": 1.7642, "step": 30319 }, { "epoch": 1.01, "grad_norm": 0.44809138774871826, "learning_rate": 0.0004476131133037685, "loss": 1.8547, "step": 30320 }, { "epoch": 1.01, "grad_norm": 0.44716179370880127, "learning_rate": 0.0004476040139099549, "loss": 1.7495, "step": 30321 }, { "epoch": 1.01, "grad_norm": 0.4481121003627777, "learning_rate": 0.0004475949143369718, "loss": 1.8145, "step": 30322 }, { "epoch": 1.01, "grad_norm": 0.4695016145706177, "learning_rate": 0.0004475858145848301, "loss": 1.9152, "step": 30323 }, { "epoch": 1.01, "grad_norm": 0.4717327654361725, "learning_rate": 0.0004475767146535411, "loss": 1.7927, "step": 30324 }, { "epoch": 1.01, "grad_norm": 0.46045950055122375, "learning_rate": 0.0004475676145431156, "loss": 1.8663, "step": 30325 }, { "epoch": 1.01, "grad_norm": 0.474027156829834, "learning_rate": 0.0004475585142535649, "loss": 1.8081, "step": 30326 }, { "epoch": 1.01, "grad_norm": 0.45674899220466614, "learning_rate": 0.0004475494137848997, "loss": 1.7746, "step": 30327 }, { "epoch": 1.01, "grad_norm": 0.4268791079521179, "learning_rate": 0.0004475403131371315, "loss": 1.7286, "step": 30328 }, { "epoch": 1.01, "grad_norm": 0.45651423931121826, "learning_rate": 0.00044753121231027085, "loss": 1.8207, "step": 30329 }, { "epoch": 1.01, "grad_norm": 0.47699597477912903, "learning_rate": 0.0004475221113043292, "loss": 1.7422, "step": 30330 }, { "epoch": 1.01, "grad_norm": 0.44436562061309814, "learning_rate": 0.00044751301011931746, "loss": 1.8011, "step": 30331 }, { "epoch": 1.01, "grad_norm": 0.4530153274536133, "learning_rate": 0.0004475039087552465, "loss": 1.8339, "step": 30332 }, { "epoch": 1.01, "grad_norm": 0.46204614639282227, "learning_rate": 0.00044749480721212776, "loss": 1.711, "step": 30333 }, { "epoch": 1.01, "grad_norm": 0.42892974615097046, "learning_rate": 0.00044748570548997185, "loss": 1.7764, "step": 30334 }, { "epoch": 1.01, "grad_norm": 0.4672873318195343, "learning_rate": 0.00044747660358879015, "loss": 1.862, "step": 30335 }, { "epoch": 1.01, "grad_norm": 0.46813586354255676, "learning_rate": 0.00044746750150859355, "loss": 1.7905, "step": 30336 }, { "epoch": 1.01, "grad_norm": 0.4523158669471741, "learning_rate": 0.0004474583992493931, "loss": 1.8446, "step": 30337 }, { "epoch": 1.01, "grad_norm": 0.45061734318733215, "learning_rate": 0.0004474492968111999, "loss": 1.8536, "step": 30338 }, { "epoch": 1.01, "grad_norm": 0.44958898425102234, "learning_rate": 0.00044744019419402497, "loss": 1.8504, "step": 30339 }, { "epoch": 1.01, "grad_norm": 0.4561852812767029, "learning_rate": 0.0004474310913978794, "loss": 1.7636, "step": 30340 }, { "epoch": 1.01, "grad_norm": 0.4550442397594452, "learning_rate": 0.0004474219884227742, "loss": 1.7981, "step": 30341 }, { "epoch": 1.01, "grad_norm": 0.4589216709136963, "learning_rate": 0.00044741288526872037, "loss": 1.8218, "step": 30342 }, { "epoch": 1.01, "grad_norm": 0.4741891622543335, "learning_rate": 0.0004474037819357291, "loss": 1.7935, "step": 30343 }, { "epoch": 1.01, "grad_norm": 0.44339776039123535, "learning_rate": 0.0004473946784238113, "loss": 1.7907, "step": 30344 }, { "epoch": 1.01, "grad_norm": 0.4498891532421112, "learning_rate": 0.0004473855747329781, "loss": 1.6995, "step": 30345 }, { "epoch": 1.01, "grad_norm": 0.46432265639305115, "learning_rate": 0.0004473764708632406, "loss": 1.8171, "step": 30346 }, { "epoch": 1.01, "grad_norm": 0.4594893753528595, "learning_rate": 0.00044736736681460967, "loss": 1.8208, "step": 30347 }, { "epoch": 1.01, "grad_norm": 0.4488230347633362, "learning_rate": 0.0004473582625870965, "loss": 1.7822, "step": 30348 }, { "epoch": 1.01, "grad_norm": 0.5311059355735779, "learning_rate": 0.0004473491581807121, "loss": 1.8653, "step": 30349 }, { "epoch": 1.01, "grad_norm": 0.4495719373226166, "learning_rate": 0.00044734005359546754, "loss": 1.805, "step": 30350 }, { "epoch": 1.01, "grad_norm": 0.46410220861434937, "learning_rate": 0.00044733094883137395, "loss": 1.7961, "step": 30351 }, { "epoch": 1.01, "grad_norm": 0.44553375244140625, "learning_rate": 0.0004473218438884421, "loss": 1.8266, "step": 30352 }, { "epoch": 1.01, "grad_norm": 0.4429768919944763, "learning_rate": 0.0004473127387666834, "loss": 1.8109, "step": 30353 }, { "epoch": 1.01, "grad_norm": 0.44991254806518555, "learning_rate": 0.0004473036334661088, "loss": 1.7515, "step": 30354 }, { "epoch": 1.01, "grad_norm": 0.4536207914352417, "learning_rate": 0.00044729452798672914, "loss": 1.8627, "step": 30355 }, { "epoch": 1.01, "grad_norm": 0.4662550687789917, "learning_rate": 0.00044728542232855576, "loss": 1.7933, "step": 30356 }, { "epoch": 1.01, "grad_norm": 0.44362667202949524, "learning_rate": 0.00044727631649159945, "loss": 1.7844, "step": 30357 }, { "epoch": 1.01, "grad_norm": 0.44207531213760376, "learning_rate": 0.00044726721047587143, "loss": 1.7431, "step": 30358 }, { "epoch": 1.01, "grad_norm": 0.46760866045951843, "learning_rate": 0.00044725810428138265, "loss": 1.839, "step": 30359 }, { "epoch": 1.01, "grad_norm": 0.4387235641479492, "learning_rate": 0.0004472489979081444, "loss": 1.8553, "step": 30360 }, { "epoch": 1.01, "grad_norm": 0.4367116093635559, "learning_rate": 0.0004472398913561674, "loss": 1.8268, "step": 30361 }, { "epoch": 1.01, "grad_norm": 0.4432736933231354, "learning_rate": 0.0004472307846254629, "loss": 1.7938, "step": 30362 }, { "epoch": 1.01, "grad_norm": 0.44880175590515137, "learning_rate": 0.0004472216777160421, "loss": 1.8119, "step": 30363 }, { "epoch": 1.01, "grad_norm": 0.44358235597610474, "learning_rate": 0.0004472125706279156, "loss": 1.8081, "step": 30364 }, { "epoch": 1.01, "grad_norm": 0.4632522761821747, "learning_rate": 0.0004472034633610949, "loss": 1.861, "step": 30365 }, { "epoch": 1.01, "grad_norm": 0.4531915783882141, "learning_rate": 0.0004471943559155908, "loss": 1.762, "step": 30366 }, { "epoch": 1.01, "grad_norm": 0.44469842314720154, "learning_rate": 0.0004471852482914145, "loss": 1.7963, "step": 30367 }, { "epoch": 1.01, "grad_norm": 0.4439157247543335, "learning_rate": 0.000447176140488577, "loss": 1.7759, "step": 30368 }, { "epoch": 1.01, "grad_norm": 0.4519318640232086, "learning_rate": 0.00044716703250708936, "loss": 1.8294, "step": 30369 }, { "epoch": 1.01, "grad_norm": 0.4535127282142639, "learning_rate": 0.00044715792434696263, "loss": 1.8368, "step": 30370 }, { "epoch": 1.01, "grad_norm": 0.45308080315589905, "learning_rate": 0.0004471488160082078, "loss": 1.8381, "step": 30371 }, { "epoch": 1.01, "grad_norm": 0.45497050881385803, "learning_rate": 0.00044713970749083603, "loss": 1.8643, "step": 30372 }, { "epoch": 1.01, "grad_norm": 0.4605908989906311, "learning_rate": 0.0004471305987948583, "loss": 1.7881, "step": 30373 }, { "epoch": 1.01, "grad_norm": 0.47069254517555237, "learning_rate": 0.00044712148992028574, "loss": 1.7651, "step": 30374 }, { "epoch": 1.01, "grad_norm": 0.4624462425708771, "learning_rate": 0.00044711238086712934, "loss": 1.8149, "step": 30375 }, { "epoch": 1.01, "grad_norm": 0.44120535254478455, "learning_rate": 0.0004471032716354003, "loss": 1.7342, "step": 30376 }, { "epoch": 1.01, "grad_norm": 0.46115022897720337, "learning_rate": 0.00044709416222510936, "loss": 1.7847, "step": 30377 }, { "epoch": 1.01, "grad_norm": 0.4532284438610077, "learning_rate": 0.0004470850526362679, "loss": 1.8308, "step": 30378 }, { "epoch": 1.01, "grad_norm": 0.45033228397369385, "learning_rate": 0.0004470759428688868, "loss": 1.7976, "step": 30379 }, { "epoch": 1.01, "grad_norm": 0.4480225145816803, "learning_rate": 0.00044706683292297726, "loss": 1.7963, "step": 30380 }, { "epoch": 1.01, "grad_norm": 0.4608684480190277, "learning_rate": 0.0004470577227985503, "loss": 1.8194, "step": 30381 }, { "epoch": 1.01, "grad_norm": 0.45220932364463806, "learning_rate": 0.00044704861249561675, "loss": 1.766, "step": 30382 }, { "epoch": 1.01, "grad_norm": 0.451688289642334, "learning_rate": 0.00044703950201418796, "loss": 1.8, "step": 30383 }, { "epoch": 1.01, "grad_norm": 0.4384891986846924, "learning_rate": 0.0004470303913542749, "loss": 1.7947, "step": 30384 }, { "epoch": 1.01, "grad_norm": 0.4456011652946472, "learning_rate": 0.00044702128051588854, "loss": 1.7458, "step": 30385 }, { "epoch": 1.01, "grad_norm": 0.45855244994163513, "learning_rate": 0.00044701216949904, "loss": 1.7503, "step": 30386 }, { "epoch": 1.01, "grad_norm": 0.44773975014686584, "learning_rate": 0.00044700305830374043, "loss": 1.828, "step": 30387 }, { "epoch": 1.01, "grad_norm": 0.4533573389053345, "learning_rate": 0.0004469939469300008, "loss": 1.7918, "step": 30388 }, { "epoch": 1.01, "grad_norm": 0.4469945728778839, "learning_rate": 0.0004469848353778321, "loss": 1.7953, "step": 30389 }, { "epoch": 1.01, "grad_norm": 0.43732312321662903, "learning_rate": 0.0004469757236472456, "loss": 1.7466, "step": 30390 }, { "epoch": 1.01, "grad_norm": 0.44582417607307434, "learning_rate": 0.00044696661173825204, "loss": 1.8204, "step": 30391 }, { "epoch": 1.01, "grad_norm": 0.4506478011608124, "learning_rate": 0.00044695749965086284, "loss": 1.8555, "step": 30392 }, { "epoch": 1.01, "grad_norm": 0.4521808922290802, "learning_rate": 0.00044694838738508876, "loss": 1.7843, "step": 30393 }, { "epoch": 1.01, "grad_norm": 0.45741209387779236, "learning_rate": 0.00044693927494094104, "loss": 1.8332, "step": 30394 }, { "epoch": 1.01, "grad_norm": 0.46016925573349, "learning_rate": 0.00044693016231843073, "loss": 1.8822, "step": 30395 }, { "epoch": 1.01, "grad_norm": 0.45092034339904785, "learning_rate": 0.00044692104951756884, "loss": 1.8661, "step": 30396 }, { "epoch": 1.01, "grad_norm": 0.4356629550457001, "learning_rate": 0.0004469119365383664, "loss": 1.7448, "step": 30397 }, { "epoch": 1.01, "grad_norm": 0.4479570686817169, "learning_rate": 0.0004469028233808345, "loss": 1.7294, "step": 30398 }, { "epoch": 1.01, "grad_norm": 0.4463891386985779, "learning_rate": 0.00044689371004498425, "loss": 1.8136, "step": 30399 }, { "epoch": 1.01, "grad_norm": 0.4560031592845917, "learning_rate": 0.0004468845965308267, "loss": 1.6707, "step": 30400 }, { "epoch": 1.01, "grad_norm": 0.44243693351745605, "learning_rate": 0.0004468754828383729, "loss": 1.9246, "step": 30401 }, { "epoch": 1.01, "grad_norm": 0.4454552233219147, "learning_rate": 0.00044686636896763387, "loss": 1.7997, "step": 30402 }, { "epoch": 1.01, "grad_norm": 0.45719441771507263, "learning_rate": 0.00044685725491862077, "loss": 1.7679, "step": 30403 }, { "epoch": 1.01, "grad_norm": 0.45408135652542114, "learning_rate": 0.00044684814069134455, "loss": 1.8049, "step": 30404 }, { "epoch": 1.01, "grad_norm": 0.4400871992111206, "learning_rate": 0.0004468390262858162, "loss": 1.7703, "step": 30405 }, { "epoch": 1.01, "grad_norm": 0.44027456641197205, "learning_rate": 0.00044682991170204707, "loss": 1.7077, "step": 30406 }, { "epoch": 1.01, "grad_norm": 0.46570366621017456, "learning_rate": 0.000446820796940048, "loss": 1.888, "step": 30407 }, { "epoch": 1.01, "grad_norm": 0.46531063318252563, "learning_rate": 0.0004468116819998302, "loss": 1.7634, "step": 30408 }, { "epoch": 1.01, "grad_norm": 0.45217615365982056, "learning_rate": 0.0004468025668814046, "loss": 1.8198, "step": 30409 }, { "epoch": 1.01, "grad_norm": 0.44089826941490173, "learning_rate": 0.0004467934515847823, "loss": 1.7725, "step": 30410 }, { "epoch": 1.01, "grad_norm": 0.4602004289627075, "learning_rate": 0.0004467843361099743, "loss": 1.8132, "step": 30411 }, { "epoch": 1.01, "grad_norm": 0.4498644769191742, "learning_rate": 0.00044677522045699186, "loss": 1.8002, "step": 30412 }, { "epoch": 1.01, "grad_norm": 0.46126797795295715, "learning_rate": 0.00044676610462584594, "loss": 1.8842, "step": 30413 }, { "epoch": 1.01, "grad_norm": 0.4710718095302582, "learning_rate": 0.00044675698861654754, "loss": 1.7648, "step": 30414 }, { "epoch": 1.01, "grad_norm": 0.4659058451652527, "learning_rate": 0.0004467478724291078, "loss": 1.7863, "step": 30415 }, { "epoch": 1.01, "grad_norm": 0.4489092528820038, "learning_rate": 0.00044673875606353775, "loss": 1.7499, "step": 30416 }, { "epoch": 1.01, "grad_norm": 0.46026530861854553, "learning_rate": 0.00044672963951984847, "loss": 1.7748, "step": 30417 }, { "epoch": 1.01, "grad_norm": 0.4557580351829529, "learning_rate": 0.00044672052279805106, "loss": 1.7584, "step": 30418 }, { "epoch": 1.01, "grad_norm": 0.44841015338897705, "learning_rate": 0.0004467114058981565, "loss": 1.7445, "step": 30419 }, { "epoch": 1.01, "grad_norm": 0.44635826349258423, "learning_rate": 0.000446702288820176, "loss": 1.7287, "step": 30420 }, { "epoch": 1.01, "grad_norm": 0.4716402292251587, "learning_rate": 0.0004466931715641204, "loss": 1.8511, "step": 30421 }, { "epoch": 1.01, "grad_norm": 0.46340930461883545, "learning_rate": 0.00044668405413000097, "loss": 1.7526, "step": 30422 }, { "epoch": 1.01, "grad_norm": 0.43589887022972107, "learning_rate": 0.0004466749365178287, "loss": 1.6894, "step": 30423 }, { "epoch": 1.01, "grad_norm": 0.4395500719547272, "learning_rate": 0.0004466658187276147, "loss": 1.7692, "step": 30424 }, { "epoch": 1.01, "grad_norm": 0.4503617584705353, "learning_rate": 0.00044665670075937005, "loss": 1.7598, "step": 30425 }, { "epoch": 1.01, "grad_norm": 0.44434189796447754, "learning_rate": 0.0004466475826131056, "loss": 1.8233, "step": 30426 }, { "epoch": 1.01, "grad_norm": 0.47136300802230835, "learning_rate": 0.00044663846428883276, "loss": 1.7376, "step": 30427 }, { "epoch": 1.01, "grad_norm": 0.4573631286621094, "learning_rate": 0.0004466293457865624, "loss": 1.803, "step": 30428 }, { "epoch": 1.01, "grad_norm": 0.46128425002098083, "learning_rate": 0.00044662022710630555, "loss": 1.8011, "step": 30429 }, { "epoch": 1.01, "grad_norm": 0.48540645837783813, "learning_rate": 0.00044661110824807345, "loss": 1.7683, "step": 30430 }, { "epoch": 1.01, "grad_norm": 0.4614783227443695, "learning_rate": 0.00044660198921187694, "loss": 1.8436, "step": 30431 }, { "epoch": 1.01, "grad_norm": 0.4616672992706299, "learning_rate": 0.00044659286999772734, "loss": 1.8205, "step": 30432 }, { "epoch": 1.01, "grad_norm": 0.46829214692115784, "learning_rate": 0.00044658375060563555, "loss": 1.8233, "step": 30433 }, { "epoch": 1.01, "grad_norm": 0.46145254373550415, "learning_rate": 0.00044657463103561264, "loss": 1.7662, "step": 30434 }, { "epoch": 1.01, "grad_norm": 0.44484254717826843, "learning_rate": 0.00044656551128766976, "loss": 1.7506, "step": 30435 }, { "epoch": 1.01, "grad_norm": 0.4499891698360443, "learning_rate": 0.0004465563913618179, "loss": 1.8161, "step": 30436 }, { "epoch": 1.01, "grad_norm": 0.4499560296535492, "learning_rate": 0.00044654727125806825, "loss": 1.8265, "step": 30437 }, { "epoch": 1.01, "grad_norm": 0.4531184732913971, "learning_rate": 0.00044653815097643184, "loss": 1.7648, "step": 30438 }, { "epoch": 1.01, "grad_norm": 0.4330732226371765, "learning_rate": 0.00044652903051691965, "loss": 1.7313, "step": 30439 }, { "epoch": 1.01, "grad_norm": 0.4511062502861023, "learning_rate": 0.0004465199098795428, "loss": 1.7877, "step": 30440 }, { "epoch": 1.01, "grad_norm": 0.4622325599193573, "learning_rate": 0.0004465107890643124, "loss": 1.7301, "step": 30441 }, { "epoch": 1.01, "grad_norm": 0.4570222496986389, "learning_rate": 0.0004465016680712394, "loss": 1.8116, "step": 30442 }, { "epoch": 1.01, "grad_norm": 0.45183059573173523, "learning_rate": 0.00044649254690033507, "loss": 1.8458, "step": 30443 }, { "epoch": 1.01, "grad_norm": 0.46160978078842163, "learning_rate": 0.00044648342555161024, "loss": 1.8316, "step": 30444 }, { "epoch": 1.01, "grad_norm": 0.46339911222457886, "learning_rate": 0.0004464743040250762, "loss": 1.7568, "step": 30445 }, { "epoch": 1.01, "grad_norm": 0.4405686855316162, "learning_rate": 0.00044646518232074395, "loss": 1.7748, "step": 30446 }, { "epoch": 1.01, "grad_norm": 0.4577510952949524, "learning_rate": 0.00044645606043862456, "loss": 1.794, "step": 30447 }, { "epoch": 1.01, "grad_norm": 0.4921330511569977, "learning_rate": 0.000446446938378729, "loss": 1.8471, "step": 30448 }, { "epoch": 1.01, "grad_norm": 0.4787713289260864, "learning_rate": 0.0004464378161410685, "loss": 1.8158, "step": 30449 }, { "epoch": 1.01, "grad_norm": 0.44247129559516907, "learning_rate": 0.00044642869372565414, "loss": 1.8277, "step": 30450 }, { "epoch": 1.01, "grad_norm": 0.45048806071281433, "learning_rate": 0.0004464195711324968, "loss": 1.8417, "step": 30451 }, { "epoch": 1.01, "grad_norm": 0.4489951431751251, "learning_rate": 0.00044641044836160777, "loss": 1.7647, "step": 30452 }, { "epoch": 1.01, "grad_norm": 0.46501466631889343, "learning_rate": 0.000446401325412998, "loss": 1.7703, "step": 30453 }, { "epoch": 1.01, "grad_norm": 0.4545343816280365, "learning_rate": 0.0004463922022866785, "loss": 1.8576, "step": 30454 }, { "epoch": 1.01, "grad_norm": 0.4399349093437195, "learning_rate": 0.00044638307898266054, "loss": 1.7486, "step": 30455 }, { "epoch": 1.01, "grad_norm": 0.4382351040840149, "learning_rate": 0.00044637395550095507, "loss": 1.7783, "step": 30456 }, { "epoch": 1.01, "grad_norm": 0.46454283595085144, "learning_rate": 0.0004463648318415732, "loss": 1.744, "step": 30457 }, { "epoch": 1.01, "grad_norm": 0.4582142233848572, "learning_rate": 0.000446355708004526, "loss": 1.8143, "step": 30458 }, { "epoch": 1.01, "grad_norm": 0.45797815918922424, "learning_rate": 0.00044634658398982446, "loss": 1.8119, "step": 30459 }, { "epoch": 1.01, "grad_norm": 0.472472220659256, "learning_rate": 0.00044633745979747976, "loss": 1.846, "step": 30460 }, { "epoch": 1.01, "grad_norm": 0.464348703622818, "learning_rate": 0.00044632833542750295, "loss": 1.795, "step": 30461 }, { "epoch": 1.01, "grad_norm": 0.42664432525634766, "learning_rate": 0.0004463192108799051, "loss": 1.7457, "step": 30462 }, { "epoch": 1.01, "grad_norm": 0.45298823714256287, "learning_rate": 0.00044631008615469736, "loss": 1.8455, "step": 30463 }, { "epoch": 1.01, "grad_norm": 0.46730703115463257, "learning_rate": 0.00044630096125189065, "loss": 1.8339, "step": 30464 }, { "epoch": 1.01, "grad_norm": 0.44354575872421265, "learning_rate": 0.0004462918361714961, "loss": 1.8309, "step": 30465 }, { "epoch": 1.01, "grad_norm": 0.45195502042770386, "learning_rate": 0.0004462827109135249, "loss": 1.8119, "step": 30466 }, { "epoch": 1.01, "grad_norm": 0.4558970332145691, "learning_rate": 0.00044627358547798794, "loss": 1.7618, "step": 30467 }, { "epoch": 1.01, "grad_norm": 0.4770108163356781, "learning_rate": 0.0004462644598648966, "loss": 1.7585, "step": 30468 }, { "epoch": 1.01, "grad_norm": 0.45880743861198425, "learning_rate": 0.00044625533407426153, "loss": 1.8312, "step": 30469 }, { "epoch": 1.01, "grad_norm": 0.4581843316555023, "learning_rate": 0.0004462462081060941, "loss": 1.8378, "step": 30470 }, { "epoch": 1.01, "grad_norm": 0.44410187005996704, "learning_rate": 0.00044623708196040536, "loss": 1.7898, "step": 30471 }, { "epoch": 1.01, "grad_norm": 0.4646955132484436, "learning_rate": 0.0004462279556372063, "loss": 1.8026, "step": 30472 }, { "epoch": 1.01, "grad_norm": 0.44043204188346863, "learning_rate": 0.0004462188291365082, "loss": 1.834, "step": 30473 }, { "epoch": 1.01, "grad_norm": 0.4477699100971222, "learning_rate": 0.0004462097024583218, "loss": 1.8313, "step": 30474 }, { "epoch": 1.01, "grad_norm": 0.4648039638996124, "learning_rate": 0.0004462005756026585, "loss": 1.8441, "step": 30475 }, { "epoch": 1.01, "grad_norm": 0.44887372851371765, "learning_rate": 0.0004461914485695291, "loss": 1.7652, "step": 30476 }, { "epoch": 1.01, "grad_norm": 0.4540163576602936, "learning_rate": 0.0004461823213589449, "loss": 1.8142, "step": 30477 }, { "epoch": 1.01, "grad_norm": 0.4661101698875427, "learning_rate": 0.0004461731939709169, "loss": 1.7597, "step": 30478 }, { "epoch": 1.01, "grad_norm": 0.4394354820251465, "learning_rate": 0.0004461640664054562, "loss": 1.8393, "step": 30479 }, { "epoch": 1.01, "grad_norm": 0.44481638073921204, "learning_rate": 0.00044615493866257387, "loss": 1.8632, "step": 30480 }, { "epoch": 1.01, "grad_norm": 0.45777979493141174, "learning_rate": 0.00044614581074228093, "loss": 1.7334, "step": 30481 }, { "epoch": 1.01, "grad_norm": 0.452100545167923, "learning_rate": 0.00044613668264458856, "loss": 1.7875, "step": 30482 }, { "epoch": 1.01, "grad_norm": 0.44002872705459595, "learning_rate": 0.00044612755436950776, "loss": 1.8283, "step": 30483 }, { "epoch": 1.01, "grad_norm": 0.45298251509666443, "learning_rate": 0.0004461184259170496, "loss": 1.7738, "step": 30484 }, { "epoch": 1.01, "grad_norm": 0.4583914577960968, "learning_rate": 0.0004461092972872252, "loss": 1.8535, "step": 30485 }, { "epoch": 1.01, "grad_norm": 0.4546871483325958, "learning_rate": 0.0004461001684800457, "loss": 1.7764, "step": 30486 }, { "epoch": 1.01, "grad_norm": 0.46706488728523254, "learning_rate": 0.00044609103949552216, "loss": 1.8455, "step": 30487 }, { "epoch": 1.01, "grad_norm": 0.46665698289871216, "learning_rate": 0.0004460819103336656, "loss": 1.7571, "step": 30488 }, { "epoch": 1.01, "grad_norm": 0.4777587354183197, "learning_rate": 0.000446072780994487, "loss": 1.6774, "step": 30489 }, { "epoch": 1.01, "grad_norm": 0.4980660080909729, "learning_rate": 0.0004460636514779977, "loss": 1.7384, "step": 30490 }, { "epoch": 1.01, "grad_norm": 0.47495758533477783, "learning_rate": 0.0004460545217842086, "loss": 1.7267, "step": 30491 }, { "epoch": 1.01, "grad_norm": 0.4485267698764801, "learning_rate": 0.0004460453919131308, "loss": 1.7467, "step": 30492 }, { "epoch": 1.01, "grad_norm": 0.4564976692199707, "learning_rate": 0.0004460362618647755, "loss": 1.8409, "step": 30493 }, { "epoch": 1.01, "grad_norm": 0.4730842113494873, "learning_rate": 0.0004460271316391536, "loss": 1.7605, "step": 30494 }, { "epoch": 1.01, "grad_norm": 0.48799338936805725, "learning_rate": 0.0004460180012362764, "loss": 1.8993, "step": 30495 }, { "epoch": 1.01, "grad_norm": 0.4631420075893402, "learning_rate": 0.0004460088706561548, "loss": 1.8582, "step": 30496 }, { "epoch": 1.01, "grad_norm": 0.4638274013996124, "learning_rate": 0.0004459997398987998, "loss": 1.6963, "step": 30497 }, { "epoch": 1.01, "grad_norm": 0.44577115774154663, "learning_rate": 0.00044599060896422286, "loss": 1.77, "step": 30498 }, { "epoch": 1.01, "grad_norm": 0.459748238325119, "learning_rate": 0.0004459814778524347, "loss": 1.7717, "step": 30499 }, { "epoch": 1.01, "grad_norm": 0.45865005254745483, "learning_rate": 0.00044597234656344655, "loss": 1.7156, "step": 30500 }, { "epoch": 1.01, "grad_norm": 0.45080822706222534, "learning_rate": 0.0004459632150972695, "loss": 1.8193, "step": 30501 }, { "epoch": 1.01, "grad_norm": 0.47171375155448914, "learning_rate": 0.0004459540834539146, "loss": 1.8197, "step": 30502 }, { "epoch": 1.01, "grad_norm": 0.4587385058403015, "learning_rate": 0.00044594495163339295, "loss": 1.7966, "step": 30503 }, { "epoch": 1.01, "grad_norm": 0.4644349217414856, "learning_rate": 0.0004459358196357157, "loss": 1.7924, "step": 30504 }, { "epoch": 1.01, "grad_norm": 0.4536982774734497, "learning_rate": 0.0004459266874608938, "loss": 1.7043, "step": 30505 }, { "epoch": 1.01, "grad_norm": 0.4403516352176666, "learning_rate": 0.00044591755510893836, "loss": 1.7485, "step": 30506 }, { "epoch": 1.01, "grad_norm": 0.4497758448123932, "learning_rate": 0.00044590842257986055, "loss": 1.7322, "step": 30507 }, { "epoch": 1.02, "grad_norm": 0.4725562632083893, "learning_rate": 0.0004458992898736715, "loss": 1.746, "step": 30508 }, { "epoch": 1.02, "grad_norm": 0.43880361318588257, "learning_rate": 0.00044589015699038207, "loss": 1.7362, "step": 30509 }, { "epoch": 1.02, "grad_norm": 0.46113699674606323, "learning_rate": 0.0004458810239300035, "loss": 1.7215, "step": 30510 }, { "epoch": 1.02, "grad_norm": 0.4675971567630768, "learning_rate": 0.0004458718906925469, "loss": 1.8019, "step": 30511 }, { "epoch": 1.02, "grad_norm": 0.4730561077594757, "learning_rate": 0.0004458627572780234, "loss": 1.7703, "step": 30512 }, { "epoch": 1.02, "grad_norm": 0.4785570800304413, "learning_rate": 0.0004458536236864439, "loss": 1.8014, "step": 30513 }, { "epoch": 1.02, "grad_norm": 0.4706522822380066, "learning_rate": 0.0004458444899178196, "loss": 1.7274, "step": 30514 }, { "epoch": 1.02, "grad_norm": 1.1263107061386108, "learning_rate": 0.0004458353559721616, "loss": 1.7707, "step": 30515 }, { "epoch": 1.02, "grad_norm": 0.462341845035553, "learning_rate": 0.000445826221849481, "loss": 1.833, "step": 30516 }, { "epoch": 1.02, "grad_norm": 0.46848002076148987, "learning_rate": 0.00044581708754978875, "loss": 1.8038, "step": 30517 }, { "epoch": 1.02, "grad_norm": 0.47772833704948425, "learning_rate": 0.0004458079530730962, "loss": 1.8102, "step": 30518 }, { "epoch": 1.02, "grad_norm": 0.4469344913959503, "learning_rate": 0.0004457988184194141, "loss": 1.8238, "step": 30519 }, { "epoch": 1.02, "grad_norm": 0.47542038559913635, "learning_rate": 0.00044578968358875384, "loss": 1.7902, "step": 30520 }, { "epoch": 1.02, "grad_norm": 0.4677006006240845, "learning_rate": 0.00044578054858112633, "loss": 1.6774, "step": 30521 }, { "epoch": 1.02, "grad_norm": 0.4804662764072418, "learning_rate": 0.00044577141339654277, "loss": 1.7527, "step": 30522 }, { "epoch": 1.02, "grad_norm": 0.46330782771110535, "learning_rate": 0.0004457622780350141, "loss": 1.7525, "step": 30523 }, { "epoch": 1.02, "grad_norm": 0.46957793831825256, "learning_rate": 0.0004457531424965515, "loss": 1.8027, "step": 30524 }, { "epoch": 1.02, "grad_norm": 0.47242242097854614, "learning_rate": 0.0004457440067811662, "loss": 1.7417, "step": 30525 }, { "epoch": 1.02, "grad_norm": 0.453141450881958, "learning_rate": 0.0004457348708888691, "loss": 1.8029, "step": 30526 }, { "epoch": 1.02, "grad_norm": 0.4488191604614258, "learning_rate": 0.0004457257348196713, "loss": 1.8141, "step": 30527 }, { "epoch": 1.02, "grad_norm": 0.44340887665748596, "learning_rate": 0.0004457165985735839, "loss": 1.8052, "step": 30528 }, { "epoch": 1.02, "grad_norm": 0.4589534401893616, "learning_rate": 0.00044570746215061806, "loss": 1.7935, "step": 30529 }, { "epoch": 1.02, "grad_norm": 0.4803590774536133, "learning_rate": 0.0004456983255507849, "loss": 1.7916, "step": 30530 }, { "epoch": 1.02, "grad_norm": 0.42719757556915283, "learning_rate": 0.00044568918877409524, "loss": 1.7659, "step": 30531 }, { "epoch": 1.02, "grad_norm": 0.46016544103622437, "learning_rate": 0.00044568005182056054, "loss": 1.7264, "step": 30532 }, { "epoch": 1.02, "grad_norm": 0.4740743339061737, "learning_rate": 0.0004456709146901917, "loss": 1.8027, "step": 30533 }, { "epoch": 1.02, "grad_norm": 0.45791494846343994, "learning_rate": 0.0004456617773829998, "loss": 1.8624, "step": 30534 }, { "epoch": 1.02, "grad_norm": 0.44412681460380554, "learning_rate": 0.00044565263989899604, "loss": 1.8109, "step": 30535 }, { "epoch": 1.02, "grad_norm": 0.4297819435596466, "learning_rate": 0.0004456435022381913, "loss": 1.8121, "step": 30536 }, { "epoch": 1.02, "grad_norm": 0.4453113377094269, "learning_rate": 0.0004456343644005969, "loss": 1.7576, "step": 30537 }, { "epoch": 1.02, "grad_norm": 0.4451005756855011, "learning_rate": 0.0004456252263862238, "loss": 1.7952, "step": 30538 }, { "epoch": 1.02, "grad_norm": 0.4699389636516571, "learning_rate": 0.0004456160881950832, "loss": 1.7507, "step": 30539 }, { "epoch": 1.02, "grad_norm": 0.4538804590702057, "learning_rate": 0.00044560694982718606, "loss": 1.802, "step": 30540 }, { "epoch": 1.02, "grad_norm": 0.45198026299476624, "learning_rate": 0.0004455978112825436, "loss": 1.829, "step": 30541 }, { "epoch": 1.02, "grad_norm": 0.4500804543495178, "learning_rate": 0.00044558867256116677, "loss": 1.7818, "step": 30542 }, { "epoch": 1.02, "grad_norm": 0.4663124978542328, "learning_rate": 0.00044557953366306674, "loss": 1.7509, "step": 30543 }, { "epoch": 1.02, "grad_norm": 0.45131927728652954, "learning_rate": 0.0004455703945882547, "loss": 1.7868, "step": 30544 }, { "epoch": 1.02, "grad_norm": 0.4665515422821045, "learning_rate": 0.00044556125533674163, "loss": 1.9114, "step": 30545 }, { "epoch": 1.02, "grad_norm": 0.4403965473175049, "learning_rate": 0.00044555211590853856, "loss": 1.7487, "step": 30546 }, { "epoch": 1.02, "grad_norm": 0.48114416003227234, "learning_rate": 0.00044554297630365673, "loss": 1.7907, "step": 30547 }, { "epoch": 1.02, "grad_norm": 0.4722697138786316, "learning_rate": 0.00044553383652210714, "loss": 1.8937, "step": 30548 }, { "epoch": 1.02, "grad_norm": 0.46150532364845276, "learning_rate": 0.000445524696563901, "loss": 1.8479, "step": 30549 }, { "epoch": 1.02, "grad_norm": 0.47930338978767395, "learning_rate": 0.0004455155564290492, "loss": 1.7098, "step": 30550 }, { "epoch": 1.02, "grad_norm": 0.46148210763931274, "learning_rate": 0.000445506416117563, "loss": 1.7941, "step": 30551 }, { "epoch": 1.02, "grad_norm": 0.4550689160823822, "learning_rate": 0.00044549727562945345, "loss": 1.8265, "step": 30552 }, { "epoch": 1.02, "grad_norm": 0.4604829251766205, "learning_rate": 0.0004454881349647316, "loss": 1.8231, "step": 30553 }, { "epoch": 1.02, "grad_norm": 0.46166732907295227, "learning_rate": 0.0004454789941234087, "loss": 1.7659, "step": 30554 }, { "epoch": 1.02, "grad_norm": 0.4650282561779022, "learning_rate": 0.0004454698531054957, "loss": 1.8537, "step": 30555 }, { "epoch": 1.02, "grad_norm": 0.45428869128227234, "learning_rate": 0.0004454607119110037, "loss": 1.7503, "step": 30556 }, { "epoch": 1.02, "grad_norm": 0.47216129302978516, "learning_rate": 0.0004454515705399438, "loss": 1.8449, "step": 30557 }, { "epoch": 1.02, "grad_norm": 0.4602300226688385, "learning_rate": 0.0004454424289923272, "loss": 1.7954, "step": 30558 }, { "epoch": 1.02, "grad_norm": 0.44096702337265015, "learning_rate": 0.0004454332872681649, "loss": 1.7935, "step": 30559 }, { "epoch": 1.02, "grad_norm": 0.44180455803871155, "learning_rate": 0.00044542414536746805, "loss": 1.8252, "step": 30560 }, { "epoch": 1.02, "grad_norm": 0.4427148997783661, "learning_rate": 0.0004454150032902476, "loss": 1.8372, "step": 30561 }, { "epoch": 1.02, "grad_norm": 0.4591276943683624, "learning_rate": 0.00044540586103651483, "loss": 1.7697, "step": 30562 }, { "epoch": 1.02, "grad_norm": 0.6367166638374329, "learning_rate": 0.0004453967186062808, "loss": 1.7704, "step": 30563 }, { "epoch": 1.02, "grad_norm": 0.4704342484474182, "learning_rate": 0.0004453875759995566, "loss": 1.7977, "step": 30564 }, { "epoch": 1.02, "grad_norm": 0.43908074498176575, "learning_rate": 0.00044537843321635316, "loss": 1.7818, "step": 30565 }, { "epoch": 1.02, "grad_norm": 0.44309747219085693, "learning_rate": 0.00044536929025668185, "loss": 1.7586, "step": 30566 }, { "epoch": 1.02, "grad_norm": 0.4378746747970581, "learning_rate": 0.0004453601471205536, "loss": 1.7277, "step": 30567 }, { "epoch": 1.02, "grad_norm": 0.44298025965690613, "learning_rate": 0.00044535100380797957, "loss": 1.828, "step": 30568 }, { "epoch": 1.02, "grad_norm": 0.4544343054294586, "learning_rate": 0.0004453418603189708, "loss": 1.8206, "step": 30569 }, { "epoch": 1.02, "grad_norm": 0.4626276195049286, "learning_rate": 0.0004453327166535385, "loss": 1.7567, "step": 30570 }, { "epoch": 1.02, "grad_norm": 0.44576725363731384, "learning_rate": 0.00044532357281169355, "loss": 1.8469, "step": 30571 }, { "epoch": 1.02, "grad_norm": 0.4461365044116974, "learning_rate": 0.0004453144287934473, "loss": 1.7681, "step": 30572 }, { "epoch": 1.02, "grad_norm": 0.4683092534542084, "learning_rate": 0.0004453052845988108, "loss": 1.7971, "step": 30573 }, { "epoch": 1.02, "grad_norm": 0.4506385922431946, "learning_rate": 0.000445296140227795, "loss": 1.8086, "step": 30574 }, { "epoch": 1.02, "grad_norm": 0.45201629400253296, "learning_rate": 0.0004452869956804112, "loss": 1.7411, "step": 30575 }, { "epoch": 1.02, "grad_norm": 0.4674147367477417, "learning_rate": 0.00044527785095667025, "loss": 1.8335, "step": 30576 }, { "epoch": 1.02, "grad_norm": 0.4619750678539276, "learning_rate": 0.00044526870605658345, "loss": 1.7589, "step": 30577 }, { "epoch": 1.02, "grad_norm": 0.4628216028213501, "learning_rate": 0.0004452595609801619, "loss": 1.7785, "step": 30578 }, { "epoch": 1.02, "grad_norm": 0.45388758182525635, "learning_rate": 0.00044525041572741653, "loss": 1.7649, "step": 30579 }, { "epoch": 1.02, "grad_norm": 0.46714165806770325, "learning_rate": 0.0004452412702983587, "loss": 1.8516, "step": 30580 }, { "epoch": 1.02, "grad_norm": 0.4661407768726349, "learning_rate": 0.0004452321246929993, "loss": 1.8148, "step": 30581 }, { "epoch": 1.02, "grad_norm": 0.4363923668861389, "learning_rate": 0.0004452229789113495, "loss": 1.7541, "step": 30582 }, { "epoch": 1.02, "grad_norm": 0.4640294015407562, "learning_rate": 0.00044521383295342034, "loss": 1.7157, "step": 30583 }, { "epoch": 1.02, "grad_norm": 0.46280795335769653, "learning_rate": 0.00044520468681922306, "loss": 1.7136, "step": 30584 }, { "epoch": 1.02, "grad_norm": 0.4568954110145569, "learning_rate": 0.0004451955405087687, "loss": 1.8412, "step": 30585 }, { "epoch": 1.02, "grad_norm": 0.45023953914642334, "learning_rate": 0.00044518639402206826, "loss": 1.804, "step": 30586 }, { "epoch": 1.02, "grad_norm": 0.4581342339515686, "learning_rate": 0.00044517724735913296, "loss": 1.7434, "step": 30587 }, { "epoch": 1.02, "grad_norm": 0.444262832403183, "learning_rate": 0.000445168100519974, "loss": 1.8649, "step": 30588 }, { "epoch": 1.02, "grad_norm": 0.4616236388683319, "learning_rate": 0.0004451589535046022, "loss": 1.7703, "step": 30589 }, { "epoch": 1.02, "grad_norm": 0.43075698614120483, "learning_rate": 0.0004451498063130289, "loss": 1.6891, "step": 30590 }, { "epoch": 1.02, "grad_norm": 0.4385617971420288, "learning_rate": 0.00044514065894526506, "loss": 1.8254, "step": 30591 }, { "epoch": 1.02, "grad_norm": 0.4466618001461029, "learning_rate": 0.00044513151140132193, "loss": 1.7134, "step": 30592 }, { "epoch": 1.02, "grad_norm": 0.5808436274528503, "learning_rate": 0.0004451223636812104, "loss": 1.8402, "step": 30593 }, { "epoch": 1.02, "grad_norm": 0.47035571932792664, "learning_rate": 0.00044511321578494185, "loss": 1.7356, "step": 30594 }, { "epoch": 1.02, "grad_norm": 0.44364720582962036, "learning_rate": 0.0004451040677125271, "loss": 1.7697, "step": 30595 }, { "epoch": 1.02, "grad_norm": 0.4482176899909973, "learning_rate": 0.0004450949194639775, "loss": 1.8306, "step": 30596 }, { "epoch": 1.02, "grad_norm": 0.4760821759700775, "learning_rate": 0.00044508577103930403, "loss": 1.7097, "step": 30597 }, { "epoch": 1.02, "grad_norm": 0.4672359824180603, "learning_rate": 0.00044507662243851775, "loss": 1.7336, "step": 30598 }, { "epoch": 1.02, "grad_norm": 0.4494856595993042, "learning_rate": 0.0004450674736616299, "loss": 1.7764, "step": 30599 }, { "epoch": 1.02, "grad_norm": 0.4982723891735077, "learning_rate": 0.0004450583247086515, "loss": 1.7879, "step": 30600 }, { "epoch": 1.02, "grad_norm": 0.4549059271812439, "learning_rate": 0.00044504917557959355, "loss": 1.8027, "step": 30601 }, { "epoch": 1.02, "grad_norm": 0.4485984742641449, "learning_rate": 0.00044504002627446736, "loss": 1.7553, "step": 30602 }, { "epoch": 1.02, "grad_norm": 0.44352489709854126, "learning_rate": 0.00044503087679328404, "loss": 1.807, "step": 30603 }, { "epoch": 1.02, "grad_norm": 0.4562503695487976, "learning_rate": 0.0004450217271360545, "loss": 1.7523, "step": 30604 }, { "epoch": 1.02, "grad_norm": 0.4663788080215454, "learning_rate": 0.00044501257730278994, "loss": 1.812, "step": 30605 }, { "epoch": 1.02, "grad_norm": 0.4585581123828888, "learning_rate": 0.0004450034272935015, "loss": 1.7877, "step": 30606 }, { "epoch": 1.02, "grad_norm": 0.4538605809211731, "learning_rate": 0.0004449942771082003, "loss": 1.7601, "step": 30607 }, { "epoch": 1.02, "grad_norm": 0.733117401599884, "learning_rate": 0.0004449851267468974, "loss": 1.9006, "step": 30608 }, { "epoch": 1.02, "grad_norm": 0.44925248622894287, "learning_rate": 0.0004449759762096039, "loss": 1.8201, "step": 30609 }, { "epoch": 1.02, "grad_norm": 0.45577436685562134, "learning_rate": 0.00044496682549633094, "loss": 1.7686, "step": 30610 }, { "epoch": 1.02, "grad_norm": 0.4698428809642792, "learning_rate": 0.0004449576746070896, "loss": 1.9138, "step": 30611 }, { "epoch": 1.02, "grad_norm": 0.45600682497024536, "learning_rate": 0.000444948523541891, "loss": 1.7271, "step": 30612 }, { "epoch": 1.02, "grad_norm": 0.44455137848854065, "learning_rate": 0.0004449393723007462, "loss": 1.7382, "step": 30613 }, { "epoch": 1.02, "grad_norm": 0.45320162177085876, "learning_rate": 0.00044493022088366646, "loss": 1.7572, "step": 30614 }, { "epoch": 1.02, "grad_norm": 0.4624238908290863, "learning_rate": 0.0004449210692906628, "loss": 1.8788, "step": 30615 }, { "epoch": 1.02, "grad_norm": 0.4632427394390106, "learning_rate": 0.00044491191752174617, "loss": 1.7996, "step": 30616 }, { "epoch": 1.02, "grad_norm": 0.45860686898231506, "learning_rate": 0.0004449027655769279, "loss": 1.8385, "step": 30617 }, { "epoch": 1.02, "grad_norm": 0.4649689197540283, "learning_rate": 0.0004448936134562191, "loss": 1.8467, "step": 30618 }, { "epoch": 1.02, "grad_norm": 0.4681033194065094, "learning_rate": 0.0004448844611596307, "loss": 1.7736, "step": 30619 }, { "epoch": 1.02, "grad_norm": 0.4562320411205292, "learning_rate": 0.0004448753086871739, "loss": 1.7524, "step": 30620 }, { "epoch": 1.02, "grad_norm": 0.45405063033103943, "learning_rate": 0.00044486615603885987, "loss": 1.8075, "step": 30621 }, { "epoch": 1.02, "grad_norm": 0.45035672187805176, "learning_rate": 0.0004448570032146998, "loss": 1.8083, "step": 30622 }, { "epoch": 1.02, "grad_norm": 0.44401413202285767, "learning_rate": 0.0004448478502147044, "loss": 1.8102, "step": 30623 }, { "epoch": 1.02, "grad_norm": 0.46247661113739014, "learning_rate": 0.00044483869703888524, "loss": 1.8015, "step": 30624 }, { "epoch": 1.02, "grad_norm": 0.44464462995529175, "learning_rate": 0.00044482954368725314, "loss": 1.7616, "step": 30625 }, { "epoch": 1.02, "grad_norm": 0.4504345655441284, "learning_rate": 0.0004448203901598194, "loss": 1.8446, "step": 30626 }, { "epoch": 1.02, "grad_norm": 0.45060232281684875, "learning_rate": 0.00044481123645659506, "loss": 1.7956, "step": 30627 }, { "epoch": 1.02, "grad_norm": 0.4626689851284027, "learning_rate": 0.0004448020825775911, "loss": 1.8464, "step": 30628 }, { "epoch": 1.02, "grad_norm": 0.44807007908821106, "learning_rate": 0.00044479292852281886, "loss": 1.7715, "step": 30629 }, { "epoch": 1.02, "grad_norm": 0.4471404254436493, "learning_rate": 0.00044478377429228933, "loss": 1.7449, "step": 30630 }, { "epoch": 1.02, "grad_norm": 0.46417298913002014, "learning_rate": 0.00044477461988601355, "loss": 1.7968, "step": 30631 }, { "epoch": 1.02, "grad_norm": 0.4517515301704407, "learning_rate": 0.00044476546530400275, "loss": 1.774, "step": 30632 }, { "epoch": 1.02, "grad_norm": 0.4588356912136078, "learning_rate": 0.000444756310546268, "loss": 1.7457, "step": 30633 }, { "epoch": 1.02, "grad_norm": 0.48630034923553467, "learning_rate": 0.00044474715561282043, "loss": 1.903, "step": 30634 }, { "epoch": 1.02, "grad_norm": 0.5016204714775085, "learning_rate": 0.0004447380005036712, "loss": 1.7927, "step": 30635 }, { "epoch": 1.02, "grad_norm": 0.455746054649353, "learning_rate": 0.00044472884521883126, "loss": 1.8024, "step": 30636 }, { "epoch": 1.02, "grad_norm": 0.4681999087333679, "learning_rate": 0.0004447196897583119, "loss": 1.788, "step": 30637 }, { "epoch": 1.02, "grad_norm": 0.48608970642089844, "learning_rate": 0.0004447105341221241, "loss": 1.7514, "step": 30638 }, { "epoch": 1.02, "grad_norm": 0.5016795992851257, "learning_rate": 0.00044470137831027903, "loss": 1.7305, "step": 30639 }, { "epoch": 1.02, "grad_norm": 0.46140339970588684, "learning_rate": 0.00044469222232278795, "loss": 1.8346, "step": 30640 }, { "epoch": 1.02, "grad_norm": 0.4584917426109314, "learning_rate": 0.00044468306615966165, "loss": 1.8038, "step": 30641 }, { "epoch": 1.02, "grad_norm": 0.4812735319137573, "learning_rate": 0.0004446739098209115, "loss": 1.8015, "step": 30642 }, { "epoch": 1.02, "grad_norm": 0.47534582018852234, "learning_rate": 0.0004446647533065486, "loss": 1.8178, "step": 30643 }, { "epoch": 1.02, "grad_norm": 0.4630556106567383, "learning_rate": 0.0004446555966165839, "loss": 1.763, "step": 30644 }, { "epoch": 1.02, "grad_norm": 0.45851337909698486, "learning_rate": 0.00044464643975102865, "loss": 1.7869, "step": 30645 }, { "epoch": 1.02, "grad_norm": 0.4663105010986328, "learning_rate": 0.00044463728270989397, "loss": 1.7883, "step": 30646 }, { "epoch": 1.02, "grad_norm": 0.4580736458301544, "learning_rate": 0.00044462812549319095, "loss": 1.7621, "step": 30647 }, { "epoch": 1.02, "grad_norm": 0.4494192898273468, "learning_rate": 0.0004446189681009306, "loss": 1.7724, "step": 30648 }, { "epoch": 1.02, "grad_norm": 0.467640221118927, "learning_rate": 0.00044460981053312423, "loss": 1.8151, "step": 30649 }, { "epoch": 1.02, "grad_norm": 0.4602328836917877, "learning_rate": 0.00044460065278978286, "loss": 1.7695, "step": 30650 }, { "epoch": 1.02, "grad_norm": 0.45757898688316345, "learning_rate": 0.00044459149487091756, "loss": 1.8767, "step": 30651 }, { "epoch": 1.02, "grad_norm": 0.45878198742866516, "learning_rate": 0.0004445823367765395, "loss": 1.8495, "step": 30652 }, { "epoch": 1.02, "grad_norm": 0.44926735758781433, "learning_rate": 0.0004445731785066598, "loss": 1.7719, "step": 30653 }, { "epoch": 1.02, "grad_norm": 0.4527156352996826, "learning_rate": 0.00044456402006128954, "loss": 1.7123, "step": 30654 }, { "epoch": 1.02, "grad_norm": 0.48395925760269165, "learning_rate": 0.00044455486144043976, "loss": 1.797, "step": 30655 }, { "epoch": 1.02, "grad_norm": 0.45558035373687744, "learning_rate": 0.0004445457026441219, "loss": 1.8221, "step": 30656 }, { "epoch": 1.02, "grad_norm": 0.4490312933921814, "learning_rate": 0.00044453654367234677, "loss": 1.8321, "step": 30657 }, { "epoch": 1.02, "grad_norm": 0.4550187587738037, "learning_rate": 0.0004445273845251255, "loss": 1.8306, "step": 30658 }, { "epoch": 1.02, "grad_norm": 0.4863770604133606, "learning_rate": 0.00044451822520246933, "loss": 1.7799, "step": 30659 }, { "epoch": 1.02, "grad_norm": 0.44655483961105347, "learning_rate": 0.0004445090657043893, "loss": 1.8785, "step": 30660 }, { "epoch": 1.02, "grad_norm": 0.44981664419174194, "learning_rate": 0.0004444999060308966, "loss": 1.8399, "step": 30661 }, { "epoch": 1.02, "grad_norm": 0.46380719542503357, "learning_rate": 0.0004444907461820023, "loss": 1.7516, "step": 30662 }, { "epoch": 1.02, "grad_norm": 0.4646001160144806, "learning_rate": 0.00044448158615771746, "loss": 1.8572, "step": 30663 }, { "epoch": 1.02, "grad_norm": 0.4453701078891754, "learning_rate": 0.00044447242595805334, "loss": 1.746, "step": 30664 }, { "epoch": 1.02, "grad_norm": 0.44059985876083374, "learning_rate": 0.0004444632655830209, "loss": 1.7885, "step": 30665 }, { "epoch": 1.02, "grad_norm": 0.4683530628681183, "learning_rate": 0.00044445410503263146, "loss": 1.7224, "step": 30666 }, { "epoch": 1.02, "grad_norm": 0.4349564015865326, "learning_rate": 0.0004444449443068959, "loss": 1.824, "step": 30667 }, { "epoch": 1.02, "grad_norm": 0.4636427164077759, "learning_rate": 0.00044443578340582553, "loss": 1.8008, "step": 30668 }, { "epoch": 1.02, "grad_norm": 0.4533878564834595, "learning_rate": 0.0004444266223294314, "loss": 1.7167, "step": 30669 }, { "epoch": 1.02, "grad_norm": 0.4593096375465393, "learning_rate": 0.00044441746107772457, "loss": 1.8107, "step": 30670 }, { "epoch": 1.02, "grad_norm": 0.45599356293678284, "learning_rate": 0.00044440829965071627, "loss": 1.772, "step": 30671 }, { "epoch": 1.02, "grad_norm": 0.44591882824897766, "learning_rate": 0.0004443991380484177, "loss": 1.8288, "step": 30672 }, { "epoch": 1.02, "grad_norm": 0.4615282416343689, "learning_rate": 0.0004443899762708396, "loss": 1.7937, "step": 30673 }, { "epoch": 1.02, "grad_norm": 0.4572657346725464, "learning_rate": 0.0004443808143179935, "loss": 1.7583, "step": 30674 }, { "epoch": 1.02, "grad_norm": 0.5248810052871704, "learning_rate": 0.00044437165218989035, "loss": 1.7979, "step": 30675 }, { "epoch": 1.02, "grad_norm": 0.4423086941242218, "learning_rate": 0.0004443624898865413, "loss": 1.8298, "step": 30676 }, { "epoch": 1.02, "grad_norm": 0.45425379276275635, "learning_rate": 0.00044435332740795744, "loss": 1.7391, "step": 30677 }, { "epoch": 1.02, "grad_norm": 0.4370158910751343, "learning_rate": 0.0004443441647541498, "loss": 1.7397, "step": 30678 }, { "epoch": 1.02, "grad_norm": 0.4632245898246765, "learning_rate": 0.0004443350019251297, "loss": 1.7105, "step": 30679 }, { "epoch": 1.02, "grad_norm": 0.4541970193386078, "learning_rate": 0.00044432583892090824, "loss": 1.8018, "step": 30680 }, { "epoch": 1.02, "grad_norm": 0.46109646558761597, "learning_rate": 0.0004443166757414964, "loss": 1.7599, "step": 30681 }, { "epoch": 1.02, "grad_norm": 0.4398636817932129, "learning_rate": 0.00044430751238690543, "loss": 1.7675, "step": 30682 }, { "epoch": 1.02, "grad_norm": 0.4745854139328003, "learning_rate": 0.0004442983488571463, "loss": 1.7834, "step": 30683 }, { "epoch": 1.02, "grad_norm": 0.45312315225601196, "learning_rate": 0.00044428918515223035, "loss": 1.7861, "step": 30684 }, { "epoch": 1.02, "grad_norm": 0.45111721754074097, "learning_rate": 0.0004442800212721685, "loss": 1.8065, "step": 30685 }, { "epoch": 1.02, "grad_norm": 0.4501008093357086, "learning_rate": 0.0004442708572169721, "loss": 1.7283, "step": 30686 }, { "epoch": 1.02, "grad_norm": 0.4632798135280609, "learning_rate": 0.000444261692986652, "loss": 1.8107, "step": 30687 }, { "epoch": 1.02, "grad_norm": 0.45381152629852295, "learning_rate": 0.00044425252858121945, "loss": 1.7839, "step": 30688 }, { "epoch": 1.02, "grad_norm": 0.462199866771698, "learning_rate": 0.0004442433640006857, "loss": 1.868, "step": 30689 }, { "epoch": 1.02, "grad_norm": 0.464643657207489, "learning_rate": 0.0004442341992450617, "loss": 1.7529, "step": 30690 }, { "epoch": 1.02, "grad_norm": 0.4478842318058014, "learning_rate": 0.00044422503431435867, "loss": 1.8515, "step": 30691 }, { "epoch": 1.02, "grad_norm": 0.46587395668029785, "learning_rate": 0.0004442158692085876, "loss": 1.7884, "step": 30692 }, { "epoch": 1.02, "grad_norm": 0.45804110169410706, "learning_rate": 0.0004442067039277598, "loss": 1.8486, "step": 30693 }, { "epoch": 1.02, "grad_norm": 0.44012051820755005, "learning_rate": 0.00044419753847188624, "loss": 1.8369, "step": 30694 }, { "epoch": 1.02, "grad_norm": 0.4702272415161133, "learning_rate": 0.0004441883728409781, "loss": 1.8, "step": 30695 }, { "epoch": 1.02, "grad_norm": 0.4774778485298157, "learning_rate": 0.00044417920703504666, "loss": 1.8047, "step": 30696 }, { "epoch": 1.02, "grad_norm": 0.4852429926395416, "learning_rate": 0.00044417004105410285, "loss": 1.7521, "step": 30697 }, { "epoch": 1.02, "grad_norm": 0.4583566188812256, "learning_rate": 0.00044416087489815786, "loss": 1.7501, "step": 30698 }, { "epoch": 1.02, "grad_norm": 0.49887746572494507, "learning_rate": 0.0004441517085672227, "loss": 1.8229, "step": 30699 }, { "epoch": 1.02, "grad_norm": 0.45177847146987915, "learning_rate": 0.0004441425420613087, "loss": 1.7916, "step": 30700 }, { "epoch": 1.02, "grad_norm": 0.46211498975753784, "learning_rate": 0.0004441333753804269, "loss": 1.7563, "step": 30701 }, { "epoch": 1.02, "grad_norm": 0.45071646571159363, "learning_rate": 0.00044412420852458844, "loss": 1.8177, "step": 30702 }, { "epoch": 1.02, "grad_norm": 0.46345511078834534, "learning_rate": 0.0004441150414938043, "loss": 1.8062, "step": 30703 }, { "epoch": 1.02, "grad_norm": 0.4474838972091675, "learning_rate": 0.00044410587428808586, "loss": 1.832, "step": 30704 }, { "epoch": 1.02, "grad_norm": 0.44981861114501953, "learning_rate": 0.0004440967069074441, "loss": 1.7624, "step": 30705 }, { "epoch": 1.02, "grad_norm": 0.4479384124279022, "learning_rate": 0.0004440875393518901, "loss": 1.7597, "step": 30706 }, { "epoch": 1.02, "grad_norm": 0.47645503282546997, "learning_rate": 0.00044407837162143516, "loss": 1.7495, "step": 30707 }, { "epoch": 1.02, "grad_norm": 0.4685518443584442, "learning_rate": 0.00044406920371609023, "loss": 1.7585, "step": 30708 }, { "epoch": 1.02, "grad_norm": 0.48589813709259033, "learning_rate": 0.0004440600356358665, "loss": 1.7876, "step": 30709 }, { "epoch": 1.02, "grad_norm": 0.4858582317829132, "learning_rate": 0.0004440508673807751, "loss": 1.8706, "step": 30710 }, { "epoch": 1.02, "grad_norm": 0.46122002601623535, "learning_rate": 0.0004440416989508273, "loss": 1.8375, "step": 30711 }, { "epoch": 1.02, "grad_norm": 0.4858246445655823, "learning_rate": 0.000444032530346034, "loss": 1.8539, "step": 30712 }, { "epoch": 1.02, "grad_norm": 0.4684164226055145, "learning_rate": 0.00044402336156640647, "loss": 1.7906, "step": 30713 }, { "epoch": 1.02, "grad_norm": 0.46343061327934265, "learning_rate": 0.00044401419261195585, "loss": 1.8082, "step": 30714 }, { "epoch": 1.02, "grad_norm": 0.46730688214302063, "learning_rate": 0.00044400502348269303, "loss": 1.8471, "step": 30715 }, { "epoch": 1.02, "grad_norm": 0.47518372535705566, "learning_rate": 0.0004439958541786295, "loss": 1.7046, "step": 30716 }, { "epoch": 1.02, "grad_norm": 0.4497717618942261, "learning_rate": 0.0004439866846997762, "loss": 1.8536, "step": 30717 }, { "epoch": 1.02, "grad_norm": 0.45387551188468933, "learning_rate": 0.0004439775150461442, "loss": 1.7942, "step": 30718 }, { "epoch": 1.02, "grad_norm": 0.48185256123542786, "learning_rate": 0.0004439683452177448, "loss": 1.7908, "step": 30719 }, { "epoch": 1.02, "grad_norm": 0.46322712302207947, "learning_rate": 0.00044395917521458894, "loss": 1.8178, "step": 30720 }, { "epoch": 1.02, "grad_norm": 0.447952538728714, "learning_rate": 0.000443950005036688, "loss": 1.7721, "step": 30721 }, { "epoch": 1.02, "grad_norm": 0.4583261013031006, "learning_rate": 0.0004439408346840528, "loss": 1.7926, "step": 30722 }, { "epoch": 1.02, "grad_norm": 0.4708946645259857, "learning_rate": 0.00044393166415669476, "loss": 1.8079, "step": 30723 }, { "epoch": 1.02, "grad_norm": 0.4438748061656952, "learning_rate": 0.0004439224934546249, "loss": 1.8299, "step": 30724 }, { "epoch": 1.02, "grad_norm": 0.44041383266448975, "learning_rate": 0.00044391332257785423, "loss": 1.7643, "step": 30725 }, { "epoch": 1.02, "grad_norm": 0.4494493007659912, "learning_rate": 0.00044390415152639404, "loss": 1.7987, "step": 30726 }, { "epoch": 1.02, "grad_norm": 0.4591237008571625, "learning_rate": 0.00044389498030025543, "loss": 1.8257, "step": 30727 }, { "epoch": 1.02, "grad_norm": 0.4517155885696411, "learning_rate": 0.00044388580889944947, "loss": 1.8585, "step": 30728 }, { "epoch": 1.02, "grad_norm": 0.46792370080947876, "learning_rate": 0.00044387663732398737, "loss": 1.7185, "step": 30729 }, { "epoch": 1.02, "grad_norm": 0.4671238958835602, "learning_rate": 0.00044386746557388025, "loss": 1.7807, "step": 30730 }, { "epoch": 1.02, "grad_norm": 0.4614237844944, "learning_rate": 0.00044385829364913923, "loss": 1.8588, "step": 30731 }, { "epoch": 1.02, "grad_norm": 0.4691374897956848, "learning_rate": 0.0004438491215497754, "loss": 1.7651, "step": 30732 }, { "epoch": 1.02, "grad_norm": 0.44959867000579834, "learning_rate": 0.0004438399492757999, "loss": 1.8396, "step": 30733 }, { "epoch": 1.02, "grad_norm": 0.4664883017539978, "learning_rate": 0.000443830776827224, "loss": 1.7375, "step": 30734 }, { "epoch": 1.02, "grad_norm": 0.4502350091934204, "learning_rate": 0.00044382160420405864, "loss": 1.81, "step": 30735 }, { "epoch": 1.02, "grad_norm": 0.4581459164619446, "learning_rate": 0.00044381243140631505, "loss": 1.7813, "step": 30736 }, { "epoch": 1.02, "grad_norm": 0.8656041026115417, "learning_rate": 0.0004438032584340044, "loss": 1.8324, "step": 30737 }, { "epoch": 1.02, "grad_norm": 0.4724569320678711, "learning_rate": 0.00044379408528713777, "loss": 1.78, "step": 30738 }, { "epoch": 1.02, "grad_norm": 0.43672990798950195, "learning_rate": 0.0004437849119657263, "loss": 1.7613, "step": 30739 }, { "epoch": 1.02, "grad_norm": 0.4493628144264221, "learning_rate": 0.00044377573846978113, "loss": 1.7555, "step": 30740 }, { "epoch": 1.02, "grad_norm": 0.456318199634552, "learning_rate": 0.0004437665647993135, "loss": 1.8251, "step": 30741 }, { "epoch": 1.02, "grad_norm": 0.46098592877388, "learning_rate": 0.00044375739095433425, "loss": 1.7364, "step": 30742 }, { "epoch": 1.02, "grad_norm": 0.45278826355934143, "learning_rate": 0.0004437482169348548, "loss": 1.8099, "step": 30743 }, { "epoch": 1.02, "grad_norm": 0.4539802074432373, "learning_rate": 0.0004437390427408862, "loss": 1.7039, "step": 30744 }, { "epoch": 1.02, "grad_norm": 0.4488382935523987, "learning_rate": 0.0004437298683724396, "loss": 1.7573, "step": 30745 }, { "epoch": 1.02, "grad_norm": 0.4551153779029846, "learning_rate": 0.00044372069382952615, "loss": 1.7204, "step": 30746 }, { "epoch": 1.02, "grad_norm": 0.47405579686164856, "learning_rate": 0.00044371151911215686, "loss": 1.8914, "step": 30747 }, { "epoch": 1.02, "grad_norm": 0.44378313422203064, "learning_rate": 0.000443702344220343, "loss": 1.7644, "step": 30748 }, { "epoch": 1.02, "grad_norm": 0.4754532277584076, "learning_rate": 0.0004436931691540957, "loss": 1.8819, "step": 30749 }, { "epoch": 1.02, "grad_norm": 0.45792409777641296, "learning_rate": 0.00044368399391342595, "loss": 1.7409, "step": 30750 }, { "epoch": 1.02, "grad_norm": 0.442853718996048, "learning_rate": 0.00044367481849834513, "loss": 1.8338, "step": 30751 }, { "epoch": 1.02, "grad_norm": 0.44786229729652405, "learning_rate": 0.00044366564290886426, "loss": 1.7592, "step": 30752 }, { "epoch": 1.02, "grad_norm": 1.9783862829208374, "learning_rate": 0.0004436564671449944, "loss": 1.8676, "step": 30753 }, { "epoch": 1.02, "grad_norm": 0.45840224623680115, "learning_rate": 0.00044364729120674673, "loss": 1.8452, "step": 30754 }, { "epoch": 1.02, "grad_norm": 0.46698707342147827, "learning_rate": 0.00044363811509413246, "loss": 1.739, "step": 30755 }, { "epoch": 1.02, "grad_norm": 0.464854896068573, "learning_rate": 0.0004436289388071627, "loss": 1.8475, "step": 30756 }, { "epoch": 1.02, "grad_norm": 0.45142820477485657, "learning_rate": 0.0004436197623458486, "loss": 1.7872, "step": 30757 }, { "epoch": 1.02, "grad_norm": 0.46536338329315186, "learning_rate": 0.00044361058571020114, "loss": 1.7871, "step": 30758 }, { "epoch": 1.02, "grad_norm": 0.4567340910434723, "learning_rate": 0.00044360140890023166, "loss": 1.8386, "step": 30759 }, { "epoch": 1.02, "grad_norm": 0.4561968445777893, "learning_rate": 0.0004435922319159513, "loss": 1.8064, "step": 30760 }, { "epoch": 1.02, "grad_norm": 0.4890361428260803, "learning_rate": 0.00044358305475737105, "loss": 1.8002, "step": 30761 }, { "epoch": 1.02, "grad_norm": 0.46563100814819336, "learning_rate": 0.0004435738774245022, "loss": 1.7436, "step": 30762 }, { "epoch": 1.02, "grad_norm": 0.457843154668808, "learning_rate": 0.0004435646999173557, "loss": 1.8177, "step": 30763 }, { "epoch": 1.02, "grad_norm": 0.45427337288856506, "learning_rate": 0.00044355552223594285, "loss": 1.6942, "step": 30764 }, { "epoch": 1.02, "grad_norm": 0.46558958292007446, "learning_rate": 0.00044354634438027474, "loss": 1.8415, "step": 30765 }, { "epoch": 1.02, "grad_norm": 0.47365647554397583, "learning_rate": 0.0004435371663503626, "loss": 1.7811, "step": 30766 }, { "epoch": 1.02, "grad_norm": 0.4863806962966919, "learning_rate": 0.00044352798814621745, "loss": 1.8117, "step": 30767 }, { "epoch": 1.02, "grad_norm": 0.4884912073612213, "learning_rate": 0.0004435188097678504, "loss": 1.7891, "step": 30768 }, { "epoch": 1.02, "grad_norm": 0.46563923358917236, "learning_rate": 0.0004435096312152728, "loss": 1.7258, "step": 30769 }, { "epoch": 1.02, "grad_norm": 0.4570706784725189, "learning_rate": 0.0004435004524884955, "loss": 1.8816, "step": 30770 }, { "epoch": 1.02, "grad_norm": 0.46895092725753784, "learning_rate": 0.00044349127358752987, "loss": 1.7944, "step": 30771 }, { "epoch": 1.02, "grad_norm": 0.48000630736351013, "learning_rate": 0.0004434820945123869, "loss": 1.7512, "step": 30772 }, { "epoch": 1.02, "grad_norm": 0.460999995470047, "learning_rate": 0.00044347291526307793, "loss": 1.7512, "step": 30773 }, { "epoch": 1.02, "grad_norm": 0.4727638363838196, "learning_rate": 0.00044346373583961397, "loss": 1.7749, "step": 30774 }, { "epoch": 1.02, "grad_norm": 0.4667038023471832, "learning_rate": 0.00044345455624200604, "loss": 1.8501, "step": 30775 }, { "epoch": 1.02, "grad_norm": 0.46742045879364014, "learning_rate": 0.00044344537647026555, "loss": 1.8291, "step": 30776 }, { "epoch": 1.02, "grad_norm": 0.4570142328739166, "learning_rate": 0.00044343619652440343, "loss": 1.7838, "step": 30777 }, { "epoch": 1.02, "grad_norm": 0.4612804651260376, "learning_rate": 0.00044342701640443096, "loss": 1.8419, "step": 30778 }, { "epoch": 1.02, "grad_norm": 0.4595356583595276, "learning_rate": 0.0004434178361103592, "loss": 1.7818, "step": 30779 }, { "epoch": 1.02, "grad_norm": 0.4555586278438568, "learning_rate": 0.00044340865564219934, "loss": 1.7767, "step": 30780 }, { "epoch": 1.02, "grad_norm": 0.48064273595809937, "learning_rate": 0.00044339947499996246, "loss": 1.8033, "step": 30781 }, { "epoch": 1.02, "grad_norm": 0.46014124155044556, "learning_rate": 0.0004433902941836598, "loss": 1.7784, "step": 30782 }, { "epoch": 1.02, "grad_norm": 0.43684107065200806, "learning_rate": 0.0004433811131933024, "loss": 1.7702, "step": 30783 }, { "epoch": 1.02, "grad_norm": 0.4629869759082794, "learning_rate": 0.0004433719320289015, "loss": 1.7977, "step": 30784 }, { "epoch": 1.02, "grad_norm": 0.44495442509651184, "learning_rate": 0.00044336275069046816, "loss": 1.7933, "step": 30785 }, { "epoch": 1.02, "grad_norm": 0.45755454897880554, "learning_rate": 0.0004433535691780136, "loss": 1.7669, "step": 30786 }, { "epoch": 1.02, "grad_norm": 0.46678686141967773, "learning_rate": 0.00044334438749154886, "loss": 1.7167, "step": 30787 }, { "epoch": 1.02, "grad_norm": 0.47042611241340637, "learning_rate": 0.00044333520563108524, "loss": 1.8074, "step": 30788 }, { "epoch": 1.02, "grad_norm": 0.45086151361465454, "learning_rate": 0.0004433260235966338, "loss": 1.7851, "step": 30789 }, { "epoch": 1.02, "grad_norm": 0.4483585059642792, "learning_rate": 0.0004433168413882056, "loss": 1.7942, "step": 30790 }, { "epoch": 1.02, "grad_norm": 0.47727182507514954, "learning_rate": 0.0004433076590058119, "loss": 1.7589, "step": 30791 }, { "epoch": 1.02, "grad_norm": 0.45528319478034973, "learning_rate": 0.0004432984764494638, "loss": 1.801, "step": 30792 }, { "epoch": 1.02, "grad_norm": 0.45034822821617126, "learning_rate": 0.00044328929371917255, "loss": 1.8723, "step": 30793 }, { "epoch": 1.02, "grad_norm": 0.4658993184566498, "learning_rate": 0.0004432801108149492, "loss": 1.7973, "step": 30794 }, { "epoch": 1.02, "grad_norm": 0.4492204785346985, "learning_rate": 0.00044327092773680475, "loss": 1.839, "step": 30795 }, { "epoch": 1.02, "grad_norm": 0.4606912136077881, "learning_rate": 0.00044326174448475067, "loss": 1.8978, "step": 30796 }, { "epoch": 1.02, "grad_norm": 0.4784772992134094, "learning_rate": 0.0004432525610587979, "loss": 1.8862, "step": 30797 }, { "epoch": 1.02, "grad_norm": 0.44246500730514526, "learning_rate": 0.0004432433774589576, "loss": 1.7263, "step": 30798 }, { "epoch": 1.02, "grad_norm": 0.45873162150382996, "learning_rate": 0.0004432341936852409, "loss": 1.7594, "step": 30799 }, { "epoch": 1.02, "grad_norm": 0.4605470895767212, "learning_rate": 0.0004432250097376591, "loss": 1.7578, "step": 30800 }, { "epoch": 1.02, "grad_norm": 0.4760046899318695, "learning_rate": 0.0004432158256162233, "loss": 1.7356, "step": 30801 }, { "epoch": 1.02, "grad_norm": 0.44991081953048706, "learning_rate": 0.00044320664132094435, "loss": 1.8191, "step": 30802 }, { "epoch": 1.02, "grad_norm": 0.4547600746154785, "learning_rate": 0.00044319745685183386, "loss": 1.7827, "step": 30803 }, { "epoch": 1.02, "grad_norm": 0.4742010235786438, "learning_rate": 0.0004431882722089027, "loss": 1.7705, "step": 30804 }, { "epoch": 1.02, "grad_norm": 0.4470802843570709, "learning_rate": 0.00044317908739216195, "loss": 1.7262, "step": 30805 }, { "epoch": 1.02, "grad_norm": 0.4463428556919098, "learning_rate": 0.000443169902401623, "loss": 1.7965, "step": 30806 }, { "epoch": 1.02, "grad_norm": 0.4380853474140167, "learning_rate": 0.00044316071723729685, "loss": 1.7024, "step": 30807 }, { "epoch": 1.02, "grad_norm": 0.47646957635879517, "learning_rate": 0.00044315153189919475, "loss": 1.8243, "step": 30808 }, { "epoch": 1.03, "grad_norm": 0.46328893303871155, "learning_rate": 0.00044314234638732767, "loss": 1.8337, "step": 30809 }, { "epoch": 1.03, "grad_norm": 0.4517420530319214, "learning_rate": 0.000443133160701707, "loss": 1.8047, "step": 30810 }, { "epoch": 1.03, "grad_norm": 0.46320992708206177, "learning_rate": 0.00044312397484234364, "loss": 1.7243, "step": 30811 }, { "epoch": 1.03, "grad_norm": 0.45540767908096313, "learning_rate": 0.0004431147888092489, "loss": 1.7712, "step": 30812 }, { "epoch": 1.03, "grad_norm": 0.44450098276138306, "learning_rate": 0.0004431056026024339, "loss": 1.8083, "step": 30813 }, { "epoch": 1.03, "grad_norm": 0.443791002035141, "learning_rate": 0.0004430964162219099, "loss": 1.8161, "step": 30814 }, { "epoch": 1.03, "grad_norm": 0.45902684330940247, "learning_rate": 0.00044308722966768775, "loss": 1.7985, "step": 30815 }, { "epoch": 1.03, "grad_norm": 0.45927366614341736, "learning_rate": 0.0004430780429397789, "loss": 1.7964, "step": 30816 }, { "epoch": 1.03, "grad_norm": 0.45588186383247375, "learning_rate": 0.0004430688560381944, "loss": 1.7719, "step": 30817 }, { "epoch": 1.03, "grad_norm": 0.4664935767650604, "learning_rate": 0.0004430596689629453, "loss": 1.8052, "step": 30818 }, { "epoch": 1.03, "grad_norm": 0.45017707347869873, "learning_rate": 0.0004430504817140429, "loss": 1.8084, "step": 30819 }, { "epoch": 1.03, "grad_norm": 0.4494447708129883, "learning_rate": 0.00044304129429149826, "loss": 1.8151, "step": 30820 }, { "epoch": 1.03, "grad_norm": 0.47148242592811584, "learning_rate": 0.0004430321066953226, "loss": 1.7691, "step": 30821 }, { "epoch": 1.03, "grad_norm": 0.4589803218841553, "learning_rate": 0.00044302291892552704, "loss": 1.8619, "step": 30822 }, { "epoch": 1.03, "grad_norm": 0.4402545690536499, "learning_rate": 0.00044301373098212273, "loss": 1.7736, "step": 30823 }, { "epoch": 1.03, "grad_norm": 0.4558924436569214, "learning_rate": 0.0004430045428651208, "loss": 1.8273, "step": 30824 }, { "epoch": 1.03, "grad_norm": 0.46791836619377136, "learning_rate": 0.00044299535457453234, "loss": 1.7769, "step": 30825 }, { "epoch": 1.03, "grad_norm": 0.8183732628822327, "learning_rate": 0.00044298616611036876, "loss": 1.7833, "step": 30826 }, { "epoch": 1.03, "grad_norm": 0.47207731008529663, "learning_rate": 0.0004429769774726409, "loss": 1.7507, "step": 30827 }, { "epoch": 1.03, "grad_norm": 0.4596722722053528, "learning_rate": 0.0004429677886613601, "loss": 1.8251, "step": 30828 }, { "epoch": 1.03, "grad_norm": 0.4551140367984772, "learning_rate": 0.00044295859967653756, "loss": 1.7769, "step": 30829 }, { "epoch": 1.03, "grad_norm": 0.6346779465675354, "learning_rate": 0.00044294941051818427, "loss": 1.7771, "step": 30830 }, { "epoch": 1.03, "grad_norm": 0.4676729142665863, "learning_rate": 0.00044294022118631147, "loss": 1.8161, "step": 30831 }, { "epoch": 1.03, "grad_norm": 0.45854392647743225, "learning_rate": 0.00044293103168093016, "loss": 1.8061, "step": 30832 }, { "epoch": 1.03, "grad_norm": 0.4613136053085327, "learning_rate": 0.0004429218420020519, "loss": 1.7768, "step": 30833 }, { "epoch": 1.03, "grad_norm": 0.446132093667984, "learning_rate": 0.0004429126521496874, "loss": 1.8064, "step": 30834 }, { "epoch": 1.03, "grad_norm": 0.44752392172813416, "learning_rate": 0.000442903462123848, "loss": 1.7932, "step": 30835 }, { "epoch": 1.03, "grad_norm": 0.46744072437286377, "learning_rate": 0.00044289427192454494, "loss": 1.7963, "step": 30836 }, { "epoch": 1.03, "grad_norm": 0.4543878436088562, "learning_rate": 0.00044288508155178926, "loss": 1.7684, "step": 30837 }, { "epoch": 1.03, "grad_norm": 0.45879116654396057, "learning_rate": 0.00044287589100559213, "loss": 1.8344, "step": 30838 }, { "epoch": 1.03, "grad_norm": 0.470958948135376, "learning_rate": 0.0004428667002859647, "loss": 1.7895, "step": 30839 }, { "epoch": 1.03, "grad_norm": 0.4639897346496582, "learning_rate": 0.00044285750939291815, "loss": 1.7134, "step": 30840 }, { "epoch": 1.03, "grad_norm": 0.4575417935848236, "learning_rate": 0.0004428483183264637, "loss": 1.7788, "step": 30841 }, { "epoch": 1.03, "grad_norm": 0.4608954191207886, "learning_rate": 0.0004428391270866124, "loss": 1.7926, "step": 30842 }, { "epoch": 1.03, "grad_norm": 0.44546231627464294, "learning_rate": 0.00044282993567337537, "loss": 1.74, "step": 30843 }, { "epoch": 1.03, "grad_norm": 1.060881495475769, "learning_rate": 0.00044282074408676396, "loss": 1.8345, "step": 30844 }, { "epoch": 1.03, "grad_norm": 0.45385444164276123, "learning_rate": 0.0004428115523267891, "loss": 1.8951, "step": 30845 }, { "epoch": 1.03, "grad_norm": 0.4565020501613617, "learning_rate": 0.00044280236039346215, "loss": 1.7642, "step": 30846 }, { "epoch": 1.03, "grad_norm": 0.4403000473976135, "learning_rate": 0.00044279316828679414, "loss": 1.7822, "step": 30847 }, { "epoch": 1.03, "grad_norm": 0.44907790422439575, "learning_rate": 0.0004427839760067963, "loss": 1.8809, "step": 30848 }, { "epoch": 1.03, "grad_norm": 0.4726436734199524, "learning_rate": 0.00044277478355347975, "loss": 1.7857, "step": 30849 }, { "epoch": 1.03, "grad_norm": 0.4684589207172394, "learning_rate": 0.0004427655909268556, "loss": 1.7964, "step": 30850 }, { "epoch": 1.03, "grad_norm": 0.45043450593948364, "learning_rate": 0.0004427563981269351, "loss": 1.799, "step": 30851 }, { "epoch": 1.03, "grad_norm": 0.4522760510444641, "learning_rate": 0.0004427472051537293, "loss": 1.798, "step": 30852 }, { "epoch": 1.03, "grad_norm": 0.46356135606765747, "learning_rate": 0.0004427380120072495, "loss": 1.795, "step": 30853 }, { "epoch": 1.03, "grad_norm": 0.48689770698547363, "learning_rate": 0.0004427288186875067, "loss": 1.8141, "step": 30854 }, { "epoch": 1.03, "grad_norm": 0.4525371193885803, "learning_rate": 0.0004427196251945122, "loss": 1.7701, "step": 30855 }, { "epoch": 1.03, "grad_norm": 0.46138209104537964, "learning_rate": 0.00044271043152827716, "loss": 1.8561, "step": 30856 }, { "epoch": 1.03, "grad_norm": 0.45894452929496765, "learning_rate": 0.0004427012376888125, "loss": 1.7613, "step": 30857 }, { "epoch": 1.03, "grad_norm": 0.4680345356464386, "learning_rate": 0.0004426920436761298, "loss": 1.7419, "step": 30858 }, { "epoch": 1.03, "grad_norm": 0.4639680087566376, "learning_rate": 0.0004426828494902399, "loss": 1.7279, "step": 30859 }, { "epoch": 1.03, "grad_norm": 0.47634822130203247, "learning_rate": 0.0004426736551311539, "loss": 1.7516, "step": 30860 }, { "epoch": 1.03, "grad_norm": 0.49160581827163696, "learning_rate": 0.00044266446059888326, "loss": 1.8084, "step": 30861 }, { "epoch": 1.03, "grad_norm": 0.4660586714744568, "learning_rate": 0.00044265526589343896, "loss": 1.8251, "step": 30862 }, { "epoch": 1.03, "grad_norm": 0.47530779242515564, "learning_rate": 0.00044264607101483213, "loss": 1.7666, "step": 30863 }, { "epoch": 1.03, "grad_norm": 0.4610610008239746, "learning_rate": 0.0004426368759630741, "loss": 1.7557, "step": 30864 }, { "epoch": 1.03, "grad_norm": 0.45163750648498535, "learning_rate": 0.00044262768073817586, "loss": 1.8535, "step": 30865 }, { "epoch": 1.03, "grad_norm": 0.45172208547592163, "learning_rate": 0.0004426184853401486, "loss": 1.802, "step": 30866 }, { "epoch": 1.03, "grad_norm": 0.5225785970687866, "learning_rate": 0.0004426092897690035, "loss": 1.7654, "step": 30867 }, { "epoch": 1.03, "grad_norm": 0.48460885882377625, "learning_rate": 0.00044260009402475177, "loss": 1.8132, "step": 30868 }, { "epoch": 1.03, "grad_norm": 0.4565674662590027, "learning_rate": 0.00044259089810740456, "loss": 1.7702, "step": 30869 }, { "epoch": 1.03, "grad_norm": 0.45376425981521606, "learning_rate": 0.000442581702016973, "loss": 1.8192, "step": 30870 }, { "epoch": 1.03, "grad_norm": 0.5415195226669312, "learning_rate": 0.00044257250575346826, "loss": 1.7749, "step": 30871 }, { "epoch": 1.03, "grad_norm": 0.49496957659721375, "learning_rate": 0.00044256330931690145, "loss": 1.8122, "step": 30872 }, { "epoch": 1.03, "grad_norm": 0.4637472927570343, "learning_rate": 0.00044255411270728383, "loss": 1.7852, "step": 30873 }, { "epoch": 1.03, "grad_norm": 0.4563790261745453, "learning_rate": 0.00044254491592462657, "loss": 1.8197, "step": 30874 }, { "epoch": 1.03, "grad_norm": 0.45695552229881287, "learning_rate": 0.0004425357189689407, "loss": 1.8153, "step": 30875 }, { "epoch": 1.03, "grad_norm": 0.47979098558425903, "learning_rate": 0.0004425265218402375, "loss": 1.7588, "step": 30876 }, { "epoch": 1.03, "grad_norm": 0.46148011088371277, "learning_rate": 0.00044251732453852807, "loss": 1.755, "step": 30877 }, { "epoch": 1.03, "grad_norm": 0.44730344414711, "learning_rate": 0.00044250812706382364, "loss": 1.7818, "step": 30878 }, { "epoch": 1.03, "grad_norm": 0.4557754695415497, "learning_rate": 0.0004424989294161354, "loss": 1.8219, "step": 30879 }, { "epoch": 1.03, "grad_norm": 0.5122283101081848, "learning_rate": 0.00044248973159547437, "loss": 1.7737, "step": 30880 }, { "epoch": 1.03, "grad_norm": 0.4553200304508209, "learning_rate": 0.0004424805336018519, "loss": 1.8211, "step": 30881 }, { "epoch": 1.03, "grad_norm": 0.45286357402801514, "learning_rate": 0.0004424713354352789, "loss": 1.8101, "step": 30882 }, { "epoch": 1.03, "grad_norm": 0.46569809317588806, "learning_rate": 0.00044246213709576683, "loss": 1.7823, "step": 30883 }, { "epoch": 1.03, "grad_norm": 0.45836421847343445, "learning_rate": 0.00044245293858332664, "loss": 1.8032, "step": 30884 }, { "epoch": 1.03, "grad_norm": 0.4655470848083496, "learning_rate": 0.0004424437398979696, "loss": 1.7912, "step": 30885 }, { "epoch": 1.03, "grad_norm": 0.448003888130188, "learning_rate": 0.00044243454103970686, "loss": 1.8526, "step": 30886 }, { "epoch": 1.03, "grad_norm": 0.4656388759613037, "learning_rate": 0.0004424253420085495, "loss": 1.7895, "step": 30887 }, { "epoch": 1.03, "grad_norm": 0.44995585083961487, "learning_rate": 0.00044241614280450883, "loss": 1.8024, "step": 30888 }, { "epoch": 1.03, "grad_norm": 0.4810594916343689, "learning_rate": 0.0004424069434275959, "loss": 1.7093, "step": 30889 }, { "epoch": 1.03, "grad_norm": 0.449812114238739, "learning_rate": 0.00044239774387782205, "loss": 1.7362, "step": 30890 }, { "epoch": 1.03, "grad_norm": 0.4555835425853729, "learning_rate": 0.0004423885441551982, "loss": 1.8238, "step": 30891 }, { "epoch": 1.03, "grad_norm": 0.4598853290081024, "learning_rate": 0.00044237934425973564, "loss": 1.8573, "step": 30892 }, { "epoch": 1.03, "grad_norm": 0.46205875277519226, "learning_rate": 0.00044237014419144566, "loss": 1.8357, "step": 30893 }, { "epoch": 1.03, "grad_norm": 0.4602298438549042, "learning_rate": 0.0004423609439503391, "loss": 1.7606, "step": 30894 }, { "epoch": 1.03, "grad_norm": 0.47907084226608276, "learning_rate": 0.00044235174353642753, "loss": 1.9123, "step": 30895 }, { "epoch": 1.03, "grad_norm": 0.4572297930717468, "learning_rate": 0.0004423425429497217, "loss": 1.7937, "step": 30896 }, { "epoch": 1.03, "grad_norm": 0.45073822140693665, "learning_rate": 0.0004423333421902332, "loss": 1.8344, "step": 30897 }, { "epoch": 1.03, "grad_norm": 0.455163836479187, "learning_rate": 0.0004423241412579729, "loss": 1.7655, "step": 30898 }, { "epoch": 1.03, "grad_norm": 0.4572039246559143, "learning_rate": 0.00044231494015295204, "loss": 1.7854, "step": 30899 }, { "epoch": 1.03, "grad_norm": 0.4510568678379059, "learning_rate": 0.0004423057388751819, "loss": 1.8491, "step": 30900 }, { "epoch": 1.03, "grad_norm": 0.4702553153038025, "learning_rate": 0.00044229653742467346, "loss": 1.768, "step": 30901 }, { "epoch": 1.03, "grad_norm": 0.45250892639160156, "learning_rate": 0.00044228733580143806, "loss": 1.7914, "step": 30902 }, { "epoch": 1.03, "grad_norm": 0.4622403681278229, "learning_rate": 0.00044227813400548677, "loss": 1.8121, "step": 30903 }, { "epoch": 1.03, "grad_norm": 0.45097583532333374, "learning_rate": 0.0004422689320368308, "loss": 1.8345, "step": 30904 }, { "epoch": 1.03, "grad_norm": 0.4577326476573944, "learning_rate": 0.00044225972989548126, "loss": 1.7612, "step": 30905 }, { "epoch": 1.03, "grad_norm": 0.4733220040798187, "learning_rate": 0.0004422505275814495, "loss": 1.7578, "step": 30906 }, { "epoch": 1.03, "grad_norm": 0.4400944709777832, "learning_rate": 0.0004422413250947465, "loss": 1.8094, "step": 30907 }, { "epoch": 1.03, "grad_norm": 0.45523589849472046, "learning_rate": 0.00044223212243538337, "loss": 1.7892, "step": 30908 }, { "epoch": 1.03, "grad_norm": 0.45666050910949707, "learning_rate": 0.00044222291960337157, "loss": 1.7561, "step": 30909 }, { "epoch": 1.03, "grad_norm": 0.447405070066452, "learning_rate": 0.00044221371659872193, "loss": 1.7599, "step": 30910 }, { "epoch": 1.03, "grad_norm": 0.4416605830192566, "learning_rate": 0.00044220451342144595, "loss": 1.7113, "step": 30911 }, { "epoch": 1.03, "grad_norm": 0.45294153690338135, "learning_rate": 0.0004421953100715546, "loss": 1.7894, "step": 30912 }, { "epoch": 1.03, "grad_norm": 0.44575315713882446, "learning_rate": 0.00044218610654905903, "loss": 1.8101, "step": 30913 }, { "epoch": 1.03, "grad_norm": 0.4594981372356415, "learning_rate": 0.00044217690285397053, "loss": 1.7588, "step": 30914 }, { "epoch": 1.03, "grad_norm": 0.4627295732498169, "learning_rate": 0.00044216769898630025, "loss": 1.7721, "step": 30915 }, { "epoch": 1.03, "grad_norm": 0.4504082202911377, "learning_rate": 0.00044215849494605924, "loss": 1.7651, "step": 30916 }, { "epoch": 1.03, "grad_norm": 0.4702187478542328, "learning_rate": 0.00044214929073325884, "loss": 1.7703, "step": 30917 }, { "epoch": 1.03, "grad_norm": 0.4564017057418823, "learning_rate": 0.0004421400863479102, "loss": 1.8206, "step": 30918 }, { "epoch": 1.03, "grad_norm": 0.4480261206626892, "learning_rate": 0.00044213088179002424, "loss": 1.7671, "step": 30919 }, { "epoch": 1.03, "grad_norm": 0.4664134681224823, "learning_rate": 0.0004421216770596125, "loss": 1.818, "step": 30920 }, { "epoch": 1.03, "grad_norm": 0.48021814227104187, "learning_rate": 0.0004421124721566859, "loss": 1.8328, "step": 30921 }, { "epoch": 1.03, "grad_norm": 0.45482179522514343, "learning_rate": 0.00044210326708125577, "loss": 1.8058, "step": 30922 }, { "epoch": 1.03, "grad_norm": 0.44471704959869385, "learning_rate": 0.00044209406183333313, "loss": 1.7663, "step": 30923 }, { "epoch": 1.03, "grad_norm": 0.4380537271499634, "learning_rate": 0.0004420848564129293, "loss": 1.7587, "step": 30924 }, { "epoch": 1.03, "grad_norm": 0.45552822947502136, "learning_rate": 0.0004420756508200554, "loss": 1.8513, "step": 30925 }, { "epoch": 1.03, "grad_norm": 0.447934091091156, "learning_rate": 0.00044206644505472253, "loss": 1.7691, "step": 30926 }, { "epoch": 1.03, "grad_norm": 0.452434778213501, "learning_rate": 0.000442057239116942, "loss": 1.7552, "step": 30927 }, { "epoch": 1.03, "grad_norm": 0.4625290334224701, "learning_rate": 0.00044204803300672486, "loss": 1.8004, "step": 30928 }, { "epoch": 1.03, "grad_norm": 0.4578606188297272, "learning_rate": 0.00044203882672408227, "loss": 1.8215, "step": 30929 }, { "epoch": 1.03, "grad_norm": 0.4654647707939148, "learning_rate": 0.0004420296202690256, "loss": 1.8489, "step": 30930 }, { "epoch": 1.03, "grad_norm": 0.4550173580646515, "learning_rate": 0.00044202041364156593, "loss": 1.8279, "step": 30931 }, { "epoch": 1.03, "grad_norm": 0.4775353670120239, "learning_rate": 0.0004420112068417143, "loss": 1.7911, "step": 30932 }, { "epoch": 1.03, "grad_norm": 0.4414581060409546, "learning_rate": 0.000442001999869482, "loss": 1.7674, "step": 30933 }, { "epoch": 1.03, "grad_norm": 0.46944180130958557, "learning_rate": 0.0004419927927248802, "loss": 1.7952, "step": 30934 }, { "epoch": 1.03, "grad_norm": 0.4590716063976288, "learning_rate": 0.00044198358540792007, "loss": 1.8027, "step": 30935 }, { "epoch": 1.03, "grad_norm": 0.45298999547958374, "learning_rate": 0.0004419743779186129, "loss": 1.7652, "step": 30936 }, { "epoch": 1.03, "grad_norm": 0.4448697865009308, "learning_rate": 0.00044196517025696955, "loss": 1.7566, "step": 30937 }, { "epoch": 1.03, "grad_norm": 0.4557587206363678, "learning_rate": 0.0004419559624230016, "loss": 1.7626, "step": 30938 }, { "epoch": 1.03, "grad_norm": 0.4436134994029999, "learning_rate": 0.0004419467544167199, "loss": 1.8075, "step": 30939 }, { "epoch": 1.03, "grad_norm": 0.4449460506439209, "learning_rate": 0.00044193754623813577, "loss": 1.7379, "step": 30940 }, { "epoch": 1.03, "grad_norm": 0.45256784558296204, "learning_rate": 0.0004419283378872604, "loss": 1.7557, "step": 30941 }, { "epoch": 1.03, "grad_norm": 0.45823153853416443, "learning_rate": 0.00044191912936410486, "loss": 1.7386, "step": 30942 }, { "epoch": 1.03, "grad_norm": 0.4393465518951416, "learning_rate": 0.00044190992066868055, "loss": 1.8324, "step": 30943 }, { "epoch": 1.03, "grad_norm": 0.48017922043800354, "learning_rate": 0.0004419007118009983, "loss": 1.7673, "step": 30944 }, { "epoch": 1.03, "grad_norm": 0.45317232608795166, "learning_rate": 0.00044189150276106973, "loss": 1.7337, "step": 30945 }, { "epoch": 1.03, "grad_norm": 0.45883631706237793, "learning_rate": 0.00044188229354890566, "loss": 1.8603, "step": 30946 }, { "epoch": 1.03, "grad_norm": 0.4615621864795685, "learning_rate": 0.00044187308416451744, "loss": 1.7903, "step": 30947 }, { "epoch": 1.03, "grad_norm": 0.46639639139175415, "learning_rate": 0.00044186387460791617, "loss": 1.7789, "step": 30948 }, { "epoch": 1.03, "grad_norm": 0.5166849493980408, "learning_rate": 0.00044185466487911296, "loss": 1.7498, "step": 30949 }, { "epoch": 1.03, "grad_norm": 0.4444856345653534, "learning_rate": 0.00044184545497811926, "loss": 1.7763, "step": 30950 }, { "epoch": 1.03, "grad_norm": 0.4292107820510864, "learning_rate": 0.000441836244904946, "loss": 1.6746, "step": 30951 }, { "epoch": 1.03, "grad_norm": 0.4623751640319824, "learning_rate": 0.0004418270346596045, "loss": 1.7841, "step": 30952 }, { "epoch": 1.03, "grad_norm": 0.45624929666519165, "learning_rate": 0.00044181782424210583, "loss": 1.7526, "step": 30953 }, { "epoch": 1.03, "grad_norm": 0.45384183526039124, "learning_rate": 0.00044180861365246117, "loss": 1.8476, "step": 30954 }, { "epoch": 1.03, "grad_norm": 0.46717530488967896, "learning_rate": 0.0004417994028906818, "loss": 1.8568, "step": 30955 }, { "epoch": 1.03, "grad_norm": 0.43913987278938293, "learning_rate": 0.0004417901919567789, "loss": 1.7669, "step": 30956 }, { "epoch": 1.03, "grad_norm": 0.4450148642063141, "learning_rate": 0.00044178098085076347, "loss": 1.7644, "step": 30957 }, { "epoch": 1.03, "grad_norm": 0.43831390142440796, "learning_rate": 0.0004417717695726469, "loss": 1.8089, "step": 30958 }, { "epoch": 1.03, "grad_norm": 0.4774792492389679, "learning_rate": 0.0004417625581224402, "loss": 1.7943, "step": 30959 }, { "epoch": 1.03, "grad_norm": 0.4382244050502777, "learning_rate": 0.0004417533465001547, "loss": 1.7531, "step": 30960 }, { "epoch": 1.03, "grad_norm": 0.4460330307483673, "learning_rate": 0.0004417441347058016, "loss": 1.7819, "step": 30961 }, { "epoch": 1.03, "grad_norm": 0.4623568654060364, "learning_rate": 0.000441734922739392, "loss": 1.8003, "step": 30962 }, { "epoch": 1.03, "grad_norm": 0.4443223774433136, "learning_rate": 0.000441725710600937, "loss": 1.7992, "step": 30963 }, { "epoch": 1.03, "grad_norm": 0.4840981364250183, "learning_rate": 0.0004417164982904479, "loss": 1.8548, "step": 30964 }, { "epoch": 1.03, "grad_norm": 0.4556540250778198, "learning_rate": 0.00044170728580793586, "loss": 1.7883, "step": 30965 }, { "epoch": 1.03, "grad_norm": 0.4506772756576538, "learning_rate": 0.00044169807315341206, "loss": 1.8085, "step": 30966 }, { "epoch": 1.03, "grad_norm": 0.4668763279914856, "learning_rate": 0.0004416888603268876, "loss": 1.7933, "step": 30967 }, { "epoch": 1.03, "grad_norm": 0.47447049617767334, "learning_rate": 0.00044167964732837384, "loss": 1.8773, "step": 30968 }, { "epoch": 1.03, "grad_norm": 0.455610454082489, "learning_rate": 0.0004416704341578818, "loss": 1.7898, "step": 30969 }, { "epoch": 1.03, "grad_norm": 0.4723392426967621, "learning_rate": 0.0004416612208154228, "loss": 1.7721, "step": 30970 }, { "epoch": 1.03, "grad_norm": 0.4481256306171417, "learning_rate": 0.00044165200730100784, "loss": 1.8023, "step": 30971 }, { "epoch": 1.03, "grad_norm": 0.4561685621738434, "learning_rate": 0.0004416427936146483, "loss": 1.8077, "step": 30972 }, { "epoch": 1.03, "grad_norm": 0.45165345072746277, "learning_rate": 0.0004416335797563553, "loss": 1.8565, "step": 30973 }, { "epoch": 1.03, "grad_norm": 0.7303354144096375, "learning_rate": 0.0004416243657261399, "loss": 1.8695, "step": 30974 }, { "epoch": 1.03, "grad_norm": 0.4353925883769989, "learning_rate": 0.0004416151515240135, "loss": 1.8168, "step": 30975 }, { "epoch": 1.03, "grad_norm": 0.45579972863197327, "learning_rate": 0.00044160593714998714, "loss": 1.8196, "step": 30976 }, { "epoch": 1.03, "grad_norm": 0.4542251229286194, "learning_rate": 0.00044159672260407195, "loss": 1.8072, "step": 30977 }, { "epoch": 1.03, "grad_norm": 0.44445985555648804, "learning_rate": 0.0004415875078862793, "loss": 1.7997, "step": 30978 }, { "epoch": 1.03, "grad_norm": 0.44836026430130005, "learning_rate": 0.0004415782929966202, "loss": 1.755, "step": 30979 }, { "epoch": 1.03, "grad_norm": 0.4605436325073242, "learning_rate": 0.00044156907793510607, "loss": 1.7267, "step": 30980 }, { "epoch": 1.03, "grad_norm": 0.4517953395843506, "learning_rate": 0.00044155986270174777, "loss": 1.8158, "step": 30981 }, { "epoch": 1.03, "grad_norm": 0.47361990809440613, "learning_rate": 0.0004415506472965567, "loss": 1.7243, "step": 30982 }, { "epoch": 1.03, "grad_norm": 0.4551525413990021, "learning_rate": 0.000441541431719544, "loss": 1.787, "step": 30983 }, { "epoch": 1.03, "grad_norm": 0.464626282453537, "learning_rate": 0.00044153221597072087, "loss": 1.7935, "step": 30984 }, { "epoch": 1.03, "grad_norm": 0.45936039090156555, "learning_rate": 0.0004415230000500985, "loss": 1.8256, "step": 30985 }, { "epoch": 1.03, "grad_norm": 0.4519156217575073, "learning_rate": 0.0004415137839576881, "loss": 1.763, "step": 30986 }, { "epoch": 1.03, "grad_norm": 0.45738938450813293, "learning_rate": 0.0004415045676935007, "loss": 1.8097, "step": 30987 }, { "epoch": 1.03, "grad_norm": 0.45420414209365845, "learning_rate": 0.0004414953512575477, "loss": 1.7711, "step": 30988 }, { "epoch": 1.03, "grad_norm": 0.4591101408004761, "learning_rate": 0.00044148613464984015, "loss": 1.8136, "step": 30989 }, { "epoch": 1.03, "grad_norm": 0.4559608995914459, "learning_rate": 0.0004414769178703893, "loss": 1.8224, "step": 30990 }, { "epoch": 1.03, "grad_norm": 0.4409591555595398, "learning_rate": 0.0004414677009192064, "loss": 1.7962, "step": 30991 }, { "epoch": 1.03, "grad_norm": 0.46442756056785583, "learning_rate": 0.0004414584837963023, "loss": 1.8708, "step": 30992 }, { "epoch": 1.03, "grad_norm": 0.46340495347976685, "learning_rate": 0.00044144926650168867, "loss": 1.8165, "step": 30993 }, { "epoch": 1.03, "grad_norm": 0.4652309715747833, "learning_rate": 0.0004414400490353764, "loss": 1.8179, "step": 30994 }, { "epoch": 1.03, "grad_norm": 0.4467504322528839, "learning_rate": 0.00044143083139737676, "loss": 1.7784, "step": 30995 }, { "epoch": 1.03, "grad_norm": 0.4524499475955963, "learning_rate": 0.00044142161358770105, "loss": 1.797, "step": 30996 }, { "epoch": 1.03, "grad_norm": 0.449196457862854, "learning_rate": 0.00044141239560636014, "loss": 1.7849, "step": 30997 }, { "epoch": 1.03, "grad_norm": 0.4445142447948456, "learning_rate": 0.00044140317745336547, "loss": 1.7901, "step": 30998 }, { "epoch": 1.03, "grad_norm": 0.45855918526649475, "learning_rate": 0.00044139395912872815, "loss": 1.8267, "step": 30999 }, { "epoch": 1.03, "grad_norm": 0.46465086936950684, "learning_rate": 0.0004413847406324595, "loss": 1.8534, "step": 31000 }, { "epoch": 1.03, "grad_norm": 0.4474097192287445, "learning_rate": 0.00044137552196457056, "loss": 1.7699, "step": 31001 }, { "epoch": 1.03, "grad_norm": 0.4490434229373932, "learning_rate": 0.0004413663031250725, "loss": 1.7543, "step": 31002 }, { "epoch": 1.03, "grad_norm": 0.45412418246269226, "learning_rate": 0.00044135708411397677, "loss": 1.8094, "step": 31003 }, { "epoch": 1.03, "grad_norm": 0.46685993671417236, "learning_rate": 0.00044134786493129415, "loss": 1.8182, "step": 31004 }, { "epoch": 1.03, "grad_norm": 0.47105878591537476, "learning_rate": 0.0004413386455770362, "loss": 1.8284, "step": 31005 }, { "epoch": 1.03, "grad_norm": 0.47784045338630676, "learning_rate": 0.0004413294260512139, "loss": 1.8777, "step": 31006 }, { "epoch": 1.03, "grad_norm": 0.4395149350166321, "learning_rate": 0.00044132020635383846, "loss": 1.8105, "step": 31007 }, { "epoch": 1.03, "grad_norm": 0.47702014446258545, "learning_rate": 0.00044131098648492114, "loss": 1.8138, "step": 31008 }, { "epoch": 1.03, "grad_norm": 0.45981353521347046, "learning_rate": 0.00044130176644447306, "loss": 1.7805, "step": 31009 }, { "epoch": 1.03, "grad_norm": 0.4706421494483948, "learning_rate": 0.00044129254623250555, "loss": 1.8448, "step": 31010 }, { "epoch": 1.03, "grad_norm": 0.4608495831489563, "learning_rate": 0.0004412833258490296, "loss": 1.7997, "step": 31011 }, { "epoch": 1.03, "grad_norm": 0.45090198516845703, "learning_rate": 0.0004412741052940567, "loss": 1.7987, "step": 31012 }, { "epoch": 1.03, "grad_norm": 0.45329228043556213, "learning_rate": 0.00044126488456759774, "loss": 1.8195, "step": 31013 }, { "epoch": 1.03, "grad_norm": 0.44902539253234863, "learning_rate": 0.000441255663669664, "loss": 1.8275, "step": 31014 }, { "epoch": 1.03, "grad_norm": 0.46674203872680664, "learning_rate": 0.00044124644260026667, "loss": 1.7436, "step": 31015 }, { "epoch": 1.03, "grad_norm": 0.45957818627357483, "learning_rate": 0.000441237221359417, "loss": 1.8245, "step": 31016 }, { "epoch": 1.03, "grad_norm": 0.4783461391925812, "learning_rate": 0.0004412279999471262, "loss": 1.7788, "step": 31017 }, { "epoch": 1.03, "grad_norm": 0.4492778778076172, "learning_rate": 0.00044121877836340536, "loss": 1.7945, "step": 31018 }, { "epoch": 1.03, "grad_norm": 0.4602488577365875, "learning_rate": 0.00044120955660826574, "loss": 1.7894, "step": 31019 }, { "epoch": 1.03, "grad_norm": 0.47656193375587463, "learning_rate": 0.00044120033468171853, "loss": 1.8248, "step": 31020 }, { "epoch": 1.03, "grad_norm": 0.4546874463558197, "learning_rate": 0.0004411911125837749, "loss": 1.8234, "step": 31021 }, { "epoch": 1.03, "grad_norm": 0.46823549270629883, "learning_rate": 0.0004411818903144461, "loss": 1.8672, "step": 31022 }, { "epoch": 1.03, "grad_norm": 0.4716655910015106, "learning_rate": 0.0004411726678737433, "loss": 1.8531, "step": 31023 }, { "epoch": 1.03, "grad_norm": 0.458209365606308, "learning_rate": 0.0004411634452616777, "loss": 1.7807, "step": 31024 }, { "epoch": 1.03, "grad_norm": 0.4458158612251282, "learning_rate": 0.0004411542224782604, "loss": 1.8646, "step": 31025 }, { "epoch": 1.03, "grad_norm": 0.4610098898410797, "learning_rate": 0.0004411449995235027, "loss": 1.7221, "step": 31026 }, { "epoch": 1.03, "grad_norm": 0.449720174074173, "learning_rate": 0.00044113577639741576, "loss": 1.795, "step": 31027 }, { "epoch": 1.03, "grad_norm": 0.4709419012069702, "learning_rate": 0.00044112655310001085, "loss": 1.8325, "step": 31028 }, { "epoch": 1.03, "grad_norm": 0.4668790400028229, "learning_rate": 0.00044111732963129897, "loss": 1.7854, "step": 31029 }, { "epoch": 1.03, "grad_norm": 0.4704379141330719, "learning_rate": 0.00044110810599129154, "loss": 1.8174, "step": 31030 }, { "epoch": 1.03, "grad_norm": 0.4478820860385895, "learning_rate": 0.0004410988821799996, "loss": 1.7898, "step": 31031 }, { "epoch": 1.03, "grad_norm": 0.43788260221481323, "learning_rate": 0.00044108965819743447, "loss": 1.7937, "step": 31032 }, { "epoch": 1.03, "grad_norm": 0.4407283663749695, "learning_rate": 0.00044108043404360726, "loss": 1.7457, "step": 31033 }, { "epoch": 1.03, "grad_norm": 0.44951552152633667, "learning_rate": 0.0004410712097185292, "loss": 1.8016, "step": 31034 }, { "epoch": 1.03, "grad_norm": 0.45425114035606384, "learning_rate": 0.0004410619852222115, "loss": 1.8673, "step": 31035 }, { "epoch": 1.03, "grad_norm": 0.44942307472229004, "learning_rate": 0.0004410527605546652, "loss": 1.8018, "step": 31036 }, { "epoch": 1.03, "grad_norm": 0.44196298718452454, "learning_rate": 0.00044104353571590183, "loss": 1.7999, "step": 31037 }, { "epoch": 1.03, "grad_norm": 0.43623483180999756, "learning_rate": 0.0004410343107059323, "loss": 1.7362, "step": 31038 }, { "epoch": 1.03, "grad_norm": 0.44671204686164856, "learning_rate": 0.0004410250855247679, "loss": 1.743, "step": 31039 }, { "epoch": 1.03, "grad_norm": 0.43530917167663574, "learning_rate": 0.00044101586017241973, "loss": 1.8151, "step": 31040 }, { "epoch": 1.03, "grad_norm": 1.2502456903457642, "learning_rate": 0.00044100663464889916, "loss": 1.8068, "step": 31041 }, { "epoch": 1.03, "grad_norm": 0.4441695213317871, "learning_rate": 0.00044099740895421743, "loss": 1.7683, "step": 31042 }, { "epoch": 1.03, "grad_norm": 0.4544488191604614, "learning_rate": 0.00044098818308838545, "loss": 1.7235, "step": 31043 }, { "epoch": 1.03, "grad_norm": 0.46318015456199646, "learning_rate": 0.00044097895705141465, "loss": 1.7555, "step": 31044 }, { "epoch": 1.03, "grad_norm": 0.44537362456321716, "learning_rate": 0.00044096973084331614, "loss": 1.7767, "step": 31045 }, { "epoch": 1.03, "grad_norm": 1.240535020828247, "learning_rate": 0.0004409605044641012, "loss": 1.8657, "step": 31046 }, { "epoch": 1.03, "grad_norm": 0.48517879843711853, "learning_rate": 0.00044095127791378094, "loss": 1.8476, "step": 31047 }, { "epoch": 1.03, "grad_norm": 0.4507281184196472, "learning_rate": 0.00044094205119236664, "loss": 1.789, "step": 31048 }, { "epoch": 1.03, "grad_norm": 0.44706082344055176, "learning_rate": 0.0004409328242998694, "loss": 1.8946, "step": 31049 }, { "epoch": 1.03, "grad_norm": 0.47014278173446655, "learning_rate": 0.00044092359723630053, "loss": 1.8037, "step": 31050 }, { "epoch": 1.03, "grad_norm": 0.4560853838920593, "learning_rate": 0.00044091437000167116, "loss": 1.8548, "step": 31051 }, { "epoch": 1.03, "grad_norm": 0.4566064774990082, "learning_rate": 0.0004409051425959925, "loss": 1.8088, "step": 31052 }, { "epoch": 1.03, "grad_norm": 0.46026891469955444, "learning_rate": 0.0004408959150192758, "loss": 1.7664, "step": 31053 }, { "epoch": 1.03, "grad_norm": 0.45083603262901306, "learning_rate": 0.0004408866872715321, "loss": 1.7641, "step": 31054 }, { "epoch": 1.03, "grad_norm": 0.4689483344554901, "learning_rate": 0.00044087745935277283, "loss": 1.803, "step": 31055 }, { "epoch": 1.03, "grad_norm": 0.4680768549442291, "learning_rate": 0.00044086823126300904, "loss": 1.7834, "step": 31056 }, { "epoch": 1.03, "grad_norm": 0.45249855518341064, "learning_rate": 0.0004408590030022519, "loss": 1.8135, "step": 31057 }, { "epoch": 1.03, "grad_norm": 0.45295968651771545, "learning_rate": 0.00044084977457051287, "loss": 1.8146, "step": 31058 }, { "epoch": 1.03, "grad_norm": 0.4568340480327606, "learning_rate": 0.00044084054596780283, "loss": 1.7998, "step": 31059 }, { "epoch": 1.03, "grad_norm": 0.4466480314731598, "learning_rate": 0.0004408313171941332, "loss": 1.8054, "step": 31060 }, { "epoch": 1.03, "grad_norm": 0.46101781725883484, "learning_rate": 0.00044082208824951496, "loss": 1.8293, "step": 31061 }, { "epoch": 1.03, "grad_norm": 0.44531571865081787, "learning_rate": 0.0004408128591339596, "loss": 1.7981, "step": 31062 }, { "epoch": 1.03, "grad_norm": 0.4815550446510315, "learning_rate": 0.0004408036298474781, "loss": 1.7834, "step": 31063 }, { "epoch": 1.03, "grad_norm": 0.4640277922153473, "learning_rate": 0.00044079440039008176, "loss": 1.7453, "step": 31064 }, { "epoch": 1.03, "grad_norm": 0.4647572934627533, "learning_rate": 0.00044078517076178185, "loss": 1.8197, "step": 31065 }, { "epoch": 1.03, "grad_norm": 0.45778846740722656, "learning_rate": 0.00044077594096258933, "loss": 1.7507, "step": 31066 }, { "epoch": 1.03, "grad_norm": 0.4789915382862091, "learning_rate": 0.00044076671099251566, "loss": 1.8228, "step": 31067 }, { "epoch": 1.03, "grad_norm": 0.47798582911491394, "learning_rate": 0.0004407574808515719, "loss": 1.8528, "step": 31068 }, { "epoch": 1.03, "grad_norm": 0.45368948578834534, "learning_rate": 0.0004407482505397693, "loss": 1.7893, "step": 31069 }, { "epoch": 1.03, "grad_norm": 0.5197219252586365, "learning_rate": 0.00044073902005711905, "loss": 1.8192, "step": 31070 }, { "epoch": 1.03, "grad_norm": 0.46809935569763184, "learning_rate": 0.0004407297894036324, "loss": 1.7104, "step": 31071 }, { "epoch": 1.03, "grad_norm": 0.4779549837112427, "learning_rate": 0.00044072055857932057, "loss": 1.7686, "step": 31072 }, { "epoch": 1.03, "grad_norm": 0.5493735074996948, "learning_rate": 0.0004407113275841947, "loss": 1.8211, "step": 31073 }, { "epoch": 1.03, "grad_norm": 0.45152515172958374, "learning_rate": 0.00044070209641826596, "loss": 1.8276, "step": 31074 }, { "epoch": 1.03, "grad_norm": 0.467580646276474, "learning_rate": 0.0004406928650815456, "loss": 1.745, "step": 31075 }, { "epoch": 1.03, "grad_norm": 0.4696712791919708, "learning_rate": 0.0004406836335740448, "loss": 1.7574, "step": 31076 }, { "epoch": 1.03, "grad_norm": 0.481018602848053, "learning_rate": 0.00044067440189577485, "loss": 1.8107, "step": 31077 }, { "epoch": 1.03, "grad_norm": 0.46516454219818115, "learning_rate": 0.000440665170046747, "loss": 1.721, "step": 31078 }, { "epoch": 1.03, "grad_norm": 0.44615957140922546, "learning_rate": 0.00044065593802697226, "loss": 1.7458, "step": 31079 }, { "epoch": 1.03, "grad_norm": 0.46539488434791565, "learning_rate": 0.0004406467058364619, "loss": 1.8518, "step": 31080 }, { "epoch": 1.03, "grad_norm": 0.4712517559528351, "learning_rate": 0.0004406374734752272, "loss": 1.7343, "step": 31081 }, { "epoch": 1.03, "grad_norm": 0.49701017141342163, "learning_rate": 0.0004406282409432793, "loss": 1.8231, "step": 31082 }, { "epoch": 1.03, "grad_norm": 0.4626403748989105, "learning_rate": 0.0004406190082406295, "loss": 1.8306, "step": 31083 }, { "epoch": 1.03, "grad_norm": 0.4642472565174103, "learning_rate": 0.00044060977536728883, "loss": 1.7151, "step": 31084 }, { "epoch": 1.03, "grad_norm": 0.4603032171726227, "learning_rate": 0.00044060054232326873, "loss": 1.8354, "step": 31085 }, { "epoch": 1.03, "grad_norm": 0.45081743597984314, "learning_rate": 0.0004405913091085802, "loss": 1.8207, "step": 31086 }, { "epoch": 1.03, "grad_norm": 0.4512783885002136, "learning_rate": 0.0004405820757232346, "loss": 1.7459, "step": 31087 }, { "epoch": 1.03, "grad_norm": 0.451328843832016, "learning_rate": 0.0004405728421672431, "loss": 1.7595, "step": 31088 }, { "epoch": 1.03, "grad_norm": 0.44965970516204834, "learning_rate": 0.00044056360844061684, "loss": 1.8069, "step": 31089 }, { "epoch": 1.03, "grad_norm": 0.4588596224784851, "learning_rate": 0.0004405543745433671, "loss": 1.7313, "step": 31090 }, { "epoch": 1.03, "grad_norm": 0.4523562788963318, "learning_rate": 0.00044054514047550496, "loss": 1.7476, "step": 31091 }, { "epoch": 1.03, "grad_norm": 0.4678713381290436, "learning_rate": 0.0004405359062370418, "loss": 1.8581, "step": 31092 }, { "epoch": 1.03, "grad_norm": 0.45994192361831665, "learning_rate": 0.0004405266718279888, "loss": 1.7962, "step": 31093 }, { "epoch": 1.03, "grad_norm": 0.46486181020736694, "learning_rate": 0.00044051743724835707, "loss": 1.8421, "step": 31094 }, { "epoch": 1.03, "grad_norm": 0.4528995454311371, "learning_rate": 0.00044050820249815783, "loss": 1.8224, "step": 31095 }, { "epoch": 1.03, "grad_norm": 0.4470807611942291, "learning_rate": 0.0004404989675774024, "loss": 1.812, "step": 31096 }, { "epoch": 1.03, "grad_norm": 0.4619253873825073, "learning_rate": 0.00044048973248610196, "loss": 1.8087, "step": 31097 }, { "epoch": 1.03, "grad_norm": 0.44376352429389954, "learning_rate": 0.00044048049722426766, "loss": 1.8169, "step": 31098 }, { "epoch": 1.03, "grad_norm": 0.4478253126144409, "learning_rate": 0.0004404712617919107, "loss": 1.8187, "step": 31099 }, { "epoch": 1.03, "grad_norm": 0.4444994032382965, "learning_rate": 0.00044046202618904233, "loss": 1.7559, "step": 31100 }, { "epoch": 1.03, "grad_norm": 0.4507574439048767, "learning_rate": 0.00044045279041567375, "loss": 1.7322, "step": 31101 }, { "epoch": 1.03, "grad_norm": 0.46311256289482117, "learning_rate": 0.00044044355447181617, "loss": 1.8251, "step": 31102 }, { "epoch": 1.03, "grad_norm": 0.45197373628616333, "learning_rate": 0.0004404343183574809, "loss": 1.8274, "step": 31103 }, { "epoch": 1.03, "grad_norm": 0.4587043821811676, "learning_rate": 0.000440425082072679, "loss": 1.7852, "step": 31104 }, { "epoch": 1.03, "grad_norm": 0.4604502320289612, "learning_rate": 0.0004404158456174217, "loss": 1.8129, "step": 31105 }, { "epoch": 1.03, "grad_norm": 0.46136391162872314, "learning_rate": 0.00044040660899172024, "loss": 1.7661, "step": 31106 }, { "epoch": 1.03, "grad_norm": 0.48246854543685913, "learning_rate": 0.0004403973721955859, "loss": 1.8546, "step": 31107 }, { "epoch": 1.03, "grad_norm": 0.444635272026062, "learning_rate": 0.0004403881352290299, "loss": 1.7807, "step": 31108 }, { "epoch": 1.04, "grad_norm": 0.44777071475982666, "learning_rate": 0.00044037889809206323, "loss": 1.7718, "step": 31109 }, { "epoch": 1.04, "grad_norm": 0.46504127979278564, "learning_rate": 0.0004403696607846974, "loss": 1.8099, "step": 31110 }, { "epoch": 1.04, "grad_norm": 0.4928460121154785, "learning_rate": 0.00044036042330694347, "loss": 1.7629, "step": 31111 }, { "epoch": 1.04, "grad_norm": 0.46446821093559265, "learning_rate": 0.0004403511856588126, "loss": 1.8389, "step": 31112 }, { "epoch": 1.04, "grad_norm": 0.4533320367336273, "learning_rate": 0.0004403419478403161, "loss": 1.7175, "step": 31113 }, { "epoch": 1.04, "grad_norm": 0.472791850566864, "learning_rate": 0.00044033270985146514, "loss": 1.7767, "step": 31114 }, { "epoch": 1.04, "grad_norm": 0.4674379229545593, "learning_rate": 0.00044032347169227097, "loss": 1.8013, "step": 31115 }, { "epoch": 1.04, "grad_norm": 0.460904598236084, "learning_rate": 0.00044031423336274474, "loss": 1.7991, "step": 31116 }, { "epoch": 1.04, "grad_norm": 0.4777343273162842, "learning_rate": 0.00044030499486289777, "loss": 1.7619, "step": 31117 }, { "epoch": 1.04, "grad_norm": 0.4778183698654175, "learning_rate": 0.0004402957561927412, "loss": 1.77, "step": 31118 }, { "epoch": 1.04, "grad_norm": 0.46745726466178894, "learning_rate": 0.0004402865173522862, "loss": 1.7327, "step": 31119 }, { "epoch": 1.04, "grad_norm": 0.4482564330101013, "learning_rate": 0.0004402772783415441, "loss": 1.7867, "step": 31120 }, { "epoch": 1.04, "grad_norm": 0.47192493081092834, "learning_rate": 0.0004402680391605259, "loss": 1.7625, "step": 31121 }, { "epoch": 1.04, "grad_norm": 0.4682160019874573, "learning_rate": 0.0004402587998092432, "loss": 1.761, "step": 31122 }, { "epoch": 1.04, "grad_norm": 0.4559980034828186, "learning_rate": 0.00044024956028770683, "loss": 1.824, "step": 31123 }, { "epoch": 1.04, "grad_norm": 0.46099716424942017, "learning_rate": 0.00044024032059592817, "loss": 1.7494, "step": 31124 }, { "epoch": 1.04, "grad_norm": 0.4755333364009857, "learning_rate": 0.0004402310807339185, "loss": 1.8366, "step": 31125 }, { "epoch": 1.04, "grad_norm": 0.45145294070243835, "learning_rate": 0.0004402218407016889, "loss": 1.7778, "step": 31126 }, { "epoch": 1.04, "grad_norm": 0.4436148405075073, "learning_rate": 0.0004402126004992507, "loss": 1.7873, "step": 31127 }, { "epoch": 1.04, "grad_norm": 0.6898338198661804, "learning_rate": 0.00044020336012661507, "loss": 1.8286, "step": 31128 }, { "epoch": 1.04, "grad_norm": 0.45016545057296753, "learning_rate": 0.00044019411958379304, "loss": 1.7993, "step": 31129 }, { "epoch": 1.04, "grad_norm": 0.46453702449798584, "learning_rate": 0.0004401848788707962, "loss": 1.8241, "step": 31130 }, { "epoch": 1.04, "grad_norm": 0.4765159487724304, "learning_rate": 0.0004401756379876355, "loss": 1.7266, "step": 31131 }, { "epoch": 1.04, "grad_norm": 0.4789406657218933, "learning_rate": 0.00044016639693432224, "loss": 1.8162, "step": 31132 }, { "epoch": 1.04, "grad_norm": 0.4487856328487396, "learning_rate": 0.00044015715571086764, "loss": 1.8077, "step": 31133 }, { "epoch": 1.04, "grad_norm": 0.4735771715641022, "learning_rate": 0.0004401479143172829, "loss": 1.7907, "step": 31134 }, { "epoch": 1.04, "grad_norm": 0.4743267893791199, "learning_rate": 0.0004401386727535792, "loss": 1.7562, "step": 31135 }, { "epoch": 1.04, "grad_norm": 0.46544837951660156, "learning_rate": 0.0004401294310197679, "loss": 1.8001, "step": 31136 }, { "epoch": 1.04, "grad_norm": 0.46886202692985535, "learning_rate": 0.00044012018911586, "loss": 1.8061, "step": 31137 }, { "epoch": 1.04, "grad_norm": 0.4621066153049469, "learning_rate": 0.00044011094704186687, "loss": 1.8145, "step": 31138 }, { "epoch": 1.04, "grad_norm": 0.45866113901138306, "learning_rate": 0.0004401017047977997, "loss": 1.7379, "step": 31139 }, { "epoch": 1.04, "grad_norm": 0.47203174233436584, "learning_rate": 0.00044009246238366976, "loss": 1.7765, "step": 31140 }, { "epoch": 1.04, "grad_norm": 0.48362213373184204, "learning_rate": 0.00044008321979948815, "loss": 1.798, "step": 31141 }, { "epoch": 1.04, "grad_norm": 0.44366931915283203, "learning_rate": 0.00044007397704526617, "loss": 1.8524, "step": 31142 }, { "epoch": 1.04, "grad_norm": 0.4744581878185272, "learning_rate": 0.000440064734121015, "loss": 1.8841, "step": 31143 }, { "epoch": 1.04, "grad_norm": 0.45919570326805115, "learning_rate": 0.00044005549102674586, "loss": 1.8511, "step": 31144 }, { "epoch": 1.04, "grad_norm": 0.4652484953403473, "learning_rate": 0.0004400462477624701, "loss": 1.8174, "step": 31145 }, { "epoch": 1.04, "grad_norm": 0.4506897032260895, "learning_rate": 0.0004400370043281987, "loss": 1.7855, "step": 31146 }, { "epoch": 1.04, "grad_norm": 0.4663388133049011, "learning_rate": 0.0004400277607239431, "loss": 1.6363, "step": 31147 }, { "epoch": 1.04, "grad_norm": 0.4585283100605011, "learning_rate": 0.00044001851694971435, "loss": 1.8213, "step": 31148 }, { "epoch": 1.04, "grad_norm": 0.4673066735267639, "learning_rate": 0.00044000927300552384, "loss": 1.7984, "step": 31149 }, { "epoch": 1.04, "grad_norm": 0.43874499201774597, "learning_rate": 0.00044000002889138263, "loss": 1.7203, "step": 31150 }, { "epoch": 1.04, "grad_norm": 0.45274099707603455, "learning_rate": 0.00043999078460730206, "loss": 1.7655, "step": 31151 }, { "epoch": 1.04, "grad_norm": 0.4763939380645752, "learning_rate": 0.0004399815401532933, "loss": 1.8131, "step": 31152 }, { "epoch": 1.04, "grad_norm": 0.45946958661079407, "learning_rate": 0.00043997229552936753, "loss": 1.7251, "step": 31153 }, { "epoch": 1.04, "grad_norm": 0.463756799697876, "learning_rate": 0.00043996305073553614, "loss": 1.7814, "step": 31154 }, { "epoch": 1.04, "grad_norm": 0.45958900451660156, "learning_rate": 0.0004399538057718101, "loss": 1.7457, "step": 31155 }, { "epoch": 1.04, "grad_norm": 0.47706979513168335, "learning_rate": 0.00043994456063820085, "loss": 1.8153, "step": 31156 }, { "epoch": 1.04, "grad_norm": 0.470880925655365, "learning_rate": 0.00043993531533471935, "loss": 1.7954, "step": 31157 }, { "epoch": 1.04, "grad_norm": 0.4495297074317932, "learning_rate": 0.0004399260698613772, "loss": 1.7748, "step": 31158 }, { "epoch": 1.04, "grad_norm": 0.4470214545726776, "learning_rate": 0.00043991682421818534, "loss": 1.7975, "step": 31159 }, { "epoch": 1.04, "grad_norm": 0.4724263846874237, "learning_rate": 0.0004399075784051551, "loss": 1.7164, "step": 31160 }, { "epoch": 1.04, "grad_norm": 0.4607354998588562, "learning_rate": 0.0004398983324222976, "loss": 1.7697, "step": 31161 }, { "epoch": 1.04, "grad_norm": 0.4548477530479431, "learning_rate": 0.0004398890862696242, "loss": 1.8178, "step": 31162 }, { "epoch": 1.04, "grad_norm": 0.46427011489868164, "learning_rate": 0.000439879839947146, "loss": 1.6978, "step": 31163 }, { "epoch": 1.04, "grad_norm": 0.4462815523147583, "learning_rate": 0.0004398705934548743, "loss": 1.797, "step": 31164 }, { "epoch": 1.04, "grad_norm": 0.46187707781791687, "learning_rate": 0.0004398613467928204, "loss": 1.7688, "step": 31165 }, { "epoch": 1.04, "grad_norm": 0.4626659154891968, "learning_rate": 0.0004398520999609954, "loss": 1.8219, "step": 31166 }, { "epoch": 1.04, "grad_norm": 0.4512738585472107, "learning_rate": 0.00043984285295941053, "loss": 1.8001, "step": 31167 }, { "epoch": 1.04, "grad_norm": 0.437031090259552, "learning_rate": 0.000439833605788077, "loss": 1.7938, "step": 31168 }, { "epoch": 1.04, "grad_norm": 0.4566037356853485, "learning_rate": 0.00043982435844700614, "loss": 1.8531, "step": 31169 }, { "epoch": 1.04, "grad_norm": 0.45063161849975586, "learning_rate": 0.00043981511093620914, "loss": 1.7852, "step": 31170 }, { "epoch": 1.04, "grad_norm": 0.44290441274642944, "learning_rate": 0.00043980586325569705, "loss": 1.7342, "step": 31171 }, { "epoch": 1.04, "grad_norm": 0.44318634271621704, "learning_rate": 0.00043979661540548135, "loss": 1.7844, "step": 31172 }, { "epoch": 1.04, "grad_norm": 0.4504907727241516, "learning_rate": 0.00043978736738557316, "loss": 1.7035, "step": 31173 }, { "epoch": 1.04, "grad_norm": 0.45787274837493896, "learning_rate": 0.00043977811919598364, "loss": 1.7612, "step": 31174 }, { "epoch": 1.04, "grad_norm": 0.45433568954467773, "learning_rate": 0.00043976887083672423, "loss": 1.8089, "step": 31175 }, { "epoch": 1.04, "grad_norm": 0.43531015515327454, "learning_rate": 0.0004397596223078058, "loss": 1.7757, "step": 31176 }, { "epoch": 1.04, "grad_norm": 0.45419666171073914, "learning_rate": 0.0004397503736092399, "loss": 1.7696, "step": 31177 }, { "epoch": 1.04, "grad_norm": 0.4695499837398529, "learning_rate": 0.00043974112474103754, "loss": 1.6912, "step": 31178 }, { "epoch": 1.04, "grad_norm": 0.45631179213523865, "learning_rate": 0.0004397318757032102, "loss": 1.8496, "step": 31179 }, { "epoch": 1.04, "grad_norm": 0.44333958625793457, "learning_rate": 0.0004397226264957688, "loss": 1.7867, "step": 31180 }, { "epoch": 1.04, "grad_norm": 0.4653630256652832, "learning_rate": 0.0004397133771187248, "loss": 1.7399, "step": 31181 }, { "epoch": 1.04, "grad_norm": 0.4541458785533905, "learning_rate": 0.0004397041275720894, "loss": 1.7594, "step": 31182 }, { "epoch": 1.04, "grad_norm": 0.4487106502056122, "learning_rate": 0.00043969487785587363, "loss": 1.7961, "step": 31183 }, { "epoch": 1.04, "grad_norm": 0.4653395116329193, "learning_rate": 0.000439685627970089, "loss": 1.8481, "step": 31184 }, { "epoch": 1.04, "grad_norm": 0.46188247203826904, "learning_rate": 0.00043967637791474654, "loss": 1.7739, "step": 31185 }, { "epoch": 1.04, "grad_norm": 0.4725641906261444, "learning_rate": 0.0004396671276898575, "loss": 1.8074, "step": 31186 }, { "epoch": 1.04, "grad_norm": 0.4535180926322937, "learning_rate": 0.00043965787729543317, "loss": 1.7999, "step": 31187 }, { "epoch": 1.04, "grad_norm": 0.4636583924293518, "learning_rate": 0.0004396486267314848, "loss": 1.8532, "step": 31188 }, { "epoch": 1.04, "grad_norm": 0.46102210879325867, "learning_rate": 0.00043963937599802354, "loss": 1.8382, "step": 31189 }, { "epoch": 1.04, "grad_norm": 0.4488663673400879, "learning_rate": 0.00043963012509506066, "loss": 1.7703, "step": 31190 }, { "epoch": 1.04, "grad_norm": 0.4726674556732178, "learning_rate": 0.0004396208740226074, "loss": 1.6838, "step": 31191 }, { "epoch": 1.04, "grad_norm": 0.5002368688583374, "learning_rate": 0.00043961162278067487, "loss": 1.7478, "step": 31192 }, { "epoch": 1.04, "grad_norm": 0.47209128737449646, "learning_rate": 0.0004396023713692745, "loss": 1.8, "step": 31193 }, { "epoch": 1.04, "grad_norm": 0.4667816460132599, "learning_rate": 0.00043959311978841736, "loss": 1.7251, "step": 31194 }, { "epoch": 1.04, "grad_norm": 0.485383540391922, "learning_rate": 0.00043958386803811487, "loss": 1.8426, "step": 31195 }, { "epoch": 1.04, "grad_norm": 0.4659315347671509, "learning_rate": 0.0004395746161183781, "loss": 1.7882, "step": 31196 }, { "epoch": 1.04, "grad_norm": 0.4511210024356842, "learning_rate": 0.00043956536402921817, "loss": 1.809, "step": 31197 }, { "epoch": 1.04, "grad_norm": 0.45859700441360474, "learning_rate": 0.00043955611177064647, "loss": 1.8516, "step": 31198 }, { "epoch": 1.04, "grad_norm": 0.4638483226299286, "learning_rate": 0.0004395468593426743, "loss": 1.7711, "step": 31199 }, { "epoch": 1.04, "grad_norm": 0.457565575838089, "learning_rate": 0.00043953760674531284, "loss": 1.8364, "step": 31200 }, { "epoch": 1.04, "grad_norm": 0.4464971423149109, "learning_rate": 0.00043952835397857313, "loss": 1.8425, "step": 31201 }, { "epoch": 1.04, "grad_norm": 0.4646821618080139, "learning_rate": 0.0004395191010424667, "loss": 1.8113, "step": 31202 }, { "epoch": 1.04, "grad_norm": 0.4617994725704193, "learning_rate": 0.0004395098479370046, "loss": 1.8118, "step": 31203 }, { "epoch": 1.04, "grad_norm": 0.4566879868507385, "learning_rate": 0.00043950059466219807, "loss": 1.7777, "step": 31204 }, { "epoch": 1.04, "grad_norm": 0.4682982563972473, "learning_rate": 0.0004394913412180584, "loss": 1.7976, "step": 31205 }, { "epoch": 1.04, "grad_norm": 0.45067068934440613, "learning_rate": 0.0004394820876045968, "loss": 1.8283, "step": 31206 }, { "epoch": 1.04, "grad_norm": 0.4429166913032532, "learning_rate": 0.0004394728338218245, "loss": 1.7956, "step": 31207 }, { "epoch": 1.04, "grad_norm": 0.4638071060180664, "learning_rate": 0.0004394635798697527, "loss": 1.765, "step": 31208 }, { "epoch": 1.04, "grad_norm": 0.4563111960887909, "learning_rate": 0.00043945432574839265, "loss": 1.7845, "step": 31209 }, { "epoch": 1.04, "grad_norm": 0.4505353569984436, "learning_rate": 0.0004394450714577557, "loss": 1.8587, "step": 31210 }, { "epoch": 1.04, "grad_norm": 0.4597976803779602, "learning_rate": 0.00043943581699785284, "loss": 1.8059, "step": 31211 }, { "epoch": 1.04, "grad_norm": 0.4613972306251526, "learning_rate": 0.00043942656236869553, "loss": 1.799, "step": 31212 }, { "epoch": 1.04, "grad_norm": 0.4740985929965973, "learning_rate": 0.00043941730757029486, "loss": 1.8308, "step": 31213 }, { "epoch": 1.04, "grad_norm": 0.4821615219116211, "learning_rate": 0.0004394080526026622, "loss": 1.844, "step": 31214 }, { "epoch": 1.04, "grad_norm": 0.4586086869239807, "learning_rate": 0.00043939879746580864, "loss": 1.7811, "step": 31215 }, { "epoch": 1.04, "grad_norm": 0.45604702830314636, "learning_rate": 0.0004393895421597455, "loss": 1.7823, "step": 31216 }, { "epoch": 1.04, "grad_norm": 0.4632365107536316, "learning_rate": 0.00043938028668448394, "loss": 1.8189, "step": 31217 }, { "epoch": 1.04, "grad_norm": 0.44458574056625366, "learning_rate": 0.0004393710310400354, "loss": 1.7557, "step": 31218 }, { "epoch": 1.04, "grad_norm": 0.44971656799316406, "learning_rate": 0.0004393617752264108, "loss": 1.7894, "step": 31219 }, { "epoch": 1.04, "grad_norm": 0.46648579835891724, "learning_rate": 0.00043935251924362163, "loss": 1.8522, "step": 31220 }, { "epoch": 1.04, "grad_norm": 0.45798689126968384, "learning_rate": 0.000439343263091679, "loss": 1.7665, "step": 31221 }, { "epoch": 1.04, "grad_norm": 0.4579032361507416, "learning_rate": 0.00043933400677059417, "loss": 1.7045, "step": 31222 }, { "epoch": 1.04, "grad_norm": 0.45194265246391296, "learning_rate": 0.0004393247502803784, "loss": 1.792, "step": 31223 }, { "epoch": 1.04, "grad_norm": 0.4518543481826782, "learning_rate": 0.0004393154936210429, "loss": 1.7435, "step": 31224 }, { "epoch": 1.04, "grad_norm": 0.4521549940109253, "learning_rate": 0.00043930623679259897, "loss": 1.7933, "step": 31225 }, { "epoch": 1.04, "grad_norm": 0.4472784996032715, "learning_rate": 0.00043929697979505774, "loss": 1.7804, "step": 31226 }, { "epoch": 1.04, "grad_norm": 0.47440841794013977, "learning_rate": 0.0004392877226284306, "loss": 1.7899, "step": 31227 }, { "epoch": 1.04, "grad_norm": 1.096361756324768, "learning_rate": 0.00043927846529272853, "loss": 1.8205, "step": 31228 }, { "epoch": 1.04, "grad_norm": 0.4633645713329315, "learning_rate": 0.000439269207787963, "loss": 1.835, "step": 31229 }, { "epoch": 1.04, "grad_norm": 0.4425329864025116, "learning_rate": 0.00043925995011414524, "loss": 1.7285, "step": 31230 }, { "epoch": 1.04, "grad_norm": 0.47880879044532776, "learning_rate": 0.0004392506922712863, "loss": 1.7513, "step": 31231 }, { "epoch": 1.04, "grad_norm": 0.4521506428718567, "learning_rate": 0.0004392414342593976, "loss": 1.764, "step": 31232 }, { "epoch": 1.04, "grad_norm": 0.4620203375816345, "learning_rate": 0.00043923217607849023, "loss": 1.7528, "step": 31233 }, { "epoch": 1.04, "grad_norm": 0.45030564069747925, "learning_rate": 0.0004392229177285756, "loss": 1.7637, "step": 31234 }, { "epoch": 1.04, "grad_norm": 0.45004335045814514, "learning_rate": 0.00043921365920966487, "loss": 1.7993, "step": 31235 }, { "epoch": 1.04, "grad_norm": 0.45071887969970703, "learning_rate": 0.00043920440052176924, "loss": 1.7955, "step": 31236 }, { "epoch": 1.04, "grad_norm": 0.47042351961135864, "learning_rate": 0.00043919514166490005, "loss": 1.7872, "step": 31237 }, { "epoch": 1.04, "grad_norm": 0.4607640504837036, "learning_rate": 0.00043918588263906836, "loss": 1.8652, "step": 31238 }, { "epoch": 1.04, "grad_norm": 0.4578554630279541, "learning_rate": 0.0004391766234442856, "loss": 1.7511, "step": 31239 }, { "epoch": 1.04, "grad_norm": 0.450936496257782, "learning_rate": 0.0004391673640805629, "loss": 1.721, "step": 31240 }, { "epoch": 1.04, "grad_norm": 0.44686245918273926, "learning_rate": 0.0004391581045479115, "loss": 1.7571, "step": 31241 }, { "epoch": 1.04, "grad_norm": 0.46441414952278137, "learning_rate": 0.0004391488448463426, "loss": 1.7714, "step": 31242 }, { "epoch": 1.04, "grad_norm": 0.5519230961799622, "learning_rate": 0.0004391395849758676, "loss": 1.8056, "step": 31243 }, { "epoch": 1.04, "grad_norm": 0.4839923679828644, "learning_rate": 0.0004391303249364977, "loss": 1.7942, "step": 31244 }, { "epoch": 1.04, "grad_norm": 0.4616822600364685, "learning_rate": 0.000439121064728244, "loss": 1.7791, "step": 31245 }, { "epoch": 1.04, "grad_norm": 0.4466856122016907, "learning_rate": 0.00043911180435111783, "loss": 1.883, "step": 31246 }, { "epoch": 1.04, "grad_norm": 1.0431150197982788, "learning_rate": 0.00043910254380513045, "loss": 1.8439, "step": 31247 }, { "epoch": 1.04, "grad_norm": 0.43990257382392883, "learning_rate": 0.00043909328309029306, "loss": 1.7827, "step": 31248 }, { "epoch": 1.04, "grad_norm": 0.45883527398109436, "learning_rate": 0.0004390840222066169, "loss": 1.8527, "step": 31249 }, { "epoch": 1.04, "grad_norm": 0.468952476978302, "learning_rate": 0.0004390747611541133, "loss": 1.7293, "step": 31250 }, { "epoch": 1.04, "grad_norm": 0.45826753973960876, "learning_rate": 0.0004390654999327934, "loss": 1.7951, "step": 31251 }, { "epoch": 1.04, "grad_norm": 0.4579508602619171, "learning_rate": 0.0004390562385426684, "loss": 1.7515, "step": 31252 }, { "epoch": 1.04, "grad_norm": 0.4626437723636627, "learning_rate": 0.00043904697698374966, "loss": 1.8527, "step": 31253 }, { "epoch": 1.04, "grad_norm": 0.488540381193161, "learning_rate": 0.00043903771525604843, "loss": 1.8664, "step": 31254 }, { "epoch": 1.04, "grad_norm": 0.43049752712249756, "learning_rate": 0.00043902845335957584, "loss": 1.8412, "step": 31255 }, { "epoch": 1.04, "grad_norm": 0.44884711503982544, "learning_rate": 0.0004390191912943432, "loss": 1.7539, "step": 31256 }, { "epoch": 1.04, "grad_norm": 0.45124295353889465, "learning_rate": 0.0004390099290603618, "loss": 1.8268, "step": 31257 }, { "epoch": 1.04, "grad_norm": 0.4676975905895233, "learning_rate": 0.0004390006666576427, "loss": 1.7947, "step": 31258 }, { "epoch": 1.04, "grad_norm": 0.4701378643512726, "learning_rate": 0.0004389914040861974, "loss": 1.8224, "step": 31259 }, { "epoch": 1.04, "grad_norm": 0.45668792724609375, "learning_rate": 0.000438982141346037, "loss": 1.7679, "step": 31260 }, { "epoch": 1.04, "grad_norm": 0.4464353919029236, "learning_rate": 0.0004389728784371727, "loss": 1.732, "step": 31261 }, { "epoch": 1.04, "grad_norm": 0.43910643458366394, "learning_rate": 0.0004389636153596159, "loss": 1.7817, "step": 31262 }, { "epoch": 1.04, "grad_norm": 0.7140321135520935, "learning_rate": 0.0004389543521133776, "loss": 1.8512, "step": 31263 }, { "epoch": 1.04, "grad_norm": 0.46257102489471436, "learning_rate": 0.0004389450886984693, "loss": 1.7669, "step": 31264 }, { "epoch": 1.04, "grad_norm": 0.469892293214798, "learning_rate": 0.0004389358251149021, "loss": 1.7911, "step": 31265 }, { "epoch": 1.04, "grad_norm": 0.461336225271225, "learning_rate": 0.0004389265613626873, "loss": 1.7324, "step": 31266 }, { "epoch": 1.04, "grad_norm": 0.48983103036880493, "learning_rate": 0.00043891729744183607, "loss": 1.8689, "step": 31267 }, { "epoch": 1.04, "grad_norm": 0.46256324648857117, "learning_rate": 0.0004389080333523598, "loss": 1.8256, "step": 31268 }, { "epoch": 1.04, "grad_norm": 0.47547364234924316, "learning_rate": 0.00043889876909426956, "loss": 1.7808, "step": 31269 }, { "epoch": 1.04, "grad_norm": 0.45647263526916504, "learning_rate": 0.0004388895046675767, "loss": 1.8545, "step": 31270 }, { "epoch": 1.04, "grad_norm": 0.4600640833377838, "learning_rate": 0.0004388802400722925, "loss": 1.6977, "step": 31271 }, { "epoch": 1.04, "grad_norm": 0.4596411883831024, "learning_rate": 0.0004388709753084281, "loss": 1.7405, "step": 31272 }, { "epoch": 1.04, "grad_norm": 0.47253698110580444, "learning_rate": 0.0004388617103759948, "loss": 1.8089, "step": 31273 }, { "epoch": 1.04, "grad_norm": 0.4736560583114624, "learning_rate": 0.00043885244527500387, "loss": 1.8213, "step": 31274 }, { "epoch": 1.04, "grad_norm": 0.46839869022369385, "learning_rate": 0.0004388431800054665, "loss": 1.8401, "step": 31275 }, { "epoch": 1.04, "grad_norm": 0.4936581552028656, "learning_rate": 0.000438833914567394, "loss": 1.826, "step": 31276 }, { "epoch": 1.04, "grad_norm": 0.45629391074180603, "learning_rate": 0.0004388246489607975, "loss": 1.7793, "step": 31277 }, { "epoch": 1.04, "grad_norm": 0.4543859660625458, "learning_rate": 0.0004388153831856885, "loss": 1.8453, "step": 31278 }, { "epoch": 1.04, "grad_norm": 0.4651222825050354, "learning_rate": 0.00043880611724207794, "loss": 1.7498, "step": 31279 }, { "epoch": 1.04, "grad_norm": 0.45150822401046753, "learning_rate": 0.0004387968511299772, "loss": 1.8207, "step": 31280 }, { "epoch": 1.04, "grad_norm": 0.4617231488227844, "learning_rate": 0.00043878758484939753, "loss": 1.8284, "step": 31281 }, { "epoch": 1.04, "grad_norm": 0.49108603596687317, "learning_rate": 0.0004387783184003503, "loss": 1.8422, "step": 31282 }, { "epoch": 1.04, "grad_norm": 0.47883570194244385, "learning_rate": 0.00043876905178284655, "loss": 1.8272, "step": 31283 }, { "epoch": 1.04, "grad_norm": 0.4662151038646698, "learning_rate": 0.0004387597849968976, "loss": 1.8193, "step": 31284 }, { "epoch": 1.04, "grad_norm": 0.4635860323905945, "learning_rate": 0.00043875051804251477, "loss": 1.7522, "step": 31285 }, { "epoch": 1.04, "grad_norm": 0.48726293444633484, "learning_rate": 0.0004387412509197092, "loss": 1.7321, "step": 31286 }, { "epoch": 1.04, "grad_norm": 0.4460429251194, "learning_rate": 0.00043873198362849233, "loss": 1.8298, "step": 31287 }, { "epoch": 1.04, "grad_norm": 0.4533194303512573, "learning_rate": 0.0004387227161688751, "loss": 1.7389, "step": 31288 }, { "epoch": 1.04, "grad_norm": 0.4793980121612549, "learning_rate": 0.000438713448540869, "loss": 1.7919, "step": 31289 }, { "epoch": 1.04, "grad_norm": 0.4720735549926758, "learning_rate": 0.00043870418074448524, "loss": 1.8947, "step": 31290 }, { "epoch": 1.04, "grad_norm": 0.45090219378471375, "learning_rate": 0.000438694912779735, "loss": 1.7455, "step": 31291 }, { "epoch": 1.04, "grad_norm": 0.4467736780643463, "learning_rate": 0.0004386856446466296, "loss": 1.7767, "step": 31292 }, { "epoch": 1.04, "grad_norm": 0.4369738698005676, "learning_rate": 0.0004386763763451802, "loss": 1.7501, "step": 31293 }, { "epoch": 1.04, "grad_norm": 0.4486614465713501, "learning_rate": 0.0004386671078753982, "loss": 1.7639, "step": 31294 }, { "epoch": 1.04, "grad_norm": 1.18893301486969, "learning_rate": 0.00043865783923729463, "loss": 1.7969, "step": 31295 }, { "epoch": 1.04, "grad_norm": 0.4582245945930481, "learning_rate": 0.00043864857043088106, "loss": 1.7239, "step": 31296 }, { "epoch": 1.04, "grad_norm": 0.45515939593315125, "learning_rate": 0.00043863930145616846, "loss": 1.7809, "step": 31297 }, { "epoch": 1.04, "grad_norm": 0.4557628631591797, "learning_rate": 0.00043863003231316817, "loss": 1.7645, "step": 31298 }, { "epoch": 1.04, "grad_norm": 0.4599658250808716, "learning_rate": 0.00043862076300189146, "loss": 1.7889, "step": 31299 }, { "epoch": 1.04, "grad_norm": 0.46517014503479004, "learning_rate": 0.00043861149352234955, "loss": 1.8484, "step": 31300 }, { "epoch": 1.04, "grad_norm": 0.45195382833480835, "learning_rate": 0.00043860222387455373, "loss": 1.7834, "step": 31301 }, { "epoch": 1.04, "grad_norm": 0.458815336227417, "learning_rate": 0.0004385929540585152, "loss": 1.808, "step": 31302 }, { "epoch": 1.04, "grad_norm": 0.4565371572971344, "learning_rate": 0.0004385836840742453, "loss": 1.8479, "step": 31303 }, { "epoch": 1.04, "grad_norm": 0.45896005630493164, "learning_rate": 0.00043857441392175517, "loss": 1.7359, "step": 31304 }, { "epoch": 1.04, "grad_norm": 0.44662466645240784, "learning_rate": 0.00043856514360105615, "loss": 1.7848, "step": 31305 }, { "epoch": 1.04, "grad_norm": 0.4403603672981262, "learning_rate": 0.0004385558731121595, "loss": 1.7934, "step": 31306 }, { "epoch": 1.04, "grad_norm": 0.47633764147758484, "learning_rate": 0.00043854660245507644, "loss": 1.7903, "step": 31307 }, { "epoch": 1.04, "grad_norm": 0.8306064009666443, "learning_rate": 0.00043853733162981814, "loss": 1.7723, "step": 31308 }, { "epoch": 1.04, "grad_norm": 0.4755919277667999, "learning_rate": 0.0004385280606363959, "loss": 1.7178, "step": 31309 }, { "epoch": 1.04, "grad_norm": 0.44945141673088074, "learning_rate": 0.00043851878947482107, "loss": 1.7165, "step": 31310 }, { "epoch": 1.04, "grad_norm": 0.4616605043411255, "learning_rate": 0.00043850951814510485, "loss": 1.791, "step": 31311 }, { "epoch": 1.04, "grad_norm": 0.46922820806503296, "learning_rate": 0.00043850024664725853, "loss": 1.8104, "step": 31312 }, { "epoch": 1.04, "grad_norm": 0.45074668526649475, "learning_rate": 0.0004384909749812933, "loss": 1.7878, "step": 31313 }, { "epoch": 1.04, "grad_norm": 0.4708220958709717, "learning_rate": 0.00043848170314722036, "loss": 1.7678, "step": 31314 }, { "epoch": 1.04, "grad_norm": 0.4528793692588806, "learning_rate": 0.00043847243114505106, "loss": 1.7722, "step": 31315 }, { "epoch": 1.04, "grad_norm": 0.4698544442653656, "learning_rate": 0.0004384631589747966, "loss": 1.7172, "step": 31316 }, { "epoch": 1.04, "grad_norm": 0.48453131318092346, "learning_rate": 0.00043845388663646834, "loss": 1.774, "step": 31317 }, { "epoch": 1.04, "grad_norm": 0.4489554464817047, "learning_rate": 0.0004384446141300774, "loss": 1.8081, "step": 31318 }, { "epoch": 1.04, "grad_norm": 0.46017277240753174, "learning_rate": 0.00043843534145563513, "loss": 1.7606, "step": 31319 }, { "epoch": 1.04, "grad_norm": 0.47581371665000916, "learning_rate": 0.0004384260686131527, "loss": 1.822, "step": 31320 }, { "epoch": 1.04, "grad_norm": 0.4697053134441376, "learning_rate": 0.0004384167956026415, "loss": 1.7635, "step": 31321 }, { "epoch": 1.04, "grad_norm": 0.4599388837814331, "learning_rate": 0.00043840752242411263, "loss": 1.7357, "step": 31322 }, { "epoch": 1.04, "grad_norm": 0.4522678554058075, "learning_rate": 0.0004383982490775775, "loss": 1.8688, "step": 31323 }, { "epoch": 1.04, "grad_norm": 0.46909990906715393, "learning_rate": 0.0004383889755630473, "loss": 1.8406, "step": 31324 }, { "epoch": 1.04, "grad_norm": 0.4398077726364136, "learning_rate": 0.0004383797018805332, "loss": 1.7349, "step": 31325 }, { "epoch": 1.04, "grad_norm": 0.46853217482566833, "learning_rate": 0.00043837042803004657, "loss": 1.8795, "step": 31326 }, { "epoch": 1.04, "grad_norm": 0.4792832136154175, "learning_rate": 0.00043836115401159857, "loss": 1.8389, "step": 31327 }, { "epoch": 1.04, "grad_norm": 0.45133423805236816, "learning_rate": 0.00043835187982520056, "loss": 1.7081, "step": 31328 }, { "epoch": 1.04, "grad_norm": 0.4575446546077728, "learning_rate": 0.00043834260547086375, "loss": 1.813, "step": 31329 }, { "epoch": 1.04, "grad_norm": 0.44721484184265137, "learning_rate": 0.00043833333094859937, "loss": 1.737, "step": 31330 }, { "epoch": 1.04, "grad_norm": 0.45017996430397034, "learning_rate": 0.0004383240562584188, "loss": 1.748, "step": 31331 }, { "epoch": 1.04, "grad_norm": 0.45916101336479187, "learning_rate": 0.0004383147814003332, "loss": 1.8534, "step": 31332 }, { "epoch": 1.04, "grad_norm": 0.4643361568450928, "learning_rate": 0.00043830550637435374, "loss": 1.7934, "step": 31333 }, { "epoch": 1.04, "grad_norm": 0.46465346217155457, "learning_rate": 0.0004382962311804918, "loss": 1.8507, "step": 31334 }, { "epoch": 1.04, "grad_norm": 0.46752774715423584, "learning_rate": 0.00043828695581875864, "loss": 1.8147, "step": 31335 }, { "epoch": 1.04, "grad_norm": 0.45702147483825684, "learning_rate": 0.00043827768028916543, "loss": 1.7384, "step": 31336 }, { "epoch": 1.04, "grad_norm": 0.4597753584384918, "learning_rate": 0.00043826840459172363, "loss": 1.8236, "step": 31337 }, { "epoch": 1.04, "grad_norm": 0.5043429732322693, "learning_rate": 0.0004382591287264443, "loss": 1.8489, "step": 31338 }, { "epoch": 1.04, "grad_norm": 0.4998561143875122, "learning_rate": 0.0004382498526933387, "loss": 1.7479, "step": 31339 }, { "epoch": 1.04, "grad_norm": 0.45610350370407104, "learning_rate": 0.00043824057649241815, "loss": 1.7747, "step": 31340 }, { "epoch": 1.04, "grad_norm": 0.48592981696128845, "learning_rate": 0.00043823130012369394, "loss": 1.7978, "step": 31341 }, { "epoch": 1.04, "grad_norm": 0.5054543614387512, "learning_rate": 0.00043822202358717734, "loss": 1.8034, "step": 31342 }, { "epoch": 1.04, "grad_norm": 0.47473040223121643, "learning_rate": 0.0004382127468828795, "loss": 1.7395, "step": 31343 }, { "epoch": 1.04, "grad_norm": 0.46453922986984253, "learning_rate": 0.0004382034700108118, "loss": 1.8237, "step": 31344 }, { "epoch": 1.04, "grad_norm": 0.4775235950946808, "learning_rate": 0.0004381941929709854, "loss": 1.7375, "step": 31345 }, { "epoch": 1.04, "grad_norm": 0.4666879177093506, "learning_rate": 0.00043818491576341165, "loss": 1.746, "step": 31346 }, { "epoch": 1.04, "grad_norm": 0.49208420515060425, "learning_rate": 0.00043817563838810183, "loss": 1.789, "step": 31347 }, { "epoch": 1.04, "grad_norm": 0.47474950551986694, "learning_rate": 0.000438166360845067, "loss": 1.7842, "step": 31348 }, { "epoch": 1.04, "grad_norm": 0.46385642886161804, "learning_rate": 0.00043815708313431866, "loss": 1.7601, "step": 31349 }, { "epoch": 1.04, "grad_norm": 0.4647929072380066, "learning_rate": 0.0004381478052558679, "loss": 1.8299, "step": 31350 }, { "epoch": 1.04, "grad_norm": 0.46958544850349426, "learning_rate": 0.0004381385272097262, "loss": 1.8331, "step": 31351 }, { "epoch": 1.04, "grad_norm": 0.4518936276435852, "learning_rate": 0.00043812924899590456, "loss": 1.8437, "step": 31352 }, { "epoch": 1.04, "grad_norm": 0.4601075351238251, "learning_rate": 0.0004381199706144144, "loss": 1.7297, "step": 31353 }, { "epoch": 1.04, "grad_norm": 0.4550265073776245, "learning_rate": 0.00043811069206526704, "loss": 1.7876, "step": 31354 }, { "epoch": 1.04, "grad_norm": 0.447235643863678, "learning_rate": 0.00043810141334847344, "loss": 1.7264, "step": 31355 }, { "epoch": 1.04, "grad_norm": 0.4590635299682617, "learning_rate": 0.00043809213446404526, "loss": 1.7624, "step": 31356 }, { "epoch": 1.04, "grad_norm": 0.4531936049461365, "learning_rate": 0.00043808285541199353, "loss": 1.8577, "step": 31357 }, { "epoch": 1.04, "grad_norm": 0.46128132939338684, "learning_rate": 0.00043807357619232956, "loss": 1.788, "step": 31358 }, { "epoch": 1.04, "grad_norm": 0.4539574384689331, "learning_rate": 0.0004380642968050646, "loss": 1.7375, "step": 31359 }, { "epoch": 1.04, "grad_norm": 0.4650605320930481, "learning_rate": 0.00043805501725020985, "loss": 1.7474, "step": 31360 }, { "epoch": 1.04, "grad_norm": 0.44604113698005676, "learning_rate": 0.0004380457375277768, "loss": 1.7459, "step": 31361 }, { "epoch": 1.04, "grad_norm": 0.4719466269016266, "learning_rate": 0.00043803645763777646, "loss": 1.7116, "step": 31362 }, { "epoch": 1.04, "grad_norm": 0.47506988048553467, "learning_rate": 0.0004380271775802202, "loss": 1.7623, "step": 31363 }, { "epoch": 1.04, "grad_norm": 0.4593288004398346, "learning_rate": 0.0004380178973551193, "loss": 1.8171, "step": 31364 }, { "epoch": 1.04, "grad_norm": 0.45335111021995544, "learning_rate": 0.000438008616962485, "loss": 1.7548, "step": 31365 }, { "epoch": 1.04, "grad_norm": 0.460827499628067, "learning_rate": 0.0004379993364023286, "loss": 1.7453, "step": 31366 }, { "epoch": 1.04, "grad_norm": 0.4664926528930664, "learning_rate": 0.00043799005567466136, "loss": 1.759, "step": 31367 }, { "epoch": 1.04, "grad_norm": 0.46090003848075867, "learning_rate": 0.0004379807747794945, "loss": 1.8107, "step": 31368 }, { "epoch": 1.04, "grad_norm": 0.4560549557209015, "learning_rate": 0.00043797149371683933, "loss": 1.804, "step": 31369 }, { "epoch": 1.04, "grad_norm": 0.45622384548187256, "learning_rate": 0.000437962212486707, "loss": 1.732, "step": 31370 }, { "epoch": 1.04, "grad_norm": 0.4535027742385864, "learning_rate": 0.00043795293108910894, "loss": 1.819, "step": 31371 }, { "epoch": 1.04, "grad_norm": 0.4558483362197876, "learning_rate": 0.0004379436495240564, "loss": 1.7608, "step": 31372 }, { "epoch": 1.04, "grad_norm": 0.4362909197807312, "learning_rate": 0.00043793436779156046, "loss": 1.7648, "step": 31373 }, { "epoch": 1.04, "grad_norm": 0.45820391178131104, "learning_rate": 0.00043792508589163265, "loss": 1.7915, "step": 31374 }, { "epoch": 1.04, "grad_norm": 0.47267839312553406, "learning_rate": 0.000437915803824284, "loss": 1.8068, "step": 31375 }, { "epoch": 1.04, "grad_norm": 0.45514655113220215, "learning_rate": 0.00043790652158952597, "loss": 1.8331, "step": 31376 }, { "epoch": 1.04, "grad_norm": 0.47012749314308167, "learning_rate": 0.0004378972391873697, "loss": 1.6917, "step": 31377 }, { "epoch": 1.04, "grad_norm": 0.4634126126766205, "learning_rate": 0.00043788795661782654, "loss": 1.8005, "step": 31378 }, { "epoch": 1.04, "grad_norm": 0.4689078629016876, "learning_rate": 0.0004378786738809078, "loss": 1.7462, "step": 31379 }, { "epoch": 1.04, "grad_norm": 0.45145270228385925, "learning_rate": 0.0004378693909766244, "loss": 1.7601, "step": 31380 }, { "epoch": 1.04, "grad_norm": 0.4720759093761444, "learning_rate": 0.0004378601079049881, "loss": 1.8251, "step": 31381 }, { "epoch": 1.04, "grad_norm": 0.4589717984199524, "learning_rate": 0.00043785082466600984, "loss": 1.909, "step": 31382 }, { "epoch": 1.04, "grad_norm": 0.4513137936592102, "learning_rate": 0.0004378415412597011, "loss": 1.7983, "step": 31383 }, { "epoch": 1.04, "grad_norm": 0.45043331384658813, "learning_rate": 0.0004378322576860729, "loss": 1.8079, "step": 31384 }, { "epoch": 1.04, "grad_norm": 0.45389267802238464, "learning_rate": 0.0004378229739451368, "loss": 1.8659, "step": 31385 }, { "epoch": 1.04, "grad_norm": 0.5076539516448975, "learning_rate": 0.0004378136900369038, "loss": 1.7681, "step": 31386 }, { "epoch": 1.04, "grad_norm": 0.45596957206726074, "learning_rate": 0.0004378044059613853, "loss": 1.8217, "step": 31387 }, { "epoch": 1.04, "grad_norm": 0.47462958097457886, "learning_rate": 0.0004377951217185926, "loss": 1.7443, "step": 31388 }, { "epoch": 1.04, "grad_norm": 0.4546235203742981, "learning_rate": 0.0004377858373085369, "loss": 1.8188, "step": 31389 }, { "epoch": 1.04, "grad_norm": 0.4877181649208069, "learning_rate": 0.00043777655273122946, "loss": 1.7755, "step": 31390 }, { "epoch": 1.04, "grad_norm": 0.44929271936416626, "learning_rate": 0.0004377672679866816, "loss": 1.8041, "step": 31391 }, { "epoch": 1.04, "grad_norm": 0.4506160318851471, "learning_rate": 0.0004377579830749046, "loss": 1.8311, "step": 31392 }, { "epoch": 1.04, "grad_norm": 0.47891053557395935, "learning_rate": 0.0004377486979959098, "loss": 1.7116, "step": 31393 }, { "epoch": 1.04, "grad_norm": 0.4982295334339142, "learning_rate": 0.00043773941274970825, "loss": 1.8138, "step": 31394 }, { "epoch": 1.04, "grad_norm": 0.49269548058509827, "learning_rate": 0.00043773012733631136, "loss": 1.8143, "step": 31395 }, { "epoch": 1.04, "grad_norm": 0.6118490695953369, "learning_rate": 0.0004377208417557304, "loss": 1.7962, "step": 31396 }, { "epoch": 1.04, "grad_norm": 0.4725789427757263, "learning_rate": 0.00043771155600797666, "loss": 1.7919, "step": 31397 }, { "epoch": 1.04, "grad_norm": 0.5136419534683228, "learning_rate": 0.0004377022700930613, "loss": 1.7365, "step": 31398 }, { "epoch": 1.04, "grad_norm": 0.5885258316993713, "learning_rate": 0.00043769298401099584, "loss": 1.7425, "step": 31399 }, { "epoch": 1.04, "grad_norm": 0.4422914683818817, "learning_rate": 0.00043768369776179123, "loss": 1.7816, "step": 31400 }, { "epoch": 1.04, "grad_norm": 0.49813392758369446, "learning_rate": 0.0004376744113454589, "loss": 1.8407, "step": 31401 }, { "epoch": 1.04, "grad_norm": 0.488014817237854, "learning_rate": 0.00043766512476201014, "loss": 1.8034, "step": 31402 }, { "epoch": 1.04, "grad_norm": 0.4526952803134918, "learning_rate": 0.00043765583801145624, "loss": 1.7996, "step": 31403 }, { "epoch": 1.04, "grad_norm": 0.48467695713043213, "learning_rate": 0.0004376465510938085, "loss": 1.7271, "step": 31404 }, { "epoch": 1.04, "grad_norm": 0.4522651731967926, "learning_rate": 0.00043763726400907794, "loss": 1.7539, "step": 31405 }, { "epoch": 1.04, "grad_norm": 0.46094903349876404, "learning_rate": 0.0004376279767572762, "loss": 1.8592, "step": 31406 }, { "epoch": 1.04, "grad_norm": 0.47355523705482483, "learning_rate": 0.00043761868933841434, "loss": 1.8163, "step": 31407 }, { "epoch": 1.04, "grad_norm": 0.4616313874721527, "learning_rate": 0.0004376094017525036, "loss": 1.8946, "step": 31408 }, { "epoch": 1.04, "grad_norm": 0.4753222167491913, "learning_rate": 0.0004376001139995554, "loss": 1.8259, "step": 31409 }, { "epoch": 1.05, "grad_norm": 0.4596497714519501, "learning_rate": 0.0004375908260795808, "loss": 1.7733, "step": 31410 }, { "epoch": 1.05, "grad_norm": 0.4555656313896179, "learning_rate": 0.0004375815379925913, "loss": 1.7386, "step": 31411 }, { "epoch": 1.05, "grad_norm": 0.4654746949672699, "learning_rate": 0.0004375722497385981, "loss": 1.7609, "step": 31412 }, { "epoch": 1.05, "grad_norm": 0.4596622884273529, "learning_rate": 0.00043756296131761244, "loss": 1.8225, "step": 31413 }, { "epoch": 1.05, "grad_norm": 0.4460035264492035, "learning_rate": 0.0004375536727296457, "loss": 1.7645, "step": 31414 }, { "epoch": 1.05, "grad_norm": 0.4473808705806732, "learning_rate": 0.0004375443839747089, "loss": 1.7671, "step": 31415 }, { "epoch": 1.05, "grad_norm": 0.48405998945236206, "learning_rate": 0.0004375350950528137, "loss": 1.7417, "step": 31416 }, { "epoch": 1.05, "grad_norm": 0.45113351941108704, "learning_rate": 0.00043752580596397093, "loss": 1.723, "step": 31417 }, { "epoch": 1.05, "grad_norm": 0.46421897411346436, "learning_rate": 0.00043751651670819223, "loss": 1.8532, "step": 31418 }, { "epoch": 1.05, "grad_norm": 0.4463070333003998, "learning_rate": 0.00043750722728548867, "loss": 1.8076, "step": 31419 }, { "epoch": 1.05, "grad_norm": 0.4609379470348358, "learning_rate": 0.00043749793769587165, "loss": 1.7686, "step": 31420 }, { "epoch": 1.05, "grad_norm": 0.466098427772522, "learning_rate": 0.0004374886479393524, "loss": 1.7706, "step": 31421 }, { "epoch": 1.05, "grad_norm": 0.4454374611377716, "learning_rate": 0.0004374793580159421, "loss": 1.715, "step": 31422 }, { "epoch": 1.05, "grad_norm": 0.4625868499279022, "learning_rate": 0.00043747006792565227, "loss": 1.7704, "step": 31423 }, { "epoch": 1.05, "grad_norm": 0.4478849768638611, "learning_rate": 0.0004374607776684939, "loss": 1.8181, "step": 31424 }, { "epoch": 1.05, "grad_norm": 0.44906455278396606, "learning_rate": 0.00043745148724447843, "loss": 1.8876, "step": 31425 }, { "epoch": 1.05, "grad_norm": 0.44987690448760986, "learning_rate": 0.00043744219665361713, "loss": 1.7683, "step": 31426 }, { "epoch": 1.05, "grad_norm": 0.45306605100631714, "learning_rate": 0.00043743290589592123, "loss": 1.6834, "step": 31427 }, { "epoch": 1.05, "grad_norm": 0.4494697153568268, "learning_rate": 0.0004374236149714021, "loss": 1.7385, "step": 31428 }, { "epoch": 1.05, "grad_norm": 0.44972696900367737, "learning_rate": 0.00043741432388007094, "loss": 1.7632, "step": 31429 }, { "epoch": 1.05, "grad_norm": 0.4592653214931488, "learning_rate": 0.0004374050326219389, "loss": 1.8362, "step": 31430 }, { "epoch": 1.05, "grad_norm": 0.4611304700374603, "learning_rate": 0.00043739574119701754, "loss": 1.8178, "step": 31431 }, { "epoch": 1.05, "grad_norm": 0.4582524299621582, "learning_rate": 0.000437386449605318, "loss": 1.7534, "step": 31432 }, { "epoch": 1.05, "grad_norm": 0.45134350657463074, "learning_rate": 0.00043737715784685144, "loss": 1.7862, "step": 31433 }, { "epoch": 1.05, "grad_norm": 0.478543221950531, "learning_rate": 0.0004373678659216294, "loss": 1.7507, "step": 31434 }, { "epoch": 1.05, "grad_norm": 0.462352454662323, "learning_rate": 0.0004373585738296628, "loss": 1.7939, "step": 31435 }, { "epoch": 1.05, "grad_norm": 0.46393081545829773, "learning_rate": 0.0004373492815709633, "loss": 1.783, "step": 31436 }, { "epoch": 1.05, "grad_norm": 0.4580886960029602, "learning_rate": 0.0004373399891455419, "loss": 1.7556, "step": 31437 }, { "epoch": 1.05, "grad_norm": 0.4715881943702698, "learning_rate": 0.0004373306965534101, "loss": 1.8136, "step": 31438 }, { "epoch": 1.05, "grad_norm": 0.47729241847991943, "learning_rate": 0.000437321403794579, "loss": 1.8441, "step": 31439 }, { "epoch": 1.05, "grad_norm": 0.4610409736633301, "learning_rate": 0.00043731211086905995, "loss": 1.6949, "step": 31440 }, { "epoch": 1.05, "grad_norm": 0.4594886004924774, "learning_rate": 0.00043730281777686433, "loss": 1.8651, "step": 31441 }, { "epoch": 1.05, "grad_norm": 0.4557782709598541, "learning_rate": 0.00043729352451800314, "loss": 1.727, "step": 31442 }, { "epoch": 1.05, "grad_norm": 0.4658758044242859, "learning_rate": 0.000437284231092488, "loss": 1.7781, "step": 31443 }, { "epoch": 1.05, "grad_norm": 0.45121797919273376, "learning_rate": 0.00043727493750033, "loss": 1.7869, "step": 31444 }, { "epoch": 1.05, "grad_norm": 0.4568786025047302, "learning_rate": 0.00043726564374154043, "loss": 1.7719, "step": 31445 }, { "epoch": 1.05, "grad_norm": 0.4484511613845825, "learning_rate": 0.0004372563498161306, "loss": 1.8072, "step": 31446 }, { "epoch": 1.05, "grad_norm": 0.4496065676212311, "learning_rate": 0.0004372470557241117, "loss": 1.8106, "step": 31447 }, { "epoch": 1.05, "grad_norm": 0.4691127836704254, "learning_rate": 0.00043723776146549525, "loss": 1.7425, "step": 31448 }, { "epoch": 1.05, "grad_norm": 0.4519827663898468, "learning_rate": 0.00043722846704029225, "loss": 1.762, "step": 31449 }, { "epoch": 1.05, "grad_norm": 0.4732576608657837, "learning_rate": 0.00043721917244851416, "loss": 1.7377, "step": 31450 }, { "epoch": 1.05, "grad_norm": 0.44346892833709717, "learning_rate": 0.00043720987769017226, "loss": 1.7451, "step": 31451 }, { "epoch": 1.05, "grad_norm": 0.4575026333332062, "learning_rate": 0.00043720058276527766, "loss": 1.764, "step": 31452 }, { "epoch": 1.05, "grad_norm": 0.6119785904884338, "learning_rate": 0.00043719128767384186, "loss": 1.7575, "step": 31453 }, { "epoch": 1.05, "grad_norm": 0.4507220387458801, "learning_rate": 0.0004371819924158761, "loss": 1.8145, "step": 31454 }, { "epoch": 1.05, "grad_norm": 0.45099276304244995, "learning_rate": 0.0004371726969913915, "loss": 1.7886, "step": 31455 }, { "epoch": 1.05, "grad_norm": 0.46650460362434387, "learning_rate": 0.00043716340140039953, "loss": 1.7476, "step": 31456 }, { "epoch": 1.05, "grad_norm": 0.4862048625946045, "learning_rate": 0.00043715410564291134, "loss": 1.8217, "step": 31457 }, { "epoch": 1.05, "grad_norm": 0.4655296504497528, "learning_rate": 0.00043714480971893837, "loss": 1.8548, "step": 31458 }, { "epoch": 1.05, "grad_norm": 0.46123942732810974, "learning_rate": 0.00043713551362849174, "loss": 1.7468, "step": 31459 }, { "epoch": 1.05, "grad_norm": 0.4493462145328522, "learning_rate": 0.00043712621737158284, "loss": 1.7012, "step": 31460 }, { "epoch": 1.05, "grad_norm": 0.4847963750362396, "learning_rate": 0.0004371169209482229, "loss": 1.7295, "step": 31461 }, { "epoch": 1.05, "grad_norm": 0.470607191324234, "learning_rate": 0.0004371076243584232, "loss": 1.7021, "step": 31462 }, { "epoch": 1.05, "grad_norm": 0.4511219263076782, "learning_rate": 0.000437098327602195, "loss": 1.7146, "step": 31463 }, { "epoch": 1.05, "grad_norm": 0.48138606548309326, "learning_rate": 0.0004370890306795498, "loss": 1.8172, "step": 31464 }, { "epoch": 1.05, "grad_norm": 0.4904472231864929, "learning_rate": 0.0004370797335904985, "loss": 1.6903, "step": 31465 }, { "epoch": 1.05, "grad_norm": 0.451418399810791, "learning_rate": 0.00043707043633505274, "loss": 1.7808, "step": 31466 }, { "epoch": 1.05, "grad_norm": 0.44814154505729675, "learning_rate": 0.00043706113891322357, "loss": 1.8609, "step": 31467 }, { "epoch": 1.05, "grad_norm": 0.449666291475296, "learning_rate": 0.0004370518413250225, "loss": 1.8363, "step": 31468 }, { "epoch": 1.05, "grad_norm": 0.4735824465751648, "learning_rate": 0.0004370425435704606, "loss": 1.8507, "step": 31469 }, { "epoch": 1.05, "grad_norm": 0.4739082157611847, "learning_rate": 0.00043703324564954923, "loss": 1.8255, "step": 31470 }, { "epoch": 1.05, "grad_norm": 0.44713106751441956, "learning_rate": 0.0004370239475622998, "loss": 1.7781, "step": 31471 }, { "epoch": 1.05, "grad_norm": 0.46297508478164673, "learning_rate": 0.0004370146493087233, "loss": 1.8362, "step": 31472 }, { "epoch": 1.05, "grad_norm": 0.45519202947616577, "learning_rate": 0.0004370053508888314, "loss": 1.759, "step": 31473 }, { "epoch": 1.05, "grad_norm": 0.4628838300704956, "learning_rate": 0.00043699605230263514, "loss": 1.757, "step": 31474 }, { "epoch": 1.05, "grad_norm": 0.45370638370513916, "learning_rate": 0.00043698675355014583, "loss": 1.7501, "step": 31475 }, { "epoch": 1.05, "grad_norm": 0.4451219439506531, "learning_rate": 0.00043697745463137477, "loss": 1.7526, "step": 31476 }, { "epoch": 1.05, "grad_norm": 0.4384686052799225, "learning_rate": 0.0004369681555463333, "loss": 1.7591, "step": 31477 }, { "epoch": 1.05, "grad_norm": 0.4576854109764099, "learning_rate": 0.00043695885629503264, "loss": 1.7762, "step": 31478 }, { "epoch": 1.05, "grad_norm": 0.45030102133750916, "learning_rate": 0.00043694955687748413, "loss": 1.7911, "step": 31479 }, { "epoch": 1.05, "grad_norm": 0.4661119282245636, "learning_rate": 0.000436940257293699, "loss": 1.7724, "step": 31480 }, { "epoch": 1.05, "grad_norm": 0.4741894006729126, "learning_rate": 0.00043693095754368863, "loss": 1.721, "step": 31481 }, { "epoch": 1.05, "grad_norm": 0.47598251700401306, "learning_rate": 0.00043692165762746415, "loss": 1.774, "step": 31482 }, { "epoch": 1.05, "grad_norm": 0.45484888553619385, "learning_rate": 0.00043691235754503706, "loss": 1.803, "step": 31483 }, { "epoch": 1.05, "grad_norm": 0.44685956835746765, "learning_rate": 0.0004369030572964185, "loss": 1.797, "step": 31484 }, { "epoch": 1.05, "grad_norm": 0.48249682784080505, "learning_rate": 0.0004368937568816198, "loss": 1.7655, "step": 31485 }, { "epoch": 1.05, "grad_norm": 0.4853367805480957, "learning_rate": 0.0004368844563006523, "loss": 1.753, "step": 31486 }, { "epoch": 1.05, "grad_norm": 0.46567443013191223, "learning_rate": 0.0004368751555535272, "loss": 1.7664, "step": 31487 }, { "epoch": 1.05, "grad_norm": 0.4600589871406555, "learning_rate": 0.00043686585464025574, "loss": 1.8444, "step": 31488 }, { "epoch": 1.05, "grad_norm": 0.47573959827423096, "learning_rate": 0.00043685655356084945, "loss": 1.7935, "step": 31489 }, { "epoch": 1.05, "grad_norm": 0.4630510210990906, "learning_rate": 0.0004368472523153193, "loss": 1.8132, "step": 31490 }, { "epoch": 1.05, "grad_norm": 0.45021459460258484, "learning_rate": 0.0004368379509036769, "loss": 1.7866, "step": 31491 }, { "epoch": 1.05, "grad_norm": 0.45983341336250305, "learning_rate": 0.0004368286493259333, "loss": 1.8023, "step": 31492 }, { "epoch": 1.05, "grad_norm": 0.4809512495994568, "learning_rate": 0.00043681934758209995, "loss": 1.8029, "step": 31493 }, { "epoch": 1.05, "grad_norm": 0.4647100865840912, "learning_rate": 0.000436810045672188, "loss": 1.7992, "step": 31494 }, { "epoch": 1.05, "grad_norm": 0.46295416355133057, "learning_rate": 0.0004368007435962089, "loss": 1.7989, "step": 31495 }, { "epoch": 1.05, "grad_norm": 0.4661584198474884, "learning_rate": 0.0004367914413541738, "loss": 1.8233, "step": 31496 }, { "epoch": 1.05, "grad_norm": 0.5219175815582275, "learning_rate": 0.0004367821389460941, "loss": 1.8353, "step": 31497 }, { "epoch": 1.05, "grad_norm": 0.4793211817741394, "learning_rate": 0.000436772836371981, "loss": 1.7457, "step": 31498 }, { "epoch": 1.05, "grad_norm": 0.48315325379371643, "learning_rate": 0.0004367635336318457, "loss": 1.839, "step": 31499 }, { "epoch": 1.05, "grad_norm": 0.5328902006149292, "learning_rate": 0.0004367542307256998, "loss": 1.7411, "step": 31500 }, { "epoch": 1.05, "grad_norm": 0.46515989303588867, "learning_rate": 0.0004367449276535544, "loss": 1.7516, "step": 31501 }, { "epoch": 1.05, "grad_norm": 0.4667339622974396, "learning_rate": 0.0004367356244154207, "loss": 1.76, "step": 31502 }, { "epoch": 1.05, "grad_norm": 0.4858401119709015, "learning_rate": 0.0004367263210113102, "loss": 1.8815, "step": 31503 }, { "epoch": 1.05, "grad_norm": 0.469050794839859, "learning_rate": 0.000436717017441234, "loss": 1.8157, "step": 31504 }, { "epoch": 1.05, "grad_norm": 0.5039362907409668, "learning_rate": 0.00043670771370520354, "loss": 1.7904, "step": 31505 }, { "epoch": 1.05, "grad_norm": 0.45459097623825073, "learning_rate": 0.0004366984098032301, "loss": 1.7989, "step": 31506 }, { "epoch": 1.05, "grad_norm": 0.4405832886695862, "learning_rate": 0.0004366891057353248, "loss": 1.7824, "step": 31507 }, { "epoch": 1.05, "grad_norm": 0.45224729180336, "learning_rate": 0.0004366798015014992, "loss": 1.7473, "step": 31508 }, { "epoch": 1.05, "grad_norm": 0.47842079401016235, "learning_rate": 0.0004366704971017644, "loss": 1.6733, "step": 31509 }, { "epoch": 1.05, "grad_norm": 0.4749873876571655, "learning_rate": 0.0004366611925361318, "loss": 1.8053, "step": 31510 }, { "epoch": 1.05, "grad_norm": 0.44529297947883606, "learning_rate": 0.00043665188780461257, "loss": 1.8137, "step": 31511 }, { "epoch": 1.05, "grad_norm": 0.47206997871398926, "learning_rate": 0.00043664258290721815, "loss": 1.8018, "step": 31512 }, { "epoch": 1.05, "grad_norm": 0.45635753870010376, "learning_rate": 0.00043663327784395977, "loss": 1.7843, "step": 31513 }, { "epoch": 1.05, "grad_norm": 0.4617421329021454, "learning_rate": 0.00043662397261484866, "loss": 1.7652, "step": 31514 }, { "epoch": 1.05, "grad_norm": 0.4622531235218048, "learning_rate": 0.0004366146672198962, "loss": 1.8516, "step": 31515 }, { "epoch": 1.05, "grad_norm": 0.4739600121974945, "learning_rate": 0.00043660536165911375, "loss": 1.8064, "step": 31516 }, { "epoch": 1.05, "grad_norm": 0.4552968740463257, "learning_rate": 0.00043659605593251245, "loss": 1.7842, "step": 31517 }, { "epoch": 1.05, "grad_norm": 0.48470553755760193, "learning_rate": 0.00043658675004010363, "loss": 1.808, "step": 31518 }, { "epoch": 1.05, "grad_norm": 0.45304954051971436, "learning_rate": 0.0004365774439818986, "loss": 1.8563, "step": 31519 }, { "epoch": 1.05, "grad_norm": 0.4602241516113281, "learning_rate": 0.00043656813775790873, "loss": 1.7947, "step": 31520 }, { "epoch": 1.05, "grad_norm": 0.4541199803352356, "learning_rate": 0.00043655883136814536, "loss": 1.752, "step": 31521 }, { "epoch": 1.05, "grad_norm": 0.4617416560649872, "learning_rate": 0.00043654952481261947, "loss": 1.8425, "step": 31522 }, { "epoch": 1.05, "grad_norm": 0.4785148501396179, "learning_rate": 0.00043654021809134275, "loss": 1.8377, "step": 31523 }, { "epoch": 1.05, "grad_norm": 0.4815771281719208, "learning_rate": 0.0004365309112043263, "loss": 1.7994, "step": 31524 }, { "epoch": 1.05, "grad_norm": 0.4706031382083893, "learning_rate": 0.00043652160415158135, "loss": 1.7408, "step": 31525 }, { "epoch": 1.05, "grad_norm": 0.47710874676704407, "learning_rate": 0.00043651229693311943, "loss": 1.7355, "step": 31526 }, { "epoch": 1.05, "grad_norm": 0.48413941264152527, "learning_rate": 0.0004365029895489516, "loss": 1.8057, "step": 31527 }, { "epoch": 1.05, "grad_norm": 0.47013500332832336, "learning_rate": 0.0004364936819990893, "loss": 1.7929, "step": 31528 }, { "epoch": 1.05, "grad_norm": 0.4566709101200104, "learning_rate": 0.0004364843742835436, "loss": 1.8318, "step": 31529 }, { "epoch": 1.05, "grad_norm": 0.4599141776561737, "learning_rate": 0.00043647506640232626, "loss": 1.737, "step": 31530 }, { "epoch": 1.05, "grad_norm": 0.4725264310836792, "learning_rate": 0.00043646575835544813, "loss": 1.8368, "step": 31531 }, { "epoch": 1.05, "grad_norm": 0.4739631116390228, "learning_rate": 0.0004364564501429207, "loss": 1.8724, "step": 31532 }, { "epoch": 1.05, "grad_norm": 0.4703330397605896, "learning_rate": 0.0004364471417647553, "loss": 1.7456, "step": 31533 }, { "epoch": 1.05, "grad_norm": 0.45874762535095215, "learning_rate": 0.00043643783322096306, "loss": 1.8124, "step": 31534 }, { "epoch": 1.05, "grad_norm": 0.45996472239494324, "learning_rate": 0.0004364285245115555, "loss": 1.7647, "step": 31535 }, { "epoch": 1.05, "grad_norm": 0.4698786735534668, "learning_rate": 0.0004364192156365438, "loss": 1.7653, "step": 31536 }, { "epoch": 1.05, "grad_norm": 0.45843127369880676, "learning_rate": 0.00043640990659593923, "loss": 1.7679, "step": 31537 }, { "epoch": 1.05, "grad_norm": 0.4634189307689667, "learning_rate": 0.0004364005973897531, "loss": 1.7142, "step": 31538 }, { "epoch": 1.05, "grad_norm": 0.46872633695602417, "learning_rate": 0.0004363912880179969, "loss": 1.8371, "step": 31539 }, { "epoch": 1.05, "grad_norm": 0.45816364884376526, "learning_rate": 0.0004363819784806817, "loss": 1.8315, "step": 31540 }, { "epoch": 1.05, "grad_norm": 0.47874411940574646, "learning_rate": 0.0004363726687778188, "loss": 1.759, "step": 31541 }, { "epoch": 1.05, "grad_norm": 0.4716244637966156, "learning_rate": 0.0004363633589094196, "loss": 1.7705, "step": 31542 }, { "epoch": 1.05, "grad_norm": 0.49119192361831665, "learning_rate": 0.0004363540488754954, "loss": 1.8393, "step": 31543 }, { "epoch": 1.05, "grad_norm": 0.45486271381378174, "learning_rate": 0.0004363447386760574, "loss": 1.7136, "step": 31544 }, { "epoch": 1.05, "grad_norm": 0.4547331929206848, "learning_rate": 0.0004363354283111171, "loss": 1.8313, "step": 31545 }, { "epoch": 1.05, "grad_norm": 0.4829380512237549, "learning_rate": 0.00043632611778068566, "loss": 1.8246, "step": 31546 }, { "epoch": 1.05, "grad_norm": 0.47585490345954895, "learning_rate": 0.0004363168070847743, "loss": 1.8028, "step": 31547 }, { "epoch": 1.05, "grad_norm": 0.6540789604187012, "learning_rate": 0.00043630749622339455, "loss": 1.7973, "step": 31548 }, { "epoch": 1.05, "grad_norm": 0.47764402627944946, "learning_rate": 0.0004362981851965575, "loss": 1.7549, "step": 31549 }, { "epoch": 1.05, "grad_norm": 0.5242563486099243, "learning_rate": 0.00043628887400427456, "loss": 1.8411, "step": 31550 }, { "epoch": 1.05, "grad_norm": 0.4614528715610504, "learning_rate": 0.00043627956264655703, "loss": 1.8193, "step": 31551 }, { "epoch": 1.05, "grad_norm": 0.47560200095176697, "learning_rate": 0.00043627025112341606, "loss": 1.7955, "step": 31552 }, { "epoch": 1.05, "grad_norm": 0.49423620104789734, "learning_rate": 0.00043626093943486323, "loss": 1.7867, "step": 31553 }, { "epoch": 1.05, "grad_norm": 0.46887943148612976, "learning_rate": 0.00043625162758090957, "loss": 1.7796, "step": 31554 }, { "epoch": 1.05, "grad_norm": 0.4581741690635681, "learning_rate": 0.0004362423155615667, "loss": 1.8495, "step": 31555 }, { "epoch": 1.05, "grad_norm": 0.4504226744174957, "learning_rate": 0.0004362330033768456, "loss": 1.8286, "step": 31556 }, { "epoch": 1.05, "grad_norm": 0.45461615920066833, "learning_rate": 0.0004362236910267577, "loss": 1.7967, "step": 31557 }, { "epoch": 1.05, "grad_norm": 0.4733589291572571, "learning_rate": 0.0004362143785113144, "loss": 1.7767, "step": 31558 }, { "epoch": 1.05, "grad_norm": 0.48856157064437866, "learning_rate": 0.0004362050658305268, "loss": 1.7685, "step": 31559 }, { "epoch": 1.05, "grad_norm": 0.4637240469455719, "learning_rate": 0.0004361957529844064, "loss": 1.7437, "step": 31560 }, { "epoch": 1.05, "grad_norm": 0.4669173061847687, "learning_rate": 0.0004361864399729644, "loss": 1.7868, "step": 31561 }, { "epoch": 1.05, "grad_norm": 0.5108693242073059, "learning_rate": 0.00043617712679621214, "loss": 1.8034, "step": 31562 }, { "epoch": 1.05, "grad_norm": 0.46040236949920654, "learning_rate": 0.0004361678134541609, "loss": 1.7708, "step": 31563 }, { "epoch": 1.05, "grad_norm": 0.4535215497016907, "learning_rate": 0.00043615849994682197, "loss": 1.8646, "step": 31564 }, { "epoch": 1.05, "grad_norm": 0.4605885148048401, "learning_rate": 0.0004361491862742068, "loss": 1.7958, "step": 31565 }, { "epoch": 1.05, "grad_norm": 0.48439863324165344, "learning_rate": 0.00043613987243632646, "loss": 1.8467, "step": 31566 }, { "epoch": 1.05, "grad_norm": 0.4779393672943115, "learning_rate": 0.0004361305584331924, "loss": 1.775, "step": 31567 }, { "epoch": 1.05, "grad_norm": 0.45678624510765076, "learning_rate": 0.0004361212442648159, "loss": 1.7628, "step": 31568 }, { "epoch": 1.05, "grad_norm": 0.4520817995071411, "learning_rate": 0.00043611192993120825, "loss": 1.752, "step": 31569 }, { "epoch": 1.05, "grad_norm": 0.4653545916080475, "learning_rate": 0.0004361026154323808, "loss": 1.792, "step": 31570 }, { "epoch": 1.05, "grad_norm": 0.47020450234413147, "learning_rate": 0.0004360933007683449, "loss": 1.7667, "step": 31571 }, { "epoch": 1.05, "grad_norm": 0.46245893836021423, "learning_rate": 0.0004360839859391117, "loss": 1.7789, "step": 31572 }, { "epoch": 1.05, "grad_norm": 0.47771209478378296, "learning_rate": 0.00043607467094469254, "loss": 1.8388, "step": 31573 }, { "epoch": 1.05, "grad_norm": 0.4478076696395874, "learning_rate": 0.00043606535578509883, "loss": 1.7942, "step": 31574 }, { "epoch": 1.05, "grad_norm": 0.4621616303920746, "learning_rate": 0.0004360560404603418, "loss": 1.7428, "step": 31575 }, { "epoch": 1.05, "grad_norm": 0.45333006978034973, "learning_rate": 0.0004360467249704329, "loss": 1.8214, "step": 31576 }, { "epoch": 1.05, "grad_norm": 0.4713003635406494, "learning_rate": 0.0004360374093153832, "loss": 1.7402, "step": 31577 }, { "epoch": 1.05, "grad_norm": 0.46018660068511963, "learning_rate": 0.0004360280934952042, "loss": 1.802, "step": 31578 }, { "epoch": 1.05, "grad_norm": 0.44268426299095154, "learning_rate": 0.0004360187775099071, "loss": 1.7688, "step": 31579 }, { "epoch": 1.05, "grad_norm": 0.4445951282978058, "learning_rate": 0.00043600946135950323, "loss": 1.7605, "step": 31580 }, { "epoch": 1.05, "grad_norm": 0.4533476233482361, "learning_rate": 0.00043600014504400396, "loss": 1.7246, "step": 31581 }, { "epoch": 1.05, "grad_norm": 0.455591082572937, "learning_rate": 0.0004359908285634205, "loss": 1.8196, "step": 31582 }, { "epoch": 1.05, "grad_norm": 0.4602963924407959, "learning_rate": 0.0004359815119177643, "loss": 1.7704, "step": 31583 }, { "epoch": 1.05, "grad_norm": 0.4616489112377167, "learning_rate": 0.0004359721951070464, "loss": 1.7815, "step": 31584 }, { "epoch": 1.05, "grad_norm": 0.459066778421402, "learning_rate": 0.0004359628781312785, "loss": 1.8912, "step": 31585 }, { "epoch": 1.05, "grad_norm": 0.4534952938556671, "learning_rate": 0.0004359535609904715, "loss": 1.7837, "step": 31586 }, { "epoch": 1.05, "grad_norm": 0.45006516575813293, "learning_rate": 0.0004359442436846371, "loss": 1.8548, "step": 31587 }, { "epoch": 1.05, "grad_norm": 0.45394617319107056, "learning_rate": 0.00043593492621378635, "loss": 1.7099, "step": 31588 }, { "epoch": 1.05, "grad_norm": 0.44228890538215637, "learning_rate": 0.00043592560857793055, "loss": 1.7484, "step": 31589 }, { "epoch": 1.05, "grad_norm": 0.4483031630516052, "learning_rate": 0.00043591629077708124, "loss": 1.8086, "step": 31590 }, { "epoch": 1.05, "grad_norm": 0.46394431591033936, "learning_rate": 0.0004359069728112494, "loss": 1.8185, "step": 31591 }, { "epoch": 1.05, "grad_norm": 0.4573385715484619, "learning_rate": 0.00043589765468044666, "loss": 1.7894, "step": 31592 }, { "epoch": 1.05, "grad_norm": 0.46466487646102905, "learning_rate": 0.00043588833638468416, "loss": 1.7979, "step": 31593 }, { "epoch": 1.05, "grad_norm": 0.45608824491500854, "learning_rate": 0.0004358790179239732, "loss": 1.727, "step": 31594 }, { "epoch": 1.05, "grad_norm": 0.4464513957500458, "learning_rate": 0.0004358696992983252, "loss": 1.7942, "step": 31595 }, { "epoch": 1.05, "grad_norm": 0.46319082379341125, "learning_rate": 0.0004358603805077514, "loss": 1.807, "step": 31596 }, { "epoch": 1.05, "grad_norm": 0.44708487391471863, "learning_rate": 0.000435851061552263, "loss": 1.8551, "step": 31597 }, { "epoch": 1.05, "grad_norm": 0.4553294777870178, "learning_rate": 0.00043584174243187153, "loss": 1.89, "step": 31598 }, { "epoch": 1.05, "grad_norm": 0.4691840410232544, "learning_rate": 0.0004358324231465882, "loss": 1.7475, "step": 31599 }, { "epoch": 1.05, "grad_norm": 0.44431230425834656, "learning_rate": 0.0004358231036964243, "loss": 1.7936, "step": 31600 }, { "epoch": 1.05, "grad_norm": 0.4505043625831604, "learning_rate": 0.00043581378408139113, "loss": 1.8054, "step": 31601 }, { "epoch": 1.05, "grad_norm": 0.445010781288147, "learning_rate": 0.0004358044643015001, "loss": 1.7302, "step": 31602 }, { "epoch": 1.05, "grad_norm": 0.7813966274261475, "learning_rate": 0.00043579514435676244, "loss": 1.7785, "step": 31603 }, { "epoch": 1.05, "grad_norm": 0.44497859477996826, "learning_rate": 0.00043578582424718944, "loss": 1.7346, "step": 31604 }, { "epoch": 1.05, "grad_norm": 0.44770684838294983, "learning_rate": 0.00043577650397279245, "loss": 1.7651, "step": 31605 }, { "epoch": 1.05, "grad_norm": 0.4798228144645691, "learning_rate": 0.0004357671835335829, "loss": 1.7607, "step": 31606 }, { "epoch": 1.05, "grad_norm": 0.4787004292011261, "learning_rate": 0.0004357578629295719, "loss": 1.7902, "step": 31607 }, { "epoch": 1.05, "grad_norm": 0.4602043628692627, "learning_rate": 0.00043574854216077094, "loss": 1.831, "step": 31608 }, { "epoch": 1.05, "grad_norm": 0.4490777552127838, "learning_rate": 0.0004357392212271911, "loss": 1.7083, "step": 31609 }, { "epoch": 1.05, "grad_norm": 0.4701366126537323, "learning_rate": 0.000435729900128844, "loss": 1.8865, "step": 31610 }, { "epoch": 1.05, "grad_norm": 0.4678495526313782, "learning_rate": 0.0004357205788657407, "loss": 1.7622, "step": 31611 }, { "epoch": 1.05, "grad_norm": 0.47034651041030884, "learning_rate": 0.0004357112574378927, "loss": 1.797, "step": 31612 }, { "epoch": 1.05, "grad_norm": 0.4503268599510193, "learning_rate": 0.00043570193584531117, "loss": 1.8135, "step": 31613 }, { "epoch": 1.05, "grad_norm": 0.46074384450912476, "learning_rate": 0.00043569261408800744, "loss": 1.7532, "step": 31614 }, { "epoch": 1.05, "grad_norm": 0.4514879584312439, "learning_rate": 0.00043568329216599303, "loss": 1.7744, "step": 31615 }, { "epoch": 1.05, "grad_norm": 0.4554550051689148, "learning_rate": 0.0004356739700792789, "loss": 1.7799, "step": 31616 }, { "epoch": 1.05, "grad_norm": 0.4662773013114929, "learning_rate": 0.00043566464782787673, "loss": 1.7465, "step": 31617 }, { "epoch": 1.05, "grad_norm": 0.4477589428424835, "learning_rate": 0.00043565532541179757, "loss": 1.7118, "step": 31618 }, { "epoch": 1.05, "grad_norm": 0.4546710252761841, "learning_rate": 0.00043564600283105287, "loss": 1.786, "step": 31619 }, { "epoch": 1.05, "grad_norm": 0.449883371591568, "learning_rate": 0.0004356366800856539, "loss": 1.8067, "step": 31620 }, { "epoch": 1.05, "grad_norm": 0.4728897511959076, "learning_rate": 0.0004356273571756119, "loss": 1.8028, "step": 31621 }, { "epoch": 1.05, "grad_norm": 0.4675210416316986, "learning_rate": 0.0004356180341009384, "loss": 1.7524, "step": 31622 }, { "epoch": 1.05, "grad_norm": 0.47080108523368835, "learning_rate": 0.00043560871086164454, "loss": 1.771, "step": 31623 }, { "epoch": 1.05, "grad_norm": 0.44281211495399475, "learning_rate": 0.0004355993874577417, "loss": 1.7858, "step": 31624 }, { "epoch": 1.05, "grad_norm": 0.49923431873321533, "learning_rate": 0.0004355900638892411, "loss": 1.8039, "step": 31625 }, { "epoch": 1.05, "grad_norm": 0.45715054869651794, "learning_rate": 0.0004355807401561543, "loss": 1.8241, "step": 31626 }, { "epoch": 1.05, "grad_norm": 0.4372943937778473, "learning_rate": 0.00043557141625849235, "loss": 1.7647, "step": 31627 }, { "epoch": 1.05, "grad_norm": 0.465261846780777, "learning_rate": 0.0004355620921962667, "loss": 1.7846, "step": 31628 }, { "epoch": 1.05, "grad_norm": 0.46669667959213257, "learning_rate": 0.00043555276796948864, "loss": 1.7916, "step": 31629 }, { "epoch": 1.05, "grad_norm": 0.46119236946105957, "learning_rate": 0.0004355434435781695, "loss": 1.7981, "step": 31630 }, { "epoch": 1.05, "grad_norm": 0.4560306966304779, "learning_rate": 0.0004355341190223206, "loss": 1.8281, "step": 31631 }, { "epoch": 1.05, "grad_norm": 0.4722067415714264, "learning_rate": 0.0004355247943019531, "loss": 1.7256, "step": 31632 }, { "epoch": 1.05, "grad_norm": 0.4692113697528839, "learning_rate": 0.00043551546941707876, "loss": 1.8103, "step": 31633 }, { "epoch": 1.05, "grad_norm": 0.46028438210487366, "learning_rate": 0.0004355061443677084, "loss": 1.8251, "step": 31634 }, { "epoch": 1.05, "grad_norm": 0.4494103491306305, "learning_rate": 0.00043549681915385364, "loss": 1.7696, "step": 31635 }, { "epoch": 1.05, "grad_norm": 0.44106224179267883, "learning_rate": 0.0004354874937755256, "loss": 1.826, "step": 31636 }, { "epoch": 1.05, "grad_norm": 0.46476686000823975, "learning_rate": 0.00043547816823273573, "loss": 1.8043, "step": 31637 }, { "epoch": 1.05, "grad_norm": 0.4413056969642639, "learning_rate": 0.0004354688425254954, "loss": 1.7698, "step": 31638 }, { "epoch": 1.05, "grad_norm": 0.4544605016708374, "learning_rate": 0.0004354595166538158, "loss": 1.7958, "step": 31639 }, { "epoch": 1.05, "grad_norm": 0.4777010381221771, "learning_rate": 0.0004354501906177083, "loss": 1.72, "step": 31640 }, { "epoch": 1.05, "grad_norm": 0.4469819962978363, "learning_rate": 0.00043544086441718425, "loss": 1.8369, "step": 31641 }, { "epoch": 1.05, "grad_norm": 0.47380590438842773, "learning_rate": 0.000435431538052255, "loss": 1.8076, "step": 31642 }, { "epoch": 1.05, "grad_norm": 0.45865681767463684, "learning_rate": 0.0004354222115229318, "loss": 1.811, "step": 31643 }, { "epoch": 1.05, "grad_norm": 0.4400655925273895, "learning_rate": 0.0004354128848292259, "loss": 1.7612, "step": 31644 }, { "epoch": 1.05, "grad_norm": 0.45895734429359436, "learning_rate": 0.0004354035579711488, "loss": 1.8278, "step": 31645 }, { "epoch": 1.05, "grad_norm": 0.4398263692855835, "learning_rate": 0.0004353942309487116, "loss": 1.7531, "step": 31646 }, { "epoch": 1.05, "grad_norm": 0.4617764949798584, "learning_rate": 0.0004353849037619259, "loss": 1.7979, "step": 31647 }, { "epoch": 1.05, "grad_norm": 0.4450931251049042, "learning_rate": 0.0004353755764108028, "loss": 1.6966, "step": 31648 }, { "epoch": 1.05, "grad_norm": 0.4780479967594147, "learning_rate": 0.00043536624889535373, "loss": 1.8701, "step": 31649 }, { "epoch": 1.05, "grad_norm": 0.45830246806144714, "learning_rate": 0.00043535692121559, "loss": 1.7579, "step": 31650 }, { "epoch": 1.05, "grad_norm": 0.4688984751701355, "learning_rate": 0.0004353475933715228, "loss": 1.7556, "step": 31651 }, { "epoch": 1.05, "grad_norm": 0.4521583616733551, "learning_rate": 0.00043533826536316375, "loss": 1.8753, "step": 31652 }, { "epoch": 1.05, "grad_norm": 0.436046838760376, "learning_rate": 0.0004353289371905238, "loss": 1.7912, "step": 31653 }, { "epoch": 1.05, "grad_norm": 0.45699772238731384, "learning_rate": 0.0004353196088536146, "loss": 1.8034, "step": 31654 }, { "epoch": 1.05, "grad_norm": 0.4538402259349823, "learning_rate": 0.00043531028035244726, "loss": 1.7641, "step": 31655 }, { "epoch": 1.05, "grad_norm": 0.4350340664386749, "learning_rate": 0.00043530095168703317, "loss": 1.7707, "step": 31656 }, { "epoch": 1.05, "grad_norm": 0.4527563750743866, "learning_rate": 0.0004352916228573837, "loss": 1.7966, "step": 31657 }, { "epoch": 1.05, "grad_norm": 0.45840880274772644, "learning_rate": 0.00043528229386351013, "loss": 1.7959, "step": 31658 }, { "epoch": 1.05, "grad_norm": 0.4720986783504486, "learning_rate": 0.00043527296470542373, "loss": 1.828, "step": 31659 }, { "epoch": 1.05, "grad_norm": 0.46091434359550476, "learning_rate": 0.000435263635383136, "loss": 1.8056, "step": 31660 }, { "epoch": 1.05, "grad_norm": 0.45585882663726807, "learning_rate": 0.00043525430589665804, "loss": 1.8529, "step": 31661 }, { "epoch": 1.05, "grad_norm": 0.476300984621048, "learning_rate": 0.00043524497624600124, "loss": 1.7978, "step": 31662 }, { "epoch": 1.05, "grad_norm": 0.45999908447265625, "learning_rate": 0.00043523564643117716, "loss": 1.8246, "step": 31663 }, { "epoch": 1.05, "grad_norm": 0.46899741888046265, "learning_rate": 0.0004352263164521967, "loss": 1.833, "step": 31664 }, { "epoch": 1.05, "grad_norm": 0.4638822674751282, "learning_rate": 0.00043521698630907157, "loss": 1.8003, "step": 31665 }, { "epoch": 1.05, "grad_norm": 0.463855504989624, "learning_rate": 0.0004352076560018129, "loss": 1.7311, "step": 31666 }, { "epoch": 1.05, "grad_norm": 0.4611847400665283, "learning_rate": 0.00043519832553043207, "loss": 1.7446, "step": 31667 }, { "epoch": 1.05, "grad_norm": 0.45264479517936707, "learning_rate": 0.0004351889948949404, "loss": 1.7676, "step": 31668 }, { "epoch": 1.05, "grad_norm": 0.4671500623226166, "learning_rate": 0.0004351796640953491, "loss": 1.8032, "step": 31669 }, { "epoch": 1.05, "grad_norm": 0.4549214839935303, "learning_rate": 0.0004351703331316698, "loss": 1.7811, "step": 31670 }, { "epoch": 1.05, "grad_norm": 0.4606704115867615, "learning_rate": 0.00043516100200391346, "loss": 1.7705, "step": 31671 }, { "epoch": 1.05, "grad_norm": 0.4475507438182831, "learning_rate": 0.00043515167071209166, "loss": 1.7406, "step": 31672 }, { "epoch": 1.05, "grad_norm": 0.44865673780441284, "learning_rate": 0.0004351423392562157, "loss": 1.7219, "step": 31673 }, { "epoch": 1.05, "grad_norm": 0.4606366753578186, "learning_rate": 0.0004351330076362967, "loss": 1.7548, "step": 31674 }, { "epoch": 1.05, "grad_norm": 0.46151503920555115, "learning_rate": 0.00043512367585234624, "loss": 1.7844, "step": 31675 }, { "epoch": 1.05, "grad_norm": 0.4448913335800171, "learning_rate": 0.00043511434390437547, "loss": 1.8102, "step": 31676 }, { "epoch": 1.05, "grad_norm": 0.46173855662345886, "learning_rate": 0.0004351050117923959, "loss": 1.8052, "step": 31677 }, { "epoch": 1.05, "grad_norm": 0.43411868810653687, "learning_rate": 0.0004350956795164187, "loss": 1.7622, "step": 31678 }, { "epoch": 1.05, "grad_norm": 0.47545844316482544, "learning_rate": 0.0004350863470764553, "loss": 1.78, "step": 31679 }, { "epoch": 1.05, "grad_norm": 0.44704902172088623, "learning_rate": 0.0004350770144725169, "loss": 1.7249, "step": 31680 }, { "epoch": 1.05, "grad_norm": 0.44857797026634216, "learning_rate": 0.00043506768170461493, "loss": 1.8366, "step": 31681 }, { "epoch": 1.05, "grad_norm": 0.46304264664649963, "learning_rate": 0.0004350583487727608, "loss": 1.8252, "step": 31682 }, { "epoch": 1.05, "grad_norm": 0.45221710205078125, "learning_rate": 0.0004350490156769656, "loss": 1.7887, "step": 31683 }, { "epoch": 1.05, "grad_norm": 0.4550861716270447, "learning_rate": 0.00043503968241724085, "loss": 1.8503, "step": 31684 }, { "epoch": 1.05, "grad_norm": 0.45036864280700684, "learning_rate": 0.0004350303489935978, "loss": 1.7722, "step": 31685 }, { "epoch": 1.05, "grad_norm": 0.458320677280426, "learning_rate": 0.0004350210154060478, "loss": 1.8184, "step": 31686 }, { "epoch": 1.05, "grad_norm": 0.4465993046760559, "learning_rate": 0.0004350116816546022, "loss": 1.6833, "step": 31687 }, { "epoch": 1.05, "grad_norm": 0.4442276954650879, "learning_rate": 0.00043500234773927236, "loss": 1.7616, "step": 31688 }, { "epoch": 1.05, "grad_norm": 0.45025596022605896, "learning_rate": 0.00043499301366006944, "loss": 1.7883, "step": 31689 }, { "epoch": 1.05, "grad_norm": 0.4613361656665802, "learning_rate": 0.000434983679417005, "loss": 1.7571, "step": 31690 }, { "epoch": 1.05, "grad_norm": 0.45405933260917664, "learning_rate": 0.0004349743450100902, "loss": 1.8145, "step": 31691 }, { "epoch": 1.05, "grad_norm": 0.48293182253837585, "learning_rate": 0.00043496501043933645, "loss": 1.8339, "step": 31692 }, { "epoch": 1.05, "grad_norm": 0.4473613202571869, "learning_rate": 0.00043495567570475506, "loss": 1.7786, "step": 31693 }, { "epoch": 1.05, "grad_norm": 0.45364078879356384, "learning_rate": 0.00043494634080635733, "loss": 1.6994, "step": 31694 }, { "epoch": 1.05, "grad_norm": 0.4604650139808655, "learning_rate": 0.00043493700574415476, "loss": 1.8116, "step": 31695 }, { "epoch": 1.05, "grad_norm": 0.4605911374092102, "learning_rate": 0.0004349276705181584, "loss": 1.8661, "step": 31696 }, { "epoch": 1.05, "grad_norm": 0.4493984580039978, "learning_rate": 0.0004349183351283798, "loss": 1.8681, "step": 31697 }, { "epoch": 1.05, "grad_norm": 0.4660790264606476, "learning_rate": 0.00043490899957483026, "loss": 1.7916, "step": 31698 }, { "epoch": 1.05, "grad_norm": 0.49221494793891907, "learning_rate": 0.0004348996638575209, "loss": 1.7463, "step": 31699 }, { "epoch": 1.05, "grad_norm": 0.4647755026817322, "learning_rate": 0.00043489032797646343, "loss": 1.7559, "step": 31700 }, { "epoch": 1.05, "grad_norm": 0.45133307576179504, "learning_rate": 0.0004348809919316688, "loss": 1.7676, "step": 31701 }, { "epoch": 1.05, "grad_norm": 0.46526315808296204, "learning_rate": 0.00043487165572314866, "loss": 1.7804, "step": 31702 }, { "epoch": 1.05, "grad_norm": 0.45526137948036194, "learning_rate": 0.00043486231935091415, "loss": 1.7639, "step": 31703 }, { "epoch": 1.05, "grad_norm": 0.45686933398246765, "learning_rate": 0.00043485298281497665, "loss": 1.7249, "step": 31704 }, { "epoch": 1.05, "grad_norm": 0.4479122757911682, "learning_rate": 0.00043484364611534756, "loss": 1.7892, "step": 31705 }, { "epoch": 1.05, "grad_norm": 0.477830708026886, "learning_rate": 0.000434834309252038, "loss": 1.7733, "step": 31706 }, { "epoch": 1.05, "grad_norm": 0.47476258873939514, "learning_rate": 0.00043482497222505957, "loss": 1.8028, "step": 31707 }, { "epoch": 1.05, "grad_norm": 0.48198026418685913, "learning_rate": 0.0004348156350344235, "loss": 1.833, "step": 31708 }, { "epoch": 1.05, "grad_norm": 0.4722137153148651, "learning_rate": 0.0004348062976801411, "loss": 1.8301, "step": 31709 }, { "epoch": 1.05, "grad_norm": 0.4615255892276764, "learning_rate": 0.0004347969601622237, "loss": 1.7558, "step": 31710 }, { "epoch": 1.06, "grad_norm": 0.46823522448539734, "learning_rate": 0.0004347876224806826, "loss": 1.807, "step": 31711 }, { "epoch": 1.06, "grad_norm": 0.49611589312553406, "learning_rate": 0.0004347782846355293, "loss": 1.7625, "step": 31712 }, { "epoch": 1.06, "grad_norm": 0.493198037147522, "learning_rate": 0.00043476894662677494, "loss": 1.7183, "step": 31713 }, { "epoch": 1.06, "grad_norm": 0.5008677840232849, "learning_rate": 0.00043475960845443094, "loss": 1.9066, "step": 31714 }, { "epoch": 1.06, "grad_norm": 0.4864393472671509, "learning_rate": 0.0004347502701185087, "loss": 1.737, "step": 31715 }, { "epoch": 1.06, "grad_norm": 0.483530193567276, "learning_rate": 0.00043474093161901937, "loss": 1.8409, "step": 31716 }, { "epoch": 1.06, "grad_norm": 0.45335790514945984, "learning_rate": 0.00043473159295597446, "loss": 1.8105, "step": 31717 }, { "epoch": 1.06, "grad_norm": 0.44230741262435913, "learning_rate": 0.0004347222541293853, "loss": 1.7488, "step": 31718 }, { "epoch": 1.06, "grad_norm": 0.4636348485946655, "learning_rate": 0.00043471291513926306, "loss": 1.828, "step": 31719 }, { "epoch": 1.06, "grad_norm": 0.457305371761322, "learning_rate": 0.00043470357598561927, "loss": 1.8376, "step": 31720 }, { "epoch": 1.06, "grad_norm": 0.4661214351654053, "learning_rate": 0.0004346942366684652, "loss": 1.8256, "step": 31721 }, { "epoch": 1.06, "grad_norm": 0.45412808656692505, "learning_rate": 0.0004346848971878121, "loss": 1.8032, "step": 31722 }, { "epoch": 1.06, "grad_norm": 0.4635221064090729, "learning_rate": 0.00043467555754367153, "loss": 1.8486, "step": 31723 }, { "epoch": 1.06, "grad_norm": 0.46525779366493225, "learning_rate": 0.0004346662177360545, "loss": 1.782, "step": 31724 }, { "epoch": 1.06, "grad_norm": 0.4698331356048584, "learning_rate": 0.0004346568777649726, "loss": 1.8969, "step": 31725 }, { "epoch": 1.06, "grad_norm": 0.48288998007774353, "learning_rate": 0.00043464753763043705, "loss": 1.8013, "step": 31726 }, { "epoch": 1.06, "grad_norm": 0.4696396589279175, "learning_rate": 0.00043463819733245934, "loss": 1.825, "step": 31727 }, { "epoch": 1.06, "grad_norm": 0.48510295152664185, "learning_rate": 0.0004346288568710506, "loss": 1.8146, "step": 31728 }, { "epoch": 1.06, "grad_norm": 0.48238664865493774, "learning_rate": 0.0004346195162462223, "loss": 1.8116, "step": 31729 }, { "epoch": 1.06, "grad_norm": 0.47702568769454956, "learning_rate": 0.00043461017545798575, "loss": 1.8048, "step": 31730 }, { "epoch": 1.06, "grad_norm": 0.44952741265296936, "learning_rate": 0.0004346008345063522, "loss": 1.7762, "step": 31731 }, { "epoch": 1.06, "grad_norm": 0.4673192799091339, "learning_rate": 0.0004345914933913332, "loss": 1.8504, "step": 31732 }, { "epoch": 1.06, "grad_norm": 0.47956401109695435, "learning_rate": 0.00043458215211293985, "loss": 1.7889, "step": 31733 }, { "epoch": 1.06, "grad_norm": 0.4666721820831299, "learning_rate": 0.00043457281067118365, "loss": 1.7686, "step": 31734 }, { "epoch": 1.06, "grad_norm": 0.44324567914009094, "learning_rate": 0.00043456346906607586, "loss": 1.7863, "step": 31735 }, { "epoch": 1.06, "grad_norm": 0.4599459171295166, "learning_rate": 0.0004345541272976278, "loss": 1.7517, "step": 31736 }, { "epoch": 1.06, "grad_norm": 0.4826662242412567, "learning_rate": 0.000434544785365851, "loss": 1.8493, "step": 31737 }, { "epoch": 1.06, "grad_norm": 0.4568067193031311, "learning_rate": 0.0004345354432707565, "loss": 1.7681, "step": 31738 }, { "epoch": 1.06, "grad_norm": 0.4557461440563202, "learning_rate": 0.00043452610101235594, "loss": 1.7392, "step": 31739 }, { "epoch": 1.06, "grad_norm": 0.42988401651382446, "learning_rate": 0.0004345167585906604, "loss": 1.7601, "step": 31740 }, { "epoch": 1.06, "grad_norm": 0.4588649570941925, "learning_rate": 0.00043450741600568134, "loss": 1.8195, "step": 31741 }, { "epoch": 1.06, "grad_norm": 0.44447869062423706, "learning_rate": 0.00043449807325743014, "loss": 1.8256, "step": 31742 }, { "epoch": 1.06, "grad_norm": 0.4587196707725525, "learning_rate": 0.0004344887303459181, "loss": 1.8612, "step": 31743 }, { "epoch": 1.06, "grad_norm": 0.46740710735321045, "learning_rate": 0.00043447938727115653, "loss": 1.7621, "step": 31744 }, { "epoch": 1.06, "grad_norm": 0.4549262821674347, "learning_rate": 0.00043447004403315685, "loss": 1.7674, "step": 31745 }, { "epoch": 1.06, "grad_norm": 0.4540528357028961, "learning_rate": 0.0004344607006319303, "loss": 1.7271, "step": 31746 }, { "epoch": 1.06, "grad_norm": 0.4507042467594147, "learning_rate": 0.00043445135706748824, "loss": 1.8177, "step": 31747 }, { "epoch": 1.06, "grad_norm": 0.45362746715545654, "learning_rate": 0.00043444201333984214, "loss": 1.7343, "step": 31748 }, { "epoch": 1.06, "grad_norm": 0.4768216609954834, "learning_rate": 0.0004344326694490031, "loss": 1.8278, "step": 31749 }, { "epoch": 1.06, "grad_norm": 0.4628604054450989, "learning_rate": 0.0004344233253949828, "loss": 1.865, "step": 31750 }, { "epoch": 1.06, "grad_norm": 0.4594910442829132, "learning_rate": 0.00043441398117779226, "loss": 1.817, "step": 31751 }, { "epoch": 1.06, "grad_norm": 0.4667542278766632, "learning_rate": 0.00043440463679744295, "loss": 1.8332, "step": 31752 }, { "epoch": 1.06, "grad_norm": 0.4587474763393402, "learning_rate": 0.0004343952922539462, "loss": 1.7589, "step": 31753 }, { "epoch": 1.06, "grad_norm": 0.4603608250617981, "learning_rate": 0.0004343859475473135, "loss": 1.7038, "step": 31754 }, { "epoch": 1.06, "grad_norm": 0.4729743003845215, "learning_rate": 0.00043437660267755596, "loss": 1.8445, "step": 31755 }, { "epoch": 1.06, "grad_norm": 0.4512866735458374, "learning_rate": 0.000434367257644685, "loss": 1.8023, "step": 31756 }, { "epoch": 1.06, "grad_norm": 0.4791058897972107, "learning_rate": 0.00043435791244871203, "loss": 1.8841, "step": 31757 }, { "epoch": 1.06, "grad_norm": 0.44720298051834106, "learning_rate": 0.00043434856708964833, "loss": 1.7692, "step": 31758 }, { "epoch": 1.06, "grad_norm": 0.45975008606910706, "learning_rate": 0.0004343392215675053, "loss": 1.7773, "step": 31759 }, { "epoch": 1.06, "grad_norm": 0.4674083888530731, "learning_rate": 0.0004343298758822942, "loss": 1.8802, "step": 31760 }, { "epoch": 1.06, "grad_norm": 0.43738481402397156, "learning_rate": 0.0004343205300340264, "loss": 1.7429, "step": 31761 }, { "epoch": 1.06, "grad_norm": 0.46402305364608765, "learning_rate": 0.00043431118402271335, "loss": 1.7936, "step": 31762 }, { "epoch": 1.06, "grad_norm": 0.45653289556503296, "learning_rate": 0.00043430183784836626, "loss": 1.7909, "step": 31763 }, { "epoch": 1.06, "grad_norm": 0.4433327615261078, "learning_rate": 0.0004342924915109966, "loss": 1.7516, "step": 31764 }, { "epoch": 1.06, "grad_norm": 0.4731718599796295, "learning_rate": 0.00043428314501061554, "loss": 1.8285, "step": 31765 }, { "epoch": 1.06, "grad_norm": 0.44943299889564514, "learning_rate": 0.0004342737983472346, "loss": 1.8969, "step": 31766 }, { "epoch": 1.06, "grad_norm": 0.45577260851860046, "learning_rate": 0.0004342644515208651, "loss": 1.7507, "step": 31767 }, { "epoch": 1.06, "grad_norm": 0.45966511964797974, "learning_rate": 0.0004342551045315182, "loss": 1.8191, "step": 31768 }, { "epoch": 1.06, "grad_norm": 0.4513620138168335, "learning_rate": 0.0004342457573792055, "loss": 1.9146, "step": 31769 }, { "epoch": 1.06, "grad_norm": 0.9645464420318604, "learning_rate": 0.00043423641006393816, "loss": 1.8259, "step": 31770 }, { "epoch": 1.06, "grad_norm": 0.43996381759643555, "learning_rate": 0.0004342270625857276, "loss": 1.7104, "step": 31771 }, { "epoch": 1.06, "grad_norm": 0.4545769691467285, "learning_rate": 0.00043421771494458523, "loss": 1.8105, "step": 31772 }, { "epoch": 1.06, "grad_norm": 0.4684281647205353, "learning_rate": 0.0004342083671405222, "loss": 1.7431, "step": 31773 }, { "epoch": 1.06, "grad_norm": 0.4491746425628662, "learning_rate": 0.0004341990191735502, "loss": 1.7988, "step": 31774 }, { "epoch": 1.06, "grad_norm": 0.44267019629478455, "learning_rate": 0.0004341896710436802, "loss": 1.8241, "step": 31775 }, { "epoch": 1.06, "grad_norm": 0.49796581268310547, "learning_rate": 0.0004341803227509237, "loss": 1.8091, "step": 31776 }, { "epoch": 1.06, "grad_norm": 0.43882355093955994, "learning_rate": 0.00043417097429529207, "loss": 1.7748, "step": 31777 }, { "epoch": 1.06, "grad_norm": 0.45437899231910706, "learning_rate": 0.00043416162567679674, "loss": 1.8253, "step": 31778 }, { "epoch": 1.06, "grad_norm": 0.4707665741443634, "learning_rate": 0.0004341522768954489, "loss": 1.7708, "step": 31779 }, { "epoch": 1.06, "grad_norm": 0.45230790972709656, "learning_rate": 0.00043414292795125993, "loss": 1.7951, "step": 31780 }, { "epoch": 1.06, "grad_norm": 0.4475545585155487, "learning_rate": 0.00043413357884424127, "loss": 1.8513, "step": 31781 }, { "epoch": 1.06, "grad_norm": 0.4552363157272339, "learning_rate": 0.0004341242295744042, "loss": 1.7619, "step": 31782 }, { "epoch": 1.06, "grad_norm": 0.46005043387413025, "learning_rate": 0.0004341148801417601, "loss": 1.7546, "step": 31783 }, { "epoch": 1.06, "grad_norm": 0.4736791253089905, "learning_rate": 0.0004341055305463202, "loss": 1.8116, "step": 31784 }, { "epoch": 1.06, "grad_norm": 0.4643610119819641, "learning_rate": 0.00043409618078809605, "loss": 1.7508, "step": 31785 }, { "epoch": 1.06, "grad_norm": 0.479478657245636, "learning_rate": 0.0004340868308670988, "loss": 1.8114, "step": 31786 }, { "epoch": 1.06, "grad_norm": 0.45550042390823364, "learning_rate": 0.00043407748078334006, "loss": 1.7662, "step": 31787 }, { "epoch": 1.06, "grad_norm": 0.48898231983184814, "learning_rate": 0.00043406813053683077, "loss": 1.7515, "step": 31788 }, { "epoch": 1.06, "grad_norm": 0.4579983353614807, "learning_rate": 0.00043405878012758273, "loss": 1.8086, "step": 31789 }, { "epoch": 1.06, "grad_norm": 0.45465588569641113, "learning_rate": 0.000434049429555607, "loss": 1.7954, "step": 31790 }, { "epoch": 1.06, "grad_norm": 0.46856698393821716, "learning_rate": 0.000434040078820915, "loss": 1.7926, "step": 31791 }, { "epoch": 1.06, "grad_norm": 0.45535606145858765, "learning_rate": 0.0004340307279235182, "loss": 1.8547, "step": 31792 }, { "epoch": 1.06, "grad_norm": 0.4630604088306427, "learning_rate": 0.00043402137686342766, "loss": 1.7873, "step": 31793 }, { "epoch": 1.06, "grad_norm": 0.4627993404865265, "learning_rate": 0.000434012025640655, "loss": 1.7392, "step": 31794 }, { "epoch": 1.06, "grad_norm": 0.46626827120780945, "learning_rate": 0.0004340026742552115, "loss": 1.817, "step": 31795 }, { "epoch": 1.06, "grad_norm": 0.4434657692909241, "learning_rate": 0.0004339933227071085, "loss": 1.7453, "step": 31796 }, { "epoch": 1.06, "grad_norm": 0.45094770193099976, "learning_rate": 0.00043398397099635736, "loss": 1.7275, "step": 31797 }, { "epoch": 1.06, "grad_norm": 0.4314610958099365, "learning_rate": 0.00043397461912296937, "loss": 1.7744, "step": 31798 }, { "epoch": 1.06, "grad_norm": 0.4666428565979004, "learning_rate": 0.00043396526708695604, "loss": 1.886, "step": 31799 }, { "epoch": 1.06, "grad_norm": 0.4561725854873657, "learning_rate": 0.00043395591488832854, "loss": 1.7577, "step": 31800 }, { "epoch": 1.06, "grad_norm": 0.44856199622154236, "learning_rate": 0.00043394656252709825, "loss": 1.7816, "step": 31801 }, { "epoch": 1.06, "grad_norm": 0.46776190400123596, "learning_rate": 0.00043393721000327667, "loss": 1.7253, "step": 31802 }, { "epoch": 1.06, "grad_norm": 0.45900142192840576, "learning_rate": 0.000433927857316875, "loss": 1.7514, "step": 31803 }, { "epoch": 1.06, "grad_norm": 0.45587506890296936, "learning_rate": 0.0004339185044679046, "loss": 1.8591, "step": 31804 }, { "epoch": 1.06, "grad_norm": 0.45554468035697937, "learning_rate": 0.00043390915145637696, "loss": 1.7576, "step": 31805 }, { "epoch": 1.06, "grad_norm": 0.4631574749946594, "learning_rate": 0.00043389979828230334, "loss": 1.802, "step": 31806 }, { "epoch": 1.06, "grad_norm": 0.46038395166397095, "learning_rate": 0.000433890444945695, "loss": 1.7354, "step": 31807 }, { "epoch": 1.06, "grad_norm": 0.4704586863517761, "learning_rate": 0.0004338810914465635, "loss": 1.8612, "step": 31808 }, { "epoch": 1.06, "grad_norm": 0.4688641428947449, "learning_rate": 0.00043387173778491997, "loss": 1.8549, "step": 31809 }, { "epoch": 1.06, "grad_norm": 0.46060478687286377, "learning_rate": 0.000433862383960776, "loss": 1.7488, "step": 31810 }, { "epoch": 1.06, "grad_norm": 0.4638766944408417, "learning_rate": 0.0004338530299741427, "loss": 1.8388, "step": 31811 }, { "epoch": 1.06, "grad_norm": 0.46513378620147705, "learning_rate": 0.00043384367582503157, "loss": 1.8184, "step": 31812 }, { "epoch": 1.06, "grad_norm": 0.46211981773376465, "learning_rate": 0.00043383432151345396, "loss": 1.7521, "step": 31813 }, { "epoch": 1.06, "grad_norm": 0.4554302394390106, "learning_rate": 0.00043382496703942117, "loss": 1.7843, "step": 31814 }, { "epoch": 1.06, "grad_norm": 0.4512941539287567, "learning_rate": 0.00043381561240294475, "loss": 1.7655, "step": 31815 }, { "epoch": 1.06, "grad_norm": 0.45225435495376587, "learning_rate": 0.0004338062576040356, "loss": 1.7817, "step": 31816 }, { "epoch": 1.06, "grad_norm": 0.4735012352466583, "learning_rate": 0.00043379690264270566, "loss": 1.8755, "step": 31817 }, { "epoch": 1.06, "grad_norm": 0.4866926968097687, "learning_rate": 0.00043378754751896575, "loss": 1.8201, "step": 31818 }, { "epoch": 1.06, "grad_norm": 0.4658254384994507, "learning_rate": 0.00043377819223282764, "loss": 1.833, "step": 31819 }, { "epoch": 1.06, "grad_norm": 0.45571985840797424, "learning_rate": 0.00043376883678430245, "loss": 1.7728, "step": 31820 }, { "epoch": 1.06, "grad_norm": 0.4489547312259674, "learning_rate": 0.0004337594811734016, "loss": 1.7508, "step": 31821 }, { "epoch": 1.06, "grad_norm": 0.4551258087158203, "learning_rate": 0.0004337501254001365, "loss": 1.7108, "step": 31822 }, { "epoch": 1.06, "grad_norm": 0.44989874958992004, "learning_rate": 0.00043374076946451837, "loss": 1.783, "step": 31823 }, { "epoch": 1.06, "grad_norm": 0.4747922420501709, "learning_rate": 0.0004337314133665587, "loss": 1.8938, "step": 31824 }, { "epoch": 1.06, "grad_norm": 0.4561607539653778, "learning_rate": 0.0004337220571062688, "loss": 1.7773, "step": 31825 }, { "epoch": 1.06, "grad_norm": 0.44670939445495605, "learning_rate": 0.00043371270068366007, "loss": 1.7387, "step": 31826 }, { "epoch": 1.06, "grad_norm": 0.46861180663108826, "learning_rate": 0.00043370334409874377, "loss": 1.7973, "step": 31827 }, { "epoch": 1.06, "grad_norm": 0.45779457688331604, "learning_rate": 0.0004336939873515313, "loss": 1.8469, "step": 31828 }, { "epoch": 1.06, "grad_norm": 0.45377880334854126, "learning_rate": 0.00043368463044203407, "loss": 1.8493, "step": 31829 }, { "epoch": 1.06, "grad_norm": 0.45842117071151733, "learning_rate": 0.00043367527337026334, "loss": 1.7314, "step": 31830 }, { "epoch": 1.06, "grad_norm": 0.46213796734809875, "learning_rate": 0.00043366591613623053, "loss": 1.815, "step": 31831 }, { "epoch": 1.06, "grad_norm": 0.44504696130752563, "learning_rate": 0.00043365655873994703, "loss": 1.8185, "step": 31832 }, { "epoch": 1.06, "grad_norm": 0.4623599350452423, "learning_rate": 0.0004336472011814241, "loss": 1.7891, "step": 31833 }, { "epoch": 1.06, "grad_norm": 0.4678487181663513, "learning_rate": 0.0004336378434606733, "loss": 1.7585, "step": 31834 }, { "epoch": 1.06, "grad_norm": 0.4477173686027527, "learning_rate": 0.00043362848557770573, "loss": 1.7669, "step": 31835 }, { "epoch": 1.06, "grad_norm": 0.4759673476219177, "learning_rate": 0.0004336191275325328, "loss": 1.7348, "step": 31836 }, { "epoch": 1.06, "grad_norm": 0.46529197692871094, "learning_rate": 0.00043360976932516613, "loss": 1.7393, "step": 31837 }, { "epoch": 1.06, "grad_norm": 0.512943685054779, "learning_rate": 0.0004336004109556168, "loss": 1.8373, "step": 31838 }, { "epoch": 1.06, "grad_norm": 0.47524183988571167, "learning_rate": 0.00043359105242389627, "loss": 1.8261, "step": 31839 }, { "epoch": 1.06, "grad_norm": 0.46631187200546265, "learning_rate": 0.00043358169373001586, "loss": 1.8047, "step": 31840 }, { "epoch": 1.06, "grad_norm": 0.4810810983181, "learning_rate": 0.0004335723348739869, "loss": 1.8754, "step": 31841 }, { "epoch": 1.06, "grad_norm": 0.4533906877040863, "learning_rate": 0.00043356297585582087, "loss": 1.8342, "step": 31842 }, { "epoch": 1.06, "grad_norm": 0.4603591561317444, "learning_rate": 0.00043355361667552904, "loss": 1.8554, "step": 31843 }, { "epoch": 1.06, "grad_norm": 0.45509228110313416, "learning_rate": 0.00043354425733312286, "loss": 1.8041, "step": 31844 }, { "epoch": 1.06, "grad_norm": 0.47667649388313293, "learning_rate": 0.0004335348978286136, "loss": 1.8274, "step": 31845 }, { "epoch": 1.06, "grad_norm": 0.45723411440849304, "learning_rate": 0.00043352553816201265, "loss": 1.8628, "step": 31846 }, { "epoch": 1.06, "grad_norm": 0.45868542790412903, "learning_rate": 0.00043351617833333134, "loss": 1.7341, "step": 31847 }, { "epoch": 1.06, "grad_norm": 0.4777682423591614, "learning_rate": 0.000433506818342581, "loss": 1.747, "step": 31848 }, { "epoch": 1.06, "grad_norm": 0.4517178237438202, "learning_rate": 0.0004334974581897732, "loss": 1.826, "step": 31849 }, { "epoch": 1.06, "grad_norm": 0.45548710227012634, "learning_rate": 0.00043348809787491905, "loss": 1.8156, "step": 31850 }, { "epoch": 1.06, "grad_norm": 0.4598369598388672, "learning_rate": 0.00043347873739803006, "loss": 1.7691, "step": 31851 }, { "epoch": 1.06, "grad_norm": 0.45375731587409973, "learning_rate": 0.0004334693767591175, "loss": 1.7665, "step": 31852 }, { "epoch": 1.06, "grad_norm": 0.46658948063850403, "learning_rate": 0.00043346001595819287, "loss": 1.8742, "step": 31853 }, { "epoch": 1.06, "grad_norm": 0.48021283745765686, "learning_rate": 0.0004334506549952675, "loss": 1.8456, "step": 31854 }, { "epoch": 1.06, "grad_norm": 0.45267459750175476, "learning_rate": 0.0004334412938703525, "loss": 1.7677, "step": 31855 }, { "epoch": 1.06, "grad_norm": 0.4724957048892975, "learning_rate": 0.0004334319325834596, "loss": 1.8916, "step": 31856 }, { "epoch": 1.06, "grad_norm": 0.4904216229915619, "learning_rate": 0.0004334225711345999, "loss": 1.8176, "step": 31857 }, { "epoch": 1.06, "grad_norm": 0.4755067825317383, "learning_rate": 0.0004334132095237849, "loss": 1.7818, "step": 31858 }, { "epoch": 1.06, "grad_norm": 0.49147024750709534, "learning_rate": 0.00043340384775102586, "loss": 1.8316, "step": 31859 }, { "epoch": 1.06, "grad_norm": 0.4698188900947571, "learning_rate": 0.00043339448581633424, "loss": 1.7848, "step": 31860 }, { "epoch": 1.06, "grad_norm": 0.4598499536514282, "learning_rate": 0.0004333851237197214, "loss": 1.8228, "step": 31861 }, { "epoch": 1.06, "grad_norm": 0.45522287487983704, "learning_rate": 0.0004333757614611987, "loss": 1.7056, "step": 31862 }, { "epoch": 1.06, "grad_norm": 0.48579391837120056, "learning_rate": 0.0004333663990407774, "loss": 1.8342, "step": 31863 }, { "epoch": 1.06, "grad_norm": 0.48124411702156067, "learning_rate": 0.00043335703645846894, "loss": 1.7575, "step": 31864 }, { "epoch": 1.06, "grad_norm": 0.46871933341026306, "learning_rate": 0.0004333476737142847, "loss": 1.8674, "step": 31865 }, { "epoch": 1.06, "grad_norm": 0.45981425046920776, "learning_rate": 0.00043333831080823604, "loss": 1.7886, "step": 31866 }, { "epoch": 1.06, "grad_norm": 0.44554632902145386, "learning_rate": 0.0004333289477403344, "loss": 1.7703, "step": 31867 }, { "epoch": 1.06, "grad_norm": 0.46716320514678955, "learning_rate": 0.000433319584510591, "loss": 1.8411, "step": 31868 }, { "epoch": 1.06, "grad_norm": 0.466703861951828, "learning_rate": 0.00043331022111901726, "loss": 1.8692, "step": 31869 }, { "epoch": 1.06, "grad_norm": 0.4656526744365692, "learning_rate": 0.00043330085756562454, "loss": 1.7791, "step": 31870 }, { "epoch": 1.06, "grad_norm": 0.4679168164730072, "learning_rate": 0.0004332914938504242, "loss": 1.8654, "step": 31871 }, { "epoch": 1.06, "grad_norm": 0.45652472972869873, "learning_rate": 0.0004332821299734277, "loss": 1.7986, "step": 31872 }, { "epoch": 1.06, "grad_norm": 0.4689168930053711, "learning_rate": 0.0004332727659346463, "loss": 1.7087, "step": 31873 }, { "epoch": 1.06, "grad_norm": 0.4552028477191925, "learning_rate": 0.00043326340173409133, "loss": 1.8357, "step": 31874 }, { "epoch": 1.06, "grad_norm": 0.4669313132762909, "learning_rate": 0.0004332540373717743, "loss": 1.7753, "step": 31875 }, { "epoch": 1.06, "grad_norm": 0.4557361304759979, "learning_rate": 0.0004332446728477065, "loss": 1.8158, "step": 31876 }, { "epoch": 1.06, "grad_norm": 0.4798116087913513, "learning_rate": 0.0004332353081618993, "loss": 1.8022, "step": 31877 }, { "epoch": 1.06, "grad_norm": 0.4527440071105957, "learning_rate": 0.000433225943314364, "loss": 1.7568, "step": 31878 }, { "epoch": 1.06, "grad_norm": 0.4523206949234009, "learning_rate": 0.0004332165783051121, "loss": 1.7689, "step": 31879 }, { "epoch": 1.06, "grad_norm": 0.4608204662799835, "learning_rate": 0.00043320721313415487, "loss": 1.8935, "step": 31880 }, { "epoch": 1.06, "grad_norm": 0.4703522324562073, "learning_rate": 0.00043319784780150377, "loss": 1.7926, "step": 31881 }, { "epoch": 1.06, "grad_norm": 0.44730767607688904, "learning_rate": 0.00043318848230717005, "loss": 1.7467, "step": 31882 }, { "epoch": 1.06, "grad_norm": 0.44798117876052856, "learning_rate": 0.0004331791166511652, "loss": 1.7312, "step": 31883 }, { "epoch": 1.06, "grad_norm": 0.4780224561691284, "learning_rate": 0.00043316975083350053, "loss": 1.8234, "step": 31884 }, { "epoch": 1.06, "grad_norm": 0.45463016629219055, "learning_rate": 0.0004331603848541873, "loss": 1.8053, "step": 31885 }, { "epoch": 1.06, "grad_norm": 0.4675486981868744, "learning_rate": 0.00043315101871323707, "loss": 1.7372, "step": 31886 }, { "epoch": 1.06, "grad_norm": 0.4480810761451721, "learning_rate": 0.00043314165241066104, "loss": 1.8059, "step": 31887 }, { "epoch": 1.06, "grad_norm": 0.4526912271976471, "learning_rate": 0.00043313228594647074, "loss": 1.8168, "step": 31888 }, { "epoch": 1.06, "grad_norm": 0.4718967378139496, "learning_rate": 0.0004331229193206774, "loss": 1.7749, "step": 31889 }, { "epoch": 1.06, "grad_norm": 0.45887085795402527, "learning_rate": 0.0004331135525332925, "loss": 1.7729, "step": 31890 }, { "epoch": 1.06, "grad_norm": 0.4643065333366394, "learning_rate": 0.00043310418558432733, "loss": 1.7857, "step": 31891 }, { "epoch": 1.06, "grad_norm": 0.4686740040779114, "learning_rate": 0.0004330948184737933, "loss": 1.877, "step": 31892 }, { "epoch": 1.06, "grad_norm": 0.46516668796539307, "learning_rate": 0.00043308545120170186, "loss": 1.7875, "step": 31893 }, { "epoch": 1.06, "grad_norm": 0.4539830982685089, "learning_rate": 0.00043307608376806417, "loss": 1.7889, "step": 31894 }, { "epoch": 1.06, "grad_norm": 0.46700727939605713, "learning_rate": 0.00043306671617289173, "loss": 1.8307, "step": 31895 }, { "epoch": 1.06, "grad_norm": 0.4793752133846283, "learning_rate": 0.00043305734841619594, "loss": 1.9114, "step": 31896 }, { "epoch": 1.06, "grad_norm": 0.46165743470191956, "learning_rate": 0.00043304798049798813, "loss": 1.8066, "step": 31897 }, { "epoch": 1.06, "grad_norm": 0.4537595808506012, "learning_rate": 0.0004330386124182796, "loss": 1.7931, "step": 31898 }, { "epoch": 1.06, "grad_norm": 0.44824784994125366, "learning_rate": 0.00043302924417708196, "loss": 1.7841, "step": 31899 }, { "epoch": 1.06, "grad_norm": 0.4534985423088074, "learning_rate": 0.0004330198757744064, "loss": 1.7635, "step": 31900 }, { "epoch": 1.06, "grad_norm": 0.4739910960197449, "learning_rate": 0.00043301050721026413, "loss": 1.8647, "step": 31901 }, { "epoch": 1.06, "grad_norm": 0.4664298892021179, "learning_rate": 0.0004330011384846669, "loss": 1.84, "step": 31902 }, { "epoch": 1.06, "grad_norm": 0.4667174816131592, "learning_rate": 0.00043299176959762573, "loss": 1.8307, "step": 31903 }, { "epoch": 1.06, "grad_norm": 0.45759865641593933, "learning_rate": 0.0004329824005491522, "loss": 1.7991, "step": 31904 }, { "epoch": 1.06, "grad_norm": 0.481799453496933, "learning_rate": 0.00043297303133925765, "loss": 1.7995, "step": 31905 }, { "epoch": 1.06, "grad_norm": 0.4601824879646301, "learning_rate": 0.00043296366196795346, "loss": 1.7985, "step": 31906 }, { "epoch": 1.06, "grad_norm": 0.45472604036331177, "learning_rate": 0.00043295429243525097, "loss": 1.7507, "step": 31907 }, { "epoch": 1.06, "grad_norm": 0.470944344997406, "learning_rate": 0.00043294492274116147, "loss": 1.8411, "step": 31908 }, { "epoch": 1.06, "grad_norm": 0.46454089879989624, "learning_rate": 0.00043293555288569655, "loss": 1.8666, "step": 31909 }, { "epoch": 1.06, "grad_norm": 0.4678175151348114, "learning_rate": 0.00043292618286886733, "loss": 1.8524, "step": 31910 }, { "epoch": 1.06, "grad_norm": 0.4674336910247803, "learning_rate": 0.00043291681269068546, "loss": 1.9258, "step": 31911 }, { "epoch": 1.06, "grad_norm": 0.4419252872467041, "learning_rate": 0.000432907442351162, "loss": 1.8254, "step": 31912 }, { "epoch": 1.06, "grad_norm": 0.47627758979797363, "learning_rate": 0.0004328980718503086, "loss": 1.776, "step": 31913 }, { "epoch": 1.06, "grad_norm": 0.4400685727596283, "learning_rate": 0.00043288870118813645, "loss": 1.7728, "step": 31914 }, { "epoch": 1.06, "grad_norm": 0.445598304271698, "learning_rate": 0.000432879330364657, "loss": 1.7872, "step": 31915 }, { "epoch": 1.06, "grad_norm": 0.46660682559013367, "learning_rate": 0.0004328699593798817, "loss": 1.775, "step": 31916 }, { "epoch": 1.06, "grad_norm": 0.4625680446624756, "learning_rate": 0.00043286058823382176, "loss": 1.8835, "step": 31917 }, { "epoch": 1.06, "grad_norm": 0.4485461413860321, "learning_rate": 0.0004328512169264887, "loss": 1.7968, "step": 31918 }, { "epoch": 1.06, "grad_norm": 0.473623126745224, "learning_rate": 0.0004328418454578938, "loss": 1.834, "step": 31919 }, { "epoch": 1.06, "grad_norm": 0.46005138754844666, "learning_rate": 0.0004328324738280485, "loss": 1.7644, "step": 31920 }, { "epoch": 1.06, "grad_norm": 0.44858771562576294, "learning_rate": 0.00043282310203696415, "loss": 1.8031, "step": 31921 }, { "epoch": 1.06, "grad_norm": 0.45608630776405334, "learning_rate": 0.0004328137300846521, "loss": 1.8239, "step": 31922 }, { "epoch": 1.06, "grad_norm": 0.4444413185119629, "learning_rate": 0.00043280435797112384, "loss": 1.7793, "step": 31923 }, { "epoch": 1.06, "grad_norm": 0.47025004029273987, "learning_rate": 0.0004327949856963905, "loss": 1.7405, "step": 31924 }, { "epoch": 1.06, "grad_norm": 0.4599458575248718, "learning_rate": 0.0004327856132604637, "loss": 1.7901, "step": 31925 }, { "epoch": 1.06, "grad_norm": 0.4687764346599579, "learning_rate": 0.00043277624066335476, "loss": 1.8782, "step": 31926 }, { "epoch": 1.06, "grad_norm": 0.46674782037734985, "learning_rate": 0.000432766867905075, "loss": 1.743, "step": 31927 }, { "epoch": 1.06, "grad_norm": 0.46736758947372437, "learning_rate": 0.0004327574949856358, "loss": 1.7946, "step": 31928 }, { "epoch": 1.06, "grad_norm": 0.6612487435340881, "learning_rate": 0.00043274812190504866, "loss": 1.6799, "step": 31929 }, { "epoch": 1.06, "grad_norm": 0.46762314438819885, "learning_rate": 0.0004327387486633247, "loss": 1.8167, "step": 31930 }, { "epoch": 1.06, "grad_norm": 0.4700929522514343, "learning_rate": 0.00043272937526047555, "loss": 1.8422, "step": 31931 }, { "epoch": 1.06, "grad_norm": 0.46989479660987854, "learning_rate": 0.00043272000169651255, "loss": 1.7744, "step": 31932 }, { "epoch": 1.06, "grad_norm": 0.46692782640457153, "learning_rate": 0.00043271062797144687, "loss": 1.8194, "step": 31933 }, { "epoch": 1.06, "grad_norm": 0.4660995900630951, "learning_rate": 0.00043270125408529015, "loss": 1.849, "step": 31934 }, { "epoch": 1.06, "grad_norm": 0.4582670032978058, "learning_rate": 0.0004326918800380536, "loss": 1.7396, "step": 31935 }, { "epoch": 1.06, "grad_norm": 0.46349000930786133, "learning_rate": 0.0004326825058297488, "loss": 1.7417, "step": 31936 }, { "epoch": 1.06, "grad_norm": 1.5341508388519287, "learning_rate": 0.0004326731314603868, "loss": 1.8315, "step": 31937 }, { "epoch": 1.06, "grad_norm": 0.47735410928726196, "learning_rate": 0.00043266375692997927, "loss": 1.7672, "step": 31938 }, { "epoch": 1.06, "grad_norm": 0.47403958439826965, "learning_rate": 0.0004326543822385375, "loss": 1.7962, "step": 31939 }, { "epoch": 1.06, "grad_norm": 0.45743462443351746, "learning_rate": 0.0004326450073860727, "loss": 1.7767, "step": 31940 }, { "epoch": 1.06, "grad_norm": 0.45853325724601746, "learning_rate": 0.0004326356323725967, "loss": 1.7903, "step": 31941 }, { "epoch": 1.06, "grad_norm": 1.126201868057251, "learning_rate": 0.0004326262571981203, "loss": 1.837, "step": 31942 }, { "epoch": 1.06, "grad_norm": 0.4850222170352936, "learning_rate": 0.0004326168818626553, "loss": 1.7701, "step": 31943 }, { "epoch": 1.06, "grad_norm": 0.48405003547668457, "learning_rate": 0.0004326075063662129, "loss": 1.8424, "step": 31944 }, { "epoch": 1.06, "grad_norm": 0.4510970115661621, "learning_rate": 0.0004325981307088046, "loss": 1.8347, "step": 31945 }, { "epoch": 1.06, "grad_norm": 0.4668722152709961, "learning_rate": 0.0004325887548904417, "loss": 1.7568, "step": 31946 }, { "epoch": 1.06, "grad_norm": 0.47591155767440796, "learning_rate": 0.00043257937891113553, "loss": 1.8246, "step": 31947 }, { "epoch": 1.06, "grad_norm": 0.4850159287452698, "learning_rate": 0.0004325700027708975, "loss": 1.8114, "step": 31948 }, { "epoch": 1.06, "grad_norm": 0.4450284242630005, "learning_rate": 0.00043256062646973906, "loss": 1.7801, "step": 31949 }, { "epoch": 1.06, "grad_norm": 0.4597909152507782, "learning_rate": 0.0004325512500076716, "loss": 1.804, "step": 31950 }, { "epoch": 1.06, "grad_norm": 0.46785449981689453, "learning_rate": 0.0004325418733847063, "loss": 1.7877, "step": 31951 }, { "epoch": 1.06, "grad_norm": 0.4739200472831726, "learning_rate": 0.00043253249660085496, "loss": 1.7887, "step": 31952 }, { "epoch": 1.06, "grad_norm": 0.4587794244289398, "learning_rate": 0.00043252311965612835, "loss": 1.6822, "step": 31953 }, { "epoch": 1.06, "grad_norm": 0.4469740092754364, "learning_rate": 0.0004325137425505385, "loss": 1.8106, "step": 31954 }, { "epoch": 1.06, "grad_norm": 0.481827974319458, "learning_rate": 0.0004325043652840964, "loss": 1.8322, "step": 31955 }, { "epoch": 1.06, "grad_norm": 0.45719027519226074, "learning_rate": 0.00043249498785681345, "loss": 1.8236, "step": 31956 }, { "epoch": 1.06, "grad_norm": 0.44759175181388855, "learning_rate": 0.0004324856102687012, "loss": 1.8435, "step": 31957 }, { "epoch": 1.06, "grad_norm": 0.48522061109542847, "learning_rate": 0.0004324762325197709, "loss": 1.8001, "step": 31958 }, { "epoch": 1.06, "grad_norm": 0.47444066405296326, "learning_rate": 0.000432466854610034, "loss": 1.7737, "step": 31959 }, { "epoch": 1.06, "grad_norm": 0.45447543263435364, "learning_rate": 0.00043245747653950173, "loss": 1.7848, "step": 31960 }, { "epoch": 1.06, "grad_norm": 0.44832608103752136, "learning_rate": 0.0004324480983081858, "loss": 1.8191, "step": 31961 }, { "epoch": 1.06, "grad_norm": 0.4473360478878021, "learning_rate": 0.00043243871991609727, "loss": 1.8098, "step": 31962 }, { "epoch": 1.06, "grad_norm": 0.44746002554893494, "learning_rate": 0.00043242934136324764, "loss": 1.8332, "step": 31963 }, { "epoch": 1.06, "grad_norm": 0.4630543291568756, "learning_rate": 0.00043241996264964844, "loss": 1.8229, "step": 31964 }, { "epoch": 1.06, "grad_norm": 0.45574498176574707, "learning_rate": 0.00043241058377531074, "loss": 1.7734, "step": 31965 }, { "epoch": 1.06, "grad_norm": 0.4484263062477112, "learning_rate": 0.0004324012047402462, "loss": 1.7925, "step": 31966 }, { "epoch": 1.06, "grad_norm": 0.47221702337265015, "learning_rate": 0.0004323918255444661, "loss": 1.797, "step": 31967 }, { "epoch": 1.06, "grad_norm": 0.45127856731414795, "learning_rate": 0.0004323824461879817, "loss": 1.7856, "step": 31968 }, { "epoch": 1.06, "grad_norm": 0.48141947388648987, "learning_rate": 0.0004323730666708047, "loss": 1.8333, "step": 31969 }, { "epoch": 1.06, "grad_norm": 0.4598950147628784, "learning_rate": 0.0004323636869929462, "loss": 1.7971, "step": 31970 }, { "epoch": 1.06, "grad_norm": 0.45666882395744324, "learning_rate": 0.00043235430715441776, "loss": 1.8382, "step": 31971 }, { "epoch": 1.06, "grad_norm": 0.46029773354530334, "learning_rate": 0.00043234492715523066, "loss": 1.8322, "step": 31972 }, { "epoch": 1.06, "grad_norm": 0.4559902250766754, "learning_rate": 0.00043233554699539624, "loss": 1.755, "step": 31973 }, { "epoch": 1.06, "grad_norm": 0.47631046175956726, "learning_rate": 0.00043232616667492605, "loss": 1.8126, "step": 31974 }, { "epoch": 1.06, "grad_norm": 0.4710337221622467, "learning_rate": 0.00043231678619383136, "loss": 1.8968, "step": 31975 }, { "epoch": 1.06, "grad_norm": 0.4579276740550995, "learning_rate": 0.00043230740555212356, "loss": 1.7226, "step": 31976 }, { "epoch": 1.06, "grad_norm": 0.458613783121109, "learning_rate": 0.0004322980247498142, "loss": 1.8061, "step": 31977 }, { "epoch": 1.06, "grad_norm": 0.4608946442604065, "learning_rate": 0.0004322886437869144, "loss": 1.837, "step": 31978 }, { "epoch": 1.06, "grad_norm": 0.45647382736206055, "learning_rate": 0.0004322792626634357, "loss": 1.852, "step": 31979 }, { "epoch": 1.06, "grad_norm": 0.46153008937835693, "learning_rate": 0.00043226988137938944, "loss": 1.7309, "step": 31980 }, { "epoch": 1.06, "grad_norm": 0.45309770107269287, "learning_rate": 0.0004322604999347871, "loss": 1.7614, "step": 31981 }, { "epoch": 1.06, "grad_norm": 0.4464251399040222, "learning_rate": 0.00043225111832963996, "loss": 1.7489, "step": 31982 }, { "epoch": 1.06, "grad_norm": 0.48491984605789185, "learning_rate": 0.0004322417365639595, "loss": 1.7667, "step": 31983 }, { "epoch": 1.06, "grad_norm": 0.47234460711479187, "learning_rate": 0.00043223235463775705, "loss": 1.8023, "step": 31984 }, { "epoch": 1.06, "grad_norm": 0.47875723242759705, "learning_rate": 0.0004322229725510439, "loss": 1.7834, "step": 31985 }, { "epoch": 1.06, "grad_norm": 0.4719995856285095, "learning_rate": 0.0004322135903038316, "loss": 1.8022, "step": 31986 }, { "epoch": 1.06, "grad_norm": 0.4577372968196869, "learning_rate": 0.0004322042078961315, "loss": 1.7356, "step": 31987 }, { "epoch": 1.06, "grad_norm": 0.4875301122665405, "learning_rate": 0.000432194825327955, "loss": 1.773, "step": 31988 }, { "epoch": 1.06, "grad_norm": 0.4732813835144043, "learning_rate": 0.00043218544259931345, "loss": 1.8654, "step": 31989 }, { "epoch": 1.06, "grad_norm": 0.4572300910949707, "learning_rate": 0.00043217605971021815, "loss": 1.7891, "step": 31990 }, { "epoch": 1.06, "grad_norm": 0.4600568115711212, "learning_rate": 0.0004321666766606807, "loss": 1.8238, "step": 31991 }, { "epoch": 1.06, "grad_norm": 0.48033156991004944, "learning_rate": 0.00043215729345071237, "loss": 1.7924, "step": 31992 }, { "epoch": 1.06, "grad_norm": 0.4964337944984436, "learning_rate": 0.00043214791008032454, "loss": 1.8002, "step": 31993 }, { "epoch": 1.06, "grad_norm": 0.45908573269844055, "learning_rate": 0.00043213852654952863, "loss": 1.8127, "step": 31994 }, { "epoch": 1.06, "grad_norm": 0.44605737924575806, "learning_rate": 0.00043212914285833596, "loss": 1.9006, "step": 31995 }, { "epoch": 1.06, "grad_norm": 0.46679773926734924, "learning_rate": 0.000432119759006758, "loss": 1.7445, "step": 31996 }, { "epoch": 1.06, "grad_norm": 0.4818527400493622, "learning_rate": 0.00043211037499480613, "loss": 1.8229, "step": 31997 }, { "epoch": 1.06, "grad_norm": 0.45859605073928833, "learning_rate": 0.0004321009908224918, "loss": 1.7305, "step": 31998 }, { "epoch": 1.06, "grad_norm": 0.4780123233795166, "learning_rate": 0.00043209160648982625, "loss": 1.8744, "step": 31999 }, { "epoch": 1.06, "grad_norm": 0.4420674443244934, "learning_rate": 0.00043208222199682097, "loss": 1.8365, "step": 32000 }, { "epoch": 1.06, "grad_norm": 0.45802268385887146, "learning_rate": 0.0004320728373434874, "loss": 1.7541, "step": 32001 }, { "epoch": 1.06, "grad_norm": 0.4802634119987488, "learning_rate": 0.0004320634525298367, "loss": 1.6977, "step": 32002 }, { "epoch": 1.06, "grad_norm": 0.4736456274986267, "learning_rate": 0.0004320540675558806, "loss": 1.7908, "step": 32003 }, { "epoch": 1.06, "grad_norm": 0.6154891848564148, "learning_rate": 0.00043204468242163024, "loss": 1.8562, "step": 32004 }, { "epoch": 1.06, "grad_norm": 0.462119460105896, "learning_rate": 0.0004320352971270971, "loss": 1.7933, "step": 32005 }, { "epoch": 1.06, "grad_norm": 0.4710610508918762, "learning_rate": 0.0004320259116722925, "loss": 1.8042, "step": 32006 }, { "epoch": 1.06, "grad_norm": 0.43561238050460815, "learning_rate": 0.00043201652605722796, "loss": 1.7528, "step": 32007 }, { "epoch": 1.06, "grad_norm": 0.45915284752845764, "learning_rate": 0.0004320071402819148, "loss": 1.824, "step": 32008 }, { "epoch": 1.06, "grad_norm": 0.46327313780784607, "learning_rate": 0.0004319977543463645, "loss": 1.7913, "step": 32009 }, { "epoch": 1.06, "grad_norm": 0.458138644695282, "learning_rate": 0.0004319883682505883, "loss": 1.7405, "step": 32010 }, { "epoch": 1.07, "grad_norm": 0.45967841148376465, "learning_rate": 0.00043197898199459763, "loss": 1.7681, "step": 32011 }, { "epoch": 1.07, "grad_norm": 0.5008245706558228, "learning_rate": 0.000431969595578404, "loss": 1.8667, "step": 32012 }, { "epoch": 1.07, "grad_norm": 0.4491952955722809, "learning_rate": 0.00043196020900201866, "loss": 1.8127, "step": 32013 }, { "epoch": 1.07, "grad_norm": 0.4834361970424652, "learning_rate": 0.00043195082226545316, "loss": 1.7779, "step": 32014 }, { "epoch": 1.07, "grad_norm": 0.48427483439445496, "learning_rate": 0.00043194143536871865, "loss": 1.8478, "step": 32015 }, { "epoch": 1.07, "grad_norm": 0.469593346118927, "learning_rate": 0.0004319320483118268, "loss": 1.8136, "step": 32016 }, { "epoch": 1.07, "grad_norm": 0.4722680151462555, "learning_rate": 0.0004319226610947889, "loss": 1.7912, "step": 32017 }, { "epoch": 1.07, "grad_norm": 0.4603103995323181, "learning_rate": 0.0004319132737176163, "loss": 1.764, "step": 32018 }, { "epoch": 1.07, "grad_norm": 0.48715490102767944, "learning_rate": 0.00043190388618032045, "loss": 1.706, "step": 32019 }, { "epoch": 1.07, "grad_norm": 0.4507020115852356, "learning_rate": 0.0004318944984829126, "loss": 1.8416, "step": 32020 }, { "epoch": 1.07, "grad_norm": 0.46185439825057983, "learning_rate": 0.00043188511062540436, "loss": 1.8481, "step": 32021 }, { "epoch": 1.07, "grad_norm": 0.45979586243629456, "learning_rate": 0.0004318757226078069, "loss": 1.7857, "step": 32022 }, { "epoch": 1.07, "grad_norm": 0.4591594934463501, "learning_rate": 0.0004318663344301319, "loss": 1.8276, "step": 32023 }, { "epoch": 1.07, "grad_norm": 0.45257923007011414, "learning_rate": 0.00043185694609239057, "loss": 1.7251, "step": 32024 }, { "epoch": 1.07, "grad_norm": 0.45947760343551636, "learning_rate": 0.0004318475575945943, "loss": 1.7593, "step": 32025 }, { "epoch": 1.07, "grad_norm": 0.4542548358440399, "learning_rate": 0.0004318381689367546, "loss": 1.782, "step": 32026 }, { "epoch": 1.07, "grad_norm": 0.4705214500427246, "learning_rate": 0.0004318287801188826, "loss": 1.7606, "step": 32027 }, { "epoch": 1.07, "grad_norm": 0.7306826710700989, "learning_rate": 0.0004318193911409901, "loss": 1.8019, "step": 32028 }, { "epoch": 1.07, "grad_norm": 0.45522433519363403, "learning_rate": 0.0004318100020030882, "loss": 1.7701, "step": 32029 }, { "epoch": 1.07, "grad_norm": 0.47466710209846497, "learning_rate": 0.0004318006127051884, "loss": 1.8041, "step": 32030 }, { "epoch": 1.07, "grad_norm": 0.4653339385986328, "learning_rate": 0.00043179122324730205, "loss": 1.8015, "step": 32031 }, { "epoch": 1.07, "grad_norm": 0.4733029007911682, "learning_rate": 0.00043178183362944055, "loss": 1.8438, "step": 32032 }, { "epoch": 1.07, "grad_norm": 0.4581495523452759, "learning_rate": 0.0004317724438516153, "loss": 1.8144, "step": 32033 }, { "epoch": 1.07, "grad_norm": 0.4660431444644928, "learning_rate": 0.0004317630539138378, "loss": 1.8279, "step": 32034 }, { "epoch": 1.07, "grad_norm": 0.456494003534317, "learning_rate": 0.0004317536638161193, "loss": 1.7577, "step": 32035 }, { "epoch": 1.07, "grad_norm": 0.4528043866157532, "learning_rate": 0.00043174427355847125, "loss": 1.8433, "step": 32036 }, { "epoch": 1.07, "grad_norm": 0.46360695362091064, "learning_rate": 0.00043173488314090516, "loss": 1.8221, "step": 32037 }, { "epoch": 1.07, "grad_norm": 0.4563506841659546, "learning_rate": 0.00043172549256343225, "loss": 1.7999, "step": 32038 }, { "epoch": 1.07, "grad_norm": 0.46331799030303955, "learning_rate": 0.0004317161018260641, "loss": 1.8293, "step": 32039 }, { "epoch": 1.07, "grad_norm": 0.4683782458305359, "learning_rate": 0.00043170671092881193, "loss": 1.8537, "step": 32040 }, { "epoch": 1.07, "grad_norm": 0.4515380263328552, "learning_rate": 0.00043169731987168717, "loss": 1.8141, "step": 32041 }, { "epoch": 1.07, "grad_norm": 0.4433409571647644, "learning_rate": 0.0004316879286547013, "loss": 1.8456, "step": 32042 }, { "epoch": 1.07, "grad_norm": 0.4385325014591217, "learning_rate": 0.0004316785372778657, "loss": 1.7738, "step": 32043 }, { "epoch": 1.07, "grad_norm": 0.4560108780860901, "learning_rate": 0.0004316691457411918, "loss": 1.7556, "step": 32044 }, { "epoch": 1.07, "grad_norm": 0.46275413036346436, "learning_rate": 0.0004316597540446909, "loss": 1.8752, "step": 32045 }, { "epoch": 1.07, "grad_norm": 0.46398547291755676, "learning_rate": 0.00043165036218837456, "loss": 1.723, "step": 32046 }, { "epoch": 1.07, "grad_norm": 0.483715295791626, "learning_rate": 0.000431640970172254, "loss": 1.8353, "step": 32047 }, { "epoch": 1.07, "grad_norm": 0.4531111419200897, "learning_rate": 0.00043163157799634065, "loss": 1.7423, "step": 32048 }, { "epoch": 1.07, "grad_norm": 0.44649118185043335, "learning_rate": 0.00043162218566064603, "loss": 1.7292, "step": 32049 }, { "epoch": 1.07, "grad_norm": 0.47431692481040955, "learning_rate": 0.00043161279316518143, "loss": 1.8068, "step": 32050 }, { "epoch": 1.07, "grad_norm": 0.46605926752090454, "learning_rate": 0.00043160340050995834, "loss": 1.8024, "step": 32051 }, { "epoch": 1.07, "grad_norm": 0.44853082299232483, "learning_rate": 0.0004315940076949881, "loss": 1.7824, "step": 32052 }, { "epoch": 1.07, "grad_norm": 0.4696383476257324, "learning_rate": 0.0004315846147202821, "loss": 1.7921, "step": 32053 }, { "epoch": 1.07, "grad_norm": 0.4615956246852875, "learning_rate": 0.0004315752215858518, "loss": 1.8278, "step": 32054 }, { "epoch": 1.07, "grad_norm": 0.44813263416290283, "learning_rate": 0.00043156582829170846, "loss": 1.8067, "step": 32055 }, { "epoch": 1.07, "grad_norm": 0.46676310896873474, "learning_rate": 0.00043155643483786373, "loss": 1.8217, "step": 32056 }, { "epoch": 1.07, "grad_norm": 0.45615461468696594, "learning_rate": 0.0004315470412243288, "loss": 1.8235, "step": 32057 }, { "epoch": 1.07, "grad_norm": 0.45565861463546753, "learning_rate": 0.00043153764745111514, "loss": 1.7968, "step": 32058 }, { "epoch": 1.07, "grad_norm": 0.45342060923576355, "learning_rate": 0.0004315282535182342, "loss": 1.8287, "step": 32059 }, { "epoch": 1.07, "grad_norm": 0.4488725960254669, "learning_rate": 0.0004315188594256973, "loss": 1.8041, "step": 32060 }, { "epoch": 1.07, "grad_norm": 0.4622395634651184, "learning_rate": 0.0004315094651735159, "loss": 1.7261, "step": 32061 }, { "epoch": 1.07, "grad_norm": 0.4523282051086426, "learning_rate": 0.0004315000707617014, "loss": 1.7684, "step": 32062 }, { "epoch": 1.07, "grad_norm": 0.46260693669319153, "learning_rate": 0.0004314906761902651, "loss": 1.8614, "step": 32063 }, { "epoch": 1.07, "grad_norm": 0.44903498888015747, "learning_rate": 0.00043148128145921867, "loss": 1.8569, "step": 32064 }, { "epoch": 1.07, "grad_norm": 0.4829438328742981, "learning_rate": 0.0004314718865685732, "loss": 1.7626, "step": 32065 }, { "epoch": 1.07, "grad_norm": 0.4481782913208008, "learning_rate": 0.00043146249151834027, "loss": 1.7697, "step": 32066 }, { "epoch": 1.07, "grad_norm": 0.4448455572128296, "learning_rate": 0.00043145309630853124, "loss": 1.7875, "step": 32067 }, { "epoch": 1.07, "grad_norm": 0.45543980598449707, "learning_rate": 0.00043144370093915757, "loss": 1.7945, "step": 32068 }, { "epoch": 1.07, "grad_norm": 0.4427250921726227, "learning_rate": 0.00043143430541023054, "loss": 1.7345, "step": 32069 }, { "epoch": 1.07, "grad_norm": 0.4693766236305237, "learning_rate": 0.00043142490972176165, "loss": 1.8321, "step": 32070 }, { "epoch": 1.07, "grad_norm": 0.45926356315612793, "learning_rate": 0.0004314155138737623, "loss": 1.8601, "step": 32071 }, { "epoch": 1.07, "grad_norm": 0.4745043218135834, "learning_rate": 0.0004314061178662439, "loss": 1.7787, "step": 32072 }, { "epoch": 1.07, "grad_norm": 0.4470473527908325, "learning_rate": 0.0004313967216992177, "loss": 1.794, "step": 32073 }, { "epoch": 1.07, "grad_norm": 0.4817143678665161, "learning_rate": 0.00043138732537269546, "loss": 1.8479, "step": 32074 }, { "epoch": 1.07, "grad_norm": 0.4575020968914032, "learning_rate": 0.00043137792888668815, "loss": 1.8457, "step": 32075 }, { "epoch": 1.07, "grad_norm": 0.47779521346092224, "learning_rate": 0.0004313685322412076, "loss": 1.8004, "step": 32076 }, { "epoch": 1.07, "grad_norm": 0.4584531784057617, "learning_rate": 0.00043135913543626477, "loss": 1.7202, "step": 32077 }, { "epoch": 1.07, "grad_norm": 0.4476531147956848, "learning_rate": 0.00043134973847187147, "loss": 1.7266, "step": 32078 }, { "epoch": 1.07, "grad_norm": 0.4537729322910309, "learning_rate": 0.0004313403413480389, "loss": 1.7922, "step": 32079 }, { "epoch": 1.07, "grad_norm": 0.46328726410865784, "learning_rate": 0.0004313309440647785, "loss": 1.7708, "step": 32080 }, { "epoch": 1.07, "grad_norm": 0.4764525890350342, "learning_rate": 0.0004313215466221017, "loss": 1.7587, "step": 32081 }, { "epoch": 1.07, "grad_norm": 0.4578532576560974, "learning_rate": 0.0004313121490200199, "loss": 1.7793, "step": 32082 }, { "epoch": 1.07, "grad_norm": 0.46693822741508484, "learning_rate": 0.0004313027512585445, "loss": 1.7945, "step": 32083 }, { "epoch": 1.07, "grad_norm": 0.4572669565677643, "learning_rate": 0.00043129335333768693, "loss": 1.821, "step": 32084 }, { "epoch": 1.07, "grad_norm": 0.4723133444786072, "learning_rate": 0.0004312839552574585, "loss": 1.7201, "step": 32085 }, { "epoch": 1.07, "grad_norm": 0.45470157265663147, "learning_rate": 0.00043127455701787073, "loss": 1.7617, "step": 32086 }, { "epoch": 1.07, "grad_norm": 0.45955124497413635, "learning_rate": 0.000431265158618935, "loss": 1.8416, "step": 32087 }, { "epoch": 1.07, "grad_norm": 0.46129247546195984, "learning_rate": 0.00043125576006066275, "loss": 1.827, "step": 32088 }, { "epoch": 1.07, "grad_norm": 0.47997990250587463, "learning_rate": 0.00043124636134306526, "loss": 1.8306, "step": 32089 }, { "epoch": 1.07, "grad_norm": 0.4655050039291382, "learning_rate": 0.00043123696246615413, "loss": 1.7971, "step": 32090 }, { "epoch": 1.07, "grad_norm": 0.4690958261489868, "learning_rate": 0.0004312275634299405, "loss": 1.7459, "step": 32091 }, { "epoch": 1.07, "grad_norm": 0.4654059112071991, "learning_rate": 0.0004312181642344361, "loss": 1.827, "step": 32092 }, { "epoch": 1.07, "grad_norm": 0.463861346244812, "learning_rate": 0.0004312087648796521, "loss": 1.7394, "step": 32093 }, { "epoch": 1.07, "grad_norm": 0.4581975042819977, "learning_rate": 0.0004311993653656001, "loss": 1.7983, "step": 32094 }, { "epoch": 1.07, "grad_norm": 0.449388325214386, "learning_rate": 0.0004311899656922913, "loss": 1.8021, "step": 32095 }, { "epoch": 1.07, "grad_norm": 0.476857990026474, "learning_rate": 0.0004311805658597372, "loss": 1.8087, "step": 32096 }, { "epoch": 1.07, "grad_norm": 0.4594484865665436, "learning_rate": 0.0004311711658679492, "loss": 1.8206, "step": 32097 }, { "epoch": 1.07, "grad_norm": 0.4579324722290039, "learning_rate": 0.00043116176571693875, "loss": 1.8009, "step": 32098 }, { "epoch": 1.07, "grad_norm": 0.45473024249076843, "learning_rate": 0.0004311523654067174, "loss": 1.7978, "step": 32099 }, { "epoch": 1.07, "grad_norm": 0.45870015025138855, "learning_rate": 0.00043114296493729616, "loss": 1.8067, "step": 32100 }, { "epoch": 1.07, "grad_norm": 0.44648289680480957, "learning_rate": 0.00043113356430868685, "loss": 1.7909, "step": 32101 }, { "epoch": 1.07, "grad_norm": 0.4514462649822235, "learning_rate": 0.00043112416352090066, "loss": 1.7632, "step": 32102 }, { "epoch": 1.07, "grad_norm": 0.44451218843460083, "learning_rate": 0.00043111476257394905, "loss": 1.8101, "step": 32103 }, { "epoch": 1.07, "grad_norm": 0.45974200963974, "learning_rate": 0.0004311053614678435, "loss": 1.7349, "step": 32104 }, { "epoch": 1.07, "grad_norm": 0.4696798324584961, "learning_rate": 0.00043109596020259527, "loss": 1.7915, "step": 32105 }, { "epoch": 1.07, "grad_norm": 0.47239890694618225, "learning_rate": 0.00043108655877821593, "loss": 1.7762, "step": 32106 }, { "epoch": 1.07, "grad_norm": 0.45694324374198914, "learning_rate": 0.00043107715719471675, "loss": 1.7824, "step": 32107 }, { "epoch": 1.07, "grad_norm": 0.455220490694046, "learning_rate": 0.0004310677554521092, "loss": 1.7502, "step": 32108 }, { "epoch": 1.07, "grad_norm": 0.4633491039276123, "learning_rate": 0.00043105835355040483, "loss": 1.804, "step": 32109 }, { "epoch": 1.07, "grad_norm": 0.4627974331378937, "learning_rate": 0.0004310489514896148, "loss": 1.7712, "step": 32110 }, { "epoch": 1.07, "grad_norm": 0.46577224135398865, "learning_rate": 0.0004310395492697508, "loss": 1.7619, "step": 32111 }, { "epoch": 1.07, "grad_norm": 0.4816127419471741, "learning_rate": 0.00043103014689082394, "loss": 1.8855, "step": 32112 }, { "epoch": 1.07, "grad_norm": 0.47554251551628113, "learning_rate": 0.00043102074435284586, "loss": 1.7583, "step": 32113 }, { "epoch": 1.07, "grad_norm": 0.4776395261287689, "learning_rate": 0.0004310113416558279, "loss": 1.8547, "step": 32114 }, { "epoch": 1.07, "grad_norm": 0.45674872398376465, "learning_rate": 0.0004310019387997815, "loss": 1.7947, "step": 32115 }, { "epoch": 1.07, "grad_norm": 0.47505658864974976, "learning_rate": 0.000430992535784718, "loss": 1.8125, "step": 32116 }, { "epoch": 1.07, "grad_norm": 0.49759647250175476, "learning_rate": 0.0004309831326106489, "loss": 1.8266, "step": 32117 }, { "epoch": 1.07, "grad_norm": 0.6242601871490479, "learning_rate": 0.0004309737292775856, "loss": 1.7548, "step": 32118 }, { "epoch": 1.07, "grad_norm": 0.4720044732093811, "learning_rate": 0.0004309643257855394, "loss": 1.8244, "step": 32119 }, { "epoch": 1.07, "grad_norm": 0.46044614911079407, "learning_rate": 0.0004309549221345219, "loss": 1.758, "step": 32120 }, { "epoch": 1.07, "grad_norm": 0.4658181965351105, "learning_rate": 0.0004309455183245443, "loss": 1.7267, "step": 32121 }, { "epoch": 1.07, "grad_norm": 0.4505278170108795, "learning_rate": 0.0004309361143556182, "loss": 1.7065, "step": 32122 }, { "epoch": 1.07, "grad_norm": 0.4542495608329773, "learning_rate": 0.00043092671022775496, "loss": 1.748, "step": 32123 }, { "epoch": 1.07, "grad_norm": 0.4565831422805786, "learning_rate": 0.000430917305940966, "loss": 1.8139, "step": 32124 }, { "epoch": 1.07, "grad_norm": 0.47546064853668213, "learning_rate": 0.0004309079014952627, "loss": 1.7844, "step": 32125 }, { "epoch": 1.07, "grad_norm": 0.45380064845085144, "learning_rate": 0.00043089849689065653, "loss": 1.7571, "step": 32126 }, { "epoch": 1.07, "grad_norm": 0.46500149369239807, "learning_rate": 0.0004308890921271588, "loss": 1.8562, "step": 32127 }, { "epoch": 1.07, "grad_norm": 0.4717978835105896, "learning_rate": 0.0004308796872047811, "loss": 1.7752, "step": 32128 }, { "epoch": 1.07, "grad_norm": 0.46255210041999817, "learning_rate": 0.0004308702821235347, "loss": 1.7584, "step": 32129 }, { "epoch": 1.07, "grad_norm": 0.45406490564346313, "learning_rate": 0.000430860876883431, "loss": 1.8316, "step": 32130 }, { "epoch": 1.07, "grad_norm": 0.5046706795692444, "learning_rate": 0.00043085147148448156, "loss": 1.7593, "step": 32131 }, { "epoch": 1.07, "grad_norm": 0.457232803106308, "learning_rate": 0.0004308420659266976, "loss": 1.7675, "step": 32132 }, { "epoch": 1.07, "grad_norm": 0.44565147161483765, "learning_rate": 0.0004308326602100908, "loss": 1.8217, "step": 32133 }, { "epoch": 1.07, "grad_norm": 0.44801440834999084, "learning_rate": 0.00043082325433467235, "loss": 1.7022, "step": 32134 }, { "epoch": 1.07, "grad_norm": 0.4573105275630951, "learning_rate": 0.0004308138483004537, "loss": 1.7646, "step": 32135 }, { "epoch": 1.07, "grad_norm": 0.48437443375587463, "learning_rate": 0.00043080444210744643, "loss": 1.8039, "step": 32136 }, { "epoch": 1.07, "grad_norm": 0.4553254246711731, "learning_rate": 0.00043079503575566165, "loss": 1.779, "step": 32137 }, { "epoch": 1.07, "grad_norm": 0.44851964712142944, "learning_rate": 0.0004307856292451112, "loss": 1.7413, "step": 32138 }, { "epoch": 1.07, "grad_norm": 0.4591785967350006, "learning_rate": 0.0004307762225758061, "loss": 1.7476, "step": 32139 }, { "epoch": 1.07, "grad_norm": 0.5108106732368469, "learning_rate": 0.000430766815747758, "loss": 1.785, "step": 32140 }, { "epoch": 1.07, "grad_norm": 0.4867216944694519, "learning_rate": 0.0004307574087609783, "loss": 1.8189, "step": 32141 }, { "epoch": 1.07, "grad_norm": 0.4720473885536194, "learning_rate": 0.00043074800161547826, "loss": 1.7892, "step": 32142 }, { "epoch": 1.07, "grad_norm": 0.45860666036605835, "learning_rate": 0.0004307385943112695, "loss": 1.8783, "step": 32143 }, { "epoch": 1.07, "grad_norm": 0.45290908217430115, "learning_rate": 0.00043072918684836325, "loss": 1.8394, "step": 32144 }, { "epoch": 1.07, "grad_norm": 0.45210984349250793, "learning_rate": 0.0004307197792267712, "loss": 1.8097, "step": 32145 }, { "epoch": 1.07, "grad_norm": 0.4757454991340637, "learning_rate": 0.00043071037144650447, "loss": 1.8201, "step": 32146 }, { "epoch": 1.07, "grad_norm": 0.47559720277786255, "learning_rate": 0.0004307009635075746, "loss": 1.7658, "step": 32147 }, { "epoch": 1.07, "grad_norm": 0.44366681575775146, "learning_rate": 0.00043069155540999304, "loss": 1.7938, "step": 32148 }, { "epoch": 1.07, "grad_norm": 0.46797704696655273, "learning_rate": 0.0004306821471537712, "loss": 1.7407, "step": 32149 }, { "epoch": 1.07, "grad_norm": 0.47138816118240356, "learning_rate": 0.00043067273873892057, "loss": 1.7643, "step": 32150 }, { "epoch": 1.07, "grad_norm": 0.48192211985588074, "learning_rate": 0.00043066333016545234, "loss": 1.7864, "step": 32151 }, { "epoch": 1.07, "grad_norm": 0.44861069321632385, "learning_rate": 0.0004306539214333782, "loss": 1.7902, "step": 32152 }, { "epoch": 1.07, "grad_norm": 0.5018608570098877, "learning_rate": 0.00043064451254270933, "loss": 1.7553, "step": 32153 }, { "epoch": 1.07, "grad_norm": 0.4924265742301941, "learning_rate": 0.00043063510349345736, "loss": 1.7768, "step": 32154 }, { "epoch": 1.07, "grad_norm": 0.4551694989204407, "learning_rate": 0.00043062569428563356, "loss": 1.7378, "step": 32155 }, { "epoch": 1.07, "grad_norm": 0.4545413553714752, "learning_rate": 0.00043061628491924953, "loss": 1.8138, "step": 32156 }, { "epoch": 1.07, "grad_norm": 0.46674594283103943, "learning_rate": 0.0004306068753943165, "loss": 1.8212, "step": 32157 }, { "epoch": 1.07, "grad_norm": 0.5005548000335693, "learning_rate": 0.0004305974657108459, "loss": 1.8106, "step": 32158 }, { "epoch": 1.07, "grad_norm": 0.4840342402458191, "learning_rate": 0.0004305880558688493, "loss": 1.8085, "step": 32159 }, { "epoch": 1.07, "grad_norm": 0.4555302560329437, "learning_rate": 0.000430578645868338, "loss": 1.7701, "step": 32160 }, { "epoch": 1.07, "grad_norm": 0.4678473472595215, "learning_rate": 0.00043056923570932354, "loss": 1.7841, "step": 32161 }, { "epoch": 1.07, "grad_norm": 0.4776250422000885, "learning_rate": 0.0004305598253918172, "loss": 1.7911, "step": 32162 }, { "epoch": 1.07, "grad_norm": 0.46900251507759094, "learning_rate": 0.0004305504149158305, "loss": 1.7447, "step": 32163 }, { "epoch": 1.07, "grad_norm": 0.47014009952545166, "learning_rate": 0.00043054100428137484, "loss": 1.7815, "step": 32164 }, { "epoch": 1.07, "grad_norm": 0.4597737789154053, "learning_rate": 0.0004305315934884616, "loss": 1.7878, "step": 32165 }, { "epoch": 1.07, "grad_norm": 0.48931223154067993, "learning_rate": 0.00043052218253710226, "loss": 1.8176, "step": 32166 }, { "epoch": 1.07, "grad_norm": 0.48256126046180725, "learning_rate": 0.0004305127714273082, "loss": 1.8016, "step": 32167 }, { "epoch": 1.07, "grad_norm": 0.45424139499664307, "learning_rate": 0.00043050336015909086, "loss": 1.7754, "step": 32168 }, { "epoch": 1.07, "grad_norm": 0.4554142653942108, "learning_rate": 0.0004304939487324617, "loss": 1.7981, "step": 32169 }, { "epoch": 1.07, "grad_norm": 0.5180466175079346, "learning_rate": 0.00043048453714743213, "loss": 1.8737, "step": 32170 }, { "epoch": 1.07, "grad_norm": 0.46417075395584106, "learning_rate": 0.0004304751254040135, "loss": 1.7611, "step": 32171 }, { "epoch": 1.07, "grad_norm": 0.4647887647151947, "learning_rate": 0.0004304657135022173, "loss": 1.7527, "step": 32172 }, { "epoch": 1.07, "grad_norm": 0.45154815912246704, "learning_rate": 0.00043045630144205503, "loss": 1.7751, "step": 32173 }, { "epoch": 1.07, "grad_norm": 0.461777001619339, "learning_rate": 0.0004304468892235379, "loss": 1.8003, "step": 32174 }, { "epoch": 1.07, "grad_norm": 0.4582632780075073, "learning_rate": 0.0004304374768466776, "loss": 1.8074, "step": 32175 }, { "epoch": 1.07, "grad_norm": 0.45449885725975037, "learning_rate": 0.0004304280643114854, "loss": 1.7794, "step": 32176 }, { "epoch": 1.07, "grad_norm": 0.4776551127433777, "learning_rate": 0.00043041865161797265, "loss": 1.7497, "step": 32177 }, { "epoch": 1.07, "grad_norm": 0.4674338400363922, "learning_rate": 0.00043040923876615087, "loss": 1.7851, "step": 32178 }, { "epoch": 1.07, "grad_norm": 0.4411582946777344, "learning_rate": 0.0004303998257560316, "loss": 1.7967, "step": 32179 }, { "epoch": 1.07, "grad_norm": 0.4593699276447296, "learning_rate": 0.0004303904125876261, "loss": 1.8486, "step": 32180 }, { "epoch": 1.07, "grad_norm": 0.46962064504623413, "learning_rate": 0.0004303809992609459, "loss": 1.7872, "step": 32181 }, { "epoch": 1.07, "grad_norm": 0.47987550497055054, "learning_rate": 0.0004303715857760023, "loss": 1.8987, "step": 32182 }, { "epoch": 1.07, "grad_norm": 0.46195894479751587, "learning_rate": 0.00043036217213280687, "loss": 1.8053, "step": 32183 }, { "epoch": 1.07, "grad_norm": 0.4569208025932312, "learning_rate": 0.0004303527583313709, "loss": 1.7983, "step": 32184 }, { "epoch": 1.07, "grad_norm": 0.45256680250167847, "learning_rate": 0.000430343344371706, "loss": 1.7467, "step": 32185 }, { "epoch": 1.07, "grad_norm": 0.45354756712913513, "learning_rate": 0.0004303339302538234, "loss": 1.7427, "step": 32186 }, { "epoch": 1.07, "grad_norm": 0.4610805809497833, "learning_rate": 0.00043032451597773455, "loss": 1.7536, "step": 32187 }, { "epoch": 1.07, "grad_norm": 0.4606297016143799, "learning_rate": 0.00043031510154345107, "loss": 1.6914, "step": 32188 }, { "epoch": 1.07, "grad_norm": 0.4583670198917389, "learning_rate": 0.0004303056869509842, "loss": 1.7783, "step": 32189 }, { "epoch": 1.07, "grad_norm": 0.4530831277370453, "learning_rate": 0.0004302962722003454, "loss": 1.7677, "step": 32190 }, { "epoch": 1.07, "grad_norm": 0.4554344117641449, "learning_rate": 0.0004302868572915462, "loss": 1.7347, "step": 32191 }, { "epoch": 1.07, "grad_norm": 0.45540550351142883, "learning_rate": 0.0004302774422245979, "loss": 1.7532, "step": 32192 }, { "epoch": 1.07, "grad_norm": 0.4540160298347473, "learning_rate": 0.00043026802699951204, "loss": 1.804, "step": 32193 }, { "epoch": 1.07, "grad_norm": 0.4638911187648773, "learning_rate": 0.00043025861161629986, "loss": 1.7921, "step": 32194 }, { "epoch": 1.07, "grad_norm": 0.4582856297492981, "learning_rate": 0.000430249196074973, "loss": 1.7472, "step": 32195 }, { "epoch": 1.07, "grad_norm": 0.4767698049545288, "learning_rate": 0.0004302397803755428, "loss": 1.8484, "step": 32196 }, { "epoch": 1.07, "grad_norm": 0.45562344789505005, "learning_rate": 0.00043023036451802073, "loss": 1.7376, "step": 32197 }, { "epoch": 1.07, "grad_norm": 0.4447232484817505, "learning_rate": 0.00043022094850241814, "loss": 1.6972, "step": 32198 }, { "epoch": 1.07, "grad_norm": 0.455548495054245, "learning_rate": 0.0004302115323287465, "loss": 1.8186, "step": 32199 }, { "epoch": 1.07, "grad_norm": 0.45974698662757874, "learning_rate": 0.0004302021159970173, "loss": 1.732, "step": 32200 }, { "epoch": 1.07, "grad_norm": 0.45008379220962524, "learning_rate": 0.0004301926995072418, "loss": 1.7957, "step": 32201 }, { "epoch": 1.07, "grad_norm": 0.4537201225757599, "learning_rate": 0.0004301832828594316, "loss": 1.7757, "step": 32202 }, { "epoch": 1.07, "grad_norm": 0.46070384979248047, "learning_rate": 0.00043017386605359814, "loss": 1.772, "step": 32203 }, { "epoch": 1.07, "grad_norm": 0.4693703055381775, "learning_rate": 0.00043016444908975277, "loss": 1.8363, "step": 32204 }, { "epoch": 1.07, "grad_norm": 0.4585985243320465, "learning_rate": 0.000430155031967907, "loss": 1.8257, "step": 32205 }, { "epoch": 1.07, "grad_norm": 0.46507909893989563, "learning_rate": 0.0004301456146880721, "loss": 1.8193, "step": 32206 }, { "epoch": 1.07, "grad_norm": 0.46565863490104675, "learning_rate": 0.0004301361972502596, "loss": 1.8279, "step": 32207 }, { "epoch": 1.07, "grad_norm": 0.48022401332855225, "learning_rate": 0.0004301267796544809, "loss": 1.7522, "step": 32208 }, { "epoch": 1.07, "grad_norm": 0.4685244560241699, "learning_rate": 0.0004301173619007475, "loss": 1.8301, "step": 32209 }, { "epoch": 1.07, "grad_norm": 0.46568843722343445, "learning_rate": 0.0004301079439890708, "loss": 1.7221, "step": 32210 }, { "epoch": 1.07, "grad_norm": 0.46275264024734497, "learning_rate": 0.00043009852591946224, "loss": 1.7268, "step": 32211 }, { "epoch": 1.07, "grad_norm": 0.47088736295700073, "learning_rate": 0.00043008910769193326, "loss": 1.8201, "step": 32212 }, { "epoch": 1.07, "grad_norm": 0.44360780715942383, "learning_rate": 0.0004300796893064952, "loss": 1.878, "step": 32213 }, { "epoch": 1.07, "grad_norm": 0.473558247089386, "learning_rate": 0.0004300702707631597, "loss": 1.8483, "step": 32214 }, { "epoch": 1.07, "grad_norm": 0.4510050415992737, "learning_rate": 0.0004300608520619379, "loss": 1.8022, "step": 32215 }, { "epoch": 1.07, "grad_norm": 0.46406468749046326, "learning_rate": 0.0004300514332028415, "loss": 1.7109, "step": 32216 }, { "epoch": 1.07, "grad_norm": 0.46762946248054504, "learning_rate": 0.0004300420141858817, "loss": 1.7362, "step": 32217 }, { "epoch": 1.07, "grad_norm": 0.47447463870048523, "learning_rate": 0.00043003259501107015, "loss": 1.7819, "step": 32218 }, { "epoch": 1.07, "grad_norm": 0.464447945356369, "learning_rate": 0.00043002317567841813, "loss": 1.7608, "step": 32219 }, { "epoch": 1.07, "grad_norm": 0.49896106123924255, "learning_rate": 0.00043001375618793717, "loss": 1.8206, "step": 32220 }, { "epoch": 1.07, "grad_norm": 0.45065969228744507, "learning_rate": 0.00043000433653963877, "loss": 1.806, "step": 32221 }, { "epoch": 1.07, "grad_norm": 0.46366554498672485, "learning_rate": 0.0004299949167335341, "loss": 1.8165, "step": 32222 }, { "epoch": 1.07, "grad_norm": 0.46478205919265747, "learning_rate": 0.00042998549676963484, "loss": 1.7942, "step": 32223 }, { "epoch": 1.07, "grad_norm": 0.47873303294181824, "learning_rate": 0.00042997607664795224, "loss": 1.8316, "step": 32224 }, { "epoch": 1.07, "grad_norm": 0.46826648712158203, "learning_rate": 0.0004299666563684979, "loss": 1.8306, "step": 32225 }, { "epoch": 1.07, "grad_norm": 0.4735211133956909, "learning_rate": 0.0004299572359312832, "loss": 1.8304, "step": 32226 }, { "epoch": 1.07, "grad_norm": 0.46020087599754333, "learning_rate": 0.00042994781533631953, "loss": 1.8392, "step": 32227 }, { "epoch": 1.07, "grad_norm": 0.4619585871696472, "learning_rate": 0.0004299383945836185, "loss": 1.8027, "step": 32228 }, { "epoch": 1.07, "grad_norm": 0.441249817609787, "learning_rate": 0.0004299289736731912, "loss": 1.7946, "step": 32229 }, { "epoch": 1.07, "grad_norm": 0.4568525552749634, "learning_rate": 0.0004299195526050494, "loss": 1.7725, "step": 32230 }, { "epoch": 1.07, "grad_norm": 0.4417746067047119, "learning_rate": 0.0004299101313792043, "loss": 1.8308, "step": 32231 }, { "epoch": 1.07, "grad_norm": 0.4595825672149658, "learning_rate": 0.00042990070999566755, "loss": 1.7019, "step": 32232 }, { "epoch": 1.07, "grad_norm": 0.46308571100234985, "learning_rate": 0.00042989128845445044, "loss": 1.7327, "step": 32233 }, { "epoch": 1.07, "grad_norm": 0.45040416717529297, "learning_rate": 0.0004298818667555644, "loss": 1.7375, "step": 32234 }, { "epoch": 1.07, "grad_norm": 0.450933575630188, "learning_rate": 0.000429872444899021, "loss": 1.7291, "step": 32235 }, { "epoch": 1.07, "grad_norm": 0.49730050563812256, "learning_rate": 0.0004298630228848315, "loss": 1.8005, "step": 32236 }, { "epoch": 1.07, "grad_norm": 0.46386733651161194, "learning_rate": 0.00042985360071300755, "loss": 1.8394, "step": 32237 }, { "epoch": 1.07, "grad_norm": 0.4947376549243927, "learning_rate": 0.00042984417838356034, "loss": 1.8375, "step": 32238 }, { "epoch": 1.07, "grad_norm": 0.46704742312431335, "learning_rate": 0.00042983475589650146, "loss": 1.8493, "step": 32239 }, { "epoch": 1.07, "grad_norm": 0.4961854815483093, "learning_rate": 0.0004298253332518423, "loss": 1.7203, "step": 32240 }, { "epoch": 1.07, "grad_norm": 0.4616990387439728, "learning_rate": 0.00042981591044959435, "loss": 1.7102, "step": 32241 }, { "epoch": 1.07, "grad_norm": 0.4614095091819763, "learning_rate": 0.00042980648748976895, "loss": 1.8541, "step": 32242 }, { "epoch": 1.07, "grad_norm": 0.482027530670166, "learning_rate": 0.00042979706437237775, "loss": 1.831, "step": 32243 }, { "epoch": 1.07, "grad_norm": 0.5221986174583435, "learning_rate": 0.0004297876410974319, "loss": 1.9207, "step": 32244 }, { "epoch": 1.07, "grad_norm": 0.453563392162323, "learning_rate": 0.000429778217664943, "loss": 1.7804, "step": 32245 }, { "epoch": 1.07, "grad_norm": 0.44955959916114807, "learning_rate": 0.0004297687940749225, "loss": 1.7992, "step": 32246 }, { "epoch": 1.07, "grad_norm": 0.47019943594932556, "learning_rate": 0.00042975937032738176, "loss": 1.8181, "step": 32247 }, { "epoch": 1.07, "grad_norm": 0.4746010899543762, "learning_rate": 0.0004297499464223323, "loss": 1.8064, "step": 32248 }, { "epoch": 1.07, "grad_norm": 0.46011751890182495, "learning_rate": 0.0004297405223597855, "loss": 1.777, "step": 32249 }, { "epoch": 1.07, "grad_norm": 0.43752023577690125, "learning_rate": 0.00042973109813975287, "loss": 1.7937, "step": 32250 }, { "epoch": 1.07, "grad_norm": 0.4903797507286072, "learning_rate": 0.00042972167376224574, "loss": 1.8267, "step": 32251 }, { "epoch": 1.07, "grad_norm": 0.4861023426055908, "learning_rate": 0.00042971224922727565, "loss": 1.7921, "step": 32252 }, { "epoch": 1.07, "grad_norm": 0.44079291820526123, "learning_rate": 0.000429702824534854, "loss": 1.7611, "step": 32253 }, { "epoch": 1.07, "grad_norm": 0.4521699547767639, "learning_rate": 0.00042969339968499217, "loss": 1.816, "step": 32254 }, { "epoch": 1.07, "grad_norm": 0.44638901948928833, "learning_rate": 0.0004296839746777017, "loss": 1.7898, "step": 32255 }, { "epoch": 1.07, "grad_norm": 0.4687623977661133, "learning_rate": 0.0004296745495129939, "loss": 1.7682, "step": 32256 }, { "epoch": 1.07, "grad_norm": 0.4752797782421112, "learning_rate": 0.00042966512419088053, "loss": 1.7428, "step": 32257 }, { "epoch": 1.07, "grad_norm": 0.4715673327445984, "learning_rate": 0.00042965569871137265, "loss": 1.8285, "step": 32258 }, { "epoch": 1.07, "grad_norm": 0.4796478748321533, "learning_rate": 0.0004296462730744819, "loss": 1.7724, "step": 32259 }, { "epoch": 1.07, "grad_norm": 0.45455801486968994, "learning_rate": 0.0004296368472802197, "loss": 1.7766, "step": 32260 }, { "epoch": 1.07, "grad_norm": 0.4805539548397064, "learning_rate": 0.0004296274213285974, "loss": 1.7988, "step": 32261 }, { "epoch": 1.07, "grad_norm": 0.45844918489456177, "learning_rate": 0.0004296179952196266, "loss": 1.7392, "step": 32262 }, { "epoch": 1.07, "grad_norm": 0.44413700699806213, "learning_rate": 0.0004296085689533186, "loss": 1.7948, "step": 32263 }, { "epoch": 1.07, "grad_norm": 0.44850003719329834, "learning_rate": 0.00042959914252968485, "loss": 1.7824, "step": 32264 }, { "epoch": 1.07, "grad_norm": 0.45030590891838074, "learning_rate": 0.00042958971594873684, "loss": 1.7857, "step": 32265 }, { "epoch": 1.07, "grad_norm": 0.46264269948005676, "learning_rate": 0.00042958028921048606, "loss": 1.7929, "step": 32266 }, { "epoch": 1.07, "grad_norm": 0.4601871967315674, "learning_rate": 0.00042957086231494395, "loss": 1.8299, "step": 32267 }, { "epoch": 1.07, "grad_norm": 0.460591197013855, "learning_rate": 0.0004295614352621218, "loss": 1.7427, "step": 32268 }, { "epoch": 1.07, "grad_norm": 0.47700828313827515, "learning_rate": 0.0004295520080520312, "loss": 1.9015, "step": 32269 }, { "epoch": 1.07, "grad_norm": 0.7455675601959229, "learning_rate": 0.0004295425806846836, "loss": 1.8532, "step": 32270 }, { "epoch": 1.07, "grad_norm": 0.4568527936935425, "learning_rate": 0.00042953315316009025, "loss": 1.8047, "step": 32271 }, { "epoch": 1.07, "grad_norm": 0.46723487973213196, "learning_rate": 0.0004295237254782628, "loss": 1.7921, "step": 32272 }, { "epoch": 1.07, "grad_norm": 0.4479295611381531, "learning_rate": 0.0004295142976392128, "loss": 1.7032, "step": 32273 }, { "epoch": 1.07, "grad_norm": 0.4528786242008209, "learning_rate": 0.0004295048696429514, "loss": 1.7562, "step": 32274 }, { "epoch": 1.07, "grad_norm": 0.47458416223526, "learning_rate": 0.0004294954414894901, "loss": 1.7524, "step": 32275 }, { "epoch": 1.07, "grad_norm": 0.45037102699279785, "learning_rate": 0.0004294860131788405, "loss": 1.7475, "step": 32276 }, { "epoch": 1.07, "grad_norm": 0.46049824357032776, "learning_rate": 0.00042947658471101385, "loss": 1.719, "step": 32277 }, { "epoch": 1.07, "grad_norm": 0.4624294638633728, "learning_rate": 0.0004294671560860218, "loss": 1.755, "step": 32278 }, { "epoch": 1.07, "grad_norm": 0.4670928418636322, "learning_rate": 0.0004294577273038757, "loss": 1.7668, "step": 32279 }, { "epoch": 1.07, "grad_norm": 0.46052414178848267, "learning_rate": 0.0004294482983645871, "loss": 1.8427, "step": 32280 }, { "epoch": 1.07, "grad_norm": 0.46506810188293457, "learning_rate": 0.00042943886926816714, "loss": 1.8299, "step": 32281 }, { "epoch": 1.07, "grad_norm": 0.4491320252418518, "learning_rate": 0.00042942944001462754, "loss": 1.7774, "step": 32282 }, { "epoch": 1.07, "grad_norm": 0.47300398349761963, "learning_rate": 0.00042942001060397974, "loss": 1.84, "step": 32283 }, { "epoch": 1.07, "grad_norm": 0.4554556608200073, "learning_rate": 0.00042941058103623494, "loss": 1.8504, "step": 32284 }, { "epoch": 1.07, "grad_norm": 0.44301092624664307, "learning_rate": 0.0004294011513114049, "loss": 1.8158, "step": 32285 }, { "epoch": 1.07, "grad_norm": 0.4646648168563843, "learning_rate": 0.0004293917214295008, "loss": 1.7936, "step": 32286 }, { "epoch": 1.07, "grad_norm": 0.48735782504081726, "learning_rate": 0.00042938229139053444, "loss": 1.7442, "step": 32287 }, { "epoch": 1.07, "grad_norm": 0.4480827748775482, "learning_rate": 0.00042937286119451687, "loss": 1.7935, "step": 32288 }, { "epoch": 1.07, "grad_norm": 0.45568493008613586, "learning_rate": 0.0004293634308414598, "loss": 1.8251, "step": 32289 }, { "epoch": 1.07, "grad_norm": 0.46878156065940857, "learning_rate": 0.0004293540003313746, "loss": 1.7484, "step": 32290 }, { "epoch": 1.07, "grad_norm": 0.45256564021110535, "learning_rate": 0.00042934456966427253, "loss": 1.69, "step": 32291 }, { "epoch": 1.07, "grad_norm": 0.4591594934463501, "learning_rate": 0.00042933513884016537, "loss": 1.779, "step": 32292 }, { "epoch": 1.07, "grad_norm": 0.4511534869670868, "learning_rate": 0.0004293257078590644, "loss": 1.731, "step": 32293 }, { "epoch": 1.07, "grad_norm": 0.4528753459453583, "learning_rate": 0.000429316276720981, "loss": 1.7969, "step": 32294 }, { "epoch": 1.07, "grad_norm": 0.45961740612983704, "learning_rate": 0.00042930684542592676, "loss": 1.7551, "step": 32295 }, { "epoch": 1.07, "grad_norm": 0.456306129693985, "learning_rate": 0.00042929741397391296, "loss": 1.724, "step": 32296 }, { "epoch": 1.07, "grad_norm": 0.4663378596305847, "learning_rate": 0.00042928798236495125, "loss": 1.8266, "step": 32297 }, { "epoch": 1.07, "grad_norm": 0.47795602679252625, "learning_rate": 0.000429278550599053, "loss": 1.7509, "step": 32298 }, { "epoch": 1.07, "grad_norm": 0.46763527393341064, "learning_rate": 0.00042926911867622957, "loss": 1.8128, "step": 32299 }, { "epoch": 1.07, "grad_norm": 0.46797052025794983, "learning_rate": 0.00042925968659649253, "loss": 1.8273, "step": 32300 }, { "epoch": 1.07, "grad_norm": 0.45104074478149414, "learning_rate": 0.0004292502543598532, "loss": 1.7222, "step": 32301 }, { "epoch": 1.07, "grad_norm": 0.47162818908691406, "learning_rate": 0.00042924082196632313, "loss": 1.8518, "step": 32302 }, { "epoch": 1.07, "grad_norm": 0.45544812083244324, "learning_rate": 0.0004292313894159138, "loss": 1.7835, "step": 32303 }, { "epoch": 1.07, "grad_norm": 0.46703630685806274, "learning_rate": 0.00042922195670863655, "loss": 1.805, "step": 32304 }, { "epoch": 1.07, "grad_norm": 0.4509507715702057, "learning_rate": 0.00042921252384450293, "loss": 1.8671, "step": 32305 }, { "epoch": 1.07, "grad_norm": 0.4476865231990814, "learning_rate": 0.0004292030908235243, "loss": 1.8086, "step": 32306 }, { "epoch": 1.07, "grad_norm": 0.4634469747543335, "learning_rate": 0.00042919365764571216, "loss": 1.6974, "step": 32307 }, { "epoch": 1.07, "grad_norm": 0.455807626247406, "learning_rate": 0.00042918422431107806, "loss": 1.7657, "step": 32308 }, { "epoch": 1.07, "grad_norm": 0.46910953521728516, "learning_rate": 0.0004291747908196332, "loss": 1.8281, "step": 32309 }, { "epoch": 1.07, "grad_norm": 0.45027023553848267, "learning_rate": 0.00042916535717138927, "loss": 1.7137, "step": 32310 }, { "epoch": 1.07, "grad_norm": 0.46125754714012146, "learning_rate": 0.00042915592336635746, "loss": 1.8495, "step": 32311 }, { "epoch": 1.08, "grad_norm": 0.4502938687801361, "learning_rate": 0.0004291464894045496, "loss": 1.8839, "step": 32312 }, { "epoch": 1.08, "grad_norm": 0.46414700150489807, "learning_rate": 0.0004291370552859768, "loss": 1.8165, "step": 32313 }, { "epoch": 1.08, "grad_norm": 0.47097891569137573, "learning_rate": 0.00042912762101065074, "loss": 1.7273, "step": 32314 }, { "epoch": 1.08, "grad_norm": 0.4567108154296875, "learning_rate": 0.00042911818657858275, "loss": 1.8271, "step": 32315 }, { "epoch": 1.08, "grad_norm": 0.449531614780426, "learning_rate": 0.00042910875198978423, "loss": 1.8343, "step": 32316 }, { "epoch": 1.08, "grad_norm": 0.4745129644870758, "learning_rate": 0.00042909931724426683, "loss": 1.8699, "step": 32317 }, { "epoch": 1.08, "grad_norm": 0.4886752665042877, "learning_rate": 0.00042908988234204184, "loss": 1.7138, "step": 32318 }, { "epoch": 1.08, "grad_norm": 0.4555809795856476, "learning_rate": 0.0004290804472831207, "loss": 1.8027, "step": 32319 }, { "epoch": 1.08, "grad_norm": 0.44766339659690857, "learning_rate": 0.00042907101206751495, "loss": 1.772, "step": 32320 }, { "epoch": 1.08, "grad_norm": 0.4669841229915619, "learning_rate": 0.00042906157669523595, "loss": 1.8113, "step": 32321 }, { "epoch": 1.08, "grad_norm": 0.4693256914615631, "learning_rate": 0.0004290521411662953, "loss": 1.7509, "step": 32322 }, { "epoch": 1.08, "grad_norm": 0.4582883417606354, "learning_rate": 0.0004290427054807043, "loss": 1.7549, "step": 32323 }, { "epoch": 1.08, "grad_norm": 0.4427240788936615, "learning_rate": 0.0004290332696384745, "loss": 1.8064, "step": 32324 }, { "epoch": 1.08, "grad_norm": 0.45642322301864624, "learning_rate": 0.00042902383363961735, "loss": 1.8804, "step": 32325 }, { "epoch": 1.08, "grad_norm": 0.4529240131378174, "learning_rate": 0.00042901439748414425, "loss": 1.914, "step": 32326 }, { "epoch": 1.08, "grad_norm": 0.47007548809051514, "learning_rate": 0.00042900496117206666, "loss": 1.8525, "step": 32327 }, { "epoch": 1.08, "grad_norm": 0.47702494263648987, "learning_rate": 0.0004289955247033961, "loss": 1.7901, "step": 32328 }, { "epoch": 1.08, "grad_norm": 0.4483732283115387, "learning_rate": 0.00042898608807814397, "loss": 1.8128, "step": 32329 }, { "epoch": 1.08, "grad_norm": 0.4500761926174164, "learning_rate": 0.00042897665129632164, "loss": 1.8183, "step": 32330 }, { "epoch": 1.08, "grad_norm": 0.4504617750644684, "learning_rate": 0.0004289672143579408, "loss": 1.7544, "step": 32331 }, { "epoch": 1.08, "grad_norm": 0.4435124099254608, "learning_rate": 0.0004289577772630127, "loss": 1.7336, "step": 32332 }, { "epoch": 1.08, "grad_norm": 0.47688406705856323, "learning_rate": 0.0004289483400115489, "loss": 1.8034, "step": 32333 }, { "epoch": 1.08, "grad_norm": 0.44647085666656494, "learning_rate": 0.0004289389026035607, "loss": 1.7516, "step": 32334 }, { "epoch": 1.08, "grad_norm": 0.46995919942855835, "learning_rate": 0.00042892946503905976, "loss": 1.811, "step": 32335 }, { "epoch": 1.08, "grad_norm": 0.44846975803375244, "learning_rate": 0.00042892002731805744, "loss": 1.7698, "step": 32336 }, { "epoch": 1.08, "grad_norm": 0.4679088294506073, "learning_rate": 0.00042891058944056516, "loss": 1.8558, "step": 32337 }, { "epoch": 1.08, "grad_norm": 0.4603908956050873, "learning_rate": 0.0004289011514065945, "loss": 1.8261, "step": 32338 }, { "epoch": 1.08, "grad_norm": 0.4786783754825592, "learning_rate": 0.00042889171321615674, "loss": 1.8042, "step": 32339 }, { "epoch": 1.08, "grad_norm": 0.4594773054122925, "learning_rate": 0.00042888227486926353, "loss": 1.806, "step": 32340 }, { "epoch": 1.08, "grad_norm": 0.45868125557899475, "learning_rate": 0.00042887283636592607, "loss": 1.8153, "step": 32341 }, { "epoch": 1.08, "grad_norm": 0.4701918959617615, "learning_rate": 0.0004288633977061562, "loss": 1.7777, "step": 32342 }, { "epoch": 1.08, "grad_norm": 0.4487469792366028, "learning_rate": 0.000428853958889965, "loss": 1.7485, "step": 32343 }, { "epoch": 1.08, "grad_norm": 0.46580594778060913, "learning_rate": 0.0004288445199173641, "loss": 1.7655, "step": 32344 }, { "epoch": 1.08, "grad_norm": 0.46776461601257324, "learning_rate": 0.000428835080788365, "loss": 1.8408, "step": 32345 }, { "epoch": 1.08, "grad_norm": 0.4677901864051819, "learning_rate": 0.000428825641502979, "loss": 1.8027, "step": 32346 }, { "epoch": 1.08, "grad_norm": 0.452548623085022, "learning_rate": 0.00042881620206121774, "loss": 1.7632, "step": 32347 }, { "epoch": 1.08, "grad_norm": 0.4577105641365051, "learning_rate": 0.00042880676246309245, "loss": 1.7472, "step": 32348 }, { "epoch": 1.08, "grad_norm": 0.46010997891426086, "learning_rate": 0.0004287973227086149, "loss": 1.7579, "step": 32349 }, { "epoch": 1.08, "grad_norm": 1.9124211072921753, "learning_rate": 0.00042878788279779626, "loss": 1.8643, "step": 32350 }, { "epoch": 1.08, "grad_norm": 0.46842819452285767, "learning_rate": 0.00042877844273064814, "loss": 1.8462, "step": 32351 }, { "epoch": 1.08, "grad_norm": 0.4487285614013672, "learning_rate": 0.00042876900250718203, "loss": 1.7674, "step": 32352 }, { "epoch": 1.08, "grad_norm": 0.45380979776382446, "learning_rate": 0.00042875956212740926, "loss": 1.7813, "step": 32353 }, { "epoch": 1.08, "grad_norm": 0.44030311703681946, "learning_rate": 0.00042875012159134137, "loss": 1.7962, "step": 32354 }, { "epoch": 1.08, "grad_norm": 0.4596829414367676, "learning_rate": 0.00042874068089898985, "loss": 1.8312, "step": 32355 }, { "epoch": 1.08, "grad_norm": 0.4578537344932556, "learning_rate": 0.00042873124005036604, "loss": 1.7779, "step": 32356 }, { "epoch": 1.08, "grad_norm": 0.9960015416145325, "learning_rate": 0.00042872179904548144, "loss": 1.9076, "step": 32357 }, { "epoch": 1.08, "grad_norm": 0.4744111895561218, "learning_rate": 0.00042871235788434763, "loss": 1.7719, "step": 32358 }, { "epoch": 1.08, "grad_norm": 0.4518192708492279, "learning_rate": 0.0004287029165669759, "loss": 1.7934, "step": 32359 }, { "epoch": 1.08, "grad_norm": 0.43843790888786316, "learning_rate": 0.0004286934750933779, "loss": 1.7691, "step": 32360 }, { "epoch": 1.08, "grad_norm": 0.4399154484272003, "learning_rate": 0.0004286840334635649, "loss": 1.6814, "step": 32361 }, { "epoch": 1.08, "grad_norm": 0.47703787684440613, "learning_rate": 0.0004286745916775485, "loss": 1.8285, "step": 32362 }, { "epoch": 1.08, "grad_norm": 0.45001181960105896, "learning_rate": 0.00042866514973534007, "loss": 1.8227, "step": 32363 }, { "epoch": 1.08, "grad_norm": 0.43693143129348755, "learning_rate": 0.00042865570763695105, "loss": 1.7088, "step": 32364 }, { "epoch": 1.08, "grad_norm": 0.47911855578422546, "learning_rate": 0.00042864626538239306, "loss": 1.8459, "step": 32365 }, { "epoch": 1.08, "grad_norm": 0.4649890065193176, "learning_rate": 0.0004286368229716774, "loss": 1.7561, "step": 32366 }, { "epoch": 1.08, "grad_norm": 0.4584854245185852, "learning_rate": 0.0004286273804048156, "loss": 1.8302, "step": 32367 }, { "epoch": 1.08, "grad_norm": 0.4607827365398407, "learning_rate": 0.00042861793768181913, "loss": 1.7893, "step": 32368 }, { "epoch": 1.08, "grad_norm": 0.44851866364479065, "learning_rate": 0.00042860849480269944, "loss": 1.776, "step": 32369 }, { "epoch": 1.08, "grad_norm": 0.4629487097263336, "learning_rate": 0.000428599051767468, "loss": 1.8027, "step": 32370 }, { "epoch": 1.08, "grad_norm": 0.4706224501132965, "learning_rate": 0.00042858960857613615, "loss": 1.7569, "step": 32371 }, { "epoch": 1.08, "grad_norm": 0.46198800206184387, "learning_rate": 0.00042858016522871566, "loss": 1.8172, "step": 32372 }, { "epoch": 1.08, "grad_norm": 0.45740851759910583, "learning_rate": 0.0004285707217252176, "loss": 1.7623, "step": 32373 }, { "epoch": 1.08, "grad_norm": 0.4489724040031433, "learning_rate": 0.0004285612780656537, "loss": 1.7829, "step": 32374 }, { "epoch": 1.08, "grad_norm": 0.4536187946796417, "learning_rate": 0.00042855183425003537, "loss": 1.7505, "step": 32375 }, { "epoch": 1.08, "grad_norm": 0.46025264263153076, "learning_rate": 0.00042854239027837404, "loss": 1.7932, "step": 32376 }, { "epoch": 1.08, "grad_norm": 0.46834149956703186, "learning_rate": 0.0004285329461506812, "loss": 1.7333, "step": 32377 }, { "epoch": 1.08, "grad_norm": 0.4481503665447235, "learning_rate": 0.0004285235018669683, "loss": 1.785, "step": 32378 }, { "epoch": 1.08, "grad_norm": 0.4505477547645569, "learning_rate": 0.0004285140574272468, "loss": 1.7712, "step": 32379 }, { "epoch": 1.08, "grad_norm": 0.4506506025791168, "learning_rate": 0.00042850461283152816, "loss": 1.7542, "step": 32380 }, { "epoch": 1.08, "grad_norm": 0.45076215267181396, "learning_rate": 0.0004284951680798238, "loss": 1.7962, "step": 32381 }, { "epoch": 1.08, "grad_norm": 0.4529419541358948, "learning_rate": 0.0004284857231721453, "loss": 1.8003, "step": 32382 }, { "epoch": 1.08, "grad_norm": 0.44292739033699036, "learning_rate": 0.00042847627810850406, "loss": 1.7526, "step": 32383 }, { "epoch": 1.08, "grad_norm": 0.44343286752700806, "learning_rate": 0.0004284668328889116, "loss": 1.7678, "step": 32384 }, { "epoch": 1.08, "grad_norm": 0.4754139184951782, "learning_rate": 0.0004284573875133793, "loss": 1.7648, "step": 32385 }, { "epoch": 1.08, "grad_norm": 0.4576382339000702, "learning_rate": 0.00042844794198191856, "loss": 1.7966, "step": 32386 }, { "epoch": 1.08, "grad_norm": 0.459696501493454, "learning_rate": 0.0004284384962945411, "loss": 1.8421, "step": 32387 }, { "epoch": 1.08, "grad_norm": 0.45720797777175903, "learning_rate": 0.0004284290504512581, "loss": 1.7087, "step": 32388 }, { "epoch": 1.08, "grad_norm": 0.44954007863998413, "learning_rate": 0.00042841960445208125, "loss": 1.7695, "step": 32389 }, { "epoch": 1.08, "grad_norm": 0.4646568298339844, "learning_rate": 0.0004284101582970219, "loss": 1.8191, "step": 32390 }, { "epoch": 1.08, "grad_norm": 0.4514828324317932, "learning_rate": 0.00042840071198609156, "loss": 1.7979, "step": 32391 }, { "epoch": 1.08, "grad_norm": 0.45723414421081543, "learning_rate": 0.0004283912655193016, "loss": 1.8478, "step": 32392 }, { "epoch": 1.08, "grad_norm": 0.45625466108322144, "learning_rate": 0.0004283818188966636, "loss": 1.7422, "step": 32393 }, { "epoch": 1.08, "grad_norm": 0.44725722074508667, "learning_rate": 0.00042837237211818896, "loss": 1.8338, "step": 32394 }, { "epoch": 1.08, "grad_norm": 0.4834480881690979, "learning_rate": 0.00042836292518388925, "loss": 1.8789, "step": 32395 }, { "epoch": 1.08, "grad_norm": 0.4476502239704132, "learning_rate": 0.00042835347809377575, "loss": 1.7601, "step": 32396 }, { "epoch": 1.08, "grad_norm": 0.4548591375350952, "learning_rate": 0.00042834403084786017, "loss": 1.8122, "step": 32397 }, { "epoch": 1.08, "grad_norm": 0.46796518564224243, "learning_rate": 0.00042833458344615374, "loss": 1.8016, "step": 32398 }, { "epoch": 1.08, "grad_norm": 0.4669601321220398, "learning_rate": 0.0004283251358886681, "loss": 1.7026, "step": 32399 }, { "epoch": 1.08, "grad_norm": 0.4624003767967224, "learning_rate": 0.00042831568817541473, "loss": 1.814, "step": 32400 }, { "epoch": 1.08, "grad_norm": 0.4629082679748535, "learning_rate": 0.00042830624030640475, "loss": 1.8433, "step": 32401 }, { "epoch": 1.08, "grad_norm": 0.4808764159679413, "learning_rate": 0.00042829679228165016, "loss": 1.8445, "step": 32402 }, { "epoch": 1.08, "grad_norm": 0.46216675639152527, "learning_rate": 0.000428287344101162, "loss": 1.7504, "step": 32403 }, { "epoch": 1.08, "grad_norm": 0.4629528820514679, "learning_rate": 0.00042827789576495203, "loss": 1.7676, "step": 32404 }, { "epoch": 1.08, "grad_norm": 0.44777747988700867, "learning_rate": 0.00042826844727303153, "loss": 1.7582, "step": 32405 }, { "epoch": 1.08, "grad_norm": 0.4632824659347534, "learning_rate": 0.00042825899862541206, "loss": 1.7691, "step": 32406 }, { "epoch": 1.08, "grad_norm": 0.4642069935798645, "learning_rate": 0.0004282495498221051, "loss": 1.8139, "step": 32407 }, { "epoch": 1.08, "grad_norm": 0.47709181904792786, "learning_rate": 0.0004282401008631219, "loss": 1.8324, "step": 32408 }, { "epoch": 1.08, "grad_norm": 0.46330568194389343, "learning_rate": 0.0004282306517484744, "loss": 1.6843, "step": 32409 }, { "epoch": 1.08, "grad_norm": 0.4687960743904114, "learning_rate": 0.0004282212024781736, "loss": 1.8456, "step": 32410 }, { "epoch": 1.08, "grad_norm": 0.46552738547325134, "learning_rate": 0.0004282117530522312, "loss": 1.8117, "step": 32411 }, { "epoch": 1.08, "grad_norm": 0.4627056419849396, "learning_rate": 0.0004282023034706586, "loss": 1.6735, "step": 32412 }, { "epoch": 1.08, "grad_norm": 0.45701029896736145, "learning_rate": 0.0004281928537334673, "loss": 1.8512, "step": 32413 }, { "epoch": 1.08, "grad_norm": 0.46395498514175415, "learning_rate": 0.00042818340384066875, "loss": 1.8299, "step": 32414 }, { "epoch": 1.08, "grad_norm": 0.5004643797874451, "learning_rate": 0.0004281739537922745, "loss": 1.8548, "step": 32415 }, { "epoch": 1.08, "grad_norm": 0.4832893908023834, "learning_rate": 0.00042816450358829593, "loss": 1.8153, "step": 32416 }, { "epoch": 1.08, "grad_norm": 0.4821249842643738, "learning_rate": 0.0004281550532287445, "loss": 1.8125, "step": 32417 }, { "epoch": 1.08, "grad_norm": 0.5047159194946289, "learning_rate": 0.00042814560271363166, "loss": 1.7641, "step": 32418 }, { "epoch": 1.08, "grad_norm": 0.4868592619895935, "learning_rate": 0.000428136152042969, "loss": 1.7776, "step": 32419 }, { "epoch": 1.08, "grad_norm": 0.45687922835350037, "learning_rate": 0.00042812670121676806, "loss": 1.7794, "step": 32420 }, { "epoch": 1.08, "grad_norm": 0.4709373414516449, "learning_rate": 0.00042811725023504, "loss": 1.8427, "step": 32421 }, { "epoch": 1.08, "grad_norm": 0.46011048555374146, "learning_rate": 0.00042810779909779656, "loss": 1.7655, "step": 32422 }, { "epoch": 1.08, "grad_norm": 0.48311424255371094, "learning_rate": 0.0004280983478050491, "loss": 1.8632, "step": 32423 }, { "epoch": 1.08, "grad_norm": 0.4673886001110077, "learning_rate": 0.0004280888963568091, "loss": 1.84, "step": 32424 }, { "epoch": 1.08, "grad_norm": 0.4903969466686249, "learning_rate": 0.0004280794447530882, "loss": 1.8375, "step": 32425 }, { "epoch": 1.08, "grad_norm": 0.4633457362651825, "learning_rate": 0.00042806999299389755, "loss": 1.7228, "step": 32426 }, { "epoch": 1.08, "grad_norm": 0.4732343852519989, "learning_rate": 0.00042806054107924886, "loss": 1.7729, "step": 32427 }, { "epoch": 1.08, "grad_norm": 0.43932825326919556, "learning_rate": 0.0004280510890091535, "loss": 1.7866, "step": 32428 }, { "epoch": 1.08, "grad_norm": 0.46136221289634705, "learning_rate": 0.0004280416367836231, "loss": 1.8496, "step": 32429 }, { "epoch": 1.08, "grad_norm": 0.4758288562297821, "learning_rate": 0.0004280321844026689, "loss": 1.7341, "step": 32430 }, { "epoch": 1.08, "grad_norm": 0.45766395330429077, "learning_rate": 0.0004280227318663025, "loss": 1.7808, "step": 32431 }, { "epoch": 1.08, "grad_norm": 0.46867817640304565, "learning_rate": 0.00042801327917453547, "loss": 1.8749, "step": 32432 }, { "epoch": 1.08, "grad_norm": 0.46563997864723206, "learning_rate": 0.00042800382632737907, "loss": 1.7918, "step": 32433 }, { "epoch": 1.08, "grad_norm": 0.45086991786956787, "learning_rate": 0.000427994373324845, "loss": 1.7979, "step": 32434 }, { "epoch": 1.08, "grad_norm": 0.46338382363319397, "learning_rate": 0.0004279849201669445, "loss": 1.7874, "step": 32435 }, { "epoch": 1.08, "grad_norm": 0.4525805413722992, "learning_rate": 0.0004279754668536892, "loss": 1.92, "step": 32436 }, { "epoch": 1.08, "grad_norm": 0.46500369906425476, "learning_rate": 0.0004279660133850905, "loss": 1.7783, "step": 32437 }, { "epoch": 1.08, "grad_norm": 0.4501427114009857, "learning_rate": 0.00042795655976116007, "loss": 1.8858, "step": 32438 }, { "epoch": 1.08, "grad_norm": 0.44150251150131226, "learning_rate": 0.00042794710598190915, "loss": 1.7274, "step": 32439 }, { "epoch": 1.08, "grad_norm": 0.49489668011665344, "learning_rate": 0.0004279376520473493, "loss": 1.7855, "step": 32440 }, { "epoch": 1.08, "grad_norm": 0.5366547107696533, "learning_rate": 0.0004279281979574919, "loss": 1.7749, "step": 32441 }, { "epoch": 1.08, "grad_norm": 0.4547945559024811, "learning_rate": 0.0004279187437123486, "loss": 1.8188, "step": 32442 }, { "epoch": 1.08, "grad_norm": 0.46730077266693115, "learning_rate": 0.0004279092893119307, "loss": 1.7995, "step": 32443 }, { "epoch": 1.08, "grad_norm": 0.45807644724845886, "learning_rate": 0.00042789983475624983, "loss": 1.7811, "step": 32444 }, { "epoch": 1.08, "grad_norm": 0.46312159299850464, "learning_rate": 0.00042789038004531753, "loss": 1.7533, "step": 32445 }, { "epoch": 1.08, "grad_norm": 0.46469977498054504, "learning_rate": 0.00042788092517914505, "loss": 1.8199, "step": 32446 }, { "epoch": 1.08, "grad_norm": 0.44021958112716675, "learning_rate": 0.0004278714701577439, "loss": 1.7601, "step": 32447 }, { "epoch": 1.08, "grad_norm": 0.4616604149341583, "learning_rate": 0.0004278620149811257, "loss": 1.7939, "step": 32448 }, { "epoch": 1.08, "grad_norm": 0.47352370619773865, "learning_rate": 0.00042785255964930183, "loss": 1.8512, "step": 32449 }, { "epoch": 1.08, "grad_norm": 0.5111415386199951, "learning_rate": 0.0004278431041622839, "loss": 1.7631, "step": 32450 }, { "epoch": 1.08, "grad_norm": 0.46717584133148193, "learning_rate": 0.00042783364852008303, "loss": 1.8298, "step": 32451 }, { "epoch": 1.08, "grad_norm": 0.458010733127594, "learning_rate": 0.0004278241927227112, "loss": 1.8062, "step": 32452 }, { "epoch": 1.08, "grad_norm": 0.4753662049770355, "learning_rate": 0.0004278147367701795, "loss": 1.8157, "step": 32453 }, { "epoch": 1.08, "grad_norm": 0.47063592076301575, "learning_rate": 0.0004278052806624996, "loss": 1.9011, "step": 32454 }, { "epoch": 1.08, "grad_norm": 0.4667012393474579, "learning_rate": 0.00042779582439968287, "loss": 1.8359, "step": 32455 }, { "epoch": 1.08, "grad_norm": 0.4557574391365051, "learning_rate": 0.0004277863679817409, "loss": 1.7651, "step": 32456 }, { "epoch": 1.08, "grad_norm": 0.4554021954536438, "learning_rate": 0.0004277769114086851, "loss": 1.7675, "step": 32457 }, { "epoch": 1.08, "grad_norm": 0.47946926951408386, "learning_rate": 0.0004277674546805268, "loss": 1.8196, "step": 32458 }, { "epoch": 1.08, "grad_norm": 0.4719943702220917, "learning_rate": 0.00042775799779727787, "loss": 1.8521, "step": 32459 }, { "epoch": 1.08, "grad_norm": 0.45049533247947693, "learning_rate": 0.00042774854075894943, "loss": 1.756, "step": 32460 }, { "epoch": 1.08, "grad_norm": 0.4635132849216461, "learning_rate": 0.0004277390835655531, "loss": 1.7574, "step": 32461 }, { "epoch": 1.08, "grad_norm": 0.4617307782173157, "learning_rate": 0.0004277296262171004, "loss": 1.7783, "step": 32462 }, { "epoch": 1.08, "grad_norm": 0.46085089445114136, "learning_rate": 0.00042772016871360264, "loss": 1.844, "step": 32463 }, { "epoch": 1.08, "grad_norm": 0.4669615626335144, "learning_rate": 0.0004277107110550715, "loss": 1.7818, "step": 32464 }, { "epoch": 1.08, "grad_norm": 0.49087730050086975, "learning_rate": 0.0004277012532415183, "loss": 1.7906, "step": 32465 }, { "epoch": 1.08, "grad_norm": 0.46606943011283875, "learning_rate": 0.00042769179527295465, "loss": 1.8054, "step": 32466 }, { "epoch": 1.08, "grad_norm": 0.4645669162273407, "learning_rate": 0.000427682337149392, "loss": 1.7602, "step": 32467 }, { "epoch": 1.08, "grad_norm": 0.4492577016353607, "learning_rate": 0.00042767287887084175, "loss": 1.8059, "step": 32468 }, { "epoch": 1.08, "grad_norm": 0.47725504636764526, "learning_rate": 0.0004276634204373155, "loss": 1.8201, "step": 32469 }, { "epoch": 1.08, "grad_norm": 0.44890639185905457, "learning_rate": 0.00042765396184882455, "loss": 1.783, "step": 32470 }, { "epoch": 1.08, "grad_norm": 0.4643077850341797, "learning_rate": 0.0004276445031053806, "loss": 1.8436, "step": 32471 }, { "epoch": 1.08, "grad_norm": 0.462878942489624, "learning_rate": 0.00042763504420699505, "loss": 1.7878, "step": 32472 }, { "epoch": 1.08, "grad_norm": 0.47074371576309204, "learning_rate": 0.0004276255851536793, "loss": 1.8685, "step": 32473 }, { "epoch": 1.08, "grad_norm": 0.47093465924263, "learning_rate": 0.0004276161259454449, "loss": 1.7844, "step": 32474 }, { "epoch": 1.08, "grad_norm": 0.4779244065284729, "learning_rate": 0.0004276066665823034, "loss": 1.8509, "step": 32475 }, { "epoch": 1.08, "grad_norm": 0.45421072840690613, "learning_rate": 0.00042759720706426606, "loss": 1.7622, "step": 32476 }, { "epoch": 1.08, "grad_norm": 0.46288636326789856, "learning_rate": 0.00042758774739134467, "loss": 1.8176, "step": 32477 }, { "epoch": 1.08, "grad_norm": 0.5983532667160034, "learning_rate": 0.00042757828756355046, "loss": 1.888, "step": 32478 }, { "epoch": 1.08, "grad_norm": 0.45482829213142395, "learning_rate": 0.00042756882758089506, "loss": 1.8356, "step": 32479 }, { "epoch": 1.08, "grad_norm": 0.4851645827293396, "learning_rate": 0.0004275593674433899, "loss": 1.7972, "step": 32480 }, { "epoch": 1.08, "grad_norm": 0.4668157398700714, "learning_rate": 0.0004275499071510464, "loss": 1.8011, "step": 32481 }, { "epoch": 1.08, "grad_norm": 0.46764275431632996, "learning_rate": 0.0004275404467038762, "loss": 1.7648, "step": 32482 }, { "epoch": 1.08, "grad_norm": 0.4532047212123871, "learning_rate": 0.00042753098610189054, "loss": 1.7971, "step": 32483 }, { "epoch": 1.08, "grad_norm": 0.45766371488571167, "learning_rate": 0.0004275215253451012, "loss": 1.8148, "step": 32484 }, { "epoch": 1.08, "grad_norm": 0.4832361936569214, "learning_rate": 0.00042751206443351946, "loss": 1.846, "step": 32485 }, { "epoch": 1.08, "grad_norm": 0.47090277075767517, "learning_rate": 0.0004275026033671568, "loss": 1.7326, "step": 32486 }, { "epoch": 1.08, "grad_norm": 0.4729517996311188, "learning_rate": 0.00042749314214602497, "loss": 1.7987, "step": 32487 }, { "epoch": 1.08, "grad_norm": 0.4557308256626129, "learning_rate": 0.000427483680770135, "loss": 1.6998, "step": 32488 }, { "epoch": 1.08, "grad_norm": 0.4506062865257263, "learning_rate": 0.00042747421923949874, "loss": 1.8038, "step": 32489 }, { "epoch": 1.08, "grad_norm": 0.45893657207489014, "learning_rate": 0.0004274647575541275, "loss": 1.7559, "step": 32490 }, { "epoch": 1.08, "grad_norm": 0.4541948139667511, "learning_rate": 0.00042745529571403294, "loss": 1.716, "step": 32491 }, { "epoch": 1.08, "grad_norm": 0.4539797604084015, "learning_rate": 0.0004274458337192264, "loss": 1.7618, "step": 32492 }, { "epoch": 1.08, "grad_norm": 0.4488123953342438, "learning_rate": 0.00042743637156971933, "loss": 1.7757, "step": 32493 }, { "epoch": 1.08, "grad_norm": 0.43907660245895386, "learning_rate": 0.00042742690926552334, "loss": 1.7552, "step": 32494 }, { "epoch": 1.08, "grad_norm": 0.4524628221988678, "learning_rate": 0.00042741744680664987, "loss": 1.7766, "step": 32495 }, { "epoch": 1.08, "grad_norm": 0.4641079008579254, "learning_rate": 0.0004274079841931104, "loss": 1.82, "step": 32496 }, { "epoch": 1.08, "grad_norm": 0.4735565483570099, "learning_rate": 0.0004273985214249164, "loss": 1.8099, "step": 32497 }, { "epoch": 1.08, "grad_norm": 0.4713805913925171, "learning_rate": 0.00042738905850207924, "loss": 1.7661, "step": 32498 }, { "epoch": 1.08, "grad_norm": 0.45628929138183594, "learning_rate": 0.0004273795954246106, "loss": 1.7678, "step": 32499 }, { "epoch": 1.08, "grad_norm": 0.4578036069869995, "learning_rate": 0.000427370132192522, "loss": 1.7963, "step": 32500 }, { "epoch": 1.08, "grad_norm": 0.4707302749156952, "learning_rate": 0.0004273606688058248, "loss": 1.8298, "step": 32501 }, { "epoch": 1.08, "grad_norm": 0.45334121584892273, "learning_rate": 0.00042735120526453044, "loss": 1.8234, "step": 32502 }, { "epoch": 1.08, "grad_norm": 0.47026240825653076, "learning_rate": 0.0004273417415686505, "loss": 1.8497, "step": 32503 }, { "epoch": 1.08, "grad_norm": 0.4654989242553711, "learning_rate": 0.00042733227771819647, "loss": 1.8405, "step": 32504 }, { "epoch": 1.08, "grad_norm": 0.4582039713859558, "learning_rate": 0.0004273228137131798, "loss": 1.7884, "step": 32505 }, { "epoch": 1.08, "grad_norm": 0.4641967713832855, "learning_rate": 0.00042731334955361193, "loss": 1.7425, "step": 32506 }, { "epoch": 1.08, "grad_norm": 0.44888949394226074, "learning_rate": 0.0004273038852395046, "loss": 1.7386, "step": 32507 }, { "epoch": 1.08, "grad_norm": 0.45950913429260254, "learning_rate": 0.000427294420770869, "loss": 1.8053, "step": 32508 }, { "epoch": 1.08, "grad_norm": 0.45983415842056274, "learning_rate": 0.0004272849561477167, "loss": 1.8377, "step": 32509 }, { "epoch": 1.08, "grad_norm": 0.46565738320350647, "learning_rate": 0.0004272754913700592, "loss": 1.8349, "step": 32510 }, { "epoch": 1.08, "grad_norm": 0.47133731842041016, "learning_rate": 0.00042726602643790805, "loss": 1.7563, "step": 32511 }, { "epoch": 1.08, "grad_norm": 0.46836984157562256, "learning_rate": 0.0004272565613512747, "loss": 1.7515, "step": 32512 }, { "epoch": 1.08, "grad_norm": 0.45811963081359863, "learning_rate": 0.0004272470961101706, "loss": 1.8027, "step": 32513 }, { "epoch": 1.08, "grad_norm": 0.4702991545200348, "learning_rate": 0.00042723763071460735, "loss": 1.7257, "step": 32514 }, { "epoch": 1.08, "grad_norm": 0.4696017801761627, "learning_rate": 0.0004272281651645963, "loss": 1.8163, "step": 32515 }, { "epoch": 1.08, "grad_norm": 0.48125970363616943, "learning_rate": 0.00042721869946014897, "loss": 1.8177, "step": 32516 }, { "epoch": 1.08, "grad_norm": 0.461672306060791, "learning_rate": 0.000427209233601277, "loss": 1.7569, "step": 32517 }, { "epoch": 1.08, "grad_norm": 2.431018590927124, "learning_rate": 0.00042719976758799166, "loss": 1.7473, "step": 32518 }, { "epoch": 1.08, "grad_norm": 0.4646395444869995, "learning_rate": 0.0004271903014203046, "loss": 1.6961, "step": 32519 }, { "epoch": 1.08, "grad_norm": 0.44797778129577637, "learning_rate": 0.00042718083509822716, "loss": 1.7944, "step": 32520 }, { "epoch": 1.08, "grad_norm": 0.44775906205177307, "learning_rate": 0.0004271713686217711, "loss": 1.703, "step": 32521 }, { "epoch": 1.08, "grad_norm": 0.4485572278499603, "learning_rate": 0.0004271619019909476, "loss": 1.7886, "step": 32522 }, { "epoch": 1.08, "grad_norm": 0.4431345760822296, "learning_rate": 0.0004271524352057683, "loss": 1.7706, "step": 32523 }, { "epoch": 1.08, "grad_norm": 0.4664025604724884, "learning_rate": 0.00042714296826624473, "loss": 1.7887, "step": 32524 }, { "epoch": 1.08, "grad_norm": 0.45518308877944946, "learning_rate": 0.0004271335011723883, "loss": 1.8005, "step": 32525 }, { "epoch": 1.08, "grad_norm": 0.4592134654521942, "learning_rate": 0.0004271240339242105, "loss": 1.711, "step": 32526 }, { "epoch": 1.08, "grad_norm": 0.45153358578681946, "learning_rate": 0.0004271145665217229, "loss": 1.8065, "step": 32527 }, { "epoch": 1.08, "grad_norm": 0.46004489064216614, "learning_rate": 0.000427105098964937, "loss": 1.7798, "step": 32528 }, { "epoch": 1.08, "grad_norm": 0.44600173830986023, "learning_rate": 0.0004270956312538642, "loss": 1.822, "step": 32529 }, { "epoch": 1.08, "grad_norm": 0.44312989711761475, "learning_rate": 0.0004270861633885159, "loss": 1.8194, "step": 32530 }, { "epoch": 1.08, "grad_norm": 0.6105103492736816, "learning_rate": 0.00042707669536890387, "loss": 1.7779, "step": 32531 }, { "epoch": 1.08, "grad_norm": 0.4509800374507904, "learning_rate": 0.00042706722719503947, "loss": 1.8126, "step": 32532 }, { "epoch": 1.08, "grad_norm": 0.4616067707538605, "learning_rate": 0.0004270577588669341, "loss": 1.8577, "step": 32533 }, { "epoch": 1.08, "grad_norm": 0.4690951108932495, "learning_rate": 0.0004270482903845993, "loss": 1.8525, "step": 32534 }, { "epoch": 1.08, "grad_norm": 0.45808762311935425, "learning_rate": 0.0004270388217480467, "loss": 1.8171, "step": 32535 }, { "epoch": 1.08, "grad_norm": 0.4348398447036743, "learning_rate": 0.00042702935295728763, "loss": 1.7164, "step": 32536 }, { "epoch": 1.08, "grad_norm": 0.46076446771621704, "learning_rate": 0.0004270198840123337, "loss": 1.833, "step": 32537 }, { "epoch": 1.08, "grad_norm": 0.4590843915939331, "learning_rate": 0.0004270104149131962, "loss": 1.7858, "step": 32538 }, { "epoch": 1.08, "grad_norm": 0.46125903725624084, "learning_rate": 0.0004270009456598869, "loss": 1.7523, "step": 32539 }, { "epoch": 1.08, "grad_norm": 0.450275182723999, "learning_rate": 0.00042699147625241716, "loss": 1.8343, "step": 32540 }, { "epoch": 1.08, "grad_norm": 0.4544016420841217, "learning_rate": 0.0004269820066907984, "loss": 1.7216, "step": 32541 }, { "epoch": 1.08, "grad_norm": 0.4756262004375458, "learning_rate": 0.00042697253697504236, "loss": 1.7399, "step": 32542 }, { "epoch": 1.08, "grad_norm": 0.474915474653244, "learning_rate": 0.00042696306710516017, "loss": 1.8723, "step": 32543 }, { "epoch": 1.08, "grad_norm": 0.476756751537323, "learning_rate": 0.00042695359708116364, "loss": 1.8223, "step": 32544 }, { "epoch": 1.08, "grad_norm": 0.463614821434021, "learning_rate": 0.0004269441269030641, "loss": 1.7253, "step": 32545 }, { "epoch": 1.08, "grad_norm": 0.4486778676509857, "learning_rate": 0.00042693465657087304, "loss": 1.8377, "step": 32546 }, { "epoch": 1.08, "grad_norm": 0.4567977786064148, "learning_rate": 0.00042692518608460214, "loss": 1.7698, "step": 32547 }, { "epoch": 1.08, "grad_norm": 0.4533274173736572, "learning_rate": 0.0004269157154442627, "loss": 1.8365, "step": 32548 }, { "epoch": 1.08, "grad_norm": 0.4498712122440338, "learning_rate": 0.0004269062446498663, "loss": 1.7766, "step": 32549 }, { "epoch": 1.08, "grad_norm": 0.4485930800437927, "learning_rate": 0.00042689677370142434, "loss": 1.7353, "step": 32550 }, { "epoch": 1.08, "grad_norm": 0.47238343954086304, "learning_rate": 0.00042688730259894854, "loss": 1.8674, "step": 32551 }, { "epoch": 1.08, "grad_norm": 0.4519110918045044, "learning_rate": 0.00042687783134245016, "loss": 1.7179, "step": 32552 }, { "epoch": 1.08, "grad_norm": 0.46333229541778564, "learning_rate": 0.00042686835993194076, "loss": 1.7553, "step": 32553 }, { "epoch": 1.08, "grad_norm": 0.46527475118637085, "learning_rate": 0.00042685888836743193, "loss": 1.8273, "step": 32554 }, { "epoch": 1.08, "grad_norm": 0.4693858027458191, "learning_rate": 0.000426849416648935, "loss": 1.7608, "step": 32555 }, { "epoch": 1.08, "grad_norm": 0.448762983083725, "learning_rate": 0.0004268399447764617, "loss": 1.7327, "step": 32556 }, { "epoch": 1.08, "grad_norm": 0.4495261609554291, "learning_rate": 0.0004268304727500233, "loss": 1.7402, "step": 32557 }, { "epoch": 1.08, "grad_norm": 0.4666711986064911, "learning_rate": 0.00042682100056963144, "loss": 1.7903, "step": 32558 }, { "epoch": 1.08, "grad_norm": 0.4706938564777374, "learning_rate": 0.00042681152823529755, "loss": 1.7806, "step": 32559 }, { "epoch": 1.08, "grad_norm": 0.4503093957901001, "learning_rate": 0.0004268020557470332, "loss": 1.8237, "step": 32560 }, { "epoch": 1.08, "grad_norm": 0.45102229714393616, "learning_rate": 0.0004267925831048498, "loss": 1.7241, "step": 32561 }, { "epoch": 1.08, "grad_norm": 0.4582449495792389, "learning_rate": 0.00042678311030875895, "loss": 1.8067, "step": 32562 }, { "epoch": 1.08, "grad_norm": 0.4573889970779419, "learning_rate": 0.00042677363735877196, "loss": 1.6725, "step": 32563 }, { "epoch": 1.08, "grad_norm": 0.4579668939113617, "learning_rate": 0.0004267641642549005, "loss": 1.8877, "step": 32564 }, { "epoch": 1.08, "grad_norm": 0.4435133934020996, "learning_rate": 0.00042675469099715603, "loss": 1.7066, "step": 32565 }, { "epoch": 1.08, "grad_norm": 0.4717242419719696, "learning_rate": 0.00042674521758555, "loss": 1.8346, "step": 32566 }, { "epoch": 1.08, "grad_norm": 0.4629882872104645, "learning_rate": 0.0004267357440200941, "loss": 1.7774, "step": 32567 }, { "epoch": 1.08, "grad_norm": 0.4538583755493164, "learning_rate": 0.00042672627030079953, "loss": 1.7646, "step": 32568 }, { "epoch": 1.08, "grad_norm": 0.48511630296707153, "learning_rate": 0.00042671679642767804, "loss": 1.8266, "step": 32569 }, { "epoch": 1.08, "grad_norm": 0.4998840391635895, "learning_rate": 0.00042670732240074094, "loss": 1.8288, "step": 32570 }, { "epoch": 1.08, "grad_norm": 0.460439532995224, "learning_rate": 0.0004266978482199998, "loss": 1.8597, "step": 32571 }, { "epoch": 1.08, "grad_norm": 0.4469657242298126, "learning_rate": 0.0004266883738854663, "loss": 1.7305, "step": 32572 }, { "epoch": 1.08, "grad_norm": 0.4475111961364746, "learning_rate": 0.0004266788993971516, "loss": 1.8347, "step": 32573 }, { "epoch": 1.08, "grad_norm": 0.46363314986228943, "learning_rate": 0.0004266694247550675, "loss": 1.8241, "step": 32574 }, { "epoch": 1.08, "grad_norm": 0.47593024373054504, "learning_rate": 0.0004266599499592253, "loss": 1.7821, "step": 32575 }, { "epoch": 1.08, "grad_norm": 0.4610911011695862, "learning_rate": 0.0004266504750096367, "loss": 1.8205, "step": 32576 }, { "epoch": 1.08, "grad_norm": 0.4541532099246979, "learning_rate": 0.00042664099990631296, "loss": 1.7542, "step": 32577 }, { "epoch": 1.08, "grad_norm": 0.46712374687194824, "learning_rate": 0.0004266315246492658, "loss": 1.8045, "step": 32578 }, { "epoch": 1.08, "grad_norm": 0.4681215286254883, "learning_rate": 0.00042662204923850665, "loss": 1.7618, "step": 32579 }, { "epoch": 1.08, "grad_norm": 0.4740537703037262, "learning_rate": 0.00042661257367404684, "loss": 1.8726, "step": 32580 }, { "epoch": 1.08, "grad_norm": 0.4800906479358673, "learning_rate": 0.00042660309795589807, "loss": 1.7832, "step": 32581 }, { "epoch": 1.08, "grad_norm": 0.46886003017425537, "learning_rate": 0.0004265936220840719, "loss": 1.7963, "step": 32582 }, { "epoch": 1.08, "grad_norm": 0.4432213604450226, "learning_rate": 0.0004265841460585796, "loss": 1.8119, "step": 32583 }, { "epoch": 1.08, "grad_norm": 0.47275879979133606, "learning_rate": 0.00042657466987943283, "loss": 1.8103, "step": 32584 }, { "epoch": 1.08, "grad_norm": 0.48050984740257263, "learning_rate": 0.0004265651935466431, "loss": 1.7857, "step": 32585 }, { "epoch": 1.08, "grad_norm": 0.45078423619270325, "learning_rate": 0.00042655571706022187, "loss": 1.7248, "step": 32586 }, { "epoch": 1.08, "grad_norm": 0.45697885751724243, "learning_rate": 0.0004265462404201806, "loss": 1.8433, "step": 32587 }, { "epoch": 1.08, "grad_norm": 0.4827098846435547, "learning_rate": 0.0004265367636265309, "loss": 1.7999, "step": 32588 }, { "epoch": 1.08, "grad_norm": 0.46091458201408386, "learning_rate": 0.00042652728667928416, "loss": 1.725, "step": 32589 }, { "epoch": 1.08, "grad_norm": 0.4581560492515564, "learning_rate": 0.00042651780957845203, "loss": 1.7647, "step": 32590 }, { "epoch": 1.08, "grad_norm": 0.4634990692138672, "learning_rate": 0.0004265083323240458, "loss": 1.7664, "step": 32591 }, { "epoch": 1.08, "grad_norm": 0.4639330506324768, "learning_rate": 0.00042649885491607715, "loss": 1.7253, "step": 32592 }, { "epoch": 1.08, "grad_norm": 0.46756184101104736, "learning_rate": 0.0004264893773545575, "loss": 1.8296, "step": 32593 }, { "epoch": 1.08, "grad_norm": 0.48615509271621704, "learning_rate": 0.00042647989963949843, "loss": 1.8564, "step": 32594 }, { "epoch": 1.08, "grad_norm": 0.4596487283706665, "learning_rate": 0.00042647042177091136, "loss": 1.7461, "step": 32595 }, { "epoch": 1.08, "grad_norm": 0.46316325664520264, "learning_rate": 0.00042646094374880787, "loss": 1.8249, "step": 32596 }, { "epoch": 1.08, "grad_norm": 0.4506896138191223, "learning_rate": 0.0004264514655731994, "loss": 1.7991, "step": 32597 }, { "epoch": 1.08, "grad_norm": 0.4598621726036072, "learning_rate": 0.00042644198724409746, "loss": 1.8017, "step": 32598 }, { "epoch": 1.08, "grad_norm": 0.4804989993572235, "learning_rate": 0.0004264325087615137, "loss": 1.8551, "step": 32599 }, { "epoch": 1.08, "grad_norm": 0.4691654443740845, "learning_rate": 0.0004264230301254593, "loss": 1.7677, "step": 32600 }, { "epoch": 1.08, "grad_norm": 0.4708957076072693, "learning_rate": 0.00042641355133594613, "loss": 1.7582, "step": 32601 }, { "epoch": 1.08, "grad_norm": 0.4497905373573303, "learning_rate": 0.0004264040723929855, "loss": 1.7495, "step": 32602 }, { "epoch": 1.08, "grad_norm": 0.46476981043815613, "learning_rate": 0.0004263945932965889, "loss": 1.7652, "step": 32603 }, { "epoch": 1.08, "grad_norm": 0.46906349062919617, "learning_rate": 0.00042638511404676795, "loss": 1.8405, "step": 32604 }, { "epoch": 1.08, "grad_norm": 0.4615198075771332, "learning_rate": 0.00042637563464353406, "loss": 1.8226, "step": 32605 }, { "epoch": 1.08, "grad_norm": 0.45402440428733826, "learning_rate": 0.0004263661550868988, "loss": 1.7339, "step": 32606 }, { "epoch": 1.08, "grad_norm": 0.46247589588165283, "learning_rate": 0.0004263566753768736, "loss": 1.7913, "step": 32607 }, { "epoch": 1.08, "grad_norm": 0.46530982851982117, "learning_rate": 0.00042634719551347004, "loss": 1.8317, "step": 32608 }, { "epoch": 1.08, "grad_norm": 0.4541913568973541, "learning_rate": 0.0004263377154966996, "loss": 1.7885, "step": 32609 }, { "epoch": 1.08, "grad_norm": 0.4494844079017639, "learning_rate": 0.0004263282353265738, "loss": 1.6942, "step": 32610 }, { "epoch": 1.08, "grad_norm": 0.47654542326927185, "learning_rate": 0.0004263187550031041, "loss": 1.8309, "step": 32611 }, { "epoch": 1.09, "grad_norm": 0.4459153115749359, "learning_rate": 0.00042630927452630207, "loss": 1.694, "step": 32612 }, { "epoch": 1.09, "grad_norm": 0.4652150273323059, "learning_rate": 0.00042629979389617926, "loss": 1.7266, "step": 32613 }, { "epoch": 1.09, "grad_norm": 0.4747588634490967, "learning_rate": 0.00042629031311274695, "loss": 1.8993, "step": 32614 }, { "epoch": 1.09, "grad_norm": 0.4634736478328705, "learning_rate": 0.0004262808321760169, "loss": 1.7609, "step": 32615 }, { "epoch": 1.09, "grad_norm": 0.4671279489994049, "learning_rate": 0.00042627135108600053, "loss": 1.8014, "step": 32616 }, { "epoch": 1.09, "grad_norm": 0.4517359137535095, "learning_rate": 0.0004262618698427093, "loss": 1.8179, "step": 32617 }, { "epoch": 1.09, "grad_norm": 0.46303197741508484, "learning_rate": 0.0004262523884461548, "loss": 1.7242, "step": 32618 }, { "epoch": 1.09, "grad_norm": 0.4563300311565399, "learning_rate": 0.00042624290689634856, "loss": 1.7685, "step": 32619 }, { "epoch": 1.09, "grad_norm": 0.45401254296302795, "learning_rate": 0.000426233425193302, "loss": 1.7426, "step": 32620 }, { "epoch": 1.09, "grad_norm": 0.45660874247550964, "learning_rate": 0.00042622394333702654, "loss": 1.8469, "step": 32621 }, { "epoch": 1.09, "grad_norm": 0.458638072013855, "learning_rate": 0.0004262144613275339, "loss": 1.8829, "step": 32622 }, { "epoch": 1.09, "grad_norm": 0.46933186054229736, "learning_rate": 0.00042620497916483554, "loss": 1.7838, "step": 32623 }, { "epoch": 1.09, "grad_norm": 0.45983079075813293, "learning_rate": 0.00042619549684894294, "loss": 1.7244, "step": 32624 }, { "epoch": 1.09, "grad_norm": 0.4717722535133362, "learning_rate": 0.0004261860143798676, "loss": 1.7314, "step": 32625 }, { "epoch": 1.09, "grad_norm": 0.4715999960899353, "learning_rate": 0.00042617653175762096, "loss": 1.8024, "step": 32626 }, { "epoch": 1.09, "grad_norm": 0.4668234586715698, "learning_rate": 0.0004261670489822146, "loss": 1.7987, "step": 32627 }, { "epoch": 1.09, "grad_norm": 0.4565774202346802, "learning_rate": 0.00042615756605366003, "loss": 1.741, "step": 32628 }, { "epoch": 1.09, "grad_norm": 0.4668276906013489, "learning_rate": 0.00042614808297196887, "loss": 1.8332, "step": 32629 }, { "epoch": 1.09, "grad_norm": 0.46417123079299927, "learning_rate": 0.00042613859973715236, "loss": 1.8229, "step": 32630 }, { "epoch": 1.09, "grad_norm": 0.4675222933292389, "learning_rate": 0.0004261291163492224, "loss": 1.7573, "step": 32631 }, { "epoch": 1.09, "grad_norm": 0.4558546841144562, "learning_rate": 0.0004261196328081901, "loss": 1.7683, "step": 32632 }, { "epoch": 1.09, "grad_norm": 0.4755552113056183, "learning_rate": 0.0004261101491140672, "loss": 1.8507, "step": 32633 }, { "epoch": 1.09, "grad_norm": 0.4622092843055725, "learning_rate": 0.0004261006652668652, "loss": 1.7195, "step": 32634 }, { "epoch": 1.09, "grad_norm": 0.463177889585495, "learning_rate": 0.00042609118126659545, "loss": 1.7814, "step": 32635 }, { "epoch": 1.09, "grad_norm": 0.4612424373626709, "learning_rate": 0.0004260816971132697, "loss": 1.8546, "step": 32636 }, { "epoch": 1.09, "grad_norm": 0.46143603324890137, "learning_rate": 0.00042607221280689927, "loss": 1.8276, "step": 32637 }, { "epoch": 1.09, "grad_norm": 0.471993625164032, "learning_rate": 0.0004260627283474958, "loss": 1.8407, "step": 32638 }, { "epoch": 1.09, "grad_norm": 0.4791449308395386, "learning_rate": 0.0004260532437350707, "loss": 1.8023, "step": 32639 }, { "epoch": 1.09, "grad_norm": 0.46912866830825806, "learning_rate": 0.00042604375896963565, "loss": 1.7643, "step": 32640 }, { "epoch": 1.09, "grad_norm": 0.4642377197742462, "learning_rate": 0.0004260342740512019, "loss": 1.7742, "step": 32641 }, { "epoch": 1.09, "grad_norm": 0.45947757363319397, "learning_rate": 0.0004260247889797812, "loss": 1.7697, "step": 32642 }, { "epoch": 1.09, "grad_norm": 0.4592598080635071, "learning_rate": 0.000426015303755385, "loss": 1.7604, "step": 32643 }, { "epoch": 1.09, "grad_norm": 0.4572073817253113, "learning_rate": 0.0004260058183780247, "loss": 1.7272, "step": 32644 }, { "epoch": 1.09, "grad_norm": 0.4690505564212799, "learning_rate": 0.00042599633284771195, "loss": 1.8225, "step": 32645 }, { "epoch": 1.09, "grad_norm": 0.4672226309776306, "learning_rate": 0.0004259868471644582, "loss": 1.8061, "step": 32646 }, { "epoch": 1.09, "grad_norm": 0.46653568744659424, "learning_rate": 0.00042597736132827504, "loss": 1.7783, "step": 32647 }, { "epoch": 1.09, "grad_norm": 0.5227916240692139, "learning_rate": 0.0004259678753391739, "loss": 1.7712, "step": 32648 }, { "epoch": 1.09, "grad_norm": 0.44415727257728577, "learning_rate": 0.0004259583891971663, "loss": 1.7417, "step": 32649 }, { "epoch": 1.09, "grad_norm": 0.4624289870262146, "learning_rate": 0.00042594890290226375, "loss": 1.82, "step": 32650 }, { "epoch": 1.09, "grad_norm": 0.47758448123931885, "learning_rate": 0.00042593941645447785, "loss": 1.7841, "step": 32651 }, { "epoch": 1.09, "grad_norm": 0.5002235770225525, "learning_rate": 0.00042592992985382, "loss": 1.8392, "step": 32652 }, { "epoch": 1.09, "grad_norm": 0.4552273750305176, "learning_rate": 0.0004259204431003018, "loss": 1.7401, "step": 32653 }, { "epoch": 1.09, "grad_norm": 0.45550698041915894, "learning_rate": 0.0004259109561939348, "loss": 1.8177, "step": 32654 }, { "epoch": 1.09, "grad_norm": 0.4677019417285919, "learning_rate": 0.00042590146913473026, "loss": 1.7789, "step": 32655 }, { "epoch": 1.09, "grad_norm": 0.46305495500564575, "learning_rate": 0.0004258919819227001, "loss": 1.7837, "step": 32656 }, { "epoch": 1.09, "grad_norm": 0.47161003947257996, "learning_rate": 0.0004258824945578555, "loss": 1.8338, "step": 32657 }, { "epoch": 1.09, "grad_norm": 0.4660970866680145, "learning_rate": 0.0004258730070402081, "loss": 1.8041, "step": 32658 }, { "epoch": 1.09, "grad_norm": 0.47779005765914917, "learning_rate": 0.0004258635193697695, "loss": 1.7363, "step": 32659 }, { "epoch": 1.09, "grad_norm": 0.4752470552921295, "learning_rate": 0.00042585403154655105, "loss": 1.8511, "step": 32660 }, { "epoch": 1.09, "grad_norm": 0.4889369010925293, "learning_rate": 0.0004258445435705644, "loss": 1.7926, "step": 32661 }, { "epoch": 1.09, "grad_norm": 0.452851802110672, "learning_rate": 0.00042583505544182095, "loss": 1.7022, "step": 32662 }, { "epoch": 1.09, "grad_norm": 0.4529537856578827, "learning_rate": 0.0004258255671603324, "loss": 1.7417, "step": 32663 }, { "epoch": 1.09, "grad_norm": 0.4843410551548004, "learning_rate": 0.00042581607872611, "loss": 1.8421, "step": 32664 }, { "epoch": 1.09, "grad_norm": 0.46386879682540894, "learning_rate": 0.00042580659013916554, "loss": 1.7583, "step": 32665 }, { "epoch": 1.09, "grad_norm": 0.4705182909965515, "learning_rate": 0.0004257971013995104, "loss": 1.772, "step": 32666 }, { "epoch": 1.09, "grad_norm": 0.476657509803772, "learning_rate": 0.000425787612507156, "loss": 1.7738, "step": 32667 }, { "epoch": 1.09, "grad_norm": 0.47521063685417175, "learning_rate": 0.0004257781234621141, "loss": 1.8387, "step": 32668 }, { "epoch": 1.09, "grad_norm": 0.45745721459388733, "learning_rate": 0.0004257686342643961, "loss": 1.8139, "step": 32669 }, { "epoch": 1.09, "grad_norm": 0.4579976499080658, "learning_rate": 0.0004257591449140134, "loss": 1.79, "step": 32670 }, { "epoch": 1.09, "grad_norm": 0.46962258219718933, "learning_rate": 0.0004257496554109777, "loss": 1.8076, "step": 32671 }, { "epoch": 1.09, "grad_norm": 0.45574185252189636, "learning_rate": 0.0004257401657553004, "loss": 1.7934, "step": 32672 }, { "epoch": 1.09, "grad_norm": 0.4733579456806183, "learning_rate": 0.0004257306759469931, "loss": 1.8164, "step": 32673 }, { "epoch": 1.09, "grad_norm": 0.47912949323654175, "learning_rate": 0.0004257211859860673, "loss": 1.8079, "step": 32674 }, { "epoch": 1.09, "grad_norm": 0.4713621139526367, "learning_rate": 0.0004257116958725345, "loss": 1.8053, "step": 32675 }, { "epoch": 1.09, "grad_norm": 0.4652388095855713, "learning_rate": 0.0004257022056064062, "loss": 1.7145, "step": 32676 }, { "epoch": 1.09, "grad_norm": 0.46811407804489136, "learning_rate": 0.000425692715187694, "loss": 1.7209, "step": 32677 }, { "epoch": 1.09, "grad_norm": 0.46993598341941833, "learning_rate": 0.0004256832246164093, "loss": 1.79, "step": 32678 }, { "epoch": 1.09, "grad_norm": 0.4724366068840027, "learning_rate": 0.0004256737338925637, "loss": 1.7644, "step": 32679 }, { "epoch": 1.09, "grad_norm": 0.4499569237232208, "learning_rate": 0.00042566424301616873, "loss": 1.7966, "step": 32680 }, { "epoch": 1.09, "grad_norm": 0.4628128707408905, "learning_rate": 0.0004256547519872359, "loss": 1.801, "step": 32681 }, { "epoch": 1.09, "grad_norm": 0.4488304555416107, "learning_rate": 0.00042564526080577666, "loss": 1.77, "step": 32682 }, { "epoch": 1.09, "grad_norm": 0.46585771441459656, "learning_rate": 0.00042563576947180257, "loss": 1.7905, "step": 32683 }, { "epoch": 1.09, "grad_norm": 0.45268720388412476, "learning_rate": 0.00042562627798532526, "loss": 1.8564, "step": 32684 }, { "epoch": 1.09, "grad_norm": 0.46745625138282776, "learning_rate": 0.000425616786346356, "loss": 1.8426, "step": 32685 }, { "epoch": 1.09, "grad_norm": 0.4338265359401703, "learning_rate": 0.0004256072945549066, "loss": 1.7487, "step": 32686 }, { "epoch": 1.09, "grad_norm": 0.4593762159347534, "learning_rate": 0.0004255978026109884, "loss": 1.7902, "step": 32687 }, { "epoch": 1.09, "grad_norm": 0.4594278931617737, "learning_rate": 0.00042558831051461306, "loss": 1.8011, "step": 32688 }, { "epoch": 1.09, "grad_norm": 0.45381250977516174, "learning_rate": 0.000425578818265792, "loss": 1.7594, "step": 32689 }, { "epoch": 1.09, "grad_norm": 0.4549506604671478, "learning_rate": 0.0004255693258645366, "loss": 1.6998, "step": 32690 }, { "epoch": 1.09, "grad_norm": 0.45983994007110596, "learning_rate": 0.0004255598333108587, "loss": 1.7977, "step": 32691 }, { "epoch": 1.09, "grad_norm": 0.45743709802627563, "learning_rate": 0.00042555034060476956, "loss": 1.8456, "step": 32692 }, { "epoch": 1.09, "grad_norm": 0.4568401575088501, "learning_rate": 0.0004255408477462809, "loss": 1.7561, "step": 32693 }, { "epoch": 1.09, "grad_norm": 0.46036043763160706, "learning_rate": 0.0004255313547354041, "loss": 1.741, "step": 32694 }, { "epoch": 1.09, "grad_norm": 0.4717588722705841, "learning_rate": 0.00042552186157215075, "loss": 1.8457, "step": 32695 }, { "epoch": 1.09, "grad_norm": 0.4652824401855469, "learning_rate": 0.0004255123682565323, "loss": 1.7999, "step": 32696 }, { "epoch": 1.09, "grad_norm": 0.47449663281440735, "learning_rate": 0.0004255028747885604, "loss": 1.8483, "step": 32697 }, { "epoch": 1.09, "grad_norm": 0.4573526084423065, "learning_rate": 0.00042549338116824653, "loss": 1.8502, "step": 32698 }, { "epoch": 1.09, "grad_norm": 0.4501034617424011, "learning_rate": 0.0004254838873956021, "loss": 1.7362, "step": 32699 }, { "epoch": 1.09, "grad_norm": 0.4844132363796234, "learning_rate": 0.0004254743934706388, "loss": 1.7335, "step": 32700 }, { "epoch": 1.09, "grad_norm": 0.4580111503601074, "learning_rate": 0.00042546489939336796, "loss": 1.7592, "step": 32701 }, { "epoch": 1.09, "grad_norm": 0.4538818597793579, "learning_rate": 0.00042545540516380126, "loss": 1.7778, "step": 32702 }, { "epoch": 1.09, "grad_norm": 0.4558400511741638, "learning_rate": 0.00042544591078195023, "loss": 1.7824, "step": 32703 }, { "epoch": 1.09, "grad_norm": 0.4585408568382263, "learning_rate": 0.00042543641624782637, "loss": 1.8307, "step": 32704 }, { "epoch": 1.09, "grad_norm": 0.4520149827003479, "learning_rate": 0.00042542692156144116, "loss": 1.8303, "step": 32705 }, { "epoch": 1.09, "grad_norm": 0.4560084342956543, "learning_rate": 0.00042541742672280605, "loss": 1.7358, "step": 32706 }, { "epoch": 1.09, "grad_norm": 0.4369474947452545, "learning_rate": 0.00042540793173193275, "loss": 1.7892, "step": 32707 }, { "epoch": 1.09, "grad_norm": 0.4553411900997162, "learning_rate": 0.0004253984365888327, "loss": 1.7868, "step": 32708 }, { "epoch": 1.09, "grad_norm": 0.46123889088630676, "learning_rate": 0.0004253889412935174, "loss": 1.7802, "step": 32709 }, { "epoch": 1.09, "grad_norm": 0.4718341827392578, "learning_rate": 0.0004253794458459984, "loss": 1.8253, "step": 32710 }, { "epoch": 1.09, "grad_norm": 0.4518616795539856, "learning_rate": 0.00042536995024628724, "loss": 1.7256, "step": 32711 }, { "epoch": 1.09, "grad_norm": 0.45306524634361267, "learning_rate": 0.00042536045449439547, "loss": 1.7783, "step": 32712 }, { "epoch": 1.09, "grad_norm": 0.4778602123260498, "learning_rate": 0.0004253509585903345, "loss": 1.7839, "step": 32713 }, { "epoch": 1.09, "grad_norm": 0.46693938970565796, "learning_rate": 0.00042534146253411597, "loss": 1.7493, "step": 32714 }, { "epoch": 1.09, "grad_norm": 0.46951159834861755, "learning_rate": 0.0004253319663257514, "loss": 1.8099, "step": 32715 }, { "epoch": 1.09, "grad_norm": 0.4629961848258972, "learning_rate": 0.00042532246996525227, "loss": 1.7184, "step": 32716 }, { "epoch": 1.09, "grad_norm": 0.45909592509269714, "learning_rate": 0.0004253129734526301, "loss": 1.7488, "step": 32717 }, { "epoch": 1.09, "grad_norm": 0.4725741744041443, "learning_rate": 0.0004253034767878965, "loss": 1.7901, "step": 32718 }, { "epoch": 1.09, "grad_norm": 0.4735002815723419, "learning_rate": 0.00042529397997106294, "loss": 1.8228, "step": 32719 }, { "epoch": 1.09, "grad_norm": 0.4566859006881714, "learning_rate": 0.0004252844830021409, "loss": 1.7217, "step": 32720 }, { "epoch": 1.09, "grad_norm": 0.4701625406742096, "learning_rate": 0.00042527498588114203, "loss": 1.7597, "step": 32721 }, { "epoch": 1.09, "grad_norm": 0.4686180651187897, "learning_rate": 0.00042526548860807764, "loss": 1.7298, "step": 32722 }, { "epoch": 1.09, "grad_norm": 0.4692680835723877, "learning_rate": 0.0004252559911829596, "loss": 1.7621, "step": 32723 }, { "epoch": 1.09, "grad_norm": 0.453325480222702, "learning_rate": 0.00042524649360579903, "loss": 1.7878, "step": 32724 }, { "epoch": 1.09, "grad_norm": 0.43456634879112244, "learning_rate": 0.00042523699587660786, "loss": 1.7525, "step": 32725 }, { "epoch": 1.09, "grad_norm": 1.111942172050476, "learning_rate": 0.0004252274979953973, "loss": 1.7959, "step": 32726 }, { "epoch": 1.09, "grad_norm": 0.4521103799343109, "learning_rate": 0.00042521799996217906, "loss": 1.7596, "step": 32727 }, { "epoch": 1.09, "grad_norm": 0.46639326214790344, "learning_rate": 0.00042520850177696465, "loss": 1.8498, "step": 32728 }, { "epoch": 1.09, "grad_norm": 0.4735407531261444, "learning_rate": 0.0004251990034397655, "loss": 1.7841, "step": 32729 }, { "epoch": 1.09, "grad_norm": 0.47185465693473816, "learning_rate": 0.0004251895049505933, "loss": 1.7901, "step": 32730 }, { "epoch": 1.09, "grad_norm": 0.4587109386920929, "learning_rate": 0.0004251800063094594, "loss": 1.8218, "step": 32731 }, { "epoch": 1.09, "grad_norm": 0.4574216306209564, "learning_rate": 0.00042517050751637544, "loss": 1.7234, "step": 32732 }, { "epoch": 1.09, "grad_norm": 0.4594621956348419, "learning_rate": 0.00042516100857135285, "loss": 1.7797, "step": 32733 }, { "epoch": 1.09, "grad_norm": 0.49380937218666077, "learning_rate": 0.0004251515094744034, "loss": 1.8841, "step": 32734 }, { "epoch": 1.09, "grad_norm": 0.4656921327114105, "learning_rate": 0.0004251420102255384, "loss": 1.8002, "step": 32735 }, { "epoch": 1.09, "grad_norm": 0.4585736393928528, "learning_rate": 0.0004251325108247694, "loss": 1.7613, "step": 32736 }, { "epoch": 1.09, "grad_norm": 0.46539562940597534, "learning_rate": 0.000425123011272108, "loss": 1.7772, "step": 32737 }, { "epoch": 1.09, "grad_norm": 0.46696001291275024, "learning_rate": 0.0004251135115675656, "loss": 1.748, "step": 32738 }, { "epoch": 1.09, "grad_norm": 0.4535522162914276, "learning_rate": 0.000425104011711154, "loss": 1.7726, "step": 32739 }, { "epoch": 1.09, "grad_norm": 0.459170401096344, "learning_rate": 0.0004250945117028844, "loss": 1.8315, "step": 32740 }, { "epoch": 1.09, "grad_norm": 0.4623030424118042, "learning_rate": 0.0004250850115427687, "loss": 1.8324, "step": 32741 }, { "epoch": 1.09, "grad_norm": 0.46485477685928345, "learning_rate": 0.000425075511230818, "loss": 1.8052, "step": 32742 }, { "epoch": 1.09, "grad_norm": 0.4754984378814697, "learning_rate": 0.0004250660107670442, "loss": 1.7563, "step": 32743 }, { "epoch": 1.09, "grad_norm": 0.4651281237602234, "learning_rate": 0.0004250565101514586, "loss": 1.8362, "step": 32744 }, { "epoch": 1.09, "grad_norm": 0.4416883587837219, "learning_rate": 0.0004250470093840729, "loss": 1.839, "step": 32745 }, { "epoch": 1.09, "grad_norm": 0.4483410716056824, "learning_rate": 0.00042503750846489857, "loss": 1.9127, "step": 32746 }, { "epoch": 1.09, "grad_norm": 0.45770326256752014, "learning_rate": 0.00042502800739394704, "loss": 1.7956, "step": 32747 }, { "epoch": 1.09, "grad_norm": 0.46160179376602173, "learning_rate": 0.00042501850617123003, "loss": 1.8282, "step": 32748 }, { "epoch": 1.09, "grad_norm": 0.45841526985168457, "learning_rate": 0.0004250090047967589, "loss": 1.7335, "step": 32749 }, { "epoch": 1.09, "grad_norm": 0.4624101519584656, "learning_rate": 0.00042499950327054526, "loss": 1.8226, "step": 32750 }, { "epoch": 1.09, "grad_norm": 0.44081589579582214, "learning_rate": 0.0004249900015926007, "loss": 1.8134, "step": 32751 }, { "epoch": 1.09, "grad_norm": 0.4601046144962311, "learning_rate": 0.00042498049976293653, "loss": 1.8454, "step": 32752 }, { "epoch": 1.09, "grad_norm": 0.4556374251842499, "learning_rate": 0.0004249709977815647, "loss": 1.7878, "step": 32753 }, { "epoch": 1.09, "grad_norm": 0.47609132528305054, "learning_rate": 0.0004249614956484963, "loss": 1.8549, "step": 32754 }, { "epoch": 1.09, "grad_norm": 0.469783753156662, "learning_rate": 0.0004249519933637431, "loss": 1.8591, "step": 32755 }, { "epoch": 1.09, "grad_norm": 0.4561963975429535, "learning_rate": 0.0004249424909273167, "loss": 1.8199, "step": 32756 }, { "epoch": 1.09, "grad_norm": 0.45804330706596375, "learning_rate": 0.0004249329883392284, "loss": 1.798, "step": 32757 }, { "epoch": 1.09, "grad_norm": 0.45221588015556335, "learning_rate": 0.0004249234855994899, "loss": 1.776, "step": 32758 }, { "epoch": 1.09, "grad_norm": 0.45709723234176636, "learning_rate": 0.00042491398270811265, "loss": 1.759, "step": 32759 }, { "epoch": 1.09, "grad_norm": 0.4604828953742981, "learning_rate": 0.00042490447966510834, "loss": 1.7833, "step": 32760 }, { "epoch": 1.09, "grad_norm": 0.4519721567630768, "learning_rate": 0.0004248949764704883, "loss": 1.6888, "step": 32761 }, { "epoch": 1.09, "grad_norm": 0.49137434363365173, "learning_rate": 0.0004248854731242642, "loss": 1.7737, "step": 32762 }, { "epoch": 1.09, "grad_norm": 0.9068993926048279, "learning_rate": 0.0004248759696264475, "loss": 1.7965, "step": 32763 }, { "epoch": 1.09, "grad_norm": 0.46535128355026245, "learning_rate": 0.0004248664659770498, "loss": 1.8002, "step": 32764 }, { "epoch": 1.09, "grad_norm": 0.468951940536499, "learning_rate": 0.0004248569621760826, "loss": 1.7605, "step": 32765 }, { "epoch": 1.09, "grad_norm": 0.48956167697906494, "learning_rate": 0.00042484745822355747, "loss": 1.8344, "step": 32766 }, { "epoch": 1.09, "grad_norm": 0.4932030141353607, "learning_rate": 0.00042483795411948596, "loss": 1.7323, "step": 32767 }, { "epoch": 1.09, "grad_norm": 0.44751453399658203, "learning_rate": 0.00042482844986387947, "loss": 1.8133, "step": 32768 }, { "epoch": 1.09, "grad_norm": 0.46068912744522095, "learning_rate": 0.0004248189454567496, "loss": 1.849, "step": 32769 }, { "epoch": 1.09, "grad_norm": 0.4640522599220276, "learning_rate": 0.000424809440898108, "loss": 1.8266, "step": 32770 }, { "epoch": 1.09, "grad_norm": 0.46481403708457947, "learning_rate": 0.00042479993618796624, "loss": 1.7474, "step": 32771 }, { "epoch": 1.09, "grad_norm": 0.466219961643219, "learning_rate": 0.0004247904313263355, "loss": 1.753, "step": 32772 }, { "epoch": 1.09, "grad_norm": 0.4740167558193207, "learning_rate": 0.0004247809263132278, "loss": 1.7741, "step": 32773 }, { "epoch": 1.09, "grad_norm": 0.47603005170822144, "learning_rate": 0.0004247714211486543, "loss": 1.8443, "step": 32774 }, { "epoch": 1.09, "grad_norm": 0.46811643242836, "learning_rate": 0.0004247619158326267, "loss": 1.8493, "step": 32775 }, { "epoch": 1.09, "grad_norm": 0.45737430453300476, "learning_rate": 0.00042475241036515654, "loss": 1.7379, "step": 32776 }, { "epoch": 1.09, "grad_norm": 0.4578964114189148, "learning_rate": 0.0004247429047462553, "loss": 1.8243, "step": 32777 }, { "epoch": 1.09, "grad_norm": 0.4747094511985779, "learning_rate": 0.00042473339897593465, "loss": 1.8593, "step": 32778 }, { "epoch": 1.09, "grad_norm": 0.45809030532836914, "learning_rate": 0.0004247238930542059, "loss": 1.758, "step": 32779 }, { "epoch": 1.09, "grad_norm": 0.47334301471710205, "learning_rate": 0.00042471438698108083, "loss": 1.8206, "step": 32780 }, { "epoch": 1.09, "grad_norm": 0.4565259516239166, "learning_rate": 0.0004247048807565708, "loss": 1.7531, "step": 32781 }, { "epoch": 1.09, "grad_norm": 0.45188021659851074, "learning_rate": 0.0004246953743806874, "loss": 1.7453, "step": 32782 }, { "epoch": 1.09, "grad_norm": 0.4384852945804596, "learning_rate": 0.00042468586785344227, "loss": 1.7859, "step": 32783 }, { "epoch": 1.09, "grad_norm": 0.48357093334198, "learning_rate": 0.0004246763611748468, "loss": 1.7128, "step": 32784 }, { "epoch": 1.09, "grad_norm": 0.46142661571502686, "learning_rate": 0.00042466685434491266, "loss": 1.8363, "step": 32785 }, { "epoch": 1.09, "grad_norm": 0.4782751500606537, "learning_rate": 0.0004246573473636513, "loss": 1.8281, "step": 32786 }, { "epoch": 1.09, "grad_norm": 0.4536798298358917, "learning_rate": 0.0004246478402310743, "loss": 1.7725, "step": 32787 }, { "epoch": 1.09, "grad_norm": 0.449380099773407, "learning_rate": 0.0004246383329471931, "loss": 1.8611, "step": 32788 }, { "epoch": 1.09, "grad_norm": 0.46491503715515137, "learning_rate": 0.00042462882551201945, "loss": 1.772, "step": 32789 }, { "epoch": 1.09, "grad_norm": 0.4683429002761841, "learning_rate": 0.0004246193179255648, "loss": 1.8304, "step": 32790 }, { "epoch": 1.09, "grad_norm": 0.4662337899208069, "learning_rate": 0.0004246098101878405, "loss": 1.7463, "step": 32791 }, { "epoch": 1.09, "grad_norm": 0.4552186131477356, "learning_rate": 0.00042460030229885833, "loss": 1.7064, "step": 32792 }, { "epoch": 1.09, "grad_norm": 0.4779125154018402, "learning_rate": 0.00042459079425862976, "loss": 1.8162, "step": 32793 }, { "epoch": 1.09, "grad_norm": 0.4785102903842926, "learning_rate": 0.00042458128606716626, "loss": 1.8027, "step": 32794 }, { "epoch": 1.09, "grad_norm": 0.46269017457962036, "learning_rate": 0.00042457177772447954, "loss": 1.7974, "step": 32795 }, { "epoch": 1.09, "grad_norm": 0.4671179950237274, "learning_rate": 0.000424562269230581, "loss": 1.7452, "step": 32796 }, { "epoch": 1.09, "grad_norm": 0.46549558639526367, "learning_rate": 0.00042455276058548215, "loss": 1.8447, "step": 32797 }, { "epoch": 1.09, "grad_norm": 0.5049962401390076, "learning_rate": 0.0004245432517891947, "loss": 1.805, "step": 32798 }, { "epoch": 1.09, "grad_norm": 0.4615594446659088, "learning_rate": 0.00042453374284173007, "loss": 1.776, "step": 32799 }, { "epoch": 1.09, "grad_norm": 0.44494572281837463, "learning_rate": 0.00042452423374309976, "loss": 1.6825, "step": 32800 }, { "epoch": 1.09, "grad_norm": 0.46440389752388, "learning_rate": 0.00042451472449331546, "loss": 1.8056, "step": 32801 }, { "epoch": 1.09, "grad_norm": 0.46053746342658997, "learning_rate": 0.0004245052150923885, "loss": 1.7453, "step": 32802 }, { "epoch": 1.09, "grad_norm": 0.4870453476905823, "learning_rate": 0.0004244957055403307, "loss": 1.7731, "step": 32803 }, { "epoch": 1.09, "grad_norm": 0.4734848141670227, "learning_rate": 0.0004244861958371534, "loss": 1.8229, "step": 32804 }, { "epoch": 1.09, "grad_norm": 0.4342726171016693, "learning_rate": 0.0004244766859828682, "loss": 1.7221, "step": 32805 }, { "epoch": 1.09, "grad_norm": 0.4757792055606842, "learning_rate": 0.0004244671759774867, "loss": 1.7498, "step": 32806 }, { "epoch": 1.09, "grad_norm": 0.46122774481773376, "learning_rate": 0.00042445766582102027, "loss": 1.776, "step": 32807 }, { "epoch": 1.09, "grad_norm": 0.47379985451698303, "learning_rate": 0.00042444815551348063, "loss": 1.7744, "step": 32808 }, { "epoch": 1.09, "grad_norm": 0.4453487992286682, "learning_rate": 0.0004244386450548792, "loss": 1.7638, "step": 32809 }, { "epoch": 1.09, "grad_norm": 0.4542381763458252, "learning_rate": 0.0004244291344452278, "loss": 1.7914, "step": 32810 }, { "epoch": 1.09, "grad_norm": 0.4549442529678345, "learning_rate": 0.0004244196236845376, "loss": 1.7328, "step": 32811 }, { "epoch": 1.09, "grad_norm": 0.446688175201416, "learning_rate": 0.00042441011277282034, "loss": 1.7516, "step": 32812 }, { "epoch": 1.09, "grad_norm": 0.47375786304473877, "learning_rate": 0.0004244006017100875, "loss": 1.781, "step": 32813 }, { "epoch": 1.09, "grad_norm": 0.44801586866378784, "learning_rate": 0.0004243910904963507, "loss": 1.8057, "step": 32814 }, { "epoch": 1.09, "grad_norm": 0.4586080014705658, "learning_rate": 0.00042438157913162144, "loss": 1.7691, "step": 32815 }, { "epoch": 1.09, "grad_norm": 0.4598243236541748, "learning_rate": 0.00042437206761591125, "loss": 1.8055, "step": 32816 }, { "epoch": 1.09, "grad_norm": 0.45136797428131104, "learning_rate": 0.0004243625559492317, "loss": 1.7778, "step": 32817 }, { "epoch": 1.09, "grad_norm": 0.4619521498680115, "learning_rate": 0.0004243530441315943, "loss": 1.7335, "step": 32818 }, { "epoch": 1.09, "grad_norm": 0.45928624272346497, "learning_rate": 0.00042434353216301075, "loss": 1.8485, "step": 32819 }, { "epoch": 1.09, "grad_norm": 0.4947093725204468, "learning_rate": 0.00042433402004349237, "loss": 1.7618, "step": 32820 }, { "epoch": 1.09, "grad_norm": 0.4819321930408478, "learning_rate": 0.00042432450777305083, "loss": 1.7138, "step": 32821 }, { "epoch": 1.09, "grad_norm": 0.4694909453392029, "learning_rate": 0.0004243149953516977, "loss": 1.8094, "step": 32822 }, { "epoch": 1.09, "grad_norm": 0.4743902087211609, "learning_rate": 0.0004243054827794444, "loss": 1.8531, "step": 32823 }, { "epoch": 1.09, "grad_norm": 0.7955363392829895, "learning_rate": 0.0004242959700563026, "loss": 1.8473, "step": 32824 }, { "epoch": 1.09, "grad_norm": 0.4818631112575531, "learning_rate": 0.0004242864571822838, "loss": 1.7654, "step": 32825 }, { "epoch": 1.09, "grad_norm": 0.4612258970737457, "learning_rate": 0.0004242769441573996, "loss": 1.7265, "step": 32826 }, { "epoch": 1.09, "grad_norm": 0.46511611342430115, "learning_rate": 0.00042426743098166137, "loss": 1.7691, "step": 32827 }, { "epoch": 1.09, "grad_norm": 0.4614448547363281, "learning_rate": 0.0004242579176550809, "loss": 1.8585, "step": 32828 }, { "epoch": 1.09, "grad_norm": 0.4407605230808258, "learning_rate": 0.0004242484041776696, "loss": 1.7317, "step": 32829 }, { "epoch": 1.09, "grad_norm": 0.48122087121009827, "learning_rate": 0.00042423889054943906, "loss": 1.8673, "step": 32830 }, { "epoch": 1.09, "grad_norm": 0.46099022030830383, "learning_rate": 0.00042422937677040083, "loss": 1.7908, "step": 32831 }, { "epoch": 1.09, "grad_norm": 0.49765726923942566, "learning_rate": 0.00042421986284056634, "loss": 1.8162, "step": 32832 }, { "epoch": 1.09, "grad_norm": 0.44675642251968384, "learning_rate": 0.0004242103487599474, "loss": 1.7795, "step": 32833 }, { "epoch": 1.09, "grad_norm": 0.46452292799949646, "learning_rate": 0.0004242008345285552, "loss": 1.8095, "step": 32834 }, { "epoch": 1.09, "grad_norm": 0.45348215103149414, "learning_rate": 0.00042419132014640166, "loss": 1.7563, "step": 32835 }, { "epoch": 1.09, "grad_norm": 0.4715834856033325, "learning_rate": 0.00042418180561349804, "loss": 1.8135, "step": 32836 }, { "epoch": 1.09, "grad_norm": 0.48847538232803345, "learning_rate": 0.0004241722909298561, "loss": 1.7841, "step": 32837 }, { "epoch": 1.09, "grad_norm": 0.46680858731269836, "learning_rate": 0.0004241627760954873, "loss": 1.8634, "step": 32838 }, { "epoch": 1.09, "grad_norm": 0.45865488052368164, "learning_rate": 0.0004241532611104031, "loss": 1.7828, "step": 32839 }, { "epoch": 1.09, "grad_norm": 0.4898754954338074, "learning_rate": 0.0004241437459746152, "loss": 1.8123, "step": 32840 }, { "epoch": 1.09, "grad_norm": 0.46881213784217834, "learning_rate": 0.000424134230688135, "loss": 1.7614, "step": 32841 }, { "epoch": 1.09, "grad_norm": 0.46454286575317383, "learning_rate": 0.00042412471525097423, "loss": 1.7968, "step": 32842 }, { "epoch": 1.09, "grad_norm": 0.4836116135120392, "learning_rate": 0.0004241151996631443, "loss": 1.7802, "step": 32843 }, { "epoch": 1.09, "grad_norm": 0.475498765707016, "learning_rate": 0.00042410568392465674, "loss": 1.7613, "step": 32844 }, { "epoch": 1.09, "grad_norm": 0.46518510580062866, "learning_rate": 0.00042409616803552327, "loss": 1.8466, "step": 32845 }, { "epoch": 1.09, "grad_norm": 0.47988173365592957, "learning_rate": 0.0004240866519957553, "loss": 1.7512, "step": 32846 }, { "epoch": 1.09, "grad_norm": 0.47204411029815674, "learning_rate": 0.00042407713580536443, "loss": 1.777, "step": 32847 }, { "epoch": 1.09, "grad_norm": 0.46188321709632874, "learning_rate": 0.0004240676194643622, "loss": 1.7868, "step": 32848 }, { "epoch": 1.09, "grad_norm": 0.47648903727531433, "learning_rate": 0.00042405810297276015, "loss": 1.792, "step": 32849 }, { "epoch": 1.09, "grad_norm": 0.4526740312576294, "learning_rate": 0.00042404858633056977, "loss": 1.7813, "step": 32850 }, { "epoch": 1.09, "grad_norm": 0.47333189845085144, "learning_rate": 0.0004240390695378028, "loss": 1.7892, "step": 32851 }, { "epoch": 1.09, "grad_norm": 0.4743005335330963, "learning_rate": 0.00042402955259447065, "loss": 1.8525, "step": 32852 }, { "epoch": 1.09, "grad_norm": 0.46048441529273987, "learning_rate": 0.0004240200355005849, "loss": 1.8769, "step": 32853 }, { "epoch": 1.09, "grad_norm": 0.45577937364578247, "learning_rate": 0.000424010518256157, "loss": 1.8116, "step": 32854 }, { "epoch": 1.09, "grad_norm": 0.46896958351135254, "learning_rate": 0.0004240010008611987, "loss": 1.7004, "step": 32855 }, { "epoch": 1.09, "grad_norm": 0.46432673931121826, "learning_rate": 0.0004239914833157214, "loss": 1.7912, "step": 32856 }, { "epoch": 1.09, "grad_norm": 0.4708997309207916, "learning_rate": 0.0004239819656197368, "loss": 1.7504, "step": 32857 }, { "epoch": 1.09, "grad_norm": 0.47531643509864807, "learning_rate": 0.0004239724477732563, "loss": 1.7219, "step": 32858 }, { "epoch": 1.09, "grad_norm": 0.44148585200309753, "learning_rate": 0.00042396292977629155, "loss": 1.7695, "step": 32859 }, { "epoch": 1.09, "grad_norm": 0.4427051246166229, "learning_rate": 0.0004239534116288541, "loss": 1.7761, "step": 32860 }, { "epoch": 1.09, "grad_norm": 0.48170006275177, "learning_rate": 0.00042394389333095536, "loss": 1.8673, "step": 32861 }, { "epoch": 1.09, "grad_norm": 0.4709285795688629, "learning_rate": 0.000423934374882607, "loss": 1.7487, "step": 32862 }, { "epoch": 1.09, "grad_norm": 0.4587826430797577, "learning_rate": 0.0004239248562838207, "loss": 1.8078, "step": 32863 }, { "epoch": 1.09, "grad_norm": 0.45494693517684937, "learning_rate": 0.0004239153375346077, "loss": 1.7837, "step": 32864 }, { "epoch": 1.09, "grad_norm": 0.45540979504585266, "learning_rate": 0.0004239058186349799, "loss": 1.8501, "step": 32865 }, { "epoch": 1.09, "grad_norm": 0.4736079275608063, "learning_rate": 0.00042389629958494864, "loss": 1.7905, "step": 32866 }, { "epoch": 1.09, "grad_norm": 0.48012641072273254, "learning_rate": 0.0004238867803845255, "loss": 1.8269, "step": 32867 }, { "epoch": 1.09, "grad_norm": 0.4856352210044861, "learning_rate": 0.0004238772610337221, "loss": 1.7638, "step": 32868 }, { "epoch": 1.09, "grad_norm": 0.4729125201702118, "learning_rate": 0.0004238677415325499, "loss": 1.7956, "step": 32869 }, { "epoch": 1.09, "grad_norm": 0.47931626439094543, "learning_rate": 0.00042385822188102066, "loss": 1.7856, "step": 32870 }, { "epoch": 1.09, "grad_norm": 0.48270905017852783, "learning_rate": 0.0004238487020791456, "loss": 1.7861, "step": 32871 }, { "epoch": 1.09, "grad_norm": 0.4660452902317047, "learning_rate": 0.0004238391821269366, "loss": 1.7629, "step": 32872 }, { "epoch": 1.09, "grad_norm": 0.4550394117832184, "learning_rate": 0.000423829662024405, "loss": 1.6716, "step": 32873 }, { "epoch": 1.09, "grad_norm": 0.4506710171699524, "learning_rate": 0.0004238201417715624, "loss": 1.772, "step": 32874 }, { "epoch": 1.09, "grad_norm": 0.46965935826301575, "learning_rate": 0.0004238106213684205, "loss": 1.8414, "step": 32875 }, { "epoch": 1.09, "grad_norm": 0.47831252217292786, "learning_rate": 0.0004238011008149907, "loss": 1.7783, "step": 32876 }, { "epoch": 1.09, "grad_norm": 0.4595637619495392, "learning_rate": 0.0004237915801112846, "loss": 1.8035, "step": 32877 }, { "epoch": 1.09, "grad_norm": 0.46151071786880493, "learning_rate": 0.0004237820592573138, "loss": 1.8387, "step": 32878 }, { "epoch": 1.09, "grad_norm": 0.4518504738807678, "learning_rate": 0.00042377253825308977, "loss": 1.7521, "step": 32879 }, { "epoch": 1.09, "grad_norm": 0.4671967923641205, "learning_rate": 0.0004237630170986241, "loss": 1.7644, "step": 32880 }, { "epoch": 1.09, "grad_norm": 0.4532741904258728, "learning_rate": 0.0004237534957939284, "loss": 1.8781, "step": 32881 }, { "epoch": 1.09, "grad_norm": 0.4435608386993408, "learning_rate": 0.0004237439743390142, "loss": 1.7525, "step": 32882 }, { "epoch": 1.09, "grad_norm": 0.4425821602344513, "learning_rate": 0.00042373445273389306, "loss": 1.7762, "step": 32883 }, { "epoch": 1.09, "grad_norm": 0.45554500818252563, "learning_rate": 0.0004237249309785765, "loss": 1.8644, "step": 32884 }, { "epoch": 1.09, "grad_norm": 0.4704506993293762, "learning_rate": 0.0004237154090730761, "loss": 1.8162, "step": 32885 }, { "epoch": 1.09, "grad_norm": 0.4428035318851471, "learning_rate": 0.0004237058870174034, "loss": 1.7852, "step": 32886 }, { "epoch": 1.09, "grad_norm": 0.4696371555328369, "learning_rate": 0.00042369636481157, "loss": 1.8033, "step": 32887 }, { "epoch": 1.09, "grad_norm": 0.4599848985671997, "learning_rate": 0.00042368684245558755, "loss": 1.803, "step": 32888 }, { "epoch": 1.09, "grad_norm": 0.4596273601055145, "learning_rate": 0.0004236773199494672, "loss": 1.7816, "step": 32889 }, { "epoch": 1.09, "grad_norm": 0.4530782699584961, "learning_rate": 0.00042366779729322116, "loss": 1.863, "step": 32890 }, { "epoch": 1.09, "grad_norm": 0.4579721987247467, "learning_rate": 0.0004236582744868604, "loss": 1.869, "step": 32891 }, { "epoch": 1.09, "grad_norm": 0.4536435604095459, "learning_rate": 0.0004236487515303968, "loss": 1.7766, "step": 32892 }, { "epoch": 1.09, "grad_norm": 0.47109556198120117, "learning_rate": 0.0004236392284238419, "loss": 1.7383, "step": 32893 }, { "epoch": 1.09, "grad_norm": 0.45547065138816833, "learning_rate": 0.00042362970516720704, "loss": 1.8219, "step": 32894 }, { "epoch": 1.09, "grad_norm": 0.46434545516967773, "learning_rate": 0.00042362018176050404, "loss": 1.7543, "step": 32895 }, { "epoch": 1.09, "grad_norm": 0.4592985212802887, "learning_rate": 0.00042361065820374427, "loss": 1.7856, "step": 32896 }, { "epoch": 1.09, "grad_norm": 0.4734209179878235, "learning_rate": 0.0004236011344969395, "loss": 1.7713, "step": 32897 }, { "epoch": 1.09, "grad_norm": 0.46536725759506226, "learning_rate": 0.00042359161064010106, "loss": 1.845, "step": 32898 }, { "epoch": 1.09, "grad_norm": 0.4613133370876312, "learning_rate": 0.00042358208663324066, "loss": 1.7858, "step": 32899 }, { "epoch": 1.09, "grad_norm": 0.44900333881378174, "learning_rate": 0.00042357256247636983, "loss": 1.8012, "step": 32900 }, { "epoch": 1.09, "grad_norm": 0.4571000039577484, "learning_rate": 0.00042356303816950007, "loss": 1.802, "step": 32901 }, { "epoch": 1.09, "grad_norm": 0.45143964886665344, "learning_rate": 0.00042355351371264297, "loss": 1.7488, "step": 32902 }, { "epoch": 1.09, "grad_norm": 0.4571688771247864, "learning_rate": 0.00042354398910581014, "loss": 1.6953, "step": 32903 }, { "epoch": 1.09, "grad_norm": 0.46410539746284485, "learning_rate": 0.00042353446434901317, "loss": 1.739, "step": 32904 }, { "epoch": 1.09, "grad_norm": 0.45715075731277466, "learning_rate": 0.0004235249394422635, "loss": 1.8836, "step": 32905 }, { "epoch": 1.09, "grad_norm": 0.4621638357639313, "learning_rate": 0.0004235154143855727, "loss": 1.8179, "step": 32906 }, { "epoch": 1.09, "grad_norm": 0.4730989634990692, "learning_rate": 0.0004235058891789525, "loss": 1.8255, "step": 32907 }, { "epoch": 1.09, "grad_norm": 0.46658292412757874, "learning_rate": 0.00042349636382241435, "loss": 1.7902, "step": 32908 }, { "epoch": 1.09, "grad_norm": 0.4739895462989807, "learning_rate": 0.0004234868383159697, "loss": 1.8357, "step": 32909 }, { "epoch": 1.09, "grad_norm": 0.45660147070884705, "learning_rate": 0.0004234773126596303, "loss": 1.8713, "step": 32910 }, { "epoch": 1.09, "grad_norm": 0.46641772985458374, "learning_rate": 0.00042346778685340764, "loss": 1.8401, "step": 32911 }, { "epoch": 1.09, "grad_norm": 0.461571604013443, "learning_rate": 0.00042345826089731323, "loss": 1.7826, "step": 32912 }, { "epoch": 1.1, "grad_norm": 0.4630424678325653, "learning_rate": 0.00042344873479135877, "loss": 1.7678, "step": 32913 }, { "epoch": 1.1, "grad_norm": 0.4518624544143677, "learning_rate": 0.00042343920853555564, "loss": 1.7995, "step": 32914 }, { "epoch": 1.1, "grad_norm": 0.5110360383987427, "learning_rate": 0.0004234296821299155, "loss": 1.7611, "step": 32915 }, { "epoch": 1.1, "grad_norm": 0.45200082659721375, "learning_rate": 0.00042342015557444995, "loss": 1.7756, "step": 32916 }, { "epoch": 1.1, "grad_norm": 0.46945300698280334, "learning_rate": 0.0004234106288691705, "loss": 1.8051, "step": 32917 }, { "epoch": 1.1, "grad_norm": 0.4730657637119293, "learning_rate": 0.0004234011020140888, "loss": 1.7921, "step": 32918 }, { "epoch": 1.1, "grad_norm": 0.48325347900390625, "learning_rate": 0.00042339157500921625, "loss": 1.8067, "step": 32919 }, { "epoch": 1.1, "grad_norm": 0.48430702090263367, "learning_rate": 0.00042338204785456453, "loss": 1.7796, "step": 32920 }, { "epoch": 1.1, "grad_norm": 0.48378103971481323, "learning_rate": 0.00042337252055014524, "loss": 1.7145, "step": 32921 }, { "epoch": 1.1, "grad_norm": 0.47213461995124817, "learning_rate": 0.0004233629930959698, "loss": 1.849, "step": 32922 }, { "epoch": 1.1, "grad_norm": 0.4579198360443115, "learning_rate": 0.00042335346549205, "loss": 1.8103, "step": 32923 }, { "epoch": 1.1, "grad_norm": 0.47388097643852234, "learning_rate": 0.0004233439377383971, "loss": 1.7507, "step": 32924 }, { "epoch": 1.1, "grad_norm": 0.4888237714767456, "learning_rate": 0.000423334409835023, "loss": 1.7492, "step": 32925 }, { "epoch": 1.1, "grad_norm": 0.506522536277771, "learning_rate": 0.0004233248817819389, "loss": 1.8023, "step": 32926 }, { "epoch": 1.1, "grad_norm": 0.46929657459259033, "learning_rate": 0.00042331535357915674, "loss": 1.8164, "step": 32927 }, { "epoch": 1.1, "grad_norm": 0.47076651453971863, "learning_rate": 0.00042330582522668783, "loss": 1.8006, "step": 32928 }, { "epoch": 1.1, "grad_norm": 0.4593612253665924, "learning_rate": 0.00042329629672454384, "loss": 1.7898, "step": 32929 }, { "epoch": 1.1, "grad_norm": 0.46260544657707214, "learning_rate": 0.0004232867680727363, "loss": 1.7592, "step": 32930 }, { "epoch": 1.1, "grad_norm": 0.4904143214225769, "learning_rate": 0.00042327723927127674, "loss": 1.7582, "step": 32931 }, { "epoch": 1.1, "grad_norm": 0.4698414206504822, "learning_rate": 0.000423267710320177, "loss": 1.7633, "step": 32932 }, { "epoch": 1.1, "grad_norm": 0.4863351285457611, "learning_rate": 0.0004232581812194482, "loss": 1.7744, "step": 32933 }, { "epoch": 1.1, "grad_norm": 0.44725778698921204, "learning_rate": 0.0004232486519691021, "loss": 1.8046, "step": 32934 }, { "epoch": 1.1, "grad_norm": 0.44581422209739685, "learning_rate": 0.0004232391225691504, "loss": 1.7598, "step": 32935 }, { "epoch": 1.1, "grad_norm": 0.4888182282447815, "learning_rate": 0.0004232295930196045, "loss": 1.7803, "step": 32936 }, { "epoch": 1.1, "grad_norm": 0.44913962483406067, "learning_rate": 0.0004232200633204761, "loss": 1.8488, "step": 32937 }, { "epoch": 1.1, "grad_norm": 0.46369999647140503, "learning_rate": 0.0004232105334717767, "loss": 1.7141, "step": 32938 }, { "epoch": 1.1, "grad_norm": 0.4661509394645691, "learning_rate": 0.00042320100347351786, "loss": 1.7805, "step": 32939 }, { "epoch": 1.1, "grad_norm": 0.45064324140548706, "learning_rate": 0.0004231914733257111, "loss": 1.8293, "step": 32940 }, { "epoch": 1.1, "grad_norm": 0.4602518379688263, "learning_rate": 0.000423181943028368, "loss": 1.7792, "step": 32941 }, { "epoch": 1.1, "grad_norm": 0.4567175507545471, "learning_rate": 0.00042317241258150036, "loss": 1.7259, "step": 32942 }, { "epoch": 1.1, "grad_norm": 0.4575382471084595, "learning_rate": 0.00042316288198511947, "loss": 1.7178, "step": 32943 }, { "epoch": 1.1, "grad_norm": 0.4638463258743286, "learning_rate": 0.0004231533512392369, "loss": 1.729, "step": 32944 }, { "epoch": 1.1, "grad_norm": 0.47519925236701965, "learning_rate": 0.00042314382034386446, "loss": 1.8598, "step": 32945 }, { "epoch": 1.1, "grad_norm": 0.47295892238616943, "learning_rate": 0.00042313428929901343, "loss": 1.7805, "step": 32946 }, { "epoch": 1.1, "grad_norm": 0.46774908900260925, "learning_rate": 0.0004231247581046956, "loss": 1.7062, "step": 32947 }, { "epoch": 1.1, "grad_norm": 0.448201060295105, "learning_rate": 0.0004231152267609224, "loss": 1.8145, "step": 32948 }, { "epoch": 1.1, "grad_norm": 0.4671069383621216, "learning_rate": 0.00042310569526770545, "loss": 1.8348, "step": 32949 }, { "epoch": 1.1, "grad_norm": 0.4652107059955597, "learning_rate": 0.0004230961636250564, "loss": 1.74, "step": 32950 }, { "epoch": 1.1, "grad_norm": 0.4695597290992737, "learning_rate": 0.00042308663183298663, "loss": 1.8323, "step": 32951 }, { "epoch": 1.1, "grad_norm": 0.4610747992992401, "learning_rate": 0.00042307709989150794, "loss": 1.7286, "step": 32952 }, { "epoch": 1.1, "grad_norm": 0.4764069616794586, "learning_rate": 0.0004230675678006317, "loss": 1.7456, "step": 32953 }, { "epoch": 1.1, "grad_norm": 0.4687758982181549, "learning_rate": 0.00042305803556036956, "loss": 1.722, "step": 32954 }, { "epoch": 1.1, "grad_norm": 0.46449288725852966, "learning_rate": 0.0004230485031707332, "loss": 1.6697, "step": 32955 }, { "epoch": 1.1, "grad_norm": 0.45697078108787537, "learning_rate": 0.000423038970631734, "loss": 1.8802, "step": 32956 }, { "epoch": 1.1, "grad_norm": 0.45351096987724304, "learning_rate": 0.0004230294379433837, "loss": 1.7723, "step": 32957 }, { "epoch": 1.1, "grad_norm": 0.4892868399620056, "learning_rate": 0.0004230199051056936, "loss": 1.856, "step": 32958 }, { "epoch": 1.1, "grad_norm": 0.4630056917667389, "learning_rate": 0.0004230103721186757, "loss": 1.7648, "step": 32959 }, { "epoch": 1.1, "grad_norm": 0.48428699374198914, "learning_rate": 0.0004230008389823412, "loss": 1.7616, "step": 32960 }, { "epoch": 1.1, "grad_norm": 0.45622518658638, "learning_rate": 0.00042299130569670184, "loss": 1.7966, "step": 32961 }, { "epoch": 1.1, "grad_norm": 0.4612068831920624, "learning_rate": 0.00042298177226176914, "loss": 1.832, "step": 32962 }, { "epoch": 1.1, "grad_norm": 0.47486940026283264, "learning_rate": 0.0004229722386775547, "loss": 1.786, "step": 32963 }, { "epoch": 1.1, "grad_norm": 0.4502125382423401, "learning_rate": 0.00042296270494407006, "loss": 1.8347, "step": 32964 }, { "epoch": 1.1, "grad_norm": 0.45856302976608276, "learning_rate": 0.0004229531710613269, "loss": 1.7795, "step": 32965 }, { "epoch": 1.1, "grad_norm": 0.6000513434410095, "learning_rate": 0.00042294363702933657, "loss": 1.8224, "step": 32966 }, { "epoch": 1.1, "grad_norm": 0.5003984570503235, "learning_rate": 0.00042293410284811086, "loss": 1.8351, "step": 32967 }, { "epoch": 1.1, "grad_norm": 0.47365623712539673, "learning_rate": 0.00042292456851766126, "loss": 1.7601, "step": 32968 }, { "epoch": 1.1, "grad_norm": 0.46277692914009094, "learning_rate": 0.00042291503403799936, "loss": 1.8652, "step": 32969 }, { "epoch": 1.1, "grad_norm": 0.463692307472229, "learning_rate": 0.0004229054994091367, "loss": 1.7889, "step": 32970 }, { "epoch": 1.1, "grad_norm": 0.4691440463066101, "learning_rate": 0.00042289596463108487, "loss": 1.7308, "step": 32971 }, { "epoch": 1.1, "grad_norm": 0.45146286487579346, "learning_rate": 0.00042288642970385543, "loss": 1.8356, "step": 32972 }, { "epoch": 1.1, "grad_norm": 0.4945438802242279, "learning_rate": 0.00042287689462746, "loss": 1.8234, "step": 32973 }, { "epoch": 1.1, "grad_norm": 0.4665707051753998, "learning_rate": 0.00042286735940191004, "loss": 1.7624, "step": 32974 }, { "epoch": 1.1, "grad_norm": 0.47147300839424133, "learning_rate": 0.0004228578240272174, "loss": 1.7496, "step": 32975 }, { "epoch": 1.1, "grad_norm": 0.46465161442756653, "learning_rate": 0.0004228482885033933, "loss": 1.8042, "step": 32976 }, { "epoch": 1.1, "grad_norm": 0.4510992169380188, "learning_rate": 0.00042283875283044954, "loss": 1.8096, "step": 32977 }, { "epoch": 1.1, "grad_norm": 0.48108336329460144, "learning_rate": 0.0004228292170083976, "loss": 1.7852, "step": 32978 }, { "epoch": 1.1, "grad_norm": 0.4743655025959015, "learning_rate": 0.00042281968103724906, "loss": 1.8263, "step": 32979 }, { "epoch": 1.1, "grad_norm": 0.4706707298755646, "learning_rate": 0.0004228101449170157, "loss": 1.7976, "step": 32980 }, { "epoch": 1.1, "grad_norm": 0.4596788287162781, "learning_rate": 0.00042280060864770865, "loss": 1.7441, "step": 32981 }, { "epoch": 1.1, "grad_norm": 0.45877447724342346, "learning_rate": 0.00042279107222933997, "loss": 1.8155, "step": 32982 }, { "epoch": 1.1, "grad_norm": 0.45614519715309143, "learning_rate": 0.0004227815356619209, "loss": 1.6398, "step": 32983 }, { "epoch": 1.1, "grad_norm": 0.47669360041618347, "learning_rate": 0.0004227719989454632, "loss": 1.8459, "step": 32984 }, { "epoch": 1.1, "grad_norm": 0.46861958503723145, "learning_rate": 0.0004227624620799783, "loss": 1.8371, "step": 32985 }, { "epoch": 1.1, "grad_norm": 0.48778802156448364, "learning_rate": 0.0004227529250654779, "loss": 1.8678, "step": 32986 }, { "epoch": 1.1, "grad_norm": 0.4528851807117462, "learning_rate": 0.00042274338790197367, "loss": 1.7142, "step": 32987 }, { "epoch": 1.1, "grad_norm": 0.46219897270202637, "learning_rate": 0.0004227338505894769, "loss": 1.7514, "step": 32988 }, { "epoch": 1.1, "grad_norm": 0.47059279680252075, "learning_rate": 0.0004227243131279994, "loss": 1.8082, "step": 32989 }, { "epoch": 1.1, "grad_norm": 0.4697304368019104, "learning_rate": 0.00042271477551755266, "loss": 1.8068, "step": 32990 }, { "epoch": 1.1, "grad_norm": 0.4848034083843231, "learning_rate": 0.0004227052377581482, "loss": 1.8653, "step": 32991 }, { "epoch": 1.1, "grad_norm": 0.4809792935848236, "learning_rate": 0.00042269569984979764, "loss": 1.7542, "step": 32992 }, { "epoch": 1.1, "grad_norm": 0.4453175961971283, "learning_rate": 0.0004226861617925127, "loss": 1.7433, "step": 32993 }, { "epoch": 1.1, "grad_norm": 0.4598144590854645, "learning_rate": 0.00042267662358630475, "loss": 1.7772, "step": 32994 }, { "epoch": 1.1, "grad_norm": 0.4663483202457428, "learning_rate": 0.00042266708523118544, "loss": 1.7766, "step": 32995 }, { "epoch": 1.1, "grad_norm": 0.461169958114624, "learning_rate": 0.0004226575467271664, "loss": 1.727, "step": 32996 }, { "epoch": 1.1, "grad_norm": 0.4802234470844269, "learning_rate": 0.00042264800807425917, "loss": 1.7537, "step": 32997 }, { "epoch": 1.1, "grad_norm": 0.4597456455230713, "learning_rate": 0.0004226384692724753, "loss": 1.8131, "step": 32998 }, { "epoch": 1.1, "grad_norm": 0.45909667015075684, "learning_rate": 0.0004226289303218265, "loss": 1.8163, "step": 32999 }, { "epoch": 1.1, "grad_norm": 0.47639259696006775, "learning_rate": 0.0004226193912223242, "loss": 1.7777, "step": 33000 }, { "epoch": 1.1, "grad_norm": 0.600542426109314, "learning_rate": 0.0004226098519739799, "loss": 1.8005, "step": 33001 }, { "epoch": 1.1, "grad_norm": 0.4764379858970642, "learning_rate": 0.00042260031257680545, "loss": 1.8608, "step": 33002 }, { "epoch": 1.1, "grad_norm": 0.47298017144203186, "learning_rate": 0.0004225907730308122, "loss": 1.7695, "step": 33003 }, { "epoch": 1.1, "grad_norm": 0.46550941467285156, "learning_rate": 0.0004225812333360119, "loss": 1.7931, "step": 33004 }, { "epoch": 1.1, "grad_norm": 0.4601927101612091, "learning_rate": 0.000422571693492416, "loss": 1.8288, "step": 33005 }, { "epoch": 1.1, "grad_norm": 0.5715681314468384, "learning_rate": 0.00042256215350003606, "loss": 1.8009, "step": 33006 }, { "epoch": 1.1, "grad_norm": 0.4490329921245575, "learning_rate": 0.00042255261335888384, "loss": 1.7981, "step": 33007 }, { "epoch": 1.1, "grad_norm": 0.4765368103981018, "learning_rate": 0.00042254307306897074, "loss": 1.8485, "step": 33008 }, { "epoch": 1.1, "grad_norm": 0.49234500527381897, "learning_rate": 0.0004225335326303084, "loss": 1.7175, "step": 33009 }, { "epoch": 1.1, "grad_norm": 0.4765530526638031, "learning_rate": 0.0004225239920429085, "loss": 1.7784, "step": 33010 }, { "epoch": 1.1, "grad_norm": 0.4901360273361206, "learning_rate": 0.00042251445130678234, "loss": 1.753, "step": 33011 }, { "epoch": 1.1, "grad_norm": 0.47751274704933167, "learning_rate": 0.00042250491042194186, "loss": 1.8071, "step": 33012 }, { "epoch": 1.1, "grad_norm": 0.4642401337623596, "learning_rate": 0.00042249536938839834, "loss": 1.8046, "step": 33013 }, { "epoch": 1.1, "grad_norm": 0.4779517352581024, "learning_rate": 0.0004224858282061636, "loss": 1.8141, "step": 33014 }, { "epoch": 1.1, "grad_norm": 0.4743678867816925, "learning_rate": 0.00042247628687524906, "loss": 1.8048, "step": 33015 }, { "epoch": 1.1, "grad_norm": 0.4840892553329468, "learning_rate": 0.00042246674539566634, "loss": 1.8401, "step": 33016 }, { "epoch": 1.1, "grad_norm": 0.457391619682312, "learning_rate": 0.00042245720376742714, "loss": 1.8341, "step": 33017 }, { "epoch": 1.1, "grad_norm": 0.4672737717628479, "learning_rate": 0.0004224476619905428, "loss": 1.8101, "step": 33018 }, { "epoch": 1.1, "grad_norm": 0.4729817807674408, "learning_rate": 0.0004224381200650251, "loss": 1.8263, "step": 33019 }, { "epoch": 1.1, "grad_norm": 0.48941269516944885, "learning_rate": 0.00042242857799088554, "loss": 1.8147, "step": 33020 }, { "epoch": 1.1, "grad_norm": 0.4966057240962982, "learning_rate": 0.00042241903576813574, "loss": 1.7949, "step": 33021 }, { "epoch": 1.1, "grad_norm": 0.48498302698135376, "learning_rate": 0.0004224094933967873, "loss": 1.8938, "step": 33022 }, { "epoch": 1.1, "grad_norm": 0.4939480125904083, "learning_rate": 0.0004223999508768517, "loss": 1.8409, "step": 33023 }, { "epoch": 1.1, "grad_norm": 0.478395938873291, "learning_rate": 0.0004223904082083407, "loss": 1.7426, "step": 33024 }, { "epoch": 1.1, "grad_norm": 0.4682479798793793, "learning_rate": 0.00042238086539126573, "loss": 1.7491, "step": 33025 }, { "epoch": 1.1, "grad_norm": 0.4460587203502655, "learning_rate": 0.0004223713224256383, "loss": 1.7542, "step": 33026 }, { "epoch": 1.1, "grad_norm": 0.46842294931411743, "learning_rate": 0.00042236177931147023, "loss": 1.8298, "step": 33027 }, { "epoch": 1.1, "grad_norm": 0.4748150110244751, "learning_rate": 0.000422352236048773, "loss": 1.694, "step": 33028 }, { "epoch": 1.1, "grad_norm": 0.46117618680000305, "learning_rate": 0.0004223426926375582, "loss": 1.7752, "step": 33029 }, { "epoch": 1.1, "grad_norm": 0.46044421195983887, "learning_rate": 0.00042233314907783737, "loss": 1.8041, "step": 33030 }, { "epoch": 1.1, "grad_norm": 0.45635470747947693, "learning_rate": 0.0004223236053696221, "loss": 1.7088, "step": 33031 }, { "epoch": 1.1, "grad_norm": 0.4663732349872589, "learning_rate": 0.000422314061512924, "loss": 1.8244, "step": 33032 }, { "epoch": 1.1, "grad_norm": 0.45558643341064453, "learning_rate": 0.0004223045175077546, "loss": 1.7728, "step": 33033 }, { "epoch": 1.1, "grad_norm": 0.4573192596435547, "learning_rate": 0.00042229497335412563, "loss": 1.8071, "step": 33034 }, { "epoch": 1.1, "grad_norm": 0.4507754445075989, "learning_rate": 0.0004222854290520486, "loss": 1.8201, "step": 33035 }, { "epoch": 1.1, "grad_norm": 0.4582279324531555, "learning_rate": 0.000422275884601535, "loss": 1.8241, "step": 33036 }, { "epoch": 1.1, "grad_norm": 0.4817931652069092, "learning_rate": 0.0004222663400025965, "loss": 1.7727, "step": 33037 }, { "epoch": 1.1, "grad_norm": 0.45065149664878845, "learning_rate": 0.0004222567952552447, "loss": 1.8484, "step": 33038 }, { "epoch": 1.1, "grad_norm": 0.4565013349056244, "learning_rate": 0.00042224725035949117, "loss": 1.8145, "step": 33039 }, { "epoch": 1.1, "grad_norm": 0.45812904834747314, "learning_rate": 0.00042223770531534747, "loss": 1.8378, "step": 33040 }, { "epoch": 1.1, "grad_norm": 0.45123326778411865, "learning_rate": 0.00042222816012282523, "loss": 1.7947, "step": 33041 }, { "epoch": 1.1, "grad_norm": 0.4419638514518738, "learning_rate": 0.00042221861478193605, "loss": 1.7646, "step": 33042 }, { "epoch": 1.1, "grad_norm": 0.45802900195121765, "learning_rate": 0.0004222090692926913, "loss": 1.7724, "step": 33043 }, { "epoch": 1.1, "grad_norm": 0.45486587285995483, "learning_rate": 0.00042219952365510295, "loss": 1.718, "step": 33044 }, { "epoch": 1.1, "grad_norm": 0.4496058225631714, "learning_rate": 0.00042218997786918234, "loss": 1.7129, "step": 33045 }, { "epoch": 1.1, "grad_norm": 0.4506010413169861, "learning_rate": 0.000422180431934941, "loss": 1.7928, "step": 33046 }, { "epoch": 1.1, "grad_norm": 0.4457744061946869, "learning_rate": 0.0004221708858523907, "loss": 1.8385, "step": 33047 }, { "epoch": 1.1, "grad_norm": 0.4498721659183502, "learning_rate": 0.000422161339621543, "loss": 1.7508, "step": 33048 }, { "epoch": 1.1, "grad_norm": 0.4812338352203369, "learning_rate": 0.00042215179324240937, "loss": 1.7953, "step": 33049 }, { "epoch": 1.1, "grad_norm": 0.47293683886528015, "learning_rate": 0.0004221422467150015, "loss": 1.7794, "step": 33050 }, { "epoch": 1.1, "grad_norm": 0.4704258441925049, "learning_rate": 0.00042213270003933084, "loss": 1.7532, "step": 33051 }, { "epoch": 1.1, "grad_norm": 0.46691185235977173, "learning_rate": 0.00042212315321540916, "loss": 1.8174, "step": 33052 }, { "epoch": 1.1, "grad_norm": 0.4583417475223541, "learning_rate": 0.00042211360624324793, "loss": 1.7264, "step": 33053 }, { "epoch": 1.1, "grad_norm": 0.4561919569969177, "learning_rate": 0.00042210405912285876, "loss": 1.8091, "step": 33054 }, { "epoch": 1.1, "grad_norm": 0.46153712272644043, "learning_rate": 0.0004220945118542533, "loss": 1.7905, "step": 33055 }, { "epoch": 1.1, "grad_norm": 0.4726463556289673, "learning_rate": 0.0004220849644374431, "loss": 1.8192, "step": 33056 }, { "epoch": 1.1, "grad_norm": 0.45215803384780884, "learning_rate": 0.0004220754168724397, "loss": 1.7407, "step": 33057 }, { "epoch": 1.1, "grad_norm": 0.4518689215183258, "learning_rate": 0.0004220658691592547, "loss": 1.7399, "step": 33058 }, { "epoch": 1.1, "grad_norm": 0.48732393980026245, "learning_rate": 0.00042205632129789977, "loss": 1.7883, "step": 33059 }, { "epoch": 1.1, "grad_norm": 0.48568570613861084, "learning_rate": 0.00042204677328838653, "loss": 1.8268, "step": 33060 }, { "epoch": 1.1, "grad_norm": 0.46681565046310425, "learning_rate": 0.0004220372251307263, "loss": 1.7523, "step": 33061 }, { "epoch": 1.1, "grad_norm": 0.4541303217411041, "learning_rate": 0.00042202767682493106, "loss": 1.7908, "step": 33062 }, { "epoch": 1.1, "grad_norm": 0.45212891697883606, "learning_rate": 0.00042201812837101203, "loss": 1.7777, "step": 33063 }, { "epoch": 1.1, "grad_norm": 0.47969990968704224, "learning_rate": 0.00042200857976898106, "loss": 1.8489, "step": 33064 }, { "epoch": 1.1, "grad_norm": 0.45758911967277527, "learning_rate": 0.00042199903101884976, "loss": 1.7699, "step": 33065 }, { "epoch": 1.1, "grad_norm": 0.4582279622554779, "learning_rate": 0.00042198948212062934, "loss": 1.7139, "step": 33066 }, { "epoch": 1.1, "grad_norm": 0.46251505613327026, "learning_rate": 0.0004219799330743319, "loss": 1.7536, "step": 33067 }, { "epoch": 1.1, "grad_norm": 0.46762171387672424, "learning_rate": 0.00042197038387996864, "loss": 1.7998, "step": 33068 }, { "epoch": 1.1, "grad_norm": 0.47033679485321045, "learning_rate": 0.0004219608345375514, "loss": 1.7698, "step": 33069 }, { "epoch": 1.1, "grad_norm": 0.46557334065437317, "learning_rate": 0.00042195128504709167, "loss": 1.8143, "step": 33070 }, { "epoch": 1.1, "grad_norm": 0.4435887932777405, "learning_rate": 0.000421941735408601, "loss": 1.7502, "step": 33071 }, { "epoch": 1.1, "grad_norm": 0.4662189185619354, "learning_rate": 0.00042193218562209113, "loss": 1.8688, "step": 33072 }, { "epoch": 1.1, "grad_norm": 0.4682038724422455, "learning_rate": 0.0004219226356875734, "loss": 1.7731, "step": 33073 }, { "epoch": 1.1, "grad_norm": 0.4744984805583954, "learning_rate": 0.00042191308560505966, "loss": 1.7751, "step": 33074 }, { "epoch": 1.1, "grad_norm": 0.4645996689796448, "learning_rate": 0.0004219035353745613, "loss": 1.7865, "step": 33075 }, { "epoch": 1.1, "grad_norm": 0.4637182950973511, "learning_rate": 0.0004218939849960902, "loss": 1.7832, "step": 33076 }, { "epoch": 1.1, "grad_norm": 0.453553706407547, "learning_rate": 0.0004218844344696576, "loss": 1.7624, "step": 33077 }, { "epoch": 1.1, "grad_norm": 0.46203526854515076, "learning_rate": 0.0004218748837952753, "loss": 1.788, "step": 33078 }, { "epoch": 1.1, "grad_norm": 0.4746967554092407, "learning_rate": 0.0004218653329729548, "loss": 1.7717, "step": 33079 }, { "epoch": 1.1, "grad_norm": 0.4549506604671478, "learning_rate": 0.0004218557820027078, "loss": 1.8277, "step": 33080 }, { "epoch": 1.1, "grad_norm": 0.47744011878967285, "learning_rate": 0.00042184623088454583, "loss": 1.8289, "step": 33081 }, { "epoch": 1.1, "grad_norm": 0.45708760619163513, "learning_rate": 0.00042183667961848056, "loss": 1.7916, "step": 33082 }, { "epoch": 1.1, "grad_norm": 0.4556768536567688, "learning_rate": 0.0004218271282045234, "loss": 1.7735, "step": 33083 }, { "epoch": 1.1, "grad_norm": 0.46617111563682556, "learning_rate": 0.000421817576642686, "loss": 1.7574, "step": 33084 }, { "epoch": 1.1, "grad_norm": 0.4646885097026825, "learning_rate": 0.00042180802493298013, "loss": 1.8045, "step": 33085 }, { "epoch": 1.1, "grad_norm": 0.46606528759002686, "learning_rate": 0.00042179847307541736, "loss": 1.8128, "step": 33086 }, { "epoch": 1.1, "grad_norm": 0.4552595615386963, "learning_rate": 0.00042178892107000904, "loss": 1.7391, "step": 33087 }, { "epoch": 1.1, "grad_norm": 0.4806746244430542, "learning_rate": 0.00042177936891676693, "loss": 1.7997, "step": 33088 }, { "epoch": 1.1, "grad_norm": 0.47606077790260315, "learning_rate": 0.0004217698166157026, "loss": 1.8166, "step": 33089 }, { "epoch": 1.1, "grad_norm": 0.46519386768341064, "learning_rate": 0.0004217602641668277, "loss": 1.7842, "step": 33090 }, { "epoch": 1.1, "grad_norm": 0.4649405777454376, "learning_rate": 0.00042175071157015374, "loss": 1.8603, "step": 33091 }, { "epoch": 1.1, "grad_norm": 0.45233583450317383, "learning_rate": 0.0004217411588256924, "loss": 1.8417, "step": 33092 }, { "epoch": 1.1, "grad_norm": 0.46622419357299805, "learning_rate": 0.0004217316059334552, "loss": 1.7867, "step": 33093 }, { "epoch": 1.1, "grad_norm": 0.46996766328811646, "learning_rate": 0.0004217220528934538, "loss": 1.7797, "step": 33094 }, { "epoch": 1.1, "grad_norm": 0.44867801666259766, "learning_rate": 0.0004217124997056997, "loss": 1.7538, "step": 33095 }, { "epoch": 1.1, "grad_norm": 0.45570412278175354, "learning_rate": 0.00042170294637020457, "loss": 1.8234, "step": 33096 }, { "epoch": 1.1, "grad_norm": 0.7844894528388977, "learning_rate": 0.0004216933928869801, "loss": 1.7924, "step": 33097 }, { "epoch": 1.1, "grad_norm": 0.4597609043121338, "learning_rate": 0.0004216838392560376, "loss": 1.7685, "step": 33098 }, { "epoch": 1.1, "grad_norm": 0.47371065616607666, "learning_rate": 0.00042167428547738904, "loss": 1.8127, "step": 33099 }, { "epoch": 1.1, "grad_norm": 0.4567633867263794, "learning_rate": 0.00042166473155104574, "loss": 1.7573, "step": 33100 }, { "epoch": 1.1, "grad_norm": 0.45925265550613403, "learning_rate": 0.00042165517747701936, "loss": 1.8459, "step": 33101 }, { "epoch": 1.1, "grad_norm": 0.4674922227859497, "learning_rate": 0.0004216456232553215, "loss": 1.81, "step": 33102 }, { "epoch": 1.1, "grad_norm": 0.45538169145584106, "learning_rate": 0.0004216360688859638, "loss": 1.8102, "step": 33103 }, { "epoch": 1.1, "grad_norm": 0.4890836775302887, "learning_rate": 0.0004216265143689579, "loss": 1.6863, "step": 33104 }, { "epoch": 1.1, "grad_norm": 0.47324538230895996, "learning_rate": 0.00042161695970431524, "loss": 1.6824, "step": 33105 }, { "epoch": 1.1, "grad_norm": 0.449601411819458, "learning_rate": 0.0004216074048920476, "loss": 1.7926, "step": 33106 }, { "epoch": 1.1, "grad_norm": 0.45595377683639526, "learning_rate": 0.0004215978499321664, "loss": 1.75, "step": 33107 }, { "epoch": 1.1, "grad_norm": 0.48658159375190735, "learning_rate": 0.00042158829482468336, "loss": 1.7625, "step": 33108 }, { "epoch": 1.1, "grad_norm": 0.49722930788993835, "learning_rate": 0.00042157873956961, "loss": 1.7951, "step": 33109 }, { "epoch": 1.1, "grad_norm": 0.4446098208427429, "learning_rate": 0.00042156918416695805, "loss": 1.7858, "step": 33110 }, { "epoch": 1.1, "grad_norm": 0.4765399694442749, "learning_rate": 0.000421559628616739, "loss": 1.724, "step": 33111 }, { "epoch": 1.1, "grad_norm": 0.9787489771842957, "learning_rate": 0.0004215500729189645, "loss": 1.9191, "step": 33112 }, { "epoch": 1.1, "grad_norm": 0.453757107257843, "learning_rate": 0.0004215405170736461, "loss": 1.9062, "step": 33113 }, { "epoch": 1.1, "grad_norm": 0.47567421197891235, "learning_rate": 0.00042153096108079534, "loss": 1.8035, "step": 33114 }, { "epoch": 1.1, "grad_norm": 0.4560219943523407, "learning_rate": 0.00042152140494042397, "loss": 1.7966, "step": 33115 }, { "epoch": 1.1, "grad_norm": 0.47739699482917786, "learning_rate": 0.0004215118486525435, "loss": 1.8186, "step": 33116 }, { "epoch": 1.1, "grad_norm": 0.49080702662467957, "learning_rate": 0.0004215022922171656, "loss": 1.7548, "step": 33117 }, { "epoch": 1.1, "grad_norm": 0.58187335729599, "learning_rate": 0.0004214927356343017, "loss": 1.8392, "step": 33118 }, { "epoch": 1.1, "grad_norm": 0.4553670585155487, "learning_rate": 0.00042148317890396364, "loss": 1.7633, "step": 33119 }, { "epoch": 1.1, "grad_norm": 0.4733305871486664, "learning_rate": 0.00042147362202616284, "loss": 1.7689, "step": 33120 }, { "epoch": 1.1, "grad_norm": 0.46090182662010193, "learning_rate": 0.000421464065000911, "loss": 1.7963, "step": 33121 }, { "epoch": 1.1, "grad_norm": 0.46916958689689636, "learning_rate": 0.0004214545078282197, "loss": 1.8088, "step": 33122 }, { "epoch": 1.1, "grad_norm": 0.468011736869812, "learning_rate": 0.00042144495050810044, "loss": 1.8297, "step": 33123 }, { "epoch": 1.1, "grad_norm": 0.45690488815307617, "learning_rate": 0.000421435393040565, "loss": 1.7806, "step": 33124 }, { "epoch": 1.1, "grad_norm": 0.4883127510547638, "learning_rate": 0.00042142583542562485, "loss": 1.7557, "step": 33125 }, { "epoch": 1.1, "grad_norm": 0.4454285204410553, "learning_rate": 0.0004214162776632916, "loss": 1.7884, "step": 33126 }, { "epoch": 1.1, "grad_norm": 0.44320106506347656, "learning_rate": 0.00042140671975357687, "loss": 1.8083, "step": 33127 }, { "epoch": 1.1, "grad_norm": 0.45796671509742737, "learning_rate": 0.0004213971616964923, "loss": 1.745, "step": 33128 }, { "epoch": 1.1, "grad_norm": 0.4835893213748932, "learning_rate": 0.0004213876034920495, "loss": 1.8096, "step": 33129 }, { "epoch": 1.1, "grad_norm": 0.4712660312652588, "learning_rate": 0.0004213780451402599, "loss": 1.7869, "step": 33130 }, { "epoch": 1.1, "grad_norm": 0.4708867371082306, "learning_rate": 0.00042136848664113536, "loss": 1.7354, "step": 33131 }, { "epoch": 1.1, "grad_norm": 0.4704236686229706, "learning_rate": 0.0004213589279946873, "loss": 1.7565, "step": 33132 }, { "epoch": 1.1, "grad_norm": 0.48639652132987976, "learning_rate": 0.00042134936920092743, "loss": 1.8283, "step": 33133 }, { "epoch": 1.1, "grad_norm": 0.45726218819618225, "learning_rate": 0.0004213398102598674, "loss": 1.7085, "step": 33134 }, { "epoch": 1.1, "grad_norm": 0.462192177772522, "learning_rate": 0.0004213302511715185, "loss": 1.8188, "step": 33135 }, { "epoch": 1.1, "grad_norm": 0.4859793484210968, "learning_rate": 0.0004213206919358927, "loss": 1.8306, "step": 33136 }, { "epoch": 1.1, "grad_norm": 0.46483537554740906, "learning_rate": 0.00042131113255300135, "loss": 1.831, "step": 33137 }, { "epoch": 1.1, "grad_norm": 0.48168185353279114, "learning_rate": 0.0004213015730228562, "loss": 1.8192, "step": 33138 }, { "epoch": 1.1, "grad_norm": 0.49357789754867554, "learning_rate": 0.00042129201334546886, "loss": 1.7494, "step": 33139 }, { "epoch": 1.1, "grad_norm": 0.45422661304473877, "learning_rate": 0.0004212824535208509, "loss": 1.8305, "step": 33140 }, { "epoch": 1.1, "grad_norm": 0.46858400106430054, "learning_rate": 0.0004212728935490139, "loss": 1.8387, "step": 33141 }, { "epoch": 1.1, "grad_norm": 0.4844822883605957, "learning_rate": 0.00042126333342996946, "loss": 1.7732, "step": 33142 }, { "epoch": 1.1, "grad_norm": 0.5060310959815979, "learning_rate": 0.0004212537731637292, "loss": 1.7808, "step": 33143 }, { "epoch": 1.1, "grad_norm": 0.4629763960838318, "learning_rate": 0.00042124421275030466, "loss": 1.7961, "step": 33144 }, { "epoch": 1.1, "grad_norm": 0.4615700840950012, "learning_rate": 0.0004212346521897076, "loss": 1.7731, "step": 33145 }, { "epoch": 1.1, "grad_norm": 0.47827187180519104, "learning_rate": 0.00042122509148194946, "loss": 1.7802, "step": 33146 }, { "epoch": 1.1, "grad_norm": 0.4872165322303772, "learning_rate": 0.000421215530627042, "loss": 1.842, "step": 33147 }, { "epoch": 1.1, "grad_norm": 0.47296327352523804, "learning_rate": 0.00042120596962499674, "loss": 1.7182, "step": 33148 }, { "epoch": 1.1, "grad_norm": 0.4952416718006134, "learning_rate": 0.0004211964084758252, "loss": 1.7929, "step": 33149 }, { "epoch": 1.1, "grad_norm": 0.47015297412872314, "learning_rate": 0.0004211868471795392, "loss": 1.8465, "step": 33150 }, { "epoch": 1.1, "grad_norm": 0.464672327041626, "learning_rate": 0.00042117728573615014, "loss": 1.7717, "step": 33151 }, { "epoch": 1.1, "grad_norm": 0.4634445309638977, "learning_rate": 0.00042116772414566977, "loss": 1.7787, "step": 33152 }, { "epoch": 1.1, "grad_norm": 0.4665493071079254, "learning_rate": 0.0004211581624081095, "loss": 1.7589, "step": 33153 }, { "epoch": 1.1, "grad_norm": 0.4737068712711334, "learning_rate": 0.0004211486005234812, "loss": 1.7954, "step": 33154 }, { "epoch": 1.1, "grad_norm": 0.4644315540790558, "learning_rate": 0.00042113903849179635, "loss": 1.7748, "step": 33155 }, { "epoch": 1.1, "grad_norm": 0.4625435769557953, "learning_rate": 0.00042112947631306653, "loss": 1.7445, "step": 33156 }, { "epoch": 1.1, "grad_norm": 0.4708798825740814, "learning_rate": 0.00042111991398730334, "loss": 1.8163, "step": 33157 }, { "epoch": 1.1, "grad_norm": 0.49462956190109253, "learning_rate": 0.00042111035151451843, "loss": 1.919, "step": 33158 }, { "epoch": 1.1, "grad_norm": 0.46510016918182373, "learning_rate": 0.0004211007888947235, "loss": 1.7942, "step": 33159 }, { "epoch": 1.1, "grad_norm": 0.4893709719181061, "learning_rate": 0.00042109122612792987, "loss": 1.8125, "step": 33160 }, { "epoch": 1.1, "grad_norm": 0.48338258266448975, "learning_rate": 0.00042108166321414947, "loss": 1.7931, "step": 33161 }, { "epoch": 1.1, "grad_norm": 0.4635390341281891, "learning_rate": 0.00042107210015339375, "loss": 1.8315, "step": 33162 }, { "epoch": 1.1, "grad_norm": 0.46598151326179504, "learning_rate": 0.00042106253694567427, "loss": 1.7812, "step": 33163 }, { "epoch": 1.1, "grad_norm": 0.45574846863746643, "learning_rate": 0.0004210529735910028, "loss": 1.7471, "step": 33164 }, { "epoch": 1.1, "grad_norm": 0.4667693078517914, "learning_rate": 0.00042104341008939084, "loss": 1.8133, "step": 33165 }, { "epoch": 1.1, "grad_norm": 0.4849626123905182, "learning_rate": 0.00042103384644085005, "loss": 1.8143, "step": 33166 }, { "epoch": 1.1, "grad_norm": 0.4670833647251129, "learning_rate": 0.00042102428264539194, "loss": 1.8535, "step": 33167 }, { "epoch": 1.1, "grad_norm": 0.45456501841545105, "learning_rate": 0.00042101471870302823, "loss": 1.8156, "step": 33168 }, { "epoch": 1.1, "grad_norm": 0.46506452560424805, "learning_rate": 0.00042100515461377037, "loss": 1.7568, "step": 33169 }, { "epoch": 1.1, "grad_norm": 0.45828428864479065, "learning_rate": 0.00042099559037763016, "loss": 1.7916, "step": 33170 }, { "epoch": 1.1, "grad_norm": 0.46656447649002075, "learning_rate": 0.00042098602599461915, "loss": 1.8439, "step": 33171 }, { "epoch": 1.1, "grad_norm": 0.4696104824542999, "learning_rate": 0.00042097646146474894, "loss": 1.7379, "step": 33172 }, { "epoch": 1.1, "grad_norm": 0.4659174382686615, "learning_rate": 0.0004209668967880311, "loss": 1.7835, "step": 33173 }, { "epoch": 1.1, "grad_norm": 0.46188005805015564, "learning_rate": 0.00042095733196447724, "loss": 1.7495, "step": 33174 }, { "epoch": 1.1, "grad_norm": 0.4691046476364136, "learning_rate": 0.00042094776699409906, "loss": 1.7925, "step": 33175 }, { "epoch": 1.1, "grad_norm": 0.44879084825515747, "learning_rate": 0.0004209382018769081, "loss": 1.8023, "step": 33176 }, { "epoch": 1.1, "grad_norm": 0.4644661843776703, "learning_rate": 0.000420928636612916, "loss": 1.7358, "step": 33177 }, { "epoch": 1.1, "grad_norm": 0.46973130106925964, "learning_rate": 0.0004209190712021342, "loss": 1.8593, "step": 33178 }, { "epoch": 1.1, "grad_norm": 0.4698834717273712, "learning_rate": 0.0004209095056445746, "loss": 1.8225, "step": 33179 }, { "epoch": 1.1, "grad_norm": 0.46671348810195923, "learning_rate": 0.00042089993994024865, "loss": 1.7816, "step": 33180 }, { "epoch": 1.1, "grad_norm": 0.46021148562431335, "learning_rate": 0.000420890374089168, "loss": 1.7787, "step": 33181 }, { "epoch": 1.1, "grad_norm": 0.46904540061950684, "learning_rate": 0.0004208808080913443, "loss": 1.8025, "step": 33182 }, { "epoch": 1.1, "grad_norm": 0.46385279297828674, "learning_rate": 0.00042087124194678906, "loss": 1.8084, "step": 33183 }, { "epoch": 1.1, "grad_norm": 0.4591827094554901, "learning_rate": 0.0004208616756555139, "loss": 1.8281, "step": 33184 }, { "epoch": 1.1, "grad_norm": 0.45014747977256775, "learning_rate": 0.00042085210921753046, "loss": 1.8031, "step": 33185 }, { "epoch": 1.1, "grad_norm": 0.458583265542984, "learning_rate": 0.0004208425426328505, "loss": 1.8231, "step": 33186 }, { "epoch": 1.1, "grad_norm": 0.46071115136146545, "learning_rate": 0.00042083297590148544, "loss": 1.7664, "step": 33187 }, { "epoch": 1.1, "grad_norm": 0.4542664885520935, "learning_rate": 0.00042082340902344686, "loss": 1.7957, "step": 33188 }, { "epoch": 1.1, "grad_norm": 0.46911266446113586, "learning_rate": 0.0004208138419987466, "loss": 1.7883, "step": 33189 }, { "epoch": 1.1, "grad_norm": 0.4463012218475342, "learning_rate": 0.00042080427482739597, "loss": 1.7278, "step": 33190 }, { "epoch": 1.1, "grad_norm": 0.46147412061691284, "learning_rate": 0.00042079470750940696, "loss": 1.8264, "step": 33191 }, { "epoch": 1.1, "grad_norm": 0.45788973569869995, "learning_rate": 0.00042078514004479083, "loss": 1.7292, "step": 33192 }, { "epoch": 1.1, "grad_norm": 0.46187615394592285, "learning_rate": 0.0004207755724335593, "loss": 1.8059, "step": 33193 }, { "epoch": 1.1, "grad_norm": 0.45608416199684143, "learning_rate": 0.0004207660046757241, "loss": 1.7266, "step": 33194 }, { "epoch": 1.1, "grad_norm": 0.45584285259246826, "learning_rate": 0.00042075643677129675, "loss": 1.8457, "step": 33195 }, { "epoch": 1.1, "grad_norm": 0.45867982506752014, "learning_rate": 0.00042074686872028896, "loss": 1.7841, "step": 33196 }, { "epoch": 1.1, "grad_norm": 0.4856223165988922, "learning_rate": 0.0004207373005227121, "loss": 1.8789, "step": 33197 }, { "epoch": 1.1, "grad_norm": 0.46745485067367554, "learning_rate": 0.00042072773217857805, "loss": 1.8122, "step": 33198 }, { "epoch": 1.1, "grad_norm": 0.46035030484199524, "learning_rate": 0.0004207181636878983, "loss": 1.7356, "step": 33199 }, { "epoch": 1.1, "grad_norm": 0.4748358130455017, "learning_rate": 0.0004207085950506845, "loss": 1.824, "step": 33200 }, { "epoch": 1.1, "grad_norm": 0.4629153907299042, "learning_rate": 0.0004206990262669482, "loss": 1.8159, "step": 33201 }, { "epoch": 1.1, "grad_norm": 0.45750945806503296, "learning_rate": 0.0004206894573367011, "loss": 1.7922, "step": 33202 }, { "epoch": 1.1, "grad_norm": 0.48227986693382263, "learning_rate": 0.0004206798882599548, "loss": 1.8447, "step": 33203 }, { "epoch": 1.1, "grad_norm": 0.4625917673110962, "learning_rate": 0.000420670319036721, "loss": 1.7718, "step": 33204 }, { "epoch": 1.1, "grad_norm": 0.4715751111507416, "learning_rate": 0.000420660749667011, "loss": 1.8085, "step": 33205 }, { "epoch": 1.1, "grad_norm": 0.4543660581111908, "learning_rate": 0.0004206511801508367, "loss": 1.7423, "step": 33206 }, { "epoch": 1.1, "grad_norm": 0.46400323510169983, "learning_rate": 0.0004206416104882097, "loss": 1.8098, "step": 33207 }, { "epoch": 1.1, "grad_norm": 0.4710923135280609, "learning_rate": 0.0004206320406791415, "loss": 1.7763, "step": 33208 }, { "epoch": 1.1, "grad_norm": 0.4699257016181946, "learning_rate": 0.0004206224707236439, "loss": 1.8164, "step": 33209 }, { "epoch": 1.1, "grad_norm": 0.4620231091976166, "learning_rate": 0.0004206129006217283, "loss": 1.7831, "step": 33210 }, { "epoch": 1.1, "grad_norm": 0.45864883065223694, "learning_rate": 0.0004206033303734064, "loss": 1.7664, "step": 33211 }, { "epoch": 1.1, "grad_norm": 0.47360536456108093, "learning_rate": 0.0004205937599786898, "loss": 1.8823, "step": 33212 }, { "epoch": 1.11, "grad_norm": 0.49979928135871887, "learning_rate": 0.0004205841894375902, "loss": 1.8265, "step": 33213 }, { "epoch": 1.11, "grad_norm": 0.4789372384548187, "learning_rate": 0.0004205746187501192, "loss": 1.7846, "step": 33214 }, { "epoch": 1.11, "grad_norm": 0.4548712372779846, "learning_rate": 0.0004205650479162882, "loss": 1.7318, "step": 33215 }, { "epoch": 1.11, "grad_norm": 0.48734062910079956, "learning_rate": 0.0004205554769361092, "loss": 1.7887, "step": 33216 }, { "epoch": 1.11, "grad_norm": 0.48303458094596863, "learning_rate": 0.0004205459058095935, "loss": 1.7724, "step": 33217 }, { "epoch": 1.11, "grad_norm": 0.46500998735427856, "learning_rate": 0.0004205363345367528, "loss": 1.766, "step": 33218 }, { "epoch": 1.11, "grad_norm": 0.49004065990448, "learning_rate": 0.0004205267631175989, "loss": 1.7921, "step": 33219 }, { "epoch": 1.11, "grad_norm": 0.4538128674030304, "learning_rate": 0.00042051719155214315, "loss": 1.8178, "step": 33220 }, { "epoch": 1.11, "grad_norm": 0.4824274480342865, "learning_rate": 0.00042050761984039736, "loss": 1.7804, "step": 33221 }, { "epoch": 1.11, "grad_norm": 0.47855886816978455, "learning_rate": 0.00042049804798237296, "loss": 1.8444, "step": 33222 }, { "epoch": 1.11, "grad_norm": 0.48992782831192017, "learning_rate": 0.0004204884759780818, "loss": 1.7679, "step": 33223 }, { "epoch": 1.11, "grad_norm": 0.47696653008461, "learning_rate": 0.00042047890382753534, "loss": 1.7755, "step": 33224 }, { "epoch": 1.11, "grad_norm": 0.46554410457611084, "learning_rate": 0.0004204693315307452, "loss": 1.8127, "step": 33225 }, { "epoch": 1.11, "grad_norm": 0.45637568831443787, "learning_rate": 0.0004204597590877231, "loss": 1.7913, "step": 33226 }, { "epoch": 1.11, "grad_norm": 0.458009272813797, "learning_rate": 0.0004204501864984806, "loss": 1.8684, "step": 33227 }, { "epoch": 1.11, "grad_norm": 0.47694942355155945, "learning_rate": 0.00042044061376302934, "loss": 1.7817, "step": 33228 }, { "epoch": 1.11, "grad_norm": 0.47088804841041565, "learning_rate": 0.00042043104088138094, "loss": 1.7769, "step": 33229 }, { "epoch": 1.11, "grad_norm": 0.48372408747673035, "learning_rate": 0.00042042146785354684, "loss": 1.7677, "step": 33230 }, { "epoch": 1.11, "grad_norm": 0.447677880525589, "learning_rate": 0.00042041189467953893, "loss": 1.7209, "step": 33231 }, { "epoch": 1.11, "grad_norm": 0.4775281846523285, "learning_rate": 0.00042040232135936875, "loss": 1.7916, "step": 33232 }, { "epoch": 1.11, "grad_norm": 0.4596346914768219, "learning_rate": 0.00042039274789304785, "loss": 1.7805, "step": 33233 }, { "epoch": 1.11, "grad_norm": 0.4698607921600342, "learning_rate": 0.000420383174280588, "loss": 1.8393, "step": 33234 }, { "epoch": 1.11, "grad_norm": 0.4803132712841034, "learning_rate": 0.0004203736005220006, "loss": 1.7256, "step": 33235 }, { "epoch": 1.11, "grad_norm": 1.2548025846481323, "learning_rate": 0.00042036402661729734, "loss": 1.8319, "step": 33236 }, { "epoch": 1.11, "grad_norm": 0.47570812702178955, "learning_rate": 0.00042035445256649, "loss": 1.8331, "step": 33237 }, { "epoch": 1.11, "grad_norm": 0.49183517694473267, "learning_rate": 0.0004203448783695901, "loss": 1.8099, "step": 33238 }, { "epoch": 1.11, "grad_norm": 0.4590836763381958, "learning_rate": 0.00042033530402660915, "loss": 1.8082, "step": 33239 }, { "epoch": 1.11, "grad_norm": 0.46292248368263245, "learning_rate": 0.0004203257295375588, "loss": 1.7619, "step": 33240 }, { "epoch": 1.11, "grad_norm": 0.4885302484035492, "learning_rate": 0.000420316154902451, "loss": 1.8455, "step": 33241 }, { "epoch": 1.11, "grad_norm": 0.48431727290153503, "learning_rate": 0.00042030658012129693, "loss": 1.7661, "step": 33242 }, { "epoch": 1.11, "grad_norm": 0.4592237174510956, "learning_rate": 0.0004202970051941084, "loss": 1.7872, "step": 33243 }, { "epoch": 1.11, "grad_norm": 0.468826025724411, "learning_rate": 0.00042028743012089716, "loss": 1.7969, "step": 33244 }, { "epoch": 1.11, "grad_norm": 0.46846625208854675, "learning_rate": 0.00042027785490167456, "loss": 1.8524, "step": 33245 }, { "epoch": 1.11, "grad_norm": 0.49610039591789246, "learning_rate": 0.0004202682795364525, "loss": 1.7719, "step": 33246 }, { "epoch": 1.11, "grad_norm": 0.45983538031578064, "learning_rate": 0.00042025870402524225, "loss": 1.8597, "step": 33247 }, { "epoch": 1.11, "grad_norm": 0.46599265933036804, "learning_rate": 0.0004202491283680559, "loss": 1.7603, "step": 33248 }, { "epoch": 1.11, "grad_norm": 0.45470738410949707, "learning_rate": 0.0004202395525649047, "loss": 1.6762, "step": 33249 }, { "epoch": 1.11, "grad_norm": 0.4570920169353485, "learning_rate": 0.00042022997661580043, "loss": 1.823, "step": 33250 }, { "epoch": 1.11, "grad_norm": 0.4668915867805481, "learning_rate": 0.00042022040052075465, "loss": 1.7588, "step": 33251 }, { "epoch": 1.11, "grad_norm": 0.48157069087028503, "learning_rate": 0.000420210824279779, "loss": 1.833, "step": 33252 }, { "epoch": 1.11, "grad_norm": 0.4658072590827942, "learning_rate": 0.0004202012478928852, "loss": 1.7472, "step": 33253 }, { "epoch": 1.11, "grad_norm": 0.4651532471179962, "learning_rate": 0.00042019167136008476, "loss": 1.7675, "step": 33254 }, { "epoch": 1.11, "grad_norm": 0.46363604068756104, "learning_rate": 0.0004201820946813893, "loss": 1.8428, "step": 33255 }, { "epoch": 1.11, "grad_norm": 0.5010967254638672, "learning_rate": 0.00042017251785681056, "loss": 1.7776, "step": 33256 }, { "epoch": 1.11, "grad_norm": 0.4718441963195801, "learning_rate": 0.00042016294088636, "loss": 1.774, "step": 33257 }, { "epoch": 1.11, "grad_norm": 0.49055609107017517, "learning_rate": 0.0004201533637700495, "loss": 1.7724, "step": 33258 }, { "epoch": 1.11, "grad_norm": 0.47484567761421204, "learning_rate": 0.0004201437865078904, "loss": 1.8027, "step": 33259 }, { "epoch": 1.11, "grad_norm": 0.4663454294204712, "learning_rate": 0.0004201342090998944, "loss": 1.7512, "step": 33260 }, { "epoch": 1.11, "grad_norm": 0.46928855776786804, "learning_rate": 0.00042012463154607327, "loss": 1.7623, "step": 33261 }, { "epoch": 1.11, "grad_norm": 0.45899859070777893, "learning_rate": 0.00042011505384643847, "loss": 1.7707, "step": 33262 }, { "epoch": 1.11, "grad_norm": 0.4688167870044708, "learning_rate": 0.0004201054760010016, "loss": 1.7763, "step": 33263 }, { "epoch": 1.11, "grad_norm": 0.48168841004371643, "learning_rate": 0.0004200958980097746, "loss": 1.7981, "step": 33264 }, { "epoch": 1.11, "grad_norm": 0.47401344776153564, "learning_rate": 0.00042008631987276875, "loss": 1.8271, "step": 33265 }, { "epoch": 1.11, "grad_norm": 0.4652344584465027, "learning_rate": 0.00042007674158999587, "loss": 1.7558, "step": 33266 }, { "epoch": 1.11, "grad_norm": 0.4606069326400757, "learning_rate": 0.00042006716316146745, "loss": 1.7503, "step": 33267 }, { "epoch": 1.11, "grad_norm": 0.4748886823654175, "learning_rate": 0.00042005758458719516, "loss": 1.8118, "step": 33268 }, { "epoch": 1.11, "grad_norm": 0.4789802134037018, "learning_rate": 0.0004200480058671907, "loss": 1.831, "step": 33269 }, { "epoch": 1.11, "grad_norm": 0.44432592391967773, "learning_rate": 0.0004200384270014656, "loss": 1.7606, "step": 33270 }, { "epoch": 1.11, "grad_norm": 0.45684313774108887, "learning_rate": 0.0004200288479900316, "loss": 1.7707, "step": 33271 }, { "epoch": 1.11, "grad_norm": 0.4471798241138458, "learning_rate": 0.00042001926883290027, "loss": 1.8014, "step": 33272 }, { "epoch": 1.11, "grad_norm": 0.4564482867717743, "learning_rate": 0.0004200096895300832, "loss": 1.7835, "step": 33273 }, { "epoch": 1.11, "grad_norm": 0.45297691226005554, "learning_rate": 0.000420000110081592, "loss": 1.7508, "step": 33274 }, { "epoch": 1.11, "grad_norm": 0.44732633233070374, "learning_rate": 0.00041999053048743845, "loss": 1.7882, "step": 33275 }, { "epoch": 1.11, "grad_norm": 0.45335379242897034, "learning_rate": 0.0004199809507476341, "loss": 1.823, "step": 33276 }, { "epoch": 1.11, "grad_norm": 0.4680059552192688, "learning_rate": 0.00041997137086219045, "loss": 1.7826, "step": 33277 }, { "epoch": 1.11, "grad_norm": 0.46805161237716675, "learning_rate": 0.0004199617908311193, "loss": 1.7801, "step": 33278 }, { "epoch": 1.11, "grad_norm": 0.4778037965297699, "learning_rate": 0.0004199522106544321, "loss": 1.7504, "step": 33279 }, { "epoch": 1.11, "grad_norm": 0.4496256709098816, "learning_rate": 0.00041994263033214073, "loss": 1.8011, "step": 33280 }, { "epoch": 1.11, "grad_norm": 0.45377227663993835, "learning_rate": 0.00041993304986425654, "loss": 1.8171, "step": 33281 }, { "epoch": 1.11, "grad_norm": 0.4586714506149292, "learning_rate": 0.0004199234692507914, "loss": 1.7468, "step": 33282 }, { "epoch": 1.11, "grad_norm": 0.46991366147994995, "learning_rate": 0.00041991388849175684, "loss": 1.7312, "step": 33283 }, { "epoch": 1.11, "grad_norm": 0.4885323643684387, "learning_rate": 0.00041990430758716453, "loss": 1.7854, "step": 33284 }, { "epoch": 1.11, "grad_norm": 0.4550034999847412, "learning_rate": 0.00041989472653702594, "loss": 1.7503, "step": 33285 }, { "epoch": 1.11, "grad_norm": 0.4641241431236267, "learning_rate": 0.0004198851453413529, "loss": 1.8144, "step": 33286 }, { "epoch": 1.11, "grad_norm": 0.44966235756874084, "learning_rate": 0.0004198755640001569, "loss": 1.8014, "step": 33287 }, { "epoch": 1.11, "grad_norm": 0.46650856733322144, "learning_rate": 0.00041986598251344957, "loss": 1.7037, "step": 33288 }, { "epoch": 1.11, "grad_norm": 0.4703737497329712, "learning_rate": 0.0004198564008812428, "loss": 1.7734, "step": 33289 }, { "epoch": 1.11, "grad_norm": 0.4639478027820587, "learning_rate": 0.0004198468191035478, "loss": 1.7739, "step": 33290 }, { "epoch": 1.11, "grad_norm": 0.46002042293548584, "learning_rate": 0.00041983723718037663, "loss": 1.7452, "step": 33291 }, { "epoch": 1.11, "grad_norm": 0.4543107748031616, "learning_rate": 0.0004198276551117405, "loss": 1.7562, "step": 33292 }, { "epoch": 1.11, "grad_norm": 0.5715292096138, "learning_rate": 0.0004198180728976514, "loss": 1.8438, "step": 33293 }, { "epoch": 1.11, "grad_norm": 0.45373958349227905, "learning_rate": 0.0004198084905381208, "loss": 1.8011, "step": 33294 }, { "epoch": 1.11, "grad_norm": 0.4641928970813751, "learning_rate": 0.0004197989080331602, "loss": 1.7818, "step": 33295 }, { "epoch": 1.11, "grad_norm": 0.4745885729789734, "learning_rate": 0.0004197893253827816, "loss": 1.7485, "step": 33296 }, { "epoch": 1.11, "grad_norm": 0.4676002860069275, "learning_rate": 0.00041977974258699624, "loss": 1.77, "step": 33297 }, { "epoch": 1.11, "grad_norm": 0.4532136023044586, "learning_rate": 0.000419770159645816, "loss": 1.8782, "step": 33298 }, { "epoch": 1.11, "grad_norm": 0.45407605171203613, "learning_rate": 0.0004197605765592525, "loss": 1.7454, "step": 33299 }, { "epoch": 1.11, "grad_norm": 0.45181804895401, "learning_rate": 0.0004197509933273171, "loss": 1.7795, "step": 33300 }, { "epoch": 1.11, "grad_norm": 0.45691168308258057, "learning_rate": 0.00041974140995002185, "loss": 1.8047, "step": 33301 }, { "epoch": 1.11, "grad_norm": 0.4693641662597656, "learning_rate": 0.00041973182642737804, "loss": 1.8174, "step": 33302 }, { "epoch": 1.11, "grad_norm": 0.4601018726825714, "learning_rate": 0.0004197222427593975, "loss": 1.7888, "step": 33303 }, { "epoch": 1.11, "grad_norm": 0.4771461486816406, "learning_rate": 0.00041971265894609174, "loss": 1.7705, "step": 33304 }, { "epoch": 1.11, "grad_norm": 0.45234543085098267, "learning_rate": 0.0004197030749874725, "loss": 1.8005, "step": 33305 }, { "epoch": 1.11, "grad_norm": 0.4584505558013916, "learning_rate": 0.00041969349088355137, "loss": 1.7891, "step": 33306 }, { "epoch": 1.11, "grad_norm": 0.45446208119392395, "learning_rate": 0.00041968390663433994, "loss": 1.746, "step": 33307 }, { "epoch": 1.11, "grad_norm": 0.4682067334651947, "learning_rate": 0.0004196743222398499, "loss": 1.7654, "step": 33308 }, { "epoch": 1.11, "grad_norm": 0.4475606679916382, "learning_rate": 0.0004196647377000929, "loss": 1.764, "step": 33309 }, { "epoch": 1.11, "grad_norm": 0.4402914345264435, "learning_rate": 0.0004196551530150805, "loss": 1.7679, "step": 33310 }, { "epoch": 1.11, "grad_norm": 0.4567435681819916, "learning_rate": 0.00041964556818482434, "loss": 1.8222, "step": 33311 }, { "epoch": 1.11, "grad_norm": 0.4684680700302124, "learning_rate": 0.00041963598320933607, "loss": 1.823, "step": 33312 }, { "epoch": 1.11, "grad_norm": 0.47048136591911316, "learning_rate": 0.0004196263980886275, "loss": 1.7131, "step": 33313 }, { "epoch": 1.11, "grad_norm": 0.4827008545398712, "learning_rate": 0.00041961681282270995, "loss": 1.8342, "step": 33314 }, { "epoch": 1.11, "grad_norm": 0.5076797604560852, "learning_rate": 0.0004196072274115953, "loss": 1.8094, "step": 33315 }, { "epoch": 1.11, "grad_norm": 0.4646241068840027, "learning_rate": 0.00041959764185529506, "loss": 1.7726, "step": 33316 }, { "epoch": 1.11, "grad_norm": 0.4806109070777893, "learning_rate": 0.00041958805615382094, "loss": 1.837, "step": 33317 }, { "epoch": 1.11, "grad_norm": 0.4567391872406006, "learning_rate": 0.0004195784703071845, "loss": 1.8167, "step": 33318 }, { "epoch": 1.11, "grad_norm": 0.4879116415977478, "learning_rate": 0.00041956888431539743, "loss": 1.7502, "step": 33319 }, { "epoch": 1.11, "grad_norm": 0.47999730706214905, "learning_rate": 0.0004195592981784714, "loss": 1.8298, "step": 33320 }, { "epoch": 1.11, "grad_norm": 0.45082616806030273, "learning_rate": 0.0004195497118964179, "loss": 1.7476, "step": 33321 }, { "epoch": 1.11, "grad_norm": 0.4674227833747864, "learning_rate": 0.0004195401254692488, "loss": 1.8146, "step": 33322 }, { "epoch": 1.11, "grad_norm": 0.47517457604408264, "learning_rate": 0.0004195305388969755, "loss": 1.8142, "step": 33323 }, { "epoch": 1.11, "grad_norm": 0.46092990040779114, "learning_rate": 0.0004195209521796097, "loss": 1.767, "step": 33324 }, { "epoch": 1.11, "grad_norm": 0.4867757260799408, "learning_rate": 0.0004195113653171631, "loss": 1.7723, "step": 33325 }, { "epoch": 1.11, "grad_norm": 0.4741475284099579, "learning_rate": 0.00041950177830964744, "loss": 1.7685, "step": 33326 }, { "epoch": 1.11, "grad_norm": 0.8909494876861572, "learning_rate": 0.0004194921911570741, "loss": 1.8099, "step": 33327 }, { "epoch": 1.11, "grad_norm": 0.47195345163345337, "learning_rate": 0.0004194826038594549, "loss": 1.8366, "step": 33328 }, { "epoch": 1.11, "grad_norm": 0.4636317491531372, "learning_rate": 0.0004194730164168014, "loss": 1.7456, "step": 33329 }, { "epoch": 1.11, "grad_norm": 0.4981219172477722, "learning_rate": 0.0004194634288291252, "loss": 1.8091, "step": 33330 }, { "epoch": 1.11, "grad_norm": 0.4738346338272095, "learning_rate": 0.0004194538410964382, "loss": 1.8258, "step": 33331 }, { "epoch": 1.11, "grad_norm": 0.47024571895599365, "learning_rate": 0.00041944425321875157, "loss": 1.8038, "step": 33332 }, { "epoch": 1.11, "grad_norm": 0.4643756151199341, "learning_rate": 0.00041943466519607747, "loss": 1.7491, "step": 33333 }, { "epoch": 1.11, "grad_norm": 0.469950407743454, "learning_rate": 0.0004194250770284271, "loss": 1.8385, "step": 33334 }, { "epoch": 1.11, "grad_norm": 0.4591556787490845, "learning_rate": 0.0004194154887158123, "loss": 1.7667, "step": 33335 }, { "epoch": 1.11, "grad_norm": 0.4656657874584198, "learning_rate": 0.00041940590025824477, "loss": 1.8149, "step": 33336 }, { "epoch": 1.11, "grad_norm": 0.46773964166641235, "learning_rate": 0.00041939631165573604, "loss": 1.7335, "step": 33337 }, { "epoch": 1.11, "grad_norm": 0.474123477935791, "learning_rate": 0.0004193867229082978, "loss": 1.8041, "step": 33338 }, { "epoch": 1.11, "grad_norm": 0.45021507143974304, "learning_rate": 0.0004193771340159416, "loss": 1.7972, "step": 33339 }, { "epoch": 1.11, "grad_norm": 0.4495372474193573, "learning_rate": 0.00041936754497867923, "loss": 1.7583, "step": 33340 }, { "epoch": 1.11, "grad_norm": 0.46146196126937866, "learning_rate": 0.00041935795579652225, "loss": 1.7654, "step": 33341 }, { "epoch": 1.11, "grad_norm": 0.4705606997013092, "learning_rate": 0.0004193483664694822, "loss": 1.7627, "step": 33342 }, { "epoch": 1.11, "grad_norm": 0.46429070830345154, "learning_rate": 0.00041933877699757094, "loss": 1.8428, "step": 33343 }, { "epoch": 1.11, "grad_norm": 0.4590817987918854, "learning_rate": 0.00041932918738079986, "loss": 1.7804, "step": 33344 }, { "epoch": 1.11, "grad_norm": 0.4693557918071747, "learning_rate": 0.0004193195976191809, "loss": 1.7812, "step": 33345 }, { "epoch": 1.11, "grad_norm": 0.4565334618091583, "learning_rate": 0.0004193100077127254, "loss": 1.787, "step": 33346 }, { "epoch": 1.11, "grad_norm": 0.4650464355945587, "learning_rate": 0.0004193004176614452, "loss": 1.8479, "step": 33347 }, { "epoch": 1.11, "grad_norm": 0.46264171600341797, "learning_rate": 0.0004192908274653518, "loss": 1.7544, "step": 33348 }, { "epoch": 1.11, "grad_norm": 0.47069722414016724, "learning_rate": 0.00041928123712445687, "loss": 1.7962, "step": 33349 }, { "epoch": 1.11, "grad_norm": 0.4478021264076233, "learning_rate": 0.00041927164663877224, "loss": 1.7244, "step": 33350 }, { "epoch": 1.11, "grad_norm": 0.4549000859260559, "learning_rate": 0.00041926205600830937, "loss": 1.8169, "step": 33351 }, { "epoch": 1.11, "grad_norm": 0.46853306889533997, "learning_rate": 0.0004192524652330798, "loss": 1.7105, "step": 33352 }, { "epoch": 1.11, "grad_norm": 0.484512597322464, "learning_rate": 0.0004192428743130954, "loss": 1.7837, "step": 33353 }, { "epoch": 1.11, "grad_norm": 0.4689646363258362, "learning_rate": 0.0004192332832483677, "loss": 1.7709, "step": 33354 }, { "epoch": 1.11, "grad_norm": 0.4696567952632904, "learning_rate": 0.0004192236920389084, "loss": 1.8107, "step": 33355 }, { "epoch": 1.11, "grad_norm": 0.4750007688999176, "learning_rate": 0.0004192141006847292, "loss": 1.7377, "step": 33356 }, { "epoch": 1.11, "grad_norm": 0.4674776494503021, "learning_rate": 0.0004192045091858414, "loss": 1.8439, "step": 33357 }, { "epoch": 1.11, "grad_norm": 0.4781304597854614, "learning_rate": 0.0004191949175422571, "loss": 1.8001, "step": 33358 }, { "epoch": 1.11, "grad_norm": 0.49252721667289734, "learning_rate": 0.00041918532575398773, "loss": 1.8156, "step": 33359 }, { "epoch": 1.11, "grad_norm": 0.46485382318496704, "learning_rate": 0.00041917573382104477, "loss": 1.7721, "step": 33360 }, { "epoch": 1.11, "grad_norm": 0.4598323404788971, "learning_rate": 0.0004191661417434402, "loss": 1.7971, "step": 33361 }, { "epoch": 1.11, "grad_norm": 0.489523708820343, "learning_rate": 0.00041915654952118535, "loss": 1.7992, "step": 33362 }, { "epoch": 1.11, "grad_norm": 0.4663156270980835, "learning_rate": 0.00041914695715429215, "loss": 1.778, "step": 33363 }, { "epoch": 1.11, "grad_norm": 0.4522184133529663, "learning_rate": 0.0004191373646427719, "loss": 1.7395, "step": 33364 }, { "epoch": 1.11, "grad_norm": 0.45057472586631775, "learning_rate": 0.0004191277719866366, "loss": 1.7664, "step": 33365 }, { "epoch": 1.11, "grad_norm": 0.4683297872543335, "learning_rate": 0.00041911817918589774, "loss": 1.7389, "step": 33366 }, { "epoch": 1.11, "grad_norm": 0.4757518172264099, "learning_rate": 0.0004191085862405669, "loss": 1.8193, "step": 33367 }, { "epoch": 1.11, "grad_norm": 0.4509475529193878, "learning_rate": 0.00041909899315065586, "loss": 1.8633, "step": 33368 }, { "epoch": 1.11, "grad_norm": 0.4408720135688782, "learning_rate": 0.00041908939991617606, "loss": 1.8036, "step": 33369 }, { "epoch": 1.11, "grad_norm": 0.4607808589935303, "learning_rate": 0.0004190798065371394, "loss": 1.8769, "step": 33370 }, { "epoch": 1.11, "grad_norm": 0.4614907503128052, "learning_rate": 0.00041907021301355743, "loss": 1.7939, "step": 33371 }, { "epoch": 1.11, "grad_norm": 0.4487161934375763, "learning_rate": 0.00041906061934544165, "loss": 1.7125, "step": 33372 }, { "epoch": 1.11, "grad_norm": 0.4529683291912079, "learning_rate": 0.00041905102553280383, "loss": 1.805, "step": 33373 }, { "epoch": 1.11, "grad_norm": 0.44224220514297485, "learning_rate": 0.0004190414315756557, "loss": 1.7288, "step": 33374 }, { "epoch": 1.11, "grad_norm": 0.4682804048061371, "learning_rate": 0.0004190318374740088, "loss": 1.7105, "step": 33375 }, { "epoch": 1.11, "grad_norm": 0.4743365943431854, "learning_rate": 0.0004190222432278747, "loss": 1.8833, "step": 33376 }, { "epoch": 1.11, "grad_norm": 0.4772094488143921, "learning_rate": 0.0004190126488372652, "loss": 1.8487, "step": 33377 }, { "epoch": 1.11, "grad_norm": 0.490598201751709, "learning_rate": 0.0004190030543021918, "loss": 1.7883, "step": 33378 }, { "epoch": 1.11, "grad_norm": 0.4603542983531952, "learning_rate": 0.00041899345962266625, "loss": 1.8981, "step": 33379 }, { "epoch": 1.11, "grad_norm": 0.44603481888771057, "learning_rate": 0.0004189838647987002, "loss": 1.8027, "step": 33380 }, { "epoch": 1.11, "grad_norm": 0.4566405713558197, "learning_rate": 0.0004189742698303053, "loss": 1.8609, "step": 33381 }, { "epoch": 1.11, "grad_norm": 0.47481024265289307, "learning_rate": 0.00041896467471749316, "loss": 1.8157, "step": 33382 }, { "epoch": 1.11, "grad_norm": 0.4821281433105469, "learning_rate": 0.0004189550794602754, "loss": 1.8566, "step": 33383 }, { "epoch": 1.11, "grad_norm": 0.45574215054512024, "learning_rate": 0.0004189454840586637, "loss": 1.8536, "step": 33384 }, { "epoch": 1.11, "grad_norm": 0.44887423515319824, "learning_rate": 0.00041893588851266974, "loss": 1.7574, "step": 33385 }, { "epoch": 1.11, "grad_norm": 0.4454129636287689, "learning_rate": 0.00041892629282230516, "loss": 1.7916, "step": 33386 }, { "epoch": 1.11, "grad_norm": 0.4878975450992584, "learning_rate": 0.0004189166969875815, "loss": 1.8116, "step": 33387 }, { "epoch": 1.11, "grad_norm": 0.45936912298202515, "learning_rate": 0.0004189071010085106, "loss": 1.8516, "step": 33388 }, { "epoch": 1.11, "grad_norm": 1.2418525218963623, "learning_rate": 0.0004188975048851039, "loss": 1.8127, "step": 33389 }, { "epoch": 1.11, "grad_norm": 0.48120900988578796, "learning_rate": 0.0004188879086173732, "loss": 1.7813, "step": 33390 }, { "epoch": 1.11, "grad_norm": 0.46347033977508545, "learning_rate": 0.00041887831220533, "loss": 1.7885, "step": 33391 }, { "epoch": 1.11, "grad_norm": 0.5006792545318604, "learning_rate": 0.0004188687156489861, "loss": 1.7445, "step": 33392 }, { "epoch": 1.11, "grad_norm": 0.47252482175827026, "learning_rate": 0.0004188591189483532, "loss": 1.7404, "step": 33393 }, { "epoch": 1.11, "grad_norm": 0.4522821605205536, "learning_rate": 0.0004188495221034427, "loss": 1.783, "step": 33394 }, { "epoch": 1.11, "grad_norm": 0.44395896792411804, "learning_rate": 0.0004188399251142665, "loss": 1.8025, "step": 33395 }, { "epoch": 1.11, "grad_norm": 0.462388813495636, "learning_rate": 0.0004188303279808361, "loss": 1.8557, "step": 33396 }, { "epoch": 1.11, "grad_norm": 0.45129162073135376, "learning_rate": 0.0004188207307031631, "loss": 1.7295, "step": 33397 }, { "epoch": 1.11, "grad_norm": 0.47549179196357727, "learning_rate": 0.00041881113328125935, "loss": 1.7379, "step": 33398 }, { "epoch": 1.11, "grad_norm": 0.4823562800884247, "learning_rate": 0.0004188015357151363, "loss": 1.7794, "step": 33399 }, { "epoch": 1.11, "grad_norm": 0.4549751281738281, "learning_rate": 0.00041879193800480584, "loss": 1.7816, "step": 33400 }, { "epoch": 1.11, "grad_norm": 0.4671274423599243, "learning_rate": 0.00041878234015027945, "loss": 1.7881, "step": 33401 }, { "epoch": 1.11, "grad_norm": 0.47484472393989563, "learning_rate": 0.0004187727421515687, "loss": 1.7831, "step": 33402 }, { "epoch": 1.11, "grad_norm": 0.469140887260437, "learning_rate": 0.00041876314400868535, "loss": 1.7703, "step": 33403 }, { "epoch": 1.11, "grad_norm": 0.5023075342178345, "learning_rate": 0.00041875354572164106, "loss": 1.8297, "step": 33404 }, { "epoch": 1.11, "grad_norm": 0.4659567177295685, "learning_rate": 0.00041874394729044743, "loss": 1.7737, "step": 33405 }, { "epoch": 1.11, "grad_norm": 0.45805731415748596, "learning_rate": 0.0004187343487151163, "loss": 1.8617, "step": 33406 }, { "epoch": 1.11, "grad_norm": 0.46052148938179016, "learning_rate": 0.00041872474999565904, "loss": 1.7759, "step": 33407 }, { "epoch": 1.11, "grad_norm": 0.460878849029541, "learning_rate": 0.00041871515113208743, "loss": 1.8188, "step": 33408 }, { "epoch": 1.11, "grad_norm": 0.47113150358200073, "learning_rate": 0.0004187055521244131, "loss": 1.7733, "step": 33409 }, { "epoch": 1.11, "grad_norm": 0.4531513750553131, "learning_rate": 0.0004186959529726477, "loss": 1.7838, "step": 33410 }, { "epoch": 1.11, "grad_norm": 0.4686465263366699, "learning_rate": 0.00041868635367680305, "loss": 1.717, "step": 33411 }, { "epoch": 1.11, "grad_norm": 0.4791499376296997, "learning_rate": 0.0004186767542368905, "loss": 1.7614, "step": 33412 }, { "epoch": 1.11, "grad_norm": 0.47273480892181396, "learning_rate": 0.00041866715465292193, "loss": 1.7756, "step": 33413 }, { "epoch": 1.11, "grad_norm": 0.46211475133895874, "learning_rate": 0.0004186575549249089, "loss": 1.8175, "step": 33414 }, { "epoch": 1.11, "grad_norm": 0.4938678741455078, "learning_rate": 0.00041864795505286304, "loss": 1.8988, "step": 33415 }, { "epoch": 1.11, "grad_norm": 0.4747290015220642, "learning_rate": 0.0004186383550367961, "loss": 1.8101, "step": 33416 }, { "epoch": 1.11, "grad_norm": 0.46357110142707825, "learning_rate": 0.0004186287548767196, "loss": 1.8069, "step": 33417 }, { "epoch": 1.11, "grad_norm": 0.4636567533016205, "learning_rate": 0.0004186191545726454, "loss": 1.8005, "step": 33418 }, { "epoch": 1.11, "grad_norm": 0.4760826826095581, "learning_rate": 0.0004186095541245849, "loss": 1.8049, "step": 33419 }, { "epoch": 1.11, "grad_norm": 0.45606839656829834, "learning_rate": 0.0004185999535325499, "loss": 1.7609, "step": 33420 }, { "epoch": 1.11, "grad_norm": 0.4697667956352234, "learning_rate": 0.0004185903527965521, "loss": 1.7368, "step": 33421 }, { "epoch": 1.11, "grad_norm": 0.4732733368873596, "learning_rate": 0.0004185807519166031, "loss": 1.7512, "step": 33422 }, { "epoch": 1.11, "grad_norm": 0.44355309009552, "learning_rate": 0.0004185711508927145, "loss": 1.7205, "step": 33423 }, { "epoch": 1.11, "grad_norm": 0.4542710483074188, "learning_rate": 0.0004185615497248979, "loss": 1.7982, "step": 33424 }, { "epoch": 1.11, "grad_norm": 0.47877392172813416, "learning_rate": 0.0004185519484131652, "loss": 1.7928, "step": 33425 }, { "epoch": 1.11, "grad_norm": 0.48057475686073303, "learning_rate": 0.0004185423469575278, "loss": 1.8714, "step": 33426 }, { "epoch": 1.11, "grad_norm": 0.4539448916912079, "learning_rate": 0.0004185327453579975, "loss": 1.7561, "step": 33427 }, { "epoch": 1.11, "grad_norm": 0.4807663559913635, "learning_rate": 0.0004185231436145859, "loss": 1.7535, "step": 33428 }, { "epoch": 1.11, "grad_norm": 0.45647695660591125, "learning_rate": 0.0004185135417273047, "loss": 1.8268, "step": 33429 }, { "epoch": 1.11, "grad_norm": 0.4804162085056305, "learning_rate": 0.00041850393969616547, "loss": 1.8017, "step": 33430 }, { "epoch": 1.11, "grad_norm": 0.4610981047153473, "learning_rate": 0.00041849433752117995, "loss": 1.7366, "step": 33431 }, { "epoch": 1.11, "grad_norm": 0.44978249073028564, "learning_rate": 0.0004184847352023597, "loss": 1.7699, "step": 33432 }, { "epoch": 1.11, "grad_norm": 0.4816608428955078, "learning_rate": 0.0004184751327397165, "loss": 1.7971, "step": 33433 }, { "epoch": 1.11, "grad_norm": 0.4828137457370758, "learning_rate": 0.00041846553013326197, "loss": 1.7708, "step": 33434 }, { "epoch": 1.11, "grad_norm": 0.45630332827568054, "learning_rate": 0.00041845592738300766, "loss": 1.8061, "step": 33435 }, { "epoch": 1.11, "grad_norm": 0.44868871569633484, "learning_rate": 0.00041844632448896537, "loss": 1.7147, "step": 33436 }, { "epoch": 1.11, "grad_norm": 0.4666014611721039, "learning_rate": 0.0004184367214511467, "loss": 1.8455, "step": 33437 }, { "epoch": 1.11, "grad_norm": 0.46109846234321594, "learning_rate": 0.0004184271182695633, "loss": 1.7418, "step": 33438 }, { "epoch": 1.11, "grad_norm": 0.48424115777015686, "learning_rate": 0.00041841751494422683, "loss": 1.816, "step": 33439 }, { "epoch": 1.11, "grad_norm": 0.48533371090888977, "learning_rate": 0.00041840791147514886, "loss": 1.8048, "step": 33440 }, { "epoch": 1.11, "grad_norm": 0.4568049907684326, "learning_rate": 0.0004183983078623412, "loss": 1.7639, "step": 33441 }, { "epoch": 1.11, "grad_norm": 0.4713022708892822, "learning_rate": 0.00041838870410581543, "loss": 1.7807, "step": 33442 }, { "epoch": 1.11, "grad_norm": 0.49178650975227356, "learning_rate": 0.0004183791002055833, "loss": 1.8384, "step": 33443 }, { "epoch": 1.11, "grad_norm": 0.4926845133304596, "learning_rate": 0.0004183694961616563, "loss": 1.6513, "step": 33444 }, { "epoch": 1.11, "grad_norm": 0.4536359906196594, "learning_rate": 0.00041835989197404615, "loss": 1.8125, "step": 33445 }, { "epoch": 1.11, "grad_norm": 0.4524592459201813, "learning_rate": 0.0004183502876427646, "loss": 1.7519, "step": 33446 }, { "epoch": 1.11, "grad_norm": 0.47267305850982666, "learning_rate": 0.00041834068316782316, "loss": 1.7308, "step": 33447 }, { "epoch": 1.11, "grad_norm": 0.44530001282691956, "learning_rate": 0.00041833107854923366, "loss": 1.7752, "step": 33448 }, { "epoch": 1.11, "grad_norm": 0.476595938205719, "learning_rate": 0.00041832147378700755, "loss": 1.8301, "step": 33449 }, { "epoch": 1.11, "grad_norm": 0.4694487452507019, "learning_rate": 0.0004183118688811567, "loss": 1.7488, "step": 33450 }, { "epoch": 1.11, "grad_norm": 0.4810546636581421, "learning_rate": 0.0004183022638316926, "loss": 1.7509, "step": 33451 }, { "epoch": 1.11, "grad_norm": 0.4550505578517914, "learning_rate": 0.00041829265863862705, "loss": 1.7698, "step": 33452 }, { "epoch": 1.11, "grad_norm": 0.45598071813583374, "learning_rate": 0.0004182830533019716, "loss": 1.7449, "step": 33453 }, { "epoch": 1.11, "grad_norm": 0.4655248820781708, "learning_rate": 0.00041827344782173796, "loss": 1.7681, "step": 33454 }, { "epoch": 1.11, "grad_norm": 0.48291417956352234, "learning_rate": 0.00041826384219793784, "loss": 1.8645, "step": 33455 }, { "epoch": 1.11, "grad_norm": 0.4580897390842438, "learning_rate": 0.00041825423643058275, "loss": 1.8818, "step": 33456 }, { "epoch": 1.11, "grad_norm": 0.48451435565948486, "learning_rate": 0.0004182446305196845, "loss": 1.8449, "step": 33457 }, { "epoch": 1.11, "grad_norm": 0.4707457423210144, "learning_rate": 0.0004182350244652547, "loss": 1.7864, "step": 33458 }, { "epoch": 1.11, "grad_norm": 0.4425463080406189, "learning_rate": 0.00041822541826730495, "loss": 1.7593, "step": 33459 }, { "epoch": 1.11, "grad_norm": 0.46552029252052307, "learning_rate": 0.0004182158119258469, "loss": 1.7768, "step": 33460 }, { "epoch": 1.11, "grad_norm": 0.45868217945098877, "learning_rate": 0.0004182062054408924, "loss": 1.7784, "step": 33461 }, { "epoch": 1.11, "grad_norm": 0.47061169147491455, "learning_rate": 0.000418196598812453, "loss": 1.8676, "step": 33462 }, { "epoch": 1.11, "grad_norm": 0.4589677155017853, "learning_rate": 0.0004181869920405403, "loss": 1.809, "step": 33463 }, { "epoch": 1.11, "grad_norm": 0.4569123685359955, "learning_rate": 0.00041817738512516596, "loss": 1.7523, "step": 33464 }, { "epoch": 1.11, "grad_norm": 0.4584527015686035, "learning_rate": 0.00041816777806634166, "loss": 1.7226, "step": 33465 }, { "epoch": 1.11, "grad_norm": 0.45035520195961, "learning_rate": 0.0004181581708640792, "loss": 1.7377, "step": 33466 }, { "epoch": 1.11, "grad_norm": 0.47882387042045593, "learning_rate": 0.00041814856351839007, "loss": 1.8254, "step": 33467 }, { "epoch": 1.11, "grad_norm": 0.4716106951236725, "learning_rate": 0.000418138956029286, "loss": 1.7696, "step": 33468 }, { "epoch": 1.11, "grad_norm": 0.4677901864051819, "learning_rate": 0.0004181293483967787, "loss": 1.8726, "step": 33469 }, { "epoch": 1.11, "grad_norm": 0.4739459753036499, "learning_rate": 0.0004181197406208797, "loss": 1.7602, "step": 33470 }, { "epoch": 1.11, "grad_norm": 0.5168275237083435, "learning_rate": 0.0004181101327016007, "loss": 1.8286, "step": 33471 }, { "epoch": 1.11, "grad_norm": 0.4870432913303375, "learning_rate": 0.0004181005246389535, "loss": 1.7654, "step": 33472 }, { "epoch": 1.11, "grad_norm": 0.4705488979816437, "learning_rate": 0.0004180909164329497, "loss": 1.8205, "step": 33473 }, { "epoch": 1.11, "grad_norm": 0.45655590295791626, "learning_rate": 0.0004180813080836008, "loss": 1.7617, "step": 33474 }, { "epoch": 1.11, "grad_norm": 0.44903895258903503, "learning_rate": 0.0004180716995909186, "loss": 1.769, "step": 33475 }, { "epoch": 1.11, "grad_norm": 0.4674598276615143, "learning_rate": 0.00041806209095491485, "loss": 1.7496, "step": 33476 }, { "epoch": 1.11, "grad_norm": 0.4598666727542877, "learning_rate": 0.000418052482175601, "loss": 1.744, "step": 33477 }, { "epoch": 1.11, "grad_norm": 0.46223363280296326, "learning_rate": 0.000418042873252989, "loss": 1.7997, "step": 33478 }, { "epoch": 1.11, "grad_norm": 0.466743528842926, "learning_rate": 0.0004180332641870902, "loss": 1.7484, "step": 33479 }, { "epoch": 1.11, "grad_norm": 0.4589948058128357, "learning_rate": 0.00041802365497791647, "loss": 1.8085, "step": 33480 }, { "epoch": 1.11, "grad_norm": 0.46739545464515686, "learning_rate": 0.00041801404562547933, "loss": 1.7618, "step": 33481 }, { "epoch": 1.11, "grad_norm": 0.4814857840538025, "learning_rate": 0.0004180044361297906, "loss": 1.7902, "step": 33482 }, { "epoch": 1.11, "grad_norm": 0.48428165912628174, "learning_rate": 0.00041799482649086184, "loss": 1.8342, "step": 33483 }, { "epoch": 1.11, "grad_norm": 0.4447709321975708, "learning_rate": 0.00041798521670870475, "loss": 1.7742, "step": 33484 }, { "epoch": 1.11, "grad_norm": 0.43697410821914673, "learning_rate": 0.0004179756067833311, "loss": 1.7825, "step": 33485 }, { "epoch": 1.11, "grad_norm": 0.4647751748561859, "learning_rate": 0.00041796599671475225, "loss": 1.7142, "step": 33486 }, { "epoch": 1.11, "grad_norm": 0.4664016366004944, "learning_rate": 0.0004179563865029802, "loss": 1.7513, "step": 33487 }, { "epoch": 1.11, "grad_norm": 0.4618355929851532, "learning_rate": 0.0004179467761480265, "loss": 1.8275, "step": 33488 }, { "epoch": 1.11, "grad_norm": 0.4609758257865906, "learning_rate": 0.00041793716564990276, "loss": 1.8512, "step": 33489 }, { "epoch": 1.11, "grad_norm": 0.4547358751296997, "learning_rate": 0.0004179275550086206, "loss": 1.8001, "step": 33490 }, { "epoch": 1.11, "grad_norm": 0.5151206851005554, "learning_rate": 0.0004179179442241918, "loss": 1.7783, "step": 33491 }, { "epoch": 1.11, "grad_norm": 0.48292142152786255, "learning_rate": 0.0004179083332966281, "loss": 1.8001, "step": 33492 }, { "epoch": 1.11, "grad_norm": 0.44464239478111267, "learning_rate": 0.00041789872222594093, "loss": 1.7614, "step": 33493 }, { "epoch": 1.11, "grad_norm": 0.48972538113594055, "learning_rate": 0.0004178891110121421, "loss": 1.8528, "step": 33494 }, { "epoch": 1.11, "grad_norm": 0.5130177736282349, "learning_rate": 0.0004178794996552432, "loss": 1.827, "step": 33495 }, { "epoch": 1.11, "grad_norm": 0.4707496464252472, "learning_rate": 0.0004178698881552561, "loss": 1.8166, "step": 33496 }, { "epoch": 1.11, "grad_norm": 0.46074143052101135, "learning_rate": 0.00041786027651219225, "loss": 1.7111, "step": 33497 }, { "epoch": 1.11, "grad_norm": 0.48174217343330383, "learning_rate": 0.0004178506647260634, "loss": 1.86, "step": 33498 }, { "epoch": 1.11, "grad_norm": 0.4829663634300232, "learning_rate": 0.0004178410527968812, "loss": 1.8597, "step": 33499 }, { "epoch": 1.11, "grad_norm": 0.49584144353866577, "learning_rate": 0.00041783144072465736, "loss": 1.7594, "step": 33500 }, { "epoch": 1.11, "grad_norm": 0.45741498470306396, "learning_rate": 0.0004178218285094034, "loss": 1.7466, "step": 33501 }, { "epoch": 1.11, "grad_norm": 0.4641163647174835, "learning_rate": 0.00041781221615113116, "loss": 1.7885, "step": 33502 }, { "epoch": 1.11, "grad_norm": 0.4782584011554718, "learning_rate": 0.0004178026036498524, "loss": 1.8576, "step": 33503 }, { "epoch": 1.11, "grad_norm": 0.4865691661834717, "learning_rate": 0.0004177929910055784, "loss": 1.7601, "step": 33504 }, { "epoch": 1.11, "grad_norm": 0.4654601812362671, "learning_rate": 0.00041778337821832127, "loss": 1.751, "step": 33505 }, { "epoch": 1.11, "grad_norm": 0.4733355939388275, "learning_rate": 0.0004177737652880923, "loss": 1.81, "step": 33506 }, { "epoch": 1.11, "grad_norm": 0.4584762156009674, "learning_rate": 0.00041776415221490344, "loss": 1.7558, "step": 33507 }, { "epoch": 1.11, "grad_norm": 0.4658363163471222, "learning_rate": 0.00041775453899876614, "loss": 1.8052, "step": 33508 }, { "epoch": 1.11, "grad_norm": 0.45754939317703247, "learning_rate": 0.0004177449256396923, "loss": 1.7827, "step": 33509 }, { "epoch": 1.11, "grad_norm": 0.463636577129364, "learning_rate": 0.00041773531213769343, "loss": 1.7681, "step": 33510 }, { "epoch": 1.11, "grad_norm": 0.4783913195133209, "learning_rate": 0.00041772569849278117, "loss": 1.8594, "step": 33511 }, { "epoch": 1.11, "grad_norm": 0.4744006097316742, "learning_rate": 0.0004177160847049674, "loss": 1.8154, "step": 33512 }, { "epoch": 1.11, "grad_norm": 0.45992153882980347, "learning_rate": 0.00041770647077426354, "loss": 1.8264, "step": 33513 }, { "epoch": 1.12, "grad_norm": 0.4691121578216553, "learning_rate": 0.00041769685670068143, "loss": 1.7395, "step": 33514 }, { "epoch": 1.12, "grad_norm": 0.45956796407699585, "learning_rate": 0.00041768724248423265, "loss": 1.788, "step": 33515 }, { "epoch": 1.12, "grad_norm": 0.5034295320510864, "learning_rate": 0.00041767762812492884, "loss": 1.7547, "step": 33516 }, { "epoch": 1.12, "grad_norm": 0.4666235148906708, "learning_rate": 0.00041766801362278183, "loss": 1.6929, "step": 33517 }, { "epoch": 1.12, "grad_norm": 0.4648137390613556, "learning_rate": 0.00041765839897780317, "loss": 1.7412, "step": 33518 }, { "epoch": 1.12, "grad_norm": 0.47238799929618835, "learning_rate": 0.0004176487841900045, "loss": 1.8234, "step": 33519 }, { "epoch": 1.12, "grad_norm": 0.45684316754341125, "learning_rate": 0.00041763916925939754, "loss": 1.7962, "step": 33520 }, { "epoch": 1.12, "grad_norm": 0.4555724263191223, "learning_rate": 0.000417629554185994, "loss": 1.7413, "step": 33521 }, { "epoch": 1.12, "grad_norm": 0.4945317208766937, "learning_rate": 0.00041761993896980543, "loss": 1.8238, "step": 33522 }, { "epoch": 1.12, "grad_norm": 0.4907960593700409, "learning_rate": 0.0004176103236108437, "loss": 1.814, "step": 33523 }, { "epoch": 1.12, "grad_norm": 0.4414621591567993, "learning_rate": 0.00041760070810912034, "loss": 1.7369, "step": 33524 }, { "epoch": 1.12, "grad_norm": 0.4652145802974701, "learning_rate": 0.00041759109246464703, "loss": 1.8024, "step": 33525 }, { "epoch": 1.12, "grad_norm": 0.46247631311416626, "learning_rate": 0.00041758147667743546, "loss": 1.7865, "step": 33526 }, { "epoch": 1.12, "grad_norm": 0.47318387031555176, "learning_rate": 0.0004175718607474973, "loss": 1.8755, "step": 33527 }, { "epoch": 1.12, "grad_norm": 0.45577165484428406, "learning_rate": 0.00041756224467484425, "loss": 1.7769, "step": 33528 }, { "epoch": 1.12, "grad_norm": 0.48861145973205566, "learning_rate": 0.00041755262845948786, "loss": 1.7277, "step": 33529 }, { "epoch": 1.12, "grad_norm": 0.4601189196109772, "learning_rate": 0.00041754301210144, "loss": 1.727, "step": 33530 }, { "epoch": 1.12, "grad_norm": 0.45634791254997253, "learning_rate": 0.0004175333956007122, "loss": 1.807, "step": 33531 }, { "epoch": 1.12, "grad_norm": 0.4612153172492981, "learning_rate": 0.00041752377895731623, "loss": 1.8476, "step": 33532 }, { "epoch": 1.12, "grad_norm": 0.45503050088882446, "learning_rate": 0.00041751416217126363, "loss": 1.748, "step": 33533 }, { "epoch": 1.12, "grad_norm": 0.46271926164627075, "learning_rate": 0.0004175045452425661, "loss": 1.8055, "step": 33534 }, { "epoch": 1.12, "grad_norm": 0.4526268541812897, "learning_rate": 0.0004174949281712356, "loss": 1.7673, "step": 33535 }, { "epoch": 1.12, "grad_norm": 0.4732179641723633, "learning_rate": 0.00041748531095728335, "loss": 1.8176, "step": 33536 }, { "epoch": 1.12, "grad_norm": 0.4655735492706299, "learning_rate": 0.0004174756936007214, "loss": 1.7835, "step": 33537 }, { "epoch": 1.12, "grad_norm": 0.466012179851532, "learning_rate": 0.0004174660761015611, "loss": 1.8084, "step": 33538 }, { "epoch": 1.12, "grad_norm": 0.442410409450531, "learning_rate": 0.00041745645845981435, "loss": 1.7178, "step": 33539 }, { "epoch": 1.12, "grad_norm": 0.46932539343833923, "learning_rate": 0.0004174468406754929, "loss": 1.7991, "step": 33540 }, { "epoch": 1.12, "grad_norm": 0.4744736850261688, "learning_rate": 0.0004174372227486081, "loss": 1.763, "step": 33541 }, { "epoch": 1.12, "grad_norm": 0.4697769284248352, "learning_rate": 0.0004174276046791719, "loss": 1.7351, "step": 33542 }, { "epoch": 1.12, "grad_norm": 0.4896065890789032, "learning_rate": 0.0004174179864671959, "loss": 1.851, "step": 33543 }, { "epoch": 1.12, "grad_norm": 0.46091604232788086, "learning_rate": 0.0004174083681126917, "loss": 1.7528, "step": 33544 }, { "epoch": 1.12, "grad_norm": 0.5057833790779114, "learning_rate": 0.00041739874961567116, "loss": 1.7627, "step": 33545 }, { "epoch": 1.12, "grad_norm": 0.472298800945282, "learning_rate": 0.0004173891309761457, "loss": 1.8061, "step": 33546 }, { "epoch": 1.12, "grad_norm": 0.46555668115615845, "learning_rate": 0.0004173795121941273, "loss": 1.7884, "step": 33547 }, { "epoch": 1.12, "grad_norm": 0.46698641777038574, "learning_rate": 0.00041736989326962733, "loss": 1.7672, "step": 33548 }, { "epoch": 1.12, "grad_norm": 0.45860669016838074, "learning_rate": 0.0004173602742026576, "loss": 1.7466, "step": 33549 }, { "epoch": 1.12, "grad_norm": 0.46847426891326904, "learning_rate": 0.0004173506549932299, "loss": 1.7559, "step": 33550 }, { "epoch": 1.12, "grad_norm": 0.4764893054962158, "learning_rate": 0.0004173410356413557, "loss": 1.822, "step": 33551 }, { "epoch": 1.12, "grad_norm": 0.4644792973995209, "learning_rate": 0.00041733141614704673, "loss": 1.7092, "step": 33552 }, { "epoch": 1.12, "grad_norm": 0.4687618315219879, "learning_rate": 0.00041732179651031486, "loss": 1.8392, "step": 33553 }, { "epoch": 1.12, "grad_norm": 0.46332278847694397, "learning_rate": 0.00041731217673117156, "loss": 1.7784, "step": 33554 }, { "epoch": 1.12, "grad_norm": 0.4732828140258789, "learning_rate": 0.0004173025568096285, "loss": 1.7169, "step": 33555 }, { "epoch": 1.12, "grad_norm": 0.47285598516464233, "learning_rate": 0.00041729293674569744, "loss": 1.8014, "step": 33556 }, { "epoch": 1.12, "grad_norm": 0.4714318513870239, "learning_rate": 0.00041728331653939003, "loss": 1.7823, "step": 33557 }, { "epoch": 1.12, "grad_norm": 0.7732445001602173, "learning_rate": 0.00041727369619071794, "loss": 1.8064, "step": 33558 }, { "epoch": 1.12, "grad_norm": 0.4724792540073395, "learning_rate": 0.0004172640756996929, "loss": 1.8008, "step": 33559 }, { "epoch": 1.12, "grad_norm": 0.4498100280761719, "learning_rate": 0.0004172544550663266, "loss": 1.8212, "step": 33560 }, { "epoch": 1.12, "grad_norm": 0.46076786518096924, "learning_rate": 0.0004172448342906306, "loss": 1.7691, "step": 33561 }, { "epoch": 1.12, "grad_norm": 0.47713905572891235, "learning_rate": 0.0004172352133726166, "loss": 1.7762, "step": 33562 }, { "epoch": 1.12, "grad_norm": 0.4895135164260864, "learning_rate": 0.0004172255923122964, "loss": 1.7596, "step": 33563 }, { "epoch": 1.12, "grad_norm": 0.4914581775665283, "learning_rate": 0.0004172159711096816, "loss": 1.8184, "step": 33564 }, { "epoch": 1.12, "grad_norm": 0.47128552198410034, "learning_rate": 0.00041720634976478397, "loss": 1.7488, "step": 33565 }, { "epoch": 1.12, "grad_norm": 0.4634670913219452, "learning_rate": 0.0004171967282776149, "loss": 1.7456, "step": 33566 }, { "epoch": 1.12, "grad_norm": 0.5160536170005798, "learning_rate": 0.00041718710664818645, "loss": 1.8163, "step": 33567 }, { "epoch": 1.12, "grad_norm": 0.48784106969833374, "learning_rate": 0.00041717748487651006, "loss": 1.8445, "step": 33568 }, { "epoch": 1.12, "grad_norm": 0.4723251461982727, "learning_rate": 0.0004171678629625974, "loss": 1.7927, "step": 33569 }, { "epoch": 1.12, "grad_norm": 0.4542851448059082, "learning_rate": 0.0004171582409064602, "loss": 1.8006, "step": 33570 }, { "epoch": 1.12, "grad_norm": 0.4536779820919037, "learning_rate": 0.00041714861870811024, "loss": 1.8578, "step": 33571 }, { "epoch": 1.12, "grad_norm": 0.47486791014671326, "learning_rate": 0.0004171389963675592, "loss": 1.7612, "step": 33572 }, { "epoch": 1.12, "grad_norm": 0.4705566167831421, "learning_rate": 0.0004171293738848185, "loss": 1.7536, "step": 33573 }, { "epoch": 1.12, "grad_norm": 0.4655173122882843, "learning_rate": 0.00041711975125990013, "loss": 1.7353, "step": 33574 }, { "epoch": 1.12, "grad_norm": 0.45604661107063293, "learning_rate": 0.00041711012849281556, "loss": 1.7548, "step": 33575 }, { "epoch": 1.12, "grad_norm": 0.4620679020881653, "learning_rate": 0.0004171005055835766, "loss": 1.775, "step": 33576 }, { "epoch": 1.12, "grad_norm": 0.4449612498283386, "learning_rate": 0.00041709088253219485, "loss": 1.7063, "step": 33577 }, { "epoch": 1.12, "grad_norm": 0.4498686194419861, "learning_rate": 0.000417081259338682, "loss": 1.7012, "step": 33578 }, { "epoch": 1.12, "grad_norm": 0.45258674025535583, "learning_rate": 0.0004170716360030499, "loss": 1.7936, "step": 33579 }, { "epoch": 1.12, "grad_norm": 0.4648914337158203, "learning_rate": 0.00041706201252530994, "loss": 1.7388, "step": 33580 }, { "epoch": 1.12, "grad_norm": 0.4487384855747223, "learning_rate": 0.0004170523889054739, "loss": 1.7446, "step": 33581 }, { "epoch": 1.12, "grad_norm": 0.48942384123802185, "learning_rate": 0.0004170427651435536, "loss": 1.7229, "step": 33582 }, { "epoch": 1.12, "grad_norm": 0.4761713743209839, "learning_rate": 0.0004170331412395606, "loss": 1.7822, "step": 33583 }, { "epoch": 1.12, "grad_norm": 0.4669381082057953, "learning_rate": 0.0004170235171935066, "loss": 1.8305, "step": 33584 }, { "epoch": 1.12, "grad_norm": 0.9456384181976318, "learning_rate": 0.0004170138930054034, "loss": 1.8233, "step": 33585 }, { "epoch": 1.12, "grad_norm": 0.49188122153282166, "learning_rate": 0.00041700426867526244, "loss": 1.8278, "step": 33586 }, { "epoch": 1.12, "grad_norm": 0.4811931848526001, "learning_rate": 0.00041699464420309556, "loss": 1.7911, "step": 33587 }, { "epoch": 1.12, "grad_norm": 0.4797165095806122, "learning_rate": 0.00041698501958891445, "loss": 1.8975, "step": 33588 }, { "epoch": 1.12, "grad_norm": 0.47390130162239075, "learning_rate": 0.00041697539483273073, "loss": 1.7344, "step": 33589 }, { "epoch": 1.12, "grad_norm": 0.46842053532600403, "learning_rate": 0.0004169657699345562, "loss": 1.7713, "step": 33590 }, { "epoch": 1.12, "grad_norm": 0.4671117961406708, "learning_rate": 0.0004169561448944023, "loss": 1.7633, "step": 33591 }, { "epoch": 1.12, "grad_norm": 0.4630597233772278, "learning_rate": 0.00041694651971228104, "loss": 1.7425, "step": 33592 }, { "epoch": 1.12, "grad_norm": 0.4797239303588867, "learning_rate": 0.00041693689438820387, "loss": 1.759, "step": 33593 }, { "epoch": 1.12, "grad_norm": 0.4592764973640442, "learning_rate": 0.0004169272689221826, "loss": 1.7367, "step": 33594 }, { "epoch": 1.12, "grad_norm": 0.46066516637802124, "learning_rate": 0.0004169176433142288, "loss": 1.7226, "step": 33595 }, { "epoch": 1.12, "grad_norm": 0.4537051022052765, "learning_rate": 0.0004169080175643541, "loss": 1.7609, "step": 33596 }, { "epoch": 1.12, "grad_norm": 0.48095014691352844, "learning_rate": 0.00041689839167257053, "loss": 1.804, "step": 33597 }, { "epoch": 1.12, "grad_norm": 0.4547565281391144, "learning_rate": 0.0004168887656388893, "loss": 1.7181, "step": 33598 }, { "epoch": 1.12, "grad_norm": 0.46662649512290955, "learning_rate": 0.0004168791394633225, "loss": 1.7454, "step": 33599 }, { "epoch": 1.12, "grad_norm": 0.46175816655158997, "learning_rate": 0.0004168695131458815, "loss": 1.7774, "step": 33600 }, { "epoch": 1.12, "grad_norm": 0.47601619362831116, "learning_rate": 0.0004168598866865782, "loss": 1.7956, "step": 33601 }, { "epoch": 1.12, "grad_norm": 0.4673689305782318, "learning_rate": 0.00041685026008542436, "loss": 1.8363, "step": 33602 }, { "epoch": 1.12, "grad_norm": 0.4551396369934082, "learning_rate": 0.00041684063334243126, "loss": 1.7448, "step": 33603 }, { "epoch": 1.12, "grad_norm": 0.4455643594264984, "learning_rate": 0.000416831006457611, "loss": 1.826, "step": 33604 }, { "epoch": 1.12, "grad_norm": 0.45981448888778687, "learning_rate": 0.0004168213794309751, "loss": 1.8004, "step": 33605 }, { "epoch": 1.12, "grad_norm": 0.46714332699775696, "learning_rate": 0.0004168117522625352, "loss": 1.767, "step": 33606 }, { "epoch": 1.12, "grad_norm": 0.46738091111183167, "learning_rate": 0.00041680212495230314, "loss": 1.7646, "step": 33607 }, { "epoch": 1.12, "grad_norm": 0.45917046070098877, "learning_rate": 0.0004167924975002904, "loss": 1.7886, "step": 33608 }, { "epoch": 1.12, "grad_norm": 0.47154879570007324, "learning_rate": 0.00041678286990650895, "loss": 1.8949, "step": 33609 }, { "epoch": 1.12, "grad_norm": 0.4863981306552887, "learning_rate": 0.0004167732421709702, "loss": 1.6982, "step": 33610 }, { "epoch": 1.12, "grad_norm": 0.47096261382102966, "learning_rate": 0.00041676361429368586, "loss": 1.8094, "step": 33611 }, { "epoch": 1.12, "grad_norm": 0.4648989737033844, "learning_rate": 0.0004167539862746678, "loss": 1.7887, "step": 33612 }, { "epoch": 1.12, "grad_norm": 0.48375648260116577, "learning_rate": 0.0004167443581139275, "loss": 1.8005, "step": 33613 }, { "epoch": 1.12, "grad_norm": 0.47775858640670776, "learning_rate": 0.00041673472981147686, "loss": 1.8021, "step": 33614 }, { "epoch": 1.12, "grad_norm": 0.4625928997993469, "learning_rate": 0.00041672510136732746, "loss": 1.84, "step": 33615 }, { "epoch": 1.12, "grad_norm": 0.48194125294685364, "learning_rate": 0.0004167154727814909, "loss": 1.9038, "step": 33616 }, { "epoch": 1.12, "grad_norm": 0.463507741689682, "learning_rate": 0.00041670584405397907, "loss": 1.7505, "step": 33617 }, { "epoch": 1.12, "grad_norm": 0.4750281274318695, "learning_rate": 0.0004166962151848034, "loss": 1.8216, "step": 33618 }, { "epoch": 1.12, "grad_norm": 0.4676675796508789, "learning_rate": 0.0004166865861739758, "loss": 1.814, "step": 33619 }, { "epoch": 1.12, "grad_norm": 0.4806436598300934, "learning_rate": 0.00041667695702150793, "loss": 1.8885, "step": 33620 }, { "epoch": 1.12, "grad_norm": 0.4551398754119873, "learning_rate": 0.00041666732772741125, "loss": 1.8294, "step": 33621 }, { "epoch": 1.12, "grad_norm": 0.4660245478153229, "learning_rate": 0.0004166576982916978, "loss": 1.738, "step": 33622 }, { "epoch": 1.12, "grad_norm": 0.4736519753932953, "learning_rate": 0.000416648068714379, "loss": 1.845, "step": 33623 }, { "epoch": 1.12, "grad_norm": 0.456863671541214, "learning_rate": 0.00041663843899546665, "loss": 1.811, "step": 33624 }, { "epoch": 1.12, "grad_norm": 0.4631890058517456, "learning_rate": 0.00041662880913497245, "loss": 1.7685, "step": 33625 }, { "epoch": 1.12, "grad_norm": 0.4678298830986023, "learning_rate": 0.000416619179132908, "loss": 1.7121, "step": 33626 }, { "epoch": 1.12, "grad_norm": 0.4725266993045807, "learning_rate": 0.00041660954898928515, "loss": 1.8302, "step": 33627 }, { "epoch": 1.12, "grad_norm": 0.4584985077381134, "learning_rate": 0.00041659991870411536, "loss": 1.7629, "step": 33628 }, { "epoch": 1.12, "grad_norm": 0.4602851867675781, "learning_rate": 0.00041659028827741056, "loss": 1.8163, "step": 33629 }, { "epoch": 1.12, "grad_norm": 0.4816468060016632, "learning_rate": 0.00041658065770918226, "loss": 1.8875, "step": 33630 }, { "epoch": 1.12, "grad_norm": 0.4696078598499298, "learning_rate": 0.00041657102699944225, "loss": 1.6943, "step": 33631 }, { "epoch": 1.12, "grad_norm": 0.480156272649765, "learning_rate": 0.00041656139614820216, "loss": 1.837, "step": 33632 }, { "epoch": 1.12, "grad_norm": 0.4554672837257385, "learning_rate": 0.0004165517651554738, "loss": 1.7647, "step": 33633 }, { "epoch": 1.12, "grad_norm": 0.4924670457839966, "learning_rate": 0.0004165421340212687, "loss": 1.8576, "step": 33634 }, { "epoch": 1.12, "grad_norm": 0.45925936102867126, "learning_rate": 0.0004165325027455986, "loss": 1.7265, "step": 33635 }, { "epoch": 1.12, "grad_norm": 0.4543886184692383, "learning_rate": 0.00041652287132847525, "loss": 1.7471, "step": 33636 }, { "epoch": 1.12, "grad_norm": 0.45596835017204285, "learning_rate": 0.0004165132397699103, "loss": 1.7898, "step": 33637 }, { "epoch": 1.12, "grad_norm": 0.4600556790828705, "learning_rate": 0.0004165036080699154, "loss": 1.7839, "step": 33638 }, { "epoch": 1.12, "grad_norm": 0.48620086908340454, "learning_rate": 0.0004164939762285023, "loss": 1.7295, "step": 33639 }, { "epoch": 1.12, "grad_norm": 0.4698888957500458, "learning_rate": 0.0004164843442456827, "loss": 1.754, "step": 33640 }, { "epoch": 1.12, "grad_norm": 0.4577066898345947, "learning_rate": 0.00041647471212146836, "loss": 1.7514, "step": 33641 }, { "epoch": 1.12, "grad_norm": 0.48550501465797424, "learning_rate": 0.00041646507985587075, "loss": 1.7644, "step": 33642 }, { "epoch": 1.12, "grad_norm": 0.47446149587631226, "learning_rate": 0.0004164554474489017, "loss": 1.776, "step": 33643 }, { "epoch": 1.12, "grad_norm": 0.457426518201828, "learning_rate": 0.0004164458149005729, "loss": 1.7411, "step": 33644 }, { "epoch": 1.12, "grad_norm": 0.4802207350730896, "learning_rate": 0.0004164361822108961, "loss": 1.8472, "step": 33645 }, { "epoch": 1.12, "grad_norm": 0.48617321252822876, "learning_rate": 0.0004164265493798829, "loss": 1.8759, "step": 33646 }, { "epoch": 1.12, "grad_norm": 0.4779694676399231, "learning_rate": 0.00041641691640754503, "loss": 1.7502, "step": 33647 }, { "epoch": 1.12, "grad_norm": 0.45526131987571716, "learning_rate": 0.0004164072832938942, "loss": 1.7644, "step": 33648 }, { "epoch": 1.12, "grad_norm": 0.4797317385673523, "learning_rate": 0.000416397650038942, "loss": 1.7565, "step": 33649 }, { "epoch": 1.12, "grad_norm": 0.4937084913253784, "learning_rate": 0.00041638801664270025, "loss": 1.7268, "step": 33650 }, { "epoch": 1.12, "grad_norm": 0.4672306478023529, "learning_rate": 0.00041637838310518057, "loss": 1.8291, "step": 33651 }, { "epoch": 1.12, "grad_norm": 0.4769077003002167, "learning_rate": 0.0004163687494263948, "loss": 1.7357, "step": 33652 }, { "epoch": 1.12, "grad_norm": 0.46058419346809387, "learning_rate": 0.0004163591156063543, "loss": 1.8069, "step": 33653 }, { "epoch": 1.12, "grad_norm": 0.4633801579475403, "learning_rate": 0.00041634948164507115, "loss": 1.7788, "step": 33654 }, { "epoch": 1.12, "grad_norm": 0.46842068433761597, "learning_rate": 0.0004163398475425568, "loss": 1.8021, "step": 33655 }, { "epoch": 1.12, "grad_norm": 0.46432751417160034, "learning_rate": 0.00041633021329882307, "loss": 1.8225, "step": 33656 }, { "epoch": 1.12, "grad_norm": 0.46938058733940125, "learning_rate": 0.0004163205789138816, "loss": 1.8579, "step": 33657 }, { "epoch": 1.12, "grad_norm": 0.4769224226474762, "learning_rate": 0.00041631094438774404, "loss": 1.8113, "step": 33658 }, { "epoch": 1.12, "grad_norm": 0.47564005851745605, "learning_rate": 0.0004163013097204222, "loss": 1.7058, "step": 33659 }, { "epoch": 1.12, "grad_norm": 0.44814321398735046, "learning_rate": 0.00041629167491192756, "loss": 1.824, "step": 33660 }, { "epoch": 1.12, "grad_norm": 0.46743398904800415, "learning_rate": 0.0004162820399622721, "loss": 1.7803, "step": 33661 }, { "epoch": 1.12, "grad_norm": 0.45657745003700256, "learning_rate": 0.0004162724048714673, "loss": 1.7577, "step": 33662 }, { "epoch": 1.12, "grad_norm": 0.49600306153297424, "learning_rate": 0.00041626276963952494, "loss": 1.8471, "step": 33663 }, { "epoch": 1.12, "grad_norm": 0.46196249127388, "learning_rate": 0.0004162531342664568, "loss": 1.7444, "step": 33664 }, { "epoch": 1.12, "grad_norm": 0.45931535959243774, "learning_rate": 0.0004162434987522744, "loss": 1.7849, "step": 33665 }, { "epoch": 1.12, "grad_norm": 0.46664512157440186, "learning_rate": 0.0004162338630969895, "loss": 1.7859, "step": 33666 }, { "epoch": 1.12, "grad_norm": 0.46565085649490356, "learning_rate": 0.0004162242273006139, "loss": 1.7533, "step": 33667 }, { "epoch": 1.12, "grad_norm": 0.4836369454860687, "learning_rate": 0.00041621459136315916, "loss": 1.8637, "step": 33668 }, { "epoch": 1.12, "grad_norm": 0.4819408357143402, "learning_rate": 0.00041620495528463697, "loss": 1.7884, "step": 33669 }, { "epoch": 1.12, "grad_norm": 0.46061328053474426, "learning_rate": 0.0004161953190650592, "loss": 1.7538, "step": 33670 }, { "epoch": 1.12, "grad_norm": 0.46661117672920227, "learning_rate": 0.00041618568270443743, "loss": 1.7454, "step": 33671 }, { "epoch": 1.12, "grad_norm": 0.44735684990882874, "learning_rate": 0.0004161760462027833, "loss": 1.8539, "step": 33672 }, { "epoch": 1.12, "grad_norm": 0.45888379216194153, "learning_rate": 0.0004161664095601086, "loss": 1.703, "step": 33673 }, { "epoch": 1.12, "grad_norm": 0.4669485092163086, "learning_rate": 0.00041615677277642493, "loss": 1.7722, "step": 33674 }, { "epoch": 1.12, "grad_norm": 0.46889281272888184, "learning_rate": 0.00041614713585174415, "loss": 1.7971, "step": 33675 }, { "epoch": 1.12, "grad_norm": 0.4572336971759796, "learning_rate": 0.0004161374987860778, "loss": 1.7833, "step": 33676 }, { "epoch": 1.12, "grad_norm": 0.4674452841281891, "learning_rate": 0.00041612786157943765, "loss": 1.7232, "step": 33677 }, { "epoch": 1.12, "grad_norm": 0.4694483280181885, "learning_rate": 0.0004161182242318354, "loss": 1.8275, "step": 33678 }, { "epoch": 1.12, "grad_norm": 0.5073208808898926, "learning_rate": 0.0004161085867432827, "loss": 1.7501, "step": 33679 }, { "epoch": 1.12, "grad_norm": 0.4768276810646057, "learning_rate": 0.0004160989491137913, "loss": 1.785, "step": 33680 }, { "epoch": 1.12, "grad_norm": 0.46858999133110046, "learning_rate": 0.00041608931134337287, "loss": 1.8101, "step": 33681 }, { "epoch": 1.12, "grad_norm": 0.47006142139434814, "learning_rate": 0.00041607967343203916, "loss": 1.8329, "step": 33682 }, { "epoch": 1.12, "grad_norm": 0.4613446295261383, "learning_rate": 0.0004160700353798018, "loss": 1.8134, "step": 33683 }, { "epoch": 1.12, "grad_norm": 0.46591660380363464, "learning_rate": 0.0004160603971866726, "loss": 1.7883, "step": 33684 }, { "epoch": 1.12, "grad_norm": 0.44580966234207153, "learning_rate": 0.00041605075885266306, "loss": 1.7622, "step": 33685 }, { "epoch": 1.12, "grad_norm": 0.45511290431022644, "learning_rate": 0.0004160411203777851, "loss": 1.7432, "step": 33686 }, { "epoch": 1.12, "grad_norm": 0.47706910967826843, "learning_rate": 0.00041603148176205015, "loss": 1.7577, "step": 33687 }, { "epoch": 1.12, "grad_norm": 0.4556240439414978, "learning_rate": 0.0004160218430054702, "loss": 1.7931, "step": 33688 }, { "epoch": 1.12, "grad_norm": 0.45115625858306885, "learning_rate": 0.0004160122041080569, "loss": 1.762, "step": 33689 }, { "epoch": 1.12, "grad_norm": 0.4571586549282074, "learning_rate": 0.0004160025650698217, "loss": 1.7749, "step": 33690 }, { "epoch": 1.12, "grad_norm": 0.4780184328556061, "learning_rate": 0.00041599292589077666, "loss": 1.7862, "step": 33691 }, { "epoch": 1.12, "grad_norm": 0.4799198806285858, "learning_rate": 0.0004159832865709332, "loss": 1.8276, "step": 33692 }, { "epoch": 1.12, "grad_norm": 0.4666185677051544, "learning_rate": 0.0004159736471103031, "loss": 1.7972, "step": 33693 }, { "epoch": 1.12, "grad_norm": 0.49216601252555847, "learning_rate": 0.00041596400750889813, "loss": 1.7236, "step": 33694 }, { "epoch": 1.12, "grad_norm": 0.4830681085586548, "learning_rate": 0.0004159543677667299, "loss": 1.7655, "step": 33695 }, { "epoch": 1.12, "grad_norm": 0.4660278260707855, "learning_rate": 0.00041594472788381026, "loss": 1.86, "step": 33696 }, { "epoch": 1.12, "grad_norm": 0.4682242274284363, "learning_rate": 0.0004159350878601506, "loss": 1.7778, "step": 33697 }, { "epoch": 1.12, "grad_norm": 0.48401302099227905, "learning_rate": 0.000415925447695763, "loss": 1.8461, "step": 33698 }, { "epoch": 1.12, "grad_norm": 0.47614529728889465, "learning_rate": 0.00041591580739065893, "loss": 1.9014, "step": 33699 }, { "epoch": 1.12, "grad_norm": 0.47216543555259705, "learning_rate": 0.0004159061669448501, "loss": 1.8521, "step": 33700 }, { "epoch": 1.12, "grad_norm": 0.4704892039299011, "learning_rate": 0.00041589652635834836, "loss": 1.8177, "step": 33701 }, { "epoch": 1.12, "grad_norm": 0.4590107500553131, "learning_rate": 0.0004158868856311653, "loss": 1.7696, "step": 33702 }, { "epoch": 1.12, "grad_norm": 0.4504114091396332, "learning_rate": 0.0004158772447633126, "loss": 1.7933, "step": 33703 }, { "epoch": 1.12, "grad_norm": 0.4672994613647461, "learning_rate": 0.000415867603754802, "loss": 1.6897, "step": 33704 }, { "epoch": 1.12, "grad_norm": 0.4676761031150818, "learning_rate": 0.00041585796260564515, "loss": 1.7683, "step": 33705 }, { "epoch": 1.12, "grad_norm": 0.48249444365501404, "learning_rate": 0.00041584832131585383, "loss": 1.7973, "step": 33706 }, { "epoch": 1.12, "grad_norm": 0.44519495964050293, "learning_rate": 0.00041583867988543986, "loss": 1.7418, "step": 33707 }, { "epoch": 1.12, "grad_norm": 0.4544960856437683, "learning_rate": 0.0004158290383144146, "loss": 1.8952, "step": 33708 }, { "epoch": 1.12, "grad_norm": 0.4828138053417206, "learning_rate": 0.00041581939660279005, "loss": 1.811, "step": 33709 }, { "epoch": 1.12, "grad_norm": 0.49338895082473755, "learning_rate": 0.00041580975475057785, "loss": 1.8587, "step": 33710 }, { "epoch": 1.12, "grad_norm": 0.47351136803627014, "learning_rate": 0.0004158001127577896, "loss": 1.8071, "step": 33711 }, { "epoch": 1.12, "grad_norm": 0.4662822186946869, "learning_rate": 0.0004157904706244372, "loss": 1.8225, "step": 33712 }, { "epoch": 1.12, "grad_norm": 0.49998554587364197, "learning_rate": 0.00041578082835053203, "loss": 1.8173, "step": 33713 }, { "epoch": 1.12, "grad_norm": 0.4779564142227173, "learning_rate": 0.00041577118593608614, "loss": 1.7888, "step": 33714 }, { "epoch": 1.12, "grad_norm": 0.4703519642353058, "learning_rate": 0.000415761543381111, "loss": 1.7613, "step": 33715 }, { "epoch": 1.12, "grad_norm": 0.46998330950737, "learning_rate": 0.0004157519006856185, "loss": 1.8031, "step": 33716 }, { "epoch": 1.12, "grad_norm": 0.47844433784484863, "learning_rate": 0.00041574225784962016, "loss": 1.8247, "step": 33717 }, { "epoch": 1.12, "grad_norm": 0.47121113538742065, "learning_rate": 0.0004157326148731278, "loss": 1.8207, "step": 33718 }, { "epoch": 1.12, "grad_norm": 0.47063273191452026, "learning_rate": 0.0004157229717561532, "loss": 1.7749, "step": 33719 }, { "epoch": 1.12, "grad_norm": 0.46377187967300415, "learning_rate": 0.0004157133284987078, "loss": 1.7733, "step": 33720 }, { "epoch": 1.12, "grad_norm": 0.4600796699523926, "learning_rate": 0.0004157036851008036, "loss": 1.8275, "step": 33721 }, { "epoch": 1.12, "grad_norm": 0.46815386414527893, "learning_rate": 0.00041569404156245203, "loss": 1.7902, "step": 33722 }, { "epoch": 1.12, "grad_norm": 0.4543846845626831, "learning_rate": 0.0004156843978836651, "loss": 1.7849, "step": 33723 }, { "epoch": 1.12, "grad_norm": 0.4536032974720001, "learning_rate": 0.0004156747540644542, "loss": 1.8087, "step": 33724 }, { "epoch": 1.12, "grad_norm": 0.4557657837867737, "learning_rate": 0.0004156651101048313, "loss": 1.7962, "step": 33725 }, { "epoch": 1.12, "grad_norm": 0.4796977937221527, "learning_rate": 0.000415655466004808, "loss": 1.7688, "step": 33726 }, { "epoch": 1.12, "grad_norm": 0.46825218200683594, "learning_rate": 0.000415645821764396, "loss": 1.7836, "step": 33727 }, { "epoch": 1.12, "grad_norm": 0.455951988697052, "learning_rate": 0.0004156361773836069, "loss": 1.7465, "step": 33728 }, { "epoch": 1.12, "grad_norm": 0.4926755428314209, "learning_rate": 0.0004156265328624526, "loss": 1.849, "step": 33729 }, { "epoch": 1.12, "grad_norm": 0.44591960310935974, "learning_rate": 0.0004156168882009448, "loss": 1.8078, "step": 33730 }, { "epoch": 1.12, "grad_norm": 0.4675927460193634, "learning_rate": 0.000415607243399095, "loss": 1.7905, "step": 33731 }, { "epoch": 1.12, "grad_norm": 0.49189814925193787, "learning_rate": 0.00041559759845691523, "loss": 1.7895, "step": 33732 }, { "epoch": 1.12, "grad_norm": 0.44285640120506287, "learning_rate": 0.0004155879533744168, "loss": 1.7396, "step": 33733 }, { "epoch": 1.12, "grad_norm": 0.4569743871688843, "learning_rate": 0.0004155783081516117, "loss": 1.6953, "step": 33734 }, { "epoch": 1.12, "grad_norm": 0.4903384745121002, "learning_rate": 0.00041556866278851163, "loss": 1.7737, "step": 33735 }, { "epoch": 1.12, "grad_norm": 0.4859327971935272, "learning_rate": 0.00041555901728512816, "loss": 1.8789, "step": 33736 }, { "epoch": 1.12, "grad_norm": 0.4501798748970032, "learning_rate": 0.00041554937164147316, "loss": 1.7888, "step": 33737 }, { "epoch": 1.12, "grad_norm": 0.46571555733680725, "learning_rate": 0.00041553972585755807, "loss": 1.7896, "step": 33738 }, { "epoch": 1.12, "grad_norm": 0.48106980323791504, "learning_rate": 0.0004155300799333949, "loss": 1.8121, "step": 33739 }, { "epoch": 1.12, "grad_norm": 0.46466508507728577, "learning_rate": 0.0004155204338689952, "loss": 1.7605, "step": 33740 }, { "epoch": 1.12, "grad_norm": 0.4916285574436188, "learning_rate": 0.00041551078766437075, "loss": 1.7762, "step": 33741 }, { "epoch": 1.12, "grad_norm": 0.4373995065689087, "learning_rate": 0.0004155011413195332, "loss": 1.7772, "step": 33742 }, { "epoch": 1.12, "grad_norm": 0.45751920342445374, "learning_rate": 0.00041549149483449427, "loss": 1.7867, "step": 33743 }, { "epoch": 1.12, "grad_norm": 0.4635199308395386, "learning_rate": 0.0004154818482092657, "loss": 1.753, "step": 33744 }, { "epoch": 1.12, "grad_norm": 0.4833238124847412, "learning_rate": 0.00041547220144385916, "loss": 1.7621, "step": 33745 }, { "epoch": 1.12, "grad_norm": 0.4813365042209625, "learning_rate": 0.00041546255453828645, "loss": 1.7879, "step": 33746 }, { "epoch": 1.12, "grad_norm": 0.45297545194625854, "learning_rate": 0.00041545290749255915, "loss": 1.869, "step": 33747 }, { "epoch": 1.12, "grad_norm": 0.4518771469593048, "learning_rate": 0.00041544326030668906, "loss": 1.8098, "step": 33748 }, { "epoch": 1.12, "grad_norm": 0.4689631164073944, "learning_rate": 0.0004154336129806878, "loss": 1.719, "step": 33749 }, { "epoch": 1.12, "grad_norm": 0.45723089575767517, "learning_rate": 0.0004154239655145672, "loss": 1.7718, "step": 33750 }, { "epoch": 1.12, "grad_norm": 0.47200775146484375, "learning_rate": 0.000415414317908339, "loss": 1.794, "step": 33751 }, { "epoch": 1.12, "grad_norm": 0.46598583459854126, "learning_rate": 0.00041540467016201466, "loss": 1.8012, "step": 33752 }, { "epoch": 1.12, "grad_norm": 0.46402356028556824, "learning_rate": 0.0004153950222756061, "loss": 1.7365, "step": 33753 }, { "epoch": 1.12, "grad_norm": 0.47039923071861267, "learning_rate": 0.000415385374249125, "loss": 1.7829, "step": 33754 }, { "epoch": 1.12, "grad_norm": 0.47348809242248535, "learning_rate": 0.00041537572608258306, "loss": 1.7697, "step": 33755 }, { "epoch": 1.12, "grad_norm": 0.48169007897377014, "learning_rate": 0.000415366077775992, "loss": 1.8144, "step": 33756 }, { "epoch": 1.12, "grad_norm": 0.46640345454216003, "learning_rate": 0.00041535642932936354, "loss": 1.7755, "step": 33757 }, { "epoch": 1.12, "grad_norm": 0.46148741245269775, "learning_rate": 0.0004153467807427094, "loss": 1.7529, "step": 33758 }, { "epoch": 1.12, "grad_norm": 0.46171149611473083, "learning_rate": 0.0004153371320160411, "loss": 1.7654, "step": 33759 }, { "epoch": 1.12, "grad_norm": 0.47944188117980957, "learning_rate": 0.00041532748314937067, "loss": 1.8162, "step": 33760 }, { "epoch": 1.12, "grad_norm": 0.44686082005500793, "learning_rate": 0.0004153178341427096, "loss": 1.7665, "step": 33761 }, { "epoch": 1.12, "grad_norm": 0.4612600803375244, "learning_rate": 0.00041530818499606976, "loss": 1.7916, "step": 33762 }, { "epoch": 1.12, "grad_norm": 0.4471215009689331, "learning_rate": 0.0004152985357094626, "loss": 1.7758, "step": 33763 }, { "epoch": 1.12, "grad_norm": 0.44920021295547485, "learning_rate": 0.0004152888862829002, "loss": 1.7496, "step": 33764 }, { "epoch": 1.12, "grad_norm": 0.4591071307659149, "learning_rate": 0.00041527923671639394, "loss": 1.8249, "step": 33765 }, { "epoch": 1.12, "grad_norm": 0.4784632921218872, "learning_rate": 0.0004152695870099557, "loss": 1.8256, "step": 33766 }, { "epoch": 1.12, "grad_norm": 0.45718804001808167, "learning_rate": 0.00041525993716359723, "loss": 1.7854, "step": 33767 }, { "epoch": 1.12, "grad_norm": 0.4632011353969574, "learning_rate": 0.00041525028717733013, "loss": 1.7919, "step": 33768 }, { "epoch": 1.12, "grad_norm": 0.48047515749931335, "learning_rate": 0.0004152406370511662, "loss": 1.7342, "step": 33769 }, { "epoch": 1.12, "grad_norm": 0.4614366292953491, "learning_rate": 0.000415230986785117, "loss": 1.7023, "step": 33770 }, { "epoch": 1.12, "grad_norm": 0.46260878443717957, "learning_rate": 0.0004152213363791945, "loss": 1.839, "step": 33771 }, { "epoch": 1.12, "grad_norm": 0.46095171570777893, "learning_rate": 0.0004152116858334102, "loss": 1.755, "step": 33772 }, { "epoch": 1.12, "grad_norm": 0.4599405527114868, "learning_rate": 0.0004152020351477759, "loss": 1.7385, "step": 33773 }, { "epoch": 1.12, "grad_norm": 0.4799857437610626, "learning_rate": 0.0004151923843223034, "loss": 1.7259, "step": 33774 }, { "epoch": 1.12, "grad_norm": 0.47021594643592834, "learning_rate": 0.00041518273335700414, "loss": 1.7856, "step": 33775 }, { "epoch": 1.12, "grad_norm": 0.5046616196632385, "learning_rate": 0.0004151730822518901, "loss": 1.8642, "step": 33776 }, { "epoch": 1.12, "grad_norm": 0.47630611062049866, "learning_rate": 0.00041516343100697303, "loss": 1.8416, "step": 33777 }, { "epoch": 1.12, "grad_norm": 0.4614786207675934, "learning_rate": 0.00041515377962226434, "loss": 1.872, "step": 33778 }, { "epoch": 1.12, "grad_norm": 0.44839954376220703, "learning_rate": 0.0004151441280977759, "loss": 1.7742, "step": 33779 }, { "epoch": 1.12, "grad_norm": 0.4499245584011078, "learning_rate": 0.0004151344764335196, "loss": 1.8034, "step": 33780 }, { "epoch": 1.12, "grad_norm": 0.46663698554039, "learning_rate": 0.00041512482462950705, "loss": 1.7311, "step": 33781 }, { "epoch": 1.12, "grad_norm": 0.48339489102363586, "learning_rate": 0.0004151151726857498, "loss": 1.6963, "step": 33782 }, { "epoch": 1.12, "grad_norm": 0.48079395294189453, "learning_rate": 0.0004151055206022597, "loss": 1.7923, "step": 33783 }, { "epoch": 1.12, "grad_norm": 0.46135345101356506, "learning_rate": 0.0004150958683790485, "loss": 1.847, "step": 33784 }, { "epoch": 1.12, "grad_norm": 0.4661646783351898, "learning_rate": 0.00041508621601612784, "loss": 1.759, "step": 33785 }, { "epoch": 1.12, "grad_norm": 0.495750367641449, "learning_rate": 0.0004150765635135095, "loss": 1.7262, "step": 33786 }, { "epoch": 1.12, "grad_norm": 0.46873170137405396, "learning_rate": 0.0004150669108712052, "loss": 1.7433, "step": 33787 }, { "epoch": 1.12, "grad_norm": 0.46528545022010803, "learning_rate": 0.00041505725808922657, "loss": 1.7966, "step": 33788 }, { "epoch": 1.12, "grad_norm": 0.4606263339519501, "learning_rate": 0.00041504760516758535, "loss": 1.8495, "step": 33789 }, { "epoch": 1.12, "grad_norm": 0.45530372858047485, "learning_rate": 0.0004150379521062933, "loss": 1.726, "step": 33790 }, { "epoch": 1.12, "grad_norm": 0.4595576822757721, "learning_rate": 0.0004150282989053622, "loss": 1.7525, "step": 33791 }, { "epoch": 1.12, "grad_norm": 0.45381826162338257, "learning_rate": 0.00041501864556480367, "loss": 1.8073, "step": 33792 }, { "epoch": 1.12, "grad_norm": 0.4576069116592407, "learning_rate": 0.00041500899208462937, "loss": 1.7851, "step": 33793 }, { "epoch": 1.12, "grad_norm": 0.4565388560295105, "learning_rate": 0.00041499933846485124, "loss": 1.811, "step": 33794 }, { "epoch": 1.12, "grad_norm": 0.460258424282074, "learning_rate": 0.00041498968470548073, "loss": 1.7923, "step": 33795 }, { "epoch": 1.12, "grad_norm": 0.4957614243030548, "learning_rate": 0.00041498003080652976, "loss": 1.8236, "step": 33796 }, { "epoch": 1.12, "grad_norm": 0.4965939521789551, "learning_rate": 0.00041497037676800993, "loss": 1.8033, "step": 33797 }, { "epoch": 1.12, "grad_norm": 0.4937942922115326, "learning_rate": 0.00041496072258993306, "loss": 1.7844, "step": 33798 }, { "epoch": 1.12, "grad_norm": 0.46364545822143555, "learning_rate": 0.00041495106827231076, "loss": 1.8402, "step": 33799 }, { "epoch": 1.12, "grad_norm": 0.4734019339084625, "learning_rate": 0.0004149414138151548, "loss": 1.7458, "step": 33800 }, { "epoch": 1.12, "grad_norm": 0.4784141778945923, "learning_rate": 0.00041493175921847693, "loss": 1.7217, "step": 33801 }, { "epoch": 1.12, "grad_norm": 0.4664618670940399, "learning_rate": 0.0004149221044822888, "loss": 1.8479, "step": 33802 }, { "epoch": 1.12, "grad_norm": 0.4810188114643097, "learning_rate": 0.0004149124496066022, "loss": 1.7475, "step": 33803 }, { "epoch": 1.12, "grad_norm": 0.47870415449142456, "learning_rate": 0.0004149027945914288, "loss": 1.8402, "step": 33804 }, { "epoch": 1.12, "grad_norm": 0.48188233375549316, "learning_rate": 0.0004148931394367803, "loss": 1.7608, "step": 33805 }, { "epoch": 1.12, "grad_norm": 0.47064730525016785, "learning_rate": 0.0004148834841426686, "loss": 1.7377, "step": 33806 }, { "epoch": 1.12, "grad_norm": 0.4850623607635498, "learning_rate": 0.00041487382870910515, "loss": 1.824, "step": 33807 }, { "epoch": 1.12, "grad_norm": 0.48837047815322876, "learning_rate": 0.00041486417313610184, "loss": 1.8508, "step": 33808 }, { "epoch": 1.12, "grad_norm": 0.4833098351955414, "learning_rate": 0.0004148545174236703, "loss": 1.7639, "step": 33809 }, { "epoch": 1.12, "grad_norm": 0.4822118282318115, "learning_rate": 0.00041484486157182234, "loss": 1.8413, "step": 33810 }, { "epoch": 1.12, "grad_norm": 0.4638671875, "learning_rate": 0.00041483520558056967, "loss": 1.8755, "step": 33811 }, { "epoch": 1.12, "grad_norm": 0.4637434482574463, "learning_rate": 0.0004148255494499239, "loss": 1.8189, "step": 33812 }, { "epoch": 1.12, "grad_norm": 0.507948637008667, "learning_rate": 0.0004148158931798969, "loss": 1.7586, "step": 33813 }, { "epoch": 1.12, "grad_norm": 0.47931018471717834, "learning_rate": 0.0004148062367705004, "loss": 1.8838, "step": 33814 }, { "epoch": 1.13, "grad_norm": 0.48311302065849304, "learning_rate": 0.00041479658022174586, "loss": 1.8778, "step": 33815 }, { "epoch": 1.13, "grad_norm": 0.45364245772361755, "learning_rate": 0.00041478692353364533, "loss": 1.7756, "step": 33816 }, { "epoch": 1.13, "grad_norm": 0.48827993869781494, "learning_rate": 0.0004147772667062103, "loss": 1.7764, "step": 33817 }, { "epoch": 1.13, "grad_norm": 0.46291887760162354, "learning_rate": 0.0004147676097394526, "loss": 1.7923, "step": 33818 }, { "epoch": 1.13, "grad_norm": 0.46574702858924866, "learning_rate": 0.00041475795263338393, "loss": 1.7294, "step": 33819 }, { "epoch": 1.13, "grad_norm": 0.47399428486824036, "learning_rate": 0.000414748295388016, "loss": 1.8115, "step": 33820 }, { "epoch": 1.13, "grad_norm": 0.4521614611148834, "learning_rate": 0.0004147386380033606, "loss": 1.8536, "step": 33821 }, { "epoch": 1.13, "grad_norm": 0.4788002669811249, "learning_rate": 0.00041472898047942937, "loss": 1.8647, "step": 33822 }, { "epoch": 1.13, "grad_norm": 0.47599464654922485, "learning_rate": 0.00041471932281623406, "loss": 1.7638, "step": 33823 }, { "epoch": 1.13, "grad_norm": 0.4600544571876526, "learning_rate": 0.0004147096650137864, "loss": 1.8452, "step": 33824 }, { "epoch": 1.13, "grad_norm": 0.47361454367637634, "learning_rate": 0.0004147000070720981, "loss": 1.7408, "step": 33825 }, { "epoch": 1.13, "grad_norm": 0.4719579219818115, "learning_rate": 0.00041469034899118095, "loss": 1.7998, "step": 33826 }, { "epoch": 1.13, "grad_norm": 0.46552005410194397, "learning_rate": 0.00041468069077104654, "loss": 1.6976, "step": 33827 }, { "epoch": 1.13, "grad_norm": 0.4714987576007843, "learning_rate": 0.0004146710324117067, "loss": 1.7138, "step": 33828 }, { "epoch": 1.13, "grad_norm": 0.46821990609169006, "learning_rate": 0.00041466137391317314, "loss": 1.8022, "step": 33829 }, { "epoch": 1.13, "grad_norm": 0.47641459107398987, "learning_rate": 0.00041465171527545745, "loss": 1.8377, "step": 33830 }, { "epoch": 1.13, "grad_norm": 0.4671797454357147, "learning_rate": 0.0004146420564985716, "loss": 1.8347, "step": 33831 }, { "epoch": 1.13, "grad_norm": 0.4643592834472656, "learning_rate": 0.00041463239758252713, "loss": 1.7615, "step": 33832 }, { "epoch": 1.13, "grad_norm": 0.4873722791671753, "learning_rate": 0.0004146227385273359, "loss": 1.8279, "step": 33833 }, { "epoch": 1.13, "grad_norm": 0.4527049660682678, "learning_rate": 0.00041461307933300944, "loss": 1.7244, "step": 33834 }, { "epoch": 1.13, "grad_norm": 0.4731086790561676, "learning_rate": 0.0004146034199995597, "loss": 1.8615, "step": 33835 }, { "epoch": 1.13, "grad_norm": 0.4663504958152771, "learning_rate": 0.00041459376052699825, "loss": 1.7497, "step": 33836 }, { "epoch": 1.13, "grad_norm": 0.4959178864955902, "learning_rate": 0.00041458410091533676, "loss": 1.8132, "step": 33837 }, { "epoch": 1.13, "grad_norm": 0.4656086266040802, "learning_rate": 0.00041457444116458725, "loss": 1.7605, "step": 33838 }, { "epoch": 1.13, "grad_norm": 0.48505789041519165, "learning_rate": 0.00041456478127476106, "loss": 1.8144, "step": 33839 }, { "epoch": 1.13, "grad_norm": 0.4639755189418793, "learning_rate": 0.00041455512124587016, "loss": 1.8123, "step": 33840 }, { "epoch": 1.13, "grad_norm": 0.47480595111846924, "learning_rate": 0.00041454546107792627, "loss": 1.7572, "step": 33841 }, { "epoch": 1.13, "grad_norm": 0.4729654788970947, "learning_rate": 0.000414535800770941, "loss": 1.7669, "step": 33842 }, { "epoch": 1.13, "grad_norm": 0.4797765016555786, "learning_rate": 0.00041452614032492623, "loss": 1.7109, "step": 33843 }, { "epoch": 1.13, "grad_norm": 0.46638137102127075, "learning_rate": 0.0004145164797398936, "loss": 1.7714, "step": 33844 }, { "epoch": 1.13, "grad_norm": 0.4679146111011505, "learning_rate": 0.0004145068190158548, "loss": 1.7328, "step": 33845 }, { "epoch": 1.13, "grad_norm": 0.45949864387512207, "learning_rate": 0.00041449715815282157, "loss": 1.7821, "step": 33846 }, { "epoch": 1.13, "grad_norm": 0.4433799982070923, "learning_rate": 0.00041448749715080566, "loss": 1.8298, "step": 33847 }, { "epoch": 1.13, "grad_norm": 0.4631408452987671, "learning_rate": 0.00041447783600981884, "loss": 1.7316, "step": 33848 }, { "epoch": 1.13, "grad_norm": 0.48639461398124695, "learning_rate": 0.00041446817472987275, "loss": 1.849, "step": 33849 }, { "epoch": 1.13, "grad_norm": 0.4634988009929657, "learning_rate": 0.0004144585133109792, "loss": 1.7486, "step": 33850 }, { "epoch": 1.13, "grad_norm": 0.4531381130218506, "learning_rate": 0.0004144488517531499, "loss": 1.7778, "step": 33851 }, { "epoch": 1.13, "grad_norm": 0.4548742175102234, "learning_rate": 0.0004144391900563965, "loss": 1.7108, "step": 33852 }, { "epoch": 1.13, "grad_norm": 0.4606378972530365, "learning_rate": 0.0004144295282207308, "loss": 1.7736, "step": 33853 }, { "epoch": 1.13, "grad_norm": 0.46752870082855225, "learning_rate": 0.00041441986624616455, "loss": 1.8202, "step": 33854 }, { "epoch": 1.13, "grad_norm": 0.45694252848625183, "learning_rate": 0.00041441020413270936, "loss": 1.8215, "step": 33855 }, { "epoch": 1.13, "grad_norm": 0.4643915593624115, "learning_rate": 0.00041440054188037715, "loss": 1.766, "step": 33856 }, { "epoch": 1.13, "grad_norm": 0.4608295261859894, "learning_rate": 0.00041439087948917945, "loss": 1.7271, "step": 33857 }, { "epoch": 1.13, "grad_norm": 0.4555789530277252, "learning_rate": 0.0004143812169591281, "loss": 1.7712, "step": 33858 }, { "epoch": 1.13, "grad_norm": 0.46997231245040894, "learning_rate": 0.0004143715542902348, "loss": 1.8262, "step": 33859 }, { "epoch": 1.13, "grad_norm": 0.4495173990726471, "learning_rate": 0.0004143618914825113, "loss": 1.7684, "step": 33860 }, { "epoch": 1.13, "grad_norm": 0.4683947265148163, "learning_rate": 0.00041435222853596933, "loss": 1.7777, "step": 33861 }, { "epoch": 1.13, "grad_norm": 0.46711352467536926, "learning_rate": 0.0004143425654506205, "loss": 1.8238, "step": 33862 }, { "epoch": 1.13, "grad_norm": 0.44918933510780334, "learning_rate": 0.00041433290222647685, "loss": 1.7882, "step": 33863 }, { "epoch": 1.13, "grad_norm": 0.467207133769989, "learning_rate": 0.00041432323886354973, "loss": 1.7941, "step": 33864 }, { "epoch": 1.13, "grad_norm": 0.4636628031730652, "learning_rate": 0.0004143135753618511, "loss": 1.7612, "step": 33865 }, { "epoch": 1.13, "grad_norm": 0.4809785485267639, "learning_rate": 0.0004143039117213926, "loss": 1.8628, "step": 33866 }, { "epoch": 1.13, "grad_norm": 0.46716660261154175, "learning_rate": 0.000414294247942186, "loss": 1.7486, "step": 33867 }, { "epoch": 1.13, "grad_norm": 0.4748258590698242, "learning_rate": 0.0004142845840242431, "loss": 1.8326, "step": 33868 }, { "epoch": 1.13, "grad_norm": 0.49236270785331726, "learning_rate": 0.00041427491996757544, "loss": 1.7671, "step": 33869 }, { "epoch": 1.13, "grad_norm": 0.48588740825653076, "learning_rate": 0.00041426525577219496, "loss": 1.8139, "step": 33870 }, { "epoch": 1.13, "grad_norm": 0.45294812321662903, "learning_rate": 0.0004142555914381132, "loss": 1.827, "step": 33871 }, { "epoch": 1.13, "grad_norm": 0.46294865012168884, "learning_rate": 0.0004142459269653421, "loss": 1.837, "step": 33872 }, { "epoch": 1.13, "grad_norm": 0.4614567756652832, "learning_rate": 0.0004142362623538932, "loss": 1.7456, "step": 33873 }, { "epoch": 1.13, "grad_norm": 0.45496469736099243, "learning_rate": 0.0004142265976037783, "loss": 1.8155, "step": 33874 }, { "epoch": 1.13, "grad_norm": 0.4527323544025421, "learning_rate": 0.00041421693271500925, "loss": 1.7167, "step": 33875 }, { "epoch": 1.13, "grad_norm": 0.45672622323036194, "learning_rate": 0.00041420726768759755, "loss": 1.8958, "step": 33876 }, { "epoch": 1.13, "grad_norm": 0.46286019682884216, "learning_rate": 0.00041419760252155505, "loss": 1.8112, "step": 33877 }, { "epoch": 1.13, "grad_norm": 0.48014989495277405, "learning_rate": 0.0004141879372168936, "loss": 1.7246, "step": 33878 }, { "epoch": 1.13, "grad_norm": 0.4448373317718506, "learning_rate": 0.0004141782717736248, "loss": 1.7939, "step": 33879 }, { "epoch": 1.13, "grad_norm": 0.46705928444862366, "learning_rate": 0.0004141686061917603, "loss": 1.8282, "step": 33880 }, { "epoch": 1.13, "grad_norm": 0.4532615840435028, "learning_rate": 0.000414158940471312, "loss": 1.8265, "step": 33881 }, { "epoch": 1.13, "grad_norm": 0.47335681319236755, "learning_rate": 0.00041414927461229156, "loss": 1.8001, "step": 33882 }, { "epoch": 1.13, "grad_norm": 0.45753881335258484, "learning_rate": 0.00041413960861471066, "loss": 1.7799, "step": 33883 }, { "epoch": 1.13, "grad_norm": 0.485737144947052, "learning_rate": 0.00041412994247858124, "loss": 1.8091, "step": 33884 }, { "epoch": 1.13, "grad_norm": 0.48300889134407043, "learning_rate": 0.00041412027620391475, "loss": 1.8163, "step": 33885 }, { "epoch": 1.13, "grad_norm": 0.4614415168762207, "learning_rate": 0.0004141106097907231, "loss": 1.7864, "step": 33886 }, { "epoch": 1.13, "grad_norm": 0.4674611985683441, "learning_rate": 0.00041410094323901796, "loss": 1.7722, "step": 33887 }, { "epoch": 1.13, "grad_norm": 0.4606242775917053, "learning_rate": 0.0004140912765488112, "loss": 1.7728, "step": 33888 }, { "epoch": 1.13, "grad_norm": 0.47397246956825256, "learning_rate": 0.0004140816097201143, "loss": 1.8443, "step": 33889 }, { "epoch": 1.13, "grad_norm": 0.46750307083129883, "learning_rate": 0.0004140719427529392, "loss": 1.7512, "step": 33890 }, { "epoch": 1.13, "grad_norm": 0.47623202204704285, "learning_rate": 0.0004140622756472976, "loss": 1.8441, "step": 33891 }, { "epoch": 1.13, "grad_norm": 0.4580642879009247, "learning_rate": 0.00041405260840320107, "loss": 1.8331, "step": 33892 }, { "epoch": 1.13, "grad_norm": 0.45712223649024963, "learning_rate": 0.00041404294102066167, "loss": 1.8649, "step": 33893 }, { "epoch": 1.13, "grad_norm": 0.4520299434661865, "learning_rate": 0.0004140332734996908, "loss": 1.7797, "step": 33894 }, { "epoch": 1.13, "grad_norm": 0.4651743173599243, "learning_rate": 0.0004140236058403004, "loss": 1.772, "step": 33895 }, { "epoch": 1.13, "grad_norm": 0.4691954553127289, "learning_rate": 0.0004140139380425021, "loss": 1.7998, "step": 33896 }, { "epoch": 1.13, "grad_norm": 0.47988179326057434, "learning_rate": 0.0004140042701063077, "loss": 1.8209, "step": 33897 }, { "epoch": 1.13, "grad_norm": 0.4555119276046753, "learning_rate": 0.000413994602031729, "loss": 1.743, "step": 33898 }, { "epoch": 1.13, "grad_norm": 0.4526202380657196, "learning_rate": 0.0004139849338187775, "loss": 1.7838, "step": 33899 }, { "epoch": 1.13, "grad_norm": 0.4610229432582855, "learning_rate": 0.0004139752654674652, "loss": 1.7919, "step": 33900 }, { "epoch": 1.13, "grad_norm": 0.4698839485645294, "learning_rate": 0.00041396559697780364, "loss": 1.7624, "step": 33901 }, { "epoch": 1.13, "grad_norm": 0.4825228750705719, "learning_rate": 0.00041395592834980467, "loss": 1.7903, "step": 33902 }, { "epoch": 1.13, "grad_norm": 0.4623434841632843, "learning_rate": 0.00041394625958348, "loss": 1.8018, "step": 33903 }, { "epoch": 1.13, "grad_norm": 0.4564345180988312, "learning_rate": 0.00041393659067884134, "loss": 1.8263, "step": 33904 }, { "epoch": 1.13, "grad_norm": 0.47070521116256714, "learning_rate": 0.0004139269216359005, "loss": 1.8335, "step": 33905 }, { "epoch": 1.13, "grad_norm": 0.473294734954834, "learning_rate": 0.00041391725245466907, "loss": 1.7543, "step": 33906 }, { "epoch": 1.13, "grad_norm": 0.49468833208084106, "learning_rate": 0.00041390758313515895, "loss": 1.7811, "step": 33907 }, { "epoch": 1.13, "grad_norm": 0.4623708724975586, "learning_rate": 0.0004138979136773818, "loss": 1.746, "step": 33908 }, { "epoch": 1.13, "grad_norm": 0.4675092399120331, "learning_rate": 0.0004138882440813494, "loss": 1.8065, "step": 33909 }, { "epoch": 1.13, "grad_norm": 0.5133713483810425, "learning_rate": 0.0004138785743470734, "loss": 1.7339, "step": 33910 }, { "epoch": 1.13, "grad_norm": 0.4646872580051422, "learning_rate": 0.0004138689044745656, "loss": 1.8125, "step": 33911 }, { "epoch": 1.13, "grad_norm": 0.45632824301719666, "learning_rate": 0.00041385923446383776, "loss": 1.7655, "step": 33912 }, { "epoch": 1.13, "grad_norm": 0.4508344233036041, "learning_rate": 0.0004138495643149015, "loss": 1.7761, "step": 33913 }, { "epoch": 1.13, "grad_norm": 0.47072339057922363, "learning_rate": 0.0004138398940277687, "loss": 1.7366, "step": 33914 }, { "epoch": 1.13, "grad_norm": 0.45604315400123596, "learning_rate": 0.000413830223602451, "loss": 1.7454, "step": 33915 }, { "epoch": 1.13, "grad_norm": 0.4699276387691498, "learning_rate": 0.0004138205530389603, "loss": 1.8233, "step": 33916 }, { "epoch": 1.13, "grad_norm": 0.45031875371932983, "learning_rate": 0.00041381088233730806, "loss": 1.7804, "step": 33917 }, { "epoch": 1.13, "grad_norm": 0.4726608097553253, "learning_rate": 0.00041380121149750635, "loss": 1.7166, "step": 33918 }, { "epoch": 1.13, "grad_norm": 0.46174195408821106, "learning_rate": 0.0004137915405195666, "loss": 1.7965, "step": 33919 }, { "epoch": 1.13, "grad_norm": 0.45270541310310364, "learning_rate": 0.00041378186940350074, "loss": 1.7161, "step": 33920 }, { "epoch": 1.13, "grad_norm": 0.48159897327423096, "learning_rate": 0.00041377219814932043, "loss": 1.8655, "step": 33921 }, { "epoch": 1.13, "grad_norm": 0.4623766839504242, "learning_rate": 0.00041376252675703743, "loss": 1.7877, "step": 33922 }, { "epoch": 1.13, "grad_norm": 0.46525976061820984, "learning_rate": 0.0004137528552266635, "loss": 1.8107, "step": 33923 }, { "epoch": 1.13, "grad_norm": 0.4500577747821808, "learning_rate": 0.00041374318355821035, "loss": 1.7411, "step": 33924 }, { "epoch": 1.13, "grad_norm": 0.46497172117233276, "learning_rate": 0.0004137335117516898, "loss": 1.7941, "step": 33925 }, { "epoch": 1.13, "grad_norm": 0.48863816261291504, "learning_rate": 0.0004137238398071135, "loss": 1.7611, "step": 33926 }, { "epoch": 1.13, "grad_norm": 0.4657798409461975, "learning_rate": 0.0004137141677244931, "loss": 1.7471, "step": 33927 }, { "epoch": 1.13, "grad_norm": 0.4790477752685547, "learning_rate": 0.00041370449550384063, "loss": 1.7993, "step": 33928 }, { "epoch": 1.13, "grad_norm": 0.45407775044441223, "learning_rate": 0.0004136948231451676, "loss": 1.7801, "step": 33929 }, { "epoch": 1.13, "grad_norm": 0.47930046916007996, "learning_rate": 0.0004136851506484858, "loss": 1.7876, "step": 33930 }, { "epoch": 1.13, "grad_norm": 0.488791286945343, "learning_rate": 0.00041367547801380694, "loss": 1.745, "step": 33931 }, { "epoch": 1.13, "grad_norm": 0.45795702934265137, "learning_rate": 0.0004136658052411428, "loss": 1.8192, "step": 33932 }, { "epoch": 1.13, "grad_norm": 0.4838966429233551, "learning_rate": 0.00041365613233050515, "loss": 1.7608, "step": 33933 }, { "epoch": 1.13, "grad_norm": 0.47227779030799866, "learning_rate": 0.00041364645928190574, "loss": 1.7753, "step": 33934 }, { "epoch": 1.13, "grad_norm": 0.4787869155406952, "learning_rate": 0.0004136367860953562, "loss": 1.7964, "step": 33935 }, { "epoch": 1.13, "grad_norm": 0.48473039269447327, "learning_rate": 0.0004136271127708684, "loss": 1.7681, "step": 33936 }, { "epoch": 1.13, "grad_norm": 0.46011027693748474, "learning_rate": 0.00041361743930845395, "loss": 1.7777, "step": 33937 }, { "epoch": 1.13, "grad_norm": 0.4586088955402374, "learning_rate": 0.00041360776570812475, "loss": 1.7831, "step": 33938 }, { "epoch": 1.13, "grad_norm": 0.4718489646911621, "learning_rate": 0.0004135980919698924, "loss": 1.7692, "step": 33939 }, { "epoch": 1.13, "grad_norm": 0.4622741639614105, "learning_rate": 0.0004135884180937687, "loss": 1.808, "step": 33940 }, { "epoch": 1.13, "grad_norm": 0.47155433893203735, "learning_rate": 0.0004135787440797655, "loss": 1.6892, "step": 33941 }, { "epoch": 1.13, "grad_norm": 0.4887574017047882, "learning_rate": 0.00041356906992789434, "loss": 1.8613, "step": 33942 }, { "epoch": 1.13, "grad_norm": 0.46655258536338806, "learning_rate": 0.0004135593956381671, "loss": 1.8037, "step": 33943 }, { "epoch": 1.13, "grad_norm": 0.47361108660697937, "learning_rate": 0.0004135497212105955, "loss": 1.6797, "step": 33944 }, { "epoch": 1.13, "grad_norm": 0.5037752389907837, "learning_rate": 0.00041354004664519124, "loss": 1.7503, "step": 33945 }, { "epoch": 1.13, "grad_norm": 0.4791811406612396, "learning_rate": 0.00041353037194196617, "loss": 1.8172, "step": 33946 }, { "epoch": 1.13, "grad_norm": 0.4719120264053345, "learning_rate": 0.0004135206971009318, "loss": 1.8216, "step": 33947 }, { "epoch": 1.13, "grad_norm": 0.48783743381500244, "learning_rate": 0.00041351102212210014, "loss": 1.8387, "step": 33948 }, { "epoch": 1.13, "grad_norm": 0.4752320945262909, "learning_rate": 0.00041350134700548276, "loss": 1.7147, "step": 33949 }, { "epoch": 1.13, "grad_norm": 0.43868789076805115, "learning_rate": 0.00041349167175109154, "loss": 1.7997, "step": 33950 }, { "epoch": 1.13, "grad_norm": 0.4719988703727722, "learning_rate": 0.00041348199635893806, "loss": 1.8567, "step": 33951 }, { "epoch": 1.13, "grad_norm": 0.4678441286087036, "learning_rate": 0.00041347232082903425, "loss": 1.7797, "step": 33952 }, { "epoch": 1.13, "grad_norm": 0.49288439750671387, "learning_rate": 0.0004134626451613918, "loss": 1.7823, "step": 33953 }, { "epoch": 1.13, "grad_norm": 0.4619236886501312, "learning_rate": 0.00041345296935602223, "loss": 1.8292, "step": 33954 }, { "epoch": 1.13, "grad_norm": 0.46404796838760376, "learning_rate": 0.0004134432934129377, "loss": 1.7639, "step": 33955 }, { "epoch": 1.13, "grad_norm": 0.4811352491378784, "learning_rate": 0.0004134336173321496, "loss": 1.8016, "step": 33956 }, { "epoch": 1.13, "grad_norm": 0.47344598174095154, "learning_rate": 0.0004134239411136698, "loss": 1.766, "step": 33957 }, { "epoch": 1.13, "grad_norm": 0.46251875162124634, "learning_rate": 0.00041341426475751005, "loss": 1.7842, "step": 33958 }, { "epoch": 1.13, "grad_norm": 0.4657014012336731, "learning_rate": 0.0004134045882636821, "loss": 1.8338, "step": 33959 }, { "epoch": 1.13, "grad_norm": 0.4925098717212677, "learning_rate": 0.00041339491163219777, "loss": 1.788, "step": 33960 }, { "epoch": 1.13, "grad_norm": 0.501277506351471, "learning_rate": 0.0004133852348630686, "loss": 1.8021, "step": 33961 }, { "epoch": 1.13, "grad_norm": 0.4805140793323517, "learning_rate": 0.00041337555795630654, "loss": 1.717, "step": 33962 }, { "epoch": 1.13, "grad_norm": 0.47921299934387207, "learning_rate": 0.0004133658809119232, "loss": 1.7089, "step": 33963 }, { "epoch": 1.13, "grad_norm": 0.48503074049949646, "learning_rate": 0.0004133562037299304, "loss": 1.8098, "step": 33964 }, { "epoch": 1.13, "grad_norm": 0.4879165589809418, "learning_rate": 0.0004133465264103399, "loss": 1.8034, "step": 33965 }, { "epoch": 1.13, "grad_norm": 0.46608757972717285, "learning_rate": 0.0004133368489531635, "loss": 1.7881, "step": 33966 }, { "epoch": 1.13, "grad_norm": 0.46717900037765503, "learning_rate": 0.0004133271713584127, "loss": 1.8557, "step": 33967 }, { "epoch": 1.13, "grad_norm": 0.48459702730178833, "learning_rate": 0.00041331749362609954, "loss": 1.7789, "step": 33968 }, { "epoch": 1.13, "grad_norm": 0.47652187943458557, "learning_rate": 0.00041330781575623553, "loss": 1.7978, "step": 33969 }, { "epoch": 1.13, "grad_norm": 0.46823805570602417, "learning_rate": 0.0004132981377488326, "loss": 1.7853, "step": 33970 }, { "epoch": 1.13, "grad_norm": 0.4584573805332184, "learning_rate": 0.00041328845960390245, "loss": 1.8226, "step": 33971 }, { "epoch": 1.13, "grad_norm": 0.47164055705070496, "learning_rate": 0.00041327878132145664, "loss": 1.8071, "step": 33972 }, { "epoch": 1.13, "grad_norm": 0.4641748368740082, "learning_rate": 0.00041326910290150726, "loss": 1.8585, "step": 33973 }, { "epoch": 1.13, "grad_norm": 0.5845109224319458, "learning_rate": 0.0004132594243440658, "loss": 1.7534, "step": 33974 }, { "epoch": 1.13, "grad_norm": 0.46026256680488586, "learning_rate": 0.00041324974564914413, "loss": 1.7379, "step": 33975 }, { "epoch": 1.13, "grad_norm": 0.4760984480381012, "learning_rate": 0.0004132400668167539, "loss": 1.7609, "step": 33976 }, { "epoch": 1.13, "grad_norm": 0.45442628860473633, "learning_rate": 0.00041323038784690693, "loss": 1.7706, "step": 33977 }, { "epoch": 1.13, "grad_norm": 0.48505961894989014, "learning_rate": 0.000413220708739615, "loss": 1.7342, "step": 33978 }, { "epoch": 1.13, "grad_norm": 0.4901897609233856, "learning_rate": 0.0004132110294948897, "loss": 1.7706, "step": 33979 }, { "epoch": 1.13, "grad_norm": 0.4731690585613251, "learning_rate": 0.000413201350112743, "loss": 1.7606, "step": 33980 }, { "epoch": 1.13, "grad_norm": 0.47893956303596497, "learning_rate": 0.00041319167059318654, "loss": 1.7631, "step": 33981 }, { "epoch": 1.13, "grad_norm": 0.4798395037651062, "learning_rate": 0.00041318199093623204, "loss": 1.8026, "step": 33982 }, { "epoch": 1.13, "grad_norm": 0.47444894909858704, "learning_rate": 0.0004131723111418913, "loss": 1.7637, "step": 33983 }, { "epoch": 1.13, "grad_norm": 0.45260554552078247, "learning_rate": 0.000413162631210176, "loss": 1.8898, "step": 33984 }, { "epoch": 1.13, "grad_norm": 0.46170857548713684, "learning_rate": 0.000413152951141098, "loss": 1.8261, "step": 33985 }, { "epoch": 1.13, "grad_norm": 0.47423282265663147, "learning_rate": 0.00041314327093466894, "loss": 1.8073, "step": 33986 }, { "epoch": 1.13, "grad_norm": 0.4745428264141083, "learning_rate": 0.0004131335905909006, "loss": 1.7344, "step": 33987 }, { "epoch": 1.13, "grad_norm": 0.4582458436489105, "learning_rate": 0.0004131239101098048, "loss": 1.7954, "step": 33988 }, { "epoch": 1.13, "grad_norm": 0.46472159028053284, "learning_rate": 0.0004131142294913932, "loss": 1.8365, "step": 33989 }, { "epoch": 1.13, "grad_norm": 0.45750924944877625, "learning_rate": 0.00041310454873567763, "loss": 1.7864, "step": 33990 }, { "epoch": 1.13, "grad_norm": 0.47086673974990845, "learning_rate": 0.0004130948678426699, "loss": 1.7085, "step": 33991 }, { "epoch": 1.13, "grad_norm": 0.4580390751361847, "learning_rate": 0.0004130851868123815, "loss": 1.7654, "step": 33992 }, { "epoch": 1.13, "grad_norm": 0.48236894607543945, "learning_rate": 0.0004130755056448244, "loss": 1.7592, "step": 33993 }, { "epoch": 1.13, "grad_norm": 0.4687132239341736, "learning_rate": 0.00041306582434001026, "loss": 1.815, "step": 33994 }, { "epoch": 1.13, "grad_norm": 0.5017490983009338, "learning_rate": 0.00041305614289795093, "loss": 1.8145, "step": 33995 }, { "epoch": 1.13, "grad_norm": 0.46240872144699097, "learning_rate": 0.00041304646131865816, "loss": 1.7346, "step": 33996 }, { "epoch": 1.13, "grad_norm": 0.4653514325618744, "learning_rate": 0.00041303677960214346, "loss": 1.6519, "step": 33997 }, { "epoch": 1.13, "grad_norm": 0.4759041965007782, "learning_rate": 0.0004130270977484189, "loss": 1.7871, "step": 33998 }, { "epoch": 1.13, "grad_norm": 0.4635753333568573, "learning_rate": 0.00041301741575749604, "loss": 1.7344, "step": 33999 }, { "epoch": 1.13, "grad_norm": 0.49405437707901, "learning_rate": 0.00041300773362938674, "loss": 1.8542, "step": 34000 }, { "epoch": 1.13, "grad_norm": 0.4749779999256134, "learning_rate": 0.00041299805136410273, "loss": 1.8152, "step": 34001 }, { "epoch": 1.13, "grad_norm": 0.49298378825187683, "learning_rate": 0.0004129883689616556, "loss": 1.7961, "step": 34002 }, { "epoch": 1.13, "grad_norm": 0.5007870197296143, "learning_rate": 0.00041297868642205737, "loss": 1.7349, "step": 34003 }, { "epoch": 1.13, "grad_norm": 0.4773029685020447, "learning_rate": 0.0004129690037453195, "loss": 1.7977, "step": 34004 }, { "epoch": 1.13, "grad_norm": 0.46790948510169983, "learning_rate": 0.0004129593209314541, "loss": 1.842, "step": 34005 }, { "epoch": 1.13, "grad_norm": 0.49086371064186096, "learning_rate": 0.0004129496379804726, "loss": 1.8853, "step": 34006 }, { "epoch": 1.13, "grad_norm": 0.46333229541778564, "learning_rate": 0.0004129399548923869, "loss": 1.7869, "step": 34007 }, { "epoch": 1.13, "grad_norm": 0.47333958745002747, "learning_rate": 0.00041293027166720885, "loss": 1.8294, "step": 34008 }, { "epoch": 1.13, "grad_norm": 0.4843786358833313, "learning_rate": 0.00041292058830494995, "loss": 1.8255, "step": 34009 }, { "epoch": 1.13, "grad_norm": 0.459555447101593, "learning_rate": 0.00041291090480562214, "loss": 1.818, "step": 34010 }, { "epoch": 1.13, "grad_norm": 0.48074203729629517, "learning_rate": 0.00041290122116923714, "loss": 1.8287, "step": 34011 }, { "epoch": 1.13, "grad_norm": 0.49561285972595215, "learning_rate": 0.00041289153739580665, "loss": 1.7528, "step": 34012 }, { "epoch": 1.13, "grad_norm": 0.4966972768306732, "learning_rate": 0.00041288185348534255, "loss": 1.6163, "step": 34013 }, { "epoch": 1.13, "grad_norm": 0.46879804134368896, "learning_rate": 0.00041287216943785643, "loss": 1.745, "step": 34014 }, { "epoch": 1.13, "grad_norm": 0.47948694229125977, "learning_rate": 0.00041286248525336023, "loss": 1.7784, "step": 34015 }, { "epoch": 1.13, "grad_norm": 0.5333284735679626, "learning_rate": 0.0004128528009318655, "loss": 1.7299, "step": 34016 }, { "epoch": 1.13, "grad_norm": 0.4955080449581146, "learning_rate": 0.00041284311647338414, "loss": 1.7616, "step": 34017 }, { "epoch": 1.13, "grad_norm": 0.45025911927223206, "learning_rate": 0.0004128334318779278, "loss": 1.8153, "step": 34018 }, { "epoch": 1.13, "grad_norm": 0.46460333466529846, "learning_rate": 0.0004128237471455083, "loss": 1.8, "step": 34019 }, { "epoch": 1.13, "grad_norm": 0.47760751843452454, "learning_rate": 0.00041281406227613754, "loss": 1.7406, "step": 34020 }, { "epoch": 1.13, "grad_norm": 0.5013699531555176, "learning_rate": 0.00041280437726982705, "loss": 1.8291, "step": 34021 }, { "epoch": 1.13, "grad_norm": 0.4891539514064789, "learning_rate": 0.00041279469212658865, "loss": 1.7485, "step": 34022 }, { "epoch": 1.13, "grad_norm": 0.47389984130859375, "learning_rate": 0.0004127850068464341, "loss": 1.7737, "step": 34023 }, { "epoch": 1.13, "grad_norm": 0.44606128334999084, "learning_rate": 0.00041277532142937524, "loss": 1.7441, "step": 34024 }, { "epoch": 1.13, "grad_norm": 0.47955718636512756, "learning_rate": 0.0004127656358754236, "loss": 1.7974, "step": 34025 }, { "epoch": 1.13, "grad_norm": 0.5048933029174805, "learning_rate": 0.00041275595018459133, "loss": 1.8896, "step": 34026 }, { "epoch": 1.13, "grad_norm": 0.4917961061000824, "learning_rate": 0.0004127462643568897, "loss": 1.8147, "step": 34027 }, { "epoch": 1.13, "grad_norm": 0.46243521571159363, "learning_rate": 0.00041273657839233086, "loss": 1.7301, "step": 34028 }, { "epoch": 1.13, "grad_norm": 0.4903363287448883, "learning_rate": 0.00041272689229092645, "loss": 1.7901, "step": 34029 }, { "epoch": 1.13, "grad_norm": 0.5057998895645142, "learning_rate": 0.00041271720605268815, "loss": 1.9375, "step": 34030 }, { "epoch": 1.13, "grad_norm": 0.48799073696136475, "learning_rate": 0.0004127075196776277, "loss": 1.8285, "step": 34031 }, { "epoch": 1.13, "grad_norm": 0.46216896176338196, "learning_rate": 0.00041269783316575704, "loss": 1.8407, "step": 34032 }, { "epoch": 1.13, "grad_norm": 0.6399258971214294, "learning_rate": 0.00041268814651708777, "loss": 1.7936, "step": 34033 }, { "epoch": 1.13, "grad_norm": 0.47942256927490234, "learning_rate": 0.0004126784597316317, "loss": 1.7845, "step": 34034 }, { "epoch": 1.13, "grad_norm": 0.49794769287109375, "learning_rate": 0.0004126687728094006, "loss": 1.8245, "step": 34035 }, { "epoch": 1.13, "grad_norm": 0.4614395797252655, "learning_rate": 0.0004126590857504062, "loss": 1.827, "step": 34036 }, { "epoch": 1.13, "grad_norm": 0.4562225639820099, "learning_rate": 0.00041264939855466026, "loss": 1.8358, "step": 34037 }, { "epoch": 1.13, "grad_norm": 0.46492016315460205, "learning_rate": 0.00041263971122217454, "loss": 1.8033, "step": 34038 }, { "epoch": 1.13, "grad_norm": 0.47680196166038513, "learning_rate": 0.0004126300237529608, "loss": 1.8225, "step": 34039 }, { "epoch": 1.13, "grad_norm": 0.466590017080307, "learning_rate": 0.0004126203361470309, "loss": 1.7763, "step": 34040 }, { "epoch": 1.13, "grad_norm": 0.47058650851249695, "learning_rate": 0.0004126106484043964, "loss": 1.7587, "step": 34041 }, { "epoch": 1.13, "grad_norm": 0.47138074040412903, "learning_rate": 0.00041260096052506915, "loss": 1.8073, "step": 34042 }, { "epoch": 1.13, "grad_norm": 0.46195167303085327, "learning_rate": 0.000412591272509061, "loss": 1.7681, "step": 34043 }, { "epoch": 1.13, "grad_norm": 0.45110955834388733, "learning_rate": 0.0004125815843563836, "loss": 1.8476, "step": 34044 }, { "epoch": 1.13, "grad_norm": 0.46871790289878845, "learning_rate": 0.0004125718960670487, "loss": 1.7506, "step": 34045 }, { "epoch": 1.13, "grad_norm": 0.45786401629447937, "learning_rate": 0.0004125622076410682, "loss": 1.7904, "step": 34046 }, { "epoch": 1.13, "grad_norm": 0.4792056977748871, "learning_rate": 0.0004125525190784538, "loss": 1.8013, "step": 34047 }, { "epoch": 1.13, "grad_norm": 0.45631521940231323, "learning_rate": 0.0004125428303792171, "loss": 1.8441, "step": 34048 }, { "epoch": 1.13, "grad_norm": 0.4713456332683563, "learning_rate": 0.00041253314154337, "loss": 1.7587, "step": 34049 }, { "epoch": 1.13, "grad_norm": 0.4700247645378113, "learning_rate": 0.0004125234525709243, "loss": 1.888, "step": 34050 }, { "epoch": 1.13, "grad_norm": 0.46679139137268066, "learning_rate": 0.0004125137634618917, "loss": 1.7739, "step": 34051 }, { "epoch": 1.13, "grad_norm": 0.4882012605667114, "learning_rate": 0.0004125040742162839, "loss": 1.7983, "step": 34052 }, { "epoch": 1.13, "grad_norm": 0.45550861954689026, "learning_rate": 0.0004124943848341129, "loss": 1.8108, "step": 34053 }, { "epoch": 1.13, "grad_norm": 0.4496701955795288, "learning_rate": 0.00041248469531539013, "loss": 1.7777, "step": 34054 }, { "epoch": 1.13, "grad_norm": 0.480889230966568, "learning_rate": 0.00041247500566012757, "loss": 1.7782, "step": 34055 }, { "epoch": 1.13, "grad_norm": 0.4813400208950043, "learning_rate": 0.0004124653158683369, "loss": 1.8653, "step": 34056 }, { "epoch": 1.13, "grad_norm": 0.46579480171203613, "learning_rate": 0.0004124556259400299, "loss": 1.8053, "step": 34057 }, { "epoch": 1.13, "grad_norm": 0.44652724266052246, "learning_rate": 0.0004124459358752184, "loss": 1.7643, "step": 34058 }, { "epoch": 1.13, "grad_norm": 0.47635379433631897, "learning_rate": 0.000412436245673914, "loss": 1.7895, "step": 34059 }, { "epoch": 1.13, "grad_norm": 0.46819326281547546, "learning_rate": 0.00041242655533612867, "loss": 1.7435, "step": 34060 }, { "epoch": 1.13, "grad_norm": 0.45309826731681824, "learning_rate": 0.00041241686486187404, "loss": 1.7428, "step": 34061 }, { "epoch": 1.13, "grad_norm": 0.4776133894920349, "learning_rate": 0.00041240717425116195, "loss": 1.8203, "step": 34062 }, { "epoch": 1.13, "grad_norm": 0.47531622648239136, "learning_rate": 0.00041239748350400403, "loss": 1.8477, "step": 34063 }, { "epoch": 1.13, "grad_norm": 0.4747063219547272, "learning_rate": 0.00041238779262041213, "loss": 1.7926, "step": 34064 }, { "epoch": 1.13, "grad_norm": 0.4802078306674957, "learning_rate": 0.00041237810160039805, "loss": 1.7796, "step": 34065 }, { "epoch": 1.13, "grad_norm": 0.47240567207336426, "learning_rate": 0.00041236841044397344, "loss": 1.7697, "step": 34066 }, { "epoch": 1.13, "grad_norm": 0.7445801496505737, "learning_rate": 0.0004123587191511502, "loss": 1.8081, "step": 34067 }, { "epoch": 1.13, "grad_norm": 0.46878868341445923, "learning_rate": 0.00041234902772194004, "loss": 1.7693, "step": 34068 }, { "epoch": 1.13, "grad_norm": 0.4764324128627777, "learning_rate": 0.00041233933615635466, "loss": 1.7408, "step": 34069 }, { "epoch": 1.13, "grad_norm": 0.46363797783851624, "learning_rate": 0.00041232964445440603, "loss": 1.7274, "step": 34070 }, { "epoch": 1.13, "grad_norm": 0.510197103023529, "learning_rate": 0.0004123199526161055, "loss": 1.7585, "step": 34071 }, { "epoch": 1.13, "grad_norm": 0.48035749793052673, "learning_rate": 0.0004123102606414653, "loss": 1.7993, "step": 34072 }, { "epoch": 1.13, "grad_norm": 0.4679182767868042, "learning_rate": 0.0004123005685304969, "loss": 1.7844, "step": 34073 }, { "epoch": 1.13, "grad_norm": 0.46593520045280457, "learning_rate": 0.00041229087628321217, "loss": 1.8007, "step": 34074 }, { "epoch": 1.13, "grad_norm": 0.46268191933631897, "learning_rate": 0.0004122811838996229, "loss": 1.7939, "step": 34075 }, { "epoch": 1.13, "grad_norm": 0.474961519241333, "learning_rate": 0.00041227149137974085, "loss": 1.9109, "step": 34076 }, { "epoch": 1.13, "grad_norm": 0.4599885642528534, "learning_rate": 0.00041226179872357765, "loss": 1.7945, "step": 34077 }, { "epoch": 1.13, "grad_norm": 0.45528706908226013, "learning_rate": 0.00041225210593114525, "loss": 1.7626, "step": 34078 }, { "epoch": 1.13, "grad_norm": 0.4755527973175049, "learning_rate": 0.0004122424130024553, "loss": 1.7525, "step": 34079 }, { "epoch": 1.13, "grad_norm": 0.4840211570262909, "learning_rate": 0.00041223271993751955, "loss": 1.7708, "step": 34080 }, { "epoch": 1.13, "grad_norm": 0.4457071125507355, "learning_rate": 0.00041222302673634987, "loss": 1.819, "step": 34081 }, { "epoch": 1.13, "grad_norm": 0.4797723889350891, "learning_rate": 0.000412213333398958, "loss": 1.7978, "step": 34082 }, { "epoch": 1.13, "grad_norm": 0.476031094789505, "learning_rate": 0.00041220363992535564, "loss": 1.7749, "step": 34083 }, { "epoch": 1.13, "grad_norm": 0.47284504771232605, "learning_rate": 0.0004121939463155546, "loss": 1.7989, "step": 34084 }, { "epoch": 1.13, "grad_norm": 0.44772884249687195, "learning_rate": 0.0004121842525695666, "loss": 1.7735, "step": 34085 }, { "epoch": 1.13, "grad_norm": 0.48163118958473206, "learning_rate": 0.0004121745586874035, "loss": 1.7295, "step": 34086 }, { "epoch": 1.13, "grad_norm": 0.47640636563301086, "learning_rate": 0.00041216486466907697, "loss": 1.7528, "step": 34087 }, { "epoch": 1.13, "grad_norm": 0.4846465587615967, "learning_rate": 0.00041215517051459895, "loss": 1.7168, "step": 34088 }, { "epoch": 1.13, "grad_norm": 0.47013962268829346, "learning_rate": 0.0004121454762239808, "loss": 1.8251, "step": 34089 }, { "epoch": 1.13, "grad_norm": 0.46822598576545715, "learning_rate": 0.00041213578179723486, "loss": 1.7715, "step": 34090 }, { "epoch": 1.13, "grad_norm": 0.4637607932090759, "learning_rate": 0.0004121260872343724, "loss": 1.7161, "step": 34091 }, { "epoch": 1.13, "grad_norm": 0.4749046564102173, "learning_rate": 0.0004121163925354055, "loss": 1.7306, "step": 34092 }, { "epoch": 1.13, "grad_norm": 0.46809208393096924, "learning_rate": 0.0004121066977003458, "loss": 1.7601, "step": 34093 }, { "epoch": 1.13, "grad_norm": 0.4761075973510742, "learning_rate": 0.0004120970027292051, "loss": 1.7596, "step": 34094 }, { "epoch": 1.13, "grad_norm": 0.4793853163719177, "learning_rate": 0.00041208730762199515, "loss": 1.7914, "step": 34095 }, { "epoch": 1.13, "grad_norm": 0.45804622769355774, "learning_rate": 0.00041207761237872765, "loss": 1.8243, "step": 34096 }, { "epoch": 1.13, "grad_norm": 0.47026780247688293, "learning_rate": 0.0004120679169994146, "loss": 1.779, "step": 34097 }, { "epoch": 1.13, "grad_norm": 0.4627079963684082, "learning_rate": 0.0004120582214840675, "loss": 1.7814, "step": 34098 }, { "epoch": 1.13, "grad_norm": 0.4714986979961395, "learning_rate": 0.00041204852583269817, "loss": 1.7459, "step": 34099 }, { "epoch": 1.13, "grad_norm": 0.47866091132164, "learning_rate": 0.00041203883004531854, "loss": 1.8071, "step": 34100 }, { "epoch": 1.13, "grad_norm": 0.4632262885570526, "learning_rate": 0.00041202913412194023, "loss": 1.8233, "step": 34101 }, { "epoch": 1.13, "grad_norm": 0.4941192865371704, "learning_rate": 0.00041201943806257516, "loss": 1.7348, "step": 34102 }, { "epoch": 1.13, "grad_norm": 0.4660271108150482, "learning_rate": 0.00041200974186723487, "loss": 1.7067, "step": 34103 }, { "epoch": 1.13, "grad_norm": 0.46988725662231445, "learning_rate": 0.0004120000455359313, "loss": 1.7866, "step": 34104 }, { "epoch": 1.13, "grad_norm": 0.462736576795578, "learning_rate": 0.0004119903490686762, "loss": 1.7347, "step": 34105 }, { "epoch": 1.13, "grad_norm": 0.45787355303764343, "learning_rate": 0.00041198065246548127, "loss": 1.8364, "step": 34106 }, { "epoch": 1.13, "grad_norm": 0.45710086822509766, "learning_rate": 0.00041197095572635835, "loss": 1.7635, "step": 34107 }, { "epoch": 1.13, "grad_norm": 0.47737205028533936, "learning_rate": 0.0004119612588513192, "loss": 1.7985, "step": 34108 }, { "epoch": 1.13, "grad_norm": 0.4837454855442047, "learning_rate": 0.0004119515618403756, "loss": 1.817, "step": 34109 }, { "epoch": 1.13, "grad_norm": 0.4756481945514679, "learning_rate": 0.0004119418646935393, "loss": 1.8027, "step": 34110 }, { "epoch": 1.13, "grad_norm": 0.4671976864337921, "learning_rate": 0.00041193216741082197, "loss": 1.7949, "step": 34111 }, { "epoch": 1.13, "grad_norm": 0.4682435095310211, "learning_rate": 0.00041192246999223556, "loss": 1.8667, "step": 34112 }, { "epoch": 1.13, "grad_norm": 0.47443726658821106, "learning_rate": 0.00041191277243779174, "loss": 1.7579, "step": 34113 }, { "epoch": 1.13, "grad_norm": 0.4639751613140106, "learning_rate": 0.0004119030747475023, "loss": 1.7606, "step": 34114 }, { "epoch": 1.14, "grad_norm": 0.46861907839775085, "learning_rate": 0.00041189337692137907, "loss": 1.8124, "step": 34115 }, { "epoch": 1.14, "grad_norm": 0.4718416929244995, "learning_rate": 0.00041188367895943375, "loss": 1.8551, "step": 34116 }, { "epoch": 1.14, "grad_norm": 0.48500576615333557, "learning_rate": 0.00041187398086167806, "loss": 1.8171, "step": 34117 }, { "epoch": 1.14, "grad_norm": 0.4719643294811249, "learning_rate": 0.000411864282628124, "loss": 1.7911, "step": 34118 }, { "epoch": 1.14, "grad_norm": 0.4586019217967987, "learning_rate": 0.000411854584258783, "loss": 1.785, "step": 34119 }, { "epoch": 1.14, "grad_norm": 0.46652594208717346, "learning_rate": 0.00041184488575366716, "loss": 1.7775, "step": 34120 }, { "epoch": 1.14, "grad_norm": 0.4957413673400879, "learning_rate": 0.00041183518711278793, "loss": 1.8169, "step": 34121 }, { "epoch": 1.14, "grad_norm": 0.45492109656333923, "learning_rate": 0.0004118254883361575, "loss": 1.7283, "step": 34122 }, { "epoch": 1.14, "grad_norm": 0.4593401551246643, "learning_rate": 0.00041181578942378714, "loss": 1.8028, "step": 34123 }, { "epoch": 1.14, "grad_norm": 0.4655132293701172, "learning_rate": 0.000411806090375689, "loss": 1.7257, "step": 34124 }, { "epoch": 1.14, "grad_norm": 0.45649850368499756, "learning_rate": 0.0004117963911918748, "loss": 1.7912, "step": 34125 }, { "epoch": 1.14, "grad_norm": 0.47078245878219604, "learning_rate": 0.0004117866918723562, "loss": 1.7291, "step": 34126 }, { "epoch": 1.14, "grad_norm": 0.46853309869766235, "learning_rate": 0.0004117769924171451, "loss": 1.8536, "step": 34127 }, { "epoch": 1.14, "grad_norm": 0.464697003364563, "learning_rate": 0.00041176729282625316, "loss": 1.7274, "step": 34128 }, { "epoch": 1.14, "grad_norm": 0.47676414251327515, "learning_rate": 0.00041175759309969214, "loss": 1.7834, "step": 34129 }, { "epoch": 1.14, "grad_norm": 0.4801613688468933, "learning_rate": 0.00041174789323747386, "loss": 1.8125, "step": 34130 }, { "epoch": 1.14, "grad_norm": 0.456321656703949, "learning_rate": 0.00041173819323961017, "loss": 1.751, "step": 34131 }, { "epoch": 1.14, "grad_norm": 0.4619978368282318, "learning_rate": 0.0004117284931061128, "loss": 1.7542, "step": 34132 }, { "epoch": 1.14, "grad_norm": 0.47712603211402893, "learning_rate": 0.0004117187928369934, "loss": 1.8193, "step": 34133 }, { "epoch": 1.14, "grad_norm": 0.4723142087459564, "learning_rate": 0.00041170909243226395, "loss": 1.7582, "step": 34134 }, { "epoch": 1.14, "grad_norm": 0.46771055459976196, "learning_rate": 0.000411699391891936, "loss": 1.7856, "step": 34135 }, { "epoch": 1.14, "grad_norm": 0.45124125480651855, "learning_rate": 0.0004116896912160216, "loss": 1.7838, "step": 34136 }, { "epoch": 1.14, "grad_norm": 0.4798012673854828, "learning_rate": 0.00041167999040453223, "loss": 1.8551, "step": 34137 }, { "epoch": 1.14, "grad_norm": 0.46528008580207825, "learning_rate": 0.00041167028945747995, "loss": 1.8572, "step": 34138 }, { "epoch": 1.14, "grad_norm": 0.6230214834213257, "learning_rate": 0.00041166058837487627, "loss": 1.788, "step": 34139 }, { "epoch": 1.14, "grad_norm": 0.46377983689308167, "learning_rate": 0.0004116508871567331, "loss": 1.7349, "step": 34140 }, { "epoch": 1.14, "grad_norm": 0.4616761803627014, "learning_rate": 0.00041164118580306225, "loss": 1.7701, "step": 34141 }, { "epoch": 1.14, "grad_norm": 0.4654686748981476, "learning_rate": 0.0004116314843138754, "loss": 1.826, "step": 34142 }, { "epoch": 1.14, "grad_norm": 0.4528628885746002, "learning_rate": 0.00041162178268918444, "loss": 1.8139, "step": 34143 }, { "epoch": 1.14, "grad_norm": 0.45875298976898193, "learning_rate": 0.000411612080929001, "loss": 1.7842, "step": 34144 }, { "epoch": 1.14, "grad_norm": 0.4791848957538605, "learning_rate": 0.00041160237903333706, "loss": 1.8257, "step": 34145 }, { "epoch": 1.14, "grad_norm": 0.45924630761146545, "learning_rate": 0.00041159267700220426, "loss": 1.8235, "step": 34146 }, { "epoch": 1.14, "grad_norm": 0.47757530212402344, "learning_rate": 0.0004115829748356143, "loss": 1.8112, "step": 34147 }, { "epoch": 1.14, "grad_norm": 0.4864683747291565, "learning_rate": 0.0004115732725335791, "loss": 1.8312, "step": 34148 }, { "epoch": 1.14, "grad_norm": 0.4926295578479767, "learning_rate": 0.00041156357009611034, "loss": 1.7729, "step": 34149 }, { "epoch": 1.14, "grad_norm": 0.4989074766635895, "learning_rate": 0.0004115538675232199, "loss": 1.7739, "step": 34150 }, { "epoch": 1.14, "grad_norm": 0.4499821066856384, "learning_rate": 0.00041154416481491943, "loss": 1.7622, "step": 34151 }, { "epoch": 1.14, "grad_norm": 0.45979875326156616, "learning_rate": 0.0004115344619712209, "loss": 1.7507, "step": 34152 }, { "epoch": 1.14, "grad_norm": 0.4880472719669342, "learning_rate": 0.00041152475899213593, "loss": 1.833, "step": 34153 }, { "epoch": 1.14, "grad_norm": 0.47395560145378113, "learning_rate": 0.0004115150558776763, "loss": 1.8084, "step": 34154 }, { "epoch": 1.14, "grad_norm": 0.5537818074226379, "learning_rate": 0.0004115053526278538, "loss": 1.8008, "step": 34155 }, { "epoch": 1.14, "grad_norm": 0.4970073401927948, "learning_rate": 0.0004114956492426802, "loss": 1.7585, "step": 34156 }, { "epoch": 1.14, "grad_norm": 0.46420326828956604, "learning_rate": 0.0004114859457221674, "loss": 1.7503, "step": 34157 }, { "epoch": 1.14, "grad_norm": 0.4548064172267914, "learning_rate": 0.0004114762420663271, "loss": 1.7194, "step": 34158 }, { "epoch": 1.14, "grad_norm": 0.4616123139858246, "learning_rate": 0.00041146653827517094, "loss": 1.795, "step": 34159 }, { "epoch": 1.14, "grad_norm": 0.4789513647556305, "learning_rate": 0.000411456834348711, "loss": 1.7281, "step": 34160 }, { "epoch": 1.14, "grad_norm": 0.48507654666900635, "learning_rate": 0.00041144713028695875, "loss": 1.8494, "step": 34161 }, { "epoch": 1.14, "grad_norm": 0.49765175580978394, "learning_rate": 0.00041143742608992616, "loss": 1.8081, "step": 34162 }, { "epoch": 1.14, "grad_norm": 0.4597626328468323, "learning_rate": 0.000411427721757625, "loss": 1.7405, "step": 34163 }, { "epoch": 1.14, "grad_norm": 0.467311292886734, "learning_rate": 0.00041141801729006693, "loss": 1.7866, "step": 34164 }, { "epoch": 1.14, "grad_norm": 0.47575050592422485, "learning_rate": 0.0004114083126872638, "loss": 1.7332, "step": 34165 }, { "epoch": 1.14, "grad_norm": 0.48479318618774414, "learning_rate": 0.00041139860794922736, "loss": 1.7284, "step": 34166 }, { "epoch": 1.14, "grad_norm": 0.47336500883102417, "learning_rate": 0.00041138890307596947, "loss": 1.7426, "step": 34167 }, { "epoch": 1.14, "grad_norm": 0.46174997091293335, "learning_rate": 0.00041137919806750187, "loss": 1.8282, "step": 34168 }, { "epoch": 1.14, "grad_norm": 0.45248866081237793, "learning_rate": 0.0004113694929238364, "loss": 1.8213, "step": 34169 }, { "epoch": 1.14, "grad_norm": 0.46364477276802063, "learning_rate": 0.0004113597876449847, "loss": 1.6926, "step": 34170 }, { "epoch": 1.14, "grad_norm": 0.46648019552230835, "learning_rate": 0.0004113500822309587, "loss": 1.8566, "step": 34171 }, { "epoch": 1.14, "grad_norm": 0.4659181237220764, "learning_rate": 0.00041134037668177, "loss": 1.838, "step": 34172 }, { "epoch": 1.14, "grad_norm": 0.47057482600212097, "learning_rate": 0.00041133067099743053, "loss": 1.8308, "step": 34173 }, { "epoch": 1.14, "grad_norm": 0.47507500648498535, "learning_rate": 0.000411320965177952, "loss": 1.7337, "step": 34174 }, { "epoch": 1.14, "grad_norm": 0.4513167440891266, "learning_rate": 0.00041131125922334627, "loss": 1.7934, "step": 34175 }, { "epoch": 1.14, "grad_norm": 0.45898449420928955, "learning_rate": 0.000411301553133625, "loss": 1.7667, "step": 34176 }, { "epoch": 1.14, "grad_norm": 0.4577018618583679, "learning_rate": 0.0004112918469088002, "loss": 1.7368, "step": 34177 }, { "epoch": 1.14, "grad_norm": 0.47894105315208435, "learning_rate": 0.00041128214054888336, "loss": 1.8198, "step": 34178 }, { "epoch": 1.14, "grad_norm": 0.4557906985282898, "learning_rate": 0.00041127243405388644, "loss": 1.8039, "step": 34179 }, { "epoch": 1.14, "grad_norm": 0.47979703545570374, "learning_rate": 0.00041126272742382123, "loss": 1.7273, "step": 34180 }, { "epoch": 1.14, "grad_norm": 1.0904078483581543, "learning_rate": 0.00041125302065869935, "loss": 1.847, "step": 34181 }, { "epoch": 1.14, "grad_norm": 0.4533080458641052, "learning_rate": 0.00041124331375853277, "loss": 1.7584, "step": 34182 }, { "epoch": 1.14, "grad_norm": 0.4560770094394684, "learning_rate": 0.0004112336067233331, "loss": 1.8315, "step": 34183 }, { "epoch": 1.14, "grad_norm": 0.46959009766578674, "learning_rate": 0.0004112238995531124, "loss": 1.7497, "step": 34184 }, { "epoch": 1.14, "grad_norm": 0.47537028789520264, "learning_rate": 0.0004112141922478821, "loss": 1.7957, "step": 34185 }, { "epoch": 1.14, "grad_norm": 0.4611697196960449, "learning_rate": 0.00041120448480765423, "loss": 1.6807, "step": 34186 }, { "epoch": 1.14, "grad_norm": 0.46982091665267944, "learning_rate": 0.00041119477723244055, "loss": 1.8256, "step": 34187 }, { "epoch": 1.14, "grad_norm": 0.45465087890625, "learning_rate": 0.0004111850695222527, "loss": 1.7519, "step": 34188 }, { "epoch": 1.14, "grad_norm": 0.4703783392906189, "learning_rate": 0.0004111753616771026, "loss": 1.8113, "step": 34189 }, { "epoch": 1.14, "grad_norm": 0.4569675326347351, "learning_rate": 0.00041116565369700203, "loss": 1.8026, "step": 34190 }, { "epoch": 1.14, "grad_norm": 0.46993643045425415, "learning_rate": 0.0004111559455819627, "loss": 1.7692, "step": 34191 }, { "epoch": 1.14, "grad_norm": 0.4759526252746582, "learning_rate": 0.00041114623733199643, "loss": 1.7515, "step": 34192 }, { "epoch": 1.14, "grad_norm": 0.46234798431396484, "learning_rate": 0.000411136528947115, "loss": 1.787, "step": 34193 }, { "epoch": 1.14, "grad_norm": 0.45530006289482117, "learning_rate": 0.00041112682042733023, "loss": 1.8048, "step": 34194 }, { "epoch": 1.14, "grad_norm": 0.4518907368183136, "learning_rate": 0.0004111171117726539, "loss": 1.7702, "step": 34195 }, { "epoch": 1.14, "grad_norm": 0.4716196656227112, "learning_rate": 0.00041110740298309765, "loss": 1.7474, "step": 34196 }, { "epoch": 1.14, "grad_norm": 0.48070406913757324, "learning_rate": 0.0004110976940586734, "loss": 1.8264, "step": 34197 }, { "epoch": 1.14, "grad_norm": 0.45813876390457153, "learning_rate": 0.000411087984999393, "loss": 1.791, "step": 34198 }, { "epoch": 1.14, "grad_norm": 0.46344804763793945, "learning_rate": 0.0004110782758052681, "loss": 1.8215, "step": 34199 }, { "epoch": 1.14, "grad_norm": 0.4553203284740448, "learning_rate": 0.00041106856647631063, "loss": 1.7797, "step": 34200 }, { "epoch": 1.14, "grad_norm": 0.5132777690887451, "learning_rate": 0.00041105885701253224, "loss": 1.7932, "step": 34201 }, { "epoch": 1.14, "grad_norm": 0.4663524925708771, "learning_rate": 0.0004110491474139447, "loss": 1.8013, "step": 34202 }, { "epoch": 1.14, "grad_norm": 0.471902996301651, "learning_rate": 0.0004110394376805599, "loss": 1.7855, "step": 34203 }, { "epoch": 1.14, "grad_norm": 0.4762323200702667, "learning_rate": 0.0004110297278123896, "loss": 1.7938, "step": 34204 }, { "epoch": 1.14, "grad_norm": 0.46571075916290283, "learning_rate": 0.0004110200178094456, "loss": 1.8233, "step": 34205 }, { "epoch": 1.14, "grad_norm": 0.4749414920806885, "learning_rate": 0.00041101030767173955, "loss": 1.8375, "step": 34206 }, { "epoch": 1.14, "grad_norm": 0.492819219827652, "learning_rate": 0.00041100059739928343, "loss": 1.8309, "step": 34207 }, { "epoch": 1.14, "grad_norm": 0.45377129316329956, "learning_rate": 0.0004109908869920889, "loss": 1.7823, "step": 34208 }, { "epoch": 1.14, "grad_norm": 0.47643277049064636, "learning_rate": 0.00041098117645016783, "loss": 1.7978, "step": 34209 }, { "epoch": 1.14, "grad_norm": 0.46179068088531494, "learning_rate": 0.0004109714657735319, "loss": 1.853, "step": 34210 }, { "epoch": 1.14, "grad_norm": 0.47130441665649414, "learning_rate": 0.00041096175496219297, "loss": 1.8161, "step": 34211 }, { "epoch": 1.14, "grad_norm": 0.47442495822906494, "learning_rate": 0.0004109520440161629, "loss": 1.7495, "step": 34212 }, { "epoch": 1.14, "grad_norm": 0.4496675729751587, "learning_rate": 0.00041094233293545326, "loss": 1.7205, "step": 34213 }, { "epoch": 1.14, "grad_norm": 0.47745609283447266, "learning_rate": 0.0004109326217200761, "loss": 1.8149, "step": 34214 }, { "epoch": 1.14, "grad_norm": 0.4636051058769226, "learning_rate": 0.00041092291037004306, "loss": 1.8004, "step": 34215 }, { "epoch": 1.14, "grad_norm": 0.4717343747615814, "learning_rate": 0.00041091319888536586, "loss": 1.7647, "step": 34216 }, { "epoch": 1.14, "grad_norm": 0.47089701890945435, "learning_rate": 0.00041090348726605645, "loss": 1.835, "step": 34217 }, { "epoch": 1.14, "grad_norm": 0.48178932070732117, "learning_rate": 0.00041089377551212653, "loss": 1.8198, "step": 34218 }, { "epoch": 1.14, "grad_norm": 1.1386897563934326, "learning_rate": 0.00041088406362358804, "loss": 1.8148, "step": 34219 }, { "epoch": 1.14, "grad_norm": 0.44929060339927673, "learning_rate": 0.00041087435160045246, "loss": 1.7892, "step": 34220 }, { "epoch": 1.14, "grad_norm": 0.47540661692619324, "learning_rate": 0.0004108646394427318, "loss": 1.7428, "step": 34221 }, { "epoch": 1.14, "grad_norm": 0.47854793071746826, "learning_rate": 0.0004108549271504378, "loss": 1.7741, "step": 34222 }, { "epoch": 1.14, "grad_norm": 0.47929468750953674, "learning_rate": 0.0004108452147235823, "loss": 1.7411, "step": 34223 }, { "epoch": 1.14, "grad_norm": 0.46824756264686584, "learning_rate": 0.00041083550216217693, "loss": 1.8032, "step": 34224 }, { "epoch": 1.14, "grad_norm": 0.46420174837112427, "learning_rate": 0.0004108257894662338, "loss": 1.7884, "step": 34225 }, { "epoch": 1.14, "grad_norm": 0.455021470785141, "learning_rate": 0.00041081607663576424, "loss": 1.71, "step": 34226 }, { "epoch": 1.14, "grad_norm": 0.4462166726589203, "learning_rate": 0.0004108063636707805, "loss": 1.8159, "step": 34227 }, { "epoch": 1.14, "grad_norm": 0.4853009581565857, "learning_rate": 0.00041079665057129405, "loss": 1.8114, "step": 34228 }, { "epoch": 1.14, "grad_norm": 0.4958188533782959, "learning_rate": 0.00041078693733731677, "loss": 1.8358, "step": 34229 }, { "epoch": 1.14, "grad_norm": 0.473885715007782, "learning_rate": 0.0004107772239688605, "loss": 1.8687, "step": 34230 }, { "epoch": 1.14, "grad_norm": 0.4672242999076843, "learning_rate": 0.00041076751046593697, "loss": 1.8149, "step": 34231 }, { "epoch": 1.14, "grad_norm": 0.4723600745201111, "learning_rate": 0.00041075779682855814, "loss": 1.805, "step": 34232 }, { "epoch": 1.14, "grad_norm": 0.49418604373931885, "learning_rate": 0.00041074808305673554, "loss": 1.7554, "step": 34233 }, { "epoch": 1.14, "grad_norm": 0.4736267328262329, "learning_rate": 0.00041073836915048114, "loss": 1.8142, "step": 34234 }, { "epoch": 1.14, "grad_norm": 0.4651162624359131, "learning_rate": 0.0004107286551098067, "loss": 1.787, "step": 34235 }, { "epoch": 1.14, "grad_norm": 0.4663870930671692, "learning_rate": 0.0004107189409347239, "loss": 1.775, "step": 34236 }, { "epoch": 1.14, "grad_norm": 0.48252081871032715, "learning_rate": 0.00041070922662524465, "loss": 1.7989, "step": 34237 }, { "epoch": 1.14, "grad_norm": 0.5000251531600952, "learning_rate": 0.0004106995121813807, "loss": 1.7688, "step": 34238 }, { "epoch": 1.14, "grad_norm": 0.4708888828754425, "learning_rate": 0.0004106897976031439, "loss": 1.7859, "step": 34239 }, { "epoch": 1.14, "grad_norm": 0.4638880491256714, "learning_rate": 0.0004106800828905459, "loss": 1.7731, "step": 34240 }, { "epoch": 1.14, "grad_norm": 0.5575870275497437, "learning_rate": 0.0004106703680435987, "loss": 1.8344, "step": 34241 }, { "epoch": 1.14, "grad_norm": 0.528595507144928, "learning_rate": 0.000410660653062314, "loss": 1.7604, "step": 34242 }, { "epoch": 1.14, "grad_norm": 0.49716490507125854, "learning_rate": 0.0004106509379467034, "loss": 1.8261, "step": 34243 }, { "epoch": 1.14, "grad_norm": 0.45911797881126404, "learning_rate": 0.00041064122269677904, "loss": 1.7073, "step": 34244 }, { "epoch": 1.14, "grad_norm": 0.47539615631103516, "learning_rate": 0.0004106315073125525, "loss": 1.8154, "step": 34245 }, { "epoch": 1.14, "grad_norm": 0.519786536693573, "learning_rate": 0.00041062179179403557, "loss": 1.8676, "step": 34246 }, { "epoch": 1.14, "grad_norm": 0.4810239374637604, "learning_rate": 0.0004106120761412401, "loss": 1.7749, "step": 34247 }, { "epoch": 1.14, "grad_norm": 0.4572943449020386, "learning_rate": 0.0004106023603541778, "loss": 1.8086, "step": 34248 }, { "epoch": 1.14, "grad_norm": 0.46210813522338867, "learning_rate": 0.0004105926444328607, "loss": 1.8063, "step": 34249 }, { "epoch": 1.14, "grad_norm": 0.5045920014381409, "learning_rate": 0.00041058292837730027, "loss": 1.7639, "step": 34250 }, { "epoch": 1.14, "grad_norm": 0.5243534445762634, "learning_rate": 0.0004105732121875085, "loss": 1.8013, "step": 34251 }, { "epoch": 1.14, "grad_norm": 0.4597308039665222, "learning_rate": 0.00041056349586349715, "loss": 1.8451, "step": 34252 }, { "epoch": 1.14, "grad_norm": 0.48770254850387573, "learning_rate": 0.00041055377940527797, "loss": 1.7695, "step": 34253 }, { "epoch": 1.14, "grad_norm": 0.49104639887809753, "learning_rate": 0.00041054406281286287, "loss": 1.7849, "step": 34254 }, { "epoch": 1.14, "grad_norm": 0.48774415254592896, "learning_rate": 0.00041053434608626354, "loss": 1.7728, "step": 34255 }, { "epoch": 1.14, "grad_norm": 0.46914753317832947, "learning_rate": 0.0004105246292254918, "loss": 1.7565, "step": 34256 }, { "epoch": 1.14, "grad_norm": 0.47965431213378906, "learning_rate": 0.0004105149122305594, "loss": 1.7468, "step": 34257 }, { "epoch": 1.14, "grad_norm": 0.5109384655952454, "learning_rate": 0.0004105051951014782, "loss": 1.7569, "step": 34258 }, { "epoch": 1.14, "grad_norm": 0.4840051829814911, "learning_rate": 0.00041049547783826, "loss": 1.8044, "step": 34259 }, { "epoch": 1.14, "grad_norm": 0.47538241744041443, "learning_rate": 0.0004104857604409166, "loss": 1.7567, "step": 34260 }, { "epoch": 1.14, "grad_norm": 0.4771873652935028, "learning_rate": 0.00041047604290945963, "loss": 1.7396, "step": 34261 }, { "epoch": 1.14, "grad_norm": 0.47068727016448975, "learning_rate": 0.00041046632524390116, "loss": 1.797, "step": 34262 }, { "epoch": 1.14, "grad_norm": 0.4708789587020874, "learning_rate": 0.0004104566074442528, "loss": 1.861, "step": 34263 }, { "epoch": 1.14, "grad_norm": 0.4851818084716797, "learning_rate": 0.0004104468895105264, "loss": 1.7263, "step": 34264 }, { "epoch": 1.14, "grad_norm": 0.48307567834854126, "learning_rate": 0.0004104371714427337, "loss": 1.7876, "step": 34265 }, { "epoch": 1.14, "grad_norm": 0.4708610475063324, "learning_rate": 0.0004104274532408866, "loss": 1.8052, "step": 34266 }, { "epoch": 1.14, "grad_norm": 0.4724757969379425, "learning_rate": 0.0004104177349049969, "loss": 1.7451, "step": 34267 }, { "epoch": 1.14, "grad_norm": 0.4508233666419983, "learning_rate": 0.0004104080164350762, "loss": 1.7856, "step": 34268 }, { "epoch": 1.14, "grad_norm": 0.46614396572113037, "learning_rate": 0.0004103982978311366, "loss": 1.8003, "step": 34269 }, { "epoch": 1.14, "grad_norm": 0.49567413330078125, "learning_rate": 0.0004103885790931896, "loss": 1.7778, "step": 34270 }, { "epoch": 1.14, "grad_norm": 0.4571249186992645, "learning_rate": 0.00041037886022124713, "loss": 1.758, "step": 34271 }, { "epoch": 1.14, "grad_norm": 0.4922652244567871, "learning_rate": 0.000410369141215321, "loss": 1.7905, "step": 34272 }, { "epoch": 1.14, "grad_norm": 0.45104867219924927, "learning_rate": 0.00041035942207542307, "loss": 1.8516, "step": 34273 }, { "epoch": 1.14, "grad_norm": 0.45652058720588684, "learning_rate": 0.00041034970280156506, "loss": 1.8482, "step": 34274 }, { "epoch": 1.14, "grad_norm": 0.45824486017227173, "learning_rate": 0.00041033998339375875, "loss": 1.7653, "step": 34275 }, { "epoch": 1.14, "grad_norm": 0.44747263193130493, "learning_rate": 0.0004103302638520159, "loss": 1.7571, "step": 34276 }, { "epoch": 1.14, "grad_norm": 0.467792809009552, "learning_rate": 0.0004103205441763484, "loss": 1.7284, "step": 34277 }, { "epoch": 1.14, "grad_norm": 0.47170281410217285, "learning_rate": 0.000410310824366768, "loss": 1.7913, "step": 34278 }, { "epoch": 1.14, "grad_norm": 0.4625491201877594, "learning_rate": 0.0004103011044232865, "loss": 1.889, "step": 34279 }, { "epoch": 1.14, "grad_norm": 0.44978734850883484, "learning_rate": 0.0004102913843459158, "loss": 1.7489, "step": 34280 }, { "epoch": 1.14, "grad_norm": 0.48901277780532837, "learning_rate": 0.00041028166413466753, "loss": 1.7749, "step": 34281 }, { "epoch": 1.14, "grad_norm": 0.4646761417388916, "learning_rate": 0.0004102719437895536, "loss": 1.7712, "step": 34282 }, { "epoch": 1.14, "grad_norm": 0.46687939763069153, "learning_rate": 0.00041026222331058573, "loss": 1.803, "step": 34283 }, { "epoch": 1.14, "grad_norm": 0.4650028944015503, "learning_rate": 0.0004102525026977758, "loss": 1.7483, "step": 34284 }, { "epoch": 1.14, "grad_norm": 0.4733794331550598, "learning_rate": 0.0004102427819511355, "loss": 1.7236, "step": 34285 }, { "epoch": 1.14, "grad_norm": 0.4566953778266907, "learning_rate": 0.0004102330610706768, "loss": 1.7853, "step": 34286 }, { "epoch": 1.14, "grad_norm": 0.48619863390922546, "learning_rate": 0.00041022334005641145, "loss": 1.7488, "step": 34287 }, { "epoch": 1.14, "grad_norm": 0.4760727286338806, "learning_rate": 0.00041021361890835115, "loss": 1.7628, "step": 34288 }, { "epoch": 1.14, "grad_norm": 0.476982980966568, "learning_rate": 0.0004102038976265078, "loss": 1.8197, "step": 34289 }, { "epoch": 1.14, "grad_norm": 0.47908493876457214, "learning_rate": 0.00041019417621089305, "loss": 1.7957, "step": 34290 }, { "epoch": 1.14, "grad_norm": 0.45862650871276855, "learning_rate": 0.0004101844546615189, "loss": 1.7668, "step": 34291 }, { "epoch": 1.14, "grad_norm": 0.4700720012187958, "learning_rate": 0.000410174732978397, "loss": 1.8062, "step": 34292 }, { "epoch": 1.14, "grad_norm": 0.45288267731666565, "learning_rate": 0.00041016501116153924, "loss": 1.8022, "step": 34293 }, { "epoch": 1.14, "grad_norm": 0.476493775844574, "learning_rate": 0.00041015528921095737, "loss": 1.7862, "step": 34294 }, { "epoch": 1.14, "grad_norm": 0.47867897152900696, "learning_rate": 0.00041014556712666323, "loss": 1.802, "step": 34295 }, { "epoch": 1.14, "grad_norm": 0.4645639657974243, "learning_rate": 0.00041013584490866853, "loss": 1.7478, "step": 34296 }, { "epoch": 1.14, "grad_norm": 0.4606679081916809, "learning_rate": 0.0004101261225569853, "loss": 1.6979, "step": 34297 }, { "epoch": 1.14, "grad_norm": 0.45270541310310364, "learning_rate": 0.00041011640007162505, "loss": 1.7749, "step": 34298 }, { "epoch": 1.14, "grad_norm": 0.47735798358917236, "learning_rate": 0.00041010667745259975, "loss": 1.7418, "step": 34299 }, { "epoch": 1.14, "grad_norm": 0.4809136986732483, "learning_rate": 0.0004100969546999211, "loss": 1.8095, "step": 34300 }, { "epoch": 1.14, "grad_norm": 0.4741611182689667, "learning_rate": 0.00041008723181360114, "loss": 1.7767, "step": 34301 }, { "epoch": 1.14, "grad_norm": 0.47816941142082214, "learning_rate": 0.00041007750879365137, "loss": 1.7983, "step": 34302 }, { "epoch": 1.14, "grad_norm": 0.45332056283950806, "learning_rate": 0.0004100677856400837, "loss": 1.8511, "step": 34303 }, { "epoch": 1.14, "grad_norm": 0.45460274815559387, "learning_rate": 0.00041005806235291014, "loss": 1.8188, "step": 34304 }, { "epoch": 1.14, "grad_norm": 0.4619006812572479, "learning_rate": 0.00041004833893214205, "loss": 1.7793, "step": 34305 }, { "epoch": 1.14, "grad_norm": 0.4821837246417999, "learning_rate": 0.0004100386153777917, "loss": 1.8328, "step": 34306 }, { "epoch": 1.14, "grad_norm": 0.4681743085384369, "learning_rate": 0.00041002889168987054, "loss": 1.7849, "step": 34307 }, { "epoch": 1.14, "grad_norm": 0.46507924795150757, "learning_rate": 0.0004100191678683906, "loss": 1.7909, "step": 34308 }, { "epoch": 1.14, "grad_norm": 0.45712006092071533, "learning_rate": 0.00041000944391336356, "loss": 1.7678, "step": 34309 }, { "epoch": 1.14, "grad_norm": 0.46375954151153564, "learning_rate": 0.00040999971982480125, "loss": 1.7558, "step": 34310 }, { "epoch": 1.14, "grad_norm": 0.4697255492210388, "learning_rate": 0.0004099899956027156, "loss": 1.7831, "step": 34311 }, { "epoch": 1.14, "grad_norm": 0.5038606524467468, "learning_rate": 0.0004099802712471182, "loss": 1.7734, "step": 34312 }, { "epoch": 1.14, "grad_norm": 0.4421505033969879, "learning_rate": 0.00040997054675802097, "loss": 1.7918, "step": 34313 }, { "epoch": 1.14, "grad_norm": 0.46378466486930847, "learning_rate": 0.0004099608221354357, "loss": 1.8291, "step": 34314 }, { "epoch": 1.14, "grad_norm": 0.4845871031284332, "learning_rate": 0.00040995109737937424, "loss": 1.8192, "step": 34315 }, { "epoch": 1.14, "grad_norm": 0.4869930148124695, "learning_rate": 0.0004099413724898483, "loss": 1.7136, "step": 34316 }, { "epoch": 1.14, "grad_norm": 0.48534634709358215, "learning_rate": 0.00040993164746686977, "loss": 1.7646, "step": 34317 }, { "epoch": 1.14, "grad_norm": 0.4730137586593628, "learning_rate": 0.00040992192231045034, "loss": 1.7106, "step": 34318 }, { "epoch": 1.14, "grad_norm": 0.4600544273853302, "learning_rate": 0.00040991219702060194, "loss": 1.7018, "step": 34319 }, { "epoch": 1.14, "grad_norm": 0.4715888202190399, "learning_rate": 0.0004099024715973363, "loss": 1.8122, "step": 34320 }, { "epoch": 1.14, "grad_norm": 0.47213152050971985, "learning_rate": 0.0004098927460406653, "loss": 1.8593, "step": 34321 }, { "epoch": 1.14, "grad_norm": 0.4538893699645996, "learning_rate": 0.00040988302035060075, "loss": 1.7714, "step": 34322 }, { "epoch": 1.14, "grad_norm": 0.4810214638710022, "learning_rate": 0.0004098732945271543, "loss": 1.816, "step": 34323 }, { "epoch": 1.14, "grad_norm": 0.46107327938079834, "learning_rate": 0.0004098635685703379, "loss": 1.827, "step": 34324 }, { "epoch": 1.14, "grad_norm": 0.4853149950504303, "learning_rate": 0.00040985384248016333, "loss": 1.7691, "step": 34325 }, { "epoch": 1.14, "grad_norm": 0.4442950487136841, "learning_rate": 0.0004098441162566423, "loss": 1.7825, "step": 34326 }, { "epoch": 1.14, "grad_norm": 0.4482945203781128, "learning_rate": 0.0004098343898997868, "loss": 1.7598, "step": 34327 }, { "epoch": 1.14, "grad_norm": 0.48269712924957275, "learning_rate": 0.0004098246634096085, "loss": 1.7436, "step": 34328 }, { "epoch": 1.14, "grad_norm": 0.4950520992279053, "learning_rate": 0.0004098149367861192, "loss": 1.8624, "step": 34329 }, { "epoch": 1.14, "grad_norm": 0.46659454703330994, "learning_rate": 0.00040980521002933074, "loss": 1.8185, "step": 34330 }, { "epoch": 1.14, "grad_norm": 0.45664823055267334, "learning_rate": 0.000409795483139255, "loss": 1.8075, "step": 34331 }, { "epoch": 1.14, "grad_norm": 0.4573219418525696, "learning_rate": 0.00040978575611590374, "loss": 1.834, "step": 34332 }, { "epoch": 1.14, "grad_norm": 0.45698466897010803, "learning_rate": 0.0004097760289592887, "loss": 1.7792, "step": 34333 }, { "epoch": 1.14, "grad_norm": 0.4552218019962311, "learning_rate": 0.00040976630166942175, "loss": 1.7542, "step": 34334 }, { "epoch": 1.14, "grad_norm": 0.48743051290512085, "learning_rate": 0.0004097565742463146, "loss": 1.7417, "step": 34335 }, { "epoch": 1.14, "grad_norm": 0.4725970923900604, "learning_rate": 0.0004097468466899793, "loss": 1.8169, "step": 34336 }, { "epoch": 1.14, "grad_norm": 0.4660888612270355, "learning_rate": 0.0004097371190004274, "loss": 1.8587, "step": 34337 }, { "epoch": 1.14, "grad_norm": 0.47182920575141907, "learning_rate": 0.00040972739117767085, "loss": 1.7614, "step": 34338 }, { "epoch": 1.14, "grad_norm": 0.4664784371852875, "learning_rate": 0.00040971766322172134, "loss": 1.7943, "step": 34339 }, { "epoch": 1.14, "grad_norm": 0.4750751554965973, "learning_rate": 0.00040970793513259076, "loss": 1.7692, "step": 34340 }, { "epoch": 1.14, "grad_norm": 0.47577226161956787, "learning_rate": 0.00040969820691029094, "loss": 1.8251, "step": 34341 }, { "epoch": 1.14, "grad_norm": 0.47724586725234985, "learning_rate": 0.00040968847855483374, "loss": 1.8171, "step": 34342 }, { "epoch": 1.14, "grad_norm": 0.4647618234157562, "learning_rate": 0.00040967875006623077, "loss": 1.744, "step": 34343 }, { "epoch": 1.14, "grad_norm": 0.46951231360435486, "learning_rate": 0.00040966902144449406, "loss": 1.8018, "step": 34344 }, { "epoch": 1.14, "grad_norm": 0.48514947295188904, "learning_rate": 0.0004096592926896352, "loss": 1.7325, "step": 34345 }, { "epoch": 1.14, "grad_norm": 0.4799445569515228, "learning_rate": 0.0004096495638016662, "loss": 1.8021, "step": 34346 }, { "epoch": 1.14, "grad_norm": 0.44859305024147034, "learning_rate": 0.0004096398347805988, "loss": 1.7562, "step": 34347 }, { "epoch": 1.14, "grad_norm": 0.46548569202423096, "learning_rate": 0.00040963010562644473, "loss": 1.7742, "step": 34348 }, { "epoch": 1.14, "grad_norm": 0.48011621832847595, "learning_rate": 0.00040962037633921595, "loss": 1.7895, "step": 34349 }, { "epoch": 1.14, "grad_norm": 0.48299476504325867, "learning_rate": 0.0004096106469189241, "loss": 1.815, "step": 34350 }, { "epoch": 1.14, "grad_norm": 0.46395251154899597, "learning_rate": 0.0004096009173655811, "loss": 1.8059, "step": 34351 }, { "epoch": 1.14, "grad_norm": 0.45572254061698914, "learning_rate": 0.0004095911876791988, "loss": 1.759, "step": 34352 }, { "epoch": 1.14, "grad_norm": 0.46069589257240295, "learning_rate": 0.0004095814578597888, "loss": 1.7781, "step": 34353 }, { "epoch": 1.14, "grad_norm": 0.4879935681819916, "learning_rate": 0.00040957172790736314, "loss": 1.7615, "step": 34354 }, { "epoch": 1.14, "grad_norm": 0.48158836364746094, "learning_rate": 0.0004095619978219335, "loss": 1.762, "step": 34355 }, { "epoch": 1.14, "grad_norm": 0.4625900387763977, "learning_rate": 0.00040955226760351187, "loss": 1.82, "step": 34356 }, { "epoch": 1.14, "grad_norm": 0.46643468737602234, "learning_rate": 0.0004095425372521098, "loss": 1.7883, "step": 34357 }, { "epoch": 1.14, "grad_norm": 0.46717530488967896, "learning_rate": 0.00040953280676773925, "loss": 1.7989, "step": 34358 }, { "epoch": 1.14, "grad_norm": 0.46943697333335876, "learning_rate": 0.00040952307615041216, "loss": 1.7621, "step": 34359 }, { "epoch": 1.14, "grad_norm": 0.4747634530067444, "learning_rate": 0.00040951334540013993, "loss": 1.696, "step": 34360 }, { "epoch": 1.14, "grad_norm": 0.45722103118896484, "learning_rate": 0.00040950361451693487, "loss": 1.7709, "step": 34361 }, { "epoch": 1.14, "grad_norm": 0.4628719389438629, "learning_rate": 0.0004094938835008084, "loss": 1.7963, "step": 34362 }, { "epoch": 1.14, "grad_norm": 0.49179524183273315, "learning_rate": 0.0004094841523517726, "loss": 1.7418, "step": 34363 }, { "epoch": 1.14, "grad_norm": 0.4799446761608124, "learning_rate": 0.0004094744210698391, "loss": 1.7537, "step": 34364 }, { "epoch": 1.14, "grad_norm": 0.48318442702293396, "learning_rate": 0.00040946468965501984, "loss": 1.7681, "step": 34365 }, { "epoch": 1.14, "grad_norm": 0.4808056056499481, "learning_rate": 0.00040945495810732656, "loss": 1.7531, "step": 34366 }, { "epoch": 1.14, "grad_norm": 0.4813424050807953, "learning_rate": 0.000409445226426771, "loss": 1.8012, "step": 34367 }, { "epoch": 1.14, "grad_norm": 0.47442612051963806, "learning_rate": 0.0004094354946133651, "loss": 1.808, "step": 34368 }, { "epoch": 1.14, "grad_norm": 0.4842296540737152, "learning_rate": 0.00040942576266712075, "loss": 1.745, "step": 34369 }, { "epoch": 1.14, "grad_norm": 0.49124035239219666, "learning_rate": 0.0004094160305880495, "loss": 1.8313, "step": 34370 }, { "epoch": 1.14, "grad_norm": 0.4925179183483124, "learning_rate": 0.0004094062983761633, "loss": 1.7997, "step": 34371 }, { "epoch": 1.14, "grad_norm": 0.48347732424736023, "learning_rate": 0.00040939656603147416, "loss": 1.7753, "step": 34372 }, { "epoch": 1.14, "grad_norm": 0.46642398834228516, "learning_rate": 0.0004093868335539936, "loss": 1.7744, "step": 34373 }, { "epoch": 1.14, "grad_norm": 0.4639321565628052, "learning_rate": 0.0004093771009437335, "loss": 1.7843, "step": 34374 }, { "epoch": 1.14, "grad_norm": 0.5017703175544739, "learning_rate": 0.00040936736820070576, "loss": 1.8159, "step": 34375 }, { "epoch": 1.14, "grad_norm": 0.49642083048820496, "learning_rate": 0.00040935763532492207, "loss": 1.7559, "step": 34376 }, { "epoch": 1.14, "grad_norm": 0.515500545501709, "learning_rate": 0.0004093479023163945, "loss": 1.7763, "step": 34377 }, { "epoch": 1.14, "grad_norm": 0.47240757942199707, "learning_rate": 0.0004093381691751345, "loss": 1.7748, "step": 34378 }, { "epoch": 1.14, "grad_norm": 0.48028022050857544, "learning_rate": 0.0004093284359011542, "loss": 1.8502, "step": 34379 }, { "epoch": 1.14, "grad_norm": 0.46581578254699707, "learning_rate": 0.00040931870249446525, "loss": 1.8366, "step": 34380 }, { "epoch": 1.14, "grad_norm": 0.46279603242874146, "learning_rate": 0.00040930896895507947, "loss": 1.7135, "step": 34381 }, { "epoch": 1.14, "grad_norm": 0.4756470322608948, "learning_rate": 0.00040929923528300875, "loss": 1.7447, "step": 34382 }, { "epoch": 1.14, "grad_norm": 0.4703732430934906, "learning_rate": 0.0004092895014782648, "loss": 1.6953, "step": 34383 }, { "epoch": 1.14, "grad_norm": 0.45114555954933167, "learning_rate": 0.00040927976754085956, "loss": 1.7494, "step": 34384 }, { "epoch": 1.14, "grad_norm": 0.46367108821868896, "learning_rate": 0.00040927003347080473, "loss": 1.8245, "step": 34385 }, { "epoch": 1.14, "grad_norm": 0.4554516673088074, "learning_rate": 0.0004092602992681123, "loss": 1.7955, "step": 34386 }, { "epoch": 1.14, "grad_norm": 0.46905702352523804, "learning_rate": 0.00040925056493279385, "loss": 1.7728, "step": 34387 }, { "epoch": 1.14, "grad_norm": 0.4601277709007263, "learning_rate": 0.0004092408304648613, "loss": 1.7079, "step": 34388 }, { "epoch": 1.14, "grad_norm": 0.45916983485221863, "learning_rate": 0.00040923109586432654, "loss": 1.8041, "step": 34389 }, { "epoch": 1.14, "grad_norm": 0.4666910469532013, "learning_rate": 0.00040922136113120125, "loss": 1.8127, "step": 34390 }, { "epoch": 1.14, "grad_norm": 0.4696753919124603, "learning_rate": 0.00040921162626549745, "loss": 1.8717, "step": 34391 }, { "epoch": 1.14, "grad_norm": 0.48254507780075073, "learning_rate": 0.00040920189126722673, "loss": 1.8364, "step": 34392 }, { "epoch": 1.14, "grad_norm": 0.46440479159355164, "learning_rate": 0.000409192156136401, "loss": 1.7768, "step": 34393 }, { "epoch": 1.14, "grad_norm": 0.48243001103401184, "learning_rate": 0.0004091824208730321, "loss": 1.8027, "step": 34394 }, { "epoch": 1.14, "grad_norm": 0.47491952776908875, "learning_rate": 0.0004091726854771319, "loss": 1.8117, "step": 34395 }, { "epoch": 1.14, "grad_norm": 0.4621240198612213, "learning_rate": 0.00040916294994871206, "loss": 1.7997, "step": 34396 }, { "epoch": 1.14, "grad_norm": 0.4923255741596222, "learning_rate": 0.00040915321428778456, "loss": 1.7895, "step": 34397 }, { "epoch": 1.14, "grad_norm": 0.4602019786834717, "learning_rate": 0.00040914347849436105, "loss": 1.7845, "step": 34398 }, { "epoch": 1.14, "grad_norm": 0.4625673294067383, "learning_rate": 0.0004091337425684535, "loss": 1.7684, "step": 34399 }, { "epoch": 1.14, "grad_norm": 0.4589541554450989, "learning_rate": 0.0004091240065100736, "loss": 1.7576, "step": 34400 }, { "epoch": 1.14, "grad_norm": 0.4672071635723114, "learning_rate": 0.0004091142703192333, "loss": 1.8241, "step": 34401 }, { "epoch": 1.14, "grad_norm": 0.473238080739975, "learning_rate": 0.00040910453399594434, "loss": 1.8197, "step": 34402 }, { "epoch": 1.14, "grad_norm": 0.4678893983364105, "learning_rate": 0.0004090947975402184, "loss": 1.8356, "step": 34403 }, { "epoch": 1.14, "grad_norm": 0.45857685804367065, "learning_rate": 0.0004090850609520677, "loss": 1.8224, "step": 34404 }, { "epoch": 1.14, "grad_norm": 0.4699651896953583, "learning_rate": 0.0004090753242315037, "loss": 1.7755, "step": 34405 }, { "epoch": 1.14, "grad_norm": 0.47995519638061523, "learning_rate": 0.0004090655873785383, "loss": 1.7839, "step": 34406 }, { "epoch": 1.14, "grad_norm": 0.4887615442276001, "learning_rate": 0.0004090558503931834, "loss": 1.789, "step": 34407 }, { "epoch": 1.14, "grad_norm": 0.4470008611679077, "learning_rate": 0.00040904611327545075, "loss": 1.7508, "step": 34408 }, { "epoch": 1.14, "grad_norm": 0.4709462821483612, "learning_rate": 0.0004090363760253522, "loss": 1.7349, "step": 34409 }, { "epoch": 1.14, "grad_norm": 0.46603891253471375, "learning_rate": 0.00040902663864289947, "loss": 1.7816, "step": 34410 }, { "epoch": 1.14, "grad_norm": 0.47875383496284485, "learning_rate": 0.0004090169011281046, "loss": 1.8467, "step": 34411 }, { "epoch": 1.14, "grad_norm": 0.45218855142593384, "learning_rate": 0.00040900716348097916, "loss": 1.7752, "step": 34412 }, { "epoch": 1.14, "grad_norm": 0.4526112973690033, "learning_rate": 0.00040899742570153514, "loss": 1.8125, "step": 34413 }, { "epoch": 1.14, "grad_norm": 0.46797722578048706, "learning_rate": 0.0004089876877897843, "loss": 1.7546, "step": 34414 }, { "epoch": 1.14, "grad_norm": 0.46849945187568665, "learning_rate": 0.00040897794974573836, "loss": 1.8025, "step": 34415 }, { "epoch": 1.15, "grad_norm": 0.4645412266254425, "learning_rate": 0.00040896821156940946, "loss": 1.7308, "step": 34416 }, { "epoch": 1.15, "grad_norm": 0.448178231716156, "learning_rate": 0.000408958473260809, "loss": 1.8727, "step": 34417 }, { "epoch": 1.15, "grad_norm": 0.4552688002586365, "learning_rate": 0.00040894873481994915, "loss": 1.8443, "step": 34418 }, { "epoch": 1.15, "grad_norm": 0.46259504556655884, "learning_rate": 0.0004089389962468415, "loss": 1.7954, "step": 34419 }, { "epoch": 1.15, "grad_norm": 0.47201967239379883, "learning_rate": 0.000408929257541498, "loss": 1.8119, "step": 34420 }, { "epoch": 1.15, "grad_norm": 0.4671049118041992, "learning_rate": 0.0004089195187039305, "loss": 1.8118, "step": 34421 }, { "epoch": 1.15, "grad_norm": 0.45153066515922546, "learning_rate": 0.0004089097797341506, "loss": 1.7635, "step": 34422 }, { "epoch": 1.15, "grad_norm": 0.454296350479126, "learning_rate": 0.0004089000406321704, "loss": 1.7211, "step": 34423 }, { "epoch": 1.15, "grad_norm": 0.46536490321159363, "learning_rate": 0.0004088903013980015, "loss": 1.7505, "step": 34424 }, { "epoch": 1.15, "grad_norm": 0.4793643653392792, "learning_rate": 0.0004088805620316559, "loss": 1.8275, "step": 34425 }, { "epoch": 1.15, "grad_norm": 0.45765724778175354, "learning_rate": 0.0004088708225331453, "loss": 1.749, "step": 34426 }, { "epoch": 1.15, "grad_norm": 0.4769333600997925, "learning_rate": 0.0004088610829024816, "loss": 1.778, "step": 34427 }, { "epoch": 1.15, "grad_norm": 0.4732186496257782, "learning_rate": 0.00040885134313967656, "loss": 1.7779, "step": 34428 }, { "epoch": 1.15, "grad_norm": 0.4744129776954651, "learning_rate": 0.000408841603244742, "loss": 1.822, "step": 34429 }, { "epoch": 1.15, "grad_norm": 0.4689374566078186, "learning_rate": 0.00040883186321768984, "loss": 1.7231, "step": 34430 }, { "epoch": 1.15, "grad_norm": 0.4763988256454468, "learning_rate": 0.00040882212305853177, "loss": 1.815, "step": 34431 }, { "epoch": 1.15, "grad_norm": 0.4697835147380829, "learning_rate": 0.00040881238276727967, "loss": 1.7413, "step": 34432 }, { "epoch": 1.15, "grad_norm": 0.46340471506118774, "learning_rate": 0.00040880264234394544, "loss": 1.7568, "step": 34433 }, { "epoch": 1.15, "grad_norm": 0.47795403003692627, "learning_rate": 0.0004087929017885408, "loss": 1.8104, "step": 34434 }, { "epoch": 1.15, "grad_norm": 0.4695756733417511, "learning_rate": 0.0004087831611010776, "loss": 1.8058, "step": 34435 }, { "epoch": 1.15, "grad_norm": 0.4622379541397095, "learning_rate": 0.00040877342028156766, "loss": 1.7642, "step": 34436 }, { "epoch": 1.15, "grad_norm": 0.4688717722892761, "learning_rate": 0.0004087636793300228, "loss": 1.8052, "step": 34437 }, { "epoch": 1.15, "grad_norm": 0.4646953046321869, "learning_rate": 0.00040875393824645485, "loss": 1.7831, "step": 34438 }, { "epoch": 1.15, "grad_norm": 0.46793997287750244, "learning_rate": 0.0004087441970308758, "loss": 1.7732, "step": 34439 }, { "epoch": 1.15, "grad_norm": 0.4694945514202118, "learning_rate": 0.0004087344556832971, "loss": 1.7182, "step": 34440 }, { "epoch": 1.15, "grad_norm": 0.46826958656311035, "learning_rate": 0.0004087247142037309, "loss": 1.7907, "step": 34441 }, { "epoch": 1.15, "grad_norm": 0.46844998002052307, "learning_rate": 0.00040871497259218893, "loss": 1.6943, "step": 34442 }, { "epoch": 1.15, "grad_norm": 0.4725206792354584, "learning_rate": 0.000408705230848683, "loss": 1.7521, "step": 34443 }, { "epoch": 1.15, "grad_norm": 1.1338908672332764, "learning_rate": 0.00040869548897322486, "loss": 1.7449, "step": 34444 }, { "epoch": 1.15, "grad_norm": 0.4576757550239563, "learning_rate": 0.0004086857469658264, "loss": 1.7486, "step": 34445 }, { "epoch": 1.15, "grad_norm": 0.46711963415145874, "learning_rate": 0.0004086760048264996, "loss": 1.7803, "step": 34446 }, { "epoch": 1.15, "grad_norm": 0.4634194076061249, "learning_rate": 0.000408666262555256, "loss": 1.7906, "step": 34447 }, { "epoch": 1.15, "grad_norm": 0.4684727191925049, "learning_rate": 0.00040865652015210766, "loss": 1.7144, "step": 34448 }, { "epoch": 1.15, "grad_norm": 0.46561217308044434, "learning_rate": 0.0004086467776170663, "loss": 1.7605, "step": 34449 }, { "epoch": 1.15, "grad_norm": 0.47058388590812683, "learning_rate": 0.00040863703495014374, "loss": 1.7754, "step": 34450 }, { "epoch": 1.15, "grad_norm": 0.47051674127578735, "learning_rate": 0.00040862729215135183, "loss": 1.8182, "step": 34451 }, { "epoch": 1.15, "grad_norm": 0.4694440960884094, "learning_rate": 0.00040861754922070235, "loss": 1.7905, "step": 34452 }, { "epoch": 1.15, "grad_norm": 0.47773420810699463, "learning_rate": 0.0004086078061582073, "loss": 1.7791, "step": 34453 }, { "epoch": 1.15, "grad_norm": 0.47906407713890076, "learning_rate": 0.0004085980629638783, "loss": 1.8054, "step": 34454 }, { "epoch": 1.15, "grad_norm": 0.4542015492916107, "learning_rate": 0.0004085883196377272, "loss": 1.7601, "step": 34455 }, { "epoch": 1.15, "grad_norm": 0.45421287417411804, "learning_rate": 0.0004085785761797659, "loss": 1.8272, "step": 34456 }, { "epoch": 1.15, "grad_norm": 0.47468101978302, "learning_rate": 0.00040856883259000627, "loss": 1.7793, "step": 34457 }, { "epoch": 1.15, "grad_norm": 0.4685966670513153, "learning_rate": 0.00040855908886845997, "loss": 1.7614, "step": 34458 }, { "epoch": 1.15, "grad_norm": 0.46617650985717773, "learning_rate": 0.00040854934501513904, "loss": 1.7006, "step": 34459 }, { "epoch": 1.15, "grad_norm": 0.4474557042121887, "learning_rate": 0.0004085396010300552, "loss": 1.7408, "step": 34460 }, { "epoch": 1.15, "grad_norm": 0.4704737663269043, "learning_rate": 0.00040852985691322016, "loss": 1.8153, "step": 34461 }, { "epoch": 1.15, "grad_norm": 0.4658255875110626, "learning_rate": 0.00040852011266464595, "loss": 1.8011, "step": 34462 }, { "epoch": 1.15, "grad_norm": 0.48884549736976624, "learning_rate": 0.00040851036828434427, "loss": 1.775, "step": 34463 }, { "epoch": 1.15, "grad_norm": 0.49784401059150696, "learning_rate": 0.00040850062377232715, "loss": 1.79, "step": 34464 }, { "epoch": 1.15, "grad_norm": 0.5032349824905396, "learning_rate": 0.000408490879128606, "loss": 1.7435, "step": 34465 }, { "epoch": 1.15, "grad_norm": 0.4573313593864441, "learning_rate": 0.0004084811343531931, "loss": 1.6581, "step": 34466 }, { "epoch": 1.15, "grad_norm": 0.48770996928215027, "learning_rate": 0.0004084713894461, "loss": 1.7525, "step": 34467 }, { "epoch": 1.15, "grad_norm": 0.49053090810775757, "learning_rate": 0.0004084616444073386, "loss": 1.753, "step": 34468 }, { "epoch": 1.15, "grad_norm": 0.46044397354125977, "learning_rate": 0.00040845189923692087, "loss": 1.7795, "step": 34469 }, { "epoch": 1.15, "grad_norm": 0.4986017346382141, "learning_rate": 0.00040844215393485837, "loss": 1.7432, "step": 34470 }, { "epoch": 1.15, "grad_norm": 0.5030918717384338, "learning_rate": 0.0004084324085011632, "loss": 1.726, "step": 34471 }, { "epoch": 1.15, "grad_norm": 0.47477805614471436, "learning_rate": 0.0004084226629358469, "loss": 1.7495, "step": 34472 }, { "epoch": 1.15, "grad_norm": 0.470892071723938, "learning_rate": 0.00040841291723892157, "loss": 1.762, "step": 34473 }, { "epoch": 1.15, "grad_norm": 0.47879788279533386, "learning_rate": 0.000408403171410399, "loss": 1.8197, "step": 34474 }, { "epoch": 1.15, "grad_norm": 0.5036525726318359, "learning_rate": 0.00040839342545029083, "loss": 1.7772, "step": 34475 }, { "epoch": 1.15, "grad_norm": 0.4991517961025238, "learning_rate": 0.0004083836793586091, "loss": 1.8262, "step": 34476 }, { "epoch": 1.15, "grad_norm": 0.4563840925693512, "learning_rate": 0.00040837393313536543, "loss": 1.7881, "step": 34477 }, { "epoch": 1.15, "grad_norm": 0.45962414145469666, "learning_rate": 0.00040836418678057195, "loss": 1.7638, "step": 34478 }, { "epoch": 1.15, "grad_norm": 0.9733278155326843, "learning_rate": 0.0004083544402942402, "loss": 1.8318, "step": 34479 }, { "epoch": 1.15, "grad_norm": 0.47501516342163086, "learning_rate": 0.0004083446936763821, "loss": 1.7582, "step": 34480 }, { "epoch": 1.15, "grad_norm": 0.48385804891586304, "learning_rate": 0.0004083349469270095, "loss": 1.8305, "step": 34481 }, { "epoch": 1.15, "grad_norm": 0.45533618330955505, "learning_rate": 0.0004083252000461343, "loss": 1.7613, "step": 34482 }, { "epoch": 1.15, "grad_norm": 0.47373583912849426, "learning_rate": 0.0004083154530337683, "loss": 1.8386, "step": 34483 }, { "epoch": 1.15, "grad_norm": 0.4770544469356537, "learning_rate": 0.00040830570588992326, "loss": 1.7428, "step": 34484 }, { "epoch": 1.15, "grad_norm": 0.4728516638278961, "learning_rate": 0.000408295958614611, "loss": 1.7317, "step": 34485 }, { "epoch": 1.15, "grad_norm": 0.45571231842041016, "learning_rate": 0.0004082862112078434, "loss": 1.7798, "step": 34486 }, { "epoch": 1.15, "grad_norm": 0.4845404326915741, "learning_rate": 0.00040827646366963237, "loss": 1.7502, "step": 34487 }, { "epoch": 1.15, "grad_norm": 0.44363921880722046, "learning_rate": 0.0004082667159999896, "loss": 1.7709, "step": 34488 }, { "epoch": 1.15, "grad_norm": 0.4623889923095703, "learning_rate": 0.0004082569681989271, "loss": 1.8213, "step": 34489 }, { "epoch": 1.15, "grad_norm": 0.4703550338745117, "learning_rate": 0.0004082472202664565, "loss": 1.7603, "step": 34490 }, { "epoch": 1.15, "grad_norm": 0.45948803424835205, "learning_rate": 0.00040823747220258973, "loss": 1.6689, "step": 34491 }, { "epoch": 1.15, "grad_norm": 0.46200495958328247, "learning_rate": 0.0004082277240073386, "loss": 1.803, "step": 34492 }, { "epoch": 1.15, "grad_norm": 0.46795833110809326, "learning_rate": 0.00040821797568071504, "loss": 1.7707, "step": 34493 }, { "epoch": 1.15, "grad_norm": 0.5072764158248901, "learning_rate": 0.0004082082272227308, "loss": 1.8019, "step": 34494 }, { "epoch": 1.15, "grad_norm": 0.46345534920692444, "learning_rate": 0.0004081984786333976, "loss": 1.8243, "step": 34495 }, { "epoch": 1.15, "grad_norm": 0.46449118852615356, "learning_rate": 0.0004081887299127275, "loss": 1.7902, "step": 34496 }, { "epoch": 1.15, "grad_norm": 0.4439811110496521, "learning_rate": 0.00040817898106073214, "loss": 1.7647, "step": 34497 }, { "epoch": 1.15, "grad_norm": 0.48028725385665894, "learning_rate": 0.00040816923207742347, "loss": 1.7926, "step": 34498 }, { "epoch": 1.15, "grad_norm": 0.4825141727924347, "learning_rate": 0.00040815948296281325, "loss": 1.7136, "step": 34499 }, { "epoch": 1.15, "grad_norm": 0.449922114610672, "learning_rate": 0.00040814973371691346, "loss": 1.7935, "step": 34500 }, { "epoch": 1.15, "grad_norm": 0.45732298493385315, "learning_rate": 0.00040813998433973577, "loss": 1.8186, "step": 34501 }, { "epoch": 1.15, "grad_norm": 0.4594782888889313, "learning_rate": 0.000408130234831292, "loss": 1.7812, "step": 34502 }, { "epoch": 1.15, "grad_norm": 0.47772541642189026, "learning_rate": 0.00040812048519159414, "loss": 1.8973, "step": 34503 }, { "epoch": 1.15, "grad_norm": 0.45534655451774597, "learning_rate": 0.00040811073542065395, "loss": 1.7582, "step": 34504 }, { "epoch": 1.15, "grad_norm": 0.47076356410980225, "learning_rate": 0.0004081009855184833, "loss": 1.8131, "step": 34505 }, { "epoch": 1.15, "grad_norm": 0.45691820979118347, "learning_rate": 0.0004080912354850938, "loss": 1.803, "step": 34506 }, { "epoch": 1.15, "grad_norm": 0.46913260221481323, "learning_rate": 0.0004080814853204976, "loss": 1.8164, "step": 34507 }, { "epoch": 1.15, "grad_norm": 0.4815545678138733, "learning_rate": 0.00040807173502470644, "loss": 1.8114, "step": 34508 }, { "epoch": 1.15, "grad_norm": 0.4598948061466217, "learning_rate": 0.00040806198459773205, "loss": 1.7921, "step": 34509 }, { "epoch": 1.15, "grad_norm": 0.4791416823863983, "learning_rate": 0.0004080522340395864, "loss": 1.7337, "step": 34510 }, { "epoch": 1.15, "grad_norm": 0.4551321864128113, "learning_rate": 0.00040804248335028114, "loss": 1.7551, "step": 34511 }, { "epoch": 1.15, "grad_norm": 0.4604061543941498, "learning_rate": 0.00040803273252982826, "loss": 1.6837, "step": 34512 }, { "epoch": 1.15, "grad_norm": 0.45510193705558777, "learning_rate": 0.0004080229815782396, "loss": 1.7845, "step": 34513 }, { "epoch": 1.15, "grad_norm": 0.47764891386032104, "learning_rate": 0.000408013230495527, "loss": 1.7883, "step": 34514 }, { "epoch": 1.15, "grad_norm": 0.47779133915901184, "learning_rate": 0.0004080034792817022, "loss": 1.8511, "step": 34515 }, { "epoch": 1.15, "grad_norm": 0.4805262088775635, "learning_rate": 0.0004079937279367771, "loss": 1.7354, "step": 34516 }, { "epoch": 1.15, "grad_norm": 0.4697893559932709, "learning_rate": 0.0004079839764607635, "loss": 1.8157, "step": 34517 }, { "epoch": 1.15, "grad_norm": 0.4629004895687103, "learning_rate": 0.00040797422485367325, "loss": 1.7973, "step": 34518 }, { "epoch": 1.15, "grad_norm": 0.47615960240364075, "learning_rate": 0.0004079644731155183, "loss": 1.82, "step": 34519 }, { "epoch": 1.15, "grad_norm": 0.500251829624176, "learning_rate": 0.0004079547212463102, "loss": 1.7598, "step": 34520 }, { "epoch": 1.15, "grad_norm": 0.4621956944465637, "learning_rate": 0.00040794496924606116, "loss": 1.6986, "step": 34521 }, { "epoch": 1.15, "grad_norm": 0.4595380127429962, "learning_rate": 0.0004079352171147828, "loss": 1.7587, "step": 34522 }, { "epoch": 1.15, "grad_norm": 0.45489075779914856, "learning_rate": 0.00040792546485248686, "loss": 1.7737, "step": 34523 }, { "epoch": 1.15, "grad_norm": 0.4714823365211487, "learning_rate": 0.00040791571245918547, "loss": 1.8253, "step": 34524 }, { "epoch": 1.15, "grad_norm": 0.4804213345050812, "learning_rate": 0.0004079059599348902, "loss": 1.8064, "step": 34525 }, { "epoch": 1.15, "grad_norm": 0.4830242097377777, "learning_rate": 0.00040789620727961306, "loss": 1.777, "step": 34526 }, { "epoch": 1.15, "grad_norm": 0.4692004323005676, "learning_rate": 0.00040788645449336573, "loss": 1.7645, "step": 34527 }, { "epoch": 1.15, "grad_norm": 0.47094714641571045, "learning_rate": 0.0004078767015761603, "loss": 1.807, "step": 34528 }, { "epoch": 1.15, "grad_norm": 0.4646129012107849, "learning_rate": 0.0004078669485280083, "loss": 1.8076, "step": 34529 }, { "epoch": 1.15, "grad_norm": 0.4690951108932495, "learning_rate": 0.00040785719534892174, "loss": 1.7639, "step": 34530 }, { "epoch": 1.15, "grad_norm": 0.4877476096153259, "learning_rate": 0.00040784744203891255, "loss": 1.8419, "step": 34531 }, { "epoch": 1.15, "grad_norm": 0.4617994725704193, "learning_rate": 0.0004078376885979923, "loss": 1.7619, "step": 34532 }, { "epoch": 1.15, "grad_norm": 0.451576828956604, "learning_rate": 0.00040782793502617317, "loss": 1.7869, "step": 34533 }, { "epoch": 1.15, "grad_norm": 0.46184831857681274, "learning_rate": 0.00040781818132346655, "loss": 1.743, "step": 34534 }, { "epoch": 1.15, "grad_norm": 0.5035679936408997, "learning_rate": 0.00040780842748988475, "loss": 1.8073, "step": 34535 }, { "epoch": 1.15, "grad_norm": 0.7367052435874939, "learning_rate": 0.0004077986735254394, "loss": 1.8936, "step": 34536 }, { "epoch": 1.15, "grad_norm": 0.4609145224094391, "learning_rate": 0.00040778891943014223, "loss": 1.752, "step": 34537 }, { "epoch": 1.15, "grad_norm": 0.4678976535797119, "learning_rate": 0.00040777916520400536, "loss": 1.7841, "step": 34538 }, { "epoch": 1.15, "grad_norm": 0.5049474239349365, "learning_rate": 0.00040776941084704025, "loss": 1.8262, "step": 34539 }, { "epoch": 1.15, "grad_norm": 0.49072787165641785, "learning_rate": 0.00040775965635925915, "loss": 1.7448, "step": 34540 }, { "epoch": 1.15, "grad_norm": 0.451142281293869, "learning_rate": 0.0004077499017406736, "loss": 1.7006, "step": 34541 }, { "epoch": 1.15, "grad_norm": 0.47031867504119873, "learning_rate": 0.00040774014699129557, "loss": 1.784, "step": 34542 }, { "epoch": 1.15, "grad_norm": 0.48842403292655945, "learning_rate": 0.00040773039211113683, "loss": 1.7485, "step": 34543 }, { "epoch": 1.15, "grad_norm": 0.4950867295265198, "learning_rate": 0.00040772063710020926, "loss": 1.8344, "step": 34544 }, { "epoch": 1.15, "grad_norm": 0.4614097774028778, "learning_rate": 0.00040771088195852486, "loss": 1.8147, "step": 34545 }, { "epoch": 1.15, "grad_norm": 0.4649145305156708, "learning_rate": 0.00040770112668609516, "loss": 1.7842, "step": 34546 }, { "epoch": 1.15, "grad_norm": 0.46069857478141785, "learning_rate": 0.00040769137128293225, "loss": 1.7335, "step": 34547 }, { "epoch": 1.15, "grad_norm": 0.46363693475723267, "learning_rate": 0.00040768161574904785, "loss": 1.7915, "step": 34548 }, { "epoch": 1.15, "grad_norm": 0.479968398809433, "learning_rate": 0.0004076718600844538, "loss": 1.7173, "step": 34549 }, { "epoch": 1.15, "grad_norm": 0.4768839478492737, "learning_rate": 0.00040766210428916204, "loss": 1.8551, "step": 34550 }, { "epoch": 1.15, "grad_norm": 0.4512421190738678, "learning_rate": 0.0004076523483631843, "loss": 1.7903, "step": 34551 }, { "epoch": 1.15, "grad_norm": 0.48841631412506104, "learning_rate": 0.00040764259230653254, "loss": 1.8585, "step": 34552 }, { "epoch": 1.15, "grad_norm": 0.454212486743927, "learning_rate": 0.0004076328361192185, "loss": 1.8776, "step": 34553 }, { "epoch": 1.15, "grad_norm": 0.47687670588493347, "learning_rate": 0.000407623079801254, "loss": 1.7368, "step": 34554 }, { "epoch": 1.15, "grad_norm": 0.4615732431411743, "learning_rate": 0.000407613323352651, "loss": 1.7697, "step": 34555 }, { "epoch": 1.15, "grad_norm": 0.4675757586956024, "learning_rate": 0.0004076035667734213, "loss": 1.7728, "step": 34556 }, { "epoch": 1.15, "grad_norm": 0.46306806802749634, "learning_rate": 0.0004075938100635766, "loss": 1.8285, "step": 34557 }, { "epoch": 1.15, "grad_norm": 0.44287440180778503, "learning_rate": 0.00040758405322312904, "loss": 1.839, "step": 34558 }, { "epoch": 1.15, "grad_norm": 0.4657534062862396, "learning_rate": 0.0004075742962520902, "loss": 1.7789, "step": 34559 }, { "epoch": 1.15, "grad_norm": 0.48111531138420105, "learning_rate": 0.000407564539150472, "loss": 1.7713, "step": 34560 }, { "epoch": 1.15, "grad_norm": 0.46344783902168274, "learning_rate": 0.0004075547819182863, "loss": 1.7217, "step": 34561 }, { "epoch": 1.15, "grad_norm": 0.47713780403137207, "learning_rate": 0.0004075450245555449, "loss": 1.8133, "step": 34562 }, { "epoch": 1.15, "grad_norm": 0.44766923785209656, "learning_rate": 0.00040753526706225983, "loss": 1.6626, "step": 34563 }, { "epoch": 1.15, "grad_norm": 0.47270700335502625, "learning_rate": 0.00040752550943844266, "loss": 1.8025, "step": 34564 }, { "epoch": 1.15, "grad_norm": 0.4710085690021515, "learning_rate": 0.0004075157516841054, "loss": 1.7515, "step": 34565 }, { "epoch": 1.15, "grad_norm": 0.45331957936286926, "learning_rate": 0.00040750599379925996, "loss": 1.7237, "step": 34566 }, { "epoch": 1.15, "grad_norm": 0.49364855885505676, "learning_rate": 0.0004074962357839179, "loss": 1.8352, "step": 34567 }, { "epoch": 1.15, "grad_norm": 0.46840307116508484, "learning_rate": 0.0004074864776380914, "loss": 1.7417, "step": 34568 }, { "epoch": 1.15, "grad_norm": 0.4482008218765259, "learning_rate": 0.00040747671936179207, "loss": 1.7972, "step": 34569 }, { "epoch": 1.15, "grad_norm": 0.47070789337158203, "learning_rate": 0.0004074669609550319, "loss": 1.7638, "step": 34570 }, { "epoch": 1.15, "grad_norm": 0.4769757390022278, "learning_rate": 0.0004074572024178227, "loss": 1.7343, "step": 34571 }, { "epoch": 1.15, "grad_norm": 0.4659740924835205, "learning_rate": 0.00040744744375017616, "loss": 1.7522, "step": 34572 }, { "epoch": 1.15, "grad_norm": 0.4659271240234375, "learning_rate": 0.00040743768495210434, "loss": 1.8623, "step": 34573 }, { "epoch": 1.15, "grad_norm": 0.47007009387016296, "learning_rate": 0.000407427926023619, "loss": 1.8414, "step": 34574 }, { "epoch": 1.15, "grad_norm": 0.8808431625366211, "learning_rate": 0.00040741816696473197, "loss": 1.7606, "step": 34575 }, { "epoch": 1.15, "grad_norm": 0.4996618628501892, "learning_rate": 0.0004074084077754551, "loss": 1.8499, "step": 34576 }, { "epoch": 1.15, "grad_norm": 0.46942776441574097, "learning_rate": 0.0004073986484558003, "loss": 1.7905, "step": 34577 }, { "epoch": 1.15, "grad_norm": 0.4731425940990448, "learning_rate": 0.0004073888890057793, "loss": 1.8313, "step": 34578 }, { "epoch": 1.15, "grad_norm": 0.45341062545776367, "learning_rate": 0.0004073791294254041, "loss": 1.77, "step": 34579 }, { "epoch": 1.15, "grad_norm": 0.48071685433387756, "learning_rate": 0.0004073693697146863, "loss": 1.8462, "step": 34580 }, { "epoch": 1.15, "grad_norm": 0.48825907707214355, "learning_rate": 0.0004073596098736381, "loss": 1.8372, "step": 34581 }, { "epoch": 1.15, "grad_norm": 0.47399696707725525, "learning_rate": 0.00040734984990227095, "loss": 1.8168, "step": 34582 }, { "epoch": 1.15, "grad_norm": 0.46621015667915344, "learning_rate": 0.0004073400898005971, "loss": 1.8444, "step": 34583 }, { "epoch": 1.15, "grad_norm": 0.4865877330303192, "learning_rate": 0.00040733032956862805, "loss": 1.8269, "step": 34584 }, { "epoch": 1.15, "grad_norm": 0.45808377861976624, "learning_rate": 0.00040732056920637585, "loss": 1.7586, "step": 34585 }, { "epoch": 1.15, "grad_norm": 0.4755324125289917, "learning_rate": 0.00040731080871385237, "loss": 1.7527, "step": 34586 }, { "epoch": 1.15, "grad_norm": 0.45192956924438477, "learning_rate": 0.0004073010480910692, "loss": 1.7292, "step": 34587 }, { "epoch": 1.15, "grad_norm": 0.4782790243625641, "learning_rate": 0.0004072912873380385, "loss": 1.8348, "step": 34588 }, { "epoch": 1.15, "grad_norm": 0.4568172097206116, "learning_rate": 0.0004072815264547719, "loss": 1.735, "step": 34589 }, { "epoch": 1.15, "grad_norm": 0.4934074878692627, "learning_rate": 0.00040727176544128144, "loss": 1.7804, "step": 34590 }, { "epoch": 1.15, "grad_norm": 0.46724948287010193, "learning_rate": 0.00040726200429757876, "loss": 1.7871, "step": 34591 }, { "epoch": 1.15, "grad_norm": 0.4680320918560028, "learning_rate": 0.0004072522430236759, "loss": 1.8216, "step": 34592 }, { "epoch": 1.15, "grad_norm": 0.5046606063842773, "learning_rate": 0.00040724248161958464, "loss": 1.9078, "step": 34593 }, { "epoch": 1.15, "grad_norm": 0.5005550980567932, "learning_rate": 0.00040723272008531667, "loss": 1.8392, "step": 34594 }, { "epoch": 1.15, "grad_norm": 0.4881473481655121, "learning_rate": 0.0004072229584208841, "loss": 1.7253, "step": 34595 }, { "epoch": 1.15, "grad_norm": 0.4697575867176056, "learning_rate": 0.0004072131966262986, "loss": 1.7528, "step": 34596 }, { "epoch": 1.15, "grad_norm": 0.477713406085968, "learning_rate": 0.0004072034347015721, "loss": 1.9012, "step": 34597 }, { "epoch": 1.15, "grad_norm": 0.48255205154418945, "learning_rate": 0.00040719367264671635, "loss": 1.7861, "step": 34598 }, { "epoch": 1.15, "grad_norm": 0.4646589159965515, "learning_rate": 0.00040718391046174336, "loss": 1.7545, "step": 34599 }, { "epoch": 1.15, "grad_norm": 0.46813255548477173, "learning_rate": 0.0004071741481466649, "loss": 1.7858, "step": 34600 }, { "epoch": 1.15, "grad_norm": 0.46072933077812195, "learning_rate": 0.0004071643857014928, "loss": 1.7657, "step": 34601 }, { "epoch": 1.15, "grad_norm": 0.46498239040374756, "learning_rate": 0.00040715462312623893, "loss": 1.7749, "step": 34602 }, { "epoch": 1.15, "grad_norm": 0.49756330251693726, "learning_rate": 0.0004071448604209151, "loss": 1.8053, "step": 34603 }, { "epoch": 1.15, "grad_norm": 0.4821317493915558, "learning_rate": 0.0004071350975855332, "loss": 1.7807, "step": 34604 }, { "epoch": 1.15, "grad_norm": 0.4978969395160675, "learning_rate": 0.00040712533462010516, "loss": 1.7831, "step": 34605 }, { "epoch": 1.15, "grad_norm": 0.47478875517845154, "learning_rate": 0.00040711557152464266, "loss": 1.7852, "step": 34606 }, { "epoch": 1.15, "grad_norm": 0.4675844609737396, "learning_rate": 0.00040710580829915764, "loss": 1.8662, "step": 34607 }, { "epoch": 1.15, "grad_norm": 0.4794701933860779, "learning_rate": 0.000407096044943662, "loss": 1.7864, "step": 34608 }, { "epoch": 1.15, "grad_norm": 1.724153995513916, "learning_rate": 0.0004070862814581675, "loss": 1.9193, "step": 34609 }, { "epoch": 1.15, "grad_norm": 0.4662577211856842, "learning_rate": 0.0004070765178426861, "loss": 1.753, "step": 34610 }, { "epoch": 1.15, "grad_norm": 0.4653145968914032, "learning_rate": 0.0004070667540972295, "loss": 1.7247, "step": 34611 }, { "epoch": 1.15, "grad_norm": 0.47133880853652954, "learning_rate": 0.00040705699022180965, "loss": 1.7628, "step": 34612 }, { "epoch": 1.15, "grad_norm": 0.44773557782173157, "learning_rate": 0.00040704722621643844, "loss": 1.7721, "step": 34613 }, { "epoch": 1.15, "grad_norm": 0.4540678560733795, "learning_rate": 0.00040703746208112766, "loss": 1.7375, "step": 34614 }, { "epoch": 1.15, "grad_norm": 0.45824697613716125, "learning_rate": 0.0004070276978158891, "loss": 1.7754, "step": 34615 }, { "epoch": 1.15, "grad_norm": 0.45666852593421936, "learning_rate": 0.0004070179334207347, "loss": 1.7557, "step": 34616 }, { "epoch": 1.15, "grad_norm": 0.4876408278942108, "learning_rate": 0.00040700816889567635, "loss": 1.7706, "step": 34617 }, { "epoch": 1.15, "grad_norm": 0.47592803835868835, "learning_rate": 0.00040699840424072585, "loss": 1.8169, "step": 34618 }, { "epoch": 1.15, "grad_norm": 0.4616205394268036, "learning_rate": 0.000406988639455895, "loss": 1.8093, "step": 34619 }, { "epoch": 1.15, "grad_norm": 0.47719845175743103, "learning_rate": 0.00040697887454119576, "loss": 1.7696, "step": 34620 }, { "epoch": 1.15, "grad_norm": 0.48651123046875, "learning_rate": 0.0004069691094966399, "loss": 1.8735, "step": 34621 }, { "epoch": 1.15, "grad_norm": 0.47301772236824036, "learning_rate": 0.00040695934432223937, "loss": 1.8442, "step": 34622 }, { "epoch": 1.15, "grad_norm": 0.48976582288742065, "learning_rate": 0.00040694957901800583, "loss": 1.7673, "step": 34623 }, { "epoch": 1.15, "grad_norm": 0.4638907313346863, "learning_rate": 0.00040693981358395134, "loss": 1.7324, "step": 34624 }, { "epoch": 1.15, "grad_norm": 0.47211042046546936, "learning_rate": 0.0004069300480200877, "loss": 1.821, "step": 34625 }, { "epoch": 1.15, "grad_norm": 0.45082008838653564, "learning_rate": 0.0004069202823264267, "loss": 1.8373, "step": 34626 }, { "epoch": 1.15, "grad_norm": 0.46291688084602356, "learning_rate": 0.0004069105165029802, "loss": 1.7588, "step": 34627 }, { "epoch": 1.15, "grad_norm": 0.46995165944099426, "learning_rate": 0.0004069007505497601, "loss": 1.7974, "step": 34628 }, { "epoch": 1.15, "grad_norm": 0.45913437008857727, "learning_rate": 0.0004068909844667782, "loss": 1.7918, "step": 34629 }, { "epoch": 1.15, "grad_norm": 0.4598175287246704, "learning_rate": 0.0004068812182540465, "loss": 1.7873, "step": 34630 }, { "epoch": 1.15, "grad_norm": 0.4601013958454132, "learning_rate": 0.00040687145191157674, "loss": 1.6888, "step": 34631 }, { "epoch": 1.15, "grad_norm": 0.4383290410041809, "learning_rate": 0.0004068616854393807, "loss": 1.7185, "step": 34632 }, { "epoch": 1.15, "grad_norm": 0.4665602445602417, "learning_rate": 0.00040685191883747037, "loss": 1.8384, "step": 34633 }, { "epoch": 1.15, "grad_norm": 0.47586628794670105, "learning_rate": 0.00040684215210585756, "loss": 1.7642, "step": 34634 }, { "epoch": 1.15, "grad_norm": 0.48100024461746216, "learning_rate": 0.00040683238524455407, "loss": 1.7082, "step": 34635 }, { "epoch": 1.15, "grad_norm": 0.4661114513874054, "learning_rate": 0.00040682261825357186, "loss": 1.8214, "step": 34636 }, { "epoch": 1.15, "grad_norm": 0.4664965867996216, "learning_rate": 0.00040681285113292265, "loss": 1.7553, "step": 34637 }, { "epoch": 1.15, "grad_norm": 0.46146053075790405, "learning_rate": 0.00040680308388261853, "loss": 1.7944, "step": 34638 }, { "epoch": 1.15, "grad_norm": 0.4580053687095642, "learning_rate": 0.0004067933165026711, "loss": 1.7873, "step": 34639 }, { "epoch": 1.15, "grad_norm": 0.46623626351356506, "learning_rate": 0.0004067835489930923, "loss": 1.8721, "step": 34640 }, { "epoch": 1.15, "grad_norm": 0.47509071230888367, "learning_rate": 0.00040677378135389416, "loss": 1.8651, "step": 34641 }, { "epoch": 1.15, "grad_norm": 0.4878237843513489, "learning_rate": 0.0004067640135850881, "loss": 1.7925, "step": 34642 }, { "epoch": 1.15, "grad_norm": 0.46345093846321106, "learning_rate": 0.0004067542456866865, "loss": 1.7952, "step": 34643 }, { "epoch": 1.15, "grad_norm": 0.45888951420783997, "learning_rate": 0.0004067444776587008, "loss": 1.805, "step": 34644 }, { "epoch": 1.15, "grad_norm": 0.45855382084846497, "learning_rate": 0.0004067347095011432, "loss": 1.8256, "step": 34645 }, { "epoch": 1.15, "grad_norm": 0.4610881805419922, "learning_rate": 0.0004067249412140253, "loss": 1.7042, "step": 34646 }, { "epoch": 1.15, "grad_norm": 0.46714726090431213, "learning_rate": 0.000406715172797359, "loss": 1.7893, "step": 34647 }, { "epoch": 1.15, "grad_norm": 0.4570082128047943, "learning_rate": 0.0004067054042511563, "loss": 1.8033, "step": 34648 }, { "epoch": 1.15, "grad_norm": 0.46596837043762207, "learning_rate": 0.00040669563557542887, "loss": 1.805, "step": 34649 }, { "epoch": 1.15, "grad_norm": 0.4693315327167511, "learning_rate": 0.00040668586677018877, "loss": 1.8092, "step": 34650 }, { "epoch": 1.15, "grad_norm": 0.47897791862487793, "learning_rate": 0.00040667609783544766, "loss": 1.826, "step": 34651 }, { "epoch": 1.15, "grad_norm": 0.4699903428554535, "learning_rate": 0.0004066663287712176, "loss": 1.7884, "step": 34652 }, { "epoch": 1.15, "grad_norm": 0.46516722440719604, "learning_rate": 0.0004066565595775102, "loss": 1.7541, "step": 34653 }, { "epoch": 1.15, "grad_norm": 0.4688759446144104, "learning_rate": 0.0004066467902543375, "loss": 1.8299, "step": 34654 }, { "epoch": 1.15, "grad_norm": 0.4657851457595825, "learning_rate": 0.0004066370208017113, "loss": 1.7965, "step": 34655 }, { "epoch": 1.15, "grad_norm": 0.49727335572242737, "learning_rate": 0.00040662725121964346, "loss": 1.9321, "step": 34656 }, { "epoch": 1.15, "grad_norm": 0.4982762932777405, "learning_rate": 0.00040661748150814587, "loss": 1.8192, "step": 34657 }, { "epoch": 1.15, "grad_norm": 0.47717687487602234, "learning_rate": 0.00040660771166723043, "loss": 1.7848, "step": 34658 }, { "epoch": 1.15, "grad_norm": 0.48388373851776123, "learning_rate": 0.0004065979416969088, "loss": 1.8693, "step": 34659 }, { "epoch": 1.15, "grad_norm": 0.4966263175010681, "learning_rate": 0.00040658817159719304, "loss": 1.7743, "step": 34660 }, { "epoch": 1.15, "grad_norm": 0.49345970153808594, "learning_rate": 0.00040657840136809495, "loss": 1.7336, "step": 34661 }, { "epoch": 1.15, "grad_norm": 0.4776464104652405, "learning_rate": 0.00040656863100962644, "loss": 1.8372, "step": 34662 }, { "epoch": 1.15, "grad_norm": 0.4825681746006012, "learning_rate": 0.0004065588605217992, "loss": 1.7633, "step": 34663 }, { "epoch": 1.15, "grad_norm": 0.49644285440444946, "learning_rate": 0.0004065490899046253, "loss": 1.7927, "step": 34664 }, { "epoch": 1.15, "grad_norm": 0.4680781066417694, "learning_rate": 0.0004065393191581164, "loss": 1.7422, "step": 34665 }, { "epoch": 1.15, "grad_norm": 0.4694972336292267, "learning_rate": 0.0004065295482822846, "loss": 1.7723, "step": 34666 }, { "epoch": 1.15, "grad_norm": 0.470680296421051, "learning_rate": 0.0004065197772771415, "loss": 1.7373, "step": 34667 }, { "epoch": 1.15, "grad_norm": 0.4900299906730652, "learning_rate": 0.00040651000614269926, "loss": 1.7781, "step": 34668 }, { "epoch": 1.15, "grad_norm": 0.48323237895965576, "learning_rate": 0.0004065002348789694, "loss": 1.8287, "step": 34669 }, { "epoch": 1.15, "grad_norm": 0.47365912795066833, "learning_rate": 0.000406490463485964, "loss": 1.7504, "step": 34670 }, { "epoch": 1.15, "grad_norm": 0.4733112156391144, "learning_rate": 0.00040648069196369486, "loss": 1.744, "step": 34671 }, { "epoch": 1.15, "grad_norm": 0.46599164605140686, "learning_rate": 0.00040647092031217383, "loss": 1.7286, "step": 34672 }, { "epoch": 1.15, "grad_norm": 0.44658347964286804, "learning_rate": 0.00040646114853141286, "loss": 1.7536, "step": 34673 }, { "epoch": 1.15, "grad_norm": 0.4562152028083801, "learning_rate": 0.0004064513766214236, "loss": 1.7741, "step": 34674 }, { "epoch": 1.15, "grad_norm": 0.4741900861263275, "learning_rate": 0.00040644160458221817, "loss": 1.8135, "step": 34675 }, { "epoch": 1.15, "grad_norm": 0.4704137444496155, "learning_rate": 0.00040643183241380825, "loss": 1.7207, "step": 34676 }, { "epoch": 1.15, "grad_norm": 0.4622959494590759, "learning_rate": 0.0004064220601162059, "loss": 1.8036, "step": 34677 }, { "epoch": 1.15, "grad_norm": 0.484762042760849, "learning_rate": 0.0004064122876894227, "loss": 1.7237, "step": 34678 }, { "epoch": 1.15, "grad_norm": 0.5002055168151855, "learning_rate": 0.00040640251513347075, "loss": 1.8779, "step": 34679 }, { "epoch": 1.15, "grad_norm": 0.46261879801750183, "learning_rate": 0.00040639274244836185, "loss": 1.7748, "step": 34680 }, { "epoch": 1.15, "grad_norm": 0.4864138066768646, "learning_rate": 0.00040638296963410765, "loss": 1.8117, "step": 34681 }, { "epoch": 1.15, "grad_norm": 0.4707117974758148, "learning_rate": 0.0004063731966907204, "loss": 1.7699, "step": 34682 }, { "epoch": 1.15, "grad_norm": 0.47561222314834595, "learning_rate": 0.0004063634236182117, "loss": 1.8045, "step": 34683 }, { "epoch": 1.15, "grad_norm": 0.46882662177085876, "learning_rate": 0.00040635365041659344, "loss": 1.7667, "step": 34684 }, { "epoch": 1.15, "grad_norm": 0.48340466618537903, "learning_rate": 0.0004063438770858776, "loss": 1.7305, "step": 34685 }, { "epoch": 1.15, "grad_norm": 0.4679708182811737, "learning_rate": 0.0004063341036260759, "loss": 1.7612, "step": 34686 }, { "epoch": 1.15, "grad_norm": 0.4607875347137451, "learning_rate": 0.0004063243300372003, "loss": 1.7866, "step": 34687 }, { "epoch": 1.15, "grad_norm": 0.49598103761672974, "learning_rate": 0.0004063145563192626, "loss": 1.8259, "step": 34688 }, { "epoch": 1.15, "grad_norm": 0.46426957845687866, "learning_rate": 0.0004063047824722747, "loss": 1.7964, "step": 34689 }, { "epoch": 1.15, "grad_norm": 0.47388821840286255, "learning_rate": 0.00040629500849624845, "loss": 1.829, "step": 34690 }, { "epoch": 1.15, "grad_norm": 0.4638177156448364, "learning_rate": 0.00040628523439119573, "loss": 1.8387, "step": 34691 }, { "epoch": 1.15, "grad_norm": 0.46636027097702026, "learning_rate": 0.0004062754601571283, "loss": 1.7302, "step": 34692 }, { "epoch": 1.15, "grad_norm": 0.5077643394470215, "learning_rate": 0.00040626568579405837, "loss": 1.7703, "step": 34693 }, { "epoch": 1.15, "grad_norm": 0.4908653795719147, "learning_rate": 0.00040625591130199736, "loss": 1.7622, "step": 34694 }, { "epoch": 1.15, "grad_norm": 0.47404754161834717, "learning_rate": 0.0004062461366809574, "loss": 1.6576, "step": 34695 }, { "epoch": 1.15, "grad_norm": 0.46059590578079224, "learning_rate": 0.00040623636193095025, "loss": 1.7982, "step": 34696 }, { "epoch": 1.15, "grad_norm": 0.46179524064064026, "learning_rate": 0.00040622658705198773, "loss": 1.7914, "step": 34697 }, { "epoch": 1.15, "grad_norm": 0.48743247985839844, "learning_rate": 0.0004062168120440819, "loss": 1.8222, "step": 34698 }, { "epoch": 1.15, "grad_norm": 0.4516831636428833, "learning_rate": 0.0004062070369072444, "loss": 1.6403, "step": 34699 }, { "epoch": 1.15, "grad_norm": 0.4583803117275238, "learning_rate": 0.0004061972616414874, "loss": 1.7495, "step": 34700 }, { "epoch": 1.15, "grad_norm": 0.4664221704006195, "learning_rate": 0.00040618748624682245, "loss": 1.7378, "step": 34701 }, { "epoch": 1.15, "grad_norm": 0.46987980604171753, "learning_rate": 0.00040617771072326157, "loss": 1.8329, "step": 34702 }, { "epoch": 1.15, "grad_norm": 0.4702504873275757, "learning_rate": 0.00040616793507081654, "loss": 1.7934, "step": 34703 }, { "epoch": 1.15, "grad_norm": 0.4878246784210205, "learning_rate": 0.0004061581592894993, "loss": 1.7529, "step": 34704 }, { "epoch": 1.15, "grad_norm": 0.46600446105003357, "learning_rate": 0.0004061483833793217, "loss": 1.6926, "step": 34705 }, { "epoch": 1.15, "grad_norm": 0.46225792169570923, "learning_rate": 0.0004061386073402956, "loss": 1.7912, "step": 34706 }, { "epoch": 1.15, "grad_norm": 0.46852824091911316, "learning_rate": 0.0004061288311724329, "loss": 1.7819, "step": 34707 }, { "epoch": 1.15, "grad_norm": 0.47508734464645386, "learning_rate": 0.0004061190548757454, "loss": 1.8052, "step": 34708 }, { "epoch": 1.15, "grad_norm": 0.4715579152107239, "learning_rate": 0.00040610927845024505, "loss": 1.8183, "step": 34709 }, { "epoch": 1.15, "grad_norm": 0.4637773036956787, "learning_rate": 0.0004060995018959437, "loss": 1.7978, "step": 34710 }, { "epoch": 1.15, "grad_norm": 0.47007372975349426, "learning_rate": 0.00040608972521285303, "loss": 1.7372, "step": 34711 }, { "epoch": 1.15, "grad_norm": 0.4711757302284241, "learning_rate": 0.0004060799484009852, "loss": 1.8193, "step": 34712 }, { "epoch": 1.15, "grad_norm": 0.46802717447280884, "learning_rate": 0.00040607017146035185, "loss": 1.8326, "step": 34713 }, { "epoch": 1.15, "grad_norm": 0.47424405813217163, "learning_rate": 0.000406060394390965, "loss": 1.7476, "step": 34714 }, { "epoch": 1.15, "grad_norm": 0.5030359625816345, "learning_rate": 0.0004060506171928365, "loss": 1.7943, "step": 34715 }, { "epoch": 1.16, "grad_norm": 0.46708524227142334, "learning_rate": 0.0004060408398659781, "loss": 1.7266, "step": 34716 }, { "epoch": 1.16, "grad_norm": 0.4717552661895752, "learning_rate": 0.0004060310624104018, "loss": 1.7784, "step": 34717 }, { "epoch": 1.16, "grad_norm": 0.5019155144691467, "learning_rate": 0.00040602128482611935, "loss": 1.8422, "step": 34718 }, { "epoch": 1.16, "grad_norm": 0.464825838804245, "learning_rate": 0.0004060115071131428, "loss": 1.7634, "step": 34719 }, { "epoch": 1.16, "grad_norm": 0.4853609502315521, "learning_rate": 0.00040600172927148377, "loss": 1.8128, "step": 34720 }, { "epoch": 1.16, "grad_norm": 0.45919668674468994, "learning_rate": 0.00040599195130115425, "loss": 1.8036, "step": 34721 }, { "epoch": 1.16, "grad_norm": 0.4520924985408783, "learning_rate": 0.00040598217320216625, "loss": 1.7824, "step": 34722 }, { "epoch": 1.16, "grad_norm": 0.49259039759635925, "learning_rate": 0.00040597239497453143, "loss": 1.7924, "step": 34723 }, { "epoch": 1.16, "grad_norm": 0.47772616147994995, "learning_rate": 0.00040596261661826177, "loss": 1.759, "step": 34724 }, { "epoch": 1.16, "grad_norm": 0.47005486488342285, "learning_rate": 0.0004059528381333691, "loss": 1.7782, "step": 34725 }, { "epoch": 1.16, "grad_norm": 0.4469859004020691, "learning_rate": 0.0004059430595198652, "loss": 1.7995, "step": 34726 }, { "epoch": 1.16, "grad_norm": 0.48179152607917786, "learning_rate": 0.0004059332807777621, "loss": 1.8657, "step": 34727 }, { "epoch": 1.16, "grad_norm": 0.47454705834388733, "learning_rate": 0.00040592350190707166, "loss": 1.7809, "step": 34728 }, { "epoch": 1.16, "grad_norm": 0.4820345640182495, "learning_rate": 0.0004059137229078056, "loss": 1.7555, "step": 34729 }, { "epoch": 1.16, "grad_norm": 0.458479106426239, "learning_rate": 0.00040590394377997593, "loss": 1.7477, "step": 34730 }, { "epoch": 1.16, "grad_norm": 0.4683881402015686, "learning_rate": 0.0004058941645235945, "loss": 1.8849, "step": 34731 }, { "epoch": 1.16, "grad_norm": 0.4653737246990204, "learning_rate": 0.00040588438513867314, "loss": 1.8574, "step": 34732 }, { "epoch": 1.16, "grad_norm": 0.4792286157608032, "learning_rate": 0.00040587460562522377, "loss": 1.8302, "step": 34733 }, { "epoch": 1.16, "grad_norm": 0.4779030382633209, "learning_rate": 0.00040586482598325817, "loss": 1.7796, "step": 34734 }, { "epoch": 1.16, "grad_norm": 0.47806769609451294, "learning_rate": 0.0004058550462127883, "loss": 1.8433, "step": 34735 }, { "epoch": 1.16, "grad_norm": 0.47263434529304504, "learning_rate": 0.000405845266313826, "loss": 1.8102, "step": 34736 }, { "epoch": 1.16, "grad_norm": 0.4558279812335968, "learning_rate": 0.0004058354862863832, "loss": 1.8318, "step": 34737 }, { "epoch": 1.16, "grad_norm": 0.47104957699775696, "learning_rate": 0.00040582570613047165, "loss": 1.8085, "step": 34738 }, { "epoch": 1.16, "grad_norm": 0.4531162977218628, "learning_rate": 0.00040581592584610325, "loss": 1.7372, "step": 34739 }, { "epoch": 1.16, "grad_norm": 0.4692573845386505, "learning_rate": 0.0004058061454332899, "loss": 1.8398, "step": 34740 }, { "epoch": 1.16, "grad_norm": 0.46624886989593506, "learning_rate": 0.00040579636489204354, "loss": 1.7681, "step": 34741 }, { "epoch": 1.16, "grad_norm": 0.48627471923828125, "learning_rate": 0.00040578658422237604, "loss": 1.8197, "step": 34742 }, { "epoch": 1.16, "grad_norm": 0.4787397086620331, "learning_rate": 0.0004057768034242991, "loss": 1.7625, "step": 34743 }, { "epoch": 1.16, "grad_norm": 0.4597136378288269, "learning_rate": 0.00040576702249782475, "loss": 1.8169, "step": 34744 }, { "epoch": 1.16, "grad_norm": 0.4614194631576538, "learning_rate": 0.00040575724144296477, "loss": 1.7953, "step": 34745 }, { "epoch": 1.16, "grad_norm": 0.473267525434494, "learning_rate": 0.00040574746025973113, "loss": 1.772, "step": 34746 }, { "epoch": 1.16, "grad_norm": 0.46053025126457214, "learning_rate": 0.00040573767894813565, "loss": 1.7827, "step": 34747 }, { "epoch": 1.16, "grad_norm": 0.49341168999671936, "learning_rate": 0.00040572789750819026, "loss": 1.7754, "step": 34748 }, { "epoch": 1.16, "grad_norm": 0.4585132300853729, "learning_rate": 0.0004057181159399067, "loss": 1.7419, "step": 34749 }, { "epoch": 1.16, "grad_norm": 0.4617500305175781, "learning_rate": 0.000405708334243297, "loss": 1.77, "step": 34750 }, { "epoch": 1.16, "grad_norm": 0.47378963232040405, "learning_rate": 0.0004056985524183729, "loss": 1.8283, "step": 34751 }, { "epoch": 1.16, "grad_norm": 0.47643935680389404, "learning_rate": 0.0004056887704651463, "loss": 1.8505, "step": 34752 }, { "epoch": 1.16, "grad_norm": 0.4776317775249481, "learning_rate": 0.0004056789883836292, "loss": 1.8002, "step": 34753 }, { "epoch": 1.16, "grad_norm": 0.46361324191093445, "learning_rate": 0.00040566920617383326, "loss": 1.7476, "step": 34754 }, { "epoch": 1.16, "grad_norm": 0.46945133805274963, "learning_rate": 0.00040565942383577057, "loss": 1.8295, "step": 34755 }, { "epoch": 1.16, "grad_norm": 0.48311012983322144, "learning_rate": 0.0004056496413694529, "loss": 1.7512, "step": 34756 }, { "epoch": 1.16, "grad_norm": 0.4715287387371063, "learning_rate": 0.00040563985877489203, "loss": 1.7959, "step": 34757 }, { "epoch": 1.16, "grad_norm": 0.47586387395858765, "learning_rate": 0.00040563007605210014, "loss": 1.8106, "step": 34758 }, { "epoch": 1.16, "grad_norm": 0.46337005496025085, "learning_rate": 0.00040562029320108866, "loss": 1.6969, "step": 34759 }, { "epoch": 1.16, "grad_norm": 0.4651283323764801, "learning_rate": 0.00040561051022186995, "loss": 1.7696, "step": 34760 }, { "epoch": 1.16, "grad_norm": 0.49487942457199097, "learning_rate": 0.0004056007271144554, "loss": 1.8264, "step": 34761 }, { "epoch": 1.16, "grad_norm": 0.4745905101299286, "learning_rate": 0.00040559094387885734, "loss": 1.7917, "step": 34762 }, { "epoch": 1.16, "grad_norm": 0.46970561146736145, "learning_rate": 0.0004055811605150873, "loss": 1.7762, "step": 34763 }, { "epoch": 1.16, "grad_norm": 0.47393909096717834, "learning_rate": 0.0004055713770231573, "loss": 1.7873, "step": 34764 }, { "epoch": 1.16, "grad_norm": 0.4703986644744873, "learning_rate": 0.00040556159340307925, "loss": 1.7355, "step": 34765 }, { "epoch": 1.16, "grad_norm": 0.472128301858902, "learning_rate": 0.0004055518096548649, "loss": 1.7745, "step": 34766 }, { "epoch": 1.16, "grad_norm": 0.46028926968574524, "learning_rate": 0.00040554202577852634, "loss": 1.8364, "step": 34767 }, { "epoch": 1.16, "grad_norm": 0.46751904487609863, "learning_rate": 0.00040553224177407516, "loss": 1.6989, "step": 34768 }, { "epoch": 1.16, "grad_norm": 0.479261577129364, "learning_rate": 0.0004055224576415235, "loss": 1.7173, "step": 34769 }, { "epoch": 1.16, "grad_norm": 0.47044289112091064, "learning_rate": 0.0004055126733808831, "loss": 1.7701, "step": 34770 }, { "epoch": 1.16, "grad_norm": 0.48103344440460205, "learning_rate": 0.0004055028889921658, "loss": 1.8369, "step": 34771 }, { "epoch": 1.16, "grad_norm": 0.48753246665000916, "learning_rate": 0.00040549310447538363, "loss": 1.7406, "step": 34772 }, { "epoch": 1.16, "grad_norm": 0.4772174060344696, "learning_rate": 0.00040548331983054827, "loss": 1.7309, "step": 34773 }, { "epoch": 1.16, "grad_norm": 0.4839661717414856, "learning_rate": 0.0004054735350576718, "loss": 1.8319, "step": 34774 }, { "epoch": 1.16, "grad_norm": 0.4828116297721863, "learning_rate": 0.000405463750156766, "loss": 1.6995, "step": 34775 }, { "epoch": 1.16, "grad_norm": 0.47164687514305115, "learning_rate": 0.00040545396512784267, "loss": 1.6776, "step": 34776 }, { "epoch": 1.16, "grad_norm": 0.4665945768356323, "learning_rate": 0.00040544417997091374, "loss": 1.7637, "step": 34777 }, { "epoch": 1.16, "grad_norm": 0.4636293351650238, "learning_rate": 0.0004054343946859912, "loss": 1.7736, "step": 34778 }, { "epoch": 1.16, "grad_norm": 0.4609571099281311, "learning_rate": 0.0004054246092730869, "loss": 1.8077, "step": 34779 }, { "epoch": 1.16, "grad_norm": 0.47123709321022034, "learning_rate": 0.00040541482373221253, "loss": 1.8035, "step": 34780 }, { "epoch": 1.16, "grad_norm": 0.45474234223365784, "learning_rate": 0.00040540503806338015, "loss": 1.7737, "step": 34781 }, { "epoch": 1.16, "grad_norm": 0.46214187145233154, "learning_rate": 0.0004053952522666015, "loss": 1.6981, "step": 34782 }, { "epoch": 1.16, "grad_norm": 0.4803635776042938, "learning_rate": 0.00040538546634188863, "loss": 1.8404, "step": 34783 }, { "epoch": 1.16, "grad_norm": 0.45383188128471375, "learning_rate": 0.0004053756802892533, "loss": 1.8124, "step": 34784 }, { "epoch": 1.16, "grad_norm": 0.6952481865882874, "learning_rate": 0.0004053658941087075, "loss": 1.8198, "step": 34785 }, { "epoch": 1.16, "grad_norm": 0.4688918888568878, "learning_rate": 0.00040535610780026297, "loss": 1.8854, "step": 34786 }, { "epoch": 1.16, "grad_norm": 0.47138795256614685, "learning_rate": 0.0004053463213639316, "loss": 1.7986, "step": 34787 }, { "epoch": 1.16, "grad_norm": 0.4967191815376282, "learning_rate": 0.00040533653479972535, "loss": 1.7937, "step": 34788 }, { "epoch": 1.16, "grad_norm": 0.4689386188983917, "learning_rate": 0.0004053267481076561, "loss": 1.7539, "step": 34789 }, { "epoch": 1.16, "grad_norm": 0.4745368957519531, "learning_rate": 0.00040531696128773574, "loss": 1.7531, "step": 34790 }, { "epoch": 1.16, "grad_norm": 0.46881693601608276, "learning_rate": 0.00040530717433997593, "loss": 1.813, "step": 34791 }, { "epoch": 1.16, "grad_norm": 0.4691465198993683, "learning_rate": 0.0004052973872643889, "loss": 1.7826, "step": 34792 }, { "epoch": 1.16, "grad_norm": 0.4755455255508423, "learning_rate": 0.00040528760006098625, "loss": 1.8543, "step": 34793 }, { "epoch": 1.16, "grad_norm": 0.4574713706970215, "learning_rate": 0.00040527781272978007, "loss": 1.7435, "step": 34794 }, { "epoch": 1.16, "grad_norm": 0.4779086709022522, "learning_rate": 0.00040526802527078207, "loss": 1.852, "step": 34795 }, { "epoch": 1.16, "grad_norm": 1.617719054222107, "learning_rate": 0.0004052582376840042, "loss": 1.8466, "step": 34796 }, { "epoch": 1.16, "grad_norm": 0.4689602255821228, "learning_rate": 0.0004052484499694584, "loss": 1.8141, "step": 34797 }, { "epoch": 1.16, "grad_norm": 0.49540501832962036, "learning_rate": 0.00040523866212715637, "loss": 1.7456, "step": 34798 }, { "epoch": 1.16, "grad_norm": 0.47899481654167175, "learning_rate": 0.00040522887415711024, "loss": 1.7616, "step": 34799 }, { "epoch": 1.16, "grad_norm": 0.4694462716579437, "learning_rate": 0.0004052190860593317, "loss": 1.8041, "step": 34800 }, { "epoch": 1.16, "grad_norm": 0.4657289981842041, "learning_rate": 0.0004052092978338327, "loss": 1.7317, "step": 34801 }, { "epoch": 1.16, "grad_norm": 0.47831442952156067, "learning_rate": 0.00040519950948062503, "loss": 1.7857, "step": 34802 }, { "epoch": 1.16, "grad_norm": 0.4867649972438812, "learning_rate": 0.0004051897209997208, "loss": 1.8062, "step": 34803 }, { "epoch": 1.16, "grad_norm": 0.45505961775779724, "learning_rate": 0.0004051799323911317, "loss": 1.7869, "step": 34804 }, { "epoch": 1.16, "grad_norm": 0.46548664569854736, "learning_rate": 0.00040517014365486963, "loss": 1.7734, "step": 34805 }, { "epoch": 1.16, "grad_norm": 1.2386406660079956, "learning_rate": 0.00040516035479094646, "loss": 1.8505, "step": 34806 }, { "epoch": 1.16, "grad_norm": 0.4593353271484375, "learning_rate": 0.0004051505657993742, "loss": 1.7283, "step": 34807 }, { "epoch": 1.16, "grad_norm": 0.48076215386390686, "learning_rate": 0.00040514077668016453, "loss": 1.8789, "step": 34808 }, { "epoch": 1.16, "grad_norm": 0.44782310724258423, "learning_rate": 0.00040513098743332957, "loss": 1.7226, "step": 34809 }, { "epoch": 1.16, "grad_norm": 0.4583984613418579, "learning_rate": 0.0004051211980588811, "loss": 1.7843, "step": 34810 }, { "epoch": 1.16, "grad_norm": 0.5024971961975098, "learning_rate": 0.0004051114085568309, "loss": 1.7957, "step": 34811 }, { "epoch": 1.16, "grad_norm": 0.8627966046333313, "learning_rate": 0.00040510161892719096, "loss": 1.7868, "step": 34812 }, { "epoch": 1.16, "grad_norm": 0.44976553320884705, "learning_rate": 0.00040509182916997314, "loss": 1.7751, "step": 34813 }, { "epoch": 1.16, "grad_norm": 0.48314711451530457, "learning_rate": 0.0004050820392851893, "loss": 1.7914, "step": 34814 }, { "epoch": 1.16, "grad_norm": 0.4618894159793854, "learning_rate": 0.0004050722492728514, "loss": 1.8012, "step": 34815 }, { "epoch": 1.16, "grad_norm": 0.46958252787590027, "learning_rate": 0.00040506245913297126, "loss": 1.811, "step": 34816 }, { "epoch": 1.16, "grad_norm": 0.47210389375686646, "learning_rate": 0.00040505266886556074, "loss": 1.7754, "step": 34817 }, { "epoch": 1.16, "grad_norm": 0.4696785807609558, "learning_rate": 0.00040504287847063184, "loss": 1.7901, "step": 34818 }, { "epoch": 1.16, "grad_norm": 0.47780588269233704, "learning_rate": 0.00040503308794819626, "loss": 1.7257, "step": 34819 }, { "epoch": 1.16, "grad_norm": 0.46230337023735046, "learning_rate": 0.0004050232972982661, "loss": 1.8057, "step": 34820 }, { "epoch": 1.16, "grad_norm": 0.464870810508728, "learning_rate": 0.00040501350652085295, "loss": 1.7819, "step": 34821 }, { "epoch": 1.16, "grad_norm": 0.5915570259094238, "learning_rate": 0.000405003715615969, "loss": 1.8432, "step": 34822 }, { "epoch": 1.16, "grad_norm": 0.4590514302253723, "learning_rate": 0.000404993924583626, "loss": 1.8476, "step": 34823 }, { "epoch": 1.16, "grad_norm": 0.4718479514122009, "learning_rate": 0.0004049841334238359, "loss": 1.8247, "step": 34824 }, { "epoch": 1.16, "grad_norm": 0.48567819595336914, "learning_rate": 0.00040497434213661044, "loss": 1.7344, "step": 34825 }, { "epoch": 1.16, "grad_norm": 0.47663193941116333, "learning_rate": 0.00040496455072196164, "loss": 1.8049, "step": 34826 }, { "epoch": 1.16, "grad_norm": 0.4627857506275177, "learning_rate": 0.00040495475917990136, "loss": 1.7501, "step": 34827 }, { "epoch": 1.16, "grad_norm": 0.4658065438270569, "learning_rate": 0.0004049449675104414, "loss": 1.847, "step": 34828 }, { "epoch": 1.16, "grad_norm": 0.4678814113140106, "learning_rate": 0.0004049351757135938, "loss": 1.8083, "step": 34829 }, { "epoch": 1.16, "grad_norm": 0.4707559645175934, "learning_rate": 0.00040492538378937034, "loss": 1.7694, "step": 34830 }, { "epoch": 1.16, "grad_norm": 0.47568604350090027, "learning_rate": 0.0004049155917377828, "loss": 1.7519, "step": 34831 }, { "epoch": 1.16, "grad_norm": 0.46711382269859314, "learning_rate": 0.0004049057995588433, "loss": 1.8145, "step": 34832 }, { "epoch": 1.16, "grad_norm": 0.48184502124786377, "learning_rate": 0.0004048960072525636, "loss": 1.8258, "step": 34833 }, { "epoch": 1.16, "grad_norm": 0.4560844302177429, "learning_rate": 0.0004048862148189557, "loss": 1.7475, "step": 34834 }, { "epoch": 1.16, "grad_norm": 0.4925590753555298, "learning_rate": 0.0004048764222580312, "loss": 1.8204, "step": 34835 }, { "epoch": 1.16, "grad_norm": 0.4769730865955353, "learning_rate": 0.00040486662956980225, "loss": 1.8104, "step": 34836 }, { "epoch": 1.16, "grad_norm": 0.4589824080467224, "learning_rate": 0.0004048568367542807, "loss": 1.7687, "step": 34837 }, { "epoch": 1.16, "grad_norm": 0.4974708557128906, "learning_rate": 0.0004048470438114784, "loss": 1.7424, "step": 34838 }, { "epoch": 1.16, "grad_norm": 0.47313326597213745, "learning_rate": 0.00040483725074140725, "loss": 1.7396, "step": 34839 }, { "epoch": 1.16, "grad_norm": 0.4702439606189728, "learning_rate": 0.0004048274575440791, "loss": 1.7922, "step": 34840 }, { "epoch": 1.16, "grad_norm": 0.4606429934501648, "learning_rate": 0.00040481766421950586, "loss": 1.7222, "step": 34841 }, { "epoch": 1.16, "grad_norm": 0.46069198846817017, "learning_rate": 0.00040480787076769935, "loss": 1.7742, "step": 34842 }, { "epoch": 1.16, "grad_norm": 0.45236867666244507, "learning_rate": 0.0004047980771886716, "loss": 1.7965, "step": 34843 }, { "epoch": 1.16, "grad_norm": 0.4853399991989136, "learning_rate": 0.0004047882834824344, "loss": 1.7994, "step": 34844 }, { "epoch": 1.16, "grad_norm": 0.4484521448612213, "learning_rate": 0.00040477848964899974, "loss": 1.6391, "step": 34845 }, { "epoch": 1.16, "grad_norm": 0.45568788051605225, "learning_rate": 0.0004047686956883793, "loss": 1.8291, "step": 34846 }, { "epoch": 1.16, "grad_norm": 0.4671619236469269, "learning_rate": 0.0004047589016005853, "loss": 1.7858, "step": 34847 }, { "epoch": 1.16, "grad_norm": 0.46966421604156494, "learning_rate": 0.0004047491073856292, "loss": 1.7118, "step": 34848 }, { "epoch": 1.16, "grad_norm": 0.49499279260635376, "learning_rate": 0.0004047393130435233, "loss": 1.8234, "step": 34849 }, { "epoch": 1.16, "grad_norm": 0.47482728958129883, "learning_rate": 0.0004047295185742792, "loss": 1.7909, "step": 34850 }, { "epoch": 1.16, "grad_norm": 0.46238619089126587, "learning_rate": 0.0004047197239779089, "loss": 1.8056, "step": 34851 }, { "epoch": 1.16, "grad_norm": 0.48086017370224, "learning_rate": 0.00040470992925442434, "loss": 1.7835, "step": 34852 }, { "epoch": 1.16, "grad_norm": 0.4600681662559509, "learning_rate": 0.00040470013440383726, "loss": 1.785, "step": 34853 }, { "epoch": 1.16, "grad_norm": 0.46332070231437683, "learning_rate": 0.00040469033942615974, "loss": 1.7879, "step": 34854 }, { "epoch": 1.16, "grad_norm": 0.47967612743377686, "learning_rate": 0.0004046805443214036, "loss": 1.8029, "step": 34855 }, { "epoch": 1.16, "grad_norm": 0.4870794713497162, "learning_rate": 0.0004046707490895806, "loss": 1.8058, "step": 34856 }, { "epoch": 1.16, "grad_norm": 0.4685032367706299, "learning_rate": 0.0004046609537307027, "loss": 1.8534, "step": 34857 }, { "epoch": 1.16, "grad_norm": 0.46999630331993103, "learning_rate": 0.00040465115824478193, "loss": 1.8223, "step": 34858 }, { "epoch": 1.16, "grad_norm": 0.47585007548332214, "learning_rate": 0.00040464136263183007, "loss": 1.7903, "step": 34859 }, { "epoch": 1.16, "grad_norm": 0.48299482464790344, "learning_rate": 0.00040463156689185905, "loss": 1.7848, "step": 34860 }, { "epoch": 1.16, "grad_norm": 0.46829208731651306, "learning_rate": 0.00040462177102488063, "loss": 1.8396, "step": 34861 }, { "epoch": 1.16, "grad_norm": 0.503980815410614, "learning_rate": 0.0004046119750309068, "loss": 1.838, "step": 34862 }, { "epoch": 1.16, "grad_norm": 0.4537867605686188, "learning_rate": 0.00040460217890994946, "loss": 1.8166, "step": 34863 }, { "epoch": 1.16, "grad_norm": 0.4607459604740143, "learning_rate": 0.00040459238266202046, "loss": 1.7509, "step": 34864 }, { "epoch": 1.16, "grad_norm": 0.46443885564804077, "learning_rate": 0.0004045825862871318, "loss": 1.7285, "step": 34865 }, { "epoch": 1.16, "grad_norm": 0.4587363302707672, "learning_rate": 0.0004045727897852953, "loss": 1.799, "step": 34866 }, { "epoch": 1.16, "grad_norm": 0.47410663962364197, "learning_rate": 0.00040456299315652275, "loss": 1.7465, "step": 34867 }, { "epoch": 1.16, "grad_norm": 0.45636922121047974, "learning_rate": 0.00040455319640082615, "loss": 1.8314, "step": 34868 }, { "epoch": 1.16, "grad_norm": 0.7482149600982666, "learning_rate": 0.0004045433995182174, "loss": 1.7042, "step": 34869 }, { "epoch": 1.16, "grad_norm": 0.4848194718360901, "learning_rate": 0.0004045336025087084, "loss": 1.7667, "step": 34870 }, { "epoch": 1.16, "grad_norm": 0.4706619083881378, "learning_rate": 0.0004045238053723109, "loss": 1.792, "step": 34871 }, { "epoch": 1.16, "grad_norm": 0.45839667320251465, "learning_rate": 0.00040451400810903707, "loss": 1.7445, "step": 34872 }, { "epoch": 1.16, "grad_norm": 0.4878099262714386, "learning_rate": 0.0004045042107188985, "loss": 1.8258, "step": 34873 }, { "epoch": 1.16, "grad_norm": 0.49095991253852844, "learning_rate": 0.00040449441320190724, "loss": 1.7531, "step": 34874 }, { "epoch": 1.16, "grad_norm": 0.4770527482032776, "learning_rate": 0.00040448461555807525, "loss": 1.838, "step": 34875 }, { "epoch": 1.16, "grad_norm": 0.4946446120738983, "learning_rate": 0.0004044748177874142, "loss": 1.7748, "step": 34876 }, { "epoch": 1.16, "grad_norm": 0.4714754521846771, "learning_rate": 0.00040446501988993617, "loss": 1.6738, "step": 34877 }, { "epoch": 1.16, "grad_norm": 0.49205896258354187, "learning_rate": 0.00040445522186565293, "loss": 1.7674, "step": 34878 }, { "epoch": 1.16, "grad_norm": 0.5058404803276062, "learning_rate": 0.0004044454237145766, "loss": 1.8259, "step": 34879 }, { "epoch": 1.16, "grad_norm": 0.7121543288230896, "learning_rate": 0.00040443562543671877, "loss": 1.7953, "step": 34880 }, { "epoch": 1.16, "grad_norm": 0.46314236521720886, "learning_rate": 0.00040442582703209155, "loss": 1.8143, "step": 34881 }, { "epoch": 1.16, "grad_norm": 0.4702034592628479, "learning_rate": 0.00040441602850070673, "loss": 1.7246, "step": 34882 }, { "epoch": 1.16, "grad_norm": 0.46171683073043823, "learning_rate": 0.0004044062298425763, "loss": 1.8359, "step": 34883 }, { "epoch": 1.16, "grad_norm": 0.4967568516731262, "learning_rate": 0.00040439643105771205, "loss": 1.8218, "step": 34884 }, { "epoch": 1.16, "grad_norm": 0.4633060395717621, "learning_rate": 0.0004043866321461259, "loss": 1.8194, "step": 34885 }, { "epoch": 1.16, "grad_norm": 0.4863366484642029, "learning_rate": 0.00040437683310782976, "loss": 1.8027, "step": 34886 }, { "epoch": 1.16, "grad_norm": 0.4596962630748749, "learning_rate": 0.0004043670339428355, "loss": 1.7781, "step": 34887 }, { "epoch": 1.16, "grad_norm": 0.4778805375099182, "learning_rate": 0.0004043572346511551, "loss": 1.737, "step": 34888 }, { "epoch": 1.16, "grad_norm": 0.469808429479599, "learning_rate": 0.00040434743523280037, "loss": 1.7506, "step": 34889 }, { "epoch": 1.16, "grad_norm": 0.46829545497894287, "learning_rate": 0.0004043376356877832, "loss": 1.7399, "step": 34890 }, { "epoch": 1.16, "grad_norm": 0.4868527948856354, "learning_rate": 0.0004043278360161155, "loss": 1.7906, "step": 34891 }, { "epoch": 1.16, "grad_norm": 0.9462367296218872, "learning_rate": 0.0004043180362178092, "loss": 1.6613, "step": 34892 }, { "epoch": 1.16, "grad_norm": 0.45629826188087463, "learning_rate": 0.0004043082362928762, "loss": 1.7955, "step": 34893 }, { "epoch": 1.16, "grad_norm": 0.466508686542511, "learning_rate": 0.0004042984362413283, "loss": 1.7547, "step": 34894 }, { "epoch": 1.16, "grad_norm": 0.6569231748580933, "learning_rate": 0.0004042886360631776, "loss": 1.8189, "step": 34895 }, { "epoch": 1.16, "grad_norm": 0.4569057822227478, "learning_rate": 0.00040427883575843587, "loss": 1.8193, "step": 34896 }, { "epoch": 1.16, "grad_norm": 0.47640177607536316, "learning_rate": 0.00040426903532711484, "loss": 1.8094, "step": 34897 }, { "epoch": 1.16, "grad_norm": 0.4735139310359955, "learning_rate": 0.00040425923476922665, "loss": 1.7938, "step": 34898 }, { "epoch": 1.16, "grad_norm": 0.4665783941745758, "learning_rate": 0.00040424943408478306, "loss": 1.8527, "step": 34899 }, { "epoch": 1.16, "grad_norm": 0.4782714247703552, "learning_rate": 0.00040423963327379606, "loss": 1.7599, "step": 34900 }, { "epoch": 1.16, "grad_norm": 0.4819982647895813, "learning_rate": 0.0004042298323362775, "loss": 1.8322, "step": 34901 }, { "epoch": 1.16, "grad_norm": 0.4599266052246094, "learning_rate": 0.00040422003127223936, "loss": 1.7798, "step": 34902 }, { "epoch": 1.16, "grad_norm": 0.45775309205055237, "learning_rate": 0.0004042102300816933, "loss": 1.808, "step": 34903 }, { "epoch": 1.16, "grad_norm": 0.45030152797698975, "learning_rate": 0.00040420042876465145, "loss": 1.8792, "step": 34904 }, { "epoch": 1.16, "grad_norm": 0.47994542121887207, "learning_rate": 0.0004041906273211257, "loss": 1.812, "step": 34905 }, { "epoch": 1.16, "grad_norm": 0.4604097306728363, "learning_rate": 0.00040418082575112776, "loss": 1.7582, "step": 34906 }, { "epoch": 1.16, "grad_norm": 0.4826795756816864, "learning_rate": 0.00040417102405466976, "loss": 1.7703, "step": 34907 }, { "epoch": 1.16, "grad_norm": 0.4626551568508148, "learning_rate": 0.00040416122223176337, "loss": 1.7219, "step": 34908 }, { "epoch": 1.16, "grad_norm": 0.4835638105869293, "learning_rate": 0.0004041514202824207, "loss": 1.8297, "step": 34909 }, { "epoch": 1.16, "grad_norm": 0.49296924471855164, "learning_rate": 0.0004041416182066534, "loss": 1.7725, "step": 34910 }, { "epoch": 1.16, "grad_norm": 0.49560612440109253, "learning_rate": 0.00040413181600447375, "loss": 1.8862, "step": 34911 }, { "epoch": 1.16, "grad_norm": 0.4813913404941559, "learning_rate": 0.0004041220136758933, "loss": 1.757, "step": 34912 }, { "epoch": 1.16, "grad_norm": 0.4737117886543274, "learning_rate": 0.00040411221122092403, "loss": 1.7297, "step": 34913 }, { "epoch": 1.16, "grad_norm": 0.49130940437316895, "learning_rate": 0.000404102408639578, "loss": 1.7754, "step": 34914 }, { "epoch": 1.16, "grad_norm": 0.48309776186943054, "learning_rate": 0.00040409260593186685, "loss": 1.8277, "step": 34915 }, { "epoch": 1.16, "grad_norm": 0.47727110981941223, "learning_rate": 0.0004040828030978027, "loss": 1.8177, "step": 34916 }, { "epoch": 1.16, "grad_norm": 0.4517228305339813, "learning_rate": 0.00040407300013739733, "loss": 1.7871, "step": 34917 }, { "epoch": 1.16, "grad_norm": 0.45458024740219116, "learning_rate": 0.00040406319705066265, "loss": 1.7123, "step": 34918 }, { "epoch": 1.16, "grad_norm": 0.4567715525627136, "learning_rate": 0.00040405339383761056, "loss": 1.787, "step": 34919 }, { "epoch": 1.16, "grad_norm": 0.489689439535141, "learning_rate": 0.0004040435904982531, "loss": 1.7585, "step": 34920 }, { "epoch": 1.16, "grad_norm": 0.4696713387966156, "learning_rate": 0.00040403378703260194, "loss": 1.7982, "step": 34921 }, { "epoch": 1.16, "grad_norm": 0.4601752758026123, "learning_rate": 0.00040402398344066917, "loss": 1.8132, "step": 34922 }, { "epoch": 1.16, "grad_norm": 0.5959860682487488, "learning_rate": 0.0004040141797224666, "loss": 1.8385, "step": 34923 }, { "epoch": 1.16, "grad_norm": 0.46218252182006836, "learning_rate": 0.00040400437587800604, "loss": 1.7572, "step": 34924 }, { "epoch": 1.16, "grad_norm": 0.47461339831352234, "learning_rate": 0.0004039945719072996, "loss": 1.7882, "step": 34925 }, { "epoch": 1.16, "grad_norm": 0.4666425585746765, "learning_rate": 0.00040398476781035894, "loss": 1.7855, "step": 34926 }, { "epoch": 1.16, "grad_norm": 0.46843773126602173, "learning_rate": 0.0004039749635871963, "loss": 1.7813, "step": 34927 }, { "epoch": 1.16, "grad_norm": 0.45632433891296387, "learning_rate": 0.0004039651592378232, "loss": 1.7663, "step": 34928 }, { "epoch": 1.16, "grad_norm": 0.455290824174881, "learning_rate": 0.0004039553547622518, "loss": 1.7714, "step": 34929 }, { "epoch": 1.16, "grad_norm": 0.46190258860588074, "learning_rate": 0.0004039455501604939, "loss": 1.7597, "step": 34930 }, { "epoch": 1.16, "grad_norm": 0.4742599427700043, "learning_rate": 0.00040393574543256134, "loss": 1.8024, "step": 34931 }, { "epoch": 1.16, "grad_norm": 0.47208428382873535, "learning_rate": 0.00040392594057846626, "loss": 1.735, "step": 34932 }, { "epoch": 1.16, "grad_norm": 0.46590757369995117, "learning_rate": 0.00040391613559822025, "loss": 1.7614, "step": 34933 }, { "epoch": 1.16, "grad_norm": 0.4540350139141083, "learning_rate": 0.0004039063304918355, "loss": 1.7809, "step": 34934 }, { "epoch": 1.16, "grad_norm": 0.4521268904209137, "learning_rate": 0.00040389652525932366, "loss": 1.7853, "step": 34935 }, { "epoch": 1.16, "grad_norm": 0.48783475160598755, "learning_rate": 0.0004038867199006968, "loss": 1.77, "step": 34936 }, { "epoch": 1.16, "grad_norm": 0.47915565967559814, "learning_rate": 0.0004038769144159668, "loss": 1.8352, "step": 34937 }, { "epoch": 1.16, "grad_norm": 0.4926379919052124, "learning_rate": 0.00040386710880514537, "loss": 1.8234, "step": 34938 }, { "epoch": 1.16, "grad_norm": 0.4714483618736267, "learning_rate": 0.0004038573030682448, "loss": 1.7949, "step": 34939 }, { "epoch": 1.16, "grad_norm": 0.4767135977745056, "learning_rate": 0.0004038474972052766, "loss": 1.7874, "step": 34940 }, { "epoch": 1.16, "grad_norm": 0.4683248698711395, "learning_rate": 0.00040383769121625295, "loss": 1.6595, "step": 34941 }, { "epoch": 1.16, "grad_norm": 0.4693228304386139, "learning_rate": 0.0004038278851011856, "loss": 1.7862, "step": 34942 }, { "epoch": 1.16, "grad_norm": 0.45321592688560486, "learning_rate": 0.00040381807886008645, "loss": 1.799, "step": 34943 }, { "epoch": 1.16, "grad_norm": 0.4782331883907318, "learning_rate": 0.0004038082724929675, "loss": 1.7667, "step": 34944 }, { "epoch": 1.16, "grad_norm": 0.4744788110256195, "learning_rate": 0.0004037984659998406, "loss": 1.8149, "step": 34945 }, { "epoch": 1.16, "grad_norm": 0.5070743560791016, "learning_rate": 0.0004037886593807176, "loss": 1.8241, "step": 34946 }, { "epoch": 1.16, "grad_norm": 0.48153334856033325, "learning_rate": 0.0004037788526356105, "loss": 1.8095, "step": 34947 }, { "epoch": 1.16, "grad_norm": 0.449015349149704, "learning_rate": 0.00040376904576453116, "loss": 1.8043, "step": 34948 }, { "epoch": 1.16, "grad_norm": 0.46354708075523376, "learning_rate": 0.0004037592387674915, "loss": 1.8228, "step": 34949 }, { "epoch": 1.16, "grad_norm": 0.5047281384468079, "learning_rate": 0.0004037494316445034, "loss": 1.8182, "step": 34950 }, { "epoch": 1.16, "grad_norm": 0.4588017463684082, "learning_rate": 0.0004037396243955788, "loss": 1.8098, "step": 34951 }, { "epoch": 1.16, "grad_norm": 0.4708331227302551, "learning_rate": 0.0004037298170207296, "loss": 1.7831, "step": 34952 }, { "epoch": 1.16, "grad_norm": 0.4644322395324707, "learning_rate": 0.0004037200095199676, "loss": 1.8233, "step": 34953 }, { "epoch": 1.16, "grad_norm": 0.47942450642585754, "learning_rate": 0.00040371020189330486, "loss": 1.824, "step": 34954 }, { "epoch": 1.16, "grad_norm": 0.4687891900539398, "learning_rate": 0.0004037003941407532, "loss": 1.7144, "step": 34955 }, { "epoch": 1.16, "grad_norm": 0.48006975650787354, "learning_rate": 0.0004036905862623245, "loss": 1.8172, "step": 34956 }, { "epoch": 1.16, "grad_norm": 0.4822697937488556, "learning_rate": 0.00040368077825803084, "loss": 1.8029, "step": 34957 }, { "epoch": 1.16, "grad_norm": 0.5024785995483398, "learning_rate": 0.0004036709701278838, "loss": 1.8, "step": 34958 }, { "epoch": 1.16, "grad_norm": 0.46631327271461487, "learning_rate": 0.00040366116187189564, "loss": 1.8129, "step": 34959 }, { "epoch": 1.16, "grad_norm": 0.4685317575931549, "learning_rate": 0.000403651353490078, "loss": 1.75, "step": 34960 }, { "epoch": 1.16, "grad_norm": 0.4586758613586426, "learning_rate": 0.0004036415449824429, "loss": 1.7644, "step": 34961 }, { "epoch": 1.16, "grad_norm": 0.4645809531211853, "learning_rate": 0.0004036317363490024, "loss": 1.8014, "step": 34962 }, { "epoch": 1.16, "grad_norm": 0.46647316217422485, "learning_rate": 0.00040362192758976797, "loss": 1.7457, "step": 34963 }, { "epoch": 1.16, "grad_norm": 0.47516751289367676, "learning_rate": 0.000403612118704752, "loss": 1.7835, "step": 34964 }, { "epoch": 1.16, "grad_norm": 0.4632304310798645, "learning_rate": 0.00040360230969396603, "loss": 1.7639, "step": 34965 }, { "epoch": 1.16, "grad_norm": 0.46738746762275696, "learning_rate": 0.00040359250055742226, "loss": 1.7279, "step": 34966 }, { "epoch": 1.16, "grad_norm": 0.4719507694244385, "learning_rate": 0.00040358269129513234, "loss": 1.8129, "step": 34967 }, { "epoch": 1.16, "grad_norm": 0.46879932284355164, "learning_rate": 0.00040357288190710837, "loss": 1.8288, "step": 34968 }, { "epoch": 1.16, "grad_norm": 0.46779534220695496, "learning_rate": 0.0004035630723933622, "loss": 1.7399, "step": 34969 }, { "epoch": 1.16, "grad_norm": 0.4707915484905243, "learning_rate": 0.0004035532627539057, "loss": 1.78, "step": 34970 }, { "epoch": 1.16, "grad_norm": 0.476887047290802, "learning_rate": 0.00040354345298875075, "loss": 1.8523, "step": 34971 }, { "epoch": 1.16, "grad_norm": 0.4741533398628235, "learning_rate": 0.0004035336430979093, "loss": 1.7873, "step": 34972 }, { "epoch": 1.16, "grad_norm": 0.4618941843509674, "learning_rate": 0.0004035238330813933, "loss": 1.7375, "step": 34973 }, { "epoch": 1.16, "grad_norm": 0.48260435461997986, "learning_rate": 0.00040351402293921465, "loss": 1.7781, "step": 34974 }, { "epoch": 1.16, "grad_norm": 0.7158647775650024, "learning_rate": 0.00040350421267138517, "loss": 1.8218, "step": 34975 }, { "epoch": 1.16, "grad_norm": 0.49128833413124084, "learning_rate": 0.0004034944022779169, "loss": 1.7223, "step": 34976 }, { "epoch": 1.16, "grad_norm": 0.4488687217235565, "learning_rate": 0.0004034845917588216, "loss": 1.7642, "step": 34977 }, { "epoch": 1.16, "grad_norm": 0.47177913784980774, "learning_rate": 0.00040347478111411124, "loss": 1.7976, "step": 34978 }, { "epoch": 1.16, "grad_norm": 0.4706138074398041, "learning_rate": 0.00040346497034379785, "loss": 1.8038, "step": 34979 }, { "epoch": 1.16, "grad_norm": 0.4819718599319458, "learning_rate": 0.00040345515944789314, "loss": 1.8326, "step": 34980 }, { "epoch": 1.16, "grad_norm": 0.4859044551849365, "learning_rate": 0.00040344534842640914, "loss": 1.6705, "step": 34981 }, { "epoch": 1.16, "grad_norm": 0.45230045914649963, "learning_rate": 0.0004034355372793577, "loss": 1.7835, "step": 34982 }, { "epoch": 1.16, "grad_norm": 0.4809471666812897, "learning_rate": 0.00040342572600675076, "loss": 1.8, "step": 34983 }, { "epoch": 1.16, "grad_norm": 0.4599549472332001, "learning_rate": 0.00040341591460860024, "loss": 1.8084, "step": 34984 }, { "epoch": 1.16, "grad_norm": 0.47814279794692993, "learning_rate": 0.0004034061030849181, "loss": 1.7488, "step": 34985 }, { "epoch": 1.16, "grad_norm": 0.4766179025173187, "learning_rate": 0.0004033962914357161, "loss": 1.8189, "step": 34986 }, { "epoch": 1.16, "grad_norm": 0.4957023561000824, "learning_rate": 0.0004033864796610063, "loss": 1.8109, "step": 34987 }, { "epoch": 1.16, "grad_norm": 0.4558807611465454, "learning_rate": 0.0004033766677608005, "loss": 1.7522, "step": 34988 }, { "epoch": 1.16, "grad_norm": 0.45405787229537964, "learning_rate": 0.0004033668557351107, "loss": 1.7412, "step": 34989 }, { "epoch": 1.16, "grad_norm": 0.46976393461227417, "learning_rate": 0.00040335704358394875, "loss": 1.7773, "step": 34990 }, { "epoch": 1.16, "grad_norm": 0.4714083671569824, "learning_rate": 0.0004033472313073265, "loss": 1.8377, "step": 34991 }, { "epoch": 1.16, "grad_norm": 0.47650161385536194, "learning_rate": 0.0004033374189052561, "loss": 1.7542, "step": 34992 }, { "epoch": 1.16, "grad_norm": 0.47141286730766296, "learning_rate": 0.0004033276063777491, "loss": 1.803, "step": 34993 }, { "epoch": 1.16, "grad_norm": 0.47874829173088074, "learning_rate": 0.00040331779372481774, "loss": 1.7509, "step": 34994 }, { "epoch": 1.16, "grad_norm": 0.504396915435791, "learning_rate": 0.00040330798094647373, "loss": 1.8484, "step": 34995 }, { "epoch": 1.16, "grad_norm": 0.5038931965827942, "learning_rate": 0.00040329816804272916, "loss": 1.797, "step": 34996 }, { "epoch": 1.16, "grad_norm": 0.4650152921676636, "learning_rate": 0.00040328835501359574, "loss": 1.8231, "step": 34997 }, { "epoch": 1.16, "grad_norm": 0.47030210494995117, "learning_rate": 0.0004032785418590855, "loss": 1.7528, "step": 34998 }, { "epoch": 1.16, "grad_norm": 0.4680479168891907, "learning_rate": 0.0004032687285792104, "loss": 1.7716, "step": 34999 }, { "epoch": 1.16, "grad_norm": 0.46018514037132263, "learning_rate": 0.00040325891517398215, "loss": 1.8087, "step": 35000 }, { "epoch": 1.16, "grad_norm": 0.46654850244522095, "learning_rate": 0.0004032491016434129, "loss": 1.7541, "step": 35001 }, { "epoch": 1.16, "grad_norm": 0.4738897383213043, "learning_rate": 0.00040323928798751443, "loss": 1.8457, "step": 35002 }, { "epoch": 1.16, "grad_norm": 0.47523626685142517, "learning_rate": 0.0004032294742062987, "loss": 1.7917, "step": 35003 }, { "epoch": 1.16, "grad_norm": 0.4731631875038147, "learning_rate": 0.0004032196602997775, "loss": 1.8278, "step": 35004 }, { "epoch": 1.16, "grad_norm": 0.44910526275634766, "learning_rate": 0.0004032098462679629, "loss": 1.7114, "step": 35005 }, { "epoch": 1.16, "grad_norm": 0.4676748514175415, "learning_rate": 0.0004032000321108668, "loss": 1.6728, "step": 35006 }, { "epoch": 1.16, "grad_norm": 0.46772709488868713, "learning_rate": 0.00040319021782850105, "loss": 1.7926, "step": 35007 }, { "epoch": 1.16, "grad_norm": 0.4784427285194397, "learning_rate": 0.0004031804034208775, "loss": 1.7677, "step": 35008 }, { "epoch": 1.16, "grad_norm": 0.49869170784950256, "learning_rate": 0.00040317058888800824, "loss": 1.8249, "step": 35009 }, { "epoch": 1.16, "grad_norm": 0.5019962191581726, "learning_rate": 0.00040316077422990497, "loss": 1.769, "step": 35010 }, { "epoch": 1.16, "grad_norm": 0.4710390269756317, "learning_rate": 0.00040315095944657976, "loss": 1.8572, "step": 35011 }, { "epoch": 1.16, "grad_norm": 0.49446338415145874, "learning_rate": 0.0004031411445380446, "loss": 1.7975, "step": 35012 }, { "epoch": 1.16, "grad_norm": 0.5097395777702332, "learning_rate": 0.0004031313295043112, "loss": 1.696, "step": 35013 }, { "epoch": 1.16, "grad_norm": 0.4878588914871216, "learning_rate": 0.0004031215143453916, "loss": 1.8283, "step": 35014 }, { "epoch": 1.16, "grad_norm": 0.4642111361026764, "learning_rate": 0.0004031116990612975, "loss": 1.7985, "step": 35015 }, { "epoch": 1.16, "grad_norm": 0.48377329111099243, "learning_rate": 0.0004031018836520412, "loss": 1.823, "step": 35016 }, { "epoch": 1.17, "grad_norm": 0.49777039885520935, "learning_rate": 0.0004030920681176343, "loss": 1.7047, "step": 35017 }, { "epoch": 1.17, "grad_norm": 0.4918796420097351, "learning_rate": 0.00040308225245808886, "loss": 1.8175, "step": 35018 }, { "epoch": 1.17, "grad_norm": 0.4930914044380188, "learning_rate": 0.00040307243667341675, "loss": 1.7164, "step": 35019 }, { "epoch": 1.17, "grad_norm": 0.5455521941184998, "learning_rate": 0.00040306262076362983, "loss": 1.904, "step": 35020 }, { "epoch": 1.17, "grad_norm": 0.485017329454422, "learning_rate": 0.0004030528047287402, "loss": 1.8224, "step": 35021 }, { "epoch": 1.17, "grad_norm": 0.5113767981529236, "learning_rate": 0.00040304298856875956, "loss": 1.7548, "step": 35022 }, { "epoch": 1.17, "grad_norm": 0.46838998794555664, "learning_rate": 0.0004030331722836999, "loss": 1.7437, "step": 35023 }, { "epoch": 1.17, "grad_norm": 0.47243374586105347, "learning_rate": 0.00040302335587357325, "loss": 1.8216, "step": 35024 }, { "epoch": 1.17, "grad_norm": 0.48166927695274353, "learning_rate": 0.0004030135393383913, "loss": 1.788, "step": 35025 }, { "epoch": 1.17, "grad_norm": 0.4946063756942749, "learning_rate": 0.0004030037226781662, "loss": 1.8049, "step": 35026 }, { "epoch": 1.17, "grad_norm": 0.47505664825439453, "learning_rate": 0.00040299390589290966, "loss": 1.7562, "step": 35027 }, { "epoch": 1.17, "grad_norm": 0.46653372049331665, "learning_rate": 0.00040298408898263376, "loss": 1.7945, "step": 35028 }, { "epoch": 1.17, "grad_norm": 0.4792591631412506, "learning_rate": 0.00040297427194735033, "loss": 1.8477, "step": 35029 }, { "epoch": 1.17, "grad_norm": 0.46765875816345215, "learning_rate": 0.0004029644547870713, "loss": 1.7847, "step": 35030 }, { "epoch": 1.17, "grad_norm": 0.4659474194049835, "learning_rate": 0.00040295463750180863, "loss": 1.7359, "step": 35031 }, { "epoch": 1.17, "grad_norm": 0.4656596779823303, "learning_rate": 0.0004029448200915741, "loss": 1.867, "step": 35032 }, { "epoch": 1.17, "grad_norm": 0.4607127606868744, "learning_rate": 0.00040293500255637985, "loss": 1.7908, "step": 35033 }, { "epoch": 1.17, "grad_norm": 0.4863528311252594, "learning_rate": 0.00040292518489623764, "loss": 1.782, "step": 35034 }, { "epoch": 1.17, "grad_norm": 0.471489280462265, "learning_rate": 0.0004029153671111594, "loss": 1.7626, "step": 35035 }, { "epoch": 1.17, "grad_norm": 0.4752214550971985, "learning_rate": 0.00040290554920115705, "loss": 1.8222, "step": 35036 }, { "epoch": 1.17, "grad_norm": 0.47312912344932556, "learning_rate": 0.0004028957311662425, "loss": 1.8539, "step": 35037 }, { "epoch": 1.17, "grad_norm": 0.49206385016441345, "learning_rate": 0.0004028859130064278, "loss": 1.7564, "step": 35038 }, { "epoch": 1.17, "grad_norm": 0.4643622934818268, "learning_rate": 0.00040287609472172464, "loss": 1.7696, "step": 35039 }, { "epoch": 1.17, "grad_norm": 0.4621749818325043, "learning_rate": 0.0004028662763121452, "loss": 1.7956, "step": 35040 }, { "epoch": 1.17, "grad_norm": 0.47551605105400085, "learning_rate": 0.00040285645777770116, "loss": 1.7848, "step": 35041 }, { "epoch": 1.17, "grad_norm": 0.473204106092453, "learning_rate": 0.0004028466391184045, "loss": 1.7214, "step": 35042 }, { "epoch": 1.17, "grad_norm": 0.471591979265213, "learning_rate": 0.00040283682033426726, "loss": 1.8716, "step": 35043 }, { "epoch": 1.17, "grad_norm": 0.5610629320144653, "learning_rate": 0.0004028270014253013, "loss": 1.7483, "step": 35044 }, { "epoch": 1.17, "grad_norm": 0.4986432194709778, "learning_rate": 0.00040281718239151837, "loss": 1.7631, "step": 35045 }, { "epoch": 1.17, "grad_norm": 0.4750005900859833, "learning_rate": 0.00040280736323293064, "loss": 1.7141, "step": 35046 }, { "epoch": 1.17, "grad_norm": 0.4836359918117523, "learning_rate": 0.0004027975439495499, "loss": 1.7259, "step": 35047 }, { "epoch": 1.17, "grad_norm": 0.4723508358001709, "learning_rate": 0.00040278772454138803, "loss": 1.8036, "step": 35048 }, { "epoch": 1.17, "grad_norm": 0.46760305762290955, "learning_rate": 0.0004027779050084571, "loss": 1.8015, "step": 35049 }, { "epoch": 1.17, "grad_norm": 0.4662150740623474, "learning_rate": 0.0004027680853507688, "loss": 1.7406, "step": 35050 }, { "epoch": 1.17, "grad_norm": 0.4711318612098694, "learning_rate": 0.00040275826556833536, "loss": 1.7839, "step": 35051 }, { "epoch": 1.17, "grad_norm": 0.4694206416606903, "learning_rate": 0.00040274844566116837, "loss": 1.7834, "step": 35052 }, { "epoch": 1.17, "grad_norm": 0.5281035304069519, "learning_rate": 0.0004027386256292801, "loss": 1.9075, "step": 35053 }, { "epoch": 1.17, "grad_norm": 0.583614706993103, "learning_rate": 0.0004027288054726822, "loss": 1.8633, "step": 35054 }, { "epoch": 1.17, "grad_norm": 0.4685933291912079, "learning_rate": 0.0004027189851913865, "loss": 1.7453, "step": 35055 }, { "epoch": 1.17, "grad_norm": 0.46195700764656067, "learning_rate": 0.0004027091647854052, "loss": 1.7876, "step": 35056 }, { "epoch": 1.17, "grad_norm": 0.47096312046051025, "learning_rate": 0.0004026993442547501, "loss": 1.7848, "step": 35057 }, { "epoch": 1.17, "grad_norm": 0.44085896015167236, "learning_rate": 0.0004026895235994332, "loss": 1.769, "step": 35058 }, { "epoch": 1.17, "grad_norm": 0.45874693989753723, "learning_rate": 0.0004026797028194663, "loss": 1.7927, "step": 35059 }, { "epoch": 1.17, "grad_norm": 0.4596629738807678, "learning_rate": 0.0004026698819148613, "loss": 1.8616, "step": 35060 }, { "epoch": 1.17, "grad_norm": 0.4861976206302643, "learning_rate": 0.0004026600608856303, "loss": 1.8794, "step": 35061 }, { "epoch": 1.17, "grad_norm": 0.46396487951278687, "learning_rate": 0.000402650239731785, "loss": 1.7783, "step": 35062 }, { "epoch": 1.17, "grad_norm": 0.4580387771129608, "learning_rate": 0.0004026404184533376, "loss": 1.7855, "step": 35063 }, { "epoch": 1.17, "grad_norm": 0.45928749442100525, "learning_rate": 0.00040263059705029973, "loss": 1.7844, "step": 35064 }, { "epoch": 1.17, "grad_norm": 0.4590948522090912, "learning_rate": 0.00040262077552268347, "loss": 1.6958, "step": 35065 }, { "epoch": 1.17, "grad_norm": 0.46872010827064514, "learning_rate": 0.0004026109538705006, "loss": 1.7672, "step": 35066 }, { "epoch": 1.17, "grad_norm": 0.4538683593273163, "learning_rate": 0.0004026011320937633, "loss": 1.8027, "step": 35067 }, { "epoch": 1.17, "grad_norm": 0.46944013237953186, "learning_rate": 0.00040259131019248335, "loss": 1.8109, "step": 35068 }, { "epoch": 1.17, "grad_norm": 0.4710541367530823, "learning_rate": 0.0004025814881666726, "loss": 1.8076, "step": 35069 }, { "epoch": 1.17, "grad_norm": 0.4700142443180084, "learning_rate": 0.0004025716660163431, "loss": 1.7258, "step": 35070 }, { "epoch": 1.17, "grad_norm": 0.48277562856674194, "learning_rate": 0.0004025618437415066, "loss": 1.7206, "step": 35071 }, { "epoch": 1.17, "grad_norm": 0.47045376896858215, "learning_rate": 0.0004025520213421752, "loss": 1.8461, "step": 35072 }, { "epoch": 1.17, "grad_norm": 0.4863220453262329, "learning_rate": 0.00040254219881836074, "loss": 1.8015, "step": 35073 }, { "epoch": 1.17, "grad_norm": 0.47051313519477844, "learning_rate": 0.0004025323761700752, "loss": 1.7896, "step": 35074 }, { "epoch": 1.17, "grad_norm": 0.46086886525154114, "learning_rate": 0.00040252255339733045, "loss": 1.8724, "step": 35075 }, { "epoch": 1.17, "grad_norm": 0.47943827509880066, "learning_rate": 0.0004025127305001384, "loss": 1.7431, "step": 35076 }, { "epoch": 1.17, "grad_norm": 0.4830975830554962, "learning_rate": 0.000402502907478511, "loss": 1.6925, "step": 35077 }, { "epoch": 1.17, "grad_norm": 0.4706861674785614, "learning_rate": 0.00040249308433246015, "loss": 1.7724, "step": 35078 }, { "epoch": 1.17, "grad_norm": 0.4754936993122101, "learning_rate": 0.0004024832610619979, "loss": 1.8189, "step": 35079 }, { "epoch": 1.17, "grad_norm": 0.49419960379600525, "learning_rate": 0.00040247343766713597, "loss": 1.8432, "step": 35080 }, { "epoch": 1.17, "grad_norm": 0.4847210943698883, "learning_rate": 0.00040246361414788645, "loss": 1.8313, "step": 35081 }, { "epoch": 1.17, "grad_norm": 0.46832698583602905, "learning_rate": 0.00040245379050426105, "loss": 1.7967, "step": 35082 }, { "epoch": 1.17, "grad_norm": 0.48049551248550415, "learning_rate": 0.000402443966736272, "loss": 1.7641, "step": 35083 }, { "epoch": 1.17, "grad_norm": 0.4754337668418884, "learning_rate": 0.00040243414284393104, "loss": 1.7736, "step": 35084 }, { "epoch": 1.17, "grad_norm": 0.4835056960582733, "learning_rate": 0.0004024243188272501, "loss": 1.8291, "step": 35085 }, { "epoch": 1.17, "grad_norm": 0.46762287616729736, "learning_rate": 0.0004024144946862412, "loss": 1.8158, "step": 35086 }, { "epoch": 1.17, "grad_norm": 0.4719802439212799, "learning_rate": 0.0004024046704209161, "loss": 1.6875, "step": 35087 }, { "epoch": 1.17, "grad_norm": 0.4978612959384918, "learning_rate": 0.00040239484603128684, "loss": 1.7628, "step": 35088 }, { "epoch": 1.17, "grad_norm": 0.5249831676483154, "learning_rate": 0.0004023850215173654, "loss": 1.6596, "step": 35089 }, { "epoch": 1.17, "grad_norm": 0.4673682749271393, "learning_rate": 0.0004023751968791635, "loss": 1.7847, "step": 35090 }, { "epoch": 1.17, "grad_norm": 0.45895013213157654, "learning_rate": 0.00040236537211669327, "loss": 1.8209, "step": 35091 }, { "epoch": 1.17, "grad_norm": 0.5185467004776001, "learning_rate": 0.00040235554722996655, "loss": 1.7708, "step": 35092 }, { "epoch": 1.17, "grad_norm": 0.5265629291534424, "learning_rate": 0.0004023457222189953, "loss": 1.7821, "step": 35093 }, { "epoch": 1.17, "grad_norm": 0.4801540672779083, "learning_rate": 0.00040233589708379145, "loss": 1.7834, "step": 35094 }, { "epoch": 1.17, "grad_norm": 0.4770523011684418, "learning_rate": 0.00040232607182436676, "loss": 1.7883, "step": 35095 }, { "epoch": 1.17, "grad_norm": 0.48329707980155945, "learning_rate": 0.00040231624644073344, "loss": 1.8271, "step": 35096 }, { "epoch": 1.17, "grad_norm": 0.48310941457748413, "learning_rate": 0.0004023064209329032, "loss": 1.7127, "step": 35097 }, { "epoch": 1.17, "grad_norm": 0.4858996868133545, "learning_rate": 0.000402296595300888, "loss": 1.8603, "step": 35098 }, { "epoch": 1.17, "grad_norm": 0.4874180257320404, "learning_rate": 0.00040228676954469995, "loss": 1.8351, "step": 35099 }, { "epoch": 1.17, "grad_norm": 0.48794856667518616, "learning_rate": 0.0004022769436643508, "loss": 1.767, "step": 35100 }, { "epoch": 1.17, "grad_norm": 0.4792758524417877, "learning_rate": 0.0004022671176598524, "loss": 1.8088, "step": 35101 }, { "epoch": 1.17, "grad_norm": 0.47822442650794983, "learning_rate": 0.00040225729153121686, "loss": 1.8151, "step": 35102 }, { "epoch": 1.17, "grad_norm": 0.5095779299736023, "learning_rate": 0.00040224746527845595, "loss": 1.788, "step": 35103 }, { "epoch": 1.17, "grad_norm": 0.48024505376815796, "learning_rate": 0.00040223763890158187, "loss": 1.719, "step": 35104 }, { "epoch": 1.17, "grad_norm": 0.5021657943725586, "learning_rate": 0.0004022278124006062, "loss": 1.8042, "step": 35105 }, { "epoch": 1.17, "grad_norm": 0.4993748068809509, "learning_rate": 0.00040221798577554107, "loss": 1.6772, "step": 35106 }, { "epoch": 1.17, "grad_norm": 0.47801581025123596, "learning_rate": 0.0004022081590263984, "loss": 1.805, "step": 35107 }, { "epoch": 1.17, "grad_norm": 0.4634210169315338, "learning_rate": 0.00040219833215319003, "loss": 1.8095, "step": 35108 }, { "epoch": 1.17, "grad_norm": 0.48370978236198425, "learning_rate": 0.000402188505155928, "loss": 1.8644, "step": 35109 }, { "epoch": 1.17, "grad_norm": 0.5056909322738647, "learning_rate": 0.00040217867803462405, "loss": 1.7784, "step": 35110 }, { "epoch": 1.17, "grad_norm": 0.49502116441726685, "learning_rate": 0.0004021688507892904, "loss": 1.8166, "step": 35111 }, { "epoch": 1.17, "grad_norm": 0.49212244153022766, "learning_rate": 0.0004021590234199387, "loss": 1.7149, "step": 35112 }, { "epoch": 1.17, "grad_norm": 0.466105192899704, "learning_rate": 0.00040214919592658114, "loss": 1.843, "step": 35113 }, { "epoch": 1.17, "grad_norm": 0.49444639682769775, "learning_rate": 0.0004021393683092294, "loss": 1.7788, "step": 35114 }, { "epoch": 1.17, "grad_norm": 0.49561670422554016, "learning_rate": 0.00040212954056789555, "loss": 1.7968, "step": 35115 }, { "epoch": 1.17, "grad_norm": 0.46725839376449585, "learning_rate": 0.0004021197127025915, "loss": 1.7589, "step": 35116 }, { "epoch": 1.17, "grad_norm": 0.4768521189689636, "learning_rate": 0.0004021098847133291, "loss": 1.7539, "step": 35117 }, { "epoch": 1.17, "grad_norm": 0.4888289272785187, "learning_rate": 0.0004021000566001204, "loss": 1.8608, "step": 35118 }, { "epoch": 1.17, "grad_norm": 0.4659697413444519, "learning_rate": 0.00040209022836297726, "loss": 1.7773, "step": 35119 }, { "epoch": 1.17, "grad_norm": 0.46194759011268616, "learning_rate": 0.00040208040000191167, "loss": 1.8077, "step": 35120 }, { "epoch": 1.17, "grad_norm": 0.4841668903827667, "learning_rate": 0.0004020705715169354, "loss": 1.7271, "step": 35121 }, { "epoch": 1.17, "grad_norm": 0.4538845717906952, "learning_rate": 0.00040206074290806057, "loss": 1.7732, "step": 35122 }, { "epoch": 1.17, "grad_norm": 0.4906015694141388, "learning_rate": 0.00040205091417529903, "loss": 1.7486, "step": 35123 }, { "epoch": 1.17, "grad_norm": 0.4561018645763397, "learning_rate": 0.00040204108531866273, "loss": 1.7614, "step": 35124 }, { "epoch": 1.17, "grad_norm": 0.46669647097587585, "learning_rate": 0.0004020312563381636, "loss": 1.8246, "step": 35125 }, { "epoch": 1.17, "grad_norm": 0.4552628695964813, "learning_rate": 0.0004020214272338135, "loss": 1.8035, "step": 35126 }, { "epoch": 1.17, "grad_norm": 0.47187089920043945, "learning_rate": 0.0004020115980056245, "loss": 1.8234, "step": 35127 }, { "epoch": 1.17, "grad_norm": 0.4753265380859375, "learning_rate": 0.00040200176865360835, "loss": 1.8379, "step": 35128 }, { "epoch": 1.17, "grad_norm": 0.4606548845767975, "learning_rate": 0.0004019919391777772, "loss": 1.8203, "step": 35129 }, { "epoch": 1.17, "grad_norm": 0.45739811658859253, "learning_rate": 0.0004019821095781428, "loss": 1.7389, "step": 35130 }, { "epoch": 1.17, "grad_norm": 0.47336825728416443, "learning_rate": 0.00040197227985471715, "loss": 1.6998, "step": 35131 }, { "epoch": 1.17, "grad_norm": 0.47256433963775635, "learning_rate": 0.0004019624500075121, "loss": 1.7953, "step": 35132 }, { "epoch": 1.17, "grad_norm": 0.47215428948402405, "learning_rate": 0.00040195262003653976, "loss": 1.7919, "step": 35133 }, { "epoch": 1.17, "grad_norm": 0.4888516366481781, "learning_rate": 0.0004019427899418119, "loss": 1.7621, "step": 35134 }, { "epoch": 1.17, "grad_norm": 0.46856531500816345, "learning_rate": 0.00040193295972334054, "loss": 1.7455, "step": 35135 }, { "epoch": 1.17, "grad_norm": 0.46857088804244995, "learning_rate": 0.0004019231293811377, "loss": 1.7264, "step": 35136 }, { "epoch": 1.17, "grad_norm": 0.46388623118400574, "learning_rate": 0.000401913298915215, "loss": 1.828, "step": 35137 }, { "epoch": 1.17, "grad_norm": 0.4705638587474823, "learning_rate": 0.00040190346832558475, "loss": 1.8331, "step": 35138 }, { "epoch": 1.17, "grad_norm": 0.47108742594718933, "learning_rate": 0.0004018936376122585, "loss": 1.7704, "step": 35139 }, { "epoch": 1.17, "grad_norm": 0.46427199244499207, "learning_rate": 0.00040188380677524856, "loss": 1.8046, "step": 35140 }, { "epoch": 1.17, "grad_norm": 0.4832451045513153, "learning_rate": 0.0004018739758145667, "loss": 1.7394, "step": 35141 }, { "epoch": 1.17, "grad_norm": 0.47432345151901245, "learning_rate": 0.0004018641447302247, "loss": 1.8486, "step": 35142 }, { "epoch": 1.17, "grad_norm": 0.4744657576084137, "learning_rate": 0.00040185431352223475, "loss": 1.7767, "step": 35143 }, { "epoch": 1.17, "grad_norm": 0.4890771806240082, "learning_rate": 0.0004018444821906086, "loss": 1.7748, "step": 35144 }, { "epoch": 1.17, "grad_norm": 0.4819299876689911, "learning_rate": 0.0004018346507353583, "loss": 1.8006, "step": 35145 }, { "epoch": 1.17, "grad_norm": 0.48451489210128784, "learning_rate": 0.00040182481915649573, "loss": 1.8063, "step": 35146 }, { "epoch": 1.17, "grad_norm": 0.4830452799797058, "learning_rate": 0.00040181498745403283, "loss": 1.8295, "step": 35147 }, { "epoch": 1.17, "grad_norm": 0.4695342779159546, "learning_rate": 0.0004018051556279816, "loss": 1.9495, "step": 35148 }, { "epoch": 1.17, "grad_norm": 0.49505648016929626, "learning_rate": 0.00040179532367835374, "loss": 1.7941, "step": 35149 }, { "epoch": 1.17, "grad_norm": 0.47812941670417786, "learning_rate": 0.00040178549160516157, "loss": 1.7468, "step": 35150 }, { "epoch": 1.17, "grad_norm": 0.48129671812057495, "learning_rate": 0.0004017756594084167, "loss": 1.7579, "step": 35151 }, { "epoch": 1.17, "grad_norm": 0.47024890780448914, "learning_rate": 0.00040176582708813116, "loss": 1.8949, "step": 35152 }, { "epoch": 1.17, "grad_norm": 0.49955010414123535, "learning_rate": 0.00040175599464431695, "loss": 1.7938, "step": 35153 }, { "epoch": 1.17, "grad_norm": 0.5263544321060181, "learning_rate": 0.00040174616207698586, "loss": 1.7209, "step": 35154 }, { "epoch": 1.17, "grad_norm": 0.49645599722862244, "learning_rate": 0.00040173632938615006, "loss": 1.7866, "step": 35155 }, { "epoch": 1.17, "grad_norm": 0.4672081470489502, "learning_rate": 0.00040172649657182124, "loss": 1.8219, "step": 35156 }, { "epoch": 1.17, "grad_norm": 0.4785085618495941, "learning_rate": 0.00040171666363401155, "loss": 1.8098, "step": 35157 }, { "epoch": 1.17, "grad_norm": 0.5290226936340332, "learning_rate": 0.0004017068305727327, "loss": 1.7965, "step": 35158 }, { "epoch": 1.17, "grad_norm": 0.500718891620636, "learning_rate": 0.0004016969973879968, "loss": 1.7834, "step": 35159 }, { "epoch": 1.17, "grad_norm": 0.4870774745941162, "learning_rate": 0.00040168716407981567, "loss": 1.7463, "step": 35160 }, { "epoch": 1.17, "grad_norm": 0.4605448246002197, "learning_rate": 0.00040167733064820144, "loss": 1.8361, "step": 35161 }, { "epoch": 1.17, "grad_norm": 0.4704224765300751, "learning_rate": 0.0004016674970931658, "loss": 1.71, "step": 35162 }, { "epoch": 1.17, "grad_norm": 0.5022599101066589, "learning_rate": 0.0004016576634147208, "loss": 1.7277, "step": 35163 }, { "epoch": 1.17, "grad_norm": 0.5058317184448242, "learning_rate": 0.0004016478296128784, "loss": 1.7844, "step": 35164 }, { "epoch": 1.17, "grad_norm": 0.4767034351825714, "learning_rate": 0.00040163799568765054, "loss": 1.7696, "step": 35165 }, { "epoch": 1.17, "grad_norm": 0.45603713393211365, "learning_rate": 0.0004016281616390491, "loss": 1.8064, "step": 35166 }, { "epoch": 1.17, "grad_norm": 0.4763566553592682, "learning_rate": 0.000401618327467086, "loss": 1.7869, "step": 35167 }, { "epoch": 1.17, "grad_norm": 0.5345304608345032, "learning_rate": 0.0004016084931717733, "loss": 1.8349, "step": 35168 }, { "epoch": 1.17, "grad_norm": 0.5036547780036926, "learning_rate": 0.0004015986587531229, "loss": 1.8364, "step": 35169 }, { "epoch": 1.17, "grad_norm": 0.4768262207508087, "learning_rate": 0.0004015888242111466, "loss": 1.8623, "step": 35170 }, { "epoch": 1.17, "grad_norm": 0.4836893677711487, "learning_rate": 0.0004015789895458565, "loss": 1.7971, "step": 35171 }, { "epoch": 1.17, "grad_norm": 0.6747764348983765, "learning_rate": 0.00040156915475726435, "loss": 1.7698, "step": 35172 }, { "epoch": 1.17, "grad_norm": 0.5031632781028748, "learning_rate": 0.00040155931984538234, "loss": 1.8066, "step": 35173 }, { "epoch": 1.17, "grad_norm": 0.47213611006736755, "learning_rate": 0.0004015494848102222, "loss": 1.7878, "step": 35174 }, { "epoch": 1.17, "grad_norm": 0.47983020544052124, "learning_rate": 0.0004015396496517961, "loss": 1.7792, "step": 35175 }, { "epoch": 1.17, "grad_norm": 0.4641990065574646, "learning_rate": 0.00040152981437011564, "loss": 1.7034, "step": 35176 }, { "epoch": 1.17, "grad_norm": 0.47526147961616516, "learning_rate": 0.000401519978965193, "loss": 1.7751, "step": 35177 }, { "epoch": 1.17, "grad_norm": 0.5012633800506592, "learning_rate": 0.00040151014343704013, "loss": 1.8406, "step": 35178 }, { "epoch": 1.17, "grad_norm": 0.46654394268989563, "learning_rate": 0.0004015003077856688, "loss": 1.776, "step": 35179 }, { "epoch": 1.17, "grad_norm": 0.4718494713306427, "learning_rate": 0.0004014904720110912, "loss": 1.7612, "step": 35180 }, { "epoch": 1.17, "grad_norm": 0.4702446758747101, "learning_rate": 0.00040148063611331893, "loss": 1.7433, "step": 35181 }, { "epoch": 1.17, "grad_norm": 0.44740933179855347, "learning_rate": 0.0004014708000923643, "loss": 1.7344, "step": 35182 }, { "epoch": 1.17, "grad_norm": 0.49904218316078186, "learning_rate": 0.00040146096394823896, "loss": 1.7707, "step": 35183 }, { "epoch": 1.17, "grad_norm": 0.4737468957901001, "learning_rate": 0.00040145112768095494, "loss": 1.7212, "step": 35184 }, { "epoch": 1.17, "grad_norm": 0.48523208498954773, "learning_rate": 0.0004014412912905243, "loss": 1.8446, "step": 35185 }, { "epoch": 1.17, "grad_norm": 0.4709782004356384, "learning_rate": 0.0004014314547769588, "loss": 1.8594, "step": 35186 }, { "epoch": 1.17, "grad_norm": 0.4761993885040283, "learning_rate": 0.0004014216181402705, "loss": 1.7888, "step": 35187 }, { "epoch": 1.17, "grad_norm": 0.462566614151001, "learning_rate": 0.00040141178138047124, "loss": 1.6992, "step": 35188 }, { "epoch": 1.17, "grad_norm": 0.48858219385147095, "learning_rate": 0.0004014019444975731, "loss": 1.7877, "step": 35189 }, { "epoch": 1.17, "grad_norm": 0.4839200973510742, "learning_rate": 0.0004013921074915879, "loss": 1.7793, "step": 35190 }, { "epoch": 1.17, "grad_norm": 0.47321969270706177, "learning_rate": 0.0004013822703625277, "loss": 1.7444, "step": 35191 }, { "epoch": 1.17, "grad_norm": 0.47394466400146484, "learning_rate": 0.0004013724331104043, "loss": 1.7307, "step": 35192 }, { "epoch": 1.17, "grad_norm": 0.4732963740825653, "learning_rate": 0.00040136259573522964, "loss": 1.8416, "step": 35193 }, { "epoch": 1.17, "grad_norm": 0.47906970977783203, "learning_rate": 0.00040135275823701577, "loss": 1.8238, "step": 35194 }, { "epoch": 1.17, "grad_norm": 0.4729679524898529, "learning_rate": 0.00040134292061577454, "loss": 1.796, "step": 35195 }, { "epoch": 1.17, "grad_norm": 0.48052099347114563, "learning_rate": 0.00040133308287151805, "loss": 1.7398, "step": 35196 }, { "epoch": 1.17, "grad_norm": 0.49012094736099243, "learning_rate": 0.00040132324500425796, "loss": 1.7822, "step": 35197 }, { "epoch": 1.17, "grad_norm": 0.48770377039909363, "learning_rate": 0.0004013134070140066, "loss": 1.7854, "step": 35198 }, { "epoch": 1.17, "grad_norm": 0.48455357551574707, "learning_rate": 0.00040130356890077544, "loss": 1.8037, "step": 35199 }, { "epoch": 1.17, "grad_norm": 0.45548224449157715, "learning_rate": 0.0004012937306645769, "loss": 1.8219, "step": 35200 }, { "epoch": 1.17, "grad_norm": 0.47914308309555054, "learning_rate": 0.0004012838923054226, "loss": 1.8229, "step": 35201 }, { "epoch": 1.17, "grad_norm": 0.45785003900527954, "learning_rate": 0.00040127405382332456, "loss": 1.8714, "step": 35202 }, { "epoch": 1.17, "grad_norm": 0.4878605008125305, "learning_rate": 0.0004012642152182948, "loss": 1.8301, "step": 35203 }, { "epoch": 1.17, "grad_norm": 0.4831811487674713, "learning_rate": 0.0004012543764903451, "loss": 1.7356, "step": 35204 }, { "epoch": 1.17, "grad_norm": 0.47290101647377014, "learning_rate": 0.00040124453763948765, "loss": 1.8121, "step": 35205 }, { "epoch": 1.17, "grad_norm": 0.478931188583374, "learning_rate": 0.00040123469866573414, "loss": 1.7644, "step": 35206 }, { "epoch": 1.17, "grad_norm": 0.48102766275405884, "learning_rate": 0.00040122485956909667, "loss": 1.7759, "step": 35207 }, { "epoch": 1.17, "grad_norm": 0.4882434010505676, "learning_rate": 0.0004012150203495871, "loss": 1.7935, "step": 35208 }, { "epoch": 1.17, "grad_norm": 0.44143566489219666, "learning_rate": 0.00040120518100721737, "loss": 1.7762, "step": 35209 }, { "epoch": 1.17, "grad_norm": 0.48963305354118347, "learning_rate": 0.00040119534154199957, "loss": 1.8165, "step": 35210 }, { "epoch": 1.17, "grad_norm": 0.5149765014648438, "learning_rate": 0.0004011855019539455, "loss": 1.7592, "step": 35211 }, { "epoch": 1.17, "grad_norm": 0.47256433963775635, "learning_rate": 0.00040117566224306717, "loss": 1.8285, "step": 35212 }, { "epoch": 1.17, "grad_norm": 0.5138275027275085, "learning_rate": 0.00040116582240937637, "loss": 1.7265, "step": 35213 }, { "epoch": 1.17, "grad_norm": 0.4731425344944, "learning_rate": 0.0004011559824528853, "loss": 1.8068, "step": 35214 }, { "epoch": 1.17, "grad_norm": 0.4613548517227173, "learning_rate": 0.0004011461423736057, "loss": 1.7255, "step": 35215 }, { "epoch": 1.17, "grad_norm": 0.4619527757167816, "learning_rate": 0.0004011363021715496, "loss": 1.8146, "step": 35216 }, { "epoch": 1.17, "grad_norm": 0.4622640609741211, "learning_rate": 0.00040112646184672896, "loss": 1.7713, "step": 35217 }, { "epoch": 1.17, "grad_norm": 0.47356894612312317, "learning_rate": 0.00040111662139915574, "loss": 1.8104, "step": 35218 }, { "epoch": 1.17, "grad_norm": 0.4992792308330536, "learning_rate": 0.0004011067808288417, "loss": 1.8365, "step": 35219 }, { "epoch": 1.17, "grad_norm": 0.47759270668029785, "learning_rate": 0.000401096940135799, "loss": 1.7915, "step": 35220 }, { "epoch": 1.17, "grad_norm": 0.4758251905441284, "learning_rate": 0.0004010870993200396, "loss": 1.7805, "step": 35221 }, { "epoch": 1.17, "grad_norm": 0.46424901485443115, "learning_rate": 0.00040107725838157515, "loss": 1.8071, "step": 35222 }, { "epoch": 1.17, "grad_norm": 0.5479564666748047, "learning_rate": 0.000401067417320418, "loss": 1.7909, "step": 35223 }, { "epoch": 1.17, "grad_norm": 0.5058183073997498, "learning_rate": 0.00040105757613657976, "loss": 1.7578, "step": 35224 }, { "epoch": 1.17, "grad_norm": 0.47491779923439026, "learning_rate": 0.0004010477348300726, "loss": 1.8038, "step": 35225 }, { "epoch": 1.17, "grad_norm": 0.4696371555328369, "learning_rate": 0.00040103789340090835, "loss": 1.7376, "step": 35226 }, { "epoch": 1.17, "grad_norm": 0.4790714681148529, "learning_rate": 0.0004010280518490989, "loss": 1.8727, "step": 35227 }, { "epoch": 1.17, "grad_norm": 0.4810890257358551, "learning_rate": 0.00040101821017465643, "loss": 1.7916, "step": 35228 }, { "epoch": 1.17, "grad_norm": 0.5032400488853455, "learning_rate": 0.0004010083683775926, "loss": 1.8527, "step": 35229 }, { "epoch": 1.17, "grad_norm": 0.45685312151908875, "learning_rate": 0.0004009985264579196, "loss": 1.7837, "step": 35230 }, { "epoch": 1.17, "grad_norm": 0.47337186336517334, "learning_rate": 0.0004009886844156492, "loss": 1.812, "step": 35231 }, { "epoch": 1.17, "grad_norm": 0.47643911838531494, "learning_rate": 0.00040097884225079344, "loss": 1.7911, "step": 35232 }, { "epoch": 1.17, "grad_norm": 0.470245361328125, "learning_rate": 0.0004009689999633643, "loss": 1.7255, "step": 35233 }, { "epoch": 1.17, "grad_norm": 0.4762437343597412, "learning_rate": 0.00040095915755337355, "loss": 1.8279, "step": 35234 }, { "epoch": 1.17, "grad_norm": 0.47294431924819946, "learning_rate": 0.00040094931502083337, "loss": 1.8286, "step": 35235 }, { "epoch": 1.17, "grad_norm": 0.4668644368648529, "learning_rate": 0.0004009394723657556, "loss": 1.7827, "step": 35236 }, { "epoch": 1.17, "grad_norm": 0.49260231852531433, "learning_rate": 0.00040092962958815213, "loss": 1.8607, "step": 35237 }, { "epoch": 1.17, "grad_norm": 0.4659377336502075, "learning_rate": 0.00040091978668803493, "loss": 1.7516, "step": 35238 }, { "epoch": 1.17, "grad_norm": 0.4666960835456848, "learning_rate": 0.00040090994366541607, "loss": 1.8001, "step": 35239 }, { "epoch": 1.17, "grad_norm": 0.4606114327907562, "learning_rate": 0.0004009001005203074, "loss": 1.8097, "step": 35240 }, { "epoch": 1.17, "grad_norm": 0.45447197556495667, "learning_rate": 0.00040089025725272085, "loss": 1.7196, "step": 35241 }, { "epoch": 1.17, "grad_norm": 0.4879283308982849, "learning_rate": 0.0004008804138626683, "loss": 1.8166, "step": 35242 }, { "epoch": 1.17, "grad_norm": 0.4730464518070221, "learning_rate": 0.0004008705703501619, "loss": 1.7916, "step": 35243 }, { "epoch": 1.17, "grad_norm": 0.4692308306694031, "learning_rate": 0.0004008607267152134, "loss": 1.7914, "step": 35244 }, { "epoch": 1.17, "grad_norm": 0.48973530530929565, "learning_rate": 0.0004008508829578349, "loss": 1.8284, "step": 35245 }, { "epoch": 1.17, "grad_norm": 0.49532535672187805, "learning_rate": 0.0004008410390780383, "loss": 1.8183, "step": 35246 }, { "epoch": 1.17, "grad_norm": 0.4662957191467285, "learning_rate": 0.00040083119507583557, "loss": 1.7509, "step": 35247 }, { "epoch": 1.17, "grad_norm": 0.4821089506149292, "learning_rate": 0.00040082135095123855, "loss": 1.8476, "step": 35248 }, { "epoch": 1.17, "grad_norm": 0.7726861238479614, "learning_rate": 0.0004008115067042593, "loss": 1.7952, "step": 35249 }, { "epoch": 1.17, "grad_norm": 0.44925573468208313, "learning_rate": 0.00040080166233490974, "loss": 1.7802, "step": 35250 }, { "epoch": 1.17, "grad_norm": 0.4568485617637634, "learning_rate": 0.0004007918178432018, "loss": 1.7382, "step": 35251 }, { "epoch": 1.17, "grad_norm": 0.4986531436443329, "learning_rate": 0.0004007819732291474, "loss": 1.8208, "step": 35252 }, { "epoch": 1.17, "grad_norm": 0.4890868663787842, "learning_rate": 0.0004007721284927586, "loss": 1.8292, "step": 35253 }, { "epoch": 1.17, "grad_norm": 0.4590971767902374, "learning_rate": 0.00040076228363404717, "loss": 1.8312, "step": 35254 }, { "epoch": 1.17, "grad_norm": 0.46523934602737427, "learning_rate": 0.0004007524386530254, "loss": 1.7789, "step": 35255 }, { "epoch": 1.17, "grad_norm": 0.4549940228462219, "learning_rate": 0.00040074259354970477, "loss": 1.7735, "step": 35256 }, { "epoch": 1.17, "grad_norm": 0.4774036407470703, "learning_rate": 0.0004007327483240976, "loss": 1.7765, "step": 35257 }, { "epoch": 1.17, "grad_norm": 0.4602058529853821, "learning_rate": 0.00040072290297621583, "loss": 1.7501, "step": 35258 }, { "epoch": 1.17, "grad_norm": 1.3269520998001099, "learning_rate": 0.00040071305750607106, "loss": 1.8353, "step": 35259 }, { "epoch": 1.17, "grad_norm": 0.4600711166858673, "learning_rate": 0.0004007032119136756, "loss": 1.719, "step": 35260 }, { "epoch": 1.17, "grad_norm": 0.4505370855331421, "learning_rate": 0.00040069336619904123, "loss": 1.7986, "step": 35261 }, { "epoch": 1.17, "grad_norm": 0.4676338732242584, "learning_rate": 0.00040068352036218003, "loss": 1.7481, "step": 35262 }, { "epoch": 1.17, "grad_norm": 0.46902474761009216, "learning_rate": 0.0004006736744031038, "loss": 1.7875, "step": 35263 }, { "epoch": 1.17, "grad_norm": 0.4677097201347351, "learning_rate": 0.00040066382832182465, "loss": 1.832, "step": 35264 }, { "epoch": 1.17, "grad_norm": 0.46303483843803406, "learning_rate": 0.0004006539821183544, "loss": 1.7684, "step": 35265 }, { "epoch": 1.17, "grad_norm": 0.4675333499908447, "learning_rate": 0.0004006441357927049, "loss": 1.7507, "step": 35266 }, { "epoch": 1.17, "grad_norm": 0.47096720337867737, "learning_rate": 0.00040063428934488846, "loss": 1.871, "step": 35267 }, { "epoch": 1.17, "grad_norm": 0.46104896068573, "learning_rate": 0.00040062444277491675, "loss": 1.7646, "step": 35268 }, { "epoch": 1.17, "grad_norm": 0.46961358189582825, "learning_rate": 0.00040061459608280175, "loss": 1.7201, "step": 35269 }, { "epoch": 1.17, "grad_norm": 0.4578382670879364, "learning_rate": 0.0004006047492685555, "loss": 1.7379, "step": 35270 }, { "epoch": 1.17, "grad_norm": 0.5034071207046509, "learning_rate": 0.00040059490233218986, "loss": 1.8069, "step": 35271 }, { "epoch": 1.17, "grad_norm": 0.4694633185863495, "learning_rate": 0.00040058505527371693, "loss": 1.8203, "step": 35272 }, { "epoch": 1.17, "grad_norm": 0.48533299565315247, "learning_rate": 0.00040057520809314847, "loss": 1.8492, "step": 35273 }, { "epoch": 1.17, "grad_norm": 0.46310970187187195, "learning_rate": 0.00040056536079049657, "loss": 1.7481, "step": 35274 }, { "epoch": 1.17, "grad_norm": 0.4775194525718689, "learning_rate": 0.00040055551336577315, "loss": 1.7608, "step": 35275 }, { "epoch": 1.17, "grad_norm": 0.5015665292739868, "learning_rate": 0.00040054566581899004, "loss": 1.8059, "step": 35276 }, { "epoch": 1.17, "grad_norm": 0.46840015053749084, "learning_rate": 0.0004005358181501594, "loss": 1.8143, "step": 35277 }, { "epoch": 1.17, "grad_norm": 0.46436548233032227, "learning_rate": 0.0004005259703592932, "loss": 1.8078, "step": 35278 }, { "epoch": 1.17, "grad_norm": 0.4677625000476837, "learning_rate": 0.0004005161224464031, "loss": 1.8397, "step": 35279 }, { "epoch": 1.17, "grad_norm": 0.4823455810546875, "learning_rate": 0.0004005062744115013, "loss": 1.7825, "step": 35280 }, { "epoch": 1.17, "grad_norm": 0.4942879378795624, "learning_rate": 0.0004004964262545997, "loss": 1.7674, "step": 35281 }, { "epoch": 1.17, "grad_norm": 0.4794389307498932, "learning_rate": 0.00040048657797571023, "loss": 1.7705, "step": 35282 }, { "epoch": 1.17, "grad_norm": 0.4718964397907257, "learning_rate": 0.00040047672957484496, "loss": 1.868, "step": 35283 }, { "epoch": 1.17, "grad_norm": 0.46697017550468445, "learning_rate": 0.0004004668810520156, "loss": 1.7706, "step": 35284 }, { "epoch": 1.17, "grad_norm": 0.47403064370155334, "learning_rate": 0.0004004570324072343, "loss": 1.8136, "step": 35285 }, { "epoch": 1.17, "grad_norm": 0.4609888792037964, "learning_rate": 0.000400447183640513, "loss": 1.7401, "step": 35286 }, { "epoch": 1.17, "grad_norm": 0.4947394132614136, "learning_rate": 0.00040043733475186356, "loss": 1.8244, "step": 35287 }, { "epoch": 1.17, "grad_norm": 0.48142582178115845, "learning_rate": 0.0004004274857412981, "loss": 1.6995, "step": 35288 }, { "epoch": 1.17, "grad_norm": 0.4812901020050049, "learning_rate": 0.00040041763660882834, "loss": 1.8102, "step": 35289 }, { "epoch": 1.17, "grad_norm": 0.4744545519351959, "learning_rate": 0.00040040778735446643, "loss": 1.8431, "step": 35290 }, { "epoch": 1.17, "grad_norm": 0.4753314256668091, "learning_rate": 0.00040039793797822426, "loss": 1.7692, "step": 35291 }, { "epoch": 1.17, "grad_norm": 0.4705640971660614, "learning_rate": 0.0004003880884801138, "loss": 1.8293, "step": 35292 }, { "epoch": 1.17, "grad_norm": 0.47227880358695984, "learning_rate": 0.00040037823886014695, "loss": 1.839, "step": 35293 }, { "epoch": 1.17, "grad_norm": 0.47190722823143005, "learning_rate": 0.0004003683891183357, "loss": 1.7395, "step": 35294 }, { "epoch": 1.17, "grad_norm": 0.45713526010513306, "learning_rate": 0.00040035853925469213, "loss": 1.7676, "step": 35295 }, { "epoch": 1.17, "grad_norm": 0.4472545385360718, "learning_rate": 0.00040034868926922787, "loss": 1.7837, "step": 35296 }, { "epoch": 1.17, "grad_norm": 0.46889835596084595, "learning_rate": 0.0004003388391619553, "loss": 1.8075, "step": 35297 }, { "epoch": 1.17, "grad_norm": 0.47398489713668823, "learning_rate": 0.00040032898893288603, "loss": 1.8018, "step": 35298 }, { "epoch": 1.17, "grad_norm": 0.48113587498664856, "learning_rate": 0.00040031913858203224, "loss": 1.8393, "step": 35299 }, { "epoch": 1.17, "grad_norm": 0.48614344000816345, "learning_rate": 0.0004003092881094057, "loss": 1.7976, "step": 35300 }, { "epoch": 1.17, "grad_norm": 0.46394968032836914, "learning_rate": 0.00040029943751501853, "loss": 1.7416, "step": 35301 }, { "epoch": 1.17, "grad_norm": 0.46099695563316345, "learning_rate": 0.00040028958679888267, "loss": 1.7803, "step": 35302 }, { "epoch": 1.17, "grad_norm": 0.48887208104133606, "learning_rate": 0.00040027973596100993, "loss": 1.7657, "step": 35303 }, { "epoch": 1.17, "grad_norm": 0.4782063663005829, "learning_rate": 0.0004002698850014124, "loss": 1.8242, "step": 35304 }, { "epoch": 1.17, "grad_norm": 0.47534576058387756, "learning_rate": 0.00040026003392010196, "loss": 1.7753, "step": 35305 }, { "epoch": 1.17, "grad_norm": 0.4845563769340515, "learning_rate": 0.00040025018271709064, "loss": 1.7992, "step": 35306 }, { "epoch": 1.17, "grad_norm": 0.4742526710033417, "learning_rate": 0.00040024033139239037, "loss": 1.7308, "step": 35307 }, { "epoch": 1.17, "grad_norm": 0.481067955493927, "learning_rate": 0.00040023047994601317, "loss": 1.729, "step": 35308 }, { "epoch": 1.17, "grad_norm": 0.48385563492774963, "learning_rate": 0.0004002206283779708, "loss": 1.7809, "step": 35309 }, { "epoch": 1.17, "grad_norm": 0.465623676776886, "learning_rate": 0.00040021077668827556, "loss": 1.7592, "step": 35310 }, { "epoch": 1.17, "grad_norm": 0.476555198431015, "learning_rate": 0.000400200924876939, "loss": 1.7749, "step": 35311 }, { "epoch": 1.17, "grad_norm": 0.47833874821662903, "learning_rate": 0.0004001910729439734, "loss": 1.8366, "step": 35312 }, { "epoch": 1.17, "grad_norm": 0.48874399065971375, "learning_rate": 0.00040018122088939065, "loss": 1.7704, "step": 35313 }, { "epoch": 1.17, "grad_norm": 0.4786522686481476, "learning_rate": 0.0004001713687132025, "loss": 1.828, "step": 35314 }, { "epoch": 1.17, "grad_norm": 0.4566751718521118, "learning_rate": 0.0004001615164154212, "loss": 1.815, "step": 35315 }, { "epoch": 1.17, "grad_norm": 0.45812177658081055, "learning_rate": 0.0004001516639960584, "loss": 1.8185, "step": 35316 }, { "epoch": 1.18, "grad_norm": 0.47797030210494995, "learning_rate": 0.0004001418114551264, "loss": 1.8103, "step": 35317 }, { "epoch": 1.18, "grad_norm": 0.4727073609828949, "learning_rate": 0.00040013195879263694, "loss": 1.7904, "step": 35318 }, { "epoch": 1.18, "grad_norm": 0.4734823703765869, "learning_rate": 0.00040012210600860206, "loss": 1.8381, "step": 35319 }, { "epoch": 1.18, "grad_norm": 0.4778975546360016, "learning_rate": 0.00040011225310303375, "loss": 1.8114, "step": 35320 }, { "epoch": 1.18, "grad_norm": 0.48212599754333496, "learning_rate": 0.0004001024000759438, "loss": 1.8549, "step": 35321 }, { "epoch": 1.18, "grad_norm": 0.4693586528301239, "learning_rate": 0.00040009254692734435, "loss": 1.7867, "step": 35322 }, { "epoch": 1.18, "grad_norm": 0.45402494072914124, "learning_rate": 0.00040008269365724726, "loss": 1.8008, "step": 35323 }, { "epoch": 1.18, "grad_norm": 0.5279936790466309, "learning_rate": 0.00040007284026566457, "loss": 1.8283, "step": 35324 }, { "epoch": 1.18, "grad_norm": 0.481749951839447, "learning_rate": 0.0004000629867526082, "loss": 1.7447, "step": 35325 }, { "epoch": 1.18, "grad_norm": 0.46825215220451355, "learning_rate": 0.00040005313311809005, "loss": 1.7795, "step": 35326 }, { "epoch": 1.18, "grad_norm": 0.47775015234947205, "learning_rate": 0.0004000432793621222, "loss": 1.8259, "step": 35327 }, { "epoch": 1.18, "grad_norm": 0.49323078989982605, "learning_rate": 0.00040003342548471647, "loss": 1.8436, "step": 35328 }, { "epoch": 1.18, "grad_norm": 0.45152711868286133, "learning_rate": 0.000400023571485885, "loss": 1.7412, "step": 35329 }, { "epoch": 1.18, "grad_norm": 0.45606622099876404, "learning_rate": 0.0004000137173656396, "loss": 1.7873, "step": 35330 }, { "epoch": 1.18, "grad_norm": 0.48286548256874084, "learning_rate": 0.00040000386312399227, "loss": 1.7241, "step": 35331 }, { "epoch": 1.18, "grad_norm": 0.45875903964042664, "learning_rate": 0.000399994008760955, "loss": 1.7804, "step": 35332 }, { "epoch": 1.18, "grad_norm": 0.4691173732280731, "learning_rate": 0.0003999841542765397, "loss": 1.8272, "step": 35333 }, { "epoch": 1.18, "grad_norm": 0.46646648645401, "learning_rate": 0.00039997429967075847, "loss": 1.7172, "step": 35334 }, { "epoch": 1.18, "grad_norm": 0.4682559669017792, "learning_rate": 0.00039996444494362306, "loss": 1.776, "step": 35335 }, { "epoch": 1.18, "grad_norm": 0.47214338183403015, "learning_rate": 0.0003999545900951456, "loss": 1.7663, "step": 35336 }, { "epoch": 1.18, "grad_norm": 0.4752817153930664, "learning_rate": 0.000399944735125338, "loss": 1.7773, "step": 35337 }, { "epoch": 1.18, "grad_norm": 0.4799500107765198, "learning_rate": 0.00039993488003421223, "loss": 1.7852, "step": 35338 }, { "epoch": 1.18, "grad_norm": 0.46823224425315857, "learning_rate": 0.00039992502482178013, "loss": 1.7932, "step": 35339 }, { "epoch": 1.18, "grad_norm": 0.46436434984207153, "learning_rate": 0.0003999151694880539, "loss": 1.7425, "step": 35340 }, { "epoch": 1.18, "grad_norm": 0.4659382700920105, "learning_rate": 0.0003999053140330454, "loss": 1.7014, "step": 35341 }, { "epoch": 1.18, "grad_norm": 0.47990331053733826, "learning_rate": 0.0003998954584567664, "loss": 1.7226, "step": 35342 }, { "epoch": 1.18, "grad_norm": 0.4778001308441162, "learning_rate": 0.00039988560275922925, "loss": 1.8174, "step": 35343 }, { "epoch": 1.18, "grad_norm": 0.4554697573184967, "learning_rate": 0.0003998757469404455, "loss": 1.7686, "step": 35344 }, { "epoch": 1.18, "grad_norm": 0.488940954208374, "learning_rate": 0.0003998658910004274, "loss": 1.795, "step": 35345 }, { "epoch": 1.18, "grad_norm": 0.4770272374153137, "learning_rate": 0.0003998560349391867, "loss": 1.7839, "step": 35346 }, { "epoch": 1.18, "grad_norm": 0.4707895815372467, "learning_rate": 0.00039984617875673574, "loss": 1.7391, "step": 35347 }, { "epoch": 1.18, "grad_norm": 0.4995567500591278, "learning_rate": 0.00039983632245308603, "loss": 1.7623, "step": 35348 }, { "epoch": 1.18, "grad_norm": 0.46533432602882385, "learning_rate": 0.0003998264660282498, "loss": 1.7748, "step": 35349 }, { "epoch": 1.18, "grad_norm": 0.47321125864982605, "learning_rate": 0.00039981660948223903, "loss": 1.8204, "step": 35350 }, { "epoch": 1.18, "grad_norm": 0.4472111761569977, "learning_rate": 0.00039980675281506544, "loss": 1.7636, "step": 35351 }, { "epoch": 1.18, "grad_norm": 0.4677698314189911, "learning_rate": 0.00039979689602674125, "loss": 1.753, "step": 35352 }, { "epoch": 1.18, "grad_norm": 0.4682466685771942, "learning_rate": 0.0003997870391172783, "loss": 1.7999, "step": 35353 }, { "epoch": 1.18, "grad_norm": 0.45810654759407043, "learning_rate": 0.0003997771820866886, "loss": 1.8087, "step": 35354 }, { "epoch": 1.18, "grad_norm": 0.4756576120853424, "learning_rate": 0.00039976732493498407, "loss": 1.8184, "step": 35355 }, { "epoch": 1.18, "grad_norm": 0.4974004328250885, "learning_rate": 0.0003997574676621767, "loss": 1.7905, "step": 35356 }, { "epoch": 1.18, "grad_norm": 0.4756344258785248, "learning_rate": 0.00039974761026827867, "loss": 1.8, "step": 35357 }, { "epoch": 1.18, "grad_norm": 0.4774216413497925, "learning_rate": 0.00039973775275330153, "loss": 1.8471, "step": 35358 }, { "epoch": 1.18, "grad_norm": 0.45843538641929626, "learning_rate": 0.00039972789511725745, "loss": 1.7917, "step": 35359 }, { "epoch": 1.18, "grad_norm": 0.4867979884147644, "learning_rate": 0.00039971803736015845, "loss": 1.788, "step": 35360 }, { "epoch": 1.18, "grad_norm": 0.48974737524986267, "learning_rate": 0.00039970817948201646, "loss": 1.7659, "step": 35361 }, { "epoch": 1.18, "grad_norm": 0.47991639375686646, "learning_rate": 0.0003996983214828434, "loss": 1.7654, "step": 35362 }, { "epoch": 1.18, "grad_norm": 0.4684562385082245, "learning_rate": 0.00039968846336265135, "loss": 1.8267, "step": 35363 }, { "epoch": 1.18, "grad_norm": 0.47569507360458374, "learning_rate": 0.0003996786051214521, "loss": 1.7847, "step": 35364 }, { "epoch": 1.18, "grad_norm": 0.4662379324436188, "learning_rate": 0.0003996687467592577, "loss": 1.8003, "step": 35365 }, { "epoch": 1.18, "grad_norm": 0.500156044960022, "learning_rate": 0.0003996588882760802, "loss": 1.7642, "step": 35366 }, { "epoch": 1.18, "grad_norm": 0.46948981285095215, "learning_rate": 0.00039964902967193144, "loss": 1.8451, "step": 35367 }, { "epoch": 1.18, "grad_norm": 0.4671644866466522, "learning_rate": 0.00039963917094682356, "loss": 1.8356, "step": 35368 }, { "epoch": 1.18, "grad_norm": 0.4793734550476074, "learning_rate": 0.0003996293121007682, "loss": 1.8372, "step": 35369 }, { "epoch": 1.18, "grad_norm": 0.46095457673072815, "learning_rate": 0.00039961945313377766, "loss": 1.8029, "step": 35370 }, { "epoch": 1.18, "grad_norm": 0.48008882999420166, "learning_rate": 0.0003996095940458637, "loss": 1.825, "step": 35371 }, { "epoch": 1.18, "grad_norm": 0.48007720708847046, "learning_rate": 0.0003995997348370385, "loss": 1.8353, "step": 35372 }, { "epoch": 1.18, "grad_norm": 0.4663832485675812, "learning_rate": 0.00039958987550731384, "loss": 1.7422, "step": 35373 }, { "epoch": 1.18, "grad_norm": 0.47215554118156433, "learning_rate": 0.00039958001605670167, "loss": 1.8417, "step": 35374 }, { "epoch": 1.18, "grad_norm": 0.46257317066192627, "learning_rate": 0.0003995701564852142, "loss": 1.7804, "step": 35375 }, { "epoch": 1.18, "grad_norm": 0.4605463147163391, "learning_rate": 0.000399560296792863, "loss": 1.7497, "step": 35376 }, { "epoch": 1.18, "grad_norm": 0.4840519428253174, "learning_rate": 0.00039955043697966045, "loss": 1.7995, "step": 35377 }, { "epoch": 1.18, "grad_norm": 0.49007734656333923, "learning_rate": 0.0003995405770456183, "loss": 1.7697, "step": 35378 }, { "epoch": 1.18, "grad_norm": 0.48686280846595764, "learning_rate": 0.0003995307169907485, "loss": 1.8371, "step": 35379 }, { "epoch": 1.18, "grad_norm": 0.4581133723258972, "learning_rate": 0.0003995208568150632, "loss": 1.804, "step": 35380 }, { "epoch": 1.18, "grad_norm": 0.47027429938316345, "learning_rate": 0.00039951099651857404, "loss": 1.7918, "step": 35381 }, { "epoch": 1.18, "grad_norm": 0.48613062500953674, "learning_rate": 0.00039950113610129345, "loss": 1.7686, "step": 35382 }, { "epoch": 1.18, "grad_norm": 0.47740164399147034, "learning_rate": 0.0003994912755632329, "loss": 1.8479, "step": 35383 }, { "epoch": 1.18, "grad_norm": 0.47766903042793274, "learning_rate": 0.00039948141490440476, "loss": 1.7755, "step": 35384 }, { "epoch": 1.18, "grad_norm": 0.4535510241985321, "learning_rate": 0.0003994715541248208, "loss": 1.7483, "step": 35385 }, { "epoch": 1.18, "grad_norm": 0.4645022749900818, "learning_rate": 0.00039946169322449294, "loss": 1.7604, "step": 35386 }, { "epoch": 1.18, "grad_norm": 0.47133636474609375, "learning_rate": 0.0003994518322034334, "loss": 1.7796, "step": 35387 }, { "epoch": 1.18, "grad_norm": 0.45887333154678345, "learning_rate": 0.00039944197106165383, "loss": 1.7931, "step": 35388 }, { "epoch": 1.18, "grad_norm": 0.46517321467399597, "learning_rate": 0.0003994321097991665, "loss": 1.7417, "step": 35389 }, { "epoch": 1.18, "grad_norm": 0.47709012031555176, "learning_rate": 0.0003994222484159831, "loss": 1.7941, "step": 35390 }, { "epoch": 1.18, "grad_norm": 0.46155717968940735, "learning_rate": 0.0003994123869121158, "loss": 1.8299, "step": 35391 }, { "epoch": 1.18, "grad_norm": 0.4755792021751404, "learning_rate": 0.0003994025252875766, "loss": 1.7413, "step": 35392 }, { "epoch": 1.18, "grad_norm": 0.4778454005718231, "learning_rate": 0.0003993926635423773, "loss": 1.7765, "step": 35393 }, { "epoch": 1.18, "grad_norm": 0.4600928723812103, "learning_rate": 0.0003993828016765299, "loss": 1.8669, "step": 35394 }, { "epoch": 1.18, "grad_norm": 0.4713699221611023, "learning_rate": 0.00039937293969004653, "loss": 1.8263, "step": 35395 }, { "epoch": 1.18, "grad_norm": 0.4563983976840973, "learning_rate": 0.00039936307758293895, "loss": 1.7928, "step": 35396 }, { "epoch": 1.18, "grad_norm": 0.4626682698726654, "learning_rate": 0.0003993532153552193, "loss": 1.7454, "step": 35397 }, { "epoch": 1.18, "grad_norm": 0.488419771194458, "learning_rate": 0.00039934335300689946, "loss": 1.7798, "step": 35398 }, { "epoch": 1.18, "grad_norm": 0.4834720194339752, "learning_rate": 0.00039933349053799145, "loss": 1.8419, "step": 35399 }, { "epoch": 1.18, "grad_norm": 0.4884093403816223, "learning_rate": 0.00039932362794850725, "loss": 1.7752, "step": 35400 }, { "epoch": 1.18, "grad_norm": 0.48494669795036316, "learning_rate": 0.00039931376523845864, "loss": 1.7835, "step": 35401 }, { "epoch": 1.18, "grad_norm": 0.4513944387435913, "learning_rate": 0.00039930390240785796, "loss": 1.7194, "step": 35402 }, { "epoch": 1.18, "grad_norm": 0.4996068477630615, "learning_rate": 0.0003992940394567169, "loss": 1.8375, "step": 35403 }, { "epoch": 1.18, "grad_norm": 0.4641425311565399, "learning_rate": 0.0003992841763850475, "loss": 1.7183, "step": 35404 }, { "epoch": 1.18, "grad_norm": 0.4744643568992615, "learning_rate": 0.0003992743131928617, "loss": 1.7308, "step": 35405 }, { "epoch": 1.18, "grad_norm": 0.48197102546691895, "learning_rate": 0.0003992644498801715, "loss": 1.7853, "step": 35406 }, { "epoch": 1.18, "grad_norm": 0.49312031269073486, "learning_rate": 0.00039925458644698893, "loss": 1.7872, "step": 35407 }, { "epoch": 1.18, "grad_norm": 0.4727371633052826, "learning_rate": 0.00039924472289332586, "loss": 1.7635, "step": 35408 }, { "epoch": 1.18, "grad_norm": 0.47629544138908386, "learning_rate": 0.00039923485921919443, "loss": 1.7956, "step": 35409 }, { "epoch": 1.18, "grad_norm": 0.4799489974975586, "learning_rate": 0.00039922499542460646, "loss": 1.7614, "step": 35410 }, { "epoch": 1.18, "grad_norm": 0.4690002501010895, "learning_rate": 0.0003992151315095739, "loss": 1.7124, "step": 35411 }, { "epoch": 1.18, "grad_norm": 1.340867042541504, "learning_rate": 0.00039920526747410886, "loss": 1.8978, "step": 35412 }, { "epoch": 1.18, "grad_norm": 0.48142558336257935, "learning_rate": 0.00039919540331822314, "loss": 1.7639, "step": 35413 }, { "epoch": 1.18, "grad_norm": 0.4877397418022156, "learning_rate": 0.00039918553904192897, "loss": 1.8207, "step": 35414 }, { "epoch": 1.18, "grad_norm": 0.47845691442489624, "learning_rate": 0.0003991756746452381, "loss": 1.7878, "step": 35415 }, { "epoch": 1.18, "grad_norm": 0.4604746401309967, "learning_rate": 0.00039916581012816257, "loss": 1.8069, "step": 35416 }, { "epoch": 1.18, "grad_norm": 0.4887460470199585, "learning_rate": 0.0003991559454907143, "loss": 1.782, "step": 35417 }, { "epoch": 1.18, "grad_norm": 0.48001694679260254, "learning_rate": 0.00039914608073290535, "loss": 1.8131, "step": 35418 }, { "epoch": 1.18, "grad_norm": 0.47405892610549927, "learning_rate": 0.0003991362158547478, "loss": 1.7986, "step": 35419 }, { "epoch": 1.18, "grad_norm": 0.46656447649002075, "learning_rate": 0.0003991263508562534, "loss": 1.8056, "step": 35420 }, { "epoch": 1.18, "grad_norm": 0.4950142204761505, "learning_rate": 0.00039911648573743414, "loss": 1.8224, "step": 35421 }, { "epoch": 1.18, "grad_norm": 0.47947821021080017, "learning_rate": 0.0003991066204983021, "loss": 1.8003, "step": 35422 }, { "epoch": 1.18, "grad_norm": 0.46577897667884827, "learning_rate": 0.00039909675513886923, "loss": 1.7665, "step": 35423 }, { "epoch": 1.18, "grad_norm": 0.48085686564445496, "learning_rate": 0.00039908688965914756, "loss": 1.7689, "step": 35424 }, { "epoch": 1.18, "grad_norm": 0.46515509486198425, "learning_rate": 0.00039907702405914906, "loss": 1.7136, "step": 35425 }, { "epoch": 1.18, "grad_norm": 0.4579901397228241, "learning_rate": 0.0003990671583388854, "loss": 1.8174, "step": 35426 }, { "epoch": 1.18, "grad_norm": 0.4756576716899872, "learning_rate": 0.0003990572924983691, "loss": 1.7501, "step": 35427 }, { "epoch": 1.18, "grad_norm": 0.5070897936820984, "learning_rate": 0.0003990474265376117, "loss": 1.8838, "step": 35428 }, { "epoch": 1.18, "grad_norm": 0.4575485289096832, "learning_rate": 0.00039903756045662526, "loss": 1.7286, "step": 35429 }, { "epoch": 1.18, "grad_norm": 0.46944424510002136, "learning_rate": 0.00039902769425542196, "loss": 1.8237, "step": 35430 }, { "epoch": 1.18, "grad_norm": 0.4877147972583771, "learning_rate": 0.0003990178279340135, "loss": 1.8391, "step": 35431 }, { "epoch": 1.18, "grad_norm": 0.4549371004104614, "learning_rate": 0.0003990079614924121, "loss": 1.832, "step": 35432 }, { "epoch": 1.18, "grad_norm": 0.5610917210578918, "learning_rate": 0.00039899809493062946, "loss": 1.6504, "step": 35433 }, { "epoch": 1.18, "grad_norm": 0.4673578143119812, "learning_rate": 0.00039898822824867785, "loss": 1.7587, "step": 35434 }, { "epoch": 1.18, "grad_norm": 0.4797244966030121, "learning_rate": 0.0003989783614465691, "loss": 1.7613, "step": 35435 }, { "epoch": 1.18, "grad_norm": 0.4686238169670105, "learning_rate": 0.0003989684945243152, "loss": 1.8086, "step": 35436 }, { "epoch": 1.18, "grad_norm": 0.4773666560649872, "learning_rate": 0.0003989586274819282, "loss": 1.7678, "step": 35437 }, { "epoch": 1.18, "grad_norm": 0.4586491882801056, "learning_rate": 0.0003989487603194199, "loss": 1.7528, "step": 35438 }, { "epoch": 1.18, "grad_norm": 0.466086745262146, "learning_rate": 0.00039893889303680243, "loss": 1.8766, "step": 35439 }, { "epoch": 1.18, "grad_norm": 0.4708464443683624, "learning_rate": 0.0003989290256340877, "loss": 1.7405, "step": 35440 }, { "epoch": 1.18, "grad_norm": 0.4698041081428528, "learning_rate": 0.00039891915811128765, "loss": 1.8259, "step": 35441 }, { "epoch": 1.18, "grad_norm": 0.448284775018692, "learning_rate": 0.0003989092904684144, "loss": 1.7855, "step": 35442 }, { "epoch": 1.18, "grad_norm": 0.4830259680747986, "learning_rate": 0.0003988994227054798, "loss": 1.8211, "step": 35443 }, { "epoch": 1.18, "grad_norm": 0.4614606499671936, "learning_rate": 0.000398889554822496, "loss": 1.8515, "step": 35444 }, { "epoch": 1.18, "grad_norm": 0.47432103753089905, "learning_rate": 0.0003988796868194747, "loss": 1.7929, "step": 35445 }, { "epoch": 1.18, "grad_norm": 0.46461325883865356, "learning_rate": 0.000398869818696428, "loss": 1.8337, "step": 35446 }, { "epoch": 1.18, "grad_norm": 0.4720934331417084, "learning_rate": 0.0003988599504533681, "loss": 1.8165, "step": 35447 }, { "epoch": 1.18, "grad_norm": 0.46128299832344055, "learning_rate": 0.00039885008209030663, "loss": 1.782, "step": 35448 }, { "epoch": 1.18, "grad_norm": 0.4709624946117401, "learning_rate": 0.00039884021360725577, "loss": 1.7092, "step": 35449 }, { "epoch": 1.18, "grad_norm": 0.4774271547794342, "learning_rate": 0.0003988303450042275, "loss": 1.7886, "step": 35450 }, { "epoch": 1.18, "grad_norm": 0.47790706157684326, "learning_rate": 0.00039882047628123367, "loss": 1.7496, "step": 35451 }, { "epoch": 1.18, "grad_norm": 0.47191354632377625, "learning_rate": 0.0003988106074382864, "loss": 1.7867, "step": 35452 }, { "epoch": 1.18, "grad_norm": 0.46976515650749207, "learning_rate": 0.0003988007384753976, "loss": 1.7733, "step": 35453 }, { "epoch": 1.18, "grad_norm": 0.4885551929473877, "learning_rate": 0.0003987908693925792, "loss": 1.8636, "step": 35454 }, { "epoch": 1.18, "grad_norm": 0.48120957612991333, "learning_rate": 0.0003987810001898434, "loss": 1.7859, "step": 35455 }, { "epoch": 1.18, "grad_norm": 0.4574630558490753, "learning_rate": 0.0003987711308672018, "loss": 1.864, "step": 35456 }, { "epoch": 1.18, "grad_norm": 0.4734266400337219, "learning_rate": 0.0003987612614246667, "loss": 1.7377, "step": 35457 }, { "epoch": 1.18, "grad_norm": 0.46860700845718384, "learning_rate": 0.0003987513918622501, "loss": 1.8064, "step": 35458 }, { "epoch": 1.18, "grad_norm": 0.4778306484222412, "learning_rate": 0.0003987415221799637, "loss": 1.7713, "step": 35459 }, { "epoch": 1.18, "grad_norm": 0.46394091844558716, "learning_rate": 0.0003987316523778197, "loss": 1.7355, "step": 35460 }, { "epoch": 1.18, "grad_norm": 0.4677645266056061, "learning_rate": 0.00039872178245583, "loss": 1.7401, "step": 35461 }, { "epoch": 1.18, "grad_norm": 0.47350773215293884, "learning_rate": 0.00039871191241400665, "loss": 1.7626, "step": 35462 }, { "epoch": 1.18, "grad_norm": 0.4649490416049957, "learning_rate": 0.0003987020422523615, "loss": 1.7267, "step": 35463 }, { "epoch": 1.18, "grad_norm": 0.46758800745010376, "learning_rate": 0.00039869217197090667, "loss": 1.7774, "step": 35464 }, { "epoch": 1.18, "grad_norm": 0.4714577794075012, "learning_rate": 0.0003986823015696541, "loss": 1.7606, "step": 35465 }, { "epoch": 1.18, "grad_norm": 0.4678167402744293, "learning_rate": 0.0003986724310486157, "loss": 1.803, "step": 35466 }, { "epoch": 1.18, "grad_norm": 0.46352049708366394, "learning_rate": 0.0003986625604078036, "loss": 1.8005, "step": 35467 }, { "epoch": 1.18, "grad_norm": 0.4688573181629181, "learning_rate": 0.00039865268964722956, "loss": 1.7912, "step": 35468 }, { "epoch": 1.18, "grad_norm": 0.49172815680503845, "learning_rate": 0.00039864281876690583, "loss": 1.8105, "step": 35469 }, { "epoch": 1.18, "grad_norm": 0.4620913863182068, "learning_rate": 0.0003986329477668442, "loss": 1.8406, "step": 35470 }, { "epoch": 1.18, "grad_norm": 0.47865787148475647, "learning_rate": 0.00039862307664705667, "loss": 1.8965, "step": 35471 }, { "epoch": 1.18, "grad_norm": 0.480405330657959, "learning_rate": 0.0003986132054075553, "loss": 1.8128, "step": 35472 }, { "epoch": 1.18, "grad_norm": 0.4828455150127411, "learning_rate": 0.00039860333404835195, "loss": 1.8206, "step": 35473 }, { "epoch": 1.18, "grad_norm": 0.4769778251647949, "learning_rate": 0.0003985934625694588, "loss": 1.7244, "step": 35474 }, { "epoch": 1.18, "grad_norm": 0.48088306188583374, "learning_rate": 0.0003985835909708876, "loss": 1.7827, "step": 35475 }, { "epoch": 1.18, "grad_norm": 0.4646625220775604, "learning_rate": 0.0003985737192526505, "loss": 1.7553, "step": 35476 }, { "epoch": 1.18, "grad_norm": 0.4771401584148407, "learning_rate": 0.0003985638474147594, "loss": 1.7447, "step": 35477 }, { "epoch": 1.18, "grad_norm": 0.4696112871170044, "learning_rate": 0.0003985539754572264, "loss": 1.7228, "step": 35478 }, { "epoch": 1.18, "grad_norm": 0.46452680230140686, "learning_rate": 0.0003985441033800633, "loss": 1.7768, "step": 35479 }, { "epoch": 1.18, "grad_norm": 0.4981105923652649, "learning_rate": 0.00039853423118328224, "loss": 1.8005, "step": 35480 }, { "epoch": 1.18, "grad_norm": 0.4831906259059906, "learning_rate": 0.00039852435886689506, "loss": 1.8114, "step": 35481 }, { "epoch": 1.18, "grad_norm": 0.48918819427490234, "learning_rate": 0.0003985144864309139, "loss": 1.7565, "step": 35482 }, { "epoch": 1.18, "grad_norm": 0.4844586253166199, "learning_rate": 0.0003985046138753506, "loss": 1.8005, "step": 35483 }, { "epoch": 1.18, "grad_norm": 0.48010239005088806, "learning_rate": 0.00039849474120021736, "loss": 1.8045, "step": 35484 }, { "epoch": 1.18, "grad_norm": 0.4636836647987366, "learning_rate": 0.0003984848684055259, "loss": 1.7553, "step": 35485 }, { "epoch": 1.18, "grad_norm": 0.4778355360031128, "learning_rate": 0.00039847499549128826, "loss": 1.7806, "step": 35486 }, { "epoch": 1.18, "grad_norm": 0.4846589267253876, "learning_rate": 0.0003984651224575166, "loss": 1.7201, "step": 35487 }, { "epoch": 1.18, "grad_norm": 0.488282173871994, "learning_rate": 0.00039845524930422275, "loss": 1.8328, "step": 35488 }, { "epoch": 1.18, "grad_norm": 0.5038496255874634, "learning_rate": 0.00039844537603141877, "loss": 1.7385, "step": 35489 }, { "epoch": 1.18, "grad_norm": 0.4505833387374878, "learning_rate": 0.00039843550263911657, "loss": 1.8294, "step": 35490 }, { "epoch": 1.18, "grad_norm": 0.48320910334587097, "learning_rate": 0.00039842562912732814, "loss": 1.7341, "step": 35491 }, { "epoch": 1.18, "grad_norm": 0.4705679714679718, "learning_rate": 0.0003984157554960656, "loss": 1.75, "step": 35492 }, { "epoch": 1.18, "grad_norm": 0.47173675894737244, "learning_rate": 0.0003984058817453407, "loss": 1.7864, "step": 35493 }, { "epoch": 1.18, "grad_norm": 0.4833739995956421, "learning_rate": 0.00039839600787516566, "loss": 1.7925, "step": 35494 }, { "epoch": 1.18, "grad_norm": 0.47430622577667236, "learning_rate": 0.00039838613388555236, "loss": 1.8527, "step": 35495 }, { "epoch": 1.18, "grad_norm": 0.47450584173202515, "learning_rate": 0.00039837625977651273, "loss": 1.8467, "step": 35496 }, { "epoch": 1.18, "grad_norm": 0.48357605934143066, "learning_rate": 0.00039836638554805885, "loss": 1.7986, "step": 35497 }, { "epoch": 1.18, "grad_norm": 0.45681530237197876, "learning_rate": 0.0003983565112002026, "loss": 1.7888, "step": 35498 }, { "epoch": 1.18, "grad_norm": 0.47243717312812805, "learning_rate": 0.0003983466367329562, "loss": 1.7284, "step": 35499 }, { "epoch": 1.18, "grad_norm": 0.4715858995914459, "learning_rate": 0.0003983367621463313, "loss": 1.7929, "step": 35500 }, { "epoch": 1.18, "grad_norm": 0.45649453997612, "learning_rate": 0.0003983268874403402, "loss": 1.7215, "step": 35501 }, { "epoch": 1.18, "grad_norm": 0.4608646631240845, "learning_rate": 0.0003983170126149947, "loss": 1.791, "step": 35502 }, { "epoch": 1.18, "grad_norm": 0.48081454634666443, "learning_rate": 0.0003983071376703068, "loss": 1.743, "step": 35503 }, { "epoch": 1.18, "grad_norm": 0.4717599153518677, "learning_rate": 0.0003982972626062885, "loss": 1.7713, "step": 35504 }, { "epoch": 1.18, "grad_norm": 0.4759327471256256, "learning_rate": 0.00039828738742295184, "loss": 1.7692, "step": 35505 }, { "epoch": 1.18, "grad_norm": 0.46482524275779724, "learning_rate": 0.0003982775121203089, "loss": 1.7956, "step": 35506 }, { "epoch": 1.18, "grad_norm": 0.4543541669845581, "learning_rate": 0.0003982676366983713, "loss": 1.7187, "step": 35507 }, { "epoch": 1.18, "grad_norm": 0.4646880030632019, "learning_rate": 0.0003982577611571514, "loss": 1.7555, "step": 35508 }, { "epoch": 1.18, "grad_norm": 0.47862890362739563, "learning_rate": 0.00039824788549666103, "loss": 1.7936, "step": 35509 }, { "epoch": 1.18, "grad_norm": 0.4730852544307709, "learning_rate": 0.00039823800971691226, "loss": 1.7868, "step": 35510 }, { "epoch": 1.18, "grad_norm": 0.47568926215171814, "learning_rate": 0.00039822813381791696, "loss": 1.7559, "step": 35511 }, { "epoch": 1.18, "grad_norm": 0.4781561493873596, "learning_rate": 0.00039821825779968724, "loss": 1.854, "step": 35512 }, { "epoch": 1.18, "grad_norm": 0.46989506483078003, "learning_rate": 0.000398208381662235, "loss": 1.8071, "step": 35513 }, { "epoch": 1.18, "grad_norm": 0.4708738923072815, "learning_rate": 0.00039819850540557226, "loss": 1.7242, "step": 35514 }, { "epoch": 1.18, "grad_norm": 0.474027544260025, "learning_rate": 0.0003981886290297109, "loss": 1.7407, "step": 35515 }, { "epoch": 1.18, "grad_norm": 0.47679510712623596, "learning_rate": 0.0003981787525346631, "loss": 1.8209, "step": 35516 }, { "epoch": 1.18, "grad_norm": 0.4609110355377197, "learning_rate": 0.00039816887592044085, "loss": 1.6925, "step": 35517 }, { "epoch": 1.18, "grad_norm": 0.4625685214996338, "learning_rate": 0.0003981589991870558, "loss": 1.7585, "step": 35518 }, { "epoch": 1.18, "grad_norm": 0.4528284966945648, "learning_rate": 0.00039814912233452043, "loss": 1.7287, "step": 35519 }, { "epoch": 1.18, "grad_norm": 0.469169944524765, "learning_rate": 0.0003981392453628464, "loss": 1.7417, "step": 35520 }, { "epoch": 1.18, "grad_norm": 0.48033106327056885, "learning_rate": 0.0003981293682720458, "loss": 1.8305, "step": 35521 }, { "epoch": 1.18, "grad_norm": 0.46389147639274597, "learning_rate": 0.00039811949106213063, "loss": 1.7286, "step": 35522 }, { "epoch": 1.18, "grad_norm": 0.48652687668800354, "learning_rate": 0.00039810961373311273, "loss": 1.8132, "step": 35523 }, { "epoch": 1.18, "grad_norm": 0.45643165707588196, "learning_rate": 0.0003980997362850044, "loss": 1.7868, "step": 35524 }, { "epoch": 1.18, "grad_norm": 0.4620741903781891, "learning_rate": 0.00039808985871781725, "loss": 1.8071, "step": 35525 }, { "epoch": 1.18, "grad_norm": 0.48059192299842834, "learning_rate": 0.0003980799810315636, "loss": 1.7239, "step": 35526 }, { "epoch": 1.18, "grad_norm": 0.4692353904247284, "learning_rate": 0.0003980701032262553, "loss": 1.7327, "step": 35527 }, { "epoch": 1.18, "grad_norm": 0.46836546063423157, "learning_rate": 0.0003980602253019043, "loss": 1.7985, "step": 35528 }, { "epoch": 1.18, "grad_norm": 0.4749796688556671, "learning_rate": 0.0003980503472585226, "loss": 1.6915, "step": 35529 }, { "epoch": 1.18, "grad_norm": 0.48128247261047363, "learning_rate": 0.00039804046909612225, "loss": 1.7971, "step": 35530 }, { "epoch": 1.18, "grad_norm": 0.46252623200416565, "learning_rate": 0.0003980305908147153, "loss": 1.7451, "step": 35531 }, { "epoch": 1.18, "grad_norm": 0.454305499792099, "learning_rate": 0.0003980207124143136, "loss": 1.7533, "step": 35532 }, { "epoch": 1.18, "grad_norm": 0.4835323393344879, "learning_rate": 0.00039801083389492915, "loss": 1.7979, "step": 35533 }, { "epoch": 1.18, "grad_norm": 0.46887311339378357, "learning_rate": 0.000398000955256574, "loss": 1.768, "step": 35534 }, { "epoch": 1.18, "grad_norm": 0.47969943284988403, "learning_rate": 0.00039799107649926015, "loss": 1.7367, "step": 35535 }, { "epoch": 1.18, "grad_norm": 0.46760913729667664, "learning_rate": 0.00039798119762299967, "loss": 1.7724, "step": 35536 }, { "epoch": 1.18, "grad_norm": 0.46694740653038025, "learning_rate": 0.0003979713186278043, "loss": 1.8155, "step": 35537 }, { "epoch": 1.18, "grad_norm": 0.46278560161590576, "learning_rate": 0.0003979614395136863, "loss": 1.7862, "step": 35538 }, { "epoch": 1.18, "grad_norm": 0.4758636951446533, "learning_rate": 0.00039795156028065744, "loss": 1.7905, "step": 35539 }, { "epoch": 1.18, "grad_norm": 0.47165724635124207, "learning_rate": 0.0003979416809287299, "loss": 1.7857, "step": 35540 }, { "epoch": 1.18, "grad_norm": 0.4701491594314575, "learning_rate": 0.0003979318014579155, "loss": 1.762, "step": 35541 }, { "epoch": 1.18, "grad_norm": 0.46953195333480835, "learning_rate": 0.0003979219218682265, "loss": 1.8158, "step": 35542 }, { "epoch": 1.18, "grad_norm": 0.4642057716846466, "learning_rate": 0.00039791204215967445, "loss": 1.7757, "step": 35543 }, { "epoch": 1.18, "grad_norm": 0.47686269879341125, "learning_rate": 0.00039790216233227186, "loss": 1.7375, "step": 35544 }, { "epoch": 1.18, "grad_norm": 0.4647451937198639, "learning_rate": 0.00039789228238603036, "loss": 1.8053, "step": 35545 }, { "epoch": 1.18, "grad_norm": 0.4725889265537262, "learning_rate": 0.0003978824023209621, "loss": 1.7876, "step": 35546 }, { "epoch": 1.18, "grad_norm": 0.4617708921432495, "learning_rate": 0.000397872522137079, "loss": 1.7837, "step": 35547 }, { "epoch": 1.18, "grad_norm": 0.4721294641494751, "learning_rate": 0.00039786264183439295, "loss": 1.8176, "step": 35548 }, { "epoch": 1.18, "grad_norm": 0.4731142520904541, "learning_rate": 0.00039785276141291625, "loss": 1.7803, "step": 35549 }, { "epoch": 1.18, "grad_norm": 0.462726354598999, "learning_rate": 0.0003978428808726606, "loss": 1.8459, "step": 35550 }, { "epoch": 1.18, "grad_norm": 0.46193763613700867, "learning_rate": 0.00039783300021363824, "loss": 1.8339, "step": 35551 }, { "epoch": 1.18, "grad_norm": 0.4813333749771118, "learning_rate": 0.00039782311943586094, "loss": 1.7006, "step": 35552 }, { "epoch": 1.18, "grad_norm": 0.48181965947151184, "learning_rate": 0.00039781323853934074, "loss": 1.8467, "step": 35553 }, { "epoch": 1.18, "grad_norm": 0.48526352643966675, "learning_rate": 0.00039780335752408983, "loss": 1.7364, "step": 35554 }, { "epoch": 1.18, "grad_norm": 0.49379441142082214, "learning_rate": 0.0003977934763901199, "loss": 1.8509, "step": 35555 }, { "epoch": 1.18, "grad_norm": 0.46567147970199585, "learning_rate": 0.0003977835951374432, "loss": 1.8062, "step": 35556 }, { "epoch": 1.18, "grad_norm": 0.6069644689559937, "learning_rate": 0.0003977737137660716, "loss": 1.8344, "step": 35557 }, { "epoch": 1.18, "grad_norm": 0.4704917371273041, "learning_rate": 0.0003977638322760171, "loss": 1.7695, "step": 35558 }, { "epoch": 1.18, "grad_norm": 0.4771663546562195, "learning_rate": 0.00039775395066729167, "loss": 1.7693, "step": 35559 }, { "epoch": 1.18, "grad_norm": 0.4739903509616852, "learning_rate": 0.0003977440689399074, "loss": 1.7993, "step": 35560 }, { "epoch": 1.18, "grad_norm": 0.4765171706676483, "learning_rate": 0.0003977341870938763, "loss": 1.8121, "step": 35561 }, { "epoch": 1.18, "grad_norm": 0.49395227432250977, "learning_rate": 0.00039772430512921027, "loss": 1.8138, "step": 35562 }, { "epoch": 1.18, "grad_norm": 0.47644278407096863, "learning_rate": 0.00039771442304592123, "loss": 1.7571, "step": 35563 }, { "epoch": 1.18, "grad_norm": 0.48721134662628174, "learning_rate": 0.00039770454084402134, "loss": 1.7945, "step": 35564 }, { "epoch": 1.18, "grad_norm": 0.4831426739692688, "learning_rate": 0.00039769465852352253, "loss": 1.8027, "step": 35565 }, { "epoch": 1.18, "grad_norm": 0.4670020341873169, "learning_rate": 0.0003976847760844367, "loss": 1.7785, "step": 35566 }, { "epoch": 1.18, "grad_norm": 0.46218934655189514, "learning_rate": 0.0003976748935267762, "loss": 1.8792, "step": 35567 }, { "epoch": 1.18, "grad_norm": 0.5028169751167297, "learning_rate": 0.00039766501085055256, "loss": 1.8019, "step": 35568 }, { "epoch": 1.18, "grad_norm": 0.4726638197898865, "learning_rate": 0.00039765512805577807, "loss": 1.7973, "step": 35569 }, { "epoch": 1.18, "grad_norm": 0.9858151078224182, "learning_rate": 0.0003976452451424646, "loss": 1.7468, "step": 35570 }, { "epoch": 1.18, "grad_norm": 0.4577288031578064, "learning_rate": 0.00039763536211062417, "loss": 1.7511, "step": 35571 }, { "epoch": 1.18, "grad_norm": 0.45638617873191833, "learning_rate": 0.0003976254789602689, "loss": 1.789, "step": 35572 }, { "epoch": 1.18, "grad_norm": 0.45926809310913086, "learning_rate": 0.00039761559569141057, "loss": 1.7735, "step": 35573 }, { "epoch": 1.18, "grad_norm": 0.5145929455757141, "learning_rate": 0.00039760571230406137, "loss": 1.763, "step": 35574 }, { "epoch": 1.18, "grad_norm": 0.47824087738990784, "learning_rate": 0.00039759582879823316, "loss": 1.7483, "step": 35575 }, { "epoch": 1.18, "grad_norm": 0.4743667244911194, "learning_rate": 0.000397585945173938, "loss": 1.7915, "step": 35576 }, { "epoch": 1.18, "grad_norm": 0.4817202091217041, "learning_rate": 0.000397576061431188, "loss": 1.7961, "step": 35577 }, { "epoch": 1.18, "grad_norm": 0.4771331548690796, "learning_rate": 0.0003975661775699948, "loss": 1.869, "step": 35578 }, { "epoch": 1.18, "grad_norm": 0.4828267991542816, "learning_rate": 0.00039755629359037085, "loss": 1.8027, "step": 35579 }, { "epoch": 1.18, "grad_norm": 0.47920048236846924, "learning_rate": 0.0003975464094923278, "loss": 1.7685, "step": 35580 }, { "epoch": 1.18, "grad_norm": 0.49179980158805847, "learning_rate": 0.00039753652527587784, "loss": 1.7511, "step": 35581 }, { "epoch": 1.18, "grad_norm": 0.47892269492149353, "learning_rate": 0.00039752664094103293, "loss": 1.7444, "step": 35582 }, { "epoch": 1.18, "grad_norm": 0.4728761613368988, "learning_rate": 0.000397516756487805, "loss": 1.8139, "step": 35583 }, { "epoch": 1.18, "grad_norm": 0.508763313293457, "learning_rate": 0.0003975068719162062, "loss": 1.809, "step": 35584 }, { "epoch": 1.18, "grad_norm": 0.4728795886039734, "learning_rate": 0.0003974969872262483, "loss": 1.7495, "step": 35585 }, { "epoch": 1.18, "grad_norm": 0.4822690486907959, "learning_rate": 0.00039748710241794345, "loss": 1.8107, "step": 35586 }, { "epoch": 1.18, "grad_norm": 0.45308175683021545, "learning_rate": 0.00039747721749130364, "loss": 1.7486, "step": 35587 }, { "epoch": 1.18, "grad_norm": 0.46407532691955566, "learning_rate": 0.00039746733244634084, "loss": 1.7873, "step": 35588 }, { "epoch": 1.18, "grad_norm": 0.4748958647251129, "learning_rate": 0.00039745744728306707, "loss": 1.7174, "step": 35589 }, { "epoch": 1.18, "grad_norm": 0.4622482657432556, "learning_rate": 0.0003974475620014943, "loss": 1.8331, "step": 35590 }, { "epoch": 1.18, "grad_norm": 0.4825826585292816, "learning_rate": 0.00039743767660163463, "loss": 1.7824, "step": 35591 }, { "epoch": 1.18, "grad_norm": 0.4739501476287842, "learning_rate": 0.0003974277910834999, "loss": 1.7637, "step": 35592 }, { "epoch": 1.18, "grad_norm": 0.469682902097702, "learning_rate": 0.00039741790544710223, "loss": 1.7302, "step": 35593 }, { "epoch": 1.18, "grad_norm": 0.4766780138015747, "learning_rate": 0.0003974080196924535, "loss": 1.7566, "step": 35594 }, { "epoch": 1.18, "grad_norm": 0.46578818559646606, "learning_rate": 0.00039739813381956587, "loss": 1.7878, "step": 35595 }, { "epoch": 1.18, "grad_norm": 0.46445485949516296, "learning_rate": 0.00039738824782845116, "loss": 1.7326, "step": 35596 }, { "epoch": 1.18, "grad_norm": 0.46187588572502136, "learning_rate": 0.0003973783617191216, "loss": 1.8204, "step": 35597 }, { "epoch": 1.18, "grad_norm": 0.4589036703109741, "learning_rate": 0.0003973684754915889, "loss": 1.7629, "step": 35598 }, { "epoch": 1.18, "grad_norm": 0.46676814556121826, "learning_rate": 0.0003973585891458654, "loss": 1.715, "step": 35599 }, { "epoch": 1.18, "grad_norm": 0.4654846489429474, "learning_rate": 0.00039734870268196275, "loss": 1.7683, "step": 35600 }, { "epoch": 1.18, "grad_norm": 0.46157339215278625, "learning_rate": 0.00039733881609989324, "loss": 1.7931, "step": 35601 }, { "epoch": 1.18, "grad_norm": 0.4758140742778778, "learning_rate": 0.00039732892939966873, "loss": 1.775, "step": 35602 }, { "epoch": 1.18, "grad_norm": 0.4712395668029785, "learning_rate": 0.0003973190425813011, "loss": 1.7923, "step": 35603 }, { "epoch": 1.18, "grad_norm": 0.4656660258769989, "learning_rate": 0.0003973091556448027, "loss": 1.7996, "step": 35604 }, { "epoch": 1.18, "grad_norm": 0.4830307066440582, "learning_rate": 0.00039729926859018513, "loss": 1.8413, "step": 35605 }, { "epoch": 1.18, "grad_norm": 0.4731253683567047, "learning_rate": 0.0003972893814174607, "loss": 1.7107, "step": 35606 }, { "epoch": 1.18, "grad_norm": 0.48596417903900146, "learning_rate": 0.00039727949412664124, "loss": 1.812, "step": 35607 }, { "epoch": 1.18, "grad_norm": 0.45457032322883606, "learning_rate": 0.0003972696067177389, "loss": 1.7677, "step": 35608 }, { "epoch": 1.18, "grad_norm": 0.47480419278144836, "learning_rate": 0.0003972597191907655, "loss": 1.8053, "step": 35609 }, { "epoch": 1.18, "grad_norm": 1.1413540840148926, "learning_rate": 0.00039724983154573317, "loss": 1.8493, "step": 35610 }, { "epoch": 1.18, "grad_norm": 0.507574737071991, "learning_rate": 0.0003972399437826539, "loss": 1.7887, "step": 35611 }, { "epoch": 1.18, "grad_norm": 0.4822347164154053, "learning_rate": 0.0003972300559015396, "loss": 1.861, "step": 35612 }, { "epoch": 1.18, "grad_norm": 0.49183890223503113, "learning_rate": 0.00039722016790240237, "loss": 1.8494, "step": 35613 }, { "epoch": 1.18, "grad_norm": 0.48122942447662354, "learning_rate": 0.00039721027978525416, "loss": 1.779, "step": 35614 }, { "epoch": 1.18, "grad_norm": 0.47799479961395264, "learning_rate": 0.000397200391550107, "loss": 1.8304, "step": 35615 }, { "epoch": 1.18, "grad_norm": 0.46150800585746765, "learning_rate": 0.0003971905031969729, "loss": 1.7381, "step": 35616 }, { "epoch": 1.18, "grad_norm": 0.46063730120658875, "learning_rate": 0.00039718061472586387, "loss": 1.8065, "step": 35617 }, { "epoch": 1.19, "grad_norm": 0.4766736626625061, "learning_rate": 0.0003971707261367919, "loss": 1.8153, "step": 35618 }, { "epoch": 1.19, "grad_norm": 0.47343626618385315, "learning_rate": 0.0003971608374297689, "loss": 1.7888, "step": 35619 }, { "epoch": 1.19, "grad_norm": 0.4801803231239319, "learning_rate": 0.00039715094860480704, "loss": 1.8362, "step": 35620 }, { "epoch": 1.19, "grad_norm": 0.47191786766052246, "learning_rate": 0.0003971410596619182, "loss": 1.8537, "step": 35621 }, { "epoch": 1.19, "grad_norm": 0.47182363271713257, "learning_rate": 0.00039713117060111446, "loss": 1.8376, "step": 35622 }, { "epoch": 1.19, "grad_norm": 0.4620453119277954, "learning_rate": 0.00039712128142240785, "loss": 1.7273, "step": 35623 }, { "epoch": 1.19, "grad_norm": 0.4913345277309418, "learning_rate": 0.00039711139212581024, "loss": 1.8, "step": 35624 }, { "epoch": 1.19, "grad_norm": 0.5024637579917908, "learning_rate": 0.0003971015027113337, "loss": 1.8368, "step": 35625 }, { "epoch": 1.19, "grad_norm": 0.48363906145095825, "learning_rate": 0.0003970916131789902, "loss": 1.8016, "step": 35626 }, { "epoch": 1.19, "grad_norm": 0.4966087341308594, "learning_rate": 0.0003970817235287918, "loss": 1.811, "step": 35627 }, { "epoch": 1.19, "grad_norm": 0.48543787002563477, "learning_rate": 0.0003970718337607506, "loss": 1.7159, "step": 35628 }, { "epoch": 1.19, "grad_norm": 0.47547534108161926, "learning_rate": 0.00039706194387487847, "loss": 1.8156, "step": 35629 }, { "epoch": 1.19, "grad_norm": 0.46712374687194824, "learning_rate": 0.0003970520538711874, "loss": 1.7988, "step": 35630 }, { "epoch": 1.19, "grad_norm": 0.45681706070899963, "learning_rate": 0.0003970421637496894, "loss": 1.7652, "step": 35631 }, { "epoch": 1.19, "grad_norm": 0.6351448893547058, "learning_rate": 0.00039703227351039653, "loss": 1.7479, "step": 35632 }, { "epoch": 1.19, "grad_norm": 0.484299898147583, "learning_rate": 0.0003970223831533208, "loss": 1.8057, "step": 35633 }, { "epoch": 1.19, "grad_norm": 0.4802483916282654, "learning_rate": 0.0003970124926784742, "loss": 1.7546, "step": 35634 }, { "epoch": 1.19, "grad_norm": 0.4960646629333496, "learning_rate": 0.00039700260208586866, "loss": 1.779, "step": 35635 }, { "epoch": 1.19, "grad_norm": 0.48359373211860657, "learning_rate": 0.00039699271137551636, "loss": 1.8127, "step": 35636 }, { "epoch": 1.19, "grad_norm": 0.49967160820961, "learning_rate": 0.00039698282054742915, "loss": 1.8066, "step": 35637 }, { "epoch": 1.19, "grad_norm": 0.4901897609233856, "learning_rate": 0.00039697292960161904, "loss": 1.7986, "step": 35638 }, { "epoch": 1.19, "grad_norm": 0.48497605323791504, "learning_rate": 0.00039696303853809824, "loss": 1.792, "step": 35639 }, { "epoch": 1.19, "grad_norm": 0.4978398084640503, "learning_rate": 0.0003969531473568784, "loss": 1.8028, "step": 35640 }, { "epoch": 1.19, "grad_norm": 0.4712820053100586, "learning_rate": 0.00039694325605797183, "loss": 1.8078, "step": 35641 }, { "epoch": 1.19, "grad_norm": 0.4700503349304199, "learning_rate": 0.0003969333646413903, "loss": 1.7592, "step": 35642 }, { "epoch": 1.19, "grad_norm": 0.4711483418941498, "learning_rate": 0.00039692347310714616, "loss": 1.7805, "step": 35643 }, { "epoch": 1.19, "grad_norm": 0.485525906085968, "learning_rate": 0.0003969135814552511, "loss": 1.7638, "step": 35644 }, { "epoch": 1.19, "grad_norm": 0.477413535118103, "learning_rate": 0.0003969036896857172, "loss": 1.7718, "step": 35645 }, { "epoch": 1.19, "grad_norm": 0.4599478840827942, "learning_rate": 0.0003968937977985566, "loss": 1.6982, "step": 35646 }, { "epoch": 1.19, "grad_norm": 0.47385337948799133, "learning_rate": 0.0003968839057937811, "loss": 1.8141, "step": 35647 }, { "epoch": 1.19, "grad_norm": 0.49255332350730896, "learning_rate": 0.00039687401367140285, "loss": 1.829, "step": 35648 }, { "epoch": 1.19, "grad_norm": 0.4907597303390503, "learning_rate": 0.00039686412143143384, "loss": 1.7653, "step": 35649 }, { "epoch": 1.19, "grad_norm": 0.4728916883468628, "learning_rate": 0.00039685422907388603, "loss": 1.7941, "step": 35650 }, { "epoch": 1.19, "grad_norm": 0.6165256500244141, "learning_rate": 0.0003968443365987715, "loss": 1.8589, "step": 35651 }, { "epoch": 1.19, "grad_norm": 0.4753452241420746, "learning_rate": 0.0003968344440061021, "loss": 1.8079, "step": 35652 }, { "epoch": 1.19, "grad_norm": 0.47903597354888916, "learning_rate": 0.00039682455129589006, "loss": 1.7757, "step": 35653 }, { "epoch": 1.19, "grad_norm": 0.4691634178161621, "learning_rate": 0.0003968146584681473, "loss": 1.727, "step": 35654 }, { "epoch": 1.19, "grad_norm": 0.46745553612709045, "learning_rate": 0.0003968047655228858, "loss": 1.7292, "step": 35655 }, { "epoch": 1.19, "grad_norm": 0.4697020649909973, "learning_rate": 0.00039679487246011756, "loss": 1.8455, "step": 35656 }, { "epoch": 1.19, "grad_norm": 0.5042595863342285, "learning_rate": 0.00039678497927985454, "loss": 1.865, "step": 35657 }, { "epoch": 1.19, "grad_norm": 0.4781196117401123, "learning_rate": 0.00039677508598210884, "loss": 1.8675, "step": 35658 }, { "epoch": 1.19, "grad_norm": 0.458847314119339, "learning_rate": 0.00039676519256689254, "loss": 1.7288, "step": 35659 }, { "epoch": 1.19, "grad_norm": 0.47229233384132385, "learning_rate": 0.00039675529903421746, "loss": 1.7651, "step": 35660 }, { "epoch": 1.19, "grad_norm": 0.46870332956314087, "learning_rate": 0.00039674540538409575, "loss": 1.7506, "step": 35661 }, { "epoch": 1.19, "grad_norm": 0.4849132299423218, "learning_rate": 0.00039673551161653934, "loss": 1.8511, "step": 35662 }, { "epoch": 1.19, "grad_norm": 0.468092679977417, "learning_rate": 0.0003967256177315603, "loss": 1.8709, "step": 35663 }, { "epoch": 1.19, "grad_norm": 0.5189602375030518, "learning_rate": 0.0003967157237291707, "loss": 1.7744, "step": 35664 }, { "epoch": 1.19, "grad_norm": 0.5002351403236389, "learning_rate": 0.0003967058296093822, "loss": 1.7551, "step": 35665 }, { "epoch": 1.19, "grad_norm": 0.48793336749076843, "learning_rate": 0.0003966959353722073, "loss": 1.8114, "step": 35666 }, { "epoch": 1.19, "grad_norm": 0.4682811498641968, "learning_rate": 0.00039668604101765763, "loss": 1.7979, "step": 35667 }, { "epoch": 1.19, "grad_norm": 0.4779455065727234, "learning_rate": 0.0003966761465457455, "loss": 1.7468, "step": 35668 }, { "epoch": 1.19, "grad_norm": 0.48527640104293823, "learning_rate": 0.0003966662519564827, "loss": 1.7991, "step": 35669 }, { "epoch": 1.19, "grad_norm": 0.45573773980140686, "learning_rate": 0.0003966563572498813, "loss": 1.7213, "step": 35670 }, { "epoch": 1.19, "grad_norm": 0.4833393394947052, "learning_rate": 0.00039664646242595344, "loss": 1.8246, "step": 35671 }, { "epoch": 1.19, "grad_norm": 0.462088018655777, "learning_rate": 0.00039663656748471077, "loss": 1.7956, "step": 35672 }, { "epoch": 1.19, "grad_norm": 0.49110326170921326, "learning_rate": 0.00039662667242616577, "loss": 1.8316, "step": 35673 }, { "epoch": 1.19, "grad_norm": 0.47769108414649963, "learning_rate": 0.0003966167772503302, "loss": 1.7538, "step": 35674 }, { "epoch": 1.19, "grad_norm": 0.46605154871940613, "learning_rate": 0.000396606881957216, "loss": 1.7476, "step": 35675 }, { "epoch": 1.19, "grad_norm": 0.4808686375617981, "learning_rate": 0.0003965969865468353, "loss": 1.728, "step": 35676 }, { "epoch": 1.19, "grad_norm": 0.4763213098049164, "learning_rate": 0.00039658709101920016, "loss": 1.7977, "step": 35677 }, { "epoch": 1.19, "grad_norm": 0.4848731458187103, "learning_rate": 0.0003965771953743226, "loss": 1.7849, "step": 35678 }, { "epoch": 1.19, "grad_norm": 0.4780089259147644, "learning_rate": 0.00039656729961221434, "loss": 1.7602, "step": 35679 }, { "epoch": 1.19, "grad_norm": 0.4801256060600281, "learning_rate": 0.0003965574037328877, "loss": 1.8155, "step": 35680 }, { "epoch": 1.19, "grad_norm": 0.4734453558921814, "learning_rate": 0.0003965475077363546, "loss": 1.7594, "step": 35681 }, { "epoch": 1.19, "grad_norm": 0.4572173058986664, "learning_rate": 0.00039653761162262703, "loss": 1.7699, "step": 35682 }, { "epoch": 1.19, "grad_norm": 0.4559749364852905, "learning_rate": 0.00039652771539171704, "loss": 1.7428, "step": 35683 }, { "epoch": 1.19, "grad_norm": 0.4723801016807556, "learning_rate": 0.00039651781904363666, "loss": 1.7482, "step": 35684 }, { "epoch": 1.19, "grad_norm": 0.47717928886413574, "learning_rate": 0.00039650792257839784, "loss": 1.8338, "step": 35685 }, { "epoch": 1.19, "grad_norm": 0.47353243827819824, "learning_rate": 0.00039649802599601265, "loss": 1.7416, "step": 35686 }, { "epoch": 1.19, "grad_norm": 0.4820263385772705, "learning_rate": 0.000396488129296493, "loss": 1.7798, "step": 35687 }, { "epoch": 1.19, "grad_norm": 0.4786898195743561, "learning_rate": 0.0003964782324798511, "loss": 1.7835, "step": 35688 }, { "epoch": 1.19, "grad_norm": 0.4619165062904358, "learning_rate": 0.0003964683355460988, "loss": 1.8201, "step": 35689 }, { "epoch": 1.19, "grad_norm": 0.4588974118232727, "learning_rate": 0.000396458438495248, "loss": 1.7506, "step": 35690 }, { "epoch": 1.19, "grad_norm": 0.4719583988189697, "learning_rate": 0.0003964485413273111, "loss": 1.7221, "step": 35691 }, { "epoch": 1.19, "grad_norm": 0.475841224193573, "learning_rate": 0.00039643864404229973, "loss": 1.7811, "step": 35692 }, { "epoch": 1.19, "grad_norm": 0.4770248532295227, "learning_rate": 0.0003964287466402261, "loss": 1.8026, "step": 35693 }, { "epoch": 1.19, "grad_norm": 0.46602514386177063, "learning_rate": 0.0003964188491211022, "loss": 1.8302, "step": 35694 }, { "epoch": 1.19, "grad_norm": 0.4761465787887573, "learning_rate": 0.00039640895148493994, "loss": 1.7655, "step": 35695 }, { "epoch": 1.19, "grad_norm": 0.5112369060516357, "learning_rate": 0.0003963990537317516, "loss": 1.7847, "step": 35696 }, { "epoch": 1.19, "grad_norm": 0.4832249879837036, "learning_rate": 0.0003963891558615488, "loss": 1.7434, "step": 35697 }, { "epoch": 1.19, "grad_norm": 0.4681455194950104, "learning_rate": 0.0003963792578743439, "loss": 1.7136, "step": 35698 }, { "epoch": 1.19, "grad_norm": 0.7493842244148254, "learning_rate": 0.0003963693597701487, "loss": 1.8079, "step": 35699 }, { "epoch": 1.19, "grad_norm": 0.4772268831729889, "learning_rate": 0.00039635946154897535, "loss": 1.7436, "step": 35700 }, { "epoch": 1.19, "grad_norm": 0.49485135078430176, "learning_rate": 0.00039634956321083593, "loss": 1.8279, "step": 35701 }, { "epoch": 1.19, "grad_norm": 0.5137437582015991, "learning_rate": 0.00039633966475574204, "loss": 1.778, "step": 35702 }, { "epoch": 1.19, "grad_norm": 0.9335164427757263, "learning_rate": 0.0003963297661837063, "loss": 1.8746, "step": 35703 }, { "epoch": 1.19, "grad_norm": 0.45925405621528625, "learning_rate": 0.0003963198674947402, "loss": 1.8269, "step": 35704 }, { "epoch": 1.19, "grad_norm": 0.4800897240638733, "learning_rate": 0.0003963099686888561, "loss": 1.8177, "step": 35705 }, { "epoch": 1.19, "grad_norm": 0.4737248718738556, "learning_rate": 0.0003963000697660658, "loss": 1.8177, "step": 35706 }, { "epoch": 1.19, "grad_norm": 0.4831578731536865, "learning_rate": 0.0003962901707263814, "loss": 1.7843, "step": 35707 }, { "epoch": 1.19, "grad_norm": 0.46623495221138, "learning_rate": 0.0003962802715698151, "loss": 1.7576, "step": 35708 }, { "epoch": 1.19, "grad_norm": 0.46508023142814636, "learning_rate": 0.00039627037229637857, "loss": 1.8567, "step": 35709 }, { "epoch": 1.19, "grad_norm": 0.45468294620513916, "learning_rate": 0.000396260472906084, "loss": 1.7541, "step": 35710 }, { "epoch": 1.19, "grad_norm": 0.46489468216896057, "learning_rate": 0.0003962505733989434, "loss": 1.8259, "step": 35711 }, { "epoch": 1.19, "grad_norm": 0.4717448651790619, "learning_rate": 0.0003962406737749688, "loss": 1.7504, "step": 35712 }, { "epoch": 1.19, "grad_norm": 0.4583829939365387, "learning_rate": 0.00039623077403417217, "loss": 1.7395, "step": 35713 }, { "epoch": 1.19, "grad_norm": 0.46638429164886475, "learning_rate": 0.0003962208741765657, "loss": 1.7452, "step": 35714 }, { "epoch": 1.19, "grad_norm": 0.4566963016986847, "learning_rate": 0.0003962109742021611, "loss": 1.7208, "step": 35715 }, { "epoch": 1.19, "grad_norm": 0.47631874680519104, "learning_rate": 0.00039620107411097067, "loss": 1.8913, "step": 35716 }, { "epoch": 1.19, "grad_norm": 0.461713045835495, "learning_rate": 0.00039619117390300626, "loss": 1.76, "step": 35717 }, { "epoch": 1.19, "grad_norm": 0.4763638377189636, "learning_rate": 0.00039618127357827994, "loss": 1.7913, "step": 35718 }, { "epoch": 1.19, "grad_norm": 0.46972936391830444, "learning_rate": 0.00039617137313680375, "loss": 1.7506, "step": 35719 }, { "epoch": 1.19, "grad_norm": 0.4868510961532593, "learning_rate": 0.00039616147257858956, "loss": 1.8478, "step": 35720 }, { "epoch": 1.19, "grad_norm": 0.4673946499824524, "learning_rate": 0.0003961515719036497, "loss": 1.8253, "step": 35721 }, { "epoch": 1.19, "grad_norm": 0.4579590857028961, "learning_rate": 0.0003961416711119958, "loss": 1.753, "step": 35722 }, { "epoch": 1.19, "grad_norm": 0.4672141969203949, "learning_rate": 0.00039613177020364027, "loss": 1.8308, "step": 35723 }, { "epoch": 1.19, "grad_norm": 0.4848491847515106, "learning_rate": 0.0003961218691785948, "loss": 1.7391, "step": 35724 }, { "epoch": 1.19, "grad_norm": 0.46911656856536865, "learning_rate": 0.00039611196803687157, "loss": 1.8189, "step": 35725 }, { "epoch": 1.19, "grad_norm": 0.47871315479278564, "learning_rate": 0.00039610206677848266, "loss": 1.8258, "step": 35726 }, { "epoch": 1.19, "grad_norm": 0.454677015542984, "learning_rate": 0.0003960921654034399, "loss": 1.7812, "step": 35727 }, { "epoch": 1.19, "grad_norm": 0.4682002067565918, "learning_rate": 0.0003960822639117555, "loss": 1.7644, "step": 35728 }, { "epoch": 1.19, "grad_norm": 0.4782904386520386, "learning_rate": 0.0003960723623034413, "loss": 1.8086, "step": 35729 }, { "epoch": 1.19, "grad_norm": 0.4709091782569885, "learning_rate": 0.00039606246057850944, "loss": 1.676, "step": 35730 }, { "epoch": 1.19, "grad_norm": 0.4826706647872925, "learning_rate": 0.0003960525587369719, "loss": 1.669, "step": 35731 }, { "epoch": 1.19, "grad_norm": 0.47057679295539856, "learning_rate": 0.0003960426567788407, "loss": 1.732, "step": 35732 }, { "epoch": 1.19, "grad_norm": 0.4950089156627655, "learning_rate": 0.00039603275470412796, "loss": 1.7814, "step": 35733 }, { "epoch": 1.19, "grad_norm": 0.45939263701438904, "learning_rate": 0.0003960228525128455, "loss": 1.6702, "step": 35734 }, { "epoch": 1.19, "grad_norm": 0.4642893671989441, "learning_rate": 0.0003960129502050055, "loss": 1.7613, "step": 35735 }, { "epoch": 1.19, "grad_norm": 0.4605134427547455, "learning_rate": 0.00039600304778061987, "loss": 1.7885, "step": 35736 }, { "epoch": 1.19, "grad_norm": 0.4880688786506653, "learning_rate": 0.00039599314523970067, "loss": 1.7967, "step": 35737 }, { "epoch": 1.19, "grad_norm": 0.46990445256233215, "learning_rate": 0.00039598324258226, "loss": 1.6831, "step": 35738 }, { "epoch": 1.19, "grad_norm": 0.49397391080856323, "learning_rate": 0.00039597333980830984, "loss": 1.8026, "step": 35739 }, { "epoch": 1.19, "grad_norm": 0.4622795879840851, "learning_rate": 0.0003959634369178621, "loss": 1.6916, "step": 35740 }, { "epoch": 1.19, "grad_norm": 0.4693601429462433, "learning_rate": 0.0003959535339109289, "loss": 1.7752, "step": 35741 }, { "epoch": 1.19, "grad_norm": 0.4727526903152466, "learning_rate": 0.0003959436307875223, "loss": 1.7347, "step": 35742 }, { "epoch": 1.19, "grad_norm": 0.49463048577308655, "learning_rate": 0.0003959337275476543, "loss": 1.8092, "step": 35743 }, { "epoch": 1.19, "grad_norm": 0.48416560888290405, "learning_rate": 0.00039592382419133677, "loss": 1.7369, "step": 35744 }, { "epoch": 1.19, "grad_norm": 0.47509682178497314, "learning_rate": 0.0003959139207185819, "loss": 1.778, "step": 35745 }, { "epoch": 1.19, "grad_norm": 0.4698215126991272, "learning_rate": 0.0003959040171294017, "loss": 1.7645, "step": 35746 }, { "epoch": 1.19, "grad_norm": 0.46660417318344116, "learning_rate": 0.00039589411342380815, "loss": 1.7023, "step": 35747 }, { "epoch": 1.19, "grad_norm": 0.4768812656402588, "learning_rate": 0.0003958842096018133, "loss": 1.7572, "step": 35748 }, { "epoch": 1.19, "grad_norm": 0.4736509323120117, "learning_rate": 0.0003958743056634291, "loss": 1.8185, "step": 35749 }, { "epoch": 1.19, "grad_norm": 0.4746769070625305, "learning_rate": 0.0003958644016086676, "loss": 1.7166, "step": 35750 }, { "epoch": 1.19, "grad_norm": 0.5001991391181946, "learning_rate": 0.00039585449743754093, "loss": 1.846, "step": 35751 }, { "epoch": 1.19, "grad_norm": 0.477914959192276, "learning_rate": 0.0003958445931500609, "loss": 1.7565, "step": 35752 }, { "epoch": 1.19, "grad_norm": 0.4794025719165802, "learning_rate": 0.00039583468874623975, "loss": 1.7492, "step": 35753 }, { "epoch": 1.19, "grad_norm": 0.4642205238342285, "learning_rate": 0.00039582478422608936, "loss": 1.7394, "step": 35754 }, { "epoch": 1.19, "grad_norm": 0.47906264662742615, "learning_rate": 0.00039581487958962176, "loss": 1.7402, "step": 35755 }, { "epoch": 1.19, "grad_norm": 0.44916999340057373, "learning_rate": 0.00039580497483684914, "loss": 1.7877, "step": 35756 }, { "epoch": 1.19, "grad_norm": 0.4566698372364044, "learning_rate": 0.00039579506996778325, "loss": 1.7747, "step": 35757 }, { "epoch": 1.19, "grad_norm": 0.4655892550945282, "learning_rate": 0.0003957851649824364, "loss": 1.8228, "step": 35758 }, { "epoch": 1.19, "grad_norm": 0.4728489816188812, "learning_rate": 0.0003957752598808204, "loss": 1.8001, "step": 35759 }, { "epoch": 1.19, "grad_norm": 0.4751459062099457, "learning_rate": 0.0003957653546629474, "loss": 1.7354, "step": 35760 }, { "epoch": 1.19, "grad_norm": 0.47892943024635315, "learning_rate": 0.00039575544932882927, "loss": 1.8284, "step": 35761 }, { "epoch": 1.19, "grad_norm": 0.48567166924476624, "learning_rate": 0.00039574554387847814, "loss": 1.7886, "step": 35762 }, { "epoch": 1.19, "grad_norm": 0.4904985725879669, "learning_rate": 0.0003957356383119062, "loss": 1.8481, "step": 35763 }, { "epoch": 1.19, "grad_norm": 0.4885530471801758, "learning_rate": 0.0003957257326291251, "loss": 1.8269, "step": 35764 }, { "epoch": 1.19, "grad_norm": 0.4766272306442261, "learning_rate": 0.0003957158268301472, "loss": 1.6856, "step": 35765 }, { "epoch": 1.19, "grad_norm": 0.4722311794757843, "learning_rate": 0.00039570592091498424, "loss": 1.783, "step": 35766 }, { "epoch": 1.19, "grad_norm": 0.47940805554389954, "learning_rate": 0.00039569601488364853, "loss": 1.7113, "step": 35767 }, { "epoch": 1.19, "grad_norm": 0.4696892499923706, "learning_rate": 0.0003956861087361519, "loss": 1.7678, "step": 35768 }, { "epoch": 1.19, "grad_norm": 0.4816302955150604, "learning_rate": 0.0003956762024725064, "loss": 1.7463, "step": 35769 }, { "epoch": 1.19, "grad_norm": 0.47330808639526367, "learning_rate": 0.00039566629609272415, "loss": 1.7766, "step": 35770 }, { "epoch": 1.19, "grad_norm": 0.4799641966819763, "learning_rate": 0.00039565638959681714, "loss": 1.8005, "step": 35771 }, { "epoch": 1.19, "grad_norm": 0.47472894191741943, "learning_rate": 0.0003956464829847974, "loss": 1.8615, "step": 35772 }, { "epoch": 1.19, "grad_norm": 0.49828776717185974, "learning_rate": 0.00039563657625667675, "loss": 1.7621, "step": 35773 }, { "epoch": 1.19, "grad_norm": 0.47146254777908325, "learning_rate": 0.00039562666941246746, "loss": 1.7692, "step": 35774 }, { "epoch": 1.19, "grad_norm": 0.4989558756351471, "learning_rate": 0.0003956167624521815, "loss": 1.8263, "step": 35775 }, { "epoch": 1.19, "grad_norm": 0.4847017824649811, "learning_rate": 0.000395606855375831, "loss": 1.7143, "step": 35776 }, { "epoch": 1.19, "grad_norm": 0.46833422780036926, "learning_rate": 0.0003955969481834276, "loss": 1.7103, "step": 35777 }, { "epoch": 1.19, "grad_norm": 0.48395779728889465, "learning_rate": 0.0003955870408749838, "loss": 1.8293, "step": 35778 }, { "epoch": 1.19, "grad_norm": 0.46612873673439026, "learning_rate": 0.00039557713345051137, "loss": 1.7807, "step": 35779 }, { "epoch": 1.19, "grad_norm": 0.4878937005996704, "learning_rate": 0.00039556722591002234, "loss": 1.7986, "step": 35780 }, { "epoch": 1.19, "grad_norm": 0.5038641095161438, "learning_rate": 0.0003955573182535289, "loss": 1.8092, "step": 35781 }, { "epoch": 1.19, "grad_norm": 0.4927905201911926, "learning_rate": 0.00039554741048104276, "loss": 1.8023, "step": 35782 }, { "epoch": 1.19, "grad_norm": 0.47088339924812317, "learning_rate": 0.0003955375025925763, "loss": 1.7474, "step": 35783 }, { "epoch": 1.19, "grad_norm": 0.47080957889556885, "learning_rate": 0.00039552759458814126, "loss": 1.7701, "step": 35784 }, { "epoch": 1.19, "grad_norm": 0.4860892593860626, "learning_rate": 0.00039551768646774995, "loss": 1.7623, "step": 35785 }, { "epoch": 1.19, "grad_norm": 0.46549054980278015, "learning_rate": 0.0003955077782314141, "loss": 1.7885, "step": 35786 }, { "epoch": 1.19, "grad_norm": 0.49666398763656616, "learning_rate": 0.000395497869879146, "loss": 1.7288, "step": 35787 }, { "epoch": 1.19, "grad_norm": 0.4838010370731354, "learning_rate": 0.00039548796141095755, "loss": 1.8036, "step": 35788 }, { "epoch": 1.19, "grad_norm": 0.47341862320899963, "learning_rate": 0.00039547805282686067, "loss": 1.8519, "step": 35789 }, { "epoch": 1.19, "grad_norm": 0.49457886815071106, "learning_rate": 0.0003954681441268676, "loss": 1.7595, "step": 35790 }, { "epoch": 1.19, "grad_norm": 0.46829232573509216, "learning_rate": 0.00039545823531099026, "loss": 1.8572, "step": 35791 }, { "epoch": 1.19, "grad_norm": 0.47265955805778503, "learning_rate": 0.0003954483263792406, "loss": 1.749, "step": 35792 }, { "epoch": 1.19, "grad_norm": 0.4712049067020416, "learning_rate": 0.0003954384173316308, "loss": 1.8096, "step": 35793 }, { "epoch": 1.19, "grad_norm": 0.47497865557670593, "learning_rate": 0.00039542850816817277, "loss": 1.7894, "step": 35794 }, { "epoch": 1.19, "grad_norm": 0.49870747327804565, "learning_rate": 0.00039541859888887873, "loss": 1.8022, "step": 35795 }, { "epoch": 1.19, "grad_norm": 0.47362953424453735, "learning_rate": 0.0003954086894937605, "loss": 1.784, "step": 35796 }, { "epoch": 1.19, "grad_norm": 0.46993228793144226, "learning_rate": 0.00039539877998283015, "loss": 1.781, "step": 35797 }, { "epoch": 1.19, "grad_norm": 0.4685174524784088, "learning_rate": 0.0003953888703560998, "loss": 1.7958, "step": 35798 }, { "epoch": 1.19, "grad_norm": 0.45212680101394653, "learning_rate": 0.0003953789606135813, "loss": 1.749, "step": 35799 }, { "epoch": 1.19, "grad_norm": 0.48608970642089844, "learning_rate": 0.0003953690507552868, "loss": 1.7388, "step": 35800 }, { "epoch": 1.19, "grad_norm": 0.4868699312210083, "learning_rate": 0.00039535914078122844, "loss": 1.8708, "step": 35801 }, { "epoch": 1.19, "grad_norm": 0.4841510057449341, "learning_rate": 0.0003953492306914181, "loss": 1.8457, "step": 35802 }, { "epoch": 1.19, "grad_norm": 0.48003461956977844, "learning_rate": 0.0003953393204858678, "loss": 1.7476, "step": 35803 }, { "epoch": 1.19, "grad_norm": 0.4870644211769104, "learning_rate": 0.0003953294101645896, "loss": 1.69, "step": 35804 }, { "epoch": 1.19, "grad_norm": 0.4789077639579773, "learning_rate": 0.0003953194997275956, "loss": 1.7436, "step": 35805 }, { "epoch": 1.19, "grad_norm": 0.4802256226539612, "learning_rate": 0.00039530958917489777, "loss": 1.8421, "step": 35806 }, { "epoch": 1.19, "grad_norm": 0.5407009124755859, "learning_rate": 0.0003952996785065081, "loss": 1.8145, "step": 35807 }, { "epoch": 1.19, "grad_norm": 0.49800142645835876, "learning_rate": 0.00039528976772243874, "loss": 1.8817, "step": 35808 }, { "epoch": 1.19, "grad_norm": 0.4890064597129822, "learning_rate": 0.00039527985682270156, "loss": 1.8347, "step": 35809 }, { "epoch": 1.19, "grad_norm": 0.474104106426239, "learning_rate": 0.0003952699458073087, "loss": 1.8175, "step": 35810 }, { "epoch": 1.19, "grad_norm": 0.4889821708202362, "learning_rate": 0.0003952600346762722, "loss": 1.7446, "step": 35811 }, { "epoch": 1.19, "grad_norm": 0.47237804532051086, "learning_rate": 0.0003952501234296039, "loss": 1.7534, "step": 35812 }, { "epoch": 1.19, "grad_norm": 0.4803743064403534, "learning_rate": 0.00039524021206731617, "loss": 1.8452, "step": 35813 }, { "epoch": 1.19, "grad_norm": 0.5056167840957642, "learning_rate": 0.00039523030058942075, "loss": 1.8478, "step": 35814 }, { "epoch": 1.19, "grad_norm": 0.48392820358276367, "learning_rate": 0.00039522038899592987, "loss": 1.8137, "step": 35815 }, { "epoch": 1.19, "grad_norm": 0.4743858277797699, "learning_rate": 0.0003952104772868554, "loss": 1.7572, "step": 35816 }, { "epoch": 1.19, "grad_norm": 0.47779715061187744, "learning_rate": 0.0003952005654622094, "loss": 1.7784, "step": 35817 }, { "epoch": 1.19, "grad_norm": 0.48436275124549866, "learning_rate": 0.00039519065352200405, "loss": 1.774, "step": 35818 }, { "epoch": 1.19, "grad_norm": 1.4048587083816528, "learning_rate": 0.00039518074146625116, "loss": 1.7461, "step": 35819 }, { "epoch": 1.19, "grad_norm": 0.47177886962890625, "learning_rate": 0.00039517082929496297, "loss": 1.8509, "step": 35820 }, { "epoch": 1.19, "grad_norm": 0.47939571738243103, "learning_rate": 0.00039516091700815136, "loss": 1.7852, "step": 35821 }, { "epoch": 1.19, "grad_norm": 0.4729051887989044, "learning_rate": 0.0003951510046058284, "loss": 1.7488, "step": 35822 }, { "epoch": 1.19, "grad_norm": 0.471832811832428, "learning_rate": 0.00039514109208800617, "loss": 1.6751, "step": 35823 }, { "epoch": 1.19, "grad_norm": 0.46629300713539124, "learning_rate": 0.00039513117945469666, "loss": 1.7149, "step": 35824 }, { "epoch": 1.19, "grad_norm": 0.4704696536064148, "learning_rate": 0.0003951212667059119, "loss": 1.7208, "step": 35825 }, { "epoch": 1.19, "grad_norm": 0.4646131694316864, "learning_rate": 0.00039511135384166405, "loss": 1.7854, "step": 35826 }, { "epoch": 1.19, "grad_norm": 0.4661906361579895, "learning_rate": 0.00039510144086196496, "loss": 1.7892, "step": 35827 }, { "epoch": 1.19, "grad_norm": 0.4675934612751007, "learning_rate": 0.0003950915277668267, "loss": 1.7759, "step": 35828 }, { "epoch": 1.19, "grad_norm": 0.47922593355178833, "learning_rate": 0.00039508161455626133, "loss": 1.8541, "step": 35829 }, { "epoch": 1.19, "grad_norm": 0.4773743450641632, "learning_rate": 0.00039507170123028086, "loss": 1.8039, "step": 35830 }, { "epoch": 1.19, "grad_norm": 0.4626052677631378, "learning_rate": 0.0003950617877888975, "loss": 1.7708, "step": 35831 }, { "epoch": 1.19, "grad_norm": 0.46339645981788635, "learning_rate": 0.000395051874232123, "loss": 1.753, "step": 35832 }, { "epoch": 1.19, "grad_norm": 0.523201048374176, "learning_rate": 0.0003950419605599696, "loss": 1.7841, "step": 35833 }, { "epoch": 1.19, "grad_norm": 0.47874531149864197, "learning_rate": 0.0003950320467724492, "loss": 1.8258, "step": 35834 }, { "epoch": 1.19, "grad_norm": 0.4785812199115753, "learning_rate": 0.00039502213286957393, "loss": 1.7942, "step": 35835 }, { "epoch": 1.19, "grad_norm": 0.4876980483531952, "learning_rate": 0.00039501221885135585, "loss": 1.7839, "step": 35836 }, { "epoch": 1.19, "grad_norm": 0.4600248634815216, "learning_rate": 0.00039500230471780685, "loss": 1.7733, "step": 35837 }, { "epoch": 1.19, "grad_norm": 0.4922850430011749, "learning_rate": 0.00039499239046893907, "loss": 1.7703, "step": 35838 }, { "epoch": 1.19, "grad_norm": 0.4688607454299927, "learning_rate": 0.00039498247610476455, "loss": 1.7808, "step": 35839 }, { "epoch": 1.19, "grad_norm": 0.47778964042663574, "learning_rate": 0.0003949725616252953, "loss": 1.8168, "step": 35840 }, { "epoch": 1.19, "grad_norm": 0.5329796075820923, "learning_rate": 0.0003949626470305433, "loss": 1.7955, "step": 35841 }, { "epoch": 1.19, "grad_norm": 0.4709102511405945, "learning_rate": 0.0003949527323205207, "loss": 1.7313, "step": 35842 }, { "epoch": 1.19, "grad_norm": 0.479976087808609, "learning_rate": 0.0003949428174952395, "loss": 1.7805, "step": 35843 }, { "epoch": 1.19, "grad_norm": 0.46168985962867737, "learning_rate": 0.0003949329025547116, "loss": 1.8318, "step": 35844 }, { "epoch": 1.19, "grad_norm": 0.485391765832901, "learning_rate": 0.0003949229874989493, "loss": 1.7378, "step": 35845 }, { "epoch": 1.19, "grad_norm": 0.47131457924842834, "learning_rate": 0.00039491307232796436, "loss": 1.7822, "step": 35846 }, { "epoch": 1.19, "grad_norm": 0.4738095998764038, "learning_rate": 0.0003949031570417689, "loss": 1.7724, "step": 35847 }, { "epoch": 1.19, "grad_norm": 0.5041534900665283, "learning_rate": 0.00039489324164037503, "loss": 1.7588, "step": 35848 }, { "epoch": 1.19, "grad_norm": 0.47873303294181824, "learning_rate": 0.00039488332612379475, "loss": 1.7901, "step": 35849 }, { "epoch": 1.19, "grad_norm": 0.4745738208293915, "learning_rate": 0.0003948734104920402, "loss": 1.8789, "step": 35850 }, { "epoch": 1.19, "grad_norm": 0.4691389203071594, "learning_rate": 0.0003948634947451232, "loss": 1.7631, "step": 35851 }, { "epoch": 1.19, "grad_norm": 0.5115084052085876, "learning_rate": 0.0003948535788830559, "loss": 1.7345, "step": 35852 }, { "epoch": 1.19, "grad_norm": 0.4947811961174011, "learning_rate": 0.00039484366290585036, "loss": 1.7472, "step": 35853 }, { "epoch": 1.19, "grad_norm": 0.4721774756908417, "learning_rate": 0.0003948337468135185, "loss": 1.7681, "step": 35854 }, { "epoch": 1.19, "grad_norm": 0.4813154339790344, "learning_rate": 0.0003948238306060725, "loss": 1.818, "step": 35855 }, { "epoch": 1.19, "grad_norm": 0.48917603492736816, "learning_rate": 0.0003948139142835244, "loss": 1.8111, "step": 35856 }, { "epoch": 1.19, "grad_norm": 0.46421098709106445, "learning_rate": 0.0003948039978458862, "loss": 1.6872, "step": 35857 }, { "epoch": 1.19, "grad_norm": 0.4699738323688507, "learning_rate": 0.0003947940812931698, "loss": 1.8208, "step": 35858 }, { "epoch": 1.19, "grad_norm": 0.48007404804229736, "learning_rate": 0.0003947841646253874, "loss": 1.785, "step": 35859 }, { "epoch": 1.19, "grad_norm": 0.5013903379440308, "learning_rate": 0.000394774247842551, "loss": 1.7648, "step": 35860 }, { "epoch": 1.19, "grad_norm": 0.47531867027282715, "learning_rate": 0.0003947643309446727, "loss": 1.8355, "step": 35861 }, { "epoch": 1.19, "grad_norm": 0.4744240939617157, "learning_rate": 0.0003947544139317643, "loss": 1.7452, "step": 35862 }, { "epoch": 1.19, "grad_norm": 0.48441213369369507, "learning_rate": 0.0003947444968038381, "loss": 1.8202, "step": 35863 }, { "epoch": 1.19, "grad_norm": 0.46212801337242126, "learning_rate": 0.00039473457956090604, "loss": 1.741, "step": 35864 }, { "epoch": 1.19, "grad_norm": 0.47421905398368835, "learning_rate": 0.0003947246622029801, "loss": 1.7284, "step": 35865 }, { "epoch": 1.19, "grad_norm": 0.4968278408050537, "learning_rate": 0.0003947147447300724, "loss": 1.7821, "step": 35866 }, { "epoch": 1.19, "grad_norm": 0.4869631230831146, "learning_rate": 0.000394704827142195, "loss": 1.8016, "step": 35867 }, { "epoch": 1.19, "grad_norm": 0.46778404712677, "learning_rate": 0.00039469490943936, "loss": 1.7826, "step": 35868 }, { "epoch": 1.19, "grad_norm": 0.48238763213157654, "learning_rate": 0.0003946849916215791, "loss": 1.8073, "step": 35869 }, { "epoch": 1.19, "grad_norm": 0.49799996614456177, "learning_rate": 0.00039467507368886473, "loss": 1.7877, "step": 35870 }, { "epoch": 1.19, "grad_norm": 0.48769593238830566, "learning_rate": 0.0003946651556412287, "loss": 1.8094, "step": 35871 }, { "epoch": 1.19, "grad_norm": 0.4857332110404968, "learning_rate": 0.00039465523747868314, "loss": 1.8095, "step": 35872 }, { "epoch": 1.19, "grad_norm": 0.45758551359176636, "learning_rate": 0.0003946453192012401, "loss": 1.7317, "step": 35873 }, { "epoch": 1.19, "grad_norm": 0.46434286236763, "learning_rate": 0.0003946354008089115, "loss": 1.8457, "step": 35874 }, { "epoch": 1.19, "grad_norm": 0.49813446402549744, "learning_rate": 0.00039462548230170954, "loss": 1.7195, "step": 35875 }, { "epoch": 1.19, "grad_norm": 0.4753519892692566, "learning_rate": 0.0003946155636796461, "loss": 1.7894, "step": 35876 }, { "epoch": 1.19, "grad_norm": 0.47141584753990173, "learning_rate": 0.00039460564494273346, "loss": 1.7802, "step": 35877 }, { "epoch": 1.19, "grad_norm": 0.47765257954597473, "learning_rate": 0.00039459572609098337, "loss": 1.7929, "step": 35878 }, { "epoch": 1.19, "grad_norm": 0.45869046449661255, "learning_rate": 0.0003945858071244081, "loss": 1.8004, "step": 35879 }, { "epoch": 1.19, "grad_norm": 0.4870794713497162, "learning_rate": 0.0003945758880430196, "loss": 1.827, "step": 35880 }, { "epoch": 1.19, "grad_norm": 0.4790220558643341, "learning_rate": 0.00039456596884682974, "loss": 1.761, "step": 35881 }, { "epoch": 1.19, "grad_norm": 0.46707579493522644, "learning_rate": 0.00039455604953585096, "loss": 1.7249, "step": 35882 }, { "epoch": 1.19, "grad_norm": 0.47960925102233887, "learning_rate": 0.00039454613011009497, "loss": 1.8305, "step": 35883 }, { "epoch": 1.19, "grad_norm": 0.462593138217926, "learning_rate": 0.00039453621056957384, "loss": 1.7793, "step": 35884 }, { "epoch": 1.19, "grad_norm": 0.4579308032989502, "learning_rate": 0.0003945262909142997, "loss": 1.8475, "step": 35885 }, { "epoch": 1.19, "grad_norm": 0.4459601938724518, "learning_rate": 0.0003945163711442846, "loss": 1.7316, "step": 35886 }, { "epoch": 1.19, "grad_norm": 0.4674910604953766, "learning_rate": 0.0003945064512595405, "loss": 1.732, "step": 35887 }, { "epoch": 1.19, "grad_norm": 0.4909032881259918, "learning_rate": 0.00039449653126007964, "loss": 1.7811, "step": 35888 }, { "epoch": 1.19, "grad_norm": 0.47599586844444275, "learning_rate": 0.00039448661114591387, "loss": 1.7968, "step": 35889 }, { "epoch": 1.19, "grad_norm": 0.48657405376434326, "learning_rate": 0.0003944766909170552, "loss": 1.7837, "step": 35890 }, { "epoch": 1.19, "grad_norm": 0.48503074049949646, "learning_rate": 0.0003944667705735158, "loss": 1.8286, "step": 35891 }, { "epoch": 1.19, "grad_norm": 0.5045772790908813, "learning_rate": 0.00039445685011530764, "loss": 1.7471, "step": 35892 }, { "epoch": 1.19, "grad_norm": 0.45603519678115845, "learning_rate": 0.00039444692954244276, "loss": 1.6859, "step": 35893 }, { "epoch": 1.19, "grad_norm": 0.4585863947868347, "learning_rate": 0.0003944370088549332, "loss": 1.7918, "step": 35894 }, { "epoch": 1.19, "grad_norm": 0.47146737575531006, "learning_rate": 0.0003944270880527911, "loss": 1.7951, "step": 35895 }, { "epoch": 1.19, "grad_norm": 0.4734533131122589, "learning_rate": 0.0003944171671360284, "loss": 1.7801, "step": 35896 }, { "epoch": 1.19, "grad_norm": 0.4731465280056, "learning_rate": 0.0003944072461046572, "loss": 1.7694, "step": 35897 }, { "epoch": 1.19, "grad_norm": 0.498805433511734, "learning_rate": 0.0003943973249586895, "loss": 1.815, "step": 35898 }, { "epoch": 1.19, "grad_norm": 0.4611397385597229, "learning_rate": 0.00039438740369813726, "loss": 1.8367, "step": 35899 }, { "epoch": 1.19, "grad_norm": 0.46736854314804077, "learning_rate": 0.00039437748232301276, "loss": 1.8444, "step": 35900 }, { "epoch": 1.19, "grad_norm": 0.48017799854278564, "learning_rate": 0.00039436756083332776, "loss": 1.7682, "step": 35901 }, { "epoch": 1.19, "grad_norm": 0.4567200243473053, "learning_rate": 0.0003943576392290946, "loss": 1.7713, "step": 35902 }, { "epoch": 1.19, "grad_norm": 0.47265589237213135, "learning_rate": 0.00039434771751032505, "loss": 1.7443, "step": 35903 }, { "epoch": 1.19, "grad_norm": 0.4832689166069031, "learning_rate": 0.00039433779567703134, "loss": 1.7664, "step": 35904 }, { "epoch": 1.19, "grad_norm": 0.49335023760795593, "learning_rate": 0.00039432787372922545, "loss": 1.871, "step": 35905 }, { "epoch": 1.19, "grad_norm": 0.466159850358963, "learning_rate": 0.00039431795166691935, "loss": 1.8676, "step": 35906 }, { "epoch": 1.19, "grad_norm": 0.4794045686721802, "learning_rate": 0.00039430802949012525, "loss": 1.8175, "step": 35907 }, { "epoch": 1.19, "grad_norm": 0.5118229985237122, "learning_rate": 0.000394298107198855, "loss": 1.767, "step": 35908 }, { "epoch": 1.19, "grad_norm": 0.47378382086753845, "learning_rate": 0.0003942881847931208, "loss": 1.8619, "step": 35909 }, { "epoch": 1.19, "grad_norm": 0.4683834910392761, "learning_rate": 0.0003942782622729346, "loss": 1.7725, "step": 35910 }, { "epoch": 1.19, "grad_norm": 0.47539249062538147, "learning_rate": 0.0003942683396383085, "loss": 1.7956, "step": 35911 }, { "epoch": 1.19, "grad_norm": 0.5090339779853821, "learning_rate": 0.00039425841688925454, "loss": 1.8184, "step": 35912 }, { "epoch": 1.19, "grad_norm": 0.5332277417182922, "learning_rate": 0.0003942484940257848, "loss": 1.8136, "step": 35913 }, { "epoch": 1.19, "grad_norm": 0.46119800209999084, "learning_rate": 0.0003942385710479112, "loss": 1.8314, "step": 35914 }, { "epoch": 1.19, "grad_norm": 0.4667152464389801, "learning_rate": 0.00039422864795564584, "loss": 1.836, "step": 35915 }, { "epoch": 1.19, "grad_norm": 0.4918217957019806, "learning_rate": 0.0003942187247490008, "loss": 1.8407, "step": 35916 }, { "epoch": 1.19, "grad_norm": 0.4657737910747528, "learning_rate": 0.00039420880142798816, "loss": 1.7685, "step": 35917 }, { "epoch": 1.19, "grad_norm": 0.4783118963241577, "learning_rate": 0.0003941988779926199, "loss": 1.7473, "step": 35918 }, { "epoch": 1.2, "grad_norm": 0.45400649309158325, "learning_rate": 0.0003941889544429081, "loss": 1.7409, "step": 35919 }, { "epoch": 1.2, "grad_norm": 0.4602375626564026, "learning_rate": 0.0003941790307788647, "loss": 1.8009, "step": 35920 }, { "epoch": 1.2, "grad_norm": 0.504139244556427, "learning_rate": 0.0003941691070005019, "loss": 1.7706, "step": 35921 }, { "epoch": 1.2, "grad_norm": 0.46610426902770996, "learning_rate": 0.0003941591831078317, "loss": 1.6989, "step": 35922 }, { "epoch": 1.2, "grad_norm": 0.5084643959999084, "learning_rate": 0.00039414925910086614, "loss": 1.8096, "step": 35923 }, { "epoch": 1.2, "grad_norm": 0.469135046005249, "learning_rate": 0.00039413933497961707, "loss": 1.8236, "step": 35924 }, { "epoch": 1.2, "grad_norm": 0.46121641993522644, "learning_rate": 0.00039412941074409694, "loss": 1.7568, "step": 35925 }, { "epoch": 1.2, "grad_norm": 0.4754141569137573, "learning_rate": 0.00039411948639431747, "loss": 1.7955, "step": 35926 }, { "epoch": 1.2, "grad_norm": 0.47864818572998047, "learning_rate": 0.00039410956193029084, "loss": 1.7355, "step": 35927 }, { "epoch": 1.2, "grad_norm": 0.4758830666542053, "learning_rate": 0.00039409963735202914, "loss": 1.7941, "step": 35928 }, { "epoch": 1.2, "grad_norm": 0.4989851415157318, "learning_rate": 0.00039408971265954414, "loss": 1.7497, "step": 35929 }, { "epoch": 1.2, "grad_norm": 1.0097079277038574, "learning_rate": 0.00039407978785284826, "loss": 1.8107, "step": 35930 }, { "epoch": 1.2, "grad_norm": 0.47171634435653687, "learning_rate": 0.0003940698629319533, "loss": 1.8292, "step": 35931 }, { "epoch": 1.2, "grad_norm": 0.48704251646995544, "learning_rate": 0.0003940599378968715, "loss": 1.74, "step": 35932 }, { "epoch": 1.2, "grad_norm": 0.4974254071712494, "learning_rate": 0.0003940500127476146, "loss": 1.8583, "step": 35933 }, { "epoch": 1.2, "grad_norm": 0.4728374779224396, "learning_rate": 0.00039404008748419503, "loss": 1.7368, "step": 35934 }, { "epoch": 1.2, "grad_norm": 0.48866888880729675, "learning_rate": 0.0003940301621066246, "loss": 1.9103, "step": 35935 }, { "epoch": 1.2, "grad_norm": 0.46726804971694946, "learning_rate": 0.00039402023661491535, "loss": 1.788, "step": 35936 }, { "epoch": 1.2, "grad_norm": 0.5026143789291382, "learning_rate": 0.00039401031100907947, "loss": 1.7304, "step": 35937 }, { "epoch": 1.2, "grad_norm": 0.4949232041835785, "learning_rate": 0.00039400038528912887, "loss": 1.7481, "step": 35938 }, { "epoch": 1.2, "grad_norm": 0.48204389214515686, "learning_rate": 0.00039399045945507564, "loss": 1.8202, "step": 35939 }, { "epoch": 1.2, "grad_norm": 0.49051934480667114, "learning_rate": 0.0003939805335069318, "loss": 1.7365, "step": 35940 }, { "epoch": 1.2, "grad_norm": 0.47204405069351196, "learning_rate": 0.00039397060744470955, "loss": 1.8236, "step": 35941 }, { "epoch": 1.2, "grad_norm": 0.48716986179351807, "learning_rate": 0.00039396068126842075, "loss": 1.7078, "step": 35942 }, { "epoch": 1.2, "grad_norm": 0.4717181921005249, "learning_rate": 0.0003939507549780776, "loss": 1.6883, "step": 35943 }, { "epoch": 1.2, "grad_norm": 0.8656670451164246, "learning_rate": 0.0003939408285736921, "loss": 1.7711, "step": 35944 }, { "epoch": 1.2, "grad_norm": 0.4847184717655182, "learning_rate": 0.00039393090205527614, "loss": 1.7479, "step": 35945 }, { "epoch": 1.2, "grad_norm": 0.48004040122032166, "learning_rate": 0.00039392097542284197, "loss": 1.8228, "step": 35946 }, { "epoch": 1.2, "grad_norm": 0.49049896001815796, "learning_rate": 0.00039391104867640155, "loss": 1.8367, "step": 35947 }, { "epoch": 1.2, "grad_norm": 0.48102596402168274, "learning_rate": 0.0003939011218159671, "loss": 1.7837, "step": 35948 }, { "epoch": 1.2, "grad_norm": 0.4992682933807373, "learning_rate": 0.00039389119484155023, "loss": 1.7915, "step": 35949 }, { "epoch": 1.2, "grad_norm": 0.4767807126045227, "learning_rate": 0.0003938812677531635, "loss": 1.8649, "step": 35950 }, { "epoch": 1.2, "grad_norm": 0.4802120625972748, "learning_rate": 0.0003938713405508188, "loss": 1.7768, "step": 35951 }, { "epoch": 1.2, "grad_norm": 0.466862291097641, "learning_rate": 0.00039386141323452794, "loss": 1.8375, "step": 35952 }, { "epoch": 1.2, "grad_norm": 0.47818028926849365, "learning_rate": 0.0003938514858043033, "loss": 1.771, "step": 35953 }, { "epoch": 1.2, "grad_norm": 0.4620400369167328, "learning_rate": 0.00039384155826015666, "loss": 1.8412, "step": 35954 }, { "epoch": 1.2, "grad_norm": 0.4774886965751648, "learning_rate": 0.0003938316306021003, "loss": 1.7925, "step": 35955 }, { "epoch": 1.2, "grad_norm": 0.48232030868530273, "learning_rate": 0.0003938217028301461, "loss": 1.745, "step": 35956 }, { "epoch": 1.2, "grad_norm": 0.47190970182418823, "learning_rate": 0.00039381177494430625, "loss": 1.8033, "step": 35957 }, { "epoch": 1.2, "grad_norm": 0.46986937522888184, "learning_rate": 0.00039380184694459263, "loss": 1.772, "step": 35958 }, { "epoch": 1.2, "grad_norm": 0.4792160391807556, "learning_rate": 0.00039379191883101745, "loss": 1.7683, "step": 35959 }, { "epoch": 1.2, "grad_norm": 0.4962531328201294, "learning_rate": 0.0003937819906035928, "loss": 1.7166, "step": 35960 }, { "epoch": 1.2, "grad_norm": 0.45990288257598877, "learning_rate": 0.0003937720622623304, "loss": 1.7026, "step": 35961 }, { "epoch": 1.2, "grad_norm": 0.47844383120536804, "learning_rate": 0.0003937621338072427, "loss": 1.7581, "step": 35962 }, { "epoch": 1.2, "grad_norm": 0.48530539870262146, "learning_rate": 0.0003937522052383415, "loss": 1.7194, "step": 35963 }, { "epoch": 1.2, "grad_norm": 0.4752489924430847, "learning_rate": 0.00039374227655563894, "loss": 1.8538, "step": 35964 }, { "epoch": 1.2, "grad_norm": 0.48922762274742126, "learning_rate": 0.0003937323477591472, "loss": 1.7748, "step": 35965 }, { "epoch": 1.2, "grad_norm": 0.5018531680107117, "learning_rate": 0.00039372241884887805, "loss": 1.7706, "step": 35966 }, { "epoch": 1.2, "grad_norm": 0.4628322720527649, "learning_rate": 0.0003937124898248438, "loss": 1.825, "step": 35967 }, { "epoch": 1.2, "grad_norm": 0.4738856256008148, "learning_rate": 0.0003937025606870563, "loss": 1.703, "step": 35968 }, { "epoch": 1.2, "grad_norm": 0.47572195529937744, "learning_rate": 0.0003936926314355278, "loss": 1.6555, "step": 35969 }, { "epoch": 1.2, "grad_norm": 0.49528902769088745, "learning_rate": 0.0003936827020702701, "loss": 1.8627, "step": 35970 }, { "epoch": 1.2, "grad_norm": 0.46545693278312683, "learning_rate": 0.00039367277259129554, "loss": 1.7346, "step": 35971 }, { "epoch": 1.2, "grad_norm": 0.47179844975471497, "learning_rate": 0.000393662842998616, "loss": 1.7959, "step": 35972 }, { "epoch": 1.2, "grad_norm": 0.472572386264801, "learning_rate": 0.00039365291329224357, "loss": 1.7656, "step": 35973 }, { "epoch": 1.2, "grad_norm": 0.4544529914855957, "learning_rate": 0.0003936429834721903, "loss": 1.7467, "step": 35974 }, { "epoch": 1.2, "grad_norm": 0.456083208322525, "learning_rate": 0.0003936330535384682, "loss": 1.7407, "step": 35975 }, { "epoch": 1.2, "grad_norm": 0.47002196311950684, "learning_rate": 0.00039362312349108943, "loss": 1.8067, "step": 35976 }, { "epoch": 1.2, "grad_norm": 0.48930808901786804, "learning_rate": 0.00039361319333006595, "loss": 1.7992, "step": 35977 }, { "epoch": 1.2, "grad_norm": 0.5055237412452698, "learning_rate": 0.00039360326305540994, "loss": 1.8671, "step": 35978 }, { "epoch": 1.2, "grad_norm": 0.47564631700515747, "learning_rate": 0.0003935933326671332, "loss": 1.7449, "step": 35979 }, { "epoch": 1.2, "grad_norm": 0.47466936707496643, "learning_rate": 0.000393583402165248, "loss": 1.7433, "step": 35980 }, { "epoch": 1.2, "grad_norm": 0.47679638862609863, "learning_rate": 0.0003935734715497664, "loss": 1.7968, "step": 35981 }, { "epoch": 1.2, "grad_norm": 0.47590628266334534, "learning_rate": 0.0003935635408207003, "loss": 1.8186, "step": 35982 }, { "epoch": 1.2, "grad_norm": 0.5217300057411194, "learning_rate": 0.000393553609978062, "loss": 1.7704, "step": 35983 }, { "epoch": 1.2, "grad_norm": 0.4748753011226654, "learning_rate": 0.0003935436790218632, "loss": 1.7942, "step": 35984 }, { "epoch": 1.2, "grad_norm": 0.47190654277801514, "learning_rate": 0.0003935337479521163, "loss": 1.7342, "step": 35985 }, { "epoch": 1.2, "grad_norm": 0.485096275806427, "learning_rate": 0.00039352381676883313, "loss": 1.8026, "step": 35986 }, { "epoch": 1.2, "grad_norm": 0.47176429629325867, "learning_rate": 0.0003935138854720259, "loss": 1.775, "step": 35987 }, { "epoch": 1.2, "grad_norm": 0.49478134512901306, "learning_rate": 0.0003935039540617066, "loss": 1.7396, "step": 35988 }, { "epoch": 1.2, "grad_norm": 0.485503613948822, "learning_rate": 0.0003934940225378872, "loss": 1.713, "step": 35989 }, { "epoch": 1.2, "grad_norm": 0.4816659390926361, "learning_rate": 0.00039348409090057997, "loss": 1.7769, "step": 35990 }, { "epoch": 1.2, "grad_norm": 0.48723334074020386, "learning_rate": 0.0003934741591497966, "loss": 1.8933, "step": 35991 }, { "epoch": 1.2, "grad_norm": 0.4843708276748657, "learning_rate": 0.00039346422728554954, "loss": 1.7338, "step": 35992 }, { "epoch": 1.2, "grad_norm": 0.4515051245689392, "learning_rate": 0.0003934542953078506, "loss": 1.7275, "step": 35993 }, { "epoch": 1.2, "grad_norm": 0.461830198764801, "learning_rate": 0.000393444363216712, "loss": 1.8119, "step": 35994 }, { "epoch": 1.2, "grad_norm": 0.4737326204776764, "learning_rate": 0.0003934344310121456, "loss": 1.7993, "step": 35995 }, { "epoch": 1.2, "grad_norm": 0.45986565947532654, "learning_rate": 0.0003934244986941637, "loss": 1.8, "step": 35996 }, { "epoch": 1.2, "grad_norm": 0.46348246932029724, "learning_rate": 0.0003934145662627781, "loss": 1.8247, "step": 35997 }, { "epoch": 1.2, "grad_norm": 0.46612775325775146, "learning_rate": 0.00039340463371800107, "loss": 1.8385, "step": 35998 }, { "epoch": 1.2, "grad_norm": 0.4686296284198761, "learning_rate": 0.00039339470105984454, "loss": 1.7293, "step": 35999 }, { "epoch": 1.2, "grad_norm": 0.47583165764808655, "learning_rate": 0.00039338476828832064, "loss": 1.7385, "step": 36000 }, { "epoch": 1.2, "grad_norm": 0.471166729927063, "learning_rate": 0.00039337483540344135, "loss": 1.7668, "step": 36001 }, { "epoch": 1.2, "grad_norm": 0.47162801027297974, "learning_rate": 0.0003933649024052188, "loss": 1.8063, "step": 36002 }, { "epoch": 1.2, "grad_norm": 0.48397842049598694, "learning_rate": 0.000393354969293665, "loss": 1.7676, "step": 36003 }, { "epoch": 1.2, "grad_norm": 0.47943776845932007, "learning_rate": 0.0003933450360687919, "loss": 1.8345, "step": 36004 }, { "epoch": 1.2, "grad_norm": 0.46111243963241577, "learning_rate": 0.0003933351027306119, "loss": 1.7615, "step": 36005 }, { "epoch": 1.2, "grad_norm": 0.4658041298389435, "learning_rate": 0.0003933251692791367, "loss": 1.8368, "step": 36006 }, { "epoch": 1.2, "grad_norm": 0.4842536747455597, "learning_rate": 0.00039331523571437856, "loss": 1.7924, "step": 36007 }, { "epoch": 1.2, "grad_norm": 0.48997291922569275, "learning_rate": 0.00039330530203634937, "loss": 1.907, "step": 36008 }, { "epoch": 1.2, "grad_norm": 1.7106871604919434, "learning_rate": 0.00039329536824506145, "loss": 1.891, "step": 36009 }, { "epoch": 1.2, "grad_norm": 0.4790315628051758, "learning_rate": 0.0003932854343405266, "loss": 1.7782, "step": 36010 }, { "epoch": 1.2, "grad_norm": 0.46042823791503906, "learning_rate": 0.0003932755003227569, "loss": 1.7328, "step": 36011 }, { "epoch": 1.2, "grad_norm": 0.45983001589775085, "learning_rate": 0.00039326556619176465, "loss": 1.7457, "step": 36012 }, { "epoch": 1.2, "grad_norm": 0.4779762029647827, "learning_rate": 0.0003932556319475617, "loss": 1.7676, "step": 36013 }, { "epoch": 1.2, "grad_norm": 0.46965453028678894, "learning_rate": 0.0003932456975901601, "loss": 1.7597, "step": 36014 }, { "epoch": 1.2, "grad_norm": 0.4667247235774994, "learning_rate": 0.000393235763119572, "loss": 1.7866, "step": 36015 }, { "epoch": 1.2, "grad_norm": 0.46414652466773987, "learning_rate": 0.00039322582853580933, "loss": 1.7549, "step": 36016 }, { "epoch": 1.2, "grad_norm": 0.47884345054626465, "learning_rate": 0.00039321589383888435, "loss": 1.7798, "step": 36017 }, { "epoch": 1.2, "grad_norm": 0.47474899888038635, "learning_rate": 0.00039320595902880894, "loss": 1.7689, "step": 36018 }, { "epoch": 1.2, "grad_norm": 0.49057382345199585, "learning_rate": 0.0003931960241055953, "loss": 1.8229, "step": 36019 }, { "epoch": 1.2, "grad_norm": 0.47786006331443787, "learning_rate": 0.00039318608906925535, "loss": 1.7697, "step": 36020 }, { "epoch": 1.2, "grad_norm": 1.6914194822311401, "learning_rate": 0.0003931761539198013, "loss": 1.8117, "step": 36021 }, { "epoch": 1.2, "grad_norm": 0.4798959195613861, "learning_rate": 0.0003931662186572451, "loss": 1.7461, "step": 36022 }, { "epoch": 1.2, "grad_norm": 0.47676125168800354, "learning_rate": 0.0003931562832815988, "loss": 1.7703, "step": 36023 }, { "epoch": 1.2, "grad_norm": 0.47691425681114197, "learning_rate": 0.0003931463477928745, "loss": 1.8085, "step": 36024 }, { "epoch": 1.2, "grad_norm": 0.46667394042015076, "learning_rate": 0.00039313641219108425, "loss": 1.7778, "step": 36025 }, { "epoch": 1.2, "grad_norm": 0.46817293763160706, "learning_rate": 0.0003931264764762402, "loss": 1.7785, "step": 36026 }, { "epoch": 1.2, "grad_norm": 0.4830562472343445, "learning_rate": 0.0003931165406483542, "loss": 1.8462, "step": 36027 }, { "epoch": 1.2, "grad_norm": 0.5057024955749512, "learning_rate": 0.00039310660470743853, "loss": 1.7846, "step": 36028 }, { "epoch": 1.2, "grad_norm": 0.4683154225349426, "learning_rate": 0.00039309666865350524, "loss": 1.7835, "step": 36029 }, { "epoch": 1.2, "grad_norm": 0.4760672152042389, "learning_rate": 0.0003930867324865662, "loss": 1.7708, "step": 36030 }, { "epoch": 1.2, "grad_norm": 0.49797260761260986, "learning_rate": 0.00039307679620663357, "loss": 1.8066, "step": 36031 }, { "epoch": 1.2, "grad_norm": 0.48119109869003296, "learning_rate": 0.00039306685981371943, "loss": 1.7456, "step": 36032 }, { "epoch": 1.2, "grad_norm": 0.4721808433532715, "learning_rate": 0.00039305692330783587, "loss": 1.8218, "step": 36033 }, { "epoch": 1.2, "grad_norm": 0.47357866168022156, "learning_rate": 0.00039304698668899486, "loss": 1.7605, "step": 36034 }, { "epoch": 1.2, "grad_norm": 0.4725433588027954, "learning_rate": 0.0003930370499572086, "loss": 1.8522, "step": 36035 }, { "epoch": 1.2, "grad_norm": 0.4771081507205963, "learning_rate": 0.00039302711311248905, "loss": 1.7869, "step": 36036 }, { "epoch": 1.2, "grad_norm": 0.49460434913635254, "learning_rate": 0.00039301717615484826, "loss": 1.7642, "step": 36037 }, { "epoch": 1.2, "grad_norm": 0.4638543426990509, "learning_rate": 0.0003930072390842983, "loss": 1.7273, "step": 36038 }, { "epoch": 1.2, "grad_norm": 0.4569019377231598, "learning_rate": 0.00039299730190085134, "loss": 1.7825, "step": 36039 }, { "epoch": 1.2, "grad_norm": 0.4782896041870117, "learning_rate": 0.00039298736460451934, "loss": 1.7572, "step": 36040 }, { "epoch": 1.2, "grad_norm": 0.4953465461730957, "learning_rate": 0.0003929774271953143, "loss": 1.8069, "step": 36041 }, { "epoch": 1.2, "grad_norm": 0.48531919717788696, "learning_rate": 0.00039296748967324844, "loss": 1.8444, "step": 36042 }, { "epoch": 1.2, "grad_norm": 0.47469040751457214, "learning_rate": 0.0003929575520383337, "loss": 1.7784, "step": 36043 }, { "epoch": 1.2, "grad_norm": 0.5030423998832703, "learning_rate": 0.00039294761429058215, "loss": 1.7468, "step": 36044 }, { "epoch": 1.2, "grad_norm": 0.5021344423294067, "learning_rate": 0.000392937676430006, "loss": 1.7716, "step": 36045 }, { "epoch": 1.2, "grad_norm": 0.48738792538642883, "learning_rate": 0.0003929277384566171, "loss": 1.7715, "step": 36046 }, { "epoch": 1.2, "grad_norm": 0.4819067716598511, "learning_rate": 0.00039291780037042776, "loss": 1.8221, "step": 36047 }, { "epoch": 1.2, "grad_norm": 0.4971380829811096, "learning_rate": 0.00039290786217144975, "loss": 1.707, "step": 36048 }, { "epoch": 1.2, "grad_norm": 0.4926905930042267, "learning_rate": 0.00039289792385969536, "loss": 1.7886, "step": 36049 }, { "epoch": 1.2, "grad_norm": 0.485916405916214, "learning_rate": 0.00039288798543517657, "loss": 1.7852, "step": 36050 }, { "epoch": 1.2, "grad_norm": 0.4972691237926483, "learning_rate": 0.0003928780468979054, "loss": 1.7831, "step": 36051 }, { "epoch": 1.2, "grad_norm": 0.4900684058666229, "learning_rate": 0.0003928681082478941, "loss": 1.7519, "step": 36052 }, { "epoch": 1.2, "grad_norm": 0.5164362788200378, "learning_rate": 0.0003928581694851544, "loss": 1.7557, "step": 36053 }, { "epoch": 1.2, "grad_norm": 0.4726647734642029, "learning_rate": 0.00039284823060969877, "loss": 1.7183, "step": 36054 }, { "epoch": 1.2, "grad_norm": 0.490025132894516, "learning_rate": 0.000392838291621539, "loss": 1.7925, "step": 36055 }, { "epoch": 1.2, "grad_norm": 0.4752987027168274, "learning_rate": 0.0003928283525206872, "loss": 1.7412, "step": 36056 }, { "epoch": 1.2, "grad_norm": 0.46288877725601196, "learning_rate": 0.00039281841330715545, "loss": 1.7485, "step": 36057 }, { "epoch": 1.2, "grad_norm": 0.4588356614112854, "learning_rate": 0.00039280847398095575, "loss": 1.7324, "step": 36058 }, { "epoch": 1.2, "grad_norm": 0.47570833563804626, "learning_rate": 0.0003927985345421003, "loss": 1.7875, "step": 36059 }, { "epoch": 1.2, "grad_norm": 0.4876784682273865, "learning_rate": 0.0003927885949906012, "loss": 1.7707, "step": 36060 }, { "epoch": 1.2, "grad_norm": 0.4800717532634735, "learning_rate": 0.0003927786553264704, "loss": 1.668, "step": 36061 }, { "epoch": 1.2, "grad_norm": 0.4792259931564331, "learning_rate": 0.0003927687155497198, "loss": 1.7736, "step": 36062 }, { "epoch": 1.2, "grad_norm": 0.46320679783821106, "learning_rate": 0.0003927587756603618, "loss": 1.745, "step": 36063 }, { "epoch": 1.2, "grad_norm": 0.4647618532180786, "learning_rate": 0.00039274883565840825, "loss": 1.7417, "step": 36064 }, { "epoch": 1.2, "grad_norm": 0.4615817964076996, "learning_rate": 0.0003927388955438714, "loss": 1.7514, "step": 36065 }, { "epoch": 1.2, "grad_norm": 0.4831660985946655, "learning_rate": 0.00039272895531676294, "loss": 1.8659, "step": 36066 }, { "epoch": 1.2, "grad_norm": 0.4685177505016327, "learning_rate": 0.00039271901497709544, "loss": 1.8119, "step": 36067 }, { "epoch": 1.2, "grad_norm": 0.467721551656723, "learning_rate": 0.00039270907452488066, "loss": 1.8206, "step": 36068 }, { "epoch": 1.2, "grad_norm": 0.47363656759262085, "learning_rate": 0.0003926991339601307, "loss": 1.8521, "step": 36069 }, { "epoch": 1.2, "grad_norm": 0.469270795583725, "learning_rate": 0.00039268919328285765, "loss": 1.8396, "step": 36070 }, { "epoch": 1.2, "grad_norm": 0.4769413471221924, "learning_rate": 0.00039267925249307354, "loss": 1.8039, "step": 36071 }, { "epoch": 1.2, "grad_norm": 0.43792885541915894, "learning_rate": 0.00039266931159079055, "loss": 1.7922, "step": 36072 }, { "epoch": 1.2, "grad_norm": 0.4847091734409332, "learning_rate": 0.00039265937057602056, "loss": 1.8347, "step": 36073 }, { "epoch": 1.2, "grad_norm": 0.4808115065097809, "learning_rate": 0.0003926494294487758, "loss": 1.91, "step": 36074 }, { "epoch": 1.2, "grad_norm": 0.47628310322761536, "learning_rate": 0.0003926394882090683, "loss": 1.838, "step": 36075 }, { "epoch": 1.2, "grad_norm": 0.4615686535835266, "learning_rate": 0.0003926295468569101, "loss": 1.7841, "step": 36076 }, { "epoch": 1.2, "grad_norm": 0.490725576877594, "learning_rate": 0.00039261960539231335, "loss": 1.787, "step": 36077 }, { "epoch": 1.2, "grad_norm": 0.4881068766117096, "learning_rate": 0.00039260966381528994, "loss": 1.7114, "step": 36078 }, { "epoch": 1.2, "grad_norm": 0.4785141348838806, "learning_rate": 0.00039259972212585216, "loss": 1.804, "step": 36079 }, { "epoch": 1.2, "grad_norm": 0.47631537914276123, "learning_rate": 0.0003925897803240119, "loss": 1.8126, "step": 36080 }, { "epoch": 1.2, "grad_norm": 0.4690733850002289, "learning_rate": 0.0003925798384097812, "loss": 1.7812, "step": 36081 }, { "epoch": 1.2, "grad_norm": 0.466157466173172, "learning_rate": 0.00039256989638317233, "loss": 1.8242, "step": 36082 }, { "epoch": 1.2, "grad_norm": 0.47060173749923706, "learning_rate": 0.0003925599542441972, "loss": 1.7434, "step": 36083 }, { "epoch": 1.2, "grad_norm": 0.49918410181999207, "learning_rate": 0.000392550011992868, "loss": 1.7902, "step": 36084 }, { "epoch": 1.2, "grad_norm": 0.4648786783218384, "learning_rate": 0.0003925400696291966, "loss": 1.7974, "step": 36085 }, { "epoch": 1.2, "grad_norm": 0.4576922655105591, "learning_rate": 0.0003925301271531953, "loss": 1.8119, "step": 36086 }, { "epoch": 1.2, "grad_norm": 0.45444250106811523, "learning_rate": 0.00039252018456487596, "loss": 1.7952, "step": 36087 }, { "epoch": 1.2, "grad_norm": 0.49647364020347595, "learning_rate": 0.00039251024186425074, "loss": 1.7814, "step": 36088 }, { "epoch": 1.2, "grad_norm": 0.46763262152671814, "learning_rate": 0.0003925002990513317, "loss": 1.7525, "step": 36089 }, { "epoch": 1.2, "grad_norm": 0.4884214401245117, "learning_rate": 0.0003924903561261311, "loss": 1.8339, "step": 36090 }, { "epoch": 1.2, "grad_norm": 0.4728033244609833, "learning_rate": 0.0003924804130886608, "loss": 1.8319, "step": 36091 }, { "epoch": 1.2, "grad_norm": 0.47283920645713806, "learning_rate": 0.00039247046993893283, "loss": 1.7674, "step": 36092 }, { "epoch": 1.2, "grad_norm": 0.4847603440284729, "learning_rate": 0.0003924605266769593, "loss": 1.7363, "step": 36093 }, { "epoch": 1.2, "grad_norm": 0.48166796565055847, "learning_rate": 0.0003924505833027523, "loss": 1.8148, "step": 36094 }, { "epoch": 1.2, "grad_norm": 0.4919278621673584, "learning_rate": 0.00039244063981632403, "loss": 1.8657, "step": 36095 }, { "epoch": 1.2, "grad_norm": 0.4793863296508789, "learning_rate": 0.0003924306962176863, "loss": 1.8159, "step": 36096 }, { "epoch": 1.2, "grad_norm": 0.49698299169540405, "learning_rate": 0.00039242075250685147, "loss": 1.7666, "step": 36097 }, { "epoch": 1.2, "grad_norm": 0.4706864058971405, "learning_rate": 0.0003924108086838314, "loss": 1.6902, "step": 36098 }, { "epoch": 1.2, "grad_norm": 0.6261247992515564, "learning_rate": 0.0003924008647486382, "loss": 1.8932, "step": 36099 }, { "epoch": 1.2, "grad_norm": 0.4689335525035858, "learning_rate": 0.00039239092070128404, "loss": 1.8377, "step": 36100 }, { "epoch": 1.2, "grad_norm": 0.5411515831947327, "learning_rate": 0.0003923809765417808, "loss": 1.7635, "step": 36101 }, { "epoch": 1.2, "grad_norm": 0.4638444781303406, "learning_rate": 0.0003923710322701408, "loss": 1.855, "step": 36102 }, { "epoch": 1.2, "grad_norm": 0.4818413257598877, "learning_rate": 0.00039236108788637584, "loss": 1.7543, "step": 36103 }, { "epoch": 1.2, "grad_norm": 0.4892175793647766, "learning_rate": 0.00039235114339049824, "loss": 1.8132, "step": 36104 }, { "epoch": 1.2, "grad_norm": 0.46193167567253113, "learning_rate": 0.0003923411987825199, "loss": 1.7259, "step": 36105 }, { "epoch": 1.2, "grad_norm": 0.47592076659202576, "learning_rate": 0.00039233125406245293, "loss": 1.8182, "step": 36106 }, { "epoch": 1.2, "grad_norm": 0.47759103775024414, "learning_rate": 0.00039232130923030954, "loss": 1.724, "step": 36107 }, { "epoch": 1.2, "grad_norm": 0.4905489385128021, "learning_rate": 0.00039231136428610156, "loss": 1.7651, "step": 36108 }, { "epoch": 1.2, "grad_norm": 0.47162508964538574, "learning_rate": 0.00039230141922984123, "loss": 1.7194, "step": 36109 }, { "epoch": 1.2, "grad_norm": 0.46578341722488403, "learning_rate": 0.00039229147406154047, "loss": 1.7626, "step": 36110 }, { "epoch": 1.2, "grad_norm": 0.4936739206314087, "learning_rate": 0.0003922815287812117, "loss": 1.7837, "step": 36111 }, { "epoch": 1.2, "grad_norm": 0.457369327545166, "learning_rate": 0.00039227158338886654, "loss": 1.7891, "step": 36112 }, { "epoch": 1.2, "grad_norm": 0.4773862063884735, "learning_rate": 0.0003922616378845173, "loss": 1.7885, "step": 36113 }, { "epoch": 1.2, "grad_norm": 0.46233218908309937, "learning_rate": 0.00039225169226817614, "loss": 1.7667, "step": 36114 }, { "epoch": 1.2, "grad_norm": 0.4821820855140686, "learning_rate": 0.00039224174653985493, "loss": 1.7657, "step": 36115 }, { "epoch": 1.2, "grad_norm": 0.5055914521217346, "learning_rate": 0.00039223180069956583, "loss": 1.8884, "step": 36116 }, { "epoch": 1.2, "grad_norm": 0.4696495234966278, "learning_rate": 0.000392221854747321, "loss": 1.8599, "step": 36117 }, { "epoch": 1.2, "grad_norm": 0.4925694465637207, "learning_rate": 0.0003922119086831323, "loss": 1.7746, "step": 36118 }, { "epoch": 1.2, "grad_norm": 0.4923814833164215, "learning_rate": 0.00039220196250701203, "loss": 1.7455, "step": 36119 }, { "epoch": 1.2, "grad_norm": 0.47256097197532654, "learning_rate": 0.0003921920162189721, "loss": 1.6972, "step": 36120 }, { "epoch": 1.2, "grad_norm": 0.47963905334472656, "learning_rate": 0.0003921820698190247, "loss": 1.83, "step": 36121 }, { "epoch": 1.2, "grad_norm": 0.485004186630249, "learning_rate": 0.00039217212330718183, "loss": 1.7465, "step": 36122 }, { "epoch": 1.2, "grad_norm": 0.4663980007171631, "learning_rate": 0.00039216217668345556, "loss": 1.7623, "step": 36123 }, { "epoch": 1.2, "grad_norm": 0.46732276678085327, "learning_rate": 0.00039215222994785795, "loss": 1.7691, "step": 36124 }, { "epoch": 1.2, "grad_norm": 0.4628945291042328, "learning_rate": 0.0003921422831004012, "loss": 1.7758, "step": 36125 }, { "epoch": 1.2, "grad_norm": 0.49361705780029297, "learning_rate": 0.0003921323361410972, "loss": 1.7208, "step": 36126 }, { "epoch": 1.2, "grad_norm": 0.4862578809261322, "learning_rate": 0.0003921223890699583, "loss": 1.8257, "step": 36127 }, { "epoch": 1.2, "grad_norm": 0.46178048849105835, "learning_rate": 0.00039211244188699614, "loss": 1.6858, "step": 36128 }, { "epoch": 1.2, "grad_norm": 0.48766180872917175, "learning_rate": 0.00039210249459222326, "loss": 1.7862, "step": 36129 }, { "epoch": 1.2, "grad_norm": 0.46681109070777893, "learning_rate": 0.0003920925471856514, "loss": 1.7515, "step": 36130 }, { "epoch": 1.2, "grad_norm": 0.47576904296875, "learning_rate": 0.00039208259966729285, "loss": 1.7737, "step": 36131 }, { "epoch": 1.2, "grad_norm": 0.4763818681240082, "learning_rate": 0.00039207265203715955, "loss": 1.7853, "step": 36132 }, { "epoch": 1.2, "grad_norm": 0.47047126293182373, "learning_rate": 0.00039206270429526354, "loss": 1.7515, "step": 36133 }, { "epoch": 1.2, "grad_norm": 0.4736175537109375, "learning_rate": 0.00039205275644161713, "loss": 1.7875, "step": 36134 }, { "epoch": 1.2, "grad_norm": 0.4654531478881836, "learning_rate": 0.000392042808476232, "loss": 1.7755, "step": 36135 }, { "epoch": 1.2, "grad_norm": 0.4564269781112671, "learning_rate": 0.00039203286039912066, "loss": 1.782, "step": 36136 }, { "epoch": 1.2, "grad_norm": 0.4857110381126404, "learning_rate": 0.00039202291221029494, "loss": 1.7999, "step": 36137 }, { "epoch": 1.2, "grad_norm": 0.4811137020587921, "learning_rate": 0.00039201296390976696, "loss": 1.7879, "step": 36138 }, { "epoch": 1.2, "grad_norm": 0.4644280970096588, "learning_rate": 0.0003920030154975488, "loss": 1.7502, "step": 36139 }, { "epoch": 1.2, "grad_norm": 0.4724390208721161, "learning_rate": 0.00039199306697365247, "loss": 1.7694, "step": 36140 }, { "epoch": 1.2, "grad_norm": 0.4820576012134552, "learning_rate": 0.00039198311833809024, "loss": 1.7342, "step": 36141 }, { "epoch": 1.2, "grad_norm": 0.4670456349849701, "learning_rate": 0.000391973169590874, "loss": 1.8456, "step": 36142 }, { "epoch": 1.2, "grad_norm": 0.4867206811904907, "learning_rate": 0.00039196322073201584, "loss": 1.8124, "step": 36143 }, { "epoch": 1.2, "grad_norm": 0.4761006236076355, "learning_rate": 0.00039195327176152795, "loss": 1.7522, "step": 36144 }, { "epoch": 1.2, "grad_norm": 0.49654921889305115, "learning_rate": 0.00039194332267942226, "loss": 1.811, "step": 36145 }, { "epoch": 1.2, "grad_norm": 0.4872976243495941, "learning_rate": 0.000391933373485711, "loss": 1.7968, "step": 36146 }, { "epoch": 1.2, "grad_norm": 0.47831058502197266, "learning_rate": 0.0003919234241804061, "loss": 1.7571, "step": 36147 }, { "epoch": 1.2, "grad_norm": 0.46753907203674316, "learning_rate": 0.00039191347476351976, "loss": 1.7343, "step": 36148 }, { "epoch": 1.2, "grad_norm": 0.474793940782547, "learning_rate": 0.00039190352523506397, "loss": 1.8039, "step": 36149 }, { "epoch": 1.2, "grad_norm": 0.4760468304157257, "learning_rate": 0.0003918935755950509, "loss": 1.763, "step": 36150 }, { "epoch": 1.2, "grad_norm": 0.4816647171974182, "learning_rate": 0.00039188362584349247, "loss": 1.7099, "step": 36151 }, { "epoch": 1.2, "grad_norm": 0.4686504602432251, "learning_rate": 0.000391873675980401, "loss": 1.7728, "step": 36152 }, { "epoch": 1.2, "grad_norm": 0.4896213412284851, "learning_rate": 0.00039186372600578826, "loss": 1.8237, "step": 36153 }, { "epoch": 1.2, "grad_norm": 0.7621778845787048, "learning_rate": 0.0003918537759196666, "loss": 1.7299, "step": 36154 }, { "epoch": 1.2, "grad_norm": 0.4809322655200958, "learning_rate": 0.00039184382572204793, "loss": 1.8793, "step": 36155 }, { "epoch": 1.2, "grad_norm": 0.4517850875854492, "learning_rate": 0.0003918338754129444, "loss": 1.8401, "step": 36156 }, { "epoch": 1.2, "grad_norm": 0.4773554801940918, "learning_rate": 0.00039182392499236815, "loss": 1.7892, "step": 36157 }, { "epoch": 1.2, "grad_norm": 0.48349863290786743, "learning_rate": 0.0003918139744603311, "loss": 1.8829, "step": 36158 }, { "epoch": 1.2, "grad_norm": 0.4780327081680298, "learning_rate": 0.00039180402381684546, "loss": 1.7875, "step": 36159 }, { "epoch": 1.2, "grad_norm": 1.568885087966919, "learning_rate": 0.00039179407306192326, "loss": 1.7836, "step": 36160 }, { "epoch": 1.2, "grad_norm": 0.47716087102890015, "learning_rate": 0.0003917841221955765, "loss": 1.756, "step": 36161 }, { "epoch": 1.2, "grad_norm": 0.4993683993816376, "learning_rate": 0.0003917741712178175, "loss": 1.7929, "step": 36162 }, { "epoch": 1.2, "grad_norm": 0.4668642580509186, "learning_rate": 0.000391764220128658, "loss": 1.7703, "step": 36163 }, { "epoch": 1.2, "grad_norm": 0.4712183177471161, "learning_rate": 0.0003917542689281104, "loss": 1.7602, "step": 36164 }, { "epoch": 1.2, "grad_norm": 0.49028289318084717, "learning_rate": 0.0003917443176161865, "loss": 1.7598, "step": 36165 }, { "epoch": 1.2, "grad_norm": 0.5017756819725037, "learning_rate": 0.0003917343661928986, "loss": 1.7773, "step": 36166 }, { "epoch": 1.2, "grad_norm": 0.46878156065940857, "learning_rate": 0.00039172441465825866, "loss": 1.7154, "step": 36167 }, { "epoch": 1.2, "grad_norm": 0.47487154603004456, "learning_rate": 0.0003917144630122789, "loss": 1.8203, "step": 36168 }, { "epoch": 1.2, "grad_norm": 0.4883917570114136, "learning_rate": 0.00039170451125497124, "loss": 1.7924, "step": 36169 }, { "epoch": 1.2, "grad_norm": 0.47790148854255676, "learning_rate": 0.00039169455938634773, "loss": 1.7872, "step": 36170 }, { "epoch": 1.2, "grad_norm": 0.4874083399772644, "learning_rate": 0.0003916846074064206, "loss": 1.802, "step": 36171 }, { "epoch": 1.2, "grad_norm": 0.48226961493492126, "learning_rate": 0.0003916746553152019, "loss": 1.8411, "step": 36172 }, { "epoch": 1.2, "grad_norm": 0.47565242648124695, "learning_rate": 0.0003916647031127036, "loss": 1.8266, "step": 36173 }, { "epoch": 1.2, "grad_norm": 0.4840492606163025, "learning_rate": 0.0003916547507989379, "loss": 1.7614, "step": 36174 }, { "epoch": 1.2, "grad_norm": 0.46859946846961975, "learning_rate": 0.00039164479837391686, "loss": 1.7376, "step": 36175 }, { "epoch": 1.2, "grad_norm": 0.4766250550746918, "learning_rate": 0.0003916348458376525, "loss": 1.7925, "step": 36176 }, { "epoch": 1.2, "grad_norm": 0.46967917680740356, "learning_rate": 0.000391624893190157, "loss": 1.8052, "step": 36177 }, { "epoch": 1.2, "grad_norm": 0.46240806579589844, "learning_rate": 0.00039161494043144234, "loss": 1.7376, "step": 36178 }, { "epoch": 1.2, "grad_norm": 0.4783385097980499, "learning_rate": 0.0003916049875615206, "loss": 1.7261, "step": 36179 }, { "epoch": 1.2, "grad_norm": 0.48548126220703125, "learning_rate": 0.000391595034580404, "loss": 1.8146, "step": 36180 }, { "epoch": 1.2, "grad_norm": 0.4852394461631775, "learning_rate": 0.00039158508148810436, "loss": 1.639, "step": 36181 }, { "epoch": 1.2, "grad_norm": 0.6769899129867554, "learning_rate": 0.00039157512828463407, "loss": 1.8314, "step": 36182 }, { "epoch": 1.2, "grad_norm": 0.46721574664115906, "learning_rate": 0.000391565174970005, "loss": 1.7834, "step": 36183 }, { "epoch": 1.2, "grad_norm": 0.4756925106048584, "learning_rate": 0.00039155522154422933, "loss": 1.8248, "step": 36184 }, { "epoch": 1.2, "grad_norm": 0.4714857339859009, "learning_rate": 0.0003915452680073191, "loss": 1.7443, "step": 36185 }, { "epoch": 1.2, "grad_norm": 0.4763439893722534, "learning_rate": 0.00039153531435928647, "loss": 1.7592, "step": 36186 }, { "epoch": 1.2, "grad_norm": 0.47682926058769226, "learning_rate": 0.0003915253606001434, "loss": 1.7255, "step": 36187 }, { "epoch": 1.2, "grad_norm": 0.4664572775363922, "learning_rate": 0.0003915154067299019, "loss": 1.7789, "step": 36188 }, { "epoch": 1.2, "grad_norm": 0.4752904772758484, "learning_rate": 0.0003915054527485744, "loss": 1.8015, "step": 36189 }, { "epoch": 1.2, "grad_norm": 0.4680347740650177, "learning_rate": 0.0003914954986561726, "loss": 1.7482, "step": 36190 }, { "epoch": 1.2, "grad_norm": 0.47789692878723145, "learning_rate": 0.00039148554445270884, "loss": 1.7476, "step": 36191 }, { "epoch": 1.2, "grad_norm": 0.49496299028396606, "learning_rate": 0.0003914755901381951, "loss": 1.7797, "step": 36192 }, { "epoch": 1.2, "grad_norm": 0.4833160638809204, "learning_rate": 0.00039146563571264335, "loss": 1.7563, "step": 36193 }, { "epoch": 1.2, "grad_norm": 0.46917998790740967, "learning_rate": 0.000391455681176066, "loss": 1.7458, "step": 36194 }, { "epoch": 1.2, "grad_norm": 0.49523279070854187, "learning_rate": 0.0003914457265284748, "loss": 1.7642, "step": 36195 }, { "epoch": 1.2, "grad_norm": 0.46884652972221375, "learning_rate": 0.000391435771769882, "loss": 1.8168, "step": 36196 }, { "epoch": 1.2, "grad_norm": 0.4744042754173279, "learning_rate": 0.0003914258169002996, "loss": 1.723, "step": 36197 }, { "epoch": 1.2, "grad_norm": 0.48504456877708435, "learning_rate": 0.0003914158619197398, "loss": 1.8232, "step": 36198 }, { "epoch": 1.2, "grad_norm": 0.47673743963241577, "learning_rate": 0.0003914059068282145, "loss": 1.7978, "step": 36199 }, { "epoch": 1.2, "grad_norm": 0.49117910861968994, "learning_rate": 0.000391395951625736, "loss": 1.7825, "step": 36200 }, { "epoch": 1.2, "grad_norm": 0.46540746092796326, "learning_rate": 0.0003913859963123163, "loss": 1.8056, "step": 36201 }, { "epoch": 1.2, "grad_norm": 0.46844401955604553, "learning_rate": 0.0003913760408879674, "loss": 1.7702, "step": 36202 }, { "epoch": 1.2, "grad_norm": 0.4682167172431946, "learning_rate": 0.00039136608535270145, "loss": 1.7688, "step": 36203 }, { "epoch": 1.2, "grad_norm": 0.4730738401412964, "learning_rate": 0.0003913561297065306, "loss": 1.8109, "step": 36204 }, { "epoch": 1.2, "grad_norm": 1.1735682487487793, "learning_rate": 0.00039134617394946675, "loss": 1.8536, "step": 36205 }, { "epoch": 1.2, "grad_norm": 0.4912498891353607, "learning_rate": 0.0003913362180815222, "loss": 1.7415, "step": 36206 }, { "epoch": 1.2, "grad_norm": 0.48861247301101685, "learning_rate": 0.00039132626210270895, "loss": 1.777, "step": 36207 }, { "epoch": 1.2, "grad_norm": 0.4597037136554718, "learning_rate": 0.000391316306013039, "loss": 1.7427, "step": 36208 }, { "epoch": 1.2, "grad_norm": 0.4838905930519104, "learning_rate": 0.00039130634981252455, "loss": 1.759, "step": 36209 }, { "epoch": 1.2, "grad_norm": 0.4912071228027344, "learning_rate": 0.0003912963935011777, "loss": 1.8396, "step": 36210 }, { "epoch": 1.2, "grad_norm": 0.4814557433128357, "learning_rate": 0.0003912864370790103, "loss": 1.7273, "step": 36211 }, { "epoch": 1.2, "grad_norm": 0.4668528139591217, "learning_rate": 0.00039127648054603485, "loss": 1.6886, "step": 36212 }, { "epoch": 1.2, "grad_norm": 0.45710885524749756, "learning_rate": 0.00039126652390226297, "loss": 1.8025, "step": 36213 }, { "epoch": 1.2, "grad_norm": 0.4674376845359802, "learning_rate": 0.0003912565671477072, "loss": 1.7108, "step": 36214 }, { "epoch": 1.2, "grad_norm": 0.49352309107780457, "learning_rate": 0.00039124661028237925, "loss": 1.8184, "step": 36215 }, { "epoch": 1.2, "grad_norm": 0.4891067147254944, "learning_rate": 0.00039123665330629144, "loss": 1.7842, "step": 36216 }, { "epoch": 1.2, "grad_norm": 0.47167086601257324, "learning_rate": 0.0003912266962194558, "loss": 1.682, "step": 36217 }, { "epoch": 1.2, "grad_norm": 0.4871559143066406, "learning_rate": 0.0003912167390218842, "loss": 1.8428, "step": 36218 }, { "epoch": 1.21, "grad_norm": 0.4839634895324707, "learning_rate": 0.00039120678171358916, "loss": 1.8074, "step": 36219 }, { "epoch": 1.21, "grad_norm": 0.47189024090766907, "learning_rate": 0.00039119682429458237, "loss": 1.8325, "step": 36220 }, { "epoch": 1.21, "grad_norm": 0.46684637665748596, "learning_rate": 0.00039118686676487615, "loss": 1.7374, "step": 36221 }, { "epoch": 1.21, "grad_norm": 0.485191285610199, "learning_rate": 0.00039117690912448253, "loss": 1.6986, "step": 36222 }, { "epoch": 1.21, "grad_norm": 0.5068517923355103, "learning_rate": 0.00039116695137341345, "loss": 1.8134, "step": 36223 }, { "epoch": 1.21, "grad_norm": 0.4693396985530853, "learning_rate": 0.00039115699351168127, "loss": 1.7498, "step": 36224 }, { "epoch": 1.21, "grad_norm": 0.48880407214164734, "learning_rate": 0.0003911470355392978, "loss": 1.7993, "step": 36225 }, { "epoch": 1.21, "grad_norm": 0.4675534963607788, "learning_rate": 0.00039113707745627545, "loss": 1.7827, "step": 36226 }, { "epoch": 1.21, "grad_norm": 0.46432942152023315, "learning_rate": 0.0003911271192626259, "loss": 1.809, "step": 36227 }, { "epoch": 1.21, "grad_norm": 0.48931756615638733, "learning_rate": 0.00039111716095836156, "loss": 1.7735, "step": 36228 }, { "epoch": 1.21, "grad_norm": 0.4790295362472534, "learning_rate": 0.00039110720254349437, "loss": 1.8237, "step": 36229 }, { "epoch": 1.21, "grad_norm": 0.4806494414806366, "learning_rate": 0.0003910972440180365, "loss": 1.7952, "step": 36230 }, { "epoch": 1.21, "grad_norm": 0.45890650153160095, "learning_rate": 0.000391087285382, "loss": 1.7791, "step": 36231 }, { "epoch": 1.21, "grad_norm": 0.47482582926750183, "learning_rate": 0.0003910773266353969, "loss": 1.7903, "step": 36232 }, { "epoch": 1.21, "grad_norm": 0.49651986360549927, "learning_rate": 0.00039106736777823944, "loss": 1.8548, "step": 36233 }, { "epoch": 1.21, "grad_norm": 0.6380555033683777, "learning_rate": 0.0003910574088105395, "loss": 1.8324, "step": 36234 }, { "epoch": 1.21, "grad_norm": 0.4674757719039917, "learning_rate": 0.0003910474497323094, "loss": 1.7253, "step": 36235 }, { "epoch": 1.21, "grad_norm": 0.47953924536705017, "learning_rate": 0.000391037490543561, "loss": 1.7199, "step": 36236 }, { "epoch": 1.21, "grad_norm": 0.4638371467590332, "learning_rate": 0.0003910275312443066, "loss": 1.7942, "step": 36237 }, { "epoch": 1.21, "grad_norm": 0.495400607585907, "learning_rate": 0.0003910175718345581, "loss": 1.7393, "step": 36238 }, { "epoch": 1.21, "grad_norm": 0.4770784080028534, "learning_rate": 0.00039100761231432773, "loss": 1.826, "step": 36239 }, { "epoch": 1.21, "grad_norm": 0.44887369871139526, "learning_rate": 0.0003909976526836276, "loss": 1.7779, "step": 36240 }, { "epoch": 1.21, "grad_norm": 0.4868408143520355, "learning_rate": 0.0003909876929424696, "loss": 1.7358, "step": 36241 }, { "epoch": 1.21, "grad_norm": 0.6202133893966675, "learning_rate": 0.000390977733090866, "loss": 1.7745, "step": 36242 }, { "epoch": 1.21, "grad_norm": 0.4679792523384094, "learning_rate": 0.0003909677731288288, "loss": 1.7528, "step": 36243 }, { "epoch": 1.21, "grad_norm": 0.4945046603679657, "learning_rate": 0.0003909578130563702, "loss": 1.7863, "step": 36244 }, { "epoch": 1.21, "grad_norm": 0.4836089313030243, "learning_rate": 0.0003909478528735021, "loss": 1.7821, "step": 36245 }, { "epoch": 1.21, "grad_norm": 0.47176337242126465, "learning_rate": 0.00039093789258023683, "loss": 1.7486, "step": 36246 }, { "epoch": 1.21, "grad_norm": 0.487239271402359, "learning_rate": 0.0003909279321765863, "loss": 1.7793, "step": 36247 }, { "epoch": 1.21, "grad_norm": 0.4627199172973633, "learning_rate": 0.00039091797166256276, "loss": 1.7399, "step": 36248 }, { "epoch": 1.21, "grad_norm": 0.4994218051433563, "learning_rate": 0.0003909080110381781, "loss": 1.8481, "step": 36249 }, { "epoch": 1.21, "grad_norm": 0.475759893655777, "learning_rate": 0.0003908980503034445, "loss": 1.9476, "step": 36250 }, { "epoch": 1.21, "grad_norm": 0.4589236378669739, "learning_rate": 0.00039088808945837413, "loss": 1.7618, "step": 36251 }, { "epoch": 1.21, "grad_norm": 0.4703969359397888, "learning_rate": 0.00039087812850297887, "loss": 1.816, "step": 36252 }, { "epoch": 1.21, "grad_norm": 0.4731144607067108, "learning_rate": 0.0003908681674372711, "loss": 1.7887, "step": 36253 }, { "epoch": 1.21, "grad_norm": 0.46098726987838745, "learning_rate": 0.0003908582062612627, "loss": 1.7321, "step": 36254 }, { "epoch": 1.21, "grad_norm": 0.4816393256187439, "learning_rate": 0.0003908482449749658, "loss": 1.8083, "step": 36255 }, { "epoch": 1.21, "grad_norm": 0.46672531962394714, "learning_rate": 0.00039083828357839255, "loss": 1.7669, "step": 36256 }, { "epoch": 1.21, "grad_norm": 0.471405565738678, "learning_rate": 0.000390828322071555, "loss": 1.7776, "step": 36257 }, { "epoch": 1.21, "grad_norm": 0.4932325482368469, "learning_rate": 0.0003908183604544653, "loss": 1.8312, "step": 36258 }, { "epoch": 1.21, "grad_norm": 0.4944400191307068, "learning_rate": 0.0003908083987271354, "loss": 1.8117, "step": 36259 }, { "epoch": 1.21, "grad_norm": 0.4561268389225006, "learning_rate": 0.0003907984368895775, "loss": 1.8036, "step": 36260 }, { "epoch": 1.21, "grad_norm": 0.46996209025382996, "learning_rate": 0.0003907884749418037, "loss": 1.7335, "step": 36261 }, { "epoch": 1.21, "grad_norm": 0.48302575945854187, "learning_rate": 0.0003907785128838261, "loss": 1.7118, "step": 36262 }, { "epoch": 1.21, "grad_norm": 0.4916611909866333, "learning_rate": 0.0003907685507156568, "loss": 1.7482, "step": 36263 }, { "epoch": 1.21, "grad_norm": 0.478452205657959, "learning_rate": 0.0003907585884373078, "loss": 1.764, "step": 36264 }, { "epoch": 1.21, "grad_norm": 0.46692901849746704, "learning_rate": 0.00039074862604879116, "loss": 1.823, "step": 36265 }, { "epoch": 1.21, "grad_norm": 0.4745599627494812, "learning_rate": 0.00039073866355011916, "loss": 1.7524, "step": 36266 }, { "epoch": 1.21, "grad_norm": 0.47286444902420044, "learning_rate": 0.00039072870094130377, "loss": 1.7936, "step": 36267 }, { "epoch": 1.21, "grad_norm": 0.4697742462158203, "learning_rate": 0.00039071873822235707, "loss": 1.8104, "step": 36268 }, { "epoch": 1.21, "grad_norm": 0.4726395308971405, "learning_rate": 0.00039070877539329127, "loss": 1.7806, "step": 36269 }, { "epoch": 1.21, "grad_norm": 0.4710738956928253, "learning_rate": 0.00039069881245411834, "loss": 1.7728, "step": 36270 }, { "epoch": 1.21, "grad_norm": 0.4667995572090149, "learning_rate": 0.00039068884940485037, "loss": 1.79, "step": 36271 }, { "epoch": 1.21, "grad_norm": 0.4742569625377655, "learning_rate": 0.00039067888624549956, "loss": 1.7662, "step": 36272 }, { "epoch": 1.21, "grad_norm": 0.477202445268631, "learning_rate": 0.0003906689229760779, "loss": 1.7759, "step": 36273 }, { "epoch": 1.21, "grad_norm": 0.4811174273490906, "learning_rate": 0.0003906589595965976, "loss": 1.8537, "step": 36274 }, { "epoch": 1.21, "grad_norm": 0.4790261685848236, "learning_rate": 0.00039064899610707053, "loss": 1.7534, "step": 36275 }, { "epoch": 1.21, "grad_norm": 0.48648926615715027, "learning_rate": 0.0003906390325075091, "loss": 1.849, "step": 36276 }, { "epoch": 1.21, "grad_norm": 0.4721878170967102, "learning_rate": 0.0003906290687979251, "loss": 1.7727, "step": 36277 }, { "epoch": 1.21, "grad_norm": 0.4629928469657898, "learning_rate": 0.00039061910497833086, "loss": 1.7424, "step": 36278 }, { "epoch": 1.21, "grad_norm": 0.4760027527809143, "learning_rate": 0.0003906091410487385, "loss": 1.8346, "step": 36279 }, { "epoch": 1.21, "grad_norm": 0.46650055050849915, "learning_rate": 0.0003905991770091597, "loss": 1.7056, "step": 36280 }, { "epoch": 1.21, "grad_norm": 0.6996055841445923, "learning_rate": 0.00039058921285960705, "loss": 1.701, "step": 36281 }, { "epoch": 1.21, "grad_norm": 0.4733848571777344, "learning_rate": 0.00039057924860009234, "loss": 1.8847, "step": 36282 }, { "epoch": 1.21, "grad_norm": 0.4668687880039215, "learning_rate": 0.00039056928423062786, "loss": 1.8221, "step": 36283 }, { "epoch": 1.21, "grad_norm": 0.4766436815261841, "learning_rate": 0.00039055931975122556, "loss": 1.7375, "step": 36284 }, { "epoch": 1.21, "grad_norm": 0.4613485634326935, "learning_rate": 0.00039054935516189763, "loss": 1.7694, "step": 36285 }, { "epoch": 1.21, "grad_norm": 0.46328696608543396, "learning_rate": 0.00039053939046265605, "loss": 1.7984, "step": 36286 }, { "epoch": 1.21, "grad_norm": 0.46889063715934753, "learning_rate": 0.000390529425653513, "loss": 1.7519, "step": 36287 }, { "epoch": 1.21, "grad_norm": 0.47892826795578003, "learning_rate": 0.00039051946073448066, "loss": 1.7244, "step": 36288 }, { "epoch": 1.21, "grad_norm": 0.4708062410354614, "learning_rate": 0.000390509495705571, "loss": 1.7366, "step": 36289 }, { "epoch": 1.21, "grad_norm": 0.4742949604988098, "learning_rate": 0.0003904995305667961, "loss": 1.7149, "step": 36290 }, { "epoch": 1.21, "grad_norm": 0.4922861158847809, "learning_rate": 0.00039048956531816814, "loss": 1.7717, "step": 36291 }, { "epoch": 1.21, "grad_norm": 0.48944154381752014, "learning_rate": 0.0003904795999596991, "loss": 1.7205, "step": 36292 }, { "epoch": 1.21, "grad_norm": 0.470499724149704, "learning_rate": 0.0003904696344914012, "loss": 1.6979, "step": 36293 }, { "epoch": 1.21, "grad_norm": 0.49815988540649414, "learning_rate": 0.00039045966891328664, "loss": 1.7082, "step": 36294 }, { "epoch": 1.21, "grad_norm": 0.4669143855571747, "learning_rate": 0.00039044970322536717, "loss": 1.7703, "step": 36295 }, { "epoch": 1.21, "grad_norm": 0.48967981338500977, "learning_rate": 0.0003904397374276552, "loss": 1.7682, "step": 36296 }, { "epoch": 1.21, "grad_norm": 0.4896547496318817, "learning_rate": 0.00039042977152016267, "loss": 1.8025, "step": 36297 }, { "epoch": 1.21, "grad_norm": 0.4729839265346527, "learning_rate": 0.0003904198055029017, "loss": 1.7886, "step": 36298 }, { "epoch": 1.21, "grad_norm": 0.47919657826423645, "learning_rate": 0.00039040983937588454, "loss": 1.8351, "step": 36299 }, { "epoch": 1.21, "grad_norm": 0.5003791451454163, "learning_rate": 0.000390399873139123, "loss": 1.8458, "step": 36300 }, { "epoch": 1.21, "grad_norm": 0.48159217834472656, "learning_rate": 0.0003903899067926294, "loss": 1.8206, "step": 36301 }, { "epoch": 1.21, "grad_norm": 0.47024235129356384, "learning_rate": 0.00039037994033641577, "loss": 1.8177, "step": 36302 }, { "epoch": 1.21, "grad_norm": 0.47155386209487915, "learning_rate": 0.00039036997377049415, "loss": 1.8299, "step": 36303 }, { "epoch": 1.21, "grad_norm": 0.4843215346336365, "learning_rate": 0.00039036000709487685, "loss": 1.8353, "step": 36304 }, { "epoch": 1.21, "grad_norm": 0.4583238661289215, "learning_rate": 0.00039035004030957564, "loss": 1.8819, "step": 36305 }, { "epoch": 1.21, "grad_norm": 0.47075504064559937, "learning_rate": 0.00039034007341460294, "loss": 1.7664, "step": 36306 }, { "epoch": 1.21, "grad_norm": 0.46604716777801514, "learning_rate": 0.0003903301064099705, "loss": 1.8455, "step": 36307 }, { "epoch": 1.21, "grad_norm": 0.47500061988830566, "learning_rate": 0.00039032013929569086, "loss": 1.7744, "step": 36308 }, { "epoch": 1.21, "grad_norm": 0.46974000334739685, "learning_rate": 0.0003903101720717757, "loss": 1.7748, "step": 36309 }, { "epoch": 1.21, "grad_norm": 0.4831567704677582, "learning_rate": 0.00039030020473823743, "loss": 1.735, "step": 36310 }, { "epoch": 1.21, "grad_norm": 0.47620776295661926, "learning_rate": 0.000390290237295088, "loss": 1.8026, "step": 36311 }, { "epoch": 1.21, "grad_norm": 0.4571300446987152, "learning_rate": 0.0003902802697423394, "loss": 1.7788, "step": 36312 }, { "epoch": 1.21, "grad_norm": 0.4716377556324005, "learning_rate": 0.000390270302080004, "loss": 1.7819, "step": 36313 }, { "epoch": 1.21, "grad_norm": 0.47238850593566895, "learning_rate": 0.0003902603343080936, "loss": 1.7892, "step": 36314 }, { "epoch": 1.21, "grad_norm": 0.4672461450099945, "learning_rate": 0.00039025036642662057, "loss": 1.8558, "step": 36315 }, { "epoch": 1.21, "grad_norm": 0.4720042645931244, "learning_rate": 0.0003902403984355969, "loss": 1.7544, "step": 36316 }, { "epoch": 1.21, "grad_norm": 0.4729866087436676, "learning_rate": 0.0003902304303350346, "loss": 1.8744, "step": 36317 }, { "epoch": 1.21, "grad_norm": 0.46051225066185, "learning_rate": 0.00039022046212494594, "loss": 1.7472, "step": 36318 }, { "epoch": 1.21, "grad_norm": 0.48839953541755676, "learning_rate": 0.0003902104938053429, "loss": 1.8413, "step": 36319 }, { "epoch": 1.21, "grad_norm": 0.46939370036125183, "learning_rate": 0.00039020052537623763, "loss": 1.784, "step": 36320 }, { "epoch": 1.21, "grad_norm": 0.4947061538696289, "learning_rate": 0.00039019055683764213, "loss": 1.7723, "step": 36321 }, { "epoch": 1.21, "grad_norm": 0.4662327468395233, "learning_rate": 0.0003901805881895687, "loss": 1.6871, "step": 36322 }, { "epoch": 1.21, "grad_norm": 0.46298253536224365, "learning_rate": 0.00039017061943202923, "loss": 1.7939, "step": 36323 }, { "epoch": 1.21, "grad_norm": 0.46978622674942017, "learning_rate": 0.000390160650565036, "loss": 1.8061, "step": 36324 }, { "epoch": 1.21, "grad_norm": 0.48565393686294556, "learning_rate": 0.0003901506815886009, "loss": 1.8251, "step": 36325 }, { "epoch": 1.21, "grad_norm": 0.4657166004180908, "learning_rate": 0.00039014071250273626, "loss": 1.7757, "step": 36326 }, { "epoch": 1.21, "grad_norm": 0.48669594526290894, "learning_rate": 0.00039013074330745403, "loss": 1.8078, "step": 36327 }, { "epoch": 1.21, "grad_norm": 0.4632261395454407, "learning_rate": 0.0003901207740027664, "loss": 1.8342, "step": 36328 }, { "epoch": 1.21, "grad_norm": 0.4745177924633026, "learning_rate": 0.0003901108045886854, "loss": 1.7787, "step": 36329 }, { "epoch": 1.21, "grad_norm": 0.49449875950813293, "learning_rate": 0.00039010083506522313, "loss": 1.8151, "step": 36330 }, { "epoch": 1.21, "grad_norm": 0.4814864695072174, "learning_rate": 0.00039009086543239177, "loss": 1.7457, "step": 36331 }, { "epoch": 1.21, "grad_norm": 0.48588162660598755, "learning_rate": 0.0003900808956902034, "loss": 1.7658, "step": 36332 }, { "epoch": 1.21, "grad_norm": 0.46978509426116943, "learning_rate": 0.00039007092583866995, "loss": 1.7923, "step": 36333 }, { "epoch": 1.21, "grad_norm": 0.46922996640205383, "learning_rate": 0.0003900609558778039, "loss": 1.8006, "step": 36334 }, { "epoch": 1.21, "grad_norm": 0.49354690313339233, "learning_rate": 0.00039005098580761693, "loss": 1.8247, "step": 36335 }, { "epoch": 1.21, "grad_norm": 0.471320778131485, "learning_rate": 0.0003900410156281214, "loss": 1.7444, "step": 36336 }, { "epoch": 1.21, "grad_norm": 0.463205486536026, "learning_rate": 0.0003900310453393293, "loss": 1.7565, "step": 36337 }, { "epoch": 1.21, "grad_norm": 1.202534794807434, "learning_rate": 0.00039002107494125285, "loss": 1.7292, "step": 36338 }, { "epoch": 1.21, "grad_norm": 0.48785796761512756, "learning_rate": 0.000390011104433904, "loss": 1.738, "step": 36339 }, { "epoch": 1.21, "grad_norm": 0.46211788058280945, "learning_rate": 0.000390001133817295, "loss": 1.7819, "step": 36340 }, { "epoch": 1.21, "grad_norm": 0.4634012281894684, "learning_rate": 0.0003899911630914379, "loss": 1.7828, "step": 36341 }, { "epoch": 1.21, "grad_norm": 0.46511420607566833, "learning_rate": 0.00038998119225634465, "loss": 1.8601, "step": 36342 }, { "epoch": 1.21, "grad_norm": 0.47276076674461365, "learning_rate": 0.00038997122131202765, "loss": 1.7438, "step": 36343 }, { "epoch": 1.21, "grad_norm": 0.48093870282173157, "learning_rate": 0.0003899612502584988, "loss": 1.8079, "step": 36344 }, { "epoch": 1.21, "grad_norm": 0.4798804819583893, "learning_rate": 0.00038995127909577024, "loss": 1.7677, "step": 36345 }, { "epoch": 1.21, "grad_norm": 0.4653934836387634, "learning_rate": 0.000389941307823854, "loss": 1.7326, "step": 36346 }, { "epoch": 1.21, "grad_norm": 0.49615487456321716, "learning_rate": 0.0003899313364427623, "loss": 1.8734, "step": 36347 }, { "epoch": 1.21, "grad_norm": 0.48515555262565613, "learning_rate": 0.00038992136495250723, "loss": 1.8383, "step": 36348 }, { "epoch": 1.21, "grad_norm": 0.48222604393959045, "learning_rate": 0.0003899113933531009, "loss": 1.7362, "step": 36349 }, { "epoch": 1.21, "grad_norm": 0.5253183245658875, "learning_rate": 0.0003899014216445554, "loss": 1.7829, "step": 36350 }, { "epoch": 1.21, "grad_norm": 0.4847528040409088, "learning_rate": 0.0003898914498268828, "loss": 1.9062, "step": 36351 }, { "epoch": 1.21, "grad_norm": 0.4659595787525177, "learning_rate": 0.00038988147790009516, "loss": 1.8001, "step": 36352 }, { "epoch": 1.21, "grad_norm": 0.4731576144695282, "learning_rate": 0.0003898715058642047, "loss": 1.7616, "step": 36353 }, { "epoch": 1.21, "grad_norm": 0.49937450885772705, "learning_rate": 0.00038986153371922347, "loss": 1.7795, "step": 36354 }, { "epoch": 1.21, "grad_norm": 0.47720515727996826, "learning_rate": 0.00038985156146516357, "loss": 1.7387, "step": 36355 }, { "epoch": 1.21, "grad_norm": 0.45992663502693176, "learning_rate": 0.0003898415891020372, "loss": 1.7456, "step": 36356 }, { "epoch": 1.21, "grad_norm": 0.5074671506881714, "learning_rate": 0.0003898316166298563, "loss": 1.7697, "step": 36357 }, { "epoch": 1.21, "grad_norm": 0.4680839478969574, "learning_rate": 0.00038982164404863305, "loss": 1.699, "step": 36358 }, { "epoch": 1.21, "grad_norm": 0.4773187041282654, "learning_rate": 0.0003898116713583796, "loss": 1.7757, "step": 36359 }, { "epoch": 1.21, "grad_norm": 0.4715929627418518, "learning_rate": 0.00038980169855910797, "loss": 1.807, "step": 36360 }, { "epoch": 1.21, "grad_norm": 0.4947356581687927, "learning_rate": 0.00038979172565083034, "loss": 1.796, "step": 36361 }, { "epoch": 1.21, "grad_norm": 0.47530749440193176, "learning_rate": 0.0003897817526335587, "loss": 1.7053, "step": 36362 }, { "epoch": 1.21, "grad_norm": 0.4882466495037079, "learning_rate": 0.0003897717795073054, "loss": 1.7306, "step": 36363 }, { "epoch": 1.21, "grad_norm": 0.489314466714859, "learning_rate": 0.0003897618062720822, "loss": 1.836, "step": 36364 }, { "epoch": 1.21, "grad_norm": 0.5005044937133789, "learning_rate": 0.00038975183292790154, "loss": 1.7407, "step": 36365 }, { "epoch": 1.21, "grad_norm": 0.46155256032943726, "learning_rate": 0.00038974185947477535, "loss": 1.7517, "step": 36366 }, { "epoch": 1.21, "grad_norm": 0.4935903251171112, "learning_rate": 0.00038973188591271573, "loss": 1.8104, "step": 36367 }, { "epoch": 1.21, "grad_norm": 0.4814322292804718, "learning_rate": 0.0003897219122417349, "loss": 1.8488, "step": 36368 }, { "epoch": 1.21, "grad_norm": 0.4705345928668976, "learning_rate": 0.00038971193846184474, "loss": 1.755, "step": 36369 }, { "epoch": 1.21, "grad_norm": 0.4817611575126648, "learning_rate": 0.00038970196457305766, "loss": 1.8063, "step": 36370 }, { "epoch": 1.21, "grad_norm": 0.47879353165626526, "learning_rate": 0.00038969199057538545, "loss": 1.7899, "step": 36371 }, { "epoch": 1.21, "grad_norm": 0.48687660694122314, "learning_rate": 0.00038968201646884053, "loss": 1.6825, "step": 36372 }, { "epoch": 1.21, "grad_norm": 0.4842602014541626, "learning_rate": 0.0003896720422534348, "loss": 1.7625, "step": 36373 }, { "epoch": 1.21, "grad_norm": 0.48090860247612, "learning_rate": 0.0003896620679291804, "loss": 1.8231, "step": 36374 }, { "epoch": 1.21, "grad_norm": 0.4807562828063965, "learning_rate": 0.0003896520934960895, "loss": 1.812, "step": 36375 }, { "epoch": 1.21, "grad_norm": 0.47946831583976746, "learning_rate": 0.00038964211895417414, "loss": 1.743, "step": 36376 }, { "epoch": 1.21, "grad_norm": 0.4602477252483368, "learning_rate": 0.0003896321443034464, "loss": 1.7661, "step": 36377 }, { "epoch": 1.21, "grad_norm": 0.5000086426734924, "learning_rate": 0.0003896221695439185, "loss": 1.7887, "step": 36378 }, { "epoch": 1.21, "grad_norm": 0.4903550446033478, "learning_rate": 0.0003896121946756025, "loss": 1.7289, "step": 36379 }, { "epoch": 1.21, "grad_norm": 0.4654405117034912, "learning_rate": 0.00038960221969851054, "loss": 1.7773, "step": 36380 }, { "epoch": 1.21, "grad_norm": 0.49493446946144104, "learning_rate": 0.0003895922446126547, "loss": 1.762, "step": 36381 }, { "epoch": 1.21, "grad_norm": 0.5027360320091248, "learning_rate": 0.000389582269418047, "loss": 1.8176, "step": 36382 }, { "epoch": 1.21, "grad_norm": 0.5088284015655518, "learning_rate": 0.00038957229411469964, "loss": 1.7958, "step": 36383 }, { "epoch": 1.21, "grad_norm": 0.47795626521110535, "learning_rate": 0.00038956231870262466, "loss": 1.7583, "step": 36384 }, { "epoch": 1.21, "grad_norm": 0.4763098359107971, "learning_rate": 0.00038955234318183434, "loss": 1.8347, "step": 36385 }, { "epoch": 1.21, "grad_norm": 0.4771752953529358, "learning_rate": 0.00038954236755234066, "loss": 1.8201, "step": 36386 }, { "epoch": 1.21, "grad_norm": 0.4858575761318207, "learning_rate": 0.0003895323918141557, "loss": 1.7732, "step": 36387 }, { "epoch": 1.21, "grad_norm": 0.47826918959617615, "learning_rate": 0.00038952241596729154, "loss": 1.8137, "step": 36388 }, { "epoch": 1.21, "grad_norm": 0.47100400924682617, "learning_rate": 0.00038951244001176043, "loss": 1.794, "step": 36389 }, { "epoch": 1.21, "grad_norm": 0.4795878529548645, "learning_rate": 0.0003895024639475743, "loss": 1.7796, "step": 36390 }, { "epoch": 1.21, "grad_norm": 0.49003249406814575, "learning_rate": 0.0003894924877747456, "loss": 1.8029, "step": 36391 }, { "epoch": 1.21, "grad_norm": 0.4667781591415405, "learning_rate": 0.00038948251149328595, "loss": 1.7557, "step": 36392 }, { "epoch": 1.21, "grad_norm": 0.4727989733219147, "learning_rate": 0.0003894725351032079, "loss": 1.7328, "step": 36393 }, { "epoch": 1.21, "grad_norm": 0.49108824133872986, "learning_rate": 0.0003894625586045233, "loss": 1.8432, "step": 36394 }, { "epoch": 1.21, "grad_norm": 0.478657066822052, "learning_rate": 0.0003894525819972443, "loss": 1.7675, "step": 36395 }, { "epoch": 1.21, "grad_norm": 0.4685365557670593, "learning_rate": 0.0003894426052813831, "loss": 1.8233, "step": 36396 }, { "epoch": 1.21, "grad_norm": 0.48267945647239685, "learning_rate": 0.00038943262845695173, "loss": 1.7597, "step": 36397 }, { "epoch": 1.21, "grad_norm": 0.4895647466182709, "learning_rate": 0.0003894226515239624, "loss": 1.7509, "step": 36398 }, { "epoch": 1.21, "grad_norm": 0.48682206869125366, "learning_rate": 0.00038941267448242705, "loss": 1.7656, "step": 36399 }, { "epoch": 1.21, "grad_norm": 0.4759899973869324, "learning_rate": 0.000389402697332358, "loss": 1.8406, "step": 36400 }, { "epoch": 1.21, "grad_norm": 0.48525333404541016, "learning_rate": 0.00038939272007376707, "loss": 1.7693, "step": 36401 }, { "epoch": 1.21, "grad_norm": 0.4591332674026489, "learning_rate": 0.0003893827427066667, "loss": 1.7918, "step": 36402 }, { "epoch": 1.21, "grad_norm": 0.4702354967594147, "learning_rate": 0.00038937276523106875, "loss": 1.7774, "step": 36403 }, { "epoch": 1.21, "grad_norm": 0.4808066189289093, "learning_rate": 0.0003893627876469855, "loss": 1.752, "step": 36404 }, { "epoch": 1.21, "grad_norm": 0.470335990190506, "learning_rate": 0.0003893528099544291, "loss": 1.7517, "step": 36405 }, { "epoch": 1.21, "grad_norm": 0.45933645963668823, "learning_rate": 0.0003893428321534114, "loss": 1.8583, "step": 36406 }, { "epoch": 1.21, "grad_norm": 0.47239285707473755, "learning_rate": 0.00038933285424394467, "loss": 1.8479, "step": 36407 }, { "epoch": 1.21, "grad_norm": 0.4638780653476715, "learning_rate": 0.000389322876226041, "loss": 1.7896, "step": 36408 }, { "epoch": 1.21, "grad_norm": 0.4924148619174957, "learning_rate": 0.00038931289809971255, "loss": 1.806, "step": 36409 }, { "epoch": 1.21, "grad_norm": 0.45609772205352783, "learning_rate": 0.0003893029198649714, "loss": 1.784, "step": 36410 }, { "epoch": 1.21, "grad_norm": 0.4683918356895447, "learning_rate": 0.00038929294152182975, "loss": 1.7482, "step": 36411 }, { "epoch": 1.21, "grad_norm": 0.4706844985485077, "learning_rate": 0.0003892829630702995, "loss": 1.7828, "step": 36412 }, { "epoch": 1.21, "grad_norm": 0.45502349734306335, "learning_rate": 0.00038927298451039294, "loss": 1.7803, "step": 36413 }, { "epoch": 1.21, "grad_norm": 0.4952949285507202, "learning_rate": 0.00038926300584212213, "loss": 1.8247, "step": 36414 }, { "epoch": 1.21, "grad_norm": 0.4664604067802429, "learning_rate": 0.0003892530270654992, "loss": 1.7229, "step": 36415 }, { "epoch": 1.21, "grad_norm": 0.47059738636016846, "learning_rate": 0.0003892430481805362, "loss": 1.8073, "step": 36416 }, { "epoch": 1.21, "grad_norm": 0.4718661606311798, "learning_rate": 0.00038923306918724535, "loss": 1.7356, "step": 36417 }, { "epoch": 1.21, "grad_norm": 0.4770599901676178, "learning_rate": 0.0003892230900856387, "loss": 1.7855, "step": 36418 }, { "epoch": 1.21, "grad_norm": 0.4597020447254181, "learning_rate": 0.00038921311087572826, "loss": 1.7149, "step": 36419 }, { "epoch": 1.21, "grad_norm": 0.48713406920433044, "learning_rate": 0.00038920313155752634, "loss": 1.8207, "step": 36420 }, { "epoch": 1.21, "grad_norm": 0.4844740927219391, "learning_rate": 0.00038919315213104504, "loss": 1.7341, "step": 36421 }, { "epoch": 1.21, "grad_norm": 0.5023069977760315, "learning_rate": 0.00038918317259629623, "loss": 1.7482, "step": 36422 }, { "epoch": 1.21, "grad_norm": 0.480426162481308, "learning_rate": 0.0003891731929532923, "loss": 1.751, "step": 36423 }, { "epoch": 1.21, "grad_norm": 0.4859863817691803, "learning_rate": 0.0003891632132020451, "loss": 1.8458, "step": 36424 }, { "epoch": 1.21, "grad_norm": 0.46407172083854675, "learning_rate": 0.0003891532333425671, "loss": 1.6898, "step": 36425 }, { "epoch": 1.21, "grad_norm": 0.5118194222450256, "learning_rate": 0.00038914325337487013, "loss": 1.6732, "step": 36426 }, { "epoch": 1.21, "grad_norm": 0.49635782837867737, "learning_rate": 0.00038913327329896636, "loss": 1.8389, "step": 36427 }, { "epoch": 1.21, "grad_norm": 0.4718995988368988, "learning_rate": 0.000389123293114868, "loss": 1.8538, "step": 36428 }, { "epoch": 1.21, "grad_norm": 0.5006821155548096, "learning_rate": 0.000389113312822587, "loss": 1.7521, "step": 36429 }, { "epoch": 1.21, "grad_norm": 0.47431591153144836, "learning_rate": 0.0003891033324221356, "loss": 1.7102, "step": 36430 }, { "epoch": 1.21, "grad_norm": 0.47533419728279114, "learning_rate": 0.0003890933519135259, "loss": 1.7469, "step": 36431 }, { "epoch": 1.21, "grad_norm": 0.4916633367538452, "learning_rate": 0.00038908337129677005, "loss": 1.7687, "step": 36432 }, { "epoch": 1.21, "grad_norm": 0.4897102117538452, "learning_rate": 0.00038907339057188, "loss": 1.7203, "step": 36433 }, { "epoch": 1.21, "grad_norm": 0.4667649269104004, "learning_rate": 0.000389063409738868, "loss": 1.7245, "step": 36434 }, { "epoch": 1.21, "grad_norm": 0.4943845272064209, "learning_rate": 0.0003890534287977462, "loss": 1.8614, "step": 36435 }, { "epoch": 1.21, "grad_norm": 0.49743491411209106, "learning_rate": 0.00038904344774852667, "loss": 1.8223, "step": 36436 }, { "epoch": 1.21, "grad_norm": 0.48728641867637634, "learning_rate": 0.0003890334665912215, "loss": 1.8084, "step": 36437 }, { "epoch": 1.21, "grad_norm": 0.47116395831108093, "learning_rate": 0.0003890234853258428, "loss": 1.7387, "step": 36438 }, { "epoch": 1.21, "grad_norm": 0.5003318190574646, "learning_rate": 0.00038901350395240266, "loss": 1.8084, "step": 36439 }, { "epoch": 1.21, "grad_norm": 0.474017858505249, "learning_rate": 0.00038900352247091323, "loss": 1.7733, "step": 36440 }, { "epoch": 1.21, "grad_norm": 0.4632210433483124, "learning_rate": 0.0003889935408813868, "loss": 1.7345, "step": 36441 }, { "epoch": 1.21, "grad_norm": 0.4635604918003082, "learning_rate": 0.00038898355918383517, "loss": 1.7299, "step": 36442 }, { "epoch": 1.21, "grad_norm": 0.47955039143562317, "learning_rate": 0.00038897357737827066, "loss": 1.8052, "step": 36443 }, { "epoch": 1.21, "grad_norm": 0.4647725522518158, "learning_rate": 0.0003889635954647053, "loss": 1.7789, "step": 36444 }, { "epoch": 1.21, "grad_norm": 0.46035587787628174, "learning_rate": 0.0003889536134431513, "loss": 1.8249, "step": 36445 }, { "epoch": 1.21, "grad_norm": 0.45991331338882446, "learning_rate": 0.0003889436313136208, "loss": 1.7563, "step": 36446 }, { "epoch": 1.21, "grad_norm": 0.4672229588031769, "learning_rate": 0.00038893364907612564, "loss": 1.8237, "step": 36447 }, { "epoch": 1.21, "grad_norm": 0.4719790518283844, "learning_rate": 0.00038892366673067827, "loss": 1.7557, "step": 36448 }, { "epoch": 1.21, "grad_norm": 0.47063687443733215, "learning_rate": 0.00038891368427729055, "loss": 1.7915, "step": 36449 }, { "epoch": 1.21, "grad_norm": 0.46402686834335327, "learning_rate": 0.00038890370171597484, "loss": 1.7932, "step": 36450 }, { "epoch": 1.21, "grad_norm": 0.47650131583213806, "learning_rate": 0.0003888937190467431, "loss": 1.8071, "step": 36451 }, { "epoch": 1.21, "grad_norm": 0.48164102435112, "learning_rate": 0.0003888837362696075, "loss": 1.8138, "step": 36452 }, { "epoch": 1.21, "grad_norm": 0.470795601606369, "learning_rate": 0.0003888737533845801, "loss": 1.8142, "step": 36453 }, { "epoch": 1.21, "grad_norm": 0.48421236872673035, "learning_rate": 0.00038886377039167296, "loss": 1.7859, "step": 36454 }, { "epoch": 1.21, "grad_norm": 0.46723631024360657, "learning_rate": 0.0003888537872908985, "loss": 1.82, "step": 36455 }, { "epoch": 1.21, "grad_norm": 0.48058417439460754, "learning_rate": 0.0003888438040822686, "loss": 1.9045, "step": 36456 }, { "epoch": 1.21, "grad_norm": 0.4707085192203522, "learning_rate": 0.00038883382076579533, "loss": 1.7895, "step": 36457 }, { "epoch": 1.21, "grad_norm": 0.49218258261680603, "learning_rate": 0.00038882383734149085, "loss": 1.7334, "step": 36458 }, { "epoch": 1.21, "grad_norm": 0.4721572697162628, "learning_rate": 0.0003888138538093674, "loss": 1.7289, "step": 36459 }, { "epoch": 1.21, "grad_norm": 0.4684206545352936, "learning_rate": 0.00038880387016943706, "loss": 1.8101, "step": 36460 }, { "epoch": 1.21, "grad_norm": 0.48008695244789124, "learning_rate": 0.0003887938864217118, "loss": 1.7834, "step": 36461 }, { "epoch": 1.21, "grad_norm": 0.6795856952667236, "learning_rate": 0.0003887839025662039, "loss": 1.8402, "step": 36462 }, { "epoch": 1.21, "grad_norm": 0.496707946062088, "learning_rate": 0.00038877391860292547, "loss": 1.7848, "step": 36463 }, { "epoch": 1.21, "grad_norm": 0.4980502128601074, "learning_rate": 0.0003887639345318885, "loss": 1.7584, "step": 36464 }, { "epoch": 1.21, "grad_norm": 0.4673667848110199, "learning_rate": 0.0003887539503531052, "loss": 1.8135, "step": 36465 }, { "epoch": 1.21, "grad_norm": 0.4810064136981964, "learning_rate": 0.0003887439660665877, "loss": 1.7693, "step": 36466 }, { "epoch": 1.21, "grad_norm": 0.5005686283111572, "learning_rate": 0.00038873398167234817, "loss": 1.7608, "step": 36467 }, { "epoch": 1.21, "grad_norm": 0.48783305287361145, "learning_rate": 0.0003887239971703986, "loss": 1.8035, "step": 36468 }, { "epoch": 1.21, "grad_norm": 0.4802180826663971, "learning_rate": 0.0003887140125607511, "loss": 1.8014, "step": 36469 }, { "epoch": 1.21, "grad_norm": 0.4684361219406128, "learning_rate": 0.0003887040278434179, "loss": 1.6932, "step": 36470 }, { "epoch": 1.21, "grad_norm": 0.4980759620666504, "learning_rate": 0.00038869404301841113, "loss": 1.7028, "step": 36471 }, { "epoch": 1.21, "grad_norm": 0.4393167197704315, "learning_rate": 0.00038868405808574285, "loss": 1.7155, "step": 36472 }, { "epoch": 1.21, "grad_norm": 0.49701252579689026, "learning_rate": 0.0003886740730454252, "loss": 1.7373, "step": 36473 }, { "epoch": 1.21, "grad_norm": 0.49037280678749084, "learning_rate": 0.0003886640878974703, "loss": 1.7065, "step": 36474 }, { "epoch": 1.21, "grad_norm": 0.4649055302143097, "learning_rate": 0.0003886541026418902, "loss": 1.8447, "step": 36475 }, { "epoch": 1.21, "grad_norm": 0.46608850359916687, "learning_rate": 0.0003886441172786971, "loss": 1.7749, "step": 36476 }, { "epoch": 1.21, "grad_norm": 0.48425543308258057, "learning_rate": 0.00038863413180790314, "loss": 1.7424, "step": 36477 }, { "epoch": 1.21, "grad_norm": 0.4739448130130768, "learning_rate": 0.0003886241462295205, "loss": 1.8013, "step": 36478 }, { "epoch": 1.21, "grad_norm": 0.4575252830982208, "learning_rate": 0.00038861416054356094, "loss": 1.7656, "step": 36479 }, { "epoch": 1.21, "grad_norm": 0.4751224219799042, "learning_rate": 0.000388604174750037, "loss": 1.7762, "step": 36480 }, { "epoch": 1.21, "grad_norm": 0.4871785640716553, "learning_rate": 0.00038859418884896064, "loss": 1.8194, "step": 36481 }, { "epoch": 1.21, "grad_norm": 0.5375614166259766, "learning_rate": 0.00038858420284034395, "loss": 1.8155, "step": 36482 }, { "epoch": 1.21, "grad_norm": 0.48981741070747375, "learning_rate": 0.0003885742167241992, "loss": 1.7119, "step": 36483 }, { "epoch": 1.21, "grad_norm": 0.46338585019111633, "learning_rate": 0.0003885642305005383, "loss": 1.8654, "step": 36484 }, { "epoch": 1.21, "grad_norm": 0.4816603660583496, "learning_rate": 0.00038855424416937357, "loss": 1.7295, "step": 36485 }, { "epoch": 1.21, "grad_norm": 0.4692787230014801, "learning_rate": 0.00038854425773071687, "loss": 1.7444, "step": 36486 }, { "epoch": 1.21, "grad_norm": 0.4640035033226013, "learning_rate": 0.0003885342711845806, "loss": 1.8252, "step": 36487 }, { "epoch": 1.21, "grad_norm": 0.47254160046577454, "learning_rate": 0.00038852428453097675, "loss": 1.7798, "step": 36488 }, { "epoch": 1.21, "grad_norm": 0.4708492159843445, "learning_rate": 0.0003885142977699174, "loss": 1.848, "step": 36489 }, { "epoch": 1.21, "grad_norm": 0.4749155044555664, "learning_rate": 0.00038850431090141493, "loss": 1.7462, "step": 36490 }, { "epoch": 1.21, "grad_norm": 0.46936365962028503, "learning_rate": 0.000388494323925481, "loss": 1.7723, "step": 36491 }, { "epoch": 1.21, "grad_norm": 0.48552393913269043, "learning_rate": 0.0003884843368421282, "loss": 1.7714, "step": 36492 }, { "epoch": 1.21, "grad_norm": 0.4755178391933441, "learning_rate": 0.00038847434965136844, "loss": 1.7642, "step": 36493 }, { "epoch": 1.21, "grad_norm": 0.4723948538303375, "learning_rate": 0.0003884643623532138, "loss": 1.7897, "step": 36494 }, { "epoch": 1.21, "grad_norm": 0.4559060037136078, "learning_rate": 0.0003884543749476764, "loss": 1.7339, "step": 36495 }, { "epoch": 1.21, "grad_norm": 0.4656376242637634, "learning_rate": 0.0003884443874347685, "loss": 1.8011, "step": 36496 }, { "epoch": 1.21, "grad_norm": 0.4660124182701111, "learning_rate": 0.0003884343998145022, "loss": 1.7396, "step": 36497 }, { "epoch": 1.21, "grad_norm": 0.4770320653915405, "learning_rate": 0.00038842441208688947, "loss": 1.6989, "step": 36498 }, { "epoch": 1.21, "grad_norm": 0.4932723343372345, "learning_rate": 0.00038841442425194256, "loss": 1.7671, "step": 36499 }, { "epoch": 1.21, "grad_norm": 0.4851709306240082, "learning_rate": 0.0003884044363096736, "loss": 1.7681, "step": 36500 }, { "epoch": 1.21, "grad_norm": 0.47203654050827026, "learning_rate": 0.00038839444826009454, "loss": 1.7384, "step": 36501 }, { "epoch": 1.21, "grad_norm": 0.48247307538986206, "learning_rate": 0.00038838446010321776, "loss": 1.8038, "step": 36502 }, { "epoch": 1.21, "grad_norm": 0.48025989532470703, "learning_rate": 0.00038837447183905533, "loss": 1.7407, "step": 36503 }, { "epoch": 1.21, "grad_norm": 0.4898470640182495, "learning_rate": 0.0003883644834676193, "loss": 1.7239, "step": 36504 }, { "epoch": 1.21, "grad_norm": 0.4964112639427185, "learning_rate": 0.0003883544949889217, "loss": 1.7346, "step": 36505 }, { "epoch": 1.21, "grad_norm": 0.48537805676460266, "learning_rate": 0.00038834450640297483, "loss": 1.8456, "step": 36506 }, { "epoch": 1.21, "grad_norm": 1.7374982833862305, "learning_rate": 0.00038833451770979067, "loss": 1.8032, "step": 36507 }, { "epoch": 1.21, "grad_norm": 0.47876977920532227, "learning_rate": 0.0003883245289093816, "loss": 1.7615, "step": 36508 }, { "epoch": 1.21, "grad_norm": 0.4780862629413605, "learning_rate": 0.00038831454000175935, "loss": 1.8042, "step": 36509 }, { "epoch": 1.21, "grad_norm": 0.4998502731323242, "learning_rate": 0.00038830455098693643, "loss": 1.8301, "step": 36510 }, { "epoch": 1.21, "grad_norm": 0.47683846950531006, "learning_rate": 0.00038829456186492467, "loss": 1.8041, "step": 36511 }, { "epoch": 1.21, "grad_norm": 0.4747418463230133, "learning_rate": 0.0003882845726357364, "loss": 1.8112, "step": 36512 }, { "epoch": 1.21, "grad_norm": 0.47585591673851013, "learning_rate": 0.00038827458329938365, "loss": 1.8796, "step": 36513 }, { "epoch": 1.21, "grad_norm": 0.48114749789237976, "learning_rate": 0.00038826459385587846, "loss": 1.8556, "step": 36514 }, { "epoch": 1.21, "grad_norm": 0.4902372658252716, "learning_rate": 0.00038825460430523323, "loss": 1.6945, "step": 36515 }, { "epoch": 1.21, "grad_norm": 0.4588330090045929, "learning_rate": 0.0003882446146474597, "loss": 1.7771, "step": 36516 }, { "epoch": 1.21, "grad_norm": 0.4907800555229187, "learning_rate": 0.00038823462488257044, "loss": 1.8136, "step": 36517 }, { "epoch": 1.21, "grad_norm": 0.47322845458984375, "learning_rate": 0.00038822463501057727, "loss": 1.7702, "step": 36518 }, { "epoch": 1.21, "grad_norm": 0.4715956151485443, "learning_rate": 0.00038821464503149236, "loss": 1.7424, "step": 36519 }, { "epoch": 1.22, "grad_norm": 0.505263090133667, "learning_rate": 0.00038820465494532786, "loss": 1.7938, "step": 36520 }, { "epoch": 1.22, "grad_norm": 0.4727558195590973, "learning_rate": 0.0003881946647520959, "loss": 1.801, "step": 36521 }, { "epoch": 1.22, "grad_norm": 0.46455058455467224, "learning_rate": 0.0003881846744518087, "loss": 1.7608, "step": 36522 }, { "epoch": 1.22, "grad_norm": 0.48481908440589905, "learning_rate": 0.00038817468404447826, "loss": 1.8906, "step": 36523 }, { "epoch": 1.22, "grad_norm": 0.48501208424568176, "learning_rate": 0.00038816469353011667, "loss": 1.8015, "step": 36524 }, { "epoch": 1.22, "grad_norm": 0.47659480571746826, "learning_rate": 0.00038815470290873625, "loss": 1.8074, "step": 36525 }, { "epoch": 1.22, "grad_norm": 0.4674742519855499, "learning_rate": 0.0003881447121803489, "loss": 1.7763, "step": 36526 }, { "epoch": 1.22, "grad_norm": 0.47258397936820984, "learning_rate": 0.0003881347213449669, "loss": 1.6891, "step": 36527 }, { "epoch": 1.22, "grad_norm": 0.494764506816864, "learning_rate": 0.0003881247304026024, "loss": 1.8346, "step": 36528 }, { "epoch": 1.22, "grad_norm": 0.5362504720687866, "learning_rate": 0.00038811473935326736, "loss": 1.738, "step": 36529 }, { "epoch": 1.22, "grad_norm": 0.4755060374736786, "learning_rate": 0.00038810474819697406, "loss": 1.7769, "step": 36530 }, { "epoch": 1.22, "grad_norm": 0.4671313166618347, "learning_rate": 0.00038809475693373456, "loss": 1.8067, "step": 36531 }, { "epoch": 1.22, "grad_norm": 0.48429015278816223, "learning_rate": 0.00038808476556356097, "loss": 1.733, "step": 36532 }, { "epoch": 1.22, "grad_norm": 0.5008267760276794, "learning_rate": 0.0003880747740864655, "loss": 1.7707, "step": 36533 }, { "epoch": 1.22, "grad_norm": 0.4899923503398895, "learning_rate": 0.00038806478250246016, "loss": 1.7662, "step": 36534 }, { "epoch": 1.22, "grad_norm": 0.4750485420227051, "learning_rate": 0.00038805479081155733, "loss": 1.7791, "step": 36535 }, { "epoch": 1.22, "grad_norm": 0.47205403447151184, "learning_rate": 0.0003880447990137688, "loss": 1.7977, "step": 36536 }, { "epoch": 1.22, "grad_norm": 0.5058133602142334, "learning_rate": 0.0003880348071091069, "loss": 1.8052, "step": 36537 }, { "epoch": 1.22, "grad_norm": 0.4876890778541565, "learning_rate": 0.00038802481509758377, "loss": 1.8273, "step": 36538 }, { "epoch": 1.22, "grad_norm": 0.5000269412994385, "learning_rate": 0.0003880148229792114, "loss": 1.7801, "step": 36539 }, { "epoch": 1.22, "grad_norm": 0.4776157736778259, "learning_rate": 0.00038800483075400206, "loss": 1.8016, "step": 36540 }, { "epoch": 1.22, "grad_norm": 0.5168678164482117, "learning_rate": 0.00038799483842196777, "loss": 1.73, "step": 36541 }, { "epoch": 1.22, "grad_norm": 0.4932098090648651, "learning_rate": 0.00038798484598312076, "loss": 1.7804, "step": 36542 }, { "epoch": 1.22, "grad_norm": 0.4858139455318451, "learning_rate": 0.00038797485343747306, "loss": 1.845, "step": 36543 }, { "epoch": 1.22, "grad_norm": 0.47648292779922485, "learning_rate": 0.00038796486078503686, "loss": 1.7929, "step": 36544 }, { "epoch": 1.22, "grad_norm": 0.4942074120044708, "learning_rate": 0.0003879548680258244, "loss": 1.7897, "step": 36545 }, { "epoch": 1.22, "grad_norm": 0.48947975039482117, "learning_rate": 0.0003879448751598475, "loss": 1.7861, "step": 36546 }, { "epoch": 1.22, "grad_norm": 0.506520688533783, "learning_rate": 0.0003879348821871186, "loss": 1.8701, "step": 36547 }, { "epoch": 1.22, "grad_norm": 0.49246102571487427, "learning_rate": 0.0003879248891076497, "loss": 1.8059, "step": 36548 }, { "epoch": 1.22, "grad_norm": 0.46177050471305847, "learning_rate": 0.00038791489592145286, "loss": 1.7661, "step": 36549 }, { "epoch": 1.22, "grad_norm": 0.500554084777832, "learning_rate": 0.00038790490262854033, "loss": 1.8022, "step": 36550 }, { "epoch": 1.22, "grad_norm": 0.50669264793396, "learning_rate": 0.00038789490922892426, "loss": 1.7036, "step": 36551 }, { "epoch": 1.22, "grad_norm": 0.4807133972644806, "learning_rate": 0.00038788491572261673, "loss": 1.6773, "step": 36552 }, { "epoch": 1.22, "grad_norm": 0.48503074049949646, "learning_rate": 0.00038787492210962983, "loss": 1.7185, "step": 36553 }, { "epoch": 1.22, "grad_norm": 0.4998323321342468, "learning_rate": 0.00038786492838997565, "loss": 1.8183, "step": 36554 }, { "epoch": 1.22, "grad_norm": 0.49663782119750977, "learning_rate": 0.00038785493456366645, "loss": 1.8232, "step": 36555 }, { "epoch": 1.22, "grad_norm": 0.47203031182289124, "learning_rate": 0.0003878449406307143, "loss": 1.8331, "step": 36556 }, { "epoch": 1.22, "grad_norm": 0.46816715598106384, "learning_rate": 0.0003878349465911313, "loss": 1.7785, "step": 36557 }, { "epoch": 1.22, "grad_norm": 0.45907658338546753, "learning_rate": 0.0003878249524449297, "loss": 1.7482, "step": 36558 }, { "epoch": 1.22, "grad_norm": 0.4824102818965912, "learning_rate": 0.0003878149581921215, "loss": 1.7884, "step": 36559 }, { "epoch": 1.22, "grad_norm": 0.47999024391174316, "learning_rate": 0.0003878049638327189, "loss": 1.8252, "step": 36560 }, { "epoch": 1.22, "grad_norm": 0.46441733837127686, "learning_rate": 0.0003877949693667339, "loss": 1.7411, "step": 36561 }, { "epoch": 1.22, "grad_norm": 0.4572621285915375, "learning_rate": 0.0003877849747941789, "loss": 1.7255, "step": 36562 }, { "epoch": 1.22, "grad_norm": 0.47918280959129333, "learning_rate": 0.00038777498011506584, "loss": 1.7337, "step": 36563 }, { "epoch": 1.22, "grad_norm": 0.4688887596130371, "learning_rate": 0.0003877649853294068, "loss": 1.7266, "step": 36564 }, { "epoch": 1.22, "grad_norm": 0.48995307087898254, "learning_rate": 0.0003877549904372141, "loss": 1.8068, "step": 36565 }, { "epoch": 1.22, "grad_norm": 0.49527913331985474, "learning_rate": 0.0003877449954384997, "loss": 1.8108, "step": 36566 }, { "epoch": 1.22, "grad_norm": 0.45949000120162964, "learning_rate": 0.0003877350003332758, "loss": 1.7006, "step": 36567 }, { "epoch": 1.22, "grad_norm": 0.4854571223258972, "learning_rate": 0.00038772500512155466, "loss": 1.794, "step": 36568 }, { "epoch": 1.22, "grad_norm": 0.47068989276885986, "learning_rate": 0.00038771500980334814, "loss": 1.7882, "step": 36569 }, { "epoch": 1.22, "grad_norm": 0.4800166189670563, "learning_rate": 0.0003877050143786686, "loss": 1.7701, "step": 36570 }, { "epoch": 1.22, "grad_norm": 0.47266504168510437, "learning_rate": 0.000387695018847528, "loss": 1.8917, "step": 36571 }, { "epoch": 1.22, "grad_norm": 0.47107431292533875, "learning_rate": 0.0003876850232099387, "loss": 1.8964, "step": 36572 }, { "epoch": 1.22, "grad_norm": 0.4763428568840027, "learning_rate": 0.00038767502746591266, "loss": 1.6604, "step": 36573 }, { "epoch": 1.22, "grad_norm": 0.4540173411369324, "learning_rate": 0.0003876650316154621, "loss": 1.6957, "step": 36574 }, { "epoch": 1.22, "grad_norm": 0.4809066951274872, "learning_rate": 0.000387655035658599, "loss": 1.7454, "step": 36575 }, { "epoch": 1.22, "grad_norm": 0.4565955102443695, "learning_rate": 0.00038764503959533563, "loss": 1.7318, "step": 36576 }, { "epoch": 1.22, "grad_norm": 0.4612933397293091, "learning_rate": 0.0003876350434256841, "loss": 1.7934, "step": 36577 }, { "epoch": 1.22, "grad_norm": 0.5464696884155273, "learning_rate": 0.0003876250471496566, "loss": 1.8404, "step": 36578 }, { "epoch": 1.22, "grad_norm": 0.4707246720790863, "learning_rate": 0.00038761505076726513, "loss": 1.7494, "step": 36579 }, { "epoch": 1.22, "grad_norm": 0.4747435450553894, "learning_rate": 0.00038760505427852196, "loss": 1.7401, "step": 36580 }, { "epoch": 1.22, "grad_norm": 0.4954935908317566, "learning_rate": 0.0003875950576834391, "loss": 1.7523, "step": 36581 }, { "epoch": 1.22, "grad_norm": 0.5077185034751892, "learning_rate": 0.00038758506098202875, "loss": 1.7829, "step": 36582 }, { "epoch": 1.22, "grad_norm": 0.47301435470581055, "learning_rate": 0.00038757506417430316, "loss": 1.8253, "step": 36583 }, { "epoch": 1.22, "grad_norm": 0.46622344851493835, "learning_rate": 0.0003875650672602742, "loss": 1.7615, "step": 36584 }, { "epoch": 1.22, "grad_norm": 0.4712241291999817, "learning_rate": 0.00038755507023995425, "loss": 1.8635, "step": 36585 }, { "epoch": 1.22, "grad_norm": 0.4648032784461975, "learning_rate": 0.00038754507311335526, "loss": 1.8315, "step": 36586 }, { "epoch": 1.22, "grad_norm": 0.4823180139064789, "learning_rate": 0.0003875350758804895, "loss": 1.8037, "step": 36587 }, { "epoch": 1.22, "grad_norm": 0.48182281851768494, "learning_rate": 0.0003875250785413691, "loss": 1.7613, "step": 36588 }, { "epoch": 1.22, "grad_norm": 0.4585593044757843, "learning_rate": 0.000387515081096006, "loss": 1.853, "step": 36589 }, { "epoch": 1.22, "grad_norm": 0.46730032563209534, "learning_rate": 0.00038750508354441266, "loss": 1.7894, "step": 36590 }, { "epoch": 1.22, "grad_norm": 0.48029056191444397, "learning_rate": 0.0003874950858866009, "loss": 1.7821, "step": 36591 }, { "epoch": 1.22, "grad_norm": 0.4687679409980774, "learning_rate": 0.00038748508812258315, "loss": 1.7819, "step": 36592 }, { "epoch": 1.22, "grad_norm": 0.486628919839859, "learning_rate": 0.0003874750902523712, "loss": 1.7592, "step": 36593 }, { "epoch": 1.22, "grad_norm": 0.48561471700668335, "learning_rate": 0.00038746509227597747, "loss": 1.8508, "step": 36594 }, { "epoch": 1.22, "grad_norm": 0.48980361223220825, "learning_rate": 0.0003874550941934142, "loss": 1.7524, "step": 36595 }, { "epoch": 1.22, "grad_norm": 0.47940966486930847, "learning_rate": 0.000387445096004693, "loss": 1.7692, "step": 36596 }, { "epoch": 1.22, "grad_norm": 0.46938183903694153, "learning_rate": 0.00038743509770982655, "loss": 1.7803, "step": 36597 }, { "epoch": 1.22, "grad_norm": 0.4756050407886505, "learning_rate": 0.00038742509930882673, "loss": 1.7347, "step": 36598 }, { "epoch": 1.22, "grad_norm": 0.4803546667098999, "learning_rate": 0.0003874151008017057, "loss": 1.8882, "step": 36599 }, { "epoch": 1.22, "grad_norm": 0.5036052465438843, "learning_rate": 0.0003874051021884756, "loss": 1.8148, "step": 36600 }, { "epoch": 1.22, "grad_norm": 0.45892760157585144, "learning_rate": 0.00038739510346914863, "loss": 1.7906, "step": 36601 }, { "epoch": 1.22, "grad_norm": 0.47662708163261414, "learning_rate": 0.00038738510464373684, "loss": 1.7869, "step": 36602 }, { "epoch": 1.22, "grad_norm": 0.4701371192932129, "learning_rate": 0.0003873751057122524, "loss": 1.8, "step": 36603 }, { "epoch": 1.22, "grad_norm": 0.5546616911888123, "learning_rate": 0.0003873651066747075, "loss": 1.778, "step": 36604 }, { "epoch": 1.22, "grad_norm": 0.48071444034576416, "learning_rate": 0.0003873551075311142, "loss": 1.7811, "step": 36605 }, { "epoch": 1.22, "grad_norm": 0.47322529554367065, "learning_rate": 0.0003873451082814847, "loss": 1.7302, "step": 36606 }, { "epoch": 1.22, "grad_norm": 0.4596359133720398, "learning_rate": 0.00038733510892583113, "loss": 1.739, "step": 36607 }, { "epoch": 1.22, "grad_norm": 0.4800075888633728, "learning_rate": 0.0003873251094641655, "loss": 1.7796, "step": 36608 }, { "epoch": 1.22, "grad_norm": 0.47520050406455994, "learning_rate": 0.0003873151098965002, "loss": 1.8359, "step": 36609 }, { "epoch": 1.22, "grad_norm": 0.5404835343360901, "learning_rate": 0.0003873051102228471, "loss": 1.7643, "step": 36610 }, { "epoch": 1.22, "grad_norm": 0.4909024238586426, "learning_rate": 0.0003872951104432185, "loss": 1.7829, "step": 36611 }, { "epoch": 1.22, "grad_norm": 0.5071582794189453, "learning_rate": 0.0003872851105576265, "loss": 1.7871, "step": 36612 }, { "epoch": 1.22, "grad_norm": 0.5359830260276794, "learning_rate": 0.0003872751105660833, "loss": 1.7833, "step": 36613 }, { "epoch": 1.22, "grad_norm": 0.4865427315235138, "learning_rate": 0.00038726511046860093, "loss": 1.8243, "step": 36614 }, { "epoch": 1.22, "grad_norm": 0.46394118666648865, "learning_rate": 0.00038725511026519154, "loss": 1.8612, "step": 36615 }, { "epoch": 1.22, "grad_norm": 0.4605444073677063, "learning_rate": 0.0003872451099558673, "loss": 1.7988, "step": 36616 }, { "epoch": 1.22, "grad_norm": 0.5025730133056641, "learning_rate": 0.0003872351095406403, "loss": 1.7307, "step": 36617 }, { "epoch": 1.22, "grad_norm": 0.47807595133781433, "learning_rate": 0.00038722510901952284, "loss": 1.7348, "step": 36618 }, { "epoch": 1.22, "grad_norm": 0.4838380813598633, "learning_rate": 0.00038721510839252694, "loss": 1.7188, "step": 36619 }, { "epoch": 1.22, "grad_norm": 0.4743560254573822, "learning_rate": 0.00038720510765966477, "loss": 1.7209, "step": 36620 }, { "epoch": 1.22, "grad_norm": 0.5002239346504211, "learning_rate": 0.0003871951068209484, "loss": 1.7674, "step": 36621 }, { "epoch": 1.22, "grad_norm": 0.5007777214050293, "learning_rate": 0.00038718510587639, "loss": 1.7878, "step": 36622 }, { "epoch": 1.22, "grad_norm": 0.47606217861175537, "learning_rate": 0.00038717510482600174, "loss": 1.7543, "step": 36623 }, { "epoch": 1.22, "grad_norm": 0.492106556892395, "learning_rate": 0.00038716510366979575, "loss": 1.9048, "step": 36624 }, { "epoch": 1.22, "grad_norm": 0.46856608986854553, "learning_rate": 0.00038715510240778427, "loss": 1.7471, "step": 36625 }, { "epoch": 1.22, "grad_norm": 0.4679337441921234, "learning_rate": 0.0003871451010399791, "loss": 1.8188, "step": 36626 }, { "epoch": 1.22, "grad_norm": 0.46742746233940125, "learning_rate": 0.0003871350995663929, "loss": 1.7778, "step": 36627 }, { "epoch": 1.22, "grad_norm": 0.4937855303287506, "learning_rate": 0.00038712509798703726, "loss": 1.8171, "step": 36628 }, { "epoch": 1.22, "grad_norm": 0.5010865330696106, "learning_rate": 0.00038711509630192476, "loss": 1.7848, "step": 36629 }, { "epoch": 1.22, "grad_norm": 0.7108994126319885, "learning_rate": 0.0003871050945110673, "loss": 1.8562, "step": 36630 }, { "epoch": 1.22, "grad_norm": 0.4603025019168854, "learning_rate": 0.0003870950926144771, "loss": 1.7399, "step": 36631 }, { "epoch": 1.22, "grad_norm": 0.4882611036300659, "learning_rate": 0.0003870850906121664, "loss": 1.8266, "step": 36632 }, { "epoch": 1.22, "grad_norm": 0.4927724599838257, "learning_rate": 0.000387075088504147, "loss": 1.8043, "step": 36633 }, { "epoch": 1.22, "grad_norm": 0.48617425560951233, "learning_rate": 0.0003870650862904315, "loss": 1.7885, "step": 36634 }, { "epoch": 1.22, "grad_norm": 0.9080905914306641, "learning_rate": 0.00038705508397103165, "loss": 1.8228, "step": 36635 }, { "epoch": 1.22, "grad_norm": 0.466848224401474, "learning_rate": 0.00038704508154595985, "loss": 1.7256, "step": 36636 }, { "epoch": 1.22, "grad_norm": 0.4791640341281891, "learning_rate": 0.0003870350790152281, "loss": 1.7926, "step": 36637 }, { "epoch": 1.22, "grad_norm": 0.4736686944961548, "learning_rate": 0.0003870250763788486, "loss": 1.7956, "step": 36638 }, { "epoch": 1.22, "grad_norm": 0.5031086206436157, "learning_rate": 0.00038701507363683355, "loss": 1.8453, "step": 36639 }, { "epoch": 1.22, "grad_norm": 0.4695582091808319, "learning_rate": 0.000387005070789195, "loss": 1.7611, "step": 36640 }, { "epoch": 1.22, "grad_norm": 0.471150279045105, "learning_rate": 0.0003869950678359451, "loss": 1.7912, "step": 36641 }, { "epoch": 1.22, "grad_norm": 0.4728681743144989, "learning_rate": 0.0003869850647770959, "loss": 1.7872, "step": 36642 }, { "epoch": 1.22, "grad_norm": 0.49083325266838074, "learning_rate": 0.0003869750616126597, "loss": 1.736, "step": 36643 }, { "epoch": 1.22, "grad_norm": 0.5003592371940613, "learning_rate": 0.0003869650583426486, "loss": 1.8192, "step": 36644 }, { "epoch": 1.22, "grad_norm": 0.47665050625801086, "learning_rate": 0.00038695505496707486, "loss": 1.7532, "step": 36645 }, { "epoch": 1.22, "grad_norm": 0.4852820932865143, "learning_rate": 0.0003869450514859503, "loss": 1.7355, "step": 36646 }, { "epoch": 1.22, "grad_norm": 0.5011587142944336, "learning_rate": 0.00038693504789928744, "loss": 1.7961, "step": 36647 }, { "epoch": 1.22, "grad_norm": 0.48899614810943604, "learning_rate": 0.0003869250442070981, "loss": 1.8459, "step": 36648 }, { "epoch": 1.22, "grad_norm": 0.47700557112693787, "learning_rate": 0.00038691504040939457, "loss": 1.698, "step": 36649 }, { "epoch": 1.22, "grad_norm": 0.49238383769989014, "learning_rate": 0.00038690503650618907, "loss": 1.7205, "step": 36650 }, { "epoch": 1.22, "grad_norm": 0.4720422327518463, "learning_rate": 0.0003868950324974936, "loss": 1.7641, "step": 36651 }, { "epoch": 1.22, "grad_norm": 0.4750404357910156, "learning_rate": 0.0003868850283833204, "loss": 1.7724, "step": 36652 }, { "epoch": 1.22, "grad_norm": 0.4972420334815979, "learning_rate": 0.00038687502416368154, "loss": 1.8323, "step": 36653 }, { "epoch": 1.22, "grad_norm": 0.4755634367465973, "learning_rate": 0.0003868650198385892, "loss": 1.7913, "step": 36654 }, { "epoch": 1.22, "grad_norm": 0.475603312253952, "learning_rate": 0.0003868550154080556, "loss": 1.7943, "step": 36655 }, { "epoch": 1.22, "grad_norm": 0.48106229305267334, "learning_rate": 0.0003868450108720927, "loss": 1.7321, "step": 36656 }, { "epoch": 1.22, "grad_norm": 0.4857483208179474, "learning_rate": 0.00038683500623071286, "loss": 1.8348, "step": 36657 }, { "epoch": 1.22, "grad_norm": 0.4822976291179657, "learning_rate": 0.00038682500148392804, "loss": 1.7793, "step": 36658 }, { "epoch": 1.22, "grad_norm": 0.4823651909828186, "learning_rate": 0.00038681499663175046, "loss": 1.7187, "step": 36659 }, { "epoch": 1.22, "grad_norm": 0.46661707758903503, "learning_rate": 0.00038680499167419234, "loss": 1.7722, "step": 36660 }, { "epoch": 1.22, "grad_norm": 0.48693767189979553, "learning_rate": 0.0003867949866112657, "loss": 1.688, "step": 36661 }, { "epoch": 1.22, "grad_norm": 0.4830853343009949, "learning_rate": 0.00038678498144298275, "loss": 1.7502, "step": 36662 }, { "epoch": 1.22, "grad_norm": 0.48477765917778015, "learning_rate": 0.0003867749761693556, "loss": 1.7692, "step": 36663 }, { "epoch": 1.22, "grad_norm": 0.494009405374527, "learning_rate": 0.0003867649707903964, "loss": 1.8559, "step": 36664 }, { "epoch": 1.22, "grad_norm": 0.4923761487007141, "learning_rate": 0.0003867549653061173, "loss": 1.8332, "step": 36665 }, { "epoch": 1.22, "grad_norm": 0.4631361663341522, "learning_rate": 0.00038674495971653054, "loss": 1.7617, "step": 36666 }, { "epoch": 1.22, "grad_norm": 0.4762828052043915, "learning_rate": 0.0003867349540216481, "loss": 1.8514, "step": 36667 }, { "epoch": 1.22, "grad_norm": 0.4744589626789093, "learning_rate": 0.00038672494822148226, "loss": 1.7736, "step": 36668 }, { "epoch": 1.22, "grad_norm": 0.48955777287483215, "learning_rate": 0.0003867149423160451, "loss": 1.789, "step": 36669 }, { "epoch": 1.22, "grad_norm": 0.47244441509246826, "learning_rate": 0.00038670493630534877, "loss": 1.7808, "step": 36670 }, { "epoch": 1.22, "grad_norm": 0.471849262714386, "learning_rate": 0.00038669493018940535, "loss": 1.7721, "step": 36671 }, { "epoch": 1.22, "grad_norm": 0.4728459417819977, "learning_rate": 0.00038668492396822716, "loss": 1.8569, "step": 36672 }, { "epoch": 1.22, "grad_norm": 0.48891812562942505, "learning_rate": 0.0003866749176418262, "loss": 1.7564, "step": 36673 }, { "epoch": 1.22, "grad_norm": 0.47699078917503357, "learning_rate": 0.00038666491121021465, "loss": 1.6712, "step": 36674 }, { "epoch": 1.22, "grad_norm": 0.4848971366882324, "learning_rate": 0.0003866549046734047, "loss": 1.7487, "step": 36675 }, { "epoch": 1.22, "grad_norm": 0.48683300614356995, "learning_rate": 0.00038664489803140847, "loss": 1.8221, "step": 36676 }, { "epoch": 1.22, "grad_norm": 0.49450284242630005, "learning_rate": 0.00038663489128423805, "loss": 1.8249, "step": 36677 }, { "epoch": 1.22, "grad_norm": 0.4650648236274719, "learning_rate": 0.0003866248844319056, "loss": 1.7314, "step": 36678 }, { "epoch": 1.22, "grad_norm": 0.4658328592777252, "learning_rate": 0.00038661487747442344, "loss": 1.7834, "step": 36679 }, { "epoch": 1.22, "grad_norm": 0.48347437381744385, "learning_rate": 0.00038660487041180353, "loss": 1.7471, "step": 36680 }, { "epoch": 1.22, "grad_norm": 0.47569575905799866, "learning_rate": 0.000386594863244058, "loss": 1.8931, "step": 36681 }, { "epoch": 1.22, "grad_norm": 0.4811452329158783, "learning_rate": 0.0003865848559711992, "loss": 1.8096, "step": 36682 }, { "epoch": 1.22, "grad_norm": 0.4899812638759613, "learning_rate": 0.000386574848593239, "loss": 1.7567, "step": 36683 }, { "epoch": 1.22, "grad_norm": 0.47663038969039917, "learning_rate": 0.00038656484111018974, "loss": 1.7844, "step": 36684 }, { "epoch": 1.22, "grad_norm": 0.4559418261051178, "learning_rate": 0.00038655483352206354, "loss": 1.7499, "step": 36685 }, { "epoch": 1.22, "grad_norm": 0.48751673102378845, "learning_rate": 0.00038654482582887247, "loss": 1.7576, "step": 36686 }, { "epoch": 1.22, "grad_norm": 0.49117642641067505, "learning_rate": 0.0003865348180306288, "loss": 1.7951, "step": 36687 }, { "epoch": 1.22, "grad_norm": 0.47878381609916687, "learning_rate": 0.0003865248101273445, "loss": 1.7364, "step": 36688 }, { "epoch": 1.22, "grad_norm": 0.4640578627586365, "learning_rate": 0.00038651480211903197, "loss": 1.7767, "step": 36689 }, { "epoch": 1.22, "grad_norm": 0.4776172637939453, "learning_rate": 0.00038650479400570317, "loss": 1.7506, "step": 36690 }, { "epoch": 1.22, "grad_norm": 0.4890824258327484, "learning_rate": 0.0003864947857873703, "loss": 1.811, "step": 36691 }, { "epoch": 1.22, "grad_norm": 0.48540183901786804, "learning_rate": 0.0003864847774640455, "loss": 1.8377, "step": 36692 }, { "epoch": 1.22, "grad_norm": 0.4683140516281128, "learning_rate": 0.0003864747690357409, "loss": 1.7949, "step": 36693 }, { "epoch": 1.22, "grad_norm": 0.47482261061668396, "learning_rate": 0.00038646476050246866, "loss": 1.7704, "step": 36694 }, { "epoch": 1.22, "grad_norm": 0.4723995327949524, "learning_rate": 0.000386454751864241, "loss": 1.7791, "step": 36695 }, { "epoch": 1.22, "grad_norm": 0.4860706329345703, "learning_rate": 0.00038644474312106994, "loss": 1.7944, "step": 36696 }, { "epoch": 1.22, "grad_norm": 0.47398269176483154, "learning_rate": 0.00038643473427296774, "loss": 1.7394, "step": 36697 }, { "epoch": 1.22, "grad_norm": 0.45785459876060486, "learning_rate": 0.0003864247253199465, "loss": 1.8158, "step": 36698 }, { "epoch": 1.22, "grad_norm": 0.4738290011882782, "learning_rate": 0.00038641471626201836, "loss": 1.7912, "step": 36699 }, { "epoch": 1.22, "grad_norm": 0.5135546922683716, "learning_rate": 0.0003864047070991955, "loss": 1.7624, "step": 36700 }, { "epoch": 1.22, "grad_norm": 0.4916652739048004, "learning_rate": 0.0003863946978314901, "loss": 1.7614, "step": 36701 }, { "epoch": 1.22, "grad_norm": 0.46563398838043213, "learning_rate": 0.0003863846884589142, "loss": 1.8151, "step": 36702 }, { "epoch": 1.22, "grad_norm": 0.47128647565841675, "learning_rate": 0.00038637467898148013, "loss": 1.7649, "step": 36703 }, { "epoch": 1.22, "grad_norm": 0.46770527958869934, "learning_rate": 0.00038636466939919974, "loss": 1.836, "step": 36704 }, { "epoch": 1.22, "grad_norm": 0.48554742336273193, "learning_rate": 0.00038635465971208557, "loss": 1.7877, "step": 36705 }, { "epoch": 1.22, "grad_norm": 0.8914838433265686, "learning_rate": 0.0003863446499201494, "loss": 1.821, "step": 36706 }, { "epoch": 1.22, "grad_norm": 0.4997044801712036, "learning_rate": 0.00038633464002340364, "loss": 1.7858, "step": 36707 }, { "epoch": 1.22, "grad_norm": 0.4674701690673828, "learning_rate": 0.0003863246300218604, "loss": 1.8319, "step": 36708 }, { "epoch": 1.22, "grad_norm": 0.4832623302936554, "learning_rate": 0.0003863146199155316, "loss": 1.7813, "step": 36709 }, { "epoch": 1.22, "grad_norm": 0.472638338804245, "learning_rate": 0.00038630460970442976, "loss": 1.8193, "step": 36710 }, { "epoch": 1.22, "grad_norm": 0.46950069069862366, "learning_rate": 0.00038629459938856664, "loss": 1.794, "step": 36711 }, { "epoch": 1.22, "grad_norm": 0.48670485615730286, "learning_rate": 0.00038628458896795473, "loss": 1.8208, "step": 36712 }, { "epoch": 1.22, "grad_norm": 0.4768630564212799, "learning_rate": 0.00038627457844260597, "loss": 1.7372, "step": 36713 }, { "epoch": 1.22, "grad_norm": 0.4669654667377472, "learning_rate": 0.00038626456781253266, "loss": 1.7925, "step": 36714 }, { "epoch": 1.22, "grad_norm": 0.4684809148311615, "learning_rate": 0.0003862545570777469, "loss": 1.7722, "step": 36715 }, { "epoch": 1.22, "grad_norm": 0.48025041818618774, "learning_rate": 0.00038624454623826073, "loss": 1.7478, "step": 36716 }, { "epoch": 1.22, "grad_norm": 0.47968459129333496, "learning_rate": 0.0003862345352940864, "loss": 1.7717, "step": 36717 }, { "epoch": 1.22, "grad_norm": 0.5012339949607849, "learning_rate": 0.00038622452424523604, "loss": 1.8161, "step": 36718 }, { "epoch": 1.22, "grad_norm": 0.49726343154907227, "learning_rate": 0.00038621451309172195, "loss": 1.7455, "step": 36719 }, { "epoch": 1.22, "grad_norm": 0.4648633599281311, "learning_rate": 0.00038620450183355596, "loss": 1.7125, "step": 36720 }, { "epoch": 1.22, "grad_norm": 0.47024381160736084, "learning_rate": 0.0003861944904707505, "loss": 1.7696, "step": 36721 }, { "epoch": 1.22, "grad_norm": 0.4875694513320923, "learning_rate": 0.00038618447900331756, "loss": 1.7669, "step": 36722 }, { "epoch": 1.22, "grad_norm": 0.484612375497818, "learning_rate": 0.00038617446743126945, "loss": 1.7891, "step": 36723 }, { "epoch": 1.22, "grad_norm": 0.49117109179496765, "learning_rate": 0.0003861644557546182, "loss": 1.7903, "step": 36724 }, { "epoch": 1.22, "grad_norm": 0.4656103253364563, "learning_rate": 0.00038615444397337596, "loss": 1.7101, "step": 36725 }, { "epoch": 1.22, "grad_norm": 0.4710637629032135, "learning_rate": 0.00038614443208755497, "loss": 1.7757, "step": 36726 }, { "epoch": 1.22, "grad_norm": 0.48204079270362854, "learning_rate": 0.00038613442009716734, "loss": 1.7996, "step": 36727 }, { "epoch": 1.22, "grad_norm": 0.47776463627815247, "learning_rate": 0.00038612440800222516, "loss": 1.7303, "step": 36728 }, { "epoch": 1.22, "grad_norm": 0.4792838990688324, "learning_rate": 0.00038611439580274064, "loss": 1.8365, "step": 36729 }, { "epoch": 1.22, "grad_norm": 0.46987444162368774, "learning_rate": 0.00038610438349872595, "loss": 1.7597, "step": 36730 }, { "epoch": 1.22, "grad_norm": 0.4745611250400543, "learning_rate": 0.00038609437109019326, "loss": 1.7762, "step": 36731 }, { "epoch": 1.22, "grad_norm": 0.46958619356155396, "learning_rate": 0.00038608435857715465, "loss": 1.7711, "step": 36732 }, { "epoch": 1.22, "grad_norm": 0.4814032316207886, "learning_rate": 0.0003860743459596223, "loss": 1.7671, "step": 36733 }, { "epoch": 1.22, "grad_norm": 0.46497154235839844, "learning_rate": 0.0003860643332376084, "loss": 1.7527, "step": 36734 }, { "epoch": 1.22, "grad_norm": 0.4803038239479065, "learning_rate": 0.00038605432041112514, "loss": 1.7113, "step": 36735 }, { "epoch": 1.22, "grad_norm": 0.45656290650367737, "learning_rate": 0.0003860443074801845, "loss": 1.6966, "step": 36736 }, { "epoch": 1.22, "grad_norm": 0.4758927822113037, "learning_rate": 0.00038603429444479884, "loss": 1.8132, "step": 36737 }, { "epoch": 1.22, "grad_norm": 0.8885270953178406, "learning_rate": 0.00038602428130498023, "loss": 1.8523, "step": 36738 }, { "epoch": 1.22, "grad_norm": 0.4752771258354187, "learning_rate": 0.0003860142680607407, "loss": 1.8239, "step": 36739 }, { "epoch": 1.22, "grad_norm": 0.4820455014705658, "learning_rate": 0.0003860042547120926, "loss": 1.7812, "step": 36740 }, { "epoch": 1.22, "grad_norm": 0.4819072484970093, "learning_rate": 0.0003859942412590481, "loss": 1.7975, "step": 36741 }, { "epoch": 1.22, "grad_norm": 0.484200656414032, "learning_rate": 0.00038598422770161914, "loss": 1.7843, "step": 36742 }, { "epoch": 1.22, "grad_norm": 0.48732444643974304, "learning_rate": 0.0003859742140398179, "loss": 1.8023, "step": 36743 }, { "epoch": 1.22, "grad_norm": 0.4785003662109375, "learning_rate": 0.00038596420027365685, "loss": 1.8121, "step": 36744 }, { "epoch": 1.22, "grad_norm": 0.48200199007987976, "learning_rate": 0.00038595418640314784, "loss": 1.7542, "step": 36745 }, { "epoch": 1.22, "grad_norm": 0.4886036813259125, "learning_rate": 0.00038594417242830307, "loss": 1.7428, "step": 36746 }, { "epoch": 1.22, "grad_norm": 0.47465822100639343, "learning_rate": 0.0003859341583491348, "loss": 1.7884, "step": 36747 }, { "epoch": 1.22, "grad_norm": 0.46215805411338806, "learning_rate": 0.00038592414416565505, "loss": 1.7903, "step": 36748 }, { "epoch": 1.22, "grad_norm": 0.47861137986183167, "learning_rate": 0.0003859141298778762, "loss": 1.8283, "step": 36749 }, { "epoch": 1.22, "grad_norm": 0.4804108440876007, "learning_rate": 0.00038590411548581005, "loss": 1.8348, "step": 36750 }, { "epoch": 1.22, "grad_norm": 0.4826354682445526, "learning_rate": 0.00038589410098946914, "loss": 1.8129, "step": 36751 }, { "epoch": 1.22, "grad_norm": 0.46993616223335266, "learning_rate": 0.0003858840863888653, "loss": 1.7794, "step": 36752 }, { "epoch": 1.22, "grad_norm": 0.48905789852142334, "learning_rate": 0.0003858740716840109, "loss": 1.8632, "step": 36753 }, { "epoch": 1.22, "grad_norm": 0.4721687138080597, "learning_rate": 0.00038586405687491806, "loss": 1.759, "step": 36754 }, { "epoch": 1.22, "grad_norm": 0.4759815037250519, "learning_rate": 0.00038585404196159884, "loss": 1.7779, "step": 36755 }, { "epoch": 1.22, "grad_norm": 0.48208919167518616, "learning_rate": 0.00038584402694406556, "loss": 1.8691, "step": 36756 }, { "epoch": 1.22, "grad_norm": 0.46681419014930725, "learning_rate": 0.00038583401182233027, "loss": 1.78, "step": 36757 }, { "epoch": 1.22, "grad_norm": 0.4867742657661438, "learning_rate": 0.000385823996596405, "loss": 1.8775, "step": 36758 }, { "epoch": 1.22, "grad_norm": 0.4668620824813843, "learning_rate": 0.00038581398126630214, "loss": 1.8295, "step": 36759 }, { "epoch": 1.22, "grad_norm": 0.475208580493927, "learning_rate": 0.00038580396583203375, "loss": 1.805, "step": 36760 }, { "epoch": 1.22, "grad_norm": 0.4754124581813812, "learning_rate": 0.000385793950293612, "loss": 1.8318, "step": 36761 }, { "epoch": 1.22, "grad_norm": 0.46499916911125183, "learning_rate": 0.000385783934651049, "loss": 1.8044, "step": 36762 }, { "epoch": 1.22, "grad_norm": 0.4687729775905609, "learning_rate": 0.000385773918904357, "loss": 1.8456, "step": 36763 }, { "epoch": 1.22, "grad_norm": 0.48280608654022217, "learning_rate": 0.0003857639030535481, "loss": 1.8345, "step": 36764 }, { "epoch": 1.22, "grad_norm": 0.48390430212020874, "learning_rate": 0.00038575388709863437, "loss": 1.7551, "step": 36765 }, { "epoch": 1.22, "grad_norm": 0.4624166190624237, "learning_rate": 0.00038574387103962804, "loss": 1.7262, "step": 36766 }, { "epoch": 1.22, "grad_norm": 0.4730803072452545, "learning_rate": 0.00038573385487654144, "loss": 1.7681, "step": 36767 }, { "epoch": 1.22, "grad_norm": 0.4812505543231964, "learning_rate": 0.0003857238386093864, "loss": 1.7945, "step": 36768 }, { "epoch": 1.22, "grad_norm": 0.47094014286994934, "learning_rate": 0.00038571382223817537, "loss": 1.8106, "step": 36769 }, { "epoch": 1.22, "grad_norm": 0.4670685827732086, "learning_rate": 0.0003857038057629204, "loss": 1.8136, "step": 36770 }, { "epoch": 1.22, "grad_norm": 0.47509095072746277, "learning_rate": 0.00038569378918363353, "loss": 1.7617, "step": 36771 }, { "epoch": 1.22, "grad_norm": 0.4644696116447449, "learning_rate": 0.00038568377250032716, "loss": 1.7994, "step": 36772 }, { "epoch": 1.22, "grad_norm": 0.49325117468833923, "learning_rate": 0.00038567375571301317, "loss": 1.7493, "step": 36773 }, { "epoch": 1.22, "grad_norm": 0.4724368155002594, "learning_rate": 0.00038566373882170397, "loss": 1.7317, "step": 36774 }, { "epoch": 1.22, "grad_norm": 0.48372235894203186, "learning_rate": 0.00038565372182641154, "loss": 1.7385, "step": 36775 }, { "epoch": 1.22, "grad_norm": 0.4841436445713043, "learning_rate": 0.0003856437047271482, "loss": 1.834, "step": 36776 }, { "epoch": 1.22, "grad_norm": 0.47253695130348206, "learning_rate": 0.00038563368752392595, "loss": 1.806, "step": 36777 }, { "epoch": 1.22, "grad_norm": 0.4621308445930481, "learning_rate": 0.000385623670216757, "loss": 1.7517, "step": 36778 }, { "epoch": 1.22, "grad_norm": 0.4621586799621582, "learning_rate": 0.0003856136528056536, "loss": 1.7452, "step": 36779 }, { "epoch": 1.22, "grad_norm": 0.4772276282310486, "learning_rate": 0.00038560363529062776, "loss": 1.7927, "step": 36780 }, { "epoch": 1.22, "grad_norm": 0.4930382966995239, "learning_rate": 0.0003855936176716919, "loss": 1.7739, "step": 36781 }, { "epoch": 1.22, "grad_norm": 0.4630034565925598, "learning_rate": 0.0003855835999488578, "loss": 1.778, "step": 36782 }, { "epoch": 1.22, "grad_norm": 0.4765710234642029, "learning_rate": 0.00038557358212213793, "loss": 1.7349, "step": 36783 }, { "epoch": 1.22, "grad_norm": 0.4673929810523987, "learning_rate": 0.00038556356419154424, "loss": 1.7336, "step": 36784 }, { "epoch": 1.22, "grad_norm": 0.4723147451877594, "learning_rate": 0.000385553546157089, "loss": 1.8116, "step": 36785 }, { "epoch": 1.22, "grad_norm": 0.49086058139801025, "learning_rate": 0.00038554352801878453, "loss": 1.7566, "step": 36786 }, { "epoch": 1.22, "grad_norm": 0.46751537919044495, "learning_rate": 0.00038553350977664274, "loss": 1.8681, "step": 36787 }, { "epoch": 1.22, "grad_norm": 0.46835562586784363, "learning_rate": 0.0003855234914306758, "loss": 1.8128, "step": 36788 }, { "epoch": 1.22, "grad_norm": 0.4634219706058502, "learning_rate": 0.00038551347298089597, "loss": 1.7981, "step": 36789 }, { "epoch": 1.22, "grad_norm": 0.48339396715164185, "learning_rate": 0.0003855034544273153, "loss": 1.7329, "step": 36790 }, { "epoch": 1.22, "grad_norm": 0.4755232036113739, "learning_rate": 0.00038549343576994616, "loss": 1.7403, "step": 36791 }, { "epoch": 1.22, "grad_norm": 0.47198039293289185, "learning_rate": 0.0003854834170088006, "loss": 1.7105, "step": 36792 }, { "epoch": 1.22, "grad_norm": 0.48635581135749817, "learning_rate": 0.00038547339814389074, "loss": 1.8137, "step": 36793 }, { "epoch": 1.22, "grad_norm": 0.4689241051673889, "learning_rate": 0.0003854633791752287, "loss": 1.764, "step": 36794 }, { "epoch": 1.22, "grad_norm": 0.4641192555427551, "learning_rate": 0.0003854533601028268, "loss": 1.7626, "step": 36795 }, { "epoch": 1.22, "grad_norm": 0.4629383385181427, "learning_rate": 0.00038544334092669705, "loss": 1.8272, "step": 36796 }, { "epoch": 1.22, "grad_norm": 0.48312175273895264, "learning_rate": 0.0003854333216468517, "loss": 1.8622, "step": 36797 }, { "epoch": 1.22, "grad_norm": 0.4909820854663849, "learning_rate": 0.00038542330226330285, "loss": 1.7622, "step": 36798 }, { "epoch": 1.22, "grad_norm": 1.367199182510376, "learning_rate": 0.0003854132827760628, "loss": 1.7739, "step": 36799 }, { "epoch": 1.22, "grad_norm": 0.4723798632621765, "learning_rate": 0.00038540326318514353, "loss": 1.8142, "step": 36800 }, { "epoch": 1.22, "grad_norm": 0.46246930956840515, "learning_rate": 0.00038539324349055724, "loss": 1.7894, "step": 36801 }, { "epoch": 1.22, "grad_norm": 0.49503153562545776, "learning_rate": 0.0003853832236923162, "loss": 1.7907, "step": 36802 }, { "epoch": 1.22, "grad_norm": 0.4881434440612793, "learning_rate": 0.00038537320379043244, "loss": 1.6555, "step": 36803 }, { "epoch": 1.22, "grad_norm": 0.4829031527042389, "learning_rate": 0.00038536318378491833, "loss": 1.8548, "step": 36804 }, { "epoch": 1.22, "grad_norm": 0.461196631193161, "learning_rate": 0.0003853531636757857, "loss": 1.7828, "step": 36805 }, { "epoch": 1.22, "grad_norm": 0.4884340167045593, "learning_rate": 0.00038534314346304707, "loss": 1.8139, "step": 36806 }, { "epoch": 1.22, "grad_norm": 0.47309789061546326, "learning_rate": 0.0003853331231467144, "loss": 1.7446, "step": 36807 }, { "epoch": 1.22, "grad_norm": 0.4673391580581665, "learning_rate": 0.00038532310272679986, "loss": 1.72, "step": 36808 }, { "epoch": 1.22, "grad_norm": 0.4578304588794708, "learning_rate": 0.0003853130822033156, "loss": 1.7518, "step": 36809 }, { "epoch": 1.22, "grad_norm": 0.4773447513580322, "learning_rate": 0.00038530306157627385, "loss": 1.7897, "step": 36810 }, { "epoch": 1.22, "grad_norm": 0.4761461019515991, "learning_rate": 0.0003852930408456869, "loss": 1.7952, "step": 36811 }, { "epoch": 1.22, "grad_norm": 0.4747297465801239, "learning_rate": 0.0003852830200115666, "loss": 1.7281, "step": 36812 }, { "epoch": 1.22, "grad_norm": 0.48505890369415283, "learning_rate": 0.0003852729990739253, "loss": 1.814, "step": 36813 }, { "epoch": 1.22, "grad_norm": 0.483565092086792, "learning_rate": 0.0003852629780327752, "loss": 1.8368, "step": 36814 }, { "epoch": 1.22, "grad_norm": 0.4712298810482025, "learning_rate": 0.0003852529568881284, "loss": 1.7504, "step": 36815 }, { "epoch": 1.22, "grad_norm": 0.4787410497665405, "learning_rate": 0.000385242935639997, "loss": 1.6636, "step": 36816 }, { "epoch": 1.22, "grad_norm": 0.4730214774608612, "learning_rate": 0.00038523291428839337, "loss": 1.7905, "step": 36817 }, { "epoch": 1.22, "grad_norm": 0.5019404292106628, "learning_rate": 0.00038522289283332946, "loss": 1.8215, "step": 36818 }, { "epoch": 1.22, "grad_norm": 0.467668354511261, "learning_rate": 0.0003852128712748175, "loss": 1.8145, "step": 36819 }, { "epoch": 1.23, "grad_norm": 0.4776850640773773, "learning_rate": 0.00038520284961286966, "loss": 1.746, "step": 36820 }, { "epoch": 1.23, "grad_norm": 0.47381794452667236, "learning_rate": 0.00038519282784749815, "loss": 1.8525, "step": 36821 }, { "epoch": 1.23, "grad_norm": 0.4745005667209625, "learning_rate": 0.0003851828059787151, "loss": 1.7127, "step": 36822 }, { "epoch": 1.23, "grad_norm": 0.4828721880912781, "learning_rate": 0.00038517278400653263, "loss": 1.7527, "step": 36823 }, { "epoch": 1.23, "grad_norm": 0.46784868836402893, "learning_rate": 0.0003851627619309631, "loss": 1.7522, "step": 36824 }, { "epoch": 1.23, "grad_norm": 0.4861144721508026, "learning_rate": 0.0003851527397520183, "loss": 1.7626, "step": 36825 }, { "epoch": 1.23, "grad_norm": 0.4665350615978241, "learning_rate": 0.0003851427174697107, "loss": 1.9051, "step": 36826 }, { "epoch": 1.23, "grad_norm": 0.48399439454078674, "learning_rate": 0.0003851326950840525, "loss": 1.7228, "step": 36827 }, { "epoch": 1.23, "grad_norm": 0.47150373458862305, "learning_rate": 0.00038512267259505555, "loss": 1.7711, "step": 36828 }, { "epoch": 1.23, "grad_norm": 0.47381868958473206, "learning_rate": 0.0003851126500027324, "loss": 1.6678, "step": 36829 }, { "epoch": 1.23, "grad_norm": 0.46380093693733215, "learning_rate": 0.00038510262730709495, "loss": 1.7756, "step": 36830 }, { "epoch": 1.23, "grad_norm": 0.46317848563194275, "learning_rate": 0.00038509260450815547, "loss": 1.742, "step": 36831 }, { "epoch": 1.23, "grad_norm": 0.47883668541908264, "learning_rate": 0.00038508258160592615, "loss": 1.7512, "step": 36832 }, { "epoch": 1.23, "grad_norm": 1.0252611637115479, "learning_rate": 0.000385072558600419, "loss": 1.8109, "step": 36833 }, { "epoch": 1.23, "grad_norm": 0.47525879740715027, "learning_rate": 0.00038506253549164645, "loss": 1.7673, "step": 36834 }, { "epoch": 1.23, "grad_norm": 0.47678741812705994, "learning_rate": 0.00038505251227962035, "loss": 1.8037, "step": 36835 }, { "epoch": 1.23, "grad_norm": 0.4733259677886963, "learning_rate": 0.00038504248896435316, "loss": 1.8074, "step": 36836 }, { "epoch": 1.23, "grad_norm": 0.48470139503479004, "learning_rate": 0.0003850324655458568, "loss": 1.8039, "step": 36837 }, { "epoch": 1.23, "grad_norm": 0.4670794904232025, "learning_rate": 0.0003850224420241437, "loss": 1.7449, "step": 36838 }, { "epoch": 1.23, "grad_norm": 0.4910925328731537, "learning_rate": 0.0003850124183992258, "loss": 1.7837, "step": 36839 }, { "epoch": 1.23, "grad_norm": 0.49123868346214294, "learning_rate": 0.00038500239467111535, "loss": 1.8305, "step": 36840 }, { "epoch": 1.23, "grad_norm": 0.472752183675766, "learning_rate": 0.0003849923708398246, "loss": 1.803, "step": 36841 }, { "epoch": 1.23, "grad_norm": 0.4647241532802582, "learning_rate": 0.00038498234690536546, "loss": 1.7318, "step": 36842 }, { "epoch": 1.23, "grad_norm": 0.4739188253879547, "learning_rate": 0.0003849723228677504, "loss": 1.7743, "step": 36843 }, { "epoch": 1.23, "grad_norm": 0.4763079583644867, "learning_rate": 0.0003849622987269914, "loss": 1.8652, "step": 36844 }, { "epoch": 1.23, "grad_norm": 0.48357415199279785, "learning_rate": 0.00038495227448310073, "loss": 1.7916, "step": 36845 }, { "epoch": 1.23, "grad_norm": 0.48866569995880127, "learning_rate": 0.00038494225013609054, "loss": 1.7674, "step": 36846 }, { "epoch": 1.23, "grad_norm": 0.49965086579322815, "learning_rate": 0.00038493222568597294, "loss": 1.7382, "step": 36847 }, { "epoch": 1.23, "grad_norm": 0.4987363815307617, "learning_rate": 0.00038492220113276013, "loss": 1.8546, "step": 36848 }, { "epoch": 1.23, "grad_norm": 0.4854655861854553, "learning_rate": 0.00038491217647646425, "loss": 1.7765, "step": 36849 }, { "epoch": 1.23, "grad_norm": 0.47975078225135803, "learning_rate": 0.00038490215171709755, "loss": 1.7647, "step": 36850 }, { "epoch": 1.23, "grad_norm": 0.48743417859077454, "learning_rate": 0.0003848921268546721, "loss": 1.8201, "step": 36851 }, { "epoch": 1.23, "grad_norm": 0.48508352041244507, "learning_rate": 0.00038488210188920017, "loss": 1.8587, "step": 36852 }, { "epoch": 1.23, "grad_norm": 0.46138057112693787, "learning_rate": 0.0003848720768206938, "loss": 1.7881, "step": 36853 }, { "epoch": 1.23, "grad_norm": 0.4889489710330963, "learning_rate": 0.00038486205164916544, "loss": 1.7661, "step": 36854 }, { "epoch": 1.23, "grad_norm": 0.48032641410827637, "learning_rate": 0.00038485202637462686, "loss": 1.8495, "step": 36855 }, { "epoch": 1.23, "grad_norm": 0.4754238426685333, "learning_rate": 0.0003848420009970904, "loss": 1.8421, "step": 36856 }, { "epoch": 1.23, "grad_norm": 0.47339844703674316, "learning_rate": 0.0003848319755165683, "loss": 1.7408, "step": 36857 }, { "epoch": 1.23, "grad_norm": 0.46297743916511536, "learning_rate": 0.00038482194993307267, "loss": 1.7972, "step": 36858 }, { "epoch": 1.23, "grad_norm": 0.4894818067550659, "learning_rate": 0.0003848119242466158, "loss": 1.7853, "step": 36859 }, { "epoch": 1.23, "grad_norm": 0.463553786277771, "learning_rate": 0.00038480189845720966, "loss": 1.801, "step": 36860 }, { "epoch": 1.23, "grad_norm": 0.48061099648475647, "learning_rate": 0.00038479187256486655, "loss": 1.7715, "step": 36861 }, { "epoch": 1.23, "grad_norm": 0.46329203248023987, "learning_rate": 0.0003847818465695985, "loss": 1.7381, "step": 36862 }, { "epoch": 1.23, "grad_norm": 0.48719504475593567, "learning_rate": 0.0003847718204714179, "loss": 1.7161, "step": 36863 }, { "epoch": 1.23, "grad_norm": 0.5068890452384949, "learning_rate": 0.00038476179427033674, "loss": 1.8086, "step": 36864 }, { "epoch": 1.23, "grad_norm": 0.46449634432792664, "learning_rate": 0.0003847517679663672, "loss": 1.7458, "step": 36865 }, { "epoch": 1.23, "grad_norm": 0.4700174331665039, "learning_rate": 0.0003847417415595216, "loss": 1.7529, "step": 36866 }, { "epoch": 1.23, "grad_norm": 0.4747980833053589, "learning_rate": 0.0003847317150498119, "loss": 1.7874, "step": 36867 }, { "epoch": 1.23, "grad_norm": 0.5139020085334778, "learning_rate": 0.0003847216884372505, "loss": 1.8258, "step": 36868 }, { "epoch": 1.23, "grad_norm": 0.506594181060791, "learning_rate": 0.00038471166172184944, "loss": 1.7803, "step": 36869 }, { "epoch": 1.23, "grad_norm": 0.48229750990867615, "learning_rate": 0.0003847016349036208, "loss": 1.7874, "step": 36870 }, { "epoch": 1.23, "grad_norm": 0.498451828956604, "learning_rate": 0.000384691607982577, "loss": 1.8396, "step": 36871 }, { "epoch": 1.23, "grad_norm": 0.48985010385513306, "learning_rate": 0.00038468158095872996, "loss": 1.8317, "step": 36872 }, { "epoch": 1.23, "grad_norm": 0.47368568181991577, "learning_rate": 0.000384671553832092, "loss": 1.7716, "step": 36873 }, { "epoch": 1.23, "grad_norm": 0.4653571546077728, "learning_rate": 0.0003846615266026753, "loss": 1.8097, "step": 36874 }, { "epoch": 1.23, "grad_norm": 0.4655075967311859, "learning_rate": 0.00038465149927049193, "loss": 1.7533, "step": 36875 }, { "epoch": 1.23, "grad_norm": 0.47423192858695984, "learning_rate": 0.00038464147183555405, "loss": 1.7554, "step": 36876 }, { "epoch": 1.23, "grad_norm": 0.4819226562976837, "learning_rate": 0.00038463144429787396, "loss": 1.77, "step": 36877 }, { "epoch": 1.23, "grad_norm": 0.46161070466041565, "learning_rate": 0.0003846214166574637, "loss": 1.7925, "step": 36878 }, { "epoch": 1.23, "grad_norm": 0.45874884724617004, "learning_rate": 0.00038461138891433567, "loss": 1.6914, "step": 36879 }, { "epoch": 1.23, "grad_norm": 0.5035624504089355, "learning_rate": 0.00038460136106850174, "loss": 1.7923, "step": 36880 }, { "epoch": 1.23, "grad_norm": 0.47646498680114746, "learning_rate": 0.00038459133311997425, "loss": 1.7507, "step": 36881 }, { "epoch": 1.23, "grad_norm": 0.47574880719184875, "learning_rate": 0.0003845813050687653, "loss": 1.7874, "step": 36882 }, { "epoch": 1.23, "grad_norm": 0.5000976920127869, "learning_rate": 0.0003845712769148872, "loss": 1.7984, "step": 36883 }, { "epoch": 1.23, "grad_norm": 0.4898884892463684, "learning_rate": 0.00038456124865835205, "loss": 1.8881, "step": 36884 }, { "epoch": 1.23, "grad_norm": 0.4875231981277466, "learning_rate": 0.0003845512202991719, "loss": 1.8127, "step": 36885 }, { "epoch": 1.23, "grad_norm": 0.4739595651626587, "learning_rate": 0.00038454119183735906, "loss": 1.7687, "step": 36886 }, { "epoch": 1.23, "grad_norm": 0.46142464876174927, "learning_rate": 0.0003845311632729257, "loss": 1.8236, "step": 36887 }, { "epoch": 1.23, "grad_norm": 0.4787411093711853, "learning_rate": 0.0003845211346058839, "loss": 1.8025, "step": 36888 }, { "epoch": 1.23, "grad_norm": 0.4689956307411194, "learning_rate": 0.00038451110583624605, "loss": 1.7597, "step": 36889 }, { "epoch": 1.23, "grad_norm": 0.4880993962287903, "learning_rate": 0.00038450107696402395, "loss": 1.7632, "step": 36890 }, { "epoch": 1.23, "grad_norm": 0.47317615151405334, "learning_rate": 0.0003844910479892302, "loss": 1.7768, "step": 36891 }, { "epoch": 1.23, "grad_norm": 0.4781091809272766, "learning_rate": 0.0003844810189118766, "loss": 1.7819, "step": 36892 }, { "epoch": 1.23, "grad_norm": 0.4733588397502899, "learning_rate": 0.00038447098973197556, "loss": 1.7616, "step": 36893 }, { "epoch": 1.23, "grad_norm": 0.48153403401374817, "learning_rate": 0.0003844609604495392, "loss": 1.8224, "step": 36894 }, { "epoch": 1.23, "grad_norm": 0.4882161617279053, "learning_rate": 0.00038445093106457965, "loss": 1.8471, "step": 36895 }, { "epoch": 1.23, "grad_norm": 0.4654606580734253, "learning_rate": 0.00038444090157710925, "loss": 1.7612, "step": 36896 }, { "epoch": 1.23, "grad_norm": 0.4546964764595032, "learning_rate": 0.0003844308719871398, "loss": 1.7389, "step": 36897 }, { "epoch": 1.23, "grad_norm": 0.48764553666114807, "learning_rate": 0.0003844208422946839, "loss": 1.7468, "step": 36898 }, { "epoch": 1.23, "grad_norm": 0.49624764919281006, "learning_rate": 0.0003844108124997534, "loss": 1.873, "step": 36899 }, { "epoch": 1.23, "grad_norm": 0.4773808419704437, "learning_rate": 0.00038440078260236065, "loss": 1.8295, "step": 36900 }, { "epoch": 1.23, "grad_norm": 0.4639822840690613, "learning_rate": 0.0003843907526025178, "loss": 1.7726, "step": 36901 }, { "epoch": 1.23, "grad_norm": 0.4913346767425537, "learning_rate": 0.000384380722500237, "loss": 1.8386, "step": 36902 }, { "epoch": 1.23, "grad_norm": 0.4860056936740875, "learning_rate": 0.00038437069229553056, "loss": 1.7421, "step": 36903 }, { "epoch": 1.23, "grad_norm": 0.48536416888237, "learning_rate": 0.0003843606619884104, "loss": 1.7987, "step": 36904 }, { "epoch": 1.23, "grad_norm": 0.4802369177341461, "learning_rate": 0.00038435063157888884, "loss": 1.7844, "step": 36905 }, { "epoch": 1.23, "grad_norm": 0.4973755180835724, "learning_rate": 0.00038434060106697803, "loss": 1.7206, "step": 36906 }, { "epoch": 1.23, "grad_norm": 0.4809466302394867, "learning_rate": 0.00038433057045269024, "loss": 1.8225, "step": 36907 }, { "epoch": 1.23, "grad_norm": 0.47887223958969116, "learning_rate": 0.00038432053973603745, "loss": 1.7951, "step": 36908 }, { "epoch": 1.23, "grad_norm": 0.49731478095054626, "learning_rate": 0.00038431050891703206, "loss": 1.8081, "step": 36909 }, { "epoch": 1.23, "grad_norm": 0.47283944487571716, "learning_rate": 0.0003843004779956861, "loss": 1.7216, "step": 36910 }, { "epoch": 1.23, "grad_norm": 0.4691945016384125, "learning_rate": 0.0003842904469720117, "loss": 1.7302, "step": 36911 }, { "epoch": 1.23, "grad_norm": 0.4988262355327606, "learning_rate": 0.0003842804158460212, "loss": 1.8475, "step": 36912 }, { "epoch": 1.23, "grad_norm": 0.47054746747016907, "learning_rate": 0.0003842703846177267, "loss": 1.7886, "step": 36913 }, { "epoch": 1.23, "grad_norm": 0.5237204432487488, "learning_rate": 0.00038426035328714036, "loss": 1.8047, "step": 36914 }, { "epoch": 1.23, "grad_norm": 0.4643058776855469, "learning_rate": 0.00038425032185427423, "loss": 1.7749, "step": 36915 }, { "epoch": 1.23, "grad_norm": 0.4871807396411896, "learning_rate": 0.0003842402903191409, "loss": 1.7417, "step": 36916 }, { "epoch": 1.23, "grad_norm": 0.489025354385376, "learning_rate": 0.000384230258681752, "loss": 1.8313, "step": 36917 }, { "epoch": 1.23, "grad_norm": 0.481441468000412, "learning_rate": 0.0003842202269421201, "loss": 1.6996, "step": 36918 }, { "epoch": 1.23, "grad_norm": 0.4795384407043457, "learning_rate": 0.00038421019510025725, "loss": 1.7872, "step": 36919 }, { "epoch": 1.23, "grad_norm": 0.47853273153305054, "learning_rate": 0.0003842001631561756, "loss": 1.7673, "step": 36920 }, { "epoch": 1.23, "grad_norm": 0.4626000225543976, "learning_rate": 0.00038419013110988745, "loss": 1.7277, "step": 36921 }, { "epoch": 1.23, "grad_norm": 0.46768102049827576, "learning_rate": 0.0003841800989614047, "loss": 1.7677, "step": 36922 }, { "epoch": 1.23, "grad_norm": 0.48778119683265686, "learning_rate": 0.0003841700667107399, "loss": 1.776, "step": 36923 }, { "epoch": 1.23, "grad_norm": 0.47922205924987793, "learning_rate": 0.00038416003435790496, "loss": 1.7476, "step": 36924 }, { "epoch": 1.23, "grad_norm": 0.4897511899471283, "learning_rate": 0.00038415000190291217, "loss": 1.7263, "step": 36925 }, { "epoch": 1.23, "grad_norm": 0.46159830689430237, "learning_rate": 0.0003841399693457736, "loss": 1.7075, "step": 36926 }, { "epoch": 1.23, "grad_norm": 0.48855188488960266, "learning_rate": 0.00038412993668650156, "loss": 1.8481, "step": 36927 }, { "epoch": 1.23, "grad_norm": 0.46034565567970276, "learning_rate": 0.00038411990392510825, "loss": 1.8017, "step": 36928 }, { "epoch": 1.23, "grad_norm": 0.47997844219207764, "learning_rate": 0.00038410987106160565, "loss": 1.7992, "step": 36929 }, { "epoch": 1.23, "grad_norm": 0.5012515783309937, "learning_rate": 0.0003840998380960061, "loss": 1.8194, "step": 36930 }, { "epoch": 1.23, "grad_norm": 0.46983978152275085, "learning_rate": 0.0003840898050283218, "loss": 1.7716, "step": 36931 }, { "epoch": 1.23, "grad_norm": 0.4832637906074524, "learning_rate": 0.00038407977185856475, "loss": 1.751, "step": 36932 }, { "epoch": 1.23, "grad_norm": 0.4925951361656189, "learning_rate": 0.00038406973858674733, "loss": 1.7997, "step": 36933 }, { "epoch": 1.23, "grad_norm": 0.4706319272518158, "learning_rate": 0.0003840597052128816, "loss": 1.7869, "step": 36934 }, { "epoch": 1.23, "grad_norm": 0.47841188311576843, "learning_rate": 0.0003840496717369798, "loss": 1.7638, "step": 36935 }, { "epoch": 1.23, "grad_norm": 0.46499690413475037, "learning_rate": 0.00038403963815905406, "loss": 1.799, "step": 36936 }, { "epoch": 1.23, "grad_norm": 0.49726319313049316, "learning_rate": 0.0003840296044791166, "loss": 1.8273, "step": 36937 }, { "epoch": 1.23, "grad_norm": 0.4805827736854553, "learning_rate": 0.00038401957069717955, "loss": 1.712, "step": 36938 }, { "epoch": 1.23, "grad_norm": 0.45964452624320984, "learning_rate": 0.00038400953681325513, "loss": 1.7551, "step": 36939 }, { "epoch": 1.23, "grad_norm": 0.4698611795902252, "learning_rate": 0.00038399950282735543, "loss": 1.7138, "step": 36940 }, { "epoch": 1.23, "grad_norm": 0.48902827501296997, "learning_rate": 0.0003839894687394929, "loss": 1.764, "step": 36941 }, { "epoch": 1.23, "grad_norm": 0.4824392795562744, "learning_rate": 0.0003839794345496794, "loss": 1.817, "step": 36942 }, { "epoch": 1.23, "grad_norm": 0.4581151008605957, "learning_rate": 0.0003839694002579273, "loss": 1.7261, "step": 36943 }, { "epoch": 1.23, "grad_norm": 0.4834609031677246, "learning_rate": 0.0003839593658642487, "loss": 1.7716, "step": 36944 }, { "epoch": 1.23, "grad_norm": 0.48956775665283203, "learning_rate": 0.0003839493313686557, "loss": 1.8071, "step": 36945 }, { "epoch": 1.23, "grad_norm": 0.49057331681251526, "learning_rate": 0.00038393929677116073, "loss": 1.8364, "step": 36946 }, { "epoch": 1.23, "grad_norm": 0.48912328481674194, "learning_rate": 0.0003839292620717757, "loss": 1.8038, "step": 36947 }, { "epoch": 1.23, "grad_norm": 0.4902956783771515, "learning_rate": 0.00038391922727051305, "loss": 1.7648, "step": 36948 }, { "epoch": 1.23, "grad_norm": 0.46386441588401794, "learning_rate": 0.00038390919236738476, "loss": 1.7303, "step": 36949 }, { "epoch": 1.23, "grad_norm": 0.48387402296066284, "learning_rate": 0.000383899157362403, "loss": 1.7385, "step": 36950 }, { "epoch": 1.23, "grad_norm": 0.4655323028564453, "learning_rate": 0.00038388912225558017, "loss": 1.7717, "step": 36951 }, { "epoch": 1.23, "grad_norm": 0.4901031255722046, "learning_rate": 0.0003838790870469281, "loss": 1.8135, "step": 36952 }, { "epoch": 1.23, "grad_norm": 0.48779261112213135, "learning_rate": 0.00038386905173645936, "loss": 1.7098, "step": 36953 }, { "epoch": 1.23, "grad_norm": 0.46969032287597656, "learning_rate": 0.00038385901632418594, "loss": 1.7945, "step": 36954 }, { "epoch": 1.23, "grad_norm": 0.47848761081695557, "learning_rate": 0.0003838489808101199, "loss": 1.799, "step": 36955 }, { "epoch": 1.23, "grad_norm": 0.4972068965435028, "learning_rate": 0.0003838389451942737, "loss": 1.7856, "step": 36956 }, { "epoch": 1.23, "grad_norm": 0.47678253054618835, "learning_rate": 0.0003838289094766593, "loss": 1.8408, "step": 36957 }, { "epoch": 1.23, "grad_norm": 0.4749543070793152, "learning_rate": 0.00038381887365728895, "loss": 1.7443, "step": 36958 }, { "epoch": 1.23, "grad_norm": 0.44989392161369324, "learning_rate": 0.00038380883773617483, "loss": 1.7544, "step": 36959 }, { "epoch": 1.23, "grad_norm": 0.4900914132595062, "learning_rate": 0.00038379880171332916, "loss": 1.8562, "step": 36960 }, { "epoch": 1.23, "grad_norm": 0.4743844270706177, "learning_rate": 0.0003837887655887641, "loss": 1.7891, "step": 36961 }, { "epoch": 1.23, "grad_norm": 0.48939427733421326, "learning_rate": 0.0003837787293624918, "loss": 1.8418, "step": 36962 }, { "epoch": 1.23, "grad_norm": 0.4826015830039978, "learning_rate": 0.0003837686930345244, "loss": 1.7543, "step": 36963 }, { "epoch": 1.23, "grad_norm": 0.46702101826667786, "learning_rate": 0.00038375865660487425, "loss": 1.8032, "step": 36964 }, { "epoch": 1.23, "grad_norm": 0.5316938161849976, "learning_rate": 0.00038374862007355343, "loss": 1.7311, "step": 36965 }, { "epoch": 1.23, "grad_norm": 0.48005712032318115, "learning_rate": 0.0003837385834405741, "loss": 1.789, "step": 36966 }, { "epoch": 1.23, "grad_norm": 0.4909428060054779, "learning_rate": 0.0003837285467059484, "loss": 1.798, "step": 36967 }, { "epoch": 1.23, "grad_norm": 0.5177136659622192, "learning_rate": 0.00038371850986968864, "loss": 1.8158, "step": 36968 }, { "epoch": 1.23, "grad_norm": 0.4728879928588867, "learning_rate": 0.00038370847293180696, "loss": 1.801, "step": 36969 }, { "epoch": 1.23, "grad_norm": 0.447692334651947, "learning_rate": 0.0003836984358923155, "loss": 1.7926, "step": 36970 }, { "epoch": 1.23, "grad_norm": 0.4829000234603882, "learning_rate": 0.00038368839875122657, "loss": 1.7819, "step": 36971 }, { "epoch": 1.23, "grad_norm": 0.46117904782295227, "learning_rate": 0.00038367836150855213, "loss": 1.7151, "step": 36972 }, { "epoch": 1.23, "grad_norm": 0.47689899802207947, "learning_rate": 0.0003836683241643046, "loss": 1.6996, "step": 36973 }, { "epoch": 1.23, "grad_norm": 0.49401798844337463, "learning_rate": 0.00038365828671849594, "loss": 1.8355, "step": 36974 }, { "epoch": 1.23, "grad_norm": 0.46750691533088684, "learning_rate": 0.0003836482491711385, "loss": 1.7698, "step": 36975 }, { "epoch": 1.23, "grad_norm": 0.4827107787132263, "learning_rate": 0.00038363821152224444, "loss": 1.7664, "step": 36976 }, { "epoch": 1.23, "grad_norm": 0.4945812523365021, "learning_rate": 0.0003836281737718258, "loss": 1.7927, "step": 36977 }, { "epoch": 1.23, "grad_norm": 0.47833052277565, "learning_rate": 0.00038361813591989503, "loss": 1.7623, "step": 36978 }, { "epoch": 1.23, "grad_norm": 0.48183903098106384, "learning_rate": 0.00038360809796646406, "loss": 1.7874, "step": 36979 }, { "epoch": 1.23, "grad_norm": 0.5022428631782532, "learning_rate": 0.0003835980599115452, "loss": 1.7592, "step": 36980 }, { "epoch": 1.23, "grad_norm": 0.4768795967102051, "learning_rate": 0.0003835880217551506, "loss": 1.7626, "step": 36981 }, { "epoch": 1.23, "grad_norm": 0.4589843451976776, "learning_rate": 0.00038357798349729247, "loss": 1.7795, "step": 36982 }, { "epoch": 1.23, "grad_norm": 0.47207698225975037, "learning_rate": 0.0003835679451379831, "loss": 1.7567, "step": 36983 }, { "epoch": 1.23, "grad_norm": 0.8641735911369324, "learning_rate": 0.0003835579066772343, "loss": 1.6981, "step": 36984 }, { "epoch": 1.23, "grad_norm": 0.4685831367969513, "learning_rate": 0.00038354786811505877, "loss": 1.716, "step": 36985 }, { "epoch": 1.23, "grad_norm": 0.4890788197517395, "learning_rate": 0.0003835378294514683, "loss": 1.7977, "step": 36986 }, { "epoch": 1.23, "grad_norm": 0.5068024396896362, "learning_rate": 0.0003835277906864752, "loss": 1.7994, "step": 36987 }, { "epoch": 1.23, "grad_norm": 0.4937804937362671, "learning_rate": 0.00038351775182009175, "loss": 1.8027, "step": 36988 }, { "epoch": 1.23, "grad_norm": 0.48096075654029846, "learning_rate": 0.00038350771285232995, "loss": 1.795, "step": 36989 }, { "epoch": 1.23, "grad_norm": 0.5034148097038269, "learning_rate": 0.00038349767378320225, "loss": 1.8219, "step": 36990 }, { "epoch": 1.23, "grad_norm": 0.4882623553276062, "learning_rate": 0.00038348763461272056, "loss": 1.7285, "step": 36991 }, { "epoch": 1.23, "grad_norm": 0.4751928448677063, "learning_rate": 0.0003834775953408972, "loss": 1.8466, "step": 36992 }, { "epoch": 1.23, "grad_norm": 0.48358240723609924, "learning_rate": 0.0003834675559677444, "loss": 1.7725, "step": 36993 }, { "epoch": 1.23, "grad_norm": 0.48617973923683167, "learning_rate": 0.0003834575164932742, "loss": 1.754, "step": 36994 }, { "epoch": 1.23, "grad_norm": 0.4618985950946808, "learning_rate": 0.00038344747691749885, "loss": 1.7956, "step": 36995 }, { "epoch": 1.23, "grad_norm": 0.4891951382160187, "learning_rate": 0.0003834374372404307, "loss": 1.7853, "step": 36996 }, { "epoch": 1.23, "grad_norm": 0.4944458305835724, "learning_rate": 0.0003834273974620817, "loss": 1.8039, "step": 36997 }, { "epoch": 1.23, "grad_norm": 0.46465563774108887, "learning_rate": 0.0003834173575824641, "loss": 1.8096, "step": 36998 }, { "epoch": 1.23, "grad_norm": 0.5497350096702576, "learning_rate": 0.0003834073176015902, "loss": 1.7503, "step": 36999 }, { "epoch": 1.23, "grad_norm": 0.4805420935153961, "learning_rate": 0.00038339727751947203, "loss": 1.7783, "step": 37000 }, { "epoch": 1.23, "grad_norm": 0.47282952070236206, "learning_rate": 0.0003833872373361219, "loss": 1.8194, "step": 37001 }, { "epoch": 1.23, "grad_norm": 0.47010934352874756, "learning_rate": 0.0003833771970515519, "loss": 1.7851, "step": 37002 }, { "epoch": 1.23, "grad_norm": 0.49885526299476624, "learning_rate": 0.0003833671566657744, "loss": 1.8009, "step": 37003 }, { "epoch": 1.23, "grad_norm": 0.46629026532173157, "learning_rate": 0.00038335711617880136, "loss": 1.7903, "step": 37004 }, { "epoch": 1.23, "grad_norm": 0.4715423285961151, "learning_rate": 0.000383347075590645, "loss": 1.784, "step": 37005 }, { "epoch": 1.23, "grad_norm": 0.4853011965751648, "learning_rate": 0.00038333703490131765, "loss": 1.7521, "step": 37006 }, { "epoch": 1.23, "grad_norm": 0.48042556643486023, "learning_rate": 0.0003833269941108314, "loss": 1.828, "step": 37007 }, { "epoch": 1.23, "grad_norm": 0.47425276041030884, "learning_rate": 0.0003833169532191985, "loss": 1.812, "step": 37008 }, { "epoch": 1.23, "grad_norm": 0.49271532893180847, "learning_rate": 0.00038330691222643093, "loss": 1.7791, "step": 37009 }, { "epoch": 1.23, "grad_norm": 0.4748360812664032, "learning_rate": 0.00038329687113254125, "loss": 1.7999, "step": 37010 }, { "epoch": 1.23, "grad_norm": 0.4565659761428833, "learning_rate": 0.00038328682993754137, "loss": 1.73, "step": 37011 }, { "epoch": 1.23, "grad_norm": 0.4794889986515045, "learning_rate": 0.0003832767886414435, "loss": 1.8038, "step": 37012 }, { "epoch": 1.23, "grad_norm": 0.4774637520313263, "learning_rate": 0.00038326674724425996, "loss": 1.8078, "step": 37013 }, { "epoch": 1.23, "grad_norm": 0.46030324697494507, "learning_rate": 0.0003832567057460028, "loss": 1.7822, "step": 37014 }, { "epoch": 1.23, "grad_norm": 0.4751095771789551, "learning_rate": 0.0003832466641466843, "loss": 1.8151, "step": 37015 }, { "epoch": 1.23, "grad_norm": 0.4765843451023102, "learning_rate": 0.00038323662244631656, "loss": 1.7276, "step": 37016 }, { "epoch": 1.23, "grad_norm": 0.45808956027030945, "learning_rate": 0.0003832265806449119, "loss": 1.7689, "step": 37017 }, { "epoch": 1.23, "grad_norm": 0.47695276141166687, "learning_rate": 0.0003832165387424824, "loss": 1.7224, "step": 37018 }, { "epoch": 1.23, "grad_norm": 0.4764261245727539, "learning_rate": 0.0003832064967390403, "loss": 1.8284, "step": 37019 }, { "epoch": 1.23, "grad_norm": 0.47338828444480896, "learning_rate": 0.0003831964546345978, "loss": 1.7519, "step": 37020 }, { "epoch": 1.23, "grad_norm": 0.46041497588157654, "learning_rate": 0.000383186412429167, "loss": 1.7753, "step": 37021 }, { "epoch": 1.23, "grad_norm": 0.4697457253932953, "learning_rate": 0.0003831763701227601, "loss": 1.7193, "step": 37022 }, { "epoch": 1.23, "grad_norm": 0.46080127358436584, "learning_rate": 0.0003831663277153895, "loss": 1.7733, "step": 37023 }, { "epoch": 1.23, "grad_norm": 0.492468923330307, "learning_rate": 0.00038315628520706706, "loss": 1.8076, "step": 37024 }, { "epoch": 1.23, "grad_norm": 0.47894522547721863, "learning_rate": 0.00038314624259780524, "loss": 1.8833, "step": 37025 }, { "epoch": 1.23, "grad_norm": 0.4812183380126953, "learning_rate": 0.0003831361998876162, "loss": 1.7324, "step": 37026 }, { "epoch": 1.23, "grad_norm": 0.4738031029701233, "learning_rate": 0.00038312615707651197, "loss": 1.754, "step": 37027 }, { "epoch": 1.23, "grad_norm": 0.4853992760181427, "learning_rate": 0.0003831161141645048, "loss": 1.8245, "step": 37028 }, { "epoch": 1.23, "grad_norm": 0.48029717803001404, "learning_rate": 0.00038310607115160693, "loss": 1.7464, "step": 37029 }, { "epoch": 1.23, "grad_norm": 1.6659281253814697, "learning_rate": 0.00038309602803783055, "loss": 1.8633, "step": 37030 }, { "epoch": 1.23, "grad_norm": 0.4884704053401947, "learning_rate": 0.0003830859848231879, "loss": 1.819, "step": 37031 }, { "epoch": 1.23, "grad_norm": 0.4718247056007385, "learning_rate": 0.000383075941507691, "loss": 1.8132, "step": 37032 }, { "epoch": 1.23, "grad_norm": 0.48825782537460327, "learning_rate": 0.00038306589809135224, "loss": 1.7506, "step": 37033 }, { "epoch": 1.23, "grad_norm": 0.4874145984649658, "learning_rate": 0.00038305585457418365, "loss": 1.8195, "step": 37034 }, { "epoch": 1.23, "grad_norm": 0.5108994841575623, "learning_rate": 0.0003830458109561976, "loss": 1.7817, "step": 37035 }, { "epoch": 1.23, "grad_norm": 0.4772624969482422, "learning_rate": 0.00038303576723740603, "loss": 1.7543, "step": 37036 }, { "epoch": 1.23, "grad_norm": 0.45294734835624695, "learning_rate": 0.00038302572341782136, "loss": 1.6581, "step": 37037 }, { "epoch": 1.23, "grad_norm": 0.4760266840457916, "learning_rate": 0.00038301567949745576, "loss": 1.804, "step": 37038 }, { "epoch": 1.23, "grad_norm": 0.4793682396411896, "learning_rate": 0.00038300563547632115, "loss": 1.7653, "step": 37039 }, { "epoch": 1.23, "grad_norm": 0.46312031149864197, "learning_rate": 0.00038299559135443007, "loss": 1.7257, "step": 37040 }, { "epoch": 1.23, "grad_norm": 0.4755145311355591, "learning_rate": 0.0003829855471317946, "loss": 1.7146, "step": 37041 }, { "epoch": 1.23, "grad_norm": 0.5614292025566101, "learning_rate": 0.0003829755028084268, "loss": 1.7554, "step": 37042 }, { "epoch": 1.23, "grad_norm": 0.46073079109191895, "learning_rate": 0.00038296545838433903, "loss": 1.7123, "step": 37043 }, { "epoch": 1.23, "grad_norm": 0.47949039936065674, "learning_rate": 0.0003829554138595434, "loss": 1.7099, "step": 37044 }, { "epoch": 1.23, "grad_norm": 0.5249292850494385, "learning_rate": 0.0003829453692340522, "loss": 1.8202, "step": 37045 }, { "epoch": 1.23, "grad_norm": 0.4692818820476532, "learning_rate": 0.00038293532450787743, "loss": 1.7817, "step": 37046 }, { "epoch": 1.23, "grad_norm": 0.4796035587787628, "learning_rate": 0.0003829252796810315, "loss": 1.7412, "step": 37047 }, { "epoch": 1.23, "grad_norm": 0.4873266816139221, "learning_rate": 0.00038291523475352635, "loss": 1.8126, "step": 37048 }, { "epoch": 1.23, "grad_norm": 0.4656800329685211, "learning_rate": 0.0003829051897253744, "loss": 1.7449, "step": 37049 }, { "epoch": 1.23, "grad_norm": 0.4794633984565735, "learning_rate": 0.00038289514459658775, "loss": 1.792, "step": 37050 }, { "epoch": 1.23, "grad_norm": 0.4772645831108093, "learning_rate": 0.0003828850993671787, "loss": 1.7089, "step": 37051 }, { "epoch": 1.23, "grad_norm": 0.46377629041671753, "learning_rate": 0.00038287505403715926, "loss": 1.7865, "step": 37052 }, { "epoch": 1.23, "grad_norm": 0.5040525794029236, "learning_rate": 0.0003828650086065417, "loss": 1.7533, "step": 37053 }, { "epoch": 1.23, "grad_norm": 0.4767279028892517, "learning_rate": 0.00038285496307533825, "loss": 1.7893, "step": 37054 }, { "epoch": 1.23, "grad_norm": 0.4846251606941223, "learning_rate": 0.00038284491744356114, "loss": 1.8089, "step": 37055 }, { "epoch": 1.23, "grad_norm": 0.5084910988807678, "learning_rate": 0.00038283487171122255, "loss": 1.8443, "step": 37056 }, { "epoch": 1.23, "grad_norm": 0.6758038401603699, "learning_rate": 0.00038282482587833445, "loss": 1.764, "step": 37057 }, { "epoch": 1.23, "grad_norm": 0.49295076727867126, "learning_rate": 0.0003828147799449093, "loss": 1.7992, "step": 37058 }, { "epoch": 1.23, "grad_norm": 0.47820624709129333, "learning_rate": 0.0003828047339109593, "loss": 1.7672, "step": 37059 }, { "epoch": 1.23, "grad_norm": 0.4712475538253784, "learning_rate": 0.00038279468777649645, "loss": 1.8108, "step": 37060 }, { "epoch": 1.23, "grad_norm": 0.4867222309112549, "learning_rate": 0.0003827846415415331, "loss": 1.7454, "step": 37061 }, { "epoch": 1.23, "grad_norm": 0.487962007522583, "learning_rate": 0.0003827745952060813, "loss": 1.7459, "step": 37062 }, { "epoch": 1.23, "grad_norm": 0.47599101066589355, "learning_rate": 0.00038276454877015346, "loss": 1.7615, "step": 37063 }, { "epoch": 1.23, "grad_norm": 0.4681079089641571, "learning_rate": 0.00038275450223376154, "loss": 1.7773, "step": 37064 }, { "epoch": 1.23, "grad_norm": 0.48786529898643494, "learning_rate": 0.0003827444555969179, "loss": 1.8061, "step": 37065 }, { "epoch": 1.23, "grad_norm": 0.48738908767700195, "learning_rate": 0.0003827344088596347, "loss": 1.7641, "step": 37066 }, { "epoch": 1.23, "grad_norm": 0.4747706651687622, "learning_rate": 0.00038272436202192413, "loss": 1.7841, "step": 37067 }, { "epoch": 1.23, "grad_norm": 0.47163504362106323, "learning_rate": 0.0003827143150837984, "loss": 1.7368, "step": 37068 }, { "epoch": 1.23, "grad_norm": 0.4582459330558777, "learning_rate": 0.0003827042680452695, "loss": 1.7613, "step": 37069 }, { "epoch": 1.23, "grad_norm": 0.49246153235435486, "learning_rate": 0.00038269422090635, "loss": 1.7881, "step": 37070 }, { "epoch": 1.23, "grad_norm": 0.48362740874290466, "learning_rate": 0.0003826841736670518, "loss": 1.8101, "step": 37071 }, { "epoch": 1.23, "grad_norm": 0.48299598693847656, "learning_rate": 0.00038267412632738725, "loss": 1.8528, "step": 37072 }, { "epoch": 1.23, "grad_norm": 0.5071910619735718, "learning_rate": 0.00038266407888736843, "loss": 1.7733, "step": 37073 }, { "epoch": 1.23, "grad_norm": 0.49174970388412476, "learning_rate": 0.0003826540313470077, "loss": 1.8107, "step": 37074 }, { "epoch": 1.23, "grad_norm": 0.4641996920108795, "learning_rate": 0.0003826439837063171, "loss": 1.7004, "step": 37075 }, { "epoch": 1.23, "grad_norm": 0.46355584263801575, "learning_rate": 0.0003826339359653088, "loss": 1.7982, "step": 37076 }, { "epoch": 1.23, "grad_norm": 0.4806539714336395, "learning_rate": 0.0003826238881239952, "loss": 1.7891, "step": 37077 }, { "epoch": 1.23, "grad_norm": 0.49756279587745667, "learning_rate": 0.00038261384018238836, "loss": 1.6511, "step": 37078 }, { "epoch": 1.23, "grad_norm": 0.4908873438835144, "learning_rate": 0.0003826037921405004, "loss": 1.8042, "step": 37079 }, { "epoch": 1.23, "grad_norm": 0.6965705752372742, "learning_rate": 0.0003825937439983437, "loss": 1.7824, "step": 37080 }, { "epoch": 1.23, "grad_norm": 0.49046361446380615, "learning_rate": 0.0003825836957559303, "loss": 1.7145, "step": 37081 }, { "epoch": 1.23, "grad_norm": 0.48193877935409546, "learning_rate": 0.00038257364741327255, "loss": 1.7065, "step": 37082 }, { "epoch": 1.23, "grad_norm": 0.4956226348876953, "learning_rate": 0.00038256359897038245, "loss": 1.691, "step": 37083 }, { "epoch": 1.23, "grad_norm": 0.48364609479904175, "learning_rate": 0.00038255355042727233, "loss": 1.8074, "step": 37084 }, { "epoch": 1.23, "grad_norm": 0.47727179527282715, "learning_rate": 0.0003825435017839544, "loss": 1.7515, "step": 37085 }, { "epoch": 1.23, "grad_norm": 0.4792715907096863, "learning_rate": 0.0003825334530404408, "loss": 1.7507, "step": 37086 }, { "epoch": 1.23, "grad_norm": 0.5685492753982544, "learning_rate": 0.00038252340419674374, "loss": 1.892, "step": 37087 }, { "epoch": 1.23, "grad_norm": 0.5034623146057129, "learning_rate": 0.0003825133552528755, "loss": 1.7848, "step": 37088 }, { "epoch": 1.23, "grad_norm": 0.5295751690864563, "learning_rate": 0.0003825033062088481, "loss": 1.8159, "step": 37089 }, { "epoch": 1.23, "grad_norm": 0.4865666925907135, "learning_rate": 0.00038249325706467394, "loss": 1.7627, "step": 37090 }, { "epoch": 1.23, "grad_norm": 0.4939952790737152, "learning_rate": 0.000382483207820365, "loss": 1.7004, "step": 37091 }, { "epoch": 1.23, "grad_norm": 0.5109577775001526, "learning_rate": 0.00038247315847593375, "loss": 1.7796, "step": 37092 }, { "epoch": 1.23, "grad_norm": 0.47355785965919495, "learning_rate": 0.00038246310903139214, "loss": 1.7945, "step": 37093 }, { "epoch": 1.23, "grad_norm": 0.46518784761428833, "learning_rate": 0.00038245305948675243, "loss": 1.7721, "step": 37094 }, { "epoch": 1.23, "grad_norm": 0.47021743655204773, "learning_rate": 0.000382443009842027, "loss": 1.7756, "step": 37095 }, { "epoch": 1.23, "grad_norm": 0.4883265495300293, "learning_rate": 0.0003824329600972277, "loss": 1.7395, "step": 37096 }, { "epoch": 1.23, "grad_norm": 0.4811306595802307, "learning_rate": 0.0003824229102523671, "loss": 1.738, "step": 37097 }, { "epoch": 1.23, "grad_norm": 0.47134366631507874, "learning_rate": 0.00038241286030745716, "loss": 1.7949, "step": 37098 }, { "epoch": 1.23, "grad_norm": 0.4785296618938446, "learning_rate": 0.0003824028102625102, "loss": 1.7921, "step": 37099 }, { "epoch": 1.23, "grad_norm": 0.4859369695186615, "learning_rate": 0.00038239276011753836, "loss": 1.8191, "step": 37100 }, { "epoch": 1.23, "grad_norm": 0.5002183318138123, "learning_rate": 0.00038238270987255376, "loss": 1.7817, "step": 37101 }, { "epoch": 1.23, "grad_norm": 0.4843173921108246, "learning_rate": 0.0003823726595275688, "loss": 1.8098, "step": 37102 }, { "epoch": 1.23, "grad_norm": 0.49967458844184875, "learning_rate": 0.0003823626090825955, "loss": 1.7498, "step": 37103 }, { "epoch": 1.23, "grad_norm": 0.5055394172668457, "learning_rate": 0.0003823525585376462, "loss": 1.8013, "step": 37104 }, { "epoch": 1.23, "grad_norm": 0.47314345836639404, "learning_rate": 0.0003823425078927329, "loss": 1.7771, "step": 37105 }, { "epoch": 1.23, "grad_norm": 0.48190486431121826, "learning_rate": 0.000382332457147868, "loss": 1.715, "step": 37106 }, { "epoch": 1.23, "grad_norm": 0.47232818603515625, "learning_rate": 0.0003823224063030637, "loss": 1.7735, "step": 37107 }, { "epoch": 1.23, "grad_norm": 0.48203274607658386, "learning_rate": 0.00038231235535833206, "loss": 1.8287, "step": 37108 }, { "epoch": 1.23, "grad_norm": 0.5013493299484253, "learning_rate": 0.0003823023043136853, "loss": 1.8702, "step": 37109 }, { "epoch": 1.23, "grad_norm": 0.4987296164035797, "learning_rate": 0.0003822922531691358, "loss": 1.7988, "step": 37110 }, { "epoch": 1.23, "grad_norm": 0.502488374710083, "learning_rate": 0.00038228220192469545, "loss": 1.7503, "step": 37111 }, { "epoch": 1.23, "grad_norm": 0.47718819975852966, "learning_rate": 0.00038227215058037674, "loss": 1.846, "step": 37112 }, { "epoch": 1.23, "grad_norm": 0.5088362097740173, "learning_rate": 0.00038226209913619184, "loss": 1.7807, "step": 37113 }, { "epoch": 1.23, "grad_norm": 0.47446176409721375, "learning_rate": 0.00038225204759215274, "loss": 1.7758, "step": 37114 }, { "epoch": 1.23, "grad_norm": 0.48065513372421265, "learning_rate": 0.00038224199594827184, "loss": 1.85, "step": 37115 }, { "epoch": 1.23, "grad_norm": 0.4790579378604889, "learning_rate": 0.00038223194420456124, "loss": 1.7511, "step": 37116 }, { "epoch": 1.23, "grad_norm": 0.47674933075904846, "learning_rate": 0.0003822218923610332, "loss": 1.7937, "step": 37117 }, { "epoch": 1.23, "grad_norm": 0.47415927052497864, "learning_rate": 0.00038221184041769987, "loss": 1.7807, "step": 37118 }, { "epoch": 1.23, "grad_norm": 0.4997807741165161, "learning_rate": 0.00038220178837457345, "loss": 1.8502, "step": 37119 }, { "epoch": 1.23, "grad_norm": 0.4934476315975189, "learning_rate": 0.0003821917362316663, "loss": 1.7806, "step": 37120 }, { "epoch": 1.24, "grad_norm": 0.5104144215583801, "learning_rate": 0.0003821816839889904, "loss": 1.7512, "step": 37121 }, { "epoch": 1.24, "grad_norm": 0.5011703968048096, "learning_rate": 0.000382171631646558, "loss": 1.773, "step": 37122 }, { "epoch": 1.24, "grad_norm": 0.49657151103019714, "learning_rate": 0.0003821615792043815, "loss": 1.7287, "step": 37123 }, { "epoch": 1.24, "grad_norm": 0.47031867504119873, "learning_rate": 0.00038215152666247276, "loss": 1.7281, "step": 37124 }, { "epoch": 1.24, "grad_norm": 0.4845312535762787, "learning_rate": 0.0003821414740208443, "loss": 1.7658, "step": 37125 }, { "epoch": 1.24, "grad_norm": 0.5407505631446838, "learning_rate": 0.00038213142127950804, "loss": 1.8147, "step": 37126 }, { "epoch": 1.24, "grad_norm": 0.501137375831604, "learning_rate": 0.00038212136843847653, "loss": 1.6742, "step": 37127 }, { "epoch": 1.24, "grad_norm": 0.5102688670158386, "learning_rate": 0.0003821113154977617, "loss": 1.8087, "step": 37128 }, { "epoch": 1.24, "grad_norm": 0.48612967133522034, "learning_rate": 0.0003821012624573758, "loss": 1.8854, "step": 37129 }, { "epoch": 1.24, "grad_norm": 0.48694804310798645, "learning_rate": 0.0003820912093173312, "loss": 1.7106, "step": 37130 }, { "epoch": 1.24, "grad_norm": 0.502152681350708, "learning_rate": 0.00038208115607763973, "loss": 1.8246, "step": 37131 }, { "epoch": 1.24, "grad_norm": 0.4988967776298523, "learning_rate": 0.00038207110273831405, "loss": 1.7406, "step": 37132 }, { "epoch": 1.24, "grad_norm": 0.48155301809310913, "learning_rate": 0.0003820610492993661, "loss": 1.7201, "step": 37133 }, { "epoch": 1.24, "grad_norm": 0.4834894835948944, "learning_rate": 0.000382050995760808, "loss": 1.8075, "step": 37134 }, { "epoch": 1.24, "grad_norm": 0.47921615839004517, "learning_rate": 0.00038204094212265213, "loss": 1.7537, "step": 37135 }, { "epoch": 1.24, "grad_norm": 0.4958481788635254, "learning_rate": 0.0003820308883849107, "loss": 1.7222, "step": 37136 }, { "epoch": 1.24, "grad_norm": 0.4839223325252533, "learning_rate": 0.0003820208345475959, "loss": 1.7258, "step": 37137 }, { "epoch": 1.24, "grad_norm": 0.4951421022415161, "learning_rate": 0.00038201078061071985, "loss": 1.7524, "step": 37138 }, { "epoch": 1.24, "grad_norm": 0.47761252522468567, "learning_rate": 0.0003820007265742947, "loss": 1.8075, "step": 37139 }, { "epoch": 1.24, "grad_norm": 0.5221141576766968, "learning_rate": 0.0003819906724383329, "loss": 1.7468, "step": 37140 }, { "epoch": 1.24, "grad_norm": 0.491098552942276, "learning_rate": 0.00038198061820284636, "loss": 1.6809, "step": 37141 }, { "epoch": 1.24, "grad_norm": 0.4796455502510071, "learning_rate": 0.0003819705638678475, "loss": 1.746, "step": 37142 }, { "epoch": 1.24, "grad_norm": 0.48691704869270325, "learning_rate": 0.0003819605094333485, "loss": 1.7795, "step": 37143 }, { "epoch": 1.24, "grad_norm": 0.4742603003978729, "learning_rate": 0.00038195045489936144, "loss": 1.7201, "step": 37144 }, { "epoch": 1.24, "grad_norm": 0.49340134859085083, "learning_rate": 0.0003819404002658987, "loss": 1.6703, "step": 37145 }, { "epoch": 1.24, "grad_norm": 0.5157901048660278, "learning_rate": 0.0003819303455329723, "loss": 1.8673, "step": 37146 }, { "epoch": 1.24, "grad_norm": 0.47283822298049927, "learning_rate": 0.0003819202907005945, "loss": 1.8601, "step": 37147 }, { "epoch": 1.24, "grad_norm": 0.4820632040500641, "learning_rate": 0.00038191023576877766, "loss": 1.752, "step": 37148 }, { "epoch": 1.24, "grad_norm": 0.5029701590538025, "learning_rate": 0.00038190018073753373, "loss": 1.796, "step": 37149 }, { "epoch": 1.24, "grad_norm": 0.46632683277130127, "learning_rate": 0.0003818901256068751, "loss": 1.7969, "step": 37150 }, { "epoch": 1.24, "grad_norm": 0.4817531108856201, "learning_rate": 0.000381880070376814, "loss": 1.7829, "step": 37151 }, { "epoch": 1.24, "grad_norm": 0.49288663268089294, "learning_rate": 0.0003818700150473624, "loss": 1.8138, "step": 37152 }, { "epoch": 1.24, "grad_norm": 0.48904603719711304, "learning_rate": 0.00038185995961853277, "loss": 1.8411, "step": 37153 }, { "epoch": 1.24, "grad_norm": 0.48815828561782837, "learning_rate": 0.0003818499040903372, "loss": 1.8082, "step": 37154 }, { "epoch": 1.24, "grad_norm": 0.5048857927322388, "learning_rate": 0.00038183984846278796, "loss": 1.7971, "step": 37155 }, { "epoch": 1.24, "grad_norm": 0.48199352622032166, "learning_rate": 0.0003818297927358971, "loss": 1.8075, "step": 37156 }, { "epoch": 1.24, "grad_norm": 0.46490442752838135, "learning_rate": 0.00038181973690967704, "loss": 1.8327, "step": 37157 }, { "epoch": 1.24, "grad_norm": 0.46493464708328247, "learning_rate": 0.00038180968098413973, "loss": 1.7914, "step": 37158 }, { "epoch": 1.24, "grad_norm": 0.4962453246116638, "learning_rate": 0.00038179962495929763, "loss": 1.8626, "step": 37159 }, { "epoch": 1.24, "grad_norm": 0.47142699360847473, "learning_rate": 0.0003817895688351628, "loss": 1.7582, "step": 37160 }, { "epoch": 1.24, "grad_norm": 0.4746653437614441, "learning_rate": 0.0003817795126117475, "loss": 1.7931, "step": 37161 }, { "epoch": 1.24, "grad_norm": 0.48777851462364197, "learning_rate": 0.00038176945628906394, "loss": 1.7994, "step": 37162 }, { "epoch": 1.24, "grad_norm": 0.47823625802993774, "learning_rate": 0.0003817593998671243, "loss": 1.8902, "step": 37163 }, { "epoch": 1.24, "grad_norm": 0.47936055064201355, "learning_rate": 0.0003817493433459408, "loss": 1.8008, "step": 37164 }, { "epoch": 1.24, "grad_norm": 0.46426457166671753, "learning_rate": 0.0003817392867255256, "loss": 1.745, "step": 37165 }, { "epoch": 1.24, "grad_norm": 0.4796641767024994, "learning_rate": 0.00038172923000589094, "loss": 1.8887, "step": 37166 }, { "epoch": 1.24, "grad_norm": 0.4929310083389282, "learning_rate": 0.00038171917318704905, "loss": 1.7814, "step": 37167 }, { "epoch": 1.24, "grad_norm": 0.4939548671245575, "learning_rate": 0.00038170911626901225, "loss": 1.7493, "step": 37168 }, { "epoch": 1.24, "grad_norm": 0.46797528862953186, "learning_rate": 0.00038169905925179243, "loss": 1.8058, "step": 37169 }, { "epoch": 1.24, "grad_norm": 0.48332929611206055, "learning_rate": 0.0003816890021354021, "loss": 1.8374, "step": 37170 }, { "epoch": 1.24, "grad_norm": 0.48609939217567444, "learning_rate": 0.0003816789449198533, "loss": 1.782, "step": 37171 }, { "epoch": 1.24, "grad_norm": 0.476664274930954, "learning_rate": 0.0003816688876051583, "loss": 1.7957, "step": 37172 }, { "epoch": 1.24, "grad_norm": 0.474279522895813, "learning_rate": 0.0003816588301913294, "loss": 1.7542, "step": 37173 }, { "epoch": 1.24, "grad_norm": 0.469534307718277, "learning_rate": 0.0003816487726783786, "loss": 1.7885, "step": 37174 }, { "epoch": 1.24, "grad_norm": 0.4798506498336792, "learning_rate": 0.0003816387150663183, "loss": 1.7167, "step": 37175 }, { "epoch": 1.24, "grad_norm": 0.4711049497127533, "learning_rate": 0.0003816286573551606, "loss": 1.7528, "step": 37176 }, { "epoch": 1.24, "grad_norm": 0.4714280962944031, "learning_rate": 0.00038161859954491774, "loss": 1.7147, "step": 37177 }, { "epoch": 1.24, "grad_norm": 0.4877997636795044, "learning_rate": 0.00038160854163560195, "loss": 1.7888, "step": 37178 }, { "epoch": 1.24, "grad_norm": 0.5057596564292908, "learning_rate": 0.0003815984836272253, "loss": 1.7422, "step": 37179 }, { "epoch": 1.24, "grad_norm": 0.46058332920074463, "learning_rate": 0.0003815884255198003, "loss": 1.752, "step": 37180 }, { "epoch": 1.24, "grad_norm": 0.4765926003456116, "learning_rate": 0.00038157836731333875, "loss": 1.7691, "step": 37181 }, { "epoch": 1.24, "grad_norm": 0.4784330725669861, "learning_rate": 0.00038156830900785326, "loss": 1.7686, "step": 37182 }, { "epoch": 1.24, "grad_norm": 0.48670628666877747, "learning_rate": 0.00038155825060335577, "loss": 1.7208, "step": 37183 }, { "epoch": 1.24, "grad_norm": 0.47928085923194885, "learning_rate": 0.00038154819209985864, "loss": 1.8673, "step": 37184 }, { "epoch": 1.24, "grad_norm": 0.4808885157108307, "learning_rate": 0.00038153813349737397, "loss": 1.8, "step": 37185 }, { "epoch": 1.24, "grad_norm": 0.5584072470664978, "learning_rate": 0.00038152807479591405, "loss": 1.7082, "step": 37186 }, { "epoch": 1.24, "grad_norm": 0.4894629418849945, "learning_rate": 0.0003815180159954911, "loss": 1.7691, "step": 37187 }, { "epoch": 1.24, "grad_norm": 0.47817349433898926, "learning_rate": 0.0003815079570961172, "loss": 1.8018, "step": 37188 }, { "epoch": 1.24, "grad_norm": 0.4760420620441437, "learning_rate": 0.0003814978980978047, "loss": 1.7983, "step": 37189 }, { "epoch": 1.24, "grad_norm": 0.47749581933021545, "learning_rate": 0.0003814878390005657, "loss": 1.7189, "step": 37190 }, { "epoch": 1.24, "grad_norm": 0.48541340231895447, "learning_rate": 0.0003814777798044125, "loss": 1.7915, "step": 37191 }, { "epoch": 1.24, "grad_norm": 0.47962409257888794, "learning_rate": 0.0003814677205093574, "loss": 1.7086, "step": 37192 }, { "epoch": 1.24, "grad_norm": 0.48108014464378357, "learning_rate": 0.0003814576611154124, "loss": 1.8396, "step": 37193 }, { "epoch": 1.24, "grad_norm": 0.7731775045394897, "learning_rate": 0.0003814476016225898, "loss": 1.8513, "step": 37194 }, { "epoch": 1.24, "grad_norm": 0.474536657333374, "learning_rate": 0.0003814375420309018, "loss": 1.8109, "step": 37195 }, { "epoch": 1.24, "grad_norm": 0.4886499345302582, "learning_rate": 0.00038142748234036057, "loss": 1.7856, "step": 37196 }, { "epoch": 1.24, "grad_norm": 0.48361265659332275, "learning_rate": 0.00038141742255097847, "loss": 1.7249, "step": 37197 }, { "epoch": 1.24, "grad_norm": 0.4741297960281372, "learning_rate": 0.00038140736266276755, "loss": 1.6798, "step": 37198 }, { "epoch": 1.24, "grad_norm": 0.48408520221710205, "learning_rate": 0.00038139730267574016, "loss": 1.7389, "step": 37199 }, { "epoch": 1.24, "grad_norm": 0.47820812463760376, "learning_rate": 0.0003813872425899084, "loss": 1.7874, "step": 37200 }, { "epoch": 1.24, "grad_norm": 0.46683070063591003, "learning_rate": 0.0003813771824052845, "loss": 1.8092, "step": 37201 }, { "epoch": 1.24, "grad_norm": 0.47218576073646545, "learning_rate": 0.0003813671221218807, "loss": 1.7685, "step": 37202 }, { "epoch": 1.24, "grad_norm": 0.47100427746772766, "learning_rate": 0.0003813570617397092, "loss": 1.7294, "step": 37203 }, { "epoch": 1.24, "grad_norm": 0.48807573318481445, "learning_rate": 0.0003813470012587822, "loss": 1.7945, "step": 37204 }, { "epoch": 1.24, "grad_norm": 0.4622713327407837, "learning_rate": 0.000381336940679112, "loss": 1.7944, "step": 37205 }, { "epoch": 1.24, "grad_norm": 0.49308836460113525, "learning_rate": 0.0003813268800007107, "loss": 1.8829, "step": 37206 }, { "epoch": 1.24, "grad_norm": 0.4711662232875824, "learning_rate": 0.0003813168192235905, "loss": 1.7482, "step": 37207 }, { "epoch": 1.24, "grad_norm": 0.4824388325214386, "learning_rate": 0.0003813067583477637, "loss": 1.7649, "step": 37208 }, { "epoch": 1.24, "grad_norm": 0.47732987999916077, "learning_rate": 0.0003812966973732425, "loss": 1.716, "step": 37209 }, { "epoch": 1.24, "grad_norm": 0.4737716019153595, "learning_rate": 0.00038128663630003913, "loss": 1.7687, "step": 37210 }, { "epoch": 1.24, "grad_norm": 0.4944096505641937, "learning_rate": 0.0003812765751281655, "loss": 1.7768, "step": 37211 }, { "epoch": 1.24, "grad_norm": 0.4866148829460144, "learning_rate": 0.0003812665138576344, "loss": 1.7366, "step": 37212 }, { "epoch": 1.24, "grad_norm": 0.48538610339164734, "learning_rate": 0.00038125645248845763, "loss": 1.7714, "step": 37213 }, { "epoch": 1.24, "grad_norm": 0.49436816573143005, "learning_rate": 0.00038124639102064746, "loss": 1.8425, "step": 37214 }, { "epoch": 1.24, "grad_norm": 0.4852001667022705, "learning_rate": 0.0003812363294542161, "loss": 1.8296, "step": 37215 }, { "epoch": 1.24, "grad_norm": 0.4968646466732025, "learning_rate": 0.0003812262677891759, "loss": 1.8216, "step": 37216 }, { "epoch": 1.24, "grad_norm": 0.4697042405605316, "learning_rate": 0.00038121620602553895, "loss": 1.7508, "step": 37217 }, { "epoch": 1.24, "grad_norm": 0.5089330673217773, "learning_rate": 0.0003812061441633174, "loss": 1.6569, "step": 37218 }, { "epoch": 1.24, "grad_norm": 0.4842333197593689, "learning_rate": 0.0003811960822025237, "loss": 1.7614, "step": 37219 }, { "epoch": 1.24, "grad_norm": 0.4826246201992035, "learning_rate": 0.00038118602014316984, "loss": 1.861, "step": 37220 }, { "epoch": 1.24, "grad_norm": 0.4825899004936218, "learning_rate": 0.0003811759579852682, "loss": 1.7622, "step": 37221 }, { "epoch": 1.24, "grad_norm": 0.4921037256717682, "learning_rate": 0.0003811658957288308, "loss": 1.7521, "step": 37222 }, { "epoch": 1.24, "grad_norm": 0.4797704815864563, "learning_rate": 0.00038115583337386996, "loss": 1.7718, "step": 37223 }, { "epoch": 1.24, "grad_norm": 0.4867151379585266, "learning_rate": 0.00038114577092039805, "loss": 1.7768, "step": 37224 }, { "epoch": 1.24, "grad_norm": 0.48508119583129883, "learning_rate": 0.00038113570836842696, "loss": 1.7881, "step": 37225 }, { "epoch": 1.24, "grad_norm": 0.4794830083847046, "learning_rate": 0.0003811256457179692, "loss": 1.72, "step": 37226 }, { "epoch": 1.24, "grad_norm": 0.4780576825141907, "learning_rate": 0.0003811155829690367, "loss": 1.7781, "step": 37227 }, { "epoch": 1.24, "grad_norm": 0.4909313917160034, "learning_rate": 0.00038110552012164195, "loss": 1.7744, "step": 37228 }, { "epoch": 1.24, "grad_norm": 0.49271267652511597, "learning_rate": 0.000381095457175797, "loss": 1.8146, "step": 37229 }, { "epoch": 1.24, "grad_norm": 0.49947190284729004, "learning_rate": 0.00038108539413151424, "loss": 1.7784, "step": 37230 }, { "epoch": 1.24, "grad_norm": 0.46299341320991516, "learning_rate": 0.0003810753309888056, "loss": 1.83, "step": 37231 }, { "epoch": 1.24, "grad_norm": 0.49705636501312256, "learning_rate": 0.0003810652677476835, "loss": 1.6871, "step": 37232 }, { "epoch": 1.24, "grad_norm": 0.46788540482521057, "learning_rate": 0.00038105520440816015, "loss": 1.7004, "step": 37233 }, { "epoch": 1.24, "grad_norm": 0.46893686056137085, "learning_rate": 0.0003810451409702476, "loss": 1.8415, "step": 37234 }, { "epoch": 1.24, "grad_norm": 0.5058104395866394, "learning_rate": 0.0003810350774339584, "loss": 1.8038, "step": 37235 }, { "epoch": 1.24, "grad_norm": 0.4696379005908966, "learning_rate": 0.00038102501379930437, "loss": 1.7832, "step": 37236 }, { "epoch": 1.24, "grad_norm": 0.48586559295654297, "learning_rate": 0.00038101495006629803, "loss": 1.6823, "step": 37237 }, { "epoch": 1.24, "grad_norm": 0.47582197189331055, "learning_rate": 0.0003810048862349514, "loss": 1.7084, "step": 37238 }, { "epoch": 1.24, "grad_norm": 0.4982951879501343, "learning_rate": 0.00038099482230527683, "loss": 1.8389, "step": 37239 }, { "epoch": 1.24, "grad_norm": 0.5040647387504578, "learning_rate": 0.0003809847582772865, "loss": 1.766, "step": 37240 }, { "epoch": 1.24, "grad_norm": 0.5018308758735657, "learning_rate": 0.0003809746941509925, "loss": 1.6748, "step": 37241 }, { "epoch": 1.24, "grad_norm": 0.48197415471076965, "learning_rate": 0.0003809646299264072, "loss": 1.7598, "step": 37242 }, { "epoch": 1.24, "grad_norm": 0.498534619808197, "learning_rate": 0.00038095456560354276, "loss": 1.7145, "step": 37243 }, { "epoch": 1.24, "grad_norm": 0.48164287209510803, "learning_rate": 0.00038094450118241147, "loss": 1.728, "step": 37244 }, { "epoch": 1.24, "grad_norm": 0.4924824833869934, "learning_rate": 0.00038093443666302544, "loss": 1.7712, "step": 37245 }, { "epoch": 1.24, "grad_norm": 0.46913325786590576, "learning_rate": 0.0003809243720453969, "loss": 1.8283, "step": 37246 }, { "epoch": 1.24, "grad_norm": 0.47023671865463257, "learning_rate": 0.0003809143073295382, "loss": 1.7279, "step": 37247 }, { "epoch": 1.24, "grad_norm": 0.5341416597366333, "learning_rate": 0.0003809042425154613, "loss": 1.7715, "step": 37248 }, { "epoch": 1.24, "grad_norm": 0.48187682032585144, "learning_rate": 0.0003808941776031787, "loss": 1.7907, "step": 37249 }, { "epoch": 1.24, "grad_norm": 0.5080541968345642, "learning_rate": 0.0003808841125927024, "loss": 1.7956, "step": 37250 }, { "epoch": 1.24, "grad_norm": 0.4835597574710846, "learning_rate": 0.00038087404748404476, "loss": 1.7568, "step": 37251 }, { "epoch": 1.24, "grad_norm": 0.48026207089424133, "learning_rate": 0.0003808639822772179, "loss": 1.729, "step": 37252 }, { "epoch": 1.24, "grad_norm": 0.5306305885314941, "learning_rate": 0.0003808539169722341, "loss": 1.7759, "step": 37253 }, { "epoch": 1.24, "grad_norm": 0.4783017635345459, "learning_rate": 0.00038084385156910566, "loss": 1.7952, "step": 37254 }, { "epoch": 1.24, "grad_norm": 0.4750581681728363, "learning_rate": 0.00038083378606784457, "loss": 1.7592, "step": 37255 }, { "epoch": 1.24, "grad_norm": 0.48323798179626465, "learning_rate": 0.0003808237204684632, "loss": 1.7563, "step": 37256 }, { "epoch": 1.24, "grad_norm": 0.49746838212013245, "learning_rate": 0.00038081365477097374, "loss": 1.7488, "step": 37257 }, { "epoch": 1.24, "grad_norm": 0.4811221957206726, "learning_rate": 0.0003808035889753884, "loss": 1.8113, "step": 37258 }, { "epoch": 1.24, "grad_norm": 0.4948635399341583, "learning_rate": 0.0003807935230817194, "loss": 1.829, "step": 37259 }, { "epoch": 1.24, "grad_norm": 0.4748992621898651, "learning_rate": 0.0003807834570899791, "loss": 1.7783, "step": 37260 }, { "epoch": 1.24, "grad_norm": 0.48997917771339417, "learning_rate": 0.0003807733910001794, "loss": 1.7494, "step": 37261 }, { "epoch": 1.24, "grad_norm": 0.4851463735103607, "learning_rate": 0.0003807633248123328, "loss": 1.7859, "step": 37262 }, { "epoch": 1.24, "grad_norm": 0.47699829936027527, "learning_rate": 0.0003807532585264514, "loss": 1.8785, "step": 37263 }, { "epoch": 1.24, "grad_norm": 0.4884018301963806, "learning_rate": 0.00038074319214254743, "loss": 1.837, "step": 37264 }, { "epoch": 1.24, "grad_norm": 0.4951814115047455, "learning_rate": 0.0003807331256606332, "loss": 1.7038, "step": 37265 }, { "epoch": 1.24, "grad_norm": 0.4879707396030426, "learning_rate": 0.00038072305908072075, "loss": 1.8066, "step": 37266 }, { "epoch": 1.24, "grad_norm": 0.49141567945480347, "learning_rate": 0.0003807129924028224, "loss": 1.8451, "step": 37267 }, { "epoch": 1.24, "grad_norm": 0.4731733202934265, "learning_rate": 0.00038070292562695043, "loss": 1.7551, "step": 37268 }, { "epoch": 1.24, "grad_norm": 0.46805596351623535, "learning_rate": 0.000380692858753117, "loss": 1.7825, "step": 37269 }, { "epoch": 1.24, "grad_norm": 0.4843207895755768, "learning_rate": 0.00038068279178133426, "loss": 1.8091, "step": 37270 }, { "epoch": 1.24, "grad_norm": 0.5114553570747375, "learning_rate": 0.00038067272471161453, "loss": 1.7886, "step": 37271 }, { "epoch": 1.24, "grad_norm": 0.5061213970184326, "learning_rate": 0.0003806626575439701, "loss": 1.7522, "step": 37272 }, { "epoch": 1.24, "grad_norm": 0.4781152904033661, "learning_rate": 0.0003806525902784129, "loss": 1.7214, "step": 37273 }, { "epoch": 1.24, "grad_norm": 0.47571367025375366, "learning_rate": 0.00038064252291495547, "loss": 1.7177, "step": 37274 }, { "epoch": 1.24, "grad_norm": 0.4794566333293915, "learning_rate": 0.00038063245545360994, "loss": 1.7254, "step": 37275 }, { "epoch": 1.24, "grad_norm": 0.543337345123291, "learning_rate": 0.0003806223878943884, "loss": 1.766, "step": 37276 }, { "epoch": 1.24, "grad_norm": 0.4774407148361206, "learning_rate": 0.00038061232023730315, "loss": 1.7954, "step": 37277 }, { "epoch": 1.24, "grad_norm": 0.49634861946105957, "learning_rate": 0.00038060225248236646, "loss": 1.8665, "step": 37278 }, { "epoch": 1.24, "grad_norm": 0.48160189390182495, "learning_rate": 0.0003805921846295906, "loss": 1.712, "step": 37279 }, { "epoch": 1.24, "grad_norm": 0.4942917823791504, "learning_rate": 0.00038058211667898757, "loss": 1.8533, "step": 37280 }, { "epoch": 1.24, "grad_norm": 0.5027092695236206, "learning_rate": 0.00038057204863056976, "loss": 1.7337, "step": 37281 }, { "epoch": 1.24, "grad_norm": 0.46497413516044617, "learning_rate": 0.00038056198048434933, "loss": 1.8431, "step": 37282 }, { "epoch": 1.24, "grad_norm": 0.4817937910556793, "learning_rate": 0.0003805519122403386, "loss": 1.7007, "step": 37283 }, { "epoch": 1.24, "grad_norm": 0.4977521300315857, "learning_rate": 0.0003805418438985496, "loss": 1.7057, "step": 37284 }, { "epoch": 1.24, "grad_norm": 0.48187166452407837, "learning_rate": 0.0003805317754589948, "loss": 1.7483, "step": 37285 }, { "epoch": 1.24, "grad_norm": 0.4984019100666046, "learning_rate": 0.0003805217069216862, "loss": 1.7997, "step": 37286 }, { "epoch": 1.24, "grad_norm": 0.4756127893924713, "learning_rate": 0.00038051163828663615, "loss": 1.8502, "step": 37287 }, { "epoch": 1.24, "grad_norm": 0.5139567852020264, "learning_rate": 0.00038050156955385687, "loss": 1.7765, "step": 37288 }, { "epoch": 1.24, "grad_norm": 0.4602351784706116, "learning_rate": 0.00038049150072336045, "loss": 1.8197, "step": 37289 }, { "epoch": 1.24, "grad_norm": 0.4714391827583313, "learning_rate": 0.00038048143179515926, "loss": 1.7696, "step": 37290 }, { "epoch": 1.24, "grad_norm": 0.4880165457725525, "learning_rate": 0.0003804713627692655, "loss": 1.817, "step": 37291 }, { "epoch": 1.24, "grad_norm": 0.49487796425819397, "learning_rate": 0.00038046129364569133, "loss": 1.7673, "step": 37292 }, { "epoch": 1.24, "grad_norm": 0.48158934712409973, "learning_rate": 0.000380451224424449, "loss": 1.7605, "step": 37293 }, { "epoch": 1.24, "grad_norm": 0.4762933552265167, "learning_rate": 0.0003804411551055507, "loss": 1.762, "step": 37294 }, { "epoch": 1.24, "grad_norm": 0.4858313500881195, "learning_rate": 0.0003804310856890088, "loss": 1.8395, "step": 37295 }, { "epoch": 1.24, "grad_norm": 0.4666411280632019, "learning_rate": 0.0003804210161748353, "loss": 1.7328, "step": 37296 }, { "epoch": 1.24, "grad_norm": 0.4844736158847809, "learning_rate": 0.00038041094656304265, "loss": 1.8235, "step": 37297 }, { "epoch": 1.24, "grad_norm": 0.49892938137054443, "learning_rate": 0.0003804008768536428, "loss": 1.7891, "step": 37298 }, { "epoch": 1.24, "grad_norm": 0.4858723282814026, "learning_rate": 0.0003803908070466483, "loss": 1.7982, "step": 37299 }, { "epoch": 1.24, "grad_norm": 0.48045605421066284, "learning_rate": 0.0003803807371420711, "loss": 1.7811, "step": 37300 }, { "epoch": 1.24, "grad_norm": 0.5135823488235474, "learning_rate": 0.00038037066713992354, "loss": 1.7677, "step": 37301 }, { "epoch": 1.24, "grad_norm": 0.4962508976459503, "learning_rate": 0.0003803605970402179, "loss": 1.8439, "step": 37302 }, { "epoch": 1.24, "grad_norm": 0.4887463450431824, "learning_rate": 0.00038035052684296623, "loss": 1.8051, "step": 37303 }, { "epoch": 1.24, "grad_norm": 0.4917459785938263, "learning_rate": 0.000380340456548181, "loss": 1.7732, "step": 37304 }, { "epoch": 1.24, "grad_norm": 0.4975432753562927, "learning_rate": 0.00038033038615587416, "loss": 1.8238, "step": 37305 }, { "epoch": 1.24, "grad_norm": 0.48961687088012695, "learning_rate": 0.00038032031566605814, "loss": 1.7255, "step": 37306 }, { "epoch": 1.24, "grad_norm": 0.48324862122535706, "learning_rate": 0.00038031024507874503, "loss": 1.802, "step": 37307 }, { "epoch": 1.24, "grad_norm": 0.48218536376953125, "learning_rate": 0.0003803001743939471, "loss": 1.8433, "step": 37308 }, { "epoch": 1.24, "grad_norm": 0.4964452385902405, "learning_rate": 0.0003802901036116768, "loss": 1.8234, "step": 37309 }, { "epoch": 1.24, "grad_norm": 0.4725465178489685, "learning_rate": 0.0003802800327319459, "loss": 1.7523, "step": 37310 }, { "epoch": 1.24, "grad_norm": 0.4788084626197815, "learning_rate": 0.00038026996175476695, "loss": 1.6931, "step": 37311 }, { "epoch": 1.24, "grad_norm": 0.4827765226364136, "learning_rate": 0.00038025989068015206, "loss": 1.8052, "step": 37312 }, { "epoch": 1.24, "grad_norm": 0.4849780797958374, "learning_rate": 0.00038024981950811353, "loss": 1.7391, "step": 37313 }, { "epoch": 1.24, "grad_norm": 0.47155970335006714, "learning_rate": 0.00038023974823866356, "loss": 1.7488, "step": 37314 }, { "epoch": 1.24, "grad_norm": 0.5121239423751831, "learning_rate": 0.0003802296768718144, "loss": 1.7819, "step": 37315 }, { "epoch": 1.24, "grad_norm": 0.47033002972602844, "learning_rate": 0.0003802196054075781, "loss": 1.7599, "step": 37316 }, { "epoch": 1.24, "grad_norm": 0.48292386531829834, "learning_rate": 0.00038020953384596717, "loss": 1.7719, "step": 37317 }, { "epoch": 1.24, "grad_norm": 0.4902602434158325, "learning_rate": 0.00038019946218699355, "loss": 1.7538, "step": 37318 }, { "epoch": 1.24, "grad_norm": 0.4865284562110901, "learning_rate": 0.00038018939043066963, "loss": 1.6865, "step": 37319 }, { "epoch": 1.24, "grad_norm": 0.4725618064403534, "learning_rate": 0.0003801793185770076, "loss": 1.7882, "step": 37320 }, { "epoch": 1.24, "grad_norm": 0.4789527654647827, "learning_rate": 0.0003801692466260198, "loss": 1.7147, "step": 37321 }, { "epoch": 1.24, "grad_norm": 0.491279274225235, "learning_rate": 0.00038015917457771836, "loss": 1.7908, "step": 37322 }, { "epoch": 1.24, "grad_norm": 0.4960043728351593, "learning_rate": 0.0003801491024321153, "loss": 1.7807, "step": 37323 }, { "epoch": 1.24, "grad_norm": 0.47616904973983765, "learning_rate": 0.0003801390301892232, "loss": 1.7197, "step": 37324 }, { "epoch": 1.24, "grad_norm": 0.48786845803260803, "learning_rate": 0.00038012895784905406, "loss": 1.7397, "step": 37325 }, { "epoch": 1.24, "grad_norm": 0.46884986758232117, "learning_rate": 0.00038011888541162014, "loss": 1.7666, "step": 37326 }, { "epoch": 1.24, "grad_norm": 0.44980353116989136, "learning_rate": 0.0003801088128769338, "loss": 1.7525, "step": 37327 }, { "epoch": 1.24, "grad_norm": 0.4708242118358612, "learning_rate": 0.000380098740245007, "loss": 1.7277, "step": 37328 }, { "epoch": 1.24, "grad_norm": 0.5382870435714722, "learning_rate": 0.0003800886675158523, "loss": 1.8385, "step": 37329 }, { "epoch": 1.24, "grad_norm": 0.5103372931480408, "learning_rate": 0.00038007859468948174, "loss": 1.7664, "step": 37330 }, { "epoch": 1.24, "grad_norm": 0.4773547053337097, "learning_rate": 0.0003800685217659074, "loss": 1.8048, "step": 37331 }, { "epoch": 1.24, "grad_norm": 0.4787522852420807, "learning_rate": 0.00038005844874514186, "loss": 1.7788, "step": 37332 }, { "epoch": 1.24, "grad_norm": 0.4843607544898987, "learning_rate": 0.00038004837562719706, "loss": 1.8383, "step": 37333 }, { "epoch": 1.24, "grad_norm": 0.47830501198768616, "learning_rate": 0.00038003830241208536, "loss": 1.7981, "step": 37334 }, { "epoch": 1.24, "grad_norm": 0.49422702193260193, "learning_rate": 0.0003800282290998189, "loss": 1.8258, "step": 37335 }, { "epoch": 1.24, "grad_norm": 0.48034828901290894, "learning_rate": 0.0003800181556904101, "loss": 1.8346, "step": 37336 }, { "epoch": 1.24, "grad_norm": 0.4888506531715393, "learning_rate": 0.0003800080821838709, "loss": 1.7582, "step": 37337 }, { "epoch": 1.24, "grad_norm": 0.47134149074554443, "learning_rate": 0.00037999800858021373, "loss": 1.7858, "step": 37338 }, { "epoch": 1.24, "grad_norm": 0.49141019582748413, "learning_rate": 0.0003799879348794508, "loss": 1.7404, "step": 37339 }, { "epoch": 1.24, "grad_norm": 0.4842788279056549, "learning_rate": 0.00037997786108159425, "loss": 1.766, "step": 37340 }, { "epoch": 1.24, "grad_norm": 0.47098395228385925, "learning_rate": 0.0003799677871866564, "loss": 1.7632, "step": 37341 }, { "epoch": 1.24, "grad_norm": 0.4896823465824127, "learning_rate": 0.0003799577131946494, "loss": 1.7609, "step": 37342 }, { "epoch": 1.24, "grad_norm": 0.5011561512947083, "learning_rate": 0.00037994763910558556, "loss": 1.7846, "step": 37343 }, { "epoch": 1.24, "grad_norm": 0.4864996373653412, "learning_rate": 0.0003799375649194771, "loss": 1.7174, "step": 37344 }, { "epoch": 1.24, "grad_norm": 0.48712679743766785, "learning_rate": 0.0003799274906363361, "loss": 1.8934, "step": 37345 }, { "epoch": 1.24, "grad_norm": 0.46038779616355896, "learning_rate": 0.00037991741625617494, "loss": 1.7582, "step": 37346 }, { "epoch": 1.24, "grad_norm": 0.4710051119327545, "learning_rate": 0.0003799073417790059, "loss": 1.8066, "step": 37347 }, { "epoch": 1.24, "grad_norm": 0.4869289696216583, "learning_rate": 0.00037989726720484107, "loss": 1.8009, "step": 37348 }, { "epoch": 1.24, "grad_norm": 0.4851471483707428, "learning_rate": 0.00037988719253369274, "loss": 1.8253, "step": 37349 }, { "epoch": 1.24, "grad_norm": 0.47341907024383545, "learning_rate": 0.00037987711776557305, "loss": 1.7784, "step": 37350 }, { "epoch": 1.24, "grad_norm": 0.478586345911026, "learning_rate": 0.00037986704290049434, "loss": 1.7585, "step": 37351 }, { "epoch": 1.24, "grad_norm": 0.47587570548057556, "learning_rate": 0.00037985696793846894, "loss": 1.788, "step": 37352 }, { "epoch": 1.24, "grad_norm": 0.4752722978591919, "learning_rate": 0.00037984689287950875, "loss": 1.7296, "step": 37353 }, { "epoch": 1.24, "grad_norm": 0.503825843334198, "learning_rate": 0.0003798368177236264, "loss": 1.7671, "step": 37354 }, { "epoch": 1.24, "grad_norm": 0.48325374722480774, "learning_rate": 0.00037982674247083367, "loss": 1.7535, "step": 37355 }, { "epoch": 1.24, "grad_norm": 0.47544053196907043, "learning_rate": 0.0003798166671211432, "loss": 1.785, "step": 37356 }, { "epoch": 1.24, "grad_norm": 0.4860718548297882, "learning_rate": 0.0003798065916745671, "loss": 1.6892, "step": 37357 }, { "epoch": 1.24, "grad_norm": 0.5232812762260437, "learning_rate": 0.0003797965161311174, "loss": 1.7278, "step": 37358 }, { "epoch": 1.24, "grad_norm": 0.49016252160072327, "learning_rate": 0.00037978644049080666, "loss": 1.773, "step": 37359 }, { "epoch": 1.24, "grad_norm": 0.48467108607292175, "learning_rate": 0.0003797763647536467, "loss": 1.8172, "step": 37360 }, { "epoch": 1.24, "grad_norm": 0.49725738167762756, "learning_rate": 0.0003797662889196502, "loss": 1.8321, "step": 37361 }, { "epoch": 1.24, "grad_norm": 0.5159828066825867, "learning_rate": 0.00037975621298882913, "loss": 1.7869, "step": 37362 }, { "epoch": 1.24, "grad_norm": 0.4781537652015686, "learning_rate": 0.00037974613696119573, "loss": 1.8201, "step": 37363 }, { "epoch": 1.24, "grad_norm": 0.47697630524635315, "learning_rate": 0.00037973606083676237, "loss": 1.7903, "step": 37364 }, { "epoch": 1.24, "grad_norm": 0.4884912669658661, "learning_rate": 0.000379725984615541, "loss": 1.9012, "step": 37365 }, { "epoch": 1.24, "grad_norm": 0.48182934522628784, "learning_rate": 0.0003797159082975442, "loss": 1.8256, "step": 37366 }, { "epoch": 1.24, "grad_norm": 0.4880642890930176, "learning_rate": 0.00037970583188278394, "loss": 1.7286, "step": 37367 }, { "epoch": 1.24, "grad_norm": 0.46717724204063416, "learning_rate": 0.0003796957553712726, "loss": 1.8428, "step": 37368 }, { "epoch": 1.24, "grad_norm": 0.4797924757003784, "learning_rate": 0.0003796856787630223, "loss": 1.746, "step": 37369 }, { "epoch": 1.24, "grad_norm": 0.48977166414260864, "learning_rate": 0.00037967560205804534, "loss": 1.7828, "step": 37370 }, { "epoch": 1.24, "grad_norm": 0.4747884273529053, "learning_rate": 0.00037966552525635404, "loss": 1.8184, "step": 37371 }, { "epoch": 1.24, "grad_norm": 0.47397178411483765, "learning_rate": 0.00037965544835796036, "loss": 1.8009, "step": 37372 }, { "epoch": 1.24, "grad_norm": 0.4784563183784485, "learning_rate": 0.0003796453713628768, "loss": 1.7582, "step": 37373 }, { "epoch": 1.24, "grad_norm": 0.46501588821411133, "learning_rate": 0.00037963529427111547, "loss": 1.8471, "step": 37374 }, { "epoch": 1.24, "grad_norm": 0.48435020446777344, "learning_rate": 0.00037962521708268866, "loss": 1.8468, "step": 37375 }, { "epoch": 1.24, "grad_norm": 0.4773717224597931, "learning_rate": 0.0003796151397976085, "loss": 1.7763, "step": 37376 }, { "epoch": 1.24, "grad_norm": 0.48497483134269714, "learning_rate": 0.00037960506241588744, "loss": 1.7276, "step": 37377 }, { "epoch": 1.24, "grad_norm": 0.47320815920829773, "learning_rate": 0.0003795949849375374, "loss": 1.8159, "step": 37378 }, { "epoch": 1.24, "grad_norm": 0.47526976466178894, "learning_rate": 0.00037958490736257085, "loss": 1.8149, "step": 37379 }, { "epoch": 1.24, "grad_norm": 1.05977463722229, "learning_rate": 0.0003795748296909999, "loss": 1.7508, "step": 37380 }, { "epoch": 1.24, "grad_norm": 0.46471160650253296, "learning_rate": 0.00037956475192283686, "loss": 1.7443, "step": 37381 }, { "epoch": 1.24, "grad_norm": 0.4722763001918793, "learning_rate": 0.00037955467405809395, "loss": 1.8056, "step": 37382 }, { "epoch": 1.24, "grad_norm": 0.49528342485427856, "learning_rate": 0.00037954459609678337, "loss": 1.8026, "step": 37383 }, { "epoch": 1.24, "grad_norm": 1.6193695068359375, "learning_rate": 0.0003795345180389175, "loss": 1.8165, "step": 37384 }, { "epoch": 1.24, "grad_norm": 0.4719606339931488, "learning_rate": 0.0003795244398845082, "loss": 1.7419, "step": 37385 }, { "epoch": 1.24, "grad_norm": 0.4842109680175781, "learning_rate": 0.0003795143616335681, "loss": 1.7461, "step": 37386 }, { "epoch": 1.24, "grad_norm": 0.4735598862171173, "learning_rate": 0.00037950428328610927, "loss": 1.7846, "step": 37387 }, { "epoch": 1.24, "grad_norm": 0.4812124967575073, "learning_rate": 0.00037949420484214387, "loss": 1.7759, "step": 37388 }, { "epoch": 1.24, "grad_norm": 0.47096171975135803, "learning_rate": 0.00037948412630168436, "loss": 1.736, "step": 37389 }, { "epoch": 1.24, "grad_norm": 0.4860004782676697, "learning_rate": 0.00037947404766474263, "loss": 1.8345, "step": 37390 }, { "epoch": 1.24, "grad_norm": 0.5072750449180603, "learning_rate": 0.00037946396893133135, "loss": 1.8531, "step": 37391 }, { "epoch": 1.24, "grad_norm": 0.4867571294307709, "learning_rate": 0.00037945389010146234, "loss": 1.739, "step": 37392 }, { "epoch": 1.24, "grad_norm": 0.48341843485832214, "learning_rate": 0.00037944381117514814, "loss": 1.832, "step": 37393 }, { "epoch": 1.24, "grad_norm": 0.49516353011131287, "learning_rate": 0.0003794337321524008, "loss": 1.7581, "step": 37394 }, { "epoch": 1.24, "grad_norm": 0.5072430968284607, "learning_rate": 0.00037942365303323257, "loss": 1.8529, "step": 37395 }, { "epoch": 1.24, "grad_norm": 0.4755333662033081, "learning_rate": 0.0003794135738176559, "loss": 1.7715, "step": 37396 }, { "epoch": 1.24, "grad_norm": 0.4778646230697632, "learning_rate": 0.0003794034945056827, "loss": 1.7552, "step": 37397 }, { "epoch": 1.24, "grad_norm": 0.46386536955833435, "learning_rate": 0.00037939341509732533, "loss": 1.8173, "step": 37398 }, { "epoch": 1.24, "grad_norm": 0.4668470025062561, "learning_rate": 0.00037938333559259615, "loss": 1.7482, "step": 37399 }, { "epoch": 1.24, "grad_norm": 0.5093388557434082, "learning_rate": 0.00037937325599150725, "loss": 1.7409, "step": 37400 }, { "epoch": 1.24, "grad_norm": 0.5039051175117493, "learning_rate": 0.00037936317629407083, "loss": 1.8206, "step": 37401 }, { "epoch": 1.24, "grad_norm": 0.48725399374961853, "learning_rate": 0.00037935309650029937, "loss": 1.7758, "step": 37402 }, { "epoch": 1.24, "grad_norm": 0.47266489267349243, "learning_rate": 0.00037934301661020483, "loss": 1.703, "step": 37403 }, { "epoch": 1.24, "grad_norm": 0.49016261100769043, "learning_rate": 0.00037933293662379964, "loss": 1.7576, "step": 37404 }, { "epoch": 1.24, "grad_norm": 0.4911109507083893, "learning_rate": 0.0003793228565410959, "loss": 1.7845, "step": 37405 }, { "epoch": 1.24, "grad_norm": 0.49042633175849915, "learning_rate": 0.0003793127763621059, "loss": 1.7968, "step": 37406 }, { "epoch": 1.24, "grad_norm": 0.47927358746528625, "learning_rate": 0.00037930269608684196, "loss": 1.7207, "step": 37407 }, { "epoch": 1.24, "grad_norm": 0.4598134458065033, "learning_rate": 0.000379292615715316, "loss": 1.7544, "step": 37408 }, { "epoch": 1.24, "grad_norm": 0.4857926666736603, "learning_rate": 0.0003792825352475407, "loss": 1.7679, "step": 37409 }, { "epoch": 1.24, "grad_norm": 0.49946972727775574, "learning_rate": 0.0003792724546835281, "loss": 1.7948, "step": 37410 }, { "epoch": 1.24, "grad_norm": 0.4858458936214447, "learning_rate": 0.0003792623740232903, "loss": 1.7596, "step": 37411 }, { "epoch": 1.24, "grad_norm": 0.4789353013038635, "learning_rate": 0.0003792522932668398, "loss": 1.8367, "step": 37412 }, { "epoch": 1.24, "grad_norm": 0.5001790523529053, "learning_rate": 0.00037924221241418846, "loss": 1.7768, "step": 37413 }, { "epoch": 1.24, "grad_norm": 0.4866721034049988, "learning_rate": 0.000379232131465349, "loss": 1.7979, "step": 37414 }, { "epoch": 1.24, "grad_norm": 0.4876387119293213, "learning_rate": 0.00037922205042033324, "loss": 1.7993, "step": 37415 }, { "epoch": 1.24, "grad_norm": 0.4837668240070343, "learning_rate": 0.00037921196927915364, "loss": 1.7952, "step": 37416 }, { "epoch": 1.24, "grad_norm": 0.477507084608078, "learning_rate": 0.0003792018880418224, "loss": 1.7114, "step": 37417 }, { "epoch": 1.24, "grad_norm": 0.49089720845222473, "learning_rate": 0.00037919180670835167, "loss": 1.7923, "step": 37418 }, { "epoch": 1.24, "grad_norm": 0.4808329641819, "learning_rate": 0.0003791817252787539, "loss": 1.8298, "step": 37419 }, { "epoch": 1.24, "grad_norm": 0.48277002573013306, "learning_rate": 0.000379171643753041, "loss": 1.7609, "step": 37420 }, { "epoch": 1.25, "grad_norm": 0.47389838099479675, "learning_rate": 0.00037916156213122556, "loss": 1.8052, "step": 37421 }, { "epoch": 1.25, "grad_norm": 0.49123111367225647, "learning_rate": 0.0003791514804133195, "loss": 1.8053, "step": 37422 }, { "epoch": 1.25, "grad_norm": 0.4820226728916168, "learning_rate": 0.0003791413985993353, "loss": 1.7834, "step": 37423 }, { "epoch": 1.25, "grad_norm": 0.4854944944381714, "learning_rate": 0.00037913131668928504, "loss": 1.8287, "step": 37424 }, { "epoch": 1.25, "grad_norm": 0.494147926568985, "learning_rate": 0.0003791212346831811, "loss": 1.7836, "step": 37425 }, { "epoch": 1.25, "grad_norm": 0.4847900867462158, "learning_rate": 0.0003791111525810356, "loss": 1.6901, "step": 37426 }, { "epoch": 1.25, "grad_norm": 0.4955865144729614, "learning_rate": 0.0003791010703828608, "loss": 1.7635, "step": 37427 }, { "epoch": 1.25, "grad_norm": 0.4721652567386627, "learning_rate": 0.00037909098808866894, "loss": 1.7719, "step": 37428 }, { "epoch": 1.25, "grad_norm": 0.4851333796977997, "learning_rate": 0.0003790809056984723, "loss": 1.8611, "step": 37429 }, { "epoch": 1.25, "grad_norm": 0.5040425062179565, "learning_rate": 0.0003790708232122831, "loss": 1.7735, "step": 37430 }, { "epoch": 1.25, "grad_norm": 0.48290184140205383, "learning_rate": 0.0003790607406301136, "loss": 1.7767, "step": 37431 }, { "epoch": 1.25, "grad_norm": 0.48100948333740234, "learning_rate": 0.00037905065795197597, "loss": 1.7583, "step": 37432 }, { "epoch": 1.25, "grad_norm": 0.4817740023136139, "learning_rate": 0.00037904057517788246, "loss": 1.8343, "step": 37433 }, { "epoch": 1.25, "grad_norm": 0.5265477299690247, "learning_rate": 0.00037903049230784535, "loss": 1.7545, "step": 37434 }, { "epoch": 1.25, "grad_norm": 0.45914074778556824, "learning_rate": 0.00037902040934187684, "loss": 1.7295, "step": 37435 }, { "epoch": 1.25, "grad_norm": 0.5067219138145447, "learning_rate": 0.0003790103262799893, "loss": 1.7938, "step": 37436 }, { "epoch": 1.25, "grad_norm": 0.5565173625946045, "learning_rate": 0.00037900024312219484, "loss": 1.7602, "step": 37437 }, { "epoch": 1.25, "grad_norm": 0.47374075651168823, "learning_rate": 0.00037899015986850564, "loss": 1.7324, "step": 37438 }, { "epoch": 1.25, "grad_norm": 0.4752163589000702, "learning_rate": 0.0003789800765189341, "loss": 1.7793, "step": 37439 }, { "epoch": 1.25, "grad_norm": 0.49690544605255127, "learning_rate": 0.00037896999307349236, "loss": 1.8127, "step": 37440 }, { "epoch": 1.25, "grad_norm": 0.5000635981559753, "learning_rate": 0.00037895990953219267, "loss": 1.7783, "step": 37441 }, { "epoch": 1.25, "grad_norm": 0.47084343433380127, "learning_rate": 0.0003789498258950473, "loss": 1.7689, "step": 37442 }, { "epoch": 1.25, "grad_norm": 0.4697491526603699, "learning_rate": 0.00037893974216206847, "loss": 1.8278, "step": 37443 }, { "epoch": 1.25, "grad_norm": 0.4674248695373535, "learning_rate": 0.00037892965833326845, "loss": 1.7721, "step": 37444 }, { "epoch": 1.25, "grad_norm": 0.480013370513916, "learning_rate": 0.0003789195744086594, "loss": 1.7989, "step": 37445 }, { "epoch": 1.25, "grad_norm": 0.48157352209091187, "learning_rate": 0.0003789094903882537, "loss": 1.8083, "step": 37446 }, { "epoch": 1.25, "grad_norm": 0.48840004205703735, "learning_rate": 0.0003788994062720634, "loss": 1.7944, "step": 37447 }, { "epoch": 1.25, "grad_norm": 0.47125616669654846, "learning_rate": 0.0003788893220601009, "loss": 1.7325, "step": 37448 }, { "epoch": 1.25, "grad_norm": 0.5073142647743225, "learning_rate": 0.0003788792377523784, "loss": 1.8267, "step": 37449 }, { "epoch": 1.25, "grad_norm": 0.4992569386959076, "learning_rate": 0.0003788691533489081, "loss": 1.8021, "step": 37450 }, { "epoch": 1.25, "grad_norm": 0.48462769389152527, "learning_rate": 0.00037885906884970225, "loss": 1.8384, "step": 37451 }, { "epoch": 1.25, "grad_norm": 0.4892114996910095, "learning_rate": 0.00037884898425477316, "loss": 1.6984, "step": 37452 }, { "epoch": 1.25, "grad_norm": 0.48456206917762756, "learning_rate": 0.00037883889956413303, "loss": 1.8797, "step": 37453 }, { "epoch": 1.25, "grad_norm": 0.48908674716949463, "learning_rate": 0.000378828814777794, "loss": 1.77, "step": 37454 }, { "epoch": 1.25, "grad_norm": 0.4940851330757141, "learning_rate": 0.0003788187298957685, "loss": 1.7041, "step": 37455 }, { "epoch": 1.25, "grad_norm": 0.47305911779403687, "learning_rate": 0.00037880864491806866, "loss": 1.7886, "step": 37456 }, { "epoch": 1.25, "grad_norm": 0.4728432297706604, "learning_rate": 0.0003787985598447067, "loss": 1.8504, "step": 37457 }, { "epoch": 1.25, "grad_norm": 0.4784713089466095, "learning_rate": 0.00037878847467569493, "loss": 1.8011, "step": 37458 }, { "epoch": 1.25, "grad_norm": 0.47800692915916443, "learning_rate": 0.0003787783894110456, "loss": 1.8478, "step": 37459 }, { "epoch": 1.25, "grad_norm": 0.4782026410102844, "learning_rate": 0.0003787683040507708, "loss": 1.7358, "step": 37460 }, { "epoch": 1.25, "grad_norm": 0.47111600637435913, "learning_rate": 0.00037875821859488295, "loss": 1.7566, "step": 37461 }, { "epoch": 1.25, "grad_norm": 0.48326635360717773, "learning_rate": 0.00037874813304339427, "loss": 1.7254, "step": 37462 }, { "epoch": 1.25, "grad_norm": 0.5130744576454163, "learning_rate": 0.0003787380473963169, "loss": 1.788, "step": 37463 }, { "epoch": 1.25, "grad_norm": 0.48962387442588806, "learning_rate": 0.00037872796165366315, "loss": 1.7808, "step": 37464 }, { "epoch": 1.25, "grad_norm": 0.48059365153312683, "learning_rate": 0.0003787178758154453, "loss": 1.829, "step": 37465 }, { "epoch": 1.25, "grad_norm": 0.48421573638916016, "learning_rate": 0.00037870778988167547, "loss": 1.8052, "step": 37466 }, { "epoch": 1.25, "grad_norm": 0.4793145954608917, "learning_rate": 0.000378697703852366, "loss": 1.7894, "step": 37467 }, { "epoch": 1.25, "grad_norm": 0.5139570236206055, "learning_rate": 0.0003786876177275291, "loss": 1.7885, "step": 37468 }, { "epoch": 1.25, "grad_norm": 0.49406033754348755, "learning_rate": 0.00037867753150717714, "loss": 1.736, "step": 37469 }, { "epoch": 1.25, "grad_norm": 0.4749980568885803, "learning_rate": 0.0003786674451913221, "loss": 1.7733, "step": 37470 }, { "epoch": 1.25, "grad_norm": 0.4922383427619934, "learning_rate": 0.00037865735877997645, "loss": 1.9217, "step": 37471 }, { "epoch": 1.25, "grad_norm": 0.49647414684295654, "learning_rate": 0.00037864727227315234, "loss": 1.7964, "step": 37472 }, { "epoch": 1.25, "grad_norm": 0.501786470413208, "learning_rate": 0.0003786371856708621, "loss": 1.7639, "step": 37473 }, { "epoch": 1.25, "grad_norm": 0.4786834120750427, "learning_rate": 0.0003786270989731179, "loss": 1.7013, "step": 37474 }, { "epoch": 1.25, "grad_norm": 0.47068044543266296, "learning_rate": 0.0003786170121799318, "loss": 1.6921, "step": 37475 }, { "epoch": 1.25, "grad_norm": 0.49006497859954834, "learning_rate": 0.00037860692529131636, "loss": 1.8153, "step": 37476 }, { "epoch": 1.25, "grad_norm": 0.49066030979156494, "learning_rate": 0.00037859683830728365, "loss": 1.7733, "step": 37477 }, { "epoch": 1.25, "grad_norm": 0.4974093735218048, "learning_rate": 0.0003785867512278461, "loss": 1.8369, "step": 37478 }, { "epoch": 1.25, "grad_norm": 0.4896462857723236, "learning_rate": 0.00037857666405301563, "loss": 1.8099, "step": 37479 }, { "epoch": 1.25, "grad_norm": 0.47702786326408386, "learning_rate": 0.0003785665767828048, "loss": 1.7593, "step": 37480 }, { "epoch": 1.25, "grad_norm": 0.47652989625930786, "learning_rate": 0.0003785564894172257, "loss": 1.769, "step": 37481 }, { "epoch": 1.25, "grad_norm": 0.47206568717956543, "learning_rate": 0.0003785464019562905, "loss": 1.7797, "step": 37482 }, { "epoch": 1.25, "grad_norm": 0.4769487977027893, "learning_rate": 0.00037853631440001173, "loss": 1.7176, "step": 37483 }, { "epoch": 1.25, "grad_norm": 0.4875747561454773, "learning_rate": 0.00037852622674840127, "loss": 1.7287, "step": 37484 }, { "epoch": 1.25, "grad_norm": 0.49835941195487976, "learning_rate": 0.00037851613900147154, "loss": 1.7616, "step": 37485 }, { "epoch": 1.25, "grad_norm": 0.4676554799079895, "learning_rate": 0.00037850605115923486, "loss": 1.809, "step": 37486 }, { "epoch": 1.25, "grad_norm": 0.46504485607147217, "learning_rate": 0.0003784959632217034, "loss": 1.7435, "step": 37487 }, { "epoch": 1.25, "grad_norm": 0.48960983753204346, "learning_rate": 0.0003784858751888894, "loss": 1.8433, "step": 37488 }, { "epoch": 1.25, "grad_norm": 0.5114477276802063, "learning_rate": 0.00037847578706080517, "loss": 1.7429, "step": 37489 }, { "epoch": 1.25, "grad_norm": 0.4754437804222107, "learning_rate": 0.0003784656988374628, "loss": 1.7532, "step": 37490 }, { "epoch": 1.25, "grad_norm": 0.4814107120037079, "learning_rate": 0.0003784556105188746, "loss": 1.7499, "step": 37491 }, { "epoch": 1.25, "grad_norm": 0.5006579756736755, "learning_rate": 0.00037844552210505297, "loss": 1.7766, "step": 37492 }, { "epoch": 1.25, "grad_norm": 0.5002567768096924, "learning_rate": 0.00037843543359600994, "loss": 1.708, "step": 37493 }, { "epoch": 1.25, "grad_norm": 0.4702017307281494, "learning_rate": 0.0003784253449917579, "loss": 1.7873, "step": 37494 }, { "epoch": 1.25, "grad_norm": 0.4809364080429077, "learning_rate": 0.00037841525629230905, "loss": 1.7544, "step": 37495 }, { "epoch": 1.25, "grad_norm": 0.46635228395462036, "learning_rate": 0.0003784051674976756, "loss": 1.7335, "step": 37496 }, { "epoch": 1.25, "grad_norm": 0.5045676827430725, "learning_rate": 0.00037839507860786985, "loss": 1.7154, "step": 37497 }, { "epoch": 1.25, "grad_norm": 0.49122482538223267, "learning_rate": 0.000378384989622904, "loss": 1.7849, "step": 37498 }, { "epoch": 1.25, "grad_norm": 0.5058643817901611, "learning_rate": 0.0003783749005427904, "loss": 1.7589, "step": 37499 }, { "epoch": 1.25, "grad_norm": 0.4920329451560974, "learning_rate": 0.0003783648113675411, "loss": 1.7663, "step": 37500 }, { "epoch": 1.25, "grad_norm": 0.4540213942527771, "learning_rate": 0.00037835472209716853, "loss": 1.7387, "step": 37501 }, { "epoch": 1.25, "grad_norm": 0.5072170495986938, "learning_rate": 0.00037834463273168494, "loss": 1.8298, "step": 37502 }, { "epoch": 1.25, "grad_norm": 0.4752609133720398, "learning_rate": 0.0003783345432711024, "loss": 1.7702, "step": 37503 }, { "epoch": 1.25, "grad_norm": 0.49873656034469604, "learning_rate": 0.00037832445371543326, "loss": 1.8287, "step": 37504 }, { "epoch": 1.25, "grad_norm": 0.48214444518089294, "learning_rate": 0.00037831436406468984, "loss": 1.7608, "step": 37505 }, { "epoch": 1.25, "grad_norm": 0.5098794102668762, "learning_rate": 0.0003783042743188844, "loss": 1.848, "step": 37506 }, { "epoch": 1.25, "grad_norm": 0.4879703223705292, "learning_rate": 0.0003782941844780289, "loss": 1.7777, "step": 37507 }, { "epoch": 1.25, "grad_norm": 0.4894702434539795, "learning_rate": 0.00037828409454213595, "loss": 1.7306, "step": 37508 }, { "epoch": 1.25, "grad_norm": 0.48225781321525574, "learning_rate": 0.0003782740045112176, "loss": 1.7938, "step": 37509 }, { "epoch": 1.25, "grad_norm": 0.4708636999130249, "learning_rate": 0.0003782639143852861, "loss": 1.8316, "step": 37510 }, { "epoch": 1.25, "grad_norm": 0.47778722643852234, "learning_rate": 0.0003782538241643538, "loss": 1.784, "step": 37511 }, { "epoch": 1.25, "grad_norm": 0.4724726974964142, "learning_rate": 0.00037824373384843287, "loss": 1.7939, "step": 37512 }, { "epoch": 1.25, "grad_norm": 0.4759436845779419, "learning_rate": 0.00037823364343753563, "loss": 1.7981, "step": 37513 }, { "epoch": 1.25, "grad_norm": 0.47969022393226624, "learning_rate": 0.0003782235529316742, "loss": 1.739, "step": 37514 }, { "epoch": 1.25, "grad_norm": 0.5842418074607849, "learning_rate": 0.0003782134623308609, "loss": 1.7572, "step": 37515 }, { "epoch": 1.25, "grad_norm": 0.5051906704902649, "learning_rate": 0.00037820337163510797, "loss": 1.8414, "step": 37516 }, { "epoch": 1.25, "grad_norm": 0.49639880657196045, "learning_rate": 0.00037819328084442766, "loss": 1.8271, "step": 37517 }, { "epoch": 1.25, "grad_norm": 0.4867609143257141, "learning_rate": 0.0003781831899588323, "loss": 1.8066, "step": 37518 }, { "epoch": 1.25, "grad_norm": 0.48170530796051025, "learning_rate": 0.0003781730989783341, "loss": 1.7247, "step": 37519 }, { "epoch": 1.25, "grad_norm": 0.49435511231422424, "learning_rate": 0.0003781630079029452, "loss": 1.7747, "step": 37520 }, { "epoch": 1.25, "grad_norm": 1.606255054473877, "learning_rate": 0.0003781529167326779, "loss": 1.7638, "step": 37521 }, { "epoch": 1.25, "grad_norm": 0.4640214145183563, "learning_rate": 0.00037814282546754456, "loss": 1.8044, "step": 37522 }, { "epoch": 1.25, "grad_norm": 0.4967257082462311, "learning_rate": 0.00037813273410755723, "loss": 1.8152, "step": 37523 }, { "epoch": 1.25, "grad_norm": 0.5012328624725342, "learning_rate": 0.0003781226426527284, "loss": 1.8101, "step": 37524 }, { "epoch": 1.25, "grad_norm": 0.49106740951538086, "learning_rate": 0.0003781125511030701, "loss": 1.8031, "step": 37525 }, { "epoch": 1.25, "grad_norm": 0.4965735673904419, "learning_rate": 0.00037810245945859477, "loss": 1.7885, "step": 37526 }, { "epoch": 1.25, "grad_norm": 0.4711385667324066, "learning_rate": 0.00037809236771931443, "loss": 1.8108, "step": 37527 }, { "epoch": 1.25, "grad_norm": 0.469420850276947, "learning_rate": 0.00037808227588524153, "loss": 1.8013, "step": 37528 }, { "epoch": 1.25, "grad_norm": 0.4840618669986725, "learning_rate": 0.0003780721839563883, "loss": 1.8522, "step": 37529 }, { "epoch": 1.25, "grad_norm": 0.47782185673713684, "learning_rate": 0.0003780620919327668, "loss": 1.8405, "step": 37530 }, { "epoch": 1.25, "grad_norm": 0.5873968005180359, "learning_rate": 0.0003780519998143896, "loss": 1.7801, "step": 37531 }, { "epoch": 1.25, "grad_norm": 0.46802905201911926, "learning_rate": 0.0003780419076012686, "loss": 1.824, "step": 37532 }, { "epoch": 1.25, "grad_norm": 0.4715266525745392, "learning_rate": 0.00037803181529341636, "loss": 1.7976, "step": 37533 }, { "epoch": 1.25, "grad_norm": 0.4812171757221222, "learning_rate": 0.000378021722890845, "loss": 1.8135, "step": 37534 }, { "epoch": 1.25, "grad_norm": 0.4866790771484375, "learning_rate": 0.00037801163039356665, "loss": 1.8085, "step": 37535 }, { "epoch": 1.25, "grad_norm": 0.4943360388278961, "learning_rate": 0.0003780015378015938, "loss": 1.8266, "step": 37536 }, { "epoch": 1.25, "grad_norm": 0.46915894746780396, "learning_rate": 0.00037799144511493844, "loss": 1.8038, "step": 37537 }, { "epoch": 1.25, "grad_norm": 0.47279593348503113, "learning_rate": 0.0003779813523336131, "loss": 1.7754, "step": 37538 }, { "epoch": 1.25, "grad_norm": 0.4722296893596649, "learning_rate": 0.00037797125945762976, "loss": 1.7685, "step": 37539 }, { "epoch": 1.25, "grad_norm": 0.47767576575279236, "learning_rate": 0.00037796116648700084, "loss": 1.7203, "step": 37540 }, { "epoch": 1.25, "grad_norm": 0.48589277267456055, "learning_rate": 0.00037795107342173854, "loss": 1.7643, "step": 37541 }, { "epoch": 1.25, "grad_norm": 0.4790964126586914, "learning_rate": 0.0003779409802618552, "loss": 1.7634, "step": 37542 }, { "epoch": 1.25, "grad_norm": 0.5130580067634583, "learning_rate": 0.000377930887007363, "loss": 1.8418, "step": 37543 }, { "epoch": 1.25, "grad_norm": 0.4835945665836334, "learning_rate": 0.0003779207936582741, "loss": 1.8324, "step": 37544 }, { "epoch": 1.25, "grad_norm": 0.4826347827911377, "learning_rate": 0.00037791070021460084, "loss": 1.7919, "step": 37545 }, { "epoch": 1.25, "grad_norm": 0.49547278881073, "learning_rate": 0.00037790060667635544, "loss": 1.7609, "step": 37546 }, { "epoch": 1.25, "grad_norm": 0.4868919551372528, "learning_rate": 0.00037789051304355027, "loss": 1.7897, "step": 37547 }, { "epoch": 1.25, "grad_norm": 0.46238771080970764, "learning_rate": 0.0003778804193161974, "loss": 1.8337, "step": 37548 }, { "epoch": 1.25, "grad_norm": 0.48107120394706726, "learning_rate": 0.00037787032549430933, "loss": 1.7636, "step": 37549 }, { "epoch": 1.25, "grad_norm": 0.49398916959762573, "learning_rate": 0.000377860231577898, "loss": 1.804, "step": 37550 }, { "epoch": 1.25, "grad_norm": 0.4858996272087097, "learning_rate": 0.00037785013756697593, "loss": 1.7007, "step": 37551 }, { "epoch": 1.25, "grad_norm": 0.4793235957622528, "learning_rate": 0.0003778400434615552, "loss": 1.8005, "step": 37552 }, { "epoch": 1.25, "grad_norm": 0.4503744840621948, "learning_rate": 0.0003778299492616481, "loss": 1.7514, "step": 37553 }, { "epoch": 1.25, "grad_norm": 0.46530625224113464, "learning_rate": 0.000377819854967267, "loss": 1.7872, "step": 37554 }, { "epoch": 1.25, "grad_norm": 0.4726475477218628, "learning_rate": 0.00037780976057842403, "loss": 1.7672, "step": 37555 }, { "epoch": 1.25, "grad_norm": 0.4791984558105469, "learning_rate": 0.0003777996660951315, "loss": 1.7733, "step": 37556 }, { "epoch": 1.25, "grad_norm": 0.4825635254383087, "learning_rate": 0.0003777895715174016, "loss": 1.8094, "step": 37557 }, { "epoch": 1.25, "grad_norm": 0.49170589447021484, "learning_rate": 0.00037777947684524663, "loss": 1.7801, "step": 37558 }, { "epoch": 1.25, "grad_norm": 0.4752691984176636, "learning_rate": 0.00037776938207867883, "loss": 1.7868, "step": 37559 }, { "epoch": 1.25, "grad_norm": 0.4616096317768097, "learning_rate": 0.0003777592872177104, "loss": 1.7124, "step": 37560 }, { "epoch": 1.25, "grad_norm": 0.49363383650779724, "learning_rate": 0.00037774919226235375, "loss": 1.7597, "step": 37561 }, { "epoch": 1.25, "grad_norm": 0.45441409945487976, "learning_rate": 0.00037773909721262097, "loss": 1.7106, "step": 37562 }, { "epoch": 1.25, "grad_norm": 0.4770358204841614, "learning_rate": 0.00037772900206852443, "loss": 1.7887, "step": 37563 }, { "epoch": 1.25, "grad_norm": 0.4726030230522156, "learning_rate": 0.0003777189068300763, "loss": 1.7485, "step": 37564 }, { "epoch": 1.25, "grad_norm": 0.47035881876945496, "learning_rate": 0.0003777088114972889, "loss": 1.7358, "step": 37565 }, { "epoch": 1.25, "grad_norm": 0.46841710805892944, "learning_rate": 0.0003776987160701744, "loss": 1.7864, "step": 37566 }, { "epoch": 1.25, "grad_norm": 0.46958309412002563, "learning_rate": 0.0003776886205487451, "loss": 1.8204, "step": 37567 }, { "epoch": 1.25, "grad_norm": 0.48383843898773193, "learning_rate": 0.0003776785249330134, "loss": 1.7709, "step": 37568 }, { "epoch": 1.25, "grad_norm": 0.47610488533973694, "learning_rate": 0.0003776684292229912, "loss": 1.7763, "step": 37569 }, { "epoch": 1.25, "grad_norm": 0.5107010006904602, "learning_rate": 0.00037765833341869116, "loss": 1.7594, "step": 37570 }, { "epoch": 1.25, "grad_norm": 0.4762260615825653, "learning_rate": 0.00037764823752012527, "loss": 1.7909, "step": 37571 }, { "epoch": 1.25, "grad_norm": 0.5236523747444153, "learning_rate": 0.00037763814152730584, "loss": 1.7482, "step": 37572 }, { "epoch": 1.25, "grad_norm": 0.47729456424713135, "learning_rate": 0.0003776280454402452, "loss": 1.7584, "step": 37573 }, { "epoch": 1.25, "grad_norm": 0.4865139126777649, "learning_rate": 0.0003776179492589555, "loss": 1.8496, "step": 37574 }, { "epoch": 1.25, "grad_norm": 0.5025321841239929, "learning_rate": 0.0003776078529834491, "loss": 1.8471, "step": 37575 }, { "epoch": 1.25, "grad_norm": 0.48451951146125793, "learning_rate": 0.0003775977566137381, "loss": 1.8237, "step": 37576 }, { "epoch": 1.25, "grad_norm": 0.4553520679473877, "learning_rate": 0.000377587660149835, "loss": 1.7761, "step": 37577 }, { "epoch": 1.25, "grad_norm": 0.47544044256210327, "learning_rate": 0.0003775775635917518, "loss": 1.8046, "step": 37578 }, { "epoch": 1.25, "grad_norm": 0.5017617344856262, "learning_rate": 0.00037756746693950094, "loss": 1.7848, "step": 37579 }, { "epoch": 1.25, "grad_norm": 0.4724324643611908, "learning_rate": 0.0003775573701930945, "loss": 1.7474, "step": 37580 }, { "epoch": 1.25, "grad_norm": 0.46887338161468506, "learning_rate": 0.000377547273352545, "loss": 1.7664, "step": 37581 }, { "epoch": 1.25, "grad_norm": 0.4908439815044403, "learning_rate": 0.00037753717641786437, "loss": 1.8662, "step": 37582 }, { "epoch": 1.25, "grad_norm": 0.4828163683414459, "learning_rate": 0.0003775270793890652, "loss": 1.8614, "step": 37583 }, { "epoch": 1.25, "grad_norm": 0.47758710384368896, "learning_rate": 0.0003775169822661594, "loss": 1.8094, "step": 37584 }, { "epoch": 1.25, "grad_norm": 0.4873977303504944, "learning_rate": 0.0003775068850491595, "loss": 1.7468, "step": 37585 }, { "epoch": 1.25, "grad_norm": 0.4606372117996216, "learning_rate": 0.0003774967877380777, "loss": 1.7149, "step": 37586 }, { "epoch": 1.25, "grad_norm": 0.4693088233470917, "learning_rate": 0.00037748669033292614, "loss": 1.8051, "step": 37587 }, { "epoch": 1.25, "grad_norm": 0.4688699543476105, "learning_rate": 0.00037747659283371725, "loss": 1.8564, "step": 37588 }, { "epoch": 1.25, "grad_norm": 0.48569849133491516, "learning_rate": 0.0003774664952404631, "loss": 1.8271, "step": 37589 }, { "epoch": 1.25, "grad_norm": 0.4873977601528168, "learning_rate": 0.00037745639755317606, "loss": 1.8249, "step": 37590 }, { "epoch": 1.25, "grad_norm": 0.4926437735557556, "learning_rate": 0.0003774462997718684, "loss": 1.8186, "step": 37591 }, { "epoch": 1.25, "grad_norm": 0.4865480363368988, "learning_rate": 0.0003774362018965523, "loss": 1.7532, "step": 37592 }, { "epoch": 1.25, "grad_norm": 0.4934377074241638, "learning_rate": 0.00037742610392724007, "loss": 1.7581, "step": 37593 }, { "epoch": 1.25, "grad_norm": 0.4839427173137665, "learning_rate": 0.0003774160058639439, "loss": 1.7289, "step": 37594 }, { "epoch": 1.25, "grad_norm": 0.47944220900535583, "learning_rate": 0.00037740590770667624, "loss": 1.7954, "step": 37595 }, { "epoch": 1.25, "grad_norm": 0.48469898104667664, "learning_rate": 0.00037739580945544916, "loss": 1.7478, "step": 37596 }, { "epoch": 1.25, "grad_norm": 0.4794965982437134, "learning_rate": 0.00037738571111027493, "loss": 1.7296, "step": 37597 }, { "epoch": 1.25, "grad_norm": 0.4626477062702179, "learning_rate": 0.0003773756126711659, "loss": 1.7645, "step": 37598 }, { "epoch": 1.25, "grad_norm": 0.457160621881485, "learning_rate": 0.0003773655141381342, "loss": 1.7502, "step": 37599 }, { "epoch": 1.25, "grad_norm": 0.48662903904914856, "learning_rate": 0.00037735541551119225, "loss": 1.8968, "step": 37600 }, { "epoch": 1.25, "grad_norm": 0.47670015692710876, "learning_rate": 0.0003773453167903522, "loss": 1.8404, "step": 37601 }, { "epoch": 1.25, "grad_norm": 0.4677537977695465, "learning_rate": 0.0003773352179756263, "loss": 1.7592, "step": 37602 }, { "epoch": 1.25, "grad_norm": 0.4717426002025604, "learning_rate": 0.0003773251190670268, "loss": 1.7988, "step": 37603 }, { "epoch": 1.25, "grad_norm": 0.4689912796020508, "learning_rate": 0.0003773150200645661, "loss": 1.8022, "step": 37604 }, { "epoch": 1.25, "grad_norm": 0.5045879483222961, "learning_rate": 0.0003773049209682564, "loss": 1.7096, "step": 37605 }, { "epoch": 1.25, "grad_norm": 0.4590112268924713, "learning_rate": 0.00037729482177810983, "loss": 1.7687, "step": 37606 }, { "epoch": 1.25, "grad_norm": 0.487297922372818, "learning_rate": 0.0003772847224941387, "loss": 1.7665, "step": 37607 }, { "epoch": 1.25, "grad_norm": 0.4637294113636017, "learning_rate": 0.00037727462311635537, "loss": 1.6786, "step": 37608 }, { "epoch": 1.25, "grad_norm": 0.4758160710334778, "learning_rate": 0.00037726452364477195, "loss": 1.7964, "step": 37609 }, { "epoch": 1.25, "grad_norm": 0.49671775102615356, "learning_rate": 0.00037725442407940087, "loss": 1.7241, "step": 37610 }, { "epoch": 1.25, "grad_norm": 0.5279693603515625, "learning_rate": 0.00037724432442025433, "loss": 1.8581, "step": 37611 }, { "epoch": 1.25, "grad_norm": 0.4658646583557129, "learning_rate": 0.00037723422466734446, "loss": 1.8098, "step": 37612 }, { "epoch": 1.25, "grad_norm": 0.48643729090690613, "learning_rate": 0.0003772241248206837, "loss": 1.8186, "step": 37613 }, { "epoch": 1.25, "grad_norm": 0.5011641979217529, "learning_rate": 0.0003772140248802842, "loss": 1.7432, "step": 37614 }, { "epoch": 1.25, "grad_norm": 0.5074026584625244, "learning_rate": 0.0003772039248461582, "loss": 1.8042, "step": 37615 }, { "epoch": 1.25, "grad_norm": 0.47295093536376953, "learning_rate": 0.0003771938247183181, "loss": 1.8035, "step": 37616 }, { "epoch": 1.25, "grad_norm": 0.4732208847999573, "learning_rate": 0.000377183724496776, "loss": 1.7489, "step": 37617 }, { "epoch": 1.25, "grad_norm": 0.476813942193985, "learning_rate": 0.00037717362418154425, "loss": 1.8554, "step": 37618 }, { "epoch": 1.25, "grad_norm": 0.490397185087204, "learning_rate": 0.00037716352377263515, "loss": 1.858, "step": 37619 }, { "epoch": 1.25, "grad_norm": 0.4923184812068939, "learning_rate": 0.0003771534232700608, "loss": 1.8477, "step": 37620 }, { "epoch": 1.25, "grad_norm": 0.4884285628795624, "learning_rate": 0.0003771433226738336, "loss": 1.803, "step": 37621 }, { "epoch": 1.25, "grad_norm": 0.4817573130130768, "learning_rate": 0.0003771332219839658, "loss": 1.7124, "step": 37622 }, { "epoch": 1.25, "grad_norm": 0.4925825595855713, "learning_rate": 0.0003771231212004697, "loss": 1.7471, "step": 37623 }, { "epoch": 1.25, "grad_norm": 0.48903727531433105, "learning_rate": 0.00037711302032335736, "loss": 1.6677, "step": 37624 }, { "epoch": 1.25, "grad_norm": 0.4856506884098053, "learning_rate": 0.00037710291935264125, "loss": 1.7981, "step": 37625 }, { "epoch": 1.25, "grad_norm": 0.49711498618125916, "learning_rate": 0.0003770928182883335, "loss": 1.8439, "step": 37626 }, { "epoch": 1.25, "grad_norm": 0.4689517021179199, "learning_rate": 0.0003770827171304465, "loss": 1.7397, "step": 37627 }, { "epoch": 1.25, "grad_norm": 0.5235012173652649, "learning_rate": 0.00037707261587899233, "loss": 1.811, "step": 37628 }, { "epoch": 1.25, "grad_norm": 0.48712486028671265, "learning_rate": 0.00037706251453398346, "loss": 1.8137, "step": 37629 }, { "epoch": 1.25, "grad_norm": 0.49301472306251526, "learning_rate": 0.00037705241309543206, "loss": 1.7558, "step": 37630 }, { "epoch": 1.25, "grad_norm": 0.4893029034137726, "learning_rate": 0.00037704231156335033, "loss": 1.8019, "step": 37631 }, { "epoch": 1.25, "grad_norm": 0.489393949508667, "learning_rate": 0.00037703220993775063, "loss": 1.8309, "step": 37632 }, { "epoch": 1.25, "grad_norm": 0.47510069608688354, "learning_rate": 0.0003770221082186451, "loss": 1.8591, "step": 37633 }, { "epoch": 1.25, "grad_norm": 0.47988423705101013, "learning_rate": 0.0003770120064060462, "loss": 1.7836, "step": 37634 }, { "epoch": 1.25, "grad_norm": 0.4896489381790161, "learning_rate": 0.00037700190449996594, "loss": 1.7045, "step": 37635 }, { "epoch": 1.25, "grad_norm": 0.47709202766418457, "learning_rate": 0.0003769918025004169, "loss": 1.7648, "step": 37636 }, { "epoch": 1.25, "grad_norm": 0.49134454131126404, "learning_rate": 0.00037698170040741097, "loss": 1.8123, "step": 37637 }, { "epoch": 1.25, "grad_norm": 0.46914756298065186, "learning_rate": 0.00037697159822096065, "loss": 1.8267, "step": 37638 }, { "epoch": 1.25, "grad_norm": 0.501055121421814, "learning_rate": 0.0003769614959410782, "loss": 1.7978, "step": 37639 }, { "epoch": 1.25, "grad_norm": 0.47982558608055115, "learning_rate": 0.00037695139356777574, "loss": 1.7016, "step": 37640 }, { "epoch": 1.25, "grad_norm": 0.49585700035095215, "learning_rate": 0.0003769412911010657, "loss": 1.8053, "step": 37641 }, { "epoch": 1.25, "grad_norm": 1.2518082857131958, "learning_rate": 0.0003769311885409602, "loss": 1.7879, "step": 37642 }, { "epoch": 1.25, "grad_norm": 0.485714852809906, "learning_rate": 0.0003769210858874717, "loss": 1.7741, "step": 37643 }, { "epoch": 1.25, "grad_norm": 0.471390962600708, "learning_rate": 0.00037691098314061226, "loss": 1.7941, "step": 37644 }, { "epoch": 1.25, "grad_norm": 0.49261897802352905, "learning_rate": 0.0003769008803003942, "loss": 1.7005, "step": 37645 }, { "epoch": 1.25, "grad_norm": 0.47750329971313477, "learning_rate": 0.0003768907773668299, "loss": 1.7157, "step": 37646 }, { "epoch": 1.25, "grad_norm": 0.4762209951877594, "learning_rate": 0.0003768806743399314, "loss": 1.7652, "step": 37647 }, { "epoch": 1.25, "grad_norm": 0.47742319107055664, "learning_rate": 0.00037687057121971114, "loss": 1.7691, "step": 37648 }, { "epoch": 1.25, "grad_norm": 0.49912163615226746, "learning_rate": 0.00037686046800618125, "loss": 1.8483, "step": 37649 }, { "epoch": 1.25, "grad_norm": 0.4841834008693695, "learning_rate": 0.00037685036469935424, "loss": 1.801, "step": 37650 }, { "epoch": 1.25, "grad_norm": 0.46357327699661255, "learning_rate": 0.0003768402612992421, "loss": 1.841, "step": 37651 }, { "epoch": 1.25, "grad_norm": 0.4925379455089569, "learning_rate": 0.00037683015780585723, "loss": 1.7686, "step": 37652 }, { "epoch": 1.25, "grad_norm": 0.4763683080673218, "learning_rate": 0.0003768200542192119, "loss": 1.709, "step": 37653 }, { "epoch": 1.25, "grad_norm": 0.4762871563434601, "learning_rate": 0.0003768099505393183, "loss": 1.8083, "step": 37654 }, { "epoch": 1.25, "grad_norm": 0.5455528497695923, "learning_rate": 0.0003767998467661887, "loss": 1.8255, "step": 37655 }, { "epoch": 1.25, "grad_norm": 0.48914793133735657, "learning_rate": 0.0003767897428998355, "loss": 1.7456, "step": 37656 }, { "epoch": 1.25, "grad_norm": 0.4879752993583679, "learning_rate": 0.0003767796389402708, "loss": 1.781, "step": 37657 }, { "epoch": 1.25, "grad_norm": 0.4887969195842743, "learning_rate": 0.0003767695348875069, "loss": 1.7794, "step": 37658 }, { "epoch": 1.25, "grad_norm": 0.46546465158462524, "learning_rate": 0.0003767594307415562, "loss": 1.7851, "step": 37659 }, { "epoch": 1.25, "grad_norm": 0.4812487065792084, "learning_rate": 0.00037674932650243084, "loss": 1.7769, "step": 37660 }, { "epoch": 1.25, "grad_norm": 0.4901387393474579, "learning_rate": 0.000376739222170143, "loss": 1.8277, "step": 37661 }, { "epoch": 1.25, "grad_norm": 0.4929116666316986, "learning_rate": 0.00037672911774470517, "loss": 1.7785, "step": 37662 }, { "epoch": 1.25, "grad_norm": 0.4916136860847473, "learning_rate": 0.0003767190132261294, "loss": 1.7095, "step": 37663 }, { "epoch": 1.25, "grad_norm": 0.4841560125350952, "learning_rate": 0.0003767089086144281, "loss": 1.7876, "step": 37664 }, { "epoch": 1.25, "grad_norm": 1.2459452152252197, "learning_rate": 0.00037669880390961354, "loss": 1.8539, "step": 37665 }, { "epoch": 1.25, "grad_norm": 0.48538264632225037, "learning_rate": 0.0003766886991116979, "loss": 1.8285, "step": 37666 }, { "epoch": 1.25, "grad_norm": 0.489046573638916, "learning_rate": 0.0003766785942206935, "loss": 1.823, "step": 37667 }, { "epoch": 1.25, "grad_norm": 0.4739014804363251, "learning_rate": 0.0003766684892366125, "loss": 1.7308, "step": 37668 }, { "epoch": 1.25, "grad_norm": 0.47029396891593933, "learning_rate": 0.0003766583841594673, "loss": 1.7836, "step": 37669 }, { "epoch": 1.25, "grad_norm": 0.46860870718955994, "learning_rate": 0.00037664827898927004, "loss": 1.801, "step": 37670 }, { "epoch": 1.25, "grad_norm": 0.4933830201625824, "learning_rate": 0.0003766381737260333, "loss": 1.8061, "step": 37671 }, { "epoch": 1.25, "grad_norm": 0.48555299639701843, "learning_rate": 0.00037662806836976883, "loss": 1.8758, "step": 37672 }, { "epoch": 1.25, "grad_norm": 0.4992016553878784, "learning_rate": 0.0003766179629204893, "loss": 1.8672, "step": 37673 }, { "epoch": 1.25, "grad_norm": 0.48295360803604126, "learning_rate": 0.0003766078573782069, "loss": 1.7909, "step": 37674 }, { "epoch": 1.25, "grad_norm": 0.4672561585903168, "learning_rate": 0.00037659775174293374, "loss": 1.7977, "step": 37675 }, { "epoch": 1.25, "grad_norm": 0.48318392038345337, "learning_rate": 0.00037658764601468224, "loss": 1.797, "step": 37676 }, { "epoch": 1.25, "grad_norm": 0.48653310537338257, "learning_rate": 0.0003765775401934647, "loss": 1.7329, "step": 37677 }, { "epoch": 1.25, "grad_norm": 0.4882456064224243, "learning_rate": 0.00037656743427929326, "loss": 1.7697, "step": 37678 }, { "epoch": 1.25, "grad_norm": 0.49851977825164795, "learning_rate": 0.00037655732827218013, "loss": 1.7325, "step": 37679 }, { "epoch": 1.25, "grad_norm": 0.4673767387866974, "learning_rate": 0.0003765472221721378, "loss": 1.8602, "step": 37680 }, { "epoch": 1.25, "grad_norm": 0.4629363417625427, "learning_rate": 0.0003765371159791784, "loss": 1.781, "step": 37681 }, { "epoch": 1.25, "grad_norm": 0.47311243414878845, "learning_rate": 0.0003765270096933142, "loss": 1.7893, "step": 37682 }, { "epoch": 1.25, "grad_norm": 0.49159082770347595, "learning_rate": 0.00037651690331455746, "loss": 1.8125, "step": 37683 }, { "epoch": 1.25, "grad_norm": 0.47612547874450684, "learning_rate": 0.0003765067968429205, "loss": 1.7413, "step": 37684 }, { "epoch": 1.25, "grad_norm": 0.47498026490211487, "learning_rate": 0.0003764966902784157, "loss": 1.6908, "step": 37685 }, { "epoch": 1.25, "grad_norm": 0.46783095598220825, "learning_rate": 0.00037648658362105495, "loss": 1.8258, "step": 37686 }, { "epoch": 1.25, "grad_norm": 0.4567335546016693, "learning_rate": 0.0003764764768708509, "loss": 1.7763, "step": 37687 }, { "epoch": 1.25, "grad_norm": 0.49349209666252136, "learning_rate": 0.0003764663700278156, "loss": 1.8202, "step": 37688 }, { "epoch": 1.25, "grad_norm": 0.4744623303413391, "learning_rate": 0.00037645626309196147, "loss": 1.7453, "step": 37689 }, { "epoch": 1.25, "grad_norm": 0.47838103771209717, "learning_rate": 0.0003764461560633007, "loss": 1.7525, "step": 37690 }, { "epoch": 1.25, "grad_norm": 0.5391734838485718, "learning_rate": 0.00037643604894184543, "loss": 1.7964, "step": 37691 }, { "epoch": 1.25, "grad_norm": 0.5006964802742004, "learning_rate": 0.0003764259417276083, "loss": 1.821, "step": 37692 }, { "epoch": 1.25, "grad_norm": 0.49488383531570435, "learning_rate": 0.00037641583442060114, "loss": 1.8767, "step": 37693 }, { "epoch": 1.25, "grad_norm": 0.49126964807510376, "learning_rate": 0.00037640572702083645, "loss": 1.8112, "step": 37694 }, { "epoch": 1.25, "grad_norm": 0.49223288893699646, "learning_rate": 0.0003763956195283265, "loss": 1.8004, "step": 37695 }, { "epoch": 1.25, "grad_norm": 0.4944187104701996, "learning_rate": 0.0003763855119430835, "loss": 1.7778, "step": 37696 }, { "epoch": 1.25, "grad_norm": 0.4872452914714813, "learning_rate": 0.00037637540426511967, "loss": 1.7756, "step": 37697 }, { "epoch": 1.25, "grad_norm": 0.45564523339271545, "learning_rate": 0.0003763652964944475, "loss": 1.7157, "step": 37698 }, { "epoch": 1.25, "grad_norm": 0.48003631830215454, "learning_rate": 0.00037635518863107906, "loss": 1.8361, "step": 37699 }, { "epoch": 1.25, "grad_norm": 0.48521915078163147, "learning_rate": 0.00037634508067502665, "loss": 1.7524, "step": 37700 }, { "epoch": 1.25, "grad_norm": 0.5103446245193481, "learning_rate": 0.0003763349726263026, "loss": 1.7964, "step": 37701 }, { "epoch": 1.25, "grad_norm": 0.47590917348861694, "learning_rate": 0.00037632486448491907, "loss": 1.8031, "step": 37702 }, { "epoch": 1.25, "grad_norm": 0.48499050736427307, "learning_rate": 0.0003763147562508885, "loss": 1.7955, "step": 37703 }, { "epoch": 1.25, "grad_norm": 0.4897209107875824, "learning_rate": 0.00037630464792422295, "loss": 1.7509, "step": 37704 }, { "epoch": 1.25, "grad_norm": 0.5328356027603149, "learning_rate": 0.0003762945395049349, "loss": 1.7236, "step": 37705 }, { "epoch": 1.25, "grad_norm": 0.48433876037597656, "learning_rate": 0.00037628443099303646, "loss": 1.7855, "step": 37706 }, { "epoch": 1.25, "grad_norm": 0.47352516651153564, "learning_rate": 0.00037627432238853997, "loss": 1.8148, "step": 37707 }, { "epoch": 1.25, "grad_norm": 0.4714064300060272, "learning_rate": 0.0003762642136914577, "loss": 1.7298, "step": 37708 }, { "epoch": 1.25, "grad_norm": 0.5521993041038513, "learning_rate": 0.00037625410490180184, "loss": 1.8169, "step": 37709 }, { "epoch": 1.25, "grad_norm": 0.49296894669532776, "learning_rate": 0.0003762439960195849, "loss": 1.7957, "step": 37710 }, { "epoch": 1.25, "grad_norm": 0.48195913434028625, "learning_rate": 0.0003762338870448188, "loss": 1.7505, "step": 37711 }, { "epoch": 1.25, "grad_norm": 0.471454918384552, "learning_rate": 0.00037622377797751614, "loss": 1.7124, "step": 37712 }, { "epoch": 1.25, "grad_norm": 0.49975070357322693, "learning_rate": 0.000376213668817689, "loss": 1.7869, "step": 37713 }, { "epoch": 1.25, "grad_norm": 0.4866403341293335, "learning_rate": 0.00037620355956534965, "loss": 1.8037, "step": 37714 }, { "epoch": 1.25, "grad_norm": 0.4893183410167694, "learning_rate": 0.0003761934502205105, "loss": 1.7465, "step": 37715 }, { "epoch": 1.25, "grad_norm": 0.48492568731307983, "learning_rate": 0.0003761833407831835, "loss": 1.797, "step": 37716 }, { "epoch": 1.25, "grad_norm": 0.47149643301963806, "learning_rate": 0.00037617323125338145, "loss": 1.8188, "step": 37717 }, { "epoch": 1.25, "grad_norm": 0.4721827507019043, "learning_rate": 0.0003761631216311161, "loss": 1.7756, "step": 37718 }, { "epoch": 1.25, "grad_norm": 0.4911785125732422, "learning_rate": 0.00037615301191640005, "loss": 1.723, "step": 37719 }, { "epoch": 1.25, "grad_norm": 0.5796388983726501, "learning_rate": 0.0003761429021092454, "loss": 1.7812, "step": 37720 }, { "epoch": 1.25, "grad_norm": 0.4838521182537079, "learning_rate": 0.0003761327922096645, "loss": 1.8565, "step": 37721 }, { "epoch": 1.26, "grad_norm": 0.4694344699382782, "learning_rate": 0.0003761226822176697, "loss": 1.8103, "step": 37722 }, { "epoch": 1.26, "grad_norm": 0.4715230166912079, "learning_rate": 0.00037611257213327306, "loss": 1.7878, "step": 37723 }, { "epoch": 1.26, "grad_norm": 0.5043979287147522, "learning_rate": 0.00037610246195648707, "loss": 1.7356, "step": 37724 }, { "epoch": 1.26, "grad_norm": 0.4947103261947632, "learning_rate": 0.00037609235168732387, "loss": 1.7422, "step": 37725 }, { "epoch": 1.26, "grad_norm": 0.46359920501708984, "learning_rate": 0.00037608224132579574, "loss": 1.7992, "step": 37726 }, { "epoch": 1.26, "grad_norm": 0.48922085762023926, "learning_rate": 0.00037607213087191495, "loss": 1.7372, "step": 37727 }, { "epoch": 1.26, "grad_norm": 0.48006299138069153, "learning_rate": 0.0003760620203256939, "loss": 1.7709, "step": 37728 }, { "epoch": 1.26, "grad_norm": 0.5208726525306702, "learning_rate": 0.00037605190968714475, "loss": 1.7245, "step": 37729 }, { "epoch": 1.26, "grad_norm": 0.48900970816612244, "learning_rate": 0.00037604179895627974, "loss": 1.8312, "step": 37730 }, { "epoch": 1.26, "grad_norm": 0.5112634897232056, "learning_rate": 0.0003760316881331111, "loss": 1.8057, "step": 37731 }, { "epoch": 1.26, "grad_norm": 0.5132275819778442, "learning_rate": 0.0003760215772176513, "loss": 1.7802, "step": 37732 }, { "epoch": 1.26, "grad_norm": 0.4727880358695984, "learning_rate": 0.0003760114662099126, "loss": 1.7766, "step": 37733 }, { "epoch": 1.26, "grad_norm": 0.479221373796463, "learning_rate": 0.000376001355109907, "loss": 1.7441, "step": 37734 }, { "epoch": 1.26, "grad_norm": 0.48219040036201477, "learning_rate": 0.0003759912439176471, "loss": 1.799, "step": 37735 }, { "epoch": 1.26, "grad_norm": 0.49794548749923706, "learning_rate": 0.0003759811326331449, "loss": 1.8312, "step": 37736 }, { "epoch": 1.26, "grad_norm": 0.5022107362747192, "learning_rate": 0.0003759710212564128, "loss": 1.7745, "step": 37737 }, { "epoch": 1.26, "grad_norm": 0.48907631635665894, "learning_rate": 0.00037596090978746325, "loss": 1.7759, "step": 37738 }, { "epoch": 1.26, "grad_norm": 0.4719092845916748, "learning_rate": 0.0003759507982263082, "loss": 1.7855, "step": 37739 }, { "epoch": 1.26, "grad_norm": 0.4813563823699951, "learning_rate": 0.00037594068657296006, "loss": 1.7913, "step": 37740 }, { "epoch": 1.26, "grad_norm": 0.49033254384994507, "learning_rate": 0.0003759305748274311, "loss": 1.7923, "step": 37741 }, { "epoch": 1.26, "grad_norm": 0.5029736757278442, "learning_rate": 0.00037592046298973376, "loss": 1.7911, "step": 37742 }, { "epoch": 1.26, "grad_norm": 0.501667320728302, "learning_rate": 0.00037591035105988003, "loss": 1.7514, "step": 37743 }, { "epoch": 1.26, "grad_norm": 0.4718777537345886, "learning_rate": 0.0003759002390378824, "loss": 1.7894, "step": 37744 }, { "epoch": 1.26, "grad_norm": 0.4896712899208069, "learning_rate": 0.00037589012692375296, "loss": 1.7568, "step": 37745 }, { "epoch": 1.26, "grad_norm": 0.503077507019043, "learning_rate": 0.00037588001471750417, "loss": 1.7729, "step": 37746 }, { "epoch": 1.26, "grad_norm": 0.48881107568740845, "learning_rate": 0.0003758699024191483, "loss": 1.8008, "step": 37747 }, { "epoch": 1.26, "grad_norm": 0.46927589178085327, "learning_rate": 0.0003758597900286974, "loss": 1.7285, "step": 37748 }, { "epoch": 1.26, "grad_norm": 0.4660116136074066, "learning_rate": 0.0003758496775461639, "loss": 1.7945, "step": 37749 }, { "epoch": 1.26, "grad_norm": 0.5145534873008728, "learning_rate": 0.00037583956497156014, "loss": 1.7983, "step": 37750 }, { "epoch": 1.26, "grad_norm": 0.46728596091270447, "learning_rate": 0.0003758294523048983, "loss": 1.8299, "step": 37751 }, { "epoch": 1.26, "grad_norm": 0.4716840386390686, "learning_rate": 0.00037581933954619064, "loss": 1.8156, "step": 37752 }, { "epoch": 1.26, "grad_norm": 0.4642919898033142, "learning_rate": 0.00037580922669544953, "loss": 1.7722, "step": 37753 }, { "epoch": 1.26, "grad_norm": 0.5006691813468933, "learning_rate": 0.00037579911375268713, "loss": 1.8715, "step": 37754 }, { "epoch": 1.26, "grad_norm": 0.5074424743652344, "learning_rate": 0.00037578900071791587, "loss": 1.8356, "step": 37755 }, { "epoch": 1.26, "grad_norm": 0.4675559997558594, "learning_rate": 0.00037577888759114783, "loss": 1.7809, "step": 37756 }, { "epoch": 1.26, "grad_norm": 0.4662802219390869, "learning_rate": 0.00037576877437239543, "loss": 1.7824, "step": 37757 }, { "epoch": 1.26, "grad_norm": 0.48154810070991516, "learning_rate": 0.0003757586610616709, "loss": 1.7853, "step": 37758 }, { "epoch": 1.26, "grad_norm": 0.4866044223308563, "learning_rate": 0.0003757485476589865, "loss": 1.7167, "step": 37759 }, { "epoch": 1.26, "grad_norm": 0.4508107900619507, "learning_rate": 0.0003757384341643546, "loss": 1.7232, "step": 37760 }, { "epoch": 1.26, "grad_norm": 0.4865209758281708, "learning_rate": 0.00037572832057778733, "loss": 1.7842, "step": 37761 }, { "epoch": 1.26, "grad_norm": 0.47122856974601746, "learning_rate": 0.00037571820689929705, "loss": 1.7482, "step": 37762 }, { "epoch": 1.26, "grad_norm": 0.48956987261772156, "learning_rate": 0.000375708093128896, "loss": 1.7375, "step": 37763 }, { "epoch": 1.26, "grad_norm": 0.4716961681842804, "learning_rate": 0.0003756979792665965, "loss": 1.742, "step": 37764 }, { "epoch": 1.26, "grad_norm": 0.47324368357658386, "learning_rate": 0.0003756878653124108, "loss": 1.7673, "step": 37765 }, { "epoch": 1.26, "grad_norm": 0.4739307165145874, "learning_rate": 0.00037567775126635115, "loss": 1.8076, "step": 37766 }, { "epoch": 1.26, "grad_norm": 0.46940726041793823, "learning_rate": 0.00037566763712843, "loss": 1.7982, "step": 37767 }, { "epoch": 1.26, "grad_norm": 0.49275678396224976, "learning_rate": 0.0003756575228986593, "loss": 1.8542, "step": 37768 }, { "epoch": 1.26, "grad_norm": 0.4809470474720001, "learning_rate": 0.0003756474085770516, "loss": 1.7525, "step": 37769 }, { "epoch": 1.26, "grad_norm": 0.4893113374710083, "learning_rate": 0.00037563729416361914, "loss": 1.7411, "step": 37770 }, { "epoch": 1.26, "grad_norm": 0.49478378891944885, "learning_rate": 0.0003756271796583741, "loss": 1.794, "step": 37771 }, { "epoch": 1.26, "grad_norm": 0.470849871635437, "learning_rate": 0.0003756170650613288, "loss": 1.7776, "step": 37772 }, { "epoch": 1.26, "grad_norm": 0.4817931056022644, "learning_rate": 0.0003756069503724955, "loss": 1.7475, "step": 37773 }, { "epoch": 1.26, "grad_norm": 0.47227945923805237, "learning_rate": 0.00037559683559188655, "loss": 1.7346, "step": 37774 }, { "epoch": 1.26, "grad_norm": 0.5017435550689697, "learning_rate": 0.00037558672071951414, "loss": 1.8155, "step": 37775 }, { "epoch": 1.26, "grad_norm": 0.4969259798526764, "learning_rate": 0.00037557660575539064, "loss": 1.7789, "step": 37776 }, { "epoch": 1.26, "grad_norm": 0.49241170287132263, "learning_rate": 0.0003755664906995283, "loss": 1.7398, "step": 37777 }, { "epoch": 1.26, "grad_norm": 0.467796266078949, "learning_rate": 0.0003755563755519393, "loss": 1.7484, "step": 37778 }, { "epoch": 1.26, "grad_norm": 0.4832005798816681, "learning_rate": 0.000375546260312636, "loss": 1.7888, "step": 37779 }, { "epoch": 1.26, "grad_norm": 0.49224141240119934, "learning_rate": 0.0003755361449816307, "loss": 1.7375, "step": 37780 }, { "epoch": 1.26, "grad_norm": 0.4599640369415283, "learning_rate": 0.00037552602955893564, "loss": 1.7751, "step": 37781 }, { "epoch": 1.26, "grad_norm": 0.4768553376197815, "learning_rate": 0.0003755159140445631, "loss": 1.7444, "step": 37782 }, { "epoch": 1.26, "grad_norm": 0.48979559540748596, "learning_rate": 0.00037550579843852536, "loss": 1.7963, "step": 37783 }, { "epoch": 1.26, "grad_norm": 0.4784855842590332, "learning_rate": 0.00037549568274083473, "loss": 1.7683, "step": 37784 }, { "epoch": 1.26, "grad_norm": 0.48293405771255493, "learning_rate": 0.0003754855669515035, "loss": 1.7404, "step": 37785 }, { "epoch": 1.26, "grad_norm": 0.4881899058818817, "learning_rate": 0.0003754754510705439, "loss": 1.7855, "step": 37786 }, { "epoch": 1.26, "grad_norm": 0.5568994283676147, "learning_rate": 0.0003754653350979681, "loss": 1.8279, "step": 37787 }, { "epoch": 1.26, "grad_norm": 0.4840477406978607, "learning_rate": 0.0003754552190337887, "loss": 1.7814, "step": 37788 }, { "epoch": 1.26, "grad_norm": 0.47804614901542664, "learning_rate": 0.0003754451028780176, "loss": 1.8446, "step": 37789 }, { "epoch": 1.26, "grad_norm": 0.47822949290275574, "learning_rate": 0.00037543498663066737, "loss": 1.7889, "step": 37790 }, { "epoch": 1.26, "grad_norm": 0.4994598627090454, "learning_rate": 0.00037542487029175014, "loss": 1.8153, "step": 37791 }, { "epoch": 1.26, "grad_norm": 0.49784329533576965, "learning_rate": 0.0003754147538612782, "loss": 1.7932, "step": 37792 }, { "epoch": 1.26, "grad_norm": 0.48771655559539795, "learning_rate": 0.0003754046373392639, "loss": 1.7866, "step": 37793 }, { "epoch": 1.26, "grad_norm": 0.4969419538974762, "learning_rate": 0.0003753945207257195, "loss": 1.7885, "step": 37794 }, { "epoch": 1.26, "grad_norm": 0.47628870606422424, "learning_rate": 0.0003753844040206573, "loss": 1.7931, "step": 37795 }, { "epoch": 1.26, "grad_norm": 0.48956748843193054, "learning_rate": 0.0003753742872240894, "loss": 1.7352, "step": 37796 }, { "epoch": 1.26, "grad_norm": 0.4727891683578491, "learning_rate": 0.00037536417033602837, "loss": 1.8178, "step": 37797 }, { "epoch": 1.26, "grad_norm": 0.4789227247238159, "learning_rate": 0.0003753540533564863, "loss": 1.7493, "step": 37798 }, { "epoch": 1.26, "grad_norm": 0.4805153012275696, "learning_rate": 0.0003753439362854754, "loss": 1.8263, "step": 37799 }, { "epoch": 1.26, "grad_norm": 0.4825277328491211, "learning_rate": 0.0003753338191230082, "loss": 1.8559, "step": 37800 }, { "epoch": 1.26, "grad_norm": 0.49490705132484436, "learning_rate": 0.0003753237018690968, "loss": 1.7728, "step": 37801 }, { "epoch": 1.26, "grad_norm": 0.47836920619010925, "learning_rate": 0.00037531358452375357, "loss": 1.7735, "step": 37802 }, { "epoch": 1.26, "grad_norm": 0.4751419723033905, "learning_rate": 0.0003753034670869906, "loss": 1.7783, "step": 37803 }, { "epoch": 1.26, "grad_norm": 0.49285387992858887, "learning_rate": 0.00037529334955882043, "loss": 1.804, "step": 37804 }, { "epoch": 1.26, "grad_norm": 0.46937263011932373, "learning_rate": 0.0003752832319392552, "loss": 1.6301, "step": 37805 }, { "epoch": 1.26, "grad_norm": 0.5028948187828064, "learning_rate": 0.00037527311422830723, "loss": 1.8428, "step": 37806 }, { "epoch": 1.26, "grad_norm": 0.48196282982826233, "learning_rate": 0.0003752629964259888, "loss": 1.7869, "step": 37807 }, { "epoch": 1.26, "grad_norm": 0.4766954183578491, "learning_rate": 0.00037525287853231213, "loss": 1.7129, "step": 37808 }, { "epoch": 1.26, "grad_norm": 0.4633477032184601, "learning_rate": 0.0003752427605472896, "loss": 1.7679, "step": 37809 }, { "epoch": 1.26, "grad_norm": 0.48692476749420166, "learning_rate": 0.00037523264247093347, "loss": 1.7372, "step": 37810 }, { "epoch": 1.26, "grad_norm": 0.48760128021240234, "learning_rate": 0.00037522252430325596, "loss": 1.6874, "step": 37811 }, { "epoch": 1.26, "grad_norm": 0.4713480472564697, "learning_rate": 0.00037521240604426943, "loss": 1.7329, "step": 37812 }, { "epoch": 1.26, "grad_norm": 0.49226826429367065, "learning_rate": 0.0003752022876939861, "loss": 1.8078, "step": 37813 }, { "epoch": 1.26, "grad_norm": 0.48367300629615784, "learning_rate": 0.00037519216925241823, "loss": 1.7748, "step": 37814 }, { "epoch": 1.26, "grad_norm": 0.49336621165275574, "learning_rate": 0.0003751820507195782, "loss": 1.7892, "step": 37815 }, { "epoch": 1.26, "grad_norm": 0.4819805324077606, "learning_rate": 0.0003751719320954782, "loss": 1.8059, "step": 37816 }, { "epoch": 1.26, "grad_norm": 0.48391664028167725, "learning_rate": 0.00037516181338013055, "loss": 1.7706, "step": 37817 }, { "epoch": 1.26, "grad_norm": 0.5088890790939331, "learning_rate": 0.00037515169457354755, "loss": 1.8069, "step": 37818 }, { "epoch": 1.26, "grad_norm": 0.489096075296402, "learning_rate": 0.00037514157567574146, "loss": 1.7259, "step": 37819 }, { "epoch": 1.26, "grad_norm": 0.46834155917167664, "learning_rate": 0.0003751314566867246, "loss": 1.7634, "step": 37820 }, { "epoch": 1.26, "grad_norm": 0.48243820667266846, "learning_rate": 0.0003751213376065091, "loss": 1.7789, "step": 37821 }, { "epoch": 1.26, "grad_norm": 0.4845081865787506, "learning_rate": 0.0003751112184351076, "loss": 1.7812, "step": 37822 }, { "epoch": 1.26, "grad_norm": 0.4851666986942291, "learning_rate": 0.000375101099172532, "loss": 1.8044, "step": 37823 }, { "epoch": 1.26, "grad_norm": 0.4837947487831116, "learning_rate": 0.00037509097981879464, "loss": 1.7942, "step": 37824 }, { "epoch": 1.26, "grad_norm": 0.47439321875572205, "learning_rate": 0.0003750808603739081, "loss": 1.7982, "step": 37825 }, { "epoch": 1.26, "grad_norm": 0.4856429398059845, "learning_rate": 0.0003750707408378843, "loss": 1.7299, "step": 37826 }, { "epoch": 1.26, "grad_norm": 0.4826918840408325, "learning_rate": 0.00037506062121073577, "loss": 1.7675, "step": 37827 }, { "epoch": 1.26, "grad_norm": 0.4703288972377777, "learning_rate": 0.0003750505014924746, "loss": 1.8415, "step": 37828 }, { "epoch": 1.26, "grad_norm": 0.4787801504135132, "learning_rate": 0.00037504038168311327, "loss": 1.7807, "step": 37829 }, { "epoch": 1.26, "grad_norm": 0.48044553399086, "learning_rate": 0.0003750302617826639, "loss": 1.7774, "step": 37830 }, { "epoch": 1.26, "grad_norm": 0.47914063930511475, "learning_rate": 0.00037502014179113894, "loss": 1.8305, "step": 37831 }, { "epoch": 1.26, "grad_norm": 0.4913575351238251, "learning_rate": 0.0003750100217085506, "loss": 1.8062, "step": 37832 }, { "epoch": 1.26, "grad_norm": 0.4927441477775574, "learning_rate": 0.00037499990153491095, "loss": 1.7485, "step": 37833 }, { "epoch": 1.26, "grad_norm": 0.47898441553115845, "learning_rate": 0.0003749897812702327, "loss": 1.8775, "step": 37834 }, { "epoch": 1.26, "grad_norm": 0.4638555943965912, "learning_rate": 0.0003749796609145278, "loss": 1.7181, "step": 37835 }, { "epoch": 1.26, "grad_norm": 0.46785497665405273, "learning_rate": 0.0003749695404678086, "loss": 1.8165, "step": 37836 }, { "epoch": 1.26, "grad_norm": 0.48194506764411926, "learning_rate": 0.00037495941993008746, "loss": 1.7969, "step": 37837 }, { "epoch": 1.26, "grad_norm": 0.473836213350296, "learning_rate": 0.00037494929930137667, "loss": 1.882, "step": 37838 }, { "epoch": 1.26, "grad_norm": 0.4827826917171478, "learning_rate": 0.00037493917858168847, "loss": 1.7177, "step": 37839 }, { "epoch": 1.26, "grad_norm": 0.4662264585494995, "learning_rate": 0.0003749290577710351, "loss": 1.8212, "step": 37840 }, { "epoch": 1.26, "grad_norm": 0.48622721433639526, "learning_rate": 0.00037491893686942895, "loss": 1.712, "step": 37841 }, { "epoch": 1.26, "grad_norm": 0.5061851739883423, "learning_rate": 0.00037490881587688224, "loss": 1.7391, "step": 37842 }, { "epoch": 1.26, "grad_norm": 0.4728964865207672, "learning_rate": 0.0003748986947934072, "loss": 1.7836, "step": 37843 }, { "epoch": 1.26, "grad_norm": 0.4808502495288849, "learning_rate": 0.0003748885736190162, "loss": 1.8198, "step": 37844 }, { "epoch": 1.26, "grad_norm": 0.5004013776779175, "learning_rate": 0.0003748784523537216, "loss": 1.7214, "step": 37845 }, { "epoch": 1.26, "grad_norm": 0.47850364446640015, "learning_rate": 0.00037486833099753547, "loss": 1.7397, "step": 37846 }, { "epoch": 1.26, "grad_norm": 0.4769133925437927, "learning_rate": 0.0003748582095504703, "loss": 1.8296, "step": 37847 }, { "epoch": 1.26, "grad_norm": 0.4719730615615845, "learning_rate": 0.00037484808801253823, "loss": 1.7786, "step": 37848 }, { "epoch": 1.26, "grad_norm": 0.4985866844654083, "learning_rate": 0.00037483796638375166, "loss": 1.8183, "step": 37849 }, { "epoch": 1.26, "grad_norm": 0.46637603640556335, "learning_rate": 0.0003748278446641228, "loss": 1.763, "step": 37850 }, { "epoch": 1.26, "grad_norm": 0.48153626918792725, "learning_rate": 0.00037481772285366397, "loss": 1.8122, "step": 37851 }, { "epoch": 1.26, "grad_norm": 0.4872012734413147, "learning_rate": 0.00037480760095238747, "loss": 1.7633, "step": 37852 }, { "epoch": 1.26, "grad_norm": 0.4802727997303009, "learning_rate": 0.0003747974789603055, "loss": 1.8407, "step": 37853 }, { "epoch": 1.26, "grad_norm": 0.482438862323761, "learning_rate": 0.00037478735687743045, "loss": 1.7767, "step": 37854 }, { "epoch": 1.26, "grad_norm": 0.4961619973182678, "learning_rate": 0.0003747772347037746, "loss": 1.8058, "step": 37855 }, { "epoch": 1.26, "grad_norm": 0.5063895583152771, "learning_rate": 0.0003747671124393501, "loss": 1.7634, "step": 37856 }, { "epoch": 1.26, "grad_norm": 0.4764181971549988, "learning_rate": 0.00037475699008416956, "loss": 1.7939, "step": 37857 }, { "epoch": 1.26, "grad_norm": 0.5069316029548645, "learning_rate": 0.0003747468676382448, "loss": 1.8328, "step": 37858 }, { "epoch": 1.26, "grad_norm": 0.4772307872772217, "learning_rate": 0.0003747367451015885, "loss": 1.8044, "step": 37859 }, { "epoch": 1.26, "grad_norm": 0.4763405919075012, "learning_rate": 0.0003747266224742127, "loss": 1.7941, "step": 37860 }, { "epoch": 1.26, "grad_norm": 0.48701098561286926, "learning_rate": 0.0003747164997561299, "loss": 1.8809, "step": 37861 }, { "epoch": 1.26, "grad_norm": 0.4976966083049774, "learning_rate": 0.0003747063769473522, "loss": 1.7935, "step": 37862 }, { "epoch": 1.26, "grad_norm": 0.47891825437545776, "learning_rate": 0.00037469625404789204, "loss": 1.7937, "step": 37863 }, { "epoch": 1.26, "grad_norm": 0.48621758818626404, "learning_rate": 0.00037468613105776165, "loss": 1.7533, "step": 37864 }, { "epoch": 1.26, "grad_norm": 0.47299981117248535, "learning_rate": 0.0003746760079769732, "loss": 1.7732, "step": 37865 }, { "epoch": 1.26, "grad_norm": 0.49452707171440125, "learning_rate": 0.00037466588480553916, "loss": 1.7679, "step": 37866 }, { "epoch": 1.26, "grad_norm": 0.5059598684310913, "learning_rate": 0.00037465576154347167, "loss": 1.759, "step": 37867 }, { "epoch": 1.26, "grad_norm": 0.4837562143802643, "learning_rate": 0.0003746456381907831, "loss": 1.8042, "step": 37868 }, { "epoch": 1.26, "grad_norm": 0.4958251714706421, "learning_rate": 0.00037463551474748575, "loss": 1.7858, "step": 37869 }, { "epoch": 1.26, "grad_norm": 0.48926740884780884, "learning_rate": 0.00037462539121359197, "loss": 1.8072, "step": 37870 }, { "epoch": 1.26, "grad_norm": 0.4857048988342285, "learning_rate": 0.00037461526758911375, "loss": 1.792, "step": 37871 }, { "epoch": 1.26, "grad_norm": 0.4847005605697632, "learning_rate": 0.0003746051438740638, "loss": 1.7366, "step": 37872 }, { "epoch": 1.26, "grad_norm": 0.4947255849838257, "learning_rate": 0.0003745950200684541, "loss": 1.7236, "step": 37873 }, { "epoch": 1.26, "grad_norm": 0.48917272686958313, "learning_rate": 0.0003745848961722969, "loss": 1.8601, "step": 37874 }, { "epoch": 1.26, "grad_norm": 0.5148971676826477, "learning_rate": 0.0003745747721856049, "loss": 1.7974, "step": 37875 }, { "epoch": 1.26, "grad_norm": 0.4896571636199951, "learning_rate": 0.00037456464810838995, "loss": 1.7611, "step": 37876 }, { "epoch": 1.26, "grad_norm": 0.502784788608551, "learning_rate": 0.00037455452394066454, "loss": 1.8058, "step": 37877 }, { "epoch": 1.26, "grad_norm": 0.496439129114151, "learning_rate": 0.00037454439968244094, "loss": 1.7318, "step": 37878 }, { "epoch": 1.26, "grad_norm": 0.48558369278907776, "learning_rate": 0.00037453427533373133, "loss": 1.7111, "step": 37879 }, { "epoch": 1.26, "grad_norm": 0.4913750886917114, "learning_rate": 0.00037452415089454826, "loss": 1.7106, "step": 37880 }, { "epoch": 1.26, "grad_norm": 0.5120804309844971, "learning_rate": 0.00037451402636490364, "loss": 1.8115, "step": 37881 }, { "epoch": 1.26, "grad_norm": 0.4710780084133148, "learning_rate": 0.0003745039017448101, "loss": 1.7479, "step": 37882 }, { "epoch": 1.26, "grad_norm": 0.4824703335762024, "learning_rate": 0.00037449377703427974, "loss": 1.8683, "step": 37883 }, { "epoch": 1.26, "grad_norm": 0.4836716651916504, "learning_rate": 0.0003744836522333251, "loss": 1.8126, "step": 37884 }, { "epoch": 1.26, "grad_norm": 0.49381405115127563, "learning_rate": 0.0003744735273419581, "loss": 1.7816, "step": 37885 }, { "epoch": 1.26, "grad_norm": 0.4953208863735199, "learning_rate": 0.00037446340236019124, "loss": 1.8387, "step": 37886 }, { "epoch": 1.26, "grad_norm": 0.5252173542976379, "learning_rate": 0.0003744532772880368, "loss": 1.8191, "step": 37887 }, { "epoch": 1.26, "grad_norm": 0.4894293248653412, "learning_rate": 0.000374443152125507, "loss": 1.7829, "step": 37888 }, { "epoch": 1.26, "grad_norm": 0.49702826142311096, "learning_rate": 0.00037443302687261424, "loss": 1.781, "step": 37889 }, { "epoch": 1.26, "grad_norm": 0.4908411502838135, "learning_rate": 0.00037442290152937076, "loss": 1.8649, "step": 37890 }, { "epoch": 1.26, "grad_norm": 0.4902322292327881, "learning_rate": 0.00037441277609578873, "loss": 1.8184, "step": 37891 }, { "epoch": 1.26, "grad_norm": 0.4747668206691742, "learning_rate": 0.0003744026505718807, "loss": 1.7932, "step": 37892 }, { "epoch": 1.26, "grad_norm": 0.4747644066810608, "learning_rate": 0.0003743925249576587, "loss": 1.6752, "step": 37893 }, { "epoch": 1.26, "grad_norm": 0.5126371383666992, "learning_rate": 0.00037438239925313525, "loss": 1.859, "step": 37894 }, { "epoch": 1.26, "grad_norm": 0.5326108336448669, "learning_rate": 0.0003743722734583225, "loss": 1.8142, "step": 37895 }, { "epoch": 1.26, "grad_norm": 0.5265844464302063, "learning_rate": 0.0003743621475732327, "loss": 1.8472, "step": 37896 }, { "epoch": 1.26, "grad_norm": 0.4899388551712036, "learning_rate": 0.00037435202159787827, "loss": 1.8011, "step": 37897 }, { "epoch": 1.26, "grad_norm": 0.49466654658317566, "learning_rate": 0.00037434189553227137, "loss": 1.7819, "step": 37898 }, { "epoch": 1.26, "grad_norm": 0.5082290172576904, "learning_rate": 0.00037433176937642443, "loss": 1.7555, "step": 37899 }, { "epoch": 1.26, "grad_norm": 0.4791710078716278, "learning_rate": 0.0003743216431303497, "loss": 1.8109, "step": 37900 }, { "epoch": 1.26, "grad_norm": 0.4986652731895447, "learning_rate": 0.00037431151679405937, "loss": 1.8231, "step": 37901 }, { "epoch": 1.26, "grad_norm": 0.4860907793045044, "learning_rate": 0.00037430139036756586, "loss": 1.8002, "step": 37902 }, { "epoch": 1.26, "grad_norm": 0.48108839988708496, "learning_rate": 0.0003742912638508813, "loss": 1.8391, "step": 37903 }, { "epoch": 1.26, "grad_norm": 0.4971482753753662, "learning_rate": 0.0003742811372440182, "loss": 1.763, "step": 37904 }, { "epoch": 1.26, "grad_norm": 0.46635937690734863, "learning_rate": 0.0003742710105469888, "loss": 1.739, "step": 37905 }, { "epoch": 1.26, "grad_norm": 0.4725353419780731, "learning_rate": 0.0003742608837598052, "loss": 1.7525, "step": 37906 }, { "epoch": 1.26, "grad_norm": 0.4554479420185089, "learning_rate": 0.00037425075688247994, "loss": 1.7351, "step": 37907 }, { "epoch": 1.26, "grad_norm": 0.47084349393844604, "learning_rate": 0.0003742406299150251, "loss": 1.877, "step": 37908 }, { "epoch": 1.26, "grad_norm": 0.48395755887031555, "learning_rate": 0.00037423050285745313, "loss": 1.7701, "step": 37909 }, { "epoch": 1.26, "grad_norm": 0.46384450793266296, "learning_rate": 0.00037422037570977626, "loss": 1.7795, "step": 37910 }, { "epoch": 1.26, "grad_norm": 0.47748827934265137, "learning_rate": 0.00037421024847200684, "loss": 1.7256, "step": 37911 }, { "epoch": 1.26, "grad_norm": 0.49097833037376404, "learning_rate": 0.0003742001211441571, "loss": 1.7553, "step": 37912 }, { "epoch": 1.26, "grad_norm": 0.4780507981777191, "learning_rate": 0.0003741899937262392, "loss": 1.7744, "step": 37913 }, { "epoch": 1.26, "grad_norm": 0.47967734932899475, "learning_rate": 0.0003741798662182658, "loss": 1.7501, "step": 37914 }, { "epoch": 1.26, "grad_norm": 0.48936548829078674, "learning_rate": 0.00037416973862024883, "loss": 1.7124, "step": 37915 }, { "epoch": 1.26, "grad_norm": 0.4900624752044678, "learning_rate": 0.00037415961093220076, "loss": 1.8127, "step": 37916 }, { "epoch": 1.26, "grad_norm": 0.47516465187072754, "learning_rate": 0.00037414948315413383, "loss": 1.7919, "step": 37917 }, { "epoch": 1.26, "grad_norm": 0.48385998606681824, "learning_rate": 0.00037413935528606034, "loss": 1.6727, "step": 37918 }, { "epoch": 1.26, "grad_norm": 0.489314466714859, "learning_rate": 0.0003741292273279927, "loss": 1.7393, "step": 37919 }, { "epoch": 1.26, "grad_norm": 0.45643922686576843, "learning_rate": 0.000374119099279943, "loss": 1.7974, "step": 37920 }, { "epoch": 1.26, "grad_norm": 0.4908600449562073, "learning_rate": 0.00037410897114192366, "loss": 1.7123, "step": 37921 }, { "epoch": 1.26, "grad_norm": 0.4714260697364807, "learning_rate": 0.0003740988429139469, "loss": 1.8279, "step": 37922 }, { "epoch": 1.26, "grad_norm": 0.5189348459243774, "learning_rate": 0.00037408871459602516, "loss": 1.8631, "step": 37923 }, { "epoch": 1.26, "grad_norm": 0.4902331233024597, "learning_rate": 0.0003740785861881705, "loss": 1.888, "step": 37924 }, { "epoch": 1.26, "grad_norm": 0.4703313410282135, "learning_rate": 0.0003740684576903956, "loss": 1.7524, "step": 37925 }, { "epoch": 1.26, "grad_norm": 0.4564642906188965, "learning_rate": 0.0003740583291027123, "loss": 1.7408, "step": 37926 }, { "epoch": 1.26, "grad_norm": 0.48914462327957153, "learning_rate": 0.0003740482004251331, "loss": 1.8173, "step": 37927 }, { "epoch": 1.26, "grad_norm": 0.4863841235637665, "learning_rate": 0.00037403807165767034, "loss": 1.7894, "step": 37928 }, { "epoch": 1.26, "grad_norm": 0.48335185647010803, "learning_rate": 0.00037402794280033626, "loss": 1.8002, "step": 37929 }, { "epoch": 1.26, "grad_norm": 0.4858759641647339, "learning_rate": 0.0003740178138531432, "loss": 1.6571, "step": 37930 }, { "epoch": 1.26, "grad_norm": 0.4682733714580536, "learning_rate": 0.00037400768481610333, "loss": 1.7718, "step": 37931 }, { "epoch": 1.26, "grad_norm": 0.4791816473007202, "learning_rate": 0.0003739975556892292, "loss": 1.6997, "step": 37932 }, { "epoch": 1.26, "grad_norm": 0.475693017244339, "learning_rate": 0.0003739874264725328, "loss": 1.78, "step": 37933 }, { "epoch": 1.26, "grad_norm": 0.486113041639328, "learning_rate": 0.00037397729716602665, "loss": 1.7076, "step": 37934 }, { "epoch": 1.26, "grad_norm": 0.5049495100975037, "learning_rate": 0.0003739671677697229, "loss": 1.8026, "step": 37935 }, { "epoch": 1.26, "grad_norm": 0.5145101547241211, "learning_rate": 0.0003739570382836339, "loss": 1.7649, "step": 37936 }, { "epoch": 1.26, "grad_norm": 0.4817187488079071, "learning_rate": 0.000373946908707772, "loss": 1.7772, "step": 37937 }, { "epoch": 1.26, "grad_norm": 0.4777890145778656, "learning_rate": 0.00037393677904214937, "loss": 1.7544, "step": 37938 }, { "epoch": 1.26, "grad_norm": 0.4789254367351532, "learning_rate": 0.00037392664928677854, "loss": 1.7206, "step": 37939 }, { "epoch": 1.26, "grad_norm": 0.4832230508327484, "learning_rate": 0.00037391651944167155, "loss": 1.7985, "step": 37940 }, { "epoch": 1.26, "grad_norm": 0.49016427993774414, "learning_rate": 0.00037390638950684076, "loss": 1.7632, "step": 37941 }, { "epoch": 1.26, "grad_norm": 0.4816139340400696, "learning_rate": 0.0003738962594822986, "loss": 1.7877, "step": 37942 }, { "epoch": 1.26, "grad_norm": 0.494450181722641, "learning_rate": 0.00037388612936805715, "loss": 1.8634, "step": 37943 }, { "epoch": 1.26, "grad_norm": 0.47521907091140747, "learning_rate": 0.0003738759991641289, "loss": 1.7474, "step": 37944 }, { "epoch": 1.26, "grad_norm": 0.46131327748298645, "learning_rate": 0.000373865868870526, "loss": 1.7399, "step": 37945 }, { "epoch": 1.26, "grad_norm": 0.49487176537513733, "learning_rate": 0.00037385573848726096, "loss": 1.7529, "step": 37946 }, { "epoch": 1.26, "grad_norm": 0.4824608564376831, "learning_rate": 0.0003738456080143458, "loss": 1.8119, "step": 37947 }, { "epoch": 1.26, "grad_norm": 0.5043852925300598, "learning_rate": 0.00037383547745179304, "loss": 1.7908, "step": 37948 }, { "epoch": 1.26, "grad_norm": 0.4820714294910431, "learning_rate": 0.0003738253467996149, "loss": 1.8078, "step": 37949 }, { "epoch": 1.26, "grad_norm": 0.47482356429100037, "learning_rate": 0.0003738152160578235, "loss": 1.7977, "step": 37950 }, { "epoch": 1.26, "grad_norm": 0.47575992345809937, "learning_rate": 0.0003738050852264315, "loss": 1.8191, "step": 37951 }, { "epoch": 1.26, "grad_norm": 0.4856676459312439, "learning_rate": 0.0003737949543054509, "loss": 1.7755, "step": 37952 }, { "epoch": 1.26, "grad_norm": 0.4841604232788086, "learning_rate": 0.0003737848232948941, "loss": 1.7569, "step": 37953 }, { "epoch": 1.26, "grad_norm": 0.5104914903640747, "learning_rate": 0.00037377469219477345, "loss": 1.8107, "step": 37954 }, { "epoch": 1.26, "grad_norm": 0.5048873424530029, "learning_rate": 0.00037376456100510116, "loss": 1.759, "step": 37955 }, { "epoch": 1.26, "grad_norm": 0.49932530522346497, "learning_rate": 0.0003737544297258896, "loss": 1.8114, "step": 37956 }, { "epoch": 1.26, "grad_norm": 0.5016623139381409, "learning_rate": 0.00037374429835715096, "loss": 1.8239, "step": 37957 }, { "epoch": 1.26, "grad_norm": 0.46061933040618896, "learning_rate": 0.0003737341668988977, "loss": 1.7061, "step": 37958 }, { "epoch": 1.26, "grad_norm": 0.47710055112838745, "learning_rate": 0.00037372403535114196, "loss": 1.7609, "step": 37959 }, { "epoch": 1.26, "grad_norm": 0.4840924143791199, "learning_rate": 0.00037371390371389605, "loss": 1.7852, "step": 37960 }, { "epoch": 1.26, "grad_norm": 0.4658477008342743, "learning_rate": 0.0003737037719871724, "loss": 1.7125, "step": 37961 }, { "epoch": 1.26, "grad_norm": 0.4770635664463043, "learning_rate": 0.0003736936401709833, "loss": 1.7734, "step": 37962 }, { "epoch": 1.26, "grad_norm": 0.48508596420288086, "learning_rate": 0.0003736835082653409, "loss": 1.7919, "step": 37963 }, { "epoch": 1.26, "grad_norm": 0.4741109311580658, "learning_rate": 0.00037367337627025756, "loss": 1.7347, "step": 37964 }, { "epoch": 1.26, "grad_norm": 0.480060875415802, "learning_rate": 0.00037366324418574556, "loss": 1.772, "step": 37965 }, { "epoch": 1.26, "grad_norm": 0.4775494337081909, "learning_rate": 0.0003736531120118173, "loss": 1.7563, "step": 37966 }, { "epoch": 1.26, "grad_norm": 0.49312639236450195, "learning_rate": 0.000373642979748485, "loss": 1.7184, "step": 37967 }, { "epoch": 1.26, "grad_norm": 0.49037671089172363, "learning_rate": 0.00037363284739576097, "loss": 1.8015, "step": 37968 }, { "epoch": 1.26, "grad_norm": 0.4802922010421753, "learning_rate": 0.0003736227149536575, "loss": 1.7976, "step": 37969 }, { "epoch": 1.26, "grad_norm": 0.4859248697757721, "learning_rate": 0.0003736125824221869, "loss": 1.834, "step": 37970 }, { "epoch": 1.26, "grad_norm": 0.5194358825683594, "learning_rate": 0.0003736024498013615, "loss": 1.7832, "step": 37971 }, { "epoch": 1.26, "grad_norm": 0.48919418454170227, "learning_rate": 0.0003735923170911936, "loss": 1.8091, "step": 37972 }, { "epoch": 1.26, "grad_norm": 0.501929759979248, "learning_rate": 0.00037358218429169536, "loss": 1.7661, "step": 37973 }, { "epoch": 1.26, "grad_norm": 0.4629187285900116, "learning_rate": 0.00037357205140287937, "loss": 1.7974, "step": 37974 }, { "epoch": 1.26, "grad_norm": 0.4812573790550232, "learning_rate": 0.00037356191842475753, "loss": 1.7933, "step": 37975 }, { "epoch": 1.26, "grad_norm": 0.48811906576156616, "learning_rate": 0.00037355178535734254, "loss": 1.7325, "step": 37976 }, { "epoch": 1.26, "grad_norm": 0.48233577609062195, "learning_rate": 0.00037354165220064643, "loss": 1.694, "step": 37977 }, { "epoch": 1.26, "grad_norm": 0.5027723908424377, "learning_rate": 0.00037353151895468155, "loss": 1.7519, "step": 37978 }, { "epoch": 1.26, "grad_norm": 0.48939254879951477, "learning_rate": 0.0003735213856194604, "loss": 1.7584, "step": 37979 }, { "epoch": 1.26, "grad_norm": 0.48717421293258667, "learning_rate": 0.00037351125219499496, "loss": 1.7501, "step": 37980 }, { "epoch": 1.26, "grad_norm": 0.5089403986930847, "learning_rate": 0.0003735011186812978, "loss": 1.7913, "step": 37981 }, { "epoch": 1.26, "grad_norm": 0.4821954369544983, "learning_rate": 0.0003734909850783811, "loss": 1.7547, "step": 37982 }, { "epoch": 1.26, "grad_norm": 0.49394670128822327, "learning_rate": 0.0003734808513862571, "loss": 1.8439, "step": 37983 }, { "epoch": 1.26, "grad_norm": 0.4859708845615387, "learning_rate": 0.00037347071760493813, "loss": 1.7199, "step": 37984 }, { "epoch": 1.26, "grad_norm": 0.4675513207912445, "learning_rate": 0.00037346058373443665, "loss": 1.7452, "step": 37985 }, { "epoch": 1.26, "grad_norm": 0.4693600833415985, "learning_rate": 0.0003734504497747648, "loss": 1.7475, "step": 37986 }, { "epoch": 1.26, "grad_norm": 0.4878763258457184, "learning_rate": 0.000373440315725935, "loss": 1.8232, "step": 37987 }, { "epoch": 1.26, "grad_norm": 0.49350255727767944, "learning_rate": 0.0003734301815879594, "loss": 1.8175, "step": 37988 }, { "epoch": 1.26, "grad_norm": 0.4873104989528656, "learning_rate": 0.0003734200473608504, "loss": 1.8163, "step": 37989 }, { "epoch": 1.26, "grad_norm": 0.49465426802635193, "learning_rate": 0.00037340991304462027, "loss": 1.8624, "step": 37990 }, { "epoch": 1.26, "grad_norm": 0.469784140586853, "learning_rate": 0.00037339977863928126, "loss": 1.8185, "step": 37991 }, { "epoch": 1.26, "grad_norm": 0.4579520523548126, "learning_rate": 0.0003733896441448459, "loss": 1.8055, "step": 37992 }, { "epoch": 1.26, "grad_norm": 0.49304336309432983, "learning_rate": 0.0003733795095613261, "loss": 1.7734, "step": 37993 }, { "epoch": 1.26, "grad_norm": 0.4894915223121643, "learning_rate": 0.0003733693748887346, "loss": 1.7454, "step": 37994 }, { "epoch": 1.26, "grad_norm": 0.5321949124336243, "learning_rate": 0.00037335924012708334, "loss": 1.8924, "step": 37995 }, { "epoch": 1.26, "grad_norm": 0.47990110516548157, "learning_rate": 0.0003733491052763848, "loss": 1.7725, "step": 37996 }, { "epoch": 1.26, "grad_norm": 0.4809917211532593, "learning_rate": 0.0003733389703366514, "loss": 1.7746, "step": 37997 }, { "epoch": 1.26, "grad_norm": 0.4751916527748108, "learning_rate": 0.00037332883530789504, "loss": 1.8682, "step": 37998 }, { "epoch": 1.26, "grad_norm": 0.4837290346622467, "learning_rate": 0.00037331870019012857, "loss": 1.742, "step": 37999 }, { "epoch": 1.26, "grad_norm": 0.7120597958564758, "learning_rate": 0.0003733085649833637, "loss": 1.8729, "step": 38000 }, { "epoch": 1.26, "grad_norm": 0.48436135053634644, "learning_rate": 0.0003732984296876132, "loss": 1.8159, "step": 38001 }, { "epoch": 1.26, "grad_norm": 0.49396494030952454, "learning_rate": 0.0003732882943028893, "loss": 1.7885, "step": 38002 }, { "epoch": 1.26, "grad_norm": 2.524256467819214, "learning_rate": 0.0003732781588292041, "loss": 1.7548, "step": 38003 }, { "epoch": 1.26, "grad_norm": 0.4806596636772156, "learning_rate": 0.00037326802326657, "loss": 1.7909, "step": 38004 }, { "epoch": 1.26, "grad_norm": 0.482520192861557, "learning_rate": 0.00037325788761499924, "loss": 1.7422, "step": 38005 }, { "epoch": 1.26, "grad_norm": 0.504654586315155, "learning_rate": 0.00037324775187450433, "loss": 1.8002, "step": 38006 }, { "epoch": 1.26, "grad_norm": 0.4703407883644104, "learning_rate": 0.0003732376160450974, "loss": 1.8381, "step": 38007 }, { "epoch": 1.26, "grad_norm": 0.4766401946544647, "learning_rate": 0.0003732274801267908, "loss": 1.7379, "step": 38008 }, { "epoch": 1.26, "grad_norm": 1.0755451917648315, "learning_rate": 0.0003732173441195968, "loss": 1.7871, "step": 38009 }, { "epoch": 1.26, "grad_norm": 0.4674806594848633, "learning_rate": 0.00037320720802352773, "loss": 1.7784, "step": 38010 }, { "epoch": 1.26, "grad_norm": 0.4793437123298645, "learning_rate": 0.000373197071838596, "loss": 1.8291, "step": 38011 }, { "epoch": 1.26, "grad_norm": 0.49110373854637146, "learning_rate": 0.0003731869355648137, "loss": 1.7731, "step": 38012 }, { "epoch": 1.26, "grad_norm": 0.46601802110671997, "learning_rate": 0.0003731767992021933, "loss": 1.7537, "step": 38013 }, { "epoch": 1.26, "grad_norm": 0.49447137117385864, "learning_rate": 0.00037316666275074694, "loss": 1.813, "step": 38014 }, { "epoch": 1.26, "grad_norm": 0.46496298909187317, "learning_rate": 0.0003731565262104871, "loss": 1.7388, "step": 38015 }, { "epoch": 1.26, "grad_norm": 0.4691327214241028, "learning_rate": 0.0003731463895814261, "loss": 1.7843, "step": 38016 }, { "epoch": 1.26, "grad_norm": 0.4791094958782196, "learning_rate": 0.0003731362528635761, "loss": 1.7566, "step": 38017 }, { "epoch": 1.26, "grad_norm": 0.47825175523757935, "learning_rate": 0.00037312611605694947, "loss": 1.7292, "step": 38018 }, { "epoch": 1.26, "grad_norm": 0.48122283816337585, "learning_rate": 0.00037311597916155846, "loss": 1.8049, "step": 38019 }, { "epoch": 1.26, "grad_norm": 0.5014402270317078, "learning_rate": 0.00037310584217741547, "loss": 1.8251, "step": 38020 }, { "epoch": 1.26, "grad_norm": 0.47883111238479614, "learning_rate": 0.0003730957051045327, "loss": 1.7498, "step": 38021 }, { "epoch": 1.26, "grad_norm": 0.494525283575058, "learning_rate": 0.0003730855679429226, "loss": 1.7902, "step": 38022 }, { "epoch": 1.27, "grad_norm": 0.47587087750434875, "learning_rate": 0.0003730754306925973, "loss": 1.8584, "step": 38023 }, { "epoch": 1.27, "grad_norm": 0.4901825785636902, "learning_rate": 0.0003730652933535693, "loss": 1.8228, "step": 38024 }, { "epoch": 1.27, "grad_norm": 0.47542768716812134, "learning_rate": 0.0003730551559258508, "loss": 1.7346, "step": 38025 }, { "epoch": 1.27, "grad_norm": 0.48216044902801514, "learning_rate": 0.000373045018409454, "loss": 1.7001, "step": 38026 }, { "epoch": 1.27, "grad_norm": 0.5083313584327698, "learning_rate": 0.00037303488080439134, "loss": 1.765, "step": 38027 }, { "epoch": 1.27, "grad_norm": 0.48734408617019653, "learning_rate": 0.00037302474311067517, "loss": 1.8341, "step": 38028 }, { "epoch": 1.27, "grad_norm": 0.4846629798412323, "learning_rate": 0.0003730146053283177, "loss": 1.8077, "step": 38029 }, { "epoch": 1.27, "grad_norm": 0.48522770404815674, "learning_rate": 0.0003730044674573311, "loss": 1.771, "step": 38030 }, { "epoch": 1.27, "grad_norm": 0.5090323686599731, "learning_rate": 0.00037299432949772807, "loss": 1.8183, "step": 38031 }, { "epoch": 1.27, "grad_norm": 0.482894629240036, "learning_rate": 0.0003729841914495205, "loss": 1.7899, "step": 38032 }, { "epoch": 1.27, "grad_norm": 0.4868643581867218, "learning_rate": 0.0003729740533127209, "loss": 1.7297, "step": 38033 }, { "epoch": 1.27, "grad_norm": 0.470344215631485, "learning_rate": 0.00037296391508734165, "loss": 1.7313, "step": 38034 }, { "epoch": 1.27, "grad_norm": 0.5004793405532837, "learning_rate": 0.0003729537767733949, "loss": 1.7953, "step": 38035 }, { "epoch": 1.27, "grad_norm": 0.4770621657371521, "learning_rate": 0.000372943638370893, "loss": 1.7041, "step": 38036 }, { "epoch": 1.27, "grad_norm": 0.4832342565059662, "learning_rate": 0.00037293349987984833, "loss": 1.7536, "step": 38037 }, { "epoch": 1.27, "grad_norm": 0.4890672266483307, "learning_rate": 0.0003729233613002731, "loss": 1.7983, "step": 38038 }, { "epoch": 1.27, "grad_norm": 0.4970894753932953, "learning_rate": 0.0003729132226321796, "loss": 1.8425, "step": 38039 }, { "epoch": 1.27, "grad_norm": 0.5085399746894836, "learning_rate": 0.00037290308387558026, "loss": 1.8055, "step": 38040 }, { "epoch": 1.27, "grad_norm": 0.5233970284461975, "learning_rate": 0.0003728929450304873, "loss": 1.829, "step": 38041 }, { "epoch": 1.27, "grad_norm": 0.4851682484149933, "learning_rate": 0.0003728828060969131, "loss": 1.6937, "step": 38042 }, { "epoch": 1.27, "grad_norm": 0.48292598128318787, "learning_rate": 0.00037287266707486985, "loss": 1.7295, "step": 38043 }, { "epoch": 1.27, "grad_norm": 0.512931764125824, "learning_rate": 0.00037286252796436993, "loss": 1.8389, "step": 38044 }, { "epoch": 1.27, "grad_norm": 0.5153585076332092, "learning_rate": 0.00037285238876542563, "loss": 1.823, "step": 38045 }, { "epoch": 1.27, "grad_norm": 0.5172634720802307, "learning_rate": 0.00037284224947804924, "loss": 1.8447, "step": 38046 }, { "epoch": 1.27, "grad_norm": 1.5762977600097656, "learning_rate": 0.0003728321101022531, "loss": 1.9022, "step": 38047 }, { "epoch": 1.27, "grad_norm": 0.47123128175735474, "learning_rate": 0.0003728219706380495, "loss": 1.7104, "step": 38048 }, { "epoch": 1.27, "grad_norm": 0.47851982712745667, "learning_rate": 0.0003728118310854508, "loss": 1.8113, "step": 38049 }, { "epoch": 1.27, "grad_norm": 0.4822041392326355, "learning_rate": 0.0003728016914444693, "loss": 1.7039, "step": 38050 }, { "epoch": 1.27, "grad_norm": 0.5083492398262024, "learning_rate": 0.0003727915517151172, "loss": 1.8604, "step": 38051 }, { "epoch": 1.27, "grad_norm": 0.4926835596561432, "learning_rate": 0.00037278141189740684, "loss": 1.8359, "step": 38052 }, { "epoch": 1.27, "grad_norm": 0.4850728511810303, "learning_rate": 0.00037277127199135065, "loss": 1.7651, "step": 38053 }, { "epoch": 1.27, "grad_norm": 0.47772523760795593, "learning_rate": 0.00037276113199696093, "loss": 1.6521, "step": 38054 }, { "epoch": 1.27, "grad_norm": 0.4863107204437256, "learning_rate": 0.0003727509919142498, "loss": 1.7879, "step": 38055 }, { "epoch": 1.27, "grad_norm": 0.4957084059715271, "learning_rate": 0.0003727408517432297, "loss": 1.7452, "step": 38056 }, { "epoch": 1.27, "grad_norm": 0.480990469455719, "learning_rate": 0.00037273071148391295, "loss": 1.772, "step": 38057 }, { "epoch": 1.27, "grad_norm": 0.4671364426612854, "learning_rate": 0.0003727205711363118, "loss": 1.7781, "step": 38058 }, { "epoch": 1.27, "grad_norm": 0.4735356271266937, "learning_rate": 0.0003727104307004387, "loss": 1.7744, "step": 38059 }, { "epoch": 1.27, "grad_norm": 0.4858078062534332, "learning_rate": 0.0003727002901763057, "loss": 1.7406, "step": 38060 }, { "epoch": 1.27, "grad_norm": 0.49346426129341125, "learning_rate": 0.00037269014956392533, "loss": 1.787, "step": 38061 }, { "epoch": 1.27, "grad_norm": 0.4801190495491028, "learning_rate": 0.0003726800088633098, "loss": 1.7688, "step": 38062 }, { "epoch": 1.27, "grad_norm": 0.5158541798591614, "learning_rate": 0.00037266986807447147, "loss": 1.7966, "step": 38063 }, { "epoch": 1.27, "grad_norm": 0.4977077841758728, "learning_rate": 0.0003726597271974226, "loss": 1.7597, "step": 38064 }, { "epoch": 1.27, "grad_norm": 0.967207670211792, "learning_rate": 0.00037264958623217556, "loss": 1.7569, "step": 38065 }, { "epoch": 1.27, "grad_norm": 0.4805426001548767, "learning_rate": 0.0003726394451787427, "loss": 1.7746, "step": 38066 }, { "epoch": 1.27, "grad_norm": 0.48289865255355835, "learning_rate": 0.0003726293040371361, "loss": 1.7266, "step": 38067 }, { "epoch": 1.27, "grad_norm": 0.4942583739757538, "learning_rate": 0.00037261916280736837, "loss": 1.7917, "step": 38068 }, { "epoch": 1.27, "grad_norm": 0.4843038320541382, "learning_rate": 0.0003726090214894516, "loss": 1.8106, "step": 38069 }, { "epoch": 1.27, "grad_norm": 0.48795953392982483, "learning_rate": 0.00037259888008339817, "loss": 1.7333, "step": 38070 }, { "epoch": 1.27, "grad_norm": 0.4883381128311157, "learning_rate": 0.0003725887385892204, "loss": 1.8496, "step": 38071 }, { "epoch": 1.27, "grad_norm": 0.4822646379470825, "learning_rate": 0.0003725785970069306, "loss": 1.8386, "step": 38072 }, { "epoch": 1.27, "grad_norm": 0.4903363585472107, "learning_rate": 0.00037256845533654114, "loss": 1.7033, "step": 38073 }, { "epoch": 1.27, "grad_norm": 0.4777619540691376, "learning_rate": 0.0003725583135780642, "loss": 1.7446, "step": 38074 }, { "epoch": 1.27, "grad_norm": 0.4817555248737335, "learning_rate": 0.00037254817173151215, "loss": 1.7801, "step": 38075 }, { "epoch": 1.27, "grad_norm": 0.4576483368873596, "learning_rate": 0.0003725380297968973, "loss": 1.728, "step": 38076 }, { "epoch": 1.27, "grad_norm": 0.4958855211734772, "learning_rate": 0.000372527887774232, "loss": 1.7467, "step": 38077 }, { "epoch": 1.27, "grad_norm": 0.49679598212242126, "learning_rate": 0.00037251774566352853, "loss": 1.7914, "step": 38078 }, { "epoch": 1.27, "grad_norm": 0.4906740188598633, "learning_rate": 0.0003725076034647993, "loss": 1.8321, "step": 38079 }, { "epoch": 1.27, "grad_norm": 0.4906799793243408, "learning_rate": 0.00037249746117805636, "loss": 1.752, "step": 38080 }, { "epoch": 1.27, "grad_norm": 0.48927414417266846, "learning_rate": 0.0003724873188033123, "loss": 1.8055, "step": 38081 }, { "epoch": 1.27, "grad_norm": 0.4819498062133789, "learning_rate": 0.00037247717634057926, "loss": 1.7447, "step": 38082 }, { "epoch": 1.27, "grad_norm": 0.4889129102230072, "learning_rate": 0.0003724670337898696, "loss": 1.7633, "step": 38083 }, { "epoch": 1.27, "grad_norm": 0.5017334222793579, "learning_rate": 0.00037245689115119566, "loss": 1.7858, "step": 38084 }, { "epoch": 1.27, "grad_norm": 0.46267712116241455, "learning_rate": 0.0003724467484245697, "loss": 1.757, "step": 38085 }, { "epoch": 1.27, "grad_norm": 0.4849480092525482, "learning_rate": 0.0003724366056100041, "loss": 1.9095, "step": 38086 }, { "epoch": 1.27, "grad_norm": 0.49545785784721375, "learning_rate": 0.0003724264627075111, "loss": 1.7544, "step": 38087 }, { "epoch": 1.27, "grad_norm": 0.4711419343948364, "learning_rate": 0.0003724163197171031, "loss": 1.805, "step": 38088 }, { "epoch": 1.27, "grad_norm": 0.4843131899833679, "learning_rate": 0.0003724061766387923, "loss": 1.908, "step": 38089 }, { "epoch": 1.27, "grad_norm": 0.4794868528842926, "learning_rate": 0.00037239603347259106, "loss": 1.7326, "step": 38090 }, { "epoch": 1.27, "grad_norm": 0.49550163745880127, "learning_rate": 0.0003723858902185118, "loss": 1.8218, "step": 38091 }, { "epoch": 1.27, "grad_norm": 0.4777834415435791, "learning_rate": 0.0003723757468765666, "loss": 1.7114, "step": 38092 }, { "epoch": 1.27, "grad_norm": 0.4900752305984497, "learning_rate": 0.000372365603446768, "loss": 1.8175, "step": 38093 }, { "epoch": 1.27, "grad_norm": 0.476671427488327, "learning_rate": 0.0003723554599291282, "loss": 1.8121, "step": 38094 }, { "epoch": 1.27, "grad_norm": 0.48898571729660034, "learning_rate": 0.00037234531632365954, "loss": 1.7505, "step": 38095 }, { "epoch": 1.27, "grad_norm": 0.5071700811386108, "learning_rate": 0.0003723351726303743, "loss": 1.7893, "step": 38096 }, { "epoch": 1.27, "grad_norm": 0.4756752550601959, "learning_rate": 0.00037232502884928484, "loss": 1.7608, "step": 38097 }, { "epoch": 1.27, "grad_norm": 0.49128457903862, "learning_rate": 0.00037231488498040347, "loss": 1.8159, "step": 38098 }, { "epoch": 1.27, "grad_norm": 0.47248902916908264, "learning_rate": 0.0003723047410237425, "loss": 1.707, "step": 38099 }, { "epoch": 1.27, "grad_norm": 0.48431235551834106, "learning_rate": 0.00037229459697931414, "loss": 1.8148, "step": 38100 }, { "epoch": 1.27, "grad_norm": 0.49053508043289185, "learning_rate": 0.0003722844528471308, "loss": 1.7746, "step": 38101 }, { "epoch": 1.27, "grad_norm": 0.5005234479904175, "learning_rate": 0.0003722743086272048, "loss": 1.8694, "step": 38102 }, { "epoch": 1.27, "grad_norm": 0.47581905126571655, "learning_rate": 0.00037226416431954855, "loss": 1.741, "step": 38103 }, { "epoch": 1.27, "grad_norm": 0.4893791377544403, "learning_rate": 0.00037225401992417416, "loss": 1.8323, "step": 38104 }, { "epoch": 1.27, "grad_norm": 0.48244693875312805, "learning_rate": 0.00037224387544109404, "loss": 1.7371, "step": 38105 }, { "epoch": 1.27, "grad_norm": 0.4596726596355438, "learning_rate": 0.0003722337308703205, "loss": 1.6917, "step": 38106 }, { "epoch": 1.27, "grad_norm": 0.48048001527786255, "learning_rate": 0.0003722235862118658, "loss": 1.7729, "step": 38107 }, { "epoch": 1.27, "grad_norm": 0.5026686191558838, "learning_rate": 0.0003722134414657424, "loss": 1.8352, "step": 38108 }, { "epoch": 1.27, "grad_norm": 0.5199730396270752, "learning_rate": 0.00037220329663196247, "loss": 1.7169, "step": 38109 }, { "epoch": 1.27, "grad_norm": 0.5201102495193481, "learning_rate": 0.0003721931517105384, "loss": 1.8221, "step": 38110 }, { "epoch": 1.27, "grad_norm": 0.48677656054496765, "learning_rate": 0.00037218300670148247, "loss": 1.8585, "step": 38111 }, { "epoch": 1.27, "grad_norm": 0.4902195632457733, "learning_rate": 0.0003721728616048071, "loss": 1.8186, "step": 38112 }, { "epoch": 1.27, "grad_norm": 0.4832533299922943, "learning_rate": 0.00037216271642052435, "loss": 1.7899, "step": 38113 }, { "epoch": 1.27, "grad_norm": 0.49331873655319214, "learning_rate": 0.0003721525711486468, "loss": 1.8014, "step": 38114 }, { "epoch": 1.27, "grad_norm": 0.524170458316803, "learning_rate": 0.0003721424257891865, "loss": 1.8221, "step": 38115 }, { "epoch": 1.27, "grad_norm": 0.49173349142074585, "learning_rate": 0.0003721322803421561, "loss": 1.7491, "step": 38116 }, { "epoch": 1.27, "grad_norm": 0.4933902621269226, "learning_rate": 0.0003721221348075677, "loss": 1.7905, "step": 38117 }, { "epoch": 1.27, "grad_norm": 0.46732303500175476, "learning_rate": 0.00037211198918543365, "loss": 1.7197, "step": 38118 }, { "epoch": 1.27, "grad_norm": 0.47478702664375305, "learning_rate": 0.00037210184347576624, "loss": 1.7781, "step": 38119 }, { "epoch": 1.27, "grad_norm": 0.490563303232193, "learning_rate": 0.00037209169767857785, "loss": 1.8469, "step": 38120 }, { "epoch": 1.27, "grad_norm": 0.45661699771881104, "learning_rate": 0.00037208155179388084, "loss": 1.7654, "step": 38121 }, { "epoch": 1.27, "grad_norm": 0.4773632884025574, "learning_rate": 0.0003720714058216872, "loss": 1.7466, "step": 38122 }, { "epoch": 1.27, "grad_norm": 0.47882190346717834, "learning_rate": 0.00037206125976200973, "loss": 1.7988, "step": 38123 }, { "epoch": 1.27, "grad_norm": 0.48645082116127014, "learning_rate": 0.00037205111361486045, "loss": 1.8317, "step": 38124 }, { "epoch": 1.27, "grad_norm": 0.47829800844192505, "learning_rate": 0.00037204096738025166, "loss": 1.712, "step": 38125 }, { "epoch": 1.27, "grad_norm": 0.47541120648384094, "learning_rate": 0.0003720308210581958, "loss": 1.8267, "step": 38126 }, { "epoch": 1.27, "grad_norm": 0.49581339955329895, "learning_rate": 0.00037202067464870515, "loss": 1.7645, "step": 38127 }, { "epoch": 1.27, "grad_norm": 0.4858035445213318, "learning_rate": 0.0003720105281517921, "loss": 1.8136, "step": 38128 }, { "epoch": 1.27, "grad_norm": 0.5058400630950928, "learning_rate": 0.00037200038156746875, "loss": 1.8041, "step": 38129 }, { "epoch": 1.27, "grad_norm": 0.48879462480545044, "learning_rate": 0.00037199023489574754, "loss": 1.8164, "step": 38130 }, { "epoch": 1.27, "grad_norm": 0.47759830951690674, "learning_rate": 0.00037198008813664083, "loss": 1.7512, "step": 38131 }, { "epoch": 1.27, "grad_norm": 0.5398927330970764, "learning_rate": 0.0003719699412901608, "loss": 1.7992, "step": 38132 }, { "epoch": 1.27, "grad_norm": 0.48432615399360657, "learning_rate": 0.00037195979435632003, "loss": 1.7761, "step": 38133 }, { "epoch": 1.27, "grad_norm": 0.4905681014060974, "learning_rate": 0.00037194964733513065, "loss": 1.7135, "step": 38134 }, { "epoch": 1.27, "grad_norm": 0.47454023361206055, "learning_rate": 0.000371939500226605, "loss": 1.7197, "step": 38135 }, { "epoch": 1.27, "grad_norm": 0.4730309844017029, "learning_rate": 0.00037192935303075525, "loss": 1.7785, "step": 38136 }, { "epoch": 1.27, "grad_norm": 0.4755837023258209, "learning_rate": 0.000371919205747594, "loss": 1.7942, "step": 38137 }, { "epoch": 1.27, "grad_norm": 0.47609829902648926, "learning_rate": 0.0003719090583771334, "loss": 1.7292, "step": 38138 }, { "epoch": 1.27, "grad_norm": 0.45154333114624023, "learning_rate": 0.00037189891091938583, "loss": 1.7569, "step": 38139 }, { "epoch": 1.27, "grad_norm": 0.48653653264045715, "learning_rate": 0.00037188876337436346, "loss": 1.7843, "step": 38140 }, { "epoch": 1.27, "grad_norm": 0.4935002326965332, "learning_rate": 0.00037187861574207887, "loss": 1.7782, "step": 38141 }, { "epoch": 1.27, "grad_norm": 0.47403526306152344, "learning_rate": 0.00037186846802254414, "loss": 1.7345, "step": 38142 }, { "epoch": 1.27, "grad_norm": 0.4883784055709839, "learning_rate": 0.0003718583202157717, "loss": 1.7839, "step": 38143 }, { "epoch": 1.27, "grad_norm": 0.47938624024391174, "learning_rate": 0.0003718481723217738, "loss": 1.8513, "step": 38144 }, { "epoch": 1.27, "grad_norm": 0.4790470004081726, "learning_rate": 0.00037183802434056286, "loss": 1.7313, "step": 38145 }, { "epoch": 1.27, "grad_norm": 0.4670145511627197, "learning_rate": 0.0003718278762721512, "loss": 1.7967, "step": 38146 }, { "epoch": 1.27, "grad_norm": 0.47499191761016846, "learning_rate": 0.000371817728116551, "loss": 1.7906, "step": 38147 }, { "epoch": 1.27, "grad_norm": 0.49073711037635803, "learning_rate": 0.0003718075798737747, "loss": 1.7269, "step": 38148 }, { "epoch": 1.27, "grad_norm": 0.5258458852767944, "learning_rate": 0.00037179743154383457, "loss": 1.8658, "step": 38149 }, { "epoch": 1.27, "grad_norm": 0.4957939386367798, "learning_rate": 0.00037178728312674286, "loss": 1.7907, "step": 38150 }, { "epoch": 1.27, "grad_norm": 0.4852682948112488, "learning_rate": 0.00037177713462251203, "loss": 1.7397, "step": 38151 }, { "epoch": 1.27, "grad_norm": 0.4640149772167206, "learning_rate": 0.00037176698603115434, "loss": 1.7814, "step": 38152 }, { "epoch": 1.27, "grad_norm": 0.5026699304580688, "learning_rate": 0.00037175683735268214, "loss": 1.8796, "step": 38153 }, { "epoch": 1.27, "grad_norm": 0.4964029788970947, "learning_rate": 0.0003717466885871077, "loss": 1.8144, "step": 38154 }, { "epoch": 1.27, "grad_norm": 0.4776206612586975, "learning_rate": 0.0003717365397344433, "loss": 1.7483, "step": 38155 }, { "epoch": 1.27, "grad_norm": 0.48006269335746765, "learning_rate": 0.0003717263907947013, "loss": 1.7875, "step": 38156 }, { "epoch": 1.27, "grad_norm": 0.4699389338493347, "learning_rate": 0.0003717162417678941, "loss": 1.8179, "step": 38157 }, { "epoch": 1.27, "grad_norm": 0.4733152985572815, "learning_rate": 0.0003717060926540339, "loss": 1.7566, "step": 38158 }, { "epoch": 1.27, "grad_norm": 0.48772695660591125, "learning_rate": 0.0003716959434531331, "loss": 1.8348, "step": 38159 }, { "epoch": 1.27, "grad_norm": 0.5052887201309204, "learning_rate": 0.00037168579416520394, "loss": 1.8654, "step": 38160 }, { "epoch": 1.27, "grad_norm": 0.484749436378479, "learning_rate": 0.0003716756447902588, "loss": 1.8259, "step": 38161 }, { "epoch": 1.27, "grad_norm": 0.4819488823413849, "learning_rate": 0.0003716654953283101, "loss": 1.7303, "step": 38162 }, { "epoch": 1.27, "grad_norm": 0.4761648178100586, "learning_rate": 0.0003716553457793699, "loss": 1.7864, "step": 38163 }, { "epoch": 1.27, "grad_norm": 0.483020544052124, "learning_rate": 0.0003716451961434508, "loss": 1.7519, "step": 38164 }, { "epoch": 1.27, "grad_norm": 0.4801214039325714, "learning_rate": 0.0003716350464205648, "loss": 1.6613, "step": 38165 }, { "epoch": 1.27, "grad_norm": 0.46717265248298645, "learning_rate": 0.00037162489661072455, "loss": 1.8001, "step": 38166 }, { "epoch": 1.27, "grad_norm": 0.48698708415031433, "learning_rate": 0.00037161474671394224, "loss": 1.8054, "step": 38167 }, { "epoch": 1.27, "grad_norm": 0.5228021144866943, "learning_rate": 0.0003716045967302301, "loss": 1.772, "step": 38168 }, { "epoch": 1.27, "grad_norm": 0.4775931239128113, "learning_rate": 0.0003715944466596005, "loss": 1.7672, "step": 38169 }, { "epoch": 1.27, "grad_norm": 0.5032448172569275, "learning_rate": 0.00037158429650206595, "loss": 1.8631, "step": 38170 }, { "epoch": 1.27, "grad_norm": 0.4857470691204071, "learning_rate": 0.00037157414625763855, "loss": 1.8931, "step": 38171 }, { "epoch": 1.27, "grad_norm": 0.4909443259239197, "learning_rate": 0.00037156399592633055, "loss": 1.7865, "step": 38172 }, { "epoch": 1.27, "grad_norm": 0.47777727246284485, "learning_rate": 0.0003715538455081546, "loss": 1.7359, "step": 38173 }, { "epoch": 1.27, "grad_norm": 0.4921096861362457, "learning_rate": 0.0003715436950031227, "loss": 1.6819, "step": 38174 }, { "epoch": 1.27, "grad_norm": 0.4916767477989197, "learning_rate": 0.0003715335444112473, "loss": 1.8101, "step": 38175 }, { "epoch": 1.27, "grad_norm": 0.47279688715934753, "learning_rate": 0.00037152339373254083, "loss": 1.7632, "step": 38176 }, { "epoch": 1.27, "grad_norm": 0.46544018387794495, "learning_rate": 0.00037151324296701534, "loss": 1.8587, "step": 38177 }, { "epoch": 1.27, "grad_norm": 0.4819319248199463, "learning_rate": 0.00037150309211468347, "loss": 1.8206, "step": 38178 }, { "epoch": 1.27, "grad_norm": 0.46707847714424133, "learning_rate": 0.0003714929411755572, "loss": 1.6926, "step": 38179 }, { "epoch": 1.27, "grad_norm": 0.4823727011680603, "learning_rate": 0.0003714827901496491, "loss": 1.7729, "step": 38180 }, { "epoch": 1.27, "grad_norm": 0.49352505803108215, "learning_rate": 0.00037147263903697153, "loss": 1.7434, "step": 38181 }, { "epoch": 1.27, "grad_norm": 0.49313491582870483, "learning_rate": 0.0003714624878375366, "loss": 1.8503, "step": 38182 }, { "epoch": 1.27, "grad_norm": 0.48156648874282837, "learning_rate": 0.0003714523365513568, "loss": 1.7632, "step": 38183 }, { "epoch": 1.27, "grad_norm": 0.47837427258491516, "learning_rate": 0.0003714421851784443, "loss": 1.7127, "step": 38184 }, { "epoch": 1.27, "grad_norm": 0.46329787373542786, "learning_rate": 0.00037143203371881164, "loss": 1.7715, "step": 38185 }, { "epoch": 1.27, "grad_norm": 0.48447385430336, "learning_rate": 0.0003714218821724709, "loss": 1.6954, "step": 38186 }, { "epoch": 1.27, "grad_norm": 0.46919354796409607, "learning_rate": 0.0003714117305394346, "loss": 1.7544, "step": 38187 }, { "epoch": 1.27, "grad_norm": 0.4769074618816376, "learning_rate": 0.0003714015788197149, "loss": 1.8065, "step": 38188 }, { "epoch": 1.27, "grad_norm": 0.4732525944709778, "learning_rate": 0.00037139142701332427, "loss": 1.7758, "step": 38189 }, { "epoch": 1.27, "grad_norm": 0.5276368260383606, "learning_rate": 0.000371381275120275, "loss": 1.8628, "step": 38190 }, { "epoch": 1.27, "grad_norm": 0.509227991104126, "learning_rate": 0.0003713711231405793, "loss": 1.7637, "step": 38191 }, { "epoch": 1.27, "grad_norm": 0.4861254096031189, "learning_rate": 0.00037136097107424965, "loss": 1.8339, "step": 38192 }, { "epoch": 1.27, "grad_norm": 0.4901449680328369, "learning_rate": 0.00037135081892129823, "loss": 1.7729, "step": 38193 }, { "epoch": 1.27, "grad_norm": 0.5076276063919067, "learning_rate": 0.0003713406666817374, "loss": 1.7937, "step": 38194 }, { "epoch": 1.27, "grad_norm": 0.49325788021087646, "learning_rate": 0.00037133051435557957, "loss": 1.7318, "step": 38195 }, { "epoch": 1.27, "grad_norm": 0.4896402955055237, "learning_rate": 0.0003713203619428371, "loss": 1.7912, "step": 38196 }, { "epoch": 1.27, "grad_norm": 0.4897674322128296, "learning_rate": 0.000371310209443522, "loss": 1.7961, "step": 38197 }, { "epoch": 1.27, "grad_norm": 0.504929780960083, "learning_rate": 0.000371300056857647, "loss": 1.8312, "step": 38198 }, { "epoch": 1.27, "grad_norm": 0.4743364453315735, "learning_rate": 0.0003712899041852241, "loss": 1.7423, "step": 38199 }, { "epoch": 1.27, "grad_norm": 0.485151469707489, "learning_rate": 0.00037127975142626585, "loss": 1.7396, "step": 38200 }, { "epoch": 1.27, "grad_norm": 0.5090926289558411, "learning_rate": 0.0003712695985807845, "loss": 1.7683, "step": 38201 }, { "epoch": 1.27, "grad_norm": 0.481489360332489, "learning_rate": 0.00037125944564879226, "loss": 1.6807, "step": 38202 }, { "epoch": 1.27, "grad_norm": 0.4869547188282013, "learning_rate": 0.00037124929263030165, "loss": 1.7858, "step": 38203 }, { "epoch": 1.27, "grad_norm": 0.4855351150035858, "learning_rate": 0.0003712391395253249, "loss": 1.7637, "step": 38204 }, { "epoch": 1.27, "grad_norm": 0.5011065602302551, "learning_rate": 0.0003712289863338742, "loss": 1.7374, "step": 38205 }, { "epoch": 1.27, "grad_norm": 0.4845312833786011, "learning_rate": 0.00037121883305596214, "loss": 1.8567, "step": 38206 }, { "epoch": 1.27, "grad_norm": 0.47940489649772644, "learning_rate": 0.00037120867969160084, "loss": 1.7389, "step": 38207 }, { "epoch": 1.27, "grad_norm": 0.48282960057258606, "learning_rate": 0.00037119852624080277, "loss": 1.8015, "step": 38208 }, { "epoch": 1.27, "grad_norm": 0.47487613558769226, "learning_rate": 0.00037118837270358, "loss": 1.7622, "step": 38209 }, { "epoch": 1.27, "grad_norm": 0.4712422788143158, "learning_rate": 0.0003711782190799452, "loss": 1.7214, "step": 38210 }, { "epoch": 1.27, "grad_norm": 0.4902927577495575, "learning_rate": 0.0003711680653699105, "loss": 1.7979, "step": 38211 }, { "epoch": 1.27, "grad_norm": 0.48423251509666443, "learning_rate": 0.00037115791157348824, "loss": 1.8088, "step": 38212 }, { "epoch": 1.27, "grad_norm": 0.47252771258354187, "learning_rate": 0.00037114775769069074, "loss": 1.7859, "step": 38213 }, { "epoch": 1.27, "grad_norm": 0.5020331740379333, "learning_rate": 0.0003711376037215304, "loss": 1.8071, "step": 38214 }, { "epoch": 1.27, "grad_norm": 0.48761820793151855, "learning_rate": 0.00037112744966601955, "loss": 1.8096, "step": 38215 }, { "epoch": 1.27, "grad_norm": 0.46836355328559875, "learning_rate": 0.0003711172955241703, "loss": 1.7467, "step": 38216 }, { "epoch": 1.27, "grad_norm": 0.4831424951553345, "learning_rate": 0.0003711071412959952, "loss": 1.8206, "step": 38217 }, { "epoch": 1.27, "grad_norm": 0.49205282330513, "learning_rate": 0.0003710969869815065, "loss": 1.7455, "step": 38218 }, { "epoch": 1.27, "grad_norm": 0.4791773557662964, "learning_rate": 0.0003710868325807165, "loss": 1.7446, "step": 38219 }, { "epoch": 1.27, "grad_norm": 0.48226645588874817, "learning_rate": 0.00037107667809363763, "loss": 1.7755, "step": 38220 }, { "epoch": 1.27, "grad_norm": 0.45830869674682617, "learning_rate": 0.00037106652352028217, "loss": 1.7478, "step": 38221 }, { "epoch": 1.27, "grad_norm": 0.4740971028804779, "learning_rate": 0.00037105636886066237, "loss": 1.7459, "step": 38222 }, { "epoch": 1.27, "grad_norm": 0.4737659990787506, "learning_rate": 0.0003710462141147906, "loss": 1.7861, "step": 38223 }, { "epoch": 1.27, "grad_norm": 0.4811524450778961, "learning_rate": 0.00037103605928267923, "loss": 1.7969, "step": 38224 }, { "epoch": 1.27, "grad_norm": 0.4808533787727356, "learning_rate": 0.00037102590436434045, "loss": 1.7342, "step": 38225 }, { "epoch": 1.27, "grad_norm": 0.49813058972358704, "learning_rate": 0.0003710157493597869, "loss": 1.7976, "step": 38226 }, { "epoch": 1.27, "grad_norm": 0.48000970482826233, "learning_rate": 0.0003710055942690304, "loss": 1.8052, "step": 38227 }, { "epoch": 1.27, "grad_norm": 0.4896625280380249, "learning_rate": 0.0003709954390920838, "loss": 1.7752, "step": 38228 }, { "epoch": 1.27, "grad_norm": 0.4822472631931305, "learning_rate": 0.0003709852838289591, "loss": 1.8252, "step": 38229 }, { "epoch": 1.27, "grad_norm": 0.47634926438331604, "learning_rate": 0.00037097512847966875, "loss": 1.7548, "step": 38230 }, { "epoch": 1.27, "grad_norm": 0.48571139574050903, "learning_rate": 0.0003709649730442251, "loss": 1.7598, "step": 38231 }, { "epoch": 1.27, "grad_norm": 0.49156972765922546, "learning_rate": 0.0003709548175226403, "loss": 1.7217, "step": 38232 }, { "epoch": 1.27, "grad_norm": 0.4795702397823334, "learning_rate": 0.0003709446619149269, "loss": 1.7564, "step": 38233 }, { "epoch": 1.27, "grad_norm": 0.5072187781333923, "learning_rate": 0.00037093450622109706, "loss": 1.8309, "step": 38234 }, { "epoch": 1.27, "grad_norm": 0.48050469160079956, "learning_rate": 0.00037092435044116335, "loss": 1.84, "step": 38235 }, { "epoch": 1.27, "grad_norm": 0.47871753573417664, "learning_rate": 0.00037091419457513776, "loss": 1.8003, "step": 38236 }, { "epoch": 1.27, "grad_norm": 0.4784710705280304, "learning_rate": 0.0003709040386230328, "loss": 1.8062, "step": 38237 }, { "epoch": 1.27, "grad_norm": 0.5532913208007812, "learning_rate": 0.0003708938825848609, "loss": 1.8067, "step": 38238 }, { "epoch": 1.27, "grad_norm": 0.4818718731403351, "learning_rate": 0.0003708837264606342, "loss": 1.7203, "step": 38239 }, { "epoch": 1.27, "grad_norm": 0.49078595638275146, "learning_rate": 0.00037087357025036513, "loss": 1.8869, "step": 38240 }, { "epoch": 1.27, "grad_norm": 0.4789355397224426, "learning_rate": 0.00037086341395406593, "loss": 1.7918, "step": 38241 }, { "epoch": 1.27, "grad_norm": 0.4851800799369812, "learning_rate": 0.00037085325757174896, "loss": 1.7269, "step": 38242 }, { "epoch": 1.27, "grad_norm": 0.47218045592308044, "learning_rate": 0.00037084310110342667, "loss": 1.7759, "step": 38243 }, { "epoch": 1.27, "grad_norm": 0.4917665719985962, "learning_rate": 0.0003708329445491112, "loss": 1.7915, "step": 38244 }, { "epoch": 1.27, "grad_norm": 0.47639453411102295, "learning_rate": 0.0003708227879088151, "loss": 1.7909, "step": 38245 }, { "epoch": 1.27, "grad_norm": 0.4761470556259155, "learning_rate": 0.0003708126311825505, "loss": 1.7219, "step": 38246 }, { "epoch": 1.27, "grad_norm": 0.5126239657402039, "learning_rate": 0.0003708024743703298, "loss": 1.8244, "step": 38247 }, { "epoch": 1.27, "grad_norm": 0.48910510540008545, "learning_rate": 0.00037079231747216525, "loss": 1.7675, "step": 38248 }, { "epoch": 1.27, "grad_norm": 0.49932849407196045, "learning_rate": 0.00037078216048806934, "loss": 1.7774, "step": 38249 }, { "epoch": 1.27, "grad_norm": 0.4762018024921417, "learning_rate": 0.00037077200341805437, "loss": 1.767, "step": 38250 }, { "epoch": 1.27, "grad_norm": 0.47526541352272034, "learning_rate": 0.0003707618462621325, "loss": 1.8349, "step": 38251 }, { "epoch": 1.27, "grad_norm": 0.4973610043525696, "learning_rate": 0.00037075168902031623, "loss": 1.695, "step": 38252 }, { "epoch": 1.27, "grad_norm": 0.49350953102111816, "learning_rate": 0.00037074153169261784, "loss": 1.8161, "step": 38253 }, { "epoch": 1.27, "grad_norm": 0.4901972711086273, "learning_rate": 0.00037073137427904966, "loss": 1.7851, "step": 38254 }, { "epoch": 1.27, "grad_norm": 0.49416881799697876, "learning_rate": 0.00037072121677962405, "loss": 1.7931, "step": 38255 }, { "epoch": 1.27, "grad_norm": 0.48615142703056335, "learning_rate": 0.00037071105919435327, "loss": 1.7581, "step": 38256 }, { "epoch": 1.27, "grad_norm": 0.46300387382507324, "learning_rate": 0.00037070090152324956, "loss": 1.7342, "step": 38257 }, { "epoch": 1.27, "grad_norm": 0.4883681535720825, "learning_rate": 0.00037069074376632556, "loss": 1.8036, "step": 38258 }, { "epoch": 1.27, "grad_norm": 0.4766176640987396, "learning_rate": 0.0003706805859235933, "loss": 1.8042, "step": 38259 }, { "epoch": 1.27, "grad_norm": 0.4781287610530853, "learning_rate": 0.00037067042799506525, "loss": 1.7676, "step": 38260 }, { "epoch": 1.27, "grad_norm": 0.4709271192550659, "learning_rate": 0.0003706602699807537, "loss": 1.8045, "step": 38261 }, { "epoch": 1.27, "grad_norm": 0.4689973294734955, "learning_rate": 0.000370650111880671, "loss": 1.7779, "step": 38262 }, { "epoch": 1.27, "grad_norm": 0.48121577501296997, "learning_rate": 0.00037063995369482957, "loss": 1.7807, "step": 38263 }, { "epoch": 1.27, "grad_norm": 0.4740874767303467, "learning_rate": 0.00037062979542324147, "loss": 1.8242, "step": 38264 }, { "epoch": 1.27, "grad_norm": 0.48780521750450134, "learning_rate": 0.0003706196370659193, "loss": 1.8063, "step": 38265 }, { "epoch": 1.27, "grad_norm": 0.48843660950660706, "learning_rate": 0.0003706094786228753, "loss": 1.8521, "step": 38266 }, { "epoch": 1.27, "grad_norm": 0.45436179637908936, "learning_rate": 0.0003705993200941218, "loss": 1.7246, "step": 38267 }, { "epoch": 1.27, "grad_norm": 0.47458165884017944, "learning_rate": 0.0003705891614796711, "loss": 1.7574, "step": 38268 }, { "epoch": 1.27, "grad_norm": 0.4823269844055176, "learning_rate": 0.00037057900277953556, "loss": 1.752, "step": 38269 }, { "epoch": 1.27, "grad_norm": 0.4923311173915863, "learning_rate": 0.0003705688439937276, "loss": 1.8018, "step": 38270 }, { "epoch": 1.27, "grad_norm": 0.48958316445350647, "learning_rate": 0.0003705586851222594, "loss": 1.7964, "step": 38271 }, { "epoch": 1.27, "grad_norm": 0.48213645815849304, "learning_rate": 0.0003705485261651433, "loss": 1.7974, "step": 38272 }, { "epoch": 1.27, "grad_norm": 0.48974326252937317, "learning_rate": 0.00037053836712239174, "loss": 1.7498, "step": 38273 }, { "epoch": 1.27, "grad_norm": 0.485939621925354, "learning_rate": 0.00037052820799401693, "loss": 1.7492, "step": 38274 }, { "epoch": 1.27, "grad_norm": 0.4989353120326996, "learning_rate": 0.0003705180487800313, "loss": 1.7456, "step": 38275 }, { "epoch": 1.27, "grad_norm": 0.47769689559936523, "learning_rate": 0.0003705078894804472, "loss": 1.8256, "step": 38276 }, { "epoch": 1.27, "grad_norm": 0.4848717749118805, "learning_rate": 0.0003704977300952769, "loss": 1.7675, "step": 38277 }, { "epoch": 1.27, "grad_norm": 0.4853445291519165, "learning_rate": 0.00037048757062453267, "loss": 1.797, "step": 38278 }, { "epoch": 1.27, "grad_norm": 0.48101335763931274, "learning_rate": 0.00037047741106822695, "loss": 1.7251, "step": 38279 }, { "epoch": 1.27, "grad_norm": 0.4864119291305542, "learning_rate": 0.0003704672514263721, "loss": 1.8145, "step": 38280 }, { "epoch": 1.27, "grad_norm": 0.48115211725234985, "learning_rate": 0.00037045709169898037, "loss": 1.763, "step": 38281 }, { "epoch": 1.27, "grad_norm": 0.5019668936729431, "learning_rate": 0.000370446931886064, "loss": 1.7022, "step": 38282 }, { "epoch": 1.27, "grad_norm": 0.5012773275375366, "learning_rate": 0.0003704367719876356, "loss": 1.7878, "step": 38283 }, { "epoch": 1.27, "grad_norm": 0.4886016249656677, "learning_rate": 0.00037042661200370724, "loss": 1.7647, "step": 38284 }, { "epoch": 1.27, "grad_norm": 0.5342589616775513, "learning_rate": 0.00037041645193429135, "loss": 1.851, "step": 38285 }, { "epoch": 1.27, "grad_norm": 0.4721938669681549, "learning_rate": 0.0003704062917794004, "loss": 1.8264, "step": 38286 }, { "epoch": 1.27, "grad_norm": 0.4917426109313965, "learning_rate": 0.0003703961315390463, "loss": 1.7216, "step": 38287 }, { "epoch": 1.27, "grad_norm": 0.4753846526145935, "learning_rate": 0.00037038597121324187, "loss": 1.772, "step": 38288 }, { "epoch": 1.27, "grad_norm": 0.4842197299003601, "learning_rate": 0.0003703758108019992, "loss": 1.7567, "step": 38289 }, { "epoch": 1.27, "grad_norm": 0.5194485783576965, "learning_rate": 0.0003703656503053307, "loss": 1.8, "step": 38290 }, { "epoch": 1.27, "grad_norm": 0.5057412385940552, "learning_rate": 0.0003703554897232486, "loss": 1.7434, "step": 38291 }, { "epoch": 1.27, "grad_norm": 0.4927774965763092, "learning_rate": 0.00037034532905576533, "loss": 1.832, "step": 38292 }, { "epoch": 1.27, "grad_norm": 0.5183713436126709, "learning_rate": 0.0003703351683028933, "loss": 1.7888, "step": 38293 }, { "epoch": 1.27, "grad_norm": 0.4673711061477661, "learning_rate": 0.00037032500746464453, "loss": 1.7705, "step": 38294 }, { "epoch": 1.27, "grad_norm": 0.47561538219451904, "learning_rate": 0.0003703148465410317, "loss": 1.7495, "step": 38295 }, { "epoch": 1.27, "grad_norm": 0.49999621510505676, "learning_rate": 0.00037030468553206694, "loss": 1.817, "step": 38296 }, { "epoch": 1.27, "grad_norm": 0.4778505861759186, "learning_rate": 0.0003702945244377627, "loss": 1.7756, "step": 38297 }, { "epoch": 1.27, "grad_norm": 0.47372522950172424, "learning_rate": 0.0003702843632581313, "loss": 1.7627, "step": 38298 }, { "epoch": 1.27, "grad_norm": 0.47386714816093445, "learning_rate": 0.00037027420199318495, "loss": 1.6673, "step": 38299 }, { "epoch": 1.27, "grad_norm": 0.49644580483436584, "learning_rate": 0.0003702640406429362, "loss": 1.7467, "step": 38300 }, { "epoch": 1.27, "grad_norm": 0.9039403200149536, "learning_rate": 0.00037025387920739707, "loss": 1.8216, "step": 38301 }, { "epoch": 1.27, "grad_norm": 0.5077435970306396, "learning_rate": 0.0003702437176865803, "loss": 1.9063, "step": 38302 }, { "epoch": 1.27, "grad_norm": 0.4756559431552887, "learning_rate": 0.0003702335560804978, "loss": 1.8066, "step": 38303 }, { "epoch": 1.27, "grad_norm": 1.4624125957489014, "learning_rate": 0.00037022339438916226, "loss": 1.8407, "step": 38304 }, { "epoch": 1.27, "grad_norm": 0.4806693196296692, "learning_rate": 0.00037021323261258576, "loss": 1.7716, "step": 38305 }, { "epoch": 1.27, "grad_norm": 0.5014879703521729, "learning_rate": 0.0003702030707507808, "loss": 1.7623, "step": 38306 }, { "epoch": 1.27, "grad_norm": 0.488836407661438, "learning_rate": 0.00037019290880375977, "loss": 1.7547, "step": 38307 }, { "epoch": 1.27, "grad_norm": 0.4920003116130829, "learning_rate": 0.00037018274677153473, "loss": 1.8093, "step": 38308 }, { "epoch": 1.27, "grad_norm": 0.5058250427246094, "learning_rate": 0.00037017258465411824, "loss": 1.7678, "step": 38309 }, { "epoch": 1.27, "grad_norm": 0.47426751255989075, "learning_rate": 0.0003701624224515225, "loss": 1.7648, "step": 38310 }, { "epoch": 1.27, "grad_norm": 0.4750177264213562, "learning_rate": 0.00037015226016376, "loss": 1.8102, "step": 38311 }, { "epoch": 1.27, "grad_norm": 0.46647995710372925, "learning_rate": 0.00037014209779084287, "loss": 1.7525, "step": 38312 }, { "epoch": 1.27, "grad_norm": 0.47759172320365906, "learning_rate": 0.0003701319353327837, "loss": 1.7359, "step": 38313 }, { "epoch": 1.27, "grad_norm": 0.5206151604652405, "learning_rate": 0.0003701217727895947, "loss": 1.7359, "step": 38314 }, { "epoch": 1.27, "grad_norm": 0.490170419216156, "learning_rate": 0.00037011161016128815, "loss": 1.7944, "step": 38315 }, { "epoch": 1.27, "grad_norm": 0.4658544957637787, "learning_rate": 0.0003701014474478764, "loss": 1.7919, "step": 38316 }, { "epoch": 1.27, "grad_norm": 0.4801827371120453, "learning_rate": 0.00037009128464937185, "loss": 1.6414, "step": 38317 }, { "epoch": 1.27, "grad_norm": 0.48646605014801025, "learning_rate": 0.00037008112176578693, "loss": 1.7313, "step": 38318 }, { "epoch": 1.27, "grad_norm": 0.48954957723617554, "learning_rate": 0.0003700709587971336, "loss": 1.7292, "step": 38319 }, { "epoch": 1.27, "grad_norm": 0.4612646996974945, "learning_rate": 0.0003700607957434247, "loss": 1.7904, "step": 38320 }, { "epoch": 1.27, "grad_norm": 0.4731205105781555, "learning_rate": 0.0003700506326046722, "loss": 1.7628, "step": 38321 }, { "epoch": 1.27, "grad_norm": 0.48796597123146057, "learning_rate": 0.0003700404693808885, "loss": 1.7846, "step": 38322 }, { "epoch": 1.28, "grad_norm": 0.47508159279823303, "learning_rate": 0.00037003030607208606, "loss": 1.7981, "step": 38323 }, { "epoch": 1.28, "grad_norm": 0.48427122831344604, "learning_rate": 0.0003700201426782771, "loss": 1.7749, "step": 38324 }, { "epoch": 1.28, "grad_norm": 0.4879682660102844, "learning_rate": 0.00037000997919947414, "loss": 1.8171, "step": 38325 }, { "epoch": 1.28, "grad_norm": 0.4692141115665436, "learning_rate": 0.0003699998156356892, "loss": 1.6886, "step": 38326 }, { "epoch": 1.28, "grad_norm": 0.4844677746295929, "learning_rate": 0.00036998965198693497, "loss": 1.7341, "step": 38327 }, { "epoch": 1.28, "grad_norm": 0.4938536584377289, "learning_rate": 0.0003699794882532235, "loss": 1.7631, "step": 38328 }, { "epoch": 1.28, "grad_norm": 0.4972800612449646, "learning_rate": 0.00036996932443456726, "loss": 1.8038, "step": 38329 }, { "epoch": 1.28, "grad_norm": 0.49326273798942566, "learning_rate": 0.00036995916053097854, "loss": 1.8427, "step": 38330 }, { "epoch": 1.28, "grad_norm": 0.4999995231628418, "learning_rate": 0.00036994899654246974, "loss": 1.7883, "step": 38331 }, { "epoch": 1.28, "grad_norm": 0.47968512773513794, "learning_rate": 0.0003699388324690532, "loss": 1.747, "step": 38332 }, { "epoch": 1.28, "grad_norm": 0.494773268699646, "learning_rate": 0.0003699286683107412, "loss": 1.7982, "step": 38333 }, { "epoch": 1.28, "grad_norm": 0.49196022748947144, "learning_rate": 0.00036991850406754606, "loss": 1.8118, "step": 38334 }, { "epoch": 1.28, "grad_norm": 0.5041801333427429, "learning_rate": 0.0003699083397394802, "loss": 1.7743, "step": 38335 }, { "epoch": 1.28, "grad_norm": 0.4812222421169281, "learning_rate": 0.0003698981753265559, "loss": 1.8143, "step": 38336 }, { "epoch": 1.28, "grad_norm": 0.4781598150730133, "learning_rate": 0.00036988801082878553, "loss": 1.7925, "step": 38337 }, { "epoch": 1.28, "grad_norm": 0.484361469745636, "learning_rate": 0.00036987784624618143, "loss": 1.8829, "step": 38338 }, { "epoch": 1.28, "grad_norm": 0.5012809634208679, "learning_rate": 0.0003698676815787559, "loss": 1.7086, "step": 38339 }, { "epoch": 1.28, "grad_norm": 0.46950745582580566, "learning_rate": 0.00036985751682652127, "loss": 1.7464, "step": 38340 }, { "epoch": 1.28, "grad_norm": 0.4957541227340698, "learning_rate": 0.00036984735198948993, "loss": 1.8367, "step": 38341 }, { "epoch": 1.28, "grad_norm": 0.49201127886772156, "learning_rate": 0.0003698371870676742, "loss": 1.801, "step": 38342 }, { "epoch": 1.28, "grad_norm": 0.49608972668647766, "learning_rate": 0.0003698270220610865, "loss": 1.7754, "step": 38343 }, { "epoch": 1.28, "grad_norm": 0.4942977726459503, "learning_rate": 0.0003698168569697389, "loss": 1.8324, "step": 38344 }, { "epoch": 1.28, "grad_norm": 0.48939505219459534, "learning_rate": 0.000369806691793644, "loss": 1.8421, "step": 38345 }, { "epoch": 1.28, "grad_norm": 0.46506860852241516, "learning_rate": 0.00036979652653281413, "loss": 1.787, "step": 38346 }, { "epoch": 1.28, "grad_norm": 0.4876347482204437, "learning_rate": 0.00036978636118726146, "loss": 1.7642, "step": 38347 }, { "epoch": 1.28, "grad_norm": 0.493363618850708, "learning_rate": 0.0003697761957569986, "loss": 1.7393, "step": 38348 }, { "epoch": 1.28, "grad_norm": 0.48636868596076965, "learning_rate": 0.00036976603024203744, "loss": 1.812, "step": 38349 }, { "epoch": 1.28, "grad_norm": 0.48712095618247986, "learning_rate": 0.0003697558646423909, "loss": 1.7882, "step": 38350 }, { "epoch": 1.28, "grad_norm": 0.49153727293014526, "learning_rate": 0.0003697456989580707, "loss": 1.7386, "step": 38351 }, { "epoch": 1.28, "grad_norm": 0.48599547147750854, "learning_rate": 0.00036973553318908977, "loss": 1.7534, "step": 38352 }, { "epoch": 1.28, "grad_norm": 0.49426984786987305, "learning_rate": 0.0003697253673354601, "loss": 1.7963, "step": 38353 }, { "epoch": 1.28, "grad_norm": 0.4868246614933014, "learning_rate": 0.0003697152013971941, "loss": 1.7831, "step": 38354 }, { "epoch": 1.28, "grad_norm": 0.4829164743423462, "learning_rate": 0.00036970503537430414, "loss": 1.7813, "step": 38355 }, { "epoch": 1.28, "grad_norm": 0.5034285187721252, "learning_rate": 0.0003696948692668025, "loss": 1.7417, "step": 38356 }, { "epoch": 1.28, "grad_norm": 0.47551238536834717, "learning_rate": 0.00036968470307470155, "loss": 1.7194, "step": 38357 }, { "epoch": 1.28, "grad_norm": 0.5111321806907654, "learning_rate": 0.0003696745367980136, "loss": 1.7691, "step": 38358 }, { "epoch": 1.28, "grad_norm": 0.5224094986915588, "learning_rate": 0.0003696643704367511, "loss": 1.714, "step": 38359 }, { "epoch": 1.28, "grad_norm": 0.481853187084198, "learning_rate": 0.0003696542039909263, "loss": 1.8175, "step": 38360 }, { "epoch": 1.28, "grad_norm": 0.46988287568092346, "learning_rate": 0.0003696440374605515, "loss": 1.7412, "step": 38361 }, { "epoch": 1.28, "grad_norm": 0.4730152487754822, "learning_rate": 0.00036963387084563927, "loss": 1.7574, "step": 38362 }, { "epoch": 1.28, "grad_norm": 0.5014452934265137, "learning_rate": 0.00036962370414620165, "loss": 1.7882, "step": 38363 }, { "epoch": 1.28, "grad_norm": 0.49140533804893494, "learning_rate": 0.00036961353736225113, "loss": 1.6959, "step": 38364 }, { "epoch": 1.28, "grad_norm": 0.4971056878566742, "learning_rate": 0.0003696033704938, "loss": 1.766, "step": 38365 }, { "epoch": 1.28, "grad_norm": 0.4761749804019928, "learning_rate": 0.0003695932035408607, "loss": 1.8397, "step": 38366 }, { "epoch": 1.28, "grad_norm": 0.4914311468601227, "learning_rate": 0.00036958303650344544, "loss": 1.9057, "step": 38367 }, { "epoch": 1.28, "grad_norm": 0.4582885205745697, "learning_rate": 0.0003695728693815667, "loss": 1.7347, "step": 38368 }, { "epoch": 1.28, "grad_norm": 0.48477306962013245, "learning_rate": 0.00036956270217523667, "loss": 1.8247, "step": 38369 }, { "epoch": 1.28, "grad_norm": 0.5080223679542542, "learning_rate": 0.0003695525348844678, "loss": 1.7751, "step": 38370 }, { "epoch": 1.28, "grad_norm": 0.49399423599243164, "learning_rate": 0.0003695423675092724, "loss": 1.7332, "step": 38371 }, { "epoch": 1.28, "grad_norm": 0.48915961384773254, "learning_rate": 0.0003695322000496628, "loss": 1.6524, "step": 38372 }, { "epoch": 1.28, "grad_norm": 0.48684078454971313, "learning_rate": 0.0003695220325056515, "loss": 1.7585, "step": 38373 }, { "epoch": 1.28, "grad_norm": 0.4885469377040863, "learning_rate": 0.0003695118648772505, "loss": 1.7312, "step": 38374 }, { "epoch": 1.28, "grad_norm": 0.4907951056957245, "learning_rate": 0.0003695016971644724, "loss": 1.7948, "step": 38375 }, { "epoch": 1.28, "grad_norm": 0.4938404858112335, "learning_rate": 0.0003694915293673295, "loss": 1.8071, "step": 38376 }, { "epoch": 1.28, "grad_norm": 0.47422757744789124, "learning_rate": 0.0003694813614858341, "loss": 1.8621, "step": 38377 }, { "epoch": 1.28, "grad_norm": 0.4829553961753845, "learning_rate": 0.00036947119351999854, "loss": 1.8059, "step": 38378 }, { "epoch": 1.28, "grad_norm": 0.48293426632881165, "learning_rate": 0.00036946102546983524, "loss": 1.7798, "step": 38379 }, { "epoch": 1.28, "grad_norm": 0.49138879776000977, "learning_rate": 0.00036945085733535647, "loss": 1.7743, "step": 38380 }, { "epoch": 1.28, "grad_norm": 0.48527970910072327, "learning_rate": 0.0003694406891165746, "loss": 1.7527, "step": 38381 }, { "epoch": 1.28, "grad_norm": 0.4839042127132416, "learning_rate": 0.000369430520813502, "loss": 1.7517, "step": 38382 }, { "epoch": 1.28, "grad_norm": 0.48370736837387085, "learning_rate": 0.0003694203524261509, "loss": 1.7825, "step": 38383 }, { "epoch": 1.28, "grad_norm": 0.48558473587036133, "learning_rate": 0.0003694101839545337, "loss": 1.8202, "step": 38384 }, { "epoch": 1.28, "grad_norm": 0.47836583852767944, "learning_rate": 0.00036940001539866285, "loss": 1.7476, "step": 38385 }, { "epoch": 1.28, "grad_norm": 0.4818648099899292, "learning_rate": 0.0003693898467585506, "loss": 1.7619, "step": 38386 }, { "epoch": 1.28, "grad_norm": 0.4691764712333679, "learning_rate": 0.00036937967803420927, "loss": 1.7234, "step": 38387 }, { "epoch": 1.28, "grad_norm": 0.49772363901138306, "learning_rate": 0.0003693695092256513, "loss": 1.7655, "step": 38388 }, { "epoch": 1.28, "grad_norm": 0.5077914595603943, "learning_rate": 0.00036935934033288886, "loss": 1.7764, "step": 38389 }, { "epoch": 1.28, "grad_norm": 0.49451330304145813, "learning_rate": 0.0003693491713559344, "loss": 1.8068, "step": 38390 }, { "epoch": 1.28, "grad_norm": 0.45842641592025757, "learning_rate": 0.00036933900229480036, "loss": 1.7427, "step": 38391 }, { "epoch": 1.28, "grad_norm": 0.5033795833587646, "learning_rate": 0.0003693288331494989, "loss": 1.8267, "step": 38392 }, { "epoch": 1.28, "grad_norm": 0.5193485617637634, "learning_rate": 0.0003693186639200426, "loss": 1.8133, "step": 38393 }, { "epoch": 1.28, "grad_norm": 0.4979909360408783, "learning_rate": 0.0003693084946064435, "loss": 1.8452, "step": 38394 }, { "epoch": 1.28, "grad_norm": 0.4911247789859772, "learning_rate": 0.0003692983252087141, "loss": 1.7564, "step": 38395 }, { "epoch": 1.28, "grad_norm": 0.5009046792984009, "learning_rate": 0.00036928815572686685, "loss": 1.7903, "step": 38396 }, { "epoch": 1.28, "grad_norm": 0.4919716417789459, "learning_rate": 0.0003692779861609139, "loss": 1.7512, "step": 38397 }, { "epoch": 1.28, "grad_norm": 0.4863639771938324, "learning_rate": 0.00036926781651086776, "loss": 1.7858, "step": 38398 }, { "epoch": 1.28, "grad_norm": 0.4833963215351105, "learning_rate": 0.0003692576467767406, "loss": 1.7944, "step": 38399 }, { "epoch": 1.28, "grad_norm": 0.48624324798583984, "learning_rate": 0.000369247476958545, "loss": 1.6993, "step": 38400 }, { "epoch": 1.28, "grad_norm": 0.5002226829528809, "learning_rate": 0.00036923730705629306, "loss": 1.8109, "step": 38401 }, { "epoch": 1.28, "grad_norm": 0.4875149130821228, "learning_rate": 0.0003692271370699972, "loss": 1.7578, "step": 38402 }, { "epoch": 1.28, "grad_norm": 0.48026207089424133, "learning_rate": 0.0003692169669996699, "loss": 1.791, "step": 38403 }, { "epoch": 1.28, "grad_norm": 0.48104041814804077, "learning_rate": 0.00036920679684532325, "loss": 1.8674, "step": 38404 }, { "epoch": 1.28, "grad_norm": 0.47369351983070374, "learning_rate": 0.0003691966266069699, "loss": 1.7871, "step": 38405 }, { "epoch": 1.28, "grad_norm": 0.4828261137008667, "learning_rate": 0.00036918645628462193, "loss": 1.8455, "step": 38406 }, { "epoch": 1.28, "grad_norm": 0.5070716142654419, "learning_rate": 0.0003691762858782919, "loss": 1.7856, "step": 38407 }, { "epoch": 1.28, "grad_norm": 0.47728145122528076, "learning_rate": 0.00036916611538799195, "loss": 1.8574, "step": 38408 }, { "epoch": 1.28, "grad_norm": 0.48161378502845764, "learning_rate": 0.00036915594481373454, "loss": 1.7747, "step": 38409 }, { "epoch": 1.28, "grad_norm": 0.49416518211364746, "learning_rate": 0.00036914577415553214, "loss": 1.8312, "step": 38410 }, { "epoch": 1.28, "grad_norm": 0.4945347011089325, "learning_rate": 0.00036913560341339676, "loss": 1.8624, "step": 38411 }, { "epoch": 1.28, "grad_norm": 0.46619942784309387, "learning_rate": 0.00036912543258734115, "loss": 1.7706, "step": 38412 }, { "epoch": 1.28, "grad_norm": 0.4677220284938812, "learning_rate": 0.00036911526167737724, "loss": 1.7844, "step": 38413 }, { "epoch": 1.28, "grad_norm": 0.4804523289203644, "learning_rate": 0.00036910509068351777, "loss": 1.8716, "step": 38414 }, { "epoch": 1.28, "grad_norm": 0.46761488914489746, "learning_rate": 0.00036909491960577476, "loss": 1.7409, "step": 38415 }, { "epoch": 1.28, "grad_norm": 0.48349565267562866, "learning_rate": 0.00036908474844416075, "loss": 1.7329, "step": 38416 }, { "epoch": 1.28, "grad_norm": 0.4593047797679901, "learning_rate": 0.0003690745771986881, "loss": 1.7569, "step": 38417 }, { "epoch": 1.28, "grad_norm": 0.4955090284347534, "learning_rate": 0.000369064405869369, "loss": 1.7219, "step": 38418 }, { "epoch": 1.28, "grad_norm": 0.5049843788146973, "learning_rate": 0.00036905423445621595, "loss": 1.8464, "step": 38419 }, { "epoch": 1.28, "grad_norm": 0.5111597776412964, "learning_rate": 0.0003690440629592412, "loss": 1.746, "step": 38420 }, { "epoch": 1.28, "grad_norm": 0.49901071190834045, "learning_rate": 0.0003690338913784571, "loss": 1.7603, "step": 38421 }, { "epoch": 1.28, "grad_norm": 0.4685713052749634, "learning_rate": 0.00036902371971387607, "loss": 1.786, "step": 38422 }, { "epoch": 1.28, "grad_norm": 0.5139258503913879, "learning_rate": 0.00036901354796551044, "loss": 1.7666, "step": 38423 }, { "epoch": 1.28, "grad_norm": 0.4899129867553711, "learning_rate": 0.0003690033761333725, "loss": 1.7788, "step": 38424 }, { "epoch": 1.28, "grad_norm": 0.48015788197517395, "learning_rate": 0.00036899320421747466, "loss": 1.7196, "step": 38425 }, { "epoch": 1.28, "grad_norm": 0.4815167188644409, "learning_rate": 0.0003689830322178291, "loss": 1.7984, "step": 38426 }, { "epoch": 1.28, "grad_norm": 0.48648184537887573, "learning_rate": 0.00036897286013444844, "loss": 1.7253, "step": 38427 }, { "epoch": 1.28, "grad_norm": 0.4871262311935425, "learning_rate": 0.0003689626879673449, "loss": 1.7893, "step": 38428 }, { "epoch": 1.28, "grad_norm": 0.7809762954711914, "learning_rate": 0.0003689525157165307, "loss": 1.8104, "step": 38429 }, { "epoch": 1.28, "grad_norm": 0.4740724265575409, "learning_rate": 0.0003689423433820185, "loss": 1.822, "step": 38430 }, { "epoch": 1.28, "grad_norm": 0.47800686955451965, "learning_rate": 0.0003689321709638203, "loss": 1.8604, "step": 38431 }, { "epoch": 1.28, "grad_norm": 0.4801945090293884, "learning_rate": 0.00036892199846194856, "loss": 1.7708, "step": 38432 }, { "epoch": 1.28, "grad_norm": 0.4783712923526764, "learning_rate": 0.0003689118258764158, "loss": 1.7075, "step": 38433 }, { "epoch": 1.28, "grad_norm": 0.4783632457256317, "learning_rate": 0.00036890165320723416, "loss": 1.7476, "step": 38434 }, { "epoch": 1.28, "grad_norm": 0.4920286536216736, "learning_rate": 0.0003688914804544161, "loss": 1.7916, "step": 38435 }, { "epoch": 1.28, "grad_norm": 0.4827367067337036, "learning_rate": 0.0003688813076179739, "loss": 1.7552, "step": 38436 }, { "epoch": 1.28, "grad_norm": 0.4929346442222595, "learning_rate": 0.00036887113469792, "loss": 1.8628, "step": 38437 }, { "epoch": 1.28, "grad_norm": 0.49383142590522766, "learning_rate": 0.00036886096169426653, "loss": 1.7856, "step": 38438 }, { "epoch": 1.28, "grad_norm": 0.4805653691291809, "learning_rate": 0.0003688507886070262, "loss": 1.7563, "step": 38439 }, { "epoch": 1.28, "grad_norm": 0.49223312735557556, "learning_rate": 0.00036884061543621105, "loss": 1.7815, "step": 38440 }, { "epoch": 1.28, "grad_norm": 0.4827234148979187, "learning_rate": 0.0003688304421818336, "loss": 1.75, "step": 38441 }, { "epoch": 1.28, "grad_norm": 0.4681650996208191, "learning_rate": 0.00036882026884390613, "loss": 1.7413, "step": 38442 }, { "epoch": 1.28, "grad_norm": 0.47101646661758423, "learning_rate": 0.00036881009542244085, "loss": 1.7235, "step": 38443 }, { "epoch": 1.28, "grad_norm": 0.4801853895187378, "learning_rate": 0.0003687999219174504, "loss": 1.7981, "step": 38444 }, { "epoch": 1.28, "grad_norm": 0.4784902036190033, "learning_rate": 0.000368789748328947, "loss": 1.7174, "step": 38445 }, { "epoch": 1.28, "grad_norm": 0.47846701741218567, "learning_rate": 0.00036877957465694287, "loss": 1.7667, "step": 38446 }, { "epoch": 1.28, "grad_norm": 0.4907298982143402, "learning_rate": 0.00036876940090145057, "loss": 1.809, "step": 38447 }, { "epoch": 1.28, "grad_norm": 0.48592817783355713, "learning_rate": 0.0003687592270624823, "loss": 1.8096, "step": 38448 }, { "epoch": 1.28, "grad_norm": 0.49746182560920715, "learning_rate": 0.0003687490531400505, "loss": 1.7995, "step": 38449 }, { "epoch": 1.28, "grad_norm": 0.49695757031440735, "learning_rate": 0.00036873887913416745, "loss": 1.7646, "step": 38450 }, { "epoch": 1.28, "grad_norm": 0.4787687361240387, "learning_rate": 0.0003687287050448455, "loss": 1.8053, "step": 38451 }, { "epoch": 1.28, "grad_norm": 0.506848156452179, "learning_rate": 0.00036871853087209707, "loss": 1.7994, "step": 38452 }, { "epoch": 1.28, "grad_norm": 0.48385727405548096, "learning_rate": 0.0003687083566159344, "loss": 1.7301, "step": 38453 }, { "epoch": 1.28, "grad_norm": 0.47075313329696655, "learning_rate": 0.00036869818227637, "loss": 1.7387, "step": 38454 }, { "epoch": 1.28, "grad_norm": 0.5847705602645874, "learning_rate": 0.0003686880078534161, "loss": 1.8634, "step": 38455 }, { "epoch": 1.28, "grad_norm": 0.4805571138858795, "learning_rate": 0.00036867783334708506, "loss": 1.7575, "step": 38456 }, { "epoch": 1.28, "grad_norm": 0.48092880845069885, "learning_rate": 0.0003686676587573893, "loss": 1.6823, "step": 38457 }, { "epoch": 1.28, "grad_norm": 0.4831421971321106, "learning_rate": 0.00036865748408434107, "loss": 1.8298, "step": 38458 }, { "epoch": 1.28, "grad_norm": 0.512186586856842, "learning_rate": 0.0003686473093279528, "loss": 1.8334, "step": 38459 }, { "epoch": 1.28, "grad_norm": 0.48163846135139465, "learning_rate": 0.0003686371344882368, "loss": 1.847, "step": 38460 }, { "epoch": 1.28, "grad_norm": 0.49392884969711304, "learning_rate": 0.00036862695956520533, "loss": 1.7465, "step": 38461 }, { "epoch": 1.28, "grad_norm": 0.4763263165950775, "learning_rate": 0.000368616784558871, "loss": 1.8649, "step": 38462 }, { "epoch": 1.28, "grad_norm": 0.48476991057395935, "learning_rate": 0.0003686066094692459, "loss": 1.7582, "step": 38463 }, { "epoch": 1.28, "grad_norm": 0.46719178557395935, "learning_rate": 0.00036859643429634255, "loss": 1.7933, "step": 38464 }, { "epoch": 1.28, "grad_norm": 0.4702693521976471, "learning_rate": 0.0003685862590401732, "loss": 1.714, "step": 38465 }, { "epoch": 1.28, "grad_norm": 0.46281471848487854, "learning_rate": 0.00036857608370075023, "loss": 1.8122, "step": 38466 }, { "epoch": 1.28, "grad_norm": 0.4809693992137909, "learning_rate": 0.00036856590827808605, "loss": 1.8683, "step": 38467 }, { "epoch": 1.28, "grad_norm": 0.48554885387420654, "learning_rate": 0.0003685557327721928, "loss": 1.7601, "step": 38468 }, { "epoch": 1.28, "grad_norm": 0.5792884230613708, "learning_rate": 0.00036854555718308327, "loss": 1.8347, "step": 38469 }, { "epoch": 1.28, "grad_norm": 0.4720075726509094, "learning_rate": 0.00036853538151076934, "loss": 1.741, "step": 38470 }, { "epoch": 1.28, "grad_norm": 0.4847341477870941, "learning_rate": 0.0003685252057552636, "loss": 1.7671, "step": 38471 }, { "epoch": 1.28, "grad_norm": 0.47636717557907104, "learning_rate": 0.0003685150299165784, "loss": 1.8181, "step": 38472 }, { "epoch": 1.28, "grad_norm": 0.47932279109954834, "learning_rate": 0.0003685048539947259, "loss": 1.7964, "step": 38473 }, { "epoch": 1.28, "grad_norm": 0.48913294076919556, "learning_rate": 0.00036849467798971876, "loss": 1.7824, "step": 38474 }, { "epoch": 1.28, "grad_norm": 0.47777777910232544, "learning_rate": 0.00036848450190156917, "loss": 1.7583, "step": 38475 }, { "epoch": 1.28, "grad_norm": 0.4721699655056, "learning_rate": 0.0003684743257302894, "loss": 1.7701, "step": 38476 }, { "epoch": 1.28, "grad_norm": 0.48198121786117554, "learning_rate": 0.00036846414947589194, "loss": 1.7436, "step": 38477 }, { "epoch": 1.28, "grad_norm": 0.4949427545070648, "learning_rate": 0.00036845397313838897, "loss": 1.7518, "step": 38478 }, { "epoch": 1.28, "grad_norm": 0.48038551211357117, "learning_rate": 0.0003684437967177931, "loss": 1.733, "step": 38479 }, { "epoch": 1.28, "grad_norm": 0.48737388849258423, "learning_rate": 0.0003684336202141166, "loss": 1.7513, "step": 38480 }, { "epoch": 1.28, "grad_norm": 0.459244966506958, "learning_rate": 0.0003684234436273716, "loss": 1.787, "step": 38481 }, { "epoch": 1.28, "grad_norm": 0.4769565463066101, "learning_rate": 0.0003684132669575707, "loss": 1.7316, "step": 38482 }, { "epoch": 1.28, "grad_norm": 0.47917938232421875, "learning_rate": 0.0003684030902047261, "loss": 1.7814, "step": 38483 }, { "epoch": 1.28, "grad_norm": 0.4697299599647522, "learning_rate": 0.0003683929133688503, "loss": 1.7468, "step": 38484 }, { "epoch": 1.28, "grad_norm": 0.4951023459434509, "learning_rate": 0.00036838273644995565, "loss": 1.7864, "step": 38485 }, { "epoch": 1.28, "grad_norm": 0.4719439446926117, "learning_rate": 0.0003683725594480544, "loss": 1.7259, "step": 38486 }, { "epoch": 1.28, "grad_norm": 0.7869243621826172, "learning_rate": 0.00036836238236315894, "loss": 1.7805, "step": 38487 }, { "epoch": 1.28, "grad_norm": 0.47646695375442505, "learning_rate": 0.00036835220519528157, "loss": 1.7623, "step": 38488 }, { "epoch": 1.28, "grad_norm": 0.49292001128196716, "learning_rate": 0.0003683420279444346, "loss": 1.8449, "step": 38489 }, { "epoch": 1.28, "grad_norm": 0.47283580899238586, "learning_rate": 0.0003683318506106307, "loss": 1.7112, "step": 38490 }, { "epoch": 1.28, "grad_norm": 0.47562986612319946, "learning_rate": 0.00036832167319388173, "loss": 1.7429, "step": 38491 }, { "epoch": 1.28, "grad_norm": 0.5033180713653564, "learning_rate": 0.0003683114956942006, "loss": 1.8402, "step": 38492 }, { "epoch": 1.28, "grad_norm": 0.47840508818626404, "learning_rate": 0.00036830131811159926, "loss": 1.7379, "step": 38493 }, { "epoch": 1.28, "grad_norm": 0.4966535270214081, "learning_rate": 0.0003682911404460901, "loss": 1.7688, "step": 38494 }, { "epoch": 1.28, "grad_norm": 0.48928794264793396, "learning_rate": 0.0003682809626976856, "loss": 1.7753, "step": 38495 }, { "epoch": 1.28, "grad_norm": 0.4736696481704712, "learning_rate": 0.00036827078486639813, "loss": 1.7698, "step": 38496 }, { "epoch": 1.28, "grad_norm": 0.4682757258415222, "learning_rate": 0.00036826060695223996, "loss": 1.6786, "step": 38497 }, { "epoch": 1.28, "grad_norm": 0.481568306684494, "learning_rate": 0.0003682504289552235, "loss": 1.7909, "step": 38498 }, { "epoch": 1.28, "grad_norm": 0.4774843156337738, "learning_rate": 0.00036824025087536105, "loss": 1.789, "step": 38499 }, { "epoch": 1.28, "grad_norm": 0.4849206209182739, "learning_rate": 0.0003682300727126649, "loss": 1.804, "step": 38500 }, { "epoch": 1.28, "grad_norm": 0.4783737063407898, "learning_rate": 0.0003682198944671477, "loss": 1.7571, "step": 38501 }, { "epoch": 1.28, "grad_norm": 0.4952997863292694, "learning_rate": 0.0003682097161388214, "loss": 1.8222, "step": 38502 }, { "epoch": 1.28, "grad_norm": 0.49742287397384644, "learning_rate": 0.0003681995377276986, "loss": 1.8615, "step": 38503 }, { "epoch": 1.28, "grad_norm": 0.5036383867263794, "learning_rate": 0.00036818935923379174, "loss": 1.7807, "step": 38504 }, { "epoch": 1.28, "grad_norm": 0.49171602725982666, "learning_rate": 0.0003681791806571129, "loss": 1.7679, "step": 38505 }, { "epoch": 1.28, "grad_norm": 0.4814974367618561, "learning_rate": 0.00036816900199767465, "loss": 1.7377, "step": 38506 }, { "epoch": 1.28, "grad_norm": 0.4757557213306427, "learning_rate": 0.0003681588232554893, "loss": 1.6847, "step": 38507 }, { "epoch": 1.28, "grad_norm": 0.47628411650657654, "learning_rate": 0.0003681486444305691, "loss": 1.7398, "step": 38508 }, { "epoch": 1.28, "grad_norm": 0.4851599633693695, "learning_rate": 0.0003681384655229265, "loss": 1.8661, "step": 38509 }, { "epoch": 1.28, "grad_norm": 0.49116143584251404, "learning_rate": 0.00036812828653257394, "loss": 1.8227, "step": 38510 }, { "epoch": 1.28, "grad_norm": 0.4793648421764374, "learning_rate": 0.0003681181074595236, "loss": 1.8615, "step": 38511 }, { "epoch": 1.28, "grad_norm": 0.4871320128440857, "learning_rate": 0.0003681079283037879, "loss": 1.7153, "step": 38512 }, { "epoch": 1.28, "grad_norm": 0.48718586564064026, "learning_rate": 0.00036809774906537923, "loss": 1.7539, "step": 38513 }, { "epoch": 1.28, "grad_norm": 0.5063191652297974, "learning_rate": 0.0003680875697443099, "loss": 1.7689, "step": 38514 }, { "epoch": 1.28, "grad_norm": 0.4848337173461914, "learning_rate": 0.00036807739034059233, "loss": 1.7318, "step": 38515 }, { "epoch": 1.28, "grad_norm": 0.48600298166275024, "learning_rate": 0.00036806721085423877, "loss": 1.8087, "step": 38516 }, { "epoch": 1.28, "grad_norm": 0.48167312145233154, "learning_rate": 0.00036805703128526174, "loss": 1.8363, "step": 38517 }, { "epoch": 1.28, "grad_norm": 0.48234280943870544, "learning_rate": 0.0003680468516336735, "loss": 1.8337, "step": 38518 }, { "epoch": 1.28, "grad_norm": 0.46853312849998474, "learning_rate": 0.0003680366718994864, "loss": 1.7564, "step": 38519 }, { "epoch": 1.28, "grad_norm": 0.49916911125183105, "learning_rate": 0.00036802649208271277, "loss": 1.7451, "step": 38520 }, { "epoch": 1.28, "grad_norm": 0.48641204833984375, "learning_rate": 0.0003680163121833649, "loss": 1.7864, "step": 38521 }, { "epoch": 1.28, "grad_norm": 0.4815638065338135, "learning_rate": 0.0003680061322014555, "loss": 1.7493, "step": 38522 }, { "epoch": 1.28, "grad_norm": 0.4767860472202301, "learning_rate": 0.00036799595213699645, "loss": 1.8019, "step": 38523 }, { "epoch": 1.28, "grad_norm": 0.5018120408058167, "learning_rate": 0.00036798577199000044, "loss": 1.7704, "step": 38524 }, { "epoch": 1.28, "grad_norm": 0.47053733468055725, "learning_rate": 0.00036797559176047963, "loss": 1.765, "step": 38525 }, { "epoch": 1.28, "grad_norm": 0.48489508032798767, "learning_rate": 0.00036796541144844654, "loss": 1.7616, "step": 38526 }, { "epoch": 1.28, "grad_norm": 0.5037607550621033, "learning_rate": 0.0003679552310539135, "loss": 1.7489, "step": 38527 }, { "epoch": 1.28, "grad_norm": 0.4980340003967285, "learning_rate": 0.0003679450505768927, "loss": 1.7399, "step": 38528 }, { "epoch": 1.28, "grad_norm": 0.4897323250770569, "learning_rate": 0.00036793487001739677, "loss": 1.7523, "step": 38529 }, { "epoch": 1.28, "grad_norm": 0.45809775590896606, "learning_rate": 0.00036792468937543786, "loss": 1.7241, "step": 38530 }, { "epoch": 1.28, "grad_norm": 0.4733371436595917, "learning_rate": 0.00036791450865102836, "loss": 1.7797, "step": 38531 }, { "epoch": 1.28, "grad_norm": 0.477115660905838, "learning_rate": 0.00036790432784418063, "loss": 1.6918, "step": 38532 }, { "epoch": 1.28, "grad_norm": 0.5023024082183838, "learning_rate": 0.0003678941469549071, "loss": 1.7527, "step": 38533 }, { "epoch": 1.28, "grad_norm": 0.4797261357307434, "learning_rate": 0.00036788396598322006, "loss": 1.7547, "step": 38534 }, { "epoch": 1.28, "grad_norm": 0.49956071376800537, "learning_rate": 0.00036787378492913184, "loss": 1.7969, "step": 38535 }, { "epoch": 1.28, "grad_norm": 0.489240825176239, "learning_rate": 0.00036786360379265494, "loss": 1.7991, "step": 38536 }, { "epoch": 1.28, "grad_norm": 0.5085443258285522, "learning_rate": 0.0003678534225738016, "loss": 1.7962, "step": 38537 }, { "epoch": 1.28, "grad_norm": 0.48398616909980774, "learning_rate": 0.0003678432412725842, "loss": 1.8022, "step": 38538 }, { "epoch": 1.28, "grad_norm": 0.5039269328117371, "learning_rate": 0.000367833059889015, "loss": 1.8261, "step": 38539 }, { "epoch": 1.28, "grad_norm": 0.48540380597114563, "learning_rate": 0.0003678228784231065, "loss": 1.7415, "step": 38540 }, { "epoch": 1.28, "grad_norm": 0.4897859990596771, "learning_rate": 0.0003678126968748711, "loss": 1.8745, "step": 38541 }, { "epoch": 1.28, "grad_norm": 0.4941127896308899, "learning_rate": 0.00036780251524432103, "loss": 1.7967, "step": 38542 }, { "epoch": 1.28, "grad_norm": 0.47484755516052246, "learning_rate": 0.00036779233353146875, "loss": 1.7981, "step": 38543 }, { "epoch": 1.28, "grad_norm": 0.4774658679962158, "learning_rate": 0.0003677821517363265, "loss": 1.7824, "step": 38544 }, { "epoch": 1.28, "grad_norm": 0.49098625779151917, "learning_rate": 0.0003677719698589067, "loss": 1.7825, "step": 38545 }, { "epoch": 1.28, "grad_norm": 0.4875922203063965, "learning_rate": 0.0003677617878992217, "loss": 1.7683, "step": 38546 }, { "epoch": 1.28, "grad_norm": 0.4824793338775635, "learning_rate": 0.000367751605857284, "loss": 1.8453, "step": 38547 }, { "epoch": 1.28, "grad_norm": 0.5025731325149536, "learning_rate": 0.0003677414237331057, "loss": 1.7726, "step": 38548 }, { "epoch": 1.28, "grad_norm": 0.5083790421485901, "learning_rate": 0.00036773124152669935, "loss": 1.7627, "step": 38549 }, { "epoch": 1.28, "grad_norm": 0.49361011385917664, "learning_rate": 0.0003677210592380772, "loss": 1.7863, "step": 38550 }, { "epoch": 1.28, "grad_norm": 0.5008400082588196, "learning_rate": 0.0003677108768672517, "loss": 1.7657, "step": 38551 }, { "epoch": 1.28, "grad_norm": 0.49342480301856995, "learning_rate": 0.0003677006944142353, "loss": 1.8295, "step": 38552 }, { "epoch": 1.28, "grad_norm": 0.489124059677124, "learning_rate": 0.00036769051187904, "loss": 1.832, "step": 38553 }, { "epoch": 1.28, "grad_norm": 0.48002907633781433, "learning_rate": 0.0003676803292616786, "loss": 1.787, "step": 38554 }, { "epoch": 1.28, "grad_norm": 0.5033280849456787, "learning_rate": 0.000367670146562163, "loss": 1.7654, "step": 38555 }, { "epoch": 1.28, "grad_norm": 0.6998482346534729, "learning_rate": 0.00036765996378050603, "loss": 1.7273, "step": 38556 }, { "epoch": 1.28, "grad_norm": 0.4760735034942627, "learning_rate": 0.0003676497809167198, "loss": 1.813, "step": 38557 }, { "epoch": 1.28, "grad_norm": 0.45775169134140015, "learning_rate": 0.00036763959797081664, "loss": 1.7966, "step": 38558 }, { "epoch": 1.28, "grad_norm": 0.4743865430355072, "learning_rate": 0.0003676294149428091, "loss": 1.8164, "step": 38559 }, { "epoch": 1.28, "grad_norm": 0.48725438117980957, "learning_rate": 0.0003676192318327092, "loss": 1.8167, "step": 38560 }, { "epoch": 1.28, "grad_norm": 0.5201126337051392, "learning_rate": 0.0003676090486405297, "loss": 1.7125, "step": 38561 }, { "epoch": 1.28, "grad_norm": 0.4758684039115906, "learning_rate": 0.00036759886536628266, "loss": 1.7161, "step": 38562 }, { "epoch": 1.28, "grad_norm": 0.4660571813583374, "learning_rate": 0.0003675886820099807, "loss": 1.7579, "step": 38563 }, { "epoch": 1.28, "grad_norm": 0.4771136939525604, "learning_rate": 0.0003675784985716359, "loss": 1.8314, "step": 38564 }, { "epoch": 1.28, "grad_norm": 1.0512765645980835, "learning_rate": 0.00036756831505126077, "loss": 1.795, "step": 38565 }, { "epoch": 1.28, "grad_norm": 0.47498321533203125, "learning_rate": 0.0003675581314488678, "loss": 1.8174, "step": 38566 }, { "epoch": 1.28, "grad_norm": 0.48589804768562317, "learning_rate": 0.0003675479477644691, "loss": 1.8529, "step": 38567 }, { "epoch": 1.28, "grad_norm": 0.505366861820221, "learning_rate": 0.0003675377639980771, "loss": 1.807, "step": 38568 }, { "epoch": 1.28, "grad_norm": 0.47800779342651367, "learning_rate": 0.00036752758014970424, "loss": 1.6677, "step": 38569 }, { "epoch": 1.28, "grad_norm": 0.4782036244869232, "learning_rate": 0.0003675173962193629, "loss": 1.7157, "step": 38570 }, { "epoch": 1.28, "grad_norm": 0.4660041034221649, "learning_rate": 0.0003675072122070654, "loss": 1.7173, "step": 38571 }, { "epoch": 1.28, "grad_norm": 0.46148523688316345, "learning_rate": 0.000367497028112824, "loss": 1.7902, "step": 38572 }, { "epoch": 1.28, "grad_norm": 0.48899826407432556, "learning_rate": 0.0003674868439366513, "loss": 1.767, "step": 38573 }, { "epoch": 1.28, "grad_norm": 0.5005537271499634, "learning_rate": 0.0003674766596785593, "loss": 1.8672, "step": 38574 }, { "epoch": 1.28, "grad_norm": 0.49561193585395813, "learning_rate": 0.00036746647533856077, "loss": 1.7649, "step": 38575 }, { "epoch": 1.28, "grad_norm": 0.47543632984161377, "learning_rate": 0.0003674562909166678, "loss": 1.7844, "step": 38576 }, { "epoch": 1.28, "grad_norm": 0.49820074439048767, "learning_rate": 0.0003674461064128929, "loss": 1.7663, "step": 38577 }, { "epoch": 1.28, "grad_norm": 0.4770563542842865, "learning_rate": 0.00036743592182724816, "loss": 1.794, "step": 38578 }, { "epoch": 1.28, "grad_norm": 0.49229738116264343, "learning_rate": 0.00036742573715974633, "loss": 1.751, "step": 38579 }, { "epoch": 1.28, "grad_norm": 0.4691181182861328, "learning_rate": 0.0003674155524103996, "loss": 1.7993, "step": 38580 }, { "epoch": 1.28, "grad_norm": 0.4910234808921814, "learning_rate": 0.0003674053675792202, "loss": 1.7714, "step": 38581 }, { "epoch": 1.28, "grad_norm": 0.4960377812385559, "learning_rate": 0.0003673951826662208, "loss": 1.7137, "step": 38582 }, { "epoch": 1.28, "grad_norm": 0.46931761503219604, "learning_rate": 0.00036738499767141337, "loss": 1.7474, "step": 38583 }, { "epoch": 1.28, "grad_norm": 0.5074209570884705, "learning_rate": 0.0003673748125948106, "loss": 1.827, "step": 38584 }, { "epoch": 1.28, "grad_norm": 0.5093379616737366, "learning_rate": 0.00036736462743642466, "loss": 1.823, "step": 38585 }, { "epoch": 1.28, "grad_norm": 0.47715678811073303, "learning_rate": 0.0003673544421962681, "loss": 1.7522, "step": 38586 }, { "epoch": 1.28, "grad_norm": 0.4983961582183838, "learning_rate": 0.0003673442568743531, "loss": 1.8157, "step": 38587 }, { "epoch": 1.28, "grad_norm": 0.4838480055332184, "learning_rate": 0.00036733407147069213, "loss": 1.6983, "step": 38588 }, { "epoch": 1.28, "grad_norm": 0.4961961507797241, "learning_rate": 0.00036732388598529757, "loss": 1.7822, "step": 38589 }, { "epoch": 1.28, "grad_norm": 0.48508399724960327, "learning_rate": 0.00036731370041818154, "loss": 1.7483, "step": 38590 }, { "epoch": 1.28, "grad_norm": 0.4817781150341034, "learning_rate": 0.0003673035147693568, "loss": 1.7718, "step": 38591 }, { "epoch": 1.28, "grad_norm": 0.4854278862476349, "learning_rate": 0.00036729332903883533, "loss": 1.7945, "step": 38592 }, { "epoch": 1.28, "grad_norm": 0.5004006624221802, "learning_rate": 0.0003672831432266298, "loss": 1.814, "step": 38593 }, { "epoch": 1.28, "grad_norm": 0.4921084940433502, "learning_rate": 0.00036727295733275244, "loss": 1.7677, "step": 38594 }, { "epoch": 1.28, "grad_norm": 0.49113792181015015, "learning_rate": 0.00036726277135721555, "loss": 1.8435, "step": 38595 }, { "epoch": 1.28, "grad_norm": 0.489454060792923, "learning_rate": 0.0003672525853000317, "loss": 1.724, "step": 38596 }, { "epoch": 1.28, "grad_norm": 0.48759356141090393, "learning_rate": 0.00036724239916121297, "loss": 1.757, "step": 38597 }, { "epoch": 1.28, "grad_norm": 0.48231595754623413, "learning_rate": 0.000367232212940772, "loss": 1.7418, "step": 38598 }, { "epoch": 1.28, "grad_norm": 0.49113619327545166, "learning_rate": 0.0003672220266387209, "loss": 1.7778, "step": 38599 }, { "epoch": 1.28, "grad_norm": 0.5052959322929382, "learning_rate": 0.0003672118402550722, "loss": 1.7493, "step": 38600 }, { "epoch": 1.28, "grad_norm": 0.5363944172859192, "learning_rate": 0.0003672016537898383, "loss": 1.8686, "step": 38601 }, { "epoch": 1.28, "grad_norm": 0.48106828331947327, "learning_rate": 0.0003671914672430315, "loss": 1.777, "step": 38602 }, { "epoch": 1.28, "grad_norm": 0.49866074323654175, "learning_rate": 0.00036718128061466416, "loss": 1.7571, "step": 38603 }, { "epoch": 1.28, "grad_norm": 0.4989844858646393, "learning_rate": 0.00036717109390474857, "loss": 1.7907, "step": 38604 }, { "epoch": 1.28, "grad_norm": 0.48788952827453613, "learning_rate": 0.0003671609071132972, "loss": 1.8508, "step": 38605 }, { "epoch": 1.28, "grad_norm": 0.4760264456272125, "learning_rate": 0.00036715072024032236, "loss": 1.7742, "step": 38606 }, { "epoch": 1.28, "grad_norm": 0.4797952175140381, "learning_rate": 0.00036714053328583646, "loss": 1.7304, "step": 38607 }, { "epoch": 1.28, "grad_norm": 0.49557948112487793, "learning_rate": 0.00036713034624985183, "loss": 1.8414, "step": 38608 }, { "epoch": 1.28, "grad_norm": 0.511357307434082, "learning_rate": 0.0003671201591323809, "loss": 1.8481, "step": 38609 }, { "epoch": 1.28, "grad_norm": 0.48169541358947754, "learning_rate": 0.00036710997193343587, "loss": 1.7477, "step": 38610 }, { "epoch": 1.28, "grad_norm": 0.502061128616333, "learning_rate": 0.00036709978465302936, "loss": 1.6888, "step": 38611 }, { "epoch": 1.28, "grad_norm": 0.48058122396469116, "learning_rate": 0.0003670895972911736, "loss": 1.7631, "step": 38612 }, { "epoch": 1.28, "grad_norm": 0.49765655398368835, "learning_rate": 0.00036707940984788087, "loss": 1.7204, "step": 38613 }, { "epoch": 1.28, "grad_norm": 0.47112560272216797, "learning_rate": 0.00036706922232316365, "loss": 1.8146, "step": 38614 }, { "epoch": 1.28, "grad_norm": 0.4850381314754486, "learning_rate": 0.00036705903471703424, "loss": 1.8191, "step": 38615 }, { "epoch": 1.28, "grad_norm": 0.4790603518486023, "learning_rate": 0.0003670488470295051, "loss": 1.7609, "step": 38616 }, { "epoch": 1.28, "grad_norm": 0.4714764654636383, "learning_rate": 0.00036703865926058854, "loss": 1.8056, "step": 38617 }, { "epoch": 1.28, "grad_norm": 0.49209171533584595, "learning_rate": 0.0003670284714102969, "loss": 1.7357, "step": 38618 }, { "epoch": 1.28, "grad_norm": 0.47832173109054565, "learning_rate": 0.0003670182834786425, "loss": 1.8306, "step": 38619 }, { "epoch": 1.28, "grad_norm": 0.481572687625885, "learning_rate": 0.0003670080954656379, "loss": 1.7858, "step": 38620 }, { "epoch": 1.28, "grad_norm": 0.48762157559394836, "learning_rate": 0.0003669979073712953, "loss": 1.7691, "step": 38621 }, { "epoch": 1.28, "grad_norm": 0.47789743542671204, "learning_rate": 0.00036698771919562714, "loss": 1.7585, "step": 38622 }, { "epoch": 1.28, "grad_norm": 0.485969215631485, "learning_rate": 0.00036697753093864574, "loss": 1.8015, "step": 38623 }, { "epoch": 1.29, "grad_norm": 0.47814467549324036, "learning_rate": 0.00036696734260036344, "loss": 1.7424, "step": 38624 }, { "epoch": 1.29, "grad_norm": 0.4830128252506256, "learning_rate": 0.0003669571541807927, "loss": 1.7397, "step": 38625 }, { "epoch": 1.29, "grad_norm": 0.48624736070632935, "learning_rate": 0.00036694696567994583, "loss": 1.7688, "step": 38626 }, { "epoch": 1.29, "grad_norm": 0.5001898407936096, "learning_rate": 0.00036693677709783524, "loss": 1.728, "step": 38627 }, { "epoch": 1.29, "grad_norm": 0.5069005489349365, "learning_rate": 0.00036692658843447324, "loss": 1.7982, "step": 38628 }, { "epoch": 1.29, "grad_norm": 0.4982869327068329, "learning_rate": 0.0003669163996898722, "loss": 1.8232, "step": 38629 }, { "epoch": 1.29, "grad_norm": 0.4870432913303375, "learning_rate": 0.0003669062108640445, "loss": 1.7909, "step": 38630 }, { "epoch": 1.29, "grad_norm": 0.5003372430801392, "learning_rate": 0.00036689602195700253, "loss": 1.8625, "step": 38631 }, { "epoch": 1.29, "grad_norm": 0.4932263195514679, "learning_rate": 0.00036688583296875876, "loss": 1.7725, "step": 38632 }, { "epoch": 1.29, "grad_norm": 0.48528778553009033, "learning_rate": 0.0003668756438993252, "loss": 1.7924, "step": 38633 }, { "epoch": 1.29, "grad_norm": 0.4810701608657837, "learning_rate": 0.00036686545474871467, "loss": 1.8231, "step": 38634 }, { "epoch": 1.29, "grad_norm": 0.4799869656562805, "learning_rate": 0.00036685526551693924, "loss": 1.7408, "step": 38635 }, { "epoch": 1.29, "grad_norm": 0.4802969992160797, "learning_rate": 0.0003668450762040114, "loss": 1.7789, "step": 38636 }, { "epoch": 1.29, "grad_norm": 0.4846448600292206, "learning_rate": 0.0003668348868099436, "loss": 1.7112, "step": 38637 }, { "epoch": 1.29, "grad_norm": 0.4998238980770111, "learning_rate": 0.00036682469733474786, "loss": 1.7821, "step": 38638 }, { "epoch": 1.29, "grad_norm": 0.47969990968704224, "learning_rate": 0.00036681450777843695, "loss": 1.7019, "step": 38639 }, { "epoch": 1.29, "grad_norm": 0.4809473156929016, "learning_rate": 0.00036680431814102295, "loss": 1.7962, "step": 38640 }, { "epoch": 1.29, "grad_norm": 0.4736645221710205, "learning_rate": 0.0003667941284225184, "loss": 1.7703, "step": 38641 }, { "epoch": 1.29, "grad_norm": 0.4707303047180176, "learning_rate": 0.00036678393862293575, "loss": 1.6816, "step": 38642 }, { "epoch": 1.29, "grad_norm": 2.4668924808502197, "learning_rate": 0.00036677374874228706, "loss": 1.7843, "step": 38643 }, { "epoch": 1.29, "grad_norm": 0.4695868194103241, "learning_rate": 0.00036676355878058505, "loss": 1.7223, "step": 38644 }, { "epoch": 1.29, "grad_norm": 0.48192694783210754, "learning_rate": 0.0003667533687378417, "loss": 1.8274, "step": 38645 }, { "epoch": 1.29, "grad_norm": 0.49916085600852966, "learning_rate": 0.00036674317861406976, "loss": 1.7759, "step": 38646 }, { "epoch": 1.29, "grad_norm": 0.5051735639572144, "learning_rate": 0.0003667329884092814, "loss": 1.8212, "step": 38647 }, { "epoch": 1.29, "grad_norm": 0.47784826159477234, "learning_rate": 0.000366722798123489, "loss": 1.7477, "step": 38648 }, { "epoch": 1.29, "grad_norm": 0.49241146445274353, "learning_rate": 0.000366712607756705, "loss": 1.7622, "step": 38649 }, { "epoch": 1.29, "grad_norm": 0.47942492365837097, "learning_rate": 0.00036670241730894163, "loss": 1.7635, "step": 38650 }, { "epoch": 1.29, "grad_norm": 0.4830956757068634, "learning_rate": 0.0003666922267802114, "loss": 1.7386, "step": 38651 }, { "epoch": 1.29, "grad_norm": 0.48140084743499756, "learning_rate": 0.00036668203617052665, "loss": 1.7482, "step": 38652 }, { "epoch": 1.29, "grad_norm": 0.5342567563056946, "learning_rate": 0.0003666718454798998, "loss": 1.7951, "step": 38653 }, { "epoch": 1.29, "grad_norm": 0.46887075901031494, "learning_rate": 0.00036666165470834305, "loss": 1.7417, "step": 38654 }, { "epoch": 1.29, "grad_norm": 0.4612094461917877, "learning_rate": 0.0003666514638558689, "loss": 1.6775, "step": 38655 }, { "epoch": 1.29, "grad_norm": 0.4755215048789978, "learning_rate": 0.0003666412729224897, "loss": 1.7345, "step": 38656 }, { "epoch": 1.29, "grad_norm": 0.4901132583618164, "learning_rate": 0.00036663108190821775, "loss": 1.8579, "step": 38657 }, { "epoch": 1.29, "grad_norm": 0.48736995458602905, "learning_rate": 0.0003666208908130657, "loss": 1.8199, "step": 38658 }, { "epoch": 1.29, "grad_norm": 0.4945182502269745, "learning_rate": 0.0003666106996370455, "loss": 1.7193, "step": 38659 }, { "epoch": 1.29, "grad_norm": 0.4792003333568573, "learning_rate": 0.0003666005083801698, "loss": 1.731, "step": 38660 }, { "epoch": 1.29, "grad_norm": 0.5009865164756775, "learning_rate": 0.00036659031704245086, "loss": 1.7979, "step": 38661 }, { "epoch": 1.29, "grad_norm": 0.4907085597515106, "learning_rate": 0.00036658012562390107, "loss": 1.8463, "step": 38662 }, { "epoch": 1.29, "grad_norm": 0.4819362163543701, "learning_rate": 0.0003665699341245328, "loss": 1.7692, "step": 38663 }, { "epoch": 1.29, "grad_norm": 0.5001780390739441, "learning_rate": 0.00036655974254435855, "loss": 1.8659, "step": 38664 }, { "epoch": 1.29, "grad_norm": 0.4991901218891144, "learning_rate": 0.00036654955088339053, "loss": 1.7195, "step": 38665 }, { "epoch": 1.29, "grad_norm": 0.48398059606552124, "learning_rate": 0.0003665393591416411, "loss": 1.7896, "step": 38666 }, { "epoch": 1.29, "grad_norm": 0.49797767400741577, "learning_rate": 0.00036652916731912274, "loss": 1.7904, "step": 38667 }, { "epoch": 1.29, "grad_norm": 0.4779340624809265, "learning_rate": 0.0003665189754158478, "loss": 1.8033, "step": 38668 }, { "epoch": 1.29, "grad_norm": 0.49999305605888367, "learning_rate": 0.0003665087834318286, "loss": 1.7806, "step": 38669 }, { "epoch": 1.29, "grad_norm": 0.4873107075691223, "learning_rate": 0.00036649859136707747, "loss": 1.8794, "step": 38670 }, { "epoch": 1.29, "grad_norm": 0.48187169432640076, "learning_rate": 0.000366488399221607, "loss": 1.7924, "step": 38671 }, { "epoch": 1.29, "grad_norm": 0.4803444743156433, "learning_rate": 0.0003664782069954292, "loss": 1.7727, "step": 38672 }, { "epoch": 1.29, "grad_norm": 0.48431965708732605, "learning_rate": 0.00036646801468855685, "loss": 1.7416, "step": 38673 }, { "epoch": 1.29, "grad_norm": 0.5160495638847351, "learning_rate": 0.000366457822301002, "loss": 1.7668, "step": 38674 }, { "epoch": 1.29, "grad_norm": 0.49631351232528687, "learning_rate": 0.00036644762983277724, "loss": 1.7471, "step": 38675 }, { "epoch": 1.29, "grad_norm": 0.4732889235019684, "learning_rate": 0.00036643743728389486, "loss": 1.7409, "step": 38676 }, { "epoch": 1.29, "grad_norm": 0.48692646622657776, "learning_rate": 0.0003664272446543671, "loss": 1.7465, "step": 38677 }, { "epoch": 1.29, "grad_norm": 0.5174328088760376, "learning_rate": 0.00036641705194420654, "loss": 1.7844, "step": 38678 }, { "epoch": 1.29, "grad_norm": 0.48405343294143677, "learning_rate": 0.00036640685915342543, "loss": 1.8558, "step": 38679 }, { "epoch": 1.29, "grad_norm": 0.5161298513412476, "learning_rate": 0.00036639666628203617, "loss": 1.8409, "step": 38680 }, { "epoch": 1.29, "grad_norm": 0.47002533078193665, "learning_rate": 0.00036638647333005116, "loss": 1.6939, "step": 38681 }, { "epoch": 1.29, "grad_norm": 0.4953172504901886, "learning_rate": 0.0003663762802974827, "loss": 1.8029, "step": 38682 }, { "epoch": 1.29, "grad_norm": 0.4959249198436737, "learning_rate": 0.00036636608718434337, "loss": 1.7734, "step": 38683 }, { "epoch": 1.29, "grad_norm": 0.48094654083251953, "learning_rate": 0.00036635589399064525, "loss": 1.8213, "step": 38684 }, { "epoch": 1.29, "grad_norm": 0.4795737564563751, "learning_rate": 0.00036634570071640094, "loss": 1.6905, "step": 38685 }, { "epoch": 1.29, "grad_norm": 0.49837684631347656, "learning_rate": 0.0003663355073616226, "loss": 1.7933, "step": 38686 }, { "epoch": 1.29, "grad_norm": 0.4672616124153137, "learning_rate": 0.00036632531392632284, "loss": 1.7655, "step": 38687 }, { "epoch": 1.29, "grad_norm": 0.46737420558929443, "learning_rate": 0.00036631512041051384, "loss": 1.7358, "step": 38688 }, { "epoch": 1.29, "grad_norm": 0.4713398516178131, "learning_rate": 0.00036630492681420814, "loss": 1.7315, "step": 38689 }, { "epoch": 1.29, "grad_norm": 0.5232787132263184, "learning_rate": 0.000366294733137418, "loss": 1.7415, "step": 38690 }, { "epoch": 1.29, "grad_norm": 0.48032212257385254, "learning_rate": 0.00036628453938015575, "loss": 1.7702, "step": 38691 }, { "epoch": 1.29, "grad_norm": 0.47683894634246826, "learning_rate": 0.0003662743455424339, "loss": 1.7151, "step": 38692 }, { "epoch": 1.29, "grad_norm": 0.4990752637386322, "learning_rate": 0.0003662641516242648, "loss": 1.867, "step": 38693 }, { "epoch": 1.29, "grad_norm": 0.48599621653556824, "learning_rate": 0.0003662539576256607, "loss": 1.7823, "step": 38694 }, { "epoch": 1.29, "grad_norm": 0.46913981437683105, "learning_rate": 0.00036624376354663405, "loss": 1.8322, "step": 38695 }, { "epoch": 1.29, "grad_norm": 0.4780879318714142, "learning_rate": 0.0003662335693871973, "loss": 1.815, "step": 38696 }, { "epoch": 1.29, "grad_norm": 0.4720781445503235, "learning_rate": 0.0003662233751473628, "loss": 1.7005, "step": 38697 }, { "epoch": 1.29, "grad_norm": 0.5054634213447571, "learning_rate": 0.00036621318082714275, "loss": 1.7454, "step": 38698 }, { "epoch": 1.29, "grad_norm": 0.4872570335865021, "learning_rate": 0.0003662029864265498, "loss": 1.8034, "step": 38699 }, { "epoch": 1.29, "grad_norm": 0.47711867094039917, "learning_rate": 0.000366192791945596, "loss": 1.8772, "step": 38700 }, { "epoch": 1.29, "grad_norm": 0.48526400327682495, "learning_rate": 0.00036618259738429405, "loss": 1.7852, "step": 38701 }, { "epoch": 1.29, "grad_norm": 0.47789818048477173, "learning_rate": 0.00036617240274265607, "loss": 1.7388, "step": 38702 }, { "epoch": 1.29, "grad_norm": 0.47393038868904114, "learning_rate": 0.00036616220802069465, "loss": 1.6946, "step": 38703 }, { "epoch": 1.29, "grad_norm": 0.46782034635543823, "learning_rate": 0.000366152013218422, "loss": 1.7795, "step": 38704 }, { "epoch": 1.29, "grad_norm": 0.4758554995059967, "learning_rate": 0.0003661418183358506, "loss": 1.762, "step": 38705 }, { "epoch": 1.29, "grad_norm": 0.5064794421195984, "learning_rate": 0.0003661316233729927, "loss": 1.8771, "step": 38706 }, { "epoch": 1.29, "grad_norm": 0.48111477494239807, "learning_rate": 0.00036612142832986074, "loss": 1.7791, "step": 38707 }, { "epoch": 1.29, "grad_norm": 0.46863821148872375, "learning_rate": 0.00036611123320646723, "loss": 1.7446, "step": 38708 }, { "epoch": 1.29, "grad_norm": 0.475648432970047, "learning_rate": 0.0003661010380028244, "loss": 1.7699, "step": 38709 }, { "epoch": 1.29, "grad_norm": 0.5055752992630005, "learning_rate": 0.0003660908427189445, "loss": 1.8148, "step": 38710 }, { "epoch": 1.29, "grad_norm": 0.5107874274253845, "learning_rate": 0.00036608064735484016, "loss": 1.8276, "step": 38711 }, { "epoch": 1.29, "grad_norm": 0.4969009757041931, "learning_rate": 0.0003660704519105237, "loss": 1.754, "step": 38712 }, { "epoch": 1.29, "grad_norm": 0.474993497133255, "learning_rate": 0.0003660602563860074, "loss": 1.8061, "step": 38713 }, { "epoch": 1.29, "grad_norm": 0.48542994260787964, "learning_rate": 0.0003660500607813037, "loss": 1.8455, "step": 38714 }, { "epoch": 1.29, "grad_norm": 0.4826125502586365, "learning_rate": 0.0003660398650964249, "loss": 1.8324, "step": 38715 }, { "epoch": 1.29, "grad_norm": 0.49201080203056335, "learning_rate": 0.0003660296693313835, "loss": 1.6908, "step": 38716 }, { "epoch": 1.29, "grad_norm": 0.4875723421573639, "learning_rate": 0.00036601947348619175, "loss": 1.8203, "step": 38717 }, { "epoch": 1.29, "grad_norm": 0.49486592411994934, "learning_rate": 0.00036600927756086214, "loss": 1.8027, "step": 38718 }, { "epoch": 1.29, "grad_norm": 0.48031845688819885, "learning_rate": 0.000365999081555407, "loss": 1.7737, "step": 38719 }, { "epoch": 1.29, "grad_norm": 0.49455195665359497, "learning_rate": 0.00036598888546983865, "loss": 1.8037, "step": 38720 }, { "epoch": 1.29, "grad_norm": 0.5012004375457764, "learning_rate": 0.0003659786893041695, "loss": 1.8054, "step": 38721 }, { "epoch": 1.29, "grad_norm": 0.5059964656829834, "learning_rate": 0.00036596849305841196, "loss": 1.8621, "step": 38722 }, { "epoch": 1.29, "grad_norm": 0.49069228768348694, "learning_rate": 0.0003659582967325784, "loss": 1.76, "step": 38723 }, { "epoch": 1.29, "grad_norm": 0.47588911652565, "learning_rate": 0.00036594810032668126, "loss": 1.7111, "step": 38724 }, { "epoch": 1.29, "grad_norm": 0.4743512272834778, "learning_rate": 0.0003659379038407327, "loss": 1.8134, "step": 38725 }, { "epoch": 1.29, "grad_norm": 0.4819713234901428, "learning_rate": 0.0003659277072747454, "loss": 1.8215, "step": 38726 }, { "epoch": 1.29, "grad_norm": 0.5210670232772827, "learning_rate": 0.00036591751062873136, "loss": 1.7856, "step": 38727 }, { "epoch": 1.29, "grad_norm": 0.5065762996673584, "learning_rate": 0.00036590731390270337, "loss": 1.7928, "step": 38728 }, { "epoch": 1.29, "grad_norm": 0.47415876388549805, "learning_rate": 0.00036589711709667357, "loss": 1.7673, "step": 38729 }, { "epoch": 1.29, "grad_norm": 0.48652657866477966, "learning_rate": 0.00036588692021065437, "loss": 1.7226, "step": 38730 }, { "epoch": 1.29, "grad_norm": 0.5029184222221375, "learning_rate": 0.0003658767232446582, "loss": 1.7815, "step": 38731 }, { "epoch": 1.29, "grad_norm": 0.49092206358909607, "learning_rate": 0.00036586652619869726, "loss": 1.7759, "step": 38732 }, { "epoch": 1.29, "grad_norm": 0.4884854555130005, "learning_rate": 0.00036585632907278423, "loss": 1.8353, "step": 38733 }, { "epoch": 1.29, "grad_norm": 0.4949188530445099, "learning_rate": 0.0003658461318669312, "loss": 1.7586, "step": 38734 }, { "epoch": 1.29, "grad_norm": 0.5042540431022644, "learning_rate": 0.0003658359345811507, "loss": 1.7578, "step": 38735 }, { "epoch": 1.29, "grad_norm": 0.5172082185745239, "learning_rate": 0.00036582573721545504, "loss": 1.7712, "step": 38736 }, { "epoch": 1.29, "grad_norm": 0.481763631105423, "learning_rate": 0.0003658155397698567, "loss": 1.7579, "step": 38737 }, { "epoch": 1.29, "grad_norm": 0.49679818749427795, "learning_rate": 0.000365805342244368, "loss": 1.7898, "step": 38738 }, { "epoch": 1.29, "grad_norm": 0.4868716299533844, "learning_rate": 0.0003657951446390013, "loss": 1.7364, "step": 38739 }, { "epoch": 1.29, "grad_norm": 0.5200814008712769, "learning_rate": 0.00036578494695376895, "loss": 1.8162, "step": 38740 }, { "epoch": 1.29, "grad_norm": 0.5113847255706787, "learning_rate": 0.0003657747491886834, "loss": 1.7848, "step": 38741 }, { "epoch": 1.29, "grad_norm": 0.4835405945777893, "learning_rate": 0.0003657645513437569, "loss": 1.8197, "step": 38742 }, { "epoch": 1.29, "grad_norm": 0.48185259103775024, "learning_rate": 0.000365754353419002, "loss": 1.6728, "step": 38743 }, { "epoch": 1.29, "grad_norm": 0.48595157265663147, "learning_rate": 0.0003657441554144311, "loss": 1.7403, "step": 38744 }, { "epoch": 1.29, "grad_norm": 0.5339269638061523, "learning_rate": 0.0003657339573300563, "loss": 1.8244, "step": 38745 }, { "epoch": 1.29, "grad_norm": 0.5076132416725159, "learning_rate": 0.0003657237591658903, "loss": 1.7496, "step": 38746 }, { "epoch": 1.29, "grad_norm": 0.48297637701034546, "learning_rate": 0.0003657135609219452, "loss": 1.7274, "step": 38747 }, { "epoch": 1.29, "grad_norm": 0.4718836545944214, "learning_rate": 0.00036570336259823366, "loss": 1.7037, "step": 38748 }, { "epoch": 1.29, "grad_norm": 0.49412113428115845, "learning_rate": 0.0003656931641947679, "loss": 1.7562, "step": 38749 }, { "epoch": 1.29, "grad_norm": 0.4873485863208771, "learning_rate": 0.00036568296571156024, "loss": 1.8043, "step": 38750 }, { "epoch": 1.29, "grad_norm": 0.4675109386444092, "learning_rate": 0.0003656727671486232, "loss": 1.7616, "step": 38751 }, { "epoch": 1.29, "grad_norm": 0.48418813943862915, "learning_rate": 0.00036566256850596913, "loss": 1.8028, "step": 38752 }, { "epoch": 1.29, "grad_norm": 0.5103775262832642, "learning_rate": 0.00036565236978361026, "loss": 1.8127, "step": 38753 }, { "epoch": 1.29, "grad_norm": 0.5049859881401062, "learning_rate": 0.00036564217098155927, "loss": 1.7637, "step": 38754 }, { "epoch": 1.29, "grad_norm": 0.47738397121429443, "learning_rate": 0.0003656319720998281, "loss": 1.7542, "step": 38755 }, { "epoch": 1.29, "grad_norm": 0.4870777130126953, "learning_rate": 0.00036562177313842964, "loss": 1.794, "step": 38756 }, { "epoch": 1.29, "grad_norm": 0.5023975372314453, "learning_rate": 0.00036561157409737576, "loss": 1.7659, "step": 38757 }, { "epoch": 1.29, "grad_norm": 0.5135151147842407, "learning_rate": 0.00036560137497667933, "loss": 1.71, "step": 38758 }, { "epoch": 1.29, "grad_norm": 0.48497429490089417, "learning_rate": 0.0003655911757763524, "loss": 1.7619, "step": 38759 }, { "epoch": 1.29, "grad_norm": 0.49167919158935547, "learning_rate": 0.0003655809764964074, "loss": 1.8672, "step": 38760 }, { "epoch": 1.29, "grad_norm": 0.4959959387779236, "learning_rate": 0.0003655707771368569, "loss": 1.731, "step": 38761 }, { "epoch": 1.29, "grad_norm": 0.48405230045318604, "learning_rate": 0.0003655605776977129, "loss": 1.7482, "step": 38762 }, { "epoch": 1.29, "grad_norm": 0.5130093097686768, "learning_rate": 0.00036555037817898826, "loss": 1.8162, "step": 38763 }, { "epoch": 1.29, "grad_norm": 0.48792389035224915, "learning_rate": 0.000365540178580695, "loss": 1.781, "step": 38764 }, { "epoch": 1.29, "grad_norm": 0.4743008315563202, "learning_rate": 0.0003655299789028457, "loss": 1.7709, "step": 38765 }, { "epoch": 1.29, "grad_norm": 0.4780982732772827, "learning_rate": 0.00036551977914545255, "loss": 1.7921, "step": 38766 }, { "epoch": 1.29, "grad_norm": 0.4744625985622406, "learning_rate": 0.0003655095793085281, "loss": 1.7456, "step": 38767 }, { "epoch": 1.29, "grad_norm": 0.4990041255950928, "learning_rate": 0.0003654993793920848, "loss": 1.6822, "step": 38768 }, { "epoch": 1.29, "grad_norm": 0.4699361026287079, "learning_rate": 0.0003654891793961347, "loss": 1.7416, "step": 38769 }, { "epoch": 1.29, "grad_norm": 0.4988968074321747, "learning_rate": 0.0003654789793206905, "loss": 1.7939, "step": 38770 }, { "epoch": 1.29, "grad_norm": 0.4828719198703766, "learning_rate": 0.0003654687791657644, "loss": 1.835, "step": 38771 }, { "epoch": 1.29, "grad_norm": 0.4680476188659668, "learning_rate": 0.00036545857893136886, "loss": 1.7719, "step": 38772 }, { "epoch": 1.29, "grad_norm": 0.4835047721862793, "learning_rate": 0.00036544837861751623, "loss": 1.7354, "step": 38773 }, { "epoch": 1.29, "grad_norm": 1.4394187927246094, "learning_rate": 0.000365438178224219, "loss": 1.8181, "step": 38774 }, { "epoch": 1.29, "grad_norm": 0.4914278984069824, "learning_rate": 0.00036542797775148946, "loss": 1.7734, "step": 38775 }, { "epoch": 1.29, "grad_norm": 0.49410271644592285, "learning_rate": 0.00036541777719933987, "loss": 1.801, "step": 38776 }, { "epoch": 1.29, "grad_norm": 0.45715653896331787, "learning_rate": 0.0003654075765677829, "loss": 1.7851, "step": 38777 }, { "epoch": 1.29, "grad_norm": 0.4742325246334076, "learning_rate": 0.0003653973758568306, "loss": 1.773, "step": 38778 }, { "epoch": 1.29, "grad_norm": 0.6798554062843323, "learning_rate": 0.0003653871750664956, "loss": 1.7889, "step": 38779 }, { "epoch": 1.29, "grad_norm": 0.4694374203681946, "learning_rate": 0.0003653769741967902, "loss": 1.7688, "step": 38780 }, { "epoch": 1.29, "grad_norm": 0.4865944981575012, "learning_rate": 0.00036536677324772686, "loss": 1.7692, "step": 38781 }, { "epoch": 1.29, "grad_norm": 0.49523988366127014, "learning_rate": 0.0003653565722193177, "loss": 1.6754, "step": 38782 }, { "epoch": 1.29, "grad_norm": 0.48065561056137085, "learning_rate": 0.00036534637111157545, "loss": 1.6989, "step": 38783 }, { "epoch": 1.29, "grad_norm": 0.48096972703933716, "learning_rate": 0.00036533616992451235, "loss": 1.7456, "step": 38784 }, { "epoch": 1.29, "grad_norm": 0.49725598096847534, "learning_rate": 0.0003653259686581407, "loss": 1.7135, "step": 38785 }, { "epoch": 1.29, "grad_norm": 0.5003926157951355, "learning_rate": 0.000365315767312473, "loss": 1.7321, "step": 38786 }, { "epoch": 1.29, "grad_norm": 0.49175596237182617, "learning_rate": 0.00036530556588752153, "loss": 1.7255, "step": 38787 }, { "epoch": 1.29, "grad_norm": 0.48968666791915894, "learning_rate": 0.0003652953643832988, "loss": 1.7672, "step": 38788 }, { "epoch": 1.29, "grad_norm": 0.4873066246509552, "learning_rate": 0.00036528516279981695, "loss": 1.7436, "step": 38789 }, { "epoch": 1.29, "grad_norm": 0.46715909242630005, "learning_rate": 0.0003652749611370887, "loss": 1.8014, "step": 38790 }, { "epoch": 1.29, "grad_norm": 0.48053696751594543, "learning_rate": 0.0003652647593951262, "loss": 1.7014, "step": 38791 }, { "epoch": 1.29, "grad_norm": 0.4937901496887207, "learning_rate": 0.0003652545575739419, "loss": 1.7672, "step": 38792 }, { "epoch": 1.29, "grad_norm": 0.49361953139305115, "learning_rate": 0.0003652443556735482, "loss": 1.8079, "step": 38793 }, { "epoch": 1.29, "grad_norm": 0.48276856541633606, "learning_rate": 0.00036523415369395744, "loss": 1.787, "step": 38794 }, { "epoch": 1.29, "grad_norm": 0.511020302772522, "learning_rate": 0.00036522395163518203, "loss": 1.853, "step": 38795 }, { "epoch": 1.29, "grad_norm": 0.4788047671318054, "learning_rate": 0.00036521374949723443, "loss": 1.7932, "step": 38796 }, { "epoch": 1.29, "grad_norm": 0.502324640750885, "learning_rate": 0.0003652035472801268, "loss": 1.7789, "step": 38797 }, { "epoch": 1.29, "grad_norm": 0.48768356442451477, "learning_rate": 0.0003651933449838718, "loss": 1.8013, "step": 38798 }, { "epoch": 1.29, "grad_norm": 0.48655301332473755, "learning_rate": 0.00036518314260848165, "loss": 1.8264, "step": 38799 }, { "epoch": 1.29, "grad_norm": 0.4835987687110901, "learning_rate": 0.0003651729401539688, "loss": 1.7562, "step": 38800 }, { "epoch": 1.29, "grad_norm": 0.4918651580810547, "learning_rate": 0.00036516273762034554, "loss": 1.8708, "step": 38801 }, { "epoch": 1.29, "grad_norm": 0.47110509872436523, "learning_rate": 0.00036515253500762435, "loss": 1.7466, "step": 38802 }, { "epoch": 1.29, "grad_norm": 0.49063509702682495, "learning_rate": 0.00036514233231581755, "loss": 1.712, "step": 38803 }, { "epoch": 1.29, "grad_norm": 0.5077076554298401, "learning_rate": 0.0003651321295449376, "loss": 1.8279, "step": 38804 }, { "epoch": 1.29, "grad_norm": 0.4824383854866028, "learning_rate": 0.0003651219266949967, "loss": 1.7155, "step": 38805 }, { "epoch": 1.29, "grad_norm": 0.46852436661720276, "learning_rate": 0.00036511172376600757, "loss": 1.7026, "step": 38806 }, { "epoch": 1.29, "grad_norm": 0.4953685700893402, "learning_rate": 0.0003651015207579823, "loss": 1.7321, "step": 38807 }, { "epoch": 1.29, "grad_norm": 0.5222118496894836, "learning_rate": 0.0003650913176709335, "loss": 1.755, "step": 38808 }, { "epoch": 1.29, "grad_norm": 0.48650094866752625, "learning_rate": 0.0003650811145048733, "loss": 1.7725, "step": 38809 }, { "epoch": 1.29, "grad_norm": 0.4682300090789795, "learning_rate": 0.0003650709112598142, "loss": 1.7556, "step": 38810 }, { "epoch": 1.29, "grad_norm": 0.4989573657512665, "learning_rate": 0.0003650607079357687, "loss": 1.7683, "step": 38811 }, { "epoch": 1.29, "grad_norm": 0.49911537766456604, "learning_rate": 0.000365050504532749, "loss": 1.8217, "step": 38812 }, { "epoch": 1.29, "grad_norm": 0.4946669638156891, "learning_rate": 0.00036504030105076766, "loss": 1.8347, "step": 38813 }, { "epoch": 1.29, "grad_norm": 0.48398375511169434, "learning_rate": 0.0003650300974898369, "loss": 1.7709, "step": 38814 }, { "epoch": 1.29, "grad_norm": 0.49542415142059326, "learning_rate": 0.00036501989384996926, "loss": 1.8113, "step": 38815 }, { "epoch": 1.29, "grad_norm": 0.4933716654777527, "learning_rate": 0.000365009690131177, "loss": 1.83, "step": 38816 }, { "epoch": 1.29, "grad_norm": 0.4701666533946991, "learning_rate": 0.0003649994863334725, "loss": 1.853, "step": 38817 }, { "epoch": 1.29, "grad_norm": 0.4966766834259033, "learning_rate": 0.0003649892824568683, "loss": 1.7361, "step": 38818 }, { "epoch": 1.29, "grad_norm": 0.48872458934783936, "learning_rate": 0.0003649790785013766, "loss": 1.7438, "step": 38819 }, { "epoch": 1.29, "grad_norm": 0.48914071917533875, "learning_rate": 0.00036496887446700995, "loss": 1.7497, "step": 38820 }, { "epoch": 1.29, "grad_norm": 0.5108676552772522, "learning_rate": 0.0003649586703537806, "loss": 1.7617, "step": 38821 }, { "epoch": 1.29, "grad_norm": 0.48986321687698364, "learning_rate": 0.000364948466161701, "loss": 1.8449, "step": 38822 }, { "epoch": 1.29, "grad_norm": 0.48369842767715454, "learning_rate": 0.00036493826189078367, "loss": 1.7413, "step": 38823 }, { "epoch": 1.29, "grad_norm": 0.4888296127319336, "learning_rate": 0.00036492805754104066, "loss": 1.7209, "step": 38824 }, { "epoch": 1.29, "grad_norm": 0.479669451713562, "learning_rate": 0.00036491785311248466, "loss": 1.8005, "step": 38825 }, { "epoch": 1.29, "grad_norm": 0.5008341073989868, "learning_rate": 0.0003649076486051279, "loss": 1.7994, "step": 38826 }, { "epoch": 1.29, "grad_norm": 0.47913089394569397, "learning_rate": 0.00036489744401898286, "loss": 1.7829, "step": 38827 }, { "epoch": 1.29, "grad_norm": 0.4777205288410187, "learning_rate": 0.00036488723935406184, "loss": 1.6826, "step": 38828 }, { "epoch": 1.29, "grad_norm": 0.48151665925979614, "learning_rate": 0.0003648770346103773, "loss": 1.837, "step": 38829 }, { "epoch": 1.29, "grad_norm": 0.49155646562576294, "learning_rate": 0.0003648668297879417, "loss": 1.7886, "step": 38830 }, { "epoch": 1.29, "grad_norm": 0.49411115050315857, "learning_rate": 0.0003648566248867672, "loss": 1.7147, "step": 38831 }, { "epoch": 1.29, "grad_norm": 0.4938451647758484, "learning_rate": 0.00036484641990686633, "loss": 1.6899, "step": 38832 }, { "epoch": 1.29, "grad_norm": 0.5075567960739136, "learning_rate": 0.00036483621484825144, "loss": 1.7402, "step": 38833 }, { "epoch": 1.29, "grad_norm": 0.518600583076477, "learning_rate": 0.00036482600971093497, "loss": 1.7427, "step": 38834 }, { "epoch": 1.29, "grad_norm": 0.5046002864837646, "learning_rate": 0.0003648158044949293, "loss": 1.8416, "step": 38835 }, { "epoch": 1.29, "grad_norm": 0.46830716729164124, "learning_rate": 0.0003648055992002468, "loss": 1.7532, "step": 38836 }, { "epoch": 1.29, "grad_norm": 0.47368189692497253, "learning_rate": 0.0003647953938268998, "loss": 1.745, "step": 38837 }, { "epoch": 1.29, "grad_norm": 0.501432478427887, "learning_rate": 0.0003647851883749008, "loss": 1.8151, "step": 38838 }, { "epoch": 1.29, "grad_norm": 0.5154269933700562, "learning_rate": 0.00036477498284426203, "loss": 1.8449, "step": 38839 }, { "epoch": 1.29, "grad_norm": 0.5021764636039734, "learning_rate": 0.00036476477723499604, "loss": 1.7676, "step": 38840 }, { "epoch": 1.29, "grad_norm": 0.4701969027519226, "learning_rate": 0.0003647545715471152, "loss": 1.7816, "step": 38841 }, { "epoch": 1.29, "grad_norm": 0.47276467084884644, "learning_rate": 0.0003647443657806317, "loss": 1.7194, "step": 38842 }, { "epoch": 1.29, "grad_norm": 0.49038103222846985, "learning_rate": 0.00036473415993555825, "loss": 1.7613, "step": 38843 }, { "epoch": 1.29, "grad_norm": 0.47867247462272644, "learning_rate": 0.0003647239540119069, "loss": 1.8499, "step": 38844 }, { "epoch": 1.29, "grad_norm": 0.4740062355995178, "learning_rate": 0.0003647137480096904, "loss": 1.7175, "step": 38845 }, { "epoch": 1.29, "grad_norm": 0.4777330160140991, "learning_rate": 0.00036470354192892087, "loss": 1.7688, "step": 38846 }, { "epoch": 1.29, "grad_norm": 0.48507803678512573, "learning_rate": 0.0003646933357696107, "loss": 1.7655, "step": 38847 }, { "epoch": 1.29, "grad_norm": 0.4882083237171173, "learning_rate": 0.00036468312953177244, "loss": 1.7987, "step": 38848 }, { "epoch": 1.29, "grad_norm": 0.49038150906562805, "learning_rate": 0.0003646729232154183, "loss": 1.7677, "step": 38849 }, { "epoch": 1.29, "grad_norm": 0.4804176688194275, "learning_rate": 0.00036466271682056083, "loss": 1.7929, "step": 38850 }, { "epoch": 1.29, "grad_norm": 0.4975454807281494, "learning_rate": 0.00036465251034721233, "loss": 1.841, "step": 38851 }, { "epoch": 1.29, "grad_norm": 0.5004588961601257, "learning_rate": 0.00036464230379538526, "loss": 1.7339, "step": 38852 }, { "epoch": 1.29, "grad_norm": 0.5074479579925537, "learning_rate": 0.0003646320971650919, "loss": 1.8159, "step": 38853 }, { "epoch": 1.29, "grad_norm": 0.5014063715934753, "learning_rate": 0.00036462189045634465, "loss": 1.7345, "step": 38854 }, { "epoch": 1.29, "grad_norm": 0.4829632639884949, "learning_rate": 0.0003646116836691561, "loss": 1.7226, "step": 38855 }, { "epoch": 1.29, "grad_norm": 0.49711382389068604, "learning_rate": 0.00036460147680353837, "loss": 1.758, "step": 38856 }, { "epoch": 1.29, "grad_norm": 0.4839043915271759, "learning_rate": 0.00036459126985950393, "loss": 1.8266, "step": 38857 }, { "epoch": 1.29, "grad_norm": 0.48534494638442993, "learning_rate": 0.00036458106283706525, "loss": 1.7693, "step": 38858 }, { "epoch": 1.29, "grad_norm": 0.5069183111190796, "learning_rate": 0.00036457085573623474, "loss": 1.7084, "step": 38859 }, { "epoch": 1.29, "grad_norm": 0.4901673197746277, "learning_rate": 0.00036456064855702465, "loss": 1.8524, "step": 38860 }, { "epoch": 1.29, "grad_norm": 0.48041102290153503, "learning_rate": 0.0003645504412994475, "loss": 1.7471, "step": 38861 }, { "epoch": 1.29, "grad_norm": 0.49527207016944885, "learning_rate": 0.0003645402339635156, "loss": 1.8141, "step": 38862 }, { "epoch": 1.29, "grad_norm": 0.49130287766456604, "learning_rate": 0.00036453002654924133, "loss": 1.8915, "step": 38863 }, { "epoch": 1.29, "grad_norm": 0.4820980727672577, "learning_rate": 0.0003645198190566372, "loss": 1.8713, "step": 38864 }, { "epoch": 1.29, "grad_norm": 0.4697345793247223, "learning_rate": 0.00036450961148571536, "loss": 1.8004, "step": 38865 }, { "epoch": 1.29, "grad_norm": 0.5038843154907227, "learning_rate": 0.00036449940383648856, "loss": 1.8039, "step": 38866 }, { "epoch": 1.29, "grad_norm": 0.48238638043403625, "learning_rate": 0.00036448919610896876, "loss": 1.7521, "step": 38867 }, { "epoch": 1.29, "grad_norm": 0.6217077970504761, "learning_rate": 0.0003644789883031688, "loss": 1.7945, "step": 38868 }, { "epoch": 1.29, "grad_norm": 0.4927031993865967, "learning_rate": 0.0003644687804191007, "loss": 1.7755, "step": 38869 }, { "epoch": 1.29, "grad_norm": 0.49209171533584595, "learning_rate": 0.0003644585724567771, "loss": 1.7957, "step": 38870 }, { "epoch": 1.29, "grad_norm": 0.49553704261779785, "learning_rate": 0.0003644483644162103, "loss": 1.7587, "step": 38871 }, { "epoch": 1.29, "grad_norm": 0.494937002658844, "learning_rate": 0.00036443815629741254, "loss": 1.7136, "step": 38872 }, { "epoch": 1.29, "grad_norm": 0.48520249128341675, "learning_rate": 0.00036442794810039644, "loss": 1.7899, "step": 38873 }, { "epoch": 1.29, "grad_norm": 0.4906097948551178, "learning_rate": 0.00036441773982517423, "loss": 1.6856, "step": 38874 }, { "epoch": 1.29, "grad_norm": 0.5143070816993713, "learning_rate": 0.0003644075314717585, "loss": 1.7863, "step": 38875 }, { "epoch": 1.29, "grad_norm": 0.5155819058418274, "learning_rate": 0.0003643973230401615, "loss": 1.8178, "step": 38876 }, { "epoch": 1.29, "grad_norm": 0.516781747341156, "learning_rate": 0.00036438711453039556, "loss": 1.7544, "step": 38877 }, { "epoch": 1.29, "grad_norm": 0.4971846044063568, "learning_rate": 0.00036437690594247324, "loss": 1.7503, "step": 38878 }, { "epoch": 1.29, "grad_norm": 0.4728013873100281, "learning_rate": 0.0003643666972764067, "loss": 1.7263, "step": 38879 }, { "epoch": 1.29, "grad_norm": 0.4903239905834198, "learning_rate": 0.00036435648853220864, "loss": 1.7938, "step": 38880 }, { "epoch": 1.29, "grad_norm": 0.673241138458252, "learning_rate": 0.00036434627970989125, "loss": 1.8139, "step": 38881 }, { "epoch": 1.29, "grad_norm": 0.4934101104736328, "learning_rate": 0.0003643360708094669, "loss": 1.7192, "step": 38882 }, { "epoch": 1.29, "grad_norm": 0.4779113531112671, "learning_rate": 0.000364325861830948, "loss": 1.8236, "step": 38883 }, { "epoch": 1.29, "grad_norm": 0.49529746174812317, "learning_rate": 0.00036431565277434704, "loss": 1.7255, "step": 38884 }, { "epoch": 1.29, "grad_norm": 0.48004814982414246, "learning_rate": 0.00036430544363967636, "loss": 1.7441, "step": 38885 }, { "epoch": 1.29, "grad_norm": 0.49293848872184753, "learning_rate": 0.0003642952344269484, "loss": 1.8768, "step": 38886 }, { "epoch": 1.29, "grad_norm": 0.5037604570388794, "learning_rate": 0.00036428502513617537, "loss": 1.8143, "step": 38887 }, { "epoch": 1.29, "grad_norm": 0.4968089163303375, "learning_rate": 0.0003642748157673699, "loss": 1.744, "step": 38888 }, { "epoch": 1.29, "grad_norm": 0.4890784025192261, "learning_rate": 0.0003642646063205442, "loss": 1.7536, "step": 38889 }, { "epoch": 1.29, "grad_norm": 0.4843086898326874, "learning_rate": 0.0003642543967957107, "loss": 1.7069, "step": 38890 }, { "epoch": 1.29, "grad_norm": 0.48316898941993713, "learning_rate": 0.000364244187192882, "loss": 1.7888, "step": 38891 }, { "epoch": 1.29, "grad_norm": 0.48566991090774536, "learning_rate": 0.00036423397751207025, "loss": 1.7658, "step": 38892 }, { "epoch": 1.29, "grad_norm": 0.45987680554389954, "learning_rate": 0.00036422376775328786, "loss": 1.7592, "step": 38893 }, { "epoch": 1.29, "grad_norm": 0.47783640027046204, "learning_rate": 0.0003642135579165473, "loss": 1.7449, "step": 38894 }, { "epoch": 1.29, "grad_norm": 0.48675206303596497, "learning_rate": 0.0003642033480018609, "loss": 1.716, "step": 38895 }, { "epoch": 1.29, "grad_norm": 0.5069942474365234, "learning_rate": 0.0003641931380092412, "loss": 1.7607, "step": 38896 }, { "epoch": 1.29, "grad_norm": 0.48189160227775574, "learning_rate": 0.0003641829279387004, "loss": 1.8342, "step": 38897 }, { "epoch": 1.29, "grad_norm": 0.5023716688156128, "learning_rate": 0.0003641727177902511, "loss": 1.7853, "step": 38898 }, { "epoch": 1.29, "grad_norm": 0.4653454124927521, "learning_rate": 0.0003641625075639054, "loss": 1.7284, "step": 38899 }, { "epoch": 1.29, "grad_norm": 0.47246691584587097, "learning_rate": 0.000364152297259676, "loss": 1.8274, "step": 38900 }, { "epoch": 1.29, "grad_norm": 0.4811401963233948, "learning_rate": 0.0003641420868775751, "loss": 1.7955, "step": 38901 }, { "epoch": 1.29, "grad_norm": 0.4869369864463806, "learning_rate": 0.0003641318764176152, "loss": 1.7315, "step": 38902 }, { "epoch": 1.29, "grad_norm": 0.5002747178077698, "learning_rate": 0.00036412166587980867, "loss": 1.7303, "step": 38903 }, { "epoch": 1.29, "grad_norm": 0.4893607199192047, "learning_rate": 0.0003641114552641678, "loss": 1.8198, "step": 38904 }, { "epoch": 1.29, "grad_norm": 0.48250314593315125, "learning_rate": 0.00036410124457070513, "loss": 1.8055, "step": 38905 }, { "epoch": 1.29, "grad_norm": 0.4845878779888153, "learning_rate": 0.00036409103379943296, "loss": 1.7877, "step": 38906 }, { "epoch": 1.29, "grad_norm": 0.4881913959980011, "learning_rate": 0.00036408082295036375, "loss": 1.7919, "step": 38907 }, { "epoch": 1.29, "grad_norm": 0.4958232343196869, "learning_rate": 0.0003640706120235099, "loss": 1.742, "step": 38908 }, { "epoch": 1.29, "grad_norm": 0.47447144985198975, "learning_rate": 0.00036406040101888366, "loss": 1.8232, "step": 38909 }, { "epoch": 1.29, "grad_norm": 0.4761177599430084, "learning_rate": 0.0003640501899364976, "loss": 1.8312, "step": 38910 }, { "epoch": 1.29, "grad_norm": 0.4740620255470276, "learning_rate": 0.0003640399787763641, "loss": 1.7811, "step": 38911 }, { "epoch": 1.29, "grad_norm": 0.48025646805763245, "learning_rate": 0.00036402976753849547, "loss": 1.6746, "step": 38912 }, { "epoch": 1.29, "grad_norm": 0.49192801117897034, "learning_rate": 0.00036401955622290404, "loss": 1.7686, "step": 38913 }, { "epoch": 1.29, "grad_norm": 0.5287081599235535, "learning_rate": 0.00036400934482960236, "loss": 1.7617, "step": 38914 }, { "epoch": 1.29, "grad_norm": 0.48502811789512634, "learning_rate": 0.0003639991333586028, "loss": 1.7575, "step": 38915 }, { "epoch": 1.29, "grad_norm": 0.46444764733314514, "learning_rate": 0.0003639889218099177, "loss": 1.7733, "step": 38916 }, { "epoch": 1.29, "grad_norm": 0.4728400707244873, "learning_rate": 0.0003639787101835595, "loss": 1.7595, "step": 38917 }, { "epoch": 1.29, "grad_norm": 0.4951328933238983, "learning_rate": 0.00036396849847954057, "loss": 1.8164, "step": 38918 }, { "epoch": 1.29, "grad_norm": 0.4933134615421295, "learning_rate": 0.0003639582866978733, "loss": 1.6444, "step": 38919 }, { "epoch": 1.29, "grad_norm": 0.48457083106040955, "learning_rate": 0.00036394807483857, "loss": 1.7677, "step": 38920 }, { "epoch": 1.29, "grad_norm": 0.4758743941783905, "learning_rate": 0.00036393786290164326, "loss": 1.7587, "step": 38921 }, { "epoch": 1.29, "grad_norm": 0.5208978652954102, "learning_rate": 0.00036392765088710534, "loss": 1.881, "step": 38922 }, { "epoch": 1.29, "grad_norm": 0.5029098391532898, "learning_rate": 0.00036391743879496874, "loss": 1.7262, "step": 38923 }, { "epoch": 1.3, "grad_norm": 0.494410902261734, "learning_rate": 0.00036390722662524576, "loss": 1.799, "step": 38924 }, { "epoch": 1.3, "grad_norm": 0.4892491400241852, "learning_rate": 0.0003638970143779487, "loss": 1.8176, "step": 38925 }, { "epoch": 1.3, "grad_norm": 0.5133581161499023, "learning_rate": 0.00036388680205309024, "loss": 1.7573, "step": 38926 }, { "epoch": 1.3, "grad_norm": 0.4892595112323761, "learning_rate": 0.0003638765896506825, "loss": 1.7768, "step": 38927 }, { "epoch": 1.3, "grad_norm": 0.4665077030658722, "learning_rate": 0.0003638663771707381, "loss": 1.7857, "step": 38928 }, { "epoch": 1.3, "grad_norm": 0.4622544050216675, "learning_rate": 0.00036385616461326927, "loss": 1.754, "step": 38929 }, { "epoch": 1.3, "grad_norm": 0.4825294315814972, "learning_rate": 0.0003638459519782885, "loss": 1.7669, "step": 38930 }, { "epoch": 1.3, "grad_norm": 0.4949841797351837, "learning_rate": 0.0003638357392658081, "loss": 1.7806, "step": 38931 }, { "epoch": 1.3, "grad_norm": 0.4882916510105133, "learning_rate": 0.00036382552647584054, "loss": 1.7748, "step": 38932 }, { "epoch": 1.3, "grad_norm": 0.4921593964099884, "learning_rate": 0.00036381531360839823, "loss": 1.8131, "step": 38933 }, { "epoch": 1.3, "grad_norm": 0.4730975329875946, "learning_rate": 0.00036380510066349347, "loss": 1.8127, "step": 38934 }, { "epoch": 1.3, "grad_norm": 0.5030257105827332, "learning_rate": 0.0003637948876411388, "loss": 1.7244, "step": 38935 }, { "epoch": 1.3, "grad_norm": 0.47148311138153076, "learning_rate": 0.00036378467454134636, "loss": 1.8515, "step": 38936 }, { "epoch": 1.3, "grad_norm": 0.49414756894111633, "learning_rate": 0.00036377446136412884, "loss": 1.8646, "step": 38937 }, { "epoch": 1.3, "grad_norm": 0.4859524667263031, "learning_rate": 0.0003637642481094986, "loss": 1.7632, "step": 38938 }, { "epoch": 1.3, "grad_norm": 0.4764493405818939, "learning_rate": 0.0003637540347774678, "loss": 1.7888, "step": 38939 }, { "epoch": 1.3, "grad_norm": 0.508080005645752, "learning_rate": 0.0003637438213680491, "loss": 1.8021, "step": 38940 }, { "epoch": 1.3, "grad_norm": 0.5000695586204529, "learning_rate": 0.00036373360788125475, "loss": 1.7775, "step": 38941 }, { "epoch": 1.3, "grad_norm": 0.4913956820964813, "learning_rate": 0.0003637233943170973, "loss": 1.7431, "step": 38942 }, { "epoch": 1.3, "grad_norm": 0.4994698464870453, "learning_rate": 0.0003637131806755889, "loss": 1.8046, "step": 38943 }, { "epoch": 1.3, "grad_norm": 0.5095605850219727, "learning_rate": 0.0003637029669567422, "loss": 1.7794, "step": 38944 }, { "epoch": 1.3, "grad_norm": 0.4865695834159851, "learning_rate": 0.0003636927531605693, "loss": 1.7685, "step": 38945 }, { "epoch": 1.3, "grad_norm": 0.47861602902412415, "learning_rate": 0.00036368253928708297, "loss": 1.734, "step": 38946 }, { "epoch": 1.3, "grad_norm": 0.4827086925506592, "learning_rate": 0.0003636723253362954, "loss": 1.674, "step": 38947 }, { "epoch": 1.3, "grad_norm": 0.48894649744033813, "learning_rate": 0.00036366211130821894, "loss": 1.749, "step": 38948 }, { "epoch": 1.3, "grad_norm": 0.4959826171398163, "learning_rate": 0.0003636518972028661, "loss": 1.8143, "step": 38949 }, { "epoch": 1.3, "grad_norm": 0.4784761965274811, "learning_rate": 0.0003636416830202492, "loss": 1.7288, "step": 38950 }, { "epoch": 1.3, "grad_norm": 0.4840238392353058, "learning_rate": 0.00036363146876038067, "loss": 1.8477, "step": 38951 }, { "epoch": 1.3, "grad_norm": 0.4842459559440613, "learning_rate": 0.0003636212544232729, "loss": 1.7275, "step": 38952 }, { "epoch": 1.3, "grad_norm": 0.4870966672897339, "learning_rate": 0.0003636110400089384, "loss": 1.752, "step": 38953 }, { "epoch": 1.3, "grad_norm": 0.48515787720680237, "learning_rate": 0.0003636008255173894, "loss": 1.7932, "step": 38954 }, { "epoch": 1.3, "grad_norm": 0.5201091766357422, "learning_rate": 0.0003635906109486383, "loss": 1.8445, "step": 38955 }, { "epoch": 1.3, "grad_norm": 0.48998111486434937, "learning_rate": 0.0003635803963026977, "loss": 1.7217, "step": 38956 }, { "epoch": 1.3, "grad_norm": 0.4775877594947815, "learning_rate": 0.00036357018157957973, "loss": 1.7771, "step": 38957 }, { "epoch": 1.3, "grad_norm": 0.48410382866859436, "learning_rate": 0.0003635599667792971, "loss": 1.7837, "step": 38958 }, { "epoch": 1.3, "grad_norm": 0.48466745018959045, "learning_rate": 0.00036354975190186186, "loss": 1.7705, "step": 38959 }, { "epoch": 1.3, "grad_norm": 0.5056800842285156, "learning_rate": 0.0003635395369472867, "loss": 1.7647, "step": 38960 }, { "epoch": 1.3, "grad_norm": 0.5035903453826904, "learning_rate": 0.00036352932191558384, "loss": 1.7582, "step": 38961 }, { "epoch": 1.3, "grad_norm": 0.6373310685157776, "learning_rate": 0.00036351910680676577, "loss": 1.736, "step": 38962 }, { "epoch": 1.3, "grad_norm": 0.46221181750297546, "learning_rate": 0.00036350889162084483, "loss": 1.7844, "step": 38963 }, { "epoch": 1.3, "grad_norm": 0.4992978870868683, "learning_rate": 0.0003634986763578335, "loss": 1.7207, "step": 38964 }, { "epoch": 1.3, "grad_norm": 0.5335159301757812, "learning_rate": 0.0003634884610177442, "loss": 1.7482, "step": 38965 }, { "epoch": 1.3, "grad_norm": 0.510373592376709, "learning_rate": 0.0003634782456005891, "loss": 1.8805, "step": 38966 }, { "epoch": 1.3, "grad_norm": 0.46303829550743103, "learning_rate": 0.00036346803010638086, "loss": 1.8356, "step": 38967 }, { "epoch": 1.3, "grad_norm": 0.4993249177932739, "learning_rate": 0.00036345781453513175, "loss": 1.835, "step": 38968 }, { "epoch": 1.3, "grad_norm": 0.5070388317108154, "learning_rate": 0.00036344759888685426, "loss": 1.7503, "step": 38969 }, { "epoch": 1.3, "grad_norm": 0.5033918023109436, "learning_rate": 0.0003634373831615607, "loss": 1.806, "step": 38970 }, { "epoch": 1.3, "grad_norm": 0.4703832268714905, "learning_rate": 0.0003634271673592635, "loss": 1.7661, "step": 38971 }, { "epoch": 1.3, "grad_norm": 0.490021675825119, "learning_rate": 0.0003634169514799751, "loss": 1.7872, "step": 38972 }, { "epoch": 1.3, "grad_norm": 0.48742160201072693, "learning_rate": 0.00036340673552370784, "loss": 1.6992, "step": 38973 }, { "epoch": 1.3, "grad_norm": 0.4845374524593353, "learning_rate": 0.0003633965194904741, "loss": 1.7843, "step": 38974 }, { "epoch": 1.3, "grad_norm": 0.48286330699920654, "learning_rate": 0.0003633863033802863, "loss": 1.6989, "step": 38975 }, { "epoch": 1.3, "grad_norm": 0.49225813150405884, "learning_rate": 0.000363376087193157, "loss": 1.7653, "step": 38976 }, { "epoch": 1.3, "grad_norm": 0.5078305006027222, "learning_rate": 0.0003633658709290984, "loss": 1.7597, "step": 38977 }, { "epoch": 1.3, "grad_norm": 0.46645936369895935, "learning_rate": 0.000363355654588123, "loss": 1.7029, "step": 38978 }, { "epoch": 1.3, "grad_norm": 0.5008857250213623, "learning_rate": 0.0003633454381702431, "loss": 1.8153, "step": 38979 }, { "epoch": 1.3, "grad_norm": 0.4754865765571594, "learning_rate": 0.0003633352216754712, "loss": 1.7866, "step": 38980 }, { "epoch": 1.3, "grad_norm": 0.49399247765541077, "learning_rate": 0.00036332500510381967, "loss": 1.7911, "step": 38981 }, { "epoch": 1.3, "grad_norm": 0.5030138492584229, "learning_rate": 0.00036331478845530094, "loss": 1.8099, "step": 38982 }, { "epoch": 1.3, "grad_norm": 0.4863349199295044, "learning_rate": 0.0003633045717299274, "loss": 1.7776, "step": 38983 }, { "epoch": 1.3, "grad_norm": 0.5081648826599121, "learning_rate": 0.0003632943549277113, "loss": 1.8247, "step": 38984 }, { "epoch": 1.3, "grad_norm": 0.4777129888534546, "learning_rate": 0.0003632841380486654, "loss": 1.743, "step": 38985 }, { "epoch": 1.3, "grad_norm": 0.4762445390224457, "learning_rate": 0.0003632739210928017, "loss": 1.6972, "step": 38986 }, { "epoch": 1.3, "grad_norm": 0.48476874828338623, "learning_rate": 0.0003632637040601329, "loss": 1.7914, "step": 38987 }, { "epoch": 1.3, "grad_norm": 0.47460663318634033, "learning_rate": 0.00036325348695067126, "loss": 1.7756, "step": 38988 }, { "epoch": 1.3, "grad_norm": 0.5010050535202026, "learning_rate": 0.00036324326976442913, "loss": 1.8168, "step": 38989 }, { "epoch": 1.3, "grad_norm": 0.4948903024196625, "learning_rate": 0.0003632330525014191, "loss": 1.8225, "step": 38990 }, { "epoch": 1.3, "grad_norm": 0.49027034640312195, "learning_rate": 0.00036322283516165333, "loss": 1.7706, "step": 38991 }, { "epoch": 1.3, "grad_norm": 0.47250896692276, "learning_rate": 0.00036321261774514444, "loss": 1.7888, "step": 38992 }, { "epoch": 1.3, "grad_norm": 0.4903339147567749, "learning_rate": 0.0003632024002519048, "loss": 1.7572, "step": 38993 }, { "epoch": 1.3, "grad_norm": 0.49727940559387207, "learning_rate": 0.00036319218268194664, "loss": 1.7872, "step": 38994 }, { "epoch": 1.3, "grad_norm": 0.5067684650421143, "learning_rate": 0.00036318196503528255, "loss": 1.8004, "step": 38995 }, { "epoch": 1.3, "grad_norm": 0.4951242506504059, "learning_rate": 0.00036317174731192477, "loss": 1.8268, "step": 38996 }, { "epoch": 1.3, "grad_norm": 0.4895545244216919, "learning_rate": 0.00036316152951188597, "loss": 1.7634, "step": 38997 }, { "epoch": 1.3, "grad_norm": 0.4867361783981323, "learning_rate": 0.0003631513116351782, "loss": 1.7912, "step": 38998 }, { "epoch": 1.3, "grad_norm": 0.49207690358161926, "learning_rate": 0.00036314109368181414, "loss": 1.7889, "step": 38999 }, { "epoch": 1.3, "grad_norm": 0.47828423976898193, "learning_rate": 0.0003631308756518061, "loss": 1.7761, "step": 39000 }, { "epoch": 1.3, "grad_norm": 0.47645303606987, "learning_rate": 0.00036312065754516646, "loss": 1.8062, "step": 39001 }, { "epoch": 1.3, "grad_norm": 0.4736662805080414, "learning_rate": 0.0003631104393619077, "loss": 1.792, "step": 39002 }, { "epoch": 1.3, "grad_norm": 0.4743031859397888, "learning_rate": 0.0003631002211020421, "loss": 1.792, "step": 39003 }, { "epoch": 1.3, "grad_norm": 0.48309847712516785, "learning_rate": 0.00036309000276558214, "loss": 1.796, "step": 39004 }, { "epoch": 1.3, "grad_norm": 0.49125778675079346, "learning_rate": 0.00036307978435254015, "loss": 1.7218, "step": 39005 }, { "epoch": 1.3, "grad_norm": 0.47999411821365356, "learning_rate": 0.00036306956586292866, "loss": 1.7356, "step": 39006 }, { "epoch": 1.3, "grad_norm": 0.48945626616477966, "learning_rate": 0.00036305934729676, "loss": 1.8089, "step": 39007 }, { "epoch": 1.3, "grad_norm": 0.4804987609386444, "learning_rate": 0.0003630491286540466, "loss": 1.7908, "step": 39008 }, { "epoch": 1.3, "grad_norm": 0.47851306200027466, "learning_rate": 0.00036303890993480084, "loss": 1.7732, "step": 39009 }, { "epoch": 1.3, "grad_norm": 0.49143844842910767, "learning_rate": 0.00036302869113903516, "loss": 1.7821, "step": 39010 }, { "epoch": 1.3, "grad_norm": 0.49897703528404236, "learning_rate": 0.00036301847226676183, "loss": 1.8231, "step": 39011 }, { "epoch": 1.3, "grad_norm": 0.5263278484344482, "learning_rate": 0.0003630082533179935, "loss": 1.704, "step": 39012 }, { "epoch": 1.3, "grad_norm": 0.510793924331665, "learning_rate": 0.0003629980342927424, "loss": 1.7536, "step": 39013 }, { "epoch": 1.3, "grad_norm": 0.48487406969070435, "learning_rate": 0.0003629878151910208, "loss": 1.7429, "step": 39014 }, { "epoch": 1.3, "grad_norm": 0.529289722442627, "learning_rate": 0.00036297759601284146, "loss": 1.8475, "step": 39015 }, { "epoch": 1.3, "grad_norm": 0.5841624736785889, "learning_rate": 0.0003629673767582165, "loss": 1.7858, "step": 39016 }, { "epoch": 1.3, "grad_norm": 0.6946113109588623, "learning_rate": 0.0003629571574271585, "loss": 1.7645, "step": 39017 }, { "epoch": 1.3, "grad_norm": 0.4723532199859619, "learning_rate": 0.0003629469380196798, "loss": 1.823, "step": 39018 }, { "epoch": 1.3, "grad_norm": 0.48617151379585266, "learning_rate": 0.0003629367185357927, "loss": 1.8206, "step": 39019 }, { "epoch": 1.3, "grad_norm": 0.4995560348033905, "learning_rate": 0.00036292649897550985, "loss": 1.7331, "step": 39020 }, { "epoch": 1.3, "grad_norm": 0.5559049248695374, "learning_rate": 0.0003629162793388433, "loss": 1.7735, "step": 39021 }, { "epoch": 1.3, "grad_norm": 0.48459649085998535, "learning_rate": 0.0003629060596258058, "loss": 1.7358, "step": 39022 }, { "epoch": 1.3, "grad_norm": 0.4946604073047638, "learning_rate": 0.00036289583983640947, "loss": 1.7334, "step": 39023 }, { "epoch": 1.3, "grad_norm": 0.5136477947235107, "learning_rate": 0.000362885619970667, "loss": 1.8142, "step": 39024 }, { "epoch": 1.3, "grad_norm": 0.4969843327999115, "learning_rate": 0.0003628754000285907, "loss": 1.7974, "step": 39025 }, { "epoch": 1.3, "grad_norm": 0.5147578716278076, "learning_rate": 0.00036286518001019276, "loss": 1.8362, "step": 39026 }, { "epoch": 1.3, "grad_norm": 0.47127801179885864, "learning_rate": 0.0003628549599154859, "loss": 1.7504, "step": 39027 }, { "epoch": 1.3, "grad_norm": 0.48410463333129883, "learning_rate": 0.0003628447397444822, "loss": 1.7687, "step": 39028 }, { "epoch": 1.3, "grad_norm": 0.4911337196826935, "learning_rate": 0.0003628345194971944, "loss": 1.8446, "step": 39029 }, { "epoch": 1.3, "grad_norm": 0.5002988576889038, "learning_rate": 0.00036282429917363474, "loss": 1.65, "step": 39030 }, { "epoch": 1.3, "grad_norm": 0.4711317718029022, "learning_rate": 0.00036281407877381564, "loss": 1.7752, "step": 39031 }, { "epoch": 1.3, "grad_norm": 0.48509567975997925, "learning_rate": 0.0003628038582977494, "loss": 1.7885, "step": 39032 }, { "epoch": 1.3, "grad_norm": 0.4913070797920227, "learning_rate": 0.0003627936377454486, "loss": 1.774, "step": 39033 }, { "epoch": 1.3, "grad_norm": 0.4910230338573456, "learning_rate": 0.0003627834171169256, "loss": 1.8132, "step": 39034 }, { "epoch": 1.3, "grad_norm": 0.49211645126342773, "learning_rate": 0.00036277319641219276, "loss": 1.7042, "step": 39035 }, { "epoch": 1.3, "grad_norm": 0.4939621388912201, "learning_rate": 0.0003627629756312625, "loss": 1.7802, "step": 39036 }, { "epoch": 1.3, "grad_norm": 0.5037871599197388, "learning_rate": 0.00036275275477414725, "loss": 1.723, "step": 39037 }, { "epoch": 1.3, "grad_norm": 0.4997122883796692, "learning_rate": 0.00036274253384085937, "loss": 1.7683, "step": 39038 }, { "epoch": 1.3, "grad_norm": 0.5199263095855713, "learning_rate": 0.0003627323128314113, "loss": 1.8253, "step": 39039 }, { "epoch": 1.3, "grad_norm": 0.4966980814933777, "learning_rate": 0.00036272209174581554, "loss": 1.7411, "step": 39040 }, { "epoch": 1.3, "grad_norm": 0.49069440364837646, "learning_rate": 0.0003627118705840843, "loss": 1.8246, "step": 39041 }, { "epoch": 1.3, "grad_norm": 0.4727340340614319, "learning_rate": 0.0003627016493462301, "loss": 1.728, "step": 39042 }, { "epoch": 1.3, "grad_norm": 0.49770602583885193, "learning_rate": 0.00036269142803226534, "loss": 1.8481, "step": 39043 }, { "epoch": 1.3, "grad_norm": 0.4856991767883301, "learning_rate": 0.0003626812066422025, "loss": 1.8162, "step": 39044 }, { "epoch": 1.3, "grad_norm": 0.5061436891555786, "learning_rate": 0.00036267098517605385, "loss": 1.7777, "step": 39045 }, { "epoch": 1.3, "grad_norm": 0.5275329947471619, "learning_rate": 0.0003626607636338318, "loss": 1.7642, "step": 39046 }, { "epoch": 1.3, "grad_norm": 0.89151930809021, "learning_rate": 0.00036265054201554893, "loss": 1.7568, "step": 39047 }, { "epoch": 1.3, "grad_norm": 0.49910297989845276, "learning_rate": 0.0003626403203212175, "loss": 1.7692, "step": 39048 }, { "epoch": 1.3, "grad_norm": 0.4771806001663208, "learning_rate": 0.0003626300985508499, "loss": 1.7214, "step": 39049 }, { "epoch": 1.3, "grad_norm": 0.4991947114467621, "learning_rate": 0.0003626198767044586, "loss": 1.7936, "step": 39050 }, { "epoch": 1.3, "grad_norm": 0.4795420467853546, "learning_rate": 0.00036260965478205595, "loss": 1.7838, "step": 39051 }, { "epoch": 1.3, "grad_norm": 0.5061777830123901, "learning_rate": 0.0003625994327836546, "loss": 1.7837, "step": 39052 }, { "epoch": 1.3, "grad_norm": 0.4734511971473694, "learning_rate": 0.0003625892107092665, "loss": 1.6874, "step": 39053 }, { "epoch": 1.3, "grad_norm": 0.5183179974555969, "learning_rate": 0.0003625789885589045, "loss": 1.8474, "step": 39054 }, { "epoch": 1.3, "grad_norm": 0.5205723643302917, "learning_rate": 0.0003625687663325808, "loss": 1.8021, "step": 39055 }, { "epoch": 1.3, "grad_norm": 0.4955432415008545, "learning_rate": 0.0003625585440303078, "loss": 1.7785, "step": 39056 }, { "epoch": 1.3, "grad_norm": 0.492104709148407, "learning_rate": 0.000362548321652098, "loss": 1.7764, "step": 39057 }, { "epoch": 1.3, "grad_norm": 0.5104138851165771, "learning_rate": 0.0003625380991979636, "loss": 1.7124, "step": 39058 }, { "epoch": 1.3, "grad_norm": 0.4978207051753998, "learning_rate": 0.00036252787666791737, "loss": 1.8634, "step": 39059 }, { "epoch": 1.3, "grad_norm": 0.5220502018928528, "learning_rate": 0.0003625176540619714, "loss": 1.7121, "step": 39060 }, { "epoch": 1.3, "grad_norm": 0.5016638040542603, "learning_rate": 0.0003625074313801382, "loss": 1.8501, "step": 39061 }, { "epoch": 1.3, "grad_norm": 0.6500626802444458, "learning_rate": 0.0003624972086224302, "loss": 1.7799, "step": 39062 }, { "epoch": 1.3, "grad_norm": 0.49449771642684937, "learning_rate": 0.0003624869857888598, "loss": 1.8464, "step": 39063 }, { "epoch": 1.3, "grad_norm": 0.48803913593292236, "learning_rate": 0.0003624767628794395, "loss": 1.7506, "step": 39064 }, { "epoch": 1.3, "grad_norm": 0.5031499862670898, "learning_rate": 0.0003624665398941815, "loss": 1.7281, "step": 39065 }, { "epoch": 1.3, "grad_norm": 0.49060237407684326, "learning_rate": 0.00036245631683309837, "loss": 1.7632, "step": 39066 }, { "epoch": 1.3, "grad_norm": 0.48270633816719055, "learning_rate": 0.0003624460936962025, "loss": 1.6988, "step": 39067 }, { "epoch": 1.3, "grad_norm": 0.5009103417396545, "learning_rate": 0.0003624358704835061, "loss": 1.7847, "step": 39068 }, { "epoch": 1.3, "grad_norm": 0.4842984676361084, "learning_rate": 0.0003624256471950219, "loss": 1.7262, "step": 39069 }, { "epoch": 1.3, "grad_norm": 0.476104736328125, "learning_rate": 0.0003624154238307622, "loss": 1.8412, "step": 39070 }, { "epoch": 1.3, "grad_norm": 0.48905330896377563, "learning_rate": 0.00036240520039073926, "loss": 1.7798, "step": 39071 }, { "epoch": 1.3, "grad_norm": 0.599250316619873, "learning_rate": 0.0003623949768749657, "loss": 1.7742, "step": 39072 }, { "epoch": 1.3, "grad_norm": 0.5232760310173035, "learning_rate": 0.0003623847532834538, "loss": 1.7284, "step": 39073 }, { "epoch": 1.3, "grad_norm": 0.48379287123680115, "learning_rate": 0.000362374529616216, "loss": 1.7128, "step": 39074 }, { "epoch": 1.3, "grad_norm": 0.49581730365753174, "learning_rate": 0.00036236430587326476, "loss": 1.7508, "step": 39075 }, { "epoch": 1.3, "grad_norm": 0.49731242656707764, "learning_rate": 0.0003623540820546123, "loss": 1.7388, "step": 39076 }, { "epoch": 1.3, "grad_norm": 0.4818982183933258, "learning_rate": 0.00036234385816027136, "loss": 1.6931, "step": 39077 }, { "epoch": 1.3, "grad_norm": 0.48464223742485046, "learning_rate": 0.00036233363419025397, "loss": 1.8011, "step": 39078 }, { "epoch": 1.3, "grad_norm": 0.48315876722335815, "learning_rate": 0.0003623234101445729, "loss": 1.6669, "step": 39079 }, { "epoch": 1.3, "grad_norm": 0.4821906089782715, "learning_rate": 0.0003623131860232403, "loss": 1.8098, "step": 39080 }, { "epoch": 1.3, "grad_norm": 0.5200584530830383, "learning_rate": 0.0003623029618262687, "loss": 1.8159, "step": 39081 }, { "epoch": 1.3, "grad_norm": 0.4748776853084564, "learning_rate": 0.00036229273755367054, "loss": 1.7294, "step": 39082 }, { "epoch": 1.3, "grad_norm": 0.48212140798568726, "learning_rate": 0.00036228251320545796, "loss": 1.8017, "step": 39083 }, { "epoch": 1.3, "grad_norm": 0.49406808614730835, "learning_rate": 0.0003622722887816438, "loss": 1.8178, "step": 39084 }, { "epoch": 1.3, "grad_norm": 0.47145339846611023, "learning_rate": 0.0003622620642822402, "loss": 1.7813, "step": 39085 }, { "epoch": 1.3, "grad_norm": 0.5032354593276978, "learning_rate": 0.00036225183970725963, "loss": 1.7518, "step": 39086 }, { "epoch": 1.3, "grad_norm": 0.49013280868530273, "learning_rate": 0.00036224161505671445, "loss": 1.8269, "step": 39087 }, { "epoch": 1.3, "grad_norm": 0.4918064475059509, "learning_rate": 0.00036223139033061713, "loss": 1.7543, "step": 39088 }, { "epoch": 1.3, "grad_norm": 0.49606242775917053, "learning_rate": 0.0003622211655289802, "loss": 1.8293, "step": 39089 }, { "epoch": 1.3, "grad_norm": 0.47517845034599304, "learning_rate": 0.00036221094065181584, "loss": 1.7391, "step": 39090 }, { "epoch": 1.3, "grad_norm": 0.49251237511634827, "learning_rate": 0.00036220071569913653, "loss": 1.7748, "step": 39091 }, { "epoch": 1.3, "grad_norm": 0.47684308886528015, "learning_rate": 0.0003621904906709548, "loss": 1.7712, "step": 39092 }, { "epoch": 1.3, "grad_norm": 0.4945129156112671, "learning_rate": 0.0003621802655672829, "loss": 1.867, "step": 39093 }, { "epoch": 1.3, "grad_norm": 0.5062789916992188, "learning_rate": 0.00036217004038813336, "loss": 1.8213, "step": 39094 }, { "epoch": 1.3, "grad_norm": 0.48203426599502563, "learning_rate": 0.0003621598151335186, "loss": 1.76, "step": 39095 }, { "epoch": 1.3, "grad_norm": 0.4696415364742279, "learning_rate": 0.00036214958980345085, "loss": 1.6734, "step": 39096 }, { "epoch": 1.3, "grad_norm": 0.47921568155288696, "learning_rate": 0.00036213936439794274, "loss": 1.7743, "step": 39097 }, { "epoch": 1.3, "grad_norm": 0.4863177239894867, "learning_rate": 0.00036212913891700663, "loss": 1.7537, "step": 39098 }, { "epoch": 1.3, "grad_norm": 0.5287737846374512, "learning_rate": 0.0003621189133606548, "loss": 1.7773, "step": 39099 }, { "epoch": 1.3, "grad_norm": 0.5179598331451416, "learning_rate": 0.00036210868772889985, "loss": 1.7684, "step": 39100 }, { "epoch": 1.3, "grad_norm": 0.503563404083252, "learning_rate": 0.000362098462021754, "loss": 1.7371, "step": 39101 }, { "epoch": 1.3, "grad_norm": 0.5086110234260559, "learning_rate": 0.00036208823623922994, "loss": 1.8225, "step": 39102 }, { "epoch": 1.3, "grad_norm": 0.5842427611351013, "learning_rate": 0.0003620780103813398, "loss": 1.7874, "step": 39103 }, { "epoch": 1.3, "grad_norm": 0.48026543855667114, "learning_rate": 0.0003620677844480961, "loss": 1.6438, "step": 39104 }, { "epoch": 1.3, "grad_norm": 0.49238523840904236, "learning_rate": 0.0003620575584395113, "loss": 1.8253, "step": 39105 }, { "epoch": 1.3, "grad_norm": 0.5020251870155334, "learning_rate": 0.0003620473323555977, "loss": 1.8211, "step": 39106 }, { "epoch": 1.3, "grad_norm": 0.5274770259857178, "learning_rate": 0.0003620371061963679, "loss": 1.8037, "step": 39107 }, { "epoch": 1.3, "grad_norm": 0.4888491928577423, "learning_rate": 0.00036202687996183397, "loss": 1.7631, "step": 39108 }, { "epoch": 1.3, "grad_norm": 0.48986127972602844, "learning_rate": 0.00036201665365200877, "loss": 1.7521, "step": 39109 }, { "epoch": 1.3, "grad_norm": 0.4824301302433014, "learning_rate": 0.00036200642726690437, "loss": 1.8328, "step": 39110 }, { "epoch": 1.3, "grad_norm": 0.49825167655944824, "learning_rate": 0.00036199620080653336, "loss": 1.7047, "step": 39111 }, { "epoch": 1.3, "grad_norm": 0.48658066987991333, "learning_rate": 0.00036198597427090814, "loss": 1.6795, "step": 39112 }, { "epoch": 1.3, "grad_norm": 0.48682472109794617, "learning_rate": 0.0003619757476600409, "loss": 1.7604, "step": 39113 }, { "epoch": 1.3, "grad_norm": 0.5768067240715027, "learning_rate": 0.00036196552097394444, "loss": 1.7568, "step": 39114 }, { "epoch": 1.3, "grad_norm": 0.5154598951339722, "learning_rate": 0.0003619552942126309, "loss": 1.7723, "step": 39115 }, { "epoch": 1.3, "grad_norm": 0.48212894797325134, "learning_rate": 0.00036194506737611274, "loss": 1.7406, "step": 39116 }, { "epoch": 1.3, "grad_norm": 0.4802948534488678, "learning_rate": 0.0003619348404644024, "loss": 1.7738, "step": 39117 }, { "epoch": 1.3, "grad_norm": 0.48712071776390076, "learning_rate": 0.00036192461347751224, "loss": 1.7131, "step": 39118 }, { "epoch": 1.3, "grad_norm": 0.48684439063072205, "learning_rate": 0.00036191438641545483, "loss": 1.7393, "step": 39119 }, { "epoch": 1.3, "grad_norm": 0.49004989862442017, "learning_rate": 0.00036190415927824237, "loss": 1.8272, "step": 39120 }, { "epoch": 1.3, "grad_norm": 0.4823107421398163, "learning_rate": 0.00036189393206588743, "loss": 1.749, "step": 39121 }, { "epoch": 1.3, "grad_norm": 0.47327131032943726, "learning_rate": 0.00036188370477840243, "loss": 1.745, "step": 39122 }, { "epoch": 1.3, "grad_norm": 0.5201109647750854, "learning_rate": 0.00036187347741579957, "loss": 1.8096, "step": 39123 }, { "epoch": 1.3, "grad_norm": 0.5054272413253784, "learning_rate": 0.0003618632499780916, "loss": 1.8192, "step": 39124 }, { "epoch": 1.3, "grad_norm": 0.48481714725494385, "learning_rate": 0.00036185302246529073, "loss": 1.7471, "step": 39125 }, { "epoch": 1.3, "grad_norm": 0.4687126576900482, "learning_rate": 0.0003618427948774093, "loss": 1.7212, "step": 39126 }, { "epoch": 1.3, "grad_norm": 0.4967482388019562, "learning_rate": 0.0003618325672144599, "loss": 1.7817, "step": 39127 }, { "epoch": 1.3, "grad_norm": 0.48722290992736816, "learning_rate": 0.0003618223394764549, "loss": 1.7579, "step": 39128 }, { "epoch": 1.3, "grad_norm": 0.5153529047966003, "learning_rate": 0.00036181211166340664, "loss": 1.6651, "step": 39129 }, { "epoch": 1.3, "grad_norm": 0.4771261215209961, "learning_rate": 0.0003618018837753277, "loss": 1.741, "step": 39130 }, { "epoch": 1.3, "grad_norm": 0.4842166006565094, "learning_rate": 0.00036179165581223013, "loss": 1.7881, "step": 39131 }, { "epoch": 1.3, "grad_norm": 0.497969388961792, "learning_rate": 0.0003617814277741268, "loss": 1.7829, "step": 39132 }, { "epoch": 1.3, "grad_norm": 0.48778489232063293, "learning_rate": 0.00036177119966102983, "loss": 1.811, "step": 39133 }, { "epoch": 1.3, "grad_norm": 0.4801793694496155, "learning_rate": 0.00036176097147295176, "loss": 1.8496, "step": 39134 }, { "epoch": 1.3, "grad_norm": 0.48298463225364685, "learning_rate": 0.000361750743209905, "loss": 1.7458, "step": 39135 }, { "epoch": 1.3, "grad_norm": 0.9169825911521912, "learning_rate": 0.0003617405148719019, "loss": 1.859, "step": 39136 }, { "epoch": 1.3, "grad_norm": 0.5032483339309692, "learning_rate": 0.00036173028645895497, "loss": 1.7394, "step": 39137 }, { "epoch": 1.3, "grad_norm": 0.48983967304229736, "learning_rate": 0.00036172005797107645, "loss": 1.7795, "step": 39138 }, { "epoch": 1.3, "grad_norm": 0.4958297610282898, "learning_rate": 0.000361709829408279, "loss": 1.7452, "step": 39139 }, { "epoch": 1.3, "grad_norm": 0.4797142744064331, "learning_rate": 0.0003616996007705748, "loss": 1.8053, "step": 39140 }, { "epoch": 1.3, "grad_norm": 0.4734766185283661, "learning_rate": 0.00036168937205797644, "loss": 1.7378, "step": 39141 }, { "epoch": 1.3, "grad_norm": 0.4785314202308655, "learning_rate": 0.0003616791432704962, "loss": 1.8508, "step": 39142 }, { "epoch": 1.3, "grad_norm": 0.49744635820388794, "learning_rate": 0.0003616689144081467, "loss": 1.7161, "step": 39143 }, { "epoch": 1.3, "grad_norm": 0.48811933398246765, "learning_rate": 0.00036165868547094015, "loss": 1.8013, "step": 39144 }, { "epoch": 1.3, "grad_norm": 0.5025485157966614, "learning_rate": 0.000361648456458889, "loss": 1.7061, "step": 39145 }, { "epoch": 1.3, "grad_norm": 0.501220703125, "learning_rate": 0.0003616382273720058, "loss": 1.762, "step": 39146 }, { "epoch": 1.3, "grad_norm": 0.5049899220466614, "learning_rate": 0.0003616279982103028, "loss": 1.7678, "step": 39147 }, { "epoch": 1.3, "grad_norm": 0.4830946922302246, "learning_rate": 0.0003616177689737925, "loss": 1.777, "step": 39148 }, { "epoch": 1.3, "grad_norm": 0.4897845685482025, "learning_rate": 0.00036160753966248734, "loss": 1.8039, "step": 39149 }, { "epoch": 1.3, "grad_norm": 0.47359713912010193, "learning_rate": 0.00036159731027639964, "loss": 1.7219, "step": 39150 }, { "epoch": 1.3, "grad_norm": 0.4814818501472473, "learning_rate": 0.000361587080815542, "loss": 1.7584, "step": 39151 }, { "epoch": 1.3, "grad_norm": 0.4874988794326782, "learning_rate": 0.00036157685127992666, "loss": 1.7978, "step": 39152 }, { "epoch": 1.3, "grad_norm": 0.5022451877593994, "learning_rate": 0.00036156662166956605, "loss": 1.7449, "step": 39153 }, { "epoch": 1.3, "grad_norm": 0.5049352049827576, "learning_rate": 0.0003615563919844726, "loss": 1.8405, "step": 39154 }, { "epoch": 1.3, "grad_norm": 0.4987139105796814, "learning_rate": 0.00036154616222465886, "loss": 1.8243, "step": 39155 }, { "epoch": 1.3, "grad_norm": 0.48400235176086426, "learning_rate": 0.0003615359323901371, "loss": 1.7944, "step": 39156 }, { "epoch": 1.3, "grad_norm": 0.49680739641189575, "learning_rate": 0.00036152570248091986, "loss": 1.7823, "step": 39157 }, { "epoch": 1.3, "grad_norm": 0.5193842053413391, "learning_rate": 0.0003615154724970194, "loss": 1.7444, "step": 39158 }, { "epoch": 1.3, "grad_norm": 0.482193261384964, "learning_rate": 0.0003615052424384482, "loss": 1.6996, "step": 39159 }, { "epoch": 1.3, "grad_norm": 0.4952876567840576, "learning_rate": 0.00036149501230521873, "loss": 1.7071, "step": 39160 }, { "epoch": 1.3, "grad_norm": 0.5081433653831482, "learning_rate": 0.00036148478209734335, "loss": 1.8, "step": 39161 }, { "epoch": 1.3, "grad_norm": 0.49530360102653503, "learning_rate": 0.00036147455181483463, "loss": 1.7884, "step": 39162 }, { "epoch": 1.3, "grad_norm": 0.5036396980285645, "learning_rate": 0.0003614643214577047, "loss": 1.7883, "step": 39163 }, { "epoch": 1.3, "grad_norm": 0.501742422580719, "learning_rate": 0.00036145409102596626, "loss": 1.7653, "step": 39164 }, { "epoch": 1.3, "grad_norm": 0.49331486225128174, "learning_rate": 0.00036144386051963155, "loss": 1.7265, "step": 39165 }, { "epoch": 1.3, "grad_norm": 0.4989790618419647, "learning_rate": 0.000361433629938713, "loss": 1.7525, "step": 39166 }, { "epoch": 1.3, "grad_norm": 0.48174089193344116, "learning_rate": 0.0003614233992832232, "loss": 1.7397, "step": 39167 }, { "epoch": 1.3, "grad_norm": 0.5055646300315857, "learning_rate": 0.0003614131685531743, "loss": 1.8355, "step": 39168 }, { "epoch": 1.3, "grad_norm": 0.49337896704673767, "learning_rate": 0.000361402937748579, "loss": 1.7879, "step": 39169 }, { "epoch": 1.3, "grad_norm": 0.4851420521736145, "learning_rate": 0.00036139270686944946, "loss": 1.7505, "step": 39170 }, { "epoch": 1.3, "grad_norm": 0.48890456557273865, "learning_rate": 0.0003613824759157983, "loss": 1.7603, "step": 39171 }, { "epoch": 1.3, "grad_norm": 0.485588401556015, "learning_rate": 0.00036137224488763783, "loss": 1.7262, "step": 39172 }, { "epoch": 1.3, "grad_norm": 0.48416972160339355, "learning_rate": 0.0003613620137849805, "loss": 1.7474, "step": 39173 }, { "epoch": 1.3, "grad_norm": 0.5156973600387573, "learning_rate": 0.00036135178260783877, "loss": 1.843, "step": 39174 }, { "epoch": 1.3, "grad_norm": 0.4908824563026428, "learning_rate": 0.0003613415513562249, "loss": 1.757, "step": 39175 }, { "epoch": 1.3, "grad_norm": 0.48427167534828186, "learning_rate": 0.0003613313200301515, "loss": 1.7746, "step": 39176 }, { "epoch": 1.3, "grad_norm": 0.474277526140213, "learning_rate": 0.000361321088629631, "loss": 1.7465, "step": 39177 }, { "epoch": 1.3, "grad_norm": 0.495454877614975, "learning_rate": 0.0003613108571546756, "loss": 1.8529, "step": 39178 }, { "epoch": 1.3, "grad_norm": 0.4913285970687866, "learning_rate": 0.0003613006256052979, "loss": 1.7615, "step": 39179 }, { "epoch": 1.3, "grad_norm": 0.4808340072631836, "learning_rate": 0.00036129039398151024, "loss": 1.7977, "step": 39180 }, { "epoch": 1.3, "grad_norm": 0.4836026430130005, "learning_rate": 0.0003612801622833252, "loss": 1.7155, "step": 39181 }, { "epoch": 1.3, "grad_norm": 0.48151662945747375, "learning_rate": 0.00036126993051075493, "loss": 1.8116, "step": 39182 }, { "epoch": 1.3, "grad_norm": 0.47308191657066345, "learning_rate": 0.00036125969866381203, "loss": 1.7603, "step": 39183 }, { "epoch": 1.3, "grad_norm": 0.4773801565170288, "learning_rate": 0.00036124946674250886, "loss": 1.7825, "step": 39184 }, { "epoch": 1.3, "grad_norm": 0.5004060864448547, "learning_rate": 0.00036123923474685787, "loss": 1.803, "step": 39185 }, { "epoch": 1.3, "grad_norm": 0.49054935574531555, "learning_rate": 0.00036122900267687155, "loss": 1.7844, "step": 39186 }, { "epoch": 1.3, "grad_norm": 0.47906896471977234, "learning_rate": 0.00036121877053256224, "loss": 1.8327, "step": 39187 }, { "epoch": 1.3, "grad_norm": 0.49278494715690613, "learning_rate": 0.00036120853831394216, "loss": 1.7367, "step": 39188 }, { "epoch": 1.3, "grad_norm": 0.46196821331977844, "learning_rate": 0.0003611983060210242, "loss": 1.7521, "step": 39189 }, { "epoch": 1.3, "grad_norm": 0.6228124499320984, "learning_rate": 0.0003611880736538204, "loss": 1.7862, "step": 39190 }, { "epoch": 1.3, "grad_norm": 0.5184898972511292, "learning_rate": 0.00036117784121234323, "loss": 1.8508, "step": 39191 }, { "epoch": 1.3, "grad_norm": 0.49248987436294556, "learning_rate": 0.00036116760869660535, "loss": 1.7678, "step": 39192 }, { "epoch": 1.3, "grad_norm": 0.4869500994682312, "learning_rate": 0.0003611573761066188, "loss": 1.7476, "step": 39193 }, { "epoch": 1.3, "grad_norm": 0.48304852843284607, "learning_rate": 0.0003611471434423963, "loss": 1.7575, "step": 39194 }, { "epoch": 1.3, "grad_norm": 0.482695996761322, "learning_rate": 0.0003611369107039501, "loss": 1.7544, "step": 39195 }, { "epoch": 1.3, "grad_norm": 0.49652862548828125, "learning_rate": 0.0003611266778912928, "loss": 1.7178, "step": 39196 }, { "epoch": 1.3, "grad_norm": 0.48530980944633484, "learning_rate": 0.0003611164450044367, "loss": 1.8147, "step": 39197 }, { "epoch": 1.3, "grad_norm": 0.4845704138278961, "learning_rate": 0.00036110621204339427, "loss": 1.8241, "step": 39198 }, { "epoch": 1.3, "grad_norm": 0.4874069392681122, "learning_rate": 0.00036109597900817794, "loss": 1.7817, "step": 39199 }, { "epoch": 1.3, "grad_norm": 0.9959716796875, "learning_rate": 0.00036108574589879995, "loss": 1.764, "step": 39200 }, { "epoch": 1.3, "grad_norm": 0.48949405550956726, "learning_rate": 0.000361075512715273, "loss": 1.7668, "step": 39201 }, { "epoch": 1.3, "grad_norm": 0.4812357723712921, "learning_rate": 0.0003610652794576093, "loss": 1.7801, "step": 39202 }, { "epoch": 1.3, "grad_norm": 0.4811260998249054, "learning_rate": 0.0003610550461258213, "loss": 1.686, "step": 39203 }, { "epoch": 1.3, "grad_norm": 0.4806565046310425, "learning_rate": 0.0003610448127199215, "loss": 1.8403, "step": 39204 }, { "epoch": 1.3, "grad_norm": 0.4908079504966736, "learning_rate": 0.0003610345792399223, "loss": 1.7863, "step": 39205 }, { "epoch": 1.3, "grad_norm": 1.0725613832473755, "learning_rate": 0.00036102434568583623, "loss": 1.8521, "step": 39206 }, { "epoch": 1.3, "grad_norm": 0.48752346634864807, "learning_rate": 0.00036101411205767547, "loss": 1.8432, "step": 39207 }, { "epoch": 1.3, "grad_norm": 0.4905257523059845, "learning_rate": 0.0003610038783554526, "loss": 1.8377, "step": 39208 }, { "epoch": 1.3, "grad_norm": 0.5068596601486206, "learning_rate": 0.00036099364457918, "loss": 1.8373, "step": 39209 }, { "epoch": 1.3, "grad_norm": 0.48495280742645264, "learning_rate": 0.00036098341072887013, "loss": 1.795, "step": 39210 }, { "epoch": 1.3, "grad_norm": 0.4856319725513458, "learning_rate": 0.00036097317680453537, "loss": 1.7501, "step": 39211 }, { "epoch": 1.3, "grad_norm": 0.47463372349739075, "learning_rate": 0.0003609629428061882, "loss": 1.723, "step": 39212 }, { "epoch": 1.3, "grad_norm": 0.47619637846946716, "learning_rate": 0.0003609527087338409, "loss": 1.7073, "step": 39213 }, { "epoch": 1.3, "grad_norm": 0.4720495045185089, "learning_rate": 0.000360942474587506, "loss": 1.6964, "step": 39214 }, { "epoch": 1.3, "grad_norm": 0.5170198678970337, "learning_rate": 0.000360932240367196, "loss": 1.7769, "step": 39215 }, { "epoch": 1.3, "grad_norm": 0.47738176584243774, "learning_rate": 0.00036092200607292315, "loss": 1.7738, "step": 39216 }, { "epoch": 1.3, "grad_norm": 0.5038280487060547, "learning_rate": 0.00036091177170470014, "loss": 1.8211, "step": 39217 }, { "epoch": 1.3, "grad_norm": 0.4895646870136261, "learning_rate": 0.000360901537262539, "loss": 1.7637, "step": 39218 }, { "epoch": 1.3, "grad_norm": 0.488440603017807, "learning_rate": 0.0003608913027464525, "loss": 1.8256, "step": 39219 }, { "epoch": 1.3, "grad_norm": 0.48512133955955505, "learning_rate": 0.0003608810681564529, "loss": 1.7213, "step": 39220 }, { "epoch": 1.3, "grad_norm": 0.5053169131278992, "learning_rate": 0.00036087083349255254, "loss": 1.7946, "step": 39221 }, { "epoch": 1.3, "grad_norm": 0.49502846598625183, "learning_rate": 0.00036086059875476415, "loss": 1.7328, "step": 39222 }, { "epoch": 1.3, "grad_norm": 0.4740365445613861, "learning_rate": 0.0003608503639430998, "loss": 1.7623, "step": 39223 }, { "epoch": 1.3, "grad_norm": 0.48767295479774475, "learning_rate": 0.00036084012905757216, "loss": 1.8271, "step": 39224 }, { "epoch": 1.31, "grad_norm": 0.5290011167526245, "learning_rate": 0.0003608298940981935, "loss": 1.7706, "step": 39225 }, { "epoch": 1.31, "grad_norm": 0.5087838172912598, "learning_rate": 0.0003608196590649764, "loss": 1.8708, "step": 39226 }, { "epoch": 1.31, "grad_norm": 0.4799375832080841, "learning_rate": 0.0003608094239579331, "loss": 1.8044, "step": 39227 }, { "epoch": 1.31, "grad_norm": 0.5249472260475159, "learning_rate": 0.0003607991887770762, "loss": 1.8128, "step": 39228 }, { "epoch": 1.31, "grad_norm": 0.4933747351169586, "learning_rate": 0.0003607889535224181, "loss": 1.7754, "step": 39229 }, { "epoch": 1.31, "grad_norm": 0.5160234570503235, "learning_rate": 0.00036077871819397095, "loss": 1.835, "step": 39230 }, { "epoch": 1.31, "grad_norm": 0.4916542172431946, "learning_rate": 0.00036076848279174763, "loss": 1.8475, "step": 39231 }, { "epoch": 1.31, "grad_norm": 0.48439157009124756, "learning_rate": 0.0003607582473157602, "loss": 1.7852, "step": 39232 }, { "epoch": 1.31, "grad_norm": 2.541259765625, "learning_rate": 0.0003607480117660212, "loss": 1.7966, "step": 39233 }, { "epoch": 1.31, "grad_norm": 0.5230056643486023, "learning_rate": 0.0003607377761425431, "loss": 1.8175, "step": 39234 }, { "epoch": 1.31, "grad_norm": 0.5081204771995544, "learning_rate": 0.00036072754044533825, "loss": 1.7422, "step": 39235 }, { "epoch": 1.31, "grad_norm": 0.49352288246154785, "learning_rate": 0.0003607173046744192, "loss": 1.8017, "step": 39236 }, { "epoch": 1.31, "grad_norm": 0.5155046582221985, "learning_rate": 0.00036070706882979824, "loss": 1.7551, "step": 39237 }, { "epoch": 1.31, "grad_norm": 0.4887247383594513, "learning_rate": 0.00036069683291148786, "loss": 1.7563, "step": 39238 }, { "epoch": 1.31, "grad_norm": 0.4951927363872528, "learning_rate": 0.0003606865969195004, "loss": 1.7899, "step": 39239 }, { "epoch": 1.31, "grad_norm": 0.48869588971138, "learning_rate": 0.00036067636085384836, "loss": 1.7116, "step": 39240 }, { "epoch": 1.31, "grad_norm": 0.4875926971435547, "learning_rate": 0.00036066612471454424, "loss": 1.769, "step": 39241 }, { "epoch": 1.31, "grad_norm": 0.48906993865966797, "learning_rate": 0.00036065588850160035, "loss": 1.8494, "step": 39242 }, { "epoch": 1.31, "grad_norm": 0.4743715524673462, "learning_rate": 0.0003606456522150291, "loss": 1.7432, "step": 39243 }, { "epoch": 1.31, "grad_norm": 0.4931807816028595, "learning_rate": 0.000360635415854843, "loss": 1.7702, "step": 39244 }, { "epoch": 1.31, "grad_norm": 0.48114821314811707, "learning_rate": 0.00036062517942105445, "loss": 1.746, "step": 39245 }, { "epoch": 1.31, "grad_norm": 0.49224522709846497, "learning_rate": 0.00036061494291367584, "loss": 1.8648, "step": 39246 }, { "epoch": 1.31, "grad_norm": 0.48897913098335266, "learning_rate": 0.0003606047063327197, "loss": 1.7552, "step": 39247 }, { "epoch": 1.31, "grad_norm": 0.49972814321517944, "learning_rate": 0.00036059446967819824, "loss": 1.7819, "step": 39248 }, { "epoch": 1.31, "grad_norm": 0.5027705430984497, "learning_rate": 0.0003605842329501242, "loss": 1.8815, "step": 39249 }, { "epoch": 1.31, "grad_norm": 0.4888719916343689, "learning_rate": 0.0003605739961485096, "loss": 1.7263, "step": 39250 }, { "epoch": 1.31, "grad_norm": 0.4763185977935791, "learning_rate": 0.00036056375927336727, "loss": 1.7526, "step": 39251 }, { "epoch": 1.31, "grad_norm": 0.48060372471809387, "learning_rate": 0.0003605535223247094, "loss": 1.7026, "step": 39252 }, { "epoch": 1.31, "grad_norm": 0.4922172427177429, "learning_rate": 0.0003605432853025485, "loss": 1.7767, "step": 39253 }, { "epoch": 1.31, "grad_norm": 0.4867904484272003, "learning_rate": 0.000360533048206897, "loss": 1.7514, "step": 39254 }, { "epoch": 1.31, "grad_norm": 0.5157410502433777, "learning_rate": 0.00036052281103776717, "loss": 1.7144, "step": 39255 }, { "epoch": 1.31, "grad_norm": 0.4824380576610565, "learning_rate": 0.0003605125737951717, "loss": 1.7306, "step": 39256 }, { "epoch": 1.31, "grad_norm": 0.4965706467628479, "learning_rate": 0.0003605023364791228, "loss": 1.757, "step": 39257 }, { "epoch": 1.31, "grad_norm": 0.48463088274002075, "learning_rate": 0.000360492099089633, "loss": 1.7678, "step": 39258 }, { "epoch": 1.31, "grad_norm": 0.49404338002204895, "learning_rate": 0.00036048186162671475, "loss": 1.7107, "step": 39259 }, { "epoch": 1.31, "grad_norm": 0.5054659247398376, "learning_rate": 0.00036047162409038035, "loss": 1.7794, "step": 39260 }, { "epoch": 1.31, "grad_norm": 0.478122353553772, "learning_rate": 0.0003604613864806424, "loss": 1.7889, "step": 39261 }, { "epoch": 1.31, "grad_norm": 0.4809601306915283, "learning_rate": 0.0003604511487975132, "loss": 1.7422, "step": 39262 }, { "epoch": 1.31, "grad_norm": 0.48834526538848877, "learning_rate": 0.00036044091104100523, "loss": 1.6784, "step": 39263 }, { "epoch": 1.31, "grad_norm": 0.4770178496837616, "learning_rate": 0.0003604306732111308, "loss": 1.7621, "step": 39264 }, { "epoch": 1.31, "grad_norm": 0.5066540241241455, "learning_rate": 0.0003604204353079025, "loss": 1.802, "step": 39265 }, { "epoch": 1.31, "grad_norm": 0.4835381507873535, "learning_rate": 0.0003604101973313327, "loss": 1.7755, "step": 39266 }, { "epoch": 1.31, "grad_norm": 0.4928448796272278, "learning_rate": 0.00036039995928143387, "loss": 1.7542, "step": 39267 }, { "epoch": 1.31, "grad_norm": 0.5217558741569519, "learning_rate": 0.0003603897211582184, "loss": 1.7207, "step": 39268 }, { "epoch": 1.31, "grad_norm": 0.4879136085510254, "learning_rate": 0.0003603794829616986, "loss": 1.6794, "step": 39269 }, { "epoch": 1.31, "grad_norm": 0.49322450160980225, "learning_rate": 0.00036036924469188706, "loss": 1.7647, "step": 39270 }, { "epoch": 1.31, "grad_norm": 0.5005963444709778, "learning_rate": 0.00036035900634879615, "loss": 1.7726, "step": 39271 }, { "epoch": 1.31, "grad_norm": 0.7262524366378784, "learning_rate": 0.00036034876793243826, "loss": 1.6751, "step": 39272 }, { "epoch": 1.31, "grad_norm": 0.5081406831741333, "learning_rate": 0.0003603385294428259, "loss": 1.8251, "step": 39273 }, { "epoch": 1.31, "grad_norm": 0.4645441472530365, "learning_rate": 0.00036032829087997146, "loss": 1.8465, "step": 39274 }, { "epoch": 1.31, "grad_norm": 0.48115280270576477, "learning_rate": 0.00036031805224388737, "loss": 1.7246, "step": 39275 }, { "epoch": 1.31, "grad_norm": 0.48609593510627747, "learning_rate": 0.000360307813534586, "loss": 1.7587, "step": 39276 }, { "epoch": 1.31, "grad_norm": 0.4887637495994568, "learning_rate": 0.0003602975747520799, "loss": 1.7621, "step": 39277 }, { "epoch": 1.31, "grad_norm": 0.5006623864173889, "learning_rate": 0.00036028733589638134, "loss": 1.7773, "step": 39278 }, { "epoch": 1.31, "grad_norm": 0.48758643865585327, "learning_rate": 0.0003602770969675029, "loss": 1.6945, "step": 39279 }, { "epoch": 1.31, "grad_norm": 0.4861317276954651, "learning_rate": 0.0003602668579654569, "loss": 1.8116, "step": 39280 }, { "epoch": 1.31, "grad_norm": 0.47873416543006897, "learning_rate": 0.0003602566188902559, "loss": 1.7812, "step": 39281 }, { "epoch": 1.31, "grad_norm": 0.4801535904407501, "learning_rate": 0.0003602463797419122, "loss": 1.8304, "step": 39282 }, { "epoch": 1.31, "grad_norm": 0.4809194803237915, "learning_rate": 0.0003602361405204383, "loss": 1.779, "step": 39283 }, { "epoch": 1.31, "grad_norm": 0.6598154902458191, "learning_rate": 0.0003602259012258466, "loss": 1.7548, "step": 39284 }, { "epoch": 1.31, "grad_norm": 0.4879224896430969, "learning_rate": 0.0003602156618581494, "loss": 1.7808, "step": 39285 }, { "epoch": 1.31, "grad_norm": 0.47521713376045227, "learning_rate": 0.0003602054224173594, "loss": 1.7472, "step": 39286 }, { "epoch": 1.31, "grad_norm": 0.4952111542224884, "learning_rate": 0.0003601951829034888, "loss": 1.781, "step": 39287 }, { "epoch": 1.31, "grad_norm": 0.502093493938446, "learning_rate": 0.00036018494331655024, "loss": 1.7725, "step": 39288 }, { "epoch": 1.31, "grad_norm": 0.4783027768135071, "learning_rate": 0.0003601747036565559, "loss": 1.784, "step": 39289 }, { "epoch": 1.31, "grad_norm": 0.4828224182128906, "learning_rate": 0.00036016446392351836, "loss": 1.7751, "step": 39290 }, { "epoch": 1.31, "grad_norm": 0.4887233376502991, "learning_rate": 0.0003601542241174501, "loss": 1.7711, "step": 39291 }, { "epoch": 1.31, "grad_norm": 0.4840245246887207, "learning_rate": 0.0003601439842383634, "loss": 1.8253, "step": 39292 }, { "epoch": 1.31, "grad_norm": 0.47958528995513916, "learning_rate": 0.0003601337442862708, "loss": 1.8124, "step": 39293 }, { "epoch": 1.31, "grad_norm": 0.48263391852378845, "learning_rate": 0.0003601235042611847, "loss": 1.7842, "step": 39294 }, { "epoch": 1.31, "grad_norm": 0.5055328011512756, "learning_rate": 0.0003601132641631175, "loss": 1.8316, "step": 39295 }, { "epoch": 1.31, "grad_norm": 0.47794055938720703, "learning_rate": 0.0003601030239920816, "loss": 1.7329, "step": 39296 }, { "epoch": 1.31, "grad_norm": 0.47759243845939636, "learning_rate": 0.0003600927837480896, "loss": 1.7538, "step": 39297 }, { "epoch": 1.31, "grad_norm": 0.48685991764068604, "learning_rate": 0.00036008254343115377, "loss": 1.717, "step": 39298 }, { "epoch": 1.31, "grad_norm": 0.4846453070640564, "learning_rate": 0.0003600723030412866, "loss": 1.7342, "step": 39299 }, { "epoch": 1.31, "grad_norm": 0.4805786907672882, "learning_rate": 0.00036006206257850046, "loss": 1.7964, "step": 39300 }, { "epoch": 1.31, "grad_norm": 0.4933570921421051, "learning_rate": 0.00036005182204280786, "loss": 1.7567, "step": 39301 }, { "epoch": 1.31, "grad_norm": 0.4903016686439514, "learning_rate": 0.0003600415814342211, "loss": 1.7889, "step": 39302 }, { "epoch": 1.31, "grad_norm": 0.47137460112571716, "learning_rate": 0.00036003134075275286, "loss": 1.739, "step": 39303 }, { "epoch": 1.31, "grad_norm": 0.4958585500717163, "learning_rate": 0.0003600210999984154, "loss": 1.8235, "step": 39304 }, { "epoch": 1.31, "grad_norm": 0.48642945289611816, "learning_rate": 0.000360010859171221, "loss": 1.8816, "step": 39305 }, { "epoch": 1.31, "grad_norm": 0.4831768870353699, "learning_rate": 0.00036000061827118243, "loss": 1.7672, "step": 39306 }, { "epoch": 1.31, "grad_norm": 0.477072149515152, "learning_rate": 0.00035999037729831187, "loss": 1.7571, "step": 39307 }, { "epoch": 1.31, "grad_norm": 0.4981425702571869, "learning_rate": 0.0003599801362526219, "loss": 1.7816, "step": 39308 }, { "epoch": 1.31, "grad_norm": 0.47934988141059875, "learning_rate": 0.00035996989513412483, "loss": 1.829, "step": 39309 }, { "epoch": 1.31, "grad_norm": 0.48079678416252136, "learning_rate": 0.0003599596539428331, "loss": 1.69, "step": 39310 }, { "epoch": 1.31, "grad_norm": 0.47294923663139343, "learning_rate": 0.0003599494126787593, "loss": 1.8358, "step": 39311 }, { "epoch": 1.31, "grad_norm": 0.5074226260185242, "learning_rate": 0.00035993917134191563, "loss": 1.8618, "step": 39312 }, { "epoch": 1.31, "grad_norm": 0.48526397347450256, "learning_rate": 0.0003599289299323147, "loss": 1.8121, "step": 39313 }, { "epoch": 1.31, "grad_norm": 0.4944881796836853, "learning_rate": 0.00035991868844996894, "loss": 1.7721, "step": 39314 }, { "epoch": 1.31, "grad_norm": 0.4819703996181488, "learning_rate": 0.0003599084468948906, "loss": 1.8099, "step": 39315 }, { "epoch": 1.31, "grad_norm": 0.48521867394447327, "learning_rate": 0.0003598982052670923, "loss": 1.7789, "step": 39316 }, { "epoch": 1.31, "grad_norm": 0.48299142718315125, "learning_rate": 0.0003598879635665863, "loss": 1.7988, "step": 39317 }, { "epoch": 1.31, "grad_norm": 0.8809769749641418, "learning_rate": 0.00035987772179338535, "loss": 1.8596, "step": 39318 }, { "epoch": 1.31, "grad_norm": 0.49022799730300903, "learning_rate": 0.0003598674799475015, "loss": 1.7727, "step": 39319 }, { "epoch": 1.31, "grad_norm": 0.4846886396408081, "learning_rate": 0.00035985723802894735, "loss": 1.8555, "step": 39320 }, { "epoch": 1.31, "grad_norm": 0.4929499328136444, "learning_rate": 0.00035984699603773545, "loss": 1.8549, "step": 39321 }, { "epoch": 1.31, "grad_norm": 0.4637913405895233, "learning_rate": 0.000359836753973878, "loss": 1.6959, "step": 39322 }, { "epoch": 1.31, "grad_norm": 0.48591285943984985, "learning_rate": 0.0003598265118373876, "loss": 1.7674, "step": 39323 }, { "epoch": 1.31, "grad_norm": 0.48069390654563904, "learning_rate": 0.0003598162696282767, "loss": 1.7938, "step": 39324 }, { "epoch": 1.31, "grad_norm": 0.5110443234443665, "learning_rate": 0.0003598060273465575, "loss": 1.7359, "step": 39325 }, { "epoch": 1.31, "grad_norm": 0.46740230917930603, "learning_rate": 0.0003597957849922427, "loss": 1.7215, "step": 39326 }, { "epoch": 1.31, "grad_norm": 0.4807531535625458, "learning_rate": 0.0003597855425653446, "loss": 1.7721, "step": 39327 }, { "epoch": 1.31, "grad_norm": 0.47183769941329956, "learning_rate": 0.00035977530006587563, "loss": 1.7841, "step": 39328 }, { "epoch": 1.31, "grad_norm": 0.47260621190071106, "learning_rate": 0.0003597650574938483, "loss": 1.7269, "step": 39329 }, { "epoch": 1.31, "grad_norm": 0.4931384027004242, "learning_rate": 0.000359754814849275, "loss": 1.8045, "step": 39330 }, { "epoch": 1.31, "grad_norm": 0.49453848600387573, "learning_rate": 0.00035974457213216814, "loss": 1.7938, "step": 39331 }, { "epoch": 1.31, "grad_norm": 0.49799874424934387, "learning_rate": 0.0003597343293425402, "loss": 1.7567, "step": 39332 }, { "epoch": 1.31, "grad_norm": 0.49978405237197876, "learning_rate": 0.0003597240864804035, "loss": 1.7845, "step": 39333 }, { "epoch": 1.31, "grad_norm": 0.4774165451526642, "learning_rate": 0.0003597138435457707, "loss": 1.7765, "step": 39334 }, { "epoch": 1.31, "grad_norm": 0.5049867630004883, "learning_rate": 0.00035970360053865397, "loss": 1.7269, "step": 39335 }, { "epoch": 1.31, "grad_norm": 0.48594537377357483, "learning_rate": 0.0003596933574590659, "loss": 1.7198, "step": 39336 }, { "epoch": 1.31, "grad_norm": 0.4983585476875305, "learning_rate": 0.00035968311430701884, "loss": 1.8064, "step": 39337 }, { "epoch": 1.31, "grad_norm": 0.4851062297821045, "learning_rate": 0.0003596728710825254, "loss": 1.7577, "step": 39338 }, { "epoch": 1.31, "grad_norm": 0.478963166475296, "learning_rate": 0.0003596626277855978, "loss": 1.7876, "step": 39339 }, { "epoch": 1.31, "grad_norm": 0.47459879517555237, "learning_rate": 0.0003596523844162486, "loss": 1.7727, "step": 39340 }, { "epoch": 1.31, "grad_norm": 0.4786423444747925, "learning_rate": 0.00035964214097449013, "loss": 1.7644, "step": 39341 }, { "epoch": 1.31, "grad_norm": 0.501126229763031, "learning_rate": 0.0003596318974603349, "loss": 1.8392, "step": 39342 }, { "epoch": 1.31, "grad_norm": 0.4922172427177429, "learning_rate": 0.00035962165387379546, "loss": 1.7967, "step": 39343 }, { "epoch": 1.31, "grad_norm": 0.48551833629608154, "learning_rate": 0.00035961141021488395, "loss": 1.7203, "step": 39344 }, { "epoch": 1.31, "grad_norm": 0.4851398169994354, "learning_rate": 0.00035960116648361306, "loss": 1.803, "step": 39345 }, { "epoch": 1.31, "grad_norm": 0.49549150466918945, "learning_rate": 0.00035959092267999514, "loss": 1.7139, "step": 39346 }, { "epoch": 1.31, "grad_norm": 0.5063704252243042, "learning_rate": 0.0003595806788040425, "loss": 1.7428, "step": 39347 }, { "epoch": 1.31, "grad_norm": 0.5008493065834045, "learning_rate": 0.00035957043485576785, "loss": 1.7897, "step": 39348 }, { "epoch": 1.31, "grad_norm": 0.5168536901473999, "learning_rate": 0.00035956019083518346, "loss": 1.7582, "step": 39349 }, { "epoch": 1.31, "grad_norm": 0.47931531071662903, "learning_rate": 0.0003595499467423017, "loss": 1.77, "step": 39350 }, { "epoch": 1.31, "grad_norm": 0.5043959617614746, "learning_rate": 0.00035953970257713506, "loss": 1.8765, "step": 39351 }, { "epoch": 1.31, "grad_norm": 0.49818623065948486, "learning_rate": 0.00035952945833969604, "loss": 1.7808, "step": 39352 }, { "epoch": 1.31, "grad_norm": 0.48165303468704224, "learning_rate": 0.0003595192140299971, "loss": 1.7842, "step": 39353 }, { "epoch": 1.31, "grad_norm": 0.46679776906967163, "learning_rate": 0.0003595089696480505, "loss": 1.7614, "step": 39354 }, { "epoch": 1.31, "grad_norm": 0.48783963918685913, "learning_rate": 0.0003594987251938688, "loss": 1.7197, "step": 39355 }, { "epoch": 1.31, "grad_norm": 0.5211734771728516, "learning_rate": 0.0003594884806674644, "loss": 1.7781, "step": 39356 }, { "epoch": 1.31, "grad_norm": 0.48397424817085266, "learning_rate": 0.0003594782360688498, "loss": 1.7422, "step": 39357 }, { "epoch": 1.31, "grad_norm": 0.48796188831329346, "learning_rate": 0.0003594679913980373, "loss": 1.7852, "step": 39358 }, { "epoch": 1.31, "grad_norm": 0.4861520826816559, "learning_rate": 0.0003594577466550396, "loss": 1.7236, "step": 39359 }, { "epoch": 1.31, "grad_norm": 0.5030325651168823, "learning_rate": 0.00035944750183986875, "loss": 1.8031, "step": 39360 }, { "epoch": 1.31, "grad_norm": 0.47825899720191956, "learning_rate": 0.00035943725695253753, "loss": 1.795, "step": 39361 }, { "epoch": 1.31, "grad_norm": 0.4841708838939667, "learning_rate": 0.00035942701199305813, "loss": 1.7498, "step": 39362 }, { "epoch": 1.31, "grad_norm": 0.4827522933483124, "learning_rate": 0.00035941676696144313, "loss": 1.7846, "step": 39363 }, { "epoch": 1.31, "grad_norm": 0.4797579348087311, "learning_rate": 0.00035940652185770505, "loss": 1.7014, "step": 39364 }, { "epoch": 1.31, "grad_norm": 0.476860374212265, "learning_rate": 0.00035939627668185603, "loss": 1.7601, "step": 39365 }, { "epoch": 1.31, "grad_norm": 0.48873546719551086, "learning_rate": 0.0003593860314339088, "loss": 1.7059, "step": 39366 }, { "epoch": 1.31, "grad_norm": 0.5060909390449524, "learning_rate": 0.0003593757861138756, "loss": 1.7472, "step": 39367 }, { "epoch": 1.31, "grad_norm": 0.49079349637031555, "learning_rate": 0.000359365540721769, "loss": 1.7704, "step": 39368 }, { "epoch": 1.31, "grad_norm": 0.4586341083049774, "learning_rate": 0.00035935529525760136, "loss": 1.7431, "step": 39369 }, { "epoch": 1.31, "grad_norm": 0.5104801058769226, "learning_rate": 0.0003593450497213851, "loss": 1.8501, "step": 39370 }, { "epoch": 1.31, "grad_norm": 0.4858350455760956, "learning_rate": 0.0003593348041131328, "loss": 1.7354, "step": 39371 }, { "epoch": 1.31, "grad_norm": 0.473466694355011, "learning_rate": 0.0003593245584328566, "loss": 1.7183, "step": 39372 }, { "epoch": 1.31, "grad_norm": 0.48673152923583984, "learning_rate": 0.0003593143126805694, "loss": 1.7931, "step": 39373 }, { "epoch": 1.31, "grad_norm": 0.4871397912502289, "learning_rate": 0.00035930406685628317, "loss": 1.7703, "step": 39374 }, { "epoch": 1.31, "grad_norm": 0.5120623111724854, "learning_rate": 0.00035929382096001053, "loss": 1.7298, "step": 39375 }, { "epoch": 1.31, "grad_norm": 0.5029584169387817, "learning_rate": 0.000359283574991764, "loss": 1.8114, "step": 39376 }, { "epoch": 1.31, "grad_norm": 0.46982696652412415, "learning_rate": 0.0003592733289515559, "loss": 1.771, "step": 39377 }, { "epoch": 1.31, "grad_norm": 0.47694337368011475, "learning_rate": 0.00035926308283939877, "loss": 1.6907, "step": 39378 }, { "epoch": 1.31, "grad_norm": 0.48210325837135315, "learning_rate": 0.00035925283665530494, "loss": 1.8107, "step": 39379 }, { "epoch": 1.31, "grad_norm": 0.4779212474822998, "learning_rate": 0.00035924259039928693, "loss": 1.7815, "step": 39380 }, { "epoch": 1.31, "grad_norm": 0.4825003743171692, "learning_rate": 0.00035923234407135715, "loss": 1.7796, "step": 39381 }, { "epoch": 1.31, "grad_norm": 0.5011613368988037, "learning_rate": 0.000359222097671528, "loss": 1.8128, "step": 39382 }, { "epoch": 1.31, "grad_norm": 0.4770016670227051, "learning_rate": 0.0003592118511998119, "loss": 1.8088, "step": 39383 }, { "epoch": 1.31, "grad_norm": 0.4762423038482666, "learning_rate": 0.00035920160465622143, "loss": 1.8016, "step": 39384 }, { "epoch": 1.31, "grad_norm": 0.4804098904132843, "learning_rate": 0.00035919135804076893, "loss": 1.6703, "step": 39385 }, { "epoch": 1.31, "grad_norm": 0.48601284623146057, "learning_rate": 0.00035918111135346684, "loss": 1.7338, "step": 39386 }, { "epoch": 1.31, "grad_norm": 0.48321646451950073, "learning_rate": 0.0003591708645943275, "loss": 1.7304, "step": 39387 }, { "epoch": 1.31, "grad_norm": 0.4816134572029114, "learning_rate": 0.0003591606177633635, "loss": 1.7497, "step": 39388 }, { "epoch": 1.31, "grad_norm": 0.4881841242313385, "learning_rate": 0.0003591503708605873, "loss": 1.7445, "step": 39389 }, { "epoch": 1.31, "grad_norm": 0.48680949211120605, "learning_rate": 0.0003591401238860112, "loss": 1.763, "step": 39390 }, { "epoch": 1.31, "grad_norm": 0.49498993158340454, "learning_rate": 0.00035912987683964773, "loss": 1.8237, "step": 39391 }, { "epoch": 1.31, "grad_norm": 0.47345542907714844, "learning_rate": 0.00035911962972150934, "loss": 1.7921, "step": 39392 }, { "epoch": 1.31, "grad_norm": 0.492566853761673, "learning_rate": 0.0003591093825316083, "loss": 1.788, "step": 39393 }, { "epoch": 1.31, "grad_norm": 0.49393975734710693, "learning_rate": 0.0003590991352699573, "loss": 1.7703, "step": 39394 }, { "epoch": 1.31, "grad_norm": 0.5295997262001038, "learning_rate": 0.0003590888879365686, "loss": 1.8608, "step": 39395 }, { "epoch": 1.31, "grad_norm": 0.5044535994529724, "learning_rate": 0.0003590786405314548, "loss": 1.8008, "step": 39396 }, { "epoch": 1.31, "grad_norm": 0.47770628333091736, "learning_rate": 0.000359068393054628, "loss": 1.778, "step": 39397 }, { "epoch": 1.31, "grad_norm": 0.48846209049224854, "learning_rate": 0.00035905814550610114, "loss": 1.8175, "step": 39398 }, { "epoch": 1.31, "grad_norm": 0.4900166690349579, "learning_rate": 0.0003590478978858862, "loss": 1.8331, "step": 39399 }, { "epoch": 1.31, "grad_norm": 0.4743555784225464, "learning_rate": 0.00035903765019399596, "loss": 1.7873, "step": 39400 }, { "epoch": 1.31, "grad_norm": 0.49404847621917725, "learning_rate": 0.0003590274024304427, "loss": 1.7579, "step": 39401 }, { "epoch": 1.31, "grad_norm": 0.4859263002872467, "learning_rate": 0.00035901715459523873, "loss": 1.7886, "step": 39402 }, { "epoch": 1.31, "grad_norm": 0.48556041717529297, "learning_rate": 0.0003590069066883967, "loss": 1.7974, "step": 39403 }, { "epoch": 1.31, "grad_norm": 0.4924192428588867, "learning_rate": 0.000358996658709929, "loss": 1.7737, "step": 39404 }, { "epoch": 1.31, "grad_norm": 0.49262797832489014, "learning_rate": 0.000358986410659848, "loss": 1.8129, "step": 39405 }, { "epoch": 1.31, "grad_norm": 0.48296383023262024, "learning_rate": 0.00035897616253816625, "loss": 1.8347, "step": 39406 }, { "epoch": 1.31, "grad_norm": 0.5011714100837708, "learning_rate": 0.0003589659143448961, "loss": 1.7755, "step": 39407 }, { "epoch": 1.31, "grad_norm": 0.4892856776714325, "learning_rate": 0.0003589556660800501, "loss": 1.712, "step": 39408 }, { "epoch": 1.31, "grad_norm": 0.48662906885147095, "learning_rate": 0.00035894541774364046, "loss": 1.8349, "step": 39409 }, { "epoch": 1.31, "grad_norm": 0.5005108118057251, "learning_rate": 0.00035893516933567987, "loss": 1.7711, "step": 39410 }, { "epoch": 1.31, "grad_norm": 0.4875173568725586, "learning_rate": 0.0003589249208561806, "loss": 1.7315, "step": 39411 }, { "epoch": 1.31, "grad_norm": 0.4986838102340698, "learning_rate": 0.00035891467230515523, "loss": 1.7947, "step": 39412 }, { "epoch": 1.31, "grad_norm": 0.48362281918525696, "learning_rate": 0.00035890442368261607, "loss": 1.7845, "step": 39413 }, { "epoch": 1.31, "grad_norm": 0.4796360731124878, "learning_rate": 0.00035889417498857567, "loss": 1.7091, "step": 39414 }, { "epoch": 1.31, "grad_norm": 0.48270830512046814, "learning_rate": 0.00035888392622304634, "loss": 1.8172, "step": 39415 }, { "epoch": 1.31, "grad_norm": 0.47108253836631775, "learning_rate": 0.0003588736773860407, "loss": 1.8106, "step": 39416 }, { "epoch": 1.31, "grad_norm": 0.47769200801849365, "learning_rate": 0.00035886342847757103, "loss": 1.7005, "step": 39417 }, { "epoch": 1.31, "grad_norm": 0.4776286482810974, "learning_rate": 0.00035885317949764983, "loss": 1.7845, "step": 39418 }, { "epoch": 1.31, "grad_norm": 0.488540917634964, "learning_rate": 0.00035884293044628957, "loss": 1.7396, "step": 39419 }, { "epoch": 1.31, "grad_norm": 0.48944413661956787, "learning_rate": 0.00035883268132350267, "loss": 1.7637, "step": 39420 }, { "epoch": 1.31, "grad_norm": 0.5100148916244507, "learning_rate": 0.0003588224321293016, "loss": 1.7898, "step": 39421 }, { "epoch": 1.31, "grad_norm": 0.48948541283607483, "learning_rate": 0.0003588121828636986, "loss": 1.8301, "step": 39422 }, { "epoch": 1.31, "grad_norm": 0.47987574338912964, "learning_rate": 0.0003588019335267064, "loss": 1.708, "step": 39423 }, { "epoch": 1.31, "grad_norm": 0.4889591336250305, "learning_rate": 0.00035879168411833735, "loss": 1.829, "step": 39424 }, { "epoch": 1.31, "grad_norm": 0.4905947148799896, "learning_rate": 0.00035878143463860383, "loss": 1.787, "step": 39425 }, { "epoch": 1.31, "grad_norm": 0.4956822395324707, "learning_rate": 0.00035877118508751825, "loss": 1.7699, "step": 39426 }, { "epoch": 1.31, "grad_norm": 0.501781165599823, "learning_rate": 0.00035876093546509315, "loss": 1.7717, "step": 39427 }, { "epoch": 1.31, "grad_norm": 0.48308899998664856, "learning_rate": 0.00035875068577134094, "loss": 1.7772, "step": 39428 }, { "epoch": 1.31, "grad_norm": 0.4715114235877991, "learning_rate": 0.00035874043600627397, "loss": 1.7655, "step": 39429 }, { "epoch": 1.31, "grad_norm": 0.47878706455230713, "learning_rate": 0.00035873018616990493, "loss": 1.7003, "step": 39430 }, { "epoch": 1.31, "grad_norm": 0.4849451780319214, "learning_rate": 0.000358719936262246, "loss": 1.8097, "step": 39431 }, { "epoch": 1.31, "grad_norm": 0.4804971218109131, "learning_rate": 0.00035870968628330967, "loss": 1.7625, "step": 39432 }, { "epoch": 1.31, "grad_norm": 0.4887714385986328, "learning_rate": 0.0003586994362331086, "loss": 1.7384, "step": 39433 }, { "epoch": 1.31, "grad_norm": 0.49219024181365967, "learning_rate": 0.0003586891861116548, "loss": 1.7026, "step": 39434 }, { "epoch": 1.31, "grad_norm": 0.5253268480300903, "learning_rate": 0.0003586789359189612, "loss": 1.794, "step": 39435 }, { "epoch": 1.31, "grad_norm": 0.5201252698898315, "learning_rate": 0.00035866868565504, "loss": 1.7182, "step": 39436 }, { "epoch": 1.31, "grad_norm": 0.4994748532772064, "learning_rate": 0.00035865843531990353, "loss": 1.8087, "step": 39437 }, { "epoch": 1.31, "grad_norm": 0.48870131373405457, "learning_rate": 0.0003586481849135644, "loss": 1.7681, "step": 39438 }, { "epoch": 1.31, "grad_norm": 0.5006079077720642, "learning_rate": 0.0003586379344360351, "loss": 1.8382, "step": 39439 }, { "epoch": 1.31, "grad_norm": 0.5165679454803467, "learning_rate": 0.000358627683887328, "loss": 1.7396, "step": 39440 }, { "epoch": 1.31, "grad_norm": 0.5172454118728638, "learning_rate": 0.0003586174332674555, "loss": 1.8273, "step": 39441 }, { "epoch": 1.31, "grad_norm": 0.4875911772251129, "learning_rate": 0.00035860718257643, "loss": 1.7326, "step": 39442 }, { "epoch": 1.31, "grad_norm": 0.4927050769329071, "learning_rate": 0.000358596931814264, "loss": 1.7725, "step": 39443 }, { "epoch": 1.31, "grad_norm": 0.47343385219573975, "learning_rate": 0.00035858668098097006, "loss": 1.7717, "step": 39444 }, { "epoch": 1.31, "grad_norm": 0.4944591224193573, "learning_rate": 0.00035857643007656046, "loss": 1.7357, "step": 39445 }, { "epoch": 1.31, "grad_norm": 0.5066114664077759, "learning_rate": 0.00035856617910104774, "loss": 1.802, "step": 39446 }, { "epoch": 1.31, "grad_norm": 0.5054410696029663, "learning_rate": 0.0003585559280544443, "loss": 1.7543, "step": 39447 }, { "epoch": 1.31, "grad_norm": 0.4962875545024872, "learning_rate": 0.0003585456769367626, "loss": 1.7732, "step": 39448 }, { "epoch": 1.31, "grad_norm": 0.49373659491539, "learning_rate": 0.000358535425748015, "loss": 1.7958, "step": 39449 }, { "epoch": 1.31, "grad_norm": 0.49720150232315063, "learning_rate": 0.0003585251744882141, "loss": 1.8446, "step": 39450 }, { "epoch": 1.31, "grad_norm": 0.48193374276161194, "learning_rate": 0.0003585149231573723, "loss": 1.7297, "step": 39451 }, { "epoch": 1.31, "grad_norm": 0.49920517206192017, "learning_rate": 0.00035850467175550186, "loss": 1.7139, "step": 39452 }, { "epoch": 1.31, "grad_norm": 0.49224114418029785, "learning_rate": 0.00035849442028261546, "loss": 1.7766, "step": 39453 }, { "epoch": 1.31, "grad_norm": 0.47895556688308716, "learning_rate": 0.00035848416873872547, "loss": 1.744, "step": 39454 }, { "epoch": 1.31, "grad_norm": 0.4895297884941101, "learning_rate": 0.00035847391712384426, "loss": 1.7551, "step": 39455 }, { "epoch": 1.31, "grad_norm": 0.4867013394832611, "learning_rate": 0.0003584636654379844, "loss": 1.8018, "step": 39456 }, { "epoch": 1.31, "grad_norm": 0.5046601891517639, "learning_rate": 0.00035845341368115813, "loss": 1.8328, "step": 39457 }, { "epoch": 1.31, "grad_norm": 0.47544991970062256, "learning_rate": 0.00035844316185337826, "loss": 1.8231, "step": 39458 }, { "epoch": 1.31, "grad_norm": 0.4920969307422638, "learning_rate": 0.00035843290995465676, "loss": 1.777, "step": 39459 }, { "epoch": 1.31, "grad_norm": 0.5104600787162781, "learning_rate": 0.00035842265798500644, "loss": 1.6861, "step": 39460 }, { "epoch": 1.31, "grad_norm": 0.49153658747673035, "learning_rate": 0.0003584124059444396, "loss": 1.8144, "step": 39461 }, { "epoch": 1.31, "grad_norm": 0.5032981634140015, "learning_rate": 0.00035840215383296873, "loss": 1.7553, "step": 39462 }, { "epoch": 1.31, "grad_norm": 0.48609432578086853, "learning_rate": 0.00035839190165060623, "loss": 1.7728, "step": 39463 }, { "epoch": 1.31, "grad_norm": 0.5113218426704407, "learning_rate": 0.0003583816493973645, "loss": 1.807, "step": 39464 }, { "epoch": 1.31, "grad_norm": 0.4720686972141266, "learning_rate": 0.00035837139707325623, "loss": 1.7192, "step": 39465 }, { "epoch": 1.31, "grad_norm": 0.4903343617916107, "learning_rate": 0.00035836114467829356, "loss": 1.8042, "step": 39466 }, { "epoch": 1.31, "grad_norm": 0.545996367931366, "learning_rate": 0.000358350892212489, "loss": 1.7944, "step": 39467 }, { "epoch": 1.31, "grad_norm": 0.49959424138069153, "learning_rate": 0.00035834063967585516, "loss": 1.753, "step": 39468 }, { "epoch": 1.31, "grad_norm": 0.5014094114303589, "learning_rate": 0.00035833038706840426, "loss": 1.7701, "step": 39469 }, { "epoch": 1.31, "grad_norm": 0.4662545621395111, "learning_rate": 0.000358320134390149, "loss": 1.7435, "step": 39470 }, { "epoch": 1.31, "grad_norm": 0.4784232974052429, "learning_rate": 0.00035830988164110166, "loss": 1.7763, "step": 39471 }, { "epoch": 1.31, "grad_norm": 0.47729310393333435, "learning_rate": 0.00035829962882127475, "loss": 1.7801, "step": 39472 }, { "epoch": 1.31, "grad_norm": 0.4919739067554474, "learning_rate": 0.0003582893759306806, "loss": 1.8726, "step": 39473 }, { "epoch": 1.31, "grad_norm": 0.49083423614501953, "learning_rate": 0.00035827912296933174, "loss": 1.7588, "step": 39474 }, { "epoch": 1.31, "grad_norm": 0.5107602477073669, "learning_rate": 0.0003582688699372406, "loss": 1.7937, "step": 39475 }, { "epoch": 1.31, "grad_norm": 0.48612144589424133, "learning_rate": 0.0003582586168344197, "loss": 1.8063, "step": 39476 }, { "epoch": 1.31, "grad_norm": 0.4704638123512268, "learning_rate": 0.00035824836366088133, "loss": 1.7932, "step": 39477 }, { "epoch": 1.31, "grad_norm": 0.4886325001716614, "learning_rate": 0.00035823811041663816, "loss": 1.8021, "step": 39478 }, { "epoch": 1.31, "grad_norm": 0.5066229104995728, "learning_rate": 0.0003582278571017024, "loss": 1.7963, "step": 39479 }, { "epoch": 1.31, "grad_norm": 0.5017321109771729, "learning_rate": 0.00035821760371608665, "loss": 1.778, "step": 39480 }, { "epoch": 1.31, "grad_norm": 0.4634789526462555, "learning_rate": 0.0003582073502598033, "loss": 1.7685, "step": 39481 }, { "epoch": 1.31, "grad_norm": 0.4902452826499939, "learning_rate": 0.0003581970967328647, "loss": 1.781, "step": 39482 }, { "epoch": 1.31, "grad_norm": 0.4909796714782715, "learning_rate": 0.00035818684313528357, "loss": 1.6351, "step": 39483 }, { "epoch": 1.31, "grad_norm": 0.46700823307037354, "learning_rate": 0.0003581765894670721, "loss": 1.7888, "step": 39484 }, { "epoch": 1.31, "grad_norm": 0.5039543509483337, "learning_rate": 0.0003581663357282429, "loss": 1.811, "step": 39485 }, { "epoch": 1.31, "grad_norm": 0.4878889322280884, "learning_rate": 0.0003581560819188082, "loss": 1.7281, "step": 39486 }, { "epoch": 1.31, "grad_norm": 0.47348058223724365, "learning_rate": 0.00035814582803878064, "loss": 1.8067, "step": 39487 }, { "epoch": 1.31, "grad_norm": 0.4883352518081665, "learning_rate": 0.0003581355740881727, "loss": 1.892, "step": 39488 }, { "epoch": 1.31, "grad_norm": 0.49878233671188354, "learning_rate": 0.00035812532006699666, "loss": 1.6852, "step": 39489 }, { "epoch": 1.31, "grad_norm": 0.4967115521430969, "learning_rate": 0.00035811506597526507, "loss": 1.8061, "step": 39490 }, { "epoch": 1.31, "grad_norm": 0.48089006543159485, "learning_rate": 0.0003581048118129903, "loss": 1.7977, "step": 39491 }, { "epoch": 1.31, "grad_norm": 0.4925289452075958, "learning_rate": 0.00035809455758018487, "loss": 1.7327, "step": 39492 }, { "epoch": 1.31, "grad_norm": 0.49825671315193176, "learning_rate": 0.0003580843032768612, "loss": 1.7473, "step": 39493 }, { "epoch": 1.31, "grad_norm": 0.4801725745201111, "learning_rate": 0.0003580740489030318, "loss": 1.7625, "step": 39494 }, { "epoch": 1.31, "grad_norm": 0.4998306334018707, "learning_rate": 0.0003580637944587091, "loss": 1.8191, "step": 39495 }, { "epoch": 1.31, "grad_norm": 0.4868369996547699, "learning_rate": 0.0003580535399439054, "loss": 1.7188, "step": 39496 }, { "epoch": 1.31, "grad_norm": 0.6043300628662109, "learning_rate": 0.0003580432853586333, "loss": 1.8513, "step": 39497 }, { "epoch": 1.31, "grad_norm": 0.49965792894363403, "learning_rate": 0.00035803303070290523, "loss": 1.6906, "step": 39498 }, { "epoch": 1.31, "grad_norm": 0.4960753917694092, "learning_rate": 0.00035802277597673355, "loss": 1.7978, "step": 39499 }, { "epoch": 1.31, "grad_norm": 0.5020599365234375, "learning_rate": 0.0003580125211801308, "loss": 1.8037, "step": 39500 }, { "epoch": 1.31, "grad_norm": 0.49388885498046875, "learning_rate": 0.00035800226631310943, "loss": 1.7258, "step": 39501 }, { "epoch": 1.31, "grad_norm": 0.487293004989624, "learning_rate": 0.00035799201137568177, "loss": 1.7936, "step": 39502 }, { "epoch": 1.31, "grad_norm": 0.4943343698978424, "learning_rate": 0.0003579817563678604, "loss": 1.7497, "step": 39503 }, { "epoch": 1.31, "grad_norm": 0.48424792289733887, "learning_rate": 0.0003579715012896577, "loss": 1.704, "step": 39504 }, { "epoch": 1.31, "grad_norm": 0.4770723283290863, "learning_rate": 0.00035796124614108615, "loss": 1.7672, "step": 39505 }, { "epoch": 1.31, "grad_norm": 0.5060364007949829, "learning_rate": 0.00035795099092215824, "loss": 1.7038, "step": 39506 }, { "epoch": 1.31, "grad_norm": 0.4936622679233551, "learning_rate": 0.0003579407356328863, "loss": 1.747, "step": 39507 }, { "epoch": 1.31, "grad_norm": 0.4987148940563202, "learning_rate": 0.0003579304802732829, "loss": 1.7649, "step": 39508 }, { "epoch": 1.31, "grad_norm": 0.5092481970787048, "learning_rate": 0.00035792022484336047, "loss": 1.8387, "step": 39509 }, { "epoch": 1.31, "grad_norm": 0.4893144965171814, "learning_rate": 0.00035790996934313127, "loss": 1.7219, "step": 39510 }, { "epoch": 1.31, "grad_norm": 1.23841392993927, "learning_rate": 0.000357899713772608, "loss": 1.9121, "step": 39511 }, { "epoch": 1.31, "grad_norm": 0.49008095264434814, "learning_rate": 0.000357889458131803, "loss": 1.7747, "step": 39512 }, { "epoch": 1.31, "grad_norm": 0.49166110157966614, "learning_rate": 0.00035787920242072875, "loss": 1.7948, "step": 39513 }, { "epoch": 1.31, "grad_norm": 0.48471295833587646, "learning_rate": 0.0003578689466393976, "loss": 1.8573, "step": 39514 }, { "epoch": 1.31, "grad_norm": 0.4997706711292267, "learning_rate": 0.0003578586907878222, "loss": 1.7151, "step": 39515 }, { "epoch": 1.31, "grad_norm": 0.47255775332450867, "learning_rate": 0.0003578484348660148, "loss": 1.7856, "step": 39516 }, { "epoch": 1.31, "grad_norm": 0.4894838035106659, "learning_rate": 0.00035783817887398786, "loss": 1.7317, "step": 39517 }, { "epoch": 1.31, "grad_norm": 0.49744054675102234, "learning_rate": 0.000357827922811754, "loss": 1.8285, "step": 39518 }, { "epoch": 1.31, "grad_norm": 0.5050601363182068, "learning_rate": 0.0003578176666793255, "loss": 1.7698, "step": 39519 }, { "epoch": 1.31, "grad_norm": 0.4985429644584656, "learning_rate": 0.0003578074104767149, "loss": 1.8063, "step": 39520 }, { "epoch": 1.31, "grad_norm": 0.48236018419265747, "learning_rate": 0.00035779715420393454, "loss": 1.7932, "step": 39521 }, { "epoch": 1.31, "grad_norm": 0.5179370045661926, "learning_rate": 0.00035778689786099705, "loss": 1.8595, "step": 39522 }, { "epoch": 1.31, "grad_norm": 0.5073457956314087, "learning_rate": 0.0003577766414479148, "loss": 1.786, "step": 39523 }, { "epoch": 1.31, "grad_norm": 0.4914315938949585, "learning_rate": 0.0003577663849647002, "loss": 1.7907, "step": 39524 }, { "epoch": 1.32, "grad_norm": 0.4687650501728058, "learning_rate": 0.00035775612841136567, "loss": 1.6937, "step": 39525 }, { "epoch": 1.32, "grad_norm": 0.4899631142616272, "learning_rate": 0.00035774587178792367, "loss": 1.8062, "step": 39526 }, { "epoch": 1.32, "grad_norm": 0.48653456568717957, "learning_rate": 0.0003577356150943868, "loss": 1.7582, "step": 39527 }, { "epoch": 1.32, "grad_norm": 0.48116740584373474, "learning_rate": 0.0003577253583307674, "loss": 1.8184, "step": 39528 }, { "epoch": 1.32, "grad_norm": 0.4875794053077698, "learning_rate": 0.00035771510149707776, "loss": 1.751, "step": 39529 }, { "epoch": 1.32, "grad_norm": 0.48979851603507996, "learning_rate": 0.00035770484459333063, "loss": 1.7421, "step": 39530 }, { "epoch": 1.32, "grad_norm": 0.4844658374786377, "learning_rate": 0.00035769458761953824, "loss": 1.7559, "step": 39531 }, { "epoch": 1.32, "grad_norm": 0.47742336988449097, "learning_rate": 0.0003576843305757132, "loss": 1.8011, "step": 39532 }, { "epoch": 1.32, "grad_norm": 0.49400755763053894, "learning_rate": 0.0003576740734618679, "loss": 1.7147, "step": 39533 }, { "epoch": 1.32, "grad_norm": 0.47855404019355774, "learning_rate": 0.0003576638162780147, "loss": 1.7189, "step": 39534 }, { "epoch": 1.32, "grad_norm": 0.500214159488678, "learning_rate": 0.0003576535590241662, "loss": 1.7804, "step": 39535 }, { "epoch": 1.32, "grad_norm": 0.48016357421875, "learning_rate": 0.0003576433017003347, "loss": 1.7938, "step": 39536 }, { "epoch": 1.32, "grad_norm": 0.48195284605026245, "learning_rate": 0.0003576330443065327, "loss": 1.7838, "step": 39537 }, { "epoch": 1.32, "grad_norm": 0.5341818332672119, "learning_rate": 0.00035762278684277283, "loss": 1.7963, "step": 39538 }, { "epoch": 1.32, "grad_norm": 0.501092255115509, "learning_rate": 0.0003576125293090672, "loss": 1.8052, "step": 39539 }, { "epoch": 1.32, "grad_norm": 0.4760724604129791, "learning_rate": 0.0003576022717054286, "loss": 1.8046, "step": 39540 }, { "epoch": 1.32, "grad_norm": 0.487261027097702, "learning_rate": 0.00035759201403186925, "loss": 1.7764, "step": 39541 }, { "epoch": 1.32, "grad_norm": 0.4922657907009125, "learning_rate": 0.0003575817562884017, "loss": 1.7678, "step": 39542 }, { "epoch": 1.32, "grad_norm": 0.48201680183410645, "learning_rate": 0.0003575714984750384, "loss": 1.7056, "step": 39543 }, { "epoch": 1.32, "grad_norm": 0.49212053418159485, "learning_rate": 0.0003575612405917917, "loss": 1.8441, "step": 39544 }, { "epoch": 1.32, "grad_norm": 0.47542324662208557, "learning_rate": 0.00035755098263867425, "loss": 1.8389, "step": 39545 }, { "epoch": 1.32, "grad_norm": 0.5036894083023071, "learning_rate": 0.0003575407246156983, "loss": 1.7442, "step": 39546 }, { "epoch": 1.32, "grad_norm": 0.4892978370189667, "learning_rate": 0.00035753046652287643, "loss": 1.8429, "step": 39547 }, { "epoch": 1.32, "grad_norm": 0.47346988320350647, "learning_rate": 0.00035752020836022106, "loss": 1.8606, "step": 39548 }, { "epoch": 1.32, "grad_norm": 0.47864770889282227, "learning_rate": 0.0003575099501277446, "loss": 1.7384, "step": 39549 }, { "epoch": 1.32, "grad_norm": 0.4651350677013397, "learning_rate": 0.0003574996918254596, "loss": 1.7805, "step": 39550 }, { "epoch": 1.32, "grad_norm": 0.48415300250053406, "learning_rate": 0.0003574894334533783, "loss": 1.8018, "step": 39551 }, { "epoch": 1.32, "grad_norm": 0.4828057587146759, "learning_rate": 0.00035747917501151343, "loss": 1.6875, "step": 39552 }, { "epoch": 1.32, "grad_norm": 0.4884103536605835, "learning_rate": 0.0003574689164998773, "loss": 1.7513, "step": 39553 }, { "epoch": 1.32, "grad_norm": 0.4958534240722656, "learning_rate": 0.00035745865791848234, "loss": 1.7757, "step": 39554 }, { "epoch": 1.32, "grad_norm": 0.4702068865299225, "learning_rate": 0.000357448399267341, "loss": 1.8567, "step": 39555 }, { "epoch": 1.32, "grad_norm": 0.497880220413208, "learning_rate": 0.0003574381405464658, "loss": 1.7351, "step": 39556 }, { "epoch": 1.32, "grad_norm": 0.47788819670677185, "learning_rate": 0.0003574278817558692, "loss": 1.7928, "step": 39557 }, { "epoch": 1.32, "grad_norm": 0.5258093476295471, "learning_rate": 0.0003574176228955636, "loss": 1.7184, "step": 39558 }, { "epoch": 1.32, "grad_norm": 0.4713437557220459, "learning_rate": 0.0003574073639655614, "loss": 1.8535, "step": 39559 }, { "epoch": 1.32, "grad_norm": 0.5095174908638, "learning_rate": 0.0003573971049658752, "loss": 1.8024, "step": 39560 }, { "epoch": 1.32, "grad_norm": 0.4953511655330658, "learning_rate": 0.00035738684589651734, "loss": 1.7768, "step": 39561 }, { "epoch": 1.32, "grad_norm": 0.5065189003944397, "learning_rate": 0.0003573765867575003, "loss": 1.7768, "step": 39562 }, { "epoch": 1.32, "grad_norm": 0.5055619478225708, "learning_rate": 0.0003573663275488366, "loss": 1.8166, "step": 39563 }, { "epoch": 1.32, "grad_norm": 0.48521432280540466, "learning_rate": 0.00035735606827053855, "loss": 1.7974, "step": 39564 }, { "epoch": 1.32, "grad_norm": 0.5145125389099121, "learning_rate": 0.0003573458089226187, "loss": 1.7946, "step": 39565 }, { "epoch": 1.32, "grad_norm": 0.5009522438049316, "learning_rate": 0.00035733554950508945, "loss": 1.8515, "step": 39566 }, { "epoch": 1.32, "grad_norm": 0.4752334654331207, "learning_rate": 0.00035732529001796334, "loss": 1.7783, "step": 39567 }, { "epoch": 1.32, "grad_norm": 0.4921268820762634, "learning_rate": 0.00035731503046125285, "loss": 1.741, "step": 39568 }, { "epoch": 1.32, "grad_norm": 0.47461897134780884, "learning_rate": 0.0003573047708349702, "loss": 1.7229, "step": 39569 }, { "epoch": 1.32, "grad_norm": 0.4747863709926605, "learning_rate": 0.0003572945111391281, "loss": 1.7257, "step": 39570 }, { "epoch": 1.32, "grad_norm": 0.49078819155693054, "learning_rate": 0.0003572842513737389, "loss": 1.795, "step": 39571 }, { "epoch": 1.32, "grad_norm": 0.5016614198684692, "learning_rate": 0.00035727399153881505, "loss": 1.7815, "step": 39572 }, { "epoch": 1.32, "grad_norm": 0.49984532594680786, "learning_rate": 0.00035726373163436905, "loss": 1.8012, "step": 39573 }, { "epoch": 1.32, "grad_norm": 0.5071837306022644, "learning_rate": 0.00035725347166041323, "loss": 1.7671, "step": 39574 }, { "epoch": 1.32, "grad_norm": 0.46927696466445923, "learning_rate": 0.00035724321161696016, "loss": 1.7483, "step": 39575 }, { "epoch": 1.32, "grad_norm": 0.47689780592918396, "learning_rate": 0.00035723295150402224, "loss": 1.7875, "step": 39576 }, { "epoch": 1.32, "grad_norm": 0.5004751682281494, "learning_rate": 0.000357222691321612, "loss": 1.8198, "step": 39577 }, { "epoch": 1.32, "grad_norm": 0.49557676911354065, "learning_rate": 0.00035721243106974184, "loss": 1.8218, "step": 39578 }, { "epoch": 1.32, "grad_norm": 0.48460525274276733, "learning_rate": 0.00035720217074842423, "loss": 1.7454, "step": 39579 }, { "epoch": 1.32, "grad_norm": 0.484485000371933, "learning_rate": 0.00035719191035767165, "loss": 1.7084, "step": 39580 }, { "epoch": 1.32, "grad_norm": 0.6075955033302307, "learning_rate": 0.0003571816498974964, "loss": 1.713, "step": 39581 }, { "epoch": 1.32, "grad_norm": 0.47229722142219543, "learning_rate": 0.00035717138936791116, "loss": 1.692, "step": 39582 }, { "epoch": 1.32, "grad_norm": 0.48268768191337585, "learning_rate": 0.00035716112876892824, "loss": 1.7712, "step": 39583 }, { "epoch": 1.32, "grad_norm": 0.6178752183914185, "learning_rate": 0.0003571508681005601, "loss": 1.7944, "step": 39584 }, { "epoch": 1.32, "grad_norm": 0.4824405014514923, "learning_rate": 0.00035714060736281926, "loss": 1.7005, "step": 39585 }, { "epoch": 1.32, "grad_norm": 0.48275673389434814, "learning_rate": 0.00035713034655571814, "loss": 1.7536, "step": 39586 }, { "epoch": 1.32, "grad_norm": 0.4911079406738281, "learning_rate": 0.0003571200856792692, "loss": 1.7196, "step": 39587 }, { "epoch": 1.32, "grad_norm": 0.4900970160961151, "learning_rate": 0.0003571098247334849, "loss": 1.794, "step": 39588 }, { "epoch": 1.32, "grad_norm": 0.47784462571144104, "learning_rate": 0.00035709956371837763, "loss": 1.7303, "step": 39589 }, { "epoch": 1.32, "grad_norm": 0.47067686915397644, "learning_rate": 0.00035708930263396, "loss": 1.6973, "step": 39590 }, { "epoch": 1.32, "grad_norm": 0.4726482033729553, "learning_rate": 0.00035707904148024426, "loss": 1.8186, "step": 39591 }, { "epoch": 1.32, "grad_norm": 0.4880999028682709, "learning_rate": 0.00035706878025724303, "loss": 1.7829, "step": 39592 }, { "epoch": 1.32, "grad_norm": 0.47549086809158325, "learning_rate": 0.0003570585189649687, "loss": 1.7254, "step": 39593 }, { "epoch": 1.32, "grad_norm": 0.48379817605018616, "learning_rate": 0.0003570482576034337, "loss": 1.8145, "step": 39594 }, { "epoch": 1.32, "grad_norm": 0.4794691205024719, "learning_rate": 0.00035703799617265065, "loss": 1.7482, "step": 39595 }, { "epoch": 1.32, "grad_norm": 0.4991227090358734, "learning_rate": 0.00035702773467263175, "loss": 1.7487, "step": 39596 }, { "epoch": 1.32, "grad_norm": 0.4904347360134125, "learning_rate": 0.0003570174731033896, "loss": 1.7972, "step": 39597 }, { "epoch": 1.32, "grad_norm": 0.5153102278709412, "learning_rate": 0.00035700721146493677, "loss": 1.8048, "step": 39598 }, { "epoch": 1.32, "grad_norm": 0.4829828441143036, "learning_rate": 0.00035699694975728535, "loss": 1.7247, "step": 39599 }, { "epoch": 1.32, "grad_norm": 0.4738515615463257, "learning_rate": 0.0003569866879804482, "loss": 1.7629, "step": 39600 }, { "epoch": 1.32, "grad_norm": 0.4957737624645233, "learning_rate": 0.00035697642613443753, "loss": 1.8341, "step": 39601 }, { "epoch": 1.32, "grad_norm": 0.4949681758880615, "learning_rate": 0.0003569661642192659, "loss": 1.817, "step": 39602 }, { "epoch": 1.32, "grad_norm": 0.48932692408561707, "learning_rate": 0.00035695590223494577, "loss": 1.8187, "step": 39603 }, { "epoch": 1.32, "grad_norm": 0.4832468032836914, "learning_rate": 0.00035694564018148953, "loss": 1.7279, "step": 39604 }, { "epoch": 1.32, "grad_norm": 0.49763935804367065, "learning_rate": 0.00035693537805890976, "loss": 1.8112, "step": 39605 }, { "epoch": 1.32, "grad_norm": 0.505422055721283, "learning_rate": 0.00035692511586721867, "loss": 1.7565, "step": 39606 }, { "epoch": 1.32, "grad_norm": 0.4882332682609558, "learning_rate": 0.000356914853606429, "loss": 1.7921, "step": 39607 }, { "epoch": 1.32, "grad_norm": 0.5008861422538757, "learning_rate": 0.00035690459127655307, "loss": 1.7595, "step": 39608 }, { "epoch": 1.32, "grad_norm": 0.48777005076408386, "learning_rate": 0.00035689432887760333, "loss": 1.8024, "step": 39609 }, { "epoch": 1.32, "grad_norm": 0.4936627149581909, "learning_rate": 0.0003568840664095922, "loss": 1.8006, "step": 39610 }, { "epoch": 1.32, "grad_norm": 0.48367059230804443, "learning_rate": 0.00035687380387253224, "loss": 1.8341, "step": 39611 }, { "epoch": 1.32, "grad_norm": 0.49605321884155273, "learning_rate": 0.00035686354126643593, "loss": 1.7567, "step": 39612 }, { "epoch": 1.32, "grad_norm": 0.49985387921333313, "learning_rate": 0.0003568532785913156, "loss": 1.7221, "step": 39613 }, { "epoch": 1.32, "grad_norm": 0.4860847592353821, "learning_rate": 0.0003568430158471838, "loss": 1.7487, "step": 39614 }, { "epoch": 1.32, "grad_norm": 0.5012496709823608, "learning_rate": 0.0003568327530340529, "loss": 1.7758, "step": 39615 }, { "epoch": 1.32, "grad_norm": 0.49531683325767517, "learning_rate": 0.00035682249015193544, "loss": 1.7574, "step": 39616 }, { "epoch": 1.32, "grad_norm": 0.488520085811615, "learning_rate": 0.0003568122272008438, "loss": 1.7382, "step": 39617 }, { "epoch": 1.32, "grad_norm": 0.5124847292900085, "learning_rate": 0.0003568019641807906, "loss": 1.761, "step": 39618 }, { "epoch": 1.32, "grad_norm": 0.4968496561050415, "learning_rate": 0.00035679170109178816, "loss": 1.6807, "step": 39619 }, { "epoch": 1.32, "grad_norm": 0.5182173252105713, "learning_rate": 0.0003567814379338488, "loss": 1.8067, "step": 39620 }, { "epoch": 1.32, "grad_norm": 0.4855177402496338, "learning_rate": 0.00035677117470698535, "loss": 1.736, "step": 39621 }, { "epoch": 1.32, "grad_norm": 0.4946122169494629, "learning_rate": 0.0003567609114112099, "loss": 1.8353, "step": 39622 }, { "epoch": 1.32, "grad_norm": 0.49831971526145935, "learning_rate": 0.0003567506480465352, "loss": 1.8202, "step": 39623 }, { "epoch": 1.32, "grad_norm": 0.5033970475196838, "learning_rate": 0.00035674038461297344, "loss": 1.8139, "step": 39624 }, { "epoch": 1.32, "grad_norm": 0.4758993983268738, "learning_rate": 0.0003567301211105373, "loss": 1.6994, "step": 39625 }, { "epoch": 1.32, "grad_norm": 0.47939416766166687, "learning_rate": 0.0003567198575392392, "loss": 1.7825, "step": 39626 }, { "epoch": 1.32, "grad_norm": 0.4984055161476135, "learning_rate": 0.0003567095938990914, "loss": 1.7352, "step": 39627 }, { "epoch": 1.32, "grad_norm": 0.5038871765136719, "learning_rate": 0.00035669933019010663, "loss": 1.7842, "step": 39628 }, { "epoch": 1.32, "grad_norm": 0.5053048133850098, "learning_rate": 0.0003566890664122972, "loss": 1.8673, "step": 39629 }, { "epoch": 1.32, "grad_norm": 0.4890492558479309, "learning_rate": 0.0003566788025656756, "loss": 1.7405, "step": 39630 }, { "epoch": 1.32, "grad_norm": 0.49096357822418213, "learning_rate": 0.00035666853865025426, "loss": 1.7781, "step": 39631 }, { "epoch": 1.32, "grad_norm": 0.49404793977737427, "learning_rate": 0.0003566582746660457, "loss": 1.812, "step": 39632 }, { "epoch": 1.32, "grad_norm": 0.47941339015960693, "learning_rate": 0.00035664801061306234, "loss": 1.6751, "step": 39633 }, { "epoch": 1.32, "grad_norm": 0.49116700887680054, "learning_rate": 0.0003566377464913166, "loss": 1.801, "step": 39634 }, { "epoch": 1.32, "grad_norm": 0.4970014691352844, "learning_rate": 0.00035662748230082107, "loss": 1.8019, "step": 39635 }, { "epoch": 1.32, "grad_norm": 0.511405885219574, "learning_rate": 0.000356617218041588, "loss": 1.84, "step": 39636 }, { "epoch": 1.32, "grad_norm": 0.48555663228034973, "learning_rate": 0.00035660695371363, "loss": 1.755, "step": 39637 }, { "epoch": 1.32, "grad_norm": 0.4736153483390808, "learning_rate": 0.0003565966893169595, "loss": 1.7959, "step": 39638 }, { "epoch": 1.32, "grad_norm": 0.5088304281234741, "learning_rate": 0.000356586424851589, "loss": 1.7058, "step": 39639 }, { "epoch": 1.32, "grad_norm": 0.5126621723175049, "learning_rate": 0.0003565761603175309, "loss": 1.7222, "step": 39640 }, { "epoch": 1.32, "grad_norm": 0.4719163477420807, "learning_rate": 0.0003565658957147976, "loss": 1.7844, "step": 39641 }, { "epoch": 1.32, "grad_norm": 0.4943092167377472, "learning_rate": 0.0003565556310434018, "loss": 1.7752, "step": 39642 }, { "epoch": 1.32, "grad_norm": 0.5023605823516846, "learning_rate": 0.0003565453663033557, "loss": 1.8362, "step": 39643 }, { "epoch": 1.32, "grad_norm": 0.49910059571266174, "learning_rate": 0.0003565351014946718, "loss": 1.745, "step": 39644 }, { "epoch": 1.32, "grad_norm": 0.520889401435852, "learning_rate": 0.0003565248366173627, "loss": 1.7985, "step": 39645 }, { "epoch": 1.32, "grad_norm": 0.4747411608695984, "learning_rate": 0.0003565145716714408, "loss": 1.8212, "step": 39646 }, { "epoch": 1.32, "grad_norm": 0.4844606816768646, "learning_rate": 0.0003565043066569185, "loss": 1.7232, "step": 39647 }, { "epoch": 1.32, "grad_norm": 0.509188175201416, "learning_rate": 0.00035649404157380826, "loss": 1.8255, "step": 39648 }, { "epoch": 1.32, "grad_norm": 0.5033922791481018, "learning_rate": 0.0003564837764221226, "loss": 1.7442, "step": 39649 }, { "epoch": 1.32, "grad_norm": 0.474173367023468, "learning_rate": 0.000356473511201874, "loss": 1.733, "step": 39650 }, { "epoch": 1.32, "grad_norm": 0.4971875250339508, "learning_rate": 0.00035646324591307485, "loss": 1.7656, "step": 39651 }, { "epoch": 1.32, "grad_norm": 0.4877416491508484, "learning_rate": 0.0003564529805557376, "loss": 1.8107, "step": 39652 }, { "epoch": 1.32, "grad_norm": 0.5008822679519653, "learning_rate": 0.0003564427151298748, "loss": 1.7881, "step": 39653 }, { "epoch": 1.32, "grad_norm": 0.5305944085121155, "learning_rate": 0.00035643244963549887, "loss": 1.8072, "step": 39654 }, { "epoch": 1.32, "grad_norm": 0.4915580451488495, "learning_rate": 0.0003564221840726223, "loss": 1.8694, "step": 39655 }, { "epoch": 1.32, "grad_norm": 0.4921400547027588, "learning_rate": 0.00035641191844125735, "loss": 1.7563, "step": 39656 }, { "epoch": 1.32, "grad_norm": 0.48422160744667053, "learning_rate": 0.0003564016527414168, "loss": 1.748, "step": 39657 }, { "epoch": 1.32, "grad_norm": 0.5144050717353821, "learning_rate": 0.0003563913869731129, "loss": 1.7942, "step": 39658 }, { "epoch": 1.32, "grad_norm": 0.48579344153404236, "learning_rate": 0.0003563811211363582, "loss": 1.718, "step": 39659 }, { "epoch": 1.32, "grad_norm": 0.48789823055267334, "learning_rate": 0.00035637085523116513, "loss": 1.7772, "step": 39660 }, { "epoch": 1.32, "grad_norm": 0.48677611351013184, "learning_rate": 0.000356360589257546, "loss": 1.789, "step": 39661 }, { "epoch": 1.32, "grad_norm": 0.4872286021709442, "learning_rate": 0.00035635032321551356, "loss": 1.8183, "step": 39662 }, { "epoch": 1.32, "grad_norm": 0.485369473695755, "learning_rate": 0.00035634005710508, "loss": 1.7386, "step": 39663 }, { "epoch": 1.32, "grad_norm": 0.4761289358139038, "learning_rate": 0.0003563297909262581, "loss": 1.7174, "step": 39664 }, { "epoch": 1.32, "grad_norm": 0.5157421231269836, "learning_rate": 0.00035631952467906003, "loss": 1.7421, "step": 39665 }, { "epoch": 1.32, "grad_norm": 0.4852176606655121, "learning_rate": 0.00035630925836349837, "loss": 1.7443, "step": 39666 }, { "epoch": 1.32, "grad_norm": 0.4834497272968292, "learning_rate": 0.00035629899197958556, "loss": 1.7818, "step": 39667 }, { "epoch": 1.32, "grad_norm": 0.48420342803001404, "learning_rate": 0.000356288725527334, "loss": 1.8064, "step": 39668 }, { "epoch": 1.32, "grad_norm": 0.5072065591812134, "learning_rate": 0.00035627845900675634, "loss": 1.7552, "step": 39669 }, { "epoch": 1.32, "grad_norm": 0.49765175580978394, "learning_rate": 0.0003562681924178649, "loss": 1.8264, "step": 39670 }, { "epoch": 1.32, "grad_norm": 0.48467254638671875, "learning_rate": 0.00035625792576067213, "loss": 1.7593, "step": 39671 }, { "epoch": 1.32, "grad_norm": 0.48219525814056396, "learning_rate": 0.0003562476590351905, "loss": 1.7389, "step": 39672 }, { "epoch": 1.32, "grad_norm": 0.49156051874160767, "learning_rate": 0.00035623739224143256, "loss": 1.8199, "step": 39673 }, { "epoch": 1.32, "grad_norm": 0.9454261064529419, "learning_rate": 0.0003562271253794107, "loss": 1.8033, "step": 39674 }, { "epoch": 1.32, "grad_norm": 0.5086931586265564, "learning_rate": 0.00035621685844913737, "loss": 1.8361, "step": 39675 }, { "epoch": 1.32, "grad_norm": 0.46030882000923157, "learning_rate": 0.0003562065914506251, "loss": 1.7244, "step": 39676 }, { "epoch": 1.32, "grad_norm": 0.5025104880332947, "learning_rate": 0.0003561963243838863, "loss": 1.7331, "step": 39677 }, { "epoch": 1.32, "grad_norm": 0.4781021773815155, "learning_rate": 0.0003561860572489334, "loss": 1.7499, "step": 39678 }, { "epoch": 1.32, "grad_norm": 0.5260345339775085, "learning_rate": 0.00035617579004577893, "loss": 1.7825, "step": 39679 }, { "epoch": 1.32, "grad_norm": 0.492552787065506, "learning_rate": 0.00035616552277443537, "loss": 1.7953, "step": 39680 }, { "epoch": 1.32, "grad_norm": 0.46601176261901855, "learning_rate": 0.0003561552554349151, "loss": 1.7296, "step": 39681 }, { "epoch": 1.32, "grad_norm": 0.4923158586025238, "learning_rate": 0.0003561449880272306, "loss": 1.8138, "step": 39682 }, { "epoch": 1.32, "grad_norm": 0.49150511622428894, "learning_rate": 0.00035613472055139435, "loss": 1.7104, "step": 39683 }, { "epoch": 1.32, "grad_norm": 0.5069448351860046, "learning_rate": 0.00035612445300741883, "loss": 1.8497, "step": 39684 }, { "epoch": 1.32, "grad_norm": 0.5009109973907471, "learning_rate": 0.0003561141853953166, "loss": 1.7395, "step": 39685 }, { "epoch": 1.32, "grad_norm": 0.4950624704360962, "learning_rate": 0.00035610391771509985, "loss": 1.7344, "step": 39686 }, { "epoch": 1.32, "grad_norm": 0.4892398715019226, "learning_rate": 0.00035609364996678133, "loss": 1.6944, "step": 39687 }, { "epoch": 1.32, "grad_norm": 0.4836176633834839, "learning_rate": 0.00035608338215037334, "loss": 1.7826, "step": 39688 }, { "epoch": 1.32, "grad_norm": 0.5089073777198792, "learning_rate": 0.00035607311426588835, "loss": 1.8742, "step": 39689 }, { "epoch": 1.32, "grad_norm": 0.495409220457077, "learning_rate": 0.000356062846313339, "loss": 1.7216, "step": 39690 }, { "epoch": 1.32, "grad_norm": 0.4997934401035309, "learning_rate": 0.0003560525782927374, "loss": 1.7757, "step": 39691 }, { "epoch": 1.32, "grad_norm": 0.4869968593120575, "learning_rate": 0.0003560423102040964, "loss": 1.7635, "step": 39692 }, { "epoch": 1.32, "grad_norm": 0.4817192256450653, "learning_rate": 0.0003560320420474282, "loss": 1.7229, "step": 39693 }, { "epoch": 1.32, "grad_norm": 0.47615960240364075, "learning_rate": 0.0003560217738227454, "loss": 1.8084, "step": 39694 }, { "epoch": 1.32, "grad_norm": 0.48545658588409424, "learning_rate": 0.00035601150553006046, "loss": 1.7873, "step": 39695 }, { "epoch": 1.32, "grad_norm": 0.4781584143638611, "learning_rate": 0.00035600123716938574, "loss": 1.7625, "step": 39696 }, { "epoch": 1.32, "grad_norm": 0.49040114879608154, "learning_rate": 0.00035599096874073383, "loss": 1.7657, "step": 39697 }, { "epoch": 1.32, "grad_norm": 0.4828343689441681, "learning_rate": 0.000355980700244117, "loss": 1.8109, "step": 39698 }, { "epoch": 1.32, "grad_norm": 0.47736620903015137, "learning_rate": 0.00035597043167954806, "loss": 1.8423, "step": 39699 }, { "epoch": 1.32, "grad_norm": 0.4726143479347229, "learning_rate": 0.00035596016304703914, "loss": 1.8142, "step": 39700 }, { "epoch": 1.32, "grad_norm": 0.480258584022522, "learning_rate": 0.0003559498943466028, "loss": 1.7559, "step": 39701 }, { "epoch": 1.32, "grad_norm": 0.5012142062187195, "learning_rate": 0.0003559396255782515, "loss": 1.7131, "step": 39702 }, { "epoch": 1.32, "grad_norm": 0.47607067227363586, "learning_rate": 0.00035592935674199787, "loss": 1.7585, "step": 39703 }, { "epoch": 1.32, "grad_norm": 0.48391038179397583, "learning_rate": 0.00035591908783785413, "loss": 1.7504, "step": 39704 }, { "epoch": 1.32, "grad_norm": 0.48644909262657166, "learning_rate": 0.000355908818865833, "loss": 1.7468, "step": 39705 }, { "epoch": 1.32, "grad_norm": 0.47722142934799194, "learning_rate": 0.0003558985498259467, "loss": 1.7993, "step": 39706 }, { "epoch": 1.32, "grad_norm": 0.49241718649864197, "learning_rate": 0.00035588828071820774, "loss": 1.7144, "step": 39707 }, { "epoch": 1.32, "grad_norm": 0.48464104533195496, "learning_rate": 0.00035587801154262873, "loss": 1.7696, "step": 39708 }, { "epoch": 1.32, "grad_norm": 0.4948451817035675, "learning_rate": 0.00035586774229922204, "loss": 1.7681, "step": 39709 }, { "epoch": 1.32, "grad_norm": 0.48359766602516174, "learning_rate": 0.00035585747298800017, "loss": 1.7897, "step": 39710 }, { "epoch": 1.32, "grad_norm": 0.48217839002609253, "learning_rate": 0.0003558472036089754, "loss": 1.7483, "step": 39711 }, { "epoch": 1.32, "grad_norm": 0.508773684501648, "learning_rate": 0.0003558369341621605, "loss": 1.7638, "step": 39712 }, { "epoch": 1.32, "grad_norm": 0.48690134286880493, "learning_rate": 0.0003558266646475678, "loss": 1.7617, "step": 39713 }, { "epoch": 1.32, "grad_norm": 0.4847262501716614, "learning_rate": 0.00035581639506520967, "loss": 1.7509, "step": 39714 }, { "epoch": 1.32, "grad_norm": 0.4881319999694824, "learning_rate": 0.0003558061254150987, "loss": 1.7254, "step": 39715 }, { "epoch": 1.32, "grad_norm": 0.4970666766166687, "learning_rate": 0.0003557958556972472, "loss": 1.7759, "step": 39716 }, { "epoch": 1.32, "grad_norm": 0.4851631224155426, "learning_rate": 0.000355785585911668, "loss": 1.7614, "step": 39717 }, { "epoch": 1.32, "grad_norm": 0.4848470985889435, "learning_rate": 0.0003557753160583731, "loss": 1.7796, "step": 39718 }, { "epoch": 1.32, "grad_norm": 0.5091859102249146, "learning_rate": 0.0003557650461373753, "loss": 1.754, "step": 39719 }, { "epoch": 1.32, "grad_norm": 0.493134468793869, "learning_rate": 0.000355754776148687, "loss": 1.7799, "step": 39720 }, { "epoch": 1.32, "grad_norm": 0.5133296847343445, "learning_rate": 0.0003557445060923204, "loss": 1.7918, "step": 39721 }, { "epoch": 1.32, "grad_norm": 0.49711138010025024, "learning_rate": 0.00035573423596828844, "loss": 1.7706, "step": 39722 }, { "epoch": 1.32, "grad_norm": 0.4973433315753937, "learning_rate": 0.0003557239657766031, "loss": 1.8082, "step": 39723 }, { "epoch": 1.32, "grad_norm": 0.49478763341903687, "learning_rate": 0.0003557136955172773, "loss": 1.7575, "step": 39724 }, { "epoch": 1.32, "grad_norm": 0.5066778063774109, "learning_rate": 0.00035570342519032313, "loss": 1.7666, "step": 39725 }, { "epoch": 1.32, "grad_norm": 0.4925963878631592, "learning_rate": 0.0003556931547957533, "loss": 1.7904, "step": 39726 }, { "epoch": 1.32, "grad_norm": 0.49581384658813477, "learning_rate": 0.00035568288433358007, "loss": 1.7066, "step": 39727 }, { "epoch": 1.32, "grad_norm": 0.5069980025291443, "learning_rate": 0.00035567261380381606, "loss": 1.7608, "step": 39728 }, { "epoch": 1.32, "grad_norm": 0.4877331852912903, "learning_rate": 0.00035566234320647384, "loss": 1.7806, "step": 39729 }, { "epoch": 1.32, "grad_norm": 0.4773886501789093, "learning_rate": 0.0003556520725415656, "loss": 1.8052, "step": 39730 }, { "epoch": 1.32, "grad_norm": 0.4849059581756592, "learning_rate": 0.0003556418018091039, "loss": 1.7091, "step": 39731 }, { "epoch": 1.32, "grad_norm": 0.5195349454879761, "learning_rate": 0.00035563153100910135, "loss": 1.8376, "step": 39732 }, { "epoch": 1.32, "grad_norm": 0.4831588864326477, "learning_rate": 0.00035562126014157026, "loss": 1.7554, "step": 39733 }, { "epoch": 1.32, "grad_norm": 0.46320655941963196, "learning_rate": 0.00035561098920652314, "loss": 1.7776, "step": 39734 }, { "epoch": 1.32, "grad_norm": 0.4847574532032013, "learning_rate": 0.0003556007182039726, "loss": 1.8331, "step": 39735 }, { "epoch": 1.32, "grad_norm": 0.5006433725357056, "learning_rate": 0.0003555904471339309, "loss": 1.7765, "step": 39736 }, { "epoch": 1.32, "grad_norm": 0.490739643573761, "learning_rate": 0.00035558017599641057, "loss": 1.7374, "step": 39737 }, { "epoch": 1.32, "grad_norm": 0.49298539757728577, "learning_rate": 0.0003555699047914241, "loss": 1.732, "step": 39738 }, { "epoch": 1.32, "grad_norm": 0.491547167301178, "learning_rate": 0.00035555963351898394, "loss": 1.6984, "step": 39739 }, { "epoch": 1.32, "grad_norm": 0.4877663552761078, "learning_rate": 0.0003555493621791026, "loss": 1.7609, "step": 39740 }, { "epoch": 1.32, "grad_norm": 0.4796847403049469, "learning_rate": 0.00035553909077179247, "loss": 1.7683, "step": 39741 }, { "epoch": 1.32, "grad_norm": 0.4855439364910126, "learning_rate": 0.00035552881929706615, "loss": 1.7386, "step": 39742 }, { "epoch": 1.32, "grad_norm": 0.48926299810409546, "learning_rate": 0.00035551854775493595, "loss": 1.7917, "step": 39743 }, { "epoch": 1.32, "grad_norm": 0.499131977558136, "learning_rate": 0.0003555082761454145, "loss": 1.9002, "step": 39744 }, { "epoch": 1.32, "grad_norm": 0.46853238344192505, "learning_rate": 0.0003554980044685142, "loss": 1.7397, "step": 39745 }, { "epoch": 1.32, "grad_norm": 0.5179493427276611, "learning_rate": 0.0003554877327242473, "loss": 1.783, "step": 39746 }, { "epoch": 1.32, "grad_norm": 0.491945743560791, "learning_rate": 0.0003554774609126267, "loss": 1.8252, "step": 39747 }, { "epoch": 1.32, "grad_norm": 0.4896799325942993, "learning_rate": 0.0003554671890336644, "loss": 1.7422, "step": 39748 }, { "epoch": 1.32, "grad_norm": 0.48194244503974915, "learning_rate": 0.00035545691708737336, "loss": 1.7618, "step": 39749 }, { "epoch": 1.32, "grad_norm": 0.5099676251411438, "learning_rate": 0.00035544664507376566, "loss": 1.8197, "step": 39750 }, { "epoch": 1.32, "grad_norm": 0.4615577757358551, "learning_rate": 0.0003554363729928539, "loss": 1.7174, "step": 39751 }, { "epoch": 1.32, "grad_norm": 0.4848456382751465, "learning_rate": 0.0003554261008446506, "loss": 1.7671, "step": 39752 }, { "epoch": 1.32, "grad_norm": 0.4924720525741577, "learning_rate": 0.0003554158286291681, "loss": 1.8118, "step": 39753 }, { "epoch": 1.32, "grad_norm": 0.5282074809074402, "learning_rate": 0.000355405556346419, "loss": 1.8249, "step": 39754 }, { "epoch": 1.32, "grad_norm": 0.48739221692085266, "learning_rate": 0.0003553952839964157, "loss": 1.7471, "step": 39755 }, { "epoch": 1.32, "grad_norm": 0.5031648874282837, "learning_rate": 0.00035538501157917073, "loss": 1.7411, "step": 39756 }, { "epoch": 1.32, "grad_norm": 0.4930286109447479, "learning_rate": 0.00035537473909469646, "loss": 1.7436, "step": 39757 }, { "epoch": 1.32, "grad_norm": 0.49878594279289246, "learning_rate": 0.00035536446654300545, "loss": 1.6965, "step": 39758 }, { "epoch": 1.32, "grad_norm": 0.499564528465271, "learning_rate": 0.0003553541939241101, "loss": 1.8534, "step": 39759 }, { "epoch": 1.32, "grad_norm": 0.4821091294288635, "learning_rate": 0.000355343921238023, "loss": 1.7744, "step": 39760 }, { "epoch": 1.32, "grad_norm": 0.4809197187423706, "learning_rate": 0.0003553336484847565, "loss": 1.7184, "step": 39761 }, { "epoch": 1.32, "grad_norm": 0.49085646867752075, "learning_rate": 0.00035532337566432305, "loss": 1.7603, "step": 39762 }, { "epoch": 1.32, "grad_norm": 0.501662015914917, "learning_rate": 0.0003553131027767352, "loss": 1.8219, "step": 39763 }, { "epoch": 1.32, "grad_norm": 0.5100117921829224, "learning_rate": 0.0003553028298220054, "loss": 1.7277, "step": 39764 }, { "epoch": 1.32, "grad_norm": 0.4851628243923187, "learning_rate": 0.0003552925568001461, "loss": 1.731, "step": 39765 }, { "epoch": 1.32, "grad_norm": 0.484774649143219, "learning_rate": 0.0003552822837111698, "loss": 1.7051, "step": 39766 }, { "epoch": 1.32, "grad_norm": 0.49400627613067627, "learning_rate": 0.00035527201055508904, "loss": 1.841, "step": 39767 }, { "epoch": 1.32, "grad_norm": 0.5080575942993164, "learning_rate": 0.00035526173733191606, "loss": 1.7538, "step": 39768 }, { "epoch": 1.32, "grad_norm": 0.4717395603656769, "learning_rate": 0.0003552514640416635, "loss": 1.7954, "step": 39769 }, { "epoch": 1.32, "grad_norm": 0.4872487187385559, "learning_rate": 0.0003552411906843438, "loss": 1.6973, "step": 39770 }, { "epoch": 1.32, "grad_norm": 0.48236772418022156, "learning_rate": 0.00035523091725996953, "loss": 1.8099, "step": 39771 }, { "epoch": 1.32, "grad_norm": 0.48304206132888794, "learning_rate": 0.000355220643768553, "loss": 1.7437, "step": 39772 }, { "epoch": 1.32, "grad_norm": 0.5027899742126465, "learning_rate": 0.00035521037021010667, "loss": 1.8462, "step": 39773 }, { "epoch": 1.32, "grad_norm": 0.4953140616416931, "learning_rate": 0.00035520009658464324, "loss": 1.7715, "step": 39774 }, { "epoch": 1.32, "grad_norm": 0.5068458914756775, "learning_rate": 0.00035518982289217487, "loss": 1.8134, "step": 39775 }, { "epoch": 1.32, "grad_norm": 0.5141962766647339, "learning_rate": 0.0003551795491327143, "loss": 1.7803, "step": 39776 }, { "epoch": 1.32, "grad_norm": 0.506081759929657, "learning_rate": 0.00035516927530627394, "loss": 1.7327, "step": 39777 }, { "epoch": 1.32, "grad_norm": 0.49128732085227966, "learning_rate": 0.000355159001412866, "loss": 1.7825, "step": 39778 }, { "epoch": 1.32, "grad_norm": 0.5020084977149963, "learning_rate": 0.0003551487274525033, "loss": 1.685, "step": 39779 }, { "epoch": 1.32, "grad_norm": 0.49940380454063416, "learning_rate": 0.00035513845342519806, "loss": 1.8193, "step": 39780 }, { "epoch": 1.32, "grad_norm": 0.4847329556941986, "learning_rate": 0.000355128179330963, "loss": 1.7607, "step": 39781 }, { "epoch": 1.32, "grad_norm": 0.5170416235923767, "learning_rate": 0.0003551179051698104, "loss": 1.8008, "step": 39782 }, { "epoch": 1.32, "grad_norm": 0.5093734264373779, "learning_rate": 0.0003551076309417528, "loss": 1.7832, "step": 39783 }, { "epoch": 1.32, "grad_norm": 0.49158188700675964, "learning_rate": 0.0003550973566468027, "loss": 1.6951, "step": 39784 }, { "epoch": 1.32, "grad_norm": 0.4932248294353485, "learning_rate": 0.00035508708228497237, "loss": 1.8451, "step": 39785 }, { "epoch": 1.32, "grad_norm": 0.5183081030845642, "learning_rate": 0.0003550768078562746, "loss": 1.6848, "step": 39786 }, { "epoch": 1.32, "grad_norm": 0.491349995136261, "learning_rate": 0.00035506653336072166, "loss": 1.8233, "step": 39787 }, { "epoch": 1.32, "grad_norm": 0.507397472858429, "learning_rate": 0.00035505625879832606, "loss": 1.8895, "step": 39788 }, { "epoch": 1.32, "grad_norm": 0.501675546169281, "learning_rate": 0.00035504598416910023, "loss": 1.846, "step": 39789 }, { "epoch": 1.32, "grad_norm": 0.49044764041900635, "learning_rate": 0.0003550357094730567, "loss": 1.7349, "step": 39790 }, { "epoch": 1.32, "grad_norm": 0.49977993965148926, "learning_rate": 0.00035502543471020796, "loss": 1.7379, "step": 39791 }, { "epoch": 1.32, "grad_norm": 0.5061966180801392, "learning_rate": 0.00035501515988056644, "loss": 1.8337, "step": 39792 }, { "epoch": 1.32, "grad_norm": 0.4820998013019562, "learning_rate": 0.0003550048849841446, "loss": 1.7792, "step": 39793 }, { "epoch": 1.32, "grad_norm": 0.4769579768180847, "learning_rate": 0.00035499461002095494, "loss": 1.7801, "step": 39794 }, { "epoch": 1.32, "grad_norm": 0.4943227171897888, "learning_rate": 0.00035498433499100995, "loss": 1.7913, "step": 39795 }, { "epoch": 1.32, "grad_norm": 0.5549964308738708, "learning_rate": 0.000354974059894322, "loss": 1.8589, "step": 39796 }, { "epoch": 1.32, "grad_norm": 0.5088040232658386, "learning_rate": 0.00035496378473090376, "loss": 1.7934, "step": 39797 }, { "epoch": 1.32, "grad_norm": 0.48475372791290283, "learning_rate": 0.0003549535095007675, "loss": 1.7676, "step": 39798 }, { "epoch": 1.32, "grad_norm": 0.48743560910224915, "learning_rate": 0.00035494323420392576, "loss": 1.704, "step": 39799 }, { "epoch": 1.32, "grad_norm": 0.5131537318229675, "learning_rate": 0.00035493295884039106, "loss": 1.8302, "step": 39800 }, { "epoch": 1.32, "grad_norm": 0.49879375100135803, "learning_rate": 0.0003549226834101759, "loss": 1.7035, "step": 39801 }, { "epoch": 1.32, "grad_norm": 0.4817642569541931, "learning_rate": 0.0003549124079132927, "loss": 1.7635, "step": 39802 }, { "epoch": 1.32, "grad_norm": 0.48368602991104126, "learning_rate": 0.00035490213234975375, "loss": 1.7964, "step": 39803 }, { "epoch": 1.32, "grad_norm": 0.4926404654979706, "learning_rate": 0.00035489185671957194, "loss": 1.7437, "step": 39804 }, { "epoch": 1.32, "grad_norm": 0.5091374516487122, "learning_rate": 0.00035488158102275934, "loss": 1.7901, "step": 39805 }, { "epoch": 1.32, "grad_norm": 0.4838816225528717, "learning_rate": 0.00035487130525932865, "loss": 1.7835, "step": 39806 }, { "epoch": 1.32, "grad_norm": 0.4818810522556305, "learning_rate": 0.0003548610294292923, "loss": 1.7674, "step": 39807 }, { "epoch": 1.32, "grad_norm": 0.47372546792030334, "learning_rate": 0.0003548507535326627, "loss": 1.7544, "step": 39808 }, { "epoch": 1.32, "grad_norm": 0.4894755780696869, "learning_rate": 0.0003548404775694524, "loss": 1.6741, "step": 39809 }, { "epoch": 1.32, "grad_norm": 0.5164842009544373, "learning_rate": 0.0003548302015396737, "loss": 1.8211, "step": 39810 }, { "epoch": 1.32, "grad_norm": 0.5252311825752258, "learning_rate": 0.0003548199254433394, "loss": 1.7627, "step": 39811 }, { "epoch": 1.32, "grad_norm": 0.5104696154594421, "learning_rate": 0.00035480964928046174, "loss": 1.8492, "step": 39812 }, { "epoch": 1.32, "grad_norm": 0.5023254156112671, "learning_rate": 0.00035479937305105315, "loss": 1.8016, "step": 39813 }, { "epoch": 1.32, "grad_norm": 0.5036235451698303, "learning_rate": 0.00035478909675512637, "loss": 1.7921, "step": 39814 }, { "epoch": 1.32, "grad_norm": 0.46622616052627563, "learning_rate": 0.0003547788203926935, "loss": 1.6764, "step": 39815 }, { "epoch": 1.32, "grad_norm": 0.48433539271354675, "learning_rate": 0.0003547685439637674, "loss": 1.8232, "step": 39816 }, { "epoch": 1.32, "grad_norm": 0.51998370885849, "learning_rate": 0.0003547582674683602, "loss": 1.8091, "step": 39817 }, { "epoch": 1.32, "grad_norm": 0.5173539519309998, "learning_rate": 0.00035474799090648467, "loss": 1.7555, "step": 39818 }, { "epoch": 1.32, "grad_norm": 0.4942477345466614, "learning_rate": 0.000354737714278153, "loss": 1.7658, "step": 39819 }, { "epoch": 1.32, "grad_norm": 0.4714542329311371, "learning_rate": 0.0003547274375833779, "loss": 1.8006, "step": 39820 }, { "epoch": 1.32, "grad_norm": 0.4867979884147644, "learning_rate": 0.00035471716082217177, "loss": 1.7782, "step": 39821 }, { "epoch": 1.32, "grad_norm": 0.5140799283981323, "learning_rate": 0.0003547068839945471, "loss": 1.735, "step": 39822 }, { "epoch": 1.32, "grad_norm": 0.48481321334838867, "learning_rate": 0.00035469660710051623, "loss": 1.7625, "step": 39823 }, { "epoch": 1.32, "grad_norm": 0.4699626863002777, "learning_rate": 0.0003546863301400918, "loss": 1.7614, "step": 39824 }, { "epoch": 1.32, "grad_norm": 0.4878602623939514, "learning_rate": 0.0003546760531132861, "loss": 1.7704, "step": 39825 }, { "epoch": 1.33, "grad_norm": 0.5103198289871216, "learning_rate": 0.0003546657760201119, "loss": 1.7568, "step": 39826 }, { "epoch": 1.33, "grad_norm": 0.4808700680732727, "learning_rate": 0.0003546554988605815, "loss": 1.7472, "step": 39827 }, { "epoch": 1.33, "grad_norm": 0.5026564002037048, "learning_rate": 0.0003546452216347072, "loss": 1.8321, "step": 39828 }, { "epoch": 1.33, "grad_norm": 0.49917417764663696, "learning_rate": 0.0003546349443425018, "loss": 1.8001, "step": 39829 }, { "epoch": 1.33, "grad_norm": 0.48010221123695374, "learning_rate": 0.00035462466698397765, "loss": 1.7742, "step": 39830 }, { "epoch": 1.33, "grad_norm": 0.48498499393463135, "learning_rate": 0.00035461438955914704, "loss": 1.7747, "step": 39831 }, { "epoch": 1.33, "grad_norm": 0.49406135082244873, "learning_rate": 0.00035460411206802284, "loss": 1.8492, "step": 39832 }, { "epoch": 1.33, "grad_norm": 0.4815388321876526, "learning_rate": 0.0003545938345106171, "loss": 1.7935, "step": 39833 }, { "epoch": 1.33, "grad_norm": 0.4927932620048523, "learning_rate": 0.00035458355688694254, "loss": 1.7599, "step": 39834 }, { "epoch": 1.33, "grad_norm": 0.4729401767253876, "learning_rate": 0.00035457327919701155, "loss": 1.7103, "step": 39835 }, { "epoch": 1.33, "grad_norm": 0.47885769605636597, "learning_rate": 0.0003545630014408368, "loss": 1.7449, "step": 39836 }, { "epoch": 1.33, "grad_norm": 0.476064532995224, "learning_rate": 0.0003545527236184304, "loss": 1.8057, "step": 39837 }, { "epoch": 1.33, "grad_norm": 0.4855378270149231, "learning_rate": 0.0003545424457298052, "loss": 1.7637, "step": 39838 }, { "epoch": 1.33, "grad_norm": 0.4726693034172058, "learning_rate": 0.0003545321677749734, "loss": 1.7689, "step": 39839 }, { "epoch": 1.33, "grad_norm": 0.4962843060493469, "learning_rate": 0.00035452188975394755, "loss": 1.7286, "step": 39840 }, { "epoch": 1.33, "grad_norm": 0.514179527759552, "learning_rate": 0.00035451161166674027, "loss": 1.7711, "step": 39841 }, { "epoch": 1.33, "grad_norm": 0.4865610599517822, "learning_rate": 0.0003545013335133639, "loss": 1.7487, "step": 39842 }, { "epoch": 1.33, "grad_norm": 0.5025970339775085, "learning_rate": 0.0003544910552938309, "loss": 1.8154, "step": 39843 }, { "epoch": 1.33, "grad_norm": 0.4959847033023834, "learning_rate": 0.0003544807770081538, "loss": 1.7591, "step": 39844 }, { "epoch": 1.33, "grad_norm": 0.48515188694000244, "learning_rate": 0.00035447049865634507, "loss": 1.6549, "step": 39845 }, { "epoch": 1.33, "grad_norm": 0.48652520775794983, "learning_rate": 0.00035446022023841726, "loss": 1.7981, "step": 39846 }, { "epoch": 1.33, "grad_norm": 0.49690374732017517, "learning_rate": 0.00035444994175438266, "loss": 1.7423, "step": 39847 }, { "epoch": 1.33, "grad_norm": 0.47470688819885254, "learning_rate": 0.0003544396632042539, "loss": 1.811, "step": 39848 }, { "epoch": 1.33, "grad_norm": 0.4880144000053406, "learning_rate": 0.0003544293845880434, "loss": 1.7801, "step": 39849 }, { "epoch": 1.33, "grad_norm": 0.4858638644218445, "learning_rate": 0.00035441910590576357, "loss": 1.8658, "step": 39850 }, { "epoch": 1.33, "grad_norm": 0.5706172585487366, "learning_rate": 0.00035440882715742704, "loss": 1.8816, "step": 39851 }, { "epoch": 1.33, "grad_norm": 0.4817584455013275, "learning_rate": 0.0003543985483430463, "loss": 1.815, "step": 39852 }, { "epoch": 1.33, "grad_norm": 0.48486295342445374, "learning_rate": 0.00035438826946263365, "loss": 1.7143, "step": 39853 }, { "epoch": 1.33, "grad_norm": 0.4691997170448303, "learning_rate": 0.0003543779905162016, "loss": 1.7498, "step": 39854 }, { "epoch": 1.33, "grad_norm": 0.4860347509384155, "learning_rate": 0.0003543677115037627, "loss": 1.8224, "step": 39855 }, { "epoch": 1.33, "grad_norm": 0.48595356941223145, "learning_rate": 0.00035435743242532945, "loss": 1.7044, "step": 39856 }, { "epoch": 1.33, "grad_norm": 0.46952661871910095, "learning_rate": 0.0003543471532809143, "loss": 1.8048, "step": 39857 }, { "epoch": 1.33, "grad_norm": 0.46855518221855164, "learning_rate": 0.0003543368740705296, "loss": 1.7724, "step": 39858 }, { "epoch": 1.33, "grad_norm": 0.47843608260154724, "learning_rate": 0.0003543265947941881, "loss": 1.7841, "step": 39859 }, { "epoch": 1.33, "grad_norm": 0.49502432346343994, "learning_rate": 0.0003543163154519021, "loss": 1.7754, "step": 39860 }, { "epoch": 1.33, "grad_norm": 0.499288946390152, "learning_rate": 0.000354306036043684, "loss": 1.7672, "step": 39861 }, { "epoch": 1.33, "grad_norm": 0.5001784563064575, "learning_rate": 0.0003542957565695465, "loss": 1.7593, "step": 39862 }, { "epoch": 1.33, "grad_norm": 0.4770578444004059, "learning_rate": 0.0003542854770295018, "loss": 1.7672, "step": 39863 }, { "epoch": 1.33, "grad_norm": 0.49638158082962036, "learning_rate": 0.00035427519742356267, "loss": 1.8165, "step": 39864 }, { "epoch": 1.33, "grad_norm": 0.48381757736206055, "learning_rate": 0.00035426491775174127, "loss": 1.7472, "step": 39865 }, { "epoch": 1.33, "grad_norm": 0.49588435888290405, "learning_rate": 0.0003542546380140504, "loss": 1.7545, "step": 39866 }, { "epoch": 1.33, "grad_norm": 0.4886307418346405, "learning_rate": 0.00035424435821050236, "loss": 1.8156, "step": 39867 }, { "epoch": 1.33, "grad_norm": 0.5139660239219666, "learning_rate": 0.0003542340783411097, "loss": 1.7981, "step": 39868 }, { "epoch": 1.33, "grad_norm": 0.4885611832141876, "learning_rate": 0.00035422379840588484, "loss": 1.7771, "step": 39869 }, { "epoch": 1.33, "grad_norm": 0.48589763045310974, "learning_rate": 0.00035421351840484014, "loss": 1.796, "step": 39870 }, { "epoch": 1.33, "grad_norm": 0.5177093744277954, "learning_rate": 0.00035420323833798843, "loss": 1.8154, "step": 39871 }, { "epoch": 1.33, "grad_norm": 0.4819709360599518, "learning_rate": 0.0003541929582053418, "loss": 1.754, "step": 39872 }, { "epoch": 1.33, "grad_norm": 0.4954380393028259, "learning_rate": 0.000354182678006913, "loss": 1.7329, "step": 39873 }, { "epoch": 1.33, "grad_norm": 0.5099256038665771, "learning_rate": 0.0003541723977427144, "loss": 1.7206, "step": 39874 }, { "epoch": 1.33, "grad_norm": 0.5257077217102051, "learning_rate": 0.0003541621174127585, "loss": 1.8548, "step": 39875 }, { "epoch": 1.33, "grad_norm": 0.5084314942359924, "learning_rate": 0.00035415183701705775, "loss": 1.7553, "step": 39876 }, { "epoch": 1.33, "grad_norm": 0.49504274129867554, "learning_rate": 0.0003541415565556246, "loss": 1.8474, "step": 39877 }, { "epoch": 1.33, "grad_norm": 0.4986760914325714, "learning_rate": 0.0003541312760284717, "loss": 1.7623, "step": 39878 }, { "epoch": 1.33, "grad_norm": 0.5424182415008545, "learning_rate": 0.00035412099543561136, "loss": 1.7494, "step": 39879 }, { "epoch": 1.33, "grad_norm": 0.4917302429676056, "learning_rate": 0.000354110714777056, "loss": 1.84, "step": 39880 }, { "epoch": 1.33, "grad_norm": 0.49913424253463745, "learning_rate": 0.00035410043405281836, "loss": 1.8083, "step": 39881 }, { "epoch": 1.33, "grad_norm": 0.4798612892627716, "learning_rate": 0.00035409015326291066, "loss": 1.8758, "step": 39882 }, { "epoch": 1.33, "grad_norm": 0.49456682801246643, "learning_rate": 0.0003540798724073455, "loss": 1.7907, "step": 39883 }, { "epoch": 1.33, "grad_norm": 0.516772449016571, "learning_rate": 0.0003540695914861354, "loss": 1.7662, "step": 39884 }, { "epoch": 1.33, "grad_norm": 0.4933132827281952, "learning_rate": 0.0003540593104992928, "loss": 1.8149, "step": 39885 }, { "epoch": 1.33, "grad_norm": 0.49055203795433044, "learning_rate": 0.00035404902944683006, "loss": 1.8435, "step": 39886 }, { "epoch": 1.33, "grad_norm": 0.47019463777542114, "learning_rate": 0.00035403874832875973, "loss": 1.7765, "step": 39887 }, { "epoch": 1.33, "grad_norm": 0.47270798683166504, "learning_rate": 0.0003540284671450944, "loss": 1.7274, "step": 39888 }, { "epoch": 1.33, "grad_norm": 0.5225746035575867, "learning_rate": 0.00035401818589584657, "loss": 1.7785, "step": 39889 }, { "epoch": 1.33, "grad_norm": 0.5031030774116516, "learning_rate": 0.0003540079045810284, "loss": 1.8027, "step": 39890 }, { "epoch": 1.33, "grad_norm": 0.4923792779445648, "learning_rate": 0.0003539976232006528, "loss": 1.7602, "step": 39891 }, { "epoch": 1.33, "grad_norm": 0.4816744923591614, "learning_rate": 0.000353987341754732, "loss": 1.8516, "step": 39892 }, { "epoch": 1.33, "grad_norm": 0.4898451864719391, "learning_rate": 0.0003539770602432784, "loss": 1.8385, "step": 39893 }, { "epoch": 1.33, "grad_norm": 0.5053223371505737, "learning_rate": 0.00035396677866630477, "loss": 1.759, "step": 39894 }, { "epoch": 1.33, "grad_norm": 0.4887017607688904, "learning_rate": 0.00035395649702382325, "loss": 1.7847, "step": 39895 }, { "epoch": 1.33, "grad_norm": 0.4768770635128021, "learning_rate": 0.0003539462153158466, "loss": 1.7411, "step": 39896 }, { "epoch": 1.33, "grad_norm": 0.49597299098968506, "learning_rate": 0.0003539359335423871, "loss": 1.787, "step": 39897 }, { "epoch": 1.33, "grad_norm": 0.480733186006546, "learning_rate": 0.00035392565170345747, "loss": 1.7799, "step": 39898 }, { "epoch": 1.33, "grad_norm": 0.501166045665741, "learning_rate": 0.00035391536979906995, "loss": 1.8116, "step": 39899 }, { "epoch": 1.33, "grad_norm": 0.48619550466537476, "learning_rate": 0.00035390508782923716, "loss": 1.807, "step": 39900 }, { "epoch": 1.33, "grad_norm": 0.47666504979133606, "learning_rate": 0.00035389480579397153, "loss": 1.8298, "step": 39901 }, { "epoch": 1.33, "grad_norm": 0.5000426173210144, "learning_rate": 0.00035388452369328546, "loss": 1.7756, "step": 39902 }, { "epoch": 1.33, "grad_norm": 0.484176903963089, "learning_rate": 0.00035387424152719165, "loss": 1.7094, "step": 39903 }, { "epoch": 1.33, "grad_norm": 0.4894205927848816, "learning_rate": 0.00035386395929570233, "loss": 1.7322, "step": 39904 }, { "epoch": 1.33, "grad_norm": 0.6316460967063904, "learning_rate": 0.0003538536769988301, "loss": 1.7728, "step": 39905 }, { "epoch": 1.33, "grad_norm": 0.5161468386650085, "learning_rate": 0.0003538433946365875, "loss": 1.7517, "step": 39906 }, { "epoch": 1.33, "grad_norm": 0.5083961486816406, "learning_rate": 0.00035383311220898696, "loss": 1.8397, "step": 39907 }, { "epoch": 1.33, "grad_norm": 0.5114994645118713, "learning_rate": 0.000353822829716041, "loss": 1.7558, "step": 39908 }, { "epoch": 1.33, "grad_norm": 0.5029160380363464, "learning_rate": 0.000353812547157762, "loss": 1.7581, "step": 39909 }, { "epoch": 1.33, "grad_norm": 0.5059292316436768, "learning_rate": 0.00035380226453416244, "loss": 1.7872, "step": 39910 }, { "epoch": 1.33, "grad_norm": 0.5193933844566345, "learning_rate": 0.00035379198184525495, "loss": 1.798, "step": 39911 }, { "epoch": 1.33, "grad_norm": 0.5139365196228027, "learning_rate": 0.0003537816990910518, "loss": 1.774, "step": 39912 }, { "epoch": 1.33, "grad_norm": 0.4876343011856079, "learning_rate": 0.0003537714162715657, "loss": 1.7458, "step": 39913 }, { "epoch": 1.33, "grad_norm": 0.487566739320755, "learning_rate": 0.00035376113338680905, "loss": 1.8001, "step": 39914 }, { "epoch": 1.33, "grad_norm": 0.4881210923194885, "learning_rate": 0.00035375085043679423, "loss": 1.75, "step": 39915 }, { "epoch": 1.33, "grad_norm": 0.5169616937637329, "learning_rate": 0.0003537405674215338, "loss": 1.7795, "step": 39916 }, { "epoch": 1.33, "grad_norm": 0.48193785548210144, "learning_rate": 0.0003537302843410402, "loss": 1.7935, "step": 39917 }, { "epoch": 1.33, "grad_norm": 0.49574658274650574, "learning_rate": 0.00035372000119532603, "loss": 1.8811, "step": 39918 }, { "epoch": 1.33, "grad_norm": 0.4987891614437103, "learning_rate": 0.0003537097179844037, "loss": 1.7127, "step": 39919 }, { "epoch": 1.33, "grad_norm": 0.47099897265434265, "learning_rate": 0.0003536994347082856, "loss": 1.7796, "step": 39920 }, { "epoch": 1.33, "grad_norm": 0.48608124256134033, "learning_rate": 0.0003536891513669844, "loss": 1.7956, "step": 39921 }, { "epoch": 1.33, "grad_norm": 0.5038990378379822, "learning_rate": 0.00035367886796051245, "loss": 1.8384, "step": 39922 }, { "epoch": 1.33, "grad_norm": 0.4874204099178314, "learning_rate": 0.0003536685844888822, "loss": 1.7424, "step": 39923 }, { "epoch": 1.33, "grad_norm": 0.7783107161521912, "learning_rate": 0.00035365830095210636, "loss": 1.798, "step": 39924 }, { "epoch": 1.33, "grad_norm": 0.5027416944503784, "learning_rate": 0.00035364801735019704, "loss": 1.7283, "step": 39925 }, { "epoch": 1.33, "grad_norm": 0.48935651779174805, "learning_rate": 0.0003536377336831671, "loss": 1.8032, "step": 39926 }, { "epoch": 1.33, "grad_norm": 0.487092524766922, "learning_rate": 0.00035362744995102866, "loss": 1.8298, "step": 39927 }, { "epoch": 1.33, "grad_norm": 0.4897722601890564, "learning_rate": 0.00035361716615379463, "loss": 1.7949, "step": 39928 }, { "epoch": 1.33, "grad_norm": 0.47755879163742065, "learning_rate": 0.0003536068822914771, "loss": 1.7241, "step": 39929 }, { "epoch": 1.33, "grad_norm": 0.493937224149704, "learning_rate": 0.00035359659836408877, "loss": 1.8023, "step": 39930 }, { "epoch": 1.33, "grad_norm": 0.5053388476371765, "learning_rate": 0.0003535863143716421, "loss": 1.7615, "step": 39931 }, { "epoch": 1.33, "grad_norm": 0.5015009045600891, "learning_rate": 0.00035357603031414944, "loss": 1.7813, "step": 39932 }, { "epoch": 1.33, "grad_norm": 0.4912733733654022, "learning_rate": 0.00035356574619162343, "loss": 1.8482, "step": 39933 }, { "epoch": 1.33, "grad_norm": 0.4974924921989441, "learning_rate": 0.0003535554620040765, "loss": 1.8297, "step": 39934 }, { "epoch": 1.33, "grad_norm": 0.49968284368515015, "learning_rate": 0.00035354517775152116, "loss": 1.7809, "step": 39935 }, { "epoch": 1.33, "grad_norm": 0.48861104249954224, "learning_rate": 0.0003535348934339698, "loss": 1.8322, "step": 39936 }, { "epoch": 1.33, "grad_norm": 0.48224732279777527, "learning_rate": 0.00035352460905143496, "loss": 1.8022, "step": 39937 }, { "epoch": 1.33, "grad_norm": 0.4888877272605896, "learning_rate": 0.00035351432460392916, "loss": 1.8095, "step": 39938 }, { "epoch": 1.33, "grad_norm": 0.5077240467071533, "learning_rate": 0.00035350404009146496, "loss": 1.7025, "step": 39939 }, { "epoch": 1.33, "grad_norm": 0.4897008538246155, "learning_rate": 0.0003534937555140546, "loss": 1.7319, "step": 39940 }, { "epoch": 1.33, "grad_norm": 0.5152695178985596, "learning_rate": 0.0003534834708717108, "loss": 1.7409, "step": 39941 }, { "epoch": 1.33, "grad_norm": 0.48102402687072754, "learning_rate": 0.0003534731861644458, "loss": 1.7676, "step": 39942 }, { "epoch": 1.33, "grad_norm": 0.49226540327072144, "learning_rate": 0.00035346290139227237, "loss": 1.7283, "step": 39943 }, { "epoch": 1.33, "grad_norm": 0.49827247858047485, "learning_rate": 0.00035345261655520286, "loss": 1.8382, "step": 39944 }, { "epoch": 1.33, "grad_norm": 0.49364590644836426, "learning_rate": 0.0003534423316532496, "loss": 1.765, "step": 39945 }, { "epoch": 1.33, "grad_norm": 0.4846154749393463, "learning_rate": 0.00035343204668642544, "loss": 1.8141, "step": 39946 }, { "epoch": 1.33, "grad_norm": 0.4905073940753937, "learning_rate": 0.0003534217616547425, "loss": 1.8558, "step": 39947 }, { "epoch": 1.33, "grad_norm": 0.496865838766098, "learning_rate": 0.00035341147655821345, "loss": 1.8087, "step": 39948 }, { "epoch": 1.33, "grad_norm": 0.49687066674232483, "learning_rate": 0.0003534011913968507, "loss": 1.7757, "step": 39949 }, { "epoch": 1.33, "grad_norm": 0.4840223789215088, "learning_rate": 0.0003533909061706668, "loss": 1.7458, "step": 39950 }, { "epoch": 1.33, "grad_norm": 0.46591153740882874, "learning_rate": 0.0003533806208796742, "loss": 1.7669, "step": 39951 }, { "epoch": 1.33, "grad_norm": 0.5155951380729675, "learning_rate": 0.00035337033552388535, "loss": 1.8143, "step": 39952 }, { "epoch": 1.33, "grad_norm": 0.4756864905357361, "learning_rate": 0.00035336005010331293, "loss": 1.7895, "step": 39953 }, { "epoch": 1.33, "grad_norm": 0.49690714478492737, "learning_rate": 0.0003533497646179692, "loss": 1.7182, "step": 39954 }, { "epoch": 1.33, "grad_norm": 0.49303844571113586, "learning_rate": 0.0003533394790678666, "loss": 1.6982, "step": 39955 }, { "epoch": 1.33, "grad_norm": 0.4836450517177582, "learning_rate": 0.00035332919345301786, "loss": 1.6426, "step": 39956 }, { "epoch": 1.33, "grad_norm": 0.48182743787765503, "learning_rate": 0.00035331890777343523, "loss": 1.7939, "step": 39957 }, { "epoch": 1.33, "grad_norm": 0.48902231454849243, "learning_rate": 0.0003533086220291314, "loss": 1.7685, "step": 39958 }, { "epoch": 1.33, "grad_norm": 0.5201597809791565, "learning_rate": 0.00035329833622011874, "loss": 1.8106, "step": 39959 }, { "epoch": 1.33, "grad_norm": 0.49556922912597656, "learning_rate": 0.0003532880503464097, "loss": 1.7574, "step": 39960 }, { "epoch": 1.33, "grad_norm": 0.4889329969882965, "learning_rate": 0.0003532777644080169, "loss": 1.8332, "step": 39961 }, { "epoch": 1.33, "grad_norm": 0.49188700318336487, "learning_rate": 0.0003532674784049527, "loss": 1.8068, "step": 39962 }, { "epoch": 1.33, "grad_norm": 0.4864691495895386, "learning_rate": 0.00035325719233722964, "loss": 1.823, "step": 39963 }, { "epoch": 1.33, "grad_norm": 0.4885339140892029, "learning_rate": 0.00035324690620486016, "loss": 1.7644, "step": 39964 }, { "epoch": 1.33, "grad_norm": 0.48059767484664917, "learning_rate": 0.0003532366200078568, "loss": 1.782, "step": 39965 }, { "epoch": 1.33, "grad_norm": 0.4935530126094818, "learning_rate": 0.00035322633374623204, "loss": 1.757, "step": 39966 }, { "epoch": 1.33, "grad_norm": 0.4721262753009796, "learning_rate": 0.0003532160474199983, "loss": 1.7441, "step": 39967 }, { "epoch": 1.33, "grad_norm": 0.5096825957298279, "learning_rate": 0.0003532057610291682, "loss": 1.7834, "step": 39968 }, { "epoch": 1.33, "grad_norm": 0.475079208612442, "learning_rate": 0.00035319547457375415, "loss": 1.7833, "step": 39969 }, { "epoch": 1.33, "grad_norm": 0.4904022216796875, "learning_rate": 0.0003531851880537686, "loss": 1.7553, "step": 39970 }, { "epoch": 1.33, "grad_norm": 0.8789231181144714, "learning_rate": 0.000353174901469224, "loss": 1.786, "step": 39971 }, { "epoch": 1.33, "grad_norm": 0.48663702607154846, "learning_rate": 0.000353164614820133, "loss": 1.7265, "step": 39972 }, { "epoch": 1.33, "grad_norm": 0.4963426887989044, "learning_rate": 0.00035315432810650795, "loss": 1.7804, "step": 39973 }, { "epoch": 1.33, "grad_norm": 0.48838871717453003, "learning_rate": 0.0003531440413283614, "loss": 1.7377, "step": 39974 }, { "epoch": 1.33, "grad_norm": 0.47846719622612, "learning_rate": 0.0003531337544857058, "loss": 1.7263, "step": 39975 }, { "epoch": 1.33, "grad_norm": 0.5031675696372986, "learning_rate": 0.0003531234675785537, "loss": 1.8328, "step": 39976 }, { "epoch": 1.33, "grad_norm": 0.5111094117164612, "learning_rate": 0.00035311318060691745, "loss": 1.7289, "step": 39977 }, { "epoch": 1.33, "grad_norm": 0.4825327694416046, "learning_rate": 0.0003531028935708096, "loss": 1.7711, "step": 39978 }, { "epoch": 1.33, "grad_norm": 0.4906229078769684, "learning_rate": 0.00035309260647024284, "loss": 1.8157, "step": 39979 }, { "epoch": 1.33, "grad_norm": 0.5048902034759521, "learning_rate": 0.00035308231930522924, "loss": 1.835, "step": 39980 }, { "epoch": 1.33, "grad_norm": 0.49240338802337646, "learning_rate": 0.0003530720320757817, "loss": 1.7272, "step": 39981 }, { "epoch": 1.33, "grad_norm": 0.5011911392211914, "learning_rate": 0.0003530617447819124, "loss": 1.8432, "step": 39982 }, { "epoch": 1.33, "grad_norm": 0.5132100582122803, "learning_rate": 0.0003530514574236341, "loss": 1.8012, "step": 39983 }, { "epoch": 1.33, "grad_norm": 0.5112112164497375, "learning_rate": 0.0003530411700009591, "loss": 1.7481, "step": 39984 }, { "epoch": 1.33, "grad_norm": 0.5234489440917969, "learning_rate": 0.00035303088251389996, "loss": 1.8407, "step": 39985 }, { "epoch": 1.33, "grad_norm": 0.5147984027862549, "learning_rate": 0.00035302059496246916, "loss": 1.7513, "step": 39986 }, { "epoch": 1.33, "grad_norm": 0.5100608468055725, "learning_rate": 0.000353010307346679, "loss": 1.8078, "step": 39987 }, { "epoch": 1.33, "grad_norm": 0.49153128266334534, "learning_rate": 0.00035300001966654234, "loss": 1.7523, "step": 39988 }, { "epoch": 1.33, "grad_norm": 0.550786554813385, "learning_rate": 0.0003529897319220713, "loss": 1.7624, "step": 39989 }, { "epoch": 1.33, "grad_norm": 0.48981809616088867, "learning_rate": 0.00035297944411327866, "loss": 1.7443, "step": 39990 }, { "epoch": 1.33, "grad_norm": 0.509004533290863, "learning_rate": 0.00035296915624017674, "loss": 1.7853, "step": 39991 }, { "epoch": 1.33, "grad_norm": 0.4823989272117615, "learning_rate": 0.000352958868302778, "loss": 1.7726, "step": 39992 }, { "epoch": 1.33, "grad_norm": 0.5040895938873291, "learning_rate": 0.0003529485803010951, "loss": 1.754, "step": 39993 }, { "epoch": 1.33, "grad_norm": 0.4838612973690033, "learning_rate": 0.00035293829223514034, "loss": 1.7211, "step": 39994 }, { "epoch": 1.33, "grad_norm": 0.49339261651039124, "learning_rate": 0.0003529280041049264, "loss": 1.7193, "step": 39995 }, { "epoch": 1.33, "grad_norm": 0.50282222032547, "learning_rate": 0.0003529177159104656, "loss": 1.761, "step": 39996 }, { "epoch": 1.33, "grad_norm": 0.48770037293434143, "learning_rate": 0.00035290742765177046, "loss": 1.7939, "step": 39997 }, { "epoch": 1.33, "grad_norm": 0.5029098391532898, "learning_rate": 0.0003528971393288535, "loss": 1.7614, "step": 39998 }, { "epoch": 1.33, "grad_norm": 0.4871724247932434, "learning_rate": 0.00035288685094172725, "loss": 1.7049, "step": 39999 }, { "epoch": 1.33, "grad_norm": 0.5096835494041443, "learning_rate": 0.0003528765624904041, "loss": 1.7613, "step": 40000 }, { "epoch": 1.33, "grad_norm": 0.4900929927825928, "learning_rate": 0.00035286627397489666, "loss": 1.7456, "step": 40001 }, { "epoch": 1.33, "grad_norm": 0.4884691834449768, "learning_rate": 0.0003528559853952173, "loss": 1.8121, "step": 40002 }, { "epoch": 1.33, "grad_norm": 0.4883221387863159, "learning_rate": 0.0003528456967513786, "loss": 1.792, "step": 40003 }, { "epoch": 1.33, "grad_norm": 0.51360023021698, "learning_rate": 0.00035283540804339304, "loss": 1.8368, "step": 40004 }, { "epoch": 1.33, "grad_norm": 0.49871134757995605, "learning_rate": 0.00035282511927127306, "loss": 1.7947, "step": 40005 }, { "epoch": 1.33, "grad_norm": 0.505986213684082, "learning_rate": 0.0003528148304350312, "loss": 1.7537, "step": 40006 }, { "epoch": 1.33, "grad_norm": 0.47648894786834717, "learning_rate": 0.00035280454153467975, "loss": 1.77, "step": 40007 }, { "epoch": 1.33, "grad_norm": 0.525332510471344, "learning_rate": 0.0003527942525702315, "loss": 1.8226, "step": 40008 }, { "epoch": 1.33, "grad_norm": 0.4831829369068146, "learning_rate": 0.0003527839635416988, "loss": 1.8127, "step": 40009 }, { "epoch": 1.33, "grad_norm": 0.4816608428955078, "learning_rate": 0.00035277367444909414, "loss": 1.7868, "step": 40010 }, { "epoch": 1.33, "grad_norm": 0.47881215810775757, "learning_rate": 0.0003527633852924301, "loss": 1.7746, "step": 40011 }, { "epoch": 1.33, "grad_norm": 0.4995531141757965, "learning_rate": 0.0003527530960717189, "loss": 1.7216, "step": 40012 }, { "epoch": 1.33, "grad_norm": 0.4793699085712433, "learning_rate": 0.00035274280678697335, "loss": 1.7514, "step": 40013 }, { "epoch": 1.33, "grad_norm": 0.5141376852989197, "learning_rate": 0.00035273251743820564, "loss": 1.7721, "step": 40014 }, { "epoch": 1.33, "grad_norm": 0.4974697232246399, "learning_rate": 0.00035272222802542865, "loss": 1.7157, "step": 40015 }, { "epoch": 1.33, "grad_norm": 0.5319435000419617, "learning_rate": 0.0003527119385486545, "loss": 1.8066, "step": 40016 }, { "epoch": 1.33, "grad_norm": 0.49916473031044006, "learning_rate": 0.00035270164900789584, "loss": 1.8195, "step": 40017 }, { "epoch": 1.33, "grad_norm": 0.47646650671958923, "learning_rate": 0.00035269135940316524, "loss": 1.7369, "step": 40018 }, { "epoch": 1.33, "grad_norm": 0.49786463379859924, "learning_rate": 0.00035268106973447496, "loss": 1.7481, "step": 40019 }, { "epoch": 1.33, "grad_norm": 0.500377357006073, "learning_rate": 0.0003526707800018378, "loss": 1.747, "step": 40020 }, { "epoch": 1.33, "grad_norm": 0.4804714620113373, "learning_rate": 0.0003526604902052659, "loss": 1.834, "step": 40021 }, { "epoch": 1.33, "grad_norm": 0.4874596893787384, "learning_rate": 0.00035265020034477195, "loss": 1.8263, "step": 40022 }, { "epoch": 1.33, "grad_norm": 0.48245343565940857, "learning_rate": 0.00035263991042036846, "loss": 1.8174, "step": 40023 }, { "epoch": 1.33, "grad_norm": 0.499595046043396, "learning_rate": 0.00035262962043206784, "loss": 1.7141, "step": 40024 }, { "epoch": 1.33, "grad_norm": 0.47570425271987915, "learning_rate": 0.00035261933037988266, "loss": 1.6839, "step": 40025 }, { "epoch": 1.33, "grad_norm": 0.47651344537734985, "learning_rate": 0.0003526090402638253, "loss": 1.7377, "step": 40026 }, { "epoch": 1.33, "grad_norm": 0.4798469841480255, "learning_rate": 0.00035259875008390837, "loss": 1.7577, "step": 40027 }, { "epoch": 1.33, "grad_norm": 0.5091038346290588, "learning_rate": 0.00035258845984014435, "loss": 1.7498, "step": 40028 }, { "epoch": 1.33, "grad_norm": 0.5052862167358398, "learning_rate": 0.00035257816953254554, "loss": 1.7833, "step": 40029 }, { "epoch": 1.33, "grad_norm": 0.4848497211933136, "learning_rate": 0.0003525678791611247, "loss": 1.7977, "step": 40030 }, { "epoch": 1.33, "grad_norm": 0.4967484772205353, "learning_rate": 0.0003525575887258942, "loss": 1.735, "step": 40031 }, { "epoch": 1.33, "grad_norm": 0.4878932237625122, "learning_rate": 0.00035254729822686647, "loss": 1.8084, "step": 40032 }, { "epoch": 1.33, "grad_norm": 0.5020685195922852, "learning_rate": 0.00035253700766405406, "loss": 1.8251, "step": 40033 }, { "epoch": 1.33, "grad_norm": 0.5022904276847839, "learning_rate": 0.0003525267170374695, "loss": 1.7446, "step": 40034 }, { "epoch": 1.33, "grad_norm": 0.49953117966651917, "learning_rate": 0.0003525164263471253, "loss": 1.8031, "step": 40035 }, { "epoch": 1.33, "grad_norm": 0.5141457319259644, "learning_rate": 0.0003525061355930339, "loss": 1.7176, "step": 40036 }, { "epoch": 1.33, "grad_norm": 0.5035608410835266, "learning_rate": 0.0003524958447752076, "loss": 1.8596, "step": 40037 }, { "epoch": 1.33, "grad_norm": 0.482083797454834, "learning_rate": 0.0003524855538936593, "loss": 1.8691, "step": 40038 }, { "epoch": 1.33, "grad_norm": 0.4883783161640167, "learning_rate": 0.00035247526294840116, "loss": 1.834, "step": 40039 }, { "epoch": 1.33, "grad_norm": 0.49605828523635864, "learning_rate": 0.0003524649719394458, "loss": 1.7252, "step": 40040 }, { "epoch": 1.33, "grad_norm": 0.48991644382476807, "learning_rate": 0.00035245468086680574, "loss": 1.7366, "step": 40041 }, { "epoch": 1.33, "grad_norm": 0.5036652088165283, "learning_rate": 0.0003524443897304933, "loss": 1.7102, "step": 40042 }, { "epoch": 1.33, "grad_norm": 0.5025627613067627, "learning_rate": 0.00035243409853052125, "loss": 1.7326, "step": 40043 }, { "epoch": 1.33, "grad_norm": 0.4996442496776581, "learning_rate": 0.0003524238072669018, "loss": 1.817, "step": 40044 }, { "epoch": 1.33, "grad_norm": 0.4914133548736572, "learning_rate": 0.0003524135159396477, "loss": 1.7885, "step": 40045 }, { "epoch": 1.33, "grad_norm": 0.4979715049266815, "learning_rate": 0.00035240322454877117, "loss": 1.7675, "step": 40046 }, { "epoch": 1.33, "grad_norm": 0.502286970615387, "learning_rate": 0.000352392933094285, "loss": 1.8272, "step": 40047 }, { "epoch": 1.33, "grad_norm": 0.47939154505729675, "learning_rate": 0.0003523826415762014, "loss": 1.7292, "step": 40048 }, { "epoch": 1.33, "grad_norm": 0.4880326986312866, "learning_rate": 0.000352372349994533, "loss": 1.8354, "step": 40049 }, { "epoch": 1.33, "grad_norm": 0.48315292596817017, "learning_rate": 0.0003523620583492924, "loss": 1.7721, "step": 40050 }, { "epoch": 1.33, "grad_norm": 0.5094877481460571, "learning_rate": 0.0003523517666404919, "loss": 1.7387, "step": 40051 }, { "epoch": 1.33, "grad_norm": 0.5018652081489563, "learning_rate": 0.0003523414748681441, "loss": 1.7848, "step": 40052 }, { "epoch": 1.33, "grad_norm": 0.4951467216014862, "learning_rate": 0.00035233118303226144, "loss": 1.715, "step": 40053 }, { "epoch": 1.33, "grad_norm": 0.4871463179588318, "learning_rate": 0.00035232089113285635, "loss": 1.8458, "step": 40054 }, { "epoch": 1.33, "grad_norm": 0.48517918586730957, "learning_rate": 0.0003523105991699415, "loss": 1.8283, "step": 40055 }, { "epoch": 1.33, "grad_norm": 0.5012460947036743, "learning_rate": 0.00035230030714352936, "loss": 1.7571, "step": 40056 }, { "epoch": 1.33, "grad_norm": 0.5004675388336182, "learning_rate": 0.0003522900150536322, "loss": 1.7865, "step": 40057 }, { "epoch": 1.33, "grad_norm": 0.4856612980365753, "learning_rate": 0.0003522797229002628, "loss": 1.7508, "step": 40058 }, { "epoch": 1.33, "grad_norm": 0.48698604106903076, "learning_rate": 0.00035226943068343336, "loss": 1.6719, "step": 40059 }, { "epoch": 1.33, "grad_norm": 0.5029134154319763, "learning_rate": 0.00035225913840315667, "loss": 1.8093, "step": 40060 }, { "epoch": 1.33, "grad_norm": 0.4936564564704895, "learning_rate": 0.0003522488460594451, "loss": 1.7175, "step": 40061 }, { "epoch": 1.33, "grad_norm": 0.49997177720069885, "learning_rate": 0.000352238553652311, "loss": 1.7022, "step": 40062 }, { "epoch": 1.33, "grad_norm": 0.5108849406242371, "learning_rate": 0.00035222826118176713, "loss": 1.841, "step": 40063 }, { "epoch": 1.33, "grad_norm": 0.48570606112480164, "learning_rate": 0.00035221796864782573, "loss": 1.8271, "step": 40064 }, { "epoch": 1.33, "grad_norm": 0.49851521849632263, "learning_rate": 0.00035220767605049943, "loss": 1.7446, "step": 40065 }, { "epoch": 1.33, "grad_norm": 0.4973193109035492, "learning_rate": 0.0003521973833898008, "loss": 1.7595, "step": 40066 }, { "epoch": 1.33, "grad_norm": 0.4790927767753601, "learning_rate": 0.0003521870906657421, "loss": 1.8249, "step": 40067 }, { "epoch": 1.33, "grad_norm": 0.49377748370170593, "learning_rate": 0.00035217679787833615, "loss": 1.8434, "step": 40068 }, { "epoch": 1.33, "grad_norm": 0.4838036000728607, "learning_rate": 0.00035216650502759505, "loss": 1.8438, "step": 40069 }, { "epoch": 1.33, "grad_norm": 0.4754692614078522, "learning_rate": 0.00035215621211353166, "loss": 1.8118, "step": 40070 }, { "epoch": 1.33, "grad_norm": 0.4949445128440857, "learning_rate": 0.00035214591913615824, "loss": 1.8419, "step": 40071 }, { "epoch": 1.33, "grad_norm": 0.4972621500492096, "learning_rate": 0.0003521356260954873, "loss": 1.8196, "step": 40072 }, { "epoch": 1.33, "grad_norm": 0.5212565064430237, "learning_rate": 0.00035212533299153155, "loss": 1.8186, "step": 40073 }, { "epoch": 1.33, "grad_norm": 0.4994068145751953, "learning_rate": 0.0003521150398243031, "loss": 1.787, "step": 40074 }, { "epoch": 1.33, "grad_norm": 0.5008447170257568, "learning_rate": 0.0003521047465938149, "loss": 1.8093, "step": 40075 }, { "epoch": 1.33, "grad_norm": 0.9542210698127747, "learning_rate": 0.0003520944533000791, "loss": 1.8504, "step": 40076 }, { "epoch": 1.33, "grad_norm": 0.49170613288879395, "learning_rate": 0.00035208415994310836, "loss": 1.7746, "step": 40077 }, { "epoch": 1.33, "grad_norm": 0.5017494559288025, "learning_rate": 0.000352073866522915, "loss": 1.8286, "step": 40078 }, { "epoch": 1.33, "grad_norm": 0.5065539479255676, "learning_rate": 0.00035206357303951174, "loss": 1.7872, "step": 40079 }, { "epoch": 1.33, "grad_norm": 0.5028385519981384, "learning_rate": 0.000352053279492911, "loss": 1.8341, "step": 40080 }, { "epoch": 1.33, "grad_norm": 0.4997122287750244, "learning_rate": 0.00035204298588312525, "loss": 1.8625, "step": 40081 }, { "epoch": 1.33, "grad_norm": 0.47126686573028564, "learning_rate": 0.0003520326922101669, "loss": 1.8028, "step": 40082 }, { "epoch": 1.33, "grad_norm": 0.5404366850852966, "learning_rate": 0.00035202239847404853, "loss": 1.8207, "step": 40083 }, { "epoch": 1.33, "grad_norm": 0.5704091787338257, "learning_rate": 0.0003520121046747827, "loss": 1.7607, "step": 40084 }, { "epoch": 1.33, "grad_norm": 0.5117236375808716, "learning_rate": 0.0003520018108123818, "loss": 1.751, "step": 40085 }, { "epoch": 1.33, "grad_norm": 0.48107364773750305, "learning_rate": 0.0003519915168868584, "loss": 1.8303, "step": 40086 }, { "epoch": 1.33, "grad_norm": 0.4970668852329254, "learning_rate": 0.0003519812228982249, "loss": 1.7358, "step": 40087 }, { "epoch": 1.33, "grad_norm": 0.4896090030670166, "learning_rate": 0.000351970928846494, "loss": 1.7375, "step": 40088 }, { "epoch": 1.33, "grad_norm": 0.48242226243019104, "learning_rate": 0.0003519606347316779, "loss": 1.7815, "step": 40089 }, { "epoch": 1.33, "grad_norm": 0.4888678193092346, "learning_rate": 0.00035195034055378923, "loss": 1.7232, "step": 40090 }, { "epoch": 1.33, "grad_norm": 0.5068895816802979, "learning_rate": 0.00035194004631284063, "loss": 1.8426, "step": 40091 }, { "epoch": 1.33, "grad_norm": 0.48883056640625, "learning_rate": 0.0003519297520088443, "loss": 1.741, "step": 40092 }, { "epoch": 1.33, "grad_norm": 0.4825069308280945, "learning_rate": 0.0003519194576418131, "loss": 1.7512, "step": 40093 }, { "epoch": 1.33, "grad_norm": 0.5018754601478577, "learning_rate": 0.0003519091632117591, "loss": 1.7694, "step": 40094 }, { "epoch": 1.33, "grad_norm": 0.5249441266059875, "learning_rate": 0.0003518988687186952, "loss": 1.805, "step": 40095 }, { "epoch": 1.33, "grad_norm": 0.49478673934936523, "learning_rate": 0.00035188857416263374, "loss": 1.7792, "step": 40096 }, { "epoch": 1.33, "grad_norm": 0.48750370740890503, "learning_rate": 0.00035187827954358703, "loss": 1.7456, "step": 40097 }, { "epoch": 1.33, "grad_norm": 0.4760039448738098, "learning_rate": 0.00035186798486156785, "loss": 1.7828, "step": 40098 }, { "epoch": 1.33, "grad_norm": 0.5074867010116577, "learning_rate": 0.0003518576901165885, "loss": 1.7568, "step": 40099 }, { "epoch": 1.33, "grad_norm": 0.4942489564418793, "learning_rate": 0.0003518473953086617, "loss": 1.857, "step": 40100 }, { "epoch": 1.33, "grad_norm": 0.48297008872032166, "learning_rate": 0.0003518371004377997, "loss": 1.7577, "step": 40101 }, { "epoch": 1.33, "grad_norm": 0.495941162109375, "learning_rate": 0.0003518268055040151, "loss": 1.7557, "step": 40102 }, { "epoch": 1.33, "grad_norm": 0.5415220856666565, "learning_rate": 0.00035181651050732047, "loss": 1.8377, "step": 40103 }, { "epoch": 1.33, "grad_norm": 0.48930907249450684, "learning_rate": 0.0003518062154477281, "loss": 1.792, "step": 40104 }, { "epoch": 1.33, "grad_norm": 0.5033412575721741, "learning_rate": 0.0003517959203252507, "loss": 1.7842, "step": 40105 }, { "epoch": 1.33, "grad_norm": 0.49742648005485535, "learning_rate": 0.0003517856251399007, "loss": 1.8677, "step": 40106 }, { "epoch": 1.33, "grad_norm": 0.5023443102836609, "learning_rate": 0.00035177532989169054, "loss": 1.7689, "step": 40107 }, { "epoch": 1.33, "grad_norm": 0.47651857137680054, "learning_rate": 0.0003517650345806327, "loss": 1.8204, "step": 40108 }, { "epoch": 1.33, "grad_norm": 0.4983668625354767, "learning_rate": 0.00035175473920673984, "loss": 1.6912, "step": 40109 }, { "epoch": 1.33, "grad_norm": 0.5158834457397461, "learning_rate": 0.0003517444437700243, "loss": 1.8669, "step": 40110 }, { "epoch": 1.33, "grad_norm": 0.4928983151912689, "learning_rate": 0.00035173414827049867, "loss": 1.8369, "step": 40111 }, { "epoch": 1.33, "grad_norm": 0.4842159152030945, "learning_rate": 0.00035172385270817536, "loss": 1.7935, "step": 40112 }, { "epoch": 1.33, "grad_norm": 0.5006246566772461, "learning_rate": 0.000351713557083067, "loss": 1.9005, "step": 40113 }, { "epoch": 1.33, "grad_norm": 0.5011260509490967, "learning_rate": 0.00035170326139518586, "loss": 1.796, "step": 40114 }, { "epoch": 1.33, "grad_norm": 0.4868663251399994, "learning_rate": 0.00035169296564454465, "loss": 1.7597, "step": 40115 }, { "epoch": 1.33, "grad_norm": 0.510621964931488, "learning_rate": 0.00035168266983115584, "loss": 1.7901, "step": 40116 }, { "epoch": 1.33, "grad_norm": 0.506190299987793, "learning_rate": 0.00035167237395503185, "loss": 1.7477, "step": 40117 }, { "epoch": 1.33, "grad_norm": 0.5140582323074341, "learning_rate": 0.0003516620780161852, "loss": 1.743, "step": 40118 }, { "epoch": 1.33, "grad_norm": 0.48917004466056824, "learning_rate": 0.0003516517820146284, "loss": 1.815, "step": 40119 }, { "epoch": 1.33, "grad_norm": 0.4925476908683777, "learning_rate": 0.00035164148595037394, "loss": 1.685, "step": 40120 }, { "epoch": 1.33, "grad_norm": 0.47724276781082153, "learning_rate": 0.0003516311898234343, "loss": 1.7608, "step": 40121 }, { "epoch": 1.33, "grad_norm": 0.5040413737297058, "learning_rate": 0.0003516208936338221, "loss": 1.7819, "step": 40122 }, { "epoch": 1.33, "grad_norm": 0.4765579402446747, "learning_rate": 0.0003516105973815497, "loss": 1.704, "step": 40123 }, { "epoch": 1.33, "grad_norm": 0.47587889432907104, "learning_rate": 0.00035160030106662956, "loss": 1.7614, "step": 40124 }, { "epoch": 1.33, "grad_norm": 0.4918326735496521, "learning_rate": 0.00035159000468907436, "loss": 1.8512, "step": 40125 }, { "epoch": 1.33, "grad_norm": 0.5339231491088867, "learning_rate": 0.00035157970824889646, "loss": 1.7621, "step": 40126 }, { "epoch": 1.34, "grad_norm": 0.4998083710670471, "learning_rate": 0.0003515694117461083, "loss": 1.7247, "step": 40127 }, { "epoch": 1.34, "grad_norm": 0.502303957939148, "learning_rate": 0.0003515591151807227, "loss": 1.7553, "step": 40128 }, { "epoch": 1.34, "grad_norm": 0.5067936182022095, "learning_rate": 0.0003515488185527517, "loss": 1.7548, "step": 40129 }, { "epoch": 1.34, "grad_norm": 0.5006582140922546, "learning_rate": 0.0003515385218622082, "loss": 1.7556, "step": 40130 }, { "epoch": 1.34, "grad_norm": 0.4937787353992462, "learning_rate": 0.00035152822510910436, "loss": 1.7589, "step": 40131 }, { "epoch": 1.34, "grad_norm": 0.4950370490550995, "learning_rate": 0.000351517928293453, "loss": 1.7932, "step": 40132 }, { "epoch": 1.34, "grad_norm": 0.4945678114891052, "learning_rate": 0.0003515076314152664, "loss": 1.7295, "step": 40133 }, { "epoch": 1.34, "grad_norm": 0.5141252279281616, "learning_rate": 0.00035149733447455714, "loss": 1.8179, "step": 40134 }, { "epoch": 1.34, "grad_norm": 0.49458780884742737, "learning_rate": 0.0003514870374713377, "loss": 1.8647, "step": 40135 }, { "epoch": 1.34, "grad_norm": 0.4920629560947418, "learning_rate": 0.00035147674040562053, "loss": 1.8216, "step": 40136 }, { "epoch": 1.34, "grad_norm": 0.5058911442756653, "learning_rate": 0.00035146644327741823, "loss": 1.7873, "step": 40137 }, { "epoch": 1.34, "grad_norm": 0.5028183460235596, "learning_rate": 0.00035145614608674333, "loss": 1.7727, "step": 40138 }, { "epoch": 1.34, "grad_norm": 0.5015896558761597, "learning_rate": 0.00035144584883360815, "loss": 1.7908, "step": 40139 }, { "epoch": 1.34, "grad_norm": 0.49795839190483093, "learning_rate": 0.00035143555151802524, "loss": 1.7786, "step": 40140 }, { "epoch": 1.34, "grad_norm": 0.4976569414138794, "learning_rate": 0.00035142525414000726, "loss": 1.7717, "step": 40141 }, { "epoch": 1.34, "grad_norm": 0.48344579339027405, "learning_rate": 0.00035141495669956655, "loss": 1.8338, "step": 40142 }, { "epoch": 1.34, "grad_norm": 0.505058228969574, "learning_rate": 0.0003514046591967157, "loss": 1.7352, "step": 40143 }, { "epoch": 1.34, "grad_norm": 0.4895343482494354, "learning_rate": 0.0003513943616314671, "loss": 1.7166, "step": 40144 }, { "epoch": 1.34, "grad_norm": 0.4867846667766571, "learning_rate": 0.00035138406400383337, "loss": 1.7951, "step": 40145 }, { "epoch": 1.34, "grad_norm": 0.48975855112075806, "learning_rate": 0.00035137376631382694, "loss": 1.7094, "step": 40146 }, { "epoch": 1.34, "grad_norm": 0.4740065038204193, "learning_rate": 0.0003513634685614603, "loss": 1.7942, "step": 40147 }, { "epoch": 1.34, "grad_norm": 0.49748364090919495, "learning_rate": 0.000351353170746746, "loss": 1.8339, "step": 40148 }, { "epoch": 1.34, "grad_norm": 0.5020853877067566, "learning_rate": 0.00035134287286969654, "loss": 1.8438, "step": 40149 }, { "epoch": 1.34, "grad_norm": 0.49228039383888245, "learning_rate": 0.0003513325749303243, "loss": 1.7844, "step": 40150 }, { "epoch": 1.34, "grad_norm": 0.49330708384513855, "learning_rate": 0.000351322276928642, "loss": 1.74, "step": 40151 }, { "epoch": 1.34, "grad_norm": 0.5275112986564636, "learning_rate": 0.000351311978864662, "loss": 1.8015, "step": 40152 }, { "epoch": 1.34, "grad_norm": 0.4977792501449585, "learning_rate": 0.00035130168073839685, "loss": 1.8434, "step": 40153 }, { "epoch": 1.34, "grad_norm": 0.4802376925945282, "learning_rate": 0.00035129138254985886, "loss": 1.7717, "step": 40154 }, { "epoch": 1.34, "grad_norm": 0.49257779121398926, "learning_rate": 0.00035128108429906084, "loss": 1.7508, "step": 40155 }, { "epoch": 1.34, "grad_norm": 0.5102565288543701, "learning_rate": 0.00035127078598601503, "loss": 1.7777, "step": 40156 }, { "epoch": 1.34, "grad_norm": 0.49914073944091797, "learning_rate": 0.0003512604876107341, "loss": 1.8249, "step": 40157 }, { "epoch": 1.34, "grad_norm": 0.4984321594238281, "learning_rate": 0.0003512501891732305, "loss": 1.8437, "step": 40158 }, { "epoch": 1.34, "grad_norm": 0.497506320476532, "learning_rate": 0.0003512398906735167, "loss": 1.729, "step": 40159 }, { "epoch": 1.34, "grad_norm": 0.5059147477149963, "learning_rate": 0.00035122959211160524, "loss": 1.7493, "step": 40160 }, { "epoch": 1.34, "grad_norm": 0.47231897711753845, "learning_rate": 0.00035121929348750855, "loss": 1.7854, "step": 40161 }, { "epoch": 1.34, "grad_norm": 0.4953874945640564, "learning_rate": 0.0003512089948012393, "loss": 1.6837, "step": 40162 }, { "epoch": 1.34, "grad_norm": 0.48122143745422363, "learning_rate": 0.0003511986960528098, "loss": 1.8073, "step": 40163 }, { "epoch": 1.34, "grad_norm": 0.5007850527763367, "learning_rate": 0.0003511883972422326, "loss": 1.822, "step": 40164 }, { "epoch": 1.34, "grad_norm": 0.48817917704582214, "learning_rate": 0.00035117809836952027, "loss": 1.7829, "step": 40165 }, { "epoch": 1.34, "grad_norm": 0.48237577080726624, "learning_rate": 0.0003511677994346852, "loss": 1.7726, "step": 40166 }, { "epoch": 1.34, "grad_norm": 0.5086659789085388, "learning_rate": 0.0003511575004377401, "loss": 1.8156, "step": 40167 }, { "epoch": 1.34, "grad_norm": 0.49429211020469666, "learning_rate": 0.0003511472013786973, "loss": 1.7007, "step": 40168 }, { "epoch": 1.34, "grad_norm": 0.518326997756958, "learning_rate": 0.0003511369022575692, "loss": 1.8146, "step": 40169 }, { "epoch": 1.34, "grad_norm": 0.5033499598503113, "learning_rate": 0.00035112660307436855, "loss": 1.7813, "step": 40170 }, { "epoch": 1.34, "grad_norm": 0.5059100389480591, "learning_rate": 0.0003511163038291077, "loss": 1.7824, "step": 40171 }, { "epoch": 1.34, "grad_norm": 0.49709072709083557, "learning_rate": 0.0003511060045217992, "loss": 1.8497, "step": 40172 }, { "epoch": 1.34, "grad_norm": 0.4869270920753479, "learning_rate": 0.0003510957051524556, "loss": 1.7671, "step": 40173 }, { "epoch": 1.34, "grad_norm": 0.4959927499294281, "learning_rate": 0.0003510854057210892, "loss": 1.7669, "step": 40174 }, { "epoch": 1.34, "grad_norm": 0.49677106738090515, "learning_rate": 0.0003510751062277128, "loss": 1.8113, "step": 40175 }, { "epoch": 1.34, "grad_norm": 0.517728328704834, "learning_rate": 0.0003510648066723386, "loss": 1.8003, "step": 40176 }, { "epoch": 1.34, "grad_norm": 0.49748170375823975, "learning_rate": 0.0003510545070549794, "loss": 1.7652, "step": 40177 }, { "epoch": 1.34, "grad_norm": 0.4991872310638428, "learning_rate": 0.0003510442073756475, "loss": 1.7115, "step": 40178 }, { "epoch": 1.34, "grad_norm": 0.5001476407051086, "learning_rate": 0.0003510339076343553, "loss": 1.7795, "step": 40179 }, { "epoch": 1.34, "grad_norm": 0.49945446848869324, "learning_rate": 0.0003510236078311157, "loss": 1.7983, "step": 40180 }, { "epoch": 1.34, "grad_norm": 0.496570348739624, "learning_rate": 0.0003510133079659408, "loss": 1.7037, "step": 40181 }, { "epoch": 1.34, "grad_norm": 0.5099709033966064, "learning_rate": 0.0003510030080388433, "loss": 1.7851, "step": 40182 }, { "epoch": 1.34, "grad_norm": 0.5138335824012756, "learning_rate": 0.0003509927080498357, "loss": 1.761, "step": 40183 }, { "epoch": 1.34, "grad_norm": 0.48241308331489563, "learning_rate": 0.00035098240799893033, "loss": 1.83, "step": 40184 }, { "epoch": 1.34, "grad_norm": 0.4881090819835663, "learning_rate": 0.00035097210788614, "loss": 1.7481, "step": 40185 }, { "epoch": 1.34, "grad_norm": 0.49772295355796814, "learning_rate": 0.00035096180771147686, "loss": 1.7348, "step": 40186 }, { "epoch": 1.34, "grad_norm": 0.5210756063461304, "learning_rate": 0.00035095150747495383, "loss": 1.7554, "step": 40187 }, { "epoch": 1.34, "grad_norm": 0.5107855200767517, "learning_rate": 0.000350941207176583, "loss": 1.8018, "step": 40188 }, { "epoch": 1.34, "grad_norm": 0.513518214225769, "learning_rate": 0.0003509309068163771, "loss": 1.7866, "step": 40189 }, { "epoch": 1.34, "grad_norm": 0.5236921310424805, "learning_rate": 0.00035092060639434864, "loss": 1.7838, "step": 40190 }, { "epoch": 1.34, "grad_norm": 0.5156307220458984, "learning_rate": 0.0003509103059105099, "loss": 1.7982, "step": 40191 }, { "epoch": 1.34, "grad_norm": 0.5129013657569885, "learning_rate": 0.0003509000053648737, "loss": 1.7719, "step": 40192 }, { "epoch": 1.34, "grad_norm": 0.4916897714138031, "learning_rate": 0.00035088970475745233, "loss": 1.798, "step": 40193 }, { "epoch": 1.34, "grad_norm": 0.5049890875816345, "learning_rate": 0.0003508794040882584, "loss": 1.7541, "step": 40194 }, { "epoch": 1.34, "grad_norm": 0.5206820368766785, "learning_rate": 0.00035086910335730434, "loss": 1.7972, "step": 40195 }, { "epoch": 1.34, "grad_norm": 0.5084918737411499, "learning_rate": 0.00035085880256460273, "loss": 1.7905, "step": 40196 }, { "epoch": 1.34, "grad_norm": 0.49918946623802185, "learning_rate": 0.00035084850171016605, "loss": 1.7674, "step": 40197 }, { "epoch": 1.34, "grad_norm": 0.4734148383140564, "learning_rate": 0.0003508382007940067, "loss": 1.7723, "step": 40198 }, { "epoch": 1.34, "grad_norm": 0.490455687046051, "learning_rate": 0.0003508278998161373, "loss": 1.7232, "step": 40199 }, { "epoch": 1.34, "grad_norm": 0.5218744874000549, "learning_rate": 0.0003508175987765703, "loss": 1.6926, "step": 40200 }, { "epoch": 1.34, "grad_norm": 0.5005772709846497, "learning_rate": 0.00035080729767531817, "loss": 1.79, "step": 40201 }, { "epoch": 1.34, "grad_norm": 0.4841243624687195, "learning_rate": 0.0003507969965123935, "loss": 1.7354, "step": 40202 }, { "epoch": 1.34, "grad_norm": 0.48943212628364563, "learning_rate": 0.0003507866952878089, "loss": 1.7413, "step": 40203 }, { "epoch": 1.34, "grad_norm": 0.49490565061569214, "learning_rate": 0.00035077639400157655, "loss": 1.7914, "step": 40204 }, { "epoch": 1.34, "grad_norm": 0.494878351688385, "learning_rate": 0.00035076609265370927, "loss": 1.8107, "step": 40205 }, { "epoch": 1.34, "grad_norm": 0.4787989556789398, "learning_rate": 0.0003507557912442193, "loss": 1.6982, "step": 40206 }, { "epoch": 1.34, "grad_norm": 0.49561288952827454, "learning_rate": 0.0003507454897731193, "loss": 1.7559, "step": 40207 }, { "epoch": 1.34, "grad_norm": 0.49002036452293396, "learning_rate": 0.0003507351882404219, "loss": 1.7732, "step": 40208 }, { "epoch": 1.34, "grad_norm": 0.5079430937767029, "learning_rate": 0.00035072488664613933, "loss": 1.75, "step": 40209 }, { "epoch": 1.34, "grad_norm": 0.4882737994194031, "learning_rate": 0.0003507145849902843, "loss": 1.7697, "step": 40210 }, { "epoch": 1.34, "grad_norm": 0.483102411031723, "learning_rate": 0.00035070428327286916, "loss": 1.8148, "step": 40211 }, { "epoch": 1.34, "grad_norm": 0.508386492729187, "learning_rate": 0.0003506939814939065, "loss": 1.7883, "step": 40212 }, { "epoch": 1.34, "grad_norm": 0.49479031562805176, "learning_rate": 0.0003506836796534089, "loss": 1.829, "step": 40213 }, { "epoch": 1.34, "grad_norm": 0.48092809319496155, "learning_rate": 0.0003506733777513886, "loss": 1.761, "step": 40214 }, { "epoch": 1.34, "grad_norm": 0.4930579960346222, "learning_rate": 0.00035066307578785855, "loss": 1.7472, "step": 40215 }, { "epoch": 1.34, "grad_norm": 0.4886397421360016, "learning_rate": 0.00035065277376283073, "loss": 1.7281, "step": 40216 }, { "epoch": 1.34, "grad_norm": 0.48341605067253113, "learning_rate": 0.0003506424716763181, "loss": 1.7871, "step": 40217 }, { "epoch": 1.34, "grad_norm": 0.525264322757721, "learning_rate": 0.00035063216952833286, "loss": 1.8079, "step": 40218 }, { "epoch": 1.34, "grad_norm": 0.5038365125656128, "learning_rate": 0.0003506218673188876, "loss": 1.809, "step": 40219 }, { "epoch": 1.34, "grad_norm": 0.47403180599212646, "learning_rate": 0.0003506115650479949, "loss": 1.7636, "step": 40220 }, { "epoch": 1.34, "grad_norm": 0.5009422302246094, "learning_rate": 0.00035060126271566723, "loss": 1.8213, "step": 40221 }, { "epoch": 1.34, "grad_norm": 0.49697044491767883, "learning_rate": 0.0003505909603219171, "loss": 1.7643, "step": 40222 }, { "epoch": 1.34, "grad_norm": 0.4950009286403656, "learning_rate": 0.000350580657866757, "loss": 1.8014, "step": 40223 }, { "epoch": 1.34, "grad_norm": 0.4935376048088074, "learning_rate": 0.0003505703553501994, "loss": 1.8392, "step": 40224 }, { "epoch": 1.34, "grad_norm": 0.48584437370300293, "learning_rate": 0.0003505600527722568, "loss": 1.7161, "step": 40225 }, { "epoch": 1.34, "grad_norm": 0.4810345768928528, "learning_rate": 0.00035054975013294184, "loss": 1.7947, "step": 40226 }, { "epoch": 1.34, "grad_norm": 0.4986794888973236, "learning_rate": 0.0003505394474322669, "loss": 1.8816, "step": 40227 }, { "epoch": 1.34, "grad_norm": 0.5056753158569336, "learning_rate": 0.0003505291446702445, "loss": 1.8102, "step": 40228 }, { "epoch": 1.34, "grad_norm": 0.49690359830856323, "learning_rate": 0.0003505188418468872, "loss": 1.7599, "step": 40229 }, { "epoch": 1.34, "grad_norm": 0.4901164472103119, "learning_rate": 0.00035050853896220746, "loss": 1.7927, "step": 40230 }, { "epoch": 1.34, "grad_norm": 0.48205018043518066, "learning_rate": 0.0003504982360162178, "loss": 1.7772, "step": 40231 }, { "epoch": 1.34, "grad_norm": 0.49660825729370117, "learning_rate": 0.0003504879330089306, "loss": 1.7495, "step": 40232 }, { "epoch": 1.34, "grad_norm": 0.5021716356277466, "learning_rate": 0.0003504776299403586, "loss": 1.7619, "step": 40233 }, { "epoch": 1.34, "grad_norm": 0.4841150939464569, "learning_rate": 0.0003504673268105142, "loss": 1.788, "step": 40234 }, { "epoch": 1.34, "grad_norm": 0.5020822882652283, "learning_rate": 0.0003504570236194099, "loss": 1.8239, "step": 40235 }, { "epoch": 1.34, "grad_norm": 0.5035467743873596, "learning_rate": 0.0003504467203670582, "loss": 1.7347, "step": 40236 }, { "epoch": 1.34, "grad_norm": 0.480244517326355, "learning_rate": 0.0003504364170534716, "loss": 1.7625, "step": 40237 }, { "epoch": 1.34, "grad_norm": 0.49899688363075256, "learning_rate": 0.0003504261136786626, "loss": 1.7556, "step": 40238 }, { "epoch": 1.34, "grad_norm": 0.5230517983436584, "learning_rate": 0.0003504158102426438, "loss": 1.7899, "step": 40239 }, { "epoch": 1.34, "grad_norm": 0.4940120577812195, "learning_rate": 0.0003504055067454276, "loss": 1.7428, "step": 40240 }, { "epoch": 1.34, "grad_norm": 0.4928273856639862, "learning_rate": 0.00035039520318702643, "loss": 1.7418, "step": 40241 }, { "epoch": 1.34, "grad_norm": 0.49274906516075134, "learning_rate": 0.00035038489956745305, "loss": 1.7695, "step": 40242 }, { "epoch": 1.34, "grad_norm": 0.4748836159706116, "learning_rate": 0.0003503745958867198, "loss": 1.7236, "step": 40243 }, { "epoch": 1.34, "grad_norm": 0.4950116276741028, "learning_rate": 0.00035036429214483917, "loss": 1.8409, "step": 40244 }, { "epoch": 1.34, "grad_norm": 0.48337236046791077, "learning_rate": 0.0003503539883418238, "loss": 1.7594, "step": 40245 }, { "epoch": 1.34, "grad_norm": 0.4954582750797272, "learning_rate": 0.000350343684477686, "loss": 1.8428, "step": 40246 }, { "epoch": 1.34, "grad_norm": 0.5082131028175354, "learning_rate": 0.00035033338055243845, "loss": 1.8333, "step": 40247 }, { "epoch": 1.34, "grad_norm": 0.5095365643501282, "learning_rate": 0.0003503230765660935, "loss": 1.7829, "step": 40248 }, { "epoch": 1.34, "grad_norm": 0.4737056791782379, "learning_rate": 0.00035031277251866387, "loss": 1.806, "step": 40249 }, { "epoch": 1.34, "grad_norm": 0.4832957983016968, "learning_rate": 0.0003503024684101619, "loss": 1.7094, "step": 40250 }, { "epoch": 1.34, "grad_norm": 0.4847280979156494, "learning_rate": 0.0003502921642406001, "loss": 1.7506, "step": 40251 }, { "epoch": 1.34, "grad_norm": 0.49783962965011597, "learning_rate": 0.00035028186000999115, "loss": 1.7997, "step": 40252 }, { "epoch": 1.34, "grad_norm": 0.5044241547584534, "learning_rate": 0.00035027155571834724, "loss": 1.7957, "step": 40253 }, { "epoch": 1.34, "grad_norm": 0.48928001523017883, "learning_rate": 0.00035026125136568124, "loss": 1.8129, "step": 40254 }, { "epoch": 1.34, "grad_norm": 0.4870656132698059, "learning_rate": 0.00035025094695200545, "loss": 1.742, "step": 40255 }, { "epoch": 1.34, "grad_norm": 0.4942222237586975, "learning_rate": 0.0003502406424773324, "loss": 1.7538, "step": 40256 }, { "epoch": 1.34, "grad_norm": 0.4742738902568817, "learning_rate": 0.0003502303379416745, "loss": 1.7908, "step": 40257 }, { "epoch": 1.34, "grad_norm": 0.49897098541259766, "learning_rate": 0.0003502200333450445, "loss": 1.761, "step": 40258 }, { "epoch": 1.34, "grad_norm": 0.49742457270622253, "learning_rate": 0.0003502097286874548, "loss": 1.7382, "step": 40259 }, { "epoch": 1.34, "grad_norm": 0.4974348247051239, "learning_rate": 0.00035019942396891785, "loss": 1.7861, "step": 40260 }, { "epoch": 1.34, "grad_norm": 0.4774673581123352, "learning_rate": 0.0003501891191894461, "loss": 1.7162, "step": 40261 }, { "epoch": 1.34, "grad_norm": 0.49460169672966003, "learning_rate": 0.0003501788143490523, "loss": 1.7301, "step": 40262 }, { "epoch": 1.34, "grad_norm": 0.48996663093566895, "learning_rate": 0.00035016850944774873, "loss": 1.7519, "step": 40263 }, { "epoch": 1.34, "grad_norm": 0.489000141620636, "learning_rate": 0.000350158204485548, "loss": 1.7052, "step": 40264 }, { "epoch": 1.34, "grad_norm": 0.49627968668937683, "learning_rate": 0.00035014789946246266, "loss": 1.8392, "step": 40265 }, { "epoch": 1.34, "grad_norm": 0.49212199449539185, "learning_rate": 0.0003501375943785051, "loss": 1.7909, "step": 40266 }, { "epoch": 1.34, "grad_norm": 0.5209914445877075, "learning_rate": 0.0003501272892336878, "loss": 1.9708, "step": 40267 }, { "epoch": 1.34, "grad_norm": 0.5005213022232056, "learning_rate": 0.0003501169840280235, "loss": 1.7942, "step": 40268 }, { "epoch": 1.34, "grad_norm": 0.48307010531425476, "learning_rate": 0.00035010667876152446, "loss": 1.7295, "step": 40269 }, { "epoch": 1.34, "grad_norm": 0.5026673078536987, "learning_rate": 0.00035009637343420347, "loss": 1.7871, "step": 40270 }, { "epoch": 1.34, "grad_norm": 0.49794700741767883, "learning_rate": 0.0003500860680460726, "loss": 1.8175, "step": 40271 }, { "epoch": 1.34, "grad_norm": 0.4999295175075531, "learning_rate": 0.00035007576259714487, "loss": 1.7622, "step": 40272 }, { "epoch": 1.34, "grad_norm": 0.5039845705032349, "learning_rate": 0.0003500654570874324, "loss": 1.7434, "step": 40273 }, { "epoch": 1.34, "grad_norm": 0.5103727579116821, "learning_rate": 0.00035005515151694785, "loss": 1.8138, "step": 40274 }, { "epoch": 1.34, "grad_norm": 0.48276379704475403, "learning_rate": 0.0003500448458857038, "loss": 1.7559, "step": 40275 }, { "epoch": 1.34, "grad_norm": 0.4783783555030823, "learning_rate": 0.00035003454019371256, "loss": 1.765, "step": 40276 }, { "epoch": 1.34, "grad_norm": 0.5048984885215759, "learning_rate": 0.0003500242344409869, "loss": 1.7027, "step": 40277 }, { "epoch": 1.34, "grad_norm": 0.49832841753959656, "learning_rate": 0.000350013928627539, "loss": 1.8036, "step": 40278 }, { "epoch": 1.34, "grad_norm": 0.6135749220848083, "learning_rate": 0.00035000362275338173, "loss": 1.798, "step": 40279 }, { "epoch": 1.34, "grad_norm": 0.4954094886779785, "learning_rate": 0.0003499933168185274, "loss": 1.8208, "step": 40280 }, { "epoch": 1.34, "grad_norm": 0.4998723268508911, "learning_rate": 0.0003499830108229885, "loss": 1.7219, "step": 40281 }, { "epoch": 1.34, "grad_norm": 0.49883347749710083, "learning_rate": 0.0003499727047667776, "loss": 1.7529, "step": 40282 }, { "epoch": 1.34, "grad_norm": 0.927158772945404, "learning_rate": 0.00034996239864990724, "loss": 1.7818, "step": 40283 }, { "epoch": 1.34, "grad_norm": 0.4983668327331543, "learning_rate": 0.00034995209247238993, "loss": 1.7459, "step": 40284 }, { "epoch": 1.34, "grad_norm": 0.48229268193244934, "learning_rate": 0.0003499417862342381, "loss": 1.7608, "step": 40285 }, { "epoch": 1.34, "grad_norm": 0.5040972828865051, "learning_rate": 0.00034993147993546425, "loss": 1.7372, "step": 40286 }, { "epoch": 1.34, "grad_norm": 0.47723284363746643, "learning_rate": 0.0003499211735760809, "loss": 1.6979, "step": 40287 }, { "epoch": 1.34, "grad_norm": 0.49457216262817383, "learning_rate": 0.0003499108671561007, "loss": 1.8036, "step": 40288 }, { "epoch": 1.34, "grad_norm": 0.48647743463516235, "learning_rate": 0.000349900560675536, "loss": 1.7733, "step": 40289 }, { "epoch": 1.34, "grad_norm": 0.5170348882675171, "learning_rate": 0.00034989025413439945, "loss": 1.854, "step": 40290 }, { "epoch": 1.34, "grad_norm": 0.5059666037559509, "learning_rate": 0.00034987994753270344, "loss": 1.8382, "step": 40291 }, { "epoch": 1.34, "grad_norm": 0.4712271988391876, "learning_rate": 0.0003498696408704605, "loss": 1.7649, "step": 40292 }, { "epoch": 1.34, "grad_norm": 0.4971754252910614, "learning_rate": 0.0003498593341476832, "loss": 1.8243, "step": 40293 }, { "epoch": 1.34, "grad_norm": 0.4815632700920105, "learning_rate": 0.000349849027364384, "loss": 1.6995, "step": 40294 }, { "epoch": 1.34, "grad_norm": 0.49160635471343994, "learning_rate": 0.0003498387205205755, "loss": 1.7129, "step": 40295 }, { "epoch": 1.34, "grad_norm": 0.62296462059021, "learning_rate": 0.00034982841361627, "loss": 1.7812, "step": 40296 }, { "epoch": 1.34, "grad_norm": 0.4912957549095154, "learning_rate": 0.0003498181066514802, "loss": 1.7797, "step": 40297 }, { "epoch": 1.34, "grad_norm": 0.49058762192726135, "learning_rate": 0.0003498077996262186, "loss": 1.7224, "step": 40298 }, { "epoch": 1.34, "grad_norm": 0.5044012069702148, "learning_rate": 0.0003497974925404976, "loss": 1.7643, "step": 40299 }, { "epoch": 1.34, "grad_norm": 0.4952147603034973, "learning_rate": 0.0003497871853943299, "loss": 1.8309, "step": 40300 }, { "epoch": 1.34, "grad_norm": 0.5026109218597412, "learning_rate": 0.00034977687818772766, "loss": 1.838, "step": 40301 }, { "epoch": 1.34, "grad_norm": 0.48203620314598083, "learning_rate": 0.00034976657092070383, "loss": 1.8259, "step": 40302 }, { "epoch": 1.34, "grad_norm": 0.5224032402038574, "learning_rate": 0.0003497562635932706, "loss": 1.7945, "step": 40303 }, { "epoch": 1.34, "grad_norm": 0.48388129472732544, "learning_rate": 0.0003497459562054407, "loss": 1.8231, "step": 40304 }, { "epoch": 1.34, "grad_norm": 0.48167717456817627, "learning_rate": 0.00034973564875722654, "loss": 1.7435, "step": 40305 }, { "epoch": 1.34, "grad_norm": 0.516170859336853, "learning_rate": 0.0003497253412486405, "loss": 1.8224, "step": 40306 }, { "epoch": 1.34, "grad_norm": 0.49814507365226746, "learning_rate": 0.00034971503367969543, "loss": 1.8076, "step": 40307 }, { "epoch": 1.34, "grad_norm": 0.4756006896495819, "learning_rate": 0.0003497047260504034, "loss": 1.8175, "step": 40308 }, { "epoch": 1.34, "grad_norm": 0.49084463715553284, "learning_rate": 0.00034969441836077734, "loss": 1.7966, "step": 40309 }, { "epoch": 1.34, "grad_norm": 0.5000073313713074, "learning_rate": 0.0003496841106108295, "loss": 1.7375, "step": 40310 }, { "epoch": 1.34, "grad_norm": 0.49640268087387085, "learning_rate": 0.0003496738028005725, "loss": 1.778, "step": 40311 }, { "epoch": 1.34, "grad_norm": 0.5090840458869934, "learning_rate": 0.00034966349493001874, "loss": 1.8231, "step": 40312 }, { "epoch": 1.34, "grad_norm": 1.8544570207595825, "learning_rate": 0.00034965318699918085, "loss": 1.8394, "step": 40313 }, { "epoch": 1.34, "grad_norm": 0.5012514591217041, "learning_rate": 0.00034964287900807146, "loss": 1.7585, "step": 40314 }, { "epoch": 1.34, "grad_norm": 0.490226149559021, "learning_rate": 0.00034963257095670276, "loss": 1.7641, "step": 40315 }, { "epoch": 1.34, "grad_norm": 0.4898875057697296, "learning_rate": 0.0003496222628450875, "loss": 1.7502, "step": 40316 }, { "epoch": 1.34, "grad_norm": 0.4808333218097687, "learning_rate": 0.00034961195467323803, "loss": 1.7497, "step": 40317 }, { "epoch": 1.34, "grad_norm": 0.5195238590240479, "learning_rate": 0.00034960164644116704, "loss": 1.82, "step": 40318 }, { "epoch": 1.34, "grad_norm": 0.5203297138214111, "learning_rate": 0.00034959133814888694, "loss": 1.8498, "step": 40319 }, { "epoch": 1.34, "grad_norm": 0.49240025877952576, "learning_rate": 0.0003495810297964103, "loss": 1.7796, "step": 40320 }, { "epoch": 1.34, "grad_norm": 0.5130475163459778, "learning_rate": 0.00034957072138374956, "loss": 1.8152, "step": 40321 }, { "epoch": 1.34, "grad_norm": 0.4877456724643707, "learning_rate": 0.0003495604129109172, "loss": 1.8369, "step": 40322 }, { "epoch": 1.34, "grad_norm": 0.5100469589233398, "learning_rate": 0.00034955010437792597, "loss": 1.7424, "step": 40323 }, { "epoch": 1.34, "grad_norm": 0.48756223917007446, "learning_rate": 0.00034953979578478806, "loss": 1.767, "step": 40324 }, { "epoch": 1.34, "grad_norm": 0.49528825283050537, "learning_rate": 0.00034952948713151623, "loss": 1.8086, "step": 40325 }, { "epoch": 1.34, "grad_norm": 0.4917730391025543, "learning_rate": 0.0003495191784181228, "loss": 1.7353, "step": 40326 }, { "epoch": 1.34, "grad_norm": 0.5199806094169617, "learning_rate": 0.0003495088696446205, "loss": 1.7909, "step": 40327 }, { "epoch": 1.34, "grad_norm": 0.5094093680381775, "learning_rate": 0.00034949856081102165, "loss": 1.8016, "step": 40328 }, { "epoch": 1.34, "grad_norm": 0.9337881207466125, "learning_rate": 0.0003494882519173389, "loss": 1.8222, "step": 40329 }, { "epoch": 1.34, "grad_norm": 0.4972154200077057, "learning_rate": 0.0003494779429635847, "loss": 1.7306, "step": 40330 }, { "epoch": 1.34, "grad_norm": 0.49991706013679504, "learning_rate": 0.00034946763394977143, "loss": 1.8513, "step": 40331 }, { "epoch": 1.34, "grad_norm": 0.4996972978115082, "learning_rate": 0.00034945732487591194, "loss": 1.7488, "step": 40332 }, { "epoch": 1.34, "grad_norm": 0.5238845944404602, "learning_rate": 0.0003494470157420184, "loss": 1.7765, "step": 40333 }, { "epoch": 1.34, "grad_norm": 0.5050780177116394, "learning_rate": 0.0003494367065481035, "loss": 1.7632, "step": 40334 }, { "epoch": 1.34, "grad_norm": 1.1833643913269043, "learning_rate": 0.00034942639729417977, "loss": 1.8101, "step": 40335 }, { "epoch": 1.34, "grad_norm": 0.49193209409713745, "learning_rate": 0.00034941608798025965, "loss": 1.7565, "step": 40336 }, { "epoch": 1.34, "grad_norm": 0.4986932575702667, "learning_rate": 0.00034940577860635567, "loss": 1.7711, "step": 40337 }, { "epoch": 1.34, "grad_norm": 0.4981546998023987, "learning_rate": 0.00034939546917248034, "loss": 1.8175, "step": 40338 }, { "epoch": 1.34, "grad_norm": 0.5026302337646484, "learning_rate": 0.00034938515967864624, "loss": 1.7644, "step": 40339 }, { "epoch": 1.34, "grad_norm": 0.4837019145488739, "learning_rate": 0.0003493748501248658, "loss": 1.8428, "step": 40340 }, { "epoch": 1.34, "grad_norm": 0.5153263807296753, "learning_rate": 0.0003493645405111516, "loss": 1.8286, "step": 40341 }, { "epoch": 1.34, "grad_norm": 0.5564974546432495, "learning_rate": 0.00034935423083751606, "loss": 1.8169, "step": 40342 }, { "epoch": 1.34, "grad_norm": 0.5080825090408325, "learning_rate": 0.0003493439211039718, "loss": 1.7731, "step": 40343 }, { "epoch": 1.34, "grad_norm": 0.5233741402626038, "learning_rate": 0.00034933361131053123, "loss": 1.7765, "step": 40344 }, { "epoch": 1.34, "grad_norm": 0.5088436603546143, "learning_rate": 0.0003493233014572071, "loss": 1.683, "step": 40345 }, { "epoch": 1.34, "grad_norm": 0.49475231766700745, "learning_rate": 0.00034931299154401156, "loss": 1.7967, "step": 40346 }, { "epoch": 1.34, "grad_norm": 0.5138410329818726, "learning_rate": 0.00034930268157095734, "loss": 1.8303, "step": 40347 }, { "epoch": 1.34, "grad_norm": 0.5204495787620544, "learning_rate": 0.000349292371538057, "loss": 1.7314, "step": 40348 }, { "epoch": 1.34, "grad_norm": 0.5097311735153198, "learning_rate": 0.0003492820614453229, "loss": 1.797, "step": 40349 }, { "epoch": 1.34, "grad_norm": 0.48721373081207275, "learning_rate": 0.00034927175129276775, "loss": 1.7711, "step": 40350 }, { "epoch": 1.34, "grad_norm": 0.510665774345398, "learning_rate": 0.0003492614410804038, "loss": 1.8028, "step": 40351 }, { "epoch": 1.34, "grad_norm": 0.5162487626075745, "learning_rate": 0.0003492511308082439, "loss": 1.6923, "step": 40352 }, { "epoch": 1.34, "grad_norm": 0.48605531454086304, "learning_rate": 0.0003492408204763003, "loss": 1.8139, "step": 40353 }, { "epoch": 1.34, "grad_norm": 0.5103665590286255, "learning_rate": 0.0003492305100845856, "loss": 1.8414, "step": 40354 }, { "epoch": 1.34, "grad_norm": 0.4784812927246094, "learning_rate": 0.00034922019963311225, "loss": 1.8133, "step": 40355 }, { "epoch": 1.34, "grad_norm": 0.524749755859375, "learning_rate": 0.00034920988912189293, "loss": 1.849, "step": 40356 }, { "epoch": 1.34, "grad_norm": 0.5160532593727112, "learning_rate": 0.00034919957855094003, "loss": 1.7364, "step": 40357 }, { "epoch": 1.34, "grad_norm": 0.5083151459693909, "learning_rate": 0.00034918926792026607, "loss": 1.7037, "step": 40358 }, { "epoch": 1.34, "grad_norm": 0.63029944896698, "learning_rate": 0.00034917895722988364, "loss": 1.76, "step": 40359 }, { "epoch": 1.34, "grad_norm": 0.47762101888656616, "learning_rate": 0.0003491686464798051, "loss": 1.783, "step": 40360 }, { "epoch": 1.34, "grad_norm": 0.4893766939640045, "learning_rate": 0.0003491583356700431, "loss": 1.6995, "step": 40361 }, { "epoch": 1.34, "grad_norm": 0.493367999792099, "learning_rate": 0.0003491480248006102, "loss": 1.792, "step": 40362 }, { "epoch": 1.34, "grad_norm": 0.49522849917411804, "learning_rate": 0.0003491377138715187, "loss": 1.7636, "step": 40363 }, { "epoch": 1.34, "grad_norm": 0.4741763770580292, "learning_rate": 0.0003491274028827814, "loss": 1.7726, "step": 40364 }, { "epoch": 1.34, "grad_norm": 0.5388326644897461, "learning_rate": 0.00034911709183441054, "loss": 1.7776, "step": 40365 }, { "epoch": 1.34, "grad_norm": 0.4810638427734375, "learning_rate": 0.0003491067807264189, "loss": 1.7737, "step": 40366 }, { "epoch": 1.34, "grad_norm": 0.49474114179611206, "learning_rate": 0.0003490964695588188, "loss": 1.7993, "step": 40367 }, { "epoch": 1.34, "grad_norm": 0.47427135705947876, "learning_rate": 0.0003490861583316228, "loss": 1.7376, "step": 40368 }, { "epoch": 1.34, "grad_norm": 0.5048269629478455, "learning_rate": 0.0003490758470448435, "loss": 1.8037, "step": 40369 }, { "epoch": 1.34, "grad_norm": 0.5099776387214661, "learning_rate": 0.0003490655356984933, "loss": 1.7586, "step": 40370 }, { "epoch": 1.34, "grad_norm": 0.4796532690525055, "learning_rate": 0.00034905522429258475, "loss": 1.754, "step": 40371 }, { "epoch": 1.34, "grad_norm": 0.513698160648346, "learning_rate": 0.00034904491282713047, "loss": 1.7995, "step": 40372 }, { "epoch": 1.34, "grad_norm": 0.49108487367630005, "learning_rate": 0.0003490346013021428, "loss": 1.7106, "step": 40373 }, { "epoch": 1.34, "grad_norm": 0.51319819688797, "learning_rate": 0.0003490242897176344, "loss": 1.7633, "step": 40374 }, { "epoch": 1.34, "grad_norm": 0.48951268196105957, "learning_rate": 0.00034901397807361766, "loss": 1.759, "step": 40375 }, { "epoch": 1.34, "grad_norm": 0.49448728561401367, "learning_rate": 0.00034900366637010533, "loss": 1.8049, "step": 40376 }, { "epoch": 1.34, "grad_norm": 0.4805509150028229, "learning_rate": 0.00034899335460710965, "loss": 1.7282, "step": 40377 }, { "epoch": 1.34, "grad_norm": 0.4873596727848053, "learning_rate": 0.00034898304278464326, "loss": 1.7462, "step": 40378 }, { "epoch": 1.34, "grad_norm": 0.4766843914985657, "learning_rate": 0.00034897273090271874, "loss": 1.6984, "step": 40379 }, { "epoch": 1.34, "grad_norm": 0.4933047294616699, "learning_rate": 0.00034896241896134844, "loss": 1.7711, "step": 40380 }, { "epoch": 1.34, "grad_norm": 0.5095974802970886, "learning_rate": 0.00034895210696054505, "loss": 1.8252, "step": 40381 }, { "epoch": 1.34, "grad_norm": 0.4909382462501526, "learning_rate": 0.0003489417949003211, "loss": 1.6363, "step": 40382 }, { "epoch": 1.34, "grad_norm": 0.5014432668685913, "learning_rate": 0.0003489314827806889, "loss": 1.8015, "step": 40383 }, { "epoch": 1.34, "grad_norm": 0.4749406576156616, "learning_rate": 0.00034892117060166103, "loss": 1.8214, "step": 40384 }, { "epoch": 1.34, "grad_norm": 0.47222593426704407, "learning_rate": 0.0003489108583632502, "loss": 1.7321, "step": 40385 }, { "epoch": 1.34, "grad_norm": 0.48622164130210876, "learning_rate": 0.0003489005460654687, "loss": 1.7107, "step": 40386 }, { "epoch": 1.34, "grad_norm": 0.5056439638137817, "learning_rate": 0.00034889023370832925, "loss": 1.7908, "step": 40387 }, { "epoch": 1.34, "grad_norm": 0.5029426217079163, "learning_rate": 0.0003488799212918441, "loss": 1.7715, "step": 40388 }, { "epoch": 1.34, "grad_norm": 0.49212852120399475, "learning_rate": 0.0003488696088160261, "loss": 1.7709, "step": 40389 }, { "epoch": 1.34, "grad_norm": 0.4723786413669586, "learning_rate": 0.00034885929628088755, "loss": 1.7501, "step": 40390 }, { "epoch": 1.34, "grad_norm": 0.4860524535179138, "learning_rate": 0.0003488489836864409, "loss": 1.812, "step": 40391 }, { "epoch": 1.34, "grad_norm": 0.5022309422492981, "learning_rate": 0.00034883867103269894, "loss": 1.7739, "step": 40392 }, { "epoch": 1.34, "grad_norm": 0.4937502443790436, "learning_rate": 0.00034882835831967393, "loss": 1.7946, "step": 40393 }, { "epoch": 1.34, "grad_norm": 0.48326006531715393, "learning_rate": 0.0003488180455473785, "loss": 1.7848, "step": 40394 }, { "epoch": 1.34, "grad_norm": 0.47279632091522217, "learning_rate": 0.0003488077327158251, "loss": 1.7257, "step": 40395 }, { "epoch": 1.34, "grad_norm": 0.47934243083000183, "learning_rate": 0.00034879741982502644, "loss": 1.7521, "step": 40396 }, { "epoch": 1.34, "grad_norm": 0.5029489994049072, "learning_rate": 0.00034878710687499484, "loss": 1.7068, "step": 40397 }, { "epoch": 1.34, "grad_norm": 0.4848395586013794, "learning_rate": 0.00034877679386574286, "loss": 1.7631, "step": 40398 }, { "epoch": 1.34, "grad_norm": 0.5490521788597107, "learning_rate": 0.00034876648079728304, "loss": 1.772, "step": 40399 }, { "epoch": 1.34, "grad_norm": 0.49518951773643494, "learning_rate": 0.0003487561676696279, "loss": 1.8047, "step": 40400 }, { "epoch": 1.34, "grad_norm": 0.5680888891220093, "learning_rate": 0.00034874585448279, "loss": 1.7253, "step": 40401 }, { "epoch": 1.34, "grad_norm": 0.469668984413147, "learning_rate": 0.00034873554123678177, "loss": 1.7079, "step": 40402 }, { "epoch": 1.34, "grad_norm": 0.48716112971305847, "learning_rate": 0.0003487252279316157, "loss": 1.7345, "step": 40403 }, { "epoch": 1.34, "grad_norm": 0.5002105832099915, "learning_rate": 0.0003487149145673045, "loss": 1.8013, "step": 40404 }, { "epoch": 1.34, "grad_norm": 0.4824533760547638, "learning_rate": 0.0003487046011438605, "loss": 1.7174, "step": 40405 }, { "epoch": 1.34, "grad_norm": 0.4902653694152832, "learning_rate": 0.00034869428766129635, "loss": 1.7271, "step": 40406 }, { "epoch": 1.34, "grad_norm": 0.5027084946632385, "learning_rate": 0.0003486839741196246, "loss": 1.7761, "step": 40407 }, { "epoch": 1.34, "grad_norm": 0.51276695728302, "learning_rate": 0.0003486736605188575, "loss": 1.7315, "step": 40408 }, { "epoch": 1.34, "grad_norm": 0.5141302347183228, "learning_rate": 0.0003486633468590078, "loss": 1.7492, "step": 40409 }, { "epoch": 1.34, "grad_norm": 0.5006996989250183, "learning_rate": 0.000348653033140088, "loss": 1.8602, "step": 40410 }, { "epoch": 1.34, "grad_norm": 0.5073723196983337, "learning_rate": 0.00034864271936211054, "loss": 1.8384, "step": 40411 }, { "epoch": 1.34, "grad_norm": 0.508023738861084, "learning_rate": 0.00034863240552508804, "loss": 1.7156, "step": 40412 }, { "epoch": 1.34, "grad_norm": 0.4933914244174957, "learning_rate": 0.0003486220916290329, "loss": 1.7628, "step": 40413 }, { "epoch": 1.34, "grad_norm": 0.48444560170173645, "learning_rate": 0.00034861177767395776, "loss": 1.8052, "step": 40414 }, { "epoch": 1.34, "grad_norm": 0.4823615849018097, "learning_rate": 0.0003486014636598751, "loss": 1.7658, "step": 40415 }, { "epoch": 1.34, "grad_norm": 0.49728628993034363, "learning_rate": 0.00034859114958679736, "loss": 1.7661, "step": 40416 }, { "epoch": 1.34, "grad_norm": 0.4781540632247925, "learning_rate": 0.0003485808354547372, "loss": 1.7517, "step": 40417 }, { "epoch": 1.34, "grad_norm": 0.49444466829299927, "learning_rate": 0.00034857052126370704, "loss": 1.8018, "step": 40418 }, { "epoch": 1.34, "grad_norm": 0.4833320081233978, "learning_rate": 0.0003485602070137194, "loss": 1.7938, "step": 40419 }, { "epoch": 1.34, "grad_norm": 0.4860074818134308, "learning_rate": 0.00034854989270478675, "loss": 1.7875, "step": 40420 }, { "epoch": 1.34, "grad_norm": 0.4900722801685333, "learning_rate": 0.00034853957833692184, "loss": 1.7248, "step": 40421 }, { "epoch": 1.34, "grad_norm": 0.4908737540245056, "learning_rate": 0.00034852926391013695, "loss": 1.793, "step": 40422 }, { "epoch": 1.34, "grad_norm": 0.48200809955596924, "learning_rate": 0.0003485189494244447, "loss": 1.7569, "step": 40423 }, { "epoch": 1.34, "grad_norm": 0.4913383722305298, "learning_rate": 0.00034850863487985765, "loss": 1.7285, "step": 40424 }, { "epoch": 1.34, "grad_norm": 0.482686847448349, "learning_rate": 0.0003484983202763881, "loss": 1.799, "step": 40425 }, { "epoch": 1.34, "grad_norm": 0.4799867570400238, "learning_rate": 0.0003484880056140488, "loss": 1.6623, "step": 40426 }, { "epoch": 1.35, "grad_norm": 0.494917094707489, "learning_rate": 0.0003484776908928523, "loss": 1.6547, "step": 40427 }, { "epoch": 1.35, "grad_norm": 0.5020718574523926, "learning_rate": 0.00034846737611281096, "loss": 1.7245, "step": 40428 }, { "epoch": 1.35, "grad_norm": 0.48800748586654663, "learning_rate": 0.00034845706127393735, "loss": 1.7181, "step": 40429 }, { "epoch": 1.35, "grad_norm": 0.495853066444397, "learning_rate": 0.000348446746376244, "loss": 1.7393, "step": 40430 }, { "epoch": 1.35, "grad_norm": 0.5270341634750366, "learning_rate": 0.00034843643141974355, "loss": 1.7959, "step": 40431 }, { "epoch": 1.35, "grad_norm": 0.5097363591194153, "learning_rate": 0.0003484261164044483, "loss": 1.7568, "step": 40432 }, { "epoch": 1.35, "grad_norm": 0.4917753040790558, "learning_rate": 0.0003484158013303709, "loss": 1.8165, "step": 40433 }, { "epoch": 1.35, "grad_norm": 0.5016729831695557, "learning_rate": 0.00034840548619752385, "loss": 1.7508, "step": 40434 }, { "epoch": 1.35, "grad_norm": 0.5319875478744507, "learning_rate": 0.0003483951710059197, "loss": 1.7719, "step": 40435 }, { "epoch": 1.35, "grad_norm": 0.5149485468864441, "learning_rate": 0.0003483848557555709, "loss": 1.7224, "step": 40436 }, { "epoch": 1.35, "grad_norm": 0.489601731300354, "learning_rate": 0.0003483745404464901, "loss": 1.7413, "step": 40437 }, { "epoch": 1.35, "grad_norm": 0.5154595375061035, "learning_rate": 0.0003483642250786896, "loss": 1.7299, "step": 40438 }, { "epoch": 1.35, "grad_norm": 0.5007061958312988, "learning_rate": 0.00034835390965218217, "loss": 1.7331, "step": 40439 }, { "epoch": 1.35, "grad_norm": 0.511133074760437, "learning_rate": 0.00034834359416698014, "loss": 1.7916, "step": 40440 }, { "epoch": 1.35, "grad_norm": 0.5071730613708496, "learning_rate": 0.0003483332786230961, "loss": 1.7672, "step": 40441 }, { "epoch": 1.35, "grad_norm": 0.48344799876213074, "learning_rate": 0.0003483229630205428, "loss": 1.8006, "step": 40442 }, { "epoch": 1.35, "grad_norm": 0.5030834674835205, "learning_rate": 0.00034831264735933225, "loss": 1.7687, "step": 40443 }, { "epoch": 1.35, "grad_norm": 0.5000942349433899, "learning_rate": 0.0003483023316394774, "loss": 1.7845, "step": 40444 }, { "epoch": 1.35, "grad_norm": 0.5039791464805603, "learning_rate": 0.0003482920158609906, "loss": 1.7526, "step": 40445 }, { "epoch": 1.35, "grad_norm": 0.51105797290802, "learning_rate": 0.00034828170002388443, "loss": 1.8126, "step": 40446 }, { "epoch": 1.35, "grad_norm": 0.49877622723579407, "learning_rate": 0.0003482713841281715, "loss": 1.8066, "step": 40447 }, { "epoch": 1.35, "grad_norm": 0.4997020661830902, "learning_rate": 0.000348261068173864, "loss": 1.7519, "step": 40448 }, { "epoch": 1.35, "grad_norm": 0.4743098318576813, "learning_rate": 0.00034825075216097487, "loss": 1.7961, "step": 40449 }, { "epoch": 1.35, "grad_norm": 0.4816863238811493, "learning_rate": 0.00034824043608951626, "loss": 1.7857, "step": 40450 }, { "epoch": 1.35, "grad_norm": 0.5543760657310486, "learning_rate": 0.00034823011995950104, "loss": 1.7503, "step": 40451 }, { "epoch": 1.35, "grad_norm": 0.5007608532905579, "learning_rate": 0.00034821980377094144, "loss": 1.7717, "step": 40452 }, { "epoch": 1.35, "grad_norm": 0.48868292570114136, "learning_rate": 0.00034820948752385016, "loss": 1.7193, "step": 40453 }, { "epoch": 1.35, "grad_norm": 0.48740607500076294, "learning_rate": 0.0003481991712182396, "loss": 1.7564, "step": 40454 }, { "epoch": 1.35, "grad_norm": 0.4935879111289978, "learning_rate": 0.0003481888548541225, "loss": 1.8067, "step": 40455 }, { "epoch": 1.35, "grad_norm": 0.4936167299747467, "learning_rate": 0.0003481785384315111, "loss": 1.7872, "step": 40456 }, { "epoch": 1.35, "grad_norm": 0.5017074346542358, "learning_rate": 0.0003481682219504181, "loss": 1.7339, "step": 40457 }, { "epoch": 1.35, "grad_norm": 0.5100542306900024, "learning_rate": 0.0003481579054108559, "loss": 1.7637, "step": 40458 }, { "epoch": 1.35, "grad_norm": 0.49438104033470154, "learning_rate": 0.0003481475888128372, "loss": 1.8159, "step": 40459 }, { "epoch": 1.35, "grad_norm": 0.509976327419281, "learning_rate": 0.00034813727215637436, "loss": 1.7893, "step": 40460 }, { "epoch": 1.35, "grad_norm": 0.5123804211616516, "learning_rate": 0.00034812695544148, "loss": 1.7442, "step": 40461 }, { "epoch": 1.35, "grad_norm": 0.5242558121681213, "learning_rate": 0.00034811663866816657, "loss": 1.7164, "step": 40462 }, { "epoch": 1.35, "grad_norm": 0.48622632026672363, "learning_rate": 0.00034810632183644664, "loss": 1.7952, "step": 40463 }, { "epoch": 1.35, "grad_norm": 0.4780398905277252, "learning_rate": 0.0003480960049463328, "loss": 1.7374, "step": 40464 }, { "epoch": 1.35, "grad_norm": 0.4888232946395874, "learning_rate": 0.0003480856879978374, "loss": 1.7839, "step": 40465 }, { "epoch": 1.35, "grad_norm": 0.48772335052490234, "learning_rate": 0.00034807537099097307, "loss": 1.7882, "step": 40466 }, { "epoch": 1.35, "grad_norm": 0.512380838394165, "learning_rate": 0.0003480650539257524, "loss": 1.8063, "step": 40467 }, { "epoch": 1.35, "grad_norm": 0.5065089464187622, "learning_rate": 0.0003480547368021877, "loss": 1.8229, "step": 40468 }, { "epoch": 1.35, "grad_norm": 0.4972948729991913, "learning_rate": 0.00034804441962029174, "loss": 1.7495, "step": 40469 }, { "epoch": 1.35, "grad_norm": 0.4965653121471405, "learning_rate": 0.0003480341023800769, "loss": 1.733, "step": 40470 }, { "epoch": 1.35, "grad_norm": 0.5038070678710938, "learning_rate": 0.0003480237850815558, "loss": 1.7652, "step": 40471 }, { "epoch": 1.35, "grad_norm": 0.48490041494369507, "learning_rate": 0.0003480134677247408, "loss": 1.7158, "step": 40472 }, { "epoch": 1.35, "grad_norm": 0.5264677405357361, "learning_rate": 0.00034800315030964455, "loss": 1.7868, "step": 40473 }, { "epoch": 1.35, "grad_norm": 0.47385838627815247, "learning_rate": 0.00034799283283627963, "loss": 1.769, "step": 40474 }, { "epoch": 1.35, "grad_norm": 0.4735817015171051, "learning_rate": 0.0003479825153046583, "loss": 1.7711, "step": 40475 }, { "epoch": 1.35, "grad_norm": 0.5026703476905823, "learning_rate": 0.0003479721977147935, "loss": 1.7656, "step": 40476 }, { "epoch": 1.35, "grad_norm": 0.5031024217605591, "learning_rate": 0.00034796188006669734, "loss": 1.7093, "step": 40477 }, { "epoch": 1.35, "grad_norm": 0.5074358582496643, "learning_rate": 0.00034795156236038264, "loss": 1.9202, "step": 40478 }, { "epoch": 1.35, "grad_norm": 0.6245086789131165, "learning_rate": 0.0003479412445958618, "loss": 1.8453, "step": 40479 }, { "epoch": 1.35, "grad_norm": 0.4809606075286865, "learning_rate": 0.00034793092677314725, "loss": 1.819, "step": 40480 }, { "epoch": 1.35, "grad_norm": 0.49454107880592346, "learning_rate": 0.0003479206088922517, "loss": 1.8066, "step": 40481 }, { "epoch": 1.35, "grad_norm": 0.5124680995941162, "learning_rate": 0.0003479102909531875, "loss": 1.7063, "step": 40482 }, { "epoch": 1.35, "grad_norm": 0.5153775811195374, "learning_rate": 0.0003478999729559674, "loss": 1.7176, "step": 40483 }, { "epoch": 1.35, "grad_norm": 0.5109792947769165, "learning_rate": 0.0003478896549006037, "loss": 1.7803, "step": 40484 }, { "epoch": 1.35, "grad_norm": 0.4901265501976013, "learning_rate": 0.00034787933678710905, "loss": 1.7505, "step": 40485 }, { "epoch": 1.35, "grad_norm": 0.4959958493709564, "learning_rate": 0.00034786901861549597, "loss": 1.7908, "step": 40486 }, { "epoch": 1.35, "grad_norm": 0.5002166032791138, "learning_rate": 0.00034785870038577687, "loss": 1.8131, "step": 40487 }, { "epoch": 1.35, "grad_norm": 0.48733678460121155, "learning_rate": 0.00034784838209796443, "loss": 1.7967, "step": 40488 }, { "epoch": 1.35, "grad_norm": 0.5092732310295105, "learning_rate": 0.00034783806375207106, "loss": 1.7649, "step": 40489 }, { "epoch": 1.35, "grad_norm": 0.5021623969078064, "learning_rate": 0.0003478277453481093, "loss": 1.7229, "step": 40490 }, { "epoch": 1.35, "grad_norm": 0.48002636432647705, "learning_rate": 0.00034781742688609184, "loss": 1.7929, "step": 40491 }, { "epoch": 1.35, "grad_norm": 0.4823508858680725, "learning_rate": 0.0003478071083660309, "loss": 1.7181, "step": 40492 }, { "epoch": 1.35, "grad_norm": 0.5004793405532837, "learning_rate": 0.0003477967897879393, "loss": 1.7341, "step": 40493 }, { "epoch": 1.35, "grad_norm": 0.48100417852401733, "learning_rate": 0.0003477864711518294, "loss": 1.7358, "step": 40494 }, { "epoch": 1.35, "grad_norm": 0.5165013670921326, "learning_rate": 0.00034777615245771377, "loss": 1.722, "step": 40495 }, { "epoch": 1.35, "grad_norm": 0.4864886999130249, "learning_rate": 0.00034776583370560487, "loss": 1.7765, "step": 40496 }, { "epoch": 1.35, "grad_norm": 0.5261656641960144, "learning_rate": 0.0003477555148955154, "loss": 1.7873, "step": 40497 }, { "epoch": 1.35, "grad_norm": 0.5078399181365967, "learning_rate": 0.00034774519602745767, "loss": 1.6984, "step": 40498 }, { "epoch": 1.35, "grad_norm": 0.49373579025268555, "learning_rate": 0.0003477348771014444, "loss": 1.7865, "step": 40499 }, { "epoch": 1.35, "grad_norm": 0.5190079212188721, "learning_rate": 0.000347724558117488, "loss": 1.8378, "step": 40500 }, { "epoch": 1.35, "grad_norm": 0.5272504091262817, "learning_rate": 0.0003477142390756009, "loss": 1.7892, "step": 40501 }, { "epoch": 1.35, "grad_norm": 0.49774378538131714, "learning_rate": 0.0003477039199757959, "loss": 1.7564, "step": 40502 }, { "epoch": 1.35, "grad_norm": 0.5069933533668518, "learning_rate": 0.0003476936008180852, "loss": 1.7852, "step": 40503 }, { "epoch": 1.35, "grad_norm": 0.5005585551261902, "learning_rate": 0.0003476832816024817, "loss": 1.7715, "step": 40504 }, { "epoch": 1.35, "grad_norm": 0.484002023935318, "learning_rate": 0.00034767296232899756, "loss": 1.7911, "step": 40505 }, { "epoch": 1.35, "grad_norm": 0.5129851698875427, "learning_rate": 0.00034766264299764557, "loss": 1.7802, "step": 40506 }, { "epoch": 1.35, "grad_norm": 0.49340203404426575, "learning_rate": 0.0003476523236084381, "loss": 1.7562, "step": 40507 }, { "epoch": 1.35, "grad_norm": 0.4891430139541626, "learning_rate": 0.0003476420041613877, "loss": 1.7783, "step": 40508 }, { "epoch": 1.35, "grad_norm": 0.46426641941070557, "learning_rate": 0.00034763168465650694, "loss": 1.7355, "step": 40509 }, { "epoch": 1.35, "grad_norm": 0.47562482953071594, "learning_rate": 0.0003476213650938084, "loss": 1.7593, "step": 40510 }, { "epoch": 1.35, "grad_norm": 0.5125175714492798, "learning_rate": 0.00034761104547330454, "loss": 1.928, "step": 40511 }, { "epoch": 1.35, "grad_norm": 0.4901219308376312, "learning_rate": 0.00034760072579500775, "loss": 1.7489, "step": 40512 }, { "epoch": 1.35, "grad_norm": 0.48518186807632446, "learning_rate": 0.0003475904060589309, "loss": 1.8495, "step": 40513 }, { "epoch": 1.35, "grad_norm": 0.4864629805088043, "learning_rate": 0.0003475800862650861, "loss": 1.745, "step": 40514 }, { "epoch": 1.35, "grad_norm": 0.4882284998893738, "learning_rate": 0.00034756976641348627, "loss": 1.8169, "step": 40515 }, { "epoch": 1.35, "grad_norm": 0.5017310380935669, "learning_rate": 0.0003475594465041436, "loss": 1.7755, "step": 40516 }, { "epoch": 1.35, "grad_norm": 0.484615683555603, "learning_rate": 0.0003475491265370708, "loss": 1.7835, "step": 40517 }, { "epoch": 1.35, "grad_norm": 0.49766993522644043, "learning_rate": 0.00034753880651228045, "loss": 1.7907, "step": 40518 }, { "epoch": 1.35, "grad_norm": 0.4969736337661743, "learning_rate": 0.00034752848642978496, "loss": 1.7931, "step": 40519 }, { "epoch": 1.35, "grad_norm": 0.5107985138893127, "learning_rate": 0.0003475181662895969, "loss": 1.8257, "step": 40520 }, { "epoch": 1.35, "grad_norm": 0.5129351615905762, "learning_rate": 0.00034750784609172873, "loss": 1.8043, "step": 40521 }, { "epoch": 1.35, "grad_norm": 0.4962517023086548, "learning_rate": 0.00034749752583619305, "loss": 1.7385, "step": 40522 }, { "epoch": 1.35, "grad_norm": 0.6274890303611755, "learning_rate": 0.0003474872055230024, "loss": 1.7591, "step": 40523 }, { "epoch": 1.35, "grad_norm": 0.49149924516677856, "learning_rate": 0.0003474768851521693, "loss": 1.6755, "step": 40524 }, { "epoch": 1.35, "grad_norm": 0.4828598201274872, "learning_rate": 0.0003474665647237062, "loss": 1.7449, "step": 40525 }, { "epoch": 1.35, "grad_norm": 0.49984604120254517, "learning_rate": 0.0003474562442376258, "loss": 1.8194, "step": 40526 }, { "epoch": 1.35, "grad_norm": 0.4899144768714905, "learning_rate": 0.0003474459236939404, "loss": 1.8445, "step": 40527 }, { "epoch": 1.35, "grad_norm": 0.5194799900054932, "learning_rate": 0.00034743560309266256, "loss": 1.7982, "step": 40528 }, { "epoch": 1.35, "grad_norm": 0.5078507661819458, "learning_rate": 0.00034742528243380513, "loss": 1.7861, "step": 40529 }, { "epoch": 1.35, "grad_norm": 0.49641454219818115, "learning_rate": 0.00034741496171738015, "loss": 1.7537, "step": 40530 }, { "epoch": 1.35, "grad_norm": 0.49670419096946716, "learning_rate": 0.0003474046409434005, "loss": 1.8244, "step": 40531 }, { "epoch": 1.35, "grad_norm": 0.4956522583961487, "learning_rate": 0.00034739432011187863, "loss": 1.7374, "step": 40532 }, { "epoch": 1.35, "grad_norm": 0.508427083492279, "learning_rate": 0.000347383999222827, "loss": 1.7296, "step": 40533 }, { "epoch": 1.35, "grad_norm": 0.48296278715133667, "learning_rate": 0.00034737367827625823, "loss": 1.7271, "step": 40534 }, { "epoch": 1.35, "grad_norm": 0.48544618487358093, "learning_rate": 0.00034736335727218466, "loss": 1.7018, "step": 40535 }, { "epoch": 1.35, "grad_norm": 0.5150669813156128, "learning_rate": 0.00034735303621061914, "loss": 1.8535, "step": 40536 }, { "epoch": 1.35, "grad_norm": 0.48094913363456726, "learning_rate": 0.0003473427150915738, "loss": 1.7611, "step": 40537 }, { "epoch": 1.35, "grad_norm": 0.4845554232597351, "learning_rate": 0.00034733239391506154, "loss": 1.7721, "step": 40538 }, { "epoch": 1.35, "grad_norm": 0.5149475336074829, "learning_rate": 0.0003473220726810947, "loss": 1.8233, "step": 40539 }, { "epoch": 1.35, "grad_norm": 0.5040658712387085, "learning_rate": 0.0003473117513896858, "loss": 1.7285, "step": 40540 }, { "epoch": 1.35, "grad_norm": 0.5173549652099609, "learning_rate": 0.0003473014300408475, "loss": 1.7033, "step": 40541 }, { "epoch": 1.35, "grad_norm": 0.5035734176635742, "learning_rate": 0.000347291108634592, "loss": 1.7244, "step": 40542 }, { "epoch": 1.35, "grad_norm": 0.4978087246417999, "learning_rate": 0.00034728078717093233, "loss": 1.769, "step": 40543 }, { "epoch": 1.35, "grad_norm": 0.478108286857605, "learning_rate": 0.00034727046564988063, "loss": 1.7401, "step": 40544 }, { "epoch": 1.35, "grad_norm": 0.47563493251800537, "learning_rate": 0.00034726014407144955, "loss": 1.7179, "step": 40545 }, { "epoch": 1.35, "grad_norm": 0.4844539761543274, "learning_rate": 0.0003472498224356516, "loss": 1.7854, "step": 40546 }, { "epoch": 1.35, "grad_norm": 0.49013209342956543, "learning_rate": 0.00034723950074249933, "loss": 1.8428, "step": 40547 }, { "epoch": 1.35, "grad_norm": 0.5152318477630615, "learning_rate": 0.00034722917899200536, "loss": 1.8212, "step": 40548 }, { "epoch": 1.35, "grad_norm": 0.48919859528541565, "learning_rate": 0.00034721885718418203, "loss": 1.7779, "step": 40549 }, { "epoch": 1.35, "grad_norm": 0.4865686297416687, "learning_rate": 0.00034720853531904197, "loss": 1.8057, "step": 40550 }, { "epoch": 1.35, "grad_norm": 0.4926414489746094, "learning_rate": 0.00034719821339659776, "loss": 1.8103, "step": 40551 }, { "epoch": 1.35, "grad_norm": 0.4984860420227051, "learning_rate": 0.0003471878914168618, "loss": 1.7581, "step": 40552 }, { "epoch": 1.35, "grad_norm": 0.5109918713569641, "learning_rate": 0.0003471775693798467, "loss": 1.7757, "step": 40553 }, { "epoch": 1.35, "grad_norm": 0.49944910407066345, "learning_rate": 0.00034716724728556513, "loss": 1.7586, "step": 40554 }, { "epoch": 1.35, "grad_norm": 0.5228145122528076, "learning_rate": 0.00034715692513402933, "loss": 1.6882, "step": 40555 }, { "epoch": 1.35, "grad_norm": 0.4880029559135437, "learning_rate": 0.00034714660292525193, "loss": 1.8055, "step": 40556 }, { "epoch": 1.35, "grad_norm": 0.4983098804950714, "learning_rate": 0.0003471362806592456, "loss": 1.7362, "step": 40557 }, { "epoch": 1.35, "grad_norm": 0.5097794532775879, "learning_rate": 0.0003471259583360227, "loss": 1.8012, "step": 40558 }, { "epoch": 1.35, "grad_norm": 0.5011429786682129, "learning_rate": 0.0003471156359555959, "loss": 1.7656, "step": 40559 }, { "epoch": 1.35, "grad_norm": 0.49689340591430664, "learning_rate": 0.0003471053135179776, "loss": 1.8128, "step": 40560 }, { "epoch": 1.35, "grad_norm": 0.49568721652030945, "learning_rate": 0.00034709499102318045, "loss": 1.8459, "step": 40561 }, { "epoch": 1.35, "grad_norm": 0.4847022593021393, "learning_rate": 0.00034708466847121686, "loss": 1.7435, "step": 40562 }, { "epoch": 1.35, "grad_norm": 0.48422005772590637, "learning_rate": 0.00034707434586209944, "loss": 1.7929, "step": 40563 }, { "epoch": 1.35, "grad_norm": 0.4916488230228424, "learning_rate": 0.0003470640231958407, "loss": 1.85, "step": 40564 }, { "epoch": 1.35, "grad_norm": 0.49240642786026, "learning_rate": 0.00034705370047245315, "loss": 1.7946, "step": 40565 }, { "epoch": 1.35, "grad_norm": 0.5190185904502869, "learning_rate": 0.00034704337769194944, "loss": 1.8116, "step": 40566 }, { "epoch": 1.35, "grad_norm": 0.48735278844833374, "learning_rate": 0.0003470330548543419, "loss": 1.7889, "step": 40567 }, { "epoch": 1.35, "grad_norm": 0.4964752793312073, "learning_rate": 0.00034702273195964323, "loss": 1.7088, "step": 40568 }, { "epoch": 1.35, "grad_norm": 0.4903014600276947, "learning_rate": 0.00034701240900786583, "loss": 1.7409, "step": 40569 }, { "epoch": 1.35, "grad_norm": 0.4806925058364868, "learning_rate": 0.0003470020859990223, "loss": 1.8104, "step": 40570 }, { "epoch": 1.35, "grad_norm": 0.4916372001171112, "learning_rate": 0.00034699176293312515, "loss": 1.8384, "step": 40571 }, { "epoch": 1.35, "grad_norm": 1.2164264917373657, "learning_rate": 0.0003469814398101869, "loss": 1.839, "step": 40572 }, { "epoch": 1.35, "grad_norm": 0.4897213280200958, "learning_rate": 0.0003469711166302203, "loss": 1.7815, "step": 40573 }, { "epoch": 1.35, "grad_norm": 0.4811577796936035, "learning_rate": 0.0003469607933932375, "loss": 1.7249, "step": 40574 }, { "epoch": 1.35, "grad_norm": 0.4953341484069824, "learning_rate": 0.00034695047009925125, "loss": 1.7551, "step": 40575 }, { "epoch": 1.35, "grad_norm": 0.48692232370376587, "learning_rate": 0.0003469401467482741, "loss": 1.7907, "step": 40576 }, { "epoch": 1.35, "grad_norm": 0.48821520805358887, "learning_rate": 0.0003469298233403184, "loss": 1.8623, "step": 40577 }, { "epoch": 1.35, "grad_norm": 0.5111247301101685, "learning_rate": 0.0003469194998753969, "loss": 1.7221, "step": 40578 }, { "epoch": 1.35, "grad_norm": 0.468107134103775, "learning_rate": 0.000346909176353522, "loss": 1.7721, "step": 40579 }, { "epoch": 1.35, "grad_norm": 0.4852878153324127, "learning_rate": 0.00034689885277470637, "loss": 1.7788, "step": 40580 }, { "epoch": 1.35, "grad_norm": 0.4933781623840332, "learning_rate": 0.00034688852913896236, "loss": 1.665, "step": 40581 }, { "epoch": 1.35, "grad_norm": 0.4806050956249237, "learning_rate": 0.0003468782054463026, "loss": 1.7991, "step": 40582 }, { "epoch": 1.35, "grad_norm": 0.48472824692726135, "learning_rate": 0.0003468678816967396, "loss": 1.7244, "step": 40583 }, { "epoch": 1.35, "grad_norm": 0.5206685066223145, "learning_rate": 0.00034685755789028595, "loss": 1.729, "step": 40584 }, { "epoch": 1.35, "grad_norm": 0.49722129106521606, "learning_rate": 0.000346847234026954, "loss": 1.6893, "step": 40585 }, { "epoch": 1.35, "grad_norm": 0.4902270436286926, "learning_rate": 0.00034683691010675656, "loss": 1.7874, "step": 40586 }, { "epoch": 1.35, "grad_norm": 0.5054392218589783, "learning_rate": 0.00034682658612970597, "loss": 1.8799, "step": 40587 }, { "epoch": 1.35, "grad_norm": 1.1023563146591187, "learning_rate": 0.0003468162620958147, "loss": 1.8675, "step": 40588 }, { "epoch": 1.35, "grad_norm": 0.49880173802375793, "learning_rate": 0.0003468059380050956, "loss": 1.8065, "step": 40589 }, { "epoch": 1.35, "grad_norm": 0.4977695941925049, "learning_rate": 0.00034679561385756076, "loss": 1.8217, "step": 40590 }, { "epoch": 1.35, "grad_norm": 0.4914073050022125, "learning_rate": 0.00034678528965322307, "loss": 1.6915, "step": 40591 }, { "epoch": 1.35, "grad_norm": 0.4820520281791687, "learning_rate": 0.00034677496539209477, "loss": 1.6809, "step": 40592 }, { "epoch": 1.35, "grad_norm": 0.506036102771759, "learning_rate": 0.0003467646410741888, "loss": 1.7609, "step": 40593 }, { "epoch": 1.35, "grad_norm": 0.4822937846183777, "learning_rate": 0.00034675431669951726, "loss": 1.7822, "step": 40594 }, { "epoch": 1.35, "grad_norm": 0.4990537166595459, "learning_rate": 0.00034674399226809294, "loss": 1.7633, "step": 40595 }, { "epoch": 1.35, "grad_norm": 0.4939209818840027, "learning_rate": 0.0003467336677799283, "loss": 1.7705, "step": 40596 }, { "epoch": 1.35, "grad_norm": 0.4942984879016876, "learning_rate": 0.00034672334323503583, "loss": 1.747, "step": 40597 }, { "epoch": 1.35, "grad_norm": 0.48393362760543823, "learning_rate": 0.00034671301863342825, "loss": 1.7494, "step": 40598 }, { "epoch": 1.35, "grad_norm": 0.488958477973938, "learning_rate": 0.0003467026939751177, "loss": 1.7745, "step": 40599 }, { "epoch": 1.35, "grad_norm": 0.48383715748786926, "learning_rate": 0.0003466923692601172, "loss": 1.8018, "step": 40600 }, { "epoch": 1.35, "grad_norm": 0.48025402426719666, "learning_rate": 0.0003466820444884389, "loss": 1.7727, "step": 40601 }, { "epoch": 1.35, "grad_norm": 0.49787452816963196, "learning_rate": 0.00034667171966009557, "loss": 1.8331, "step": 40602 }, { "epoch": 1.35, "grad_norm": 0.4944923520088196, "learning_rate": 0.00034666139477509966, "loss": 1.8415, "step": 40603 }, { "epoch": 1.35, "grad_norm": 0.5022997260093689, "learning_rate": 0.0003466510698334635, "loss": 1.7901, "step": 40604 }, { "epoch": 1.35, "grad_norm": 0.5110067129135132, "learning_rate": 0.00034664074483520003, "loss": 1.6959, "step": 40605 }, { "epoch": 1.35, "grad_norm": 0.5200314521789551, "learning_rate": 0.00034663041978032146, "loss": 1.833, "step": 40606 }, { "epoch": 1.35, "grad_norm": 0.5002102851867676, "learning_rate": 0.0003466200946688405, "loss": 1.8199, "step": 40607 }, { "epoch": 1.35, "grad_norm": 0.4779617488384247, "learning_rate": 0.00034660976950076953, "loss": 1.7132, "step": 40608 }, { "epoch": 1.35, "grad_norm": 0.4681051969528198, "learning_rate": 0.00034659944427612116, "loss": 1.7638, "step": 40609 }, { "epoch": 1.35, "grad_norm": 0.778948187828064, "learning_rate": 0.000346589118994908, "loss": 1.7361, "step": 40610 }, { "epoch": 1.35, "grad_norm": 0.49470874667167664, "learning_rate": 0.00034657879365714256, "loss": 1.7887, "step": 40611 }, { "epoch": 1.35, "grad_norm": 0.4908770024776459, "learning_rate": 0.00034656846826283726, "loss": 1.812, "step": 40612 }, { "epoch": 1.35, "grad_norm": 0.5087383389472961, "learning_rate": 0.00034655814281200464, "loss": 1.7453, "step": 40613 }, { "epoch": 1.35, "grad_norm": 0.4808521568775177, "learning_rate": 0.0003465478173046574, "loss": 1.7477, "step": 40614 }, { "epoch": 1.35, "grad_norm": 0.5060625076293945, "learning_rate": 0.0003465374917408079, "loss": 1.7267, "step": 40615 }, { "epoch": 1.35, "grad_norm": 0.4977229833602905, "learning_rate": 0.0003465271661204688, "loss": 1.8102, "step": 40616 }, { "epoch": 1.35, "grad_norm": 0.4979274272918701, "learning_rate": 0.00034651684044365255, "loss": 1.8335, "step": 40617 }, { "epoch": 1.35, "grad_norm": 0.501762866973877, "learning_rate": 0.00034650651471037166, "loss": 1.7949, "step": 40618 }, { "epoch": 1.35, "grad_norm": 0.5137242078781128, "learning_rate": 0.00034649618892063875, "loss": 1.789, "step": 40619 }, { "epoch": 1.35, "grad_norm": 0.493642121553421, "learning_rate": 0.00034648586307446633, "loss": 1.8017, "step": 40620 }, { "epoch": 1.35, "grad_norm": 0.5108997225761414, "learning_rate": 0.000346475537171867, "loss": 1.7563, "step": 40621 }, { "epoch": 1.35, "grad_norm": 0.5030069351196289, "learning_rate": 0.00034646521121285305, "loss": 1.8237, "step": 40622 }, { "epoch": 1.35, "grad_norm": 0.4699178636074066, "learning_rate": 0.00034645488519743727, "loss": 1.7494, "step": 40623 }, { "epoch": 1.35, "grad_norm": 0.4961260259151459, "learning_rate": 0.00034644455912563214, "loss": 1.8877, "step": 40624 }, { "epoch": 1.35, "grad_norm": 0.5049861669540405, "learning_rate": 0.00034643423299745007, "loss": 1.8129, "step": 40625 }, { "epoch": 1.35, "grad_norm": 0.5012514591217041, "learning_rate": 0.0003464239068129037, "loss": 1.7769, "step": 40626 }, { "epoch": 1.35, "grad_norm": 0.5000872611999512, "learning_rate": 0.00034641358057200557, "loss": 1.778, "step": 40627 }, { "epoch": 1.35, "grad_norm": 0.4804413914680481, "learning_rate": 0.00034640325427476824, "loss": 1.7254, "step": 40628 }, { "epoch": 1.35, "grad_norm": 0.4935099184513092, "learning_rate": 0.00034639292792120407, "loss": 1.7799, "step": 40629 }, { "epoch": 1.35, "grad_norm": 0.5056390166282654, "learning_rate": 0.0003463826015113259, "loss": 1.7124, "step": 40630 }, { "epoch": 1.35, "grad_norm": 0.5292559266090393, "learning_rate": 0.000346372275045146, "loss": 1.741, "step": 40631 }, { "epoch": 1.35, "grad_norm": 0.4874893128871918, "learning_rate": 0.00034636194852267696, "loss": 1.776, "step": 40632 }, { "epoch": 1.35, "grad_norm": 0.4907718896865845, "learning_rate": 0.0003463516219439313, "loss": 1.7115, "step": 40633 }, { "epoch": 1.35, "grad_norm": 0.49046486616134644, "learning_rate": 0.00034634129530892164, "loss": 1.7358, "step": 40634 }, { "epoch": 1.35, "grad_norm": 0.5011521577835083, "learning_rate": 0.00034633096861766063, "loss": 1.7237, "step": 40635 }, { "epoch": 1.35, "grad_norm": 0.49318727850914, "learning_rate": 0.0003463206418701605, "loss": 1.7865, "step": 40636 }, { "epoch": 1.35, "grad_norm": 0.4946257472038269, "learning_rate": 0.00034631031506643394, "loss": 1.8294, "step": 40637 }, { "epoch": 1.35, "grad_norm": 0.505815327167511, "learning_rate": 0.00034629998820649347, "loss": 1.7532, "step": 40638 }, { "epoch": 1.35, "grad_norm": 0.5010337233543396, "learning_rate": 0.0003462896612903517, "loss": 1.9265, "step": 40639 }, { "epoch": 1.35, "grad_norm": 0.5161345601081848, "learning_rate": 0.000346279334318021, "loss": 1.7903, "step": 40640 }, { "epoch": 1.35, "grad_norm": 0.5084194540977478, "learning_rate": 0.0003462690072895141, "loss": 1.7924, "step": 40641 }, { "epoch": 1.35, "grad_norm": 0.5404300093650818, "learning_rate": 0.00034625868020484346, "loss": 1.7284, "step": 40642 }, { "epoch": 1.35, "grad_norm": 0.48785269260406494, "learning_rate": 0.00034624835306402155, "loss": 1.7775, "step": 40643 }, { "epoch": 1.35, "grad_norm": 0.6032871007919312, "learning_rate": 0.00034623802586706095, "loss": 1.7779, "step": 40644 }, { "epoch": 1.35, "grad_norm": 0.5143997073173523, "learning_rate": 0.0003462276986139742, "loss": 1.8439, "step": 40645 }, { "epoch": 1.35, "grad_norm": 0.5037004947662354, "learning_rate": 0.00034621737130477394, "loss": 1.7012, "step": 40646 }, { "epoch": 1.35, "grad_norm": 0.5118467807769775, "learning_rate": 0.0003462070439394724, "loss": 1.7718, "step": 40647 }, { "epoch": 1.35, "grad_norm": 0.48721420764923096, "learning_rate": 0.00034619671651808245, "loss": 1.7083, "step": 40648 }, { "epoch": 1.35, "grad_norm": 0.48435550928115845, "learning_rate": 0.00034618638904061646, "loss": 1.7632, "step": 40649 }, { "epoch": 1.35, "grad_norm": 0.4909106194972992, "learning_rate": 0.00034617606150708704, "loss": 1.7709, "step": 40650 }, { "epoch": 1.35, "grad_norm": 0.5008527040481567, "learning_rate": 0.00034616573391750663, "loss": 1.7293, "step": 40651 }, { "epoch": 1.35, "grad_norm": 0.4873281419277191, "learning_rate": 0.0003461554062718878, "loss": 1.787, "step": 40652 }, { "epoch": 1.35, "grad_norm": 0.506076991558075, "learning_rate": 0.00034614507857024315, "loss": 1.7635, "step": 40653 }, { "epoch": 1.35, "grad_norm": 0.47868505120277405, "learning_rate": 0.0003461347508125851, "loss": 1.7165, "step": 40654 }, { "epoch": 1.35, "grad_norm": 0.502985954284668, "learning_rate": 0.0003461244229989264, "loss": 1.8421, "step": 40655 }, { "epoch": 1.35, "grad_norm": 0.49210435152053833, "learning_rate": 0.00034611409512927936, "loss": 1.7925, "step": 40656 }, { "epoch": 1.35, "grad_norm": 0.5065690279006958, "learning_rate": 0.0003461037672036566, "loss": 1.8134, "step": 40657 }, { "epoch": 1.35, "grad_norm": 0.5023626089096069, "learning_rate": 0.0003460934392220708, "loss": 1.6979, "step": 40658 }, { "epoch": 1.35, "grad_norm": 0.4824807345867157, "learning_rate": 0.00034608311118453414, "loss": 1.8102, "step": 40659 }, { "epoch": 1.35, "grad_norm": 0.48175859451293945, "learning_rate": 0.0003460727830910595, "loss": 1.7412, "step": 40660 }, { "epoch": 1.35, "grad_norm": 0.5002485513687134, "learning_rate": 0.0003460624549416593, "loss": 1.7595, "step": 40661 }, { "epoch": 1.35, "grad_norm": 0.4762458801269531, "learning_rate": 0.000346052126736346, "loss": 1.7112, "step": 40662 }, { "epoch": 1.35, "grad_norm": 0.5017309188842773, "learning_rate": 0.0003460417984751322, "loss": 1.8344, "step": 40663 }, { "epoch": 1.35, "grad_norm": 0.5046371817588806, "learning_rate": 0.00034603147015803046, "loss": 1.7609, "step": 40664 }, { "epoch": 1.35, "grad_norm": 0.4885946810245514, "learning_rate": 0.00034602114178505333, "loss": 1.7807, "step": 40665 }, { "epoch": 1.35, "grad_norm": 0.5088748931884766, "learning_rate": 0.00034601081335621333, "loss": 1.7132, "step": 40666 }, { "epoch": 1.35, "grad_norm": 0.4961005747318268, "learning_rate": 0.0003460004848715229, "loss": 1.7846, "step": 40667 }, { "epoch": 1.35, "grad_norm": 0.5002983808517456, "learning_rate": 0.0003459901563309947, "loss": 1.7448, "step": 40668 }, { "epoch": 1.35, "grad_norm": 0.48466816544532776, "learning_rate": 0.00034597982773464125, "loss": 1.7493, "step": 40669 }, { "epoch": 1.35, "grad_norm": 0.49204933643341064, "learning_rate": 0.00034596949908247506, "loss": 1.7508, "step": 40670 }, { "epoch": 1.35, "grad_norm": 0.5026774406433105, "learning_rate": 0.0003459591703745087, "loss": 1.7218, "step": 40671 }, { "epoch": 1.35, "grad_norm": 0.5017052888870239, "learning_rate": 0.00034594884161075456, "loss": 1.7865, "step": 40672 }, { "epoch": 1.35, "grad_norm": 0.507439911365509, "learning_rate": 0.0003459385127912254, "loss": 1.7114, "step": 40673 }, { "epoch": 1.35, "grad_norm": 0.4945151209831238, "learning_rate": 0.0003459281839159336, "loss": 1.7596, "step": 40674 }, { "epoch": 1.35, "grad_norm": 0.5003255009651184, "learning_rate": 0.00034591785498489175, "loss": 1.8251, "step": 40675 }, { "epoch": 1.35, "grad_norm": 0.498712956905365, "learning_rate": 0.0003459075259981125, "loss": 1.7159, "step": 40676 }, { "epoch": 1.35, "grad_norm": 0.5242826342582703, "learning_rate": 0.00034589719695560814, "loss": 1.8122, "step": 40677 }, { "epoch": 1.35, "grad_norm": 0.5026523470878601, "learning_rate": 0.0003458868678573914, "loss": 1.8163, "step": 40678 }, { "epoch": 1.35, "grad_norm": 0.4866466224193573, "learning_rate": 0.0003458765387034748, "loss": 1.8638, "step": 40679 }, { "epoch": 1.35, "grad_norm": 0.472365140914917, "learning_rate": 0.00034586620949387083, "loss": 1.7685, "step": 40680 }, { "epoch": 1.35, "grad_norm": 0.48342397809028625, "learning_rate": 0.00034585588022859195, "loss": 1.756, "step": 40681 }, { "epoch": 1.35, "grad_norm": 0.49970200657844543, "learning_rate": 0.00034584555090765083, "loss": 1.7819, "step": 40682 }, { "epoch": 1.35, "grad_norm": 0.5132567286491394, "learning_rate": 0.00034583522153106006, "loss": 1.6725, "step": 40683 }, { "epoch": 1.35, "grad_norm": 0.488436758518219, "learning_rate": 0.00034582489209883193, "loss": 1.7185, "step": 40684 }, { "epoch": 1.35, "grad_norm": 0.49261170625686646, "learning_rate": 0.0003458145626109793, "loss": 1.8769, "step": 40685 }, { "epoch": 1.35, "grad_norm": 0.49216631054878235, "learning_rate": 0.0003458042330675144, "loss": 1.7521, "step": 40686 }, { "epoch": 1.35, "grad_norm": 0.5069946050643921, "learning_rate": 0.00034579390346845, "loss": 1.7717, "step": 40687 }, { "epoch": 1.35, "grad_norm": 0.5073288083076477, "learning_rate": 0.0003457835738137985, "loss": 1.783, "step": 40688 }, { "epoch": 1.35, "grad_norm": 0.48865807056427, "learning_rate": 0.0003457732441035725, "loss": 1.7071, "step": 40689 }, { "epoch": 1.35, "grad_norm": 0.505631148815155, "learning_rate": 0.0003457629143377846, "loss": 1.8027, "step": 40690 }, { "epoch": 1.35, "grad_norm": 0.5062941908836365, "learning_rate": 0.0003457525845164472, "loss": 1.818, "step": 40691 }, { "epoch": 1.35, "grad_norm": 0.508732259273529, "learning_rate": 0.00034574225463957284, "loss": 1.8145, "step": 40692 }, { "epoch": 1.35, "grad_norm": 0.4914693236351013, "learning_rate": 0.00034573192470717417, "loss": 1.8036, "step": 40693 }, { "epoch": 1.35, "grad_norm": 0.48453497886657715, "learning_rate": 0.0003457215947192637, "loss": 1.8465, "step": 40694 }, { "epoch": 1.35, "grad_norm": 0.5259585976600647, "learning_rate": 0.0003457112646758539, "loss": 1.7875, "step": 40695 }, { "epoch": 1.35, "grad_norm": 0.49878790974617004, "learning_rate": 0.0003457009345769575, "loss": 1.8228, "step": 40696 }, { "epoch": 1.35, "grad_norm": 0.516869306564331, "learning_rate": 0.0003456906044225868, "loss": 1.789, "step": 40697 }, { "epoch": 1.35, "grad_norm": 0.4877183139324188, "learning_rate": 0.00034568027421275435, "loss": 1.7413, "step": 40698 }, { "epoch": 1.35, "grad_norm": 0.5089536905288696, "learning_rate": 0.0003456699439474729, "loss": 1.8241, "step": 40699 }, { "epoch": 1.35, "grad_norm": 0.48892495036125183, "learning_rate": 0.0003456596136267549, "loss": 1.7625, "step": 40700 }, { "epoch": 1.35, "grad_norm": 0.49938657879829407, "learning_rate": 0.00034564928325061275, "loss": 1.7413, "step": 40701 }, { "epoch": 1.35, "grad_norm": 0.5014444589614868, "learning_rate": 0.0003456389528190591, "loss": 1.8168, "step": 40702 }, { "epoch": 1.35, "grad_norm": 0.4850890636444092, "learning_rate": 0.0003456286223321066, "loss": 1.703, "step": 40703 }, { "epoch": 1.35, "grad_norm": 0.5062333941459656, "learning_rate": 0.00034561829178976754, "loss": 1.7411, "step": 40704 }, { "epoch": 1.35, "grad_norm": 0.49188435077667236, "learning_rate": 0.00034560796119205466, "loss": 1.8098, "step": 40705 }, { "epoch": 1.35, "grad_norm": 0.48554736375808716, "learning_rate": 0.00034559763053898046, "loss": 1.7442, "step": 40706 }, { "epoch": 1.35, "grad_norm": 0.503360390663147, "learning_rate": 0.00034558729983055736, "loss": 1.7627, "step": 40707 }, { "epoch": 1.35, "grad_norm": 0.48292574286460876, "learning_rate": 0.0003455769690667981, "loss": 1.7481, "step": 40708 }, { "epoch": 1.35, "grad_norm": 0.49326393008232117, "learning_rate": 0.00034556663824771505, "loss": 1.8053, "step": 40709 }, { "epoch": 1.35, "grad_norm": 0.4889043867588043, "learning_rate": 0.0003455563073733209, "loss": 1.7622, "step": 40710 }, { "epoch": 1.35, "grad_norm": 0.4842710793018341, "learning_rate": 0.000345545976443628, "loss": 1.7563, "step": 40711 }, { "epoch": 1.35, "grad_norm": 0.5116637349128723, "learning_rate": 0.00034553564545864904, "loss": 1.8054, "step": 40712 }, { "epoch": 1.35, "grad_norm": 0.5457656383514404, "learning_rate": 0.00034552531441839655, "loss": 1.8456, "step": 40713 }, { "epoch": 1.35, "grad_norm": 0.47744613885879517, "learning_rate": 0.00034551498332288297, "loss": 1.7246, "step": 40714 }, { "epoch": 1.35, "grad_norm": 0.48767903447151184, "learning_rate": 0.000345504652172121, "loss": 1.7544, "step": 40715 }, { "epoch": 1.35, "grad_norm": 0.4985104501247406, "learning_rate": 0.000345494320966123, "loss": 1.8015, "step": 40716 }, { "epoch": 1.35, "grad_norm": 0.4838685989379883, "learning_rate": 0.00034548398970490164, "loss": 1.7347, "step": 40717 }, { "epoch": 1.35, "grad_norm": 0.495944082736969, "learning_rate": 0.0003454736583884694, "loss": 1.7604, "step": 40718 }, { "epoch": 1.35, "grad_norm": 0.4995920658111572, "learning_rate": 0.00034546332701683886, "loss": 1.7308, "step": 40719 }, { "epoch": 1.35, "grad_norm": 0.49011659622192383, "learning_rate": 0.00034545299559002253, "loss": 1.7531, "step": 40720 }, { "epoch": 1.35, "grad_norm": 0.5052211284637451, "learning_rate": 0.0003454426641080329, "loss": 1.8092, "step": 40721 }, { "epoch": 1.35, "grad_norm": 0.4889603853225708, "learning_rate": 0.0003454323325708827, "loss": 1.744, "step": 40722 }, { "epoch": 1.35, "grad_norm": 0.46477121114730835, "learning_rate": 0.0003454220009785842, "loss": 1.8146, "step": 40723 }, { "epoch": 1.35, "grad_norm": 0.491462379693985, "learning_rate": 0.00034541166933115013, "loss": 1.8141, "step": 40724 }, { "epoch": 1.35, "grad_norm": 0.4939439296722412, "learning_rate": 0.000345401337628593, "loss": 1.7794, "step": 40725 }, { "epoch": 1.35, "grad_norm": 0.48936596512794495, "learning_rate": 0.00034539100587092534, "loss": 1.871, "step": 40726 }, { "epoch": 1.35, "grad_norm": 0.4897827208042145, "learning_rate": 0.0003453806740581597, "loss": 1.6261, "step": 40727 }, { "epoch": 1.36, "grad_norm": 0.47185641527175903, "learning_rate": 0.00034537034219030865, "loss": 1.745, "step": 40728 }, { "epoch": 1.36, "grad_norm": 0.4816904366016388, "learning_rate": 0.00034536001026738456, "loss": 1.686, "step": 40729 }, { "epoch": 1.36, "grad_norm": 0.4934840798377991, "learning_rate": 0.00034534967828940024, "loss": 1.7475, "step": 40730 }, { "epoch": 1.36, "grad_norm": 0.47862645983695984, "learning_rate": 0.000345339346256368, "loss": 1.8525, "step": 40731 }, { "epoch": 1.36, "grad_norm": 0.48038801550865173, "learning_rate": 0.0003453290141683005, "loss": 1.7075, "step": 40732 }, { "epoch": 1.36, "grad_norm": 0.5106179118156433, "learning_rate": 0.00034531868202521035, "loss": 1.8426, "step": 40733 }, { "epoch": 1.36, "grad_norm": 0.5079631209373474, "learning_rate": 0.00034530834982710983, "loss": 1.7894, "step": 40734 }, { "epoch": 1.36, "grad_norm": 0.4933626055717468, "learning_rate": 0.00034529801757401176, "loss": 1.8168, "step": 40735 }, { "epoch": 1.36, "grad_norm": 0.5298547148704529, "learning_rate": 0.0003452876852659285, "loss": 1.8086, "step": 40736 }, { "epoch": 1.36, "grad_norm": 0.4971909523010254, "learning_rate": 0.0003452773529028727, "loss": 1.7637, "step": 40737 }, { "epoch": 1.36, "grad_norm": 0.48865368962287903, "learning_rate": 0.0003452670204848569, "loss": 1.7609, "step": 40738 }, { "epoch": 1.36, "grad_norm": 0.49836477637290955, "learning_rate": 0.0003452566880118935, "loss": 1.6825, "step": 40739 }, { "epoch": 1.36, "grad_norm": 0.5014634132385254, "learning_rate": 0.0003452463554839953, "loss": 1.7802, "step": 40740 }, { "epoch": 1.36, "grad_norm": 0.4964514374732971, "learning_rate": 0.00034523602290117457, "loss": 1.7503, "step": 40741 }, { "epoch": 1.36, "grad_norm": 0.5222147703170776, "learning_rate": 0.000345225690263444, "loss": 1.7944, "step": 40742 }, { "epoch": 1.36, "grad_norm": 0.5292468070983887, "learning_rate": 0.0003452153575708161, "loss": 1.7655, "step": 40743 }, { "epoch": 1.36, "grad_norm": 0.4975837171077728, "learning_rate": 0.0003452050248233034, "loss": 1.7808, "step": 40744 }, { "epoch": 1.36, "grad_norm": 0.4869288504123688, "learning_rate": 0.0003451946920209186, "loss": 1.6968, "step": 40745 }, { "epoch": 1.36, "grad_norm": 0.5015984773635864, "learning_rate": 0.00034518435916367396, "loss": 1.7364, "step": 40746 }, { "epoch": 1.36, "grad_norm": 0.4846843183040619, "learning_rate": 0.00034517402625158225, "loss": 1.7903, "step": 40747 }, { "epoch": 1.36, "grad_norm": 0.49193668365478516, "learning_rate": 0.0003451636932846558, "loss": 1.7403, "step": 40748 }, { "epoch": 1.36, "grad_norm": 0.4915868639945984, "learning_rate": 0.0003451533602629074, "loss": 1.6933, "step": 40749 }, { "epoch": 1.36, "grad_norm": 0.4936349093914032, "learning_rate": 0.00034514302718634943, "loss": 1.865, "step": 40750 }, { "epoch": 1.36, "grad_norm": 0.5195862054824829, "learning_rate": 0.00034513269405499447, "loss": 1.8099, "step": 40751 }, { "epoch": 1.36, "grad_norm": 0.4950874149799347, "learning_rate": 0.00034512236086885515, "loss": 1.7965, "step": 40752 }, { "epoch": 1.36, "grad_norm": 0.5088953375816345, "learning_rate": 0.0003451120276279439, "loss": 1.7144, "step": 40753 }, { "epoch": 1.36, "grad_norm": 0.5201706290245056, "learning_rate": 0.0003451016943322732, "loss": 1.7236, "step": 40754 }, { "epoch": 1.36, "grad_norm": 0.5034649968147278, "learning_rate": 0.00034509136098185574, "loss": 1.7735, "step": 40755 }, { "epoch": 1.36, "grad_norm": 0.5012547969818115, "learning_rate": 0.00034508102757670406, "loss": 1.781, "step": 40756 }, { "epoch": 1.36, "grad_norm": 0.5046830773353577, "learning_rate": 0.00034507069411683053, "loss": 1.7244, "step": 40757 }, { "epoch": 1.36, "grad_norm": 0.4922810196876526, "learning_rate": 0.000345060360602248, "loss": 1.7281, "step": 40758 }, { "epoch": 1.36, "grad_norm": 0.4948160946369171, "learning_rate": 0.00034505002703296876, "loss": 1.8221, "step": 40759 }, { "epoch": 1.36, "grad_norm": 0.48496001958847046, "learning_rate": 0.00034503969340900535, "loss": 1.7877, "step": 40760 }, { "epoch": 1.36, "grad_norm": 0.5161636471748352, "learning_rate": 0.00034502935973037044, "loss": 1.7902, "step": 40761 }, { "epoch": 1.36, "grad_norm": 0.49416354298591614, "learning_rate": 0.0003450190259970766, "loss": 1.7125, "step": 40762 }, { "epoch": 1.36, "grad_norm": 0.4993368089199066, "learning_rate": 0.0003450086922091362, "loss": 1.7815, "step": 40763 }, { "epoch": 1.36, "grad_norm": 0.48272883892059326, "learning_rate": 0.0003449983583665618, "loss": 1.7557, "step": 40764 }, { "epoch": 1.36, "grad_norm": 0.49586009979248047, "learning_rate": 0.0003449880244693662, "loss": 1.736, "step": 40765 }, { "epoch": 1.36, "grad_norm": 0.4713782072067261, "learning_rate": 0.00034497769051756164, "loss": 1.7827, "step": 40766 }, { "epoch": 1.36, "grad_norm": 0.4975122809410095, "learning_rate": 0.0003449673565111609, "loss": 1.7359, "step": 40767 }, { "epoch": 1.36, "grad_norm": 0.4955478608608246, "learning_rate": 0.0003449570224501764, "loss": 1.7285, "step": 40768 }, { "epoch": 1.36, "grad_norm": 0.4797549545764923, "learning_rate": 0.00034494668833462053, "loss": 1.728, "step": 40769 }, { "epoch": 1.36, "grad_norm": 0.4859142601490021, "learning_rate": 0.00034493635416450616, "loss": 1.7197, "step": 40770 }, { "epoch": 1.36, "grad_norm": 0.4911061227321625, "learning_rate": 0.00034492601993984563, "loss": 1.7638, "step": 40771 }, { "epoch": 1.36, "grad_norm": 0.4795417785644531, "learning_rate": 0.0003449156856606516, "loss": 1.8089, "step": 40772 }, { "epoch": 1.36, "grad_norm": 0.4720735251903534, "learning_rate": 0.00034490535132693646, "loss": 1.7744, "step": 40773 }, { "epoch": 1.36, "grad_norm": 0.4985440671443939, "learning_rate": 0.00034489501693871287, "loss": 1.7667, "step": 40774 }, { "epoch": 1.36, "grad_norm": 0.5126435160636902, "learning_rate": 0.00034488468249599337, "loss": 1.7436, "step": 40775 }, { "epoch": 1.36, "grad_norm": 0.5062950849533081, "learning_rate": 0.0003448743479987904, "loss": 1.7293, "step": 40776 }, { "epoch": 1.36, "grad_norm": 0.47777503728866577, "learning_rate": 0.0003448640134471167, "loss": 1.7596, "step": 40777 }, { "epoch": 1.36, "grad_norm": 0.49377012252807617, "learning_rate": 0.0003448536788409846, "loss": 1.7923, "step": 40778 }, { "epoch": 1.36, "grad_norm": 0.5214455127716064, "learning_rate": 0.00034484334418040684, "loss": 1.6946, "step": 40779 }, { "epoch": 1.36, "grad_norm": 0.506121814250946, "learning_rate": 0.0003448330094653957, "loss": 1.7781, "step": 40780 }, { "epoch": 1.36, "grad_norm": 0.4796611964702606, "learning_rate": 0.00034482267469596403, "loss": 1.7142, "step": 40781 }, { "epoch": 1.36, "grad_norm": 0.488148957490921, "learning_rate": 0.00034481233987212426, "loss": 1.745, "step": 40782 }, { "epoch": 1.36, "grad_norm": 0.49754664301872253, "learning_rate": 0.0003448020049938889, "loss": 1.7568, "step": 40783 }, { "epoch": 1.36, "grad_norm": 0.5083825588226318, "learning_rate": 0.00034479167006127036, "loss": 1.8139, "step": 40784 }, { "epoch": 1.36, "grad_norm": 0.4899499714374542, "learning_rate": 0.0003447813350742815, "loss": 1.7588, "step": 40785 }, { "epoch": 1.36, "grad_norm": 0.484300822019577, "learning_rate": 0.00034477100003293455, "loss": 1.7467, "step": 40786 }, { "epoch": 1.36, "grad_norm": 0.496252179145813, "learning_rate": 0.0003447606649372423, "loss": 1.7474, "step": 40787 }, { "epoch": 1.36, "grad_norm": 0.5103045701980591, "learning_rate": 0.00034475032978721723, "loss": 1.7526, "step": 40788 }, { "epoch": 1.36, "grad_norm": 0.4862652122974396, "learning_rate": 0.00034473999458287184, "loss": 1.7889, "step": 40789 }, { "epoch": 1.36, "grad_norm": 0.49733781814575195, "learning_rate": 0.00034472965932421865, "loss": 1.7215, "step": 40790 }, { "epoch": 1.36, "grad_norm": 0.5027872323989868, "learning_rate": 0.00034471932401127024, "loss": 1.8315, "step": 40791 }, { "epoch": 1.36, "grad_norm": 0.4781731069087982, "learning_rate": 0.00034470898864403913, "loss": 1.781, "step": 40792 }, { "epoch": 1.36, "grad_norm": 0.48626524209976196, "learning_rate": 0.000344698653222538, "loss": 1.6841, "step": 40793 }, { "epoch": 1.36, "grad_norm": 0.5309760570526123, "learning_rate": 0.0003446883177467792, "loss": 1.8865, "step": 40794 }, { "epoch": 1.36, "grad_norm": 0.4785144031047821, "learning_rate": 0.0003446779822167754, "loss": 1.7396, "step": 40795 }, { "epoch": 1.36, "grad_norm": 0.4953708052635193, "learning_rate": 0.00034466764663253907, "loss": 1.842, "step": 40796 }, { "epoch": 1.36, "grad_norm": 0.4896545708179474, "learning_rate": 0.00034465731099408287, "loss": 1.767, "step": 40797 }, { "epoch": 1.36, "grad_norm": 0.49816054105758667, "learning_rate": 0.00034464697530141926, "loss": 1.7876, "step": 40798 }, { "epoch": 1.36, "grad_norm": 1.0863605737686157, "learning_rate": 0.00034463663955456073, "loss": 1.7332, "step": 40799 }, { "epoch": 1.36, "grad_norm": 0.4885239601135254, "learning_rate": 0.00034462630375351995, "loss": 1.8122, "step": 40800 }, { "epoch": 1.36, "grad_norm": 0.49011415243148804, "learning_rate": 0.0003446159678983094, "loss": 1.7823, "step": 40801 }, { "epoch": 1.36, "grad_norm": 0.49090486764907837, "learning_rate": 0.00034460563198894165, "loss": 1.7715, "step": 40802 }, { "epoch": 1.36, "grad_norm": 0.4866476356983185, "learning_rate": 0.00034459529602542927, "loss": 1.788, "step": 40803 }, { "epoch": 1.36, "grad_norm": 0.5184273719787598, "learning_rate": 0.0003445849600077847, "loss": 1.8283, "step": 40804 }, { "epoch": 1.36, "grad_norm": 0.4908277988433838, "learning_rate": 0.0003445746239360206, "loss": 1.7925, "step": 40805 }, { "epoch": 1.36, "grad_norm": 0.49167826771736145, "learning_rate": 0.00034456428781014947, "loss": 1.822, "step": 40806 }, { "epoch": 1.36, "grad_norm": 0.49096250534057617, "learning_rate": 0.0003445539516301839, "loss": 1.8169, "step": 40807 }, { "epoch": 1.36, "grad_norm": 0.5030133128166199, "learning_rate": 0.00034454361539613634, "loss": 1.7765, "step": 40808 }, { "epoch": 1.36, "grad_norm": 0.47770196199417114, "learning_rate": 0.00034453327910801936, "loss": 1.7291, "step": 40809 }, { "epoch": 1.36, "grad_norm": 0.5091581344604492, "learning_rate": 0.0003445229427658456, "loss": 1.86, "step": 40810 }, { "epoch": 1.36, "grad_norm": 0.48215803503990173, "learning_rate": 0.0003445126063696275, "loss": 1.7566, "step": 40811 }, { "epoch": 1.36, "grad_norm": 0.5005608201026917, "learning_rate": 0.0003445022699193777, "loss": 1.8052, "step": 40812 }, { "epoch": 1.36, "grad_norm": 0.48531362414360046, "learning_rate": 0.00034449193341510876, "loss": 1.7262, "step": 40813 }, { "epoch": 1.36, "grad_norm": 0.48561084270477295, "learning_rate": 0.0003444815968568331, "loss": 1.6929, "step": 40814 }, { "epoch": 1.36, "grad_norm": 0.4848007261753082, "learning_rate": 0.0003444712602445633, "loss": 1.7873, "step": 40815 }, { "epoch": 1.36, "grad_norm": 0.49710655212402344, "learning_rate": 0.00034446092357831197, "loss": 1.777, "step": 40816 }, { "epoch": 1.36, "grad_norm": 0.5028348565101624, "learning_rate": 0.00034445058685809163, "loss": 1.8154, "step": 40817 }, { "epoch": 1.36, "grad_norm": 0.48647865653038025, "learning_rate": 0.0003444402500839149, "loss": 1.741, "step": 40818 }, { "epoch": 1.36, "grad_norm": 0.517254650592804, "learning_rate": 0.0003444299132557941, "loss": 1.8246, "step": 40819 }, { "epoch": 1.36, "grad_norm": 0.4802689850330353, "learning_rate": 0.00034441957637374213, "loss": 1.7059, "step": 40820 }, { "epoch": 1.36, "grad_norm": 0.48394739627838135, "learning_rate": 0.0003444092394377712, "loss": 1.7674, "step": 40821 }, { "epoch": 1.36, "grad_norm": 0.4892483651638031, "learning_rate": 0.000344398902447894, "loss": 1.7286, "step": 40822 }, { "epoch": 1.36, "grad_norm": 0.48827579617500305, "learning_rate": 0.0003443885654041232, "loss": 1.7854, "step": 40823 }, { "epoch": 1.36, "grad_norm": 0.49191713333129883, "learning_rate": 0.00034437822830647106, "loss": 1.8275, "step": 40824 }, { "epoch": 1.36, "grad_norm": 0.4861942231655121, "learning_rate": 0.00034436789115495033, "loss": 1.7084, "step": 40825 }, { "epoch": 1.36, "grad_norm": 1.0850584506988525, "learning_rate": 0.00034435755394957355, "loss": 1.7578, "step": 40826 }, { "epoch": 1.36, "grad_norm": 0.4812626838684082, "learning_rate": 0.00034434721669035327, "loss": 1.6987, "step": 40827 }, { "epoch": 1.36, "grad_norm": 0.5037635564804077, "learning_rate": 0.0003443368793773019, "loss": 1.811, "step": 40828 }, { "epoch": 1.36, "grad_norm": 0.4927902817726135, "learning_rate": 0.0003443265420104322, "loss": 1.8036, "step": 40829 }, { "epoch": 1.36, "grad_norm": 0.5041747093200684, "learning_rate": 0.0003443162045897566, "loss": 1.8451, "step": 40830 }, { "epoch": 1.36, "grad_norm": 0.500676155090332, "learning_rate": 0.0003443058671152875, "loss": 1.7646, "step": 40831 }, { "epoch": 1.36, "grad_norm": 0.48591911792755127, "learning_rate": 0.0003442955295870378, "loss": 1.7326, "step": 40832 }, { "epoch": 1.36, "grad_norm": 0.495961457490921, "learning_rate": 0.00034428519200501974, "loss": 1.8131, "step": 40833 }, { "epoch": 1.36, "grad_norm": 0.49608296155929565, "learning_rate": 0.000344274854369246, "loss": 1.779, "step": 40834 }, { "epoch": 1.36, "grad_norm": 0.5066896677017212, "learning_rate": 0.00034426451667972916, "loss": 1.7035, "step": 40835 }, { "epoch": 1.36, "grad_norm": 0.5132639408111572, "learning_rate": 0.0003442541789364817, "loss": 1.7073, "step": 40836 }, { "epoch": 1.36, "grad_norm": 0.48633578419685364, "learning_rate": 0.0003442438411395162, "loss": 1.785, "step": 40837 }, { "epoch": 1.36, "grad_norm": 0.49872568249702454, "learning_rate": 0.0003442335032888451, "loss": 1.7532, "step": 40838 }, { "epoch": 1.36, "grad_norm": 0.4793226420879364, "learning_rate": 0.0003442231653844812, "loss": 1.7082, "step": 40839 }, { "epoch": 1.36, "grad_norm": 0.5028519630432129, "learning_rate": 0.00034421282742643676, "loss": 1.7451, "step": 40840 }, { "epoch": 1.36, "grad_norm": 0.48347246646881104, "learning_rate": 0.0003442024894147245, "loss": 1.7696, "step": 40841 }, { "epoch": 1.36, "grad_norm": 0.5221730470657349, "learning_rate": 0.000344192151349357, "loss": 1.7542, "step": 40842 }, { "epoch": 1.36, "grad_norm": 0.49689188599586487, "learning_rate": 0.00034418181323034666, "loss": 1.7781, "step": 40843 }, { "epoch": 1.36, "grad_norm": 0.5015126466751099, "learning_rate": 0.00034417147505770617, "loss": 1.7909, "step": 40844 }, { "epoch": 1.36, "grad_norm": 1.5820167064666748, "learning_rate": 0.000344161136831448, "loss": 1.7813, "step": 40845 }, { "epoch": 1.36, "grad_norm": 0.5054515600204468, "learning_rate": 0.0003441507985515846, "loss": 1.787, "step": 40846 }, { "epoch": 1.36, "grad_norm": 0.4926483929157257, "learning_rate": 0.00034414046021812874, "loss": 1.7521, "step": 40847 }, { "epoch": 1.36, "grad_norm": 0.48015230894088745, "learning_rate": 0.00034413012183109285, "loss": 1.7922, "step": 40848 }, { "epoch": 1.36, "grad_norm": 0.495712012052536, "learning_rate": 0.0003441197833904895, "loss": 1.7227, "step": 40849 }, { "epoch": 1.36, "grad_norm": 0.4818306565284729, "learning_rate": 0.00034410944489633125, "loss": 1.8293, "step": 40850 }, { "epoch": 1.36, "grad_norm": 0.5058314800262451, "learning_rate": 0.0003440991063486306, "loss": 1.7983, "step": 40851 }, { "epoch": 1.36, "grad_norm": 0.49792420864105225, "learning_rate": 0.00034408876774740015, "loss": 1.8059, "step": 40852 }, { "epoch": 1.36, "grad_norm": 0.4933643639087677, "learning_rate": 0.00034407842909265246, "loss": 1.782, "step": 40853 }, { "epoch": 1.36, "grad_norm": 1.086663842201233, "learning_rate": 0.0003440680903844, "loss": 1.7813, "step": 40854 }, { "epoch": 1.36, "grad_norm": 0.4935472309589386, "learning_rate": 0.0003440577516226555, "loss": 1.7812, "step": 40855 }, { "epoch": 1.36, "grad_norm": 0.5173021554946899, "learning_rate": 0.00034404741280743114, "loss": 1.802, "step": 40856 }, { "epoch": 1.36, "grad_norm": 0.4983588755130768, "learning_rate": 0.0003440370739387399, "loss": 1.7978, "step": 40857 }, { "epoch": 1.36, "grad_norm": 0.48957380652427673, "learning_rate": 0.0003440267350165941, "loss": 1.7745, "step": 40858 }, { "epoch": 1.36, "grad_norm": 0.47706839442253113, "learning_rate": 0.0003440163960410063, "loss": 1.7831, "step": 40859 }, { "epoch": 1.36, "grad_norm": 0.5222392678260803, "learning_rate": 0.0003440060570119891, "loss": 1.7732, "step": 40860 }, { "epoch": 1.36, "grad_norm": 0.5015681982040405, "learning_rate": 0.00034399571792955504, "loss": 1.7136, "step": 40861 }, { "epoch": 1.36, "grad_norm": 0.5276015996932983, "learning_rate": 0.00034398537879371666, "loss": 1.742, "step": 40862 }, { "epoch": 1.36, "grad_norm": 0.4876333177089691, "learning_rate": 0.00034397503960448646, "loss": 1.7949, "step": 40863 }, { "epoch": 1.36, "grad_norm": 0.5023938417434692, "learning_rate": 0.0003439647003618771, "loss": 1.7516, "step": 40864 }, { "epoch": 1.36, "grad_norm": 0.49767228960990906, "learning_rate": 0.0003439543610659011, "loss": 1.7522, "step": 40865 }, { "epoch": 1.36, "grad_norm": 0.5033092498779297, "learning_rate": 0.00034394402171657087, "loss": 1.7552, "step": 40866 }, { "epoch": 1.36, "grad_norm": 0.5000483989715576, "learning_rate": 0.0003439336823138991, "loss": 1.6897, "step": 40867 }, { "epoch": 1.36, "grad_norm": 0.504273533821106, "learning_rate": 0.00034392334285789837, "loss": 1.8244, "step": 40868 }, { "epoch": 1.36, "grad_norm": 0.5234800577163696, "learning_rate": 0.00034391300334858123, "loss": 1.7518, "step": 40869 }, { "epoch": 1.36, "grad_norm": 0.5127026438713074, "learning_rate": 0.00034390266378596005, "loss": 1.7698, "step": 40870 }, { "epoch": 1.36, "grad_norm": 0.48916003108024597, "learning_rate": 0.00034389232417004757, "loss": 1.7907, "step": 40871 }, { "epoch": 1.36, "grad_norm": 0.4903743267059326, "learning_rate": 0.00034388198450085625, "loss": 1.8116, "step": 40872 }, { "epoch": 1.36, "grad_norm": 0.4912215769290924, "learning_rate": 0.0003438716447783987, "loss": 1.7896, "step": 40873 }, { "epoch": 1.36, "grad_norm": 0.48322778940200806, "learning_rate": 0.0003438613050026874, "loss": 1.8294, "step": 40874 }, { "epoch": 1.36, "grad_norm": 0.4989871382713318, "learning_rate": 0.000343850965173735, "loss": 1.7922, "step": 40875 }, { "epoch": 1.36, "grad_norm": 0.515248715877533, "learning_rate": 0.00034384062529155393, "loss": 1.854, "step": 40876 }, { "epoch": 1.36, "grad_norm": 0.500679075717926, "learning_rate": 0.0003438302853561568, "loss": 1.8687, "step": 40877 }, { "epoch": 1.36, "grad_norm": 0.502303421497345, "learning_rate": 0.0003438199453675562, "loss": 1.826, "step": 40878 }, { "epoch": 1.36, "grad_norm": 0.5085200071334839, "learning_rate": 0.0003438096053257646, "loss": 1.8011, "step": 40879 }, { "epoch": 1.36, "grad_norm": 0.5327801704406738, "learning_rate": 0.0003437992652307946, "loss": 1.7604, "step": 40880 }, { "epoch": 1.36, "grad_norm": 0.5106788873672485, "learning_rate": 0.00034378892508265873, "loss": 1.869, "step": 40881 }, { "epoch": 1.36, "grad_norm": 0.4933536946773529, "learning_rate": 0.0003437785848813696, "loss": 1.6629, "step": 40882 }, { "epoch": 1.36, "grad_norm": 0.5065421462059021, "learning_rate": 0.0003437682446269397, "loss": 1.7582, "step": 40883 }, { "epoch": 1.36, "grad_norm": 0.5036284923553467, "learning_rate": 0.00034375790431938157, "loss": 1.7763, "step": 40884 }, { "epoch": 1.36, "grad_norm": 0.5120394825935364, "learning_rate": 0.00034374756395870795, "loss": 1.8071, "step": 40885 }, { "epoch": 1.36, "grad_norm": 0.4967401921749115, "learning_rate": 0.00034373722354493104, "loss": 1.8124, "step": 40886 }, { "epoch": 1.36, "grad_norm": 0.48596784472465515, "learning_rate": 0.00034372688307806367, "loss": 1.7352, "step": 40887 }, { "epoch": 1.36, "grad_norm": 0.4750134348869324, "learning_rate": 0.0003437165425581182, "loss": 1.7017, "step": 40888 }, { "epoch": 1.36, "grad_norm": 0.48547571897506714, "learning_rate": 0.00034370620198510745, "loss": 1.8224, "step": 40889 }, { "epoch": 1.36, "grad_norm": 0.499782532453537, "learning_rate": 0.00034369586135904375, "loss": 1.7712, "step": 40890 }, { "epoch": 1.36, "grad_norm": 0.5108822584152222, "learning_rate": 0.0003436855206799397, "loss": 1.7071, "step": 40891 }, { "epoch": 1.36, "grad_norm": 0.5059012770652771, "learning_rate": 0.0003436751799478079, "loss": 1.7233, "step": 40892 }, { "epoch": 1.36, "grad_norm": 0.4919750392436981, "learning_rate": 0.00034366483916266073, "loss": 1.7377, "step": 40893 }, { "epoch": 1.36, "grad_norm": 0.5161587595939636, "learning_rate": 0.00034365449832451104, "loss": 1.7254, "step": 40894 }, { "epoch": 1.36, "grad_norm": 0.5138767957687378, "learning_rate": 0.00034364415743337113, "loss": 1.8403, "step": 40895 }, { "epoch": 1.36, "grad_norm": 0.47794029116630554, "learning_rate": 0.00034363381648925374, "loss": 1.7607, "step": 40896 }, { "epoch": 1.36, "grad_norm": 0.508109450340271, "learning_rate": 0.00034362347549217123, "loss": 1.7512, "step": 40897 }, { "epoch": 1.36, "grad_norm": 0.532662034034729, "learning_rate": 0.0003436131344421363, "loss": 1.7725, "step": 40898 }, { "epoch": 1.36, "grad_norm": 0.5117974877357483, "learning_rate": 0.00034360279333916144, "loss": 1.7612, "step": 40899 }, { "epoch": 1.36, "grad_norm": 0.4846559166908264, "learning_rate": 0.00034359245218325914, "loss": 1.7405, "step": 40900 }, { "epoch": 1.36, "grad_norm": 0.5012601017951965, "learning_rate": 0.00034358211097444214, "loss": 1.7018, "step": 40901 }, { "epoch": 1.36, "grad_norm": 0.4863230884075165, "learning_rate": 0.0003435717697127228, "loss": 1.7701, "step": 40902 }, { "epoch": 1.36, "grad_norm": 0.5116790533065796, "learning_rate": 0.00034356142839811374, "loss": 1.7758, "step": 40903 }, { "epoch": 1.36, "grad_norm": 0.4803318381309509, "learning_rate": 0.0003435510870306276, "loss": 1.7763, "step": 40904 }, { "epoch": 1.36, "grad_norm": 0.4982835054397583, "learning_rate": 0.0003435407456102769, "loss": 1.7381, "step": 40905 }, { "epoch": 1.36, "grad_norm": 0.5116192102432251, "learning_rate": 0.00034353040413707405, "loss": 1.7445, "step": 40906 }, { "epoch": 1.36, "grad_norm": 0.5037553310394287, "learning_rate": 0.00034352006261103164, "loss": 1.8153, "step": 40907 }, { "epoch": 1.36, "grad_norm": 0.49795714020729065, "learning_rate": 0.0003435097210321624, "loss": 1.7581, "step": 40908 }, { "epoch": 1.36, "grad_norm": 0.5025736093521118, "learning_rate": 0.00034349937940047874, "loss": 1.7054, "step": 40909 }, { "epoch": 1.36, "grad_norm": 0.49355509877204895, "learning_rate": 0.0003434890377159933, "loss": 1.8298, "step": 40910 }, { "epoch": 1.36, "grad_norm": 0.48980581760406494, "learning_rate": 0.0003434786959787185, "loss": 1.8007, "step": 40911 }, { "epoch": 1.36, "grad_norm": 0.47589558362960815, "learning_rate": 0.00034346835418866704, "loss": 1.7754, "step": 40912 }, { "epoch": 1.36, "grad_norm": 0.4880189597606659, "learning_rate": 0.00034345801234585133, "loss": 1.798, "step": 40913 }, { "epoch": 1.36, "grad_norm": 0.520664632320404, "learning_rate": 0.000343447670450284, "loss": 1.8498, "step": 40914 }, { "epoch": 1.36, "grad_norm": 0.49646446108818054, "learning_rate": 0.00034343732850197756, "loss": 1.7167, "step": 40915 }, { "epoch": 1.36, "grad_norm": 0.4984326660633087, "learning_rate": 0.0003434269865009447, "loss": 1.7234, "step": 40916 }, { "epoch": 1.36, "grad_norm": 0.48418959975242615, "learning_rate": 0.0003434166444471979, "loss": 1.7684, "step": 40917 }, { "epoch": 1.36, "grad_norm": 0.4902046322822571, "learning_rate": 0.00034340630234074953, "loss": 1.7709, "step": 40918 }, { "epoch": 1.36, "grad_norm": 0.4860416650772095, "learning_rate": 0.0003433959601816124, "loss": 1.8084, "step": 40919 }, { "epoch": 1.36, "grad_norm": 0.4636104106903076, "learning_rate": 0.000343385617969799, "loss": 1.7243, "step": 40920 }, { "epoch": 1.36, "grad_norm": 0.5389651656150818, "learning_rate": 0.00034337527570532184, "loss": 1.841, "step": 40921 }, { "epoch": 1.36, "grad_norm": 0.4940110445022583, "learning_rate": 0.0003433649333881935, "loss": 1.7352, "step": 40922 }, { "epoch": 1.36, "grad_norm": 0.4963129460811615, "learning_rate": 0.0003433545910184264, "loss": 1.7702, "step": 40923 }, { "epoch": 1.36, "grad_norm": 0.49980807304382324, "learning_rate": 0.0003433442485960334, "loss": 1.7691, "step": 40924 }, { "epoch": 1.36, "grad_norm": 0.5001516342163086, "learning_rate": 0.00034333390612102676, "loss": 1.8329, "step": 40925 }, { "epoch": 1.36, "grad_norm": 0.525438129901886, "learning_rate": 0.00034332356359341915, "loss": 1.7612, "step": 40926 }, { "epoch": 1.36, "grad_norm": 0.48875078558921814, "learning_rate": 0.0003433132210132231, "loss": 1.7563, "step": 40927 }, { "epoch": 1.36, "grad_norm": 0.5081321001052856, "learning_rate": 0.00034330287838045125, "loss": 1.8048, "step": 40928 }, { "epoch": 1.36, "grad_norm": 0.49474862217903137, "learning_rate": 0.000343292535695116, "loss": 1.8047, "step": 40929 }, { "epoch": 1.36, "grad_norm": 0.49456706643104553, "learning_rate": 0.0003432821929572301, "loss": 1.7638, "step": 40930 }, { "epoch": 1.36, "grad_norm": 0.4857196509838104, "learning_rate": 0.0003432718501668059, "loss": 1.7692, "step": 40931 }, { "epoch": 1.36, "grad_norm": 0.5006769895553589, "learning_rate": 0.0003432615073238561, "loss": 1.7604, "step": 40932 }, { "epoch": 1.36, "grad_norm": 0.49490171670913696, "learning_rate": 0.0003432511644283932, "loss": 1.7604, "step": 40933 }, { "epoch": 1.36, "grad_norm": 0.5021545886993408, "learning_rate": 0.00034324082148042966, "loss": 1.775, "step": 40934 }, { "epoch": 1.36, "grad_norm": 0.48451781272888184, "learning_rate": 0.00034323047847997833, "loss": 1.7814, "step": 40935 }, { "epoch": 1.36, "grad_norm": 0.4946349263191223, "learning_rate": 0.0003432201354270514, "loss": 1.8148, "step": 40936 }, { "epoch": 1.36, "grad_norm": 0.5054295659065247, "learning_rate": 0.0003432097923216617, "loss": 1.7169, "step": 40937 }, { "epoch": 1.36, "grad_norm": 0.4722408950328827, "learning_rate": 0.0003431994491638216, "loss": 1.7233, "step": 40938 }, { "epoch": 1.36, "grad_norm": 0.5097964406013489, "learning_rate": 0.0003431891059535437, "loss": 1.8183, "step": 40939 }, { "epoch": 1.36, "grad_norm": 0.5028954148292542, "learning_rate": 0.00034317876269084074, "loss": 1.7521, "step": 40940 }, { "epoch": 1.36, "grad_norm": 0.4953141510486603, "learning_rate": 0.00034316841937572493, "loss": 1.8154, "step": 40941 }, { "epoch": 1.36, "grad_norm": 0.49651893973350525, "learning_rate": 0.0003431580760082092, "loss": 1.8438, "step": 40942 }, { "epoch": 1.36, "grad_norm": 0.4909530580043793, "learning_rate": 0.0003431477325883058, "loss": 1.7646, "step": 40943 }, { "epoch": 1.36, "grad_norm": 0.491644024848938, "learning_rate": 0.0003431373891160275, "loss": 1.7527, "step": 40944 }, { "epoch": 1.36, "grad_norm": 0.5586199164390564, "learning_rate": 0.0003431270455913867, "loss": 1.8321, "step": 40945 }, { "epoch": 1.36, "grad_norm": 0.4991164803504944, "learning_rate": 0.0003431167020143961, "loss": 1.7478, "step": 40946 }, { "epoch": 1.36, "grad_norm": 0.47988763451576233, "learning_rate": 0.00034310635838506813, "loss": 1.7427, "step": 40947 }, { "epoch": 1.36, "grad_norm": 0.4884522259235382, "learning_rate": 0.0003430960147034153, "loss": 1.7798, "step": 40948 }, { "epoch": 1.36, "grad_norm": 0.5218555927276611, "learning_rate": 0.0003430856709694504, "loss": 1.7863, "step": 40949 }, { "epoch": 1.36, "grad_norm": 0.5005027651786804, "learning_rate": 0.0003430753271831858, "loss": 1.7894, "step": 40950 }, { "epoch": 1.36, "grad_norm": 0.47744956612586975, "learning_rate": 0.00034306498334463404, "loss": 1.7761, "step": 40951 }, { "epoch": 1.36, "grad_norm": 0.4810318052768707, "learning_rate": 0.00034305463945380775, "loss": 1.7332, "step": 40952 }, { "epoch": 1.36, "grad_norm": 0.49484938383102417, "learning_rate": 0.00034304429551071955, "loss": 1.75, "step": 40953 }, { "epoch": 1.36, "grad_norm": 0.49411576986312866, "learning_rate": 0.0003430339515153819, "loss": 1.7324, "step": 40954 }, { "epoch": 1.36, "grad_norm": 0.5045923590660095, "learning_rate": 0.0003430236074678073, "loss": 1.8414, "step": 40955 }, { "epoch": 1.36, "grad_norm": 0.4864600598812103, "learning_rate": 0.0003430132633680085, "loss": 1.692, "step": 40956 }, { "epoch": 1.36, "grad_norm": 0.4937818944454193, "learning_rate": 0.00034300291921599783, "loss": 1.8062, "step": 40957 }, { "epoch": 1.36, "grad_norm": 0.4947621822357178, "learning_rate": 0.00034299257501178794, "loss": 1.7172, "step": 40958 }, { "epoch": 1.36, "grad_norm": 0.5224130749702454, "learning_rate": 0.00034298223075539144, "loss": 1.759, "step": 40959 }, { "epoch": 1.36, "grad_norm": 0.7186921834945679, "learning_rate": 0.00034297188644682086, "loss": 1.7516, "step": 40960 }, { "epoch": 1.36, "grad_norm": 0.4833741784095764, "learning_rate": 0.0003429615420860887, "loss": 1.7324, "step": 40961 }, { "epoch": 1.36, "grad_norm": 0.517431378364563, "learning_rate": 0.00034295119767320764, "loss": 1.8315, "step": 40962 }, { "epoch": 1.36, "grad_norm": 0.4934653639793396, "learning_rate": 0.00034294085320819005, "loss": 1.7648, "step": 40963 }, { "epoch": 1.36, "grad_norm": 0.48414188623428345, "learning_rate": 0.0003429305086910487, "loss": 1.7293, "step": 40964 }, { "epoch": 1.36, "grad_norm": 0.5110934376716614, "learning_rate": 0.00034292016412179594, "loss": 1.6906, "step": 40965 }, { "epoch": 1.36, "grad_norm": 0.49843645095825195, "learning_rate": 0.0003429098195004444, "loss": 1.814, "step": 40966 }, { "epoch": 1.36, "grad_norm": 0.49300968647003174, "learning_rate": 0.00034289947482700684, "loss": 1.7516, "step": 40967 }, { "epoch": 1.36, "grad_norm": 0.4839593470096588, "learning_rate": 0.00034288913010149547, "loss": 1.7758, "step": 40968 }, { "epoch": 1.36, "grad_norm": 0.4928571879863739, "learning_rate": 0.0003428787853239231, "loss": 1.7937, "step": 40969 }, { "epoch": 1.36, "grad_norm": 0.5046228766441345, "learning_rate": 0.00034286844049430214, "loss": 1.7392, "step": 40970 }, { "epoch": 1.36, "grad_norm": 0.5022889971733093, "learning_rate": 0.0003428580956126452, "loss": 1.7024, "step": 40971 }, { "epoch": 1.36, "grad_norm": 0.48886731266975403, "learning_rate": 0.00034284775067896497, "loss": 1.7899, "step": 40972 }, { "epoch": 1.36, "grad_norm": 0.4885992407798767, "learning_rate": 0.00034283740569327376, "loss": 1.8768, "step": 40973 }, { "epoch": 1.36, "grad_norm": 0.4807383716106415, "learning_rate": 0.0003428270606555843, "loss": 1.6979, "step": 40974 }, { "epoch": 1.36, "grad_norm": 0.4888341426849365, "learning_rate": 0.00034281671556590906, "loss": 1.7133, "step": 40975 }, { "epoch": 1.36, "grad_norm": 0.5050525069236755, "learning_rate": 0.0003428063704242607, "loss": 1.691, "step": 40976 }, { "epoch": 1.36, "grad_norm": 0.5090036392211914, "learning_rate": 0.00034279602523065164, "loss": 1.7836, "step": 40977 }, { "epoch": 1.36, "grad_norm": 0.4885282516479492, "learning_rate": 0.00034278567998509456, "loss": 1.7996, "step": 40978 }, { "epoch": 1.36, "grad_norm": 0.5048276782035828, "learning_rate": 0.00034277533468760204, "loss": 1.7578, "step": 40979 }, { "epoch": 1.36, "grad_norm": 0.49534302949905396, "learning_rate": 0.00034276498933818637, "loss": 1.7364, "step": 40980 }, { "epoch": 1.36, "grad_norm": 0.47903090715408325, "learning_rate": 0.0003427546439368605, "loss": 1.7381, "step": 40981 }, { "epoch": 1.36, "grad_norm": 0.4877825677394867, "learning_rate": 0.0003427442984836367, "loss": 1.8513, "step": 40982 }, { "epoch": 1.36, "grad_norm": 0.5080789923667908, "learning_rate": 0.00034273395297852765, "loss": 1.8215, "step": 40983 }, { "epoch": 1.36, "grad_norm": 0.5051934123039246, "learning_rate": 0.00034272360742154586, "loss": 1.708, "step": 40984 }, { "epoch": 1.36, "grad_norm": 0.4940069615840912, "learning_rate": 0.00034271326181270386, "loss": 1.7623, "step": 40985 }, { "epoch": 1.36, "grad_norm": 0.49183371663093567, "learning_rate": 0.0003427029161520144, "loss": 1.7354, "step": 40986 }, { "epoch": 1.36, "grad_norm": 0.5002486705780029, "learning_rate": 0.00034269257043948977, "loss": 1.7575, "step": 40987 }, { "epoch": 1.36, "grad_norm": 0.48182496428489685, "learning_rate": 0.0003426822246751426, "loss": 1.7775, "step": 40988 }, { "epoch": 1.36, "grad_norm": 0.5100454092025757, "learning_rate": 0.0003426718788589856, "loss": 1.7681, "step": 40989 }, { "epoch": 1.36, "grad_norm": 0.5040552616119385, "learning_rate": 0.0003426615329910312, "loss": 1.7063, "step": 40990 }, { "epoch": 1.36, "grad_norm": 0.507684051990509, "learning_rate": 0.000342651187071292, "loss": 1.827, "step": 40991 }, { "epoch": 1.36, "grad_norm": 0.5031847953796387, "learning_rate": 0.00034264084109978055, "loss": 1.7642, "step": 40992 }, { "epoch": 1.36, "grad_norm": 0.4971019923686981, "learning_rate": 0.0003426304950765094, "loss": 1.76, "step": 40993 }, { "epoch": 1.36, "grad_norm": 0.5149441361427307, "learning_rate": 0.0003426201490014911, "loss": 1.768, "step": 40994 }, { "epoch": 1.36, "grad_norm": 0.5063677430152893, "learning_rate": 0.00034260980287473817, "loss": 1.8317, "step": 40995 }, { "epoch": 1.36, "grad_norm": 0.5083988904953003, "learning_rate": 0.0003425994566962632, "loss": 1.71, "step": 40996 }, { "epoch": 1.36, "grad_norm": 0.4972752034664154, "learning_rate": 0.0003425891104660789, "loss": 1.7738, "step": 40997 }, { "epoch": 1.36, "grad_norm": 0.4967510998249054, "learning_rate": 0.00034257876418419754, "loss": 1.7894, "step": 40998 }, { "epoch": 1.36, "grad_norm": 0.4930249750614166, "learning_rate": 0.00034256841785063193, "loss": 1.7846, "step": 40999 }, { "epoch": 1.36, "grad_norm": 0.4925275444984436, "learning_rate": 0.00034255807146539455, "loss": 1.8201, "step": 41000 }, { "epoch": 1.36, "grad_norm": 0.4998505115509033, "learning_rate": 0.0003425477250284979, "loss": 1.8846, "step": 41001 }, { "epoch": 1.36, "grad_norm": 0.5129276514053345, "learning_rate": 0.00034253737853995466, "loss": 1.7792, "step": 41002 }, { "epoch": 1.36, "grad_norm": 0.4999488890171051, "learning_rate": 0.0003425270319997771, "loss": 1.7135, "step": 41003 }, { "epoch": 1.36, "grad_norm": 0.47232934832572937, "learning_rate": 0.00034251668540797816, "loss": 1.7384, "step": 41004 }, { "epoch": 1.36, "grad_norm": 0.49199649691581726, "learning_rate": 0.0003425063387645701, "loss": 1.7249, "step": 41005 }, { "epoch": 1.36, "grad_norm": 0.510237991809845, "learning_rate": 0.0003424959920695658, "loss": 1.787, "step": 41006 }, { "epoch": 1.36, "grad_norm": 0.5053348541259766, "learning_rate": 0.0003424856453229775, "loss": 1.7815, "step": 41007 }, { "epoch": 1.36, "grad_norm": 0.4754640460014343, "learning_rate": 0.00034247529852481795, "loss": 1.765, "step": 41008 }, { "epoch": 1.36, "grad_norm": 0.4830755293369293, "learning_rate": 0.0003424649516750996, "loss": 1.6921, "step": 41009 }, { "epoch": 1.36, "grad_norm": 0.49049508571624756, "learning_rate": 0.0003424546047738351, "loss": 1.8645, "step": 41010 }, { "epoch": 1.36, "grad_norm": 0.50344318151474, "learning_rate": 0.0003424442578210369, "loss": 1.7994, "step": 41011 }, { "epoch": 1.36, "grad_norm": 0.5004108548164368, "learning_rate": 0.0003424339108167177, "loss": 1.8263, "step": 41012 }, { "epoch": 1.36, "grad_norm": 0.48082712292671204, "learning_rate": 0.00034242356376088987, "loss": 1.7883, "step": 41013 }, { "epoch": 1.36, "grad_norm": 0.48848646879196167, "learning_rate": 0.0003424132166535662, "loss": 1.766, "step": 41014 }, { "epoch": 1.36, "grad_norm": 0.4962841272354126, "learning_rate": 0.000342402869494759, "loss": 1.6522, "step": 41015 }, { "epoch": 1.36, "grad_norm": 0.4908777177333832, "learning_rate": 0.0003423925222844812, "loss": 1.7465, "step": 41016 }, { "epoch": 1.36, "grad_norm": 0.5952543616294861, "learning_rate": 0.00034238217502274496, "loss": 1.7583, "step": 41017 }, { "epoch": 1.36, "grad_norm": 0.4792822003364563, "learning_rate": 0.000342371827709563, "loss": 1.7775, "step": 41018 }, { "epoch": 1.36, "grad_norm": 0.49509671330451965, "learning_rate": 0.00034236148034494786, "loss": 1.7566, "step": 41019 }, { "epoch": 1.36, "grad_norm": 0.4784250855445862, "learning_rate": 0.0003423511329289122, "loss": 1.8144, "step": 41020 }, { "epoch": 1.36, "grad_norm": 0.499728798866272, "learning_rate": 0.00034234078546146844, "loss": 1.7465, "step": 41021 }, { "epoch": 1.36, "grad_norm": 0.4951304793357849, "learning_rate": 0.0003423304379426293, "loss": 1.7353, "step": 41022 }, { "epoch": 1.36, "grad_norm": 0.534918487071991, "learning_rate": 0.0003423200903724072, "loss": 1.8332, "step": 41023 }, { "epoch": 1.36, "grad_norm": 0.4943152964115143, "learning_rate": 0.0003423097427508147, "loss": 1.7825, "step": 41024 }, { "epoch": 1.36, "grad_norm": 0.5023164749145508, "learning_rate": 0.00034229939507786437, "loss": 1.7925, "step": 41025 }, { "epoch": 1.36, "grad_norm": 0.5044686198234558, "learning_rate": 0.0003422890473535689, "loss": 1.7288, "step": 41026 }, { "epoch": 1.36, "grad_norm": 0.49399417638778687, "learning_rate": 0.00034227869957794083, "loss": 1.6633, "step": 41027 }, { "epoch": 1.37, "grad_norm": 0.5020846724510193, "learning_rate": 0.00034226835175099244, "loss": 1.7612, "step": 41028 }, { "epoch": 1.37, "grad_norm": 0.4858543574810028, "learning_rate": 0.0003422580038727367, "loss": 1.818, "step": 41029 }, { "epoch": 1.37, "grad_norm": 0.49763745069503784, "learning_rate": 0.00034224765594318584, "loss": 1.7965, "step": 41030 }, { "epoch": 1.37, "grad_norm": 0.49462372064590454, "learning_rate": 0.00034223730796235257, "loss": 1.7733, "step": 41031 }, { "epoch": 1.37, "grad_norm": 0.5040463805198669, "learning_rate": 0.00034222695993024944, "loss": 1.7521, "step": 41032 }, { "epoch": 1.37, "grad_norm": 0.5122778415679932, "learning_rate": 0.00034221661184688897, "loss": 1.8303, "step": 41033 }, { "epoch": 1.37, "grad_norm": 0.4997884929180145, "learning_rate": 0.0003422062637122839, "loss": 1.7582, "step": 41034 }, { "epoch": 1.37, "grad_norm": 0.48708662390708923, "learning_rate": 0.0003421959155264465, "loss": 1.7339, "step": 41035 }, { "epoch": 1.37, "grad_norm": 0.5025674104690552, "learning_rate": 0.00034218556728938957, "loss": 1.746, "step": 41036 }, { "epoch": 1.37, "grad_norm": 0.5034672617912292, "learning_rate": 0.00034217521900112546, "loss": 1.8413, "step": 41037 }, { "epoch": 1.37, "grad_norm": 0.49527618288993835, "learning_rate": 0.0003421648706616669, "loss": 1.7344, "step": 41038 }, { "epoch": 1.37, "grad_norm": 0.5119589567184448, "learning_rate": 0.00034215452227102645, "loss": 1.809, "step": 41039 }, { "epoch": 1.37, "grad_norm": 0.5083608031272888, "learning_rate": 0.00034214417382921655, "loss": 1.7771, "step": 41040 }, { "epoch": 1.37, "grad_norm": 0.4937729239463806, "learning_rate": 0.00034213382533624997, "loss": 1.7414, "step": 41041 }, { "epoch": 1.37, "grad_norm": 0.4955190122127533, "learning_rate": 0.000342123476792139, "loss": 1.8031, "step": 41042 }, { "epoch": 1.37, "grad_norm": 0.4931182563304901, "learning_rate": 0.0003421131281968963, "loss": 1.7788, "step": 41043 }, { "epoch": 1.37, "grad_norm": 0.5027444958686829, "learning_rate": 0.00034210277955053453, "loss": 1.7775, "step": 41044 }, { "epoch": 1.37, "grad_norm": 0.4867255389690399, "learning_rate": 0.00034209243085306623, "loss": 1.7664, "step": 41045 }, { "epoch": 1.37, "grad_norm": 0.48693010210990906, "learning_rate": 0.00034208208210450387, "loss": 1.7633, "step": 41046 }, { "epoch": 1.37, "grad_norm": 0.49891942739486694, "learning_rate": 0.0003420717333048601, "loss": 1.7593, "step": 41047 }, { "epoch": 1.37, "grad_norm": 0.5141705274581909, "learning_rate": 0.00034206138445414745, "loss": 1.7833, "step": 41048 }, { "epoch": 1.37, "grad_norm": 0.49250638484954834, "learning_rate": 0.0003420510355523784, "loss": 1.769, "step": 41049 }, { "epoch": 1.37, "grad_norm": 0.4985068738460541, "learning_rate": 0.0003420406865995656, "loss": 1.8395, "step": 41050 }, { "epoch": 1.37, "grad_norm": 0.49474871158599854, "learning_rate": 0.00034203033759572167, "loss": 1.6974, "step": 41051 }, { "epoch": 1.37, "grad_norm": 0.473727285861969, "learning_rate": 0.0003420199885408591, "loss": 1.702, "step": 41052 }, { "epoch": 1.37, "grad_norm": 0.4943374693393707, "learning_rate": 0.00034200963943499036, "loss": 1.7776, "step": 41053 }, { "epoch": 1.37, "grad_norm": 0.4755167067050934, "learning_rate": 0.00034199929027812825, "loss": 1.8014, "step": 41054 }, { "epoch": 1.37, "grad_norm": 0.484345406293869, "learning_rate": 0.0003419889410702851, "loss": 1.8279, "step": 41055 }, { "epoch": 1.37, "grad_norm": 0.5007331967353821, "learning_rate": 0.0003419785918114736, "loss": 1.7461, "step": 41056 }, { "epoch": 1.37, "grad_norm": 0.49459388852119446, "learning_rate": 0.00034196824250170627, "loss": 1.7946, "step": 41057 }, { "epoch": 1.37, "grad_norm": 0.5048702955245972, "learning_rate": 0.00034195789314099556, "loss": 1.6648, "step": 41058 }, { "epoch": 1.37, "grad_norm": 0.505220890045166, "learning_rate": 0.00034194754372935434, "loss": 1.7814, "step": 41059 }, { "epoch": 1.37, "grad_norm": 0.48027491569519043, "learning_rate": 0.00034193719426679476, "loss": 1.6981, "step": 41060 }, { "epoch": 1.37, "grad_norm": 0.489885151386261, "learning_rate": 0.00034192684475332983, "loss": 1.6984, "step": 41061 }, { "epoch": 1.37, "grad_norm": 0.504040002822876, "learning_rate": 0.0003419164951889718, "loss": 1.6573, "step": 41062 }, { "epoch": 1.37, "grad_norm": 0.48186585307121277, "learning_rate": 0.00034190614557373325, "loss": 1.8031, "step": 41063 }, { "epoch": 1.37, "grad_norm": 0.5008061528205872, "learning_rate": 0.000341895795907627, "loss": 1.74, "step": 41064 }, { "epoch": 1.37, "grad_norm": 0.48077455163002014, "learning_rate": 0.0003418854461906652, "loss": 1.7694, "step": 41065 }, { "epoch": 1.37, "grad_norm": 0.49410733580589294, "learning_rate": 0.0003418750964228608, "loss": 1.7809, "step": 41066 }, { "epoch": 1.37, "grad_norm": 0.5140205025672913, "learning_rate": 0.00034186474660422613, "loss": 1.8102, "step": 41067 }, { "epoch": 1.37, "grad_norm": 0.500150203704834, "learning_rate": 0.00034185439673477383, "loss": 1.7241, "step": 41068 }, { "epoch": 1.37, "grad_norm": 0.4987025260925293, "learning_rate": 0.00034184404681451646, "loss": 1.7185, "step": 41069 }, { "epoch": 1.37, "grad_norm": 0.4981434941291809, "learning_rate": 0.00034183369684346666, "loss": 1.7595, "step": 41070 }, { "epoch": 1.37, "grad_norm": 0.5112152695655823, "learning_rate": 0.00034182334682163684, "loss": 1.8031, "step": 41071 }, { "epoch": 1.37, "grad_norm": 0.4932231307029724, "learning_rate": 0.0003418129967490397, "loss": 1.7955, "step": 41072 }, { "epoch": 1.37, "grad_norm": 0.48810577392578125, "learning_rate": 0.0003418026466256876, "loss": 1.7998, "step": 41073 }, { "epoch": 1.37, "grad_norm": 0.4795316457748413, "learning_rate": 0.00034179229645159336, "loss": 1.8024, "step": 41074 }, { "epoch": 1.37, "grad_norm": 0.49694278836250305, "learning_rate": 0.00034178194622676945, "loss": 1.7123, "step": 41075 }, { "epoch": 1.37, "grad_norm": 0.5040454864501953, "learning_rate": 0.00034177159595122835, "loss": 1.7887, "step": 41076 }, { "epoch": 1.37, "grad_norm": 0.4968080520629883, "learning_rate": 0.00034176124562498275, "loss": 1.8097, "step": 41077 }, { "epoch": 1.37, "grad_norm": 0.49997425079345703, "learning_rate": 0.0003417508952480451, "loss": 1.8736, "step": 41078 }, { "epoch": 1.37, "grad_norm": 0.5064424872398376, "learning_rate": 0.000341740544820428, "loss": 1.7436, "step": 41079 }, { "epoch": 1.37, "grad_norm": 0.4820408225059509, "learning_rate": 0.0003417301943421441, "loss": 1.797, "step": 41080 }, { "epoch": 1.37, "grad_norm": 0.49156320095062256, "learning_rate": 0.0003417198438132058, "loss": 1.7565, "step": 41081 }, { "epoch": 1.37, "grad_norm": 0.474724143743515, "learning_rate": 0.00034170949323362575, "loss": 1.7188, "step": 41082 }, { "epoch": 1.37, "grad_norm": 0.5001288652420044, "learning_rate": 0.00034169914260341665, "loss": 1.7137, "step": 41083 }, { "epoch": 1.37, "grad_norm": 0.5067371129989624, "learning_rate": 0.00034168879192259086, "loss": 1.729, "step": 41084 }, { "epoch": 1.37, "grad_norm": 0.48886579275131226, "learning_rate": 0.000341678441191161, "loss": 1.8269, "step": 41085 }, { "epoch": 1.37, "grad_norm": 0.5020900964736938, "learning_rate": 0.0003416680904091397, "loss": 1.7601, "step": 41086 }, { "epoch": 1.37, "grad_norm": 0.4942760169506073, "learning_rate": 0.0003416577395765394, "loss": 1.8002, "step": 41087 }, { "epoch": 1.37, "grad_norm": 0.5160210728645325, "learning_rate": 0.00034164738869337283, "loss": 1.8215, "step": 41088 }, { "epoch": 1.37, "grad_norm": 0.5368436574935913, "learning_rate": 0.0003416370377596525, "loss": 1.725, "step": 41089 }, { "epoch": 1.37, "grad_norm": 0.49418431520462036, "learning_rate": 0.0003416266867753908, "loss": 1.7406, "step": 41090 }, { "epoch": 1.37, "grad_norm": 0.4901057183742523, "learning_rate": 0.00034161633574060057, "loss": 1.7163, "step": 41091 }, { "epoch": 1.37, "grad_norm": 0.5171944499015808, "learning_rate": 0.00034160598465529413, "loss": 1.7784, "step": 41092 }, { "epoch": 1.37, "grad_norm": 0.5103976130485535, "learning_rate": 0.0003415956335194842, "loss": 1.7106, "step": 41093 }, { "epoch": 1.37, "grad_norm": 0.5055894255638123, "learning_rate": 0.0003415852823331833, "loss": 1.7656, "step": 41094 }, { "epoch": 1.37, "grad_norm": 0.5136032700538635, "learning_rate": 0.000341574931096404, "loss": 1.7516, "step": 41095 }, { "epoch": 1.37, "grad_norm": 0.49341389536857605, "learning_rate": 0.0003415645798091589, "loss": 1.7444, "step": 41096 }, { "epoch": 1.37, "grad_norm": 0.49360954761505127, "learning_rate": 0.0003415542284714604, "loss": 1.7522, "step": 41097 }, { "epoch": 1.37, "grad_norm": 0.4873102605342865, "learning_rate": 0.0003415438770833213, "loss": 1.733, "step": 41098 }, { "epoch": 1.37, "grad_norm": 0.49638450145721436, "learning_rate": 0.000341533525644754, "loss": 1.8514, "step": 41099 }, { "epoch": 1.37, "grad_norm": 0.4827151596546173, "learning_rate": 0.0003415231741557712, "loss": 1.7461, "step": 41100 }, { "epoch": 1.37, "grad_norm": 0.5124308466911316, "learning_rate": 0.0003415128226163853, "loss": 1.7277, "step": 41101 }, { "epoch": 1.37, "grad_norm": 0.49023592472076416, "learning_rate": 0.000341502471026609, "loss": 1.7364, "step": 41102 }, { "epoch": 1.37, "grad_norm": 0.49919888377189636, "learning_rate": 0.0003414921193864548, "loss": 1.7879, "step": 41103 }, { "epoch": 1.37, "grad_norm": 0.49601343274116516, "learning_rate": 0.0003414817676959353, "loss": 1.8055, "step": 41104 }, { "epoch": 1.37, "grad_norm": 0.5344549417495728, "learning_rate": 0.000341471415955063, "loss": 1.7979, "step": 41105 }, { "epoch": 1.37, "grad_norm": 0.5060636401176453, "learning_rate": 0.0003414610641638505, "loss": 1.7867, "step": 41106 }, { "epoch": 1.37, "grad_norm": 0.4824427664279938, "learning_rate": 0.0003414507123223104, "loss": 1.8337, "step": 41107 }, { "epoch": 1.37, "grad_norm": 0.4824361801147461, "learning_rate": 0.0003414403604304552, "loss": 1.7417, "step": 41108 }, { "epoch": 1.37, "grad_norm": 0.49832215905189514, "learning_rate": 0.00034143000848829767, "loss": 1.7864, "step": 41109 }, { "epoch": 1.37, "grad_norm": 0.4961254596710205, "learning_rate": 0.00034141965649585006, "loss": 1.7569, "step": 41110 }, { "epoch": 1.37, "grad_norm": 0.5110372304916382, "learning_rate": 0.0003414093044531252, "loss": 1.7949, "step": 41111 }, { "epoch": 1.37, "grad_norm": 0.4954600930213928, "learning_rate": 0.0003413989523601354, "loss": 1.7888, "step": 41112 }, { "epoch": 1.37, "grad_norm": 0.5148321390151978, "learning_rate": 0.00034138860021689347, "loss": 1.7587, "step": 41113 }, { "epoch": 1.37, "grad_norm": 0.5015196204185486, "learning_rate": 0.00034137824802341196, "loss": 1.7445, "step": 41114 }, { "epoch": 1.37, "grad_norm": 0.5411051511764526, "learning_rate": 0.00034136789577970315, "loss": 1.8476, "step": 41115 }, { "epoch": 1.37, "grad_norm": 0.5064601302146912, "learning_rate": 0.00034135754348577996, "loss": 1.8615, "step": 41116 }, { "epoch": 1.37, "grad_norm": 0.48111823201179504, "learning_rate": 0.0003413471911416548, "loss": 1.7345, "step": 41117 }, { "epoch": 1.37, "grad_norm": 0.4967654347419739, "learning_rate": 0.0003413368387473401, "loss": 1.6915, "step": 41118 }, { "epoch": 1.37, "grad_norm": 0.48719361424446106, "learning_rate": 0.00034132648630284877, "loss": 1.8134, "step": 41119 }, { "epoch": 1.37, "grad_norm": 0.4940492510795593, "learning_rate": 0.00034131613380819297, "loss": 1.7752, "step": 41120 }, { "epoch": 1.37, "grad_norm": 0.4979207515716553, "learning_rate": 0.0003413057812633856, "loss": 1.7678, "step": 41121 }, { "epoch": 1.37, "grad_norm": 0.4850955605506897, "learning_rate": 0.00034129542866843905, "loss": 1.7002, "step": 41122 }, { "epoch": 1.37, "grad_norm": 0.4881242513656616, "learning_rate": 0.00034128507602336597, "loss": 1.7508, "step": 41123 }, { "epoch": 1.37, "grad_norm": 0.5005556344985962, "learning_rate": 0.00034127472332817885, "loss": 1.8569, "step": 41124 }, { "epoch": 1.37, "grad_norm": 0.4875953495502472, "learning_rate": 0.00034126437058289036, "loss": 1.7372, "step": 41125 }, { "epoch": 1.37, "grad_norm": 0.4958970844745636, "learning_rate": 0.00034125401778751304, "loss": 1.8071, "step": 41126 }, { "epoch": 1.37, "grad_norm": 0.48876267671585083, "learning_rate": 0.0003412436649420593, "loss": 1.7546, "step": 41127 }, { "epoch": 1.37, "grad_norm": 0.4996437132358551, "learning_rate": 0.0003412333120465419, "loss": 1.7935, "step": 41128 }, { "epoch": 1.37, "grad_norm": 0.488902747631073, "learning_rate": 0.0003412229591009733, "loss": 1.751, "step": 41129 }, { "epoch": 1.37, "grad_norm": 0.49566948413848877, "learning_rate": 0.0003412126061053661, "loss": 1.8296, "step": 41130 }, { "epoch": 1.37, "grad_norm": 0.48290303349494934, "learning_rate": 0.00034120225305973287, "loss": 1.7195, "step": 41131 }, { "epoch": 1.37, "grad_norm": 0.48388445377349854, "learning_rate": 0.00034119189996408624, "loss": 1.746, "step": 41132 }, { "epoch": 1.37, "grad_norm": 0.4972281754016876, "learning_rate": 0.00034118154681843865, "loss": 1.7853, "step": 41133 }, { "epoch": 1.37, "grad_norm": 0.4733234941959381, "learning_rate": 0.00034117119362280277, "loss": 1.7908, "step": 41134 }, { "epoch": 1.37, "grad_norm": 0.49741724133491516, "learning_rate": 0.0003411608403771911, "loss": 1.751, "step": 41135 }, { "epoch": 1.37, "grad_norm": 0.49721336364746094, "learning_rate": 0.0003411504870816162, "loss": 1.6911, "step": 41136 }, { "epoch": 1.37, "grad_norm": 0.485399454832077, "learning_rate": 0.0003411401337360907, "loss": 1.7396, "step": 41137 }, { "epoch": 1.37, "grad_norm": 0.5023540258407593, "learning_rate": 0.00034112978034062715, "loss": 1.6889, "step": 41138 }, { "epoch": 1.37, "grad_norm": 0.48673015832901, "learning_rate": 0.0003411194268952381, "loss": 1.7823, "step": 41139 }, { "epoch": 1.37, "grad_norm": 0.4887036383152008, "learning_rate": 0.00034110907339993617, "loss": 1.7619, "step": 41140 }, { "epoch": 1.37, "grad_norm": 0.5281636714935303, "learning_rate": 0.0003410987198547338, "loss": 1.8112, "step": 41141 }, { "epoch": 1.37, "grad_norm": 0.5103235840797424, "learning_rate": 0.00034108836625964373, "loss": 1.7777, "step": 41142 }, { "epoch": 1.37, "grad_norm": 0.49550947546958923, "learning_rate": 0.0003410780126146783, "loss": 1.859, "step": 41143 }, { "epoch": 1.37, "grad_norm": 0.4951266050338745, "learning_rate": 0.0003410676589198504, "loss": 1.7693, "step": 41144 }, { "epoch": 1.37, "grad_norm": 0.4993506073951721, "learning_rate": 0.0003410573051751722, "loss": 1.7176, "step": 41145 }, { "epoch": 1.37, "grad_norm": 0.5234206318855286, "learning_rate": 0.00034104695138065667, "loss": 1.7332, "step": 41146 }, { "epoch": 1.37, "grad_norm": 0.4983959496021271, "learning_rate": 0.00034103659753631613, "loss": 1.7269, "step": 41147 }, { "epoch": 1.37, "grad_norm": 0.48560580611228943, "learning_rate": 0.0003410262436421631, "loss": 1.7934, "step": 41148 }, { "epoch": 1.37, "grad_norm": 0.5150681734085083, "learning_rate": 0.0003410158896982104, "loss": 1.7806, "step": 41149 }, { "epoch": 1.37, "grad_norm": 0.5076544880867004, "learning_rate": 0.0003410055357044704, "loss": 1.804, "step": 41150 }, { "epoch": 1.37, "grad_norm": 0.4882473647594452, "learning_rate": 0.0003409951816609558, "loss": 1.7488, "step": 41151 }, { "epoch": 1.37, "grad_norm": 0.5064367651939392, "learning_rate": 0.00034098482756767884, "loss": 1.8461, "step": 41152 }, { "epoch": 1.37, "grad_norm": 0.4849078357219696, "learning_rate": 0.00034097447342465264, "loss": 1.776, "step": 41153 }, { "epoch": 1.37, "grad_norm": 0.5142514109611511, "learning_rate": 0.0003409641192318893, "loss": 1.7611, "step": 41154 }, { "epoch": 1.37, "grad_norm": 0.4955204129219055, "learning_rate": 0.0003409537649894016, "loss": 1.7911, "step": 41155 }, { "epoch": 1.37, "grad_norm": 0.5156233310699463, "learning_rate": 0.000340943410697202, "loss": 1.8196, "step": 41156 }, { "epoch": 1.37, "grad_norm": 0.48144015669822693, "learning_rate": 0.00034093305635530315, "loss": 1.7522, "step": 41157 }, { "epoch": 1.37, "grad_norm": 0.5058597922325134, "learning_rate": 0.00034092270196371774, "loss": 1.7597, "step": 41158 }, { "epoch": 1.37, "grad_norm": 0.47661009430885315, "learning_rate": 0.0003409123475224581, "loss": 1.7774, "step": 41159 }, { "epoch": 1.37, "grad_norm": 0.4974631369113922, "learning_rate": 0.0003409019930315369, "loss": 1.6719, "step": 41160 }, { "epoch": 1.37, "grad_norm": 0.5011874437332153, "learning_rate": 0.00034089163849096665, "loss": 1.7654, "step": 41161 }, { "epoch": 1.37, "grad_norm": 0.5057618021965027, "learning_rate": 0.00034088128390076, "loss": 1.7806, "step": 41162 }, { "epoch": 1.37, "grad_norm": 0.5452739596366882, "learning_rate": 0.00034087092926092955, "loss": 1.8145, "step": 41163 }, { "epoch": 1.37, "grad_norm": 0.48573753237724304, "learning_rate": 0.00034086057457148784, "loss": 1.7245, "step": 41164 }, { "epoch": 1.37, "grad_norm": 0.5037106871604919, "learning_rate": 0.0003408502198324473, "loss": 1.7237, "step": 41165 }, { "epoch": 1.37, "grad_norm": 0.4968020021915436, "learning_rate": 0.0003408398650438207, "loss": 1.7195, "step": 41166 }, { "epoch": 1.37, "grad_norm": 0.5013612508773804, "learning_rate": 0.0003408295102056205, "loss": 1.7808, "step": 41167 }, { "epoch": 1.37, "grad_norm": 0.4936841130256653, "learning_rate": 0.00034081915531785923, "loss": 1.8266, "step": 41168 }, { "epoch": 1.37, "grad_norm": 0.4993151128292084, "learning_rate": 0.0003408088003805496, "loss": 1.8851, "step": 41169 }, { "epoch": 1.37, "grad_norm": 0.4955839514732361, "learning_rate": 0.000340798445393704, "loss": 1.7924, "step": 41170 }, { "epoch": 1.37, "grad_norm": 1.0300663709640503, "learning_rate": 0.0003407880903573353, "loss": 1.8359, "step": 41171 }, { "epoch": 1.37, "grad_norm": 0.49795296788215637, "learning_rate": 0.0003407777352714557, "loss": 1.7847, "step": 41172 }, { "epoch": 1.37, "grad_norm": 0.4862419068813324, "learning_rate": 0.000340767380136078, "loss": 1.7697, "step": 41173 }, { "epoch": 1.37, "grad_norm": 0.5054401159286499, "learning_rate": 0.00034075702495121467, "loss": 1.7353, "step": 41174 }, { "epoch": 1.37, "grad_norm": 0.5067096948623657, "learning_rate": 0.00034074666971687826, "loss": 1.7358, "step": 41175 }, { "epoch": 1.37, "grad_norm": 0.4974367618560791, "learning_rate": 0.0003407363144330816, "loss": 1.742, "step": 41176 }, { "epoch": 1.37, "grad_norm": 0.48804599046707153, "learning_rate": 0.0003407259590998368, "loss": 1.7767, "step": 41177 }, { "epoch": 1.37, "grad_norm": 0.48635175824165344, "learning_rate": 0.0003407156037171569, "loss": 1.7101, "step": 41178 }, { "epoch": 1.37, "grad_norm": 0.5153462290763855, "learning_rate": 0.00034070524828505415, "loss": 1.7853, "step": 41179 }, { "epoch": 1.37, "grad_norm": 0.4848994314670563, "learning_rate": 0.0003406948928035412, "loss": 1.7342, "step": 41180 }, { "epoch": 1.37, "grad_norm": 0.5316043496131897, "learning_rate": 0.00034068453727263074, "loss": 1.8144, "step": 41181 }, { "epoch": 1.37, "grad_norm": 0.49767735600471497, "learning_rate": 0.0003406741816923352, "loss": 1.8, "step": 41182 }, { "epoch": 1.37, "grad_norm": 0.4888578951358795, "learning_rate": 0.00034066382606266716, "loss": 1.7621, "step": 41183 }, { "epoch": 1.37, "grad_norm": 0.48283472657203674, "learning_rate": 0.00034065347038363927, "loss": 1.7928, "step": 41184 }, { "epoch": 1.37, "grad_norm": 0.49325114488601685, "learning_rate": 0.0003406431146552641, "loss": 1.7694, "step": 41185 }, { "epoch": 1.37, "grad_norm": 0.5069466829299927, "learning_rate": 0.0003406327588775541, "loss": 1.768, "step": 41186 }, { "epoch": 1.37, "grad_norm": 0.6638574004173279, "learning_rate": 0.00034062240305052195, "loss": 1.8056, "step": 41187 }, { "epoch": 1.37, "grad_norm": 0.4992017447948456, "learning_rate": 0.0003406120471741802, "loss": 1.7231, "step": 41188 }, { "epoch": 1.37, "grad_norm": 0.4924876093864441, "learning_rate": 0.00034060169124854137, "loss": 1.8158, "step": 41189 }, { "epoch": 1.37, "grad_norm": 0.525871217250824, "learning_rate": 0.00034059133527361806, "loss": 1.8351, "step": 41190 }, { "epoch": 1.37, "grad_norm": 0.5061896443367004, "learning_rate": 0.00034058097924942286, "loss": 1.8119, "step": 41191 }, { "epoch": 1.37, "grad_norm": 0.49718132615089417, "learning_rate": 0.00034057062317596835, "loss": 1.8244, "step": 41192 }, { "epoch": 1.37, "grad_norm": 0.4855242073535919, "learning_rate": 0.0003405602670532671, "loss": 1.7757, "step": 41193 }, { "epoch": 1.37, "grad_norm": 0.5097225308418274, "learning_rate": 0.0003405499108813317, "loss": 1.7842, "step": 41194 }, { "epoch": 1.37, "grad_norm": 0.5010858774185181, "learning_rate": 0.00034053955466017456, "loss": 1.8571, "step": 41195 }, { "epoch": 1.37, "grad_norm": 0.5267612934112549, "learning_rate": 0.0003405291983898084, "loss": 1.8146, "step": 41196 }, { "epoch": 1.37, "grad_norm": 0.4916667938232422, "learning_rate": 0.00034051884207024583, "loss": 1.8343, "step": 41197 }, { "epoch": 1.37, "grad_norm": 0.4989713728427887, "learning_rate": 0.0003405084857014993, "loss": 1.8092, "step": 41198 }, { "epoch": 1.37, "grad_norm": 0.48319441080093384, "learning_rate": 0.0003404981292835815, "loss": 1.8131, "step": 41199 }, { "epoch": 1.37, "grad_norm": 0.48220187425613403, "learning_rate": 0.0003404877728165049, "loss": 1.7126, "step": 41200 }, { "epoch": 1.37, "grad_norm": 0.4980831742286682, "learning_rate": 0.0003404774163002821, "loss": 1.7368, "step": 41201 }, { "epoch": 1.37, "grad_norm": 0.49755871295928955, "learning_rate": 0.0003404670597349258, "loss": 1.7323, "step": 41202 }, { "epoch": 1.37, "grad_norm": 0.50034499168396, "learning_rate": 0.0003404567031204483, "loss": 1.8007, "step": 41203 }, { "epoch": 1.37, "grad_norm": 0.4818236827850342, "learning_rate": 0.00034044634645686233, "loss": 1.7223, "step": 41204 }, { "epoch": 1.37, "grad_norm": 0.4944298267364502, "learning_rate": 0.0003404359897441805, "loss": 1.7436, "step": 41205 }, { "epoch": 1.37, "grad_norm": 0.49205201864242554, "learning_rate": 0.00034042563298241534, "loss": 1.8302, "step": 41206 }, { "epoch": 1.37, "grad_norm": 0.49284684658050537, "learning_rate": 0.00034041527617157937, "loss": 1.8267, "step": 41207 }, { "epoch": 1.37, "grad_norm": 0.4957387447357178, "learning_rate": 0.00034040491931168525, "loss": 1.8091, "step": 41208 }, { "epoch": 1.37, "grad_norm": 0.49868810176849365, "learning_rate": 0.0003403945624027455, "loss": 1.7921, "step": 41209 }, { "epoch": 1.37, "grad_norm": 0.504389226436615, "learning_rate": 0.00034038420544477274, "loss": 1.7752, "step": 41210 }, { "epoch": 1.37, "grad_norm": 0.49113383889198303, "learning_rate": 0.00034037384843777944, "loss": 1.7237, "step": 41211 }, { "epoch": 1.37, "grad_norm": 0.48930442333221436, "learning_rate": 0.00034036349138177825, "loss": 1.7933, "step": 41212 }, { "epoch": 1.37, "grad_norm": 0.4986392557621002, "learning_rate": 0.00034035313427678186, "loss": 1.8488, "step": 41213 }, { "epoch": 1.37, "grad_norm": 0.49603742361068726, "learning_rate": 0.0003403427771228025, "loss": 1.7947, "step": 41214 }, { "epoch": 1.37, "grad_norm": 0.4927966296672821, "learning_rate": 0.00034033241991985317, "loss": 1.8201, "step": 41215 }, { "epoch": 1.37, "grad_norm": 0.5212690830230713, "learning_rate": 0.000340322062667946, "loss": 1.7421, "step": 41216 }, { "epoch": 1.37, "grad_norm": 0.508366048336029, "learning_rate": 0.00034031170536709393, "loss": 1.7058, "step": 41217 }, { "epoch": 1.37, "grad_norm": 0.49849697947502136, "learning_rate": 0.00034030134801730935, "loss": 1.7757, "step": 41218 }, { "epoch": 1.37, "grad_norm": 0.49084752798080444, "learning_rate": 0.00034029099061860487, "loss": 1.7846, "step": 41219 }, { "epoch": 1.37, "grad_norm": 0.5040334463119507, "learning_rate": 0.00034028063317099316, "loss": 1.792, "step": 41220 }, { "epoch": 1.37, "grad_norm": 0.4918065369129181, "learning_rate": 0.0003402702756744866, "loss": 1.6865, "step": 41221 }, { "epoch": 1.37, "grad_norm": 0.5417416095733643, "learning_rate": 0.00034025991812909785, "loss": 1.8275, "step": 41222 }, { "epoch": 1.37, "grad_norm": 0.5249507427215576, "learning_rate": 0.00034024956053483945, "loss": 1.7636, "step": 41223 }, { "epoch": 1.37, "grad_norm": 0.6444824934005737, "learning_rate": 0.0003402392028917241, "loss": 1.7558, "step": 41224 }, { "epoch": 1.37, "grad_norm": 0.484070748090744, "learning_rate": 0.0003402288451997642, "loss": 1.7767, "step": 41225 }, { "epoch": 1.37, "grad_norm": 0.5120139718055725, "learning_rate": 0.0003402184874589726, "loss": 1.7127, "step": 41226 }, { "epoch": 1.37, "grad_norm": 0.5018860697746277, "learning_rate": 0.0003402081296693615, "loss": 1.7343, "step": 41227 }, { "epoch": 1.37, "grad_norm": 0.5157604813575745, "learning_rate": 0.00034019777183094376, "loss": 1.7551, "step": 41228 }, { "epoch": 1.37, "grad_norm": 0.49642089009284973, "learning_rate": 0.00034018741394373176, "loss": 1.8243, "step": 41229 }, { "epoch": 1.37, "grad_norm": 0.49167928099632263, "learning_rate": 0.00034017705600773815, "loss": 1.8354, "step": 41230 }, { "epoch": 1.37, "grad_norm": 0.49252310395240784, "learning_rate": 0.00034016669802297567, "loss": 1.8146, "step": 41231 }, { "epoch": 1.37, "grad_norm": 0.4897100031375885, "learning_rate": 0.0003401563399894565, "loss": 1.7968, "step": 41232 }, { "epoch": 1.37, "grad_norm": 0.495504766702652, "learning_rate": 0.00034014598190719366, "loss": 1.7704, "step": 41233 }, { "epoch": 1.37, "grad_norm": 0.4922372102737427, "learning_rate": 0.0003401356237761994, "loss": 1.7622, "step": 41234 }, { "epoch": 1.37, "grad_norm": 0.47815945744514465, "learning_rate": 0.00034012526559648647, "loss": 1.6485, "step": 41235 }, { "epoch": 1.37, "grad_norm": 0.5236271619796753, "learning_rate": 0.0003401149073680674, "loss": 1.7843, "step": 41236 }, { "epoch": 1.37, "grad_norm": 0.5492942333221436, "learning_rate": 0.00034010454909095454, "loss": 1.7939, "step": 41237 }, { "epoch": 1.37, "grad_norm": 0.49195078015327454, "learning_rate": 0.0003400941907651609, "loss": 1.7278, "step": 41238 }, { "epoch": 1.37, "grad_norm": 0.5164366364479065, "learning_rate": 0.00034008383239069866, "loss": 1.8127, "step": 41239 }, { "epoch": 1.37, "grad_norm": 0.4952894449234009, "learning_rate": 0.0003400734739675807, "loss": 1.8284, "step": 41240 }, { "epoch": 1.37, "grad_norm": 0.6069939732551575, "learning_rate": 0.00034006311549581933, "loss": 1.8227, "step": 41241 }, { "epoch": 1.37, "grad_norm": 0.49436846375465393, "learning_rate": 0.0003400527569754273, "loss": 1.6638, "step": 41242 }, { "epoch": 1.37, "grad_norm": 0.5119291543960571, "learning_rate": 0.00034004239840641716, "loss": 1.8081, "step": 41243 }, { "epoch": 1.37, "grad_norm": 0.5190959572792053, "learning_rate": 0.00034003203978880136, "loss": 1.7918, "step": 41244 }, { "epoch": 1.37, "grad_norm": 0.5042169690132141, "learning_rate": 0.0003400216811225926, "loss": 1.7839, "step": 41245 }, { "epoch": 1.37, "grad_norm": 0.5020625591278076, "learning_rate": 0.0003400113224078034, "loss": 1.7673, "step": 41246 }, { "epoch": 1.37, "grad_norm": 0.49308091402053833, "learning_rate": 0.0003400009636444464, "loss": 1.8104, "step": 41247 }, { "epoch": 1.37, "grad_norm": 0.5119381546974182, "learning_rate": 0.0003399906048325341, "loss": 1.7879, "step": 41248 }, { "epoch": 1.37, "grad_norm": 0.4906894564628601, "learning_rate": 0.00033998024597207906, "loss": 1.7532, "step": 41249 }, { "epoch": 1.37, "grad_norm": 0.4945542514324188, "learning_rate": 0.000339969887063094, "loss": 1.7811, "step": 41250 }, { "epoch": 1.37, "grad_norm": 0.49002790451049805, "learning_rate": 0.0003399595281055913, "loss": 1.7492, "step": 41251 }, { "epoch": 1.37, "grad_norm": 0.4857916533946991, "learning_rate": 0.00033994916909958365, "loss": 1.6663, "step": 41252 }, { "epoch": 1.37, "grad_norm": 0.5141018033027649, "learning_rate": 0.0003399388100450835, "loss": 1.8248, "step": 41253 }, { "epoch": 1.37, "grad_norm": 0.5030725598335266, "learning_rate": 0.0003399284509421036, "loss": 1.696, "step": 41254 }, { "epoch": 1.37, "grad_norm": 0.49377527832984924, "learning_rate": 0.00033991809179065644, "loss": 1.7863, "step": 41255 }, { "epoch": 1.37, "grad_norm": 0.5016955733299255, "learning_rate": 0.00033990773259075463, "loss": 1.7503, "step": 41256 }, { "epoch": 1.37, "grad_norm": 0.5037838220596313, "learning_rate": 0.00033989737334241067, "loss": 1.829, "step": 41257 }, { "epoch": 1.37, "grad_norm": 0.49631139636039734, "learning_rate": 0.0003398870140456372, "loss": 1.7354, "step": 41258 }, { "epoch": 1.37, "grad_norm": 0.7416307330131531, "learning_rate": 0.0003398766547004467, "loss": 1.7615, "step": 41259 }, { "epoch": 1.37, "grad_norm": 0.4784039855003357, "learning_rate": 0.0003398662953068519, "loss": 1.8457, "step": 41260 }, { "epoch": 1.37, "grad_norm": 0.4871075749397278, "learning_rate": 0.00033985593586486533, "loss": 1.7537, "step": 41261 }, { "epoch": 1.37, "grad_norm": 0.5227895379066467, "learning_rate": 0.00033984557637449937, "loss": 1.8704, "step": 41262 }, { "epoch": 1.37, "grad_norm": 0.5076528191566467, "learning_rate": 0.00033983521683576687, "loss": 1.8343, "step": 41263 }, { "epoch": 1.37, "grad_norm": 0.5086938738822937, "learning_rate": 0.0003398248572486803, "loss": 1.7384, "step": 41264 }, { "epoch": 1.37, "grad_norm": 0.48792561888694763, "learning_rate": 0.00033981449761325217, "loss": 1.7516, "step": 41265 }, { "epoch": 1.37, "grad_norm": 0.5182173848152161, "learning_rate": 0.0003398041379294951, "loss": 1.7543, "step": 41266 }, { "epoch": 1.37, "grad_norm": 0.5235286951065063, "learning_rate": 0.0003397937781974216, "loss": 1.7463, "step": 41267 }, { "epoch": 1.37, "grad_norm": 0.5055959820747375, "learning_rate": 0.0003397834184170445, "loss": 1.7261, "step": 41268 }, { "epoch": 1.37, "grad_norm": 0.5053684711456299, "learning_rate": 0.0003397730585883761, "loss": 1.7487, "step": 41269 }, { "epoch": 1.37, "grad_norm": 0.5228613018989563, "learning_rate": 0.00033976269871142906, "loss": 1.8317, "step": 41270 }, { "epoch": 1.37, "grad_norm": 0.5501189827919006, "learning_rate": 0.00033975233878621595, "loss": 1.7401, "step": 41271 }, { "epoch": 1.37, "grad_norm": 0.5073436498641968, "learning_rate": 0.0003397419788127494, "loss": 1.7387, "step": 41272 }, { "epoch": 1.37, "grad_norm": 0.5228462219238281, "learning_rate": 0.00033973161879104186, "loss": 1.7474, "step": 41273 }, { "epoch": 1.37, "grad_norm": 0.49832382798194885, "learning_rate": 0.0003397212587211061, "loss": 1.845, "step": 41274 }, { "epoch": 1.37, "grad_norm": 0.4925045073032379, "learning_rate": 0.0003397108986029546, "loss": 1.8647, "step": 41275 }, { "epoch": 1.37, "grad_norm": 0.48803284764289856, "learning_rate": 0.00033970053843659985, "loss": 1.7406, "step": 41276 }, { "epoch": 1.37, "grad_norm": 0.4951680302619934, "learning_rate": 0.00033969017822205443, "loss": 1.8116, "step": 41277 }, { "epoch": 1.37, "grad_norm": 0.4974816143512726, "learning_rate": 0.0003396798179593311, "loss": 1.6748, "step": 41278 }, { "epoch": 1.37, "grad_norm": 0.4988487958908081, "learning_rate": 0.0003396694576484422, "loss": 1.8068, "step": 41279 }, { "epoch": 1.37, "grad_norm": 0.4805879294872284, "learning_rate": 0.00033965909728940046, "loss": 1.6526, "step": 41280 }, { "epoch": 1.37, "grad_norm": 0.5021935701370239, "learning_rate": 0.0003396487368822185, "loss": 1.7371, "step": 41281 }, { "epoch": 1.37, "grad_norm": 0.48755544424057007, "learning_rate": 0.00033963837642690876, "loss": 1.6897, "step": 41282 }, { "epoch": 1.37, "grad_norm": 0.5053955912590027, "learning_rate": 0.0003396280159234839, "loss": 1.8192, "step": 41283 }, { "epoch": 1.37, "grad_norm": 0.5007714629173279, "learning_rate": 0.0003396176553719564, "loss": 1.6827, "step": 41284 }, { "epoch": 1.37, "grad_norm": 0.49022310972213745, "learning_rate": 0.000339607294772339, "loss": 1.6924, "step": 41285 }, { "epoch": 1.37, "grad_norm": 0.5170118808746338, "learning_rate": 0.0003395969341246441, "loss": 1.7527, "step": 41286 }, { "epoch": 1.37, "grad_norm": 0.5051715970039368, "learning_rate": 0.00033958657342888436, "loss": 1.8191, "step": 41287 }, { "epoch": 1.37, "grad_norm": 0.5553011298179626, "learning_rate": 0.0003395762126850725, "loss": 1.7718, "step": 41288 }, { "epoch": 1.37, "grad_norm": 0.49648818373680115, "learning_rate": 0.00033956585189322074, "loss": 1.7492, "step": 41289 }, { "epoch": 1.37, "grad_norm": 0.4955971837043762, "learning_rate": 0.000339555491053342, "loss": 1.7452, "step": 41290 }, { "epoch": 1.37, "grad_norm": 0.49405530095100403, "learning_rate": 0.00033954513016544864, "loss": 1.7481, "step": 41291 }, { "epoch": 1.37, "grad_norm": 0.5043061375617981, "learning_rate": 0.00033953476922955327, "loss": 1.8327, "step": 41292 }, { "epoch": 1.37, "grad_norm": 0.49025604128837585, "learning_rate": 0.0003395244082456687, "loss": 1.8038, "step": 41293 }, { "epoch": 1.37, "grad_norm": 0.500663697719574, "learning_rate": 0.00033951404721380715, "loss": 1.7589, "step": 41294 }, { "epoch": 1.37, "grad_norm": 0.5117522478103638, "learning_rate": 0.00033950368613398145, "loss": 1.7329, "step": 41295 }, { "epoch": 1.37, "grad_norm": 0.5115629434585571, "learning_rate": 0.00033949332500620414, "loss": 1.7994, "step": 41296 }, { "epoch": 1.37, "grad_norm": 0.4915980398654938, "learning_rate": 0.00033948296383048764, "loss": 1.7173, "step": 41297 }, { "epoch": 1.37, "grad_norm": 0.5104314088821411, "learning_rate": 0.0003394726026068448, "loss": 1.7088, "step": 41298 }, { "epoch": 1.37, "grad_norm": 0.5177534818649292, "learning_rate": 0.00033946224133528786, "loss": 1.7683, "step": 41299 }, { "epoch": 1.37, "grad_norm": 0.48057880997657776, "learning_rate": 0.00033945188001582963, "loss": 1.7648, "step": 41300 }, { "epoch": 1.37, "grad_norm": 0.5045441389083862, "learning_rate": 0.0003394415186484827, "loss": 1.7598, "step": 41301 }, { "epoch": 1.37, "grad_norm": 0.5045329928398132, "learning_rate": 0.0003394311572332595, "loss": 1.8646, "step": 41302 }, { "epoch": 1.37, "grad_norm": 0.5187183022499084, "learning_rate": 0.00033942079577017274, "loss": 1.7213, "step": 41303 }, { "epoch": 1.37, "grad_norm": 0.516103982925415, "learning_rate": 0.00033941043425923486, "loss": 1.7287, "step": 41304 }, { "epoch": 1.37, "grad_norm": 0.544360339641571, "learning_rate": 0.0003394000727004586, "loss": 1.7769, "step": 41305 }, { "epoch": 1.37, "grad_norm": 0.4942779839038849, "learning_rate": 0.00033938971109385643, "loss": 1.7467, "step": 41306 }, { "epoch": 1.37, "grad_norm": 0.5140721201896667, "learning_rate": 0.00033937934943944093, "loss": 1.7892, "step": 41307 }, { "epoch": 1.37, "grad_norm": 0.49148961901664734, "learning_rate": 0.00033936898773722474, "loss": 1.8035, "step": 41308 }, { "epoch": 1.37, "grad_norm": 0.5285029411315918, "learning_rate": 0.00033935862598722034, "loss": 1.7931, "step": 41309 }, { "epoch": 1.37, "grad_norm": 0.5049331784248352, "learning_rate": 0.0003393482641894404, "loss": 1.8332, "step": 41310 }, { "epoch": 1.37, "grad_norm": 0.4966532588005066, "learning_rate": 0.00033933790234389757, "loss": 1.8389, "step": 41311 }, { "epoch": 1.37, "grad_norm": 0.5332990884780884, "learning_rate": 0.00033932754045060424, "loss": 1.767, "step": 41312 }, { "epoch": 1.37, "grad_norm": 0.532008171081543, "learning_rate": 0.000339317178509573, "loss": 1.7516, "step": 41313 }, { "epoch": 1.37, "grad_norm": 0.5137722492218018, "learning_rate": 0.00033930681652081656, "loss": 1.7697, "step": 41314 }, { "epoch": 1.37, "grad_norm": 0.5074654817581177, "learning_rate": 0.00033929645448434745, "loss": 1.7659, "step": 41315 }, { "epoch": 1.37, "grad_norm": 0.4999518096446991, "learning_rate": 0.0003392860924001783, "loss": 1.7153, "step": 41316 }, { "epoch": 1.37, "grad_norm": 0.5094208121299744, "learning_rate": 0.00033927573026832145, "loss": 1.8051, "step": 41317 }, { "epoch": 1.37, "grad_norm": 0.5104348063468933, "learning_rate": 0.0003392653680887898, "loss": 1.7921, "step": 41318 }, { "epoch": 1.37, "grad_norm": 0.5173141956329346, "learning_rate": 0.00033925500586159566, "loss": 1.7241, "step": 41319 }, { "epoch": 1.37, "grad_norm": 0.5218132138252258, "learning_rate": 0.0003392446435867518, "loss": 1.7652, "step": 41320 }, { "epoch": 1.37, "grad_norm": 0.4916657507419586, "learning_rate": 0.00033923428126427066, "loss": 1.7304, "step": 41321 }, { "epoch": 1.37, "grad_norm": 0.5172955393791199, "learning_rate": 0.00033922391889416496, "loss": 1.7583, "step": 41322 }, { "epoch": 1.37, "grad_norm": 0.4945403039455414, "learning_rate": 0.00033921355647644727, "loss": 1.7657, "step": 41323 }, { "epoch": 1.37, "grad_norm": 0.4945761263370514, "learning_rate": 0.0003392031940111299, "loss": 1.7532, "step": 41324 }, { "epoch": 1.37, "grad_norm": 0.7133883237838745, "learning_rate": 0.0003391928314982258, "loss": 1.7773, "step": 41325 }, { "epoch": 1.37, "grad_norm": 0.49282822012901306, "learning_rate": 0.0003391824689377473, "loss": 1.7106, "step": 41326 }, { "epoch": 1.37, "grad_norm": 0.510815441608429, "learning_rate": 0.0003391721063297071, "loss": 1.8185, "step": 41327 }, { "epoch": 1.37, "grad_norm": 0.5057834982872009, "learning_rate": 0.00033916174367411766, "loss": 1.6469, "step": 41328 }, { "epoch": 1.38, "grad_norm": 0.49038758873939514, "learning_rate": 0.00033915138097099167, "loss": 1.8216, "step": 41329 }, { "epoch": 1.38, "grad_norm": 0.5177432298660278, "learning_rate": 0.0003391410182203418, "loss": 1.6909, "step": 41330 }, { "epoch": 1.38, "grad_norm": 0.5299459099769592, "learning_rate": 0.0003391306554221803, "loss": 1.7655, "step": 41331 }, { "epoch": 1.38, "grad_norm": 0.4968189299106598, "learning_rate": 0.0003391202925765201, "loss": 1.8094, "step": 41332 }, { "epoch": 1.38, "grad_norm": 0.5220440626144409, "learning_rate": 0.00033910992968337345, "loss": 1.7295, "step": 41333 }, { "epoch": 1.38, "grad_norm": 0.5001429319381714, "learning_rate": 0.00033909956674275325, "loss": 1.691, "step": 41334 }, { "epoch": 1.38, "grad_norm": 0.5243076086044312, "learning_rate": 0.0003390892037546719, "loss": 1.828, "step": 41335 }, { "epoch": 1.38, "grad_norm": 0.49793535470962524, "learning_rate": 0.000339078840719142, "loss": 1.7732, "step": 41336 }, { "epoch": 1.38, "grad_norm": 0.5085583925247192, "learning_rate": 0.00033906847763617626, "loss": 1.8601, "step": 41337 }, { "epoch": 1.38, "grad_norm": 0.4897438883781433, "learning_rate": 0.00033905811450578706, "loss": 1.7558, "step": 41338 }, { "epoch": 1.38, "grad_norm": 0.5150793790817261, "learning_rate": 0.000339047751327987, "loss": 1.8509, "step": 41339 }, { "epoch": 1.38, "grad_norm": 0.5084378719329834, "learning_rate": 0.00033903738810278877, "loss": 1.812, "step": 41340 }, { "epoch": 1.38, "grad_norm": 0.5193353295326233, "learning_rate": 0.0003390270248302049, "loss": 1.8102, "step": 41341 }, { "epoch": 1.38, "grad_norm": 0.5233201384544373, "learning_rate": 0.000339016661510248, "loss": 1.8051, "step": 41342 }, { "epoch": 1.38, "grad_norm": 0.4990597367286682, "learning_rate": 0.00033900629814293063, "loss": 1.7811, "step": 41343 }, { "epoch": 1.38, "grad_norm": 0.49600541591644287, "learning_rate": 0.00033899593472826536, "loss": 1.7596, "step": 41344 }, { "epoch": 1.38, "grad_norm": 0.5081126689910889, "learning_rate": 0.00033898557126626474, "loss": 1.7749, "step": 41345 }, { "epoch": 1.38, "grad_norm": 0.5005395412445068, "learning_rate": 0.00033897520775694136, "loss": 1.7952, "step": 41346 }, { "epoch": 1.38, "grad_norm": 0.5041859745979309, "learning_rate": 0.00033896484420030783, "loss": 1.8443, "step": 41347 }, { "epoch": 1.38, "grad_norm": 0.47122588753700256, "learning_rate": 0.0003389544805963768, "loss": 1.7603, "step": 41348 }, { "epoch": 1.38, "grad_norm": 0.4940465986728668, "learning_rate": 0.0003389441169451607, "loss": 1.8596, "step": 41349 }, { "epoch": 1.38, "grad_norm": 0.49464622139930725, "learning_rate": 0.0003389337532466722, "loss": 1.7781, "step": 41350 }, { "epoch": 1.38, "grad_norm": 0.49522414803504944, "learning_rate": 0.0003389233895009238, "loss": 1.7642, "step": 41351 }, { "epoch": 1.38, "grad_norm": 0.4798617959022522, "learning_rate": 0.00033891302570792823, "loss": 1.706, "step": 41352 }, { "epoch": 1.38, "grad_norm": 0.4882557690143585, "learning_rate": 0.00033890266186769794, "loss": 1.7866, "step": 41353 }, { "epoch": 1.38, "grad_norm": 0.494745135307312, "learning_rate": 0.0003388922979802455, "loss": 1.7587, "step": 41354 }, { "epoch": 1.38, "grad_norm": 0.5045580267906189, "learning_rate": 0.00033888193404558363, "loss": 1.8442, "step": 41355 }, { "epoch": 1.38, "grad_norm": 0.5003960728645325, "learning_rate": 0.0003388715700637247, "loss": 1.7515, "step": 41356 }, { "epoch": 1.38, "grad_norm": 0.46747320890426636, "learning_rate": 0.00033886120603468154, "loss": 1.7578, "step": 41357 }, { "epoch": 1.38, "grad_norm": 0.5050850510597229, "learning_rate": 0.00033885084195846657, "loss": 1.8573, "step": 41358 }, { "epoch": 1.38, "grad_norm": 0.512689471244812, "learning_rate": 0.0003388404778350924, "loss": 1.8109, "step": 41359 }, { "epoch": 1.38, "grad_norm": 0.5020543336868286, "learning_rate": 0.00033883011366457166, "loss": 1.7238, "step": 41360 }, { "epoch": 1.38, "grad_norm": 0.5062107443809509, "learning_rate": 0.00033881974944691677, "loss": 1.7322, "step": 41361 }, { "epoch": 1.38, "grad_norm": 0.5052301287651062, "learning_rate": 0.00033880938518214044, "loss": 1.8075, "step": 41362 }, { "epoch": 1.38, "grad_norm": 0.5109330415725708, "learning_rate": 0.0003387990208702553, "loss": 1.6479, "step": 41363 }, { "epoch": 1.38, "grad_norm": 0.5135514736175537, "learning_rate": 0.0003387886565112739, "loss": 1.7619, "step": 41364 }, { "epoch": 1.38, "grad_norm": 0.5010621547698975, "learning_rate": 0.00033877829210520865, "loss": 1.755, "step": 41365 }, { "epoch": 1.38, "grad_norm": 0.5120742321014404, "learning_rate": 0.00033876792765207236, "loss": 1.7578, "step": 41366 }, { "epoch": 1.38, "grad_norm": 0.4883526861667633, "learning_rate": 0.00033875756315187753, "loss": 1.7787, "step": 41367 }, { "epoch": 1.38, "grad_norm": 0.49925416707992554, "learning_rate": 0.00033874719860463667, "loss": 1.7867, "step": 41368 }, { "epoch": 1.38, "grad_norm": 0.5084772109985352, "learning_rate": 0.00033873683401036247, "loss": 1.8565, "step": 41369 }, { "epoch": 1.38, "grad_norm": 0.5089906454086304, "learning_rate": 0.0003387264693690674, "loss": 1.854, "step": 41370 }, { "epoch": 1.38, "grad_norm": 0.5007471442222595, "learning_rate": 0.0003387161046807642, "loss": 1.8154, "step": 41371 }, { "epoch": 1.38, "grad_norm": 0.49959027767181396, "learning_rate": 0.0003387057399454652, "loss": 1.7355, "step": 41372 }, { "epoch": 1.38, "grad_norm": 0.48560768365859985, "learning_rate": 0.0003386953751631833, "loss": 1.8305, "step": 41373 }, { "epoch": 1.38, "grad_norm": 0.5018647313117981, "learning_rate": 0.0003386850103339309, "loss": 1.731, "step": 41374 }, { "epoch": 1.38, "grad_norm": 0.5076321959495544, "learning_rate": 0.00033867464545772055, "loss": 1.7953, "step": 41375 }, { "epoch": 1.38, "grad_norm": 0.49403733015060425, "learning_rate": 0.00033866428053456484, "loss": 1.7354, "step": 41376 }, { "epoch": 1.38, "grad_norm": 0.5058610439300537, "learning_rate": 0.0003386539155644764, "loss": 1.7922, "step": 41377 }, { "epoch": 1.38, "grad_norm": 0.48416709899902344, "learning_rate": 0.0003386435505474679, "loss": 1.7544, "step": 41378 }, { "epoch": 1.38, "grad_norm": 0.5030190944671631, "learning_rate": 0.00033863318548355166, "loss": 1.8168, "step": 41379 }, { "epoch": 1.38, "grad_norm": 0.5031731724739075, "learning_rate": 0.0003386228203727405, "loss": 1.7961, "step": 41380 }, { "epoch": 1.38, "grad_norm": 0.4885830283164978, "learning_rate": 0.000338612455215047, "loss": 1.7392, "step": 41381 }, { "epoch": 1.38, "grad_norm": 0.4966517686843872, "learning_rate": 0.00033860209001048357, "loss": 1.7009, "step": 41382 }, { "epoch": 1.38, "grad_norm": 0.47973933815956116, "learning_rate": 0.00033859172475906294, "loss": 1.7617, "step": 41383 }, { "epoch": 1.38, "grad_norm": 0.5046725273132324, "learning_rate": 0.0003385813594607977, "loss": 1.7241, "step": 41384 }, { "epoch": 1.38, "grad_norm": 0.521448016166687, "learning_rate": 0.0003385709941157003, "loss": 1.7962, "step": 41385 }, { "epoch": 1.38, "grad_norm": 0.49999192357063293, "learning_rate": 0.0003385606287237833, "loss": 1.773, "step": 41386 }, { "epoch": 1.38, "grad_norm": 0.4877854585647583, "learning_rate": 0.00033855026328505957, "loss": 1.7682, "step": 41387 }, { "epoch": 1.38, "grad_norm": 0.4893690049648285, "learning_rate": 0.00033853989779954137, "loss": 1.7966, "step": 41388 }, { "epoch": 1.38, "grad_norm": 0.5122674703598022, "learning_rate": 0.00033852953226724147, "loss": 1.7512, "step": 41389 }, { "epoch": 1.38, "grad_norm": 0.4722537696361542, "learning_rate": 0.0003385191666881724, "loss": 1.744, "step": 41390 }, { "epoch": 1.38, "grad_norm": 0.49706393480300903, "learning_rate": 0.0003385088010623467, "loss": 1.794, "step": 41391 }, { "epoch": 1.38, "grad_norm": 0.5020917057991028, "learning_rate": 0.00033849843538977703, "loss": 1.7564, "step": 41392 }, { "epoch": 1.38, "grad_norm": 0.4849528670310974, "learning_rate": 0.0003384880696704759, "loss": 1.7279, "step": 41393 }, { "epoch": 1.38, "grad_norm": 0.4833899438381195, "learning_rate": 0.0003384777039044559, "loss": 1.6993, "step": 41394 }, { "epoch": 1.38, "grad_norm": 0.49902790784835815, "learning_rate": 0.00033846733809172964, "loss": 1.6951, "step": 41395 }, { "epoch": 1.38, "grad_norm": 0.5261786580085754, "learning_rate": 0.00033845697223230974, "loss": 1.7633, "step": 41396 }, { "epoch": 1.38, "grad_norm": 0.5108454823493958, "learning_rate": 0.00033844660632620876, "loss": 1.7949, "step": 41397 }, { "epoch": 1.38, "grad_norm": 0.5133677124977112, "learning_rate": 0.00033843624037343926, "loss": 1.8197, "step": 41398 }, { "epoch": 1.38, "grad_norm": 0.5154891610145569, "learning_rate": 0.00033842587437401384, "loss": 1.7255, "step": 41399 }, { "epoch": 1.38, "grad_norm": 0.5046563744544983, "learning_rate": 0.00033841550832794495, "loss": 1.7185, "step": 41400 }, { "epoch": 1.38, "grad_norm": 0.4973469078540802, "learning_rate": 0.0003384051422352454, "loss": 1.873, "step": 41401 }, { "epoch": 1.38, "grad_norm": 0.5533678531646729, "learning_rate": 0.00033839477609592765, "loss": 1.7856, "step": 41402 }, { "epoch": 1.38, "grad_norm": 0.7397050857543945, "learning_rate": 0.0003383844099100044, "loss": 1.7471, "step": 41403 }, { "epoch": 1.38, "grad_norm": 0.48190951347351074, "learning_rate": 0.00033837404367748785, "loss": 1.7458, "step": 41404 }, { "epoch": 1.38, "grad_norm": 0.5080849528312683, "learning_rate": 0.0003383636773983911, "loss": 1.7715, "step": 41405 }, { "epoch": 1.38, "grad_norm": 0.47599080204963684, "learning_rate": 0.00033835331107272645, "loss": 1.8031, "step": 41406 }, { "epoch": 1.38, "grad_norm": 0.4955520033836365, "learning_rate": 0.0003383429447005065, "loss": 1.8033, "step": 41407 }, { "epoch": 1.38, "grad_norm": 0.4861593544483185, "learning_rate": 0.000338332578281744, "loss": 1.7412, "step": 41408 }, { "epoch": 1.38, "grad_norm": 0.5050961375236511, "learning_rate": 0.00033832221181645116, "loss": 1.7084, "step": 41409 }, { "epoch": 1.38, "grad_norm": 0.4995451271533966, "learning_rate": 0.00033831184530464095, "loss": 1.7665, "step": 41410 }, { "epoch": 1.38, "grad_norm": 0.5048164129257202, "learning_rate": 0.00033830147874632573, "loss": 1.6937, "step": 41411 }, { "epoch": 1.38, "grad_norm": 0.5038232803344727, "learning_rate": 0.0003382911121415183, "loss": 1.814, "step": 41412 }, { "epoch": 1.38, "grad_norm": 0.504191517829895, "learning_rate": 0.00033828074549023096, "loss": 1.7917, "step": 41413 }, { "epoch": 1.38, "grad_norm": 0.4905962347984314, "learning_rate": 0.0003382703787924765, "loss": 1.7781, "step": 41414 }, { "epoch": 1.38, "grad_norm": 0.4990178346633911, "learning_rate": 0.00033826001204826744, "loss": 1.7551, "step": 41415 }, { "epoch": 1.38, "grad_norm": 0.5074061155319214, "learning_rate": 0.00033824964525761627, "loss": 1.8029, "step": 41416 }, { "epoch": 1.38, "grad_norm": 0.4954753816127777, "learning_rate": 0.00033823927842053587, "loss": 1.8038, "step": 41417 }, { "epoch": 1.38, "grad_norm": 0.5024327635765076, "learning_rate": 0.0003382289115370384, "loss": 1.7989, "step": 41418 }, { "epoch": 1.38, "grad_norm": 0.48586875200271606, "learning_rate": 0.0003382185446071367, "loss": 1.7655, "step": 41419 }, { "epoch": 1.38, "grad_norm": 0.4877340793609619, "learning_rate": 0.00033820817763084346, "loss": 1.7102, "step": 41420 }, { "epoch": 1.38, "grad_norm": 0.4872928559780121, "learning_rate": 0.000338197810608171, "loss": 1.6651, "step": 41421 }, { "epoch": 1.38, "grad_norm": 0.5054776668548584, "learning_rate": 0.0003381874435391321, "loss": 1.7604, "step": 41422 }, { "epoch": 1.38, "grad_norm": 0.48919007182121277, "learning_rate": 0.0003381770764237392, "loss": 1.8157, "step": 41423 }, { "epoch": 1.38, "grad_norm": 0.49760669469833374, "learning_rate": 0.000338166709262005, "loss": 1.7695, "step": 41424 }, { "epoch": 1.38, "grad_norm": 0.506098747253418, "learning_rate": 0.000338156342053942, "loss": 1.8229, "step": 41425 }, { "epoch": 1.38, "grad_norm": 0.5133348107337952, "learning_rate": 0.0003381459747995629, "loss": 1.7398, "step": 41426 }, { "epoch": 1.38, "grad_norm": 0.49288293719291687, "learning_rate": 0.0003381356074988801, "loss": 1.7761, "step": 41427 }, { "epoch": 1.38, "grad_norm": 0.4938944876194, "learning_rate": 0.00033812524015190644, "loss": 1.7809, "step": 41428 }, { "epoch": 1.38, "grad_norm": 0.49021315574645996, "learning_rate": 0.00033811487275865423, "loss": 1.7209, "step": 41429 }, { "epoch": 1.38, "grad_norm": 0.49693846702575684, "learning_rate": 0.0003381045053191362, "loss": 1.8089, "step": 41430 }, { "epoch": 1.38, "grad_norm": 0.5040518045425415, "learning_rate": 0.00033809413783336494, "loss": 1.7249, "step": 41431 }, { "epoch": 1.38, "grad_norm": 0.4871605634689331, "learning_rate": 0.00033808377030135296, "loss": 1.7684, "step": 41432 }, { "epoch": 1.38, "grad_norm": 0.48470357060432434, "learning_rate": 0.000338073402723113, "loss": 1.7837, "step": 41433 }, { "epoch": 1.38, "grad_norm": 0.5182420611381531, "learning_rate": 0.00033806303509865737, "loss": 1.7586, "step": 41434 }, { "epoch": 1.38, "grad_norm": 0.5155646204948425, "learning_rate": 0.00033805266742799897, "loss": 1.8063, "step": 41435 }, { "epoch": 1.38, "grad_norm": 0.4951683282852173, "learning_rate": 0.0003380422997111502, "loss": 1.7423, "step": 41436 }, { "epoch": 1.38, "grad_norm": 0.49146613478660583, "learning_rate": 0.0003380319319481237, "loss": 1.7349, "step": 41437 }, { "epoch": 1.38, "grad_norm": 0.4972381591796875, "learning_rate": 0.00033802156413893195, "loss": 1.7416, "step": 41438 }, { "epoch": 1.38, "grad_norm": 0.5187768936157227, "learning_rate": 0.0003380111962835877, "loss": 1.8042, "step": 41439 }, { "epoch": 1.38, "grad_norm": 0.5055806040763855, "learning_rate": 0.0003380008283821035, "loss": 1.7054, "step": 41440 }, { "epoch": 1.38, "grad_norm": 0.5040099620819092, "learning_rate": 0.0003379904604344918, "loss": 1.726, "step": 41441 }, { "epoch": 1.38, "grad_norm": 0.4885522127151489, "learning_rate": 0.00033798009244076537, "loss": 1.8108, "step": 41442 }, { "epoch": 1.38, "grad_norm": 0.4962419271469116, "learning_rate": 0.0003379697244009366, "loss": 1.7486, "step": 41443 }, { "epoch": 1.38, "grad_norm": 0.49968603253364563, "learning_rate": 0.00033795935631501827, "loss": 1.7048, "step": 41444 }, { "epoch": 1.38, "grad_norm": 0.49395859241485596, "learning_rate": 0.0003379489881830228, "loss": 1.7338, "step": 41445 }, { "epoch": 1.38, "grad_norm": 0.5286628007888794, "learning_rate": 0.00033793862000496287, "loss": 1.7757, "step": 41446 }, { "epoch": 1.38, "grad_norm": 0.5018961429595947, "learning_rate": 0.00033792825178085114, "loss": 1.8285, "step": 41447 }, { "epoch": 1.38, "grad_norm": 0.5041247606277466, "learning_rate": 0.0003379178835106999, "loss": 1.8235, "step": 41448 }, { "epoch": 1.38, "grad_norm": 0.5062354207038879, "learning_rate": 0.00033790751519452216, "loss": 1.769, "step": 41449 }, { "epoch": 1.38, "grad_norm": 0.505989134311676, "learning_rate": 0.00033789714683233013, "loss": 1.7312, "step": 41450 }, { "epoch": 1.38, "grad_norm": 0.5194019079208374, "learning_rate": 0.0003378867784241366, "loss": 1.7683, "step": 41451 }, { "epoch": 1.38, "grad_norm": 0.5076106190681458, "learning_rate": 0.00033787640996995406, "loss": 1.801, "step": 41452 }, { "epoch": 1.38, "grad_norm": 0.5105540752410889, "learning_rate": 0.00033786604146979524, "loss": 1.7105, "step": 41453 }, { "epoch": 1.38, "grad_norm": 0.49868258833885193, "learning_rate": 0.0003378556729236726, "loss": 1.7976, "step": 41454 }, { "epoch": 1.38, "grad_norm": 0.5432016253471375, "learning_rate": 0.0003378453043315987, "loss": 1.7806, "step": 41455 }, { "epoch": 1.38, "grad_norm": 0.5023795962333679, "learning_rate": 0.00033783493569358616, "loss": 1.7512, "step": 41456 }, { "epoch": 1.38, "grad_norm": 0.5158334970474243, "learning_rate": 0.0003378245670096476, "loss": 1.7209, "step": 41457 }, { "epoch": 1.38, "grad_norm": 0.4933498203754425, "learning_rate": 0.0003378141982797956, "loss": 1.7834, "step": 41458 }, { "epoch": 1.38, "grad_norm": 0.49630922079086304, "learning_rate": 0.00033780382950404273, "loss": 1.7558, "step": 41459 }, { "epoch": 1.38, "grad_norm": 0.5267056822776794, "learning_rate": 0.00033779346068240165, "loss": 1.7937, "step": 41460 }, { "epoch": 1.38, "grad_norm": 0.5066956281661987, "learning_rate": 0.0003377830918148849, "loss": 1.8957, "step": 41461 }, { "epoch": 1.38, "grad_norm": 0.5188236832618713, "learning_rate": 0.0003377727229015049, "loss": 1.7539, "step": 41462 }, { "epoch": 1.38, "grad_norm": 0.5035173296928406, "learning_rate": 0.0003377623539422744, "loss": 1.7969, "step": 41463 }, { "epoch": 1.38, "grad_norm": 0.494480162858963, "learning_rate": 0.000337751984937206, "loss": 1.7656, "step": 41464 }, { "epoch": 1.38, "grad_norm": 0.494801789522171, "learning_rate": 0.0003377416158863123, "loss": 1.8062, "step": 41465 }, { "epoch": 1.38, "grad_norm": 0.4993826746940613, "learning_rate": 0.00033773124678960576, "loss": 1.765, "step": 41466 }, { "epoch": 1.38, "grad_norm": 0.5077100992202759, "learning_rate": 0.00033772087764709916, "loss": 1.7786, "step": 41467 }, { "epoch": 1.38, "grad_norm": 0.49024271965026855, "learning_rate": 0.00033771050845880486, "loss": 1.7214, "step": 41468 }, { "epoch": 1.38, "grad_norm": 0.5211591720581055, "learning_rate": 0.00033770013922473555, "loss": 1.8265, "step": 41469 }, { "epoch": 1.38, "grad_norm": 0.4862523376941681, "learning_rate": 0.00033768976994490396, "loss": 1.7994, "step": 41470 }, { "epoch": 1.38, "grad_norm": 0.5072137713432312, "learning_rate": 0.0003376794006193224, "loss": 1.7978, "step": 41471 }, { "epoch": 1.38, "grad_norm": 0.49655818939208984, "learning_rate": 0.00033766903124800366, "loss": 1.819, "step": 41472 }, { "epoch": 1.38, "grad_norm": 0.5058334469795227, "learning_rate": 0.0003376586618309602, "loss": 1.79, "step": 41473 }, { "epoch": 1.38, "grad_norm": 0.5023586750030518, "learning_rate": 0.0003376482923682048, "loss": 1.7827, "step": 41474 }, { "epoch": 1.38, "grad_norm": 0.49335670471191406, "learning_rate": 0.0003376379228597499, "loss": 1.7619, "step": 41475 }, { "epoch": 1.38, "grad_norm": 0.512270450592041, "learning_rate": 0.000337627553305608, "loss": 1.8657, "step": 41476 }, { "epoch": 1.38, "grad_norm": 0.4821591377258301, "learning_rate": 0.0003376171837057919, "loss": 1.8222, "step": 41477 }, { "epoch": 1.38, "grad_norm": 0.49847808480262756, "learning_rate": 0.00033760681406031396, "loss": 1.7375, "step": 41478 }, { "epoch": 1.38, "grad_norm": 0.49085673689842224, "learning_rate": 0.000337596444369187, "loss": 1.7098, "step": 41479 }, { "epoch": 1.38, "grad_norm": 0.49718472361564636, "learning_rate": 0.0003375860746324235, "loss": 1.7713, "step": 41480 }, { "epoch": 1.38, "grad_norm": 0.5065481662750244, "learning_rate": 0.000337575704850036, "loss": 1.8411, "step": 41481 }, { "epoch": 1.38, "grad_norm": 0.5253470540046692, "learning_rate": 0.0003375653350220371, "loss": 1.7471, "step": 41482 }, { "epoch": 1.38, "grad_norm": 0.4990277886390686, "learning_rate": 0.00033755496514843947, "loss": 1.7696, "step": 41483 }, { "epoch": 1.38, "grad_norm": 0.5076568722724915, "learning_rate": 0.0003375445952292557, "loss": 1.7861, "step": 41484 }, { "epoch": 1.38, "grad_norm": 0.49432650208473206, "learning_rate": 0.00033753422526449826, "loss": 1.7216, "step": 41485 }, { "epoch": 1.38, "grad_norm": 0.49119213223457336, "learning_rate": 0.0003375238552541797, "loss": 1.7178, "step": 41486 }, { "epoch": 1.38, "grad_norm": 0.5035667419433594, "learning_rate": 0.00033751348519831285, "loss": 1.7555, "step": 41487 }, { "epoch": 1.38, "grad_norm": 0.5046955943107605, "learning_rate": 0.00033750311509691015, "loss": 1.8145, "step": 41488 }, { "epoch": 1.38, "grad_norm": 0.49941304326057434, "learning_rate": 0.0003374927449499841, "loss": 1.7441, "step": 41489 }, { "epoch": 1.38, "grad_norm": 0.5130870342254639, "learning_rate": 0.00033748237475754754, "loss": 1.7975, "step": 41490 }, { "epoch": 1.38, "grad_norm": 0.4994824230670929, "learning_rate": 0.0003374720045196127, "loss": 1.6814, "step": 41491 }, { "epoch": 1.38, "grad_norm": 0.49781182408332825, "learning_rate": 0.0003374616342361925, "loss": 1.7462, "step": 41492 }, { "epoch": 1.38, "grad_norm": 0.5073761343955994, "learning_rate": 0.00033745126390729934, "loss": 1.7327, "step": 41493 }, { "epoch": 1.38, "grad_norm": 0.5021752119064331, "learning_rate": 0.00033744089353294587, "loss": 1.794, "step": 41494 }, { "epoch": 1.38, "grad_norm": 0.5022725462913513, "learning_rate": 0.00033743052311314476, "loss": 1.7778, "step": 41495 }, { "epoch": 1.38, "grad_norm": 0.5107393860816956, "learning_rate": 0.00033742015264790837, "loss": 1.8187, "step": 41496 }, { "epoch": 1.38, "grad_norm": 0.5030180811882019, "learning_rate": 0.00033740978213724955, "loss": 1.7607, "step": 41497 }, { "epoch": 1.38, "grad_norm": 0.5151864886283875, "learning_rate": 0.0003373994115811807, "loss": 1.7783, "step": 41498 }, { "epoch": 1.38, "grad_norm": 0.502711832523346, "learning_rate": 0.00033738904097971447, "loss": 1.6883, "step": 41499 }, { "epoch": 1.38, "grad_norm": 0.5051636695861816, "learning_rate": 0.00033737867033286343, "loss": 1.6586, "step": 41500 }, { "epoch": 1.38, "grad_norm": 0.5118653178215027, "learning_rate": 0.00033736829964064026, "loss": 1.8298, "step": 41501 }, { "epoch": 1.38, "grad_norm": 0.49193140864372253, "learning_rate": 0.0003373579289030575, "loss": 1.7533, "step": 41502 }, { "epoch": 1.38, "grad_norm": 0.4819703698158264, "learning_rate": 0.00033734755812012764, "loss": 1.7646, "step": 41503 }, { "epoch": 1.38, "grad_norm": 0.4954642653465271, "learning_rate": 0.0003373371872918634, "loss": 1.7917, "step": 41504 }, { "epoch": 1.38, "grad_norm": 0.4988521933555603, "learning_rate": 0.00033732681641827726, "loss": 1.6712, "step": 41505 }, { "epoch": 1.38, "grad_norm": 0.5127942562103271, "learning_rate": 0.0003373164454993819, "loss": 1.7176, "step": 41506 }, { "epoch": 1.38, "grad_norm": 0.49618273973464966, "learning_rate": 0.00033730607453518987, "loss": 1.7793, "step": 41507 }, { "epoch": 1.38, "grad_norm": 0.5041379928588867, "learning_rate": 0.0003372957035257137, "loss": 1.8246, "step": 41508 }, { "epoch": 1.38, "grad_norm": 0.5376971960067749, "learning_rate": 0.00033728533247096617, "loss": 1.8223, "step": 41509 }, { "epoch": 1.38, "grad_norm": 0.5152348279953003, "learning_rate": 0.00033727496137095966, "loss": 1.7892, "step": 41510 }, { "epoch": 1.38, "grad_norm": 0.5075958967208862, "learning_rate": 0.00033726459022570684, "loss": 1.7939, "step": 41511 }, { "epoch": 1.38, "grad_norm": 0.5327022075653076, "learning_rate": 0.0003372542190352203, "loss": 1.8781, "step": 41512 }, { "epoch": 1.38, "grad_norm": 0.4954870343208313, "learning_rate": 0.0003372438477995127, "loss": 1.8238, "step": 41513 }, { "epoch": 1.38, "grad_norm": 0.4882150888442993, "learning_rate": 0.0003372334765185965, "loss": 1.747, "step": 41514 }, { "epoch": 1.38, "grad_norm": 0.4867406189441681, "learning_rate": 0.00033722310519248433, "loss": 1.767, "step": 41515 }, { "epoch": 1.38, "grad_norm": 0.5270109176635742, "learning_rate": 0.00033721273382118886, "loss": 1.7744, "step": 41516 }, { "epoch": 1.38, "grad_norm": 0.4957042932510376, "learning_rate": 0.00033720236240472254, "loss": 1.7847, "step": 41517 }, { "epoch": 1.38, "grad_norm": 0.4948092997074127, "learning_rate": 0.000337191990943098, "loss": 1.7199, "step": 41518 }, { "epoch": 1.38, "grad_norm": 0.5032950043678284, "learning_rate": 0.0003371816194363279, "loss": 1.7342, "step": 41519 }, { "epoch": 1.38, "grad_norm": 0.5105372071266174, "learning_rate": 0.0003371712478844249, "loss": 1.8125, "step": 41520 }, { "epoch": 1.38, "grad_norm": 0.487892210483551, "learning_rate": 0.00033716087628740134, "loss": 1.7223, "step": 41521 }, { "epoch": 1.38, "grad_norm": 0.47466668486595154, "learning_rate": 0.00033715050464527005, "loss": 1.7621, "step": 41522 }, { "epoch": 1.38, "grad_norm": 0.5140916109085083, "learning_rate": 0.00033714013295804343, "loss": 1.7833, "step": 41523 }, { "epoch": 1.38, "grad_norm": 0.5166908502578735, "learning_rate": 0.00033712976122573417, "loss": 1.86, "step": 41524 }, { "epoch": 1.38, "grad_norm": 0.49430951476097107, "learning_rate": 0.00033711938944835495, "loss": 1.7811, "step": 41525 }, { "epoch": 1.38, "grad_norm": 0.6859210729598999, "learning_rate": 0.00033710901762591813, "loss": 1.6868, "step": 41526 }, { "epoch": 1.38, "grad_norm": 0.5020279884338379, "learning_rate": 0.0003370986457584366, "loss": 1.7652, "step": 41527 }, { "epoch": 1.38, "grad_norm": 0.5022653341293335, "learning_rate": 0.00033708827384592254, "loss": 1.7261, "step": 41528 }, { "epoch": 1.38, "grad_norm": 0.5049567818641663, "learning_rate": 0.000337077901888389, "loss": 1.8222, "step": 41529 }, { "epoch": 1.38, "grad_norm": 0.5114538073539734, "learning_rate": 0.0003370675298858483, "loss": 1.7104, "step": 41530 }, { "epoch": 1.38, "grad_norm": 0.4688121974468231, "learning_rate": 0.00033705715783831293, "loss": 1.6959, "step": 41531 }, { "epoch": 1.38, "grad_norm": 0.49389752745628357, "learning_rate": 0.0003370467857457959, "loss": 1.7268, "step": 41532 }, { "epoch": 1.38, "grad_norm": 0.5130988359451294, "learning_rate": 0.00033703641360830925, "loss": 1.7161, "step": 41533 }, { "epoch": 1.38, "grad_norm": 0.4824196696281433, "learning_rate": 0.0003370260414258661, "loss": 1.7502, "step": 41534 }, { "epoch": 1.38, "grad_norm": 0.48811718821525574, "learning_rate": 0.0003370156691984786, "loss": 1.7417, "step": 41535 }, { "epoch": 1.38, "grad_norm": 0.5079249739646912, "learning_rate": 0.0003370052969261596, "loss": 1.7783, "step": 41536 }, { "epoch": 1.38, "grad_norm": 0.5060116052627563, "learning_rate": 0.0003369949246089216, "loss": 1.7742, "step": 41537 }, { "epoch": 1.38, "grad_norm": 0.5035571455955505, "learning_rate": 0.00033698455224677725, "loss": 1.8055, "step": 41538 }, { "epoch": 1.38, "grad_norm": 0.5120964050292969, "learning_rate": 0.00033697417983973916, "loss": 1.7974, "step": 41539 }, { "epoch": 1.38, "grad_norm": 0.5095915794372559, "learning_rate": 0.00033696380738781976, "loss": 1.7867, "step": 41540 }, { "epoch": 1.38, "grad_norm": 0.4985400140285492, "learning_rate": 0.0003369534348910318, "loss": 1.7714, "step": 41541 }, { "epoch": 1.38, "grad_norm": 0.49277880787849426, "learning_rate": 0.0003369430623493878, "loss": 1.8102, "step": 41542 }, { "epoch": 1.38, "grad_norm": 0.5058730840682983, "learning_rate": 0.0003369326897629003, "loss": 1.7531, "step": 41543 }, { "epoch": 1.38, "grad_norm": 0.4985998570919037, "learning_rate": 0.00033692231713158203, "loss": 1.7775, "step": 41544 }, { "epoch": 1.38, "grad_norm": 0.5160006880760193, "learning_rate": 0.00033691194445544556, "loss": 1.8172, "step": 41545 }, { "epoch": 1.38, "grad_norm": 0.5047224760055542, "learning_rate": 0.0003369015717345033, "loss": 1.7487, "step": 41546 }, { "epoch": 1.38, "grad_norm": 0.49531006813049316, "learning_rate": 0.000336891198968768, "loss": 1.8359, "step": 41547 }, { "epoch": 1.38, "grad_norm": 0.49582695960998535, "learning_rate": 0.00033688082615825223, "loss": 1.6586, "step": 41548 }, { "epoch": 1.38, "grad_norm": 0.522367000579834, "learning_rate": 0.0003368704533029686, "loss": 1.7077, "step": 41549 }, { "epoch": 1.38, "grad_norm": 0.5027697086334229, "learning_rate": 0.0003368600804029298, "loss": 1.7796, "step": 41550 }, { "epoch": 1.38, "grad_norm": 0.4959545433521271, "learning_rate": 0.000336849707458148, "loss": 1.7518, "step": 41551 }, { "epoch": 1.38, "grad_norm": 0.530789315700531, "learning_rate": 0.00033683933446863635, "loss": 1.7974, "step": 41552 }, { "epoch": 1.38, "grad_norm": 0.5093950033187866, "learning_rate": 0.00033682896143440703, "loss": 1.7851, "step": 41553 }, { "epoch": 1.38, "grad_norm": 0.49013954401016235, "learning_rate": 0.00033681858835547286, "loss": 1.7744, "step": 41554 }, { "epoch": 1.38, "grad_norm": 0.5065434575080872, "learning_rate": 0.00033680821523184625, "loss": 1.716, "step": 41555 }, { "epoch": 1.38, "grad_norm": 0.4878905415534973, "learning_rate": 0.00033679784206353994, "loss": 1.7485, "step": 41556 }, { "epoch": 1.38, "grad_norm": 0.5046101808547974, "learning_rate": 0.0003367874688505665, "loss": 1.7728, "step": 41557 }, { "epoch": 1.38, "grad_norm": 0.5040736198425293, "learning_rate": 0.00033677709559293837, "loss": 1.7111, "step": 41558 }, { "epoch": 1.38, "grad_norm": 0.49478960037231445, "learning_rate": 0.0003367667222906684, "loss": 1.8916, "step": 41559 }, { "epoch": 1.38, "grad_norm": 0.4867567718029022, "learning_rate": 0.000336756348943769, "loss": 1.8208, "step": 41560 }, { "epoch": 1.38, "grad_norm": 0.4861019551753998, "learning_rate": 0.00033674597555225286, "loss": 1.7105, "step": 41561 }, { "epoch": 1.38, "grad_norm": 0.5214395523071289, "learning_rate": 0.00033673560211613246, "loss": 1.804, "step": 41562 }, { "epoch": 1.38, "grad_norm": 0.49688977003097534, "learning_rate": 0.0003367252286354205, "loss": 1.7457, "step": 41563 }, { "epoch": 1.38, "grad_norm": 0.4929791986942291, "learning_rate": 0.0003367148551101295, "loss": 1.7834, "step": 41564 }, { "epoch": 1.38, "grad_norm": 0.952305018901825, "learning_rate": 0.0003367044815402721, "loss": 1.7058, "step": 41565 }, { "epoch": 1.38, "grad_norm": 0.4842650294303894, "learning_rate": 0.0003366941079258608, "loss": 1.7203, "step": 41566 }, { "epoch": 1.38, "grad_norm": 0.5162580013275146, "learning_rate": 0.0003366837342669083, "loss": 1.7823, "step": 41567 }, { "epoch": 1.38, "grad_norm": 0.5038164258003235, "learning_rate": 0.0003366733605634271, "loss": 1.7419, "step": 41568 }, { "epoch": 1.38, "grad_norm": 0.505401611328125, "learning_rate": 0.00033666298681542993, "loss": 1.7878, "step": 41569 }, { "epoch": 1.38, "grad_norm": 0.5125277638435364, "learning_rate": 0.00033665261302292924, "loss": 1.7477, "step": 41570 }, { "epoch": 1.38, "grad_norm": 0.5063318014144897, "learning_rate": 0.0003366422391859377, "loss": 1.7671, "step": 41571 }, { "epoch": 1.38, "grad_norm": 0.5113239288330078, "learning_rate": 0.0003366318653044679, "loss": 1.8323, "step": 41572 }, { "epoch": 1.38, "grad_norm": 0.4870125651359558, "learning_rate": 0.00033662149137853243, "loss": 1.6966, "step": 41573 }, { "epoch": 1.38, "grad_norm": 0.4830337464809418, "learning_rate": 0.00033661111740814376, "loss": 1.7285, "step": 41574 }, { "epoch": 1.38, "grad_norm": 0.5017228126525879, "learning_rate": 0.0003366007433933146, "loss": 1.7297, "step": 41575 }, { "epoch": 1.38, "grad_norm": 0.49938690662384033, "learning_rate": 0.0003365903693340576, "loss": 1.7629, "step": 41576 }, { "epoch": 1.38, "grad_norm": 1.1908303499221802, "learning_rate": 0.00033657999523038534, "loss": 1.8258, "step": 41577 }, { "epoch": 1.38, "grad_norm": 0.5085451602935791, "learning_rate": 0.00033656962108231025, "loss": 1.7506, "step": 41578 }, { "epoch": 1.38, "grad_norm": 0.5169694423675537, "learning_rate": 0.0003365592468898451, "loss": 1.7209, "step": 41579 }, { "epoch": 1.38, "grad_norm": 0.5112722516059875, "learning_rate": 0.0003365488726530023, "loss": 1.8396, "step": 41580 }, { "epoch": 1.38, "grad_norm": 0.5136129856109619, "learning_rate": 0.0003365384983717946, "loss": 1.7527, "step": 41581 }, { "epoch": 1.38, "grad_norm": 0.4925515353679657, "learning_rate": 0.00033652812404623466, "loss": 1.715, "step": 41582 }, { "epoch": 1.38, "grad_norm": 0.5061054229736328, "learning_rate": 0.0003365177496763348, "loss": 1.7576, "step": 41583 }, { "epoch": 1.38, "grad_norm": 0.5068363547325134, "learning_rate": 0.0003365073752621079, "loss": 1.7707, "step": 41584 }, { "epoch": 1.38, "grad_norm": 0.5014440417289734, "learning_rate": 0.00033649700080356636, "loss": 1.8179, "step": 41585 }, { "epoch": 1.38, "grad_norm": 0.5103063583374023, "learning_rate": 0.00033648662630072286, "loss": 1.8269, "step": 41586 }, { "epoch": 1.38, "grad_norm": 0.5117144584655762, "learning_rate": 0.00033647625175359003, "loss": 1.7849, "step": 41587 }, { "epoch": 1.38, "grad_norm": 0.48319825530052185, "learning_rate": 0.0003364658771621802, "loss": 1.7584, "step": 41588 }, { "epoch": 1.38, "grad_norm": 0.5249239206314087, "learning_rate": 0.0003364555025265064, "loss": 1.7593, "step": 41589 }, { "epoch": 1.38, "grad_norm": 0.488156259059906, "learning_rate": 0.00033644512784658076, "loss": 1.7651, "step": 41590 }, { "epoch": 1.38, "grad_norm": 0.501430332660675, "learning_rate": 0.00033643475312241636, "loss": 1.8165, "step": 41591 }, { "epoch": 1.38, "grad_norm": 0.5005756616592407, "learning_rate": 0.0003364243783540254, "loss": 1.746, "step": 41592 }, { "epoch": 1.38, "grad_norm": 0.4924283027648926, "learning_rate": 0.0003364140035414206, "loss": 1.7151, "step": 41593 }, { "epoch": 1.38, "grad_norm": 0.4974867105484009, "learning_rate": 0.00033640362868461466, "loss": 1.7423, "step": 41594 }, { "epoch": 1.38, "grad_norm": 0.4877312481403351, "learning_rate": 0.0003363932537836199, "loss": 1.7189, "step": 41595 }, { "epoch": 1.38, "grad_norm": 0.49709245562553406, "learning_rate": 0.0003363828788384493, "loss": 1.7289, "step": 41596 }, { "epoch": 1.38, "grad_norm": 0.4942597448825836, "learning_rate": 0.00033637250384911514, "loss": 1.7702, "step": 41597 }, { "epoch": 1.38, "grad_norm": 0.498045951128006, "learning_rate": 0.00033636212881563014, "loss": 1.7385, "step": 41598 }, { "epoch": 1.38, "grad_norm": 0.4995454251766205, "learning_rate": 0.00033635175373800683, "loss": 1.79, "step": 41599 }, { "epoch": 1.38, "grad_norm": 0.4968383014202118, "learning_rate": 0.00033634137861625785, "loss": 1.7302, "step": 41600 }, { "epoch": 1.38, "grad_norm": 0.5013845562934875, "learning_rate": 0.0003363310034503959, "loss": 1.7472, "step": 41601 }, { "epoch": 1.38, "grad_norm": 0.4924994111061096, "learning_rate": 0.0003363206282404334, "loss": 1.7879, "step": 41602 }, { "epoch": 1.38, "grad_norm": 0.5216566324234009, "learning_rate": 0.00033631025298638297, "loss": 1.6776, "step": 41603 }, { "epoch": 1.38, "grad_norm": 0.49601519107818604, "learning_rate": 0.0003362998776882572, "loss": 1.7268, "step": 41604 }, { "epoch": 1.38, "grad_norm": 0.49822431802749634, "learning_rate": 0.00033628950234606885, "loss": 1.7255, "step": 41605 }, { "epoch": 1.38, "grad_norm": 0.48781993985176086, "learning_rate": 0.0003362791269598303, "loss": 1.7681, "step": 41606 }, { "epoch": 1.38, "grad_norm": 0.5055429339408875, "learning_rate": 0.0003362687515295543, "loss": 1.8345, "step": 41607 }, { "epoch": 1.38, "grad_norm": 0.4990808069705963, "learning_rate": 0.0003362583760552534, "loss": 1.7467, "step": 41608 }, { "epoch": 1.38, "grad_norm": 0.5135717988014221, "learning_rate": 0.00033624800053694005, "loss": 1.7387, "step": 41609 }, { "epoch": 1.38, "grad_norm": 0.5036188960075378, "learning_rate": 0.0003362376249746271, "loss": 1.8071, "step": 41610 }, { "epoch": 1.38, "grad_norm": 0.516461968421936, "learning_rate": 0.00033622724936832686, "loss": 1.7729, "step": 41611 }, { "epoch": 1.38, "grad_norm": 0.5036648511886597, "learning_rate": 0.00033621687371805225, "loss": 1.8142, "step": 41612 }, { "epoch": 1.38, "grad_norm": 0.5028153657913208, "learning_rate": 0.0003362064980238155, "loss": 1.7824, "step": 41613 }, { "epoch": 1.38, "grad_norm": 0.4916030466556549, "learning_rate": 0.0003361961222856296, "loss": 1.7265, "step": 41614 }, { "epoch": 1.38, "grad_norm": 0.5056961178779602, "learning_rate": 0.00033618574650350687, "loss": 1.7643, "step": 41615 }, { "epoch": 1.38, "grad_norm": 0.4773428440093994, "learning_rate": 0.00033617537067745994, "loss": 1.7607, "step": 41616 }, { "epoch": 1.38, "grad_norm": 0.4992588460445404, "learning_rate": 0.0003361649948075015, "loss": 1.7094, "step": 41617 }, { "epoch": 1.38, "grad_norm": 0.4907296597957611, "learning_rate": 0.00033615461889364404, "loss": 1.7933, "step": 41618 }, { "epoch": 1.38, "grad_norm": 0.4894496500492096, "learning_rate": 0.0003361442429359002, "loss": 1.7883, "step": 41619 }, { "epoch": 1.38, "grad_norm": 0.51252281665802, "learning_rate": 0.0003361338669342825, "loss": 1.8009, "step": 41620 }, { "epoch": 1.38, "grad_norm": 0.5347961783409119, "learning_rate": 0.00033612349088880375, "loss": 1.6808, "step": 41621 }, { "epoch": 1.38, "grad_norm": 0.537968099117279, "learning_rate": 0.0003361131147994763, "loss": 1.7763, "step": 41622 }, { "epoch": 1.38, "grad_norm": 0.5062791109085083, "learning_rate": 0.0003361027386663129, "loss": 1.7056, "step": 41623 }, { "epoch": 1.38, "grad_norm": 0.49779361486434937, "learning_rate": 0.0003360923624893261, "loss": 1.7326, "step": 41624 }, { "epoch": 1.38, "grad_norm": 0.4887968599796295, "learning_rate": 0.00033608198626852845, "loss": 1.7282, "step": 41625 }, { "epoch": 1.38, "grad_norm": 0.5052844882011414, "learning_rate": 0.00033607161000393264, "loss": 1.8214, "step": 41626 }, { "epoch": 1.38, "grad_norm": 0.5104655027389526, "learning_rate": 0.00033606123369555116, "loss": 1.8136, "step": 41627 }, { "epoch": 1.38, "grad_norm": 0.5321841835975647, "learning_rate": 0.0003360508573433967, "loss": 1.8201, "step": 41628 }, { "epoch": 1.39, "grad_norm": 0.4812251925468445, "learning_rate": 0.0003360404809474818, "loss": 1.7502, "step": 41629 }, { "epoch": 1.39, "grad_norm": 0.49358606338500977, "learning_rate": 0.000336030104507819, "loss": 1.6983, "step": 41630 }, { "epoch": 1.39, "grad_norm": 0.49484020471572876, "learning_rate": 0.00033601972802442103, "loss": 1.7354, "step": 41631 }, { "epoch": 1.39, "grad_norm": 0.5224571228027344, "learning_rate": 0.00033600935149730046, "loss": 1.731, "step": 41632 }, { "epoch": 1.39, "grad_norm": 0.4863027036190033, "learning_rate": 0.00033599897492646975, "loss": 1.7826, "step": 41633 }, { "epoch": 1.39, "grad_norm": 0.49415910243988037, "learning_rate": 0.00033598859831194167, "loss": 1.8048, "step": 41634 }, { "epoch": 1.39, "grad_norm": 0.4948146939277649, "learning_rate": 0.00033597822165372864, "loss": 1.7223, "step": 41635 }, { "epoch": 1.39, "grad_norm": 0.5191671848297119, "learning_rate": 0.0003359678449518434, "loss": 1.786, "step": 41636 }, { "epoch": 1.39, "grad_norm": 0.4921812117099762, "learning_rate": 0.0003359574682062985, "loss": 1.7188, "step": 41637 }, { "epoch": 1.39, "grad_norm": 0.49610021710395813, "learning_rate": 0.0003359470914171065, "loss": 1.8307, "step": 41638 }, { "epoch": 1.39, "grad_norm": 0.5122184157371521, "learning_rate": 0.00033593671458428015, "loss": 1.7443, "step": 41639 }, { "epoch": 1.39, "grad_norm": 0.48636725544929504, "learning_rate": 0.00033592633770783176, "loss": 1.7317, "step": 41640 }, { "epoch": 1.39, "grad_norm": 0.4898608922958374, "learning_rate": 0.0003359159607877742, "loss": 1.7783, "step": 41641 }, { "epoch": 1.39, "grad_norm": 0.5012627840042114, "learning_rate": 0.0003359055838241199, "loss": 1.7214, "step": 41642 }, { "epoch": 1.39, "grad_norm": 0.5173149704933167, "learning_rate": 0.00033589520681688145, "loss": 1.807, "step": 41643 }, { "epoch": 1.39, "grad_norm": 0.512188196182251, "learning_rate": 0.0003358848297660716, "loss": 1.74, "step": 41644 }, { "epoch": 1.39, "grad_norm": 0.5003879070281982, "learning_rate": 0.0003358744526717028, "loss": 1.8016, "step": 41645 }, { "epoch": 1.39, "grad_norm": 0.5184846520423889, "learning_rate": 0.0003358640755337878, "loss": 1.7717, "step": 41646 }, { "epoch": 1.39, "grad_norm": 0.5176315307617188, "learning_rate": 0.00033585369835233906, "loss": 1.7796, "step": 41647 }, { "epoch": 1.39, "grad_norm": 0.5055531859397888, "learning_rate": 0.0003358433211273691, "loss": 1.8431, "step": 41648 }, { "epoch": 1.39, "grad_norm": 0.499102383852005, "learning_rate": 0.00033583294385889083, "loss": 1.6647, "step": 41649 }, { "epoch": 1.39, "grad_norm": 0.5089492797851562, "learning_rate": 0.0003358225665469165, "loss": 1.7249, "step": 41650 }, { "epoch": 1.39, "grad_norm": 0.5125470161437988, "learning_rate": 0.0003358121891914589, "loss": 1.7356, "step": 41651 }, { "epoch": 1.39, "grad_norm": 0.5033958554267883, "learning_rate": 0.00033580181179253054, "loss": 1.8372, "step": 41652 }, { "epoch": 1.39, "grad_norm": 0.5203885436058044, "learning_rate": 0.00033579143435014407, "loss": 1.7721, "step": 41653 }, { "epoch": 1.39, "grad_norm": 0.5009098052978516, "learning_rate": 0.0003357810568643121, "loss": 1.766, "step": 41654 }, { "epoch": 1.39, "grad_norm": 0.5014064908027649, "learning_rate": 0.0003357706793350471, "loss": 1.793, "step": 41655 }, { "epoch": 1.39, "grad_norm": 0.512127161026001, "learning_rate": 0.00033576030176236195, "loss": 1.8044, "step": 41656 }, { "epoch": 1.39, "grad_norm": 0.5133041143417358, "learning_rate": 0.00033574992414626894, "loss": 1.7386, "step": 41657 }, { "epoch": 1.39, "grad_norm": 0.4864438772201538, "learning_rate": 0.0003357395464867808, "loss": 1.8758, "step": 41658 }, { "epoch": 1.39, "grad_norm": 0.4876505136489868, "learning_rate": 0.00033572916878391013, "loss": 1.6959, "step": 41659 }, { "epoch": 1.39, "grad_norm": 0.4853628873825073, "learning_rate": 0.00033571879103766944, "loss": 1.8005, "step": 41660 }, { "epoch": 1.39, "grad_norm": 0.48968878388404846, "learning_rate": 0.0003357084132480715, "loss": 1.7522, "step": 41661 }, { "epoch": 1.39, "grad_norm": 0.5145485997200012, "learning_rate": 0.00033569803541512883, "loss": 1.7941, "step": 41662 }, { "epoch": 1.39, "grad_norm": 0.5059362649917603, "learning_rate": 0.0003356876575388539, "loss": 1.8148, "step": 41663 }, { "epoch": 1.39, "grad_norm": 0.5000426173210144, "learning_rate": 0.0003356772796192595, "loss": 1.7717, "step": 41664 }, { "epoch": 1.39, "grad_norm": 0.4894885718822479, "learning_rate": 0.00033566690165635806, "loss": 1.7975, "step": 41665 }, { "epoch": 1.39, "grad_norm": 0.4956643879413605, "learning_rate": 0.00033565652365016233, "loss": 1.7836, "step": 41666 }, { "epoch": 1.39, "grad_norm": 0.48682504892349243, "learning_rate": 0.0003356461456006849, "loss": 1.7088, "step": 41667 }, { "epoch": 1.39, "grad_norm": 0.5081618428230286, "learning_rate": 0.0003356357675079381, "loss": 1.792, "step": 41668 }, { "epoch": 1.39, "grad_norm": 0.4911285638809204, "learning_rate": 0.00033562538937193485, "loss": 1.781, "step": 41669 }, { "epoch": 1.39, "grad_norm": 0.517711877822876, "learning_rate": 0.0003356150111926876, "loss": 1.8512, "step": 41670 }, { "epoch": 1.39, "grad_norm": 0.5114859938621521, "learning_rate": 0.000335604632970209, "loss": 1.7073, "step": 41671 }, { "epoch": 1.39, "grad_norm": 0.5219437479972839, "learning_rate": 0.00033559425470451154, "loss": 1.7915, "step": 41672 }, { "epoch": 1.39, "grad_norm": 0.4777258336544037, "learning_rate": 0.000335583876395608, "loss": 1.8093, "step": 41673 }, { "epoch": 1.39, "grad_norm": 0.5129085779190063, "learning_rate": 0.0003355734980435109, "loss": 1.762, "step": 41674 }, { "epoch": 1.39, "grad_norm": 0.4816855192184448, "learning_rate": 0.00033556311964823266, "loss": 1.7233, "step": 41675 }, { "epoch": 1.39, "grad_norm": 0.5062088966369629, "learning_rate": 0.00033555274120978623, "loss": 1.6693, "step": 41676 }, { "epoch": 1.39, "grad_norm": 0.5106680989265442, "learning_rate": 0.0003355423627281839, "loss": 1.7385, "step": 41677 }, { "epoch": 1.39, "grad_norm": 0.4863784909248352, "learning_rate": 0.00033553198420343834, "loss": 1.7472, "step": 41678 }, { "epoch": 1.39, "grad_norm": 0.49997416138648987, "learning_rate": 0.00033552160563556224, "loss": 1.7166, "step": 41679 }, { "epoch": 1.39, "grad_norm": 0.5040291547775269, "learning_rate": 0.0003355112270245681, "loss": 1.7126, "step": 41680 }, { "epoch": 1.39, "grad_norm": 0.5067155361175537, "learning_rate": 0.0003355008483704687, "loss": 1.726, "step": 41681 }, { "epoch": 1.39, "grad_norm": 0.4919949769973755, "learning_rate": 0.0003354904696732764, "loss": 1.7113, "step": 41682 }, { "epoch": 1.39, "grad_norm": 0.48832622170448303, "learning_rate": 0.00033548009093300384, "loss": 1.7368, "step": 41683 }, { "epoch": 1.39, "grad_norm": 0.4965425729751587, "learning_rate": 0.00033546971214966375, "loss": 1.7595, "step": 41684 }, { "epoch": 1.39, "grad_norm": 0.4957467317581177, "learning_rate": 0.00033545933332326865, "loss": 1.7499, "step": 41685 }, { "epoch": 1.39, "grad_norm": 0.4883778393268585, "learning_rate": 0.00033544895445383115, "loss": 1.7723, "step": 41686 }, { "epoch": 1.39, "grad_norm": 0.4925544261932373, "learning_rate": 0.0003354385755413639, "loss": 1.7647, "step": 41687 }, { "epoch": 1.39, "grad_norm": 0.5132467746734619, "learning_rate": 0.0003354281965858794, "loss": 1.8144, "step": 41688 }, { "epoch": 1.39, "grad_norm": 0.48744726181030273, "learning_rate": 0.00033541781758739017, "loss": 1.7608, "step": 41689 }, { "epoch": 1.39, "grad_norm": 0.5265991687774658, "learning_rate": 0.00033540743854590903, "loss": 1.759, "step": 41690 }, { "epoch": 1.39, "grad_norm": 0.5036875009536743, "learning_rate": 0.0003353970594614485, "loss": 1.8314, "step": 41691 }, { "epoch": 1.39, "grad_norm": 0.4958789348602295, "learning_rate": 0.00033538668033402107, "loss": 1.7931, "step": 41692 }, { "epoch": 1.39, "grad_norm": 0.5028281211853027, "learning_rate": 0.00033537630116363946, "loss": 1.7362, "step": 41693 }, { "epoch": 1.39, "grad_norm": 0.5079882740974426, "learning_rate": 0.00033536592195031634, "loss": 1.7963, "step": 41694 }, { "epoch": 1.39, "grad_norm": 0.4777383804321289, "learning_rate": 0.0003353555426940641, "loss": 1.7733, "step": 41695 }, { "epoch": 1.39, "grad_norm": 0.4912457764148712, "learning_rate": 0.0003353451633948954, "loss": 1.7686, "step": 41696 }, { "epoch": 1.39, "grad_norm": 0.5029846429824829, "learning_rate": 0.0003353347840528229, "loss": 1.7874, "step": 41697 }, { "epoch": 1.39, "grad_norm": 0.49484536051750183, "learning_rate": 0.00033532440466785924, "loss": 1.793, "step": 41698 }, { "epoch": 1.39, "grad_norm": 0.49012792110443115, "learning_rate": 0.00033531402524001697, "loss": 1.7542, "step": 41699 }, { "epoch": 1.39, "grad_norm": 0.4813196361064911, "learning_rate": 0.0003353036457693086, "loss": 1.8264, "step": 41700 }, { "epoch": 1.39, "grad_norm": 0.4957441985607147, "learning_rate": 0.0003352932662557469, "loss": 1.7365, "step": 41701 }, { "epoch": 1.39, "grad_norm": 0.5174616575241089, "learning_rate": 0.0003352828866993442, "loss": 1.7835, "step": 41702 }, { "epoch": 1.39, "grad_norm": 0.48222556710243225, "learning_rate": 0.0003352725071001135, "loss": 1.7116, "step": 41703 }, { "epoch": 1.39, "grad_norm": 0.5032755136489868, "learning_rate": 0.00033526212745806707, "loss": 1.8095, "step": 41704 }, { "epoch": 1.39, "grad_norm": 0.5080369114875793, "learning_rate": 0.0003352517477732175, "loss": 1.7548, "step": 41705 }, { "epoch": 1.39, "grad_norm": 0.4876784384250641, "learning_rate": 0.0003352413680455777, "loss": 1.7354, "step": 41706 }, { "epoch": 1.39, "grad_norm": 0.48681318759918213, "learning_rate": 0.00033523098827515984, "loss": 1.83, "step": 41707 }, { "epoch": 1.39, "grad_norm": 0.49982133507728577, "learning_rate": 0.00033522060846197704, "loss": 1.8038, "step": 41708 }, { "epoch": 1.39, "grad_norm": 0.5261526107788086, "learning_rate": 0.0003352102286060414, "loss": 1.793, "step": 41709 }, { "epoch": 1.39, "grad_norm": 0.5445179343223572, "learning_rate": 0.0003351998487073658, "loss": 1.7257, "step": 41710 }, { "epoch": 1.39, "grad_norm": 0.49859124422073364, "learning_rate": 0.0003351894687659628, "loss": 1.7393, "step": 41711 }, { "epoch": 1.39, "grad_norm": 0.5191745758056641, "learning_rate": 0.0003351790887818449, "loss": 1.8393, "step": 41712 }, { "epoch": 1.39, "grad_norm": 0.4800926744937897, "learning_rate": 0.00033516870875502483, "loss": 1.7467, "step": 41713 }, { "epoch": 1.39, "grad_norm": 0.5130454897880554, "learning_rate": 0.00033515832868551506, "loss": 1.7324, "step": 41714 }, { "epoch": 1.39, "grad_norm": 0.4951682984828949, "learning_rate": 0.0003351479485733283, "loss": 1.7653, "step": 41715 }, { "epoch": 1.39, "grad_norm": 0.49768033623695374, "learning_rate": 0.0003351375684184771, "loss": 1.7733, "step": 41716 }, { "epoch": 1.39, "grad_norm": 0.5100617408752441, "learning_rate": 0.0003351271882209741, "loss": 1.8785, "step": 41717 }, { "epoch": 1.39, "grad_norm": 0.476438045501709, "learning_rate": 0.0003351168079808319, "loss": 1.8458, "step": 41718 }, { "epoch": 1.39, "grad_norm": 0.8137796521186829, "learning_rate": 0.000335106427698063, "loss": 1.9028, "step": 41719 }, { "epoch": 1.39, "grad_norm": 0.9157038927078247, "learning_rate": 0.00033509604737268006, "loss": 1.7726, "step": 41720 }, { "epoch": 1.39, "grad_norm": 0.48986920714378357, "learning_rate": 0.0003350856670046957, "loss": 1.7989, "step": 41721 }, { "epoch": 1.39, "grad_norm": 0.5246927738189697, "learning_rate": 0.0003350752865941225, "loss": 1.7659, "step": 41722 }, { "epoch": 1.39, "grad_norm": 0.49530982971191406, "learning_rate": 0.0003350649061409731, "loss": 1.7566, "step": 41723 }, { "epoch": 1.39, "grad_norm": 0.5007365942001343, "learning_rate": 0.0003350545256452601, "loss": 1.7212, "step": 41724 }, { "epoch": 1.39, "grad_norm": 0.4986717402935028, "learning_rate": 0.0003350441451069961, "loss": 1.742, "step": 41725 }, { "epoch": 1.39, "grad_norm": 0.5064839124679565, "learning_rate": 0.00033503376452619354, "loss": 1.7035, "step": 41726 }, { "epoch": 1.39, "grad_norm": 0.5122014284133911, "learning_rate": 0.0003350233839028652, "loss": 1.7591, "step": 41727 }, { "epoch": 1.39, "grad_norm": 0.5208665132522583, "learning_rate": 0.0003350130032370237, "loss": 1.7884, "step": 41728 }, { "epoch": 1.39, "grad_norm": 0.5022998452186584, "learning_rate": 0.0003350026225286816, "loss": 1.6717, "step": 41729 }, { "epoch": 1.39, "grad_norm": 0.4869355261325836, "learning_rate": 0.00033499224177785126, "loss": 1.7261, "step": 41730 }, { "epoch": 1.39, "grad_norm": 0.5137054324150085, "learning_rate": 0.00033498186098454574, "loss": 1.8447, "step": 41731 }, { "epoch": 1.39, "grad_norm": 0.5214500427246094, "learning_rate": 0.0003349714801487773, "loss": 1.7683, "step": 41732 }, { "epoch": 1.39, "grad_norm": 0.519210159778595, "learning_rate": 0.00033496109927055856, "loss": 1.7693, "step": 41733 }, { "epoch": 1.39, "grad_norm": 0.49025067687034607, "learning_rate": 0.0003349507183499023, "loss": 1.8127, "step": 41734 }, { "epoch": 1.39, "grad_norm": 0.48737651109695435, "learning_rate": 0.00033494033738682094, "loss": 1.7503, "step": 41735 }, { "epoch": 1.39, "grad_norm": 0.5140218138694763, "learning_rate": 0.00033492995638132725, "loss": 1.8211, "step": 41736 }, { "epoch": 1.39, "grad_norm": 0.5100274085998535, "learning_rate": 0.0003349195753334336, "loss": 1.7583, "step": 41737 }, { "epoch": 1.39, "grad_norm": 0.5136545896530151, "learning_rate": 0.00033490919424315285, "loss": 1.7999, "step": 41738 }, { "epoch": 1.39, "grad_norm": 0.4947985112667084, "learning_rate": 0.00033489881311049753, "loss": 1.7305, "step": 41739 }, { "epoch": 1.39, "grad_norm": 0.4945244789123535, "learning_rate": 0.00033488843193548004, "loss": 1.757, "step": 41740 }, { "epoch": 1.39, "grad_norm": 0.4960923492908478, "learning_rate": 0.00033487805071811324, "loss": 1.79, "step": 41741 }, { "epoch": 1.39, "grad_norm": 0.49770307540893555, "learning_rate": 0.00033486766945840953, "loss": 1.721, "step": 41742 }, { "epoch": 1.39, "grad_norm": 0.48525741696357727, "learning_rate": 0.0003348572881563817, "loss": 1.79, "step": 41743 }, { "epoch": 1.39, "grad_norm": 0.48294514417648315, "learning_rate": 0.0003348469068120423, "loss": 1.7734, "step": 41744 }, { "epoch": 1.39, "grad_norm": 0.5099310874938965, "learning_rate": 0.00033483652542540374, "loss": 1.7996, "step": 41745 }, { "epoch": 1.39, "grad_norm": 0.48996368050575256, "learning_rate": 0.0003348261439964789, "loss": 1.8479, "step": 41746 }, { "epoch": 1.39, "grad_norm": 0.49906229972839355, "learning_rate": 0.00033481576252528014, "loss": 1.7449, "step": 41747 }, { "epoch": 1.39, "grad_norm": 0.4957764148712158, "learning_rate": 0.00033480538101182027, "loss": 1.8561, "step": 41748 }, { "epoch": 1.39, "grad_norm": 0.5069657564163208, "learning_rate": 0.00033479499945611186, "loss": 1.7722, "step": 41749 }, { "epoch": 1.39, "grad_norm": 0.5027160048484802, "learning_rate": 0.00033478461785816727, "loss": 1.7747, "step": 41750 }, { "epoch": 1.39, "grad_norm": 0.479915052652359, "learning_rate": 0.0003347742362179994, "loss": 1.7682, "step": 41751 }, { "epoch": 1.39, "grad_norm": 0.5155919194221497, "learning_rate": 0.0003347638545356206, "loss": 1.7691, "step": 41752 }, { "epoch": 1.39, "grad_norm": 0.4988376200199127, "learning_rate": 0.00033475347281104376, "loss": 1.7816, "step": 41753 }, { "epoch": 1.39, "grad_norm": 0.4887019097805023, "learning_rate": 0.00033474309104428125, "loss": 1.7119, "step": 41754 }, { "epoch": 1.39, "grad_norm": 0.5054497122764587, "learning_rate": 0.00033473270923534566, "loss": 1.7683, "step": 41755 }, { "epoch": 1.39, "grad_norm": 0.5050231218338013, "learning_rate": 0.0003347223273842499, "loss": 1.6823, "step": 41756 }, { "epoch": 1.39, "grad_norm": 0.4876585304737091, "learning_rate": 0.0003347119454910062, "loss": 1.7641, "step": 41757 }, { "epoch": 1.39, "grad_norm": 0.491100013256073, "learning_rate": 0.0003347015635556273, "loss": 1.765, "step": 41758 }, { "epoch": 1.39, "grad_norm": 0.49448078870773315, "learning_rate": 0.0003346911815781259, "loss": 1.7157, "step": 41759 }, { "epoch": 1.39, "grad_norm": 0.5095489025115967, "learning_rate": 0.0003346807995585144, "loss": 1.8193, "step": 41760 }, { "epoch": 1.39, "grad_norm": 0.4943469166755676, "learning_rate": 0.0003346704174968057, "loss": 1.8406, "step": 41761 }, { "epoch": 1.39, "grad_norm": 0.47625237703323364, "learning_rate": 0.000334660035393012, "loss": 1.7124, "step": 41762 }, { "epoch": 1.39, "grad_norm": 0.4963080883026123, "learning_rate": 0.0003346496532471463, "loss": 1.8449, "step": 41763 }, { "epoch": 1.39, "grad_norm": 0.48513343930244446, "learning_rate": 0.00033463927105922093, "loss": 1.7438, "step": 41764 }, { "epoch": 1.39, "grad_norm": 0.5055810809135437, "learning_rate": 0.0003346288888292486, "loss": 1.7709, "step": 41765 }, { "epoch": 1.39, "grad_norm": 0.5021825432777405, "learning_rate": 0.00033461850655724197, "loss": 1.8034, "step": 41766 }, { "epoch": 1.39, "grad_norm": 0.5250312089920044, "learning_rate": 0.00033460812424321345, "loss": 1.7732, "step": 41767 }, { "epoch": 1.39, "grad_norm": 0.49554240703582764, "learning_rate": 0.0003345977418871759, "loss": 1.7333, "step": 41768 }, { "epoch": 1.39, "grad_norm": 0.49731531739234924, "learning_rate": 0.0003345873594891417, "loss": 1.7617, "step": 41769 }, { "epoch": 1.39, "grad_norm": 0.4873136878013611, "learning_rate": 0.0003345769770491235, "loss": 1.7593, "step": 41770 }, { "epoch": 1.39, "grad_norm": 0.49208807945251465, "learning_rate": 0.0003345665945671341, "loss": 1.7326, "step": 41771 }, { "epoch": 1.39, "grad_norm": 0.521723210811615, "learning_rate": 0.0003345562120431858, "loss": 1.7471, "step": 41772 }, { "epoch": 1.39, "grad_norm": 0.49681875109672546, "learning_rate": 0.0003345458294772915, "loss": 1.7342, "step": 41773 }, { "epoch": 1.39, "grad_norm": 0.5130200982093811, "learning_rate": 0.0003345354468694636, "loss": 1.7664, "step": 41774 }, { "epoch": 1.39, "grad_norm": 0.505123496055603, "learning_rate": 0.0003345250642197147, "loss": 1.7881, "step": 41775 }, { "epoch": 1.39, "grad_norm": 0.4881182909011841, "learning_rate": 0.00033451468152805746, "loss": 1.8553, "step": 41776 }, { "epoch": 1.39, "grad_norm": 0.5221986174583435, "learning_rate": 0.00033450429879450445, "loss": 1.7455, "step": 41777 }, { "epoch": 1.39, "grad_norm": 0.509454607963562, "learning_rate": 0.0003344939160190684, "loss": 1.8109, "step": 41778 }, { "epoch": 1.39, "grad_norm": 0.4882926344871521, "learning_rate": 0.0003344835332017618, "loss": 1.7103, "step": 41779 }, { "epoch": 1.39, "grad_norm": 0.48899224400520325, "learning_rate": 0.0003344731503425972, "loss": 1.7855, "step": 41780 }, { "epoch": 1.39, "grad_norm": 0.49050548672676086, "learning_rate": 0.00033446276744158733, "loss": 1.7907, "step": 41781 }, { "epoch": 1.39, "grad_norm": 0.48854705691337585, "learning_rate": 0.0003344523844987447, "loss": 1.6737, "step": 41782 }, { "epoch": 1.39, "grad_norm": 0.5077090263366699, "learning_rate": 0.000334442001514082, "loss": 1.8161, "step": 41783 }, { "epoch": 1.39, "grad_norm": 0.5022361874580383, "learning_rate": 0.0003344316184876118, "loss": 1.7862, "step": 41784 }, { "epoch": 1.39, "grad_norm": 0.5106808543205261, "learning_rate": 0.0003344212354193466, "loss": 1.7206, "step": 41785 }, { "epoch": 1.39, "grad_norm": 0.508966863155365, "learning_rate": 0.0003344108523092992, "loss": 1.7488, "step": 41786 }, { "epoch": 1.39, "grad_norm": 0.5006806254386902, "learning_rate": 0.00033440046915748207, "loss": 1.7799, "step": 41787 }, { "epoch": 1.39, "grad_norm": 0.5192068219184875, "learning_rate": 0.00033439008596390776, "loss": 1.7867, "step": 41788 }, { "epoch": 1.39, "grad_norm": 0.5020155310630798, "learning_rate": 0.000334379702728589, "loss": 1.8204, "step": 41789 }, { "epoch": 1.39, "grad_norm": 0.5004869103431702, "learning_rate": 0.00033436931945153835, "loss": 1.6949, "step": 41790 }, { "epoch": 1.39, "grad_norm": 0.5037983059883118, "learning_rate": 0.00033435893613276846, "loss": 1.7374, "step": 41791 }, { "epoch": 1.39, "grad_norm": 0.5005943179130554, "learning_rate": 0.0003343485527722918, "loss": 1.6618, "step": 41792 }, { "epoch": 1.39, "grad_norm": 0.5050604343414307, "learning_rate": 0.0003343381693701211, "loss": 1.7709, "step": 41793 }, { "epoch": 1.39, "grad_norm": 0.48436853289604187, "learning_rate": 0.0003343277859262689, "loss": 1.7123, "step": 41794 }, { "epoch": 1.39, "grad_norm": 0.48461583256721497, "learning_rate": 0.0003343174024407478, "loss": 1.7784, "step": 41795 }, { "epoch": 1.39, "grad_norm": 0.508771538734436, "learning_rate": 0.0003343070189135705, "loss": 1.746, "step": 41796 }, { "epoch": 1.39, "grad_norm": 0.49261894822120667, "learning_rate": 0.0003342966353447495, "loss": 1.7042, "step": 41797 }, { "epoch": 1.39, "grad_norm": 0.5201311707496643, "learning_rate": 0.0003342862517342975, "loss": 1.7882, "step": 41798 }, { "epoch": 1.39, "grad_norm": 0.4968663156032562, "learning_rate": 0.000334275868082227, "loss": 1.7715, "step": 41799 }, { "epoch": 1.39, "grad_norm": 0.489046573638916, "learning_rate": 0.00033426548438855063, "loss": 1.7448, "step": 41800 }, { "epoch": 1.39, "grad_norm": 0.4946213960647583, "learning_rate": 0.000334255100653281, "loss": 1.827, "step": 41801 }, { "epoch": 1.39, "grad_norm": 0.5060030817985535, "learning_rate": 0.0003342447168764307, "loss": 1.8305, "step": 41802 }, { "epoch": 1.39, "grad_norm": 0.5003601908683777, "learning_rate": 0.0003342343330580124, "loss": 1.7758, "step": 41803 }, { "epoch": 1.39, "grad_norm": 0.49578386545181274, "learning_rate": 0.0003342239491980387, "loss": 1.7033, "step": 41804 }, { "epoch": 1.39, "grad_norm": 0.5006231665611267, "learning_rate": 0.00033421356529652214, "loss": 1.7551, "step": 41805 }, { "epoch": 1.39, "grad_norm": 0.5067020058631897, "learning_rate": 0.0003342031813534753, "loss": 1.7786, "step": 41806 }, { "epoch": 1.39, "grad_norm": 0.5049310922622681, "learning_rate": 0.0003341927973689109, "loss": 1.7946, "step": 41807 }, { "epoch": 1.39, "grad_norm": 0.48532363772392273, "learning_rate": 0.00033418241334284143, "loss": 1.8054, "step": 41808 }, { "epoch": 1.39, "grad_norm": 0.49592432379722595, "learning_rate": 0.00033417202927527956, "loss": 1.8478, "step": 41809 }, { "epoch": 1.39, "grad_norm": 0.48571333289146423, "learning_rate": 0.0003341616451662379, "loss": 1.7877, "step": 41810 }, { "epoch": 1.39, "grad_norm": 0.4902973771095276, "learning_rate": 0.0003341512610157291, "loss": 1.7711, "step": 41811 }, { "epoch": 1.39, "grad_norm": 0.5134711861610413, "learning_rate": 0.0003341408768237656, "loss": 1.8372, "step": 41812 }, { "epoch": 1.39, "grad_norm": 0.520551860332489, "learning_rate": 0.0003341304925903602, "loss": 1.7717, "step": 41813 }, { "epoch": 1.39, "grad_norm": 0.4981573820114136, "learning_rate": 0.0003341201083155253, "loss": 1.7249, "step": 41814 }, { "epoch": 1.39, "grad_norm": 0.5218812823295593, "learning_rate": 0.0003341097239992737, "loss": 1.7938, "step": 41815 }, { "epoch": 1.39, "grad_norm": 0.4977495074272156, "learning_rate": 0.0003340993396416179, "loss": 1.7833, "step": 41816 }, { "epoch": 1.39, "grad_norm": 0.5074940919876099, "learning_rate": 0.00033408895524257045, "loss": 1.8047, "step": 41817 }, { "epoch": 1.39, "grad_norm": 0.5072183012962341, "learning_rate": 0.0003340785708021441, "loss": 1.7898, "step": 41818 }, { "epoch": 1.39, "grad_norm": 0.5043652653694153, "learning_rate": 0.00033406818632035143, "loss": 1.8445, "step": 41819 }, { "epoch": 1.39, "grad_norm": 0.5141441226005554, "learning_rate": 0.000334057801797205, "loss": 1.7671, "step": 41820 }, { "epoch": 1.39, "grad_norm": 0.5025688409805298, "learning_rate": 0.00033404741723271737, "loss": 1.8468, "step": 41821 }, { "epoch": 1.39, "grad_norm": 0.5014854073524475, "learning_rate": 0.0003340370326269011, "loss": 1.7764, "step": 41822 }, { "epoch": 1.39, "grad_norm": 0.49949589371681213, "learning_rate": 0.00033402664797976903, "loss": 1.7686, "step": 41823 }, { "epoch": 1.39, "grad_norm": 0.5070407390594482, "learning_rate": 0.00033401626329133353, "loss": 1.7995, "step": 41824 }, { "epoch": 1.39, "grad_norm": 0.5156118273735046, "learning_rate": 0.0003340058785616074, "loss": 1.7762, "step": 41825 }, { "epoch": 1.39, "grad_norm": 0.4966702461242676, "learning_rate": 0.00033399549379060304, "loss": 1.7681, "step": 41826 }, { "epoch": 1.39, "grad_norm": 0.5122726559638977, "learning_rate": 0.0003339851089783332, "loss": 1.8078, "step": 41827 }, { "epoch": 1.39, "grad_norm": 0.48108407855033875, "learning_rate": 0.0003339747241248106, "loss": 1.7368, "step": 41828 }, { "epoch": 1.39, "grad_norm": 0.4795493483543396, "learning_rate": 0.0003339643392300474, "loss": 1.8108, "step": 41829 }, { "epoch": 1.39, "grad_norm": 0.5130068063735962, "learning_rate": 0.0003339539542940567, "loss": 1.7884, "step": 41830 }, { "epoch": 1.39, "grad_norm": 0.49738040566444397, "learning_rate": 0.00033394356931685084, "loss": 1.773, "step": 41831 }, { "epoch": 1.39, "grad_norm": 0.49258142709732056, "learning_rate": 0.0003339331842984425, "loss": 1.7287, "step": 41832 }, { "epoch": 1.39, "grad_norm": 0.4999368190765381, "learning_rate": 0.0003339227992388442, "loss": 1.8163, "step": 41833 }, { "epoch": 1.39, "grad_norm": 0.5002772808074951, "learning_rate": 0.0003339124141380687, "loss": 1.7413, "step": 41834 }, { "epoch": 1.39, "grad_norm": 0.4912000596523285, "learning_rate": 0.00033390202899612854, "loss": 1.7192, "step": 41835 }, { "epoch": 1.39, "grad_norm": 0.4919470250606537, "learning_rate": 0.0003338916438130363, "loss": 1.777, "step": 41836 }, { "epoch": 1.39, "grad_norm": 0.5086730718612671, "learning_rate": 0.00033388125858880457, "loss": 1.8152, "step": 41837 }, { "epoch": 1.39, "grad_norm": 0.48796766996383667, "learning_rate": 0.000333870873323446, "loss": 1.7141, "step": 41838 }, { "epoch": 1.39, "grad_norm": 0.5170508027076721, "learning_rate": 0.00033386048801697314, "loss": 1.8051, "step": 41839 }, { "epoch": 1.39, "grad_norm": 0.5131893754005432, "learning_rate": 0.0003338501026693987, "loss": 1.7624, "step": 41840 }, { "epoch": 1.39, "grad_norm": 0.5009726881980896, "learning_rate": 0.00033383971728073524, "loss": 1.815, "step": 41841 }, { "epoch": 1.39, "grad_norm": 0.5063621401786804, "learning_rate": 0.00033382933185099526, "loss": 1.7808, "step": 41842 }, { "epoch": 1.39, "grad_norm": 0.5117331743240356, "learning_rate": 0.0003338189463801915, "loss": 1.7704, "step": 41843 }, { "epoch": 1.39, "grad_norm": 0.49823442101478577, "learning_rate": 0.00033380856086833653, "loss": 1.7855, "step": 41844 }, { "epoch": 1.39, "grad_norm": 0.5141129493713379, "learning_rate": 0.00033379817531544294, "loss": 1.7999, "step": 41845 }, { "epoch": 1.39, "grad_norm": 0.49411436915397644, "learning_rate": 0.0003337877897215234, "loss": 1.7241, "step": 41846 }, { "epoch": 1.39, "grad_norm": 0.5080768465995789, "learning_rate": 0.0003337774040865903, "loss": 1.7809, "step": 41847 }, { "epoch": 1.39, "grad_norm": 0.5114791393280029, "learning_rate": 0.0003337670184106566, "loss": 1.7433, "step": 41848 }, { "epoch": 1.39, "grad_norm": 0.49591076374053955, "learning_rate": 0.00033375663269373464, "loss": 1.7584, "step": 41849 }, { "epoch": 1.39, "grad_norm": 0.5177114605903625, "learning_rate": 0.00033374624693583707, "loss": 1.7921, "step": 41850 }, { "epoch": 1.39, "grad_norm": 0.5032029747962952, "learning_rate": 0.00033373586113697655, "loss": 1.7467, "step": 41851 }, { "epoch": 1.39, "grad_norm": 0.5266930460929871, "learning_rate": 0.00033372547529716566, "loss": 1.8057, "step": 41852 }, { "epoch": 1.39, "grad_norm": 0.5312644839286804, "learning_rate": 0.00033371508941641703, "loss": 1.7938, "step": 41853 }, { "epoch": 1.39, "grad_norm": 0.538227915763855, "learning_rate": 0.00033370470349474313, "loss": 1.8046, "step": 41854 }, { "epoch": 1.39, "grad_norm": 0.5090020298957825, "learning_rate": 0.0003336943175321569, "loss": 1.7665, "step": 41855 }, { "epoch": 1.39, "grad_norm": 0.520077645778656, "learning_rate": 0.0003336839315286706, "loss": 1.7606, "step": 41856 }, { "epoch": 1.39, "grad_norm": 0.5164088606834412, "learning_rate": 0.000333673545484297, "loss": 1.8085, "step": 41857 }, { "epoch": 1.39, "grad_norm": 0.5248380899429321, "learning_rate": 0.00033366315939904864, "loss": 1.7568, "step": 41858 }, { "epoch": 1.39, "grad_norm": 0.5275463461875916, "learning_rate": 0.00033365277327293823, "loss": 1.8053, "step": 41859 }, { "epoch": 1.39, "grad_norm": 0.4957389533519745, "learning_rate": 0.0003336423871059783, "loss": 1.7559, "step": 41860 }, { "epoch": 1.39, "grad_norm": 0.5033659338951111, "learning_rate": 0.0003336320008981815, "loss": 1.844, "step": 41861 }, { "epoch": 1.39, "grad_norm": 0.5195667743682861, "learning_rate": 0.00033362161464956034, "loss": 1.7633, "step": 41862 }, { "epoch": 1.39, "grad_norm": 0.5147815346717834, "learning_rate": 0.00033361122836012745, "loss": 1.8092, "step": 41863 }, { "epoch": 1.39, "grad_norm": 0.5264580249786377, "learning_rate": 0.00033360084202989555, "loss": 1.7863, "step": 41864 }, { "epoch": 1.39, "grad_norm": 0.4956776201725006, "learning_rate": 0.00033359045565887715, "loss": 1.7405, "step": 41865 }, { "epoch": 1.39, "grad_norm": 0.4923410415649414, "learning_rate": 0.00033358006924708494, "loss": 1.8225, "step": 41866 }, { "epoch": 1.39, "grad_norm": 0.4968906044960022, "learning_rate": 0.00033356968279453145, "loss": 1.7043, "step": 41867 }, { "epoch": 1.39, "grad_norm": 0.5013249516487122, "learning_rate": 0.0003335592963012293, "loss": 1.7412, "step": 41868 }, { "epoch": 1.39, "grad_norm": 0.4972887933254242, "learning_rate": 0.00033354890976719113, "loss": 1.753, "step": 41869 }, { "epoch": 1.39, "grad_norm": 0.5133589506149292, "learning_rate": 0.00033353852319242946, "loss": 1.8264, "step": 41870 }, { "epoch": 1.39, "grad_norm": 0.5000714659690857, "learning_rate": 0.0003335281365769571, "loss": 1.7048, "step": 41871 }, { "epoch": 1.39, "grad_norm": 0.5238649845123291, "learning_rate": 0.0003335177499207863, "loss": 1.8247, "step": 41872 }, { "epoch": 1.39, "grad_norm": 0.5196772813796997, "learning_rate": 0.0003335073632239301, "loss": 1.8842, "step": 41873 }, { "epoch": 1.39, "grad_norm": 0.5110229849815369, "learning_rate": 0.00033349697648640077, "loss": 1.7948, "step": 41874 }, { "epoch": 1.39, "grad_norm": 0.502409815788269, "learning_rate": 0.0003334865897082111, "loss": 1.8167, "step": 41875 }, { "epoch": 1.39, "grad_norm": 0.49489259719848633, "learning_rate": 0.0003334762028893737, "loss": 1.7252, "step": 41876 }, { "epoch": 1.39, "grad_norm": 0.5081585049629211, "learning_rate": 0.000333465816029901, "loss": 1.7207, "step": 41877 }, { "epoch": 1.39, "grad_norm": 0.49450451135635376, "learning_rate": 0.0003334554291298059, "loss": 1.8189, "step": 41878 }, { "epoch": 1.39, "grad_norm": 0.5219576954841614, "learning_rate": 0.0003334450421891006, "loss": 1.7146, "step": 41879 }, { "epoch": 1.39, "grad_norm": 0.5159416198730469, "learning_rate": 0.00033343465520779814, "loss": 1.8252, "step": 41880 }, { "epoch": 1.39, "grad_norm": 0.5204174518585205, "learning_rate": 0.00033342426818591084, "loss": 1.7897, "step": 41881 }, { "epoch": 1.39, "grad_norm": 0.5040854811668396, "learning_rate": 0.00033341388112345145, "loss": 1.7675, "step": 41882 }, { "epoch": 1.39, "grad_norm": 0.49238279461860657, "learning_rate": 0.0003334034940204326, "loss": 1.8134, "step": 41883 }, { "epoch": 1.39, "grad_norm": 0.5257524847984314, "learning_rate": 0.00033339310687686665, "loss": 1.776, "step": 41884 }, { "epoch": 1.39, "grad_norm": 0.4992397129535675, "learning_rate": 0.00033338271969276655, "loss": 1.7984, "step": 41885 }, { "epoch": 1.39, "grad_norm": 0.5185253620147705, "learning_rate": 0.00033337233246814467, "loss": 1.8371, "step": 41886 }, { "epoch": 1.39, "grad_norm": 0.5218356847763062, "learning_rate": 0.0003333619452030137, "loss": 1.8662, "step": 41887 }, { "epoch": 1.39, "grad_norm": 0.5390727519989014, "learning_rate": 0.00033335155789738623, "loss": 1.7916, "step": 41888 }, { "epoch": 1.39, "grad_norm": 0.5103450417518616, "learning_rate": 0.00033334117055127494, "loss": 1.7898, "step": 41889 }, { "epoch": 1.39, "grad_norm": 0.5094696283340454, "learning_rate": 0.0003333307831646924, "loss": 1.8574, "step": 41890 }, { "epoch": 1.39, "grad_norm": 0.5095478892326355, "learning_rate": 0.00033332039573765116, "loss": 1.73, "step": 41891 }, { "epoch": 1.39, "grad_norm": 0.4839746952056885, "learning_rate": 0.00033331000827016376, "loss": 1.8483, "step": 41892 }, { "epoch": 1.39, "grad_norm": 0.5104680061340332, "learning_rate": 0.00033329962076224305, "loss": 1.657, "step": 41893 }, { "epoch": 1.39, "grad_norm": 0.5045233964920044, "learning_rate": 0.0003332892332139015, "loss": 1.7445, "step": 41894 }, { "epoch": 1.39, "grad_norm": 0.5031800866127014, "learning_rate": 0.00033327884562515167, "loss": 1.7978, "step": 41895 }, { "epoch": 1.39, "grad_norm": 0.511164128780365, "learning_rate": 0.00033326845799600625, "loss": 1.7354, "step": 41896 }, { "epoch": 1.39, "grad_norm": 0.5145858526229858, "learning_rate": 0.0003332580703264779, "loss": 1.7319, "step": 41897 }, { "epoch": 1.39, "grad_norm": 0.4856346547603607, "learning_rate": 0.00033324768261657904, "loss": 1.8067, "step": 41898 }, { "epoch": 1.39, "grad_norm": 0.5032132267951965, "learning_rate": 0.0003332372948663224, "loss": 1.8129, "step": 41899 }, { "epoch": 1.39, "grad_norm": 0.5040461421012878, "learning_rate": 0.00033322690707572063, "loss": 1.7264, "step": 41900 }, { "epoch": 1.39, "grad_norm": 0.4983239769935608, "learning_rate": 0.00033321651924478633, "loss": 1.9142, "step": 41901 }, { "epoch": 1.39, "grad_norm": 0.4924158453941345, "learning_rate": 0.00033320613137353196, "loss": 1.6989, "step": 41902 }, { "epoch": 1.39, "grad_norm": 0.504672110080719, "learning_rate": 0.0003331957434619703, "loss": 1.7667, "step": 41903 }, { "epoch": 1.39, "grad_norm": 0.514257550239563, "learning_rate": 0.0003331853555101139, "loss": 1.6917, "step": 41904 }, { "epoch": 1.39, "grad_norm": 0.49661698937416077, "learning_rate": 0.00033317496751797534, "loss": 1.7625, "step": 41905 }, { "epoch": 1.39, "grad_norm": 0.494027704000473, "learning_rate": 0.0003331645794855673, "loss": 1.7725, "step": 41906 }, { "epoch": 1.39, "grad_norm": 0.483999103307724, "learning_rate": 0.0003331541914129023, "loss": 1.8273, "step": 41907 }, { "epoch": 1.39, "grad_norm": 0.5133509635925293, "learning_rate": 0.0003331438032999931, "loss": 1.8504, "step": 41908 }, { "epoch": 1.39, "grad_norm": 0.49235624074935913, "learning_rate": 0.00033313341514685206, "loss": 1.8085, "step": 41909 }, { "epoch": 1.39, "grad_norm": 0.4878655672073364, "learning_rate": 0.00033312302695349207, "loss": 1.8391, "step": 41910 }, { "epoch": 1.39, "grad_norm": 0.4921368658542633, "learning_rate": 0.00033311263871992547, "loss": 1.733, "step": 41911 }, { "epoch": 1.39, "grad_norm": 0.4886142313480377, "learning_rate": 0.00033310225044616505, "loss": 1.7856, "step": 41912 }, { "epoch": 1.39, "grad_norm": 0.48994171619415283, "learning_rate": 0.0003330918621322234, "loss": 1.8279, "step": 41913 }, { "epoch": 1.39, "grad_norm": 0.49251630902290344, "learning_rate": 0.0003330814737781131, "loss": 1.8629, "step": 41914 }, { "epoch": 1.39, "grad_norm": 0.5052675604820251, "learning_rate": 0.00033307108538384685, "loss": 1.7919, "step": 41915 }, { "epoch": 1.39, "grad_norm": 0.4864823520183563, "learning_rate": 0.0003330606969494371, "loss": 1.7574, "step": 41916 }, { "epoch": 1.39, "grad_norm": 0.4998771846294403, "learning_rate": 0.0003330503084748965, "loss": 1.7549, "step": 41917 }, { "epoch": 1.39, "grad_norm": 0.4823308289051056, "learning_rate": 0.0003330399199602377, "loss": 1.7789, "step": 41918 }, { "epoch": 1.39, "grad_norm": 0.4918832778930664, "learning_rate": 0.0003330295314054733, "loss": 1.7514, "step": 41919 }, { "epoch": 1.39, "grad_norm": 0.49764132499694824, "learning_rate": 0.00033301914281061597, "loss": 1.7321, "step": 41920 }, { "epoch": 1.39, "grad_norm": 0.5022306442260742, "learning_rate": 0.00033300875417567827, "loss": 1.7452, "step": 41921 }, { "epoch": 1.39, "grad_norm": 0.4950302541255951, "learning_rate": 0.0003329983655006728, "loss": 1.8417, "step": 41922 }, { "epoch": 1.39, "grad_norm": 0.5160670280456543, "learning_rate": 0.0003329879767856121, "loss": 1.7403, "step": 41923 }, { "epoch": 1.39, "grad_norm": 0.4891444146633148, "learning_rate": 0.0003329775880305089, "loss": 1.7931, "step": 41924 }, { "epoch": 1.39, "grad_norm": 0.5111526846885681, "learning_rate": 0.0003329671992353758, "loss": 1.8734, "step": 41925 }, { "epoch": 1.39, "grad_norm": 0.502790629863739, "learning_rate": 0.0003329568104002254, "loss": 1.7826, "step": 41926 }, { "epoch": 1.39, "grad_norm": 0.5005033016204834, "learning_rate": 0.00033294642152507015, "loss": 1.8457, "step": 41927 }, { "epoch": 1.39, "grad_norm": 0.5054575204849243, "learning_rate": 0.000332936032609923, "loss": 1.7784, "step": 41928 }, { "epoch": 1.39, "grad_norm": 0.4933624863624573, "learning_rate": 0.0003329256436547962, "loss": 1.7773, "step": 41929 }, { "epoch": 1.4, "grad_norm": 0.5553068518638611, "learning_rate": 0.0003329152546597026, "loss": 1.675, "step": 41930 }, { "epoch": 1.4, "grad_norm": 0.5162741541862488, "learning_rate": 0.00033290486562465466, "loss": 1.8316, "step": 41931 }, { "epoch": 1.4, "grad_norm": 0.4953414499759674, "learning_rate": 0.0003328944765496651, "loss": 1.7884, "step": 41932 }, { "epoch": 1.4, "grad_norm": 0.5049472451210022, "learning_rate": 0.00033288408743474654, "loss": 1.7903, "step": 41933 }, { "epoch": 1.4, "grad_norm": 0.5129507780075073, "learning_rate": 0.00033287369827991146, "loss": 1.7519, "step": 41934 }, { "epoch": 1.4, "grad_norm": 0.5343165397644043, "learning_rate": 0.00033286330908517263, "loss": 1.7684, "step": 41935 }, { "epoch": 1.4, "grad_norm": 0.5255401730537415, "learning_rate": 0.00033285291985054254, "loss": 1.8294, "step": 41936 }, { "epoch": 1.4, "grad_norm": 0.5021218061447144, "learning_rate": 0.00033284253057603387, "loss": 1.7481, "step": 41937 }, { "epoch": 1.4, "grad_norm": 0.5100415349006653, "learning_rate": 0.00033283214126165924, "loss": 1.8195, "step": 41938 }, { "epoch": 1.4, "grad_norm": 0.4934617877006531, "learning_rate": 0.0003328217519074311, "loss": 1.7624, "step": 41939 }, { "epoch": 1.4, "grad_norm": 0.5076062083244324, "learning_rate": 0.00033281136251336234, "loss": 1.7115, "step": 41940 }, { "epoch": 1.4, "grad_norm": 0.5077019929885864, "learning_rate": 0.0003328009730794652, "loss": 1.7292, "step": 41941 }, { "epoch": 1.4, "grad_norm": 0.5165790915489197, "learning_rate": 0.0003327905836057528, "loss": 1.6569, "step": 41942 }, { "epoch": 1.4, "grad_norm": 0.5021933913230896, "learning_rate": 0.0003327801940922373, "loss": 1.7335, "step": 41943 }, { "epoch": 1.4, "grad_norm": 0.5360188484191895, "learning_rate": 0.0003327698045389315, "loss": 1.825, "step": 41944 }, { "epoch": 1.4, "grad_norm": 0.5167077779769897, "learning_rate": 0.00033275941494584804, "loss": 1.7618, "step": 41945 }, { "epoch": 1.4, "grad_norm": 0.502515435218811, "learning_rate": 0.00033274902531299933, "loss": 1.7553, "step": 41946 }, { "epoch": 1.4, "grad_norm": 0.4922633469104767, "learning_rate": 0.0003327386356403983, "loss": 1.8122, "step": 41947 }, { "epoch": 1.4, "grad_norm": 0.4873351454734802, "learning_rate": 0.0003327282459280573, "loss": 1.7064, "step": 41948 }, { "epoch": 1.4, "grad_norm": 0.5299627780914307, "learning_rate": 0.000332717856175989, "loss": 1.8313, "step": 41949 }, { "epoch": 1.4, "grad_norm": 0.5131829977035522, "learning_rate": 0.00033270746638420606, "loss": 1.7537, "step": 41950 }, { "epoch": 1.4, "grad_norm": 0.5067761540412903, "learning_rate": 0.0003326970765527211, "loss": 1.737, "step": 41951 }, { "epoch": 1.4, "grad_norm": 0.5009058713912964, "learning_rate": 0.00033268668668154675, "loss": 1.7627, "step": 41952 }, { "epoch": 1.4, "grad_norm": 0.515813410282135, "learning_rate": 0.00033267629677069554, "loss": 1.7541, "step": 41953 }, { "epoch": 1.4, "grad_norm": 0.5569507479667664, "learning_rate": 0.00033266590682018015, "loss": 1.8273, "step": 41954 }, { "epoch": 1.4, "grad_norm": 0.5215129256248474, "learning_rate": 0.00033265551683001304, "loss": 1.7972, "step": 41955 }, { "epoch": 1.4, "grad_norm": 0.499878466129303, "learning_rate": 0.00033264512680020706, "loss": 1.7696, "step": 41956 }, { "epoch": 1.4, "grad_norm": 0.5021042823791504, "learning_rate": 0.00033263473673077464, "loss": 1.7924, "step": 41957 }, { "epoch": 1.4, "grad_norm": 0.49001842737197876, "learning_rate": 0.0003326243466217286, "loss": 1.5956, "step": 41958 }, { "epoch": 1.4, "grad_norm": 0.5256435871124268, "learning_rate": 0.0003326139564730813, "loss": 1.7953, "step": 41959 }, { "epoch": 1.4, "grad_norm": 0.49591708183288574, "learning_rate": 0.0003326035662848454, "loss": 1.8032, "step": 41960 }, { "epoch": 1.4, "grad_norm": 0.49735206365585327, "learning_rate": 0.0003325931760570337, "loss": 1.7709, "step": 41961 }, { "epoch": 1.4, "grad_norm": 3.151090383529663, "learning_rate": 0.0003325827857896586, "loss": 1.8336, "step": 41962 }, { "epoch": 1.4, "grad_norm": 0.49747830629348755, "learning_rate": 0.0003325723954827329, "loss": 1.7499, "step": 41963 }, { "epoch": 1.4, "grad_norm": 0.5526450276374817, "learning_rate": 0.00033256200513626897, "loss": 1.7196, "step": 41964 }, { "epoch": 1.4, "grad_norm": 0.524448812007904, "learning_rate": 0.00033255161475027965, "loss": 1.7683, "step": 41965 }, { "epoch": 1.4, "grad_norm": 0.5196512937545776, "learning_rate": 0.0003325412243247775, "loss": 1.724, "step": 41966 }, { "epoch": 1.4, "grad_norm": 0.5102103352546692, "learning_rate": 0.000332530833859775, "loss": 1.7475, "step": 41967 }, { "epoch": 1.4, "grad_norm": 0.522453784942627, "learning_rate": 0.0003325204433552849, "loss": 1.808, "step": 41968 }, { "epoch": 1.4, "grad_norm": 0.5157709121704102, "learning_rate": 0.00033251005281131977, "loss": 1.764, "step": 41969 }, { "epoch": 1.4, "grad_norm": 0.5453618764877319, "learning_rate": 0.0003324996622278923, "loss": 1.788, "step": 41970 }, { "epoch": 1.4, "grad_norm": 0.5021277666091919, "learning_rate": 0.00033248927160501484, "loss": 1.7687, "step": 41971 }, { "epoch": 1.4, "grad_norm": 0.4904763698577881, "learning_rate": 0.00033247888094270044, "loss": 1.7325, "step": 41972 }, { "epoch": 1.4, "grad_norm": 0.515831708908081, "learning_rate": 0.0003324684902409613, "loss": 1.8127, "step": 41973 }, { "epoch": 1.4, "grad_norm": 0.49158284068107605, "learning_rate": 0.00033245809949981024, "loss": 1.7604, "step": 41974 }, { "epoch": 1.4, "grad_norm": 0.5090367197990417, "learning_rate": 0.00033244770871925986, "loss": 1.7888, "step": 41975 }, { "epoch": 1.4, "grad_norm": 0.505277693271637, "learning_rate": 0.00033243731789932263, "loss": 1.7201, "step": 41976 }, { "epoch": 1.4, "grad_norm": 0.5070613026618958, "learning_rate": 0.00033242692704001147, "loss": 1.799, "step": 41977 }, { "epoch": 1.4, "grad_norm": 0.5187702178955078, "learning_rate": 0.0003324165361413386, "loss": 1.8155, "step": 41978 }, { "epoch": 1.4, "grad_norm": 0.5221267342567444, "learning_rate": 0.00033240614520331697, "loss": 1.7682, "step": 41979 }, { "epoch": 1.4, "grad_norm": 0.5086925625801086, "learning_rate": 0.000332395754225959, "loss": 1.8166, "step": 41980 }, { "epoch": 1.4, "grad_norm": 0.5138535499572754, "learning_rate": 0.00033238536320927736, "loss": 1.8085, "step": 41981 }, { "epoch": 1.4, "grad_norm": 0.5062696933746338, "learning_rate": 0.00033237497215328466, "loss": 1.8427, "step": 41982 }, { "epoch": 1.4, "grad_norm": 0.5352215766906738, "learning_rate": 0.00033236458105799353, "loss": 1.7832, "step": 41983 }, { "epoch": 1.4, "grad_norm": 0.500528872013092, "learning_rate": 0.00033235418992341655, "loss": 1.7617, "step": 41984 }, { "epoch": 1.4, "grad_norm": 0.559731662273407, "learning_rate": 0.0003323437987495663, "loss": 1.7636, "step": 41985 }, { "epoch": 1.4, "grad_norm": 0.4953387975692749, "learning_rate": 0.00033233340753645557, "loss": 1.8223, "step": 41986 }, { "epoch": 1.4, "grad_norm": 0.4943423569202423, "learning_rate": 0.0003323230162840967, "loss": 1.7752, "step": 41987 }, { "epoch": 1.4, "grad_norm": 1.595475673675537, "learning_rate": 0.00033231262499250255, "loss": 1.7955, "step": 41988 }, { "epoch": 1.4, "grad_norm": 0.5588576793670654, "learning_rate": 0.00033230223366168557, "loss": 1.6889, "step": 41989 }, { "epoch": 1.4, "grad_norm": 0.5386136770248413, "learning_rate": 0.0003322918422916585, "loss": 1.7014, "step": 41990 }, { "epoch": 1.4, "grad_norm": 0.49528658390045166, "learning_rate": 0.0003322814508824339, "loss": 1.6806, "step": 41991 }, { "epoch": 1.4, "grad_norm": 0.5137693881988525, "learning_rate": 0.0003322710594340243, "loss": 1.7032, "step": 41992 }, { "epoch": 1.4, "grad_norm": 0.5266178250312805, "learning_rate": 0.0003322606679464425, "loss": 1.7641, "step": 41993 }, { "epoch": 1.4, "grad_norm": 0.5327718257904053, "learning_rate": 0.0003322502764197009, "loss": 1.7805, "step": 41994 }, { "epoch": 1.4, "grad_norm": 0.5086456537246704, "learning_rate": 0.0003322398848538123, "loss": 1.7978, "step": 41995 }, { "epoch": 1.4, "grad_norm": 0.4978325664997101, "learning_rate": 0.0003322294932487891, "loss": 1.7935, "step": 41996 }, { "epoch": 1.4, "grad_norm": 0.5228381752967834, "learning_rate": 0.00033221910160464416, "loss": 1.8513, "step": 41997 }, { "epoch": 1.4, "grad_norm": 0.4847370982170105, "learning_rate": 0.00033220870992138997, "loss": 1.7653, "step": 41998 }, { "epoch": 1.4, "grad_norm": 0.5136037468910217, "learning_rate": 0.0003321983181990391, "loss": 1.8047, "step": 41999 }, { "epoch": 1.4, "grad_norm": 0.49412667751312256, "learning_rate": 0.0003321879264376043, "loss": 1.7725, "step": 42000 }, { "epoch": 1.4, "grad_norm": 0.49500784277915955, "learning_rate": 0.00033217753463709795, "loss": 1.7801, "step": 42001 }, { "epoch": 1.4, "grad_norm": 0.49232906103134155, "learning_rate": 0.00033216714279753293, "loss": 1.7082, "step": 42002 }, { "epoch": 1.4, "grad_norm": 0.4950210154056549, "learning_rate": 0.0003321567509189217, "loss": 1.7019, "step": 42003 }, { "epoch": 1.4, "grad_norm": 0.503520667552948, "learning_rate": 0.00033214635900127696, "loss": 1.7507, "step": 42004 }, { "epoch": 1.4, "grad_norm": 0.4929406940937042, "learning_rate": 0.00033213596704461117, "loss": 1.7825, "step": 42005 }, { "epoch": 1.4, "grad_norm": 0.5072489380836487, "learning_rate": 0.00033212557504893713, "loss": 1.8663, "step": 42006 }, { "epoch": 1.4, "grad_norm": 0.49467912316322327, "learning_rate": 0.0003321151830142675, "loss": 1.7765, "step": 42007 }, { "epoch": 1.4, "grad_norm": 0.5121731162071228, "learning_rate": 0.0003321047909406146, "loss": 1.7652, "step": 42008 }, { "epoch": 1.4, "grad_norm": 0.5166715979576111, "learning_rate": 0.0003320943988279912, "loss": 1.7198, "step": 42009 }, { "epoch": 1.4, "grad_norm": 0.5035908818244934, "learning_rate": 0.00033208400667641004, "loss": 1.7991, "step": 42010 }, { "epoch": 1.4, "grad_norm": 0.4942023456096649, "learning_rate": 0.0003320736144858835, "loss": 1.7881, "step": 42011 }, { "epoch": 1.4, "grad_norm": 0.497560977935791, "learning_rate": 0.0003320632222564244, "loss": 1.7124, "step": 42012 }, { "epoch": 1.4, "grad_norm": 0.503953754901886, "learning_rate": 0.00033205282998804525, "loss": 1.8707, "step": 42013 }, { "epoch": 1.4, "grad_norm": 0.5044457316398621, "learning_rate": 0.00033204243768075877, "loss": 1.7421, "step": 42014 }, { "epoch": 1.4, "grad_norm": 0.5040377974510193, "learning_rate": 0.00033203204533457736, "loss": 1.8056, "step": 42015 }, { "epoch": 1.4, "grad_norm": 0.49967169761657715, "learning_rate": 0.0003320216529495138, "loss": 1.6578, "step": 42016 }, { "epoch": 1.4, "grad_norm": 0.49793747067451477, "learning_rate": 0.0003320112605255807, "loss": 1.7333, "step": 42017 }, { "epoch": 1.4, "grad_norm": 0.4940529465675354, "learning_rate": 0.0003320008680627907, "loss": 1.7458, "step": 42018 }, { "epoch": 1.4, "grad_norm": 0.5082384943962097, "learning_rate": 0.0003319904755611562, "loss": 1.8076, "step": 42019 }, { "epoch": 1.4, "grad_norm": 0.484408438205719, "learning_rate": 0.0003319800830206901, "loss": 1.7648, "step": 42020 }, { "epoch": 1.4, "grad_norm": 0.49972042441368103, "learning_rate": 0.0003319696904414049, "loss": 1.7424, "step": 42021 }, { "epoch": 1.4, "grad_norm": 0.5075080394744873, "learning_rate": 0.0003319592978233131, "loss": 1.759, "step": 42022 }, { "epoch": 1.4, "grad_norm": 0.49148574471473694, "learning_rate": 0.00033194890516642753, "loss": 1.7269, "step": 42023 }, { "epoch": 1.4, "grad_norm": 0.48817178606987, "learning_rate": 0.00033193851247076064, "loss": 1.7482, "step": 42024 }, { "epoch": 1.4, "grad_norm": 0.506533145904541, "learning_rate": 0.00033192811973632524, "loss": 1.788, "step": 42025 }, { "epoch": 1.4, "grad_norm": 0.4860576093196869, "learning_rate": 0.0003319177269631336, "loss": 1.7606, "step": 42026 }, { "epoch": 1.4, "grad_norm": 0.5172763466835022, "learning_rate": 0.0003319073341511987, "loss": 1.834, "step": 42027 }, { "epoch": 1.4, "grad_norm": 0.49542632699012756, "learning_rate": 0.0003318969413005329, "loss": 1.8511, "step": 42028 }, { "epoch": 1.4, "grad_norm": 0.48517197370529175, "learning_rate": 0.00033188654841114894, "loss": 1.8098, "step": 42029 }, { "epoch": 1.4, "grad_norm": 0.4780574142932892, "learning_rate": 0.0003318761554830594, "loss": 1.7507, "step": 42030 }, { "epoch": 1.4, "grad_norm": 0.5008754134178162, "learning_rate": 0.0003318657625162769, "loss": 1.7308, "step": 42031 }, { "epoch": 1.4, "grad_norm": 0.5005370378494263, "learning_rate": 0.00033185536951081413, "loss": 1.7034, "step": 42032 }, { "epoch": 1.4, "grad_norm": 0.4926919937133789, "learning_rate": 0.00033184497646668353, "loss": 1.6873, "step": 42033 }, { "epoch": 1.4, "grad_norm": 0.5168308019638062, "learning_rate": 0.00033183458338389787, "loss": 1.8282, "step": 42034 }, { "epoch": 1.4, "grad_norm": 0.5113138556480408, "learning_rate": 0.0003318241902624697, "loss": 1.7928, "step": 42035 }, { "epoch": 1.4, "grad_norm": 0.491703063249588, "learning_rate": 0.0003318137971024117, "loss": 1.7754, "step": 42036 }, { "epoch": 1.4, "grad_norm": 0.5080867409706116, "learning_rate": 0.00033180340390373634, "loss": 1.7619, "step": 42037 }, { "epoch": 1.4, "grad_norm": 0.5247307419776917, "learning_rate": 0.0003317930106664565, "loss": 1.7758, "step": 42038 }, { "epoch": 1.4, "grad_norm": 0.5294029712677002, "learning_rate": 0.00033178261739058454, "loss": 1.8266, "step": 42039 }, { "epoch": 1.4, "grad_norm": 0.505886435508728, "learning_rate": 0.0003317722240761331, "loss": 1.7123, "step": 42040 }, { "epoch": 1.4, "grad_norm": 0.5108317732810974, "learning_rate": 0.0003317618307231149, "loss": 1.7905, "step": 42041 }, { "epoch": 1.4, "grad_norm": 0.49406999349594116, "learning_rate": 0.00033175143733154254, "loss": 1.7126, "step": 42042 }, { "epoch": 1.4, "grad_norm": 0.4921790063381195, "learning_rate": 0.0003317410439014287, "loss": 1.8378, "step": 42043 }, { "epoch": 1.4, "grad_norm": 0.49894222617149353, "learning_rate": 0.0003317306504327857, "loss": 1.7933, "step": 42044 }, { "epoch": 1.4, "grad_norm": 0.5045177936553955, "learning_rate": 0.0003317202569256266, "loss": 1.7289, "step": 42045 }, { "epoch": 1.4, "grad_norm": 0.5036767721176147, "learning_rate": 0.00033170986337996357, "loss": 1.7442, "step": 42046 }, { "epoch": 1.4, "grad_norm": 0.518338143825531, "learning_rate": 0.00033169946979580954, "loss": 1.755, "step": 42047 }, { "epoch": 1.4, "grad_norm": 0.4957018494606018, "learning_rate": 0.0003316890761731771, "loss": 1.7272, "step": 42048 }, { "epoch": 1.4, "grad_norm": 0.5031849145889282, "learning_rate": 0.00033167868251207866, "loss": 1.8129, "step": 42049 }, { "epoch": 1.4, "grad_norm": 0.4928288757801056, "learning_rate": 0.00033166828881252703, "loss": 1.7444, "step": 42050 }, { "epoch": 1.4, "grad_norm": 0.5485120415687561, "learning_rate": 0.0003316578950745347, "loss": 1.7929, "step": 42051 }, { "epoch": 1.4, "grad_norm": 0.5147191882133484, "learning_rate": 0.0003316475012981144, "loss": 1.8037, "step": 42052 }, { "epoch": 1.4, "grad_norm": 0.5084384083747864, "learning_rate": 0.00033163710748327876, "loss": 1.7184, "step": 42053 }, { "epoch": 1.4, "grad_norm": 0.514008641242981, "learning_rate": 0.00033162671363004017, "loss": 1.7787, "step": 42054 }, { "epoch": 1.4, "grad_norm": 0.5134500861167908, "learning_rate": 0.0003316163197384116, "loss": 1.775, "step": 42055 }, { "epoch": 1.4, "grad_norm": 0.49033886194229126, "learning_rate": 0.0003316059258084053, "loss": 1.751, "step": 42056 }, { "epoch": 1.4, "grad_norm": 0.530695915222168, "learning_rate": 0.0003315955318400342, "loss": 1.8099, "step": 42057 }, { "epoch": 1.4, "grad_norm": 0.5072908997535706, "learning_rate": 0.0003315851378333107, "loss": 1.703, "step": 42058 }, { "epoch": 1.4, "grad_norm": 0.5019137263298035, "learning_rate": 0.0003315747437882476, "loss": 1.8235, "step": 42059 }, { "epoch": 1.4, "grad_norm": 0.49200600385665894, "learning_rate": 0.00033156434970485726, "loss": 1.7398, "step": 42060 }, { "epoch": 1.4, "grad_norm": 0.5240670442581177, "learning_rate": 0.00033155395558315254, "loss": 1.8169, "step": 42061 }, { "epoch": 1.4, "grad_norm": 0.4950447380542755, "learning_rate": 0.00033154356142314604, "loss": 1.825, "step": 42062 }, { "epoch": 1.4, "grad_norm": 0.4988071024417877, "learning_rate": 0.0003315331672248501, "loss": 1.7382, "step": 42063 }, { "epoch": 1.4, "grad_norm": 0.518123984336853, "learning_rate": 0.0003315227729882777, "loss": 1.7584, "step": 42064 }, { "epoch": 1.4, "grad_norm": 0.5024547576904297, "learning_rate": 0.00033151237871344126, "loss": 1.8052, "step": 42065 }, { "epoch": 1.4, "grad_norm": 0.5096445083618164, "learning_rate": 0.0003315019844003534, "loss": 1.7615, "step": 42066 }, { "epoch": 1.4, "grad_norm": 0.49680525064468384, "learning_rate": 0.00033149159004902684, "loss": 1.7638, "step": 42067 }, { "epoch": 1.4, "grad_norm": 0.505000114440918, "learning_rate": 0.0003314811956594741, "loss": 1.7904, "step": 42068 }, { "epoch": 1.4, "grad_norm": 0.5204842686653137, "learning_rate": 0.0003314708012317078, "loss": 1.7833, "step": 42069 }, { "epoch": 1.4, "grad_norm": 0.5005609393119812, "learning_rate": 0.00033146040676574067, "loss": 1.7772, "step": 42070 }, { "epoch": 1.4, "grad_norm": 0.517366886138916, "learning_rate": 0.0003314500122615851, "loss": 1.7376, "step": 42071 }, { "epoch": 1.4, "grad_norm": 0.5020527839660645, "learning_rate": 0.000331439617719254, "loss": 1.8124, "step": 42072 }, { "epoch": 1.4, "grad_norm": 0.5102702379226685, "learning_rate": 0.0003314292231387597, "loss": 1.8111, "step": 42073 }, { "epoch": 1.4, "grad_norm": 0.5183162093162537, "learning_rate": 0.000331418828520115, "loss": 1.7734, "step": 42074 }, { "epoch": 1.4, "grad_norm": 0.5266777873039246, "learning_rate": 0.00033140843386333257, "loss": 1.6997, "step": 42075 }, { "epoch": 1.4, "grad_norm": 0.49586549401283264, "learning_rate": 0.00033139803916842485, "loss": 1.8358, "step": 42076 }, { "epoch": 1.4, "grad_norm": 0.5114468932151794, "learning_rate": 0.00033138764443540455, "loss": 1.6986, "step": 42077 }, { "epoch": 1.4, "grad_norm": 0.5098591446876526, "learning_rate": 0.0003313772496642842, "loss": 1.8297, "step": 42078 }, { "epoch": 1.4, "grad_norm": 0.5078490972518921, "learning_rate": 0.0003313668548550765, "loss": 1.7628, "step": 42079 }, { "epoch": 1.4, "grad_norm": 0.5278739929199219, "learning_rate": 0.0003313564600077942, "loss": 1.8023, "step": 42080 }, { "epoch": 1.4, "grad_norm": 0.5479918718338013, "learning_rate": 0.00033134606512244964, "loss": 1.7104, "step": 42081 }, { "epoch": 1.4, "grad_norm": 0.4964958429336548, "learning_rate": 0.0003313356701990557, "loss": 1.7173, "step": 42082 }, { "epoch": 1.4, "grad_norm": 0.4962378740310669, "learning_rate": 0.0003313252752376248, "loss": 1.749, "step": 42083 }, { "epoch": 1.4, "grad_norm": 0.49358394742012024, "learning_rate": 0.00033131488023816955, "loss": 1.792, "step": 42084 }, { "epoch": 1.4, "grad_norm": 0.4927729070186615, "learning_rate": 0.0003313044852007028, "loss": 1.7149, "step": 42085 }, { "epoch": 1.4, "grad_norm": 0.5178298950195312, "learning_rate": 0.0003312940901252369, "loss": 1.8146, "step": 42086 }, { "epoch": 1.4, "grad_norm": 0.5336701273918152, "learning_rate": 0.00033128369501178465, "loss": 1.7828, "step": 42087 }, { "epoch": 1.4, "grad_norm": 0.49811112880706787, "learning_rate": 0.00033127329986035853, "loss": 1.6946, "step": 42088 }, { "epoch": 1.4, "grad_norm": 0.5121345520019531, "learning_rate": 0.00033126290467097136, "loss": 1.711, "step": 42089 }, { "epoch": 1.4, "grad_norm": 0.49351736903190613, "learning_rate": 0.00033125250944363553, "loss": 1.7468, "step": 42090 }, { "epoch": 1.4, "grad_norm": 0.505733847618103, "learning_rate": 0.0003312421141783638, "loss": 1.7651, "step": 42091 }, { "epoch": 1.4, "grad_norm": 0.5127163529396057, "learning_rate": 0.00033123171887516873, "loss": 1.7278, "step": 42092 }, { "epoch": 1.4, "grad_norm": 0.5225342512130737, "learning_rate": 0.00033122132353406296, "loss": 1.7549, "step": 42093 }, { "epoch": 1.4, "grad_norm": 0.49559247493743896, "learning_rate": 0.0003312109281550591, "loss": 1.7977, "step": 42094 }, { "epoch": 1.4, "grad_norm": 0.4959825873374939, "learning_rate": 0.0003312005327381698, "loss": 1.7972, "step": 42095 }, { "epoch": 1.4, "grad_norm": 0.5121496319770813, "learning_rate": 0.0003311901372834076, "loss": 1.8025, "step": 42096 }, { "epoch": 1.4, "grad_norm": 0.508508563041687, "learning_rate": 0.0003311797417907852, "loss": 1.7786, "step": 42097 }, { "epoch": 1.4, "grad_norm": 0.49484890699386597, "learning_rate": 0.00033116934626031515, "loss": 1.7381, "step": 42098 }, { "epoch": 1.4, "grad_norm": 0.4800429940223694, "learning_rate": 0.0003311589506920101, "loss": 1.7688, "step": 42099 }, { "epoch": 1.4, "grad_norm": 0.5095345973968506, "learning_rate": 0.00033114855508588277, "loss": 1.7665, "step": 42100 }, { "epoch": 1.4, "grad_norm": 0.49788740277290344, "learning_rate": 0.0003311381594419456, "loss": 1.738, "step": 42101 }, { "epoch": 1.4, "grad_norm": 0.4857971966266632, "learning_rate": 0.00033112776376021134, "loss": 1.7251, "step": 42102 }, { "epoch": 1.4, "grad_norm": 0.5056727528572083, "learning_rate": 0.0003311173680406925, "loss": 1.735, "step": 42103 }, { "epoch": 1.4, "grad_norm": 0.5050848126411438, "learning_rate": 0.00033110697228340186, "loss": 1.808, "step": 42104 }, { "epoch": 1.4, "grad_norm": 0.49431487917900085, "learning_rate": 0.0003310965764883519, "loss": 1.7788, "step": 42105 }, { "epoch": 1.4, "grad_norm": 0.4819889962673187, "learning_rate": 0.00033108618065555515, "loss": 1.7373, "step": 42106 }, { "epoch": 1.4, "grad_norm": 0.5021490454673767, "learning_rate": 0.0003310757847850245, "loss": 1.7035, "step": 42107 }, { "epoch": 1.4, "grad_norm": 0.4985741972923279, "learning_rate": 0.00033106538887677243, "loss": 1.7403, "step": 42108 }, { "epoch": 1.4, "grad_norm": 0.5069984197616577, "learning_rate": 0.00033105499293081147, "loss": 1.7758, "step": 42109 }, { "epoch": 1.4, "grad_norm": 0.49325647950172424, "learning_rate": 0.0003310445969471545, "loss": 1.7541, "step": 42110 }, { "epoch": 1.4, "grad_norm": 0.518948495388031, "learning_rate": 0.0003310342009258137, "loss": 1.8084, "step": 42111 }, { "epoch": 1.4, "grad_norm": 0.47762376070022583, "learning_rate": 0.00033102380486680217, "loss": 1.8059, "step": 42112 }, { "epoch": 1.4, "grad_norm": 0.506072461605072, "learning_rate": 0.0003310134087701321, "loss": 1.7576, "step": 42113 }, { "epoch": 1.4, "grad_norm": 0.5006064176559448, "learning_rate": 0.00033100301263581656, "loss": 1.7982, "step": 42114 }, { "epoch": 1.4, "grad_norm": 0.5046537518501282, "learning_rate": 0.0003309926164638678, "loss": 1.8242, "step": 42115 }, { "epoch": 1.4, "grad_norm": 0.5065231919288635, "learning_rate": 0.0003309822202542986, "loss": 1.757, "step": 42116 }, { "epoch": 1.4, "grad_norm": 0.5076192021369934, "learning_rate": 0.00033097182400712154, "loss": 1.7824, "step": 42117 }, { "epoch": 1.4, "grad_norm": 0.5019800662994385, "learning_rate": 0.0003309614277223492, "loss": 1.6842, "step": 42118 }, { "epoch": 1.4, "grad_norm": 0.5140427350997925, "learning_rate": 0.0003309510313999944, "loss": 1.7182, "step": 42119 }, { "epoch": 1.4, "grad_norm": 0.4816451966762543, "learning_rate": 0.0003309406350400694, "loss": 1.7153, "step": 42120 }, { "epoch": 1.4, "grad_norm": 0.483651727437973, "learning_rate": 0.0003309302386425872, "loss": 1.7329, "step": 42121 }, { "epoch": 1.4, "grad_norm": 0.4922928512096405, "learning_rate": 0.00033091984220756014, "loss": 1.6891, "step": 42122 }, { "epoch": 1.4, "grad_norm": 0.4819214642047882, "learning_rate": 0.000330909445735001, "loss": 1.7785, "step": 42123 }, { "epoch": 1.4, "grad_norm": 0.5051697492599487, "learning_rate": 0.0003308990492249224, "loss": 1.7945, "step": 42124 }, { "epoch": 1.4, "grad_norm": 0.49906909465789795, "learning_rate": 0.0003308886526773368, "loss": 1.7467, "step": 42125 }, { "epoch": 1.4, "grad_norm": 0.4954768419265747, "learning_rate": 0.00033087825609225694, "loss": 1.7257, "step": 42126 }, { "epoch": 1.4, "grad_norm": 0.5067356824874878, "learning_rate": 0.0003308678594696955, "loss": 1.7033, "step": 42127 }, { "epoch": 1.4, "grad_norm": 0.5193433165550232, "learning_rate": 0.00033085746280966493, "loss": 1.8334, "step": 42128 }, { "epoch": 1.4, "grad_norm": 0.49904778599739075, "learning_rate": 0.000330847066112178, "loss": 1.771, "step": 42129 }, { "epoch": 1.4, "grad_norm": 0.49639061093330383, "learning_rate": 0.0003308366693772473, "loss": 1.7512, "step": 42130 }, { "epoch": 1.4, "grad_norm": 0.5094659924507141, "learning_rate": 0.00033082627260488544, "loss": 1.7129, "step": 42131 }, { "epoch": 1.4, "grad_norm": 0.49761879444122314, "learning_rate": 0.000330815875795105, "loss": 1.7468, "step": 42132 }, { "epoch": 1.4, "grad_norm": 0.49013614654541016, "learning_rate": 0.00033080547894791853, "loss": 1.7174, "step": 42133 }, { "epoch": 1.4, "grad_norm": 0.5078181028366089, "learning_rate": 0.00033079508206333886, "loss": 1.774, "step": 42134 }, { "epoch": 1.4, "grad_norm": 0.5034993290901184, "learning_rate": 0.0003307846851413785, "loss": 1.7777, "step": 42135 }, { "epoch": 1.4, "grad_norm": 0.5252213478088379, "learning_rate": 0.00033077428818205, "loss": 1.8285, "step": 42136 }, { "epoch": 1.4, "grad_norm": 0.49625590443611145, "learning_rate": 0.00033076389118536615, "loss": 1.7591, "step": 42137 }, { "epoch": 1.4, "grad_norm": 0.49020445346832275, "learning_rate": 0.0003307534941513394, "loss": 1.7336, "step": 42138 }, { "epoch": 1.4, "grad_norm": 0.4976004362106323, "learning_rate": 0.00033074309707998244, "loss": 1.7991, "step": 42139 }, { "epoch": 1.4, "grad_norm": 0.485649049282074, "learning_rate": 0.00033073269997130784, "loss": 1.7123, "step": 42140 }, { "epoch": 1.4, "grad_norm": 0.5120382308959961, "learning_rate": 0.0003307223028253283, "loss": 1.7928, "step": 42141 }, { "epoch": 1.4, "grad_norm": 0.4950675666332245, "learning_rate": 0.00033071190564205655, "loss": 1.7735, "step": 42142 }, { "epoch": 1.4, "grad_norm": 0.6756027340888977, "learning_rate": 0.0003307015084215049, "loss": 1.7677, "step": 42143 }, { "epoch": 1.4, "grad_norm": 0.6069496273994446, "learning_rate": 0.00033069111116368626, "loss": 1.8008, "step": 42144 }, { "epoch": 1.4, "grad_norm": 0.507662296295166, "learning_rate": 0.00033068071386861307, "loss": 1.8016, "step": 42145 }, { "epoch": 1.4, "grad_norm": 0.5081563591957092, "learning_rate": 0.00033067031653629796, "loss": 1.8299, "step": 42146 }, { "epoch": 1.4, "grad_norm": 0.49668067693710327, "learning_rate": 0.0003306599191667537, "loss": 1.8027, "step": 42147 }, { "epoch": 1.4, "grad_norm": 0.5071489810943604, "learning_rate": 0.0003306495217599927, "loss": 1.85, "step": 42148 }, { "epoch": 1.4, "grad_norm": 0.5078871250152588, "learning_rate": 0.0003306391243160279, "loss": 1.7678, "step": 42149 }, { "epoch": 1.4, "grad_norm": 0.5069847106933594, "learning_rate": 0.0003306287268348716, "loss": 1.7852, "step": 42150 }, { "epoch": 1.4, "grad_norm": 0.5049461722373962, "learning_rate": 0.00033061832931653647, "loss": 1.7901, "step": 42151 }, { "epoch": 1.4, "grad_norm": 0.49715307354927063, "learning_rate": 0.0003306079317610353, "loss": 1.7607, "step": 42152 }, { "epoch": 1.4, "grad_norm": 0.5141980051994324, "learning_rate": 0.00033059753416838055, "loss": 1.7988, "step": 42153 }, { "epoch": 1.4, "grad_norm": 0.496084600687027, "learning_rate": 0.0003305871365385849, "loss": 1.7628, "step": 42154 }, { "epoch": 1.4, "grad_norm": 0.49904030561447144, "learning_rate": 0.0003305767388716611, "loss": 1.7669, "step": 42155 }, { "epoch": 1.4, "grad_norm": 0.5005186200141907, "learning_rate": 0.0003305663411676215, "loss": 1.7654, "step": 42156 }, { "epoch": 1.4, "grad_norm": 0.48841702938079834, "learning_rate": 0.00033055594342647893, "loss": 1.7558, "step": 42157 }, { "epoch": 1.4, "grad_norm": 0.49269047379493713, "learning_rate": 0.0003305455456482459, "loss": 1.7398, "step": 42158 }, { "epoch": 1.4, "grad_norm": 0.5364491939544678, "learning_rate": 0.00033053514783293504, "loss": 1.7618, "step": 42159 }, { "epoch": 1.4, "grad_norm": 0.506932258605957, "learning_rate": 0.00033052474998055916, "loss": 1.8058, "step": 42160 }, { "epoch": 1.4, "grad_norm": 0.4864587187767029, "learning_rate": 0.00033051435209113057, "loss": 1.7392, "step": 42161 }, { "epoch": 1.4, "grad_norm": 0.4981718063354492, "learning_rate": 0.00033050395416466216, "loss": 1.7741, "step": 42162 }, { "epoch": 1.4, "grad_norm": 0.4949101507663727, "learning_rate": 0.0003304935562011664, "loss": 1.7223, "step": 42163 }, { "epoch": 1.4, "grad_norm": 0.5232190489768982, "learning_rate": 0.000330483158200656, "loss": 1.7706, "step": 42164 }, { "epoch": 1.4, "grad_norm": 0.5193781852722168, "learning_rate": 0.00033047276016314354, "loss": 1.8102, "step": 42165 }, { "epoch": 1.4, "grad_norm": 0.5248674750328064, "learning_rate": 0.0003304623620886416, "loss": 1.7367, "step": 42166 }, { "epoch": 1.4, "grad_norm": 0.5019041895866394, "learning_rate": 0.00033045196397716285, "loss": 1.6899, "step": 42167 }, { "epoch": 1.4, "grad_norm": 0.49605536460876465, "learning_rate": 0.0003304415658287198, "loss": 1.8263, "step": 42168 }, { "epoch": 1.4, "grad_norm": 0.5227198004722595, "learning_rate": 0.00033043116764332534, "loss": 1.7456, "step": 42169 }, { "epoch": 1.4, "grad_norm": 0.5269812941551208, "learning_rate": 0.00033042076942099187, "loss": 1.7515, "step": 42170 }, { "epoch": 1.4, "grad_norm": 0.541530430316925, "learning_rate": 0.00033041037116173205, "loss": 1.7167, "step": 42171 }, { "epoch": 1.4, "grad_norm": 0.5004106760025024, "learning_rate": 0.0003303999728655585, "loss": 1.7041, "step": 42172 }, { "epoch": 1.4, "grad_norm": 0.5366880893707275, "learning_rate": 0.00033038957453248387, "loss": 1.7997, "step": 42173 }, { "epoch": 1.4, "grad_norm": 0.531305730342865, "learning_rate": 0.0003303791761625209, "loss": 1.8187, "step": 42174 }, { "epoch": 1.4, "grad_norm": 0.5247054100036621, "learning_rate": 0.00033036877775568184, "loss": 1.6517, "step": 42175 }, { "epoch": 1.4, "grad_norm": 0.49144288897514343, "learning_rate": 0.00033035837931197977, "loss": 1.811, "step": 42176 }, { "epoch": 1.4, "grad_norm": 0.504629373550415, "learning_rate": 0.000330347980831427, "loss": 1.7353, "step": 42177 }, { "epoch": 1.4, "grad_norm": 0.515938937664032, "learning_rate": 0.0003303375823140363, "loss": 1.7681, "step": 42178 }, { "epoch": 1.4, "grad_norm": 0.520537793636322, "learning_rate": 0.00033032718375982024, "loss": 1.7724, "step": 42179 }, { "epoch": 1.4, "grad_norm": 0.527018666267395, "learning_rate": 0.0003303167851687914, "loss": 1.7939, "step": 42180 }, { "epoch": 1.4, "grad_norm": 0.491012305021286, "learning_rate": 0.00033030638654096253, "loss": 1.7144, "step": 42181 }, { "epoch": 1.4, "grad_norm": 0.5100364089012146, "learning_rate": 0.00033029598787634614, "loss": 1.7603, "step": 42182 }, { "epoch": 1.4, "grad_norm": 0.5353566408157349, "learning_rate": 0.00033028558917495485, "loss": 1.7415, "step": 42183 }, { "epoch": 1.4, "grad_norm": 0.5115516185760498, "learning_rate": 0.0003302751904368013, "loss": 1.7333, "step": 42184 }, { "epoch": 1.4, "grad_norm": 0.5180122256278992, "learning_rate": 0.0003302647916618982, "loss": 1.7526, "step": 42185 }, { "epoch": 1.4, "grad_norm": 0.5219032764434814, "learning_rate": 0.0003302543928502581, "loss": 1.7648, "step": 42186 }, { "epoch": 1.4, "grad_norm": 0.5291675329208374, "learning_rate": 0.0003302439940018936, "loss": 1.7763, "step": 42187 }, { "epoch": 1.4, "grad_norm": 0.49337783455848694, "learning_rate": 0.0003302335951168173, "loss": 1.7772, "step": 42188 }, { "epoch": 1.4, "grad_norm": 0.5552419424057007, "learning_rate": 0.0003302231961950419, "loss": 1.7599, "step": 42189 }, { "epoch": 1.4, "grad_norm": 0.5531837344169617, "learning_rate": 0.00033021279723658, "loss": 1.8265, "step": 42190 }, { "epoch": 1.4, "grad_norm": 0.5181722044944763, "learning_rate": 0.0003302023982414442, "loss": 1.7053, "step": 42191 }, { "epoch": 1.4, "grad_norm": 0.515831470489502, "learning_rate": 0.00033019199920964723, "loss": 1.793, "step": 42192 }, { "epoch": 1.4, "grad_norm": 0.525667667388916, "learning_rate": 0.0003301816001412015, "loss": 1.7953, "step": 42193 }, { "epoch": 1.4, "grad_norm": 0.5133829712867737, "learning_rate": 0.0003301712010361198, "loss": 1.7946, "step": 42194 }, { "epoch": 1.4, "grad_norm": 0.5061091780662537, "learning_rate": 0.0003301608018944147, "loss": 1.8332, "step": 42195 }, { "epoch": 1.4, "grad_norm": 0.5038976073265076, "learning_rate": 0.00033015040271609883, "loss": 1.7925, "step": 42196 }, { "epoch": 1.4, "grad_norm": 0.5284245610237122, "learning_rate": 0.00033014000350118494, "loss": 1.7357, "step": 42197 }, { "epoch": 1.4, "grad_norm": 0.5207082629203796, "learning_rate": 0.0003301296042496853, "loss": 1.7095, "step": 42198 }, { "epoch": 1.4, "grad_norm": 0.5122271776199341, "learning_rate": 0.0003301192049616129, "loss": 1.7646, "step": 42199 }, { "epoch": 1.4, "grad_norm": 0.49500271677970886, "learning_rate": 0.00033010880563698016, "loss": 1.771, "step": 42200 }, { "epoch": 1.4, "grad_norm": 0.5018467903137207, "learning_rate": 0.00033009840627579986, "loss": 1.7061, "step": 42201 }, { "epoch": 1.4, "grad_norm": 0.5175476670265198, "learning_rate": 0.0003300880068780845, "loss": 1.712, "step": 42202 }, { "epoch": 1.4, "grad_norm": 0.5256627202033997, "learning_rate": 0.00033007760744384665, "loss": 1.7609, "step": 42203 }, { "epoch": 1.4, "grad_norm": 0.5293421149253845, "learning_rate": 0.0003300672079730991, "loss": 1.7854, "step": 42204 }, { "epoch": 1.4, "grad_norm": 0.5160007476806641, "learning_rate": 0.0003300568084658543, "loss": 1.8123, "step": 42205 }, { "epoch": 1.4, "grad_norm": 0.5092064142227173, "learning_rate": 0.00033004640892212513, "loss": 1.7787, "step": 42206 }, { "epoch": 1.4, "grad_norm": 0.49548107385635376, "learning_rate": 0.00033003600934192396, "loss": 1.7151, "step": 42207 }, { "epoch": 1.4, "grad_norm": 0.5122368931770325, "learning_rate": 0.00033002560972526345, "loss": 1.7883, "step": 42208 }, { "epoch": 1.4, "grad_norm": 0.5358320474624634, "learning_rate": 0.00033001521007215625, "loss": 1.8435, "step": 42209 }, { "epoch": 1.4, "grad_norm": 0.4882686734199524, "learning_rate": 0.000330004810382615, "loss": 1.7421, "step": 42210 }, { "epoch": 1.4, "grad_norm": 0.49441033601760864, "learning_rate": 0.0003299944106566525, "loss": 1.7723, "step": 42211 }, { "epoch": 1.4, "grad_norm": 0.5128763914108276, "learning_rate": 0.0003299840108942811, "loss": 1.7205, "step": 42212 }, { "epoch": 1.4, "grad_norm": 0.5342946648597717, "learning_rate": 0.00032997361109551355, "loss": 1.8082, "step": 42213 }, { "epoch": 1.4, "grad_norm": 0.5040216445922852, "learning_rate": 0.0003299632112603624, "loss": 1.7319, "step": 42214 }, { "epoch": 1.4, "grad_norm": 0.5027540922164917, "learning_rate": 0.00032995281138884034, "loss": 1.6686, "step": 42215 }, { "epoch": 1.4, "grad_norm": 0.5040459632873535, "learning_rate": 0.00032994241148096, "loss": 1.7679, "step": 42216 }, { "epoch": 1.4, "grad_norm": 0.49392783641815186, "learning_rate": 0.00032993201153673404, "loss": 1.7011, "step": 42217 }, { "epoch": 1.4, "grad_norm": 0.5001647472381592, "learning_rate": 0.0003299216115561749, "loss": 1.7198, "step": 42218 }, { "epoch": 1.4, "grad_norm": 0.49619174003601074, "learning_rate": 0.0003299112115392954, "loss": 1.726, "step": 42219 }, { "epoch": 1.4, "grad_norm": 0.5010359883308411, "learning_rate": 0.0003299008114861081, "loss": 1.8337, "step": 42220 }, { "epoch": 1.4, "grad_norm": 0.49712294340133667, "learning_rate": 0.00032989041139662564, "loss": 1.7148, "step": 42221 }, { "epoch": 1.4, "grad_norm": 0.5357860922813416, "learning_rate": 0.00032988001127086066, "loss": 1.7865, "step": 42222 }, { "epoch": 1.4, "grad_norm": 0.5049581527709961, "learning_rate": 0.0003298696111088256, "loss": 1.7169, "step": 42223 }, { "epoch": 1.4, "grad_norm": 0.5035445094108582, "learning_rate": 0.0003298592109105334, "loss": 1.7677, "step": 42224 }, { "epoch": 1.4, "grad_norm": 0.5054365992546082, "learning_rate": 0.0003298488106759964, "loss": 1.7646, "step": 42225 }, { "epoch": 1.4, "grad_norm": 0.5086405277252197, "learning_rate": 0.00032983841040522743, "loss": 1.8257, "step": 42226 }, { "epoch": 1.4, "grad_norm": 0.528205156326294, "learning_rate": 0.000329828010098239, "loss": 1.7961, "step": 42227 }, { "epoch": 1.4, "grad_norm": 0.5283432602882385, "learning_rate": 0.00032981760975504365, "loss": 1.8117, "step": 42228 }, { "epoch": 1.4, "grad_norm": 0.5004384517669678, "learning_rate": 0.0003298072093756543, "loss": 1.8465, "step": 42229 }, { "epoch": 1.4, "grad_norm": 0.5068811178207397, "learning_rate": 0.0003297968089600832, "loss": 1.7641, "step": 42230 }, { "epoch": 1.41, "grad_norm": 0.517212450504303, "learning_rate": 0.00032978640850834335, "loss": 1.6811, "step": 42231 }, { "epoch": 1.41, "grad_norm": 0.5103111267089844, "learning_rate": 0.00032977600802044704, "loss": 1.8063, "step": 42232 }, { "epoch": 1.41, "grad_norm": 0.5260207056999207, "learning_rate": 0.0003297656074964071, "loss": 1.8074, "step": 42233 }, { "epoch": 1.41, "grad_norm": 0.48318079113960266, "learning_rate": 0.00032975520693623615, "loss": 1.7643, "step": 42234 }, { "epoch": 1.41, "grad_norm": 0.49775853753089905, "learning_rate": 0.00032974480633994667, "loss": 1.7304, "step": 42235 }, { "epoch": 1.41, "grad_norm": 0.49653905630111694, "learning_rate": 0.00032973440570755147, "loss": 1.778, "step": 42236 }, { "epoch": 1.41, "grad_norm": 0.48461997509002686, "learning_rate": 0.000329724005039063, "loss": 1.7936, "step": 42237 }, { "epoch": 1.41, "grad_norm": 0.4840415120124817, "learning_rate": 0.00032971360433449405, "loss": 1.7831, "step": 42238 }, { "epoch": 1.41, "grad_norm": 0.4969887435436249, "learning_rate": 0.0003297032035938571, "loss": 1.7399, "step": 42239 }, { "epoch": 1.41, "grad_norm": 0.5012807846069336, "learning_rate": 0.00032969280281716477, "loss": 1.7551, "step": 42240 }, { "epoch": 1.41, "grad_norm": 0.49189841747283936, "learning_rate": 0.00032968240200442994, "loss": 1.7531, "step": 42241 }, { "epoch": 1.41, "grad_norm": 0.4836728274822235, "learning_rate": 0.00032967200115566496, "loss": 1.6757, "step": 42242 }, { "epoch": 1.41, "grad_norm": 0.49905335903167725, "learning_rate": 0.00032966160027088245, "loss": 1.7088, "step": 42243 }, { "epoch": 1.41, "grad_norm": 0.5118743181228638, "learning_rate": 0.0003296511993500952, "loss": 1.8143, "step": 42244 }, { "epoch": 1.41, "grad_norm": 0.4908694922924042, "learning_rate": 0.0003296407983933158, "loss": 1.7924, "step": 42245 }, { "epoch": 1.41, "grad_norm": 0.4996008276939392, "learning_rate": 0.00032963039740055677, "loss": 1.8128, "step": 42246 }, { "epoch": 1.41, "grad_norm": 0.49389585852622986, "learning_rate": 0.00032961999637183087, "loss": 1.8073, "step": 42247 }, { "epoch": 1.41, "grad_norm": 0.49076443910598755, "learning_rate": 0.0003296095953071506, "loss": 1.7472, "step": 42248 }, { "epoch": 1.41, "grad_norm": 0.5030614137649536, "learning_rate": 0.0003295991942065287, "loss": 1.767, "step": 42249 }, { "epoch": 1.41, "grad_norm": 0.4916080832481384, "learning_rate": 0.0003295887930699777, "loss": 1.7225, "step": 42250 }, { "epoch": 1.41, "grad_norm": 0.49774184823036194, "learning_rate": 0.0003295783918975103, "loss": 1.6831, "step": 42251 }, { "epoch": 1.41, "grad_norm": 0.48668694496154785, "learning_rate": 0.0003295679906891391, "loss": 1.7779, "step": 42252 }, { "epoch": 1.41, "grad_norm": 0.488005131483078, "learning_rate": 0.0003295575894448766, "loss": 1.7853, "step": 42253 }, { "epoch": 1.41, "grad_norm": 0.4966789782047272, "learning_rate": 0.0003295471881647357, "loss": 1.6952, "step": 42254 }, { "epoch": 1.41, "grad_norm": 0.49661141633987427, "learning_rate": 0.0003295367868487287, "loss": 1.7709, "step": 42255 }, { "epoch": 1.41, "grad_norm": 0.4907781481742859, "learning_rate": 0.0003295263854968685, "loss": 1.7341, "step": 42256 }, { "epoch": 1.41, "grad_norm": 0.5072943568229675, "learning_rate": 0.0003295159841091676, "loss": 1.8082, "step": 42257 }, { "epoch": 1.41, "grad_norm": 0.5174707174301147, "learning_rate": 0.00032950558268563863, "loss": 1.7052, "step": 42258 }, { "epoch": 1.41, "grad_norm": 0.5017366409301758, "learning_rate": 0.00032949518122629435, "loss": 1.7734, "step": 42259 }, { "epoch": 1.41, "grad_norm": 0.5052288770675659, "learning_rate": 0.00032948477973114706, "loss": 1.7667, "step": 42260 }, { "epoch": 1.41, "grad_norm": 0.4886148273944855, "learning_rate": 0.0003294743782002097, "loss": 1.7271, "step": 42261 }, { "epoch": 1.41, "grad_norm": 0.5170849561691284, "learning_rate": 0.00032946397663349483, "loss": 1.8606, "step": 42262 }, { "epoch": 1.41, "grad_norm": 0.4938199520111084, "learning_rate": 0.00032945357503101494, "loss": 1.7242, "step": 42263 }, { "epoch": 1.41, "grad_norm": 0.5106943845748901, "learning_rate": 0.00032944317339278284, "loss": 1.7307, "step": 42264 }, { "epoch": 1.41, "grad_norm": 0.5129075646400452, "learning_rate": 0.0003294327717188109, "loss": 1.7898, "step": 42265 }, { "epoch": 1.41, "grad_norm": 0.49607476592063904, "learning_rate": 0.00032942237000911217, "loss": 1.8292, "step": 42266 }, { "epoch": 1.41, "grad_norm": 0.5023288726806641, "learning_rate": 0.0003294119682636988, "loss": 1.7803, "step": 42267 }, { "epoch": 1.41, "grad_norm": 0.512601912021637, "learning_rate": 0.0003294015664825837, "loss": 1.7581, "step": 42268 }, { "epoch": 1.41, "grad_norm": 0.48649492859840393, "learning_rate": 0.0003293911646657795, "loss": 1.74, "step": 42269 }, { "epoch": 1.41, "grad_norm": 0.48746201395988464, "learning_rate": 0.0003293807628132987, "loss": 1.7827, "step": 42270 }, { "epoch": 1.41, "grad_norm": 0.4978961944580078, "learning_rate": 0.00032937036092515397, "loss": 1.8108, "step": 42271 }, { "epoch": 1.41, "grad_norm": 0.49777543544769287, "learning_rate": 0.00032935995900135797, "loss": 1.7924, "step": 42272 }, { "epoch": 1.41, "grad_norm": 0.5187572836875916, "learning_rate": 0.0003293495570419233, "loss": 1.8354, "step": 42273 }, { "epoch": 1.41, "grad_norm": 0.5037409663200378, "learning_rate": 0.0003293391550468626, "loss": 1.7263, "step": 42274 }, { "epoch": 1.41, "grad_norm": 0.4966168701648712, "learning_rate": 0.00032932875301618845, "loss": 1.774, "step": 42275 }, { "epoch": 1.41, "grad_norm": 0.5047811269760132, "learning_rate": 0.00032931835094991355, "loss": 1.7609, "step": 42276 }, { "epoch": 1.41, "grad_norm": 0.4927523136138916, "learning_rate": 0.0003293079488480505, "loss": 1.7095, "step": 42277 }, { "epoch": 1.41, "grad_norm": 0.508179783821106, "learning_rate": 0.00032929754671061183, "loss": 1.7841, "step": 42278 }, { "epoch": 1.41, "grad_norm": 0.49093911051750183, "learning_rate": 0.0003292871445376104, "loss": 1.8065, "step": 42279 }, { "epoch": 1.41, "grad_norm": 0.5051857233047485, "learning_rate": 0.0003292767423290586, "loss": 1.7323, "step": 42280 }, { "epoch": 1.41, "grad_norm": 0.5078372359275818, "learning_rate": 0.0003292663400849692, "loss": 1.8279, "step": 42281 }, { "epoch": 1.41, "grad_norm": 0.496620237827301, "learning_rate": 0.0003292559378053547, "loss": 1.786, "step": 42282 }, { "epoch": 1.41, "grad_norm": 0.4948946535587311, "learning_rate": 0.0003292455354902279, "loss": 1.8093, "step": 42283 }, { "epoch": 1.41, "grad_norm": 0.49192288517951965, "learning_rate": 0.0003292351331396013, "loss": 1.7701, "step": 42284 }, { "epoch": 1.41, "grad_norm": 0.49954667687416077, "learning_rate": 0.00032922473075348743, "loss": 1.7591, "step": 42285 }, { "epoch": 1.41, "grad_norm": 0.4904772937297821, "learning_rate": 0.00032921432833189925, "loss": 1.733, "step": 42286 }, { "epoch": 1.41, "grad_norm": 0.4823112189769745, "learning_rate": 0.00032920392587484904, "loss": 1.7512, "step": 42287 }, { "epoch": 1.41, "grad_norm": 0.499335378408432, "learning_rate": 0.0003291935233823496, "loss": 1.8006, "step": 42288 }, { "epoch": 1.41, "grad_norm": 0.4899950921535492, "learning_rate": 0.0003291831208544136, "loss": 1.7461, "step": 42289 }, { "epoch": 1.41, "grad_norm": 0.5015884041786194, "learning_rate": 0.0003291727182910534, "loss": 1.7186, "step": 42290 }, { "epoch": 1.41, "grad_norm": 0.4977356791496277, "learning_rate": 0.000329162315692282, "loss": 1.7501, "step": 42291 }, { "epoch": 1.41, "grad_norm": 0.49880003929138184, "learning_rate": 0.0003291519130581117, "loss": 1.7732, "step": 42292 }, { "epoch": 1.41, "grad_norm": 0.5224868059158325, "learning_rate": 0.0003291415103885554, "loss": 1.7626, "step": 42293 }, { "epoch": 1.41, "grad_norm": 0.5005780458450317, "learning_rate": 0.00032913110768362556, "loss": 1.7742, "step": 42294 }, { "epoch": 1.41, "grad_norm": 0.4930211305618286, "learning_rate": 0.00032912070494333485, "loss": 1.7492, "step": 42295 }, { "epoch": 1.41, "grad_norm": 0.5362816452980042, "learning_rate": 0.00032911030216769595, "loss": 1.8299, "step": 42296 }, { "epoch": 1.41, "grad_norm": 0.5168987512588501, "learning_rate": 0.00032909989935672127, "loss": 1.8423, "step": 42297 }, { "epoch": 1.41, "grad_norm": 0.5054656863212585, "learning_rate": 0.0003290894965104237, "loss": 1.7792, "step": 42298 }, { "epoch": 1.41, "grad_norm": 0.4915761649608612, "learning_rate": 0.00032907909362881577, "loss": 1.7719, "step": 42299 }, { "epoch": 1.41, "grad_norm": 0.48567306995391846, "learning_rate": 0.00032906869071191015, "loss": 1.7536, "step": 42300 }, { "epoch": 1.41, "grad_norm": 0.4994485676288605, "learning_rate": 0.00032905828775971934, "loss": 1.8304, "step": 42301 }, { "epoch": 1.41, "grad_norm": 0.4981536865234375, "learning_rate": 0.00032904788477225607, "loss": 1.8052, "step": 42302 }, { "epoch": 1.41, "grad_norm": 0.5485606789588928, "learning_rate": 0.000329037481749533, "loss": 1.863, "step": 42303 }, { "epoch": 1.41, "grad_norm": 0.5085169672966003, "learning_rate": 0.00032902707869156267, "loss": 1.8191, "step": 42304 }, { "epoch": 1.41, "grad_norm": 0.49749302864074707, "learning_rate": 0.00032901667559835776, "loss": 1.7725, "step": 42305 }, { "epoch": 1.41, "grad_norm": 0.5495473146438599, "learning_rate": 0.00032900627246993085, "loss": 1.7208, "step": 42306 }, { "epoch": 1.41, "grad_norm": 0.4947431981563568, "learning_rate": 0.00032899586930629465, "loss": 1.6936, "step": 42307 }, { "epoch": 1.41, "grad_norm": 0.5054318904876709, "learning_rate": 0.00032898546610746166, "loss": 1.7464, "step": 42308 }, { "epoch": 1.41, "grad_norm": 0.5060355067253113, "learning_rate": 0.00032897506287344464, "loss": 1.7974, "step": 42309 }, { "epoch": 1.41, "grad_norm": 0.5204656720161438, "learning_rate": 0.00032896465960425615, "loss": 1.7786, "step": 42310 }, { "epoch": 1.41, "grad_norm": 0.5559322834014893, "learning_rate": 0.0003289542562999088, "loss": 1.777, "step": 42311 }, { "epoch": 1.41, "grad_norm": 0.5100827217102051, "learning_rate": 0.0003289438529604153, "loss": 1.7223, "step": 42312 }, { "epoch": 1.41, "grad_norm": 0.5175747871398926, "learning_rate": 0.0003289334495857882, "loss": 1.7573, "step": 42313 }, { "epoch": 1.41, "grad_norm": 0.4945391118526459, "learning_rate": 0.0003289230461760402, "loss": 1.7768, "step": 42314 }, { "epoch": 1.41, "grad_norm": 0.524581789970398, "learning_rate": 0.00032891264273118376, "loss": 1.7278, "step": 42315 }, { "epoch": 1.41, "grad_norm": 0.5222599506378174, "learning_rate": 0.0003289022392512318, "loss": 1.7302, "step": 42316 }, { "epoch": 1.41, "grad_norm": 0.5346460938453674, "learning_rate": 0.00032889183573619655, "loss": 1.8131, "step": 42317 }, { "epoch": 1.41, "grad_norm": 0.4957681894302368, "learning_rate": 0.0003288814321860911, "loss": 1.753, "step": 42318 }, { "epoch": 1.41, "grad_norm": 0.5228219032287598, "learning_rate": 0.0003288710286009278, "loss": 1.7375, "step": 42319 }, { "epoch": 1.41, "grad_norm": 0.5142921805381775, "learning_rate": 0.00032886062498071924, "loss": 1.7493, "step": 42320 }, { "epoch": 1.41, "grad_norm": 0.5377185940742493, "learning_rate": 0.0003288502213254782, "loss": 1.8241, "step": 42321 }, { "epoch": 1.41, "grad_norm": 0.4927806556224823, "learning_rate": 0.00032883981763521717, "loss": 1.7815, "step": 42322 }, { "epoch": 1.41, "grad_norm": 0.4970863461494446, "learning_rate": 0.00032882941390994895, "loss": 1.7571, "step": 42323 }, { "epoch": 1.41, "grad_norm": 0.5039963722229004, "learning_rate": 0.000328819010149686, "loss": 1.769, "step": 42324 }, { "epoch": 1.41, "grad_norm": 0.5026258230209351, "learning_rate": 0.00032880860635444104, "loss": 1.7038, "step": 42325 }, { "epoch": 1.41, "grad_norm": 0.5275059342384338, "learning_rate": 0.0003287982025242267, "loss": 1.7955, "step": 42326 }, { "epoch": 1.41, "grad_norm": 0.49324342608451843, "learning_rate": 0.00032878779865905554, "loss": 1.6673, "step": 42327 }, { "epoch": 1.41, "grad_norm": 0.4979453682899475, "learning_rate": 0.0003287773947589403, "loss": 1.7905, "step": 42328 }, { "epoch": 1.41, "grad_norm": 0.4908254146575928, "learning_rate": 0.0003287669908238935, "loss": 1.7585, "step": 42329 }, { "epoch": 1.41, "grad_norm": 0.5101049542427063, "learning_rate": 0.00032875658685392774, "loss": 1.7159, "step": 42330 }, { "epoch": 1.41, "grad_norm": 0.5228050351142883, "learning_rate": 0.0003287461828490558, "loss": 1.7498, "step": 42331 }, { "epoch": 1.41, "grad_norm": 0.5168347358703613, "learning_rate": 0.00032873577880929025, "loss": 1.8989, "step": 42332 }, { "epoch": 1.41, "grad_norm": 0.49497899413108826, "learning_rate": 0.00032872537473464363, "loss": 1.804, "step": 42333 }, { "epoch": 1.41, "grad_norm": 0.5059655904769897, "learning_rate": 0.00032871497062512874, "loss": 1.8191, "step": 42334 }, { "epoch": 1.41, "grad_norm": 0.5271956920623779, "learning_rate": 0.00032870456648075806, "loss": 1.8024, "step": 42335 }, { "epoch": 1.41, "grad_norm": 0.5528061389923096, "learning_rate": 0.00032869416230154426, "loss": 1.8013, "step": 42336 }, { "epoch": 1.41, "grad_norm": 0.5129362940788269, "learning_rate": 0.0003286837580874999, "loss": 1.8027, "step": 42337 }, { "epoch": 1.41, "grad_norm": 0.5368125438690186, "learning_rate": 0.0003286733538386377, "loss": 1.7402, "step": 42338 }, { "epoch": 1.41, "grad_norm": 0.5282851457595825, "learning_rate": 0.0003286629495549704, "loss": 1.7385, "step": 42339 }, { "epoch": 1.41, "grad_norm": 0.5367595553398132, "learning_rate": 0.00032865254523651037, "loss": 1.7705, "step": 42340 }, { "epoch": 1.41, "grad_norm": 0.5295166969299316, "learning_rate": 0.00032864214088327047, "loss": 1.8167, "step": 42341 }, { "epoch": 1.41, "grad_norm": 0.5092481970787048, "learning_rate": 0.0003286317364952632, "loss": 1.768, "step": 42342 }, { "epoch": 1.41, "grad_norm": 0.5296663045883179, "learning_rate": 0.0003286213320725012, "loss": 1.7302, "step": 42343 }, { "epoch": 1.41, "grad_norm": 0.5455113649368286, "learning_rate": 0.00032861092761499714, "loss": 1.7637, "step": 42344 }, { "epoch": 1.41, "grad_norm": 0.5094380378723145, "learning_rate": 0.0003286005231227636, "loss": 1.8454, "step": 42345 }, { "epoch": 1.41, "grad_norm": 0.5004279017448425, "learning_rate": 0.00032859011859581326, "loss": 1.7644, "step": 42346 }, { "epoch": 1.41, "grad_norm": 0.5273297429084778, "learning_rate": 0.0003285797140341587, "loss": 1.7397, "step": 42347 }, { "epoch": 1.41, "grad_norm": 0.5090897679328918, "learning_rate": 0.00032856930943781263, "loss": 1.7919, "step": 42348 }, { "epoch": 1.41, "grad_norm": 0.5013555288314819, "learning_rate": 0.00032855890480678755, "loss": 1.7845, "step": 42349 }, { "epoch": 1.41, "grad_norm": 0.5031927227973938, "learning_rate": 0.0003285485001410962, "loss": 1.7841, "step": 42350 }, { "epoch": 1.41, "grad_norm": 0.5070095658302307, "learning_rate": 0.0003285380954407513, "loss": 1.8276, "step": 42351 }, { "epoch": 1.41, "grad_norm": 0.5009350776672363, "learning_rate": 0.0003285276907057652, "loss": 1.7642, "step": 42352 }, { "epoch": 1.41, "grad_norm": 0.5246661305427551, "learning_rate": 0.00032851728593615075, "loss": 1.7634, "step": 42353 }, { "epoch": 1.41, "grad_norm": 0.5028812885284424, "learning_rate": 0.0003285068811319205, "loss": 1.8068, "step": 42354 }, { "epoch": 1.41, "grad_norm": 0.4973302185535431, "learning_rate": 0.0003284964762930871, "loss": 1.6751, "step": 42355 }, { "epoch": 1.41, "grad_norm": 0.49594342708587646, "learning_rate": 0.00032848607141966316, "loss": 1.7844, "step": 42356 }, { "epoch": 1.41, "grad_norm": 0.49702900648117065, "learning_rate": 0.00032847566651166135, "loss": 1.7609, "step": 42357 }, { "epoch": 1.41, "grad_norm": 0.49494779109954834, "learning_rate": 0.0003284652615690943, "loss": 1.7849, "step": 42358 }, { "epoch": 1.41, "grad_norm": 0.5059919953346252, "learning_rate": 0.0003284548565919746, "loss": 1.7843, "step": 42359 }, { "epoch": 1.41, "grad_norm": 0.5010412335395813, "learning_rate": 0.00032844445158031486, "loss": 1.6936, "step": 42360 }, { "epoch": 1.41, "grad_norm": 0.5009183287620544, "learning_rate": 0.00032843404653412774, "loss": 1.8574, "step": 42361 }, { "epoch": 1.41, "grad_norm": 0.503224790096283, "learning_rate": 0.00032842364145342596, "loss": 1.8699, "step": 42362 }, { "epoch": 1.41, "grad_norm": 0.4974771738052368, "learning_rate": 0.000328413236338222, "loss": 1.8136, "step": 42363 }, { "epoch": 1.41, "grad_norm": 0.49479106068611145, "learning_rate": 0.0003284028311885286, "loss": 1.8074, "step": 42364 }, { "epoch": 1.41, "grad_norm": 0.49079588055610657, "learning_rate": 0.0003283924260043583, "loss": 1.7906, "step": 42365 }, { "epoch": 1.41, "grad_norm": 0.49198469519615173, "learning_rate": 0.0003283820207857238, "loss": 1.7849, "step": 42366 }, { "epoch": 1.41, "grad_norm": 0.5285604000091553, "learning_rate": 0.0003283716155326377, "loss": 1.8003, "step": 42367 }, { "epoch": 1.41, "grad_norm": 0.5125889778137207, "learning_rate": 0.00032836121024511266, "loss": 1.7305, "step": 42368 }, { "epoch": 1.41, "grad_norm": 0.4950109124183655, "learning_rate": 0.0003283508049231613, "loss": 1.8458, "step": 42369 }, { "epoch": 1.41, "grad_norm": 0.4889892637729645, "learning_rate": 0.00032834039956679614, "loss": 1.8067, "step": 42370 }, { "epoch": 1.41, "grad_norm": 0.5086721181869507, "learning_rate": 0.00032832999417603005, "loss": 1.7833, "step": 42371 }, { "epoch": 1.41, "grad_norm": 0.5163955092430115, "learning_rate": 0.0003283195887508754, "loss": 1.7851, "step": 42372 }, { "epoch": 1.41, "grad_norm": 0.5058596730232239, "learning_rate": 0.0003283091832913451, "loss": 1.8072, "step": 42373 }, { "epoch": 1.41, "grad_norm": 0.4947391450405121, "learning_rate": 0.0003282987777974515, "loss": 1.8094, "step": 42374 }, { "epoch": 1.41, "grad_norm": 0.9045705795288086, "learning_rate": 0.00032828837226920736, "loss": 1.8486, "step": 42375 }, { "epoch": 1.41, "grad_norm": 0.5111440420150757, "learning_rate": 0.00032827796670662537, "loss": 1.7547, "step": 42376 }, { "epoch": 1.41, "grad_norm": 0.5113070011138916, "learning_rate": 0.00032826756110971797, "loss": 1.7576, "step": 42377 }, { "epoch": 1.41, "grad_norm": 0.5074960589408875, "learning_rate": 0.00032825715547849806, "loss": 1.7815, "step": 42378 }, { "epoch": 1.41, "grad_norm": 0.5087839365005493, "learning_rate": 0.000328246749812978, "loss": 1.8194, "step": 42379 }, { "epoch": 1.41, "grad_norm": 0.4980747401714325, "learning_rate": 0.00032823634411317066, "loss": 1.7286, "step": 42380 }, { "epoch": 1.41, "grad_norm": 0.4947364032268524, "learning_rate": 0.0003282259383790885, "loss": 1.8299, "step": 42381 }, { "epoch": 1.41, "grad_norm": 0.507763147354126, "learning_rate": 0.0003282155326107441, "loss": 1.7541, "step": 42382 }, { "epoch": 1.41, "grad_norm": 0.5261328220367432, "learning_rate": 0.0003282051268081504, "loss": 1.7194, "step": 42383 }, { "epoch": 1.41, "grad_norm": 0.5104655623435974, "learning_rate": 0.0003281947209713198, "loss": 1.743, "step": 42384 }, { "epoch": 1.41, "grad_norm": 0.494285523891449, "learning_rate": 0.0003281843151002649, "loss": 1.7816, "step": 42385 }, { "epoch": 1.41, "grad_norm": 0.469212144613266, "learning_rate": 0.0003281739091949984, "loss": 1.7174, "step": 42386 }, { "epoch": 1.41, "grad_norm": 0.498738169670105, "learning_rate": 0.00032816350325553286, "loss": 1.6876, "step": 42387 }, { "epoch": 1.41, "grad_norm": 0.5082442760467529, "learning_rate": 0.00032815309728188104, "loss": 1.8038, "step": 42388 }, { "epoch": 1.41, "grad_norm": 0.5172088146209717, "learning_rate": 0.0003281426912740556, "loss": 1.7884, "step": 42389 }, { "epoch": 1.41, "grad_norm": 0.5168483257293701, "learning_rate": 0.000328132285232069, "loss": 1.7972, "step": 42390 }, { "epoch": 1.41, "grad_norm": 0.5217317342758179, "learning_rate": 0.00032812187915593394, "loss": 1.7881, "step": 42391 }, { "epoch": 1.41, "grad_norm": 0.5222306847572327, "learning_rate": 0.00032811147304566304, "loss": 1.7576, "step": 42392 }, { "epoch": 1.41, "grad_norm": 0.49850451946258545, "learning_rate": 0.00032810106690126894, "loss": 1.7313, "step": 42393 }, { "epoch": 1.41, "grad_norm": 0.5029836893081665, "learning_rate": 0.0003280906607227644, "loss": 1.788, "step": 42394 }, { "epoch": 1.41, "grad_norm": 0.4984968304634094, "learning_rate": 0.0003280802545101618, "loss": 1.7334, "step": 42395 }, { "epoch": 1.41, "grad_norm": 0.4953711926937103, "learning_rate": 0.000328069848263474, "loss": 1.6897, "step": 42396 }, { "epoch": 1.41, "grad_norm": 0.4881787598133087, "learning_rate": 0.0003280594419827135, "loss": 1.7601, "step": 42397 }, { "epoch": 1.41, "grad_norm": 0.5198302268981934, "learning_rate": 0.00032804903566789297, "loss": 1.7591, "step": 42398 }, { "epoch": 1.41, "grad_norm": 0.4977191090583801, "learning_rate": 0.0003280386293190252, "loss": 1.7654, "step": 42399 }, { "epoch": 1.41, "grad_norm": 0.5124388337135315, "learning_rate": 0.0003280282229361224, "loss": 1.8128, "step": 42400 }, { "epoch": 1.41, "grad_norm": 0.5303268432617188, "learning_rate": 0.00032801781651919763, "loss": 1.8074, "step": 42401 }, { "epoch": 1.41, "grad_norm": 0.49588093161582947, "learning_rate": 0.00032800741006826326, "loss": 1.781, "step": 42402 }, { "epoch": 1.41, "grad_norm": 0.4938841462135315, "learning_rate": 0.0003279970035833321, "loss": 1.7792, "step": 42403 }, { "epoch": 1.41, "grad_norm": 0.49924013018608093, "learning_rate": 0.0003279865970644167, "loss": 1.7695, "step": 42404 }, { "epoch": 1.41, "grad_norm": 0.49901556968688965, "learning_rate": 0.0003279761905115297, "loss": 1.7677, "step": 42405 }, { "epoch": 1.41, "grad_norm": 0.5116151571273804, "learning_rate": 0.00032796578392468373, "loss": 1.8015, "step": 42406 }, { "epoch": 1.41, "grad_norm": 0.4916442036628723, "learning_rate": 0.0003279553773038913, "loss": 1.7775, "step": 42407 }, { "epoch": 1.41, "grad_norm": 0.49431321024894714, "learning_rate": 0.0003279449706491653, "loss": 1.8044, "step": 42408 }, { "epoch": 1.41, "grad_norm": 0.5070036053657532, "learning_rate": 0.0003279345639605182, "loss": 1.756, "step": 42409 }, { "epoch": 1.41, "grad_norm": 0.5052785873413086, "learning_rate": 0.0003279241572379626, "loss": 1.7866, "step": 42410 }, { "epoch": 1.41, "grad_norm": 0.49055519700050354, "learning_rate": 0.00032791375048151123, "loss": 1.7529, "step": 42411 }, { "epoch": 1.41, "grad_norm": 0.5000593066215515, "learning_rate": 0.00032790334369117664, "loss": 1.7021, "step": 42412 }, { "epoch": 1.41, "grad_norm": 0.4804040193557739, "learning_rate": 0.00032789293686697157, "loss": 1.6602, "step": 42413 }, { "epoch": 1.41, "grad_norm": 0.5016732215881348, "learning_rate": 0.00032788253000890855, "loss": 1.723, "step": 42414 }, { "epoch": 1.41, "grad_norm": 0.5069664120674133, "learning_rate": 0.00032787212311700026, "loss": 1.741, "step": 42415 }, { "epoch": 1.41, "grad_norm": 0.5068436861038208, "learning_rate": 0.0003278617161912593, "loss": 1.803, "step": 42416 }, { "epoch": 1.41, "grad_norm": 0.4952313303947449, "learning_rate": 0.0003278513092316983, "loss": 1.7157, "step": 42417 }, { "epoch": 1.41, "grad_norm": 0.5086867213249207, "learning_rate": 0.0003278409022383299, "loss": 1.8102, "step": 42418 }, { "epoch": 1.41, "grad_norm": 0.5216630101203918, "learning_rate": 0.00032783049521116676, "loss": 1.7014, "step": 42419 }, { "epoch": 1.41, "grad_norm": 0.5064803957939148, "learning_rate": 0.00032782008815022165, "loss": 1.7726, "step": 42420 }, { "epoch": 1.41, "grad_norm": 0.5189118385314941, "learning_rate": 0.0003278096810555069, "loss": 1.8295, "step": 42421 }, { "epoch": 1.41, "grad_norm": 0.5043991208076477, "learning_rate": 0.0003277992739270353, "loss": 1.6811, "step": 42422 }, { "epoch": 1.41, "grad_norm": 0.4954984188079834, "learning_rate": 0.0003277888667648195, "loss": 1.7923, "step": 42423 }, { "epoch": 1.41, "grad_norm": 0.5178147554397583, "learning_rate": 0.00032777845956887214, "loss": 1.7988, "step": 42424 }, { "epoch": 1.41, "grad_norm": 0.4989043176174164, "learning_rate": 0.0003277680523392057, "loss": 1.7651, "step": 42425 }, { "epoch": 1.41, "grad_norm": 0.5031365156173706, "learning_rate": 0.0003277576450758331, "loss": 1.7622, "step": 42426 }, { "epoch": 1.41, "grad_norm": 0.49727004766464233, "learning_rate": 0.00032774723777876676, "loss": 1.7469, "step": 42427 }, { "epoch": 1.41, "grad_norm": 0.5172266364097595, "learning_rate": 0.0003277368304480193, "loss": 1.7631, "step": 42428 }, { "epoch": 1.41, "grad_norm": 0.5084312558174133, "learning_rate": 0.00032772642308360344, "loss": 1.7601, "step": 42429 }, { "epoch": 1.41, "grad_norm": 0.5007987022399902, "learning_rate": 0.00032771601568553184, "loss": 1.8202, "step": 42430 }, { "epoch": 1.41, "grad_norm": 0.4990867078304291, "learning_rate": 0.00032770560825381707, "loss": 1.7482, "step": 42431 }, { "epoch": 1.41, "grad_norm": 0.4987923204898834, "learning_rate": 0.00032769520078847165, "loss": 1.7572, "step": 42432 }, { "epoch": 1.41, "grad_norm": 0.5072950124740601, "learning_rate": 0.0003276847932895085, "loss": 1.7753, "step": 42433 }, { "epoch": 1.41, "grad_norm": 0.5366877913475037, "learning_rate": 0.0003276743857569399, "loss": 1.7975, "step": 42434 }, { "epoch": 1.41, "grad_norm": 0.5186651349067688, "learning_rate": 0.00032766397819077893, "loss": 1.8495, "step": 42435 }, { "epoch": 1.41, "grad_norm": 0.5281924605369568, "learning_rate": 0.0003276535705910378, "loss": 1.736, "step": 42436 }, { "epoch": 1.41, "grad_norm": 0.5067234635353088, "learning_rate": 0.0003276431629577293, "loss": 1.7284, "step": 42437 }, { "epoch": 1.41, "grad_norm": 0.4980306923389435, "learning_rate": 0.0003276327552908662, "loss": 1.8259, "step": 42438 }, { "epoch": 1.41, "grad_norm": 0.5189024209976196, "learning_rate": 0.0003276223475904608, "loss": 1.748, "step": 42439 }, { "epoch": 1.41, "grad_norm": 0.510380744934082, "learning_rate": 0.00032761193985652616, "loss": 1.8062, "step": 42440 }, { "epoch": 1.41, "grad_norm": 0.5063406229019165, "learning_rate": 0.0003276015320890746, "loss": 1.7662, "step": 42441 }, { "epoch": 1.41, "grad_norm": 0.5211659073829651, "learning_rate": 0.0003275911242881188, "loss": 1.7234, "step": 42442 }, { "epoch": 1.41, "grad_norm": 0.5286436080932617, "learning_rate": 0.0003275807164536715, "loss": 1.8027, "step": 42443 }, { "epoch": 1.41, "grad_norm": 0.5098586082458496, "learning_rate": 0.00032757030858574524, "loss": 1.8369, "step": 42444 }, { "epoch": 1.41, "grad_norm": 0.5007253289222717, "learning_rate": 0.0003275599006843528, "loss": 1.7266, "step": 42445 }, { "epoch": 1.41, "grad_norm": 0.5177518129348755, "learning_rate": 0.00032754949274950657, "loss": 1.8174, "step": 42446 }, { "epoch": 1.41, "grad_norm": 0.5202705264091492, "learning_rate": 0.0003275390847812193, "loss": 1.7701, "step": 42447 }, { "epoch": 1.41, "grad_norm": 0.5210430026054382, "learning_rate": 0.0003275286767795037, "loss": 1.7709, "step": 42448 }, { "epoch": 1.41, "grad_norm": 0.5046619176864624, "learning_rate": 0.00032751826874437235, "loss": 1.7475, "step": 42449 }, { "epoch": 1.41, "grad_norm": 0.4971129596233368, "learning_rate": 0.00032750786067583783, "loss": 1.74, "step": 42450 }, { "epoch": 1.41, "grad_norm": 0.5121820569038391, "learning_rate": 0.0003274974525739129, "loss": 1.7093, "step": 42451 }, { "epoch": 1.41, "grad_norm": 0.5262805819511414, "learning_rate": 0.00032748704443861, "loss": 1.7992, "step": 42452 }, { "epoch": 1.41, "grad_norm": 0.509239912033081, "learning_rate": 0.000327476636269942, "loss": 1.7649, "step": 42453 }, { "epoch": 1.41, "grad_norm": 0.5032423138618469, "learning_rate": 0.0003274662280679213, "loss": 1.6785, "step": 42454 }, { "epoch": 1.41, "grad_norm": 0.4917525053024292, "learning_rate": 0.00032745581983256077, "loss": 1.7654, "step": 42455 }, { "epoch": 1.41, "grad_norm": 0.49704790115356445, "learning_rate": 0.0003274454115638729, "loss": 1.7351, "step": 42456 }, { "epoch": 1.41, "grad_norm": 0.49469253420829773, "learning_rate": 0.00032743500326187016, "loss": 1.7397, "step": 42457 }, { "epoch": 1.41, "grad_norm": 0.49095258116722107, "learning_rate": 0.00032742459492656557, "loss": 1.8012, "step": 42458 }, { "epoch": 1.41, "grad_norm": 0.4960557818412781, "learning_rate": 0.00032741418655797154, "loss": 1.7582, "step": 42459 }, { "epoch": 1.41, "grad_norm": 0.5129817724227905, "learning_rate": 0.0003274037781561007, "loss": 1.7712, "step": 42460 }, { "epoch": 1.41, "grad_norm": 0.4870990812778473, "learning_rate": 0.0003273933697209657, "loss": 1.8349, "step": 42461 }, { "epoch": 1.41, "grad_norm": 0.510208785533905, "learning_rate": 0.0003273829612525791, "loss": 1.8057, "step": 42462 }, { "epoch": 1.41, "grad_norm": 0.5020924210548401, "learning_rate": 0.0003273725527509537, "loss": 1.7617, "step": 42463 }, { "epoch": 1.41, "grad_norm": 0.5093381404876709, "learning_rate": 0.00032736214421610205, "loss": 1.7594, "step": 42464 }, { "epoch": 1.41, "grad_norm": 0.5101609826087952, "learning_rate": 0.00032735173564803677, "loss": 1.792, "step": 42465 }, { "epoch": 1.41, "grad_norm": 0.5083808898925781, "learning_rate": 0.00032734132704677056, "loss": 1.7631, "step": 42466 }, { "epoch": 1.41, "grad_norm": 0.5100066661834717, "learning_rate": 0.00032733091841231595, "loss": 1.7935, "step": 42467 }, { "epoch": 1.41, "grad_norm": 0.49477967619895935, "learning_rate": 0.00032732050974468573, "loss": 1.7492, "step": 42468 }, { "epoch": 1.41, "grad_norm": 0.5154706835746765, "learning_rate": 0.00032731010104389226, "loss": 1.7442, "step": 42469 }, { "epoch": 1.41, "grad_norm": 0.4850633442401886, "learning_rate": 0.0003272996923099486, "loss": 1.7768, "step": 42470 }, { "epoch": 1.41, "grad_norm": 0.5210683941841125, "learning_rate": 0.00032728928354286695, "loss": 1.8239, "step": 42471 }, { "epoch": 1.41, "grad_norm": 0.5146182775497437, "learning_rate": 0.0003272788747426602, "loss": 1.7232, "step": 42472 }, { "epoch": 1.41, "grad_norm": 0.4806506037712097, "learning_rate": 0.00032726846590934086, "loss": 1.7286, "step": 42473 }, { "epoch": 1.41, "grad_norm": 0.48283031582832336, "learning_rate": 0.00032725805704292163, "loss": 1.7619, "step": 42474 }, { "epoch": 1.41, "grad_norm": 0.5240156650543213, "learning_rate": 0.0003272476481434152, "loss": 1.805, "step": 42475 }, { "epoch": 1.41, "grad_norm": 0.494829386472702, "learning_rate": 0.0003272372392108341, "loss": 1.7574, "step": 42476 }, { "epoch": 1.41, "grad_norm": 0.48427602648735046, "learning_rate": 0.000327226830245191, "loss": 1.6868, "step": 42477 }, { "epoch": 1.41, "grad_norm": 0.49153366684913635, "learning_rate": 0.0003272164212464986, "loss": 1.7008, "step": 42478 }, { "epoch": 1.41, "grad_norm": 0.5266610980033875, "learning_rate": 0.00032720601221476937, "loss": 1.8156, "step": 42479 }, { "epoch": 1.41, "grad_norm": 0.49567586183547974, "learning_rate": 0.00032719560315001606, "loss": 1.6682, "step": 42480 }, { "epoch": 1.41, "grad_norm": 0.5137185454368591, "learning_rate": 0.0003271851940522514, "loss": 1.7522, "step": 42481 }, { "epoch": 1.41, "grad_norm": 0.4909384846687317, "learning_rate": 0.0003271747849214878, "loss": 1.7174, "step": 42482 }, { "epoch": 1.41, "grad_norm": 0.4931086599826813, "learning_rate": 0.00032716437575773804, "loss": 1.7846, "step": 42483 }, { "epoch": 1.41, "grad_norm": 0.5358472466468811, "learning_rate": 0.0003271539665610148, "loss": 1.7687, "step": 42484 }, { "epoch": 1.41, "grad_norm": 0.501205563545227, "learning_rate": 0.0003271435573313306, "loss": 1.779, "step": 42485 }, { "epoch": 1.41, "grad_norm": 0.49512776732444763, "learning_rate": 0.00032713314806869816, "loss": 1.7445, "step": 42486 }, { "epoch": 1.41, "grad_norm": 0.4972144365310669, "learning_rate": 0.00032712273877313, "loss": 1.7711, "step": 42487 }, { "epoch": 1.41, "grad_norm": 0.5113003849983215, "learning_rate": 0.0003271123294446389, "loss": 1.7362, "step": 42488 }, { "epoch": 1.41, "grad_norm": 0.49059948325157166, "learning_rate": 0.0003271019200832373, "loss": 1.8239, "step": 42489 }, { "epoch": 1.41, "grad_norm": 0.4844389855861664, "learning_rate": 0.00032709151068893814, "loss": 1.7246, "step": 42490 }, { "epoch": 1.41, "grad_norm": 0.49294617772102356, "learning_rate": 0.00032708110126175376, "loss": 1.6814, "step": 42491 }, { "epoch": 1.41, "grad_norm": 0.4977855980396271, "learning_rate": 0.000327070691801697, "loss": 1.7554, "step": 42492 }, { "epoch": 1.41, "grad_norm": 0.49389317631721497, "learning_rate": 0.0003270602823087804, "loss": 1.7516, "step": 42493 }, { "epoch": 1.41, "grad_norm": 0.4852595031261444, "learning_rate": 0.00032704987278301653, "loss": 1.7367, "step": 42494 }, { "epoch": 1.41, "grad_norm": 0.4949510991573334, "learning_rate": 0.00032703946322441813, "loss": 1.6757, "step": 42495 }, { "epoch": 1.41, "grad_norm": 0.5072243213653564, "learning_rate": 0.00032702905363299784, "loss": 1.8054, "step": 42496 }, { "epoch": 1.41, "grad_norm": 0.4997972548007965, "learning_rate": 0.0003270186440087682, "loss": 1.7661, "step": 42497 }, { "epoch": 1.41, "grad_norm": 0.49818557500839233, "learning_rate": 0.00032700823435174195, "loss": 1.7324, "step": 42498 }, { "epoch": 1.41, "grad_norm": 0.4989289939403534, "learning_rate": 0.00032699782466193166, "loss": 1.7641, "step": 42499 }, { "epoch": 1.41, "grad_norm": 0.4882468283176422, "learning_rate": 0.00032698741493935007, "loss": 1.7765, "step": 42500 }, { "epoch": 1.41, "grad_norm": 0.49810051918029785, "learning_rate": 0.0003269770051840096, "loss": 1.7712, "step": 42501 }, { "epoch": 1.41, "grad_norm": 0.5128287076950073, "learning_rate": 0.0003269665953959231, "loss": 1.8335, "step": 42502 }, { "epoch": 1.41, "grad_norm": 0.4850306510925293, "learning_rate": 0.00032695618557510313, "loss": 1.7455, "step": 42503 }, { "epoch": 1.41, "grad_norm": 0.5202057957649231, "learning_rate": 0.00032694577572156227, "loss": 1.8427, "step": 42504 }, { "epoch": 1.41, "grad_norm": 0.4853890538215637, "learning_rate": 0.0003269353658353132, "loss": 1.7512, "step": 42505 }, { "epoch": 1.41, "grad_norm": 0.4901053309440613, "learning_rate": 0.0003269249559163687, "loss": 1.7212, "step": 42506 }, { "epoch": 1.41, "grad_norm": 0.5092602372169495, "learning_rate": 0.00032691454596474117, "loss": 1.7951, "step": 42507 }, { "epoch": 1.41, "grad_norm": 0.5211654901504517, "learning_rate": 0.00032690413598044336, "loss": 1.8321, "step": 42508 }, { "epoch": 1.41, "grad_norm": 0.4930988848209381, "learning_rate": 0.00032689372596348787, "loss": 1.8189, "step": 42509 }, { "epoch": 1.41, "grad_norm": 0.5103398561477661, "learning_rate": 0.00032688331591388737, "loss": 1.7626, "step": 42510 }, { "epoch": 1.41, "grad_norm": 0.5079456567764282, "learning_rate": 0.0003268729058316545, "loss": 1.7434, "step": 42511 }, { "epoch": 1.41, "grad_norm": 0.4951144754886627, "learning_rate": 0.00032686249571680184, "loss": 1.7847, "step": 42512 }, { "epoch": 1.41, "grad_norm": 0.5204514265060425, "learning_rate": 0.0003268520855693422, "loss": 1.7029, "step": 42513 }, { "epoch": 1.41, "grad_norm": 0.5314185619354248, "learning_rate": 0.0003268416753892879, "loss": 1.7919, "step": 42514 }, { "epoch": 1.41, "grad_norm": 0.5234449505805969, "learning_rate": 0.0003268312651766519, "loss": 1.7486, "step": 42515 }, { "epoch": 1.41, "grad_norm": 0.5126304626464844, "learning_rate": 0.00032682085493144667, "loss": 1.7597, "step": 42516 }, { "epoch": 1.41, "grad_norm": 0.5018207430839539, "learning_rate": 0.00032681044465368477, "loss": 1.7375, "step": 42517 }, { "epoch": 1.41, "grad_norm": 0.48876866698265076, "learning_rate": 0.0003268000343433791, "loss": 1.6963, "step": 42518 }, { "epoch": 1.41, "grad_norm": 1.7709227800369263, "learning_rate": 0.00032678962400054196, "loss": 1.6872, "step": 42519 }, { "epoch": 1.41, "grad_norm": 0.5139762163162231, "learning_rate": 0.00032677921362518636, "loss": 1.7226, "step": 42520 }, { "epoch": 1.41, "grad_norm": 0.5319749712944031, "learning_rate": 0.0003267688032173246, "loss": 1.7252, "step": 42521 }, { "epoch": 1.41, "grad_norm": 0.5169682502746582, "learning_rate": 0.0003267583927769695, "loss": 1.7672, "step": 42522 }, { "epoch": 1.41, "grad_norm": 0.5107337236404419, "learning_rate": 0.0003267479823041337, "loss": 1.7739, "step": 42523 }, { "epoch": 1.41, "grad_norm": 0.4897092282772064, "learning_rate": 0.0003267375717988297, "loss": 1.6521, "step": 42524 }, { "epoch": 1.41, "grad_norm": 0.5321339964866638, "learning_rate": 0.0003267271612610703, "loss": 1.8409, "step": 42525 }, { "epoch": 1.41, "grad_norm": 0.5118187665939331, "learning_rate": 0.000326716750690868, "loss": 1.7775, "step": 42526 }, { "epoch": 1.41, "grad_norm": 0.5185800194740295, "learning_rate": 0.0003267063400882356, "loss": 1.6967, "step": 42527 }, { "epoch": 1.41, "grad_norm": 0.4970901608467102, "learning_rate": 0.00032669592945318553, "loss": 1.7852, "step": 42528 }, { "epoch": 1.41, "grad_norm": 0.5035003423690796, "learning_rate": 0.0003266855187857306, "loss": 1.7815, "step": 42529 }, { "epoch": 1.41, "grad_norm": 0.5077587962150574, "learning_rate": 0.00032667510808588335, "loss": 1.7466, "step": 42530 }, { "epoch": 1.42, "grad_norm": 0.5075008273124695, "learning_rate": 0.0003266646973536565, "loss": 1.7759, "step": 42531 }, { "epoch": 1.42, "grad_norm": 0.48962098360061646, "learning_rate": 0.00032665428658906255, "loss": 1.8032, "step": 42532 }, { "epoch": 1.42, "grad_norm": 0.5262237787246704, "learning_rate": 0.0003266438757921143, "loss": 1.6919, "step": 42533 }, { "epoch": 1.42, "grad_norm": 0.5118188858032227, "learning_rate": 0.0003266334649628242, "loss": 1.7519, "step": 42534 }, { "epoch": 1.42, "grad_norm": 0.5141677856445312, "learning_rate": 0.00032662305410120513, "loss": 1.7935, "step": 42535 }, { "epoch": 1.42, "grad_norm": 0.5029793381690979, "learning_rate": 0.0003266126432072696, "loss": 1.7205, "step": 42536 }, { "epoch": 1.42, "grad_norm": 0.49866825342178345, "learning_rate": 0.0003266022322810302, "loss": 1.8528, "step": 42537 }, { "epoch": 1.42, "grad_norm": 0.5014488697052002, "learning_rate": 0.0003265918213224996, "loss": 1.7606, "step": 42538 }, { "epoch": 1.42, "grad_norm": 0.48931360244750977, "learning_rate": 0.0003265814103316904, "loss": 1.7596, "step": 42539 }, { "epoch": 1.42, "grad_norm": 0.5015990138053894, "learning_rate": 0.0003265709993086153, "loss": 1.8471, "step": 42540 }, { "epoch": 1.42, "grad_norm": 0.5093890428543091, "learning_rate": 0.000326560588253287, "loss": 1.8451, "step": 42541 }, { "epoch": 1.42, "grad_norm": 0.524509608745575, "learning_rate": 0.00032655017716571796, "loss": 1.7963, "step": 42542 }, { "epoch": 1.42, "grad_norm": 0.5240343809127808, "learning_rate": 0.000326539766045921, "loss": 1.7548, "step": 42543 }, { "epoch": 1.42, "grad_norm": 0.5024067759513855, "learning_rate": 0.0003265293548939086, "loss": 1.81, "step": 42544 }, { "epoch": 1.42, "grad_norm": 0.512201726436615, "learning_rate": 0.00032651894370969354, "loss": 1.8272, "step": 42545 }, { "epoch": 1.42, "grad_norm": 0.4965716004371643, "learning_rate": 0.0003265085324932884, "loss": 1.8269, "step": 42546 }, { "epoch": 1.42, "grad_norm": 0.48901763558387756, "learning_rate": 0.0003264981212447057, "loss": 1.7893, "step": 42547 }, { "epoch": 1.42, "grad_norm": 0.5071969628334045, "learning_rate": 0.0003264877099639583, "loss": 1.7524, "step": 42548 }, { "epoch": 1.42, "grad_norm": 0.5153542160987854, "learning_rate": 0.0003264772986510587, "loss": 1.765, "step": 42549 }, { "epoch": 1.42, "grad_norm": 0.7234631180763245, "learning_rate": 0.00032646688730601954, "loss": 1.7486, "step": 42550 }, { "epoch": 1.42, "grad_norm": 0.5025554299354553, "learning_rate": 0.00032645647592885344, "loss": 1.7535, "step": 42551 }, { "epoch": 1.42, "grad_norm": 0.520471453666687, "learning_rate": 0.0003264460645195731, "loss": 1.7595, "step": 42552 }, { "epoch": 1.42, "grad_norm": 0.5149229764938354, "learning_rate": 0.00032643565307819116, "loss": 1.7929, "step": 42553 }, { "epoch": 1.42, "grad_norm": 0.49926015734672546, "learning_rate": 0.00032642524160472026, "loss": 1.754, "step": 42554 }, { "epoch": 1.42, "grad_norm": 0.5190818309783936, "learning_rate": 0.000326414830099173, "loss": 1.7645, "step": 42555 }, { "epoch": 1.42, "grad_norm": 0.5155692100524902, "learning_rate": 0.0003264044185615619, "loss": 1.7166, "step": 42556 }, { "epoch": 1.42, "grad_norm": 0.5183842182159424, "learning_rate": 0.0003263940069918999, "loss": 1.771, "step": 42557 }, { "epoch": 1.42, "grad_norm": 0.5120298266410828, "learning_rate": 0.00032638359539019935, "loss": 1.7965, "step": 42558 }, { "epoch": 1.42, "grad_norm": 0.5376201272010803, "learning_rate": 0.00032637318375647306, "loss": 1.7085, "step": 42559 }, { "epoch": 1.42, "grad_norm": 0.5133659839630127, "learning_rate": 0.00032636277209073354, "loss": 1.7456, "step": 42560 }, { "epoch": 1.42, "grad_norm": 0.5000141859054565, "learning_rate": 0.0003263523603929936, "loss": 1.7476, "step": 42561 }, { "epoch": 1.42, "grad_norm": 0.502836287021637, "learning_rate": 0.00032634194866326574, "loss": 1.7752, "step": 42562 }, { "epoch": 1.42, "grad_norm": 0.5176142454147339, "learning_rate": 0.0003263315369015626, "loss": 1.7254, "step": 42563 }, { "epoch": 1.42, "grad_norm": 0.49516063928604126, "learning_rate": 0.00032632112510789694, "loss": 1.6663, "step": 42564 }, { "epoch": 1.42, "grad_norm": 0.4971161484718323, "learning_rate": 0.00032631071328228123, "loss": 1.7184, "step": 42565 }, { "epoch": 1.42, "grad_norm": 0.5101718902587891, "learning_rate": 0.0003263003014247282, "loss": 1.768, "step": 42566 }, { "epoch": 1.42, "grad_norm": 0.48506638407707214, "learning_rate": 0.0003262898895352505, "loss": 1.6974, "step": 42567 }, { "epoch": 1.42, "grad_norm": 0.4981527030467987, "learning_rate": 0.0003262794776138608, "loss": 1.7187, "step": 42568 }, { "epoch": 1.42, "grad_norm": 0.5118383765220642, "learning_rate": 0.00032626906566057156, "loss": 1.7257, "step": 42569 }, { "epoch": 1.42, "grad_norm": 0.5186154842376709, "learning_rate": 0.0003262586536753956, "loss": 1.7816, "step": 42570 }, { "epoch": 1.42, "grad_norm": 0.49052056670188904, "learning_rate": 0.0003262482416583455, "loss": 1.739, "step": 42571 }, { "epoch": 1.42, "grad_norm": 0.522716760635376, "learning_rate": 0.00032623782960943394, "loss": 1.7543, "step": 42572 }, { "epoch": 1.42, "grad_norm": 0.5025148987770081, "learning_rate": 0.0003262274175286736, "loss": 1.7796, "step": 42573 }, { "epoch": 1.42, "grad_norm": 0.49560052156448364, "learning_rate": 0.0003262170054160768, "loss": 1.8154, "step": 42574 }, { "epoch": 1.42, "grad_norm": 0.5123473405838013, "learning_rate": 0.0003262065932716566, "loss": 1.7702, "step": 42575 }, { "epoch": 1.42, "grad_norm": 0.49946779012680054, "learning_rate": 0.00032619618109542544, "loss": 1.8362, "step": 42576 }, { "epoch": 1.42, "grad_norm": 0.5156360268592834, "learning_rate": 0.0003261857688873959, "loss": 1.7703, "step": 42577 }, { "epoch": 1.42, "grad_norm": 0.5085107088088989, "learning_rate": 0.00032617535664758077, "loss": 1.7321, "step": 42578 }, { "epoch": 1.42, "grad_norm": 0.4943849742412567, "learning_rate": 0.0003261649443759926, "loss": 1.6786, "step": 42579 }, { "epoch": 1.42, "grad_norm": 0.5287729501724243, "learning_rate": 0.0003261545320726441, "loss": 1.8188, "step": 42580 }, { "epoch": 1.42, "grad_norm": 0.4927739202976227, "learning_rate": 0.0003261441197375477, "loss": 1.7852, "step": 42581 }, { "epoch": 1.42, "grad_norm": 0.5057147741317749, "learning_rate": 0.00032613370737071624, "loss": 1.8489, "step": 42582 }, { "epoch": 1.42, "grad_norm": 0.5168269276618958, "learning_rate": 0.00032612329497216236, "loss": 1.8176, "step": 42583 }, { "epoch": 1.42, "grad_norm": 0.5032375454902649, "learning_rate": 0.0003261128825418986, "loss": 1.8168, "step": 42584 }, { "epoch": 1.42, "grad_norm": 0.5062945485115051, "learning_rate": 0.00032610247007993775, "loss": 1.7968, "step": 42585 }, { "epoch": 1.42, "grad_norm": 0.5305871367454529, "learning_rate": 0.00032609205758629223, "loss": 1.7856, "step": 42586 }, { "epoch": 1.42, "grad_norm": 0.49266859889030457, "learning_rate": 0.0003260816450609749, "loss": 1.7173, "step": 42587 }, { "epoch": 1.42, "grad_norm": 0.5014097690582275, "learning_rate": 0.00032607123250399825, "loss": 1.7411, "step": 42588 }, { "epoch": 1.42, "grad_norm": 0.5032756328582764, "learning_rate": 0.0003260608199153749, "loss": 1.6964, "step": 42589 }, { "epoch": 1.42, "grad_norm": 0.5044177770614624, "learning_rate": 0.0003260504072951176, "loss": 1.8384, "step": 42590 }, { "epoch": 1.42, "grad_norm": 0.48815488815307617, "learning_rate": 0.00032603999464323895, "loss": 1.8325, "step": 42591 }, { "epoch": 1.42, "grad_norm": 0.5493722558021545, "learning_rate": 0.00032602958195975163, "loss": 1.7827, "step": 42592 }, { "epoch": 1.42, "grad_norm": 0.5251383185386658, "learning_rate": 0.0003260191692446682, "loss": 1.8184, "step": 42593 }, { "epoch": 1.42, "grad_norm": 0.5147895812988281, "learning_rate": 0.0003260087564980013, "loss": 1.784, "step": 42594 }, { "epoch": 1.42, "grad_norm": 0.4865708649158478, "learning_rate": 0.0003259983437197636, "loss": 1.6693, "step": 42595 }, { "epoch": 1.42, "grad_norm": 0.49266213178634644, "learning_rate": 0.0003259879309099678, "loss": 1.7313, "step": 42596 }, { "epoch": 1.42, "grad_norm": 0.49937722086906433, "learning_rate": 0.00032597751806862644, "loss": 1.7332, "step": 42597 }, { "epoch": 1.42, "grad_norm": 0.5089913010597229, "learning_rate": 0.0003259671051957522, "loss": 1.6878, "step": 42598 }, { "epoch": 1.42, "grad_norm": 0.5027115345001221, "learning_rate": 0.0003259566922913577, "loss": 1.8043, "step": 42599 }, { "epoch": 1.42, "grad_norm": 0.49483931064605713, "learning_rate": 0.0003259462793554557, "loss": 1.7923, "step": 42600 }, { "epoch": 1.42, "grad_norm": 0.506909191608429, "learning_rate": 0.00032593586638805864, "loss": 1.7795, "step": 42601 }, { "epoch": 1.42, "grad_norm": 0.49991604685783386, "learning_rate": 0.0003259254533891793, "loss": 1.7715, "step": 42602 }, { "epoch": 1.42, "grad_norm": 0.5006911158561707, "learning_rate": 0.0003259150403588303, "loss": 1.7556, "step": 42603 }, { "epoch": 1.42, "grad_norm": 0.5096350312232971, "learning_rate": 0.00032590462729702413, "loss": 1.7629, "step": 42604 }, { "epoch": 1.42, "grad_norm": 0.5135213136672974, "learning_rate": 0.0003258942142037737, "loss": 1.7761, "step": 42605 }, { "epoch": 1.42, "grad_norm": 0.5102107524871826, "learning_rate": 0.0003258838010790914, "loss": 1.7163, "step": 42606 }, { "epoch": 1.42, "grad_norm": 0.501888632774353, "learning_rate": 0.0003258733879229901, "loss": 1.7666, "step": 42607 }, { "epoch": 1.42, "grad_norm": 0.5145360827445984, "learning_rate": 0.0003258629747354823, "loss": 1.7536, "step": 42608 }, { "epoch": 1.42, "grad_norm": 0.48836734890937805, "learning_rate": 0.0003258525615165806, "loss": 1.7862, "step": 42609 }, { "epoch": 1.42, "grad_norm": 0.49759796261787415, "learning_rate": 0.0003258421482662977, "loss": 1.7841, "step": 42610 }, { "epoch": 1.42, "grad_norm": 0.4941261112689972, "learning_rate": 0.00032583173498464623, "loss": 1.7624, "step": 42611 }, { "epoch": 1.42, "grad_norm": 0.5174230933189392, "learning_rate": 0.00032582132167163893, "loss": 1.8042, "step": 42612 }, { "epoch": 1.42, "grad_norm": 0.5005054473876953, "learning_rate": 0.0003258109083272882, "loss": 1.8443, "step": 42613 }, { "epoch": 1.42, "grad_norm": 0.5033438801765442, "learning_rate": 0.00032580049495160694, "loss": 1.6546, "step": 42614 }, { "epoch": 1.42, "grad_norm": 0.5103315114974976, "learning_rate": 0.00032579008154460766, "loss": 1.8135, "step": 42615 }, { "epoch": 1.42, "grad_norm": 0.48860394954681396, "learning_rate": 0.00032577966810630303, "loss": 1.7208, "step": 42616 }, { "epoch": 1.42, "grad_norm": 0.49966534972190857, "learning_rate": 0.0003257692546367057, "loss": 1.7554, "step": 42617 }, { "epoch": 1.42, "grad_norm": 0.5129761099815369, "learning_rate": 0.0003257588411358283, "loss": 1.8354, "step": 42618 }, { "epoch": 1.42, "grad_norm": 1.3378385305404663, "learning_rate": 0.00032574842760368336, "loss": 1.804, "step": 42619 }, { "epoch": 1.42, "grad_norm": 0.48650065064430237, "learning_rate": 0.0003257380140402837, "loss": 1.7381, "step": 42620 }, { "epoch": 1.42, "grad_norm": 0.5355486869812012, "learning_rate": 0.00032572760044564193, "loss": 1.7832, "step": 42621 }, { "epoch": 1.42, "grad_norm": 0.518083393573761, "learning_rate": 0.0003257171868197706, "loss": 1.7754, "step": 42622 }, { "epoch": 1.42, "grad_norm": 0.5157967805862427, "learning_rate": 0.0003257067731626824, "loss": 1.7307, "step": 42623 }, { "epoch": 1.42, "grad_norm": 0.5043070316314697, "learning_rate": 0.0003256963594743899, "loss": 1.7744, "step": 42624 }, { "epoch": 1.42, "grad_norm": 0.4992252290248871, "learning_rate": 0.00032568594575490585, "loss": 1.6968, "step": 42625 }, { "epoch": 1.42, "grad_norm": 0.49910256266593933, "learning_rate": 0.00032567553200424284, "loss": 1.8109, "step": 42626 }, { "epoch": 1.42, "grad_norm": 0.5181695222854614, "learning_rate": 0.0003256651182224135, "loss": 1.7489, "step": 42627 }, { "epoch": 1.42, "grad_norm": 0.4870377480983734, "learning_rate": 0.0003256547044094306, "loss": 1.7365, "step": 42628 }, { "epoch": 1.42, "grad_norm": 0.5211617350578308, "learning_rate": 0.0003256442905653065, "loss": 1.7272, "step": 42629 }, { "epoch": 1.42, "grad_norm": 0.4891851842403412, "learning_rate": 0.0003256338766900542, "loss": 1.7169, "step": 42630 }, { "epoch": 1.42, "grad_norm": 0.5028887391090393, "learning_rate": 0.00032562346278368603, "loss": 1.7297, "step": 42631 }, { "epoch": 1.42, "grad_norm": 0.5094788074493408, "learning_rate": 0.0003256130488462147, "loss": 1.8178, "step": 42632 }, { "epoch": 1.42, "grad_norm": 0.535045862197876, "learning_rate": 0.00032560263487765303, "loss": 1.7687, "step": 42633 }, { "epoch": 1.42, "grad_norm": 0.5137315392494202, "learning_rate": 0.0003255922208780134, "loss": 1.737, "step": 42634 }, { "epoch": 1.42, "grad_norm": 0.5070446133613586, "learning_rate": 0.0003255818068473087, "loss": 1.7214, "step": 42635 }, { "epoch": 1.42, "grad_norm": 0.49876096844673157, "learning_rate": 0.0003255713927855514, "loss": 1.7074, "step": 42636 }, { "epoch": 1.42, "grad_norm": 0.4955005347728729, "learning_rate": 0.0003255609786927542, "loss": 1.7948, "step": 42637 }, { "epoch": 1.42, "grad_norm": 0.48775070905685425, "learning_rate": 0.00032555056456892977, "loss": 1.7564, "step": 42638 }, { "epoch": 1.42, "grad_norm": 0.5184857249259949, "learning_rate": 0.00032554015041409066, "loss": 1.8805, "step": 42639 }, { "epoch": 1.42, "grad_norm": 0.49926286935806274, "learning_rate": 0.0003255297362282497, "loss": 1.7571, "step": 42640 }, { "epoch": 1.42, "grad_norm": 0.5177671313285828, "learning_rate": 0.00032551932201141924, "loss": 1.783, "step": 42641 }, { "epoch": 1.42, "grad_norm": 0.4935275912284851, "learning_rate": 0.0003255089077636122, "loss": 1.7403, "step": 42642 }, { "epoch": 1.42, "grad_norm": 0.5078969597816467, "learning_rate": 0.000325498493484841, "loss": 1.8113, "step": 42643 }, { "epoch": 1.42, "grad_norm": 0.4997211992740631, "learning_rate": 0.00032548807917511847, "loss": 1.6884, "step": 42644 }, { "epoch": 1.42, "grad_norm": 0.5076638460159302, "learning_rate": 0.0003254776648344571, "loss": 1.8109, "step": 42645 }, { "epoch": 1.42, "grad_norm": 0.5138543844223022, "learning_rate": 0.00032546725046286965, "loss": 1.8214, "step": 42646 }, { "epoch": 1.42, "grad_norm": 0.4928782880306244, "learning_rate": 0.00032545683606036873, "loss": 1.7796, "step": 42647 }, { "epoch": 1.42, "grad_norm": 0.5143358707427979, "learning_rate": 0.00032544642162696697, "loss": 1.7876, "step": 42648 }, { "epoch": 1.42, "grad_norm": 0.5016205906867981, "learning_rate": 0.00032543600716267694, "loss": 1.7885, "step": 42649 }, { "epoch": 1.42, "grad_norm": 0.4987947940826416, "learning_rate": 0.0003254255926675113, "loss": 1.7355, "step": 42650 }, { "epoch": 1.42, "grad_norm": 0.5236919522285461, "learning_rate": 0.00032541517814148286, "loss": 1.7992, "step": 42651 }, { "epoch": 1.42, "grad_norm": 0.5146994590759277, "learning_rate": 0.0003254047635846041, "loss": 1.8599, "step": 42652 }, { "epoch": 1.42, "grad_norm": 0.5303479433059692, "learning_rate": 0.0003253943489968877, "loss": 1.8017, "step": 42653 }, { "epoch": 1.42, "grad_norm": 0.47810372710227966, "learning_rate": 0.0003253839343783463, "loss": 1.7222, "step": 42654 }, { "epoch": 1.42, "grad_norm": 0.5102317929267883, "learning_rate": 0.0003253735197289925, "loss": 1.7335, "step": 42655 }, { "epoch": 1.42, "grad_norm": 0.5025428533554077, "learning_rate": 0.00032536310504883905, "loss": 1.746, "step": 42656 }, { "epoch": 1.42, "grad_norm": 0.5138733983039856, "learning_rate": 0.00032535269033789844, "loss": 1.7036, "step": 42657 }, { "epoch": 1.42, "grad_norm": 0.5067830681800842, "learning_rate": 0.0003253422755961835, "loss": 1.8328, "step": 42658 }, { "epoch": 1.42, "grad_norm": 0.5203211903572083, "learning_rate": 0.00032533186082370663, "loss": 1.6763, "step": 42659 }, { "epoch": 1.42, "grad_norm": 0.505034863948822, "learning_rate": 0.0003253214460204808, "loss": 1.7505, "step": 42660 }, { "epoch": 1.42, "grad_norm": 0.5044706463813782, "learning_rate": 0.00032531103118651827, "loss": 1.7558, "step": 42661 }, { "epoch": 1.42, "grad_norm": 0.4955599308013916, "learning_rate": 0.0003253006163218321, "loss": 1.7136, "step": 42662 }, { "epoch": 1.42, "grad_norm": 0.5040966272354126, "learning_rate": 0.00032529020142643456, "loss": 1.7333, "step": 42663 }, { "epoch": 1.42, "grad_norm": 0.509107232093811, "learning_rate": 0.0003252797865003384, "loss": 1.7713, "step": 42664 }, { "epoch": 1.42, "grad_norm": 0.5120027661323547, "learning_rate": 0.0003252693715435565, "loss": 1.7713, "step": 42665 }, { "epoch": 1.42, "grad_norm": 0.4982890486717224, "learning_rate": 0.00032525895655610113, "loss": 1.7478, "step": 42666 }, { "epoch": 1.42, "grad_norm": 0.49700671434402466, "learning_rate": 0.0003252485415379852, "loss": 1.7082, "step": 42667 }, { "epoch": 1.42, "grad_norm": 0.4953548014163971, "learning_rate": 0.00032523812648922116, "loss": 1.7234, "step": 42668 }, { "epoch": 1.42, "grad_norm": 0.513307511806488, "learning_rate": 0.0003252277114098218, "loss": 1.765, "step": 42669 }, { "epoch": 1.42, "grad_norm": 0.47976404428482056, "learning_rate": 0.00032521729629979973, "loss": 1.7976, "step": 42670 }, { "epoch": 1.42, "grad_norm": 0.5087364912033081, "learning_rate": 0.0003252068811591676, "loss": 1.8124, "step": 42671 }, { "epoch": 1.42, "grad_norm": 0.5229933261871338, "learning_rate": 0.00032519646598793805, "loss": 1.7652, "step": 42672 }, { "epoch": 1.42, "grad_norm": 0.5106866955757141, "learning_rate": 0.0003251860507861236, "loss": 1.7504, "step": 42673 }, { "epoch": 1.42, "grad_norm": 0.49295708537101746, "learning_rate": 0.0003251756355537371, "loss": 1.7299, "step": 42674 }, { "epoch": 1.42, "grad_norm": 0.48693639039993286, "learning_rate": 0.000325165220290791, "loss": 1.7506, "step": 42675 }, { "epoch": 1.42, "grad_norm": 0.5076044201850891, "learning_rate": 0.0003251548049972981, "loss": 1.7468, "step": 42676 }, { "epoch": 1.42, "grad_norm": 0.5038765668869019, "learning_rate": 0.00032514438967327095, "loss": 1.8764, "step": 42677 }, { "epoch": 1.42, "grad_norm": 0.5082601308822632, "learning_rate": 0.0003251339743187222, "loss": 1.7381, "step": 42678 }, { "epoch": 1.42, "grad_norm": 0.5042800307273865, "learning_rate": 0.00032512355893366457, "loss": 1.7783, "step": 42679 }, { "epoch": 1.42, "grad_norm": 0.5076532959938049, "learning_rate": 0.0003251131435181106, "loss": 1.7325, "step": 42680 }, { "epoch": 1.42, "grad_norm": 0.49459320306777954, "learning_rate": 0.000325102728072073, "loss": 1.7769, "step": 42681 }, { "epoch": 1.42, "grad_norm": 0.48868608474731445, "learning_rate": 0.0003250923125955643, "loss": 1.732, "step": 42682 }, { "epoch": 1.42, "grad_norm": 0.5061526894569397, "learning_rate": 0.00032508189708859727, "loss": 1.7946, "step": 42683 }, { "epoch": 1.42, "grad_norm": 0.5128509998321533, "learning_rate": 0.0003250714815511845, "loss": 1.7392, "step": 42684 }, { "epoch": 1.42, "grad_norm": 0.5219408869743347, "learning_rate": 0.0003250610659833387, "loss": 1.7974, "step": 42685 }, { "epoch": 1.42, "grad_norm": 0.5064362287521362, "learning_rate": 0.00032505065038507247, "loss": 1.7637, "step": 42686 }, { "epoch": 1.42, "grad_norm": 0.5196133255958557, "learning_rate": 0.0003250402347563984, "loss": 1.772, "step": 42687 }, { "epoch": 1.42, "grad_norm": 0.5352165699005127, "learning_rate": 0.00032502981909732914, "loss": 1.8332, "step": 42688 }, { "epoch": 1.42, "grad_norm": 0.524519681930542, "learning_rate": 0.00032501940340787743, "loss": 1.7514, "step": 42689 }, { "epoch": 1.42, "grad_norm": 0.5047342777252197, "learning_rate": 0.00032500898768805586, "loss": 1.7818, "step": 42690 }, { "epoch": 1.42, "grad_norm": 0.5086944699287415, "learning_rate": 0.000324998571937877, "loss": 1.6722, "step": 42691 }, { "epoch": 1.42, "grad_norm": 0.4978518486022949, "learning_rate": 0.00032498815615735366, "loss": 1.7539, "step": 42692 }, { "epoch": 1.42, "grad_norm": 0.5130400657653809, "learning_rate": 0.00032497774034649825, "loss": 1.77, "step": 42693 }, { "epoch": 1.42, "grad_norm": 0.5091334581375122, "learning_rate": 0.0003249673245053236, "loss": 1.6855, "step": 42694 }, { "epoch": 1.42, "grad_norm": 0.4978337585926056, "learning_rate": 0.00032495690863384237, "loss": 1.7505, "step": 42695 }, { "epoch": 1.42, "grad_norm": 0.5011053085327148, "learning_rate": 0.00032494649273206696, "loss": 1.7252, "step": 42696 }, { "epoch": 1.42, "grad_norm": 0.5152105689048767, "learning_rate": 0.00032493607680001037, "loss": 1.7276, "step": 42697 }, { "epoch": 1.42, "grad_norm": 0.5129848718643188, "learning_rate": 0.0003249256608376849, "loss": 1.7593, "step": 42698 }, { "epoch": 1.42, "grad_norm": 0.49303802847862244, "learning_rate": 0.00032491524484510345, "loss": 1.7115, "step": 42699 }, { "epoch": 1.42, "grad_norm": 0.5096451044082642, "learning_rate": 0.00032490482882227854, "loss": 1.8189, "step": 42700 }, { "epoch": 1.42, "grad_norm": 0.51301109790802, "learning_rate": 0.00032489441276922277, "loss": 1.7218, "step": 42701 }, { "epoch": 1.42, "grad_norm": 0.49862611293792725, "learning_rate": 0.00032488399668594904, "loss": 1.7903, "step": 42702 }, { "epoch": 1.42, "grad_norm": 0.52059006690979, "learning_rate": 0.00032487358057246954, "loss": 1.7787, "step": 42703 }, { "epoch": 1.42, "grad_norm": 0.5003048777580261, "learning_rate": 0.00032486316442879734, "loss": 1.7082, "step": 42704 }, { "epoch": 1.42, "grad_norm": 0.5071647763252258, "learning_rate": 0.0003248527482549449, "loss": 1.8114, "step": 42705 }, { "epoch": 1.42, "grad_norm": 0.5105231404304504, "learning_rate": 0.00032484233205092494, "loss": 1.79, "step": 42706 }, { "epoch": 1.42, "grad_norm": 0.5102118849754333, "learning_rate": 0.00032483191581674994, "loss": 1.7217, "step": 42707 }, { "epoch": 1.42, "grad_norm": 0.5208733677864075, "learning_rate": 0.00032482149955243266, "loss": 1.7769, "step": 42708 }, { "epoch": 1.42, "grad_norm": 0.4969553053379059, "learning_rate": 0.0003248110832579859, "loss": 1.7912, "step": 42709 }, { "epoch": 1.42, "grad_norm": 0.5000815391540527, "learning_rate": 0.000324800666933422, "loss": 1.7722, "step": 42710 }, { "epoch": 1.42, "grad_norm": 0.5120306611061096, "learning_rate": 0.0003247902505787537, "loss": 1.7849, "step": 42711 }, { "epoch": 1.42, "grad_norm": 0.5219756364822388, "learning_rate": 0.0003247798341939938, "loss": 1.7772, "step": 42712 }, { "epoch": 1.42, "grad_norm": 0.49382930994033813, "learning_rate": 0.0003247694177791548, "loss": 1.7235, "step": 42713 }, { "epoch": 1.42, "grad_norm": 0.5073055028915405, "learning_rate": 0.00032475900133424927, "loss": 1.8028, "step": 42714 }, { "epoch": 1.42, "grad_norm": 0.5147212743759155, "learning_rate": 0.00032474858485929015, "loss": 1.7782, "step": 42715 }, { "epoch": 1.42, "grad_norm": 0.5258159041404724, "learning_rate": 0.00032473816835428974, "loss": 1.8034, "step": 42716 }, { "epoch": 1.42, "grad_norm": 0.5212562084197998, "learning_rate": 0.00032472775181926095, "loss": 1.8348, "step": 42717 }, { "epoch": 1.42, "grad_norm": 0.5116384029388428, "learning_rate": 0.00032471733525421624, "loss": 1.7314, "step": 42718 }, { "epoch": 1.42, "grad_norm": 0.5117301344871521, "learning_rate": 0.0003247069186591683, "loss": 1.7862, "step": 42719 }, { "epoch": 1.42, "grad_norm": 0.48985499143600464, "learning_rate": 0.00032469650203412995, "loss": 1.8005, "step": 42720 }, { "epoch": 1.42, "grad_norm": 0.47710949182510376, "learning_rate": 0.0003246860853791135, "loss": 1.7134, "step": 42721 }, { "epoch": 1.42, "grad_norm": 0.5002572536468506, "learning_rate": 0.0003246756686941319, "loss": 1.7244, "step": 42722 }, { "epoch": 1.42, "grad_norm": 0.49041202664375305, "learning_rate": 0.00032466525197919757, "loss": 1.6981, "step": 42723 }, { "epoch": 1.42, "grad_norm": 0.491714209318161, "learning_rate": 0.0003246548352343234, "loss": 1.7902, "step": 42724 }, { "epoch": 1.42, "grad_norm": 0.5024543404579163, "learning_rate": 0.00032464441845952177, "loss": 1.7443, "step": 42725 }, { "epoch": 1.42, "grad_norm": 0.5047739148139954, "learning_rate": 0.0003246340016548055, "loss": 1.8106, "step": 42726 }, { "epoch": 1.42, "grad_norm": 0.5016731023788452, "learning_rate": 0.00032462358482018724, "loss": 1.8057, "step": 42727 }, { "epoch": 1.42, "grad_norm": 0.5311446785926819, "learning_rate": 0.00032461316795567944, "loss": 1.7827, "step": 42728 }, { "epoch": 1.42, "grad_norm": 0.5064486861228943, "learning_rate": 0.000324602751061295, "loss": 1.8201, "step": 42729 }, { "epoch": 1.42, "grad_norm": 0.49748748540878296, "learning_rate": 0.0003245923341370464, "loss": 1.7871, "step": 42730 }, { "epoch": 1.42, "grad_norm": 0.48565179109573364, "learning_rate": 0.00032458191718294635, "loss": 1.7555, "step": 42731 }, { "epoch": 1.42, "grad_norm": 0.500408947467804, "learning_rate": 0.00032457150019900744, "loss": 1.7363, "step": 42732 }, { "epoch": 1.42, "grad_norm": 0.4895938038825989, "learning_rate": 0.0003245610831852423, "loss": 1.7369, "step": 42733 }, { "epoch": 1.42, "grad_norm": 0.5027567148208618, "learning_rate": 0.00032455066614166375, "loss": 1.7518, "step": 42734 }, { "epoch": 1.42, "grad_norm": 0.5081238746643066, "learning_rate": 0.0003245402490682842, "loss": 1.7404, "step": 42735 }, { "epoch": 1.42, "grad_norm": 0.5140047073364258, "learning_rate": 0.0003245298319651165, "loss": 1.836, "step": 42736 }, { "epoch": 1.42, "grad_norm": 0.5160909295082092, "learning_rate": 0.00032451941483217306, "loss": 1.7638, "step": 42737 }, { "epoch": 1.42, "grad_norm": 0.4955425262451172, "learning_rate": 0.0003245089976694668, "loss": 1.7867, "step": 42738 }, { "epoch": 1.42, "grad_norm": 0.49972036480903625, "learning_rate": 0.0003244985804770101, "loss": 1.7588, "step": 42739 }, { "epoch": 1.42, "grad_norm": 0.5226346850395203, "learning_rate": 0.0003244881632548159, "loss": 1.7685, "step": 42740 }, { "epoch": 1.42, "grad_norm": 0.5097349882125854, "learning_rate": 0.0003244777460028966, "loss": 1.7639, "step": 42741 }, { "epoch": 1.42, "grad_norm": 0.5074728727340698, "learning_rate": 0.0003244673287212649, "loss": 1.7863, "step": 42742 }, { "epoch": 1.42, "grad_norm": 0.4925954341888428, "learning_rate": 0.00032445691140993346, "loss": 1.776, "step": 42743 }, { "epoch": 1.42, "grad_norm": 0.4943658709526062, "learning_rate": 0.00032444649406891495, "loss": 1.7734, "step": 42744 }, { "epoch": 1.42, "grad_norm": 0.4939497709274292, "learning_rate": 0.00032443607669822204, "loss": 1.7889, "step": 42745 }, { "epoch": 1.42, "grad_norm": 0.5171350836753845, "learning_rate": 0.0003244256592978672, "loss": 1.7488, "step": 42746 }, { "epoch": 1.42, "grad_norm": 0.494363933801651, "learning_rate": 0.0003244152418678634, "loss": 1.8238, "step": 42747 }, { "epoch": 1.42, "grad_norm": 0.5191139578819275, "learning_rate": 0.000324404824408223, "loss": 1.7505, "step": 42748 }, { "epoch": 1.42, "grad_norm": 0.4976460635662079, "learning_rate": 0.0003243944069189587, "loss": 1.8187, "step": 42749 }, { "epoch": 1.42, "grad_norm": 0.5074586868286133, "learning_rate": 0.0003243839894000833, "loss": 1.8484, "step": 42750 }, { "epoch": 1.42, "grad_norm": 0.5080862641334534, "learning_rate": 0.0003243735718516092, "loss": 1.8179, "step": 42751 }, { "epoch": 1.42, "grad_norm": 0.5194417238235474, "learning_rate": 0.0003243631542735492, "loss": 1.7414, "step": 42752 }, { "epoch": 1.42, "grad_norm": 0.5029518604278564, "learning_rate": 0.0003243527366659159, "loss": 1.6813, "step": 42753 }, { "epoch": 1.42, "grad_norm": 0.5097128748893738, "learning_rate": 0.0003243423190287221, "loss": 1.7747, "step": 42754 }, { "epoch": 1.42, "grad_norm": 0.5061520934104919, "learning_rate": 0.00032433190136198014, "loss": 1.7537, "step": 42755 }, { "epoch": 1.42, "grad_norm": 0.48306214809417725, "learning_rate": 0.0003243214836657029, "loss": 1.7801, "step": 42756 }, { "epoch": 1.42, "grad_norm": 0.5081359148025513, "learning_rate": 0.00032431106593990304, "loss": 1.7268, "step": 42757 }, { "epoch": 1.42, "grad_norm": 0.48677903413772583, "learning_rate": 0.00032430064818459304, "loss": 1.7998, "step": 42758 }, { "epoch": 1.42, "grad_norm": 0.49421441555023193, "learning_rate": 0.0003242902303997857, "loss": 1.6901, "step": 42759 }, { "epoch": 1.42, "grad_norm": 0.5092620253562927, "learning_rate": 0.0003242798125854936, "loss": 1.7995, "step": 42760 }, { "epoch": 1.42, "grad_norm": 0.5140396356582642, "learning_rate": 0.00032426939474172923, "loss": 1.7826, "step": 42761 }, { "epoch": 1.42, "grad_norm": 0.4827508330345154, "learning_rate": 0.0003242589768685056, "loss": 1.7824, "step": 42762 }, { "epoch": 1.42, "grad_norm": 0.5027121305465698, "learning_rate": 0.00032424855896583495, "loss": 1.8203, "step": 42763 }, { "epoch": 1.42, "grad_norm": 0.5008503794670105, "learning_rate": 0.0003242381410337303, "loss": 1.8063, "step": 42764 }, { "epoch": 1.42, "grad_norm": 0.48686811327934265, "learning_rate": 0.00032422772307220407, "loss": 1.7933, "step": 42765 }, { "epoch": 1.42, "grad_norm": 0.5275733470916748, "learning_rate": 0.0003242173050812689, "loss": 1.8502, "step": 42766 }, { "epoch": 1.42, "grad_norm": 0.5131135582923889, "learning_rate": 0.0003242068870609375, "loss": 1.7739, "step": 42767 }, { "epoch": 1.42, "grad_norm": 0.5318607091903687, "learning_rate": 0.00032419646901122256, "loss": 1.7004, "step": 42768 }, { "epoch": 1.42, "grad_norm": 0.4919991195201874, "learning_rate": 0.00032418605093213656, "loss": 1.7747, "step": 42769 }, { "epoch": 1.42, "grad_norm": 0.490940660238266, "learning_rate": 0.00032417563282369243, "loss": 1.8007, "step": 42770 }, { "epoch": 1.42, "grad_norm": 0.49996650218963623, "learning_rate": 0.00032416521468590253, "loss": 1.7602, "step": 42771 }, { "epoch": 1.42, "grad_norm": 0.5291176438331604, "learning_rate": 0.00032415479651877964, "loss": 1.7249, "step": 42772 }, { "epoch": 1.42, "grad_norm": 0.5065489411354065, "learning_rate": 0.0003241443783223364, "loss": 1.7538, "step": 42773 }, { "epoch": 1.42, "grad_norm": 0.5325990319252014, "learning_rate": 0.00032413396009658535, "loss": 1.7559, "step": 42774 }, { "epoch": 1.42, "grad_norm": 0.5199370384216309, "learning_rate": 0.00032412354184153936, "loss": 1.7622, "step": 42775 }, { "epoch": 1.42, "grad_norm": 0.5139045715332031, "learning_rate": 0.00032411312355721085, "loss": 1.7938, "step": 42776 }, { "epoch": 1.42, "grad_norm": 0.5076516270637512, "learning_rate": 0.00032410270524361264, "loss": 1.7711, "step": 42777 }, { "epoch": 1.42, "grad_norm": 0.5060269236564636, "learning_rate": 0.0003240922869007572, "loss": 1.8237, "step": 42778 }, { "epoch": 1.42, "grad_norm": 0.5044206976890564, "learning_rate": 0.0003240818685286574, "loss": 1.7918, "step": 42779 }, { "epoch": 1.42, "grad_norm": 0.5595517158508301, "learning_rate": 0.00032407145012732565, "loss": 1.8999, "step": 42780 }, { "epoch": 1.42, "grad_norm": 0.5175999999046326, "learning_rate": 0.0003240610316967747, "loss": 1.8079, "step": 42781 }, { "epoch": 1.42, "grad_norm": 0.5076996684074402, "learning_rate": 0.00032405061323701736, "loss": 1.8002, "step": 42782 }, { "epoch": 1.42, "grad_norm": 0.5034233927726746, "learning_rate": 0.0003240401947480659, "loss": 1.7731, "step": 42783 }, { "epoch": 1.42, "grad_norm": 0.5294690132141113, "learning_rate": 0.0003240297762299334, "loss": 1.705, "step": 42784 }, { "epoch": 1.42, "grad_norm": 0.522343099117279, "learning_rate": 0.0003240193576826321, "loss": 1.7435, "step": 42785 }, { "epoch": 1.42, "grad_norm": 0.516430139541626, "learning_rate": 0.000324008939106175, "loss": 1.789, "step": 42786 }, { "epoch": 1.42, "grad_norm": 0.49171409010887146, "learning_rate": 0.0003239985205005745, "loss": 1.8192, "step": 42787 }, { "epoch": 1.42, "grad_norm": 0.49040254950523376, "learning_rate": 0.00032398810186584333, "loss": 1.6607, "step": 42788 }, { "epoch": 1.42, "grad_norm": 0.5083937644958496, "learning_rate": 0.0003239776832019942, "loss": 1.7624, "step": 42789 }, { "epoch": 1.42, "grad_norm": 0.5076771378517151, "learning_rate": 0.0003239672645090396, "loss": 1.8027, "step": 42790 }, { "epoch": 1.42, "grad_norm": 0.5205581188201904, "learning_rate": 0.00032395684578699234, "loss": 1.7722, "step": 42791 }, { "epoch": 1.42, "grad_norm": 0.5262882113456726, "learning_rate": 0.000323946427035865, "loss": 1.8697, "step": 42792 }, { "epoch": 1.42, "grad_norm": 0.4831103980541229, "learning_rate": 0.00032393600825567016, "loss": 1.7592, "step": 42793 }, { "epoch": 1.42, "grad_norm": 0.4901029169559479, "learning_rate": 0.0003239255894464206, "loss": 1.834, "step": 42794 }, { "epoch": 1.42, "grad_norm": 0.49533435702323914, "learning_rate": 0.00032391517060812884, "loss": 1.6901, "step": 42795 }, { "epoch": 1.42, "grad_norm": 0.4972354769706726, "learning_rate": 0.0003239047517408077, "loss": 1.7267, "step": 42796 }, { "epoch": 1.42, "grad_norm": 0.49629804491996765, "learning_rate": 0.0003238943328444696, "loss": 1.7575, "step": 42797 }, { "epoch": 1.42, "grad_norm": 0.49077513813972473, "learning_rate": 0.00032388391391912735, "loss": 1.7778, "step": 42798 }, { "epoch": 1.42, "grad_norm": 0.49312660098075867, "learning_rate": 0.0003238734949647935, "loss": 1.7215, "step": 42799 }, { "epoch": 1.42, "grad_norm": 0.5037457346916199, "learning_rate": 0.0003238630759814808, "loss": 1.7788, "step": 42800 }, { "epoch": 1.42, "grad_norm": 0.506668746471405, "learning_rate": 0.0003238526569692018, "loss": 1.7181, "step": 42801 }, { "epoch": 1.42, "grad_norm": 0.4885367155075073, "learning_rate": 0.0003238422379279693, "loss": 1.7535, "step": 42802 }, { "epoch": 1.42, "grad_norm": 0.4945065677165985, "learning_rate": 0.0003238318188577957, "loss": 1.7318, "step": 42803 }, { "epoch": 1.42, "grad_norm": 0.5019940137863159, "learning_rate": 0.00032382139975869383, "loss": 1.802, "step": 42804 }, { "epoch": 1.42, "grad_norm": 0.4950771927833557, "learning_rate": 0.0003238109806306763, "loss": 1.8032, "step": 42805 }, { "epoch": 1.42, "grad_norm": 0.48921436071395874, "learning_rate": 0.0003238005614737557, "loss": 1.7791, "step": 42806 }, { "epoch": 1.42, "grad_norm": 0.5193102955818176, "learning_rate": 0.00032379014228794484, "loss": 1.7739, "step": 42807 }, { "epoch": 1.42, "grad_norm": 0.4985584616661072, "learning_rate": 0.00032377972307325606, "loss": 1.6912, "step": 42808 }, { "epoch": 1.42, "grad_norm": 0.5215358734130859, "learning_rate": 0.0003237693038297024, "loss": 1.724, "step": 42809 }, { "epoch": 1.42, "grad_norm": 0.507004976272583, "learning_rate": 0.0003237588845572962, "loss": 1.7688, "step": 42810 }, { "epoch": 1.42, "grad_norm": 0.4931854009628296, "learning_rate": 0.0003237484652560502, "loss": 1.7188, "step": 42811 }, { "epoch": 1.42, "grad_norm": 0.5219990015029907, "learning_rate": 0.0003237380459259772, "loss": 1.727, "step": 42812 }, { "epoch": 1.42, "grad_norm": 0.5167387127876282, "learning_rate": 0.00032372762656708955, "loss": 1.7632, "step": 42813 }, { "epoch": 1.42, "grad_norm": 0.5037245154380798, "learning_rate": 0.00032371720717940016, "loss": 1.6906, "step": 42814 }, { "epoch": 1.42, "grad_norm": 0.5138733386993408, "learning_rate": 0.0003237067877629215, "loss": 1.7739, "step": 42815 }, { "epoch": 1.42, "grad_norm": 0.5198879837989807, "learning_rate": 0.00032369636831766635, "loss": 1.7723, "step": 42816 }, { "epoch": 1.42, "grad_norm": 0.49837803840637207, "learning_rate": 0.00032368594884364725, "loss": 1.7244, "step": 42817 }, { "epoch": 1.42, "grad_norm": 0.508111834526062, "learning_rate": 0.00032367552934087695, "loss": 1.8297, "step": 42818 }, { "epoch": 1.42, "grad_norm": 0.5070497393608093, "learning_rate": 0.0003236651098093681, "loss": 1.8224, "step": 42819 }, { "epoch": 1.42, "grad_norm": 0.4959004521369934, "learning_rate": 0.0003236546902491331, "loss": 1.7669, "step": 42820 }, { "epoch": 1.42, "grad_norm": 0.5036136507987976, "learning_rate": 0.0003236442706601849, "loss": 1.7781, "step": 42821 }, { "epoch": 1.42, "grad_norm": 0.49810338020324707, "learning_rate": 0.0003236338510425361, "loss": 1.7126, "step": 42822 }, { "epoch": 1.42, "grad_norm": 0.4944987893104553, "learning_rate": 0.00032362343139619924, "loss": 1.7926, "step": 42823 }, { "epoch": 1.42, "grad_norm": 0.6127402782440186, "learning_rate": 0.00032361301172118695, "loss": 1.726, "step": 42824 }, { "epoch": 1.42, "grad_norm": 0.5161483883857727, "learning_rate": 0.000323602592017512, "loss": 1.7749, "step": 42825 }, { "epoch": 1.42, "grad_norm": 0.520262598991394, "learning_rate": 0.000323592172285187, "loss": 1.8651, "step": 42826 }, { "epoch": 1.42, "grad_norm": 0.5251672863960266, "learning_rate": 0.0003235817525242246, "loss": 1.6792, "step": 42827 }, { "epoch": 1.42, "grad_norm": 0.5193304419517517, "learning_rate": 0.00032357133273463735, "loss": 1.7152, "step": 42828 }, { "epoch": 1.42, "grad_norm": 0.5152590274810791, "learning_rate": 0.000323560912916438, "loss": 1.7424, "step": 42829 }, { "epoch": 1.42, "grad_norm": 0.4845668077468872, "learning_rate": 0.00032355049306963924, "loss": 1.7604, "step": 42830 }, { "epoch": 1.42, "grad_norm": 0.4968390464782715, "learning_rate": 0.0003235400731942536, "loss": 1.7701, "step": 42831 }, { "epoch": 1.43, "grad_norm": 0.5269291400909424, "learning_rate": 0.00032352965329029374, "loss": 1.7628, "step": 42832 }, { "epoch": 1.43, "grad_norm": 0.4948734939098358, "learning_rate": 0.0003235192333577723, "loss": 1.7644, "step": 42833 }, { "epoch": 1.43, "grad_norm": 0.4970422387123108, "learning_rate": 0.00032350881339670217, "loss": 1.671, "step": 42834 }, { "epoch": 1.43, "grad_norm": 0.48688825964927673, "learning_rate": 0.00032349839340709566, "loss": 1.784, "step": 42835 }, { "epoch": 1.43, "grad_norm": 0.5246914029121399, "learning_rate": 0.0003234879733889656, "loss": 1.7966, "step": 42836 }, { "epoch": 1.43, "grad_norm": 0.5369411110877991, "learning_rate": 0.00032347755334232467, "loss": 1.7415, "step": 42837 }, { "epoch": 1.43, "grad_norm": 0.5161848664283752, "learning_rate": 0.00032346713326718534, "loss": 1.6792, "step": 42838 }, { "epoch": 1.43, "grad_norm": 0.5062904357910156, "learning_rate": 0.00032345671316356045, "loss": 1.7507, "step": 42839 }, { "epoch": 1.43, "grad_norm": 0.5124797224998474, "learning_rate": 0.0003234462930314624, "loss": 1.8716, "step": 42840 }, { "epoch": 1.43, "grad_norm": 0.5143988132476807, "learning_rate": 0.0003234358728709042, "loss": 1.7237, "step": 42841 }, { "epoch": 1.43, "grad_norm": 0.4947513937950134, "learning_rate": 0.00032342545268189817, "loss": 1.7741, "step": 42842 }, { "epoch": 1.43, "grad_norm": 0.49346470832824707, "learning_rate": 0.0003234150324644572, "loss": 1.7736, "step": 42843 }, { "epoch": 1.43, "grad_norm": 0.49115777015686035, "learning_rate": 0.00032340461221859386, "loss": 1.7463, "step": 42844 }, { "epoch": 1.43, "grad_norm": 0.493488609790802, "learning_rate": 0.0003233941919443206, "loss": 1.7264, "step": 42845 }, { "epoch": 1.43, "grad_norm": 0.5014917254447937, "learning_rate": 0.0003233837716416504, "loss": 1.7043, "step": 42846 }, { "epoch": 1.43, "grad_norm": 0.5127557516098022, "learning_rate": 0.00032337335131059564, "loss": 1.6967, "step": 42847 }, { "epoch": 1.43, "grad_norm": 0.5314955115318298, "learning_rate": 0.00032336293095116907, "loss": 1.7628, "step": 42848 }, { "epoch": 1.43, "grad_norm": 0.5255258083343506, "learning_rate": 0.00032335251056338346, "loss": 1.7882, "step": 42849 }, { "epoch": 1.43, "grad_norm": 0.49764230847358704, "learning_rate": 0.0003233420901472512, "loss": 1.6911, "step": 42850 }, { "epoch": 1.43, "grad_norm": 0.5106916427612305, "learning_rate": 0.00032333166970278525, "loss": 1.7514, "step": 42851 }, { "epoch": 1.43, "grad_norm": 0.5233466029167175, "learning_rate": 0.00032332124922999797, "loss": 1.7263, "step": 42852 }, { "epoch": 1.43, "grad_norm": 0.5143274068832397, "learning_rate": 0.0003233108287289021, "loss": 1.7886, "step": 42853 }, { "epoch": 1.43, "grad_norm": 0.523094117641449, "learning_rate": 0.0003233004081995104, "loss": 1.777, "step": 42854 }, { "epoch": 1.43, "grad_norm": 0.5009047389030457, "learning_rate": 0.0003232899876418354, "loss": 1.7664, "step": 42855 }, { "epoch": 1.43, "grad_norm": 0.5259169340133667, "learning_rate": 0.00032327956705588976, "loss": 1.833, "step": 42856 }, { "epoch": 1.43, "grad_norm": 0.5215017199516296, "learning_rate": 0.00032326914644168625, "loss": 1.8429, "step": 42857 }, { "epoch": 1.43, "grad_norm": 0.4907994270324707, "learning_rate": 0.0003232587257992374, "loss": 1.7522, "step": 42858 }, { "epoch": 1.43, "grad_norm": 0.5028234720230103, "learning_rate": 0.0003232483051285558, "loss": 1.8577, "step": 42859 }, { "epoch": 1.43, "grad_norm": 0.5327627062797546, "learning_rate": 0.0003232378844296542, "loss": 1.7884, "step": 42860 }, { "epoch": 1.43, "grad_norm": 0.5061349868774414, "learning_rate": 0.00032322746370254526, "loss": 1.7653, "step": 42861 }, { "epoch": 1.43, "grad_norm": 0.5096807479858398, "learning_rate": 0.0003232170429472417, "loss": 1.7181, "step": 42862 }, { "epoch": 1.43, "grad_norm": 0.5360295176506042, "learning_rate": 0.0003232066221637559, "loss": 1.7854, "step": 42863 }, { "epoch": 1.43, "grad_norm": 0.7391184568405151, "learning_rate": 0.0003231962013521008, "loss": 1.8279, "step": 42864 }, { "epoch": 1.43, "grad_norm": 0.5028122067451477, "learning_rate": 0.0003231857805122889, "loss": 1.7247, "step": 42865 }, { "epoch": 1.43, "grad_norm": 0.503266453742981, "learning_rate": 0.00032317535964433285, "loss": 1.7801, "step": 42866 }, { "epoch": 1.43, "grad_norm": 0.827325701713562, "learning_rate": 0.0003231649387482454, "loss": 1.7716, "step": 42867 }, { "epoch": 1.43, "grad_norm": 0.5246276259422302, "learning_rate": 0.00032315451782403894, "loss": 1.7982, "step": 42868 }, { "epoch": 1.43, "grad_norm": 0.5088985562324524, "learning_rate": 0.00032314409687172655, "loss": 1.8352, "step": 42869 }, { "epoch": 1.43, "grad_norm": 0.5002318620681763, "learning_rate": 0.0003231336758913204, "loss": 1.7287, "step": 42870 }, { "epoch": 1.43, "grad_norm": 0.5300571918487549, "learning_rate": 0.0003231232548828336, "loss": 1.8519, "step": 42871 }, { "epoch": 1.43, "grad_norm": 0.502052366733551, "learning_rate": 0.00032311283384627845, "loss": 1.7687, "step": 42872 }, { "epoch": 1.43, "grad_norm": 0.5089406371116638, "learning_rate": 0.00032310241278166776, "loss": 1.8148, "step": 42873 }, { "epoch": 1.43, "grad_norm": 0.5105305314064026, "learning_rate": 0.00032309199168901423, "loss": 1.7777, "step": 42874 }, { "epoch": 1.43, "grad_norm": 0.5218160152435303, "learning_rate": 0.00032308157056833023, "loss": 1.8135, "step": 42875 }, { "epoch": 1.43, "grad_norm": 0.5077677369117737, "learning_rate": 0.0003230711494196287, "loss": 1.8029, "step": 42876 }, { "epoch": 1.43, "grad_norm": 0.5031081438064575, "learning_rate": 0.0003230607282429222, "loss": 1.6869, "step": 42877 }, { "epoch": 1.43, "grad_norm": 0.6546450257301331, "learning_rate": 0.0003230503070382234, "loss": 1.7492, "step": 42878 }, { "epoch": 1.43, "grad_norm": 0.49861574172973633, "learning_rate": 0.00032303988580554486, "loss": 1.821, "step": 42879 }, { "epoch": 1.43, "grad_norm": 0.4987827241420746, "learning_rate": 0.0003230294645448993, "loss": 1.7396, "step": 42880 }, { "epoch": 1.43, "grad_norm": 0.5350199341773987, "learning_rate": 0.0003230190432562995, "loss": 1.6949, "step": 42881 }, { "epoch": 1.43, "grad_norm": 0.4871881604194641, "learning_rate": 0.00032300862193975784, "loss": 1.7917, "step": 42882 }, { "epoch": 1.43, "grad_norm": 0.5030235648155212, "learning_rate": 0.00032299820059528714, "loss": 1.8113, "step": 42883 }, { "epoch": 1.43, "grad_norm": 0.5013732314109802, "learning_rate": 0.00032298777922289997, "loss": 1.76, "step": 42884 }, { "epoch": 1.43, "grad_norm": 0.5059465169906616, "learning_rate": 0.000322977357822609, "loss": 1.7109, "step": 42885 }, { "epoch": 1.43, "grad_norm": 0.4994508922100067, "learning_rate": 0.000322966936394427, "loss": 1.7689, "step": 42886 }, { "epoch": 1.43, "grad_norm": 0.4930339455604553, "learning_rate": 0.0003229565149383666, "loss": 1.7956, "step": 42887 }, { "epoch": 1.43, "grad_norm": 0.5056361556053162, "learning_rate": 0.0003229460934544402, "loss": 1.728, "step": 42888 }, { "epoch": 1.43, "grad_norm": 0.49467891454696655, "learning_rate": 0.00032293567194266064, "loss": 1.8, "step": 42889 }, { "epoch": 1.43, "grad_norm": 0.49814489483833313, "learning_rate": 0.0003229252504030406, "loss": 1.6741, "step": 42890 }, { "epoch": 1.43, "grad_norm": 0.5122203230857849, "learning_rate": 0.00032291482883559265, "loss": 1.7447, "step": 42891 }, { "epoch": 1.43, "grad_norm": 0.509179413318634, "learning_rate": 0.0003229044072403296, "loss": 1.7885, "step": 42892 }, { "epoch": 1.43, "grad_norm": 0.5118798017501831, "learning_rate": 0.00032289398561726374, "loss": 1.8068, "step": 42893 }, { "epoch": 1.43, "grad_norm": 0.487239271402359, "learning_rate": 0.0003228835639664082, "loss": 1.7466, "step": 42894 }, { "epoch": 1.43, "grad_norm": 0.488143652677536, "learning_rate": 0.00032287314228777515, "loss": 1.7007, "step": 42895 }, { "epoch": 1.43, "grad_norm": 0.5372353792190552, "learning_rate": 0.0003228627205813777, "loss": 1.8171, "step": 42896 }, { "epoch": 1.43, "grad_norm": 0.516404926776886, "learning_rate": 0.00032285229884722815, "loss": 1.6874, "step": 42897 }, { "epoch": 1.43, "grad_norm": 0.5064978003501892, "learning_rate": 0.0003228418770853393, "loss": 1.8373, "step": 42898 }, { "epoch": 1.43, "grad_norm": 0.5000297427177429, "learning_rate": 0.00032283145529572385, "loss": 1.7383, "step": 42899 }, { "epoch": 1.43, "grad_norm": 0.5166473984718323, "learning_rate": 0.0003228210334783942, "loss": 1.7918, "step": 42900 }, { "epoch": 1.43, "grad_norm": 0.49304693937301636, "learning_rate": 0.00032281061163336335, "loss": 1.7676, "step": 42901 }, { "epoch": 1.43, "grad_norm": 0.4967973828315735, "learning_rate": 0.00032280018976064365, "loss": 1.7915, "step": 42902 }, { "epoch": 1.43, "grad_norm": 0.516995370388031, "learning_rate": 0.00032278976786024796, "loss": 1.8461, "step": 42903 }, { "epoch": 1.43, "grad_norm": 0.5086013078689575, "learning_rate": 0.0003227793459321888, "loss": 1.8035, "step": 42904 }, { "epoch": 1.43, "grad_norm": 0.5025604367256165, "learning_rate": 0.0003227689239764789, "loss": 1.7013, "step": 42905 }, { "epoch": 1.43, "grad_norm": 0.506415843963623, "learning_rate": 0.00032275850199313096, "loss": 1.7401, "step": 42906 }, { "epoch": 1.43, "grad_norm": 0.5163410305976868, "learning_rate": 0.00032274807998215743, "loss": 1.7332, "step": 42907 }, { "epoch": 1.43, "grad_norm": 0.5058165788650513, "learning_rate": 0.0003227376579435711, "loss": 1.697, "step": 42908 }, { "epoch": 1.43, "grad_norm": 0.49636292457580566, "learning_rate": 0.00032272723587738466, "loss": 1.7651, "step": 42909 }, { "epoch": 1.43, "grad_norm": 0.5062611699104309, "learning_rate": 0.00032271681378361067, "loss": 1.8033, "step": 42910 }, { "epoch": 1.43, "grad_norm": 0.4971497058868408, "learning_rate": 0.0003227063916622619, "loss": 1.757, "step": 42911 }, { "epoch": 1.43, "grad_norm": 0.49843037128448486, "learning_rate": 0.00032269596951335075, "loss": 1.7277, "step": 42912 }, { "epoch": 1.43, "grad_norm": 0.4993593692779541, "learning_rate": 0.0003226855473368902, "loss": 1.7475, "step": 42913 }, { "epoch": 1.43, "grad_norm": 0.5365579128265381, "learning_rate": 0.0003226751251328926, "loss": 1.7121, "step": 42914 }, { "epoch": 1.43, "grad_norm": 0.4974789619445801, "learning_rate": 0.0003226647029013708, "loss": 1.8046, "step": 42915 }, { "epoch": 1.43, "grad_norm": 0.5103804469108582, "learning_rate": 0.0003226542806423374, "loss": 1.7221, "step": 42916 }, { "epoch": 1.43, "grad_norm": 0.5015627145767212, "learning_rate": 0.00032264385835580506, "loss": 1.7092, "step": 42917 }, { "epoch": 1.43, "grad_norm": 0.5043780207633972, "learning_rate": 0.00032263343604178635, "loss": 1.7848, "step": 42918 }, { "epoch": 1.43, "grad_norm": 0.5195821523666382, "learning_rate": 0.00032262301370029406, "loss": 1.7759, "step": 42919 }, { "epoch": 1.43, "grad_norm": 0.5113515257835388, "learning_rate": 0.0003226125913313407, "loss": 1.8119, "step": 42920 }, { "epoch": 1.43, "grad_norm": 0.49720922112464905, "learning_rate": 0.000322602168934939, "loss": 1.7233, "step": 42921 }, { "epoch": 1.43, "grad_norm": 0.5095683932304382, "learning_rate": 0.00032259174651110163, "loss": 1.7569, "step": 42922 }, { "epoch": 1.43, "grad_norm": 0.5072577595710754, "learning_rate": 0.0003225813240598412, "loss": 1.7645, "step": 42923 }, { "epoch": 1.43, "grad_norm": 0.5049029588699341, "learning_rate": 0.0003225709015811704, "loss": 1.8493, "step": 42924 }, { "epoch": 1.43, "grad_norm": 0.49566370248794556, "learning_rate": 0.00032256047907510175, "loss": 1.7832, "step": 42925 }, { "epoch": 1.43, "grad_norm": 0.5224854946136475, "learning_rate": 0.00032255005654164805, "loss": 1.7475, "step": 42926 }, { "epoch": 1.43, "grad_norm": 0.5098206400871277, "learning_rate": 0.00032253963398082194, "loss": 1.7858, "step": 42927 }, { "epoch": 1.43, "grad_norm": 0.4741595685482025, "learning_rate": 0.00032252921139263604, "loss": 1.7484, "step": 42928 }, { "epoch": 1.43, "grad_norm": 0.49540069699287415, "learning_rate": 0.000322518788777103, "loss": 1.8036, "step": 42929 }, { "epoch": 1.43, "grad_norm": 0.4947357177734375, "learning_rate": 0.00032250836613423534, "loss": 1.8186, "step": 42930 }, { "epoch": 1.43, "grad_norm": 0.5237926840782166, "learning_rate": 0.00032249794346404597, "loss": 1.8174, "step": 42931 }, { "epoch": 1.43, "grad_norm": 0.4973960220813751, "learning_rate": 0.0003224875207665473, "loss": 1.7193, "step": 42932 }, { "epoch": 1.43, "grad_norm": 0.5056858062744141, "learning_rate": 0.0003224770980417523, "loss": 1.7089, "step": 42933 }, { "epoch": 1.43, "grad_norm": 0.5417416095733643, "learning_rate": 0.0003224666752896732, "loss": 1.7048, "step": 42934 }, { "epoch": 1.43, "grad_norm": 0.4934947192668915, "learning_rate": 0.0003224562525103229, "loss": 1.7126, "step": 42935 }, { "epoch": 1.43, "grad_norm": 0.5173405408859253, "learning_rate": 0.00032244582970371413, "loss": 1.8478, "step": 42936 }, { "epoch": 1.43, "grad_norm": 0.4916391372680664, "learning_rate": 0.00032243540686985935, "loss": 1.7889, "step": 42937 }, { "epoch": 1.43, "grad_norm": 0.501134991645813, "learning_rate": 0.0003224249840087713, "loss": 1.8323, "step": 42938 }, { "epoch": 1.43, "grad_norm": 0.5211160182952881, "learning_rate": 0.0003224145611204627, "loss": 1.7615, "step": 42939 }, { "epoch": 1.43, "grad_norm": 0.5312032699584961, "learning_rate": 0.000322404138204946, "loss": 1.8716, "step": 42940 }, { "epoch": 1.43, "grad_norm": 0.5254204869270325, "learning_rate": 0.000322393715262234, "loss": 1.8481, "step": 42941 }, { "epoch": 1.43, "grad_norm": 0.48903220891952515, "learning_rate": 0.0003223832922923394, "loss": 1.7392, "step": 42942 }, { "epoch": 1.43, "grad_norm": 0.4997199773788452, "learning_rate": 0.00032237286929527484, "loss": 1.7865, "step": 42943 }, { "epoch": 1.43, "grad_norm": 0.5282852053642273, "learning_rate": 0.00032236244627105275, "loss": 1.7648, "step": 42944 }, { "epoch": 1.43, "grad_norm": 0.503930926322937, "learning_rate": 0.00032235202321968603, "loss": 1.7758, "step": 42945 }, { "epoch": 1.43, "grad_norm": 0.5016292333602905, "learning_rate": 0.0003223416001411872, "loss": 1.8524, "step": 42946 }, { "epoch": 1.43, "grad_norm": 0.5002126693725586, "learning_rate": 0.000322331177035569, "loss": 1.7513, "step": 42947 }, { "epoch": 1.43, "grad_norm": 0.5100067853927612, "learning_rate": 0.000322320753902844, "loss": 1.7533, "step": 42948 }, { "epoch": 1.43, "grad_norm": 0.5218064785003662, "learning_rate": 0.000322310330743025, "loss": 1.7932, "step": 42949 }, { "epoch": 1.43, "grad_norm": 0.5149039626121521, "learning_rate": 0.00032229990755612434, "loss": 1.8045, "step": 42950 }, { "epoch": 1.43, "grad_norm": 0.5068420171737671, "learning_rate": 0.0003222894843421551, "loss": 1.7571, "step": 42951 }, { "epoch": 1.43, "grad_norm": 0.4972909092903137, "learning_rate": 0.0003222790611011296, "loss": 1.7245, "step": 42952 }, { "epoch": 1.43, "grad_norm": 0.5010735392570496, "learning_rate": 0.00032226863783306063, "loss": 1.8574, "step": 42953 }, { "epoch": 1.43, "grad_norm": 0.5156004428863525, "learning_rate": 0.0003222582145379609, "loss": 1.7991, "step": 42954 }, { "epoch": 1.43, "grad_norm": 0.49240946769714355, "learning_rate": 0.0003222477912158428, "loss": 1.8103, "step": 42955 }, { "epoch": 1.43, "grad_norm": 0.5073672533035278, "learning_rate": 0.00032223736786671925, "loss": 1.8086, "step": 42956 }, { "epoch": 1.43, "grad_norm": 0.5031738877296448, "learning_rate": 0.00032222694449060275, "loss": 1.8098, "step": 42957 }, { "epoch": 1.43, "grad_norm": 0.533133864402771, "learning_rate": 0.0003222165210875061, "loss": 1.7562, "step": 42958 }, { "epoch": 1.43, "grad_norm": 0.5260257124900818, "learning_rate": 0.0003222060976574418, "loss": 1.7415, "step": 42959 }, { "epoch": 1.43, "grad_norm": 0.4966977536678314, "learning_rate": 0.00032219567420042265, "loss": 1.7682, "step": 42960 }, { "epoch": 1.43, "grad_norm": 0.5175647735595703, "learning_rate": 0.0003221852507164612, "loss": 1.8209, "step": 42961 }, { "epoch": 1.43, "grad_norm": 0.4893905222415924, "learning_rate": 0.00032217482720557004, "loss": 1.7696, "step": 42962 }, { "epoch": 1.43, "grad_norm": 0.5005682110786438, "learning_rate": 0.00032216440366776203, "loss": 1.7921, "step": 42963 }, { "epoch": 1.43, "grad_norm": 0.5166160464286804, "learning_rate": 0.00032215398010304965, "loss": 1.7606, "step": 42964 }, { "epoch": 1.43, "grad_norm": 0.5099425315856934, "learning_rate": 0.0003221435565114456, "loss": 1.8014, "step": 42965 }, { "epoch": 1.43, "grad_norm": 0.5144999027252197, "learning_rate": 0.00032213313289296253, "loss": 1.7143, "step": 42966 }, { "epoch": 1.43, "grad_norm": 0.49021920561790466, "learning_rate": 0.00032212270924761305, "loss": 1.7778, "step": 42967 }, { "epoch": 1.43, "grad_norm": 0.5236879587173462, "learning_rate": 0.00032211228557540994, "loss": 1.7371, "step": 42968 }, { "epoch": 1.43, "grad_norm": 0.5173739790916443, "learning_rate": 0.0003221018618763657, "loss": 1.7223, "step": 42969 }, { "epoch": 1.43, "grad_norm": 0.5141234993934631, "learning_rate": 0.00032209143815049314, "loss": 1.6917, "step": 42970 }, { "epoch": 1.43, "grad_norm": 0.5009453296661377, "learning_rate": 0.0003220810143978048, "loss": 1.7804, "step": 42971 }, { "epoch": 1.43, "grad_norm": 0.5032442808151245, "learning_rate": 0.00032207059061831336, "loss": 1.7504, "step": 42972 }, { "epoch": 1.43, "grad_norm": 0.5013626217842102, "learning_rate": 0.0003220601668120314, "loss": 1.7908, "step": 42973 }, { "epoch": 1.43, "grad_norm": 0.5183845162391663, "learning_rate": 0.00032204974297897173, "loss": 1.7806, "step": 42974 }, { "epoch": 1.43, "grad_norm": 0.5281005501747131, "learning_rate": 0.0003220393191191469, "loss": 1.7302, "step": 42975 }, { "epoch": 1.43, "grad_norm": 0.5153989791870117, "learning_rate": 0.0003220288952325696, "loss": 1.7405, "step": 42976 }, { "epoch": 1.43, "grad_norm": 0.49660447239875793, "learning_rate": 0.00032201847131925243, "loss": 1.8301, "step": 42977 }, { "epoch": 1.43, "grad_norm": 0.5027342438697815, "learning_rate": 0.00032200804737920807, "loss": 1.7889, "step": 42978 }, { "epoch": 1.43, "grad_norm": 0.498333215713501, "learning_rate": 0.00032199762341244935, "loss": 1.7433, "step": 42979 }, { "epoch": 1.43, "grad_norm": 0.5115354657173157, "learning_rate": 0.0003219871994189885, "loss": 1.8134, "step": 42980 }, { "epoch": 1.43, "grad_norm": 0.49183809757232666, "learning_rate": 0.0003219767753988386, "loss": 1.681, "step": 42981 }, { "epoch": 1.43, "grad_norm": 0.4963805675506592, "learning_rate": 0.0003219663513520121, "loss": 1.7561, "step": 42982 }, { "epoch": 1.43, "grad_norm": 0.4961055815219879, "learning_rate": 0.0003219559272785216, "loss": 1.7829, "step": 42983 }, { "epoch": 1.43, "grad_norm": 0.5055018067359924, "learning_rate": 0.00032194550317838, "loss": 1.7491, "step": 42984 }, { "epoch": 1.43, "grad_norm": 0.5076032280921936, "learning_rate": 0.0003219350790515996, "loss": 1.7948, "step": 42985 }, { "epoch": 1.43, "grad_norm": 0.5300936102867126, "learning_rate": 0.0003219246548981934, "loss": 1.791, "step": 42986 }, { "epoch": 1.43, "grad_norm": 0.5010415315628052, "learning_rate": 0.0003219142307181738, "loss": 1.7785, "step": 42987 }, { "epoch": 1.43, "grad_norm": 0.5126004219055176, "learning_rate": 0.0003219038065115537, "loss": 1.826, "step": 42988 }, { "epoch": 1.43, "grad_norm": 0.48937347531318665, "learning_rate": 0.00032189338227834544, "loss": 1.7495, "step": 42989 }, { "epoch": 1.43, "grad_norm": 0.48399126529693604, "learning_rate": 0.00032188295801856195, "loss": 1.7737, "step": 42990 }, { "epoch": 1.43, "grad_norm": 0.5025193691253662, "learning_rate": 0.0003218725337322157, "loss": 1.7454, "step": 42991 }, { "epoch": 1.43, "grad_norm": 0.5016840696334839, "learning_rate": 0.0003218621094193194, "loss": 1.8241, "step": 42992 }, { "epoch": 1.43, "grad_norm": 0.517891526222229, "learning_rate": 0.0003218516850798858, "loss": 1.8214, "step": 42993 }, { "epoch": 1.43, "grad_norm": 0.4951219856739044, "learning_rate": 0.0003218412607139275, "loss": 1.8132, "step": 42994 }, { "epoch": 1.43, "grad_norm": 0.5388982892036438, "learning_rate": 0.00032183083632145697, "loss": 1.7672, "step": 42995 }, { "epoch": 1.43, "grad_norm": 0.535666286945343, "learning_rate": 0.00032182041190248717, "loss": 1.7341, "step": 42996 }, { "epoch": 1.43, "grad_norm": 0.509352445602417, "learning_rate": 0.0003218099874570305, "loss": 1.8307, "step": 42997 }, { "epoch": 1.43, "grad_norm": 0.49665147066116333, "learning_rate": 0.00032179956298509984, "loss": 1.8392, "step": 42998 }, { "epoch": 1.43, "grad_norm": 0.49692633748054504, "learning_rate": 0.0003217891384867077, "loss": 1.8267, "step": 42999 }, { "epoch": 1.43, "grad_norm": 0.5085585713386536, "learning_rate": 0.00032177871396186667, "loss": 1.7773, "step": 43000 }, { "epoch": 1.43, "grad_norm": 0.5130364894866943, "learning_rate": 0.0003217682894105895, "loss": 1.8955, "step": 43001 }, { "epoch": 1.43, "grad_norm": 0.5195309519767761, "learning_rate": 0.00032175786483288883, "loss": 1.81, "step": 43002 }, { "epoch": 1.43, "grad_norm": 0.5078843235969543, "learning_rate": 0.00032174744022877736, "loss": 1.8008, "step": 43003 }, { "epoch": 1.43, "grad_norm": 0.507575511932373, "learning_rate": 0.00032173701559826774, "loss": 1.8393, "step": 43004 }, { "epoch": 1.43, "grad_norm": 0.4903246760368347, "learning_rate": 0.00032172659094137254, "loss": 1.7017, "step": 43005 }, { "epoch": 1.43, "grad_norm": 0.49752089381217957, "learning_rate": 0.00032171616625810445, "loss": 1.7264, "step": 43006 }, { "epoch": 1.43, "grad_norm": 0.5013235807418823, "learning_rate": 0.00032170574154847615, "loss": 1.7732, "step": 43007 }, { "epoch": 1.43, "grad_norm": 0.5239443778991699, "learning_rate": 0.00032169531681250027, "loss": 1.8608, "step": 43008 }, { "epoch": 1.43, "grad_norm": 0.5156477689743042, "learning_rate": 0.0003216848920501895, "loss": 1.7712, "step": 43009 }, { "epoch": 1.43, "grad_norm": 0.5099637508392334, "learning_rate": 0.0003216744672615564, "loss": 1.8259, "step": 43010 }, { "epoch": 1.43, "grad_norm": 0.5192751288414001, "learning_rate": 0.00032166404244661376, "loss": 1.6912, "step": 43011 }, { "epoch": 1.43, "grad_norm": 0.49212536215782166, "learning_rate": 0.0003216536176053741, "loss": 1.728, "step": 43012 }, { "epoch": 1.43, "grad_norm": 0.5240345001220703, "learning_rate": 0.00032164319273785024, "loss": 1.7593, "step": 43013 }, { "epoch": 1.43, "grad_norm": 0.5184087753295898, "learning_rate": 0.0003216327678440546, "loss": 1.7262, "step": 43014 }, { "epoch": 1.43, "grad_norm": 0.5066030025482178, "learning_rate": 0.00032162234292400005, "loss": 1.7152, "step": 43015 }, { "epoch": 1.43, "grad_norm": 0.5220758318901062, "learning_rate": 0.0003216119179776992, "loss": 1.7452, "step": 43016 }, { "epoch": 1.43, "grad_norm": 0.5085986852645874, "learning_rate": 0.00032160149300516455, "loss": 1.8189, "step": 43017 }, { "epoch": 1.43, "grad_norm": 0.48626431822776794, "learning_rate": 0.00032159106800640897, "loss": 1.7734, "step": 43018 }, { "epoch": 1.43, "grad_norm": 0.5329304933547974, "learning_rate": 0.00032158064298144496, "loss": 1.7351, "step": 43019 }, { "epoch": 1.43, "grad_norm": 0.4964846670627594, "learning_rate": 0.0003215702179302853, "loss": 1.7283, "step": 43020 }, { "epoch": 1.43, "grad_norm": 0.49936869740486145, "learning_rate": 0.0003215597928529425, "loss": 1.7542, "step": 43021 }, { "epoch": 1.43, "grad_norm": 0.5033625364303589, "learning_rate": 0.00032154936774942924, "loss": 1.8765, "step": 43022 }, { "epoch": 1.43, "grad_norm": 0.5180171132087708, "learning_rate": 0.00032153894261975835, "loss": 1.7676, "step": 43023 }, { "epoch": 1.43, "grad_norm": 0.5019825100898743, "learning_rate": 0.0003215285174639422, "loss": 1.7185, "step": 43024 }, { "epoch": 1.43, "grad_norm": 0.5118962526321411, "learning_rate": 0.00032151809228199375, "loss": 1.7694, "step": 43025 }, { "epoch": 1.43, "grad_norm": 0.4894329905509949, "learning_rate": 0.00032150766707392546, "loss": 1.6619, "step": 43026 }, { "epoch": 1.43, "grad_norm": 0.5303575396537781, "learning_rate": 0.00032149724183975006, "loss": 1.7949, "step": 43027 }, { "epoch": 1.43, "grad_norm": 0.4948295056819916, "learning_rate": 0.00032148681657948015, "loss": 1.7574, "step": 43028 }, { "epoch": 1.43, "grad_norm": 0.5114890933036804, "learning_rate": 0.00032147639129312833, "loss": 1.7238, "step": 43029 }, { "epoch": 1.43, "grad_norm": 0.5010296702384949, "learning_rate": 0.00032146596598070753, "loss": 1.701, "step": 43030 }, { "epoch": 1.43, "grad_norm": 0.5091307163238525, "learning_rate": 0.00032145554064223005, "loss": 1.7446, "step": 43031 }, { "epoch": 1.43, "grad_norm": 0.5056171417236328, "learning_rate": 0.0003214451152777088, "loss": 1.7421, "step": 43032 }, { "epoch": 1.43, "grad_norm": 0.5148801803588867, "learning_rate": 0.00032143468988715624, "loss": 1.7323, "step": 43033 }, { "epoch": 1.43, "grad_norm": 0.5249409079551697, "learning_rate": 0.00032142426447058514, "loss": 1.6973, "step": 43034 }, { "epoch": 1.43, "grad_norm": 0.49049046635627747, "learning_rate": 0.00032141383902800814, "loss": 1.7651, "step": 43035 }, { "epoch": 1.43, "grad_norm": 0.4992528259754181, "learning_rate": 0.000321403413559438, "loss": 1.7253, "step": 43036 }, { "epoch": 1.43, "grad_norm": 0.5131101012229919, "learning_rate": 0.0003213929880648872, "loss": 1.8614, "step": 43037 }, { "epoch": 1.43, "grad_norm": 0.5129461288452148, "learning_rate": 0.00032138256254436845, "loss": 1.727, "step": 43038 }, { "epoch": 1.43, "grad_norm": 0.49407094717025757, "learning_rate": 0.0003213721369978944, "loss": 1.698, "step": 43039 }, { "epoch": 1.43, "grad_norm": 0.5254978537559509, "learning_rate": 0.00032136171142547776, "loss": 1.7303, "step": 43040 }, { "epoch": 1.43, "grad_norm": 0.5170247554779053, "learning_rate": 0.00032135128582713116, "loss": 1.8011, "step": 43041 }, { "epoch": 1.43, "grad_norm": 0.5075549483299255, "learning_rate": 0.00032134086020286714, "loss": 1.8073, "step": 43042 }, { "epoch": 1.43, "grad_norm": 0.5101637840270996, "learning_rate": 0.00032133043455269866, "loss": 1.7836, "step": 43043 }, { "epoch": 1.43, "grad_norm": 0.5095498561859131, "learning_rate": 0.000321320008876638, "loss": 1.7127, "step": 43044 }, { "epoch": 1.43, "grad_norm": 0.49144819378852844, "learning_rate": 0.0003213095831746981, "loss": 1.7629, "step": 43045 }, { "epoch": 1.43, "grad_norm": 0.49991247057914734, "learning_rate": 0.00032129915744689145, "loss": 1.7589, "step": 43046 }, { "epoch": 1.43, "grad_norm": 0.5081503987312317, "learning_rate": 0.0003212887316932308, "loss": 1.8022, "step": 43047 }, { "epoch": 1.43, "grad_norm": 0.5060802102088928, "learning_rate": 0.0003212783059137287, "loss": 1.7585, "step": 43048 }, { "epoch": 1.43, "grad_norm": 0.5255526304244995, "learning_rate": 0.00032126788010839786, "loss": 1.7325, "step": 43049 }, { "epoch": 1.43, "grad_norm": 0.4873925447463989, "learning_rate": 0.00032125745427725105, "loss": 1.7922, "step": 43050 }, { "epoch": 1.43, "grad_norm": 0.5227712988853455, "learning_rate": 0.0003212470284203007, "loss": 1.8473, "step": 43051 }, { "epoch": 1.43, "grad_norm": 0.5117417573928833, "learning_rate": 0.0003212366025375597, "loss": 1.7638, "step": 43052 }, { "epoch": 1.43, "grad_norm": 0.4978262186050415, "learning_rate": 0.0003212261766290406, "loss": 1.7506, "step": 43053 }, { "epoch": 1.43, "grad_norm": 0.5097330808639526, "learning_rate": 0.00032121575069475594, "loss": 1.8013, "step": 43054 }, { "epoch": 1.43, "grad_norm": 0.4947636127471924, "learning_rate": 0.0003212053247347186, "loss": 1.7626, "step": 43055 }, { "epoch": 1.43, "grad_norm": 0.4900211989879608, "learning_rate": 0.00032119489874894106, "loss": 1.8294, "step": 43056 }, { "epoch": 1.43, "grad_norm": 0.5147596001625061, "learning_rate": 0.00032118447273743597, "loss": 1.7804, "step": 43057 }, { "epoch": 1.43, "grad_norm": 0.49990394711494446, "learning_rate": 0.00032117404670021615, "loss": 1.7152, "step": 43058 }, { "epoch": 1.43, "grad_norm": 0.5097159743309021, "learning_rate": 0.0003211636206372942, "loss": 1.7778, "step": 43059 }, { "epoch": 1.43, "grad_norm": 0.5552183389663696, "learning_rate": 0.00032115319454868263, "loss": 1.7773, "step": 43060 }, { "epoch": 1.43, "grad_norm": 0.5250085592269897, "learning_rate": 0.00032114276843439425, "loss": 1.7888, "step": 43061 }, { "epoch": 1.43, "grad_norm": 0.5070376396179199, "learning_rate": 0.00032113234229444167, "loss": 1.764, "step": 43062 }, { "epoch": 1.43, "grad_norm": 0.5049840807914734, "learning_rate": 0.00032112191612883745, "loss": 1.8176, "step": 43063 }, { "epoch": 1.43, "grad_norm": 0.4966886639595032, "learning_rate": 0.0003211114899375944, "loss": 1.7527, "step": 43064 }, { "epoch": 1.43, "grad_norm": 0.507641613483429, "learning_rate": 0.0003211010637207251, "loss": 1.7676, "step": 43065 }, { "epoch": 1.43, "grad_norm": 0.49392879009246826, "learning_rate": 0.0003210906374782423, "loss": 1.759, "step": 43066 }, { "epoch": 1.43, "grad_norm": 0.5005123615264893, "learning_rate": 0.0003210802112101584, "loss": 1.7493, "step": 43067 }, { "epoch": 1.43, "grad_norm": 0.5253388285636902, "learning_rate": 0.0003210697849164864, "loss": 1.7338, "step": 43068 }, { "epoch": 1.43, "grad_norm": 0.4976712465286255, "learning_rate": 0.00032105935859723866, "loss": 1.8246, "step": 43069 }, { "epoch": 1.43, "grad_norm": 0.5203008055686951, "learning_rate": 0.000321048932252428, "loss": 1.8123, "step": 43070 }, { "epoch": 1.43, "grad_norm": 0.4748414158821106, "learning_rate": 0.00032103850588206714, "loss": 1.7708, "step": 43071 }, { "epoch": 1.43, "grad_norm": 0.5092721581459045, "learning_rate": 0.00032102807948616846, "loss": 1.8041, "step": 43072 }, { "epoch": 1.43, "grad_norm": 0.5064681768417358, "learning_rate": 0.00032101765306474496, "loss": 1.7988, "step": 43073 }, { "epoch": 1.43, "grad_norm": 0.49033182859420776, "learning_rate": 0.00032100722661780904, "loss": 1.6885, "step": 43074 }, { "epoch": 1.43, "grad_norm": 0.4868850111961365, "learning_rate": 0.00032099680014537345, "loss": 1.782, "step": 43075 }, { "epoch": 1.43, "grad_norm": 0.5008857250213623, "learning_rate": 0.0003209863736474508, "loss": 1.7247, "step": 43076 }, { "epoch": 1.43, "grad_norm": 0.5163973569869995, "learning_rate": 0.00032097594712405386, "loss": 1.8117, "step": 43077 }, { "epoch": 1.43, "grad_norm": 0.4927179515361786, "learning_rate": 0.00032096552057519524, "loss": 1.7412, "step": 43078 }, { "epoch": 1.43, "grad_norm": 0.5252600908279419, "learning_rate": 0.0003209550940008874, "loss": 1.7747, "step": 43079 }, { "epoch": 1.43, "grad_norm": 0.5014950037002563, "learning_rate": 0.0003209446674011433, "loss": 1.7336, "step": 43080 }, { "epoch": 1.43, "grad_norm": 0.4998096227645874, "learning_rate": 0.0003209342407759754, "loss": 1.7209, "step": 43081 }, { "epoch": 1.43, "grad_norm": 0.5158846378326416, "learning_rate": 0.00032092381412539647, "loss": 1.746, "step": 43082 }, { "epoch": 1.43, "grad_norm": 0.5355947613716125, "learning_rate": 0.0003209133874494191, "loss": 1.7827, "step": 43083 }, { "epoch": 1.43, "grad_norm": 0.5027115345001221, "learning_rate": 0.00032090296074805595, "loss": 1.7533, "step": 43084 }, { "epoch": 1.43, "grad_norm": 0.5003648400306702, "learning_rate": 0.0003208925340213197, "loss": 1.7786, "step": 43085 }, { "epoch": 1.43, "grad_norm": 0.4852096736431122, "learning_rate": 0.00032088210726922295, "loss": 1.7845, "step": 43086 }, { "epoch": 1.43, "grad_norm": 0.5308312177658081, "learning_rate": 0.0003208716804917784, "loss": 1.7343, "step": 43087 }, { "epoch": 1.43, "grad_norm": 0.8626711964607239, "learning_rate": 0.00032086125368899877, "loss": 1.8555, "step": 43088 }, { "epoch": 1.43, "grad_norm": 0.5211384892463684, "learning_rate": 0.00032085082686089656, "loss": 1.8035, "step": 43089 }, { "epoch": 1.43, "grad_norm": 0.5023193955421448, "learning_rate": 0.0003208404000074845, "loss": 1.8302, "step": 43090 }, { "epoch": 1.43, "grad_norm": 0.5315188765525818, "learning_rate": 0.00032082997312877544, "loss": 1.7116, "step": 43091 }, { "epoch": 1.43, "grad_norm": 0.5110348463058472, "learning_rate": 0.00032081954622478173, "loss": 1.8114, "step": 43092 }, { "epoch": 1.43, "grad_norm": 0.4927527606487274, "learning_rate": 0.00032080911929551614, "loss": 1.7339, "step": 43093 }, { "epoch": 1.43, "grad_norm": 0.5040901303291321, "learning_rate": 0.0003207986923409914, "loss": 1.8433, "step": 43094 }, { "epoch": 1.43, "grad_norm": 0.5035037994384766, "learning_rate": 0.00032078826536122005, "loss": 1.7649, "step": 43095 }, { "epoch": 1.43, "grad_norm": 0.5157601833343506, "learning_rate": 0.00032077783835621497, "loss": 1.7359, "step": 43096 }, { "epoch": 1.43, "grad_norm": 0.5331481099128723, "learning_rate": 0.0003207674113259885, "loss": 1.7082, "step": 43097 }, { "epoch": 1.43, "grad_norm": 0.5101703405380249, "learning_rate": 0.0003207569842705535, "loss": 1.7139, "step": 43098 }, { "epoch": 1.43, "grad_norm": 0.5126450061798096, "learning_rate": 0.0003207465571899226, "loss": 1.7232, "step": 43099 }, { "epoch": 1.43, "grad_norm": 0.4931073784828186, "learning_rate": 0.0003207361300841083, "loss": 1.7839, "step": 43100 }, { "epoch": 1.43, "grad_norm": 0.5136663317680359, "learning_rate": 0.0003207257029531236, "loss": 1.6402, "step": 43101 }, { "epoch": 1.43, "grad_norm": 0.5019890666007996, "learning_rate": 0.0003207152757969808, "loss": 1.724, "step": 43102 }, { "epoch": 1.43, "grad_norm": 0.514126718044281, "learning_rate": 0.00032070484861569285, "loss": 1.7464, "step": 43103 }, { "epoch": 1.43, "grad_norm": 0.4979955554008484, "learning_rate": 0.00032069442140927206, "loss": 1.8156, "step": 43104 }, { "epoch": 1.43, "grad_norm": 0.5207414627075195, "learning_rate": 0.00032068399417773145, "loss": 1.7806, "step": 43105 }, { "epoch": 1.43, "grad_norm": 0.5212339162826538, "learning_rate": 0.0003206735669210835, "loss": 1.7521, "step": 43106 }, { "epoch": 1.43, "grad_norm": 0.5141608715057373, "learning_rate": 0.0003206631396393408, "loss": 1.7776, "step": 43107 }, { "epoch": 1.43, "grad_norm": 0.5058198571205139, "learning_rate": 0.0003206527123325162, "loss": 1.7109, "step": 43108 }, { "epoch": 1.43, "grad_norm": 0.5260521173477173, "learning_rate": 0.00032064228500062217, "loss": 1.7858, "step": 43109 }, { "epoch": 1.43, "grad_norm": 0.511853039264679, "learning_rate": 0.0003206318576436715, "loss": 1.7135, "step": 43110 }, { "epoch": 1.43, "grad_norm": 0.5207080841064453, "learning_rate": 0.0003206214302616768, "loss": 1.749, "step": 43111 }, { "epoch": 1.43, "grad_norm": 0.5011389255523682, "learning_rate": 0.0003206110028546507, "loss": 1.7958, "step": 43112 }, { "epoch": 1.43, "grad_norm": 0.4917221963405609, "learning_rate": 0.00032060057542260587, "loss": 1.7291, "step": 43113 }, { "epoch": 1.43, "grad_norm": 0.5109106302261353, "learning_rate": 0.000320590147965555, "loss": 1.8317, "step": 43114 }, { "epoch": 1.43, "grad_norm": 0.5017020106315613, "learning_rate": 0.0003205797204835107, "loss": 1.7545, "step": 43115 }, { "epoch": 1.43, "grad_norm": 0.5141512155532837, "learning_rate": 0.0003205692929764857, "loss": 1.7414, "step": 43116 }, { "epoch": 1.43, "grad_norm": 0.5083918571472168, "learning_rate": 0.0003205588654444926, "loss": 1.7642, "step": 43117 }, { "epoch": 1.43, "grad_norm": 0.49981772899627686, "learning_rate": 0.00032054843788754397, "loss": 1.7898, "step": 43118 }, { "epoch": 1.43, "grad_norm": 0.5026229023933411, "learning_rate": 0.00032053801030565264, "loss": 1.8053, "step": 43119 }, { "epoch": 1.43, "grad_norm": 0.5063906908035278, "learning_rate": 0.00032052758269883116, "loss": 1.7094, "step": 43120 }, { "epoch": 1.43, "grad_norm": 0.4946810305118561, "learning_rate": 0.00032051715506709234, "loss": 1.7595, "step": 43121 }, { "epoch": 1.43, "grad_norm": 0.5155919194221497, "learning_rate": 0.00032050672741044847, "loss": 1.8147, "step": 43122 }, { "epoch": 1.43, "grad_norm": 0.5062635540962219, "learning_rate": 0.0003204962997289127, "loss": 1.7288, "step": 43123 }, { "epoch": 1.43, "grad_norm": 0.5139591097831726, "learning_rate": 0.0003204858720224973, "loss": 1.7799, "step": 43124 }, { "epoch": 1.43, "grad_norm": 0.49363669753074646, "learning_rate": 0.0003204754442912151, "loss": 1.7379, "step": 43125 }, { "epoch": 1.43, "grad_norm": 0.5075873136520386, "learning_rate": 0.0003204650165350788, "loss": 1.7334, "step": 43126 }, { "epoch": 1.43, "grad_norm": 0.5152872204780579, "learning_rate": 0.0003204545887541009, "loss": 1.7593, "step": 43127 }, { "epoch": 1.43, "grad_norm": 0.5089539885520935, "learning_rate": 0.00032044416094829416, "loss": 1.7442, "step": 43128 }, { "epoch": 1.43, "grad_norm": 0.5343843698501587, "learning_rate": 0.0003204337331176712, "loss": 1.8184, "step": 43129 }, { "epoch": 1.43, "grad_norm": 0.5379714369773865, "learning_rate": 0.00032042330526224475, "loss": 1.7454, "step": 43130 }, { "epoch": 1.43, "grad_norm": 0.505460262298584, "learning_rate": 0.00032041287738202744, "loss": 1.6371, "step": 43131 }, { "epoch": 1.44, "grad_norm": 0.511722981929779, "learning_rate": 0.0003204024494770319, "loss": 1.7475, "step": 43132 }, { "epoch": 1.44, "grad_norm": 0.50734943151474, "learning_rate": 0.00032039202154727077, "loss": 1.7478, "step": 43133 }, { "epoch": 1.44, "grad_norm": 0.5189316272735596, "learning_rate": 0.0003203815935927566, "loss": 1.7313, "step": 43134 }, { "epoch": 1.44, "grad_norm": 0.5296874642372131, "learning_rate": 0.00032037116561350236, "loss": 1.7775, "step": 43135 }, { "epoch": 1.44, "grad_norm": 0.5146890878677368, "learning_rate": 0.00032036073760952045, "loss": 1.7983, "step": 43136 }, { "epoch": 1.44, "grad_norm": 0.5005476474761963, "learning_rate": 0.00032035030958082355, "loss": 1.7575, "step": 43137 }, { "epoch": 1.44, "grad_norm": 0.5111588835716248, "learning_rate": 0.0003203398815274245, "loss": 1.7977, "step": 43138 }, { "epoch": 1.44, "grad_norm": 0.5202174782752991, "learning_rate": 0.00032032945344933575, "loss": 1.7617, "step": 43139 }, { "epoch": 1.44, "grad_norm": 0.525975227355957, "learning_rate": 0.0003203190253465701, "loss": 1.7103, "step": 43140 }, { "epoch": 1.44, "grad_norm": 0.7172298431396484, "learning_rate": 0.0003203085972191401, "loss": 1.7938, "step": 43141 }, { "epoch": 1.44, "grad_norm": 0.49494171142578125, "learning_rate": 0.0003202981690670585, "loss": 1.7297, "step": 43142 }, { "epoch": 1.44, "grad_norm": 0.4878895580768585, "learning_rate": 0.0003202877408903378, "loss": 1.7186, "step": 43143 }, { "epoch": 1.44, "grad_norm": 0.5014485120773315, "learning_rate": 0.0003202773126889908, "loss": 1.7557, "step": 43144 }, { "epoch": 1.44, "grad_norm": 0.5306986570358276, "learning_rate": 0.0003202668844630302, "loss": 1.7583, "step": 43145 }, { "epoch": 1.44, "grad_norm": 0.4849986732006073, "learning_rate": 0.00032025645621246856, "loss": 1.7714, "step": 43146 }, { "epoch": 1.44, "grad_norm": 0.5348520874977112, "learning_rate": 0.00032024602793731863, "loss": 1.7638, "step": 43147 }, { "epoch": 1.44, "grad_norm": 0.4695063829421997, "learning_rate": 0.00032023559963759293, "loss": 1.7488, "step": 43148 }, { "epoch": 1.44, "grad_norm": 0.5126045942306519, "learning_rate": 0.0003202251713133042, "loss": 1.763, "step": 43149 }, { "epoch": 1.44, "grad_norm": 0.5119007229804993, "learning_rate": 0.0003202147429644651, "loss": 1.8269, "step": 43150 }, { "epoch": 1.44, "grad_norm": 0.515382707118988, "learning_rate": 0.0003202043145910883, "loss": 1.8044, "step": 43151 }, { "epoch": 1.44, "grad_norm": 0.5113093852996826, "learning_rate": 0.0003201938861931864, "loss": 1.8006, "step": 43152 }, { "epoch": 1.44, "grad_norm": 0.5150375962257385, "learning_rate": 0.0003201834577707721, "loss": 1.7898, "step": 43153 }, { "epoch": 1.44, "grad_norm": 0.5038877725601196, "learning_rate": 0.0003201730293238581, "loss": 1.7139, "step": 43154 }, { "epoch": 1.44, "grad_norm": 0.5256488919258118, "learning_rate": 0.000320162600852457, "loss": 1.7581, "step": 43155 }, { "epoch": 1.44, "grad_norm": 0.5089396834373474, "learning_rate": 0.00032015217235658146, "loss": 1.7615, "step": 43156 }, { "epoch": 1.44, "grad_norm": 0.5444247722625732, "learning_rate": 0.0003201417438362441, "loss": 1.7568, "step": 43157 }, { "epoch": 1.44, "grad_norm": 0.5074543952941895, "learning_rate": 0.0003201313152914578, "loss": 1.7799, "step": 43158 }, { "epoch": 1.44, "grad_norm": 0.5309115648269653, "learning_rate": 0.0003201208867222348, "loss": 1.765, "step": 43159 }, { "epoch": 1.44, "grad_norm": 0.5166150331497192, "learning_rate": 0.0003201104581285882, "loss": 1.7864, "step": 43160 }, { "epoch": 1.44, "grad_norm": 0.536927342414856, "learning_rate": 0.00032010002951053044, "loss": 1.6918, "step": 43161 }, { "epoch": 1.44, "grad_norm": 0.5096685886383057, "learning_rate": 0.0003200896008680741, "loss": 1.7457, "step": 43162 }, { "epoch": 1.44, "grad_norm": 0.5156628489494324, "learning_rate": 0.0003200791722012321, "loss": 1.7322, "step": 43163 }, { "epoch": 1.44, "grad_norm": 0.5293442010879517, "learning_rate": 0.00032006874351001686, "loss": 1.7793, "step": 43164 }, { "epoch": 1.44, "grad_norm": 0.49075832962989807, "learning_rate": 0.00032005831479444113, "loss": 1.7565, "step": 43165 }, { "epoch": 1.44, "grad_norm": 0.5114915370941162, "learning_rate": 0.0003200478860545175, "loss": 1.7951, "step": 43166 }, { "epoch": 1.44, "grad_norm": 0.5034357905387878, "learning_rate": 0.0003200374572902588, "loss": 1.7889, "step": 43167 }, { "epoch": 1.44, "grad_norm": 0.5017973184585571, "learning_rate": 0.0003200270285016775, "loss": 1.717, "step": 43168 }, { "epoch": 1.44, "grad_norm": 0.4908325672149658, "learning_rate": 0.0003200165996887864, "loss": 1.8247, "step": 43169 }, { "epoch": 1.44, "grad_norm": 1.1210640668869019, "learning_rate": 0.00032000617085159807, "loss": 1.8017, "step": 43170 }, { "epoch": 1.44, "grad_norm": 1.0180524587631226, "learning_rate": 0.00031999574199012516, "loss": 1.8092, "step": 43171 }, { "epoch": 1.44, "grad_norm": 0.49873867630958557, "learning_rate": 0.00031998531310438043, "loss": 1.7786, "step": 43172 }, { "epoch": 1.44, "grad_norm": 0.48729199171066284, "learning_rate": 0.0003199748841943764, "loss": 1.6748, "step": 43173 }, { "epoch": 1.44, "grad_norm": 0.4954254925251007, "learning_rate": 0.0003199644552601259, "loss": 1.7889, "step": 43174 }, { "epoch": 1.44, "grad_norm": 0.5156028866767883, "learning_rate": 0.0003199540263016413, "loss": 1.721, "step": 43175 }, { "epoch": 1.44, "grad_norm": 0.5080342888832092, "learning_rate": 0.00031994359731893563, "loss": 1.7591, "step": 43176 }, { "epoch": 1.44, "grad_norm": 0.4816632866859436, "learning_rate": 0.00031993316831202133, "loss": 1.784, "step": 43177 }, { "epoch": 1.44, "grad_norm": 0.5321252942085266, "learning_rate": 0.00031992273928091116, "loss": 1.8728, "step": 43178 }, { "epoch": 1.44, "grad_norm": 0.5172272324562073, "learning_rate": 0.00031991231022561765, "loss": 1.7286, "step": 43179 }, { "epoch": 1.44, "grad_norm": 0.49684303998947144, "learning_rate": 0.0003199018811461535, "loss": 1.82, "step": 43180 }, { "epoch": 1.44, "grad_norm": 0.5020039677619934, "learning_rate": 0.00031989145204253144, "loss": 1.7501, "step": 43181 }, { "epoch": 1.44, "grad_norm": 0.4900819659233093, "learning_rate": 0.00031988102291476405, "loss": 1.7779, "step": 43182 }, { "epoch": 1.44, "grad_norm": 0.505328357219696, "learning_rate": 0.0003198705937628641, "loss": 1.763, "step": 43183 }, { "epoch": 1.44, "grad_norm": 0.5278536081314087, "learning_rate": 0.00031986016458684414, "loss": 1.7929, "step": 43184 }, { "epoch": 1.44, "grad_norm": 0.5692917108535767, "learning_rate": 0.0003198497353867169, "loss": 1.7964, "step": 43185 }, { "epoch": 1.44, "grad_norm": 0.5085955262184143, "learning_rate": 0.0003198393061624949, "loss": 1.7874, "step": 43186 }, { "epoch": 1.44, "grad_norm": 0.4919913411140442, "learning_rate": 0.00031982887691419103, "loss": 1.7664, "step": 43187 }, { "epoch": 1.44, "grad_norm": 0.5179117918014526, "learning_rate": 0.0003198184476418178, "loss": 1.7235, "step": 43188 }, { "epoch": 1.44, "grad_norm": 0.5098100304603577, "learning_rate": 0.0003198080183453878, "loss": 1.7664, "step": 43189 }, { "epoch": 1.44, "grad_norm": 0.5000215172767639, "learning_rate": 0.00031979758902491394, "loss": 1.7567, "step": 43190 }, { "epoch": 1.44, "grad_norm": 0.5164159536361694, "learning_rate": 0.0003197871596804085, "loss": 1.7749, "step": 43191 }, { "epoch": 1.44, "grad_norm": 0.5052751898765564, "learning_rate": 0.0003197767303118846, "loss": 1.8625, "step": 43192 }, { "epoch": 1.44, "grad_norm": 0.5049825310707092, "learning_rate": 0.0003197663009193545, "loss": 1.7077, "step": 43193 }, { "epoch": 1.44, "grad_norm": 0.5168131589889526, "learning_rate": 0.00031975587150283113, "loss": 1.7162, "step": 43194 }, { "epoch": 1.44, "grad_norm": 0.5527368783950806, "learning_rate": 0.00031974544206232703, "loss": 1.8238, "step": 43195 }, { "epoch": 1.44, "grad_norm": 0.5111016631126404, "learning_rate": 0.0003197350125978548, "loss": 1.7083, "step": 43196 }, { "epoch": 1.44, "grad_norm": 0.5192516446113586, "learning_rate": 0.00031972458310942724, "loss": 1.7194, "step": 43197 }, { "epoch": 1.44, "grad_norm": 0.4916009306907654, "learning_rate": 0.0003197141535970569, "loss": 1.8075, "step": 43198 }, { "epoch": 1.44, "grad_norm": 0.522929310798645, "learning_rate": 0.0003197037240607565, "loss": 1.7639, "step": 43199 }, { "epoch": 1.44, "grad_norm": 0.5026997923851013, "learning_rate": 0.0003196932945005387, "loss": 1.6781, "step": 43200 }, { "epoch": 1.44, "grad_norm": 0.5073156356811523, "learning_rate": 0.0003196828649164161, "loss": 1.7488, "step": 43201 }, { "epoch": 1.44, "grad_norm": 0.5083810091018677, "learning_rate": 0.0003196724353084015, "loss": 1.7578, "step": 43202 }, { "epoch": 1.44, "grad_norm": 0.4848107099533081, "learning_rate": 0.00031966200567650735, "loss": 1.7513, "step": 43203 }, { "epoch": 1.44, "grad_norm": 0.4961963891983032, "learning_rate": 0.00031965157602074646, "loss": 1.7531, "step": 43204 }, { "epoch": 1.44, "grad_norm": 0.5066661238670349, "learning_rate": 0.00031964114634113144, "loss": 1.7543, "step": 43205 }, { "epoch": 1.44, "grad_norm": 0.5037409663200378, "learning_rate": 0.00031963071663767493, "loss": 1.7441, "step": 43206 }, { "epoch": 1.44, "grad_norm": 0.5084314346313477, "learning_rate": 0.00031962028691038966, "loss": 1.7686, "step": 43207 }, { "epoch": 1.44, "grad_norm": 0.5196849703788757, "learning_rate": 0.00031960985715928837, "loss": 1.7492, "step": 43208 }, { "epoch": 1.44, "grad_norm": 0.49749988317489624, "learning_rate": 0.0003195994273843834, "loss": 1.7592, "step": 43209 }, { "epoch": 1.44, "grad_norm": 0.49878138303756714, "learning_rate": 0.0003195889975856877, "loss": 1.7681, "step": 43210 }, { "epoch": 1.44, "grad_norm": 0.5105203986167908, "learning_rate": 0.00031957856776321393, "loss": 1.7809, "step": 43211 }, { "epoch": 1.44, "grad_norm": 0.4906499981880188, "learning_rate": 0.0003195681379169745, "loss": 1.5996, "step": 43212 }, { "epoch": 1.44, "grad_norm": 0.5178308486938477, "learning_rate": 0.00031955770804698236, "loss": 1.8808, "step": 43213 }, { "epoch": 1.44, "grad_norm": 0.5007645487785339, "learning_rate": 0.00031954727815324994, "loss": 1.7694, "step": 43214 }, { "epoch": 1.44, "grad_norm": 0.5055752992630005, "learning_rate": 0.00031953684823579013, "loss": 1.7207, "step": 43215 }, { "epoch": 1.44, "grad_norm": 0.5032583475112915, "learning_rate": 0.0003195264182946154, "loss": 1.8103, "step": 43216 }, { "epoch": 1.44, "grad_norm": 0.5018883347511292, "learning_rate": 0.00031951598832973845, "loss": 1.7773, "step": 43217 }, { "epoch": 1.44, "grad_norm": 0.5118562579154968, "learning_rate": 0.0003195055583411721, "loss": 1.8081, "step": 43218 }, { "epoch": 1.44, "grad_norm": 0.5270171165466309, "learning_rate": 0.0003194951283289287, "loss": 1.7342, "step": 43219 }, { "epoch": 1.44, "grad_norm": 0.5145204067230225, "learning_rate": 0.00031948469829302115, "loss": 1.7365, "step": 43220 }, { "epoch": 1.44, "grad_norm": 0.5244969129562378, "learning_rate": 0.00031947426823346196, "loss": 1.7572, "step": 43221 }, { "epoch": 1.44, "grad_norm": 0.5051373243331909, "learning_rate": 0.00031946383815026406, "loss": 1.7828, "step": 43222 }, { "epoch": 1.44, "grad_norm": 0.5170595645904541, "learning_rate": 0.00031945340804343984, "loss": 1.7781, "step": 43223 }, { "epoch": 1.44, "grad_norm": 0.5041812658309937, "learning_rate": 0.000319442977913002, "loss": 1.7552, "step": 43224 }, { "epoch": 1.44, "grad_norm": 0.5029075741767883, "learning_rate": 0.0003194325477589634, "loss": 1.8307, "step": 43225 }, { "epoch": 1.44, "grad_norm": 0.5202041268348694, "learning_rate": 0.0003194221175813363, "loss": 1.7678, "step": 43226 }, { "epoch": 1.44, "grad_norm": 0.5111395716667175, "learning_rate": 0.00031941168738013385, "loss": 1.7906, "step": 43227 }, { "epoch": 1.44, "grad_norm": 0.5035196542739868, "learning_rate": 0.00031940125715536837, "loss": 1.822, "step": 43228 }, { "epoch": 1.44, "grad_norm": 0.4982890188694, "learning_rate": 0.0003193908269070526, "loss": 1.7669, "step": 43229 }, { "epoch": 1.44, "grad_norm": 0.5109121203422546, "learning_rate": 0.00031938039663519926, "loss": 1.7647, "step": 43230 }, { "epoch": 1.44, "grad_norm": 0.5161590576171875, "learning_rate": 0.00031936996633982095, "loss": 1.8541, "step": 43231 }, { "epoch": 1.44, "grad_norm": 0.5072975754737854, "learning_rate": 0.0003193595360209304, "loss": 1.7953, "step": 43232 }, { "epoch": 1.44, "grad_norm": 0.527118444442749, "learning_rate": 0.0003193491056785402, "loss": 1.848, "step": 43233 }, { "epoch": 1.44, "grad_norm": 0.5030709505081177, "learning_rate": 0.000319338675312663, "loss": 1.7424, "step": 43234 }, { "epoch": 1.44, "grad_norm": 0.5111322999000549, "learning_rate": 0.0003193282449233115, "loss": 1.781, "step": 43235 }, { "epoch": 1.44, "grad_norm": 0.5126768946647644, "learning_rate": 0.0003193178145104985, "loss": 1.6821, "step": 43236 }, { "epoch": 1.44, "grad_norm": 0.5186615586280823, "learning_rate": 0.0003193073840742363, "loss": 1.7812, "step": 43237 }, { "epoch": 1.44, "grad_norm": 0.4931700825691223, "learning_rate": 0.000319296953614538, "loss": 1.6962, "step": 43238 }, { "epoch": 1.44, "grad_norm": 0.5015577077865601, "learning_rate": 0.00031928652313141585, "loss": 1.7654, "step": 43239 }, { "epoch": 1.44, "grad_norm": 0.5110653638839722, "learning_rate": 0.00031927609262488287, "loss": 1.825, "step": 43240 }, { "epoch": 1.44, "grad_norm": 0.5096483826637268, "learning_rate": 0.0003192656620949514, "loss": 1.8442, "step": 43241 }, { "epoch": 1.44, "grad_norm": 0.5253693461418152, "learning_rate": 0.00031925523154163434, "loss": 1.781, "step": 43242 }, { "epoch": 1.44, "grad_norm": 0.529791533946991, "learning_rate": 0.0003192448009649443, "loss": 1.8059, "step": 43243 }, { "epoch": 1.44, "grad_norm": 0.5056413412094116, "learning_rate": 0.00031923437036489376, "loss": 1.7892, "step": 43244 }, { "epoch": 1.44, "grad_norm": 0.5118311047554016, "learning_rate": 0.0003192239397414957, "loss": 1.8397, "step": 43245 }, { "epoch": 1.44, "grad_norm": 0.5126358866691589, "learning_rate": 0.0003192135090947625, "loss": 1.8307, "step": 43246 }, { "epoch": 1.44, "grad_norm": 0.5064429640769958, "learning_rate": 0.000319203078424707, "loss": 1.795, "step": 43247 }, { "epoch": 1.44, "grad_norm": 0.5015314221382141, "learning_rate": 0.0003191926477313417, "loss": 1.8023, "step": 43248 }, { "epoch": 1.44, "grad_norm": 0.5196912884712219, "learning_rate": 0.00031918221701467944, "loss": 1.7175, "step": 43249 }, { "epoch": 1.44, "grad_norm": 0.5196532011032104, "learning_rate": 0.0003191717862747328, "loss": 1.7187, "step": 43250 }, { "epoch": 1.44, "grad_norm": 0.4964657127857208, "learning_rate": 0.00031916135551151437, "loss": 1.7105, "step": 43251 }, { "epoch": 1.44, "grad_norm": 0.5189601182937622, "learning_rate": 0.0003191509247250369, "loss": 1.7941, "step": 43252 }, { "epoch": 1.44, "grad_norm": 0.5104597806930542, "learning_rate": 0.00031914049391531304, "loss": 1.7448, "step": 43253 }, { "epoch": 1.44, "grad_norm": 0.487756609916687, "learning_rate": 0.00031913006308235546, "loss": 1.7319, "step": 43254 }, { "epoch": 1.44, "grad_norm": 0.5156689882278442, "learning_rate": 0.0003191196322261768, "loss": 1.7529, "step": 43255 }, { "epoch": 1.44, "grad_norm": 0.5202252268791199, "learning_rate": 0.0003191092013467897, "loss": 1.7724, "step": 43256 }, { "epoch": 1.44, "grad_norm": 0.5031125545501709, "learning_rate": 0.0003190987704442069, "loss": 1.7363, "step": 43257 }, { "epoch": 1.44, "grad_norm": 0.5257742404937744, "learning_rate": 0.00031908833951844095, "loss": 1.6981, "step": 43258 }, { "epoch": 1.44, "grad_norm": 0.518401026725769, "learning_rate": 0.00031907790856950454, "loss": 1.7113, "step": 43259 }, { "epoch": 1.44, "grad_norm": 0.500204861164093, "learning_rate": 0.0003190674775974104, "loss": 1.7199, "step": 43260 }, { "epoch": 1.44, "grad_norm": 0.5178009867668152, "learning_rate": 0.0003190570466021711, "loss": 1.8472, "step": 43261 }, { "epoch": 1.44, "grad_norm": 0.5521052479743958, "learning_rate": 0.00031904661558379945, "loss": 1.7193, "step": 43262 }, { "epoch": 1.44, "grad_norm": 0.49655815958976746, "learning_rate": 0.000319036184542308, "loss": 1.761, "step": 43263 }, { "epoch": 1.44, "grad_norm": 0.5030723810195923, "learning_rate": 0.0003190257534777094, "loss": 1.7755, "step": 43264 }, { "epoch": 1.44, "grad_norm": 0.5071093440055847, "learning_rate": 0.00031901532239001623, "loss": 1.7427, "step": 43265 }, { "epoch": 1.44, "grad_norm": 0.5065807104110718, "learning_rate": 0.0003190048912792414, "loss": 1.8056, "step": 43266 }, { "epoch": 1.44, "grad_norm": 0.49152809381484985, "learning_rate": 0.00031899446014539744, "loss": 1.8028, "step": 43267 }, { "epoch": 1.44, "grad_norm": 0.503257155418396, "learning_rate": 0.0003189840289884969, "loss": 1.7703, "step": 43268 }, { "epoch": 1.44, "grad_norm": 0.5290237665176392, "learning_rate": 0.0003189735978085526, "loss": 1.7779, "step": 43269 }, { "epoch": 1.44, "grad_norm": 0.5017120242118835, "learning_rate": 0.0003189631666055772, "loss": 1.8168, "step": 43270 }, { "epoch": 1.44, "grad_norm": 0.5039176940917969, "learning_rate": 0.00031895273537958324, "loss": 1.7069, "step": 43271 }, { "epoch": 1.44, "grad_norm": 0.5033860802650452, "learning_rate": 0.00031894230413058347, "loss": 1.7636, "step": 43272 }, { "epoch": 1.44, "grad_norm": 0.48345622420310974, "learning_rate": 0.0003189318728585906, "loss": 1.6788, "step": 43273 }, { "epoch": 1.44, "grad_norm": 0.5012649893760681, "learning_rate": 0.0003189214415636171, "loss": 1.7928, "step": 43274 }, { "epoch": 1.44, "grad_norm": 0.4941188097000122, "learning_rate": 0.00031891101024567585, "loss": 1.7961, "step": 43275 }, { "epoch": 1.44, "grad_norm": 0.513987123966217, "learning_rate": 0.0003189005789047793, "loss": 1.8517, "step": 43276 }, { "epoch": 1.44, "grad_norm": 0.4950862526893616, "learning_rate": 0.0003188901475409404, "loss": 1.7376, "step": 43277 }, { "epoch": 1.44, "grad_norm": 0.5104643106460571, "learning_rate": 0.0003188797161541716, "loss": 1.7852, "step": 43278 }, { "epoch": 1.44, "grad_norm": 0.5094712972640991, "learning_rate": 0.00031886928474448553, "loss": 1.8363, "step": 43279 }, { "epoch": 1.44, "grad_norm": 0.49514785408973694, "learning_rate": 0.00031885885331189504, "loss": 1.8211, "step": 43280 }, { "epoch": 1.44, "grad_norm": 0.4994693696498871, "learning_rate": 0.00031884842185641253, "loss": 1.7747, "step": 43281 }, { "epoch": 1.44, "grad_norm": 0.49805206060409546, "learning_rate": 0.000318837990378051, "loss": 1.6757, "step": 43282 }, { "epoch": 1.44, "grad_norm": 0.5002288818359375, "learning_rate": 0.0003188275588768227, "loss": 1.7265, "step": 43283 }, { "epoch": 1.44, "grad_norm": 0.519286572933197, "learning_rate": 0.0003188171273527407, "loss": 1.8177, "step": 43284 }, { "epoch": 1.44, "grad_norm": 0.5181713104248047, "learning_rate": 0.00031880669580581746, "loss": 1.7538, "step": 43285 }, { "epoch": 1.44, "grad_norm": 0.5011851787567139, "learning_rate": 0.0003187962642360656, "loss": 1.7444, "step": 43286 }, { "epoch": 1.44, "grad_norm": 0.502074658870697, "learning_rate": 0.0003187858326434979, "loss": 1.8081, "step": 43287 }, { "epoch": 1.44, "grad_norm": 0.5198737978935242, "learning_rate": 0.00031877540102812687, "loss": 1.8189, "step": 43288 }, { "epoch": 1.44, "grad_norm": 0.5148847699165344, "learning_rate": 0.00031876496938996535, "loss": 1.7711, "step": 43289 }, { "epoch": 1.44, "grad_norm": 0.5341106653213501, "learning_rate": 0.0003187545377290259, "loss": 1.7454, "step": 43290 }, { "epoch": 1.44, "grad_norm": 0.5346330404281616, "learning_rate": 0.0003187441060453212, "loss": 1.7374, "step": 43291 }, { "epoch": 1.44, "grad_norm": 0.5034382939338684, "learning_rate": 0.00031873367433886396, "loss": 1.74, "step": 43292 }, { "epoch": 1.44, "grad_norm": 0.4829290211200714, "learning_rate": 0.00031872324260966676, "loss": 1.7524, "step": 43293 }, { "epoch": 1.44, "grad_norm": 0.5146487951278687, "learning_rate": 0.0003187128108577423, "loss": 1.7167, "step": 43294 }, { "epoch": 1.44, "grad_norm": 0.5278165340423584, "learning_rate": 0.0003187023790831033, "loss": 1.7179, "step": 43295 }, { "epoch": 1.44, "grad_norm": 0.5477694272994995, "learning_rate": 0.00031869194728576227, "loss": 1.828, "step": 43296 }, { "epoch": 1.44, "grad_norm": 0.4930649697780609, "learning_rate": 0.000318681515465732, "loss": 1.7313, "step": 43297 }, { "epoch": 1.44, "grad_norm": 0.5287447571754456, "learning_rate": 0.00031867108362302514, "loss": 1.7427, "step": 43298 }, { "epoch": 1.44, "grad_norm": 0.5173044204711914, "learning_rate": 0.00031866065175765437, "loss": 1.753, "step": 43299 }, { "epoch": 1.44, "grad_norm": 0.5022662281990051, "learning_rate": 0.00031865021986963234, "loss": 1.775, "step": 43300 }, { "epoch": 1.44, "grad_norm": 0.49947598576545715, "learning_rate": 0.0003186397879589715, "loss": 1.7805, "step": 43301 }, { "epoch": 1.44, "grad_norm": 0.5222055912017822, "learning_rate": 0.0003186293560256849, "loss": 1.7694, "step": 43302 }, { "epoch": 1.44, "grad_norm": 0.5123304128646851, "learning_rate": 0.000318618924069785, "loss": 1.7398, "step": 43303 }, { "epoch": 1.44, "grad_norm": 0.5123050212860107, "learning_rate": 0.00031860849209128434, "loss": 1.7777, "step": 43304 }, { "epoch": 1.44, "grad_norm": 0.5597171783447266, "learning_rate": 0.0003185980600901959, "loss": 1.8635, "step": 43305 }, { "epoch": 1.44, "grad_norm": 0.5106791853904724, "learning_rate": 0.00031858762806653195, "loss": 1.7393, "step": 43306 }, { "epoch": 1.44, "grad_norm": 0.5110591650009155, "learning_rate": 0.0003185771960203056, "loss": 1.7308, "step": 43307 }, { "epoch": 1.44, "grad_norm": 0.5077997446060181, "learning_rate": 0.000318566763951529, "loss": 1.7561, "step": 43308 }, { "epoch": 1.44, "grad_norm": 0.5245854258537292, "learning_rate": 0.00031855633186021526, "loss": 1.7982, "step": 43309 }, { "epoch": 1.44, "grad_norm": 0.5350663661956787, "learning_rate": 0.00031854589974637684, "loss": 1.7378, "step": 43310 }, { "epoch": 1.44, "grad_norm": 0.5170941352844238, "learning_rate": 0.00031853546761002633, "loss": 1.725, "step": 43311 }, { "epoch": 1.44, "grad_norm": 0.4908827543258667, "learning_rate": 0.0003185250354511766, "loss": 1.8024, "step": 43312 }, { "epoch": 1.44, "grad_norm": 0.5312557220458984, "learning_rate": 0.00031851460326984016, "loss": 1.7027, "step": 43313 }, { "epoch": 1.44, "grad_norm": 0.506817638874054, "learning_rate": 0.0003185041710660298, "loss": 1.6855, "step": 43314 }, { "epoch": 1.44, "grad_norm": 0.5176244378089905, "learning_rate": 0.00031849373883975797, "loss": 1.7725, "step": 43315 }, { "epoch": 1.44, "grad_norm": 0.5327348709106445, "learning_rate": 0.0003184833065910376, "loss": 1.7889, "step": 43316 }, { "epoch": 1.44, "grad_norm": 0.4983268678188324, "learning_rate": 0.0003184728743198811, "loss": 1.7405, "step": 43317 }, { "epoch": 1.44, "grad_norm": 0.746016800403595, "learning_rate": 0.0003184624420263013, "loss": 1.7916, "step": 43318 }, { "epoch": 1.44, "grad_norm": 0.5107818245887756, "learning_rate": 0.0003184520097103109, "loss": 1.8009, "step": 43319 }, { "epoch": 1.44, "grad_norm": 0.5117896199226379, "learning_rate": 0.00031844157737192244, "loss": 1.8508, "step": 43320 }, { "epoch": 1.44, "grad_norm": 0.5143097043037415, "learning_rate": 0.0003184311450111485, "loss": 1.6961, "step": 43321 }, { "epoch": 1.44, "grad_norm": 0.5021511912345886, "learning_rate": 0.00031842071262800194, "loss": 1.7318, "step": 43322 }, { "epoch": 1.44, "grad_norm": 0.5067598223686218, "learning_rate": 0.00031841028022249536, "loss": 1.7418, "step": 43323 }, { "epoch": 1.44, "grad_norm": 0.5044012069702148, "learning_rate": 0.00031839984779464143, "loss": 1.7076, "step": 43324 }, { "epoch": 1.44, "grad_norm": 0.5224719643592834, "learning_rate": 0.0003183894153444528, "loss": 1.7256, "step": 43325 }, { "epoch": 1.44, "grad_norm": 0.5059700608253479, "learning_rate": 0.0003183789828719421, "loss": 1.6967, "step": 43326 }, { "epoch": 1.44, "grad_norm": 0.5157834887504578, "learning_rate": 0.000318368550377122, "loss": 1.7765, "step": 43327 }, { "epoch": 1.44, "grad_norm": 0.4986112415790558, "learning_rate": 0.0003183581178600052, "loss": 1.7621, "step": 43328 }, { "epoch": 1.44, "grad_norm": 0.5357434153556824, "learning_rate": 0.0003183476853206044, "loss": 1.7508, "step": 43329 }, { "epoch": 1.44, "grad_norm": 0.5312487483024597, "learning_rate": 0.00031833725275893224, "loss": 1.8402, "step": 43330 }, { "epoch": 1.44, "grad_norm": 0.5060933828353882, "learning_rate": 0.0003183268201750012, "loss": 1.7044, "step": 43331 }, { "epoch": 1.44, "grad_norm": 0.492305189371109, "learning_rate": 0.0003183163875688243, "loss": 1.798, "step": 43332 }, { "epoch": 1.44, "grad_norm": 0.49563461542129517, "learning_rate": 0.00031830595494041386, "loss": 1.7124, "step": 43333 }, { "epoch": 1.44, "grad_norm": 0.6142697930335999, "learning_rate": 0.0003182955222897827, "loss": 1.7827, "step": 43334 }, { "epoch": 1.44, "grad_norm": 0.5215451717376709, "learning_rate": 0.0003182850896169436, "loss": 1.7552, "step": 43335 }, { "epoch": 1.44, "grad_norm": 0.5437026619911194, "learning_rate": 0.0003182746569219089, "loss": 1.8226, "step": 43336 }, { "epoch": 1.44, "grad_norm": 0.5042575597763062, "learning_rate": 0.0003182642242046916, "loss": 1.7478, "step": 43337 }, { "epoch": 1.44, "grad_norm": 0.5070468187332153, "learning_rate": 0.00031825379146530416, "loss": 1.785, "step": 43338 }, { "epoch": 1.44, "grad_norm": 0.521873950958252, "learning_rate": 0.00031824335870375936, "loss": 1.8638, "step": 43339 }, { "epoch": 1.44, "grad_norm": 0.5356676578521729, "learning_rate": 0.00031823292592006973, "loss": 1.7194, "step": 43340 }, { "epoch": 1.44, "grad_norm": 0.5267375111579895, "learning_rate": 0.00031822249311424806, "loss": 1.7561, "step": 43341 }, { "epoch": 1.44, "grad_norm": 0.5052036046981812, "learning_rate": 0.00031821206028630704, "loss": 1.8384, "step": 43342 }, { "epoch": 1.44, "grad_norm": 0.5388522744178772, "learning_rate": 0.00031820162743625914, "loss": 1.8613, "step": 43343 }, { "epoch": 1.44, "grad_norm": 0.5338006019592285, "learning_rate": 0.0003181911945641172, "loss": 1.8057, "step": 43344 }, { "epoch": 1.44, "grad_norm": 0.5267221927642822, "learning_rate": 0.00031818076166989387, "loss": 1.853, "step": 43345 }, { "epoch": 1.44, "grad_norm": 0.507392406463623, "learning_rate": 0.0003181703287536017, "loss": 1.7064, "step": 43346 }, { "epoch": 1.44, "grad_norm": 0.5021955370903015, "learning_rate": 0.0003181598958152534, "loss": 1.758, "step": 43347 }, { "epoch": 1.44, "grad_norm": 0.5262970328330994, "learning_rate": 0.00031814946285486174, "loss": 1.7353, "step": 43348 }, { "epoch": 1.44, "grad_norm": 0.5165205597877502, "learning_rate": 0.00031813902987243926, "loss": 1.762, "step": 43349 }, { "epoch": 1.44, "grad_norm": 0.503096342086792, "learning_rate": 0.0003181285968679988, "loss": 1.728, "step": 43350 }, { "epoch": 1.44, "grad_norm": 0.5321760773658752, "learning_rate": 0.00031811816384155273, "loss": 1.8785, "step": 43351 }, { "epoch": 1.44, "grad_norm": 0.5167753100395203, "learning_rate": 0.000318107730793114, "loss": 1.7553, "step": 43352 }, { "epoch": 1.44, "grad_norm": 0.5081117153167725, "learning_rate": 0.00031809729772269506, "loss": 1.7058, "step": 43353 }, { "epoch": 1.44, "grad_norm": 0.5101449489593506, "learning_rate": 0.0003180868646303087, "loss": 1.762, "step": 43354 }, { "epoch": 1.44, "grad_norm": 0.5044634938240051, "learning_rate": 0.0003180764315159676, "loss": 1.7814, "step": 43355 }, { "epoch": 1.44, "grad_norm": 0.5490784645080566, "learning_rate": 0.0003180659983796843, "loss": 1.8371, "step": 43356 }, { "epoch": 1.44, "grad_norm": 0.524766206741333, "learning_rate": 0.0003180555652214716, "loss": 1.8062, "step": 43357 }, { "epoch": 1.44, "grad_norm": 0.5049213767051697, "learning_rate": 0.00031804513204134203, "loss": 1.6999, "step": 43358 }, { "epoch": 1.44, "grad_norm": 0.5002279877662659, "learning_rate": 0.0003180346988393084, "loss": 1.7489, "step": 43359 }, { "epoch": 1.44, "grad_norm": 0.5034756064414978, "learning_rate": 0.0003180242656153833, "loss": 1.7522, "step": 43360 }, { "epoch": 1.44, "grad_norm": 0.511838972568512, "learning_rate": 0.00031801383236957926, "loss": 1.7842, "step": 43361 }, { "epoch": 1.44, "grad_norm": 0.5229939818382263, "learning_rate": 0.00031800339910190926, "loss": 1.7548, "step": 43362 }, { "epoch": 1.44, "grad_norm": 0.5109591484069824, "learning_rate": 0.00031799296581238565, "loss": 1.7714, "step": 43363 }, { "epoch": 1.44, "grad_norm": 0.5072882175445557, "learning_rate": 0.0003179825325010213, "loss": 1.7186, "step": 43364 }, { "epoch": 1.44, "grad_norm": 0.5037301778793335, "learning_rate": 0.00031797209916782887, "loss": 1.6827, "step": 43365 }, { "epoch": 1.44, "grad_norm": 0.49620553851127625, "learning_rate": 0.00031796166581282085, "loss": 1.7901, "step": 43366 }, { "epoch": 1.44, "grad_norm": 0.5129269361495972, "learning_rate": 0.0003179512324360102, "loss": 1.8309, "step": 43367 }, { "epoch": 1.44, "grad_norm": 0.5140463709831238, "learning_rate": 0.0003179407990374091, "loss": 1.7239, "step": 43368 }, { "epoch": 1.44, "grad_norm": 0.506321907043457, "learning_rate": 0.0003179303656170308, "loss": 1.7197, "step": 43369 }, { "epoch": 1.44, "grad_norm": 0.5298789739608765, "learning_rate": 0.00031791993217488747, "loss": 1.8317, "step": 43370 }, { "epoch": 1.44, "grad_norm": 0.5174469947814941, "learning_rate": 0.0003179094987109921, "loss": 1.8311, "step": 43371 }, { "epoch": 1.44, "grad_norm": 0.5029988288879395, "learning_rate": 0.0003178990652253572, "loss": 1.768, "step": 43372 }, { "epoch": 1.44, "grad_norm": 0.4970453381538391, "learning_rate": 0.0003178886317179954, "loss": 1.7044, "step": 43373 }, { "epoch": 1.44, "grad_norm": 0.5216244459152222, "learning_rate": 0.0003178781981889196, "loss": 1.8339, "step": 43374 }, { "epoch": 1.44, "grad_norm": 0.5325051546096802, "learning_rate": 0.0003178677646381422, "loss": 1.7466, "step": 43375 }, { "epoch": 1.44, "grad_norm": 0.49775296449661255, "learning_rate": 0.0003178573310656759, "loss": 1.7812, "step": 43376 }, { "epoch": 1.44, "grad_norm": 0.5219506025314331, "learning_rate": 0.00031784689747153355, "loss": 1.72, "step": 43377 }, { "epoch": 1.44, "grad_norm": 0.506524920463562, "learning_rate": 0.0003178364638557277, "loss": 1.7585, "step": 43378 }, { "epoch": 1.44, "grad_norm": 0.5208469033241272, "learning_rate": 0.0003178260302182709, "loss": 1.7507, "step": 43379 }, { "epoch": 1.44, "grad_norm": 0.501059353351593, "learning_rate": 0.000317815596559176, "loss": 1.761, "step": 43380 }, { "epoch": 1.44, "grad_norm": 0.49609264731407166, "learning_rate": 0.0003178051628784556, "loss": 1.7349, "step": 43381 }, { "epoch": 1.44, "grad_norm": 0.48648691177368164, "learning_rate": 0.0003177947291761223, "loss": 1.762, "step": 43382 }, { "epoch": 1.44, "grad_norm": 0.5002781748771667, "learning_rate": 0.0003177842954521889, "loss": 1.7322, "step": 43383 }, { "epoch": 1.44, "grad_norm": 0.5243406891822815, "learning_rate": 0.0003177738617066679, "loss": 1.7649, "step": 43384 }, { "epoch": 1.44, "grad_norm": 0.5314993262290955, "learning_rate": 0.0003177634279395722, "loss": 1.7497, "step": 43385 }, { "epoch": 1.44, "grad_norm": 0.4966287612915039, "learning_rate": 0.0003177529941509141, "loss": 1.76, "step": 43386 }, { "epoch": 1.44, "grad_norm": 0.511550784111023, "learning_rate": 0.00031774256034070665, "loss": 1.7076, "step": 43387 }, { "epoch": 1.44, "grad_norm": 0.4913020133972168, "learning_rate": 0.00031773212650896225, "loss": 1.6979, "step": 43388 }, { "epoch": 1.44, "grad_norm": 0.5221458673477173, "learning_rate": 0.00031772169265569374, "loss": 1.7691, "step": 43389 }, { "epoch": 1.44, "grad_norm": 0.5040199756622314, "learning_rate": 0.00031771125878091367, "loss": 1.7684, "step": 43390 }, { "epoch": 1.44, "grad_norm": 0.4954163730144501, "learning_rate": 0.0003177008248846347, "loss": 1.7191, "step": 43391 }, { "epoch": 1.44, "grad_norm": 0.4977887272834778, "learning_rate": 0.00031769039096686964, "loss": 1.7431, "step": 43392 }, { "epoch": 1.44, "grad_norm": 0.5279468297958374, "learning_rate": 0.0003176799570276309, "loss": 1.7835, "step": 43393 }, { "epoch": 1.44, "grad_norm": 0.50461745262146, "learning_rate": 0.0003176695230669314, "loss": 1.7952, "step": 43394 }, { "epoch": 1.44, "grad_norm": 0.5229884386062622, "learning_rate": 0.0003176590890847837, "loss": 1.7912, "step": 43395 }, { "epoch": 1.44, "grad_norm": 0.5157226920127869, "learning_rate": 0.00031764865508120046, "loss": 1.7034, "step": 43396 }, { "epoch": 1.44, "grad_norm": 0.5241363644599915, "learning_rate": 0.00031763822105619437, "loss": 1.7544, "step": 43397 }, { "epoch": 1.44, "grad_norm": 0.4965778887271881, "learning_rate": 0.000317627787009778, "loss": 1.7772, "step": 43398 }, { "epoch": 1.44, "grad_norm": 0.5265721082687378, "learning_rate": 0.0003176173529419642, "loss": 1.8022, "step": 43399 }, { "epoch": 1.44, "grad_norm": 0.51045161485672, "learning_rate": 0.0003176069188527654, "loss": 1.7341, "step": 43400 }, { "epoch": 1.44, "grad_norm": 0.5159698128700256, "learning_rate": 0.00031759648474219453, "loss": 1.6808, "step": 43401 }, { "epoch": 1.44, "grad_norm": 0.488454669713974, "learning_rate": 0.00031758605061026404, "loss": 1.8044, "step": 43402 }, { "epoch": 1.44, "grad_norm": 0.4991775155067444, "learning_rate": 0.0003175756164569867, "loss": 1.7381, "step": 43403 }, { "epoch": 1.44, "grad_norm": 0.5186925530433655, "learning_rate": 0.0003175651822823752, "loss": 1.8255, "step": 43404 }, { "epoch": 1.44, "grad_norm": 0.5348492860794067, "learning_rate": 0.0003175547480864421, "loss": 1.802, "step": 43405 }, { "epoch": 1.44, "grad_norm": 0.5048731565475464, "learning_rate": 0.0003175443138692001, "loss": 1.7541, "step": 43406 }, { "epoch": 1.44, "grad_norm": 0.4988219141960144, "learning_rate": 0.0003175338796306619, "loss": 1.6879, "step": 43407 }, { "epoch": 1.44, "grad_norm": 0.5060833096504211, "learning_rate": 0.0003175234453708402, "loss": 1.7213, "step": 43408 }, { "epoch": 1.44, "grad_norm": 0.5131174325942993, "learning_rate": 0.00031751301108974756, "loss": 1.7293, "step": 43409 }, { "epoch": 1.44, "grad_norm": 0.5321733951568604, "learning_rate": 0.0003175025767873967, "loss": 1.8032, "step": 43410 }, { "epoch": 1.44, "grad_norm": 0.5049811005592346, "learning_rate": 0.00031749214246380036, "loss": 1.739, "step": 43411 }, { "epoch": 1.44, "grad_norm": 0.4954855442047119, "learning_rate": 0.0003174817081189711, "loss": 1.7839, "step": 43412 }, { "epoch": 1.44, "grad_norm": 0.49112263321876526, "learning_rate": 0.00031747127375292164, "loss": 1.8155, "step": 43413 }, { "epoch": 1.44, "grad_norm": 0.5240558981895447, "learning_rate": 0.00031746083936566463, "loss": 1.7564, "step": 43414 }, { "epoch": 1.44, "grad_norm": 0.5353289246559143, "learning_rate": 0.00031745040495721267, "loss": 1.7497, "step": 43415 }, { "epoch": 1.44, "grad_norm": 0.4976726472377777, "learning_rate": 0.00031743997052757853, "loss": 1.7407, "step": 43416 }, { "epoch": 1.44, "grad_norm": 0.5250141620635986, "learning_rate": 0.00031742953607677486, "loss": 1.7326, "step": 43417 }, { "epoch": 1.44, "grad_norm": 0.5223518013954163, "learning_rate": 0.0003174191016048143, "loss": 1.7728, "step": 43418 }, { "epoch": 1.44, "grad_norm": 0.507192075252533, "learning_rate": 0.0003174086671117095, "loss": 1.8069, "step": 43419 }, { "epoch": 1.44, "grad_norm": 0.5055451393127441, "learning_rate": 0.0003173982325974731, "loss": 1.7078, "step": 43420 }, { "epoch": 1.44, "grad_norm": 0.49861422181129456, "learning_rate": 0.0003173877980621179, "loss": 1.6899, "step": 43421 }, { "epoch": 1.44, "grad_norm": 0.5052523612976074, "learning_rate": 0.0003173773635056565, "loss": 1.7541, "step": 43422 }, { "epoch": 1.44, "grad_norm": 0.509727954864502, "learning_rate": 0.00031736692892810135, "loss": 1.7379, "step": 43423 }, { "epoch": 1.44, "grad_norm": 0.5081989765167236, "learning_rate": 0.00031735649432946547, "loss": 1.7271, "step": 43424 }, { "epoch": 1.44, "grad_norm": 0.5231729745864868, "learning_rate": 0.00031734605970976126, "loss": 1.7884, "step": 43425 }, { "epoch": 1.44, "grad_norm": 0.4972745180130005, "learning_rate": 0.0003173356250690016, "loss": 1.7527, "step": 43426 }, { "epoch": 1.44, "grad_norm": 0.5128858685493469, "learning_rate": 0.000317325190407199, "loss": 1.7477, "step": 43427 }, { "epoch": 1.44, "grad_norm": 0.5190834403038025, "learning_rate": 0.00031731475572436613, "loss": 1.7953, "step": 43428 }, { "epoch": 1.44, "grad_norm": 0.5158249735832214, "learning_rate": 0.0003173043210205158, "loss": 1.7497, "step": 43429 }, { "epoch": 1.44, "grad_norm": 0.5084971785545349, "learning_rate": 0.0003172938862956604, "loss": 1.6902, "step": 43430 }, { "epoch": 1.44, "grad_norm": 0.49967271089553833, "learning_rate": 0.00031728345154981297, "loss": 1.7931, "step": 43431 }, { "epoch": 1.44, "grad_norm": 0.5124739408493042, "learning_rate": 0.00031727301678298583, "loss": 1.7676, "step": 43432 }, { "epoch": 1.45, "grad_norm": 0.5127161145210266, "learning_rate": 0.00031726258199519185, "loss": 1.8044, "step": 43433 }, { "epoch": 1.45, "grad_norm": 0.4995771050453186, "learning_rate": 0.00031725214718644366, "loss": 1.7953, "step": 43434 }, { "epoch": 1.45, "grad_norm": 0.5199728012084961, "learning_rate": 0.0003172417123567539, "loss": 1.7854, "step": 43435 }, { "epoch": 1.45, "grad_norm": 0.5213555097579956, "learning_rate": 0.0003172312775061352, "loss": 1.7987, "step": 43436 }, { "epoch": 1.45, "grad_norm": 0.5009077787399292, "learning_rate": 0.0003172208426346003, "loss": 1.7325, "step": 43437 }, { "epoch": 1.45, "grad_norm": 0.5212135910987854, "learning_rate": 0.00031721040774216176, "loss": 1.722, "step": 43438 }, { "epoch": 1.45, "grad_norm": 0.48992934823036194, "learning_rate": 0.0003171999728288324, "loss": 1.7486, "step": 43439 }, { "epoch": 1.45, "grad_norm": 0.5133692622184753, "learning_rate": 0.0003171895378946248, "loss": 1.8327, "step": 43440 }, { "epoch": 1.45, "grad_norm": 0.4888371527194977, "learning_rate": 0.00031717910293955164, "loss": 1.718, "step": 43441 }, { "epoch": 1.45, "grad_norm": 0.5643323063850403, "learning_rate": 0.0003171686679636256, "loss": 1.7451, "step": 43442 }, { "epoch": 1.45, "grad_norm": 0.5201574563980103, "learning_rate": 0.0003171582329668593, "loss": 1.7022, "step": 43443 }, { "epoch": 1.45, "grad_norm": 0.507931113243103, "learning_rate": 0.0003171477979492654, "loss": 1.7722, "step": 43444 }, { "epoch": 1.45, "grad_norm": 0.5053068399429321, "learning_rate": 0.00031713736291085664, "loss": 1.7442, "step": 43445 }, { "epoch": 1.45, "grad_norm": 0.516866147518158, "learning_rate": 0.0003171269278516456, "loss": 1.7561, "step": 43446 }, { "epoch": 1.45, "grad_norm": 0.4908336400985718, "learning_rate": 0.0003171164927716451, "loss": 1.7385, "step": 43447 }, { "epoch": 1.45, "grad_norm": 0.5408000349998474, "learning_rate": 0.00031710605767086753, "loss": 1.7635, "step": 43448 }, { "epoch": 1.45, "grad_norm": 0.5190621614456177, "learning_rate": 0.0003170956225493259, "loss": 1.7904, "step": 43449 }, { "epoch": 1.45, "grad_norm": 0.5266385078430176, "learning_rate": 0.00031708518740703254, "loss": 1.8104, "step": 43450 }, { "epoch": 1.45, "grad_norm": 0.8124681711196899, "learning_rate": 0.0003170747522440004, "loss": 1.8018, "step": 43451 }, { "epoch": 1.45, "grad_norm": 0.5164780616760254, "learning_rate": 0.00031706431706024203, "loss": 1.774, "step": 43452 }, { "epoch": 1.45, "grad_norm": 0.5799053311347961, "learning_rate": 0.00031705388185576997, "loss": 1.7009, "step": 43453 }, { "epoch": 1.45, "grad_norm": 0.5092318058013916, "learning_rate": 0.0003170434466305972, "loss": 1.7607, "step": 43454 }, { "epoch": 1.45, "grad_norm": 0.5221439003944397, "learning_rate": 0.000317033011384736, "loss": 1.7779, "step": 43455 }, { "epoch": 1.45, "grad_norm": 0.5075806975364685, "learning_rate": 0.0003170225761181994, "loss": 1.7623, "step": 43456 }, { "epoch": 1.45, "grad_norm": 0.5189505219459534, "learning_rate": 0.0003170121408309998, "loss": 1.8206, "step": 43457 }, { "epoch": 1.45, "grad_norm": 0.49408748745918274, "learning_rate": 0.00031700170552314996, "loss": 1.8237, "step": 43458 }, { "epoch": 1.45, "grad_norm": 0.5300213694572449, "learning_rate": 0.0003169912701946627, "loss": 1.7494, "step": 43459 }, { "epoch": 1.45, "grad_norm": 0.5451933145523071, "learning_rate": 0.00031698083484555035, "loss": 1.7372, "step": 43460 }, { "epoch": 1.45, "grad_norm": 0.4995060861110687, "learning_rate": 0.00031697039947582587, "loss": 1.8244, "step": 43461 }, { "epoch": 1.45, "grad_norm": 0.5548714399337769, "learning_rate": 0.0003169599640855018, "loss": 1.8643, "step": 43462 }, { "epoch": 1.45, "grad_norm": 0.5173367857933044, "learning_rate": 0.0003169495286745908, "loss": 1.7849, "step": 43463 }, { "epoch": 1.45, "grad_norm": 0.5219297409057617, "learning_rate": 0.0003169390932431056, "loss": 1.7699, "step": 43464 }, { "epoch": 1.45, "grad_norm": 0.49923768639564514, "learning_rate": 0.00031692865779105883, "loss": 1.7097, "step": 43465 }, { "epoch": 1.45, "grad_norm": 0.5155845284461975, "learning_rate": 0.0003169182223184632, "loss": 1.8487, "step": 43466 }, { "epoch": 1.45, "grad_norm": 0.5329579710960388, "learning_rate": 0.0003169077868253313, "loss": 1.7482, "step": 43467 }, { "epoch": 1.45, "grad_norm": 0.531207799911499, "learning_rate": 0.0003168973513116759, "loss": 1.7612, "step": 43468 }, { "epoch": 1.45, "grad_norm": 0.8804222345352173, "learning_rate": 0.0003168869157775095, "loss": 1.8101, "step": 43469 }, { "epoch": 1.45, "grad_norm": 0.527539849281311, "learning_rate": 0.00031687648022284495, "loss": 1.7392, "step": 43470 }, { "epoch": 1.45, "grad_norm": 0.5156620740890503, "learning_rate": 0.00031686604464769484, "loss": 1.7467, "step": 43471 }, { "epoch": 1.45, "grad_norm": 0.5208327174186707, "learning_rate": 0.00031685560905207185, "loss": 1.7256, "step": 43472 }, { "epoch": 1.45, "grad_norm": 0.5060439705848694, "learning_rate": 0.0003168451734359885, "loss": 1.7089, "step": 43473 }, { "epoch": 1.45, "grad_norm": 0.5140262246131897, "learning_rate": 0.0003168347377994578, "loss": 1.8617, "step": 43474 }, { "epoch": 1.45, "grad_norm": 0.4947018623352051, "learning_rate": 0.0003168243021424921, "loss": 1.7136, "step": 43475 }, { "epoch": 1.45, "grad_norm": 0.5030190944671631, "learning_rate": 0.00031681386646510414, "loss": 1.7296, "step": 43476 }, { "epoch": 1.45, "grad_norm": 0.5053119659423828, "learning_rate": 0.00031680343076730675, "loss": 1.7736, "step": 43477 }, { "epoch": 1.45, "grad_norm": 0.5123979449272156, "learning_rate": 0.0003167929950491123, "loss": 1.7745, "step": 43478 }, { "epoch": 1.45, "grad_norm": 0.5141005516052246, "learning_rate": 0.00031678255931053375, "loss": 1.7603, "step": 43479 }, { "epoch": 1.45, "grad_norm": 0.499197781085968, "learning_rate": 0.00031677212355158356, "loss": 1.769, "step": 43480 }, { "epoch": 1.45, "grad_norm": 0.49093005061149597, "learning_rate": 0.0003167616877722746, "loss": 1.767, "step": 43481 }, { "epoch": 1.45, "grad_norm": 0.5129190683364868, "learning_rate": 0.0003167512519726193, "loss": 1.7701, "step": 43482 }, { "epoch": 1.45, "grad_norm": 0.49995309114456177, "learning_rate": 0.0003167408161526305, "loss": 1.7607, "step": 43483 }, { "epoch": 1.45, "grad_norm": 0.49958017468452454, "learning_rate": 0.0003167303803123209, "loss": 1.7855, "step": 43484 }, { "epoch": 1.45, "grad_norm": 0.504019021987915, "learning_rate": 0.0003167199444517029, "loss": 1.7807, "step": 43485 }, { "epoch": 1.45, "grad_norm": 0.5009128451347351, "learning_rate": 0.0003167095085707895, "loss": 1.8189, "step": 43486 }, { "epoch": 1.45, "grad_norm": 0.49344509840011597, "learning_rate": 0.00031669907266959315, "loss": 1.7488, "step": 43487 }, { "epoch": 1.45, "grad_norm": 0.5140331387519836, "learning_rate": 0.00031668863674812664, "loss": 1.7905, "step": 43488 }, { "epoch": 1.45, "grad_norm": 0.5107230544090271, "learning_rate": 0.0003166782008064025, "loss": 1.7201, "step": 43489 }, { "epoch": 1.45, "grad_norm": 0.505736768245697, "learning_rate": 0.0003166677648444335, "loss": 1.6925, "step": 43490 }, { "epoch": 1.45, "grad_norm": 0.5377191305160522, "learning_rate": 0.00031665732886223245, "loss": 1.7247, "step": 43491 }, { "epoch": 1.45, "grad_norm": 0.5069149732589722, "learning_rate": 0.00031664689285981166, "loss": 1.7937, "step": 43492 }, { "epoch": 1.45, "grad_norm": 0.5084346532821655, "learning_rate": 0.00031663645683718404, "loss": 1.785, "step": 43493 }, { "epoch": 1.45, "grad_norm": 0.5122650861740112, "learning_rate": 0.0003166260207943623, "loss": 1.793, "step": 43494 }, { "epoch": 1.45, "grad_norm": 0.5093813538551331, "learning_rate": 0.0003166155847313589, "loss": 1.7428, "step": 43495 }, { "epoch": 1.45, "grad_norm": 0.5155938267707825, "learning_rate": 0.00031660514864818666, "loss": 1.8371, "step": 43496 }, { "epoch": 1.45, "grad_norm": 0.5193067193031311, "learning_rate": 0.0003165947125448583, "loss": 1.7286, "step": 43497 }, { "epoch": 1.45, "grad_norm": 0.5147133469581604, "learning_rate": 0.00031658427642138634, "loss": 1.7347, "step": 43498 }, { "epoch": 1.45, "grad_norm": 1.4514168500900269, "learning_rate": 0.0003165738402777835, "loss": 1.8248, "step": 43499 }, { "epoch": 1.45, "grad_norm": 0.5192021131515503, "learning_rate": 0.00031656340411406254, "loss": 1.7269, "step": 43500 }, { "epoch": 1.45, "grad_norm": 0.5023570656776428, "learning_rate": 0.0003165529679302359, "loss": 1.6504, "step": 43501 }, { "epoch": 1.45, "grad_norm": 0.5231956243515015, "learning_rate": 0.0003165425317263165, "loss": 1.7761, "step": 43502 }, { "epoch": 1.45, "grad_norm": 0.5120606422424316, "learning_rate": 0.0003165320955023169, "loss": 1.819, "step": 43503 }, { "epoch": 1.45, "grad_norm": 0.5309372544288635, "learning_rate": 0.00031652165925824977, "loss": 1.7551, "step": 43504 }, { "epoch": 1.45, "grad_norm": 0.5096592307090759, "learning_rate": 0.0003165112229941278, "loss": 1.7843, "step": 43505 }, { "epoch": 1.45, "grad_norm": 0.4987633526325226, "learning_rate": 0.00031650078670996355, "loss": 1.7472, "step": 43506 }, { "epoch": 1.45, "grad_norm": 0.5087085962295532, "learning_rate": 0.0003164903504057699, "loss": 1.7113, "step": 43507 }, { "epoch": 1.45, "grad_norm": 0.4880632758140564, "learning_rate": 0.00031647991408155924, "loss": 1.7472, "step": 43508 }, { "epoch": 1.45, "grad_norm": 0.49930423498153687, "learning_rate": 0.0003164694777373445, "loss": 1.7573, "step": 43509 }, { "epoch": 1.45, "grad_norm": 0.524400532245636, "learning_rate": 0.0003164590413731382, "loss": 1.7151, "step": 43510 }, { "epoch": 1.45, "grad_norm": 0.5128711462020874, "learning_rate": 0.0003164486049889531, "loss": 1.792, "step": 43511 }, { "epoch": 1.45, "grad_norm": 0.5053656697273254, "learning_rate": 0.0003164381685848018, "loss": 1.7862, "step": 43512 }, { "epoch": 1.45, "grad_norm": 0.5127731561660767, "learning_rate": 0.00031642773216069696, "loss": 1.8282, "step": 43513 }, { "epoch": 1.45, "grad_norm": 0.508285641670227, "learning_rate": 0.0003164172957166513, "loss": 1.791, "step": 43514 }, { "epoch": 1.45, "grad_norm": 0.5151403546333313, "learning_rate": 0.00031640685925267734, "loss": 1.7277, "step": 43515 }, { "epoch": 1.45, "grad_norm": 0.6349361538887024, "learning_rate": 0.00031639642276878806, "loss": 1.7857, "step": 43516 }, { "epoch": 1.45, "grad_norm": 0.5070604085922241, "learning_rate": 0.0003163859862649958, "loss": 1.7962, "step": 43517 }, { "epoch": 1.45, "grad_norm": 0.5093262195587158, "learning_rate": 0.0003163755497413135, "loss": 1.7517, "step": 43518 }, { "epoch": 1.45, "grad_norm": 0.5087288022041321, "learning_rate": 0.00031636511319775356, "loss": 1.8329, "step": 43519 }, { "epoch": 1.45, "grad_norm": 0.5148496031761169, "learning_rate": 0.00031635467663432884, "loss": 1.7302, "step": 43520 }, { "epoch": 1.45, "grad_norm": 0.4973129332065582, "learning_rate": 0.000316344240051052, "loss": 1.7232, "step": 43521 }, { "epoch": 1.45, "grad_norm": 0.5268232822418213, "learning_rate": 0.0003163338034479356, "loss": 1.7529, "step": 43522 }, { "epoch": 1.45, "grad_norm": 0.5096941590309143, "learning_rate": 0.0003163233668249924, "loss": 1.7966, "step": 43523 }, { "epoch": 1.45, "grad_norm": 0.5089524984359741, "learning_rate": 0.000316312930182235, "loss": 1.7404, "step": 43524 }, { "epoch": 1.45, "grad_norm": 0.5069242715835571, "learning_rate": 0.0003163024935196761, "loss": 1.7681, "step": 43525 }, { "epoch": 1.45, "grad_norm": 0.5027492046356201, "learning_rate": 0.00031629205683732844, "loss": 1.7519, "step": 43526 }, { "epoch": 1.45, "grad_norm": 0.5236204266548157, "learning_rate": 0.0003162816201352046, "loss": 1.7829, "step": 43527 }, { "epoch": 1.45, "grad_norm": 0.5221265554428101, "learning_rate": 0.00031627118341331716, "loss": 1.7958, "step": 43528 }, { "epoch": 1.45, "grad_norm": 0.5027850866317749, "learning_rate": 0.0003162607466716791, "loss": 1.7479, "step": 43529 }, { "epoch": 1.45, "grad_norm": 0.5048669576644897, "learning_rate": 0.00031625030991030273, "loss": 1.8248, "step": 43530 }, { "epoch": 1.45, "grad_norm": 0.5152188539505005, "learning_rate": 0.000316239873129201, "loss": 1.8117, "step": 43531 }, { "epoch": 1.45, "grad_norm": 0.5255727171897888, "learning_rate": 0.00031622943632838636, "loss": 1.7906, "step": 43532 }, { "epoch": 1.45, "grad_norm": 0.5100733041763306, "learning_rate": 0.0003162189995078716, "loss": 1.7846, "step": 43533 }, { "epoch": 1.45, "grad_norm": 0.5356575846672058, "learning_rate": 0.0003162085626676694, "loss": 1.7242, "step": 43534 }, { "epoch": 1.45, "grad_norm": 0.5076255798339844, "learning_rate": 0.0003161981258077923, "loss": 1.729, "step": 43535 }, { "epoch": 1.45, "grad_norm": 0.5129278302192688, "learning_rate": 0.0003161876889282532, "loss": 1.6623, "step": 43536 }, { "epoch": 1.45, "grad_norm": 0.522057056427002, "learning_rate": 0.0003161772520290645, "loss": 1.7824, "step": 43537 }, { "epoch": 1.45, "grad_norm": 0.7285413146018982, "learning_rate": 0.00031616681511023907, "loss": 1.8166, "step": 43538 }, { "epoch": 1.45, "grad_norm": 0.5113604068756104, "learning_rate": 0.00031615637817178956, "loss": 1.7604, "step": 43539 }, { "epoch": 1.45, "grad_norm": 0.5121286511421204, "learning_rate": 0.0003161459412137285, "loss": 1.8229, "step": 43540 }, { "epoch": 1.45, "grad_norm": 0.5031904578208923, "learning_rate": 0.00031613550423606864, "loss": 1.7515, "step": 43541 }, { "epoch": 1.45, "grad_norm": 0.5192621946334839, "learning_rate": 0.0003161250672388227, "loss": 1.7463, "step": 43542 }, { "epoch": 1.45, "grad_norm": 0.5099882483482361, "learning_rate": 0.00031611463022200326, "loss": 1.7379, "step": 43543 }, { "epoch": 1.45, "grad_norm": 0.5018848776817322, "learning_rate": 0.00031610419318562304, "loss": 1.7961, "step": 43544 }, { "epoch": 1.45, "grad_norm": 0.49001866579055786, "learning_rate": 0.00031609375612969476, "loss": 1.6943, "step": 43545 }, { "epoch": 1.45, "grad_norm": 0.5254239439964294, "learning_rate": 0.00031608331905423105, "loss": 1.7623, "step": 43546 }, { "epoch": 1.45, "grad_norm": 0.49576061964035034, "learning_rate": 0.00031607288195924444, "loss": 1.78, "step": 43547 }, { "epoch": 1.45, "grad_norm": 0.5154661536216736, "learning_rate": 0.0003160624448447478, "loss": 1.8205, "step": 43548 }, { "epoch": 1.45, "grad_norm": 0.5304089188575745, "learning_rate": 0.00031605200771075374, "loss": 1.7541, "step": 43549 }, { "epoch": 1.45, "grad_norm": 0.4906036853790283, "learning_rate": 0.00031604157055727485, "loss": 1.7922, "step": 43550 }, { "epoch": 1.45, "grad_norm": 0.5117823481559753, "learning_rate": 0.00031603113338432386, "loss": 1.8109, "step": 43551 }, { "epoch": 1.45, "grad_norm": 0.5265341997146606, "learning_rate": 0.00031602069619191344, "loss": 1.73, "step": 43552 }, { "epoch": 1.45, "grad_norm": 0.5435197353363037, "learning_rate": 0.00031601025898005635, "loss": 1.7747, "step": 43553 }, { "epoch": 1.45, "grad_norm": 0.5094517469406128, "learning_rate": 0.0003159998217487651, "loss": 1.8008, "step": 43554 }, { "epoch": 1.45, "grad_norm": 0.5072911977767944, "learning_rate": 0.0003159893844980524, "loss": 1.7901, "step": 43555 }, { "epoch": 1.45, "grad_norm": 0.4973072409629822, "learning_rate": 0.000315978947227931, "loss": 1.7479, "step": 43556 }, { "epoch": 1.45, "grad_norm": 0.4916647672653198, "learning_rate": 0.00031596850993841335, "loss": 1.7881, "step": 43557 }, { "epoch": 1.45, "grad_norm": 0.5183078646659851, "learning_rate": 0.0003159580726295124, "loss": 1.706, "step": 43558 }, { "epoch": 1.45, "grad_norm": 0.49573442339897156, "learning_rate": 0.0003159476353012407, "loss": 1.773, "step": 43559 }, { "epoch": 1.45, "grad_norm": 0.510318398475647, "learning_rate": 0.000315937197953611, "loss": 1.8445, "step": 43560 }, { "epoch": 1.45, "grad_norm": 0.48966023325920105, "learning_rate": 0.0003159267605866358, "loss": 1.7581, "step": 43561 }, { "epoch": 1.45, "grad_norm": 0.5437771081924438, "learning_rate": 0.00031591632320032784, "loss": 1.8289, "step": 43562 }, { "epoch": 1.45, "grad_norm": 0.506019115447998, "learning_rate": 0.00031590588579469986, "loss": 1.7356, "step": 43563 }, { "epoch": 1.45, "grad_norm": 0.5314326286315918, "learning_rate": 0.0003158954483697645, "loss": 1.7976, "step": 43564 }, { "epoch": 1.45, "grad_norm": 0.5494366884231567, "learning_rate": 0.00031588501092553426, "loss": 1.8253, "step": 43565 }, { "epoch": 1.45, "grad_norm": 0.5121714472770691, "learning_rate": 0.0003158745734620221, "loss": 1.7411, "step": 43566 }, { "epoch": 1.45, "grad_norm": 0.5345433354377747, "learning_rate": 0.0003158641359792405, "loss": 1.7457, "step": 43567 }, { "epoch": 1.45, "grad_norm": 0.5076079964637756, "learning_rate": 0.0003158536984772022, "loss": 1.7558, "step": 43568 }, { "epoch": 1.45, "grad_norm": 0.5074597001075745, "learning_rate": 0.00031584326095591986, "loss": 1.7499, "step": 43569 }, { "epoch": 1.45, "grad_norm": 0.5063204765319824, "learning_rate": 0.00031583282341540606, "loss": 1.7455, "step": 43570 }, { "epoch": 1.45, "grad_norm": 0.5195717811584473, "learning_rate": 0.00031582238585567364, "loss": 1.7634, "step": 43571 }, { "epoch": 1.45, "grad_norm": 0.49657511711120605, "learning_rate": 0.00031581194827673506, "loss": 1.7206, "step": 43572 }, { "epoch": 1.45, "grad_norm": 0.5229361653327942, "learning_rate": 0.00031580151067860324, "loss": 1.7297, "step": 43573 }, { "epoch": 1.45, "grad_norm": 0.5238110423088074, "learning_rate": 0.0003157910730612906, "loss": 1.7046, "step": 43574 }, { "epoch": 1.45, "grad_norm": 0.5104045271873474, "learning_rate": 0.0003157806354248099, "loss": 1.7695, "step": 43575 }, { "epoch": 1.45, "grad_norm": 0.5161253213882446, "learning_rate": 0.000315770197769174, "loss": 1.7646, "step": 43576 }, { "epoch": 1.45, "grad_norm": 0.5109217762947083, "learning_rate": 0.00031575976009439523, "loss": 1.804, "step": 43577 }, { "epoch": 1.45, "grad_norm": 0.4899824559688568, "learning_rate": 0.0003157493224004866, "loss": 1.7116, "step": 43578 }, { "epoch": 1.45, "grad_norm": 0.5024230480194092, "learning_rate": 0.00031573888468746053, "loss": 1.7729, "step": 43579 }, { "epoch": 1.45, "grad_norm": 0.5182626843452454, "learning_rate": 0.0003157284469553297, "loss": 1.7552, "step": 43580 }, { "epoch": 1.45, "grad_norm": 0.5144815444946289, "learning_rate": 0.000315718009204107, "loss": 1.7852, "step": 43581 }, { "epoch": 1.45, "grad_norm": 0.501623272895813, "learning_rate": 0.0003157075714338048, "loss": 1.7881, "step": 43582 }, { "epoch": 1.45, "grad_norm": 0.5041537880897522, "learning_rate": 0.000315697133644436, "loss": 1.7261, "step": 43583 }, { "epoch": 1.45, "grad_norm": 0.5062351226806641, "learning_rate": 0.00031568669583601324, "loss": 1.7487, "step": 43584 }, { "epoch": 1.45, "grad_norm": 0.49591344594955444, "learning_rate": 0.0003156762580085491, "loss": 1.7839, "step": 43585 }, { "epoch": 1.45, "grad_norm": 1.778195858001709, "learning_rate": 0.00031566582016205624, "loss": 1.7384, "step": 43586 }, { "epoch": 1.45, "grad_norm": 0.4998316764831543, "learning_rate": 0.0003156553822965474, "loss": 1.7303, "step": 43587 }, { "epoch": 1.45, "grad_norm": 0.5012946724891663, "learning_rate": 0.00031564494441203524, "loss": 1.7353, "step": 43588 }, { "epoch": 1.45, "grad_norm": 0.5144041776657104, "learning_rate": 0.0003156345065085325, "loss": 1.7063, "step": 43589 }, { "epoch": 1.45, "grad_norm": 0.49248623847961426, "learning_rate": 0.00031562406858605163, "loss": 1.8117, "step": 43590 }, { "epoch": 1.45, "grad_norm": 0.4892463684082031, "learning_rate": 0.0003156136306446056, "loss": 1.7781, "step": 43591 }, { "epoch": 1.45, "grad_norm": 0.49928712844848633, "learning_rate": 0.0003156031926842069, "loss": 1.8283, "step": 43592 }, { "epoch": 1.45, "grad_norm": 0.5098572373390198, "learning_rate": 0.0003155927547048681, "loss": 1.8276, "step": 43593 }, { "epoch": 1.45, "grad_norm": 0.5082381963729858, "learning_rate": 0.00031558231670660215, "loss": 1.7367, "step": 43594 }, { "epoch": 1.45, "grad_norm": 0.5242069363594055, "learning_rate": 0.00031557187868942136, "loss": 1.7384, "step": 43595 }, { "epoch": 1.45, "grad_norm": 0.4997868835926056, "learning_rate": 0.0003155614406533388, "loss": 1.7212, "step": 43596 }, { "epoch": 1.45, "grad_norm": 0.5009677410125732, "learning_rate": 0.0003155510025983668, "loss": 1.7486, "step": 43597 }, { "epoch": 1.45, "grad_norm": 0.5319281816482544, "learning_rate": 0.0003155405645245183, "loss": 1.7085, "step": 43598 }, { "epoch": 1.45, "grad_norm": 0.5102128386497498, "learning_rate": 0.00031553012643180574, "loss": 1.7303, "step": 43599 }, { "epoch": 1.45, "grad_norm": 0.5263197422027588, "learning_rate": 0.00031551968832024195, "loss": 1.7839, "step": 43600 }, { "epoch": 1.45, "grad_norm": 0.4876851439476013, "learning_rate": 0.00031550925018983955, "loss": 1.7699, "step": 43601 }, { "epoch": 1.45, "grad_norm": 0.49561983346939087, "learning_rate": 0.0003154988120406111, "loss": 1.7771, "step": 43602 }, { "epoch": 1.45, "grad_norm": 0.5176194906234741, "learning_rate": 0.00031548837387256957, "loss": 1.7561, "step": 43603 }, { "epoch": 1.45, "grad_norm": 0.4948931336402893, "learning_rate": 0.0003154779356857273, "loss": 1.77, "step": 43604 }, { "epoch": 1.45, "grad_norm": 0.5222127437591553, "learning_rate": 0.00031546749748009715, "loss": 1.7714, "step": 43605 }, { "epoch": 1.45, "grad_norm": 0.5045773983001709, "learning_rate": 0.00031545705925569166, "loss": 1.7623, "step": 43606 }, { "epoch": 1.45, "grad_norm": 0.5065597295761108, "learning_rate": 0.0003154466210125237, "loss": 1.7591, "step": 43607 }, { "epoch": 1.45, "grad_norm": 0.4821361303329468, "learning_rate": 0.00031543618275060577, "loss": 1.7524, "step": 43608 }, { "epoch": 1.45, "grad_norm": 0.5291045904159546, "learning_rate": 0.0003154257444699505, "loss": 1.7603, "step": 43609 }, { "epoch": 1.45, "grad_norm": 0.5163935422897339, "learning_rate": 0.0003154153061705707, "loss": 1.8037, "step": 43610 }, { "epoch": 1.45, "grad_norm": 0.5107847452163696, "learning_rate": 0.00031540486785247904, "loss": 1.72, "step": 43611 }, { "epoch": 1.45, "grad_norm": 0.5262717008590698, "learning_rate": 0.00031539442951568814, "loss": 1.7091, "step": 43612 }, { "epoch": 1.45, "grad_norm": 0.5001786947250366, "learning_rate": 0.00031538399116021063, "loss": 1.707, "step": 43613 }, { "epoch": 1.45, "grad_norm": 0.528296947479248, "learning_rate": 0.0003153735527860592, "loss": 1.8311, "step": 43614 }, { "epoch": 1.45, "grad_norm": 0.5080865621566772, "learning_rate": 0.00031536311439324665, "loss": 1.7008, "step": 43615 }, { "epoch": 1.45, "grad_norm": 0.5190057158470154, "learning_rate": 0.00031535267598178544, "loss": 1.8307, "step": 43616 }, { "epoch": 1.45, "grad_norm": 0.5380104780197144, "learning_rate": 0.00031534223755168837, "loss": 1.7693, "step": 43617 }, { "epoch": 1.45, "grad_norm": 0.5036359429359436, "learning_rate": 0.000315331799102968, "loss": 1.7662, "step": 43618 }, { "epoch": 1.45, "grad_norm": 0.5116101503372192, "learning_rate": 0.00031532136063563726, "loss": 1.7632, "step": 43619 }, { "epoch": 1.45, "grad_norm": 0.5278304815292358, "learning_rate": 0.0003153109221497085, "loss": 1.7963, "step": 43620 }, { "epoch": 1.45, "grad_norm": 0.522281289100647, "learning_rate": 0.00031530048364519466, "loss": 1.7845, "step": 43621 }, { "epoch": 1.45, "grad_norm": 0.4991970658302307, "learning_rate": 0.0003152900451221082, "loss": 1.7774, "step": 43622 }, { "epoch": 1.45, "grad_norm": 0.5009881854057312, "learning_rate": 0.00031527960658046187, "loss": 1.7017, "step": 43623 }, { "epoch": 1.45, "grad_norm": 0.49074259400367737, "learning_rate": 0.00031526916802026845, "loss": 1.6524, "step": 43624 }, { "epoch": 1.45, "grad_norm": 0.5134870409965515, "learning_rate": 0.00031525872944154033, "loss": 1.7604, "step": 43625 }, { "epoch": 1.45, "grad_norm": 0.526134729385376, "learning_rate": 0.0003152482908442905, "loss": 1.7418, "step": 43626 }, { "epoch": 1.45, "grad_norm": 0.4886423647403717, "learning_rate": 0.0003152378522285314, "loss": 1.7363, "step": 43627 }, { "epoch": 1.45, "grad_norm": 0.5028072595596313, "learning_rate": 0.0003152274135942759, "loss": 1.6647, "step": 43628 }, { "epoch": 1.45, "grad_norm": 0.5005750060081482, "learning_rate": 0.0003152169749415365, "loss": 1.749, "step": 43629 }, { "epoch": 1.45, "grad_norm": 0.5113948583602905, "learning_rate": 0.00031520653627032595, "loss": 1.7419, "step": 43630 }, { "epoch": 1.45, "grad_norm": 0.4990871846675873, "learning_rate": 0.00031519609758065694, "loss": 1.7301, "step": 43631 }, { "epoch": 1.45, "grad_norm": 0.542896032333374, "learning_rate": 0.000315185658872542, "loss": 1.7955, "step": 43632 }, { "epoch": 1.45, "grad_norm": 0.502654492855072, "learning_rate": 0.00031517522014599405, "loss": 1.8206, "step": 43633 }, { "epoch": 1.45, "grad_norm": 0.5044629573822021, "learning_rate": 0.0003151647814010255, "loss": 1.8179, "step": 43634 }, { "epoch": 1.45, "grad_norm": 0.5084438323974609, "learning_rate": 0.0003151543426376492, "loss": 1.7523, "step": 43635 }, { "epoch": 1.45, "grad_norm": 0.5070211887359619, "learning_rate": 0.0003151439038558777, "loss": 1.8005, "step": 43636 }, { "epoch": 1.45, "grad_norm": 0.5391767621040344, "learning_rate": 0.0003151334650557238, "loss": 1.7853, "step": 43637 }, { "epoch": 1.45, "grad_norm": 0.5091869831085205, "learning_rate": 0.0003151230262372001, "loss": 1.6988, "step": 43638 }, { "epoch": 1.45, "grad_norm": 0.5016352534294128, "learning_rate": 0.00031511258740031924, "loss": 1.7634, "step": 43639 }, { "epoch": 1.45, "grad_norm": 0.5170053243637085, "learning_rate": 0.000315102148545094, "loss": 1.7826, "step": 43640 }, { "epoch": 1.45, "grad_norm": 0.5108704566955566, "learning_rate": 0.0003150917096715369, "loss": 1.8078, "step": 43641 }, { "epoch": 1.45, "grad_norm": 0.5279855728149414, "learning_rate": 0.00031508127077966067, "loss": 1.7584, "step": 43642 }, { "epoch": 1.45, "grad_norm": 0.49685513973236084, "learning_rate": 0.00031507083186947806, "loss": 1.6848, "step": 43643 }, { "epoch": 1.45, "grad_norm": 0.5026637315750122, "learning_rate": 0.00031506039294100166, "loss": 1.7905, "step": 43644 }, { "epoch": 1.45, "grad_norm": 0.5124558806419373, "learning_rate": 0.0003150499539942441, "loss": 1.7353, "step": 43645 }, { "epoch": 1.45, "grad_norm": 0.5167951583862305, "learning_rate": 0.00031503951502921825, "loss": 1.7079, "step": 43646 }, { "epoch": 1.45, "grad_norm": 0.50411057472229, "learning_rate": 0.00031502907604593657, "loss": 1.7589, "step": 43647 }, { "epoch": 1.45, "grad_norm": 0.5136218667030334, "learning_rate": 0.00031501863704441177, "loss": 1.8066, "step": 43648 }, { "epoch": 1.45, "grad_norm": 0.5044437050819397, "learning_rate": 0.0003150081980246566, "loss": 1.7669, "step": 43649 }, { "epoch": 1.45, "grad_norm": 0.5122070908546448, "learning_rate": 0.0003149977589866837, "loss": 1.7714, "step": 43650 }, { "epoch": 1.45, "grad_norm": 0.5181348919868469, "learning_rate": 0.0003149873199305057, "loss": 1.698, "step": 43651 }, { "epoch": 1.45, "grad_norm": 0.5197412967681885, "learning_rate": 0.0003149768808561353, "loss": 1.7253, "step": 43652 }, { "epoch": 1.45, "grad_norm": 0.521006166934967, "learning_rate": 0.0003149664417635852, "loss": 1.7561, "step": 43653 }, { "epoch": 1.45, "grad_norm": 0.5357359647750854, "learning_rate": 0.000314956002652868, "loss": 1.8459, "step": 43654 }, { "epoch": 1.45, "grad_norm": 0.5225133895874023, "learning_rate": 0.00031494556352399647, "loss": 1.7754, "step": 43655 }, { "epoch": 1.45, "grad_norm": 0.5087255835533142, "learning_rate": 0.0003149351243769833, "loss": 1.7598, "step": 43656 }, { "epoch": 1.45, "grad_norm": 0.5074968338012695, "learning_rate": 0.0003149246852118409, "loss": 1.7475, "step": 43657 }, { "epoch": 1.45, "grad_norm": 0.5044524669647217, "learning_rate": 0.0003149142460285823, "loss": 1.7952, "step": 43658 }, { "epoch": 1.45, "grad_norm": 0.510485827922821, "learning_rate": 0.0003149038068272198, "loss": 1.7075, "step": 43659 }, { "epoch": 1.45, "grad_norm": 0.5027965307235718, "learning_rate": 0.00031489336760776645, "loss": 1.7014, "step": 43660 }, { "epoch": 1.45, "grad_norm": 0.49174678325653076, "learning_rate": 0.0003148829283702347, "loss": 1.7923, "step": 43661 }, { "epoch": 1.45, "grad_norm": 0.5030386447906494, "learning_rate": 0.00031487248911463725, "loss": 1.7493, "step": 43662 }, { "epoch": 1.45, "grad_norm": 0.5248722434043884, "learning_rate": 0.00031486204984098687, "loss": 1.8332, "step": 43663 }, { "epoch": 1.45, "grad_norm": 0.503587543964386, "learning_rate": 0.000314851610549296, "loss": 1.7857, "step": 43664 }, { "epoch": 1.45, "grad_norm": 0.519927442073822, "learning_rate": 0.0003148411712395776, "loss": 1.7036, "step": 43665 }, { "epoch": 1.45, "grad_norm": 0.5042126178741455, "learning_rate": 0.0003148307319118442, "loss": 1.7456, "step": 43666 }, { "epoch": 1.45, "grad_norm": 0.5167503356933594, "learning_rate": 0.00031482029256610844, "loss": 1.8546, "step": 43667 }, { "epoch": 1.45, "grad_norm": 0.5059871077537537, "learning_rate": 0.0003148098532023829, "loss": 1.8061, "step": 43668 }, { "epoch": 1.45, "grad_norm": 0.5429299473762512, "learning_rate": 0.0003147994138206805, "loss": 1.7764, "step": 43669 }, { "epoch": 1.45, "grad_norm": 0.4941243827342987, "learning_rate": 0.00031478897442101386, "loss": 1.8323, "step": 43670 }, { "epoch": 1.45, "grad_norm": 0.5001832246780396, "learning_rate": 0.0003147785350033956, "loss": 1.7197, "step": 43671 }, { "epoch": 1.45, "grad_norm": 0.509136974811554, "learning_rate": 0.00031476809556783825, "loss": 1.8179, "step": 43672 }, { "epoch": 1.45, "grad_norm": 0.49483150243759155, "learning_rate": 0.0003147576561143546, "loss": 1.7946, "step": 43673 }, { "epoch": 1.45, "grad_norm": 0.49673599004745483, "learning_rate": 0.00031474721664295736, "loss": 1.7474, "step": 43674 }, { "epoch": 1.45, "grad_norm": 0.4984455704689026, "learning_rate": 0.0003147367771536592, "loss": 1.8294, "step": 43675 }, { "epoch": 1.45, "grad_norm": 0.5051023960113525, "learning_rate": 0.00031472633764647283, "loss": 1.8036, "step": 43676 }, { "epoch": 1.45, "grad_norm": 0.5006462335586548, "learning_rate": 0.00031471589812141076, "loss": 1.7437, "step": 43677 }, { "epoch": 1.45, "grad_norm": 0.4983768165111542, "learning_rate": 0.00031470545857848576, "loss": 1.697, "step": 43678 }, { "epoch": 1.45, "grad_norm": 0.5085195302963257, "learning_rate": 0.0003146950190177105, "loss": 1.7752, "step": 43679 }, { "epoch": 1.45, "grad_norm": 0.504135251045227, "learning_rate": 0.0003146845794390977, "loss": 1.7725, "step": 43680 }, { "epoch": 1.45, "grad_norm": 0.500732421875, "learning_rate": 0.00031467413984266005, "loss": 1.7671, "step": 43681 }, { "epoch": 1.45, "grad_norm": 0.5295631885528564, "learning_rate": 0.00031466370022840994, "loss": 1.7192, "step": 43682 }, { "epoch": 1.45, "grad_norm": 0.4982089102268219, "learning_rate": 0.0003146532605963605, "loss": 1.694, "step": 43683 }, { "epoch": 1.45, "grad_norm": 0.4858297109603882, "learning_rate": 0.000314642820946524, "loss": 1.7619, "step": 43684 }, { "epoch": 1.45, "grad_norm": 0.50984787940979, "learning_rate": 0.0003146323812789133, "loss": 1.8012, "step": 43685 }, { "epoch": 1.45, "grad_norm": 0.5108144283294678, "learning_rate": 0.0003146219415935411, "loss": 1.7639, "step": 43686 }, { "epoch": 1.45, "grad_norm": 0.5210844278335571, "learning_rate": 0.0003146115018904199, "loss": 1.8068, "step": 43687 }, { "epoch": 1.45, "grad_norm": 0.512265145778656, "learning_rate": 0.0003146010621695626, "loss": 1.8147, "step": 43688 }, { "epoch": 1.45, "grad_norm": 0.4994531273841858, "learning_rate": 0.00031459062243098166, "loss": 1.7548, "step": 43689 }, { "epoch": 1.45, "grad_norm": 0.5020965337753296, "learning_rate": 0.00031458018267469, "loss": 1.7612, "step": 43690 }, { "epoch": 1.45, "grad_norm": 0.5033740997314453, "learning_rate": 0.00031456974290070005, "loss": 1.7814, "step": 43691 }, { "epoch": 1.45, "grad_norm": 0.5101912021636963, "learning_rate": 0.0003145593031090246, "loss": 1.6932, "step": 43692 }, { "epoch": 1.45, "grad_norm": 0.5087065696716309, "learning_rate": 0.00031454886329967637, "loss": 1.8214, "step": 43693 }, { "epoch": 1.45, "grad_norm": 0.520087718963623, "learning_rate": 0.0003145384234726678, "loss": 1.7642, "step": 43694 }, { "epoch": 1.45, "grad_norm": 0.5095165967941284, "learning_rate": 0.0003145279836280119, "loss": 1.8382, "step": 43695 }, { "epoch": 1.45, "grad_norm": 0.502861499786377, "learning_rate": 0.0003145175437657211, "loss": 1.7249, "step": 43696 }, { "epoch": 1.45, "grad_norm": 0.4858694076538086, "learning_rate": 0.0003145071038858081, "loss": 1.7973, "step": 43697 }, { "epoch": 1.45, "grad_norm": 0.4887532889842987, "learning_rate": 0.0003144966639882856, "loss": 1.7256, "step": 43698 }, { "epoch": 1.45, "grad_norm": 0.49688416719436646, "learning_rate": 0.0003144862240731663, "loss": 1.735, "step": 43699 }, { "epoch": 1.45, "grad_norm": 0.5186074376106262, "learning_rate": 0.0003144757841404629, "loss": 1.7598, "step": 43700 }, { "epoch": 1.45, "grad_norm": 0.5243175625801086, "learning_rate": 0.0003144653441901881, "loss": 1.8442, "step": 43701 }, { "epoch": 1.45, "grad_norm": 0.5133300423622131, "learning_rate": 0.0003144549042223544, "loss": 1.7542, "step": 43702 }, { "epoch": 1.45, "grad_norm": 0.5210111141204834, "learning_rate": 0.0003144444642369746, "loss": 1.6747, "step": 43703 }, { "epoch": 1.45, "grad_norm": 0.5212526917457581, "learning_rate": 0.0003144340242340613, "loss": 1.776, "step": 43704 }, { "epoch": 1.45, "grad_norm": 0.5001798272132874, "learning_rate": 0.00031442358421362727, "loss": 1.7954, "step": 43705 }, { "epoch": 1.45, "grad_norm": 0.5269714593887329, "learning_rate": 0.0003144131441756852, "loss": 1.7241, "step": 43706 }, { "epoch": 1.45, "grad_norm": 0.5091944336891174, "learning_rate": 0.0003144027041202476, "loss": 1.7732, "step": 43707 }, { "epoch": 1.45, "grad_norm": 0.5753005743026733, "learning_rate": 0.0003143922640473273, "loss": 1.8159, "step": 43708 }, { "epoch": 1.45, "grad_norm": 0.5016919374465942, "learning_rate": 0.0003143818239569369, "loss": 1.6954, "step": 43709 }, { "epoch": 1.45, "grad_norm": 0.5208591818809509, "learning_rate": 0.000314371383849089, "loss": 1.7433, "step": 43710 }, { "epoch": 1.45, "grad_norm": 0.5295460820198059, "learning_rate": 0.0003143609437237965, "loss": 1.7558, "step": 43711 }, { "epoch": 1.45, "grad_norm": 0.49246448278427124, "learning_rate": 0.0003143505035810718, "loss": 1.7368, "step": 43712 }, { "epoch": 1.45, "grad_norm": 0.5063104033470154, "learning_rate": 0.0003143400634209278, "loss": 1.7194, "step": 43713 }, { "epoch": 1.45, "grad_norm": 0.5018035769462585, "learning_rate": 0.0003143296232433769, "loss": 1.7442, "step": 43714 }, { "epoch": 1.45, "grad_norm": 0.49988654255867004, "learning_rate": 0.0003143191830484321, "loss": 1.8174, "step": 43715 }, { "epoch": 1.45, "grad_norm": 0.5167731642723083, "learning_rate": 0.00031430874283610595, "loss": 1.8237, "step": 43716 }, { "epoch": 1.45, "grad_norm": 0.5074811577796936, "learning_rate": 0.00031429830260641103, "loss": 1.796, "step": 43717 }, { "epoch": 1.45, "grad_norm": 0.5025050640106201, "learning_rate": 0.00031428786235936014, "loss": 1.8173, "step": 43718 }, { "epoch": 1.45, "grad_norm": 0.48196232318878174, "learning_rate": 0.00031427742209496576, "loss": 1.7218, "step": 43719 }, { "epoch": 1.45, "grad_norm": 0.5306146740913391, "learning_rate": 0.0003142669818132409, "loss": 1.746, "step": 43720 }, { "epoch": 1.45, "grad_norm": 0.5255858898162842, "learning_rate": 0.00031425654151419786, "loss": 1.7279, "step": 43721 }, { "epoch": 1.45, "grad_norm": 0.5148224234580994, "learning_rate": 0.0003142461011978495, "loss": 1.6849, "step": 43722 }, { "epoch": 1.45, "grad_norm": 0.4956090748310089, "learning_rate": 0.0003142356608642085, "loss": 1.8097, "step": 43723 }, { "epoch": 1.45, "grad_norm": 0.5129992365837097, "learning_rate": 0.0003142252205132875, "loss": 1.72, "step": 43724 }, { "epoch": 1.45, "grad_norm": 0.544506311416626, "learning_rate": 0.00031421478014509924, "loss": 1.669, "step": 43725 }, { "epoch": 1.45, "grad_norm": 0.5228031873703003, "learning_rate": 0.00031420433975965623, "loss": 1.8407, "step": 43726 }, { "epoch": 1.45, "grad_norm": 0.5055245161056519, "learning_rate": 0.00031419389935697127, "loss": 1.7744, "step": 43727 }, { "epoch": 1.45, "grad_norm": 2.083698272705078, "learning_rate": 0.00031418345893705703, "loss": 1.8087, "step": 43728 }, { "epoch": 1.45, "grad_norm": 0.4922744333744049, "learning_rate": 0.00031417301849992615, "loss": 1.7439, "step": 43729 }, { "epoch": 1.45, "grad_norm": 0.530580997467041, "learning_rate": 0.0003141625780455913, "loss": 1.7671, "step": 43730 }, { "epoch": 1.45, "grad_norm": 0.5278373956680298, "learning_rate": 0.00031415213757406526, "loss": 1.7565, "step": 43731 }, { "epoch": 1.45, "grad_norm": 0.4891751706600189, "learning_rate": 0.0003141416970853605, "loss": 1.682, "step": 43732 }, { "epoch": 1.46, "grad_norm": 0.5123592019081116, "learning_rate": 0.0003141312565794898, "loss": 1.8271, "step": 43733 }, { "epoch": 1.46, "grad_norm": 0.5138434171676636, "learning_rate": 0.00031412081605646587, "loss": 1.8013, "step": 43734 }, { "epoch": 1.46, "grad_norm": 0.49386051297187805, "learning_rate": 0.00031411037551630136, "loss": 1.7735, "step": 43735 }, { "epoch": 1.46, "grad_norm": 0.5213155150413513, "learning_rate": 0.00031409993495900895, "loss": 1.8277, "step": 43736 }, { "epoch": 1.46, "grad_norm": 0.5052146911621094, "learning_rate": 0.0003140894943846012, "loss": 1.7636, "step": 43737 }, { "epoch": 1.46, "grad_norm": 0.5098353624343872, "learning_rate": 0.00031407905379309106, "loss": 1.7531, "step": 43738 }, { "epoch": 1.46, "grad_norm": 0.5175068974494934, "learning_rate": 0.00031406861318449084, "loss": 1.8304, "step": 43739 }, { "epoch": 1.46, "grad_norm": 0.5162268877029419, "learning_rate": 0.0003140581725588135, "loss": 1.7968, "step": 43740 }, { "epoch": 1.46, "grad_norm": 0.5217494368553162, "learning_rate": 0.0003140477319160716, "loss": 1.7137, "step": 43741 }, { "epoch": 1.46, "grad_norm": 0.5215263366699219, "learning_rate": 0.00031403729125627775, "loss": 1.751, "step": 43742 }, { "epoch": 1.46, "grad_norm": 0.5060106515884399, "learning_rate": 0.0003140268505794448, "loss": 1.7303, "step": 43743 }, { "epoch": 1.46, "grad_norm": 0.49635547399520874, "learning_rate": 0.0003140164098855852, "loss": 1.7281, "step": 43744 }, { "epoch": 1.46, "grad_norm": 0.5079724788665771, "learning_rate": 0.0003140059691747119, "loss": 1.7451, "step": 43745 }, { "epoch": 1.46, "grad_norm": 0.5444133281707764, "learning_rate": 0.00031399552844683736, "loss": 1.7082, "step": 43746 }, { "epoch": 1.46, "grad_norm": 0.5551370978355408, "learning_rate": 0.00031398508770197423, "loss": 1.8484, "step": 43747 }, { "epoch": 1.46, "grad_norm": 0.5155089497566223, "learning_rate": 0.0003139746469401354, "loss": 1.7737, "step": 43748 }, { "epoch": 1.46, "grad_norm": 0.514750599861145, "learning_rate": 0.00031396420616133325, "loss": 1.758, "step": 43749 }, { "epoch": 1.46, "grad_norm": 0.5040135979652405, "learning_rate": 0.00031395376536558076, "loss": 1.7498, "step": 43750 }, { "epoch": 1.46, "grad_norm": 0.5150518417358398, "learning_rate": 0.00031394332455289027, "loss": 1.7447, "step": 43751 }, { "epoch": 1.46, "grad_norm": 0.5345627665519714, "learning_rate": 0.0003139328837232748, "loss": 1.7565, "step": 43752 }, { "epoch": 1.46, "grad_norm": 0.5185685157775879, "learning_rate": 0.0003139224428767468, "loss": 1.793, "step": 43753 }, { "epoch": 1.46, "grad_norm": 0.5041377544403076, "learning_rate": 0.00031391200201331905, "loss": 1.7368, "step": 43754 }, { "epoch": 1.46, "grad_norm": 0.5297741293907166, "learning_rate": 0.0003139015611330041, "loss": 1.7669, "step": 43755 }, { "epoch": 1.46, "grad_norm": 0.5182121992111206, "learning_rate": 0.00031389112023581475, "loss": 1.7497, "step": 43756 }, { "epoch": 1.46, "grad_norm": 0.5100510120391846, "learning_rate": 0.00031388067932176366, "loss": 1.7135, "step": 43757 }, { "epoch": 1.46, "grad_norm": 0.4903033375740051, "learning_rate": 0.0003138702383908634, "loss": 1.8145, "step": 43758 }, { "epoch": 1.46, "grad_norm": 0.5018899440765381, "learning_rate": 0.00031385979744312677, "loss": 1.794, "step": 43759 }, { "epoch": 1.46, "grad_norm": 0.5047373175621033, "learning_rate": 0.0003138493564785664, "loss": 1.7757, "step": 43760 }, { "epoch": 1.46, "grad_norm": 0.4952683448791504, "learning_rate": 0.0003138389154971949, "loss": 1.7025, "step": 43761 }, { "epoch": 1.46, "grad_norm": 0.5182157158851624, "learning_rate": 0.000313828474499025, "loss": 1.7681, "step": 43762 }, { "epoch": 1.46, "grad_norm": 0.5316466689109802, "learning_rate": 0.00031381803348406946, "loss": 1.6725, "step": 43763 }, { "epoch": 1.46, "grad_norm": 0.4916919469833374, "learning_rate": 0.00031380759245234075, "loss": 1.7161, "step": 43764 }, { "epoch": 1.46, "grad_norm": 0.5024336576461792, "learning_rate": 0.0003137971514038517, "loss": 1.7488, "step": 43765 }, { "epoch": 1.46, "grad_norm": 0.5150386691093445, "learning_rate": 0.00031378671033861495, "loss": 1.7725, "step": 43766 }, { "epoch": 1.46, "grad_norm": 0.5063767433166504, "learning_rate": 0.0003137762692566431, "loss": 1.7388, "step": 43767 }, { "epoch": 1.46, "grad_norm": 0.5096179246902466, "learning_rate": 0.00031376582815794904, "loss": 1.7467, "step": 43768 }, { "epoch": 1.46, "grad_norm": 0.5198571085929871, "learning_rate": 0.00031375538704254513, "loss": 1.7697, "step": 43769 }, { "epoch": 1.46, "grad_norm": 0.530575156211853, "learning_rate": 0.00031374494591044434, "loss": 1.788, "step": 43770 }, { "epoch": 1.46, "grad_norm": 0.5070403814315796, "learning_rate": 0.0003137345047616591, "loss": 1.8121, "step": 43771 }, { "epoch": 1.46, "grad_norm": 0.49959734082221985, "learning_rate": 0.00031372406359620223, "loss": 1.739, "step": 43772 }, { "epoch": 1.46, "grad_norm": 0.5152039527893066, "learning_rate": 0.00031371362241408644, "loss": 1.7764, "step": 43773 }, { "epoch": 1.46, "grad_norm": 0.5028845071792603, "learning_rate": 0.0003137031812153242, "loss": 1.765, "step": 43774 }, { "epoch": 1.46, "grad_norm": 0.517982006072998, "learning_rate": 0.00031369273999992843, "loss": 1.8186, "step": 43775 }, { "epoch": 1.46, "grad_norm": 0.5146627426147461, "learning_rate": 0.00031368229876791153, "loss": 1.7717, "step": 43776 }, { "epoch": 1.46, "grad_norm": 0.5177031755447388, "learning_rate": 0.0003136718575192866, "loss": 1.7548, "step": 43777 }, { "epoch": 1.46, "grad_norm": 0.4975670278072357, "learning_rate": 0.00031366141625406585, "loss": 1.7497, "step": 43778 }, { "epoch": 1.46, "grad_norm": 0.5267646312713623, "learning_rate": 0.00031365097497226217, "loss": 1.7266, "step": 43779 }, { "epoch": 1.46, "grad_norm": 0.5435457229614258, "learning_rate": 0.00031364053367388835, "loss": 1.7663, "step": 43780 }, { "epoch": 1.46, "grad_norm": 0.4911099970340729, "learning_rate": 0.00031363009235895676, "loss": 1.8005, "step": 43781 }, { "epoch": 1.46, "grad_norm": 0.5167700052261353, "learning_rate": 0.0003136196510274804, "loss": 1.7128, "step": 43782 }, { "epoch": 1.46, "grad_norm": 0.5100009441375732, "learning_rate": 0.00031360920967947166, "loss": 1.7929, "step": 43783 }, { "epoch": 1.46, "grad_norm": 0.5211110711097717, "learning_rate": 0.0003135987683149434, "loss": 1.742, "step": 43784 }, { "epoch": 1.46, "grad_norm": 0.5366324782371521, "learning_rate": 0.00031358832693390823, "loss": 1.8475, "step": 43785 }, { "epoch": 1.46, "grad_norm": 0.5007284283638, "learning_rate": 0.0003135778855363788, "loss": 1.7464, "step": 43786 }, { "epoch": 1.46, "grad_norm": 0.5162972807884216, "learning_rate": 0.00031356744412236795, "loss": 1.7319, "step": 43787 }, { "epoch": 1.46, "grad_norm": 0.5068680047988892, "learning_rate": 0.0003135570026918881, "loss": 1.8019, "step": 43788 }, { "epoch": 1.46, "grad_norm": 0.5178558826446533, "learning_rate": 0.0003135465612449521, "loss": 1.7405, "step": 43789 }, { "epoch": 1.46, "grad_norm": 0.4922882616519928, "learning_rate": 0.0003135361197815725, "loss": 1.7523, "step": 43790 }, { "epoch": 1.46, "grad_norm": 0.5135646462440491, "learning_rate": 0.0003135256783017621, "loss": 1.7279, "step": 43791 }, { "epoch": 1.46, "grad_norm": 0.5116698145866394, "learning_rate": 0.0003135152368055335, "loss": 1.7664, "step": 43792 }, { "epoch": 1.46, "grad_norm": 0.5062898397445679, "learning_rate": 0.00031350479529289945, "loss": 1.8019, "step": 43793 }, { "epoch": 1.46, "grad_norm": 0.5075848698616028, "learning_rate": 0.0003134943537638725, "loss": 1.7776, "step": 43794 }, { "epoch": 1.46, "grad_norm": 0.4923990070819855, "learning_rate": 0.0003134839122184654, "loss": 1.7676, "step": 43795 }, { "epoch": 1.46, "grad_norm": 0.506340742111206, "learning_rate": 0.00031347347065669085, "loss": 1.7805, "step": 43796 }, { "epoch": 1.46, "grad_norm": 0.5373557209968567, "learning_rate": 0.0003134630290785615, "loss": 1.8153, "step": 43797 }, { "epoch": 1.46, "grad_norm": 0.5112107396125793, "learning_rate": 0.00031345258748409, "loss": 1.6983, "step": 43798 }, { "epoch": 1.46, "grad_norm": 0.5104847550392151, "learning_rate": 0.00031344214587328896, "loss": 1.7864, "step": 43799 }, { "epoch": 1.46, "grad_norm": 0.5065774917602539, "learning_rate": 0.0003134317042461713, "loss": 1.7751, "step": 43800 }, { "epoch": 1.46, "grad_norm": 0.7004552483558655, "learning_rate": 0.00031342126260274937, "loss": 1.6899, "step": 43801 }, { "epoch": 1.46, "grad_norm": 0.5359147787094116, "learning_rate": 0.0003134108209430361, "loss": 1.7694, "step": 43802 }, { "epoch": 1.46, "grad_norm": 0.5070459842681885, "learning_rate": 0.0003134003792670441, "loss": 1.762, "step": 43803 }, { "epoch": 1.46, "grad_norm": 0.5045656561851501, "learning_rate": 0.0003133899375747859, "loss": 1.6993, "step": 43804 }, { "epoch": 1.46, "grad_norm": 0.5379103422164917, "learning_rate": 0.0003133794958662744, "loss": 1.82, "step": 43805 }, { "epoch": 1.46, "grad_norm": 0.5461310148239136, "learning_rate": 0.000313369054141522, "loss": 1.7257, "step": 43806 }, { "epoch": 1.46, "grad_norm": 0.5208902359008789, "learning_rate": 0.0003133586124005417, "loss": 1.7713, "step": 43807 }, { "epoch": 1.46, "grad_norm": 0.519260823726654, "learning_rate": 0.00031334817064334594, "loss": 1.741, "step": 43808 }, { "epoch": 1.46, "grad_norm": 0.5238685011863708, "learning_rate": 0.0003133377288699475, "loss": 1.7673, "step": 43809 }, { "epoch": 1.46, "grad_norm": 0.5178292393684387, "learning_rate": 0.000313327287080359, "loss": 1.7481, "step": 43810 }, { "epoch": 1.46, "grad_norm": 0.4975834786891937, "learning_rate": 0.0003133168452745931, "loss": 1.7176, "step": 43811 }, { "epoch": 1.46, "grad_norm": 0.5176767706871033, "learning_rate": 0.0003133064034526627, "loss": 1.6787, "step": 43812 }, { "epoch": 1.46, "grad_norm": 0.5157514214515686, "learning_rate": 0.0003132959616145801, "loss": 1.788, "step": 43813 }, { "epoch": 1.46, "grad_norm": 0.5198751091957092, "learning_rate": 0.0003132855197603582, "loss": 1.6482, "step": 43814 }, { "epoch": 1.46, "grad_norm": 0.49126997590065, "learning_rate": 0.00031327507789000967, "loss": 1.7095, "step": 43815 }, { "epoch": 1.46, "grad_norm": 0.5054423809051514, "learning_rate": 0.0003132646360035471, "loss": 1.7566, "step": 43816 }, { "epoch": 1.46, "grad_norm": 0.5363593101501465, "learning_rate": 0.00031325419410098323, "loss": 1.8266, "step": 43817 }, { "epoch": 1.46, "grad_norm": 0.5013914108276367, "learning_rate": 0.00031324375218233074, "loss": 1.7364, "step": 43818 }, { "epoch": 1.46, "grad_norm": 0.5293975472450256, "learning_rate": 0.0003132333102476023, "loss": 1.7242, "step": 43819 }, { "epoch": 1.46, "grad_norm": 0.5257689952850342, "learning_rate": 0.00031322286829681053, "loss": 1.8233, "step": 43820 }, { "epoch": 1.46, "grad_norm": 0.49517014622688293, "learning_rate": 0.0003132124263299682, "loss": 1.7699, "step": 43821 }, { "epoch": 1.46, "grad_norm": 0.5132851600646973, "learning_rate": 0.0003132019843470878, "loss": 1.7761, "step": 43822 }, { "epoch": 1.46, "grad_norm": 0.5359014868736267, "learning_rate": 0.00031319154234818236, "loss": 1.794, "step": 43823 }, { "epoch": 1.46, "grad_norm": 0.5066531300544739, "learning_rate": 0.0003131811003332641, "loss": 1.7256, "step": 43824 }, { "epoch": 1.46, "grad_norm": 0.5113502740859985, "learning_rate": 0.0003131706583023461, "loss": 1.7659, "step": 43825 }, { "epoch": 1.46, "grad_norm": 0.4980027675628662, "learning_rate": 0.00031316021625544074, "loss": 1.759, "step": 43826 }, { "epoch": 1.46, "grad_norm": 0.514441967010498, "learning_rate": 0.0003131497741925609, "loss": 1.8101, "step": 43827 }, { "epoch": 1.46, "grad_norm": 0.5053277611732483, "learning_rate": 0.00031313933211371916, "loss": 1.8654, "step": 43828 }, { "epoch": 1.46, "grad_norm": 0.5200092792510986, "learning_rate": 0.00031312889001892816, "loss": 1.734, "step": 43829 }, { "epoch": 1.46, "grad_norm": 0.5325984358787537, "learning_rate": 0.0003131184479082007, "loss": 1.7438, "step": 43830 }, { "epoch": 1.46, "grad_norm": 0.490024209022522, "learning_rate": 0.0003131080057815492, "loss": 1.8239, "step": 43831 }, { "epoch": 1.46, "grad_norm": 0.5277093648910522, "learning_rate": 0.00031309756363898675, "loss": 1.7923, "step": 43832 }, { "epoch": 1.46, "grad_norm": 0.5149168968200684, "learning_rate": 0.0003130871214805257, "loss": 1.7199, "step": 43833 }, { "epoch": 1.46, "grad_norm": 0.5186553001403809, "learning_rate": 0.00031307667930617877, "loss": 1.7792, "step": 43834 }, { "epoch": 1.46, "grad_norm": 0.5135557055473328, "learning_rate": 0.00031306623711595873, "loss": 1.7762, "step": 43835 }, { "epoch": 1.46, "grad_norm": 0.5024224519729614, "learning_rate": 0.00031305579490987806, "loss": 1.8265, "step": 43836 }, { "epoch": 1.46, "grad_norm": 0.515881359577179, "learning_rate": 0.0003130453526879497, "loss": 1.7323, "step": 43837 }, { "epoch": 1.46, "grad_norm": 0.48036590218544006, "learning_rate": 0.0003130349104501863, "loss": 1.7166, "step": 43838 }, { "epoch": 1.46, "grad_norm": 0.5465320944786072, "learning_rate": 0.00031302446819660027, "loss": 1.7668, "step": 43839 }, { "epoch": 1.46, "grad_norm": 0.5336117148399353, "learning_rate": 0.00031301402592720447, "loss": 1.7675, "step": 43840 }, { "epoch": 1.46, "grad_norm": 0.5140290260314941, "learning_rate": 0.00031300358364201163, "loss": 1.7122, "step": 43841 }, { "epoch": 1.46, "grad_norm": 0.5236154794692993, "learning_rate": 0.0003129931413410344, "loss": 1.8218, "step": 43842 }, { "epoch": 1.46, "grad_norm": 0.5251482725143433, "learning_rate": 0.00031298269902428535, "loss": 1.765, "step": 43843 }, { "epoch": 1.46, "grad_norm": 0.5200173258781433, "learning_rate": 0.00031297225669177717, "loss": 1.7946, "step": 43844 }, { "epoch": 1.46, "grad_norm": 0.5754777789115906, "learning_rate": 0.00031296181434352263, "loss": 1.8209, "step": 43845 }, { "epoch": 1.46, "grad_norm": 0.5162515640258789, "learning_rate": 0.0003129513719795343, "loss": 1.644, "step": 43846 }, { "epoch": 1.46, "grad_norm": 0.5101108551025391, "learning_rate": 0.000312940929599825, "loss": 1.7809, "step": 43847 }, { "epoch": 1.46, "grad_norm": 0.5101550221443176, "learning_rate": 0.0003129304872044073, "loss": 1.7745, "step": 43848 }, { "epoch": 1.46, "grad_norm": 0.5094894170761108, "learning_rate": 0.00031292004479329386, "loss": 1.7604, "step": 43849 }, { "epoch": 1.46, "grad_norm": 0.5019994974136353, "learning_rate": 0.00031290960236649744, "loss": 1.7273, "step": 43850 }, { "epoch": 1.46, "grad_norm": 0.5163320899009705, "learning_rate": 0.0003128991599240306, "loss": 1.7946, "step": 43851 }, { "epoch": 1.46, "grad_norm": 0.5083014369010925, "learning_rate": 0.00031288871746590607, "loss": 1.7894, "step": 43852 }, { "epoch": 1.46, "grad_norm": 0.5154038071632385, "learning_rate": 0.00031287827499213664, "loss": 1.7309, "step": 43853 }, { "epoch": 1.46, "grad_norm": 0.5185154676437378, "learning_rate": 0.00031286783250273476, "loss": 1.8012, "step": 43854 }, { "epoch": 1.46, "grad_norm": 0.5028907060623169, "learning_rate": 0.0003128573899977133, "loss": 1.8203, "step": 43855 }, { "epoch": 1.46, "grad_norm": 0.5051752328872681, "learning_rate": 0.00031284694747708483, "loss": 1.7549, "step": 43856 }, { "epoch": 1.46, "grad_norm": 0.5048777461051941, "learning_rate": 0.0003128365049408621, "loss": 1.7399, "step": 43857 }, { "epoch": 1.46, "grad_norm": 0.5345429182052612, "learning_rate": 0.0003128260623890578, "loss": 1.728, "step": 43858 }, { "epoch": 1.46, "grad_norm": 3.522853374481201, "learning_rate": 0.00031281561982168435, "loss": 1.8035, "step": 43859 }, { "epoch": 1.46, "grad_norm": 0.5312309861183167, "learning_rate": 0.0003128051772387548, "loss": 1.7539, "step": 43860 }, { "epoch": 1.46, "grad_norm": 0.5044257640838623, "learning_rate": 0.0003127947346402815, "loss": 1.8566, "step": 43861 }, { "epoch": 1.46, "grad_norm": 0.5165826678276062, "learning_rate": 0.0003127842920262775, "loss": 1.6822, "step": 43862 }, { "epoch": 1.46, "grad_norm": 0.5104301571846008, "learning_rate": 0.0003127738493967551, "loss": 1.7931, "step": 43863 }, { "epoch": 1.46, "grad_norm": 0.49795153737068176, "learning_rate": 0.00031276340675172716, "loss": 1.7548, "step": 43864 }, { "epoch": 1.46, "grad_norm": 0.502334713935852, "learning_rate": 0.0003127529640912064, "loss": 1.7793, "step": 43865 }, { "epoch": 1.46, "grad_norm": 0.493561714887619, "learning_rate": 0.00031274252141520525, "loss": 1.7152, "step": 43866 }, { "epoch": 1.46, "grad_norm": 0.5029139518737793, "learning_rate": 0.0003127320787237367, "loss": 1.7471, "step": 43867 }, { "epoch": 1.46, "grad_norm": 0.5028762221336365, "learning_rate": 0.0003127216360168132, "loss": 1.7847, "step": 43868 }, { "epoch": 1.46, "grad_norm": 0.49275144934654236, "learning_rate": 0.0003127111932944476, "loss": 1.8186, "step": 43869 }, { "epoch": 1.46, "grad_norm": 0.4923340082168579, "learning_rate": 0.0003127007505566523, "loss": 1.7621, "step": 43870 }, { "epoch": 1.46, "grad_norm": 0.5296101570129395, "learning_rate": 0.0003126903078034403, "loss": 1.7482, "step": 43871 }, { "epoch": 1.46, "grad_norm": 0.5220651030540466, "learning_rate": 0.00031267986503482416, "loss": 1.8112, "step": 43872 }, { "epoch": 1.46, "grad_norm": 0.4994948208332062, "learning_rate": 0.00031266942225081646, "loss": 1.6958, "step": 43873 }, { "epoch": 1.46, "grad_norm": 0.4954681396484375, "learning_rate": 0.00031265897945143006, "loss": 1.7005, "step": 43874 }, { "epoch": 1.46, "grad_norm": 0.49515819549560547, "learning_rate": 0.0003126485366366774, "loss": 1.7261, "step": 43875 }, { "epoch": 1.46, "grad_norm": 0.5118321776390076, "learning_rate": 0.00031263809380657135, "loss": 1.7427, "step": 43876 }, { "epoch": 1.46, "grad_norm": 0.5290290117263794, "learning_rate": 0.0003126276509611244, "loss": 1.7846, "step": 43877 }, { "epoch": 1.46, "grad_norm": 0.4967629909515381, "learning_rate": 0.0003126172081003495, "loss": 1.7383, "step": 43878 }, { "epoch": 1.46, "grad_norm": 0.510846734046936, "learning_rate": 0.00031260676522425906, "loss": 1.8006, "step": 43879 }, { "epoch": 1.46, "grad_norm": 0.5246809720993042, "learning_rate": 0.00031259632233286597, "loss": 1.7129, "step": 43880 }, { "epoch": 1.46, "grad_norm": 0.517762303352356, "learning_rate": 0.00031258587942618274, "loss": 1.7484, "step": 43881 }, { "epoch": 1.46, "grad_norm": 0.5157285332679749, "learning_rate": 0.00031257543650422214, "loss": 1.743, "step": 43882 }, { "epoch": 1.46, "grad_norm": 0.501848578453064, "learning_rate": 0.00031256499356699667, "loss": 1.79, "step": 43883 }, { "epoch": 1.46, "grad_norm": 0.4957845211029053, "learning_rate": 0.00031255455061451925, "loss": 1.8127, "step": 43884 }, { "epoch": 1.46, "grad_norm": 0.5089690089225769, "learning_rate": 0.00031254410764680254, "loss": 1.6976, "step": 43885 }, { "epoch": 1.46, "grad_norm": 0.5067073702812195, "learning_rate": 0.000312533664663859, "loss": 1.7759, "step": 43886 }, { "epoch": 1.46, "grad_norm": 0.5125669240951538, "learning_rate": 0.0003125232216657015, "loss": 1.7707, "step": 43887 }, { "epoch": 1.46, "grad_norm": 0.5137743353843689, "learning_rate": 0.00031251277865234267, "loss": 1.822, "step": 43888 }, { "epoch": 1.46, "grad_norm": 0.509550154209137, "learning_rate": 0.0003125023356237952, "loss": 1.7403, "step": 43889 }, { "epoch": 1.46, "grad_norm": 0.5070277452468872, "learning_rate": 0.0003124918925800717, "loss": 1.856, "step": 43890 }, { "epoch": 1.46, "grad_norm": 0.5066754221916199, "learning_rate": 0.00031248144952118474, "loss": 1.7587, "step": 43891 }, { "epoch": 1.46, "grad_norm": 0.4994890093803406, "learning_rate": 0.0003124710064471474, "loss": 1.7424, "step": 43892 }, { "epoch": 1.46, "grad_norm": 0.5316110253334045, "learning_rate": 0.0003124605633579718, "loss": 1.8185, "step": 43893 }, { "epoch": 1.46, "grad_norm": 0.5179218053817749, "learning_rate": 0.00031245012025367115, "loss": 1.8145, "step": 43894 }, { "epoch": 1.46, "grad_norm": 0.5448544025421143, "learning_rate": 0.0003124396771342578, "loss": 1.7211, "step": 43895 }, { "epoch": 1.46, "grad_norm": 0.508743166923523, "learning_rate": 0.0003124292339997445, "loss": 1.6884, "step": 43896 }, { "epoch": 1.46, "grad_norm": 0.5466702580451965, "learning_rate": 0.000312418790850144, "loss": 1.6981, "step": 43897 }, { "epoch": 1.46, "grad_norm": 0.5141259431838989, "learning_rate": 0.00031240834768546887, "loss": 1.8293, "step": 43898 }, { "epoch": 1.46, "grad_norm": 0.5029074549674988, "learning_rate": 0.0003123979045057319, "loss": 1.7165, "step": 43899 }, { "epoch": 1.46, "grad_norm": 0.5203143358230591, "learning_rate": 0.0003123874613109456, "loss": 1.8434, "step": 43900 }, { "epoch": 1.46, "grad_norm": 0.5132636427879333, "learning_rate": 0.0003123770181011228, "loss": 1.7999, "step": 43901 }, { "epoch": 1.46, "grad_norm": 0.5512546896934509, "learning_rate": 0.0003123665748762761, "loss": 1.7892, "step": 43902 }, { "epoch": 1.46, "grad_norm": 0.5149691700935364, "learning_rate": 0.0003123561316364182, "loss": 1.8398, "step": 43903 }, { "epoch": 1.46, "grad_norm": 0.49851328134536743, "learning_rate": 0.0003123456883815619, "loss": 1.7667, "step": 43904 }, { "epoch": 1.46, "grad_norm": 0.5052052140235901, "learning_rate": 0.0003123352451117196, "loss": 1.738, "step": 43905 }, { "epoch": 1.46, "grad_norm": 0.4962867796421051, "learning_rate": 0.0003123248018269042, "loss": 1.7635, "step": 43906 }, { "epoch": 1.46, "grad_norm": 0.5061998963356018, "learning_rate": 0.0003123143585271283, "loss": 1.8108, "step": 43907 }, { "epoch": 1.46, "grad_norm": 0.5103350281715393, "learning_rate": 0.00031230391521240455, "loss": 1.7698, "step": 43908 }, { "epoch": 1.46, "grad_norm": 0.534292459487915, "learning_rate": 0.00031229347188274566, "loss": 1.8276, "step": 43909 }, { "epoch": 1.46, "grad_norm": 0.5136229991912842, "learning_rate": 0.0003122830285381644, "loss": 1.7711, "step": 43910 }, { "epoch": 1.46, "grad_norm": 0.5319724678993225, "learning_rate": 0.0003122725851786733, "loss": 1.682, "step": 43911 }, { "epoch": 1.46, "grad_norm": 0.5332528948783875, "learning_rate": 0.000312262141804285, "loss": 1.8209, "step": 43912 }, { "epoch": 1.46, "grad_norm": 0.5003278851509094, "learning_rate": 0.00031225169841501234, "loss": 1.7801, "step": 43913 }, { "epoch": 1.46, "grad_norm": 0.49425894021987915, "learning_rate": 0.00031224125501086794, "loss": 1.7241, "step": 43914 }, { "epoch": 1.46, "grad_norm": 0.5248584747314453, "learning_rate": 0.0003122308115918645, "loss": 1.7527, "step": 43915 }, { "epoch": 1.46, "grad_norm": 0.5337262749671936, "learning_rate": 0.0003122203681580145, "loss": 1.8462, "step": 43916 }, { "epoch": 1.46, "grad_norm": 0.5220754146575928, "learning_rate": 0.00031220992470933095, "loss": 1.8637, "step": 43917 }, { "epoch": 1.46, "grad_norm": 0.5126361846923828, "learning_rate": 0.0003121994812458262, "loss": 1.7421, "step": 43918 }, { "epoch": 1.46, "grad_norm": 0.49864810705184937, "learning_rate": 0.0003121890377675131, "loss": 1.8395, "step": 43919 }, { "epoch": 1.46, "grad_norm": 0.5188544392585754, "learning_rate": 0.00031217859427440446, "loss": 1.7577, "step": 43920 }, { "epoch": 1.46, "grad_norm": 0.5056552886962891, "learning_rate": 0.00031216815076651265, "loss": 1.7125, "step": 43921 }, { "epoch": 1.46, "grad_norm": 0.49816134572029114, "learning_rate": 0.0003121577072438506, "loss": 1.7544, "step": 43922 }, { "epoch": 1.46, "grad_norm": 0.5278492569923401, "learning_rate": 0.0003121472637064307, "loss": 1.754, "step": 43923 }, { "epoch": 1.46, "grad_norm": 0.4924287796020508, "learning_rate": 0.00031213682015426603, "loss": 1.7734, "step": 43924 }, { "epoch": 1.46, "grad_norm": 0.49834421277046204, "learning_rate": 0.0003121263765873689, "loss": 1.7208, "step": 43925 }, { "epoch": 1.46, "grad_norm": 0.5130009651184082, "learning_rate": 0.0003121159330057522, "loss": 1.7227, "step": 43926 }, { "epoch": 1.46, "grad_norm": 0.5232387781143188, "learning_rate": 0.00031210548940942853, "loss": 1.7952, "step": 43927 }, { "epoch": 1.46, "grad_norm": 0.5133755207061768, "learning_rate": 0.0003120950457984106, "loss": 1.7327, "step": 43928 }, { "epoch": 1.46, "grad_norm": 0.5382577180862427, "learning_rate": 0.00031208460217271107, "loss": 1.8203, "step": 43929 }, { "epoch": 1.46, "grad_norm": 0.5073358416557312, "learning_rate": 0.0003120741585323426, "loss": 1.6705, "step": 43930 }, { "epoch": 1.46, "grad_norm": 0.5308197140693665, "learning_rate": 0.0003120637148773179, "loss": 1.7461, "step": 43931 }, { "epoch": 1.46, "grad_norm": 0.5076765418052673, "learning_rate": 0.00031205327120764956, "loss": 1.7736, "step": 43932 }, { "epoch": 1.46, "grad_norm": 0.5029612183570862, "learning_rate": 0.00031204282752335036, "loss": 1.723, "step": 43933 }, { "epoch": 1.46, "grad_norm": 0.5318059921264648, "learning_rate": 0.0003120323838244329, "loss": 1.7868, "step": 43934 }, { "epoch": 1.46, "grad_norm": 0.5078502893447876, "learning_rate": 0.00031202194011091, "loss": 1.6934, "step": 43935 }, { "epoch": 1.46, "grad_norm": 0.5299744009971619, "learning_rate": 0.0003120114963827942, "loss": 1.7739, "step": 43936 }, { "epoch": 1.46, "grad_norm": 0.5394355654716492, "learning_rate": 0.0003120010526400982, "loss": 1.7008, "step": 43937 }, { "epoch": 1.46, "grad_norm": 0.5226374268531799, "learning_rate": 0.00031199060888283467, "loss": 1.8079, "step": 43938 }, { "epoch": 1.46, "grad_norm": 0.4992832541465759, "learning_rate": 0.00031198016511101634, "loss": 1.7098, "step": 43939 }, { "epoch": 1.46, "grad_norm": 0.5295499563217163, "learning_rate": 0.00031196972132465587, "loss": 1.7381, "step": 43940 }, { "epoch": 1.46, "grad_norm": 0.5064358711242676, "learning_rate": 0.0003119592775237658, "loss": 1.7797, "step": 43941 }, { "epoch": 1.46, "grad_norm": 0.5356453061103821, "learning_rate": 0.00031194883370835907, "loss": 1.7331, "step": 43942 }, { "epoch": 1.46, "grad_norm": 0.49916914105415344, "learning_rate": 0.00031193838987844815, "loss": 1.8087, "step": 43943 }, { "epoch": 1.46, "grad_norm": 0.534583330154419, "learning_rate": 0.0003119279460340458, "loss": 1.7881, "step": 43944 }, { "epoch": 1.46, "grad_norm": 0.5554364323616028, "learning_rate": 0.0003119175021751648, "loss": 1.7118, "step": 43945 }, { "epoch": 1.46, "grad_norm": 0.5107538104057312, "learning_rate": 0.00031190705830181747, "loss": 1.7835, "step": 43946 }, { "epoch": 1.46, "grad_norm": 0.5156849026679993, "learning_rate": 0.0003118966144140169, "loss": 1.7852, "step": 43947 }, { "epoch": 1.46, "grad_norm": 0.49847522377967834, "learning_rate": 0.0003118861705117755, "loss": 1.7832, "step": 43948 }, { "epoch": 1.46, "grad_norm": 0.511981725692749, "learning_rate": 0.0003118757265951061, "loss": 1.7355, "step": 43949 }, { "epoch": 1.46, "grad_norm": 0.5040115118026733, "learning_rate": 0.0003118652826640213, "loss": 1.8135, "step": 43950 }, { "epoch": 1.46, "grad_norm": 0.5048999190330505, "learning_rate": 0.0003118548387185338, "loss": 1.7204, "step": 43951 }, { "epoch": 1.46, "grad_norm": 0.5174704194068909, "learning_rate": 0.00031184439475865624, "loss": 1.7693, "step": 43952 }, { "epoch": 1.46, "grad_norm": 0.49950534105300903, "learning_rate": 0.0003118339507844013, "loss": 1.773, "step": 43953 }, { "epoch": 1.46, "grad_norm": 0.5135267972946167, "learning_rate": 0.00031182350679578185, "loss": 1.7152, "step": 43954 }, { "epoch": 1.46, "grad_norm": 0.5139809250831604, "learning_rate": 0.00031181306279281026, "loss": 1.7636, "step": 43955 }, { "epoch": 1.46, "grad_norm": 0.5002990365028381, "learning_rate": 0.0003118026187754994, "loss": 1.816, "step": 43956 }, { "epoch": 1.46, "grad_norm": 0.49307388067245483, "learning_rate": 0.0003117921747438619, "loss": 1.7223, "step": 43957 }, { "epoch": 1.46, "grad_norm": 0.4905332028865814, "learning_rate": 0.0003117817306979104, "loss": 1.8097, "step": 43958 }, { "epoch": 1.46, "grad_norm": 0.5143471956253052, "learning_rate": 0.0003117712866376577, "loss": 1.7903, "step": 43959 }, { "epoch": 1.46, "grad_norm": 0.527286171913147, "learning_rate": 0.0003117608425631163, "loss": 1.7416, "step": 43960 }, { "epoch": 1.46, "grad_norm": 0.5196919441223145, "learning_rate": 0.000311750398474299, "loss": 1.7742, "step": 43961 }, { "epoch": 1.46, "grad_norm": 0.49785977602005005, "learning_rate": 0.0003117399543712184, "loss": 1.7145, "step": 43962 }, { "epoch": 1.46, "grad_norm": 0.49325358867645264, "learning_rate": 0.0003117295102538873, "loss": 1.713, "step": 43963 }, { "epoch": 1.46, "grad_norm": 0.5112153887748718, "learning_rate": 0.00031171906612231825, "loss": 1.8664, "step": 43964 }, { "epoch": 1.46, "grad_norm": 0.5110694169998169, "learning_rate": 0.0003117086219765241, "loss": 1.7555, "step": 43965 }, { "epoch": 1.46, "grad_norm": 0.5098185539245605, "learning_rate": 0.0003116981778165173, "loss": 1.7769, "step": 43966 }, { "epoch": 1.46, "grad_norm": 0.5146007537841797, "learning_rate": 0.00031168773364231064, "loss": 1.7403, "step": 43967 }, { "epoch": 1.46, "grad_norm": 0.5401034355163574, "learning_rate": 0.0003116772894539168, "loss": 1.8, "step": 43968 }, { "epoch": 1.46, "grad_norm": 0.5155656933784485, "learning_rate": 0.00031166684525134845, "loss": 1.7579, "step": 43969 }, { "epoch": 1.46, "grad_norm": 0.5096114873886108, "learning_rate": 0.00031165640103461833, "loss": 1.6429, "step": 43970 }, { "epoch": 1.46, "grad_norm": 0.5027922987937927, "learning_rate": 0.0003116459568037389, "loss": 1.746, "step": 43971 }, { "epoch": 1.46, "grad_norm": 0.5145450830459595, "learning_rate": 0.0003116355125587232, "loss": 1.7577, "step": 43972 }, { "epoch": 1.46, "grad_norm": 0.5003517270088196, "learning_rate": 0.0003116250682995836, "loss": 1.6826, "step": 43973 }, { "epoch": 1.46, "grad_norm": 0.5191901326179504, "learning_rate": 0.0003116146240263328, "loss": 1.8434, "step": 43974 }, { "epoch": 1.46, "grad_norm": 0.5046769976615906, "learning_rate": 0.0003116041797389837, "loss": 1.728, "step": 43975 }, { "epoch": 1.46, "grad_norm": 0.5069257616996765, "learning_rate": 0.00031159373543754873, "loss": 1.7584, "step": 43976 }, { "epoch": 1.46, "grad_norm": 0.5137436389923096, "learning_rate": 0.0003115832911220408, "loss": 1.7554, "step": 43977 }, { "epoch": 1.46, "grad_norm": 0.5011875629425049, "learning_rate": 0.0003115728467924723, "loss": 1.7469, "step": 43978 }, { "epoch": 1.46, "grad_norm": 0.5091444849967957, "learning_rate": 0.0003115624024488562, "loss": 1.8249, "step": 43979 }, { "epoch": 1.46, "grad_norm": 0.5255405306816101, "learning_rate": 0.0003115519580912049, "loss": 1.8303, "step": 43980 }, { "epoch": 1.46, "grad_norm": 0.5158544182777405, "learning_rate": 0.00031154151371953136, "loss": 1.7535, "step": 43981 }, { "epoch": 1.46, "grad_norm": 0.49799463152885437, "learning_rate": 0.00031153106933384805, "loss": 1.7242, "step": 43982 }, { "epoch": 1.46, "grad_norm": 0.5221657752990723, "learning_rate": 0.0003115206249341678, "loss": 1.8313, "step": 43983 }, { "epoch": 1.46, "grad_norm": 0.511999249458313, "learning_rate": 0.0003115101805205032, "loss": 1.772, "step": 43984 }, { "epoch": 1.46, "grad_norm": 0.5280296206474304, "learning_rate": 0.0003114997360928669, "loss": 1.8356, "step": 43985 }, { "epoch": 1.46, "grad_norm": 0.5199822783470154, "learning_rate": 0.00031148929165127157, "loss": 1.8515, "step": 43986 }, { "epoch": 1.46, "grad_norm": 0.5772210955619812, "learning_rate": 0.00031147884719573, "loss": 1.8697, "step": 43987 }, { "epoch": 1.46, "grad_norm": 0.5167115330696106, "learning_rate": 0.0003114684027262547, "loss": 1.7611, "step": 43988 }, { "epoch": 1.46, "grad_norm": 0.5026583075523376, "learning_rate": 0.00031145795824285855, "loss": 1.7181, "step": 43989 }, { "epoch": 1.46, "grad_norm": 0.509819507598877, "learning_rate": 0.0003114475137455542, "loss": 1.8037, "step": 43990 }, { "epoch": 1.46, "grad_norm": 0.5177050232887268, "learning_rate": 0.00031143706923435406, "loss": 1.771, "step": 43991 }, { "epoch": 1.46, "grad_norm": 0.5288333296775818, "learning_rate": 0.00031142662470927114, "loss": 1.7065, "step": 43992 }, { "epoch": 1.46, "grad_norm": 0.5124651193618774, "learning_rate": 0.00031141618017031797, "loss": 1.7571, "step": 43993 }, { "epoch": 1.46, "grad_norm": 0.5403447151184082, "learning_rate": 0.0003114057356175072, "loss": 1.7822, "step": 43994 }, { "epoch": 1.46, "grad_norm": 0.5210123658180237, "learning_rate": 0.0003113952910508515, "loss": 1.752, "step": 43995 }, { "epoch": 1.46, "grad_norm": 0.4903636872768402, "learning_rate": 0.0003113848464703637, "loss": 1.797, "step": 43996 }, { "epoch": 1.46, "grad_norm": 0.5222113728523254, "learning_rate": 0.0003113744018760563, "loss": 1.7977, "step": 43997 }, { "epoch": 1.46, "grad_norm": 0.5115143656730652, "learning_rate": 0.0003113639572679421, "loss": 1.7743, "step": 43998 }, { "epoch": 1.46, "grad_norm": 0.5070280432701111, "learning_rate": 0.0003113535126460337, "loss": 1.7951, "step": 43999 }, { "epoch": 1.46, "grad_norm": 0.5246644616127014, "learning_rate": 0.00031134306801034383, "loss": 1.8016, "step": 44000 }, { "epoch": 1.46, "grad_norm": 0.5373592376708984, "learning_rate": 0.0003113326233608851, "loss": 1.7494, "step": 44001 }, { "epoch": 1.46, "grad_norm": 0.5272995829582214, "learning_rate": 0.0003113221786976703, "loss": 1.8542, "step": 44002 }, { "epoch": 1.46, "grad_norm": 0.49248006939888, "learning_rate": 0.00031131173402071204, "loss": 1.7718, "step": 44003 }, { "epoch": 1.46, "grad_norm": 0.5142195224761963, "learning_rate": 0.00031130128933002293, "loss": 1.7044, "step": 44004 }, { "epoch": 1.46, "grad_norm": 0.5154759287834167, "learning_rate": 0.0003112908446256158, "loss": 1.7377, "step": 44005 }, { "epoch": 1.46, "grad_norm": 0.514626681804657, "learning_rate": 0.00031128039990750315, "loss": 1.7574, "step": 44006 }, { "epoch": 1.46, "grad_norm": 0.5022726655006409, "learning_rate": 0.00031126995517569795, "loss": 1.7832, "step": 44007 }, { "epoch": 1.46, "grad_norm": 0.5052633881568909, "learning_rate": 0.0003112595104302125, "loss": 1.758, "step": 44008 }, { "epoch": 1.46, "grad_norm": 0.5129169821739197, "learning_rate": 0.0003112490656710597, "loss": 1.7496, "step": 44009 }, { "epoch": 1.46, "grad_norm": 0.5105425715446472, "learning_rate": 0.0003112386208982522, "loss": 1.759, "step": 44010 }, { "epoch": 1.46, "grad_norm": 0.4920351207256317, "learning_rate": 0.00031122817611180275, "loss": 1.7151, "step": 44011 }, { "epoch": 1.46, "grad_norm": 0.5137056112289429, "learning_rate": 0.0003112177313117238, "loss": 1.8795, "step": 44012 }, { "epoch": 1.46, "grad_norm": 0.5097552537918091, "learning_rate": 0.0003112072864980283, "loss": 1.8342, "step": 44013 }, { "epoch": 1.46, "grad_norm": 0.49890196323394775, "learning_rate": 0.00031119684167072874, "loss": 1.8078, "step": 44014 }, { "epoch": 1.46, "grad_norm": 0.5100376605987549, "learning_rate": 0.00031118639682983784, "loss": 1.7669, "step": 44015 }, { "epoch": 1.46, "grad_norm": 0.49600961804389954, "learning_rate": 0.00031117595197536845, "loss": 1.67, "step": 44016 }, { "epoch": 1.46, "grad_norm": 0.5109220147132874, "learning_rate": 0.000311165507107333, "loss": 1.7206, "step": 44017 }, { "epoch": 1.46, "grad_norm": 0.5899343490600586, "learning_rate": 0.0003111550622257442, "loss": 1.7278, "step": 44018 }, { "epoch": 1.46, "grad_norm": 0.49504056572914124, "learning_rate": 0.00031114461733061486, "loss": 1.7419, "step": 44019 }, { "epoch": 1.46, "grad_norm": 0.5020005106925964, "learning_rate": 0.0003111341724219576, "loss": 1.7782, "step": 44020 }, { "epoch": 1.46, "grad_norm": 0.5144500136375427, "learning_rate": 0.0003111237274997852, "loss": 1.7433, "step": 44021 }, { "epoch": 1.46, "grad_norm": 0.5205392837524414, "learning_rate": 0.0003111132825641101, "loss": 1.7954, "step": 44022 }, { "epoch": 1.46, "grad_norm": 0.50230872631073, "learning_rate": 0.00031110283761494507, "loss": 1.7545, "step": 44023 }, { "epoch": 1.46, "grad_norm": 0.5104361176490784, "learning_rate": 0.000311092392652303, "loss": 1.7549, "step": 44024 }, { "epoch": 1.46, "grad_norm": 0.5080423355102539, "learning_rate": 0.00031108194767619625, "loss": 1.7949, "step": 44025 }, { "epoch": 1.46, "grad_norm": 0.5029110312461853, "learning_rate": 0.00031107150268663765, "loss": 1.8039, "step": 44026 }, { "epoch": 1.46, "grad_norm": 0.5304779410362244, "learning_rate": 0.00031106105768364005, "loss": 1.7878, "step": 44027 }, { "epoch": 1.46, "grad_norm": 0.5305695533752441, "learning_rate": 0.0003110506126672158, "loss": 1.7487, "step": 44028 }, { "epoch": 1.46, "grad_norm": 0.49700698256492615, "learning_rate": 0.0003110401676373778, "loss": 1.6935, "step": 44029 }, { "epoch": 1.46, "grad_norm": 0.5181321501731873, "learning_rate": 0.0003110297225941386, "loss": 1.7741, "step": 44030 }, { "epoch": 1.46, "grad_norm": 0.5418960452079773, "learning_rate": 0.000311019277537511, "loss": 1.7069, "step": 44031 }, { "epoch": 1.46, "grad_norm": 0.5161582231521606, "learning_rate": 0.0003110088324675076, "loss": 1.8109, "step": 44032 }, { "epoch": 1.46, "grad_norm": 0.5217790007591248, "learning_rate": 0.00031099838738414103, "loss": 1.7611, "step": 44033 }, { "epoch": 1.47, "grad_norm": 0.5062023997306824, "learning_rate": 0.0003109879422874242, "loss": 1.8126, "step": 44034 }, { "epoch": 1.47, "grad_norm": 0.505362331867218, "learning_rate": 0.00031097749717736947, "loss": 1.7108, "step": 44035 }, { "epoch": 1.47, "grad_norm": 0.5194787979125977, "learning_rate": 0.00031096705205398975, "loss": 1.721, "step": 44036 }, { "epoch": 1.47, "grad_norm": 0.5293405652046204, "learning_rate": 0.0003109566069172977, "loss": 1.7861, "step": 44037 }, { "epoch": 1.47, "grad_norm": 0.5183963179588318, "learning_rate": 0.0003109461617673058, "loss": 1.8091, "step": 44038 }, { "epoch": 1.47, "grad_norm": 0.5251111388206482, "learning_rate": 0.00031093571660402696, "loss": 1.7957, "step": 44039 }, { "epoch": 1.47, "grad_norm": 0.5163915753364563, "learning_rate": 0.0003109252714274737, "loss": 1.7759, "step": 44040 }, { "epoch": 1.47, "grad_norm": 0.5222089290618896, "learning_rate": 0.0003109148262376589, "loss": 1.7337, "step": 44041 }, { "epoch": 1.47, "grad_norm": 0.5088884234428406, "learning_rate": 0.0003109043810345951, "loss": 1.8074, "step": 44042 }, { "epoch": 1.47, "grad_norm": 0.5018703937530518, "learning_rate": 0.00031089393581829484, "loss": 1.7262, "step": 44043 }, { "epoch": 1.47, "grad_norm": 0.5594903826713562, "learning_rate": 0.0003108834905887711, "loss": 1.8274, "step": 44044 }, { "epoch": 1.47, "grad_norm": 0.5034067630767822, "learning_rate": 0.00031087304534603626, "loss": 1.7488, "step": 44045 }, { "epoch": 1.47, "grad_norm": 0.5114622712135315, "learning_rate": 0.0003108626000901033, "loss": 1.7731, "step": 44046 }, { "epoch": 1.47, "grad_norm": 0.5366300344467163, "learning_rate": 0.0003108521548209846, "loss": 1.8389, "step": 44047 }, { "epoch": 1.47, "grad_norm": 0.5464168190956116, "learning_rate": 0.0003108417095386931, "loss": 1.8335, "step": 44048 }, { "epoch": 1.47, "grad_norm": 0.5100638270378113, "learning_rate": 0.0003108312642432413, "loss": 1.7829, "step": 44049 }, { "epoch": 1.47, "grad_norm": 0.5346296429634094, "learning_rate": 0.00031082081893464195, "loss": 1.7011, "step": 44050 }, { "epoch": 1.47, "grad_norm": 0.518355667591095, "learning_rate": 0.0003108103736129077, "loss": 1.8006, "step": 44051 }, { "epoch": 1.47, "grad_norm": 0.5243546366691589, "learning_rate": 0.0003107999282780513, "loss": 1.7013, "step": 44052 }, { "epoch": 1.47, "grad_norm": 0.5091603398323059, "learning_rate": 0.00031078948293008536, "loss": 1.7474, "step": 44053 }, { "epoch": 1.47, "grad_norm": 0.5090730786323547, "learning_rate": 0.0003107790375690226, "loss": 1.7093, "step": 44054 }, { "epoch": 1.47, "grad_norm": 0.5132849812507629, "learning_rate": 0.00031076859219487566, "loss": 1.782, "step": 44055 }, { "epoch": 1.47, "grad_norm": 0.5132328867912292, "learning_rate": 0.0003107581468076572, "loss": 1.778, "step": 44056 }, { "epoch": 1.47, "grad_norm": 0.5289146900177002, "learning_rate": 0.00031074770140738004, "loss": 1.6976, "step": 44057 }, { "epoch": 1.47, "grad_norm": 0.520865797996521, "learning_rate": 0.0003107372559940566, "loss": 1.8367, "step": 44058 }, { "epoch": 1.47, "grad_norm": 0.49463915824890137, "learning_rate": 0.0003107268105676998, "loss": 1.809, "step": 44059 }, { "epoch": 1.47, "grad_norm": 0.4981083273887634, "learning_rate": 0.0003107163651283222, "loss": 1.7646, "step": 44060 }, { "epoch": 1.47, "grad_norm": 0.5006828308105469, "learning_rate": 0.0003107059196759365, "loss": 1.798, "step": 44061 }, { "epoch": 1.47, "grad_norm": 0.5225566625595093, "learning_rate": 0.00031069547421055553, "loss": 1.7659, "step": 44062 }, { "epoch": 1.47, "grad_norm": 0.5323894619941711, "learning_rate": 0.00031068502873219165, "loss": 1.7588, "step": 44063 }, { "epoch": 1.47, "grad_norm": 0.5142937302589417, "learning_rate": 0.00031067458324085783, "loss": 1.7639, "step": 44064 }, { "epoch": 1.47, "grad_norm": 0.4903748035430908, "learning_rate": 0.0003106641377365665, "loss": 1.7181, "step": 44065 }, { "epoch": 1.47, "grad_norm": 0.5312355160713196, "learning_rate": 0.00031065369221933067, "loss": 1.8202, "step": 44066 }, { "epoch": 1.47, "grad_norm": 0.5285032987594604, "learning_rate": 0.00031064324668916274, "loss": 1.7236, "step": 44067 }, { "epoch": 1.47, "grad_norm": 0.5012767910957336, "learning_rate": 0.00031063280114607545, "loss": 1.7779, "step": 44068 }, { "epoch": 1.47, "grad_norm": 0.5070899128913879, "learning_rate": 0.0003106223555900815, "loss": 1.6519, "step": 44069 }, { "epoch": 1.47, "grad_norm": 0.5023264288902283, "learning_rate": 0.0003106119100211936, "loss": 1.6635, "step": 44070 }, { "epoch": 1.47, "grad_norm": 0.5231105089187622, "learning_rate": 0.00031060146443942445, "loss": 1.8499, "step": 44071 }, { "epoch": 1.47, "grad_norm": 0.5167067050933838, "learning_rate": 0.0003105910188447866, "loss": 1.7117, "step": 44072 }, { "epoch": 1.47, "grad_norm": 0.5315960645675659, "learning_rate": 0.0003105805732372929, "loss": 1.7372, "step": 44073 }, { "epoch": 1.47, "grad_norm": 0.5216183662414551, "learning_rate": 0.0003105701276169559, "loss": 1.8212, "step": 44074 }, { "epoch": 1.47, "grad_norm": 0.5352122187614441, "learning_rate": 0.0003105596819837883, "loss": 1.8033, "step": 44075 }, { "epoch": 1.47, "grad_norm": 0.5358418226242065, "learning_rate": 0.00031054923633780287, "loss": 1.8215, "step": 44076 }, { "epoch": 1.47, "grad_norm": 0.4991243779659271, "learning_rate": 0.00031053879067901213, "loss": 1.7328, "step": 44077 }, { "epoch": 1.47, "grad_norm": 0.5102729797363281, "learning_rate": 0.0003105283450074289, "loss": 1.7287, "step": 44078 }, { "epoch": 1.47, "grad_norm": 0.5293849110603333, "learning_rate": 0.0003105178993230658, "loss": 1.7865, "step": 44079 }, { "epoch": 1.47, "grad_norm": 0.5124951601028442, "learning_rate": 0.00031050745362593555, "loss": 1.7446, "step": 44080 }, { "epoch": 1.47, "grad_norm": 0.5009860396385193, "learning_rate": 0.00031049700791605075, "loss": 1.7695, "step": 44081 }, { "epoch": 1.47, "grad_norm": 0.5217970013618469, "learning_rate": 0.0003104865621934242, "loss": 1.7651, "step": 44082 }, { "epoch": 1.47, "grad_norm": 0.5179140567779541, "learning_rate": 0.00031047611645806846, "loss": 1.7859, "step": 44083 }, { "epoch": 1.47, "grad_norm": 0.49731528759002686, "learning_rate": 0.0003104656707099963, "loss": 1.6997, "step": 44084 }, { "epoch": 1.47, "grad_norm": 0.5145628452301025, "learning_rate": 0.00031045522494922023, "loss": 1.799, "step": 44085 }, { "epoch": 1.47, "grad_norm": 0.48944032192230225, "learning_rate": 0.00031044477917575316, "loss": 1.8398, "step": 44086 }, { "epoch": 1.47, "grad_norm": 0.520373523235321, "learning_rate": 0.0003104343333896077, "loss": 1.7015, "step": 44087 }, { "epoch": 1.47, "grad_norm": 0.5106234550476074, "learning_rate": 0.00031042388759079637, "loss": 1.7799, "step": 44088 }, { "epoch": 1.47, "grad_norm": 0.5325894951820374, "learning_rate": 0.00031041344177933214, "loss": 1.7945, "step": 44089 }, { "epoch": 1.47, "grad_norm": 0.5166733264923096, "learning_rate": 0.0003104029959552274, "loss": 1.7309, "step": 44090 }, { "epoch": 1.47, "grad_norm": 0.5075273513793945, "learning_rate": 0.000310392550118495, "loss": 1.8331, "step": 44091 }, { "epoch": 1.47, "grad_norm": 0.5013303160667419, "learning_rate": 0.0003103821042691476, "loss": 1.7228, "step": 44092 }, { "epoch": 1.47, "grad_norm": 0.5239537954330444, "learning_rate": 0.0003103716584071978, "loss": 1.6821, "step": 44093 }, { "epoch": 1.47, "grad_norm": 0.5295150279998779, "learning_rate": 0.00031036121253265846, "loss": 1.7899, "step": 44094 }, { "epoch": 1.47, "grad_norm": 0.5059576630592346, "learning_rate": 0.00031035076664554195, "loss": 1.7318, "step": 44095 }, { "epoch": 1.47, "grad_norm": 0.5278610587120056, "learning_rate": 0.0003103403207458612, "loss": 1.7511, "step": 44096 }, { "epoch": 1.47, "grad_norm": 0.5169553756713867, "learning_rate": 0.0003103298748336289, "loss": 1.835, "step": 44097 }, { "epoch": 1.47, "grad_norm": 0.5099169611930847, "learning_rate": 0.0003103194289088576, "loss": 1.7495, "step": 44098 }, { "epoch": 1.47, "grad_norm": 0.5223351716995239, "learning_rate": 0.00031030898297156, "loss": 1.7404, "step": 44099 }, { "epoch": 1.47, "grad_norm": 0.5261961817741394, "learning_rate": 0.0003102985370217488, "loss": 1.7281, "step": 44100 }, { "epoch": 1.47, "grad_norm": 0.5251824259757996, "learning_rate": 0.00031028809105943683, "loss": 1.7692, "step": 44101 }, { "epoch": 1.47, "grad_norm": 0.5029637813568115, "learning_rate": 0.0003102776450846365, "loss": 1.8466, "step": 44102 }, { "epoch": 1.47, "grad_norm": 0.5055047869682312, "learning_rate": 0.0003102671990973607, "loss": 1.7629, "step": 44103 }, { "epoch": 1.47, "grad_norm": 0.5082623958587646, "learning_rate": 0.000310256753097622, "loss": 1.7142, "step": 44104 }, { "epoch": 1.47, "grad_norm": 0.5107792615890503, "learning_rate": 0.0003102463070854331, "loss": 1.7278, "step": 44105 }, { "epoch": 1.47, "grad_norm": 0.5231522917747498, "learning_rate": 0.0003102358610608067, "loss": 1.8145, "step": 44106 }, { "epoch": 1.47, "grad_norm": 0.5170294642448425, "learning_rate": 0.00031022541502375556, "loss": 1.8262, "step": 44107 }, { "epoch": 1.47, "grad_norm": 0.5273445248603821, "learning_rate": 0.00031021496897429214, "loss": 1.7844, "step": 44108 }, { "epoch": 1.47, "grad_norm": 0.5138248801231384, "learning_rate": 0.0003102045229124293, "loss": 1.7911, "step": 44109 }, { "epoch": 1.47, "grad_norm": 0.5085001587867737, "learning_rate": 0.0003101940768381797, "loss": 1.7024, "step": 44110 }, { "epoch": 1.47, "grad_norm": 0.4983714520931244, "learning_rate": 0.000310183630751556, "loss": 1.7154, "step": 44111 }, { "epoch": 1.47, "grad_norm": 0.5220137238502502, "learning_rate": 0.00031017318465257086, "loss": 1.7728, "step": 44112 }, { "epoch": 1.47, "grad_norm": 0.5204929113388062, "learning_rate": 0.0003101627385412369, "loss": 1.7128, "step": 44113 }, { "epoch": 1.47, "grad_norm": 0.5465749502182007, "learning_rate": 0.00031015229241756695, "loss": 1.8426, "step": 44114 }, { "epoch": 1.47, "grad_norm": 0.5407125949859619, "learning_rate": 0.0003101418462815736, "loss": 1.7945, "step": 44115 }, { "epoch": 1.47, "grad_norm": 0.5165049433708191, "learning_rate": 0.0003101314001332695, "loss": 1.7369, "step": 44116 }, { "epoch": 1.47, "grad_norm": 0.5271673202514648, "learning_rate": 0.00031012095397266745, "loss": 1.8066, "step": 44117 }, { "epoch": 1.47, "grad_norm": 0.5285963416099548, "learning_rate": 0.0003101105077997799, "loss": 1.726, "step": 44118 }, { "epoch": 1.47, "grad_norm": 0.5263250470161438, "learning_rate": 0.00031010006161461993, "loss": 1.799, "step": 44119 }, { "epoch": 1.47, "grad_norm": 0.5361716151237488, "learning_rate": 0.0003100896154171998, "loss": 1.7263, "step": 44120 }, { "epoch": 1.47, "grad_norm": 0.514083981513977, "learning_rate": 0.0003100791692075324, "loss": 1.7192, "step": 44121 }, { "epoch": 1.47, "grad_norm": 0.5131535530090332, "learning_rate": 0.0003100687229856304, "loss": 1.7887, "step": 44122 }, { "epoch": 1.47, "grad_norm": 0.48950016498565674, "learning_rate": 0.0003100582767515064, "loss": 1.821, "step": 44123 }, { "epoch": 1.47, "grad_norm": 0.5365099906921387, "learning_rate": 0.0003100478305051732, "loss": 1.7127, "step": 44124 }, { "epoch": 1.47, "grad_norm": 0.505747377872467, "learning_rate": 0.00031003738424664333, "loss": 1.7214, "step": 44125 }, { "epoch": 1.47, "grad_norm": 0.5379427075386047, "learning_rate": 0.00031002693797592965, "loss": 1.7976, "step": 44126 }, { "epoch": 1.47, "grad_norm": 0.5368900299072266, "learning_rate": 0.00031001649169304466, "loss": 1.7387, "step": 44127 }, { "epoch": 1.47, "grad_norm": 0.5100181698799133, "learning_rate": 0.0003100060453980012, "loss": 1.7599, "step": 44128 }, { "epoch": 1.47, "grad_norm": 0.5113866925239563, "learning_rate": 0.0003099955990908118, "loss": 1.7166, "step": 44129 }, { "epoch": 1.47, "grad_norm": 0.5258660316467285, "learning_rate": 0.00030998515277148923, "loss": 1.7226, "step": 44130 }, { "epoch": 1.47, "grad_norm": 0.5045325756072998, "learning_rate": 0.0003099747064400463, "loss": 1.7269, "step": 44131 }, { "epoch": 1.47, "grad_norm": 0.4985958933830261, "learning_rate": 0.0003099642600964953, "loss": 1.7496, "step": 44132 }, { "epoch": 1.47, "grad_norm": 0.5238269567489624, "learning_rate": 0.00030995381374084935, "loss": 1.6898, "step": 44133 }, { "epoch": 1.47, "grad_norm": 0.524590253829956, "learning_rate": 0.0003099433673731209, "loss": 1.7603, "step": 44134 }, { "epoch": 1.47, "grad_norm": 0.5200313329696655, "learning_rate": 0.0003099329209933226, "loss": 1.7959, "step": 44135 }, { "epoch": 1.47, "grad_norm": 0.5199544429779053, "learning_rate": 0.0003099224746014672, "loss": 1.7805, "step": 44136 }, { "epoch": 1.47, "grad_norm": 0.5244259834289551, "learning_rate": 0.00030991202819756735, "loss": 1.7593, "step": 44137 }, { "epoch": 1.47, "grad_norm": 0.5093015432357788, "learning_rate": 0.00030990158178163595, "loss": 1.7233, "step": 44138 }, { "epoch": 1.47, "grad_norm": 0.4985300302505493, "learning_rate": 0.00030989113535368536, "loss": 1.8138, "step": 44139 }, { "epoch": 1.47, "grad_norm": 0.4890889823436737, "learning_rate": 0.00030988068891372837, "loss": 1.762, "step": 44140 }, { "epoch": 1.47, "grad_norm": 0.5114539861679077, "learning_rate": 0.00030987024246177766, "loss": 1.7614, "step": 44141 }, { "epoch": 1.47, "grad_norm": 0.5204933285713196, "learning_rate": 0.000309859795997846, "loss": 1.7257, "step": 44142 }, { "epoch": 1.47, "grad_norm": 0.5297671556472778, "learning_rate": 0.00030984934952194595, "loss": 1.7292, "step": 44143 }, { "epoch": 1.47, "grad_norm": 0.49015510082244873, "learning_rate": 0.00030983890303409026, "loss": 1.682, "step": 44144 }, { "epoch": 1.47, "grad_norm": 0.5025672316551208, "learning_rate": 0.0003098284565342916, "loss": 1.7613, "step": 44145 }, { "epoch": 1.47, "grad_norm": 0.5051991939544678, "learning_rate": 0.0003098180100225626, "loss": 1.7755, "step": 44146 }, { "epoch": 1.47, "grad_norm": 0.5379164814949036, "learning_rate": 0.000309807563498916, "loss": 1.7615, "step": 44147 }, { "epoch": 1.47, "grad_norm": 0.5208413004875183, "learning_rate": 0.00030979711696336445, "loss": 1.7073, "step": 44148 }, { "epoch": 1.47, "grad_norm": 0.5129147171974182, "learning_rate": 0.0003097866704159208, "loss": 1.7234, "step": 44149 }, { "epoch": 1.47, "grad_norm": 0.5080990791320801, "learning_rate": 0.0003097762238565973, "loss": 1.6965, "step": 44150 }, { "epoch": 1.47, "grad_norm": 0.5256446599960327, "learning_rate": 0.0003097657772854072, "loss": 1.7141, "step": 44151 }, { "epoch": 1.47, "grad_norm": 0.5133085250854492, "learning_rate": 0.0003097553307023627, "loss": 1.7913, "step": 44152 }, { "epoch": 1.47, "grad_norm": 0.517218828201294, "learning_rate": 0.0003097448841074766, "loss": 1.7382, "step": 44153 }, { "epoch": 1.47, "grad_norm": 0.5153149366378784, "learning_rate": 0.0003097344375007618, "loss": 1.783, "step": 44154 }, { "epoch": 1.47, "grad_norm": 0.5226250886917114, "learning_rate": 0.00030972399088223073, "loss": 1.7434, "step": 44155 }, { "epoch": 1.47, "grad_norm": 0.5103052258491516, "learning_rate": 0.0003097135442518963, "loss": 1.7342, "step": 44156 }, { "epoch": 1.47, "grad_norm": 0.5239288210868835, "learning_rate": 0.00030970309760977084, "loss": 1.7372, "step": 44157 }, { "epoch": 1.47, "grad_norm": 0.5514265298843384, "learning_rate": 0.00030969265095586747, "loss": 1.7405, "step": 44158 }, { "epoch": 1.47, "grad_norm": 0.5265515446662903, "learning_rate": 0.00030968220429019856, "loss": 1.7766, "step": 44159 }, { "epoch": 1.47, "grad_norm": 0.4993940591812134, "learning_rate": 0.0003096717576127769, "loss": 1.7686, "step": 44160 }, { "epoch": 1.47, "grad_norm": 0.5087531208992004, "learning_rate": 0.0003096613109236151, "loss": 1.6923, "step": 44161 }, { "epoch": 1.47, "grad_norm": 0.5122071504592896, "learning_rate": 0.0003096508642227259, "loss": 1.7907, "step": 44162 }, { "epoch": 1.47, "grad_norm": 0.517724335193634, "learning_rate": 0.00030964041751012204, "loss": 1.7604, "step": 44163 }, { "epoch": 1.47, "grad_norm": 0.5291711688041687, "learning_rate": 0.0003096299707858161, "loss": 1.8117, "step": 44164 }, { "epoch": 1.47, "grad_norm": 0.5057744979858398, "learning_rate": 0.00030961952404982075, "loss": 1.703, "step": 44165 }, { "epoch": 1.47, "grad_norm": 0.5192866921424866, "learning_rate": 0.0003096090773021487, "loss": 1.7774, "step": 44166 }, { "epoch": 1.47, "grad_norm": 0.501528799533844, "learning_rate": 0.00030959863054281275, "loss": 1.7622, "step": 44167 }, { "epoch": 1.47, "grad_norm": 0.5296393632888794, "learning_rate": 0.0003095881837718254, "loss": 1.8561, "step": 44168 }, { "epoch": 1.47, "grad_norm": 0.4991307258605957, "learning_rate": 0.0003095777369891995, "loss": 1.7705, "step": 44169 }, { "epoch": 1.47, "grad_norm": 0.5135043263435364, "learning_rate": 0.00030956729019494757, "loss": 1.8111, "step": 44170 }, { "epoch": 1.47, "grad_norm": 0.5041786432266235, "learning_rate": 0.0003095568433890823, "loss": 1.7961, "step": 44171 }, { "epoch": 1.47, "grad_norm": 0.5205177664756775, "learning_rate": 0.0003095463965716165, "loss": 1.7801, "step": 44172 }, { "epoch": 1.47, "grad_norm": 0.5234151482582092, "learning_rate": 0.00030953594974256275, "loss": 1.77, "step": 44173 }, { "epoch": 1.47, "grad_norm": 0.515413761138916, "learning_rate": 0.00030952550290193386, "loss": 1.7315, "step": 44174 }, { "epoch": 1.47, "grad_norm": 0.5068336725234985, "learning_rate": 0.0003095150560497422, "loss": 1.7605, "step": 44175 }, { "epoch": 1.47, "grad_norm": 0.5195629000663757, "learning_rate": 0.00030950460918600085, "loss": 1.8115, "step": 44176 }, { "epoch": 1.47, "grad_norm": 0.5062546730041504, "learning_rate": 0.00030949416231072226, "loss": 1.7652, "step": 44177 }, { "epoch": 1.47, "grad_norm": 0.5232439637184143, "learning_rate": 0.0003094837154239192, "loss": 1.7771, "step": 44178 }, { "epoch": 1.47, "grad_norm": 0.5108349323272705, "learning_rate": 0.0003094732685256043, "loss": 1.7553, "step": 44179 }, { "epoch": 1.47, "grad_norm": 0.5202731490135193, "learning_rate": 0.0003094628216157901, "loss": 1.8243, "step": 44180 }, { "epoch": 1.47, "grad_norm": 0.507414698600769, "learning_rate": 0.00030945237469448956, "loss": 1.7958, "step": 44181 }, { "epoch": 1.47, "grad_norm": 0.5058717727661133, "learning_rate": 0.00030944192776171514, "loss": 1.7941, "step": 44182 }, { "epoch": 1.47, "grad_norm": 0.5090816617012024, "learning_rate": 0.0003094314808174798, "loss": 1.7318, "step": 44183 }, { "epoch": 1.47, "grad_norm": 0.5272577404975891, "learning_rate": 0.00030942103386179585, "loss": 1.7994, "step": 44184 }, { "epoch": 1.47, "grad_norm": 0.5074604153633118, "learning_rate": 0.0003094105868946762, "loss": 1.772, "step": 44185 }, { "epoch": 1.47, "grad_norm": 0.5158939361572266, "learning_rate": 0.00030940013991613357, "loss": 1.8288, "step": 44186 }, { "epoch": 1.47, "grad_norm": 0.49984923005104065, "learning_rate": 0.0003093896929261804, "loss": 1.6784, "step": 44187 }, { "epoch": 1.47, "grad_norm": 0.5160375833511353, "learning_rate": 0.00030937924592482963, "loss": 1.7618, "step": 44188 }, { "epoch": 1.47, "grad_norm": 0.5110290050506592, "learning_rate": 0.0003093687989120938, "loss": 1.7559, "step": 44189 }, { "epoch": 1.47, "grad_norm": 0.5212922096252441, "learning_rate": 0.00030935835188798563, "loss": 1.8005, "step": 44190 }, { "epoch": 1.47, "grad_norm": 0.5201246738433838, "learning_rate": 0.00030934790485251775, "loss": 1.7984, "step": 44191 }, { "epoch": 1.47, "grad_norm": 0.5099878907203674, "learning_rate": 0.000309337457805703, "loss": 1.7747, "step": 44192 }, { "epoch": 1.47, "grad_norm": 0.5059048533439636, "learning_rate": 0.0003093270107475539, "loss": 1.771, "step": 44193 }, { "epoch": 1.47, "grad_norm": 0.5260181427001953, "learning_rate": 0.0003093165636780832, "loss": 1.7537, "step": 44194 }, { "epoch": 1.47, "grad_norm": 0.5172427892684937, "learning_rate": 0.00030930611659730356, "loss": 1.8005, "step": 44195 }, { "epoch": 1.47, "grad_norm": 0.5008713006973267, "learning_rate": 0.00030929566950522764, "loss": 1.7439, "step": 44196 }, { "epoch": 1.47, "grad_norm": 0.5209354162216187, "learning_rate": 0.00030928522240186815, "loss": 1.7586, "step": 44197 }, { "epoch": 1.47, "grad_norm": 0.5073278546333313, "learning_rate": 0.0003092747752872378, "loss": 1.8136, "step": 44198 }, { "epoch": 1.47, "grad_norm": 0.5404258966445923, "learning_rate": 0.0003092643281613493, "loss": 1.7714, "step": 44199 }, { "epoch": 1.47, "grad_norm": 0.5420660376548767, "learning_rate": 0.00030925388102421517, "loss": 1.7754, "step": 44200 }, { "epoch": 1.47, "grad_norm": 0.5172024369239807, "learning_rate": 0.0003092434338758482, "loss": 1.765, "step": 44201 }, { "epoch": 1.47, "grad_norm": 0.5176440477371216, "learning_rate": 0.000309232986716261, "loss": 1.8274, "step": 44202 }, { "epoch": 1.47, "grad_norm": 0.517419695854187, "learning_rate": 0.0003092225395454664, "loss": 1.8215, "step": 44203 }, { "epoch": 1.47, "grad_norm": 0.508560836315155, "learning_rate": 0.00030921209236347706, "loss": 1.77, "step": 44204 }, { "epoch": 1.47, "grad_norm": 0.5203952193260193, "learning_rate": 0.0003092016451703054, "loss": 1.75, "step": 44205 }, { "epoch": 1.47, "grad_norm": 0.5076254606246948, "learning_rate": 0.00030919119796596453, "loss": 1.7373, "step": 44206 }, { "epoch": 1.47, "grad_norm": 0.5113014578819275, "learning_rate": 0.0003091807507504668, "loss": 1.6826, "step": 44207 }, { "epoch": 1.47, "grad_norm": 0.5039231777191162, "learning_rate": 0.0003091703035238249, "loss": 1.722, "step": 44208 }, { "epoch": 1.47, "grad_norm": 0.5185648798942566, "learning_rate": 0.00030915985628605176, "loss": 1.6929, "step": 44209 }, { "epoch": 1.47, "grad_norm": 0.5048248171806335, "learning_rate": 0.00030914940903715977, "loss": 1.7565, "step": 44210 }, { "epoch": 1.47, "grad_norm": 0.49491843581199646, "learning_rate": 0.00030913896177716185, "loss": 1.7544, "step": 44211 }, { "epoch": 1.47, "grad_norm": 0.49608078598976135, "learning_rate": 0.00030912851450607044, "loss": 1.7278, "step": 44212 }, { "epoch": 1.47, "grad_norm": 0.5282006859779358, "learning_rate": 0.00030911806722389853, "loss": 1.7913, "step": 44213 }, { "epoch": 1.47, "grad_norm": 0.49932196736335754, "learning_rate": 0.00030910761993065855, "loss": 1.7597, "step": 44214 }, { "epoch": 1.47, "grad_norm": 0.501826286315918, "learning_rate": 0.0003090971726263632, "loss": 1.7188, "step": 44215 }, { "epoch": 1.47, "grad_norm": 0.5310918688774109, "learning_rate": 0.0003090867253110253, "loss": 1.7535, "step": 44216 }, { "epoch": 1.47, "grad_norm": 0.6321375966072083, "learning_rate": 0.00030907627798465746, "loss": 1.7076, "step": 44217 }, { "epoch": 1.47, "grad_norm": 0.5214070677757263, "learning_rate": 0.0003090658306472724, "loss": 1.7458, "step": 44218 }, { "epoch": 1.47, "grad_norm": 0.5006245970726013, "learning_rate": 0.0003090553832988826, "loss": 1.7423, "step": 44219 }, { "epoch": 1.47, "grad_norm": 0.5260928273200989, "learning_rate": 0.00030904493593950105, "loss": 1.8011, "step": 44220 }, { "epoch": 1.47, "grad_norm": 0.5156211853027344, "learning_rate": 0.0003090344885691402, "loss": 1.7955, "step": 44221 }, { "epoch": 1.47, "grad_norm": 0.5070357322692871, "learning_rate": 0.00030902404118781285, "loss": 1.7286, "step": 44222 }, { "epoch": 1.47, "grad_norm": 0.5410473346710205, "learning_rate": 0.0003090135937955316, "loss": 1.8243, "step": 44223 }, { "epoch": 1.47, "grad_norm": 0.5162421464920044, "learning_rate": 0.00030900314639230927, "loss": 1.8416, "step": 44224 }, { "epoch": 1.47, "grad_norm": 0.5113360285758972, "learning_rate": 0.00030899269897815835, "loss": 1.7572, "step": 44225 }, { "epoch": 1.47, "grad_norm": 0.5180839896202087, "learning_rate": 0.00030898225155309163, "loss": 1.7447, "step": 44226 }, { "epoch": 1.47, "grad_norm": 0.515417218208313, "learning_rate": 0.0003089718041171218, "loss": 1.8235, "step": 44227 }, { "epoch": 1.47, "grad_norm": 0.49481436610221863, "learning_rate": 0.00030896135667026153, "loss": 1.7089, "step": 44228 }, { "epoch": 1.47, "grad_norm": 0.5305496454238892, "learning_rate": 0.0003089509092125235, "loss": 1.8466, "step": 44229 }, { "epoch": 1.47, "grad_norm": 0.5529748797416687, "learning_rate": 0.0003089404617439203, "loss": 1.8017, "step": 44230 }, { "epoch": 1.47, "grad_norm": 0.5031101703643799, "learning_rate": 0.0003089300142644648, "loss": 1.8072, "step": 44231 }, { "epoch": 1.47, "grad_norm": 0.5316704511642456, "learning_rate": 0.0003089195667741696, "loss": 1.7899, "step": 44232 }, { "epoch": 1.47, "grad_norm": 0.5139684081077576, "learning_rate": 0.00030890911927304723, "loss": 1.7097, "step": 44233 }, { "epoch": 1.47, "grad_norm": 0.5067417621612549, "learning_rate": 0.00030889867176111066, "loss": 1.7283, "step": 44234 }, { "epoch": 1.47, "grad_norm": 0.5595280528068542, "learning_rate": 0.00030888822423837223, "loss": 1.815, "step": 44235 }, { "epoch": 1.47, "grad_norm": 0.5436177849769592, "learning_rate": 0.00030887777670484496, "loss": 1.7987, "step": 44236 }, { "epoch": 1.47, "grad_norm": 0.5127964019775391, "learning_rate": 0.00030886732916054127, "loss": 1.7669, "step": 44237 }, { "epoch": 1.47, "grad_norm": 0.49683526158332825, "learning_rate": 0.00030885688160547405, "loss": 1.6826, "step": 44238 }, { "epoch": 1.47, "grad_norm": 0.5158956050872803, "learning_rate": 0.00030884643403965577, "loss": 1.7566, "step": 44239 }, { "epoch": 1.47, "grad_norm": 0.5184876918792725, "learning_rate": 0.0003088359864630993, "loss": 1.7745, "step": 44240 }, { "epoch": 1.47, "grad_norm": 0.5244191884994507, "learning_rate": 0.00030882553887581727, "loss": 1.7318, "step": 44241 }, { "epoch": 1.47, "grad_norm": 0.5143739581108093, "learning_rate": 0.00030881509127782226, "loss": 1.806, "step": 44242 }, { "epoch": 1.47, "grad_norm": 0.5240601897239685, "learning_rate": 0.0003088046436691271, "loss": 1.7985, "step": 44243 }, { "epoch": 1.47, "grad_norm": 0.5253555178642273, "learning_rate": 0.0003087941960497443, "loss": 1.7165, "step": 44244 }, { "epoch": 1.47, "grad_norm": 0.5487596988677979, "learning_rate": 0.0003087837484196867, "loss": 1.8479, "step": 44245 }, { "epoch": 1.47, "grad_norm": 0.5442565083503723, "learning_rate": 0.000308773300778967, "loss": 1.7224, "step": 44246 }, { "epoch": 1.47, "grad_norm": 0.522861659526825, "learning_rate": 0.0003087628531275977, "loss": 1.7852, "step": 44247 }, { "epoch": 1.47, "grad_norm": 0.5171042680740356, "learning_rate": 0.00030875240546559163, "loss": 1.736, "step": 44248 }, { "epoch": 1.47, "grad_norm": 0.5153624415397644, "learning_rate": 0.00030874195779296133, "loss": 1.7807, "step": 44249 }, { "epoch": 1.47, "grad_norm": 0.5335935354232788, "learning_rate": 0.00030873151010971973, "loss": 1.7288, "step": 44250 }, { "epoch": 1.47, "grad_norm": 0.5758901238441467, "learning_rate": 0.00030872106241587935, "loss": 1.7753, "step": 44251 }, { "epoch": 1.47, "grad_norm": 0.5212106704711914, "learning_rate": 0.0003087106147114528, "loss": 1.7523, "step": 44252 }, { "epoch": 1.47, "grad_norm": 0.5224118232727051, "learning_rate": 0.0003087001669964529, "loss": 1.7924, "step": 44253 }, { "epoch": 1.47, "grad_norm": 0.5012940168380737, "learning_rate": 0.00030868971927089226, "loss": 1.7264, "step": 44254 }, { "epoch": 1.47, "grad_norm": 0.5240697264671326, "learning_rate": 0.00030867927153478366, "loss": 1.7574, "step": 44255 }, { "epoch": 1.47, "grad_norm": 0.5233067870140076, "learning_rate": 0.0003086688237881396, "loss": 1.6983, "step": 44256 }, { "epoch": 1.47, "grad_norm": 0.5226818323135376, "learning_rate": 0.0003086583760309729, "loss": 1.729, "step": 44257 }, { "epoch": 1.47, "grad_norm": 0.5329762101173401, "learning_rate": 0.00030864792826329615, "loss": 1.7085, "step": 44258 }, { "epoch": 1.47, "grad_norm": 0.5226632356643677, "learning_rate": 0.0003086374804851222, "loss": 1.758, "step": 44259 }, { "epoch": 1.47, "grad_norm": 0.5157803893089294, "learning_rate": 0.00030862703269646355, "loss": 1.7371, "step": 44260 }, { "epoch": 1.47, "grad_norm": 0.5323160886764526, "learning_rate": 0.000308616584897333, "loss": 1.8472, "step": 44261 }, { "epoch": 1.47, "grad_norm": 0.5165609121322632, "learning_rate": 0.0003086061370877432, "loss": 1.8559, "step": 44262 }, { "epoch": 1.47, "grad_norm": 0.5206300616264343, "learning_rate": 0.0003085956892677067, "loss": 1.7696, "step": 44263 }, { "epoch": 1.47, "grad_norm": 0.5172634124755859, "learning_rate": 0.0003085852414372363, "loss": 1.7122, "step": 44264 }, { "epoch": 1.47, "grad_norm": 0.529488205909729, "learning_rate": 0.00030857479359634477, "loss": 1.7726, "step": 44265 }, { "epoch": 1.47, "grad_norm": 0.5393061637878418, "learning_rate": 0.0003085643457450448, "loss": 1.8091, "step": 44266 }, { "epoch": 1.47, "grad_norm": 0.5249595642089844, "learning_rate": 0.0003085538978833487, "loss": 1.7566, "step": 44267 }, { "epoch": 1.47, "grad_norm": 0.5247213840484619, "learning_rate": 0.0003085434500112697, "loss": 1.7316, "step": 44268 }, { "epoch": 1.47, "grad_norm": 0.516014039516449, "learning_rate": 0.0003085330021288201, "loss": 1.7486, "step": 44269 }, { "epoch": 1.47, "grad_norm": 0.5076701045036316, "learning_rate": 0.00030852255423601265, "loss": 1.7892, "step": 44270 }, { "epoch": 1.47, "grad_norm": 0.5227499604225159, "learning_rate": 0.0003085121063328601, "loss": 1.7308, "step": 44271 }, { "epoch": 1.47, "grad_norm": 0.5261463522911072, "learning_rate": 0.00030850165841937515, "loss": 1.7497, "step": 44272 }, { "epoch": 1.47, "grad_norm": 0.5167719125747681, "learning_rate": 0.0003084912104955705, "loss": 1.7834, "step": 44273 }, { "epoch": 1.47, "grad_norm": 0.5226666331291199, "learning_rate": 0.00030848076256145856, "loss": 1.7662, "step": 44274 }, { "epoch": 1.47, "grad_norm": 0.5015418529510498, "learning_rate": 0.0003084703146170524, "loss": 1.74, "step": 44275 }, { "epoch": 1.47, "grad_norm": 0.5250096321105957, "learning_rate": 0.0003084598666623645, "loss": 1.704, "step": 44276 }, { "epoch": 1.47, "grad_norm": 0.535033106803894, "learning_rate": 0.0003084494186974075, "loss": 1.7255, "step": 44277 }, { "epoch": 1.47, "grad_norm": 0.4936874210834503, "learning_rate": 0.0003084389707221942, "loss": 1.7458, "step": 44278 }, { "epoch": 1.47, "grad_norm": 0.5085679888725281, "learning_rate": 0.0003084285227367372, "loss": 1.7448, "step": 44279 }, { "epoch": 1.47, "grad_norm": 0.5056805610656738, "learning_rate": 0.0003084180747410493, "loss": 1.8147, "step": 44280 }, { "epoch": 1.47, "grad_norm": 0.7188136577606201, "learning_rate": 0.0003084076267351431, "loss": 1.7916, "step": 44281 }, { "epoch": 1.47, "grad_norm": 0.5150280594825745, "learning_rate": 0.00030839717871903113, "loss": 1.778, "step": 44282 }, { "epoch": 1.47, "grad_norm": 0.508476197719574, "learning_rate": 0.00030838673069272633, "loss": 1.8082, "step": 44283 }, { "epoch": 1.47, "grad_norm": 0.49199333786964417, "learning_rate": 0.00030837628265624126, "loss": 1.8221, "step": 44284 }, { "epoch": 1.47, "grad_norm": 0.5050826668739319, "learning_rate": 0.0003083658346095886, "loss": 1.722, "step": 44285 }, { "epoch": 1.47, "grad_norm": 0.4903942346572876, "learning_rate": 0.0003083553865527811, "loss": 1.7295, "step": 44286 }, { "epoch": 1.47, "grad_norm": 0.5182125568389893, "learning_rate": 0.0003083449384858313, "loss": 1.7926, "step": 44287 }, { "epoch": 1.47, "grad_norm": 0.49671173095703125, "learning_rate": 0.0003083344904087521, "loss": 1.7162, "step": 44288 }, { "epoch": 1.47, "grad_norm": 0.5032030344009399, "learning_rate": 0.00030832404232155594, "loss": 1.7409, "step": 44289 }, { "epoch": 1.47, "grad_norm": 0.5143148899078369, "learning_rate": 0.0003083135942242557, "loss": 1.7603, "step": 44290 }, { "epoch": 1.47, "grad_norm": 0.49973735213279724, "learning_rate": 0.00030830314611686395, "loss": 1.7507, "step": 44291 }, { "epoch": 1.47, "grad_norm": 0.4965988099575043, "learning_rate": 0.00030829269799939336, "loss": 1.7475, "step": 44292 }, { "epoch": 1.47, "grad_norm": 0.49936598539352417, "learning_rate": 0.00030828224987185677, "loss": 1.7105, "step": 44293 }, { "epoch": 1.47, "grad_norm": 0.5261109471321106, "learning_rate": 0.00030827180173426664, "loss": 1.7667, "step": 44294 }, { "epoch": 1.47, "grad_norm": 0.4918088912963867, "learning_rate": 0.00030826135358663577, "loss": 1.8025, "step": 44295 }, { "epoch": 1.47, "grad_norm": 0.496724396944046, "learning_rate": 0.00030825090542897695, "loss": 1.7375, "step": 44296 }, { "epoch": 1.47, "grad_norm": 0.5020751953125, "learning_rate": 0.00030824045726130254, "loss": 1.7666, "step": 44297 }, { "epoch": 1.47, "grad_norm": 0.5054230093955994, "learning_rate": 0.00030823000908362567, "loss": 1.7641, "step": 44298 }, { "epoch": 1.47, "grad_norm": 0.5149328112602234, "learning_rate": 0.0003082195608959585, "loss": 1.8187, "step": 44299 }, { "epoch": 1.47, "grad_norm": 0.5157712697982788, "learning_rate": 0.00030820911269831427, "loss": 1.7739, "step": 44300 }, { "epoch": 1.47, "grad_norm": 0.5169587731361389, "learning_rate": 0.0003081986644907052, "loss": 1.8315, "step": 44301 }, { "epoch": 1.47, "grad_norm": 0.5083311796188354, "learning_rate": 0.00030818821627314423, "loss": 1.7635, "step": 44302 }, { "epoch": 1.47, "grad_norm": 0.50419020652771, "learning_rate": 0.0003081777680456441, "loss": 1.7433, "step": 44303 }, { "epoch": 1.47, "grad_norm": 0.5158922672271729, "learning_rate": 0.00030816731980821716, "loss": 1.8769, "step": 44304 }, { "epoch": 1.47, "grad_norm": 0.5007539391517639, "learning_rate": 0.0003081568715608764, "loss": 1.7018, "step": 44305 }, { "epoch": 1.47, "grad_norm": 0.5108433365821838, "learning_rate": 0.0003081464233036344, "loss": 1.7404, "step": 44306 }, { "epoch": 1.47, "grad_norm": 0.5071197748184204, "learning_rate": 0.00030813597503650376, "loss": 1.7402, "step": 44307 }, { "epoch": 1.47, "grad_norm": 0.5132147073745728, "learning_rate": 0.0003081255267594973, "loss": 1.7694, "step": 44308 }, { "epoch": 1.47, "grad_norm": 0.4905659854412079, "learning_rate": 0.0003081150784726276, "loss": 1.7702, "step": 44309 }, { "epoch": 1.47, "grad_norm": 0.49143245816230774, "learning_rate": 0.00030810463017590757, "loss": 1.7937, "step": 44310 }, { "epoch": 1.47, "grad_norm": 0.5160834789276123, "learning_rate": 0.00030809418186934957, "loss": 1.7686, "step": 44311 }, { "epoch": 1.47, "grad_norm": 0.5192731022834778, "learning_rate": 0.00030808373355296643, "loss": 1.7686, "step": 44312 }, { "epoch": 1.47, "grad_norm": 0.49082300066947937, "learning_rate": 0.00030807328522677077, "loss": 1.6981, "step": 44313 }, { "epoch": 1.47, "grad_norm": 0.5119395852088928, "learning_rate": 0.00030806283689077544, "loss": 1.7661, "step": 44314 }, { "epoch": 1.47, "grad_norm": 0.49868443608283997, "learning_rate": 0.00030805238854499296, "loss": 1.7719, "step": 44315 }, { "epoch": 1.47, "grad_norm": 0.49723392724990845, "learning_rate": 0.0003080419401894361, "loss": 1.7446, "step": 44316 }, { "epoch": 1.47, "grad_norm": 0.5104126334190369, "learning_rate": 0.00030803149182411746, "loss": 1.761, "step": 44317 }, { "epoch": 1.47, "grad_norm": 0.5149239301681519, "learning_rate": 0.00030802104344904987, "loss": 1.7726, "step": 44318 }, { "epoch": 1.47, "grad_norm": 0.5045422911643982, "learning_rate": 0.00030801059506424576, "loss": 1.7714, "step": 44319 }, { "epoch": 1.47, "grad_norm": 0.5010401606559753, "learning_rate": 0.00030800014666971804, "loss": 1.7543, "step": 44320 }, { "epoch": 1.47, "grad_norm": 0.5091826319694519, "learning_rate": 0.0003079896982654794, "loss": 1.7085, "step": 44321 }, { "epoch": 1.47, "grad_norm": 0.5035170316696167, "learning_rate": 0.00030797924985154234, "loss": 1.735, "step": 44322 }, { "epoch": 1.47, "grad_norm": 0.5408069491386414, "learning_rate": 0.00030796880142791973, "loss": 1.8553, "step": 44323 }, { "epoch": 1.47, "grad_norm": 0.5171571969985962, "learning_rate": 0.0003079583529946241, "loss": 1.7615, "step": 44324 }, { "epoch": 1.47, "grad_norm": 0.4845528304576874, "learning_rate": 0.00030794790455166825, "loss": 1.7475, "step": 44325 }, { "epoch": 1.47, "grad_norm": 0.5237877368927002, "learning_rate": 0.0003079374560990647, "loss": 1.8135, "step": 44326 }, { "epoch": 1.47, "grad_norm": 0.49986574053764343, "learning_rate": 0.0003079270076368264, "loss": 1.7384, "step": 44327 }, { "epoch": 1.47, "grad_norm": 0.5092954039573669, "learning_rate": 0.0003079165591649659, "loss": 1.7683, "step": 44328 }, { "epoch": 1.47, "grad_norm": 0.5135636925697327, "learning_rate": 0.00030790611068349567, "loss": 1.7251, "step": 44329 }, { "epoch": 1.47, "grad_norm": 0.5291154980659485, "learning_rate": 0.00030789566219242874, "loss": 1.7739, "step": 44330 }, { "epoch": 1.47, "grad_norm": 0.49917566776275635, "learning_rate": 0.00030788521369177764, "loss": 1.7525, "step": 44331 }, { "epoch": 1.47, "grad_norm": 0.5076544284820557, "learning_rate": 0.00030787476518155497, "loss": 1.8375, "step": 44332 }, { "epoch": 1.47, "grad_norm": 0.5216241478919983, "learning_rate": 0.00030786431666177355, "loss": 1.8059, "step": 44333 }, { "epoch": 1.47, "grad_norm": 0.5047658085823059, "learning_rate": 0.00030785386813244594, "loss": 1.7657, "step": 44334 }, { "epoch": 1.48, "grad_norm": 0.5117461681365967, "learning_rate": 0.000307843419593585, "loss": 1.8036, "step": 44335 }, { "epoch": 1.48, "grad_norm": 0.5048849582672119, "learning_rate": 0.00030783297104520325, "loss": 1.6214, "step": 44336 }, { "epoch": 1.48, "grad_norm": 0.4915626049041748, "learning_rate": 0.0003078225224873134, "loss": 1.8195, "step": 44337 }, { "epoch": 1.48, "grad_norm": 0.5122469663619995, "learning_rate": 0.00030781207391992815, "loss": 1.8184, "step": 44338 }, { "epoch": 1.48, "grad_norm": 0.521826982498169, "learning_rate": 0.00030780162534306023, "loss": 1.8154, "step": 44339 }, { "epoch": 1.48, "grad_norm": 0.515897810459137, "learning_rate": 0.0003077911767567223, "loss": 1.7305, "step": 44340 }, { "epoch": 1.48, "grad_norm": 0.5174468159675598, "learning_rate": 0.0003077807281609271, "loss": 1.7757, "step": 44341 }, { "epoch": 1.48, "grad_norm": 0.5208054780960083, "learning_rate": 0.0003077702795556871, "loss": 1.7144, "step": 44342 }, { "epoch": 1.48, "grad_norm": 0.5046501159667969, "learning_rate": 0.0003077598309410152, "loss": 1.8307, "step": 44343 }, { "epoch": 1.48, "grad_norm": 0.5126213431358337, "learning_rate": 0.00030774938231692395, "loss": 1.7701, "step": 44344 }, { "epoch": 1.48, "grad_norm": 0.5430232882499695, "learning_rate": 0.00030773893368342615, "loss": 1.7675, "step": 44345 }, { "epoch": 1.48, "grad_norm": 0.512653648853302, "learning_rate": 0.0003077284850405345, "loss": 1.8363, "step": 44346 }, { "epoch": 1.48, "grad_norm": 0.5263803005218506, "learning_rate": 0.00030771803638826145, "loss": 1.7902, "step": 44347 }, { "epoch": 1.48, "grad_norm": 0.5404171943664551, "learning_rate": 0.0003077075877266199, "loss": 1.7871, "step": 44348 }, { "epoch": 1.48, "grad_norm": 0.519038200378418, "learning_rate": 0.00030769713905562254, "loss": 1.8251, "step": 44349 }, { "epoch": 1.48, "grad_norm": 0.534862220287323, "learning_rate": 0.0003076866903752819, "loss": 1.7753, "step": 44350 }, { "epoch": 1.48, "grad_norm": 0.4850422441959381, "learning_rate": 0.0003076762416856108, "loss": 1.7225, "step": 44351 }, { "epoch": 1.48, "grad_norm": 0.5319954752922058, "learning_rate": 0.0003076657929866218, "loss": 1.7694, "step": 44352 }, { "epoch": 1.48, "grad_norm": 0.49417170882225037, "learning_rate": 0.0003076553442783278, "loss": 1.7308, "step": 44353 }, { "epoch": 1.48, "grad_norm": 0.5044696927070618, "learning_rate": 0.00030764489556074116, "loss": 1.7536, "step": 44354 }, { "epoch": 1.48, "grad_norm": 0.4979401230812073, "learning_rate": 0.00030763444683387485, "loss": 1.707, "step": 44355 }, { "epoch": 1.48, "grad_norm": 0.5135796666145325, "learning_rate": 0.0003076239980977415, "loss": 1.7619, "step": 44356 }, { "epoch": 1.48, "grad_norm": 0.5531882047653198, "learning_rate": 0.00030761354935235363, "loss": 1.7198, "step": 44357 }, { "epoch": 1.48, "grad_norm": 0.5401274561882019, "learning_rate": 0.0003076031005977241, "loss": 1.702, "step": 44358 }, { "epoch": 1.48, "grad_norm": 0.5256591439247131, "learning_rate": 0.00030759265183386545, "loss": 1.8207, "step": 44359 }, { "epoch": 1.48, "grad_norm": 0.49943429231643677, "learning_rate": 0.00030758220306079055, "loss": 1.6778, "step": 44360 }, { "epoch": 1.48, "grad_norm": 0.5350183844566345, "learning_rate": 0.0003075717542785118, "loss": 1.721, "step": 44361 }, { "epoch": 1.48, "grad_norm": 0.54493248462677, "learning_rate": 0.00030756130548704227, "loss": 1.7823, "step": 44362 }, { "epoch": 1.48, "grad_norm": 0.5216025114059448, "learning_rate": 0.0003075508566863943, "loss": 1.801, "step": 44363 }, { "epoch": 1.48, "grad_norm": 0.5112902522087097, "learning_rate": 0.00030754040787658074, "loss": 1.7692, "step": 44364 }, { "epoch": 1.48, "grad_norm": 0.5095694661140442, "learning_rate": 0.00030752995905761427, "loss": 1.8161, "step": 44365 }, { "epoch": 1.48, "grad_norm": 0.5325840711593628, "learning_rate": 0.0003075195102295074, "loss": 1.7728, "step": 44366 }, { "epoch": 1.48, "grad_norm": 0.5491888523101807, "learning_rate": 0.00030750906139227313, "loss": 1.8378, "step": 44367 }, { "epoch": 1.48, "grad_norm": 0.5425460338592529, "learning_rate": 0.00030749861254592383, "loss": 1.8373, "step": 44368 }, { "epoch": 1.48, "grad_norm": 0.5281360745429993, "learning_rate": 0.0003074881636904724, "loss": 1.7794, "step": 44369 }, { "epoch": 1.48, "grad_norm": 0.5471705794334412, "learning_rate": 0.00030747771482593137, "loss": 1.6657, "step": 44370 }, { "epoch": 1.48, "grad_norm": 0.5234925150871277, "learning_rate": 0.0003074672659523136, "loss": 1.7357, "step": 44371 }, { "epoch": 1.48, "grad_norm": 0.5177698731422424, "learning_rate": 0.00030745681706963163, "loss": 1.7528, "step": 44372 }, { "epoch": 1.48, "grad_norm": 0.519732654094696, "learning_rate": 0.0003074463681778982, "loss": 1.7629, "step": 44373 }, { "epoch": 1.48, "grad_norm": 0.5236949920654297, "learning_rate": 0.0003074359192771259, "loss": 1.7606, "step": 44374 }, { "epoch": 1.48, "grad_norm": 0.5188732743263245, "learning_rate": 0.00030742547036732745, "loss": 1.7454, "step": 44375 }, { "epoch": 1.48, "grad_norm": 0.5133183002471924, "learning_rate": 0.00030741502144851566, "loss": 1.8539, "step": 44376 }, { "epoch": 1.48, "grad_norm": 0.5570810437202454, "learning_rate": 0.0003074045725207031, "loss": 1.7147, "step": 44377 }, { "epoch": 1.48, "grad_norm": 0.5161698460578918, "learning_rate": 0.00030739412358390256, "loss": 1.8296, "step": 44378 }, { "epoch": 1.48, "grad_norm": 0.5215871334075928, "learning_rate": 0.0003073836746381266, "loss": 1.8502, "step": 44379 }, { "epoch": 1.48, "grad_norm": 0.5243762731552124, "learning_rate": 0.0003073732256833879, "loss": 1.7821, "step": 44380 }, { "epoch": 1.48, "grad_norm": 0.513680636882782, "learning_rate": 0.0003073627767196992, "loss": 1.7758, "step": 44381 }, { "epoch": 1.48, "grad_norm": 0.5350263714790344, "learning_rate": 0.0003073523277470732, "loss": 1.7108, "step": 44382 }, { "epoch": 1.48, "grad_norm": 0.5253490805625916, "learning_rate": 0.00030734187876552257, "loss": 1.8366, "step": 44383 }, { "epoch": 1.48, "grad_norm": 0.5358109474182129, "learning_rate": 0.00030733142977505993, "loss": 1.6681, "step": 44384 }, { "epoch": 1.48, "grad_norm": 0.5194965600967407, "learning_rate": 0.0003073209807756981, "loss": 1.7275, "step": 44385 }, { "epoch": 1.48, "grad_norm": 0.5107303857803345, "learning_rate": 0.00030731053176744966, "loss": 1.8223, "step": 44386 }, { "epoch": 1.48, "grad_norm": 0.5308806896209717, "learning_rate": 0.00030730008275032725, "loss": 1.7915, "step": 44387 }, { "epoch": 1.48, "grad_norm": 0.5217549204826355, "learning_rate": 0.00030728963372434354, "loss": 1.7654, "step": 44388 }, { "epoch": 1.48, "grad_norm": 0.5253415703773499, "learning_rate": 0.00030727918468951144, "loss": 1.7853, "step": 44389 }, { "epoch": 1.48, "grad_norm": 0.5263768434524536, "learning_rate": 0.0003072687356458435, "loss": 1.7732, "step": 44390 }, { "epoch": 1.48, "grad_norm": 0.5284902453422546, "learning_rate": 0.0003072582865933522, "loss": 1.7667, "step": 44391 }, { "epoch": 1.48, "grad_norm": 0.5035226345062256, "learning_rate": 0.0003072478375320506, "loss": 1.7623, "step": 44392 }, { "epoch": 1.48, "grad_norm": 0.5165768265724182, "learning_rate": 0.0003072373884619511, "loss": 1.8181, "step": 44393 }, { "epoch": 1.48, "grad_norm": 0.5129256248474121, "learning_rate": 0.00030722693938306647, "loss": 1.6944, "step": 44394 }, { "epoch": 1.48, "grad_norm": 0.5212423205375671, "learning_rate": 0.00030721649029540944, "loss": 1.8053, "step": 44395 }, { "epoch": 1.48, "grad_norm": 0.5031659007072449, "learning_rate": 0.00030720604119899265, "loss": 1.7727, "step": 44396 }, { "epoch": 1.48, "grad_norm": 0.5173922777175903, "learning_rate": 0.00030719559209382884, "loss": 1.8189, "step": 44397 }, { "epoch": 1.48, "grad_norm": 0.5186527967453003, "learning_rate": 0.0003071851429799306, "loss": 1.745, "step": 44398 }, { "epoch": 1.48, "grad_norm": 0.5104094743728638, "learning_rate": 0.0003071746938573106, "loss": 1.7622, "step": 44399 }, { "epoch": 1.48, "grad_norm": 0.514295220375061, "learning_rate": 0.00030716424472598163, "loss": 1.8046, "step": 44400 }, { "epoch": 1.48, "grad_norm": 0.5084652900695801, "learning_rate": 0.0003071537955859563, "loss": 1.7564, "step": 44401 }, { "epoch": 1.48, "grad_norm": 0.5061125159263611, "learning_rate": 0.0003071433464372473, "loss": 1.7153, "step": 44402 }, { "epoch": 1.48, "grad_norm": 0.5546144247055054, "learning_rate": 0.00030713289727986743, "loss": 1.7595, "step": 44403 }, { "epoch": 1.48, "grad_norm": 0.5365858674049377, "learning_rate": 0.0003071224481138291, "loss": 1.731, "step": 44404 }, { "epoch": 1.48, "grad_norm": 0.5035969018936157, "learning_rate": 0.00030711199893914526, "loss": 1.789, "step": 44405 }, { "epoch": 1.48, "grad_norm": 0.5052172541618347, "learning_rate": 0.00030710154975582853, "loss": 1.7204, "step": 44406 }, { "epoch": 1.48, "grad_norm": 0.5055953860282898, "learning_rate": 0.00030709110056389153, "loss": 1.8248, "step": 44407 }, { "epoch": 1.48, "grad_norm": 0.5153864026069641, "learning_rate": 0.00030708065136334704, "loss": 1.827, "step": 44408 }, { "epoch": 1.48, "grad_norm": 0.5136667490005493, "learning_rate": 0.00030707020215420754, "loss": 1.8021, "step": 44409 }, { "epoch": 1.48, "grad_norm": 0.5027058720588684, "learning_rate": 0.000307059752936486, "loss": 1.778, "step": 44410 }, { "epoch": 1.48, "grad_norm": 0.5178086161613464, "learning_rate": 0.0003070493037101949, "loss": 1.8596, "step": 44411 }, { "epoch": 1.48, "grad_norm": 0.491910457611084, "learning_rate": 0.000307038854475347, "loss": 1.7358, "step": 44412 }, { "epoch": 1.48, "grad_norm": 0.5078168511390686, "learning_rate": 0.000307028405231955, "loss": 1.7839, "step": 44413 }, { "epoch": 1.48, "grad_norm": 0.5185188055038452, "learning_rate": 0.00030701795598003144, "loss": 1.7201, "step": 44414 }, { "epoch": 1.48, "grad_norm": 0.5231869220733643, "learning_rate": 0.0003070075067195893, "loss": 1.7773, "step": 44415 }, { "epoch": 1.48, "grad_norm": 0.4952624440193176, "learning_rate": 0.0003069970574506408, "loss": 1.7833, "step": 44416 }, { "epoch": 1.48, "grad_norm": 0.4948771595954895, "learning_rate": 0.00030698660817319917, "loss": 1.79, "step": 44417 }, { "epoch": 1.48, "grad_norm": 0.49721384048461914, "learning_rate": 0.0003069761588872767, "loss": 1.8159, "step": 44418 }, { "epoch": 1.48, "grad_norm": 0.5091584324836731, "learning_rate": 0.0003069657095928862, "loss": 1.7423, "step": 44419 }, { "epoch": 1.48, "grad_norm": 0.5045817494392395, "learning_rate": 0.0003069552602900405, "loss": 1.8146, "step": 44420 }, { "epoch": 1.48, "grad_norm": 0.5237659215927124, "learning_rate": 0.00030694481097875195, "loss": 1.8846, "step": 44421 }, { "epoch": 1.48, "grad_norm": 0.5048039555549622, "learning_rate": 0.00030693436165903353, "loss": 1.7748, "step": 44422 }, { "epoch": 1.48, "grad_norm": 0.49407958984375, "learning_rate": 0.0003069239123308978, "loss": 1.8375, "step": 44423 }, { "epoch": 1.48, "grad_norm": 0.5181103348731995, "learning_rate": 0.0003069134629943574, "loss": 1.6778, "step": 44424 }, { "epoch": 1.48, "grad_norm": 0.4932851195335388, "learning_rate": 0.0003069030136494251, "loss": 1.81, "step": 44425 }, { "epoch": 1.48, "grad_norm": 0.502299427986145, "learning_rate": 0.0003068925642961136, "loss": 1.707, "step": 44426 }, { "epoch": 1.48, "grad_norm": 0.5216432809829712, "learning_rate": 0.0003068821149344355, "loss": 1.7624, "step": 44427 }, { "epoch": 1.48, "grad_norm": 0.4963163435459137, "learning_rate": 0.0003068716655644036, "loss": 1.7274, "step": 44428 }, { "epoch": 1.48, "grad_norm": 0.5096152424812317, "learning_rate": 0.0003068612161860304, "loss": 1.7159, "step": 44429 }, { "epoch": 1.48, "grad_norm": 0.5441262722015381, "learning_rate": 0.0003068507667993288, "loss": 1.7887, "step": 44430 }, { "epoch": 1.48, "grad_norm": 0.5074863433837891, "learning_rate": 0.00030684031740431123, "loss": 1.8056, "step": 44431 }, { "epoch": 1.48, "grad_norm": 0.5147425532341003, "learning_rate": 0.00030682986800099064, "loss": 1.7909, "step": 44432 }, { "epoch": 1.48, "grad_norm": 0.5058332681655884, "learning_rate": 0.00030681941858937963, "loss": 1.7405, "step": 44433 }, { "epoch": 1.48, "grad_norm": 0.512363076210022, "learning_rate": 0.0003068089691694908, "loss": 1.7924, "step": 44434 }, { "epoch": 1.48, "grad_norm": 0.5011674761772156, "learning_rate": 0.00030679851974133694, "loss": 1.7848, "step": 44435 }, { "epoch": 1.48, "grad_norm": 0.5171082615852356, "learning_rate": 0.00030678807030493056, "loss": 1.7865, "step": 44436 }, { "epoch": 1.48, "grad_norm": 0.5175540447235107, "learning_rate": 0.0003067776208602845, "loss": 1.7962, "step": 44437 }, { "epoch": 1.48, "grad_norm": 0.5241500735282898, "learning_rate": 0.0003067671714074115, "loss": 1.7047, "step": 44438 }, { "epoch": 1.48, "grad_norm": 0.5099780559539795, "learning_rate": 0.000306756721946324, "loss": 1.8327, "step": 44439 }, { "epoch": 1.48, "grad_norm": 0.5251520276069641, "learning_rate": 0.000306746272477035, "loss": 1.7979, "step": 44440 }, { "epoch": 1.48, "grad_norm": 0.5221068263053894, "learning_rate": 0.0003067358229995569, "loss": 1.7459, "step": 44441 }, { "epoch": 1.48, "grad_norm": 0.5317621827125549, "learning_rate": 0.0003067253735139025, "loss": 1.7887, "step": 44442 }, { "epoch": 1.48, "grad_norm": 0.5073362588882446, "learning_rate": 0.0003067149240200846, "loss": 1.7809, "step": 44443 }, { "epoch": 1.48, "grad_norm": 0.523145318031311, "learning_rate": 0.00030670447451811565, "loss": 1.8424, "step": 44444 }, { "epoch": 1.48, "grad_norm": 0.5296570658683777, "learning_rate": 0.0003066940250080085, "loss": 1.7438, "step": 44445 }, { "epoch": 1.48, "grad_norm": 0.5197421908378601, "learning_rate": 0.00030668357548977577, "loss": 1.7634, "step": 44446 }, { "epoch": 1.48, "grad_norm": 0.49558666348457336, "learning_rate": 0.0003066731259634302, "loss": 1.7401, "step": 44447 }, { "epoch": 1.48, "grad_norm": 0.5193808674812317, "learning_rate": 0.00030666267642898447, "loss": 1.7892, "step": 44448 }, { "epoch": 1.48, "grad_norm": 0.8254883289337158, "learning_rate": 0.00030665222688645117, "loss": 1.8409, "step": 44449 }, { "epoch": 1.48, "grad_norm": 0.5221216678619385, "learning_rate": 0.00030664177733584307, "loss": 1.6994, "step": 44450 }, { "epoch": 1.48, "grad_norm": 0.5071731805801392, "learning_rate": 0.0003066313277771728, "loss": 1.7662, "step": 44451 }, { "epoch": 1.48, "grad_norm": 0.5151094198226929, "learning_rate": 0.00030662087821045316, "loss": 1.8679, "step": 44452 }, { "epoch": 1.48, "grad_norm": 0.5125153064727783, "learning_rate": 0.00030661042863569667, "loss": 1.7781, "step": 44453 }, { "epoch": 1.48, "grad_norm": 0.5102852582931519, "learning_rate": 0.0003065999790529161, "loss": 1.8178, "step": 44454 }, { "epoch": 1.48, "grad_norm": 0.5062445998191833, "learning_rate": 0.0003065895294621241, "loss": 1.7991, "step": 44455 }, { "epoch": 1.48, "grad_norm": 0.5182598829269409, "learning_rate": 0.00030657907986333347, "loss": 1.7532, "step": 44456 }, { "epoch": 1.48, "grad_norm": 0.5186681747436523, "learning_rate": 0.0003065686302565567, "loss": 1.7892, "step": 44457 }, { "epoch": 1.48, "grad_norm": 0.537428081035614, "learning_rate": 0.00030655818064180676, "loss": 1.701, "step": 44458 }, { "epoch": 1.48, "grad_norm": 0.5148927569389343, "learning_rate": 0.000306547731019096, "loss": 1.7071, "step": 44459 }, { "epoch": 1.48, "grad_norm": 0.5199219584465027, "learning_rate": 0.0003065372813884373, "loss": 1.7585, "step": 44460 }, { "epoch": 1.48, "grad_norm": 0.5135126709938049, "learning_rate": 0.0003065268317498433, "loss": 1.7785, "step": 44461 }, { "epoch": 1.48, "grad_norm": 0.5750669240951538, "learning_rate": 0.0003065163821033267, "loss": 1.7597, "step": 44462 }, { "epoch": 1.48, "grad_norm": 0.5034447312355042, "learning_rate": 0.0003065059324489002, "loss": 1.7855, "step": 44463 }, { "epoch": 1.48, "grad_norm": 0.5152782201766968, "learning_rate": 0.00030649548278657634, "loss": 1.8024, "step": 44464 }, { "epoch": 1.48, "grad_norm": 0.483127623796463, "learning_rate": 0.000306485033116368, "loss": 1.6894, "step": 44465 }, { "epoch": 1.48, "grad_norm": 0.5108606815338135, "learning_rate": 0.00030647458343828775, "loss": 1.6498, "step": 44466 }, { "epoch": 1.48, "grad_norm": 0.5058897733688354, "learning_rate": 0.00030646413375234833, "loss": 1.8261, "step": 44467 }, { "epoch": 1.48, "grad_norm": 0.523584246635437, "learning_rate": 0.00030645368405856255, "loss": 1.7734, "step": 44468 }, { "epoch": 1.48, "grad_norm": 0.5266191363334656, "learning_rate": 0.00030644323435694275, "loss": 1.8139, "step": 44469 }, { "epoch": 1.48, "grad_norm": 0.5021700859069824, "learning_rate": 0.0003064327846475019, "loss": 1.7795, "step": 44470 }, { "epoch": 1.48, "grad_norm": 0.5728521943092346, "learning_rate": 0.00030642233493025247, "loss": 1.7484, "step": 44471 }, { "epoch": 1.48, "grad_norm": 0.5264580249786377, "learning_rate": 0.00030641188520520745, "loss": 1.7826, "step": 44472 }, { "epoch": 1.48, "grad_norm": 0.7294716238975525, "learning_rate": 0.0003064014354723793, "loss": 1.8264, "step": 44473 }, { "epoch": 1.48, "grad_norm": 0.5084877014160156, "learning_rate": 0.0003063909857317807, "loss": 1.752, "step": 44474 }, { "epoch": 1.48, "grad_norm": 0.49403536319732666, "learning_rate": 0.00030638053598342445, "loss": 1.7266, "step": 44475 }, { "epoch": 1.48, "grad_norm": 0.541493833065033, "learning_rate": 0.000306370086227323, "loss": 1.7431, "step": 44476 }, { "epoch": 1.48, "grad_norm": 0.5286950469017029, "learning_rate": 0.00030635963646348947, "loss": 1.8084, "step": 44477 }, { "epoch": 1.48, "grad_norm": 0.4976870119571686, "learning_rate": 0.0003063491866919361, "loss": 1.7365, "step": 44478 }, { "epoch": 1.48, "grad_norm": 0.5183931589126587, "learning_rate": 0.0003063387369126758, "loss": 1.6852, "step": 44479 }, { "epoch": 1.48, "grad_norm": 0.5063359141349792, "learning_rate": 0.0003063282871257212, "loss": 1.78, "step": 44480 }, { "epoch": 1.48, "grad_norm": 0.5200570225715637, "learning_rate": 0.0003063178373310849, "loss": 1.7269, "step": 44481 }, { "epoch": 1.48, "grad_norm": 0.5283704400062561, "learning_rate": 0.00030630738752877987, "loss": 1.7699, "step": 44482 }, { "epoch": 1.48, "grad_norm": 0.5055410861968994, "learning_rate": 0.00030629693771881846, "loss": 1.7353, "step": 44483 }, { "epoch": 1.48, "grad_norm": 0.5133724808692932, "learning_rate": 0.00030628648790121353, "loss": 1.7918, "step": 44484 }, { "epoch": 1.48, "grad_norm": 0.5052685141563416, "learning_rate": 0.0003062760380759778, "loss": 1.7334, "step": 44485 }, { "epoch": 1.48, "grad_norm": 0.5198394656181335, "learning_rate": 0.0003062655882431238, "loss": 1.7716, "step": 44486 }, { "epoch": 1.48, "grad_norm": 0.5268372893333435, "learning_rate": 0.00030625513840266424, "loss": 1.7837, "step": 44487 }, { "epoch": 1.48, "grad_norm": 0.5171996355056763, "learning_rate": 0.000306244688554612, "loss": 1.8037, "step": 44488 }, { "epoch": 1.48, "grad_norm": 0.5107020735740662, "learning_rate": 0.00030623423869897965, "loss": 1.7338, "step": 44489 }, { "epoch": 1.48, "grad_norm": 0.5103110671043396, "learning_rate": 0.00030622378883577974, "loss": 1.7175, "step": 44490 }, { "epoch": 1.48, "grad_norm": 0.5386641025543213, "learning_rate": 0.0003062133389650251, "loss": 1.8119, "step": 44491 }, { "epoch": 1.48, "grad_norm": 0.5275266766548157, "learning_rate": 0.00030620288908672833, "loss": 1.6569, "step": 44492 }, { "epoch": 1.48, "grad_norm": 0.526881754398346, "learning_rate": 0.0003061924392009022, "loss": 1.8356, "step": 44493 }, { "epoch": 1.48, "grad_norm": 0.769943118095398, "learning_rate": 0.00030618198930755936, "loss": 1.8048, "step": 44494 }, { "epoch": 1.48, "grad_norm": 0.49847516417503357, "learning_rate": 0.0003061715394067126, "loss": 1.7763, "step": 44495 }, { "epoch": 1.48, "grad_norm": 0.5098806619644165, "learning_rate": 0.0003061610894983744, "loss": 1.6874, "step": 44496 }, { "epoch": 1.48, "grad_norm": 0.5352365374565125, "learning_rate": 0.00030615063958255753, "loss": 1.8003, "step": 44497 }, { "epoch": 1.48, "grad_norm": 0.5121660828590393, "learning_rate": 0.0003061401896592747, "loss": 1.772, "step": 44498 }, { "epoch": 1.48, "grad_norm": 0.5237680673599243, "learning_rate": 0.00030612973972853866, "loss": 1.7312, "step": 44499 }, { "epoch": 1.48, "grad_norm": 0.49441254138946533, "learning_rate": 0.000306119289790362, "loss": 1.7326, "step": 44500 }, { "epoch": 1.48, "grad_norm": 0.5207580327987671, "learning_rate": 0.0003061088398447573, "loss": 1.7345, "step": 44501 }, { "epoch": 1.48, "grad_norm": 0.5007172226905823, "learning_rate": 0.00030609838989173753, "loss": 1.73, "step": 44502 }, { "epoch": 1.48, "grad_norm": 0.5601797699928284, "learning_rate": 0.00030608793993131515, "loss": 1.805, "step": 44503 }, { "epoch": 1.48, "grad_norm": 0.5345661044120789, "learning_rate": 0.00030607748996350285, "loss": 1.8103, "step": 44504 }, { "epoch": 1.48, "grad_norm": 0.5102535486221313, "learning_rate": 0.0003060670399883134, "loss": 1.7655, "step": 44505 }, { "epoch": 1.48, "grad_norm": 0.5576411485671997, "learning_rate": 0.00030605659000575944, "loss": 1.8333, "step": 44506 }, { "epoch": 1.48, "grad_norm": 0.5228801369667053, "learning_rate": 0.0003060461400158538, "loss": 1.7981, "step": 44507 }, { "epoch": 1.48, "grad_norm": 0.5083467960357666, "learning_rate": 0.00030603569001860884, "loss": 1.7662, "step": 44508 }, { "epoch": 1.48, "grad_norm": 0.5062690377235413, "learning_rate": 0.00030602524001403764, "loss": 1.8223, "step": 44509 }, { "epoch": 1.48, "grad_norm": 0.5085246562957764, "learning_rate": 0.00030601479000215263, "loss": 1.7475, "step": 44510 }, { "epoch": 1.48, "grad_norm": 0.5367883443832397, "learning_rate": 0.00030600433998296644, "loss": 1.7613, "step": 44511 }, { "epoch": 1.48, "grad_norm": 0.5354363322257996, "learning_rate": 0.000305993889956492, "loss": 1.7077, "step": 44512 }, { "epoch": 1.48, "grad_norm": 0.5081129670143127, "learning_rate": 0.00030598343992274175, "loss": 1.756, "step": 44513 }, { "epoch": 1.48, "grad_norm": 0.5068200826644897, "learning_rate": 0.00030597298988172856, "loss": 1.716, "step": 44514 }, { "epoch": 1.48, "grad_norm": 0.5114521980285645, "learning_rate": 0.00030596253983346505, "loss": 1.7196, "step": 44515 }, { "epoch": 1.48, "grad_norm": 0.524237871170044, "learning_rate": 0.0003059520897779639, "loss": 1.7744, "step": 44516 }, { "epoch": 1.48, "grad_norm": 0.5029661059379578, "learning_rate": 0.0003059416397152377, "loss": 1.7441, "step": 44517 }, { "epoch": 1.48, "grad_norm": 0.522201657295227, "learning_rate": 0.00030593118964529926, "loss": 1.7637, "step": 44518 }, { "epoch": 1.48, "grad_norm": 0.5282090902328491, "learning_rate": 0.0003059207395681612, "loss": 1.7479, "step": 44519 }, { "epoch": 1.48, "grad_norm": 0.5166128277778625, "learning_rate": 0.00030591028948383643, "loss": 1.7939, "step": 44520 }, { "epoch": 1.48, "grad_norm": 0.49388861656188965, "learning_rate": 0.0003058998393923372, "loss": 1.773, "step": 44521 }, { "epoch": 1.48, "grad_norm": 0.5289225578308105, "learning_rate": 0.00030588938929367656, "loss": 1.8671, "step": 44522 }, { "epoch": 1.48, "grad_norm": 0.5189694166183472, "learning_rate": 0.00030587893918786706, "loss": 1.7352, "step": 44523 }, { "epoch": 1.48, "grad_norm": 0.5097551345825195, "learning_rate": 0.00030586848907492134, "loss": 1.7798, "step": 44524 }, { "epoch": 1.48, "grad_norm": 0.5445218682289124, "learning_rate": 0.00030585803895485225, "loss": 1.8441, "step": 44525 }, { "epoch": 1.48, "grad_norm": 0.5447285771369934, "learning_rate": 0.0003058475888276722, "loss": 1.746, "step": 44526 }, { "epoch": 1.48, "grad_norm": 0.6655576825141907, "learning_rate": 0.00030583713869339416, "loss": 1.7541, "step": 44527 }, { "epoch": 1.48, "grad_norm": 0.540501058101654, "learning_rate": 0.00030582668855203064, "loss": 1.7504, "step": 44528 }, { "epoch": 1.48, "grad_norm": 0.5120649933815002, "learning_rate": 0.0003058162384035944, "loss": 1.7449, "step": 44529 }, { "epoch": 1.48, "grad_norm": 0.5242490768432617, "learning_rate": 0.00030580578824809817, "loss": 1.6953, "step": 44530 }, { "epoch": 1.48, "grad_norm": 0.5113387703895569, "learning_rate": 0.0003057953380855544, "loss": 1.7635, "step": 44531 }, { "epoch": 1.48, "grad_norm": 0.5122615694999695, "learning_rate": 0.0003057848879159761, "loss": 1.8081, "step": 44532 }, { "epoch": 1.48, "grad_norm": 0.5343331694602966, "learning_rate": 0.00030577443773937565, "loss": 1.7894, "step": 44533 }, { "epoch": 1.48, "grad_norm": 0.5334671139717102, "learning_rate": 0.0003057639875557661, "loss": 1.8585, "step": 44534 }, { "epoch": 1.48, "grad_norm": 0.5210492610931396, "learning_rate": 0.0003057535373651597, "loss": 1.8338, "step": 44535 }, { "epoch": 1.48, "grad_norm": 0.5305673480033875, "learning_rate": 0.00030574308716756947, "loss": 1.8071, "step": 44536 }, { "epoch": 1.48, "grad_norm": 0.5259711742401123, "learning_rate": 0.00030573263696300797, "loss": 1.7521, "step": 44537 }, { "epoch": 1.48, "grad_norm": 0.5340946316719055, "learning_rate": 0.00030572218675148783, "loss": 1.7838, "step": 44538 }, { "epoch": 1.48, "grad_norm": 0.5305463075637817, "learning_rate": 0.00030571173653302187, "loss": 1.7727, "step": 44539 }, { "epoch": 1.48, "grad_norm": 0.5268961191177368, "learning_rate": 0.0003057012863076227, "loss": 1.7231, "step": 44540 }, { "epoch": 1.48, "grad_norm": 0.5279815793037415, "learning_rate": 0.0003056908360753029, "loss": 1.766, "step": 44541 }, { "epoch": 1.48, "grad_norm": 0.5168195366859436, "learning_rate": 0.00030568038583607534, "loss": 1.7795, "step": 44542 }, { "epoch": 1.48, "grad_norm": 0.536597490310669, "learning_rate": 0.00030566993558995274, "loss": 1.708, "step": 44543 }, { "epoch": 1.48, "grad_norm": 0.5317867994308472, "learning_rate": 0.00030565948533694756, "loss": 1.755, "step": 44544 }, { "epoch": 1.48, "grad_norm": 0.5207128524780273, "learning_rate": 0.0003056490350770726, "loss": 1.7429, "step": 44545 }, { "epoch": 1.48, "grad_norm": 0.5311592221260071, "learning_rate": 0.00030563858481034057, "loss": 1.796, "step": 44546 }, { "epoch": 1.48, "grad_norm": 0.5163272619247437, "learning_rate": 0.00030562813453676406, "loss": 1.858, "step": 44547 }, { "epoch": 1.48, "grad_norm": 0.5151669383049011, "learning_rate": 0.0003056176842563559, "loss": 1.7066, "step": 44548 }, { "epoch": 1.48, "grad_norm": 0.5072147250175476, "learning_rate": 0.0003056072339691287, "loss": 1.761, "step": 44549 }, { "epoch": 1.48, "grad_norm": 0.5022897124290466, "learning_rate": 0.00030559678367509513, "loss": 1.7522, "step": 44550 }, { "epoch": 1.48, "grad_norm": 0.5042548179626465, "learning_rate": 0.00030558633337426786, "loss": 1.7859, "step": 44551 }, { "epoch": 1.48, "grad_norm": 0.5115259885787964, "learning_rate": 0.00030557588306665966, "loss": 1.8359, "step": 44552 }, { "epoch": 1.48, "grad_norm": 0.5085718631744385, "learning_rate": 0.00030556543275228306, "loss": 1.8534, "step": 44553 }, { "epoch": 1.48, "grad_norm": 0.5047442317008972, "learning_rate": 0.00030555498243115095, "loss": 1.7382, "step": 44554 }, { "epoch": 1.48, "grad_norm": 0.5153994560241699, "learning_rate": 0.00030554453210327587, "loss": 1.716, "step": 44555 }, { "epoch": 1.48, "grad_norm": 0.5197039246559143, "learning_rate": 0.0003055340817686705, "loss": 1.7461, "step": 44556 }, { "epoch": 1.48, "grad_norm": 0.5237469673156738, "learning_rate": 0.00030552363142734767, "loss": 1.7914, "step": 44557 }, { "epoch": 1.48, "grad_norm": 0.5105459094047546, "learning_rate": 0.00030551318107931993, "loss": 1.7131, "step": 44558 }, { "epoch": 1.48, "grad_norm": 0.5060784816741943, "learning_rate": 0.00030550273072459995, "loss": 1.8799, "step": 44559 }, { "epoch": 1.48, "grad_norm": 0.5172269940376282, "learning_rate": 0.0003054922803632004, "loss": 1.7531, "step": 44560 }, { "epoch": 1.48, "grad_norm": 0.4992929697036743, "learning_rate": 0.0003054818299951342, "loss": 1.6876, "step": 44561 }, { "epoch": 1.48, "grad_norm": 0.5133960247039795, "learning_rate": 0.0003054713796204138, "loss": 1.7734, "step": 44562 }, { "epoch": 1.48, "grad_norm": 0.5167199969291687, "learning_rate": 0.0003054609292390519, "loss": 1.7941, "step": 44563 }, { "epoch": 1.48, "grad_norm": 0.5102327466011047, "learning_rate": 0.0003054504788510613, "loss": 1.7593, "step": 44564 }, { "epoch": 1.48, "grad_norm": 0.5223484635353088, "learning_rate": 0.0003054400284564546, "loss": 1.8155, "step": 44565 }, { "epoch": 1.48, "grad_norm": 0.5107704997062683, "learning_rate": 0.00030542957805524453, "loss": 1.7415, "step": 44566 }, { "epoch": 1.48, "grad_norm": 0.5079295635223389, "learning_rate": 0.0003054191276474437, "loss": 1.7809, "step": 44567 }, { "epoch": 1.48, "grad_norm": 0.48997119069099426, "learning_rate": 0.00030540867723306483, "loss": 1.7264, "step": 44568 }, { "epoch": 1.48, "grad_norm": 0.5147204995155334, "learning_rate": 0.0003053982268121208, "loss": 1.8119, "step": 44569 }, { "epoch": 1.48, "grad_norm": 0.5339534878730774, "learning_rate": 0.00030538777638462397, "loss": 1.7567, "step": 44570 }, { "epoch": 1.48, "grad_norm": 0.5135006904602051, "learning_rate": 0.0003053773259505872, "loss": 1.7463, "step": 44571 }, { "epoch": 1.48, "grad_norm": 0.5067878365516663, "learning_rate": 0.0003053668755100231, "loss": 1.7412, "step": 44572 }, { "epoch": 1.48, "grad_norm": 0.5113280415534973, "learning_rate": 0.0003053564250629445, "loss": 1.7213, "step": 44573 }, { "epoch": 1.48, "grad_norm": 0.4991231858730316, "learning_rate": 0.0003053459746093638, "loss": 1.7638, "step": 44574 }, { "epoch": 1.48, "grad_norm": 0.5186004638671875, "learning_rate": 0.0003053355241492941, "loss": 1.7807, "step": 44575 }, { "epoch": 1.48, "grad_norm": 0.5236479640007019, "learning_rate": 0.0003053250736827478, "loss": 1.6898, "step": 44576 }, { "epoch": 1.48, "grad_norm": 0.5277730226516724, "learning_rate": 0.00030531462320973756, "loss": 1.8, "step": 44577 }, { "epoch": 1.48, "grad_norm": 0.4983305037021637, "learning_rate": 0.0003053041727302762, "loss": 1.7523, "step": 44578 }, { "epoch": 1.48, "grad_norm": 0.5190437436103821, "learning_rate": 0.0003052937222443764, "loss": 1.8347, "step": 44579 }, { "epoch": 1.48, "grad_norm": 0.5042561292648315, "learning_rate": 0.0003052832717520508, "loss": 1.7719, "step": 44580 }, { "epoch": 1.48, "grad_norm": 0.4969809055328369, "learning_rate": 0.000305272821253312, "loss": 1.6938, "step": 44581 }, { "epoch": 1.48, "grad_norm": 0.5315355062484741, "learning_rate": 0.0003052623707481728, "loss": 1.7419, "step": 44582 }, { "epoch": 1.48, "grad_norm": 0.5081533789634705, "learning_rate": 0.00030525192023664587, "loss": 1.7639, "step": 44583 }, { "epoch": 1.48, "grad_norm": 0.5163683295249939, "learning_rate": 0.0003052414697187439, "loss": 1.6786, "step": 44584 }, { "epoch": 1.48, "grad_norm": 0.5217452645301819, "learning_rate": 0.0003052310191944796, "loss": 1.7402, "step": 44585 }, { "epoch": 1.48, "grad_norm": 0.510100781917572, "learning_rate": 0.00030522056866386544, "loss": 1.7668, "step": 44586 }, { "epoch": 1.48, "grad_norm": 0.4901030361652374, "learning_rate": 0.00030521011812691454, "loss": 1.7866, "step": 44587 }, { "epoch": 1.48, "grad_norm": 0.5206519365310669, "learning_rate": 0.0003051996675836391, "loss": 1.7858, "step": 44588 }, { "epoch": 1.48, "grad_norm": 0.5123322606086731, "learning_rate": 0.0003051892170340521, "loss": 1.8595, "step": 44589 }, { "epoch": 1.48, "grad_norm": 0.5114039778709412, "learning_rate": 0.00030517876647816617, "loss": 1.7988, "step": 44590 }, { "epoch": 1.48, "grad_norm": 0.5340687036514282, "learning_rate": 0.000305168315915994, "loss": 1.7312, "step": 44591 }, { "epoch": 1.48, "grad_norm": 0.5171735882759094, "learning_rate": 0.00030515786534754825, "loss": 1.7569, "step": 44592 }, { "epoch": 1.48, "grad_norm": 0.5074099898338318, "learning_rate": 0.0003051474147728416, "loss": 1.6753, "step": 44593 }, { "epoch": 1.48, "grad_norm": 0.5272787809371948, "learning_rate": 0.0003051369641918868, "loss": 1.7394, "step": 44594 }, { "epoch": 1.48, "grad_norm": 0.49650347232818604, "learning_rate": 0.00030512651360469634, "loss": 1.8154, "step": 44595 }, { "epoch": 1.48, "grad_norm": 0.5119128823280334, "learning_rate": 0.0003051160630112832, "loss": 1.7584, "step": 44596 }, { "epoch": 1.48, "grad_norm": 0.5461050868034363, "learning_rate": 0.0003051056124116598, "loss": 1.727, "step": 44597 }, { "epoch": 1.48, "grad_norm": 0.5407777428627014, "learning_rate": 0.000305095161805839, "loss": 1.7207, "step": 44598 }, { "epoch": 1.48, "grad_norm": 0.5373722314834595, "learning_rate": 0.0003050847111938335, "loss": 1.7867, "step": 44599 }, { "epoch": 1.48, "grad_norm": 0.49728843569755554, "learning_rate": 0.00030507426057565574, "loss": 1.7635, "step": 44600 }, { "epoch": 1.48, "grad_norm": 0.5255182385444641, "learning_rate": 0.00030506380995131864, "loss": 1.6907, "step": 44601 }, { "epoch": 1.48, "grad_norm": 0.5133616924285889, "learning_rate": 0.00030505335932083487, "loss": 1.7855, "step": 44602 }, { "epoch": 1.48, "grad_norm": 0.5250342488288879, "learning_rate": 0.00030504290868421695, "loss": 1.7563, "step": 44603 }, { "epoch": 1.48, "grad_norm": 0.5031105875968933, "learning_rate": 0.0003050324580414778, "loss": 1.7842, "step": 44604 }, { "epoch": 1.48, "grad_norm": 0.5181479454040527, "learning_rate": 0.0003050220073926299, "loss": 1.7908, "step": 44605 }, { "epoch": 1.48, "grad_norm": 0.5176004767417908, "learning_rate": 0.00030501155673768617, "loss": 1.7957, "step": 44606 }, { "epoch": 1.48, "grad_norm": 0.5203214883804321, "learning_rate": 0.000305001106076659, "loss": 1.8062, "step": 44607 }, { "epoch": 1.48, "grad_norm": 0.5308215618133545, "learning_rate": 0.0003049906554095613, "loss": 1.8467, "step": 44608 }, { "epoch": 1.48, "grad_norm": 0.5198022723197937, "learning_rate": 0.00030498020473640567, "loss": 1.845, "step": 44609 }, { "epoch": 1.48, "grad_norm": 0.5150301456451416, "learning_rate": 0.00030496975405720474, "loss": 1.8041, "step": 44610 }, { "epoch": 1.48, "grad_norm": 0.4947551488876343, "learning_rate": 0.00030495930337197136, "loss": 1.7484, "step": 44611 }, { "epoch": 1.48, "grad_norm": 0.49288904666900635, "learning_rate": 0.0003049488526807181, "loss": 1.7976, "step": 44612 }, { "epoch": 1.48, "grad_norm": 0.4994792640209198, "learning_rate": 0.00030493840198345763, "loss": 1.8363, "step": 44613 }, { "epoch": 1.48, "grad_norm": 0.5034509301185608, "learning_rate": 0.00030492795128020264, "loss": 1.6998, "step": 44614 }, { "epoch": 1.48, "grad_norm": 0.506790816783905, "learning_rate": 0.0003049175005709659, "loss": 1.8451, "step": 44615 }, { "epoch": 1.48, "grad_norm": 0.5093874931335449, "learning_rate": 0.00030490704985576, "loss": 1.8339, "step": 44616 }, { "epoch": 1.48, "grad_norm": 0.49886396527290344, "learning_rate": 0.00030489659913459765, "loss": 1.7575, "step": 44617 }, { "epoch": 1.48, "grad_norm": 0.5086503624916077, "learning_rate": 0.00030488614840749157, "loss": 1.7436, "step": 44618 }, { "epoch": 1.48, "grad_norm": 0.5014813542366028, "learning_rate": 0.0003048756976744545, "loss": 1.7085, "step": 44619 }, { "epoch": 1.48, "grad_norm": 0.5107503533363342, "learning_rate": 0.00030486524693549893, "loss": 1.7396, "step": 44620 }, { "epoch": 1.48, "grad_norm": 0.512459397315979, "learning_rate": 0.00030485479619063774, "loss": 1.7325, "step": 44621 }, { "epoch": 1.48, "grad_norm": 0.5117590427398682, "learning_rate": 0.0003048443454398835, "loss": 1.7574, "step": 44622 }, { "epoch": 1.48, "grad_norm": 0.5119346976280212, "learning_rate": 0.00030483389468324894, "loss": 1.754, "step": 44623 }, { "epoch": 1.48, "grad_norm": 0.5176741480827332, "learning_rate": 0.0003048234439207468, "loss": 1.7885, "step": 44624 }, { "epoch": 1.48, "grad_norm": 0.5105715394020081, "learning_rate": 0.00030481299315238965, "loss": 1.8346, "step": 44625 }, { "epoch": 1.48, "grad_norm": 0.5060631036758423, "learning_rate": 0.0003048025423781903, "loss": 1.8015, "step": 44626 }, { "epoch": 1.48, "grad_norm": 0.5214635133743286, "learning_rate": 0.0003047920915981613, "loss": 1.73, "step": 44627 }, { "epoch": 1.48, "grad_norm": 0.514170229434967, "learning_rate": 0.00030478164081231544, "loss": 1.713, "step": 44628 }, { "epoch": 1.48, "grad_norm": 0.5287556052207947, "learning_rate": 0.0003047711900206654, "loss": 1.7872, "step": 44629 }, { "epoch": 1.48, "grad_norm": 0.5090603232383728, "learning_rate": 0.00030476073922322376, "loss": 1.7911, "step": 44630 }, { "epoch": 1.48, "grad_norm": 0.5290801525115967, "learning_rate": 0.0003047502884200034, "loss": 1.8005, "step": 44631 }, { "epoch": 1.48, "grad_norm": 0.5234568119049072, "learning_rate": 0.0003047398376110168, "loss": 1.7394, "step": 44632 }, { "epoch": 1.48, "grad_norm": 0.5159299969673157, "learning_rate": 0.00030472938679627676, "loss": 1.7679, "step": 44633 }, { "epoch": 1.48, "grad_norm": 0.5071302652359009, "learning_rate": 0.0003047189359757959, "loss": 1.8143, "step": 44634 }, { "epoch": 1.49, "grad_norm": 0.5034487843513489, "learning_rate": 0.000304708485149587, "loss": 1.7385, "step": 44635 }, { "epoch": 1.49, "grad_norm": 0.534911572933197, "learning_rate": 0.0003046980343176627, "loss": 1.8116, "step": 44636 }, { "epoch": 1.49, "grad_norm": 0.5362935662269592, "learning_rate": 0.0003046875834800357, "loss": 1.807, "step": 44637 }, { "epoch": 1.49, "grad_norm": 0.51767498254776, "learning_rate": 0.00030467713263671855, "loss": 1.836, "step": 44638 }, { "epoch": 1.49, "grad_norm": 0.5094479322433472, "learning_rate": 0.0003046666817877242, "loss": 1.7508, "step": 44639 }, { "epoch": 1.49, "grad_norm": 0.5060248970985413, "learning_rate": 0.00030465623093306503, "loss": 1.7313, "step": 44640 }, { "epoch": 1.49, "grad_norm": 0.5262741446495056, "learning_rate": 0.00030464578007275404, "loss": 1.7133, "step": 44641 }, { "epoch": 1.49, "grad_norm": 0.5124568939208984, "learning_rate": 0.00030463532920680373, "loss": 1.686, "step": 44642 }, { "epoch": 1.49, "grad_norm": 0.5106118321418762, "learning_rate": 0.00030462487833522663, "loss": 1.6995, "step": 44643 }, { "epoch": 1.49, "grad_norm": 0.5067437887191772, "learning_rate": 0.0003046144274580358, "loss": 1.7415, "step": 44644 }, { "epoch": 1.49, "grad_norm": 0.5353919863700867, "learning_rate": 0.0003046039765752437, "loss": 1.8002, "step": 44645 }, { "epoch": 1.49, "grad_norm": 0.5448165535926819, "learning_rate": 0.000304593525686863, "loss": 1.7525, "step": 44646 }, { "epoch": 1.49, "grad_norm": 0.5186073780059814, "learning_rate": 0.0003045830747929065, "loss": 1.8464, "step": 44647 }, { "epoch": 1.49, "grad_norm": 0.508934736251831, "learning_rate": 0.0003045726238933867, "loss": 1.7648, "step": 44648 }, { "epoch": 1.49, "grad_norm": 0.5154937505722046, "learning_rate": 0.00030456217298831657, "loss": 1.747, "step": 44649 }, { "epoch": 1.49, "grad_norm": 0.5349087715148926, "learning_rate": 0.0003045517220777085, "loss": 1.7785, "step": 44650 }, { "epoch": 1.49, "grad_norm": 0.5007306337356567, "learning_rate": 0.0003045412711615754, "loss": 1.6828, "step": 44651 }, { "epoch": 1.49, "grad_norm": 0.5263659954071045, "learning_rate": 0.00030453082023992986, "loss": 1.7845, "step": 44652 }, { "epoch": 1.49, "grad_norm": 0.5262333154678345, "learning_rate": 0.0003045203693127845, "loss": 1.7428, "step": 44653 }, { "epoch": 1.49, "grad_norm": 0.5186922550201416, "learning_rate": 0.0003045099183801522, "loss": 1.786, "step": 44654 }, { "epoch": 1.49, "grad_norm": 0.5082424879074097, "learning_rate": 0.0003044994674420453, "loss": 1.7381, "step": 44655 }, { "epoch": 1.49, "grad_norm": 0.5477016568183899, "learning_rate": 0.000304489016498477, "loss": 1.7134, "step": 44656 }, { "epoch": 1.49, "grad_norm": 0.5127058625221252, "learning_rate": 0.00030447856554945954, "loss": 1.7084, "step": 44657 }, { "epoch": 1.49, "grad_norm": 0.525032103061676, "learning_rate": 0.00030446811459500575, "loss": 1.7883, "step": 44658 }, { "epoch": 1.49, "grad_norm": 0.5081242918968201, "learning_rate": 0.0003044576636351283, "loss": 1.7197, "step": 44659 }, { "epoch": 1.49, "grad_norm": 0.5486869812011719, "learning_rate": 0.00030444721266984, "loss": 1.8334, "step": 44660 }, { "epoch": 1.49, "grad_norm": 0.5177266597747803, "learning_rate": 0.0003044367616991534, "loss": 1.7732, "step": 44661 }, { "epoch": 1.49, "grad_norm": 0.520762026309967, "learning_rate": 0.0003044263107230813, "loss": 1.8151, "step": 44662 }, { "epoch": 1.49, "grad_norm": 0.5046457052230835, "learning_rate": 0.0003044158597416361, "loss": 1.7358, "step": 44663 }, { "epoch": 1.49, "grad_norm": 0.5106070041656494, "learning_rate": 0.0003044054087548308, "loss": 1.7482, "step": 44664 }, { "epoch": 1.49, "grad_norm": 0.4985140264034271, "learning_rate": 0.00030439495776267807, "loss": 1.729, "step": 44665 }, { "epoch": 1.49, "grad_norm": 0.5166866183280945, "learning_rate": 0.0003043845067651904, "loss": 1.842, "step": 44666 }, { "epoch": 1.49, "grad_norm": 0.5132431387901306, "learning_rate": 0.0003043740557623807, "loss": 1.7473, "step": 44667 }, { "epoch": 1.49, "grad_norm": 0.5031664967536926, "learning_rate": 0.00030436360475426147, "loss": 1.813, "step": 44668 }, { "epoch": 1.49, "grad_norm": 0.528099775314331, "learning_rate": 0.0003043531537408455, "loss": 1.8288, "step": 44669 }, { "epoch": 1.49, "grad_norm": 0.5302603840827942, "learning_rate": 0.0003043427027221454, "loss": 1.7231, "step": 44670 }, { "epoch": 1.49, "grad_norm": 0.5236903429031372, "learning_rate": 0.0003043322516981739, "loss": 1.8086, "step": 44671 }, { "epoch": 1.49, "grad_norm": 0.5460494756698608, "learning_rate": 0.0003043218006689437, "loss": 1.8162, "step": 44672 }, { "epoch": 1.49, "grad_norm": 0.5141677260398865, "learning_rate": 0.0003043113496344674, "loss": 1.7024, "step": 44673 }, { "epoch": 1.49, "grad_norm": 0.502500593662262, "learning_rate": 0.0003043008985947579, "loss": 1.7252, "step": 44674 }, { "epoch": 1.49, "grad_norm": 0.5298142433166504, "learning_rate": 0.00030429044754982763, "loss": 1.7636, "step": 44675 }, { "epoch": 1.49, "grad_norm": 0.5070222020149231, "learning_rate": 0.0003042799964996895, "loss": 1.6963, "step": 44676 }, { "epoch": 1.49, "grad_norm": 0.5101954340934753, "learning_rate": 0.00030426954544435594, "loss": 1.6977, "step": 44677 }, { "epoch": 1.49, "grad_norm": 0.5179724097251892, "learning_rate": 0.00030425909438383986, "loss": 1.6707, "step": 44678 }, { "epoch": 1.49, "grad_norm": 0.5803495049476624, "learning_rate": 0.0003042486433181539, "loss": 1.7037, "step": 44679 }, { "epoch": 1.49, "grad_norm": 0.531043291091919, "learning_rate": 0.00030423819224731054, "loss": 1.7325, "step": 44680 }, { "epoch": 1.49, "grad_norm": 0.5160199999809265, "learning_rate": 0.0003042277411713229, "loss": 1.7618, "step": 44681 }, { "epoch": 1.49, "grad_norm": 0.5091274976730347, "learning_rate": 0.00030421729009020323, "loss": 1.8067, "step": 44682 }, { "epoch": 1.49, "grad_norm": 0.5168991684913635, "learning_rate": 0.00030420683900396446, "loss": 1.8072, "step": 44683 }, { "epoch": 1.49, "grad_norm": 0.5385899543762207, "learning_rate": 0.0003041963879126192, "loss": 1.7763, "step": 44684 }, { "epoch": 1.49, "grad_norm": 0.5404226779937744, "learning_rate": 0.00030418593681618, "loss": 1.7097, "step": 44685 }, { "epoch": 1.49, "grad_norm": 0.5095325112342834, "learning_rate": 0.00030417548571465996, "loss": 1.8065, "step": 44686 }, { "epoch": 1.49, "grad_norm": 0.5147686004638672, "learning_rate": 0.00030416503460807136, "loss": 1.6844, "step": 44687 }, { "epoch": 1.49, "grad_norm": 0.5395902991294861, "learning_rate": 0.00030415458349642696, "loss": 1.7706, "step": 44688 }, { "epoch": 1.49, "grad_norm": 0.5302700996398926, "learning_rate": 0.00030414413237973956, "loss": 1.7276, "step": 44689 }, { "epoch": 1.49, "grad_norm": 0.5352024435997009, "learning_rate": 0.0003041336812580218, "loss": 1.7212, "step": 44690 }, { "epoch": 1.49, "grad_norm": 0.5060694217681885, "learning_rate": 0.0003041232301312863, "loss": 1.7781, "step": 44691 }, { "epoch": 1.49, "grad_norm": 0.5157737731933594, "learning_rate": 0.00030411277899954596, "loss": 1.7691, "step": 44692 }, { "epoch": 1.49, "grad_norm": 0.5502369403839111, "learning_rate": 0.0003041023278628132, "loss": 1.7668, "step": 44693 }, { "epoch": 1.49, "grad_norm": 0.5431373715400696, "learning_rate": 0.00030409187672110087, "loss": 1.7088, "step": 44694 }, { "epoch": 1.49, "grad_norm": 0.5043312907218933, "learning_rate": 0.0003040814255744215, "loss": 1.754, "step": 44695 }, { "epoch": 1.49, "grad_norm": 0.5100184679031372, "learning_rate": 0.00030407097442278794, "loss": 1.8088, "step": 44696 }, { "epoch": 1.49, "grad_norm": 0.5124149322509766, "learning_rate": 0.0003040605232662129, "loss": 1.7145, "step": 44697 }, { "epoch": 1.49, "grad_norm": 0.5411567091941833, "learning_rate": 0.0003040500721047088, "loss": 1.7891, "step": 44698 }, { "epoch": 1.49, "grad_norm": 0.49534252285957336, "learning_rate": 0.0003040396209382887, "loss": 1.7655, "step": 44699 }, { "epoch": 1.49, "grad_norm": 0.5192071199417114, "learning_rate": 0.000304029169766965, "loss": 1.7728, "step": 44700 }, { "epoch": 1.49, "grad_norm": 0.5214203596115112, "learning_rate": 0.00030401871859075046, "loss": 1.7287, "step": 44701 }, { "epoch": 1.49, "grad_norm": 0.553261935710907, "learning_rate": 0.0003040082674096579, "loss": 1.7789, "step": 44702 }, { "epoch": 1.49, "grad_norm": 0.5218883752822876, "learning_rate": 0.00030399781622369976, "loss": 1.7947, "step": 44703 }, { "epoch": 1.49, "grad_norm": 0.5166775584220886, "learning_rate": 0.000303987365032889, "loss": 1.6648, "step": 44704 }, { "epoch": 1.49, "grad_norm": 0.5073607563972473, "learning_rate": 0.00030397691383723794, "loss": 1.8321, "step": 44705 }, { "epoch": 1.49, "grad_norm": 0.5364401340484619, "learning_rate": 0.00030396646263675965, "loss": 1.7364, "step": 44706 }, { "epoch": 1.49, "grad_norm": 0.5322061777114868, "learning_rate": 0.0003039560114314667, "loss": 1.8107, "step": 44707 }, { "epoch": 1.49, "grad_norm": 0.5106251835823059, "learning_rate": 0.00030394556022137164, "loss": 1.7678, "step": 44708 }, { "epoch": 1.49, "grad_norm": 0.5090047717094421, "learning_rate": 0.00030393510900648735, "loss": 1.7489, "step": 44709 }, { "epoch": 1.49, "grad_norm": 0.5158531069755554, "learning_rate": 0.0003039246577868263, "loss": 1.768, "step": 44710 }, { "epoch": 1.49, "grad_norm": 0.5307890176773071, "learning_rate": 0.0003039142065624014, "loss": 1.8583, "step": 44711 }, { "epoch": 1.49, "grad_norm": 0.5442312359809875, "learning_rate": 0.0003039037553332252, "loss": 1.7582, "step": 44712 }, { "epoch": 1.49, "grad_norm": 0.5282179117202759, "learning_rate": 0.00030389330409931036, "loss": 1.7598, "step": 44713 }, { "epoch": 1.49, "grad_norm": 0.5180573463439941, "learning_rate": 0.0003038828528606696, "loss": 1.7472, "step": 44714 }, { "epoch": 1.49, "grad_norm": 0.5279759764671326, "learning_rate": 0.0003038724016173157, "loss": 1.7086, "step": 44715 }, { "epoch": 1.49, "grad_norm": 0.5293526649475098, "learning_rate": 0.00030386195036926137, "loss": 1.6843, "step": 44716 }, { "epoch": 1.49, "grad_norm": 0.5162387490272522, "learning_rate": 0.00030385149911651906, "loss": 1.6928, "step": 44717 }, { "epoch": 1.49, "grad_norm": 0.5031624436378479, "learning_rate": 0.0003038410478591016, "loss": 1.7174, "step": 44718 }, { "epoch": 1.49, "grad_norm": 0.5267462134361267, "learning_rate": 0.0003038305965970217, "loss": 1.737, "step": 44719 }, { "epoch": 1.49, "grad_norm": 0.5027027726173401, "learning_rate": 0.000303820145330292, "loss": 1.7404, "step": 44720 }, { "epoch": 1.49, "grad_norm": 0.52281254529953, "learning_rate": 0.0003038096940589253, "loss": 1.755, "step": 44721 }, { "epoch": 1.49, "grad_norm": 0.5289242267608643, "learning_rate": 0.0003037992427829341, "loss": 1.8075, "step": 44722 }, { "epoch": 1.49, "grad_norm": 0.5177505016326904, "learning_rate": 0.0003037887915023312, "loss": 1.6644, "step": 44723 }, { "epoch": 1.49, "grad_norm": 0.5145226120948792, "learning_rate": 0.00030377834021712925, "loss": 1.7742, "step": 44724 }, { "epoch": 1.49, "grad_norm": 0.5142955183982849, "learning_rate": 0.000303767888927341, "loss": 1.7645, "step": 44725 }, { "epoch": 1.49, "grad_norm": 0.5047150254249573, "learning_rate": 0.00030375743763297905, "loss": 1.6629, "step": 44726 }, { "epoch": 1.49, "grad_norm": 0.5115493535995483, "learning_rate": 0.00030374698633405607, "loss": 1.7774, "step": 44727 }, { "epoch": 1.49, "grad_norm": 0.5076953172683716, "learning_rate": 0.0003037365350305849, "loss": 1.7925, "step": 44728 }, { "epoch": 1.49, "grad_norm": 0.5167543888092041, "learning_rate": 0.0003037260837225781, "loss": 1.8021, "step": 44729 }, { "epoch": 1.49, "grad_norm": 0.5032728910446167, "learning_rate": 0.0003037156324100484, "loss": 1.7654, "step": 44730 }, { "epoch": 1.49, "grad_norm": 0.522140383720398, "learning_rate": 0.00030370518109300846, "loss": 1.7574, "step": 44731 }, { "epoch": 1.49, "grad_norm": 0.5218520760536194, "learning_rate": 0.000303694729771471, "loss": 1.833, "step": 44732 }, { "epoch": 1.49, "grad_norm": 0.5459312200546265, "learning_rate": 0.0003036842784454486, "loss": 1.8059, "step": 44733 }, { "epoch": 1.49, "grad_norm": 0.5060873031616211, "learning_rate": 0.00030367382711495413, "loss": 1.8242, "step": 44734 }, { "epoch": 1.49, "grad_norm": 0.5111281871795654, "learning_rate": 0.00030366337578, "loss": 1.8035, "step": 44735 }, { "epoch": 1.49, "grad_norm": 0.5336549282073975, "learning_rate": 0.0003036529244405993, "loss": 1.7482, "step": 44736 }, { "epoch": 1.49, "grad_norm": 0.5193671584129333, "learning_rate": 0.00030364247309676436, "loss": 1.7161, "step": 44737 }, { "epoch": 1.49, "grad_norm": 0.500139594078064, "learning_rate": 0.00030363202174850797, "loss": 1.6325, "step": 44738 }, { "epoch": 1.49, "grad_norm": 0.5196059346199036, "learning_rate": 0.0003036215703958429, "loss": 1.7266, "step": 44739 }, { "epoch": 1.49, "grad_norm": 0.5426000952720642, "learning_rate": 0.0003036111190387817, "loss": 1.6891, "step": 44740 }, { "epoch": 1.49, "grad_norm": 0.5253846049308777, "learning_rate": 0.00030360066767733725, "loss": 1.845, "step": 44741 }, { "epoch": 1.49, "grad_norm": 0.5188559889793396, "learning_rate": 0.000303590216311522, "loss": 1.7389, "step": 44742 }, { "epoch": 1.49, "grad_norm": 0.5265091061592102, "learning_rate": 0.0003035797649413489, "loss": 1.7683, "step": 44743 }, { "epoch": 1.49, "grad_norm": 0.5223847031593323, "learning_rate": 0.0003035693135668304, "loss": 1.7033, "step": 44744 }, { "epoch": 1.49, "grad_norm": 0.5118087530136108, "learning_rate": 0.00030355886218797927, "loss": 1.8027, "step": 44745 }, { "epoch": 1.49, "grad_norm": 0.5177178382873535, "learning_rate": 0.0003035484108048082, "loss": 1.7488, "step": 44746 }, { "epoch": 1.49, "grad_norm": 0.5252507925033569, "learning_rate": 0.00030353795941732997, "loss": 1.7637, "step": 44747 }, { "epoch": 1.49, "grad_norm": 0.49799060821533203, "learning_rate": 0.0003035275080255571, "loss": 1.8188, "step": 44748 }, { "epoch": 1.49, "grad_norm": 0.5079086422920227, "learning_rate": 0.0003035170566295024, "loss": 1.6934, "step": 44749 }, { "epoch": 1.49, "grad_norm": 0.5233848094940186, "learning_rate": 0.00030350660522917845, "loss": 1.7593, "step": 44750 }, { "epoch": 1.49, "grad_norm": 0.5146563053131104, "learning_rate": 0.000303496153824598, "loss": 1.6713, "step": 44751 }, { "epoch": 1.49, "grad_norm": 0.5270882248878479, "learning_rate": 0.00030348570241577375, "loss": 1.7671, "step": 44752 }, { "epoch": 1.49, "grad_norm": 0.5168915390968323, "learning_rate": 0.00030347525100271844, "loss": 1.7041, "step": 44753 }, { "epoch": 1.49, "grad_norm": 0.5404106378555298, "learning_rate": 0.00030346479958544466, "loss": 1.712, "step": 44754 }, { "epoch": 1.49, "grad_norm": 0.5201829075813293, "learning_rate": 0.0003034543481639651, "loss": 1.6904, "step": 44755 }, { "epoch": 1.49, "grad_norm": 0.5133797526359558, "learning_rate": 0.00030344389673829243, "loss": 1.7364, "step": 44756 }, { "epoch": 1.49, "grad_norm": 0.5192979574203491, "learning_rate": 0.0003034334453084394, "loss": 1.7539, "step": 44757 }, { "epoch": 1.49, "grad_norm": 0.5450441241264343, "learning_rate": 0.0003034229938744187, "loss": 1.7836, "step": 44758 }, { "epoch": 1.49, "grad_norm": 0.5253880620002747, "learning_rate": 0.0003034125424362431, "loss": 1.7985, "step": 44759 }, { "epoch": 1.49, "grad_norm": 0.5089449286460876, "learning_rate": 0.00030340209099392494, "loss": 1.7724, "step": 44760 }, { "epoch": 1.49, "grad_norm": 0.5141690969467163, "learning_rate": 0.00030339163954747734, "loss": 1.8011, "step": 44761 }, { "epoch": 1.49, "grad_norm": 0.5420471429824829, "learning_rate": 0.00030338118809691264, "loss": 1.8048, "step": 44762 }, { "epoch": 1.49, "grad_norm": 0.5199241042137146, "learning_rate": 0.00030337073664224375, "loss": 1.7863, "step": 44763 }, { "epoch": 1.49, "grad_norm": 0.5198060274124146, "learning_rate": 0.0003033602851834833, "loss": 1.802, "step": 44764 }, { "epoch": 1.49, "grad_norm": 0.509106457233429, "learning_rate": 0.00030334983372064386, "loss": 1.7635, "step": 44765 }, { "epoch": 1.49, "grad_norm": 0.513432502746582, "learning_rate": 0.0003033393822537384, "loss": 1.7232, "step": 44766 }, { "epoch": 1.49, "grad_norm": 0.5303819179534912, "learning_rate": 0.0003033289307827792, "loss": 1.7624, "step": 44767 }, { "epoch": 1.49, "grad_norm": 0.5068036913871765, "learning_rate": 0.00030331847930777935, "loss": 1.7537, "step": 44768 }, { "epoch": 1.49, "grad_norm": 0.5257992744445801, "learning_rate": 0.00030330802782875126, "loss": 1.7641, "step": 44769 }, { "epoch": 1.49, "grad_norm": 0.5271992683410645, "learning_rate": 0.00030329757634570777, "loss": 1.7865, "step": 44770 }, { "epoch": 1.49, "grad_norm": 0.517659068107605, "learning_rate": 0.00030328712485866145, "loss": 1.7414, "step": 44771 }, { "epoch": 1.49, "grad_norm": 0.5179634690284729, "learning_rate": 0.00030327667336762506, "loss": 1.8183, "step": 44772 }, { "epoch": 1.49, "grad_norm": 0.5237869620323181, "learning_rate": 0.00030326622187261133, "loss": 1.809, "step": 44773 }, { "epoch": 1.49, "grad_norm": 0.5057398080825806, "learning_rate": 0.0003032557703736328, "loss": 1.7525, "step": 44774 }, { "epoch": 1.49, "grad_norm": 0.5269291400909424, "learning_rate": 0.0003032453188707023, "loss": 1.7648, "step": 44775 }, { "epoch": 1.49, "grad_norm": 0.5027803182601929, "learning_rate": 0.0003032348673638324, "loss": 1.7446, "step": 44776 }, { "epoch": 1.49, "grad_norm": 0.5252718329429626, "learning_rate": 0.0003032244158530359, "loss": 1.765, "step": 44777 }, { "epoch": 1.49, "grad_norm": 0.5456645488739014, "learning_rate": 0.0003032139643383255, "loss": 1.8117, "step": 44778 }, { "epoch": 1.49, "grad_norm": 0.5234231352806091, "learning_rate": 0.0003032035128197137, "loss": 1.8238, "step": 44779 }, { "epoch": 1.49, "grad_norm": 0.5070412158966064, "learning_rate": 0.00030319306129721334, "loss": 1.7463, "step": 44780 }, { "epoch": 1.49, "grad_norm": 0.5017767548561096, "learning_rate": 0.0003031826097708371, "loss": 1.7466, "step": 44781 }, { "epoch": 1.49, "grad_norm": 0.5258638858795166, "learning_rate": 0.0003031721582405976, "loss": 1.7486, "step": 44782 }, { "epoch": 1.49, "grad_norm": 0.5186973214149475, "learning_rate": 0.00030316170670650764, "loss": 1.7124, "step": 44783 }, { "epoch": 1.49, "grad_norm": 0.5320999026298523, "learning_rate": 0.0003031512551685798, "loss": 1.7313, "step": 44784 }, { "epoch": 1.49, "grad_norm": 0.5247443318367004, "learning_rate": 0.0003031408036268268, "loss": 1.785, "step": 44785 }, { "epoch": 1.49, "grad_norm": 0.49781960248947144, "learning_rate": 0.0003031303520812613, "loss": 1.7587, "step": 44786 }, { "epoch": 1.49, "grad_norm": 0.5184985399246216, "learning_rate": 0.000303119900531896, "loss": 1.7725, "step": 44787 }, { "epoch": 1.49, "grad_norm": 0.5107385516166687, "learning_rate": 0.0003031094489787437, "loss": 1.8135, "step": 44788 }, { "epoch": 1.49, "grad_norm": 0.510224461555481, "learning_rate": 0.00030309899742181687, "loss": 1.679, "step": 44789 }, { "epoch": 1.49, "grad_norm": 0.5147784948348999, "learning_rate": 0.0003030885458611283, "loss": 1.7268, "step": 44790 }, { "epoch": 1.49, "grad_norm": 0.5184292793273926, "learning_rate": 0.00030307809429669087, "loss": 1.8419, "step": 44791 }, { "epoch": 1.49, "grad_norm": 0.5202292203903198, "learning_rate": 0.00030306764272851696, "loss": 1.789, "step": 44792 }, { "epoch": 1.49, "grad_norm": 0.527571439743042, "learning_rate": 0.00030305719115661936, "loss": 1.7413, "step": 44793 }, { "epoch": 1.49, "grad_norm": 0.5299469828605652, "learning_rate": 0.00030304673958101087, "loss": 1.802, "step": 44794 }, { "epoch": 1.49, "grad_norm": 0.515110194683075, "learning_rate": 0.000303036288001704, "loss": 1.8176, "step": 44795 }, { "epoch": 1.49, "grad_norm": 0.529273509979248, "learning_rate": 0.0003030258364187116, "loss": 1.7497, "step": 44796 }, { "epoch": 1.49, "grad_norm": 0.5203680396080017, "learning_rate": 0.0003030153848320461, "loss": 1.9033, "step": 44797 }, { "epoch": 1.49, "grad_norm": 0.5200109481811523, "learning_rate": 0.00030300493324172056, "loss": 1.8009, "step": 44798 }, { "epoch": 1.49, "grad_norm": 0.5149965286254883, "learning_rate": 0.0003029944816477474, "loss": 1.8355, "step": 44799 }, { "epoch": 1.49, "grad_norm": 0.5193967223167419, "learning_rate": 0.00030298403005013947, "loss": 1.8618, "step": 44800 }, { "epoch": 1.49, "grad_norm": 0.5028014183044434, "learning_rate": 0.0003029735784489092, "loss": 1.7872, "step": 44801 }, { "epoch": 1.49, "grad_norm": 0.5194003582000732, "learning_rate": 0.0003029631268440696, "loss": 1.7452, "step": 44802 }, { "epoch": 1.49, "grad_norm": 0.5029767751693726, "learning_rate": 0.00030295267523563314, "loss": 1.7895, "step": 44803 }, { "epoch": 1.49, "grad_norm": 0.5025215744972229, "learning_rate": 0.0003029422236236126, "loss": 1.8113, "step": 44804 }, { "epoch": 1.49, "grad_norm": 0.5188581943511963, "learning_rate": 0.00030293177200802054, "loss": 1.8021, "step": 44805 }, { "epoch": 1.49, "grad_norm": 0.5025053024291992, "learning_rate": 0.00030292132038886984, "loss": 1.7208, "step": 44806 }, { "epoch": 1.49, "grad_norm": 0.5139310956001282, "learning_rate": 0.000302910868766173, "loss": 1.7291, "step": 44807 }, { "epoch": 1.49, "grad_norm": 0.525222897529602, "learning_rate": 0.00030290041713994286, "loss": 1.8196, "step": 44808 }, { "epoch": 1.49, "grad_norm": 0.520261824131012, "learning_rate": 0.0003028899655101921, "loss": 1.7549, "step": 44809 }, { "epoch": 1.49, "grad_norm": 0.49539315700531006, "learning_rate": 0.00030287951387693323, "loss": 1.7822, "step": 44810 }, { "epoch": 1.49, "grad_norm": 0.7548912763595581, "learning_rate": 0.00030286906224017915, "loss": 1.7544, "step": 44811 }, { "epoch": 1.49, "grad_norm": 0.5077111124992371, "learning_rate": 0.00030285861059994233, "loss": 1.7717, "step": 44812 }, { "epoch": 1.49, "grad_norm": 0.5389895439147949, "learning_rate": 0.00030284815895623564, "loss": 1.7179, "step": 44813 }, { "epoch": 1.49, "grad_norm": 0.523648202419281, "learning_rate": 0.00030283770730907175, "loss": 1.8491, "step": 44814 }, { "epoch": 1.49, "grad_norm": 0.523087203502655, "learning_rate": 0.0003028272556584632, "loss": 1.7505, "step": 44815 }, { "epoch": 1.49, "grad_norm": 0.5573486089706421, "learning_rate": 0.00030281680400442285, "loss": 1.7184, "step": 44816 }, { "epoch": 1.49, "grad_norm": 0.5212423801422119, "learning_rate": 0.0003028063523469633, "loss": 1.763, "step": 44817 }, { "epoch": 1.49, "grad_norm": 0.5125710368156433, "learning_rate": 0.0003027959006860973, "loss": 1.7478, "step": 44818 }, { "epoch": 1.49, "grad_norm": 0.5034970045089722, "learning_rate": 0.0003027854490218375, "loss": 1.698, "step": 44819 }, { "epoch": 1.49, "grad_norm": 0.5164342522621155, "learning_rate": 0.0003027749973541965, "loss": 1.6354, "step": 44820 }, { "epoch": 1.49, "grad_norm": 0.5247734189033508, "learning_rate": 0.0003027645456831871, "loss": 1.8573, "step": 44821 }, { "epoch": 1.49, "grad_norm": 0.5104725956916809, "learning_rate": 0.0003027540940088218, "loss": 1.7284, "step": 44822 }, { "epoch": 1.49, "grad_norm": 0.5310695171356201, "learning_rate": 0.00030274364233111366, "loss": 1.7931, "step": 44823 }, { "epoch": 1.49, "grad_norm": 0.5273101329803467, "learning_rate": 0.000302733190650075, "loss": 1.7359, "step": 44824 }, { "epoch": 1.49, "grad_norm": 0.5279890894889832, "learning_rate": 0.0003027227389657187, "loss": 1.7332, "step": 44825 }, { "epoch": 1.49, "grad_norm": 0.550550103187561, "learning_rate": 0.0003027122872780575, "loss": 1.8029, "step": 44826 }, { "epoch": 1.49, "grad_norm": 0.5174806118011475, "learning_rate": 0.0003027018355871038, "loss": 1.7828, "step": 44827 }, { "epoch": 1.49, "grad_norm": 0.5265553593635559, "learning_rate": 0.00030269138389287057, "loss": 1.7184, "step": 44828 }, { "epoch": 1.49, "grad_norm": 0.5172944068908691, "learning_rate": 0.00030268093219537037, "loss": 1.7936, "step": 44829 }, { "epoch": 1.49, "grad_norm": 0.5259758830070496, "learning_rate": 0.0003026704804946159, "loss": 1.7404, "step": 44830 }, { "epoch": 1.49, "grad_norm": 0.5281226634979248, "learning_rate": 0.0003026600287906199, "loss": 1.806, "step": 44831 }, { "epoch": 1.49, "grad_norm": 0.5180208086967468, "learning_rate": 0.00030264957708339495, "loss": 1.7942, "step": 44832 }, { "epoch": 1.49, "grad_norm": 0.5274778604507446, "learning_rate": 0.0003026391253729539, "loss": 1.7873, "step": 44833 }, { "epoch": 1.49, "grad_norm": 0.5101940035820007, "learning_rate": 0.00030262867365930926, "loss": 1.7368, "step": 44834 }, { "epoch": 1.49, "grad_norm": 0.5017687678337097, "learning_rate": 0.0003026182219424739, "loss": 1.8038, "step": 44835 }, { "epoch": 1.49, "grad_norm": 0.5286121368408203, "learning_rate": 0.0003026077702224603, "loss": 1.7597, "step": 44836 }, { "epoch": 1.49, "grad_norm": 0.5000315308570862, "learning_rate": 0.0003025973184992812, "loss": 1.7317, "step": 44837 }, { "epoch": 1.49, "grad_norm": 0.5259802341461182, "learning_rate": 0.00030258686677294946, "loss": 1.7881, "step": 44838 }, { "epoch": 1.49, "grad_norm": 0.5228344798088074, "learning_rate": 0.00030257641504347765, "loss": 1.797, "step": 44839 }, { "epoch": 1.49, "grad_norm": 0.521169900894165, "learning_rate": 0.0003025659633108784, "loss": 1.7159, "step": 44840 }, { "epoch": 1.49, "grad_norm": 0.523637592792511, "learning_rate": 0.0003025555115751644, "loss": 1.7244, "step": 44841 }, { "epoch": 1.49, "grad_norm": 0.5266410112380981, "learning_rate": 0.0003025450598363485, "loss": 1.8113, "step": 44842 }, { "epoch": 1.49, "grad_norm": 0.5238425731658936, "learning_rate": 0.0003025346080944432, "loss": 1.8532, "step": 44843 }, { "epoch": 1.49, "grad_norm": 0.5179865956306458, "learning_rate": 0.00030252415634946124, "loss": 1.7152, "step": 44844 }, { "epoch": 1.49, "grad_norm": 0.5128725171089172, "learning_rate": 0.0003025137046014153, "loss": 1.765, "step": 44845 }, { "epoch": 1.49, "grad_norm": 0.5358450412750244, "learning_rate": 0.0003025032528503183, "loss": 1.7922, "step": 44846 }, { "epoch": 1.49, "grad_norm": 0.5312369465827942, "learning_rate": 0.0003024928010961825, "loss": 1.7814, "step": 44847 }, { "epoch": 1.49, "grad_norm": 0.564975380897522, "learning_rate": 0.0003024823493390209, "loss": 1.8412, "step": 44848 }, { "epoch": 1.49, "grad_norm": 0.5095390677452087, "learning_rate": 0.000302471897578846, "loss": 1.7228, "step": 44849 }, { "epoch": 1.49, "grad_norm": 0.5051916241645813, "learning_rate": 0.0003024614458156707, "loss": 1.7146, "step": 44850 }, { "epoch": 1.49, "grad_norm": 0.4962314963340759, "learning_rate": 0.00030245099404950754, "loss": 1.6794, "step": 44851 }, { "epoch": 1.49, "grad_norm": 0.5178187489509583, "learning_rate": 0.00030244054228036917, "loss": 1.7646, "step": 44852 }, { "epoch": 1.49, "grad_norm": 0.5398436188697815, "learning_rate": 0.00030243009050826843, "loss": 1.7893, "step": 44853 }, { "epoch": 1.49, "grad_norm": 0.5092682242393494, "learning_rate": 0.0003024196387332179, "loss": 1.7653, "step": 44854 }, { "epoch": 1.49, "grad_norm": 0.5291949510574341, "learning_rate": 0.0003024091869552302, "loss": 1.7988, "step": 44855 }, { "epoch": 1.49, "grad_norm": 0.5124241709709167, "learning_rate": 0.0003023987351743182, "loss": 1.7844, "step": 44856 }, { "epoch": 1.49, "grad_norm": 0.5072116255760193, "learning_rate": 0.00030238828339049435, "loss": 1.7821, "step": 44857 }, { "epoch": 1.49, "grad_norm": 0.501060962677002, "learning_rate": 0.0003023778316037717, "loss": 1.7815, "step": 44858 }, { "epoch": 1.49, "grad_norm": 0.5502583384513855, "learning_rate": 0.0003023673798141625, "loss": 1.7566, "step": 44859 }, { "epoch": 1.49, "grad_norm": 0.5344123840332031, "learning_rate": 0.0003023569280216798, "loss": 1.7415, "step": 44860 }, { "epoch": 1.49, "grad_norm": 0.511222243309021, "learning_rate": 0.00030234647622633603, "loss": 1.7061, "step": 44861 }, { "epoch": 1.49, "grad_norm": 0.5134204626083374, "learning_rate": 0.0003023360244281441, "loss": 1.7442, "step": 44862 }, { "epoch": 1.49, "grad_norm": 0.5295450091362, "learning_rate": 0.0003023255726271164, "loss": 1.7824, "step": 44863 }, { "epoch": 1.49, "grad_norm": 0.5519943237304688, "learning_rate": 0.00030231512082326594, "loss": 1.6689, "step": 44864 }, { "epoch": 1.49, "grad_norm": 0.5155925154685974, "learning_rate": 0.0003023046690166053, "loss": 1.7237, "step": 44865 }, { "epoch": 1.49, "grad_norm": 0.5134985446929932, "learning_rate": 0.000302294217207147, "loss": 1.8146, "step": 44866 }, { "epoch": 1.49, "grad_norm": 0.5262688994407654, "learning_rate": 0.00030228376539490396, "loss": 1.7171, "step": 44867 }, { "epoch": 1.49, "grad_norm": 0.5234542489051819, "learning_rate": 0.00030227331357988866, "loss": 1.7187, "step": 44868 }, { "epoch": 1.49, "grad_norm": 0.5396932363510132, "learning_rate": 0.000302262861762114, "loss": 1.7063, "step": 44869 }, { "epoch": 1.49, "grad_norm": 0.5227219462394714, "learning_rate": 0.0003022524099415925, "loss": 1.8114, "step": 44870 }, { "epoch": 1.49, "grad_norm": 0.5106533765792847, "learning_rate": 0.0003022419581183369, "loss": 1.7167, "step": 44871 }, { "epoch": 1.49, "grad_norm": 0.5153908729553223, "learning_rate": 0.00030223150629235995, "loss": 1.7209, "step": 44872 }, { "epoch": 1.49, "grad_norm": 0.5391474366188049, "learning_rate": 0.0003022210544636742, "loss": 1.7827, "step": 44873 }, { "epoch": 1.49, "grad_norm": 0.5237622261047363, "learning_rate": 0.0003022106026322925, "loss": 1.7772, "step": 44874 }, { "epoch": 1.49, "grad_norm": 0.5252152681350708, "learning_rate": 0.00030220015079822745, "loss": 1.7168, "step": 44875 }, { "epoch": 1.49, "grad_norm": 0.5072054862976074, "learning_rate": 0.00030218969896149173, "loss": 1.7676, "step": 44876 }, { "epoch": 1.49, "grad_norm": 0.5005255937576294, "learning_rate": 0.000302179247122098, "loss": 1.7778, "step": 44877 }, { "epoch": 1.49, "grad_norm": 0.5017639994621277, "learning_rate": 0.000302168795280059, "loss": 1.7637, "step": 44878 }, { "epoch": 1.49, "grad_norm": 0.5180903077125549, "learning_rate": 0.0003021583434353874, "loss": 1.6836, "step": 44879 }, { "epoch": 1.49, "grad_norm": 0.5476838946342468, "learning_rate": 0.0003021478915880959, "loss": 1.7478, "step": 44880 }, { "epoch": 1.49, "grad_norm": 0.5085156559944153, "learning_rate": 0.00030213743973819726, "loss": 1.7305, "step": 44881 }, { "epoch": 1.49, "grad_norm": 0.5348869562149048, "learning_rate": 0.0003021269878857039, "loss": 1.7851, "step": 44882 }, { "epoch": 1.49, "grad_norm": 0.5142008662223816, "learning_rate": 0.0003021165360306288, "loss": 1.7643, "step": 44883 }, { "epoch": 1.49, "grad_norm": 0.5108278393745422, "learning_rate": 0.00030210608417298446, "loss": 1.8436, "step": 44884 }, { "epoch": 1.49, "grad_norm": 0.5051479339599609, "learning_rate": 0.0003020956323127838, "loss": 1.8191, "step": 44885 }, { "epoch": 1.49, "grad_norm": 0.49626773595809937, "learning_rate": 0.00030208518045003925, "loss": 1.7734, "step": 44886 }, { "epoch": 1.49, "grad_norm": 0.5095446705818176, "learning_rate": 0.00030207472858476353, "loss": 1.7402, "step": 44887 }, { "epoch": 1.49, "grad_norm": 0.516520619392395, "learning_rate": 0.0003020642767169695, "loss": 1.6966, "step": 44888 }, { "epoch": 1.49, "grad_norm": 0.5314236879348755, "learning_rate": 0.0003020538248466697, "loss": 1.768, "step": 44889 }, { "epoch": 1.49, "grad_norm": 0.5114884972572327, "learning_rate": 0.0003020433729738769, "loss": 1.753, "step": 44890 }, { "epoch": 1.49, "grad_norm": 0.5156261920928955, "learning_rate": 0.0003020329210986037, "loss": 1.7262, "step": 44891 }, { "epoch": 1.49, "grad_norm": 0.5279875993728638, "learning_rate": 0.00030202246922086284, "loss": 1.7443, "step": 44892 }, { "epoch": 1.49, "grad_norm": 0.5377522706985474, "learning_rate": 0.000302012017340667, "loss": 1.8577, "step": 44893 }, { "epoch": 1.49, "grad_norm": 0.5222828984260559, "learning_rate": 0.0003020015654580288, "loss": 1.8065, "step": 44894 }, { "epoch": 1.49, "grad_norm": 0.5221900939941406, "learning_rate": 0.0003019911135729612, "loss": 1.7779, "step": 44895 }, { "epoch": 1.49, "grad_norm": 0.5032980442047119, "learning_rate": 0.0003019806616854765, "loss": 1.6946, "step": 44896 }, { "epoch": 1.49, "grad_norm": 0.5463017821311951, "learning_rate": 0.0003019702097955877, "loss": 1.8942, "step": 44897 }, { "epoch": 1.49, "grad_norm": 0.5125201344490051, "learning_rate": 0.0003019597579033072, "loss": 1.7634, "step": 44898 }, { "epoch": 1.49, "grad_norm": 0.527437150478363, "learning_rate": 0.0003019493060086479, "loss": 1.7683, "step": 44899 }, { "epoch": 1.49, "grad_norm": 0.5364895462989807, "learning_rate": 0.00030193885411162243, "loss": 1.7321, "step": 44900 }, { "epoch": 1.49, "grad_norm": 0.5186185240745544, "learning_rate": 0.00030192840221224357, "loss": 1.808, "step": 44901 }, { "epoch": 1.49, "grad_norm": 0.5235990881919861, "learning_rate": 0.0003019179503105238, "loss": 1.7828, "step": 44902 }, { "epoch": 1.49, "grad_norm": 0.560366153717041, "learning_rate": 0.00030190749840647594, "loss": 1.7532, "step": 44903 }, { "epoch": 1.49, "grad_norm": 0.5086811780929565, "learning_rate": 0.0003018970465001127, "loss": 1.8696, "step": 44904 }, { "epoch": 1.49, "grad_norm": 0.5292573571205139, "learning_rate": 0.0003018865945914467, "loss": 1.7904, "step": 44905 }, { "epoch": 1.49, "grad_norm": 0.5246318578720093, "learning_rate": 0.00030187614268049076, "loss": 1.7472, "step": 44906 }, { "epoch": 1.49, "grad_norm": 0.49748694896698, "learning_rate": 0.00030186569076725726, "loss": 1.716, "step": 44907 }, { "epoch": 1.49, "grad_norm": 0.5524752140045166, "learning_rate": 0.00030185523885175924, "loss": 1.7097, "step": 44908 }, { "epoch": 1.49, "grad_norm": 0.5032753348350525, "learning_rate": 0.00030184478693400917, "loss": 1.7381, "step": 44909 }, { "epoch": 1.49, "grad_norm": 0.4980672001838684, "learning_rate": 0.0003018343350140198, "loss": 1.695, "step": 44910 }, { "epoch": 1.49, "grad_norm": 0.5141316056251526, "learning_rate": 0.0003018238830918038, "loss": 1.7167, "step": 44911 }, { "epoch": 1.49, "grad_norm": 0.5097236633300781, "learning_rate": 0.0003018134311673739, "loss": 1.7446, "step": 44912 }, { "epoch": 1.49, "grad_norm": 0.5146270394325256, "learning_rate": 0.0003018029792407428, "loss": 1.7359, "step": 44913 }, { "epoch": 1.49, "grad_norm": 0.5018794536590576, "learning_rate": 0.0003017925273119231, "loss": 1.8034, "step": 44914 }, { "epoch": 1.49, "grad_norm": 0.518478274345398, "learning_rate": 0.0003017820753809276, "loss": 1.8087, "step": 44915 }, { "epoch": 1.49, "grad_norm": 0.49328261613845825, "learning_rate": 0.0003017716234477688, "loss": 1.7636, "step": 44916 }, { "epoch": 1.49, "grad_norm": 0.5106754899024963, "learning_rate": 0.00030176117151245956, "loss": 1.7961, "step": 44917 }, { "epoch": 1.49, "grad_norm": 0.5084616541862488, "learning_rate": 0.00030175071957501247, "loss": 1.782, "step": 44918 }, { "epoch": 1.49, "grad_norm": 0.5057319402694702, "learning_rate": 0.0003017402676354404, "loss": 1.6789, "step": 44919 }, { "epoch": 1.49, "grad_norm": 2.8385565280914307, "learning_rate": 0.00030172981569375585, "loss": 1.8525, "step": 44920 }, { "epoch": 1.49, "grad_norm": 0.511527419090271, "learning_rate": 0.00030171936374997154, "loss": 1.7878, "step": 44921 }, { "epoch": 1.49, "grad_norm": 0.504700779914856, "learning_rate": 0.00030170891180410014, "loss": 1.716, "step": 44922 }, { "epoch": 1.49, "grad_norm": 0.5265522599220276, "learning_rate": 0.0003016984598561544, "loss": 1.7339, "step": 44923 }, { "epoch": 1.49, "grad_norm": 0.5296545624732971, "learning_rate": 0.00030168800790614697, "loss": 1.79, "step": 44924 }, { "epoch": 1.49, "grad_norm": 0.503838300704956, "learning_rate": 0.0003016775559540906, "loss": 1.7651, "step": 44925 }, { "epoch": 1.49, "grad_norm": 0.5121690034866333, "learning_rate": 0.00030166710399999794, "loss": 1.6682, "step": 44926 }, { "epoch": 1.49, "grad_norm": 0.5126835703849792, "learning_rate": 0.0003016566520438816, "loss": 1.7518, "step": 44927 }, { "epoch": 1.49, "grad_norm": 0.5216609239578247, "learning_rate": 0.0003016462000857543, "loss": 1.7159, "step": 44928 }, { "epoch": 1.49, "grad_norm": 0.5089535713195801, "learning_rate": 0.0003016357481256288, "loss": 1.7583, "step": 44929 }, { "epoch": 1.49, "grad_norm": 0.4972976744174957, "learning_rate": 0.00030162529616351773, "loss": 1.7202, "step": 44930 }, { "epoch": 1.49, "grad_norm": 0.528683602809906, "learning_rate": 0.00030161484419943384, "loss": 1.8322, "step": 44931 }, { "epoch": 1.49, "grad_norm": 0.5156604647636414, "learning_rate": 0.00030160439223338967, "loss": 1.7373, "step": 44932 }, { "epoch": 1.49, "grad_norm": 0.5488741993904114, "learning_rate": 0.0003015939402653981, "loss": 1.8118, "step": 44933 }, { "epoch": 1.49, "grad_norm": 0.5018139481544495, "learning_rate": 0.00030158348829547167, "loss": 1.7523, "step": 44934 }, { "epoch": 1.49, "grad_norm": 0.5073711276054382, "learning_rate": 0.0003015730363236231, "loss": 1.7492, "step": 44935 }, { "epoch": 1.5, "grad_norm": 0.5122489929199219, "learning_rate": 0.00030156258434986516, "loss": 1.7348, "step": 44936 }, { "epoch": 1.5, "grad_norm": 0.5198780298233032, "learning_rate": 0.0003015521323742104, "loss": 1.7259, "step": 44937 }, { "epoch": 1.5, "grad_norm": 0.5237659215927124, "learning_rate": 0.0003015416803966717, "loss": 1.7216, "step": 44938 }, { "epoch": 1.5, "grad_norm": 0.5037131905555725, "learning_rate": 0.00030153122841726146, "loss": 1.7678, "step": 44939 }, { "epoch": 1.5, "grad_norm": 0.5670962333679199, "learning_rate": 0.00030152077643599265, "loss": 1.7809, "step": 44940 }, { "epoch": 1.5, "grad_norm": 0.510476291179657, "learning_rate": 0.0003015103244528778, "loss": 1.7475, "step": 44941 }, { "epoch": 1.5, "grad_norm": 0.5038866996765137, "learning_rate": 0.00030149987246792966, "loss": 1.7333, "step": 44942 }, { "epoch": 1.5, "grad_norm": 0.5160735845565796, "learning_rate": 0.00030148942048116096, "loss": 1.8442, "step": 44943 }, { "epoch": 1.5, "grad_norm": 0.5160155296325684, "learning_rate": 0.00030147896849258413, "loss": 1.7762, "step": 44944 }, { "epoch": 1.5, "grad_norm": 0.4984508752822876, "learning_rate": 0.00030146851650221225, "loss": 1.7478, "step": 44945 }, { "epoch": 1.5, "grad_norm": 0.5383781790733337, "learning_rate": 0.00030145806451005767, "loss": 1.7718, "step": 44946 }, { "epoch": 1.5, "grad_norm": 0.5209816694259644, "learning_rate": 0.0003014476125161333, "loss": 1.7991, "step": 44947 }, { "epoch": 1.5, "grad_norm": 0.49410757422447205, "learning_rate": 0.00030143716052045174, "loss": 1.7242, "step": 44948 }, { "epoch": 1.5, "grad_norm": 0.5225695371627808, "learning_rate": 0.00030142670852302555, "loss": 1.7375, "step": 44949 }, { "epoch": 1.5, "grad_norm": 0.5106488466262817, "learning_rate": 0.00030141625652386773, "loss": 1.7728, "step": 44950 }, { "epoch": 1.5, "grad_norm": 0.5021812915802002, "learning_rate": 0.00030140580452299074, "loss": 1.7852, "step": 44951 }, { "epoch": 1.5, "grad_norm": 0.4950677156448364, "learning_rate": 0.00030139535252040727, "loss": 1.774, "step": 44952 }, { "epoch": 1.5, "grad_norm": 0.5371813178062439, "learning_rate": 0.00030138490051612995, "loss": 1.8029, "step": 44953 }, { "epoch": 1.5, "grad_norm": 0.5222398042678833, "learning_rate": 0.00030137444851017174, "loss": 1.7957, "step": 44954 }, { "epoch": 1.5, "grad_norm": 0.537798285484314, "learning_rate": 0.00030136399650254505, "loss": 1.711, "step": 44955 }, { "epoch": 1.5, "grad_norm": 0.5262558460235596, "learning_rate": 0.00030135354449326273, "loss": 1.7889, "step": 44956 }, { "epoch": 1.5, "grad_norm": 0.5110507011413574, "learning_rate": 0.00030134309248233736, "loss": 1.8035, "step": 44957 }, { "epoch": 1.5, "grad_norm": 0.504580020904541, "learning_rate": 0.00030133264046978167, "loss": 1.7695, "step": 44958 }, { "epoch": 1.5, "grad_norm": 0.5405041575431824, "learning_rate": 0.00030132218845560835, "loss": 1.747, "step": 44959 }, { "epoch": 1.5, "grad_norm": 0.5148788094520569, "learning_rate": 0.0003013117364398301, "loss": 1.754, "step": 44960 }, { "epoch": 1.5, "grad_norm": 0.5289276838302612, "learning_rate": 0.00030130128442245967, "loss": 1.695, "step": 44961 }, { "epoch": 1.5, "grad_norm": 0.5332134962081909, "learning_rate": 0.00030129083240350947, "loss": 1.7693, "step": 44962 }, { "epoch": 1.5, "grad_norm": 0.4947352707386017, "learning_rate": 0.0003012803803829926, "loss": 1.7246, "step": 44963 }, { "epoch": 1.5, "grad_norm": 0.5280039310455322, "learning_rate": 0.0003012699283609215, "loss": 1.851, "step": 44964 }, { "epoch": 1.5, "grad_norm": 0.542205274105072, "learning_rate": 0.0003012594763373088, "loss": 1.8024, "step": 44965 }, { "epoch": 1.5, "grad_norm": 0.5194021463394165, "learning_rate": 0.0003012490243121673, "loss": 1.7949, "step": 44966 }, { "epoch": 1.5, "grad_norm": 0.506899356842041, "learning_rate": 0.00030123857228550973, "loss": 1.7267, "step": 44967 }, { "epoch": 1.5, "grad_norm": 0.5300593972206116, "learning_rate": 0.0003012281202573487, "loss": 1.7893, "step": 44968 }, { "epoch": 1.5, "grad_norm": 0.5043563842773438, "learning_rate": 0.00030121766822769686, "loss": 1.7291, "step": 44969 }, { "epoch": 1.5, "grad_norm": 0.5197189450263977, "learning_rate": 0.00030120721619656706, "loss": 1.748, "step": 44970 }, { "epoch": 1.5, "grad_norm": 0.5049374103546143, "learning_rate": 0.0003011967641639718, "loss": 1.7866, "step": 44971 }, { "epoch": 1.5, "grad_norm": 0.5360099077224731, "learning_rate": 0.0003011863121299238, "loss": 1.7816, "step": 44972 }, { "epoch": 1.5, "grad_norm": 0.524410605430603, "learning_rate": 0.0003011758600944359, "loss": 1.7808, "step": 44973 }, { "epoch": 1.5, "grad_norm": 0.5164710879325867, "learning_rate": 0.0003011654080575206, "loss": 1.7669, "step": 44974 }, { "epoch": 1.5, "grad_norm": 0.526027262210846, "learning_rate": 0.00030115495601919066, "loss": 1.7691, "step": 44975 }, { "epoch": 1.5, "grad_norm": 0.5106464624404907, "learning_rate": 0.0003011445039794588, "loss": 1.7281, "step": 44976 }, { "epoch": 1.5, "grad_norm": 0.5141860842704773, "learning_rate": 0.0003011340519383377, "loss": 1.6883, "step": 44977 }, { "epoch": 1.5, "grad_norm": 0.5117542147636414, "learning_rate": 0.00030112359989584, "loss": 1.741, "step": 44978 }, { "epoch": 1.5, "grad_norm": 0.4924470782279968, "learning_rate": 0.0003011131478519785, "loss": 1.6705, "step": 44979 }, { "epoch": 1.5, "grad_norm": 0.5139309763908386, "learning_rate": 0.00030110269580676564, "loss": 1.7048, "step": 44980 }, { "epoch": 1.5, "grad_norm": 0.5048677325248718, "learning_rate": 0.00030109224376021436, "loss": 1.7902, "step": 44981 }, { "epoch": 1.5, "grad_norm": 0.527924120426178, "learning_rate": 0.00030108179171233737, "loss": 1.7037, "step": 44982 }, { "epoch": 1.5, "grad_norm": 0.525439977645874, "learning_rate": 0.0003010713396631471, "loss": 1.7494, "step": 44983 }, { "epoch": 1.5, "grad_norm": 0.5197908282279968, "learning_rate": 0.00030106088761265636, "loss": 1.7469, "step": 44984 }, { "epoch": 1.5, "grad_norm": 1.1126261949539185, "learning_rate": 0.00030105043556087786, "loss": 1.831, "step": 44985 }, { "epoch": 1.5, "grad_norm": 0.5208926200866699, "learning_rate": 0.00030103998350782435, "loss": 1.7753, "step": 44986 }, { "epoch": 1.5, "grad_norm": 0.5221973657608032, "learning_rate": 0.00030102953145350845, "loss": 1.7543, "step": 44987 }, { "epoch": 1.5, "grad_norm": 0.5020756721496582, "learning_rate": 0.00030101907939794294, "loss": 1.7936, "step": 44988 }, { "epoch": 1.5, "grad_norm": 0.5831624269485474, "learning_rate": 0.00030100862734114026, "loss": 1.8637, "step": 44989 }, { "epoch": 1.5, "grad_norm": 0.5348407626152039, "learning_rate": 0.00030099817528311335, "loss": 1.7795, "step": 44990 }, { "epoch": 1.5, "grad_norm": 0.5003691911697388, "learning_rate": 0.0003009877232238747, "loss": 1.6935, "step": 44991 }, { "epoch": 1.5, "grad_norm": 0.5024194717407227, "learning_rate": 0.00030097727116343715, "loss": 1.7861, "step": 44992 }, { "epoch": 1.5, "grad_norm": 0.5128284692764282, "learning_rate": 0.0003009668191018134, "loss": 1.665, "step": 44993 }, { "epoch": 1.5, "grad_norm": 0.529462456703186, "learning_rate": 0.000300956367039016, "loss": 1.7506, "step": 44994 }, { "epoch": 1.5, "grad_norm": 0.5230007767677307, "learning_rate": 0.00030094591497505774, "loss": 1.8095, "step": 44995 }, { "epoch": 1.5, "grad_norm": 0.5200192928314209, "learning_rate": 0.0003009354629099513, "loss": 1.7973, "step": 44996 }, { "epoch": 1.5, "grad_norm": 0.5094122886657715, "learning_rate": 0.0003009250108437093, "loss": 1.7905, "step": 44997 }, { "epoch": 1.5, "grad_norm": 0.5136290192604065, "learning_rate": 0.0003009145587763446, "loss": 1.7866, "step": 44998 }, { "epoch": 1.5, "grad_norm": 0.5106797814369202, "learning_rate": 0.0003009041067078696, "loss": 1.7864, "step": 44999 }, { "epoch": 1.5, "grad_norm": 0.533926784992218, "learning_rate": 0.0003008936546382972, "loss": 1.7546, "step": 45000 }, { "epoch": 1.5, "grad_norm": 0.5161113739013672, "learning_rate": 0.00030088320256763996, "loss": 1.7036, "step": 45001 }, { "epoch": 1.5, "grad_norm": 0.519960880279541, "learning_rate": 0.0003008727504959108, "loss": 1.7781, "step": 45002 }, { "epoch": 1.5, "grad_norm": 0.5360531210899353, "learning_rate": 0.0003008622984231222, "loss": 1.8097, "step": 45003 }, { "epoch": 1.5, "grad_norm": 0.5200461745262146, "learning_rate": 0.0003008518463492868, "loss": 1.8007, "step": 45004 }, { "epoch": 1.5, "grad_norm": 0.5219783782958984, "learning_rate": 0.0003008413942744175, "loss": 1.7639, "step": 45005 }, { "epoch": 1.5, "grad_norm": 0.532306432723999, "learning_rate": 0.00030083094219852675, "loss": 1.846, "step": 45006 }, { "epoch": 1.5, "grad_norm": 0.5066584944725037, "learning_rate": 0.0003008204901216275, "loss": 1.6962, "step": 45007 }, { "epoch": 1.5, "grad_norm": 0.5196084976196289, "learning_rate": 0.00030081003804373216, "loss": 1.7443, "step": 45008 }, { "epoch": 1.5, "grad_norm": 0.5286592245101929, "learning_rate": 0.00030079958596485366, "loss": 1.7679, "step": 45009 }, { "epoch": 1.5, "grad_norm": 0.5056052803993225, "learning_rate": 0.0003007891338850045, "loss": 1.7464, "step": 45010 }, { "epoch": 1.5, "grad_norm": 0.508600652217865, "learning_rate": 0.0003007786818041975, "loss": 1.7971, "step": 45011 }, { "epoch": 1.5, "grad_norm": 0.5021036267280579, "learning_rate": 0.00030076822972244536, "loss": 1.7551, "step": 45012 }, { "epoch": 1.5, "grad_norm": 0.513062596321106, "learning_rate": 0.0003007577776397606, "loss": 1.7681, "step": 45013 }, { "epoch": 1.5, "grad_norm": 0.5200478434562683, "learning_rate": 0.00030074732555615606, "loss": 1.7688, "step": 45014 }, { "epoch": 1.5, "grad_norm": 0.5104591250419617, "learning_rate": 0.00030073687347164426, "loss": 1.7947, "step": 45015 }, { "epoch": 1.5, "grad_norm": 0.5300450921058655, "learning_rate": 0.0003007264213862381, "loss": 1.7374, "step": 45016 }, { "epoch": 1.5, "grad_norm": 0.5312085151672363, "learning_rate": 0.00030071596929995016, "loss": 1.7759, "step": 45017 }, { "epoch": 1.5, "grad_norm": 0.5026349425315857, "learning_rate": 0.0003007055172127933, "loss": 1.775, "step": 45018 }, { "epoch": 1.5, "grad_norm": 0.5161647200584412, "learning_rate": 0.00030069506512477983, "loss": 1.7469, "step": 45019 }, { "epoch": 1.5, "grad_norm": 0.5109142065048218, "learning_rate": 0.0003006846130359227, "loss": 1.668, "step": 45020 }, { "epoch": 1.5, "grad_norm": 0.5419979691505432, "learning_rate": 0.00030067416094623454, "loss": 1.7393, "step": 45021 }, { "epoch": 1.5, "grad_norm": 0.5246775150299072, "learning_rate": 0.000300663708855728, "loss": 1.7202, "step": 45022 }, { "epoch": 1.5, "grad_norm": 0.4969271719455719, "learning_rate": 0.000300653256764416, "loss": 1.7929, "step": 45023 }, { "epoch": 1.5, "grad_norm": 0.510711669921875, "learning_rate": 0.0003006428046723109, "loss": 1.7239, "step": 45024 }, { "epoch": 1.5, "grad_norm": 0.5160689949989319, "learning_rate": 0.00030063235257942557, "loss": 1.7363, "step": 45025 }, { "epoch": 1.5, "grad_norm": 0.5177485942840576, "learning_rate": 0.00030062190048577264, "loss": 1.7362, "step": 45026 }, { "epoch": 1.5, "grad_norm": 0.5212242603302002, "learning_rate": 0.00030061144839136484, "loss": 1.8036, "step": 45027 }, { "epoch": 1.5, "grad_norm": 0.5359938144683838, "learning_rate": 0.0003006009962962148, "loss": 1.6629, "step": 45028 }, { "epoch": 1.5, "grad_norm": 0.5337843298912048, "learning_rate": 0.00030059054420033523, "loss": 1.7937, "step": 45029 }, { "epoch": 1.5, "grad_norm": 0.523617684841156, "learning_rate": 0.00030058009210373897, "loss": 1.7187, "step": 45030 }, { "epoch": 1.5, "grad_norm": 0.514785885810852, "learning_rate": 0.0003005696400064383, "loss": 1.8164, "step": 45031 }, { "epoch": 1.5, "grad_norm": 0.5240331888198853, "learning_rate": 0.0003005591879084464, "loss": 1.7604, "step": 45032 }, { "epoch": 1.5, "grad_norm": 0.5098431706428528, "learning_rate": 0.0003005487358097756, "loss": 1.7279, "step": 45033 }, { "epoch": 1.5, "grad_norm": 0.511676013469696, "learning_rate": 0.00030053828371043876, "loss": 1.7105, "step": 45034 }, { "epoch": 1.5, "grad_norm": 0.5409737825393677, "learning_rate": 0.00030052783161044856, "loss": 1.8078, "step": 45035 }, { "epoch": 1.5, "grad_norm": 0.5313496589660645, "learning_rate": 0.00030051737950981754, "loss": 1.6509, "step": 45036 }, { "epoch": 1.5, "grad_norm": 0.514202356338501, "learning_rate": 0.00030050692740855867, "loss": 1.7203, "step": 45037 }, { "epoch": 1.5, "grad_norm": 1.3460971117019653, "learning_rate": 0.0003004964753066843, "loss": 1.8316, "step": 45038 }, { "epoch": 1.5, "grad_norm": 0.5270043015480042, "learning_rate": 0.00030048602320420736, "loss": 1.7798, "step": 45039 }, { "epoch": 1.5, "grad_norm": 0.5270006060600281, "learning_rate": 0.00030047557110114047, "loss": 1.7372, "step": 45040 }, { "epoch": 1.5, "grad_norm": 0.5135064125061035, "learning_rate": 0.0003004651189974962, "loss": 1.839, "step": 45041 }, { "epoch": 1.5, "grad_norm": 0.52787184715271, "learning_rate": 0.00030045466689328747, "loss": 1.6937, "step": 45042 }, { "epoch": 1.5, "grad_norm": 0.5454304218292236, "learning_rate": 0.00030044421478852683, "loss": 1.7398, "step": 45043 }, { "epoch": 1.5, "grad_norm": 0.5127205848693848, "learning_rate": 0.0003004337626832269, "loss": 1.8096, "step": 45044 }, { "epoch": 1.5, "grad_norm": 0.5343286991119385, "learning_rate": 0.00030042331057740057, "loss": 1.7927, "step": 45045 }, { "epoch": 1.5, "grad_norm": 0.5282290577888489, "learning_rate": 0.00030041285847106025, "loss": 1.7683, "step": 45046 }, { "epoch": 1.5, "grad_norm": 1.006075143814087, "learning_rate": 0.0003004024063642189, "loss": 1.7728, "step": 45047 }, { "epoch": 1.5, "grad_norm": 0.5294262766838074, "learning_rate": 0.00030039195425688906, "loss": 1.7253, "step": 45048 }, { "epoch": 1.5, "grad_norm": 0.5278120040893555, "learning_rate": 0.0003003815021490834, "loss": 1.8415, "step": 45049 }, { "epoch": 1.5, "grad_norm": 0.5391186475753784, "learning_rate": 0.0003003710500408147, "loss": 1.7262, "step": 45050 }, { "epoch": 1.5, "grad_norm": 0.5352905988693237, "learning_rate": 0.00030036059793209555, "loss": 1.7728, "step": 45051 }, { "epoch": 1.5, "grad_norm": 0.52180415391922, "learning_rate": 0.00030035014582293875, "loss": 1.8518, "step": 45052 }, { "epoch": 1.5, "grad_norm": 0.5218886733055115, "learning_rate": 0.00030033969371335694, "loss": 1.811, "step": 45053 }, { "epoch": 1.5, "grad_norm": 0.5304609537124634, "learning_rate": 0.00030032924160336265, "loss": 1.7643, "step": 45054 }, { "epoch": 1.5, "grad_norm": 0.5058050751686096, "learning_rate": 0.0003003187894929689, "loss": 1.7598, "step": 45055 }, { "epoch": 1.5, "grad_norm": 0.5357740521430969, "learning_rate": 0.000300308337382188, "loss": 1.7869, "step": 45056 }, { "epoch": 1.5, "grad_norm": 0.5010135173797607, "learning_rate": 0.000300297885271033, "loss": 1.8281, "step": 45057 }, { "epoch": 1.5, "grad_norm": 0.5184811353683472, "learning_rate": 0.00030028743315951633, "loss": 1.7811, "step": 45058 }, { "epoch": 1.5, "grad_norm": 0.5085154175758362, "learning_rate": 0.0003002769810476507, "loss": 1.7853, "step": 45059 }, { "epoch": 1.5, "grad_norm": 0.49893131852149963, "learning_rate": 0.000300266528935449, "loss": 1.803, "step": 45060 }, { "epoch": 1.5, "grad_norm": 0.516184389591217, "learning_rate": 0.0003002560768229236, "loss": 1.7818, "step": 45061 }, { "epoch": 1.5, "grad_norm": 0.5395285487174988, "learning_rate": 0.0003002456247100875, "loss": 1.8084, "step": 45062 }, { "epoch": 1.5, "grad_norm": 0.5365692973136902, "learning_rate": 0.00030023517259695316, "loss": 1.7391, "step": 45063 }, { "epoch": 1.5, "grad_norm": 0.5315209627151489, "learning_rate": 0.00030022472048353343, "loss": 1.8296, "step": 45064 }, { "epoch": 1.5, "grad_norm": 0.5125622153282166, "learning_rate": 0.0003002142683698408, "loss": 1.7934, "step": 45065 }, { "epoch": 1.5, "grad_norm": 0.5285277366638184, "learning_rate": 0.0003002038162558882, "loss": 1.8309, "step": 45066 }, { "epoch": 1.5, "grad_norm": 0.5220460891723633, "learning_rate": 0.0003001933641416882, "loss": 1.8016, "step": 45067 }, { "epoch": 1.5, "grad_norm": 0.5409040451049805, "learning_rate": 0.00030018291202725344, "loss": 1.7854, "step": 45068 }, { "epoch": 1.5, "grad_norm": 0.5346907377243042, "learning_rate": 0.00030017245991259664, "loss": 1.7908, "step": 45069 }, { "epoch": 1.5, "grad_norm": 0.5361042022705078, "learning_rate": 0.0003001620077977305, "loss": 1.7462, "step": 45070 }, { "epoch": 1.5, "grad_norm": 0.5102669596672058, "learning_rate": 0.0003001515556826677, "loss": 1.7717, "step": 45071 }, { "epoch": 1.5, "grad_norm": 0.5169257521629333, "learning_rate": 0.00030014110356742094, "loss": 1.7324, "step": 45072 }, { "epoch": 1.5, "grad_norm": 0.50298011302948, "learning_rate": 0.000300130651452003, "loss": 1.8367, "step": 45073 }, { "epoch": 1.5, "grad_norm": 0.5291078090667725, "learning_rate": 0.00030012019933642635, "loss": 1.7137, "step": 45074 }, { "epoch": 1.5, "grad_norm": 0.5123361349105835, "learning_rate": 0.0003001097472207038, "loss": 1.7115, "step": 45075 }, { "epoch": 1.5, "grad_norm": 0.5027003884315491, "learning_rate": 0.00030009929510484807, "loss": 1.712, "step": 45076 }, { "epoch": 1.5, "grad_norm": 0.5163803100585938, "learning_rate": 0.0003000888429888718, "loss": 1.7035, "step": 45077 }, { "epoch": 1.5, "grad_norm": 0.5009846687316895, "learning_rate": 0.0003000783908727877, "loss": 1.7211, "step": 45078 }, { "epoch": 1.5, "grad_norm": 0.5142592191696167, "learning_rate": 0.0003000679387566084, "loss": 1.7244, "step": 45079 }, { "epoch": 1.5, "grad_norm": 0.5220785140991211, "learning_rate": 0.0003000574866403467, "loss": 1.8084, "step": 45080 }, { "epoch": 1.5, "grad_norm": 0.5013822913169861, "learning_rate": 0.0003000470345240152, "loss": 1.8321, "step": 45081 }, { "epoch": 1.5, "grad_norm": 0.5101746320724487, "learning_rate": 0.0003000365824076265, "loss": 1.7698, "step": 45082 }, { "epoch": 1.5, "grad_norm": 0.5287039279937744, "learning_rate": 0.00030002613029119353, "loss": 1.8097, "step": 45083 }, { "epoch": 1.5, "grad_norm": 0.5365612506866455, "learning_rate": 0.00030001567817472877, "loss": 1.7478, "step": 45084 }, { "epoch": 1.5, "grad_norm": 0.5252463221549988, "learning_rate": 0.00030000522605824504, "loss": 1.7893, "step": 45085 }, { "epoch": 1.5, "grad_norm": 0.5200310945510864, "learning_rate": 0.00029999477394175496, "loss": 1.8026, "step": 45086 }, { "epoch": 1.5, "grad_norm": 0.5024345517158508, "learning_rate": 0.0002999843218252712, "loss": 1.7678, "step": 45087 }, { "epoch": 1.5, "grad_norm": 0.5099281668663025, "learning_rate": 0.0002999738697088065, "loss": 1.732, "step": 45088 }, { "epoch": 1.5, "grad_norm": 0.5109580755233765, "learning_rate": 0.00029996341759237354, "loss": 1.7068, "step": 45089 }, { "epoch": 1.5, "grad_norm": 0.5189691781997681, "learning_rate": 0.00029995296547598484, "loss": 1.7028, "step": 45090 }, { "epoch": 1.5, "grad_norm": 0.5120670795440674, "learning_rate": 0.00029994251335965327, "loss": 1.8309, "step": 45091 }, { "epoch": 1.5, "grad_norm": 0.529498279094696, "learning_rate": 0.00029993206124339153, "loss": 1.758, "step": 45092 }, { "epoch": 1.5, "grad_norm": 0.5313644409179688, "learning_rate": 0.00029992160912721235, "loss": 1.7488, "step": 45093 }, { "epoch": 1.5, "grad_norm": 0.5105671286582947, "learning_rate": 0.0002999111570111282, "loss": 1.8012, "step": 45094 }, { "epoch": 1.5, "grad_norm": 0.49541980028152466, "learning_rate": 0.00029990070489515193, "loss": 1.7772, "step": 45095 }, { "epoch": 1.5, "grad_norm": 0.5278889536857605, "learning_rate": 0.0002998902527792962, "loss": 1.7404, "step": 45096 }, { "epoch": 1.5, "grad_norm": 0.5115567445755005, "learning_rate": 0.00029987980066357366, "loss": 1.7475, "step": 45097 }, { "epoch": 1.5, "grad_norm": 0.49179455637931824, "learning_rate": 0.000299869348547997, "loss": 1.7442, "step": 45098 }, { "epoch": 1.5, "grad_norm": 0.4944384694099426, "learning_rate": 0.00029985889643257906, "loss": 1.749, "step": 45099 }, { "epoch": 1.5, "grad_norm": 0.5146026015281677, "learning_rate": 0.0002998484443173323, "loss": 1.7646, "step": 45100 }, { "epoch": 1.5, "grad_norm": 0.51105135679245, "learning_rate": 0.0002998379922022695, "loss": 1.7372, "step": 45101 }, { "epoch": 1.5, "grad_norm": 0.5192815661430359, "learning_rate": 0.0002998275400874033, "loss": 1.6985, "step": 45102 }, { "epoch": 1.5, "grad_norm": 0.5495265126228333, "learning_rate": 0.0002998170879727466, "loss": 1.7401, "step": 45103 }, { "epoch": 1.5, "grad_norm": 0.5592394471168518, "learning_rate": 0.0002998066358583118, "loss": 1.6748, "step": 45104 }, { "epoch": 1.5, "grad_norm": 0.5253159999847412, "learning_rate": 0.00029979618374411174, "loss": 1.8183, "step": 45105 }, { "epoch": 1.5, "grad_norm": 0.5219579339027405, "learning_rate": 0.00029978573163015913, "loss": 1.7437, "step": 45106 }, { "epoch": 1.5, "grad_norm": 0.5130542516708374, "learning_rate": 0.0002997752795164666, "loss": 1.794, "step": 45107 }, { "epoch": 1.5, "grad_norm": 0.5599943995475769, "learning_rate": 0.00029976482740304685, "loss": 1.6556, "step": 45108 }, { "epoch": 1.5, "grad_norm": 0.5163836479187012, "learning_rate": 0.0002997543752899126, "loss": 1.7803, "step": 45109 }, { "epoch": 1.5, "grad_norm": 0.5303226709365845, "learning_rate": 0.0002997439231770764, "loss": 1.7122, "step": 45110 }, { "epoch": 1.5, "grad_norm": 0.5194814801216125, "learning_rate": 0.000299733471064551, "loss": 1.8522, "step": 45111 }, { "epoch": 1.5, "grad_norm": 0.5115821361541748, "learning_rate": 0.0002997230189523492, "loss": 1.7195, "step": 45112 }, { "epoch": 1.5, "grad_norm": 0.5324957966804504, "learning_rate": 0.0002997125668404837, "loss": 1.7556, "step": 45113 }, { "epoch": 1.5, "grad_norm": 0.5060648918151855, "learning_rate": 0.000299702114728967, "loss": 1.6698, "step": 45114 }, { "epoch": 1.5, "grad_norm": 0.5305199027061462, "learning_rate": 0.00029969166261781195, "loss": 1.7708, "step": 45115 }, { "epoch": 1.5, "grad_norm": 0.530845582485199, "learning_rate": 0.0002996812105070312, "loss": 1.837, "step": 45116 }, { "epoch": 1.5, "grad_norm": 0.5254833698272705, "learning_rate": 0.0002996707583966373, "loss": 1.777, "step": 45117 }, { "epoch": 1.5, "grad_norm": 0.5278536677360535, "learning_rate": 0.00029966030628664306, "loss": 1.72, "step": 45118 }, { "epoch": 1.5, "grad_norm": 0.5106574892997742, "learning_rate": 0.0002996498541770613, "loss": 1.7688, "step": 45119 }, { "epoch": 1.5, "grad_norm": 0.5206677913665771, "learning_rate": 0.0002996394020679044, "loss": 1.7796, "step": 45120 }, { "epoch": 1.5, "grad_norm": 0.5124956369400024, "learning_rate": 0.0002996289499591853, "loss": 1.7071, "step": 45121 }, { "epoch": 1.5, "grad_norm": 0.5265559554100037, "learning_rate": 0.00029961849785091656, "loss": 1.7289, "step": 45122 }, { "epoch": 1.5, "grad_norm": 0.5651469826698303, "learning_rate": 0.000299608045743111, "loss": 1.8165, "step": 45123 }, { "epoch": 1.5, "grad_norm": 0.524864912033081, "learning_rate": 0.0002995975936357811, "loss": 1.7424, "step": 45124 }, { "epoch": 1.5, "grad_norm": 0.5199875831604004, "learning_rate": 0.00029958714152893964, "loss": 1.7735, "step": 45125 }, { "epoch": 1.5, "grad_norm": 0.5120280981063843, "learning_rate": 0.0002995766894225995, "loss": 1.6788, "step": 45126 }, { "epoch": 1.5, "grad_norm": 0.5500109195709229, "learning_rate": 0.00029956623731677303, "loss": 1.7808, "step": 45127 }, { "epoch": 1.5, "grad_norm": 0.513693630695343, "learning_rate": 0.0002995557852114731, "loss": 1.7267, "step": 45128 }, { "epoch": 1.5, "grad_norm": 0.5119432210922241, "learning_rate": 0.00029954533310671253, "loss": 1.734, "step": 45129 }, { "epoch": 1.5, "grad_norm": 0.5069096088409424, "learning_rate": 0.00029953488100250376, "loss": 1.7259, "step": 45130 }, { "epoch": 1.5, "grad_norm": 0.5294445157051086, "learning_rate": 0.00029952442889885953, "loss": 1.7261, "step": 45131 }, { "epoch": 1.5, "grad_norm": 0.5211765170097351, "learning_rate": 0.0002995139767957926, "loss": 1.7451, "step": 45132 }, { "epoch": 1.5, "grad_norm": 0.5295459628105164, "learning_rate": 0.0002995035246933157, "loss": 1.7764, "step": 45133 }, { "epoch": 1.5, "grad_norm": 0.5309547781944275, "learning_rate": 0.00029949307259144134, "loss": 1.8271, "step": 45134 }, { "epoch": 1.5, "grad_norm": 0.5225260257720947, "learning_rate": 0.00029948262049018235, "loss": 1.8001, "step": 45135 }, { "epoch": 1.5, "grad_norm": 0.5048408508300781, "learning_rate": 0.0002994721683895515, "loss": 1.7657, "step": 45136 }, { "epoch": 1.5, "grad_norm": 0.5101966857910156, "learning_rate": 0.0002994617162895612, "loss": 1.8765, "step": 45137 }, { "epoch": 1.5, "grad_norm": 0.5330507159233093, "learning_rate": 0.0002994512641902244, "loss": 1.7899, "step": 45138 }, { "epoch": 1.5, "grad_norm": 0.5105443000793457, "learning_rate": 0.00029944081209155366, "loss": 1.7361, "step": 45139 }, { "epoch": 1.5, "grad_norm": 0.5252787470817566, "learning_rate": 0.00029943035999356165, "loss": 1.7389, "step": 45140 }, { "epoch": 1.5, "grad_norm": 0.5212152600288391, "learning_rate": 0.0002994199078962611, "loss": 1.7239, "step": 45141 }, { "epoch": 1.5, "grad_norm": 0.5413108468055725, "learning_rate": 0.00029940945579966466, "loss": 1.7904, "step": 45142 }, { "epoch": 1.5, "grad_norm": 0.5297431349754333, "learning_rate": 0.00029939900370378516, "loss": 1.7937, "step": 45143 }, { "epoch": 1.5, "grad_norm": 0.5175608992576599, "learning_rate": 0.00029938855160863517, "loss": 1.6731, "step": 45144 }, { "epoch": 1.5, "grad_norm": 0.5533952116966248, "learning_rate": 0.0002993780995142273, "loss": 1.7294, "step": 45145 }, { "epoch": 1.5, "grad_norm": 0.5619663000106812, "learning_rate": 0.0002993676474205745, "loss": 1.7377, "step": 45146 }, { "epoch": 1.5, "grad_norm": 0.507213294506073, "learning_rate": 0.0002993571953276891, "loss": 1.7303, "step": 45147 }, { "epoch": 1.5, "grad_norm": 0.5281281471252441, "learning_rate": 0.00029934674323558395, "loss": 1.749, "step": 45148 }, { "epoch": 1.5, "grad_norm": 0.5247742533683777, "learning_rate": 0.00029933629114427187, "loss": 1.8068, "step": 45149 }, { "epoch": 1.5, "grad_norm": 0.528825581073761, "learning_rate": 0.00029932583905376546, "loss": 1.6923, "step": 45150 }, { "epoch": 1.5, "grad_norm": 0.5012419819831848, "learning_rate": 0.0002993153869640773, "loss": 1.6813, "step": 45151 }, { "epoch": 1.5, "grad_norm": 0.5268725156784058, "learning_rate": 0.00029930493487522017, "loss": 1.7642, "step": 45152 }, { "epoch": 1.5, "grad_norm": 0.5145612359046936, "learning_rate": 0.00029929448278720683, "loss": 1.7733, "step": 45153 }, { "epoch": 1.5, "grad_norm": 0.5289785265922546, "learning_rate": 0.0002992840307000498, "loss": 1.7517, "step": 45154 }, { "epoch": 1.5, "grad_norm": 0.5289137363433838, "learning_rate": 0.0002992735786137618, "loss": 1.7163, "step": 45155 }, { "epoch": 1.5, "grad_norm": 0.522383451461792, "learning_rate": 0.00029926312652835574, "loss": 1.7654, "step": 45156 }, { "epoch": 1.5, "grad_norm": 0.5056462287902832, "learning_rate": 0.00029925267444384394, "loss": 1.7481, "step": 45157 }, { "epoch": 1.5, "grad_norm": 0.5144869685173035, "learning_rate": 0.0002992422223602394, "loss": 1.7533, "step": 45158 }, { "epoch": 1.5, "grad_norm": 0.5139449238777161, "learning_rate": 0.00029923177027755464, "loss": 1.7997, "step": 45159 }, { "epoch": 1.5, "grad_norm": 0.5228482484817505, "learning_rate": 0.00029922131819580246, "loss": 1.774, "step": 45160 }, { "epoch": 1.5, "grad_norm": 0.5409990549087524, "learning_rate": 0.00029921086611499543, "loss": 1.7967, "step": 45161 }, { "epoch": 1.5, "grad_norm": 0.5161087512969971, "learning_rate": 0.00029920041403514624, "loss": 1.7216, "step": 45162 }, { "epoch": 1.5, "grad_norm": 0.5268037915229797, "learning_rate": 0.00029918996195626784, "loss": 1.7028, "step": 45163 }, { "epoch": 1.5, "grad_norm": 0.5129661560058594, "learning_rate": 0.0002991795098783725, "loss": 1.7189, "step": 45164 }, { "epoch": 1.5, "grad_norm": 0.5181491374969482, "learning_rate": 0.00029916905780147314, "loss": 1.7528, "step": 45165 }, { "epoch": 1.5, "grad_norm": 0.5252094864845276, "learning_rate": 0.00029915860572558254, "loss": 1.7621, "step": 45166 }, { "epoch": 1.5, "grad_norm": 0.5214582085609436, "learning_rate": 0.0002991481536507132, "loss": 1.8028, "step": 45167 }, { "epoch": 1.5, "grad_norm": 0.5337966680526733, "learning_rate": 0.00029913770157687786, "loss": 1.8218, "step": 45168 }, { "epoch": 1.5, "grad_norm": 0.5204960703849792, "learning_rate": 0.00029912724950408915, "loss": 1.7241, "step": 45169 }, { "epoch": 1.5, "grad_norm": 0.5531156063079834, "learning_rate": 0.00029911679743236004, "loss": 1.7862, "step": 45170 }, { "epoch": 1.5, "grad_norm": 0.5045090317726135, "learning_rate": 0.00029910634536170277, "loss": 1.7045, "step": 45171 }, { "epoch": 1.5, "grad_norm": 0.5228244066238403, "learning_rate": 0.00029909589329213035, "loss": 1.6544, "step": 45172 }, { "epoch": 1.5, "grad_norm": 0.5048469305038452, "learning_rate": 0.00029908544122365547, "loss": 1.7476, "step": 45173 }, { "epoch": 1.5, "grad_norm": 0.5171976089477539, "learning_rate": 0.00029907498915629065, "loss": 1.7878, "step": 45174 }, { "epoch": 1.5, "grad_norm": 0.5173618197441101, "learning_rate": 0.0002990645370900487, "loss": 1.7434, "step": 45175 }, { "epoch": 1.5, "grad_norm": 0.5097476243972778, "learning_rate": 0.00029905408502494226, "loss": 1.7122, "step": 45176 }, { "epoch": 1.5, "grad_norm": 0.5275119543075562, "learning_rate": 0.000299043632960984, "loss": 1.761, "step": 45177 }, { "epoch": 1.5, "grad_norm": 0.5049564242362976, "learning_rate": 0.00029903318089818655, "loss": 1.7553, "step": 45178 }, { "epoch": 1.5, "grad_norm": 0.5095229744911194, "learning_rate": 0.00029902272883656274, "loss": 1.8536, "step": 45179 }, { "epoch": 1.5, "grad_norm": 0.5110421776771545, "learning_rate": 0.00029901227677612527, "loss": 1.7116, "step": 45180 }, { "epoch": 1.5, "grad_norm": 0.5112572908401489, "learning_rate": 0.00029900182471688665, "loss": 1.7563, "step": 45181 }, { "epoch": 1.5, "grad_norm": 0.5225240588188171, "learning_rate": 0.0002989913726588597, "loss": 1.7609, "step": 45182 }, { "epoch": 1.5, "grad_norm": 0.5108754634857178, "learning_rate": 0.00029898092060205717, "loss": 1.8107, "step": 45183 }, { "epoch": 1.5, "grad_norm": 0.5467234253883362, "learning_rate": 0.0002989704685464915, "loss": 1.7247, "step": 45184 }, { "epoch": 1.5, "grad_norm": 0.49304428696632385, "learning_rate": 0.00029896001649217554, "loss": 1.8028, "step": 45185 }, { "epoch": 1.5, "grad_norm": 0.5433195233345032, "learning_rate": 0.00029894956443912214, "loss": 1.8284, "step": 45186 }, { "epoch": 1.5, "grad_norm": 0.5234502553939819, "learning_rate": 0.0002989391123873436, "loss": 1.7966, "step": 45187 }, { "epoch": 1.5, "grad_norm": 0.5204798579216003, "learning_rate": 0.0002989286603368529, "loss": 1.7954, "step": 45188 }, { "epoch": 1.5, "grad_norm": 0.5267989635467529, "learning_rate": 0.0002989182082876627, "loss": 1.6613, "step": 45189 }, { "epoch": 1.5, "grad_norm": 0.5347326397895813, "learning_rate": 0.0002989077562397856, "loss": 1.7879, "step": 45190 }, { "epoch": 1.5, "grad_norm": 0.5259771347045898, "learning_rate": 0.0002988973041932343, "loss": 1.7575, "step": 45191 }, { "epoch": 1.5, "grad_norm": 0.5201911926269531, "learning_rate": 0.00029888685214802147, "loss": 1.7426, "step": 45192 }, { "epoch": 1.5, "grad_norm": 0.498877614736557, "learning_rate": 0.00029887640010416, "loss": 1.7537, "step": 45193 }, { "epoch": 1.5, "grad_norm": 0.5344128608703613, "learning_rate": 0.00029886594806166226, "loss": 1.799, "step": 45194 }, { "epoch": 1.5, "grad_norm": 0.5335855484008789, "learning_rate": 0.00029885549602054115, "loss": 1.7481, "step": 45195 }, { "epoch": 1.5, "grad_norm": 0.520523726940155, "learning_rate": 0.00029884504398080934, "loss": 1.7758, "step": 45196 }, { "epoch": 1.5, "grad_norm": 0.5182448029518127, "learning_rate": 0.00029883459194247945, "loss": 1.7814, "step": 45197 }, { "epoch": 1.5, "grad_norm": 1.6597378253936768, "learning_rate": 0.00029882413990556413, "loss": 1.7803, "step": 45198 }, { "epoch": 1.5, "grad_norm": 0.5109047889709473, "learning_rate": 0.0002988136878700761, "loss": 1.797, "step": 45199 }, { "epoch": 1.5, "grad_norm": 0.5255457758903503, "learning_rate": 0.0002988032358360282, "loss": 1.7937, "step": 45200 }, { "epoch": 1.5, "grad_norm": 0.5255297422409058, "learning_rate": 0.00029879278380343295, "loss": 1.8664, "step": 45201 }, { "epoch": 1.5, "grad_norm": 0.5334635972976685, "learning_rate": 0.00029878233177230303, "loss": 1.7178, "step": 45202 }, { "epoch": 1.5, "grad_norm": 0.5117461085319519, "learning_rate": 0.0002987718797426513, "loss": 1.7392, "step": 45203 }, { "epoch": 1.5, "grad_norm": 0.519702672958374, "learning_rate": 0.00029876142771449027, "loss": 1.7362, "step": 45204 }, { "epoch": 1.5, "grad_norm": 0.5480769872665405, "learning_rate": 0.00029875097568783264, "loss": 1.7193, "step": 45205 }, { "epoch": 1.5, "grad_norm": 0.564493715763092, "learning_rate": 0.00029874052366269126, "loss": 1.748, "step": 45206 }, { "epoch": 1.5, "grad_norm": 0.509942889213562, "learning_rate": 0.0002987300716390785, "loss": 1.7015, "step": 45207 }, { "epoch": 1.5, "grad_norm": 0.49682605266571045, "learning_rate": 0.00029871961961700736, "loss": 1.6987, "step": 45208 }, { "epoch": 1.5, "grad_norm": 0.505066454410553, "learning_rate": 0.00029870916759649037, "loss": 1.723, "step": 45209 }, { "epoch": 1.5, "grad_norm": 0.5474964380264282, "learning_rate": 0.0002986987155775404, "loss": 1.8304, "step": 45210 }, { "epoch": 1.5, "grad_norm": 0.5290328860282898, "learning_rate": 0.0002986882635601699, "loss": 1.7335, "step": 45211 }, { "epoch": 1.5, "grad_norm": 0.5102953314781189, "learning_rate": 0.0002986778115443916, "loss": 1.779, "step": 45212 }, { "epoch": 1.5, "grad_norm": 0.5176398754119873, "learning_rate": 0.0002986673595302184, "loss": 1.7149, "step": 45213 }, { "epoch": 1.5, "grad_norm": 0.522100567817688, "learning_rate": 0.0002986569075176626, "loss": 1.6142, "step": 45214 }, { "epoch": 1.5, "grad_norm": 0.5319623947143555, "learning_rate": 0.00029864645550673727, "loss": 1.7892, "step": 45215 }, { "epoch": 1.5, "grad_norm": 0.5399035811424255, "learning_rate": 0.00029863600349745495, "loss": 1.8198, "step": 45216 }, { "epoch": 1.5, "grad_norm": 0.5198310613632202, "learning_rate": 0.00029862555148982826, "loss": 1.6782, "step": 45217 }, { "epoch": 1.5, "grad_norm": 0.4970424771308899, "learning_rate": 0.00029861509948387, "loss": 1.7356, "step": 45218 }, { "epoch": 1.5, "grad_norm": 0.50665682554245, "learning_rate": 0.0002986046474795927, "loss": 1.6862, "step": 45219 }, { "epoch": 1.5, "grad_norm": 0.5231212973594666, "learning_rate": 0.0002985941954770093, "loss": 1.7619, "step": 45220 }, { "epoch": 1.5, "grad_norm": 0.5132477283477783, "learning_rate": 0.0002985837434761322, "loss": 1.7621, "step": 45221 }, { "epoch": 1.5, "grad_norm": 0.539348304271698, "learning_rate": 0.0002985732914769743, "loss": 1.733, "step": 45222 }, { "epoch": 1.5, "grad_norm": 0.5217592716217041, "learning_rate": 0.0002985628394795483, "loss": 1.7725, "step": 45223 }, { "epoch": 1.5, "grad_norm": 0.5405064821243286, "learning_rate": 0.00029855238748386666, "loss": 1.8118, "step": 45224 }, { "epoch": 1.5, "grad_norm": 0.5241909027099609, "learning_rate": 0.0002985419354899423, "loss": 1.8311, "step": 45225 }, { "epoch": 1.5, "grad_norm": 0.5619940757751465, "learning_rate": 0.0002985314834977878, "loss": 1.762, "step": 45226 }, { "epoch": 1.5, "grad_norm": 0.5379800796508789, "learning_rate": 0.0002985210315074158, "loss": 1.6925, "step": 45227 }, { "epoch": 1.5, "grad_norm": 0.49560725688934326, "learning_rate": 0.00029851057951883904, "loss": 1.7064, "step": 45228 }, { "epoch": 1.5, "grad_norm": 0.5156409740447998, "learning_rate": 0.0002985001275320703, "loss": 1.7017, "step": 45229 }, { "epoch": 1.5, "grad_norm": 0.539006233215332, "learning_rate": 0.0002984896755471222, "loss": 1.7952, "step": 45230 }, { "epoch": 1.5, "grad_norm": 0.5428420901298523, "learning_rate": 0.0002984792235640073, "loss": 1.7637, "step": 45231 }, { "epoch": 1.5, "grad_norm": 0.527229368686676, "learning_rate": 0.0002984687715827385, "loss": 1.7356, "step": 45232 }, { "epoch": 1.5, "grad_norm": 0.5372446179389954, "learning_rate": 0.0002984583196033284, "loss": 1.7396, "step": 45233 }, { "epoch": 1.5, "grad_norm": 0.5454121828079224, "learning_rate": 0.0002984478676257896, "loss": 1.7071, "step": 45234 }, { "epoch": 1.5, "grad_norm": 0.5367846488952637, "learning_rate": 0.0002984374156501348, "loss": 1.7627, "step": 45235 }, { "epoch": 1.51, "grad_norm": 0.5288705229759216, "learning_rate": 0.00029842696367637694, "loss": 1.7919, "step": 45236 }, { "epoch": 1.51, "grad_norm": 0.5489184856414795, "learning_rate": 0.00029841651170452833, "loss": 1.7826, "step": 45237 }, { "epoch": 1.51, "grad_norm": 0.520680844783783, "learning_rate": 0.0002984060597346019, "loss": 1.7111, "step": 45238 }, { "epoch": 1.51, "grad_norm": 0.5364943146705627, "learning_rate": 0.0002983956077666103, "loss": 1.7339, "step": 45239 }, { "epoch": 1.51, "grad_norm": 0.5213238000869751, "learning_rate": 0.0002983851558005662, "loss": 1.7347, "step": 45240 }, { "epoch": 1.51, "grad_norm": 0.5335718393325806, "learning_rate": 0.0002983747038364822, "loss": 1.7417, "step": 45241 }, { "epoch": 1.51, "grad_norm": 0.5157487988471985, "learning_rate": 0.0002983642518743711, "loss": 1.7573, "step": 45242 }, { "epoch": 1.51, "grad_norm": 0.517108142375946, "learning_rate": 0.00029835379991424574, "loss": 1.7724, "step": 45243 }, { "epoch": 1.51, "grad_norm": 1.0490453243255615, "learning_rate": 0.0002983433479561184, "loss": 1.749, "step": 45244 }, { "epoch": 1.51, "grad_norm": 0.5000014305114746, "learning_rate": 0.00029833289600000206, "loss": 1.7595, "step": 45245 }, { "epoch": 1.51, "grad_norm": 0.5180541276931763, "learning_rate": 0.0002983224440459094, "loss": 1.7438, "step": 45246 }, { "epoch": 1.51, "grad_norm": 0.5108596682548523, "learning_rate": 0.000298311992093853, "loss": 1.6941, "step": 45247 }, { "epoch": 1.51, "grad_norm": 0.5148937106132507, "learning_rate": 0.00029830154014384556, "loss": 1.7969, "step": 45248 }, { "epoch": 1.51, "grad_norm": 0.5284242033958435, "learning_rate": 0.00029829108819589976, "loss": 1.7153, "step": 45249 }, { "epoch": 1.51, "grad_norm": 0.5191163420677185, "learning_rate": 0.0002982806362500285, "loss": 1.801, "step": 45250 }, { "epoch": 1.51, "grad_norm": 0.5042192339897156, "learning_rate": 0.00029827018430624415, "loss": 1.7421, "step": 45251 }, { "epoch": 1.51, "grad_norm": 0.5401875972747803, "learning_rate": 0.00029825973236455956, "loss": 1.8058, "step": 45252 }, { "epoch": 1.51, "grad_norm": 0.5266989469528198, "learning_rate": 0.0002982492804249875, "loss": 1.7817, "step": 45253 }, { "epoch": 1.51, "grad_norm": 0.5251095294952393, "learning_rate": 0.00029823882848754044, "loss": 1.7939, "step": 45254 }, { "epoch": 1.51, "grad_norm": 0.5215559601783752, "learning_rate": 0.0002982283765522312, "loss": 1.79, "step": 45255 }, { "epoch": 1.51, "grad_norm": 0.5300779938697815, "learning_rate": 0.00029821792461907253, "loss": 1.7399, "step": 45256 }, { "epoch": 1.51, "grad_norm": 0.5341122150421143, "learning_rate": 0.000298207472688077, "loss": 1.685, "step": 45257 }, { "epoch": 1.51, "grad_norm": 0.5186082124710083, "learning_rate": 0.0002981970207592572, "loss": 1.6952, "step": 45258 }, { "epoch": 1.51, "grad_norm": 0.5216503143310547, "learning_rate": 0.00029818656883262606, "loss": 1.6842, "step": 45259 }, { "epoch": 1.51, "grad_norm": 0.5175560116767883, "learning_rate": 0.0002981761169081962, "loss": 1.808, "step": 45260 }, { "epoch": 1.51, "grad_norm": 0.5362639427185059, "learning_rate": 0.0002981656649859802, "loss": 1.8015, "step": 45261 }, { "epoch": 1.51, "grad_norm": 0.5164831280708313, "learning_rate": 0.00029815521306599083, "loss": 1.7251, "step": 45262 }, { "epoch": 1.51, "grad_norm": 0.5283179879188538, "learning_rate": 0.0002981447611482408, "loss": 1.7737, "step": 45263 }, { "epoch": 1.51, "grad_norm": 0.537407398223877, "learning_rate": 0.00029813430923274274, "loss": 1.7585, "step": 45264 }, { "epoch": 1.51, "grad_norm": 0.5332195162773132, "learning_rate": 0.00029812385731950924, "loss": 1.7722, "step": 45265 }, { "epoch": 1.51, "grad_norm": 0.5364568829536438, "learning_rate": 0.0002981134054085533, "loss": 1.7872, "step": 45266 }, { "epoch": 1.51, "grad_norm": 0.505608320236206, "learning_rate": 0.0002981029534998873, "loss": 1.7594, "step": 45267 }, { "epoch": 1.51, "grad_norm": 0.512907087802887, "learning_rate": 0.000298092501593524, "loss": 1.7247, "step": 45268 }, { "epoch": 1.51, "grad_norm": 0.5522956252098083, "learning_rate": 0.00029808204968947614, "loss": 1.6938, "step": 45269 }, { "epoch": 1.51, "grad_norm": 0.5317378640174866, "learning_rate": 0.00029807159778775643, "loss": 1.7714, "step": 45270 }, { "epoch": 1.51, "grad_norm": 0.5087918043136597, "learning_rate": 0.0002980611458883775, "loss": 1.715, "step": 45271 }, { "epoch": 1.51, "grad_norm": 0.5320773124694824, "learning_rate": 0.000298050693991352, "loss": 1.6949, "step": 45272 }, { "epoch": 1.51, "grad_norm": 0.5323687791824341, "learning_rate": 0.00029804024209669286, "loss": 1.7548, "step": 45273 }, { "epoch": 1.51, "grad_norm": 0.5273833870887756, "learning_rate": 0.0002980297902044124, "loss": 1.7486, "step": 45274 }, { "epoch": 1.51, "grad_norm": 0.5033087730407715, "learning_rate": 0.00029801933831452343, "loss": 1.72, "step": 45275 }, { "epoch": 1.51, "grad_norm": 0.5221388936042786, "learning_rate": 0.0002980088864270388, "loss": 1.8037, "step": 45276 }, { "epoch": 1.51, "grad_norm": 0.5014825463294983, "learning_rate": 0.00029799843454197115, "loss": 1.6879, "step": 45277 }, { "epoch": 1.51, "grad_norm": 0.5218505859375, "learning_rate": 0.00029798798265933296, "loss": 1.7792, "step": 45278 }, { "epoch": 1.51, "grad_norm": 0.6572911739349365, "learning_rate": 0.0002979775307791371, "loss": 1.7452, "step": 45279 }, { "epoch": 1.51, "grad_norm": 0.5362388491630554, "learning_rate": 0.0002979670789013964, "loss": 1.8561, "step": 45280 }, { "epoch": 1.51, "grad_norm": 0.5199136734008789, "learning_rate": 0.00029795662702612307, "loss": 1.7172, "step": 45281 }, { "epoch": 1.51, "grad_norm": 0.5261659622192383, "learning_rate": 0.00029794617515333026, "loss": 1.7653, "step": 45282 }, { "epoch": 1.51, "grad_norm": 0.5084142684936523, "learning_rate": 0.0002979357232830305, "loss": 1.7112, "step": 45283 }, { "epoch": 1.51, "grad_norm": 0.5218841433525085, "learning_rate": 0.00029792527141523647, "loss": 1.8395, "step": 45284 }, { "epoch": 1.51, "grad_norm": 0.5111597180366516, "learning_rate": 0.0002979148195499608, "loss": 1.8154, "step": 45285 }, { "epoch": 1.51, "grad_norm": 0.5133047699928284, "learning_rate": 0.00029790436768721616, "loss": 1.7172, "step": 45286 }, { "epoch": 1.51, "grad_norm": 0.5228255391120911, "learning_rate": 0.00029789391582701554, "loss": 1.7544, "step": 45287 }, { "epoch": 1.51, "grad_norm": 0.5098044872283936, "learning_rate": 0.0002978834639693712, "loss": 1.7031, "step": 45288 }, { "epoch": 1.51, "grad_norm": 0.4988884925842285, "learning_rate": 0.00029787301211429604, "loss": 1.6869, "step": 45289 }, { "epoch": 1.51, "grad_norm": 0.5302967429161072, "learning_rate": 0.00029786256026180285, "loss": 1.7393, "step": 45290 }, { "epoch": 1.51, "grad_norm": 0.5197231769561768, "learning_rate": 0.0002978521084119041, "loss": 1.7479, "step": 45291 }, { "epoch": 1.51, "grad_norm": 0.5159717202186584, "learning_rate": 0.0002978416565646126, "loss": 1.7591, "step": 45292 }, { "epoch": 1.51, "grad_norm": 2.7807533740997314, "learning_rate": 0.00029783120471994106, "loss": 1.8387, "step": 45293 }, { "epoch": 1.51, "grad_norm": 0.5334316492080688, "learning_rate": 0.000297820752877902, "loss": 1.7116, "step": 45294 }, { "epoch": 1.51, "grad_norm": 0.5165907740592957, "learning_rate": 0.0002978103010385082, "loss": 1.8135, "step": 45295 }, { "epoch": 1.51, "grad_norm": 0.5056866407394409, "learning_rate": 0.0002977998492017725, "loss": 1.7761, "step": 45296 }, { "epoch": 1.51, "grad_norm": 0.522863507270813, "learning_rate": 0.0002977893973677075, "loss": 1.7926, "step": 45297 }, { "epoch": 1.51, "grad_norm": 0.5120558738708496, "learning_rate": 0.00029777894553632573, "loss": 1.8247, "step": 45298 }, { "epoch": 1.51, "grad_norm": 0.5029852986335754, "learning_rate": 0.00029776849370764, "loss": 1.7446, "step": 45299 }, { "epoch": 1.51, "grad_norm": 0.509326159954071, "learning_rate": 0.0002977580418816631, "loss": 1.7051, "step": 45300 }, { "epoch": 1.51, "grad_norm": 0.5233959555625916, "learning_rate": 0.0002977475900584075, "loss": 1.7917, "step": 45301 }, { "epoch": 1.51, "grad_norm": 0.522296130657196, "learning_rate": 0.00029773713823788594, "loss": 1.7891, "step": 45302 }, { "epoch": 1.51, "grad_norm": 0.5101900100708008, "learning_rate": 0.0002977266864201113, "loss": 1.7739, "step": 45303 }, { "epoch": 1.51, "grad_norm": 0.504707396030426, "learning_rate": 0.00029771623460509605, "loss": 1.7119, "step": 45304 }, { "epoch": 1.51, "grad_norm": 0.527756929397583, "learning_rate": 0.00029770578279285294, "loss": 1.7657, "step": 45305 }, { "epoch": 1.51, "grad_norm": 0.5029066801071167, "learning_rate": 0.0002976953309833947, "loss": 1.7008, "step": 45306 }, { "epoch": 1.51, "grad_norm": 0.5253719091415405, "learning_rate": 0.00029768487917673406, "loss": 1.767, "step": 45307 }, { "epoch": 1.51, "grad_norm": 0.5168102383613586, "learning_rate": 0.00029767442737288355, "loss": 1.731, "step": 45308 }, { "epoch": 1.51, "grad_norm": 0.5127444267272949, "learning_rate": 0.00029766397557185587, "loss": 1.7909, "step": 45309 }, { "epoch": 1.51, "grad_norm": 0.5349331498146057, "learning_rate": 0.0002976535237736639, "loss": 1.7964, "step": 45310 }, { "epoch": 1.51, "grad_norm": 0.5154542326927185, "learning_rate": 0.00029764307197832016, "loss": 1.6809, "step": 45311 }, { "epoch": 1.51, "grad_norm": 0.5122707486152649, "learning_rate": 0.00029763262018583746, "loss": 1.7685, "step": 45312 }, { "epoch": 1.51, "grad_norm": 0.5127909183502197, "learning_rate": 0.0002976221683962284, "loss": 1.7712, "step": 45313 }, { "epoch": 1.51, "grad_norm": 0.5304682850837708, "learning_rate": 0.0002976117166095056, "loss": 1.8129, "step": 45314 }, { "epoch": 1.51, "grad_norm": 0.5229842662811279, "learning_rate": 0.0002976012648256818, "loss": 1.7961, "step": 45315 }, { "epoch": 1.51, "grad_norm": 0.5053460597991943, "learning_rate": 0.0002975908130447697, "loss": 1.7971, "step": 45316 }, { "epoch": 1.51, "grad_norm": 0.5061862468719482, "learning_rate": 0.00029758036126678215, "loss": 1.7048, "step": 45317 }, { "epoch": 1.51, "grad_norm": 0.5208091735839844, "learning_rate": 0.00029756990949173157, "loss": 1.7641, "step": 45318 }, { "epoch": 1.51, "grad_norm": 0.5014839768409729, "learning_rate": 0.0002975594577196308, "loss": 1.7101, "step": 45319 }, { "epoch": 1.51, "grad_norm": 0.5147044658660889, "learning_rate": 0.0002975490059504925, "loss": 1.7716, "step": 45320 }, { "epoch": 1.51, "grad_norm": 0.5369483828544617, "learning_rate": 0.0002975385541843293, "loss": 1.8304, "step": 45321 }, { "epoch": 1.51, "grad_norm": 0.5096884965896606, "learning_rate": 0.00029752810242115394, "loss": 1.7518, "step": 45322 }, { "epoch": 1.51, "grad_norm": 0.533200740814209, "learning_rate": 0.0002975176506609792, "loss": 1.7029, "step": 45323 }, { "epoch": 1.51, "grad_norm": 0.5035287141799927, "learning_rate": 0.0002975071989038175, "loss": 1.7798, "step": 45324 }, { "epoch": 1.51, "grad_norm": 0.5229952931404114, "learning_rate": 0.00029749674714968176, "loss": 1.6911, "step": 45325 }, { "epoch": 1.51, "grad_norm": 0.5436401963233948, "learning_rate": 0.00029748629539858457, "loss": 1.788, "step": 45326 }, { "epoch": 1.51, "grad_norm": 0.5415088534355164, "learning_rate": 0.00029747584365053876, "loss": 1.7586, "step": 45327 }, { "epoch": 1.51, "grad_norm": 0.5293258428573608, "learning_rate": 0.00029746539190555685, "loss": 1.7184, "step": 45328 }, { "epoch": 1.51, "grad_norm": 0.5328556895256042, "learning_rate": 0.00029745494016365147, "loss": 1.7815, "step": 45329 }, { "epoch": 1.51, "grad_norm": 2.7195992469787598, "learning_rate": 0.00029744448842483563, "loss": 1.8249, "step": 45330 }, { "epoch": 1.51, "grad_norm": 0.5499322414398193, "learning_rate": 0.0002974340366891216, "loss": 1.7433, "step": 45331 }, { "epoch": 1.51, "grad_norm": 0.5489156246185303, "learning_rate": 0.00029742358495652235, "loss": 1.727, "step": 45332 }, { "epoch": 1.51, "grad_norm": 0.5177605152130127, "learning_rate": 0.00029741313322705055, "loss": 1.7884, "step": 45333 }, { "epoch": 1.51, "grad_norm": 0.5291682481765747, "learning_rate": 0.00029740268150071876, "loss": 1.8063, "step": 45334 }, { "epoch": 1.51, "grad_norm": 0.5450860261917114, "learning_rate": 0.0002973922297775397, "loss": 1.8064, "step": 45335 }, { "epoch": 1.51, "grad_norm": 0.5262487530708313, "learning_rate": 0.00029738177805752606, "loss": 1.8086, "step": 45336 }, { "epoch": 1.51, "grad_norm": 0.551826536655426, "learning_rate": 0.0002973713263406908, "loss": 1.6787, "step": 45337 }, { "epoch": 1.51, "grad_norm": 0.5429825186729431, "learning_rate": 0.00029736087462704607, "loss": 1.793, "step": 45338 }, { "epoch": 1.51, "grad_norm": 0.5035399198532104, "learning_rate": 0.000297350422916605, "loss": 1.7331, "step": 45339 }, { "epoch": 1.51, "grad_norm": 0.49917343258857727, "learning_rate": 0.0002973399712093801, "loss": 1.7507, "step": 45340 }, { "epoch": 1.51, "grad_norm": 0.5553039312362671, "learning_rate": 0.00029732951950538406, "loss": 1.7148, "step": 45341 }, { "epoch": 1.51, "grad_norm": 0.5216459035873413, "learning_rate": 0.00029731906780462963, "loss": 1.7257, "step": 45342 }, { "epoch": 1.51, "grad_norm": 0.5189358592033386, "learning_rate": 0.0002973086161071295, "loss": 1.7799, "step": 45343 }, { "epoch": 1.51, "grad_norm": 0.5253733396530151, "learning_rate": 0.0002972981644128962, "loss": 1.7642, "step": 45344 }, { "epoch": 1.51, "grad_norm": 0.5531970262527466, "learning_rate": 0.00029728771272194253, "loss": 1.7747, "step": 45345 }, { "epoch": 1.51, "grad_norm": 0.5572513937950134, "learning_rate": 0.0002972772610342812, "loss": 1.6955, "step": 45346 }, { "epoch": 1.51, "grad_norm": 0.529735803604126, "learning_rate": 0.00029726680934992494, "loss": 1.7704, "step": 45347 }, { "epoch": 1.51, "grad_norm": 0.5501123070716858, "learning_rate": 0.00029725635766888634, "loss": 1.7127, "step": 45348 }, { "epoch": 1.51, "grad_norm": 0.5208890438079834, "learning_rate": 0.0002972459059911781, "loss": 1.7581, "step": 45349 }, { "epoch": 1.51, "grad_norm": 0.5427709221839905, "learning_rate": 0.000297235454316813, "loss": 1.7489, "step": 45350 }, { "epoch": 1.51, "grad_norm": 0.5215395092964172, "learning_rate": 0.0002972250026458036, "loss": 1.8148, "step": 45351 }, { "epoch": 1.51, "grad_norm": 0.5191072821617126, "learning_rate": 0.0002972145509781625, "loss": 1.7698, "step": 45352 }, { "epoch": 1.51, "grad_norm": 0.5232014656066895, "learning_rate": 0.0002972040993139028, "loss": 1.7295, "step": 45353 }, { "epoch": 1.51, "grad_norm": 0.5084266066551208, "learning_rate": 0.00029719364765303663, "loss": 1.7765, "step": 45354 }, { "epoch": 1.51, "grad_norm": 0.516053318977356, "learning_rate": 0.0002971831959955771, "loss": 1.8002, "step": 45355 }, { "epoch": 1.51, "grad_norm": 0.5497590899467468, "learning_rate": 0.0002971727443415367, "loss": 1.6968, "step": 45356 }, { "epoch": 1.51, "grad_norm": 0.5921440124511719, "learning_rate": 0.0002971622926909283, "loss": 1.7636, "step": 45357 }, { "epoch": 1.51, "grad_norm": 0.5138211250305176, "learning_rate": 0.00029715184104376436, "loss": 1.7063, "step": 45358 }, { "epoch": 1.51, "grad_norm": 0.537052571773529, "learning_rate": 0.0002971413894000576, "loss": 1.7526, "step": 45359 }, { "epoch": 1.51, "grad_norm": 0.5388697981834412, "learning_rate": 0.00029713093775982096, "loss": 1.7969, "step": 45360 }, { "epoch": 1.51, "grad_norm": 0.5466203093528748, "learning_rate": 0.00029712048612306677, "loss": 1.689, "step": 45361 }, { "epoch": 1.51, "grad_norm": 0.5558322668075562, "learning_rate": 0.0002971100344898079, "loss": 1.7723, "step": 45362 }, { "epoch": 1.51, "grad_norm": 0.5000621676445007, "learning_rate": 0.00029709958286005714, "loss": 1.7518, "step": 45363 }, { "epoch": 1.51, "grad_norm": 0.5291129946708679, "learning_rate": 0.000297089131233827, "loss": 1.7329, "step": 45364 }, { "epoch": 1.51, "grad_norm": 0.5305778980255127, "learning_rate": 0.00029707867961113017, "loss": 1.7505, "step": 45365 }, { "epoch": 1.51, "grad_norm": 0.539906919002533, "learning_rate": 0.00029706822799197935, "loss": 1.751, "step": 45366 }, { "epoch": 1.51, "grad_norm": 0.521862268447876, "learning_rate": 0.0002970577763763875, "loss": 1.7858, "step": 45367 }, { "epoch": 1.51, "grad_norm": 0.5083935856819153, "learning_rate": 0.0002970473247643668, "loss": 1.728, "step": 45368 }, { "epoch": 1.51, "grad_norm": 0.5334364175796509, "learning_rate": 0.00029703687315593035, "loss": 1.7222, "step": 45369 }, { "epoch": 1.51, "grad_norm": 0.5281482934951782, "learning_rate": 0.00029702642155109073, "loss": 1.8301, "step": 45370 }, { "epoch": 1.51, "grad_norm": 0.5256802439689636, "learning_rate": 0.0002970159699498606, "loss": 1.7726, "step": 45371 }, { "epoch": 1.51, "grad_norm": 0.542724072933197, "learning_rate": 0.00029700551835225254, "loss": 1.7731, "step": 45372 }, { "epoch": 1.51, "grad_norm": 0.5008971095085144, "learning_rate": 0.00029699506675827944, "loss": 1.7453, "step": 45373 }, { "epoch": 1.51, "grad_norm": 0.5106827616691589, "learning_rate": 0.0002969846151679538, "loss": 1.746, "step": 45374 }, { "epoch": 1.51, "grad_norm": 0.5165422558784485, "learning_rate": 0.0002969741635812884, "loss": 1.7114, "step": 45375 }, { "epoch": 1.51, "grad_norm": 0.5045925378799438, "learning_rate": 0.00029696371199829596, "loss": 1.7975, "step": 45376 }, { "epoch": 1.51, "grad_norm": 0.5116901397705078, "learning_rate": 0.0002969532604189892, "loss": 1.787, "step": 45377 }, { "epoch": 1.51, "grad_norm": 0.5111700296401978, "learning_rate": 0.00029694280884338064, "loss": 1.7479, "step": 45378 }, { "epoch": 1.51, "grad_norm": 0.511102020740509, "learning_rate": 0.00029693235727148304, "loss": 1.819, "step": 45379 }, { "epoch": 1.51, "grad_norm": 0.7567493319511414, "learning_rate": 0.0002969219057033092, "loss": 1.8408, "step": 45380 }, { "epoch": 1.51, "grad_norm": 0.5213313102722168, "learning_rate": 0.00029691145413887165, "loss": 1.7248, "step": 45381 }, { "epoch": 1.51, "grad_norm": 0.48802459239959717, "learning_rate": 0.0002969010025781831, "loss": 1.7876, "step": 45382 }, { "epoch": 1.51, "grad_norm": 0.5208410620689392, "learning_rate": 0.0002968905510212564, "loss": 1.699, "step": 45383 }, { "epoch": 1.51, "grad_norm": 0.5238298773765564, "learning_rate": 0.00029688009946810396, "loss": 1.7491, "step": 45384 }, { "epoch": 1.51, "grad_norm": 0.535707414150238, "learning_rate": 0.0002968696479187387, "loss": 1.878, "step": 45385 }, { "epoch": 1.51, "grad_norm": 0.7148993015289307, "learning_rate": 0.0002968591963731732, "loss": 1.7207, "step": 45386 }, { "epoch": 1.51, "grad_norm": 0.496938019990921, "learning_rate": 0.00029684874483142023, "loss": 1.7503, "step": 45387 }, { "epoch": 1.51, "grad_norm": 0.5287271738052368, "learning_rate": 0.00029683829329349236, "loss": 1.8001, "step": 45388 }, { "epoch": 1.51, "grad_norm": 0.5148209929466248, "learning_rate": 0.0002968278417594023, "loss": 1.7215, "step": 45389 }, { "epoch": 1.51, "grad_norm": 0.5398377180099487, "learning_rate": 0.00029681739022916295, "loss": 1.7568, "step": 45390 }, { "epoch": 1.51, "grad_norm": 0.5260369181632996, "learning_rate": 0.0002968069387027866, "loss": 1.7865, "step": 45391 }, { "epoch": 1.51, "grad_norm": 0.5178327560424805, "learning_rate": 0.0002967964871802863, "loss": 1.7373, "step": 45392 }, { "epoch": 1.51, "grad_norm": 0.5289381146430969, "learning_rate": 0.0002967860356616745, "loss": 1.7975, "step": 45393 }, { "epoch": 1.51, "grad_norm": 0.5066354870796204, "learning_rate": 0.0002967755841469641, "loss": 1.6868, "step": 45394 }, { "epoch": 1.51, "grad_norm": 0.5175713300704956, "learning_rate": 0.00029676513263616756, "loss": 1.7808, "step": 45395 }, { "epoch": 1.51, "grad_norm": 0.5152212381362915, "learning_rate": 0.00029675468112929764, "loss": 1.7076, "step": 45396 }, { "epoch": 1.51, "grad_norm": 0.5203100442886353, "learning_rate": 0.00029674422962636723, "loss": 1.7831, "step": 45397 }, { "epoch": 1.51, "grad_norm": 0.5481036901473999, "learning_rate": 0.0002967337781273887, "loss": 1.8371, "step": 45398 }, { "epoch": 1.51, "grad_norm": 0.5072067975997925, "learning_rate": 0.0002967233266323749, "loss": 1.761, "step": 45399 }, { "epoch": 1.51, "grad_norm": 0.529512882232666, "learning_rate": 0.00029671287514133855, "loss": 1.7754, "step": 45400 }, { "epoch": 1.51, "grad_norm": 0.5292509198188782, "learning_rate": 0.0002967024236542923, "loss": 1.7156, "step": 45401 }, { "epoch": 1.51, "grad_norm": 0.5105208158493042, "learning_rate": 0.0002966919721712487, "loss": 1.7334, "step": 45402 }, { "epoch": 1.51, "grad_norm": 0.5007404685020447, "learning_rate": 0.0002966815206922206, "loss": 1.7305, "step": 45403 }, { "epoch": 1.51, "grad_norm": 0.5192382335662842, "learning_rate": 0.00029667106921722075, "loss": 1.7456, "step": 45404 }, { "epoch": 1.51, "grad_norm": 0.5200624465942383, "learning_rate": 0.0002966606177462616, "loss": 1.761, "step": 45405 }, { "epoch": 1.51, "grad_norm": 0.5371307134628296, "learning_rate": 0.00029665016627935603, "loss": 1.7732, "step": 45406 }, { "epoch": 1.51, "grad_norm": 0.5034050345420837, "learning_rate": 0.0002966397148165167, "loss": 1.7571, "step": 45407 }, { "epoch": 1.51, "grad_norm": 0.4967215955257416, "learning_rate": 0.00029662926335775625, "loss": 1.7092, "step": 45408 }, { "epoch": 1.51, "grad_norm": 0.4991585910320282, "learning_rate": 0.0002966188119030873, "loss": 1.7666, "step": 45409 }, { "epoch": 1.51, "grad_norm": 0.5331961512565613, "learning_rate": 0.00029660836045252277, "loss": 1.7395, "step": 45410 }, { "epoch": 1.51, "grad_norm": 0.529913067817688, "learning_rate": 0.00029659790900607506, "loss": 1.8051, "step": 45411 }, { "epoch": 1.51, "grad_norm": 0.5317167043685913, "learning_rate": 0.00029658745756375697, "loss": 1.7684, "step": 45412 }, { "epoch": 1.51, "grad_norm": 0.5208783745765686, "learning_rate": 0.00029657700612558123, "loss": 1.7308, "step": 45413 }, { "epoch": 1.51, "grad_norm": 0.5245900750160217, "learning_rate": 0.00029656655469156054, "loss": 1.7541, "step": 45414 }, { "epoch": 1.51, "grad_norm": 0.5143522620201111, "learning_rate": 0.0002965561032617075, "loss": 1.7663, "step": 45415 }, { "epoch": 1.51, "grad_norm": 0.7089934349060059, "learning_rate": 0.0002965456518360349, "loss": 1.7822, "step": 45416 }, { "epoch": 1.51, "grad_norm": 0.5433550477027893, "learning_rate": 0.0002965352004145554, "loss": 1.6487, "step": 45417 }, { "epoch": 1.51, "grad_norm": 0.5203267335891724, "learning_rate": 0.00029652474899728156, "loss": 1.7934, "step": 45418 }, { "epoch": 1.51, "grad_norm": 0.5069850087165833, "learning_rate": 0.00029651429758422614, "loss": 1.7525, "step": 45419 }, { "epoch": 1.51, "grad_norm": 0.5336228013038635, "learning_rate": 0.000296503846175402, "loss": 1.827, "step": 45420 }, { "epoch": 1.51, "grad_norm": 0.522875964641571, "learning_rate": 0.0002964933947708215, "loss": 1.8034, "step": 45421 }, { "epoch": 1.51, "grad_norm": 0.530768096446991, "learning_rate": 0.0002964829433704976, "loss": 1.7357, "step": 45422 }, { "epoch": 1.51, "grad_norm": 0.5135350823402405, "learning_rate": 0.00029647249197444284, "loss": 1.8276, "step": 45423 }, { "epoch": 1.51, "grad_norm": 0.5260753035545349, "learning_rate": 0.0002964620405826701, "loss": 1.7303, "step": 45424 }, { "epoch": 1.51, "grad_norm": 0.5181431770324707, "learning_rate": 0.00029645158919519175, "loss": 1.7985, "step": 45425 }, { "epoch": 1.51, "grad_norm": 0.5204240679740906, "learning_rate": 0.00029644113781202063, "loss": 1.7187, "step": 45426 }, { "epoch": 1.51, "grad_norm": 0.5242994427680969, "learning_rate": 0.0002964306864331696, "loss": 1.6362, "step": 45427 }, { "epoch": 1.51, "grad_norm": 0.5252204537391663, "learning_rate": 0.0002964202350586511, "loss": 1.7641, "step": 45428 }, { "epoch": 1.51, "grad_norm": 0.5172802209854126, "learning_rate": 0.0002964097836884779, "loss": 1.7995, "step": 45429 }, { "epoch": 1.51, "grad_norm": 0.5391225814819336, "learning_rate": 0.0002963993323226628, "loss": 1.8186, "step": 45430 }, { "epoch": 1.51, "grad_norm": 0.5261300802230835, "learning_rate": 0.0002963888809612183, "loss": 1.7898, "step": 45431 }, { "epoch": 1.51, "grad_norm": 0.5205276608467102, "learning_rate": 0.0002963784296041571, "loss": 1.7188, "step": 45432 }, { "epoch": 1.51, "grad_norm": 0.536814272403717, "learning_rate": 0.00029636797825149193, "loss": 1.663, "step": 45433 }, { "epoch": 1.51, "grad_norm": 0.515126645565033, "learning_rate": 0.0002963575269032357, "loss": 1.7317, "step": 45434 }, { "epoch": 1.51, "grad_norm": 0.5244478583335876, "learning_rate": 0.0002963470755594007, "loss": 1.7198, "step": 45435 }, { "epoch": 1.51, "grad_norm": 0.51729816198349, "learning_rate": 0.0002963366242199999, "loss": 1.7224, "step": 45436 }, { "epoch": 1.51, "grad_norm": 0.5166526436805725, "learning_rate": 0.00029632617288504593, "loss": 1.708, "step": 45437 }, { "epoch": 1.51, "grad_norm": 0.513353705406189, "learning_rate": 0.00029631572155455136, "loss": 1.8359, "step": 45438 }, { "epoch": 1.51, "grad_norm": 0.5054198503494263, "learning_rate": 0.000296305270228529, "loss": 1.7439, "step": 45439 }, { "epoch": 1.51, "grad_norm": 0.5149405002593994, "learning_rate": 0.0002962948189069916, "loss": 1.7695, "step": 45440 }, { "epoch": 1.51, "grad_norm": 0.511366605758667, "learning_rate": 0.00029628436758995154, "loss": 1.7445, "step": 45441 }, { "epoch": 1.51, "grad_norm": 0.5040007829666138, "learning_rate": 0.00029627391627742183, "loss": 1.7576, "step": 45442 }, { "epoch": 1.51, "grad_norm": 0.5126596093177795, "learning_rate": 0.00029626346496941503, "loss": 1.803, "step": 45443 }, { "epoch": 1.51, "grad_norm": 0.5205915570259094, "learning_rate": 0.0002962530136659439, "loss": 1.7879, "step": 45444 }, { "epoch": 1.51, "grad_norm": 0.5066823363304138, "learning_rate": 0.0002962425623670209, "loss": 1.7783, "step": 45445 }, { "epoch": 1.51, "grad_norm": 0.5156781673431396, "learning_rate": 0.0002962321110726589, "loss": 1.743, "step": 45446 }, { "epoch": 1.51, "grad_norm": 0.517468273639679, "learning_rate": 0.00029622165978287075, "loss": 1.7251, "step": 45447 }, { "epoch": 1.51, "grad_norm": 0.5107598304748535, "learning_rate": 0.0002962112084976688, "loss": 1.7699, "step": 45448 }, { "epoch": 1.51, "grad_norm": 0.528706431388855, "learning_rate": 0.00029620075721706584, "loss": 1.7571, "step": 45449 }, { "epoch": 1.51, "grad_norm": 0.5226397514343262, "learning_rate": 0.0002961903059410747, "loss": 1.8572, "step": 45450 }, { "epoch": 1.51, "grad_norm": 0.52317214012146, "learning_rate": 0.000296179854669708, "loss": 1.7786, "step": 45451 }, { "epoch": 1.51, "grad_norm": 0.5006576180458069, "learning_rate": 0.0002961694034029783, "loss": 1.6977, "step": 45452 }, { "epoch": 1.51, "grad_norm": 0.5089173316955566, "learning_rate": 0.00029615895214089833, "loss": 1.723, "step": 45453 }, { "epoch": 1.51, "grad_norm": 0.5302250385284424, "learning_rate": 0.000296148500883481, "loss": 1.812, "step": 45454 }, { "epoch": 1.51, "grad_norm": 0.5257757306098938, "learning_rate": 0.00029613804963073864, "loss": 1.795, "step": 45455 }, { "epoch": 1.51, "grad_norm": 0.5516816973686218, "learning_rate": 0.0002961275983826842, "loss": 1.8417, "step": 45456 }, { "epoch": 1.51, "grad_norm": 0.5026069283485413, "learning_rate": 0.0002961171471393303, "loss": 1.8058, "step": 45457 }, { "epoch": 1.51, "grad_norm": 0.5035083293914795, "learning_rate": 0.00029610669590068964, "loss": 1.7215, "step": 45458 }, { "epoch": 1.51, "grad_norm": 0.5080830454826355, "learning_rate": 0.00029609624466677483, "loss": 1.7896, "step": 45459 }, { "epoch": 1.51, "grad_norm": 0.5263737440109253, "learning_rate": 0.0002960857934375987, "loss": 1.8202, "step": 45460 }, { "epoch": 1.51, "grad_norm": 0.5263883471488953, "learning_rate": 0.0002960753422131737, "loss": 1.747, "step": 45461 }, { "epoch": 1.51, "grad_norm": 0.5188611149787903, "learning_rate": 0.00029606489099351266, "loss": 1.7235, "step": 45462 }, { "epoch": 1.51, "grad_norm": 0.5270819067955017, "learning_rate": 0.00029605443977862825, "loss": 1.7054, "step": 45463 }, { "epoch": 1.51, "grad_norm": 0.6712929010391235, "learning_rate": 0.0002960439885685333, "loss": 1.7818, "step": 45464 }, { "epoch": 1.51, "grad_norm": 0.5084925293922424, "learning_rate": 0.0002960335373632403, "loss": 1.7568, "step": 45465 }, { "epoch": 1.51, "grad_norm": 0.5220667123794556, "learning_rate": 0.00029602308616276195, "loss": 1.7325, "step": 45466 }, { "epoch": 1.51, "grad_norm": 0.5186840295791626, "learning_rate": 0.00029601263496711113, "loss": 1.8357, "step": 45467 }, { "epoch": 1.51, "grad_norm": 0.4915267825126648, "learning_rate": 0.00029600218377630024, "loss": 1.7487, "step": 45468 }, { "epoch": 1.51, "grad_norm": 0.4896116256713867, "learning_rate": 0.0002959917325903421, "loss": 1.7313, "step": 45469 }, { "epoch": 1.51, "grad_norm": 0.5242399573326111, "learning_rate": 0.0002959812814092496, "loss": 1.8239, "step": 45470 }, { "epoch": 1.51, "grad_norm": 0.54413241147995, "learning_rate": 0.000295970830233035, "loss": 1.8011, "step": 45471 }, { "epoch": 1.51, "grad_norm": 0.5184350609779358, "learning_rate": 0.0002959603790617113, "loss": 1.7418, "step": 45472 }, { "epoch": 1.51, "grad_norm": 0.5266809463500977, "learning_rate": 0.00029594992789529114, "loss": 1.7509, "step": 45473 }, { "epoch": 1.51, "grad_norm": 0.5157904028892517, "learning_rate": 0.00029593947673378716, "loss": 1.7224, "step": 45474 }, { "epoch": 1.51, "grad_norm": 0.5245979428291321, "learning_rate": 0.000295929025577212, "loss": 1.7712, "step": 45475 }, { "epoch": 1.51, "grad_norm": 0.5114774703979492, "learning_rate": 0.0002959185744255784, "loss": 1.7273, "step": 45476 }, { "epoch": 1.51, "grad_norm": 0.5102655291557312, "learning_rate": 0.00029590812327889924, "loss": 1.8121, "step": 45477 }, { "epoch": 1.51, "grad_norm": 0.5138953328132629, "learning_rate": 0.00029589767213718677, "loss": 1.7994, "step": 45478 }, { "epoch": 1.51, "grad_norm": 0.5212775468826294, "learning_rate": 0.00029588722100045404, "loss": 1.6465, "step": 45479 }, { "epoch": 1.51, "grad_norm": 0.5013113617897034, "learning_rate": 0.00029587676986871363, "loss": 1.7189, "step": 45480 }, { "epoch": 1.51, "grad_norm": 0.5433433055877686, "learning_rate": 0.0002958663187419782, "loss": 1.7811, "step": 45481 }, { "epoch": 1.51, "grad_norm": 0.5231573581695557, "learning_rate": 0.00029585586762026045, "loss": 1.7161, "step": 45482 }, { "epoch": 1.51, "grad_norm": 0.5101767778396606, "learning_rate": 0.000295845416503573, "loss": 1.7573, "step": 45483 }, { "epoch": 1.51, "grad_norm": 0.5155898332595825, "learning_rate": 0.0002958349653919287, "loss": 1.7048, "step": 45484 }, { "epoch": 1.51, "grad_norm": 0.5106092691421509, "learning_rate": 0.00029582451428534004, "loss": 1.7764, "step": 45485 }, { "epoch": 1.51, "grad_norm": 0.5003700852394104, "learning_rate": 0.0002958140631838198, "loss": 1.7139, "step": 45486 }, { "epoch": 1.51, "grad_norm": 0.5341899394989014, "learning_rate": 0.00029580361208738086, "loss": 1.701, "step": 45487 }, { "epoch": 1.51, "grad_norm": 0.5058437585830688, "learning_rate": 0.00029579316099603555, "loss": 1.7356, "step": 45488 }, { "epoch": 1.51, "grad_norm": 0.5060790181159973, "learning_rate": 0.0002957827099097967, "loss": 1.827, "step": 45489 }, { "epoch": 1.51, "grad_norm": 0.5478918552398682, "learning_rate": 0.00029577225882867716, "loss": 1.714, "step": 45490 }, { "epoch": 1.51, "grad_norm": 0.5096877813339233, "learning_rate": 0.0002957618077526894, "loss": 1.787, "step": 45491 }, { "epoch": 1.51, "grad_norm": 0.5075966119766235, "learning_rate": 0.0002957513566818461, "loss": 1.7398, "step": 45492 }, { "epoch": 1.51, "grad_norm": 0.4982089400291443, "learning_rate": 0.0002957409056161601, "loss": 1.7274, "step": 45493 }, { "epoch": 1.51, "grad_norm": 0.5230909585952759, "learning_rate": 0.000295730454555644, "loss": 1.8666, "step": 45494 }, { "epoch": 1.51, "grad_norm": 0.5162833333015442, "learning_rate": 0.0002957200035003106, "loss": 1.7668, "step": 45495 }, { "epoch": 1.51, "grad_norm": 0.5316119194030762, "learning_rate": 0.0002957095524501723, "loss": 1.7659, "step": 45496 }, { "epoch": 1.51, "grad_norm": 0.6255691051483154, "learning_rate": 0.00029569910140524213, "loss": 1.7816, "step": 45497 }, { "epoch": 1.51, "grad_norm": 0.5306063294410706, "learning_rate": 0.00029568865036553254, "loss": 1.7209, "step": 45498 }, { "epoch": 1.51, "grad_norm": 0.5083245635032654, "learning_rate": 0.0002956781993310563, "loss": 1.7624, "step": 45499 }, { "epoch": 1.51, "grad_norm": 0.5217892527580261, "learning_rate": 0.00029566774830182616, "loss": 1.7579, "step": 45500 }, { "epoch": 1.51, "grad_norm": 0.5316429138183594, "learning_rate": 0.0002956572972778546, "loss": 1.703, "step": 45501 }, { "epoch": 1.51, "grad_norm": 0.5187585949897766, "learning_rate": 0.0002956468462591545, "loss": 1.7359, "step": 45502 }, { "epoch": 1.51, "grad_norm": 0.5450098514556885, "learning_rate": 0.0002956363952457385, "loss": 1.8287, "step": 45503 }, { "epoch": 1.51, "grad_norm": 0.5183919668197632, "learning_rate": 0.00029562594423761934, "loss": 1.7877, "step": 45504 }, { "epoch": 1.51, "grad_norm": 0.5133829712867737, "learning_rate": 0.00029561549323480954, "loss": 1.7177, "step": 45505 }, { "epoch": 1.51, "grad_norm": 0.5261510014533997, "learning_rate": 0.0002956050422373219, "loss": 1.7251, "step": 45506 }, { "epoch": 1.51, "grad_norm": 0.5140872001647949, "learning_rate": 0.0002955945912451692, "loss": 1.7787, "step": 45507 }, { "epoch": 1.51, "grad_norm": 0.5481982827186584, "learning_rate": 0.00029558414025836384, "loss": 1.7775, "step": 45508 }, { "epoch": 1.51, "grad_norm": 0.5167749524116516, "learning_rate": 0.0002955736892769188, "loss": 1.791, "step": 45509 }, { "epoch": 1.51, "grad_norm": 0.5262120962142944, "learning_rate": 0.00029556323830084666, "loss": 1.8259, "step": 45510 }, { "epoch": 1.51, "grad_norm": 0.5409929752349854, "learning_rate": 0.00029555278733016003, "loss": 1.7457, "step": 45511 }, { "epoch": 1.51, "grad_norm": 0.5301690101623535, "learning_rate": 0.00029554233636487165, "loss": 1.7901, "step": 45512 }, { "epoch": 1.51, "grad_norm": 0.5207443833351135, "learning_rate": 0.0002955318854049942, "loss": 1.7628, "step": 45513 }, { "epoch": 1.51, "grad_norm": 0.5204482078552246, "learning_rate": 0.00029552143445054057, "loss": 1.6602, "step": 45514 }, { "epoch": 1.51, "grad_norm": 0.5245752334594727, "learning_rate": 0.00029551098350152303, "loss": 1.8239, "step": 45515 }, { "epoch": 1.51, "grad_norm": 0.554488480091095, "learning_rate": 0.0002955005325579546, "loss": 1.8639, "step": 45516 }, { "epoch": 1.51, "grad_norm": 0.5393537282943726, "learning_rate": 0.00029549008161984785, "loss": 1.795, "step": 45517 }, { "epoch": 1.51, "grad_norm": 0.5060177445411682, "learning_rate": 0.0002954796306872155, "loss": 1.7898, "step": 45518 }, { "epoch": 1.51, "grad_norm": 0.506818950176239, "learning_rate": 0.00029546917976007014, "loss": 1.775, "step": 45519 }, { "epoch": 1.51, "grad_norm": 0.5217493772506714, "learning_rate": 0.0002954587288384245, "loss": 1.7757, "step": 45520 }, { "epoch": 1.51, "grad_norm": 0.5250235199928284, "learning_rate": 0.0002954482779222915, "loss": 1.6882, "step": 45521 }, { "epoch": 1.51, "grad_norm": 0.813809871673584, "learning_rate": 0.00029543782701168343, "loss": 1.7228, "step": 45522 }, { "epoch": 1.51, "grad_norm": 0.5513847470283508, "learning_rate": 0.0002954273761066132, "loss": 1.7327, "step": 45523 }, { "epoch": 1.51, "grad_norm": 0.5427996516227722, "learning_rate": 0.00029541692520709356, "loss": 1.7784, "step": 45524 }, { "epoch": 1.51, "grad_norm": 0.502072274684906, "learning_rate": 0.000295406474313137, "loss": 1.7494, "step": 45525 }, { "epoch": 1.51, "grad_norm": 0.5130504369735718, "learning_rate": 0.00029539602342475635, "loss": 1.7807, "step": 45526 }, { "epoch": 1.51, "grad_norm": 0.4988720715045929, "learning_rate": 0.00029538557254196426, "loss": 1.8142, "step": 45527 }, { "epoch": 1.51, "grad_norm": 0.5342446565628052, "learning_rate": 0.0002953751216647733, "loss": 1.7934, "step": 45528 }, { "epoch": 1.51, "grad_norm": 0.5045401453971863, "learning_rate": 0.0002953646707931963, "loss": 1.7627, "step": 45529 }, { "epoch": 1.51, "grad_norm": 0.5152125954627991, "learning_rate": 0.0002953542199272459, "loss": 1.7537, "step": 45530 }, { "epoch": 1.51, "grad_norm": 0.502564549446106, "learning_rate": 0.0002953437690669349, "loss": 1.8044, "step": 45531 }, { "epoch": 1.51, "grad_norm": 0.5152223110198975, "learning_rate": 0.0002953333182122758, "loss": 1.7828, "step": 45532 }, { "epoch": 1.51, "grad_norm": 0.5012432932853699, "learning_rate": 0.0002953228673632814, "loss": 1.7534, "step": 45533 }, { "epoch": 1.51, "grad_norm": 0.5375933647155762, "learning_rate": 0.00029531241651996435, "loss": 1.8562, "step": 45534 }, { "epoch": 1.51, "grad_norm": 0.5280209183692932, "learning_rate": 0.0002953019656823373, "loss": 1.8037, "step": 45535 }, { "epoch": 1.51, "grad_norm": 0.5214883089065552, "learning_rate": 0.0002952915148504129, "loss": 1.7415, "step": 45536 }, { "epoch": 1.52, "grad_norm": 0.528610348701477, "learning_rate": 0.00029528106402420406, "loss": 1.7924, "step": 45537 }, { "epoch": 1.52, "grad_norm": 0.5286482572555542, "learning_rate": 0.00029527061320372324, "loss": 1.7846, "step": 45538 }, { "epoch": 1.52, "grad_norm": 0.521304190158844, "learning_rate": 0.00029526016238898316, "loss": 1.7439, "step": 45539 }, { "epoch": 1.52, "grad_norm": 0.5307634472846985, "learning_rate": 0.00029524971157999656, "loss": 1.7654, "step": 45540 }, { "epoch": 1.52, "grad_norm": 0.5443506836891174, "learning_rate": 0.0002952392607767762, "loss": 1.8143, "step": 45541 }, { "epoch": 1.52, "grad_norm": 0.5310898423194885, "learning_rate": 0.0002952288099793346, "loss": 1.7407, "step": 45542 }, { "epoch": 1.52, "grad_norm": 0.5294637084007263, "learning_rate": 0.0002952183591876845, "loss": 1.7773, "step": 45543 }, { "epoch": 1.52, "grad_norm": 0.5242366194725037, "learning_rate": 0.0002952079084018387, "loss": 1.7664, "step": 45544 }, { "epoch": 1.52, "grad_norm": 0.5253761410713196, "learning_rate": 0.0002951974576218097, "loss": 1.7294, "step": 45545 }, { "epoch": 1.52, "grad_norm": 0.5152155756950378, "learning_rate": 0.0002951870068476103, "loss": 1.7635, "step": 45546 }, { "epoch": 1.52, "grad_norm": 0.498568594455719, "learning_rate": 0.00029517655607925325, "loss": 1.6852, "step": 45547 }, { "epoch": 1.52, "grad_norm": 0.5277354717254639, "learning_rate": 0.00029516610531675106, "loss": 1.8214, "step": 45548 }, { "epoch": 1.52, "grad_norm": 0.5591145157814026, "learning_rate": 0.00029515565456011647, "loss": 1.7708, "step": 45549 }, { "epoch": 1.52, "grad_norm": 0.5030719041824341, "learning_rate": 0.0002951452038093622, "loss": 1.7854, "step": 45550 }, { "epoch": 1.52, "grad_norm": 0.5239118337631226, "learning_rate": 0.000295134753064501, "loss": 1.7031, "step": 45551 }, { "epoch": 1.52, "grad_norm": 0.5094759464263916, "learning_rate": 0.00029512430232554553, "loss": 1.7808, "step": 45552 }, { "epoch": 1.52, "grad_norm": 0.544400691986084, "learning_rate": 0.0002951138515925084, "loss": 1.7776, "step": 45553 }, { "epoch": 1.52, "grad_norm": 0.5131879448890686, "learning_rate": 0.00029510340086540235, "loss": 1.7414, "step": 45554 }, { "epoch": 1.52, "grad_norm": 0.5183420181274414, "learning_rate": 0.00029509295014424003, "loss": 1.7557, "step": 45555 }, { "epoch": 1.52, "grad_norm": 0.5192046761512756, "learning_rate": 0.0002950824994290341, "loss": 1.7981, "step": 45556 }, { "epoch": 1.52, "grad_norm": 0.5101843476295471, "learning_rate": 0.0002950720487197974, "loss": 1.7287, "step": 45557 }, { "epoch": 1.52, "grad_norm": 0.5084757208824158, "learning_rate": 0.00029506159801654237, "loss": 1.7527, "step": 45558 }, { "epoch": 1.52, "grad_norm": 0.5277690291404724, "learning_rate": 0.0002950511473192819, "loss": 1.7839, "step": 45559 }, { "epoch": 1.52, "grad_norm": 0.5116581320762634, "learning_rate": 0.0002950406966280286, "loss": 1.7283, "step": 45560 }, { "epoch": 1.52, "grad_norm": 0.5299116969108582, "learning_rate": 0.0002950302459427952, "loss": 1.817, "step": 45561 }, { "epoch": 1.52, "grad_norm": 0.5148864984512329, "learning_rate": 0.00029501979526359433, "loss": 1.7676, "step": 45562 }, { "epoch": 1.52, "grad_norm": 0.5240505337715149, "learning_rate": 0.0002950093445904386, "loss": 1.7156, "step": 45563 }, { "epoch": 1.52, "grad_norm": 0.5335382223129272, "learning_rate": 0.000294998893923341, "loss": 1.7288, "step": 45564 }, { "epoch": 1.52, "grad_norm": 0.535265326499939, "learning_rate": 0.0002949884432623138, "loss": 1.8205, "step": 45565 }, { "epoch": 1.52, "grad_norm": 0.5027626156806946, "learning_rate": 0.00029497799260737, "loss": 1.7394, "step": 45566 }, { "epoch": 1.52, "grad_norm": 0.5050599575042725, "learning_rate": 0.00029496754195852215, "loss": 1.7469, "step": 45567 }, { "epoch": 1.52, "grad_norm": 0.5108196139335632, "learning_rate": 0.000294957091315783, "loss": 1.8082, "step": 45568 }, { "epoch": 1.52, "grad_norm": 0.49622321128845215, "learning_rate": 0.00029494664067916513, "loss": 1.7531, "step": 45569 }, { "epoch": 1.52, "grad_norm": 0.5045062303543091, "learning_rate": 0.00029493619004868126, "loss": 1.7843, "step": 45570 }, { "epoch": 1.52, "grad_norm": 0.5251822471618652, "learning_rate": 0.00029492573942434426, "loss": 1.691, "step": 45571 }, { "epoch": 1.52, "grad_norm": 0.5309235453605652, "learning_rate": 0.00029491528880616657, "loss": 1.8037, "step": 45572 }, { "epoch": 1.52, "grad_norm": 0.5397896766662598, "learning_rate": 0.00029490483819416096, "loss": 1.7583, "step": 45573 }, { "epoch": 1.52, "grad_norm": 0.5189245343208313, "learning_rate": 0.0002948943875883402, "loss": 1.7991, "step": 45574 }, { "epoch": 1.52, "grad_norm": 0.5196191668510437, "learning_rate": 0.00029488393698871683, "loss": 1.7851, "step": 45575 }, { "epoch": 1.52, "grad_norm": 0.5039832592010498, "learning_rate": 0.0002948734863953036, "loss": 1.7584, "step": 45576 }, { "epoch": 1.52, "grad_norm": 0.5141347646713257, "learning_rate": 0.0002948630358081133, "loss": 1.7536, "step": 45577 }, { "epoch": 1.52, "grad_norm": 0.5382463335990906, "learning_rate": 0.0002948525852271584, "loss": 1.8697, "step": 45578 }, { "epoch": 1.52, "grad_norm": 0.5260884761810303, "learning_rate": 0.0002948421346524517, "loss": 1.7097, "step": 45579 }, { "epoch": 1.52, "grad_norm": 0.514361560344696, "learning_rate": 0.0002948316840840059, "loss": 1.8374, "step": 45580 }, { "epoch": 1.52, "grad_norm": 0.5164036154747009, "learning_rate": 0.0002948212335218338, "loss": 1.7659, "step": 45581 }, { "epoch": 1.52, "grad_norm": 0.5191470384597778, "learning_rate": 0.00029481078296594783, "loss": 1.7676, "step": 45582 }, { "epoch": 1.52, "grad_norm": 0.5261328220367432, "learning_rate": 0.00029480033241636083, "loss": 1.7643, "step": 45583 }, { "epoch": 1.52, "grad_norm": 0.5342034101486206, "learning_rate": 0.0002947898818730855, "loss": 1.6938, "step": 45584 }, { "epoch": 1.52, "grad_norm": 0.5168399214744568, "learning_rate": 0.0002947794313361345, "loss": 1.7558, "step": 45585 }, { "epoch": 1.52, "grad_norm": 0.5271340012550354, "learning_rate": 0.00029476898080552035, "loss": 1.8435, "step": 45586 }, { "epoch": 1.52, "grad_norm": 0.5092307925224304, "learning_rate": 0.00029475853028125613, "loss": 1.7498, "step": 45587 }, { "epoch": 1.52, "grad_norm": 0.5041764378547668, "learning_rate": 0.0002947480797633541, "loss": 1.7751, "step": 45588 }, { "epoch": 1.52, "grad_norm": 0.5273481607437134, "learning_rate": 0.0002947376292518271, "loss": 1.7156, "step": 45589 }, { "epoch": 1.52, "grad_norm": 0.5345277190208435, "learning_rate": 0.00029472717874668797, "loss": 1.8109, "step": 45590 }, { "epoch": 1.52, "grad_norm": 0.5218107104301453, "learning_rate": 0.0002947167282479493, "loss": 1.863, "step": 45591 }, { "epoch": 1.52, "grad_norm": 0.530371367931366, "learning_rate": 0.0002947062777556236, "loss": 1.6686, "step": 45592 }, { "epoch": 1.52, "grad_norm": 0.5092036128044128, "learning_rate": 0.00029469582726972374, "loss": 1.7822, "step": 45593 }, { "epoch": 1.52, "grad_norm": 0.54035484790802, "learning_rate": 0.0002946853767902625, "loss": 1.7775, "step": 45594 }, { "epoch": 1.52, "grad_norm": 0.5357542037963867, "learning_rate": 0.0002946749263172522, "loss": 1.76, "step": 45595 }, { "epoch": 1.52, "grad_norm": 0.5331169962882996, "learning_rate": 0.0002946644758507059, "loss": 1.7437, "step": 45596 }, { "epoch": 1.52, "grad_norm": 0.4956004023551941, "learning_rate": 0.00029465402539063614, "loss": 1.8026, "step": 45597 }, { "epoch": 1.52, "grad_norm": 0.5242006182670593, "learning_rate": 0.00029464357493705557, "loss": 1.7517, "step": 45598 }, { "epoch": 1.52, "grad_norm": 0.49940359592437744, "learning_rate": 0.0002946331244899769, "loss": 1.7688, "step": 45599 }, { "epoch": 1.52, "grad_norm": 0.5314505696296692, "learning_rate": 0.0002946226740494128, "loss": 1.8015, "step": 45600 }, { "epoch": 1.52, "grad_norm": 0.5265033841133118, "learning_rate": 0.0002946122236153761, "loss": 1.7208, "step": 45601 }, { "epoch": 1.52, "grad_norm": 0.5096414685249329, "learning_rate": 0.0002946017731878792, "loss": 1.7614, "step": 45602 }, { "epoch": 1.52, "grad_norm": 0.5053752064704895, "learning_rate": 0.000294591322766935, "loss": 1.7337, "step": 45603 }, { "epoch": 1.52, "grad_norm": 0.5107653737068176, "learning_rate": 0.00029458087235255626, "loss": 1.7352, "step": 45604 }, { "epoch": 1.52, "grad_norm": 0.5272955894470215, "learning_rate": 0.00029457042194475547, "loss": 1.7979, "step": 45605 }, { "epoch": 1.52, "grad_norm": 0.499287486076355, "learning_rate": 0.00029455997154354533, "loss": 1.6567, "step": 45606 }, { "epoch": 1.52, "grad_norm": 0.5284212827682495, "learning_rate": 0.0002945495211489387, "loss": 1.8129, "step": 45607 }, { "epoch": 1.52, "grad_norm": 0.5166622400283813, "learning_rate": 0.00029453907076094807, "loss": 1.6805, "step": 45608 }, { "epoch": 1.52, "grad_norm": 0.5189738273620605, "learning_rate": 0.00029452862037958615, "loss": 1.7585, "step": 45609 }, { "epoch": 1.52, "grad_norm": 0.5338543653488159, "learning_rate": 0.0002945181700048657, "loss": 1.7824, "step": 45610 }, { "epoch": 1.52, "grad_norm": 0.5077405571937561, "learning_rate": 0.00029450771963679954, "loss": 1.7048, "step": 45611 }, { "epoch": 1.52, "grad_norm": 0.5175160765647888, "learning_rate": 0.00029449726927540006, "loss": 1.7598, "step": 45612 }, { "epoch": 1.52, "grad_norm": 0.5133218765258789, "learning_rate": 0.0002944868189206801, "loss": 1.7576, "step": 45613 }, { "epoch": 1.52, "grad_norm": 0.532283365726471, "learning_rate": 0.0002944763685726524, "loss": 1.7006, "step": 45614 }, { "epoch": 1.52, "grad_norm": 0.506493091583252, "learning_rate": 0.0002944659182313295, "loss": 1.8437, "step": 45615 }, { "epoch": 1.52, "grad_norm": 0.5502592325210571, "learning_rate": 0.0002944554678967241, "loss": 1.7263, "step": 45616 }, { "epoch": 1.52, "grad_norm": 0.5213389992713928, "learning_rate": 0.0002944450175688491, "loss": 1.7587, "step": 45617 }, { "epoch": 1.52, "grad_norm": 0.5117519497871399, "learning_rate": 0.0002944345672477169, "loss": 1.7341, "step": 45618 }, { "epoch": 1.52, "grad_norm": 0.5315895676612854, "learning_rate": 0.00029442411693334034, "loss": 1.7085, "step": 45619 }, { "epoch": 1.52, "grad_norm": 0.5129110813140869, "learning_rate": 0.0002944136666257321, "loss": 1.7413, "step": 45620 }, { "epoch": 1.52, "grad_norm": 0.551864743232727, "learning_rate": 0.0002944032163249049, "loss": 1.769, "step": 45621 }, { "epoch": 1.52, "grad_norm": 0.5438714027404785, "learning_rate": 0.0002943927660308713, "loss": 1.7558, "step": 45622 }, { "epoch": 1.52, "grad_norm": 0.5220921635627747, "learning_rate": 0.000294382315743644, "loss": 1.7731, "step": 45623 }, { "epoch": 1.52, "grad_norm": 0.5220059156417847, "learning_rate": 0.00029437186546323594, "loss": 1.7772, "step": 45624 }, { "epoch": 1.52, "grad_norm": 0.5159822702407837, "learning_rate": 0.00029436141518965944, "loss": 1.8198, "step": 45625 }, { "epoch": 1.52, "grad_norm": 0.5483803749084473, "learning_rate": 0.00029435096492292734, "loss": 1.8155, "step": 45626 }, { "epoch": 1.52, "grad_norm": 0.5206061005592346, "learning_rate": 0.0002943405146630525, "loss": 1.7667, "step": 45627 }, { "epoch": 1.52, "grad_norm": 0.5239843130111694, "learning_rate": 0.0002943300644100473, "loss": 1.6929, "step": 45628 }, { "epoch": 1.52, "grad_norm": 0.5079132318496704, "learning_rate": 0.0002943196141639246, "loss": 1.8455, "step": 45629 }, { "epoch": 1.52, "grad_norm": 0.5327935218811035, "learning_rate": 0.0002943091639246969, "loss": 1.7251, "step": 45630 }, { "epoch": 1.52, "grad_norm": 0.5336289405822754, "learning_rate": 0.00029429871369237735, "loss": 1.7673, "step": 45631 }, { "epoch": 1.52, "grad_norm": 0.5399436354637146, "learning_rate": 0.0002942882634669781, "loss": 1.7867, "step": 45632 }, { "epoch": 1.52, "grad_norm": 0.5039909482002258, "learning_rate": 0.0002942778132485121, "loss": 1.7197, "step": 45633 }, { "epoch": 1.52, "grad_norm": 0.5378638505935669, "learning_rate": 0.00029426736303699204, "loss": 1.7347, "step": 45634 }, { "epoch": 1.52, "grad_norm": 0.527230978012085, "learning_rate": 0.00029425691283243053, "loss": 1.6988, "step": 45635 }, { "epoch": 1.52, "grad_norm": 0.5099201798439026, "learning_rate": 0.0002942464626348403, "loss": 1.7156, "step": 45636 }, { "epoch": 1.52, "grad_norm": 0.5254093408584595, "learning_rate": 0.00029423601244423387, "loss": 1.7424, "step": 45637 }, { "epoch": 1.52, "grad_norm": 0.5212332010269165, "learning_rate": 0.00029422556226062435, "loss": 1.8074, "step": 45638 }, { "epoch": 1.52, "grad_norm": 0.5194609761238098, "learning_rate": 0.0002942151120840239, "loss": 1.8358, "step": 45639 }, { "epoch": 1.52, "grad_norm": 0.5207921862602234, "learning_rate": 0.0002942046619144455, "loss": 1.7497, "step": 45640 }, { "epoch": 1.52, "grad_norm": 0.5196473598480225, "learning_rate": 0.00029419421175190194, "loss": 1.8036, "step": 45641 }, { "epoch": 1.52, "grad_norm": 0.5610209107398987, "learning_rate": 0.00029418376159640557, "loss": 1.7538, "step": 45642 }, { "epoch": 1.52, "grad_norm": 0.5232513546943665, "learning_rate": 0.00029417331144796936, "loss": 1.8427, "step": 45643 }, { "epoch": 1.52, "grad_norm": 0.5054677128791809, "learning_rate": 0.0002941628613066059, "loss": 1.6986, "step": 45644 }, { "epoch": 1.52, "grad_norm": 0.5340973734855652, "learning_rate": 0.0002941524111723278, "loss": 1.8124, "step": 45645 }, { "epoch": 1.52, "grad_norm": 0.5422444343566895, "learning_rate": 0.00029414196104514775, "loss": 1.7266, "step": 45646 }, { "epoch": 1.52, "grad_norm": 0.5219157934188843, "learning_rate": 0.00029413151092507855, "loss": 1.703, "step": 45647 }, { "epoch": 1.52, "grad_norm": 0.5094372034072876, "learning_rate": 0.00029412106081213294, "loss": 1.7627, "step": 45648 }, { "epoch": 1.52, "grad_norm": 0.5194021463394165, "learning_rate": 0.0002941106107063234, "loss": 1.7174, "step": 45649 }, { "epoch": 1.52, "grad_norm": 0.5077676773071289, "learning_rate": 0.00029410016060766274, "loss": 1.745, "step": 45650 }, { "epoch": 1.52, "grad_norm": 0.5075641870498657, "learning_rate": 0.0002940897105161637, "loss": 1.7544, "step": 45651 }, { "epoch": 1.52, "grad_norm": 0.5290761590003967, "learning_rate": 0.00029407926043183873, "loss": 1.7669, "step": 45652 }, { "epoch": 1.52, "grad_norm": 0.5208267569541931, "learning_rate": 0.00029406881035470064, "loss": 1.8166, "step": 45653 }, { "epoch": 1.52, "grad_norm": 0.5114953517913818, "learning_rate": 0.0002940583602847623, "loss": 1.7618, "step": 45654 }, { "epoch": 1.52, "grad_norm": 0.5051758885383606, "learning_rate": 0.0002940479102220361, "loss": 1.7709, "step": 45655 }, { "epoch": 1.52, "grad_norm": 0.511605441570282, "learning_rate": 0.0002940374601665349, "loss": 1.7458, "step": 45656 }, { "epoch": 1.52, "grad_norm": 0.5114591121673584, "learning_rate": 0.0002940270101182714, "loss": 1.7698, "step": 45657 }, { "epoch": 1.52, "grad_norm": 0.5188332796096802, "learning_rate": 0.0002940165600772582, "loss": 1.8182, "step": 45658 }, { "epoch": 1.52, "grad_norm": 0.5131997466087341, "learning_rate": 0.00029400611004350803, "loss": 1.7336, "step": 45659 }, { "epoch": 1.52, "grad_norm": 0.5010486245155334, "learning_rate": 0.00029399566001703345, "loss": 1.686, "step": 45660 }, { "epoch": 1.52, "grad_norm": 0.5480026602745056, "learning_rate": 0.00029398520999784737, "loss": 1.7708, "step": 45661 }, { "epoch": 1.52, "grad_norm": 0.5293819904327393, "learning_rate": 0.0002939747599859623, "loss": 1.8469, "step": 45662 }, { "epoch": 1.52, "grad_norm": 0.5154200792312622, "learning_rate": 0.000293964309981391, "loss": 1.7847, "step": 45663 }, { "epoch": 1.52, "grad_norm": 0.5314050912857056, "learning_rate": 0.00029395385998414624, "loss": 1.7027, "step": 45664 }, { "epoch": 1.52, "grad_norm": 0.5061198472976685, "learning_rate": 0.0002939434099942405, "loss": 1.7166, "step": 45665 }, { "epoch": 1.52, "grad_norm": 0.5021461248397827, "learning_rate": 0.0002939329600116866, "loss": 1.7354, "step": 45666 }, { "epoch": 1.52, "grad_norm": 0.5341541171073914, "learning_rate": 0.00029392251003649704, "loss": 1.7864, "step": 45667 }, { "epoch": 1.52, "grad_norm": 0.539252519607544, "learning_rate": 0.00029391206006868485, "loss": 1.8091, "step": 45668 }, { "epoch": 1.52, "grad_norm": 0.5152081847190857, "learning_rate": 0.00029390161010826247, "loss": 1.7312, "step": 45669 }, { "epoch": 1.52, "grad_norm": 0.5296494364738464, "learning_rate": 0.00029389116015524265, "loss": 1.7387, "step": 45670 }, { "epoch": 1.52, "grad_norm": 0.5120826363563538, "learning_rate": 0.00029388071020963806, "loss": 1.7594, "step": 45671 }, { "epoch": 1.52, "grad_norm": 0.5400768518447876, "learning_rate": 0.00029387026027146134, "loss": 1.7591, "step": 45672 }, { "epoch": 1.52, "grad_norm": 0.5018709897994995, "learning_rate": 0.00029385981034072523, "loss": 1.738, "step": 45673 }, { "epoch": 1.52, "grad_norm": 0.5323362946510315, "learning_rate": 0.0002938493604174425, "loss": 1.748, "step": 45674 }, { "epoch": 1.52, "grad_norm": 0.5220975875854492, "learning_rate": 0.0002938389105016256, "loss": 1.7932, "step": 45675 }, { "epoch": 1.52, "grad_norm": 0.5186799168586731, "learning_rate": 0.0002938284605932874, "loss": 1.6582, "step": 45676 }, { "epoch": 1.52, "grad_norm": 0.5413762331008911, "learning_rate": 0.00029381801069244053, "loss": 1.7755, "step": 45677 }, { "epoch": 1.52, "grad_norm": 0.5224374532699585, "learning_rate": 0.00029380756079909774, "loss": 1.6642, "step": 45678 }, { "epoch": 1.52, "grad_norm": 0.5354576706886292, "learning_rate": 0.0002937971109132716, "loss": 1.7888, "step": 45679 }, { "epoch": 1.52, "grad_norm": 0.5178759694099426, "learning_rate": 0.00029378666103497484, "loss": 1.815, "step": 45680 }, { "epoch": 1.52, "grad_norm": 0.5234026312828064, "learning_rate": 0.0002937762111642203, "loss": 1.7712, "step": 45681 }, { "epoch": 1.52, "grad_norm": 0.5232546925544739, "learning_rate": 0.0002937657613010204, "loss": 1.7695, "step": 45682 }, { "epoch": 1.52, "grad_norm": 0.5054047703742981, "learning_rate": 0.00029375531144538796, "loss": 1.7551, "step": 45683 }, { "epoch": 1.52, "grad_norm": 0.6479407548904419, "learning_rate": 0.0002937448615973357, "loss": 1.8198, "step": 45684 }, { "epoch": 1.52, "grad_norm": 0.521403968334198, "learning_rate": 0.00029373441175687624, "loss": 1.75, "step": 45685 }, { "epoch": 1.52, "grad_norm": 0.5153884887695312, "learning_rate": 0.0002937239619240222, "loss": 1.7678, "step": 45686 }, { "epoch": 1.52, "grad_norm": 0.5420321226119995, "learning_rate": 0.00029371351209878636, "loss": 1.7246, "step": 45687 }, { "epoch": 1.52, "grad_norm": 0.5325813889503479, "learning_rate": 0.00029370306228118154, "loss": 1.732, "step": 45688 }, { "epoch": 1.52, "grad_norm": 0.5106265544891357, "learning_rate": 0.0002936926124712201, "loss": 1.7813, "step": 45689 }, { "epoch": 1.52, "grad_norm": 0.5087833404541016, "learning_rate": 0.00029368216266891493, "loss": 1.7652, "step": 45690 }, { "epoch": 1.52, "grad_norm": 0.5331458449363708, "learning_rate": 0.00029367171287427883, "loss": 1.7143, "step": 45691 }, { "epoch": 1.52, "grad_norm": 0.5244367718696594, "learning_rate": 0.0002936612630873242, "loss": 1.8025, "step": 45692 }, { "epoch": 1.52, "grad_norm": 0.5290507078170776, "learning_rate": 0.00029365081330806386, "loss": 1.7052, "step": 45693 }, { "epoch": 1.52, "grad_norm": 0.5366264581680298, "learning_rate": 0.0002936403635365106, "loss": 1.7226, "step": 45694 }, { "epoch": 1.52, "grad_norm": 0.5267601013183594, "learning_rate": 0.00029362991377267694, "loss": 1.8133, "step": 45695 }, { "epoch": 1.52, "grad_norm": 0.540264904499054, "learning_rate": 0.00029361946401657555, "loss": 1.7565, "step": 45696 }, { "epoch": 1.52, "grad_norm": 0.49883559346199036, "learning_rate": 0.00029360901426821927, "loss": 1.7194, "step": 45697 }, { "epoch": 1.52, "grad_norm": 0.5300329923629761, "learning_rate": 0.0002935985645276207, "loss": 1.7598, "step": 45698 }, { "epoch": 1.52, "grad_norm": 0.5108484625816345, "learning_rate": 0.00029358811479479255, "loss": 1.7762, "step": 45699 }, { "epoch": 1.52, "grad_norm": 0.5441526174545288, "learning_rate": 0.00029357766506974743, "loss": 1.8201, "step": 45700 }, { "epoch": 1.52, "grad_norm": 0.5092552900314331, "learning_rate": 0.00029356721535249815, "loss": 1.7065, "step": 45701 }, { "epoch": 1.52, "grad_norm": 0.5186766982078552, "learning_rate": 0.0002935567656430573, "loss": 1.6967, "step": 45702 }, { "epoch": 1.52, "grad_norm": 0.5200387835502625, "learning_rate": 0.00029354631594143745, "loss": 1.7311, "step": 45703 }, { "epoch": 1.52, "grad_norm": 0.5365074872970581, "learning_rate": 0.00029353586624765167, "loss": 1.7514, "step": 45704 }, { "epoch": 1.52, "grad_norm": 0.5099128484725952, "learning_rate": 0.0002935254165617122, "loss": 1.761, "step": 45705 }, { "epoch": 1.52, "grad_norm": 0.5149646401405334, "learning_rate": 0.00029351496688363195, "loss": 1.7201, "step": 45706 }, { "epoch": 1.52, "grad_norm": 0.5287970900535583, "learning_rate": 0.0002935045172134236, "loss": 1.7379, "step": 45707 }, { "epoch": 1.52, "grad_norm": 0.5172369480133057, "learning_rate": 0.0002934940675510999, "loss": 1.8935, "step": 45708 }, { "epoch": 1.52, "grad_norm": 0.5321260690689087, "learning_rate": 0.0002934836178966733, "loss": 1.6886, "step": 45709 }, { "epoch": 1.52, "grad_norm": 0.5245335102081299, "learning_rate": 0.00029347316825015663, "loss": 1.7828, "step": 45710 }, { "epoch": 1.52, "grad_norm": 0.5184544324874878, "learning_rate": 0.00029346271861156276, "loss": 1.7347, "step": 45711 }, { "epoch": 1.52, "grad_norm": 0.5214033722877502, "learning_rate": 0.000293452268980904, "loss": 1.738, "step": 45712 }, { "epoch": 1.52, "grad_norm": 0.5364152193069458, "learning_rate": 0.00029344181935819324, "loss": 1.8418, "step": 45713 }, { "epoch": 1.52, "grad_norm": 0.5208003520965576, "learning_rate": 0.00029343136974344325, "loss": 1.7059, "step": 45714 }, { "epoch": 1.52, "grad_norm": 0.5285216569900513, "learning_rate": 0.00029342092013666653, "loss": 1.7982, "step": 45715 }, { "epoch": 1.52, "grad_norm": 0.5249214768409729, "learning_rate": 0.00029341047053787583, "loss": 1.9076, "step": 45716 }, { "epoch": 1.52, "grad_norm": 0.5040318965911865, "learning_rate": 0.0002934000209470838, "loss": 1.7772, "step": 45717 }, { "epoch": 1.52, "grad_norm": 0.5325911641120911, "learning_rate": 0.0002933895713643034, "loss": 1.7306, "step": 45718 }, { "epoch": 1.52, "grad_norm": 0.5348753333091736, "learning_rate": 0.00029337912178954684, "loss": 1.6991, "step": 45719 }, { "epoch": 1.52, "grad_norm": 0.520076334476471, "learning_rate": 0.0002933686722228271, "loss": 1.737, "step": 45720 }, { "epoch": 1.52, "grad_norm": 0.5187458992004395, "learning_rate": 0.00029335822266415693, "loss": 1.7962, "step": 45721 }, { "epoch": 1.52, "grad_norm": 0.5104618072509766, "learning_rate": 0.00029334777311354883, "loss": 1.8444, "step": 45722 }, { "epoch": 1.52, "grad_norm": 0.5384918451309204, "learning_rate": 0.00029333732357101554, "loss": 1.7865, "step": 45723 }, { "epoch": 1.52, "grad_norm": 0.516745924949646, "learning_rate": 0.0002933268740365698, "loss": 1.794, "step": 45724 }, { "epoch": 1.52, "grad_norm": 0.5016891956329346, "learning_rate": 0.00029331642451022424, "loss": 1.6878, "step": 45725 }, { "epoch": 1.52, "grad_norm": 0.5081747770309448, "learning_rate": 0.00029330597499199144, "loss": 1.7285, "step": 45726 }, { "epoch": 1.52, "grad_norm": 0.5245945453643799, "learning_rate": 0.0002932955254818843, "loss": 1.7769, "step": 45727 }, { "epoch": 1.52, "grad_norm": 0.516968846321106, "learning_rate": 0.00029328507597991546, "loss": 1.7797, "step": 45728 }, { "epoch": 1.52, "grad_norm": 0.5466977953910828, "learning_rate": 0.0002932746264860974, "loss": 1.7037, "step": 45729 }, { "epoch": 1.52, "grad_norm": 0.5321241617202759, "learning_rate": 0.00029326417700044306, "loss": 1.823, "step": 45730 }, { "epoch": 1.52, "grad_norm": 0.5090082883834839, "learning_rate": 0.0002932537275229651, "loss": 1.7427, "step": 45731 }, { "epoch": 1.52, "grad_norm": 0.5436406135559082, "learning_rate": 0.000293243278053676, "loss": 1.746, "step": 45732 }, { "epoch": 1.52, "grad_norm": 0.5122254490852356, "learning_rate": 0.00029323282859258846, "loss": 1.7585, "step": 45733 }, { "epoch": 1.52, "grad_norm": 0.5235583782196045, "learning_rate": 0.00029322237913971555, "loss": 1.8293, "step": 45734 }, { "epoch": 1.52, "grad_norm": 0.5194175839424133, "learning_rate": 0.00029321192969506945, "loss": 1.7896, "step": 45735 }, { "epoch": 1.52, "grad_norm": 0.5309656262397766, "learning_rate": 0.0002932014802586631, "loss": 1.7669, "step": 45736 }, { "epoch": 1.52, "grad_norm": 0.5215542316436768, "learning_rate": 0.0002931910308305092, "loss": 1.7585, "step": 45737 }, { "epoch": 1.52, "grad_norm": 0.5308156609535217, "learning_rate": 0.0002931805814106204, "loss": 1.7551, "step": 45738 }, { "epoch": 1.52, "grad_norm": 0.5152751803398132, "learning_rate": 0.0002931701319990093, "loss": 1.7968, "step": 45739 }, { "epoch": 1.52, "grad_norm": 0.5272331833839417, "learning_rate": 0.0002931596825956886, "loss": 1.7554, "step": 45740 }, { "epoch": 1.52, "grad_norm": 0.5375294089317322, "learning_rate": 0.0002931492332006713, "loss": 1.7308, "step": 45741 }, { "epoch": 1.52, "grad_norm": 0.527614951133728, "learning_rate": 0.00029313878381396954, "loss": 1.7499, "step": 45742 }, { "epoch": 1.52, "grad_norm": 0.5283091068267822, "learning_rate": 0.0002931283344355964, "loss": 1.7715, "step": 45743 }, { "epoch": 1.52, "grad_norm": 0.5231155157089233, "learning_rate": 0.00029311788506556453, "loss": 1.6772, "step": 45744 }, { "epoch": 1.52, "grad_norm": 0.5198484659194946, "learning_rate": 0.0002931074357038864, "loss": 1.7105, "step": 45745 }, { "epoch": 1.52, "grad_norm": 0.5150761008262634, "learning_rate": 0.00029309698635057485, "loss": 1.7849, "step": 45746 }, { "epoch": 1.52, "grad_norm": 0.5167330503463745, "learning_rate": 0.00029308653700564256, "loss": 1.7221, "step": 45747 }, { "epoch": 1.52, "grad_norm": 0.5419381260871887, "learning_rate": 0.0002930760876691023, "loss": 1.7837, "step": 45748 }, { "epoch": 1.52, "grad_norm": 0.5615850687026978, "learning_rate": 0.00029306563834096647, "loss": 1.6617, "step": 45749 }, { "epoch": 1.52, "grad_norm": 0.5341291427612305, "learning_rate": 0.000293055189021248, "loss": 1.7993, "step": 45750 }, { "epoch": 1.52, "grad_norm": 0.5246905088424683, "learning_rate": 0.0002930447397099595, "loss": 1.7807, "step": 45751 }, { "epoch": 1.52, "grad_norm": 0.5267379283905029, "learning_rate": 0.00029303429040711373, "loss": 1.7115, "step": 45752 }, { "epoch": 1.52, "grad_norm": 0.5620472431182861, "learning_rate": 0.00029302384111272326, "loss": 1.7528, "step": 45753 }, { "epoch": 1.52, "grad_norm": 0.5217322707176208, "learning_rate": 0.0002930133918268007, "loss": 1.7515, "step": 45754 }, { "epoch": 1.52, "grad_norm": 0.536344051361084, "learning_rate": 0.00029300294254935914, "loss": 1.7788, "step": 45755 }, { "epoch": 1.52, "grad_norm": 0.5295045375823975, "learning_rate": 0.0002929924932804107, "loss": 1.735, "step": 45756 }, { "epoch": 1.52, "grad_norm": 0.5094068646430969, "learning_rate": 0.00029298204401996845, "loss": 1.7897, "step": 45757 }, { "epoch": 1.52, "grad_norm": 0.5228164196014404, "learning_rate": 0.00029297159476804503, "loss": 1.7963, "step": 45758 }, { "epoch": 1.52, "grad_norm": 0.5202387571334839, "learning_rate": 0.000292961145524653, "loss": 1.7946, "step": 45759 }, { "epoch": 1.52, "grad_norm": 0.5532516837120056, "learning_rate": 0.00029295069628980505, "loss": 1.8088, "step": 45760 }, { "epoch": 1.52, "grad_norm": 0.521107017993927, "learning_rate": 0.000292940247063514, "loss": 1.6716, "step": 45761 }, { "epoch": 1.52, "grad_norm": 0.5060588121414185, "learning_rate": 0.0002929297978457924, "loss": 1.6966, "step": 45762 }, { "epoch": 1.52, "grad_norm": 0.5417998433113098, "learning_rate": 0.0002929193486366529, "loss": 1.7403, "step": 45763 }, { "epoch": 1.52, "grad_norm": 0.5110008120536804, "learning_rate": 0.0002929088994361084, "loss": 1.7149, "step": 45764 }, { "epoch": 1.52, "grad_norm": 0.5120601654052734, "learning_rate": 0.00029289845024417147, "loss": 1.776, "step": 45765 }, { "epoch": 1.52, "grad_norm": 0.511624276638031, "learning_rate": 0.0002928880010608547, "loss": 1.7315, "step": 45766 }, { "epoch": 1.52, "grad_norm": 0.518110990524292, "learning_rate": 0.00029287755188617086, "loss": 1.8178, "step": 45767 }, { "epoch": 1.52, "grad_norm": 0.5157389044761658, "learning_rate": 0.0002928671027201327, "loss": 1.7041, "step": 45768 }, { "epoch": 1.52, "grad_norm": 0.5314480662345886, "learning_rate": 0.0002928566535627527, "loss": 1.7141, "step": 45769 }, { "epoch": 1.52, "grad_norm": 0.5058520436286926, "learning_rate": 0.00029284620441404367, "loss": 1.7726, "step": 45770 }, { "epoch": 1.52, "grad_norm": 0.5267660617828369, "learning_rate": 0.0002928357552740184, "loss": 1.7546, "step": 45771 }, { "epoch": 1.52, "grad_norm": 0.49875590205192566, "learning_rate": 0.0002928253061426894, "loss": 1.7021, "step": 45772 }, { "epoch": 1.52, "grad_norm": 0.5155830979347229, "learning_rate": 0.00029281485702006946, "loss": 1.7428, "step": 45773 }, { "epoch": 1.52, "grad_norm": 0.5145854949951172, "learning_rate": 0.00029280440790617116, "loss": 1.7326, "step": 45774 }, { "epoch": 1.52, "grad_norm": 0.5274412631988525, "learning_rate": 0.00029279395880100735, "loss": 1.77, "step": 45775 }, { "epoch": 1.52, "grad_norm": 0.5342397689819336, "learning_rate": 0.0002927835097045905, "loss": 1.7392, "step": 45776 }, { "epoch": 1.52, "grad_norm": 0.49876925349235535, "learning_rate": 0.00029277306061693337, "loss": 1.7618, "step": 45777 }, { "epoch": 1.52, "grad_norm": 0.5172969102859497, "learning_rate": 0.00029276261153804884, "loss": 1.8222, "step": 45778 }, { "epoch": 1.52, "grad_norm": 0.5451807975769043, "learning_rate": 0.0002927521624679494, "loss": 1.6758, "step": 45779 }, { "epoch": 1.52, "grad_norm": 0.5301844477653503, "learning_rate": 0.00029274171340664773, "loss": 1.7314, "step": 45780 }, { "epoch": 1.52, "grad_norm": 0.4998622238636017, "learning_rate": 0.0002927312643541566, "loss": 1.71, "step": 45781 }, { "epoch": 1.52, "grad_norm": 0.5210150480270386, "learning_rate": 0.00029272081531048856, "loss": 1.7747, "step": 45782 }, { "epoch": 1.52, "grad_norm": 0.5181472301483154, "learning_rate": 0.0002927103662756564, "loss": 1.7558, "step": 45783 }, { "epoch": 1.52, "grad_norm": 0.5308042764663696, "learning_rate": 0.0002926999172496727, "loss": 1.7806, "step": 45784 }, { "epoch": 1.52, "grad_norm": 0.5463235974311829, "learning_rate": 0.0002926894682325504, "loss": 1.7321, "step": 45785 }, { "epoch": 1.52, "grad_norm": 0.538542628288269, "learning_rate": 0.0002926790192243019, "loss": 1.7271, "step": 45786 }, { "epoch": 1.52, "grad_norm": 0.5337582230567932, "learning_rate": 0.00029266857022494, "loss": 1.7138, "step": 45787 }, { "epoch": 1.52, "grad_norm": 0.5210466384887695, "learning_rate": 0.00029265812123447743, "loss": 1.7621, "step": 45788 }, { "epoch": 1.52, "grad_norm": 0.5130430459976196, "learning_rate": 0.00029264767225292677, "loss": 1.7037, "step": 45789 }, { "epoch": 1.52, "grad_norm": 0.515746533870697, "learning_rate": 0.00029263722328030066, "loss": 1.798, "step": 45790 }, { "epoch": 1.52, "grad_norm": 0.5135074257850647, "learning_rate": 0.00029262677431661217, "loss": 1.7612, "step": 45791 }, { "epoch": 1.52, "grad_norm": 0.5070306658744812, "learning_rate": 0.0002926163253618734, "loss": 1.7622, "step": 45792 }, { "epoch": 1.52, "grad_norm": 0.533980667591095, "learning_rate": 0.0002926058764160974, "loss": 1.8054, "step": 45793 }, { "epoch": 1.52, "grad_norm": 0.5466170907020569, "learning_rate": 0.0002925954274792968, "loss": 1.7948, "step": 45794 }, { "epoch": 1.52, "grad_norm": 0.5353041291236877, "learning_rate": 0.0002925849785514843, "loss": 1.7726, "step": 45795 }, { "epoch": 1.52, "grad_norm": 0.5236415863037109, "learning_rate": 0.0002925745296326725, "loss": 1.7136, "step": 45796 }, { "epoch": 1.52, "grad_norm": 0.5166998505592346, "learning_rate": 0.00029256408072287406, "loss": 1.6918, "step": 45797 }, { "epoch": 1.52, "grad_norm": 0.5148779153823853, "learning_rate": 0.00029255363182210194, "loss": 1.6622, "step": 45798 }, { "epoch": 1.52, "grad_norm": 0.5249757766723633, "learning_rate": 0.00029254318293036837, "loss": 1.796, "step": 45799 }, { "epoch": 1.52, "grad_norm": 0.5239253640174866, "learning_rate": 0.0002925327340476864, "loss": 1.7561, "step": 45800 }, { "epoch": 1.52, "grad_norm": 0.5060327053070068, "learning_rate": 0.0002925222851740686, "loss": 1.7566, "step": 45801 }, { "epoch": 1.52, "grad_norm": 0.5305019021034241, "learning_rate": 0.0002925118363095276, "loss": 1.7774, "step": 45802 }, { "epoch": 1.52, "grad_norm": 0.5209240913391113, "learning_rate": 0.0002925013874540761, "loss": 1.7129, "step": 45803 }, { "epoch": 1.52, "grad_norm": 0.5441924929618835, "learning_rate": 0.0002924909386077268, "loss": 1.8283, "step": 45804 }, { "epoch": 1.52, "grad_norm": 0.5284571051597595, "learning_rate": 0.00029248048977049257, "loss": 1.8525, "step": 45805 }, { "epoch": 1.52, "grad_norm": 0.5128813982009888, "learning_rate": 0.00029247004094238574, "loss": 1.7745, "step": 45806 }, { "epoch": 1.52, "grad_norm": 0.5222726464271545, "learning_rate": 0.0002924595921234192, "loss": 1.7993, "step": 45807 }, { "epoch": 1.52, "grad_norm": 0.5337982773780823, "learning_rate": 0.0002924491433136057, "loss": 1.8426, "step": 45808 }, { "epoch": 1.52, "grad_norm": 0.5068994164466858, "learning_rate": 0.00029243869451295773, "loss": 1.7304, "step": 45809 }, { "epoch": 1.52, "grad_norm": 0.5198009610176086, "learning_rate": 0.0002924282457214881, "loss": 1.7417, "step": 45810 }, { "epoch": 1.52, "grad_norm": 0.5307517647743225, "learning_rate": 0.0002924177969392095, "loss": 1.7844, "step": 45811 }, { "epoch": 1.52, "grad_norm": 0.5154677629470825, "learning_rate": 0.00029240734816613455, "loss": 1.6587, "step": 45812 }, { "epoch": 1.52, "grad_norm": 0.5289175510406494, "learning_rate": 0.00029239689940227585, "loss": 1.7499, "step": 45813 }, { "epoch": 1.52, "grad_norm": 0.5241437554359436, "learning_rate": 0.00029238645064764626, "loss": 1.7408, "step": 45814 }, { "epoch": 1.52, "grad_norm": 0.5297373533248901, "learning_rate": 0.0002923760019022585, "loss": 1.7193, "step": 45815 }, { "epoch": 1.52, "grad_norm": 0.5197266936302185, "learning_rate": 0.0002923655531661251, "loss": 1.7734, "step": 45816 }, { "epoch": 1.52, "grad_norm": 0.5189009308815002, "learning_rate": 0.0002923551044392588, "loss": 1.6841, "step": 45817 }, { "epoch": 1.52, "grad_norm": 0.5164163708686829, "learning_rate": 0.00029234465572167226, "loss": 1.7023, "step": 45818 }, { "epoch": 1.52, "grad_norm": 0.5189070701599121, "learning_rate": 0.00029233420701337817, "loss": 1.7515, "step": 45819 }, { "epoch": 1.52, "grad_norm": 0.5433962941169739, "learning_rate": 0.00029232375831438913, "loss": 1.8014, "step": 45820 }, { "epoch": 1.52, "grad_norm": 0.512187659740448, "learning_rate": 0.00029231330962471816, "loss": 1.7268, "step": 45821 }, { "epoch": 1.52, "grad_norm": 0.5171290040016174, "learning_rate": 0.00029230286094437746, "loss": 1.772, "step": 45822 }, { "epoch": 1.52, "grad_norm": 0.5250971913337708, "learning_rate": 0.00029229241227338, "loss": 1.7287, "step": 45823 }, { "epoch": 1.52, "grad_norm": 0.500212550163269, "learning_rate": 0.0002922819636117385, "loss": 1.787, "step": 45824 }, { "epoch": 1.52, "grad_norm": 0.5300208330154419, "learning_rate": 0.00029227151495946557, "loss": 1.7482, "step": 45825 }, { "epoch": 1.52, "grad_norm": 0.5174624919891357, "learning_rate": 0.0002922610663165738, "loss": 1.7877, "step": 45826 }, { "epoch": 1.52, "grad_norm": 0.528183102607727, "learning_rate": 0.00029225061768307594, "loss": 1.7716, "step": 45827 }, { "epoch": 1.52, "grad_norm": 0.5128828883171082, "learning_rate": 0.00029224016905898484, "loss": 1.725, "step": 45828 }, { "epoch": 1.52, "grad_norm": 0.5205310583114624, "learning_rate": 0.00029222972044431285, "loss": 1.8144, "step": 45829 }, { "epoch": 1.52, "grad_norm": 0.5455302000045776, "learning_rate": 0.00029221927183907293, "loss": 1.7525, "step": 45830 }, { "epoch": 1.52, "grad_norm": 0.5481475591659546, "learning_rate": 0.00029220882324327766, "loss": 1.7276, "step": 45831 }, { "epoch": 1.52, "grad_norm": 0.5380759239196777, "learning_rate": 0.0002921983746569397, "loss": 1.7108, "step": 45832 }, { "epoch": 1.52, "grad_norm": 0.5192047953605652, "learning_rate": 0.0002921879260800718, "loss": 1.7606, "step": 45833 }, { "epoch": 1.52, "grad_norm": 0.5423923134803772, "learning_rate": 0.00029217747751268653, "loss": 1.7647, "step": 45834 }, { "epoch": 1.52, "grad_norm": 0.5325933694839478, "learning_rate": 0.0002921670289547968, "loss": 1.7443, "step": 45835 }, { "epoch": 1.52, "grad_norm": 0.534980833530426, "learning_rate": 0.000292156580406415, "loss": 1.7691, "step": 45836 }, { "epoch": 1.53, "grad_norm": 0.5174782872200012, "learning_rate": 0.00029214613186755395, "loss": 1.7979, "step": 45837 }, { "epoch": 1.53, "grad_norm": 0.507792592048645, "learning_rate": 0.00029213568333822645, "loss": 1.7623, "step": 45838 }, { "epoch": 1.53, "grad_norm": 0.5391439199447632, "learning_rate": 0.00029212523481844503, "loss": 1.7744, "step": 45839 }, { "epoch": 1.53, "grad_norm": 0.5209131836891174, "learning_rate": 0.0002921147863082224, "loss": 1.7587, "step": 45840 }, { "epoch": 1.53, "grad_norm": 0.5191512107849121, "learning_rate": 0.0002921043378075713, "loss": 1.8357, "step": 45841 }, { "epoch": 1.53, "grad_norm": 0.5578480958938599, "learning_rate": 0.00029209388931650433, "loss": 1.7562, "step": 45842 }, { "epoch": 1.53, "grad_norm": 0.5405688285827637, "learning_rate": 0.00029208344083503417, "loss": 1.7925, "step": 45843 }, { "epoch": 1.53, "grad_norm": 0.5397164225578308, "learning_rate": 0.0002920729923631735, "loss": 1.7448, "step": 45844 }, { "epoch": 1.53, "grad_norm": 0.5489735007286072, "learning_rate": 0.00029206254390093525, "loss": 1.7569, "step": 45845 }, { "epoch": 1.53, "grad_norm": 0.5312212109565735, "learning_rate": 0.00029205209544833175, "loss": 1.7305, "step": 45846 }, { "epoch": 1.53, "grad_norm": 0.517197847366333, "learning_rate": 0.00029204164700537586, "loss": 1.8062, "step": 45847 }, { "epoch": 1.53, "grad_norm": 0.5524534583091736, "learning_rate": 0.0002920311985720803, "loss": 1.8217, "step": 45848 }, { "epoch": 1.53, "grad_norm": 0.5300657749176025, "learning_rate": 0.00029202075014845766, "loss": 1.7452, "step": 45849 }, { "epoch": 1.53, "grad_norm": 0.5192834734916687, "learning_rate": 0.00029201030173452055, "loss": 1.7682, "step": 45850 }, { "epoch": 1.53, "grad_norm": 0.528097927570343, "learning_rate": 0.00029199985333028196, "loss": 1.8016, "step": 45851 }, { "epoch": 1.53, "grad_norm": 0.9423908591270447, "learning_rate": 0.00029198940493575424, "loss": 1.8724, "step": 45852 }, { "epoch": 1.53, "grad_norm": 0.5356619358062744, "learning_rate": 0.0002919789565509502, "loss": 1.7554, "step": 45853 }, { "epoch": 1.53, "grad_norm": 0.5287923812866211, "learning_rate": 0.0002919685081758825, "loss": 1.7978, "step": 45854 }, { "epoch": 1.53, "grad_norm": 0.5355850458145142, "learning_rate": 0.00029195805981056393, "loss": 1.7086, "step": 45855 }, { "epoch": 1.53, "grad_norm": 0.5104234218597412, "learning_rate": 0.00029194761145500704, "loss": 1.7474, "step": 45856 }, { "epoch": 1.53, "grad_norm": 0.5246514081954956, "learning_rate": 0.0002919371631092245, "loss": 1.8362, "step": 45857 }, { "epoch": 1.53, "grad_norm": 0.5151283144950867, "learning_rate": 0.00029192671477322923, "loss": 1.7939, "step": 45858 }, { "epoch": 1.53, "grad_norm": 0.5214217901229858, "learning_rate": 0.0002919162664470336, "loss": 1.8118, "step": 45859 }, { "epoch": 1.53, "grad_norm": 0.5274528861045837, "learning_rate": 0.0002919058181306505, "loss": 1.7289, "step": 45860 }, { "epoch": 1.53, "grad_norm": 0.5291566848754883, "learning_rate": 0.00029189536982409254, "loss": 1.7324, "step": 45861 }, { "epoch": 1.53, "grad_norm": 0.524932861328125, "learning_rate": 0.0002918849215273723, "loss": 1.7409, "step": 45862 }, { "epoch": 1.53, "grad_norm": 0.5249937176704407, "learning_rate": 0.0002918744732405027, "loss": 1.7267, "step": 45863 }, { "epoch": 1.53, "grad_norm": 0.5268562436103821, "learning_rate": 0.00029186402496349614, "loss": 1.7861, "step": 45864 }, { "epoch": 1.53, "grad_norm": 0.5266032218933105, "learning_rate": 0.0002918535766963657, "loss": 1.7084, "step": 45865 }, { "epoch": 1.53, "grad_norm": 0.5237935781478882, "learning_rate": 0.00029184312843912356, "loss": 1.7272, "step": 45866 }, { "epoch": 1.53, "grad_norm": 0.5362173914909363, "learning_rate": 0.0002918326801917828, "loss": 1.7536, "step": 45867 }, { "epoch": 1.53, "grad_norm": 0.5363187789916992, "learning_rate": 0.00029182223195435596, "loss": 1.6803, "step": 45868 }, { "epoch": 1.53, "grad_norm": 0.547012209892273, "learning_rate": 0.0002918117837268557, "loss": 1.7633, "step": 45869 }, { "epoch": 1.53, "grad_norm": 0.5538346767425537, "learning_rate": 0.0002918013355092947, "loss": 1.8761, "step": 45870 }, { "epoch": 1.53, "grad_norm": 0.516805112361908, "learning_rate": 0.0002917908873016858, "loss": 1.7558, "step": 45871 }, { "epoch": 1.53, "grad_norm": 0.5240009427070618, "learning_rate": 0.00029178043910404143, "loss": 1.7735, "step": 45872 }, { "epoch": 1.53, "grad_norm": 0.5582231879234314, "learning_rate": 0.00029176999091637434, "loss": 1.8311, "step": 45873 }, { "epoch": 1.53, "grad_norm": 0.5425276160240173, "learning_rate": 0.00029175954273869735, "loss": 1.7831, "step": 45874 }, { "epoch": 1.53, "grad_norm": 0.5584009885787964, "learning_rate": 0.0002917490945710231, "loss": 1.8203, "step": 45875 }, { "epoch": 1.53, "grad_norm": 0.5316953063011169, "learning_rate": 0.0002917386464133642, "loss": 1.7569, "step": 45876 }, { "epoch": 1.53, "grad_norm": 0.5808159112930298, "learning_rate": 0.00029172819826573336, "loss": 1.696, "step": 45877 }, { "epoch": 1.53, "grad_norm": 0.5485376715660095, "learning_rate": 0.0002917177501281433, "loss": 1.7279, "step": 45878 }, { "epoch": 1.53, "grad_norm": 0.5246627330780029, "learning_rate": 0.00029170730200060664, "loss": 1.7386, "step": 45879 }, { "epoch": 1.53, "grad_norm": 0.5370644927024841, "learning_rate": 0.000291696853883136, "loss": 1.6775, "step": 45880 }, { "epoch": 1.53, "grad_norm": 0.5271453857421875, "learning_rate": 0.00029168640577574426, "loss": 1.736, "step": 45881 }, { "epoch": 1.53, "grad_norm": 0.5524426102638245, "learning_rate": 0.00029167595767844406, "loss": 1.7636, "step": 45882 }, { "epoch": 1.53, "grad_norm": 0.5794365406036377, "learning_rate": 0.00029166550959124787, "loss": 1.8219, "step": 45883 }, { "epoch": 1.53, "grad_norm": 0.5729711055755615, "learning_rate": 0.00029165506151416864, "loss": 1.7832, "step": 45884 }, { "epoch": 1.53, "grad_norm": 0.532705545425415, "learning_rate": 0.00029164461344721894, "loss": 1.7321, "step": 45885 }, { "epoch": 1.53, "grad_norm": 0.5006541013717651, "learning_rate": 0.0002916341653904114, "loss": 1.7539, "step": 45886 }, { "epoch": 1.53, "grad_norm": 0.5026536583900452, "learning_rate": 0.0002916237173437587, "loss": 1.8436, "step": 45887 }, { "epoch": 1.53, "grad_norm": 0.54276442527771, "learning_rate": 0.00029161326930727367, "loss": 1.7205, "step": 45888 }, { "epoch": 1.53, "grad_norm": 0.5347708463668823, "learning_rate": 0.0002916028212809688, "loss": 1.7426, "step": 45889 }, { "epoch": 1.53, "grad_norm": 0.5383334159851074, "learning_rate": 0.0002915923732648569, "loss": 1.7804, "step": 45890 }, { "epoch": 1.53, "grad_norm": 0.519091784954071, "learning_rate": 0.0002915819252589507, "loss": 1.7164, "step": 45891 }, { "epoch": 1.53, "grad_norm": 0.5322967171669006, "learning_rate": 0.00029157147726326273, "loss": 1.7495, "step": 45892 }, { "epoch": 1.53, "grad_norm": 0.5305795073509216, "learning_rate": 0.0002915610292778058, "loss": 1.7549, "step": 45893 }, { "epoch": 1.53, "grad_norm": 0.552227258682251, "learning_rate": 0.0002915505813025924, "loss": 1.7455, "step": 45894 }, { "epoch": 1.53, "grad_norm": 0.5201681852340698, "learning_rate": 0.0002915401333376355, "loss": 1.8166, "step": 45895 }, { "epoch": 1.53, "grad_norm": 0.5264380574226379, "learning_rate": 0.00029152968538294756, "loss": 1.7261, "step": 45896 }, { "epoch": 1.53, "grad_norm": 0.5187246799468994, "learning_rate": 0.0002915192374385414, "loss": 1.7677, "step": 45897 }, { "epoch": 1.53, "grad_norm": 0.5228352546691895, "learning_rate": 0.0002915087895044296, "loss": 1.7876, "step": 45898 }, { "epoch": 1.53, "grad_norm": 0.554884135723114, "learning_rate": 0.00029149834158062485, "loss": 1.7174, "step": 45899 }, { "epoch": 1.53, "grad_norm": 0.5463038086891174, "learning_rate": 0.00029148789366713985, "loss": 1.7678, "step": 45900 }, { "epoch": 1.53, "grad_norm": 0.5195926427841187, "learning_rate": 0.00029147744576398725, "loss": 1.763, "step": 45901 }, { "epoch": 1.53, "grad_norm": 0.5145928263664246, "learning_rate": 0.0002914669978711799, "loss": 1.7251, "step": 45902 }, { "epoch": 1.53, "grad_norm": 0.5298007130622864, "learning_rate": 0.0002914565499887303, "loss": 1.7251, "step": 45903 }, { "epoch": 1.53, "grad_norm": 0.5479479432106018, "learning_rate": 0.0002914461021166512, "loss": 1.7569, "step": 45904 }, { "epoch": 1.53, "grad_norm": 0.5201276540756226, "learning_rate": 0.0002914356542549553, "loss": 1.8058, "step": 45905 }, { "epoch": 1.53, "grad_norm": 0.5283462405204773, "learning_rate": 0.0002914252064036552, "loss": 1.7848, "step": 45906 }, { "epoch": 1.53, "grad_norm": 0.5051793456077576, "learning_rate": 0.0002914147585627636, "loss": 1.7551, "step": 45907 }, { "epoch": 1.53, "grad_norm": 0.5189399719238281, "learning_rate": 0.0002914043107322934, "loss": 1.7776, "step": 45908 }, { "epoch": 1.53, "grad_norm": 0.5058698058128357, "learning_rate": 0.0002913938629122569, "loss": 1.7772, "step": 45909 }, { "epoch": 1.53, "grad_norm": 0.5159910917282104, "learning_rate": 0.000291383415102667, "loss": 1.7124, "step": 45910 }, { "epoch": 1.53, "grad_norm": 0.5287071466445923, "learning_rate": 0.0002913729673035364, "loss": 1.7793, "step": 45911 }, { "epoch": 1.53, "grad_norm": 0.5210043787956238, "learning_rate": 0.0002913625195148778, "loss": 1.7807, "step": 45912 }, { "epoch": 1.53, "grad_norm": 0.527753472328186, "learning_rate": 0.0002913520717367038, "loss": 1.7806, "step": 45913 }, { "epoch": 1.53, "grad_norm": 0.5123043060302734, "learning_rate": 0.00029134162396902703, "loss": 1.7025, "step": 45914 }, { "epoch": 1.53, "grad_norm": 0.5150045156478882, "learning_rate": 0.00029133117621186046, "loss": 1.7641, "step": 45915 }, { "epoch": 1.53, "grad_norm": 0.5123704075813293, "learning_rate": 0.00029132072846521634, "loss": 1.8056, "step": 45916 }, { "epoch": 1.53, "grad_norm": 0.5196936130523682, "learning_rate": 0.0002913102807291077, "loss": 1.7716, "step": 45917 }, { "epoch": 1.53, "grad_norm": 0.5301433801651001, "learning_rate": 0.0002912998330035471, "loss": 1.756, "step": 45918 }, { "epoch": 1.53, "grad_norm": 0.5160157680511475, "learning_rate": 0.0002912893852885472, "loss": 1.7809, "step": 45919 }, { "epoch": 1.53, "grad_norm": 0.5356852412223816, "learning_rate": 0.00029127893758412065, "loss": 1.6563, "step": 45920 }, { "epoch": 1.53, "grad_norm": 0.5269819498062134, "learning_rate": 0.0002912684898902801, "loss": 1.8274, "step": 45921 }, { "epoch": 1.53, "grad_norm": 0.5352648496627808, "learning_rate": 0.0002912580422070386, "loss": 1.7533, "step": 45922 }, { "epoch": 1.53, "grad_norm": 0.5489072203636169, "learning_rate": 0.0002912475945344083, "loss": 1.7226, "step": 45923 }, { "epoch": 1.53, "grad_norm": 0.5270512104034424, "learning_rate": 0.0002912371468724022, "loss": 1.8111, "step": 45924 }, { "epoch": 1.53, "grad_norm": 0.5051823854446411, "learning_rate": 0.000291226699221033, "loss": 1.7501, "step": 45925 }, { "epoch": 1.53, "grad_norm": 0.5064332485198975, "learning_rate": 0.00029121625158031323, "loss": 1.7427, "step": 45926 }, { "epoch": 1.53, "grad_norm": 0.523093044757843, "learning_rate": 0.0002912058039502556, "loss": 1.7678, "step": 45927 }, { "epoch": 1.53, "grad_norm": 0.915062665939331, "learning_rate": 0.00029119535633087296, "loss": 1.7593, "step": 45928 }, { "epoch": 1.53, "grad_norm": 0.5233694911003113, "learning_rate": 0.00029118490872217774, "loss": 1.7938, "step": 45929 }, { "epoch": 1.53, "grad_norm": 0.5091848969459534, "learning_rate": 0.0002911744611241827, "loss": 1.7712, "step": 45930 }, { "epoch": 1.53, "grad_norm": 0.5280020236968994, "learning_rate": 0.0002911640135369006, "loss": 1.7299, "step": 45931 }, { "epoch": 1.53, "grad_norm": 0.5160413384437561, "learning_rate": 0.0002911535659603442, "loss": 1.7781, "step": 45932 }, { "epoch": 1.53, "grad_norm": 0.5164037942886353, "learning_rate": 0.00029114311839452595, "loss": 1.7567, "step": 45933 }, { "epoch": 1.53, "grad_norm": 0.5145567059516907, "learning_rate": 0.0002911326708394587, "loss": 1.7204, "step": 45934 }, { "epoch": 1.53, "grad_norm": 0.5180590152740479, "learning_rate": 0.00029112222329515504, "loss": 1.7948, "step": 45935 }, { "epoch": 1.53, "grad_norm": 0.527801513671875, "learning_rate": 0.00029111177576162777, "loss": 1.7865, "step": 45936 }, { "epoch": 1.53, "grad_norm": 0.5310487747192383, "learning_rate": 0.00029110132823888935, "loss": 1.7512, "step": 45937 }, { "epoch": 1.53, "grad_norm": 0.5229851007461548, "learning_rate": 0.0002910908807269528, "loss": 1.7393, "step": 45938 }, { "epoch": 1.53, "grad_norm": 0.532331645488739, "learning_rate": 0.0002910804332258304, "loss": 1.7767, "step": 45939 }, { "epoch": 1.53, "grad_norm": 0.5102497935295105, "learning_rate": 0.00029106998573553516, "loss": 1.7879, "step": 45940 }, { "epoch": 1.53, "grad_norm": 0.5284210443496704, "learning_rate": 0.0002910595382560796, "loss": 1.7152, "step": 45941 }, { "epoch": 1.53, "grad_norm": 0.5174779295921326, "learning_rate": 0.00029104909078747653, "loss": 1.7806, "step": 45942 }, { "epoch": 1.53, "grad_norm": 0.517404317855835, "learning_rate": 0.0002910386433297385, "loss": 1.75, "step": 45943 }, { "epoch": 1.53, "grad_norm": 0.5405375361442566, "learning_rate": 0.00029102819588287815, "loss": 1.7674, "step": 45944 }, { "epoch": 1.53, "grad_norm": 0.519883930683136, "learning_rate": 0.0002910177484469084, "loss": 1.7425, "step": 45945 }, { "epoch": 1.53, "grad_norm": 0.533591091632843, "learning_rate": 0.00029100730102184165, "loss": 1.7086, "step": 45946 }, { "epoch": 1.53, "grad_norm": 1.6199740171432495, "learning_rate": 0.00029099685360769073, "loss": 1.7841, "step": 45947 }, { "epoch": 1.53, "grad_norm": 0.511292576789856, "learning_rate": 0.0002909864062044684, "loss": 1.7751, "step": 45948 }, { "epoch": 1.53, "grad_norm": 0.5198271870613098, "learning_rate": 0.00029097595881218716, "loss": 1.6573, "step": 45949 }, { "epoch": 1.53, "grad_norm": 0.5193297266960144, "learning_rate": 0.0002909655114308598, "loss": 1.7828, "step": 45950 }, { "epoch": 1.53, "grad_norm": 0.508152425289154, "learning_rate": 0.0002909550640604989, "loss": 1.78, "step": 45951 }, { "epoch": 1.53, "grad_norm": 0.5233796238899231, "learning_rate": 0.00029094461670111743, "loss": 1.762, "step": 45952 }, { "epoch": 1.53, "grad_norm": 1.0441113710403442, "learning_rate": 0.0002909341693527276, "loss": 1.8028, "step": 45953 }, { "epoch": 1.53, "grad_norm": 0.5022746324539185, "learning_rate": 0.0002909237220153425, "loss": 1.7657, "step": 45954 }, { "epoch": 1.53, "grad_norm": 0.5078235864639282, "learning_rate": 0.00029091327468897465, "loss": 1.7426, "step": 45955 }, { "epoch": 1.53, "grad_norm": 0.5294604301452637, "learning_rate": 0.0002909028273736368, "loss": 1.7958, "step": 45956 }, { "epoch": 1.53, "grad_norm": 0.5088943243026733, "learning_rate": 0.00029089238006934146, "loss": 1.8083, "step": 45957 }, { "epoch": 1.53, "grad_norm": 0.5208804607391357, "learning_rate": 0.0002908819327761015, "loss": 1.7754, "step": 45958 }, { "epoch": 1.53, "grad_norm": 0.5300201773643494, "learning_rate": 0.0002908714854939295, "loss": 1.734, "step": 45959 }, { "epoch": 1.53, "grad_norm": 0.5317090153694153, "learning_rate": 0.0002908610382228381, "loss": 1.7777, "step": 45960 }, { "epoch": 1.53, "grad_norm": 0.5981634259223938, "learning_rate": 0.00029085059096284013, "loss": 1.7004, "step": 45961 }, { "epoch": 1.53, "grad_norm": 0.5138413906097412, "learning_rate": 0.0002908401437139483, "loss": 1.831, "step": 45962 }, { "epoch": 1.53, "grad_norm": 0.5238447785377502, "learning_rate": 0.00029082969647617506, "loss": 1.7588, "step": 45963 }, { "epoch": 1.53, "grad_norm": 0.5363412499427795, "learning_rate": 0.0002908192492495332, "loss": 1.7865, "step": 45964 }, { "epoch": 1.53, "grad_norm": 0.5055342316627502, "learning_rate": 0.0002908088020340356, "loss": 1.7797, "step": 45965 }, { "epoch": 1.53, "grad_norm": 0.5255951881408691, "learning_rate": 0.0002907983548296946, "loss": 1.7713, "step": 45966 }, { "epoch": 1.53, "grad_norm": 0.5211371183395386, "learning_rate": 0.00029078790763652294, "loss": 1.7245, "step": 45967 }, { "epoch": 1.53, "grad_norm": 0.539282500743866, "learning_rate": 0.00029077746045453364, "loss": 1.7463, "step": 45968 }, { "epoch": 1.53, "grad_norm": 0.5240447521209717, "learning_rate": 0.00029076701328373895, "loss": 1.7757, "step": 45969 }, { "epoch": 1.53, "grad_norm": 0.5296128392219543, "learning_rate": 0.0002907565661241518, "loss": 1.7782, "step": 45970 }, { "epoch": 1.53, "grad_norm": 0.5196357369422913, "learning_rate": 0.0002907461189757849, "loss": 1.6883, "step": 45971 }, { "epoch": 1.53, "grad_norm": 0.5115443468093872, "learning_rate": 0.0002907356718386508, "loss": 1.7543, "step": 45972 }, { "epoch": 1.53, "grad_norm": 0.5408348441123962, "learning_rate": 0.0002907252247127622, "loss": 1.8463, "step": 45973 }, { "epoch": 1.53, "grad_norm": 0.5196276307106018, "learning_rate": 0.0002907147775981318, "loss": 1.7967, "step": 45974 }, { "epoch": 1.53, "grad_norm": 0.5336630344390869, "learning_rate": 0.0002907043304947724, "loss": 1.6557, "step": 45975 }, { "epoch": 1.53, "grad_norm": 0.5423303842544556, "learning_rate": 0.00029069388340269644, "loss": 1.8047, "step": 45976 }, { "epoch": 1.53, "grad_norm": 0.5220896005630493, "learning_rate": 0.00029068343632191677, "loss": 1.8443, "step": 45977 }, { "epoch": 1.53, "grad_norm": 0.5154044032096863, "learning_rate": 0.0002906729892524461, "loss": 1.743, "step": 45978 }, { "epoch": 1.53, "grad_norm": 0.5438756346702576, "learning_rate": 0.000290662542194297, "loss": 1.8508, "step": 45979 }, { "epoch": 1.53, "grad_norm": 0.5189386010169983, "learning_rate": 0.0002906520951474822, "loss": 1.7111, "step": 45980 }, { "epoch": 1.53, "grad_norm": 0.5376380085945129, "learning_rate": 0.0002906416481120143, "loss": 1.7967, "step": 45981 }, { "epoch": 1.53, "grad_norm": 0.5195854902267456, "learning_rate": 0.00029063120108790627, "loss": 1.7946, "step": 45982 }, { "epoch": 1.53, "grad_norm": 0.5190117955207825, "learning_rate": 0.00029062075407517037, "loss": 1.772, "step": 45983 }, { "epoch": 1.53, "grad_norm": 0.51905757188797, "learning_rate": 0.00029061030707381956, "loss": 1.7798, "step": 45984 }, { "epoch": 1.53, "grad_norm": 0.5286656618118286, "learning_rate": 0.00029059986008386654, "loss": 1.796, "step": 45985 }, { "epoch": 1.53, "grad_norm": 0.5318763852119446, "learning_rate": 0.00029058941310532377, "loss": 1.8172, "step": 45986 }, { "epoch": 1.53, "grad_norm": 0.5229482650756836, "learning_rate": 0.00029057896613820416, "loss": 1.7589, "step": 45987 }, { "epoch": 1.53, "grad_norm": 0.5359799861907959, "learning_rate": 0.0002905685191825203, "loss": 1.751, "step": 45988 }, { "epoch": 1.53, "grad_norm": 0.5417637228965759, "learning_rate": 0.0002905580722382848, "loss": 1.758, "step": 45989 }, { "epoch": 1.53, "grad_norm": 0.5432787537574768, "learning_rate": 0.0002905476253055104, "loss": 1.7641, "step": 45990 }, { "epoch": 1.53, "grad_norm": 0.5242860913276672, "learning_rate": 0.0002905371783842098, "loss": 1.7339, "step": 45991 }, { "epoch": 1.53, "grad_norm": 0.5500859022140503, "learning_rate": 0.0002905267314743958, "loss": 1.7776, "step": 45992 }, { "epoch": 1.53, "grad_norm": 0.5401445031166077, "learning_rate": 0.00029051628457608085, "loss": 1.6961, "step": 45993 }, { "epoch": 1.53, "grad_norm": 0.5907460451126099, "learning_rate": 0.0002905058376892777, "loss": 1.7989, "step": 45994 }, { "epoch": 1.53, "grad_norm": 0.5486685633659363, "learning_rate": 0.00029049539081399915, "loss": 1.8373, "step": 45995 }, { "epoch": 1.53, "grad_norm": 0.5257874131202698, "learning_rate": 0.00029048494395025773, "loss": 1.8835, "step": 45996 }, { "epoch": 1.53, "grad_norm": 0.5339658260345459, "learning_rate": 0.00029047449709806614, "loss": 1.7547, "step": 45997 }, { "epoch": 1.53, "grad_norm": 0.5300934910774231, "learning_rate": 0.00029046405025743714, "loss": 1.7036, "step": 45998 }, { "epoch": 1.53, "grad_norm": 0.5804429054260254, "learning_rate": 0.00029045360342838347, "loss": 1.786, "step": 45999 }, { "epoch": 1.53, "grad_norm": 0.5320484638214111, "learning_rate": 0.0002904431566109177, "loss": 1.7693, "step": 46000 }, { "epoch": 1.53, "grad_norm": 0.5596234798431396, "learning_rate": 0.00029043270980505244, "loss": 1.6902, "step": 46001 }, { "epoch": 1.53, "grad_norm": 0.5262794494628906, "learning_rate": 0.00029042226301080056, "loss": 1.7157, "step": 46002 }, { "epoch": 1.53, "grad_norm": 0.5461794137954712, "learning_rate": 0.0002904118162281746, "loss": 1.6942, "step": 46003 }, { "epoch": 1.53, "grad_norm": 0.5413588881492615, "learning_rate": 0.0002904013694571872, "loss": 1.7824, "step": 46004 }, { "epoch": 1.53, "grad_norm": 0.5299088358879089, "learning_rate": 0.00029039092269785125, "loss": 1.7673, "step": 46005 }, { "epoch": 1.53, "grad_norm": 0.5141512751579285, "learning_rate": 0.00029038047595017925, "loss": 1.7487, "step": 46006 }, { "epoch": 1.53, "grad_norm": 0.5045349597930908, "learning_rate": 0.0002903700292141839, "loss": 1.7392, "step": 46007 }, { "epoch": 1.53, "grad_norm": 0.5406931638717651, "learning_rate": 0.0002903595824898779, "loss": 1.8006, "step": 46008 }, { "epoch": 1.53, "grad_norm": 0.5239468812942505, "learning_rate": 0.0002903491357772741, "loss": 1.7802, "step": 46009 }, { "epoch": 1.53, "grad_norm": 0.6384530067443848, "learning_rate": 0.0002903386890763849, "loss": 1.7378, "step": 46010 }, { "epoch": 1.53, "grad_norm": 0.5298306345939636, "learning_rate": 0.00029032824238722305, "loss": 1.818, "step": 46011 }, { "epoch": 1.53, "grad_norm": 0.5173022747039795, "learning_rate": 0.00029031779570980144, "loss": 1.7471, "step": 46012 }, { "epoch": 1.53, "grad_norm": 0.5455970764160156, "learning_rate": 0.00029030734904413253, "loss": 1.7656, "step": 46013 }, { "epoch": 1.53, "grad_norm": 0.5445685386657715, "learning_rate": 0.000290296902390229, "loss": 1.8028, "step": 46014 }, { "epoch": 1.53, "grad_norm": 0.5154754519462585, "learning_rate": 0.00029028645574810376, "loss": 1.7483, "step": 46015 }, { "epoch": 1.53, "grad_norm": 0.5286401510238647, "learning_rate": 0.0002902760091177692, "loss": 1.7429, "step": 46016 }, { "epoch": 1.53, "grad_norm": 0.5040125846862793, "learning_rate": 0.0002902655624992382, "loss": 1.7917, "step": 46017 }, { "epoch": 1.53, "grad_norm": 0.5185711979866028, "learning_rate": 0.0002902551158925233, "loss": 1.7824, "step": 46018 }, { "epoch": 1.53, "grad_norm": 0.5190505385398865, "learning_rate": 0.0002902446692976373, "loss": 1.6515, "step": 46019 }, { "epoch": 1.53, "grad_norm": 0.5512483716011047, "learning_rate": 0.00029023422271459286, "loss": 1.795, "step": 46020 }, { "epoch": 1.53, "grad_norm": 0.5327761769294739, "learning_rate": 0.0002902237761434026, "loss": 1.6982, "step": 46021 }, { "epoch": 1.53, "grad_norm": 0.5710309743881226, "learning_rate": 0.0002902133295840793, "loss": 1.8116, "step": 46022 }, { "epoch": 1.53, "grad_norm": 0.5276721119880676, "learning_rate": 0.0002902028830366355, "loss": 1.702, "step": 46023 }, { "epoch": 1.53, "grad_norm": 0.5180999636650085, "learning_rate": 0.00029019243650108386, "loss": 1.7244, "step": 46024 }, { "epoch": 1.53, "grad_norm": 0.5176194906234741, "learning_rate": 0.0002901819899774374, "loss": 1.7785, "step": 46025 }, { "epoch": 1.53, "grad_norm": 0.5021487474441528, "learning_rate": 0.0002901715434657084, "loss": 1.7404, "step": 46026 }, { "epoch": 1.53, "grad_norm": 0.5025882720947266, "learning_rate": 0.0002901610969659097, "loss": 1.7143, "step": 46027 }, { "epoch": 1.53, "grad_norm": 0.5240271091461182, "learning_rate": 0.000290150650478054, "loss": 1.7264, "step": 46028 }, { "epoch": 1.53, "grad_norm": 2.578092575073242, "learning_rate": 0.000290140204002154, "loss": 1.7923, "step": 46029 }, { "epoch": 1.53, "grad_norm": 0.526458203792572, "learning_rate": 0.0002901297575382223, "loss": 1.8564, "step": 46030 }, { "epoch": 1.53, "grad_norm": 0.5190527439117432, "learning_rate": 0.0002901193110862716, "loss": 1.8323, "step": 46031 }, { "epoch": 1.53, "grad_norm": 0.5264995098114014, "learning_rate": 0.00029010886464631475, "loss": 1.7358, "step": 46032 }, { "epoch": 1.53, "grad_norm": 0.5200693011283875, "learning_rate": 0.00029009841821836405, "loss": 1.7738, "step": 46033 }, { "epoch": 1.53, "grad_norm": 0.5170716047286987, "learning_rate": 0.0002900879718024325, "loss": 1.7728, "step": 46034 }, { "epoch": 1.53, "grad_norm": 0.5313464403152466, "learning_rate": 0.00029007752539853275, "loss": 1.7709, "step": 46035 }, { "epoch": 1.53, "grad_norm": 0.516440212726593, "learning_rate": 0.00029006707900667737, "loss": 1.7707, "step": 46036 }, { "epoch": 1.53, "grad_norm": 0.513831377029419, "learning_rate": 0.0002900566326268791, "loss": 1.774, "step": 46037 }, { "epoch": 1.53, "grad_norm": 0.5149788856506348, "learning_rate": 0.0002900461862591506, "loss": 1.822, "step": 46038 }, { "epoch": 1.53, "grad_norm": 2.400779962539673, "learning_rate": 0.0002900357399035047, "loss": 1.8312, "step": 46039 }, { "epoch": 1.53, "grad_norm": 0.5301233530044556, "learning_rate": 0.0002900252935599537, "loss": 1.768, "step": 46040 }, { "epoch": 1.53, "grad_norm": 0.515335202217102, "learning_rate": 0.00029001484722851066, "loss": 1.7762, "step": 46041 }, { "epoch": 1.53, "grad_norm": 0.5273790955543518, "learning_rate": 0.00029000440090918813, "loss": 1.7679, "step": 46042 }, { "epoch": 1.53, "grad_norm": 0.5214560627937317, "learning_rate": 0.0002899939546019988, "loss": 1.705, "step": 46043 }, { "epoch": 1.53, "grad_norm": 0.5203147530555725, "learning_rate": 0.0002899835083069553, "loss": 1.7303, "step": 46044 }, { "epoch": 1.53, "grad_norm": 0.5444552302360535, "learning_rate": 0.0002899730620240704, "loss": 1.7434, "step": 46045 }, { "epoch": 1.53, "grad_norm": 0.5062960982322693, "learning_rate": 0.00028996261575335667, "loss": 1.7684, "step": 46046 }, { "epoch": 1.53, "grad_norm": 0.5162211060523987, "learning_rate": 0.00028995216949482676, "loss": 1.8226, "step": 46047 }, { "epoch": 1.53, "grad_norm": 0.5248503684997559, "learning_rate": 0.00028994172324849353, "loss": 1.8638, "step": 46048 }, { "epoch": 1.53, "grad_norm": 0.505598783493042, "learning_rate": 0.0002899312770143696, "loss": 1.7819, "step": 46049 }, { "epoch": 1.53, "grad_norm": 0.5420969128608704, "learning_rate": 0.0002899208307924676, "loss": 1.8115, "step": 46050 }, { "epoch": 1.53, "grad_norm": 0.5191406011581421, "learning_rate": 0.00028991038458280017, "loss": 1.6852, "step": 46051 }, { "epoch": 1.53, "grad_norm": 0.518812358379364, "learning_rate": 0.0002898999383853801, "loss": 1.773, "step": 46052 }, { "epoch": 1.53, "grad_norm": 0.5153418183326721, "learning_rate": 0.00028988949220022, "loss": 1.7028, "step": 46053 }, { "epoch": 1.53, "grad_norm": 0.5379036664962769, "learning_rate": 0.0002898790460273325, "loss": 1.7539, "step": 46054 }, { "epoch": 1.53, "grad_norm": 0.5198157429695129, "learning_rate": 0.00028986859986673054, "loss": 1.7231, "step": 46055 }, { "epoch": 1.53, "grad_norm": 0.5331823229789734, "learning_rate": 0.0002898581537184264, "loss": 1.8302, "step": 46056 }, { "epoch": 1.53, "grad_norm": 0.5136744379997253, "learning_rate": 0.000289847707582433, "loss": 1.7856, "step": 46057 }, { "epoch": 1.53, "grad_norm": 0.5275598168373108, "learning_rate": 0.00028983726145876304, "loss": 1.6949, "step": 46058 }, { "epoch": 1.53, "grad_norm": 0.5350505709648132, "learning_rate": 0.0002898268153474292, "loss": 1.833, "step": 46059 }, { "epoch": 1.53, "grad_norm": 0.5421485304832458, "learning_rate": 0.00028981636924844403, "loss": 1.7815, "step": 46060 }, { "epoch": 1.53, "grad_norm": 0.5514633059501648, "learning_rate": 0.00028980592316182023, "loss": 1.8228, "step": 46061 }, { "epoch": 1.53, "grad_norm": 0.5257564783096313, "learning_rate": 0.0002897954770875707, "loss": 1.7951, "step": 46062 }, { "epoch": 1.53, "grad_norm": 0.5085745453834534, "learning_rate": 0.0002897850310257078, "loss": 1.8138, "step": 46063 }, { "epoch": 1.53, "grad_norm": 0.5274353623390198, "learning_rate": 0.00028977458497624444, "loss": 1.7383, "step": 46064 }, { "epoch": 1.53, "grad_norm": 0.5532341599464417, "learning_rate": 0.0002897641389391933, "loss": 1.8808, "step": 46065 }, { "epoch": 1.53, "grad_norm": 0.5311374664306641, "learning_rate": 0.0002897536929145669, "loss": 1.7838, "step": 46066 }, { "epoch": 1.53, "grad_norm": 0.5217092037200928, "learning_rate": 0.00028974324690237796, "loss": 1.7387, "step": 46067 }, { "epoch": 1.53, "grad_norm": 0.542283833026886, "learning_rate": 0.0002897328009026392, "loss": 1.753, "step": 46068 }, { "epoch": 1.53, "grad_norm": 0.5238398909568787, "learning_rate": 0.0002897223549153635, "loss": 1.8078, "step": 46069 }, { "epoch": 1.53, "grad_norm": 0.5321604609489441, "learning_rate": 0.0002897119089405631, "loss": 1.7361, "step": 46070 }, { "epoch": 1.53, "grad_norm": 0.5345132946968079, "learning_rate": 0.00028970146297825107, "loss": 1.7477, "step": 46071 }, { "epoch": 1.53, "grad_norm": 0.5260015726089478, "learning_rate": 0.00028969101702844, "loss": 1.8363, "step": 46072 }, { "epoch": 1.53, "grad_norm": 0.5044436454772949, "learning_rate": 0.00028968057109114237, "loss": 1.7984, "step": 46073 }, { "epoch": 1.53, "grad_norm": 0.5608265995979309, "learning_rate": 0.0002896701251663711, "loss": 1.7246, "step": 46074 }, { "epoch": 1.53, "grad_norm": 0.5267060995101929, "learning_rate": 0.00028965967925413883, "loss": 1.7803, "step": 46075 }, { "epoch": 1.53, "grad_norm": 0.5217353701591492, "learning_rate": 0.00028964923335445805, "loss": 1.7261, "step": 46076 }, { "epoch": 1.53, "grad_norm": 0.5535895824432373, "learning_rate": 0.00028963878746734155, "loss": 1.8045, "step": 46077 }, { "epoch": 1.53, "grad_norm": 0.514653205871582, "learning_rate": 0.0002896283415928021, "loss": 1.7431, "step": 46078 }, { "epoch": 1.53, "grad_norm": 0.5290655493736267, "learning_rate": 0.0002896178957308524, "loss": 1.6758, "step": 46079 }, { "epoch": 1.53, "grad_norm": 0.5162631273269653, "learning_rate": 0.0002896074498815049, "loss": 1.734, "step": 46080 }, { "epoch": 1.53, "grad_norm": 0.538601279258728, "learning_rate": 0.00028959700404477255, "loss": 1.8103, "step": 46081 }, { "epoch": 1.53, "grad_norm": 0.5109067559242249, "learning_rate": 0.0002895865582206679, "loss": 1.7664, "step": 46082 }, { "epoch": 1.53, "grad_norm": 0.5421122908592224, "learning_rate": 0.0002895761124092036, "loss": 1.7833, "step": 46083 }, { "epoch": 1.53, "grad_norm": 0.530778169631958, "learning_rate": 0.0002895656666103923, "loss": 1.7099, "step": 46084 }, { "epoch": 1.53, "grad_norm": 0.5287036895751953, "learning_rate": 0.0002895552208242469, "loss": 1.729, "step": 46085 }, { "epoch": 1.53, "grad_norm": 0.5167496204376221, "learning_rate": 0.0002895447750507797, "loss": 1.7701, "step": 46086 }, { "epoch": 1.53, "grad_norm": 0.5306955575942993, "learning_rate": 0.0002895343292900037, "loss": 1.7055, "step": 46087 }, { "epoch": 1.53, "grad_norm": 0.5490549802780151, "learning_rate": 0.00028952388354193154, "loss": 1.728, "step": 46088 }, { "epoch": 1.53, "grad_norm": 0.546994686126709, "learning_rate": 0.00028951343780657585, "loss": 1.7778, "step": 46089 }, { "epoch": 1.53, "grad_norm": 0.5164962410926819, "learning_rate": 0.00028950299208394925, "loss": 1.7327, "step": 46090 }, { "epoch": 1.53, "grad_norm": 0.5586389899253845, "learning_rate": 0.0002894925463740644, "loss": 1.7961, "step": 46091 }, { "epoch": 1.53, "grad_norm": 0.5458754301071167, "learning_rate": 0.0002894821006769342, "loss": 1.7249, "step": 46092 }, { "epoch": 1.53, "grad_norm": 0.5159302949905396, "learning_rate": 0.00028947165499257105, "loss": 1.7696, "step": 46093 }, { "epoch": 1.53, "grad_norm": 0.5218983292579651, "learning_rate": 0.0002894612093209878, "loss": 1.7746, "step": 46094 }, { "epoch": 1.53, "grad_norm": 0.53041672706604, "learning_rate": 0.0002894507636621972, "loss": 1.7068, "step": 46095 }, { "epoch": 1.53, "grad_norm": 0.5149569511413574, "learning_rate": 0.0002894403180162117, "loss": 1.6475, "step": 46096 }, { "epoch": 1.53, "grad_norm": 0.5072850584983826, "learning_rate": 0.0002894298723830441, "loss": 1.6762, "step": 46097 }, { "epoch": 1.53, "grad_norm": 0.5123111009597778, "learning_rate": 0.00028941942676270706, "loss": 1.7436, "step": 46098 }, { "epoch": 1.53, "grad_norm": 0.5111558437347412, "learning_rate": 0.0002894089811552134, "loss": 1.7187, "step": 46099 }, { "epoch": 1.53, "grad_norm": 0.5246598124504089, "learning_rate": 0.0002893985355605755, "loss": 1.7427, "step": 46100 }, { "epoch": 1.53, "grad_norm": 0.5084778070449829, "learning_rate": 0.0002893880899788063, "loss": 1.7116, "step": 46101 }, { "epoch": 1.53, "grad_norm": 0.5105836391448975, "learning_rate": 0.0002893776444099185, "loss": 1.6531, "step": 46102 }, { "epoch": 1.53, "grad_norm": 0.5178895592689514, "learning_rate": 0.00028936719885392455, "loss": 1.802, "step": 46103 }, { "epoch": 1.53, "grad_norm": 0.5020015239715576, "learning_rate": 0.0002893567533108373, "loss": 1.6913, "step": 46104 }, { "epoch": 1.53, "grad_norm": 0.5426477789878845, "learning_rate": 0.0002893463077806694, "loss": 1.706, "step": 46105 }, { "epoch": 1.53, "grad_norm": 0.5103890895843506, "learning_rate": 0.00028933586226343345, "loss": 1.7952, "step": 46106 }, { "epoch": 1.53, "grad_norm": 0.5170261859893799, "learning_rate": 0.00028932541675914217, "loss": 1.7195, "step": 46107 }, { "epoch": 1.53, "grad_norm": 0.5142325758934021, "learning_rate": 0.0002893149712678083, "loss": 1.7128, "step": 46108 }, { "epoch": 1.53, "grad_norm": 0.5032011866569519, "learning_rate": 0.00028930452578944453, "loss": 1.7449, "step": 46109 }, { "epoch": 1.53, "grad_norm": 0.5368678569793701, "learning_rate": 0.0002892940803240634, "loss": 1.7908, "step": 46110 }, { "epoch": 1.53, "grad_norm": 0.5276722311973572, "learning_rate": 0.0002892836348716778, "loss": 1.8154, "step": 46111 }, { "epoch": 1.53, "grad_norm": 0.539676308631897, "learning_rate": 0.00028927318943230024, "loss": 1.8075, "step": 46112 }, { "epoch": 1.53, "grad_norm": 0.5256733298301697, "learning_rate": 0.0002892627440059434, "loss": 1.686, "step": 46113 }, { "epoch": 1.53, "grad_norm": 0.5149873495101929, "learning_rate": 0.00028925229859261996, "loss": 1.7195, "step": 46114 }, { "epoch": 1.53, "grad_norm": 0.5250647664070129, "learning_rate": 0.0002892418531923428, "loss": 1.7971, "step": 46115 }, { "epoch": 1.53, "grad_norm": 0.5108292102813721, "learning_rate": 0.00028923140780512434, "loss": 1.8178, "step": 46116 }, { "epoch": 1.53, "grad_norm": 0.5098222494125366, "learning_rate": 0.0002892209624309774, "loss": 1.7708, "step": 46117 }, { "epoch": 1.53, "grad_norm": 0.5256657004356384, "learning_rate": 0.0002892105170699146, "loss": 1.712, "step": 46118 }, { "epoch": 1.53, "grad_norm": 0.5480225086212158, "learning_rate": 0.00028920007172194873, "loss": 1.7842, "step": 46119 }, { "epoch": 1.53, "grad_norm": 0.5202293992042542, "learning_rate": 0.00028918962638709223, "loss": 1.74, "step": 46120 }, { "epoch": 1.53, "grad_norm": 0.5393056273460388, "learning_rate": 0.00028917918106535794, "loss": 1.7501, "step": 46121 }, { "epoch": 1.53, "grad_norm": 0.5277137756347656, "learning_rate": 0.00028916873575675865, "loss": 1.7468, "step": 46122 }, { "epoch": 1.53, "grad_norm": 0.5097799301147461, "learning_rate": 0.0002891582904613069, "loss": 1.6826, "step": 46123 }, { "epoch": 1.53, "grad_norm": 0.5139756798744202, "learning_rate": 0.00028914784517901533, "loss": 1.7704, "step": 46124 }, { "epoch": 1.53, "grad_norm": 0.5344311594963074, "learning_rate": 0.00028913739990989667, "loss": 1.7631, "step": 46125 }, { "epoch": 1.53, "grad_norm": 0.5307592749595642, "learning_rate": 0.0002891269546539637, "loss": 1.7812, "step": 46126 }, { "epoch": 1.53, "grad_norm": 0.5523102283477783, "learning_rate": 0.0002891165094112289, "loss": 1.7928, "step": 46127 }, { "epoch": 1.53, "grad_norm": 0.5201991200447083, "learning_rate": 0.00028910606418170506, "loss": 1.7369, "step": 46128 }, { "epoch": 1.53, "grad_norm": 0.5267682075500488, "learning_rate": 0.00028909561896540493, "loss": 1.7706, "step": 46129 }, { "epoch": 1.53, "grad_norm": 0.5183048844337463, "learning_rate": 0.00028908517376234106, "loss": 1.7657, "step": 46130 }, { "epoch": 1.53, "grad_norm": 0.5136911869049072, "learning_rate": 0.0002890747285725262, "loss": 1.754, "step": 46131 }, { "epoch": 1.53, "grad_norm": 0.5275100469589233, "learning_rate": 0.00028906428339597304, "loss": 1.7234, "step": 46132 }, { "epoch": 1.53, "grad_norm": 0.5209907293319702, "learning_rate": 0.0002890538382326942, "loss": 1.8111, "step": 46133 }, { "epoch": 1.53, "grad_norm": 0.524870753288269, "learning_rate": 0.0002890433930827023, "loss": 1.7507, "step": 46134 }, { "epoch": 1.53, "grad_norm": 0.5313686728477478, "learning_rate": 0.00028903294794601015, "loss": 1.8384, "step": 46135 }, { "epoch": 1.53, "grad_norm": 0.5446987152099609, "learning_rate": 0.0002890225028226305, "loss": 1.7165, "step": 46136 }, { "epoch": 1.53, "grad_norm": 0.5243033170700073, "learning_rate": 0.0002890120577125758, "loss": 1.6454, "step": 46137 }, { "epoch": 1.54, "grad_norm": 0.5314646363258362, "learning_rate": 0.00028900161261585886, "loss": 1.8221, "step": 46138 }, { "epoch": 1.54, "grad_norm": 0.5473470687866211, "learning_rate": 0.00028899116753249243, "loss": 1.7569, "step": 46139 }, { "epoch": 1.54, "grad_norm": 0.5415840744972229, "learning_rate": 0.000288980722462489, "loss": 1.7391, "step": 46140 }, { "epoch": 1.54, "grad_norm": 0.5273110270500183, "learning_rate": 0.0002889702774058613, "loss": 1.7073, "step": 46141 }, { "epoch": 1.54, "grad_norm": 0.5195149779319763, "learning_rate": 0.0002889598323626223, "loss": 1.7979, "step": 46142 }, { "epoch": 1.54, "grad_norm": 0.5087250471115112, "learning_rate": 0.00028894938733278416, "loss": 1.8007, "step": 46143 }, { "epoch": 1.54, "grad_norm": 0.5157696008682251, "learning_rate": 0.00028893894231635996, "loss": 1.7303, "step": 46144 }, { "epoch": 1.54, "grad_norm": 0.5111384987831116, "learning_rate": 0.0002889284973133622, "loss": 1.7358, "step": 46145 }, { "epoch": 1.54, "grad_norm": 0.5037328004837036, "learning_rate": 0.00028891805232380375, "loss": 1.7531, "step": 46146 }, { "epoch": 1.54, "grad_norm": 0.528674840927124, "learning_rate": 0.000288907607347697, "loss": 1.729, "step": 46147 }, { "epoch": 1.54, "grad_norm": 0.5028572082519531, "learning_rate": 0.00028889716238505477, "loss": 1.686, "step": 46148 }, { "epoch": 1.54, "grad_norm": 0.5254361033439636, "learning_rate": 0.00028888671743588996, "loss": 1.78, "step": 46149 }, { "epoch": 1.54, "grad_norm": 0.530604898929596, "learning_rate": 0.0002888762725002148, "loss": 1.7474, "step": 46150 }, { "epoch": 1.54, "grad_norm": 0.5369373559951782, "learning_rate": 0.0002888658275780423, "loss": 1.7022, "step": 46151 }, { "epoch": 1.54, "grad_norm": 0.5164052248001099, "learning_rate": 0.00028885538266938514, "loss": 1.7668, "step": 46152 }, { "epoch": 1.54, "grad_norm": 0.5049262046813965, "learning_rate": 0.0002888449377742557, "loss": 1.7314, "step": 46153 }, { "epoch": 1.54, "grad_norm": 0.5299771428108215, "learning_rate": 0.000288834492892667, "loss": 1.7775, "step": 46154 }, { "epoch": 1.54, "grad_norm": 0.5342257618904114, "learning_rate": 0.0002888240480246315, "loss": 1.7769, "step": 46155 }, { "epoch": 1.54, "grad_norm": 0.4922426640987396, "learning_rate": 0.0002888136031701621, "loss": 1.7787, "step": 46156 }, { "epoch": 1.54, "grad_norm": 0.498625248670578, "learning_rate": 0.0002888031583292712, "loss": 1.6905, "step": 46157 }, { "epoch": 1.54, "grad_norm": 0.511372983455658, "learning_rate": 0.00028879271350197165, "loss": 1.8179, "step": 46158 }, { "epoch": 1.54, "grad_norm": 0.5085194706916809, "learning_rate": 0.0002887822686882762, "loss": 1.7352, "step": 46159 }, { "epoch": 1.54, "grad_norm": 0.5312691926956177, "learning_rate": 0.0002887718238881973, "loss": 1.8771, "step": 46160 }, { "epoch": 1.54, "grad_norm": 0.5833092331886292, "learning_rate": 0.0002887613791017477, "loss": 1.7421, "step": 46161 }, { "epoch": 1.54, "grad_norm": 0.5374385714530945, "learning_rate": 0.00028875093432894033, "loss": 1.7732, "step": 46162 }, { "epoch": 1.54, "grad_norm": 0.5234182476997375, "learning_rate": 0.00028874048956978755, "loss": 1.7063, "step": 46163 }, { "epoch": 1.54, "grad_norm": 0.5605022311210632, "learning_rate": 0.00028873004482430205, "loss": 1.7595, "step": 46164 }, { "epoch": 1.54, "grad_norm": 0.5332157611846924, "learning_rate": 0.0002887196000924967, "loss": 1.7417, "step": 46165 }, { "epoch": 1.54, "grad_norm": 0.5508340001106262, "learning_rate": 0.00028870915537438415, "loss": 1.6925, "step": 46166 }, { "epoch": 1.54, "grad_norm": 0.5205522775650024, "learning_rate": 0.000288698710669977, "loss": 1.7046, "step": 46167 }, { "epoch": 1.54, "grad_norm": 0.5195022225379944, "learning_rate": 0.0002886882659792879, "loss": 1.7733, "step": 46168 }, { "epoch": 1.54, "grad_norm": 0.5332536697387695, "learning_rate": 0.00028867782130232974, "loss": 1.6769, "step": 46169 }, { "epoch": 1.54, "grad_norm": 0.5526461005210876, "learning_rate": 0.00028866737663911485, "loss": 1.8098, "step": 46170 }, { "epoch": 1.54, "grad_norm": 0.5419982075691223, "learning_rate": 0.0002886569319896561, "loss": 1.7592, "step": 46171 }, { "epoch": 1.54, "grad_norm": 0.49984297156333923, "learning_rate": 0.00028864648735396636, "loss": 1.773, "step": 46172 }, { "epoch": 1.54, "grad_norm": 0.505836546421051, "learning_rate": 0.0002886360427320579, "loss": 1.7979, "step": 46173 }, { "epoch": 1.54, "grad_norm": 0.5074307918548584, "learning_rate": 0.00028862559812394364, "loss": 1.7902, "step": 46174 }, { "epoch": 1.54, "grad_norm": 0.5625159740447998, "learning_rate": 0.00028861515352963626, "loss": 1.8245, "step": 46175 }, { "epoch": 1.54, "grad_norm": 0.5201460719108582, "learning_rate": 0.0002886047089491485, "loss": 1.7902, "step": 46176 }, { "epoch": 1.54, "grad_norm": 0.5244845747947693, "learning_rate": 0.0002885942643824928, "loss": 1.7824, "step": 46177 }, { "epoch": 1.54, "grad_norm": 0.53502357006073, "learning_rate": 0.000288583819829682, "loss": 1.8437, "step": 46178 }, { "epoch": 1.54, "grad_norm": 0.5190637111663818, "learning_rate": 0.0002885733752907289, "loss": 1.7275, "step": 46179 }, { "epoch": 1.54, "grad_norm": 0.5371735692024231, "learning_rate": 0.0002885629307656459, "loss": 1.8092, "step": 46180 }, { "epoch": 1.54, "grad_norm": 0.5222951173782349, "learning_rate": 0.0002885524862544458, "loss": 1.6621, "step": 46181 }, { "epoch": 1.54, "grad_norm": 0.5183441638946533, "learning_rate": 0.00028854204175714145, "loss": 1.7447, "step": 46182 }, { "epoch": 1.54, "grad_norm": 0.530191957950592, "learning_rate": 0.00028853159727374524, "loss": 1.7877, "step": 46183 }, { "epoch": 1.54, "grad_norm": 0.5114781856536865, "learning_rate": 0.00028852115280427, "loss": 1.7549, "step": 46184 }, { "epoch": 1.54, "grad_norm": 0.5124315023422241, "learning_rate": 0.0002885107083487284, "loss": 1.7633, "step": 46185 }, { "epoch": 1.54, "grad_norm": 0.5172431468963623, "learning_rate": 0.0002885002639071332, "loss": 1.7064, "step": 46186 }, { "epoch": 1.54, "grad_norm": 0.5221030116081238, "learning_rate": 0.0002884898194794968, "loss": 1.7205, "step": 46187 }, { "epoch": 1.54, "grad_norm": 0.5299089550971985, "learning_rate": 0.00028847937506583214, "loss": 1.7298, "step": 46188 }, { "epoch": 1.54, "grad_norm": 0.5223606824874878, "learning_rate": 0.0002884689306661519, "loss": 1.7112, "step": 46189 }, { "epoch": 1.54, "grad_norm": 0.5261236429214478, "learning_rate": 0.00028845848628046865, "loss": 1.8405, "step": 46190 }, { "epoch": 1.54, "grad_norm": 0.5160866379737854, "learning_rate": 0.000288448041908795, "loss": 1.7876, "step": 46191 }, { "epoch": 1.54, "grad_norm": 0.5267457962036133, "learning_rate": 0.0002884375975511439, "loss": 1.7209, "step": 46192 }, { "epoch": 1.54, "grad_norm": 0.5292361378669739, "learning_rate": 0.00028842715320752773, "loss": 1.733, "step": 46193 }, { "epoch": 1.54, "grad_norm": 0.5672764778137207, "learning_rate": 0.0002884167088779592, "loss": 1.7923, "step": 46194 }, { "epoch": 1.54, "grad_norm": 0.5362516641616821, "learning_rate": 0.0002884062645624512, "loss": 1.6766, "step": 46195 }, { "epoch": 1.54, "grad_norm": 0.5256267786026001, "learning_rate": 0.00028839582026101633, "loss": 1.8008, "step": 46196 }, { "epoch": 1.54, "grad_norm": 0.5303182005882263, "learning_rate": 0.00028838537597366713, "loss": 1.771, "step": 46197 }, { "epoch": 1.54, "grad_norm": 0.5433747172355652, "learning_rate": 0.00028837493170041646, "loss": 1.7927, "step": 46198 }, { "epoch": 1.54, "grad_norm": 0.5258327722549438, "learning_rate": 0.0002883644874412769, "loss": 1.775, "step": 46199 }, { "epoch": 1.54, "grad_norm": 0.5209042429924011, "learning_rate": 0.0002883540431962611, "loss": 1.7642, "step": 46200 }, { "epoch": 1.54, "grad_norm": 0.5069372057914734, "learning_rate": 0.0002883435989653817, "loss": 1.7621, "step": 46201 }, { "epoch": 1.54, "grad_norm": 0.5034363269805908, "learning_rate": 0.0002883331547486516, "loss": 1.7573, "step": 46202 }, { "epoch": 1.54, "grad_norm": 0.5380741357803345, "learning_rate": 0.0002883227105460832, "loss": 1.8096, "step": 46203 }, { "epoch": 1.54, "grad_norm": 0.5355759859085083, "learning_rate": 0.00028831226635768936, "loss": 1.7635, "step": 46204 }, { "epoch": 1.54, "grad_norm": 0.5377627611160278, "learning_rate": 0.0002883018221834827, "loss": 1.7008, "step": 46205 }, { "epoch": 1.54, "grad_norm": 2.2879233360290527, "learning_rate": 0.000288291378023476, "loss": 1.7358, "step": 46206 }, { "epoch": 1.54, "grad_norm": 0.5263802409172058, "learning_rate": 0.0002882809338776817, "loss": 1.748, "step": 46207 }, { "epoch": 1.54, "grad_norm": 0.5437618494033813, "learning_rate": 0.0002882704897461126, "loss": 1.8021, "step": 46208 }, { "epoch": 1.54, "grad_norm": 0.8649740815162659, "learning_rate": 0.0002882600456287816, "loss": 1.8487, "step": 46209 }, { "epoch": 1.54, "grad_norm": 0.5261970162391663, "learning_rate": 0.000288249601525701, "loss": 1.8213, "step": 46210 }, { "epoch": 1.54, "grad_norm": 0.5281663537025452, "learning_rate": 0.0002882391574368837, "loss": 1.759, "step": 46211 }, { "epoch": 1.54, "grad_norm": 0.5728069543838501, "learning_rate": 0.0002882287133623424, "loss": 1.7327, "step": 46212 }, { "epoch": 1.54, "grad_norm": 0.5512362718582153, "learning_rate": 0.0002882182693020896, "loss": 1.8205, "step": 46213 }, { "epoch": 1.54, "grad_norm": 0.5009785890579224, "learning_rate": 0.0002882078252561381, "loss": 1.7739, "step": 46214 }, { "epoch": 1.54, "grad_norm": 0.5270786881446838, "learning_rate": 0.00028819738122450055, "loss": 1.7717, "step": 46215 }, { "epoch": 1.54, "grad_norm": 0.520919680595398, "learning_rate": 0.0002881869372071898, "loss": 1.7811, "step": 46216 }, { "epoch": 1.54, "grad_norm": 0.5478365421295166, "learning_rate": 0.00028817649320421815, "loss": 1.7494, "step": 46217 }, { "epoch": 1.54, "grad_norm": 0.5529407858848572, "learning_rate": 0.0002881660492155986, "loss": 1.7435, "step": 46218 }, { "epoch": 1.54, "grad_norm": 0.5468615293502808, "learning_rate": 0.00028815560524134376, "loss": 1.6882, "step": 46219 }, { "epoch": 1.54, "grad_norm": 0.5068994164466858, "learning_rate": 0.0002881451612814662, "loss": 1.7493, "step": 46220 }, { "epoch": 1.54, "grad_norm": 0.5199678540229797, "learning_rate": 0.0002881347173359787, "loss": 1.8336, "step": 46221 }, { "epoch": 1.54, "grad_norm": 0.5566740036010742, "learning_rate": 0.00028812427340489395, "loss": 1.8013, "step": 46222 }, { "epoch": 1.54, "grad_norm": 0.5719956159591675, "learning_rate": 0.0002881138294882245, "loss": 1.7257, "step": 46223 }, { "epoch": 1.54, "grad_norm": 0.5147968530654907, "learning_rate": 0.00028810338558598307, "loss": 1.7132, "step": 46224 }, { "epoch": 1.54, "grad_norm": 0.5369877219200134, "learning_rate": 0.0002880929416981825, "loss": 1.8412, "step": 46225 }, { "epoch": 1.54, "grad_norm": 0.5296240448951721, "learning_rate": 0.0002880824978248353, "loss": 1.7376, "step": 46226 }, { "epoch": 1.54, "grad_norm": 0.5154147744178772, "learning_rate": 0.00028807205396595416, "loss": 1.7178, "step": 46227 }, { "epoch": 1.54, "grad_norm": 0.5109637975692749, "learning_rate": 0.0002880616101215518, "loss": 1.7033, "step": 46228 }, { "epoch": 1.54, "grad_norm": 0.5538811087608337, "learning_rate": 0.00028805116629164094, "loss": 1.7989, "step": 46229 }, { "epoch": 1.54, "grad_norm": 0.5214338302612305, "learning_rate": 0.00028804072247623415, "loss": 1.7734, "step": 46230 }, { "epoch": 1.54, "grad_norm": 0.5050491094589233, "learning_rate": 0.00028803027867534413, "loss": 1.7875, "step": 46231 }, { "epoch": 1.54, "grad_norm": 0.5251203775405884, "learning_rate": 0.00028801983488898367, "loss": 1.8294, "step": 46232 }, { "epoch": 1.54, "grad_norm": 0.5621759295463562, "learning_rate": 0.00028800939111716533, "loss": 1.7822, "step": 46233 }, { "epoch": 1.54, "grad_norm": 0.5114419460296631, "learning_rate": 0.0002879989473599018, "loss": 1.8346, "step": 46234 }, { "epoch": 1.54, "grad_norm": 0.7564078569412231, "learning_rate": 0.0002879885036172058, "loss": 1.7413, "step": 46235 }, { "epoch": 1.54, "grad_norm": 1.3764922618865967, "learning_rate": 0.00028797805988909, "loss": 1.8075, "step": 46236 }, { "epoch": 1.54, "grad_norm": 0.5218270421028137, "learning_rate": 0.00028796761617556703, "loss": 1.724, "step": 46237 }, { "epoch": 1.54, "grad_norm": 0.5187690258026123, "learning_rate": 0.0002879571724766496, "loss": 1.7695, "step": 46238 }, { "epoch": 1.54, "grad_norm": 0.531050443649292, "learning_rate": 0.00028794672879235044, "loss": 1.8287, "step": 46239 }, { "epoch": 1.54, "grad_norm": 0.5049516558647156, "learning_rate": 0.0002879362851226821, "loss": 1.7416, "step": 46240 }, { "epoch": 1.54, "grad_norm": 0.5504412055015564, "learning_rate": 0.0002879258414676574, "loss": 1.7519, "step": 46241 }, { "epoch": 1.54, "grad_norm": 0.5399960875511169, "learning_rate": 0.0002879153978272889, "loss": 1.7704, "step": 46242 }, { "epoch": 1.54, "grad_norm": 0.49540287256240845, "learning_rate": 0.0002879049542015894, "loss": 1.6965, "step": 46243 }, { "epoch": 1.54, "grad_norm": 0.5189359784126282, "learning_rate": 0.00028789451059057147, "loss": 1.7995, "step": 46244 }, { "epoch": 1.54, "grad_norm": 0.5413158535957336, "learning_rate": 0.00028788406699424774, "loss": 1.7566, "step": 46245 }, { "epoch": 1.54, "grad_norm": 0.5172630548477173, "learning_rate": 0.00028787362341263105, "loss": 1.7107, "step": 46246 }, { "epoch": 1.54, "grad_norm": 0.5090526938438416, "learning_rate": 0.00028786317984573397, "loss": 1.7307, "step": 46247 }, { "epoch": 1.54, "grad_norm": 0.5196688771247864, "learning_rate": 0.0002878527362935692, "loss": 1.8109, "step": 46248 }, { "epoch": 1.54, "grad_norm": 0.5199418067932129, "learning_rate": 0.00028784229275614947, "loss": 1.7216, "step": 46249 }, { "epoch": 1.54, "grad_norm": 0.5135303139686584, "learning_rate": 0.00028783184923348735, "loss": 1.7372, "step": 46250 }, { "epoch": 1.54, "grad_norm": 0.5128087997436523, "learning_rate": 0.0002878214057255955, "loss": 1.7452, "step": 46251 }, { "epoch": 1.54, "grad_norm": 0.5188406109809875, "learning_rate": 0.0002878109622324867, "loss": 1.7594, "step": 46252 }, { "epoch": 1.54, "grad_norm": 0.5168049931526184, "learning_rate": 0.0002878005187541738, "loss": 1.7935, "step": 46253 }, { "epoch": 1.54, "grad_norm": 0.5165135264396667, "learning_rate": 0.0002877900752906691, "loss": 1.6736, "step": 46254 }, { "epoch": 1.54, "grad_norm": 0.516485333442688, "learning_rate": 0.00028777963184198547, "loss": 1.7889, "step": 46255 }, { "epoch": 1.54, "grad_norm": 0.5306912064552307, "learning_rate": 0.00028776918840813556, "loss": 1.75, "step": 46256 }, { "epoch": 1.54, "grad_norm": 2.2213990688323975, "learning_rate": 0.00028775874498913206, "loss": 1.8034, "step": 46257 }, { "epoch": 1.54, "grad_norm": 0.5124786496162415, "learning_rate": 0.00028774830158498756, "loss": 1.7846, "step": 46258 }, { "epoch": 1.54, "grad_norm": 0.510231614112854, "learning_rate": 0.00028773785819571505, "loss": 1.7842, "step": 46259 }, { "epoch": 1.54, "grad_norm": 0.5307221412658691, "learning_rate": 0.00028772741482132675, "loss": 1.7783, "step": 46260 }, { "epoch": 1.54, "grad_norm": 0.5013235807418823, "learning_rate": 0.0002877169714618356, "loss": 1.7504, "step": 46261 }, { "epoch": 1.54, "grad_norm": 0.7837315797805786, "learning_rate": 0.0002877065281172543, "loss": 1.7462, "step": 46262 }, { "epoch": 1.54, "grad_norm": 0.5204328894615173, "learning_rate": 0.00028769608478759545, "loss": 1.7537, "step": 46263 }, { "epoch": 1.54, "grad_norm": 0.5096883177757263, "learning_rate": 0.0002876856414728717, "loss": 1.7698, "step": 46264 }, { "epoch": 1.54, "grad_norm": 0.5073627829551697, "learning_rate": 0.0002876751981730957, "loss": 1.7696, "step": 46265 }, { "epoch": 1.54, "grad_norm": 0.511303186416626, "learning_rate": 0.0002876647548882804, "loss": 1.7174, "step": 46266 }, { "epoch": 1.54, "grad_norm": 0.5032422542572021, "learning_rate": 0.00028765431161843807, "loss": 1.7968, "step": 46267 }, { "epoch": 1.54, "grad_norm": 0.5148755311965942, "learning_rate": 0.00028764386836358164, "loss": 1.7324, "step": 46268 }, { "epoch": 1.54, "grad_norm": 0.5217425227165222, "learning_rate": 0.00028763342512372385, "loss": 1.8003, "step": 46269 }, { "epoch": 1.54, "grad_norm": 0.5343912839889526, "learning_rate": 0.00028762298189887717, "loss": 1.7819, "step": 46270 }, { "epoch": 1.54, "grad_norm": 0.5322833061218262, "learning_rate": 0.0002876125386890543, "loss": 1.6839, "step": 46271 }, { "epoch": 1.54, "grad_norm": 0.508928656578064, "learning_rate": 0.000287602095494268, "loss": 1.704, "step": 46272 }, { "epoch": 1.54, "grad_norm": 0.5277321934700012, "learning_rate": 0.00028759165231453113, "loss": 1.7926, "step": 46273 }, { "epoch": 1.54, "grad_norm": 0.5318009257316589, "learning_rate": 0.00028758120914985593, "loss": 1.7073, "step": 46274 }, { "epoch": 1.54, "grad_norm": 0.5082702040672302, "learning_rate": 0.0002875707660002554, "loss": 1.7725, "step": 46275 }, { "epoch": 1.54, "grad_norm": 0.4996356964111328, "learning_rate": 0.0002875603228657422, "loss": 1.7233, "step": 46276 }, { "epoch": 1.54, "grad_norm": 0.5312414169311523, "learning_rate": 0.00028754987974632885, "loss": 1.7377, "step": 46277 }, { "epoch": 1.54, "grad_norm": 0.5340635180473328, "learning_rate": 0.0002875394366420281, "loss": 1.8443, "step": 46278 }, { "epoch": 1.54, "grad_norm": 0.5327776074409485, "learning_rate": 0.0002875289935528527, "loss": 1.7624, "step": 46279 }, { "epoch": 1.54, "grad_norm": 0.504356861114502, "learning_rate": 0.0002875185504788152, "loss": 1.7369, "step": 46280 }, { "epoch": 1.54, "grad_norm": 0.5417243838310242, "learning_rate": 0.00028750810741992827, "loss": 1.7792, "step": 46281 }, { "epoch": 1.54, "grad_norm": 0.5301458835601807, "learning_rate": 0.00028749766437620477, "loss": 1.823, "step": 46282 }, { "epoch": 1.54, "grad_norm": 0.5264621376991272, "learning_rate": 0.0002874872213476573, "loss": 1.756, "step": 46283 }, { "epoch": 1.54, "grad_norm": 0.5407719016075134, "learning_rate": 0.00028747677833429843, "loss": 1.6952, "step": 46284 }, { "epoch": 1.54, "grad_norm": 0.5594256520271301, "learning_rate": 0.0002874663353361409, "loss": 1.7731, "step": 46285 }, { "epoch": 1.54, "grad_norm": 0.5459758639335632, "learning_rate": 0.0002874558923531975, "loss": 1.7754, "step": 46286 }, { "epoch": 1.54, "grad_norm": 0.5225343704223633, "learning_rate": 0.0002874454493854807, "loss": 1.711, "step": 46287 }, { "epoch": 1.54, "grad_norm": 0.5214718580245972, "learning_rate": 0.00028743500643300317, "loss": 1.7607, "step": 46288 }, { "epoch": 1.54, "grad_norm": 0.5355820059776306, "learning_rate": 0.00028742456349577797, "loss": 1.7331, "step": 46289 }, { "epoch": 1.54, "grad_norm": 0.5270386934280396, "learning_rate": 0.00028741412057381726, "loss": 1.8167, "step": 46290 }, { "epoch": 1.54, "grad_norm": 0.5319429636001587, "learning_rate": 0.000287403677667134, "loss": 1.7623, "step": 46291 }, { "epoch": 1.54, "grad_norm": 0.5273894667625427, "learning_rate": 0.00028739323477574083, "loss": 1.6744, "step": 46292 }, { "epoch": 1.54, "grad_norm": 0.525138795375824, "learning_rate": 0.0002873827918996505, "loss": 1.72, "step": 46293 }, { "epoch": 1.54, "grad_norm": 0.5248062610626221, "learning_rate": 0.00028737234903887553, "loss": 1.7508, "step": 46294 }, { "epoch": 1.54, "grad_norm": 0.5244185924530029, "learning_rate": 0.0002873619061934286, "loss": 1.796, "step": 46295 }, { "epoch": 1.54, "grad_norm": 0.5256636738777161, "learning_rate": 0.0002873514633633227, "loss": 1.8099, "step": 46296 }, { "epoch": 1.54, "grad_norm": 0.525378942489624, "learning_rate": 0.00028734102054856994, "loss": 1.7521, "step": 46297 }, { "epoch": 1.54, "grad_norm": 0.5166686773300171, "learning_rate": 0.0002873305777491835, "loss": 1.7884, "step": 46298 }, { "epoch": 1.54, "grad_norm": 0.5448896884918213, "learning_rate": 0.00028732013496517584, "loss": 1.8106, "step": 46299 }, { "epoch": 1.54, "grad_norm": 0.5341396927833557, "learning_rate": 0.0002873096921965597, "loss": 1.7032, "step": 46300 }, { "epoch": 1.54, "grad_norm": 0.5323981642723083, "learning_rate": 0.0002872992494433476, "loss": 1.7711, "step": 46301 }, { "epoch": 1.54, "grad_norm": 0.5393789410591125, "learning_rate": 0.00028728880670555237, "loss": 1.7522, "step": 46302 }, { "epoch": 1.54, "grad_norm": 0.516682505607605, "learning_rate": 0.0002872783639831869, "loss": 1.8485, "step": 46303 }, { "epoch": 1.54, "grad_norm": 0.5150520205497742, "learning_rate": 0.0002872679212762633, "loss": 1.8651, "step": 46304 }, { "epoch": 1.54, "grad_norm": 0.5225161910057068, "learning_rate": 0.0002872574785847947, "loss": 1.7256, "step": 46305 }, { "epoch": 1.54, "grad_norm": 0.552585244178772, "learning_rate": 0.0002872470359087937, "loss": 1.6645, "step": 46306 }, { "epoch": 1.54, "grad_norm": 0.531506359577179, "learning_rate": 0.00028723659324827284, "loss": 1.6778, "step": 46307 }, { "epoch": 1.54, "grad_norm": 0.5096578001976013, "learning_rate": 0.00028722615060324485, "loss": 1.7561, "step": 46308 }, { "epoch": 1.54, "grad_norm": 0.5351422429084778, "learning_rate": 0.00028721570797372257, "loss": 1.8247, "step": 46309 }, { "epoch": 1.54, "grad_norm": 0.5378262996673584, "learning_rate": 0.0002872052653597184, "loss": 1.7884, "step": 46310 }, { "epoch": 1.54, "grad_norm": 0.5369074940681458, "learning_rate": 0.00028719482276124515, "loss": 1.7718, "step": 46311 }, { "epoch": 1.54, "grad_norm": 0.5363847017288208, "learning_rate": 0.00028718438017831554, "loss": 1.7312, "step": 46312 }, { "epoch": 1.54, "grad_norm": 0.5380472540855408, "learning_rate": 0.0002871739376109423, "loss": 1.8046, "step": 46313 }, { "epoch": 1.54, "grad_norm": 0.5565807819366455, "learning_rate": 0.00028716349505913784, "loss": 1.845, "step": 46314 }, { "epoch": 1.54, "grad_norm": 0.5326650738716125, "learning_rate": 0.0002871530525229151, "loss": 1.7804, "step": 46315 }, { "epoch": 1.54, "grad_norm": 0.5344944000244141, "learning_rate": 0.00028714261000228674, "loss": 1.6907, "step": 46316 }, { "epoch": 1.54, "grad_norm": 0.5581009387969971, "learning_rate": 0.00028713216749726525, "loss": 1.7501, "step": 46317 }, { "epoch": 1.54, "grad_norm": 0.5186424851417542, "learning_rate": 0.0002871217250078633, "loss": 1.7811, "step": 46318 }, { "epoch": 1.54, "grad_norm": 0.5310656428337097, "learning_rate": 0.00028711128253409394, "loss": 1.7774, "step": 46319 }, { "epoch": 1.54, "grad_norm": 0.5266193747520447, "learning_rate": 0.0002871008400759694, "loss": 1.7807, "step": 46320 }, { "epoch": 1.54, "grad_norm": 0.5179809927940369, "learning_rate": 0.00028709039763350256, "loss": 1.761, "step": 46321 }, { "epoch": 1.54, "grad_norm": 0.5333163738250732, "learning_rate": 0.0002870799552067061, "loss": 1.7269, "step": 46322 }, { "epoch": 1.54, "grad_norm": 0.5212300419807434, "learning_rate": 0.0002870695127955927, "loss": 1.821, "step": 46323 }, { "epoch": 1.54, "grad_norm": 0.512931227684021, "learning_rate": 0.000287059070400175, "loss": 1.6772, "step": 46324 }, { "epoch": 1.54, "grad_norm": 0.5109387040138245, "learning_rate": 0.0002870486280204656, "loss": 1.7198, "step": 46325 }, { "epoch": 1.54, "grad_norm": 0.5289713144302368, "learning_rate": 0.0002870381856564774, "loss": 1.7713, "step": 46326 }, { "epoch": 1.54, "grad_norm": 0.5242717862129211, "learning_rate": 0.0002870277433082228, "loss": 1.7698, "step": 46327 }, { "epoch": 1.54, "grad_norm": 0.5363816022872925, "learning_rate": 0.00028701730097571465, "loss": 1.715, "step": 46328 }, { "epoch": 1.54, "grad_norm": 0.5087366104125977, "learning_rate": 0.0002870068586589656, "loss": 1.7957, "step": 46329 }, { "epoch": 1.54, "grad_norm": 0.5500132441520691, "learning_rate": 0.0002869964163579883, "loss": 1.7257, "step": 46330 }, { "epoch": 1.54, "grad_norm": 0.5449476838111877, "learning_rate": 0.0002869859740727955, "loss": 1.8323, "step": 46331 }, { "epoch": 1.54, "grad_norm": 0.5238035321235657, "learning_rate": 0.0002869755318033996, "loss": 1.7677, "step": 46332 }, { "epoch": 1.54, "grad_norm": 0.5177509784698486, "learning_rate": 0.00028696508954981383, "loss": 1.7334, "step": 46333 }, { "epoch": 1.54, "grad_norm": 0.5366294980049133, "learning_rate": 0.0002869546473120502, "loss": 1.8466, "step": 46334 }, { "epoch": 1.54, "grad_norm": 0.5258090496063232, "learning_rate": 0.00028694420509012183, "loss": 1.844, "step": 46335 }, { "epoch": 1.54, "grad_norm": 0.541917622089386, "learning_rate": 0.0002869337628840413, "loss": 1.7737, "step": 46336 }, { "epoch": 1.54, "grad_norm": 0.5321217775344849, "learning_rate": 0.0002869233206938213, "loss": 1.7438, "step": 46337 }, { "epoch": 1.54, "grad_norm": 0.5504303574562073, "learning_rate": 0.00028691287851947435, "loss": 1.7378, "step": 46338 }, { "epoch": 1.54, "grad_norm": 0.5210221409797668, "learning_rate": 0.0002869024363610133, "loss": 1.8221, "step": 46339 }, { "epoch": 1.54, "grad_norm": 0.5161638259887695, "learning_rate": 0.0002868919942184508, "loss": 1.7306, "step": 46340 }, { "epoch": 1.54, "grad_norm": 0.5267294049263, "learning_rate": 0.0002868815520917993, "loss": 1.8033, "step": 46341 }, { "epoch": 1.54, "grad_norm": 0.5239719152450562, "learning_rate": 0.0002868711099810718, "loss": 1.7323, "step": 46342 }, { "epoch": 1.54, "grad_norm": 0.5010883808135986, "learning_rate": 0.0002868606678862809, "loss": 1.6696, "step": 46343 }, { "epoch": 1.54, "grad_norm": 0.5229729413986206, "learning_rate": 0.0002868502258074391, "loss": 1.7166, "step": 46344 }, { "epoch": 1.54, "grad_norm": 0.5282803773880005, "learning_rate": 0.0002868397837445592, "loss": 1.7132, "step": 46345 }, { "epoch": 1.54, "grad_norm": 0.5325244069099426, "learning_rate": 0.00028682934169765395, "loss": 1.7589, "step": 46346 }, { "epoch": 1.54, "grad_norm": 0.5086855888366699, "learning_rate": 0.0002868188996667359, "loss": 1.7594, "step": 46347 }, { "epoch": 1.54, "grad_norm": 0.5266187191009521, "learning_rate": 0.00028680845765181764, "loss": 1.748, "step": 46348 }, { "epoch": 1.54, "grad_norm": 0.5330724120140076, "learning_rate": 0.0002867980156529121, "loss": 1.8048, "step": 46349 }, { "epoch": 1.54, "grad_norm": 0.5212149024009705, "learning_rate": 0.0002867875736700318, "loss": 1.8512, "step": 46350 }, { "epoch": 1.54, "grad_norm": 0.5272907614707947, "learning_rate": 0.0002867771317031895, "loss": 1.7486, "step": 46351 }, { "epoch": 1.54, "grad_norm": 0.542952835559845, "learning_rate": 0.0002867666897523977, "loss": 1.7629, "step": 46352 }, { "epoch": 1.54, "grad_norm": 0.545054018497467, "learning_rate": 0.0002867562478176693, "loss": 1.834, "step": 46353 }, { "epoch": 1.54, "grad_norm": 0.5031518340110779, "learning_rate": 0.00028674580589901677, "loss": 1.7576, "step": 46354 }, { "epoch": 1.54, "grad_norm": 0.5276250243186951, "learning_rate": 0.00028673536399645286, "loss": 1.7466, "step": 46355 }, { "epoch": 1.54, "grad_norm": 0.5312647819519043, "learning_rate": 0.0002867249221099904, "loss": 1.7461, "step": 46356 }, { "epoch": 1.54, "grad_norm": 0.5296387672424316, "learning_rate": 0.00028671448023964176, "loss": 1.7819, "step": 46357 }, { "epoch": 1.54, "grad_norm": 0.5285931825637817, "learning_rate": 0.0002867040383854199, "loss": 1.788, "step": 46358 }, { "epoch": 1.54, "grad_norm": 0.5312716960906982, "learning_rate": 0.0002866935965473374, "loss": 1.8023, "step": 46359 }, { "epoch": 1.54, "grad_norm": 0.5299947261810303, "learning_rate": 0.00028668315472540684, "loss": 1.783, "step": 46360 }, { "epoch": 1.54, "grad_norm": 0.5473331212997437, "learning_rate": 0.000286672712919641, "loss": 1.7631, "step": 46361 }, { "epoch": 1.54, "grad_norm": 0.5398343205451965, "learning_rate": 0.0002866622711300524, "loss": 1.7801, "step": 46362 }, { "epoch": 1.54, "grad_norm": 0.5312053561210632, "learning_rate": 0.000286651829356654, "loss": 1.8012, "step": 46363 }, { "epoch": 1.54, "grad_norm": 0.5181812644004822, "learning_rate": 0.00028664138759945825, "loss": 1.7369, "step": 46364 }, { "epoch": 1.54, "grad_norm": 0.5244043469429016, "learning_rate": 0.00028663094585847787, "loss": 1.735, "step": 46365 }, { "epoch": 1.54, "grad_norm": 0.5642679333686829, "learning_rate": 0.00028662050413372567, "loss": 1.7221, "step": 46366 }, { "epoch": 1.54, "grad_norm": 0.5314221978187561, "learning_rate": 0.0002866100624252141, "loss": 1.7571, "step": 46367 }, { "epoch": 1.54, "grad_norm": 0.5119216442108154, "learning_rate": 0.0002865996207329559, "loss": 1.8009, "step": 46368 }, { "epoch": 1.54, "grad_norm": 0.7093955278396606, "learning_rate": 0.0002865891790569638, "loss": 1.8738, "step": 46369 }, { "epoch": 1.54, "grad_norm": 0.5391128063201904, "learning_rate": 0.0002865787373972506, "loss": 1.697, "step": 46370 }, { "epoch": 1.54, "grad_norm": 0.5218332409858704, "learning_rate": 0.0002865682957538287, "loss": 1.7996, "step": 46371 }, { "epoch": 1.54, "grad_norm": 0.5550948977470398, "learning_rate": 0.00028655785412671093, "loss": 1.7522, "step": 46372 }, { "epoch": 1.54, "grad_norm": 0.5212260484695435, "learning_rate": 0.00028654741251591005, "loss": 1.7958, "step": 46373 }, { "epoch": 1.54, "grad_norm": 0.5247035026550293, "learning_rate": 0.0002865369709214385, "loss": 1.699, "step": 46374 }, { "epoch": 1.54, "grad_norm": 0.542222261428833, "learning_rate": 0.00028652652934330904, "loss": 1.7413, "step": 46375 }, { "epoch": 1.54, "grad_norm": 0.5149667263031006, "learning_rate": 0.0002865160877815346, "loss": 1.7627, "step": 46376 }, { "epoch": 1.54, "grad_norm": 0.514324426651001, "learning_rate": 0.00028650564623612743, "loss": 1.7464, "step": 46377 }, { "epoch": 1.54, "grad_norm": 0.523655354976654, "learning_rate": 0.0002864952047071005, "loss": 1.7622, "step": 46378 }, { "epoch": 1.54, "grad_norm": 0.5068849325180054, "learning_rate": 0.00028648476319446644, "loss": 1.7425, "step": 46379 }, { "epoch": 1.54, "grad_norm": 0.53391033411026, "learning_rate": 0.00028647432169823787, "loss": 1.7747, "step": 46380 }, { "epoch": 1.54, "grad_norm": 0.5446502566337585, "learning_rate": 0.0002864638802184275, "loss": 1.7335, "step": 46381 }, { "epoch": 1.54, "grad_norm": 0.520189106464386, "learning_rate": 0.00028645343875504785, "loss": 1.6982, "step": 46382 }, { "epoch": 1.54, "grad_norm": 0.525053083896637, "learning_rate": 0.00028644299730811194, "loss": 1.8572, "step": 46383 }, { "epoch": 1.54, "grad_norm": 0.5273181200027466, "learning_rate": 0.00028643255587763205, "loss": 1.7208, "step": 46384 }, { "epoch": 1.54, "grad_norm": 0.5216975808143616, "learning_rate": 0.0002864221144636211, "loss": 1.7738, "step": 46385 }, { "epoch": 1.54, "grad_norm": 0.5340259671211243, "learning_rate": 0.0002864116730660917, "loss": 1.6867, "step": 46386 }, { "epoch": 1.54, "grad_norm": 0.548750102519989, "learning_rate": 0.0002864012316850566, "loss": 1.7528, "step": 46387 }, { "epoch": 1.54, "grad_norm": 0.5167630910873413, "learning_rate": 0.0002863907903205283, "loss": 1.7628, "step": 46388 }, { "epoch": 1.54, "grad_norm": 0.5234085917472839, "learning_rate": 0.00028638034897251953, "loss": 1.676, "step": 46389 }, { "epoch": 1.54, "grad_norm": 0.5354451537132263, "learning_rate": 0.00028636990764104324, "loss": 1.7386, "step": 46390 }, { "epoch": 1.54, "grad_norm": 0.5302199125289917, "learning_rate": 0.00028635946632611165, "loss": 1.7643, "step": 46391 }, { "epoch": 1.54, "grad_norm": 0.5260769128799438, "learning_rate": 0.00028634902502773773, "loss": 1.8023, "step": 46392 }, { "epoch": 1.54, "grad_norm": 0.5161718130111694, "learning_rate": 0.0002863385837459341, "loss": 1.6789, "step": 46393 }, { "epoch": 1.54, "grad_norm": 0.5389440655708313, "learning_rate": 0.00028632814248071345, "loss": 1.7005, "step": 46394 }, { "epoch": 1.54, "grad_norm": 0.5235936045646667, "learning_rate": 0.0002863177012320883, "loss": 1.8008, "step": 46395 }, { "epoch": 1.54, "grad_norm": 0.5322741866111755, "learning_rate": 0.0002863072600000716, "loss": 1.8452, "step": 46396 }, { "epoch": 1.54, "grad_norm": 0.5299342274665833, "learning_rate": 0.00028629681878467576, "loss": 1.7118, "step": 46397 }, { "epoch": 1.54, "grad_norm": 0.5388906598091125, "learning_rate": 0.0002862863775859135, "loss": 1.8125, "step": 46398 }, { "epoch": 1.54, "grad_norm": 0.5261439085006714, "learning_rate": 0.0002862759364037977, "loss": 1.7696, "step": 46399 }, { "epoch": 1.54, "grad_norm": 0.5337773561477661, "learning_rate": 0.00028626549523834085, "loss": 1.7628, "step": 46400 }, { "epoch": 1.54, "grad_norm": 0.5306071043014526, "learning_rate": 0.00028625505408955566, "loss": 1.7321, "step": 46401 }, { "epoch": 1.54, "grad_norm": 0.5384228229522705, "learning_rate": 0.0002862446129574548, "loss": 1.8601, "step": 46402 }, { "epoch": 1.54, "grad_norm": 0.5010390281677246, "learning_rate": 0.000286234171842051, "loss": 1.779, "step": 46403 }, { "epoch": 1.54, "grad_norm": 0.5159780979156494, "learning_rate": 0.00028622373074335683, "loss": 1.6889, "step": 46404 }, { "epoch": 1.54, "grad_norm": 0.5303424000740051, "learning_rate": 0.000286213289661385, "loss": 1.838, "step": 46405 }, { "epoch": 1.54, "grad_norm": 0.4956502616405487, "learning_rate": 0.0002862028485961483, "loss": 1.7025, "step": 46406 }, { "epoch": 1.54, "grad_norm": 0.5196093916893005, "learning_rate": 0.0002861924075476592, "loss": 1.7345, "step": 46407 }, { "epoch": 1.54, "grad_norm": 0.5246129631996155, "learning_rate": 0.00028618196651593054, "loss": 1.7455, "step": 46408 }, { "epoch": 1.54, "grad_norm": 0.5205720067024231, "learning_rate": 0.0002861715255009749, "loss": 1.7861, "step": 46409 }, { "epoch": 1.54, "grad_norm": 0.6695921421051025, "learning_rate": 0.0002861610845028051, "loss": 1.7083, "step": 46410 }, { "epoch": 1.54, "grad_norm": 0.5198894143104553, "learning_rate": 0.00028615064352143356, "loss": 1.6991, "step": 46411 }, { "epoch": 1.54, "grad_norm": 0.5001160502433777, "learning_rate": 0.0002861402025568731, "loss": 1.665, "step": 46412 }, { "epoch": 1.54, "grad_norm": 0.5205610394477844, "learning_rate": 0.0002861297616091366, "loss": 1.6528, "step": 46413 }, { "epoch": 1.54, "grad_norm": 0.5240065455436707, "learning_rate": 0.0002861193206782363, "loss": 1.7639, "step": 46414 }, { "epoch": 1.54, "grad_norm": 0.5285075902938843, "learning_rate": 0.00028610887976418515, "loss": 1.7427, "step": 46415 }, { "epoch": 1.54, "grad_norm": 0.5222054123878479, "learning_rate": 0.0002860984388669959, "loss": 1.7257, "step": 46416 }, { "epoch": 1.54, "grad_norm": 0.5377401113510132, "learning_rate": 0.00028608799798668095, "loss": 1.7441, "step": 46417 }, { "epoch": 1.54, "grad_norm": 0.5116214752197266, "learning_rate": 0.00028607755712325316, "loss": 1.744, "step": 46418 }, { "epoch": 1.54, "grad_norm": 0.5034684538841248, "learning_rate": 0.0002860671162767251, "loss": 1.7098, "step": 46419 }, { "epoch": 1.54, "grad_norm": 0.5018720626831055, "learning_rate": 0.0002860566754471097, "loss": 1.7014, "step": 46420 }, { "epoch": 1.54, "grad_norm": 0.5287423729896545, "learning_rate": 0.00028604623463441924, "loss": 1.8145, "step": 46421 }, { "epoch": 1.54, "grad_norm": 0.566681981086731, "learning_rate": 0.0002860357938386667, "loss": 1.7833, "step": 46422 }, { "epoch": 1.54, "grad_norm": 0.5283209681510925, "learning_rate": 0.0002860253530598647, "loss": 1.755, "step": 46423 }, { "epoch": 1.54, "grad_norm": 0.5129607915878296, "learning_rate": 0.0002860149122980257, "loss": 1.7461, "step": 46424 }, { "epoch": 1.54, "grad_norm": 0.5406381487846375, "learning_rate": 0.00028600447155316264, "loss": 1.7732, "step": 46425 }, { "epoch": 1.54, "grad_norm": 0.5227345824241638, "learning_rate": 0.00028599403082528815, "loss": 1.7775, "step": 46426 }, { "epoch": 1.54, "grad_norm": 0.5217832326889038, "learning_rate": 0.00028598359011441476, "loss": 1.7527, "step": 46427 }, { "epoch": 1.54, "grad_norm": 0.5277174711227417, "learning_rate": 0.00028597314942055516, "loss": 1.8096, "step": 46428 }, { "epoch": 1.54, "grad_norm": 0.5205379128456116, "learning_rate": 0.00028596270874372214, "loss": 1.6886, "step": 46429 }, { "epoch": 1.54, "grad_norm": 0.5466570258140564, "learning_rate": 0.0002859522680839284, "loss": 1.7667, "step": 46430 }, { "epoch": 1.54, "grad_norm": 0.5165481567382812, "learning_rate": 0.0002859418274411865, "loss": 1.7624, "step": 46431 }, { "epoch": 1.54, "grad_norm": 0.5125688314437866, "learning_rate": 0.0002859313868155091, "loss": 1.6984, "step": 46432 }, { "epoch": 1.54, "grad_norm": 0.5029633641242981, "learning_rate": 0.000285920946206909, "loss": 1.7039, "step": 46433 }, { "epoch": 1.54, "grad_norm": 0.5300019383430481, "learning_rate": 0.00028591050561539873, "loss": 1.7415, "step": 46434 }, { "epoch": 1.54, "grad_norm": 0.5200485587120056, "learning_rate": 0.000285900065040991, "loss": 1.7801, "step": 46435 }, { "epoch": 1.54, "grad_norm": 0.5230355858802795, "learning_rate": 0.0002858896244836987, "loss": 1.7762, "step": 46436 }, { "epoch": 1.54, "grad_norm": 0.5299215316772461, "learning_rate": 0.0002858791839435341, "loss": 1.7481, "step": 46437 }, { "epoch": 1.54, "grad_norm": 0.5030495524406433, "learning_rate": 0.00028586874342051015, "loss": 1.735, "step": 46438 }, { "epoch": 1.55, "grad_norm": 0.5282208919525146, "learning_rate": 0.0002858583029146395, "loss": 1.7646, "step": 46439 }, { "epoch": 1.55, "grad_norm": 0.5027870535850525, "learning_rate": 0.0002858478624259348, "loss": 1.7239, "step": 46440 }, { "epoch": 1.55, "grad_norm": 0.5254913568496704, "learning_rate": 0.0002858374219544087, "loss": 1.7164, "step": 46441 }, { "epoch": 1.55, "grad_norm": 0.5142847299575806, "learning_rate": 0.0002858269815000738, "loss": 1.6964, "step": 46442 }, { "epoch": 1.55, "grad_norm": 0.5361666679382324, "learning_rate": 0.00028581654106294303, "loss": 1.7856, "step": 46443 }, { "epoch": 1.55, "grad_norm": 0.5161933302879333, "learning_rate": 0.0002858061006430287, "loss": 1.8068, "step": 46444 }, { "epoch": 1.55, "grad_norm": 0.5262462496757507, "learning_rate": 0.0002857956602403437, "loss": 1.7141, "step": 46445 }, { "epoch": 1.55, "grad_norm": 0.5404098629951477, "learning_rate": 0.0002857852198549008, "loss": 1.7084, "step": 46446 }, { "epoch": 1.55, "grad_norm": 0.5191655158996582, "learning_rate": 0.0002857747794867125, "loss": 1.7035, "step": 46447 }, { "epoch": 1.55, "grad_norm": 0.5246730446815491, "learning_rate": 0.0002857643391357915, "loss": 1.8282, "step": 46448 }, { "epoch": 1.55, "grad_norm": 0.5307207107543945, "learning_rate": 0.0002857538988021504, "loss": 1.7084, "step": 46449 }, { "epoch": 1.55, "grad_norm": 0.5240837335586548, "learning_rate": 0.0002857434584858022, "loss": 1.7702, "step": 46450 }, { "epoch": 1.55, "grad_norm": 0.5298915505409241, "learning_rate": 0.00028573301818675916, "loss": 1.8279, "step": 46451 }, { "epoch": 1.55, "grad_norm": 0.513706624507904, "learning_rate": 0.00028572257790503413, "loss": 1.7489, "step": 46452 }, { "epoch": 1.55, "grad_norm": 0.5180237293243408, "learning_rate": 0.00028571213764063986, "loss": 1.7213, "step": 46453 }, { "epoch": 1.55, "grad_norm": 0.5442338585853577, "learning_rate": 0.0002857016973935889, "loss": 1.8226, "step": 46454 }, { "epoch": 1.55, "grad_norm": 0.5088329315185547, "learning_rate": 0.00028569125716389405, "loss": 1.8068, "step": 46455 }, { "epoch": 1.55, "grad_norm": 0.5266215801239014, "learning_rate": 0.0002856808169515679, "loss": 1.7456, "step": 46456 }, { "epoch": 1.55, "grad_norm": 0.5276856422424316, "learning_rate": 0.00028567037675662305, "loss": 1.7625, "step": 46457 }, { "epoch": 1.55, "grad_norm": 0.527251660823822, "learning_rate": 0.00028565993657907223, "loss": 1.7073, "step": 46458 }, { "epoch": 1.55, "grad_norm": 0.5086457133293152, "learning_rate": 0.00028564949641892817, "loss": 1.7582, "step": 46459 }, { "epoch": 1.55, "grad_norm": 0.517861545085907, "learning_rate": 0.0002856390562762036, "loss": 1.7805, "step": 46460 }, { "epoch": 1.55, "grad_norm": 0.5270679593086243, "learning_rate": 0.00028562861615091096, "loss": 1.7134, "step": 46461 }, { "epoch": 1.55, "grad_norm": 0.5257717967033386, "learning_rate": 0.0002856181760430631, "loss": 1.7503, "step": 46462 }, { "epoch": 1.55, "grad_norm": 0.5395022630691528, "learning_rate": 0.0002856077359526728, "loss": 1.8241, "step": 46463 }, { "epoch": 1.55, "grad_norm": 0.5228899121284485, "learning_rate": 0.00028559729587975246, "loss": 1.763, "step": 46464 }, { "epoch": 1.55, "grad_norm": 0.5197663307189941, "learning_rate": 0.0002855868558243148, "loss": 1.7311, "step": 46465 }, { "epoch": 1.55, "grad_norm": 0.5074604749679565, "learning_rate": 0.00028557641578637273, "loss": 1.7648, "step": 46466 }, { "epoch": 1.55, "grad_norm": 0.52626633644104, "learning_rate": 0.00028556597576593864, "loss": 1.7595, "step": 46467 }, { "epoch": 1.55, "grad_norm": 0.535460889339447, "learning_rate": 0.0002855555357630254, "loss": 1.7235, "step": 46468 }, { "epoch": 1.55, "grad_norm": 0.5356984734535217, "learning_rate": 0.0002855450957776456, "loss": 1.8465, "step": 46469 }, { "epoch": 1.55, "grad_norm": 0.514502227306366, "learning_rate": 0.00028553465580981194, "loss": 1.7411, "step": 46470 }, { "epoch": 1.55, "grad_norm": 0.5270550847053528, "learning_rate": 0.00028552421585953706, "loss": 1.8051, "step": 46471 }, { "epoch": 1.55, "grad_norm": 0.515672504901886, "learning_rate": 0.00028551377592683357, "loss": 1.7373, "step": 46472 }, { "epoch": 1.55, "grad_norm": 0.5163585543632507, "learning_rate": 0.0002855033360117144, "loss": 1.6814, "step": 46473 }, { "epoch": 1.55, "grad_norm": 0.5224538445472717, "learning_rate": 0.0002854928961141919, "loss": 1.7694, "step": 46474 }, { "epoch": 1.55, "grad_norm": 0.539240837097168, "learning_rate": 0.0002854824562342789, "loss": 1.7706, "step": 46475 }, { "epoch": 1.55, "grad_norm": 0.5177353620529175, "learning_rate": 0.0002854720163719882, "loss": 1.8574, "step": 46476 }, { "epoch": 1.55, "grad_norm": 0.506267249584198, "learning_rate": 0.00028546157652733215, "loss": 1.7149, "step": 46477 }, { "epoch": 1.55, "grad_norm": 0.5550028085708618, "learning_rate": 0.00028545113670032363, "loss": 1.7489, "step": 46478 }, { "epoch": 1.55, "grad_norm": 0.544899582862854, "learning_rate": 0.0002854406968909753, "loss": 1.7619, "step": 46479 }, { "epoch": 1.55, "grad_norm": 0.5120036005973816, "learning_rate": 0.00028543025709929995, "loss": 1.755, "step": 46480 }, { "epoch": 1.55, "grad_norm": 0.5687265396118164, "learning_rate": 0.00028541981732531, "loss": 1.8232, "step": 46481 }, { "epoch": 1.55, "grad_norm": 0.5096772313117981, "learning_rate": 0.00028540937756901824, "loss": 1.7374, "step": 46482 }, { "epoch": 1.55, "grad_norm": 0.5045877695083618, "learning_rate": 0.0002853989378304374, "loss": 1.7761, "step": 46483 }, { "epoch": 1.55, "grad_norm": 0.5171622037887573, "learning_rate": 0.0002853884981095801, "loss": 1.681, "step": 46484 }, { "epoch": 1.55, "grad_norm": 0.5336374044418335, "learning_rate": 0.0002853780584064589, "loss": 1.8044, "step": 46485 }, { "epoch": 1.55, "grad_norm": 0.5357115268707275, "learning_rate": 0.00028536761872108665, "loss": 1.7891, "step": 46486 }, { "epoch": 1.55, "grad_norm": 0.5184436440467834, "learning_rate": 0.000285357179053476, "loss": 1.7555, "step": 46487 }, { "epoch": 1.55, "grad_norm": 0.525521457195282, "learning_rate": 0.00028534673940363957, "loss": 1.7868, "step": 46488 }, { "epoch": 1.55, "grad_norm": 0.6519111394882202, "learning_rate": 0.00028533629977159, "loss": 1.735, "step": 46489 }, { "epoch": 1.55, "grad_norm": 0.5395479202270508, "learning_rate": 0.00028532586015734006, "loss": 1.7055, "step": 46490 }, { "epoch": 1.55, "grad_norm": 0.545285701751709, "learning_rate": 0.0002853154205609023, "loss": 1.7759, "step": 46491 }, { "epoch": 1.55, "grad_norm": 0.5215877890586853, "learning_rate": 0.0002853049809822894, "loss": 1.8072, "step": 46492 }, { "epoch": 1.55, "grad_norm": 0.5038752555847168, "learning_rate": 0.0002852945414215143, "loss": 1.7646, "step": 46493 }, { "epoch": 1.55, "grad_norm": 0.5472500324249268, "learning_rate": 0.00028528410187858924, "loss": 1.719, "step": 46494 }, { "epoch": 1.55, "grad_norm": 0.5293319821357727, "learning_rate": 0.0002852736623535271, "loss": 1.7877, "step": 46495 }, { "epoch": 1.55, "grad_norm": 0.5255172252655029, "learning_rate": 0.0002852632228463407, "loss": 1.6603, "step": 46496 }, { "epoch": 1.55, "grad_norm": 0.5174632668495178, "learning_rate": 0.0002852527833570426, "loss": 1.7477, "step": 46497 }, { "epoch": 1.55, "grad_norm": 0.5193822383880615, "learning_rate": 0.0002852423438856454, "loss": 1.7415, "step": 46498 }, { "epoch": 1.55, "grad_norm": 0.5207655429840088, "learning_rate": 0.0002852319044321617, "loss": 1.7676, "step": 46499 }, { "epoch": 1.55, "grad_norm": 0.5153892040252686, "learning_rate": 0.0002852214649966045, "loss": 1.6785, "step": 46500 }, { "epoch": 1.55, "grad_norm": 0.5189733505249023, "learning_rate": 0.0002852110255789861, "loss": 1.7338, "step": 46501 }, { "epoch": 1.55, "grad_norm": 0.5244702696800232, "learning_rate": 0.0002852005861793194, "loss": 1.7805, "step": 46502 }, { "epoch": 1.55, "grad_norm": 0.5176029801368713, "learning_rate": 0.00028519014679761703, "loss": 1.7415, "step": 46503 }, { "epoch": 1.55, "grad_norm": 0.5213586688041687, "learning_rate": 0.0002851797074338916, "loss": 1.7385, "step": 46504 }, { "epoch": 1.55, "grad_norm": 0.514812171459198, "learning_rate": 0.00028516926808815583, "loss": 1.7641, "step": 46505 }, { "epoch": 1.55, "grad_norm": 0.5222633481025696, "learning_rate": 0.0002851588287604223, "loss": 1.742, "step": 46506 }, { "epoch": 1.55, "grad_norm": 0.5179610848426819, "learning_rate": 0.00028514838945070393, "loss": 1.7618, "step": 46507 }, { "epoch": 1.55, "grad_norm": 0.5130186080932617, "learning_rate": 0.0002851379501590131, "loss": 1.7825, "step": 46508 }, { "epoch": 1.55, "grad_norm": 0.5261832475662231, "learning_rate": 0.00028512751088536264, "loss": 1.8224, "step": 46509 }, { "epoch": 1.55, "grad_norm": 0.5280230641365051, "learning_rate": 0.00028511707162976526, "loss": 1.7074, "step": 46510 }, { "epoch": 1.55, "grad_norm": 0.5356696248054504, "learning_rate": 0.0002851066323922335, "loss": 1.7677, "step": 46511 }, { "epoch": 1.55, "grad_norm": 0.5238766074180603, "learning_rate": 0.0002850961931727801, "loss": 1.7116, "step": 46512 }, { "epoch": 1.55, "grad_norm": 0.5368825793266296, "learning_rate": 0.00028508575397141777, "loss": 1.7987, "step": 46513 }, { "epoch": 1.55, "grad_norm": 0.5304155349731445, "learning_rate": 0.0002850753147881591, "loss": 1.7722, "step": 46514 }, { "epoch": 1.55, "grad_norm": 0.5492523908615112, "learning_rate": 0.00028506487562301677, "loss": 1.7777, "step": 46515 }, { "epoch": 1.55, "grad_norm": 0.525700032711029, "learning_rate": 0.0002850544364760035, "loss": 1.75, "step": 46516 }, { "epoch": 1.55, "grad_norm": 0.5248683094978333, "learning_rate": 0.000285043997347132, "loss": 1.8145, "step": 46517 }, { "epoch": 1.55, "grad_norm": 0.5098150968551636, "learning_rate": 0.0002850335582364148, "loss": 1.7841, "step": 46518 }, { "epoch": 1.55, "grad_norm": 0.5237951874732971, "learning_rate": 0.00028502311914386467, "loss": 1.7706, "step": 46519 }, { "epoch": 1.55, "grad_norm": 0.49668240547180176, "learning_rate": 0.00028501268006949433, "loss": 1.7605, "step": 46520 }, { "epoch": 1.55, "grad_norm": 0.5229070782661438, "learning_rate": 0.0002850022410133163, "loss": 1.7693, "step": 46521 }, { "epoch": 1.55, "grad_norm": 0.5364157557487488, "learning_rate": 0.00028499180197534335, "loss": 1.8187, "step": 46522 }, { "epoch": 1.55, "grad_norm": 0.5512221455574036, "learning_rate": 0.00028498136295558823, "loss": 1.7202, "step": 46523 }, { "epoch": 1.55, "grad_norm": 0.5282056927680969, "learning_rate": 0.00028497092395406344, "loss": 1.7354, "step": 46524 }, { "epoch": 1.55, "grad_norm": 0.5185844898223877, "learning_rate": 0.00028496048497078175, "loss": 1.7723, "step": 46525 }, { "epoch": 1.55, "grad_norm": 0.5154186487197876, "learning_rate": 0.0002849500460057558, "loss": 1.8057, "step": 46526 }, { "epoch": 1.55, "grad_norm": 0.5074078440666199, "learning_rate": 0.00028493960705899834, "loss": 1.8241, "step": 46527 }, { "epoch": 1.55, "grad_norm": 0.5187281370162964, "learning_rate": 0.00028492916813052194, "loss": 1.763, "step": 46528 }, { "epoch": 1.55, "grad_norm": 0.5697011947631836, "learning_rate": 0.0002849187292203392, "loss": 1.8292, "step": 46529 }, { "epoch": 1.55, "grad_norm": 0.5316526293754578, "learning_rate": 0.00028490829032846315, "loss": 1.7958, "step": 46530 }, { "epoch": 1.55, "grad_norm": 0.525259792804718, "learning_rate": 0.000284897851454906, "loss": 1.6865, "step": 46531 }, { "epoch": 1.55, "grad_norm": 0.5039317607879639, "learning_rate": 0.00028488741259968066, "loss": 1.6945, "step": 46532 }, { "epoch": 1.55, "grad_norm": 0.5289673805236816, "learning_rate": 0.00028487697376279993, "loss": 1.7438, "step": 46533 }, { "epoch": 1.55, "grad_norm": 0.518352210521698, "learning_rate": 0.00028486653494427615, "loss": 1.6858, "step": 46534 }, { "epoch": 1.55, "grad_norm": 0.5181093215942383, "learning_rate": 0.00028485609614412223, "loss": 1.808, "step": 46535 }, { "epoch": 1.55, "grad_norm": 0.5060635209083557, "learning_rate": 0.0002848456573623507, "loss": 1.7336, "step": 46536 }, { "epoch": 1.55, "grad_norm": 0.5217666625976562, "learning_rate": 0.0002848352185989745, "loss": 1.7862, "step": 46537 }, { "epoch": 1.55, "grad_norm": 0.5216405987739563, "learning_rate": 0.00028482477985400595, "loss": 1.6517, "step": 46538 }, { "epoch": 1.55, "grad_norm": 0.5278512239456177, "learning_rate": 0.00028481434112745787, "loss": 1.7189, "step": 46539 }, { "epoch": 1.55, "grad_norm": 0.5196702480316162, "learning_rate": 0.0002848039024193431, "loss": 1.747, "step": 46540 }, { "epoch": 1.55, "grad_norm": 0.5193217396736145, "learning_rate": 0.00028479346372967405, "loss": 1.7368, "step": 46541 }, { "epoch": 1.55, "grad_norm": 0.526648998260498, "learning_rate": 0.0002847830250584635, "loss": 1.8545, "step": 46542 }, { "epoch": 1.55, "grad_norm": 0.5380569100379944, "learning_rate": 0.00028477258640572414, "loss": 1.7923, "step": 46543 }, { "epoch": 1.55, "grad_norm": 0.5162854790687561, "learning_rate": 0.0002847621477714686, "loss": 1.7833, "step": 46544 }, { "epoch": 1.55, "grad_norm": 0.5206199884414673, "learning_rate": 0.00028475170915570944, "loss": 1.8301, "step": 46545 }, { "epoch": 1.55, "grad_norm": 0.5374411344528198, "learning_rate": 0.00028474127055845956, "loss": 1.7418, "step": 46546 }, { "epoch": 1.55, "grad_norm": 0.5174556374549866, "learning_rate": 0.00028473083197973166, "loss": 1.7136, "step": 46547 }, { "epoch": 1.55, "grad_norm": 0.5182807445526123, "learning_rate": 0.00028472039341953813, "loss": 1.7646, "step": 46548 }, { "epoch": 1.55, "grad_norm": 0.5479572415351868, "learning_rate": 0.00028470995487789183, "loss": 1.7589, "step": 46549 }, { "epoch": 1.55, "grad_norm": 0.5220789313316345, "learning_rate": 0.00028469951635480545, "loss": 1.8149, "step": 46550 }, { "epoch": 1.55, "grad_norm": 0.5453187823295593, "learning_rate": 0.0002846890778502915, "loss": 1.6359, "step": 46551 }, { "epoch": 1.55, "grad_norm": 0.5400570034980774, "learning_rate": 0.0002846786393643627, "loss": 1.8415, "step": 46552 }, { "epoch": 1.55, "grad_norm": 0.7496301531791687, "learning_rate": 0.000284668200897032, "loss": 1.7927, "step": 46553 }, { "epoch": 1.55, "grad_norm": 0.5088041424751282, "learning_rate": 0.00028465776244831164, "loss": 1.6693, "step": 46554 }, { "epoch": 1.55, "grad_norm": 0.5355244278907776, "learning_rate": 0.00028464732401821456, "loss": 1.7673, "step": 46555 }, { "epoch": 1.55, "grad_norm": 0.5456355214118958, "learning_rate": 0.00028463688560675335, "loss": 1.766, "step": 46556 }, { "epoch": 1.55, "grad_norm": 0.5196906328201294, "learning_rate": 0.0002846264472139408, "loss": 1.6934, "step": 46557 }, { "epoch": 1.55, "grad_norm": 0.5379424095153809, "learning_rate": 0.0002846160088397894, "loss": 1.8515, "step": 46558 }, { "epoch": 1.55, "grad_norm": 0.5156736373901367, "learning_rate": 0.0002846055704843118, "loss": 1.8219, "step": 46559 }, { "epoch": 1.55, "grad_norm": 0.5401639342308044, "learning_rate": 0.000284595132147521, "loss": 1.7709, "step": 46560 }, { "epoch": 1.55, "grad_norm": 0.5152758359909058, "learning_rate": 0.00028458469382942924, "loss": 1.7309, "step": 46561 }, { "epoch": 1.55, "grad_norm": 0.5212709307670593, "learning_rate": 0.00028457425553004943, "loss": 1.7528, "step": 46562 }, { "epoch": 1.55, "grad_norm": 0.5107585787773132, "learning_rate": 0.0002845638172493943, "loss": 1.7509, "step": 46563 }, { "epoch": 1.55, "grad_norm": 0.5252143740653992, "learning_rate": 0.00028455337898747633, "loss": 1.7444, "step": 46564 }, { "epoch": 1.55, "grad_norm": 0.5246357917785645, "learning_rate": 0.0002845429407443083, "loss": 1.8384, "step": 46565 }, { "epoch": 1.55, "grad_norm": 0.5268224477767944, "learning_rate": 0.0002845325025199028, "loss": 1.7424, "step": 46566 }, { "epoch": 1.55, "grad_norm": 0.5057967901229858, "learning_rate": 0.0002845220643142727, "loss": 1.7185, "step": 46567 }, { "epoch": 1.55, "grad_norm": 0.5127920508384705, "learning_rate": 0.00028451162612743043, "loss": 1.6769, "step": 46568 }, { "epoch": 1.55, "grad_norm": 0.5233505368232727, "learning_rate": 0.00028450118795938877, "loss": 1.7459, "step": 46569 }, { "epoch": 1.55, "grad_norm": 0.5350674390792847, "learning_rate": 0.0002844907498101605, "loss": 1.8309, "step": 46570 }, { "epoch": 1.55, "grad_norm": 0.5274826884269714, "learning_rate": 0.00028448031167975805, "loss": 1.7393, "step": 46571 }, { "epoch": 1.55, "grad_norm": 0.524158775806427, "learning_rate": 0.0002844698735681942, "loss": 1.7429, "step": 46572 }, { "epoch": 1.55, "grad_norm": 0.5329762697219849, "learning_rate": 0.0002844594354754818, "loss": 1.7526, "step": 46573 }, { "epoch": 1.55, "grad_norm": 0.529318630695343, "learning_rate": 0.0002844489974016332, "loss": 1.799, "step": 46574 }, { "epoch": 1.55, "grad_norm": 0.5236777663230896, "learning_rate": 0.0002844385593466612, "loss": 1.7591, "step": 46575 }, { "epoch": 1.55, "grad_norm": 0.5449393391609192, "learning_rate": 0.00028442812131057853, "loss": 1.7773, "step": 46576 }, { "epoch": 1.55, "grad_norm": 0.5180738568305969, "learning_rate": 0.00028441768329339796, "loss": 1.7231, "step": 46577 }, { "epoch": 1.55, "grad_norm": 0.5289523601531982, "learning_rate": 0.0002844072452951319, "loss": 1.7764, "step": 46578 }, { "epoch": 1.55, "grad_norm": 0.5393314361572266, "learning_rate": 0.0002843968073157932, "loss": 1.8036, "step": 46579 }, { "epoch": 1.55, "grad_norm": 0.523813009262085, "learning_rate": 0.00028438636935539444, "loss": 1.7358, "step": 46580 }, { "epoch": 1.55, "grad_norm": 0.5130940079689026, "learning_rate": 0.0002843759314139483, "loss": 1.7975, "step": 46581 }, { "epoch": 1.55, "grad_norm": 0.5353794097900391, "learning_rate": 0.00028436549349146744, "loss": 1.783, "step": 46582 }, { "epoch": 1.55, "grad_norm": 0.5100676417350769, "learning_rate": 0.00028435505558796476, "loss": 1.7643, "step": 46583 }, { "epoch": 1.55, "grad_norm": 0.5076348781585693, "learning_rate": 0.0002843446177034526, "loss": 1.7101, "step": 46584 }, { "epoch": 1.55, "grad_norm": 0.5421704053878784, "learning_rate": 0.0002843341798379437, "loss": 1.7318, "step": 46585 }, { "epoch": 1.55, "grad_norm": 0.5419508814811707, "learning_rate": 0.00028432374199145093, "loss": 1.7568, "step": 46586 }, { "epoch": 1.55, "grad_norm": 0.5195716619491577, "learning_rate": 0.0002843133041639868, "loss": 1.7483, "step": 46587 }, { "epoch": 1.55, "grad_norm": 0.5469002723693848, "learning_rate": 0.000284302866355564, "loss": 1.769, "step": 46588 }, { "epoch": 1.55, "grad_norm": 0.5198719501495361, "learning_rate": 0.0002842924285661951, "loss": 1.7933, "step": 46589 }, { "epoch": 1.55, "grad_norm": 0.550486147403717, "learning_rate": 0.00028428199079589303, "loss": 1.7463, "step": 46590 }, { "epoch": 1.55, "grad_norm": 0.515011727809906, "learning_rate": 0.00028427155304467023, "loss": 1.7125, "step": 46591 }, { "epoch": 1.55, "grad_norm": 0.49510473012924194, "learning_rate": 0.0002842611153125395, "loss": 1.7422, "step": 46592 }, { "epoch": 1.55, "grad_norm": 0.5301793813705444, "learning_rate": 0.00028425067759951344, "loss": 1.7509, "step": 46593 }, { "epoch": 1.55, "grad_norm": 0.5259666442871094, "learning_rate": 0.0002842402399056047, "loss": 1.7285, "step": 46594 }, { "epoch": 1.55, "grad_norm": 0.5403916239738464, "learning_rate": 0.0002842298022308259, "loss": 1.7601, "step": 46595 }, { "epoch": 1.55, "grad_norm": 0.5366063117980957, "learning_rate": 0.00028421936457518997, "loss": 1.7813, "step": 46596 }, { "epoch": 1.55, "grad_norm": 0.5308462381362915, "learning_rate": 0.00028420892693870937, "loss": 1.7882, "step": 46597 }, { "epoch": 1.55, "grad_norm": 0.560837984085083, "learning_rate": 0.00028419848932139676, "loss": 1.741, "step": 46598 }, { "epoch": 1.55, "grad_norm": 0.5256195068359375, "learning_rate": 0.0002841880517232649, "loss": 1.7787, "step": 46599 }, { "epoch": 1.55, "grad_norm": 0.5349674224853516, "learning_rate": 0.0002841776141443264, "loss": 1.7765, "step": 46600 }, { "epoch": 1.55, "grad_norm": 0.5347357988357544, "learning_rate": 0.0002841671765845939, "loss": 1.7486, "step": 46601 }, { "epoch": 1.55, "grad_norm": 0.5190188884735107, "learning_rate": 0.0002841567390440801, "loss": 1.7967, "step": 46602 }, { "epoch": 1.55, "grad_norm": 0.5233432650566101, "learning_rate": 0.0002841463015227977, "loss": 1.7639, "step": 46603 }, { "epoch": 1.55, "grad_norm": 0.5345220565795898, "learning_rate": 0.0002841358640207595, "loss": 1.7713, "step": 46604 }, { "epoch": 1.55, "grad_norm": 0.536712110042572, "learning_rate": 0.0002841254265379779, "loss": 1.7671, "step": 46605 }, { "epoch": 1.55, "grad_norm": 0.5204736590385437, "learning_rate": 0.00028411498907446564, "loss": 1.7696, "step": 46606 }, { "epoch": 1.55, "grad_norm": 0.517531156539917, "learning_rate": 0.00028410455163023555, "loss": 1.7642, "step": 46607 }, { "epoch": 1.55, "grad_norm": 0.5128700733184814, "learning_rate": 0.00028409411420530015, "loss": 1.7307, "step": 46608 }, { "epoch": 1.55, "grad_norm": 0.5282867550849915, "learning_rate": 0.0002840836767996721, "loss": 1.7219, "step": 46609 }, { "epoch": 1.55, "grad_norm": 0.5068038105964661, "learning_rate": 0.00028407323941336427, "loss": 1.7181, "step": 46610 }, { "epoch": 1.55, "grad_norm": 0.5254154801368713, "learning_rate": 0.000284062802046389, "loss": 1.7691, "step": 46611 }, { "epoch": 1.55, "grad_norm": 0.5284119248390198, "learning_rate": 0.0002840523646987592, "loss": 1.7578, "step": 46612 }, { "epoch": 1.55, "grad_norm": 0.5292826890945435, "learning_rate": 0.0002840419273704875, "loss": 1.7495, "step": 46613 }, { "epoch": 1.55, "grad_norm": 0.5354962944984436, "learning_rate": 0.0002840314900615866, "loss": 1.7366, "step": 46614 }, { "epoch": 1.55, "grad_norm": 0.5185943245887756, "learning_rate": 0.0002840210527720691, "loss": 1.7558, "step": 46615 }, { "epoch": 1.55, "grad_norm": 0.5340204238891602, "learning_rate": 0.00028401061550194755, "loss": 1.7879, "step": 46616 }, { "epoch": 1.55, "grad_norm": 0.5179192423820496, "learning_rate": 0.00028400017825123497, "loss": 1.7819, "step": 46617 }, { "epoch": 1.55, "grad_norm": 0.5199077129364014, "learning_rate": 0.00028398974101994366, "loss": 1.842, "step": 46618 }, { "epoch": 1.55, "grad_norm": 0.5290175080299377, "learning_rate": 0.00028397930380808645, "loss": 1.7753, "step": 46619 }, { "epoch": 1.55, "grad_norm": 0.5315356850624084, "learning_rate": 0.00028396886661567614, "loss": 1.6784, "step": 46620 }, { "epoch": 1.55, "grad_norm": 0.5348025560379028, "learning_rate": 0.0002839584294427251, "loss": 1.7376, "step": 46621 }, { "epoch": 1.55, "grad_norm": 0.5375139713287354, "learning_rate": 0.00028394799228924627, "loss": 1.6865, "step": 46622 }, { "epoch": 1.55, "grad_norm": 0.5184134840965271, "learning_rate": 0.0002839375551552521, "loss": 1.7134, "step": 46623 }, { "epoch": 1.55, "grad_norm": 0.5220563411712646, "learning_rate": 0.00028392711804075556, "loss": 1.7006, "step": 46624 }, { "epoch": 1.55, "grad_norm": 0.5168408751487732, "learning_rate": 0.00028391668094576895, "loss": 1.7512, "step": 46625 }, { "epoch": 1.55, "grad_norm": 0.5065062046051025, "learning_rate": 0.00028390624387030514, "loss": 1.7894, "step": 46626 }, { "epoch": 1.55, "grad_norm": 0.5321483016014099, "learning_rate": 0.0002838958068143769, "loss": 1.7184, "step": 46627 }, { "epoch": 1.55, "grad_norm": 0.5158683657646179, "learning_rate": 0.0002838853697779967, "loss": 1.7078, "step": 46628 }, { "epoch": 1.55, "grad_norm": 0.5146824717521667, "learning_rate": 0.00028387493276117726, "loss": 1.7825, "step": 46629 }, { "epoch": 1.55, "grad_norm": 0.5122449994087219, "learning_rate": 0.00028386449576393136, "loss": 1.7416, "step": 46630 }, { "epoch": 1.55, "grad_norm": 0.5259222388267517, "learning_rate": 0.00028385405878627153, "loss": 1.6902, "step": 46631 }, { "epoch": 1.55, "grad_norm": 0.512695848941803, "learning_rate": 0.00028384362182821044, "loss": 1.7459, "step": 46632 }, { "epoch": 1.55, "grad_norm": 0.562809407711029, "learning_rate": 0.00028383318488976083, "loss": 1.7423, "step": 46633 }, { "epoch": 1.55, "grad_norm": 0.5220966935157776, "learning_rate": 0.00028382274797093544, "loss": 1.7803, "step": 46634 }, { "epoch": 1.55, "grad_norm": 0.5136512517929077, "learning_rate": 0.0002838123110717468, "loss": 1.7226, "step": 46635 }, { "epoch": 1.55, "grad_norm": 0.518526554107666, "learning_rate": 0.00028380187419220764, "loss": 1.7766, "step": 46636 }, { "epoch": 1.55, "grad_norm": 0.5132564306259155, "learning_rate": 0.00028379143733233065, "loss": 1.7249, "step": 46637 }, { "epoch": 1.55, "grad_norm": 0.5197465419769287, "learning_rate": 0.0002837810004921284, "loss": 1.7266, "step": 46638 }, { "epoch": 1.55, "grad_norm": 0.549921989440918, "learning_rate": 0.0002837705636716136, "loss": 1.6694, "step": 46639 }, { "epoch": 1.55, "grad_norm": 0.5368452668190002, "learning_rate": 0.0002837601268707991, "loss": 1.7968, "step": 46640 }, { "epoch": 1.55, "grad_norm": 0.5080565810203552, "learning_rate": 0.0002837496900896972, "loss": 1.7119, "step": 46641 }, { "epoch": 1.55, "grad_norm": 0.5230748653411865, "learning_rate": 0.0002837392533283209, "loss": 1.6975, "step": 46642 }, { "epoch": 1.55, "grad_norm": 0.530143678188324, "learning_rate": 0.00028372881658668273, "loss": 1.7572, "step": 46643 }, { "epoch": 1.55, "grad_norm": 0.5459274053573608, "learning_rate": 0.00028371837986479546, "loss": 1.769, "step": 46644 }, { "epoch": 1.55, "grad_norm": 0.5235917568206787, "learning_rate": 0.00028370794316267157, "loss": 1.8057, "step": 46645 }, { "epoch": 1.55, "grad_norm": 0.5542412996292114, "learning_rate": 0.0002836975064803238, "loss": 1.7681, "step": 46646 }, { "epoch": 1.55, "grad_norm": 0.5261927247047424, "learning_rate": 0.00028368706981776504, "loss": 1.7461, "step": 46647 }, { "epoch": 1.55, "grad_norm": 0.5279045701026917, "learning_rate": 0.0002836766331750076, "loss": 1.7442, "step": 46648 }, { "epoch": 1.55, "grad_norm": 0.5089848637580872, "learning_rate": 0.00028366619655206436, "loss": 1.6364, "step": 46649 }, { "epoch": 1.55, "grad_norm": 0.512829065322876, "learning_rate": 0.00028365575994894807, "loss": 1.75, "step": 46650 }, { "epoch": 1.55, "grad_norm": 0.5352202653884888, "learning_rate": 0.00028364532336567117, "loss": 1.8117, "step": 46651 }, { "epoch": 1.55, "grad_norm": 0.5059711337089539, "learning_rate": 0.0002836348868022464, "loss": 1.6951, "step": 46652 }, { "epoch": 1.55, "grad_norm": 0.5420085787773132, "learning_rate": 0.00028362445025868646, "loss": 1.8748, "step": 46653 }, { "epoch": 1.55, "grad_norm": 0.5287185907363892, "learning_rate": 0.0002836140137350042, "loss": 1.7917, "step": 46654 }, { "epoch": 1.55, "grad_norm": 0.5271176099777222, "learning_rate": 0.0002836035772312119, "loss": 1.7332, "step": 46655 }, { "epoch": 1.55, "grad_norm": 0.5303547382354736, "learning_rate": 0.00028359314074732255, "loss": 1.7854, "step": 46656 }, { "epoch": 1.55, "grad_norm": 0.5272451639175415, "learning_rate": 0.00028358270428334876, "loss": 1.6744, "step": 46657 }, { "epoch": 1.55, "grad_norm": 0.5237498879432678, "learning_rate": 0.0002835722678393031, "loss": 1.6561, "step": 46658 }, { "epoch": 1.55, "grad_norm": 0.5176107287406921, "learning_rate": 0.00028356183141519824, "loss": 1.6756, "step": 46659 }, { "epoch": 1.55, "grad_norm": 0.5071929097175598, "learning_rate": 0.000283551395011047, "loss": 1.6776, "step": 46660 }, { "epoch": 1.55, "grad_norm": 0.5301352143287659, "learning_rate": 0.0002835409586268618, "loss": 1.7876, "step": 46661 }, { "epoch": 1.55, "grad_norm": 0.5491205453872681, "learning_rate": 0.0002835305222626554, "loss": 1.7324, "step": 46662 }, { "epoch": 1.55, "grad_norm": 0.5243717432022095, "learning_rate": 0.0002835200859184407, "loss": 1.8034, "step": 46663 }, { "epoch": 1.55, "grad_norm": 0.5194695591926575, "learning_rate": 0.00028350964959423017, "loss": 1.8026, "step": 46664 }, { "epoch": 1.55, "grad_norm": 0.5299078226089478, "learning_rate": 0.0002834992132900364, "loss": 1.7459, "step": 46665 }, { "epoch": 1.55, "grad_norm": 0.5423107147216797, "learning_rate": 0.0002834887770058722, "loss": 1.814, "step": 46666 }, { "epoch": 1.55, "grad_norm": 0.5298559069633484, "learning_rate": 0.0002834783407417503, "loss": 1.7719, "step": 46667 }, { "epoch": 1.55, "grad_norm": 0.5506863594055176, "learning_rate": 0.0002834679044976831, "loss": 1.7344, "step": 46668 }, { "epoch": 1.55, "grad_norm": 0.5185644030570984, "learning_rate": 0.0002834574682736834, "loss": 1.688, "step": 46669 }, { "epoch": 1.55, "grad_norm": 0.5386057496070862, "learning_rate": 0.00028344703206976415, "loss": 1.8366, "step": 46670 }, { "epoch": 1.55, "grad_norm": 0.5314486026763916, "learning_rate": 0.0002834365958859375, "loss": 1.7545, "step": 46671 }, { "epoch": 1.55, "grad_norm": 0.5252835154533386, "learning_rate": 0.00028342615972221644, "loss": 1.7561, "step": 46672 }, { "epoch": 1.55, "grad_norm": 0.5340327024459839, "learning_rate": 0.0002834157235786136, "loss": 1.7952, "step": 46673 }, { "epoch": 1.55, "grad_norm": 0.5511220693588257, "learning_rate": 0.00028340528745514175, "loss": 1.7186, "step": 46674 }, { "epoch": 1.55, "grad_norm": 0.516130805015564, "learning_rate": 0.00028339485135181334, "loss": 1.7857, "step": 46675 }, { "epoch": 1.55, "grad_norm": 0.5097054243087769, "learning_rate": 0.000283384415268641, "loss": 1.7782, "step": 46676 }, { "epoch": 1.55, "grad_norm": 0.5267069339752197, "learning_rate": 0.0002833739792056378, "loss": 1.9055, "step": 46677 }, { "epoch": 1.55, "grad_norm": 0.5455381274223328, "learning_rate": 0.0002833635431628159, "loss": 1.7667, "step": 46678 }, { "epoch": 1.55, "grad_norm": 0.5505618453025818, "learning_rate": 0.0002833531071401883, "loss": 1.7852, "step": 46679 }, { "epoch": 1.55, "grad_norm": 0.5199102163314819, "learning_rate": 0.00028334267113776766, "loss": 1.7145, "step": 46680 }, { "epoch": 1.55, "grad_norm": 0.5373367667198181, "learning_rate": 0.0002833322351555664, "loss": 1.6441, "step": 46681 }, { "epoch": 1.55, "grad_norm": 0.5173965096473694, "learning_rate": 0.0002833217991935975, "loss": 1.6893, "step": 46682 }, { "epoch": 1.55, "grad_norm": 0.9951610565185547, "learning_rate": 0.0002833113632518733, "loss": 1.7487, "step": 46683 }, { "epoch": 1.55, "grad_norm": 0.5213727951049805, "learning_rate": 0.00028330092733040686, "loss": 1.7433, "step": 46684 }, { "epoch": 1.55, "grad_norm": 0.547139048576355, "learning_rate": 0.0002832904914292105, "loss": 1.7488, "step": 46685 }, { "epoch": 1.55, "grad_norm": 0.5303051471710205, "learning_rate": 0.00028328005554829704, "loss": 1.8073, "step": 46686 }, { "epoch": 1.55, "grad_norm": 0.5455249547958374, "learning_rate": 0.0002832696196876792, "loss": 1.7791, "step": 46687 }, { "epoch": 1.55, "grad_norm": 0.5219075679779053, "learning_rate": 0.00028325918384736946, "loss": 1.7925, "step": 46688 }, { "epoch": 1.55, "grad_norm": 0.5169829726219177, "learning_rate": 0.0002832487480273807, "loss": 1.7671, "step": 46689 }, { "epoch": 1.55, "grad_norm": 0.5410836935043335, "learning_rate": 0.0002832383122277255, "loss": 1.7058, "step": 46690 }, { "epoch": 1.55, "grad_norm": 0.5337898135185242, "learning_rate": 0.00028322787644841644, "loss": 1.7472, "step": 46691 }, { "epoch": 1.55, "grad_norm": 0.5085533857345581, "learning_rate": 0.0002832174406894662, "loss": 1.6953, "step": 46692 }, { "epoch": 1.55, "grad_norm": 0.5437140464782715, "learning_rate": 0.00028320700495088763, "loss": 1.7023, "step": 46693 }, { "epoch": 1.55, "grad_norm": 0.5270503163337708, "learning_rate": 0.0002831965692326933, "loss": 1.7731, "step": 46694 }, { "epoch": 1.55, "grad_norm": 0.5183296799659729, "learning_rate": 0.00028318613353489586, "loss": 1.6791, "step": 46695 }, { "epoch": 1.55, "grad_norm": 0.5271241664886475, "learning_rate": 0.00028317569785750793, "loss": 1.7478, "step": 46696 }, { "epoch": 1.55, "grad_norm": 0.5143594145774841, "learning_rate": 0.0002831652622005423, "loss": 1.7272, "step": 46697 }, { "epoch": 1.55, "grad_norm": 0.5162805914878845, "learning_rate": 0.00028315482656401144, "loss": 1.7455, "step": 46698 }, { "epoch": 1.55, "grad_norm": 0.5189842581748962, "learning_rate": 0.00028314439094792815, "loss": 1.6853, "step": 46699 }, { "epoch": 1.55, "grad_norm": 0.5342216491699219, "learning_rate": 0.00028313395535230516, "loss": 1.7551, "step": 46700 }, { "epoch": 1.55, "grad_norm": 0.5397266149520874, "learning_rate": 0.000283123519777155, "loss": 1.8242, "step": 46701 }, { "epoch": 1.55, "grad_norm": 0.5428574085235596, "learning_rate": 0.00028311308422249046, "loss": 1.7345, "step": 46702 }, { "epoch": 1.55, "grad_norm": 0.5475221276283264, "learning_rate": 0.0002831026486883241, "loss": 1.7953, "step": 46703 }, { "epoch": 1.55, "grad_norm": 0.5205640196800232, "learning_rate": 0.0002830922131746687, "loss": 1.7399, "step": 46704 }, { "epoch": 1.55, "grad_norm": 0.5533104538917542, "learning_rate": 0.0002830817776815368, "loss": 1.7054, "step": 46705 }, { "epoch": 1.55, "grad_norm": 0.5430741310119629, "learning_rate": 0.0002830713422089411, "loss": 1.7312, "step": 46706 }, { "epoch": 1.55, "grad_norm": 0.5250710844993591, "learning_rate": 0.00028306090675689436, "loss": 1.8132, "step": 46707 }, { "epoch": 1.55, "grad_norm": 0.5908464789390564, "learning_rate": 0.00028305047132540917, "loss": 1.7911, "step": 46708 }, { "epoch": 1.55, "grad_norm": 0.5058649182319641, "learning_rate": 0.00028304003591449824, "loss": 1.7468, "step": 46709 }, { "epoch": 1.55, "grad_norm": 0.5200055241584778, "learning_rate": 0.0002830296005241742, "loss": 1.7779, "step": 46710 }, { "epoch": 1.55, "grad_norm": 0.5296899080276489, "learning_rate": 0.0002830191651544497, "loss": 1.8203, "step": 46711 }, { "epoch": 1.55, "grad_norm": 0.5390626192092896, "learning_rate": 0.0002830087298053373, "loss": 1.7681, "step": 46712 }, { "epoch": 1.55, "grad_norm": 0.5178930163383484, "learning_rate": 0.00028299829447684993, "loss": 1.8482, "step": 46713 }, { "epoch": 1.55, "grad_norm": 0.5235018730163574, "learning_rate": 0.0002829878591690002, "loss": 1.7921, "step": 46714 }, { "epoch": 1.55, "grad_norm": 0.5261639356613159, "learning_rate": 0.0002829774238818006, "loss": 1.7834, "step": 46715 }, { "epoch": 1.55, "grad_norm": 0.526931881904602, "learning_rate": 0.0002829669886152639, "loss": 1.781, "step": 46716 }, { "epoch": 1.55, "grad_norm": 0.7440166473388672, "learning_rate": 0.0002829565533694029, "loss": 1.6569, "step": 46717 }, { "epoch": 1.55, "grad_norm": 0.5440355539321899, "learning_rate": 0.00028294611814423, "loss": 1.7779, "step": 46718 }, { "epoch": 1.55, "grad_norm": 0.5118301510810852, "learning_rate": 0.0002829356829397579, "loss": 1.7344, "step": 46719 }, { "epoch": 1.55, "grad_norm": 0.5061763525009155, "learning_rate": 0.0002829252477559997, "loss": 1.7526, "step": 46720 }, { "epoch": 1.55, "grad_norm": 0.5248022079467773, "learning_rate": 0.0002829148125929674, "loss": 1.7307, "step": 46721 }, { "epoch": 1.55, "grad_norm": 0.545333981513977, "learning_rate": 0.0002829043774506741, "loss": 1.819, "step": 46722 }, { "epoch": 1.55, "grad_norm": 0.554053008556366, "learning_rate": 0.0002828939423291324, "loss": 1.8068, "step": 46723 }, { "epoch": 1.55, "grad_norm": 0.5186201930046082, "learning_rate": 0.00028288350722835496, "loss": 1.8153, "step": 46724 }, { "epoch": 1.55, "grad_norm": 0.5309838652610779, "learning_rate": 0.0002828730721483544, "loss": 1.6929, "step": 46725 }, { "epoch": 1.55, "grad_norm": 0.5232118964195251, "learning_rate": 0.0002828626370891433, "loss": 1.7163, "step": 46726 }, { "epoch": 1.55, "grad_norm": 0.5142438411712646, "learning_rate": 0.00028285220205073465, "loss": 1.6706, "step": 46727 }, { "epoch": 1.55, "grad_norm": 0.5211843252182007, "learning_rate": 0.0002828417670331407, "loss": 1.6618, "step": 46728 }, { "epoch": 1.55, "grad_norm": 0.5242499113082886, "learning_rate": 0.0002828313320363744, "loss": 1.6967, "step": 46729 }, { "epoch": 1.55, "grad_norm": 0.5449293851852417, "learning_rate": 0.0002828208970604483, "loss": 1.8677, "step": 46730 }, { "epoch": 1.55, "grad_norm": 0.5216162204742432, "learning_rate": 0.00028281046210537516, "loss": 1.7703, "step": 46731 }, { "epoch": 1.55, "grad_norm": 0.5290922522544861, "learning_rate": 0.00028280002717116754, "loss": 1.7694, "step": 46732 }, { "epoch": 1.55, "grad_norm": 0.5150377154350281, "learning_rate": 0.00028278959225783813, "loss": 1.7193, "step": 46733 }, { "epoch": 1.55, "grad_norm": 0.5194418430328369, "learning_rate": 0.0002827791573653997, "loss": 1.6676, "step": 46734 }, { "epoch": 1.55, "grad_norm": 0.5258933305740356, "learning_rate": 0.0002827687224938648, "loss": 1.7987, "step": 46735 }, { "epoch": 1.55, "grad_norm": 0.5293256044387817, "learning_rate": 0.00028275828764324606, "loss": 1.7671, "step": 46736 }, { "epoch": 1.55, "grad_norm": 0.566964328289032, "learning_rate": 0.00028274785281355634, "loss": 1.8231, "step": 46737 }, { "epoch": 1.55, "grad_norm": 0.5122113823890686, "learning_rate": 0.00028273741800480815, "loss": 1.7584, "step": 46738 }, { "epoch": 1.56, "grad_norm": 0.5441895723342896, "learning_rate": 0.0002827269832170141, "loss": 1.679, "step": 46739 }, { "epoch": 1.56, "grad_norm": 0.5088231563568115, "learning_rate": 0.000282716548450187, "loss": 1.7703, "step": 46740 }, { "epoch": 1.56, "grad_norm": 0.519313633441925, "learning_rate": 0.0002827061137043396, "loss": 1.6916, "step": 46741 }, { "epoch": 1.56, "grad_norm": 0.5224509239196777, "learning_rate": 0.0002826956789794842, "loss": 1.7954, "step": 46742 }, { "epoch": 1.56, "grad_norm": 0.4971698820590973, "learning_rate": 0.0002826852442756338, "loss": 1.7331, "step": 46743 }, { "epoch": 1.56, "grad_norm": 0.5451725721359253, "learning_rate": 0.000282674809592801, "loss": 1.8497, "step": 46744 }, { "epoch": 1.56, "grad_norm": 0.5346706509590149, "learning_rate": 0.0002826643749309984, "loss": 1.7821, "step": 46745 }, { "epoch": 1.56, "grad_norm": 0.5207730531692505, "learning_rate": 0.0002826539402902387, "loss": 1.8056, "step": 46746 }, { "epoch": 1.56, "grad_norm": 0.5235346555709839, "learning_rate": 0.0002826435056705346, "loss": 1.7429, "step": 46747 }, { "epoch": 1.56, "grad_norm": 0.5282808542251587, "learning_rate": 0.00028263307107189865, "loss": 1.7961, "step": 46748 }, { "epoch": 1.56, "grad_norm": 0.5094265937805176, "learning_rate": 0.0002826226364943435, "loss": 1.7099, "step": 46749 }, { "epoch": 1.56, "grad_norm": 0.5230543613433838, "learning_rate": 0.00028261220193788206, "loss": 1.7244, "step": 46750 }, { "epoch": 1.56, "grad_norm": 0.5453388690948486, "learning_rate": 0.00028260176740252683, "loss": 1.779, "step": 46751 }, { "epoch": 1.56, "grad_norm": 0.5532698035240173, "learning_rate": 0.00028259133288829045, "loss": 1.8089, "step": 46752 }, { "epoch": 1.56, "grad_norm": 0.5221119523048401, "learning_rate": 0.00028258089839518566, "loss": 1.7809, "step": 46753 }, { "epoch": 1.56, "grad_norm": 0.5549181699752808, "learning_rate": 0.00028257046392322514, "loss": 1.7497, "step": 46754 }, { "epoch": 1.56, "grad_norm": 0.5231512784957886, "learning_rate": 0.0002825600294724214, "loss": 1.8327, "step": 46755 }, { "epoch": 1.56, "grad_norm": 0.5169869661331177, "learning_rate": 0.00028254959504278723, "loss": 1.7554, "step": 46756 }, { "epoch": 1.56, "grad_norm": 0.5238186120986938, "learning_rate": 0.0002825391606343354, "loss": 1.7649, "step": 46757 }, { "epoch": 1.56, "grad_norm": 0.5031035542488098, "learning_rate": 0.0002825287262470783, "loss": 1.7737, "step": 46758 }, { "epoch": 1.56, "grad_norm": 0.5370868444442749, "learning_rate": 0.00028251829188102884, "loss": 1.7481, "step": 46759 }, { "epoch": 1.56, "grad_norm": 0.5234057903289795, "learning_rate": 0.0002825078575361996, "loss": 1.7597, "step": 46760 }, { "epoch": 1.56, "grad_norm": 0.5266345143318176, "learning_rate": 0.00028249742321260324, "loss": 1.7662, "step": 46761 }, { "epoch": 1.56, "grad_norm": 0.5261684656143188, "learning_rate": 0.00028248698891025244, "loss": 1.8269, "step": 46762 }, { "epoch": 1.56, "grad_norm": 0.5387105941772461, "learning_rate": 0.00028247655462915975, "loss": 1.7934, "step": 46763 }, { "epoch": 1.56, "grad_norm": 0.9922338724136353, "learning_rate": 0.00028246612036933813, "loss": 1.7394, "step": 46764 }, { "epoch": 1.56, "grad_norm": 0.5232818722724915, "learning_rate": 0.00028245568613079983, "loss": 1.7553, "step": 46765 }, { "epoch": 1.56, "grad_norm": 0.5169113278388977, "learning_rate": 0.00028244525191355787, "loss": 1.7097, "step": 46766 }, { "epoch": 1.56, "grad_norm": 0.5346500873565674, "learning_rate": 0.00028243481771762487, "loss": 1.7627, "step": 46767 }, { "epoch": 1.56, "grad_norm": 0.530617356300354, "learning_rate": 0.0002824243835430133, "loss": 1.7892, "step": 46768 }, { "epoch": 1.56, "grad_norm": 0.5362333059310913, "learning_rate": 0.0002824139493897359, "loss": 1.7789, "step": 46769 }, { "epoch": 1.56, "grad_norm": 0.5241866707801819, "learning_rate": 0.0002824035152578054, "loss": 1.7943, "step": 46770 }, { "epoch": 1.56, "grad_norm": 0.5475721955299377, "learning_rate": 0.0002823930811472346, "loss": 1.8265, "step": 46771 }, { "epoch": 1.56, "grad_norm": 0.5282971858978271, "learning_rate": 0.0002823826470580358, "loss": 1.7595, "step": 46772 }, { "epoch": 1.56, "grad_norm": 0.5346415638923645, "learning_rate": 0.00028237221299022195, "loss": 1.762, "step": 46773 }, { "epoch": 1.56, "grad_norm": 0.520729660987854, "learning_rate": 0.00028236177894380563, "loss": 1.7368, "step": 46774 }, { "epoch": 1.56, "grad_norm": 0.5386219620704651, "learning_rate": 0.00028235134491879954, "loss": 1.7796, "step": 46775 }, { "epoch": 1.56, "grad_norm": 0.5104472637176514, "learning_rate": 0.00028234091091521626, "loss": 1.7138, "step": 46776 }, { "epoch": 1.56, "grad_norm": 0.5503804087638855, "learning_rate": 0.00028233047693306863, "loss": 1.757, "step": 46777 }, { "epoch": 1.56, "grad_norm": 0.53365159034729, "learning_rate": 0.00028232004297236907, "loss": 1.9063, "step": 46778 }, { "epoch": 1.56, "grad_norm": 0.5177245736122131, "learning_rate": 0.00028230960903313036, "loss": 1.7022, "step": 46779 }, { "epoch": 1.56, "grad_norm": 0.5307243466377258, "learning_rate": 0.0002822991751153652, "loss": 1.7855, "step": 46780 }, { "epoch": 1.56, "grad_norm": 0.4950561821460724, "learning_rate": 0.00028228874121908633, "loss": 1.79, "step": 46781 }, { "epoch": 1.56, "grad_norm": 0.5366365909576416, "learning_rate": 0.00028227830734430627, "loss": 1.7359, "step": 46782 }, { "epoch": 1.56, "grad_norm": 0.5079789161682129, "learning_rate": 0.0002822678734910377, "loss": 1.7175, "step": 46783 }, { "epoch": 1.56, "grad_norm": 0.5288292169570923, "learning_rate": 0.0002822574396592934, "loss": 1.729, "step": 46784 }, { "epoch": 1.56, "grad_norm": 0.546334445476532, "learning_rate": 0.0002822470058490859, "loss": 1.8182, "step": 46785 }, { "epoch": 1.56, "grad_norm": 0.5190197825431824, "learning_rate": 0.0002822365720604278, "loss": 1.7159, "step": 46786 }, { "epoch": 1.56, "grad_norm": 0.5313716530799866, "learning_rate": 0.00028222613829333214, "loss": 1.7253, "step": 46787 }, { "epoch": 1.56, "grad_norm": 0.5235893130302429, "learning_rate": 0.0002822157045478111, "loss": 1.7304, "step": 46788 }, { "epoch": 1.56, "grad_norm": 0.5433689951896667, "learning_rate": 0.00028220527082387765, "loss": 1.7823, "step": 46789 }, { "epoch": 1.56, "grad_norm": 0.5274702310562134, "learning_rate": 0.00028219483712154433, "loss": 1.7394, "step": 46790 }, { "epoch": 1.56, "grad_norm": 0.5236358046531677, "learning_rate": 0.00028218440344082404, "loss": 1.6965, "step": 46791 }, { "epoch": 1.56, "grad_norm": 0.5223762392997742, "learning_rate": 0.0002821739697817291, "loss": 1.7612, "step": 46792 }, { "epoch": 1.56, "grad_norm": 0.5163826942443848, "learning_rate": 0.00028216353614427225, "loss": 1.7397, "step": 46793 }, { "epoch": 1.56, "grad_norm": 0.5205085873603821, "learning_rate": 0.0002821531025284665, "loss": 1.7057, "step": 46794 }, { "epoch": 1.56, "grad_norm": 0.5224295258522034, "learning_rate": 0.00028214266893432404, "loss": 1.7214, "step": 46795 }, { "epoch": 1.56, "grad_norm": 0.5240365862846375, "learning_rate": 0.0002821322353618578, "loss": 1.7143, "step": 46796 }, { "epoch": 1.56, "grad_norm": 0.5228776931762695, "learning_rate": 0.00028212180181108045, "loss": 1.8401, "step": 46797 }, { "epoch": 1.56, "grad_norm": 0.5282037854194641, "learning_rate": 0.00028211136828200454, "loss": 1.6849, "step": 46798 }, { "epoch": 1.56, "grad_norm": 0.5226704478263855, "learning_rate": 0.0002821009347746428, "loss": 1.6819, "step": 46799 }, { "epoch": 1.56, "grad_norm": 0.5408076047897339, "learning_rate": 0.00028209050128900784, "loss": 1.7515, "step": 46800 }, { "epoch": 1.56, "grad_norm": 0.540606677532196, "learning_rate": 0.00028208006782511253, "loss": 1.8172, "step": 46801 }, { "epoch": 1.56, "grad_norm": 0.5176512002944946, "learning_rate": 0.00028206963438296925, "loss": 1.7471, "step": 46802 }, { "epoch": 1.56, "grad_norm": 0.5149701237678528, "learning_rate": 0.0002820592009625908, "loss": 1.659, "step": 46803 }, { "epoch": 1.56, "grad_norm": 0.5490391254425049, "learning_rate": 0.00028204876756398993, "loss": 1.7819, "step": 46804 }, { "epoch": 1.56, "grad_norm": 0.49983522295951843, "learning_rate": 0.0002820383341871791, "loss": 1.7027, "step": 46805 }, { "epoch": 1.56, "grad_norm": 0.4968106746673584, "learning_rate": 0.00028202790083217113, "loss": 1.7162, "step": 46806 }, { "epoch": 1.56, "grad_norm": 0.533084511756897, "learning_rate": 0.0002820174674989787, "loss": 1.769, "step": 46807 }, { "epoch": 1.56, "grad_norm": 0.5285505652427673, "learning_rate": 0.0002820070341876143, "loss": 1.7691, "step": 46808 }, { "epoch": 1.56, "grad_norm": 0.5433531403541565, "learning_rate": 0.0002819966008980907, "loss": 1.8474, "step": 46809 }, { "epoch": 1.56, "grad_norm": 0.5195083618164062, "learning_rate": 0.00028198616763042063, "loss": 1.7519, "step": 46810 }, { "epoch": 1.56, "grad_norm": 0.5227872729301453, "learning_rate": 0.00028197573438461677, "loss": 1.8108, "step": 46811 }, { "epoch": 1.56, "grad_norm": 0.6322980523109436, "learning_rate": 0.0002819653011606916, "loss": 1.8112, "step": 46812 }, { "epoch": 1.56, "grad_norm": 0.5292630195617676, "learning_rate": 0.0002819548679586579, "loss": 1.7991, "step": 46813 }, { "epoch": 1.56, "grad_norm": 0.5246851444244385, "learning_rate": 0.00028194443477852846, "loss": 1.7414, "step": 46814 }, { "epoch": 1.56, "grad_norm": 0.5345807671546936, "learning_rate": 0.00028193400162031577, "loss": 1.7768, "step": 46815 }, { "epoch": 1.56, "grad_norm": 0.5422051548957825, "learning_rate": 0.0002819235684840324, "loss": 1.7665, "step": 46816 }, { "epoch": 1.56, "grad_norm": 0.5518303513526917, "learning_rate": 0.0002819131353696913, "loss": 1.6855, "step": 46817 }, { "epoch": 1.56, "grad_norm": 0.5262324810028076, "learning_rate": 0.00028190270227730494, "loss": 1.7736, "step": 46818 }, { "epoch": 1.56, "grad_norm": 0.536255955696106, "learning_rate": 0.000281892269206886, "loss": 1.7982, "step": 46819 }, { "epoch": 1.56, "grad_norm": 0.5326343178749084, "learning_rate": 0.0002818818361584472, "loss": 1.7396, "step": 46820 }, { "epoch": 1.56, "grad_norm": 0.536845326423645, "learning_rate": 0.00028187140313200125, "loss": 1.7305, "step": 46821 }, { "epoch": 1.56, "grad_norm": 0.528021514415741, "learning_rate": 0.0002818609701275607, "loss": 1.7703, "step": 46822 }, { "epoch": 1.56, "grad_norm": 0.5276448130607605, "learning_rate": 0.0002818505371451382, "loss": 1.8043, "step": 46823 }, { "epoch": 1.56, "grad_norm": 0.5316599607467651, "learning_rate": 0.00028184010418474655, "loss": 1.7874, "step": 46824 }, { "epoch": 1.56, "grad_norm": 0.5313816666603088, "learning_rate": 0.0002818296712463983, "loss": 1.8172, "step": 46825 }, { "epoch": 1.56, "grad_norm": 0.5491235256195068, "learning_rate": 0.00028181923833010613, "loss": 1.8047, "step": 46826 }, { "epoch": 1.56, "grad_norm": 0.5105416178703308, "learning_rate": 0.00028180880543588285, "loss": 1.737, "step": 46827 }, { "epoch": 1.56, "grad_norm": 0.5353087186813354, "learning_rate": 0.0002817983725637409, "loss": 1.7594, "step": 46828 }, { "epoch": 1.56, "grad_norm": 0.5245321989059448, "learning_rate": 0.00028178793971369296, "loss": 1.7024, "step": 46829 }, { "epoch": 1.56, "grad_norm": 0.5323918461799622, "learning_rate": 0.00028177750688575183, "loss": 1.7089, "step": 46830 }, { "epoch": 1.56, "grad_norm": 0.5312896370887756, "learning_rate": 0.0002817670740799302, "loss": 1.6983, "step": 46831 }, { "epoch": 1.56, "grad_norm": 0.5195775032043457, "learning_rate": 0.00028175664129624064, "loss": 1.7019, "step": 46832 }, { "epoch": 1.56, "grad_norm": 0.5377015471458435, "learning_rate": 0.0002817462085346958, "loss": 1.713, "step": 46833 }, { "epoch": 1.56, "grad_norm": 0.5400851368904114, "learning_rate": 0.00028173577579530845, "loss": 1.863, "step": 46834 }, { "epoch": 1.56, "grad_norm": 0.5238675475120544, "learning_rate": 0.0002817253430780911, "loss": 1.7221, "step": 46835 }, { "epoch": 1.56, "grad_norm": 0.5256233811378479, "learning_rate": 0.0002817149103830564, "loss": 1.7332, "step": 46836 }, { "epoch": 1.56, "grad_norm": 0.5409219861030579, "learning_rate": 0.00028170447771021733, "loss": 1.7609, "step": 46837 }, { "epoch": 1.56, "grad_norm": 0.5215587019920349, "learning_rate": 0.00028169404505958614, "loss": 1.7343, "step": 46838 }, { "epoch": 1.56, "grad_norm": 0.5279709100723267, "learning_rate": 0.0002816836124311757, "loss": 1.7272, "step": 46839 }, { "epoch": 1.56, "grad_norm": 0.5213057994842529, "learning_rate": 0.0002816731798249987, "loss": 1.7321, "step": 46840 }, { "epoch": 1.56, "grad_norm": 0.5216305255889893, "learning_rate": 0.0002816627472410678, "loss": 1.7184, "step": 46841 }, { "epoch": 1.56, "grad_norm": 0.515672504901886, "learning_rate": 0.00028165231467939557, "loss": 1.7495, "step": 46842 }, { "epoch": 1.56, "grad_norm": 0.535077691078186, "learning_rate": 0.00028164188213999463, "loss": 1.7139, "step": 46843 }, { "epoch": 1.56, "grad_norm": 0.5293425917625427, "learning_rate": 0.000281631449622878, "loss": 1.7945, "step": 46844 }, { "epoch": 1.56, "grad_norm": 0.5205421447753906, "learning_rate": 0.00028162101712805784, "loss": 1.7521, "step": 46845 }, { "epoch": 1.56, "grad_norm": 0.5221512317657471, "learning_rate": 0.00028161058465554713, "loss": 1.8099, "step": 46846 }, { "epoch": 1.56, "grad_norm": 0.5240396857261658, "learning_rate": 0.00028160015220535847, "loss": 1.7903, "step": 46847 }, { "epoch": 1.56, "grad_norm": 0.5148292779922485, "learning_rate": 0.0002815897197775046, "loss": 1.7357, "step": 46848 }, { "epoch": 1.56, "grad_norm": 0.5407510995864868, "learning_rate": 0.000281579287371998, "loss": 1.7908, "step": 46849 }, { "epoch": 1.56, "grad_norm": 0.5368421077728271, "learning_rate": 0.00028156885498885136, "loss": 1.8037, "step": 46850 }, { "epoch": 1.56, "grad_norm": 0.5512887835502625, "learning_rate": 0.00028155842262807767, "loss": 1.7593, "step": 46851 }, { "epoch": 1.56, "grad_norm": 0.5310271382331848, "learning_rate": 0.00028154799028968906, "loss": 1.8621, "step": 46852 }, { "epoch": 1.56, "grad_norm": 0.5078608393669128, "learning_rate": 0.0002815375579736986, "loss": 1.7369, "step": 46853 }, { "epoch": 1.56, "grad_norm": 0.5257786512374878, "learning_rate": 0.0002815271256801189, "loss": 1.7406, "step": 46854 }, { "epoch": 1.56, "grad_norm": 0.5138864517211914, "learning_rate": 0.0002815166934089624, "loss": 1.7967, "step": 46855 }, { "epoch": 1.56, "grad_norm": 0.5175459980964661, "learning_rate": 0.000281506261160242, "loss": 1.7392, "step": 46856 }, { "epoch": 1.56, "grad_norm": 0.5486847758293152, "learning_rate": 0.0002814958289339701, "loss": 1.7162, "step": 46857 }, { "epoch": 1.56, "grad_norm": 0.5506228804588318, "learning_rate": 0.00028148539673015984, "loss": 1.7511, "step": 46858 }, { "epoch": 1.56, "grad_norm": 0.5211913585662842, "learning_rate": 0.00028147496454882334, "loss": 1.7259, "step": 46859 }, { "epoch": 1.56, "grad_norm": 0.5674930810928345, "learning_rate": 0.00028146453238997357, "loss": 1.7553, "step": 46860 }, { "epoch": 1.56, "grad_norm": 0.5191183686256409, "learning_rate": 0.00028145410025362316, "loss": 1.7673, "step": 46861 }, { "epoch": 1.56, "grad_norm": 0.5256606936454773, "learning_rate": 0.00028144366813978475, "loss": 1.7607, "step": 46862 }, { "epoch": 1.56, "grad_norm": 0.538843035697937, "learning_rate": 0.00028143323604847096, "loss": 1.7123, "step": 46863 }, { "epoch": 1.56, "grad_norm": 0.6157677173614502, "learning_rate": 0.00028142280397969454, "loss": 1.8932, "step": 46864 }, { "epoch": 1.56, "grad_norm": 0.5243902206420898, "learning_rate": 0.00028141237193346806, "loss": 1.6919, "step": 46865 }, { "epoch": 1.56, "grad_norm": 0.540299117565155, "learning_rate": 0.0002814019399098041, "loss": 1.8091, "step": 46866 }, { "epoch": 1.56, "grad_norm": 0.5181747078895569, "learning_rate": 0.0002813915079087155, "loss": 1.8295, "step": 46867 }, { "epoch": 1.56, "grad_norm": 0.5639646053314209, "learning_rate": 0.000281381075930215, "loss": 1.7166, "step": 46868 }, { "epoch": 1.56, "grad_norm": 0.5337366461753845, "learning_rate": 0.00028137064397431503, "loss": 1.7585, "step": 46869 }, { "epoch": 1.56, "grad_norm": 0.5154447555541992, "learning_rate": 0.0002813602120410284, "loss": 1.7688, "step": 46870 }, { "epoch": 1.56, "grad_norm": 0.5274105072021484, "learning_rate": 0.0002813497801303677, "loss": 1.7327, "step": 46871 }, { "epoch": 1.56, "grad_norm": 0.5460198521614075, "learning_rate": 0.00028133934824234563, "loss": 1.8173, "step": 46872 }, { "epoch": 1.56, "grad_norm": 0.5371896624565125, "learning_rate": 0.00028132891637697475, "loss": 1.6416, "step": 46873 }, { "epoch": 1.56, "grad_norm": 0.5172228217124939, "learning_rate": 0.00028131848453426803, "loss": 1.7489, "step": 46874 }, { "epoch": 1.56, "grad_norm": 0.5238266587257385, "learning_rate": 0.0002813080527142377, "loss": 1.7665, "step": 46875 }, { "epoch": 1.56, "grad_norm": 0.5406519174575806, "learning_rate": 0.0002812976209168967, "loss": 1.7006, "step": 46876 }, { "epoch": 1.56, "grad_norm": 0.5470735430717468, "learning_rate": 0.00028128718914225765, "loss": 1.7466, "step": 46877 }, { "epoch": 1.56, "grad_norm": 0.5074140429496765, "learning_rate": 0.00028127675739033324, "loss": 1.7538, "step": 46878 }, { "epoch": 1.56, "grad_norm": 0.5149818062782288, "learning_rate": 0.0002812663256611361, "loss": 1.7227, "step": 46879 }, { "epoch": 1.56, "grad_norm": 0.5128921866416931, "learning_rate": 0.0002812558939546787, "loss": 1.7802, "step": 46880 }, { "epoch": 1.56, "grad_norm": 0.5317720174789429, "learning_rate": 0.00028124546227097417, "loss": 1.8046, "step": 46881 }, { "epoch": 1.56, "grad_norm": 0.49008968472480774, "learning_rate": 0.0002812350306100346, "loss": 1.7026, "step": 46882 }, { "epoch": 1.56, "grad_norm": 0.5186367630958557, "learning_rate": 0.0002812245989718731, "loss": 1.7874, "step": 46883 }, { "epoch": 1.56, "grad_norm": 0.5528372526168823, "learning_rate": 0.00028121416735650215, "loss": 1.794, "step": 46884 }, { "epoch": 1.56, "grad_norm": 0.5234960317611694, "learning_rate": 0.0002812037357639344, "loss": 1.7438, "step": 46885 }, { "epoch": 1.56, "grad_norm": 0.5327050089836121, "learning_rate": 0.0002811933041941826, "loss": 1.6463, "step": 46886 }, { "epoch": 1.56, "grad_norm": 0.5375171303749084, "learning_rate": 0.00028118287264725924, "loss": 1.8315, "step": 46887 }, { "epoch": 1.56, "grad_norm": 0.5243256092071533, "learning_rate": 0.00028117244112317726, "loss": 1.7725, "step": 46888 }, { "epoch": 1.56, "grad_norm": 0.5178243517875671, "learning_rate": 0.000281162009621949, "loss": 1.7621, "step": 46889 }, { "epoch": 1.56, "grad_norm": 0.5060708522796631, "learning_rate": 0.00028115157814358736, "loss": 1.7391, "step": 46890 }, { "epoch": 1.56, "grad_norm": 0.5422177314758301, "learning_rate": 0.00028114114668810497, "loss": 1.7633, "step": 46891 }, { "epoch": 1.56, "grad_norm": 0.5279051065444946, "learning_rate": 0.0002811307152555144, "loss": 1.6543, "step": 46892 }, { "epoch": 1.56, "grad_norm": 0.521927535533905, "learning_rate": 0.0002811202838458284, "loss": 1.7607, "step": 46893 }, { "epoch": 1.56, "grad_norm": 0.5369247198104858, "learning_rate": 0.00028110985245905965, "loss": 1.8061, "step": 46894 }, { "epoch": 1.56, "grad_norm": 0.5298954844474792, "learning_rate": 0.00028109942109522064, "loss": 1.7471, "step": 46895 }, { "epoch": 1.56, "grad_norm": 0.5195484757423401, "learning_rate": 0.0002810889897543241, "loss": 1.7827, "step": 46896 }, { "epoch": 1.56, "grad_norm": 0.5510005354881287, "learning_rate": 0.00028107855843638284, "loss": 1.8513, "step": 46897 }, { "epoch": 1.56, "grad_norm": 0.5357556343078613, "learning_rate": 0.0002810681271414095, "loss": 1.7366, "step": 46898 }, { "epoch": 1.56, "grad_norm": 0.5267982482910156, "learning_rate": 0.0002810576958694165, "loss": 1.7366, "step": 46899 }, { "epoch": 1.56, "grad_norm": 0.53177410364151, "learning_rate": 0.0002810472646204167, "loss": 1.795, "step": 46900 }, { "epoch": 1.56, "grad_norm": 0.5193707942962646, "learning_rate": 0.00028103683339442283, "loss": 1.7281, "step": 46901 }, { "epoch": 1.56, "grad_norm": 0.5212125182151794, "learning_rate": 0.0002810264021914474, "loss": 1.7991, "step": 46902 }, { "epoch": 1.56, "grad_norm": 0.5253897309303284, "learning_rate": 0.000281015971011503, "loss": 1.7803, "step": 46903 }, { "epoch": 1.56, "grad_norm": 0.5201025009155273, "learning_rate": 0.00028100553985460267, "loss": 1.7362, "step": 46904 }, { "epoch": 1.56, "grad_norm": 0.5329849123954773, "learning_rate": 0.00028099510872075855, "loss": 1.7409, "step": 46905 }, { "epoch": 1.56, "grad_norm": 0.5028013586997986, "learning_rate": 0.0002809846776099837, "loss": 1.7478, "step": 46906 }, { "epoch": 1.56, "grad_norm": 0.5545014142990112, "learning_rate": 0.0002809742465222906, "loss": 1.7478, "step": 46907 }, { "epoch": 1.56, "grad_norm": 0.521462082862854, "learning_rate": 0.00028096381545769205, "loss": 1.7848, "step": 46908 }, { "epoch": 1.56, "grad_norm": 0.5214636921882629, "learning_rate": 0.00028095338441620055, "loss": 1.7763, "step": 46909 }, { "epoch": 1.56, "grad_norm": 0.5474923849105835, "learning_rate": 0.0002809429533978288, "loss": 1.8311, "step": 46910 }, { "epoch": 1.56, "grad_norm": 0.5006487369537354, "learning_rate": 0.0002809325224025897, "loss": 1.7626, "step": 46911 }, { "epoch": 1.56, "grad_norm": 1.0587047338485718, "learning_rate": 0.0002809220914304954, "loss": 1.8726, "step": 46912 }, { "epoch": 1.56, "grad_norm": 0.5320917963981628, "learning_rate": 0.00028091166048155905, "loss": 1.8141, "step": 46913 }, { "epoch": 1.56, "grad_norm": 0.5218529105186462, "learning_rate": 0.00028090122955579317, "loss": 1.7347, "step": 46914 }, { "epoch": 1.56, "grad_norm": 0.5218138098716736, "learning_rate": 0.0002808907986532103, "loss": 1.7537, "step": 46915 }, { "epoch": 1.56, "grad_norm": 0.5080283284187317, "learning_rate": 0.00028088036777382323, "loss": 1.7738, "step": 46916 }, { "epoch": 1.56, "grad_norm": 0.5145071148872375, "learning_rate": 0.00028086993691764443, "loss": 1.8497, "step": 46917 }, { "epoch": 1.56, "grad_norm": 0.5221047401428223, "learning_rate": 0.00028085950608468696, "loss": 1.8256, "step": 46918 }, { "epoch": 1.56, "grad_norm": 0.5176587700843811, "learning_rate": 0.00028084907527496303, "loss": 1.7568, "step": 46919 }, { "epoch": 1.56, "grad_norm": 0.5211375951766968, "learning_rate": 0.0002808386444884856, "loss": 1.7489, "step": 46920 }, { "epoch": 1.56, "grad_norm": 0.5120158791542053, "learning_rate": 0.00028082821372526725, "loss": 1.7257, "step": 46921 }, { "epoch": 1.56, "grad_norm": 0.5284700393676758, "learning_rate": 0.00028081778298532056, "loss": 1.7303, "step": 46922 }, { "epoch": 1.56, "grad_norm": 0.513824999332428, "learning_rate": 0.00028080735226865825, "loss": 1.6468, "step": 46923 }, { "epoch": 1.56, "grad_norm": 0.5344865322113037, "learning_rate": 0.00028079692157529306, "loss": 1.8442, "step": 46924 }, { "epoch": 1.56, "grad_norm": 0.5255751609802246, "learning_rate": 0.0002807864909052375, "loss": 1.7594, "step": 46925 }, { "epoch": 1.56, "grad_norm": 0.540107250213623, "learning_rate": 0.0002807760602585043, "loss": 1.6854, "step": 46926 }, { "epoch": 1.56, "grad_norm": 0.5240753293037415, "learning_rate": 0.00028076562963510614, "loss": 1.738, "step": 46927 }, { "epoch": 1.56, "grad_norm": 0.5362553596496582, "learning_rate": 0.0002807551990350558, "loss": 1.7175, "step": 46928 }, { "epoch": 1.56, "grad_norm": 0.520898699760437, "learning_rate": 0.00028074476845836566, "loss": 1.8173, "step": 46929 }, { "epoch": 1.56, "grad_norm": 0.522351086139679, "learning_rate": 0.00028073433790504855, "loss": 1.7777, "step": 46930 }, { "epoch": 1.56, "grad_norm": 0.5308693647384644, "learning_rate": 0.00028072390737511724, "loss": 1.7175, "step": 46931 }, { "epoch": 1.56, "grad_norm": 0.5338351130485535, "learning_rate": 0.00028071347686858415, "loss": 1.7079, "step": 46932 }, { "epoch": 1.56, "grad_norm": 0.5403316020965576, "learning_rate": 0.00028070304638546197, "loss": 1.7956, "step": 46933 }, { "epoch": 1.56, "grad_norm": 0.5378051400184631, "learning_rate": 0.00028069261592576365, "loss": 1.7598, "step": 46934 }, { "epoch": 1.56, "grad_norm": 0.5272151231765747, "learning_rate": 0.00028068218548950156, "loss": 1.7484, "step": 46935 }, { "epoch": 1.56, "grad_norm": 0.5103941559791565, "learning_rate": 0.0002806717550766884, "loss": 1.7136, "step": 46936 }, { "epoch": 1.56, "grad_norm": 0.5028680562973022, "learning_rate": 0.00028066132468733694, "loss": 1.7935, "step": 46937 }, { "epoch": 1.56, "grad_norm": 0.5225576162338257, "learning_rate": 0.00028065089432145984, "loss": 1.7164, "step": 46938 }, { "epoch": 1.56, "grad_norm": 0.5128803849220276, "learning_rate": 0.0002806404639790696, "loss": 1.7898, "step": 46939 }, { "epoch": 1.56, "grad_norm": 0.5249922275543213, "learning_rate": 0.00028063003366017895, "loss": 1.7731, "step": 46940 }, { "epoch": 1.56, "grad_norm": 0.5184035897254944, "learning_rate": 0.00028061960336480074, "loss": 1.7208, "step": 46941 }, { "epoch": 1.56, "grad_norm": 0.538581371307373, "learning_rate": 0.0002806091730929474, "loss": 1.7026, "step": 46942 }, { "epoch": 1.56, "grad_norm": 0.50434809923172, "learning_rate": 0.00028059874284463163, "loss": 1.7141, "step": 46943 }, { "epoch": 1.56, "grad_norm": 0.5198131203651428, "learning_rate": 0.0002805883126198662, "loss": 1.7084, "step": 46944 }, { "epoch": 1.56, "grad_norm": 0.5267184376716614, "learning_rate": 0.00028057788241866365, "loss": 1.7246, "step": 46945 }, { "epoch": 1.56, "grad_norm": 0.530363142490387, "learning_rate": 0.0002805674522410366, "loss": 1.7611, "step": 46946 }, { "epoch": 1.56, "grad_norm": 0.5066479444503784, "learning_rate": 0.00028055702208699794, "loss": 1.7089, "step": 46947 }, { "epoch": 1.56, "grad_norm": 0.5221108198165894, "learning_rate": 0.00028054659195656017, "loss": 1.7826, "step": 46948 }, { "epoch": 1.56, "grad_norm": 0.5180460810661316, "learning_rate": 0.00028053616184973594, "loss": 1.721, "step": 46949 }, { "epoch": 1.56, "grad_norm": 0.52183997631073, "learning_rate": 0.00028052573176653794, "loss": 1.7445, "step": 46950 }, { "epoch": 1.56, "grad_norm": 0.5370672941207886, "learning_rate": 0.0002805153017069789, "loss": 1.7631, "step": 46951 }, { "epoch": 1.56, "grad_norm": 0.5350480675697327, "learning_rate": 0.00028050487167107137, "loss": 1.7455, "step": 46952 }, { "epoch": 1.56, "grad_norm": 0.5326745510101318, "learning_rate": 0.00028049444165882795, "loss": 1.7327, "step": 46953 }, { "epoch": 1.56, "grad_norm": 1.3257503509521484, "learning_rate": 0.00028048401167026155, "loss": 1.7688, "step": 46954 }, { "epoch": 1.56, "grad_norm": 0.5470930337905884, "learning_rate": 0.0002804735817053846, "loss": 1.7813, "step": 46955 }, { "epoch": 1.56, "grad_norm": 0.5332785248756409, "learning_rate": 0.00028046315176420987, "loss": 1.7907, "step": 46956 }, { "epoch": 1.56, "grad_norm": 0.5552733540534973, "learning_rate": 0.00028045272184674995, "loss": 1.7164, "step": 46957 }, { "epoch": 1.56, "grad_norm": 0.5474939942359924, "learning_rate": 0.00028044229195301764, "loss": 1.7007, "step": 46958 }, { "epoch": 1.56, "grad_norm": 0.5255300998687744, "learning_rate": 0.00028043186208302545, "loss": 1.6955, "step": 46959 }, { "epoch": 1.56, "grad_norm": 0.5394689440727234, "learning_rate": 0.000280421432236786, "loss": 1.7363, "step": 46960 }, { "epoch": 1.56, "grad_norm": 0.533288836479187, "learning_rate": 0.0002804110024143123, "loss": 1.7697, "step": 46961 }, { "epoch": 1.56, "grad_norm": 0.5075486898422241, "learning_rate": 0.00028040057261561653, "loss": 1.695, "step": 46962 }, { "epoch": 1.56, "grad_norm": 0.507138729095459, "learning_rate": 0.0002803901428407117, "loss": 1.7807, "step": 46963 }, { "epoch": 1.56, "grad_norm": 0.5273354649543762, "learning_rate": 0.0002803797130896103, "loss": 1.7542, "step": 46964 }, { "epoch": 1.56, "grad_norm": 0.5428550839424133, "learning_rate": 0.000280369283362325, "loss": 1.802, "step": 46965 }, { "epoch": 1.56, "grad_norm": 0.5147606134414673, "learning_rate": 0.00028035885365886856, "loss": 1.7298, "step": 46966 }, { "epoch": 1.56, "grad_norm": 0.5201185941696167, "learning_rate": 0.0002803484239792535, "loss": 1.7754, "step": 46967 }, { "epoch": 1.56, "grad_norm": 0.5311740636825562, "learning_rate": 0.0002803379943234927, "loss": 1.8385, "step": 46968 }, { "epoch": 1.56, "grad_norm": 0.5144686102867126, "learning_rate": 0.0002803275646915985, "loss": 1.7467, "step": 46969 }, { "epoch": 1.56, "grad_norm": 0.5105807185173035, "learning_rate": 0.0002803171350835838, "loss": 1.7044, "step": 46970 }, { "epoch": 1.56, "grad_norm": 0.5281875729560852, "learning_rate": 0.0002803067054994613, "loss": 1.7049, "step": 46971 }, { "epoch": 1.56, "grad_norm": 0.5128110647201538, "learning_rate": 0.0002802962759392435, "loss": 1.7515, "step": 46972 }, { "epoch": 1.56, "grad_norm": 0.5371940732002258, "learning_rate": 0.00028028584640294303, "loss": 1.7821, "step": 46973 }, { "epoch": 1.56, "grad_norm": 0.5515851974487305, "learning_rate": 0.00028027541689057265, "loss": 1.7745, "step": 46974 }, { "epoch": 1.56, "grad_norm": 0.5341920852661133, "learning_rate": 0.0002802649874021452, "loss": 1.7934, "step": 46975 }, { "epoch": 1.56, "grad_norm": 0.533426821231842, "learning_rate": 0.00028025455793767297, "loss": 1.8056, "step": 46976 }, { "epoch": 1.56, "grad_norm": 0.5354275703430176, "learning_rate": 0.00028024412849716877, "loss": 1.7409, "step": 46977 }, { "epoch": 1.56, "grad_norm": 0.5415232181549072, "learning_rate": 0.00028023369908064543, "loss": 1.827, "step": 46978 }, { "epoch": 1.56, "grad_norm": 0.5467494130134583, "learning_rate": 0.0002802232696881154, "loss": 1.734, "step": 46979 }, { "epoch": 1.56, "grad_norm": 0.5149020552635193, "learning_rate": 0.00028021284031959137, "loss": 1.7465, "step": 46980 }, { "epoch": 1.56, "grad_norm": 0.5299416780471802, "learning_rate": 0.0002802024109750861, "loss": 1.7951, "step": 46981 }, { "epoch": 1.56, "grad_norm": 0.5268468260765076, "learning_rate": 0.00028019198165461215, "loss": 1.8184, "step": 46982 }, { "epoch": 1.56, "grad_norm": 0.534356951713562, "learning_rate": 0.00028018155235818215, "loss": 1.7042, "step": 46983 }, { "epoch": 1.56, "grad_norm": 0.5890213251113892, "learning_rate": 0.0002801711230858089, "loss": 1.7204, "step": 46984 }, { "epoch": 1.56, "grad_norm": 0.5292177200317383, "learning_rate": 0.00028016069383750503, "loss": 1.77, "step": 46985 }, { "epoch": 1.56, "grad_norm": 0.5361704230308533, "learning_rate": 0.00028015026461328306, "loss": 1.8042, "step": 46986 }, { "epoch": 1.56, "grad_norm": 0.5226774215698242, "learning_rate": 0.0002801398354131558, "loss": 1.7503, "step": 46987 }, { "epoch": 1.56, "grad_norm": 0.534855842590332, "learning_rate": 0.0002801294062371359, "loss": 1.7323, "step": 46988 }, { "epoch": 1.56, "grad_norm": 0.5426689386367798, "learning_rate": 0.0002801189770852359, "loss": 1.697, "step": 46989 }, { "epoch": 1.56, "grad_norm": 0.5439168810844421, "learning_rate": 0.0002801085479574685, "loss": 1.7418, "step": 46990 }, { "epoch": 1.56, "grad_norm": 0.5179073214530945, "learning_rate": 0.0002800981188538465, "loss": 1.7695, "step": 46991 }, { "epoch": 1.56, "grad_norm": 2.366781711578369, "learning_rate": 0.00028008768977438235, "loss": 1.786, "step": 46992 }, { "epoch": 1.56, "grad_norm": 0.519880473613739, "learning_rate": 0.00028007726071908884, "loss": 1.8068, "step": 46993 }, { "epoch": 1.56, "grad_norm": 0.49619990587234497, "learning_rate": 0.0002800668316879786, "loss": 1.8041, "step": 46994 }, { "epoch": 1.56, "grad_norm": 0.5399722456932068, "learning_rate": 0.0002800564026810643, "loss": 1.7605, "step": 46995 }, { "epoch": 1.56, "grad_norm": 0.5270783305168152, "learning_rate": 0.00028004597369835863, "loss": 1.8013, "step": 46996 }, { "epoch": 1.56, "grad_norm": 0.5488832592964172, "learning_rate": 0.00028003554473987407, "loss": 1.7377, "step": 46997 }, { "epoch": 1.56, "grad_norm": 0.5374983549118042, "learning_rate": 0.00028002511580562365, "loss": 1.7597, "step": 46998 }, { "epoch": 1.56, "grad_norm": 0.5275983214378357, "learning_rate": 0.0002800146868956196, "loss": 1.7938, "step": 46999 }, { "epoch": 1.56, "grad_norm": 0.5237709283828735, "learning_rate": 0.0002800042580098748, "loss": 1.7697, "step": 47000 }, { "epoch": 1.56, "grad_norm": 0.5151849389076233, "learning_rate": 0.000279993829148402, "loss": 1.7817, "step": 47001 }, { "epoch": 1.56, "grad_norm": 0.5219693183898926, "learning_rate": 0.0002799834003112136, "loss": 1.7907, "step": 47002 }, { "epoch": 1.56, "grad_norm": 0.5344549417495728, "learning_rate": 0.00027997297149832246, "loss": 1.7648, "step": 47003 }, { "epoch": 1.56, "grad_norm": 0.5236998796463013, "learning_rate": 0.00027996254270974116, "loss": 1.7557, "step": 47004 }, { "epoch": 1.56, "grad_norm": 0.5268940329551697, "learning_rate": 0.00027995211394548253, "loss": 1.7536, "step": 47005 }, { "epoch": 1.56, "grad_norm": 0.5121490955352783, "learning_rate": 0.00027994168520555887, "loss": 1.6985, "step": 47006 }, { "epoch": 1.56, "grad_norm": 0.5270565152168274, "learning_rate": 0.0002799312564899831, "loss": 1.6845, "step": 47007 }, { "epoch": 1.56, "grad_norm": 0.5271108746528625, "learning_rate": 0.0002799208277987679, "loss": 1.7154, "step": 47008 }, { "epoch": 1.56, "grad_norm": 0.5043619275093079, "learning_rate": 0.00027991039913192583, "loss": 1.7687, "step": 47009 }, { "epoch": 1.56, "grad_norm": 0.5389448404312134, "learning_rate": 0.00027989997048946956, "loss": 1.7765, "step": 47010 }, { "epoch": 1.56, "grad_norm": 0.5216280221939087, "learning_rate": 0.00027988954187141185, "loss": 1.7786, "step": 47011 }, { "epoch": 1.56, "grad_norm": 0.5253077149391174, "learning_rate": 0.0002798791132777652, "loss": 1.8251, "step": 47012 }, { "epoch": 1.56, "grad_norm": 2.2036149501800537, "learning_rate": 0.0002798686847085422, "loss": 1.7636, "step": 47013 }, { "epoch": 1.56, "grad_norm": 0.528272807598114, "learning_rate": 0.0002798582561637558, "loss": 1.7802, "step": 47014 }, { "epoch": 1.56, "grad_norm": 0.5338103175163269, "learning_rate": 0.00027984782764341854, "loss": 1.727, "step": 47015 }, { "epoch": 1.56, "grad_norm": 0.5307399034500122, "learning_rate": 0.000279837399147543, "loss": 1.7608, "step": 47016 }, { "epoch": 1.56, "grad_norm": 0.5127769708633423, "learning_rate": 0.0002798269706761419, "loss": 1.8197, "step": 47017 }, { "epoch": 1.56, "grad_norm": 0.5398790836334229, "learning_rate": 0.0002798165422292279, "loss": 1.7523, "step": 47018 }, { "epoch": 1.56, "grad_norm": 0.5251513719558716, "learning_rate": 0.0002798061138068136, "loss": 1.7722, "step": 47019 }, { "epoch": 1.56, "grad_norm": 0.5304470062255859, "learning_rate": 0.00027979568540891165, "loss": 1.706, "step": 47020 }, { "epoch": 1.56, "grad_norm": 0.5198735594749451, "learning_rate": 0.00027978525703553495, "loss": 1.7154, "step": 47021 }, { "epoch": 1.56, "grad_norm": 0.5335492491722107, "learning_rate": 0.0002797748286866958, "loss": 1.7568, "step": 47022 }, { "epoch": 1.56, "grad_norm": 0.541542112827301, "learning_rate": 0.00027976440036240707, "loss": 1.7441, "step": 47023 }, { "epoch": 1.56, "grad_norm": 0.520371675491333, "learning_rate": 0.00027975397206268137, "loss": 1.723, "step": 47024 }, { "epoch": 1.56, "grad_norm": 0.5223322510719299, "learning_rate": 0.00027974354378753144, "loss": 1.798, "step": 47025 }, { "epoch": 1.56, "grad_norm": 0.531831681728363, "learning_rate": 0.0002797331155369698, "loss": 1.796, "step": 47026 }, { "epoch": 1.56, "grad_norm": 0.5480408668518066, "learning_rate": 0.00027972268731100904, "loss": 1.7849, "step": 47027 }, { "epoch": 1.56, "grad_norm": 0.5051497220993042, "learning_rate": 0.00027971225910966227, "loss": 1.7341, "step": 47028 }, { "epoch": 1.56, "grad_norm": 0.5386834740638733, "learning_rate": 0.0002797018309329415, "loss": 1.7305, "step": 47029 }, { "epoch": 1.56, "grad_norm": 0.5192325711250305, "learning_rate": 0.00027969140278085987, "loss": 1.8349, "step": 47030 }, { "epoch": 1.56, "grad_norm": 0.5204930901527405, "learning_rate": 0.00027968097465342994, "loss": 1.7503, "step": 47031 }, { "epoch": 1.56, "grad_norm": 0.5447583794593811, "learning_rate": 0.00027967054655066425, "loss": 1.8419, "step": 47032 }, { "epoch": 1.56, "grad_norm": 0.5165861248970032, "learning_rate": 0.0002796601184725755, "loss": 1.741, "step": 47033 }, { "epoch": 1.56, "grad_norm": 0.5305519700050354, "learning_rate": 0.0002796496904191763, "loss": 1.7569, "step": 47034 }, { "epoch": 1.56, "grad_norm": 0.5420387983322144, "learning_rate": 0.0002796392623904796, "loss": 1.867, "step": 47035 }, { "epoch": 1.56, "grad_norm": 0.5262898206710815, "learning_rate": 0.00027962883438649764, "loss": 1.7562, "step": 47036 }, { "epoch": 1.56, "grad_norm": 0.5227133631706238, "learning_rate": 0.0002796184064072433, "loss": 1.8105, "step": 47037 }, { "epoch": 1.56, "grad_norm": 0.5442862510681152, "learning_rate": 0.0002796079784527293, "loss": 1.7615, "step": 47038 }, { "epoch": 1.56, "grad_norm": 0.5315666794776917, "learning_rate": 0.00027959755052296817, "loss": 1.7344, "step": 47039 }, { "epoch": 1.57, "grad_norm": 0.5197721719741821, "learning_rate": 0.00027958712261797256, "loss": 1.722, "step": 47040 }, { "epoch": 1.57, "grad_norm": 0.5277837514877319, "learning_rate": 0.00027957669473775525, "loss": 1.7465, "step": 47041 }, { "epoch": 1.57, "grad_norm": 0.530575156211853, "learning_rate": 0.00027956626688232876, "loss": 1.7165, "step": 47042 }, { "epoch": 1.57, "grad_norm": 0.5584084987640381, "learning_rate": 0.0002795558390517058, "loss": 1.7212, "step": 47043 }, { "epoch": 1.57, "grad_norm": 0.5207774043083191, "learning_rate": 0.000279545411245899, "loss": 1.815, "step": 47044 }, { "epoch": 1.57, "grad_norm": 0.519062876701355, "learning_rate": 0.0002795349834649212, "loss": 1.7128, "step": 47045 }, { "epoch": 1.57, "grad_norm": 0.5365561842918396, "learning_rate": 0.00027952455570878483, "loss": 1.73, "step": 47046 }, { "epoch": 1.57, "grad_norm": 0.5308647751808167, "learning_rate": 0.00027951412797750264, "loss": 1.7912, "step": 47047 }, { "epoch": 1.57, "grad_norm": 0.5381045341491699, "learning_rate": 0.0002795037002710874, "loss": 1.7641, "step": 47048 }, { "epoch": 1.57, "grad_norm": 0.5171234011650085, "learning_rate": 0.0002794932725895515, "loss": 1.7458, "step": 47049 }, { "epoch": 1.57, "grad_norm": 0.5280197858810425, "learning_rate": 0.0002794828449329077, "loss": 1.7301, "step": 47050 }, { "epoch": 1.57, "grad_norm": 0.5165807008743286, "learning_rate": 0.00027947241730116884, "loss": 1.7554, "step": 47051 }, { "epoch": 1.57, "grad_norm": 0.5278276205062866, "learning_rate": 0.00027946198969434736, "loss": 1.706, "step": 47052 }, { "epoch": 1.57, "grad_norm": 0.5120340585708618, "learning_rate": 0.00027945156211245603, "loss": 1.7363, "step": 47053 }, { "epoch": 1.57, "grad_norm": 0.5247275829315186, "learning_rate": 0.0002794411345555074, "loss": 1.7424, "step": 47054 }, { "epoch": 1.57, "grad_norm": 0.5606198906898499, "learning_rate": 0.0002794307070235144, "loss": 1.8529, "step": 47055 }, { "epoch": 1.57, "grad_norm": 0.5365861058235168, "learning_rate": 0.0002794202795164893, "loss": 1.7548, "step": 47056 }, { "epoch": 1.57, "grad_norm": 0.5463599562644958, "learning_rate": 0.00027940985203444495, "loss": 1.7776, "step": 47057 }, { "epoch": 1.57, "grad_norm": 0.5457172989845276, "learning_rate": 0.00027939942457739413, "loss": 1.7682, "step": 47058 }, { "epoch": 1.57, "grad_norm": 0.5373637676239014, "learning_rate": 0.0002793889971453493, "loss": 1.836, "step": 47059 }, { "epoch": 1.57, "grad_norm": 0.5316323041915894, "learning_rate": 0.0002793785697383232, "loss": 1.7819, "step": 47060 }, { "epoch": 1.57, "grad_norm": 0.5450982451438904, "learning_rate": 0.0002793681423563285, "loss": 1.7379, "step": 47061 }, { "epoch": 1.57, "grad_norm": 0.5062081813812256, "learning_rate": 0.0002793577149993778, "loss": 1.715, "step": 47062 }, { "epoch": 1.57, "grad_norm": 0.5035296082496643, "learning_rate": 0.00027934728766748374, "loss": 1.6985, "step": 47063 }, { "epoch": 1.57, "grad_norm": 0.546611487865448, "learning_rate": 0.00027933686036065906, "loss": 1.6776, "step": 47064 }, { "epoch": 1.57, "grad_norm": 0.5304504632949829, "learning_rate": 0.00027932643307891647, "loss": 1.7408, "step": 47065 }, { "epoch": 1.57, "grad_norm": 0.5102018117904663, "learning_rate": 0.00027931600582226855, "loss": 1.6545, "step": 47066 }, { "epoch": 1.57, "grad_norm": 0.5321968197822571, "learning_rate": 0.00027930557859072783, "loss": 1.7779, "step": 47067 }, { "epoch": 1.57, "grad_norm": 0.5364813804626465, "learning_rate": 0.00027929515138430726, "loss": 1.7813, "step": 47068 }, { "epoch": 1.57, "grad_norm": 0.5534294247627258, "learning_rate": 0.00027928472420301915, "loss": 1.7492, "step": 47069 }, { "epoch": 1.57, "grad_norm": 0.5242661237716675, "learning_rate": 0.0002792742970468764, "loss": 1.7579, "step": 47070 }, { "epoch": 1.57, "grad_norm": 0.5228620171546936, "learning_rate": 0.0002792638699158917, "loss": 1.7052, "step": 47071 }, { "epoch": 1.57, "grad_norm": 0.5213475823402405, "learning_rate": 0.00027925344281007743, "loss": 1.7263, "step": 47072 }, { "epoch": 1.57, "grad_norm": 0.5257376432418823, "learning_rate": 0.0002792430157294465, "loss": 1.7917, "step": 47073 }, { "epoch": 1.57, "grad_norm": 0.5279597640037537, "learning_rate": 0.00027923258867401147, "loss": 1.6887, "step": 47074 }, { "epoch": 1.57, "grad_norm": 0.5205515027046204, "learning_rate": 0.0002792221616437851, "loss": 1.704, "step": 47075 }, { "epoch": 1.57, "grad_norm": 0.5513380765914917, "learning_rate": 0.0002792117346387799, "loss": 1.8264, "step": 47076 }, { "epoch": 1.57, "grad_norm": 0.531119167804718, "learning_rate": 0.0002792013076590085, "loss": 1.7835, "step": 47077 }, { "epoch": 1.57, "grad_norm": 0.547020673751831, "learning_rate": 0.00027919088070448386, "loss": 1.8057, "step": 47078 }, { "epoch": 1.57, "grad_norm": 0.5073514580726624, "learning_rate": 0.00027918045377521827, "loss": 1.7223, "step": 47079 }, { "epoch": 1.57, "grad_norm": 0.5560494661331177, "learning_rate": 0.00027917002687122456, "loss": 1.7046, "step": 47080 }, { "epoch": 1.57, "grad_norm": 0.5257928371429443, "learning_rate": 0.00027915959999251543, "loss": 1.7634, "step": 47081 }, { "epoch": 1.57, "grad_norm": 0.5434329509735107, "learning_rate": 0.00027914917313910344, "loss": 1.7733, "step": 47082 }, { "epoch": 1.57, "grad_norm": 0.5311264395713806, "learning_rate": 0.00027913874631100124, "loss": 1.8375, "step": 47083 }, { "epoch": 1.57, "grad_norm": 0.5369150042533875, "learning_rate": 0.0002791283195082215, "loss": 1.7952, "step": 47084 }, { "epoch": 1.57, "grad_norm": 0.7558735013008118, "learning_rate": 0.0002791178927307771, "loss": 1.687, "step": 47085 }, { "epoch": 1.57, "grad_norm": 0.5213574767112732, "learning_rate": 0.0002791074659786803, "loss": 1.732, "step": 47086 }, { "epoch": 1.57, "grad_norm": 0.5434580445289612, "learning_rate": 0.000279097039251944, "loss": 1.7026, "step": 47087 }, { "epoch": 1.57, "grad_norm": 0.5333266854286194, "learning_rate": 0.0002790866125505809, "loss": 1.6848, "step": 47088 }, { "epoch": 1.57, "grad_norm": 0.5040221214294434, "learning_rate": 0.0002790761858746035, "loss": 1.7567, "step": 47089 }, { "epoch": 1.57, "grad_norm": 0.5335690975189209, "learning_rate": 0.0002790657592240245, "loss": 1.7108, "step": 47090 }, { "epoch": 1.57, "grad_norm": 0.5347099900245667, "learning_rate": 0.00027905533259885655, "loss": 1.7822, "step": 47091 }, { "epoch": 1.57, "grad_norm": 0.5590323209762573, "learning_rate": 0.0002790449059991126, "loss": 1.828, "step": 47092 }, { "epoch": 1.57, "grad_norm": 0.5219571590423584, "learning_rate": 0.00027903447942480476, "loss": 1.7451, "step": 47093 }, { "epoch": 1.57, "grad_norm": 0.5211960077285767, "learning_rate": 0.0002790240528759461, "loss": 1.7006, "step": 47094 }, { "epoch": 1.57, "grad_norm": 0.5329471826553345, "learning_rate": 0.00027901362635254914, "loss": 1.733, "step": 47095 }, { "epoch": 1.57, "grad_norm": 0.5218353867530823, "learning_rate": 0.0002790031998546265, "loss": 1.7295, "step": 47096 }, { "epoch": 1.57, "grad_norm": 0.5329602956771851, "learning_rate": 0.0002789927733821909, "loss": 1.7656, "step": 47097 }, { "epoch": 1.57, "grad_norm": 0.5115498304367065, "learning_rate": 0.00027898234693525504, "loss": 1.7333, "step": 47098 }, { "epoch": 1.57, "grad_norm": 0.5003607869148254, "learning_rate": 0.0002789719205138315, "loss": 1.7245, "step": 47099 }, { "epoch": 1.57, "grad_norm": 0.5363211035728455, "learning_rate": 0.0002789614941179328, "loss": 1.6048, "step": 47100 }, { "epoch": 1.57, "grad_norm": 0.531115710735321, "learning_rate": 0.00027895106774757187, "loss": 1.7544, "step": 47101 }, { "epoch": 1.57, "grad_norm": 0.5351870059967041, "learning_rate": 0.0002789406414027613, "loss": 1.7912, "step": 47102 }, { "epoch": 1.57, "grad_norm": 0.5099919438362122, "learning_rate": 0.0002789302150835136, "loss": 1.7397, "step": 47103 }, { "epoch": 1.57, "grad_norm": 0.5278807282447815, "learning_rate": 0.0002789197887898415, "loss": 1.6877, "step": 47104 }, { "epoch": 1.57, "grad_norm": 0.5224663019180298, "learning_rate": 0.00027890936252175777, "loss": 1.8039, "step": 47105 }, { "epoch": 1.57, "grad_norm": 0.512600839138031, "learning_rate": 0.0002788989362792749, "loss": 1.7582, "step": 47106 }, { "epoch": 1.57, "grad_norm": 0.516374945640564, "learning_rate": 0.00027888851006240554, "loss": 1.7601, "step": 47107 }, { "epoch": 1.57, "grad_norm": 0.5368173122406006, "learning_rate": 0.0002788780838711626, "loss": 1.7955, "step": 47108 }, { "epoch": 1.57, "grad_norm": 0.5155066251754761, "learning_rate": 0.0002788676577055584, "loss": 1.7015, "step": 47109 }, { "epoch": 1.57, "grad_norm": 0.543308436870575, "learning_rate": 0.00027885723156560575, "loss": 1.6867, "step": 47110 }, { "epoch": 1.57, "grad_norm": 0.5004355311393738, "learning_rate": 0.00027884680545131737, "loss": 1.6712, "step": 47111 }, { "epoch": 1.57, "grad_norm": 0.5295203328132629, "learning_rate": 0.0002788363793627059, "loss": 1.7549, "step": 47112 }, { "epoch": 1.57, "grad_norm": 0.529998242855072, "learning_rate": 0.0002788259532997838, "loss": 1.7776, "step": 47113 }, { "epoch": 1.57, "grad_norm": 0.5479533672332764, "learning_rate": 0.00027881552726256387, "loss": 1.7753, "step": 47114 }, { "epoch": 1.57, "grad_norm": 0.566154956817627, "learning_rate": 0.000278805101251059, "loss": 1.793, "step": 47115 }, { "epoch": 1.57, "grad_norm": 0.5225467681884766, "learning_rate": 0.0002787946752652814, "loss": 1.7622, "step": 47116 }, { "epoch": 1.57, "grad_norm": 0.5339186191558838, "learning_rate": 0.000278784249305244, "loss": 1.7277, "step": 47117 }, { "epoch": 1.57, "grad_norm": 0.5371698141098022, "learning_rate": 0.0002787738233709594, "loss": 1.7214, "step": 47118 }, { "epoch": 1.57, "grad_norm": 0.5454988479614258, "learning_rate": 0.0002787633974624403, "loss": 1.8289, "step": 47119 }, { "epoch": 1.57, "grad_norm": 0.5566295385360718, "learning_rate": 0.00027875297157969924, "loss": 1.8232, "step": 47120 }, { "epoch": 1.57, "grad_norm": 0.5358508825302124, "learning_rate": 0.00027874254572274885, "loss": 1.7132, "step": 47121 }, { "epoch": 1.57, "grad_norm": 0.5181850790977478, "learning_rate": 0.0002787321198916021, "loss": 1.7495, "step": 47122 }, { "epoch": 1.57, "grad_norm": 0.5193091630935669, "learning_rate": 0.00027872169408627126, "loss": 1.7399, "step": 47123 }, { "epoch": 1.57, "grad_norm": 0.550865650177002, "learning_rate": 0.00027871126830676917, "loss": 1.81, "step": 47124 }, { "epoch": 1.57, "grad_norm": 0.53659987449646, "learning_rate": 0.00027870084255310855, "loss": 1.7126, "step": 47125 }, { "epoch": 1.57, "grad_norm": 0.5317981839179993, "learning_rate": 0.0002786904168253019, "loss": 1.7292, "step": 47126 }, { "epoch": 1.57, "grad_norm": 0.5328246355056763, "learning_rate": 0.000278679991123362, "loss": 1.8371, "step": 47127 }, { "epoch": 1.57, "grad_norm": 0.5351428985595703, "learning_rate": 0.00027866956544730145, "loss": 1.7261, "step": 47128 }, { "epoch": 1.57, "grad_norm": 0.5236907601356506, "learning_rate": 0.0002786591397971328, "loss": 1.7933, "step": 47129 }, { "epoch": 1.57, "grad_norm": 0.5285444259643555, "learning_rate": 0.0002786487141728688, "loss": 1.7426, "step": 47130 }, { "epoch": 1.57, "grad_norm": 0.5385821461677551, "learning_rate": 0.0002786382885745222, "loss": 1.7444, "step": 47131 }, { "epoch": 1.57, "grad_norm": 0.5676458477973938, "learning_rate": 0.0002786278630021056, "loss": 1.8566, "step": 47132 }, { "epoch": 1.57, "grad_norm": 0.526059627532959, "learning_rate": 0.00027861743745563155, "loss": 1.7485, "step": 47133 }, { "epoch": 1.57, "grad_norm": 0.5546600222587585, "learning_rate": 0.00027860701193511283, "loss": 1.7702, "step": 47134 }, { "epoch": 1.57, "grad_norm": 2.74644136428833, "learning_rate": 0.00027859658644056206, "loss": 1.8708, "step": 47135 }, { "epoch": 1.57, "grad_norm": 0.5429778099060059, "learning_rate": 0.0002785861609719918, "loss": 1.7699, "step": 47136 }, { "epoch": 1.57, "grad_norm": 0.5477648973464966, "learning_rate": 0.00027857573552941475, "loss": 1.7893, "step": 47137 }, { "epoch": 1.57, "grad_norm": 0.5485695600509644, "learning_rate": 0.00027856531011284387, "loss": 1.7262, "step": 47138 }, { "epoch": 1.57, "grad_norm": 0.5288554430007935, "learning_rate": 0.0002785548847222912, "loss": 1.7261, "step": 47139 }, { "epoch": 1.57, "grad_norm": 0.5444756150245667, "learning_rate": 0.0002785444593577699, "loss": 1.7547, "step": 47140 }, { "epoch": 1.57, "grad_norm": 0.5203799605369568, "learning_rate": 0.00027853403401929247, "loss": 1.7353, "step": 47141 }, { "epoch": 1.57, "grad_norm": 0.5502527952194214, "learning_rate": 0.0002785236087068716, "loss": 1.7957, "step": 47142 }, { "epoch": 1.57, "grad_norm": 0.5235231518745422, "learning_rate": 0.00027851318342051986, "loss": 1.7287, "step": 47143 }, { "epoch": 1.57, "grad_norm": 0.5405009984970093, "learning_rate": 0.0002785027581602499, "loss": 1.6954, "step": 47144 }, { "epoch": 1.57, "grad_norm": 0.5692315697669983, "learning_rate": 0.00027849233292607454, "loss": 1.7682, "step": 47145 }, { "epoch": 1.57, "grad_norm": 0.533939778804779, "learning_rate": 0.0002784819077180062, "loss": 1.8296, "step": 47146 }, { "epoch": 1.57, "grad_norm": 0.5371934771537781, "learning_rate": 0.0002784714825360577, "loss": 1.7917, "step": 47147 }, { "epoch": 1.57, "grad_norm": 0.5249568819999695, "learning_rate": 0.0002784610573802417, "loss": 1.7307, "step": 47148 }, { "epoch": 1.57, "grad_norm": 0.5404142141342163, "learning_rate": 0.0002784506322505707, "loss": 1.7816, "step": 47149 }, { "epoch": 1.57, "grad_norm": 0.5314706563949585, "learning_rate": 0.0002784402071470575, "loss": 1.7758, "step": 47150 }, { "epoch": 1.57, "grad_norm": 0.5364078879356384, "learning_rate": 0.00027842978206971467, "loss": 1.7334, "step": 47151 }, { "epoch": 1.57, "grad_norm": 0.5199462175369263, "learning_rate": 0.0002784193570185551, "loss": 1.7299, "step": 47152 }, { "epoch": 1.57, "grad_norm": 0.5318365693092346, "learning_rate": 0.00027840893199359103, "loss": 1.8081, "step": 47153 }, { "epoch": 1.57, "grad_norm": 0.5201530456542969, "learning_rate": 0.0002783985069948354, "loss": 1.797, "step": 47154 }, { "epoch": 1.57, "grad_norm": 0.5271120071411133, "learning_rate": 0.0002783880820223008, "loss": 1.7472, "step": 47155 }, { "epoch": 1.57, "grad_norm": 0.5343503952026367, "learning_rate": 0.00027837765707599996, "loss": 1.7809, "step": 47156 }, { "epoch": 1.57, "grad_norm": 0.5402248501777649, "learning_rate": 0.00027836723215594536, "loss": 1.8205, "step": 47157 }, { "epoch": 1.57, "grad_norm": 0.5405377745628357, "learning_rate": 0.0002783568072621498, "loss": 1.8096, "step": 47158 }, { "epoch": 1.57, "grad_norm": 0.5279502272605896, "learning_rate": 0.0002783463823946259, "loss": 1.7139, "step": 47159 }, { "epoch": 1.57, "grad_norm": 0.5530707836151123, "learning_rate": 0.0002783359575533862, "loss": 1.7659, "step": 47160 }, { "epoch": 1.57, "grad_norm": 0.5346107482910156, "learning_rate": 0.00027832553273844353, "loss": 1.7782, "step": 47161 }, { "epoch": 1.57, "grad_norm": 0.5228561758995056, "learning_rate": 0.0002783151079498105, "loss": 1.7573, "step": 47162 }, { "epoch": 1.57, "grad_norm": 0.5242468118667603, "learning_rate": 0.0002783046831874997, "loss": 1.7543, "step": 47163 }, { "epoch": 1.57, "grad_norm": 0.5340865850448608, "learning_rate": 0.00027829425845152386, "loss": 1.7878, "step": 47164 }, { "epoch": 1.57, "grad_norm": 0.5369179248809814, "learning_rate": 0.00027828383374189556, "loss": 1.752, "step": 47165 }, { "epoch": 1.57, "grad_norm": 0.5799658298492432, "learning_rate": 0.0002782734090586275, "loss": 1.7001, "step": 47166 }, { "epoch": 1.57, "grad_norm": 0.536443293094635, "learning_rate": 0.0002782629844017322, "loss": 1.8164, "step": 47167 }, { "epoch": 1.57, "grad_norm": 0.5255300998687744, "learning_rate": 0.00027825255977122264, "loss": 1.8113, "step": 47168 }, { "epoch": 1.57, "grad_norm": 0.5226074457168579, "learning_rate": 0.0002782421351671111, "loss": 1.7143, "step": 47169 }, { "epoch": 1.57, "grad_norm": 0.5747126936912537, "learning_rate": 0.0002782317105894105, "loss": 1.6843, "step": 47170 }, { "epoch": 1.57, "grad_norm": 0.5051249265670776, "learning_rate": 0.00027822128603813333, "loss": 1.6968, "step": 47171 }, { "epoch": 1.57, "grad_norm": 0.5237691402435303, "learning_rate": 0.00027821086151329243, "loss": 1.7407, "step": 47172 }, { "epoch": 1.57, "grad_norm": 0.529736340045929, "learning_rate": 0.00027820043701490016, "loss": 1.7499, "step": 47173 }, { "epoch": 1.57, "grad_norm": 0.5628830194473267, "learning_rate": 0.0002781900125429694, "loss": 1.828, "step": 47174 }, { "epoch": 1.57, "grad_norm": 0.5352014303207397, "learning_rate": 0.00027817958809751283, "loss": 1.776, "step": 47175 }, { "epoch": 1.57, "grad_norm": 0.5187033414840698, "learning_rate": 0.000278169163678543, "loss": 1.7747, "step": 47176 }, { "epoch": 1.57, "grad_norm": 0.5357024073600769, "learning_rate": 0.00027815873928607255, "loss": 1.7777, "step": 47177 }, { "epoch": 1.57, "grad_norm": 0.5412710309028625, "learning_rate": 0.0002781483149201142, "loss": 1.7774, "step": 47178 }, { "epoch": 1.57, "grad_norm": 0.5167373418807983, "learning_rate": 0.00027813789058068057, "loss": 1.6975, "step": 47179 }, { "epoch": 1.57, "grad_norm": 0.5396282076835632, "learning_rate": 0.00027812746626778423, "loss": 1.715, "step": 47180 }, { "epoch": 1.57, "grad_norm": 0.5150866508483887, "learning_rate": 0.000278117041981438, "loss": 1.6728, "step": 47181 }, { "epoch": 1.57, "grad_norm": 0.5378625392913818, "learning_rate": 0.00027810661772165456, "loss": 1.6982, "step": 47182 }, { "epoch": 1.57, "grad_norm": 0.5388314723968506, "learning_rate": 0.0002780961934884463, "loss": 1.7853, "step": 47183 }, { "epoch": 1.57, "grad_norm": 0.5413528680801392, "learning_rate": 0.00027808576928182614, "loss": 1.8043, "step": 47184 }, { "epoch": 1.57, "grad_norm": 0.5316236615180969, "learning_rate": 0.00027807534510180664, "loss": 1.635, "step": 47185 }, { "epoch": 1.57, "grad_norm": 0.5363773107528687, "learning_rate": 0.00027806492094840034, "loss": 1.7804, "step": 47186 }, { "epoch": 1.57, "grad_norm": 0.5638921856880188, "learning_rate": 0.00027805449682162, "loss": 1.7708, "step": 47187 }, { "epoch": 1.57, "grad_norm": 0.5413013696670532, "learning_rate": 0.00027804407272147844, "loss": 1.7804, "step": 47188 }, { "epoch": 1.57, "grad_norm": 0.5320733785629272, "learning_rate": 0.00027803364864798793, "loss": 1.8041, "step": 47189 }, { "epoch": 1.57, "grad_norm": 0.5289170145988464, "learning_rate": 0.00027802322460116136, "loss": 1.6979, "step": 47190 }, { "epoch": 1.57, "grad_norm": 0.5604680776596069, "learning_rate": 0.00027801280058101143, "loss": 1.7833, "step": 47191 }, { "epoch": 1.57, "grad_norm": 0.5740087628364563, "learning_rate": 0.00027800237658755076, "loss": 1.7801, "step": 47192 }, { "epoch": 1.57, "grad_norm": 0.5398070216178894, "learning_rate": 0.0002779919526207919, "loss": 1.758, "step": 47193 }, { "epoch": 1.57, "grad_norm": 0.5432698130607605, "learning_rate": 0.0002779815286807475, "loss": 1.7587, "step": 47194 }, { "epoch": 1.57, "grad_norm": 0.5325490832328796, "learning_rate": 0.0002779711047674304, "loss": 1.83, "step": 47195 }, { "epoch": 1.57, "grad_norm": 0.5382957458496094, "learning_rate": 0.00027796068088085306, "loss": 1.7494, "step": 47196 }, { "epoch": 1.57, "grad_norm": 1.0244852304458618, "learning_rate": 0.0002779502570210282, "loss": 1.769, "step": 47197 }, { "epoch": 1.57, "grad_norm": 0.538702130317688, "learning_rate": 0.0002779398331879686, "loss": 1.7521, "step": 47198 }, { "epoch": 1.57, "grad_norm": 0.5535662174224854, "learning_rate": 0.0002779294093816867, "loss": 1.7623, "step": 47199 }, { "epoch": 1.57, "grad_norm": 0.5316020250320435, "learning_rate": 0.0002779189856021952, "loss": 1.6718, "step": 47200 }, { "epoch": 1.57, "grad_norm": 0.513758659362793, "learning_rate": 0.0002779085618495068, "loss": 1.6867, "step": 47201 }, { "epoch": 1.57, "grad_norm": 0.5364281535148621, "learning_rate": 0.0002778981381236343, "loss": 1.7966, "step": 47202 }, { "epoch": 1.57, "grad_norm": 0.5306099653244019, "learning_rate": 0.00027788771442459, "loss": 1.7801, "step": 47203 }, { "epoch": 1.57, "grad_norm": 0.5053665041923523, "learning_rate": 0.00027787729075238684, "loss": 1.7826, "step": 47204 }, { "epoch": 1.57, "grad_norm": 0.5380721092224121, "learning_rate": 0.00027786686710703747, "loss": 1.7304, "step": 47205 }, { "epoch": 1.57, "grad_norm": 0.5522968173027039, "learning_rate": 0.0002778564434885544, "loss": 1.7226, "step": 47206 }, { "epoch": 1.57, "grad_norm": 0.5409519672393799, "learning_rate": 0.00027784601989695035, "loss": 1.7754, "step": 47207 }, { "epoch": 1.57, "grad_norm": 0.5102825164794922, "learning_rate": 0.000277835596332238, "loss": 1.8369, "step": 47208 }, { "epoch": 1.57, "grad_norm": 0.5402560830116272, "learning_rate": 0.0002778251727944299, "loss": 1.7545, "step": 47209 }, { "epoch": 1.57, "grad_norm": 0.5268067121505737, "learning_rate": 0.00027781474928353873, "loss": 1.7504, "step": 47210 }, { "epoch": 1.57, "grad_norm": 0.5352422595024109, "learning_rate": 0.00027780432579957724, "loss": 1.7688, "step": 47211 }, { "epoch": 1.57, "grad_norm": 0.5251743197441101, "learning_rate": 0.0002777939023425581, "loss": 1.8132, "step": 47212 }, { "epoch": 1.57, "grad_norm": 0.5360786318778992, "learning_rate": 0.00027778347891249384, "loss": 1.7371, "step": 47213 }, { "epoch": 1.57, "grad_norm": 0.5380299091339111, "learning_rate": 0.00027777305550939714, "loss": 1.7552, "step": 47214 }, { "epoch": 1.57, "grad_norm": 0.517560601234436, "learning_rate": 0.00027776263213328075, "loss": 1.7417, "step": 47215 }, { "epoch": 1.57, "grad_norm": 0.6416947245597839, "learning_rate": 0.0002777522087841572, "loss": 1.8021, "step": 47216 }, { "epoch": 1.57, "grad_norm": 0.5531220436096191, "learning_rate": 0.00027774178546203913, "loss": 1.8294, "step": 47217 }, { "epoch": 1.57, "grad_norm": 0.5365769267082214, "learning_rate": 0.00027773136216693926, "loss": 1.783, "step": 47218 }, { "epoch": 1.57, "grad_norm": 0.529557466506958, "learning_rate": 0.00027772093889887034, "loss": 1.6956, "step": 47219 }, { "epoch": 1.57, "grad_norm": 0.54380202293396, "learning_rate": 0.00027771051565784484, "loss": 1.6957, "step": 47220 }, { "epoch": 1.57, "grad_norm": 0.5210419297218323, "learning_rate": 0.0002777000924438755, "loss": 1.7141, "step": 47221 }, { "epoch": 1.57, "grad_norm": 0.5290867686271667, "learning_rate": 0.00027768966925697505, "loss": 1.8, "step": 47222 }, { "epoch": 1.57, "grad_norm": 0.558376669883728, "learning_rate": 0.00027767924609715597, "loss": 1.7948, "step": 47223 }, { "epoch": 1.57, "grad_norm": 0.5618674755096436, "learning_rate": 0.00027766882296443093, "loss": 1.7597, "step": 47224 }, { "epoch": 1.57, "grad_norm": 0.5252427458763123, "learning_rate": 0.00027765839985881285, "loss": 1.8505, "step": 47225 }, { "epoch": 1.57, "grad_norm": 0.5520977973937988, "learning_rate": 0.000277647976780314, "loss": 1.8527, "step": 47226 }, { "epoch": 1.57, "grad_norm": 0.5181442499160767, "learning_rate": 0.0002776375537289472, "loss": 1.7829, "step": 47227 }, { "epoch": 1.57, "grad_norm": 0.535688579082489, "learning_rate": 0.0002776271307047252, "loss": 1.813, "step": 47228 }, { "epoch": 1.57, "grad_norm": 0.5415826439857483, "learning_rate": 0.0002776167077076606, "loss": 1.7381, "step": 47229 }, { "epoch": 1.57, "grad_norm": 0.5347350835800171, "learning_rate": 0.00027760628473776593, "loss": 1.7713, "step": 47230 }, { "epoch": 1.57, "grad_norm": 0.5319257378578186, "learning_rate": 0.0002775958617950539, "loss": 1.7377, "step": 47231 }, { "epoch": 1.57, "grad_norm": 0.5158923864364624, "learning_rate": 0.0002775854388795374, "loss": 1.8177, "step": 47232 }, { "epoch": 1.57, "grad_norm": 0.5313069224357605, "learning_rate": 0.00027757501599122864, "loss": 1.7069, "step": 47233 }, { "epoch": 1.57, "grad_norm": 0.5270814895629883, "learning_rate": 0.0002775645931301406, "loss": 1.733, "step": 47234 }, { "epoch": 1.57, "grad_norm": 0.5295951962471008, "learning_rate": 0.00027755417029628587, "loss": 1.8004, "step": 47235 }, { "epoch": 1.57, "grad_norm": 0.5441362857818604, "learning_rate": 0.0002775437474896771, "loss": 1.7961, "step": 47236 }, { "epoch": 1.57, "grad_norm": 0.5452069044113159, "learning_rate": 0.0002775333247103268, "loss": 1.7406, "step": 47237 }, { "epoch": 1.57, "grad_norm": 0.5164220929145813, "learning_rate": 0.00027752290195824766, "loss": 1.7124, "step": 47238 }, { "epoch": 1.57, "grad_norm": 0.5212799906730652, "learning_rate": 0.00027751247923345267, "loss": 1.7859, "step": 47239 }, { "epoch": 1.57, "grad_norm": 0.5247639417648315, "learning_rate": 0.00027750205653595403, "loss": 1.7351, "step": 47240 }, { "epoch": 1.57, "grad_norm": 0.5422250628471375, "learning_rate": 0.0002774916338657646, "loss": 1.7723, "step": 47241 }, { "epoch": 1.57, "grad_norm": 0.5493826270103455, "learning_rate": 0.0002774812112228971, "loss": 1.8299, "step": 47242 }, { "epoch": 1.57, "grad_norm": 0.5334975123405457, "learning_rate": 0.000277470788607364, "loss": 1.7462, "step": 47243 }, { "epoch": 1.57, "grad_norm": 0.5303884148597717, "learning_rate": 0.00027746036601917806, "loss": 1.7368, "step": 47244 }, { "epoch": 1.57, "grad_norm": 0.5407674908638, "learning_rate": 0.00027744994345835195, "loss": 1.7572, "step": 47245 }, { "epoch": 1.57, "grad_norm": 0.5200470685958862, "learning_rate": 0.00027743952092489825, "loss": 1.6746, "step": 47246 }, { "epoch": 1.57, "grad_norm": 0.5423315167427063, "learning_rate": 0.0002774290984188296, "loss": 1.729, "step": 47247 }, { "epoch": 1.57, "grad_norm": 0.5232755541801453, "learning_rate": 0.00027741867594015875, "loss": 1.7636, "step": 47248 }, { "epoch": 1.57, "grad_norm": 0.5306409001350403, "learning_rate": 0.00027740825348889837, "loss": 1.7903, "step": 47249 }, { "epoch": 1.57, "grad_norm": 2.616969108581543, "learning_rate": 0.0002773978310650609, "loss": 1.7606, "step": 47250 }, { "epoch": 1.57, "grad_norm": 0.5434560775756836, "learning_rate": 0.0002773874086686593, "loss": 1.7108, "step": 47251 }, { "epoch": 1.57, "grad_norm": 0.5371249318122864, "learning_rate": 0.000277376986299706, "loss": 1.7348, "step": 47252 }, { "epoch": 1.57, "grad_norm": 0.5374242067337036, "learning_rate": 0.00027736656395821365, "loss": 1.7566, "step": 47253 }, { "epoch": 1.57, "grad_norm": 0.5341247320175171, "learning_rate": 0.0002773561416441949, "loss": 1.7211, "step": 47254 }, { "epoch": 1.57, "grad_norm": 0.5276078581809998, "learning_rate": 0.00027734571935766265, "loss": 1.8205, "step": 47255 }, { "epoch": 1.57, "grad_norm": 0.5273454189300537, "learning_rate": 0.00027733529709862915, "loss": 1.8052, "step": 47256 }, { "epoch": 1.57, "grad_norm": 0.5434874892234802, "learning_rate": 0.0002773248748671073, "loss": 1.7563, "step": 47257 }, { "epoch": 1.57, "grad_norm": 0.5136141777038574, "learning_rate": 0.0002773144526631098, "loss": 1.7668, "step": 47258 }, { "epoch": 1.57, "grad_norm": 0.775813102722168, "learning_rate": 0.00027730403048664926, "loss": 1.7198, "step": 47259 }, { "epoch": 1.57, "grad_norm": 0.5311422348022461, "learning_rate": 0.00027729360833773816, "loss": 1.8006, "step": 47260 }, { "epoch": 1.57, "grad_norm": 0.5653214454650879, "learning_rate": 0.0002772831862163892, "loss": 1.7005, "step": 47261 }, { "epoch": 1.57, "grad_norm": 0.5383396148681641, "learning_rate": 0.0002772727641226154, "loss": 1.8084, "step": 47262 }, { "epoch": 1.57, "grad_norm": 0.5634738206863403, "learning_rate": 0.0002772623420564288, "loss": 1.8011, "step": 47263 }, { "epoch": 1.57, "grad_norm": 0.5338350534439087, "learning_rate": 0.0002772519200178425, "loss": 1.7187, "step": 47264 }, { "epoch": 1.57, "grad_norm": 0.5243450999259949, "learning_rate": 0.00027724149800686904, "loss": 1.6905, "step": 47265 }, { "epoch": 1.57, "grad_norm": 0.5253486037254333, "learning_rate": 0.00027723107602352104, "loss": 1.8004, "step": 47266 }, { "epoch": 1.57, "grad_norm": 0.5444092750549316, "learning_rate": 0.00027722065406781115, "loss": 1.6834, "step": 47267 }, { "epoch": 1.57, "grad_norm": 0.548147439956665, "learning_rate": 0.00027721023213975193, "loss": 1.7696, "step": 47268 }, { "epoch": 1.57, "grad_norm": 0.5367720723152161, "learning_rate": 0.00027719981023935635, "loss": 1.7031, "step": 47269 }, { "epoch": 1.57, "grad_norm": 0.5412752032279968, "learning_rate": 0.00027718938836663665, "loss": 1.7719, "step": 47270 }, { "epoch": 1.57, "grad_norm": 0.5213930010795593, "learning_rate": 0.00027717896652160575, "loss": 1.7547, "step": 47271 }, { "epoch": 1.57, "grad_norm": 0.5555241107940674, "learning_rate": 0.0002771685447042762, "loss": 1.7415, "step": 47272 }, { "epoch": 1.57, "grad_norm": 0.5394255518913269, "learning_rate": 0.0002771581229146607, "loss": 1.7656, "step": 47273 }, { "epoch": 1.57, "grad_norm": 0.5379632711410522, "learning_rate": 0.00027714770115277185, "loss": 1.7914, "step": 47274 }, { "epoch": 1.57, "grad_norm": 0.5128106474876404, "learning_rate": 0.0002771372794186224, "loss": 1.761, "step": 47275 }, { "epoch": 1.57, "grad_norm": 0.5645721554756165, "learning_rate": 0.0002771268577122248, "loss": 1.7923, "step": 47276 }, { "epoch": 1.57, "grad_norm": 0.5376904010772705, "learning_rate": 0.0002771164360335918, "loss": 1.7582, "step": 47277 }, { "epoch": 1.57, "grad_norm": 0.5193635821342468, "learning_rate": 0.00027710601438273616, "loss": 1.7899, "step": 47278 }, { "epoch": 1.57, "grad_norm": 0.5501657128334045, "learning_rate": 0.0002770955927596705, "loss": 1.7602, "step": 47279 }, { "epoch": 1.57, "grad_norm": 0.5376289486885071, "learning_rate": 0.0002770851711644073, "loss": 1.8086, "step": 47280 }, { "epoch": 1.57, "grad_norm": 0.5287642478942871, "learning_rate": 0.0002770747495969594, "loss": 1.7426, "step": 47281 }, { "epoch": 1.57, "grad_norm": 0.5325984954833984, "learning_rate": 0.00027706432805733937, "loss": 1.7921, "step": 47282 }, { "epoch": 1.57, "grad_norm": 0.5273135900497437, "learning_rate": 0.0002770539065455598, "loss": 1.763, "step": 47283 }, { "epoch": 1.57, "grad_norm": 0.5154771208763123, "learning_rate": 0.0002770434850616335, "loss": 1.745, "step": 47284 }, { "epoch": 1.57, "grad_norm": 1.0471268892288208, "learning_rate": 0.000277033063605573, "loss": 1.824, "step": 47285 }, { "epoch": 1.57, "grad_norm": 0.526658296585083, "learning_rate": 0.00027702264217739094, "loss": 1.7746, "step": 47286 }, { "epoch": 1.57, "grad_norm": 0.5360944271087646, "learning_rate": 0.0002770122207771, "loss": 1.7234, "step": 47287 }, { "epoch": 1.57, "grad_norm": 0.6935232877731323, "learning_rate": 0.00027700179940471286, "loss": 1.7101, "step": 47288 }, { "epoch": 1.57, "grad_norm": 0.5390319228172302, "learning_rate": 0.0002769913780602422, "loss": 1.7426, "step": 47289 }, { "epoch": 1.57, "grad_norm": 0.5385614633560181, "learning_rate": 0.0002769809567437005, "loss": 1.8001, "step": 47290 }, { "epoch": 1.57, "grad_norm": 0.5425243973731995, "learning_rate": 0.0002769705354551006, "loss": 1.7001, "step": 47291 }, { "epoch": 1.57, "grad_norm": 0.5217239856719971, "learning_rate": 0.0002769601141944551, "loss": 1.7443, "step": 47292 }, { "epoch": 1.57, "grad_norm": 0.546490490436554, "learning_rate": 0.0002769496929617766, "loss": 1.762, "step": 47293 }, { "epoch": 1.57, "grad_norm": 0.5660200715065002, "learning_rate": 0.00027693927175707775, "loss": 1.7669, "step": 47294 }, { "epoch": 1.57, "grad_norm": 0.5578656792640686, "learning_rate": 0.0002769288505803713, "loss": 1.7677, "step": 47295 }, { "epoch": 1.57, "grad_norm": 0.5254228115081787, "learning_rate": 0.00027691842943166977, "loss": 1.7398, "step": 47296 }, { "epoch": 1.57, "grad_norm": 0.5244763493537903, "learning_rate": 0.0002769080083109858, "loss": 1.8286, "step": 47297 }, { "epoch": 1.57, "grad_norm": 0.501584529876709, "learning_rate": 0.00027689758721833214, "loss": 1.7316, "step": 47298 }, { "epoch": 1.57, "grad_norm": 0.5273129940032959, "learning_rate": 0.0002768871661537215, "loss": 1.7911, "step": 47299 }, { "epoch": 1.57, "grad_norm": 0.5248973369598389, "learning_rate": 0.0002768767451171664, "loss": 1.7428, "step": 47300 }, { "epoch": 1.57, "grad_norm": 0.541168212890625, "learning_rate": 0.00027686632410867947, "loss": 1.8085, "step": 47301 }, { "epoch": 1.57, "grad_norm": 0.5539204478263855, "learning_rate": 0.0002768559031282735, "loss": 1.7189, "step": 47302 }, { "epoch": 1.57, "grad_norm": 0.5210332274436951, "learning_rate": 0.000276845482175961, "loss": 1.7825, "step": 47303 }, { "epoch": 1.57, "grad_norm": 0.5328579545021057, "learning_rate": 0.0002768350612517546, "loss": 1.6399, "step": 47304 }, { "epoch": 1.57, "grad_norm": 0.5318397283554077, "learning_rate": 0.0002768246403556672, "loss": 1.7767, "step": 47305 }, { "epoch": 1.57, "grad_norm": 0.5454884171485901, "learning_rate": 0.0002768142194877111, "loss": 1.7644, "step": 47306 }, { "epoch": 1.57, "grad_norm": 0.5513196587562561, "learning_rate": 0.00027680379864789917, "loss": 1.7819, "step": 47307 }, { "epoch": 1.57, "grad_norm": 0.5242847204208374, "learning_rate": 0.000276793377836244, "loss": 1.7747, "step": 47308 }, { "epoch": 1.57, "grad_norm": 0.5320792198181152, "learning_rate": 0.0002767829570527584, "loss": 1.8108, "step": 47309 }, { "epoch": 1.57, "grad_norm": 0.537267804145813, "learning_rate": 0.0002767725362974547, "loss": 1.7872, "step": 47310 }, { "epoch": 1.57, "grad_norm": 0.519634485244751, "learning_rate": 0.00027676211557034567, "loss": 1.726, "step": 47311 }, { "epoch": 1.57, "grad_norm": 0.5344937443733215, "learning_rate": 0.00027675169487144423, "loss": 1.7259, "step": 47312 }, { "epoch": 1.57, "grad_norm": 0.5152565240859985, "learning_rate": 0.0002767412742007626, "loss": 1.6886, "step": 47313 }, { "epoch": 1.57, "grad_norm": 0.5167405605316162, "learning_rate": 0.0002767308535583137, "loss": 1.7763, "step": 47314 }, { "epoch": 1.57, "grad_norm": 0.5051279067993164, "learning_rate": 0.0002767204329441102, "loss": 1.684, "step": 47315 }, { "epoch": 1.57, "grad_norm": 0.5219802260398865, "learning_rate": 0.0002767100123581646, "loss": 1.7857, "step": 47316 }, { "epoch": 1.57, "grad_norm": 0.5386872887611389, "learning_rate": 0.00027669959180048957, "loss": 1.7794, "step": 47317 }, { "epoch": 1.57, "grad_norm": 0.5417995452880859, "learning_rate": 0.0002766891712710978, "loss": 1.7457, "step": 47318 }, { "epoch": 1.57, "grad_norm": 0.5308611989021301, "learning_rate": 0.0002766787507700021, "loss": 1.7662, "step": 47319 }, { "epoch": 1.57, "grad_norm": 0.5266250371932983, "learning_rate": 0.00027666833029721475, "loss": 1.799, "step": 47320 }, { "epoch": 1.57, "grad_norm": 0.5224743485450745, "learning_rate": 0.00027665790985274873, "loss": 1.7394, "step": 47321 }, { "epoch": 1.57, "grad_norm": 0.5392645001411438, "learning_rate": 0.00027664748943661654, "loss": 1.8534, "step": 47322 }, { "epoch": 1.57, "grad_norm": 0.5251157283782959, "learning_rate": 0.0002766370690488309, "loss": 1.7396, "step": 47323 }, { "epoch": 1.57, "grad_norm": 0.5393803119659424, "learning_rate": 0.0002766266486894043, "loss": 1.7881, "step": 47324 }, { "epoch": 1.57, "grad_norm": 0.5194588899612427, "learning_rate": 0.0002766162283583497, "loss": 1.8037, "step": 47325 }, { "epoch": 1.57, "grad_norm": 0.5185491442680359, "learning_rate": 0.0002766058080556794, "loss": 1.7325, "step": 47326 }, { "epoch": 1.57, "grad_norm": 0.5466516613960266, "learning_rate": 0.00027659538778140614, "loss": 1.7, "step": 47327 }, { "epoch": 1.57, "grad_norm": 0.5168362259864807, "learning_rate": 0.00027658496753554275, "loss": 1.726, "step": 47328 }, { "epoch": 1.57, "grad_norm": 0.5235824584960938, "learning_rate": 0.0002765745473181018, "loss": 1.6463, "step": 47329 }, { "epoch": 1.57, "grad_norm": 0.5097219347953796, "learning_rate": 0.00027656412712909575, "loss": 1.6356, "step": 47330 }, { "epoch": 1.57, "grad_norm": 0.5197380185127258, "learning_rate": 0.0002765537069685375, "loss": 1.8658, "step": 47331 }, { "epoch": 1.57, "grad_norm": 0.5247802138328552, "learning_rate": 0.00027654328683643966, "loss": 1.7053, "step": 47332 }, { "epoch": 1.57, "grad_norm": 0.5293745994567871, "learning_rate": 0.0002765328667328147, "loss": 1.7121, "step": 47333 }, { "epoch": 1.57, "grad_norm": 0.7332676649093628, "learning_rate": 0.00027652244665767533, "loss": 1.751, "step": 47334 }, { "epoch": 1.57, "grad_norm": 0.5162738561630249, "learning_rate": 0.0002765120266110343, "loss": 1.7621, "step": 47335 }, { "epoch": 1.57, "grad_norm": 0.5265254974365234, "learning_rate": 0.00027650160659290434, "loss": 1.706, "step": 47336 }, { "epoch": 1.57, "grad_norm": 0.5452405214309692, "learning_rate": 0.00027649118660329783, "loss": 1.8047, "step": 47337 }, { "epoch": 1.57, "grad_norm": 0.5230512619018555, "learning_rate": 0.0002764807666422276, "loss": 1.6605, "step": 47338 }, { "epoch": 1.57, "grad_norm": 0.516833484172821, "learning_rate": 0.00027647034670970626, "loss": 1.6991, "step": 47339 }, { "epoch": 1.58, "grad_norm": 0.5094554424285889, "learning_rate": 0.00027645992680574646, "loss": 1.7481, "step": 47340 }, { "epoch": 1.58, "grad_norm": 0.5437379479408264, "learning_rate": 0.0002764495069303607, "loss": 1.7546, "step": 47341 }, { "epoch": 1.58, "grad_norm": 0.5384653210639954, "learning_rate": 0.000276439087083562, "loss": 1.7, "step": 47342 }, { "epoch": 1.58, "grad_norm": 0.5282270312309265, "learning_rate": 0.00027642866726536265, "loss": 1.712, "step": 47343 }, { "epoch": 1.58, "grad_norm": 0.5305731892585754, "learning_rate": 0.0002764182474757754, "loss": 1.7648, "step": 47344 }, { "epoch": 1.58, "grad_norm": 0.5169718861579895, "learning_rate": 0.0002764078277148129, "loss": 1.7424, "step": 47345 }, { "epoch": 1.58, "grad_norm": 0.5109960436820984, "learning_rate": 0.00027639740798248797, "loss": 1.7785, "step": 47346 }, { "epoch": 1.58, "grad_norm": 0.5228798985481262, "learning_rate": 0.000276386988278813, "loss": 1.8209, "step": 47347 }, { "epoch": 1.58, "grad_norm": 0.5211607813835144, "learning_rate": 0.0002763765686038007, "loss": 1.8036, "step": 47348 }, { "epoch": 1.58, "grad_norm": 0.5183361768722534, "learning_rate": 0.00027636614895746393, "loss": 1.7411, "step": 47349 }, { "epoch": 1.58, "grad_norm": 0.509465217590332, "learning_rate": 0.00027635572933981504, "loss": 1.7123, "step": 47350 }, { "epoch": 1.58, "grad_norm": 0.5270430445671082, "learning_rate": 0.0002763453097508668, "loss": 1.7336, "step": 47351 }, { "epoch": 1.58, "grad_norm": 0.5406268239021301, "learning_rate": 0.00027633489019063197, "loss": 1.8407, "step": 47352 }, { "epoch": 1.58, "grad_norm": 0.5333267450332642, "learning_rate": 0.00027632447065912305, "loss": 1.7517, "step": 47353 }, { "epoch": 1.58, "grad_norm": 0.5444222092628479, "learning_rate": 0.00027631405115635275, "loss": 1.7566, "step": 47354 }, { "epoch": 1.58, "grad_norm": 0.5070337653160095, "learning_rate": 0.0002763036316823336, "loss": 1.7158, "step": 47355 }, { "epoch": 1.58, "grad_norm": 0.5095266699790955, "learning_rate": 0.00027629321223707854, "loss": 1.6927, "step": 47356 }, { "epoch": 1.58, "grad_norm": 0.5389249324798584, "learning_rate": 0.00027628279282059984, "loss": 1.6637, "step": 47357 }, { "epoch": 1.58, "grad_norm": 0.5106807947158813, "learning_rate": 0.0002762723734329104, "loss": 1.7516, "step": 47358 }, { "epoch": 1.58, "grad_norm": 0.5303683876991272, "learning_rate": 0.00027626195407402283, "loss": 1.6606, "step": 47359 }, { "epoch": 1.58, "grad_norm": 0.5425812602043152, "learning_rate": 0.00027625153474394973, "loss": 1.6354, "step": 47360 }, { "epoch": 1.58, "grad_norm": 0.5302485227584839, "learning_rate": 0.0002762411154427038, "loss": 1.7423, "step": 47361 }, { "epoch": 1.58, "grad_norm": 0.5517587661743164, "learning_rate": 0.00027623069617029765, "loss": 1.8398, "step": 47362 }, { "epoch": 1.58, "grad_norm": 0.5421299338340759, "learning_rate": 0.0002762202769267439, "loss": 1.7556, "step": 47363 }, { "epoch": 1.58, "grad_norm": 0.524414598941803, "learning_rate": 0.00027620985771205516, "loss": 1.79, "step": 47364 }, { "epoch": 1.58, "grad_norm": 0.5288958549499512, "learning_rate": 0.0002761994385262442, "loss": 1.7671, "step": 47365 }, { "epoch": 1.58, "grad_norm": 0.5357520580291748, "learning_rate": 0.0002761890193693237, "loss": 1.755, "step": 47366 }, { "epoch": 1.58, "grad_norm": 0.5540825128555298, "learning_rate": 0.0002761786002413062, "loss": 1.7395, "step": 47367 }, { "epoch": 1.58, "grad_norm": 0.5329201221466064, "learning_rate": 0.0002761681811422043, "loss": 1.7524, "step": 47368 }, { "epoch": 1.58, "grad_norm": 0.5307511687278748, "learning_rate": 0.00027615776207203077, "loss": 1.7279, "step": 47369 }, { "epoch": 1.58, "grad_norm": 0.5279669761657715, "learning_rate": 0.0002761473430307982, "loss": 1.7375, "step": 47370 }, { "epoch": 1.58, "grad_norm": 0.5330118536949158, "learning_rate": 0.00027613692401851916, "loss": 1.7659, "step": 47371 }, { "epoch": 1.58, "grad_norm": 0.5252552628517151, "learning_rate": 0.00027612650503520655, "loss": 1.797, "step": 47372 }, { "epoch": 1.58, "grad_norm": 0.535831868648529, "learning_rate": 0.00027611608608087265, "loss": 1.8024, "step": 47373 }, { "epoch": 1.58, "grad_norm": 0.5283925533294678, "learning_rate": 0.0002761056671555304, "loss": 1.7635, "step": 47374 }, { "epoch": 1.58, "grad_norm": 0.539818286895752, "learning_rate": 0.0002760952482591923, "loss": 1.7927, "step": 47375 }, { "epoch": 1.58, "grad_norm": 0.5337530374526978, "learning_rate": 0.00027608482939187116, "loss": 1.7199, "step": 47376 }, { "epoch": 1.58, "grad_norm": 0.5331977009773254, "learning_rate": 0.00027607441055357943, "loss": 1.7896, "step": 47377 }, { "epoch": 1.58, "grad_norm": 0.5362467765808105, "learning_rate": 0.0002760639917443298, "loss": 1.6933, "step": 47378 }, { "epoch": 1.58, "grad_norm": 0.5065051317214966, "learning_rate": 0.0002760535729641351, "loss": 1.7118, "step": 47379 }, { "epoch": 1.58, "grad_norm": 0.5235915184020996, "learning_rate": 0.0002760431542130076, "loss": 1.7385, "step": 47380 }, { "epoch": 1.58, "grad_norm": 0.5445507764816284, "learning_rate": 0.00027603273549096035, "loss": 1.8149, "step": 47381 }, { "epoch": 1.58, "grad_norm": 0.5386719107627869, "learning_rate": 0.00027602231679800586, "loss": 1.7988, "step": 47382 }, { "epoch": 1.58, "grad_norm": 0.5280799865722656, "learning_rate": 0.00027601189813415667, "loss": 1.7947, "step": 47383 }, { "epoch": 1.58, "grad_norm": 0.516776978969574, "learning_rate": 0.0002760014794994255, "loss": 1.7052, "step": 47384 }, { "epoch": 1.58, "grad_norm": 0.5394301414489746, "learning_rate": 0.00027599106089382493, "loss": 1.7247, "step": 47385 }, { "epoch": 1.58, "grad_norm": 0.5287326574325562, "learning_rate": 0.0002759806423173679, "loss": 1.6888, "step": 47386 }, { "epoch": 1.58, "grad_norm": 0.5134762525558472, "learning_rate": 0.0002759702237700666, "loss": 1.8207, "step": 47387 }, { "epoch": 1.58, "grad_norm": 0.5124733448028564, "learning_rate": 0.00027595980525193397, "loss": 1.7246, "step": 47388 }, { "epoch": 1.58, "grad_norm": 0.5490007400512695, "learning_rate": 0.0002759493867629827, "loss": 1.7368, "step": 47389 }, { "epoch": 1.58, "grad_norm": 0.5771009922027588, "learning_rate": 0.00027593896830322524, "loss": 1.7354, "step": 47390 }, { "epoch": 1.58, "grad_norm": 0.5191507935523987, "learning_rate": 0.0002759285498726743, "loss": 1.7677, "step": 47391 }, { "epoch": 1.58, "grad_norm": 0.5226030349731445, "learning_rate": 0.00027591813147134265, "loss": 1.8119, "step": 47392 }, { "epoch": 1.58, "grad_norm": 0.5629056692123413, "learning_rate": 0.0002759077130992428, "loss": 1.7712, "step": 47393 }, { "epoch": 1.58, "grad_norm": 0.5376570820808411, "learning_rate": 0.0002758972947563873, "loss": 1.7549, "step": 47394 }, { "epoch": 1.58, "grad_norm": 0.5587068200111389, "learning_rate": 0.00027588687644278904, "loss": 1.7881, "step": 47395 }, { "epoch": 1.58, "grad_norm": 0.5236884355545044, "learning_rate": 0.00027587645815846065, "loss": 1.7466, "step": 47396 }, { "epoch": 1.58, "grad_norm": 0.5257718563079834, "learning_rate": 0.0002758660399034146, "loss": 1.8031, "step": 47397 }, { "epoch": 1.58, "grad_norm": 0.5158504843711853, "learning_rate": 0.0002758556216776636, "loss": 1.6985, "step": 47398 }, { "epoch": 1.58, "grad_norm": 0.5184717774391174, "learning_rate": 0.0002758452034812204, "loss": 1.7619, "step": 47399 }, { "epoch": 1.58, "grad_norm": 0.5084313750267029, "learning_rate": 0.00027583478531409747, "loss": 1.7078, "step": 47400 }, { "epoch": 1.58, "grad_norm": 0.5157411098480225, "learning_rate": 0.00027582436717630757, "loss": 1.7646, "step": 47401 }, { "epoch": 1.58, "grad_norm": 0.5308642387390137, "learning_rate": 0.0002758139490678634, "loss": 1.7654, "step": 47402 }, { "epoch": 1.58, "grad_norm": 0.5398392081260681, "learning_rate": 0.00027580353098877744, "loss": 1.703, "step": 47403 }, { "epoch": 1.58, "grad_norm": 0.5354815125465393, "learning_rate": 0.0002757931129390625, "loss": 1.7557, "step": 47404 }, { "epoch": 1.58, "grad_norm": 0.521586537361145, "learning_rate": 0.00027578269491873104, "loss": 1.7314, "step": 47405 }, { "epoch": 1.58, "grad_norm": 0.5414704084396362, "learning_rate": 0.000275772276927796, "loss": 1.7337, "step": 47406 }, { "epoch": 1.58, "grad_norm": 0.5142823457717896, "learning_rate": 0.00027576185896626966, "loss": 1.7023, "step": 47407 }, { "epoch": 1.58, "grad_norm": 0.5141065120697021, "learning_rate": 0.0002757514410341649, "loss": 1.7017, "step": 47408 }, { "epoch": 1.58, "grad_norm": 0.5472570657730103, "learning_rate": 0.00027574102313149447, "loss": 1.7856, "step": 47409 }, { "epoch": 1.58, "grad_norm": 0.5233122110366821, "learning_rate": 0.0002757306052582707, "loss": 1.7725, "step": 47410 }, { "epoch": 1.58, "grad_norm": 2.0843887329101562, "learning_rate": 0.0002757201874145064, "loss": 1.7053, "step": 47411 }, { "epoch": 1.58, "grad_norm": 0.5214740633964539, "learning_rate": 0.00027570976960021435, "loss": 1.7865, "step": 47412 }, { "epoch": 1.58, "grad_norm": 0.547972559928894, "learning_rate": 0.00027569935181540696, "loss": 1.7744, "step": 47413 }, { "epoch": 1.58, "grad_norm": 0.558445930480957, "learning_rate": 0.0002756889340600969, "loss": 1.7451, "step": 47414 }, { "epoch": 1.58, "grad_norm": 0.554305911064148, "learning_rate": 0.000275678516334297, "loss": 1.8141, "step": 47415 }, { "epoch": 1.58, "grad_norm": 0.5451433658599854, "learning_rate": 0.0002756680986380198, "loss": 1.8304, "step": 47416 }, { "epoch": 1.58, "grad_norm": 0.5289902687072754, "learning_rate": 0.00027565768097127796, "loss": 1.7586, "step": 47417 }, { "epoch": 1.58, "grad_norm": 0.5839420557022095, "learning_rate": 0.00027564726333408404, "loss": 1.8874, "step": 47418 }, { "epoch": 1.58, "grad_norm": 0.5602449774742126, "learning_rate": 0.00027563684572645084, "loss": 1.823, "step": 47419 }, { "epoch": 1.58, "grad_norm": 0.5223188996315002, "learning_rate": 0.0002756264281483908, "loss": 1.7042, "step": 47420 }, { "epoch": 1.58, "grad_norm": 0.5101203322410583, "learning_rate": 0.00027561601059991674, "loss": 1.7573, "step": 47421 }, { "epoch": 1.58, "grad_norm": 0.5351541042327881, "learning_rate": 0.00027560559308104136, "loss": 1.7628, "step": 47422 }, { "epoch": 1.58, "grad_norm": 0.5479260087013245, "learning_rate": 0.000275595175591777, "loss": 1.7365, "step": 47423 }, { "epoch": 1.58, "grad_norm": 0.523597776889801, "learning_rate": 0.0002755847581321366, "loss": 1.7671, "step": 47424 }, { "epoch": 1.58, "grad_norm": 0.5368543863296509, "learning_rate": 0.0002755743407021327, "loss": 1.7132, "step": 47425 }, { "epoch": 1.58, "grad_norm": 0.5218686461448669, "learning_rate": 0.000275563923301778, "loss": 1.7236, "step": 47426 }, { "epoch": 1.58, "grad_norm": 0.5281484723091125, "learning_rate": 0.00027555350593108505, "loss": 1.7025, "step": 47427 }, { "epoch": 1.58, "grad_norm": 0.529832124710083, "learning_rate": 0.0002755430885900665, "loss": 1.7741, "step": 47428 }, { "epoch": 1.58, "grad_norm": 0.5391772985458374, "learning_rate": 0.00027553267127873517, "loss": 1.7589, "step": 47429 }, { "epoch": 1.58, "grad_norm": 0.5483787655830383, "learning_rate": 0.0002755222539971034, "loss": 1.7631, "step": 47430 }, { "epoch": 1.58, "grad_norm": 0.5530807375907898, "learning_rate": 0.0002755118367451841, "loss": 1.7659, "step": 47431 }, { "epoch": 1.58, "grad_norm": 0.530480682849884, "learning_rate": 0.00027550141952298985, "loss": 1.7951, "step": 47432 }, { "epoch": 1.58, "grad_norm": 0.5367950797080994, "learning_rate": 0.0002754910023305332, "loss": 1.7631, "step": 47433 }, { "epoch": 1.58, "grad_norm": 0.5469735860824585, "learning_rate": 0.0002754805851678269, "loss": 1.6748, "step": 47434 }, { "epoch": 1.58, "grad_norm": 0.5433707237243652, "learning_rate": 0.00027547016803488347, "loss": 1.7801, "step": 47435 }, { "epoch": 1.58, "grad_norm": 0.5270782709121704, "learning_rate": 0.0002754597509317158, "loss": 1.7602, "step": 47436 }, { "epoch": 1.58, "grad_norm": 0.5250557661056519, "learning_rate": 0.00027544933385833625, "loss": 1.8468, "step": 47437 }, { "epoch": 1.58, "grad_norm": 0.5275979042053223, "learning_rate": 0.0002754389168147576, "loss": 1.6777, "step": 47438 }, { "epoch": 1.58, "grad_norm": 0.5558393597602844, "learning_rate": 0.0002754284998009926, "loss": 1.739, "step": 47439 }, { "epoch": 1.58, "grad_norm": 0.5450243949890137, "learning_rate": 0.0002754180828170537, "loss": 1.7593, "step": 47440 }, { "epoch": 1.58, "grad_norm": 0.5296481847763062, "learning_rate": 0.0002754076658629536, "loss": 1.7732, "step": 47441 }, { "epoch": 1.58, "grad_norm": 0.5450265407562256, "learning_rate": 0.000275397248938705, "loss": 1.7677, "step": 47442 }, { "epoch": 1.58, "grad_norm": 0.539680540561676, "learning_rate": 0.00027538683204432056, "loss": 1.7443, "step": 47443 }, { "epoch": 1.58, "grad_norm": 0.515586256980896, "learning_rate": 0.00027537641517981276, "loss": 1.7766, "step": 47444 }, { "epoch": 1.58, "grad_norm": 0.5480517148971558, "learning_rate": 0.00027536599834519445, "loss": 1.765, "step": 47445 }, { "epoch": 1.58, "grad_norm": 0.5561109185218811, "learning_rate": 0.0002753555815404782, "loss": 1.7558, "step": 47446 }, { "epoch": 1.58, "grad_norm": 0.5242757797241211, "learning_rate": 0.0002753451647656766, "loss": 1.7113, "step": 47447 }, { "epoch": 1.58, "grad_norm": 0.5375282168388367, "learning_rate": 0.0002753347480208024, "loss": 1.7545, "step": 47448 }, { "epoch": 1.58, "grad_norm": 0.5122447609901428, "learning_rate": 0.0002753243313058682, "loss": 1.7517, "step": 47449 }, { "epoch": 1.58, "grad_norm": 0.526823878288269, "learning_rate": 0.0002753139146208865, "loss": 1.7501, "step": 47450 }, { "epoch": 1.58, "grad_norm": 0.5327137112617493, "learning_rate": 0.00027530349796587005, "loss": 1.7182, "step": 47451 }, { "epoch": 1.58, "grad_norm": 0.5434296727180481, "learning_rate": 0.0002752930813408317, "loss": 1.8792, "step": 47452 }, { "epoch": 1.58, "grad_norm": 0.54681396484375, "learning_rate": 0.00027528266474578377, "loss": 1.7742, "step": 47453 }, { "epoch": 1.58, "grad_norm": 0.5127993226051331, "learning_rate": 0.00027527224818073905, "loss": 1.7597, "step": 47454 }, { "epoch": 1.58, "grad_norm": 0.5429198741912842, "learning_rate": 0.0002752618316457102, "loss": 1.75, "step": 47455 }, { "epoch": 1.58, "grad_norm": 0.5541810989379883, "learning_rate": 0.0002752514151407099, "loss": 1.7342, "step": 47456 }, { "epoch": 1.58, "grad_norm": 0.5439491271972656, "learning_rate": 0.0002752409986657507, "loss": 1.7735, "step": 47457 }, { "epoch": 1.58, "grad_norm": 0.556697428226471, "learning_rate": 0.00027523058222084516, "loss": 1.8066, "step": 47458 }, { "epoch": 1.58, "grad_norm": 0.5643734931945801, "learning_rate": 0.00027522016580600625, "loss": 1.7596, "step": 47459 }, { "epoch": 1.58, "grad_norm": 0.947468638420105, "learning_rate": 0.00027520974942124625, "loss": 1.8381, "step": 47460 }, { "epoch": 1.58, "grad_norm": 0.5403476357460022, "learning_rate": 0.000275199333066578, "loss": 1.7838, "step": 47461 }, { "epoch": 1.58, "grad_norm": 0.5524213314056396, "learning_rate": 0.0002751889167420141, "loss": 1.743, "step": 47462 }, { "epoch": 1.58, "grad_norm": 0.5578823685646057, "learning_rate": 0.0002751785004475673, "loss": 1.7069, "step": 47463 }, { "epoch": 1.58, "grad_norm": 0.5531190633773804, "learning_rate": 0.00027516808418325006, "loss": 1.7584, "step": 47464 }, { "epoch": 1.58, "grad_norm": 0.5302175879478455, "learning_rate": 0.000275157667949075, "loss": 1.7878, "step": 47465 }, { "epoch": 1.58, "grad_norm": 0.5559337735176086, "learning_rate": 0.00027514725174505514, "loss": 1.761, "step": 47466 }, { "epoch": 1.58, "grad_norm": 0.5221895575523376, "learning_rate": 0.0002751368355712026, "loss": 1.7164, "step": 47467 }, { "epoch": 1.58, "grad_norm": 0.5253417491912842, "learning_rate": 0.00027512641942753035, "loss": 1.7458, "step": 47468 }, { "epoch": 1.58, "grad_norm": 0.5214586853981018, "learning_rate": 0.0002751160033140511, "loss": 1.7727, "step": 47469 }, { "epoch": 1.58, "grad_norm": 0.5585869550704956, "learning_rate": 0.0002751055872307772, "loss": 1.7999, "step": 47470 }, { "epoch": 1.58, "grad_norm": 0.5477320551872253, "learning_rate": 0.00027509517117772146, "loss": 1.7492, "step": 47471 }, { "epoch": 1.58, "grad_norm": 0.5491436719894409, "learning_rate": 0.00027508475515489644, "loss": 1.8006, "step": 47472 }, { "epoch": 1.58, "grad_norm": 0.5257228016853333, "learning_rate": 0.00027507433916231514, "loss": 1.6986, "step": 47473 }, { "epoch": 1.58, "grad_norm": 0.5365951657295227, "learning_rate": 0.00027506392319998963, "loss": 1.7686, "step": 47474 }, { "epoch": 1.58, "grad_norm": 0.5577704310417175, "learning_rate": 0.00027505350726793294, "loss": 1.7909, "step": 47475 }, { "epoch": 1.58, "grad_norm": 0.5244395732879639, "learning_rate": 0.0002750430913661577, "loss": 1.7656, "step": 47476 }, { "epoch": 1.58, "grad_norm": 0.5238370299339294, "learning_rate": 0.0002750326754946764, "loss": 1.7431, "step": 47477 }, { "epoch": 1.58, "grad_norm": 0.543973982334137, "learning_rate": 0.0002750222596535017, "loss": 1.7219, "step": 47478 }, { "epoch": 1.58, "grad_norm": 0.5357491970062256, "learning_rate": 0.00027501184384264645, "loss": 1.6856, "step": 47479 }, { "epoch": 1.58, "grad_norm": 0.5191654562950134, "learning_rate": 0.000275001428062123, "loss": 1.7106, "step": 47480 }, { "epoch": 1.58, "grad_norm": 0.5970874428749084, "learning_rate": 0.0002749910123119441, "loss": 1.7349, "step": 47481 }, { "epoch": 1.58, "grad_norm": 0.5317978262901306, "learning_rate": 0.00027498059659212246, "loss": 1.7224, "step": 47482 }, { "epoch": 1.58, "grad_norm": 0.5280234813690186, "learning_rate": 0.0002749701809026708, "loss": 1.7351, "step": 47483 }, { "epoch": 1.58, "grad_norm": 0.5462819337844849, "learning_rate": 0.0002749597652436016, "loss": 1.7974, "step": 47484 }, { "epoch": 1.58, "grad_norm": 0.5334978699684143, "learning_rate": 0.00027494934961492753, "loss": 1.8127, "step": 47485 }, { "epoch": 1.58, "grad_norm": 1.7946785688400269, "learning_rate": 0.0002749389340166613, "loss": 1.8244, "step": 47486 }, { "epoch": 1.58, "grad_norm": 0.5685233473777771, "learning_rate": 0.0002749285184488154, "loss": 1.7355, "step": 47487 }, { "epoch": 1.58, "grad_norm": 0.5351362824440002, "learning_rate": 0.0002749181029114026, "loss": 1.7743, "step": 47488 }, { "epoch": 1.58, "grad_norm": 0.5174384713172913, "learning_rate": 0.00027490768740443574, "loss": 1.7781, "step": 47489 }, { "epoch": 1.58, "grad_norm": 0.5454174876213074, "learning_rate": 0.000274897271927927, "loss": 1.7666, "step": 47490 }, { "epoch": 1.58, "grad_norm": 0.5468964576721191, "learning_rate": 0.0002748868564818894, "loss": 1.7168, "step": 47491 }, { "epoch": 1.58, "grad_norm": 0.5424955487251282, "learning_rate": 0.0002748764410663354, "loss": 1.8281, "step": 47492 }, { "epoch": 1.58, "grad_norm": 0.5473820567131042, "learning_rate": 0.00027486602568127776, "loss": 1.7463, "step": 47493 }, { "epoch": 1.58, "grad_norm": 0.5480303168296814, "learning_rate": 0.000274855610326729, "loss": 1.6956, "step": 47494 }, { "epoch": 1.58, "grad_norm": 1.1584105491638184, "learning_rate": 0.0002748451950027018, "loss": 1.7834, "step": 47495 }, { "epoch": 1.58, "grad_norm": 0.5420719981193542, "learning_rate": 0.000274834779709209, "loss": 1.8455, "step": 47496 }, { "epoch": 1.58, "grad_norm": 0.5327961444854736, "learning_rate": 0.0002748243644462629, "loss": 1.7489, "step": 47497 }, { "epoch": 1.58, "grad_norm": 0.5425478219985962, "learning_rate": 0.0002748139492138763, "loss": 1.7208, "step": 47498 }, { "epoch": 1.58, "grad_norm": 0.5431861877441406, "learning_rate": 0.00027480353401206195, "loss": 1.7741, "step": 47499 }, { "epoch": 1.58, "grad_norm": 0.5640046000480652, "learning_rate": 0.00027479311884083237, "loss": 1.7461, "step": 47500 }, { "epoch": 1.58, "grad_norm": 0.5413689017295837, "learning_rate": 0.0002747827037002002, "loss": 1.7788, "step": 47501 }, { "epoch": 1.58, "grad_norm": 0.531042218208313, "learning_rate": 0.00027477228859017807, "loss": 1.7332, "step": 47502 }, { "epoch": 1.58, "grad_norm": 0.5731833577156067, "learning_rate": 0.00027476187351077884, "loss": 1.776, "step": 47503 }, { "epoch": 1.58, "grad_norm": 0.5413690805435181, "learning_rate": 0.00027475145846201477, "loss": 1.7483, "step": 47504 }, { "epoch": 1.58, "grad_norm": 0.551213800907135, "learning_rate": 0.0002747410434438988, "loss": 1.8202, "step": 47505 }, { "epoch": 1.58, "grad_norm": 0.5120099782943726, "learning_rate": 0.00027473062845644356, "loss": 1.7564, "step": 47506 }, { "epoch": 1.58, "grad_norm": 0.5379883050918579, "learning_rate": 0.0002747202134996615, "loss": 1.7253, "step": 47507 }, { "epoch": 1.58, "grad_norm": 0.5279549360275269, "learning_rate": 0.00027470979857356544, "loss": 1.6686, "step": 47508 }, { "epoch": 1.58, "grad_norm": 0.5345209836959839, "learning_rate": 0.000274699383678168, "loss": 1.7533, "step": 47509 }, { "epoch": 1.58, "grad_norm": 0.5514604449272156, "learning_rate": 0.0002746889688134817, "loss": 1.6891, "step": 47510 }, { "epoch": 1.58, "grad_norm": 0.5679988265037537, "learning_rate": 0.0002746785539795192, "loss": 1.7532, "step": 47511 }, { "epoch": 1.58, "grad_norm": 0.5549101829528809, "learning_rate": 0.00027466813917629326, "loss": 1.8427, "step": 47512 }, { "epoch": 1.58, "grad_norm": 0.524612307548523, "learning_rate": 0.00027465772440381654, "loss": 1.7589, "step": 47513 }, { "epoch": 1.58, "grad_norm": 0.5168747901916504, "learning_rate": 0.00027464730966210156, "loss": 1.7407, "step": 47514 }, { "epoch": 1.58, "grad_norm": 0.5359525680541992, "learning_rate": 0.00027463689495116095, "loss": 1.751, "step": 47515 }, { "epoch": 1.58, "grad_norm": 0.5154773592948914, "learning_rate": 0.0002746264802710075, "loss": 1.7594, "step": 47516 }, { "epoch": 1.58, "grad_norm": 0.5121973156929016, "learning_rate": 0.0002746160656216537, "loss": 1.7114, "step": 47517 }, { "epoch": 1.58, "grad_norm": 0.5521484017372131, "learning_rate": 0.0002746056510031123, "loss": 1.7624, "step": 47518 }, { "epoch": 1.58, "grad_norm": 0.5278701782226562, "learning_rate": 0.0002745952364153959, "loss": 1.8277, "step": 47519 }, { "epoch": 1.58, "grad_norm": 0.6325722336769104, "learning_rate": 0.00027458482185851714, "loss": 1.7283, "step": 47520 }, { "epoch": 1.58, "grad_norm": 0.5343660712242126, "learning_rate": 0.0002745744073324886, "loss": 1.752, "step": 47521 }, { "epoch": 1.58, "grad_norm": 0.5349492430686951, "learning_rate": 0.00027456399283732306, "loss": 1.7843, "step": 47522 }, { "epoch": 1.58, "grad_norm": 0.5445829629898071, "learning_rate": 0.0002745535783730331, "loss": 1.7725, "step": 47523 }, { "epoch": 1.58, "grad_norm": 0.5230178833007812, "learning_rate": 0.0002745431639396312, "loss": 1.7522, "step": 47524 }, { "epoch": 1.58, "grad_norm": 0.5221900939941406, "learning_rate": 0.00027453274953713025, "loss": 1.7189, "step": 47525 }, { "epoch": 1.58, "grad_norm": 0.5198723077774048, "learning_rate": 0.00027452233516554286, "loss": 1.7877, "step": 47526 }, { "epoch": 1.58, "grad_norm": 0.5305513739585876, "learning_rate": 0.00027451192082488153, "loss": 1.7121, "step": 47527 }, { "epoch": 1.58, "grad_norm": 0.5212634205818176, "learning_rate": 0.00027450150651515894, "loss": 1.7003, "step": 47528 }, { "epoch": 1.58, "grad_norm": 0.5228438973426819, "learning_rate": 0.0002744910922363879, "loss": 1.7667, "step": 47529 }, { "epoch": 1.58, "grad_norm": 0.5523504614830017, "learning_rate": 0.00027448067798858077, "loss": 1.7355, "step": 47530 }, { "epoch": 1.58, "grad_norm": 0.5252208709716797, "learning_rate": 0.0002744702637717503, "loss": 1.8352, "step": 47531 }, { "epoch": 1.58, "grad_norm": 0.5259588956832886, "learning_rate": 0.00027445984958590923, "loss": 1.6958, "step": 47532 }, { "epoch": 1.58, "grad_norm": 0.524494469165802, "learning_rate": 0.00027444943543107023, "loss": 1.7068, "step": 47533 }, { "epoch": 1.58, "grad_norm": 0.5322258472442627, "learning_rate": 0.00027443902130724576, "loss": 1.7378, "step": 47534 }, { "epoch": 1.58, "grad_norm": 0.5357791781425476, "learning_rate": 0.00027442860721444855, "loss": 1.7439, "step": 47535 }, { "epoch": 1.58, "grad_norm": 0.5096550583839417, "learning_rate": 0.00027441819315269134, "loss": 1.7193, "step": 47536 }, { "epoch": 1.58, "grad_norm": 0.5263587832450867, "learning_rate": 0.00027440777912198655, "loss": 1.8005, "step": 47537 }, { "epoch": 1.58, "grad_norm": 0.5167216062545776, "learning_rate": 0.0002743973651223469, "loss": 1.7885, "step": 47538 }, { "epoch": 1.58, "grad_norm": 0.5366268754005432, "learning_rate": 0.00027438695115378534, "loss": 1.7301, "step": 47539 }, { "epoch": 1.58, "grad_norm": 0.5305861234664917, "learning_rate": 0.000274376537216314, "loss": 1.7598, "step": 47540 }, { "epoch": 1.58, "grad_norm": 0.5511879920959473, "learning_rate": 0.0002743661233099458, "loss": 1.713, "step": 47541 }, { "epoch": 1.58, "grad_norm": 0.5268197059631348, "learning_rate": 0.00027435570943469344, "loss": 1.7896, "step": 47542 }, { "epoch": 1.58, "grad_norm": 0.5476568937301636, "learning_rate": 0.00027434529559056944, "loss": 1.7925, "step": 47543 }, { "epoch": 1.58, "grad_norm": 0.5384126305580139, "learning_rate": 0.00027433488177758643, "loss": 1.7662, "step": 47544 }, { "epoch": 1.58, "grad_norm": 0.5294668674468994, "learning_rate": 0.00027432446799575705, "loss": 1.7285, "step": 47545 }, { "epoch": 1.58, "grad_norm": 0.5479217171669006, "learning_rate": 0.0002743140542450942, "loss": 1.7656, "step": 47546 }, { "epoch": 1.58, "grad_norm": 0.5367677211761475, "learning_rate": 0.00027430364052561003, "loss": 1.7305, "step": 47547 }, { "epoch": 1.58, "grad_norm": 0.5339719653129578, "learning_rate": 0.0002742932268373176, "loss": 1.7605, "step": 47548 }, { "epoch": 1.58, "grad_norm": 0.5198001265525818, "learning_rate": 0.00027428281318022947, "loss": 1.7736, "step": 47549 }, { "epoch": 1.58, "grad_norm": 0.5452375411987305, "learning_rate": 0.00027427239955435807, "loss": 1.7217, "step": 47550 }, { "epoch": 1.58, "grad_norm": 0.52992182970047, "learning_rate": 0.00027426198595971625, "loss": 1.7813, "step": 47551 }, { "epoch": 1.58, "grad_norm": 0.5408432483673096, "learning_rate": 0.0002742515723963165, "loss": 1.7428, "step": 47552 }, { "epoch": 1.58, "grad_norm": 0.5522660613059998, "learning_rate": 0.00027424115886417176, "loss": 1.8733, "step": 47553 }, { "epoch": 1.58, "grad_norm": 0.517423152923584, "learning_rate": 0.00027423074536329423, "loss": 1.6926, "step": 47554 }, { "epoch": 1.58, "grad_norm": 0.5385900139808655, "learning_rate": 0.00027422033189369687, "loss": 1.7919, "step": 47555 }, { "epoch": 1.58, "grad_norm": 0.5303888916969299, "learning_rate": 0.0002742099184553923, "loss": 1.72, "step": 47556 }, { "epoch": 1.58, "grad_norm": 0.5316033363342285, "learning_rate": 0.000274199505048393, "loss": 1.8229, "step": 47557 }, { "epoch": 1.58, "grad_norm": 0.552076518535614, "learning_rate": 0.0002741890916727117, "loss": 1.7566, "step": 47558 }, { "epoch": 1.58, "grad_norm": 0.53278648853302, "learning_rate": 0.0002741786783283612, "loss": 1.7674, "step": 47559 }, { "epoch": 1.58, "grad_norm": 0.5273435115814209, "learning_rate": 0.0002741682650153538, "loss": 1.8629, "step": 47560 }, { "epoch": 1.58, "grad_norm": 0.5205374956130981, "learning_rate": 0.00027415785173370226, "loss": 1.7525, "step": 47561 }, { "epoch": 1.58, "grad_norm": 0.5426551103591919, "learning_rate": 0.00027414743848341936, "loss": 1.7956, "step": 47562 }, { "epoch": 1.58, "grad_norm": 0.5182554721832275, "learning_rate": 0.0002741370252645177, "loss": 1.8048, "step": 47563 }, { "epoch": 1.58, "grad_norm": 0.5218711495399475, "learning_rate": 0.00027412661207700987, "loss": 1.7601, "step": 47564 }, { "epoch": 1.58, "grad_norm": 0.536283016204834, "learning_rate": 0.00027411619892090853, "loss": 1.6708, "step": 47565 }, { "epoch": 1.58, "grad_norm": 0.5209915637969971, "learning_rate": 0.00027410578579622634, "loss": 1.7379, "step": 47566 }, { "epoch": 1.58, "grad_norm": 0.5414757132530212, "learning_rate": 0.0002740953727029758, "loss": 1.7547, "step": 47567 }, { "epoch": 1.58, "grad_norm": 0.5377335548400879, "learning_rate": 0.0002740849596411697, "loss": 1.7423, "step": 47568 }, { "epoch": 1.58, "grad_norm": 0.5239880681037903, "learning_rate": 0.0002740745466108208, "loss": 1.7243, "step": 47569 }, { "epoch": 1.58, "grad_norm": 0.5414917469024658, "learning_rate": 0.00027406413361194136, "loss": 1.7186, "step": 47570 }, { "epoch": 1.58, "grad_norm": 0.5432581305503845, "learning_rate": 0.0002740537206445443, "loss": 1.7809, "step": 47571 }, { "epoch": 1.58, "grad_norm": 0.5148763060569763, "learning_rate": 0.0002740433077086422, "loss": 1.8023, "step": 47572 }, { "epoch": 1.58, "grad_norm": 0.5346309542655945, "learning_rate": 0.0002740328948042478, "loss": 1.7928, "step": 47573 }, { "epoch": 1.58, "grad_norm": 0.5443794131278992, "learning_rate": 0.0002740224819313736, "loss": 1.8141, "step": 47574 }, { "epoch": 1.58, "grad_norm": 0.5196704268455505, "learning_rate": 0.00027401206909003216, "loss": 1.7893, "step": 47575 }, { "epoch": 1.58, "grad_norm": 0.538858950138092, "learning_rate": 0.00027400165628023643, "loss": 1.7126, "step": 47576 }, { "epoch": 1.58, "grad_norm": 0.5134643316268921, "learning_rate": 0.0002739912435019987, "loss": 1.7782, "step": 47577 }, { "epoch": 1.58, "grad_norm": 0.5270299315452576, "learning_rate": 0.0002739808307553318, "loss": 1.7564, "step": 47578 }, { "epoch": 1.58, "grad_norm": 0.5131019949913025, "learning_rate": 0.0002739704180402483, "loss": 1.7488, "step": 47579 }, { "epoch": 1.58, "grad_norm": 0.5346786975860596, "learning_rate": 0.000273960005356761, "loss": 1.7469, "step": 47580 }, { "epoch": 1.58, "grad_norm": 0.5230587124824524, "learning_rate": 0.00027394959270488236, "loss": 1.7491, "step": 47581 }, { "epoch": 1.58, "grad_norm": 0.5086740255355835, "learning_rate": 0.000273939180084625, "loss": 1.7735, "step": 47582 }, { "epoch": 1.58, "grad_norm": 0.5206144452095032, "learning_rate": 0.0002739287674960018, "loss": 1.7723, "step": 47583 }, { "epoch": 1.58, "grad_norm": 0.5124449133872986, "learning_rate": 0.0002739183549390251, "loss": 1.7685, "step": 47584 }, { "epoch": 1.58, "grad_norm": 0.5157077312469482, "learning_rate": 0.0002739079424137077, "loss": 1.6894, "step": 47585 }, { "epoch": 1.58, "grad_norm": 0.5178571343421936, "learning_rate": 0.00027389752992006225, "loss": 1.8193, "step": 47586 }, { "epoch": 1.58, "grad_norm": 0.5301551818847656, "learning_rate": 0.00027388711745810135, "loss": 1.7416, "step": 47587 }, { "epoch": 1.58, "grad_norm": 0.5657424330711365, "learning_rate": 0.0002738767050278376, "loss": 1.8071, "step": 47588 }, { "epoch": 1.58, "grad_norm": 0.525423526763916, "learning_rate": 0.00027386629262928365, "loss": 1.7832, "step": 47589 }, { "epoch": 1.58, "grad_norm": 0.5506057739257812, "learning_rate": 0.0002738558802624523, "loss": 1.8303, "step": 47590 }, { "epoch": 1.58, "grad_norm": 0.5231071710586548, "learning_rate": 0.000273845467927356, "loss": 1.7235, "step": 47591 }, { "epoch": 1.58, "grad_norm": 0.5483483672142029, "learning_rate": 0.00027383505562400735, "loss": 1.7418, "step": 47592 }, { "epoch": 1.58, "grad_norm": 0.5351446270942688, "learning_rate": 0.00027382464335241923, "loss": 1.729, "step": 47593 }, { "epoch": 1.58, "grad_norm": 0.5301723480224609, "learning_rate": 0.0002738142311126041, "loss": 1.7288, "step": 47594 }, { "epoch": 1.58, "grad_norm": 0.5406785607337952, "learning_rate": 0.00027380381890457456, "loss": 1.7627, "step": 47595 }, { "epoch": 1.58, "grad_norm": 0.5516313910484314, "learning_rate": 0.00027379340672834343, "loss": 1.7286, "step": 47596 }, { "epoch": 1.58, "grad_norm": 0.5482611656188965, "learning_rate": 0.00027378299458392317, "loss": 1.7437, "step": 47597 }, { "epoch": 1.58, "grad_norm": 0.5113010406494141, "learning_rate": 0.00027377258247132647, "loss": 1.7233, "step": 47598 }, { "epoch": 1.58, "grad_norm": 0.5325721502304077, "learning_rate": 0.00027376217039056595, "loss": 1.7806, "step": 47599 }, { "epoch": 1.58, "grad_norm": 0.5540051460266113, "learning_rate": 0.0002737517583416545, "loss": 1.7931, "step": 47600 }, { "epoch": 1.58, "grad_norm": 0.5255090594291687, "learning_rate": 0.0002737413463246044, "loss": 1.7684, "step": 47601 }, { "epoch": 1.58, "grad_norm": 0.5338660478591919, "learning_rate": 0.0002737309343394284, "loss": 1.7408, "step": 47602 }, { "epoch": 1.58, "grad_norm": 0.51891028881073, "learning_rate": 0.0002737205223861393, "loss": 1.7892, "step": 47603 }, { "epoch": 1.58, "grad_norm": 0.5324455499649048, "learning_rate": 0.0002737101104647495, "loss": 1.7574, "step": 47604 }, { "epoch": 1.58, "grad_norm": 0.5273996591567993, "learning_rate": 0.0002736996985752717, "loss": 1.7563, "step": 47605 }, { "epoch": 1.58, "grad_norm": 0.5437870025634766, "learning_rate": 0.0002736892867177188, "loss": 1.738, "step": 47606 }, { "epoch": 1.58, "grad_norm": 0.5337807536125183, "learning_rate": 0.00027367887489210306, "loss": 1.8099, "step": 47607 }, { "epoch": 1.58, "grad_norm": 0.5172799825668335, "learning_rate": 0.00027366846309843733, "loss": 1.7613, "step": 47608 }, { "epoch": 1.58, "grad_norm": 0.527043342590332, "learning_rate": 0.0002736580513367342, "loss": 1.7263, "step": 47609 }, { "epoch": 1.58, "grad_norm": 0.5302449464797974, "learning_rate": 0.00027364763960700643, "loss": 1.7291, "step": 47610 }, { "epoch": 1.58, "grad_norm": 0.5230087637901306, "learning_rate": 0.0002736372279092664, "loss": 1.7397, "step": 47611 }, { "epoch": 1.58, "grad_norm": 0.5153436064720154, "learning_rate": 0.0002736268162435269, "loss": 1.7337, "step": 47612 }, { "epoch": 1.58, "grad_norm": 0.5303691029548645, "learning_rate": 0.0002736164046098007, "loss": 1.7961, "step": 47613 }, { "epoch": 1.58, "grad_norm": 0.5227383375167847, "learning_rate": 0.0002736059930081001, "loss": 1.7953, "step": 47614 }, { "epoch": 1.58, "grad_norm": 0.5291464328765869, "learning_rate": 0.000273595581438438, "loss": 1.7302, "step": 47615 }, { "epoch": 1.58, "grad_norm": 0.5195274353027344, "learning_rate": 0.00027358516990082704, "loss": 1.7258, "step": 47616 }, { "epoch": 1.58, "grad_norm": 0.5648066401481628, "learning_rate": 0.00027357475839527974, "loss": 1.8305, "step": 47617 }, { "epoch": 1.58, "grad_norm": 0.5168322920799255, "learning_rate": 0.0002735643469218088, "loss": 1.7629, "step": 47618 }, { "epoch": 1.58, "grad_norm": 0.5331028699874878, "learning_rate": 0.0002735539354804268, "loss": 1.7272, "step": 47619 }, { "epoch": 1.58, "grad_norm": 0.5798478126525879, "learning_rate": 0.00027354352407114657, "loss": 1.8479, "step": 47620 }, { "epoch": 1.58, "grad_norm": 0.5375024080276489, "learning_rate": 0.00027353311269398046, "loss": 1.7319, "step": 47621 }, { "epoch": 1.58, "grad_norm": 0.5309854745864868, "learning_rate": 0.00027352270134894127, "loss": 1.716, "step": 47622 }, { "epoch": 1.58, "grad_norm": 0.5091412663459778, "learning_rate": 0.0002735122900360417, "loss": 1.7461, "step": 47623 }, { "epoch": 1.58, "grad_norm": 0.5337027907371521, "learning_rate": 0.0002735018787552943, "loss": 1.7455, "step": 47624 }, { "epoch": 1.58, "grad_norm": 0.517417848110199, "learning_rate": 0.0002734914675067116, "loss": 1.8143, "step": 47625 }, { "epoch": 1.58, "grad_norm": 0.5190092325210571, "learning_rate": 0.0002734810562903065, "loss": 1.7981, "step": 47626 }, { "epoch": 1.58, "grad_norm": 0.5376485586166382, "learning_rate": 0.00027347064510609137, "loss": 1.7985, "step": 47627 }, { "epoch": 1.58, "grad_norm": 0.6419046521186829, "learning_rate": 0.00027346023395407893, "loss": 1.8383, "step": 47628 }, { "epoch": 1.58, "grad_norm": 0.5829647183418274, "learning_rate": 0.000273449822834282, "loss": 1.7167, "step": 47629 }, { "epoch": 1.58, "grad_norm": 0.534516453742981, "learning_rate": 0.000273439411746713, "loss": 1.68, "step": 47630 }, { "epoch": 1.58, "grad_norm": 0.530677318572998, "learning_rate": 0.00027342900069138467, "loss": 1.7229, "step": 47631 }, { "epoch": 1.58, "grad_norm": 0.5171632170677185, "learning_rate": 0.00027341858966830956, "loss": 1.6972, "step": 47632 }, { "epoch": 1.58, "grad_norm": 0.5478945374488831, "learning_rate": 0.00027340817867750047, "loss": 1.7268, "step": 47633 }, { "epoch": 1.58, "grad_norm": 0.5625711679458618, "learning_rate": 0.0002733977677189698, "loss": 1.7538, "step": 47634 }, { "epoch": 1.58, "grad_norm": 0.52251797914505, "learning_rate": 0.0002733873567927304, "loss": 1.717, "step": 47635 }, { "epoch": 1.58, "grad_norm": 0.5322450995445251, "learning_rate": 0.0002733769458987948, "loss": 1.6937, "step": 47636 }, { "epoch": 1.58, "grad_norm": 0.5201094746589661, "learning_rate": 0.00027336653503717574, "loss": 1.7115, "step": 47637 }, { "epoch": 1.58, "grad_norm": 0.5634233951568604, "learning_rate": 0.0002733561242078857, "loss": 1.8429, "step": 47638 }, { "epoch": 1.58, "grad_norm": 0.5370164513587952, "learning_rate": 0.0002733457134109374, "loss": 1.7446, "step": 47639 }, { "epoch": 1.58, "grad_norm": 0.5259678363800049, "learning_rate": 0.00027333530264634357, "loss": 1.7528, "step": 47640 }, { "epoch": 1.59, "grad_norm": 0.5292342305183411, "learning_rate": 0.0002733248919141166, "loss": 1.815, "step": 47641 }, { "epoch": 1.59, "grad_norm": 1.1696934700012207, "learning_rate": 0.00027331448121426937, "loss": 1.7745, "step": 47642 }, { "epoch": 1.59, "grad_norm": 0.6054373979568481, "learning_rate": 0.00027330407054681447, "loss": 1.8042, "step": 47643 }, { "epoch": 1.59, "grad_norm": 0.5451382398605347, "learning_rate": 0.0002732936599117644, "loss": 1.7734, "step": 47644 }, { "epoch": 1.59, "grad_norm": 0.5249419808387756, "learning_rate": 0.00027328324930913197, "loss": 1.6847, "step": 47645 }, { "epoch": 1.59, "grad_norm": 0.5362652540206909, "learning_rate": 0.00027327283873892973, "loss": 1.8183, "step": 47646 }, { "epoch": 1.59, "grad_norm": 0.5371633768081665, "learning_rate": 0.00027326242820117035, "loss": 1.7718, "step": 47647 }, { "epoch": 1.59, "grad_norm": 0.5347884893417358, "learning_rate": 0.0002732520176958663, "loss": 1.786, "step": 47648 }, { "epoch": 1.59, "grad_norm": 0.5341176986694336, "learning_rate": 0.0002732416072230304, "loss": 1.7292, "step": 47649 }, { "epoch": 1.59, "grad_norm": 0.5344330668449402, "learning_rate": 0.0002732311967826754, "loss": 1.6591, "step": 47650 }, { "epoch": 1.59, "grad_norm": 0.5315181612968445, "learning_rate": 0.00027322078637481364, "loss": 1.7192, "step": 47651 }, { "epoch": 1.59, "grad_norm": 0.5459631681442261, "learning_rate": 0.00027321037599945794, "loss": 1.7168, "step": 47652 }, { "epoch": 1.59, "grad_norm": 0.5580579042434692, "learning_rate": 0.000273199965656621, "loss": 1.8152, "step": 47653 }, { "epoch": 1.59, "grad_norm": 0.5246623754501343, "learning_rate": 0.0002731895553463152, "loss": 1.6862, "step": 47654 }, { "epoch": 1.59, "grad_norm": 0.5107364058494568, "learning_rate": 0.0002731791450685533, "loss": 1.741, "step": 47655 }, { "epoch": 1.59, "grad_norm": 0.515311598777771, "learning_rate": 0.0002731687348233482, "loss": 1.6974, "step": 47656 }, { "epoch": 1.59, "grad_norm": 0.5253468751907349, "learning_rate": 0.000273158324610712, "loss": 1.7334, "step": 47657 }, { "epoch": 1.59, "grad_norm": 0.5349122285842896, "learning_rate": 0.00027314791443065783, "loss": 1.7299, "step": 47658 }, { "epoch": 1.59, "grad_norm": 0.5301855802536011, "learning_rate": 0.0002731375042831981, "loss": 1.7657, "step": 47659 }, { "epoch": 1.59, "grad_norm": 0.5295202136039734, "learning_rate": 0.0002731270941683455, "loss": 1.7132, "step": 47660 }, { "epoch": 1.59, "grad_norm": 0.5257949233055115, "learning_rate": 0.00027311668408611263, "loss": 1.7909, "step": 47661 }, { "epoch": 1.59, "grad_norm": 0.5307695865631104, "learning_rate": 0.0002731062740365121, "loss": 1.8313, "step": 47662 }, { "epoch": 1.59, "grad_norm": 0.5450847744941711, "learning_rate": 0.0002730958640195567, "loss": 1.7335, "step": 47663 }, { "epoch": 1.59, "grad_norm": 0.5220142602920532, "learning_rate": 0.00027308545403525883, "loss": 1.6957, "step": 47664 }, { "epoch": 1.59, "grad_norm": 0.5328345894813538, "learning_rate": 0.0002730750440836313, "loss": 1.7643, "step": 47665 }, { "epoch": 1.59, "grad_norm": 0.5315059423446655, "learning_rate": 0.00027306463416468675, "loss": 1.8202, "step": 47666 }, { "epoch": 1.59, "grad_norm": 0.5234146118164062, "learning_rate": 0.00027305422427843773, "loss": 1.7546, "step": 47667 }, { "epoch": 1.59, "grad_norm": 0.551678478717804, "learning_rate": 0.0002730438144248969, "loss": 1.7066, "step": 47668 }, { "epoch": 1.59, "grad_norm": 0.5262089371681213, "learning_rate": 0.0002730334046040768, "loss": 1.776, "step": 47669 }, { "epoch": 1.59, "grad_norm": 0.554337739944458, "learning_rate": 0.0002730229948159904, "loss": 1.7602, "step": 47670 }, { "epoch": 1.59, "grad_norm": 0.5188795328140259, "learning_rate": 0.00027301258506064993, "loss": 1.7426, "step": 47671 }, { "epoch": 1.59, "grad_norm": 0.5379037857055664, "learning_rate": 0.0002730021753380683, "loss": 1.8061, "step": 47672 }, { "epoch": 1.59, "grad_norm": 0.5325033068656921, "learning_rate": 0.00027299176564825805, "loss": 1.7898, "step": 47673 }, { "epoch": 1.59, "grad_norm": 0.5408854484558105, "learning_rate": 0.00027298135599123177, "loss": 1.8074, "step": 47674 }, { "epoch": 1.59, "grad_norm": 0.531377911567688, "learning_rate": 0.00027297094636700216, "loss": 1.7225, "step": 47675 }, { "epoch": 1.59, "grad_norm": 0.5488207340240479, "learning_rate": 0.0002729605367755819, "loss": 1.7447, "step": 47676 }, { "epoch": 1.59, "grad_norm": 0.5173225402832031, "learning_rate": 0.0002729501272169835, "loss": 1.6997, "step": 47677 }, { "epoch": 1.59, "grad_norm": 0.5299174189567566, "learning_rate": 0.0002729397176912196, "loss": 1.6676, "step": 47678 }, { "epoch": 1.59, "grad_norm": 0.5186449885368347, "learning_rate": 0.00027292930819830297, "loss": 1.7385, "step": 47679 }, { "epoch": 1.59, "grad_norm": 0.5278493762016296, "learning_rate": 0.0002729188987382462, "loss": 1.7461, "step": 47680 }, { "epoch": 1.59, "grad_norm": 0.5682671070098877, "learning_rate": 0.00027290848931106187, "loss": 1.8429, "step": 47681 }, { "epoch": 1.59, "grad_norm": 0.5239290595054626, "learning_rate": 0.0002728980799167626, "loss": 1.7417, "step": 47682 }, { "epoch": 1.59, "grad_norm": 0.5461407899856567, "learning_rate": 0.00027288767055536116, "loss": 1.7777, "step": 47683 }, { "epoch": 1.59, "grad_norm": 0.5352383255958557, "learning_rate": 0.00027287726122687003, "loss": 1.7837, "step": 47684 }, { "epoch": 1.59, "grad_norm": 0.5231577157974243, "learning_rate": 0.0002728668519313018, "loss": 1.7245, "step": 47685 }, { "epoch": 1.59, "grad_norm": 0.5368766188621521, "learning_rate": 0.00027285644266866944, "loss": 1.6869, "step": 47686 }, { "epoch": 1.59, "grad_norm": 0.5139430165290833, "learning_rate": 0.0002728460334389852, "loss": 1.6737, "step": 47687 }, { "epoch": 1.59, "grad_norm": 0.5711789131164551, "learning_rate": 0.0002728356242422619, "loss": 1.7725, "step": 47688 }, { "epoch": 1.59, "grad_norm": 0.5657854676246643, "learning_rate": 0.00027282521507851216, "loss": 1.821, "step": 47689 }, { "epoch": 1.59, "grad_norm": 0.532042384147644, "learning_rate": 0.00027281480594774866, "loss": 1.8277, "step": 47690 }, { "epoch": 1.59, "grad_norm": 0.5392037034034729, "learning_rate": 0.00027280439684998394, "loss": 1.7051, "step": 47691 }, { "epoch": 1.59, "grad_norm": 0.5067692399024963, "learning_rate": 0.0002727939877852306, "loss": 1.8096, "step": 47692 }, { "epoch": 1.59, "grad_norm": 0.5125330090522766, "learning_rate": 0.0002727835787535015, "loss": 1.8008, "step": 47693 }, { "epoch": 1.59, "grad_norm": 0.5324103832244873, "learning_rate": 0.000272773169754809, "loss": 1.7147, "step": 47694 }, { "epoch": 1.59, "grad_norm": 0.5068297982215881, "learning_rate": 0.0002727627607891659, "loss": 1.7662, "step": 47695 }, { "epoch": 1.59, "grad_norm": 0.5391927361488342, "learning_rate": 0.00027275235185658474, "loss": 1.7079, "step": 47696 }, { "epoch": 1.59, "grad_norm": 0.5484602451324463, "learning_rate": 0.0002727419429570783, "loss": 1.8517, "step": 47697 }, { "epoch": 1.59, "grad_norm": 0.5371079444885254, "learning_rate": 0.00027273153409065914, "loss": 1.7321, "step": 47698 }, { "epoch": 1.59, "grad_norm": 0.5292302966117859, "learning_rate": 0.00027272112525733973, "loss": 1.6665, "step": 47699 }, { "epoch": 1.59, "grad_norm": 0.5425273776054382, "learning_rate": 0.0002727107164571331, "loss": 1.7481, "step": 47700 }, { "epoch": 1.59, "grad_norm": 0.5217939615249634, "learning_rate": 0.0002727003076900514, "loss": 1.7679, "step": 47701 }, { "epoch": 1.59, "grad_norm": 0.9888283610343933, "learning_rate": 0.0002726898989561076, "loss": 1.8489, "step": 47702 }, { "epoch": 1.59, "grad_norm": 0.5198605060577393, "learning_rate": 0.00027267949025531433, "loss": 1.738, "step": 47703 }, { "epoch": 1.59, "grad_norm": 0.5078242421150208, "learning_rate": 0.000272669081587684, "loss": 1.673, "step": 47704 }, { "epoch": 1.59, "grad_norm": 0.541225790977478, "learning_rate": 0.00027265867295322944, "loss": 1.7264, "step": 47705 }, { "epoch": 1.59, "grad_norm": 0.5344299077987671, "learning_rate": 0.00027264826435196307, "loss": 1.836, "step": 47706 }, { "epoch": 1.59, "grad_norm": 0.5549006462097168, "learning_rate": 0.000272637855783898, "loss": 1.7695, "step": 47707 }, { "epoch": 1.59, "grad_norm": 0.5367564558982849, "learning_rate": 0.00027262744724904623, "loss": 1.7006, "step": 47708 }, { "epoch": 1.59, "grad_norm": 0.5200863480567932, "learning_rate": 0.00027261703874742086, "loss": 1.7645, "step": 47709 }, { "epoch": 1.59, "grad_norm": 0.5406054854393005, "learning_rate": 0.00027260663027903437, "loss": 1.8443, "step": 47710 }, { "epoch": 1.59, "grad_norm": 0.5267171263694763, "learning_rate": 0.0002725962218438994, "loss": 1.6776, "step": 47711 }, { "epoch": 1.59, "grad_norm": 0.5379601120948792, "learning_rate": 0.0002725858134420285, "loss": 1.7079, "step": 47712 }, { "epoch": 1.59, "grad_norm": 0.5428712368011475, "learning_rate": 0.0002725754050734345, "loss": 1.7262, "step": 47713 }, { "epoch": 1.59, "grad_norm": 0.5368234515190125, "learning_rate": 0.0002725649967381298, "loss": 1.8006, "step": 47714 }, { "epoch": 1.59, "grad_norm": 0.5239532589912415, "learning_rate": 0.00027255458843612713, "loss": 1.7466, "step": 47715 }, { "epoch": 1.59, "grad_norm": 0.5397892594337463, "learning_rate": 0.0002725441801674392, "loss": 1.6905, "step": 47716 }, { "epoch": 1.59, "grad_norm": 0.5298145413398743, "learning_rate": 0.00027253377193207863, "loss": 1.7688, "step": 47717 }, { "epoch": 1.59, "grad_norm": 0.527877926826477, "learning_rate": 0.000272523363730058, "loss": 1.7555, "step": 47718 }, { "epoch": 1.59, "grad_norm": 0.5190545320510864, "learning_rate": 0.0002725129555613899, "loss": 1.7589, "step": 47719 }, { "epoch": 1.59, "grad_norm": 0.521009087562561, "learning_rate": 0.00027250254742608714, "loss": 1.7761, "step": 47720 }, { "epoch": 1.59, "grad_norm": 0.5165894031524658, "learning_rate": 0.00027249213932416217, "loss": 1.7865, "step": 47721 }, { "epoch": 1.59, "grad_norm": 0.5262646079063416, "learning_rate": 0.0002724817312556276, "loss": 1.7794, "step": 47722 }, { "epoch": 1.59, "grad_norm": 0.5176414847373962, "learning_rate": 0.0002724713232204963, "loss": 1.7226, "step": 47723 }, { "epoch": 1.59, "grad_norm": 0.9234909415245056, "learning_rate": 0.0002724609152187806, "loss": 1.8133, "step": 47724 }, { "epoch": 1.59, "grad_norm": 0.5261099338531494, "learning_rate": 0.00027245050725049344, "loss": 1.6882, "step": 47725 }, { "epoch": 1.59, "grad_norm": 0.5149356126785278, "learning_rate": 0.0002724400993156472, "loss": 1.6835, "step": 47726 }, { "epoch": 1.59, "grad_norm": 0.5227946043014526, "learning_rate": 0.0002724296914142547, "loss": 1.7531, "step": 47727 }, { "epoch": 1.59, "grad_norm": 0.5307123064994812, "learning_rate": 0.0002724192835463285, "loss": 1.7503, "step": 47728 }, { "epoch": 1.59, "grad_norm": 0.5242488980293274, "learning_rate": 0.0002724088757118811, "loss": 1.8, "step": 47729 }, { "epoch": 1.59, "grad_norm": 0.5308505296707153, "learning_rate": 0.0002723984679109254, "loss": 1.7263, "step": 47730 }, { "epoch": 1.59, "grad_norm": 0.5357364416122437, "learning_rate": 0.0002723880601434738, "loss": 1.8081, "step": 47731 }, { "epoch": 1.59, "grad_norm": 0.5178228616714478, "learning_rate": 0.00027237765240953907, "loss": 1.8245, "step": 47732 }, { "epoch": 1.59, "grad_norm": 0.5329044461250305, "learning_rate": 0.00027236724470913385, "loss": 1.7716, "step": 47733 }, { "epoch": 1.59, "grad_norm": 0.5122511982917786, "learning_rate": 0.00027235683704227064, "loss": 1.735, "step": 47734 }, { "epoch": 1.59, "grad_norm": 0.5429859161376953, "learning_rate": 0.0002723464294089622, "loss": 1.7526, "step": 47735 }, { "epoch": 1.59, "grad_norm": 0.5438764691352844, "learning_rate": 0.000272336021809221, "loss": 1.7757, "step": 47736 }, { "epoch": 1.59, "grad_norm": 0.5363244414329529, "learning_rate": 0.00027232561424306, "loss": 1.8297, "step": 47737 }, { "epoch": 1.59, "grad_norm": 0.5296973586082458, "learning_rate": 0.00027231520671049147, "loss": 1.7899, "step": 47738 }, { "epoch": 1.59, "grad_norm": 0.5239097476005554, "learning_rate": 0.00027230479921152824, "loss": 1.759, "step": 47739 }, { "epoch": 1.59, "grad_norm": 0.5174229741096497, "learning_rate": 0.000272294391746183, "loss": 1.7613, "step": 47740 }, { "epoch": 1.59, "grad_norm": 0.5394114851951599, "learning_rate": 0.00027228398431446816, "loss": 1.906, "step": 47741 }, { "epoch": 1.59, "grad_norm": 0.5139964818954468, "learning_rate": 0.00027227357691639656, "loss": 1.7871, "step": 47742 }, { "epoch": 1.59, "grad_norm": 0.5210895538330078, "learning_rate": 0.00027226316955198076, "loss": 1.8019, "step": 47743 }, { "epoch": 1.59, "grad_norm": 0.5293024778366089, "learning_rate": 0.00027225276222123324, "loss": 1.7498, "step": 47744 }, { "epoch": 1.59, "grad_norm": 0.5295391082763672, "learning_rate": 0.0002722423549241669, "loss": 1.7406, "step": 47745 }, { "epoch": 1.59, "grad_norm": 0.5314894914627075, "learning_rate": 0.0002722319476607942, "loss": 1.7084, "step": 47746 }, { "epoch": 1.59, "grad_norm": 0.5307803153991699, "learning_rate": 0.0002722215404311279, "loss": 1.785, "step": 47747 }, { "epoch": 1.59, "grad_norm": 0.5249597430229187, "learning_rate": 0.0002722111332351805, "loss": 1.7588, "step": 47748 }, { "epoch": 1.59, "grad_norm": 0.5272547006607056, "learning_rate": 0.00027220072607296467, "loss": 1.7702, "step": 47749 }, { "epoch": 1.59, "grad_norm": 0.5208189487457275, "learning_rate": 0.00027219031894449317, "loss": 1.7672, "step": 47750 }, { "epoch": 1.59, "grad_norm": 0.5399715304374695, "learning_rate": 0.00027217991184977835, "loss": 1.7167, "step": 47751 }, { "epoch": 1.59, "grad_norm": 0.5187963247299194, "learning_rate": 0.0002721695047888331, "loss": 1.7441, "step": 47752 }, { "epoch": 1.59, "grad_norm": 0.5423560738563538, "learning_rate": 0.0002721590977616701, "loss": 1.7333, "step": 47753 }, { "epoch": 1.59, "grad_norm": 0.5284831523895264, "learning_rate": 0.0002721486907683017, "loss": 1.7143, "step": 47754 }, { "epoch": 1.59, "grad_norm": 0.5402932167053223, "learning_rate": 0.0002721382838087407, "loss": 1.6815, "step": 47755 }, { "epoch": 1.59, "grad_norm": 0.5403480529785156, "learning_rate": 0.00027212787688299974, "loss": 1.7316, "step": 47756 }, { "epoch": 1.59, "grad_norm": 0.5398887991905212, "learning_rate": 0.00027211746999109145, "loss": 1.7259, "step": 47757 }, { "epoch": 1.59, "grad_norm": 0.5211501717567444, "learning_rate": 0.0002721070631330284, "loss": 1.7456, "step": 47758 }, { "epoch": 1.59, "grad_norm": 0.5561370849609375, "learning_rate": 0.00027209665630882325, "loss": 1.6966, "step": 47759 }, { "epoch": 1.59, "grad_norm": 0.5331627130508423, "learning_rate": 0.00027208624951848877, "loss": 1.7586, "step": 47760 }, { "epoch": 1.59, "grad_norm": 0.5215796232223511, "learning_rate": 0.0002720758427620374, "loss": 1.7498, "step": 47761 }, { "epoch": 1.59, "grad_norm": 0.5391333103179932, "learning_rate": 0.0002720654360394818, "loss": 1.7793, "step": 47762 }, { "epoch": 1.59, "grad_norm": 0.5349199175834656, "learning_rate": 0.0002720550293508347, "loss": 1.7146, "step": 47763 }, { "epoch": 1.59, "grad_norm": 0.5300636887550354, "learning_rate": 0.00027204462269610864, "loss": 1.7142, "step": 47764 }, { "epoch": 1.59, "grad_norm": 0.5336624383926392, "learning_rate": 0.00027203421607531627, "loss": 1.7487, "step": 47765 }, { "epoch": 1.59, "grad_norm": 0.5366919636726379, "learning_rate": 0.00027202380948847027, "loss": 1.6911, "step": 47766 }, { "epoch": 1.59, "grad_norm": 0.5213238596916199, "learning_rate": 0.00027201340293558327, "loss": 1.6746, "step": 47767 }, { "epoch": 1.59, "grad_norm": 0.5289973020553589, "learning_rate": 0.00027200299641666785, "loss": 1.788, "step": 47768 }, { "epoch": 1.59, "grad_norm": 0.5483394265174866, "learning_rate": 0.0002719925899317367, "loss": 1.6971, "step": 47769 }, { "epoch": 1.59, "grad_norm": 0.5293442010879517, "learning_rate": 0.0002719821834808024, "loss": 1.7477, "step": 47770 }, { "epoch": 1.59, "grad_norm": 0.5216438174247742, "learning_rate": 0.0002719717770638776, "loss": 1.7279, "step": 47771 }, { "epoch": 1.59, "grad_norm": 0.5455155968666077, "learning_rate": 0.0002719613706809749, "loss": 1.7753, "step": 47772 }, { "epoch": 1.59, "grad_norm": 0.5488598346710205, "learning_rate": 0.00027195096433210703, "loss": 1.7438, "step": 47773 }, { "epoch": 1.59, "grad_norm": 0.5161619782447815, "learning_rate": 0.00027194055801728643, "loss": 1.748, "step": 47774 }, { "epoch": 1.59, "grad_norm": 0.5240254402160645, "learning_rate": 0.00027193015173652594, "loss": 1.6691, "step": 47775 }, { "epoch": 1.59, "grad_norm": 0.5224968791007996, "learning_rate": 0.0002719197454898381, "loss": 1.7564, "step": 47776 }, { "epoch": 1.59, "grad_norm": 0.5230571031570435, "learning_rate": 0.0002719093392772356, "loss": 1.7245, "step": 47777 }, { "epoch": 1.59, "grad_norm": 0.5318366289138794, "learning_rate": 0.000271898933098731, "loss": 1.8223, "step": 47778 }, { "epoch": 1.59, "grad_norm": 0.5414050221443176, "learning_rate": 0.0002718885269543369, "loss": 1.7241, "step": 47779 }, { "epoch": 1.59, "grad_norm": 0.5316558480262756, "learning_rate": 0.0002718781208440662, "loss": 1.7353, "step": 47780 }, { "epoch": 1.59, "grad_norm": 0.5504138469696045, "learning_rate": 0.000271867714767931, "loss": 1.7987, "step": 47781 }, { "epoch": 1.59, "grad_norm": 0.5306800007820129, "learning_rate": 0.0002718573087259444, "loss": 1.7176, "step": 47782 }, { "epoch": 1.59, "grad_norm": 0.5620908141136169, "learning_rate": 0.0002718469027181189, "loss": 1.8775, "step": 47783 }, { "epoch": 1.59, "grad_norm": 0.5292322635650635, "learning_rate": 0.0002718364967444671, "loss": 1.7676, "step": 47784 }, { "epoch": 1.59, "grad_norm": 0.5066142678260803, "learning_rate": 0.0002718260908050016, "loss": 1.7685, "step": 47785 }, { "epoch": 1.59, "grad_norm": 0.5522319674491882, "learning_rate": 0.00027181568489973503, "loss": 1.7904, "step": 47786 }, { "epoch": 1.59, "grad_norm": 0.5587641596794128, "learning_rate": 0.0002718052790286803, "loss": 1.7805, "step": 47787 }, { "epoch": 1.59, "grad_norm": 0.5287578701972961, "learning_rate": 0.00027179487319184956, "loss": 1.7596, "step": 47788 }, { "epoch": 1.59, "grad_norm": 0.5292407274246216, "learning_rate": 0.00027178446738925573, "loss": 1.7524, "step": 47789 }, { "epoch": 1.59, "grad_norm": 0.5341612100601196, "learning_rate": 0.0002717740616209115, "loss": 1.722, "step": 47790 }, { "epoch": 1.59, "grad_norm": 0.5447403192520142, "learning_rate": 0.00027176365588682934, "loss": 1.8174, "step": 47791 }, { "epoch": 1.59, "grad_norm": 0.5607990622520447, "learning_rate": 0.00027175325018702194, "loss": 1.7535, "step": 47792 }, { "epoch": 1.59, "grad_norm": 0.5525458455085754, "learning_rate": 0.000271742844521502, "loss": 1.7777, "step": 47793 }, { "epoch": 1.59, "grad_norm": 0.5372048020362854, "learning_rate": 0.00027173243889028203, "loss": 1.7975, "step": 47794 }, { "epoch": 1.59, "grad_norm": 0.5252063274383545, "learning_rate": 0.00027172203329337463, "loss": 1.755, "step": 47795 }, { "epoch": 1.59, "grad_norm": 0.5727666020393372, "learning_rate": 0.0002717116277307926, "loss": 1.7919, "step": 47796 }, { "epoch": 1.59, "grad_norm": 0.540307879447937, "learning_rate": 0.0002717012222025485, "loss": 1.7965, "step": 47797 }, { "epoch": 1.59, "grad_norm": 0.5402153730392456, "learning_rate": 0.0002716908167086549, "loss": 1.7402, "step": 47798 }, { "epoch": 1.59, "grad_norm": 1.3919222354888916, "learning_rate": 0.00027168041124912454, "loss": 1.7368, "step": 47799 }, { "epoch": 1.59, "grad_norm": 0.5592004656791687, "learning_rate": 0.00027167000582396995, "loss": 1.7128, "step": 47800 }, { "epoch": 1.59, "grad_norm": 0.5506813526153564, "learning_rate": 0.0002716596004332038, "loss": 1.7792, "step": 47801 }, { "epoch": 1.59, "grad_norm": 0.5231301188468933, "learning_rate": 0.00027164919507683864, "loss": 1.6752, "step": 47802 }, { "epoch": 1.59, "grad_norm": 0.5385015606880188, "learning_rate": 0.0002716387897548874, "loss": 1.7814, "step": 47803 }, { "epoch": 1.59, "grad_norm": 0.5091415643692017, "learning_rate": 0.0002716283844673623, "loss": 1.7333, "step": 47804 }, { "epoch": 1.59, "grad_norm": 0.5371158123016357, "learning_rate": 0.00027161797921427616, "loss": 1.8397, "step": 47805 }, { "epoch": 1.59, "grad_norm": 0.5390451550483704, "learning_rate": 0.00027160757399564164, "loss": 1.7934, "step": 47806 }, { "epoch": 1.59, "grad_norm": 0.5159468650817871, "learning_rate": 0.0002715971688114714, "loss": 1.6933, "step": 47807 }, { "epoch": 1.59, "grad_norm": 0.5319369435310364, "learning_rate": 0.000271586763661778, "loss": 1.7773, "step": 47808 }, { "epoch": 1.59, "grad_norm": 0.5268774628639221, "learning_rate": 0.000271576358546574, "loss": 1.7941, "step": 47809 }, { "epoch": 1.59, "grad_norm": 0.5194815993309021, "learning_rate": 0.00027156595346587226, "loss": 1.7183, "step": 47810 }, { "epoch": 1.59, "grad_norm": 0.5359110832214355, "learning_rate": 0.0002715555484196851, "loss": 1.7388, "step": 47811 }, { "epoch": 1.59, "grad_norm": 0.52504563331604, "learning_rate": 0.00027154514340802535, "loss": 1.7347, "step": 47812 }, { "epoch": 1.59, "grad_norm": 0.5243862867355347, "learning_rate": 0.0002715347384309057, "loss": 1.7623, "step": 47813 }, { "epoch": 1.59, "grad_norm": 0.5413926243782043, "learning_rate": 0.0002715243334883386, "loss": 1.7388, "step": 47814 }, { "epoch": 1.59, "grad_norm": 0.5522516965866089, "learning_rate": 0.00027151392858033684, "loss": 1.7616, "step": 47815 }, { "epoch": 1.59, "grad_norm": 0.5270829200744629, "learning_rate": 0.00027150352370691283, "loss": 1.786, "step": 47816 }, { "epoch": 1.59, "grad_norm": 0.5306275486946106, "learning_rate": 0.00027149311886807953, "loss": 1.8083, "step": 47817 }, { "epoch": 1.59, "grad_norm": 0.57418292760849, "learning_rate": 0.0002714827140638492, "loss": 1.8019, "step": 47818 }, { "epoch": 1.59, "grad_norm": 0.5219516754150391, "learning_rate": 0.00027147230929423473, "loss": 1.6883, "step": 47819 }, { "epoch": 1.59, "grad_norm": 0.5321807861328125, "learning_rate": 0.00027146190455924876, "loss": 1.7517, "step": 47820 }, { "epoch": 1.59, "grad_norm": 0.5418040752410889, "learning_rate": 0.00027145149985890375, "loss": 1.8182, "step": 47821 }, { "epoch": 1.59, "grad_norm": 0.5365766286849976, "learning_rate": 0.0002714410951932124, "loss": 1.7037, "step": 47822 }, { "epoch": 1.59, "grad_norm": 0.5172626376152039, "learning_rate": 0.0002714306905621873, "loss": 1.7911, "step": 47823 }, { "epoch": 1.59, "grad_norm": 0.5278835892677307, "learning_rate": 0.0002714202859658413, "loss": 1.8004, "step": 47824 }, { "epoch": 1.59, "grad_norm": 0.5332186818122864, "learning_rate": 0.0002714098814041867, "loss": 1.7995, "step": 47825 }, { "epoch": 1.59, "grad_norm": 0.5311266779899597, "learning_rate": 0.00027139947687723634, "loss": 1.7817, "step": 47826 }, { "epoch": 1.59, "grad_norm": 0.5486375689506531, "learning_rate": 0.00027138907238500286, "loss": 1.7532, "step": 47827 }, { "epoch": 1.59, "grad_norm": 0.5332619547843933, "learning_rate": 0.0002713786679274988, "loss": 1.7954, "step": 47828 }, { "epoch": 1.59, "grad_norm": 0.5258405208587646, "learning_rate": 0.00027136826350473677, "loss": 1.7714, "step": 47829 }, { "epoch": 1.59, "grad_norm": 0.5383103489875793, "learning_rate": 0.0002713578591167296, "loss": 1.7479, "step": 47830 }, { "epoch": 1.59, "grad_norm": 0.5357747077941895, "learning_rate": 0.00027134745476348963, "loss": 1.7585, "step": 47831 }, { "epoch": 1.59, "grad_norm": 0.5259218811988831, "learning_rate": 0.0002713370504450295, "loss": 1.7575, "step": 47832 }, { "epoch": 1.59, "grad_norm": 0.5558814406394958, "learning_rate": 0.00027132664616136217, "loss": 1.8041, "step": 47833 }, { "epoch": 1.59, "grad_norm": 0.5143795609474182, "learning_rate": 0.0002713162419125001, "loss": 1.7167, "step": 47834 }, { "epoch": 1.59, "grad_norm": 0.5372928977012634, "learning_rate": 0.0002713058376984558, "loss": 1.7923, "step": 47835 }, { "epoch": 1.59, "grad_norm": 0.537765622138977, "learning_rate": 0.00027129543351924195, "loss": 1.7376, "step": 47836 }, { "epoch": 1.59, "grad_norm": 0.5474253296852112, "learning_rate": 0.0002712850293748713, "loss": 1.7483, "step": 47837 }, { "epoch": 1.59, "grad_norm": 0.5104179382324219, "learning_rate": 0.0002712746252653563, "loss": 1.7344, "step": 47838 }, { "epoch": 1.59, "grad_norm": 0.5144397020339966, "learning_rate": 0.0002712642211907097, "loss": 1.7432, "step": 47839 }, { "epoch": 1.59, "grad_norm": 0.5455646514892578, "learning_rate": 0.0002712538171509442, "loss": 1.8264, "step": 47840 }, { "epoch": 1.59, "grad_norm": 0.5435919761657715, "learning_rate": 0.0002712434131460722, "loss": 1.7455, "step": 47841 }, { "epoch": 1.59, "grad_norm": 0.5203248262405396, "learning_rate": 0.0002712330091761065, "loss": 1.7592, "step": 47842 }, { "epoch": 1.59, "grad_norm": 0.5341371297836304, "learning_rate": 0.0002712226052410597, "loss": 1.752, "step": 47843 }, { "epoch": 1.59, "grad_norm": 0.5363115072250366, "learning_rate": 0.00027121220134094446, "loss": 1.8189, "step": 47844 }, { "epoch": 1.59, "grad_norm": 0.5405802726745605, "learning_rate": 0.0002712017974757733, "loss": 1.7714, "step": 47845 }, { "epoch": 1.59, "grad_norm": 0.5134640336036682, "learning_rate": 0.00027119139364555886, "loss": 1.7622, "step": 47846 }, { "epoch": 1.59, "grad_norm": 0.5506017804145813, "learning_rate": 0.000271180989850314, "loss": 1.7224, "step": 47847 }, { "epoch": 1.59, "grad_norm": 0.540048360824585, "learning_rate": 0.000271170586090051, "loss": 1.8146, "step": 47848 }, { "epoch": 1.59, "grad_norm": 0.7352591753005981, "learning_rate": 0.0002711601823647827, "loss": 1.8008, "step": 47849 }, { "epoch": 1.59, "grad_norm": 0.5281627178192139, "learning_rate": 0.0002711497786745218, "loss": 1.7023, "step": 47850 }, { "epoch": 1.59, "grad_norm": 0.5961170196533203, "learning_rate": 0.00027113937501928076, "loss": 1.8431, "step": 47851 }, { "epoch": 1.59, "grad_norm": 0.5721229910850525, "learning_rate": 0.0002711289713990722, "loss": 1.8161, "step": 47852 }, { "epoch": 1.59, "grad_norm": 0.5223346948623657, "learning_rate": 0.0002711185678139088, "loss": 1.7399, "step": 47853 }, { "epoch": 1.59, "grad_norm": 0.5415724515914917, "learning_rate": 0.0002711081642638034, "loss": 1.7023, "step": 47854 }, { "epoch": 1.59, "grad_norm": 0.5363854169845581, "learning_rate": 0.0002710977607487682, "loss": 1.8175, "step": 47855 }, { "epoch": 1.59, "grad_norm": 0.5386779308319092, "learning_rate": 0.00027108735726881614, "loss": 1.7403, "step": 47856 }, { "epoch": 1.59, "grad_norm": 0.535885751247406, "learning_rate": 0.00027107695382395984, "loss": 1.754, "step": 47857 }, { "epoch": 1.59, "grad_norm": 0.5144791603088379, "learning_rate": 0.0002710665504142118, "loss": 1.7224, "step": 47858 }, { "epoch": 1.59, "grad_norm": 0.5450452566146851, "learning_rate": 0.0002710561470395847, "loss": 1.704, "step": 47859 }, { "epoch": 1.59, "grad_norm": 0.5343257188796997, "learning_rate": 0.00027104574370009123, "loss": 1.751, "step": 47860 }, { "epoch": 1.59, "grad_norm": 0.5231720805168152, "learning_rate": 0.0002710353403957438, "loss": 1.7669, "step": 47861 }, { "epoch": 1.59, "grad_norm": 0.5506812930107117, "learning_rate": 0.0002710249371265553, "loss": 1.6978, "step": 47862 }, { "epoch": 1.59, "grad_norm": 0.543241024017334, "learning_rate": 0.0002710145338925383, "loss": 1.7805, "step": 47863 }, { "epoch": 1.59, "grad_norm": 0.5385482907295227, "learning_rate": 0.0002710041306937054, "loss": 1.7556, "step": 47864 }, { "epoch": 1.59, "grad_norm": 0.5372912287712097, "learning_rate": 0.00027099372753006915, "loss": 1.8004, "step": 47865 }, { "epoch": 1.59, "grad_norm": 0.5651111006736755, "learning_rate": 0.00027098332440164224, "loss": 1.7192, "step": 47866 }, { "epoch": 1.59, "grad_norm": 0.5369189381599426, "learning_rate": 0.0002709729213084374, "loss": 1.7482, "step": 47867 }, { "epoch": 1.59, "grad_norm": 0.536175012588501, "learning_rate": 0.0002709625182504669, "loss": 1.7213, "step": 47868 }, { "epoch": 1.59, "grad_norm": 0.5367769598960876, "learning_rate": 0.00027095211522774383, "loss": 1.8081, "step": 47869 }, { "epoch": 1.59, "grad_norm": 0.5340192317962646, "learning_rate": 0.00027094171224028066, "loss": 1.7747, "step": 47870 }, { "epoch": 1.59, "grad_norm": 0.5351793766021729, "learning_rate": 0.00027093130928808985, "loss": 1.775, "step": 47871 }, { "epoch": 1.59, "grad_norm": 0.5717435479164124, "learning_rate": 0.0002709209063711842, "loss": 1.7517, "step": 47872 }, { "epoch": 1.59, "grad_norm": 0.5389789342880249, "learning_rate": 0.0002709105034895762, "loss": 1.623, "step": 47873 }, { "epoch": 1.59, "grad_norm": 0.5415695309638977, "learning_rate": 0.0002709001006432787, "loss": 1.7461, "step": 47874 }, { "epoch": 1.59, "grad_norm": 0.5582709908485413, "learning_rate": 0.00027088969783230406, "loss": 1.6829, "step": 47875 }, { "epoch": 1.59, "grad_norm": 0.5417926907539368, "learning_rate": 0.0002708792950566651, "loss": 1.736, "step": 47876 }, { "epoch": 1.59, "grad_norm": 0.5410239696502686, "learning_rate": 0.00027086889231637445, "loss": 1.8036, "step": 47877 }, { "epoch": 1.59, "grad_norm": 0.5208551287651062, "learning_rate": 0.00027085848961144456, "loss": 1.7269, "step": 47878 }, { "epoch": 1.59, "grad_norm": 0.5591319799423218, "learning_rate": 0.0002708480869418882, "loss": 1.8045, "step": 47879 }, { "epoch": 1.59, "grad_norm": 0.5508561134338379, "learning_rate": 0.0002708376843077181, "loss": 1.8129, "step": 47880 }, { "epoch": 1.59, "grad_norm": 0.523023247718811, "learning_rate": 0.00027082728170894655, "loss": 1.6954, "step": 47881 }, { "epoch": 1.59, "grad_norm": 0.5411325693130493, "learning_rate": 0.0002708168791455864, "loss": 1.848, "step": 47882 }, { "epoch": 1.59, "grad_norm": 0.5278714299201965, "learning_rate": 0.0002708064766176503, "loss": 1.7361, "step": 47883 }, { "epoch": 1.59, "grad_norm": 0.5353083610534668, "learning_rate": 0.00027079607412515096, "loss": 1.805, "step": 47884 }, { "epoch": 1.59, "grad_norm": 0.5346009731292725, "learning_rate": 0.00027078567166810075, "loss": 1.7069, "step": 47885 }, { "epoch": 1.59, "grad_norm": 0.5239787101745605, "learning_rate": 0.00027077526924651246, "loss": 1.7457, "step": 47886 }, { "epoch": 1.59, "grad_norm": 0.5271226763725281, "learning_rate": 0.00027076486686039873, "loss": 1.7462, "step": 47887 }, { "epoch": 1.59, "grad_norm": 0.5312260985374451, "learning_rate": 0.0002707544645097721, "loss": 1.799, "step": 47888 }, { "epoch": 1.59, "grad_norm": 0.5438123345375061, "learning_rate": 0.0002707440621946452, "loss": 1.7704, "step": 47889 }, { "epoch": 1.59, "grad_norm": 0.5411900281906128, "learning_rate": 0.0002707336599150309, "loss": 1.777, "step": 47890 }, { "epoch": 1.59, "grad_norm": 0.5357195734977722, "learning_rate": 0.00027072325767094134, "loss": 1.7302, "step": 47891 }, { "epoch": 1.59, "grad_norm": 0.5284824967384338, "learning_rate": 0.00027071285546238956, "loss": 1.7664, "step": 47892 }, { "epoch": 1.59, "grad_norm": 0.5398292541503906, "learning_rate": 0.00027070245328938806, "loss": 1.7147, "step": 47893 }, { "epoch": 1.59, "grad_norm": 0.5238326787948608, "learning_rate": 0.00027069205115194954, "loss": 1.8006, "step": 47894 }, { "epoch": 1.59, "grad_norm": 0.535815954208374, "learning_rate": 0.00027068164905008646, "loss": 1.7526, "step": 47895 }, { "epoch": 1.59, "grad_norm": 0.5224077105522156, "learning_rate": 0.00027067124698381144, "loss": 1.7185, "step": 47896 }, { "epoch": 1.59, "grad_norm": 0.53753262758255, "learning_rate": 0.00027066084495313745, "loss": 1.8504, "step": 47897 }, { "epoch": 1.59, "grad_norm": 0.5241212844848633, "learning_rate": 0.0002706504429580767, "loss": 1.8258, "step": 47898 }, { "epoch": 1.59, "grad_norm": 0.5415094494819641, "learning_rate": 0.000270640040998642, "loss": 1.76, "step": 47899 }, { "epoch": 1.59, "grad_norm": 0.5418382287025452, "learning_rate": 0.00027062963907484604, "loss": 1.7288, "step": 47900 }, { "epoch": 1.59, "grad_norm": 0.5563463568687439, "learning_rate": 0.0002706192371867013, "loss": 1.7218, "step": 47901 }, { "epoch": 1.59, "grad_norm": 0.5176987648010254, "learning_rate": 0.0002706088353342205, "loss": 1.7486, "step": 47902 }, { "epoch": 1.59, "grad_norm": 0.518526017665863, "learning_rate": 0.0002705984335174162, "loss": 1.7176, "step": 47903 }, { "epoch": 1.59, "grad_norm": 0.5249520540237427, "learning_rate": 0.00027058803173630124, "loss": 1.7981, "step": 47904 }, { "epoch": 1.59, "grad_norm": 0.5258950591087341, "learning_rate": 0.00027057762999088783, "loss": 1.7388, "step": 47905 }, { "epoch": 1.59, "grad_norm": 0.5281403064727783, "learning_rate": 0.00027056722828118897, "loss": 1.7443, "step": 47906 }, { "epoch": 1.59, "grad_norm": 0.5397734642028809, "learning_rate": 0.0002705568266072172, "loss": 1.7126, "step": 47907 }, { "epoch": 1.59, "grad_norm": 0.5352507829666138, "learning_rate": 0.000270546424968985, "loss": 1.7624, "step": 47908 }, { "epoch": 1.59, "grad_norm": 0.5468019247055054, "learning_rate": 0.00027053602336650517, "loss": 1.7645, "step": 47909 }, { "epoch": 1.59, "grad_norm": 0.5219900608062744, "learning_rate": 0.0002705256217997903, "loss": 1.7421, "step": 47910 }, { "epoch": 1.59, "grad_norm": 0.5229606032371521, "learning_rate": 0.00027051522026885294, "loss": 1.784, "step": 47911 }, { "epoch": 1.59, "grad_norm": 0.5342648029327393, "learning_rate": 0.0002705048187737057, "loss": 1.7732, "step": 47912 }, { "epoch": 1.59, "grad_norm": 0.5500853061676025, "learning_rate": 0.00027049441731436127, "loss": 1.7078, "step": 47913 }, { "epoch": 1.59, "grad_norm": 0.5457475185394287, "learning_rate": 0.00027048401589083236, "loss": 1.7483, "step": 47914 }, { "epoch": 1.59, "grad_norm": 0.5312882661819458, "learning_rate": 0.00027047361450313146, "loss": 1.7925, "step": 47915 }, { "epoch": 1.59, "grad_norm": 0.5457113981246948, "learning_rate": 0.00027046321315127124, "loss": 1.7831, "step": 47916 }, { "epoch": 1.59, "grad_norm": 0.5475214719772339, "learning_rate": 0.0002704528118352644, "loss": 1.8231, "step": 47917 }, { "epoch": 1.59, "grad_norm": 0.5420079231262207, "learning_rate": 0.0002704424105551234, "loss": 1.7081, "step": 47918 }, { "epoch": 1.59, "grad_norm": 0.5559801459312439, "learning_rate": 0.0002704320093108609, "loss": 1.7824, "step": 47919 }, { "epoch": 1.59, "grad_norm": 0.5307813286781311, "learning_rate": 0.0002704216081024898, "loss": 1.733, "step": 47920 }, { "epoch": 1.59, "grad_norm": 0.5325208306312561, "learning_rate": 0.0002704112069300223, "loss": 1.7089, "step": 47921 }, { "epoch": 1.59, "grad_norm": 0.5341547131538391, "learning_rate": 0.0002704008057934713, "loss": 1.7558, "step": 47922 }, { "epoch": 1.59, "grad_norm": 0.5372039675712585, "learning_rate": 0.00027039040469284935, "loss": 1.8412, "step": 47923 }, { "epoch": 1.59, "grad_norm": 0.5622435212135315, "learning_rate": 0.0002703800036281692, "loss": 1.7768, "step": 47924 }, { "epoch": 1.59, "grad_norm": 0.5413342714309692, "learning_rate": 0.00027036960259944323, "loss": 1.695, "step": 47925 }, { "epoch": 1.59, "grad_norm": 0.5408797264099121, "learning_rate": 0.00027035920160668416, "loss": 1.7415, "step": 47926 }, { "epoch": 1.59, "grad_norm": 0.5303292274475098, "learning_rate": 0.00027034880064990484, "loss": 1.7916, "step": 47927 }, { "epoch": 1.59, "grad_norm": 0.564346969127655, "learning_rate": 0.0002703383997291175, "loss": 1.7233, "step": 47928 }, { "epoch": 1.59, "grad_norm": 0.5242384076118469, "learning_rate": 0.00027032799884433504, "loss": 1.7665, "step": 47929 }, { "epoch": 1.59, "grad_norm": 0.5335062742233276, "learning_rate": 0.0002703175979955701, "loss": 1.7295, "step": 47930 }, { "epoch": 1.59, "grad_norm": 0.5335718989372253, "learning_rate": 0.0002703071971828352, "loss": 1.7849, "step": 47931 }, { "epoch": 1.59, "grad_norm": 0.5488389134407043, "learning_rate": 0.0002702967964061429, "loss": 1.7139, "step": 47932 }, { "epoch": 1.59, "grad_norm": 0.5315576791763306, "learning_rate": 0.0002702863956655059, "loss": 1.6913, "step": 47933 }, { "epoch": 1.59, "grad_norm": 0.5388797521591187, "learning_rate": 0.00027027599496093704, "loss": 1.7237, "step": 47934 }, { "epoch": 1.59, "grad_norm": 0.5259852409362793, "learning_rate": 0.00027026559429244853, "loss": 1.6966, "step": 47935 }, { "epoch": 1.59, "grad_norm": 0.5320238471031189, "learning_rate": 0.0002702551936600533, "loss": 1.7698, "step": 47936 }, { "epoch": 1.59, "grad_norm": 0.5381234884262085, "learning_rate": 0.00027024479306376385, "loss": 1.7269, "step": 47937 }, { "epoch": 1.59, "grad_norm": 0.5370484590530396, "learning_rate": 0.0002702343925035929, "loss": 1.7689, "step": 47938 }, { "epoch": 1.59, "grad_norm": 0.5433202981948853, "learning_rate": 0.0002702239919795529, "loss": 1.7009, "step": 47939 }, { "epoch": 1.59, "grad_norm": 0.5166193842887878, "learning_rate": 0.0002702135914916566, "loss": 1.7611, "step": 47940 }, { "epoch": 1.6, "grad_norm": 0.5412293672561646, "learning_rate": 0.0002702031910399168, "loss": 1.7551, "step": 47941 }, { "epoch": 1.6, "grad_norm": 0.5287205576896667, "learning_rate": 0.0002701927906243457, "loss": 1.7584, "step": 47942 }, { "epoch": 1.6, "grad_norm": 0.520750105381012, "learning_rate": 0.00027018239024495625, "loss": 1.7678, "step": 47943 }, { "epoch": 1.6, "grad_norm": 0.5313173532485962, "learning_rate": 0.0002701719899017611, "loss": 1.8067, "step": 47944 }, { "epoch": 1.6, "grad_norm": 0.5355878472328186, "learning_rate": 0.0002701615895947726, "loss": 1.7613, "step": 47945 }, { "epoch": 1.6, "grad_norm": 0.5457999110221863, "learning_rate": 0.0002701511893240036, "loss": 1.7574, "step": 47946 }, { "epoch": 1.6, "grad_norm": 0.5581132769584656, "learning_rate": 0.00027014078908946666, "loss": 1.8987, "step": 47947 }, { "epoch": 1.6, "grad_norm": 0.5562142133712769, "learning_rate": 0.0002701303888911744, "loss": 1.7944, "step": 47948 }, { "epoch": 1.6, "grad_norm": 0.5493810176849365, "learning_rate": 0.00027011998872913934, "loss": 1.7068, "step": 47949 }, { "epoch": 1.6, "grad_norm": 0.5344375371932983, "learning_rate": 0.0002701095886033743, "loss": 1.7641, "step": 47950 }, { "epoch": 1.6, "grad_norm": 0.5393639802932739, "learning_rate": 0.0002700991885138919, "loss": 1.7132, "step": 47951 }, { "epoch": 1.6, "grad_norm": 0.5182369947433472, "learning_rate": 0.00027008878846070453, "loss": 1.7955, "step": 47952 }, { "epoch": 1.6, "grad_norm": 0.5461790561676025, "learning_rate": 0.00027007838844382505, "loss": 1.7989, "step": 47953 }, { "epoch": 1.6, "grad_norm": 0.5252695083618164, "learning_rate": 0.000270067988463266, "loss": 1.8177, "step": 47954 }, { "epoch": 1.6, "grad_norm": 0.552582859992981, "learning_rate": 0.00027005758851904, "loss": 1.7218, "step": 47955 }, { "epoch": 1.6, "grad_norm": 0.5292354226112366, "learning_rate": 0.0002700471886111596, "loss": 1.7382, "step": 47956 }, { "epoch": 1.6, "grad_norm": 0.5376253724098206, "learning_rate": 0.00027003678873963765, "loss": 1.7002, "step": 47957 }, { "epoch": 1.6, "grad_norm": 0.524634063243866, "learning_rate": 0.00027002638890448645, "loss": 1.7084, "step": 47958 }, { "epoch": 1.6, "grad_norm": 0.5513370633125305, "learning_rate": 0.0002700159891057189, "loss": 1.7424, "step": 47959 }, { "epoch": 1.6, "grad_norm": 0.5540769696235657, "learning_rate": 0.0002700055893433475, "loss": 1.7572, "step": 47960 }, { "epoch": 1.6, "grad_norm": 0.5507685542106628, "learning_rate": 0.0002699951896173849, "loss": 1.7498, "step": 47961 }, { "epoch": 1.6, "grad_norm": 0.5181271433830261, "learning_rate": 0.0002699847899278437, "loss": 1.7699, "step": 47962 }, { "epoch": 1.6, "grad_norm": 0.5184016227722168, "learning_rate": 0.0002699743902747365, "loss": 1.8239, "step": 47963 }, { "epoch": 1.6, "grad_norm": 0.5400746464729309, "learning_rate": 0.00026996399065807615, "loss": 1.7274, "step": 47964 }, { "epoch": 1.6, "grad_norm": 0.5710867047309875, "learning_rate": 0.0002699535910778749, "loss": 1.7656, "step": 47965 }, { "epoch": 1.6, "grad_norm": 0.5242725610733032, "learning_rate": 0.0002699431915341456, "loss": 1.7636, "step": 47966 }, { "epoch": 1.6, "grad_norm": 0.5227866768836975, "learning_rate": 0.0002699327920269009, "loss": 1.8484, "step": 47967 }, { "epoch": 1.6, "grad_norm": 0.5475698709487915, "learning_rate": 0.0002699223925561533, "loss": 1.7827, "step": 47968 }, { "epoch": 1.6, "grad_norm": 0.5346898436546326, "learning_rate": 0.0002699119931219155, "loss": 1.7033, "step": 47969 }, { "epoch": 1.6, "grad_norm": 0.5443798899650574, "learning_rate": 0.00026990159372420003, "loss": 1.7722, "step": 47970 }, { "epoch": 1.6, "grad_norm": 0.5237231254577637, "learning_rate": 0.0002698911943630198, "loss": 1.7282, "step": 47971 }, { "epoch": 1.6, "grad_norm": 0.5346439480781555, "learning_rate": 0.000269880795038387, "loss": 1.7848, "step": 47972 }, { "epoch": 1.6, "grad_norm": 0.5320025086402893, "learning_rate": 0.00026987039575031464, "loss": 1.7494, "step": 47973 }, { "epoch": 1.6, "grad_norm": 0.5182682275772095, "learning_rate": 0.0002698599964988151, "loss": 1.6467, "step": 47974 }, { "epoch": 1.6, "grad_norm": 0.5367665886878967, "learning_rate": 0.0002698495972839011, "loss": 1.8446, "step": 47975 }, { "epoch": 1.6, "grad_norm": 0.5316516757011414, "learning_rate": 0.0002698391981055853, "loss": 1.7451, "step": 47976 }, { "epoch": 1.6, "grad_norm": 0.5548280477523804, "learning_rate": 0.0002698287989638802, "loss": 1.7264, "step": 47977 }, { "epoch": 1.6, "grad_norm": 0.5142479538917542, "learning_rate": 0.0002698183998587985, "loss": 1.6805, "step": 47978 }, { "epoch": 1.6, "grad_norm": 0.5353073477745056, "learning_rate": 0.0002698080007903527, "loss": 1.7831, "step": 47979 }, { "epoch": 1.6, "grad_norm": 0.5198856592178345, "learning_rate": 0.0002697976017585557, "loss": 1.7748, "step": 47980 }, { "epoch": 1.6, "grad_norm": 0.5506659150123596, "learning_rate": 0.00026978720276342, "loss": 1.6983, "step": 47981 }, { "epoch": 1.6, "grad_norm": 0.5861994624137878, "learning_rate": 0.0002697768038049581, "loss": 1.6877, "step": 47982 }, { "epoch": 1.6, "grad_norm": 0.5331661701202393, "learning_rate": 0.00026976640488318265, "loss": 1.7532, "step": 47983 }, { "epoch": 1.6, "grad_norm": 0.5267735719680786, "learning_rate": 0.0002697560059981065, "loss": 1.687, "step": 47984 }, { "epoch": 1.6, "grad_norm": 0.5238035321235657, "learning_rate": 0.0002697456071497419, "loss": 1.7334, "step": 47985 }, { "epoch": 1.6, "grad_norm": 0.5551988482475281, "learning_rate": 0.00026973520833810177, "loss": 1.7316, "step": 47986 }, { "epoch": 1.6, "grad_norm": 0.5413949489593506, "learning_rate": 0.0002697248095631987, "loss": 1.7407, "step": 47987 }, { "epoch": 1.6, "grad_norm": 0.5333935022354126, "learning_rate": 0.00026971441082504515, "loss": 1.7238, "step": 47988 }, { "epoch": 1.6, "grad_norm": 0.5328022241592407, "learning_rate": 0.00026970401212365387, "loss": 1.7907, "step": 47989 }, { "epoch": 1.6, "grad_norm": 0.5351388454437256, "learning_rate": 0.00026969361345903747, "loss": 1.7438, "step": 47990 }, { "epoch": 1.6, "grad_norm": 0.5463546514511108, "learning_rate": 0.0002696832148312086, "loss": 1.746, "step": 47991 }, { "epoch": 1.6, "grad_norm": 0.5661569237709045, "learning_rate": 0.0002696728162401797, "loss": 1.731, "step": 47992 }, { "epoch": 1.6, "grad_norm": 0.5650377869606018, "learning_rate": 0.0002696624176859636, "loss": 1.7575, "step": 47993 }, { "epoch": 1.6, "grad_norm": 0.539057731628418, "learning_rate": 0.000269652019168573, "loss": 1.7663, "step": 47994 }, { "epoch": 1.6, "grad_norm": 0.5318616032600403, "learning_rate": 0.00026964162068802023, "loss": 1.7852, "step": 47995 }, { "epoch": 1.6, "grad_norm": 0.5707423090934753, "learning_rate": 0.0002696312222443181, "loss": 1.7038, "step": 47996 }, { "epoch": 1.6, "grad_norm": 0.5770593285560608, "learning_rate": 0.0002696208238374792, "loss": 1.731, "step": 47997 }, { "epoch": 1.6, "grad_norm": 0.5457145571708679, "learning_rate": 0.00026961042546751613, "loss": 1.7421, "step": 47998 }, { "epoch": 1.6, "grad_norm": 0.5489405393600464, "learning_rate": 0.00026960002713444144, "loss": 1.7499, "step": 47999 }, { "epoch": 1.6, "grad_norm": 0.5381519794464111, "learning_rate": 0.0002695896288382679, "loss": 1.8199, "step": 48000 }, { "epoch": 1.6, "grad_norm": 0.5546590685844421, "learning_rate": 0.00026957923057900813, "loss": 1.7066, "step": 48001 }, { "epoch": 1.6, "grad_norm": 0.53046053647995, "learning_rate": 0.0002695688323566746, "loss": 1.7427, "step": 48002 }, { "epoch": 1.6, "grad_norm": 0.5331242680549622, "learning_rate": 0.00026955843417128013, "loss": 1.7886, "step": 48003 }, { "epoch": 1.6, "grad_norm": 0.5587760210037231, "learning_rate": 0.0002695480360228372, "loss": 1.6893, "step": 48004 }, { "epoch": 1.6, "grad_norm": 0.5173309445381165, "learning_rate": 0.0002695376379113584, "loss": 1.7821, "step": 48005 }, { "epoch": 1.6, "grad_norm": 0.5486878156661987, "learning_rate": 0.0002695272398368564, "loss": 1.7377, "step": 48006 }, { "epoch": 1.6, "grad_norm": 0.532855749130249, "learning_rate": 0.00026951684179934407, "loss": 1.7532, "step": 48007 }, { "epoch": 1.6, "grad_norm": 0.5416166186332703, "learning_rate": 0.00026950644379883354, "loss": 1.8037, "step": 48008 }, { "epoch": 1.6, "grad_norm": 0.5352936387062073, "learning_rate": 0.0002694960458353378, "loss": 1.7057, "step": 48009 }, { "epoch": 1.6, "grad_norm": 0.519842803478241, "learning_rate": 0.0002694856479088693, "loss": 1.7189, "step": 48010 }, { "epoch": 1.6, "grad_norm": 0.5369565486907959, "learning_rate": 0.0002694752500194409, "loss": 1.7411, "step": 48011 }, { "epoch": 1.6, "grad_norm": 0.5298106074333191, "learning_rate": 0.0002694648521670649, "loss": 1.7376, "step": 48012 }, { "epoch": 1.6, "grad_norm": 0.5697459578514099, "learning_rate": 0.000269454454351754, "loss": 1.8748, "step": 48013 }, { "epoch": 1.6, "grad_norm": 0.5523886680603027, "learning_rate": 0.0002694440565735211, "loss": 1.7791, "step": 48014 }, { "epoch": 1.6, "grad_norm": 1.3197720050811768, "learning_rate": 0.0002694336588323785, "loss": 1.8166, "step": 48015 }, { "epoch": 1.6, "grad_norm": 0.5474056601524353, "learning_rate": 0.0002694232611283389, "loss": 1.7295, "step": 48016 }, { "epoch": 1.6, "grad_norm": 0.5584756731987, "learning_rate": 0.00026941286346141507, "loss": 1.732, "step": 48017 }, { "epoch": 1.6, "grad_norm": 0.5208542943000793, "learning_rate": 0.00026940246583161945, "loss": 1.7488, "step": 48018 }, { "epoch": 1.6, "grad_norm": 0.5145363211631775, "learning_rate": 0.0002693920682389647, "loss": 1.6953, "step": 48019 }, { "epoch": 1.6, "grad_norm": 0.5536160469055176, "learning_rate": 0.0002693816706834634, "loss": 1.6646, "step": 48020 }, { "epoch": 1.6, "grad_norm": 0.554859459400177, "learning_rate": 0.00026937127316512845, "loss": 1.688, "step": 48021 }, { "epoch": 1.6, "grad_norm": 0.5200726389884949, "learning_rate": 0.00026936087568397207, "loss": 1.7575, "step": 48022 }, { "epoch": 1.6, "grad_norm": 0.518078088760376, "learning_rate": 0.0002693504782400071, "loss": 1.7521, "step": 48023 }, { "epoch": 1.6, "grad_norm": 0.5324504971504211, "learning_rate": 0.0002693400808332463, "loss": 1.722, "step": 48024 }, { "epoch": 1.6, "grad_norm": 0.5361061692237854, "learning_rate": 0.000269329683463702, "loss": 1.7691, "step": 48025 }, { "epoch": 1.6, "grad_norm": 0.5480104684829712, "learning_rate": 0.00026931928613138694, "loss": 1.8067, "step": 48026 }, { "epoch": 1.6, "grad_norm": 0.5245692133903503, "learning_rate": 0.0002693088888363138, "loss": 1.7491, "step": 48027 }, { "epoch": 1.6, "grad_norm": 0.537583589553833, "learning_rate": 0.0002692984915784951, "loss": 1.8198, "step": 48028 }, { "epoch": 1.6, "grad_norm": 0.5374948382377625, "learning_rate": 0.00026928809435794346, "loss": 1.7398, "step": 48029 }, { "epoch": 1.6, "grad_norm": 0.5386086702346802, "learning_rate": 0.00026927769717467157, "loss": 1.7846, "step": 48030 }, { "epoch": 1.6, "grad_norm": 0.5265330672264099, "learning_rate": 0.0002692673000286921, "loss": 1.809, "step": 48031 }, { "epoch": 1.6, "grad_norm": 0.5228257775306702, "learning_rate": 0.00026925690292001756, "loss": 1.72, "step": 48032 }, { "epoch": 1.6, "grad_norm": 0.5088236331939697, "learning_rate": 0.0002692465058486606, "loss": 1.7715, "step": 48033 }, { "epoch": 1.6, "grad_norm": 0.5251394510269165, "learning_rate": 0.0002692361088146339, "loss": 1.7642, "step": 48034 }, { "epoch": 1.6, "grad_norm": 0.5146530866622925, "learning_rate": 0.00026922571181794996, "loss": 1.7308, "step": 48035 }, { "epoch": 1.6, "grad_norm": 0.5412095189094543, "learning_rate": 0.00026921531485862144, "loss": 1.7299, "step": 48036 }, { "epoch": 1.6, "grad_norm": 0.5300847887992859, "learning_rate": 0.0002692049179366612, "loss": 1.7857, "step": 48037 }, { "epoch": 1.6, "grad_norm": 0.5349124073982239, "learning_rate": 0.0002691945210520814, "loss": 1.703, "step": 48038 }, { "epoch": 1.6, "grad_norm": 0.5278509855270386, "learning_rate": 0.000269184124204895, "loss": 1.7796, "step": 48039 }, { "epoch": 1.6, "grad_norm": 0.5276466012001038, "learning_rate": 0.00026917372739511457, "loss": 1.7235, "step": 48040 }, { "epoch": 1.6, "grad_norm": 0.5163257718086243, "learning_rate": 0.00026916333062275275, "loss": 1.7715, "step": 48041 }, { "epoch": 1.6, "grad_norm": 0.5180938243865967, "learning_rate": 0.000269152933887822, "loss": 1.7751, "step": 48042 }, { "epoch": 1.6, "grad_norm": 0.5466743111610413, "learning_rate": 0.00026914253719033496, "loss": 1.8523, "step": 48043 }, { "epoch": 1.6, "grad_norm": 0.5320910215377808, "learning_rate": 0.00026913214053030457, "loss": 1.7973, "step": 48044 }, { "epoch": 1.6, "grad_norm": 0.5407829284667969, "learning_rate": 0.000269121743907743, "loss": 1.7432, "step": 48045 }, { "epoch": 1.6, "grad_norm": 0.5245352387428284, "learning_rate": 0.0002691113473226632, "loss": 1.7198, "step": 48046 }, { "epoch": 1.6, "grad_norm": 0.5202852487564087, "learning_rate": 0.0002691009507750777, "loss": 1.725, "step": 48047 }, { "epoch": 1.6, "grad_norm": 0.5486101508140564, "learning_rate": 0.000269090554264999, "loss": 1.8111, "step": 48048 }, { "epoch": 1.6, "grad_norm": 0.5249959230422974, "learning_rate": 0.0002690801577924398, "loss": 1.7535, "step": 48049 }, { "epoch": 1.6, "grad_norm": 0.5358589887619019, "learning_rate": 0.00026906976135741275, "loss": 1.7644, "step": 48050 }, { "epoch": 1.6, "grad_norm": 0.5475276708602905, "learning_rate": 0.0002690593649599306, "loss": 1.7738, "step": 48051 }, { "epoch": 1.6, "grad_norm": 0.5631310343742371, "learning_rate": 0.0002690489686000056, "loss": 1.7677, "step": 48052 }, { "epoch": 1.6, "grad_norm": 0.5357556939125061, "learning_rate": 0.0002690385722776507, "loss": 1.8248, "step": 48053 }, { "epoch": 1.6, "grad_norm": 0.531615138053894, "learning_rate": 0.00026902817599287846, "loss": 1.7084, "step": 48054 }, { "epoch": 1.6, "grad_norm": 0.5658892393112183, "learning_rate": 0.0002690177797457014, "loss": 1.781, "step": 48055 }, { "epoch": 1.6, "grad_norm": 0.5529002547264099, "learning_rate": 0.0002690073835361322, "loss": 1.735, "step": 48056 }, { "epoch": 1.6, "grad_norm": 0.5431792736053467, "learning_rate": 0.0002689969873641835, "loss": 1.7303, "step": 48057 }, { "epoch": 1.6, "grad_norm": 0.5198009014129639, "learning_rate": 0.00026898659122986783, "loss": 1.7663, "step": 48058 }, { "epoch": 1.6, "grad_norm": 0.547749936580658, "learning_rate": 0.00026897619513319783, "loss": 1.8247, "step": 48059 }, { "epoch": 1.6, "grad_norm": 0.5322719216346741, "learning_rate": 0.00026896579907418624, "loss": 1.7481, "step": 48060 }, { "epoch": 1.6, "grad_norm": 0.5449036359786987, "learning_rate": 0.0002689554030528456, "loss": 1.8146, "step": 48061 }, { "epoch": 1.6, "grad_norm": 0.5099839568138123, "learning_rate": 0.0002689450070691885, "loss": 1.7266, "step": 48062 }, { "epoch": 1.6, "grad_norm": 0.4985218942165375, "learning_rate": 0.00026893461112322757, "loss": 1.7494, "step": 48063 }, { "epoch": 1.6, "grad_norm": 0.5345342755317688, "learning_rate": 0.0002689242152149755, "loss": 1.7712, "step": 48064 }, { "epoch": 1.6, "grad_norm": 0.5127301216125488, "learning_rate": 0.0002689138193444448, "loss": 1.7672, "step": 48065 }, { "epoch": 1.6, "grad_norm": 0.5242788791656494, "learning_rate": 0.0002689034235116481, "loss": 1.787, "step": 48066 }, { "epoch": 1.6, "grad_norm": 0.5196834206581116, "learning_rate": 0.0002688930277165981, "loss": 1.7104, "step": 48067 }, { "epoch": 1.6, "grad_norm": 0.5034934282302856, "learning_rate": 0.00026888263195930744, "loss": 1.7277, "step": 48068 }, { "epoch": 1.6, "grad_norm": 0.5425003170967102, "learning_rate": 0.0002688722362397886, "loss": 1.7227, "step": 48069 }, { "epoch": 1.6, "grad_norm": 0.5138649344444275, "learning_rate": 0.0002688618405580543, "loss": 1.7392, "step": 48070 }, { "epoch": 1.6, "grad_norm": 0.5178122520446777, "learning_rate": 0.0002688514449141173, "loss": 1.7194, "step": 48071 }, { "epoch": 1.6, "grad_norm": 0.5198011994361877, "learning_rate": 0.00026884104930798984, "loss": 1.7553, "step": 48072 }, { "epoch": 1.6, "grad_norm": 0.5375523567199707, "learning_rate": 0.00026883065373968474, "loss": 1.8172, "step": 48073 }, { "epoch": 1.6, "grad_norm": 0.5199777483940125, "learning_rate": 0.0002688202582092149, "loss": 1.7457, "step": 48074 }, { "epoch": 1.6, "grad_norm": 0.5344419479370117, "learning_rate": 0.00026880986271659235, "loss": 1.6652, "step": 48075 }, { "epoch": 1.6, "grad_norm": 0.5480139851570129, "learning_rate": 0.0002687994672618302, "loss": 1.7584, "step": 48076 }, { "epoch": 1.6, "grad_norm": 0.5321511030197144, "learning_rate": 0.0002687890718449409, "loss": 1.7199, "step": 48077 }, { "epoch": 1.6, "grad_norm": 0.5330097675323486, "learning_rate": 0.0002687786764659371, "loss": 1.7268, "step": 48078 }, { "epoch": 1.6, "grad_norm": 0.5350900292396545, "learning_rate": 0.00026876828112483127, "loss": 1.6687, "step": 48079 }, { "epoch": 1.6, "grad_norm": 0.539431631565094, "learning_rate": 0.00026875788582163615, "loss": 1.7613, "step": 48080 }, { "epoch": 1.6, "grad_norm": 0.5247067213058472, "learning_rate": 0.0002687474905563645, "loss": 1.7119, "step": 48081 }, { "epoch": 1.6, "grad_norm": 0.5560553073883057, "learning_rate": 0.00026873709532902865, "loss": 1.714, "step": 48082 }, { "epoch": 1.6, "grad_norm": 0.5319381952285767, "learning_rate": 0.0002687267001396414, "loss": 1.7672, "step": 48083 }, { "epoch": 1.6, "grad_norm": 0.5346782207489014, "learning_rate": 0.00026871630498821535, "loss": 1.762, "step": 48084 }, { "epoch": 1.6, "grad_norm": 0.5572100281715393, "learning_rate": 0.0002687059098747631, "loss": 1.7519, "step": 48085 }, { "epoch": 1.6, "grad_norm": 0.5417314171791077, "learning_rate": 0.00026869551479929727, "loss": 1.7801, "step": 48086 }, { "epoch": 1.6, "grad_norm": 0.5211403369903564, "learning_rate": 0.00026868511976183034, "loss": 1.7435, "step": 48087 }, { "epoch": 1.6, "grad_norm": 0.5156753659248352, "learning_rate": 0.0002686747247623752, "loss": 1.7228, "step": 48088 }, { "epoch": 1.6, "grad_norm": 0.5478510856628418, "learning_rate": 0.0002686643298009443, "loss": 1.7321, "step": 48089 }, { "epoch": 1.6, "grad_norm": 0.5454543232917786, "learning_rate": 0.0002686539348775503, "loss": 1.7833, "step": 48090 }, { "epoch": 1.6, "grad_norm": 0.5569742918014526, "learning_rate": 0.00026864353999220584, "loss": 1.8527, "step": 48091 }, { "epoch": 1.6, "grad_norm": 0.5212863683700562, "learning_rate": 0.0002686331451449234, "loss": 1.7506, "step": 48092 }, { "epoch": 1.6, "grad_norm": 0.5346769094467163, "learning_rate": 0.00026862275033571575, "loss": 1.8038, "step": 48093 }, { "epoch": 1.6, "grad_norm": 0.5105636715888977, "learning_rate": 0.00026861235556459556, "loss": 1.7639, "step": 48094 }, { "epoch": 1.6, "grad_norm": 0.5248793363571167, "learning_rate": 0.00026860196083157515, "loss": 1.8277, "step": 48095 }, { "epoch": 1.6, "grad_norm": 0.5457700490951538, "learning_rate": 0.00026859156613666743, "loss": 1.7584, "step": 48096 }, { "epoch": 1.6, "grad_norm": 0.5452672243118286, "learning_rate": 0.0002685811714798849, "loss": 1.7401, "step": 48097 }, { "epoch": 1.6, "grad_norm": 0.8469381332397461, "learning_rate": 0.00026857077686124025, "loss": 1.7695, "step": 48098 }, { "epoch": 1.6, "grad_norm": 0.5269988179206848, "learning_rate": 0.00026856038228074606, "loss": 1.7865, "step": 48099 }, { "epoch": 1.6, "grad_norm": 0.5215649604797363, "learning_rate": 0.00026854998773841486, "loss": 1.7135, "step": 48100 }, { "epoch": 1.6, "grad_norm": 0.5280856490135193, "learning_rate": 0.00026853959323425944, "loss": 1.7701, "step": 48101 }, { "epoch": 1.6, "grad_norm": 0.5564571022987366, "learning_rate": 0.0002685291987682921, "loss": 1.7763, "step": 48102 }, { "epoch": 1.6, "grad_norm": 0.5251713991165161, "learning_rate": 0.00026851880434052586, "loss": 1.8059, "step": 48103 }, { "epoch": 1.6, "grad_norm": 0.5324860215187073, "learning_rate": 0.00026850840995097316, "loss": 1.7263, "step": 48104 }, { "epoch": 1.6, "grad_norm": 0.5440112352371216, "learning_rate": 0.00026849801559964656, "loss": 1.7714, "step": 48105 }, { "epoch": 1.6, "grad_norm": 0.528281033039093, "learning_rate": 0.00026848762128655875, "loss": 1.747, "step": 48106 }, { "epoch": 1.6, "grad_norm": 0.524980366230011, "learning_rate": 0.00026847722701172223, "loss": 1.6942, "step": 48107 }, { "epoch": 1.6, "grad_norm": 0.5397907495498657, "learning_rate": 0.00026846683277514987, "loss": 1.8609, "step": 48108 }, { "epoch": 1.6, "grad_norm": 1.5279784202575684, "learning_rate": 0.000268456438576854, "loss": 1.8019, "step": 48109 }, { "epoch": 1.6, "grad_norm": 0.5244579315185547, "learning_rate": 0.0002684460444168474, "loss": 1.7097, "step": 48110 }, { "epoch": 1.6, "grad_norm": 0.5422952175140381, "learning_rate": 0.0002684356502951427, "loss": 1.7494, "step": 48111 }, { "epoch": 1.6, "grad_norm": 0.534197986125946, "learning_rate": 0.0002684252562117524, "loss": 1.7442, "step": 48112 }, { "epoch": 1.6, "grad_norm": 0.5225622653961182, "learning_rate": 0.00026841486216668925, "loss": 1.7331, "step": 48113 }, { "epoch": 1.6, "grad_norm": 0.5196048617362976, "learning_rate": 0.0002684044681599658, "loss": 1.7335, "step": 48114 }, { "epoch": 1.6, "grad_norm": 0.5264400839805603, "learning_rate": 0.00026839407419159463, "loss": 1.7601, "step": 48115 }, { "epoch": 1.6, "grad_norm": 0.5163440704345703, "learning_rate": 0.0002683836802615884, "loss": 1.8037, "step": 48116 }, { "epoch": 1.6, "grad_norm": 0.5209622383117676, "learning_rate": 0.0002683732863699597, "loss": 1.7868, "step": 48117 }, { "epoch": 1.6, "grad_norm": 0.5445687174797058, "learning_rate": 0.0002683628925167213, "loss": 1.6941, "step": 48118 }, { "epoch": 1.6, "grad_norm": 0.5402061939239502, "learning_rate": 0.0002683524987018856, "loss": 1.7115, "step": 48119 }, { "epoch": 1.6, "grad_norm": 0.519609272480011, "learning_rate": 0.0002683421049254652, "loss": 1.7764, "step": 48120 }, { "epoch": 1.6, "grad_norm": 0.5250052809715271, "learning_rate": 0.000268331711187473, "loss": 1.6895, "step": 48121 }, { "epoch": 1.6, "grad_norm": 0.550336480140686, "learning_rate": 0.00026832131748792134, "loss": 1.7225, "step": 48122 }, { "epoch": 1.6, "grad_norm": 0.5427104234695435, "learning_rate": 0.0002683109238268229, "loss": 1.8049, "step": 48123 }, { "epoch": 1.6, "grad_norm": 0.521483302116394, "learning_rate": 0.00026830053020419046, "loss": 1.7267, "step": 48124 }, { "epoch": 1.6, "grad_norm": 0.519893229007721, "learning_rate": 0.0002682901366200364, "loss": 1.7139, "step": 48125 }, { "epoch": 1.6, "grad_norm": 0.5404049158096313, "learning_rate": 0.0002682797430743734, "loss": 1.708, "step": 48126 }, { "epoch": 1.6, "grad_norm": 0.5284122824668884, "learning_rate": 0.0002682693495672142, "loss": 1.7065, "step": 48127 }, { "epoch": 1.6, "grad_norm": 0.5464621186256409, "learning_rate": 0.00026825895609857136, "loss": 1.775, "step": 48128 }, { "epoch": 1.6, "grad_norm": 0.5198826193809509, "learning_rate": 0.0002682485626684574, "loss": 1.7895, "step": 48129 }, { "epoch": 1.6, "grad_norm": 0.535309374332428, "learning_rate": 0.000268238169276885, "loss": 1.7722, "step": 48130 }, { "epoch": 1.6, "grad_norm": 0.5224193930625916, "learning_rate": 0.00026822777592386694, "loss": 1.8332, "step": 48131 }, { "epoch": 1.6, "grad_norm": 0.5405677556991577, "learning_rate": 0.00026821738260941546, "loss": 1.8289, "step": 48132 }, { "epoch": 1.6, "grad_norm": 0.5229215025901794, "learning_rate": 0.0002682069893335435, "loss": 1.7323, "step": 48133 }, { "epoch": 1.6, "grad_norm": 0.5180866718292236, "learning_rate": 0.0002681965960962636, "loss": 1.7219, "step": 48134 }, { "epoch": 1.6, "grad_norm": 0.5406617522239685, "learning_rate": 0.00026818620289758834, "loss": 1.7707, "step": 48135 }, { "epoch": 1.6, "grad_norm": 0.525185763835907, "learning_rate": 0.0002681758097375303, "loss": 1.7298, "step": 48136 }, { "epoch": 1.6, "grad_norm": 0.5586808323860168, "learning_rate": 0.00026816541661610203, "loss": 1.8203, "step": 48137 }, { "epoch": 1.6, "grad_norm": 0.5308765769004822, "learning_rate": 0.0002681550235333165, "loss": 1.6672, "step": 48138 }, { "epoch": 1.6, "grad_norm": 0.541922926902771, "learning_rate": 0.00026814463048918587, "loss": 1.7523, "step": 48139 }, { "epoch": 1.6, "grad_norm": 0.5109740495681763, "learning_rate": 0.000268134237483723, "loss": 1.7465, "step": 48140 }, { "epoch": 1.6, "grad_norm": 0.5246584415435791, "learning_rate": 0.00026812384451694064, "loss": 1.679, "step": 48141 }, { "epoch": 1.6, "grad_norm": 0.5324564576148987, "learning_rate": 0.00026811345158885106, "loss": 1.7915, "step": 48142 }, { "epoch": 1.6, "grad_norm": 0.5381798148155212, "learning_rate": 0.0002681030586994671, "loss": 1.7343, "step": 48143 }, { "epoch": 1.6, "grad_norm": 0.5256534218788147, "learning_rate": 0.0002680926658488014, "loss": 1.7585, "step": 48144 }, { "epoch": 1.6, "grad_norm": 0.5255674719810486, "learning_rate": 0.00026808227303686644, "loss": 1.7205, "step": 48145 }, { "epoch": 1.6, "grad_norm": 0.5875750184059143, "learning_rate": 0.00026807188026367477, "loss": 1.7948, "step": 48146 }, { "epoch": 1.6, "grad_norm": 0.5368530750274658, "learning_rate": 0.00026806148752923925, "loss": 1.7743, "step": 48147 }, { "epoch": 1.6, "grad_norm": 0.5383884310722351, "learning_rate": 0.0002680510948335724, "loss": 1.6855, "step": 48148 }, { "epoch": 1.6, "grad_norm": 0.5371692776679993, "learning_rate": 0.00026804070217668685, "loss": 1.7872, "step": 48149 }, { "epoch": 1.6, "grad_norm": 0.5210779905319214, "learning_rate": 0.0002680303095585951, "loss": 1.8473, "step": 48150 }, { "epoch": 1.6, "grad_norm": 0.5811213850975037, "learning_rate": 0.0002680199169793099, "loss": 1.748, "step": 48151 }, { "epoch": 1.6, "grad_norm": 0.5361648797988892, "learning_rate": 0.0002680095244388438, "loss": 1.6767, "step": 48152 }, { "epoch": 1.6, "grad_norm": 0.5406816005706787, "learning_rate": 0.0002679991319372093, "loss": 1.7036, "step": 48153 }, { "epoch": 1.6, "grad_norm": 0.5174634456634521, "learning_rate": 0.00026798873947441934, "loss": 1.7687, "step": 48154 }, { "epoch": 1.6, "grad_norm": 0.5246635675430298, "learning_rate": 0.00026797834705048616, "loss": 1.7402, "step": 48155 }, { "epoch": 1.6, "grad_norm": 0.5185950994491577, "learning_rate": 0.00026796795466542265, "loss": 1.7539, "step": 48156 }, { "epoch": 1.6, "grad_norm": 0.5321258902549744, "learning_rate": 0.0002679575623192413, "loss": 1.7062, "step": 48157 }, { "epoch": 1.6, "grad_norm": 0.5457262992858887, "learning_rate": 0.00026794717001195475, "loss": 1.7642, "step": 48158 }, { "epoch": 1.6, "grad_norm": 0.5516402125358582, "learning_rate": 0.0002679367777435756, "loss": 1.7329, "step": 48159 }, { "epoch": 1.6, "grad_norm": 0.529944121837616, "learning_rate": 0.00026792638551411643, "loss": 1.7765, "step": 48160 }, { "epoch": 1.6, "grad_norm": 0.5286257863044739, "learning_rate": 0.0002679159933235901, "loss": 1.7846, "step": 48161 }, { "epoch": 1.6, "grad_norm": 0.5394362807273865, "learning_rate": 0.00026790560117200874, "loss": 1.7647, "step": 48162 }, { "epoch": 1.6, "grad_norm": 0.545803964138031, "learning_rate": 0.0002678952090593854, "loss": 1.8016, "step": 48163 }, { "epoch": 1.6, "grad_norm": 0.5381185412406921, "learning_rate": 0.0002678848169857326, "loss": 1.7127, "step": 48164 }, { "epoch": 1.6, "grad_norm": 0.5351747870445251, "learning_rate": 0.0002678744249510628, "loss": 1.8214, "step": 48165 }, { "epoch": 1.6, "grad_norm": 0.5395716428756714, "learning_rate": 0.0002678640329553888, "loss": 1.745, "step": 48166 }, { "epoch": 1.6, "grad_norm": 0.5535348653793335, "learning_rate": 0.000267853640998723, "loss": 1.7461, "step": 48167 }, { "epoch": 1.6, "grad_norm": 0.5489045977592468, "learning_rate": 0.00026784324908107833, "loss": 1.7502, "step": 48168 }, { "epoch": 1.6, "grad_norm": 0.5496821403503418, "learning_rate": 0.000267832857202467, "loss": 1.7075, "step": 48169 }, { "epoch": 1.6, "grad_norm": 0.5140077471733093, "learning_rate": 0.000267822465362902, "loss": 1.7194, "step": 48170 }, { "epoch": 1.6, "grad_norm": 0.525616466999054, "learning_rate": 0.0002678120735623958, "loss": 1.754, "step": 48171 }, { "epoch": 1.6, "grad_norm": 0.5660771727561951, "learning_rate": 0.00026780168180096087, "loss": 1.7841, "step": 48172 }, { "epoch": 1.6, "grad_norm": 0.5237407088279724, "learning_rate": 0.00026779129007861003, "loss": 1.8119, "step": 48173 }, { "epoch": 1.6, "grad_norm": 0.5470177531242371, "learning_rate": 0.00026778089839535584, "loss": 1.7432, "step": 48174 }, { "epoch": 1.6, "grad_norm": 0.5306345224380493, "learning_rate": 0.0002677705067512109, "loss": 1.777, "step": 48175 }, { "epoch": 1.6, "grad_norm": 0.5152953267097473, "learning_rate": 0.0002677601151461877, "loss": 1.6693, "step": 48176 }, { "epoch": 1.6, "grad_norm": 0.5388603806495667, "learning_rate": 0.00026774972358029905, "loss": 1.7866, "step": 48177 }, { "epoch": 1.6, "grad_norm": 0.5335081219673157, "learning_rate": 0.0002677393320535575, "loss": 1.6902, "step": 48178 }, { "epoch": 1.6, "grad_norm": 0.5595743656158447, "learning_rate": 0.00026772894056597565, "loss": 1.8551, "step": 48179 }, { "epoch": 1.6, "grad_norm": 1.8667441606521606, "learning_rate": 0.00026771854911756605, "loss": 1.7712, "step": 48180 }, { "epoch": 1.6, "grad_norm": 0.5229584574699402, "learning_rate": 0.0002677081577083415, "loss": 1.6808, "step": 48181 }, { "epoch": 1.6, "grad_norm": 0.5381147265434265, "learning_rate": 0.0002676977663383144, "loss": 1.7602, "step": 48182 }, { "epoch": 1.6, "grad_norm": 0.5163959264755249, "learning_rate": 0.0002676873750074974, "loss": 1.6867, "step": 48183 }, { "epoch": 1.6, "grad_norm": 0.5351518988609314, "learning_rate": 0.0002676769837159032, "loss": 1.8015, "step": 48184 }, { "epoch": 1.6, "grad_norm": 0.5228372812271118, "learning_rate": 0.0002676665924635445, "loss": 1.7339, "step": 48185 }, { "epoch": 1.6, "grad_norm": 0.5430194735527039, "learning_rate": 0.00026765620125043365, "loss": 1.7514, "step": 48186 }, { "epoch": 1.6, "grad_norm": 0.5555010437965393, "learning_rate": 0.00026764581007658345, "loss": 1.762, "step": 48187 }, { "epoch": 1.6, "grad_norm": 0.5354933738708496, "learning_rate": 0.00026763541894200653, "loss": 1.7442, "step": 48188 }, { "epoch": 1.6, "grad_norm": 0.5142012238502502, "learning_rate": 0.0002676250278467154, "loss": 1.7098, "step": 48189 }, { "epoch": 1.6, "grad_norm": 0.5254674553871155, "learning_rate": 0.0002676146367907226, "loss": 1.7505, "step": 48190 }, { "epoch": 1.6, "grad_norm": 0.5334911942481995, "learning_rate": 0.00026760424577404105, "loss": 1.8002, "step": 48191 }, { "epoch": 1.6, "grad_norm": 0.5406602025032043, "learning_rate": 0.00026759385479668303, "loss": 1.7981, "step": 48192 }, { "epoch": 1.6, "grad_norm": 0.5107195377349854, "learning_rate": 0.00026758346385866134, "loss": 1.7564, "step": 48193 }, { "epoch": 1.6, "grad_norm": 0.5326687693595886, "learning_rate": 0.00026757307295998854, "loss": 1.7821, "step": 48194 }, { "epoch": 1.6, "grad_norm": 0.5161872506141663, "learning_rate": 0.0002675626821006773, "loss": 1.7617, "step": 48195 }, { "epoch": 1.6, "grad_norm": 0.5325639843940735, "learning_rate": 0.00026755229128074014, "loss": 1.7743, "step": 48196 }, { "epoch": 1.6, "grad_norm": 0.5232017636299133, "learning_rate": 0.00026754190050018965, "loss": 1.7056, "step": 48197 }, { "epoch": 1.6, "grad_norm": 0.5439094305038452, "learning_rate": 0.00026753150975903874, "loss": 1.7013, "step": 48198 }, { "epoch": 1.6, "grad_norm": 0.5411337018013, "learning_rate": 0.00026752111905729956, "loss": 1.7389, "step": 48199 }, { "epoch": 1.6, "grad_norm": 0.5360122323036194, "learning_rate": 0.000267510728394985, "loss": 1.7959, "step": 48200 }, { "epoch": 1.6, "grad_norm": 0.5548933744430542, "learning_rate": 0.00026750033777210775, "loss": 1.7457, "step": 48201 }, { "epoch": 1.6, "grad_norm": 0.5595917701721191, "learning_rate": 0.00026748994718868023, "loss": 1.7567, "step": 48202 }, { "epoch": 1.6, "grad_norm": 0.5334498882293701, "learning_rate": 0.00026747955664471507, "loss": 1.7037, "step": 48203 }, { "epoch": 1.6, "grad_norm": 0.5199286937713623, "learning_rate": 0.00026746916614022495, "loss": 1.7554, "step": 48204 }, { "epoch": 1.6, "grad_norm": 0.5344223976135254, "learning_rate": 0.00026745877567522255, "loss": 1.7644, "step": 48205 }, { "epoch": 1.6, "grad_norm": 0.5491538643836975, "learning_rate": 0.0002674483852497203, "loss": 1.8348, "step": 48206 }, { "epoch": 1.6, "grad_norm": 0.5368584394454956, "learning_rate": 0.000267437994863731, "loss": 1.7543, "step": 48207 }, { "epoch": 1.6, "grad_norm": 0.5160761475563049, "learning_rate": 0.0002674276045172672, "loss": 1.7652, "step": 48208 }, { "epoch": 1.6, "grad_norm": 0.5471234917640686, "learning_rate": 0.0002674172142103414, "loss": 1.7349, "step": 48209 }, { "epoch": 1.6, "grad_norm": 0.5231315493583679, "learning_rate": 0.0002674068239429663, "loss": 1.6797, "step": 48210 }, { "epoch": 1.6, "grad_norm": 0.5353137254714966, "learning_rate": 0.0002673964337151546, "loss": 1.7505, "step": 48211 }, { "epoch": 1.6, "grad_norm": 0.5476976633071899, "learning_rate": 0.00026738604352691866, "loss": 1.7984, "step": 48212 }, { "epoch": 1.6, "grad_norm": 0.5332534313201904, "learning_rate": 0.0002673756533782714, "loss": 1.8221, "step": 48213 }, { "epoch": 1.6, "grad_norm": 0.5296302437782288, "learning_rate": 0.00026736526326922525, "loss": 1.6992, "step": 48214 }, { "epoch": 1.6, "grad_norm": 0.5157778859138489, "learning_rate": 0.0002673548731997929, "loss": 1.7513, "step": 48215 }, { "epoch": 1.6, "grad_norm": 0.5438331961631775, "learning_rate": 0.0002673444831699869, "loss": 1.7403, "step": 48216 }, { "epoch": 1.6, "grad_norm": 0.5322539210319519, "learning_rate": 0.00026733409317981986, "loss": 1.7797, "step": 48217 }, { "epoch": 1.6, "grad_norm": 0.5512552857398987, "learning_rate": 0.00026732370322930446, "loss": 1.7694, "step": 48218 }, { "epoch": 1.6, "grad_norm": 0.5251891016960144, "learning_rate": 0.0002673133133184532, "loss": 1.7941, "step": 48219 }, { "epoch": 1.6, "grad_norm": 0.5527540445327759, "learning_rate": 0.0002673029234472788, "loss": 1.7517, "step": 48220 }, { "epoch": 1.6, "grad_norm": 0.54285728931427, "learning_rate": 0.0002672925336157939, "loss": 1.7129, "step": 48221 }, { "epoch": 1.6, "grad_norm": 0.5189140439033508, "learning_rate": 0.00026728214382401095, "loss": 1.7448, "step": 48222 }, { "epoch": 1.6, "grad_norm": 0.5456625819206238, "learning_rate": 0.0002672717540719427, "loss": 1.7801, "step": 48223 }, { "epoch": 1.6, "grad_norm": 0.5274875164031982, "learning_rate": 0.0002672613643596017, "loss": 1.7326, "step": 48224 }, { "epoch": 1.6, "grad_norm": 0.5667573809623718, "learning_rate": 0.00026725097468700067, "loss": 1.8571, "step": 48225 }, { "epoch": 1.6, "grad_norm": 0.5421087145805359, "learning_rate": 0.00026724058505415196, "loss": 1.6594, "step": 48226 }, { "epoch": 1.6, "grad_norm": 0.5279707908630371, "learning_rate": 0.0002672301954610685, "loss": 1.8284, "step": 48227 }, { "epoch": 1.6, "grad_norm": 0.5478151440620422, "learning_rate": 0.0002672198059077627, "loss": 1.751, "step": 48228 }, { "epoch": 1.6, "grad_norm": 0.5366548299789429, "learning_rate": 0.0002672094163942472, "loss": 1.799, "step": 48229 }, { "epoch": 1.6, "grad_norm": 0.5500943064689636, "learning_rate": 0.0002671990269205347, "loss": 1.6646, "step": 48230 }, { "epoch": 1.6, "grad_norm": 0.5552246570587158, "learning_rate": 0.0002671886374866377, "loss": 1.686, "step": 48231 }, { "epoch": 1.6, "grad_norm": 0.5556247234344482, "learning_rate": 0.00026717824809256886, "loss": 1.8211, "step": 48232 }, { "epoch": 1.6, "grad_norm": 0.5171751379966736, "learning_rate": 0.00026716785873834076, "loss": 1.7141, "step": 48233 }, { "epoch": 1.6, "grad_norm": 0.5664453506469727, "learning_rate": 0.0002671574694239661, "loss": 1.7662, "step": 48234 }, { "epoch": 1.6, "grad_norm": 0.5207657217979431, "learning_rate": 0.0002671470801494574, "loss": 1.7259, "step": 48235 }, { "epoch": 1.6, "grad_norm": 0.5116896629333496, "learning_rate": 0.0002671366909148273, "loss": 1.6887, "step": 48236 }, { "epoch": 1.6, "grad_norm": 0.5621631741523743, "learning_rate": 0.00026712630172008844, "loss": 1.7443, "step": 48237 }, { "epoch": 1.6, "grad_norm": 0.5223894119262695, "learning_rate": 0.00026711591256525346, "loss": 1.784, "step": 48238 }, { "epoch": 1.6, "grad_norm": 0.54996657371521, "learning_rate": 0.0002671055234503349, "loss": 1.8113, "step": 48239 }, { "epoch": 1.6, "grad_norm": 0.5265146493911743, "learning_rate": 0.00026709513437534523, "loss": 1.7595, "step": 48240 }, { "epoch": 1.6, "grad_norm": 0.5566475987434387, "learning_rate": 0.00026708474534029746, "loss": 1.7081, "step": 48241 }, { "epoch": 1.61, "grad_norm": 0.5399596095085144, "learning_rate": 0.00026707435634520374, "loss": 1.6505, "step": 48242 }, { "epoch": 1.61, "grad_norm": 0.5412417650222778, "learning_rate": 0.000267063967390077, "loss": 1.7135, "step": 48243 }, { "epoch": 1.61, "grad_norm": 0.5560112595558167, "learning_rate": 0.0002670535784749297, "loss": 1.741, "step": 48244 }, { "epoch": 1.61, "grad_norm": 0.5226233005523682, "learning_rate": 0.00026704318959977464, "loss": 1.7311, "step": 48245 }, { "epoch": 1.61, "grad_norm": 0.5810348391532898, "learning_rate": 0.00026703280076462415, "loss": 1.8372, "step": 48246 }, { "epoch": 1.61, "grad_norm": 0.5304304957389832, "learning_rate": 0.00026702241196949097, "loss": 1.6721, "step": 48247 }, { "epoch": 1.61, "grad_norm": 0.5475698709487915, "learning_rate": 0.0002670120232143879, "loss": 1.8528, "step": 48248 }, { "epoch": 1.61, "grad_norm": 0.540408194065094, "learning_rate": 0.0002670016344993272, "loss": 1.7026, "step": 48249 }, { "epoch": 1.61, "grad_norm": 0.5536380410194397, "learning_rate": 0.0002669912458243217, "loss": 1.7074, "step": 48250 }, { "epoch": 1.61, "grad_norm": 0.5342468023300171, "learning_rate": 0.00026698085718938403, "loss": 1.7396, "step": 48251 }, { "epoch": 1.61, "grad_norm": 0.5498402714729309, "learning_rate": 0.0002669704685945267, "loss": 1.7575, "step": 48252 }, { "epoch": 1.61, "grad_norm": 0.5450877547264099, "learning_rate": 0.00026696008003976226, "loss": 1.7247, "step": 48253 }, { "epoch": 1.61, "grad_norm": 0.5380244255065918, "learning_rate": 0.00026694969152510345, "loss": 1.7328, "step": 48254 }, { "epoch": 1.61, "grad_norm": 0.5285723209381104, "learning_rate": 0.000266939303050563, "loss": 1.7689, "step": 48255 }, { "epoch": 1.61, "grad_norm": 0.524990439414978, "learning_rate": 0.00026692891461615315, "loss": 1.7335, "step": 48256 }, { "epoch": 1.61, "grad_norm": 0.5443496108055115, "learning_rate": 0.0002669185262218868, "loss": 1.7556, "step": 48257 }, { "epoch": 1.61, "grad_norm": 0.5304287075996399, "learning_rate": 0.00026690813786777655, "loss": 1.7363, "step": 48258 }, { "epoch": 1.61, "grad_norm": 0.5083786249160767, "learning_rate": 0.0002668977495538349, "loss": 1.6794, "step": 48259 }, { "epoch": 1.61, "grad_norm": 0.5297226905822754, "learning_rate": 0.0002668873612800745, "loss": 1.7583, "step": 48260 }, { "epoch": 1.61, "grad_norm": 0.5295445322990417, "learning_rate": 0.00026687697304650804, "loss": 1.8276, "step": 48261 }, { "epoch": 1.61, "grad_norm": 0.5436646342277527, "learning_rate": 0.000266866584853148, "loss": 1.7924, "step": 48262 }, { "epoch": 1.61, "grad_norm": 0.5471498370170593, "learning_rate": 0.0002668561967000069, "loss": 1.819, "step": 48263 }, { "epoch": 1.61, "grad_norm": 0.5386645793914795, "learning_rate": 0.00026684580858709765, "loss": 1.7963, "step": 48264 }, { "epoch": 1.61, "grad_norm": 0.530245840549469, "learning_rate": 0.0002668354205144327, "loss": 1.7826, "step": 48265 }, { "epoch": 1.61, "grad_norm": 0.5683040618896484, "learning_rate": 0.00026682503248202466, "loss": 1.7921, "step": 48266 }, { "epoch": 1.61, "grad_norm": 0.5267634391784668, "learning_rate": 0.0002668146444898861, "loss": 1.7831, "step": 48267 }, { "epoch": 1.61, "grad_norm": 0.5536751747131348, "learning_rate": 0.00026680425653802974, "loss": 1.7659, "step": 48268 }, { "epoch": 1.61, "grad_norm": 0.5305941104888916, "learning_rate": 0.0002667938686264681, "loss": 1.743, "step": 48269 }, { "epoch": 1.61, "grad_norm": 0.5434196591377258, "learning_rate": 0.00026678348075521367, "loss": 1.7785, "step": 48270 }, { "epoch": 1.61, "grad_norm": 0.5365842580795288, "learning_rate": 0.0002667730929242794, "loss": 1.8123, "step": 48271 }, { "epoch": 1.61, "grad_norm": 0.575927734375, "learning_rate": 0.00026676270513367755, "loss": 1.805, "step": 48272 }, { "epoch": 1.61, "grad_norm": 0.5338110327720642, "learning_rate": 0.00026675231738342096, "loss": 1.7161, "step": 48273 }, { "epoch": 1.61, "grad_norm": 0.5204708576202393, "learning_rate": 0.0002667419296735221, "loss": 1.8272, "step": 48274 }, { "epoch": 1.61, "grad_norm": 0.5352057814598083, "learning_rate": 0.00026673154200399375, "loss": 1.787, "step": 48275 }, { "epoch": 1.61, "grad_norm": 0.5205267071723938, "learning_rate": 0.00026672115437484834, "loss": 1.7611, "step": 48276 }, { "epoch": 1.61, "grad_norm": 0.5163364410400391, "learning_rate": 0.00026671076678609846, "loss": 1.8166, "step": 48277 }, { "epoch": 1.61, "grad_norm": 0.5472009181976318, "learning_rate": 0.000266700379237757, "loss": 1.7994, "step": 48278 }, { "epoch": 1.61, "grad_norm": 0.552406370639801, "learning_rate": 0.0002666899917298362, "loss": 1.8255, "step": 48279 }, { "epoch": 1.61, "grad_norm": 0.5251958966255188, "learning_rate": 0.00026667960426234884, "loss": 1.7744, "step": 48280 }, { "epoch": 1.61, "grad_norm": 0.5407959222793579, "learning_rate": 0.00026666921683530766, "loss": 1.7786, "step": 48281 }, { "epoch": 1.61, "grad_norm": 0.536113440990448, "learning_rate": 0.00026665882944872506, "loss": 1.7852, "step": 48282 }, { "epoch": 1.61, "grad_norm": 0.5491685271263123, "learning_rate": 0.0002666484421026137, "loss": 1.7289, "step": 48283 }, { "epoch": 1.61, "grad_norm": 0.526297390460968, "learning_rate": 0.0002666380547969862, "loss": 1.6992, "step": 48284 }, { "epoch": 1.61, "grad_norm": 0.5463951230049133, "learning_rate": 0.00026662766753185533, "loss": 1.7755, "step": 48285 }, { "epoch": 1.61, "grad_norm": 0.5611110329627991, "learning_rate": 0.00026661728030723345, "loss": 1.7746, "step": 48286 }, { "epoch": 1.61, "grad_norm": 0.5431392788887024, "learning_rate": 0.0002666068931231333, "loss": 1.7152, "step": 48287 }, { "epoch": 1.61, "grad_norm": 0.5350292921066284, "learning_rate": 0.00026659650597956746, "loss": 1.7397, "step": 48288 }, { "epoch": 1.61, "grad_norm": 0.5368210077285767, "learning_rate": 0.00026658611887654855, "loss": 1.7535, "step": 48289 }, { "epoch": 1.61, "grad_norm": 0.534433126449585, "learning_rate": 0.0002665757318140891, "loss": 1.7711, "step": 48290 }, { "epoch": 1.61, "grad_norm": 0.5316230654716492, "learning_rate": 0.0002665653447922019, "loss": 1.7495, "step": 48291 }, { "epoch": 1.61, "grad_norm": 0.576774537563324, "learning_rate": 0.0002665549578108994, "loss": 1.7082, "step": 48292 }, { "epoch": 1.61, "grad_norm": 0.542126476764679, "learning_rate": 0.0002665445708701941, "loss": 1.7911, "step": 48293 }, { "epoch": 1.61, "grad_norm": 0.5209596753120422, "learning_rate": 0.00026653418397009894, "loss": 1.7767, "step": 48294 }, { "epoch": 1.61, "grad_norm": 0.5795019865036011, "learning_rate": 0.0002665237971106263, "loss": 1.7566, "step": 48295 }, { "epoch": 1.61, "grad_norm": 0.5397458076477051, "learning_rate": 0.00026651341029178884, "loss": 1.6967, "step": 48296 }, { "epoch": 1.61, "grad_norm": 0.5292719602584839, "learning_rate": 0.0002665030235135992, "loss": 1.766, "step": 48297 }, { "epoch": 1.61, "grad_norm": 0.5437171459197998, "learning_rate": 0.00026649263677607, "loss": 1.7186, "step": 48298 }, { "epoch": 1.61, "grad_norm": 0.5257253646850586, "learning_rate": 0.0002664822500792137, "loss": 1.7691, "step": 48299 }, { "epoch": 1.61, "grad_norm": 0.566646933555603, "learning_rate": 0.0002664718634230429, "loss": 1.8291, "step": 48300 }, { "epoch": 1.61, "grad_norm": 0.575954794883728, "learning_rate": 0.0002664614768075706, "loss": 1.7404, "step": 48301 }, { "epoch": 1.61, "grad_norm": 0.5591398477554321, "learning_rate": 0.0002664510902328089, "loss": 1.6999, "step": 48302 }, { "epoch": 1.61, "grad_norm": 0.5337590575218201, "learning_rate": 0.0002664407036987707, "loss": 1.718, "step": 48303 }, { "epoch": 1.61, "grad_norm": 0.5829340219497681, "learning_rate": 0.00026643031720546855, "loss": 1.7289, "step": 48304 }, { "epoch": 1.61, "grad_norm": 0.5624971985816956, "learning_rate": 0.0002664199307529151, "loss": 1.8124, "step": 48305 }, { "epoch": 1.61, "grad_norm": 0.5538229942321777, "learning_rate": 0.00026640954434112285, "loss": 1.6772, "step": 48306 }, { "epoch": 1.61, "grad_norm": 0.5409965515136719, "learning_rate": 0.0002663991579701044, "loss": 1.7992, "step": 48307 }, { "epoch": 1.61, "grad_norm": 0.5269727110862732, "learning_rate": 0.0002663887716398726, "loss": 1.7259, "step": 48308 }, { "epoch": 1.61, "grad_norm": 0.5637626051902771, "learning_rate": 0.0002663783853504397, "loss": 1.7833, "step": 48309 }, { "epoch": 1.61, "grad_norm": 0.5691269040107727, "learning_rate": 0.0002663679991018185, "loss": 1.6967, "step": 48310 }, { "epoch": 1.61, "grad_norm": 0.5568838715553284, "learning_rate": 0.00026635761289402166, "loss": 1.8128, "step": 48311 }, { "epoch": 1.61, "grad_norm": 0.551498532295227, "learning_rate": 0.00026634722672706177, "loss": 1.7659, "step": 48312 }, { "epoch": 1.61, "grad_norm": 0.5242221355438232, "learning_rate": 0.00026633684060095136, "loss": 1.801, "step": 48313 }, { "epoch": 1.61, "grad_norm": 0.569215714931488, "learning_rate": 0.00026632645451570296, "loss": 1.7277, "step": 48314 }, { "epoch": 1.61, "grad_norm": 0.5411421060562134, "learning_rate": 0.0002663160684713294, "loss": 1.7944, "step": 48315 }, { "epoch": 1.61, "grad_norm": 0.5383463501930237, "learning_rate": 0.0002663056824678431, "loss": 1.8287, "step": 48316 }, { "epoch": 1.61, "grad_norm": 0.5343255996704102, "learning_rate": 0.00026629529650525676, "loss": 1.7538, "step": 48317 }, { "epoch": 1.61, "grad_norm": 0.5199200510978699, "learning_rate": 0.000266284910583583, "loss": 1.7046, "step": 48318 }, { "epoch": 1.61, "grad_norm": 0.5180115699768066, "learning_rate": 0.00026627452470283434, "loss": 1.7172, "step": 48319 }, { "epoch": 1.61, "grad_norm": 0.5539887547492981, "learning_rate": 0.00026626413886302345, "loss": 1.8404, "step": 48320 }, { "epoch": 1.61, "grad_norm": 0.5270891785621643, "learning_rate": 0.0002662537530641629, "loss": 1.7745, "step": 48321 }, { "epoch": 1.61, "grad_norm": 0.5152668952941895, "learning_rate": 0.00026624336730626536, "loss": 1.7134, "step": 48322 }, { "epoch": 1.61, "grad_norm": 0.5319297313690186, "learning_rate": 0.00026623298158934337, "loss": 1.747, "step": 48323 }, { "epoch": 1.61, "grad_norm": 0.5362818837165833, "learning_rate": 0.0002662225959134096, "loss": 1.7562, "step": 48324 }, { "epoch": 1.61, "grad_norm": 0.5528218746185303, "learning_rate": 0.0002662122102784767, "loss": 1.7761, "step": 48325 }, { "epoch": 1.61, "grad_norm": 0.5136051177978516, "learning_rate": 0.00026620182468455706, "loss": 1.747, "step": 48326 }, { "epoch": 1.61, "grad_norm": 0.5431759357452393, "learning_rate": 0.00026619143913166347, "loss": 1.753, "step": 48327 }, { "epoch": 1.61, "grad_norm": 0.5485643148422241, "learning_rate": 0.00026618105361980856, "loss": 1.7436, "step": 48328 }, { "epoch": 1.61, "grad_norm": 0.5213326215744019, "learning_rate": 0.0002661706681490047, "loss": 1.7276, "step": 48329 }, { "epoch": 1.61, "grad_norm": 0.5460560917854309, "learning_rate": 0.00026616028271926476, "loss": 1.7712, "step": 48330 }, { "epoch": 1.61, "grad_norm": 0.5299035906791687, "learning_rate": 0.00026614989733060124, "loss": 1.7937, "step": 48331 }, { "epoch": 1.61, "grad_norm": 0.5270946025848389, "learning_rate": 0.00026613951198302686, "loss": 1.82, "step": 48332 }, { "epoch": 1.61, "grad_norm": 0.5278937816619873, "learning_rate": 0.000266129126676554, "loss": 1.7286, "step": 48333 }, { "epoch": 1.61, "grad_norm": 0.5317172408103943, "learning_rate": 0.0002661187414111954, "loss": 1.7506, "step": 48334 }, { "epoch": 1.61, "grad_norm": 0.535853922367096, "learning_rate": 0.00026610835618696375, "loss": 1.8305, "step": 48335 }, { "epoch": 1.61, "grad_norm": 1.6985243558883667, "learning_rate": 0.0002660979710038714, "loss": 1.8125, "step": 48336 }, { "epoch": 1.61, "grad_norm": 0.5202535390853882, "learning_rate": 0.0002660875858619312, "loss": 1.692, "step": 48337 }, { "epoch": 1.61, "grad_norm": 0.5151253938674927, "learning_rate": 0.0002660772007611558, "loss": 1.7873, "step": 48338 }, { "epoch": 1.61, "grad_norm": 0.7069687843322754, "learning_rate": 0.00026606681570155754, "loss": 1.745, "step": 48339 }, { "epoch": 1.61, "grad_norm": 0.5403252840042114, "learning_rate": 0.0002660564306831491, "loss": 1.7727, "step": 48340 }, { "epoch": 1.61, "grad_norm": 0.5227727293968201, "learning_rate": 0.0002660460457059433, "loss": 1.7305, "step": 48341 }, { "epoch": 1.61, "grad_norm": 0.5309314727783203, "learning_rate": 0.00026603566076995256, "loss": 1.7827, "step": 48342 }, { "epoch": 1.61, "grad_norm": 0.7550686001777649, "learning_rate": 0.0002660252758751894, "loss": 1.7258, "step": 48343 }, { "epoch": 1.61, "grad_norm": 0.5473878383636475, "learning_rate": 0.0002660148910216667, "loss": 1.8195, "step": 48344 }, { "epoch": 1.61, "grad_norm": 0.5242168307304382, "learning_rate": 0.0002660045062093969, "loss": 1.7008, "step": 48345 }, { "epoch": 1.61, "grad_norm": 0.5133310556411743, "learning_rate": 0.0002659941214383926, "loss": 1.7479, "step": 48346 }, { "epoch": 1.61, "grad_norm": 0.5345670580863953, "learning_rate": 0.00026598373670866637, "loss": 1.7751, "step": 48347 }, { "epoch": 1.61, "grad_norm": 0.5350268483161926, "learning_rate": 0.00026597335202023097, "loss": 1.7575, "step": 48348 }, { "epoch": 1.61, "grad_norm": 0.5422881245613098, "learning_rate": 0.00026596296737309885, "loss": 1.7691, "step": 48349 }, { "epoch": 1.61, "grad_norm": 0.5349454879760742, "learning_rate": 0.0002659525827672826, "loss": 1.8001, "step": 48350 }, { "epoch": 1.61, "grad_norm": 0.5464874505996704, "learning_rate": 0.00026594219820279497, "loss": 1.8602, "step": 48351 }, { "epoch": 1.61, "grad_norm": 0.5277932286262512, "learning_rate": 0.0002659318136796486, "loss": 1.7273, "step": 48352 }, { "epoch": 1.61, "grad_norm": 0.5430305004119873, "learning_rate": 0.0002659214291978558, "loss": 1.8675, "step": 48353 }, { "epoch": 1.61, "grad_norm": 0.5269098877906799, "learning_rate": 0.00026591104475742945, "loss": 1.856, "step": 48354 }, { "epoch": 1.61, "grad_norm": 0.563775897026062, "learning_rate": 0.00026590066035838214, "loss": 1.7138, "step": 48355 }, { "epoch": 1.61, "grad_norm": 0.5211600065231323, "learning_rate": 0.0002658902760007263, "loss": 1.6992, "step": 48356 }, { "epoch": 1.61, "grad_norm": 0.5620609521865845, "learning_rate": 0.00026587989168447464, "loss": 1.7782, "step": 48357 }, { "epoch": 1.61, "grad_norm": 0.5547635555267334, "learning_rate": 0.0002658695074096399, "loss": 1.774, "step": 48358 }, { "epoch": 1.61, "grad_norm": 0.5340693593025208, "learning_rate": 0.00026585912317623437, "loss": 1.8531, "step": 48359 }, { "epoch": 1.61, "grad_norm": 0.5511797070503235, "learning_rate": 0.00026584873898427086, "loss": 1.701, "step": 48360 }, { "epoch": 1.61, "grad_norm": 0.5151098966598511, "learning_rate": 0.000265838354833762, "loss": 1.8227, "step": 48361 }, { "epoch": 1.61, "grad_norm": 0.5769508481025696, "learning_rate": 0.0002658279707247204, "loss": 1.7454, "step": 48362 }, { "epoch": 1.61, "grad_norm": 0.5541430115699768, "learning_rate": 0.00026581758665715857, "loss": 1.7825, "step": 48363 }, { "epoch": 1.61, "grad_norm": 0.5730056166648865, "learning_rate": 0.00026580720263108906, "loss": 1.7736, "step": 48364 }, { "epoch": 1.61, "grad_norm": 0.5370741486549377, "learning_rate": 0.0002657968186465247, "loss": 1.7462, "step": 48365 }, { "epoch": 1.61, "grad_norm": 0.580695390701294, "learning_rate": 0.00026578643470347786, "loss": 1.8034, "step": 48366 }, { "epoch": 1.61, "grad_norm": 0.5247267484664917, "learning_rate": 0.0002657760508019613, "loss": 1.7675, "step": 48367 }, { "epoch": 1.61, "grad_norm": 0.5327309966087341, "learning_rate": 0.0002657656669419876, "loss": 1.6967, "step": 48368 }, { "epoch": 1.61, "grad_norm": 0.5410861968994141, "learning_rate": 0.0002657552831235693, "loss": 1.6808, "step": 48369 }, { "epoch": 1.61, "grad_norm": 0.5564342141151428, "learning_rate": 0.000265744899346719, "loss": 1.7888, "step": 48370 }, { "epoch": 1.61, "grad_norm": 0.5318483114242554, "learning_rate": 0.0002657345156114493, "loss": 1.7432, "step": 48371 }, { "epoch": 1.61, "grad_norm": 0.5545812249183655, "learning_rate": 0.0002657241319177731, "loss": 1.7608, "step": 48372 }, { "epoch": 1.61, "grad_norm": 0.5399232506752014, "learning_rate": 0.0002657137482657025, "loss": 1.8864, "step": 48373 }, { "epoch": 1.61, "grad_norm": 0.5465124249458313, "learning_rate": 0.0002657033646552504, "loss": 1.7598, "step": 48374 }, { "epoch": 1.61, "grad_norm": 0.5310431718826294, "learning_rate": 0.0002656929810864295, "loss": 1.708, "step": 48375 }, { "epoch": 1.61, "grad_norm": 0.5189304351806641, "learning_rate": 0.00026568259755925213, "loss": 1.69, "step": 48376 }, { "epoch": 1.61, "grad_norm": 0.5168820023536682, "learning_rate": 0.00026567221407373107, "loss": 1.6518, "step": 48377 }, { "epoch": 1.61, "grad_norm": 0.5432471632957458, "learning_rate": 0.00026566183062987894, "loss": 1.7347, "step": 48378 }, { "epoch": 1.61, "grad_norm": 0.5306490659713745, "learning_rate": 0.00026565144722770826, "loss": 1.7332, "step": 48379 }, { "epoch": 1.61, "grad_norm": 0.5186184048652649, "learning_rate": 0.0002656410638672315, "loss": 1.6555, "step": 48380 }, { "epoch": 1.61, "grad_norm": 0.5361289978027344, "learning_rate": 0.0002656306805484616, "loss": 1.7159, "step": 48381 }, { "epoch": 1.61, "grad_norm": 0.5137982964515686, "learning_rate": 0.000265620297271411, "loss": 1.7481, "step": 48382 }, { "epoch": 1.61, "grad_norm": 0.5333437919616699, "learning_rate": 0.0002656099140360922, "loss": 1.7832, "step": 48383 }, { "epoch": 1.61, "grad_norm": 0.5504621267318726, "learning_rate": 0.00026559953084251794, "loss": 1.7534, "step": 48384 }, { "epoch": 1.61, "grad_norm": 0.5477246046066284, "learning_rate": 0.0002655891476907008, "loss": 1.6586, "step": 48385 }, { "epoch": 1.61, "grad_norm": 0.5136134624481201, "learning_rate": 0.0002655787645806534, "loss": 1.7184, "step": 48386 }, { "epoch": 1.61, "grad_norm": 0.5279668569564819, "learning_rate": 0.0002655683815123882, "loss": 1.7678, "step": 48387 }, { "epoch": 1.61, "grad_norm": 0.5420549511909485, "learning_rate": 0.00026555799848591806, "loss": 1.7509, "step": 48388 }, { "epoch": 1.61, "grad_norm": 0.543370246887207, "learning_rate": 0.00026554761550125525, "loss": 1.7429, "step": 48389 }, { "epoch": 1.61, "grad_norm": 0.5281623601913452, "learning_rate": 0.0002655372325584127, "loss": 1.7513, "step": 48390 }, { "epoch": 1.61, "grad_norm": 0.5594914555549622, "learning_rate": 0.00026552684965740274, "loss": 1.7558, "step": 48391 }, { "epoch": 1.61, "grad_norm": 0.5259193778038025, "learning_rate": 0.00026551646679823823, "loss": 1.7753, "step": 48392 }, { "epoch": 1.61, "grad_norm": 0.5237356424331665, "learning_rate": 0.0002655060839809316, "loss": 1.7961, "step": 48393 }, { "epoch": 1.61, "grad_norm": 0.5360481142997742, "learning_rate": 0.00026549570120549545, "loss": 1.7126, "step": 48394 }, { "epoch": 1.61, "grad_norm": 0.5528312921524048, "learning_rate": 0.00026548531847194265, "loss": 1.6403, "step": 48395 }, { "epoch": 1.61, "grad_norm": 0.5308994054794312, "learning_rate": 0.0002654749357802853, "loss": 1.7854, "step": 48396 }, { "epoch": 1.61, "grad_norm": 0.5191270709037781, "learning_rate": 0.00026546455313053647, "loss": 1.7818, "step": 48397 }, { "epoch": 1.61, "grad_norm": 0.5255034565925598, "learning_rate": 0.00026545417052270857, "loss": 1.6729, "step": 48398 }, { "epoch": 1.61, "grad_norm": 0.530362606048584, "learning_rate": 0.0002654437879568142, "loss": 1.8068, "step": 48399 }, { "epoch": 1.61, "grad_norm": 0.518079936504364, "learning_rate": 0.0002654334054328659, "loss": 1.7643, "step": 48400 }, { "epoch": 1.61, "grad_norm": 0.5531554222106934, "learning_rate": 0.0002654230229508763, "loss": 1.8033, "step": 48401 }, { "epoch": 1.61, "grad_norm": 0.5217167139053345, "learning_rate": 0.00026541264051085835, "loss": 1.7316, "step": 48402 }, { "epoch": 1.61, "grad_norm": 0.5283321142196655, "learning_rate": 0.00026540225811282406, "loss": 1.7644, "step": 48403 }, { "epoch": 1.61, "grad_norm": 0.5971522331237793, "learning_rate": 0.0002653918757567865, "loss": 1.7884, "step": 48404 }, { "epoch": 1.61, "grad_norm": 0.5434351563453674, "learning_rate": 0.0002653814934427581, "loss": 1.7777, "step": 48405 }, { "epoch": 1.61, "grad_norm": 0.5391384959220886, "learning_rate": 0.0002653711111707514, "loss": 1.8061, "step": 48406 }, { "epoch": 1.61, "grad_norm": 0.5273739695549011, "learning_rate": 0.00026536072894077907, "loss": 1.6774, "step": 48407 }, { "epoch": 1.61, "grad_norm": 0.5289618372917175, "learning_rate": 0.0002653503467528538, "loss": 1.7303, "step": 48408 }, { "epoch": 1.61, "grad_norm": 0.5340504050254822, "learning_rate": 0.000265339964606988, "loss": 1.7007, "step": 48409 }, { "epoch": 1.61, "grad_norm": 0.5275204181671143, "learning_rate": 0.00026532958250319435, "loss": 1.6909, "step": 48410 }, { "epoch": 1.61, "grad_norm": 0.5285821557044983, "learning_rate": 0.0002653192004414855, "loss": 1.709, "step": 48411 }, { "epoch": 1.61, "grad_norm": 0.521329402923584, "learning_rate": 0.0002653088184218741, "loss": 1.7103, "step": 48412 }, { "epoch": 1.61, "grad_norm": 0.5623168349266052, "learning_rate": 0.0002652984364443727, "loss": 1.8319, "step": 48413 }, { "epoch": 1.61, "grad_norm": 0.5497347116470337, "learning_rate": 0.0002652880545089938, "loss": 1.7888, "step": 48414 }, { "epoch": 1.61, "grad_norm": 0.5218170881271362, "learning_rate": 0.00026527767261575017, "loss": 1.6985, "step": 48415 }, { "epoch": 1.61, "grad_norm": 0.5336452126502991, "learning_rate": 0.0002652672907646543, "loss": 1.7218, "step": 48416 }, { "epoch": 1.61, "grad_norm": 0.5509163737297058, "learning_rate": 0.0002652569089557187, "loss": 1.745, "step": 48417 }, { "epoch": 1.61, "grad_norm": 0.5420930981636047, "learning_rate": 0.00026524652718895635, "loss": 1.679, "step": 48418 }, { "epoch": 1.61, "grad_norm": 0.5252350568771362, "learning_rate": 0.00026523614546437933, "loss": 1.7701, "step": 48419 }, { "epoch": 1.61, "grad_norm": 0.555216908454895, "learning_rate": 0.0002652257637820006, "loss": 1.6915, "step": 48420 }, { "epoch": 1.61, "grad_norm": 0.52998286485672, "learning_rate": 0.0002652153821418327, "loss": 1.6211, "step": 48421 }, { "epoch": 1.61, "grad_norm": 0.5415666103363037, "learning_rate": 0.00026520500054388825, "loss": 1.7035, "step": 48422 }, { "epoch": 1.61, "grad_norm": 0.6164860129356384, "learning_rate": 0.00026519461898817973, "loss": 1.7077, "step": 48423 }, { "epoch": 1.61, "grad_norm": 0.5484271049499512, "learning_rate": 0.00026518423747471975, "loss": 1.7363, "step": 48424 }, { "epoch": 1.61, "grad_norm": 0.5196980237960815, "learning_rate": 0.00026517385600352116, "loss": 1.679, "step": 48425 }, { "epoch": 1.61, "grad_norm": 0.5491450428962708, "learning_rate": 0.0002651634745745962, "loss": 1.7597, "step": 48426 }, { "epoch": 1.61, "grad_norm": 0.5585435628890991, "learning_rate": 0.00026515309318795774, "loss": 1.8526, "step": 48427 }, { "epoch": 1.61, "grad_norm": 0.5286967754364014, "learning_rate": 0.00026514271184361823, "loss": 1.7559, "step": 48428 }, { "epoch": 1.61, "grad_norm": 0.5326581001281738, "learning_rate": 0.0002651323305415904, "loss": 1.7253, "step": 48429 }, { "epoch": 1.61, "grad_norm": 0.5681881904602051, "learning_rate": 0.00026512194928188677, "loss": 1.6916, "step": 48430 }, { "epoch": 1.61, "grad_norm": 0.4970782399177551, "learning_rate": 0.00026511156806451985, "loss": 1.7323, "step": 48431 }, { "epoch": 1.61, "grad_norm": 1.0282421112060547, "learning_rate": 0.0002651011868895026, "loss": 1.7338, "step": 48432 }, { "epoch": 1.61, "grad_norm": 0.5265601277351379, "learning_rate": 0.0002650908057568471, "loss": 1.7786, "step": 48433 }, { "epoch": 1.61, "grad_norm": 0.5358497500419617, "learning_rate": 0.0002650804246665663, "loss": 1.7668, "step": 48434 }, { "epoch": 1.61, "grad_norm": 0.5298554301261902, "learning_rate": 0.0002650700436186728, "loss": 1.7399, "step": 48435 }, { "epoch": 1.61, "grad_norm": 0.5261245965957642, "learning_rate": 0.00026505966261317906, "loss": 1.763, "step": 48436 }, { "epoch": 1.61, "grad_norm": 0.540365993976593, "learning_rate": 0.0002650492816500977, "loss": 1.7629, "step": 48437 }, { "epoch": 1.61, "grad_norm": 0.5333473086357117, "learning_rate": 0.00026503890072944133, "loss": 1.7292, "step": 48438 }, { "epoch": 1.61, "grad_norm": 0.5301958322525024, "learning_rate": 0.0002650285198512227, "loss": 1.8027, "step": 48439 }, { "epoch": 1.61, "grad_norm": 0.5493376851081848, "learning_rate": 0.00026501813901545426, "loss": 1.7166, "step": 48440 }, { "epoch": 1.61, "grad_norm": 0.5527656078338623, "learning_rate": 0.00026500775822214863, "loss": 1.7599, "step": 48441 }, { "epoch": 1.61, "grad_norm": 0.5281077027320862, "learning_rate": 0.00026499737747131847, "loss": 1.7441, "step": 48442 }, { "epoch": 1.61, "grad_norm": 0.5222750306129456, "learning_rate": 0.0002649869967629763, "loss": 1.64, "step": 48443 }, { "epoch": 1.61, "grad_norm": 0.534985363483429, "learning_rate": 0.0002649766160971347, "loss": 1.8018, "step": 48444 }, { "epoch": 1.61, "grad_norm": 0.5309173464775085, "learning_rate": 0.00026496623547380647, "loss": 1.7138, "step": 48445 }, { "epoch": 1.61, "grad_norm": 0.5277440547943115, "learning_rate": 0.0002649558548930039, "loss": 1.6499, "step": 48446 }, { "epoch": 1.61, "grad_norm": 0.5470907688140869, "learning_rate": 0.00026494547435473983, "loss": 1.7043, "step": 48447 }, { "epoch": 1.61, "grad_norm": 0.5467696785926819, "learning_rate": 0.0002649350938590268, "loss": 1.812, "step": 48448 }, { "epoch": 1.61, "grad_norm": 0.5167481303215027, "learning_rate": 0.00026492471340587744, "loss": 1.7685, "step": 48449 }, { "epoch": 1.61, "grad_norm": 0.5398524403572083, "learning_rate": 0.0002649143329953043, "loss": 1.6869, "step": 48450 }, { "epoch": 1.61, "grad_norm": 0.5305525660514832, "learning_rate": 0.00026490395262731994, "loss": 1.783, "step": 48451 }, { "epoch": 1.61, "grad_norm": 0.5237852931022644, "learning_rate": 0.00026489357230193706, "loss": 1.7468, "step": 48452 }, { "epoch": 1.61, "grad_norm": 0.5366382002830505, "learning_rate": 0.0002648831920191681, "loss": 1.696, "step": 48453 }, { "epoch": 1.61, "grad_norm": 0.5391541123390198, "learning_rate": 0.0002648728117790258, "loss": 1.7969, "step": 48454 }, { "epoch": 1.61, "grad_norm": 0.5282782316207886, "learning_rate": 0.0002648624315815229, "loss": 1.7506, "step": 48455 }, { "epoch": 1.61, "grad_norm": 0.546174168586731, "learning_rate": 0.00026485205142667167, "loss": 1.7447, "step": 48456 }, { "epoch": 1.61, "grad_norm": 0.5345521569252014, "learning_rate": 0.0002648416713144849, "loss": 1.7735, "step": 48457 }, { "epoch": 1.61, "grad_norm": 0.5494077801704407, "learning_rate": 0.0002648312912449751, "loss": 1.7278, "step": 48458 }, { "epoch": 1.61, "grad_norm": 0.5382206439971924, "learning_rate": 0.0002648209112181551, "loss": 1.7695, "step": 48459 }, { "epoch": 1.61, "grad_norm": 0.5501284003257751, "learning_rate": 0.0002648105312340372, "loss": 1.7741, "step": 48460 }, { "epoch": 1.61, "grad_norm": 0.5240716338157654, "learning_rate": 0.00026480015129263417, "loss": 1.7729, "step": 48461 }, { "epoch": 1.61, "grad_norm": 0.5378606915473938, "learning_rate": 0.00026478977139395856, "loss": 1.705, "step": 48462 }, { "epoch": 1.61, "grad_norm": 0.5235183238983154, "learning_rate": 0.000264779391538023, "loss": 1.6892, "step": 48463 }, { "epoch": 1.61, "grad_norm": 0.5460441708564758, "learning_rate": 0.00026476901172484, "loss": 1.8154, "step": 48464 }, { "epoch": 1.61, "grad_norm": 0.5496273040771484, "learning_rate": 0.00026475863195442236, "loss": 1.8061, "step": 48465 }, { "epoch": 1.61, "grad_norm": 0.5378039479255676, "learning_rate": 0.00026474825222678246, "loss": 1.682, "step": 48466 }, { "epoch": 1.61, "grad_norm": 0.5548908710479736, "learning_rate": 0.00026473787254193294, "loss": 1.7691, "step": 48467 }, { "epoch": 1.61, "grad_norm": 0.5156210660934448, "learning_rate": 0.00026472749289988646, "loss": 1.858, "step": 48468 }, { "epoch": 1.61, "grad_norm": 0.5424847602844238, "learning_rate": 0.00026471711330065573, "loss": 1.7776, "step": 48469 }, { "epoch": 1.61, "grad_norm": 0.5591382384300232, "learning_rate": 0.00026470673374425315, "loss": 1.7559, "step": 48470 }, { "epoch": 1.61, "grad_norm": 0.5502294898033142, "learning_rate": 0.00026469635423069136, "loss": 1.7473, "step": 48471 }, { "epoch": 1.61, "grad_norm": 0.5506361722946167, "learning_rate": 0.0002646859747599831, "loss": 1.8052, "step": 48472 }, { "epoch": 1.61, "grad_norm": 0.5232120156288147, "learning_rate": 0.00026467559533214076, "loss": 1.7324, "step": 48473 }, { "epoch": 1.61, "grad_norm": 1.0703980922698975, "learning_rate": 0.000264665215947177, "loss": 1.7743, "step": 48474 }, { "epoch": 1.61, "grad_norm": 0.5517598986625671, "learning_rate": 0.0002646548366051046, "loss": 1.7193, "step": 48475 }, { "epoch": 1.61, "grad_norm": 0.5461022853851318, "learning_rate": 0.0002646444573059359, "loss": 1.7069, "step": 48476 }, { "epoch": 1.61, "grad_norm": 0.5312787294387817, "learning_rate": 0.00026463407804968366, "loss": 1.8067, "step": 48477 }, { "epoch": 1.61, "grad_norm": 0.5232335329055786, "learning_rate": 0.00026462369883636043, "loss": 1.8296, "step": 48478 }, { "epoch": 1.61, "grad_norm": 0.5309826135635376, "learning_rate": 0.00026461331966597893, "loss": 1.7348, "step": 48479 }, { "epoch": 1.61, "grad_norm": 0.5352795124053955, "learning_rate": 0.0002646029405385515, "loss": 1.736, "step": 48480 }, { "epoch": 1.61, "grad_norm": 0.5533869862556458, "learning_rate": 0.0002645925614540909, "loss": 1.8238, "step": 48481 }, { "epoch": 1.61, "grad_norm": 0.5304749011993408, "learning_rate": 0.0002645821824126099, "loss": 1.7708, "step": 48482 }, { "epoch": 1.61, "grad_norm": 0.5256710648536682, "learning_rate": 0.00026457180341412067, "loss": 1.7611, "step": 48483 }, { "epoch": 1.61, "grad_norm": 0.551182210445404, "learning_rate": 0.0002645614244586361, "loss": 1.7494, "step": 48484 }, { "epoch": 1.61, "grad_norm": 0.5198783278465271, "learning_rate": 0.00026455104554616886, "loss": 1.7586, "step": 48485 }, { "epoch": 1.61, "grad_norm": 0.5139663815498352, "learning_rate": 0.00026454066667673136, "loss": 1.7453, "step": 48486 }, { "epoch": 1.61, "grad_norm": 0.539035439491272, "learning_rate": 0.0002645302878503362, "loss": 1.6952, "step": 48487 }, { "epoch": 1.61, "grad_norm": 0.5241310596466064, "learning_rate": 0.000264519909066996, "loss": 1.7968, "step": 48488 }, { "epoch": 1.61, "grad_norm": 0.5668556690216064, "learning_rate": 0.0002645095303267237, "loss": 1.8346, "step": 48489 }, { "epoch": 1.61, "grad_norm": 0.5396491885185242, "learning_rate": 0.00026449915162953127, "loss": 1.7671, "step": 48490 }, { "epoch": 1.61, "grad_norm": 0.5334023237228394, "learning_rate": 0.0002644887729754318, "loss": 1.7264, "step": 48491 }, { "epoch": 1.61, "grad_norm": 0.5406315326690674, "learning_rate": 0.00026447839436443776, "loss": 1.7835, "step": 48492 }, { "epoch": 1.61, "grad_norm": 0.5310226082801819, "learning_rate": 0.0002644680157965616, "loss": 1.735, "step": 48493 }, { "epoch": 1.61, "grad_norm": 0.528372049331665, "learning_rate": 0.0002644576372718161, "loss": 1.8002, "step": 48494 }, { "epoch": 1.61, "grad_norm": 0.5098524689674377, "learning_rate": 0.0002644472587902138, "loss": 1.7157, "step": 48495 }, { "epoch": 1.61, "grad_norm": 0.5439126491546631, "learning_rate": 0.0002644368803517673, "loss": 1.7611, "step": 48496 }, { "epoch": 1.61, "grad_norm": 0.5179252624511719, "learning_rate": 0.0002644265019564891, "loss": 1.6669, "step": 48497 }, { "epoch": 1.61, "grad_norm": 0.5537827014923096, "learning_rate": 0.0002644161236043919, "loss": 1.8074, "step": 48498 }, { "epoch": 1.61, "grad_norm": 0.5224663615226746, "learning_rate": 0.0002644057452954884, "loss": 1.7037, "step": 48499 }, { "epoch": 1.61, "grad_norm": 0.5288441181182861, "learning_rate": 0.000264395367029791, "loss": 1.7111, "step": 48500 }, { "epoch": 1.61, "grad_norm": 0.5672640800476074, "learning_rate": 0.00026438498880731234, "loss": 1.763, "step": 48501 }, { "epoch": 1.61, "grad_norm": 0.5329458117485046, "learning_rate": 0.0002643746106280652, "loss": 1.7884, "step": 48502 }, { "epoch": 1.61, "grad_norm": 0.5198644399642944, "learning_rate": 0.0002643642324920619, "loss": 1.7586, "step": 48503 }, { "epoch": 1.61, "grad_norm": 0.5125144124031067, "learning_rate": 0.0002643538543993152, "loss": 1.8014, "step": 48504 }, { "epoch": 1.61, "grad_norm": 0.5323413610458374, "learning_rate": 0.0002643434763498377, "loss": 1.776, "step": 48505 }, { "epoch": 1.61, "grad_norm": 0.5276159644126892, "learning_rate": 0.0002643330983436419, "loss": 1.8054, "step": 48506 }, { "epoch": 1.61, "grad_norm": 0.5327860713005066, "learning_rate": 0.00026432272038074046, "loss": 1.7499, "step": 48507 }, { "epoch": 1.61, "grad_norm": 0.5248603224754333, "learning_rate": 0.00026431234246114603, "loss": 1.7048, "step": 48508 }, { "epoch": 1.61, "grad_norm": 0.5336995124816895, "learning_rate": 0.0002643019645848712, "loss": 1.7322, "step": 48509 }, { "epoch": 1.61, "grad_norm": 0.5229314565658569, "learning_rate": 0.0002642915867519285, "loss": 1.7181, "step": 48510 }, { "epoch": 1.61, "grad_norm": 0.5247921347618103, "learning_rate": 0.0002642812089623304, "loss": 1.7412, "step": 48511 }, { "epoch": 1.61, "grad_norm": 0.5571459531784058, "learning_rate": 0.0002642708312160899, "loss": 1.7682, "step": 48512 }, { "epoch": 1.61, "grad_norm": 0.5505213141441345, "learning_rate": 0.0002642604535132192, "loss": 1.7157, "step": 48513 }, { "epoch": 1.61, "grad_norm": 0.5543964505195618, "learning_rate": 0.00026425007585373106, "loss": 1.7818, "step": 48514 }, { "epoch": 1.61, "grad_norm": 0.5490931868553162, "learning_rate": 0.0002642396982376381, "loss": 1.7456, "step": 48515 }, { "epoch": 1.61, "grad_norm": 0.5414848327636719, "learning_rate": 0.00026422932066495286, "loss": 1.7684, "step": 48516 }, { "epoch": 1.61, "grad_norm": 0.5376243591308594, "learning_rate": 0.0002642189431356879, "loss": 1.7414, "step": 48517 }, { "epoch": 1.61, "grad_norm": 0.53611820936203, "learning_rate": 0.0002642085656498559, "loss": 1.7625, "step": 48518 }, { "epoch": 1.61, "grad_norm": 0.5311840176582336, "learning_rate": 0.0002641981882074695, "loss": 1.6911, "step": 48519 }, { "epoch": 1.61, "grad_norm": 0.5241869688034058, "learning_rate": 0.00026418781080854106, "loss": 1.6985, "step": 48520 }, { "epoch": 1.61, "grad_norm": 0.5241371989250183, "learning_rate": 0.00026417743345308347, "loss": 1.7897, "step": 48521 }, { "epoch": 1.61, "grad_norm": 0.5351036787033081, "learning_rate": 0.0002641670561411092, "loss": 1.8103, "step": 48522 }, { "epoch": 1.61, "grad_norm": 0.5409956574440002, "learning_rate": 0.00026415667887263084, "loss": 1.8316, "step": 48523 }, { "epoch": 1.61, "grad_norm": 0.524003803730011, "learning_rate": 0.00026414630164766094, "loss": 1.7624, "step": 48524 }, { "epoch": 1.61, "grad_norm": 0.5366364121437073, "learning_rate": 0.0002641359244662122, "loss": 1.775, "step": 48525 }, { "epoch": 1.61, "grad_norm": 0.5230324864387512, "learning_rate": 0.00026412554732829714, "loss": 1.7776, "step": 48526 }, { "epoch": 1.61, "grad_norm": 0.5270100831985474, "learning_rate": 0.00026411517023392834, "loss": 1.8092, "step": 48527 }, { "epoch": 1.61, "grad_norm": 0.5391936302185059, "learning_rate": 0.00026410479318311845, "loss": 1.8303, "step": 48528 }, { "epoch": 1.61, "grad_norm": 0.5344693660736084, "learning_rate": 0.00026409441617588014, "loss": 1.7507, "step": 48529 }, { "epoch": 1.61, "grad_norm": 0.5600454807281494, "learning_rate": 0.00026408403921222585, "loss": 1.7046, "step": 48530 }, { "epoch": 1.61, "grad_norm": 1.041810154914856, "learning_rate": 0.0002640736622921682, "loss": 1.7751, "step": 48531 }, { "epoch": 1.61, "grad_norm": 0.5413034558296204, "learning_rate": 0.00026406328541571996, "loss": 1.7175, "step": 48532 }, { "epoch": 1.61, "grad_norm": 0.5409988760948181, "learning_rate": 0.00026405290858289346, "loss": 1.784, "step": 48533 }, { "epoch": 1.61, "grad_norm": 0.5343325138092041, "learning_rate": 0.00026404253179370145, "loss": 1.6837, "step": 48534 }, { "epoch": 1.61, "grad_norm": 0.5413280129432678, "learning_rate": 0.00026403215504815664, "loss": 1.7445, "step": 48535 }, { "epoch": 1.61, "grad_norm": 0.5444694757461548, "learning_rate": 0.00026402177834627136, "loss": 1.7699, "step": 48536 }, { "epoch": 1.61, "grad_norm": 0.5373826026916504, "learning_rate": 0.0002640114016880584, "loss": 1.7759, "step": 48537 }, { "epoch": 1.61, "grad_norm": 0.5404611229896545, "learning_rate": 0.0002640010250735302, "loss": 1.762, "step": 48538 }, { "epoch": 1.61, "grad_norm": 0.522758424282074, "learning_rate": 0.0002639906485026996, "loss": 1.7436, "step": 48539 }, { "epoch": 1.61, "grad_norm": 0.5359678864479065, "learning_rate": 0.00026398027197557897, "loss": 1.6837, "step": 48540 }, { "epoch": 1.61, "grad_norm": 0.5450878739356995, "learning_rate": 0.00026396989549218086, "loss": 1.708, "step": 48541 }, { "epoch": 1.61, "grad_norm": 0.5472971796989441, "learning_rate": 0.0002639595190525183, "loss": 1.76, "step": 48542 }, { "epoch": 1.62, "grad_norm": 0.5561373829841614, "learning_rate": 0.00026394914265660325, "loss": 1.8152, "step": 48543 }, { "epoch": 1.62, "grad_norm": 0.5394043326377869, "learning_rate": 0.0002639387663044488, "loss": 1.7778, "step": 48544 }, { "epoch": 1.62, "grad_norm": 0.5236918926239014, "learning_rate": 0.0002639283899960673, "loss": 1.7089, "step": 48545 }, { "epoch": 1.62, "grad_norm": 0.5494449734687805, "learning_rate": 0.00026391801373147156, "loss": 1.6784, "step": 48546 }, { "epoch": 1.62, "grad_norm": 0.5416194796562195, "learning_rate": 0.0002639076375106739, "loss": 1.7244, "step": 48547 }, { "epoch": 1.62, "grad_norm": 0.5382938981056213, "learning_rate": 0.00026389726133368703, "loss": 1.6596, "step": 48548 }, { "epoch": 1.62, "grad_norm": 0.5367181301116943, "learning_rate": 0.0002638868852005236, "loss": 1.7693, "step": 48549 }, { "epoch": 1.62, "grad_norm": 0.5414093136787415, "learning_rate": 0.00026387650911119625, "loss": 1.763, "step": 48550 }, { "epoch": 1.62, "grad_norm": 0.551808774471283, "learning_rate": 0.00026386613306571743, "loss": 1.767, "step": 48551 }, { "epoch": 1.62, "grad_norm": 0.5460332036018372, "learning_rate": 0.00026385575706409985, "loss": 1.7391, "step": 48552 }, { "epoch": 1.62, "grad_norm": 0.5508157014846802, "learning_rate": 0.000263845381106356, "loss": 1.7249, "step": 48553 }, { "epoch": 1.62, "grad_norm": 0.5386752486228943, "learning_rate": 0.00026383500519249853, "loss": 1.7432, "step": 48554 }, { "epoch": 1.62, "grad_norm": 0.5563032031059265, "learning_rate": 0.00026382462932253995, "loss": 1.7919, "step": 48555 }, { "epoch": 1.62, "grad_norm": 0.5496903657913208, "learning_rate": 0.00026381425349649313, "loss": 1.7806, "step": 48556 }, { "epoch": 1.62, "grad_norm": 0.5773911476135254, "learning_rate": 0.0002638038777143703, "loss": 1.8727, "step": 48557 }, { "epoch": 1.62, "grad_norm": 0.5246707797050476, "learning_rate": 0.0002637935019761843, "loss": 1.7631, "step": 48558 }, { "epoch": 1.62, "grad_norm": 0.542236864566803, "learning_rate": 0.00026378312628194775, "loss": 1.7976, "step": 48559 }, { "epoch": 1.62, "grad_norm": 0.5771588087081909, "learning_rate": 0.0002637727506316731, "loss": 1.7319, "step": 48560 }, { "epoch": 1.62, "grad_norm": 0.5854530930519104, "learning_rate": 0.0002637623750253729, "loss": 1.7287, "step": 48561 }, { "epoch": 1.62, "grad_norm": 0.5508065819740295, "learning_rate": 0.00026375199946305995, "loss": 1.6961, "step": 48562 }, { "epoch": 1.62, "grad_norm": 0.5997686982154846, "learning_rate": 0.0002637416239447466, "loss": 1.8106, "step": 48563 }, { "epoch": 1.62, "grad_norm": 0.5344050526618958, "learning_rate": 0.0002637312484704457, "loss": 1.7858, "step": 48564 }, { "epoch": 1.62, "grad_norm": 0.556675910949707, "learning_rate": 0.0002637208730401696, "loss": 1.7661, "step": 48565 }, { "epoch": 1.62, "grad_norm": 0.530792772769928, "learning_rate": 0.0002637104976539312, "loss": 1.79, "step": 48566 }, { "epoch": 1.62, "grad_norm": 0.5421188473701477, "learning_rate": 0.0002637001223117427, "loss": 1.7135, "step": 48567 }, { "epoch": 1.62, "grad_norm": 0.5148066878318787, "learning_rate": 0.00026368974701361704, "loss": 1.7174, "step": 48568 }, { "epoch": 1.62, "grad_norm": 0.5367986559867859, "learning_rate": 0.00026367937175956666, "loss": 1.7961, "step": 48569 }, { "epoch": 1.62, "grad_norm": 0.5677838921546936, "learning_rate": 0.0002636689965496041, "loss": 1.7017, "step": 48570 }, { "epoch": 1.62, "grad_norm": 0.5588857531547546, "learning_rate": 0.00026365862138374204, "loss": 1.803, "step": 48571 }, { "epoch": 1.62, "grad_norm": 0.5373274683952332, "learning_rate": 0.00026364824626199317, "loss": 1.7717, "step": 48572 }, { "epoch": 1.62, "grad_norm": 0.5509748458862305, "learning_rate": 0.00026363787118436987, "loss": 1.8238, "step": 48573 }, { "epoch": 1.62, "grad_norm": 0.5398619174957275, "learning_rate": 0.00026362749615088486, "loss": 1.7038, "step": 48574 }, { "epoch": 1.62, "grad_norm": 0.5526056885719299, "learning_rate": 0.00026361712116155063, "loss": 1.7983, "step": 48575 }, { "epoch": 1.62, "grad_norm": 0.5340983271598816, "learning_rate": 0.00026360674621638, "loss": 1.7323, "step": 48576 }, { "epoch": 1.62, "grad_norm": 0.5391905903816223, "learning_rate": 0.00026359637131538534, "loss": 1.6877, "step": 48577 }, { "epoch": 1.62, "grad_norm": 0.5450388193130493, "learning_rate": 0.0002635859964585793, "loss": 1.745, "step": 48578 }, { "epoch": 1.62, "grad_norm": 0.5388544797897339, "learning_rate": 0.0002635756216459746, "loss": 1.7617, "step": 48579 }, { "epoch": 1.62, "grad_norm": 0.5251782536506653, "learning_rate": 0.00026356524687758364, "loss": 1.7912, "step": 48580 }, { "epoch": 1.62, "grad_norm": 0.5298420190811157, "learning_rate": 0.0002635548721534191, "loss": 1.7127, "step": 48581 }, { "epoch": 1.62, "grad_norm": 0.5362512469291687, "learning_rate": 0.00026354449747349366, "loss": 1.7715, "step": 48582 }, { "epoch": 1.62, "grad_norm": 0.581386923789978, "learning_rate": 0.0002635341228378197, "loss": 1.7304, "step": 48583 }, { "epoch": 1.62, "grad_norm": 0.5446288585662842, "learning_rate": 0.00026352374824640997, "loss": 1.7562, "step": 48584 }, { "epoch": 1.62, "grad_norm": 0.5449011921882629, "learning_rate": 0.0002635133736992771, "loss": 1.6815, "step": 48585 }, { "epoch": 1.62, "grad_norm": 0.5427494049072266, "learning_rate": 0.0002635029991964336, "loss": 1.7115, "step": 48586 }, { "epoch": 1.62, "grad_norm": 0.5572260618209839, "learning_rate": 0.00026349262473789205, "loss": 1.8291, "step": 48587 }, { "epoch": 1.62, "grad_norm": 0.5155461430549622, "learning_rate": 0.0002634822503236651, "loss": 1.7516, "step": 48588 }, { "epoch": 1.62, "grad_norm": 0.5429002642631531, "learning_rate": 0.00026347187595376535, "loss": 1.7531, "step": 48589 }, { "epoch": 1.62, "grad_norm": 0.5251010060310364, "learning_rate": 0.0002634615016282053, "loss": 1.7045, "step": 48590 }, { "epoch": 1.62, "grad_norm": 0.5315492749214172, "learning_rate": 0.0002634511273469976, "loss": 1.7937, "step": 48591 }, { "epoch": 1.62, "grad_norm": 0.5415001511573792, "learning_rate": 0.00026344075311015495, "loss": 1.7217, "step": 48592 }, { "epoch": 1.62, "grad_norm": 0.5398076772689819, "learning_rate": 0.0002634303789176897, "loss": 1.774, "step": 48593 }, { "epoch": 1.62, "grad_norm": 0.5578115582466125, "learning_rate": 0.00026342000476961466, "loss": 1.6608, "step": 48594 }, { "epoch": 1.62, "grad_norm": 0.5292214155197144, "learning_rate": 0.0002634096306659423, "loss": 1.7071, "step": 48595 }, { "epoch": 1.62, "grad_norm": 0.5144553184509277, "learning_rate": 0.0002633992566066853, "loss": 1.7445, "step": 48596 }, { "epoch": 1.62, "grad_norm": 0.5410905480384827, "learning_rate": 0.0002633888825918562, "loss": 1.7661, "step": 48597 }, { "epoch": 1.62, "grad_norm": 0.5399067401885986, "learning_rate": 0.0002633785086214675, "loss": 1.8012, "step": 48598 }, { "epoch": 1.62, "grad_norm": 0.5481894612312317, "learning_rate": 0.0002633681346955321, "loss": 1.8095, "step": 48599 }, { "epoch": 1.62, "grad_norm": 0.5321758985519409, "learning_rate": 0.00026335776081406223, "loss": 1.7264, "step": 48600 }, { "epoch": 1.62, "grad_norm": 0.5253651142120361, "learning_rate": 0.0002633473869770707, "loss": 1.7631, "step": 48601 }, { "epoch": 1.62, "grad_norm": 0.544169545173645, "learning_rate": 0.00026333701318457007, "loss": 1.7345, "step": 48602 }, { "epoch": 1.62, "grad_norm": 0.5169833302497864, "learning_rate": 0.00026332663943657284, "loss": 1.7054, "step": 48603 }, { "epoch": 1.62, "grad_norm": 0.529482901096344, "learning_rate": 0.0002633162657330917, "loss": 1.747, "step": 48604 }, { "epoch": 1.62, "grad_norm": 0.556826651096344, "learning_rate": 0.0002633058920741391, "loss": 1.8004, "step": 48605 }, { "epoch": 1.62, "grad_norm": 0.550487220287323, "learning_rate": 0.000263295518459728, "loss": 1.7683, "step": 48606 }, { "epoch": 1.62, "grad_norm": 0.5414158701896667, "learning_rate": 0.0002632851448898705, "loss": 1.749, "step": 48607 }, { "epoch": 1.62, "grad_norm": 0.5281496047973633, "learning_rate": 0.00026327477136457944, "loss": 1.7879, "step": 48608 }, { "epoch": 1.62, "grad_norm": 0.544657826423645, "learning_rate": 0.0002632643978838675, "loss": 1.7935, "step": 48609 }, { "epoch": 1.62, "grad_norm": 0.5299921035766602, "learning_rate": 0.0002632540244477471, "loss": 1.8092, "step": 48610 }, { "epoch": 1.62, "grad_norm": 0.5412049889564514, "learning_rate": 0.0002632436510562309, "loss": 1.7847, "step": 48611 }, { "epoch": 1.62, "grad_norm": 0.572546660900116, "learning_rate": 0.0002632332777093316, "loss": 1.7328, "step": 48612 }, { "epoch": 1.62, "grad_norm": 0.5459584593772888, "learning_rate": 0.0002632229044070616, "loss": 1.7507, "step": 48613 }, { "epoch": 1.62, "grad_norm": 0.5376312136650085, "learning_rate": 0.0002632125311494335, "loss": 1.7316, "step": 48614 }, { "epoch": 1.62, "grad_norm": 0.5260179042816162, "learning_rate": 0.00026320215793646, "loss": 1.7907, "step": 48615 }, { "epoch": 1.62, "grad_norm": 0.5341795086860657, "learning_rate": 0.00026319178476815375, "loss": 1.7482, "step": 48616 }, { "epoch": 1.62, "grad_norm": 0.5346525311470032, "learning_rate": 0.0002631814116445272, "loss": 1.7633, "step": 48617 }, { "epoch": 1.62, "grad_norm": 0.5454130172729492, "learning_rate": 0.00026317103856559297, "loss": 1.7358, "step": 48618 }, { "epoch": 1.62, "grad_norm": 0.5375067591667175, "learning_rate": 0.0002631606655313637, "loss": 1.7285, "step": 48619 }, { "epoch": 1.62, "grad_norm": 0.5331593751907349, "learning_rate": 0.00026315029254185193, "loss": 1.6556, "step": 48620 }, { "epoch": 1.62, "grad_norm": 0.5439176559448242, "learning_rate": 0.0002631399195970702, "loss": 1.7613, "step": 48621 }, { "epoch": 1.62, "grad_norm": 0.5397302508354187, "learning_rate": 0.0002631295466970314, "loss": 1.6952, "step": 48622 }, { "epoch": 1.62, "grad_norm": 0.5346928238868713, "learning_rate": 0.0002631191738417477, "loss": 1.7249, "step": 48623 }, { "epoch": 1.62, "grad_norm": 0.5559539198875427, "learning_rate": 0.0002631088010312319, "loss": 1.7776, "step": 48624 }, { "epoch": 1.62, "grad_norm": 0.5292665362358093, "learning_rate": 0.00026309842826549665, "loss": 1.7515, "step": 48625 }, { "epoch": 1.62, "grad_norm": 0.5357630252838135, "learning_rate": 0.0002630880555445545, "loss": 1.7718, "step": 48626 }, { "epoch": 1.62, "grad_norm": 0.5394555330276489, "learning_rate": 0.00026307768286841797, "loss": 1.8024, "step": 48627 }, { "epoch": 1.62, "grad_norm": 0.5336886644363403, "learning_rate": 0.0002630673102370996, "loss": 1.7188, "step": 48628 }, { "epoch": 1.62, "grad_norm": 0.5239052176475525, "learning_rate": 0.00026305693765061233, "loss": 1.787, "step": 48629 }, { "epoch": 1.62, "grad_norm": 0.5379868149757385, "learning_rate": 0.0002630465651089682, "loss": 1.7154, "step": 48630 }, { "epoch": 1.62, "grad_norm": 0.5368053913116455, "learning_rate": 0.00026303619261218024, "loss": 1.7278, "step": 48631 }, { "epoch": 1.62, "grad_norm": 0.5360241532325745, "learning_rate": 0.00026302582016026095, "loss": 1.6743, "step": 48632 }, { "epoch": 1.62, "grad_norm": 0.5383316278457642, "learning_rate": 0.0002630154477532227, "loss": 1.802, "step": 48633 }, { "epoch": 1.62, "grad_norm": 0.558718740940094, "learning_rate": 0.0002630050753910784, "loss": 1.862, "step": 48634 }, { "epoch": 1.62, "grad_norm": 0.5111987590789795, "learning_rate": 0.0002629947030738403, "loss": 1.7358, "step": 48635 }, { "epoch": 1.62, "grad_norm": 0.5515730381011963, "learning_rate": 0.00026298433080152144, "loss": 1.7538, "step": 48636 }, { "epoch": 1.62, "grad_norm": 0.5335015058517456, "learning_rate": 0.000262973958574134, "loss": 1.738, "step": 48637 }, { "epoch": 1.62, "grad_norm": 0.5474407076835632, "learning_rate": 0.00026296358639169064, "loss": 1.7683, "step": 48638 }, { "epoch": 1.62, "grad_norm": 0.5414120554924011, "learning_rate": 0.0002629532142542042, "loss": 1.8222, "step": 48639 }, { "epoch": 1.62, "grad_norm": 0.5579060912132263, "learning_rate": 0.000262942842161687, "loss": 1.7774, "step": 48640 }, { "epoch": 1.62, "grad_norm": 0.5308550000190735, "learning_rate": 0.00026293247011415177, "loss": 1.7527, "step": 48641 }, { "epoch": 1.62, "grad_norm": 0.5508776903152466, "learning_rate": 0.000262922098111611, "loss": 1.7586, "step": 48642 }, { "epoch": 1.62, "grad_norm": 0.5384984016418457, "learning_rate": 0.0002629117261540774, "loss": 1.7525, "step": 48643 }, { "epoch": 1.62, "grad_norm": 0.5339600443840027, "learning_rate": 0.00026290135424156345, "loss": 1.7867, "step": 48644 }, { "epoch": 1.62, "grad_norm": 0.5274344086647034, "learning_rate": 0.00026289098237408177, "loss": 1.7214, "step": 48645 }, { "epoch": 1.62, "grad_norm": 0.5230975151062012, "learning_rate": 0.00026288061055164505, "loss": 1.7194, "step": 48646 }, { "epoch": 1.62, "grad_norm": 0.5393137335777283, "learning_rate": 0.0002628702387742658, "loss": 1.7743, "step": 48647 }, { "epoch": 1.62, "grad_norm": 0.5550755262374878, "learning_rate": 0.00026285986704195657, "loss": 1.7541, "step": 48648 }, { "epoch": 1.62, "grad_norm": 0.5655659437179565, "learning_rate": 0.00026284949535473006, "loss": 1.7987, "step": 48649 }, { "epoch": 1.62, "grad_norm": 0.5649690628051758, "learning_rate": 0.0002628391237125987, "loss": 1.7213, "step": 48650 }, { "epoch": 1.62, "grad_norm": 0.5594232678413391, "learning_rate": 0.0002628287521155751, "loss": 1.8297, "step": 48651 }, { "epoch": 1.62, "grad_norm": 0.5339585542678833, "learning_rate": 0.00026281838056367216, "loss": 1.7506, "step": 48652 }, { "epoch": 1.62, "grad_norm": 0.5170247554779053, "learning_rate": 0.000262808009056902, "loss": 1.8399, "step": 48653 }, { "epoch": 1.62, "grad_norm": 0.5470843315124512, "learning_rate": 0.0002627976375952775, "loss": 1.6992, "step": 48654 }, { "epoch": 1.62, "grad_norm": 0.5675803422927856, "learning_rate": 0.0002627872661788112, "loss": 1.7835, "step": 48655 }, { "epoch": 1.62, "grad_norm": 0.5506885647773743, "learning_rate": 0.00026277689480751573, "loss": 1.7833, "step": 48656 }, { "epoch": 1.62, "grad_norm": 0.5764473676681519, "learning_rate": 0.00026276652348140357, "loss": 1.6633, "step": 48657 }, { "epoch": 1.62, "grad_norm": 0.5077370405197144, "learning_rate": 0.00026275615220048724, "loss": 1.757, "step": 48658 }, { "epoch": 1.62, "grad_norm": 0.5293810367584229, "learning_rate": 0.0002627457809647797, "loss": 1.7039, "step": 48659 }, { "epoch": 1.62, "grad_norm": 0.5454123020172119, "learning_rate": 0.0002627354097742931, "loss": 1.782, "step": 48660 }, { "epoch": 1.62, "grad_norm": 0.5342967510223389, "learning_rate": 0.0002627250386290403, "loss": 1.7675, "step": 48661 }, { "epoch": 1.62, "grad_norm": 0.5446010828018188, "learning_rate": 0.0002627146675290339, "loss": 1.7699, "step": 48662 }, { "epoch": 1.62, "grad_norm": 0.5529269576072693, "learning_rate": 0.00026270429647428625, "loss": 1.8104, "step": 48663 }, { "epoch": 1.62, "grad_norm": 0.5356332063674927, "learning_rate": 0.00026269392546481013, "loss": 1.7172, "step": 48664 }, { "epoch": 1.62, "grad_norm": 0.5200207233428955, "learning_rate": 0.00026268355450061805, "loss": 1.7077, "step": 48665 }, { "epoch": 1.62, "grad_norm": 0.5350154042243958, "learning_rate": 0.00026267318358172274, "loss": 1.7281, "step": 48666 }, { "epoch": 1.62, "grad_norm": 3.0722451210021973, "learning_rate": 0.0002626628127081366, "loss": 1.7916, "step": 48667 }, { "epoch": 1.62, "grad_norm": 0.5342901349067688, "learning_rate": 0.0002626524418798723, "loss": 1.7325, "step": 48668 }, { "epoch": 1.62, "grad_norm": 0.5403555035591125, "learning_rate": 0.00026264207109694257, "loss": 1.8028, "step": 48669 }, { "epoch": 1.62, "grad_norm": 0.5501351952552795, "learning_rate": 0.00026263170035935974, "loss": 1.7201, "step": 48670 }, { "epoch": 1.62, "grad_norm": 0.5068190693855286, "learning_rate": 0.0002626213296671365, "loss": 1.753, "step": 48671 }, { "epoch": 1.62, "grad_norm": 0.5347842574119568, "learning_rate": 0.0002626109590202855, "loss": 1.648, "step": 48672 }, { "epoch": 1.62, "grad_norm": 0.5491775274276733, "learning_rate": 0.0002626005884188193, "loss": 1.703, "step": 48673 }, { "epoch": 1.62, "grad_norm": 0.5359936952590942, "learning_rate": 0.00026259021786275046, "loss": 1.724, "step": 48674 }, { "epoch": 1.62, "grad_norm": 0.5159738063812256, "learning_rate": 0.0002625798473520916, "loss": 1.6593, "step": 48675 }, { "epoch": 1.62, "grad_norm": 0.5303804278373718, "learning_rate": 0.0002625694768868553, "loss": 1.7467, "step": 48676 }, { "epoch": 1.62, "grad_norm": 0.5682249069213867, "learning_rate": 0.0002625591064670541, "loss": 1.8071, "step": 48677 }, { "epoch": 1.62, "grad_norm": 0.5214309096336365, "learning_rate": 0.0002625487360927006, "loss": 1.7137, "step": 48678 }, { "epoch": 1.62, "grad_norm": 0.544908881187439, "learning_rate": 0.0002625383657638075, "loss": 1.6816, "step": 48679 }, { "epoch": 1.62, "grad_norm": 0.5387484431266785, "learning_rate": 0.0002625279954803872, "loss": 1.6739, "step": 48680 }, { "epoch": 1.62, "grad_norm": 0.5437459945678711, "learning_rate": 0.0002625176252424525, "loss": 1.7432, "step": 48681 }, { "epoch": 1.62, "grad_norm": 0.528572142124176, "learning_rate": 0.0002625072550500158, "loss": 1.7241, "step": 48682 }, { "epoch": 1.62, "grad_norm": 0.5286815762519836, "learning_rate": 0.0002624968849030899, "loss": 1.795, "step": 48683 }, { "epoch": 1.62, "grad_norm": 0.5863761901855469, "learning_rate": 0.00026248651480168715, "loss": 1.8155, "step": 48684 }, { "epoch": 1.62, "grad_norm": 0.5443456172943115, "learning_rate": 0.00026247614474582024, "loss": 1.744, "step": 48685 }, { "epoch": 1.62, "grad_norm": 0.546302080154419, "learning_rate": 0.00026246577473550185, "loss": 1.7847, "step": 48686 }, { "epoch": 1.62, "grad_norm": 0.5508676171302795, "learning_rate": 0.0002624554047707443, "loss": 1.7853, "step": 48687 }, { "epoch": 1.62, "grad_norm": 0.5240235328674316, "learning_rate": 0.0002624450348515605, "loss": 1.7967, "step": 48688 }, { "epoch": 1.62, "grad_norm": 0.5548256039619446, "learning_rate": 0.00026243466497796286, "loss": 1.7666, "step": 48689 }, { "epoch": 1.62, "grad_norm": 0.5647175908088684, "learning_rate": 0.00026242429514996396, "loss": 1.7885, "step": 48690 }, { "epoch": 1.62, "grad_norm": 0.5457278490066528, "learning_rate": 0.0002624139253675765, "loss": 1.7287, "step": 48691 }, { "epoch": 1.62, "grad_norm": 0.5380904078483582, "learning_rate": 0.00026240355563081287, "loss": 1.8148, "step": 48692 }, { "epoch": 1.62, "grad_norm": 0.5389641523361206, "learning_rate": 0.000262393185939686, "loss": 1.7389, "step": 48693 }, { "epoch": 1.62, "grad_norm": 0.5268386006355286, "learning_rate": 0.0002623828162942081, "loss": 1.7025, "step": 48694 }, { "epoch": 1.62, "grad_norm": 0.552795946598053, "learning_rate": 0.0002623724466943919, "loss": 1.7289, "step": 48695 }, { "epoch": 1.62, "grad_norm": 0.5704202055931091, "learning_rate": 0.0002623620771402501, "loss": 1.6829, "step": 48696 }, { "epoch": 1.62, "grad_norm": 0.5461297631263733, "learning_rate": 0.00026235170763179514, "loss": 1.7, "step": 48697 }, { "epoch": 1.62, "grad_norm": 0.5510181188583374, "learning_rate": 0.00026234133816903973, "loss": 1.7476, "step": 48698 }, { "epoch": 1.62, "grad_norm": 0.5178004503250122, "learning_rate": 0.00026233096875199634, "loss": 1.6841, "step": 48699 }, { "epoch": 1.62, "grad_norm": 0.5435531735420227, "learning_rate": 0.0002623205993806776, "loss": 1.8335, "step": 48700 }, { "epoch": 1.62, "grad_norm": 0.5358694195747375, "learning_rate": 0.00026231023005509604, "loss": 1.759, "step": 48701 }, { "epoch": 1.62, "grad_norm": 0.5572023987770081, "learning_rate": 0.00026229986077526434, "loss": 1.7312, "step": 48702 }, { "epoch": 1.62, "grad_norm": 0.5424597263336182, "learning_rate": 0.00026228949154119514, "loss": 1.732, "step": 48703 }, { "epoch": 1.62, "grad_norm": 0.5647688508033752, "learning_rate": 0.0002622791223529009, "loss": 1.7666, "step": 48704 }, { "epoch": 1.62, "grad_norm": 0.7238220572471619, "learning_rate": 0.0002622687532103942, "loss": 1.7612, "step": 48705 }, { "epoch": 1.62, "grad_norm": 0.5441581606864929, "learning_rate": 0.0002622583841136877, "loss": 1.7714, "step": 48706 }, { "epoch": 1.62, "grad_norm": 0.5348476767539978, "learning_rate": 0.000262248015062794, "loss": 1.796, "step": 48707 }, { "epoch": 1.62, "grad_norm": 0.5416030883789062, "learning_rate": 0.0002622376460577255, "loss": 1.7075, "step": 48708 }, { "epoch": 1.62, "grad_norm": 0.5393574833869934, "learning_rate": 0.00026222727709849516, "loss": 1.7988, "step": 48709 }, { "epoch": 1.62, "grad_norm": 0.540545642375946, "learning_rate": 0.0002622169081851152, "loss": 1.6916, "step": 48710 }, { "epoch": 1.62, "grad_norm": 0.5185180306434631, "learning_rate": 0.0002622065393175983, "loss": 1.6901, "step": 48711 }, { "epoch": 1.62, "grad_norm": 0.5223959684371948, "learning_rate": 0.00026219617049595716, "loss": 1.7753, "step": 48712 }, { "epoch": 1.62, "grad_norm": 0.5191739797592163, "learning_rate": 0.0002621858017202044, "loss": 1.7631, "step": 48713 }, { "epoch": 1.62, "grad_norm": 0.5383414626121521, "learning_rate": 0.00026217543299035236, "loss": 1.7877, "step": 48714 }, { "epoch": 1.62, "grad_norm": 0.5486063957214355, "learning_rate": 0.00026216506430641373, "loss": 1.7279, "step": 48715 }, { "epoch": 1.62, "grad_norm": 0.51480633020401, "learning_rate": 0.00026215469566840137, "loss": 1.7179, "step": 48716 }, { "epoch": 1.62, "grad_norm": 0.5394075512886047, "learning_rate": 0.0002621443270763274, "loss": 1.7723, "step": 48717 }, { "epoch": 1.62, "grad_norm": 0.5247815251350403, "learning_rate": 0.0002621339585302047, "loss": 1.732, "step": 48718 }, { "epoch": 1.62, "grad_norm": 0.5482362508773804, "learning_rate": 0.0002621235900300459, "loss": 1.7491, "step": 48719 }, { "epoch": 1.62, "grad_norm": 0.5309464335441589, "learning_rate": 0.00026211322157586336, "loss": 1.7338, "step": 48720 }, { "epoch": 1.62, "grad_norm": 0.5426546931266785, "learning_rate": 0.0002621028531676698, "loss": 1.7685, "step": 48721 }, { "epoch": 1.62, "grad_norm": 0.53961580991745, "learning_rate": 0.0002620924848054778, "loss": 1.7212, "step": 48722 }, { "epoch": 1.62, "grad_norm": 0.5362958908081055, "learning_rate": 0.00026208211648930005, "loss": 1.7822, "step": 48723 }, { "epoch": 1.62, "grad_norm": 0.543247401714325, "learning_rate": 0.00026207174821914886, "loss": 1.7492, "step": 48724 }, { "epoch": 1.62, "grad_norm": 0.5301516652107239, "learning_rate": 0.000262061379995037, "loss": 1.7136, "step": 48725 }, { "epoch": 1.62, "grad_norm": 0.536169707775116, "learning_rate": 0.0002620510118169772, "loss": 1.748, "step": 48726 }, { "epoch": 1.62, "grad_norm": 0.5269827842712402, "learning_rate": 0.00026204064368498173, "loss": 1.7498, "step": 48727 }, { "epoch": 1.62, "grad_norm": 0.5564352869987488, "learning_rate": 0.0002620302755990634, "loss": 1.7167, "step": 48728 }, { "epoch": 1.62, "grad_norm": 0.5339165925979614, "learning_rate": 0.00026201990755923474, "loss": 1.7532, "step": 48729 }, { "epoch": 1.62, "grad_norm": 0.5548962950706482, "learning_rate": 0.0002620095395655082, "loss": 1.7437, "step": 48730 }, { "epoch": 1.62, "grad_norm": 0.5378313660621643, "learning_rate": 0.0002619991716178965, "loss": 1.7453, "step": 48731 }, { "epoch": 1.62, "grad_norm": 0.5251936912536621, "learning_rate": 0.0002619888037164122, "loss": 1.7158, "step": 48732 }, { "epoch": 1.62, "grad_norm": 0.5163585543632507, "learning_rate": 0.000261978435861068, "loss": 1.8119, "step": 48733 }, { "epoch": 1.62, "grad_norm": 0.5467148423194885, "learning_rate": 0.00026196806805187626, "loss": 1.7907, "step": 48734 }, { "epoch": 1.62, "grad_norm": 0.5417703986167908, "learning_rate": 0.0002619577002888498, "loss": 1.7965, "step": 48735 }, { "epoch": 1.62, "grad_norm": 0.5409150719642639, "learning_rate": 0.00026194733257200103, "loss": 1.7687, "step": 48736 }, { "epoch": 1.62, "grad_norm": 0.5443679690361023, "learning_rate": 0.00026193696490134263, "loss": 1.7161, "step": 48737 }, { "epoch": 1.62, "grad_norm": 0.5484375357627869, "learning_rate": 0.000261926597276887, "loss": 1.8394, "step": 48738 }, { "epoch": 1.62, "grad_norm": 0.53577721118927, "learning_rate": 0.0002619162296986471, "loss": 1.6911, "step": 48739 }, { "epoch": 1.62, "grad_norm": 0.5493209362030029, "learning_rate": 0.00026190586216663506, "loss": 1.7477, "step": 48740 }, { "epoch": 1.62, "grad_norm": 0.5452466607093811, "learning_rate": 0.00026189549468086376, "loss": 1.7574, "step": 48741 }, { "epoch": 1.62, "grad_norm": 0.5292507410049438, "learning_rate": 0.0002618851272413457, "loss": 1.7689, "step": 48742 }, { "epoch": 1.62, "grad_norm": 0.55990070104599, "learning_rate": 0.0002618747598480937, "loss": 1.8245, "step": 48743 }, { "epoch": 1.62, "grad_norm": 0.5427711009979248, "learning_rate": 0.0002618643925011199, "loss": 1.7312, "step": 48744 }, { "epoch": 1.62, "grad_norm": 0.5520200729370117, "learning_rate": 0.00026185402520043706, "loss": 1.6613, "step": 48745 }, { "epoch": 1.62, "grad_norm": 0.5582035779953003, "learning_rate": 0.00026184365794605803, "loss": 1.7487, "step": 48746 }, { "epoch": 1.62, "grad_norm": 0.5333667993545532, "learning_rate": 0.000261833290737995, "loss": 1.7085, "step": 48747 }, { "epoch": 1.62, "grad_norm": 0.5356043577194214, "learning_rate": 0.00026182292357626076, "loss": 1.7797, "step": 48748 }, { "epoch": 1.62, "grad_norm": 0.5833146572113037, "learning_rate": 0.00026181255646086794, "loss": 1.8091, "step": 48749 }, { "epoch": 1.62, "grad_norm": 0.5561913251876831, "learning_rate": 0.000261802189391829, "loss": 1.7329, "step": 48750 }, { "epoch": 1.62, "grad_norm": 0.5328403115272522, "learning_rate": 0.0002617918223691566, "loss": 1.7299, "step": 48751 }, { "epoch": 1.62, "grad_norm": 0.5295205116271973, "learning_rate": 0.00026178145539286317, "loss": 1.7076, "step": 48752 }, { "epoch": 1.62, "grad_norm": 0.535405158996582, "learning_rate": 0.0002617710884629616, "loss": 1.692, "step": 48753 }, { "epoch": 1.62, "grad_norm": 0.5250808596611023, "learning_rate": 0.0002617607215794642, "loss": 1.7445, "step": 48754 }, { "epoch": 1.62, "grad_norm": 0.5442786812782288, "learning_rate": 0.0002617503547423836, "loss": 1.8155, "step": 48755 }, { "epoch": 1.62, "grad_norm": 0.523722767829895, "learning_rate": 0.0002617399879517326, "loss": 1.7804, "step": 48756 }, { "epoch": 1.62, "grad_norm": 0.5263969898223877, "learning_rate": 0.0002617296212075235, "loss": 1.7622, "step": 48757 }, { "epoch": 1.62, "grad_norm": 0.5340132117271423, "learning_rate": 0.000261719254509769, "loss": 1.7303, "step": 48758 }, { "epoch": 1.62, "grad_norm": 0.5236570835113525, "learning_rate": 0.0002617088878584818, "loss": 1.7341, "step": 48759 }, { "epoch": 1.62, "grad_norm": 0.5436486601829529, "learning_rate": 0.0002616985212536742, "loss": 1.7923, "step": 48760 }, { "epoch": 1.62, "grad_norm": 0.5363638997077942, "learning_rate": 0.000261688154695359, "loss": 1.7051, "step": 48761 }, { "epoch": 1.62, "grad_norm": 0.5444028377532959, "learning_rate": 0.00026167778818354874, "loss": 1.7704, "step": 48762 }, { "epoch": 1.62, "grad_norm": 0.5183229446411133, "learning_rate": 0.0002616674217182561, "loss": 1.7571, "step": 48763 }, { "epoch": 1.62, "grad_norm": 0.55246901512146, "learning_rate": 0.0002616570552994935, "loss": 1.7732, "step": 48764 }, { "epoch": 1.62, "grad_norm": 0.5352097153663635, "learning_rate": 0.00026164668892727355, "loss": 1.678, "step": 48765 }, { "epoch": 1.62, "grad_norm": 0.5726007223129272, "learning_rate": 0.0002616363226016089, "loss": 1.829, "step": 48766 }, { "epoch": 1.62, "grad_norm": 0.5547360181808472, "learning_rate": 0.0002616259563225121, "loss": 1.8152, "step": 48767 }, { "epoch": 1.62, "grad_norm": 0.5445252656936646, "learning_rate": 0.0002616155900899956, "loss": 1.7854, "step": 48768 }, { "epoch": 1.62, "grad_norm": 0.53461092710495, "learning_rate": 0.0002616052239040724, "loss": 1.7962, "step": 48769 }, { "epoch": 1.62, "grad_norm": 0.5655982494354248, "learning_rate": 0.00026159485776475455, "loss": 1.7274, "step": 48770 }, { "epoch": 1.62, "grad_norm": 0.5263580679893494, "learning_rate": 0.000261584491672055, "loss": 1.7493, "step": 48771 }, { "epoch": 1.62, "grad_norm": 0.5683137774467468, "learning_rate": 0.00026157412562598616, "loss": 1.7845, "step": 48772 }, { "epoch": 1.62, "grad_norm": 0.5496029257774353, "learning_rate": 0.00026156375962656074, "loss": 1.7591, "step": 48773 }, { "epoch": 1.62, "grad_norm": 0.5565618872642517, "learning_rate": 0.00026155339367379125, "loss": 1.8218, "step": 48774 }, { "epoch": 1.62, "grad_norm": 0.5380678176879883, "learning_rate": 0.00026154302776769015, "loss": 1.7325, "step": 48775 }, { "epoch": 1.62, "grad_norm": 0.5480255484580994, "learning_rate": 0.00026153266190827036, "loss": 1.7476, "step": 48776 }, { "epoch": 1.62, "grad_norm": 0.7526425719261169, "learning_rate": 0.00026152229609554406, "loss": 1.6981, "step": 48777 }, { "epoch": 1.62, "grad_norm": 0.5420429110527039, "learning_rate": 0.00026151193032952405, "loss": 1.7912, "step": 48778 }, { "epoch": 1.62, "grad_norm": 0.5431325435638428, "learning_rate": 0.000261501564610223, "loss": 1.7804, "step": 48779 }, { "epoch": 1.62, "grad_norm": 1.8062961101531982, "learning_rate": 0.00026149119893765333, "loss": 1.727, "step": 48780 }, { "epoch": 1.62, "grad_norm": 0.5428673028945923, "learning_rate": 0.0002614808333118276, "loss": 1.7322, "step": 48781 }, { "epoch": 1.62, "grad_norm": 0.5685939788818359, "learning_rate": 0.0002614704677327584, "loss": 1.7726, "step": 48782 }, { "epoch": 1.62, "grad_norm": 0.5198183655738831, "learning_rate": 0.0002614601022004586, "loss": 1.7703, "step": 48783 }, { "epoch": 1.62, "grad_norm": 0.5543308258056641, "learning_rate": 0.0002614497367149404, "loss": 1.7528, "step": 48784 }, { "epoch": 1.62, "grad_norm": 0.5332494378089905, "learning_rate": 0.0002614393712762166, "loss": 1.8237, "step": 48785 }, { "epoch": 1.62, "grad_norm": 0.5275288224220276, "learning_rate": 0.00026142900588429975, "loss": 1.7685, "step": 48786 }, { "epoch": 1.62, "grad_norm": 0.5306077599525452, "learning_rate": 0.0002614186405392024, "loss": 1.7193, "step": 48787 }, { "epoch": 1.62, "grad_norm": 0.537202000617981, "learning_rate": 0.000261408275240937, "loss": 1.7544, "step": 48788 }, { "epoch": 1.62, "grad_norm": 0.5258491635322571, "learning_rate": 0.0002613979099895163, "loss": 1.7377, "step": 48789 }, { "epoch": 1.62, "grad_norm": 0.5437581539154053, "learning_rate": 0.000261387544784953, "loss": 1.7281, "step": 48790 }, { "epoch": 1.62, "grad_norm": 0.5292056202888489, "learning_rate": 0.00026137717962725944, "loss": 1.7345, "step": 48791 }, { "epoch": 1.62, "grad_norm": 0.5276282429695129, "learning_rate": 0.0002613668145164483, "loss": 1.727, "step": 48792 }, { "epoch": 1.62, "grad_norm": 0.537965714931488, "learning_rate": 0.0002613564494525322, "loss": 1.7273, "step": 48793 }, { "epoch": 1.62, "grad_norm": 0.5460259318351746, "learning_rate": 0.0002613460844355236, "loss": 1.6889, "step": 48794 }, { "epoch": 1.62, "grad_norm": 0.5647318959236145, "learning_rate": 0.00026133571946543516, "loss": 1.7725, "step": 48795 }, { "epoch": 1.62, "grad_norm": 0.563471257686615, "learning_rate": 0.00026132535454227956, "loss": 1.7987, "step": 48796 }, { "epoch": 1.62, "grad_norm": 0.5415846109390259, "learning_rate": 0.0002613149896660691, "loss": 1.7983, "step": 48797 }, { "epoch": 1.62, "grad_norm": 0.5463413000106812, "learning_rate": 0.00026130462483681664, "loss": 1.8187, "step": 48798 }, { "epoch": 1.62, "grad_norm": 0.5812556147575378, "learning_rate": 0.0002612942600545346, "loss": 1.7377, "step": 48799 }, { "epoch": 1.62, "grad_norm": 0.5425578951835632, "learning_rate": 0.0002612838953192358, "loss": 1.7306, "step": 48800 }, { "epoch": 1.62, "grad_norm": 0.5537635087966919, "learning_rate": 0.00026127353063093255, "loss": 1.7724, "step": 48801 }, { "epoch": 1.62, "grad_norm": 0.5433792471885681, "learning_rate": 0.0002612631659896375, "loss": 1.7306, "step": 48802 }, { "epoch": 1.62, "grad_norm": 0.5305624008178711, "learning_rate": 0.00026125280139536333, "loss": 1.7733, "step": 48803 }, { "epoch": 1.62, "grad_norm": 0.5445451140403748, "learning_rate": 0.0002612424368481224, "loss": 1.7389, "step": 48804 }, { "epoch": 1.62, "grad_norm": 0.5471426844596863, "learning_rate": 0.0002612320723479276, "loss": 1.7353, "step": 48805 }, { "epoch": 1.62, "grad_norm": 0.54097980260849, "learning_rate": 0.0002612217078947913, "loss": 1.8182, "step": 48806 }, { "epoch": 1.62, "grad_norm": 1.2192760705947876, "learning_rate": 0.00026121134348872614, "loss": 1.7957, "step": 48807 }, { "epoch": 1.62, "grad_norm": 0.5417628288269043, "learning_rate": 0.0002612009791297447, "loss": 1.7148, "step": 48808 }, { "epoch": 1.62, "grad_norm": 0.5427639484405518, "learning_rate": 0.0002611906148178594, "loss": 1.8057, "step": 48809 }, { "epoch": 1.62, "grad_norm": 0.551655113697052, "learning_rate": 0.00026118025055308323, "loss": 1.7098, "step": 48810 }, { "epoch": 1.62, "grad_norm": 0.5242261290550232, "learning_rate": 0.00026116988633542834, "loss": 1.7292, "step": 48811 }, { "epoch": 1.62, "grad_norm": 0.5378908514976501, "learning_rate": 0.0002611595221649075, "loss": 1.7723, "step": 48812 }, { "epoch": 1.62, "grad_norm": 0.5228183269500732, "learning_rate": 0.0002611491580415334, "loss": 1.6714, "step": 48813 }, { "epoch": 1.62, "grad_norm": 0.5441789031028748, "learning_rate": 0.0002611387939653184, "loss": 1.7592, "step": 48814 }, { "epoch": 1.62, "grad_norm": 0.5432198643684387, "learning_rate": 0.0002611284299362752, "loss": 1.7524, "step": 48815 }, { "epoch": 1.62, "grad_norm": 0.5266749262809753, "learning_rate": 0.0002611180659544164, "loss": 1.779, "step": 48816 }, { "epoch": 1.62, "grad_norm": 0.5325214862823486, "learning_rate": 0.00026110770201975445, "loss": 1.692, "step": 48817 }, { "epoch": 1.62, "grad_norm": 0.5362863540649414, "learning_rate": 0.000261097338132302, "loss": 1.7737, "step": 48818 }, { "epoch": 1.62, "grad_norm": 0.5435535311698914, "learning_rate": 0.0002610869742920717, "loss": 1.7633, "step": 48819 }, { "epoch": 1.62, "grad_norm": 0.5273007154464722, "learning_rate": 0.00026107661049907616, "loss": 1.7816, "step": 48820 }, { "epoch": 1.62, "grad_norm": 0.5439462661743164, "learning_rate": 0.0002610662467533278, "loss": 1.8062, "step": 48821 }, { "epoch": 1.62, "grad_norm": 0.5350489616394043, "learning_rate": 0.00026105588305483927, "loss": 1.7313, "step": 48822 }, { "epoch": 1.62, "grad_norm": 0.5798490047454834, "learning_rate": 0.00026104551940362325, "loss": 1.7717, "step": 48823 }, { "epoch": 1.62, "grad_norm": 0.530636191368103, "learning_rate": 0.00026103515579969217, "loss": 1.7388, "step": 48824 }, { "epoch": 1.62, "grad_norm": 0.5391767024993896, "learning_rate": 0.00026102479224305854, "loss": 1.7993, "step": 48825 }, { "epoch": 1.62, "grad_norm": 0.5361228585243225, "learning_rate": 0.00026101442873373537, "loss": 1.7252, "step": 48826 }, { "epoch": 1.62, "grad_norm": 0.5534268617630005, "learning_rate": 0.00026100406527173464, "loss": 1.7728, "step": 48827 }, { "epoch": 1.62, "grad_norm": 0.5298944115638733, "learning_rate": 0.00026099370185706937, "loss": 1.7622, "step": 48828 }, { "epoch": 1.62, "grad_norm": 0.5383293628692627, "learning_rate": 0.0002609833384897519, "loss": 1.7601, "step": 48829 }, { "epoch": 1.62, "grad_norm": 0.5341705679893494, "learning_rate": 0.00026097297516979506, "loss": 1.7878, "step": 48830 }, { "epoch": 1.62, "grad_norm": 0.5466007590293884, "learning_rate": 0.0002609626118972112, "loss": 1.6785, "step": 48831 }, { "epoch": 1.62, "grad_norm": 0.524391233921051, "learning_rate": 0.0002609522486720129, "loss": 1.6715, "step": 48832 }, { "epoch": 1.62, "grad_norm": 0.5278224349021912, "learning_rate": 0.00026094188549421305, "loss": 1.7544, "step": 48833 }, { "epoch": 1.62, "grad_norm": 0.5231792330741882, "learning_rate": 0.00026093152236382374, "loss": 1.7256, "step": 48834 }, { "epoch": 1.62, "grad_norm": 0.5113348960876465, "learning_rate": 0.00026092115928085793, "loss": 1.6866, "step": 48835 }, { "epoch": 1.62, "grad_norm": 0.5397655367851257, "learning_rate": 0.0002609107962453281, "loss": 1.7792, "step": 48836 }, { "epoch": 1.62, "grad_norm": 0.5644978284835815, "learning_rate": 0.00026090043325724676, "loss": 1.8612, "step": 48837 }, { "epoch": 1.62, "grad_norm": 0.53398597240448, "learning_rate": 0.0002608900703166265, "loss": 1.6687, "step": 48838 }, { "epoch": 1.62, "grad_norm": 0.5278298854827881, "learning_rate": 0.00026087970742347987, "loss": 1.7258, "step": 48839 }, { "epoch": 1.62, "grad_norm": 0.5347702503204346, "learning_rate": 0.0002608693445778197, "loss": 1.6848, "step": 48840 }, { "epoch": 1.62, "grad_norm": 0.553013801574707, "learning_rate": 0.00026085898177965826, "loss": 1.7044, "step": 48841 }, { "epoch": 1.62, "grad_norm": 0.5563116073608398, "learning_rate": 0.0002608486190290082, "loss": 1.7422, "step": 48842 }, { "epoch": 1.63, "grad_norm": 0.5667232871055603, "learning_rate": 0.0002608382563258823, "loss": 1.7516, "step": 48843 }, { "epoch": 1.63, "grad_norm": 0.5829493999481201, "learning_rate": 0.0002608278936702929, "loss": 1.7496, "step": 48844 }, { "epoch": 1.63, "grad_norm": 0.5546627640724182, "learning_rate": 0.00026081753106225265, "loss": 1.7972, "step": 48845 }, { "epoch": 1.63, "grad_norm": 0.5405364632606506, "learning_rate": 0.0002608071685017742, "loss": 1.8237, "step": 48846 }, { "epoch": 1.63, "grad_norm": 0.5262530446052551, "learning_rate": 0.00026079680598887003, "loss": 1.7495, "step": 48847 }, { "epoch": 1.63, "grad_norm": 0.5504283905029297, "learning_rate": 0.00026078644352355273, "loss": 1.749, "step": 48848 }, { "epoch": 1.63, "grad_norm": 0.5315940976142883, "learning_rate": 0.00026077608110583494, "loss": 1.6197, "step": 48849 }, { "epoch": 1.63, "grad_norm": 0.5310635566711426, "learning_rate": 0.0002607657187357293, "loss": 1.7468, "step": 48850 }, { "epoch": 1.63, "grad_norm": 0.5249931216239929, "learning_rate": 0.0002607553564132482, "loss": 1.821, "step": 48851 }, { "epoch": 1.63, "grad_norm": 0.5498064160346985, "learning_rate": 0.0002607449941384043, "loss": 1.848, "step": 48852 }, { "epoch": 1.63, "grad_norm": 0.552990198135376, "learning_rate": 0.00026073463191121025, "loss": 1.7919, "step": 48853 }, { "epoch": 1.63, "grad_norm": 0.5427441000938416, "learning_rate": 0.00026072426973167855, "loss": 1.8535, "step": 48854 }, { "epoch": 1.63, "grad_norm": 0.5289319753646851, "learning_rate": 0.0002607139075998217, "loss": 1.7371, "step": 48855 }, { "epoch": 1.63, "grad_norm": 0.5333427786827087, "learning_rate": 0.0002607035455156526, "loss": 1.7788, "step": 48856 }, { "epoch": 1.63, "grad_norm": 2.8547933101654053, "learning_rate": 0.0002606931834791834, "loss": 1.8588, "step": 48857 }, { "epoch": 1.63, "grad_norm": 0.5542762279510498, "learning_rate": 0.00026068282149042693, "loss": 1.7895, "step": 48858 }, { "epoch": 1.63, "grad_norm": 0.5723567008972168, "learning_rate": 0.00026067245954939576, "loss": 1.7832, "step": 48859 }, { "epoch": 1.63, "grad_norm": 0.5352067351341248, "learning_rate": 0.0002606620976561025, "loss": 1.7049, "step": 48860 }, { "epoch": 1.63, "grad_norm": 0.5200418829917908, "learning_rate": 0.00026065173581055955, "loss": 1.75, "step": 48861 }, { "epoch": 1.63, "grad_norm": 0.5287309885025024, "learning_rate": 0.00026064137401277956, "loss": 1.7209, "step": 48862 }, { "epoch": 1.63, "grad_norm": 0.5553768873214722, "learning_rate": 0.0002606310122627753, "loss": 1.6818, "step": 48863 }, { "epoch": 1.63, "grad_norm": 0.5529921054840088, "learning_rate": 0.000260620650560559, "loss": 1.7489, "step": 48864 }, { "epoch": 1.63, "grad_norm": 0.5330197215080261, "learning_rate": 0.0002606102889061435, "loss": 1.6913, "step": 48865 }, { "epoch": 1.63, "grad_norm": 0.5420646071434021, "learning_rate": 0.00026059992729954144, "loss": 1.7186, "step": 48866 }, { "epoch": 1.63, "grad_norm": 0.5512756109237671, "learning_rate": 0.00026058956574076514, "loss": 1.8045, "step": 48867 }, { "epoch": 1.63, "grad_norm": 0.5321568250656128, "learning_rate": 0.0002605792042298273, "loss": 1.7099, "step": 48868 }, { "epoch": 1.63, "grad_norm": 0.5763324499130249, "learning_rate": 0.00026056884276674043, "loss": 1.6938, "step": 48869 }, { "epoch": 1.63, "grad_norm": 0.5322700142860413, "learning_rate": 0.0002605584813515174, "loss": 1.75, "step": 48870 }, { "epoch": 1.63, "grad_norm": 0.533642590045929, "learning_rate": 0.0002605481199841703, "loss": 1.7783, "step": 48871 }, { "epoch": 1.63, "grad_norm": 0.5486239790916443, "learning_rate": 0.0002605377586647121, "loss": 1.7262, "step": 48872 }, { "epoch": 1.63, "grad_norm": 0.5338521599769592, "learning_rate": 0.0002605273973931553, "loss": 1.7745, "step": 48873 }, { "epoch": 1.63, "grad_norm": 0.5383962988853455, "learning_rate": 0.0002605170361695123, "loss": 1.831, "step": 48874 }, { "epoch": 1.63, "grad_norm": 0.5259676575660706, "learning_rate": 0.00026050667499379586, "loss": 1.7178, "step": 48875 }, { "epoch": 1.63, "grad_norm": 0.5342270135879517, "learning_rate": 0.00026049631386601855, "loss": 1.7688, "step": 48876 }, { "epoch": 1.63, "grad_norm": 0.534899115562439, "learning_rate": 0.00026048595278619285, "loss": 1.7785, "step": 48877 }, { "epoch": 1.63, "grad_norm": 0.5365310311317444, "learning_rate": 0.00026047559175433133, "loss": 1.7198, "step": 48878 }, { "epoch": 1.63, "grad_norm": 0.5220498442649841, "learning_rate": 0.00026046523077044657, "loss": 1.7316, "step": 48879 }, { "epoch": 1.63, "grad_norm": 0.5613660216331482, "learning_rate": 0.00026045486983455136, "loss": 1.7413, "step": 48880 }, { "epoch": 1.63, "grad_norm": 0.5175876021385193, "learning_rate": 0.00026044450894665807, "loss": 1.7536, "step": 48881 }, { "epoch": 1.63, "grad_norm": 0.5207951068878174, "learning_rate": 0.00026043414810677926, "loss": 1.7748, "step": 48882 }, { "epoch": 1.63, "grad_norm": 0.5440012812614441, "learning_rate": 0.00026042378731492763, "loss": 1.7001, "step": 48883 }, { "epoch": 1.63, "grad_norm": 0.5483132004737854, "learning_rate": 0.00026041342657111564, "loss": 1.7255, "step": 48884 }, { "epoch": 1.63, "grad_norm": 0.5483314394950867, "learning_rate": 0.0002604030658753558, "loss": 1.7615, "step": 48885 }, { "epoch": 1.63, "grad_norm": 0.5270543098449707, "learning_rate": 0.00026039270522766107, "loss": 1.7685, "step": 48886 }, { "epoch": 1.63, "grad_norm": 0.5355863571166992, "learning_rate": 0.0002603823446280436, "loss": 1.8083, "step": 48887 }, { "epoch": 1.63, "grad_norm": 0.535337507724762, "learning_rate": 0.0002603719840765161, "loss": 1.7586, "step": 48888 }, { "epoch": 1.63, "grad_norm": 0.5540942549705505, "learning_rate": 0.0002603616235730912, "loss": 1.7416, "step": 48889 }, { "epoch": 1.63, "grad_norm": 0.5501620769500732, "learning_rate": 0.0002603512631177815, "loss": 1.8044, "step": 48890 }, { "epoch": 1.63, "grad_norm": 0.5670493245124817, "learning_rate": 0.0002603409027105995, "loss": 1.805, "step": 48891 }, { "epoch": 1.63, "grad_norm": 0.5576523542404175, "learning_rate": 0.00026033054235155773, "loss": 1.7013, "step": 48892 }, { "epoch": 1.63, "grad_norm": 0.5260434746742249, "learning_rate": 0.0002603201820406689, "loss": 1.7963, "step": 48893 }, { "epoch": 1.63, "grad_norm": 0.5690378546714783, "learning_rate": 0.0002603098217779455, "loss": 1.8005, "step": 48894 }, { "epoch": 1.63, "grad_norm": 0.5230255722999573, "learning_rate": 0.00026029946156340015, "loss": 1.7794, "step": 48895 }, { "epoch": 1.63, "grad_norm": 0.537106454372406, "learning_rate": 0.0002602891013970455, "loss": 1.7433, "step": 48896 }, { "epoch": 1.63, "grad_norm": 0.5408152341842651, "learning_rate": 0.0002602787412788939, "loss": 1.7202, "step": 48897 }, { "epoch": 1.63, "grad_norm": 0.5406733751296997, "learning_rate": 0.0002602683812089581, "loss": 1.7311, "step": 48898 }, { "epoch": 1.63, "grad_norm": 0.5312357544898987, "learning_rate": 0.00026025802118725055, "loss": 1.6977, "step": 48899 }, { "epoch": 1.63, "grad_norm": 0.54792720079422, "learning_rate": 0.000260247661213784, "loss": 1.7203, "step": 48900 }, { "epoch": 1.63, "grad_norm": 0.5407770276069641, "learning_rate": 0.0002602373012885709, "loss": 1.8182, "step": 48901 }, { "epoch": 1.63, "grad_norm": 0.5424286723136902, "learning_rate": 0.00026022694141162385, "loss": 1.8111, "step": 48902 }, { "epoch": 1.63, "grad_norm": 0.5361822247505188, "learning_rate": 0.00026021658158295553, "loss": 1.6858, "step": 48903 }, { "epoch": 1.63, "grad_norm": 0.537070631980896, "learning_rate": 0.00026020622180257833, "loss": 1.807, "step": 48904 }, { "epoch": 1.63, "grad_norm": 0.5434337854385376, "learning_rate": 0.0002601958620705049, "loss": 1.8053, "step": 48905 }, { "epoch": 1.63, "grad_norm": 0.534083366394043, "learning_rate": 0.0002601855023867479, "loss": 1.6797, "step": 48906 }, { "epoch": 1.63, "grad_norm": 0.563171923160553, "learning_rate": 0.0002601751427513197, "loss": 1.7051, "step": 48907 }, { "epoch": 1.63, "grad_norm": 0.5616876482963562, "learning_rate": 0.0002601647831642331, "loss": 1.7065, "step": 48908 }, { "epoch": 1.63, "grad_norm": 0.53617262840271, "learning_rate": 0.0002601544236255005, "loss": 1.786, "step": 48909 }, { "epoch": 1.63, "grad_norm": 0.5845575332641602, "learning_rate": 0.0002601440641351347, "loss": 1.8102, "step": 48910 }, { "epoch": 1.63, "grad_norm": 0.5520705580711365, "learning_rate": 0.0002601337046931481, "loss": 1.7352, "step": 48911 }, { "epoch": 1.63, "grad_norm": 0.5295994281768799, "learning_rate": 0.00026012334529955324, "loss": 1.7748, "step": 48912 }, { "epoch": 1.63, "grad_norm": 0.5560302734375, "learning_rate": 0.00026011298595436286, "loss": 1.6697, "step": 48913 }, { "epoch": 1.63, "grad_norm": 0.5501915216445923, "learning_rate": 0.00026010262665758933, "loss": 1.7061, "step": 48914 }, { "epoch": 1.63, "grad_norm": 0.5386826395988464, "learning_rate": 0.0002600922674092453, "loss": 1.766, "step": 48915 }, { "epoch": 1.63, "grad_norm": 0.5446826219558716, "learning_rate": 0.00026008190820934346, "loss": 1.7798, "step": 48916 }, { "epoch": 1.63, "grad_norm": 0.5383197665214539, "learning_rate": 0.0002600715490578964, "loss": 1.7354, "step": 48917 }, { "epoch": 1.63, "grad_norm": 0.5378761291503906, "learning_rate": 0.00026006118995491645, "loss": 1.7226, "step": 48918 }, { "epoch": 1.63, "grad_norm": 0.5455248951911926, "learning_rate": 0.0002600508309004163, "loss": 1.7534, "step": 48919 }, { "epoch": 1.63, "grad_norm": 0.5803642868995667, "learning_rate": 0.00026004047189440876, "loss": 1.7519, "step": 48920 }, { "epoch": 1.63, "grad_norm": 0.5577685236930847, "learning_rate": 0.000260030112936906, "loss": 1.7719, "step": 48921 }, { "epoch": 1.63, "grad_norm": 0.5379243493080139, "learning_rate": 0.00026001975402792084, "loss": 1.7994, "step": 48922 }, { "epoch": 1.63, "grad_norm": 0.5392481684684753, "learning_rate": 0.00026000939516746587, "loss": 1.705, "step": 48923 }, { "epoch": 1.63, "grad_norm": 0.5366023182868958, "learning_rate": 0.00025999903635555357, "loss": 1.8139, "step": 48924 }, { "epoch": 1.63, "grad_norm": 0.5368147492408752, "learning_rate": 0.00025998867759219655, "loss": 1.7454, "step": 48925 }, { "epoch": 1.63, "grad_norm": 0.5334550738334656, "learning_rate": 0.0002599783188774073, "loss": 1.735, "step": 48926 }, { "epoch": 1.63, "grad_norm": 0.556187093257904, "learning_rate": 0.00025996796021119864, "loss": 1.7576, "step": 48927 }, { "epoch": 1.63, "grad_norm": 0.5256823301315308, "learning_rate": 0.0002599576015935828, "loss": 1.7899, "step": 48928 }, { "epoch": 1.63, "grad_norm": 0.5528562068939209, "learning_rate": 0.0002599472430245726, "loss": 1.8075, "step": 48929 }, { "epoch": 1.63, "grad_norm": 0.5384575128555298, "learning_rate": 0.0002599368845041806, "loss": 1.7603, "step": 48930 }, { "epoch": 1.63, "grad_norm": 0.5463079810142517, "learning_rate": 0.0002599265260324193, "loss": 1.763, "step": 48931 }, { "epoch": 1.63, "grad_norm": 0.5388537645339966, "learning_rate": 0.00025991616760930123, "loss": 1.7971, "step": 48932 }, { "epoch": 1.63, "grad_norm": 0.5286138653755188, "learning_rate": 0.00025990580923483913, "loss": 1.8137, "step": 48933 }, { "epoch": 1.63, "grad_norm": 0.5348793268203735, "learning_rate": 0.0002598954509090454, "loss": 1.7875, "step": 48934 }, { "epoch": 1.63, "grad_norm": 0.5287662744522095, "learning_rate": 0.0002598850926319326, "loss": 1.7808, "step": 48935 }, { "epoch": 1.63, "grad_norm": 0.5373835563659668, "learning_rate": 0.0002598747344035135, "loss": 1.7169, "step": 48936 }, { "epoch": 1.63, "grad_norm": 0.5615841150283813, "learning_rate": 0.0002598643762238006, "loss": 1.7297, "step": 48937 }, { "epoch": 1.63, "grad_norm": 0.5304510593414307, "learning_rate": 0.00025985401809280634, "loss": 1.7742, "step": 48938 }, { "epoch": 1.63, "grad_norm": 0.5448558330535889, "learning_rate": 0.0002598436600105434, "loss": 1.7239, "step": 48939 }, { "epoch": 1.63, "grad_norm": 0.5241600275039673, "learning_rate": 0.0002598333019770244, "loss": 1.6934, "step": 48940 }, { "epoch": 1.63, "grad_norm": 3.543492555618286, "learning_rate": 0.0002598229439922618, "loss": 1.8928, "step": 48941 }, { "epoch": 1.63, "grad_norm": 0.5796396136283875, "learning_rate": 0.0002598125860562682, "loss": 1.7734, "step": 48942 }, { "epoch": 1.63, "grad_norm": 0.5417754054069519, "learning_rate": 0.00025980222816905635, "loss": 1.8359, "step": 48943 }, { "epoch": 1.63, "grad_norm": 0.5441850423812866, "learning_rate": 0.00025979187033063844, "loss": 1.7467, "step": 48944 }, { "epoch": 1.63, "grad_norm": 0.554441511631012, "learning_rate": 0.0002597815125410274, "loss": 1.7237, "step": 48945 }, { "epoch": 1.63, "grad_norm": 0.5373728275299072, "learning_rate": 0.0002597711548002356, "loss": 1.7144, "step": 48946 }, { "epoch": 1.63, "grad_norm": 0.5299705862998962, "learning_rate": 0.00025976079710827586, "loss": 1.7802, "step": 48947 }, { "epoch": 1.63, "grad_norm": 0.5367365479469299, "learning_rate": 0.0002597504394651605, "loss": 1.7514, "step": 48948 }, { "epoch": 1.63, "grad_norm": 0.5499480366706848, "learning_rate": 0.0002597400818709021, "loss": 1.7163, "step": 48949 }, { "epoch": 1.63, "grad_norm": 0.5282671451568604, "learning_rate": 0.00025972972432551346, "loss": 1.8133, "step": 48950 }, { "epoch": 1.63, "grad_norm": 0.5530850291252136, "learning_rate": 0.00025971936682900684, "loss": 1.6879, "step": 48951 }, { "epoch": 1.63, "grad_norm": 0.5263181924819946, "learning_rate": 0.000259709009381395, "loss": 1.7194, "step": 48952 }, { "epoch": 1.63, "grad_norm": 0.5487604737281799, "learning_rate": 0.0002596986519826906, "loss": 1.6905, "step": 48953 }, { "epoch": 1.63, "grad_norm": 0.5460273623466492, "learning_rate": 0.00025968829463290607, "loss": 1.7833, "step": 48954 }, { "epoch": 1.63, "grad_norm": 0.5261127948760986, "learning_rate": 0.0002596779373320539, "loss": 1.7992, "step": 48955 }, { "epoch": 1.63, "grad_norm": 0.5545729994773865, "learning_rate": 0.0002596675800801468, "loss": 1.8049, "step": 48956 }, { "epoch": 1.63, "grad_norm": 0.5423813462257385, "learning_rate": 0.0002596572228771975, "loss": 1.7934, "step": 48957 }, { "epoch": 1.63, "grad_norm": 0.5329983234405518, "learning_rate": 0.00025964686572321814, "loss": 1.7418, "step": 48958 }, { "epoch": 1.63, "grad_norm": 0.5389595031738281, "learning_rate": 0.00025963650861822164, "loss": 1.8272, "step": 48959 }, { "epoch": 1.63, "grad_norm": 0.5408053994178772, "learning_rate": 0.0002596261515622205, "loss": 1.6988, "step": 48960 }, { "epoch": 1.63, "grad_norm": 0.54886794090271, "learning_rate": 0.00025961579455522726, "loss": 1.7197, "step": 48961 }, { "epoch": 1.63, "grad_norm": 0.5343537926673889, "learning_rate": 0.00025960543759725443, "loss": 1.7613, "step": 48962 }, { "epoch": 1.63, "grad_norm": 0.5255477428436279, "learning_rate": 0.00025959508068831475, "loss": 1.7538, "step": 48963 }, { "epoch": 1.63, "grad_norm": 0.5315961241722107, "learning_rate": 0.00025958472382842064, "loss": 1.7474, "step": 48964 }, { "epoch": 1.63, "grad_norm": 0.5363532304763794, "learning_rate": 0.0002595743670175846, "loss": 1.7893, "step": 48965 }, { "epoch": 1.63, "grad_norm": 0.5403898358345032, "learning_rate": 0.00025956401025581947, "loss": 1.6774, "step": 48966 }, { "epoch": 1.63, "grad_norm": 0.5224726796150208, "learning_rate": 0.00025955365354313767, "loss": 1.7023, "step": 48967 }, { "epoch": 1.63, "grad_norm": 0.5443190932273865, "learning_rate": 0.00025954329687955175, "loss": 1.6468, "step": 48968 }, { "epoch": 1.63, "grad_norm": 0.5413309931755066, "learning_rate": 0.0002595329402650743, "loss": 1.7652, "step": 48969 }, { "epoch": 1.63, "grad_norm": 0.527441680431366, "learning_rate": 0.0002595225836997179, "loss": 1.7121, "step": 48970 }, { "epoch": 1.63, "grad_norm": 0.5434780120849609, "learning_rate": 0.0002595122271834951, "loss": 1.7626, "step": 48971 }, { "epoch": 1.63, "grad_norm": 0.5574002861976624, "learning_rate": 0.00025950187071641843, "loss": 1.7595, "step": 48972 }, { "epoch": 1.63, "grad_norm": 0.5386666655540466, "learning_rate": 0.0002594915142985007, "loss": 1.7921, "step": 48973 }, { "epoch": 1.63, "grad_norm": 0.545701265335083, "learning_rate": 0.00025948115792975417, "loss": 1.7877, "step": 48974 }, { "epoch": 1.63, "grad_norm": 0.5260012745857239, "learning_rate": 0.00025947080161019156, "loss": 1.6893, "step": 48975 }, { "epoch": 1.63, "grad_norm": 0.5439584255218506, "learning_rate": 0.0002594604453398254, "loss": 1.7399, "step": 48976 }, { "epoch": 1.63, "grad_norm": 0.5297003984451294, "learning_rate": 0.0002594500891186684, "loss": 1.8488, "step": 48977 }, { "epoch": 1.63, "grad_norm": 0.5268878936767578, "learning_rate": 0.0002594397329467329, "loss": 1.7246, "step": 48978 }, { "epoch": 1.63, "grad_norm": 0.5434796214103699, "learning_rate": 0.00025942937682403154, "loss": 1.7293, "step": 48979 }, { "epoch": 1.63, "grad_norm": 0.5467327833175659, "learning_rate": 0.0002594190207505772, "loss": 1.7942, "step": 48980 }, { "epoch": 1.63, "grad_norm": 0.5251450538635254, "learning_rate": 0.0002594086647263819, "loss": 1.7886, "step": 48981 }, { "epoch": 1.63, "grad_norm": 0.5244108438491821, "learning_rate": 0.00025939830875145863, "loss": 1.775, "step": 48982 }, { "epoch": 1.63, "grad_norm": 0.5381263494491577, "learning_rate": 0.0002593879528258199, "loss": 1.7853, "step": 48983 }, { "epoch": 1.63, "grad_norm": 0.5348019003868103, "learning_rate": 0.0002593775969494781, "loss": 1.7612, "step": 48984 }, { "epoch": 1.63, "grad_norm": 0.5253313183784485, "learning_rate": 0.0002593672411224459, "loss": 1.7897, "step": 48985 }, { "epoch": 1.63, "grad_norm": 0.5333340764045715, "learning_rate": 0.00025935688534473587, "loss": 1.74, "step": 48986 }, { "epoch": 1.63, "grad_norm": 0.530314564704895, "learning_rate": 0.0002593465296163608, "loss": 1.8065, "step": 48987 }, { "epoch": 1.63, "grad_norm": 0.5451135039329529, "learning_rate": 0.0002593361739373328, "loss": 1.7239, "step": 48988 }, { "epoch": 1.63, "grad_norm": 0.5371792316436768, "learning_rate": 0.00025932581830766477, "loss": 1.7408, "step": 48989 }, { "epoch": 1.63, "grad_norm": 0.525148868560791, "learning_rate": 0.00025931546272736926, "loss": 1.7617, "step": 48990 }, { "epoch": 1.63, "grad_norm": 0.5200926661491394, "learning_rate": 0.0002593051071964588, "loss": 1.7159, "step": 48991 }, { "epoch": 1.63, "grad_norm": 0.5405696630477905, "learning_rate": 0.00025929475171494585, "loss": 1.7863, "step": 48992 }, { "epoch": 1.63, "grad_norm": 0.5402170419692993, "learning_rate": 0.00025928439628284316, "loss": 1.78, "step": 48993 }, { "epoch": 1.63, "grad_norm": 0.5282036066055298, "learning_rate": 0.0002592740409001631, "loss": 1.7829, "step": 48994 }, { "epoch": 1.63, "grad_norm": 0.5458266139030457, "learning_rate": 0.0002592636855669184, "loss": 1.7896, "step": 48995 }, { "epoch": 1.63, "grad_norm": 0.5530000329017639, "learning_rate": 0.00025925333028312163, "loss": 1.7968, "step": 48996 }, { "epoch": 1.63, "grad_norm": 0.580567479133606, "learning_rate": 0.00025924297504878533, "loss": 1.7346, "step": 48997 }, { "epoch": 1.63, "grad_norm": 0.5557196140289307, "learning_rate": 0.000259232619863922, "loss": 1.7364, "step": 48998 }, { "epoch": 1.63, "grad_norm": 0.5319823622703552, "learning_rate": 0.0002592222647285443, "loss": 1.7401, "step": 48999 }, { "epoch": 1.63, "grad_norm": 0.5463509559631348, "learning_rate": 0.00025921190964266476, "loss": 1.7382, "step": 49000 }, { "epoch": 1.63, "grad_norm": 0.5705285668373108, "learning_rate": 0.0002592015546062959, "loss": 1.695, "step": 49001 }, { "epoch": 1.63, "grad_norm": 0.5465226769447327, "learning_rate": 0.00025919119961945034, "loss": 1.7275, "step": 49002 }, { "epoch": 1.63, "grad_norm": 0.5229786038398743, "learning_rate": 0.00025918084468214077, "loss": 1.7741, "step": 49003 }, { "epoch": 1.63, "grad_norm": 0.5474509596824646, "learning_rate": 0.0002591704897943795, "loss": 1.7839, "step": 49004 }, { "epoch": 1.63, "grad_norm": 0.559522807598114, "learning_rate": 0.0002591601349561793, "loss": 1.7512, "step": 49005 }, { "epoch": 1.63, "grad_norm": 0.544253945350647, "learning_rate": 0.00025914978016755265, "loss": 1.7726, "step": 49006 }, { "epoch": 1.63, "grad_norm": 0.542624831199646, "learning_rate": 0.0002591394254285122, "loss": 1.7678, "step": 49007 }, { "epoch": 1.63, "grad_norm": 0.5388781428337097, "learning_rate": 0.00025912907073907045, "loss": 1.7204, "step": 49008 }, { "epoch": 1.63, "grad_norm": 0.5305540561676025, "learning_rate": 0.00025911871609923993, "loss": 1.7172, "step": 49009 }, { "epoch": 1.63, "grad_norm": 0.5410799980163574, "learning_rate": 0.0002591083615090333, "loss": 1.8178, "step": 49010 }, { "epoch": 1.63, "grad_norm": 0.541580319404602, "learning_rate": 0.0002590980069684631, "loss": 1.7468, "step": 49011 }, { "epoch": 1.63, "grad_norm": 0.6436821818351746, "learning_rate": 0.0002590876524775419, "loss": 1.7887, "step": 49012 }, { "epoch": 1.63, "grad_norm": 0.5347773432731628, "learning_rate": 0.0002590772980362823, "loss": 1.6478, "step": 49013 }, { "epoch": 1.63, "grad_norm": 0.5275332927703857, "learning_rate": 0.0002590669436446968, "loss": 1.7424, "step": 49014 }, { "epoch": 1.63, "grad_norm": 0.5297940969467163, "learning_rate": 0.00025905658930279793, "loss": 1.7266, "step": 49015 }, { "epoch": 1.63, "grad_norm": 2.7327070236206055, "learning_rate": 0.00025904623501059836, "loss": 1.8033, "step": 49016 }, { "epoch": 1.63, "grad_norm": 0.5660569071769714, "learning_rate": 0.0002590358807681107, "loss": 1.9172, "step": 49017 }, { "epoch": 1.63, "grad_norm": 0.7675266861915588, "learning_rate": 0.00025902552657534737, "loss": 1.7646, "step": 49018 }, { "epoch": 1.63, "grad_norm": 0.5284852385520935, "learning_rate": 0.000259015172432321, "loss": 1.8477, "step": 49019 }, { "epoch": 1.63, "grad_norm": 0.5429702401161194, "learning_rate": 0.00025900481833904433, "loss": 1.7906, "step": 49020 }, { "epoch": 1.63, "grad_norm": 0.5122308135032654, "learning_rate": 0.0002589944642955296, "loss": 1.8052, "step": 49021 }, { "epoch": 1.63, "grad_norm": 0.5288752317428589, "learning_rate": 0.0002589841103017896, "loss": 1.6895, "step": 49022 }, { "epoch": 1.63, "grad_norm": 0.5238427519798279, "learning_rate": 0.0002589737563578369, "loss": 1.6968, "step": 49023 }, { "epoch": 1.63, "grad_norm": 0.5410361289978027, "learning_rate": 0.0002589634024636839, "loss": 1.7323, "step": 49024 }, { "epoch": 1.63, "grad_norm": 0.5555389523506165, "learning_rate": 0.00025895304861934333, "loss": 1.8024, "step": 49025 }, { "epoch": 1.63, "grad_norm": 0.6031458973884583, "learning_rate": 0.00025894269482482773, "loss": 1.7624, "step": 49026 }, { "epoch": 1.63, "grad_norm": 0.5239958763122559, "learning_rate": 0.00025893234108014966, "loss": 1.7462, "step": 49027 }, { "epoch": 1.63, "grad_norm": 0.5332511067390442, "learning_rate": 0.0002589219873853217, "loss": 1.7033, "step": 49028 }, { "epoch": 1.63, "grad_norm": 0.5389087796211243, "learning_rate": 0.0002589116337403563, "loss": 1.7469, "step": 49029 }, { "epoch": 1.63, "grad_norm": 0.5420352220535278, "learning_rate": 0.0002589012801452662, "loss": 1.7977, "step": 49030 }, { "epoch": 1.63, "grad_norm": 0.5394450426101685, "learning_rate": 0.00025889092660006383, "loss": 1.8042, "step": 49031 }, { "epoch": 1.63, "grad_norm": 0.5220977067947388, "learning_rate": 0.00025888057310476185, "loss": 1.7578, "step": 49032 }, { "epoch": 1.63, "grad_norm": 0.5463463664054871, "learning_rate": 0.0002588702196593728, "loss": 1.802, "step": 49033 }, { "epoch": 1.63, "grad_norm": 0.5413267612457275, "learning_rate": 0.0002588598662639093, "loss": 1.7331, "step": 49034 }, { "epoch": 1.63, "grad_norm": 0.5474348664283752, "learning_rate": 0.0002588495129183838, "loss": 1.685, "step": 49035 }, { "epoch": 1.63, "grad_norm": 0.5366102457046509, "learning_rate": 0.00025883915962280886, "loss": 1.7356, "step": 49036 }, { "epoch": 1.63, "grad_norm": 0.528998613357544, "learning_rate": 0.00025882880637719734, "loss": 1.6783, "step": 49037 }, { "epoch": 1.63, "grad_norm": 0.5329780578613281, "learning_rate": 0.0002588184531815613, "loss": 1.7304, "step": 49038 }, { "epoch": 1.63, "grad_norm": 0.5319254994392395, "learning_rate": 0.0002588081000359137, "loss": 1.8469, "step": 49039 }, { "epoch": 1.63, "grad_norm": 0.5392281413078308, "learning_rate": 0.0002587977469402671, "loss": 1.6884, "step": 49040 }, { "epoch": 1.63, "grad_norm": 0.5375978350639343, "learning_rate": 0.0002587873938946339, "loss": 1.8002, "step": 49041 }, { "epoch": 1.63, "grad_norm": 0.5235538482666016, "learning_rate": 0.00025877704089902664, "loss": 1.7316, "step": 49042 }, { "epoch": 1.63, "grad_norm": 0.5202000141143799, "learning_rate": 0.00025876668795345804, "loss": 1.764, "step": 49043 }, { "epoch": 1.63, "grad_norm": 0.5656656622886658, "learning_rate": 0.0002587563350579407, "loss": 1.7584, "step": 49044 }, { "epoch": 1.63, "grad_norm": 0.5545791387557983, "learning_rate": 0.00025874598221248697, "loss": 1.7398, "step": 49045 }, { "epoch": 1.63, "grad_norm": 0.555905282497406, "learning_rate": 0.00025873562941710953, "loss": 1.746, "step": 49046 }, { "epoch": 1.63, "grad_norm": 0.5417090058326721, "learning_rate": 0.0002587252766718211, "loss": 1.7099, "step": 49047 }, { "epoch": 1.63, "grad_norm": 0.5441380143165588, "learning_rate": 0.000258714923976634, "loss": 1.7545, "step": 49048 }, { "epoch": 1.63, "grad_norm": 0.5600460171699524, "learning_rate": 0.0002587045713315609, "loss": 1.7477, "step": 49049 }, { "epoch": 1.63, "grad_norm": 0.5249050855636597, "learning_rate": 0.0002586942187366144, "loss": 1.7213, "step": 49050 }, { "epoch": 1.63, "grad_norm": 0.5448896884918213, "learning_rate": 0.000258683866191807, "loss": 1.7931, "step": 49051 }, { "epoch": 1.63, "grad_norm": 0.5528186559677124, "learning_rate": 0.00025867351369715123, "loss": 1.7771, "step": 49052 }, { "epoch": 1.63, "grad_norm": 0.5602752566337585, "learning_rate": 0.0002586631612526598, "loss": 1.8111, "step": 49053 }, { "epoch": 1.63, "grad_norm": 0.5599164366722107, "learning_rate": 0.00025865280885834527, "loss": 1.7671, "step": 49054 }, { "epoch": 1.63, "grad_norm": 0.5396198034286499, "learning_rate": 0.00025864245651422004, "loss": 1.7146, "step": 49055 }, { "epoch": 1.63, "grad_norm": 0.5492213368415833, "learning_rate": 0.0002586321042202968, "loss": 1.8239, "step": 49056 }, { "epoch": 1.63, "grad_norm": 0.5369237065315247, "learning_rate": 0.00025862175197658815, "loss": 1.6909, "step": 49057 }, { "epoch": 1.63, "grad_norm": 0.531812310218811, "learning_rate": 0.00025861139978310653, "loss": 1.7375, "step": 49058 }, { "epoch": 1.63, "grad_norm": 0.5396780371665955, "learning_rate": 0.0002586010476398645, "loss": 1.7659, "step": 49059 }, { "epoch": 1.63, "grad_norm": 0.5355821251869202, "learning_rate": 0.0002585906955468749, "loss": 1.714, "step": 49060 }, { "epoch": 1.63, "grad_norm": 0.5338727235794067, "learning_rate": 0.0002585803435041499, "loss": 1.7622, "step": 49061 }, { "epoch": 1.63, "grad_norm": 0.5416702628135681, "learning_rate": 0.00025856999151170233, "loss": 1.7282, "step": 49062 }, { "epoch": 1.63, "grad_norm": 0.5181774497032166, "learning_rate": 0.0002585596395695447, "loss": 1.6963, "step": 49063 }, { "epoch": 1.63, "grad_norm": 0.525985598564148, "learning_rate": 0.00025854928767768957, "loss": 1.7493, "step": 49064 }, { "epoch": 1.63, "grad_norm": 0.5412479639053345, "learning_rate": 0.00025853893583614946, "loss": 1.7554, "step": 49065 }, { "epoch": 1.63, "grad_norm": 0.5320181846618652, "learning_rate": 0.0002585285840449369, "loss": 1.7712, "step": 49066 }, { "epoch": 1.63, "grad_norm": 0.5219255685806274, "learning_rate": 0.00025851823230406477, "loss": 1.7024, "step": 49067 }, { "epoch": 1.63, "grad_norm": 0.5354765057563782, "learning_rate": 0.0002585078806135452, "loss": 1.7763, "step": 49068 }, { "epoch": 1.63, "grad_norm": 0.5482393503189087, "learning_rate": 0.00025849752897339097, "loss": 1.6737, "step": 49069 }, { "epoch": 1.63, "grad_norm": 0.5643659234046936, "learning_rate": 0.0002584871773836147, "loss": 1.8352, "step": 49070 }, { "epoch": 1.63, "grad_norm": 0.5444732308387756, "learning_rate": 0.0002584768258442288, "loss": 1.7579, "step": 49071 }, { "epoch": 1.63, "grad_norm": 0.5421769618988037, "learning_rate": 0.00025846647435524595, "loss": 1.6714, "step": 49072 }, { "epoch": 1.63, "grad_norm": 0.5490589737892151, "learning_rate": 0.0002584561229166786, "loss": 1.7611, "step": 49073 }, { "epoch": 1.63, "grad_norm": 0.5210070610046387, "learning_rate": 0.00025844577152853957, "loss": 1.7014, "step": 49074 }, { "epoch": 1.63, "grad_norm": 0.5262601971626282, "learning_rate": 0.0002584354201908411, "loss": 1.7305, "step": 49075 }, { "epoch": 1.63, "grad_norm": 0.5480330586433411, "learning_rate": 0.00025842506890359594, "loss": 1.7776, "step": 49076 }, { "epoch": 1.63, "grad_norm": 0.5581606030464172, "learning_rate": 0.0002584147176668167, "loss": 1.7531, "step": 49077 }, { "epoch": 1.63, "grad_norm": 0.5557419657707214, "learning_rate": 0.0002584043664805158, "loss": 1.8245, "step": 49078 }, { "epoch": 1.63, "grad_norm": 0.528026282787323, "learning_rate": 0.0002583940153447058, "loss": 1.7047, "step": 49079 }, { "epoch": 1.63, "grad_norm": 0.5386764407157898, "learning_rate": 0.0002583836642593995, "loss": 1.7224, "step": 49080 }, { "epoch": 1.63, "grad_norm": 0.5509081482887268, "learning_rate": 0.00025837331322460923, "loss": 1.7413, "step": 49081 }, { "epoch": 1.63, "grad_norm": 0.5645232796669006, "learning_rate": 0.0002583629622403475, "loss": 1.8157, "step": 49082 }, { "epoch": 1.63, "grad_norm": 0.5551270842552185, "learning_rate": 0.0002583526113066271, "loss": 1.7879, "step": 49083 }, { "epoch": 1.63, "grad_norm": 0.5308481454849243, "learning_rate": 0.0002583422604234605, "loss": 1.712, "step": 49084 }, { "epoch": 1.63, "grad_norm": 0.5522768497467041, "learning_rate": 0.00025833190959086025, "loss": 1.7059, "step": 49085 }, { "epoch": 1.63, "grad_norm": 0.5325347781181335, "learning_rate": 0.00025832155880883893, "loss": 1.7966, "step": 49086 }, { "epoch": 1.63, "grad_norm": 0.5361136198043823, "learning_rate": 0.00025831120807740914, "loss": 1.7056, "step": 49087 }, { "epoch": 1.63, "grad_norm": 0.5492683053016663, "learning_rate": 0.00025830085739658335, "loss": 1.7253, "step": 49088 }, { "epoch": 1.63, "grad_norm": 0.5446105599403381, "learning_rate": 0.0002582905067663741, "loss": 1.7736, "step": 49089 }, { "epoch": 1.63, "grad_norm": 0.5460948348045349, "learning_rate": 0.00025828015618679424, "loss": 1.7183, "step": 49090 }, { "epoch": 1.63, "grad_norm": 0.5313975811004639, "learning_rate": 0.00025826980565785596, "loss": 1.7782, "step": 49091 }, { "epoch": 1.63, "grad_norm": 0.5429572463035583, "learning_rate": 0.000258259455179572, "loss": 1.8353, "step": 49092 }, { "epoch": 1.63, "grad_norm": 0.5388311743736267, "learning_rate": 0.0002582491047519549, "loss": 1.817, "step": 49093 }, { "epoch": 1.63, "grad_norm": 0.5288538336753845, "learning_rate": 0.0002582387543750173, "loss": 1.7764, "step": 49094 }, { "epoch": 1.63, "grad_norm": 1.088997483253479, "learning_rate": 0.00025822840404877165, "loss": 1.8382, "step": 49095 }, { "epoch": 1.63, "grad_norm": 0.5363363027572632, "learning_rate": 0.0002582180537732305, "loss": 1.763, "step": 49096 }, { "epoch": 1.63, "grad_norm": 0.5442221164703369, "learning_rate": 0.0002582077035484067, "loss": 1.7278, "step": 49097 }, { "epoch": 1.63, "grad_norm": 0.5311765074729919, "learning_rate": 0.00025819735337431234, "loss": 1.7557, "step": 49098 }, { "epoch": 1.63, "grad_norm": 0.5433215498924255, "learning_rate": 0.0002581870032509603, "loss": 1.792, "step": 49099 }, { "epoch": 1.63, "grad_norm": 0.5360518097877502, "learning_rate": 0.00025817665317836317, "loss": 1.7447, "step": 49100 }, { "epoch": 1.63, "grad_norm": 0.5526825189590454, "learning_rate": 0.00025816630315653335, "loss": 1.8157, "step": 49101 }, { "epoch": 1.63, "grad_norm": 0.5399055480957031, "learning_rate": 0.0002581559531854835, "loss": 1.7018, "step": 49102 }, { "epoch": 1.63, "grad_norm": 0.565046489238739, "learning_rate": 0.00025814560326522607, "loss": 1.6918, "step": 49103 }, { "epoch": 1.63, "grad_norm": 0.5422361493110657, "learning_rate": 0.0002581352533957739, "loss": 1.7693, "step": 49104 }, { "epoch": 1.63, "grad_norm": 0.5499099493026733, "learning_rate": 0.0002581249035771392, "loss": 1.7542, "step": 49105 }, { "epoch": 1.63, "grad_norm": 0.551726758480072, "learning_rate": 0.0002581145538093347, "loss": 1.7128, "step": 49106 }, { "epoch": 1.63, "grad_norm": 0.5472683906555176, "learning_rate": 0.0002581042040923731, "loss": 1.8008, "step": 49107 }, { "epoch": 1.63, "grad_norm": 0.528829038143158, "learning_rate": 0.0002580938544262667, "loss": 1.7543, "step": 49108 }, { "epoch": 1.63, "grad_norm": 0.5613725185394287, "learning_rate": 0.0002580835048110282, "loss": 1.8704, "step": 49109 }, { "epoch": 1.63, "grad_norm": 0.537230908870697, "learning_rate": 0.0002580731552466702, "loss": 1.7736, "step": 49110 }, { "epoch": 1.63, "grad_norm": 0.5483389496803284, "learning_rate": 0.0002580628057332052, "loss": 1.7339, "step": 49111 }, { "epoch": 1.63, "grad_norm": 0.5375376343727112, "learning_rate": 0.00025805245627064566, "loss": 1.7395, "step": 49112 }, { "epoch": 1.63, "grad_norm": 0.5595561861991882, "learning_rate": 0.00025804210685900433, "loss": 1.752, "step": 49113 }, { "epoch": 1.63, "grad_norm": 0.5408113598823547, "learning_rate": 0.0002580317574982938, "loss": 1.758, "step": 49114 }, { "epoch": 1.63, "grad_norm": 0.537180483341217, "learning_rate": 0.0002580214081885264, "loss": 1.8378, "step": 49115 }, { "epoch": 1.63, "grad_norm": 0.5486302971839905, "learning_rate": 0.0002580110589297149, "loss": 1.8045, "step": 49116 }, { "epoch": 1.63, "grad_norm": 0.5525571703910828, "learning_rate": 0.0002580007097218718, "loss": 1.7577, "step": 49117 }, { "epoch": 1.63, "grad_norm": 0.5484912395477295, "learning_rate": 0.00025799036056500964, "loss": 1.7799, "step": 49118 }, { "epoch": 1.63, "grad_norm": 0.521350622177124, "learning_rate": 0.00025798001145914087, "loss": 1.7403, "step": 49119 }, { "epoch": 1.63, "grad_norm": 0.5435423254966736, "learning_rate": 0.0002579696624042784, "loss": 1.8545, "step": 49120 }, { "epoch": 1.63, "grad_norm": 0.5469691753387451, "learning_rate": 0.00025795931340043434, "loss": 1.6443, "step": 49121 }, { "epoch": 1.63, "grad_norm": 0.538282573223114, "learning_rate": 0.00025794896444762157, "loss": 1.6208, "step": 49122 }, { "epoch": 1.63, "grad_norm": 0.5366392731666565, "learning_rate": 0.00025793861554585255, "loss": 1.7791, "step": 49123 }, { "epoch": 1.63, "grad_norm": 0.5487437844276428, "learning_rate": 0.0002579282666951399, "loss": 1.655, "step": 49124 }, { "epoch": 1.63, "grad_norm": 0.526326596736908, "learning_rate": 0.00025791791789549613, "loss": 1.6801, "step": 49125 }, { "epoch": 1.63, "grad_norm": 0.5429714322090149, "learning_rate": 0.0002579075691469337, "loss": 1.7672, "step": 49126 }, { "epoch": 1.63, "grad_norm": 0.5315601229667664, "learning_rate": 0.0002578972204494654, "loss": 1.7237, "step": 49127 }, { "epoch": 1.63, "grad_norm": 0.564799427986145, "learning_rate": 0.00025788687180310364, "loss": 1.8192, "step": 49128 }, { "epoch": 1.63, "grad_norm": 0.550530195236206, "learning_rate": 0.000257876523207861, "loss": 1.7162, "step": 49129 }, { "epoch": 1.63, "grad_norm": 0.5373548269271851, "learning_rate": 0.00025786617466375014, "loss": 1.7922, "step": 49130 }, { "epoch": 1.63, "grad_norm": 0.5401989221572876, "learning_rate": 0.00025785582617078345, "loss": 1.7223, "step": 49131 }, { "epoch": 1.63, "grad_norm": 0.5249917507171631, "learning_rate": 0.00025784547772897355, "loss": 1.7598, "step": 49132 }, { "epoch": 1.63, "grad_norm": 0.5258172750473022, "learning_rate": 0.000257835129338333, "loss": 1.7834, "step": 49133 }, { "epoch": 1.63, "grad_norm": 0.531011164188385, "learning_rate": 0.0002578247809988745, "loss": 1.7446, "step": 49134 }, { "epoch": 1.63, "grad_norm": 0.537300169467926, "learning_rate": 0.0002578144327106105, "loss": 1.7338, "step": 49135 }, { "epoch": 1.63, "grad_norm": 0.5464666485786438, "learning_rate": 0.00025780408447355346, "loss": 1.7808, "step": 49136 }, { "epoch": 1.63, "grad_norm": 0.5269560217857361, "learning_rate": 0.00025779373628771616, "loss": 1.7457, "step": 49137 }, { "epoch": 1.63, "grad_norm": 0.5486922264099121, "learning_rate": 0.000257783388153111, "loss": 1.7082, "step": 49138 }, { "epoch": 1.63, "grad_norm": 0.549737811088562, "learning_rate": 0.0002577730400697505, "loss": 1.817, "step": 49139 }, { "epoch": 1.63, "grad_norm": 0.5516168475151062, "learning_rate": 0.0002577626920376475, "loss": 1.7058, "step": 49140 }, { "epoch": 1.63, "grad_norm": 0.5464110970497131, "learning_rate": 0.00025775234405681416, "loss": 1.7778, "step": 49141 }, { "epoch": 1.63, "grad_norm": 0.5269601345062256, "learning_rate": 0.00025774199612726333, "loss": 1.6874, "step": 49142 }, { "epoch": 1.63, "grad_norm": 0.5342267751693726, "learning_rate": 0.0002577316482490075, "loss": 1.8351, "step": 49143 }, { "epoch": 1.64, "grad_norm": 0.5493230223655701, "learning_rate": 0.0002577213004220592, "loss": 1.7547, "step": 49144 }, { "epoch": 1.64, "grad_norm": 0.5329957604408264, "learning_rate": 0.00025771095264643106, "loss": 1.82, "step": 49145 }, { "epoch": 1.64, "grad_norm": 0.538202166557312, "learning_rate": 0.0002577006049221356, "loss": 1.7135, "step": 49146 }, { "epoch": 1.64, "grad_norm": 0.5265008211135864, "learning_rate": 0.00025769025724918537, "loss": 1.6791, "step": 49147 }, { "epoch": 1.64, "grad_norm": 0.5479215979576111, "learning_rate": 0.00025767990962759284, "loss": 1.772, "step": 49148 }, { "epoch": 1.64, "grad_norm": 0.5444393754005432, "learning_rate": 0.00025766956205737067, "loss": 1.789, "step": 49149 }, { "epoch": 1.64, "grad_norm": 0.5600559711456299, "learning_rate": 0.00025765921453853156, "loss": 1.7625, "step": 49150 }, { "epoch": 1.64, "grad_norm": 0.5628225207328796, "learning_rate": 0.0002576488670710878, "loss": 1.7388, "step": 49151 }, { "epoch": 1.64, "grad_norm": 0.5334570407867432, "learning_rate": 0.00025763851965505215, "loss": 1.7026, "step": 49152 }, { "epoch": 1.64, "grad_norm": 0.5392355918884277, "learning_rate": 0.00025762817229043694, "loss": 1.7482, "step": 49153 }, { "epoch": 1.64, "grad_norm": 0.5313341617584229, "learning_rate": 0.00025761782497725515, "loss": 1.7706, "step": 49154 }, { "epoch": 1.64, "grad_norm": 0.5266003012657166, "learning_rate": 0.00025760747771551886, "loss": 1.8014, "step": 49155 }, { "epoch": 1.64, "grad_norm": 0.5573871731758118, "learning_rate": 0.00025759713050524087, "loss": 1.7847, "step": 49156 }, { "epoch": 1.64, "grad_norm": 0.5570145845413208, "learning_rate": 0.0002575867833464338, "loss": 1.7484, "step": 49157 }, { "epoch": 1.64, "grad_norm": 0.5322079658508301, "learning_rate": 0.0002575764362391101, "loss": 1.756, "step": 49158 }, { "epoch": 1.64, "grad_norm": 0.5313595533370972, "learning_rate": 0.0002575660891832823, "loss": 1.7927, "step": 49159 }, { "epoch": 1.64, "grad_norm": 0.5482138395309448, "learning_rate": 0.000257555742178963, "loss": 1.7408, "step": 49160 }, { "epoch": 1.64, "grad_norm": 0.5296307802200317, "learning_rate": 0.000257545395226165, "loss": 1.7422, "step": 49161 }, { "epoch": 1.64, "grad_norm": 0.5307083129882812, "learning_rate": 0.0002575350483249003, "loss": 1.7747, "step": 49162 }, { "epoch": 1.64, "grad_norm": 0.544134795665741, "learning_rate": 0.000257524701475182, "loss": 1.6846, "step": 49163 }, { "epoch": 1.64, "grad_norm": 0.5226625204086304, "learning_rate": 0.0002575143546770224, "loss": 1.6924, "step": 49164 }, { "epoch": 1.64, "grad_norm": 0.565139651298523, "learning_rate": 0.0002575040079304342, "loss": 1.7723, "step": 49165 }, { "epoch": 1.64, "grad_norm": 0.5289851427078247, "learning_rate": 0.00025749366123542977, "loss": 1.7574, "step": 49166 }, { "epoch": 1.64, "grad_norm": 0.5579072833061218, "learning_rate": 0.00025748331459202184, "loss": 1.8231, "step": 49167 }, { "epoch": 1.64, "grad_norm": 0.5409412980079651, "learning_rate": 0.00025747296800022286, "loss": 1.7996, "step": 49168 }, { "epoch": 1.64, "grad_norm": 0.543795645236969, "learning_rate": 0.0002574626214600454, "loss": 1.7213, "step": 49169 }, { "epoch": 1.64, "grad_norm": 0.5339025259017944, "learning_rate": 0.00025745227497150203, "loss": 1.6936, "step": 49170 }, { "epoch": 1.64, "grad_norm": 0.5427507758140564, "learning_rate": 0.00025744192853460545, "loss": 1.7442, "step": 49171 }, { "epoch": 1.64, "grad_norm": 0.5501576066017151, "learning_rate": 0.000257431582149368, "loss": 1.6903, "step": 49172 }, { "epoch": 1.64, "grad_norm": 0.5498427152633667, "learning_rate": 0.00025742123581580235, "loss": 1.7202, "step": 49173 }, { "epoch": 1.64, "grad_norm": 0.5561266541481018, "learning_rate": 0.00025741088953392115, "loss": 1.7413, "step": 49174 }, { "epoch": 1.64, "grad_norm": 0.5299413204193115, "learning_rate": 0.00025740054330373677, "loss": 1.7058, "step": 49175 }, { "epoch": 1.64, "grad_norm": 1.1969531774520874, "learning_rate": 0.0002573901971252618, "loss": 1.7621, "step": 49176 }, { "epoch": 1.64, "grad_norm": 0.5537351965904236, "learning_rate": 0.000257379850998509, "loss": 1.6592, "step": 49177 }, { "epoch": 1.64, "grad_norm": 0.5587192177772522, "learning_rate": 0.00025736950492349063, "loss": 1.8178, "step": 49178 }, { "epoch": 1.64, "grad_norm": 0.5341238379478455, "learning_rate": 0.0002573591589002194, "loss": 1.6289, "step": 49179 }, { "epoch": 1.64, "grad_norm": 0.5509433150291443, "learning_rate": 0.00025734881292870793, "loss": 1.7561, "step": 49180 }, { "epoch": 1.64, "grad_norm": 0.5389958620071411, "learning_rate": 0.00025733846700896874, "loss": 1.7749, "step": 49181 }, { "epoch": 1.64, "grad_norm": 0.5376227498054504, "learning_rate": 0.00025732812114101435, "loss": 1.7263, "step": 49182 }, { "epoch": 1.64, "grad_norm": 0.5599076747894287, "learning_rate": 0.00025731777532485723, "loss": 1.8288, "step": 49183 }, { "epoch": 1.64, "grad_norm": 0.5838945508003235, "learning_rate": 0.0002573074295605103, "loss": 1.7919, "step": 49184 }, { "epoch": 1.64, "grad_norm": 0.5576805472373962, "learning_rate": 0.0002572970838479856, "loss": 1.6877, "step": 49185 }, { "epoch": 1.64, "grad_norm": 0.5357052087783813, "learning_rate": 0.00025728673818729603, "loss": 1.7243, "step": 49186 }, { "epoch": 1.64, "grad_norm": 0.5678180456161499, "learning_rate": 0.00025727639257845414, "loss": 1.7743, "step": 49187 }, { "epoch": 1.64, "grad_norm": 0.5750439763069153, "learning_rate": 0.00025726604702147235, "loss": 1.6875, "step": 49188 }, { "epoch": 1.64, "grad_norm": 0.5666033625602722, "learning_rate": 0.00025725570151636324, "loss": 1.7903, "step": 49189 }, { "epoch": 1.64, "grad_norm": 0.541536808013916, "learning_rate": 0.0002572453560631394, "loss": 1.7427, "step": 49190 }, { "epoch": 1.64, "grad_norm": 0.5591951608657837, "learning_rate": 0.0002572350106618136, "loss": 1.7164, "step": 49191 }, { "epoch": 1.64, "grad_norm": 0.5444297790527344, "learning_rate": 0.00025722466531239796, "loss": 1.7695, "step": 49192 }, { "epoch": 1.64, "grad_norm": 0.5529288053512573, "learning_rate": 0.00025721432001490533, "loss": 1.7336, "step": 49193 }, { "epoch": 1.64, "grad_norm": 0.5558069348335266, "learning_rate": 0.0002572039747693483, "loss": 1.7378, "step": 49194 }, { "epoch": 1.64, "grad_norm": 0.5343552231788635, "learning_rate": 0.00025719362957573927, "loss": 1.7006, "step": 49195 }, { "epoch": 1.64, "grad_norm": 0.5333411693572998, "learning_rate": 0.0002571832844340909, "loss": 1.8238, "step": 49196 }, { "epoch": 1.64, "grad_norm": 0.5380187034606934, "learning_rate": 0.00025717293934441574, "loss": 1.8453, "step": 49197 }, { "epoch": 1.64, "grad_norm": 0.5344347357749939, "learning_rate": 0.00025716259430672624, "loss": 1.744, "step": 49198 }, { "epoch": 1.64, "grad_norm": 0.5546997785568237, "learning_rate": 0.000257152249321035, "loss": 1.7192, "step": 49199 }, { "epoch": 1.64, "grad_norm": 0.542750358581543, "learning_rate": 0.0002571419043873547, "loss": 1.7838, "step": 49200 }, { "epoch": 1.64, "grad_norm": 0.5458711981773376, "learning_rate": 0.0002571315595056978, "loss": 1.6737, "step": 49201 }, { "epoch": 1.64, "grad_norm": 0.5264208912849426, "learning_rate": 0.0002571212146760769, "loss": 1.7236, "step": 49202 }, { "epoch": 1.64, "grad_norm": 1.157969355583191, "learning_rate": 0.0002571108698985045, "loss": 1.7487, "step": 49203 }, { "epoch": 1.64, "grad_norm": 0.567874014377594, "learning_rate": 0.0002571005251729932, "loss": 1.7374, "step": 49204 }, { "epoch": 1.64, "grad_norm": 0.5443356037139893, "learning_rate": 0.00025709018049955554, "loss": 1.801, "step": 49205 }, { "epoch": 1.64, "grad_norm": 0.5302780866622925, "learning_rate": 0.000257079835878204, "loss": 1.6742, "step": 49206 }, { "epoch": 1.64, "grad_norm": 0.5659183263778687, "learning_rate": 0.0002570694913089514, "loss": 1.7817, "step": 49207 }, { "epoch": 1.64, "grad_norm": 0.5356218218803406, "learning_rate": 0.0002570591467918099, "loss": 1.7327, "step": 49208 }, { "epoch": 1.64, "grad_norm": 0.5332604050636292, "learning_rate": 0.00025704880232679236, "loss": 1.7355, "step": 49209 }, { "epoch": 1.64, "grad_norm": 0.5366853475570679, "learning_rate": 0.00025703845791391123, "loss": 1.7398, "step": 49210 }, { "epoch": 1.64, "grad_norm": 0.5394409894943237, "learning_rate": 0.00025702811355317914, "loss": 1.7485, "step": 49211 }, { "epoch": 1.64, "grad_norm": 0.5681442618370056, "learning_rate": 0.00025701776924460856, "loss": 1.7971, "step": 49212 }, { "epoch": 1.64, "grad_norm": 0.5710291266441345, "learning_rate": 0.000257007424988212, "loss": 1.8262, "step": 49213 }, { "epoch": 1.64, "grad_norm": 0.5373475551605225, "learning_rate": 0.00025699708078400223, "loss": 1.7792, "step": 49214 }, { "epoch": 1.64, "grad_norm": 0.542137622833252, "learning_rate": 0.0002569867366319915, "loss": 1.7642, "step": 49215 }, { "epoch": 1.64, "grad_norm": 0.5345595479011536, "learning_rate": 0.00025697639253219264, "loss": 1.7698, "step": 49216 }, { "epoch": 1.64, "grad_norm": 0.5548030734062195, "learning_rate": 0.00025696604848461814, "loss": 1.6933, "step": 49217 }, { "epoch": 1.64, "grad_norm": 0.5463208556175232, "learning_rate": 0.00025695570448928045, "loss": 1.7617, "step": 49218 }, { "epoch": 1.64, "grad_norm": 0.5406075716018677, "learning_rate": 0.0002569453605461922, "loss": 1.7626, "step": 49219 }, { "epoch": 1.64, "grad_norm": 0.5316624045372009, "learning_rate": 0.00025693501665536585, "loss": 1.6968, "step": 49220 }, { "epoch": 1.64, "grad_norm": 0.5490003228187561, "learning_rate": 0.00025692467281681426, "loss": 1.7534, "step": 49221 }, { "epoch": 1.64, "grad_norm": 0.5672895908355713, "learning_rate": 0.0002569143290305496, "loss": 1.7688, "step": 49222 }, { "epoch": 1.64, "grad_norm": 0.5320770144462585, "learning_rate": 0.0002569039852965846, "loss": 1.7474, "step": 49223 }, { "epoch": 1.64, "grad_norm": 0.5584635734558105, "learning_rate": 0.00025689364161493193, "loss": 1.6631, "step": 49224 }, { "epoch": 1.64, "grad_norm": 0.5247564315795898, "learning_rate": 0.0002568832979856039, "loss": 1.6556, "step": 49225 }, { "epoch": 1.64, "grad_norm": 0.5527567863464355, "learning_rate": 0.0002568729544086133, "loss": 1.8261, "step": 49226 }, { "epoch": 1.64, "grad_norm": 0.5753876566886902, "learning_rate": 0.00025686261088397254, "loss": 1.708, "step": 49227 }, { "epoch": 1.64, "grad_norm": 0.5544824600219727, "learning_rate": 0.0002568522674116942, "loss": 1.7288, "step": 49228 }, { "epoch": 1.64, "grad_norm": 0.5481822490692139, "learning_rate": 0.0002568419239917908, "loss": 1.8319, "step": 49229 }, { "epoch": 1.64, "grad_norm": 0.5371811985969543, "learning_rate": 0.00025683158062427496, "loss": 1.6701, "step": 49230 }, { "epoch": 1.64, "grad_norm": 0.5308235883712769, "learning_rate": 0.0002568212373091593, "loss": 1.72, "step": 49231 }, { "epoch": 1.64, "grad_norm": 0.5440366864204407, "learning_rate": 0.0002568108940464563, "loss": 1.7362, "step": 49232 }, { "epoch": 1.64, "grad_norm": 0.5628151297569275, "learning_rate": 0.00025680055083617844, "loss": 1.7375, "step": 49233 }, { "epoch": 1.64, "grad_norm": 0.5362452268600464, "learning_rate": 0.0002567902076783384, "loss": 1.7293, "step": 49234 }, { "epoch": 1.64, "grad_norm": 0.5376772880554199, "learning_rate": 0.0002567798645729486, "loss": 1.7294, "step": 49235 }, { "epoch": 1.64, "grad_norm": 0.5423441529273987, "learning_rate": 0.0002567695215200217, "loss": 1.7092, "step": 49236 }, { "epoch": 1.64, "grad_norm": 0.5368932485580444, "learning_rate": 0.0002567591785195703, "loss": 1.7193, "step": 49237 }, { "epoch": 1.64, "grad_norm": 0.5505926012992859, "learning_rate": 0.00025674883557160686, "loss": 1.7338, "step": 49238 }, { "epoch": 1.64, "grad_norm": 0.5489392876625061, "learning_rate": 0.00025673849267614387, "loss": 1.8007, "step": 49239 }, { "epoch": 1.64, "grad_norm": 0.5461257696151733, "learning_rate": 0.00025672814983319404, "loss": 1.7371, "step": 49240 }, { "epoch": 1.64, "grad_norm": 0.5461920499801636, "learning_rate": 0.00025671780704276996, "loss": 1.8026, "step": 49241 }, { "epoch": 1.64, "grad_norm": 0.5281930565834045, "learning_rate": 0.000256707464304884, "loss": 1.6989, "step": 49242 }, { "epoch": 1.64, "grad_norm": 0.5697787404060364, "learning_rate": 0.0002566971216195487, "loss": 1.9107, "step": 49243 }, { "epoch": 1.64, "grad_norm": 0.553686797618866, "learning_rate": 0.00025668677898677683, "loss": 1.7729, "step": 49244 }, { "epoch": 1.64, "grad_norm": 0.5295992493629456, "learning_rate": 0.0002566764364065808, "loss": 1.719, "step": 49245 }, { "epoch": 1.64, "grad_norm": 0.5411692261695862, "learning_rate": 0.0002566660938789732, "loss": 1.6673, "step": 49246 }, { "epoch": 1.64, "grad_norm": 0.5161774754524231, "learning_rate": 0.0002566557514039666, "loss": 1.6934, "step": 49247 }, { "epoch": 1.64, "grad_norm": 0.5319823026657104, "learning_rate": 0.00025664540898157355, "loss": 1.7782, "step": 49248 }, { "epoch": 1.64, "grad_norm": 0.5342005491256714, "learning_rate": 0.0002566350666118065, "loss": 1.7573, "step": 49249 }, { "epoch": 1.64, "grad_norm": 0.5525446534156799, "learning_rate": 0.0002566247242946781, "loss": 1.7727, "step": 49250 }, { "epoch": 1.64, "grad_norm": 0.5541468262672424, "learning_rate": 0.00025661438203020094, "loss": 1.7112, "step": 49251 }, { "epoch": 1.64, "grad_norm": 0.5486128330230713, "learning_rate": 0.00025660403981838753, "loss": 1.7266, "step": 49252 }, { "epoch": 1.64, "grad_norm": 0.52260822057724, "learning_rate": 0.00025659369765925036, "loss": 1.7771, "step": 49253 }, { "epoch": 1.64, "grad_norm": 0.5684926509857178, "learning_rate": 0.00025658335555280216, "loss": 1.7385, "step": 49254 }, { "epoch": 1.64, "grad_norm": 0.534728467464447, "learning_rate": 0.0002565730134990553, "loss": 1.7255, "step": 49255 }, { "epoch": 1.64, "grad_norm": 0.5344809293746948, "learning_rate": 0.0002565626714980224, "loss": 1.7894, "step": 49256 }, { "epoch": 1.64, "grad_norm": 0.5451698303222656, "learning_rate": 0.0002565523295497161, "loss": 1.6749, "step": 49257 }, { "epoch": 1.64, "grad_norm": 0.5304238796234131, "learning_rate": 0.0002565419876541487, "loss": 1.787, "step": 49258 }, { "epoch": 1.64, "grad_norm": 0.5235358476638794, "learning_rate": 0.00025653164581133296, "loss": 1.7652, "step": 49259 }, { "epoch": 1.64, "grad_norm": 0.5503261089324951, "learning_rate": 0.00025652130402128146, "loss": 1.764, "step": 49260 }, { "epoch": 1.64, "grad_norm": 0.5765963792800903, "learning_rate": 0.0002565109622840067, "loss": 1.7957, "step": 49261 }, { "epoch": 1.64, "grad_norm": 0.5422433018684387, "learning_rate": 0.0002565006205995212, "loss": 1.7383, "step": 49262 }, { "epoch": 1.64, "grad_norm": 0.5308505892753601, "learning_rate": 0.0002564902789678375, "loss": 1.7293, "step": 49263 }, { "epoch": 1.64, "grad_norm": 0.9675066471099854, "learning_rate": 0.00025647993738896836, "loss": 1.7093, "step": 49264 }, { "epoch": 1.64, "grad_norm": 0.5394914746284485, "learning_rate": 0.00025646959586292595, "loss": 1.7729, "step": 49265 }, { "epoch": 1.64, "grad_norm": 0.5420227646827698, "learning_rate": 0.0002564592543897231, "loss": 1.7175, "step": 49266 }, { "epoch": 1.64, "grad_norm": 0.5616528987884521, "learning_rate": 0.0002564489129693724, "loss": 1.8326, "step": 49267 }, { "epoch": 1.64, "grad_norm": 0.536880612373352, "learning_rate": 0.0002564385716018862, "loss": 1.7403, "step": 49268 }, { "epoch": 1.64, "grad_norm": 0.5489515662193298, "learning_rate": 0.00025642823028727715, "loss": 1.7508, "step": 49269 }, { "epoch": 1.64, "grad_norm": 0.5563256740570068, "learning_rate": 0.0002564178890255578, "loss": 1.7427, "step": 49270 }, { "epoch": 1.64, "grad_norm": 0.5321853160858154, "learning_rate": 0.00025640754781674086, "loss": 1.7108, "step": 49271 }, { "epoch": 1.64, "grad_norm": 0.5491877794265747, "learning_rate": 0.0002563972066608385, "loss": 1.7544, "step": 49272 }, { "epoch": 1.64, "grad_norm": 0.5359297394752502, "learning_rate": 0.00025638686555786366, "loss": 1.7071, "step": 49273 }, { "epoch": 1.64, "grad_norm": 0.5742567777633667, "learning_rate": 0.00025637652450782877, "loss": 1.8028, "step": 49274 }, { "epoch": 1.64, "grad_norm": 0.5325215458869934, "learning_rate": 0.0002563661835107463, "loss": 1.8018, "step": 49275 }, { "epoch": 1.64, "grad_norm": 0.5455120205879211, "learning_rate": 0.0002563558425666288, "loss": 1.7724, "step": 49276 }, { "epoch": 1.64, "grad_norm": 0.5622637271881104, "learning_rate": 0.00025634550167548886, "loss": 1.7725, "step": 49277 }, { "epoch": 1.64, "grad_norm": 0.5703408122062683, "learning_rate": 0.0002563351608373392, "loss": 1.7618, "step": 49278 }, { "epoch": 1.64, "grad_norm": 0.5358994603157043, "learning_rate": 0.0002563248200521921, "loss": 1.7225, "step": 49279 }, { "epoch": 1.64, "grad_norm": 0.5565894246101379, "learning_rate": 0.00025631447932006026, "loss": 1.777, "step": 49280 }, { "epoch": 1.64, "grad_norm": 0.5447960495948792, "learning_rate": 0.00025630413864095625, "loss": 1.7983, "step": 49281 }, { "epoch": 1.64, "grad_norm": 0.5552036166191101, "learning_rate": 0.00025629379801489255, "loss": 1.7948, "step": 49282 }, { "epoch": 1.64, "grad_norm": 0.5478917956352234, "learning_rate": 0.00025628345744188174, "loss": 1.82, "step": 49283 }, { "epoch": 1.64, "grad_norm": 0.5400577783584595, "learning_rate": 0.0002562731169219364, "loss": 1.6573, "step": 49284 }, { "epoch": 1.64, "grad_norm": 0.5315210223197937, "learning_rate": 0.00025626277645506896, "loss": 1.7736, "step": 49285 }, { "epoch": 1.64, "grad_norm": 0.5627912878990173, "learning_rate": 0.0002562524360412921, "loss": 1.7636, "step": 49286 }, { "epoch": 1.64, "grad_norm": 0.5512555837631226, "learning_rate": 0.0002562420956806183, "loss": 1.7314, "step": 49287 }, { "epoch": 1.64, "grad_norm": 0.5440077781677246, "learning_rate": 0.00025623175537306026, "loss": 1.7831, "step": 49288 }, { "epoch": 1.64, "grad_norm": 0.5711111426353455, "learning_rate": 0.00025622141511863033, "loss": 1.734, "step": 49289 }, { "epoch": 1.64, "grad_norm": 0.5321996212005615, "learning_rate": 0.0002562110749173412, "loss": 1.7132, "step": 49290 }, { "epoch": 1.64, "grad_norm": 0.5516499876976013, "learning_rate": 0.00025620073476920543, "loss": 1.7237, "step": 49291 }, { "epoch": 1.64, "grad_norm": 0.5478395223617554, "learning_rate": 0.00025619039467423546, "loss": 1.6833, "step": 49292 }, { "epoch": 1.64, "grad_norm": 0.5417483448982239, "learning_rate": 0.00025618005463244375, "loss": 1.8205, "step": 49293 }, { "epoch": 1.64, "grad_norm": 0.5358104109764099, "learning_rate": 0.00025616971464384323, "loss": 1.6541, "step": 49294 }, { "epoch": 1.64, "grad_norm": 0.5474933385848999, "learning_rate": 0.0002561593747084461, "loss": 1.6452, "step": 49295 }, { "epoch": 1.64, "grad_norm": 0.5586243867874146, "learning_rate": 0.00025614903482626503, "loss": 1.7509, "step": 49296 }, { "epoch": 1.64, "grad_norm": 0.5437058210372925, "learning_rate": 0.0002561386949973125, "loss": 1.7384, "step": 49297 }, { "epoch": 1.64, "grad_norm": 0.5481588244438171, "learning_rate": 0.0002561283552216013, "loss": 1.7205, "step": 49298 }, { "epoch": 1.64, "grad_norm": 0.5684506297111511, "learning_rate": 0.00025611801549914375, "loss": 1.7457, "step": 49299 }, { "epoch": 1.64, "grad_norm": 0.5250623822212219, "learning_rate": 0.0002561076758299523, "loss": 1.7615, "step": 49300 }, { "epoch": 1.64, "grad_norm": 0.5247191190719604, "learning_rate": 0.00025609733621403995, "loss": 1.7646, "step": 49301 }, { "epoch": 1.64, "grad_norm": 0.5439404249191284, "learning_rate": 0.00025608699665141877, "loss": 1.7567, "step": 49302 }, { "epoch": 1.64, "grad_norm": 0.7418648600578308, "learning_rate": 0.0002560766571421015, "loss": 1.7151, "step": 49303 }, { "epoch": 1.64, "grad_norm": 0.5492781400680542, "learning_rate": 0.00025606631768610084, "loss": 1.8055, "step": 49304 }, { "epoch": 1.64, "grad_norm": 0.5250674486160278, "learning_rate": 0.00025605597828342913, "loss": 1.8051, "step": 49305 }, { "epoch": 1.64, "grad_norm": 0.5556349158287048, "learning_rate": 0.0002560456389340989, "loss": 1.8145, "step": 49306 }, { "epoch": 1.64, "grad_norm": 0.5519179701805115, "learning_rate": 0.0002560352996381228, "loss": 1.7804, "step": 49307 }, { "epoch": 1.64, "grad_norm": 0.550944447517395, "learning_rate": 0.0002560249603955136, "loss": 1.8236, "step": 49308 }, { "epoch": 1.64, "grad_norm": 0.5490378737449646, "learning_rate": 0.00025601462120628335, "loss": 1.7733, "step": 49309 }, { "epoch": 1.64, "grad_norm": 0.5421828627586365, "learning_rate": 0.0002560042820704449, "loss": 1.7283, "step": 49310 }, { "epoch": 1.64, "grad_norm": 0.5327959060668945, "learning_rate": 0.0002559939429880109, "loss": 1.7526, "step": 49311 }, { "epoch": 1.64, "grad_norm": 0.5704368352890015, "learning_rate": 0.0002559836039589937, "loss": 1.7113, "step": 49312 }, { "epoch": 1.64, "grad_norm": 0.534633457660675, "learning_rate": 0.0002559732649834059, "loss": 1.7831, "step": 49313 }, { "epoch": 1.64, "grad_norm": 0.5461790561676025, "learning_rate": 0.00025596292606126016, "loss": 1.7151, "step": 49314 }, { "epoch": 1.64, "grad_norm": 0.5443183183670044, "learning_rate": 0.00025595258719256887, "loss": 1.699, "step": 49315 }, { "epoch": 1.64, "grad_norm": 0.53708416223526, "learning_rate": 0.0002559422483773445, "loss": 1.7912, "step": 49316 }, { "epoch": 1.64, "grad_norm": 0.536282479763031, "learning_rate": 0.0002559319096155999, "loss": 1.7603, "step": 49317 }, { "epoch": 1.64, "grad_norm": 0.5711551904678345, "learning_rate": 0.00025592157090734754, "loss": 1.8722, "step": 49318 }, { "epoch": 1.64, "grad_norm": 0.5365567207336426, "learning_rate": 0.0002559112322525998, "loss": 1.6806, "step": 49319 }, { "epoch": 1.64, "grad_norm": 0.5496534705162048, "learning_rate": 0.00025590089365136936, "loss": 1.7316, "step": 49320 }, { "epoch": 1.64, "grad_norm": 0.5623407959938049, "learning_rate": 0.00025589055510366875, "loss": 1.8004, "step": 49321 }, { "epoch": 1.64, "grad_norm": 0.5325066447257996, "learning_rate": 0.0002558802166095105, "loss": 1.7453, "step": 49322 }, { "epoch": 1.64, "grad_norm": 0.5362565517425537, "learning_rate": 0.00025586987816890704, "loss": 1.6936, "step": 49323 }, { "epoch": 1.64, "grad_norm": 0.5461650490760803, "learning_rate": 0.0002558595397818713, "loss": 1.7797, "step": 49324 }, { "epoch": 1.64, "grad_norm": 0.5182777047157288, "learning_rate": 0.00025584920144841535, "loss": 1.7828, "step": 49325 }, { "epoch": 1.64, "grad_norm": 0.549720823764801, "learning_rate": 0.0002558388631685521, "loss": 1.7739, "step": 49326 }, { "epoch": 1.64, "grad_norm": 0.5326070785522461, "learning_rate": 0.00025582852494229383, "loss": 1.7653, "step": 49327 }, { "epoch": 1.64, "grad_norm": 0.5337206721305847, "learning_rate": 0.0002558181867696534, "loss": 1.7366, "step": 49328 }, { "epoch": 1.64, "grad_norm": 0.538724958896637, "learning_rate": 0.00025580784865064307, "loss": 1.7188, "step": 49329 }, { "epoch": 1.64, "grad_norm": 0.5989712476730347, "learning_rate": 0.00025579751058527544, "loss": 1.7336, "step": 49330 }, { "epoch": 1.64, "grad_norm": 0.5341804027557373, "learning_rate": 0.0002557871725735633, "loss": 1.7794, "step": 49331 }, { "epoch": 1.64, "grad_norm": 0.5379711389541626, "learning_rate": 0.0002557768346155188, "loss": 1.8142, "step": 49332 }, { "epoch": 1.64, "grad_norm": 0.5270790457725525, "learning_rate": 0.00025576649671115484, "loss": 1.8115, "step": 49333 }, { "epoch": 1.64, "grad_norm": 0.5339459180831909, "learning_rate": 0.00025575615886048385, "loss": 1.7937, "step": 49334 }, { "epoch": 1.64, "grad_norm": 0.5398044586181641, "learning_rate": 0.0002557458210635183, "loss": 1.701, "step": 49335 }, { "epoch": 1.64, "grad_norm": 0.5679370760917664, "learning_rate": 0.00025573548332027084, "loss": 1.7798, "step": 49336 }, { "epoch": 1.64, "grad_norm": 0.5335693955421448, "learning_rate": 0.00025572514563075387, "loss": 1.7959, "step": 49337 }, { "epoch": 1.64, "grad_norm": 0.5523864030838013, "learning_rate": 0.00025571480799498026, "loss": 1.7587, "step": 49338 }, { "epoch": 1.64, "grad_norm": 0.5226338505744934, "learning_rate": 0.0002557044704129622, "loss": 1.7988, "step": 49339 }, { "epoch": 1.64, "grad_norm": 0.5211270451545715, "learning_rate": 0.0002556941328847124, "loss": 1.7954, "step": 49340 }, { "epoch": 1.64, "grad_norm": 0.5271915197372437, "learning_rate": 0.00025568379541024346, "loss": 1.7088, "step": 49341 }, { "epoch": 1.64, "grad_norm": 0.5420148968696594, "learning_rate": 0.0002556734579895678, "loss": 1.7508, "step": 49342 }, { "epoch": 1.64, "grad_norm": 0.5476983189582825, "learning_rate": 0.000255663120622698, "loss": 1.7643, "step": 49343 }, { "epoch": 1.64, "grad_norm": 0.5424268245697021, "learning_rate": 0.00025565278330964684, "loss": 1.7459, "step": 49344 }, { "epoch": 1.64, "grad_norm": 0.5493574738502502, "learning_rate": 0.00025564244605042646, "loss": 1.7582, "step": 49345 }, { "epoch": 1.64, "grad_norm": 0.6187958121299744, "learning_rate": 0.0002556321088450496, "loss": 1.7995, "step": 49346 }, { "epoch": 1.64, "grad_norm": 0.5387786030769348, "learning_rate": 0.0002556217716935289, "loss": 1.783, "step": 49347 }, { "epoch": 1.64, "grad_norm": 0.5360535979270935, "learning_rate": 0.00025561143459587685, "loss": 1.7407, "step": 49348 }, { "epoch": 1.64, "grad_norm": 0.5615230202674866, "learning_rate": 0.000255601097552106, "loss": 1.7419, "step": 49349 }, { "epoch": 1.64, "grad_norm": 0.5482209324836731, "learning_rate": 0.0002555907605622288, "loss": 1.8028, "step": 49350 }, { "epoch": 1.64, "grad_norm": 0.5678106546401978, "learning_rate": 0.0002555804236262579, "loss": 1.7945, "step": 49351 }, { "epoch": 1.64, "grad_norm": 0.5169340968132019, "learning_rate": 0.00025557008674420583, "loss": 1.7435, "step": 49352 }, { "epoch": 1.64, "grad_norm": 0.5219016075134277, "learning_rate": 0.00025555974991608505, "loss": 1.7316, "step": 49353 }, { "epoch": 1.64, "grad_norm": 0.5641347765922546, "learning_rate": 0.0002555494131419083, "loss": 1.7888, "step": 49354 }, { "epoch": 1.64, "grad_norm": 0.5421176552772522, "learning_rate": 0.000255539076421688, "loss": 1.7279, "step": 49355 }, { "epoch": 1.64, "grad_norm": 0.525688886642456, "learning_rate": 0.00025552873975543663, "loss": 1.6973, "step": 49356 }, { "epoch": 1.64, "grad_norm": 0.5290632843971252, "learning_rate": 0.0002555184031431669, "loss": 1.784, "step": 49357 }, { "epoch": 1.64, "grad_norm": 0.5663846731185913, "learning_rate": 0.0002555080665848913, "loss": 1.7526, "step": 49358 }, { "epoch": 1.64, "grad_norm": 0.5367435812950134, "learning_rate": 0.00025549773008062224, "loss": 1.7989, "step": 49359 }, { "epoch": 1.64, "grad_norm": 0.5397224426269531, "learning_rate": 0.00025548739363037237, "loss": 1.7768, "step": 49360 }, { "epoch": 1.64, "grad_norm": 0.5436508655548096, "learning_rate": 0.00025547705723415437, "loss": 1.7285, "step": 49361 }, { "epoch": 1.64, "grad_norm": 0.5248621702194214, "learning_rate": 0.0002554667208919806, "loss": 1.7396, "step": 49362 }, { "epoch": 1.64, "grad_norm": 0.5241515040397644, "learning_rate": 0.00025545638460386366, "loss": 1.7728, "step": 49363 }, { "epoch": 1.64, "grad_norm": 0.5459583401679993, "learning_rate": 0.0002554460483698161, "loss": 1.7196, "step": 49364 }, { "epoch": 1.64, "grad_norm": 0.5522176027297974, "learning_rate": 0.00025543571218985053, "loss": 1.7913, "step": 49365 }, { "epoch": 1.64, "grad_norm": 0.5281074047088623, "learning_rate": 0.00025542537606397937, "loss": 1.8494, "step": 49366 }, { "epoch": 1.64, "grad_norm": 0.5454760193824768, "learning_rate": 0.0002554150399922152, "loss": 1.733, "step": 49367 }, { "epoch": 1.64, "grad_norm": 0.5505645275115967, "learning_rate": 0.00025540470397457073, "loss": 1.6697, "step": 49368 }, { "epoch": 1.64, "grad_norm": 0.5347238779067993, "learning_rate": 0.0002553943680110583, "loss": 1.7047, "step": 49369 }, { "epoch": 1.64, "grad_norm": 0.5316988229751587, "learning_rate": 0.00025538403210169055, "loss": 1.7729, "step": 49370 }, { "epoch": 1.64, "grad_norm": 0.5266970992088318, "learning_rate": 0.00025537369624648005, "loss": 1.7386, "step": 49371 }, { "epoch": 1.64, "grad_norm": 0.5343706607818604, "learning_rate": 0.00025536336044543927, "loss": 1.6891, "step": 49372 }, { "epoch": 1.64, "grad_norm": 0.5477913022041321, "learning_rate": 0.0002553530246985808, "loss": 1.8303, "step": 49373 }, { "epoch": 1.64, "grad_norm": 0.5308353900909424, "learning_rate": 0.0002553426890059172, "loss": 1.7471, "step": 49374 }, { "epoch": 1.64, "grad_norm": 0.5519259572029114, "learning_rate": 0.0002553323533674609, "loss": 1.7516, "step": 49375 }, { "epoch": 1.64, "grad_norm": 0.5660990476608276, "learning_rate": 0.0002553220177832246, "loss": 1.6901, "step": 49376 }, { "epoch": 1.64, "grad_norm": 0.5573843121528625, "learning_rate": 0.00025531168225322075, "loss": 1.7389, "step": 49377 }, { "epoch": 1.64, "grad_norm": 0.5370662808418274, "learning_rate": 0.00025530134677746204, "loss": 1.7588, "step": 49378 }, { "epoch": 1.64, "grad_norm": 0.5291873216629028, "learning_rate": 0.00025529101135596087, "loss": 1.7021, "step": 49379 }, { "epoch": 1.64, "grad_norm": 0.5472822189331055, "learning_rate": 0.0002552806759887297, "loss": 1.7406, "step": 49380 }, { "epoch": 1.64, "grad_norm": 0.5472683310508728, "learning_rate": 0.0002552703406757814, "loss": 1.8133, "step": 49381 }, { "epoch": 1.64, "grad_norm": 0.5322839021682739, "learning_rate": 0.00025526000541712816, "loss": 1.7012, "step": 49382 }, { "epoch": 1.64, "grad_norm": 0.5174828767776489, "learning_rate": 0.0002552496702127827, "loss": 1.675, "step": 49383 }, { "epoch": 1.64, "grad_norm": 0.5706241130828857, "learning_rate": 0.0002552393350627577, "loss": 1.7385, "step": 49384 }, { "epoch": 1.64, "grad_norm": 0.5378623604774475, "learning_rate": 0.0002552289999670654, "loss": 1.7634, "step": 49385 }, { "epoch": 1.64, "grad_norm": 0.528601884841919, "learning_rate": 0.0002552186649257185, "loss": 1.7324, "step": 49386 }, { "epoch": 1.64, "grad_norm": 0.5117399096488953, "learning_rate": 0.0002552083299387295, "loss": 1.768, "step": 49387 }, { "epoch": 1.64, "grad_norm": 0.5396540760993958, "learning_rate": 0.0002551979950061112, "loss": 1.7208, "step": 49388 }, { "epoch": 1.64, "grad_norm": 0.5495365262031555, "learning_rate": 0.00025518766012787574, "loss": 1.7939, "step": 49389 }, { "epoch": 1.64, "grad_norm": 0.5312873721122742, "learning_rate": 0.00025517732530403587, "loss": 1.7615, "step": 49390 }, { "epoch": 1.64, "grad_norm": 0.535478413105011, "learning_rate": 0.00025516699053460424, "loss": 1.6959, "step": 49391 }, { "epoch": 1.64, "grad_norm": 0.506764829158783, "learning_rate": 0.00025515665581959317, "loss": 1.736, "step": 49392 }, { "epoch": 1.64, "grad_norm": 0.5414061546325684, "learning_rate": 0.0002551463211590154, "loss": 1.7434, "step": 49393 }, { "epoch": 1.64, "grad_norm": 0.5397012233734131, "learning_rate": 0.0002551359865528832, "loss": 1.6798, "step": 49394 }, { "epoch": 1.64, "grad_norm": 0.5261090397834778, "learning_rate": 0.00025512565200120957, "loss": 1.7129, "step": 49395 }, { "epoch": 1.64, "grad_norm": 0.5428756475448608, "learning_rate": 0.0002551153175040066, "loss": 1.7766, "step": 49396 }, { "epoch": 1.64, "grad_norm": 0.5144922733306885, "learning_rate": 0.0002551049830612871, "loss": 1.7675, "step": 49397 }, { "epoch": 1.64, "grad_norm": 0.5607855916023254, "learning_rate": 0.00025509464867306354, "loss": 1.7001, "step": 49398 }, { "epoch": 1.64, "grad_norm": 0.5581629872322083, "learning_rate": 0.00025508431433934843, "loss": 1.7581, "step": 49399 }, { "epoch": 1.64, "grad_norm": 0.5545172095298767, "learning_rate": 0.0002550739800601543, "loss": 1.7445, "step": 49400 }, { "epoch": 1.64, "grad_norm": 0.5547675490379333, "learning_rate": 0.00025506364583549384, "loss": 1.7374, "step": 49401 }, { "epoch": 1.64, "grad_norm": 0.5720188617706299, "learning_rate": 0.0002550533116653794, "loss": 1.7853, "step": 49402 }, { "epoch": 1.64, "grad_norm": 0.5621891021728516, "learning_rate": 0.0002550429775498236, "loss": 1.791, "step": 49403 }, { "epoch": 1.64, "grad_norm": 0.5218554735183716, "learning_rate": 0.00025503264348883907, "loss": 1.7845, "step": 49404 }, { "epoch": 1.64, "grad_norm": 0.5375432968139648, "learning_rate": 0.0002550223094824383, "loss": 1.819, "step": 49405 }, { "epoch": 1.64, "grad_norm": 0.5655199289321899, "learning_rate": 0.0002550119755306338, "loss": 1.7761, "step": 49406 }, { "epoch": 1.64, "grad_norm": 0.5499343872070312, "learning_rate": 0.0002550016416334381, "loss": 1.7645, "step": 49407 }, { "epoch": 1.64, "grad_norm": 0.5219524502754211, "learning_rate": 0.00025499130779086385, "loss": 1.7954, "step": 49408 }, { "epoch": 1.64, "grad_norm": 0.5197061896324158, "learning_rate": 0.00025498097400292343, "loss": 1.7076, "step": 49409 }, { "epoch": 1.64, "grad_norm": 0.5447034239768982, "learning_rate": 0.00025497064026962945, "loss": 1.7454, "step": 49410 }, { "epoch": 1.64, "grad_norm": 0.5338742136955261, "learning_rate": 0.00025496030659099466, "loss": 1.7548, "step": 49411 }, { "epoch": 1.64, "grad_norm": 0.5440525412559509, "learning_rate": 0.00025494997296703124, "loss": 1.7854, "step": 49412 }, { "epoch": 1.64, "grad_norm": 0.5488507151603699, "learning_rate": 0.000254939639397752, "loss": 1.8226, "step": 49413 }, { "epoch": 1.64, "grad_norm": 0.5318591594696045, "learning_rate": 0.0002549293058831693, "loss": 1.7323, "step": 49414 }, { "epoch": 1.64, "grad_norm": 0.5376877188682556, "learning_rate": 0.00025491897242329594, "loss": 1.7931, "step": 49415 }, { "epoch": 1.64, "grad_norm": 0.5531253218650818, "learning_rate": 0.00025490863901814426, "loss": 1.7138, "step": 49416 }, { "epoch": 1.64, "grad_norm": 0.5386180281639099, "learning_rate": 0.0002548983056677267, "loss": 1.7842, "step": 49417 }, { "epoch": 1.64, "grad_norm": 0.5193271040916443, "learning_rate": 0.0002548879723720562, "loss": 1.7293, "step": 49418 }, { "epoch": 1.64, "grad_norm": 0.5456444025039673, "learning_rate": 0.00025487763913114485, "loss": 1.8041, "step": 49419 }, { "epoch": 1.64, "grad_norm": 0.5438269376754761, "learning_rate": 0.0002548673059450054, "loss": 1.8465, "step": 49420 }, { "epoch": 1.64, "grad_norm": 0.5438195466995239, "learning_rate": 0.00025485697281365057, "loss": 1.7087, "step": 49421 }, { "epoch": 1.64, "grad_norm": 0.5446614027023315, "learning_rate": 0.00025484663973709254, "loss": 1.7483, "step": 49422 }, { "epoch": 1.64, "grad_norm": 0.5253086090087891, "learning_rate": 0.00025483630671534413, "loss": 1.698, "step": 49423 }, { "epoch": 1.64, "grad_norm": 0.5403053760528564, "learning_rate": 0.0002548259737484177, "loss": 1.7545, "step": 49424 }, { "epoch": 1.64, "grad_norm": 0.549550473690033, "learning_rate": 0.00025481564083632604, "loss": 1.7914, "step": 49425 }, { "epoch": 1.64, "grad_norm": 0.5616524815559387, "learning_rate": 0.0002548053079790814, "loss": 1.7866, "step": 49426 }, { "epoch": 1.64, "grad_norm": 0.5574937462806702, "learning_rate": 0.0002547949751766965, "loss": 1.7272, "step": 49427 }, { "epoch": 1.64, "grad_norm": 0.5445470809936523, "learning_rate": 0.0002547846424291839, "loss": 1.7674, "step": 49428 }, { "epoch": 1.64, "grad_norm": 0.5438925623893738, "learning_rate": 0.000254774309736556, "loss": 1.7629, "step": 49429 }, { "epoch": 1.64, "grad_norm": 0.5382223129272461, "learning_rate": 0.0002547639770988254, "loss": 1.7309, "step": 49430 }, { "epoch": 1.64, "grad_norm": 0.5649062991142273, "learning_rate": 0.0002547536445160048, "loss": 1.7888, "step": 49431 }, { "epoch": 1.64, "grad_norm": 0.5405023694038391, "learning_rate": 0.00025474331198810645, "loss": 1.7107, "step": 49432 }, { "epoch": 1.64, "grad_norm": 0.5231085419654846, "learning_rate": 0.00025473297951514307, "loss": 1.7856, "step": 49433 }, { "epoch": 1.64, "grad_norm": 0.5668071508407593, "learning_rate": 0.0002547226470971272, "loss": 1.6679, "step": 49434 }, { "epoch": 1.64, "grad_norm": 0.5192643404006958, "learning_rate": 0.00025471231473407147, "loss": 1.7331, "step": 49435 }, { "epoch": 1.64, "grad_norm": 0.6424291729927063, "learning_rate": 0.0002547019824259883, "loss": 1.7355, "step": 49436 }, { "epoch": 1.64, "grad_norm": 0.5338011384010315, "learning_rate": 0.0002546916501728901, "loss": 1.7579, "step": 49437 }, { "epoch": 1.64, "grad_norm": 0.5621898174285889, "learning_rate": 0.00025468131797478976, "loss": 1.8447, "step": 49438 }, { "epoch": 1.64, "grad_norm": 0.5409039258956909, "learning_rate": 0.0002546709858316995, "loss": 1.7506, "step": 49439 }, { "epoch": 1.64, "grad_norm": 0.5372194647789001, "learning_rate": 0.00025466065374363196, "loss": 1.759, "step": 49440 }, { "epoch": 1.64, "grad_norm": 0.5351120829582214, "learning_rate": 0.0002546503217105998, "loss": 1.6998, "step": 49441 }, { "epoch": 1.64, "grad_norm": 0.5513677000999451, "learning_rate": 0.0002546399897326154, "loss": 1.6903, "step": 49442 }, { "epoch": 1.64, "grad_norm": 0.5582811236381531, "learning_rate": 0.00025462965780969135, "loss": 1.7609, "step": 49443 }, { "epoch": 1.65, "grad_norm": 0.5569915771484375, "learning_rate": 0.00025461932594184023, "loss": 1.7575, "step": 49444 }, { "epoch": 1.65, "grad_norm": 0.5360265374183655, "learning_rate": 0.0002546089941290746, "loss": 1.7562, "step": 49445 }, { "epoch": 1.65, "grad_norm": 0.5409759879112244, "learning_rate": 0.00025459866237140694, "loss": 1.7393, "step": 49446 }, { "epoch": 1.65, "grad_norm": 0.5298639535903931, "learning_rate": 0.00025458833066884976, "loss": 1.6963, "step": 49447 }, { "epoch": 1.65, "grad_norm": 0.5260176062583923, "learning_rate": 0.0002545779990214158, "loss": 1.7844, "step": 49448 }, { "epoch": 1.65, "grad_norm": 0.5482654571533203, "learning_rate": 0.0002545676674291173, "loss": 1.6964, "step": 49449 }, { "epoch": 1.65, "grad_norm": 0.5468025207519531, "learning_rate": 0.00025455733589196705, "loss": 1.7714, "step": 49450 }, { "epoch": 1.65, "grad_norm": 0.5567474961280823, "learning_rate": 0.0002545470044099775, "loss": 1.7204, "step": 49451 }, { "epoch": 1.65, "grad_norm": 0.5601539015769958, "learning_rate": 0.00025453667298316115, "loss": 1.7723, "step": 49452 }, { "epoch": 1.65, "grad_norm": 0.5517362356185913, "learning_rate": 0.0002545263416115306, "loss": 1.7317, "step": 49453 }, { "epoch": 1.65, "grad_norm": 0.5656343102455139, "learning_rate": 0.00025451601029509826, "loss": 1.7904, "step": 49454 }, { "epoch": 1.65, "grad_norm": 0.5722736120223999, "learning_rate": 0.000254505679033877, "loss": 1.7169, "step": 49455 }, { "epoch": 1.65, "grad_norm": 0.5393698215484619, "learning_rate": 0.000254495347827879, "loss": 1.7629, "step": 49456 }, { "epoch": 1.65, "grad_norm": 0.5304747819900513, "learning_rate": 0.0002544850166771169, "loss": 1.7323, "step": 49457 }, { "epoch": 1.65, "grad_norm": 0.5608041882514954, "learning_rate": 0.00025447468558160345, "loss": 1.747, "step": 49458 }, { "epoch": 1.65, "grad_norm": 0.5918318033218384, "learning_rate": 0.0002544643545413509, "loss": 1.8025, "step": 49459 }, { "epoch": 1.65, "grad_norm": 0.5478272438049316, "learning_rate": 0.00025445402355637195, "loss": 1.6718, "step": 49460 }, { "epoch": 1.65, "grad_norm": 0.5406782031059265, "learning_rate": 0.0002544436926266792, "loss": 1.732, "step": 49461 }, { "epoch": 1.65, "grad_norm": 0.5335124731063843, "learning_rate": 0.00025443336175228496, "loss": 1.7461, "step": 49462 }, { "epoch": 1.65, "grad_norm": 0.5389742851257324, "learning_rate": 0.00025442303093320184, "loss": 1.751, "step": 49463 }, { "epoch": 1.65, "grad_norm": 0.5255787968635559, "learning_rate": 0.00025441270016944253, "loss": 1.7068, "step": 49464 }, { "epoch": 1.65, "grad_norm": 0.544476330280304, "learning_rate": 0.00025440236946101954, "loss": 1.7618, "step": 49465 }, { "epoch": 1.65, "grad_norm": 0.514400064945221, "learning_rate": 0.0002543920388079453, "loss": 1.7195, "step": 49466 }, { "epoch": 1.65, "grad_norm": 0.555878758430481, "learning_rate": 0.0002543817082102324, "loss": 1.7675, "step": 49467 }, { "epoch": 1.65, "grad_norm": 0.5473299622535706, "learning_rate": 0.00025437137766789347, "loss": 1.7804, "step": 49468 }, { "epoch": 1.65, "grad_norm": 0.5270509123802185, "learning_rate": 0.0002543610471809409, "loss": 1.7433, "step": 49469 }, { "epoch": 1.65, "grad_norm": 0.5524877905845642, "learning_rate": 0.0002543507167493872, "loss": 1.7574, "step": 49470 }, { "epoch": 1.65, "grad_norm": 0.5337390303611755, "learning_rate": 0.0002543403863732451, "loss": 1.7432, "step": 49471 }, { "epoch": 1.65, "grad_norm": 0.5328614711761475, "learning_rate": 0.0002543300560525271, "loss": 1.7466, "step": 49472 }, { "epoch": 1.65, "grad_norm": 0.5361011624336243, "learning_rate": 0.0002543197257872456, "loss": 1.7518, "step": 49473 }, { "epoch": 1.65, "grad_norm": 0.5356749296188354, "learning_rate": 0.0002543093955774132, "loss": 1.714, "step": 49474 }, { "epoch": 1.65, "grad_norm": 0.5656880736351013, "learning_rate": 0.00025429906542304257, "loss": 1.6906, "step": 49475 }, { "epoch": 1.65, "grad_norm": 0.5344731211662292, "learning_rate": 0.00025428873532414606, "loss": 1.7577, "step": 49476 }, { "epoch": 1.65, "grad_norm": 0.5638214349746704, "learning_rate": 0.00025427840528073625, "loss": 1.8031, "step": 49477 }, { "epoch": 1.65, "grad_norm": 0.5194858312606812, "learning_rate": 0.00025426807529282583, "loss": 1.7518, "step": 49478 }, { "epoch": 1.65, "grad_norm": 0.5379349589347839, "learning_rate": 0.00025425774536042716, "loss": 1.8292, "step": 49479 }, { "epoch": 1.65, "grad_norm": 0.5347617864608765, "learning_rate": 0.0002542474154835528, "loss": 1.7128, "step": 49480 }, { "epoch": 1.65, "grad_norm": 0.5507106781005859, "learning_rate": 0.0002542370856622155, "loss": 1.7743, "step": 49481 }, { "epoch": 1.65, "grad_norm": 0.5539003610610962, "learning_rate": 0.0002542267558964275, "loss": 1.7873, "step": 49482 }, { "epoch": 1.65, "grad_norm": 0.5252991914749146, "learning_rate": 0.0002542164261862015, "loss": 1.7244, "step": 49483 }, { "epoch": 1.65, "grad_norm": 0.5384867191314697, "learning_rate": 0.00025420609653154994, "loss": 1.6674, "step": 49484 }, { "epoch": 1.65, "grad_norm": 0.5476820468902588, "learning_rate": 0.00025419576693248554, "loss": 1.7733, "step": 49485 }, { "epoch": 1.65, "grad_norm": 0.5493666529655457, "learning_rate": 0.0002541854373890207, "loss": 1.6584, "step": 49486 }, { "epoch": 1.65, "grad_norm": 0.5370606184005737, "learning_rate": 0.000254175107901168, "loss": 1.7478, "step": 49487 }, { "epoch": 1.65, "grad_norm": 0.5370239019393921, "learning_rate": 0.00025416477846894, "loss": 1.6967, "step": 49488 }, { "epoch": 1.65, "grad_norm": 0.5303987264633179, "learning_rate": 0.0002541544490923491, "loss": 1.6817, "step": 49489 }, { "epoch": 1.65, "grad_norm": 0.5551756024360657, "learning_rate": 0.00025414411977140805, "loss": 1.6738, "step": 49490 }, { "epoch": 1.65, "grad_norm": 0.5473955869674683, "learning_rate": 0.0002541337905061293, "loss": 1.7757, "step": 49491 }, { "epoch": 1.65, "grad_norm": 0.5557941794395447, "learning_rate": 0.0002541234612965252, "loss": 1.7583, "step": 49492 }, { "epoch": 1.65, "grad_norm": 0.5210733413696289, "learning_rate": 0.0002541131321426085, "loss": 1.815, "step": 49493 }, { "epoch": 1.65, "grad_norm": 0.5296471118927002, "learning_rate": 0.0002541028030443918, "loss": 1.7818, "step": 49494 }, { "epoch": 1.65, "grad_norm": 0.6580997109413147, "learning_rate": 0.0002540924740018876, "loss": 1.8028, "step": 49495 }, { "epoch": 1.65, "grad_norm": 0.5949567556381226, "learning_rate": 0.0002540821450151082, "loss": 1.7648, "step": 49496 }, { "epoch": 1.65, "grad_norm": 0.5526624321937561, "learning_rate": 0.0002540718160840663, "loss": 1.7636, "step": 49497 }, { "epoch": 1.65, "grad_norm": 0.5469919443130493, "learning_rate": 0.00025406148720877466, "loss": 1.7072, "step": 49498 }, { "epoch": 1.65, "grad_norm": 0.5414305925369263, "learning_rate": 0.0002540511583892454, "loss": 1.6981, "step": 49499 }, { "epoch": 1.65, "grad_norm": 0.5642437934875488, "learning_rate": 0.0002540408296254913, "loss": 1.773, "step": 49500 }, { "epoch": 1.65, "grad_norm": 0.5375410318374634, "learning_rate": 0.000254030500917525, "loss": 1.779, "step": 49501 }, { "epoch": 1.65, "grad_norm": 0.516938328742981, "learning_rate": 0.00025402017226535875, "loss": 1.7131, "step": 49502 }, { "epoch": 1.65, "grad_norm": 0.5507882237434387, "learning_rate": 0.0002540098436690053, "loss": 1.7753, "step": 49503 }, { "epoch": 1.65, "grad_norm": 0.5402017831802368, "learning_rate": 0.00025399951512847696, "loss": 1.7225, "step": 49504 }, { "epoch": 1.65, "grad_norm": 0.5551995635032654, "learning_rate": 0.0002539891866437867, "loss": 1.7986, "step": 49505 }, { "epoch": 1.65, "grad_norm": 0.549613356590271, "learning_rate": 0.0002539788582149466, "loss": 1.7872, "step": 49506 }, { "epoch": 1.65, "grad_norm": 0.5435906052589417, "learning_rate": 0.0002539685298419695, "loss": 1.7344, "step": 49507 }, { "epoch": 1.65, "grad_norm": 0.5688052177429199, "learning_rate": 0.00025395820152486774, "loss": 1.7292, "step": 49508 }, { "epoch": 1.65, "grad_norm": 0.5251876711845398, "learning_rate": 0.000253947873263654, "loss": 1.7144, "step": 49509 }, { "epoch": 1.65, "grad_norm": 0.5347615480422974, "learning_rate": 0.0002539375450583407, "loss": 1.72, "step": 49510 }, { "epoch": 1.65, "grad_norm": 0.5458388924598694, "learning_rate": 0.00025392721690894055, "loss": 1.7791, "step": 49511 }, { "epoch": 1.65, "grad_norm": 0.5265758633613586, "learning_rate": 0.00025391688881546586, "loss": 1.7288, "step": 49512 }, { "epoch": 1.65, "grad_norm": 0.5458202958106995, "learning_rate": 0.0002539065607779292, "loss": 1.7475, "step": 49513 }, { "epoch": 1.65, "grad_norm": 0.5414540767669678, "learning_rate": 0.00025389623279634333, "loss": 1.7844, "step": 49514 }, { "epoch": 1.65, "grad_norm": 0.5576239228248596, "learning_rate": 0.0002538859048707206, "loss": 1.8132, "step": 49515 }, { "epoch": 1.65, "grad_norm": 0.5286734700202942, "learning_rate": 0.0002538755770010735, "loss": 1.7393, "step": 49516 }, { "epoch": 1.65, "grad_norm": 0.5578469038009644, "learning_rate": 0.0002538652491874148, "loss": 1.7693, "step": 49517 }, { "epoch": 1.65, "grad_norm": 0.5294049978256226, "learning_rate": 0.00025385492142975685, "loss": 1.7523, "step": 49518 }, { "epoch": 1.65, "grad_norm": 0.5596516728401184, "learning_rate": 0.0002538445937281122, "loss": 1.8002, "step": 49519 }, { "epoch": 1.65, "grad_norm": 0.5347692370414734, "learning_rate": 0.0002538342660824933, "loss": 1.7268, "step": 49520 }, { "epoch": 1.65, "grad_norm": 0.5351504683494568, "learning_rate": 0.0002538239384929129, "loss": 1.8169, "step": 49521 }, { "epoch": 1.65, "grad_norm": 0.5501642823219299, "learning_rate": 0.00025381361095938354, "loss": 1.7157, "step": 49522 }, { "epoch": 1.65, "grad_norm": 0.5538666844367981, "learning_rate": 0.00025380328348191756, "loss": 1.7724, "step": 49523 }, { "epoch": 1.65, "grad_norm": 0.5293155312538147, "learning_rate": 0.00025379295606052754, "loss": 1.7407, "step": 49524 }, { "epoch": 1.65, "grad_norm": 0.5566093325614929, "learning_rate": 0.00025378262869522617, "loss": 1.7042, "step": 49525 }, { "epoch": 1.65, "grad_norm": 0.5607799291610718, "learning_rate": 0.00025377230138602576, "loss": 1.746, "step": 49526 }, { "epoch": 1.65, "grad_norm": 0.5612384676933289, "learning_rate": 0.000253761974132939, "loss": 1.7305, "step": 49527 }, { "epoch": 1.65, "grad_norm": 0.5373479127883911, "learning_rate": 0.0002537516469359785, "loss": 1.7941, "step": 49528 }, { "epoch": 1.65, "grad_norm": 0.5486821532249451, "learning_rate": 0.00025374131979515654, "loss": 1.6913, "step": 49529 }, { "epoch": 1.65, "grad_norm": 0.542563259601593, "learning_rate": 0.00025373099271048583, "loss": 1.7592, "step": 49530 }, { "epoch": 1.65, "grad_norm": 0.5257714986801147, "learning_rate": 0.00025372066568197886, "loss": 1.7665, "step": 49531 }, { "epoch": 1.65, "grad_norm": 0.5585843920707703, "learning_rate": 0.00025371033870964836, "loss": 1.8412, "step": 49532 }, { "epoch": 1.65, "grad_norm": 0.5336728692054749, "learning_rate": 0.00025370001179350653, "loss": 1.7963, "step": 49533 }, { "epoch": 1.65, "grad_norm": 0.547062337398529, "learning_rate": 0.000253689684933566, "loss": 1.7533, "step": 49534 }, { "epoch": 1.65, "grad_norm": 0.5600985288619995, "learning_rate": 0.00025367935812983957, "loss": 1.7269, "step": 49535 }, { "epoch": 1.65, "grad_norm": 0.5310497879981995, "learning_rate": 0.0002536690313823394, "loss": 1.7779, "step": 49536 }, { "epoch": 1.65, "grad_norm": 0.5505149364471436, "learning_rate": 0.00025365870469107825, "loss": 1.7173, "step": 49537 }, { "epoch": 1.65, "grad_norm": 0.5368886590003967, "learning_rate": 0.00025364837805606864, "loss": 1.7557, "step": 49538 }, { "epoch": 1.65, "grad_norm": 0.5995928645133972, "learning_rate": 0.00025363805147732304, "loss": 1.674, "step": 49539 }, { "epoch": 1.65, "grad_norm": 0.5358002781867981, "learning_rate": 0.000253627724954854, "loss": 1.8182, "step": 49540 }, { "epoch": 1.65, "grad_norm": 0.5576939582824707, "learning_rate": 0.000253617398488674, "loss": 1.6751, "step": 49541 }, { "epoch": 1.65, "grad_norm": 0.5523883104324341, "learning_rate": 0.0002536070720787959, "loss": 1.7773, "step": 49542 }, { "epoch": 1.65, "grad_norm": 0.5361815690994263, "learning_rate": 0.00025359674572523176, "loss": 1.7367, "step": 49543 }, { "epoch": 1.65, "grad_norm": 0.5604151487350464, "learning_rate": 0.0002535864194279943, "loss": 1.7374, "step": 49544 }, { "epoch": 1.65, "grad_norm": 0.547206461429596, "learning_rate": 0.00025357609318709626, "loss": 1.7724, "step": 49545 }, { "epoch": 1.65, "grad_norm": 0.5439406037330627, "learning_rate": 0.00025356576700254994, "loss": 1.7489, "step": 49546 }, { "epoch": 1.65, "grad_norm": 0.5257102251052856, "learning_rate": 0.00025355544087436786, "loss": 1.7177, "step": 49547 }, { "epoch": 1.65, "grad_norm": 0.5418915152549744, "learning_rate": 0.00025354511480256273, "loss": 1.8208, "step": 49548 }, { "epoch": 1.65, "grad_norm": 0.5468611121177673, "learning_rate": 0.00025353478878714696, "loss": 1.8393, "step": 49549 }, { "epoch": 1.65, "grad_norm": 0.5434263944625854, "learning_rate": 0.000253524462828133, "loss": 1.7093, "step": 49550 }, { "epoch": 1.65, "grad_norm": 0.56734299659729, "learning_rate": 0.00025351413692553356, "loss": 1.7699, "step": 49551 }, { "epoch": 1.65, "grad_norm": 0.5616453886032104, "learning_rate": 0.00025350381107936125, "loss": 1.7559, "step": 49552 }, { "epoch": 1.65, "grad_norm": 0.5330955982208252, "learning_rate": 0.0002534934852896283, "loss": 1.7493, "step": 49553 }, { "epoch": 1.65, "grad_norm": 0.535149872303009, "learning_rate": 0.00025348315955634745, "loss": 1.7696, "step": 49554 }, { "epoch": 1.65, "grad_norm": 0.5576171278953552, "learning_rate": 0.00025347283387953126, "loss": 1.7578, "step": 49555 }, { "epoch": 1.65, "grad_norm": 0.5465531945228577, "learning_rate": 0.00025346250825919215, "loss": 1.7164, "step": 49556 }, { "epoch": 1.65, "grad_norm": 0.539752185344696, "learning_rate": 0.00025345218269534256, "loss": 1.7504, "step": 49557 }, { "epoch": 1.65, "grad_norm": 0.5463006496429443, "learning_rate": 0.0002534418571879954, "loss": 1.6681, "step": 49558 }, { "epoch": 1.65, "grad_norm": 0.5446304082870483, "learning_rate": 0.00025343153173716274, "loss": 1.7916, "step": 49559 }, { "epoch": 1.65, "grad_norm": 0.5511529445648193, "learning_rate": 0.00025342120634285744, "loss": 1.8172, "step": 49560 }, { "epoch": 1.65, "grad_norm": 0.549979031085968, "learning_rate": 0.00025341088100509193, "loss": 1.7522, "step": 49561 }, { "epoch": 1.65, "grad_norm": 0.5380043983459473, "learning_rate": 0.0002534005557238788, "loss": 1.7272, "step": 49562 }, { "epoch": 1.65, "grad_norm": 0.5433745980262756, "learning_rate": 0.00025339023049923047, "loss": 1.6929, "step": 49563 }, { "epoch": 1.65, "grad_norm": 0.5353520512580872, "learning_rate": 0.00025337990533115946, "loss": 1.7024, "step": 49564 }, { "epoch": 1.65, "grad_norm": 0.5386446118354797, "learning_rate": 0.0002533695802196786, "loss": 1.769, "step": 49565 }, { "epoch": 1.65, "grad_norm": 0.535524308681488, "learning_rate": 0.00025335925516479997, "loss": 1.7841, "step": 49566 }, { "epoch": 1.65, "grad_norm": 0.5370897054672241, "learning_rate": 0.0002533489301665364, "loss": 1.7905, "step": 49567 }, { "epoch": 1.65, "grad_norm": 0.5357613563537598, "learning_rate": 0.00025333860522490045, "loss": 1.7177, "step": 49568 }, { "epoch": 1.65, "grad_norm": 0.5330333709716797, "learning_rate": 0.0002533282803399045, "loss": 1.7326, "step": 49569 }, { "epoch": 1.65, "grad_norm": 0.5484037399291992, "learning_rate": 0.00025331795551156107, "loss": 1.7844, "step": 49570 }, { "epoch": 1.65, "grad_norm": 0.5506904125213623, "learning_rate": 0.0002533076307398827, "loss": 1.683, "step": 49571 }, { "epoch": 1.65, "grad_norm": 0.5486845970153809, "learning_rate": 0.0002532973060248823, "loss": 1.7343, "step": 49572 }, { "epoch": 1.65, "grad_norm": 0.5489113926887512, "learning_rate": 0.00025328698136657175, "loss": 1.7404, "step": 49573 }, { "epoch": 1.65, "grad_norm": 0.535555899143219, "learning_rate": 0.00025327665676496406, "loss": 1.6818, "step": 49574 }, { "epoch": 1.65, "grad_norm": 0.5613248944282532, "learning_rate": 0.0002532663322200717, "loss": 1.7508, "step": 49575 }, { "epoch": 1.65, "grad_norm": 0.5372443199157715, "learning_rate": 0.00025325600773190706, "loss": 1.703, "step": 49576 }, { "epoch": 1.65, "grad_norm": 0.5342039465904236, "learning_rate": 0.0002532456833004827, "loss": 1.7022, "step": 49577 }, { "epoch": 1.65, "grad_norm": 0.5512285828590393, "learning_rate": 0.00025323535892581126, "loss": 1.8076, "step": 49578 }, { "epoch": 1.65, "grad_norm": 0.535388171672821, "learning_rate": 0.0002532250346079052, "loss": 1.7866, "step": 49579 }, { "epoch": 1.65, "grad_norm": 0.5473954677581787, "learning_rate": 0.00025321471034677693, "loss": 1.6947, "step": 49580 }, { "epoch": 1.65, "grad_norm": 0.5522646307945251, "learning_rate": 0.0002532043861424392, "loss": 1.7732, "step": 49581 }, { "epoch": 1.65, "grad_norm": 0.5435625314712524, "learning_rate": 0.00025319406199490453, "loss": 1.8033, "step": 49582 }, { "epoch": 1.65, "grad_norm": 0.523442268371582, "learning_rate": 0.00025318373790418527, "loss": 1.6954, "step": 49583 }, { "epoch": 1.65, "grad_norm": 0.5340326428413391, "learning_rate": 0.0002531734138702941, "loss": 1.7474, "step": 49584 }, { "epoch": 1.65, "grad_norm": 0.5449919104576111, "learning_rate": 0.0002531630898932435, "loss": 1.8046, "step": 49585 }, { "epoch": 1.65, "grad_norm": 0.572168231010437, "learning_rate": 0.000253152765973046, "loss": 1.7951, "step": 49586 }, { "epoch": 1.65, "grad_norm": 0.536358118057251, "learning_rate": 0.000253142442109714, "loss": 1.8038, "step": 49587 }, { "epoch": 1.65, "grad_norm": 0.5301367044448853, "learning_rate": 0.0002531321183032604, "loss": 1.7013, "step": 49588 }, { "epoch": 1.65, "grad_norm": 0.5333269238471985, "learning_rate": 0.0002531217945536974, "loss": 1.7144, "step": 49589 }, { "epoch": 1.65, "grad_norm": 0.5442388653755188, "learning_rate": 0.0002531114708610376, "loss": 1.785, "step": 49590 }, { "epoch": 1.65, "grad_norm": 0.53647381067276, "learning_rate": 0.00025310114722529363, "loss": 1.7383, "step": 49591 }, { "epoch": 1.65, "grad_norm": 0.5404106378555298, "learning_rate": 0.000253090823646478, "loss": 1.6786, "step": 49592 }, { "epoch": 1.65, "grad_norm": 0.5446927547454834, "learning_rate": 0.00025308050012460306, "loss": 1.7835, "step": 49593 }, { "epoch": 1.65, "grad_norm": 0.5603904128074646, "learning_rate": 0.0002530701766596815, "loss": 1.7958, "step": 49594 }, { "epoch": 1.65, "grad_norm": 0.575872004032135, "learning_rate": 0.000253059853251726, "loss": 1.8045, "step": 49595 }, { "epoch": 1.65, "grad_norm": 0.5362178683280945, "learning_rate": 0.00025304952990074875, "loss": 1.7739, "step": 49596 }, { "epoch": 1.65, "grad_norm": 0.5399520397186279, "learning_rate": 0.0002530392066067625, "loss": 1.723, "step": 49597 }, { "epoch": 1.65, "grad_norm": 0.5495808720588684, "learning_rate": 0.0002530288833697798, "loss": 1.8113, "step": 49598 }, { "epoch": 1.65, "grad_norm": 0.5315303206443787, "learning_rate": 0.00025301856018981303, "loss": 1.7214, "step": 49599 }, { "epoch": 1.65, "grad_norm": 0.5360812544822693, "learning_rate": 0.0002530082370668748, "loss": 1.693, "step": 49600 }, { "epoch": 1.65, "grad_norm": 0.5261959433555603, "learning_rate": 0.00025299791400097764, "loss": 1.7315, "step": 49601 }, { "epoch": 1.65, "grad_norm": 0.5523057579994202, "learning_rate": 0.0002529875909921342, "loss": 1.7662, "step": 49602 }, { "epoch": 1.65, "grad_norm": 0.5321862697601318, "learning_rate": 0.00025297726804035677, "loss": 1.744, "step": 49603 }, { "epoch": 1.65, "grad_norm": 0.5282736420631409, "learning_rate": 0.00025296694514565806, "loss": 1.7738, "step": 49604 }, { "epoch": 1.65, "grad_norm": 0.5331858396530151, "learning_rate": 0.0002529566223080506, "loss": 1.7458, "step": 49605 }, { "epoch": 1.65, "grad_norm": 0.5637890100479126, "learning_rate": 0.0002529462995275468, "loss": 1.7293, "step": 49606 }, { "epoch": 1.65, "grad_norm": 0.5711999535560608, "learning_rate": 0.0002529359768041593, "loss": 1.7759, "step": 49607 }, { "epoch": 1.65, "grad_norm": 0.5526803135871887, "learning_rate": 0.00025292565413790057, "loss": 1.7876, "step": 49608 }, { "epoch": 1.65, "grad_norm": 0.5478256940841675, "learning_rate": 0.0002529153315287831, "loss": 1.7155, "step": 49609 }, { "epoch": 1.65, "grad_norm": 0.5460473895072937, "learning_rate": 0.0002529050089768195, "loss": 1.722, "step": 49610 }, { "epoch": 1.65, "grad_norm": 0.5541841387748718, "learning_rate": 0.00025289468648202236, "loss": 1.8198, "step": 49611 }, { "epoch": 1.65, "grad_norm": 0.5436733365058899, "learning_rate": 0.00025288436404440415, "loss": 1.6876, "step": 49612 }, { "epoch": 1.65, "grad_norm": 0.5558754801750183, "learning_rate": 0.00025287404166397727, "loss": 1.793, "step": 49613 }, { "epoch": 1.65, "grad_norm": 0.5678018927574158, "learning_rate": 0.0002528637193407543, "loss": 1.7535, "step": 49614 }, { "epoch": 1.65, "grad_norm": 0.5424191355705261, "learning_rate": 0.00025285339707474807, "loss": 1.7182, "step": 49615 }, { "epoch": 1.65, "grad_norm": 0.5529922246932983, "learning_rate": 0.0002528430748659707, "loss": 1.7453, "step": 49616 }, { "epoch": 1.65, "grad_norm": 0.5518201589584351, "learning_rate": 0.0002528327527144349, "loss": 1.6988, "step": 49617 }, { "epoch": 1.65, "grad_norm": 0.5368961691856384, "learning_rate": 0.00025282243062015324, "loss": 1.7284, "step": 49618 }, { "epoch": 1.65, "grad_norm": 0.5223261117935181, "learning_rate": 0.0002528121085831382, "loss": 1.7376, "step": 49619 }, { "epoch": 1.65, "grad_norm": 0.5494186282157898, "learning_rate": 0.00025280178660340224, "loss": 1.8023, "step": 49620 }, { "epoch": 1.65, "grad_norm": 0.543269693851471, "learning_rate": 0.0002527914646809579, "loss": 1.7493, "step": 49621 }, { "epoch": 1.65, "grad_norm": 0.5327881574630737, "learning_rate": 0.000252781142815818, "loss": 1.7553, "step": 49622 }, { "epoch": 1.65, "grad_norm": 0.5429497957229614, "learning_rate": 0.00025277082100799464, "loss": 1.7446, "step": 49623 }, { "epoch": 1.65, "grad_norm": 0.5312883853912354, "learning_rate": 0.0002527604992575006, "loss": 1.7281, "step": 49624 }, { "epoch": 1.65, "grad_norm": 0.5428026914596558, "learning_rate": 0.00025275017756434836, "loss": 1.7689, "step": 49625 }, { "epoch": 1.65, "grad_norm": 0.5474759936332703, "learning_rate": 0.00025273985592855046, "loss": 1.7843, "step": 49626 }, { "epoch": 1.65, "grad_norm": 0.539766252040863, "learning_rate": 0.00025272953435011937, "loss": 1.7306, "step": 49627 }, { "epoch": 1.65, "grad_norm": 0.5250378251075745, "learning_rate": 0.00025271921282906773, "loss": 1.7541, "step": 49628 }, { "epoch": 1.65, "grad_norm": 0.5642262697219849, "learning_rate": 0.00025270889136540795, "loss": 1.7908, "step": 49629 }, { "epoch": 1.65, "grad_norm": 0.5202927589416504, "learning_rate": 0.0002526985699591525, "loss": 1.6967, "step": 49630 }, { "epoch": 1.65, "grad_norm": 0.5264323949813843, "learning_rate": 0.00025268824861031413, "loss": 1.7825, "step": 49631 }, { "epoch": 1.65, "grad_norm": 0.5412662625312805, "learning_rate": 0.0002526779273189053, "loss": 1.7554, "step": 49632 }, { "epoch": 1.65, "grad_norm": 0.5376022458076477, "learning_rate": 0.0002526676060849384, "loss": 1.7332, "step": 49633 }, { "epoch": 1.65, "grad_norm": 0.5371370911598206, "learning_rate": 0.0002526572849084261, "loss": 1.7809, "step": 49634 }, { "epoch": 1.65, "grad_norm": 0.5343523621559143, "learning_rate": 0.00025264696378938097, "loss": 1.7459, "step": 49635 }, { "epoch": 1.65, "grad_norm": 0.5571780800819397, "learning_rate": 0.0002526366427278153, "loss": 1.7198, "step": 49636 }, { "epoch": 1.65, "grad_norm": 0.5505824089050293, "learning_rate": 0.00025262632172374177, "loss": 1.8515, "step": 49637 }, { "epoch": 1.65, "grad_norm": 0.5393224954605103, "learning_rate": 0.0002526160007771729, "loss": 1.7534, "step": 49638 }, { "epoch": 1.65, "grad_norm": 0.5488407611846924, "learning_rate": 0.0002526056798881213, "loss": 1.7278, "step": 49639 }, { "epoch": 1.65, "grad_norm": 0.5451217889785767, "learning_rate": 0.00025259535905659944, "loss": 1.7226, "step": 49640 }, { "epoch": 1.65, "grad_norm": 0.5981292724609375, "learning_rate": 0.0002525850382826198, "loss": 1.8248, "step": 49641 }, { "epoch": 1.65, "grad_norm": 0.5349610447883606, "learning_rate": 0.000252574717566195, "loss": 1.7279, "step": 49642 }, { "epoch": 1.65, "grad_norm": 0.5629927515983582, "learning_rate": 0.0002525643969073374, "loss": 1.7571, "step": 49643 }, { "epoch": 1.65, "grad_norm": 0.566548228263855, "learning_rate": 0.00025255407630605956, "loss": 1.7123, "step": 49644 }, { "epoch": 1.65, "grad_norm": 0.5796965956687927, "learning_rate": 0.0002525437557623743, "loss": 1.7061, "step": 49645 }, { "epoch": 1.65, "grad_norm": 0.5365945100784302, "learning_rate": 0.00025253343527629373, "loss": 1.7387, "step": 49646 }, { "epoch": 1.65, "grad_norm": 0.5350355505943298, "learning_rate": 0.0002525231148478307, "loss": 1.8232, "step": 49647 }, { "epoch": 1.65, "grad_norm": 0.5348920226097107, "learning_rate": 0.0002525127944769975, "loss": 1.7288, "step": 49648 }, { "epoch": 1.65, "grad_norm": 0.5506434440612793, "learning_rate": 0.0002525024741638069, "loss": 1.783, "step": 49649 }, { "epoch": 1.65, "grad_norm": 0.5682147741317749, "learning_rate": 0.0002524921539082712, "loss": 1.7971, "step": 49650 }, { "epoch": 1.65, "grad_norm": 0.5267034769058228, "learning_rate": 0.00025248183371040305, "loss": 1.6615, "step": 49651 }, { "epoch": 1.65, "grad_norm": 0.5271252393722534, "learning_rate": 0.0002524715135702151, "loss": 1.8025, "step": 49652 }, { "epoch": 1.65, "grad_norm": 0.5185980200767517, "learning_rate": 0.0002524611934877195, "loss": 1.7321, "step": 49653 }, { "epoch": 1.65, "grad_norm": 0.5643640756607056, "learning_rate": 0.0002524508734629291, "loss": 1.7722, "step": 49654 }, { "epoch": 1.65, "grad_norm": 0.5465165972709656, "learning_rate": 0.00025244055349585634, "loss": 1.818, "step": 49655 }, { "epoch": 1.65, "grad_norm": 0.5312551856040955, "learning_rate": 0.00025243023358651373, "loss": 1.7167, "step": 49656 }, { "epoch": 1.65, "grad_norm": 0.543380618095398, "learning_rate": 0.00025241991373491383, "loss": 1.6693, "step": 49657 }, { "epoch": 1.65, "grad_norm": 0.5277925729751587, "learning_rate": 0.00025240959394106906, "loss": 1.7802, "step": 49658 }, { "epoch": 1.65, "grad_norm": 0.527976393699646, "learning_rate": 0.0002523992742049922, "loss": 1.813, "step": 49659 }, { "epoch": 1.65, "grad_norm": 0.5525571703910828, "learning_rate": 0.0002523889545266954, "loss": 1.7979, "step": 49660 }, { "epoch": 1.65, "grad_norm": 2.766953468322754, "learning_rate": 0.0002523786349061915, "loss": 1.8061, "step": 49661 }, { "epoch": 1.65, "grad_norm": 0.5338173508644104, "learning_rate": 0.000252368315343493, "loss": 1.71, "step": 49662 }, { "epoch": 1.65, "grad_norm": 0.5704449415206909, "learning_rate": 0.00025235799583861224, "loss": 1.7533, "step": 49663 }, { "epoch": 1.65, "grad_norm": 0.5379054546356201, "learning_rate": 0.0002523476763915619, "loss": 1.8218, "step": 49664 }, { "epoch": 1.65, "grad_norm": 0.5542699098587036, "learning_rate": 0.0002523373570023545, "loss": 1.7531, "step": 49665 }, { "epoch": 1.65, "grad_norm": 0.550845742225647, "learning_rate": 0.00025232703767100244, "loss": 1.7413, "step": 49666 }, { "epoch": 1.65, "grad_norm": 0.5547777414321899, "learning_rate": 0.0002523167183975183, "loss": 1.7355, "step": 49667 }, { "epoch": 1.65, "grad_norm": 0.5241604447364807, "learning_rate": 0.0002523063991819147, "loss": 1.6594, "step": 49668 }, { "epoch": 1.65, "grad_norm": 0.5394547581672668, "learning_rate": 0.0002522960800242041, "loss": 1.8015, "step": 49669 }, { "epoch": 1.65, "grad_norm": 0.5308430194854736, "learning_rate": 0.00025228576092439905, "loss": 1.7201, "step": 49670 }, { "epoch": 1.65, "grad_norm": 0.5729553699493408, "learning_rate": 0.0002522754418825121, "loss": 1.7863, "step": 49671 }, { "epoch": 1.65, "grad_norm": 0.5361676812171936, "learning_rate": 0.0002522651228985557, "loss": 1.7399, "step": 49672 }, { "epoch": 1.65, "grad_norm": 0.5349282622337341, "learning_rate": 0.00025225480397254234, "loss": 1.7688, "step": 49673 }, { "epoch": 1.65, "grad_norm": 0.5374242067337036, "learning_rate": 0.00025224448510448457, "loss": 1.7131, "step": 49674 }, { "epoch": 1.65, "grad_norm": 0.5363601446151733, "learning_rate": 0.00025223416629439514, "loss": 1.7085, "step": 49675 }, { "epoch": 1.65, "grad_norm": 0.5572067499160767, "learning_rate": 0.00025222384754228623, "loss": 1.7435, "step": 49676 }, { "epoch": 1.65, "grad_norm": 0.5589547157287598, "learning_rate": 0.0002522135288481706, "loss": 1.7507, "step": 49677 }, { "epoch": 1.65, "grad_norm": 0.5311318039894104, "learning_rate": 0.00025220321021206065, "loss": 1.777, "step": 49678 }, { "epoch": 1.65, "grad_norm": 0.5372166633605957, "learning_rate": 0.00025219289163396906, "loss": 1.7644, "step": 49679 }, { "epoch": 1.65, "grad_norm": 0.5514896512031555, "learning_rate": 0.0002521825731139082, "loss": 1.7977, "step": 49680 }, { "epoch": 1.65, "grad_norm": 0.5347201824188232, "learning_rate": 0.00025217225465189055, "loss": 1.6866, "step": 49681 }, { "epoch": 1.65, "grad_norm": 0.5458536744117737, "learning_rate": 0.000252161936247929, "loss": 1.7289, "step": 49682 }, { "epoch": 1.65, "grad_norm": 0.5596488118171692, "learning_rate": 0.0002521516179020356, "loss": 1.7022, "step": 49683 }, { "epoch": 1.65, "grad_norm": 0.5412273406982422, "learning_rate": 0.0002521412996142231, "loss": 1.7541, "step": 49684 }, { "epoch": 1.65, "grad_norm": 0.532617449760437, "learning_rate": 0.0002521309813845041, "loss": 1.8084, "step": 49685 }, { "epoch": 1.65, "grad_norm": 0.5688003897666931, "learning_rate": 0.00025212066321289096, "loss": 1.7654, "step": 49686 }, { "epoch": 1.65, "grad_norm": 0.5549619197845459, "learning_rate": 0.00025211034509939627, "loss": 1.6679, "step": 49687 }, { "epoch": 1.65, "grad_norm": 0.5608463883399963, "learning_rate": 0.00025210002704403254, "loss": 1.781, "step": 49688 }, { "epoch": 1.65, "grad_norm": 0.5378515124320984, "learning_rate": 0.00025208970904681247, "loss": 1.7737, "step": 49689 }, { "epoch": 1.65, "grad_norm": 0.5512612462043762, "learning_rate": 0.0002520793911077483, "loss": 1.7978, "step": 49690 }, { "epoch": 1.65, "grad_norm": 0.5627533793449402, "learning_rate": 0.0002520690732268527, "loss": 1.7937, "step": 49691 }, { "epoch": 1.65, "grad_norm": 0.5582911372184753, "learning_rate": 0.00025205875540413827, "loss": 1.7465, "step": 49692 }, { "epoch": 1.65, "grad_norm": 0.5571285486221313, "learning_rate": 0.0002520484376396174, "loss": 1.731, "step": 49693 }, { "epoch": 1.65, "grad_norm": 0.5467581152915955, "learning_rate": 0.0002520381199333026, "loss": 1.7546, "step": 49694 }, { "epoch": 1.65, "grad_norm": 0.5573139190673828, "learning_rate": 0.0002520278022852066, "loss": 1.7856, "step": 49695 }, { "epoch": 1.65, "grad_norm": 0.5642693042755127, "learning_rate": 0.00025201748469534164, "loss": 1.7643, "step": 49696 }, { "epoch": 1.65, "grad_norm": 0.5594624876976013, "learning_rate": 0.00025200716716372037, "loss": 1.7136, "step": 49697 }, { "epoch": 1.65, "grad_norm": 0.5257249474525452, "learning_rate": 0.0002519968496903554, "loss": 1.7801, "step": 49698 }, { "epoch": 1.65, "grad_norm": 0.5500858426094055, "learning_rate": 0.0002519865322752592, "loss": 1.8112, "step": 49699 }, { "epoch": 1.65, "grad_norm": 0.5449820160865784, "learning_rate": 0.0002519762149184442, "loss": 1.7221, "step": 49700 }, { "epoch": 1.65, "grad_norm": 0.5556868314743042, "learning_rate": 0.00025196589761992304, "loss": 1.7483, "step": 49701 }, { "epoch": 1.65, "grad_norm": 0.531579852104187, "learning_rate": 0.00025195558037970826, "loss": 1.7894, "step": 49702 }, { "epoch": 1.65, "grad_norm": 0.5424443483352661, "learning_rate": 0.0002519452631978123, "loss": 1.7591, "step": 49703 }, { "epoch": 1.65, "grad_norm": 0.5546669363975525, "learning_rate": 0.0002519349460742476, "loss": 1.8253, "step": 49704 }, { "epoch": 1.65, "grad_norm": 0.5346829295158386, "learning_rate": 0.0002519246290090269, "loss": 1.7285, "step": 49705 }, { "epoch": 1.65, "grad_norm": 0.5486770868301392, "learning_rate": 0.0002519143120021626, "loss": 1.7652, "step": 49706 }, { "epoch": 1.65, "grad_norm": 0.5179905891418457, "learning_rate": 0.0002519039950536672, "loss": 1.6906, "step": 49707 }, { "epoch": 1.65, "grad_norm": 0.5453588962554932, "learning_rate": 0.0002518936781635533, "loss": 1.7429, "step": 49708 }, { "epoch": 1.65, "grad_norm": 1.3943663835525513, "learning_rate": 0.00025188336133183343, "loss": 1.7257, "step": 49709 }, { "epoch": 1.65, "grad_norm": 0.5336883068084717, "learning_rate": 0.00025187304455852, "loss": 1.7307, "step": 49710 }, { "epoch": 1.65, "grad_norm": 0.5426545739173889, "learning_rate": 0.0002518627278436256, "loss": 1.7067, "step": 49711 }, { "epoch": 1.65, "grad_norm": 0.5459943413734436, "learning_rate": 0.00025185241118716277, "loss": 1.713, "step": 49712 }, { "epoch": 1.65, "grad_norm": 0.5385525822639465, "learning_rate": 0.00025184209458914403, "loss": 1.7843, "step": 49713 }, { "epoch": 1.65, "grad_norm": 0.5354313254356384, "learning_rate": 0.0002518317780495819, "loss": 1.7021, "step": 49714 }, { "epoch": 1.65, "grad_norm": 0.5423274636268616, "learning_rate": 0.00025182146156848894, "loss": 1.7312, "step": 49715 }, { "epoch": 1.65, "grad_norm": 0.526278018951416, "learning_rate": 0.00025181114514587753, "loss": 1.801, "step": 49716 }, { "epoch": 1.65, "grad_norm": 0.5519317388534546, "learning_rate": 0.00025180082878176034, "loss": 1.7559, "step": 49717 }, { "epoch": 1.65, "grad_norm": 0.5463430285453796, "learning_rate": 0.00025179051247614973, "loss": 1.7569, "step": 49718 }, { "epoch": 1.65, "grad_norm": 0.5774224400520325, "learning_rate": 0.0002517801962290585, "loss": 1.7214, "step": 49719 }, { "epoch": 1.65, "grad_norm": 0.5417073965072632, "learning_rate": 0.00025176988004049896, "loss": 1.7802, "step": 49720 }, { "epoch": 1.65, "grad_norm": 0.5399336814880371, "learning_rate": 0.0002517595639104837, "loss": 1.7872, "step": 49721 }, { "epoch": 1.65, "grad_norm": 0.5417090654373169, "learning_rate": 0.0002517492478390252, "loss": 1.7604, "step": 49722 }, { "epoch": 1.65, "grad_norm": 0.5268968939781189, "learning_rate": 0.000251738931826136, "loss": 1.7073, "step": 49723 }, { "epoch": 1.65, "grad_norm": 0.5493420362472534, "learning_rate": 0.00025172861587182847, "loss": 1.7541, "step": 49724 }, { "epoch": 1.65, "grad_norm": 0.5277126431465149, "learning_rate": 0.00025171829997611557, "loss": 1.7654, "step": 49725 }, { "epoch": 1.65, "grad_norm": 0.5365089774131775, "learning_rate": 0.0002517079841390093, "loss": 1.7283, "step": 49726 }, { "epoch": 1.65, "grad_norm": 0.5464138388633728, "learning_rate": 0.0002516976683605225, "loss": 1.8086, "step": 49727 }, { "epoch": 1.65, "grad_norm": 0.5416175723075867, "learning_rate": 0.0002516873526406677, "loss": 1.7736, "step": 49728 }, { "epoch": 1.65, "grad_norm": 0.526174783706665, "learning_rate": 0.0002516770369794573, "loss": 1.7462, "step": 49729 }, { "epoch": 1.65, "grad_norm": 0.5643733143806458, "learning_rate": 0.00025166672137690385, "loss": 1.7543, "step": 49730 }, { "epoch": 1.65, "grad_norm": 0.554172933101654, "learning_rate": 0.00025165640583301975, "loss": 1.7012, "step": 49731 }, { "epoch": 1.65, "grad_norm": 0.5267943143844604, "learning_rate": 0.0002516460903478179, "loss": 1.8141, "step": 49732 }, { "epoch": 1.65, "grad_norm": 0.555659294128418, "learning_rate": 0.00025163577492131033, "loss": 1.8004, "step": 49733 }, { "epoch": 1.65, "grad_norm": 0.529517650604248, "learning_rate": 0.0002516254595535099, "loss": 1.799, "step": 49734 }, { "epoch": 1.65, "grad_norm": 0.5377528667449951, "learning_rate": 0.0002516151442444291, "loss": 1.7526, "step": 49735 }, { "epoch": 1.65, "grad_norm": 0.5615880489349365, "learning_rate": 0.0002516048289940803, "loss": 1.7682, "step": 49736 }, { "epoch": 1.65, "grad_norm": 0.5569466948509216, "learning_rate": 0.00025159451380247616, "loss": 1.693, "step": 49737 }, { "epoch": 1.65, "grad_norm": 0.552769660949707, "learning_rate": 0.00025158419866962904, "loss": 1.7347, "step": 49738 }, { "epoch": 1.65, "grad_norm": 0.5239962935447693, "learning_rate": 0.0002515738835955517, "loss": 1.7215, "step": 49739 }, { "epoch": 1.65, "grad_norm": 0.5448157787322998, "learning_rate": 0.00025156356858025645, "loss": 1.7895, "step": 49740 }, { "epoch": 1.65, "grad_norm": 0.5733261704444885, "learning_rate": 0.0002515532536237559, "loss": 1.8109, "step": 49741 }, { "epoch": 1.65, "grad_norm": 0.5500380992889404, "learning_rate": 0.0002515429387260626, "loss": 1.7706, "step": 49742 }, { "epoch": 1.65, "grad_norm": 0.5481350421905518, "learning_rate": 0.00025153262388718904, "loss": 1.7584, "step": 49743 }, { "epoch": 1.65, "grad_norm": 0.5249063968658447, "learning_rate": 0.00025152230910714766, "loss": 1.7026, "step": 49744 }, { "epoch": 1.66, "grad_norm": 0.5382452607154846, "learning_rate": 0.0002515119943859512, "loss": 1.7718, "step": 49745 }, { "epoch": 1.66, "grad_norm": 0.5296524167060852, "learning_rate": 0.0002515016797236119, "loss": 1.7143, "step": 49746 }, { "epoch": 1.66, "grad_norm": 0.533197820186615, "learning_rate": 0.0002514913651201424, "loss": 1.7585, "step": 49747 }, { "epoch": 1.66, "grad_norm": 0.5640482902526855, "learning_rate": 0.00025148105057555526, "loss": 1.8, "step": 49748 }, { "epoch": 1.66, "grad_norm": 0.5261129140853882, "learning_rate": 0.00025147073608986306, "loss": 1.7492, "step": 49749 }, { "epoch": 1.66, "grad_norm": 0.5687608122825623, "learning_rate": 0.00025146042166307816, "loss": 1.7406, "step": 49750 }, { "epoch": 1.66, "grad_norm": 0.5262646079063416, "learning_rate": 0.0002514501072952132, "loss": 1.7775, "step": 49751 }, { "epoch": 1.66, "grad_norm": 0.5675839185714722, "learning_rate": 0.00025143979298628065, "loss": 1.7571, "step": 49752 }, { "epoch": 1.66, "grad_norm": 0.5308884382247925, "learning_rate": 0.000251429478736293, "loss": 1.7354, "step": 49753 }, { "epoch": 1.66, "grad_norm": 0.5340480804443359, "learning_rate": 0.00025141916454526274, "loss": 1.7244, "step": 49754 }, { "epoch": 1.66, "grad_norm": 0.5344316959381104, "learning_rate": 0.00025140885041320264, "loss": 1.7545, "step": 49755 }, { "epoch": 1.66, "grad_norm": 0.5337232947349548, "learning_rate": 0.0002513985363401249, "loss": 1.7794, "step": 49756 }, { "epoch": 1.66, "grad_norm": 0.5504516959190369, "learning_rate": 0.0002513882223260422, "loss": 1.7365, "step": 49757 }, { "epoch": 1.66, "grad_norm": 0.5282747149467468, "learning_rate": 0.00025137790837096704, "loss": 1.6878, "step": 49758 }, { "epoch": 1.66, "grad_norm": 0.5243284106254578, "learning_rate": 0.00025136759447491196, "loss": 1.7308, "step": 49759 }, { "epoch": 1.66, "grad_norm": 0.5512761473655701, "learning_rate": 0.00025135728063788946, "loss": 1.7965, "step": 49760 }, { "epoch": 1.66, "grad_norm": 0.5594666600227356, "learning_rate": 0.0002513469668599119, "loss": 1.795, "step": 49761 }, { "epoch": 1.66, "grad_norm": 0.5443091988563538, "learning_rate": 0.0002513366531409922, "loss": 1.7257, "step": 49762 }, { "epoch": 1.66, "grad_norm": 0.5551163554191589, "learning_rate": 0.00025132633948114243, "loss": 1.7535, "step": 49763 }, { "epoch": 1.66, "grad_norm": 0.5258064866065979, "learning_rate": 0.00025131602588037544, "loss": 1.7785, "step": 49764 }, { "epoch": 1.66, "grad_norm": 0.5302591919898987, "learning_rate": 0.00025130571233870355, "loss": 1.7699, "step": 49765 }, { "epoch": 1.66, "grad_norm": 0.5517438650131226, "learning_rate": 0.0002512953988561394, "loss": 1.7061, "step": 49766 }, { "epoch": 1.66, "grad_norm": 0.5508602857589722, "learning_rate": 0.00025128508543269544, "loss": 1.736, "step": 49767 }, { "epoch": 1.66, "grad_norm": 0.525138795375824, "learning_rate": 0.0002512747720683841, "loss": 1.763, "step": 49768 }, { "epoch": 1.66, "grad_norm": 0.5417372584342957, "learning_rate": 0.0002512644587632183, "loss": 1.6934, "step": 49769 }, { "epoch": 1.66, "grad_norm": 0.5373556017875671, "learning_rate": 0.00025125414551721, "loss": 1.8241, "step": 49770 }, { "epoch": 1.66, "grad_norm": 0.5353747606277466, "learning_rate": 0.00025124383233037206, "loss": 1.7236, "step": 49771 }, { "epoch": 1.66, "grad_norm": 0.5315842032432556, "learning_rate": 0.00025123351920271696, "loss": 1.7437, "step": 49772 }, { "epoch": 1.66, "grad_norm": 0.5454655289649963, "learning_rate": 0.0002512232061342572, "loss": 1.7546, "step": 49773 }, { "epoch": 1.66, "grad_norm": 0.5227761268615723, "learning_rate": 0.00025121289312500517, "loss": 1.7635, "step": 49774 }, { "epoch": 1.66, "grad_norm": 0.563602089881897, "learning_rate": 0.0002512025801749735, "loss": 1.7526, "step": 49775 }, { "epoch": 1.66, "grad_norm": 0.5498781204223633, "learning_rate": 0.0002511922672841749, "loss": 1.7847, "step": 49776 }, { "epoch": 1.66, "grad_norm": 0.5698331594467163, "learning_rate": 0.0002511819544526215, "loss": 1.7685, "step": 49777 }, { "epoch": 1.66, "grad_norm": 0.581464409828186, "learning_rate": 0.000251171641680326, "loss": 1.7694, "step": 49778 }, { "epoch": 1.66, "grad_norm": 0.5458431839942932, "learning_rate": 0.0002511613289673011, "loss": 1.8378, "step": 49779 }, { "epoch": 1.66, "grad_norm": 0.5218923687934875, "learning_rate": 0.0002511510163135591, "loss": 1.7707, "step": 49780 }, { "epoch": 1.66, "grad_norm": 0.5220226645469666, "learning_rate": 0.00025114070371911245, "loss": 1.7306, "step": 49781 }, { "epoch": 1.66, "grad_norm": 0.5485215783119202, "learning_rate": 0.00025113039118397396, "loss": 1.7047, "step": 49782 }, { "epoch": 1.66, "grad_norm": 0.5269054770469666, "learning_rate": 0.00025112007870815585, "loss": 1.7898, "step": 49783 }, { "epoch": 1.66, "grad_norm": 0.5433578491210938, "learning_rate": 0.00025110976629167075, "loss": 1.7829, "step": 49784 }, { "epoch": 1.66, "grad_norm": 0.5585143566131592, "learning_rate": 0.0002510994539345312, "loss": 1.831, "step": 49785 }, { "epoch": 1.66, "grad_norm": 0.5386961698532104, "learning_rate": 0.00025108914163674977, "loss": 1.711, "step": 49786 }, { "epoch": 1.66, "grad_norm": 0.5424867272377014, "learning_rate": 0.0002510788293983389, "loss": 1.8011, "step": 49787 }, { "epoch": 1.66, "grad_norm": 0.5623255372047424, "learning_rate": 0.0002510685172193111, "loss": 1.7806, "step": 49788 }, { "epoch": 1.66, "grad_norm": 0.5353295803070068, "learning_rate": 0.000251058205099679, "loss": 1.6708, "step": 49789 }, { "epoch": 1.66, "grad_norm": 0.5424395203590393, "learning_rate": 0.00025104789303945495, "loss": 1.7627, "step": 49790 }, { "epoch": 1.66, "grad_norm": 0.564248263835907, "learning_rate": 0.00025103758103865145, "loss": 1.6998, "step": 49791 }, { "epoch": 1.66, "grad_norm": 0.5284000635147095, "learning_rate": 0.0002510272690972813, "loss": 1.6855, "step": 49792 }, { "epoch": 1.66, "grad_norm": 0.5300031304359436, "learning_rate": 0.0002510169572153567, "loss": 1.6913, "step": 49793 }, { "epoch": 1.66, "grad_norm": 0.5360952019691467, "learning_rate": 0.0002510066453928903, "loss": 1.7401, "step": 49794 }, { "epoch": 1.66, "grad_norm": 0.5338654518127441, "learning_rate": 0.0002509963336298946, "loss": 1.8024, "step": 49795 }, { "epoch": 1.66, "grad_norm": 0.555111825466156, "learning_rate": 0.0002509860219263823, "loss": 1.7383, "step": 49796 }, { "epoch": 1.66, "grad_norm": 0.5238678455352783, "learning_rate": 0.0002509757102823656, "loss": 1.6883, "step": 49797 }, { "epoch": 1.66, "grad_norm": 0.5348387956619263, "learning_rate": 0.0002509653986978571, "loss": 1.7648, "step": 49798 }, { "epoch": 1.66, "grad_norm": 0.5462944507598877, "learning_rate": 0.0002509550871728696, "loss": 1.7386, "step": 49799 }, { "epoch": 1.66, "grad_norm": 0.5373898148536682, "learning_rate": 0.0002509447757074152, "loss": 1.7078, "step": 49800 }, { "epoch": 1.66, "grad_norm": 0.5317469835281372, "learning_rate": 0.00025093446430150666, "loss": 1.7668, "step": 49801 }, { "epoch": 1.66, "grad_norm": 0.5332717895507812, "learning_rate": 0.00025092415295515654, "loss": 1.7475, "step": 49802 }, { "epoch": 1.66, "grad_norm": 0.5316479802131653, "learning_rate": 0.00025091384166837715, "loss": 1.6633, "step": 49803 }, { "epoch": 1.66, "grad_norm": 0.5356335639953613, "learning_rate": 0.0002509035304411812, "loss": 1.7912, "step": 49804 }, { "epoch": 1.66, "grad_norm": 0.5403806567192078, "learning_rate": 0.000250893219273581, "loss": 1.6967, "step": 49805 }, { "epoch": 1.66, "grad_norm": 0.5432347655296326, "learning_rate": 0.00025088290816558946, "loss": 1.7863, "step": 49806 }, { "epoch": 1.66, "grad_norm": 0.5494604110717773, "learning_rate": 0.0002508725971172186, "loss": 1.7519, "step": 49807 }, { "epoch": 1.66, "grad_norm": 0.5366512537002563, "learning_rate": 0.0002508622861284812, "loss": 1.7674, "step": 49808 }, { "epoch": 1.66, "grad_norm": 0.527115523815155, "learning_rate": 0.00025085197519938984, "loss": 1.7542, "step": 49809 }, { "epoch": 1.66, "grad_norm": 0.5266666412353516, "learning_rate": 0.0002508416643299569, "loss": 1.7112, "step": 49810 }, { "epoch": 1.66, "grad_norm": 0.5519689917564392, "learning_rate": 0.0002508313535201949, "loss": 1.8087, "step": 49811 }, { "epoch": 1.66, "grad_norm": 0.5429218411445618, "learning_rate": 0.00025082104277011647, "loss": 1.7501, "step": 49812 }, { "epoch": 1.66, "grad_norm": 0.541885495185852, "learning_rate": 0.000250810732079734, "loss": 1.6732, "step": 49813 }, { "epoch": 1.66, "grad_norm": 0.5235862731933594, "learning_rate": 0.0002508004214490599, "loss": 1.7504, "step": 49814 }, { "epoch": 1.66, "grad_norm": 0.5285544395446777, "learning_rate": 0.000250790110878107, "loss": 1.7833, "step": 49815 }, { "epoch": 1.66, "grad_norm": 0.5197141170501709, "learning_rate": 0.0002507798003668877, "loss": 1.7104, "step": 49816 }, { "epoch": 1.66, "grad_norm": 0.5400208234786987, "learning_rate": 0.0002507694899154144, "loss": 1.8276, "step": 49817 }, { "epoch": 1.66, "grad_norm": 0.5248516201972961, "learning_rate": 0.0002507591795236997, "loss": 1.7265, "step": 49818 }, { "epoch": 1.66, "grad_norm": 0.5380339622497559, "learning_rate": 0.0002507488691917562, "loss": 1.775, "step": 49819 }, { "epoch": 1.66, "grad_norm": 0.544866681098938, "learning_rate": 0.0002507385589195961, "loss": 1.7147, "step": 49820 }, { "epoch": 1.66, "grad_norm": 0.519037663936615, "learning_rate": 0.0002507282487072322, "loss": 1.7323, "step": 49821 }, { "epoch": 1.66, "grad_norm": 0.5393233895301819, "learning_rate": 0.0002507179385546771, "loss": 1.7907, "step": 49822 }, { "epoch": 1.66, "grad_norm": 0.5230140686035156, "learning_rate": 0.000250707628461943, "loss": 1.7881, "step": 49823 }, { "epoch": 1.66, "grad_norm": 0.5862137675285339, "learning_rate": 0.0002506973184290426, "loss": 1.7203, "step": 49824 }, { "epoch": 1.66, "grad_norm": 0.5287305116653442, "learning_rate": 0.00025068700845598844, "loss": 1.7479, "step": 49825 }, { "epoch": 1.66, "grad_norm": 0.5471332669258118, "learning_rate": 0.000250676698542793, "loss": 1.7265, "step": 49826 }, { "epoch": 1.66, "grad_norm": 0.535325825214386, "learning_rate": 0.0002506663886894687, "loss": 1.788, "step": 49827 }, { "epoch": 1.66, "grad_norm": 0.5339736938476562, "learning_rate": 0.00025065607889602815, "loss": 1.6887, "step": 49828 }, { "epoch": 1.66, "grad_norm": 0.53404700756073, "learning_rate": 0.00025064576916248395, "loss": 1.7566, "step": 49829 }, { "epoch": 1.66, "grad_norm": 0.5379244089126587, "learning_rate": 0.0002506354594888484, "loss": 1.7, "step": 49830 }, { "epoch": 1.66, "grad_norm": 0.5718253254890442, "learning_rate": 0.00025062514987513415, "loss": 1.8599, "step": 49831 }, { "epoch": 1.66, "grad_norm": 0.5412546396255493, "learning_rate": 0.00025061484032135376, "loss": 1.7142, "step": 49832 }, { "epoch": 1.66, "grad_norm": 0.5483678579330444, "learning_rate": 0.00025060453082751966, "loss": 1.8199, "step": 49833 }, { "epoch": 1.66, "grad_norm": 0.550830066204071, "learning_rate": 0.00025059422139364434, "loss": 1.6772, "step": 49834 }, { "epoch": 1.66, "grad_norm": 0.5231660008430481, "learning_rate": 0.0002505839120197403, "loss": 1.7703, "step": 49835 }, { "epoch": 1.66, "grad_norm": 0.5464065670967102, "learning_rate": 0.00025057360270582023, "loss": 1.7235, "step": 49836 }, { "epoch": 1.66, "grad_norm": 0.5395373106002808, "learning_rate": 0.00025056329345189644, "loss": 1.8094, "step": 49837 }, { "epoch": 1.66, "grad_norm": 0.5287615656852722, "learning_rate": 0.00025055298425798156, "loss": 1.8314, "step": 49838 }, { "epoch": 1.66, "grad_norm": 0.5704002976417542, "learning_rate": 0.0002505426751240881, "loss": 1.7488, "step": 49839 }, { "epoch": 1.66, "grad_norm": 0.5356295704841614, "learning_rate": 0.0002505323660502285, "loss": 1.7602, "step": 49840 }, { "epoch": 1.66, "grad_norm": 0.5529335141181946, "learning_rate": 0.0002505220570364153, "loss": 1.7536, "step": 49841 }, { "epoch": 1.66, "grad_norm": 0.5499469637870789, "learning_rate": 0.00025051174808266117, "loss": 1.7869, "step": 49842 }, { "epoch": 1.66, "grad_norm": 0.5463702082633972, "learning_rate": 0.00025050143918897835, "loss": 1.7213, "step": 49843 }, { "epoch": 1.66, "grad_norm": 0.5362492799758911, "learning_rate": 0.0002504911303553795, "loss": 1.6214, "step": 49844 }, { "epoch": 1.66, "grad_norm": 0.5415356159210205, "learning_rate": 0.00025048082158187716, "loss": 1.7485, "step": 49845 }, { "epoch": 1.66, "grad_norm": 0.5312921404838562, "learning_rate": 0.0002504705128684838, "loss": 1.7256, "step": 49846 }, { "epoch": 1.66, "grad_norm": 0.5929427146911621, "learning_rate": 0.00025046020421521194, "loss": 1.7967, "step": 49847 }, { "epoch": 1.66, "grad_norm": 0.5634375214576721, "learning_rate": 0.000250449895622074, "loss": 1.7882, "step": 49848 }, { "epoch": 1.66, "grad_norm": 0.6070293188095093, "learning_rate": 0.0002504395870890828, "loss": 1.7741, "step": 49849 }, { "epoch": 1.66, "grad_norm": 0.5383157134056091, "learning_rate": 0.00025042927861625044, "loss": 1.6976, "step": 49850 }, { "epoch": 1.66, "grad_norm": 0.5423806309700012, "learning_rate": 0.0002504189702035897, "loss": 1.7871, "step": 49851 }, { "epoch": 1.66, "grad_norm": 0.5456956028938293, "learning_rate": 0.00025040866185111306, "loss": 1.7072, "step": 49852 }, { "epoch": 1.66, "grad_norm": 0.5375548005104065, "learning_rate": 0.00025039835355883296, "loss": 1.748, "step": 49853 }, { "epoch": 1.66, "grad_norm": 0.5483559370040894, "learning_rate": 0.0002503880453267619, "loss": 1.8012, "step": 49854 }, { "epoch": 1.66, "grad_norm": 0.5231634974479675, "learning_rate": 0.00025037773715491245, "loss": 1.7034, "step": 49855 }, { "epoch": 1.66, "grad_norm": 0.5501444339752197, "learning_rate": 0.0002503674290432973, "loss": 1.7737, "step": 49856 }, { "epoch": 1.66, "grad_norm": 0.5389891266822815, "learning_rate": 0.0002503571209919286, "loss": 1.7679, "step": 49857 }, { "epoch": 1.66, "grad_norm": 0.5370136499404907, "learning_rate": 0.00025034681300081904, "loss": 1.7877, "step": 49858 }, { "epoch": 1.66, "grad_norm": 0.5303992033004761, "learning_rate": 0.0002503365050699812, "loss": 1.7521, "step": 49859 }, { "epoch": 1.66, "grad_norm": 0.5318936705589294, "learning_rate": 0.0002503261971994275, "loss": 1.7182, "step": 49860 }, { "epoch": 1.66, "grad_norm": 0.5373069047927856, "learning_rate": 0.00025031588938917047, "loss": 1.7091, "step": 49861 }, { "epoch": 1.66, "grad_norm": 0.53438800573349, "learning_rate": 0.0002503055816392227, "loss": 1.7018, "step": 49862 }, { "epoch": 1.66, "grad_norm": 0.5331377983093262, "learning_rate": 0.00025029527394959656, "loss": 1.7547, "step": 49863 }, { "epoch": 1.66, "grad_norm": 0.545864462852478, "learning_rate": 0.00025028496632030457, "loss": 1.6859, "step": 49864 }, { "epoch": 1.66, "grad_norm": 0.5592917799949646, "learning_rate": 0.0002502746587513594, "loss": 1.7346, "step": 49865 }, { "epoch": 1.66, "grad_norm": 0.5585343837738037, "learning_rate": 0.00025026435124277346, "loss": 1.7028, "step": 49866 }, { "epoch": 1.66, "grad_norm": 0.5545951724052429, "learning_rate": 0.0002502540437945593, "loss": 1.7734, "step": 49867 }, { "epoch": 1.66, "grad_norm": 0.5441813468933105, "learning_rate": 0.0002502437364067293, "loss": 1.8172, "step": 49868 }, { "epoch": 1.66, "grad_norm": 0.5487985610961914, "learning_rate": 0.00025023342907929617, "loss": 1.7757, "step": 49869 }, { "epoch": 1.66, "grad_norm": 0.5217043161392212, "learning_rate": 0.0002502231218122723, "loss": 1.6874, "step": 49870 }, { "epoch": 1.66, "grad_norm": 0.5351887941360474, "learning_rate": 0.0002502128146056701, "loss": 1.6714, "step": 49871 }, { "epoch": 1.66, "grad_norm": 0.5316200256347656, "learning_rate": 0.00025020250745950247, "loss": 1.709, "step": 49872 }, { "epoch": 1.66, "grad_norm": 0.5769661664962769, "learning_rate": 0.0002501922003737814, "loss": 1.8352, "step": 49873 }, { "epoch": 1.66, "grad_norm": 0.5189846158027649, "learning_rate": 0.00025018189334851976, "loss": 1.6927, "step": 49874 }, { "epoch": 1.66, "grad_norm": 0.5419771671295166, "learning_rate": 0.00025017158638372997, "loss": 1.7647, "step": 49875 }, { "epoch": 1.66, "grad_norm": 0.5471159815788269, "learning_rate": 0.00025016127947942457, "loss": 1.7301, "step": 49876 }, { "epoch": 1.66, "grad_norm": 0.5552114844322205, "learning_rate": 0.00025015097263561596, "loss": 1.732, "step": 49877 }, { "epoch": 1.66, "grad_norm": 1.5149098634719849, "learning_rate": 0.00025014066585231674, "loss": 1.744, "step": 49878 }, { "epoch": 1.66, "grad_norm": 0.5230854749679565, "learning_rate": 0.0002501303591295395, "loss": 1.6899, "step": 49879 }, { "epoch": 1.66, "grad_norm": 0.5539467930793762, "learning_rate": 0.0002501200524672965, "loss": 1.6852, "step": 49880 }, { "epoch": 1.66, "grad_norm": 0.5483643412590027, "learning_rate": 0.0002501097458656005, "loss": 1.7032, "step": 49881 }, { "epoch": 1.66, "grad_norm": 0.5159466862678528, "learning_rate": 0.0002500994393244639, "loss": 1.7197, "step": 49882 }, { "epoch": 1.66, "grad_norm": 0.530685305595398, "learning_rate": 0.0002500891328438993, "loss": 1.7713, "step": 49883 }, { "epoch": 1.66, "grad_norm": 0.5496184825897217, "learning_rate": 0.00025007882642391903, "loss": 1.7406, "step": 49884 }, { "epoch": 1.66, "grad_norm": 0.5439271330833435, "learning_rate": 0.00025006852006453564, "loss": 1.6873, "step": 49885 }, { "epoch": 1.66, "grad_norm": 0.5327682495117188, "learning_rate": 0.000250058213765762, "loss": 1.7004, "step": 49886 }, { "epoch": 1.66, "grad_norm": 0.5352113842964172, "learning_rate": 0.00025004790752761007, "loss": 1.7529, "step": 49887 }, { "epoch": 1.66, "grad_norm": 0.5399633646011353, "learning_rate": 0.00025003760135009265, "loss": 1.7088, "step": 49888 }, { "epoch": 1.66, "grad_norm": 0.5663984417915344, "learning_rate": 0.00025002729523322236, "loss": 1.7676, "step": 49889 }, { "epoch": 1.66, "grad_norm": 0.583860456943512, "learning_rate": 0.00025001698917701145, "loss": 1.8247, "step": 49890 }, { "epoch": 1.66, "grad_norm": 0.5472649931907654, "learning_rate": 0.0002500066831814726, "loss": 1.7353, "step": 49891 }, { "epoch": 1.66, "grad_norm": 0.55195152759552, "learning_rate": 0.00024999637724661816, "loss": 1.7718, "step": 49892 }, { "epoch": 1.66, "grad_norm": 0.5413210988044739, "learning_rate": 0.00024998607137246095, "loss": 1.7121, "step": 49893 }, { "epoch": 1.66, "grad_norm": 0.5671042203903198, "learning_rate": 0.0002499757655590131, "loss": 1.7437, "step": 49894 }, { "epoch": 1.66, "grad_norm": 0.5436305999755859, "learning_rate": 0.0002499654598062874, "loss": 1.7433, "step": 49895 }, { "epoch": 1.66, "grad_norm": 0.5329686999320984, "learning_rate": 0.00024995515411429625, "loss": 1.7602, "step": 49896 }, { "epoch": 1.66, "grad_norm": 0.5563231706619263, "learning_rate": 0.00024994484848305215, "loss": 1.7106, "step": 49897 }, { "epoch": 1.66, "grad_norm": 0.5435848236083984, "learning_rate": 0.0002499345429125676, "loss": 1.8061, "step": 49898 }, { "epoch": 1.66, "grad_norm": 0.535811722278595, "learning_rate": 0.00024992423740285524, "loss": 1.7119, "step": 49899 }, { "epoch": 1.66, "grad_norm": 0.5696074962615967, "learning_rate": 0.0002499139319539273, "loss": 1.725, "step": 49900 }, { "epoch": 1.66, "grad_norm": 0.538224458694458, "learning_rate": 0.0002499036265657966, "loss": 1.7184, "step": 49901 }, { "epoch": 1.66, "grad_norm": 0.5434126257896423, "learning_rate": 0.00024989332123847544, "loss": 1.7026, "step": 49902 }, { "epoch": 1.66, "grad_norm": 0.5482321381568909, "learning_rate": 0.0002498830159719765, "loss": 1.7687, "step": 49903 }, { "epoch": 1.66, "grad_norm": 0.5338730216026306, "learning_rate": 0.0002498727107663121, "loss": 1.7928, "step": 49904 }, { "epoch": 1.66, "grad_norm": 0.5408102869987488, "learning_rate": 0.0002498624056214949, "loss": 1.7258, "step": 49905 }, { "epoch": 1.66, "grad_norm": 0.5377793908119202, "learning_rate": 0.0002498521005375374, "loss": 1.7542, "step": 49906 }, { "epoch": 1.66, "grad_norm": 0.5490210652351379, "learning_rate": 0.000249841795514452, "loss": 1.684, "step": 49907 }, { "epoch": 1.66, "grad_norm": 0.5444813966751099, "learning_rate": 0.0002498314905522512, "loss": 1.6953, "step": 49908 }, { "epoch": 1.66, "grad_norm": 0.5484896302223206, "learning_rate": 0.00024982118565094775, "loss": 1.7085, "step": 49909 }, { "epoch": 1.66, "grad_norm": 0.5286132097244263, "learning_rate": 0.00024981088081055383, "loss": 1.6955, "step": 49910 }, { "epoch": 1.66, "grad_norm": 0.5417529940605164, "learning_rate": 0.00024980057603108215, "loss": 1.6764, "step": 49911 }, { "epoch": 1.66, "grad_norm": 0.5500984191894531, "learning_rate": 0.0002497902713125452, "loss": 1.7226, "step": 49912 }, { "epoch": 1.66, "grad_norm": 0.5512811541557312, "learning_rate": 0.0002497799666549555, "loss": 1.7532, "step": 49913 }, { "epoch": 1.66, "grad_norm": 0.5566118955612183, "learning_rate": 0.0002497696620583254, "loss": 1.7126, "step": 49914 }, { "epoch": 1.66, "grad_norm": 0.5466560125350952, "learning_rate": 0.00024975935752266756, "loss": 1.7253, "step": 49915 }, { "epoch": 1.66, "grad_norm": 0.5459476709365845, "learning_rate": 0.00024974905304799466, "loss": 1.7556, "step": 49916 }, { "epoch": 1.66, "grad_norm": 0.5503557324409485, "learning_rate": 0.0002497387486343187, "loss": 1.755, "step": 49917 }, { "epoch": 1.66, "grad_norm": 0.5682820081710815, "learning_rate": 0.00024972844428165265, "loss": 1.7975, "step": 49918 }, { "epoch": 1.66, "grad_norm": 0.5371321439743042, "learning_rate": 0.0002497181399900089, "loss": 1.7418, "step": 49919 }, { "epoch": 1.66, "grad_norm": 0.5410525798797607, "learning_rate": 0.00024970783575939984, "loss": 1.7052, "step": 49920 }, { "epoch": 1.66, "grad_norm": 0.5445049405097961, "learning_rate": 0.00024969753158983807, "loss": 1.8341, "step": 49921 }, { "epoch": 1.66, "grad_norm": 0.5381482839584351, "learning_rate": 0.000249687227481336, "loss": 1.8453, "step": 49922 }, { "epoch": 1.66, "grad_norm": 0.5431039929389954, "learning_rate": 0.0002496769234339065, "loss": 1.7636, "step": 49923 }, { "epoch": 1.66, "grad_norm": 0.5445713996887207, "learning_rate": 0.00024966661944756155, "loss": 1.7849, "step": 49924 }, { "epoch": 1.66, "grad_norm": 0.5782416462898254, "learning_rate": 0.00024965631552231396, "loss": 1.7447, "step": 49925 }, { "epoch": 1.66, "grad_norm": 0.5515232682228088, "learning_rate": 0.0002496460116581763, "loss": 1.7605, "step": 49926 }, { "epoch": 1.66, "grad_norm": 0.5506274700164795, "learning_rate": 0.00024963570785516083, "loss": 1.7593, "step": 49927 }, { "epoch": 1.66, "grad_norm": 0.5638557076454163, "learning_rate": 0.00024962540411328023, "loss": 1.7443, "step": 49928 }, { "epoch": 1.66, "grad_norm": 0.5648659467697144, "learning_rate": 0.00024961510043254695, "loss": 1.7106, "step": 49929 }, { "epoch": 1.66, "grad_norm": 0.5281461477279663, "learning_rate": 0.00024960479681297357, "loss": 1.7003, "step": 49930 }, { "epoch": 1.66, "grad_norm": 0.5348171591758728, "learning_rate": 0.0002495944932545724, "loss": 1.789, "step": 49931 }, { "epoch": 1.66, "grad_norm": 0.569939374923706, "learning_rate": 0.00024958418975735616, "loss": 1.7964, "step": 49932 }, { "epoch": 1.66, "grad_norm": 0.5515848398208618, "learning_rate": 0.0002495738863213374, "loss": 1.7882, "step": 49933 }, { "epoch": 1.66, "grad_norm": 0.5509846210479736, "learning_rate": 0.0002495635829465284, "loss": 1.7639, "step": 49934 }, { "epoch": 1.66, "grad_norm": 0.5564067363739014, "learning_rate": 0.0002495532796329418, "loss": 1.7707, "step": 49935 }, { "epoch": 1.66, "grad_norm": 0.5377355217933655, "learning_rate": 0.00024954297638059015, "loss": 1.7558, "step": 49936 }, { "epoch": 1.66, "grad_norm": 0.5432065725326538, "learning_rate": 0.00024953267318948583, "loss": 1.679, "step": 49937 }, { "epoch": 1.66, "grad_norm": 0.5363063812255859, "learning_rate": 0.0002495223700596413, "loss": 1.7241, "step": 49938 }, { "epoch": 1.66, "grad_norm": 0.5360019207000732, "learning_rate": 0.0002495120669910694, "loss": 1.7325, "step": 49939 }, { "epoch": 1.66, "grad_norm": 0.5695762038230896, "learning_rate": 0.0002495017639837823, "loss": 1.7704, "step": 49940 }, { "epoch": 1.66, "grad_norm": 0.5394785404205322, "learning_rate": 0.00024949146103779254, "loss": 1.84, "step": 49941 }, { "epoch": 1.66, "grad_norm": 0.5454939603805542, "learning_rate": 0.0002494811581531128, "loss": 1.7255, "step": 49942 }, { "epoch": 1.66, "grad_norm": 0.5496522188186646, "learning_rate": 0.0002494708553297555, "loss": 1.6988, "step": 49943 }, { "epoch": 1.66, "grad_norm": 0.5485708117485046, "learning_rate": 0.0002494605525677331, "loss": 1.7224, "step": 49944 }, { "epoch": 1.66, "grad_norm": 0.5398874878883362, "learning_rate": 0.00024945024986705805, "loss": 1.7461, "step": 49945 }, { "epoch": 1.66, "grad_norm": 0.5820251703262329, "learning_rate": 0.00024943994722774314, "loss": 1.7829, "step": 49946 }, { "epoch": 1.66, "grad_norm": 0.5391455888748169, "learning_rate": 0.00024942964464980057, "loss": 1.7327, "step": 49947 }, { "epoch": 1.66, "grad_norm": 0.5160592198371887, "learning_rate": 0.000249419342133243, "loss": 1.6792, "step": 49948 }, { "epoch": 1.66, "grad_norm": 0.5480973124504089, "learning_rate": 0.0002494090396780829, "loss": 1.7329, "step": 49949 }, { "epoch": 1.66, "grad_norm": 0.5318827629089355, "learning_rate": 0.0002493987372843327, "loss": 1.7189, "step": 49950 }, { "epoch": 1.66, "grad_norm": 0.5440024733543396, "learning_rate": 0.00024938843495200504, "loss": 1.783, "step": 49951 }, { "epoch": 1.66, "grad_norm": 0.6735614538192749, "learning_rate": 0.0002493781326811123, "loss": 1.8255, "step": 49952 }, { "epoch": 1.66, "grad_norm": 0.5730890035629272, "learning_rate": 0.00024936783047166714, "loss": 1.8105, "step": 49953 }, { "epoch": 1.66, "grad_norm": 0.5565690994262695, "learning_rate": 0.00024935752832368197, "loss": 1.7454, "step": 49954 }, { "epoch": 1.66, "grad_norm": 0.5435783267021179, "learning_rate": 0.0002493472262371692, "loss": 1.7821, "step": 49955 }, { "epoch": 1.66, "grad_norm": 0.5495005249977112, "learning_rate": 0.00024933692421214156, "loss": 1.8084, "step": 49956 }, { "epoch": 1.66, "grad_norm": 0.5271344184875488, "learning_rate": 0.0002493266222486113, "loss": 1.6525, "step": 49957 }, { "epoch": 1.66, "grad_norm": 0.5414630770683289, "learning_rate": 0.0002493163203465911, "loss": 1.752, "step": 49958 }, { "epoch": 1.66, "grad_norm": 0.5487685203552246, "learning_rate": 0.00024930601850609355, "loss": 1.7689, "step": 49959 }, { "epoch": 1.66, "grad_norm": 0.5451198220252991, "learning_rate": 0.00024929571672713084, "loss": 1.7363, "step": 49960 }, { "epoch": 1.66, "grad_norm": 0.5513134598731995, "learning_rate": 0.00024928541500971574, "loss": 1.7428, "step": 49961 }, { "epoch": 1.66, "grad_norm": 0.5478376150131226, "learning_rate": 0.0002492751133538606, "loss": 1.7641, "step": 49962 }, { "epoch": 1.66, "grad_norm": 0.5535669922828674, "learning_rate": 0.00024926481175957814, "loss": 1.6921, "step": 49963 }, { "epoch": 1.66, "grad_norm": 0.5604958534240723, "learning_rate": 0.0002492545102268806, "loss": 1.8048, "step": 49964 }, { "epoch": 1.66, "grad_norm": 0.5314218401908875, "learning_rate": 0.0002492442087557806, "loss": 1.7351, "step": 49965 }, { "epoch": 1.66, "grad_norm": 0.5378918647766113, "learning_rate": 0.00024923390734629084, "loss": 1.7436, "step": 49966 }, { "epoch": 1.66, "grad_norm": 0.5316920876502991, "learning_rate": 0.00024922360599842345, "loss": 1.674, "step": 49967 }, { "epoch": 1.66, "grad_norm": 0.5637198090553284, "learning_rate": 0.00024921330471219116, "loss": 1.7462, "step": 49968 }, { "epoch": 1.66, "grad_norm": 0.5501906275749207, "learning_rate": 0.00024920300348760645, "loss": 1.766, "step": 49969 }, { "epoch": 1.66, "grad_norm": 0.5489657521247864, "learning_rate": 0.0002491927023246818, "loss": 1.7937, "step": 49970 }, { "epoch": 1.66, "grad_norm": 0.5583655834197998, "learning_rate": 0.0002491824012234297, "loss": 1.716, "step": 49971 }, { "epoch": 1.66, "grad_norm": 0.5939077138900757, "learning_rate": 0.00024917210018386265, "loss": 1.7604, "step": 49972 }, { "epoch": 1.66, "grad_norm": 0.5350610017776489, "learning_rate": 0.0002491617992059934, "loss": 1.6934, "step": 49973 }, { "epoch": 1.66, "grad_norm": 0.527725875377655, "learning_rate": 0.000249151498289834, "loss": 1.7109, "step": 49974 }, { "epoch": 1.66, "grad_norm": 0.5557849407196045, "learning_rate": 0.0002491411974353972, "loss": 1.8714, "step": 49975 }, { "epoch": 1.66, "grad_norm": 0.5477713346481323, "learning_rate": 0.0002491308966426956, "loss": 1.7593, "step": 49976 }, { "epoch": 1.66, "grad_norm": 0.5446840524673462, "learning_rate": 0.0002491205959117416, "loss": 1.7129, "step": 49977 }, { "epoch": 1.66, "grad_norm": 0.5607038736343384, "learning_rate": 0.0002491102952425476, "loss": 1.7356, "step": 49978 }, { "epoch": 1.66, "grad_norm": 0.5544845461845398, "learning_rate": 0.0002490999946351263, "loss": 1.7324, "step": 49979 }, { "epoch": 1.66, "grad_norm": 0.5251097083091736, "learning_rate": 0.00024908969408949007, "loss": 1.7403, "step": 49980 }, { "epoch": 1.66, "grad_norm": 0.5411417484283447, "learning_rate": 0.00024907939360565136, "loss": 1.6247, "step": 49981 }, { "epoch": 1.66, "grad_norm": 0.5276028513908386, "learning_rate": 0.00024906909318362286, "loss": 1.653, "step": 49982 }, { "epoch": 1.66, "grad_norm": 0.5313776731491089, "learning_rate": 0.00024905879282341697, "loss": 1.8199, "step": 49983 }, { "epoch": 1.66, "grad_norm": 0.5472577214241028, "learning_rate": 0.00024904849252504617, "loss": 1.6961, "step": 49984 }, { "epoch": 1.66, "grad_norm": 0.5463392734527588, "learning_rate": 0.00024903819228852303, "loss": 1.8132, "step": 49985 }, { "epoch": 1.66, "grad_norm": 0.5298985242843628, "learning_rate": 0.0002490278921138601, "loss": 1.7311, "step": 49986 }, { "epoch": 1.66, "grad_norm": 0.5528388619422913, "learning_rate": 0.0002490175920010696, "loss": 1.6977, "step": 49987 }, { "epoch": 1.66, "grad_norm": 0.5363515615463257, "learning_rate": 0.0002490072919501643, "loss": 1.6681, "step": 49988 }, { "epoch": 1.66, "grad_norm": 0.5272883772850037, "learning_rate": 0.0002489969919611568, "loss": 1.761, "step": 49989 }, { "epoch": 1.66, "grad_norm": 0.5327850580215454, "learning_rate": 0.0002489866920340592, "loss": 1.7283, "step": 49990 }, { "epoch": 1.66, "grad_norm": 0.5721969604492188, "learning_rate": 0.0002489763921688843, "loss": 1.7684, "step": 49991 }, { "epoch": 1.66, "grad_norm": 0.5354923009872437, "learning_rate": 0.00024896609236564463, "loss": 1.7431, "step": 49992 }, { "epoch": 1.66, "grad_norm": 0.5362933278083801, "learning_rate": 0.0002489557926243526, "loss": 1.6986, "step": 49993 }, { "epoch": 1.66, "grad_norm": 0.5292413830757141, "learning_rate": 0.00024894549294502067, "loss": 1.7515, "step": 49994 }, { "epoch": 1.66, "grad_norm": 0.5308426022529602, "learning_rate": 0.00024893519332766127, "loss": 1.7369, "step": 49995 }, { "epoch": 1.66, "grad_norm": 0.5592093467712402, "learning_rate": 0.0002489248937722873, "loss": 1.8251, "step": 49996 }, { "epoch": 1.66, "grad_norm": 0.9104157090187073, "learning_rate": 0.00024891459427891073, "loss": 1.7554, "step": 49997 }, { "epoch": 1.66, "grad_norm": 0.561042845249176, "learning_rate": 0.0002489042948475444, "loss": 1.7436, "step": 49998 }, { "epoch": 1.66, "grad_norm": 0.5395542979240417, "learning_rate": 0.0002488939954782008, "loss": 1.7871, "step": 49999 }, { "epoch": 1.66, "grad_norm": 0.5592344999313354, "learning_rate": 0.0002488836961708923, "loss": 1.8012, "step": 50000 }, { "epoch": 1.66, "grad_norm": 0.550025463104248, "learning_rate": 0.00024887339692563146, "loss": 1.7466, "step": 50001 }, { "epoch": 1.66, "grad_norm": 0.557162344455719, "learning_rate": 0.0002488630977424307, "loss": 1.8046, "step": 50002 }, { "epoch": 1.66, "grad_norm": 0.5437936186790466, "learning_rate": 0.0002488527986213028, "loss": 1.811, "step": 50003 }, { "epoch": 1.66, "grad_norm": 0.5373902916908264, "learning_rate": 0.00024884249956225987, "loss": 1.7196, "step": 50004 }, { "epoch": 1.66, "grad_norm": 0.5616486668586731, "learning_rate": 0.0002488322005653147, "loss": 1.7152, "step": 50005 }, { "epoch": 1.66, "grad_norm": 0.533358097076416, "learning_rate": 0.0002488219016304797, "loss": 1.7707, "step": 50006 }, { "epoch": 1.66, "grad_norm": 0.5378443002700806, "learning_rate": 0.00024881160275776735, "loss": 1.695, "step": 50007 }, { "epoch": 1.66, "grad_norm": 0.5289894938468933, "learning_rate": 0.00024880130394719016, "loss": 1.7734, "step": 50008 }, { "epoch": 1.66, "grad_norm": 0.5621049404144287, "learning_rate": 0.00024879100519876064, "loss": 1.8388, "step": 50009 }, { "epoch": 1.66, "grad_norm": 0.5335361361503601, "learning_rate": 0.0002487807065124914, "loss": 1.7338, "step": 50010 }, { "epoch": 1.66, "grad_norm": 0.5383021831512451, "learning_rate": 0.0002487704078883947, "loss": 1.7549, "step": 50011 }, { "epoch": 1.66, "grad_norm": 0.5497964024543762, "learning_rate": 0.00024876010932648325, "loss": 1.7874, "step": 50012 }, { "epoch": 1.66, "grad_norm": 0.5433239936828613, "learning_rate": 0.0002487498108267695, "loss": 1.7023, "step": 50013 }, { "epoch": 1.66, "grad_norm": 0.5236809849739075, "learning_rate": 0.0002487395123892659, "loss": 1.667, "step": 50014 }, { "epoch": 1.66, "grad_norm": 0.5415185689926147, "learning_rate": 0.0002487292140139849, "loss": 1.762, "step": 50015 }, { "epoch": 1.66, "grad_norm": 0.5239076614379883, "learning_rate": 0.0002487189157009392, "loss": 1.6485, "step": 50016 }, { "epoch": 1.66, "grad_norm": 0.5384061336517334, "learning_rate": 0.00024870861745014114, "loss": 1.7175, "step": 50017 }, { "epoch": 1.66, "grad_norm": 0.5422403812408447, "learning_rate": 0.00024869831926160315, "loss": 1.7196, "step": 50018 }, { "epoch": 1.66, "grad_norm": 0.5345625877380371, "learning_rate": 0.00024868802113533795, "loss": 1.8008, "step": 50019 }, { "epoch": 1.66, "grad_norm": 0.5332090258598328, "learning_rate": 0.000248677723071358, "loss": 1.7825, "step": 50020 }, { "epoch": 1.66, "grad_norm": 0.5343497395515442, "learning_rate": 0.00024866742506967563, "loss": 1.7164, "step": 50021 }, { "epoch": 1.66, "grad_norm": 0.5445441603660583, "learning_rate": 0.0002486571271303034, "loss": 1.7947, "step": 50022 }, { "epoch": 1.66, "grad_norm": 0.5544697642326355, "learning_rate": 0.000248646829253254, "loss": 1.7371, "step": 50023 }, { "epoch": 1.66, "grad_norm": 0.5485031008720398, "learning_rate": 0.0002486365314385397, "loss": 1.7747, "step": 50024 }, { "epoch": 1.66, "grad_norm": 0.5428510308265686, "learning_rate": 0.000248626233686173, "loss": 1.788, "step": 50025 }, { "epoch": 1.66, "grad_norm": 0.5512212514877319, "learning_rate": 0.00024861593599616674, "loss": 1.7352, "step": 50026 }, { "epoch": 1.66, "grad_norm": 0.5644697546958923, "learning_rate": 0.0002486056383685329, "loss": 1.6726, "step": 50027 }, { "epoch": 1.66, "grad_norm": 0.5306721329689026, "learning_rate": 0.0002485953408032843, "loss": 1.7309, "step": 50028 }, { "epoch": 1.66, "grad_norm": 0.5255934596061707, "learning_rate": 0.0002485850433004334, "loss": 1.8269, "step": 50029 }, { "epoch": 1.66, "grad_norm": 0.5542782545089722, "learning_rate": 0.0002485747458599928, "loss": 1.7152, "step": 50030 }, { "epoch": 1.66, "grad_norm": 0.5595807433128357, "learning_rate": 0.00024856444848197476, "loss": 1.7202, "step": 50031 }, { "epoch": 1.66, "grad_norm": 0.5393376350402832, "learning_rate": 0.0002485541511663918, "loss": 1.7147, "step": 50032 }, { "epoch": 1.66, "grad_norm": 0.5348098278045654, "learning_rate": 0.0002485438539132568, "loss": 1.7715, "step": 50033 }, { "epoch": 1.66, "grad_norm": 0.5484455823898315, "learning_rate": 0.0002485335567225817, "loss": 1.811, "step": 50034 }, { "epoch": 1.66, "grad_norm": 0.5564528703689575, "learning_rate": 0.0002485232595943794, "loss": 1.7476, "step": 50035 }, { "epoch": 1.66, "grad_norm": 0.5389146208763123, "learning_rate": 0.00024851296252866235, "loss": 1.6952, "step": 50036 }, { "epoch": 1.66, "grad_norm": 0.5639315843582153, "learning_rate": 0.0002485026655254429, "loss": 1.7187, "step": 50037 }, { "epoch": 1.66, "grad_norm": 0.5494461059570312, "learning_rate": 0.0002484923685847336, "loss": 1.7337, "step": 50038 }, { "epoch": 1.66, "grad_norm": 0.532272219657898, "learning_rate": 0.0002484820717065469, "loss": 1.7528, "step": 50039 }, { "epoch": 1.66, "grad_norm": 0.5394971370697021, "learning_rate": 0.0002484717748908956, "loss": 1.7287, "step": 50040 }, { "epoch": 1.66, "grad_norm": 0.5402843952178955, "learning_rate": 0.0002484614781377918, "loss": 1.7484, "step": 50041 }, { "epoch": 1.66, "grad_norm": 2.591644525527954, "learning_rate": 0.00024845118144724824, "loss": 1.8887, "step": 50042 }, { "epoch": 1.66, "grad_norm": 0.5520251393318176, "learning_rate": 0.00024844088481927736, "loss": 1.7333, "step": 50043 }, { "epoch": 1.66, "grad_norm": 0.5385363698005676, "learning_rate": 0.0002484305882538916, "loss": 1.7634, "step": 50044 }, { "epoch": 1.67, "grad_norm": 0.5485388040542603, "learning_rate": 0.00024842029175110354, "loss": 1.7143, "step": 50045 }, { "epoch": 1.67, "grad_norm": 0.5508395433425903, "learning_rate": 0.0002484099953109257, "loss": 1.7215, "step": 50046 }, { "epoch": 1.67, "grad_norm": 0.5399913787841797, "learning_rate": 0.00024839969893337044, "loss": 1.7873, "step": 50047 }, { "epoch": 1.67, "grad_norm": 0.5464562773704529, "learning_rate": 0.00024838940261845025, "loss": 1.783, "step": 50048 }, { "epoch": 1.67, "grad_norm": 0.5492023825645447, "learning_rate": 0.00024837910636617787, "loss": 1.775, "step": 50049 }, { "epoch": 1.67, "grad_norm": 0.5428246855735779, "learning_rate": 0.00024836881017656565, "loss": 1.8256, "step": 50050 }, { "epoch": 1.67, "grad_norm": 0.547551155090332, "learning_rate": 0.000248358514049626, "loss": 1.6971, "step": 50051 }, { "epoch": 1.67, "grad_norm": 0.549681544303894, "learning_rate": 0.0002483482179853716, "loss": 1.7105, "step": 50052 }, { "epoch": 1.67, "grad_norm": 0.5487030744552612, "learning_rate": 0.0002483379219838149, "loss": 1.7245, "step": 50053 }, { "epoch": 1.67, "grad_norm": 0.5310683250427246, "learning_rate": 0.0002483276260449682, "loss": 1.6876, "step": 50054 }, { "epoch": 1.67, "grad_norm": 0.5577578544616699, "learning_rate": 0.0002483173301688441, "loss": 1.7392, "step": 50055 }, { "epoch": 1.67, "grad_norm": 0.5547060966491699, "learning_rate": 0.00024830703435545535, "loss": 1.7832, "step": 50056 }, { "epoch": 1.67, "grad_norm": 0.5607022643089294, "learning_rate": 0.0002482967386048141, "loss": 1.7059, "step": 50057 }, { "epoch": 1.67, "grad_norm": 0.5627495646476746, "learning_rate": 0.00024828644291693307, "loss": 1.7401, "step": 50058 }, { "epoch": 1.67, "grad_norm": 0.5361513495445251, "learning_rate": 0.0002482761472918246, "loss": 1.7521, "step": 50059 }, { "epoch": 1.67, "grad_norm": 0.5369622707366943, "learning_rate": 0.0002482658517295014, "loss": 1.749, "step": 50060 }, { "epoch": 1.67, "grad_norm": 0.5542200803756714, "learning_rate": 0.00024825555622997567, "loss": 1.7862, "step": 50061 }, { "epoch": 1.67, "grad_norm": 0.5615105032920837, "learning_rate": 0.0002482452607932601, "loss": 1.7524, "step": 50062 }, { "epoch": 1.67, "grad_norm": 0.5790823101997375, "learning_rate": 0.00024823496541936723, "loss": 1.6821, "step": 50063 }, { "epoch": 1.67, "grad_norm": 0.5569458603858948, "learning_rate": 0.0002482246701083095, "loss": 1.7662, "step": 50064 }, { "epoch": 1.67, "grad_norm": 0.548754096031189, "learning_rate": 0.0002482143748600993, "loss": 1.7399, "step": 50065 }, { "epoch": 1.67, "grad_norm": 0.5483560562133789, "learning_rate": 0.00024820407967474935, "loss": 1.7741, "step": 50066 }, { "epoch": 1.67, "grad_norm": 0.5473145842552185, "learning_rate": 0.0002481937845522719, "loss": 1.7998, "step": 50067 }, { "epoch": 1.67, "grad_norm": 0.5700187087059021, "learning_rate": 0.00024818348949267954, "loss": 1.7671, "step": 50068 }, { "epoch": 1.67, "grad_norm": 0.5480830669403076, "learning_rate": 0.0002481731944959848, "loss": 1.7205, "step": 50069 }, { "epoch": 1.67, "grad_norm": 0.5439304113388062, "learning_rate": 0.0002481628995622003, "loss": 1.6863, "step": 50070 }, { "epoch": 1.67, "grad_norm": 0.5487874150276184, "learning_rate": 0.0002481526046913383, "loss": 1.7405, "step": 50071 }, { "epoch": 1.67, "grad_norm": 0.5380492806434631, "learning_rate": 0.0002481423098834114, "loss": 1.7943, "step": 50072 }, { "epoch": 1.67, "grad_norm": 0.5475709438323975, "learning_rate": 0.00024813201513843215, "loss": 1.697, "step": 50073 }, { "epoch": 1.67, "grad_norm": 0.569512128829956, "learning_rate": 0.00024812172045641297, "loss": 1.8489, "step": 50074 }, { "epoch": 1.67, "grad_norm": 0.5670740604400635, "learning_rate": 0.00024811142583736626, "loss": 1.6927, "step": 50075 }, { "epoch": 1.67, "grad_norm": 0.5649849772453308, "learning_rate": 0.0002481011312813048, "loss": 1.777, "step": 50076 }, { "epoch": 1.67, "grad_norm": 0.5561026930809021, "learning_rate": 0.0002480908367882408, "loss": 1.7296, "step": 50077 }, { "epoch": 1.67, "grad_norm": 0.5510157942771912, "learning_rate": 0.0002480805423581869, "loss": 1.7843, "step": 50078 }, { "epoch": 1.67, "grad_norm": 0.5494733452796936, "learning_rate": 0.00024807024799115563, "loss": 1.7494, "step": 50079 }, { "epoch": 1.67, "grad_norm": 0.5489688515663147, "learning_rate": 0.00024805995368715943, "loss": 1.7669, "step": 50080 }, { "epoch": 1.67, "grad_norm": 0.5418482422828674, "learning_rate": 0.0002480496594462107, "loss": 1.811, "step": 50081 }, { "epoch": 1.67, "grad_norm": 0.5687837600708008, "learning_rate": 0.0002480393652683221, "loss": 1.7973, "step": 50082 }, { "epoch": 1.67, "grad_norm": 0.5532401204109192, "learning_rate": 0.00024802907115350614, "loss": 1.8261, "step": 50083 }, { "epoch": 1.67, "grad_norm": 0.5455729365348816, "learning_rate": 0.00024801877710177504, "loss": 1.6658, "step": 50084 }, { "epoch": 1.67, "grad_norm": 0.5572330951690674, "learning_rate": 0.00024800848311314153, "loss": 1.7578, "step": 50085 }, { "epoch": 1.67, "grad_norm": 0.5579484701156616, "learning_rate": 0.0002479981891876182, "loss": 1.6591, "step": 50086 }, { "epoch": 1.67, "grad_norm": 0.5388556718826294, "learning_rate": 0.0002479878953252173, "loss": 1.772, "step": 50087 }, { "epoch": 1.67, "grad_norm": 0.5543184280395508, "learning_rate": 0.0002479776015259514, "loss": 1.7567, "step": 50088 }, { "epoch": 1.67, "grad_norm": 0.5582150816917419, "learning_rate": 0.000247967307789833, "loss": 1.7268, "step": 50089 }, { "epoch": 1.67, "grad_norm": 0.5379075407981873, "learning_rate": 0.0002479570141168748, "loss": 1.775, "step": 50090 }, { "epoch": 1.67, "grad_norm": 0.5294058322906494, "learning_rate": 0.00024794672050708897, "loss": 1.7423, "step": 50091 }, { "epoch": 1.67, "grad_norm": 0.5535125732421875, "learning_rate": 0.0002479364269604882, "loss": 1.7642, "step": 50092 }, { "epoch": 1.67, "grad_norm": 0.5383661389350891, "learning_rate": 0.00024792613347708495, "loss": 1.7762, "step": 50093 }, { "epoch": 1.67, "grad_norm": 0.5405633449554443, "learning_rate": 0.00024791584005689164, "loss": 1.7663, "step": 50094 }, { "epoch": 1.67, "grad_norm": 0.5351577997207642, "learning_rate": 0.0002479055466999209, "loss": 1.8396, "step": 50095 }, { "epoch": 1.67, "grad_norm": 0.5496026277542114, "learning_rate": 0.0002478952534061852, "loss": 1.7037, "step": 50096 }, { "epoch": 1.67, "grad_norm": 0.5383468270301819, "learning_rate": 0.00024788496017569685, "loss": 1.6763, "step": 50097 }, { "epoch": 1.67, "grad_norm": 0.5423423647880554, "learning_rate": 0.00024787466700846846, "loss": 1.7437, "step": 50098 }, { "epoch": 1.67, "grad_norm": 0.541178822517395, "learning_rate": 0.0002478643739045126, "loss": 1.769, "step": 50099 }, { "epoch": 1.67, "grad_norm": 0.5507998466491699, "learning_rate": 0.00024785408086384176, "loss": 1.753, "step": 50100 }, { "epoch": 1.67, "grad_norm": 0.5629006624221802, "learning_rate": 0.00024784378788646834, "loss": 1.6833, "step": 50101 }, { "epoch": 1.67, "grad_norm": 0.5329475998878479, "learning_rate": 0.0002478334949724049, "loss": 1.7556, "step": 50102 }, { "epoch": 1.67, "grad_norm": 0.5496012568473816, "learning_rate": 0.0002478232021216639, "loss": 1.8193, "step": 50103 }, { "epoch": 1.67, "grad_norm": 0.5661594867706299, "learning_rate": 0.00024781290933425783, "loss": 1.7971, "step": 50104 }, { "epoch": 1.67, "grad_norm": 0.5483298897743225, "learning_rate": 0.00024780261661019914, "loss": 1.6976, "step": 50105 }, { "epoch": 1.67, "grad_norm": 0.5566141605377197, "learning_rate": 0.00024779232394950057, "loss": 1.8143, "step": 50106 }, { "epoch": 1.67, "grad_norm": 0.5577918887138367, "learning_rate": 0.0002477820313521742, "loss": 1.7867, "step": 50107 }, { "epoch": 1.67, "grad_norm": 0.559657871723175, "learning_rate": 0.00024777173881823287, "loss": 1.7829, "step": 50108 }, { "epoch": 1.67, "grad_norm": 0.5561211705207825, "learning_rate": 0.00024776144634768894, "loss": 1.7775, "step": 50109 }, { "epoch": 1.67, "grad_norm": 0.5390756726264954, "learning_rate": 0.00024775115394055496, "loss": 1.7645, "step": 50110 }, { "epoch": 1.67, "grad_norm": 0.5482602119445801, "learning_rate": 0.00024774086159684333, "loss": 1.7869, "step": 50111 }, { "epoch": 1.67, "grad_norm": 0.5833775997161865, "learning_rate": 0.0002477305693165665, "loss": 1.6961, "step": 50112 }, { "epoch": 1.67, "grad_norm": 0.5751244425773621, "learning_rate": 0.0002477202770997373, "loss": 1.7811, "step": 50113 }, { "epoch": 1.67, "grad_norm": 0.5480043888092041, "learning_rate": 0.0002477099849463677, "loss": 1.6828, "step": 50114 }, { "epoch": 1.67, "grad_norm": 0.5280948281288147, "learning_rate": 0.00024769969285647064, "loss": 1.742, "step": 50115 }, { "epoch": 1.67, "grad_norm": 0.5797131657600403, "learning_rate": 0.0002476894008300585, "loss": 1.7055, "step": 50116 }, { "epoch": 1.67, "grad_norm": 0.5624654293060303, "learning_rate": 0.0002476791088671436, "loss": 1.8075, "step": 50117 }, { "epoch": 1.67, "grad_norm": 0.563950777053833, "learning_rate": 0.0002476688169677386, "loss": 1.7638, "step": 50118 }, { "epoch": 1.67, "grad_norm": 0.5625393390655518, "learning_rate": 0.00024765852513185585, "loss": 1.7048, "step": 50119 }, { "epoch": 1.67, "grad_norm": 0.5518787503242493, "learning_rate": 0.00024764823335950815, "loss": 1.7555, "step": 50120 }, { "epoch": 1.67, "grad_norm": 0.5485278367996216, "learning_rate": 0.0002476379416507076, "loss": 1.7538, "step": 50121 }, { "epoch": 1.67, "grad_norm": 0.5289353132247925, "learning_rate": 0.00024762765000546687, "loss": 1.7928, "step": 50122 }, { "epoch": 1.67, "grad_norm": 0.5182318091392517, "learning_rate": 0.00024761735842379856, "loss": 1.6627, "step": 50123 }, { "epoch": 1.67, "grad_norm": 0.5386806130409241, "learning_rate": 0.000247607066905715, "loss": 1.7113, "step": 50124 }, { "epoch": 1.67, "grad_norm": 0.5568515062332153, "learning_rate": 0.0002475967754512288, "loss": 1.7276, "step": 50125 }, { "epoch": 1.67, "grad_norm": 0.5198317766189575, "learning_rate": 0.00024758648406035224, "loss": 1.7626, "step": 50126 }, { "epoch": 1.67, "grad_norm": 0.5452837347984314, "learning_rate": 0.0002475761927330982, "loss": 1.8377, "step": 50127 }, { "epoch": 1.67, "grad_norm": 0.5482368469238281, "learning_rate": 0.0002475659014694787, "loss": 1.7431, "step": 50128 }, { "epoch": 1.67, "grad_norm": 0.5527687668800354, "learning_rate": 0.0002475556102695066, "loss": 1.7432, "step": 50129 }, { "epoch": 1.67, "grad_norm": 0.5679836869239807, "learning_rate": 0.0002475453191331943, "loss": 1.8016, "step": 50130 }, { "epoch": 1.67, "grad_norm": 0.5406829714775085, "learning_rate": 0.0002475350280605542, "loss": 1.7661, "step": 50131 }, { "epoch": 1.67, "grad_norm": 0.540143609046936, "learning_rate": 0.0002475247370515988, "loss": 1.8135, "step": 50132 }, { "epoch": 1.67, "grad_norm": 0.5672717690467834, "learning_rate": 0.0002475144461063408, "loss": 1.7629, "step": 50133 }, { "epoch": 1.67, "grad_norm": 0.5535745620727539, "learning_rate": 0.00024750415522479234, "loss": 1.6946, "step": 50134 }, { "epoch": 1.67, "grad_norm": 0.5454514026641846, "learning_rate": 0.00024749386440696614, "loss": 1.7972, "step": 50135 }, { "epoch": 1.67, "grad_norm": 0.5599886775016785, "learning_rate": 0.00024748357365287466, "loss": 1.7721, "step": 50136 }, { "epoch": 1.67, "grad_norm": 0.5519064664840698, "learning_rate": 0.0002474732829625304, "loss": 1.722, "step": 50137 }, { "epoch": 1.67, "grad_norm": 0.5692320466041565, "learning_rate": 0.0002474629923359459, "loss": 1.698, "step": 50138 }, { "epoch": 1.67, "grad_norm": 0.5518649816513062, "learning_rate": 0.0002474527017731335, "loss": 1.7247, "step": 50139 }, { "epoch": 1.67, "grad_norm": 0.5371819138526917, "learning_rate": 0.0002474424112741058, "loss": 1.7384, "step": 50140 }, { "epoch": 1.67, "grad_norm": 0.537626326084137, "learning_rate": 0.00024743212083887527, "loss": 1.6841, "step": 50141 }, { "epoch": 1.67, "grad_norm": 0.5486427545547485, "learning_rate": 0.00024742183046745435, "loss": 1.7642, "step": 50142 }, { "epoch": 1.67, "grad_norm": 0.5521467328071594, "learning_rate": 0.00024741154015985576, "loss": 1.6979, "step": 50143 }, { "epoch": 1.67, "grad_norm": 0.5394188761711121, "learning_rate": 0.0002474012499160916, "loss": 1.7561, "step": 50144 }, { "epoch": 1.67, "grad_norm": 0.639380156993866, "learning_rate": 0.00024739095973617465, "loss": 1.7439, "step": 50145 }, { "epoch": 1.67, "grad_norm": 0.5578838586807251, "learning_rate": 0.00024738066962011734, "loss": 1.7865, "step": 50146 }, { "epoch": 1.67, "grad_norm": 0.5483642816543579, "learning_rate": 0.00024737037956793216, "loss": 1.6837, "step": 50147 }, { "epoch": 1.67, "grad_norm": 0.5546362996101379, "learning_rate": 0.00024736008957963154, "loss": 1.7086, "step": 50148 }, { "epoch": 1.67, "grad_norm": 0.544079065322876, "learning_rate": 0.000247349799655228, "loss": 1.7447, "step": 50149 }, { "epoch": 1.67, "grad_norm": 0.5519934892654419, "learning_rate": 0.0002473395097947342, "loss": 1.7582, "step": 50150 }, { "epoch": 1.67, "grad_norm": 0.552544116973877, "learning_rate": 0.0002473292199981623, "loss": 1.6689, "step": 50151 }, { "epoch": 1.67, "grad_norm": 0.5393243432044983, "learning_rate": 0.000247318930265525, "loss": 1.7244, "step": 50152 }, { "epoch": 1.67, "grad_norm": 0.5365069508552551, "learning_rate": 0.0002473086405968348, "loss": 1.6907, "step": 50153 }, { "epoch": 1.67, "grad_norm": 0.5499047040939331, "learning_rate": 0.0002472983509921041, "loss": 1.6472, "step": 50154 }, { "epoch": 1.67, "grad_norm": 0.5574737191200256, "learning_rate": 0.00024728806145134544, "loss": 1.6916, "step": 50155 }, { "epoch": 1.67, "grad_norm": 0.5501691102981567, "learning_rate": 0.0002472777719745713, "loss": 1.7685, "step": 50156 }, { "epoch": 1.67, "grad_norm": 0.5384939908981323, "learning_rate": 0.0002472674825617943, "loss": 1.6466, "step": 50157 }, { "epoch": 1.67, "grad_norm": 0.5395609140396118, "learning_rate": 0.00024725719321302665, "loss": 1.7641, "step": 50158 }, { "epoch": 1.67, "grad_norm": 0.549217939376831, "learning_rate": 0.00024724690392828103, "loss": 1.7269, "step": 50159 }, { "epoch": 1.67, "grad_norm": 0.5592280030250549, "learning_rate": 0.00024723661470757, "loss": 1.743, "step": 50160 }, { "epoch": 1.67, "grad_norm": 0.5547047853469849, "learning_rate": 0.00024722632555090586, "loss": 1.7855, "step": 50161 }, { "epoch": 1.67, "grad_norm": 0.5557922720909119, "learning_rate": 0.0002472160364583012, "loss": 1.7667, "step": 50162 }, { "epoch": 1.67, "grad_norm": 0.555923581123352, "learning_rate": 0.0002472057474297685, "loss": 1.7627, "step": 50163 }, { "epoch": 1.67, "grad_norm": 0.5409207344055176, "learning_rate": 0.00024719545846532025, "loss": 1.7163, "step": 50164 }, { "epoch": 1.67, "grad_norm": 0.5316011905670166, "learning_rate": 0.0002471851695649688, "loss": 1.7161, "step": 50165 }, { "epoch": 1.67, "grad_norm": 0.5456441640853882, "learning_rate": 0.0002471748807287269, "loss": 1.8459, "step": 50166 }, { "epoch": 1.67, "grad_norm": 0.5658395290374756, "learning_rate": 0.00024716459195660696, "loss": 1.728, "step": 50167 }, { "epoch": 1.67, "grad_norm": 0.5635018944740295, "learning_rate": 0.00024715430324862134, "loss": 1.739, "step": 50168 }, { "epoch": 1.67, "grad_norm": 0.5408168435096741, "learning_rate": 0.00024714401460478264, "loss": 1.8008, "step": 50169 }, { "epoch": 1.67, "grad_norm": 0.5396624207496643, "learning_rate": 0.00024713372602510334, "loss": 1.8428, "step": 50170 }, { "epoch": 1.67, "grad_norm": 0.5672491192817688, "learning_rate": 0.00024712343750959585, "loss": 1.7327, "step": 50171 }, { "epoch": 1.67, "grad_norm": 0.5627879500389099, "learning_rate": 0.0002471131490582727, "loss": 1.7958, "step": 50172 }, { "epoch": 1.67, "grad_norm": 0.5359467267990112, "learning_rate": 0.00024710286067114646, "loss": 1.7552, "step": 50173 }, { "epoch": 1.67, "grad_norm": 0.5350674390792847, "learning_rate": 0.00024709257234822955, "loss": 1.7859, "step": 50174 }, { "epoch": 1.67, "grad_norm": 0.5372185111045837, "learning_rate": 0.00024708228408953437, "loss": 1.6867, "step": 50175 }, { "epoch": 1.67, "grad_norm": 0.5327826142311096, "learning_rate": 0.00024707199589507357, "loss": 1.7733, "step": 50176 }, { "epoch": 1.67, "grad_norm": 0.5442163348197937, "learning_rate": 0.00024706170776485966, "loss": 1.7791, "step": 50177 }, { "epoch": 1.67, "grad_norm": 0.5421615242958069, "learning_rate": 0.0002470514196989049, "loss": 1.7314, "step": 50178 }, { "epoch": 1.67, "grad_norm": 0.5561655759811401, "learning_rate": 0.0002470411316972219, "loss": 1.7446, "step": 50179 }, { "epoch": 1.67, "grad_norm": 0.5617974996566772, "learning_rate": 0.0002470308437598233, "loss": 1.7859, "step": 50180 }, { "epoch": 1.67, "grad_norm": 0.54649418592453, "learning_rate": 0.00024702055588672134, "loss": 1.8001, "step": 50181 }, { "epoch": 1.67, "grad_norm": 0.5671345591545105, "learning_rate": 0.00024701026807792865, "loss": 1.7865, "step": 50182 }, { "epoch": 1.67, "grad_norm": 0.5357069969177246, "learning_rate": 0.0002469999803334578, "loss": 1.7733, "step": 50183 }, { "epoch": 1.67, "grad_norm": 0.5703058242797852, "learning_rate": 0.00024698969265332096, "loss": 1.8188, "step": 50184 }, { "epoch": 1.67, "grad_norm": 0.5370723605155945, "learning_rate": 0.00024697940503753084, "loss": 1.7758, "step": 50185 }, { "epoch": 1.67, "grad_norm": 0.5423992872238159, "learning_rate": 0.0002469691174861, "loss": 1.7412, "step": 50186 }, { "epoch": 1.67, "grad_norm": 0.5390321612358093, "learning_rate": 0.0002469588299990409, "loss": 1.7528, "step": 50187 }, { "epoch": 1.67, "grad_norm": 0.5677047371864319, "learning_rate": 0.00024694854257636587, "loss": 1.7235, "step": 50188 }, { "epoch": 1.67, "grad_norm": 0.5271956920623779, "learning_rate": 0.0002469382552180875, "loss": 1.7848, "step": 50189 }, { "epoch": 1.67, "grad_norm": 0.5334526896476746, "learning_rate": 0.0002469279679242183, "loss": 1.7345, "step": 50190 }, { "epoch": 1.67, "grad_norm": 0.561426043510437, "learning_rate": 0.0002469176806947707, "loss": 1.731, "step": 50191 }, { "epoch": 1.67, "grad_norm": 0.5381661653518677, "learning_rate": 0.00024690739352975716, "loss": 1.8216, "step": 50192 }, { "epoch": 1.67, "grad_norm": 0.5450084209442139, "learning_rate": 0.00024689710642919043, "loss": 1.6786, "step": 50193 }, { "epoch": 1.67, "grad_norm": 0.5487474799156189, "learning_rate": 0.00024688681939308255, "loss": 1.7315, "step": 50194 }, { "epoch": 1.67, "grad_norm": 0.5402142405509949, "learning_rate": 0.0002468765324214463, "loss": 1.7095, "step": 50195 }, { "epoch": 1.67, "grad_norm": 0.5305220484733582, "learning_rate": 0.00024686624551429416, "loss": 1.7385, "step": 50196 }, { "epoch": 1.67, "grad_norm": 0.556218147277832, "learning_rate": 0.00024685595867163864, "loss": 1.7424, "step": 50197 }, { "epoch": 1.67, "grad_norm": 0.555461585521698, "learning_rate": 0.00024684567189349206, "loss": 1.7328, "step": 50198 }, { "epoch": 1.67, "grad_norm": 0.5384129881858826, "learning_rate": 0.00024683538517986694, "loss": 1.7511, "step": 50199 }, { "epoch": 1.67, "grad_norm": 0.5536054372787476, "learning_rate": 0.00024682509853077604, "loss": 1.757, "step": 50200 }, { "epoch": 1.67, "grad_norm": 0.5762767195701599, "learning_rate": 0.00024681481194623137, "loss": 1.7383, "step": 50201 }, { "epoch": 1.67, "grad_norm": 0.5262734293937683, "learning_rate": 0.00024680452542624585, "loss": 1.7069, "step": 50202 }, { "epoch": 1.67, "grad_norm": 0.5437085032463074, "learning_rate": 0.0002467942389708318, "loss": 1.7718, "step": 50203 }, { "epoch": 1.67, "grad_norm": 0.5359985828399658, "learning_rate": 0.0002467839525800016, "loss": 1.7513, "step": 50204 }, { "epoch": 1.67, "grad_norm": 0.5465548634529114, "learning_rate": 0.00024677366625376797, "loss": 1.7365, "step": 50205 }, { "epoch": 1.67, "grad_norm": 0.587554395198822, "learning_rate": 0.0002467633799921431, "loss": 1.8715, "step": 50206 }, { "epoch": 1.67, "grad_norm": 0.5442107915878296, "learning_rate": 0.00024675309379513984, "loss": 1.7136, "step": 50207 }, { "epoch": 1.67, "grad_norm": 0.5525946021080017, "learning_rate": 0.0002467428076627703, "loss": 1.758, "step": 50208 }, { "epoch": 1.67, "grad_norm": 0.5383391380310059, "learning_rate": 0.00024673252159504726, "loss": 1.7796, "step": 50209 }, { "epoch": 1.67, "grad_norm": 0.5772551894187927, "learning_rate": 0.0002467222355919831, "loss": 1.7347, "step": 50210 }, { "epoch": 1.67, "grad_norm": 0.5254228115081787, "learning_rate": 0.00024671194965359024, "loss": 1.7008, "step": 50211 }, { "epoch": 1.67, "grad_norm": 0.5422860383987427, "learning_rate": 0.00024670166377988126, "loss": 1.7273, "step": 50212 }, { "epoch": 1.67, "grad_norm": 0.5515137314796448, "learning_rate": 0.00024669137797086863, "loss": 1.7809, "step": 50213 }, { "epoch": 1.67, "grad_norm": 0.5531185269355774, "learning_rate": 0.00024668109222656477, "loss": 1.6768, "step": 50214 }, { "epoch": 1.67, "grad_norm": 0.5581793785095215, "learning_rate": 0.00024667080654698214, "loss": 1.7273, "step": 50215 }, { "epoch": 1.67, "grad_norm": 0.5589504837989807, "learning_rate": 0.0002466605209321333, "loss": 1.7673, "step": 50216 }, { "epoch": 1.67, "grad_norm": 0.5443083643913269, "learning_rate": 0.0002466502353820309, "loss": 1.7279, "step": 50217 }, { "epoch": 1.67, "grad_norm": 0.5454259514808655, "learning_rate": 0.0002466399498966871, "loss": 1.6924, "step": 50218 }, { "epoch": 1.67, "grad_norm": 0.5479760766029358, "learning_rate": 0.00024662966447611454, "loss": 1.7683, "step": 50219 }, { "epoch": 1.67, "grad_norm": 0.7342579364776611, "learning_rate": 0.0002466193791203258, "loss": 1.8138, "step": 50220 }, { "epoch": 1.67, "grad_norm": 0.5486183762550354, "learning_rate": 0.0002466090938293332, "loss": 1.8186, "step": 50221 }, { "epoch": 1.67, "grad_norm": 0.5482469797134399, "learning_rate": 0.00024659880860314923, "loss": 1.7205, "step": 50222 }, { "epoch": 1.67, "grad_norm": 0.5512736439704895, "learning_rate": 0.0002465885234417866, "loss": 1.817, "step": 50223 }, { "epoch": 1.67, "grad_norm": 0.5310134291648865, "learning_rate": 0.0002465782383452575, "loss": 1.7208, "step": 50224 }, { "epoch": 1.67, "grad_norm": 0.5574631094932556, "learning_rate": 0.0002465679533135746, "loss": 1.7786, "step": 50225 }, { "epoch": 1.67, "grad_norm": 0.5358862280845642, "learning_rate": 0.0002465576683467503, "loss": 1.8032, "step": 50226 }, { "epoch": 1.67, "grad_norm": 0.5568100810050964, "learning_rate": 0.0002465473834447972, "loss": 1.6941, "step": 50227 }, { "epoch": 1.67, "grad_norm": 0.535972535610199, "learning_rate": 0.00024653709860772763, "loss": 1.7708, "step": 50228 }, { "epoch": 1.67, "grad_norm": 0.5410226583480835, "learning_rate": 0.0002465268138355541, "loss": 1.776, "step": 50229 }, { "epoch": 1.67, "grad_norm": 0.5278066992759705, "learning_rate": 0.0002465165291282893, "loss": 1.7202, "step": 50230 }, { "epoch": 1.67, "grad_norm": 0.5482697486877441, "learning_rate": 0.00024650624448594534, "loss": 1.7348, "step": 50231 }, { "epoch": 1.67, "grad_norm": 0.5546585917472839, "learning_rate": 0.00024649595990853504, "loss": 1.7672, "step": 50232 }, { "epoch": 1.67, "grad_norm": 0.5421786308288574, "learning_rate": 0.0002464856753960708, "loss": 1.7205, "step": 50233 }, { "epoch": 1.67, "grad_norm": 0.5493571162223816, "learning_rate": 0.000246475390948565, "loss": 1.7415, "step": 50234 }, { "epoch": 1.67, "grad_norm": 0.5437704920768738, "learning_rate": 0.00024646510656603016, "loss": 1.695, "step": 50235 }, { "epoch": 1.67, "grad_norm": 0.5384203791618347, "learning_rate": 0.0002464548222484788, "loss": 1.7376, "step": 50236 }, { "epoch": 1.67, "grad_norm": 0.5509852766990662, "learning_rate": 0.00024644453799592354, "loss": 1.7732, "step": 50237 }, { "epoch": 1.67, "grad_norm": 0.5486976504325867, "learning_rate": 0.0002464342538083765, "loss": 1.7183, "step": 50238 }, { "epoch": 1.67, "grad_norm": 0.5395150184631348, "learning_rate": 0.0002464239696858505, "loss": 1.7597, "step": 50239 }, { "epoch": 1.67, "grad_norm": 0.561025857925415, "learning_rate": 0.000246413685628358, "loss": 1.7291, "step": 50240 }, { "epoch": 1.67, "grad_norm": 0.5451536178588867, "learning_rate": 0.00024640340163591123, "loss": 1.7865, "step": 50241 }, { "epoch": 1.67, "grad_norm": 0.54472815990448, "learning_rate": 0.00024639311770852286, "loss": 1.7944, "step": 50242 }, { "epoch": 1.67, "grad_norm": 0.5443395972251892, "learning_rate": 0.0002463828338462055, "loss": 1.7938, "step": 50243 }, { "epoch": 1.67, "grad_norm": 0.5494564771652222, "learning_rate": 0.0002463725500489713, "loss": 1.636, "step": 50244 }, { "epoch": 1.67, "grad_norm": 0.5531954765319824, "learning_rate": 0.00024636226631683287, "loss": 1.7261, "step": 50245 }, { "epoch": 1.67, "grad_norm": 0.5655908584594727, "learning_rate": 0.0002463519826498029, "loss": 1.8013, "step": 50246 }, { "epoch": 1.67, "grad_norm": 0.5532221794128418, "learning_rate": 0.0002463416990478937, "loss": 1.7176, "step": 50247 }, { "epoch": 1.67, "grad_norm": 0.5485464334487915, "learning_rate": 0.0002463314155111177, "loss": 1.7694, "step": 50248 }, { "epoch": 1.67, "grad_norm": 0.548582911491394, "learning_rate": 0.00024632113203948755, "loss": 1.7939, "step": 50249 }, { "epoch": 1.67, "grad_norm": 0.5439748167991638, "learning_rate": 0.0002463108486330156, "loss": 1.7325, "step": 50250 }, { "epoch": 1.67, "grad_norm": 0.5548073649406433, "learning_rate": 0.0002463005652917144, "loss": 1.7773, "step": 50251 }, { "epoch": 1.67, "grad_norm": 0.5747121572494507, "learning_rate": 0.00024629028201559623, "loss": 1.7424, "step": 50252 }, { "epoch": 1.67, "grad_norm": 0.655005156993866, "learning_rate": 0.00024627999880467386, "loss": 1.8554, "step": 50253 }, { "epoch": 1.67, "grad_norm": 0.5393614768981934, "learning_rate": 0.00024626971565895974, "loss": 1.7371, "step": 50254 }, { "epoch": 1.67, "grad_norm": 0.5334939956665039, "learning_rate": 0.0002462594325784662, "loss": 1.7143, "step": 50255 }, { "epoch": 1.67, "grad_norm": 0.5444390773773193, "learning_rate": 0.00024624914956320577, "loss": 1.7175, "step": 50256 }, { "epoch": 1.67, "grad_norm": 0.539796769618988, "learning_rate": 0.000246238866613191, "loss": 1.7888, "step": 50257 }, { "epoch": 1.67, "grad_norm": 0.5681859254837036, "learning_rate": 0.00024622858372843427, "loss": 1.736, "step": 50258 }, { "epoch": 1.67, "grad_norm": 0.5427384972572327, "learning_rate": 0.0002462183009089481, "loss": 1.6516, "step": 50259 }, { "epoch": 1.67, "grad_norm": 0.5439332127571106, "learning_rate": 0.00024620801815474516, "loss": 1.6795, "step": 50260 }, { "epoch": 1.67, "grad_norm": 0.5322784185409546, "learning_rate": 0.0002461977354658375, "loss": 1.765, "step": 50261 }, { "epoch": 1.67, "grad_norm": 0.5328320860862732, "learning_rate": 0.00024618745284223797, "loss": 1.6796, "step": 50262 }, { "epoch": 1.67, "grad_norm": 0.5422688722610474, "learning_rate": 0.000246177170283959, "loss": 1.7031, "step": 50263 }, { "epoch": 1.67, "grad_norm": 0.5505160689353943, "learning_rate": 0.00024616688779101304, "loss": 1.8212, "step": 50264 }, { "epoch": 1.67, "grad_norm": 0.5333021879196167, "learning_rate": 0.0002461566053634124, "loss": 1.7112, "step": 50265 }, { "epoch": 1.67, "grad_norm": 0.5462035536766052, "learning_rate": 0.00024614632300116974, "loss": 1.7906, "step": 50266 }, { "epoch": 1.67, "grad_norm": 0.5320131182670593, "learning_rate": 0.0002461360407042977, "loss": 1.7317, "step": 50267 }, { "epoch": 1.67, "grad_norm": 0.5365613698959351, "learning_rate": 0.00024612575847280836, "loss": 1.7321, "step": 50268 }, { "epoch": 1.67, "grad_norm": 0.5340938568115234, "learning_rate": 0.0002461154763067145, "loss": 1.7234, "step": 50269 }, { "epoch": 1.67, "grad_norm": 0.5411893129348755, "learning_rate": 0.0002461051942060285, "loss": 1.7204, "step": 50270 }, { "epoch": 1.67, "grad_norm": 0.5325371623039246, "learning_rate": 0.0002460949121707629, "loss": 1.8141, "step": 50271 }, { "epoch": 1.67, "grad_norm": 0.5287627577781677, "learning_rate": 0.00024608463020093005, "loss": 1.6984, "step": 50272 }, { "epoch": 1.67, "grad_norm": 0.540732204914093, "learning_rate": 0.0002460743482965425, "loss": 1.7758, "step": 50273 }, { "epoch": 1.67, "grad_norm": 0.5326504707336426, "learning_rate": 0.00024606406645761284, "loss": 1.7211, "step": 50274 }, { "epoch": 1.67, "grad_norm": 0.5393241047859192, "learning_rate": 0.00024605378468415336, "loss": 1.7373, "step": 50275 }, { "epoch": 1.67, "grad_norm": 0.5482591390609741, "learning_rate": 0.0002460435029761767, "loss": 1.7353, "step": 50276 }, { "epoch": 1.67, "grad_norm": 0.5207867622375488, "learning_rate": 0.0002460332213336953, "loss": 1.7474, "step": 50277 }, { "epoch": 1.67, "grad_norm": 0.5414230227470398, "learning_rate": 0.0002460229397567216, "loss": 1.814, "step": 50278 }, { "epoch": 1.67, "grad_norm": 0.5420275330543518, "learning_rate": 0.000246012658245268, "loss": 1.7235, "step": 50279 }, { "epoch": 1.67, "grad_norm": 0.572068452835083, "learning_rate": 0.00024600237679934725, "loss": 1.7866, "step": 50280 }, { "epoch": 1.67, "grad_norm": 0.5461363196372986, "learning_rate": 0.00024599209541897155, "loss": 1.721, "step": 50281 }, { "epoch": 1.67, "grad_norm": 0.546075701713562, "learning_rate": 0.00024598181410415344, "loss": 1.7058, "step": 50282 }, { "epoch": 1.67, "grad_norm": 0.5669633746147156, "learning_rate": 0.0002459715328549055, "loss": 1.7793, "step": 50283 }, { "epoch": 1.67, "grad_norm": 0.5593348145484924, "learning_rate": 0.0002459612516712402, "loss": 1.6472, "step": 50284 }, { "epoch": 1.67, "grad_norm": 0.5290570259094238, "learning_rate": 0.00024595097055316994, "loss": 1.7494, "step": 50285 }, { "epoch": 1.67, "grad_norm": 0.548803448677063, "learning_rate": 0.00024594068950070723, "loss": 1.7102, "step": 50286 }, { "epoch": 1.67, "grad_norm": 0.5396183133125305, "learning_rate": 0.00024593040851386466, "loss": 1.7661, "step": 50287 }, { "epoch": 1.67, "grad_norm": 0.5490895509719849, "learning_rate": 0.0002459201275926545, "loss": 1.7855, "step": 50288 }, { "epoch": 1.67, "grad_norm": 0.5455942749977112, "learning_rate": 0.0002459098467370893, "loss": 1.7361, "step": 50289 }, { "epoch": 1.67, "grad_norm": 0.5466750860214233, "learning_rate": 0.00024589956594718164, "loss": 1.7461, "step": 50290 }, { "epoch": 1.67, "grad_norm": 0.5454826951026917, "learning_rate": 0.00024588928522294395, "loss": 1.7285, "step": 50291 }, { "epoch": 1.67, "grad_norm": 0.5439699292182922, "learning_rate": 0.0002458790045643887, "loss": 1.7938, "step": 50292 }, { "epoch": 1.67, "grad_norm": 0.5447984337806702, "learning_rate": 0.0002458687239715283, "loss": 1.7073, "step": 50293 }, { "epoch": 1.67, "grad_norm": 0.5503438711166382, "learning_rate": 0.00024585844344437535, "loss": 1.6973, "step": 50294 }, { "epoch": 1.67, "grad_norm": 0.5557422637939453, "learning_rate": 0.00024584816298294225, "loss": 1.7409, "step": 50295 }, { "epoch": 1.67, "grad_norm": 0.5252096056938171, "learning_rate": 0.00024583788258724146, "loss": 1.6982, "step": 50296 }, { "epoch": 1.67, "grad_norm": 0.5806758403778076, "learning_rate": 0.00024582760225728555, "loss": 1.8543, "step": 50297 }, { "epoch": 1.67, "grad_norm": 0.5409500598907471, "learning_rate": 0.00024581732199308693, "loss": 1.7578, "step": 50298 }, { "epoch": 1.67, "grad_norm": 0.5592468976974487, "learning_rate": 0.00024580704179465813, "loss": 1.7131, "step": 50299 }, { "epoch": 1.67, "grad_norm": 0.5726981163024902, "learning_rate": 0.0002457967616620117, "loss": 1.6613, "step": 50300 }, { "epoch": 1.67, "grad_norm": 0.5547738671302795, "learning_rate": 0.0002457864815951598, "loss": 1.748, "step": 50301 }, { "epoch": 1.67, "grad_norm": 0.534504234790802, "learning_rate": 0.00024577620159411517, "loss": 1.697, "step": 50302 }, { "epoch": 1.67, "grad_norm": 0.5462870001792908, "learning_rate": 0.00024576592165889027, "loss": 1.7807, "step": 50303 }, { "epoch": 1.67, "grad_norm": 0.5470563769340515, "learning_rate": 0.0002457556417894976, "loss": 1.714, "step": 50304 }, { "epoch": 1.67, "grad_norm": 0.559971809387207, "learning_rate": 0.0002457453619859496, "loss": 1.7747, "step": 50305 }, { "epoch": 1.67, "grad_norm": 0.5492826700210571, "learning_rate": 0.0002457350822482587, "loss": 1.7731, "step": 50306 }, { "epoch": 1.67, "grad_norm": 0.5142186284065247, "learning_rate": 0.00024572480257643745, "loss": 1.754, "step": 50307 }, { "epoch": 1.67, "grad_norm": 0.5665695071220398, "learning_rate": 0.0002457145229704982, "loss": 1.7942, "step": 50308 }, { "epoch": 1.67, "grad_norm": 0.5605979561805725, "learning_rate": 0.0002457042434304535, "loss": 1.7578, "step": 50309 }, { "epoch": 1.67, "grad_norm": 0.5437124371528625, "learning_rate": 0.00024569396395631604, "loss": 1.7124, "step": 50310 }, { "epoch": 1.67, "grad_norm": 0.5379012227058411, "learning_rate": 0.0002456836845480979, "loss": 1.7465, "step": 50311 }, { "epoch": 1.67, "grad_norm": 0.5300654768943787, "learning_rate": 0.00024567340520581185, "loss": 1.7167, "step": 50312 }, { "epoch": 1.67, "grad_norm": 0.5563201904296875, "learning_rate": 0.00024566312592947027, "loss": 1.7563, "step": 50313 }, { "epoch": 1.67, "grad_norm": 0.5464742183685303, "learning_rate": 0.0002456528467190857, "loss": 1.7806, "step": 50314 }, { "epoch": 1.67, "grad_norm": 0.565548837184906, "learning_rate": 0.0002456425675746705, "loss": 1.7577, "step": 50315 }, { "epoch": 1.67, "grad_norm": 0.5460290312767029, "learning_rate": 0.0002456322884962372, "loss": 1.7109, "step": 50316 }, { "epoch": 1.67, "grad_norm": 0.5547974109649658, "learning_rate": 0.0002456220094837985, "loss": 1.7109, "step": 50317 }, { "epoch": 1.67, "grad_norm": 0.5358645915985107, "learning_rate": 0.0002456117305373664, "loss": 1.826, "step": 50318 }, { "epoch": 1.67, "grad_norm": 0.5453871488571167, "learning_rate": 0.00024560145165695367, "loss": 1.7664, "step": 50319 }, { "epoch": 1.67, "grad_norm": 0.5360562205314636, "learning_rate": 0.0002455911728425729, "loss": 1.6793, "step": 50320 }, { "epoch": 1.67, "grad_norm": 0.5508092045783997, "learning_rate": 0.0002455808940942364, "loss": 1.7838, "step": 50321 }, { "epoch": 1.67, "grad_norm": 0.5378758311271667, "learning_rate": 0.0002455706154119566, "loss": 1.7578, "step": 50322 }, { "epoch": 1.67, "grad_norm": 0.5572076439857483, "learning_rate": 0.00024556033679574605, "loss": 1.7104, "step": 50323 }, { "epoch": 1.67, "grad_norm": 0.5649408102035522, "learning_rate": 0.0002455500582456174, "loss": 1.7432, "step": 50324 }, { "epoch": 1.67, "grad_norm": 0.5609133839607239, "learning_rate": 0.00024553977976158274, "loss": 1.7178, "step": 50325 }, { "epoch": 1.67, "grad_norm": 0.5452287793159485, "learning_rate": 0.0002455295013436548, "loss": 1.7797, "step": 50326 }, { "epoch": 1.67, "grad_norm": 0.5624668002128601, "learning_rate": 0.00024551922299184616, "loss": 1.7534, "step": 50327 }, { "epoch": 1.67, "grad_norm": 0.5719530582427979, "learning_rate": 0.00024550894470616905, "loss": 1.8074, "step": 50328 }, { "epoch": 1.67, "grad_norm": 0.5598083138465881, "learning_rate": 0.0002454986664866361, "loss": 1.7833, "step": 50329 }, { "epoch": 1.67, "grad_norm": 0.5571144223213196, "learning_rate": 0.00024548838833325973, "loss": 1.7287, "step": 50330 }, { "epoch": 1.67, "grad_norm": 0.5511841177940369, "learning_rate": 0.0002454781102460524, "loss": 1.7846, "step": 50331 }, { "epoch": 1.67, "grad_norm": 0.5498836040496826, "learning_rate": 0.00024546783222502655, "loss": 1.6669, "step": 50332 }, { "epoch": 1.67, "grad_norm": 0.5537921786308289, "learning_rate": 0.00024545755427019477, "loss": 1.8196, "step": 50333 }, { "epoch": 1.67, "grad_norm": 0.5262158513069153, "learning_rate": 0.0002454472763815696, "loss": 1.7322, "step": 50334 }, { "epoch": 1.67, "grad_norm": 0.5441486239433289, "learning_rate": 0.0002454369985591633, "loss": 1.8588, "step": 50335 }, { "epoch": 1.67, "grad_norm": 0.5495877265930176, "learning_rate": 0.00024542672080298835, "loss": 1.7113, "step": 50336 }, { "epoch": 1.67, "grad_norm": 0.5547070503234863, "learning_rate": 0.00024541644311305746, "loss": 1.7709, "step": 50337 }, { "epoch": 1.67, "grad_norm": 0.5430611968040466, "learning_rate": 0.00024540616548938293, "loss": 1.7274, "step": 50338 }, { "epoch": 1.67, "grad_norm": 0.5414974093437195, "learning_rate": 0.00024539588793197716, "loss": 1.6898, "step": 50339 }, { "epoch": 1.67, "grad_norm": 0.5475478768348694, "learning_rate": 0.00024538561044085296, "loss": 1.7241, "step": 50340 }, { "epoch": 1.67, "grad_norm": 0.5733809471130371, "learning_rate": 0.0002453753330160224, "loss": 1.7338, "step": 50341 }, { "epoch": 1.67, "grad_norm": 0.5679919123649597, "learning_rate": 0.0002453650556574982, "loss": 1.8188, "step": 50342 }, { "epoch": 1.67, "grad_norm": 0.560641884803772, "learning_rate": 0.0002453547783652927, "loss": 1.8003, "step": 50343 }, { "epoch": 1.67, "grad_norm": 0.5423733592033386, "learning_rate": 0.0002453445011394186, "loss": 1.7448, "step": 50344 }, { "epoch": 1.67, "grad_norm": 0.5381230115890503, "learning_rate": 0.0002453342239798881, "loss": 1.7658, "step": 50345 }, { "epoch": 1.68, "grad_norm": 0.5460782051086426, "learning_rate": 0.00024532394688671377, "loss": 1.7905, "step": 50346 }, { "epoch": 1.68, "grad_norm": 0.5646564960479736, "learning_rate": 0.0002453136698599083, "loss": 1.8089, "step": 50347 }, { "epoch": 1.68, "grad_norm": 0.5424643754959106, "learning_rate": 0.0002453033928994838, "loss": 1.7356, "step": 50348 }, { "epoch": 1.68, "grad_norm": 0.558613657951355, "learning_rate": 0.00024529311600545296, "loss": 1.7152, "step": 50349 }, { "epoch": 1.68, "grad_norm": 0.5695609450340271, "learning_rate": 0.00024528283917782823, "loss": 1.7612, "step": 50350 }, { "epoch": 1.68, "grad_norm": 0.5483851432800293, "learning_rate": 0.0002452725624166221, "loss": 1.6903, "step": 50351 }, { "epoch": 1.68, "grad_norm": 0.572938859462738, "learning_rate": 0.00024526228572184695, "loss": 1.8333, "step": 50352 }, { "epoch": 1.68, "grad_norm": 0.560175359249115, "learning_rate": 0.0002452520090935153, "loss": 1.8385, "step": 50353 }, { "epoch": 1.68, "grad_norm": 0.5472186207771301, "learning_rate": 0.0002452417325316398, "loss": 1.712, "step": 50354 }, { "epoch": 1.68, "grad_norm": 0.5443617701530457, "learning_rate": 0.0002452314560362326, "loss": 1.7429, "step": 50355 }, { "epoch": 1.68, "grad_norm": 0.5284028649330139, "learning_rate": 0.0002452211796073064, "loss": 1.7273, "step": 50356 }, { "epoch": 1.68, "grad_norm": 0.5439044833183289, "learning_rate": 0.0002452109032448737, "loss": 1.7662, "step": 50357 }, { "epoch": 1.68, "grad_norm": 0.5305446982383728, "learning_rate": 0.0002452006269489468, "loss": 1.8349, "step": 50358 }, { "epoch": 1.68, "grad_norm": 0.5821540951728821, "learning_rate": 0.00024519035071953827, "loss": 1.7257, "step": 50359 }, { "epoch": 1.68, "grad_norm": 0.5472210645675659, "learning_rate": 0.00024518007455666066, "loss": 1.7256, "step": 50360 }, { "epoch": 1.68, "grad_norm": 0.5474991798400879, "learning_rate": 0.0002451697984603263, "loss": 1.7047, "step": 50361 }, { "epoch": 1.68, "grad_norm": 0.5449148416519165, "learning_rate": 0.0002451595224305476, "loss": 1.7752, "step": 50362 }, { "epoch": 1.68, "grad_norm": 0.5598827004432678, "learning_rate": 0.00024514924646733727, "loss": 1.814, "step": 50363 }, { "epoch": 1.68, "grad_norm": 1.386289358139038, "learning_rate": 0.00024513897057070773, "loss": 1.7974, "step": 50364 }, { "epoch": 1.68, "grad_norm": 0.5599095225334167, "learning_rate": 0.00024512869474067135, "loss": 1.7349, "step": 50365 }, { "epoch": 1.68, "grad_norm": 0.5466623306274414, "learning_rate": 0.0002451184189772406, "loss": 1.8145, "step": 50366 }, { "epoch": 1.68, "grad_norm": 0.5163062214851379, "learning_rate": 0.0002451081432804281, "loss": 1.6706, "step": 50367 }, { "epoch": 1.68, "grad_norm": 0.5415812730789185, "learning_rate": 0.0002450978676502462, "loss": 1.7742, "step": 50368 }, { "epoch": 1.68, "grad_norm": 0.5436038374900818, "learning_rate": 0.00024508759208670725, "loss": 1.8799, "step": 50369 }, { "epoch": 1.68, "grad_norm": 0.5245205760002136, "learning_rate": 0.00024507731658982403, "loss": 1.6778, "step": 50370 }, { "epoch": 1.68, "grad_norm": 0.5434521436691284, "learning_rate": 0.0002450670411596089, "loss": 1.7502, "step": 50371 }, { "epoch": 1.68, "grad_norm": 0.5404655933380127, "learning_rate": 0.0002450567657960742, "loss": 1.7432, "step": 50372 }, { "epoch": 1.68, "grad_norm": 0.5533702969551086, "learning_rate": 0.0002450464904992325, "loss": 1.7589, "step": 50373 }, { "epoch": 1.68, "grad_norm": 0.5625799894332886, "learning_rate": 0.0002450362152690963, "loss": 1.8211, "step": 50374 }, { "epoch": 1.68, "grad_norm": 0.5867583751678467, "learning_rate": 0.000245025940105678, "loss": 1.7853, "step": 50375 }, { "epoch": 1.68, "grad_norm": 0.5722480416297913, "learning_rate": 0.00024501566500899, "loss": 1.7612, "step": 50376 }, { "epoch": 1.68, "grad_norm": 0.5588228106498718, "learning_rate": 0.0002450053899790451, "loss": 1.7794, "step": 50377 }, { "epoch": 1.68, "grad_norm": 0.550910234451294, "learning_rate": 0.00024499511501585537, "loss": 1.7158, "step": 50378 }, { "epoch": 1.68, "grad_norm": 0.5311625599861145, "learning_rate": 0.00024498484011943356, "loss": 1.7596, "step": 50379 }, { "epoch": 1.68, "grad_norm": 0.5511795282363892, "learning_rate": 0.000244974565289792, "loss": 1.7685, "step": 50380 }, { "epoch": 1.68, "grad_norm": 0.5484753847122192, "learning_rate": 0.0002449642905269433, "loss": 1.7815, "step": 50381 }, { "epoch": 1.68, "grad_norm": 0.5428789854049683, "learning_rate": 0.00024495401583089977, "loss": 1.7086, "step": 50382 }, { "epoch": 1.68, "grad_norm": 0.5595927238464355, "learning_rate": 0.0002449437412016739, "loss": 1.7484, "step": 50383 }, { "epoch": 1.68, "grad_norm": 0.5667697191238403, "learning_rate": 0.0002449334666392784, "loss": 1.7344, "step": 50384 }, { "epoch": 1.68, "grad_norm": 0.5252524018287659, "learning_rate": 0.0002449231921437254, "loss": 1.7324, "step": 50385 }, { "epoch": 1.68, "grad_norm": 0.5529601573944092, "learning_rate": 0.0002449129177150275, "loss": 1.6963, "step": 50386 }, { "epoch": 1.68, "grad_norm": 0.5758167505264282, "learning_rate": 0.00024490264335319734, "loss": 1.7073, "step": 50387 }, { "epoch": 1.68, "grad_norm": 0.5557925701141357, "learning_rate": 0.00024489236905824715, "loss": 1.7317, "step": 50388 }, { "epoch": 1.68, "grad_norm": 0.535018801689148, "learning_rate": 0.0002448820948301895, "loss": 1.7661, "step": 50389 }, { "epoch": 1.68, "grad_norm": 0.5480566620826721, "learning_rate": 0.00024487182066903687, "loss": 1.8115, "step": 50390 }, { "epoch": 1.68, "grad_norm": 0.5667884945869446, "learning_rate": 0.0002448615465748019, "loss": 1.7812, "step": 50391 }, { "epoch": 1.68, "grad_norm": 0.549817681312561, "learning_rate": 0.00024485127254749665, "loss": 1.6788, "step": 50392 }, { "epoch": 1.68, "grad_norm": 0.5505388975143433, "learning_rate": 0.0002448409985871339, "loss": 1.7416, "step": 50393 }, { "epoch": 1.68, "grad_norm": 0.5330071449279785, "learning_rate": 0.00024483072469372617, "loss": 1.7943, "step": 50394 }, { "epoch": 1.68, "grad_norm": 0.5906981229782104, "learning_rate": 0.0002448204508672857, "loss": 1.7885, "step": 50395 }, { "epoch": 1.68, "grad_norm": 0.5568589568138123, "learning_rate": 0.0002448101771078251, "loss": 1.721, "step": 50396 }, { "epoch": 1.68, "grad_norm": 0.5304224491119385, "learning_rate": 0.00024479990341535687, "loss": 1.8194, "step": 50397 }, { "epoch": 1.68, "grad_norm": 0.5486346483230591, "learning_rate": 0.0002447896297898933, "loss": 1.7074, "step": 50398 }, { "epoch": 1.68, "grad_norm": 0.5375679135322571, "learning_rate": 0.000244779356231447, "loss": 1.7767, "step": 50399 }, { "epoch": 1.68, "grad_norm": 0.5487335324287415, "learning_rate": 0.0002447690827400304, "loss": 1.6758, "step": 50400 }, { "epoch": 1.68, "grad_norm": 0.5487985610961914, "learning_rate": 0.00024475880931565613, "loss": 1.7598, "step": 50401 }, { "epoch": 1.68, "grad_norm": 0.5515118837356567, "learning_rate": 0.00024474853595833645, "loss": 1.7343, "step": 50402 }, { "epoch": 1.68, "grad_norm": 0.5320002436637878, "learning_rate": 0.00024473826266808394, "loss": 1.7285, "step": 50403 }, { "epoch": 1.68, "grad_norm": 0.5320470333099365, "learning_rate": 0.00024472798944491107, "loss": 1.702, "step": 50404 }, { "epoch": 1.68, "grad_norm": 0.5444389581680298, "learning_rate": 0.00024471771628883015, "loss": 1.7452, "step": 50405 }, { "epoch": 1.68, "grad_norm": 0.5328315496444702, "learning_rate": 0.0002447074431998538, "loss": 1.7112, "step": 50406 }, { "epoch": 1.68, "grad_norm": 0.5610594749450684, "learning_rate": 0.00024469717017799457, "loss": 1.7264, "step": 50407 }, { "epoch": 1.68, "grad_norm": 0.5563873052597046, "learning_rate": 0.00024468689722326474, "loss": 1.7719, "step": 50408 }, { "epoch": 1.68, "grad_norm": 0.5714327096939087, "learning_rate": 0.0002446766243356769, "loss": 1.7197, "step": 50409 }, { "epoch": 1.68, "grad_norm": 0.5440818071365356, "learning_rate": 0.0002446663515152435, "loss": 1.7055, "step": 50410 }, { "epoch": 1.68, "grad_norm": 0.5543913245201111, "learning_rate": 0.000244656078761977, "loss": 1.7408, "step": 50411 }, { "epoch": 1.68, "grad_norm": 0.5482484698295593, "learning_rate": 0.00024464580607588983, "loss": 1.7954, "step": 50412 }, { "epoch": 1.68, "grad_norm": 0.5620259046554565, "learning_rate": 0.00024463553345699444, "loss": 1.8238, "step": 50413 }, { "epoch": 1.68, "grad_norm": 0.5353535413742065, "learning_rate": 0.0002446252609053035, "loss": 1.7269, "step": 50414 }, { "epoch": 1.68, "grad_norm": 0.5388596057891846, "learning_rate": 0.0002446149884208292, "loss": 1.7507, "step": 50415 }, { "epoch": 1.68, "grad_norm": 0.5498721599578857, "learning_rate": 0.0002446047160035842, "loss": 1.7483, "step": 50416 }, { "epoch": 1.68, "grad_norm": 0.5317752957344055, "learning_rate": 0.000244594443653581, "loss": 1.743, "step": 50417 }, { "epoch": 1.68, "grad_norm": 0.5556627511978149, "learning_rate": 0.0002445841713708319, "loss": 1.7517, "step": 50418 }, { "epoch": 1.68, "grad_norm": 0.5733194947242737, "learning_rate": 0.00024457389915534934, "loss": 1.8246, "step": 50419 }, { "epoch": 1.68, "grad_norm": 0.5548526644706726, "learning_rate": 0.00024456362700714607, "loss": 1.7318, "step": 50420 }, { "epoch": 1.68, "grad_norm": 0.5464059114456177, "learning_rate": 0.00024455335492623435, "loss": 1.707, "step": 50421 }, { "epoch": 1.68, "grad_norm": 0.5366321802139282, "learning_rate": 0.00024454308291262664, "loss": 1.7174, "step": 50422 }, { "epoch": 1.68, "grad_norm": 0.543305516242981, "learning_rate": 0.0002445328109663355, "loss": 1.7788, "step": 50423 }, { "epoch": 1.68, "grad_norm": 0.533737063407898, "learning_rate": 0.0002445225390873734, "loss": 1.7791, "step": 50424 }, { "epoch": 1.68, "grad_norm": 0.5588862299919128, "learning_rate": 0.00024451226727575264, "loss": 1.7506, "step": 50425 }, { "epoch": 1.68, "grad_norm": 0.5464051961898804, "learning_rate": 0.0002445019955314858, "loss": 1.8005, "step": 50426 }, { "epoch": 1.68, "grad_norm": 0.5573405623435974, "learning_rate": 0.00024449172385458554, "loss": 1.7336, "step": 50427 }, { "epoch": 1.68, "grad_norm": 0.5616288185119629, "learning_rate": 0.000244481452245064, "loss": 1.7312, "step": 50428 }, { "epoch": 1.68, "grad_norm": 0.5561009049415588, "learning_rate": 0.0002444711807029338, "loss": 1.771, "step": 50429 }, { "epoch": 1.68, "grad_norm": 0.5285295248031616, "learning_rate": 0.00024446090922820743, "loss": 1.7478, "step": 50430 }, { "epoch": 1.68, "grad_norm": 0.5727819800376892, "learning_rate": 0.0002444506378208974, "loss": 1.7888, "step": 50431 }, { "epoch": 1.68, "grad_norm": 0.5701630711555481, "learning_rate": 0.000244440366481016, "loss": 1.7633, "step": 50432 }, { "epoch": 1.68, "grad_norm": 0.5498299598693848, "learning_rate": 0.00024443009520857585, "loss": 1.8419, "step": 50433 }, { "epoch": 1.68, "grad_norm": 0.5464849472045898, "learning_rate": 0.0002444198240035895, "loss": 1.7419, "step": 50434 }, { "epoch": 1.68, "grad_norm": 0.5615257620811462, "learning_rate": 0.00024440955286606907, "loss": 1.7524, "step": 50435 }, { "epoch": 1.68, "grad_norm": 0.5606371760368347, "learning_rate": 0.0002443992817960274, "loss": 1.7249, "step": 50436 }, { "epoch": 1.68, "grad_norm": 0.5480127930641174, "learning_rate": 0.0002443890107934768, "loss": 1.7761, "step": 50437 }, { "epoch": 1.68, "grad_norm": 0.5418837070465088, "learning_rate": 0.00024437873985842974, "loss": 1.746, "step": 50438 }, { "epoch": 1.68, "grad_norm": 0.5491959452629089, "learning_rate": 0.00024436846899089865, "loss": 1.7134, "step": 50439 }, { "epoch": 1.68, "grad_norm": 0.5511080026626587, "learning_rate": 0.000244358198190896, "loss": 1.7295, "step": 50440 }, { "epoch": 1.68, "grad_norm": 0.5736274719238281, "learning_rate": 0.0002443479274584345, "loss": 1.7481, "step": 50441 }, { "epoch": 1.68, "grad_norm": 0.5606348514556885, "learning_rate": 0.0002443376567935262, "loss": 1.7356, "step": 50442 }, { "epoch": 1.68, "grad_norm": 0.5599077939987183, "learning_rate": 0.0002443273861961839, "loss": 1.7224, "step": 50443 }, { "epoch": 1.68, "grad_norm": 0.5593769550323486, "learning_rate": 0.00024431711566641994, "loss": 1.6984, "step": 50444 }, { "epoch": 1.68, "grad_norm": 0.5628560185432434, "learning_rate": 0.0002443068452042467, "loss": 1.7217, "step": 50445 }, { "epoch": 1.68, "grad_norm": 0.5498011112213135, "learning_rate": 0.0002442965748096769, "loss": 1.7452, "step": 50446 }, { "epoch": 1.68, "grad_norm": 0.5833860635757446, "learning_rate": 0.0002442863044827228, "loss": 1.7461, "step": 50447 }, { "epoch": 1.68, "grad_norm": 0.553420901298523, "learning_rate": 0.0002442760342233968, "loss": 1.6888, "step": 50448 }, { "epoch": 1.68, "grad_norm": 0.5490742325782776, "learning_rate": 0.00024426576403171156, "loss": 1.7871, "step": 50449 }, { "epoch": 1.68, "grad_norm": 0.5303136110305786, "learning_rate": 0.00024425549390767943, "loss": 1.7594, "step": 50450 }, { "epoch": 1.68, "grad_norm": 0.5578745603561401, "learning_rate": 0.00024424522385131306, "loss": 1.7511, "step": 50451 }, { "epoch": 1.68, "grad_norm": 0.5662733316421509, "learning_rate": 0.00024423495386262465, "loss": 1.744, "step": 50452 }, { "epoch": 1.68, "grad_norm": 0.5795210599899292, "learning_rate": 0.00024422468394162683, "loss": 1.7365, "step": 50453 }, { "epoch": 1.68, "grad_norm": 0.5510183572769165, "learning_rate": 0.00024421441408833207, "loss": 1.7153, "step": 50454 }, { "epoch": 1.68, "grad_norm": 0.5340389013290405, "learning_rate": 0.0002442041443027527, "loss": 1.7025, "step": 50455 }, { "epoch": 1.68, "grad_norm": 0.5702549815177917, "learning_rate": 0.00024419387458490127, "loss": 1.785, "step": 50456 }, { "epoch": 1.68, "grad_norm": 0.5592668652534485, "learning_rate": 0.0002441836049347904, "loss": 1.7001, "step": 50457 }, { "epoch": 1.68, "grad_norm": 0.5481120944023132, "learning_rate": 0.0002441733353524322, "loss": 1.7594, "step": 50458 }, { "epoch": 1.68, "grad_norm": 0.5449120402336121, "learning_rate": 0.00024416306583783945, "loss": 1.8162, "step": 50459 }, { "epoch": 1.68, "grad_norm": 0.5609211921691895, "learning_rate": 0.00024415279639102455, "loss": 1.7063, "step": 50460 }, { "epoch": 1.68, "grad_norm": 0.5411756634712219, "learning_rate": 0.00024414252701199994, "loss": 1.793, "step": 50461 }, { "epoch": 1.68, "grad_norm": 0.5445799231529236, "learning_rate": 0.000244132257700778, "loss": 1.7623, "step": 50462 }, { "epoch": 1.68, "grad_norm": 0.5463463664054871, "learning_rate": 0.0002441219884573712, "loss": 1.7556, "step": 50463 }, { "epoch": 1.68, "grad_norm": 0.5413956046104431, "learning_rate": 0.00024411171928179228, "loss": 1.7344, "step": 50464 }, { "epoch": 1.68, "grad_norm": 0.5561819076538086, "learning_rate": 0.00024410145017405332, "loss": 1.6915, "step": 50465 }, { "epoch": 1.68, "grad_norm": 0.5357840061187744, "learning_rate": 0.000244091181134167, "loss": 1.7633, "step": 50466 }, { "epoch": 1.68, "grad_norm": 0.5357611775398254, "learning_rate": 0.00024408091216214582, "loss": 1.6826, "step": 50467 }, { "epoch": 1.68, "grad_norm": 0.5410979986190796, "learning_rate": 0.00024407064325800213, "loss": 1.8054, "step": 50468 }, { "epoch": 1.68, "grad_norm": 0.5481558442115784, "learning_rate": 0.00024406037442174843, "loss": 1.7156, "step": 50469 }, { "epoch": 1.68, "grad_norm": 0.5652834177017212, "learning_rate": 0.0002440501056533971, "loss": 1.7937, "step": 50470 }, { "epoch": 1.68, "grad_norm": 0.5649851560592651, "learning_rate": 0.00024403983695296094, "loss": 1.7637, "step": 50471 }, { "epoch": 1.68, "grad_norm": 0.5461280345916748, "learning_rate": 0.00024402956832045194, "loss": 1.7739, "step": 50472 }, { "epoch": 1.68, "grad_norm": 0.526160478591919, "learning_rate": 0.00024401929975588284, "loss": 1.7685, "step": 50473 }, { "epoch": 1.68, "grad_norm": 0.5494406223297119, "learning_rate": 0.00024400903125926617, "loss": 1.7593, "step": 50474 }, { "epoch": 1.68, "grad_norm": 0.5535584092140198, "learning_rate": 0.00024399876283061426, "loss": 1.7174, "step": 50475 }, { "epoch": 1.68, "grad_norm": 0.5504030585289001, "learning_rate": 0.00024398849446993954, "loss": 1.6774, "step": 50476 }, { "epoch": 1.68, "grad_norm": 0.5627161264419556, "learning_rate": 0.0002439782261772546, "loss": 1.6967, "step": 50477 }, { "epoch": 1.68, "grad_norm": 0.5329872965812683, "learning_rate": 0.0002439679579525718, "loss": 1.7716, "step": 50478 }, { "epoch": 1.68, "grad_norm": 0.5747936367988586, "learning_rate": 0.00024395768979590352, "loss": 1.769, "step": 50479 }, { "epoch": 1.68, "grad_norm": 0.5457790493965149, "learning_rate": 0.00024394742170726248, "loss": 1.676, "step": 50480 }, { "epoch": 1.68, "grad_norm": 0.5565328001976013, "learning_rate": 0.00024393715368666106, "loss": 1.7947, "step": 50481 }, { "epoch": 1.68, "grad_norm": 0.5788531303405762, "learning_rate": 0.0002439268857341116, "loss": 1.838, "step": 50482 }, { "epoch": 1.68, "grad_norm": 0.5488007068634033, "learning_rate": 0.00024391661784962666, "loss": 1.7434, "step": 50483 }, { "epoch": 1.68, "grad_norm": 0.5438933372497559, "learning_rate": 0.00024390635003321872, "loss": 1.7264, "step": 50484 }, { "epoch": 1.68, "grad_norm": 0.5473737716674805, "learning_rate": 0.00024389608228490015, "loss": 1.6899, "step": 50485 }, { "epoch": 1.68, "grad_norm": 0.5593021512031555, "learning_rate": 0.0002438858146046834, "loss": 1.738, "step": 50486 }, { "epoch": 1.68, "grad_norm": 0.5485026240348816, "learning_rate": 0.00024387554699258106, "loss": 1.769, "step": 50487 }, { "epoch": 1.68, "grad_norm": 0.5347428321838379, "learning_rate": 0.00024386527944860562, "loss": 1.7727, "step": 50488 }, { "epoch": 1.68, "grad_norm": 0.5628513097763062, "learning_rate": 0.00024385501197276936, "loss": 1.7018, "step": 50489 }, { "epoch": 1.68, "grad_norm": 0.5594712495803833, "learning_rate": 0.0002438447445650849, "loss": 1.7957, "step": 50490 }, { "epoch": 1.68, "grad_norm": 0.5389196872711182, "learning_rate": 0.00024383447722556469, "loss": 1.7163, "step": 50491 }, { "epoch": 1.68, "grad_norm": 0.5329836010932922, "learning_rate": 0.00024382420995422107, "loss": 1.7319, "step": 50492 }, { "epoch": 1.68, "grad_norm": 0.5602399706840515, "learning_rate": 0.00024381394275106654, "loss": 1.6809, "step": 50493 }, { "epoch": 1.68, "grad_norm": 0.542309582233429, "learning_rate": 0.0002438036756161138, "loss": 1.8109, "step": 50494 }, { "epoch": 1.68, "grad_norm": 0.5636194348335266, "learning_rate": 0.00024379340854937485, "loss": 1.7714, "step": 50495 }, { "epoch": 1.68, "grad_norm": 0.5662462711334229, "learning_rate": 0.00024378314155086255, "loss": 1.7438, "step": 50496 }, { "epoch": 1.68, "grad_norm": 0.5553330183029175, "learning_rate": 0.00024377287462058923, "loss": 1.6638, "step": 50497 }, { "epoch": 1.68, "grad_norm": 0.5361394882202148, "learning_rate": 0.00024376260775856742, "loss": 1.6854, "step": 50498 }, { "epoch": 1.68, "grad_norm": 0.5468934178352356, "learning_rate": 0.00024375234096480944, "loss": 1.7322, "step": 50499 }, { "epoch": 1.68, "grad_norm": 0.5431551933288574, "learning_rate": 0.0002437420742393278, "loss": 1.773, "step": 50500 }, { "epoch": 1.68, "grad_norm": 0.5478761792182922, "learning_rate": 0.00024373180758213516, "loss": 1.7525, "step": 50501 }, { "epoch": 1.68, "grad_norm": 0.5535697937011719, "learning_rate": 0.0002437215409932436, "loss": 1.86, "step": 50502 }, { "epoch": 1.68, "grad_norm": 0.5641906261444092, "learning_rate": 0.0002437112744726659, "loss": 1.7437, "step": 50503 }, { "epoch": 1.68, "grad_norm": 0.5596019625663757, "learning_rate": 0.00024370100802041447, "loss": 1.7838, "step": 50504 }, { "epoch": 1.68, "grad_norm": 0.5418524742126465, "learning_rate": 0.00024369074163650166, "loss": 1.7664, "step": 50505 }, { "epoch": 1.68, "grad_norm": 0.5616893172264099, "learning_rate": 0.00024368047532093997, "loss": 1.7694, "step": 50506 }, { "epoch": 1.68, "grad_norm": 0.5569103956222534, "learning_rate": 0.00024367020907374183, "loss": 1.7495, "step": 50507 }, { "epoch": 1.68, "grad_norm": 0.5371351838111877, "learning_rate": 0.00024365994289491996, "loss": 1.7387, "step": 50508 }, { "epoch": 1.68, "grad_norm": 0.5439613461494446, "learning_rate": 0.00024364967678448644, "loss": 1.6433, "step": 50509 }, { "epoch": 1.68, "grad_norm": 0.5343941450119019, "learning_rate": 0.00024363941074245394, "loss": 1.7364, "step": 50510 }, { "epoch": 1.68, "grad_norm": 0.5454520583152771, "learning_rate": 0.00024362914476883498, "loss": 1.6995, "step": 50511 }, { "epoch": 1.68, "grad_norm": 0.5428484678268433, "learning_rate": 0.00024361887886364185, "loss": 1.7211, "step": 50512 }, { "epoch": 1.68, "grad_norm": 0.5400053858757019, "learning_rate": 0.0002436086130268871, "loss": 1.7776, "step": 50513 }, { "epoch": 1.68, "grad_norm": 0.5439255237579346, "learning_rate": 0.00024359834725858322, "loss": 1.7225, "step": 50514 }, { "epoch": 1.68, "grad_norm": 0.5372980833053589, "learning_rate": 0.0002435880815587426, "loss": 1.7746, "step": 50515 }, { "epoch": 1.68, "grad_norm": 0.5331303477287292, "learning_rate": 0.0002435778159273777, "loss": 1.7302, "step": 50516 }, { "epoch": 1.68, "grad_norm": 0.536464273929596, "learning_rate": 0.00024356755036450105, "loss": 1.7551, "step": 50517 }, { "epoch": 1.68, "grad_norm": 0.5320550799369812, "learning_rate": 0.00024355728487012517, "loss": 1.7338, "step": 50518 }, { "epoch": 1.68, "grad_norm": 0.5405219197273254, "learning_rate": 0.00024354701944426235, "loss": 1.7392, "step": 50519 }, { "epoch": 1.68, "grad_norm": 0.5533029437065125, "learning_rate": 0.0002435367540869251, "loss": 1.7502, "step": 50520 }, { "epoch": 1.68, "grad_norm": 0.5207718014717102, "learning_rate": 0.000243526488798126, "loss": 1.7245, "step": 50521 }, { "epoch": 1.68, "grad_norm": 0.5428163409233093, "learning_rate": 0.0002435162235778774, "loss": 1.7803, "step": 50522 }, { "epoch": 1.68, "grad_norm": 0.5430608987808228, "learning_rate": 0.00024350595842619166, "loss": 1.6643, "step": 50523 }, { "epoch": 1.68, "grad_norm": 0.5747380256652832, "learning_rate": 0.00024349569334308153, "loss": 1.8038, "step": 50524 }, { "epoch": 1.68, "grad_norm": 0.5446167588233948, "learning_rate": 0.0002434854283285592, "loss": 1.7071, "step": 50525 }, { "epoch": 1.68, "grad_norm": 0.5489491820335388, "learning_rate": 0.00024347516338263722, "loss": 1.728, "step": 50526 }, { "epoch": 1.68, "grad_norm": 0.5894727110862732, "learning_rate": 0.00024346489850532812, "loss": 1.7838, "step": 50527 }, { "epoch": 1.68, "grad_norm": 0.5431567430496216, "learning_rate": 0.00024345463369664433, "loss": 1.7758, "step": 50528 }, { "epoch": 1.68, "grad_norm": 0.5421567559242249, "learning_rate": 0.00024344436895659818, "loss": 1.7012, "step": 50529 }, { "epoch": 1.68, "grad_norm": 0.5568999648094177, "learning_rate": 0.00024343410428520225, "loss": 1.7855, "step": 50530 }, { "epoch": 1.68, "grad_norm": 0.5521809458732605, "learning_rate": 0.00024342383968246908, "loss": 1.7891, "step": 50531 }, { "epoch": 1.68, "grad_norm": 0.5457726120948792, "learning_rate": 0.000243413575148411, "loss": 1.7712, "step": 50532 }, { "epoch": 1.68, "grad_norm": 0.5585904717445374, "learning_rate": 0.00024340331068304047, "loss": 1.8094, "step": 50533 }, { "epoch": 1.68, "grad_norm": 0.5718613266944885, "learning_rate": 0.00024339304628637003, "loss": 1.7354, "step": 50534 }, { "epoch": 1.68, "grad_norm": 0.5799590945243835, "learning_rate": 0.00024338278195841205, "loss": 1.8434, "step": 50535 }, { "epoch": 1.68, "grad_norm": 0.5323535799980164, "learning_rate": 0.00024337251769917896, "loss": 1.7704, "step": 50536 }, { "epoch": 1.68, "grad_norm": 0.5531233549118042, "learning_rate": 0.00024336225350868334, "loss": 1.7348, "step": 50537 }, { "epoch": 1.68, "grad_norm": 0.5498574376106262, "learning_rate": 0.00024335198938693766, "loss": 1.7476, "step": 50538 }, { "epoch": 1.68, "grad_norm": 0.5593833923339844, "learning_rate": 0.0002433417253339543, "loss": 1.7982, "step": 50539 }, { "epoch": 1.68, "grad_norm": 0.5503844618797302, "learning_rate": 0.0002433314613497457, "loss": 1.7076, "step": 50540 }, { "epoch": 1.68, "grad_norm": 0.5500263571739197, "learning_rate": 0.00024332119743432443, "loss": 1.7611, "step": 50541 }, { "epoch": 1.68, "grad_norm": 0.5556331276893616, "learning_rate": 0.0002433109335877028, "loss": 1.7631, "step": 50542 }, { "epoch": 1.68, "grad_norm": 0.5526086091995239, "learning_rate": 0.0002433006698098933, "loss": 1.7593, "step": 50543 }, { "epoch": 1.68, "grad_norm": 0.5513865351676941, "learning_rate": 0.0002432904061009086, "loss": 1.7511, "step": 50544 }, { "epoch": 1.68, "grad_norm": 0.7026078701019287, "learning_rate": 0.00024328014246076081, "loss": 1.7281, "step": 50545 }, { "epoch": 1.68, "grad_norm": 0.5362473726272583, "learning_rate": 0.00024326987888946262, "loss": 1.818, "step": 50546 }, { "epoch": 1.68, "grad_norm": 0.5442618727684021, "learning_rate": 0.0002432596153870265, "loss": 1.7845, "step": 50547 }, { "epoch": 1.68, "grad_norm": 0.54768967628479, "learning_rate": 0.00024324935195346484, "loss": 1.697, "step": 50548 }, { "epoch": 1.68, "grad_norm": 0.5466717481613159, "learning_rate": 0.00024323908858879008, "loss": 1.7413, "step": 50549 }, { "epoch": 1.68, "grad_norm": 0.5885566473007202, "learning_rate": 0.00024322882529301463, "loss": 1.8094, "step": 50550 }, { "epoch": 1.68, "grad_norm": 0.5434100031852722, "learning_rate": 0.00024321856206615118, "loss": 1.7132, "step": 50551 }, { "epoch": 1.68, "grad_norm": 0.5620172619819641, "learning_rate": 0.00024320829890821187, "loss": 1.7132, "step": 50552 }, { "epoch": 1.68, "grad_norm": 0.5232622623443604, "learning_rate": 0.00024319803581920939, "loss": 1.7129, "step": 50553 }, { "epoch": 1.68, "grad_norm": 0.5251566171646118, "learning_rate": 0.00024318777279915614, "loss": 1.7564, "step": 50554 }, { "epoch": 1.68, "grad_norm": 0.5495322942733765, "learning_rate": 0.00024317750984806456, "loss": 1.6968, "step": 50555 }, { "epoch": 1.68, "grad_norm": 0.5774483680725098, "learning_rate": 0.0002431672469659471, "loss": 1.8142, "step": 50556 }, { "epoch": 1.68, "grad_norm": 0.5811646580696106, "learning_rate": 0.00024315698415281611, "loss": 1.7081, "step": 50557 }, { "epoch": 1.68, "grad_norm": 0.5324928164482117, "learning_rate": 0.0002431467214086844, "loss": 1.681, "step": 50558 }, { "epoch": 1.68, "grad_norm": 0.535298228263855, "learning_rate": 0.00024313645873356402, "loss": 1.6998, "step": 50559 }, { "epoch": 1.68, "grad_norm": 0.548556387424469, "learning_rate": 0.00024312619612746765, "loss": 1.7634, "step": 50560 }, { "epoch": 1.68, "grad_norm": 0.5702495574951172, "learning_rate": 0.00024311593359040777, "loss": 1.8369, "step": 50561 }, { "epoch": 1.68, "grad_norm": 0.5510034561157227, "learning_rate": 0.00024310567112239667, "loss": 1.7067, "step": 50562 }, { "epoch": 1.68, "grad_norm": 0.5271965265274048, "learning_rate": 0.0002430954087234469, "loss": 1.6862, "step": 50563 }, { "epoch": 1.68, "grad_norm": 0.5450313091278076, "learning_rate": 0.00024308514639357102, "loss": 1.7672, "step": 50564 }, { "epoch": 1.68, "grad_norm": 0.5504886507987976, "learning_rate": 0.0002430748841327813, "loss": 1.7567, "step": 50565 }, { "epoch": 1.68, "grad_norm": 0.5565407872200012, "learning_rate": 0.00024306462194109024, "loss": 1.731, "step": 50566 }, { "epoch": 1.68, "grad_norm": 0.5628857016563416, "learning_rate": 0.00024305435981851036, "loss": 1.7671, "step": 50567 }, { "epoch": 1.68, "grad_norm": 0.5399447083473206, "learning_rate": 0.0002430440977650542, "loss": 1.8114, "step": 50568 }, { "epoch": 1.68, "grad_norm": 0.5517692565917969, "learning_rate": 0.00024303383578073404, "loss": 1.69, "step": 50569 }, { "epoch": 1.68, "grad_norm": 0.529958188533783, "learning_rate": 0.00024302357386556242, "loss": 1.7176, "step": 50570 }, { "epoch": 1.68, "grad_norm": 0.5485224723815918, "learning_rate": 0.0002430133120195518, "loss": 1.8217, "step": 50571 }, { "epoch": 1.68, "grad_norm": 0.5429334044456482, "learning_rate": 0.00024300305024271463, "loss": 1.7809, "step": 50572 }, { "epoch": 1.68, "grad_norm": 0.5751799941062927, "learning_rate": 0.00024299278853506326, "loss": 1.7614, "step": 50573 }, { "epoch": 1.68, "grad_norm": 0.5453966856002808, "learning_rate": 0.00024298252689661042, "loss": 1.7398, "step": 50574 }, { "epoch": 1.68, "grad_norm": 0.5446513891220093, "learning_rate": 0.00024297226532736822, "loss": 1.695, "step": 50575 }, { "epoch": 1.68, "grad_norm": 0.5590317249298096, "learning_rate": 0.00024296200382734935, "loss": 1.7494, "step": 50576 }, { "epoch": 1.68, "grad_norm": 0.573427677154541, "learning_rate": 0.00024295174239656622, "loss": 1.7555, "step": 50577 }, { "epoch": 1.68, "grad_norm": 0.5701735019683838, "learning_rate": 0.00024294148103503132, "loss": 1.7565, "step": 50578 }, { "epoch": 1.68, "grad_norm": 0.5437827110290527, "learning_rate": 0.00024293121974275697, "loss": 1.8492, "step": 50579 }, { "epoch": 1.68, "grad_norm": 0.5606588125228882, "learning_rate": 0.00024292095851975566, "loss": 1.8158, "step": 50580 }, { "epoch": 1.68, "grad_norm": 0.5649381875991821, "learning_rate": 0.0002429106973660401, "loss": 1.715, "step": 50581 }, { "epoch": 1.68, "grad_norm": 0.566480815410614, "learning_rate": 0.00024290043628162234, "loss": 1.7911, "step": 50582 }, { "epoch": 1.68, "grad_norm": 0.7033846974372864, "learning_rate": 0.00024289017526651511, "loss": 1.7403, "step": 50583 }, { "epoch": 1.68, "grad_norm": 0.5528640151023865, "learning_rate": 0.00024287991432073082, "loss": 1.7576, "step": 50584 }, { "epoch": 1.68, "grad_norm": 0.5639595985412598, "learning_rate": 0.0002428696534442819, "loss": 1.8282, "step": 50585 }, { "epoch": 1.68, "grad_norm": 0.5848587155342102, "learning_rate": 0.00024285939263718074, "loss": 1.7522, "step": 50586 }, { "epoch": 1.68, "grad_norm": 0.5598735213279724, "learning_rate": 0.0002428491318994398, "loss": 1.7437, "step": 50587 }, { "epoch": 1.68, "grad_norm": 0.5526835322380066, "learning_rate": 0.00024283887123107182, "loss": 1.7008, "step": 50588 }, { "epoch": 1.68, "grad_norm": 0.560309648513794, "learning_rate": 0.00024282861063208884, "loss": 1.7634, "step": 50589 }, { "epoch": 1.68, "grad_norm": 0.5558609962463379, "learning_rate": 0.00024281835010250353, "loss": 1.7703, "step": 50590 }, { "epoch": 1.68, "grad_norm": 0.5656696557998657, "learning_rate": 0.0002428080896423284, "loss": 1.6551, "step": 50591 }, { "epoch": 1.68, "grad_norm": 0.5464715361595154, "learning_rate": 0.00024279782925157575, "loss": 1.8247, "step": 50592 }, { "epoch": 1.68, "grad_norm": 0.5498292446136475, "learning_rate": 0.0002427875689302581, "loss": 1.8345, "step": 50593 }, { "epoch": 1.68, "grad_norm": 0.5479094386100769, "learning_rate": 0.000242777308678388, "loss": 1.7623, "step": 50594 }, { "epoch": 1.68, "grad_norm": 0.5433099865913391, "learning_rate": 0.00024276704849597773, "loss": 1.7111, "step": 50595 }, { "epoch": 1.68, "grad_norm": 0.5431237816810608, "learning_rate": 0.0002427567883830398, "loss": 1.7647, "step": 50596 }, { "epoch": 1.68, "grad_norm": 0.5323710441589355, "learning_rate": 0.00024274652833958674, "loss": 1.7757, "step": 50597 }, { "epoch": 1.68, "grad_norm": 0.5426270365715027, "learning_rate": 0.000242736268365631, "loss": 1.8366, "step": 50598 }, { "epoch": 1.68, "grad_norm": 0.535656213760376, "learning_rate": 0.00024272600846118495, "loss": 1.8006, "step": 50599 }, { "epoch": 1.68, "grad_norm": 0.5371361970901489, "learning_rate": 0.00024271574862626106, "loss": 1.7889, "step": 50600 }, { "epoch": 1.68, "grad_norm": 0.5436752438545227, "learning_rate": 0.0002427054888608719, "loss": 1.7988, "step": 50601 }, { "epoch": 1.68, "grad_norm": 0.5341559052467346, "learning_rate": 0.00024269522916502983, "loss": 1.7377, "step": 50602 }, { "epoch": 1.68, "grad_norm": 0.5372747182846069, "learning_rate": 0.00024268496953874715, "loss": 1.7552, "step": 50603 }, { "epoch": 1.68, "grad_norm": 0.5351450443267822, "learning_rate": 0.0002426747099820367, "loss": 1.7529, "step": 50604 }, { "epoch": 1.68, "grad_norm": 0.5413128733634949, "learning_rate": 0.0002426644504949105, "loss": 1.6364, "step": 50605 }, { "epoch": 1.68, "grad_norm": 0.5493336915969849, "learning_rate": 0.00024265419107738132, "loss": 1.7191, "step": 50606 }, { "epoch": 1.68, "grad_norm": 0.5476064682006836, "learning_rate": 0.00024264393172946145, "loss": 1.6939, "step": 50607 }, { "epoch": 1.68, "grad_norm": 0.5628126859664917, "learning_rate": 0.00024263367245116348, "loss": 1.7753, "step": 50608 }, { "epoch": 1.68, "grad_norm": 0.5424625873565674, "learning_rate": 0.00024262341324249968, "loss": 1.7252, "step": 50609 }, { "epoch": 1.68, "grad_norm": 0.5278252959251404, "learning_rate": 0.00024261315410348255, "loss": 1.6801, "step": 50610 }, { "epoch": 1.68, "grad_norm": 0.5373811721801758, "learning_rate": 0.00024260289503412482, "loss": 1.7612, "step": 50611 }, { "epoch": 1.68, "grad_norm": 0.5571061968803406, "learning_rate": 0.00024259263603443855, "loss": 1.7642, "step": 50612 }, { "epoch": 1.68, "grad_norm": 0.5668445825576782, "learning_rate": 0.00024258237710443636, "loss": 1.7682, "step": 50613 }, { "epoch": 1.68, "grad_norm": 0.5537098050117493, "learning_rate": 0.00024257211824413075, "loss": 1.6986, "step": 50614 }, { "epoch": 1.68, "grad_norm": 0.568909764289856, "learning_rate": 0.0002425618594535342, "loss": 1.7794, "step": 50615 }, { "epoch": 1.68, "grad_norm": 0.5494387745857239, "learning_rate": 0.00024255160073265896, "loss": 1.6907, "step": 50616 }, { "epoch": 1.68, "grad_norm": 0.5368875861167908, "learning_rate": 0.0002425413420815176, "loss": 1.6804, "step": 50617 }, { "epoch": 1.68, "grad_norm": 0.5490382313728333, "learning_rate": 0.00024253108350012276, "loss": 1.8747, "step": 50618 }, { "epoch": 1.68, "grad_norm": 0.5481595396995544, "learning_rate": 0.00024252082498848654, "loss": 1.7022, "step": 50619 }, { "epoch": 1.68, "grad_norm": 0.5364113450050354, "learning_rate": 0.0002425105665466216, "loss": 1.6946, "step": 50620 }, { "epoch": 1.68, "grad_norm": 0.544253945350647, "learning_rate": 0.00024250030817454048, "loss": 1.7067, "step": 50621 }, { "epoch": 1.68, "grad_norm": 0.545075535774231, "learning_rate": 0.0002424900498722554, "loss": 1.7842, "step": 50622 }, { "epoch": 1.68, "grad_norm": 0.5559857487678528, "learning_rate": 0.00024247979163977894, "loss": 1.8224, "step": 50623 }, { "epoch": 1.68, "grad_norm": 0.5664588809013367, "learning_rate": 0.0002424695334771235, "loss": 1.8653, "step": 50624 }, { "epoch": 1.68, "grad_norm": 0.5355304479598999, "learning_rate": 0.0002424592753843017, "loss": 1.7761, "step": 50625 }, { "epoch": 1.68, "grad_norm": 0.5164570212364197, "learning_rate": 0.00024244901736132572, "loss": 1.7811, "step": 50626 }, { "epoch": 1.68, "grad_norm": 0.5569441318511963, "learning_rate": 0.00024243875940820823, "loss": 1.7877, "step": 50627 }, { "epoch": 1.68, "grad_norm": 0.5435729622840881, "learning_rate": 0.00024242850152496163, "loss": 1.6474, "step": 50628 }, { "epoch": 1.68, "grad_norm": 0.5387203693389893, "learning_rate": 0.0002424182437115983, "loss": 1.7637, "step": 50629 }, { "epoch": 1.68, "grad_norm": 0.5613214373588562, "learning_rate": 0.00024240798596813072, "loss": 1.7817, "step": 50630 }, { "epoch": 1.68, "grad_norm": 0.5973461866378784, "learning_rate": 0.00024239772829457142, "loss": 1.7306, "step": 50631 }, { "epoch": 1.68, "grad_norm": 0.5469008684158325, "learning_rate": 0.00024238747069093279, "loss": 1.8066, "step": 50632 }, { "epoch": 1.68, "grad_norm": 0.5535014867782593, "learning_rate": 0.00024237721315722714, "loss": 1.6798, "step": 50633 }, { "epoch": 1.68, "grad_norm": 0.54108726978302, "learning_rate": 0.00024236695569346717, "loss": 1.7048, "step": 50634 }, { "epoch": 1.68, "grad_norm": 0.6146844625473022, "learning_rate": 0.00024235669829966527, "loss": 1.8272, "step": 50635 }, { "epoch": 1.68, "grad_norm": 1.4282267093658447, "learning_rate": 0.00024234644097583378, "loss": 1.7872, "step": 50636 }, { "epoch": 1.68, "grad_norm": 0.558648407459259, "learning_rate": 0.00024233618372198522, "loss": 1.7976, "step": 50637 }, { "epoch": 1.68, "grad_norm": 0.5329378247261047, "learning_rate": 0.00024232592653813213, "loss": 1.7432, "step": 50638 }, { "epoch": 1.68, "grad_norm": 0.5499457120895386, "learning_rate": 0.00024231566942428678, "loss": 1.7521, "step": 50639 }, { "epoch": 1.68, "grad_norm": 0.5514832139015198, "learning_rate": 0.00024230541238046163, "loss": 1.72, "step": 50640 }, { "epoch": 1.68, "grad_norm": 0.5798051357269287, "learning_rate": 0.00024229515540666934, "loss": 1.7141, "step": 50641 }, { "epoch": 1.68, "grad_norm": 0.547782838344574, "learning_rate": 0.00024228489850292216, "loss": 1.6749, "step": 50642 }, { "epoch": 1.68, "grad_norm": 0.534288227558136, "learning_rate": 0.00024227464166923265, "loss": 1.7821, "step": 50643 }, { "epoch": 1.68, "grad_norm": 0.5644426345825195, "learning_rate": 0.0002422643849056132, "loss": 1.7613, "step": 50644 }, { "epoch": 1.68, "grad_norm": 0.5335317850112915, "learning_rate": 0.00024225412821207633, "loss": 1.8115, "step": 50645 }, { "epoch": 1.68, "grad_norm": 0.528076171875, "learning_rate": 0.0002422438715886343, "loss": 1.7168, "step": 50646 }, { "epoch": 1.69, "grad_norm": 0.5556198954582214, "learning_rate": 0.0002422336150352998, "loss": 1.7398, "step": 50647 }, { "epoch": 1.69, "grad_norm": 0.5453185439109802, "learning_rate": 0.00024222335855208523, "loss": 1.7966, "step": 50648 }, { "epoch": 1.69, "grad_norm": 0.5562068819999695, "learning_rate": 0.0002422131021390029, "loss": 1.7604, "step": 50649 }, { "epoch": 1.69, "grad_norm": 0.5686008930206299, "learning_rate": 0.00024220284579606538, "loss": 1.7562, "step": 50650 }, { "epoch": 1.69, "grad_norm": 0.5998761057853699, "learning_rate": 0.00024219258952328516, "loss": 1.7833, "step": 50651 }, { "epoch": 1.69, "grad_norm": 0.554495096206665, "learning_rate": 0.00024218233332067452, "loss": 1.7885, "step": 50652 }, { "epoch": 1.69, "grad_norm": 0.5339180827140808, "learning_rate": 0.00024217207718824597, "loss": 1.6906, "step": 50653 }, { "epoch": 1.69, "grad_norm": 0.5395711064338684, "learning_rate": 0.00024216182112601203, "loss": 1.7427, "step": 50654 }, { "epoch": 1.69, "grad_norm": 0.5401055812835693, "learning_rate": 0.00024215156513398523, "loss": 1.797, "step": 50655 }, { "epoch": 1.69, "grad_norm": 0.5480533838272095, "learning_rate": 0.00024214130921217783, "loss": 1.7305, "step": 50656 }, { "epoch": 1.69, "grad_norm": 0.5391445159912109, "learning_rate": 0.00024213105336060234, "loss": 1.7368, "step": 50657 }, { "epoch": 1.69, "grad_norm": 0.5534216165542603, "learning_rate": 0.00024212079757927128, "loss": 1.7215, "step": 50658 }, { "epoch": 1.69, "grad_norm": 0.5499567985534668, "learning_rate": 0.000242110541868197, "loss": 1.7381, "step": 50659 }, { "epoch": 1.69, "grad_norm": 0.5214980840682983, "learning_rate": 0.00024210028622739192, "loss": 1.6903, "step": 50660 }, { "epoch": 1.69, "grad_norm": 0.5340045690536499, "learning_rate": 0.00024209003065686873, "loss": 1.6841, "step": 50661 }, { "epoch": 1.69, "grad_norm": 0.5630109906196594, "learning_rate": 0.00024207977515663956, "loss": 1.7576, "step": 50662 }, { "epoch": 1.69, "grad_norm": 0.5628498196601868, "learning_rate": 0.00024206951972671704, "loss": 1.7568, "step": 50663 }, { "epoch": 1.69, "grad_norm": 0.5431241989135742, "learning_rate": 0.00024205926436711363, "loss": 1.7617, "step": 50664 }, { "epoch": 1.69, "grad_norm": 0.5554212927818298, "learning_rate": 0.00024204900907784176, "loss": 1.7222, "step": 50665 }, { "epoch": 1.69, "grad_norm": 0.539035975933075, "learning_rate": 0.00024203875385891377, "loss": 1.7674, "step": 50666 }, { "epoch": 1.69, "grad_norm": 0.5657144784927368, "learning_rate": 0.00024202849871034218, "loss": 1.7556, "step": 50667 }, { "epoch": 1.69, "grad_norm": 0.5680881142616272, "learning_rate": 0.00024201824363213962, "loss": 1.6997, "step": 50668 }, { "epoch": 1.69, "grad_norm": 0.5519923567771912, "learning_rate": 0.00024200798862431818, "loss": 1.6733, "step": 50669 }, { "epoch": 1.69, "grad_norm": 0.5747336745262146, "learning_rate": 0.00024199773368689057, "loss": 1.7482, "step": 50670 }, { "epoch": 1.69, "grad_norm": 0.5414619445800781, "learning_rate": 0.00024198747881986924, "loss": 1.676, "step": 50671 }, { "epoch": 1.69, "grad_norm": 0.5409253835678101, "learning_rate": 0.00024197722402326645, "loss": 1.8022, "step": 50672 }, { "epoch": 1.69, "grad_norm": 0.542199969291687, "learning_rate": 0.0002419669692970948, "loss": 1.7112, "step": 50673 }, { "epoch": 1.69, "grad_norm": 0.5639622807502747, "learning_rate": 0.00024195671464136663, "loss": 1.7657, "step": 50674 }, { "epoch": 1.69, "grad_norm": 0.5363563895225525, "learning_rate": 0.00024194646005609464, "loss": 1.6953, "step": 50675 }, { "epoch": 1.69, "grad_norm": 0.5398125052452087, "learning_rate": 0.0002419362055412909, "loss": 1.7787, "step": 50676 }, { "epoch": 1.69, "grad_norm": 0.5447584390640259, "learning_rate": 0.00024192595109696812, "loss": 1.7626, "step": 50677 }, { "epoch": 1.69, "grad_norm": 0.5543675422668457, "learning_rate": 0.00024191569672313873, "loss": 1.7288, "step": 50678 }, { "epoch": 1.69, "grad_norm": 0.5389851331710815, "learning_rate": 0.0002419054424198151, "loss": 1.7401, "step": 50679 }, { "epoch": 1.69, "grad_norm": 0.5379014611244202, "learning_rate": 0.00024189518818700965, "loss": 1.6548, "step": 50680 }, { "epoch": 1.69, "grad_norm": 0.55084228515625, "learning_rate": 0.000241884934024735, "loss": 1.7614, "step": 50681 }, { "epoch": 1.69, "grad_norm": 0.5540019869804382, "learning_rate": 0.00024187467993300337, "loss": 1.6862, "step": 50682 }, { "epoch": 1.69, "grad_norm": 0.5667557716369629, "learning_rate": 0.0002418644259118273, "loss": 1.7577, "step": 50683 }, { "epoch": 1.69, "grad_norm": 0.5416168570518494, "learning_rate": 0.00024185417196121925, "loss": 1.6855, "step": 50684 }, { "epoch": 1.69, "grad_norm": 0.5506880879402161, "learning_rate": 0.00024184391808119177, "loss": 1.7579, "step": 50685 }, { "epoch": 1.69, "grad_norm": 0.5436842441558838, "learning_rate": 0.00024183366427175715, "loss": 1.7799, "step": 50686 }, { "epoch": 1.69, "grad_norm": 0.5372978448867798, "learning_rate": 0.00024182341053292786, "loss": 1.7202, "step": 50687 }, { "epoch": 1.69, "grad_norm": 0.5694928169250488, "learning_rate": 0.00024181315686471646, "loss": 1.8294, "step": 50688 }, { "epoch": 1.69, "grad_norm": 0.5450876951217651, "learning_rate": 0.00024180290326713524, "loss": 1.7422, "step": 50689 }, { "epoch": 1.69, "grad_norm": 0.5471038222312927, "learning_rate": 0.00024179264974019665, "loss": 1.7463, "step": 50690 }, { "epoch": 1.69, "grad_norm": 0.5327418446540833, "learning_rate": 0.00024178239628391344, "loss": 1.7346, "step": 50691 }, { "epoch": 1.69, "grad_norm": 0.5500003695487976, "learning_rate": 0.00024177214289829757, "loss": 1.792, "step": 50692 }, { "epoch": 1.69, "grad_norm": 0.5536155700683594, "learning_rate": 0.00024176188958336184, "loss": 1.771, "step": 50693 }, { "epoch": 1.69, "grad_norm": 0.5564886927604675, "learning_rate": 0.00024175163633911862, "loss": 1.7414, "step": 50694 }, { "epoch": 1.69, "grad_norm": 0.5338401198387146, "learning_rate": 0.00024174138316558033, "loss": 1.6753, "step": 50695 }, { "epoch": 1.69, "grad_norm": 0.5393778085708618, "learning_rate": 0.0002417311300627594, "loss": 1.7142, "step": 50696 }, { "epoch": 1.69, "grad_norm": 0.5504177808761597, "learning_rate": 0.00024172087703066823, "loss": 1.7702, "step": 50697 }, { "epoch": 1.69, "grad_norm": 0.5393153429031372, "learning_rate": 0.00024171062406931948, "loss": 1.7423, "step": 50698 }, { "epoch": 1.69, "grad_norm": 0.547985315322876, "learning_rate": 0.00024170037117872528, "loss": 1.7314, "step": 50699 }, { "epoch": 1.69, "grad_norm": 0.5452367663383484, "learning_rate": 0.0002416901183588983, "loss": 1.7278, "step": 50700 }, { "epoch": 1.69, "grad_norm": 0.5460193157196045, "learning_rate": 0.000241679865609851, "loss": 1.7116, "step": 50701 }, { "epoch": 1.69, "grad_norm": 0.5370080471038818, "learning_rate": 0.0002416696129315957, "loss": 1.7044, "step": 50702 }, { "epoch": 1.69, "grad_norm": 0.5458659529685974, "learning_rate": 0.00024165936032414487, "loss": 1.7912, "step": 50703 }, { "epoch": 1.69, "grad_norm": 0.5395450592041016, "learning_rate": 0.00024164910778751088, "loss": 1.7474, "step": 50704 }, { "epoch": 1.69, "grad_norm": 0.550178587436676, "learning_rate": 0.00024163885532170652, "loss": 1.7811, "step": 50705 }, { "epoch": 1.69, "grad_norm": 0.5542328357696533, "learning_rate": 0.0002416286029267438, "loss": 1.7769, "step": 50706 }, { "epoch": 1.69, "grad_norm": 0.5556925535202026, "learning_rate": 0.0002416183506026354, "loss": 1.7386, "step": 50707 }, { "epoch": 1.69, "grad_norm": 0.5491122007369995, "learning_rate": 0.00024160809834939377, "loss": 1.7294, "step": 50708 }, { "epoch": 1.69, "grad_norm": 0.5345338582992554, "learning_rate": 0.00024159784616703127, "loss": 1.6912, "step": 50709 }, { "epoch": 1.69, "grad_norm": 0.5314531922340393, "learning_rate": 0.00024158759405556036, "loss": 1.7038, "step": 50710 }, { "epoch": 1.69, "grad_norm": 0.5434988141059875, "learning_rate": 0.0002415773420149936, "loss": 1.7748, "step": 50711 }, { "epoch": 1.69, "grad_norm": 0.5447112917900085, "learning_rate": 0.00024156709004534324, "loss": 1.6945, "step": 50712 }, { "epoch": 1.69, "grad_norm": 0.5356566309928894, "learning_rate": 0.00024155683814662177, "loss": 1.7167, "step": 50713 }, { "epoch": 1.69, "grad_norm": 0.5278279781341553, "learning_rate": 0.00024154658631884174, "loss": 1.7472, "step": 50714 }, { "epoch": 1.69, "grad_norm": 0.5704594254493713, "learning_rate": 0.0002415363345620156, "loss": 1.7199, "step": 50715 }, { "epoch": 1.69, "grad_norm": 0.5489363670349121, "learning_rate": 0.00024152608287615574, "loss": 1.7137, "step": 50716 }, { "epoch": 1.69, "grad_norm": 0.5610538125038147, "learning_rate": 0.00024151583126127453, "loss": 1.6805, "step": 50717 }, { "epoch": 1.69, "grad_norm": 0.5435957908630371, "learning_rate": 0.00024150557971738457, "loss": 1.7051, "step": 50718 }, { "epoch": 1.69, "grad_norm": 0.5618054866790771, "learning_rate": 0.00024149532824449814, "loss": 1.7583, "step": 50719 }, { "epoch": 1.69, "grad_norm": 0.5697077512741089, "learning_rate": 0.00024148507684262772, "loss": 1.8018, "step": 50720 }, { "epoch": 1.69, "grad_norm": 0.569793164730072, "learning_rate": 0.00024147482551178598, "loss": 1.7519, "step": 50721 }, { "epoch": 1.69, "grad_norm": 0.5614456534385681, "learning_rate": 0.00024146457425198496, "loss": 1.7262, "step": 50722 }, { "epoch": 1.69, "grad_norm": 0.5598570704460144, "learning_rate": 0.00024145432306323743, "loss": 1.7554, "step": 50723 }, { "epoch": 1.69, "grad_norm": 0.5294184684753418, "learning_rate": 0.00024144407194555572, "loss": 1.7845, "step": 50724 }, { "epoch": 1.69, "grad_norm": 0.565438985824585, "learning_rate": 0.0002414338208989523, "loss": 1.7508, "step": 50725 }, { "epoch": 1.69, "grad_norm": 0.5605765581130981, "learning_rate": 0.00024142356992343957, "loss": 1.7851, "step": 50726 }, { "epoch": 1.69, "grad_norm": 0.5389598608016968, "learning_rate": 0.0002414133190190299, "loss": 1.7665, "step": 50727 }, { "epoch": 1.69, "grad_norm": 0.5674965977668762, "learning_rate": 0.000241403068185736, "loss": 1.7308, "step": 50728 }, { "epoch": 1.69, "grad_norm": 0.5855565667152405, "learning_rate": 0.00024139281742357002, "loss": 1.7251, "step": 50729 }, { "epoch": 1.69, "grad_norm": 0.5704495310783386, "learning_rate": 0.00024138256673254455, "loss": 1.7252, "step": 50730 }, { "epoch": 1.69, "grad_norm": 0.5348681807518005, "learning_rate": 0.000241372316112672, "loss": 1.7685, "step": 50731 }, { "epoch": 1.69, "grad_norm": 0.566707193851471, "learning_rate": 0.0002413620655639649, "loss": 1.8206, "step": 50732 }, { "epoch": 1.69, "grad_norm": 0.5508384108543396, "learning_rate": 0.00024135181508643552, "loss": 1.8122, "step": 50733 }, { "epoch": 1.69, "grad_norm": 0.5621923804283142, "learning_rate": 0.00024134156468009636, "loss": 1.7994, "step": 50734 }, { "epoch": 1.69, "grad_norm": 0.533225953578949, "learning_rate": 0.0002413313143449601, "loss": 1.7373, "step": 50735 }, { "epoch": 1.69, "grad_norm": 0.5367751717567444, "learning_rate": 0.00024132106408103875, "loss": 1.7747, "step": 50736 }, { "epoch": 1.69, "grad_norm": 0.5241318345069885, "learning_rate": 0.00024131081388834507, "loss": 1.727, "step": 50737 }, { "epoch": 1.69, "grad_norm": 0.5434748530387878, "learning_rate": 0.0002413005637668915, "loss": 1.7395, "step": 50738 }, { "epoch": 1.69, "grad_norm": 0.5482529997825623, "learning_rate": 0.00024129031371669028, "loss": 1.724, "step": 50739 }, { "epoch": 1.69, "grad_norm": 0.5437292456626892, "learning_rate": 0.000241280063737754, "loss": 1.7231, "step": 50740 }, { "epoch": 1.69, "grad_norm": 0.5506348013877869, "learning_rate": 0.00024126981383009504, "loss": 1.7698, "step": 50741 }, { "epoch": 1.69, "grad_norm": 0.5601083636283875, "learning_rate": 0.000241259563993726, "loss": 1.8009, "step": 50742 }, { "epoch": 1.69, "grad_norm": 0.5278975367546082, "learning_rate": 0.00024124931422865904, "loss": 1.7549, "step": 50743 }, { "epoch": 1.69, "grad_norm": 0.5315637588500977, "learning_rate": 0.0002412390645349068, "loss": 1.7271, "step": 50744 }, { "epoch": 1.69, "grad_norm": 0.5440370440483093, "learning_rate": 0.00024122881491248175, "loss": 1.6936, "step": 50745 }, { "epoch": 1.69, "grad_norm": 0.5739362835884094, "learning_rate": 0.0002412185653613962, "loss": 1.8005, "step": 50746 }, { "epoch": 1.69, "grad_norm": 0.5581881403923035, "learning_rate": 0.00024120831588166265, "loss": 1.7421, "step": 50747 }, { "epoch": 1.69, "grad_norm": 0.5542953610420227, "learning_rate": 0.0002411980664732936, "loss": 1.7402, "step": 50748 }, { "epoch": 1.69, "grad_norm": 0.5476518273353577, "learning_rate": 0.00024118781713630135, "loss": 1.7834, "step": 50749 }, { "epoch": 1.69, "grad_norm": 0.5311501622200012, "learning_rate": 0.0002411775678706984, "loss": 1.6835, "step": 50750 }, { "epoch": 1.69, "grad_norm": 0.5814681649208069, "learning_rate": 0.00024116731867649728, "loss": 1.8598, "step": 50751 }, { "epoch": 1.69, "grad_norm": 0.5604539513587952, "learning_rate": 0.0002411570695537104, "loss": 1.7158, "step": 50752 }, { "epoch": 1.69, "grad_norm": 0.5628437399864197, "learning_rate": 0.0002411468205023501, "loss": 1.751, "step": 50753 }, { "epoch": 1.69, "grad_norm": 0.5707483887672424, "learning_rate": 0.00024113657152242891, "loss": 1.7728, "step": 50754 }, { "epoch": 1.69, "grad_norm": 0.5636557936668396, "learning_rate": 0.00024112632261395932, "loss": 1.7854, "step": 50755 }, { "epoch": 1.69, "grad_norm": 0.5579065680503845, "learning_rate": 0.00024111607377695364, "loss": 1.8009, "step": 50756 }, { "epoch": 1.69, "grad_norm": 0.542186439037323, "learning_rate": 0.00024110582501142428, "loss": 1.6735, "step": 50757 }, { "epoch": 1.69, "grad_norm": 0.5328566431999207, "learning_rate": 0.00024109557631738393, "loss": 1.7217, "step": 50758 }, { "epoch": 1.69, "grad_norm": 0.5524765849113464, "learning_rate": 0.00024108532769484477, "loss": 1.7392, "step": 50759 }, { "epoch": 1.69, "grad_norm": 0.5765082240104675, "learning_rate": 0.00024107507914381934, "loss": 1.79, "step": 50760 }, { "epoch": 1.69, "grad_norm": 0.553237795829773, "learning_rate": 0.0002410648306643201, "loss": 1.8102, "step": 50761 }, { "epoch": 1.69, "grad_norm": 0.5544796586036682, "learning_rate": 0.00024105458225635957, "loss": 1.7696, "step": 50762 }, { "epoch": 1.69, "grad_norm": 0.5361618995666504, "learning_rate": 0.0002410443339199499, "loss": 1.7229, "step": 50763 }, { "epoch": 1.69, "grad_norm": 0.5674282312393188, "learning_rate": 0.0002410340856551038, "loss": 1.7681, "step": 50764 }, { "epoch": 1.69, "grad_norm": 0.5634586215019226, "learning_rate": 0.00024102383746183373, "loss": 1.7643, "step": 50765 }, { "epoch": 1.69, "grad_norm": 0.5495567321777344, "learning_rate": 0.00024101358934015195, "loss": 1.742, "step": 50766 }, { "epoch": 1.69, "grad_norm": 0.5570095181465149, "learning_rate": 0.00024100334129007096, "loss": 1.6782, "step": 50767 }, { "epoch": 1.69, "grad_norm": 0.5986512899398804, "learning_rate": 0.00024099309331160332, "loss": 1.787, "step": 50768 }, { "epoch": 1.69, "grad_norm": 0.536185622215271, "learning_rate": 0.0002409828454047613, "loss": 1.734, "step": 50769 }, { "epoch": 1.69, "grad_norm": 0.5681697726249695, "learning_rate": 0.00024097259756955734, "loss": 1.7468, "step": 50770 }, { "epoch": 1.69, "grad_norm": 0.580894947052002, "learning_rate": 0.000240962349806004, "loss": 1.7245, "step": 50771 }, { "epoch": 1.69, "grad_norm": 0.5491847991943359, "learning_rate": 0.00024095210211411372, "loss": 1.7567, "step": 50772 }, { "epoch": 1.69, "grad_norm": 0.5423597693443298, "learning_rate": 0.00024094185449389887, "loss": 1.7689, "step": 50773 }, { "epoch": 1.69, "grad_norm": 0.5464706420898438, "learning_rate": 0.0002409316069453719, "loss": 1.7074, "step": 50774 }, { "epoch": 1.69, "grad_norm": 0.5681670904159546, "learning_rate": 0.0002409213594685453, "loss": 1.839, "step": 50775 }, { "epoch": 1.69, "grad_norm": 0.553497314453125, "learning_rate": 0.0002409111120634314, "loss": 1.8146, "step": 50776 }, { "epoch": 1.69, "grad_norm": 0.5382890105247498, "learning_rate": 0.00024090086473004262, "loss": 1.7505, "step": 50777 }, { "epoch": 1.69, "grad_norm": 0.5803531408309937, "learning_rate": 0.00024089061746839172, "loss": 1.6873, "step": 50778 }, { "epoch": 1.69, "grad_norm": 0.5567529797554016, "learning_rate": 0.00024088037027849066, "loss": 1.7787, "step": 50779 }, { "epoch": 1.69, "grad_norm": 0.5333929657936096, "learning_rate": 0.0002408701231603522, "loss": 1.7163, "step": 50780 }, { "epoch": 1.69, "grad_norm": 0.5549793243408203, "learning_rate": 0.0002408598761139887, "loss": 1.7741, "step": 50781 }, { "epoch": 1.69, "grad_norm": 0.5369131565093994, "learning_rate": 0.00024084962913941272, "loss": 1.6876, "step": 50782 }, { "epoch": 1.69, "grad_norm": 0.546495795249939, "learning_rate": 0.00024083938223663643, "loss": 1.7936, "step": 50783 }, { "epoch": 1.69, "grad_norm": 0.5456218123435974, "learning_rate": 0.0002408291354056724, "loss": 1.6909, "step": 50784 }, { "epoch": 1.69, "grad_norm": 0.5449771881103516, "learning_rate": 0.00024081888864653324, "loss": 1.7417, "step": 50785 }, { "epoch": 1.69, "grad_norm": 0.5650231242179871, "learning_rate": 0.00024080864195923107, "loss": 1.6984, "step": 50786 }, { "epoch": 1.69, "grad_norm": 0.5224078893661499, "learning_rate": 0.00024079839534377851, "loss": 1.6926, "step": 50787 }, { "epoch": 1.69, "grad_norm": 0.5623385906219482, "learning_rate": 0.00024078814880018807, "loss": 1.7714, "step": 50788 }, { "epoch": 1.69, "grad_norm": 0.5612012147903442, "learning_rate": 0.00024077790232847202, "loss": 1.6949, "step": 50789 }, { "epoch": 1.69, "grad_norm": 1.1194374561309814, "learning_rate": 0.00024076765592864286, "loss": 1.7746, "step": 50790 }, { "epoch": 1.69, "grad_norm": 0.537559986114502, "learning_rate": 0.00024075740960071297, "loss": 1.7819, "step": 50791 }, { "epoch": 1.69, "grad_norm": 0.567331075668335, "learning_rate": 0.0002407471633446951, "loss": 1.7131, "step": 50792 }, { "epoch": 1.69, "grad_norm": 0.5441160798072815, "learning_rate": 0.00024073691716060118, "loss": 1.8093, "step": 50793 }, { "epoch": 1.69, "grad_norm": 0.5472915768623352, "learning_rate": 0.00024072667104844402, "loss": 1.8346, "step": 50794 }, { "epoch": 1.69, "grad_norm": 0.5485895276069641, "learning_rate": 0.000240716425008236, "loss": 1.7878, "step": 50795 }, { "epoch": 1.69, "grad_norm": 0.5616072416305542, "learning_rate": 0.00024070617903998947, "loss": 1.7545, "step": 50796 }, { "epoch": 1.69, "grad_norm": 0.5597974061965942, "learning_rate": 0.00024069593314371684, "loss": 1.7669, "step": 50797 }, { "epoch": 1.69, "grad_norm": 0.8116515874862671, "learning_rate": 0.00024068568731943073, "loss": 1.7598, "step": 50798 }, { "epoch": 1.69, "grad_norm": 0.5907543897628784, "learning_rate": 0.00024067544156714335, "loss": 1.682, "step": 50799 }, { "epoch": 1.69, "grad_norm": 0.560510516166687, "learning_rate": 0.00024066519588686718, "loss": 1.8142, "step": 50800 }, { "epoch": 1.69, "grad_norm": 0.5595701932907104, "learning_rate": 0.0002406549502786148, "loss": 1.689, "step": 50801 }, { "epoch": 1.69, "grad_norm": 0.5406607389450073, "learning_rate": 0.00024064470474239864, "loss": 1.7343, "step": 50802 }, { "epoch": 1.69, "grad_norm": 0.5478097200393677, "learning_rate": 0.000240634459278231, "loss": 1.7111, "step": 50803 }, { "epoch": 1.69, "grad_norm": 0.544177770614624, "learning_rate": 0.00024062421388612435, "loss": 1.7315, "step": 50804 }, { "epoch": 1.69, "grad_norm": 0.5624105930328369, "learning_rate": 0.00024061396856609124, "loss": 1.6349, "step": 50805 }, { "epoch": 1.69, "grad_norm": 0.556736171245575, "learning_rate": 0.00024060372331814395, "loss": 1.824, "step": 50806 }, { "epoch": 1.69, "grad_norm": 0.5789386630058289, "learning_rate": 0.00024059347814229493, "loss": 1.8449, "step": 50807 }, { "epoch": 1.69, "grad_norm": 0.5670645833015442, "learning_rate": 0.00024058323303855687, "loss": 1.7687, "step": 50808 }, { "epoch": 1.69, "grad_norm": 0.5698896050453186, "learning_rate": 0.00024057298800694184, "loss": 1.69, "step": 50809 }, { "epoch": 1.69, "grad_norm": 0.5507078170776367, "learning_rate": 0.00024056274304746247, "loss": 1.7114, "step": 50810 }, { "epoch": 1.69, "grad_norm": 0.5627421140670776, "learning_rate": 0.0002405524981601312, "loss": 1.7647, "step": 50811 }, { "epoch": 1.69, "grad_norm": 0.5555039644241333, "learning_rate": 0.0002405422533449605, "loss": 1.7915, "step": 50812 }, { "epoch": 1.69, "grad_norm": 0.5629549026489258, "learning_rate": 0.00024053200860196266, "loss": 1.6996, "step": 50813 }, { "epoch": 1.69, "grad_norm": 0.5651286244392395, "learning_rate": 0.00024052176393115016, "loss": 1.6842, "step": 50814 }, { "epoch": 1.69, "grad_norm": 0.5398478507995605, "learning_rate": 0.0002405115193325356, "loss": 1.7614, "step": 50815 }, { "epoch": 1.69, "grad_norm": 0.5697923898696899, "learning_rate": 0.0002405012748061312, "loss": 1.7757, "step": 50816 }, { "epoch": 1.69, "grad_norm": 0.5467627048492432, "learning_rate": 0.00024049103035194945, "loss": 1.6984, "step": 50817 }, { "epoch": 1.69, "grad_norm": 0.5588690638542175, "learning_rate": 0.00024048078597000295, "loss": 1.7948, "step": 50818 }, { "epoch": 1.69, "grad_norm": 0.5490161776542664, "learning_rate": 0.00024047054166030394, "loss": 1.7928, "step": 50819 }, { "epoch": 1.69, "grad_norm": 0.531523585319519, "learning_rate": 0.0002404602974228649, "loss": 1.7729, "step": 50820 }, { "epoch": 1.69, "grad_norm": 0.5506786704063416, "learning_rate": 0.00024045005325769824, "loss": 1.7843, "step": 50821 }, { "epoch": 1.69, "grad_norm": 0.5411884784698486, "learning_rate": 0.00024043980916481662, "loss": 1.776, "step": 50822 }, { "epoch": 1.69, "grad_norm": 0.5667155385017395, "learning_rate": 0.0002404295651442321, "loss": 1.789, "step": 50823 }, { "epoch": 1.69, "grad_norm": 0.5605501532554626, "learning_rate": 0.00024041932119595738, "loss": 1.788, "step": 50824 }, { "epoch": 1.69, "grad_norm": 0.5519349575042725, "learning_rate": 0.0002404090773200049, "loss": 1.7434, "step": 50825 }, { "epoch": 1.69, "grad_norm": 0.5303769111633301, "learning_rate": 0.00024039883351638694, "loss": 1.7715, "step": 50826 }, { "epoch": 1.69, "grad_norm": 0.5460119843482971, "learning_rate": 0.00024038858978511603, "loss": 1.7893, "step": 50827 }, { "epoch": 1.69, "grad_norm": 0.5417457222938538, "learning_rate": 0.00024037834612620462, "loss": 1.7925, "step": 50828 }, { "epoch": 1.69, "grad_norm": 2.8687617778778076, "learning_rate": 0.0002403681025396651, "loss": 1.825, "step": 50829 }, { "epoch": 1.69, "grad_norm": 0.5672575235366821, "learning_rate": 0.0002403578590255098, "loss": 1.7638, "step": 50830 }, { "epoch": 1.69, "grad_norm": 0.5499355792999268, "learning_rate": 0.00024034761558375137, "loss": 1.7907, "step": 50831 }, { "epoch": 1.69, "grad_norm": 0.5636878609657288, "learning_rate": 0.0002403373722144022, "loss": 1.7135, "step": 50832 }, { "epoch": 1.69, "grad_norm": 0.5682346224784851, "learning_rate": 0.00024032712891747463, "loss": 1.7982, "step": 50833 }, { "epoch": 1.69, "grad_norm": 0.5973473787307739, "learning_rate": 0.00024031688569298108, "loss": 1.7382, "step": 50834 }, { "epoch": 1.69, "grad_norm": 0.5682016015052795, "learning_rate": 0.00024030664254093411, "loss": 1.7037, "step": 50835 }, { "epoch": 1.69, "grad_norm": 0.5364375114440918, "learning_rate": 0.00024029639946134606, "loss": 1.7698, "step": 50836 }, { "epoch": 1.69, "grad_norm": 0.5457815527915955, "learning_rate": 0.0002402861564542293, "loss": 1.7324, "step": 50837 }, { "epoch": 1.69, "grad_norm": 0.5788703560829163, "learning_rate": 0.00024027591351959656, "loss": 1.8563, "step": 50838 }, { "epoch": 1.69, "grad_norm": 0.5575957298278809, "learning_rate": 0.00024026567065745983, "loss": 1.787, "step": 50839 }, { "epoch": 1.69, "grad_norm": 0.5485098361968994, "learning_rate": 0.00024025542786783186, "loss": 1.6425, "step": 50840 }, { "epoch": 1.69, "grad_norm": 0.5570564270019531, "learning_rate": 0.00024024518515072498, "loss": 1.6848, "step": 50841 }, { "epoch": 1.69, "grad_norm": 0.5642724633216858, "learning_rate": 0.00024023494250615171, "loss": 1.8078, "step": 50842 }, { "epoch": 1.69, "grad_norm": 0.5372195839881897, "learning_rate": 0.00024022469993412434, "loss": 1.7973, "step": 50843 }, { "epoch": 1.69, "grad_norm": 0.5864247679710388, "learning_rate": 0.00024021445743465535, "loss": 1.732, "step": 50844 }, { "epoch": 1.69, "grad_norm": 0.5439503788948059, "learning_rate": 0.00024020421500775739, "loss": 1.7402, "step": 50845 }, { "epoch": 1.69, "grad_norm": 0.5366177558898926, "learning_rate": 0.00024019397265344247, "loss": 1.7817, "step": 50846 }, { "epoch": 1.69, "grad_norm": 0.5498273372650146, "learning_rate": 0.00024018373037172335, "loss": 1.7321, "step": 50847 }, { "epoch": 1.69, "grad_norm": 0.542906641960144, "learning_rate": 0.00024017348816261243, "loss": 1.7645, "step": 50848 }, { "epoch": 1.69, "grad_norm": 0.5366886258125305, "learning_rate": 0.000240163246026122, "loss": 1.7459, "step": 50849 }, { "epoch": 1.69, "grad_norm": 0.5350977182388306, "learning_rate": 0.00024015300396226457, "loss": 1.7586, "step": 50850 }, { "epoch": 1.69, "grad_norm": 0.5449669361114502, "learning_rate": 0.00024014276197105252, "loss": 1.8043, "step": 50851 }, { "epoch": 1.69, "grad_norm": 0.5688813328742981, "learning_rate": 0.0002401325200524985, "loss": 1.7024, "step": 50852 }, { "epoch": 1.69, "grad_norm": 0.5539562702178955, "learning_rate": 0.00024012227820661468, "loss": 1.71, "step": 50853 }, { "epoch": 1.69, "grad_norm": 0.5668025612831116, "learning_rate": 0.00024011203643341359, "loss": 1.8461, "step": 50854 }, { "epoch": 1.69, "grad_norm": 0.5469176769256592, "learning_rate": 0.0002401017947329077, "loss": 1.7258, "step": 50855 }, { "epoch": 1.69, "grad_norm": 0.5384122133255005, "learning_rate": 0.0002400915531051094, "loss": 1.7613, "step": 50856 }, { "epoch": 1.69, "grad_norm": 0.5379449129104614, "learning_rate": 0.0002400813115500311, "loss": 1.7623, "step": 50857 }, { "epoch": 1.69, "grad_norm": 0.5308010578155518, "learning_rate": 0.00024007107006768517, "loss": 1.7093, "step": 50858 }, { "epoch": 1.69, "grad_norm": 0.5413053035736084, "learning_rate": 0.00024006082865808437, "loss": 1.7622, "step": 50859 }, { "epoch": 1.69, "grad_norm": 0.5344851613044739, "learning_rate": 0.0002400505873212407, "loss": 1.71, "step": 50860 }, { "epoch": 1.69, "grad_norm": 0.5295012593269348, "learning_rate": 0.00024004034605716682, "loss": 1.7464, "step": 50861 }, { "epoch": 1.69, "grad_norm": 0.5430314540863037, "learning_rate": 0.00024003010486587517, "loss": 1.7859, "step": 50862 }, { "epoch": 1.69, "grad_norm": 0.5531279444694519, "learning_rate": 0.0002400198637473781, "loss": 1.7471, "step": 50863 }, { "epoch": 1.69, "grad_norm": 0.5510734915733337, "learning_rate": 0.0002400096227016881, "loss": 1.7534, "step": 50864 }, { "epoch": 1.69, "grad_norm": 0.5511283874511719, "learning_rate": 0.0002399993817288176, "loss": 1.8157, "step": 50865 }, { "epoch": 1.69, "grad_norm": 0.5502161979675293, "learning_rate": 0.00023998914082877895, "loss": 1.7204, "step": 50866 }, { "epoch": 1.69, "grad_norm": 0.5253309011459351, "learning_rate": 0.0002399789000015846, "loss": 1.8056, "step": 50867 }, { "epoch": 1.69, "grad_norm": 0.5462256669998169, "learning_rate": 0.00023996865924724711, "loss": 1.7839, "step": 50868 }, { "epoch": 1.69, "grad_norm": 0.5532869696617126, "learning_rate": 0.00023995841856577885, "loss": 1.7636, "step": 50869 }, { "epoch": 1.69, "grad_norm": 0.5508003234863281, "learning_rate": 0.00023994817795719217, "loss": 1.8605, "step": 50870 }, { "epoch": 1.69, "grad_norm": 0.5397890210151672, "learning_rate": 0.00023993793742149955, "loss": 1.7228, "step": 50871 }, { "epoch": 1.69, "grad_norm": 0.5610291957855225, "learning_rate": 0.00023992769695871345, "loss": 1.7571, "step": 50872 }, { "epoch": 1.69, "grad_norm": 0.5545547008514404, "learning_rate": 0.0002399174565688462, "loss": 1.663, "step": 50873 }, { "epoch": 1.69, "grad_norm": 0.5470830798149109, "learning_rate": 0.00023990721625191037, "loss": 1.6903, "step": 50874 }, { "epoch": 1.69, "grad_norm": 0.536590576171875, "learning_rate": 0.00023989697600791836, "loss": 1.683, "step": 50875 }, { "epoch": 1.69, "grad_norm": 0.5491065979003906, "learning_rate": 0.0002398867358368825, "loss": 1.7705, "step": 50876 }, { "epoch": 1.69, "grad_norm": 0.8465623259544373, "learning_rate": 0.00023987649573881532, "loss": 1.7992, "step": 50877 }, { "epoch": 1.69, "grad_norm": 0.5461208820343018, "learning_rate": 0.00023986625571372917, "loss": 1.7334, "step": 50878 }, { "epoch": 1.69, "grad_norm": 0.5342223048210144, "learning_rate": 0.0002398560157616366, "loss": 1.7681, "step": 50879 }, { "epoch": 1.69, "grad_norm": 0.5701367855072021, "learning_rate": 0.00023984577588254986, "loss": 1.7432, "step": 50880 }, { "epoch": 1.69, "grad_norm": 0.546134889125824, "learning_rate": 0.00023983553607648156, "loss": 1.7165, "step": 50881 }, { "epoch": 1.69, "grad_norm": 0.5540945529937744, "learning_rate": 0.0002398252963434441, "loss": 1.7362, "step": 50882 }, { "epoch": 1.69, "grad_norm": 0.5508314371109009, "learning_rate": 0.00023981505668344978, "loss": 1.8543, "step": 50883 }, { "epoch": 1.69, "grad_norm": 0.5506806969642639, "learning_rate": 0.00023980481709651112, "loss": 1.691, "step": 50884 }, { "epoch": 1.69, "grad_norm": 0.5360831022262573, "learning_rate": 0.0002397945775826406, "loss": 1.7484, "step": 50885 }, { "epoch": 1.69, "grad_norm": 1.6377419233322144, "learning_rate": 0.00023978433814185056, "loss": 1.7378, "step": 50886 }, { "epoch": 1.69, "grad_norm": 0.5405424237251282, "learning_rate": 0.0002397740987741534, "loss": 1.7541, "step": 50887 }, { "epoch": 1.69, "grad_norm": 0.5693733096122742, "learning_rate": 0.00023976385947956165, "loss": 1.7768, "step": 50888 }, { "epoch": 1.69, "grad_norm": 0.5366396903991699, "learning_rate": 0.00023975362025808778, "loss": 1.707, "step": 50889 }, { "epoch": 1.69, "grad_norm": 0.533470094203949, "learning_rate": 0.00023974338110974408, "loss": 1.7064, "step": 50890 }, { "epoch": 1.69, "grad_norm": 0.5353431105613708, "learning_rate": 0.000239733142034543, "loss": 1.7626, "step": 50891 }, { "epoch": 1.69, "grad_norm": 0.5483906865119934, "learning_rate": 0.00023972290303249708, "loss": 1.7175, "step": 50892 }, { "epoch": 1.69, "grad_norm": 0.5568409562110901, "learning_rate": 0.00023971266410361864, "loss": 1.7724, "step": 50893 }, { "epoch": 1.69, "grad_norm": 0.5500126481056213, "learning_rate": 0.00023970242524792005, "loss": 1.7361, "step": 50894 }, { "epoch": 1.69, "grad_norm": 0.5461212396621704, "learning_rate": 0.00023969218646541403, "loss": 1.8403, "step": 50895 }, { "epoch": 1.69, "grad_norm": 0.5577062964439392, "learning_rate": 0.00023968194775611263, "loss": 1.772, "step": 50896 }, { "epoch": 1.69, "grad_norm": 0.541329562664032, "learning_rate": 0.0002396717091200285, "loss": 1.7633, "step": 50897 }, { "epoch": 1.69, "grad_norm": 0.5555540919303894, "learning_rate": 0.00023966147055717402, "loss": 1.7851, "step": 50898 }, { "epoch": 1.69, "grad_norm": 0.554610013961792, "learning_rate": 0.00023965123206756174, "loss": 1.7433, "step": 50899 }, { "epoch": 1.69, "grad_norm": 0.5519633293151855, "learning_rate": 0.00023964099365120386, "loss": 1.7384, "step": 50900 }, { "epoch": 1.69, "grad_norm": 0.5789779424667358, "learning_rate": 0.00023963075530811288, "loss": 1.684, "step": 50901 }, { "epoch": 1.69, "grad_norm": 0.5420985221862793, "learning_rate": 0.00023962051703830143, "loss": 1.7264, "step": 50902 }, { "epoch": 1.69, "grad_norm": 0.5376324653625488, "learning_rate": 0.00023961027884178163, "loss": 1.7786, "step": 50903 }, { "epoch": 1.69, "grad_norm": 0.5268796682357788, "learning_rate": 0.0002396000407185661, "loss": 1.6897, "step": 50904 }, { "epoch": 1.69, "grad_norm": 0.5771673917770386, "learning_rate": 0.00023958980266866728, "loss": 1.847, "step": 50905 }, { "epoch": 1.69, "grad_norm": 0.5444858074188232, "learning_rate": 0.00023957956469209744, "loss": 1.7345, "step": 50906 }, { "epoch": 1.69, "grad_norm": 0.5730055570602417, "learning_rate": 0.00023956932678886913, "loss": 1.7533, "step": 50907 }, { "epoch": 1.69, "grad_norm": 0.5697349309921265, "learning_rate": 0.0002395590889589947, "loss": 1.7478, "step": 50908 }, { "epoch": 1.69, "grad_norm": 0.559461236000061, "learning_rate": 0.00023954885120248682, "loss": 1.7566, "step": 50909 }, { "epoch": 1.69, "grad_norm": 0.5501152276992798, "learning_rate": 0.00023953861351935755, "loss": 1.8138, "step": 50910 }, { "epoch": 1.69, "grad_norm": 0.5634045004844666, "learning_rate": 0.00023952837590961954, "loss": 1.6768, "step": 50911 }, { "epoch": 1.69, "grad_norm": 0.5778617858886719, "learning_rate": 0.00023951813837328528, "loss": 1.7279, "step": 50912 }, { "epoch": 1.69, "grad_norm": 0.5665637850761414, "learning_rate": 0.00023950790091036695, "loss": 1.7686, "step": 50913 }, { "epoch": 1.69, "grad_norm": 0.5378374457359314, "learning_rate": 0.00023949766352087713, "loss": 1.6742, "step": 50914 }, { "epoch": 1.69, "grad_norm": 0.5359374284744263, "learning_rate": 0.00023948742620482833, "loss": 1.7469, "step": 50915 }, { "epoch": 1.69, "grad_norm": 0.5443583726882935, "learning_rate": 0.00023947718896223278, "loss": 1.6908, "step": 50916 }, { "epoch": 1.69, "grad_norm": 0.5565851330757141, "learning_rate": 0.000239466951793103, "loss": 1.7439, "step": 50917 }, { "epoch": 1.69, "grad_norm": 0.5556212663650513, "learning_rate": 0.0002394567146974514, "loss": 1.6748, "step": 50918 }, { "epoch": 1.69, "grad_norm": 0.5939849615097046, "learning_rate": 0.00023944647767529056, "loss": 1.7576, "step": 50919 }, { "epoch": 1.69, "grad_norm": 0.55315101146698, "learning_rate": 0.00023943624072663268, "loss": 1.8134, "step": 50920 }, { "epoch": 1.69, "grad_norm": 0.5525588393211365, "learning_rate": 0.0002394260038514903, "loss": 1.7189, "step": 50921 }, { "epoch": 1.69, "grad_norm": 0.5639870166778564, "learning_rate": 0.00023941576704987588, "loss": 1.7875, "step": 50922 }, { "epoch": 1.69, "grad_norm": 0.5518220663070679, "learning_rate": 0.00023940553032180173, "loss": 1.7357, "step": 50923 }, { "epoch": 1.69, "grad_norm": 0.550992488861084, "learning_rate": 0.00023939529366728024, "loss": 1.7107, "step": 50924 }, { "epoch": 1.69, "grad_norm": 0.5516250729560852, "learning_rate": 0.00023938505708632417, "loss": 1.7162, "step": 50925 }, { "epoch": 1.69, "grad_norm": 0.5314817428588867, "learning_rate": 0.00023937482057894552, "loss": 1.7079, "step": 50926 }, { "epoch": 1.69, "grad_norm": 0.5420771241188049, "learning_rate": 0.00023936458414515694, "loss": 1.7747, "step": 50927 }, { "epoch": 1.69, "grad_norm": 1.1658128499984741, "learning_rate": 0.00023935434778497085, "loss": 1.7083, "step": 50928 }, { "epoch": 1.69, "grad_norm": 0.5460724234580994, "learning_rate": 0.00023934411149839968, "loss": 1.7597, "step": 50929 }, { "epoch": 1.69, "grad_norm": 0.5437108278274536, "learning_rate": 0.00023933387528545576, "loss": 1.7587, "step": 50930 }, { "epoch": 1.69, "grad_norm": 1.1873726844787598, "learning_rate": 0.00023932363914615153, "loss": 1.7661, "step": 50931 }, { "epoch": 1.69, "grad_norm": 0.5521501302719116, "learning_rate": 0.00023931340308049967, "loss": 1.7242, "step": 50932 }, { "epoch": 1.69, "grad_norm": 0.5494828224182129, "learning_rate": 0.00023930316708851217, "loss": 1.7866, "step": 50933 }, { "epoch": 1.69, "grad_norm": 0.538560152053833, "learning_rate": 0.00023929293117020176, "loss": 1.7021, "step": 50934 }, { "epoch": 1.69, "grad_norm": 0.5539351105690002, "learning_rate": 0.00023928269532558084, "loss": 1.6916, "step": 50935 }, { "epoch": 1.69, "grad_norm": 0.5488823652267456, "learning_rate": 0.00023927245955466175, "loss": 1.826, "step": 50936 }, { "epoch": 1.69, "grad_norm": 0.5489107966423035, "learning_rate": 0.0002392622238574569, "loss": 1.7119, "step": 50937 }, { "epoch": 1.69, "grad_norm": 0.5564302802085876, "learning_rate": 0.00023925198823397873, "loss": 1.6359, "step": 50938 }, { "epoch": 1.69, "grad_norm": 0.565224289894104, "learning_rate": 0.0002392417526842399, "loss": 1.7978, "step": 50939 }, { "epoch": 1.69, "grad_norm": 0.5451048612594604, "learning_rate": 0.00023923151720825237, "loss": 1.7568, "step": 50940 }, { "epoch": 1.69, "grad_norm": 0.5569504499435425, "learning_rate": 0.00023922128180602894, "loss": 1.7111, "step": 50941 }, { "epoch": 1.69, "grad_norm": 0.5774059891700745, "learning_rate": 0.00023921104647758198, "loss": 1.6724, "step": 50942 }, { "epoch": 1.69, "grad_norm": 0.5822616219520569, "learning_rate": 0.00023920081122292378, "loss": 1.7699, "step": 50943 }, { "epoch": 1.69, "grad_norm": 0.5527772307395935, "learning_rate": 0.00023919057604206685, "loss": 1.7282, "step": 50944 }, { "epoch": 1.69, "grad_norm": 0.5509000420570374, "learning_rate": 0.00023918034093502367, "loss": 1.7744, "step": 50945 }, { "epoch": 1.69, "grad_norm": 0.5653396844863892, "learning_rate": 0.0002391701059018065, "loss": 1.7402, "step": 50946 }, { "epoch": 1.7, "grad_norm": 0.5520338416099548, "learning_rate": 0.0002391598709424278, "loss": 1.7292, "step": 50947 }, { "epoch": 1.7, "grad_norm": 0.5538507699966431, "learning_rate": 0.00023914963605690013, "loss": 1.7464, "step": 50948 }, { "epoch": 1.7, "grad_norm": 0.5675752758979797, "learning_rate": 0.0002391394012452359, "loss": 1.7212, "step": 50949 }, { "epoch": 1.7, "grad_norm": 0.5466784834861755, "learning_rate": 0.0002391291665074474, "loss": 1.7357, "step": 50950 }, { "epoch": 1.7, "grad_norm": 0.5618402361869812, "learning_rate": 0.00023911893184354715, "loss": 1.7086, "step": 50951 }, { "epoch": 1.7, "grad_norm": 0.560531497001648, "learning_rate": 0.00023910869725354757, "loss": 1.6967, "step": 50952 }, { "epoch": 1.7, "grad_norm": 0.5503751039505005, "learning_rate": 0.00023909846273746104, "loss": 1.6331, "step": 50953 }, { "epoch": 1.7, "grad_norm": 0.5750737190246582, "learning_rate": 0.00023908822829529989, "loss": 1.7446, "step": 50954 }, { "epoch": 1.7, "grad_norm": 0.5729526281356812, "learning_rate": 0.00023907799392707685, "loss": 1.7811, "step": 50955 }, { "epoch": 1.7, "grad_norm": 0.559599757194519, "learning_rate": 0.000239067759632804, "loss": 1.7708, "step": 50956 }, { "epoch": 1.7, "grad_norm": 0.5347237586975098, "learning_rate": 0.00023905752541249393, "loss": 1.7546, "step": 50957 }, { "epoch": 1.7, "grad_norm": 0.5488592386245728, "learning_rate": 0.00023904729126615908, "loss": 1.8129, "step": 50958 }, { "epoch": 1.7, "grad_norm": 0.5403181314468384, "learning_rate": 0.0002390370571938119, "loss": 1.7677, "step": 50959 }, { "epoch": 1.7, "grad_norm": 0.5405957102775574, "learning_rate": 0.00023902682319546466, "loss": 1.7867, "step": 50960 }, { "epoch": 1.7, "grad_norm": 0.5479352474212646, "learning_rate": 0.00023901658927112982, "loss": 1.7411, "step": 50961 }, { "epoch": 1.7, "grad_norm": 0.5468878746032715, "learning_rate": 0.00023900635542082005, "loss": 1.7474, "step": 50962 }, { "epoch": 1.7, "grad_norm": 0.5784767866134644, "learning_rate": 0.00023899612164454738, "loss": 1.773, "step": 50963 }, { "epoch": 1.7, "grad_norm": 0.5429801940917969, "learning_rate": 0.0002389858879423245, "loss": 1.656, "step": 50964 }, { "epoch": 1.7, "grad_norm": 0.5521218776702881, "learning_rate": 0.00023897565431416383, "loss": 1.6655, "step": 50965 }, { "epoch": 1.7, "grad_norm": 0.5553819537162781, "learning_rate": 0.00023896542076007766, "loss": 1.7744, "step": 50966 }, { "epoch": 1.7, "grad_norm": 0.550546407699585, "learning_rate": 0.00023895518728007844, "loss": 1.7322, "step": 50967 }, { "epoch": 1.7, "grad_norm": 0.5388622283935547, "learning_rate": 0.0002389449538741786, "loss": 1.6348, "step": 50968 }, { "epoch": 1.7, "grad_norm": 0.5670095086097717, "learning_rate": 0.00023893472054239078, "loss": 1.7681, "step": 50969 }, { "epoch": 1.7, "grad_norm": 0.5692625045776367, "learning_rate": 0.00023892448728472703, "loss": 1.7866, "step": 50970 }, { "epoch": 1.7, "grad_norm": 0.5530820488929749, "learning_rate": 0.00023891425410120003, "loss": 1.7721, "step": 50971 }, { "epoch": 1.7, "grad_norm": 0.5677615404129028, "learning_rate": 0.00023890402099182215, "loss": 1.7608, "step": 50972 }, { "epoch": 1.7, "grad_norm": 0.547063946723938, "learning_rate": 0.00023889378795660573, "loss": 1.7266, "step": 50973 }, { "epoch": 1.7, "grad_norm": 0.5378739833831787, "learning_rate": 0.00023888355499556324, "loss": 1.7917, "step": 50974 }, { "epoch": 1.7, "grad_norm": 0.5286633372306824, "learning_rate": 0.00023887332210870707, "loss": 1.6914, "step": 50975 }, { "epoch": 1.7, "grad_norm": 0.5853333473205566, "learning_rate": 0.00023886308929604985, "loss": 1.6849, "step": 50976 }, { "epoch": 1.7, "grad_norm": 0.5502673387527466, "learning_rate": 0.00023885285655760364, "loss": 1.753, "step": 50977 }, { "epoch": 1.7, "grad_norm": 0.5535087585449219, "learning_rate": 0.00023884262389338115, "loss": 1.7103, "step": 50978 }, { "epoch": 1.7, "grad_norm": 0.5602213144302368, "learning_rate": 0.00023883239130339476, "loss": 1.7587, "step": 50979 }, { "epoch": 1.7, "grad_norm": 0.548349916934967, "learning_rate": 0.00023882215878765677, "loss": 1.7985, "step": 50980 }, { "epoch": 1.7, "grad_norm": 0.5375667214393616, "learning_rate": 0.00023881192634617963, "loss": 1.6539, "step": 50981 }, { "epoch": 1.7, "grad_norm": 0.5506793260574341, "learning_rate": 0.0002388016939789759, "loss": 1.7943, "step": 50982 }, { "epoch": 1.7, "grad_norm": 0.5476860404014587, "learning_rate": 0.0002387914616860578, "loss": 1.7624, "step": 50983 }, { "epoch": 1.7, "grad_norm": 0.542646050453186, "learning_rate": 0.00023878122946743778, "loss": 1.7674, "step": 50984 }, { "epoch": 1.7, "grad_norm": 0.535923182964325, "learning_rate": 0.0002387709973231284, "loss": 1.76, "step": 50985 }, { "epoch": 1.7, "grad_norm": 0.5383492112159729, "learning_rate": 0.00023876076525314207, "loss": 1.6879, "step": 50986 }, { "epoch": 1.7, "grad_norm": 0.5317990183830261, "learning_rate": 0.0002387505332574911, "loss": 1.7424, "step": 50987 }, { "epoch": 1.7, "grad_norm": 0.5353013873100281, "learning_rate": 0.00023874030133618797, "loss": 1.7275, "step": 50988 }, { "epoch": 1.7, "grad_norm": 0.5365278124809265, "learning_rate": 0.00023873006948924513, "loss": 1.6954, "step": 50989 }, { "epoch": 1.7, "grad_norm": 0.5428419709205627, "learning_rate": 0.0002387198377166748, "loss": 1.7202, "step": 50990 }, { "epoch": 1.7, "grad_norm": 0.5639769434928894, "learning_rate": 0.00023870960601848966, "loss": 1.7396, "step": 50991 }, { "epoch": 1.7, "grad_norm": 0.5312120318412781, "learning_rate": 0.0002386993743947021, "loss": 1.7446, "step": 50992 }, { "epoch": 1.7, "grad_norm": 0.5592846870422363, "learning_rate": 0.00023868914284532437, "loss": 1.7139, "step": 50993 }, { "epoch": 1.7, "grad_norm": 0.5366795659065247, "learning_rate": 0.00023867891137036902, "loss": 1.7696, "step": 50994 }, { "epoch": 1.7, "grad_norm": 0.5463972091674805, "learning_rate": 0.00023866867996984842, "loss": 1.7277, "step": 50995 }, { "epoch": 1.7, "grad_norm": 0.5522414445877075, "learning_rate": 0.00023865844864377506, "loss": 1.7594, "step": 50996 }, { "epoch": 1.7, "grad_norm": 0.5411425828933716, "learning_rate": 0.0002386482173921612, "loss": 1.7875, "step": 50997 }, { "epoch": 1.7, "grad_norm": 0.5489552021026611, "learning_rate": 0.00023863798621501944, "loss": 1.7777, "step": 50998 }, { "epoch": 1.7, "grad_norm": 0.550859272480011, "learning_rate": 0.00023862775511236217, "loss": 1.7767, "step": 50999 }, { "epoch": 1.7, "grad_norm": 0.535510241985321, "learning_rate": 0.00023861752408420165, "loss": 1.7177, "step": 51000 }, { "epoch": 1.7, "grad_norm": 0.5550424456596375, "learning_rate": 0.00023860729313055046, "loss": 1.7578, "step": 51001 }, { "epoch": 1.7, "grad_norm": 0.5647943615913391, "learning_rate": 0.00023859706225142104, "loss": 1.7595, "step": 51002 }, { "epoch": 1.7, "grad_norm": 0.5356709957122803, "learning_rate": 0.00023858683144682564, "loss": 1.7244, "step": 51003 }, { "epoch": 1.7, "grad_norm": 0.5705288648605347, "learning_rate": 0.00023857660071677678, "loss": 1.7577, "step": 51004 }, { "epoch": 1.7, "grad_norm": 0.5245755314826965, "learning_rate": 0.00023856637006128688, "loss": 1.6797, "step": 51005 }, { "epoch": 1.7, "grad_norm": 0.5488665103912354, "learning_rate": 0.00023855613948036846, "loss": 1.7796, "step": 51006 }, { "epoch": 1.7, "grad_norm": 0.550074577331543, "learning_rate": 0.00023854590897403374, "loss": 1.7227, "step": 51007 }, { "epoch": 1.7, "grad_norm": 0.5651056170463562, "learning_rate": 0.0002385356785422952, "loss": 1.8262, "step": 51008 }, { "epoch": 1.7, "grad_norm": 0.5697551965713501, "learning_rate": 0.0002385254481851654, "loss": 1.8405, "step": 51009 }, { "epoch": 1.7, "grad_norm": 0.5458357930183411, "learning_rate": 0.00023851521790265657, "loss": 1.7231, "step": 51010 }, { "epoch": 1.7, "grad_norm": 0.5391666889190674, "learning_rate": 0.00023850498769478114, "loss": 1.7179, "step": 51011 }, { "epoch": 1.7, "grad_norm": 0.5532718300819397, "learning_rate": 0.0002384947575615518, "loss": 1.8248, "step": 51012 }, { "epoch": 1.7, "grad_norm": 0.5530332922935486, "learning_rate": 0.00023848452750298058, "loss": 1.8431, "step": 51013 }, { "epoch": 1.7, "grad_norm": 0.533824622631073, "learning_rate": 0.0002384742975190801, "loss": 1.6861, "step": 51014 }, { "epoch": 1.7, "grad_norm": 0.5445839762687683, "learning_rate": 0.00023846406760986283, "loss": 1.7897, "step": 51015 }, { "epoch": 1.7, "grad_norm": 0.5443033576011658, "learning_rate": 0.00023845383777534111, "loss": 1.7198, "step": 51016 }, { "epoch": 1.7, "grad_norm": 0.5500208139419556, "learning_rate": 0.00023844360801552735, "loss": 1.7881, "step": 51017 }, { "epoch": 1.7, "grad_norm": 0.5400317311286926, "learning_rate": 0.00023843337833043387, "loss": 1.7184, "step": 51018 }, { "epoch": 1.7, "grad_norm": 0.547100841999054, "learning_rate": 0.0002384231487200734, "loss": 1.7065, "step": 51019 }, { "epoch": 1.7, "grad_norm": 0.5516229867935181, "learning_rate": 0.000238412919184458, "loss": 1.7798, "step": 51020 }, { "epoch": 1.7, "grad_norm": 0.5562490820884705, "learning_rate": 0.00023840268972360025, "loss": 1.7461, "step": 51021 }, { "epoch": 1.7, "grad_norm": 0.5561516284942627, "learning_rate": 0.00023839246033751266, "loss": 1.7338, "step": 51022 }, { "epoch": 1.7, "grad_norm": 0.5640122890472412, "learning_rate": 0.00023838223102620748, "loss": 1.6941, "step": 51023 }, { "epoch": 1.7, "grad_norm": 0.5796248912811279, "learning_rate": 0.00023837200178969718, "loss": 1.7166, "step": 51024 }, { "epoch": 1.7, "grad_norm": 0.5558468103408813, "learning_rate": 0.00023836177262799414, "loss": 1.8542, "step": 51025 }, { "epoch": 1.7, "grad_norm": 0.5601514577865601, "learning_rate": 0.00023835154354111097, "loss": 1.7581, "step": 51026 }, { "epoch": 1.7, "grad_norm": 0.5382722020149231, "learning_rate": 0.00023834131452905982, "loss": 1.6942, "step": 51027 }, { "epoch": 1.7, "grad_norm": 0.5632455945014954, "learning_rate": 0.00023833108559185326, "loss": 1.7032, "step": 51028 }, { "epoch": 1.7, "grad_norm": 0.5503568649291992, "learning_rate": 0.00023832085672950373, "loss": 1.7456, "step": 51029 }, { "epoch": 1.7, "grad_norm": 0.5633237957954407, "learning_rate": 0.00023831062794202353, "loss": 1.7354, "step": 51030 }, { "epoch": 1.7, "grad_norm": 0.5489751696586609, "learning_rate": 0.00023830039922942517, "loss": 1.8066, "step": 51031 }, { "epoch": 1.7, "grad_norm": 0.5610305070877075, "learning_rate": 0.00023829017059172106, "loss": 1.6897, "step": 51032 }, { "epoch": 1.7, "grad_norm": 0.5934572815895081, "learning_rate": 0.00023827994202892355, "loss": 1.7322, "step": 51033 }, { "epoch": 1.7, "grad_norm": 0.5554972290992737, "learning_rate": 0.00023826971354104503, "loss": 1.7282, "step": 51034 }, { "epoch": 1.7, "grad_norm": 0.5548686981201172, "learning_rate": 0.000238259485128098, "loss": 1.8058, "step": 51035 }, { "epoch": 1.7, "grad_norm": 0.5386963486671448, "learning_rate": 0.00023824925679009497, "loss": 1.6629, "step": 51036 }, { "epoch": 1.7, "grad_norm": 0.547177791595459, "learning_rate": 0.00023823902852704816, "loss": 1.7158, "step": 51037 }, { "epoch": 1.7, "grad_norm": 0.5416169762611389, "learning_rate": 0.00023822880033897012, "loss": 1.8106, "step": 51038 }, { "epoch": 1.7, "grad_norm": 0.5461702346801758, "learning_rate": 0.00023821857222587324, "loss": 1.717, "step": 51039 }, { "epoch": 1.7, "grad_norm": 0.5749426484107971, "learning_rate": 0.00023820834418776982, "loss": 1.7484, "step": 51040 }, { "epoch": 1.7, "grad_norm": 0.572195291519165, "learning_rate": 0.00023819811622467234, "loss": 1.702, "step": 51041 }, { "epoch": 1.7, "grad_norm": 0.5343641042709351, "learning_rate": 0.00023818788833659341, "loss": 1.7581, "step": 51042 }, { "epoch": 1.7, "grad_norm": 0.5471788644790649, "learning_rate": 0.0002381776605235451, "loss": 1.7431, "step": 51043 }, { "epoch": 1.7, "grad_norm": 0.5641005635261536, "learning_rate": 0.00023816743278554006, "loss": 1.6728, "step": 51044 }, { "epoch": 1.7, "grad_norm": 0.5558865070343018, "learning_rate": 0.00023815720512259062, "loss": 1.7744, "step": 51045 }, { "epoch": 1.7, "grad_norm": 0.560497522354126, "learning_rate": 0.00023814697753470935, "loss": 1.7315, "step": 51046 }, { "epoch": 1.7, "grad_norm": 0.51939457654953, "learning_rate": 0.00023813675002190842, "loss": 1.7084, "step": 51047 }, { "epoch": 1.7, "grad_norm": 0.5398678183555603, "learning_rate": 0.0002381265225842003, "loss": 1.7197, "step": 51048 }, { "epoch": 1.7, "grad_norm": 0.5501406788825989, "learning_rate": 0.00023811629522159768, "loss": 1.7149, "step": 51049 }, { "epoch": 1.7, "grad_norm": 0.5651761889457703, "learning_rate": 0.00023810606793411255, "loss": 1.7961, "step": 51050 }, { "epoch": 1.7, "grad_norm": 0.5396514534950256, "learning_rate": 0.00023809584072175758, "loss": 1.8225, "step": 51051 }, { "epoch": 1.7, "grad_norm": 0.549025297164917, "learning_rate": 0.00023808561358454526, "loss": 1.7306, "step": 51052 }, { "epoch": 1.7, "grad_norm": 0.5430842041969299, "learning_rate": 0.00023807538652248776, "loss": 1.7508, "step": 51053 }, { "epoch": 1.7, "grad_norm": 0.5563549399375916, "learning_rate": 0.00023806515953559763, "loss": 1.7994, "step": 51054 }, { "epoch": 1.7, "grad_norm": 0.5467541813850403, "learning_rate": 0.0002380549326238872, "loss": 1.7172, "step": 51055 }, { "epoch": 1.7, "grad_norm": 0.555070161819458, "learning_rate": 0.0002380447057873692, "loss": 1.8186, "step": 51056 }, { "epoch": 1.7, "grad_norm": 0.5467196702957153, "learning_rate": 0.00023803447902605553, "loss": 1.7477, "step": 51057 }, { "epoch": 1.7, "grad_norm": 0.574069082736969, "learning_rate": 0.00023802425233995897, "loss": 1.7251, "step": 51058 }, { "epoch": 1.7, "grad_norm": 0.551986813545227, "learning_rate": 0.00023801402572909194, "loss": 1.7571, "step": 51059 }, { "epoch": 1.7, "grad_norm": 0.5399277210235596, "learning_rate": 0.00023800379919346665, "loss": 1.745, "step": 51060 }, { "epoch": 1.7, "grad_norm": 0.566834568977356, "learning_rate": 0.00023799357273309558, "loss": 1.7092, "step": 51061 }, { "epoch": 1.7, "grad_norm": 0.5455084443092346, "learning_rate": 0.0002379833463479913, "loss": 1.706, "step": 51062 }, { "epoch": 1.7, "grad_norm": 0.5647923350334167, "learning_rate": 0.00023797312003816598, "loss": 1.7302, "step": 51063 }, { "epoch": 1.7, "grad_norm": 0.5505086183547974, "learning_rate": 0.00023796289380363214, "loss": 1.6926, "step": 51064 }, { "epoch": 1.7, "grad_norm": 0.5394046306610107, "learning_rate": 0.00023795266764440224, "loss": 1.6603, "step": 51065 }, { "epoch": 1.7, "grad_norm": 0.5575751662254333, "learning_rate": 0.00023794244156048874, "loss": 1.6141, "step": 51066 }, { "epoch": 1.7, "grad_norm": 0.5646394491195679, "learning_rate": 0.0002379322155519039, "loss": 1.7272, "step": 51067 }, { "epoch": 1.7, "grad_norm": 0.5568341612815857, "learning_rate": 0.00023792198961866017, "loss": 1.81, "step": 51068 }, { "epoch": 1.7, "grad_norm": 0.5318613648414612, "learning_rate": 0.00023791176376077012, "loss": 1.7768, "step": 51069 }, { "epoch": 1.7, "grad_norm": 0.5410919189453125, "learning_rate": 0.00023790153797824596, "loss": 1.7901, "step": 51070 }, { "epoch": 1.7, "grad_norm": 0.5568525195121765, "learning_rate": 0.00023789131227110012, "loss": 1.7829, "step": 51071 }, { "epoch": 1.7, "grad_norm": 0.5400531888008118, "learning_rate": 0.00023788108663934523, "loss": 1.7173, "step": 51072 }, { "epoch": 1.7, "grad_norm": 0.5640048980712891, "learning_rate": 0.0002378708610829934, "loss": 1.7189, "step": 51073 }, { "epoch": 1.7, "grad_norm": 0.5423722863197327, "learning_rate": 0.0002378606356020572, "loss": 1.7251, "step": 51074 }, { "epoch": 1.7, "grad_norm": 0.5370672345161438, "learning_rate": 0.0002378504101965491, "loss": 1.7493, "step": 51075 }, { "epoch": 1.7, "grad_norm": 0.5637511014938354, "learning_rate": 0.00023784018486648148, "loss": 1.7346, "step": 51076 }, { "epoch": 1.7, "grad_norm": 0.5515598654747009, "learning_rate": 0.00023782995961186667, "loss": 1.7113, "step": 51077 }, { "epoch": 1.7, "grad_norm": 0.5440930128097534, "learning_rate": 0.00023781973443271704, "loss": 1.7167, "step": 51078 }, { "epoch": 1.7, "grad_norm": 0.5479474067687988, "learning_rate": 0.0002378095093290453, "loss": 1.8329, "step": 51079 }, { "epoch": 1.7, "grad_norm": 0.5565230846405029, "learning_rate": 0.00023779928430086344, "loss": 1.7082, "step": 51080 }, { "epoch": 1.7, "grad_norm": 0.5532158017158508, "learning_rate": 0.00023778905934818416, "loss": 1.7269, "step": 51081 }, { "epoch": 1.7, "grad_norm": 0.5224722623825073, "learning_rate": 0.00023777883447101983, "loss": 1.685, "step": 51082 }, { "epoch": 1.7, "grad_norm": 0.5619081854820251, "learning_rate": 0.0002377686096693828, "loss": 1.6723, "step": 51083 }, { "epoch": 1.7, "grad_norm": 0.542878270149231, "learning_rate": 0.00023775838494328555, "loss": 1.7378, "step": 51084 }, { "epoch": 1.7, "grad_norm": 0.5496544241905212, "learning_rate": 0.00023774816029274032, "loss": 1.7169, "step": 51085 }, { "epoch": 1.7, "grad_norm": 0.5510363578796387, "learning_rate": 0.00023773793571775983, "loss": 1.8177, "step": 51086 }, { "epoch": 1.7, "grad_norm": 0.5450329780578613, "learning_rate": 0.00023772771121835618, "loss": 1.7066, "step": 51087 }, { "epoch": 1.7, "grad_norm": 0.5438259243965149, "learning_rate": 0.00023771748679454193, "loss": 1.6879, "step": 51088 }, { "epoch": 1.7, "grad_norm": 0.5635567903518677, "learning_rate": 0.00023770726244632957, "loss": 1.7732, "step": 51089 }, { "epoch": 1.7, "grad_norm": 0.5632807016372681, "learning_rate": 0.00023769703817373133, "loss": 1.7761, "step": 51090 }, { "epoch": 1.7, "grad_norm": 0.5566152930259705, "learning_rate": 0.00023768681397675972, "loss": 1.7446, "step": 51091 }, { "epoch": 1.7, "grad_norm": 0.5542683601379395, "learning_rate": 0.0002376765898554272, "loss": 1.7602, "step": 51092 }, { "epoch": 1.7, "grad_norm": 0.529088020324707, "learning_rate": 0.00023766636580974603, "loss": 1.7545, "step": 51093 }, { "epoch": 1.7, "grad_norm": 0.789331316947937, "learning_rate": 0.00023765614183972865, "loss": 1.7376, "step": 51094 }, { "epoch": 1.7, "grad_norm": 0.5449084043502808, "learning_rate": 0.0002376459179453876, "loss": 1.7192, "step": 51095 }, { "epoch": 1.7, "grad_norm": 0.5492725372314453, "learning_rate": 0.00023763569412673527, "loss": 1.7461, "step": 51096 }, { "epoch": 1.7, "grad_norm": 0.5624077320098877, "learning_rate": 0.00023762547038378397, "loss": 1.67, "step": 51097 }, { "epoch": 1.7, "grad_norm": 0.5366540551185608, "learning_rate": 0.00023761524671654614, "loss": 1.7549, "step": 51098 }, { "epoch": 1.7, "grad_norm": 0.5553120374679565, "learning_rate": 0.0002376050231250343, "loss": 1.7731, "step": 51099 }, { "epoch": 1.7, "grad_norm": 0.5570902228355408, "learning_rate": 0.00023759479960926068, "loss": 1.8163, "step": 51100 }, { "epoch": 1.7, "grad_norm": 0.545519232749939, "learning_rate": 0.00023758457616923775, "loss": 1.718, "step": 51101 }, { "epoch": 1.7, "grad_norm": 0.5438597202301025, "learning_rate": 0.000237574352804978, "loss": 1.7183, "step": 51102 }, { "epoch": 1.7, "grad_norm": 0.5445871949195862, "learning_rate": 0.0002375641295164938, "loss": 1.7152, "step": 51103 }, { "epoch": 1.7, "grad_norm": 0.5538864135742188, "learning_rate": 0.00023755390630379755, "loss": 1.6689, "step": 51104 }, { "epoch": 1.7, "grad_norm": 0.5325511693954468, "learning_rate": 0.00023754368316690163, "loss": 1.6636, "step": 51105 }, { "epoch": 1.7, "grad_norm": 0.5429225564002991, "learning_rate": 0.00023753346010581852, "loss": 1.7656, "step": 51106 }, { "epoch": 1.7, "grad_norm": 0.5524380803108215, "learning_rate": 0.0002375232371205605, "loss": 1.7693, "step": 51107 }, { "epoch": 1.7, "grad_norm": 0.5409654974937439, "learning_rate": 0.00023751301421114012, "loss": 1.721, "step": 51108 }, { "epoch": 1.7, "grad_norm": 0.5636852979660034, "learning_rate": 0.00023750279137756976, "loss": 1.7607, "step": 51109 }, { "epoch": 1.7, "grad_norm": 0.550433874130249, "learning_rate": 0.0002374925686198618, "loss": 1.7233, "step": 51110 }, { "epoch": 1.7, "grad_norm": 0.5977261066436768, "learning_rate": 0.00023748234593802856, "loss": 1.81, "step": 51111 }, { "epoch": 1.7, "grad_norm": 0.5638522505760193, "learning_rate": 0.00023747212333208263, "loss": 1.6672, "step": 51112 }, { "epoch": 1.7, "grad_norm": 0.5988169312477112, "learning_rate": 0.00023746190080203638, "loss": 1.796, "step": 51113 }, { "epoch": 1.7, "grad_norm": 0.5421448945999146, "learning_rate": 0.000237451678347902, "loss": 1.753, "step": 51114 }, { "epoch": 1.7, "grad_norm": 0.5556610822677612, "learning_rate": 0.00023744145596969214, "loss": 1.7714, "step": 51115 }, { "epoch": 1.7, "grad_norm": 0.5420179963111877, "learning_rate": 0.00023743123366741923, "loss": 1.7535, "step": 51116 }, { "epoch": 1.7, "grad_norm": 0.5547999143600464, "learning_rate": 0.0002374210114410955, "loss": 1.7298, "step": 51117 }, { "epoch": 1.7, "grad_norm": 0.6073409914970398, "learning_rate": 0.0002374107892907334, "loss": 1.7033, "step": 51118 }, { "epoch": 1.7, "grad_norm": 0.570533812046051, "learning_rate": 0.0002374005672163455, "loss": 1.7317, "step": 51119 }, { "epoch": 1.7, "grad_norm": 0.5409414172172546, "learning_rate": 0.00023739034521794402, "loss": 1.7176, "step": 51120 }, { "epoch": 1.7, "grad_norm": 0.5826696753501892, "learning_rate": 0.00023738012329554132, "loss": 1.7761, "step": 51121 }, { "epoch": 1.7, "grad_norm": 0.5400223731994629, "learning_rate": 0.00023736990144915002, "loss": 1.7532, "step": 51122 }, { "epoch": 1.7, "grad_norm": 0.5676211714744568, "learning_rate": 0.00023735967967878252, "loss": 1.7242, "step": 51123 }, { "epoch": 1.7, "grad_norm": 0.528086245059967, "learning_rate": 0.00023734945798445107, "loss": 1.7199, "step": 51124 }, { "epoch": 1.7, "grad_norm": 0.5816828608512878, "learning_rate": 0.0002373392363661681, "loss": 1.8685, "step": 51125 }, { "epoch": 1.7, "grad_norm": 0.5639647245407104, "learning_rate": 0.00023732901482394618, "loss": 1.7199, "step": 51126 }, { "epoch": 1.7, "grad_norm": 0.5683671236038208, "learning_rate": 0.00023731879335779754, "loss": 1.706, "step": 51127 }, { "epoch": 1.7, "grad_norm": 0.5843198299407959, "learning_rate": 0.00023730857196773455, "loss": 1.7127, "step": 51128 }, { "epoch": 1.7, "grad_norm": 0.5544586181640625, "learning_rate": 0.00023729835065376993, "loss": 1.7261, "step": 51129 }, { "epoch": 1.7, "grad_norm": 0.5482770800590515, "learning_rate": 0.00023728812941591567, "loss": 1.7565, "step": 51130 }, { "epoch": 1.7, "grad_norm": 0.5714203119277954, "learning_rate": 0.00023727790825418447, "loss": 1.7442, "step": 51131 }, { "epoch": 1.7, "grad_norm": 0.564277708530426, "learning_rate": 0.0002372676871685886, "loss": 1.7809, "step": 51132 }, { "epoch": 1.7, "grad_norm": 0.5736176371574402, "learning_rate": 0.00023725746615914063, "loss": 1.8066, "step": 51133 }, { "epoch": 1.7, "grad_norm": 0.5372083187103271, "learning_rate": 0.00023724724522585275, "loss": 1.7584, "step": 51134 }, { "epoch": 1.7, "grad_norm": 0.5372236371040344, "learning_rate": 0.0002372370243687374, "loss": 1.7485, "step": 51135 }, { "epoch": 1.7, "grad_norm": 0.5412325263023376, "learning_rate": 0.00023722680358780727, "loss": 1.7768, "step": 51136 }, { "epoch": 1.7, "grad_norm": 0.5475413203239441, "learning_rate": 0.00023721658288307435, "loss": 1.7509, "step": 51137 }, { "epoch": 1.7, "grad_norm": 0.5534247159957886, "learning_rate": 0.00023720636225455134, "loss": 1.7084, "step": 51138 }, { "epoch": 1.7, "grad_norm": 0.7417031526565552, "learning_rate": 0.00023719614170225058, "loss": 1.8026, "step": 51139 }, { "epoch": 1.7, "grad_norm": 0.5532934665679932, "learning_rate": 0.0002371859212261844, "loss": 1.6984, "step": 51140 }, { "epoch": 1.7, "grad_norm": 0.5665696263313293, "learning_rate": 0.00023717570082636523, "loss": 1.7846, "step": 51141 }, { "epoch": 1.7, "grad_norm": 0.5586739778518677, "learning_rate": 0.00023716548050280547, "loss": 1.7405, "step": 51142 }, { "epoch": 1.7, "grad_norm": 0.5693309307098389, "learning_rate": 0.00023715526025551775, "loss": 1.72, "step": 51143 }, { "epoch": 1.7, "grad_norm": 0.5775690674781799, "learning_rate": 0.00023714504008451412, "loss": 1.7375, "step": 51144 }, { "epoch": 1.7, "grad_norm": 0.5779367685317993, "learning_rate": 0.00023713481998980716, "loss": 1.7586, "step": 51145 }, { "epoch": 1.7, "grad_norm": 0.5361738801002502, "learning_rate": 0.00023712459997140935, "loss": 1.6264, "step": 51146 }, { "epoch": 1.7, "grad_norm": 0.5466064810752869, "learning_rate": 0.00023711438002933295, "loss": 1.7305, "step": 51147 }, { "epoch": 1.7, "grad_norm": 0.5567170977592468, "learning_rate": 0.00023710416016359042, "loss": 1.6961, "step": 51148 }, { "epoch": 1.7, "grad_norm": 0.5528371334075928, "learning_rate": 0.00023709394037419425, "loss": 1.7436, "step": 51149 }, { "epoch": 1.7, "grad_norm": 0.5589989423751831, "learning_rate": 0.0002370837206611567, "loss": 1.7659, "step": 51150 }, { "epoch": 1.7, "grad_norm": 0.548247218132019, "learning_rate": 0.00023707350102449018, "loss": 1.7988, "step": 51151 }, { "epoch": 1.7, "grad_norm": 0.5627917051315308, "learning_rate": 0.00023706328146420722, "loss": 1.7617, "step": 51152 }, { "epoch": 1.7, "grad_norm": 0.5442779660224915, "learning_rate": 0.00023705306198032022, "loss": 1.7279, "step": 51153 }, { "epoch": 1.7, "grad_norm": 0.5482019782066345, "learning_rate": 0.00023704284257284144, "loss": 1.7085, "step": 51154 }, { "epoch": 1.7, "grad_norm": 0.5474017262458801, "learning_rate": 0.0002370326232417834, "loss": 1.7379, "step": 51155 }, { "epoch": 1.7, "grad_norm": 0.541261613368988, "learning_rate": 0.00023702240398715857, "loss": 1.7684, "step": 51156 }, { "epoch": 1.7, "grad_norm": 0.5565887689590454, "learning_rate": 0.00023701218480897914, "loss": 1.8231, "step": 51157 }, { "epoch": 1.7, "grad_norm": 0.5471500754356384, "learning_rate": 0.00023700196570725763, "loss": 1.7644, "step": 51158 }, { "epoch": 1.7, "grad_norm": 0.5553187131881714, "learning_rate": 0.0002369917466820066, "loss": 1.7212, "step": 51159 }, { "epoch": 1.7, "grad_norm": 0.5318270921707153, "learning_rate": 0.00023698152773323812, "loss": 1.7682, "step": 51160 }, { "epoch": 1.7, "grad_norm": 0.5470613241195679, "learning_rate": 0.00023697130886096487, "loss": 1.7493, "step": 51161 }, { "epoch": 1.7, "grad_norm": 0.5409502387046814, "learning_rate": 0.00023696109006519914, "loss": 1.7465, "step": 51162 }, { "epoch": 1.7, "grad_norm": 0.5549856424331665, "learning_rate": 0.0002369508713459534, "loss": 1.8301, "step": 51163 }, { "epoch": 1.7, "grad_norm": 0.5684719085693359, "learning_rate": 0.00023694065270324, "loss": 1.734, "step": 51164 }, { "epoch": 1.7, "grad_norm": 0.5607266426086426, "learning_rate": 0.0002369304341370713, "loss": 1.8146, "step": 51165 }, { "epoch": 1.7, "grad_norm": 0.5577343702316284, "learning_rate": 0.00023692021564745988, "loss": 1.685, "step": 51166 }, { "epoch": 1.7, "grad_norm": 0.5691236257553101, "learning_rate": 0.0002369099972344179, "loss": 1.7578, "step": 51167 }, { "epoch": 1.7, "grad_norm": 0.5437883734703064, "learning_rate": 0.00023689977889795793, "loss": 1.7896, "step": 51168 }, { "epoch": 1.7, "grad_norm": 0.5482512712478638, "learning_rate": 0.00023688956063809238, "loss": 1.7253, "step": 51169 }, { "epoch": 1.7, "grad_norm": 0.5548125505447388, "learning_rate": 0.00023687934245483357, "loss": 1.747, "step": 51170 }, { "epoch": 1.7, "grad_norm": 0.5500358939170837, "learning_rate": 0.00023686912434819392, "loss": 1.6932, "step": 51171 }, { "epoch": 1.7, "grad_norm": 0.5906224846839905, "learning_rate": 0.00023685890631818576, "loss": 1.7312, "step": 51172 }, { "epoch": 1.7, "grad_norm": 0.5478891134262085, "learning_rate": 0.0002368486883648218, "loss": 1.6781, "step": 51173 }, { "epoch": 1.7, "grad_norm": 0.5416739583015442, "learning_rate": 0.00023683847048811406, "loss": 1.7174, "step": 51174 }, { "epoch": 1.7, "grad_norm": 0.5409152507781982, "learning_rate": 0.00023682825268807513, "loss": 1.6932, "step": 51175 }, { "epoch": 1.7, "grad_norm": 0.5819512009620667, "learning_rate": 0.00023681803496471748, "loss": 1.8219, "step": 51176 }, { "epoch": 1.7, "grad_norm": 0.551167905330658, "learning_rate": 0.00023680781731805336, "loss": 1.7036, "step": 51177 }, { "epoch": 1.7, "grad_norm": 0.5707873106002808, "learning_rate": 0.00023679759974809525, "loss": 1.8091, "step": 51178 }, { "epoch": 1.7, "grad_norm": 0.5546646118164062, "learning_rate": 0.00023678738225485558, "loss": 1.7112, "step": 51179 }, { "epoch": 1.7, "grad_norm": 0.562767744064331, "learning_rate": 0.0002367771648383467, "loss": 1.8147, "step": 51180 }, { "epoch": 1.7, "grad_norm": 0.5649248957633972, "learning_rate": 0.0002367669474985809, "loss": 1.6493, "step": 51181 }, { "epoch": 1.7, "grad_norm": 0.5527244210243225, "learning_rate": 0.0002367567302355708, "loss": 1.6501, "step": 51182 }, { "epoch": 1.7, "grad_norm": 0.5374149084091187, "learning_rate": 0.00023674651304932877, "loss": 1.6719, "step": 51183 }, { "epoch": 1.7, "grad_norm": 0.5524653792381287, "learning_rate": 0.0002367362959398671, "loss": 1.7184, "step": 51184 }, { "epoch": 1.7, "grad_norm": 0.5746735334396362, "learning_rate": 0.00023672607890719822, "loss": 1.6988, "step": 51185 }, { "epoch": 1.7, "grad_norm": 0.5529960989952087, "learning_rate": 0.00023671586195133466, "loss": 1.7823, "step": 51186 }, { "epoch": 1.7, "grad_norm": 0.5658115744590759, "learning_rate": 0.00023670564507228864, "loss": 1.8028, "step": 51187 }, { "epoch": 1.7, "grad_norm": 0.542073667049408, "learning_rate": 0.0002366954282700726, "loss": 1.7528, "step": 51188 }, { "epoch": 1.7, "grad_norm": 0.5491815805435181, "learning_rate": 0.00023668521154469914, "loss": 1.6399, "step": 51189 }, { "epoch": 1.7, "grad_norm": 0.7680583596229553, "learning_rate": 0.00023667499489618034, "loss": 1.7799, "step": 51190 }, { "epoch": 1.7, "grad_norm": 0.5431503653526306, "learning_rate": 0.00023666477832452878, "loss": 1.7355, "step": 51191 }, { "epoch": 1.7, "grad_norm": 0.5537592172622681, "learning_rate": 0.00023665456182975686, "loss": 1.8126, "step": 51192 }, { "epoch": 1.7, "grad_norm": 0.5495712757110596, "learning_rate": 0.0002366443454118771, "loss": 1.7312, "step": 51193 }, { "epoch": 1.7, "grad_norm": 0.5379619598388672, "learning_rate": 0.00023663412907090162, "loss": 1.7185, "step": 51194 }, { "epoch": 1.7, "grad_norm": 0.5557471513748169, "learning_rate": 0.00023662391280684296, "loss": 1.715, "step": 51195 }, { "epoch": 1.7, "grad_norm": 0.5697657465934753, "learning_rate": 0.0002366136966197137, "loss": 1.7442, "step": 51196 }, { "epoch": 1.7, "grad_norm": 0.5764588713645935, "learning_rate": 0.00023660348050952588, "loss": 1.7953, "step": 51197 }, { "epoch": 1.7, "grad_norm": 0.5444539785385132, "learning_rate": 0.0002365932644762922, "loss": 1.821, "step": 51198 }, { "epoch": 1.7, "grad_norm": 0.5395100712776184, "learning_rate": 0.00023658304852002495, "loss": 1.769, "step": 51199 }, { "epoch": 1.7, "grad_norm": 0.5773527026176453, "learning_rate": 0.00023657283264073652, "loss": 1.8166, "step": 51200 }, { "epoch": 1.7, "grad_norm": 0.5593236684799194, "learning_rate": 0.00023656261683843932, "loss": 1.7155, "step": 51201 }, { "epoch": 1.7, "grad_norm": 0.5588834285736084, "learning_rate": 0.00023655240111314566, "loss": 1.7947, "step": 51202 }, { "epoch": 1.7, "grad_norm": 0.5550607442855835, "learning_rate": 0.00023654218546486825, "loss": 1.7137, "step": 51203 }, { "epoch": 1.7, "grad_norm": 0.5381916761398315, "learning_rate": 0.00023653196989361906, "loss": 1.7544, "step": 51204 }, { "epoch": 1.7, "grad_norm": 0.5512942671775818, "learning_rate": 0.00023652175439941081, "loss": 1.7059, "step": 51205 }, { "epoch": 1.7, "grad_norm": 0.5550645589828491, "learning_rate": 0.00023651153898225587, "loss": 1.7771, "step": 51206 }, { "epoch": 1.7, "grad_norm": 0.5479092597961426, "learning_rate": 0.00023650132364216644, "loss": 1.6721, "step": 51207 }, { "epoch": 1.7, "grad_norm": 0.553392231464386, "learning_rate": 0.0002364911083791551, "loss": 1.7075, "step": 51208 }, { "epoch": 1.7, "grad_norm": 0.5324203968048096, "learning_rate": 0.00023648089319323426, "loss": 1.7326, "step": 51209 }, { "epoch": 1.7, "grad_norm": 0.5466927289962769, "learning_rate": 0.0002364706780844162, "loss": 1.7808, "step": 51210 }, { "epoch": 1.7, "grad_norm": 0.5300619602203369, "learning_rate": 0.0002364604630527133, "loss": 1.7261, "step": 51211 }, { "epoch": 1.7, "grad_norm": 0.541054904460907, "learning_rate": 0.0002364502480981381, "loss": 1.6685, "step": 51212 }, { "epoch": 1.7, "grad_norm": 0.5443156361579895, "learning_rate": 0.00023644003322070297, "loss": 1.7638, "step": 51213 }, { "epoch": 1.7, "grad_norm": 0.5521615743637085, "learning_rate": 0.00023642981842042022, "loss": 1.7366, "step": 51214 }, { "epoch": 1.7, "grad_norm": 0.5496729612350464, "learning_rate": 0.00023641960369730233, "loss": 1.706, "step": 51215 }, { "epoch": 1.7, "grad_norm": 0.5531584024429321, "learning_rate": 0.00023640938905136172, "loss": 1.6891, "step": 51216 }, { "epoch": 1.7, "grad_norm": 0.5492432713508606, "learning_rate": 0.0002363991744826106, "loss": 1.7632, "step": 51217 }, { "epoch": 1.7, "grad_norm": 0.554458498954773, "learning_rate": 0.00023638895999106158, "loss": 1.73, "step": 51218 }, { "epoch": 1.7, "grad_norm": 0.5674386620521545, "learning_rate": 0.000236378745576727, "loss": 1.695, "step": 51219 }, { "epoch": 1.7, "grad_norm": 0.5579362511634827, "learning_rate": 0.00023636853123961934, "loss": 1.7113, "step": 51220 }, { "epoch": 1.7, "grad_norm": 0.5630539655685425, "learning_rate": 0.00023635831697975084, "loss": 1.7569, "step": 51221 }, { "epoch": 1.7, "grad_norm": 0.5581981539726257, "learning_rate": 0.00023634810279713392, "loss": 1.7244, "step": 51222 }, { "epoch": 1.7, "grad_norm": 0.5363229513168335, "learning_rate": 0.00023633788869178112, "loss": 1.7286, "step": 51223 }, { "epoch": 1.7, "grad_norm": 0.5502913594245911, "learning_rate": 0.0002363276746637046, "loss": 1.6968, "step": 51224 }, { "epoch": 1.7, "grad_norm": 0.5406908988952637, "learning_rate": 0.00023631746071291698, "loss": 1.7283, "step": 51225 }, { "epoch": 1.7, "grad_norm": 0.548937976360321, "learning_rate": 0.00023630724683943065, "loss": 1.7483, "step": 51226 }, { "epoch": 1.7, "grad_norm": 0.5389089584350586, "learning_rate": 0.00023629703304325783, "loss": 1.7507, "step": 51227 }, { "epoch": 1.7, "grad_norm": 0.5719337463378906, "learning_rate": 0.00023628681932441107, "loss": 1.7381, "step": 51228 }, { "epoch": 1.7, "grad_norm": 0.5280478596687317, "learning_rate": 0.0002362766056829027, "loss": 1.7518, "step": 51229 }, { "epoch": 1.7, "grad_norm": 0.5780336856842041, "learning_rate": 0.00023626639211874522, "loss": 1.764, "step": 51230 }, { "epoch": 1.7, "grad_norm": 0.58797687292099, "learning_rate": 0.00023625617863195082, "loss": 1.7695, "step": 51231 }, { "epoch": 1.7, "grad_norm": 0.5370530486106873, "learning_rate": 0.00023624596522253208, "loss": 1.6765, "step": 51232 }, { "epoch": 1.7, "grad_norm": 0.549171507358551, "learning_rate": 0.00023623575189050144, "loss": 1.8083, "step": 51233 }, { "epoch": 1.7, "grad_norm": 0.5451832413673401, "learning_rate": 0.00023622553863587108, "loss": 1.7449, "step": 51234 }, { "epoch": 1.7, "grad_norm": 0.5584624409675598, "learning_rate": 0.00023621532545865356, "loss": 1.7767, "step": 51235 }, { "epoch": 1.7, "grad_norm": 0.5482407808303833, "learning_rate": 0.0002362051123588613, "loss": 1.7702, "step": 51236 }, { "epoch": 1.7, "grad_norm": 0.5732423663139343, "learning_rate": 0.00023619489933650656, "loss": 1.7974, "step": 51237 }, { "epoch": 1.7, "grad_norm": 0.5588244199752808, "learning_rate": 0.00023618468639160177, "loss": 1.8579, "step": 51238 }, { "epoch": 1.7, "grad_norm": 0.5496534705162048, "learning_rate": 0.00023617447352415938, "loss": 1.8084, "step": 51239 }, { "epoch": 1.7, "grad_norm": 0.5379568338394165, "learning_rate": 0.00023616426073419188, "loss": 1.7275, "step": 51240 }, { "epoch": 1.7, "grad_norm": 0.5636615753173828, "learning_rate": 0.00023615404802171153, "loss": 1.7489, "step": 51241 }, { "epoch": 1.7, "grad_norm": 0.5709517598152161, "learning_rate": 0.0002361438353867307, "loss": 1.7905, "step": 51242 }, { "epoch": 1.7, "grad_norm": 0.5455830693244934, "learning_rate": 0.0002361336228292619, "loss": 1.7785, "step": 51243 }, { "epoch": 1.7, "grad_norm": 0.5575234889984131, "learning_rate": 0.00023612341034931743, "loss": 1.7725, "step": 51244 }, { "epoch": 1.7, "grad_norm": 0.5651647448539734, "learning_rate": 0.0002361131979469097, "loss": 1.7869, "step": 51245 }, { "epoch": 1.7, "grad_norm": 0.5652443766593933, "learning_rate": 0.00023610298562205128, "loss": 1.7642, "step": 51246 }, { "epoch": 1.7, "grad_norm": 0.5449950695037842, "learning_rate": 0.00023609277337475424, "loss": 1.7186, "step": 51247 }, { "epoch": 1.71, "grad_norm": 0.5419553518295288, "learning_rate": 0.0002360825612050312, "loss": 1.6719, "step": 51248 }, { "epoch": 1.71, "grad_norm": 0.5544047951698303, "learning_rate": 0.00023607234911289456, "loss": 1.7206, "step": 51249 }, { "epoch": 1.71, "grad_norm": 0.5652027130126953, "learning_rate": 0.00023606213709835672, "loss": 1.801, "step": 51250 }, { "epoch": 1.71, "grad_norm": 0.5593137145042419, "learning_rate": 0.00023605192516142998, "loss": 1.7355, "step": 51251 }, { "epoch": 1.71, "grad_norm": 0.5356287360191345, "learning_rate": 0.00023604171330212666, "loss": 1.7196, "step": 51252 }, { "epoch": 1.71, "grad_norm": 0.540913999080658, "learning_rate": 0.0002360315015204595, "loss": 1.7258, "step": 51253 }, { "epoch": 1.71, "grad_norm": 0.5364275574684143, "learning_rate": 0.00023602128981644048, "loss": 1.7339, "step": 51254 }, { "epoch": 1.71, "grad_norm": 0.5357597470283508, "learning_rate": 0.00023601107819008226, "loss": 1.7092, "step": 51255 }, { "epoch": 1.71, "grad_norm": 0.5396129488945007, "learning_rate": 0.00023600086664139717, "loss": 1.7428, "step": 51256 }, { "epoch": 1.71, "grad_norm": 0.5302918553352356, "learning_rate": 0.0002359906551703976, "loss": 1.7865, "step": 51257 }, { "epoch": 1.71, "grad_norm": 0.5414015054702759, "learning_rate": 0.0002359804437770959, "loss": 1.7397, "step": 51258 }, { "epoch": 1.71, "grad_norm": 0.5480014085769653, "learning_rate": 0.00023597023246150448, "loss": 1.6858, "step": 51259 }, { "epoch": 1.71, "grad_norm": 0.5591943860054016, "learning_rate": 0.00023596002122363595, "loss": 1.7665, "step": 51260 }, { "epoch": 1.71, "grad_norm": 0.5452792048454285, "learning_rate": 0.0002359498100635023, "loss": 1.7593, "step": 51261 }, { "epoch": 1.71, "grad_norm": 0.553611159324646, "learning_rate": 0.00023593959898111623, "loss": 1.6744, "step": 51262 }, { "epoch": 1.71, "grad_norm": 0.5471647381782532, "learning_rate": 0.0002359293879764901, "loss": 1.769, "step": 51263 }, { "epoch": 1.71, "grad_norm": 0.5258053541183472, "learning_rate": 0.0002359191770496362, "loss": 1.8008, "step": 51264 }, { "epoch": 1.71, "grad_norm": 0.5553575754165649, "learning_rate": 0.000235908966200567, "loss": 1.772, "step": 51265 }, { "epoch": 1.71, "grad_norm": 0.550393283367157, "learning_rate": 0.0002358987554292949, "loss": 1.7896, "step": 51266 }, { "epoch": 1.71, "grad_norm": 0.5334031581878662, "learning_rate": 0.0002358885447358322, "loss": 1.8241, "step": 51267 }, { "epoch": 1.71, "grad_norm": 0.5759932398796082, "learning_rate": 0.00023587833412019128, "loss": 1.7815, "step": 51268 }, { "epoch": 1.71, "grad_norm": 0.5486210584640503, "learning_rate": 0.00023586812358238472, "loss": 1.8165, "step": 51269 }, { "epoch": 1.71, "grad_norm": 0.573795735836029, "learning_rate": 0.00023585791312242486, "loss": 1.8169, "step": 51270 }, { "epoch": 1.71, "grad_norm": 0.5455682277679443, "learning_rate": 0.00023584770274032396, "loss": 1.7332, "step": 51271 }, { "epoch": 1.71, "grad_norm": 0.5376877784729004, "learning_rate": 0.00023583749243609453, "loss": 1.7536, "step": 51272 }, { "epoch": 1.71, "grad_norm": 0.559729278087616, "learning_rate": 0.00023582728220974898, "loss": 1.7236, "step": 51273 }, { "epoch": 1.71, "grad_norm": 0.5916399955749512, "learning_rate": 0.0002358170720612996, "loss": 1.7891, "step": 51274 }, { "epoch": 1.71, "grad_norm": 0.5770192742347717, "learning_rate": 0.00023580686199075874, "loss": 1.7993, "step": 51275 }, { "epoch": 1.71, "grad_norm": 0.5598865151405334, "learning_rate": 0.0002357966519981391, "loss": 1.7387, "step": 51276 }, { "epoch": 1.71, "grad_norm": 0.5612307190895081, "learning_rate": 0.00023578644208345267, "loss": 1.7597, "step": 51277 }, { "epoch": 1.71, "grad_norm": 0.5617037415504456, "learning_rate": 0.00023577623224671214, "loss": 1.7163, "step": 51278 }, { "epoch": 1.71, "grad_norm": 0.540126383304596, "learning_rate": 0.00023576602248792975, "loss": 1.7331, "step": 51279 }, { "epoch": 1.71, "grad_norm": 0.5560431480407715, "learning_rate": 0.00023575581280711803, "loss": 1.765, "step": 51280 }, { "epoch": 1.71, "grad_norm": 0.5606785416603088, "learning_rate": 0.0002357456032042892, "loss": 1.753, "step": 51281 }, { "epoch": 1.71, "grad_norm": 0.5489160418510437, "learning_rate": 0.00023573539367945573, "loss": 1.7591, "step": 51282 }, { "epoch": 1.71, "grad_norm": 0.5607828497886658, "learning_rate": 0.00023572518423263018, "loss": 1.6844, "step": 51283 }, { "epoch": 1.71, "grad_norm": 0.5365282297134399, "learning_rate": 0.00023571497486382458, "loss": 1.803, "step": 51284 }, { "epoch": 1.71, "grad_norm": 0.5542569756507874, "learning_rate": 0.00023570476557305161, "loss": 1.7253, "step": 51285 }, { "epoch": 1.71, "grad_norm": 0.5514293313026428, "learning_rate": 0.00023569455636032364, "loss": 1.7783, "step": 51286 }, { "epoch": 1.71, "grad_norm": 0.5381231904029846, "learning_rate": 0.00023568434722565296, "loss": 1.7942, "step": 51287 }, { "epoch": 1.71, "grad_norm": 0.5588956475257874, "learning_rate": 0.00023567413816905197, "loss": 1.6846, "step": 51288 }, { "epoch": 1.71, "grad_norm": 0.5743208527565002, "learning_rate": 0.00023566392919053308, "loss": 1.6919, "step": 51289 }, { "epoch": 1.71, "grad_norm": 0.5585231184959412, "learning_rate": 0.00023565372029010884, "loss": 1.7953, "step": 51290 }, { "epoch": 1.71, "grad_norm": 0.5623801350593567, "learning_rate": 0.00023564351146779136, "loss": 1.7287, "step": 51291 }, { "epoch": 1.71, "grad_norm": 0.5653581619262695, "learning_rate": 0.0002356333027235932, "loss": 1.8294, "step": 51292 }, { "epoch": 1.71, "grad_norm": 0.5629050731658936, "learning_rate": 0.00023562309405752679, "loss": 1.7084, "step": 51293 }, { "epoch": 1.71, "grad_norm": 0.5546223521232605, "learning_rate": 0.00023561288546960444, "loss": 1.7424, "step": 51294 }, { "epoch": 1.71, "grad_norm": 0.5362226366996765, "learning_rate": 0.0002356026769598385, "loss": 1.7275, "step": 51295 }, { "epoch": 1.71, "grad_norm": 0.562792956829071, "learning_rate": 0.00023559246852824153, "loss": 1.8078, "step": 51296 }, { "epoch": 1.71, "grad_norm": 0.55405193567276, "learning_rate": 0.00023558226017482574, "loss": 1.7138, "step": 51297 }, { "epoch": 1.71, "grad_norm": 0.5520557761192322, "learning_rate": 0.0002355720518996035, "loss": 1.7526, "step": 51298 }, { "epoch": 1.71, "grad_norm": 0.5424163341522217, "learning_rate": 0.0002355618437025874, "loss": 1.8078, "step": 51299 }, { "epoch": 1.71, "grad_norm": 0.5634424686431885, "learning_rate": 0.00023555163558378978, "loss": 1.6753, "step": 51300 }, { "epoch": 1.71, "grad_norm": 0.5363824963569641, "learning_rate": 0.0002355414275432229, "loss": 1.7485, "step": 51301 }, { "epoch": 1.71, "grad_norm": 0.5709239840507507, "learning_rate": 0.00023553121958089926, "loss": 1.7957, "step": 51302 }, { "epoch": 1.71, "grad_norm": 0.5471879243850708, "learning_rate": 0.00023552101169683126, "loss": 1.7391, "step": 51303 }, { "epoch": 1.71, "grad_norm": 0.560386598110199, "learning_rate": 0.0002355108038910312, "loss": 1.7914, "step": 51304 }, { "epoch": 1.71, "grad_norm": 0.5392693281173706, "learning_rate": 0.00023550059616351144, "loss": 1.6872, "step": 51305 }, { "epoch": 1.71, "grad_norm": 0.5559350252151489, "learning_rate": 0.00023549038851428466, "loss": 1.7082, "step": 51306 }, { "epoch": 1.71, "grad_norm": 0.558051347732544, "learning_rate": 0.00023548018094336285, "loss": 1.7714, "step": 51307 }, { "epoch": 1.71, "grad_norm": 0.61374431848526, "learning_rate": 0.00023546997345075865, "loss": 1.7635, "step": 51308 }, { "epoch": 1.71, "grad_norm": 0.5600878000259399, "learning_rate": 0.0002354597660364844, "loss": 1.7517, "step": 51309 }, { "epoch": 1.71, "grad_norm": 0.5445536375045776, "learning_rate": 0.00023544955870055255, "loss": 1.7206, "step": 51310 }, { "epoch": 1.71, "grad_norm": 0.5517145395278931, "learning_rate": 0.00023543935144297536, "loss": 1.7972, "step": 51311 }, { "epoch": 1.71, "grad_norm": 0.5314270853996277, "learning_rate": 0.0002354291442637652, "loss": 1.6605, "step": 51312 }, { "epoch": 1.71, "grad_norm": 0.6386781930923462, "learning_rate": 0.00023541893716293478, "loss": 1.7411, "step": 51313 }, { "epoch": 1.71, "grad_norm": 0.5732927918434143, "learning_rate": 0.00023540873014049602, "loss": 1.7384, "step": 51314 }, { "epoch": 1.71, "grad_norm": 0.5759111046791077, "learning_rate": 0.00023539852319646163, "loss": 1.7857, "step": 51315 }, { "epoch": 1.71, "grad_norm": 0.5461474061012268, "learning_rate": 0.00023538831633084398, "loss": 1.7471, "step": 51316 }, { "epoch": 1.71, "grad_norm": 0.5403640270233154, "learning_rate": 0.0002353781095436553, "loss": 1.8198, "step": 51317 }, { "epoch": 1.71, "grad_norm": 1.4124716520309448, "learning_rate": 0.00023536790283490812, "loss": 1.7691, "step": 51318 }, { "epoch": 1.71, "grad_norm": 0.5768179893493652, "learning_rate": 0.0002353576962046147, "loss": 1.724, "step": 51319 }, { "epoch": 1.71, "grad_norm": 0.5890918374061584, "learning_rate": 0.0002353474896527877, "loss": 1.7845, "step": 51320 }, { "epoch": 1.71, "grad_norm": 0.5469356775283813, "learning_rate": 0.0002353372831794391, "loss": 1.7494, "step": 51321 }, { "epoch": 1.71, "grad_norm": 0.5624076128005981, "learning_rate": 0.00023532707678458161, "loss": 1.7596, "step": 51322 }, { "epoch": 1.71, "grad_norm": 0.551033079624176, "learning_rate": 0.0002353168704682276, "loss": 1.7264, "step": 51323 }, { "epoch": 1.71, "grad_norm": 0.5650011897087097, "learning_rate": 0.00023530666423038932, "loss": 1.7978, "step": 51324 }, { "epoch": 1.71, "grad_norm": 0.5353845953941345, "learning_rate": 0.00023529645807107916, "loss": 1.7018, "step": 51325 }, { "epoch": 1.71, "grad_norm": 0.5458130836486816, "learning_rate": 0.00023528625199030966, "loss": 1.7844, "step": 51326 }, { "epoch": 1.71, "grad_norm": 0.5729832649230957, "learning_rate": 0.00023527604598809306, "loss": 1.8088, "step": 51327 }, { "epoch": 1.71, "grad_norm": 0.5596246123313904, "learning_rate": 0.00023526584006444172, "loss": 1.7929, "step": 51328 }, { "epoch": 1.71, "grad_norm": 0.5508606433868408, "learning_rate": 0.00023525563421936818, "loss": 1.6552, "step": 51329 }, { "epoch": 1.71, "grad_norm": 0.5724422931671143, "learning_rate": 0.00023524542845288485, "loss": 1.7232, "step": 51330 }, { "epoch": 1.71, "grad_norm": 0.5461018681526184, "learning_rate": 0.00023523522276500393, "loss": 1.6874, "step": 51331 }, { "epoch": 1.71, "grad_norm": 0.5290303230285645, "learning_rate": 0.00023522501715573792, "loss": 1.6774, "step": 51332 }, { "epoch": 1.71, "grad_norm": 0.5463171005249023, "learning_rate": 0.00023521481162509927, "loss": 1.7583, "step": 51333 }, { "epoch": 1.71, "grad_norm": 0.5234887599945068, "learning_rate": 0.00023520460617310017, "loss": 1.6361, "step": 51334 }, { "epoch": 1.71, "grad_norm": 0.5689430832862854, "learning_rate": 0.00023519440079975318, "loss": 1.7531, "step": 51335 }, { "epoch": 1.71, "grad_norm": 0.5508560538291931, "learning_rate": 0.00023518419550507062, "loss": 1.7475, "step": 51336 }, { "epoch": 1.71, "grad_norm": 0.5459592342376709, "learning_rate": 0.000235173990289065, "loss": 1.7854, "step": 51337 }, { "epoch": 1.71, "grad_norm": 0.5548089146614075, "learning_rate": 0.00023516378515174853, "loss": 1.7073, "step": 51338 }, { "epoch": 1.71, "grad_norm": 0.5593104958534241, "learning_rate": 0.00023515358009313365, "loss": 1.7155, "step": 51339 }, { "epoch": 1.71, "grad_norm": 0.5577725768089294, "learning_rate": 0.00023514337511323287, "loss": 1.7408, "step": 51340 }, { "epoch": 1.71, "grad_norm": 0.5524041056632996, "learning_rate": 0.0002351331702120583, "loss": 1.7015, "step": 51341 }, { "epoch": 1.71, "grad_norm": 0.5433574318885803, "learning_rate": 0.0002351229653896226, "loss": 1.752, "step": 51342 }, { "epoch": 1.71, "grad_norm": 0.5321803689002991, "learning_rate": 0.0002351127606459381, "loss": 1.6893, "step": 51343 }, { "epoch": 1.71, "grad_norm": 0.5443305373191833, "learning_rate": 0.0002351025559810171, "loss": 1.734, "step": 51344 }, { "epoch": 1.71, "grad_norm": 0.5559456944465637, "learning_rate": 0.00023509235139487205, "loss": 1.838, "step": 51345 }, { "epoch": 1.71, "grad_norm": 0.5485798120498657, "learning_rate": 0.0002350821468875153, "loss": 1.6408, "step": 51346 }, { "epoch": 1.71, "grad_norm": 0.5833693742752075, "learning_rate": 0.00023507194245895934, "loss": 1.7795, "step": 51347 }, { "epoch": 1.71, "grad_norm": 0.543149471282959, "learning_rate": 0.00023506173810921636, "loss": 1.7853, "step": 51348 }, { "epoch": 1.71, "grad_norm": 0.558637261390686, "learning_rate": 0.00023505153383829888, "loss": 1.8141, "step": 51349 }, { "epoch": 1.71, "grad_norm": 0.5544869899749756, "learning_rate": 0.00023504132964621938, "loss": 1.7191, "step": 51350 }, { "epoch": 1.71, "grad_norm": 0.5429012775421143, "learning_rate": 0.00023503112553299003, "loss": 1.7995, "step": 51351 }, { "epoch": 1.71, "grad_norm": 0.5573044419288635, "learning_rate": 0.00023502092149862335, "loss": 1.7831, "step": 51352 }, { "epoch": 1.71, "grad_norm": 0.5537064671516418, "learning_rate": 0.00023501071754313172, "loss": 1.783, "step": 51353 }, { "epoch": 1.71, "grad_norm": 0.5588904619216919, "learning_rate": 0.0002350005136665275, "loss": 1.6823, "step": 51354 }, { "epoch": 1.71, "grad_norm": 0.5428593754768372, "learning_rate": 0.00023499030986882295, "loss": 1.6406, "step": 51355 }, { "epoch": 1.71, "grad_norm": 0.5373088717460632, "learning_rate": 0.00023498010615003072, "loss": 1.726, "step": 51356 }, { "epoch": 1.71, "grad_norm": 0.5563303828239441, "learning_rate": 0.00023496990251016309, "loss": 1.7448, "step": 51357 }, { "epoch": 1.71, "grad_norm": 0.5415096879005432, "learning_rate": 0.0002349596989492323, "loss": 1.6549, "step": 51358 }, { "epoch": 1.71, "grad_norm": 0.5784613490104675, "learning_rate": 0.00023494949546725092, "loss": 1.7944, "step": 51359 }, { "epoch": 1.71, "grad_norm": 0.5449806451797485, "learning_rate": 0.00023493929206423132, "loss": 1.7675, "step": 51360 }, { "epoch": 1.71, "grad_norm": 0.5540459752082825, "learning_rate": 0.00023492908874018573, "loss": 1.7507, "step": 51361 }, { "epoch": 1.71, "grad_norm": 0.5500959157943726, "learning_rate": 0.00023491888549512664, "loss": 1.8212, "step": 51362 }, { "epoch": 1.71, "grad_norm": 0.5850027203559875, "learning_rate": 0.0002349086823290666, "loss": 1.7607, "step": 51363 }, { "epoch": 1.71, "grad_norm": 0.5641365051269531, "learning_rate": 0.0002348984792420176, "loss": 1.7372, "step": 51364 }, { "epoch": 1.71, "grad_norm": 0.5570988059043884, "learning_rate": 0.00023488827623399235, "loss": 1.6865, "step": 51365 }, { "epoch": 1.71, "grad_norm": 0.5320091247558594, "learning_rate": 0.00023487807330500315, "loss": 1.6825, "step": 51366 }, { "epoch": 1.71, "grad_norm": 0.5326131582260132, "learning_rate": 0.00023486787045506243, "loss": 1.7201, "step": 51367 }, { "epoch": 1.71, "grad_norm": 0.554822564125061, "learning_rate": 0.00023485766768418246, "loss": 1.7639, "step": 51368 }, { "epoch": 1.71, "grad_norm": 0.5667795538902283, "learning_rate": 0.0002348474649923756, "loss": 1.7425, "step": 51369 }, { "epoch": 1.71, "grad_norm": 0.572083055973053, "learning_rate": 0.00023483726237965452, "loss": 1.7936, "step": 51370 }, { "epoch": 1.71, "grad_norm": 0.5631887316703796, "learning_rate": 0.0002348270598460312, "loss": 1.7181, "step": 51371 }, { "epoch": 1.71, "grad_norm": 0.55260169506073, "learning_rate": 0.0002348168573915183, "loss": 1.7919, "step": 51372 }, { "epoch": 1.71, "grad_norm": 0.5810693502426147, "learning_rate": 0.0002348066550161282, "loss": 1.7207, "step": 51373 }, { "epoch": 1.71, "grad_norm": 0.5561982989311218, "learning_rate": 0.00023479645271987312, "loss": 1.7763, "step": 51374 }, { "epoch": 1.71, "grad_norm": 0.5715852379798889, "learning_rate": 0.00023478625050276557, "loss": 1.8279, "step": 51375 }, { "epoch": 1.71, "grad_norm": 0.5625622272491455, "learning_rate": 0.00023477604836481784, "loss": 1.7248, "step": 51376 }, { "epoch": 1.71, "grad_norm": 0.5730921030044556, "learning_rate": 0.00023476584630604259, "loss": 1.7999, "step": 51377 }, { "epoch": 1.71, "grad_norm": 0.574354887008667, "learning_rate": 0.00023475564432645174, "loss": 1.9078, "step": 51378 }, { "epoch": 1.71, "grad_norm": 0.562366783618927, "learning_rate": 0.00023474544242605802, "loss": 1.7384, "step": 51379 }, { "epoch": 1.71, "grad_norm": 0.5875728130340576, "learning_rate": 0.00023473524060487377, "loss": 1.7675, "step": 51380 }, { "epoch": 1.71, "grad_norm": 0.5446977615356445, "learning_rate": 0.0002347250388629113, "loss": 1.709, "step": 51381 }, { "epoch": 1.71, "grad_norm": 0.53714519739151, "learning_rate": 0.00023471483720018297, "loss": 1.739, "step": 51382 }, { "epoch": 1.71, "grad_norm": 0.5603812336921692, "learning_rate": 0.00023470463561670128, "loss": 1.7318, "step": 51383 }, { "epoch": 1.71, "grad_norm": 0.5667495727539062, "learning_rate": 0.0002346944341124785, "loss": 1.7532, "step": 51384 }, { "epoch": 1.71, "grad_norm": 0.5342857837677002, "learning_rate": 0.00023468423268752694, "loss": 1.8054, "step": 51385 }, { "epoch": 1.71, "grad_norm": 0.5424696207046509, "learning_rate": 0.0002346740313418592, "loss": 1.7885, "step": 51386 }, { "epoch": 1.71, "grad_norm": 0.5513401627540588, "learning_rate": 0.00023466383007548762, "loss": 1.7036, "step": 51387 }, { "epoch": 1.71, "grad_norm": 0.5747990012168884, "learning_rate": 0.00023465362888842447, "loss": 1.7765, "step": 51388 }, { "epoch": 1.71, "grad_norm": 0.569290041923523, "learning_rate": 0.00023464342778068217, "loss": 1.6889, "step": 51389 }, { "epoch": 1.71, "grad_norm": 0.5392879843711853, "learning_rate": 0.0002346332267522732, "loss": 1.7474, "step": 51390 }, { "epoch": 1.71, "grad_norm": 0.5739343166351318, "learning_rate": 0.00023462302580320977, "loss": 1.7622, "step": 51391 }, { "epoch": 1.71, "grad_norm": 0.5638827681541443, "learning_rate": 0.0002346128249335043, "loss": 1.713, "step": 51392 }, { "epoch": 1.71, "grad_norm": 0.563291072845459, "learning_rate": 0.00023460262414316945, "loss": 1.7427, "step": 51393 }, { "epoch": 1.71, "grad_norm": 0.5471976399421692, "learning_rate": 0.00023459242343221715, "loss": 1.6863, "step": 51394 }, { "epoch": 1.71, "grad_norm": 0.5363648533821106, "learning_rate": 0.00023458222280066008, "loss": 1.6964, "step": 51395 }, { "epoch": 1.71, "grad_norm": 0.5432769656181335, "learning_rate": 0.00023457202224851057, "loss": 1.7401, "step": 51396 }, { "epoch": 1.71, "grad_norm": 0.5538418889045715, "learning_rate": 0.00023456182177578102, "loss": 1.7521, "step": 51397 }, { "epoch": 1.71, "grad_norm": 0.55876225233078, "learning_rate": 0.00023455162138248374, "loss": 1.694, "step": 51398 }, { "epoch": 1.71, "grad_norm": 0.5517333149909973, "learning_rate": 0.00023454142106863106, "loss": 1.7356, "step": 51399 }, { "epoch": 1.71, "grad_norm": 0.5495828986167908, "learning_rate": 0.00023453122083423567, "loss": 1.7524, "step": 51400 }, { "epoch": 1.71, "grad_norm": 0.5483554601669312, "learning_rate": 0.00023452102067930954, "loss": 1.7374, "step": 51401 }, { "epoch": 1.71, "grad_norm": 0.5715463161468506, "learning_rate": 0.00023451082060386524, "loss": 1.6992, "step": 51402 }, { "epoch": 1.71, "grad_norm": 0.5365962982177734, "learning_rate": 0.00023450062060791533, "loss": 1.7264, "step": 51403 }, { "epoch": 1.71, "grad_norm": 0.5391806960105896, "learning_rate": 0.00023449042069147185, "loss": 1.7892, "step": 51404 }, { "epoch": 1.71, "grad_norm": 0.5566462874412537, "learning_rate": 0.00023448022085454742, "loss": 1.7105, "step": 51405 }, { "epoch": 1.71, "grad_norm": 0.5357386469841003, "learning_rate": 0.00023447002109715424, "loss": 1.7406, "step": 51406 }, { "epoch": 1.71, "grad_norm": 0.5518442988395691, "learning_rate": 0.000234459821419305, "loss": 1.7954, "step": 51407 }, { "epoch": 1.71, "grad_norm": 0.5802026391029358, "learning_rate": 0.0002344496218210117, "loss": 1.8132, "step": 51408 }, { "epoch": 1.71, "grad_norm": 0.5581285357475281, "learning_rate": 0.00023443942230228694, "loss": 1.8004, "step": 51409 }, { "epoch": 1.71, "grad_norm": 0.5810433030128479, "learning_rate": 0.00023442922286314314, "loss": 1.8014, "step": 51410 }, { "epoch": 1.71, "grad_norm": 0.5620498061180115, "learning_rate": 0.00023441902350359255, "loss": 1.7134, "step": 51411 }, { "epoch": 1.71, "grad_norm": 0.571570873260498, "learning_rate": 0.0002344088242236476, "loss": 1.7571, "step": 51412 }, { "epoch": 1.71, "grad_norm": 0.5355604887008667, "learning_rate": 0.00023439862502332073, "loss": 1.7218, "step": 51413 }, { "epoch": 1.71, "grad_norm": 0.5386386513710022, "learning_rate": 0.0002343884259026242, "loss": 1.76, "step": 51414 }, { "epoch": 1.71, "grad_norm": 0.5457324981689453, "learning_rate": 0.0002343782268615704, "loss": 1.678, "step": 51415 }, { "epoch": 1.71, "grad_norm": 0.5321511030197144, "learning_rate": 0.0002343680279001718, "loss": 1.7446, "step": 51416 }, { "epoch": 1.71, "grad_norm": 0.5541909337043762, "learning_rate": 0.00023435782901844082, "loss": 1.6835, "step": 51417 }, { "epoch": 1.71, "grad_norm": 0.5534656643867493, "learning_rate": 0.0002343476302163897, "loss": 1.7365, "step": 51418 }, { "epoch": 1.71, "grad_norm": 0.5405570268630981, "learning_rate": 0.0002343374314940309, "loss": 1.7903, "step": 51419 }, { "epoch": 1.71, "grad_norm": 0.5459397435188293, "learning_rate": 0.00023432723285137685, "loss": 1.7371, "step": 51420 }, { "epoch": 1.71, "grad_norm": 0.5681132078170776, "learning_rate": 0.00023431703428843976, "loss": 1.7861, "step": 51421 }, { "epoch": 1.71, "grad_norm": 0.5710307359695435, "learning_rate": 0.0002343068358052321, "loss": 1.7139, "step": 51422 }, { "epoch": 1.71, "grad_norm": 0.5495111346244812, "learning_rate": 0.0002342966374017664, "loss": 1.763, "step": 51423 }, { "epoch": 1.71, "grad_norm": 0.561797559261322, "learning_rate": 0.0002342864390780547, "loss": 1.737, "step": 51424 }, { "epoch": 1.71, "grad_norm": 0.5462529063224792, "learning_rate": 0.0002342762408341097, "loss": 1.7852, "step": 51425 }, { "epoch": 1.71, "grad_norm": 0.5396590232849121, "learning_rate": 0.00023426604266994365, "loss": 1.7444, "step": 51426 }, { "epoch": 1.71, "grad_norm": 0.5510014295578003, "learning_rate": 0.00023425584458556902, "loss": 1.6862, "step": 51427 }, { "epoch": 1.71, "grad_norm": 0.5870794653892517, "learning_rate": 0.000234245646580998, "loss": 1.7481, "step": 51428 }, { "epoch": 1.71, "grad_norm": 0.57683926820755, "learning_rate": 0.00023423544865624303, "loss": 1.6888, "step": 51429 }, { "epoch": 1.71, "grad_norm": 0.5548464059829712, "learning_rate": 0.00023422525081131672, "loss": 1.7492, "step": 51430 }, { "epoch": 1.71, "grad_norm": 0.5353550910949707, "learning_rate": 0.00023421505304623108, "loss": 1.6749, "step": 51431 }, { "epoch": 1.71, "grad_norm": 0.5987560749053955, "learning_rate": 0.00023420485536099873, "loss": 1.732, "step": 51432 }, { "epoch": 1.71, "grad_norm": 0.5844233632087708, "learning_rate": 0.00023419465775563207, "loss": 1.7555, "step": 51433 }, { "epoch": 1.71, "grad_norm": 0.5730462670326233, "learning_rate": 0.00023418446023014332, "loss": 1.7473, "step": 51434 }, { "epoch": 1.71, "grad_norm": 0.5304170250892639, "learning_rate": 0.0002341742627845449, "loss": 1.7304, "step": 51435 }, { "epoch": 1.71, "grad_norm": 0.564333975315094, "learning_rate": 0.00023416406541884924, "loss": 1.7918, "step": 51436 }, { "epoch": 1.71, "grad_norm": 0.5638834834098816, "learning_rate": 0.00023415386813306884, "loss": 1.7274, "step": 51437 }, { "epoch": 1.71, "grad_norm": 0.6040619015693665, "learning_rate": 0.00023414367092721577, "loss": 1.7397, "step": 51438 }, { "epoch": 1.71, "grad_norm": 0.5620179176330566, "learning_rate": 0.00023413347380130264, "loss": 1.8122, "step": 51439 }, { "epoch": 1.71, "grad_norm": 0.5385503172874451, "learning_rate": 0.00023412327675534185, "loss": 1.6732, "step": 51440 }, { "epoch": 1.71, "grad_norm": 0.5738316774368286, "learning_rate": 0.00023411307978934564, "loss": 1.7989, "step": 51441 }, { "epoch": 1.71, "grad_norm": 0.5586630702018738, "learning_rate": 0.0002341028829033264, "loss": 1.7369, "step": 51442 }, { "epoch": 1.71, "grad_norm": 0.5629687309265137, "learning_rate": 0.00023409268609729663, "loss": 1.7223, "step": 51443 }, { "epoch": 1.71, "grad_norm": 0.5567848682403564, "learning_rate": 0.00023408248937126856, "loss": 1.7388, "step": 51444 }, { "epoch": 1.71, "grad_norm": 0.5529779195785522, "learning_rate": 0.0002340722927252546, "loss": 1.7511, "step": 51445 }, { "epoch": 1.71, "grad_norm": 0.56218421459198, "learning_rate": 0.0002340620961592672, "loss": 1.8314, "step": 51446 }, { "epoch": 1.71, "grad_norm": 0.5446409583091736, "learning_rate": 0.00023405189967331877, "loss": 1.6932, "step": 51447 }, { "epoch": 1.71, "grad_norm": 0.5563036799430847, "learning_rate": 0.00023404170326742156, "loss": 1.766, "step": 51448 }, { "epoch": 1.71, "grad_norm": 0.5908495783805847, "learning_rate": 0.000234031506941588, "loss": 1.781, "step": 51449 }, { "epoch": 1.71, "grad_norm": 0.5759695172309875, "learning_rate": 0.00023402131069583053, "loss": 1.7875, "step": 51450 }, { "epoch": 1.71, "grad_norm": 0.5615636706352234, "learning_rate": 0.00023401111453016135, "loss": 1.7723, "step": 51451 }, { "epoch": 1.71, "grad_norm": 0.536329448223114, "learning_rate": 0.000234000918444593, "loss": 1.687, "step": 51452 }, { "epoch": 1.71, "grad_norm": 0.5570781230926514, "learning_rate": 0.0002339907224391379, "loss": 1.8407, "step": 51453 }, { "epoch": 1.71, "grad_norm": 0.5662893056869507, "learning_rate": 0.00023398052651380825, "loss": 1.7944, "step": 51454 }, { "epoch": 1.71, "grad_norm": 0.7578145265579224, "learning_rate": 0.00023397033066861652, "loss": 1.7617, "step": 51455 }, { "epoch": 1.71, "grad_norm": 0.5638059973716736, "learning_rate": 0.00023396013490357506, "loss": 1.8167, "step": 51456 }, { "epoch": 1.71, "grad_norm": 0.539417564868927, "learning_rate": 0.00023394993921869637, "loss": 1.6686, "step": 51457 }, { "epoch": 1.71, "grad_norm": 1.7758445739746094, "learning_rate": 0.00023393974361399258, "loss": 1.7924, "step": 51458 }, { "epoch": 1.71, "grad_norm": 0.5744714140892029, "learning_rate": 0.00023392954808947627, "loss": 1.6659, "step": 51459 }, { "epoch": 1.71, "grad_norm": 0.5759677886962891, "learning_rate": 0.00023391935264515979, "loss": 1.6846, "step": 51460 }, { "epoch": 1.71, "grad_norm": 3.4443466663360596, "learning_rate": 0.00023390915728105545, "loss": 1.7346, "step": 51461 }, { "epoch": 1.71, "grad_norm": 0.5673052668571472, "learning_rate": 0.00023389896199717565, "loss": 1.7424, "step": 51462 }, { "epoch": 1.71, "grad_norm": 0.550595223903656, "learning_rate": 0.00023388876679353274, "loss": 1.748, "step": 51463 }, { "epoch": 1.71, "grad_norm": 0.5512011647224426, "learning_rate": 0.0002338785716701392, "loss": 1.6585, "step": 51464 }, { "epoch": 1.71, "grad_norm": 0.5644511580467224, "learning_rate": 0.00023386837662700723, "loss": 1.7606, "step": 51465 }, { "epoch": 1.71, "grad_norm": 0.5426928997039795, "learning_rate": 0.0002338581816641494, "loss": 1.6967, "step": 51466 }, { "epoch": 1.71, "grad_norm": 0.5611447691917419, "learning_rate": 0.000233847986781578, "loss": 1.7981, "step": 51467 }, { "epoch": 1.71, "grad_norm": 0.543184220790863, "learning_rate": 0.00023383779197930533, "loss": 1.7735, "step": 51468 }, { "epoch": 1.71, "grad_norm": 0.5825895667076111, "learning_rate": 0.00023382759725734388, "loss": 1.7074, "step": 51469 }, { "epoch": 1.71, "grad_norm": 0.5610464215278625, "learning_rate": 0.000233817402615706, "loss": 1.7616, "step": 51470 }, { "epoch": 1.71, "grad_norm": 0.557865560054779, "learning_rate": 0.00023380720805440397, "loss": 1.787, "step": 51471 }, { "epoch": 1.71, "grad_norm": 0.546229362487793, "learning_rate": 0.0002337970135734502, "loss": 1.7918, "step": 51472 }, { "epoch": 1.71, "grad_norm": 0.5796332359313965, "learning_rate": 0.00023378681917285717, "loss": 1.7978, "step": 51473 }, { "epoch": 1.71, "grad_norm": 0.5611228942871094, "learning_rate": 0.0002337766248526372, "loss": 1.6917, "step": 51474 }, { "epoch": 1.71, "grad_norm": 0.5631551146507263, "learning_rate": 0.00023376643061280263, "loss": 1.7615, "step": 51475 }, { "epoch": 1.71, "grad_norm": 0.5596742630004883, "learning_rate": 0.00023375623645336584, "loss": 1.7465, "step": 51476 }, { "epoch": 1.71, "grad_norm": 0.5577914714813232, "learning_rate": 0.0002337460423743393, "loss": 1.7325, "step": 51477 }, { "epoch": 1.71, "grad_norm": 0.5849473476409912, "learning_rate": 0.00023373584837573525, "loss": 1.7869, "step": 51478 }, { "epoch": 1.71, "grad_norm": 0.5703256130218506, "learning_rate": 0.000233725654457566, "loss": 1.7039, "step": 51479 }, { "epoch": 1.71, "grad_norm": 0.5633985996246338, "learning_rate": 0.00023371546061984425, "loss": 1.7337, "step": 51480 }, { "epoch": 1.71, "grad_norm": 0.5428975820541382, "learning_rate": 0.000233705266862582, "loss": 1.7122, "step": 51481 }, { "epoch": 1.71, "grad_norm": 0.5625788569450378, "learning_rate": 0.0002336950731857918, "loss": 1.7541, "step": 51482 }, { "epoch": 1.71, "grad_norm": 0.5498957633972168, "learning_rate": 0.0002336848795894861, "loss": 1.6969, "step": 51483 }, { "epoch": 1.71, "grad_norm": 0.582060694694519, "learning_rate": 0.00023367468607367718, "loss": 1.8755, "step": 51484 }, { "epoch": 1.71, "grad_norm": 0.5320222973823547, "learning_rate": 0.00023366449263837738, "loss": 1.6785, "step": 51485 }, { "epoch": 1.71, "grad_norm": 0.5520302653312683, "learning_rate": 0.000233654299283599, "loss": 1.7895, "step": 51486 }, { "epoch": 1.71, "grad_norm": 0.5588312149047852, "learning_rate": 0.00023364410600935478, "loss": 1.7779, "step": 51487 }, { "epoch": 1.71, "grad_norm": 0.54192054271698, "learning_rate": 0.0002336339128156566, "loss": 1.7256, "step": 51488 }, { "epoch": 1.71, "grad_norm": 0.5683103799819946, "learning_rate": 0.00023362371970251717, "loss": 1.7412, "step": 51489 }, { "epoch": 1.71, "grad_norm": 0.5491832494735718, "learning_rate": 0.00023361352666994884, "loss": 1.8184, "step": 51490 }, { "epoch": 1.71, "grad_norm": 0.5534602403640747, "learning_rate": 0.00023360333371796378, "loss": 1.7486, "step": 51491 }, { "epoch": 1.71, "grad_norm": 0.557134747505188, "learning_rate": 0.00023359314084657457, "loss": 1.6131, "step": 51492 }, { "epoch": 1.71, "grad_norm": 0.5350304245948792, "learning_rate": 0.00023358294805579338, "loss": 1.7171, "step": 51493 }, { "epoch": 1.71, "grad_norm": 0.5575899481773376, "learning_rate": 0.0002335727553456329, "loss": 1.6989, "step": 51494 }, { "epoch": 1.71, "grad_norm": 0.5404369831085205, "learning_rate": 0.00023356256271610514, "loss": 1.8211, "step": 51495 }, { "epoch": 1.71, "grad_norm": 0.5579970479011536, "learning_rate": 0.00023355237016722268, "loss": 1.7289, "step": 51496 }, { "epoch": 1.71, "grad_norm": 0.5512918829917908, "learning_rate": 0.00023354217769899793, "loss": 1.8064, "step": 51497 }, { "epoch": 1.71, "grad_norm": 0.5688048601150513, "learning_rate": 0.00023353198531144312, "loss": 1.8067, "step": 51498 }, { "epoch": 1.71, "grad_norm": 0.5859965682029724, "learning_rate": 0.0002335217930045707, "loss": 1.7364, "step": 51499 }, { "epoch": 1.71, "grad_norm": 0.5478304624557495, "learning_rate": 0.0002335116007783931, "loss": 1.6814, "step": 51500 }, { "epoch": 1.71, "grad_norm": 0.5897078514099121, "learning_rate": 0.00023350140863292247, "loss": 1.7516, "step": 51501 }, { "epoch": 1.71, "grad_norm": 0.5459911227226257, "learning_rate": 0.00023349121656817136, "loss": 1.7642, "step": 51502 }, { "epoch": 1.71, "grad_norm": 0.5569539666175842, "learning_rate": 0.00023348102458415213, "loss": 1.6955, "step": 51503 }, { "epoch": 1.71, "grad_norm": 0.5372709631919861, "learning_rate": 0.00023347083268087724, "loss": 1.7542, "step": 51504 }, { "epoch": 1.71, "grad_norm": 0.5534002780914307, "learning_rate": 0.00023346064085835887, "loss": 1.8598, "step": 51505 }, { "epoch": 1.71, "grad_norm": 0.5507358312606812, "learning_rate": 0.00023345044911660947, "loss": 1.7892, "step": 51506 }, { "epoch": 1.71, "grad_norm": 0.5493314266204834, "learning_rate": 0.00023344025745564148, "loss": 1.7517, "step": 51507 }, { "epoch": 1.71, "grad_norm": 0.5429936051368713, "learning_rate": 0.00023343006587546716, "loss": 1.6577, "step": 51508 }, { "epoch": 1.71, "grad_norm": 0.6002617478370667, "learning_rate": 0.00023341987437609888, "loss": 1.6888, "step": 51509 }, { "epoch": 1.71, "grad_norm": 0.5451680421829224, "learning_rate": 0.00023340968295754923, "loss": 1.7452, "step": 51510 }, { "epoch": 1.71, "grad_norm": 0.5664640665054321, "learning_rate": 0.00023339949161983023, "loss": 1.7627, "step": 51511 }, { "epoch": 1.71, "grad_norm": 0.5662350654602051, "learning_rate": 0.00023338930036295448, "loss": 1.7234, "step": 51512 }, { "epoch": 1.71, "grad_norm": 0.5671207904815674, "learning_rate": 0.00023337910918693434, "loss": 1.6745, "step": 51513 }, { "epoch": 1.71, "grad_norm": 0.5594944357872009, "learning_rate": 0.00023336891809178217, "loss": 1.7499, "step": 51514 }, { "epoch": 1.71, "grad_norm": 0.5363175272941589, "learning_rate": 0.0002333587270775103, "loss": 1.7094, "step": 51515 }, { "epoch": 1.71, "grad_norm": 0.5573289394378662, "learning_rate": 0.000233348536144131, "loss": 1.8241, "step": 51516 }, { "epoch": 1.71, "grad_norm": 0.5550157427787781, "learning_rate": 0.00023333834529165698, "loss": 1.8076, "step": 51517 }, { "epoch": 1.71, "grad_norm": 0.5655849575996399, "learning_rate": 0.0002333281545201002, "loss": 1.8261, "step": 51518 }, { "epoch": 1.71, "grad_norm": 0.5541846752166748, "learning_rate": 0.00023331796382947327, "loss": 1.7132, "step": 51519 }, { "epoch": 1.71, "grad_norm": 0.5735155344009399, "learning_rate": 0.00023330777321978858, "loss": 1.8117, "step": 51520 }, { "epoch": 1.71, "grad_norm": 0.5464504957199097, "learning_rate": 0.00023329758269105834, "loss": 1.7014, "step": 51521 }, { "epoch": 1.71, "grad_norm": 0.5781441330909729, "learning_rate": 0.00023328739224329502, "loss": 1.7019, "step": 51522 }, { "epoch": 1.71, "grad_norm": 0.5417816042900085, "learning_rate": 0.00023327720187651092, "loss": 1.7051, "step": 51523 }, { "epoch": 1.71, "grad_norm": 0.5539172291755676, "learning_rate": 0.00023326701159071862, "loss": 1.7455, "step": 51524 }, { "epoch": 1.71, "grad_norm": 0.5392478108406067, "learning_rate": 0.0002332568213859302, "loss": 1.719, "step": 51525 }, { "epoch": 1.71, "grad_norm": 0.5465668439865112, "learning_rate": 0.00023324663126215818, "loss": 1.7451, "step": 51526 }, { "epoch": 1.71, "grad_norm": 0.5608276724815369, "learning_rate": 0.000233236441219415, "loss": 1.7556, "step": 51527 }, { "epoch": 1.71, "grad_norm": 0.5524246692657471, "learning_rate": 0.0002332262512577129, "loss": 1.7156, "step": 51528 }, { "epoch": 1.71, "grad_norm": 0.5804831385612488, "learning_rate": 0.00023321606137706428, "loss": 1.7201, "step": 51529 }, { "epoch": 1.71, "grad_norm": 0.5385722517967224, "learning_rate": 0.00023320587157748157, "loss": 1.7711, "step": 51530 }, { "epoch": 1.71, "grad_norm": 0.5557865500450134, "learning_rate": 0.00023319568185897706, "loss": 1.7121, "step": 51531 }, { "epoch": 1.71, "grad_norm": 0.5673680901527405, "learning_rate": 0.00023318549222156303, "loss": 1.7253, "step": 51532 }, { "epoch": 1.71, "grad_norm": 0.585715115070343, "learning_rate": 0.00023317530266525206, "loss": 1.7241, "step": 51533 }, { "epoch": 1.71, "grad_norm": 0.6311829090118408, "learning_rate": 0.0002331651131900565, "loss": 1.7886, "step": 51534 }, { "epoch": 1.71, "grad_norm": 0.549436628818512, "learning_rate": 0.00023315492379598858, "loss": 1.7002, "step": 51535 }, { "epoch": 1.71, "grad_norm": 0.5430201292037964, "learning_rate": 0.0002331447344830607, "loss": 1.7389, "step": 51536 }, { "epoch": 1.71, "grad_norm": 0.5511295795440674, "learning_rate": 0.00023313454525128536, "loss": 1.7514, "step": 51537 }, { "epoch": 1.71, "grad_norm": 0.5911795496940613, "learning_rate": 0.00023312435610067477, "loss": 1.7501, "step": 51538 }, { "epoch": 1.71, "grad_norm": 0.5645159482955933, "learning_rate": 0.00023311416703124127, "loss": 1.7579, "step": 51539 }, { "epoch": 1.71, "grad_norm": 0.5649177432060242, "learning_rate": 0.00023310397804299752, "loss": 1.7106, "step": 51540 }, { "epoch": 1.71, "grad_norm": 0.5685685276985168, "learning_rate": 0.0002330937891359555, "loss": 1.8386, "step": 51541 }, { "epoch": 1.71, "grad_norm": 0.5494250059127808, "learning_rate": 0.00023308360031012778, "loss": 1.752, "step": 51542 }, { "epoch": 1.71, "grad_norm": 0.5644642114639282, "learning_rate": 0.00023307341156552676, "loss": 1.7309, "step": 51543 }, { "epoch": 1.71, "grad_norm": 0.559683620929718, "learning_rate": 0.00023306322290216484, "loss": 1.736, "step": 51544 }, { "epoch": 1.71, "grad_norm": 0.5840774774551392, "learning_rate": 0.00023305303432005417, "loss": 1.6781, "step": 51545 }, { "epoch": 1.71, "grad_norm": 0.5530660152435303, "learning_rate": 0.00023304284581920724, "loss": 1.7353, "step": 51546 }, { "epoch": 1.71, "grad_norm": 0.5503965616226196, "learning_rate": 0.00023303265739963661, "loss": 1.7253, "step": 51547 }, { "epoch": 1.72, "grad_norm": 0.5479670763015747, "learning_rate": 0.00023302246906135426, "loss": 1.6338, "step": 51548 }, { "epoch": 1.72, "grad_norm": 0.565888524055481, "learning_rate": 0.00023301228080437286, "loss": 1.7395, "step": 51549 }, { "epoch": 1.72, "grad_norm": 0.5510126352310181, "learning_rate": 0.0002330020926287047, "loss": 1.811, "step": 51550 }, { "epoch": 1.72, "grad_norm": 0.5557742714881897, "learning_rate": 0.00023299190453436212, "loss": 1.7575, "step": 51551 }, { "epoch": 1.72, "grad_norm": 0.5564471483230591, "learning_rate": 0.00023298171652135746, "loss": 1.8107, "step": 51552 }, { "epoch": 1.72, "grad_norm": 0.5731584429740906, "learning_rate": 0.00023297152858970304, "loss": 1.8303, "step": 51553 }, { "epoch": 1.72, "grad_norm": 0.5575768351554871, "learning_rate": 0.00023296134073941152, "loss": 1.7345, "step": 51554 }, { "epoch": 1.72, "grad_norm": 0.5988025665283203, "learning_rate": 0.00023295115297049487, "loss": 1.7802, "step": 51555 }, { "epoch": 1.72, "grad_norm": 0.564454197883606, "learning_rate": 0.0002329409652829657, "loss": 1.7167, "step": 51556 }, { "epoch": 1.72, "grad_norm": 0.5692065358161926, "learning_rate": 0.00023293077767683637, "loss": 1.7825, "step": 51557 }, { "epoch": 1.72, "grad_norm": 0.5493247509002686, "learning_rate": 0.00023292059015211913, "loss": 1.7578, "step": 51558 }, { "epoch": 1.72, "grad_norm": 0.5424115657806396, "learning_rate": 0.00023291040270882643, "loss": 1.7708, "step": 51559 }, { "epoch": 1.72, "grad_norm": 0.5394418239593506, "learning_rate": 0.0002329002153469707, "loss": 1.7629, "step": 51560 }, { "epoch": 1.72, "grad_norm": 0.5514131784439087, "learning_rate": 0.00023289002806656413, "loss": 1.8773, "step": 51561 }, { "epoch": 1.72, "grad_norm": 0.5537738800048828, "learning_rate": 0.0002328798408676191, "loss": 1.714, "step": 51562 }, { "epoch": 1.72, "grad_norm": 0.5523539185523987, "learning_rate": 0.00023286965375014811, "loss": 1.7813, "step": 51563 }, { "epoch": 1.72, "grad_norm": 0.5559664964675903, "learning_rate": 0.00023285946671416355, "loss": 1.7256, "step": 51564 }, { "epoch": 1.72, "grad_norm": 0.5329821705818176, "learning_rate": 0.00023284927975967767, "loss": 1.7672, "step": 51565 }, { "epoch": 1.72, "grad_norm": 0.5544143915176392, "learning_rate": 0.00023283909288670284, "loss": 1.7645, "step": 51566 }, { "epoch": 1.72, "grad_norm": 0.5547963976860046, "learning_rate": 0.0002328289060952515, "loss": 1.7456, "step": 51567 }, { "epoch": 1.72, "grad_norm": 0.5389211177825928, "learning_rate": 0.00023281871938533587, "loss": 1.7197, "step": 51568 }, { "epoch": 1.72, "grad_norm": 0.5412504076957703, "learning_rate": 0.0002328085327569685, "loss": 1.7409, "step": 51569 }, { "epoch": 1.72, "grad_norm": 0.5444921851158142, "learning_rate": 0.00023279834621016168, "loss": 1.6787, "step": 51570 }, { "epoch": 1.72, "grad_norm": 0.5483222603797913, "learning_rate": 0.00023278815974492773, "loss": 1.711, "step": 51571 }, { "epoch": 1.72, "grad_norm": 0.5544551014900208, "learning_rate": 0.00023277797336127907, "loss": 1.7284, "step": 51572 }, { "epoch": 1.72, "grad_norm": 0.5507304668426514, "learning_rate": 0.00023276778705922803, "loss": 1.683, "step": 51573 }, { "epoch": 1.72, "grad_norm": 0.536042332649231, "learning_rate": 0.00023275760083878703, "loss": 1.7021, "step": 51574 }, { "epoch": 1.72, "grad_norm": 0.5331584811210632, "learning_rate": 0.0002327474146999683, "loss": 1.7454, "step": 51575 }, { "epoch": 1.72, "grad_norm": 0.5784225463867188, "learning_rate": 0.00023273722864278434, "loss": 1.7542, "step": 51576 }, { "epoch": 1.72, "grad_norm": 0.5587382316589355, "learning_rate": 0.00023272704266724757, "loss": 1.7415, "step": 51577 }, { "epoch": 1.72, "grad_norm": 0.5476746559143066, "learning_rate": 0.00023271685677337016, "loss": 1.7951, "step": 51578 }, { "epoch": 1.72, "grad_norm": 0.5612614750862122, "learning_rate": 0.0002327066709611646, "loss": 1.7306, "step": 51579 }, { "epoch": 1.72, "grad_norm": 0.5597917437553406, "learning_rate": 0.0002326964852306432, "loss": 1.7052, "step": 51580 }, { "epoch": 1.72, "grad_norm": 0.5405037999153137, "learning_rate": 0.00023268629958181846, "loss": 1.6979, "step": 51581 }, { "epoch": 1.72, "grad_norm": 0.5680992603302002, "learning_rate": 0.00023267611401470246, "loss": 1.7841, "step": 51582 }, { "epoch": 1.72, "grad_norm": 0.5700458884239197, "learning_rate": 0.0002326659285293078, "loss": 1.7183, "step": 51583 }, { "epoch": 1.72, "grad_norm": 0.5872410535812378, "learning_rate": 0.00023265574312564686, "loss": 1.7262, "step": 51584 }, { "epoch": 1.72, "grad_norm": 0.5558076500892639, "learning_rate": 0.00023264555780373188, "loss": 1.7801, "step": 51585 }, { "epoch": 1.72, "grad_norm": 0.566758394241333, "learning_rate": 0.00023263537256357523, "loss": 1.684, "step": 51586 }, { "epoch": 1.72, "grad_norm": 0.5768778324127197, "learning_rate": 0.0002326251874051894, "loss": 1.8097, "step": 51587 }, { "epoch": 1.72, "grad_norm": 0.5778381824493408, "learning_rate": 0.0002326150023285866, "loss": 1.6901, "step": 51588 }, { "epoch": 1.72, "grad_norm": 0.552397608757019, "learning_rate": 0.0002326048173337792, "loss": 1.76, "step": 51589 }, { "epoch": 1.72, "grad_norm": 0.5642469525337219, "learning_rate": 0.00023259463242077967, "loss": 1.7748, "step": 51590 }, { "epoch": 1.72, "grad_norm": 0.5646181106567383, "learning_rate": 0.00023258444758960043, "loss": 1.6651, "step": 51591 }, { "epoch": 1.72, "grad_norm": 0.5628790259361267, "learning_rate": 0.0002325742628402536, "loss": 1.8055, "step": 51592 }, { "epoch": 1.72, "grad_norm": 0.5600260496139526, "learning_rate": 0.00023256407817275174, "loss": 1.7852, "step": 51593 }, { "epoch": 1.72, "grad_norm": 0.5561394095420837, "learning_rate": 0.0002325538935871072, "loss": 1.7796, "step": 51594 }, { "epoch": 1.72, "grad_norm": 0.5340335965156555, "learning_rate": 0.0002325437090833322, "loss": 1.698, "step": 51595 }, { "epoch": 1.72, "grad_norm": 0.5455406904220581, "learning_rate": 0.0002325335246614392, "loss": 1.7144, "step": 51596 }, { "epoch": 1.72, "grad_norm": 0.5730370283126831, "learning_rate": 0.0002325233403214407, "loss": 1.6891, "step": 51597 }, { "epoch": 1.72, "grad_norm": 0.5688207745552063, "learning_rate": 0.00023251315606334874, "loss": 1.8168, "step": 51598 }, { "epoch": 1.72, "grad_norm": 0.5270644426345825, "learning_rate": 0.00023250297188717593, "loss": 1.7332, "step": 51599 }, { "epoch": 1.72, "grad_norm": 0.5472318530082703, "learning_rate": 0.0002324927877929346, "loss": 1.7376, "step": 51600 }, { "epoch": 1.72, "grad_norm": 0.5601673126220703, "learning_rate": 0.0002324826037806371, "loss": 1.7447, "step": 51601 }, { "epoch": 1.72, "grad_norm": 0.5522246956825256, "learning_rate": 0.0002324724198502957, "loss": 1.8073, "step": 51602 }, { "epoch": 1.72, "grad_norm": 0.5828670859336853, "learning_rate": 0.0002324622360019228, "loss": 1.7574, "step": 51603 }, { "epoch": 1.72, "grad_norm": 0.557083249092102, "learning_rate": 0.00023245205223553097, "loss": 1.7144, "step": 51604 }, { "epoch": 1.72, "grad_norm": 0.5393394827842712, "learning_rate": 0.00023244186855113223, "loss": 1.6697, "step": 51605 }, { "epoch": 1.72, "grad_norm": 0.5606579184532166, "learning_rate": 0.00023243168494873915, "loss": 1.6968, "step": 51606 }, { "epoch": 1.72, "grad_norm": 0.568810760974884, "learning_rate": 0.0002324215014283641, "loss": 1.7756, "step": 51607 }, { "epoch": 1.72, "grad_norm": 0.5613196492195129, "learning_rate": 0.00023241131799001935, "loss": 1.7739, "step": 51608 }, { "epoch": 1.72, "grad_norm": 0.5721205472946167, "learning_rate": 0.0002324011346337173, "loss": 1.7387, "step": 51609 }, { "epoch": 1.72, "grad_norm": 0.5549734234809875, "learning_rate": 0.00023239095135947022, "loss": 1.7166, "step": 51610 }, { "epoch": 1.72, "grad_norm": 0.5308545827865601, "learning_rate": 0.00023238076816729078, "loss": 1.6729, "step": 51611 }, { "epoch": 1.72, "grad_norm": 0.547874391078949, "learning_rate": 0.00023237058505719093, "loss": 1.7824, "step": 51612 }, { "epoch": 1.72, "grad_norm": 0.5618131756782532, "learning_rate": 0.00023236040202918328, "loss": 1.787, "step": 51613 }, { "epoch": 1.72, "grad_norm": 0.5587849617004395, "learning_rate": 0.00023235021908328023, "loss": 1.6834, "step": 51614 }, { "epoch": 1.72, "grad_norm": 0.5593766570091248, "learning_rate": 0.00023234003621949394, "loss": 1.7448, "step": 51615 }, { "epoch": 1.72, "grad_norm": 0.5619447827339172, "learning_rate": 0.0002323298534378369, "loss": 1.7495, "step": 51616 }, { "epoch": 1.72, "grad_norm": 0.8502787947654724, "learning_rate": 0.00023231967073832153, "loss": 1.8327, "step": 51617 }, { "epoch": 1.72, "grad_norm": 0.5650544166564941, "learning_rate": 0.00023230948812095998, "loss": 1.7357, "step": 51618 }, { "epoch": 1.72, "grad_norm": 0.5538057684898376, "learning_rate": 0.00023229930558576472, "loss": 1.806, "step": 51619 }, { "epoch": 1.72, "grad_norm": 0.5443111062049866, "learning_rate": 0.00023228912313274816, "loss": 1.7367, "step": 51620 }, { "epoch": 1.72, "grad_norm": 0.5661879181861877, "learning_rate": 0.00023227894076192272, "loss": 1.8235, "step": 51621 }, { "epoch": 1.72, "grad_norm": 0.5835599303245544, "learning_rate": 0.00023226875847330062, "loss": 1.8119, "step": 51622 }, { "epoch": 1.72, "grad_norm": 0.5534437298774719, "learning_rate": 0.00023225857626689423, "loss": 1.7557, "step": 51623 }, { "epoch": 1.72, "grad_norm": 0.5549685955047607, "learning_rate": 0.00023224839414271603, "loss": 1.754, "step": 51624 }, { "epoch": 1.72, "grad_norm": 0.5347310900688171, "learning_rate": 0.00023223821210077825, "loss": 1.7869, "step": 51625 }, { "epoch": 1.72, "grad_norm": 0.5393872857093811, "learning_rate": 0.00023222803014109322, "loss": 1.717, "step": 51626 }, { "epoch": 1.72, "grad_norm": 0.5569407939910889, "learning_rate": 0.00023221784826367358, "loss": 1.764, "step": 51627 }, { "epoch": 1.72, "grad_norm": 0.5467260479927063, "learning_rate": 0.00023220766646853125, "loss": 1.8001, "step": 51628 }, { "epoch": 1.72, "grad_norm": 0.5294719934463501, "learning_rate": 0.0002321974847556789, "loss": 1.7219, "step": 51629 }, { "epoch": 1.72, "grad_norm": 0.5332551598548889, "learning_rate": 0.00023218730312512883, "loss": 1.7131, "step": 51630 }, { "epoch": 1.72, "grad_norm": 0.5560595393180847, "learning_rate": 0.00023217712157689346, "loss": 1.7111, "step": 51631 }, { "epoch": 1.72, "grad_norm": 0.576856791973114, "learning_rate": 0.00023216694011098498, "loss": 1.7716, "step": 51632 }, { "epoch": 1.72, "grad_norm": 0.5276914238929749, "learning_rate": 0.0002321567587274158, "loss": 1.7062, "step": 51633 }, { "epoch": 1.72, "grad_norm": 0.5395394563674927, "learning_rate": 0.00023214657742619849, "loss": 1.7815, "step": 51634 }, { "epoch": 1.72, "grad_norm": 0.5569029450416565, "learning_rate": 0.00023213639620734504, "loss": 1.7467, "step": 51635 }, { "epoch": 1.72, "grad_norm": 0.5461814999580383, "learning_rate": 0.00023212621507086808, "loss": 1.6874, "step": 51636 }, { "epoch": 1.72, "grad_norm": 0.534845769405365, "learning_rate": 0.00023211603401677996, "loss": 1.6779, "step": 51637 }, { "epoch": 1.72, "grad_norm": 0.5600048899650574, "learning_rate": 0.00023210585304509293, "loss": 1.7346, "step": 51638 }, { "epoch": 1.72, "grad_norm": 0.5654974579811096, "learning_rate": 0.00023209567215581937, "loss": 1.7431, "step": 51639 }, { "epoch": 1.72, "grad_norm": 0.5436151027679443, "learning_rate": 0.00023208549134897159, "loss": 1.7521, "step": 51640 }, { "epoch": 1.72, "grad_norm": 0.5398812890052795, "learning_rate": 0.0002320753106245622, "loss": 1.7002, "step": 51641 }, { "epoch": 1.72, "grad_norm": 0.5742798447608948, "learning_rate": 0.00023206512998260318, "loss": 1.8029, "step": 51642 }, { "epoch": 1.72, "grad_norm": 0.5722717046737671, "learning_rate": 0.00023205494942310717, "loss": 1.7549, "step": 51643 }, { "epoch": 1.72, "grad_norm": 1.7953882217407227, "learning_rate": 0.0002320447689460865, "loss": 1.8272, "step": 51644 }, { "epoch": 1.72, "grad_norm": 0.5641173720359802, "learning_rate": 0.00023203458855155338, "loss": 1.8266, "step": 51645 }, { "epoch": 1.72, "grad_norm": 0.5457839965820312, "learning_rate": 0.00023202440823952029, "loss": 1.7503, "step": 51646 }, { "epoch": 1.72, "grad_norm": 0.5850965976715088, "learning_rate": 0.0002320142280099996, "loss": 1.6962, "step": 51647 }, { "epoch": 1.72, "grad_norm": 0.5569489002227783, "learning_rate": 0.00023200404786300356, "loss": 1.7541, "step": 51648 }, { "epoch": 1.72, "grad_norm": 0.5323720574378967, "learning_rate": 0.00023199386779854453, "loss": 1.6779, "step": 51649 }, { "epoch": 1.72, "grad_norm": 0.5544238090515137, "learning_rate": 0.00023198368781663498, "loss": 1.7715, "step": 51650 }, { "epoch": 1.72, "grad_norm": 0.5701092481613159, "learning_rate": 0.00023197350791728726, "loss": 1.7672, "step": 51651 }, { "epoch": 1.72, "grad_norm": 0.5618531703948975, "learning_rate": 0.00023196332810051362, "loss": 1.7419, "step": 51652 }, { "epoch": 1.72, "grad_norm": 0.5581368803977966, "learning_rate": 0.0002319531483663265, "loss": 1.8411, "step": 51653 }, { "epoch": 1.72, "grad_norm": 0.5847973227500916, "learning_rate": 0.0002319429687147383, "loss": 1.7882, "step": 51654 }, { "epoch": 1.72, "grad_norm": 0.5494845509529114, "learning_rate": 0.0002319327891457612, "loss": 1.6977, "step": 51655 }, { "epoch": 1.72, "grad_norm": 0.5576867461204529, "learning_rate": 0.0002319226096594076, "loss": 1.8313, "step": 51656 }, { "epoch": 1.72, "grad_norm": 0.5639826059341431, "learning_rate": 0.00023191243025569018, "loss": 1.8102, "step": 51657 }, { "epoch": 1.72, "grad_norm": 0.553428053855896, "learning_rate": 0.00023190225093462077, "loss": 1.7757, "step": 51658 }, { "epoch": 1.72, "grad_norm": 0.5928753018379211, "learning_rate": 0.0002318920716962121, "loss": 1.7232, "step": 51659 }, { "epoch": 1.72, "grad_norm": 0.5654409527778625, "learning_rate": 0.00023188189254047642, "loss": 1.7342, "step": 51660 }, { "epoch": 1.72, "grad_norm": 0.5621832013130188, "learning_rate": 0.00023187171346742615, "loss": 1.7504, "step": 51661 }, { "epoch": 1.72, "grad_norm": 0.5669149160385132, "learning_rate": 0.0002318615344770735, "loss": 1.7279, "step": 51662 }, { "epoch": 1.72, "grad_norm": 0.5632251501083374, "learning_rate": 0.00023185135556943082, "loss": 1.7715, "step": 51663 }, { "epoch": 1.72, "grad_norm": 0.5646573901176453, "learning_rate": 0.00023184117674451078, "loss": 1.7888, "step": 51664 }, { "epoch": 1.72, "grad_norm": 0.5406965017318726, "learning_rate": 0.00023183099800232535, "loss": 1.7816, "step": 51665 }, { "epoch": 1.72, "grad_norm": 0.5416646599769592, "learning_rate": 0.00023182081934288703, "loss": 1.7372, "step": 51666 }, { "epoch": 1.72, "grad_norm": 0.5548124313354492, "learning_rate": 0.0002318106407662083, "loss": 1.6798, "step": 51667 }, { "epoch": 1.72, "grad_norm": 0.5676126480102539, "learning_rate": 0.00023180046227230133, "loss": 1.7132, "step": 51668 }, { "epoch": 1.72, "grad_norm": 0.5861505270004272, "learning_rate": 0.00023179028386117857, "loss": 1.7839, "step": 51669 }, { "epoch": 1.72, "grad_norm": 0.5580074191093445, "learning_rate": 0.0002317801055328523, "loss": 1.7587, "step": 51670 }, { "epoch": 1.72, "grad_norm": 0.5371021628379822, "learning_rate": 0.00023176992728733508, "loss": 1.7465, "step": 51671 }, { "epoch": 1.72, "grad_norm": 0.5810051560401917, "learning_rate": 0.00023175974912463895, "loss": 1.7397, "step": 51672 }, { "epoch": 1.72, "grad_norm": 0.541008710861206, "learning_rate": 0.00023174957104477647, "loss": 1.7285, "step": 51673 }, { "epoch": 1.72, "grad_norm": 0.6063350439071655, "learning_rate": 0.00023173939304776004, "loss": 1.7333, "step": 51674 }, { "epoch": 1.72, "grad_norm": 0.569578230381012, "learning_rate": 0.00023172921513360187, "loss": 1.8781, "step": 51675 }, { "epoch": 1.72, "grad_norm": 0.5435035228729248, "learning_rate": 0.00023171903730231434, "loss": 1.7453, "step": 51676 }, { "epoch": 1.72, "grad_norm": 0.5552501678466797, "learning_rate": 0.00023170885955390994, "loss": 1.7623, "step": 51677 }, { "epoch": 1.72, "grad_norm": 0.5659953355789185, "learning_rate": 0.00023169868188840077, "loss": 1.7645, "step": 51678 }, { "epoch": 1.72, "grad_norm": 0.5552716255187988, "learning_rate": 0.00023168850430579943, "loss": 1.7672, "step": 51679 }, { "epoch": 1.72, "grad_norm": 0.5526795983314514, "learning_rate": 0.00023167832680611814, "loss": 1.7662, "step": 51680 }, { "epoch": 1.72, "grad_norm": 0.5261886119842529, "learning_rate": 0.00023166814938936937, "loss": 1.7732, "step": 51681 }, { "epoch": 1.72, "grad_norm": 0.5665161609649658, "learning_rate": 0.00023165797205556532, "loss": 1.7676, "step": 51682 }, { "epoch": 1.72, "grad_norm": 0.5457975268363953, "learning_rate": 0.00023164779480471846, "loss": 1.762, "step": 51683 }, { "epoch": 1.72, "grad_norm": 0.5657293200492859, "learning_rate": 0.00023163761763684117, "loss": 1.6736, "step": 51684 }, { "epoch": 1.72, "grad_norm": 0.5553545951843262, "learning_rate": 0.00023162744055194557, "loss": 1.7801, "step": 51685 }, { "epoch": 1.72, "grad_norm": 0.5476077795028687, "learning_rate": 0.0002316172635500443, "loss": 1.7169, "step": 51686 }, { "epoch": 1.72, "grad_norm": 0.5459562540054321, "learning_rate": 0.00023160708663114962, "loss": 1.7556, "step": 51687 }, { "epoch": 1.72, "grad_norm": 0.5675743222236633, "learning_rate": 0.00023159690979527382, "loss": 1.7109, "step": 51688 }, { "epoch": 1.72, "grad_norm": 0.5573969483375549, "learning_rate": 0.0002315867330424293, "loss": 1.756, "step": 51689 }, { "epoch": 1.72, "grad_norm": 0.5596221685409546, "learning_rate": 0.00023157655637262835, "loss": 1.7027, "step": 51690 }, { "epoch": 1.72, "grad_norm": 0.551026463508606, "learning_rate": 0.0002315663797858835, "loss": 1.6896, "step": 51691 }, { "epoch": 1.72, "grad_norm": 1.313883900642395, "learning_rate": 0.00023155620328220683, "loss": 1.7399, "step": 51692 }, { "epoch": 1.72, "grad_norm": 0.5630906820297241, "learning_rate": 0.0002315460268616109, "loss": 1.7021, "step": 51693 }, { "epoch": 1.72, "grad_norm": 0.5625243186950684, "learning_rate": 0.0002315358505241081, "loss": 1.7433, "step": 51694 }, { "epoch": 1.72, "grad_norm": 0.5630849599838257, "learning_rate": 0.00023152567426971063, "loss": 1.7058, "step": 51695 }, { "epoch": 1.72, "grad_norm": 0.5492909550666809, "learning_rate": 0.00023151549809843086, "loss": 1.7472, "step": 51696 }, { "epoch": 1.72, "grad_norm": 0.5690013766288757, "learning_rate": 0.00023150532201028127, "loss": 1.7181, "step": 51697 }, { "epoch": 1.72, "grad_norm": 0.5586560964584351, "learning_rate": 0.00023149514600527408, "loss": 1.7742, "step": 51698 }, { "epoch": 1.72, "grad_norm": 0.5714336633682251, "learning_rate": 0.0002314849700834216, "loss": 1.7457, "step": 51699 }, { "epoch": 1.72, "grad_norm": 0.5739290714263916, "learning_rate": 0.00023147479424473635, "loss": 1.7714, "step": 51700 }, { "epoch": 1.72, "grad_norm": 0.552243173122406, "learning_rate": 0.00023146461848923066, "loss": 1.7193, "step": 51701 }, { "epoch": 1.72, "grad_norm": 0.5684165358543396, "learning_rate": 0.00023145444281691676, "loss": 1.7399, "step": 51702 }, { "epoch": 1.72, "grad_norm": 0.5905998945236206, "learning_rate": 0.00023144426722780705, "loss": 1.7536, "step": 51703 }, { "epoch": 1.72, "grad_norm": 0.5510866641998291, "learning_rate": 0.000231434091721914, "loss": 1.7522, "step": 51704 }, { "epoch": 1.72, "grad_norm": 0.5852461457252502, "learning_rate": 0.00023142391629924977, "loss": 1.7403, "step": 51705 }, { "epoch": 1.72, "grad_norm": 0.5509426593780518, "learning_rate": 0.00023141374095982673, "loss": 1.6641, "step": 51706 }, { "epoch": 1.72, "grad_norm": 0.5478494167327881, "learning_rate": 0.00023140356570365737, "loss": 1.7619, "step": 51707 }, { "epoch": 1.72, "grad_norm": 0.5674965381622314, "learning_rate": 0.00023139339053075407, "loss": 1.7554, "step": 51708 }, { "epoch": 1.72, "grad_norm": 0.5599204897880554, "learning_rate": 0.000231383215441129, "loss": 1.6721, "step": 51709 }, { "epoch": 1.72, "grad_norm": 0.5499539971351624, "learning_rate": 0.00023137304043479461, "loss": 1.7229, "step": 51710 }, { "epoch": 1.72, "grad_norm": 0.5548421740531921, "learning_rate": 0.0002313628655117633, "loss": 1.7805, "step": 51711 }, { "epoch": 1.72, "grad_norm": 0.5697308778762817, "learning_rate": 0.00023135269067204724, "loss": 1.8097, "step": 51712 }, { "epoch": 1.72, "grad_norm": 0.5721761584281921, "learning_rate": 0.00023134251591565888, "loss": 1.7488, "step": 51713 }, { "epoch": 1.72, "grad_norm": 0.5600535869598389, "learning_rate": 0.0002313323412426108, "loss": 1.733, "step": 51714 }, { "epoch": 1.72, "grad_norm": 0.5678883194923401, "learning_rate": 0.0002313221666529149, "loss": 1.7942, "step": 51715 }, { "epoch": 1.72, "grad_norm": 0.5858316421508789, "learning_rate": 0.00023131199214658385, "loss": 1.7867, "step": 51716 }, { "epoch": 1.72, "grad_norm": 0.5610181093215942, "learning_rate": 0.00023130181772362997, "loss": 1.8091, "step": 51717 }, { "epoch": 1.72, "grad_norm": 0.5672082901000977, "learning_rate": 0.0002312916433840656, "loss": 1.7619, "step": 51718 }, { "epoch": 1.72, "grad_norm": 0.5737371444702148, "learning_rate": 0.00023128146912790296, "loss": 1.7774, "step": 51719 }, { "epoch": 1.72, "grad_norm": 0.5701263546943665, "learning_rate": 0.00023127129495515442, "loss": 1.6939, "step": 51720 }, { "epoch": 1.72, "grad_norm": 0.547148585319519, "learning_rate": 0.0002312611208658326, "loss": 1.7601, "step": 51721 }, { "epoch": 1.72, "grad_norm": 0.544293224811554, "learning_rate": 0.00023125094685994948, "loss": 1.8634, "step": 51722 }, { "epoch": 1.72, "grad_norm": 0.5485628247261047, "learning_rate": 0.00023124077293751766, "loss": 1.721, "step": 51723 }, { "epoch": 1.72, "grad_norm": 0.5708747506141663, "learning_rate": 0.00023123059909854944, "loss": 1.7726, "step": 51724 }, { "epoch": 1.72, "grad_norm": 1.3083209991455078, "learning_rate": 0.00023122042534305708, "loss": 1.7057, "step": 51725 }, { "epoch": 1.72, "grad_norm": 0.5469354391098022, "learning_rate": 0.00023121025167105298, "loss": 1.775, "step": 51726 }, { "epoch": 1.72, "grad_norm": 0.5604869723320007, "learning_rate": 0.00023120007808254947, "loss": 1.7165, "step": 51727 }, { "epoch": 1.72, "grad_norm": 0.5496506690979004, "learning_rate": 0.00023118990457755912, "loss": 1.7128, "step": 51728 }, { "epoch": 1.72, "grad_norm": 0.551643431186676, "learning_rate": 0.00023117973115609387, "loss": 1.7713, "step": 51729 }, { "epoch": 1.72, "grad_norm": 0.5303718447685242, "learning_rate": 0.00023116955781816634, "loss": 1.7794, "step": 51730 }, { "epoch": 1.72, "grad_norm": 0.5602930784225464, "learning_rate": 0.0002311593845637889, "loss": 1.7437, "step": 51731 }, { "epoch": 1.72, "grad_norm": 0.5375990271568298, "learning_rate": 0.00023114921139297378, "loss": 1.8128, "step": 51732 }, { "epoch": 1.72, "grad_norm": 0.5589413642883301, "learning_rate": 0.00023113903830573334, "loss": 1.7955, "step": 51733 }, { "epoch": 1.72, "grad_norm": 0.533352255821228, "learning_rate": 0.00023112886530208008, "loss": 1.7162, "step": 51734 }, { "epoch": 1.72, "grad_norm": 0.5463238954544067, "learning_rate": 0.00023111869238202614, "loss": 1.7255, "step": 51735 }, { "epoch": 1.72, "grad_norm": 0.5423881411552429, "learning_rate": 0.00023110851954558385, "loss": 1.7342, "step": 51736 }, { "epoch": 1.72, "grad_norm": 0.5605951547622681, "learning_rate": 0.0002310983467927658, "loss": 1.8188, "step": 51737 }, { "epoch": 1.72, "grad_norm": 0.5883737802505493, "learning_rate": 0.0002310881741235842, "loss": 1.7329, "step": 51738 }, { "epoch": 1.72, "grad_norm": 0.575249969959259, "learning_rate": 0.00023107800153805138, "loss": 1.7029, "step": 51739 }, { "epoch": 1.72, "grad_norm": 0.5554585456848145, "learning_rate": 0.00023106782903617974, "loss": 1.6811, "step": 51740 }, { "epoch": 1.72, "grad_norm": 0.5415350794792175, "learning_rate": 0.00023105765661798162, "loss": 1.8194, "step": 51741 }, { "epoch": 1.72, "grad_norm": 0.554868221282959, "learning_rate": 0.0002310474842834693, "loss": 1.7941, "step": 51742 }, { "epoch": 1.72, "grad_norm": 0.5711103081703186, "learning_rate": 0.00023103731203265508, "loss": 1.7314, "step": 51743 }, { "epoch": 1.72, "grad_norm": 0.5582284331321716, "learning_rate": 0.0002310271398655516, "loss": 1.7013, "step": 51744 }, { "epoch": 1.72, "grad_norm": 0.5615896582603455, "learning_rate": 0.00023101696778217082, "loss": 1.6985, "step": 51745 }, { "epoch": 1.72, "grad_norm": 0.5348461866378784, "learning_rate": 0.00023100679578252537, "loss": 1.6991, "step": 51746 }, { "epoch": 1.72, "grad_norm": 0.5564152598381042, "learning_rate": 0.0002309966238666275, "loss": 1.791, "step": 51747 }, { "epoch": 1.72, "grad_norm": 0.5407190322875977, "learning_rate": 0.0002309864520344896, "loss": 1.7441, "step": 51748 }, { "epoch": 1.72, "grad_norm": 0.5457996129989624, "learning_rate": 0.00023097628028612393, "loss": 1.6665, "step": 51749 }, { "epoch": 1.72, "grad_norm": 0.567313015460968, "learning_rate": 0.0002309661086215428, "loss": 1.7391, "step": 51750 }, { "epoch": 1.72, "grad_norm": 0.5710442066192627, "learning_rate": 0.00023095593704075887, "loss": 1.7368, "step": 51751 }, { "epoch": 1.72, "grad_norm": 0.5459272265434265, "learning_rate": 0.00023094576554378405, "loss": 1.6911, "step": 51752 }, { "epoch": 1.72, "grad_norm": 0.5649738311767578, "learning_rate": 0.00023093559413063096, "loss": 1.7097, "step": 51753 }, { "epoch": 1.72, "grad_norm": 0.6113370656967163, "learning_rate": 0.0002309254228013119, "loss": 1.8419, "step": 51754 }, { "epoch": 1.72, "grad_norm": 0.5491823554039001, "learning_rate": 0.00023091525155583922, "loss": 1.772, "step": 51755 }, { "epoch": 1.72, "grad_norm": 0.5694308876991272, "learning_rate": 0.0002309050803942252, "loss": 1.6852, "step": 51756 }, { "epoch": 1.72, "grad_norm": 0.5601730346679688, "learning_rate": 0.0002308949093164822, "loss": 1.7825, "step": 51757 }, { "epoch": 1.72, "grad_norm": 0.5449023246765137, "learning_rate": 0.00023088473832262273, "loss": 1.7395, "step": 51758 }, { "epoch": 1.72, "grad_norm": 0.5532153844833374, "learning_rate": 0.00023087456741265888, "loss": 1.7985, "step": 51759 }, { "epoch": 1.72, "grad_norm": 0.5509687066078186, "learning_rate": 0.0002308643965866031, "loss": 1.7909, "step": 51760 }, { "epoch": 1.72, "grad_norm": 0.55356764793396, "learning_rate": 0.0002308542258444679, "loss": 1.7859, "step": 51761 }, { "epoch": 1.72, "grad_norm": 0.5364654064178467, "learning_rate": 0.00023084405518626538, "loss": 1.6976, "step": 51762 }, { "epoch": 1.72, "grad_norm": 0.5380992889404297, "learning_rate": 0.000230833884612008, "loss": 1.7126, "step": 51763 }, { "epoch": 1.72, "grad_norm": 0.5603134036064148, "learning_rate": 0.00023082371412170815, "loss": 1.7543, "step": 51764 }, { "epoch": 1.72, "grad_norm": 0.5502579808235168, "learning_rate": 0.00023081354371537807, "loss": 1.771, "step": 51765 }, { "epoch": 1.72, "grad_norm": 0.5551572442054749, "learning_rate": 0.00023080337339303006, "loss": 1.776, "step": 51766 }, { "epoch": 1.72, "grad_norm": 0.551707923412323, "learning_rate": 0.00023079320315467665, "loss": 1.6808, "step": 51767 }, { "epoch": 1.72, "grad_norm": 0.5369394421577454, "learning_rate": 0.00023078303300033017, "loss": 1.7139, "step": 51768 }, { "epoch": 1.72, "grad_norm": 0.5487523674964905, "learning_rate": 0.00023077286293000278, "loss": 1.6254, "step": 51769 }, { "epoch": 1.72, "grad_norm": 0.5242578387260437, "learning_rate": 0.00023076269294370697, "loss": 1.7052, "step": 51770 }, { "epoch": 1.72, "grad_norm": 0.5378285050392151, "learning_rate": 0.0002307525230414551, "loss": 1.6927, "step": 51771 }, { "epoch": 1.72, "grad_norm": 0.580551266670227, "learning_rate": 0.0002307423532232594, "loss": 1.6987, "step": 51772 }, { "epoch": 1.72, "grad_norm": 0.5603865385055542, "learning_rate": 0.0002307321834891322, "loss": 1.7794, "step": 51773 }, { "epoch": 1.72, "grad_norm": 0.5662368535995483, "learning_rate": 0.00023072201383908615, "loss": 1.7866, "step": 51774 }, { "epoch": 1.72, "grad_norm": 0.5411472320556641, "learning_rate": 0.00023071184427313316, "loss": 1.7086, "step": 51775 }, { "epoch": 1.72, "grad_norm": 0.5459219217300415, "learning_rate": 0.00023070167479128585, "loss": 1.7535, "step": 51776 }, { "epoch": 1.72, "grad_norm": 0.5386771559715271, "learning_rate": 0.00023069150539355648, "loss": 1.7451, "step": 51777 }, { "epoch": 1.72, "grad_norm": 0.5891384482383728, "learning_rate": 0.00023068133607995748, "loss": 1.739, "step": 51778 }, { "epoch": 1.72, "grad_norm": 0.5529069900512695, "learning_rate": 0.00023067116685050105, "loss": 1.7382, "step": 51779 }, { "epoch": 1.72, "grad_norm": 0.552918553352356, "learning_rate": 0.00023066099770519953, "loss": 1.7485, "step": 51780 }, { "epoch": 1.72, "grad_norm": 0.5566089749336243, "learning_rate": 0.00023065082864406558, "loss": 1.6702, "step": 51781 }, { "epoch": 1.72, "grad_norm": 0.5525904893875122, "learning_rate": 0.00023064065966711108, "loss": 1.7027, "step": 51782 }, { "epoch": 1.72, "grad_norm": 0.6066408157348633, "learning_rate": 0.0002306304907743487, "loss": 1.7322, "step": 51783 }, { "epoch": 1.72, "grad_norm": 0.5714436769485474, "learning_rate": 0.0002306203219657907, "loss": 1.7572, "step": 51784 }, { "epoch": 1.72, "grad_norm": 0.5456446409225464, "learning_rate": 0.00023061015324144939, "loss": 1.6718, "step": 51785 }, { "epoch": 1.72, "grad_norm": 0.5659361481666565, "learning_rate": 0.00023059998460133713, "loss": 1.6754, "step": 51786 }, { "epoch": 1.72, "grad_norm": 0.5613992214202881, "learning_rate": 0.00023058981604546616, "loss": 1.7329, "step": 51787 }, { "epoch": 1.72, "grad_norm": 0.5556769967079163, "learning_rate": 0.00023057964757384913, "loss": 1.709, "step": 51788 }, { "epoch": 1.72, "grad_norm": 0.5423263907432556, "learning_rate": 0.000230569479186498, "loss": 1.7742, "step": 51789 }, { "epoch": 1.72, "grad_norm": 0.5508928298950195, "learning_rate": 0.00023055931088342537, "loss": 1.7404, "step": 51790 }, { "epoch": 1.72, "grad_norm": 0.5762408971786499, "learning_rate": 0.00023054914266464353, "loss": 1.7339, "step": 51791 }, { "epoch": 1.72, "grad_norm": 0.5462493300437927, "learning_rate": 0.00023053897453016476, "loss": 1.7817, "step": 51792 }, { "epoch": 1.72, "grad_norm": 0.5464043617248535, "learning_rate": 0.0002305288064800014, "loss": 1.7289, "step": 51793 }, { "epoch": 1.72, "grad_norm": 0.5531631112098694, "learning_rate": 0.00023051863851416597, "loss": 1.7345, "step": 51794 }, { "epoch": 1.72, "grad_norm": 0.5579231381416321, "learning_rate": 0.00023050847063267049, "loss": 1.803, "step": 51795 }, { "epoch": 1.72, "grad_norm": 0.5594726204872131, "learning_rate": 0.00023049830283552756, "loss": 1.8119, "step": 51796 }, { "epoch": 1.72, "grad_norm": 0.5653409957885742, "learning_rate": 0.0002304881351227495, "loss": 1.7077, "step": 51797 }, { "epoch": 1.72, "grad_norm": 0.543132483959198, "learning_rate": 0.0002304779674943486, "loss": 1.7187, "step": 51798 }, { "epoch": 1.72, "grad_norm": 0.5526010990142822, "learning_rate": 0.00023046779995033716, "loss": 1.7251, "step": 51799 }, { "epoch": 1.72, "grad_norm": 0.5592803955078125, "learning_rate": 0.00023045763249072756, "loss": 1.7494, "step": 51800 }, { "epoch": 1.72, "grad_norm": 0.5567829012870789, "learning_rate": 0.00023044746511553221, "loss": 1.7932, "step": 51801 }, { "epoch": 1.72, "grad_norm": 0.5674378871917725, "learning_rate": 0.00023043729782476328, "loss": 1.8198, "step": 51802 }, { "epoch": 1.72, "grad_norm": 0.55259770154953, "learning_rate": 0.0002304271306184333, "loss": 1.7144, "step": 51803 }, { "epoch": 1.72, "grad_norm": 0.541947603225708, "learning_rate": 0.00023041696349655456, "loss": 1.7207, "step": 51804 }, { "epoch": 1.72, "grad_norm": 0.555767297744751, "learning_rate": 0.00023040679645913934, "loss": 1.733, "step": 51805 }, { "epoch": 1.72, "grad_norm": 0.5562347769737244, "learning_rate": 0.00023039662950619997, "loss": 1.6922, "step": 51806 }, { "epoch": 1.72, "grad_norm": 0.5324226021766663, "learning_rate": 0.00023038646263774887, "loss": 1.7408, "step": 51807 }, { "epoch": 1.72, "grad_norm": 0.547447681427002, "learning_rate": 0.00023037629585379843, "loss": 1.7049, "step": 51808 }, { "epoch": 1.72, "grad_norm": 0.5569254755973816, "learning_rate": 0.00023036612915436073, "loss": 1.8113, "step": 51809 }, { "epoch": 1.72, "grad_norm": 0.550788938999176, "learning_rate": 0.00023035596253944836, "loss": 1.8008, "step": 51810 }, { "epoch": 1.72, "grad_norm": 0.5466088652610779, "learning_rate": 0.00023034579600907367, "loss": 1.695, "step": 51811 }, { "epoch": 1.72, "grad_norm": 0.5607186555862427, "learning_rate": 0.0002303356295632489, "loss": 1.7035, "step": 51812 }, { "epoch": 1.72, "grad_norm": 0.553849995136261, "learning_rate": 0.00023032546320198632, "loss": 1.769, "step": 51813 }, { "epoch": 1.72, "grad_norm": 0.5530735850334167, "learning_rate": 0.0002303152969252985, "loss": 1.7549, "step": 51814 }, { "epoch": 1.72, "grad_norm": 0.5492194890975952, "learning_rate": 0.00023030513073319755, "loss": 1.822, "step": 51815 }, { "epoch": 1.72, "grad_norm": 0.5451756119728088, "learning_rate": 0.00023029496462569584, "loss": 1.7437, "step": 51816 }, { "epoch": 1.72, "grad_norm": 0.5353238582611084, "learning_rate": 0.00023028479860280585, "loss": 1.6717, "step": 51817 }, { "epoch": 1.72, "grad_norm": 0.5441721081733704, "learning_rate": 0.00023027463266453988, "loss": 1.7393, "step": 51818 }, { "epoch": 1.72, "grad_norm": 0.5680443644523621, "learning_rate": 0.00023026446681091017, "loss": 1.7256, "step": 51819 }, { "epoch": 1.72, "grad_norm": 0.5461670160293579, "learning_rate": 0.00023025430104192915, "loss": 1.7314, "step": 51820 }, { "epoch": 1.72, "grad_norm": 0.5473249554634094, "learning_rate": 0.00023024413535760918, "loss": 1.7729, "step": 51821 }, { "epoch": 1.72, "grad_norm": 0.5494490265846252, "learning_rate": 0.00023023396975796248, "loss": 1.797, "step": 51822 }, { "epoch": 1.72, "grad_norm": 0.5443301796913147, "learning_rate": 0.00023022380424300143, "loss": 1.6666, "step": 51823 }, { "epoch": 1.72, "grad_norm": 0.5661904811859131, "learning_rate": 0.00023021363881273844, "loss": 1.7993, "step": 51824 }, { "epoch": 1.72, "grad_norm": 0.5728062987327576, "learning_rate": 0.00023020347346718587, "loss": 1.7804, "step": 51825 }, { "epoch": 1.72, "grad_norm": 0.5494410395622253, "learning_rate": 0.00023019330820635593, "loss": 1.8034, "step": 51826 }, { "epoch": 1.72, "grad_norm": 0.5804322361946106, "learning_rate": 0.00023018314303026107, "loss": 1.7375, "step": 51827 }, { "epoch": 1.72, "grad_norm": 0.5806633830070496, "learning_rate": 0.0002301729779389136, "loss": 1.7432, "step": 51828 }, { "epoch": 1.72, "grad_norm": 1.0223630666732788, "learning_rate": 0.00023016281293232582, "loss": 1.782, "step": 51829 }, { "epoch": 1.72, "grad_norm": 0.7746025323867798, "learning_rate": 0.00023015264801051, "loss": 1.8271, "step": 51830 }, { "epoch": 1.72, "grad_norm": 0.5833958387374878, "learning_rate": 0.00023014248317347879, "loss": 1.7758, "step": 51831 }, { "epoch": 1.72, "grad_norm": 0.5583239793777466, "learning_rate": 0.0002301323184212441, "loss": 1.7973, "step": 51832 }, { "epoch": 1.72, "grad_norm": 0.5507944822311401, "learning_rate": 0.00023012215375381855, "loss": 1.714, "step": 51833 }, { "epoch": 1.72, "grad_norm": 0.5654262900352478, "learning_rate": 0.0002301119891712144, "loss": 1.7212, "step": 51834 }, { "epoch": 1.72, "grad_norm": 0.5480175018310547, "learning_rate": 0.0002301018246734441, "loss": 1.7799, "step": 51835 }, { "epoch": 1.72, "grad_norm": 0.5753230452537537, "learning_rate": 0.0002300916602605198, "loss": 1.7707, "step": 51836 }, { "epoch": 1.72, "grad_norm": 0.554168164730072, "learning_rate": 0.00023008149593245383, "loss": 1.7219, "step": 51837 }, { "epoch": 1.72, "grad_norm": 0.5468030571937561, "learning_rate": 0.00023007133168925886, "loss": 1.7285, "step": 51838 }, { "epoch": 1.72, "grad_norm": 1.2170329093933105, "learning_rate": 0.00023006116753094673, "loss": 1.7923, "step": 51839 }, { "epoch": 1.72, "grad_norm": 0.569267749786377, "learning_rate": 0.00023005100345753018, "loss": 1.7757, "step": 51840 }, { "epoch": 1.72, "grad_norm": 0.594214677810669, "learning_rate": 0.00023004083946902146, "loss": 1.7136, "step": 51841 }, { "epoch": 1.72, "grad_norm": 0.5608265399932861, "learning_rate": 0.00023003067556543274, "loss": 1.7506, "step": 51842 }, { "epoch": 1.72, "grad_norm": 0.534519374370575, "learning_rate": 0.00023002051174677652, "loss": 1.7285, "step": 51843 }, { "epoch": 1.72, "grad_norm": 0.5560035705566406, "learning_rate": 0.00023001034801306498, "loss": 1.7454, "step": 51844 }, { "epoch": 1.72, "grad_norm": 0.5448099970817566, "learning_rate": 0.00023000018436431077, "loss": 1.705, "step": 51845 }, { "epoch": 1.72, "grad_norm": 0.5483995676040649, "learning_rate": 0.00022999002080052586, "loss": 1.7954, "step": 51846 }, { "epoch": 1.72, "grad_norm": 0.565173864364624, "learning_rate": 0.0002299798573217228, "loss": 1.7072, "step": 51847 }, { "epoch": 1.72, "grad_norm": 0.5509572625160217, "learning_rate": 0.00022996969392791392, "loss": 1.7954, "step": 51848 }, { "epoch": 1.73, "grad_norm": 0.5682336091995239, "learning_rate": 0.0002299595306191115, "loss": 1.7075, "step": 51849 }, { "epoch": 1.73, "grad_norm": 0.5447267293930054, "learning_rate": 0.00022994936739532783, "loss": 1.716, "step": 51850 }, { "epoch": 1.73, "grad_norm": 0.5436976552009583, "learning_rate": 0.00022993920425657538, "loss": 1.7716, "step": 51851 }, { "epoch": 1.73, "grad_norm": 0.5768534541130066, "learning_rate": 0.00022992904120286634, "loss": 1.7611, "step": 51852 }, { "epoch": 1.73, "grad_norm": 0.5704572796821594, "learning_rate": 0.0002299188782342131, "loss": 1.7148, "step": 51853 }, { "epoch": 1.73, "grad_norm": 0.5734585523605347, "learning_rate": 0.00022990871535062807, "loss": 1.7824, "step": 51854 }, { "epoch": 1.73, "grad_norm": 0.5721144080162048, "learning_rate": 0.00022989855255212358, "loss": 1.7266, "step": 51855 }, { "epoch": 1.73, "grad_norm": 0.5652623176574707, "learning_rate": 0.00022988838983871183, "loss": 1.7173, "step": 51856 }, { "epoch": 1.73, "grad_norm": 0.5548903942108154, "learning_rate": 0.00022987822721040529, "loss": 1.797, "step": 51857 }, { "epoch": 1.73, "grad_norm": 0.5342856645584106, "learning_rate": 0.0002298680646672163, "loss": 1.8054, "step": 51858 }, { "epoch": 1.73, "grad_norm": 0.5753757357597351, "learning_rate": 0.00022985790220915708, "loss": 1.8039, "step": 51859 }, { "epoch": 1.73, "grad_norm": 0.5589750409126282, "learning_rate": 0.00022984773983623997, "loss": 1.7056, "step": 51860 }, { "epoch": 1.73, "grad_norm": 0.5562607645988464, "learning_rate": 0.00022983757754847754, "loss": 1.6453, "step": 51861 }, { "epoch": 1.73, "grad_norm": 0.5530706644058228, "learning_rate": 0.00022982741534588179, "loss": 1.709, "step": 51862 }, { "epoch": 1.73, "grad_norm": 0.5412853360176086, "learning_rate": 0.00022981725322846527, "loss": 1.7774, "step": 51863 }, { "epoch": 1.73, "grad_norm": 0.5303080081939697, "learning_rate": 0.00022980709119624026, "loss": 1.7237, "step": 51864 }, { "epoch": 1.73, "grad_norm": 0.5482611060142517, "learning_rate": 0.00022979692924921915, "loss": 1.7643, "step": 51865 }, { "epoch": 1.73, "grad_norm": 0.5835336446762085, "learning_rate": 0.00022978676738741416, "loss": 1.783, "step": 51866 }, { "epoch": 1.73, "grad_norm": 0.5539342164993286, "learning_rate": 0.00022977660561083766, "loss": 1.7889, "step": 51867 }, { "epoch": 1.73, "grad_norm": 0.5594929456710815, "learning_rate": 0.0002297664439195022, "loss": 1.8267, "step": 51868 }, { "epoch": 1.73, "grad_norm": 0.5581730604171753, "learning_rate": 0.0002297562823134197, "loss": 1.7953, "step": 51869 }, { "epoch": 1.73, "grad_norm": 0.5268248915672302, "learning_rate": 0.00022974612079260283, "loss": 1.784, "step": 51870 }, { "epoch": 1.73, "grad_norm": 0.5575002431869507, "learning_rate": 0.00022973595935706385, "loss": 1.6553, "step": 51871 }, { "epoch": 1.73, "grad_norm": 0.5444416403770447, "learning_rate": 0.00022972579800681502, "loss": 1.6528, "step": 51872 }, { "epoch": 1.73, "grad_norm": 0.5633131265640259, "learning_rate": 0.0002297156367418687, "loss": 1.7247, "step": 51873 }, { "epoch": 1.73, "grad_norm": 0.5593457818031311, "learning_rate": 0.00022970547556223718, "loss": 1.703, "step": 51874 }, { "epoch": 1.73, "grad_norm": 0.5561825037002563, "learning_rate": 0.00022969531446793306, "loss": 1.7644, "step": 51875 }, { "epoch": 1.73, "grad_norm": 0.5516559481620789, "learning_rate": 0.00022968515345896825, "loss": 1.7787, "step": 51876 }, { "epoch": 1.73, "grad_norm": 0.5519881248474121, "learning_rate": 0.0002296749925353554, "loss": 1.7823, "step": 51877 }, { "epoch": 1.73, "grad_norm": 0.5702983736991882, "learning_rate": 0.0002296648316971068, "loss": 1.6591, "step": 51878 }, { "epoch": 1.73, "grad_norm": 0.5798804759979248, "learning_rate": 0.00022965467094423467, "loss": 1.7677, "step": 51879 }, { "epoch": 1.73, "grad_norm": 0.5663175582885742, "learning_rate": 0.0002296445102767514, "loss": 1.7545, "step": 51880 }, { "epoch": 1.73, "grad_norm": 0.5406515002250671, "learning_rate": 0.00022963434969466937, "loss": 1.7518, "step": 51881 }, { "epoch": 1.73, "grad_norm": 0.5507768392562866, "learning_rate": 0.00022962418919800082, "loss": 1.723, "step": 51882 }, { "epoch": 1.73, "grad_norm": 0.5428318381309509, "learning_rate": 0.00022961402878675808, "loss": 1.691, "step": 51883 }, { "epoch": 1.73, "grad_norm": 0.5464465022087097, "learning_rate": 0.00022960386846095361, "loss": 1.6836, "step": 51884 }, { "epoch": 1.73, "grad_norm": 0.5350825786590576, "learning_rate": 0.00022959370822059973, "loss": 1.6944, "step": 51885 }, { "epoch": 1.73, "grad_norm": 0.6267797350883484, "learning_rate": 0.00022958354806570865, "loss": 1.7716, "step": 51886 }, { "epoch": 1.73, "grad_norm": 0.534433901309967, "learning_rate": 0.00022957338799629276, "loss": 1.6916, "step": 51887 }, { "epoch": 1.73, "grad_norm": 0.5659375786781311, "learning_rate": 0.00022956322801236448, "loss": 1.7831, "step": 51888 }, { "epoch": 1.73, "grad_norm": 0.5646713376045227, "learning_rate": 0.000229553068113936, "loss": 1.8131, "step": 51889 }, { "epoch": 1.73, "grad_norm": 0.5530418753623962, "learning_rate": 0.00022954290830101964, "loss": 1.6892, "step": 51890 }, { "epoch": 1.73, "grad_norm": 0.5568331480026245, "learning_rate": 0.00022953274857362798, "loss": 1.7848, "step": 51891 }, { "epoch": 1.73, "grad_norm": 0.5656036734580994, "learning_rate": 0.00022952258893177297, "loss": 1.7365, "step": 51892 }, { "epoch": 1.73, "grad_norm": 0.5709183812141418, "learning_rate": 0.00022951242937546728, "loss": 1.7718, "step": 51893 }, { "epoch": 1.73, "grad_norm": 0.5625276565551758, "learning_rate": 0.00022950226990472306, "loss": 1.7315, "step": 51894 }, { "epoch": 1.73, "grad_norm": 0.5315343141555786, "learning_rate": 0.0002294921105195528, "loss": 1.6971, "step": 51895 }, { "epoch": 1.73, "grad_norm": 0.5533760786056519, "learning_rate": 0.00022948195121996867, "loss": 1.6589, "step": 51896 }, { "epoch": 1.73, "grad_norm": 0.5406279563903809, "learning_rate": 0.00022947179200598299, "loss": 1.7494, "step": 51897 }, { "epoch": 1.73, "grad_norm": 0.5481691360473633, "learning_rate": 0.00022946163287760834, "loss": 1.7221, "step": 51898 }, { "epoch": 1.73, "grad_norm": 0.5555575489997864, "learning_rate": 0.00022945147383485667, "loss": 1.8105, "step": 51899 }, { "epoch": 1.73, "grad_norm": 0.5642800331115723, "learning_rate": 0.00022944131487774062, "loss": 1.7363, "step": 51900 }, { "epoch": 1.73, "grad_norm": 0.558026134967804, "learning_rate": 0.00022943115600627245, "loss": 1.6907, "step": 51901 }, { "epoch": 1.73, "grad_norm": 0.5390998125076294, "learning_rate": 0.0002294209972204644, "loss": 1.7572, "step": 51902 }, { "epoch": 1.73, "grad_norm": 0.5402296781539917, "learning_rate": 0.00022941083852032887, "loss": 1.6978, "step": 51903 }, { "epoch": 1.73, "grad_norm": 0.5946733951568604, "learning_rate": 0.0002294006799058781, "loss": 1.8057, "step": 51904 }, { "epoch": 1.73, "grad_norm": 0.542525053024292, "learning_rate": 0.0002293905213771247, "loss": 1.6346, "step": 51905 }, { "epoch": 1.73, "grad_norm": 0.5723793506622314, "learning_rate": 0.00022938036293408063, "loss": 1.7601, "step": 51906 }, { "epoch": 1.73, "grad_norm": 0.562982976436615, "learning_rate": 0.00022937020457675843, "loss": 1.7668, "step": 51907 }, { "epoch": 1.73, "grad_norm": 0.5629712343215942, "learning_rate": 0.00022936004630517049, "loss": 1.7719, "step": 51908 }, { "epoch": 1.73, "grad_norm": 0.5449275374412537, "learning_rate": 0.00022934988811932898, "loss": 1.7017, "step": 51909 }, { "epoch": 1.73, "grad_norm": 0.5546064376831055, "learning_rate": 0.0002293397300192463, "loss": 1.7316, "step": 51910 }, { "epoch": 1.73, "grad_norm": 0.557740330696106, "learning_rate": 0.00022932957200493478, "loss": 1.7139, "step": 51911 }, { "epoch": 1.73, "grad_norm": 0.5688043832778931, "learning_rate": 0.00022931941407640667, "loss": 1.7602, "step": 51912 }, { "epoch": 1.73, "grad_norm": 0.5457450747489929, "learning_rate": 0.00022930925623367444, "loss": 1.7371, "step": 51913 }, { "epoch": 1.73, "grad_norm": 0.582155168056488, "learning_rate": 0.00022929909847675036, "loss": 1.7296, "step": 51914 }, { "epoch": 1.73, "grad_norm": 0.5417430400848389, "learning_rate": 0.0002292889408056468, "loss": 1.794, "step": 51915 }, { "epoch": 1.73, "grad_norm": 0.5478335618972778, "learning_rate": 0.00022927878322037597, "loss": 1.7037, "step": 51916 }, { "epoch": 1.73, "grad_norm": 0.5504140257835388, "learning_rate": 0.00022926862572095032, "loss": 1.7241, "step": 51917 }, { "epoch": 1.73, "grad_norm": 0.5374554991722107, "learning_rate": 0.00022925846830738216, "loss": 1.7309, "step": 51918 }, { "epoch": 1.73, "grad_norm": 0.5324122905731201, "learning_rate": 0.0002292483109796837, "loss": 1.7274, "step": 51919 }, { "epoch": 1.73, "grad_norm": 0.545171320438385, "learning_rate": 0.00022923815373786742, "loss": 1.7919, "step": 51920 }, { "epoch": 1.73, "grad_norm": 0.5361267924308777, "learning_rate": 0.00022922799658194566, "loss": 1.7918, "step": 51921 }, { "epoch": 1.73, "grad_norm": 0.5596773028373718, "learning_rate": 0.0002292178395119306, "loss": 1.7609, "step": 51922 }, { "epoch": 1.73, "grad_norm": 0.544786810874939, "learning_rate": 0.00022920768252783467, "loss": 1.7413, "step": 51923 }, { "epoch": 1.73, "grad_norm": 0.565515398979187, "learning_rate": 0.0002291975256296702, "loss": 1.8082, "step": 51924 }, { "epoch": 1.73, "grad_norm": 0.5416052937507629, "learning_rate": 0.00022918736881744954, "loss": 1.7114, "step": 51925 }, { "epoch": 1.73, "grad_norm": 0.5640969276428223, "learning_rate": 0.00022917721209118487, "loss": 1.7861, "step": 51926 }, { "epoch": 1.73, "grad_norm": 0.540274977684021, "learning_rate": 0.00022916705545088868, "loss": 1.8031, "step": 51927 }, { "epoch": 1.73, "grad_norm": 0.5595851540565491, "learning_rate": 0.00022915689889657333, "loss": 1.6921, "step": 51928 }, { "epoch": 1.73, "grad_norm": 0.5466561317443848, "learning_rate": 0.00022914674242825096, "loss": 1.7358, "step": 51929 }, { "epoch": 1.73, "grad_norm": 0.5421134829521179, "learning_rate": 0.00022913658604593407, "loss": 1.7859, "step": 51930 }, { "epoch": 1.73, "grad_norm": 0.5682257413864136, "learning_rate": 0.00022912642974963495, "loss": 1.7595, "step": 51931 }, { "epoch": 1.73, "grad_norm": 0.5756475925445557, "learning_rate": 0.00022911627353936585, "loss": 1.817, "step": 51932 }, { "epoch": 1.73, "grad_norm": 0.5447810888290405, "learning_rate": 0.00022910611741513905, "loss": 1.7751, "step": 51933 }, { "epoch": 1.73, "grad_norm": 0.5479246973991394, "learning_rate": 0.00022909596137696706, "loss": 1.7608, "step": 51934 }, { "epoch": 1.73, "grad_norm": 0.5704919695854187, "learning_rate": 0.00022908580542486224, "loss": 1.7369, "step": 51935 }, { "epoch": 1.73, "grad_norm": 0.5342864394187927, "learning_rate": 0.00022907564955883668, "loss": 1.663, "step": 51936 }, { "epoch": 1.73, "grad_norm": 0.5576947331428528, "learning_rate": 0.00022906549377890283, "loss": 1.7694, "step": 51937 }, { "epoch": 1.73, "grad_norm": 0.5561281442642212, "learning_rate": 0.0002290553380850731, "loss": 1.6743, "step": 51938 }, { "epoch": 1.73, "grad_norm": 0.579869270324707, "learning_rate": 0.00022904518247735968, "loss": 1.7356, "step": 51939 }, { "epoch": 1.73, "grad_norm": 0.5749692916870117, "learning_rate": 0.00022903502695577487, "loss": 1.7542, "step": 51940 }, { "epoch": 1.73, "grad_norm": 0.5448660850524902, "learning_rate": 0.0002290248715203313, "loss": 1.7326, "step": 51941 }, { "epoch": 1.73, "grad_norm": 0.5611454844474792, "learning_rate": 0.00022901471617104086, "loss": 1.7098, "step": 51942 }, { "epoch": 1.73, "grad_norm": 0.5733640193939209, "learning_rate": 0.0002290045609079162, "loss": 1.7687, "step": 51943 }, { "epoch": 1.73, "grad_norm": 0.5421043634414673, "learning_rate": 0.0002289944057309695, "loss": 1.7704, "step": 51944 }, { "epoch": 1.73, "grad_norm": 0.5614239573478699, "learning_rate": 0.00022898425064021323, "loss": 1.6905, "step": 51945 }, { "epoch": 1.73, "grad_norm": 0.5675114393234253, "learning_rate": 0.0002289740956356595, "loss": 1.7747, "step": 51946 }, { "epoch": 1.73, "grad_norm": 0.5523297786712646, "learning_rate": 0.00022896394071732074, "loss": 1.7599, "step": 51947 }, { "epoch": 1.73, "grad_norm": 0.5671961307525635, "learning_rate": 0.00022895378588520945, "loss": 1.7096, "step": 51948 }, { "epoch": 1.73, "grad_norm": 0.5733045339584351, "learning_rate": 0.00022894363113933763, "loss": 1.7884, "step": 51949 }, { "epoch": 1.73, "grad_norm": 0.5427688360214233, "learning_rate": 0.00022893347647971778, "loss": 1.6777, "step": 51950 }, { "epoch": 1.73, "grad_norm": 0.5659084320068359, "learning_rate": 0.00022892332190636226, "loss": 1.7136, "step": 51951 }, { "epoch": 1.73, "grad_norm": 0.5542097091674805, "learning_rate": 0.0002289131674192834, "loss": 1.7682, "step": 51952 }, { "epoch": 1.73, "grad_norm": 0.5382627844810486, "learning_rate": 0.00022890301301849346, "loss": 1.7789, "step": 51953 }, { "epoch": 1.73, "grad_norm": 1.1957541704177856, "learning_rate": 0.0002288928587040047, "loss": 1.8771, "step": 51954 }, { "epoch": 1.73, "grad_norm": 0.5663815140724182, "learning_rate": 0.0002288827044758297, "loss": 1.7326, "step": 51955 }, { "epoch": 1.73, "grad_norm": 0.566476047039032, "learning_rate": 0.00022887255033398045, "loss": 1.7058, "step": 51956 }, { "epoch": 1.73, "grad_norm": 0.5517590641975403, "learning_rate": 0.00022886239627846952, "loss": 1.7459, "step": 51957 }, { "epoch": 1.73, "grad_norm": 0.5567907691001892, "learning_rate": 0.00022885224230930918, "loss": 1.7211, "step": 51958 }, { "epoch": 1.73, "grad_norm": 0.5662645101547241, "learning_rate": 0.0002288420884265117, "loss": 1.7291, "step": 51959 }, { "epoch": 1.73, "grad_norm": 0.5509935617446899, "learning_rate": 0.00022883193463008945, "loss": 1.7515, "step": 51960 }, { "epoch": 1.73, "grad_norm": 0.5553383827209473, "learning_rate": 0.00022882178092005465, "loss": 1.7269, "step": 51961 }, { "epoch": 1.73, "grad_norm": 0.555892288684845, "learning_rate": 0.00022881162729641993, "loss": 1.7884, "step": 51962 }, { "epoch": 1.73, "grad_norm": 0.5456620454788208, "learning_rate": 0.00022880147375919723, "loss": 1.7077, "step": 51963 }, { "epoch": 1.73, "grad_norm": 0.5501132011413574, "learning_rate": 0.0002287913203083991, "loss": 1.6739, "step": 51964 }, { "epoch": 1.73, "grad_norm": 0.542271614074707, "learning_rate": 0.0002287811669440379, "loss": 1.8362, "step": 51965 }, { "epoch": 1.73, "grad_norm": 0.5361223220825195, "learning_rate": 0.00022877101366612578, "loss": 1.6797, "step": 51966 }, { "epoch": 1.73, "grad_norm": 0.5455799102783203, "learning_rate": 0.00022876086047467515, "loss": 1.7398, "step": 51967 }, { "epoch": 1.73, "grad_norm": 0.557959258556366, "learning_rate": 0.00022875070736969838, "loss": 1.7391, "step": 51968 }, { "epoch": 1.73, "grad_norm": 0.5349229574203491, "learning_rate": 0.00022874055435120774, "loss": 1.7059, "step": 51969 }, { "epoch": 1.73, "grad_norm": 0.579477071762085, "learning_rate": 0.0002287304014192155, "loss": 1.6961, "step": 51970 }, { "epoch": 1.73, "grad_norm": 0.5572701692581177, "learning_rate": 0.0002287202485737341, "loss": 1.7365, "step": 51971 }, { "epoch": 1.73, "grad_norm": 0.5409989953041077, "learning_rate": 0.00022871009581477584, "loss": 1.7263, "step": 51972 }, { "epoch": 1.73, "grad_norm": 0.553983747959137, "learning_rate": 0.00022869994314235302, "loss": 1.732, "step": 51973 }, { "epoch": 1.73, "grad_norm": 0.5395932793617249, "learning_rate": 0.0002286897905564779, "loss": 1.7352, "step": 51974 }, { "epoch": 1.73, "grad_norm": 0.5603505969047546, "learning_rate": 0.000228679638057163, "loss": 1.7735, "step": 51975 }, { "epoch": 1.73, "grad_norm": 0.5504409074783325, "learning_rate": 0.0002286694856444204, "loss": 1.6362, "step": 51976 }, { "epoch": 1.73, "grad_norm": 0.5709936022758484, "learning_rate": 0.00022865933331826248, "loss": 1.8168, "step": 51977 }, { "epoch": 1.73, "grad_norm": 0.5674895644187927, "learning_rate": 0.00022864918107870185, "loss": 1.7614, "step": 51978 }, { "epoch": 1.73, "grad_norm": 0.5586214661598206, "learning_rate": 0.00022863902892575038, "loss": 1.6961, "step": 51979 }, { "epoch": 1.73, "grad_norm": 0.5686892867088318, "learning_rate": 0.00022862887685942064, "loss": 1.685, "step": 51980 }, { "epoch": 1.73, "grad_norm": 0.5651683211326599, "learning_rate": 0.00022861872487972497, "loss": 1.7735, "step": 51981 }, { "epoch": 1.73, "grad_norm": 0.560876727104187, "learning_rate": 0.0002286085729866757, "loss": 1.6964, "step": 51982 }, { "epoch": 1.73, "grad_norm": 0.5565065741539001, "learning_rate": 0.00022859842118028506, "loss": 1.7319, "step": 51983 }, { "epoch": 1.73, "grad_norm": 0.5486503839492798, "learning_rate": 0.00022858826946056533, "loss": 1.7604, "step": 51984 }, { "epoch": 1.73, "grad_norm": 0.5663805603981018, "learning_rate": 0.0002285781178275291, "loss": 1.7248, "step": 51985 }, { "epoch": 1.73, "grad_norm": 0.5485443472862244, "learning_rate": 0.00022856796628118836, "loss": 1.7185, "step": 51986 }, { "epoch": 1.73, "grad_norm": 0.5532690286636353, "learning_rate": 0.0002285578148215556, "loss": 1.7076, "step": 51987 }, { "epoch": 1.73, "grad_norm": 0.554918646812439, "learning_rate": 0.00022854766344864322, "loss": 1.761, "step": 51988 }, { "epoch": 1.73, "grad_norm": 0.5474493503570557, "learning_rate": 0.0002285375121624634, "loss": 1.6825, "step": 51989 }, { "epoch": 1.73, "grad_norm": 0.5715028643608093, "learning_rate": 0.00022852736096302847, "loss": 1.7098, "step": 51990 }, { "epoch": 1.73, "grad_norm": 0.5489093661308289, "learning_rate": 0.00022851720985035077, "loss": 1.7616, "step": 51991 }, { "epoch": 1.73, "grad_norm": 0.5438205003738403, "learning_rate": 0.00022850705882444282, "loss": 1.7115, "step": 51992 }, { "epoch": 1.73, "grad_norm": 0.5753997564315796, "learning_rate": 0.00022849690788531656, "loss": 1.7133, "step": 51993 }, { "epoch": 1.73, "grad_norm": 0.563838005065918, "learning_rate": 0.0002284867570329846, "loss": 1.7792, "step": 51994 }, { "epoch": 1.73, "grad_norm": 0.5931098461151123, "learning_rate": 0.0002284766062674592, "loss": 1.7074, "step": 51995 }, { "epoch": 1.73, "grad_norm": 0.5414533019065857, "learning_rate": 0.00022846645558875265, "loss": 1.7305, "step": 51996 }, { "epoch": 1.73, "grad_norm": 0.5849307179450989, "learning_rate": 0.00022845630499687726, "loss": 1.7489, "step": 51997 }, { "epoch": 1.73, "grad_norm": 0.5497220754623413, "learning_rate": 0.00022844615449184547, "loss": 1.752, "step": 51998 }, { "epoch": 1.73, "grad_norm": 0.5531803965568542, "learning_rate": 0.0002284360040736694, "loss": 1.693, "step": 51999 }, { "epoch": 1.73, "grad_norm": 0.5737524032592773, "learning_rate": 0.00022842585374236143, "loss": 1.7378, "step": 52000 }, { "epoch": 1.73, "grad_norm": 0.6856126189231873, "learning_rate": 0.00022841570349793402, "loss": 1.7328, "step": 52001 }, { "epoch": 1.73, "grad_norm": 0.5923392176628113, "learning_rate": 0.0002284055533403994, "loss": 1.7453, "step": 52002 }, { "epoch": 1.73, "grad_norm": 0.5763602256774902, "learning_rate": 0.00022839540326976987, "loss": 1.7876, "step": 52003 }, { "epoch": 1.73, "grad_norm": 0.5608773231506348, "learning_rate": 0.00022838525328605777, "loss": 1.6345, "step": 52004 }, { "epoch": 1.73, "grad_norm": 0.563110888004303, "learning_rate": 0.00022837510338927545, "loss": 1.7713, "step": 52005 }, { "epoch": 1.73, "grad_norm": 0.5518386363983154, "learning_rate": 0.0002283649535794352, "loss": 1.7393, "step": 52006 }, { "epoch": 1.73, "grad_norm": 0.5730468034744263, "learning_rate": 0.00022835480385654923, "loss": 1.7414, "step": 52007 }, { "epoch": 1.73, "grad_norm": 0.5561540126800537, "learning_rate": 0.00022834465422063015, "loss": 1.7373, "step": 52008 }, { "epoch": 1.73, "grad_norm": 0.5778634548187256, "learning_rate": 0.00022833450467168995, "loss": 1.7713, "step": 52009 }, { "epoch": 1.73, "grad_norm": 0.563673734664917, "learning_rate": 0.00022832435520974111, "loss": 1.7406, "step": 52010 }, { "epoch": 1.73, "grad_norm": 0.5558432936668396, "learning_rate": 0.00022831420583479598, "loss": 1.728, "step": 52011 }, { "epoch": 1.73, "grad_norm": 0.5393026471138, "learning_rate": 0.00022830405654686694, "loss": 1.7251, "step": 52012 }, { "epoch": 1.73, "grad_norm": 0.5419700145721436, "learning_rate": 0.0002282939073459661, "loss": 1.7556, "step": 52013 }, { "epoch": 1.73, "grad_norm": 0.5602597594261169, "learning_rate": 0.00022828375823210584, "loss": 1.7861, "step": 52014 }, { "epoch": 1.73, "grad_norm": 0.5514757633209229, "learning_rate": 0.0002282736092052987, "loss": 1.7483, "step": 52015 }, { "epoch": 1.73, "grad_norm": 0.5468658804893494, "learning_rate": 0.00022826346026555669, "loss": 1.7746, "step": 52016 }, { "epoch": 1.73, "grad_norm": 0.5580586791038513, "learning_rate": 0.00022825331141289228, "loss": 1.6692, "step": 52017 }, { "epoch": 1.73, "grad_norm": 0.5538278222084045, "learning_rate": 0.00022824316264731786, "loss": 1.6655, "step": 52018 }, { "epoch": 1.73, "grad_norm": 0.5629679560661316, "learning_rate": 0.00022823301396884563, "loss": 1.6777, "step": 52019 }, { "epoch": 1.73, "grad_norm": 0.552665650844574, "learning_rate": 0.00022822286537748791, "loss": 1.7756, "step": 52020 }, { "epoch": 1.73, "grad_norm": 0.5311955809593201, "learning_rate": 0.00022821271687325703, "loss": 1.6805, "step": 52021 }, { "epoch": 1.73, "grad_norm": 0.7816962599754333, "learning_rate": 0.00022820256845616544, "loss": 1.8165, "step": 52022 }, { "epoch": 1.73, "grad_norm": 0.5535742044448853, "learning_rate": 0.00022819242012622529, "loss": 1.7007, "step": 52023 }, { "epoch": 1.73, "grad_norm": 0.5577079057693481, "learning_rate": 0.00022818227188344892, "loss": 1.7815, "step": 52024 }, { "epoch": 1.73, "grad_norm": 0.5459678769111633, "learning_rate": 0.0002281721237278488, "loss": 1.7881, "step": 52025 }, { "epoch": 1.73, "grad_norm": 0.5730313658714294, "learning_rate": 0.00022816197565943708, "loss": 1.7565, "step": 52026 }, { "epoch": 1.73, "grad_norm": 0.5466699004173279, "learning_rate": 0.00022815182767822612, "loss": 1.7197, "step": 52027 }, { "epoch": 1.73, "grad_norm": 0.5454679727554321, "learning_rate": 0.00022814167978422835, "loss": 1.704, "step": 52028 }, { "epoch": 1.73, "grad_norm": 0.5861266851425171, "learning_rate": 0.00022813153197745584, "loss": 1.7066, "step": 52029 }, { "epoch": 1.73, "grad_norm": 0.5516733527183533, "learning_rate": 0.00022812138425792113, "loss": 1.6972, "step": 52030 }, { "epoch": 1.73, "grad_norm": 0.5470924973487854, "learning_rate": 0.00022811123662563646, "loss": 1.7565, "step": 52031 }, { "epoch": 1.73, "grad_norm": 0.5200729966163635, "learning_rate": 0.00022810108908061423, "loss": 1.6927, "step": 52032 }, { "epoch": 1.73, "grad_norm": 0.5811645984649658, "learning_rate": 0.00022809094162286659, "loss": 1.7403, "step": 52033 }, { "epoch": 1.73, "grad_norm": 0.5448089241981506, "learning_rate": 0.00022808079425240598, "loss": 1.7941, "step": 52034 }, { "epoch": 1.73, "grad_norm": 0.5582569241523743, "learning_rate": 0.00022807064696924476, "loss": 1.7258, "step": 52035 }, { "epoch": 1.73, "grad_norm": 0.5468480587005615, "learning_rate": 0.00022806049977339504, "loss": 1.7909, "step": 52036 }, { "epoch": 1.73, "grad_norm": 0.5645922422409058, "learning_rate": 0.00022805035266486933, "loss": 1.7815, "step": 52037 }, { "epoch": 1.73, "grad_norm": 0.5620279312133789, "learning_rate": 0.00022804020564367997, "loss": 1.744, "step": 52038 }, { "epoch": 1.73, "grad_norm": 0.5614765286445618, "learning_rate": 0.0002280300587098391, "loss": 1.7781, "step": 52039 }, { "epoch": 1.73, "grad_norm": 0.5776684880256653, "learning_rate": 0.00022801991186335917, "loss": 1.7887, "step": 52040 }, { "epoch": 1.73, "grad_norm": 0.5565110445022583, "learning_rate": 0.00022800976510425246, "loss": 1.7395, "step": 52041 }, { "epoch": 1.73, "grad_norm": 0.5523260831832886, "learning_rate": 0.00022799961843253134, "loss": 1.6543, "step": 52042 }, { "epoch": 1.73, "grad_norm": 0.564653754234314, "learning_rate": 0.00022798947184820791, "loss": 1.6755, "step": 52043 }, { "epoch": 1.73, "grad_norm": 0.5626018047332764, "learning_rate": 0.00022797932535129474, "loss": 1.6528, "step": 52044 }, { "epoch": 1.73, "grad_norm": 0.569172203540802, "learning_rate": 0.00022796917894180415, "loss": 1.7659, "step": 52045 }, { "epoch": 1.73, "grad_norm": 0.5582122206687927, "learning_rate": 0.00022795903261974828, "loss": 1.7391, "step": 52046 }, { "epoch": 1.73, "grad_norm": 0.5734145045280457, "learning_rate": 0.00022794888638513953, "loss": 1.794, "step": 52047 }, { "epoch": 1.73, "grad_norm": 0.5753498077392578, "learning_rate": 0.00022793874023799027, "loss": 1.8058, "step": 52048 }, { "epoch": 1.73, "grad_norm": 0.5670204162597656, "learning_rate": 0.00022792859417831272, "loss": 1.757, "step": 52049 }, { "epoch": 1.73, "grad_norm": 0.5650150179862976, "learning_rate": 0.0002279184482061192, "loss": 1.7864, "step": 52050 }, { "epoch": 1.73, "grad_norm": 0.5491464734077454, "learning_rate": 0.00022790830232142207, "loss": 1.7227, "step": 52051 }, { "epoch": 1.73, "grad_norm": 0.5619344711303711, "learning_rate": 0.0002278981565242337, "loss": 1.7502, "step": 52052 }, { "epoch": 1.73, "grad_norm": 0.5540482997894287, "learning_rate": 0.00022788801081456632, "loss": 1.7483, "step": 52053 }, { "epoch": 1.73, "grad_norm": 0.5650220513343811, "learning_rate": 0.00022787786519243227, "loss": 1.7004, "step": 52054 }, { "epoch": 1.73, "grad_norm": 0.5439684987068176, "learning_rate": 0.0002278677196578439, "loss": 1.7726, "step": 52055 }, { "epoch": 1.73, "grad_norm": 0.5547746419906616, "learning_rate": 0.00022785757421081343, "loss": 1.7195, "step": 52056 }, { "epoch": 1.73, "grad_norm": 0.5785515308380127, "learning_rate": 0.00022784742885135318, "loss": 1.6758, "step": 52057 }, { "epoch": 1.73, "grad_norm": 0.5834960341453552, "learning_rate": 0.0002278372835794757, "loss": 1.81, "step": 52058 }, { "epoch": 1.73, "grad_norm": 0.5838327407836914, "learning_rate": 0.00022782713839519296, "loss": 1.811, "step": 52059 }, { "epoch": 1.73, "grad_norm": 0.5427395701408386, "learning_rate": 0.00022781699329851748, "loss": 1.7447, "step": 52060 }, { "epoch": 1.73, "grad_norm": 0.5469731092453003, "learning_rate": 0.00022780684828946156, "loss": 1.6389, "step": 52061 }, { "epoch": 1.73, "grad_norm": 0.5543159246444702, "learning_rate": 0.00022779670336803753, "loss": 1.8003, "step": 52062 }, { "epoch": 1.73, "grad_norm": 0.5602580904960632, "learning_rate": 0.00022778655853425759, "loss": 1.8118, "step": 52063 }, { "epoch": 1.73, "grad_norm": 0.5615938901901245, "learning_rate": 0.00022777641378813408, "loss": 1.6814, "step": 52064 }, { "epoch": 1.73, "grad_norm": 0.6598368287086487, "learning_rate": 0.00022776626912967955, "loss": 1.7724, "step": 52065 }, { "epoch": 1.73, "grad_norm": 0.5638116598129272, "learning_rate": 0.00022775612455890593, "loss": 1.7485, "step": 52066 }, { "epoch": 1.73, "grad_norm": 0.5317807793617249, "learning_rate": 0.00022774598007582582, "loss": 1.7074, "step": 52067 }, { "epoch": 1.73, "grad_norm": 0.5881623029708862, "learning_rate": 0.00022773583568045142, "loss": 1.7072, "step": 52068 }, { "epoch": 1.73, "grad_norm": 0.5638335943222046, "learning_rate": 0.00022772569137279513, "loss": 1.7826, "step": 52069 }, { "epoch": 1.73, "grad_norm": 0.5575563311576843, "learning_rate": 0.00022771554715286915, "loss": 1.7924, "step": 52070 }, { "epoch": 1.73, "grad_norm": 0.5540319085121155, "learning_rate": 0.00022770540302068578, "loss": 1.7194, "step": 52071 }, { "epoch": 1.73, "grad_norm": 0.5761227607727051, "learning_rate": 0.0002276952589762576, "loss": 1.752, "step": 52072 }, { "epoch": 1.73, "grad_norm": 0.5750811696052551, "learning_rate": 0.0002276851150195965, "loss": 1.7403, "step": 52073 }, { "epoch": 1.73, "grad_norm": 0.5450812578201294, "learning_rate": 0.00022767497115071508, "loss": 1.7792, "step": 52074 }, { "epoch": 1.73, "grad_norm": 0.5665462017059326, "learning_rate": 0.0002276648273696257, "loss": 1.7766, "step": 52075 }, { "epoch": 1.73, "grad_norm": 0.584674060344696, "learning_rate": 0.00022765468367634046, "loss": 1.7754, "step": 52076 }, { "epoch": 1.73, "grad_norm": 0.5607351064682007, "learning_rate": 0.00022764454007087178, "loss": 1.7538, "step": 52077 }, { "epoch": 1.73, "grad_norm": 0.5690911412239075, "learning_rate": 0.0002276343965532319, "loss": 1.672, "step": 52078 }, { "epoch": 1.73, "grad_norm": 0.5307298302650452, "learning_rate": 0.0002276242531234334, "loss": 1.6914, "step": 52079 }, { "epoch": 1.73, "grad_norm": 0.5622636675834656, "learning_rate": 0.0002276141097814882, "loss": 1.7018, "step": 52080 }, { "epoch": 1.73, "grad_norm": 0.5602913498878479, "learning_rate": 0.00022760396652740886, "loss": 1.7561, "step": 52081 }, { "epoch": 1.73, "grad_norm": 0.5702647566795349, "learning_rate": 0.0002275938233612077, "loss": 1.6671, "step": 52082 }, { "epoch": 1.73, "grad_norm": 0.5562822222709656, "learning_rate": 0.0002275836802828969, "loss": 1.7598, "step": 52083 }, { "epoch": 1.73, "grad_norm": 0.5620170831680298, "learning_rate": 0.00022757353729248886, "loss": 1.8143, "step": 52084 }, { "epoch": 1.73, "grad_norm": 0.553956151008606, "learning_rate": 0.00022756339438999594, "loss": 1.7888, "step": 52085 }, { "epoch": 1.73, "grad_norm": 0.5547493100166321, "learning_rate": 0.0002275532515754303, "loss": 1.7464, "step": 52086 }, { "epoch": 1.73, "grad_norm": 0.5369367003440857, "learning_rate": 0.0002275431088488043, "loss": 1.7051, "step": 52087 }, { "epoch": 1.73, "grad_norm": 0.5649685859680176, "learning_rate": 0.00022753296621013034, "loss": 1.7247, "step": 52088 }, { "epoch": 1.73, "grad_norm": 0.5495521426200867, "learning_rate": 0.00022752282365942075, "loss": 1.7856, "step": 52089 }, { "epoch": 1.73, "grad_norm": 0.5536540746688843, "learning_rate": 0.0002275126811966877, "loss": 1.7506, "step": 52090 }, { "epoch": 1.73, "grad_norm": 0.5764063596725464, "learning_rate": 0.00022750253882194358, "loss": 1.81, "step": 52091 }, { "epoch": 1.73, "grad_norm": 0.5637141466140747, "learning_rate": 0.00022749239653520076, "loss": 1.7424, "step": 52092 }, { "epoch": 1.73, "grad_norm": 0.5692694783210754, "learning_rate": 0.00022748225433647144, "loss": 1.7827, "step": 52093 }, { "epoch": 1.73, "grad_norm": 0.5439847707748413, "learning_rate": 0.0002274721122257679, "loss": 1.7282, "step": 52094 }, { "epoch": 1.73, "grad_norm": 0.5570030808448792, "learning_rate": 0.0002274619702031027, "loss": 1.6627, "step": 52095 }, { "epoch": 1.73, "grad_norm": 0.5717446208000183, "learning_rate": 0.0002274518282684878, "loss": 1.7138, "step": 52096 }, { "epoch": 1.73, "grad_norm": 0.5463230609893799, "learning_rate": 0.00022744168642193578, "loss": 1.677, "step": 52097 }, { "epoch": 1.73, "grad_norm": 0.5783575177192688, "learning_rate": 0.00022743154466345886, "loss": 1.8051, "step": 52098 }, { "epoch": 1.73, "grad_norm": 0.5378260612487793, "learning_rate": 0.0002274214029930694, "loss": 1.7731, "step": 52099 }, { "epoch": 1.73, "grad_norm": 0.5584938526153564, "learning_rate": 0.00022741126141077961, "loss": 1.7279, "step": 52100 }, { "epoch": 1.73, "grad_norm": 0.6979681253433228, "learning_rate": 0.00022740111991660178, "loss": 1.7425, "step": 52101 }, { "epoch": 1.73, "grad_norm": 0.5717118978500366, "learning_rate": 0.00022739097851054847, "loss": 1.7364, "step": 52102 }, { "epoch": 1.73, "grad_norm": 0.5612077713012695, "learning_rate": 0.00022738083719263163, "loss": 1.8044, "step": 52103 }, { "epoch": 1.73, "grad_norm": 0.5500473380088806, "learning_rate": 0.00022737069596286382, "loss": 1.7695, "step": 52104 }, { "epoch": 1.73, "grad_norm": 0.5489403605461121, "learning_rate": 0.00022736055482125737, "loss": 1.7574, "step": 52105 }, { "epoch": 1.73, "grad_norm": 0.5526076555252075, "learning_rate": 0.0002273504137678244, "loss": 1.716, "step": 52106 }, { "epoch": 1.73, "grad_norm": 0.5461512804031372, "learning_rate": 0.00022734027280257735, "loss": 1.7946, "step": 52107 }, { "epoch": 1.73, "grad_norm": 0.5591017007827759, "learning_rate": 0.00022733013192552842, "loss": 1.7206, "step": 52108 }, { "epoch": 1.73, "grad_norm": 0.566920816898346, "learning_rate": 0.00022731999113669024, "loss": 1.7214, "step": 52109 }, { "epoch": 1.73, "grad_norm": 0.5695684552192688, "learning_rate": 0.00022730985043607465, "loss": 1.6982, "step": 52110 }, { "epoch": 1.73, "grad_norm": 0.5540854930877686, "learning_rate": 0.00022729970982369427, "loss": 1.7385, "step": 52111 }, { "epoch": 1.73, "grad_norm": 0.5683743357658386, "learning_rate": 0.0002272895692995614, "loss": 1.7152, "step": 52112 }, { "epoch": 1.73, "grad_norm": 0.576203465461731, "learning_rate": 0.00022727942886368818, "loss": 1.7574, "step": 52113 }, { "epoch": 1.73, "grad_norm": 0.5624761581420898, "learning_rate": 0.00022726928851608705, "loss": 1.6792, "step": 52114 }, { "epoch": 1.73, "grad_norm": 0.5568751692771912, "learning_rate": 0.0002272591482567703, "loss": 1.6976, "step": 52115 }, { "epoch": 1.73, "grad_norm": 0.5616719722747803, "learning_rate": 0.00022724900808575023, "loss": 1.7292, "step": 52116 }, { "epoch": 1.73, "grad_norm": 0.5493699908256531, "learning_rate": 0.00022723886800303907, "loss": 1.7901, "step": 52117 }, { "epoch": 1.73, "grad_norm": 0.5388626456260681, "learning_rate": 0.00022722872800864924, "loss": 1.7372, "step": 52118 }, { "epoch": 1.73, "grad_norm": 0.5501106381416321, "learning_rate": 0.00022721858810259308, "loss": 1.7648, "step": 52119 }, { "epoch": 1.73, "grad_norm": 0.5656585693359375, "learning_rate": 0.0002272084482848828, "loss": 1.7757, "step": 52120 }, { "epoch": 1.73, "grad_norm": 0.5571678876876831, "learning_rate": 0.00022719830855553072, "loss": 1.729, "step": 52121 }, { "epoch": 1.73, "grad_norm": 0.5446408987045288, "learning_rate": 0.0002271881689145492, "loss": 1.851, "step": 52122 }, { "epoch": 1.73, "grad_norm": 0.5696138739585876, "learning_rate": 0.0002271780293619505, "loss": 1.7617, "step": 52123 }, { "epoch": 1.73, "grad_norm": 0.5680028200149536, "learning_rate": 0.00022716788989774684, "loss": 1.8204, "step": 52124 }, { "epoch": 1.73, "grad_norm": 0.5825235843658447, "learning_rate": 0.00022715775052195084, "loss": 1.7144, "step": 52125 }, { "epoch": 1.73, "grad_norm": 0.5772866010665894, "learning_rate": 0.0002271476112345744, "loss": 1.76, "step": 52126 }, { "epoch": 1.73, "grad_norm": 0.5425692200660706, "learning_rate": 0.0002271374720356301, "loss": 1.7307, "step": 52127 }, { "epoch": 1.73, "grad_norm": 0.553381085395813, "learning_rate": 0.00022712733292513015, "loss": 1.7853, "step": 52128 }, { "epoch": 1.73, "grad_norm": 0.568350613117218, "learning_rate": 0.000227117193903087, "loss": 1.7192, "step": 52129 }, { "epoch": 1.73, "grad_norm": 0.5526097416877747, "learning_rate": 0.0002271070549695127, "loss": 1.7459, "step": 52130 }, { "epoch": 1.73, "grad_norm": 0.5523640513420105, "learning_rate": 0.00022709691612441966, "loss": 1.6873, "step": 52131 }, { "epoch": 1.73, "grad_norm": 0.5534005165100098, "learning_rate": 0.00022708677736782044, "loss": 1.674, "step": 52132 }, { "epoch": 1.73, "grad_norm": 0.5600108504295349, "learning_rate": 0.0002270766386997269, "loss": 1.7623, "step": 52133 }, { "epoch": 1.73, "grad_norm": 0.5833310484886169, "learning_rate": 0.00022706650012015164, "loss": 1.6626, "step": 52134 }, { "epoch": 1.73, "grad_norm": 0.5561836361885071, "learning_rate": 0.00022705636162910698, "loss": 1.7021, "step": 52135 }, { "epoch": 1.73, "grad_norm": 0.5428274869918823, "learning_rate": 0.0002270462232266051, "loss": 1.7123, "step": 52136 }, { "epoch": 1.73, "grad_norm": 0.5589079260826111, "learning_rate": 0.00022703608491265832, "loss": 1.7392, "step": 52137 }, { "epoch": 1.73, "grad_norm": 0.5830681324005127, "learning_rate": 0.00022702594668727895, "loss": 1.7275, "step": 52138 }, { "epoch": 1.73, "grad_norm": 0.5715787410736084, "learning_rate": 0.00022701580855047945, "loss": 1.7089, "step": 52139 }, { "epoch": 1.73, "grad_norm": 0.5524477958679199, "learning_rate": 0.00022700567050227196, "loss": 1.7769, "step": 52140 }, { "epoch": 1.73, "grad_norm": 0.5435237288475037, "learning_rate": 0.00022699553254266878, "loss": 1.7956, "step": 52141 }, { "epoch": 1.73, "grad_norm": 0.545689582824707, "learning_rate": 0.0002269853946716824, "loss": 1.6708, "step": 52142 }, { "epoch": 1.73, "grad_norm": 0.5593389868736267, "learning_rate": 0.00022697525688932486, "loss": 1.7416, "step": 52143 }, { "epoch": 1.73, "grad_norm": 0.5712342858314514, "learning_rate": 0.00022696511919560866, "loss": 1.7571, "step": 52144 }, { "epoch": 1.73, "grad_norm": 0.5531970858573914, "learning_rate": 0.00022695498159054604, "loss": 1.7176, "step": 52145 }, { "epoch": 1.73, "grad_norm": 0.5533292889595032, "learning_rate": 0.00022694484407414923, "loss": 1.7626, "step": 52146 }, { "epoch": 1.73, "grad_norm": 0.5878137946128845, "learning_rate": 0.00022693470664643067, "loss": 1.8083, "step": 52147 }, { "epoch": 1.73, "grad_norm": 0.575129508972168, "learning_rate": 0.0002269245693074026, "loss": 1.6854, "step": 52148 }, { "epoch": 1.74, "grad_norm": 0.5654681921005249, "learning_rate": 0.00022691443205707741, "loss": 1.7075, "step": 52149 }, { "epoch": 1.74, "grad_norm": 0.5762194991111755, "learning_rate": 0.00022690429489546725, "loss": 1.8214, "step": 52150 }, { "epoch": 1.74, "grad_norm": 0.5559539794921875, "learning_rate": 0.00022689415782258454, "loss": 1.7318, "step": 52151 }, { "epoch": 1.74, "grad_norm": 0.5421565771102905, "learning_rate": 0.00022688402083844157, "loss": 1.7454, "step": 52152 }, { "epoch": 1.74, "grad_norm": 0.5897610783576965, "learning_rate": 0.0002268738839430505, "loss": 1.7555, "step": 52153 }, { "epoch": 1.74, "grad_norm": 0.5964872241020203, "learning_rate": 0.00022686374713642385, "loss": 1.7856, "step": 52154 }, { "epoch": 1.74, "grad_norm": 0.5580023527145386, "learning_rate": 0.00022685361041857393, "loss": 1.7787, "step": 52155 }, { "epoch": 1.74, "grad_norm": 0.5723240971565247, "learning_rate": 0.00022684347378951281, "loss": 1.7257, "step": 52156 }, { "epoch": 1.74, "grad_norm": 0.5619889497756958, "learning_rate": 0.00022683333724925298, "loss": 1.7924, "step": 52157 }, { "epoch": 1.74, "grad_norm": 0.5692099332809448, "learning_rate": 0.0002268232007978067, "loss": 1.7411, "step": 52158 }, { "epoch": 1.74, "grad_norm": 0.5636519193649292, "learning_rate": 0.00022681306443518632, "loss": 1.8254, "step": 52159 }, { "epoch": 1.74, "grad_norm": 0.5884124040603638, "learning_rate": 0.000226802928161404, "loss": 1.7711, "step": 52160 }, { "epoch": 1.74, "grad_norm": 0.5967896580696106, "learning_rate": 0.00022679279197647216, "loss": 1.7153, "step": 52161 }, { "epoch": 1.74, "grad_norm": 0.5692384839057922, "learning_rate": 0.00022678265588040318, "loss": 1.7496, "step": 52162 }, { "epoch": 1.74, "grad_norm": 0.5638829469680786, "learning_rate": 0.00022677251987320918, "loss": 1.7139, "step": 52163 }, { "epoch": 1.74, "grad_norm": 0.5602425336837769, "learning_rate": 0.00022676238395490258, "loss": 1.816, "step": 52164 }, { "epoch": 1.74, "grad_norm": 0.5641763806343079, "learning_rate": 0.0002267522481254957, "loss": 1.6475, "step": 52165 }, { "epoch": 1.74, "grad_norm": 0.5715950727462769, "learning_rate": 0.00022674211238500074, "loss": 1.7206, "step": 52166 }, { "epoch": 1.74, "grad_norm": 0.5561953186988831, "learning_rate": 0.00022673197673343, "loss": 1.7109, "step": 52167 }, { "epoch": 1.74, "grad_norm": 0.5565632581710815, "learning_rate": 0.00022672184117079587, "loss": 1.8308, "step": 52168 }, { "epoch": 1.74, "grad_norm": 0.5485550761222839, "learning_rate": 0.00022671170569711074, "loss": 1.7414, "step": 52169 }, { "epoch": 1.74, "grad_norm": 0.5923395752906799, "learning_rate": 0.00022670157031238672, "loss": 1.7964, "step": 52170 }, { "epoch": 1.74, "grad_norm": 0.5654755234718323, "learning_rate": 0.00022669143501663619, "loss": 1.7209, "step": 52171 }, { "epoch": 1.74, "grad_norm": 1.0333465337753296, "learning_rate": 0.00022668129980987154, "loss": 1.727, "step": 52172 }, { "epoch": 1.74, "grad_norm": 0.556454598903656, "learning_rate": 0.00022667116469210488, "loss": 1.761, "step": 52173 }, { "epoch": 1.74, "grad_norm": 0.5886550545692444, "learning_rate": 0.00022666102966334862, "loss": 1.7368, "step": 52174 }, { "epoch": 1.74, "grad_norm": 0.5662863254547119, "learning_rate": 0.0002266508947236152, "loss": 1.7614, "step": 52175 }, { "epoch": 1.74, "grad_norm": 0.5472866892814636, "learning_rate": 0.0002266407598729166, "loss": 1.6763, "step": 52176 }, { "epoch": 1.74, "grad_norm": 0.5473200082778931, "learning_rate": 0.00022663062511126542, "loss": 1.7537, "step": 52177 }, { "epoch": 1.74, "grad_norm": 0.5901439189910889, "learning_rate": 0.0002266204904386738, "loss": 1.7407, "step": 52178 }, { "epoch": 1.74, "grad_norm": 0.539932906627655, "learning_rate": 0.00022661035585515418, "loss": 1.7916, "step": 52179 }, { "epoch": 1.74, "grad_norm": 0.5563209652900696, "learning_rate": 0.00022660022136071871, "loss": 1.6859, "step": 52180 }, { "epoch": 1.74, "grad_norm": 0.5586350560188293, "learning_rate": 0.0002265900869553797, "loss": 1.7633, "step": 52181 }, { "epoch": 1.74, "grad_norm": 0.5609845519065857, "learning_rate": 0.0002265799526391497, "loss": 1.8372, "step": 52182 }, { "epoch": 1.74, "grad_norm": 0.5585554242134094, "learning_rate": 0.00022656981841204062, "loss": 1.7322, "step": 52183 }, { "epoch": 1.74, "grad_norm": 0.5718304514884949, "learning_rate": 0.00022655968427406502, "loss": 1.7773, "step": 52184 }, { "epoch": 1.74, "grad_norm": 0.566860556602478, "learning_rate": 0.0002265495502252352, "loss": 1.7067, "step": 52185 }, { "epoch": 1.74, "grad_norm": 0.5703686475753784, "learning_rate": 0.00022653941626556335, "loss": 1.7069, "step": 52186 }, { "epoch": 1.74, "grad_norm": 0.5779355764389038, "learning_rate": 0.00022652928239506182, "loss": 1.7868, "step": 52187 }, { "epoch": 1.74, "grad_norm": 0.5564396977424622, "learning_rate": 0.00022651914861374284, "loss": 1.7608, "step": 52188 }, { "epoch": 1.74, "grad_norm": 0.5807813405990601, "learning_rate": 0.00022650901492161902, "loss": 1.6984, "step": 52189 }, { "epoch": 1.74, "grad_norm": 0.5487754940986633, "learning_rate": 0.00022649888131870218, "loss": 1.7269, "step": 52190 }, { "epoch": 1.74, "grad_norm": 0.5519519448280334, "learning_rate": 0.000226488747805005, "loss": 1.7394, "step": 52191 }, { "epoch": 1.74, "grad_norm": 0.5508652925491333, "learning_rate": 0.00022647861438053964, "loss": 1.7617, "step": 52192 }, { "epoch": 1.74, "grad_norm": 0.5605894327163696, "learning_rate": 0.0002264684810453184, "loss": 1.6968, "step": 52193 }, { "epoch": 1.74, "grad_norm": 0.5608726739883423, "learning_rate": 0.00022645834779935355, "loss": 1.6893, "step": 52194 }, { "epoch": 1.74, "grad_norm": 0.5454733371734619, "learning_rate": 0.00022644821464265738, "loss": 1.6972, "step": 52195 }, { "epoch": 1.74, "grad_norm": 0.5384985208511353, "learning_rate": 0.00022643808157524244, "loss": 1.7225, "step": 52196 }, { "epoch": 1.74, "grad_norm": 0.5601577758789062, "learning_rate": 0.00022642794859712066, "loss": 1.7585, "step": 52197 }, { "epoch": 1.74, "grad_norm": 0.5890491604804993, "learning_rate": 0.00022641781570830453, "loss": 1.8096, "step": 52198 }, { "epoch": 1.74, "grad_norm": 0.5752567052841187, "learning_rate": 0.0002264076829088064, "loss": 1.7289, "step": 52199 }, { "epoch": 1.74, "grad_norm": 0.5553200244903564, "learning_rate": 0.00022639755019863845, "loss": 1.7506, "step": 52200 }, { "epoch": 1.74, "grad_norm": 0.5439139604568481, "learning_rate": 0.00022638741757781308, "loss": 1.7575, "step": 52201 }, { "epoch": 1.74, "grad_norm": 0.5348865985870361, "learning_rate": 0.00022637728504634253, "loss": 1.6777, "step": 52202 }, { "epoch": 1.74, "grad_norm": 0.5681312084197998, "learning_rate": 0.00022636715260423906, "loss": 1.8204, "step": 52203 }, { "epoch": 1.74, "grad_norm": 0.5814041495323181, "learning_rate": 0.00022635702025151493, "loss": 1.691, "step": 52204 }, { "epoch": 1.74, "grad_norm": 0.5523496866226196, "learning_rate": 0.00022634688798818265, "loss": 1.737, "step": 52205 }, { "epoch": 1.74, "grad_norm": 0.5551075339317322, "learning_rate": 0.0002263367558142544, "loss": 1.7471, "step": 52206 }, { "epoch": 1.74, "grad_norm": 0.5602690577507019, "learning_rate": 0.00022632662372974247, "loss": 1.6953, "step": 52207 }, { "epoch": 1.74, "grad_norm": 0.5658900141716003, "learning_rate": 0.00022631649173465913, "loss": 1.7972, "step": 52208 }, { "epoch": 1.74, "grad_norm": 0.5894871354103088, "learning_rate": 0.0002263063598290168, "loss": 1.7477, "step": 52209 }, { "epoch": 1.74, "grad_norm": 0.5853198766708374, "learning_rate": 0.00022629622801282758, "loss": 1.7139, "step": 52210 }, { "epoch": 1.74, "grad_norm": 0.5663564205169678, "learning_rate": 0.00022628609628610384, "loss": 1.7689, "step": 52211 }, { "epoch": 1.74, "grad_norm": 0.5738937854766846, "learning_rate": 0.00022627596464885815, "loss": 1.7627, "step": 52212 }, { "epoch": 1.74, "grad_norm": 0.5848902463912964, "learning_rate": 0.0002262658331011023, "loss": 1.7257, "step": 52213 }, { "epoch": 1.74, "grad_norm": 0.557088315486908, "learning_rate": 0.00022625570164284896, "loss": 1.7791, "step": 52214 }, { "epoch": 1.74, "grad_norm": 0.5705087780952454, "learning_rate": 0.00022624557027411038, "loss": 1.6845, "step": 52215 }, { "epoch": 1.74, "grad_norm": 0.6067280769348145, "learning_rate": 0.00022623543899489884, "loss": 1.7537, "step": 52216 }, { "epoch": 1.74, "grad_norm": 0.5857236981391907, "learning_rate": 0.00022622530780522652, "loss": 1.8343, "step": 52217 }, { "epoch": 1.74, "grad_norm": 0.5523256659507751, "learning_rate": 0.00022621517670510575, "loss": 1.7594, "step": 52218 }, { "epoch": 1.74, "grad_norm": 0.5571842789649963, "learning_rate": 0.00022620504569454913, "loss": 1.8129, "step": 52219 }, { "epoch": 1.74, "grad_norm": 0.5873221158981323, "learning_rate": 0.00022619491477356848, "loss": 1.7129, "step": 52220 }, { "epoch": 1.74, "grad_norm": 0.609948992729187, "learning_rate": 0.00022618478394217638, "loss": 1.7255, "step": 52221 }, { "epoch": 1.74, "grad_norm": 0.5761149525642395, "learning_rate": 0.00022617465320038516, "loss": 1.7315, "step": 52222 }, { "epoch": 1.74, "grad_norm": 0.5605032444000244, "learning_rate": 0.000226164522548207, "loss": 1.7499, "step": 52223 }, { "epoch": 1.74, "grad_norm": 0.5477062463760376, "learning_rate": 0.00022615439198565417, "loss": 1.7049, "step": 52224 }, { "epoch": 1.74, "grad_norm": 0.5572670102119446, "learning_rate": 0.000226144261512739, "loss": 1.7114, "step": 52225 }, { "epoch": 1.74, "grad_norm": 0.5483261346817017, "learning_rate": 0.000226134131129474, "loss": 1.7363, "step": 52226 }, { "epoch": 1.74, "grad_norm": 0.5738041996955872, "learning_rate": 0.00022612400083587105, "loss": 1.7085, "step": 52227 }, { "epoch": 1.74, "grad_norm": 0.5583381652832031, "learning_rate": 0.0002261138706319428, "loss": 1.7049, "step": 52228 }, { "epoch": 1.74, "grad_norm": 0.5771013498306274, "learning_rate": 0.00022610374051770148, "loss": 1.7044, "step": 52229 }, { "epoch": 1.74, "grad_norm": 0.5632143020629883, "learning_rate": 0.00022609361049315925, "loss": 1.7146, "step": 52230 }, { "epoch": 1.74, "grad_norm": 0.5391039252281189, "learning_rate": 0.00022608348055832848, "loss": 1.8144, "step": 52231 }, { "epoch": 1.74, "grad_norm": 0.5714130401611328, "learning_rate": 0.00022607335071322154, "loss": 1.7349, "step": 52232 }, { "epoch": 1.74, "grad_norm": 0.5726321935653687, "learning_rate": 0.0002260632209578506, "loss": 1.7892, "step": 52233 }, { "epoch": 1.74, "grad_norm": 0.5670074224472046, "learning_rate": 0.00022605309129222797, "loss": 1.7506, "step": 52234 }, { "epoch": 1.74, "grad_norm": 0.5413600206375122, "learning_rate": 0.00022604296171636602, "loss": 1.7243, "step": 52235 }, { "epoch": 1.74, "grad_norm": 0.5380317568778992, "learning_rate": 0.0002260328322302771, "loss": 1.7024, "step": 52236 }, { "epoch": 1.74, "grad_norm": 0.5795408487319946, "learning_rate": 0.00022602270283397338, "loss": 1.7323, "step": 52237 }, { "epoch": 1.74, "grad_norm": 0.5757750868797302, "learning_rate": 0.00022601257352746715, "loss": 1.6994, "step": 52238 }, { "epoch": 1.74, "grad_norm": 0.5539047718048096, "learning_rate": 0.00022600244431077087, "loss": 1.7342, "step": 52239 }, { "epoch": 1.74, "grad_norm": 0.5617477893829346, "learning_rate": 0.00022599231518389664, "loss": 1.7491, "step": 52240 }, { "epoch": 1.74, "grad_norm": 0.5398340821266174, "learning_rate": 0.00022598218614685675, "loss": 1.7688, "step": 52241 }, { "epoch": 1.74, "grad_norm": 0.5719791054725647, "learning_rate": 0.0002259720571996638, "loss": 1.8379, "step": 52242 }, { "epoch": 1.74, "grad_norm": 0.5513821840286255, "learning_rate": 0.00022596192834232963, "loss": 1.778, "step": 52243 }, { "epoch": 1.74, "grad_norm": 0.5668266415596008, "learning_rate": 0.00022595179957486685, "loss": 1.7445, "step": 52244 }, { "epoch": 1.74, "grad_norm": 0.5505700707435608, "learning_rate": 0.00022594167089728771, "loss": 1.7715, "step": 52245 }, { "epoch": 1.74, "grad_norm": 0.5467292666435242, "learning_rate": 0.00022593154230960452, "loss": 1.7953, "step": 52246 }, { "epoch": 1.74, "grad_norm": 0.534737765789032, "learning_rate": 0.00022592141381182941, "loss": 1.7291, "step": 52247 }, { "epoch": 1.74, "grad_norm": 0.5706846714019775, "learning_rate": 0.0002259112854039748, "loss": 1.7123, "step": 52248 }, { "epoch": 1.74, "grad_norm": 0.540704607963562, "learning_rate": 0.00022590115708605313, "loss": 1.7857, "step": 52249 }, { "epoch": 1.74, "grad_norm": 0.572222113609314, "learning_rate": 0.00022589102885807634, "loss": 1.7803, "step": 52250 }, { "epoch": 1.74, "grad_norm": 0.5593220591545105, "learning_rate": 0.000225880900720057, "loss": 1.7431, "step": 52251 }, { "epoch": 1.74, "grad_norm": 0.5691507458686829, "learning_rate": 0.00022587077267200738, "loss": 1.6793, "step": 52252 }, { "epoch": 1.74, "grad_norm": 0.5531712770462036, "learning_rate": 0.00022586064471393966, "loss": 1.7959, "step": 52253 }, { "epoch": 1.74, "grad_norm": 0.5552816390991211, "learning_rate": 0.0002258505168458662, "loss": 1.7526, "step": 52254 }, { "epoch": 1.74, "grad_norm": 0.5369144082069397, "learning_rate": 0.0002258403890677992, "loss": 1.7471, "step": 52255 }, { "epoch": 1.74, "grad_norm": 0.5503248572349548, "learning_rate": 0.0002258302613797512, "loss": 1.725, "step": 52256 }, { "epoch": 1.74, "grad_norm": 0.5488881468772888, "learning_rate": 0.00022582013378173426, "loss": 1.7209, "step": 52257 }, { "epoch": 1.74, "grad_norm": 0.56854647397995, "learning_rate": 0.0002258100062737607, "loss": 1.7702, "step": 52258 }, { "epoch": 1.74, "grad_norm": 0.5516724586486816, "learning_rate": 0.000225799878855843, "loss": 1.746, "step": 52259 }, { "epoch": 1.74, "grad_norm": 0.5693464875221252, "learning_rate": 0.0002257897515279932, "loss": 1.7162, "step": 52260 }, { "epoch": 1.74, "grad_norm": 0.5418336391448975, "learning_rate": 0.0002257796242902237, "loss": 1.7818, "step": 52261 }, { "epoch": 1.74, "grad_norm": 0.5345637798309326, "learning_rate": 0.0002257694971425469, "loss": 1.688, "step": 52262 }, { "epoch": 1.74, "grad_norm": 0.5596845149993896, "learning_rate": 0.00022575937008497486, "loss": 1.8176, "step": 52263 }, { "epoch": 1.74, "grad_norm": 0.5415587425231934, "learning_rate": 0.00022574924311752006, "loss": 1.7398, "step": 52264 }, { "epoch": 1.74, "grad_norm": 0.562393069267273, "learning_rate": 0.00022573911624019472, "loss": 1.8009, "step": 52265 }, { "epoch": 1.74, "grad_norm": 0.5748319029808044, "learning_rate": 0.00022572898945301124, "loss": 1.7455, "step": 52266 }, { "epoch": 1.74, "grad_norm": 0.5521716475486755, "learning_rate": 0.00022571886275598175, "loss": 1.7911, "step": 52267 }, { "epoch": 1.74, "grad_norm": 0.5472891330718994, "learning_rate": 0.0002257087361491186, "loss": 1.7443, "step": 52268 }, { "epoch": 1.74, "grad_norm": 0.541553258895874, "learning_rate": 0.00022569860963243422, "loss": 1.7155, "step": 52269 }, { "epoch": 1.74, "grad_norm": 0.5565844774246216, "learning_rate": 0.00022568848320594063, "loss": 1.7875, "step": 52270 }, { "epoch": 1.74, "grad_norm": 1.7094248533248901, "learning_rate": 0.0002256783568696503, "loss": 1.7328, "step": 52271 }, { "epoch": 1.74, "grad_norm": 0.579731822013855, "learning_rate": 0.0002256682306235756, "loss": 1.7443, "step": 52272 }, { "epoch": 1.74, "grad_norm": 0.5563904047012329, "learning_rate": 0.00022565810446772863, "loss": 1.7146, "step": 52273 }, { "epoch": 1.74, "grad_norm": 0.5504049062728882, "learning_rate": 0.00022564797840212173, "loss": 1.7432, "step": 52274 }, { "epoch": 1.74, "grad_norm": 0.5518864393234253, "learning_rate": 0.0002256378524267673, "loss": 1.693, "step": 52275 }, { "epoch": 1.74, "grad_norm": 0.5785706639289856, "learning_rate": 0.00022562772654167757, "loss": 1.8145, "step": 52276 }, { "epoch": 1.74, "grad_norm": 0.5614410042762756, "learning_rate": 0.00022561760074686472, "loss": 1.7055, "step": 52277 }, { "epoch": 1.74, "grad_norm": 0.5677986741065979, "learning_rate": 0.00022560747504234122, "loss": 1.7602, "step": 52278 }, { "epoch": 1.74, "grad_norm": 0.5886181592941284, "learning_rate": 0.00022559734942811932, "loss": 1.7677, "step": 52279 }, { "epoch": 1.74, "grad_norm": 0.5488386750221252, "learning_rate": 0.0002255872239042112, "loss": 1.7349, "step": 52280 }, { "epoch": 1.74, "grad_norm": 0.5545316338539124, "learning_rate": 0.00022557709847062927, "loss": 1.7382, "step": 52281 }, { "epoch": 1.74, "grad_norm": 0.556713879108429, "learning_rate": 0.00022556697312738584, "loss": 1.6927, "step": 52282 }, { "epoch": 1.74, "grad_norm": 0.5968791246414185, "learning_rate": 0.00022555684787449305, "loss": 1.8259, "step": 52283 }, { "epoch": 1.74, "grad_norm": 0.5846030712127686, "learning_rate": 0.00022554672271196316, "loss": 1.7567, "step": 52284 }, { "epoch": 1.74, "grad_norm": 0.605809211730957, "learning_rate": 0.00022553659763980874, "loss": 1.7802, "step": 52285 }, { "epoch": 1.74, "grad_norm": 0.5717294812202454, "learning_rate": 0.00022552647265804195, "loss": 1.6648, "step": 52286 }, { "epoch": 1.74, "grad_norm": 0.5543877482414246, "learning_rate": 0.00022551634776667495, "loss": 1.7244, "step": 52287 }, { "epoch": 1.74, "grad_norm": 0.5691513419151306, "learning_rate": 0.00022550622296572015, "loss": 1.8173, "step": 52288 }, { "epoch": 1.74, "grad_norm": 0.5874528288841248, "learning_rate": 0.00022549609825518987, "loss": 1.7364, "step": 52289 }, { "epoch": 1.74, "grad_norm": 0.5672473311424255, "learning_rate": 0.0002254859736350963, "loss": 1.7173, "step": 52290 }, { "epoch": 1.74, "grad_norm": 0.5772430300712585, "learning_rate": 0.00022547584910545174, "loss": 1.6971, "step": 52291 }, { "epoch": 1.74, "grad_norm": 0.5794038772583008, "learning_rate": 0.0002254657246662687, "loss": 1.715, "step": 52292 }, { "epoch": 1.74, "grad_norm": 0.5593627691268921, "learning_rate": 0.0002254556003175591, "loss": 1.7603, "step": 52293 }, { "epoch": 1.74, "grad_norm": 0.5643244981765747, "learning_rate": 0.00022544547605933546, "loss": 1.7117, "step": 52294 }, { "epoch": 1.74, "grad_norm": 0.5868868827819824, "learning_rate": 0.00022543535189161002, "loss": 1.738, "step": 52295 }, { "epoch": 1.74, "grad_norm": 0.6326223015785217, "learning_rate": 0.00022542522781439516, "loss": 1.7275, "step": 52296 }, { "epoch": 1.74, "grad_norm": 0.5567752718925476, "learning_rate": 0.000225415103827703, "loss": 1.6843, "step": 52297 }, { "epoch": 1.74, "grad_norm": 0.5707502365112305, "learning_rate": 0.00022540497993154586, "loss": 1.7731, "step": 52298 }, { "epoch": 1.74, "grad_norm": 0.5665088295936584, "learning_rate": 0.0002253948561259363, "loss": 1.7694, "step": 52299 }, { "epoch": 1.74, "grad_norm": 0.5730504393577576, "learning_rate": 0.00022538473241088617, "loss": 1.8584, "step": 52300 }, { "epoch": 1.74, "grad_norm": 0.549829363822937, "learning_rate": 0.00022537460878640806, "loss": 1.7132, "step": 52301 }, { "epoch": 1.74, "grad_norm": 0.5539267063140869, "learning_rate": 0.00022536448525251423, "loss": 1.6606, "step": 52302 }, { "epoch": 1.74, "grad_norm": 0.5905542373657227, "learning_rate": 0.00022535436180921688, "loss": 1.7001, "step": 52303 }, { "epoch": 1.74, "grad_norm": 0.5639597177505493, "learning_rate": 0.0002253442384565283, "loss": 1.6468, "step": 52304 }, { "epoch": 1.74, "grad_norm": 0.5621209740638733, "learning_rate": 0.00022533411519446076, "loss": 1.7951, "step": 52305 }, { "epoch": 1.74, "grad_norm": 0.5526934862136841, "learning_rate": 0.00022532399202302683, "loss": 1.8071, "step": 52306 }, { "epoch": 1.74, "grad_norm": 0.5868035554885864, "learning_rate": 0.00022531386894223833, "loss": 1.7176, "step": 52307 }, { "epoch": 1.74, "grad_norm": 0.5744948387145996, "learning_rate": 0.00022530374595210788, "loss": 1.7387, "step": 52308 }, { "epoch": 1.74, "grad_norm": 0.584412693977356, "learning_rate": 0.00022529362305264775, "loss": 1.8117, "step": 52309 }, { "epoch": 1.74, "grad_norm": 0.5688130855560303, "learning_rate": 0.00022528350024387007, "loss": 1.769, "step": 52310 }, { "epoch": 1.74, "grad_norm": 0.5578622221946716, "learning_rate": 0.0002252733775257872, "loss": 1.7559, "step": 52311 }, { "epoch": 1.74, "grad_norm": 0.5747393369674683, "learning_rate": 0.00022526325489841142, "loss": 1.7756, "step": 52312 }, { "epoch": 1.74, "grad_norm": 0.5475685596466064, "learning_rate": 0.0002252531323617552, "loss": 1.7615, "step": 52313 }, { "epoch": 1.74, "grad_norm": 0.5367522835731506, "learning_rate": 0.00022524300991583047, "loss": 1.8135, "step": 52314 }, { "epoch": 1.74, "grad_norm": 0.577081561088562, "learning_rate": 0.00022523288756064977, "loss": 1.6984, "step": 52315 }, { "epoch": 1.74, "grad_norm": 0.5607748031616211, "learning_rate": 0.0002252227652962254, "loss": 1.7534, "step": 52316 }, { "epoch": 1.74, "grad_norm": 0.5497069954872131, "learning_rate": 0.0002252126431225695, "loss": 1.7694, "step": 52317 }, { "epoch": 1.74, "grad_norm": 0.5743525624275208, "learning_rate": 0.00022520252103969447, "loss": 1.7109, "step": 52318 }, { "epoch": 1.74, "grad_norm": 0.5605083107948303, "learning_rate": 0.0002251923990476126, "loss": 1.8279, "step": 52319 }, { "epoch": 1.74, "grad_norm": 0.5552632808685303, "learning_rate": 0.00022518227714633606, "loss": 1.7346, "step": 52320 }, { "epoch": 1.74, "grad_norm": 0.5667786002159119, "learning_rate": 0.00022517215533587713, "loss": 1.7215, "step": 52321 }, { "epoch": 1.74, "grad_norm": 0.5586612224578857, "learning_rate": 0.0002251620336162483, "loss": 1.6828, "step": 52322 }, { "epoch": 1.74, "grad_norm": 0.5607314705848694, "learning_rate": 0.00022515191198746175, "loss": 1.7149, "step": 52323 }, { "epoch": 1.74, "grad_norm": 0.5674193501472473, "learning_rate": 0.00022514179044952972, "loss": 1.8065, "step": 52324 }, { "epoch": 1.74, "grad_norm": 0.5557622313499451, "learning_rate": 0.0002251316690024645, "loss": 1.7423, "step": 52325 }, { "epoch": 1.74, "grad_norm": 0.5483333468437195, "learning_rate": 0.00022512154764627848, "loss": 1.7632, "step": 52326 }, { "epoch": 1.74, "grad_norm": 0.5536736249923706, "learning_rate": 0.0002251114263809838, "loss": 1.7765, "step": 52327 }, { "epoch": 1.74, "grad_norm": 0.5666648149490356, "learning_rate": 0.00022510130520659273, "loss": 1.7559, "step": 52328 }, { "epoch": 1.74, "grad_norm": 0.5492907166481018, "learning_rate": 0.00022509118412311787, "loss": 1.7128, "step": 52329 }, { "epoch": 1.74, "grad_norm": 0.555022656917572, "learning_rate": 0.00022508106313057105, "loss": 1.6624, "step": 52330 }, { "epoch": 1.74, "grad_norm": 0.5534342527389526, "learning_rate": 0.00022507094222896487, "loss": 1.6967, "step": 52331 }, { "epoch": 1.74, "grad_norm": 0.5548280477523804, "learning_rate": 0.0002250608214183115, "loss": 1.7592, "step": 52332 }, { "epoch": 1.74, "grad_norm": 0.5594927072525024, "learning_rate": 0.00022505070069862333, "loss": 1.7333, "step": 52333 }, { "epoch": 1.74, "grad_norm": 0.5973618626594543, "learning_rate": 0.00022504058006991251, "loss": 1.7548, "step": 52334 }, { "epoch": 1.74, "grad_norm": 0.583031952381134, "learning_rate": 0.0002250304595321913, "loss": 1.7673, "step": 52335 }, { "epoch": 1.74, "grad_norm": 0.5568521022796631, "learning_rate": 0.00022502033908547228, "loss": 1.7136, "step": 52336 }, { "epoch": 1.74, "grad_norm": 0.5504276752471924, "learning_rate": 0.0002250102187297673, "loss": 1.7783, "step": 52337 }, { "epoch": 1.74, "grad_norm": 0.5535194873809814, "learning_rate": 0.00022500009846508894, "loss": 1.7786, "step": 52338 }, { "epoch": 1.74, "grad_norm": 0.5762103199958801, "learning_rate": 0.0002249899782914495, "loss": 1.7268, "step": 52339 }, { "epoch": 1.74, "grad_norm": 0.9591526985168457, "learning_rate": 0.0002249798582088611, "loss": 1.8017, "step": 52340 }, { "epoch": 1.74, "grad_norm": 0.5731534957885742, "learning_rate": 0.00022496973821733605, "loss": 1.8002, "step": 52341 }, { "epoch": 1.74, "grad_norm": 0.5762037634849548, "learning_rate": 0.00022495961831688668, "loss": 1.7802, "step": 52342 }, { "epoch": 1.74, "grad_norm": 0.5789176225662231, "learning_rate": 0.00022494949850752544, "loss": 1.7669, "step": 52343 }, { "epoch": 1.74, "grad_norm": 0.574910044670105, "learning_rate": 0.00022493937878926426, "loss": 1.7863, "step": 52344 }, { "epoch": 1.74, "grad_norm": 0.5576579570770264, "learning_rate": 0.0002249292591621157, "loss": 1.7631, "step": 52345 }, { "epoch": 1.74, "grad_norm": 0.5718863606452942, "learning_rate": 0.00022491913962609196, "loss": 1.7219, "step": 52346 }, { "epoch": 1.74, "grad_norm": 0.5321968793869019, "learning_rate": 0.00022490902018120534, "loss": 1.7464, "step": 52347 }, { "epoch": 1.74, "grad_norm": 0.5719254016876221, "learning_rate": 0.00022489890082746805, "loss": 1.7223, "step": 52348 }, { "epoch": 1.74, "grad_norm": 0.5542387962341309, "learning_rate": 0.00022488878156489252, "loss": 1.7548, "step": 52349 }, { "epoch": 1.74, "grad_norm": 0.5409507155418396, "learning_rate": 0.00022487866239349083, "loss": 1.8066, "step": 52350 }, { "epoch": 1.74, "grad_norm": 0.5676807165145874, "learning_rate": 0.00022486854331327534, "loss": 1.707, "step": 52351 }, { "epoch": 1.74, "grad_norm": 0.5742266774177551, "learning_rate": 0.00022485842432425844, "loss": 1.8094, "step": 52352 }, { "epoch": 1.74, "grad_norm": 0.560091495513916, "learning_rate": 0.00022484830542645245, "loss": 1.7558, "step": 52353 }, { "epoch": 1.74, "grad_norm": 0.5592462420463562, "learning_rate": 0.0002248381866198694, "loss": 1.7248, "step": 52354 }, { "epoch": 1.74, "grad_norm": 0.5806986689567566, "learning_rate": 0.0002248280679045218, "loss": 1.8374, "step": 52355 }, { "epoch": 1.74, "grad_norm": 0.5754531621932983, "learning_rate": 0.00022481794928042186, "loss": 1.7395, "step": 52356 }, { "epoch": 1.74, "grad_norm": 0.5442296862602234, "learning_rate": 0.00022480783074758177, "loss": 1.6916, "step": 52357 }, { "epoch": 1.74, "grad_norm": 0.5690606236457825, "learning_rate": 0.00022479771230601386, "loss": 1.6714, "step": 52358 }, { "epoch": 1.74, "grad_norm": 0.550939679145813, "learning_rate": 0.00022478759395573066, "loss": 1.7396, "step": 52359 }, { "epoch": 1.74, "grad_norm": 0.6015428304672241, "learning_rate": 0.000224777475696744, "loss": 1.8476, "step": 52360 }, { "epoch": 1.74, "grad_norm": 0.5734716653823853, "learning_rate": 0.00022476735752906648, "loss": 1.7941, "step": 52361 }, { "epoch": 1.74, "grad_norm": 0.5467731356620789, "learning_rate": 0.0002247572394527103, "loss": 1.6593, "step": 52362 }, { "epoch": 1.74, "grad_norm": 0.5507708787918091, "learning_rate": 0.00022474712146768782, "loss": 1.7744, "step": 52363 }, { "epoch": 1.74, "grad_norm": 0.5565255284309387, "learning_rate": 0.0002247370035740112, "loss": 1.757, "step": 52364 }, { "epoch": 1.74, "grad_norm": 0.546944260597229, "learning_rate": 0.0002247268857716927, "loss": 1.7023, "step": 52365 }, { "epoch": 1.74, "grad_norm": 0.5504822731018066, "learning_rate": 0.00022471676806074483, "loss": 1.7336, "step": 52366 }, { "epoch": 1.74, "grad_norm": 0.5671964287757874, "learning_rate": 0.00022470665044117951, "loss": 1.7603, "step": 52367 }, { "epoch": 1.74, "grad_norm": 0.5742889046669006, "learning_rate": 0.0002246965329130093, "loss": 1.7722, "step": 52368 }, { "epoch": 1.74, "grad_norm": 0.5569514632225037, "learning_rate": 0.00022468641547624652, "loss": 1.7323, "step": 52369 }, { "epoch": 1.74, "grad_norm": 0.562425971031189, "learning_rate": 0.00022467629813090323, "loss": 1.8186, "step": 52370 }, { "epoch": 1.74, "grad_norm": 0.5637134313583374, "learning_rate": 0.00022466618087699182, "loss": 1.7308, "step": 52371 }, { "epoch": 1.74, "grad_norm": 0.5872213840484619, "learning_rate": 0.00022465606371452449, "loss": 1.7541, "step": 52372 }, { "epoch": 1.74, "grad_norm": 0.5659999847412109, "learning_rate": 0.0002246459466435137, "loss": 1.7607, "step": 52373 }, { "epoch": 1.74, "grad_norm": 0.5455510020256042, "learning_rate": 0.0002246358296639716, "loss": 1.7072, "step": 52374 }, { "epoch": 1.74, "grad_norm": 0.5640797019004822, "learning_rate": 0.0002246257127759105, "loss": 1.7197, "step": 52375 }, { "epoch": 1.74, "grad_norm": 0.5497885346412659, "learning_rate": 0.00022461559597934273, "loss": 1.72, "step": 52376 }, { "epoch": 1.74, "grad_norm": 0.5657824873924255, "learning_rate": 0.00022460547927428048, "loss": 1.741, "step": 52377 }, { "epoch": 1.74, "grad_norm": 0.5567991137504578, "learning_rate": 0.00022459536266073604, "loss": 1.7585, "step": 52378 }, { "epoch": 1.74, "grad_norm": 0.5501180291175842, "learning_rate": 0.0002245852461387218, "loss": 1.7555, "step": 52379 }, { "epoch": 1.74, "grad_norm": 0.5674526691436768, "learning_rate": 0.00022457512970824983, "loss": 1.7272, "step": 52380 }, { "epoch": 1.74, "grad_norm": 0.5396563410758972, "learning_rate": 0.00022456501336933263, "loss": 1.7695, "step": 52381 }, { "epoch": 1.74, "grad_norm": 0.5668553113937378, "learning_rate": 0.00022455489712198235, "loss": 1.8451, "step": 52382 }, { "epoch": 1.74, "grad_norm": 0.5286353826522827, "learning_rate": 0.00022454478096621135, "loss": 1.6693, "step": 52383 }, { "epoch": 1.74, "grad_norm": 0.5724548697471619, "learning_rate": 0.00022453466490203185, "loss": 1.7613, "step": 52384 }, { "epoch": 1.74, "grad_norm": 0.5579233765602112, "learning_rate": 0.00022452454892945615, "loss": 1.8408, "step": 52385 }, { "epoch": 1.74, "grad_norm": 0.5901100039482117, "learning_rate": 0.0002245144330484966, "loss": 1.7832, "step": 52386 }, { "epoch": 1.74, "grad_norm": 0.558517575263977, "learning_rate": 0.0002245043172591652, "loss": 1.7439, "step": 52387 }, { "epoch": 1.74, "grad_norm": 0.6350926160812378, "learning_rate": 0.00022449420156147456, "loss": 1.7636, "step": 52388 }, { "epoch": 1.74, "grad_norm": 0.5685639977455139, "learning_rate": 0.0002244840859554369, "loss": 1.7498, "step": 52389 }, { "epoch": 1.74, "grad_norm": 0.5696951746940613, "learning_rate": 0.00022447397044106436, "loss": 1.7631, "step": 52390 }, { "epoch": 1.74, "grad_norm": 0.5450879335403442, "learning_rate": 0.0002244638550183693, "loss": 1.7568, "step": 52391 }, { "epoch": 1.74, "grad_norm": 0.5524365305900574, "learning_rate": 0.00022445373968736396, "loss": 1.6945, "step": 52392 }, { "epoch": 1.74, "grad_norm": 0.5596733689308167, "learning_rate": 0.00022444362444806073, "loss": 1.6874, "step": 52393 }, { "epoch": 1.74, "grad_norm": 0.5803032517433167, "learning_rate": 0.00022443350930047168, "loss": 1.7267, "step": 52394 }, { "epoch": 1.74, "grad_norm": 0.569765031337738, "learning_rate": 0.00022442339424460928, "loss": 1.7987, "step": 52395 }, { "epoch": 1.74, "grad_norm": 0.5349653959274292, "learning_rate": 0.00022441327928048583, "loss": 1.7081, "step": 52396 }, { "epoch": 1.74, "grad_norm": 0.5329999923706055, "learning_rate": 0.00022440316440811345, "loss": 1.7303, "step": 52397 }, { "epoch": 1.74, "grad_norm": 0.5443132519721985, "learning_rate": 0.00022439304962750445, "loss": 1.7741, "step": 52398 }, { "epoch": 1.74, "grad_norm": 0.5838443636894226, "learning_rate": 0.00022438293493867124, "loss": 1.7337, "step": 52399 }, { "epoch": 1.74, "grad_norm": 0.5769546627998352, "learning_rate": 0.00022437282034162595, "loss": 1.7557, "step": 52400 }, { "epoch": 1.74, "grad_norm": 0.565343976020813, "learning_rate": 0.00022436270583638083, "loss": 1.7936, "step": 52401 }, { "epoch": 1.74, "grad_norm": 0.5465288162231445, "learning_rate": 0.00022435259142294836, "loss": 1.7547, "step": 52402 }, { "epoch": 1.74, "grad_norm": 0.5602872967720032, "learning_rate": 0.0002243424771013407, "loss": 1.7545, "step": 52403 }, { "epoch": 1.74, "grad_norm": 0.5701468586921692, "learning_rate": 0.00022433236287157005, "loss": 1.7116, "step": 52404 }, { "epoch": 1.74, "grad_norm": 0.5647473931312561, "learning_rate": 0.0002243222487336488, "loss": 1.7208, "step": 52405 }, { "epoch": 1.74, "grad_norm": 0.5695194602012634, "learning_rate": 0.0002243121346875892, "loss": 1.7714, "step": 52406 }, { "epoch": 1.74, "grad_norm": 0.5619930624961853, "learning_rate": 0.00022430202073340353, "loss": 1.6616, "step": 52407 }, { "epoch": 1.74, "grad_norm": 0.5525307059288025, "learning_rate": 0.00022429190687110395, "loss": 1.7788, "step": 52408 }, { "epoch": 1.74, "grad_norm": 0.5585227608680725, "learning_rate": 0.00022428179310070303, "loss": 1.7177, "step": 52409 }, { "epoch": 1.74, "grad_norm": 0.5668081045150757, "learning_rate": 0.0002242716794222127, "loss": 1.7198, "step": 52410 }, { "epoch": 1.74, "grad_norm": 0.5583616495132446, "learning_rate": 0.00022426156583564541, "loss": 1.8304, "step": 52411 }, { "epoch": 1.74, "grad_norm": 0.5595110058784485, "learning_rate": 0.00022425145234101346, "loss": 1.7563, "step": 52412 }, { "epoch": 1.74, "grad_norm": 0.5562263131141663, "learning_rate": 0.00022424133893832911, "loss": 1.7204, "step": 52413 }, { "epoch": 1.74, "grad_norm": 0.5451006889343262, "learning_rate": 0.00022423122562760455, "loss": 1.7296, "step": 52414 }, { "epoch": 1.74, "grad_norm": 0.5795450210571289, "learning_rate": 0.0002242211124088521, "loss": 1.7995, "step": 52415 }, { "epoch": 1.74, "grad_norm": 0.5510439872741699, "learning_rate": 0.00022421099928208421, "loss": 1.7261, "step": 52416 }, { "epoch": 1.74, "grad_norm": 0.5637337565422058, "learning_rate": 0.00022420088624731282, "loss": 1.6671, "step": 52417 }, { "epoch": 1.74, "grad_norm": 0.5718388557434082, "learning_rate": 0.00022419077330455042, "loss": 1.7317, "step": 52418 }, { "epoch": 1.74, "grad_norm": 0.547398567199707, "learning_rate": 0.00022418066045380936, "loss": 1.7211, "step": 52419 }, { "epoch": 1.74, "grad_norm": 0.5765575766563416, "learning_rate": 0.00022417054769510172, "loss": 1.7298, "step": 52420 }, { "epoch": 1.74, "grad_norm": 0.555289089679718, "learning_rate": 0.00022416043502843983, "loss": 1.7348, "step": 52421 }, { "epoch": 1.74, "grad_norm": 0.5557097792625427, "learning_rate": 0.00022415032245383599, "loss": 1.8078, "step": 52422 }, { "epoch": 1.74, "grad_norm": 0.5840684175491333, "learning_rate": 0.0002241402099713026, "loss": 1.741, "step": 52423 }, { "epoch": 1.74, "grad_norm": 0.9547377228736877, "learning_rate": 0.00022413009758085175, "loss": 1.8144, "step": 52424 }, { "epoch": 1.74, "grad_norm": 0.5547963380813599, "learning_rate": 0.00022411998528249573, "loss": 1.6986, "step": 52425 }, { "epoch": 1.74, "grad_norm": 0.5421475768089294, "learning_rate": 0.00022410987307624696, "loss": 1.6952, "step": 52426 }, { "epoch": 1.74, "grad_norm": 0.5509217977523804, "learning_rate": 0.0002240997609621176, "loss": 1.6628, "step": 52427 }, { "epoch": 1.74, "grad_norm": 0.576786994934082, "learning_rate": 0.00022408964894011992, "loss": 1.679, "step": 52428 }, { "epoch": 1.74, "grad_norm": 0.552611231803894, "learning_rate": 0.00022407953701026616, "loss": 1.7634, "step": 52429 }, { "epoch": 1.74, "grad_norm": 0.5520474314689636, "learning_rate": 0.00022406942517256884, "loss": 1.7016, "step": 52430 }, { "epoch": 1.74, "grad_norm": 0.5699852705001831, "learning_rate": 0.00022405931342703986, "loss": 1.8047, "step": 52431 }, { "epoch": 1.74, "grad_norm": 0.562619149684906, "learning_rate": 0.00022404920177369176, "loss": 1.7309, "step": 52432 }, { "epoch": 1.74, "grad_norm": 0.5653335452079773, "learning_rate": 0.00022403909021253678, "loss": 1.7611, "step": 52433 }, { "epoch": 1.74, "grad_norm": 0.5552979707717896, "learning_rate": 0.00022402897874358713, "loss": 1.7895, "step": 52434 }, { "epoch": 1.74, "grad_norm": 0.5566204786300659, "learning_rate": 0.00022401886736685508, "loss": 1.8307, "step": 52435 }, { "epoch": 1.74, "grad_norm": 0.5347487926483154, "learning_rate": 0.00022400875608235296, "loss": 1.6714, "step": 52436 }, { "epoch": 1.74, "grad_norm": 0.5512114763259888, "learning_rate": 0.000223998644890093, "loss": 1.715, "step": 52437 }, { "epoch": 1.74, "grad_norm": 0.5562455058097839, "learning_rate": 0.0002239885337900874, "loss": 1.796, "step": 52438 }, { "epoch": 1.74, "grad_norm": 0.5700522661209106, "learning_rate": 0.00022397842278234858, "loss": 1.7817, "step": 52439 }, { "epoch": 1.74, "grad_norm": 0.5498622059822083, "learning_rate": 0.00022396831186688882, "loss": 1.7879, "step": 52440 }, { "epoch": 1.74, "grad_norm": 0.5506370067596436, "learning_rate": 0.00022395820104372029, "loss": 1.7483, "step": 52441 }, { "epoch": 1.74, "grad_norm": 0.5846616625785828, "learning_rate": 0.00022394809031285528, "loss": 1.787, "step": 52442 }, { "epoch": 1.74, "grad_norm": 0.5469422936439514, "learning_rate": 0.00022393797967430612, "loss": 1.7487, "step": 52443 }, { "epoch": 1.74, "grad_norm": 0.5491272211074829, "learning_rate": 0.00022392786912808503, "loss": 1.7372, "step": 52444 }, { "epoch": 1.74, "grad_norm": 0.558559238910675, "learning_rate": 0.0002239177586742042, "loss": 1.7933, "step": 52445 }, { "epoch": 1.74, "grad_norm": 0.5638786554336548, "learning_rate": 0.0002239076483126762, "loss": 1.7542, "step": 52446 }, { "epoch": 1.74, "grad_norm": 0.54677414894104, "learning_rate": 0.0002238975380435129, "loss": 1.7271, "step": 52447 }, { "epoch": 1.74, "grad_norm": 0.5483716726303101, "learning_rate": 0.00022388742786672686, "loss": 1.7774, "step": 52448 }, { "epoch": 1.74, "grad_norm": 0.5593528747558594, "learning_rate": 0.00022387731778233027, "loss": 1.7685, "step": 52449 }, { "epoch": 1.75, "grad_norm": 0.5442951917648315, "learning_rate": 0.0002238672077903354, "loss": 1.6998, "step": 52450 }, { "epoch": 1.75, "grad_norm": 0.5483371615409851, "learning_rate": 0.00022385709789075455, "loss": 1.6779, "step": 52451 }, { "epoch": 1.75, "grad_norm": 0.5442413091659546, "learning_rate": 0.0002238469880835999, "loss": 1.6843, "step": 52452 }, { "epoch": 1.75, "grad_norm": 1.5785943269729614, "learning_rate": 0.0002238368783688839, "loss": 1.8316, "step": 52453 }, { "epoch": 1.75, "grad_norm": 0.5380905270576477, "learning_rate": 0.00022382676874661858, "loss": 1.7173, "step": 52454 }, { "epoch": 1.75, "grad_norm": 0.5717686414718628, "learning_rate": 0.00022381665921681636, "loss": 1.7235, "step": 52455 }, { "epoch": 1.75, "grad_norm": 0.5542157888412476, "learning_rate": 0.0002238065497794896, "loss": 1.6668, "step": 52456 }, { "epoch": 1.75, "grad_norm": 0.5707089900970459, "learning_rate": 0.00022379644043465038, "loss": 1.7591, "step": 52457 }, { "epoch": 1.75, "grad_norm": 0.5750434994697571, "learning_rate": 0.00022378633118231103, "loss": 1.7391, "step": 52458 }, { "epoch": 1.75, "grad_norm": 0.5655393600463867, "learning_rate": 0.00022377622202248378, "loss": 1.7119, "step": 52459 }, { "epoch": 1.75, "grad_norm": 0.5640551447868347, "learning_rate": 0.00022376611295518118, "loss": 1.7242, "step": 52460 }, { "epoch": 1.75, "grad_norm": 0.5742132067680359, "learning_rate": 0.0002237560039804151, "loss": 1.7105, "step": 52461 }, { "epoch": 1.75, "grad_norm": 0.5656759738922119, "learning_rate": 0.00022374589509819806, "loss": 1.7811, "step": 52462 }, { "epoch": 1.75, "grad_norm": 0.572516143321991, "learning_rate": 0.0002237357863085423, "loss": 1.704, "step": 52463 }, { "epoch": 1.75, "grad_norm": 0.558660089969635, "learning_rate": 0.00022372567761146003, "loss": 1.7557, "step": 52464 }, { "epoch": 1.75, "grad_norm": 0.5811558961868286, "learning_rate": 0.0002237155690069635, "loss": 1.7445, "step": 52465 }, { "epoch": 1.75, "grad_norm": 0.6029375195503235, "learning_rate": 0.00022370546049506516, "loss": 1.714, "step": 52466 }, { "epoch": 1.75, "grad_norm": 0.5727952718734741, "learning_rate": 0.00022369535207577703, "loss": 1.741, "step": 52467 }, { "epoch": 1.75, "grad_norm": 0.5516225099563599, "learning_rate": 0.00022368524374911144, "loss": 1.6903, "step": 52468 }, { "epoch": 1.75, "grad_norm": 0.5671817660331726, "learning_rate": 0.00022367513551508085, "loss": 1.8134, "step": 52469 }, { "epoch": 1.75, "grad_norm": 0.5611957907676697, "learning_rate": 0.0002236650273736974, "loss": 1.7357, "step": 52470 }, { "epoch": 1.75, "grad_norm": 0.5495127439498901, "learning_rate": 0.0002236549193249733, "loss": 1.7177, "step": 52471 }, { "epoch": 1.75, "grad_norm": 0.5562340021133423, "learning_rate": 0.00022364481136892092, "loss": 1.7007, "step": 52472 }, { "epoch": 1.75, "grad_norm": 0.5591086745262146, "learning_rate": 0.0002236347035055525, "loss": 1.7113, "step": 52473 }, { "epoch": 1.75, "grad_norm": 0.5362736582756042, "learning_rate": 0.00022362459573488025, "loss": 1.7651, "step": 52474 }, { "epoch": 1.75, "grad_norm": 0.540341317653656, "learning_rate": 0.00022361448805691643, "loss": 1.7841, "step": 52475 }, { "epoch": 1.75, "grad_norm": 0.5788633823394775, "learning_rate": 0.00022360438047167359, "loss": 1.733, "step": 52476 }, { "epoch": 1.75, "grad_norm": 0.5913865566253662, "learning_rate": 0.00022359427297916353, "loss": 1.7137, "step": 52477 }, { "epoch": 1.75, "grad_norm": 0.5421547293663025, "learning_rate": 0.00022358416557939886, "loss": 1.6735, "step": 52478 }, { "epoch": 1.75, "grad_norm": 0.5653882622718811, "learning_rate": 0.00022357405827239175, "loss": 1.7315, "step": 52479 }, { "epoch": 1.75, "grad_norm": 0.542322039604187, "learning_rate": 0.0002235639510581545, "loss": 1.7396, "step": 52480 }, { "epoch": 1.75, "grad_norm": 0.5558228492736816, "learning_rate": 0.00022355384393669934, "loss": 1.7915, "step": 52481 }, { "epoch": 1.75, "grad_norm": 0.5534772872924805, "learning_rate": 0.00022354373690803848, "loss": 1.769, "step": 52482 }, { "epoch": 1.75, "grad_norm": 0.5524948835372925, "learning_rate": 0.00022353362997218438, "loss": 1.7327, "step": 52483 }, { "epoch": 1.75, "grad_norm": 0.5705835819244385, "learning_rate": 0.0002235235231291491, "loss": 1.674, "step": 52484 }, { "epoch": 1.75, "grad_norm": 0.5555174350738525, "learning_rate": 0.000223513416378945, "loss": 1.8392, "step": 52485 }, { "epoch": 1.75, "grad_norm": 0.5838526487350464, "learning_rate": 0.0002235033097215844, "loss": 1.7083, "step": 52486 }, { "epoch": 1.75, "grad_norm": 0.5788313150405884, "learning_rate": 0.00022349320315707947, "loss": 1.8142, "step": 52487 }, { "epoch": 1.75, "grad_norm": 0.5691397190093994, "learning_rate": 0.0002234830966854425, "loss": 1.801, "step": 52488 }, { "epoch": 1.75, "grad_norm": 0.5499383807182312, "learning_rate": 0.00022347299030668575, "loss": 1.7538, "step": 52489 }, { "epoch": 1.75, "grad_norm": 0.5637235045433044, "learning_rate": 0.0002234628840208216, "loss": 1.7337, "step": 52490 }, { "epoch": 1.75, "grad_norm": 0.5756587386131287, "learning_rate": 0.0002234527778278622, "loss": 1.8118, "step": 52491 }, { "epoch": 1.75, "grad_norm": 0.5763654708862305, "learning_rate": 0.0002234426717278198, "loss": 1.7741, "step": 52492 }, { "epoch": 1.75, "grad_norm": 0.5590721964836121, "learning_rate": 0.00022343256572070682, "loss": 1.7909, "step": 52493 }, { "epoch": 1.75, "grad_norm": 0.5646155476570129, "learning_rate": 0.00022342245980653535, "loss": 1.7519, "step": 52494 }, { "epoch": 1.75, "grad_norm": 0.5405763387680054, "learning_rate": 0.00022341235398531774, "loss": 1.7615, "step": 52495 }, { "epoch": 1.75, "grad_norm": 0.5428594946861267, "learning_rate": 0.00022340224825706629, "loss": 1.7337, "step": 52496 }, { "epoch": 1.75, "grad_norm": 0.5706335306167603, "learning_rate": 0.0002233921426217931, "loss": 1.6978, "step": 52497 }, { "epoch": 1.75, "grad_norm": 0.5606546401977539, "learning_rate": 0.00022338203707951063, "loss": 1.766, "step": 52498 }, { "epoch": 1.75, "grad_norm": 0.579049825668335, "learning_rate": 0.0002233719316302311, "loss": 1.8101, "step": 52499 }, { "epoch": 1.75, "grad_norm": 0.5602087378501892, "learning_rate": 0.00022336182627396683, "loss": 1.6843, "step": 52500 }, { "epoch": 1.75, "grad_norm": 0.561995804309845, "learning_rate": 0.00022335172101072988, "loss": 1.7752, "step": 52501 }, { "epoch": 1.75, "grad_norm": 0.5631802678108215, "learning_rate": 0.00022334161584053268, "loss": 1.7305, "step": 52502 }, { "epoch": 1.75, "grad_norm": 0.5427907109260559, "learning_rate": 0.00022333151076338756, "loss": 1.7092, "step": 52503 }, { "epoch": 1.75, "grad_norm": 0.5633659958839417, "learning_rate": 0.00022332140577930652, "loss": 1.7636, "step": 52504 }, { "epoch": 1.75, "grad_norm": 0.5649205446243286, "learning_rate": 0.00022331130088830206, "loss": 1.6721, "step": 52505 }, { "epoch": 1.75, "grad_norm": 0.5676253437995911, "learning_rate": 0.00022330119609038646, "loss": 1.6876, "step": 52506 }, { "epoch": 1.75, "grad_norm": 0.5478671193122864, "learning_rate": 0.00022329109138557185, "loss": 1.7289, "step": 52507 }, { "epoch": 1.75, "grad_norm": 0.5574340224266052, "learning_rate": 0.00022328098677387053, "loss": 1.7218, "step": 52508 }, { "epoch": 1.75, "grad_norm": 0.5519748330116272, "learning_rate": 0.00022327088225529472, "loss": 1.7707, "step": 52509 }, { "epoch": 1.75, "grad_norm": 0.5441171526908875, "learning_rate": 0.00022326077782985697, "loss": 1.7587, "step": 52510 }, { "epoch": 1.75, "grad_norm": 0.584060788154602, "learning_rate": 0.00022325067349756916, "loss": 1.7545, "step": 52511 }, { "epoch": 1.75, "grad_norm": 0.5677643418312073, "learning_rate": 0.00022324056925844375, "loss": 1.832, "step": 52512 }, { "epoch": 1.75, "grad_norm": 0.553461492061615, "learning_rate": 0.00022323046511249302, "loss": 1.7336, "step": 52513 }, { "epoch": 1.75, "grad_norm": 0.5515953898429871, "learning_rate": 0.0002232203610597292, "loss": 1.7317, "step": 52514 }, { "epoch": 1.75, "grad_norm": 0.5584122538566589, "learning_rate": 0.00022321025710016448, "loss": 1.7708, "step": 52515 }, { "epoch": 1.75, "grad_norm": 0.5564799308776855, "learning_rate": 0.00022320015323381128, "loss": 1.693, "step": 52516 }, { "epoch": 1.75, "grad_norm": 0.5649417638778687, "learning_rate": 0.00022319004946068173, "loss": 1.672, "step": 52517 }, { "epoch": 1.75, "grad_norm": 0.5802289247512817, "learning_rate": 0.00022317994578078805, "loss": 1.7399, "step": 52518 }, { "epoch": 1.75, "grad_norm": 0.5583866238594055, "learning_rate": 0.0002231698421941427, "loss": 1.7551, "step": 52519 }, { "epoch": 1.75, "grad_norm": 0.5590459704399109, "learning_rate": 0.00022315973870075788, "loss": 1.857, "step": 52520 }, { "epoch": 1.75, "grad_norm": 0.568143904209137, "learning_rate": 0.00022314963530064576, "loss": 1.748, "step": 52521 }, { "epoch": 1.75, "grad_norm": 0.571757972240448, "learning_rate": 0.00022313953199381865, "loss": 1.7295, "step": 52522 }, { "epoch": 1.75, "grad_norm": 0.5627021193504333, "learning_rate": 0.00022312942878028891, "loss": 1.7041, "step": 52523 }, { "epoch": 1.75, "grad_norm": 0.553677499294281, "learning_rate": 0.00022311932566006863, "loss": 1.7572, "step": 52524 }, { "epoch": 1.75, "grad_norm": 0.5453357696533203, "learning_rate": 0.00022310922263317007, "loss": 1.6953, "step": 52525 }, { "epoch": 1.75, "grad_norm": 0.578179657459259, "learning_rate": 0.00022309911969960584, "loss": 1.7487, "step": 52526 }, { "epoch": 1.75, "grad_norm": 0.567771315574646, "learning_rate": 0.00022308901685938774, "loss": 1.7749, "step": 52527 }, { "epoch": 1.75, "grad_norm": 0.5629246234893799, "learning_rate": 0.00022307891411252827, "loss": 1.7163, "step": 52528 }, { "epoch": 1.75, "grad_norm": 0.5563037991523743, "learning_rate": 0.00022306881145903968, "loss": 1.6565, "step": 52529 }, { "epoch": 1.75, "grad_norm": 0.5472627282142639, "learning_rate": 0.0002230587088989343, "loss": 1.7175, "step": 52530 }, { "epoch": 1.75, "grad_norm": 0.5780500173568726, "learning_rate": 0.00022304860643222423, "loss": 1.765, "step": 52531 }, { "epoch": 1.75, "grad_norm": 0.5721786022186279, "learning_rate": 0.00022303850405892175, "loss": 1.6945, "step": 52532 }, { "epoch": 1.75, "grad_norm": 0.5565494894981384, "learning_rate": 0.00022302840177903938, "loss": 1.73, "step": 52533 }, { "epoch": 1.75, "grad_norm": 0.5585100054740906, "learning_rate": 0.00022301829959258898, "loss": 1.7167, "step": 52534 }, { "epoch": 1.75, "grad_norm": 0.5761417746543884, "learning_rate": 0.00022300819749958314, "loss": 1.7923, "step": 52535 }, { "epoch": 1.75, "grad_norm": 0.5601763129234314, "learning_rate": 0.000222998095500034, "loss": 1.7647, "step": 52536 }, { "epoch": 1.75, "grad_norm": 0.5546115040779114, "learning_rate": 0.00022298799359395382, "loss": 1.7335, "step": 52537 }, { "epoch": 1.75, "grad_norm": 0.5653970837593079, "learning_rate": 0.0002229778917813548, "loss": 1.7184, "step": 52538 }, { "epoch": 1.75, "grad_norm": 0.5535023808479309, "learning_rate": 0.0002229677900622493, "loss": 1.84, "step": 52539 }, { "epoch": 1.75, "grad_norm": 0.5702625513076782, "learning_rate": 0.00022295768843664967, "loss": 1.7344, "step": 52540 }, { "epoch": 1.75, "grad_norm": 0.5868515372276306, "learning_rate": 0.0002229475869045679, "loss": 1.79, "step": 52541 }, { "epoch": 1.75, "grad_norm": 0.5622718930244446, "learning_rate": 0.00022293748546601644, "loss": 1.706, "step": 52542 }, { "epoch": 1.75, "grad_norm": 0.6097672581672668, "learning_rate": 0.0002229273841210076, "loss": 1.7129, "step": 52543 }, { "epoch": 1.75, "grad_norm": 0.5945091843605042, "learning_rate": 0.0002229172828695535, "loss": 1.7568, "step": 52544 }, { "epoch": 1.75, "grad_norm": 0.5706674456596375, "learning_rate": 0.00022290718171166644, "loss": 1.7407, "step": 52545 }, { "epoch": 1.75, "grad_norm": 0.5601142048835754, "learning_rate": 0.00022289708064735878, "loss": 1.721, "step": 52546 }, { "epoch": 1.75, "grad_norm": 0.6043761372566223, "learning_rate": 0.00022288697967664267, "loss": 1.7269, "step": 52547 }, { "epoch": 1.75, "grad_norm": 0.5622905492782593, "learning_rate": 0.0002228768787995303, "loss": 1.7821, "step": 52548 }, { "epoch": 1.75, "grad_norm": 0.6252680420875549, "learning_rate": 0.00022286677801603412, "loss": 1.7718, "step": 52549 }, { "epoch": 1.75, "grad_norm": 0.5742687582969666, "learning_rate": 0.00022285667732616635, "loss": 1.7527, "step": 52550 }, { "epoch": 1.75, "grad_norm": 0.5558571815490723, "learning_rate": 0.00022284657672993916, "loss": 1.7615, "step": 52551 }, { "epoch": 1.75, "grad_norm": 0.579240620136261, "learning_rate": 0.00022283647622736488, "loss": 1.7942, "step": 52552 }, { "epoch": 1.75, "grad_norm": 0.5714614391326904, "learning_rate": 0.00022282637581845575, "loss": 1.6839, "step": 52553 }, { "epoch": 1.75, "grad_norm": 0.5890133380889893, "learning_rate": 0.00022281627550322402, "loss": 1.7021, "step": 52554 }, { "epoch": 1.75, "grad_norm": 0.5846062898635864, "learning_rate": 0.00022280617528168186, "loss": 1.7278, "step": 52555 }, { "epoch": 1.75, "grad_norm": 0.5841264724731445, "learning_rate": 0.0002227960751538417, "loss": 1.805, "step": 52556 }, { "epoch": 1.75, "grad_norm": 0.5876021981239319, "learning_rate": 0.00022278597511971582, "loss": 1.7348, "step": 52557 }, { "epoch": 1.75, "grad_norm": 0.5754939913749695, "learning_rate": 0.00022277587517931629, "loss": 1.7243, "step": 52558 }, { "epoch": 1.75, "grad_norm": 0.5692420601844788, "learning_rate": 0.00022276577533265546, "loss": 1.7038, "step": 52559 }, { "epoch": 1.75, "grad_norm": 0.5651823282241821, "learning_rate": 0.00022275567557974573, "loss": 1.6871, "step": 52560 }, { "epoch": 1.75, "grad_norm": 0.5655723214149475, "learning_rate": 0.00022274557592059913, "loss": 1.7304, "step": 52561 }, { "epoch": 1.75, "grad_norm": 0.5820937156677246, "learning_rate": 0.00022273547635522791, "loss": 1.7608, "step": 52562 }, { "epoch": 1.75, "grad_norm": 0.5751036405563354, "learning_rate": 0.0002227253768836447, "loss": 1.7419, "step": 52563 }, { "epoch": 1.75, "grad_norm": 0.5748395919799805, "learning_rate": 0.00022271527750586127, "loss": 1.7657, "step": 52564 }, { "epoch": 1.75, "grad_norm": 0.5916486978530884, "learning_rate": 0.00022270517822189017, "loss": 1.7639, "step": 52565 }, { "epoch": 1.75, "grad_norm": 0.5944066047668457, "learning_rate": 0.00022269507903174357, "loss": 1.8306, "step": 52566 }, { "epoch": 1.75, "grad_norm": 0.55239337682724, "learning_rate": 0.00022268497993543386, "loss": 1.7424, "step": 52567 }, { "epoch": 1.75, "grad_norm": 0.5440301299095154, "learning_rate": 0.0002226748809329731, "loss": 1.6584, "step": 52568 }, { "epoch": 1.75, "grad_norm": 0.5980315208435059, "learning_rate": 0.00022266478202437358, "loss": 1.7116, "step": 52569 }, { "epoch": 1.75, "grad_norm": 0.5634970664978027, "learning_rate": 0.00022265468320964785, "loss": 1.7475, "step": 52570 }, { "epoch": 1.75, "grad_norm": 0.5818617939949036, "learning_rate": 0.0002226445844888077, "loss": 1.7618, "step": 52571 }, { "epoch": 1.75, "grad_norm": 0.558586597442627, "learning_rate": 0.0002226344858618657, "loss": 1.7816, "step": 52572 }, { "epoch": 1.75, "grad_norm": 0.578014075756073, "learning_rate": 0.00022262438732883414, "loss": 1.7301, "step": 52573 }, { "epoch": 1.75, "grad_norm": 0.5670999884605408, "learning_rate": 0.00022261428888972507, "loss": 1.8091, "step": 52574 }, { "epoch": 1.75, "grad_norm": 0.5496091842651367, "learning_rate": 0.00022260419054455084, "loss": 1.6582, "step": 52575 }, { "epoch": 1.75, "grad_norm": 0.5393154621124268, "learning_rate": 0.00022259409229332365, "loss": 1.7576, "step": 52576 }, { "epoch": 1.75, "grad_norm": 0.5840243697166443, "learning_rate": 0.00022258399413605608, "loss": 1.7449, "step": 52577 }, { "epoch": 1.75, "grad_norm": 0.532490074634552, "learning_rate": 0.00022257389607275988, "loss": 1.7338, "step": 52578 }, { "epoch": 1.75, "grad_norm": 0.5366800427436829, "learning_rate": 0.00022256379810344767, "loss": 1.7052, "step": 52579 }, { "epoch": 1.75, "grad_norm": 0.5472609996795654, "learning_rate": 0.00022255370022813162, "loss": 1.6567, "step": 52580 }, { "epoch": 1.75, "grad_norm": 0.5863897800445557, "learning_rate": 0.00022254360244682394, "loss": 1.7679, "step": 52581 }, { "epoch": 1.75, "grad_norm": 0.5554313659667969, "learning_rate": 0.0002225335047595369, "loss": 1.7279, "step": 52582 }, { "epoch": 1.75, "grad_norm": 0.5550022125244141, "learning_rate": 0.00022252340716628284, "loss": 1.7617, "step": 52583 }, { "epoch": 1.75, "grad_norm": 0.5498954057693481, "learning_rate": 0.00022251330966707386, "loss": 1.7396, "step": 52584 }, { "epoch": 1.75, "grad_norm": 0.5437328815460205, "learning_rate": 0.00022250321226192224, "loss": 1.7114, "step": 52585 }, { "epoch": 1.75, "grad_norm": 0.5871968269348145, "learning_rate": 0.0002224931149508404, "loss": 1.7589, "step": 52586 }, { "epoch": 1.75, "grad_norm": 0.5422536730766296, "learning_rate": 0.00022248301773384054, "loss": 1.7344, "step": 52587 }, { "epoch": 1.75, "grad_norm": 0.552985668182373, "learning_rate": 0.00022247292061093482, "loss": 1.6934, "step": 52588 }, { "epoch": 1.75, "grad_norm": 0.5570551753044128, "learning_rate": 0.00022246282358213555, "loss": 1.7449, "step": 52589 }, { "epoch": 1.75, "grad_norm": 0.5681732892990112, "learning_rate": 0.0002224527266474551, "loss": 1.7276, "step": 52590 }, { "epoch": 1.75, "grad_norm": 0.567906379699707, "learning_rate": 0.00022244262980690544, "loss": 1.7393, "step": 52591 }, { "epoch": 1.75, "grad_norm": 0.5559841394424438, "learning_rate": 0.000222432533060499, "loss": 1.7119, "step": 52592 }, { "epoch": 1.75, "grad_norm": 0.5476827025413513, "learning_rate": 0.00022242243640824822, "loss": 1.7622, "step": 52593 }, { "epoch": 1.75, "grad_norm": 0.5448691844940186, "learning_rate": 0.00022241233985016503, "loss": 1.7769, "step": 52594 }, { "epoch": 1.75, "grad_norm": 0.562263011932373, "learning_rate": 0.00022240224338626182, "loss": 1.6791, "step": 52595 }, { "epoch": 1.75, "grad_norm": 0.5667158961296082, "learning_rate": 0.0002223921470165509, "loss": 1.8124, "step": 52596 }, { "epoch": 1.75, "grad_norm": 0.5606674551963806, "learning_rate": 0.0002223820507410445, "loss": 1.7378, "step": 52597 }, { "epoch": 1.75, "grad_norm": 0.5761696696281433, "learning_rate": 0.00022237195455975482, "loss": 1.7511, "step": 52598 }, { "epoch": 1.75, "grad_norm": 0.5696858763694763, "learning_rate": 0.00022236185847269408, "loss": 1.7021, "step": 52599 }, { "epoch": 1.75, "grad_norm": 0.5410240888595581, "learning_rate": 0.00022235176247987473, "loss": 1.8039, "step": 52600 }, { "epoch": 1.75, "grad_norm": 0.5412880182266235, "learning_rate": 0.00022234166658130882, "loss": 1.7306, "step": 52601 }, { "epoch": 1.75, "grad_norm": 0.5752592086791992, "learning_rate": 0.00022233157077700867, "loss": 1.6744, "step": 52602 }, { "epoch": 1.75, "grad_norm": 0.5588482618331909, "learning_rate": 0.00022232147506698666, "loss": 1.7823, "step": 52603 }, { "epoch": 1.75, "grad_norm": 0.5604219436645508, "learning_rate": 0.00022231137945125483, "loss": 1.7445, "step": 52604 }, { "epoch": 1.75, "grad_norm": 0.5606533288955688, "learning_rate": 0.0002223012839298256, "loss": 1.7507, "step": 52605 }, { "epoch": 1.75, "grad_norm": 0.5566132068634033, "learning_rate": 0.00022229118850271106, "loss": 1.709, "step": 52606 }, { "epoch": 1.75, "grad_norm": 0.5680105686187744, "learning_rate": 0.00022228109316992366, "loss": 1.7948, "step": 52607 }, { "epoch": 1.75, "grad_norm": 0.5652137994766235, "learning_rate": 0.00022227099793147554, "loss": 1.6901, "step": 52608 }, { "epoch": 1.75, "grad_norm": 0.5678727030754089, "learning_rate": 0.00022226090278737898, "loss": 1.7014, "step": 52609 }, { "epoch": 1.75, "grad_norm": 0.5490948557853699, "learning_rate": 0.00022225080773764628, "loss": 1.741, "step": 52610 }, { "epoch": 1.75, "grad_norm": 0.5615748167037964, "learning_rate": 0.00022224071278228959, "loss": 1.7542, "step": 52611 }, { "epoch": 1.75, "grad_norm": 0.5571508407592773, "learning_rate": 0.0002222306179213212, "loss": 1.7517, "step": 52612 }, { "epoch": 1.75, "grad_norm": 0.5598406195640564, "learning_rate": 0.00022222052315475348, "loss": 1.7652, "step": 52613 }, { "epoch": 1.75, "grad_norm": 0.5566858053207397, "learning_rate": 0.0002222104284825984, "loss": 1.7167, "step": 52614 }, { "epoch": 1.75, "grad_norm": 0.5759467482566833, "learning_rate": 0.0002222003339048685, "loss": 1.6825, "step": 52615 }, { "epoch": 1.75, "grad_norm": 0.5520846247673035, "learning_rate": 0.00022219023942157592, "loss": 1.736, "step": 52616 }, { "epoch": 1.75, "grad_norm": 0.5659406781196594, "learning_rate": 0.000222180145032733, "loss": 1.7838, "step": 52617 }, { "epoch": 1.75, "grad_norm": 0.5444150567054749, "learning_rate": 0.0002221700507383518, "loss": 1.6546, "step": 52618 }, { "epoch": 1.75, "grad_norm": 0.5708430409431458, "learning_rate": 0.00022215995653844476, "loss": 1.7756, "step": 52619 }, { "epoch": 1.75, "grad_norm": 0.5711545944213867, "learning_rate": 0.00022214986243302407, "loss": 1.7186, "step": 52620 }, { "epoch": 1.75, "grad_norm": 0.5738155245780945, "learning_rate": 0.0002221397684221019, "loss": 1.7824, "step": 52621 }, { "epoch": 1.75, "grad_norm": 0.5460374355316162, "learning_rate": 0.00022212967450569067, "loss": 1.7101, "step": 52622 }, { "epoch": 1.75, "grad_norm": 0.5576373934745789, "learning_rate": 0.00022211958068380253, "loss": 1.7222, "step": 52623 }, { "epoch": 1.75, "grad_norm": 0.5264050960540771, "learning_rate": 0.00022210948695644973, "loss": 1.69, "step": 52624 }, { "epoch": 1.75, "grad_norm": 0.5664354562759399, "learning_rate": 0.00022209939332364448, "loss": 1.7402, "step": 52625 }, { "epoch": 1.75, "grad_norm": 0.5495779514312744, "learning_rate": 0.00022208929978539905, "loss": 1.7482, "step": 52626 }, { "epoch": 1.75, "grad_norm": 1.1865657567977905, "learning_rate": 0.00022207920634172596, "loss": 1.7638, "step": 52627 }, { "epoch": 1.75, "grad_norm": 0.5949307084083557, "learning_rate": 0.000222069112992637, "loss": 1.7695, "step": 52628 }, { "epoch": 1.75, "grad_norm": 0.566423773765564, "learning_rate": 0.00022205901973814475, "loss": 1.7062, "step": 52629 }, { "epoch": 1.75, "grad_norm": 0.5546125173568726, "learning_rate": 0.0002220489265782614, "loss": 1.8315, "step": 52630 }, { "epoch": 1.75, "grad_norm": 0.5672116279602051, "learning_rate": 0.0002220388335129991, "loss": 1.7302, "step": 52631 }, { "epoch": 1.75, "grad_norm": 0.564354658126831, "learning_rate": 0.00022202874054237022, "loss": 1.7418, "step": 52632 }, { "epoch": 1.75, "grad_norm": 0.578601598739624, "learning_rate": 0.00022201864766638698, "loss": 1.771, "step": 52633 }, { "epoch": 1.75, "grad_norm": 0.5621334314346313, "learning_rate": 0.00022200855488506156, "loss": 1.7601, "step": 52634 }, { "epoch": 1.75, "grad_norm": 0.5590117573738098, "learning_rate": 0.00022199846219840617, "loss": 1.7917, "step": 52635 }, { "epoch": 1.75, "grad_norm": 0.5515950918197632, "learning_rate": 0.00022198836960643327, "loss": 1.8073, "step": 52636 }, { "epoch": 1.75, "grad_norm": 0.5809829831123352, "learning_rate": 0.00022197827710915502, "loss": 1.7163, "step": 52637 }, { "epoch": 1.75, "grad_norm": 0.5606433749198914, "learning_rate": 0.0002219681847065836, "loss": 1.7498, "step": 52638 }, { "epoch": 1.75, "grad_norm": 0.5564442873001099, "learning_rate": 0.0002219580923987313, "loss": 1.7726, "step": 52639 }, { "epoch": 1.75, "grad_norm": 0.5692948698997498, "learning_rate": 0.00022194800018561042, "loss": 1.6921, "step": 52640 }, { "epoch": 1.75, "grad_norm": 0.5462283492088318, "learning_rate": 0.00022193790806723318, "loss": 1.6932, "step": 52641 }, { "epoch": 1.75, "grad_norm": 0.5591286420822144, "learning_rate": 0.00022192781604361166, "loss": 1.7174, "step": 52642 }, { "epoch": 1.75, "grad_norm": 0.5700663328170776, "learning_rate": 0.00022191772411475852, "loss": 1.7225, "step": 52643 }, { "epoch": 1.75, "grad_norm": 0.5673047304153442, "learning_rate": 0.00022190763228068552, "loss": 1.7973, "step": 52644 }, { "epoch": 1.75, "grad_norm": 0.5333486795425415, "learning_rate": 0.00022189754054140524, "loss": 1.6942, "step": 52645 }, { "epoch": 1.75, "grad_norm": 0.5535867810249329, "learning_rate": 0.00022188744889692984, "loss": 1.7161, "step": 52646 }, { "epoch": 1.75, "grad_norm": 0.5621904730796814, "learning_rate": 0.0002218773573472716, "loss": 1.7305, "step": 52647 }, { "epoch": 1.75, "grad_norm": 0.5634559392929077, "learning_rate": 0.00022186726589244272, "loss": 1.7777, "step": 52648 }, { "epoch": 1.75, "grad_norm": 0.5582148432731628, "learning_rate": 0.00022185717453245538, "loss": 1.7065, "step": 52649 }, { "epoch": 1.75, "grad_norm": 0.5771487951278687, "learning_rate": 0.0002218470832673221, "loss": 1.7515, "step": 52650 }, { "epoch": 1.75, "grad_norm": 0.5570768713951111, "learning_rate": 0.00022183699209705479, "loss": 1.6402, "step": 52651 }, { "epoch": 1.75, "grad_norm": 0.5669731497764587, "learning_rate": 0.0002218269010216659, "loss": 1.7774, "step": 52652 }, { "epoch": 1.75, "grad_norm": 0.5624088048934937, "learning_rate": 0.0002218168100411677, "loss": 1.7296, "step": 52653 }, { "epoch": 1.75, "grad_norm": 0.5713001489639282, "learning_rate": 0.00022180671915557226, "loss": 1.6818, "step": 52654 }, { "epoch": 1.75, "grad_norm": 0.5558249354362488, "learning_rate": 0.00022179662836489198, "loss": 1.6969, "step": 52655 }, { "epoch": 1.75, "grad_norm": 0.5630596280097961, "learning_rate": 0.00022178653766913902, "loss": 1.7259, "step": 52656 }, { "epoch": 1.75, "grad_norm": 0.5694417357444763, "learning_rate": 0.00022177644706832588, "loss": 1.7614, "step": 52657 }, { "epoch": 1.75, "grad_norm": 0.5738039612770081, "learning_rate": 0.0002217663565624644, "loss": 1.7716, "step": 52658 }, { "epoch": 1.75, "grad_norm": 0.5861682295799255, "learning_rate": 0.00022175626615156708, "loss": 1.8077, "step": 52659 }, { "epoch": 1.75, "grad_norm": 0.5482121706008911, "learning_rate": 0.0002217461758356462, "loss": 1.7559, "step": 52660 }, { "epoch": 1.75, "grad_norm": 0.5572178363800049, "learning_rate": 0.00022173608561471388, "loss": 1.7878, "step": 52661 }, { "epoch": 1.75, "grad_norm": 0.5753841400146484, "learning_rate": 0.00022172599548878237, "loss": 1.8117, "step": 52662 }, { "epoch": 1.75, "grad_norm": 0.5511540174484253, "learning_rate": 0.00022171590545786408, "loss": 1.7359, "step": 52663 }, { "epoch": 1.75, "grad_norm": 0.5659512281417847, "learning_rate": 0.0002217058155219711, "loss": 1.7116, "step": 52664 }, { "epoch": 1.75, "grad_norm": 0.5547504425048828, "learning_rate": 0.00022169572568111565, "loss": 1.7228, "step": 52665 }, { "epoch": 1.75, "grad_norm": 0.5467748641967773, "learning_rate": 0.00022168563593531006, "loss": 1.7192, "step": 52666 }, { "epoch": 1.75, "grad_norm": 0.5532140135765076, "learning_rate": 0.0002216755462845667, "loss": 1.7735, "step": 52667 }, { "epoch": 1.75, "grad_norm": 0.5437707901000977, "learning_rate": 0.00022166545672889758, "loss": 1.7154, "step": 52668 }, { "epoch": 1.75, "grad_norm": 0.5408949851989746, "learning_rate": 0.0002216553672683151, "loss": 1.7639, "step": 52669 }, { "epoch": 1.75, "grad_norm": 0.5796299576759338, "learning_rate": 0.00022164527790283147, "loss": 1.7273, "step": 52670 }, { "epoch": 1.75, "grad_norm": 0.5653471946716309, "learning_rate": 0.0002216351886324589, "loss": 1.664, "step": 52671 }, { "epoch": 1.75, "grad_norm": 0.5518624782562256, "learning_rate": 0.0002216250994572096, "loss": 1.7236, "step": 52672 }, { "epoch": 1.75, "grad_norm": 0.5536937713623047, "learning_rate": 0.0002216150103770959, "loss": 1.6328, "step": 52673 }, { "epoch": 1.75, "grad_norm": 0.5502486824989319, "learning_rate": 0.00022160492139213013, "loss": 1.7856, "step": 52674 }, { "epoch": 1.75, "grad_norm": 0.570315957069397, "learning_rate": 0.00022159483250232438, "loss": 1.7745, "step": 52675 }, { "epoch": 1.75, "grad_norm": 0.5883567929267883, "learning_rate": 0.00022158474370769096, "loss": 1.7401, "step": 52676 }, { "epoch": 1.75, "grad_norm": 0.5860214233398438, "learning_rate": 0.0002215746550082421, "loss": 1.6839, "step": 52677 }, { "epoch": 1.75, "grad_norm": 0.5597760677337646, "learning_rate": 0.00022156456640399006, "loss": 1.7907, "step": 52678 }, { "epoch": 1.75, "grad_norm": 0.5712671279907227, "learning_rate": 0.000221554477894947, "loss": 1.7238, "step": 52679 }, { "epoch": 1.75, "grad_norm": 0.5948998332023621, "learning_rate": 0.00022154438948112542, "loss": 1.7216, "step": 52680 }, { "epoch": 1.75, "grad_norm": 0.5728771686553955, "learning_rate": 0.00022153430116253722, "loss": 1.83, "step": 52681 }, { "epoch": 1.75, "grad_norm": 0.5688039064407349, "learning_rate": 0.00022152421293919486, "loss": 1.7254, "step": 52682 }, { "epoch": 1.75, "grad_norm": 0.5591832995414734, "learning_rate": 0.00022151412481111058, "loss": 1.6842, "step": 52683 }, { "epoch": 1.75, "grad_norm": 0.5588542222976685, "learning_rate": 0.0002215040367782966, "loss": 1.6739, "step": 52684 }, { "epoch": 1.75, "grad_norm": 0.5735726356506348, "learning_rate": 0.00022149394884076512, "loss": 1.7407, "step": 52685 }, { "epoch": 1.75, "grad_norm": 0.5603712797164917, "learning_rate": 0.00022148386099852833, "loss": 1.6824, "step": 52686 }, { "epoch": 1.75, "grad_norm": 0.5533040165901184, "learning_rate": 0.00022147377325159882, "loss": 1.6895, "step": 52687 }, { "epoch": 1.75, "grad_norm": 0.5688694715499878, "learning_rate": 0.0002214636855999883, "loss": 1.789, "step": 52688 }, { "epoch": 1.75, "grad_norm": 0.5734622478485107, "learning_rate": 0.00022145359804370943, "loss": 1.8099, "step": 52689 }, { "epoch": 1.75, "grad_norm": 0.5934885144233704, "learning_rate": 0.00022144351058277432, "loss": 1.7763, "step": 52690 }, { "epoch": 1.75, "grad_norm": 0.5680502653121948, "learning_rate": 0.00022143342321719522, "loss": 1.7607, "step": 52691 }, { "epoch": 1.75, "grad_norm": 0.5877279043197632, "learning_rate": 0.00022142333594698429, "loss": 1.6559, "step": 52692 }, { "epoch": 1.75, "grad_norm": 0.5609264969825745, "learning_rate": 0.00022141324877215387, "loss": 1.6642, "step": 52693 }, { "epoch": 1.75, "grad_norm": 0.5616547465324402, "learning_rate": 0.00022140316169271633, "loss": 1.8258, "step": 52694 }, { "epoch": 1.75, "grad_norm": 0.5682710409164429, "learning_rate": 0.00022139307470868356, "loss": 1.7542, "step": 52695 }, { "epoch": 1.75, "grad_norm": 0.5991782546043396, "learning_rate": 0.00022138298782006812, "loss": 1.8195, "step": 52696 }, { "epoch": 1.75, "grad_norm": 0.5519226789474487, "learning_rate": 0.0002213729010268822, "loss": 1.7566, "step": 52697 }, { "epoch": 1.75, "grad_norm": 0.561387836933136, "learning_rate": 0.0002213628143291379, "loss": 1.7531, "step": 52698 }, { "epoch": 1.75, "grad_norm": 0.6070218086242676, "learning_rate": 0.0002213527277268476, "loss": 1.6924, "step": 52699 }, { "epoch": 1.75, "grad_norm": 0.5825493335723877, "learning_rate": 0.00022134264122002355, "loss": 1.7197, "step": 52700 }, { "epoch": 1.75, "grad_norm": 0.576290488243103, "learning_rate": 0.0002213325548086779, "loss": 1.693, "step": 52701 }, { "epoch": 1.75, "grad_norm": 0.5635651350021362, "learning_rate": 0.00022132246849282283, "loss": 1.7006, "step": 52702 }, { "epoch": 1.75, "grad_norm": 0.561754047870636, "learning_rate": 0.00022131238227247082, "loss": 1.7046, "step": 52703 }, { "epoch": 1.75, "grad_norm": 0.5654571652412415, "learning_rate": 0.00022130229614763394, "loss": 1.7105, "step": 52704 }, { "epoch": 1.75, "grad_norm": 0.5674179196357727, "learning_rate": 0.0002212922101183245, "loss": 1.7472, "step": 52705 }, { "epoch": 1.75, "grad_norm": 0.5685908794403076, "learning_rate": 0.0002212821241845547, "loss": 1.7623, "step": 52706 }, { "epoch": 1.75, "grad_norm": 0.5537695288658142, "learning_rate": 0.00022127203834633688, "loss": 1.6457, "step": 52707 }, { "epoch": 1.75, "grad_norm": 0.5756018161773682, "learning_rate": 0.00022126195260368313, "loss": 1.7442, "step": 52708 }, { "epoch": 1.75, "grad_norm": 0.5508803129196167, "learning_rate": 0.0002212518669566057, "loss": 1.7168, "step": 52709 }, { "epoch": 1.75, "grad_norm": 0.5677745342254639, "learning_rate": 0.00022124178140511708, "loss": 1.7909, "step": 52710 }, { "epoch": 1.75, "grad_norm": 0.5541406273841858, "learning_rate": 0.00022123169594922914, "loss": 1.7944, "step": 52711 }, { "epoch": 1.75, "grad_norm": 0.546942949295044, "learning_rate": 0.0002212216105889544, "loss": 1.8407, "step": 52712 }, { "epoch": 1.75, "grad_norm": 0.560712456703186, "learning_rate": 0.000221211525324305, "loss": 1.7793, "step": 52713 }, { "epoch": 1.75, "grad_norm": 0.582331657409668, "learning_rate": 0.0002212014401552933, "loss": 1.7817, "step": 52714 }, { "epoch": 1.75, "grad_norm": 0.5833765864372253, "learning_rate": 0.00022119135508193134, "loss": 1.7798, "step": 52715 }, { "epoch": 1.75, "grad_norm": 0.5610985159873962, "learning_rate": 0.0002211812701042314, "loss": 1.7357, "step": 52716 }, { "epoch": 1.75, "grad_norm": 0.5510309338569641, "learning_rate": 0.00022117118522220591, "loss": 1.7126, "step": 52717 }, { "epoch": 1.75, "grad_norm": 0.5637003779411316, "learning_rate": 0.00022116110043586697, "loss": 1.707, "step": 52718 }, { "epoch": 1.75, "grad_norm": 0.5617135167121887, "learning_rate": 0.0002211510157452268, "loss": 1.723, "step": 52719 }, { "epoch": 1.75, "grad_norm": 0.5558468103408813, "learning_rate": 0.00022114093115029775, "loss": 1.7465, "step": 52720 }, { "epoch": 1.75, "grad_norm": 0.5549415349960327, "learning_rate": 0.00022113084665109192, "loss": 1.7001, "step": 52721 }, { "epoch": 1.75, "grad_norm": 0.5720219016075134, "learning_rate": 0.00022112076224762163, "loss": 1.7431, "step": 52722 }, { "epoch": 1.75, "grad_norm": 0.5625739097595215, "learning_rate": 0.00022111067793989902, "loss": 1.7407, "step": 52723 }, { "epoch": 1.75, "grad_norm": 0.5733197331428528, "learning_rate": 0.00022110059372793656, "loss": 1.734, "step": 52724 }, { "epoch": 1.75, "grad_norm": 0.5804368853569031, "learning_rate": 0.00022109050961174632, "loss": 1.736, "step": 52725 }, { "epoch": 1.75, "grad_norm": 0.5743905901908875, "learning_rate": 0.00022108042559134056, "loss": 1.7208, "step": 52726 }, { "epoch": 1.75, "grad_norm": 0.5636065602302551, "learning_rate": 0.00022107034166673158, "loss": 1.7871, "step": 52727 }, { "epoch": 1.75, "grad_norm": 2.422193765640259, "learning_rate": 0.00022106025783793156, "loss": 1.7764, "step": 52728 }, { "epoch": 1.75, "grad_norm": 0.5940567851066589, "learning_rate": 0.0002210501741049527, "loss": 1.7106, "step": 52729 }, { "epoch": 1.75, "grad_norm": 0.5455796718597412, "learning_rate": 0.00022104009046780738, "loss": 1.7189, "step": 52730 }, { "epoch": 1.75, "grad_norm": 0.5643779039382935, "learning_rate": 0.00022103000692650764, "loss": 1.8082, "step": 52731 }, { "epoch": 1.75, "grad_norm": 0.5411801934242249, "learning_rate": 0.0002210199234810659, "loss": 1.6807, "step": 52732 }, { "epoch": 1.75, "grad_norm": 0.5657994747161865, "learning_rate": 0.00022100984013149433, "loss": 1.8028, "step": 52733 }, { "epoch": 1.75, "grad_norm": 0.5547535419464111, "learning_rate": 0.0002209997568778052, "loss": 1.7873, "step": 52734 }, { "epoch": 1.75, "grad_norm": 0.5229403376579285, "learning_rate": 0.00022098967372001074, "loss": 1.7207, "step": 52735 }, { "epoch": 1.75, "grad_norm": 0.5696253180503845, "learning_rate": 0.0002209795906581231, "loss": 1.775, "step": 52736 }, { "epoch": 1.75, "grad_norm": 0.5692737698554993, "learning_rate": 0.00022096950769215468, "loss": 1.7339, "step": 52737 }, { "epoch": 1.75, "grad_norm": 0.5770893096923828, "learning_rate": 0.00022095942482211752, "loss": 1.795, "step": 52738 }, { "epoch": 1.75, "grad_norm": 0.5650463104248047, "learning_rate": 0.00022094934204802406, "loss": 1.8231, "step": 52739 }, { "epoch": 1.75, "grad_norm": 0.5766975283622742, "learning_rate": 0.00022093925936988646, "loss": 1.745, "step": 52740 }, { "epoch": 1.75, "grad_norm": 0.5688456892967224, "learning_rate": 0.0002209291767877169, "loss": 1.806, "step": 52741 }, { "epoch": 1.75, "grad_norm": 0.5702866911888123, "learning_rate": 0.0002209190943015277, "loss": 1.7577, "step": 52742 }, { "epoch": 1.75, "grad_norm": 0.5481510162353516, "learning_rate": 0.00022090901191133098, "loss": 1.6681, "step": 52743 }, { "epoch": 1.75, "grad_norm": 0.57419753074646, "learning_rate": 0.00022089892961713926, "loss": 1.6715, "step": 52744 }, { "epoch": 1.75, "grad_norm": 1.1481596231460571, "learning_rate": 0.00022088884741896438, "loss": 1.775, "step": 52745 }, { "epoch": 1.75, "grad_norm": 0.5882630348205566, "learning_rate": 0.00022087876531681888, "loss": 1.8244, "step": 52746 }, { "epoch": 1.75, "grad_norm": 0.5614525079727173, "learning_rate": 0.00022086868331071496, "loss": 1.6608, "step": 52747 }, { "epoch": 1.75, "grad_norm": 0.5568708181381226, "learning_rate": 0.0002208586014006647, "loss": 1.7046, "step": 52748 }, { "epoch": 1.75, "grad_norm": 0.5636999011039734, "learning_rate": 0.00022084851958668043, "loss": 1.7138, "step": 52749 }, { "epoch": 1.75, "grad_norm": 0.5560986399650574, "learning_rate": 0.00022083843786877453, "loss": 1.7532, "step": 52750 }, { "epoch": 1.76, "grad_norm": 0.5760109424591064, "learning_rate": 0.00022082835624695897, "loss": 1.7248, "step": 52751 }, { "epoch": 1.76, "grad_norm": 0.5687297582626343, "learning_rate": 0.00022081827472124612, "loss": 1.7367, "step": 52752 }, { "epoch": 1.76, "grad_norm": 0.559843122959137, "learning_rate": 0.00022080819329164822, "loss": 1.7132, "step": 52753 }, { "epoch": 1.76, "grad_norm": 0.5640305280685425, "learning_rate": 0.0002207981119581776, "loss": 1.7192, "step": 52754 }, { "epoch": 1.76, "grad_norm": 0.5657153725624084, "learning_rate": 0.0002207880307208463, "loss": 1.6856, "step": 52755 }, { "epoch": 1.76, "grad_norm": 0.5769081115722656, "learning_rate": 0.00022077794957966673, "loss": 1.7749, "step": 52756 }, { "epoch": 1.76, "grad_norm": 0.5588663816452026, "learning_rate": 0.00022076786853465107, "loss": 1.6756, "step": 52757 }, { "epoch": 1.76, "grad_norm": 0.5683198571205139, "learning_rate": 0.00022075778758581146, "loss": 1.7261, "step": 52758 }, { "epoch": 1.76, "grad_norm": 0.6092416048049927, "learning_rate": 0.00022074770673316023, "loss": 1.7687, "step": 52759 }, { "epoch": 1.76, "grad_norm": 0.5532069802284241, "learning_rate": 0.00022073762597670974, "loss": 1.7283, "step": 52760 }, { "epoch": 1.76, "grad_norm": 0.546222984790802, "learning_rate": 0.0002207275453164719, "loss": 1.7696, "step": 52761 }, { "epoch": 1.76, "grad_norm": 0.5513536334037781, "learning_rate": 0.00022071746475245922, "loss": 1.7463, "step": 52762 }, { "epoch": 1.76, "grad_norm": 0.5395718812942505, "learning_rate": 0.00022070738428468385, "loss": 1.738, "step": 52763 }, { "epoch": 1.76, "grad_norm": 0.5557855367660522, "learning_rate": 0.0002206973039131581, "loss": 1.7417, "step": 52764 }, { "epoch": 1.76, "grad_norm": 0.5706793665885925, "learning_rate": 0.0002206872236378941, "loss": 1.7978, "step": 52765 }, { "epoch": 1.76, "grad_norm": 0.5737337470054626, "learning_rate": 0.00022067714345890403, "loss": 1.8321, "step": 52766 }, { "epoch": 1.76, "grad_norm": 0.5681381225585938, "learning_rate": 0.00022066706337620039, "loss": 1.7421, "step": 52767 }, { "epoch": 1.76, "grad_norm": 0.5461312532424927, "learning_rate": 0.0002206569833897951, "loss": 1.7623, "step": 52768 }, { "epoch": 1.76, "grad_norm": 0.5485028624534607, "learning_rate": 0.00022064690349970058, "loss": 1.6838, "step": 52769 }, { "epoch": 1.76, "grad_norm": 0.5591670870780945, "learning_rate": 0.0002206368237059291, "loss": 1.7339, "step": 52770 }, { "epoch": 1.76, "grad_norm": 0.5622618794441223, "learning_rate": 0.00022062674400849278, "loss": 1.8278, "step": 52771 }, { "epoch": 1.76, "grad_norm": 0.5510522723197937, "learning_rate": 0.00022061666440740385, "loss": 1.663, "step": 52772 }, { "epoch": 1.76, "grad_norm": 0.5499540567398071, "learning_rate": 0.00022060658490267456, "loss": 1.7002, "step": 52773 }, { "epoch": 1.76, "grad_norm": 0.552973747253418, "learning_rate": 0.00022059650549431737, "loss": 1.6732, "step": 52774 }, { "epoch": 1.76, "grad_norm": 0.5486284494400024, "learning_rate": 0.00022058642618234413, "loss": 1.7628, "step": 52775 }, { "epoch": 1.76, "grad_norm": 0.5552600622177124, "learning_rate": 0.00022057634696676733, "loss": 1.6892, "step": 52776 }, { "epoch": 1.76, "grad_norm": 0.5646688938140869, "learning_rate": 0.00022056626784759922, "loss": 1.7167, "step": 52777 }, { "epoch": 1.76, "grad_norm": 0.5904563069343567, "learning_rate": 0.00022055618882485187, "loss": 1.7629, "step": 52778 }, { "epoch": 1.76, "grad_norm": 0.5522554516792297, "learning_rate": 0.00022054610989853758, "loss": 1.6626, "step": 52779 }, { "epoch": 1.76, "grad_norm": 0.5675451755523682, "learning_rate": 0.00022053603106866868, "loss": 1.719, "step": 52780 }, { "epoch": 1.76, "grad_norm": 0.5632173418998718, "learning_rate": 0.0002205259523352573, "loss": 1.7408, "step": 52781 }, { "epoch": 1.76, "grad_norm": 0.5703911781311035, "learning_rate": 0.0002205158736983156, "loss": 1.7418, "step": 52782 }, { "epoch": 1.76, "grad_norm": 0.5904793739318848, "learning_rate": 0.00022050579515785603, "loss": 1.7544, "step": 52783 }, { "epoch": 1.76, "grad_norm": 0.5620431900024414, "learning_rate": 0.00022049571671389076, "loss": 1.7496, "step": 52784 }, { "epoch": 1.76, "grad_norm": 0.5442214608192444, "learning_rate": 0.0002204856383664319, "loss": 1.7283, "step": 52785 }, { "epoch": 1.76, "grad_norm": 0.561586856842041, "learning_rate": 0.00022047556011549175, "loss": 1.7404, "step": 52786 }, { "epoch": 1.76, "grad_norm": 0.5836759209632874, "learning_rate": 0.00022046548196108263, "loss": 1.8222, "step": 52787 }, { "epoch": 1.76, "grad_norm": 0.5730969905853271, "learning_rate": 0.00022045540390321663, "loss": 1.7422, "step": 52788 }, { "epoch": 1.76, "grad_norm": 0.5518857836723328, "learning_rate": 0.00022044532594190597, "loss": 1.7892, "step": 52789 }, { "epoch": 1.76, "grad_norm": 0.5610007047653198, "learning_rate": 0.0002204352480771632, "loss": 1.7257, "step": 52790 }, { "epoch": 1.76, "grad_norm": 0.5610657930374146, "learning_rate": 0.0002204251703090001, "loss": 1.7007, "step": 52791 }, { "epoch": 1.76, "grad_norm": 0.563666045665741, "learning_rate": 0.00022041509263742915, "loss": 1.7257, "step": 52792 }, { "epoch": 1.76, "grad_norm": 0.5645656585693359, "learning_rate": 0.0002204050150624626, "loss": 1.7021, "step": 52793 }, { "epoch": 1.76, "grad_norm": 0.5661416053771973, "learning_rate": 0.00022039493758411267, "loss": 1.751, "step": 52794 }, { "epoch": 1.76, "grad_norm": 0.5794844627380371, "learning_rate": 0.00022038486020239148, "loss": 1.7232, "step": 52795 }, { "epoch": 1.76, "grad_norm": 0.558345377445221, "learning_rate": 0.0002203747829173113, "loss": 1.7365, "step": 52796 }, { "epoch": 1.76, "grad_norm": 0.5709189176559448, "learning_rate": 0.00022036470572888459, "loss": 1.7431, "step": 52797 }, { "epoch": 1.76, "grad_norm": 0.5441118478775024, "learning_rate": 0.00022035462863712318, "loss": 1.7429, "step": 52798 }, { "epoch": 1.76, "grad_norm": 0.5653161406517029, "learning_rate": 0.00022034455164203956, "loss": 1.7166, "step": 52799 }, { "epoch": 1.76, "grad_norm": 0.5742078423500061, "learning_rate": 0.000220334474743646, "loss": 1.7597, "step": 52800 }, { "epoch": 1.76, "grad_norm": 0.5713816285133362, "learning_rate": 0.00022032439794195466, "loss": 1.764, "step": 52801 }, { "epoch": 1.76, "grad_norm": 0.5496088862419128, "learning_rate": 0.00022031432123697768, "loss": 1.7377, "step": 52802 }, { "epoch": 1.76, "grad_norm": 0.5622116327285767, "learning_rate": 0.00022030424462872735, "loss": 1.8297, "step": 52803 }, { "epoch": 1.76, "grad_norm": 0.5720327496528625, "learning_rate": 0.0002202941681172161, "loss": 1.6735, "step": 52804 }, { "epoch": 1.76, "grad_norm": 0.5997210144996643, "learning_rate": 0.00022028409170245578, "loss": 1.7345, "step": 52805 }, { "epoch": 1.76, "grad_norm": 0.582279622554779, "learning_rate": 0.0002202740153844589, "loss": 1.7149, "step": 52806 }, { "epoch": 1.76, "grad_norm": 0.5579678416252136, "learning_rate": 0.0002202639391632377, "loss": 1.7596, "step": 52807 }, { "epoch": 1.76, "grad_norm": 0.5486326813697815, "learning_rate": 0.00022025386303880425, "loss": 1.8605, "step": 52808 }, { "epoch": 1.76, "grad_norm": 0.5748746395111084, "learning_rate": 0.00022024378701117084, "loss": 1.7505, "step": 52809 }, { "epoch": 1.76, "grad_norm": 0.5656048655509949, "learning_rate": 0.00022023371108034973, "loss": 1.7806, "step": 52810 }, { "epoch": 1.76, "grad_norm": 0.5602602362632751, "learning_rate": 0.00022022363524635324, "loss": 1.7063, "step": 52811 }, { "epoch": 1.76, "grad_norm": 0.5469576716423035, "learning_rate": 0.00022021355950919337, "loss": 1.6843, "step": 52812 }, { "epoch": 1.76, "grad_norm": 0.5878297090530396, "learning_rate": 0.00022020348386888252, "loss": 1.7068, "step": 52813 }, { "epoch": 1.76, "grad_norm": 0.5645439028739929, "learning_rate": 0.00022019340832543297, "loss": 1.7085, "step": 52814 }, { "epoch": 1.76, "grad_norm": 0.5505819916725159, "learning_rate": 0.00022018333287885677, "loss": 1.7115, "step": 52815 }, { "epoch": 1.76, "grad_norm": 0.5580061078071594, "learning_rate": 0.00022017325752916625, "loss": 1.7176, "step": 52816 }, { "epoch": 1.76, "grad_norm": 0.5671122074127197, "learning_rate": 0.00022016318227637374, "loss": 1.7917, "step": 52817 }, { "epoch": 1.76, "grad_norm": 0.5594314336776733, "learning_rate": 0.00022015310712049127, "loss": 1.7616, "step": 52818 }, { "epoch": 1.76, "grad_norm": 0.5778989195823669, "learning_rate": 0.0002201430320615311, "loss": 1.7495, "step": 52819 }, { "epoch": 1.76, "grad_norm": 0.5526840090751648, "learning_rate": 0.00022013295709950558, "loss": 1.7768, "step": 52820 }, { "epoch": 1.76, "grad_norm": 0.5882656574249268, "learning_rate": 0.0002201228822344269, "loss": 1.7099, "step": 52821 }, { "epoch": 1.76, "grad_norm": 0.5926162004470825, "learning_rate": 0.0002201128074663073, "loss": 1.8293, "step": 52822 }, { "epoch": 1.76, "grad_norm": 0.5725292563438416, "learning_rate": 0.00022010273279515893, "loss": 1.7395, "step": 52823 }, { "epoch": 1.76, "grad_norm": 0.5622099041938782, "learning_rate": 0.00022009265822099414, "loss": 1.8499, "step": 52824 }, { "epoch": 1.76, "grad_norm": 0.5730171203613281, "learning_rate": 0.000220082583743825, "loss": 1.8193, "step": 52825 }, { "epoch": 1.76, "grad_norm": 0.5540552735328674, "learning_rate": 0.00022007250936366383, "loss": 1.7773, "step": 52826 }, { "epoch": 1.76, "grad_norm": 0.583288848400116, "learning_rate": 0.00022006243508052294, "loss": 1.7358, "step": 52827 }, { "epoch": 1.76, "grad_norm": 0.5956892371177673, "learning_rate": 0.00022005236089441444, "loss": 1.724, "step": 52828 }, { "epoch": 1.76, "grad_norm": 0.5529762506484985, "learning_rate": 0.00022004228680535054, "loss": 1.742, "step": 52829 }, { "epoch": 1.76, "grad_norm": 0.5592114925384521, "learning_rate": 0.00022003221281334355, "loss": 1.6912, "step": 52830 }, { "epoch": 1.76, "grad_norm": 0.5807710289955139, "learning_rate": 0.00022002213891840572, "loss": 1.7705, "step": 52831 }, { "epoch": 1.76, "grad_norm": 0.5623620748519897, "learning_rate": 0.0002200120651205492, "loss": 1.7729, "step": 52832 }, { "epoch": 1.76, "grad_norm": 0.584782063961029, "learning_rate": 0.00022000199141978616, "loss": 1.7497, "step": 52833 }, { "epoch": 1.76, "grad_norm": 0.5711170434951782, "learning_rate": 0.00021999191781612903, "loss": 1.7225, "step": 52834 }, { "epoch": 1.76, "grad_norm": 0.5958042740821838, "learning_rate": 0.00021998184430958991, "loss": 1.7202, "step": 52835 }, { "epoch": 1.76, "grad_norm": 0.5994269251823425, "learning_rate": 0.00021997177090018102, "loss": 1.8187, "step": 52836 }, { "epoch": 1.76, "grad_norm": 0.6244206428527832, "learning_rate": 0.00021996169758791464, "loss": 1.7492, "step": 52837 }, { "epoch": 1.76, "grad_norm": 0.577428936958313, "learning_rate": 0.00021995162437280294, "loss": 1.7876, "step": 52838 }, { "epoch": 1.76, "grad_norm": 0.5503688454627991, "learning_rate": 0.00021994155125485817, "loss": 1.7513, "step": 52839 }, { "epoch": 1.76, "grad_norm": 0.5848000049591064, "learning_rate": 0.00021993147823409247, "loss": 1.7414, "step": 52840 }, { "epoch": 1.76, "grad_norm": 0.5829916596412659, "learning_rate": 0.00021992140531051829, "loss": 1.7704, "step": 52841 }, { "epoch": 1.76, "grad_norm": 0.5974644422531128, "learning_rate": 0.0002199113324841477, "loss": 1.7417, "step": 52842 }, { "epoch": 1.76, "grad_norm": 0.596912145614624, "learning_rate": 0.00021990125975499293, "loss": 1.7414, "step": 52843 }, { "epoch": 1.76, "grad_norm": 0.564751386642456, "learning_rate": 0.00021989118712306625, "loss": 1.8119, "step": 52844 }, { "epoch": 1.76, "grad_norm": 0.5778619050979614, "learning_rate": 0.00021988111458837986, "loss": 1.7366, "step": 52845 }, { "epoch": 1.76, "grad_norm": 0.554091215133667, "learning_rate": 0.00021987104215094596, "loss": 1.7689, "step": 52846 }, { "epoch": 1.76, "grad_norm": 0.5885397791862488, "learning_rate": 0.0002198609698107769, "loss": 1.7216, "step": 52847 }, { "epoch": 1.76, "grad_norm": 0.5849214792251587, "learning_rate": 0.00021985089756788463, "loss": 1.7524, "step": 52848 }, { "epoch": 1.76, "grad_norm": 0.5641008615493774, "learning_rate": 0.00021984082542228167, "loss": 1.7702, "step": 52849 }, { "epoch": 1.76, "grad_norm": 0.5700339674949646, "learning_rate": 0.00021983075337398016, "loss": 1.7621, "step": 52850 }, { "epoch": 1.76, "grad_norm": 0.5765859484672546, "learning_rate": 0.00021982068142299233, "loss": 1.7716, "step": 52851 }, { "epoch": 1.76, "grad_norm": 0.5752660036087036, "learning_rate": 0.00021981060956933029, "loss": 1.7875, "step": 52852 }, { "epoch": 1.76, "grad_norm": 0.5618433356285095, "learning_rate": 0.00021980053781300634, "loss": 1.7448, "step": 52853 }, { "epoch": 1.76, "grad_norm": 0.5591868162155151, "learning_rate": 0.0002197904661540329, "loss": 1.7501, "step": 52854 }, { "epoch": 1.76, "grad_norm": 0.563031017780304, "learning_rate": 0.00021978039459242182, "loss": 1.7157, "step": 52855 }, { "epoch": 1.76, "grad_norm": 0.5645389556884766, "learning_rate": 0.00021977032312818558, "loss": 1.7265, "step": 52856 }, { "epoch": 1.76, "grad_norm": 0.5712410807609558, "learning_rate": 0.0002197602517613364, "loss": 1.729, "step": 52857 }, { "epoch": 1.76, "grad_norm": 0.5381956696510315, "learning_rate": 0.00021975018049188644, "loss": 1.7554, "step": 52858 }, { "epoch": 1.76, "grad_norm": 0.596121609210968, "learning_rate": 0.00021974010931984786, "loss": 1.7917, "step": 52859 }, { "epoch": 1.76, "grad_norm": 0.5584797263145447, "learning_rate": 0.00021973003824523295, "loss": 1.6896, "step": 52860 }, { "epoch": 1.76, "grad_norm": 0.6007225513458252, "learning_rate": 0.00021971996726805415, "loss": 1.7119, "step": 52861 }, { "epoch": 1.76, "grad_norm": 0.5541271567344666, "learning_rate": 0.00021970989638832325, "loss": 1.7509, "step": 52862 }, { "epoch": 1.76, "grad_norm": 0.5749210119247437, "learning_rate": 0.00021969982560605277, "loss": 1.6948, "step": 52863 }, { "epoch": 1.76, "grad_norm": 0.5412297248840332, "learning_rate": 0.00021968975492125495, "loss": 1.7178, "step": 52864 }, { "epoch": 1.76, "grad_norm": 0.5578084588050842, "learning_rate": 0.00021967968433394189, "loss": 1.7882, "step": 52865 }, { "epoch": 1.76, "grad_norm": 0.5395693778991699, "learning_rate": 0.0002196696138441258, "loss": 1.7164, "step": 52866 }, { "epoch": 1.76, "grad_norm": 0.5527278780937195, "learning_rate": 0.00021965954345181911, "loss": 1.6826, "step": 52867 }, { "epoch": 1.76, "grad_norm": 0.5737935900688171, "learning_rate": 0.00021964947315703377, "loss": 1.6951, "step": 52868 }, { "epoch": 1.76, "grad_norm": 0.5859190225601196, "learning_rate": 0.00021963940295978207, "loss": 1.7486, "step": 52869 }, { "epoch": 1.76, "grad_norm": 0.5666712522506714, "learning_rate": 0.00021962933286007636, "loss": 1.7949, "step": 52870 }, { "epoch": 1.76, "grad_norm": 0.5657163858413696, "learning_rate": 0.0002196192628579289, "loss": 1.6907, "step": 52871 }, { "epoch": 1.76, "grad_norm": 0.5671404600143433, "learning_rate": 0.00021960919295335173, "loss": 1.6927, "step": 52872 }, { "epoch": 1.76, "grad_norm": 0.5572801828384399, "learning_rate": 0.0002195991231463571, "loss": 1.692, "step": 52873 }, { "epoch": 1.76, "grad_norm": 0.5596802234649658, "learning_rate": 0.00021958905343695743, "loss": 1.6693, "step": 52874 }, { "epoch": 1.76, "grad_norm": 0.5540332198143005, "learning_rate": 0.0002195789838251647, "loss": 1.6834, "step": 52875 }, { "epoch": 1.76, "grad_norm": 0.5469491481781006, "learning_rate": 0.00021956891431099116, "loss": 1.6822, "step": 52876 }, { "epoch": 1.76, "grad_norm": 0.5455426573753357, "learning_rate": 0.0002195588448944493, "loss": 1.7172, "step": 52877 }, { "epoch": 1.76, "grad_norm": 0.5697252154350281, "learning_rate": 0.00021954877557555099, "loss": 1.688, "step": 52878 }, { "epoch": 1.76, "grad_norm": 0.5535866022109985, "learning_rate": 0.00021953870635430867, "loss": 1.7076, "step": 52879 }, { "epoch": 1.76, "grad_norm": 0.5912778973579407, "learning_rate": 0.0002195286372307345, "loss": 1.6958, "step": 52880 }, { "epoch": 1.76, "grad_norm": 0.5665357112884521, "learning_rate": 0.00021951856820484074, "loss": 1.7207, "step": 52881 }, { "epoch": 1.76, "grad_norm": 0.5679354071617126, "learning_rate": 0.00021950849927663955, "loss": 1.808, "step": 52882 }, { "epoch": 1.76, "grad_norm": 0.5522183775901794, "learning_rate": 0.0002194984304461431, "loss": 1.7467, "step": 52883 }, { "epoch": 1.76, "grad_norm": 0.5417225360870361, "learning_rate": 0.0002194883617133639, "loss": 1.7312, "step": 52884 }, { "epoch": 1.76, "grad_norm": 0.5489906668663025, "learning_rate": 0.00021947829307831376, "loss": 1.7704, "step": 52885 }, { "epoch": 1.76, "grad_norm": 0.5661196112632751, "learning_rate": 0.00021946822454100515, "loss": 1.7363, "step": 52886 }, { "epoch": 1.76, "grad_norm": 0.5644164085388184, "learning_rate": 0.00021945815610145038, "loss": 1.7569, "step": 52887 }, { "epoch": 1.76, "grad_norm": 0.5789999961853027, "learning_rate": 0.00021944808775966145, "loss": 1.6875, "step": 52888 }, { "epoch": 1.76, "grad_norm": 0.5633158087730408, "learning_rate": 0.00021943801951565065, "loss": 1.8665, "step": 52889 }, { "epoch": 1.76, "grad_norm": 0.5564867258071899, "learning_rate": 0.0002194279513694302, "loss": 1.7053, "step": 52890 }, { "epoch": 1.76, "grad_norm": 0.5478361248970032, "learning_rate": 0.0002194178833210125, "loss": 1.7636, "step": 52891 }, { "epoch": 1.76, "grad_norm": 0.5669894814491272, "learning_rate": 0.00021940781537040942, "loss": 1.7874, "step": 52892 }, { "epoch": 1.76, "grad_norm": 0.5573274493217468, "learning_rate": 0.00021939774751763343, "loss": 1.7036, "step": 52893 }, { "epoch": 1.76, "grad_norm": 0.5661652088165283, "learning_rate": 0.00021938767976269682, "loss": 1.7437, "step": 52894 }, { "epoch": 1.76, "grad_norm": 0.5645703077316284, "learning_rate": 0.00021937761210561157, "loss": 1.7494, "step": 52895 }, { "epoch": 1.76, "grad_norm": 0.5724974274635315, "learning_rate": 0.00021936754454639006, "loss": 1.7806, "step": 52896 }, { "epoch": 1.76, "grad_norm": 0.5462504029273987, "learning_rate": 0.00021935747708504453, "loss": 1.761, "step": 52897 }, { "epoch": 1.76, "grad_norm": 0.5641133189201355, "learning_rate": 0.00021934740972158703, "loss": 1.7501, "step": 52898 }, { "epoch": 1.76, "grad_norm": 0.5672893524169922, "learning_rate": 0.00021933734245602987, "loss": 1.7384, "step": 52899 }, { "epoch": 1.76, "grad_norm": 0.5735712647438049, "learning_rate": 0.00021932727528838533, "loss": 1.768, "step": 52900 }, { "epoch": 1.76, "grad_norm": 0.5606706142425537, "learning_rate": 0.00021931720821866569, "loss": 1.6887, "step": 52901 }, { "epoch": 1.76, "grad_norm": 0.5779650211334229, "learning_rate": 0.00021930714124688297, "loss": 1.7435, "step": 52902 }, { "epoch": 1.76, "grad_norm": 0.5517889857292175, "learning_rate": 0.00021929707437304955, "loss": 1.8172, "step": 52903 }, { "epoch": 1.76, "grad_norm": 0.5527046322822571, "learning_rate": 0.0002192870075971776, "loss": 1.7834, "step": 52904 }, { "epoch": 1.76, "grad_norm": 0.5473384261131287, "learning_rate": 0.00021927694091927928, "loss": 1.6993, "step": 52905 }, { "epoch": 1.76, "grad_norm": 0.5582682490348816, "learning_rate": 0.0002192668743393668, "loss": 1.8136, "step": 52906 }, { "epoch": 1.76, "grad_norm": 0.5604426860809326, "learning_rate": 0.00021925680785745262, "loss": 1.8069, "step": 52907 }, { "epoch": 1.76, "grad_norm": 0.5634908676147461, "learning_rate": 0.0002192467414735486, "loss": 1.7411, "step": 52908 }, { "epoch": 1.76, "grad_norm": 0.5600835084915161, "learning_rate": 0.0002192366751876672, "loss": 1.7612, "step": 52909 }, { "epoch": 1.76, "grad_norm": 0.5631020069122314, "learning_rate": 0.00021922660899982056, "loss": 1.7704, "step": 52910 }, { "epoch": 1.76, "grad_norm": 0.5355615615844727, "learning_rate": 0.000219216542910021, "loss": 1.6888, "step": 52911 }, { "epoch": 1.76, "grad_norm": 0.5695662498474121, "learning_rate": 0.00021920647691828059, "loss": 1.8012, "step": 52912 }, { "epoch": 1.76, "grad_norm": 0.5538513660430908, "learning_rate": 0.00021919641102461154, "loss": 1.739, "step": 52913 }, { "epoch": 1.76, "grad_norm": 0.5507523417472839, "learning_rate": 0.00021918634522902634, "loss": 1.7041, "step": 52914 }, { "epoch": 1.76, "grad_norm": 0.5545210838317871, "learning_rate": 0.00021917627953153677, "loss": 1.7569, "step": 52915 }, { "epoch": 1.76, "grad_norm": 0.5585024356842041, "learning_rate": 0.00021916621393215543, "loss": 1.7271, "step": 52916 }, { "epoch": 1.76, "grad_norm": 0.5785971879959106, "learning_rate": 0.00021915614843089434, "loss": 1.7714, "step": 52917 }, { "epoch": 1.76, "grad_norm": 0.557537317276001, "learning_rate": 0.00021914608302776587, "loss": 1.7297, "step": 52918 }, { "epoch": 1.76, "grad_norm": 0.5622476935386658, "learning_rate": 0.00021913601772278205, "loss": 1.7158, "step": 52919 }, { "epoch": 1.76, "grad_norm": 0.5678800940513611, "learning_rate": 0.00021912595251595516, "loss": 1.7802, "step": 52920 }, { "epoch": 1.76, "grad_norm": 0.5935571789741516, "learning_rate": 0.0002191158874072976, "loss": 1.7797, "step": 52921 }, { "epoch": 1.76, "grad_norm": 0.5631179809570312, "learning_rate": 0.00021910582239682128, "loss": 1.7999, "step": 52922 }, { "epoch": 1.76, "grad_norm": 0.5624579787254333, "learning_rate": 0.00021909575748453862, "loss": 1.8238, "step": 52923 }, { "epoch": 1.76, "grad_norm": 0.5671659111976624, "learning_rate": 0.00021908569267046186, "loss": 1.7228, "step": 52924 }, { "epoch": 1.76, "grad_norm": 0.5642794966697693, "learning_rate": 0.00021907562795460306, "loss": 1.7231, "step": 52925 }, { "epoch": 1.76, "grad_norm": 0.5732996463775635, "learning_rate": 0.00021906556333697454, "loss": 1.7767, "step": 52926 }, { "epoch": 1.76, "grad_norm": 0.5855042934417725, "learning_rate": 0.00021905549881758843, "loss": 1.7799, "step": 52927 }, { "epoch": 1.76, "grad_norm": 0.5475044846534729, "learning_rate": 0.00021904543439645721, "loss": 1.7593, "step": 52928 }, { "epoch": 1.76, "grad_norm": 0.545876145362854, "learning_rate": 0.00021903537007359272, "loss": 1.7999, "step": 52929 }, { "epoch": 1.76, "grad_norm": 0.5680086016654968, "learning_rate": 0.00021902530584900741, "loss": 1.7941, "step": 52930 }, { "epoch": 1.76, "grad_norm": 0.5511757731437683, "learning_rate": 0.00021901524172271354, "loss": 1.75, "step": 52931 }, { "epoch": 1.76, "grad_norm": 0.5808243751525879, "learning_rate": 0.00021900517769472315, "loss": 1.7789, "step": 52932 }, { "epoch": 1.76, "grad_norm": 0.5553603172302246, "learning_rate": 0.00021899511376504856, "loss": 1.6227, "step": 52933 }, { "epoch": 1.76, "grad_norm": 0.5767735242843628, "learning_rate": 0.00021898504993370202, "loss": 1.7866, "step": 52934 }, { "epoch": 1.76, "grad_norm": 0.5883725881576538, "learning_rate": 0.00021897498620069563, "loss": 1.7296, "step": 52935 }, { "epoch": 1.76, "grad_norm": 0.5536008477210999, "learning_rate": 0.00021896492256604156, "loss": 1.6633, "step": 52936 }, { "epoch": 1.76, "grad_norm": 0.5669912099838257, "learning_rate": 0.00021895485902975227, "loss": 1.8015, "step": 52937 }, { "epoch": 1.76, "grad_norm": 0.5859702229499817, "learning_rate": 0.00021894479559183988, "loss": 1.7625, "step": 52938 }, { "epoch": 1.76, "grad_norm": 0.5881221294403076, "learning_rate": 0.00021893473225231646, "loss": 1.72, "step": 52939 }, { "epoch": 1.76, "grad_norm": 0.555897057056427, "learning_rate": 0.00021892466901119438, "loss": 1.7601, "step": 52940 }, { "epoch": 1.76, "grad_norm": 0.5589478611946106, "learning_rate": 0.00021891460586848587, "loss": 1.7136, "step": 52941 }, { "epoch": 1.76, "grad_norm": 0.5644801259040833, "learning_rate": 0.000218904542824203, "loss": 1.7834, "step": 52942 }, { "epoch": 1.76, "grad_norm": 0.5648894906044006, "learning_rate": 0.000218894479878358, "loss": 1.754, "step": 52943 }, { "epoch": 1.76, "grad_norm": 0.5662654638290405, "learning_rate": 0.00021888441703096328, "loss": 1.7164, "step": 52944 }, { "epoch": 1.76, "grad_norm": 0.564105749130249, "learning_rate": 0.00021887435428203086, "loss": 1.7588, "step": 52945 }, { "epoch": 1.76, "grad_norm": 0.5520594716072083, "learning_rate": 0.00021886429163157301, "loss": 1.7872, "step": 52946 }, { "epoch": 1.76, "grad_norm": 0.5383750796318054, "learning_rate": 0.00021885422907960198, "loss": 1.7793, "step": 52947 }, { "epoch": 1.76, "grad_norm": 0.5667356252670288, "learning_rate": 0.00021884416662613002, "loss": 1.6991, "step": 52948 }, { "epoch": 1.76, "grad_norm": 0.5690362453460693, "learning_rate": 0.0002188341042711692, "loss": 1.747, "step": 52949 }, { "epoch": 1.76, "grad_norm": 0.571151614189148, "learning_rate": 0.00021882404201473179, "loss": 1.8047, "step": 52950 }, { "epoch": 1.76, "grad_norm": 0.5587547421455383, "learning_rate": 0.00021881397985683008, "loss": 1.6618, "step": 52951 }, { "epoch": 1.76, "grad_norm": 0.5472184419631958, "learning_rate": 0.00021880391779747628, "loss": 1.7478, "step": 52952 }, { "epoch": 1.76, "grad_norm": 0.5547398924827576, "learning_rate": 0.00021879385583668248, "loss": 1.7539, "step": 52953 }, { "epoch": 1.76, "grad_norm": 0.5582316517829895, "learning_rate": 0.00021878379397446106, "loss": 1.7642, "step": 52954 }, { "epoch": 1.76, "grad_norm": 0.5762203931808472, "learning_rate": 0.00021877373221082413, "loss": 1.7936, "step": 52955 }, { "epoch": 1.76, "grad_norm": 0.5742154717445374, "learning_rate": 0.00021876367054578385, "loss": 1.7587, "step": 52956 }, { "epoch": 1.76, "grad_norm": 0.5500802993774414, "learning_rate": 0.0002187536089793525, "loss": 1.7396, "step": 52957 }, { "epoch": 1.76, "grad_norm": 0.566620409488678, "learning_rate": 0.0002187435475115424, "loss": 1.7314, "step": 52958 }, { "epoch": 1.76, "grad_norm": 0.5521875619888306, "learning_rate": 0.00021873348614236557, "loss": 1.7012, "step": 52959 }, { "epoch": 1.76, "grad_norm": 0.5540353655815125, "learning_rate": 0.00021872342487183434, "loss": 1.7455, "step": 52960 }, { "epoch": 1.76, "grad_norm": 0.5697758793830872, "learning_rate": 0.00021871336369996098, "loss": 1.7232, "step": 52961 }, { "epoch": 1.76, "grad_norm": 0.5687609910964966, "learning_rate": 0.00021870330262675753, "loss": 1.6747, "step": 52962 }, { "epoch": 1.76, "grad_norm": 0.5712807178497314, "learning_rate": 0.00021869324165223628, "loss": 1.7324, "step": 52963 }, { "epoch": 1.76, "grad_norm": 0.5653950572013855, "learning_rate": 0.00021868318077640953, "loss": 1.7331, "step": 52964 }, { "epoch": 1.76, "grad_norm": 0.5611174702644348, "learning_rate": 0.0002186731199992893, "loss": 1.7254, "step": 52965 }, { "epoch": 1.76, "grad_norm": 0.5600690245628357, "learning_rate": 0.00021866305932088798, "loss": 1.7393, "step": 52966 }, { "epoch": 1.76, "grad_norm": 0.5647915601730347, "learning_rate": 0.0002186529987412177, "loss": 1.7724, "step": 52967 }, { "epoch": 1.76, "grad_norm": 0.5585087537765503, "learning_rate": 0.0002186429382602908, "loss": 1.7519, "step": 52968 }, { "epoch": 1.76, "grad_norm": 0.5766898989677429, "learning_rate": 0.00021863287787811928, "loss": 1.7054, "step": 52969 }, { "epoch": 1.76, "grad_norm": 0.5669436454772949, "learning_rate": 0.00021862281759471542, "loss": 1.7065, "step": 52970 }, { "epoch": 1.76, "grad_norm": 0.5761048793792725, "learning_rate": 0.00021861275741009165, "loss": 1.7497, "step": 52971 }, { "epoch": 1.76, "grad_norm": 0.5391499996185303, "learning_rate": 0.00021860269732425984, "loss": 1.7216, "step": 52972 }, { "epoch": 1.76, "grad_norm": 0.5720894932746887, "learning_rate": 0.00021859263733723234, "loss": 1.7642, "step": 52973 }, { "epoch": 1.76, "grad_norm": 0.5719680190086365, "learning_rate": 0.00021858257744902153, "loss": 1.7905, "step": 52974 }, { "epoch": 1.76, "grad_norm": 0.5624791383743286, "learning_rate": 0.00021857251765963938, "loss": 1.7486, "step": 52975 }, { "epoch": 1.76, "grad_norm": 0.5638854503631592, "learning_rate": 0.00021856245796909822, "loss": 1.7021, "step": 52976 }, { "epoch": 1.76, "grad_norm": 0.5677418112754822, "learning_rate": 0.00021855239837741017, "loss": 1.7562, "step": 52977 }, { "epoch": 1.76, "grad_norm": 0.5724778175354004, "learning_rate": 0.00021854233888458766, "loss": 1.7947, "step": 52978 }, { "epoch": 1.76, "grad_norm": 0.5552202463150024, "learning_rate": 0.00021853227949064256, "loss": 1.7799, "step": 52979 }, { "epoch": 1.76, "grad_norm": 0.5942884087562561, "learning_rate": 0.00021852222019558737, "loss": 1.7835, "step": 52980 }, { "epoch": 1.76, "grad_norm": 0.562583327293396, "learning_rate": 0.00021851216099943421, "loss": 1.7244, "step": 52981 }, { "epoch": 1.76, "grad_norm": 0.5605253577232361, "learning_rate": 0.0002185021019021953, "loss": 1.7826, "step": 52982 }, { "epoch": 1.76, "grad_norm": 0.5651002526283264, "learning_rate": 0.00021849204290388277, "loss": 1.7445, "step": 52983 }, { "epoch": 1.76, "grad_norm": 0.5472341775894165, "learning_rate": 0.00021848198400450898, "loss": 1.7457, "step": 52984 }, { "epoch": 1.76, "grad_norm": 0.5768225193023682, "learning_rate": 0.00021847192520408595, "loss": 1.7841, "step": 52985 }, { "epoch": 1.76, "grad_norm": 0.5524844527244568, "learning_rate": 0.00021846186650262595, "loss": 1.7436, "step": 52986 }, { "epoch": 1.76, "grad_norm": 0.5785751938819885, "learning_rate": 0.0002184518079001413, "loss": 1.7747, "step": 52987 }, { "epoch": 1.76, "grad_norm": 0.5409256815910339, "learning_rate": 0.00021844174939664418, "loss": 1.6315, "step": 52988 }, { "epoch": 1.76, "grad_norm": 0.5859010219573975, "learning_rate": 0.00021843169099214674, "loss": 1.7448, "step": 52989 }, { "epoch": 1.76, "grad_norm": 0.5473955869674683, "learning_rate": 0.00021842163268666117, "loss": 1.7891, "step": 52990 }, { "epoch": 1.76, "grad_norm": 0.5554314255714417, "learning_rate": 0.0002184115744801998, "loss": 1.7593, "step": 52991 }, { "epoch": 1.76, "grad_norm": 0.5708075761795044, "learning_rate": 0.00021840151637277466, "loss": 1.7168, "step": 52992 }, { "epoch": 1.76, "grad_norm": 0.5547173619270325, "learning_rate": 0.00021839145836439802, "loss": 1.7945, "step": 52993 }, { "epoch": 1.76, "grad_norm": 0.552009642124176, "learning_rate": 0.00021838140045508234, "loss": 1.7957, "step": 52994 }, { "epoch": 1.76, "grad_norm": 0.5636935830116272, "learning_rate": 0.0002183713426448394, "loss": 1.691, "step": 52995 }, { "epoch": 1.76, "grad_norm": 0.5712794065475464, "learning_rate": 0.00021836128493368166, "loss": 1.7422, "step": 52996 }, { "epoch": 1.76, "grad_norm": 0.5631189346313477, "learning_rate": 0.0002183512273216213, "loss": 1.6529, "step": 52997 }, { "epoch": 1.76, "grad_norm": 0.5593019127845764, "learning_rate": 0.00021834116980867064, "loss": 1.7924, "step": 52998 }, { "epoch": 1.76, "grad_norm": 0.5749477744102478, "learning_rate": 0.0002183311123948416, "loss": 1.7229, "step": 52999 }, { "epoch": 1.76, "grad_norm": 0.5583219528198242, "learning_rate": 0.0002183210550801466, "loss": 1.7247, "step": 53000 }, { "epoch": 1.76, "grad_norm": 0.5867478847503662, "learning_rate": 0.00021831099786459793, "loss": 1.7135, "step": 53001 }, { "epoch": 1.76, "grad_norm": 0.5642802119255066, "learning_rate": 0.00021830094074820752, "loss": 1.7893, "step": 53002 }, { "epoch": 1.76, "grad_norm": 0.59474116563797, "learning_rate": 0.00021829088373098777, "loss": 1.7747, "step": 53003 }, { "epoch": 1.76, "grad_norm": 0.6239389181137085, "learning_rate": 0.00021828082681295093, "loss": 1.7557, "step": 53004 }, { "epoch": 1.76, "grad_norm": 0.5554521083831787, "learning_rate": 0.00021827076999410906, "loss": 1.779, "step": 53005 }, { "epoch": 1.76, "grad_norm": 0.5533464550971985, "learning_rate": 0.0002182607132744744, "loss": 1.7557, "step": 53006 }, { "epoch": 1.76, "grad_norm": 0.5626137256622314, "learning_rate": 0.00021825065665405916, "loss": 1.6833, "step": 53007 }, { "epoch": 1.76, "grad_norm": 0.5607916116714478, "learning_rate": 0.00021824060013287576, "loss": 1.6579, "step": 53008 }, { "epoch": 1.76, "grad_norm": 0.5518122315406799, "learning_rate": 0.000218230543710936, "loss": 1.6766, "step": 53009 }, { "epoch": 1.76, "grad_norm": 0.5605218410491943, "learning_rate": 0.00021822048738825243, "loss": 1.7687, "step": 53010 }, { "epoch": 1.76, "grad_norm": 0.5693050026893616, "learning_rate": 0.00021821043116483716, "loss": 1.7236, "step": 53011 }, { "epoch": 1.76, "grad_norm": 0.5604596734046936, "learning_rate": 0.00021820037504070237, "loss": 1.771, "step": 53012 }, { "epoch": 1.76, "grad_norm": 0.5918126106262207, "learning_rate": 0.0002181903190158602, "loss": 1.7844, "step": 53013 }, { "epoch": 1.76, "grad_norm": 0.5507820844650269, "learning_rate": 0.00021818026309032302, "loss": 1.6509, "step": 53014 }, { "epoch": 1.76, "grad_norm": 1.8411033153533936, "learning_rate": 0.0002181702072641029, "loss": 1.7126, "step": 53015 }, { "epoch": 1.76, "grad_norm": 0.5639929175376892, "learning_rate": 0.00021816015153721202, "loss": 1.7407, "step": 53016 }, { "epoch": 1.76, "grad_norm": 0.5413669943809509, "learning_rate": 0.0002181500959096627, "loss": 1.6641, "step": 53017 }, { "epoch": 1.76, "grad_norm": 0.5511695146560669, "learning_rate": 0.00021814004038146718, "loss": 1.6881, "step": 53018 }, { "epoch": 1.76, "grad_norm": 0.5783937573432922, "learning_rate": 0.00021812998495263753, "loss": 1.7879, "step": 53019 }, { "epoch": 1.76, "grad_norm": 0.5759350061416626, "learning_rate": 0.00021811992962318604, "loss": 1.7096, "step": 53020 }, { "epoch": 1.76, "grad_norm": 0.5807304978370667, "learning_rate": 0.0002181098743931249, "loss": 1.6758, "step": 53021 }, { "epoch": 1.76, "grad_norm": 0.566608190536499, "learning_rate": 0.00021809981926246627, "loss": 1.6727, "step": 53022 }, { "epoch": 1.76, "grad_norm": 0.5835307240486145, "learning_rate": 0.0002180897642312223, "loss": 1.6619, "step": 53023 }, { "epoch": 1.76, "grad_norm": 0.5519641041755676, "learning_rate": 0.00021807970929940554, "loss": 1.6491, "step": 53024 }, { "epoch": 1.76, "grad_norm": 0.5418249368667603, "learning_rate": 0.0002180696544670277, "loss": 1.69, "step": 53025 }, { "epoch": 1.76, "grad_norm": 0.5774908065795898, "learning_rate": 0.00021805959973410132, "loss": 1.7135, "step": 53026 }, { "epoch": 1.76, "grad_norm": 0.5586768388748169, "learning_rate": 0.0002180495451006385, "loss": 1.6979, "step": 53027 }, { "epoch": 1.76, "grad_norm": 0.5575814247131348, "learning_rate": 0.00021803949056665151, "loss": 1.6863, "step": 53028 }, { "epoch": 1.76, "grad_norm": 0.5530298948287964, "learning_rate": 0.00021802943613215246, "loss": 1.7317, "step": 53029 }, { "epoch": 1.76, "grad_norm": 0.5514469742774963, "learning_rate": 0.00021801938179715353, "loss": 1.7415, "step": 53030 }, { "epoch": 1.76, "grad_norm": 0.5423330068588257, "learning_rate": 0.0002180093275616672, "loss": 1.8097, "step": 53031 }, { "epoch": 1.76, "grad_norm": 0.5618033409118652, "learning_rate": 0.00021799927342570522, "loss": 1.8099, "step": 53032 }, { "epoch": 1.76, "grad_norm": 0.564687192440033, "learning_rate": 0.00021798921938928016, "loss": 1.7195, "step": 53033 }, { "epoch": 1.76, "grad_norm": 0.5477091670036316, "learning_rate": 0.00021797916545240412, "loss": 1.7185, "step": 53034 }, { "epoch": 1.76, "grad_norm": 0.5639908909797668, "learning_rate": 0.00021796911161508925, "loss": 1.7213, "step": 53035 }, { "epoch": 1.76, "grad_norm": 0.5576368570327759, "learning_rate": 0.0002179590578773478, "loss": 1.6967, "step": 53036 }, { "epoch": 1.76, "grad_norm": 0.5367525219917297, "learning_rate": 0.0002179490042391919, "loss": 1.6343, "step": 53037 }, { "epoch": 1.76, "grad_norm": 0.5806555151939392, "learning_rate": 0.000217938950700634, "loss": 1.6592, "step": 53038 }, { "epoch": 1.76, "grad_norm": 0.5599269270896912, "learning_rate": 0.00021792889726168592, "loss": 1.6841, "step": 53039 }, { "epoch": 1.76, "grad_norm": 0.5714057087898254, "learning_rate": 0.00021791884392236013, "loss": 1.7777, "step": 53040 }, { "epoch": 1.76, "grad_norm": 0.5623928308486938, "learning_rate": 0.0002179087906826689, "loss": 1.6654, "step": 53041 }, { "epoch": 1.76, "grad_norm": 0.5601971745491028, "learning_rate": 0.00021789873754262415, "loss": 1.6722, "step": 53042 }, { "epoch": 1.76, "grad_norm": 0.5748767852783203, "learning_rate": 0.00021788868450223827, "loss": 1.7942, "step": 53043 }, { "epoch": 1.76, "grad_norm": 0.5589656233787537, "learning_rate": 0.00021787863156152336, "loss": 1.7711, "step": 53044 }, { "epoch": 1.76, "grad_norm": 0.5502819418907166, "learning_rate": 0.00021786857872049188, "loss": 1.748, "step": 53045 }, { "epoch": 1.76, "grad_norm": 0.5609118342399597, "learning_rate": 0.00021785852597915566, "loss": 1.742, "step": 53046 }, { "epoch": 1.76, "grad_norm": 0.5686896443367004, "learning_rate": 0.00021784847333752717, "loss": 1.7849, "step": 53047 }, { "epoch": 1.76, "grad_norm": 0.5659649968147278, "learning_rate": 0.00021783842079561858, "loss": 1.7487, "step": 53048 }, { "epoch": 1.76, "grad_norm": 0.5562857985496521, "learning_rate": 0.00021782836835344198, "loss": 1.7345, "step": 53049 }, { "epoch": 1.76, "grad_norm": 0.5452151894569397, "learning_rate": 0.0002178183160110096, "loss": 1.6432, "step": 53050 }, { "epoch": 1.77, "grad_norm": 0.5666295886039734, "learning_rate": 0.00021780826376833378, "loss": 1.7516, "step": 53051 }, { "epoch": 1.77, "grad_norm": 0.5617403984069824, "learning_rate": 0.00021779821162542655, "loss": 1.7725, "step": 53052 }, { "epoch": 1.77, "grad_norm": 0.5402332544326782, "learning_rate": 0.00021778815958230007, "loss": 1.7166, "step": 53053 }, { "epoch": 1.77, "grad_norm": 0.5499062538146973, "learning_rate": 0.00021777810763896676, "loss": 1.7205, "step": 53054 }, { "epoch": 1.77, "grad_norm": 0.5586181879043579, "learning_rate": 0.00021776805579543876, "loss": 1.7055, "step": 53055 }, { "epoch": 1.77, "grad_norm": 0.5482211112976074, "learning_rate": 0.00021775800405172816, "loss": 1.762, "step": 53056 }, { "epoch": 1.77, "grad_norm": 0.5652394890785217, "learning_rate": 0.0002177479524078472, "loss": 1.739, "step": 53057 }, { "epoch": 1.77, "grad_norm": 0.5583229064941406, "learning_rate": 0.00021773790086380824, "loss": 1.6774, "step": 53058 }, { "epoch": 1.77, "grad_norm": 0.58056640625, "learning_rate": 0.00021772784941962323, "loss": 1.7473, "step": 53059 }, { "epoch": 1.77, "grad_norm": 0.5573889017105103, "learning_rate": 0.00021771779807530444, "loss": 1.7677, "step": 53060 }, { "epoch": 1.77, "grad_norm": 0.5516056418418884, "learning_rate": 0.00021770774683086425, "loss": 1.7896, "step": 53061 }, { "epoch": 1.77, "grad_norm": 0.575748085975647, "learning_rate": 0.00021769769568631467, "loss": 1.7634, "step": 53062 }, { "epoch": 1.77, "grad_norm": 0.5732749104499817, "learning_rate": 0.00021768764464166795, "loss": 1.7112, "step": 53063 }, { "epoch": 1.77, "grad_norm": 0.5687710046768188, "learning_rate": 0.00021767759369693628, "loss": 1.7333, "step": 53064 }, { "epoch": 1.77, "grad_norm": 0.5695346593856812, "learning_rate": 0.00021766754285213198, "loss": 1.8093, "step": 53065 }, { "epoch": 1.77, "grad_norm": 0.5805347561836243, "learning_rate": 0.00021765749210726708, "loss": 1.775, "step": 53066 }, { "epoch": 1.77, "grad_norm": 0.5663564801216125, "learning_rate": 0.00021764744146235377, "loss": 1.6976, "step": 53067 }, { "epoch": 1.77, "grad_norm": 0.5615698099136353, "learning_rate": 0.00021763739091740448, "loss": 1.8064, "step": 53068 }, { "epoch": 1.77, "grad_norm": 0.5557622313499451, "learning_rate": 0.0002176273404724312, "loss": 1.7472, "step": 53069 }, { "epoch": 1.77, "grad_norm": 0.5518411993980408, "learning_rate": 0.00021761729012744618, "loss": 1.7147, "step": 53070 }, { "epoch": 1.77, "grad_norm": 0.5490729212760925, "learning_rate": 0.00021760723988246166, "loss": 1.756, "step": 53071 }, { "epoch": 1.77, "grad_norm": 0.5910466313362122, "learning_rate": 0.00021759718973748984, "loss": 1.7749, "step": 53072 }, { "epoch": 1.77, "grad_norm": 0.559075117111206, "learning_rate": 0.00021758713969254281, "loss": 1.6921, "step": 53073 }, { "epoch": 1.77, "grad_norm": 0.5670349597930908, "learning_rate": 0.00021757708974763285, "loss": 1.744, "step": 53074 }, { "epoch": 1.77, "grad_norm": 0.5622766017913818, "learning_rate": 0.0002175670399027722, "loss": 1.7466, "step": 53075 }, { "epoch": 1.77, "grad_norm": 0.5538992881774902, "learning_rate": 0.00021755699015797302, "loss": 1.8116, "step": 53076 }, { "epoch": 1.77, "grad_norm": 0.5676336884498596, "learning_rate": 0.0002175469405132475, "loss": 1.7288, "step": 53077 }, { "epoch": 1.77, "grad_norm": 0.5684890747070312, "learning_rate": 0.00021753689096860786, "loss": 1.7582, "step": 53078 }, { "epoch": 1.77, "grad_norm": 0.5610195398330688, "learning_rate": 0.00021752684152406628, "loss": 1.712, "step": 53079 }, { "epoch": 1.77, "grad_norm": 0.5847916007041931, "learning_rate": 0.00021751679217963494, "loss": 1.7135, "step": 53080 }, { "epoch": 1.77, "grad_norm": 0.5630136728286743, "learning_rate": 0.0002175067429353261, "loss": 1.7743, "step": 53081 }, { "epoch": 1.77, "grad_norm": 0.5567457675933838, "learning_rate": 0.00021749669379115185, "loss": 1.7282, "step": 53082 }, { "epoch": 1.77, "grad_norm": 0.5638718605041504, "learning_rate": 0.00021748664474712448, "loss": 1.818, "step": 53083 }, { "epoch": 1.77, "grad_norm": 0.5816875100135803, "learning_rate": 0.00021747659580325618, "loss": 1.7204, "step": 53084 }, { "epoch": 1.77, "grad_norm": 0.595669150352478, "learning_rate": 0.00021746654695955917, "loss": 1.6885, "step": 53085 }, { "epoch": 1.77, "grad_norm": 0.5548216104507446, "learning_rate": 0.0002174564982160456, "loss": 1.7455, "step": 53086 }, { "epoch": 1.77, "grad_norm": 0.5423032641410828, "learning_rate": 0.00021744644957272756, "loss": 1.7583, "step": 53087 }, { "epoch": 1.77, "grad_norm": 0.5705356001853943, "learning_rate": 0.00021743640102961758, "loss": 1.71, "step": 53088 }, { "epoch": 1.77, "grad_norm": 0.5805356502532959, "learning_rate": 0.00021742635258672748, "loss": 1.7882, "step": 53089 }, { "epoch": 1.77, "grad_norm": 0.5421903729438782, "learning_rate": 0.00021741630424406967, "loss": 1.7455, "step": 53090 }, { "epoch": 1.77, "grad_norm": 0.5694786310195923, "learning_rate": 0.00021740625600165633, "loss": 1.702, "step": 53091 }, { "epoch": 1.77, "grad_norm": 0.556644856929779, "learning_rate": 0.00021739620785949957, "loss": 1.7188, "step": 53092 }, { "epoch": 1.77, "grad_norm": 0.5476557016372681, "learning_rate": 0.00021738615981761167, "loss": 1.6706, "step": 53093 }, { "epoch": 1.77, "grad_norm": 0.5522710680961609, "learning_rate": 0.00021737611187600472, "loss": 1.7878, "step": 53094 }, { "epoch": 1.77, "grad_norm": 0.5763715505599976, "learning_rate": 0.00021736606403469118, "loss": 1.7436, "step": 53095 }, { "epoch": 1.77, "grad_norm": 0.5713068842887878, "learning_rate": 0.00021735601629368286, "loss": 1.6748, "step": 53096 }, { "epoch": 1.77, "grad_norm": 0.5834996104240417, "learning_rate": 0.00021734596865299226, "loss": 1.796, "step": 53097 }, { "epoch": 1.77, "grad_norm": 0.5718186497688293, "learning_rate": 0.00021733592111263154, "loss": 1.7433, "step": 53098 }, { "epoch": 1.77, "grad_norm": 0.5917965769767761, "learning_rate": 0.00021732587367261273, "loss": 1.7661, "step": 53099 }, { "epoch": 1.77, "grad_norm": 0.5891146659851074, "learning_rate": 0.00021731582633294813, "loss": 1.8226, "step": 53100 }, { "epoch": 1.77, "grad_norm": 0.5829676389694214, "learning_rate": 0.00021730577909365005, "loss": 1.8087, "step": 53101 }, { "epoch": 1.77, "grad_norm": 0.5687805414199829, "learning_rate": 0.00021729573195473047, "loss": 1.8215, "step": 53102 }, { "epoch": 1.77, "grad_norm": 0.5604267120361328, "learning_rate": 0.0002172856849162016, "loss": 1.7277, "step": 53103 }, { "epoch": 1.77, "grad_norm": 0.550403356552124, "learning_rate": 0.00021727563797807584, "loss": 1.6695, "step": 53104 }, { "epoch": 1.77, "grad_norm": 0.5755769610404968, "learning_rate": 0.00021726559114036527, "loss": 1.786, "step": 53105 }, { "epoch": 1.77, "grad_norm": 0.5566347241401672, "learning_rate": 0.00021725554440308204, "loss": 1.7506, "step": 53106 }, { "epoch": 1.77, "grad_norm": 0.5339310169219971, "learning_rate": 0.00021724549776623844, "loss": 1.7355, "step": 53107 }, { "epoch": 1.77, "grad_norm": 0.550718367099762, "learning_rate": 0.00021723545122984662, "loss": 1.6819, "step": 53108 }, { "epoch": 1.77, "grad_norm": 0.557246208190918, "learning_rate": 0.0002172254047939187, "loss": 1.6835, "step": 53109 }, { "epoch": 1.77, "grad_norm": 0.5997654795646667, "learning_rate": 0.0002172153584584669, "loss": 1.7924, "step": 53110 }, { "epoch": 1.77, "grad_norm": 0.5695965886116028, "learning_rate": 0.00021720531222350363, "loss": 1.7478, "step": 53111 }, { "epoch": 1.77, "grad_norm": 0.5589898228645325, "learning_rate": 0.00021719526608904074, "loss": 1.7191, "step": 53112 }, { "epoch": 1.77, "grad_norm": 0.5579617619514465, "learning_rate": 0.00021718522005509063, "loss": 1.6289, "step": 53113 }, { "epoch": 1.77, "grad_norm": 0.556206464767456, "learning_rate": 0.0002171751741216655, "loss": 1.7547, "step": 53114 }, { "epoch": 1.77, "grad_norm": 0.5586268901824951, "learning_rate": 0.00021716512828877754, "loss": 1.6901, "step": 53115 }, { "epoch": 1.77, "grad_norm": 0.5863971710205078, "learning_rate": 0.00021715508255643886, "loss": 1.7917, "step": 53116 }, { "epoch": 1.77, "grad_norm": 0.5627550482749939, "learning_rate": 0.00021714503692466165, "loss": 1.6571, "step": 53117 }, { "epoch": 1.77, "grad_norm": 0.5717164874076843, "learning_rate": 0.00021713499139345833, "loss": 1.7548, "step": 53118 }, { "epoch": 1.77, "grad_norm": 0.5701569318771362, "learning_rate": 0.00021712494596284074, "loss": 1.7787, "step": 53119 }, { "epoch": 1.77, "grad_norm": 0.5779879689216614, "learning_rate": 0.0002171149006328213, "loss": 1.7475, "step": 53120 }, { "epoch": 1.77, "grad_norm": 0.5534801483154297, "learning_rate": 0.00021710485540341225, "loss": 1.7694, "step": 53121 }, { "epoch": 1.77, "grad_norm": 0.5473880171775818, "learning_rate": 0.0002170948102746256, "loss": 1.7296, "step": 53122 }, { "epoch": 1.77, "grad_norm": 0.5635455250740051, "learning_rate": 0.00021708476524647365, "loss": 1.7352, "step": 53123 }, { "epoch": 1.77, "grad_norm": 0.5613542199134827, "learning_rate": 0.0002170747203189685, "loss": 1.6897, "step": 53124 }, { "epoch": 1.77, "grad_norm": 0.5871545076370239, "learning_rate": 0.00021706467549212263, "loss": 1.7507, "step": 53125 }, { "epoch": 1.77, "grad_norm": 0.5827954411506653, "learning_rate": 0.00021705463076594783, "loss": 1.7372, "step": 53126 }, { "epoch": 1.77, "grad_norm": 0.5481988191604614, "learning_rate": 0.00021704458614045651, "loss": 1.7381, "step": 53127 }, { "epoch": 1.77, "grad_norm": 0.5584032535552979, "learning_rate": 0.00021703454161566094, "loss": 1.7153, "step": 53128 }, { "epoch": 1.77, "grad_norm": 0.561908483505249, "learning_rate": 0.00021702449719157316, "loss": 1.7387, "step": 53129 }, { "epoch": 1.77, "grad_norm": 0.5717806220054626, "learning_rate": 0.0002170144528682054, "loss": 1.7459, "step": 53130 }, { "epoch": 1.77, "grad_norm": 0.5759108066558838, "learning_rate": 0.00021700440864556994, "loss": 1.7789, "step": 53131 }, { "epoch": 1.77, "grad_norm": 0.5785717964172363, "learning_rate": 0.00021699436452367882, "loss": 1.7821, "step": 53132 }, { "epoch": 1.77, "grad_norm": 0.5603093504905701, "learning_rate": 0.00021698432050254427, "loss": 1.7198, "step": 53133 }, { "epoch": 1.77, "grad_norm": 0.5823081135749817, "learning_rate": 0.0002169742765821786, "loss": 1.7654, "step": 53134 }, { "epoch": 1.77, "grad_norm": 0.5976523160934448, "learning_rate": 0.00021696423276259397, "loss": 1.6615, "step": 53135 }, { "epoch": 1.77, "grad_norm": 0.5480755567550659, "learning_rate": 0.00021695418904380245, "loss": 1.7502, "step": 53136 }, { "epoch": 1.77, "grad_norm": 0.5865883827209473, "learning_rate": 0.00021694414542581627, "loss": 1.6996, "step": 53137 }, { "epoch": 1.77, "grad_norm": 0.5735799670219421, "learning_rate": 0.0002169341019086478, "loss": 1.794, "step": 53138 }, { "epoch": 1.77, "grad_norm": 0.5761355757713318, "learning_rate": 0.00021692405849230896, "loss": 1.6992, "step": 53139 }, { "epoch": 1.77, "grad_norm": 0.5584927201271057, "learning_rate": 0.00021691401517681203, "loss": 1.7255, "step": 53140 }, { "epoch": 1.77, "grad_norm": 0.5613964200019836, "learning_rate": 0.00021690397196216947, "loss": 1.748, "step": 53141 }, { "epoch": 1.77, "grad_norm": 0.5550728440284729, "learning_rate": 0.00021689392884839301, "loss": 1.6876, "step": 53142 }, { "epoch": 1.77, "grad_norm": 0.5593236684799194, "learning_rate": 0.0002168838858354952, "loss": 1.8006, "step": 53143 }, { "epoch": 1.77, "grad_norm": 0.5560492873191833, "learning_rate": 0.00021687384292348803, "loss": 1.8116, "step": 53144 }, { "epoch": 1.77, "grad_norm": 0.5741224884986877, "learning_rate": 0.00021686380011238387, "loss": 1.7274, "step": 53145 }, { "epoch": 1.77, "grad_norm": 0.5777791142463684, "learning_rate": 0.00021685375740219473, "loss": 1.6876, "step": 53146 }, { "epoch": 1.77, "grad_norm": 0.5643320679664612, "learning_rate": 0.00021684371479293283, "loss": 1.7549, "step": 53147 }, { "epoch": 1.77, "grad_norm": 0.5568671226501465, "learning_rate": 0.00021683367228461062, "loss": 1.7441, "step": 53148 }, { "epoch": 1.77, "grad_norm": 0.5761916041374207, "learning_rate": 0.00021682362987723982, "loss": 1.7711, "step": 53149 }, { "epoch": 1.77, "grad_norm": 0.5511478781700134, "learning_rate": 0.000216813587570833, "loss": 1.7996, "step": 53150 }, { "epoch": 1.77, "grad_norm": 0.5519872307777405, "learning_rate": 0.00021680354536540228, "loss": 1.6674, "step": 53151 }, { "epoch": 1.77, "grad_norm": 0.5528324842453003, "learning_rate": 0.00021679350326095972, "loss": 1.7643, "step": 53152 }, { "epoch": 1.77, "grad_norm": 0.5529648661613464, "learning_rate": 0.00021678346125751757, "loss": 1.7086, "step": 53153 }, { "epoch": 1.77, "grad_norm": 0.5617604851722717, "learning_rate": 0.00021677341935508803, "loss": 1.7054, "step": 53154 }, { "epoch": 1.77, "grad_norm": 0.5744348764419556, "learning_rate": 0.00021676337755368344, "loss": 1.8218, "step": 53155 }, { "epoch": 1.77, "grad_norm": 0.5776305198669434, "learning_rate": 0.00021675333585331567, "loss": 1.7725, "step": 53156 }, { "epoch": 1.77, "grad_norm": 0.5572548508644104, "learning_rate": 0.00021674329425399715, "loss": 1.7253, "step": 53157 }, { "epoch": 1.77, "grad_norm": 0.555662989616394, "learning_rate": 0.00021673325275574004, "loss": 1.649, "step": 53158 }, { "epoch": 1.77, "grad_norm": 0.5899620652198792, "learning_rate": 0.00021672321135855643, "loss": 1.8062, "step": 53159 }, { "epoch": 1.77, "grad_norm": 0.5668824315071106, "learning_rate": 0.00021671317006245863, "loss": 1.7958, "step": 53160 }, { "epoch": 1.77, "grad_norm": 0.5691244006156921, "learning_rate": 0.00021670312886745864, "loss": 1.7525, "step": 53161 }, { "epoch": 1.77, "grad_norm": 0.5796030163764954, "learning_rate": 0.000216693087773569, "loss": 1.6932, "step": 53162 }, { "epoch": 1.77, "grad_norm": 0.5714533925056458, "learning_rate": 0.0002166830467808015, "loss": 1.6773, "step": 53163 }, { "epoch": 1.77, "grad_norm": 0.5635709762573242, "learning_rate": 0.00021667300588916854, "loss": 1.6931, "step": 53164 }, { "epoch": 1.77, "grad_norm": 0.5558324456214905, "learning_rate": 0.00021666296509868235, "loss": 1.7543, "step": 53165 }, { "epoch": 1.77, "grad_norm": 0.5624207854270935, "learning_rate": 0.00021665292440935498, "loss": 1.7587, "step": 53166 }, { "epoch": 1.77, "grad_norm": 0.5470188856124878, "learning_rate": 0.00021664288382119867, "loss": 1.6882, "step": 53167 }, { "epoch": 1.77, "grad_norm": 0.5548861026763916, "learning_rate": 0.0002166328433342257, "loss": 1.7381, "step": 53168 }, { "epoch": 1.77, "grad_norm": 0.5844188332557678, "learning_rate": 0.00021662280294844807, "loss": 1.824, "step": 53169 }, { "epoch": 1.77, "grad_norm": 0.5709677934646606, "learning_rate": 0.00021661276266387805, "loss": 1.6903, "step": 53170 }, { "epoch": 1.77, "grad_norm": 0.5484349131584167, "learning_rate": 0.00021660272248052792, "loss": 1.779, "step": 53171 }, { "epoch": 1.77, "grad_norm": 0.554996907711029, "learning_rate": 0.00021659268239840982, "loss": 1.7076, "step": 53172 }, { "epoch": 1.77, "grad_norm": 0.5522282123565674, "learning_rate": 0.00021658264241753588, "loss": 1.739, "step": 53173 }, { "epoch": 1.77, "grad_norm": 0.5671335458755493, "learning_rate": 0.0002165726025379183, "loss": 1.7783, "step": 53174 }, { "epoch": 1.77, "grad_norm": 0.5649815201759338, "learning_rate": 0.00021656256275956936, "loss": 1.7267, "step": 53175 }, { "epoch": 1.77, "grad_norm": 0.5647144317626953, "learning_rate": 0.0002165525230825011, "loss": 1.7789, "step": 53176 }, { "epoch": 1.77, "grad_norm": 0.5772868394851685, "learning_rate": 0.00021654248350672575, "loss": 1.7442, "step": 53177 }, { "epoch": 1.77, "grad_norm": 0.5764158368110657, "learning_rate": 0.00021653244403225562, "loss": 1.729, "step": 53178 }, { "epoch": 1.77, "grad_norm": 0.5779455900192261, "learning_rate": 0.00021652240465910275, "loss": 1.8114, "step": 53179 }, { "epoch": 1.77, "grad_norm": 0.5609429478645325, "learning_rate": 0.00021651236538727944, "loss": 1.6978, "step": 53180 }, { "epoch": 1.77, "grad_norm": 0.5816404819488525, "learning_rate": 0.00021650232621679775, "loss": 1.8132, "step": 53181 }, { "epoch": 1.77, "grad_norm": 0.5687428712844849, "learning_rate": 0.00021649228714766997, "loss": 1.799, "step": 53182 }, { "epoch": 1.77, "grad_norm": 0.5683069825172424, "learning_rate": 0.00021648224817990826, "loss": 1.7478, "step": 53183 }, { "epoch": 1.77, "grad_norm": 0.5921664237976074, "learning_rate": 0.0002164722093135247, "loss": 1.7698, "step": 53184 }, { "epoch": 1.77, "grad_norm": 0.5767970681190491, "learning_rate": 0.0002164621705485317, "loss": 1.7583, "step": 53185 }, { "epoch": 1.77, "grad_norm": 0.5845409035682678, "learning_rate": 0.00021645213188494123, "loss": 1.7632, "step": 53186 }, { "epoch": 1.77, "grad_norm": 0.5585552453994751, "learning_rate": 0.0002164420933227656, "loss": 1.7724, "step": 53187 }, { "epoch": 1.77, "grad_norm": 0.5681668519973755, "learning_rate": 0.000216432054862017, "loss": 1.7451, "step": 53188 }, { "epoch": 1.77, "grad_norm": 0.5677955150604248, "learning_rate": 0.0002164220165027075, "loss": 1.7455, "step": 53189 }, { "epoch": 1.77, "grad_norm": 0.5771797895431519, "learning_rate": 0.0002164119782448494, "loss": 1.7081, "step": 53190 }, { "epoch": 1.77, "grad_norm": 0.5684189796447754, "learning_rate": 0.00021640194008845472, "loss": 1.7094, "step": 53191 }, { "epoch": 1.77, "grad_norm": 0.5671722888946533, "learning_rate": 0.00021639190203353594, "loss": 1.7385, "step": 53192 }, { "epoch": 1.77, "grad_norm": 0.5690072774887085, "learning_rate": 0.00021638186408010497, "loss": 1.7162, "step": 53193 }, { "epoch": 1.77, "grad_norm": 0.5598737597465515, "learning_rate": 0.00021637182622817413, "loss": 1.6851, "step": 53194 }, { "epoch": 1.77, "grad_norm": 0.5671073794364929, "learning_rate": 0.00021636178847775562, "loss": 1.7706, "step": 53195 }, { "epoch": 1.77, "grad_norm": 0.5419412851333618, "learning_rate": 0.0002163517508288615, "loss": 1.7485, "step": 53196 }, { "epoch": 1.77, "grad_norm": 0.55743408203125, "learning_rate": 0.00021634171328150403, "loss": 1.7225, "step": 53197 }, { "epoch": 1.77, "grad_norm": 0.592645525932312, "learning_rate": 0.00021633167583569549, "loss": 1.8076, "step": 53198 }, { "epoch": 1.77, "grad_norm": 0.5637004375457764, "learning_rate": 0.0002163216384914478, "loss": 1.7696, "step": 53199 }, { "epoch": 1.77, "grad_norm": 0.5811740159988403, "learning_rate": 0.00021631160124877338, "loss": 1.7873, "step": 53200 }, { "epoch": 1.77, "grad_norm": 0.5527446269989014, "learning_rate": 0.00021630156410768438, "loss": 1.7328, "step": 53201 }, { "epoch": 1.77, "grad_norm": 0.5638286471366882, "learning_rate": 0.00021629152706819298, "loss": 1.6823, "step": 53202 }, { "epoch": 1.77, "grad_norm": 0.5571171045303345, "learning_rate": 0.00021628149013031128, "loss": 1.7574, "step": 53203 }, { "epoch": 1.77, "grad_norm": 0.5374204516410828, "learning_rate": 0.00021627145329405144, "loss": 1.7584, "step": 53204 }, { "epoch": 1.77, "grad_norm": 0.5417041778564453, "learning_rate": 0.00021626141655942596, "loss": 1.7568, "step": 53205 }, { "epoch": 1.77, "grad_norm": 0.5513191223144531, "learning_rate": 0.00021625137992644652, "loss": 1.7048, "step": 53206 }, { "epoch": 1.77, "grad_norm": 0.5592963695526123, "learning_rate": 0.0002162413433951257, "loss": 1.7158, "step": 53207 }, { "epoch": 1.77, "grad_norm": 0.5513197183609009, "learning_rate": 0.00021623130696547555, "loss": 1.6833, "step": 53208 }, { "epoch": 1.77, "grad_norm": 0.5696171522140503, "learning_rate": 0.00021622127063750823, "loss": 1.7627, "step": 53209 }, { "epoch": 1.77, "grad_norm": 0.5535875558853149, "learning_rate": 0.0002162112344112359, "loss": 1.7188, "step": 53210 }, { "epoch": 1.77, "grad_norm": 0.5684677362442017, "learning_rate": 0.0002162011982866708, "loss": 1.7233, "step": 53211 }, { "epoch": 1.77, "grad_norm": 0.5739210844039917, "learning_rate": 0.0002161911622638252, "loss": 1.7165, "step": 53212 }, { "epoch": 1.77, "grad_norm": 0.563208281993866, "learning_rate": 0.00021618112634271103, "loss": 1.7206, "step": 53213 }, { "epoch": 1.77, "grad_norm": 0.5704033374786377, "learning_rate": 0.00021617109052334064, "loss": 1.7724, "step": 53214 }, { "epoch": 1.77, "grad_norm": 0.5483601093292236, "learning_rate": 0.00021616105480572632, "loss": 1.7008, "step": 53215 }, { "epoch": 1.77, "grad_norm": 0.5833714008331299, "learning_rate": 0.00021615101918988006, "loss": 1.7409, "step": 53216 }, { "epoch": 1.77, "grad_norm": 0.5636204481124878, "learning_rate": 0.0002161409836758141, "loss": 1.7373, "step": 53217 }, { "epoch": 1.77, "grad_norm": 0.5846388936042786, "learning_rate": 0.00021613094826354064, "loss": 1.7477, "step": 53218 }, { "epoch": 1.77, "grad_norm": 0.5599942207336426, "learning_rate": 0.00021612091295307182, "loss": 1.7501, "step": 53219 }, { "epoch": 1.77, "grad_norm": 0.5627630949020386, "learning_rate": 0.00021611087774441983, "loss": 1.7086, "step": 53220 }, { "epoch": 1.77, "grad_norm": 0.5589980483055115, "learning_rate": 0.0002161008426375969, "loss": 1.6757, "step": 53221 }, { "epoch": 1.77, "grad_norm": 0.5897167921066284, "learning_rate": 0.00021609080763261524, "loss": 1.7779, "step": 53222 }, { "epoch": 1.77, "grad_norm": 0.5823259353637695, "learning_rate": 0.00021608077272948696, "loss": 1.8501, "step": 53223 }, { "epoch": 1.77, "grad_norm": 0.5782346129417419, "learning_rate": 0.00021607073792822423, "loss": 1.7662, "step": 53224 }, { "epoch": 1.77, "grad_norm": 0.5762997269630432, "learning_rate": 0.0002160607032288393, "loss": 1.6879, "step": 53225 }, { "epoch": 1.77, "grad_norm": 0.5450759530067444, "learning_rate": 0.00021605066863134427, "loss": 1.7921, "step": 53226 }, { "epoch": 1.77, "grad_norm": 0.5651350021362305, "learning_rate": 0.00021604063413575129, "loss": 1.6628, "step": 53227 }, { "epoch": 1.77, "grad_norm": 0.6058172583580017, "learning_rate": 0.00021603059974207277, "loss": 1.8152, "step": 53228 }, { "epoch": 1.77, "grad_norm": 0.573297917842865, "learning_rate": 0.00021602056545032058, "loss": 1.713, "step": 53229 }, { "epoch": 1.77, "grad_norm": 0.5609829425811768, "learning_rate": 0.0002160105312605071, "loss": 1.7541, "step": 53230 }, { "epoch": 1.77, "grad_norm": 0.5632793307304382, "learning_rate": 0.00021600049717264446, "loss": 1.7786, "step": 53231 }, { "epoch": 1.77, "grad_norm": 0.5481050610542297, "learning_rate": 0.0002159904631867449, "loss": 1.7576, "step": 53232 }, { "epoch": 1.77, "grad_norm": 0.5607383847236633, "learning_rate": 0.00021598042930282048, "loss": 1.7125, "step": 53233 }, { "epoch": 1.77, "grad_norm": 0.5608887672424316, "learning_rate": 0.00021597039552088336, "loss": 1.7099, "step": 53234 }, { "epoch": 1.77, "grad_norm": 0.5568759441375732, "learning_rate": 0.00021596036184094603, "loss": 1.7061, "step": 53235 }, { "epoch": 1.77, "grad_norm": 0.5633882284164429, "learning_rate": 0.00021595032826302022, "loss": 1.7227, "step": 53236 }, { "epoch": 1.77, "grad_norm": 0.5608574748039246, "learning_rate": 0.00021594029478711835, "loss": 1.7837, "step": 53237 }, { "epoch": 1.77, "grad_norm": 0.565122127532959, "learning_rate": 0.0002159302614132527, "loss": 1.7286, "step": 53238 }, { "epoch": 1.77, "grad_norm": 0.5491496920585632, "learning_rate": 0.00021592022814143525, "loss": 1.7914, "step": 53239 }, { "epoch": 1.77, "grad_norm": 0.5621215105056763, "learning_rate": 0.00021591019497167825, "loss": 1.736, "step": 53240 }, { "epoch": 1.77, "grad_norm": 0.567929208278656, "learning_rate": 0.0002159001619039938, "loss": 1.6715, "step": 53241 }, { "epoch": 1.77, "grad_norm": 0.5620205998420715, "learning_rate": 0.00021589012893839438, "loss": 1.796, "step": 53242 }, { "epoch": 1.77, "grad_norm": 0.564190685749054, "learning_rate": 0.00021588009607489175, "loss": 1.7118, "step": 53243 }, { "epoch": 1.77, "grad_norm": 0.5658192038536072, "learning_rate": 0.00021587006331349836, "loss": 1.726, "step": 53244 }, { "epoch": 1.77, "grad_norm": 0.5498701930046082, "learning_rate": 0.00021586003065422635, "loss": 1.79, "step": 53245 }, { "epoch": 1.77, "grad_norm": 0.581830620765686, "learning_rate": 0.00021584999809708783, "loss": 1.7384, "step": 53246 }, { "epoch": 1.77, "grad_norm": 0.5619534850120544, "learning_rate": 0.000215839965642095, "loss": 1.7025, "step": 53247 }, { "epoch": 1.77, "grad_norm": 0.5676419138908386, "learning_rate": 0.00021582993328926012, "loss": 1.7846, "step": 53248 }, { "epoch": 1.77, "grad_norm": 0.5609333515167236, "learning_rate": 0.00021581990103859526, "loss": 1.6871, "step": 53249 }, { "epoch": 1.77, "grad_norm": 0.5519362092018127, "learning_rate": 0.00021580986889011255, "loss": 1.7197, "step": 53250 }, { "epoch": 1.77, "grad_norm": 0.5693390965461731, "learning_rate": 0.0002157998368438243, "loss": 1.7802, "step": 53251 }, { "epoch": 1.77, "grad_norm": 0.5652189254760742, "learning_rate": 0.00021578980489974275, "loss": 1.7708, "step": 53252 }, { "epoch": 1.77, "grad_norm": 0.5910340547561646, "learning_rate": 0.00021577977305787987, "loss": 1.7745, "step": 53253 }, { "epoch": 1.77, "grad_norm": 0.547133207321167, "learning_rate": 0.00021576974131824796, "loss": 1.7626, "step": 53254 }, { "epoch": 1.77, "grad_norm": 0.5666205286979675, "learning_rate": 0.0002157597096808592, "loss": 1.7395, "step": 53255 }, { "epoch": 1.77, "grad_norm": 0.5757700800895691, "learning_rate": 0.00021574967814572572, "loss": 1.7393, "step": 53256 }, { "epoch": 1.77, "grad_norm": 0.5862870216369629, "learning_rate": 0.00021573964671285962, "loss": 1.6948, "step": 53257 }, { "epoch": 1.77, "grad_norm": 0.5680785179138184, "learning_rate": 0.0002157296153822734, "loss": 1.7101, "step": 53258 }, { "epoch": 1.77, "grad_norm": 0.5564923286437988, "learning_rate": 0.00021571958415397878, "loss": 1.6969, "step": 53259 }, { "epoch": 1.77, "grad_norm": 0.5706126093864441, "learning_rate": 0.00021570955302798828, "loss": 1.7118, "step": 53260 }, { "epoch": 1.77, "grad_norm": 0.5899351239204407, "learning_rate": 0.0002156995220043139, "loss": 1.7141, "step": 53261 }, { "epoch": 1.77, "grad_norm": 0.5655894875526428, "learning_rate": 0.00021568949108296797, "loss": 1.7539, "step": 53262 }, { "epoch": 1.77, "grad_norm": 0.569145143032074, "learning_rate": 0.0002156794602639625, "loss": 1.7539, "step": 53263 }, { "epoch": 1.77, "grad_norm": 0.5559637546539307, "learning_rate": 0.00021566942954730973, "loss": 1.787, "step": 53264 }, { "epoch": 1.77, "grad_norm": 0.5636852383613586, "learning_rate": 0.000215659398933022, "loss": 1.6937, "step": 53265 }, { "epoch": 1.77, "grad_norm": 0.5526819229125977, "learning_rate": 0.00021564936842111113, "loss": 1.6999, "step": 53266 }, { "epoch": 1.77, "grad_norm": 0.5534293055534363, "learning_rate": 0.00021563933801158958, "loss": 1.7589, "step": 53267 }, { "epoch": 1.77, "grad_norm": 0.5725886225700378, "learning_rate": 0.00021562930770446952, "loss": 1.7521, "step": 53268 }, { "epoch": 1.77, "grad_norm": 0.5623907446861267, "learning_rate": 0.00021561927749976295, "loss": 1.7498, "step": 53269 }, { "epoch": 1.77, "grad_norm": 0.5840973258018494, "learning_rate": 0.00021560924739748215, "loss": 1.7358, "step": 53270 }, { "epoch": 1.77, "grad_norm": 0.5748538970947266, "learning_rate": 0.00021559921739763924, "loss": 1.6961, "step": 53271 }, { "epoch": 1.77, "grad_norm": 0.5407553315162659, "learning_rate": 0.0002155891875002466, "loss": 1.7417, "step": 53272 }, { "epoch": 1.77, "grad_norm": 0.5604488253593445, "learning_rate": 0.00021557915770531612, "loss": 1.7589, "step": 53273 }, { "epoch": 1.77, "grad_norm": 0.5471654534339905, "learning_rate": 0.00021556912801286011, "loss": 1.7719, "step": 53274 }, { "epoch": 1.77, "grad_norm": 0.5562669038772583, "learning_rate": 0.00021555909842289086, "loss": 1.6765, "step": 53275 }, { "epoch": 1.77, "grad_norm": 0.5696810483932495, "learning_rate": 0.00021554906893542033, "loss": 1.7025, "step": 53276 }, { "epoch": 1.77, "grad_norm": 0.562900960445404, "learning_rate": 0.00021553903955046078, "loss": 1.8107, "step": 53277 }, { "epoch": 1.77, "grad_norm": 0.5856046080589294, "learning_rate": 0.0002155290102680243, "loss": 1.7679, "step": 53278 }, { "epoch": 1.77, "grad_norm": 0.5610694885253906, "learning_rate": 0.0002155189810881234, "loss": 1.7642, "step": 53279 }, { "epoch": 1.77, "grad_norm": 0.5685995221138, "learning_rate": 0.00021550895201076983, "loss": 1.7831, "step": 53280 }, { "epoch": 1.77, "grad_norm": 0.5476840734481812, "learning_rate": 0.00021549892303597594, "loss": 1.7206, "step": 53281 }, { "epoch": 1.77, "grad_norm": 0.579667866230011, "learning_rate": 0.00021548889416375403, "loss": 1.8051, "step": 53282 }, { "epoch": 1.77, "grad_norm": 0.5768405199050903, "learning_rate": 0.00021547886539411605, "loss": 1.7531, "step": 53283 }, { "epoch": 1.77, "grad_norm": 0.5529496669769287, "learning_rate": 0.00021546883672707428, "loss": 1.7426, "step": 53284 }, { "epoch": 1.77, "grad_norm": 0.5622509717941284, "learning_rate": 0.00021545880816264097, "loss": 1.7153, "step": 53285 }, { "epoch": 1.77, "grad_norm": 0.5622768998146057, "learning_rate": 0.00021544877970082813, "loss": 1.7522, "step": 53286 }, { "epoch": 1.77, "grad_norm": 0.5755616426467896, "learning_rate": 0.00021543875134164795, "loss": 1.7513, "step": 53287 }, { "epoch": 1.77, "grad_norm": 0.8001516461372375, "learning_rate": 0.00021542872308511274, "loss": 1.7, "step": 53288 }, { "epoch": 1.77, "grad_norm": 0.5607699751853943, "learning_rate": 0.00021541869493123464, "loss": 1.7392, "step": 53289 }, { "epoch": 1.77, "grad_norm": 0.5548664331436157, "learning_rate": 0.00021540866688002575, "loss": 1.7285, "step": 53290 }, { "epoch": 1.77, "grad_norm": 0.5608888864517212, "learning_rate": 0.00021539863893149826, "loss": 1.8213, "step": 53291 }, { "epoch": 1.77, "grad_norm": 0.5626612901687622, "learning_rate": 0.0002153886110856644, "loss": 1.78, "step": 53292 }, { "epoch": 1.77, "grad_norm": 0.5605239868164062, "learning_rate": 0.00021537858334253623, "loss": 1.8357, "step": 53293 }, { "epoch": 1.77, "grad_norm": 0.5668817758560181, "learning_rate": 0.00021536855570212599, "loss": 1.7951, "step": 53294 }, { "epoch": 1.77, "grad_norm": 0.5546082258224487, "learning_rate": 0.00021535852816444596, "loss": 1.7391, "step": 53295 }, { "epoch": 1.77, "grad_norm": 0.5821554064750671, "learning_rate": 0.0002153485007295081, "loss": 1.6766, "step": 53296 }, { "epoch": 1.77, "grad_norm": 0.5605784058570862, "learning_rate": 0.0002153384733973247, "loss": 1.7856, "step": 53297 }, { "epoch": 1.77, "grad_norm": 0.5632410049438477, "learning_rate": 0.0002153284461679079, "loss": 1.7645, "step": 53298 }, { "epoch": 1.77, "grad_norm": 0.5378888249397278, "learning_rate": 0.00021531841904127001, "loss": 1.7022, "step": 53299 }, { "epoch": 1.77, "grad_norm": 0.5674421191215515, "learning_rate": 0.00021530839201742302, "loss": 1.7572, "step": 53300 }, { "epoch": 1.77, "grad_norm": 0.541265070438385, "learning_rate": 0.00021529836509637907, "loss": 1.7208, "step": 53301 }, { "epoch": 1.77, "grad_norm": 0.5541596412658691, "learning_rate": 0.00021528833827815053, "loss": 1.7559, "step": 53302 }, { "epoch": 1.77, "grad_norm": 0.5694772005081177, "learning_rate": 0.00021527831156274943, "loss": 1.7848, "step": 53303 }, { "epoch": 1.77, "grad_norm": 0.5659990906715393, "learning_rate": 0.000215268284950188, "loss": 1.7372, "step": 53304 }, { "epoch": 1.77, "grad_norm": 0.5536578893661499, "learning_rate": 0.0002152582584404784, "loss": 1.7331, "step": 53305 }, { "epoch": 1.77, "grad_norm": 0.5572211146354675, "learning_rate": 0.00021524823203363276, "loss": 1.6706, "step": 53306 }, { "epoch": 1.77, "grad_norm": 0.5415575504302979, "learning_rate": 0.0002152382057296633, "loss": 1.6876, "step": 53307 }, { "epoch": 1.77, "grad_norm": 0.5588793158531189, "learning_rate": 0.00021522817952858206, "loss": 1.7607, "step": 53308 }, { "epoch": 1.77, "grad_norm": 0.540292501449585, "learning_rate": 0.00021521815343040143, "loss": 1.7257, "step": 53309 }, { "epoch": 1.77, "grad_norm": 0.5948951840400696, "learning_rate": 0.00021520812743513345, "loss": 1.805, "step": 53310 }, { "epoch": 1.77, "grad_norm": 0.5817079544067383, "learning_rate": 0.00021519810154279028, "loss": 1.6888, "step": 53311 }, { "epoch": 1.77, "grad_norm": 0.5729108452796936, "learning_rate": 0.00021518807575338422, "loss": 1.7202, "step": 53312 }, { "epoch": 1.77, "grad_norm": 0.563128650188446, "learning_rate": 0.00021517805006692725, "loss": 1.7971, "step": 53313 }, { "epoch": 1.77, "grad_norm": 0.5588750839233398, "learning_rate": 0.0002151680244834316, "loss": 1.7689, "step": 53314 }, { "epoch": 1.77, "grad_norm": 0.5704158544540405, "learning_rate": 0.0002151579990029096, "loss": 1.7611, "step": 53315 }, { "epoch": 1.77, "grad_norm": 0.5607097744941711, "learning_rate": 0.00021514797362537315, "loss": 1.715, "step": 53316 }, { "epoch": 1.77, "grad_norm": 0.5915705561637878, "learning_rate": 0.00021513794835083459, "loss": 1.7917, "step": 53317 }, { "epoch": 1.77, "grad_norm": 0.5723474025726318, "learning_rate": 0.00021512792317930605, "loss": 1.769, "step": 53318 }, { "epoch": 1.77, "grad_norm": 0.5432859659194946, "learning_rate": 0.00021511789811079978, "loss": 1.7339, "step": 53319 }, { "epoch": 1.77, "grad_norm": 0.5675907135009766, "learning_rate": 0.00021510787314532782, "loss": 1.7749, "step": 53320 }, { "epoch": 1.77, "grad_norm": 0.5594126582145691, "learning_rate": 0.00021509784828290234, "loss": 1.7041, "step": 53321 }, { "epoch": 1.77, "grad_norm": 0.5723046064376831, "learning_rate": 0.00021508782352353575, "loss": 1.7103, "step": 53322 }, { "epoch": 1.77, "grad_norm": 0.5557695627212524, "learning_rate": 0.00021507779886723982, "loss": 1.7993, "step": 53323 }, { "epoch": 1.77, "grad_norm": 0.5686209797859192, "learning_rate": 0.000215067774314027, "loss": 1.7071, "step": 53324 }, { "epoch": 1.77, "grad_norm": 0.5454330444335938, "learning_rate": 0.0002150577498639095, "loss": 1.7606, "step": 53325 }, { "epoch": 1.77, "grad_norm": 0.5599324107170105, "learning_rate": 0.00021504772551689922, "loss": 1.7983, "step": 53326 }, { "epoch": 1.77, "grad_norm": 0.5706611275672913, "learning_rate": 0.00021503770127300853, "loss": 1.7391, "step": 53327 }, { "epoch": 1.77, "grad_norm": 0.5585444569587708, "learning_rate": 0.00021502767713224954, "loss": 1.6462, "step": 53328 }, { "epoch": 1.77, "grad_norm": 0.5586050748825073, "learning_rate": 0.00021501765309463454, "loss": 1.7101, "step": 53329 }, { "epoch": 1.77, "grad_norm": 0.5496816039085388, "learning_rate": 0.00021500762916017543, "loss": 1.7397, "step": 53330 }, { "epoch": 1.77, "grad_norm": 0.5688285827636719, "learning_rate": 0.00021499760532888462, "loss": 1.6858, "step": 53331 }, { "epoch": 1.77, "grad_norm": 0.5593346357345581, "learning_rate": 0.0002149875816007742, "loss": 1.7372, "step": 53332 }, { "epoch": 1.77, "grad_norm": 0.5683373808860779, "learning_rate": 0.0002149775579758563, "loss": 1.7757, "step": 53333 }, { "epoch": 1.77, "grad_norm": 0.5919123888015747, "learning_rate": 0.00021496753445414315, "loss": 1.8179, "step": 53334 }, { "epoch": 1.77, "grad_norm": 0.5771446824073792, "learning_rate": 0.00021495751103564687, "loss": 1.7057, "step": 53335 }, { "epoch": 1.77, "grad_norm": 0.5715831518173218, "learning_rate": 0.00021494748772037963, "loss": 1.7665, "step": 53336 }, { "epoch": 1.77, "grad_norm": 0.5492846965789795, "learning_rate": 0.00021493746450835355, "loss": 1.674, "step": 53337 }, { "epoch": 1.77, "grad_norm": 0.5698116421699524, "learning_rate": 0.00021492744139958088, "loss": 1.7741, "step": 53338 }, { "epoch": 1.77, "grad_norm": 0.5839996337890625, "learning_rate": 0.00021491741839407385, "loss": 1.7274, "step": 53339 }, { "epoch": 1.77, "grad_norm": 0.5714841485023499, "learning_rate": 0.00021490739549184448, "loss": 1.7482, "step": 53340 }, { "epoch": 1.77, "grad_norm": 0.5686668753623962, "learning_rate": 0.00021489737269290502, "loss": 1.7388, "step": 53341 }, { "epoch": 1.77, "grad_norm": 0.6177629828453064, "learning_rate": 0.00021488734999726764, "loss": 1.7181, "step": 53342 }, { "epoch": 1.77, "grad_norm": 0.5707570314407349, "learning_rate": 0.0002148773274049444, "loss": 1.73, "step": 53343 }, { "epoch": 1.77, "grad_norm": 0.5601736307144165, "learning_rate": 0.0002148673049159475, "loss": 1.6819, "step": 53344 }, { "epoch": 1.77, "grad_norm": 0.5488076210021973, "learning_rate": 0.0002148572825302893, "loss": 1.7428, "step": 53345 }, { "epoch": 1.77, "grad_norm": 0.559415340423584, "learning_rate": 0.0002148472602479817, "loss": 1.7621, "step": 53346 }, { "epoch": 1.77, "grad_norm": 0.5873322486877441, "learning_rate": 0.00021483723806903695, "loss": 1.7776, "step": 53347 }, { "epoch": 1.77, "grad_norm": 0.5552006363868713, "learning_rate": 0.00021482721599346732, "loss": 1.763, "step": 53348 }, { "epoch": 1.77, "grad_norm": 0.585594654083252, "learning_rate": 0.00021481719402128492, "loss": 1.712, "step": 53349 }, { "epoch": 1.77, "grad_norm": 0.5587047934532166, "learning_rate": 0.00021480717215250188, "loss": 1.7059, "step": 53350 }, { "epoch": 1.77, "grad_norm": 0.5482491254806519, "learning_rate": 0.00021479715038713026, "loss": 1.7541, "step": 53351 }, { "epoch": 1.78, "grad_norm": 0.5703261494636536, "learning_rate": 0.00021478712872518256, "loss": 1.7335, "step": 53352 }, { "epoch": 1.78, "grad_norm": 0.5630490779876709, "learning_rate": 0.00021477710716667056, "loss": 1.6881, "step": 53353 }, { "epoch": 1.78, "grad_norm": 0.5853824019432068, "learning_rate": 0.00021476708571160663, "loss": 1.8109, "step": 53354 }, { "epoch": 1.78, "grad_norm": 0.5592891573905945, "learning_rate": 0.00021475706436000294, "loss": 1.7551, "step": 53355 }, { "epoch": 1.78, "grad_norm": 0.5735805630683899, "learning_rate": 0.00021474704311187163, "loss": 1.6949, "step": 53356 }, { "epoch": 1.78, "grad_norm": 0.5844322443008423, "learning_rate": 0.0002147370219672248, "loss": 1.777, "step": 53357 }, { "epoch": 1.78, "grad_norm": 0.565971851348877, "learning_rate": 0.00021472700092607457, "loss": 1.7071, "step": 53358 }, { "epoch": 1.78, "grad_norm": 0.5963684916496277, "learning_rate": 0.00021471697998843342, "loss": 1.7005, "step": 53359 }, { "epoch": 1.78, "grad_norm": 0.5644839406013489, "learning_rate": 0.00021470695915431313, "loss": 1.7156, "step": 53360 }, { "epoch": 1.78, "grad_norm": 0.5750465989112854, "learning_rate": 0.00021469693842372602, "loss": 1.6706, "step": 53361 }, { "epoch": 1.78, "grad_norm": 0.620637059211731, "learning_rate": 0.00021468691779668437, "loss": 1.7466, "step": 53362 }, { "epoch": 1.78, "grad_norm": 0.5686660408973694, "learning_rate": 0.00021467689727320017, "loss": 1.7654, "step": 53363 }, { "epoch": 1.78, "grad_norm": 0.5721777677536011, "learning_rate": 0.0002146668768532856, "loss": 1.7635, "step": 53364 }, { "epoch": 1.78, "grad_norm": 0.5709213018417358, "learning_rate": 0.000214656856536953, "loss": 1.7878, "step": 53365 }, { "epoch": 1.78, "grad_norm": 0.5820034742355347, "learning_rate": 0.00021464683632421429, "loss": 1.7859, "step": 53366 }, { "epoch": 1.78, "grad_norm": 0.5616236329078674, "learning_rate": 0.0002146368162150817, "loss": 1.7304, "step": 53367 }, { "epoch": 1.78, "grad_norm": 0.5556163191795349, "learning_rate": 0.00021462679620956745, "loss": 1.713, "step": 53368 }, { "epoch": 1.78, "grad_norm": 0.5421101450920105, "learning_rate": 0.0002146167763076838, "loss": 1.7184, "step": 53369 }, { "epoch": 1.78, "grad_norm": 0.5429890751838684, "learning_rate": 0.00021460675650944273, "loss": 1.6351, "step": 53370 }, { "epoch": 1.78, "grad_norm": 0.5704776644706726, "learning_rate": 0.0002145967368148565, "loss": 1.6909, "step": 53371 }, { "epoch": 1.78, "grad_norm": 0.547378420829773, "learning_rate": 0.00021458671722393729, "loss": 1.7547, "step": 53372 }, { "epoch": 1.78, "grad_norm": 0.5815537571907043, "learning_rate": 0.00021457669773669713, "loss": 1.8307, "step": 53373 }, { "epoch": 1.78, "grad_norm": 0.5611982941627502, "learning_rate": 0.00021456667835314825, "loss": 1.7685, "step": 53374 }, { "epoch": 1.78, "grad_norm": 0.5581226944923401, "learning_rate": 0.00021455665907330304, "loss": 1.7349, "step": 53375 }, { "epoch": 1.78, "grad_norm": 0.5348830819129944, "learning_rate": 0.00021454663989717323, "loss": 1.7297, "step": 53376 }, { "epoch": 1.78, "grad_norm": 0.5643601417541504, "learning_rate": 0.00021453662082477126, "loss": 1.7905, "step": 53377 }, { "epoch": 1.78, "grad_norm": 0.5734819769859314, "learning_rate": 0.00021452660185610926, "loss": 1.7264, "step": 53378 }, { "epoch": 1.78, "grad_norm": 0.5692765712738037, "learning_rate": 0.00021451658299119941, "loss": 1.7584, "step": 53379 }, { "epoch": 1.78, "grad_norm": 0.5584752559661865, "learning_rate": 0.00021450656423005382, "loss": 1.6851, "step": 53380 }, { "epoch": 1.78, "grad_norm": 0.5719944834709167, "learning_rate": 0.00021449654557268458, "loss": 1.7514, "step": 53381 }, { "epoch": 1.78, "grad_norm": 0.5611535310745239, "learning_rate": 0.00021448652701910412, "loss": 1.7743, "step": 53382 }, { "epoch": 1.78, "grad_norm": 0.5556827783584595, "learning_rate": 0.00021447650856932422, "loss": 1.7211, "step": 53383 }, { "epoch": 1.78, "grad_norm": 0.5748693943023682, "learning_rate": 0.00021446649022335729, "loss": 1.6284, "step": 53384 }, { "epoch": 1.78, "grad_norm": 0.5694574117660522, "learning_rate": 0.00021445647198121553, "loss": 1.7373, "step": 53385 }, { "epoch": 1.78, "grad_norm": 0.5414682626724243, "learning_rate": 0.00021444645384291092, "loss": 1.7339, "step": 53386 }, { "epoch": 1.78, "grad_norm": 0.5511725544929504, "learning_rate": 0.0002144364358084557, "loss": 1.7772, "step": 53387 }, { "epoch": 1.78, "grad_norm": 0.5477036237716675, "learning_rate": 0.00021442641787786204, "loss": 1.7831, "step": 53388 }, { "epoch": 1.78, "grad_norm": 0.5806983709335327, "learning_rate": 0.00021441640005114224, "loss": 1.7454, "step": 53389 }, { "epoch": 1.78, "grad_norm": 0.5727494359016418, "learning_rate": 0.00021440638232830814, "loss": 1.7299, "step": 53390 }, { "epoch": 1.78, "grad_norm": 0.5666535496711731, "learning_rate": 0.00021439636470937216, "loss": 1.7478, "step": 53391 }, { "epoch": 1.78, "grad_norm": 0.5664516687393188, "learning_rate": 0.0002143863471943464, "loss": 1.6924, "step": 53392 }, { "epoch": 1.78, "grad_norm": 0.5642156600952148, "learning_rate": 0.00021437632978324299, "loss": 1.7403, "step": 53393 }, { "epoch": 1.78, "grad_norm": 0.5768981575965881, "learning_rate": 0.00021436631247607408, "loss": 1.6258, "step": 53394 }, { "epoch": 1.78, "grad_norm": 0.5802571177482605, "learning_rate": 0.0002143562952728519, "loss": 1.7107, "step": 53395 }, { "epoch": 1.78, "grad_norm": 0.5770159959793091, "learning_rate": 0.0002143462781735885, "loss": 1.7302, "step": 53396 }, { "epoch": 1.78, "grad_norm": 0.5615199208259583, "learning_rate": 0.00021433626117829598, "loss": 1.704, "step": 53397 }, { "epoch": 1.78, "grad_norm": 0.5496477484703064, "learning_rate": 0.00021432624428698678, "loss": 1.7221, "step": 53398 }, { "epoch": 1.78, "grad_norm": 0.5581570267677307, "learning_rate": 0.0002143162274996729, "loss": 1.8017, "step": 53399 }, { "epoch": 1.78, "grad_norm": 0.5481955409049988, "learning_rate": 0.00021430621081636644, "loss": 1.7102, "step": 53400 }, { "epoch": 1.78, "grad_norm": 0.5841103196144104, "learning_rate": 0.00021429619423707962, "loss": 1.7133, "step": 53401 }, { "epoch": 1.78, "grad_norm": 1.137079119682312, "learning_rate": 0.00021428617776182466, "loss": 1.6949, "step": 53402 }, { "epoch": 1.78, "grad_norm": 0.5634037256240845, "learning_rate": 0.00021427616139061358, "loss": 1.6876, "step": 53403 }, { "epoch": 1.78, "grad_norm": 0.5844445824623108, "learning_rate": 0.00021426614512345856, "loss": 1.7679, "step": 53404 }, { "epoch": 1.78, "grad_norm": 0.5526670813560486, "learning_rate": 0.00021425612896037186, "loss": 1.7094, "step": 53405 }, { "epoch": 1.78, "grad_norm": 0.5577492713928223, "learning_rate": 0.00021424611290136566, "loss": 1.6997, "step": 53406 }, { "epoch": 1.78, "grad_norm": 0.5668813586235046, "learning_rate": 0.00021423609694645195, "loss": 1.8114, "step": 53407 }, { "epoch": 1.78, "grad_norm": 0.5618794560432434, "learning_rate": 0.000214226081095643, "loss": 1.7178, "step": 53408 }, { "epoch": 1.78, "grad_norm": 0.5652807354927063, "learning_rate": 0.000214216065348951, "loss": 1.7806, "step": 53409 }, { "epoch": 1.78, "grad_norm": 0.5627074241638184, "learning_rate": 0.00021420604970638803, "loss": 1.7438, "step": 53410 }, { "epoch": 1.78, "grad_norm": 0.5882782340049744, "learning_rate": 0.00021419603416796617, "loss": 1.7945, "step": 53411 }, { "epoch": 1.78, "grad_norm": 0.5845900774002075, "learning_rate": 0.0002141860187336978, "loss": 1.7612, "step": 53412 }, { "epoch": 1.78, "grad_norm": 0.5675984025001526, "learning_rate": 0.00021417600340359496, "loss": 1.778, "step": 53413 }, { "epoch": 1.78, "grad_norm": 0.5759602785110474, "learning_rate": 0.00021416598817766976, "loss": 1.7962, "step": 53414 }, { "epoch": 1.78, "grad_norm": 0.556174099445343, "learning_rate": 0.0002141559730559344, "loss": 1.7169, "step": 53415 }, { "epoch": 1.78, "grad_norm": 0.5902616381645203, "learning_rate": 0.0002141459580384011, "loss": 1.7751, "step": 53416 }, { "epoch": 1.78, "grad_norm": 0.579497218132019, "learning_rate": 0.00021413594312508195, "loss": 1.7312, "step": 53417 }, { "epoch": 1.78, "grad_norm": 0.5767619609832764, "learning_rate": 0.00021412592831598898, "loss": 1.7213, "step": 53418 }, { "epoch": 1.78, "grad_norm": 0.5759223699569702, "learning_rate": 0.00021411591361113465, "loss": 1.7691, "step": 53419 }, { "epoch": 1.78, "grad_norm": 0.5792202353477478, "learning_rate": 0.00021410589901053087, "loss": 1.7918, "step": 53420 }, { "epoch": 1.78, "grad_norm": 0.575387716293335, "learning_rate": 0.00021409588451418987, "loss": 1.7706, "step": 53421 }, { "epoch": 1.78, "grad_norm": 0.5894989371299744, "learning_rate": 0.00021408587012212388, "loss": 1.7593, "step": 53422 }, { "epoch": 1.78, "grad_norm": 0.5829758644104004, "learning_rate": 0.00021407585583434492, "loss": 1.7434, "step": 53423 }, { "epoch": 1.78, "grad_norm": 0.5565366744995117, "learning_rate": 0.00021406584165086523, "loss": 1.7958, "step": 53424 }, { "epoch": 1.78, "grad_norm": 0.6097094416618347, "learning_rate": 0.00021405582757169682, "loss": 1.798, "step": 53425 }, { "epoch": 1.78, "grad_norm": 0.5628690123558044, "learning_rate": 0.00021404581359685214, "loss": 1.7806, "step": 53426 }, { "epoch": 1.78, "grad_norm": 0.5865635871887207, "learning_rate": 0.0002140357997263431, "loss": 1.7056, "step": 53427 }, { "epoch": 1.78, "grad_norm": 0.5676522850990295, "learning_rate": 0.00021402578596018196, "loss": 1.7398, "step": 53428 }, { "epoch": 1.78, "grad_norm": 0.5616977214813232, "learning_rate": 0.00021401577229838092, "loss": 1.7432, "step": 53429 }, { "epoch": 1.78, "grad_norm": 0.5979675650596619, "learning_rate": 0.00021400575874095195, "loss": 1.6928, "step": 53430 }, { "epoch": 1.78, "grad_norm": 0.5380657911300659, "learning_rate": 0.0002139957452879073, "loss": 1.7, "step": 53431 }, { "epoch": 1.78, "grad_norm": 0.5735295414924622, "learning_rate": 0.0002139857319392593, "loss": 1.6966, "step": 53432 }, { "epoch": 1.78, "grad_norm": 0.5565692186355591, "learning_rate": 0.0002139757186950198, "loss": 1.6948, "step": 53433 }, { "epoch": 1.78, "grad_norm": 0.5739520192146301, "learning_rate": 0.00021396570555520108, "loss": 1.738, "step": 53434 }, { "epoch": 1.78, "grad_norm": 0.5347521901130676, "learning_rate": 0.00021395569251981539, "loss": 1.7008, "step": 53435 }, { "epoch": 1.78, "grad_norm": 0.5532275438308716, "learning_rate": 0.00021394567958887487, "loss": 1.6988, "step": 53436 }, { "epoch": 1.78, "grad_norm": 0.5538575053215027, "learning_rate": 0.00021393566676239153, "loss": 1.7185, "step": 53437 }, { "epoch": 1.78, "grad_norm": 0.5505011081695557, "learning_rate": 0.00021392565404037756, "loss": 1.7412, "step": 53438 }, { "epoch": 1.78, "grad_norm": 0.5654311776161194, "learning_rate": 0.00021391564142284535, "loss": 1.7921, "step": 53439 }, { "epoch": 1.78, "grad_norm": 0.553597629070282, "learning_rate": 0.00021390562890980666, "loss": 1.7813, "step": 53440 }, { "epoch": 1.78, "grad_norm": 0.567333459854126, "learning_rate": 0.00021389561650127397, "loss": 1.6764, "step": 53441 }, { "epoch": 1.78, "grad_norm": 0.5633355379104614, "learning_rate": 0.00021388560419725934, "loss": 1.8184, "step": 53442 }, { "epoch": 1.78, "grad_norm": 0.5608561635017395, "learning_rate": 0.00021387559199777484, "loss": 1.8013, "step": 53443 }, { "epoch": 1.78, "grad_norm": 0.5433641672134399, "learning_rate": 0.00021386557990283266, "loss": 1.6561, "step": 53444 }, { "epoch": 1.78, "grad_norm": 0.5466147661209106, "learning_rate": 0.00021385556791244493, "loss": 1.684, "step": 53445 }, { "epoch": 1.78, "grad_norm": 0.5781084299087524, "learning_rate": 0.00021384555602662405, "loss": 1.7436, "step": 53446 }, { "epoch": 1.78, "grad_norm": 0.5991970300674438, "learning_rate": 0.00021383554424538173, "loss": 1.7673, "step": 53447 }, { "epoch": 1.78, "grad_norm": 0.5552555322647095, "learning_rate": 0.0002138255325687305, "loss": 1.6975, "step": 53448 }, { "epoch": 1.78, "grad_norm": 0.5527522563934326, "learning_rate": 0.00021381552099668239, "loss": 1.7311, "step": 53449 }, { "epoch": 1.78, "grad_norm": 0.5698193311691284, "learning_rate": 0.0002138055095292495, "loss": 1.7479, "step": 53450 }, { "epoch": 1.78, "grad_norm": 0.5655034184455872, "learning_rate": 0.00021379549816644402, "loss": 1.7659, "step": 53451 }, { "epoch": 1.78, "grad_norm": 0.5644237399101257, "learning_rate": 0.00021378548690827816, "loss": 1.7264, "step": 53452 }, { "epoch": 1.78, "grad_norm": 0.5594006776809692, "learning_rate": 0.00021377547575476393, "loss": 1.8051, "step": 53453 }, { "epoch": 1.78, "grad_norm": 0.5599575638771057, "learning_rate": 0.00021376546470591354, "loss": 1.7496, "step": 53454 }, { "epoch": 1.78, "grad_norm": 0.5740293860435486, "learning_rate": 0.0002137554537617392, "loss": 1.7994, "step": 53455 }, { "epoch": 1.78, "grad_norm": 0.5829367637634277, "learning_rate": 0.00021374544292225312, "loss": 1.7455, "step": 53456 }, { "epoch": 1.78, "grad_norm": 0.6021072268486023, "learning_rate": 0.00021373543218746726, "loss": 1.7455, "step": 53457 }, { "epoch": 1.78, "grad_norm": 0.5756165981292725, "learning_rate": 0.00021372542155739392, "loss": 1.7686, "step": 53458 }, { "epoch": 1.78, "grad_norm": 0.5817073583602905, "learning_rate": 0.00021371541103204527, "loss": 1.7059, "step": 53459 }, { "epoch": 1.78, "grad_norm": 0.5737557411193848, "learning_rate": 0.0002137054006114333, "loss": 1.7344, "step": 53460 }, { "epoch": 1.78, "grad_norm": 0.5601312518119812, "learning_rate": 0.00021369539029557027, "loss": 1.7069, "step": 53461 }, { "epoch": 1.78, "grad_norm": 0.5957599878311157, "learning_rate": 0.00021368538008446843, "loss": 1.6982, "step": 53462 }, { "epoch": 1.78, "grad_norm": 0.554754912853241, "learning_rate": 0.00021367536997813965, "loss": 1.7661, "step": 53463 }, { "epoch": 1.78, "grad_norm": 0.5755719542503357, "learning_rate": 0.00021366535997659634, "loss": 1.7577, "step": 53464 }, { "epoch": 1.78, "grad_norm": 0.5458029508590698, "learning_rate": 0.0002136553500798505, "loss": 1.7708, "step": 53465 }, { "epoch": 1.78, "grad_norm": 0.5741029977798462, "learning_rate": 0.00021364534028791448, "loss": 1.682, "step": 53466 }, { "epoch": 1.78, "grad_norm": 0.5843891501426697, "learning_rate": 0.0002136353306008002, "loss": 1.7599, "step": 53467 }, { "epoch": 1.78, "grad_norm": 0.5728832483291626, "learning_rate": 0.00021362532101851985, "loss": 1.7642, "step": 53468 }, { "epoch": 1.78, "grad_norm": 0.5678866505622864, "learning_rate": 0.0002136153115410858, "loss": 1.7781, "step": 53469 }, { "epoch": 1.78, "grad_norm": 0.553003191947937, "learning_rate": 0.00021360530216850987, "loss": 1.7608, "step": 53470 }, { "epoch": 1.78, "grad_norm": 0.5549201369285583, "learning_rate": 0.0002135952929008044, "loss": 1.7149, "step": 53471 }, { "epoch": 1.78, "grad_norm": 0.5448605418205261, "learning_rate": 0.00021358528373798162, "loss": 1.6897, "step": 53472 }, { "epoch": 1.78, "grad_norm": 0.590543270111084, "learning_rate": 0.0002135752746800535, "loss": 1.7498, "step": 53473 }, { "epoch": 1.78, "grad_norm": 0.5725422501564026, "learning_rate": 0.00021356526572703226, "loss": 1.739, "step": 53474 }, { "epoch": 1.78, "grad_norm": 0.5471112728118896, "learning_rate": 0.00021355525687892995, "loss": 1.7699, "step": 53475 }, { "epoch": 1.78, "grad_norm": 0.5522356629371643, "learning_rate": 0.00021354524813575905, "loss": 1.736, "step": 53476 }, { "epoch": 1.78, "grad_norm": 0.5591379404067993, "learning_rate": 0.00021353523949753128, "loss": 1.8292, "step": 53477 }, { "epoch": 1.78, "grad_norm": 0.5705559849739075, "learning_rate": 0.00021352523096425906, "loss": 1.7692, "step": 53478 }, { "epoch": 1.78, "grad_norm": 0.5637239813804626, "learning_rate": 0.00021351522253595454, "loss": 1.7412, "step": 53479 }, { "epoch": 1.78, "grad_norm": 0.5729324817657471, "learning_rate": 0.00021350521421262971, "loss": 1.7624, "step": 53480 }, { "epoch": 1.78, "grad_norm": 0.5821526050567627, "learning_rate": 0.0002134952059942968, "loss": 1.675, "step": 53481 }, { "epoch": 1.78, "grad_norm": 0.568261444568634, "learning_rate": 0.00021348519788096806, "loss": 1.7691, "step": 53482 }, { "epoch": 1.78, "grad_norm": 0.5674226880073547, "learning_rate": 0.00021347518987265544, "loss": 1.7646, "step": 53483 }, { "epoch": 1.78, "grad_norm": 0.5727978944778442, "learning_rate": 0.00021346518196937116, "loss": 1.7793, "step": 53484 }, { "epoch": 1.78, "grad_norm": 0.5792263746261597, "learning_rate": 0.00021345517417112748, "loss": 1.6789, "step": 53485 }, { "epoch": 1.78, "grad_norm": 0.5600107312202454, "learning_rate": 0.00021344516647793646, "loss": 1.6362, "step": 53486 }, { "epoch": 1.78, "grad_norm": 0.5810540318489075, "learning_rate": 0.00021343515888981026, "loss": 1.7255, "step": 53487 }, { "epoch": 1.78, "grad_norm": 0.5683096051216125, "learning_rate": 0.000213425151406761, "loss": 1.7664, "step": 53488 }, { "epoch": 1.78, "grad_norm": 0.5500879287719727, "learning_rate": 0.0002134151440288009, "loss": 1.7608, "step": 53489 }, { "epoch": 1.78, "grad_norm": 0.5415267944335938, "learning_rate": 0.000213405136755942, "loss": 1.684, "step": 53490 }, { "epoch": 1.78, "grad_norm": 0.5587643980979919, "learning_rate": 0.00021339512958819644, "loss": 1.8376, "step": 53491 }, { "epoch": 1.78, "grad_norm": 0.5785082578659058, "learning_rate": 0.00021338512252557664, "loss": 1.8307, "step": 53492 }, { "epoch": 1.78, "grad_norm": 0.5589852333068848, "learning_rate": 0.00021337511556809432, "loss": 1.7701, "step": 53493 }, { "epoch": 1.78, "grad_norm": 0.584793746471405, "learning_rate": 0.00021336510871576193, "loss": 1.7532, "step": 53494 }, { "epoch": 1.78, "grad_norm": 0.576812207698822, "learning_rate": 0.00021335510196859154, "loss": 1.7743, "step": 53495 }, { "epoch": 1.78, "grad_norm": 0.5488056540489197, "learning_rate": 0.00021334509532659532, "loss": 1.7762, "step": 53496 }, { "epoch": 1.78, "grad_norm": 0.5648588538169861, "learning_rate": 0.00021333508878978536, "loss": 1.7775, "step": 53497 }, { "epoch": 1.78, "grad_norm": 0.5495123267173767, "learning_rate": 0.00021332508235817374, "loss": 1.779, "step": 53498 }, { "epoch": 1.78, "grad_norm": 0.5414437055587769, "learning_rate": 0.00021331507603177292, "loss": 1.8041, "step": 53499 }, { "epoch": 1.78, "grad_norm": 0.5567925572395325, "learning_rate": 0.00021330506981059462, "loss": 1.7822, "step": 53500 }, { "epoch": 1.78, "grad_norm": 0.5932060480117798, "learning_rate": 0.00021329506369465126, "loss": 1.7725, "step": 53501 }, { "epoch": 1.78, "grad_norm": 0.5508602261543274, "learning_rate": 0.00021328505768395496, "loss": 1.7422, "step": 53502 }, { "epoch": 1.78, "grad_norm": 0.5650374889373779, "learning_rate": 0.00021327505177851777, "loss": 1.8042, "step": 53503 }, { "epoch": 1.78, "grad_norm": 0.5525363087654114, "learning_rate": 0.0002132650459783519, "loss": 1.7597, "step": 53504 }, { "epoch": 1.78, "grad_norm": 0.5713686347007751, "learning_rate": 0.0002132550402834694, "loss": 1.7328, "step": 53505 }, { "epoch": 1.78, "grad_norm": 0.5743952393531799, "learning_rate": 0.00021324503469388268, "loss": 1.6894, "step": 53506 }, { "epoch": 1.78, "grad_norm": 0.5742058753967285, "learning_rate": 0.00021323502920960355, "loss": 1.7693, "step": 53507 }, { "epoch": 1.78, "grad_norm": 0.5913346409797668, "learning_rate": 0.00021322502383064437, "loss": 1.7373, "step": 53508 }, { "epoch": 1.78, "grad_norm": 0.5595690011978149, "learning_rate": 0.00021321501855701728, "loss": 1.6577, "step": 53509 }, { "epoch": 1.78, "grad_norm": 0.5556714534759521, "learning_rate": 0.0002132050133887343, "loss": 1.6393, "step": 53510 }, { "epoch": 1.78, "grad_norm": 0.5667980909347534, "learning_rate": 0.00021319500832580766, "loss": 1.7253, "step": 53511 }, { "epoch": 1.78, "grad_norm": 0.5785905122756958, "learning_rate": 0.00021318500336824954, "loss": 1.6853, "step": 53512 }, { "epoch": 1.78, "grad_norm": 0.5626202821731567, "learning_rate": 0.00021317499851607197, "loss": 1.7464, "step": 53513 }, { "epoch": 1.78, "grad_norm": 0.5645698308944702, "learning_rate": 0.0002131649937692871, "loss": 1.7065, "step": 53514 }, { "epoch": 1.78, "grad_norm": 0.5607157349586487, "learning_rate": 0.00021315498912790722, "loss": 1.786, "step": 53515 }, { "epoch": 1.78, "grad_norm": 0.5844230055809021, "learning_rate": 0.00021314498459194442, "loss": 1.7957, "step": 53516 }, { "epoch": 1.78, "grad_norm": 0.5619408488273621, "learning_rate": 0.00021313498016141074, "loss": 1.7322, "step": 53517 }, { "epoch": 1.78, "grad_norm": 0.5635640621185303, "learning_rate": 0.0002131249758363184, "loss": 1.7636, "step": 53518 }, { "epoch": 1.78, "grad_norm": 0.5368090271949768, "learning_rate": 0.00021311497161667964, "loss": 1.7221, "step": 53519 }, { "epoch": 1.78, "grad_norm": 0.5610998272895813, "learning_rate": 0.00021310496750250643, "loss": 1.7414, "step": 53520 }, { "epoch": 1.78, "grad_norm": 0.6024512648582458, "learning_rate": 0.0002130949634938109, "loss": 1.7794, "step": 53521 }, { "epoch": 1.78, "grad_norm": 0.560440719127655, "learning_rate": 0.00021308495959060532, "loss": 1.7848, "step": 53522 }, { "epoch": 1.78, "grad_norm": 0.565536618232727, "learning_rate": 0.00021307495579290188, "loss": 1.6483, "step": 53523 }, { "epoch": 1.78, "grad_norm": 0.5626809000968933, "learning_rate": 0.0002130649521007126, "loss": 1.7148, "step": 53524 }, { "epoch": 1.78, "grad_norm": 0.5521861910820007, "learning_rate": 0.00021305494851404962, "loss": 1.661, "step": 53525 }, { "epoch": 1.78, "grad_norm": 0.5622033476829529, "learning_rate": 0.0002130449450329252, "loss": 1.6771, "step": 53526 }, { "epoch": 1.78, "grad_norm": 0.6401418447494507, "learning_rate": 0.00021303494165735134, "loss": 1.8094, "step": 53527 }, { "epoch": 1.78, "grad_norm": 0.5813269019126892, "learning_rate": 0.0002130249383873402, "loss": 1.7644, "step": 53528 }, { "epoch": 1.78, "grad_norm": 0.6804452538490295, "learning_rate": 0.0002130149352229041, "loss": 1.6931, "step": 53529 }, { "epoch": 1.78, "grad_norm": 0.5674708485603333, "learning_rate": 0.00021300493216405494, "loss": 1.6934, "step": 53530 }, { "epoch": 1.78, "grad_norm": 0.6703352928161621, "learning_rate": 0.00021299492921080502, "loss": 1.7331, "step": 53531 }, { "epoch": 1.78, "grad_norm": 0.6021579504013062, "learning_rate": 0.0002129849263631664, "loss": 1.7304, "step": 53532 }, { "epoch": 1.78, "grad_norm": 0.5709975957870483, "learning_rate": 0.00021297492362115137, "loss": 1.7879, "step": 53533 }, { "epoch": 1.78, "grad_norm": 0.5558975338935852, "learning_rate": 0.00021296492098477185, "loss": 1.782, "step": 53534 }, { "epoch": 1.78, "grad_norm": 0.557772159576416, "learning_rate": 0.00021295491845404007, "loss": 1.7574, "step": 53535 }, { "epoch": 1.78, "grad_norm": 0.5792078971862793, "learning_rate": 0.0002129449160289683, "loss": 1.7179, "step": 53536 }, { "epoch": 1.78, "grad_norm": 0.5692434310913086, "learning_rate": 0.00021293491370956847, "loss": 1.7298, "step": 53537 }, { "epoch": 1.78, "grad_norm": 0.5621073246002197, "learning_rate": 0.0002129249114958529, "loss": 1.7498, "step": 53538 }, { "epoch": 1.78, "grad_norm": 0.5744421482086182, "learning_rate": 0.00021291490938783367, "loss": 1.7922, "step": 53539 }, { "epoch": 1.78, "grad_norm": 0.5528920292854309, "learning_rate": 0.00021290490738552287, "loss": 1.7137, "step": 53540 }, { "epoch": 1.78, "grad_norm": 0.5539025664329529, "learning_rate": 0.00021289490548893268, "loss": 1.7619, "step": 53541 }, { "epoch": 1.78, "grad_norm": 0.5676109790802002, "learning_rate": 0.00021288490369807518, "loss": 1.7568, "step": 53542 }, { "epoch": 1.78, "grad_norm": 0.556298017501831, "learning_rate": 0.00021287490201296266, "loss": 1.8124, "step": 53543 }, { "epoch": 1.78, "grad_norm": 0.5627880096435547, "learning_rate": 0.00021286490043360715, "loss": 1.6345, "step": 53544 }, { "epoch": 1.78, "grad_norm": 0.5486337542533875, "learning_rate": 0.0002128548989600208, "loss": 1.6628, "step": 53545 }, { "epoch": 1.78, "grad_norm": 0.5581715703010559, "learning_rate": 0.00021284489759221584, "loss": 1.6384, "step": 53546 }, { "epoch": 1.78, "grad_norm": 0.5706721544265747, "learning_rate": 0.00021283489633020422, "loss": 1.7456, "step": 53547 }, { "epoch": 1.78, "grad_norm": 0.5540194511413574, "learning_rate": 0.00021282489517399818, "loss": 1.7281, "step": 53548 }, { "epoch": 1.78, "grad_norm": 0.5355204343795776, "learning_rate": 0.00021281489412361003, "loss": 1.739, "step": 53549 }, { "epoch": 1.78, "grad_norm": 0.5771276354789734, "learning_rate": 0.0002128048931790516, "loss": 1.6857, "step": 53550 }, { "epoch": 1.78, "grad_norm": 0.573481559753418, "learning_rate": 0.0002127948923403352, "loss": 1.7855, "step": 53551 }, { "epoch": 1.78, "grad_norm": 0.5719993114471436, "learning_rate": 0.00021278489160747298, "loss": 1.6857, "step": 53552 }, { "epoch": 1.78, "grad_norm": 0.549103856086731, "learning_rate": 0.00021277489098047708, "loss": 1.7395, "step": 53553 }, { "epoch": 1.78, "grad_norm": 0.5580138564109802, "learning_rate": 0.0002127648904593596, "loss": 1.7288, "step": 53554 }, { "epoch": 1.78, "grad_norm": 0.571100652217865, "learning_rate": 0.0002127548900441326, "loss": 1.7579, "step": 53555 }, { "epoch": 1.78, "grad_norm": 0.5733301639556885, "learning_rate": 0.00021274488973480852, "loss": 1.7422, "step": 53556 }, { "epoch": 1.78, "grad_norm": 0.5547847747802734, "learning_rate": 0.00021273488953139907, "loss": 1.7823, "step": 53557 }, { "epoch": 1.78, "grad_norm": 0.5785882472991943, "learning_rate": 0.00021272488943391667, "loss": 1.7691, "step": 53558 }, { "epoch": 1.78, "grad_norm": 0.5935032367706299, "learning_rate": 0.00021271488944237346, "loss": 1.7648, "step": 53559 }, { "epoch": 1.78, "grad_norm": 0.5735769867897034, "learning_rate": 0.00021270488955678145, "loss": 1.8043, "step": 53560 }, { "epoch": 1.78, "grad_norm": 0.5647563338279724, "learning_rate": 0.00021269488977715284, "loss": 1.7567, "step": 53561 }, { "epoch": 1.78, "grad_norm": 0.5513827800750732, "learning_rate": 0.00021268489010349973, "loss": 1.7319, "step": 53562 }, { "epoch": 1.78, "grad_norm": 0.5711554288864136, "learning_rate": 0.00021267489053583446, "loss": 1.7846, "step": 53563 }, { "epoch": 1.78, "grad_norm": 0.5760101079940796, "learning_rate": 0.00021266489107416882, "loss": 1.738, "step": 53564 }, { "epoch": 1.78, "grad_norm": 0.5639139413833618, "learning_rate": 0.00021265489171851522, "loss": 1.7272, "step": 53565 }, { "epoch": 1.78, "grad_norm": 1.109898567199707, "learning_rate": 0.00021264489246888577, "loss": 1.7091, "step": 53566 }, { "epoch": 1.78, "grad_norm": 0.557453989982605, "learning_rate": 0.00021263489332529247, "loss": 1.6402, "step": 53567 }, { "epoch": 1.78, "grad_norm": 0.5619431734085083, "learning_rate": 0.00021262489428774755, "loss": 1.7405, "step": 53568 }, { "epoch": 1.78, "grad_norm": 0.5674838423728943, "learning_rate": 0.00021261489535626316, "loss": 1.7914, "step": 53569 }, { "epoch": 1.78, "grad_norm": 0.5648196935653687, "learning_rate": 0.0002126048965308514, "loss": 1.7168, "step": 53570 }, { "epoch": 1.78, "grad_norm": 0.5592935681343079, "learning_rate": 0.00021259489781152432, "loss": 1.7006, "step": 53571 }, { "epoch": 1.78, "grad_norm": 0.5536394119262695, "learning_rate": 0.00021258489919829426, "loss": 1.5885, "step": 53572 }, { "epoch": 1.78, "grad_norm": 0.5757066011428833, "learning_rate": 0.0002125749006911733, "loss": 1.7845, "step": 53573 }, { "epoch": 1.78, "grad_norm": 0.5519445538520813, "learning_rate": 0.00021256490229017342, "loss": 1.7101, "step": 53574 }, { "epoch": 1.78, "grad_norm": 1.3450795412063599, "learning_rate": 0.0002125549039953069, "loss": 1.7547, "step": 53575 }, { "epoch": 1.78, "grad_norm": 0.5563348531723022, "learning_rate": 0.0002125449058065859, "loss": 1.6862, "step": 53576 }, { "epoch": 1.78, "grad_norm": 0.5633922815322876, "learning_rate": 0.00021253490772402248, "loss": 1.7504, "step": 53577 }, { "epoch": 1.78, "grad_norm": 0.5509353876113892, "learning_rate": 0.00021252490974762867, "loss": 1.7707, "step": 53578 }, { "epoch": 1.78, "grad_norm": 0.5575370192527771, "learning_rate": 0.00021251491187741696, "loss": 1.7196, "step": 53579 }, { "epoch": 1.78, "grad_norm": 0.5541759133338928, "learning_rate": 0.00021250491411339904, "loss": 1.7066, "step": 53580 }, { "epoch": 1.78, "grad_norm": 0.5815542936325073, "learning_rate": 0.00021249491645558734, "loss": 1.7641, "step": 53581 }, { "epoch": 1.78, "grad_norm": 0.561090350151062, "learning_rate": 0.0002124849189039939, "loss": 1.7438, "step": 53582 }, { "epoch": 1.78, "grad_norm": 0.5773653984069824, "learning_rate": 0.00021247492145863095, "loss": 1.7261, "step": 53583 }, { "epoch": 1.78, "grad_norm": 0.5816711187362671, "learning_rate": 0.0002124649241195105, "loss": 1.7189, "step": 53584 }, { "epoch": 1.78, "grad_norm": 0.542589008808136, "learning_rate": 0.00021245492688664463, "loss": 1.7064, "step": 53585 }, { "epoch": 1.78, "grad_norm": 0.569197416305542, "learning_rate": 0.0002124449297600458, "loss": 1.7212, "step": 53586 }, { "epoch": 1.78, "grad_norm": 0.5698274970054626, "learning_rate": 0.00021243493273972574, "loss": 1.7748, "step": 53587 }, { "epoch": 1.78, "grad_norm": 0.5689706802368164, "learning_rate": 0.00021242493582569684, "loss": 1.7555, "step": 53588 }, { "epoch": 1.78, "grad_norm": 0.5770667195320129, "learning_rate": 0.0002124149390179712, "loss": 1.7476, "step": 53589 }, { "epoch": 1.78, "grad_norm": 0.577522873878479, "learning_rate": 0.00021240494231656088, "loss": 1.7518, "step": 53590 }, { "epoch": 1.78, "grad_norm": 0.5680429935455322, "learning_rate": 0.00021239494572147807, "loss": 1.7746, "step": 53591 }, { "epoch": 1.78, "grad_norm": 0.5652926564216614, "learning_rate": 0.00021238494923273474, "loss": 1.7793, "step": 53592 }, { "epoch": 1.78, "grad_norm": 0.5582521557807922, "learning_rate": 0.00021237495285034342, "loss": 1.7959, "step": 53593 }, { "epoch": 1.78, "grad_norm": 0.5781981945037842, "learning_rate": 0.0002123649565743158, "loss": 1.7762, "step": 53594 }, { "epoch": 1.78, "grad_norm": 0.5537256598472595, "learning_rate": 0.0002123549604046643, "loss": 1.7595, "step": 53595 }, { "epoch": 1.78, "grad_norm": 0.5610119700431824, "learning_rate": 0.000212344964341401, "loss": 1.704, "step": 53596 }, { "epoch": 1.78, "grad_norm": 0.5587238669395447, "learning_rate": 0.00021233496838453795, "loss": 1.7517, "step": 53597 }, { "epoch": 1.78, "grad_norm": 0.5506954789161682, "learning_rate": 0.00021232497253408734, "loss": 1.7806, "step": 53598 }, { "epoch": 1.78, "grad_norm": 0.5842094421386719, "learning_rate": 0.0002123149767900613, "loss": 1.7015, "step": 53599 }, { "epoch": 1.78, "grad_norm": 0.557065486907959, "learning_rate": 0.00021230498115247195, "loss": 1.8029, "step": 53600 }, { "epoch": 1.78, "grad_norm": 0.5516762137413025, "learning_rate": 0.00021229498562133135, "loss": 1.6396, "step": 53601 }, { "epoch": 1.78, "grad_norm": 0.5554277300834656, "learning_rate": 0.00021228499019665178, "loss": 1.7672, "step": 53602 }, { "epoch": 1.78, "grad_norm": 0.574636697769165, "learning_rate": 0.00021227499487844537, "loss": 1.7128, "step": 53603 }, { "epoch": 1.78, "grad_norm": 0.5755122303962708, "learning_rate": 0.00021226499966672414, "loss": 1.7571, "step": 53604 }, { "epoch": 1.78, "grad_norm": 0.5529680848121643, "learning_rate": 0.00021225500456150027, "loss": 1.7694, "step": 53605 }, { "epoch": 1.78, "grad_norm": 0.578606903553009, "learning_rate": 0.00021224500956278596, "loss": 1.783, "step": 53606 }, { "epoch": 1.78, "grad_norm": 0.5629949569702148, "learning_rate": 0.0002122350146705932, "loss": 1.6994, "step": 53607 }, { "epoch": 1.78, "grad_norm": 0.5589582920074463, "learning_rate": 0.00021222501988493413, "loss": 1.6937, "step": 53608 }, { "epoch": 1.78, "grad_norm": 0.5623560547828674, "learning_rate": 0.00021221502520582118, "loss": 1.8422, "step": 53609 }, { "epoch": 1.78, "grad_norm": 0.5588857531547546, "learning_rate": 0.000212205030633266, "loss": 1.7484, "step": 53610 }, { "epoch": 1.78, "grad_norm": 0.5806569457054138, "learning_rate": 0.00021219503616728112, "loss": 1.7623, "step": 53611 }, { "epoch": 1.78, "grad_norm": 0.5720134973526001, "learning_rate": 0.00021218504180787846, "loss": 1.7044, "step": 53612 }, { "epoch": 1.78, "grad_norm": 0.5582174062728882, "learning_rate": 0.0002121750475550703, "loss": 1.6752, "step": 53613 }, { "epoch": 1.78, "grad_norm": 0.5651940107345581, "learning_rate": 0.0002121650534088687, "loss": 1.7245, "step": 53614 }, { "epoch": 1.78, "grad_norm": 0.5392489433288574, "learning_rate": 0.00021215505936928564, "loss": 1.6786, "step": 53615 }, { "epoch": 1.78, "grad_norm": 0.5567869544029236, "learning_rate": 0.00021214506543633358, "loss": 1.7549, "step": 53616 }, { "epoch": 1.78, "grad_norm": 0.5521623492240906, "learning_rate": 0.00021213507161002435, "loss": 1.7426, "step": 53617 }, { "epoch": 1.78, "grad_norm": 0.5592093467712402, "learning_rate": 0.0002121250778903702, "loss": 1.7634, "step": 53618 }, { "epoch": 1.78, "grad_norm": 0.5614458322525024, "learning_rate": 0.0002121150842773833, "loss": 1.7446, "step": 53619 }, { "epoch": 1.78, "grad_norm": 0.5677769184112549, "learning_rate": 0.00021210509077107577, "loss": 1.6876, "step": 53620 }, { "epoch": 1.78, "grad_norm": 0.5589438080787659, "learning_rate": 0.0002120950973714596, "loss": 1.7276, "step": 53621 }, { "epoch": 1.78, "grad_norm": 0.551914393901825, "learning_rate": 0.000212085104078547, "loss": 1.6783, "step": 53622 }, { "epoch": 1.78, "grad_norm": 0.5631158351898193, "learning_rate": 0.00021207511089235033, "loss": 1.6977, "step": 53623 }, { "epoch": 1.78, "grad_norm": 0.5684959888458252, "learning_rate": 0.00021206511781288135, "loss": 1.7143, "step": 53624 }, { "epoch": 1.78, "grad_norm": 0.566428542137146, "learning_rate": 0.00021205512484015243, "loss": 1.785, "step": 53625 }, { "epoch": 1.78, "grad_norm": 0.5612491965293884, "learning_rate": 0.00021204513197417565, "loss": 1.7193, "step": 53626 }, { "epoch": 1.78, "grad_norm": 0.5632569789886475, "learning_rate": 0.0002120351392149631, "loss": 1.7388, "step": 53627 }, { "epoch": 1.78, "grad_norm": 0.5819079875946045, "learning_rate": 0.0002120251465625269, "loss": 1.7822, "step": 53628 }, { "epoch": 1.78, "grad_norm": 0.593997061252594, "learning_rate": 0.0002120151540168793, "loss": 1.7211, "step": 53629 }, { "epoch": 1.78, "grad_norm": 0.5625067949295044, "learning_rate": 0.00021200516157803226, "loss": 1.7137, "step": 53630 }, { "epoch": 1.78, "grad_norm": 0.5748275518417358, "learning_rate": 0.00021199516924599792, "loss": 1.7689, "step": 53631 }, { "epoch": 1.78, "grad_norm": 0.5644595623016357, "learning_rate": 0.00021198517702078856, "loss": 1.783, "step": 53632 }, { "epoch": 1.78, "grad_norm": 0.5784342885017395, "learning_rate": 0.00021197518490241623, "loss": 1.7286, "step": 53633 }, { "epoch": 1.78, "grad_norm": 0.5707840919494629, "learning_rate": 0.00021196519289089303, "loss": 1.7618, "step": 53634 }, { "epoch": 1.78, "grad_norm": 0.5817346572875977, "learning_rate": 0.00021195520098623116, "loss": 1.6901, "step": 53635 }, { "epoch": 1.78, "grad_norm": 0.598355770111084, "learning_rate": 0.00021194520918844272, "loss": 1.7652, "step": 53636 }, { "epoch": 1.78, "grad_norm": 0.5847685933113098, "learning_rate": 0.00021193521749753976, "loss": 1.7249, "step": 53637 }, { "epoch": 1.78, "grad_norm": 0.622029721736908, "learning_rate": 0.0002119252259135344, "loss": 1.7854, "step": 53638 }, { "epoch": 1.78, "grad_norm": 0.5912790894508362, "learning_rate": 0.00021191523443643898, "loss": 1.787, "step": 53639 }, { "epoch": 1.78, "grad_norm": 0.5824077129364014, "learning_rate": 0.00021190524306626544, "loss": 1.7565, "step": 53640 }, { "epoch": 1.78, "grad_norm": 0.5815169811248779, "learning_rate": 0.00021189525180302594, "loss": 1.6979, "step": 53641 }, { "epoch": 1.78, "grad_norm": 0.6078628897666931, "learning_rate": 0.00021188526064673259, "loss": 1.8024, "step": 53642 }, { "epoch": 1.78, "grad_norm": 0.5899901390075684, "learning_rate": 0.00021187526959739767, "loss": 1.7974, "step": 53643 }, { "epoch": 1.78, "grad_norm": 0.5701503753662109, "learning_rate": 0.00021186527865503312, "loss": 1.7633, "step": 53644 }, { "epoch": 1.78, "grad_norm": 0.563571035861969, "learning_rate": 0.00021185528781965103, "loss": 1.7486, "step": 53645 }, { "epoch": 1.78, "grad_norm": 0.6007009744644165, "learning_rate": 0.00021184529709126378, "loss": 1.7637, "step": 53646 }, { "epoch": 1.78, "grad_norm": 0.5550065040588379, "learning_rate": 0.00021183530646988328, "loss": 1.7921, "step": 53647 }, { "epoch": 1.78, "grad_norm": 0.5660527944564819, "learning_rate": 0.00021182531595552174, "loss": 1.778, "step": 53648 }, { "epoch": 1.78, "grad_norm": 0.5834550857543945, "learning_rate": 0.00021181532554819126, "loss": 1.7369, "step": 53649 }, { "epoch": 1.78, "grad_norm": 0.5689830780029297, "learning_rate": 0.00021180533524790403, "loss": 1.6803, "step": 53650 }, { "epoch": 1.78, "grad_norm": 0.5856500267982483, "learning_rate": 0.0002117953450546721, "loss": 1.7676, "step": 53651 }, { "epoch": 1.79, "grad_norm": 0.5579355955123901, "learning_rate": 0.00021178535496850756, "loss": 1.7943, "step": 53652 }, { "epoch": 1.79, "grad_norm": 0.5963614583015442, "learning_rate": 0.00021177536498942273, "loss": 1.7276, "step": 53653 }, { "epoch": 1.79, "grad_norm": 0.5668549537658691, "learning_rate": 0.0002117653751174295, "loss": 1.7176, "step": 53654 }, { "epoch": 1.79, "grad_norm": 0.5706864595413208, "learning_rate": 0.00021175538535254016, "loss": 1.7358, "step": 53655 }, { "epoch": 1.79, "grad_norm": 0.5518513917922974, "learning_rate": 0.00021174539569476683, "loss": 1.7765, "step": 53656 }, { "epoch": 1.79, "grad_norm": 0.5673600435256958, "learning_rate": 0.00021173540614412149, "loss": 1.6756, "step": 53657 }, { "epoch": 1.79, "grad_norm": 0.5812824964523315, "learning_rate": 0.00021172541670061632, "loss": 1.6738, "step": 53658 }, { "epoch": 1.79, "grad_norm": 0.583713710308075, "learning_rate": 0.00021171542736426357, "loss": 1.8209, "step": 53659 }, { "epoch": 1.79, "grad_norm": 0.5586079955101013, "learning_rate": 0.0002117054381350753, "loss": 1.6994, "step": 53660 }, { "epoch": 1.79, "grad_norm": 0.554785966873169, "learning_rate": 0.0002116954490130636, "loss": 1.6639, "step": 53661 }, { "epoch": 1.79, "grad_norm": 0.5672542452812195, "learning_rate": 0.0002116854599982406, "loss": 1.738, "step": 53662 }, { "epoch": 1.79, "grad_norm": 0.5967320799827576, "learning_rate": 0.0002116754710906185, "loss": 1.7593, "step": 53663 }, { "epoch": 1.79, "grad_norm": 0.5828818678855896, "learning_rate": 0.00021166548229020928, "loss": 1.637, "step": 53664 }, { "epoch": 1.79, "grad_norm": 0.5546014308929443, "learning_rate": 0.00021165549359702514, "loss": 1.7669, "step": 53665 }, { "epoch": 1.79, "grad_norm": 0.6001842617988586, "learning_rate": 0.00021164550501107834, "loss": 1.7367, "step": 53666 }, { "epoch": 1.79, "grad_norm": 0.5624064803123474, "learning_rate": 0.00021163551653238073, "loss": 1.7163, "step": 53667 }, { "epoch": 1.79, "grad_norm": 0.5861095190048218, "learning_rate": 0.0002116255281609446, "loss": 1.7189, "step": 53668 }, { "epoch": 1.79, "grad_norm": 0.5563062429428101, "learning_rate": 0.0002116155398967821, "loss": 1.6897, "step": 53669 }, { "epoch": 1.79, "grad_norm": 0.5794377326965332, "learning_rate": 0.00021160555173990538, "loss": 1.7408, "step": 53670 }, { "epoch": 1.79, "grad_norm": 0.5608201026916504, "learning_rate": 0.00021159556369032645, "loss": 1.7358, "step": 53671 }, { "epoch": 1.79, "grad_norm": 0.5581400990486145, "learning_rate": 0.00021158557574805736, "loss": 1.8238, "step": 53672 }, { "epoch": 1.79, "grad_norm": 0.5636817812919617, "learning_rate": 0.00021157558791311056, "loss": 1.7312, "step": 53673 }, { "epoch": 1.79, "grad_norm": 0.5768067836761475, "learning_rate": 0.0002115656001854978, "loss": 1.8038, "step": 53674 }, { "epoch": 1.79, "grad_norm": 0.5734663605690002, "learning_rate": 0.00021155561256523144, "loss": 1.7146, "step": 53675 }, { "epoch": 1.79, "grad_norm": 0.5528402328491211, "learning_rate": 0.00021154562505232353, "loss": 1.7258, "step": 53676 }, { "epoch": 1.79, "grad_norm": 0.5534673929214478, "learning_rate": 0.0002115356376467862, "loss": 1.7154, "step": 53677 }, { "epoch": 1.79, "grad_norm": 0.5646166205406189, "learning_rate": 0.0002115256503486316, "loss": 1.6742, "step": 53678 }, { "epoch": 1.79, "grad_norm": 0.6004992723464966, "learning_rate": 0.00021151566315787167, "loss": 1.7708, "step": 53679 }, { "epoch": 1.79, "grad_norm": 0.5849176049232483, "learning_rate": 0.00021150567607451895, "loss": 1.7807, "step": 53680 }, { "epoch": 1.79, "grad_norm": 0.5547285676002502, "learning_rate": 0.00021149568909858507, "loss": 1.7828, "step": 53681 }, { "epoch": 1.79, "grad_norm": 0.5749149918556213, "learning_rate": 0.00021148570223008246, "loss": 1.7701, "step": 53682 }, { "epoch": 1.79, "grad_norm": 0.5754387378692627, "learning_rate": 0.0002114757154690232, "loss": 1.8134, "step": 53683 }, { "epoch": 1.79, "grad_norm": 0.552003800868988, "learning_rate": 0.0002114657288154194, "loss": 1.8132, "step": 53684 }, { "epoch": 1.79, "grad_norm": 0.6297540664672852, "learning_rate": 0.00021145574226928308, "loss": 1.7282, "step": 53685 }, { "epoch": 1.79, "grad_norm": 0.5923585891723633, "learning_rate": 0.00021144575583062654, "loss": 1.8496, "step": 53686 }, { "epoch": 1.79, "grad_norm": 0.589805543422699, "learning_rate": 0.00021143576949946173, "loss": 1.7698, "step": 53687 }, { "epoch": 1.79, "grad_norm": 0.5470768213272095, "learning_rate": 0.00021142578327580078, "loss": 1.7465, "step": 53688 }, { "epoch": 1.79, "grad_norm": 0.6020606160163879, "learning_rate": 0.00021141579715965592, "loss": 1.732, "step": 53689 }, { "epoch": 1.79, "grad_norm": 0.5796566009521484, "learning_rate": 0.0002114058111510393, "loss": 1.6885, "step": 53690 }, { "epoch": 1.79, "grad_norm": 0.5690822005271912, "learning_rate": 0.00021139582524996295, "loss": 1.7519, "step": 53691 }, { "epoch": 1.79, "grad_norm": 0.5714021325111389, "learning_rate": 0.000211385839456439, "loss": 1.7785, "step": 53692 }, { "epoch": 1.79, "grad_norm": 0.5909273028373718, "learning_rate": 0.00021137585377047964, "loss": 1.7681, "step": 53693 }, { "epoch": 1.79, "grad_norm": 0.5697383284568787, "learning_rate": 0.00021136586819209686, "loss": 1.7928, "step": 53694 }, { "epoch": 1.79, "grad_norm": 0.5728355646133423, "learning_rate": 0.0002113558827213028, "loss": 1.7199, "step": 53695 }, { "epoch": 1.79, "grad_norm": 0.7047873139381409, "learning_rate": 0.00021134589735810985, "loss": 1.7176, "step": 53696 }, { "epoch": 1.79, "grad_norm": 0.5896031856536865, "learning_rate": 0.0002113359121025297, "loss": 1.7977, "step": 53697 }, { "epoch": 1.79, "grad_norm": 0.5645701885223389, "learning_rate": 0.00021132592695457476, "loss": 1.8432, "step": 53698 }, { "epoch": 1.79, "grad_norm": 0.575853168964386, "learning_rate": 0.00021131594191425707, "loss": 1.7624, "step": 53699 }, { "epoch": 1.79, "grad_norm": 0.5518099069595337, "learning_rate": 0.00021130595698158882, "loss": 1.677, "step": 53700 }, { "epoch": 1.79, "grad_norm": 0.5742427110671997, "learning_rate": 0.00021129597215658203, "loss": 1.7542, "step": 53701 }, { "epoch": 1.79, "grad_norm": 0.5742753744125366, "learning_rate": 0.0002112859874392488, "loss": 1.796, "step": 53702 }, { "epoch": 1.79, "grad_norm": 0.5742169618606567, "learning_rate": 0.00021127600282960148, "loss": 1.739, "step": 53703 }, { "epoch": 1.79, "grad_norm": 0.5719807147979736, "learning_rate": 0.00021126601832765183, "loss": 1.7973, "step": 53704 }, { "epoch": 1.79, "grad_norm": 0.5844692587852478, "learning_rate": 0.0002112560339334122, "loss": 1.7475, "step": 53705 }, { "epoch": 1.79, "grad_norm": 0.5546886920928955, "learning_rate": 0.0002112460496468948, "loss": 1.8167, "step": 53706 }, { "epoch": 1.79, "grad_norm": 0.555692732334137, "learning_rate": 0.0002112360654681115, "loss": 1.7737, "step": 53707 }, { "epoch": 1.79, "grad_norm": 0.572927713394165, "learning_rate": 0.00021122608139707456, "loss": 1.6218, "step": 53708 }, { "epoch": 1.79, "grad_norm": 0.545066237449646, "learning_rate": 0.000211216097433796, "loss": 1.6721, "step": 53709 }, { "epoch": 1.79, "grad_norm": 0.5533734560012817, "learning_rate": 0.0002112061135782882, "loss": 1.7734, "step": 53710 }, { "epoch": 1.79, "grad_norm": 0.5588926672935486, "learning_rate": 0.00021119612983056291, "loss": 1.7407, "step": 53711 }, { "epoch": 1.79, "grad_norm": 0.5622320771217346, "learning_rate": 0.00021118614619063255, "loss": 1.7577, "step": 53712 }, { "epoch": 1.79, "grad_norm": 0.5544114112854004, "learning_rate": 0.00021117616265850915, "loss": 1.6946, "step": 53713 }, { "epoch": 1.79, "grad_norm": 0.5640477538108826, "learning_rate": 0.0002111661792342047, "loss": 1.7327, "step": 53714 }, { "epoch": 1.79, "grad_norm": 0.5729394555091858, "learning_rate": 0.00021115619591773144, "loss": 1.6537, "step": 53715 }, { "epoch": 1.79, "grad_norm": 0.556307315826416, "learning_rate": 0.00021114621270910155, "loss": 1.7159, "step": 53716 }, { "epoch": 1.79, "grad_norm": 0.5735872983932495, "learning_rate": 0.00021113622960832696, "loss": 1.7522, "step": 53717 }, { "epoch": 1.79, "grad_norm": 0.5655727982521057, "learning_rate": 0.0002111262466154199, "loss": 1.6689, "step": 53718 }, { "epoch": 1.79, "grad_norm": 0.5997704863548279, "learning_rate": 0.00021111626373039247, "loss": 1.7529, "step": 53719 }, { "epoch": 1.79, "grad_norm": 0.5701402425765991, "learning_rate": 0.0002111062809532569, "loss": 1.7502, "step": 53720 }, { "epoch": 1.79, "grad_norm": 0.5763381123542786, "learning_rate": 0.00021109629828402516, "loss": 1.7142, "step": 53721 }, { "epoch": 1.79, "grad_norm": 0.5781635642051697, "learning_rate": 0.0002110863157227094, "loss": 1.7516, "step": 53722 }, { "epoch": 1.79, "grad_norm": 0.5879101753234863, "learning_rate": 0.00021107633326932182, "loss": 1.8185, "step": 53723 }, { "epoch": 1.79, "grad_norm": 0.5545278787612915, "learning_rate": 0.0002110663509238744, "loss": 1.6843, "step": 53724 }, { "epoch": 1.79, "grad_norm": 0.5818763375282288, "learning_rate": 0.00021105636868637924, "loss": 1.7431, "step": 53725 }, { "epoch": 1.79, "grad_norm": 0.5574068427085876, "learning_rate": 0.00021104638655684876, "loss": 1.7577, "step": 53726 }, { "epoch": 1.79, "grad_norm": 0.5798966884613037, "learning_rate": 0.00021103640453529464, "loss": 1.6796, "step": 53727 }, { "epoch": 1.79, "grad_norm": 0.5720051527023315, "learning_rate": 0.00021102642262172931, "loss": 1.7674, "step": 53728 }, { "epoch": 1.79, "grad_norm": 0.5684292912483215, "learning_rate": 0.0002110164408161648, "loss": 1.7557, "step": 53729 }, { "epoch": 1.79, "grad_norm": 0.5745517015457153, "learning_rate": 0.0002110064591186133, "loss": 1.6938, "step": 53730 }, { "epoch": 1.79, "grad_norm": 0.5572392344474792, "learning_rate": 0.00021099647752908672, "loss": 1.7009, "step": 53731 }, { "epoch": 1.79, "grad_norm": 0.577499270439148, "learning_rate": 0.0002109864960475973, "loss": 1.7788, "step": 53732 }, { "epoch": 1.79, "grad_norm": 0.5598523616790771, "learning_rate": 0.0002109765146741573, "loss": 1.7244, "step": 53733 }, { "epoch": 1.79, "grad_norm": 0.5901126861572266, "learning_rate": 0.0002109665334087785, "loss": 1.7414, "step": 53734 }, { "epoch": 1.79, "grad_norm": 0.5755717754364014, "learning_rate": 0.00021095655225147333, "loss": 1.7087, "step": 53735 }, { "epoch": 1.79, "grad_norm": 0.5885708928108215, "learning_rate": 0.00021094657120225381, "loss": 1.7771, "step": 53736 }, { "epoch": 1.79, "grad_norm": 0.584003746509552, "learning_rate": 0.00021093659026113197, "loss": 1.7443, "step": 53737 }, { "epoch": 1.79, "grad_norm": 0.5679042935371399, "learning_rate": 0.00021092660942812, "loss": 1.722, "step": 53738 }, { "epoch": 1.79, "grad_norm": 0.554252028465271, "learning_rate": 0.00021091662870322992, "loss": 1.7731, "step": 53739 }, { "epoch": 1.79, "grad_norm": 0.5667931437492371, "learning_rate": 0.00021090664808647412, "loss": 1.699, "step": 53740 }, { "epoch": 1.79, "grad_norm": 0.5640459060668945, "learning_rate": 0.00021089666757786438, "loss": 1.7309, "step": 53741 }, { "epoch": 1.79, "grad_norm": 0.5669443011283875, "learning_rate": 0.00021088668717741295, "loss": 1.6712, "step": 53742 }, { "epoch": 1.79, "grad_norm": 0.5847065448760986, "learning_rate": 0.00021087670688513203, "loss": 1.6885, "step": 53743 }, { "epoch": 1.79, "grad_norm": 0.5565983653068542, "learning_rate": 0.0002108667267010336, "loss": 1.7469, "step": 53744 }, { "epoch": 1.79, "grad_norm": 0.5753913521766663, "learning_rate": 0.00021085674662512985, "loss": 1.7693, "step": 53745 }, { "epoch": 1.79, "grad_norm": 0.554861307144165, "learning_rate": 0.00021084676665743293, "loss": 1.7406, "step": 53746 }, { "epoch": 1.79, "grad_norm": 0.5595706701278687, "learning_rate": 0.00021083678679795483, "loss": 1.7261, "step": 53747 }, { "epoch": 1.79, "grad_norm": 0.5676063895225525, "learning_rate": 0.00021082680704670768, "loss": 1.6951, "step": 53748 }, { "epoch": 1.79, "grad_norm": 0.5575376152992249, "learning_rate": 0.0002108168274037037, "loss": 1.7295, "step": 53749 }, { "epoch": 1.79, "grad_norm": 0.5713164806365967, "learning_rate": 0.00021080684786895502, "loss": 1.7815, "step": 53750 }, { "epoch": 1.79, "grad_norm": 0.5728820562362671, "learning_rate": 0.00021079686844247358, "loss": 1.7405, "step": 53751 }, { "epoch": 1.79, "grad_norm": 0.5609899759292603, "learning_rate": 0.00021078688912427166, "loss": 1.7022, "step": 53752 }, { "epoch": 1.79, "grad_norm": 0.5534465909004211, "learning_rate": 0.00021077690991436134, "loss": 1.7801, "step": 53753 }, { "epoch": 1.79, "grad_norm": 0.5434256792068481, "learning_rate": 0.00021076693081275465, "loss": 1.8093, "step": 53754 }, { "epoch": 1.79, "grad_norm": 0.5632168054580688, "learning_rate": 0.0002107569518194637, "loss": 1.774, "step": 53755 }, { "epoch": 1.79, "grad_norm": 0.5782229900360107, "learning_rate": 0.00021074697293450078, "loss": 1.7561, "step": 53756 }, { "epoch": 1.79, "grad_norm": 0.5523819327354431, "learning_rate": 0.00021073699415787784, "loss": 1.8076, "step": 53757 }, { "epoch": 1.79, "grad_norm": 0.5559927821159363, "learning_rate": 0.00021072701548960704, "loss": 1.6826, "step": 53758 }, { "epoch": 1.79, "grad_norm": 0.5682361125946045, "learning_rate": 0.00021071703692970042, "loss": 1.6507, "step": 53759 }, { "epoch": 1.79, "grad_norm": 0.5701289176940918, "learning_rate": 0.0002107070584781703, "loss": 1.7116, "step": 53760 }, { "epoch": 1.79, "grad_norm": 0.5867823362350464, "learning_rate": 0.00021069708013502858, "loss": 1.8046, "step": 53761 }, { "epoch": 1.79, "grad_norm": 0.550803005695343, "learning_rate": 0.00021068710190028737, "loss": 1.8031, "step": 53762 }, { "epoch": 1.79, "grad_norm": 0.5798251032829285, "learning_rate": 0.00021067712377395898, "loss": 1.7292, "step": 53763 }, { "epoch": 1.79, "grad_norm": 0.57097989320755, "learning_rate": 0.00021066714575605536, "loss": 1.771, "step": 53764 }, { "epoch": 1.79, "grad_norm": 0.5703704357147217, "learning_rate": 0.00021065716784658862, "loss": 1.7301, "step": 53765 }, { "epoch": 1.79, "grad_norm": 0.5760355591773987, "learning_rate": 0.00021064719004557095, "loss": 1.773, "step": 53766 }, { "epoch": 1.79, "grad_norm": 0.5616584420204163, "learning_rate": 0.0002106372123530145, "loss": 1.7095, "step": 53767 }, { "epoch": 1.79, "grad_norm": 0.5522599816322327, "learning_rate": 0.0002106272347689312, "loss": 1.7023, "step": 53768 }, { "epoch": 1.79, "grad_norm": 0.5518026351928711, "learning_rate": 0.00021061725729333325, "loss": 1.726, "step": 53769 }, { "epoch": 1.79, "grad_norm": 0.5892131328582764, "learning_rate": 0.00021060727992623288, "loss": 1.7289, "step": 53770 }, { "epoch": 1.79, "grad_norm": 0.5651692152023315, "learning_rate": 0.00021059730266764204, "loss": 1.8067, "step": 53771 }, { "epoch": 1.79, "grad_norm": 0.5751973390579224, "learning_rate": 0.00021058732551757292, "loss": 1.8282, "step": 53772 }, { "epoch": 1.79, "grad_norm": 0.5571826696395874, "learning_rate": 0.00021057734847603764, "loss": 1.6699, "step": 53773 }, { "epoch": 1.79, "grad_norm": 0.538033664226532, "learning_rate": 0.00021056737154304825, "loss": 1.6935, "step": 53774 }, { "epoch": 1.79, "grad_norm": 0.5771855711936951, "learning_rate": 0.0002105573947186168, "loss": 1.796, "step": 53775 }, { "epoch": 1.79, "grad_norm": 0.5833560824394226, "learning_rate": 0.0002105474180027556, "loss": 1.7115, "step": 53776 }, { "epoch": 1.79, "grad_norm": 0.5776985883712769, "learning_rate": 0.00021053744139547668, "loss": 1.7807, "step": 53777 }, { "epoch": 1.79, "grad_norm": 0.5958829522132874, "learning_rate": 0.00021052746489679208, "loss": 1.7508, "step": 53778 }, { "epoch": 1.79, "grad_norm": 0.5988540649414062, "learning_rate": 0.00021051748850671397, "loss": 1.7712, "step": 53779 }, { "epoch": 1.79, "grad_norm": 0.5718606114387512, "learning_rate": 0.00021050751222525448, "loss": 1.8245, "step": 53780 }, { "epoch": 1.79, "grad_norm": 0.5485100746154785, "learning_rate": 0.00021049753605242562, "loss": 1.6849, "step": 53781 }, { "epoch": 1.79, "grad_norm": 0.5573418140411377, "learning_rate": 0.00021048755998823951, "loss": 1.6789, "step": 53782 }, { "epoch": 1.79, "grad_norm": 0.5564510822296143, "learning_rate": 0.0002104775840327085, "loss": 1.682, "step": 53783 }, { "epoch": 1.79, "grad_norm": 0.5568459033966064, "learning_rate": 0.0002104676081858443, "loss": 1.8534, "step": 53784 }, { "epoch": 1.79, "grad_norm": 0.5680793523788452, "learning_rate": 0.00021045763244765934, "loss": 1.6338, "step": 53785 }, { "epoch": 1.79, "grad_norm": 0.5767549872398376, "learning_rate": 0.0002104476568181656, "loss": 1.8084, "step": 53786 }, { "epoch": 1.79, "grad_norm": 0.5519031882286072, "learning_rate": 0.0002104376812973753, "loss": 1.7521, "step": 53787 }, { "epoch": 1.79, "grad_norm": 0.5512263178825378, "learning_rate": 0.00021042770588530034, "loss": 1.7472, "step": 53788 }, { "epoch": 1.79, "grad_norm": 0.5851523876190186, "learning_rate": 0.00021041773058195291, "loss": 1.7846, "step": 53789 }, { "epoch": 1.79, "grad_norm": 0.5706243515014648, "learning_rate": 0.0002104077553873454, "loss": 1.6989, "step": 53790 }, { "epoch": 1.79, "grad_norm": 0.5633565783500671, "learning_rate": 0.00021039778030148938, "loss": 1.6858, "step": 53791 }, { "epoch": 1.79, "grad_norm": 0.5703228712081909, "learning_rate": 0.00021038780532439737, "loss": 1.7709, "step": 53792 }, { "epoch": 1.79, "grad_norm": 1.919623613357544, "learning_rate": 0.00021037783045608143, "loss": 1.7943, "step": 53793 }, { "epoch": 1.79, "grad_norm": 0.5530077815055847, "learning_rate": 0.00021036785569655353, "loss": 1.6795, "step": 53794 }, { "epoch": 1.79, "grad_norm": 0.5758652091026306, "learning_rate": 0.00021035788104582586, "loss": 1.7672, "step": 53795 }, { "epoch": 1.79, "grad_norm": 0.577172577381134, "learning_rate": 0.00021034790650391045, "loss": 1.7015, "step": 53796 }, { "epoch": 1.79, "grad_norm": 0.5635278224945068, "learning_rate": 0.00021033793207081964, "loss": 1.8087, "step": 53797 }, { "epoch": 1.79, "grad_norm": 0.5667970180511475, "learning_rate": 0.00021032795774656517, "loss": 1.7385, "step": 53798 }, { "epoch": 1.79, "grad_norm": 0.5866035223007202, "learning_rate": 0.00021031798353115944, "loss": 1.7426, "step": 53799 }, { "epoch": 1.79, "grad_norm": 0.5630604028701782, "learning_rate": 0.00021030800942461447, "loss": 1.7203, "step": 53800 }, { "epoch": 1.79, "grad_norm": 0.5590904951095581, "learning_rate": 0.00021029803542694237, "loss": 1.6286, "step": 53801 }, { "epoch": 1.79, "grad_norm": 0.5644844770431519, "learning_rate": 0.00021028806153815523, "loss": 1.7143, "step": 53802 }, { "epoch": 1.79, "grad_norm": 0.5941689014434814, "learning_rate": 0.00021027808775826517, "loss": 1.7383, "step": 53803 }, { "epoch": 1.79, "grad_norm": 0.581549882888794, "learning_rate": 0.00021026811408728427, "loss": 1.7837, "step": 53804 }, { "epoch": 1.79, "grad_norm": 0.5557680726051331, "learning_rate": 0.00021025814052522457, "loss": 1.7083, "step": 53805 }, { "epoch": 1.79, "grad_norm": 0.5440821051597595, "learning_rate": 0.00021024816707209838, "loss": 1.7256, "step": 53806 }, { "epoch": 1.79, "grad_norm": 0.5625043511390686, "learning_rate": 0.0002102381937279177, "loss": 1.7634, "step": 53807 }, { "epoch": 1.79, "grad_norm": 0.5837436318397522, "learning_rate": 0.00021022822049269463, "loss": 1.7166, "step": 53808 }, { "epoch": 1.79, "grad_norm": 0.5825515389442444, "learning_rate": 0.00021021824736644119, "loss": 1.6945, "step": 53809 }, { "epoch": 1.79, "grad_norm": 0.5799329876899719, "learning_rate": 0.00021020827434916972, "loss": 1.7068, "step": 53810 }, { "epoch": 1.79, "grad_norm": 0.5756556391716003, "learning_rate": 0.00021019830144089206, "loss": 1.7083, "step": 53811 }, { "epoch": 1.79, "grad_norm": 0.5709705352783203, "learning_rate": 0.00021018832864162036, "loss": 1.7546, "step": 53812 }, { "epoch": 1.79, "grad_norm": 0.5760256052017212, "learning_rate": 0.00021017835595136698, "loss": 1.6785, "step": 53813 }, { "epoch": 1.79, "grad_norm": 0.6217741966247559, "learning_rate": 0.00021016838337014368, "loss": 1.7162, "step": 53814 }, { "epoch": 1.79, "grad_norm": 0.5733892321586609, "learning_rate": 0.00021015841089796277, "loss": 1.6893, "step": 53815 }, { "epoch": 1.79, "grad_norm": 0.5973450541496277, "learning_rate": 0.00021014843853483633, "loss": 1.741, "step": 53816 }, { "epoch": 1.79, "grad_norm": 0.5836136937141418, "learning_rate": 0.00021013846628077645, "loss": 1.7673, "step": 53817 }, { "epoch": 1.79, "grad_norm": 0.5610267519950867, "learning_rate": 0.00021012849413579526, "loss": 1.7623, "step": 53818 }, { "epoch": 1.79, "grad_norm": 0.5705792903900146, "learning_rate": 0.00021011852209990473, "loss": 1.754, "step": 53819 }, { "epoch": 1.79, "grad_norm": 0.598331093788147, "learning_rate": 0.00021010855017311727, "loss": 1.7307, "step": 53820 }, { "epoch": 1.79, "grad_norm": 0.6008761525154114, "learning_rate": 0.00021009857835544454, "loss": 1.82, "step": 53821 }, { "epoch": 1.79, "grad_norm": 0.5746310949325562, "learning_rate": 0.00021008860664689901, "loss": 1.7526, "step": 53822 }, { "epoch": 1.79, "grad_norm": 0.5843725800514221, "learning_rate": 0.00021007863504749274, "loss": 1.7937, "step": 53823 }, { "epoch": 1.79, "grad_norm": 1.0980144739151, "learning_rate": 0.00021006866355723766, "loss": 1.7096, "step": 53824 }, { "epoch": 1.79, "grad_norm": 0.592337965965271, "learning_rate": 0.000210058692176146, "loss": 1.8016, "step": 53825 }, { "epoch": 1.79, "grad_norm": 0.5743362903594971, "learning_rate": 0.00021004872090422974, "loss": 1.7334, "step": 53826 }, { "epoch": 1.79, "grad_norm": 0.587242841720581, "learning_rate": 0.0002100387497415013, "loss": 1.7502, "step": 53827 }, { "epoch": 1.79, "grad_norm": 0.5641628503799438, "learning_rate": 0.00021002877868797233, "loss": 1.7268, "step": 53828 }, { "epoch": 1.79, "grad_norm": 0.5722032785415649, "learning_rate": 0.00021001880774365524, "loss": 1.7985, "step": 53829 }, { "epoch": 1.79, "grad_norm": 0.5988489389419556, "learning_rate": 0.00021000883690856214, "loss": 1.8111, "step": 53830 }, { "epoch": 1.79, "grad_norm": 0.5686326026916504, "learning_rate": 0.00020999886618270502, "loss": 1.7523, "step": 53831 }, { "epoch": 1.79, "grad_norm": 0.6206787824630737, "learning_rate": 0.00020998889556609598, "loss": 1.7391, "step": 53832 }, { "epoch": 1.79, "grad_norm": 0.6142051219940186, "learning_rate": 0.00020997892505874723, "loss": 1.701, "step": 53833 }, { "epoch": 1.79, "grad_norm": 0.5730181932449341, "learning_rate": 0.00020996895466067074, "loss": 1.7126, "step": 53834 }, { "epoch": 1.79, "grad_norm": 0.5866687297821045, "learning_rate": 0.00020995898437187858, "loss": 1.7548, "step": 53835 }, { "epoch": 1.79, "grad_norm": 0.581833004951477, "learning_rate": 0.00020994901419238305, "loss": 1.7368, "step": 53836 }, { "epoch": 1.79, "grad_norm": 0.5950989127159119, "learning_rate": 0.0002099390441221962, "loss": 1.7633, "step": 53837 }, { "epoch": 1.79, "grad_norm": 0.5891193151473999, "learning_rate": 0.00020992907416133, "loss": 1.7379, "step": 53838 }, { "epoch": 1.79, "grad_norm": 0.6046914458274841, "learning_rate": 0.00020991910430979665, "loss": 1.7782, "step": 53839 }, { "epoch": 1.79, "grad_norm": 0.5925371050834656, "learning_rate": 0.00020990913456760826, "loss": 1.6651, "step": 53840 }, { "epoch": 1.79, "grad_norm": 0.6136839985847473, "learning_rate": 0.00020989916493477687, "loss": 1.7916, "step": 53841 }, { "epoch": 1.79, "grad_norm": 0.5665197968482971, "learning_rate": 0.00020988919541131457, "loss": 1.8013, "step": 53842 }, { "epoch": 1.79, "grad_norm": 0.5807421803474426, "learning_rate": 0.0002098792259972337, "loss": 1.7735, "step": 53843 }, { "epoch": 1.79, "grad_norm": 0.5707536339759827, "learning_rate": 0.00020986925669254597, "loss": 1.6809, "step": 53844 }, { "epoch": 1.79, "grad_norm": 0.5614526867866516, "learning_rate": 0.00020985928749726375, "loss": 1.7184, "step": 53845 }, { "epoch": 1.79, "grad_norm": 0.5958548188209534, "learning_rate": 0.00020984931841139907, "loss": 1.7436, "step": 53846 }, { "epoch": 1.79, "grad_norm": 0.5871837735176086, "learning_rate": 0.00020983934943496409, "loss": 1.7062, "step": 53847 }, { "epoch": 1.79, "grad_norm": 0.5704832077026367, "learning_rate": 0.00020982938056797077, "loss": 1.6922, "step": 53848 }, { "epoch": 1.79, "grad_norm": 0.5684531331062317, "learning_rate": 0.00020981941181043127, "loss": 1.7885, "step": 53849 }, { "epoch": 1.79, "grad_norm": 0.5766302943229675, "learning_rate": 0.00020980944316235792, "loss": 1.7217, "step": 53850 }, { "epoch": 1.79, "grad_norm": 0.5595650672912598, "learning_rate": 0.0002097994746237624, "loss": 1.7538, "step": 53851 }, { "epoch": 1.79, "grad_norm": 0.5573667883872986, "learning_rate": 0.00020978950619465707, "loss": 1.7288, "step": 53852 }, { "epoch": 1.79, "grad_norm": 0.5732288956642151, "learning_rate": 0.0002097795378750541, "loss": 1.6884, "step": 53853 }, { "epoch": 1.79, "grad_norm": 0.5535578727722168, "learning_rate": 0.00020976956966496538, "loss": 1.7161, "step": 53854 }, { "epoch": 1.79, "grad_norm": 0.5504462122917175, "learning_rate": 0.00020975960156440312, "loss": 1.6852, "step": 53855 }, { "epoch": 1.79, "grad_norm": 0.5625969171524048, "learning_rate": 0.00020974963357337935, "loss": 1.7346, "step": 53856 }, { "epoch": 1.79, "grad_norm": 0.557023286819458, "learning_rate": 0.00020973966569190638, "loss": 1.7706, "step": 53857 }, { "epoch": 1.79, "grad_norm": 0.6002151966094971, "learning_rate": 0.000209729697919996, "loss": 1.7925, "step": 53858 }, { "epoch": 1.79, "grad_norm": 0.5661352872848511, "learning_rate": 0.00020971973025766052, "loss": 1.8746, "step": 53859 }, { "epoch": 1.79, "grad_norm": 0.5853999853134155, "learning_rate": 0.0002097097627049121, "loss": 1.7248, "step": 53860 }, { "epoch": 1.79, "grad_norm": 0.5520417094230652, "learning_rate": 0.00020969979526176257, "loss": 1.733, "step": 53861 }, { "epoch": 1.79, "grad_norm": 0.5517992377281189, "learning_rate": 0.00020968982792822423, "loss": 1.746, "step": 53862 }, { "epoch": 1.79, "grad_norm": 0.5502176880836487, "learning_rate": 0.00020967986070430923, "loss": 1.779, "step": 53863 }, { "epoch": 1.79, "grad_norm": 0.59303218126297, "learning_rate": 0.00020966989359002945, "loss": 1.7545, "step": 53864 }, { "epoch": 1.79, "grad_norm": 0.557858943939209, "learning_rate": 0.00020965992658539707, "loss": 1.6829, "step": 53865 }, { "epoch": 1.79, "grad_norm": 0.5800732970237732, "learning_rate": 0.00020964995969042425, "loss": 1.7195, "step": 53866 }, { "epoch": 1.79, "grad_norm": 0.5640984773635864, "learning_rate": 0.00020963999290512318, "loss": 1.7636, "step": 53867 }, { "epoch": 1.79, "grad_norm": 0.5761500000953674, "learning_rate": 0.00020963002622950577, "loss": 1.6988, "step": 53868 }, { "epoch": 1.79, "grad_norm": 0.5685430765151978, "learning_rate": 0.00020962005966358423, "loss": 1.6532, "step": 53869 }, { "epoch": 1.79, "grad_norm": 0.59931480884552, "learning_rate": 0.00020961009320737063, "loss": 1.6751, "step": 53870 }, { "epoch": 1.79, "grad_norm": 0.5459610819816589, "learning_rate": 0.00020960012686087703, "loss": 1.6905, "step": 53871 }, { "epoch": 1.79, "grad_norm": 0.5570580363273621, "learning_rate": 0.00020959016062411547, "loss": 1.6866, "step": 53872 }, { "epoch": 1.79, "grad_norm": 0.5840076208114624, "learning_rate": 0.00020958019449709823, "loss": 1.7099, "step": 53873 }, { "epoch": 1.79, "grad_norm": 0.5602741837501526, "learning_rate": 0.0002095702284798373, "loss": 1.7997, "step": 53874 }, { "epoch": 1.79, "grad_norm": 0.5703673958778381, "learning_rate": 0.00020956026257234475, "loss": 1.718, "step": 53875 }, { "epoch": 1.79, "grad_norm": 0.5909063816070557, "learning_rate": 0.00020955029677463275, "loss": 1.7692, "step": 53876 }, { "epoch": 1.79, "grad_norm": 0.5748092532157898, "learning_rate": 0.00020954033108671344, "loss": 1.7153, "step": 53877 }, { "epoch": 1.79, "grad_norm": 0.5930293202400208, "learning_rate": 0.00020953036550859874, "loss": 1.8502, "step": 53878 }, { "epoch": 1.79, "grad_norm": 0.5748286247253418, "learning_rate": 0.00020952040004030078, "loss": 1.8241, "step": 53879 }, { "epoch": 1.79, "grad_norm": 0.6010176539421082, "learning_rate": 0.0002095104346818319, "loss": 1.7263, "step": 53880 }, { "epoch": 1.79, "grad_norm": 0.5953933596611023, "learning_rate": 0.00020950046943320387, "loss": 1.7099, "step": 53881 }, { "epoch": 1.79, "grad_norm": 0.5709287524223328, "learning_rate": 0.000209490504294429, "loss": 1.7449, "step": 53882 }, { "epoch": 1.79, "grad_norm": 0.5790329575538635, "learning_rate": 0.00020948053926551937, "loss": 1.7317, "step": 53883 }, { "epoch": 1.79, "grad_norm": 0.5896996259689331, "learning_rate": 0.00020947057434648697, "loss": 1.7222, "step": 53884 }, { "epoch": 1.79, "grad_norm": 0.5532034635543823, "learning_rate": 0.00020946060953734396, "loss": 1.7768, "step": 53885 }, { "epoch": 1.79, "grad_norm": 0.6098640561103821, "learning_rate": 0.00020945064483810234, "loss": 1.8208, "step": 53886 }, { "epoch": 1.79, "grad_norm": 0.5729457139968872, "learning_rate": 0.0002094406802487744, "loss": 1.7705, "step": 53887 }, { "epoch": 1.79, "grad_norm": 0.5653059482574463, "learning_rate": 0.00020943071576937214, "loss": 1.7592, "step": 53888 }, { "epoch": 1.79, "grad_norm": 0.5659621953964233, "learning_rate": 0.00020942075139990758, "loss": 1.7742, "step": 53889 }, { "epoch": 1.79, "grad_norm": 0.5637078881263733, "learning_rate": 0.00020941078714039298, "loss": 1.6682, "step": 53890 }, { "epoch": 1.79, "grad_norm": 0.5561959147453308, "learning_rate": 0.00020940082299084027, "loss": 1.8043, "step": 53891 }, { "epoch": 1.79, "grad_norm": 0.5503213405609131, "learning_rate": 0.00020939085895126154, "loss": 1.7504, "step": 53892 }, { "epoch": 1.79, "grad_norm": 0.5672307014465332, "learning_rate": 0.000209380895021669, "loss": 1.7226, "step": 53893 }, { "epoch": 1.79, "grad_norm": 0.5410662293434143, "learning_rate": 0.00020937093120207482, "loss": 1.6984, "step": 53894 }, { "epoch": 1.79, "grad_norm": 0.5395645499229431, "learning_rate": 0.0002093609674924909, "loss": 1.6453, "step": 53895 }, { "epoch": 1.79, "grad_norm": 0.5902047157287598, "learning_rate": 0.0002093510038929294, "loss": 1.7977, "step": 53896 }, { "epoch": 1.79, "grad_norm": 0.5648837685585022, "learning_rate": 0.00020934104040340248, "loss": 1.7398, "step": 53897 }, { "epoch": 1.79, "grad_norm": 0.5763526558876038, "learning_rate": 0.0002093310770239221, "loss": 1.7036, "step": 53898 }, { "epoch": 1.79, "grad_norm": 0.5572947263717651, "learning_rate": 0.0002093211137545004, "loss": 1.7219, "step": 53899 }, { "epoch": 1.79, "grad_norm": 0.5697030425071716, "learning_rate": 0.00020931115059514966, "loss": 1.6643, "step": 53900 }, { "epoch": 1.79, "grad_norm": 0.6609795093536377, "learning_rate": 0.00020930118754588166, "loss": 1.7424, "step": 53901 }, { "epoch": 1.79, "grad_norm": 0.5703802704811096, "learning_rate": 0.0002092912246067087, "loss": 1.7175, "step": 53902 }, { "epoch": 1.79, "grad_norm": 0.5915942192077637, "learning_rate": 0.00020928126177764287, "loss": 1.7754, "step": 53903 }, { "epoch": 1.79, "grad_norm": 0.5708644390106201, "learning_rate": 0.00020927129905869626, "loss": 1.717, "step": 53904 }, { "epoch": 1.79, "grad_norm": 0.5650880336761475, "learning_rate": 0.00020926133644988084, "loss": 1.7344, "step": 53905 }, { "epoch": 1.79, "grad_norm": 0.5730404853820801, "learning_rate": 0.0002092513739512087, "loss": 1.6732, "step": 53906 }, { "epoch": 1.79, "grad_norm": 0.5701661109924316, "learning_rate": 0.00020924141156269233, "loss": 1.6925, "step": 53907 }, { "epoch": 1.79, "grad_norm": 0.586031436920166, "learning_rate": 0.0002092314492843432, "loss": 1.7517, "step": 53908 }, { "epoch": 1.79, "grad_norm": 0.5953072905540466, "learning_rate": 0.00020922148711617386, "loss": 1.7131, "step": 53909 }, { "epoch": 1.79, "grad_norm": 0.5658089518547058, "learning_rate": 0.00020921152505819625, "loss": 1.7837, "step": 53910 }, { "epoch": 1.79, "grad_norm": 0.5637607574462891, "learning_rate": 0.00020920156311042245, "loss": 1.6882, "step": 53911 }, { "epoch": 1.79, "grad_norm": 0.5427771210670471, "learning_rate": 0.00020919160127286457, "loss": 1.7184, "step": 53912 }, { "epoch": 1.79, "grad_norm": 0.568853497505188, "learning_rate": 0.00020918163954553466, "loss": 1.7675, "step": 53913 }, { "epoch": 1.79, "grad_norm": 0.5779386758804321, "learning_rate": 0.00020917167792844504, "loss": 1.6937, "step": 53914 }, { "epoch": 1.79, "grad_norm": 0.5424858927726746, "learning_rate": 0.0002091617164216074, "loss": 1.696, "step": 53915 }, { "epoch": 1.79, "grad_norm": 0.5545673370361328, "learning_rate": 0.00020915175502503417, "loss": 1.7545, "step": 53916 }, { "epoch": 1.79, "grad_norm": 0.5416285991668701, "learning_rate": 0.00020914179373873734, "loss": 1.6858, "step": 53917 }, { "epoch": 1.79, "grad_norm": 0.5603528618812561, "learning_rate": 0.0002091318325627289, "loss": 1.7431, "step": 53918 }, { "epoch": 1.79, "grad_norm": 0.5596425533294678, "learning_rate": 0.00020912187149702108, "loss": 1.7269, "step": 53919 }, { "epoch": 1.79, "grad_norm": 0.5827447772026062, "learning_rate": 0.00020911191054162598, "loss": 1.7706, "step": 53920 }, { "epoch": 1.79, "grad_norm": 0.5681747794151306, "learning_rate": 0.00020910194969655553, "loss": 1.7207, "step": 53921 }, { "epoch": 1.79, "grad_norm": 0.5684273838996887, "learning_rate": 0.00020909198896182187, "loss": 1.7176, "step": 53922 }, { "epoch": 1.79, "grad_norm": 0.5533787608146667, "learning_rate": 0.00020908202833743722, "loss": 1.7554, "step": 53923 }, { "epoch": 1.79, "grad_norm": 0.5584561824798584, "learning_rate": 0.00020907206782341364, "loss": 1.6957, "step": 53924 }, { "epoch": 1.79, "grad_norm": 0.5547075271606445, "learning_rate": 0.0002090621074197631, "loss": 1.7167, "step": 53925 }, { "epoch": 1.79, "grad_norm": 0.5719156861305237, "learning_rate": 0.0002090521471264978, "loss": 1.6796, "step": 53926 }, { "epoch": 1.79, "grad_norm": 0.5400481224060059, "learning_rate": 0.0002090421869436298, "loss": 1.6928, "step": 53927 }, { "epoch": 1.79, "grad_norm": 0.5759326815605164, "learning_rate": 0.00020903222687117113, "loss": 1.7556, "step": 53928 }, { "epoch": 1.79, "grad_norm": 0.5701917409896851, "learning_rate": 0.0002090222669091339, "loss": 1.7532, "step": 53929 }, { "epoch": 1.79, "grad_norm": 0.6430637836456299, "learning_rate": 0.00020901230705753043, "loss": 1.7263, "step": 53930 }, { "epoch": 1.79, "grad_norm": 0.5988616943359375, "learning_rate": 0.00020900234731637243, "loss": 1.7322, "step": 53931 }, { "epoch": 1.79, "grad_norm": 0.5847529768943787, "learning_rate": 0.0002089923876856722, "loss": 1.7843, "step": 53932 }, { "epoch": 1.79, "grad_norm": 0.5823338627815247, "learning_rate": 0.0002089824281654418, "loss": 1.7431, "step": 53933 }, { "epoch": 1.79, "grad_norm": 0.5580453276634216, "learning_rate": 0.00020897246875569342, "loss": 1.7352, "step": 53934 }, { "epoch": 1.79, "grad_norm": 0.5446292161941528, "learning_rate": 0.00020896250945643893, "loss": 1.8125, "step": 53935 }, { "epoch": 1.79, "grad_norm": 0.5647866725921631, "learning_rate": 0.0002089525502676905, "loss": 1.7275, "step": 53936 }, { "epoch": 1.79, "grad_norm": 0.5704221129417419, "learning_rate": 0.0002089425911894605, "loss": 1.7785, "step": 53937 }, { "epoch": 1.79, "grad_norm": 0.5693292617797852, "learning_rate": 0.00020893263222176054, "loss": 1.6832, "step": 53938 }, { "epoch": 1.79, "grad_norm": 0.5630760788917542, "learning_rate": 0.000208922673364603, "loss": 1.737, "step": 53939 }, { "epoch": 1.79, "grad_norm": 0.5724084377288818, "learning_rate": 0.000208912714618, "loss": 1.6468, "step": 53940 }, { "epoch": 1.79, "grad_norm": 0.5764049291610718, "learning_rate": 0.0002089027559819635, "loss": 1.7039, "step": 53941 }, { "epoch": 1.79, "grad_norm": 0.5808194875717163, "learning_rate": 0.0002088927974565056, "loss": 1.7353, "step": 53942 }, { "epoch": 1.79, "grad_norm": 0.5867059826850891, "learning_rate": 0.00020888283904163838, "loss": 1.719, "step": 53943 }, { "epoch": 1.79, "grad_norm": 0.574292004108429, "learning_rate": 0.00020887288073737415, "loss": 1.6471, "step": 53944 }, { "epoch": 1.79, "grad_norm": 0.5632061958312988, "learning_rate": 0.00020886292254372458, "loss": 1.6905, "step": 53945 }, { "epoch": 1.79, "grad_norm": 0.5638337135314941, "learning_rate": 0.00020885296446070212, "loss": 1.6445, "step": 53946 }, { "epoch": 1.79, "grad_norm": 0.5673892498016357, "learning_rate": 0.00020884300648831876, "loss": 1.7302, "step": 53947 }, { "epoch": 1.79, "grad_norm": 0.5824955701828003, "learning_rate": 0.00020883304862658652, "loss": 1.8336, "step": 53948 }, { "epoch": 1.79, "grad_norm": 0.562296986579895, "learning_rate": 0.0002088230908755175, "loss": 1.7578, "step": 53949 }, { "epoch": 1.79, "grad_norm": 0.5748216509819031, "learning_rate": 0.00020881313323512388, "loss": 1.7854, "step": 53950 }, { "epoch": 1.79, "grad_norm": 0.5811273455619812, "learning_rate": 0.00020880317570541764, "loss": 1.7855, "step": 53951 }, { "epoch": 1.79, "grad_norm": 0.5906131267547607, "learning_rate": 0.0002087932182864108, "loss": 1.7021, "step": 53952 }, { "epoch": 1.8, "grad_norm": 0.6065179705619812, "learning_rate": 0.00020878326097811567, "loss": 1.7055, "step": 53953 }, { "epoch": 1.8, "grad_norm": 0.5683399438858032, "learning_rate": 0.00020877330378054425, "loss": 1.7578, "step": 53954 }, { "epoch": 1.8, "grad_norm": 0.5590788722038269, "learning_rate": 0.00020876334669370854, "loss": 1.7471, "step": 53955 }, { "epoch": 1.8, "grad_norm": 0.5413955450057983, "learning_rate": 0.0002087533897176207, "loss": 1.6698, "step": 53956 }, { "epoch": 1.8, "grad_norm": 0.5724906325340271, "learning_rate": 0.00020874343285229286, "loss": 1.7254, "step": 53957 }, { "epoch": 1.8, "grad_norm": 0.5547798275947571, "learning_rate": 0.00020873347609773695, "loss": 1.7365, "step": 53958 }, { "epoch": 1.8, "grad_norm": 0.5730106830596924, "learning_rate": 0.00020872351945396513, "loss": 1.6693, "step": 53959 }, { "epoch": 1.8, "grad_norm": 0.5750297904014587, "learning_rate": 0.00020871356292098968, "loss": 1.7664, "step": 53960 }, { "epoch": 1.8, "grad_norm": 0.5554724931716919, "learning_rate": 0.00020870360649882235, "loss": 1.6971, "step": 53961 }, { "epoch": 1.8, "grad_norm": 0.583236038684845, "learning_rate": 0.00020869365018747542, "loss": 1.7086, "step": 53962 }, { "epoch": 1.8, "grad_norm": 0.6071122288703918, "learning_rate": 0.00020868369398696098, "loss": 1.7202, "step": 53963 }, { "epoch": 1.8, "grad_norm": 0.5592541694641113, "learning_rate": 0.0002086737378972911, "loss": 1.712, "step": 53964 }, { "epoch": 1.8, "grad_norm": 0.6024791598320007, "learning_rate": 0.0002086637819184778, "loss": 1.7525, "step": 53965 }, { "epoch": 1.8, "grad_norm": 0.5745774507522583, "learning_rate": 0.00020865382605053317, "loss": 1.7762, "step": 53966 }, { "epoch": 1.8, "grad_norm": 0.556111216545105, "learning_rate": 0.0002086438702934695, "loss": 1.7437, "step": 53967 }, { "epoch": 1.8, "grad_norm": 0.5730600357055664, "learning_rate": 0.0002086339146472985, "loss": 1.754, "step": 53968 }, { "epoch": 1.8, "grad_norm": 0.5644311904907227, "learning_rate": 0.0002086239591120326, "loss": 1.7122, "step": 53969 }, { "epoch": 1.8, "grad_norm": 0.5463907718658447, "learning_rate": 0.00020861400368768376, "loss": 1.6989, "step": 53970 }, { "epoch": 1.8, "grad_norm": 0.5894047021865845, "learning_rate": 0.00020860404837426402, "loss": 1.7599, "step": 53971 }, { "epoch": 1.8, "grad_norm": 0.595818281173706, "learning_rate": 0.00020859409317178543, "loss": 1.722, "step": 53972 }, { "epoch": 1.8, "grad_norm": 0.5672522783279419, "learning_rate": 0.00020858413808026014, "loss": 1.6784, "step": 53973 }, { "epoch": 1.8, "grad_norm": 0.5824123620986938, "learning_rate": 0.0002085741830997004, "loss": 1.7946, "step": 53974 }, { "epoch": 1.8, "grad_norm": 0.5662215948104858, "learning_rate": 0.000208564228230118, "loss": 1.7776, "step": 53975 }, { "epoch": 1.8, "grad_norm": 0.5559754967689514, "learning_rate": 0.0002085542734715252, "loss": 1.7385, "step": 53976 }, { "epoch": 1.8, "grad_norm": 0.5987609624862671, "learning_rate": 0.00020854431882393404, "loss": 1.7726, "step": 53977 }, { "epoch": 1.8, "grad_norm": 0.5585606098175049, "learning_rate": 0.00020853436428735657, "loss": 1.7739, "step": 53978 }, { "epoch": 1.8, "grad_norm": 0.5912970304489136, "learning_rate": 0.00020852440986180492, "loss": 1.791, "step": 53979 }, { "epoch": 1.8, "grad_norm": 0.5917795300483704, "learning_rate": 0.0002085144555472912, "loss": 1.841, "step": 53980 }, { "epoch": 1.8, "grad_norm": 0.5809125900268555, "learning_rate": 0.0002085045013438274, "loss": 1.8153, "step": 53981 }, { "epoch": 1.8, "grad_norm": 0.571250319480896, "learning_rate": 0.0002084945472514256, "loss": 1.7674, "step": 53982 }, { "epoch": 1.8, "grad_norm": 0.57798832654953, "learning_rate": 0.00020848459327009797, "loss": 1.7374, "step": 53983 }, { "epoch": 1.8, "grad_norm": 0.5703437328338623, "learning_rate": 0.00020847463939985665, "loss": 1.8148, "step": 53984 }, { "epoch": 1.8, "grad_norm": 0.5546762347221375, "learning_rate": 0.00020846468564071356, "loss": 1.7145, "step": 53985 }, { "epoch": 1.8, "grad_norm": 0.5690922737121582, "learning_rate": 0.00020845473199268086, "loss": 1.8611, "step": 53986 }, { "epoch": 1.8, "grad_norm": 0.564012885093689, "learning_rate": 0.00020844477845577067, "loss": 1.7837, "step": 53987 }, { "epoch": 1.8, "grad_norm": 0.5682284832000732, "learning_rate": 0.00020843482502999502, "loss": 1.6694, "step": 53988 }, { "epoch": 1.8, "grad_norm": 0.5860137343406677, "learning_rate": 0.00020842487171536585, "loss": 1.6787, "step": 53989 }, { "epoch": 1.8, "grad_norm": 0.5535593032836914, "learning_rate": 0.00020841491851189558, "loss": 1.7869, "step": 53990 }, { "epoch": 1.8, "grad_norm": 0.5549436807632446, "learning_rate": 0.00020840496541959602, "loss": 1.7067, "step": 53991 }, { "epoch": 1.8, "grad_norm": 0.5636439323425293, "learning_rate": 0.00020839501243847936, "loss": 1.7157, "step": 53992 }, { "epoch": 1.8, "grad_norm": 0.5641387701034546, "learning_rate": 0.00020838505956855767, "loss": 1.7353, "step": 53993 }, { "epoch": 1.8, "grad_norm": 0.5753702521324158, "learning_rate": 0.00020837510680984304, "loss": 1.7331, "step": 53994 }, { "epoch": 1.8, "grad_norm": 0.5706679821014404, "learning_rate": 0.00020836515416234748, "loss": 1.7318, "step": 53995 }, { "epoch": 1.8, "grad_norm": 0.556774914264679, "learning_rate": 0.0002083552016260831, "loss": 1.7035, "step": 53996 }, { "epoch": 1.8, "grad_norm": 0.5993314981460571, "learning_rate": 0.00020834524920106205, "loss": 1.7594, "step": 53997 }, { "epoch": 1.8, "grad_norm": 0.579488217830658, "learning_rate": 0.00020833529688729633, "loss": 1.6983, "step": 53998 }, { "epoch": 1.8, "grad_norm": 0.5526036024093628, "learning_rate": 0.0002083253446847981, "loss": 1.7906, "step": 53999 }, { "epoch": 1.8, "grad_norm": 0.5743351578712463, "learning_rate": 0.0002083153925935794, "loss": 1.7541, "step": 54000 }, { "epoch": 1.8, "grad_norm": 0.5641996264457703, "learning_rate": 0.00020830544061365225, "loss": 1.6969, "step": 54001 }, { "epoch": 1.8, "grad_norm": 0.5423793196678162, "learning_rate": 0.00020829548874502874, "loss": 1.6975, "step": 54002 }, { "epoch": 1.8, "grad_norm": 0.57441645860672, "learning_rate": 0.00020828553698772106, "loss": 1.7869, "step": 54003 }, { "epoch": 1.8, "grad_norm": 0.5571500658988953, "learning_rate": 0.00020827558534174126, "loss": 1.7993, "step": 54004 }, { "epoch": 1.8, "grad_norm": 0.5584832429885864, "learning_rate": 0.00020826563380710132, "loss": 1.8096, "step": 54005 }, { "epoch": 1.8, "grad_norm": 0.5695971846580505, "learning_rate": 0.00020825568238381343, "loss": 1.7418, "step": 54006 }, { "epoch": 1.8, "grad_norm": 0.5714563727378845, "learning_rate": 0.00020824573107188965, "loss": 1.7329, "step": 54007 }, { "epoch": 1.8, "grad_norm": 0.5561141967773438, "learning_rate": 0.00020823577987134196, "loss": 1.6576, "step": 54008 }, { "epoch": 1.8, "grad_norm": 0.5618748664855957, "learning_rate": 0.00020822582878218248, "loss": 1.7051, "step": 54009 }, { "epoch": 1.8, "grad_norm": 0.5837780237197876, "learning_rate": 0.00020821587780442337, "loss": 1.6755, "step": 54010 }, { "epoch": 1.8, "grad_norm": 0.5637273192405701, "learning_rate": 0.00020820592693807677, "loss": 1.7416, "step": 54011 }, { "epoch": 1.8, "grad_norm": 0.5593564510345459, "learning_rate": 0.00020819597618315451, "loss": 1.7062, "step": 54012 }, { "epoch": 1.8, "grad_norm": 0.5664071440696716, "learning_rate": 0.00020818602553966887, "loss": 1.7399, "step": 54013 }, { "epoch": 1.8, "grad_norm": 0.5798743367195129, "learning_rate": 0.00020817607500763188, "loss": 1.7951, "step": 54014 }, { "epoch": 1.8, "grad_norm": 0.5592033267021179, "learning_rate": 0.00020816612458705554, "loss": 1.6762, "step": 54015 }, { "epoch": 1.8, "grad_norm": 0.5826196670532227, "learning_rate": 0.000208156174277952, "loss": 1.7362, "step": 54016 }, { "epoch": 1.8, "grad_norm": 0.5626348853111267, "learning_rate": 0.00020814622408033343, "loss": 1.7996, "step": 54017 }, { "epoch": 1.8, "grad_norm": 0.549144446849823, "learning_rate": 0.00020813627399421167, "loss": 1.6691, "step": 54018 }, { "epoch": 1.8, "grad_norm": 0.5626159310340881, "learning_rate": 0.00020812632401959904, "loss": 1.7177, "step": 54019 }, { "epoch": 1.8, "grad_norm": 0.5569295287132263, "learning_rate": 0.00020811637415650745, "loss": 1.7163, "step": 54020 }, { "epoch": 1.8, "grad_norm": 0.5575626492500305, "learning_rate": 0.00020810642440494915, "loss": 1.8568, "step": 54021 }, { "epoch": 1.8, "grad_norm": 0.5639640092849731, "learning_rate": 0.00020809647476493598, "loss": 1.7394, "step": 54022 }, { "epoch": 1.8, "grad_norm": 0.5880261063575745, "learning_rate": 0.00020808652523648016, "loss": 1.7636, "step": 54023 }, { "epoch": 1.8, "grad_norm": 0.5439251065254211, "learning_rate": 0.0002080765758195939, "loss": 1.7007, "step": 54024 }, { "epoch": 1.8, "grad_norm": 0.5593618154525757, "learning_rate": 0.00020806662651428896, "loss": 1.7304, "step": 54025 }, { "epoch": 1.8, "grad_norm": 0.5657776594161987, "learning_rate": 0.00020805667732057764, "loss": 1.6872, "step": 54026 }, { "epoch": 1.8, "grad_norm": 0.5751878619194031, "learning_rate": 0.00020804672823847208, "loss": 1.7855, "step": 54027 }, { "epoch": 1.8, "grad_norm": 0.5755100846290588, "learning_rate": 0.00020803677926798414, "loss": 1.7962, "step": 54028 }, { "epoch": 1.8, "grad_norm": 0.5645067691802979, "learning_rate": 0.000208026830409126, "loss": 1.7325, "step": 54029 }, { "epoch": 1.8, "grad_norm": 0.566801130771637, "learning_rate": 0.00020801688166190968, "loss": 1.7804, "step": 54030 }, { "epoch": 1.8, "grad_norm": 0.5520011186599731, "learning_rate": 0.00020800693302634748, "loss": 1.8171, "step": 54031 }, { "epoch": 1.8, "grad_norm": 0.5741775035858154, "learning_rate": 0.0002079969845024511, "loss": 1.7426, "step": 54032 }, { "epoch": 1.8, "grad_norm": 0.5881533026695251, "learning_rate": 0.00020798703609023296, "loss": 1.7291, "step": 54033 }, { "epoch": 1.8, "grad_norm": 0.5553151369094849, "learning_rate": 0.00020797708778970503, "loss": 1.7495, "step": 54034 }, { "epoch": 1.8, "grad_norm": 0.5665882229804993, "learning_rate": 0.0002079671396008793, "loss": 1.6756, "step": 54035 }, { "epoch": 1.8, "grad_norm": 0.5636587142944336, "learning_rate": 0.0002079571915237679, "loss": 1.7141, "step": 54036 }, { "epoch": 1.8, "grad_norm": 0.5917978882789612, "learning_rate": 0.00020794724355838298, "loss": 1.7709, "step": 54037 }, { "epoch": 1.8, "grad_norm": 0.5590622425079346, "learning_rate": 0.00020793729570473647, "loss": 1.7529, "step": 54038 }, { "epoch": 1.8, "grad_norm": 0.5680005550384521, "learning_rate": 0.00020792734796284046, "loss": 1.7642, "step": 54039 }, { "epoch": 1.8, "grad_norm": 0.5488783121109009, "learning_rate": 0.0002079174003327071, "loss": 1.7349, "step": 54040 }, { "epoch": 1.8, "grad_norm": 0.6011990904808044, "learning_rate": 0.00020790745281434853, "loss": 1.7436, "step": 54041 }, { "epoch": 1.8, "grad_norm": 0.5777003169059753, "learning_rate": 0.00020789750540777674, "loss": 1.7266, "step": 54042 }, { "epoch": 1.8, "grad_norm": 0.5752912163734436, "learning_rate": 0.00020788755811300373, "loss": 1.7357, "step": 54043 }, { "epoch": 1.8, "grad_norm": 0.5828673839569092, "learning_rate": 0.00020787761093004177, "loss": 1.7373, "step": 54044 }, { "epoch": 1.8, "grad_norm": 0.5696517825126648, "learning_rate": 0.00020786766385890273, "loss": 1.7454, "step": 54045 }, { "epoch": 1.8, "grad_norm": 0.5573835372924805, "learning_rate": 0.00020785771689959873, "loss": 1.6954, "step": 54046 }, { "epoch": 1.8, "grad_norm": 0.5662498474121094, "learning_rate": 0.00020784777005214205, "loss": 1.7321, "step": 54047 }, { "epoch": 1.8, "grad_norm": 0.5840452909469604, "learning_rate": 0.00020783782331654438, "loss": 1.818, "step": 54048 }, { "epoch": 1.8, "grad_norm": 0.543175995349884, "learning_rate": 0.00020782787669281814, "loss": 1.7451, "step": 54049 }, { "epoch": 1.8, "grad_norm": 0.5858704447746277, "learning_rate": 0.00020781793018097525, "loss": 1.7483, "step": 54050 }, { "epoch": 1.8, "grad_norm": 0.5991709232330322, "learning_rate": 0.0002078079837810279, "loss": 1.6726, "step": 54051 }, { "epoch": 1.8, "grad_norm": 0.5498487949371338, "learning_rate": 0.00020779803749298797, "loss": 1.7267, "step": 54052 }, { "epoch": 1.8, "grad_norm": 0.6060361862182617, "learning_rate": 0.0002077880913168676, "loss": 1.7831, "step": 54053 }, { "epoch": 1.8, "grad_norm": 0.5850344300270081, "learning_rate": 0.00020777814525267904, "loss": 1.7676, "step": 54054 }, { "epoch": 1.8, "grad_norm": 0.5857297778129578, "learning_rate": 0.0002077681993004341, "loss": 1.7484, "step": 54055 }, { "epoch": 1.8, "grad_norm": 0.552111029624939, "learning_rate": 0.00020775825346014501, "loss": 1.7522, "step": 54056 }, { "epoch": 1.8, "grad_norm": 0.5645907521247864, "learning_rate": 0.00020774830773182386, "loss": 1.7168, "step": 54057 }, { "epoch": 1.8, "grad_norm": 0.5875610113143921, "learning_rate": 0.0002077383621154826, "loss": 1.783, "step": 54058 }, { "epoch": 1.8, "grad_norm": 0.5658596158027649, "learning_rate": 0.00020772841661113344, "loss": 1.7507, "step": 54059 }, { "epoch": 1.8, "grad_norm": 0.5592899918556213, "learning_rate": 0.00020771847121878827, "loss": 1.6458, "step": 54060 }, { "epoch": 1.8, "grad_norm": 0.5606278777122498, "learning_rate": 0.00020770852593845945, "loss": 1.6747, "step": 54061 }, { "epoch": 1.8, "grad_norm": 0.601852297782898, "learning_rate": 0.00020769858077015875, "loss": 1.7692, "step": 54062 }, { "epoch": 1.8, "grad_norm": 0.5452139973640442, "learning_rate": 0.0002076886357138984, "loss": 1.7128, "step": 54063 }, { "epoch": 1.8, "grad_norm": 0.5573535561561584, "learning_rate": 0.00020767869076969054, "loss": 1.7355, "step": 54064 }, { "epoch": 1.8, "grad_norm": 0.5776535272598267, "learning_rate": 0.00020766874593754702, "loss": 1.7399, "step": 54065 }, { "epoch": 1.8, "grad_norm": 0.5621238350868225, "learning_rate": 0.0002076588012174801, "loss": 1.7529, "step": 54066 }, { "epoch": 1.8, "grad_norm": 0.5602696537971497, "learning_rate": 0.0002076488566095018, "loss": 1.6537, "step": 54067 }, { "epoch": 1.8, "grad_norm": 0.5707968473434448, "learning_rate": 0.00020763891211362417, "loss": 1.7807, "step": 54068 }, { "epoch": 1.8, "grad_norm": 0.5870190858840942, "learning_rate": 0.0002076289677298592, "loss": 1.7977, "step": 54069 }, { "epoch": 1.8, "grad_norm": 0.5710574388504028, "learning_rate": 0.00020761902345821912, "loss": 1.7139, "step": 54070 }, { "epoch": 1.8, "grad_norm": 0.5783957242965698, "learning_rate": 0.000207609079298716, "loss": 1.7162, "step": 54071 }, { "epoch": 1.8, "grad_norm": 0.5718874931335449, "learning_rate": 0.00020759913525136178, "loss": 1.7948, "step": 54072 }, { "epoch": 1.8, "grad_norm": 0.5719223618507385, "learning_rate": 0.0002075891913161686, "loss": 1.7381, "step": 54073 }, { "epoch": 1.8, "grad_norm": 0.5625455379486084, "learning_rate": 0.00020757924749314856, "loss": 1.6944, "step": 54074 }, { "epoch": 1.8, "grad_norm": 0.5866352319717407, "learning_rate": 0.00020756930378231368, "loss": 1.7272, "step": 54075 }, { "epoch": 1.8, "grad_norm": 0.7220698595046997, "learning_rate": 0.00020755936018367594, "loss": 1.752, "step": 54076 }, { "epoch": 1.8, "grad_norm": 0.578359067440033, "learning_rate": 0.00020754941669724774, "loss": 1.7261, "step": 54077 }, { "epoch": 1.8, "grad_norm": 0.5835512280464172, "learning_rate": 0.00020753947332304072, "loss": 1.7465, "step": 54078 }, { "epoch": 1.8, "grad_norm": 0.5506150722503662, "learning_rate": 0.0002075295300610672, "loss": 1.7018, "step": 54079 }, { "epoch": 1.8, "grad_norm": 0.571715235710144, "learning_rate": 0.00020751958691133922, "loss": 1.7468, "step": 54080 }, { "epoch": 1.8, "grad_norm": 0.5814383625984192, "learning_rate": 0.00020750964387386892, "loss": 1.741, "step": 54081 }, { "epoch": 1.8, "grad_norm": 0.598368227481842, "learning_rate": 0.0002074997009486682, "loss": 1.7221, "step": 54082 }, { "epoch": 1.8, "grad_norm": 0.5610970854759216, "learning_rate": 0.00020748975813574916, "loss": 1.7094, "step": 54083 }, { "epoch": 1.8, "grad_norm": 0.5710296034812927, "learning_rate": 0.00020747981543512412, "loss": 1.7614, "step": 54084 }, { "epoch": 1.8, "grad_norm": 0.5717229247093201, "learning_rate": 0.00020746987284680474, "loss": 1.7562, "step": 54085 }, { "epoch": 1.8, "grad_norm": 0.5649083852767944, "learning_rate": 0.00020745993037080338, "loss": 1.7214, "step": 54086 }, { "epoch": 1.8, "grad_norm": 0.6052474975585938, "learning_rate": 0.0002074499880071321, "loss": 1.7294, "step": 54087 }, { "epoch": 1.8, "grad_norm": 0.5893916487693787, "learning_rate": 0.0002074400457558028, "loss": 1.6572, "step": 54088 }, { "epoch": 1.8, "grad_norm": 0.6056987643241882, "learning_rate": 0.00020743010361682767, "loss": 1.7916, "step": 54089 }, { "epoch": 1.8, "grad_norm": 0.577312171459198, "learning_rate": 0.00020742016159021868, "loss": 1.7555, "step": 54090 }, { "epoch": 1.8, "grad_norm": 0.5837948322296143, "learning_rate": 0.0002074102196759882, "loss": 1.7644, "step": 54091 }, { "epoch": 1.8, "grad_norm": 0.5841304063796997, "learning_rate": 0.00020740027787414784, "loss": 1.7325, "step": 54092 }, { "epoch": 1.8, "grad_norm": 0.5814383625984192, "learning_rate": 0.00020739033618470999, "loss": 1.7609, "step": 54093 }, { "epoch": 1.8, "grad_norm": 0.5615102648735046, "learning_rate": 0.00020738039460768665, "loss": 1.7244, "step": 54094 }, { "epoch": 1.8, "grad_norm": 0.587782621383667, "learning_rate": 0.00020737045314308985, "loss": 1.8119, "step": 54095 }, { "epoch": 1.8, "grad_norm": 0.568903923034668, "learning_rate": 0.00020736051179093162, "loss": 1.718, "step": 54096 }, { "epoch": 1.8, "grad_norm": 0.5524243712425232, "learning_rate": 0.00020735057055122418, "loss": 1.7267, "step": 54097 }, { "epoch": 1.8, "grad_norm": 0.5801106095314026, "learning_rate": 0.00020734062942397944, "loss": 1.7068, "step": 54098 }, { "epoch": 1.8, "grad_norm": 0.579197347164154, "learning_rate": 0.00020733068840920942, "loss": 1.765, "step": 54099 }, { "epoch": 1.8, "grad_norm": 0.5665361285209656, "learning_rate": 0.00020732074750692638, "loss": 1.7339, "step": 54100 }, { "epoch": 1.8, "grad_norm": 0.5796558856964111, "learning_rate": 0.00020731080671714235, "loss": 1.7645, "step": 54101 }, { "epoch": 1.8, "grad_norm": 0.5891402363777161, "learning_rate": 0.0002073008660398693, "loss": 1.7875, "step": 54102 }, { "epoch": 1.8, "grad_norm": 0.5961540341377258, "learning_rate": 0.00020729092547511934, "loss": 1.7511, "step": 54103 }, { "epoch": 1.8, "grad_norm": 0.5681268572807312, "learning_rate": 0.00020728098502290459, "loss": 1.7551, "step": 54104 }, { "epoch": 1.8, "grad_norm": 0.5738565325737, "learning_rate": 0.00020727104468323698, "loss": 1.7084, "step": 54105 }, { "epoch": 1.8, "grad_norm": 0.5769098997116089, "learning_rate": 0.00020726110445612862, "loss": 1.7442, "step": 54106 }, { "epoch": 1.8, "grad_norm": 0.5615960955619812, "learning_rate": 0.00020725116434159175, "loss": 1.7035, "step": 54107 }, { "epoch": 1.8, "grad_norm": 0.5684369206428528, "learning_rate": 0.00020724122433963822, "loss": 1.7889, "step": 54108 }, { "epoch": 1.8, "grad_norm": 0.5797733068466187, "learning_rate": 0.00020723128445028014, "loss": 1.8283, "step": 54109 }, { "epoch": 1.8, "grad_norm": 0.5983412265777588, "learning_rate": 0.00020722134467352965, "loss": 1.7718, "step": 54110 }, { "epoch": 1.8, "grad_norm": 0.5848204493522644, "learning_rate": 0.00020721140500939883, "loss": 1.6686, "step": 54111 }, { "epoch": 1.8, "grad_norm": 0.5636327266693115, "learning_rate": 0.00020720146545789964, "loss": 1.7845, "step": 54112 }, { "epoch": 1.8, "grad_norm": 0.5611898303031921, "learning_rate": 0.00020719152601904415, "loss": 1.7562, "step": 54113 }, { "epoch": 1.8, "grad_norm": 0.5846015214920044, "learning_rate": 0.00020718158669284458, "loss": 1.6749, "step": 54114 }, { "epoch": 1.8, "grad_norm": 0.5523696541786194, "learning_rate": 0.0002071716474793128, "loss": 1.7058, "step": 54115 }, { "epoch": 1.8, "grad_norm": 0.5790872573852539, "learning_rate": 0.000207161708378461, "loss": 1.7278, "step": 54116 }, { "epoch": 1.8, "grad_norm": 0.5615490674972534, "learning_rate": 0.0002071517693903013, "loss": 1.741, "step": 54117 }, { "epoch": 1.8, "grad_norm": 0.5642676949501038, "learning_rate": 0.00020714183051484552, "loss": 1.7153, "step": 54118 }, { "epoch": 1.8, "grad_norm": 0.5754729509353638, "learning_rate": 0.00020713189175210588, "loss": 1.7284, "step": 54119 }, { "epoch": 1.8, "grad_norm": 0.5550112724304199, "learning_rate": 0.00020712195310209448, "loss": 1.7803, "step": 54120 }, { "epoch": 1.8, "grad_norm": 0.5786117315292358, "learning_rate": 0.0002071120145648234, "loss": 1.7673, "step": 54121 }, { "epoch": 1.8, "grad_norm": 0.5891091227531433, "learning_rate": 0.0002071020761403046, "loss": 1.7277, "step": 54122 }, { "epoch": 1.8, "grad_norm": 0.546451985836029, "learning_rate": 0.0002070921378285502, "loss": 1.6855, "step": 54123 }, { "epoch": 1.8, "grad_norm": 0.5879524350166321, "learning_rate": 0.00020708219962957232, "loss": 1.7063, "step": 54124 }, { "epoch": 1.8, "grad_norm": 0.5631511211395264, "learning_rate": 0.00020707226154338286, "loss": 1.765, "step": 54125 }, { "epoch": 1.8, "grad_norm": 0.5922873020172119, "learning_rate": 0.00020706232356999395, "loss": 1.8256, "step": 54126 }, { "epoch": 1.8, "grad_norm": 0.6042899489402771, "learning_rate": 0.00020705238570941772, "loss": 1.7691, "step": 54127 }, { "epoch": 1.8, "grad_norm": 0.5728776454925537, "learning_rate": 0.00020704244796166627, "loss": 1.7434, "step": 54128 }, { "epoch": 1.8, "grad_norm": 0.5702029466629028, "learning_rate": 0.00020703251032675157, "loss": 1.7057, "step": 54129 }, { "epoch": 1.8, "grad_norm": 0.5740041136741638, "learning_rate": 0.00020702257280468567, "loss": 1.7389, "step": 54130 }, { "epoch": 1.8, "grad_norm": 0.5957003235816956, "learning_rate": 0.0002070126353954807, "loss": 1.7651, "step": 54131 }, { "epoch": 1.8, "grad_norm": 0.5685083866119385, "learning_rate": 0.00020700269809914866, "loss": 1.7367, "step": 54132 }, { "epoch": 1.8, "grad_norm": 0.5499985218048096, "learning_rate": 0.0002069927609157016, "loss": 1.693, "step": 54133 }, { "epoch": 1.8, "grad_norm": 1.7982516288757324, "learning_rate": 0.00020698282384515177, "loss": 1.7451, "step": 54134 }, { "epoch": 1.8, "grad_norm": 0.5560740232467651, "learning_rate": 0.00020697288688751095, "loss": 1.7726, "step": 54135 }, { "epoch": 1.8, "grad_norm": 0.5854447484016418, "learning_rate": 0.00020696295004279136, "loss": 1.7215, "step": 54136 }, { "epoch": 1.8, "grad_norm": 0.5487685799598694, "learning_rate": 0.00020695301331100503, "loss": 1.7392, "step": 54137 }, { "epoch": 1.8, "grad_norm": 0.5953066945075989, "learning_rate": 0.00020694307669216414, "loss": 1.8384, "step": 54138 }, { "epoch": 1.8, "grad_norm": 0.5598645806312561, "learning_rate": 0.00020693314018628054, "loss": 1.7535, "step": 54139 }, { "epoch": 1.8, "grad_norm": 0.5765284299850464, "learning_rate": 0.00020692320379336635, "loss": 1.7145, "step": 54140 }, { "epoch": 1.8, "grad_norm": 0.5822774767875671, "learning_rate": 0.00020691326751343388, "loss": 1.7618, "step": 54141 }, { "epoch": 1.8, "grad_norm": 0.5497303009033203, "learning_rate": 0.00020690333134649476, "loss": 1.7575, "step": 54142 }, { "epoch": 1.8, "grad_norm": 0.5399986505508423, "learning_rate": 0.00020689339529256134, "loss": 1.7039, "step": 54143 }, { "epoch": 1.8, "grad_norm": 0.5651021599769592, "learning_rate": 0.00020688345935164571, "loss": 1.6732, "step": 54144 }, { "epoch": 1.8, "grad_norm": 0.5747694969177246, "learning_rate": 0.00020687352352375982, "loss": 1.651, "step": 54145 }, { "epoch": 1.8, "grad_norm": 0.5456211566925049, "learning_rate": 0.00020686358780891567, "loss": 1.7757, "step": 54146 }, { "epoch": 1.8, "grad_norm": 0.5719161033630371, "learning_rate": 0.00020685365220712536, "loss": 1.7242, "step": 54147 }, { "epoch": 1.8, "grad_norm": 0.5739429593086243, "learning_rate": 0.00020684371671840122, "loss": 1.7605, "step": 54148 }, { "epoch": 1.8, "grad_norm": 0.5745218992233276, "learning_rate": 0.00020683378134275484, "loss": 1.733, "step": 54149 }, { "epoch": 1.8, "grad_norm": 0.5692432522773743, "learning_rate": 0.00020682384608019865, "loss": 1.7006, "step": 54150 }, { "epoch": 1.8, "grad_norm": 0.568756639957428, "learning_rate": 0.00020681391093074457, "loss": 1.7424, "step": 54151 }, { "epoch": 1.8, "grad_norm": 0.5713567137718201, "learning_rate": 0.00020680397589440465, "loss": 1.7657, "step": 54152 }, { "epoch": 1.8, "grad_norm": 0.5892868041992188, "learning_rate": 0.00020679404097119098, "loss": 1.7372, "step": 54153 }, { "epoch": 1.8, "grad_norm": 0.5836079716682434, "learning_rate": 0.00020678410616111566, "loss": 1.7495, "step": 54154 }, { "epoch": 1.8, "grad_norm": 0.5581561923027039, "learning_rate": 0.00020677417146419062, "loss": 1.7485, "step": 54155 }, { "epoch": 1.8, "grad_norm": 0.5774204730987549, "learning_rate": 0.00020676423688042797, "loss": 1.7793, "step": 54156 }, { "epoch": 1.8, "grad_norm": 0.5802954435348511, "learning_rate": 0.00020675430240983987, "loss": 1.7774, "step": 54157 }, { "epoch": 1.8, "grad_norm": 0.5816795825958252, "learning_rate": 0.00020674436805243835, "loss": 1.6842, "step": 54158 }, { "epoch": 1.8, "grad_norm": 0.5806137323379517, "learning_rate": 0.00020673443380823535, "loss": 1.7013, "step": 54159 }, { "epoch": 1.8, "grad_norm": 0.5759487152099609, "learning_rate": 0.000206724499677243, "loss": 1.6988, "step": 54160 }, { "epoch": 1.8, "grad_norm": 0.5639389753341675, "learning_rate": 0.00020671456565947346, "loss": 1.6858, "step": 54161 }, { "epoch": 1.8, "grad_norm": 0.5773292183876038, "learning_rate": 0.00020670463175493858, "loss": 1.7281, "step": 54162 }, { "epoch": 1.8, "grad_norm": 0.5660333633422852, "learning_rate": 0.0002066946979636505, "loss": 1.7263, "step": 54163 }, { "epoch": 1.8, "grad_norm": 0.5822086930274963, "learning_rate": 0.00020668476428562153, "loss": 1.7314, "step": 54164 }, { "epoch": 1.8, "grad_norm": 0.5740284323692322, "learning_rate": 0.00020667483072086325, "loss": 1.6756, "step": 54165 }, { "epoch": 1.8, "grad_norm": 0.574486494064331, "learning_rate": 0.0002066648972693881, "loss": 1.7217, "step": 54166 }, { "epoch": 1.8, "grad_norm": 0.5746152997016907, "learning_rate": 0.00020665496393120798, "loss": 1.7148, "step": 54167 }, { "epoch": 1.8, "grad_norm": 0.5750723481178284, "learning_rate": 0.00020664503070633506, "loss": 1.6901, "step": 54168 }, { "epoch": 1.8, "grad_norm": 0.5917890071868896, "learning_rate": 0.00020663509759478123, "loss": 1.7827, "step": 54169 }, { "epoch": 1.8, "grad_norm": 0.5753626823425293, "learning_rate": 0.0002066251645965586, "loss": 1.7857, "step": 54170 }, { "epoch": 1.8, "grad_norm": 0.5755299925804138, "learning_rate": 0.0002066152317116794, "loss": 1.7314, "step": 54171 }, { "epoch": 1.8, "grad_norm": 0.5633630752563477, "learning_rate": 0.0002066052989401554, "loss": 1.7586, "step": 54172 }, { "epoch": 1.8, "grad_norm": 0.5679383277893066, "learning_rate": 0.00020659536628199888, "loss": 1.8105, "step": 54173 }, { "epoch": 1.8, "grad_norm": 0.5676815509796143, "learning_rate": 0.00020658543373722185, "loss": 1.7877, "step": 54174 }, { "epoch": 1.8, "grad_norm": 0.572450578212738, "learning_rate": 0.00020657550130583633, "loss": 1.7502, "step": 54175 }, { "epoch": 1.8, "grad_norm": 0.5928055047988892, "learning_rate": 0.00020656556898785434, "loss": 1.7393, "step": 54176 }, { "epoch": 1.8, "grad_norm": 0.5648109912872314, "learning_rate": 0.00020655563678328794, "loss": 1.7171, "step": 54177 }, { "epoch": 1.8, "grad_norm": 0.5940399169921875, "learning_rate": 0.00020654570469214942, "loss": 1.7151, "step": 54178 }, { "epoch": 1.8, "grad_norm": 0.5727757811546326, "learning_rate": 0.0002065357727144504, "loss": 1.7365, "step": 54179 }, { "epoch": 1.8, "grad_norm": 0.5631778836250305, "learning_rate": 0.00020652584085020331, "loss": 1.7757, "step": 54180 }, { "epoch": 1.8, "grad_norm": 0.5696948766708374, "learning_rate": 0.00020651590909942012, "loss": 1.7714, "step": 54181 }, { "epoch": 1.8, "grad_norm": 0.564382791519165, "learning_rate": 0.00020650597746211277, "loss": 1.7467, "step": 54182 }, { "epoch": 1.8, "grad_norm": 0.5848598480224609, "learning_rate": 0.00020649604593829342, "loss": 1.678, "step": 54183 }, { "epoch": 1.8, "grad_norm": 0.5683344006538391, "learning_rate": 0.00020648611452797413, "loss": 1.7148, "step": 54184 }, { "epoch": 1.8, "grad_norm": 0.5740137696266174, "learning_rate": 0.00020647618323116685, "loss": 1.7763, "step": 54185 }, { "epoch": 1.8, "grad_norm": 0.6228547096252441, "learning_rate": 0.00020646625204788363, "loss": 1.7727, "step": 54186 }, { "epoch": 1.8, "grad_norm": 0.5622134804725647, "learning_rate": 0.0002064563209781367, "loss": 1.7865, "step": 54187 }, { "epoch": 1.8, "grad_norm": 0.5699694752693176, "learning_rate": 0.00020644639002193808, "loss": 1.8031, "step": 54188 }, { "epoch": 1.8, "grad_norm": 0.5581892728805542, "learning_rate": 0.00020643645917929965, "loss": 1.7727, "step": 54189 }, { "epoch": 1.8, "grad_norm": 0.5901704430580139, "learning_rate": 0.00020642652845023358, "loss": 1.7755, "step": 54190 }, { "epoch": 1.8, "grad_norm": 0.5826120376586914, "learning_rate": 0.000206416597834752, "loss": 1.7674, "step": 54191 }, { "epoch": 1.8, "grad_norm": 0.5597466230392456, "learning_rate": 0.00020640666733286682, "loss": 1.7474, "step": 54192 }, { "epoch": 1.8, "grad_norm": 0.5678688883781433, "learning_rate": 0.0002063967369445901, "loss": 1.725, "step": 54193 }, { "epoch": 1.8, "grad_norm": 0.5640252232551575, "learning_rate": 0.00020638680666993414, "loss": 1.6549, "step": 54194 }, { "epoch": 1.8, "grad_norm": 0.5774586200714111, "learning_rate": 0.00020637687650891057, "loss": 1.7842, "step": 54195 }, { "epoch": 1.8, "grad_norm": 0.5771169662475586, "learning_rate": 0.00020636694646153177, "loss": 1.741, "step": 54196 }, { "epoch": 1.8, "grad_norm": 0.5668714046478271, "learning_rate": 0.00020635701652780967, "loss": 1.7794, "step": 54197 }, { "epoch": 1.8, "grad_norm": 0.5860082507133484, "learning_rate": 0.00020634708670775646, "loss": 1.6897, "step": 54198 }, { "epoch": 1.8, "grad_norm": 0.5876260995864868, "learning_rate": 0.00020633715700138402, "loss": 1.7359, "step": 54199 }, { "epoch": 1.8, "grad_norm": 0.5639110803604126, "learning_rate": 0.00020632722740870438, "loss": 1.7215, "step": 54200 }, { "epoch": 1.8, "grad_norm": 0.5537812113761902, "learning_rate": 0.0002063172979297299, "loss": 1.7475, "step": 54201 }, { "epoch": 1.8, "grad_norm": 0.5647103786468506, "learning_rate": 0.00020630736856447224, "loss": 1.7665, "step": 54202 }, { "epoch": 1.8, "grad_norm": 0.547913134098053, "learning_rate": 0.00020629743931294364, "loss": 1.7085, "step": 54203 }, { "epoch": 1.8, "grad_norm": 0.5729541778564453, "learning_rate": 0.00020628751017515626, "loss": 1.6383, "step": 54204 }, { "epoch": 1.8, "grad_norm": 0.5683848857879639, "learning_rate": 0.00020627758115112193, "loss": 1.7396, "step": 54205 }, { "epoch": 1.8, "grad_norm": 0.6084766983985901, "learning_rate": 0.00020626765224085282, "loss": 1.7708, "step": 54206 }, { "epoch": 1.8, "grad_norm": 0.5725796222686768, "learning_rate": 0.00020625772344436092, "loss": 1.7206, "step": 54207 }, { "epoch": 1.8, "grad_norm": 0.5507251024246216, "learning_rate": 0.0002062477947616585, "loss": 1.788, "step": 54208 }, { "epoch": 1.8, "grad_norm": 0.5965286493301392, "learning_rate": 0.0002062378661927573, "loss": 1.7327, "step": 54209 }, { "epoch": 1.8, "grad_norm": 0.5882574915885925, "learning_rate": 0.00020622793773766954, "loss": 1.7709, "step": 54210 }, { "epoch": 1.8, "grad_norm": 0.5738104581832886, "learning_rate": 0.0002062180093964073, "loss": 1.769, "step": 54211 }, { "epoch": 1.8, "grad_norm": 0.5788158774375916, "learning_rate": 0.00020620808116898252, "loss": 1.7837, "step": 54212 }, { "epoch": 1.8, "grad_norm": 1.261976718902588, "learning_rate": 0.00020619815305540732, "loss": 1.733, "step": 54213 }, { "epoch": 1.8, "grad_norm": 0.5686022043228149, "learning_rate": 0.00020618822505569378, "loss": 1.7525, "step": 54214 }, { "epoch": 1.8, "grad_norm": 0.5748375654220581, "learning_rate": 0.00020617829716985391, "loss": 1.7552, "step": 54215 }, { "epoch": 1.8, "grad_norm": 0.5714519023895264, "learning_rate": 0.00020616836939789965, "loss": 1.7056, "step": 54216 }, { "epoch": 1.8, "grad_norm": 0.5713595747947693, "learning_rate": 0.00020615844173984326, "loss": 1.8097, "step": 54217 }, { "epoch": 1.8, "grad_norm": 0.5697513222694397, "learning_rate": 0.00020614851419569673, "loss": 1.7837, "step": 54218 }, { "epoch": 1.8, "grad_norm": 0.6134675145149231, "learning_rate": 0.00020613858676547204, "loss": 1.7418, "step": 54219 }, { "epoch": 1.8, "grad_norm": 0.5460445880889893, "learning_rate": 0.00020612865944918125, "loss": 1.6919, "step": 54220 }, { "epoch": 1.8, "grad_norm": 0.6030225157737732, "learning_rate": 0.00020611873224683647, "loss": 1.7371, "step": 54221 }, { "epoch": 1.8, "grad_norm": 0.5774251222610474, "learning_rate": 0.0002061088051584497, "loss": 1.7594, "step": 54222 }, { "epoch": 1.8, "grad_norm": 0.6134007573127747, "learning_rate": 0.00020609887818403294, "loss": 1.7137, "step": 54223 }, { "epoch": 1.8, "grad_norm": 0.572472333908081, "learning_rate": 0.00020608895132359843, "loss": 1.7836, "step": 54224 }, { "epoch": 1.8, "grad_norm": 0.5860922932624817, "learning_rate": 0.00020607902457715803, "loss": 1.7318, "step": 54225 }, { "epoch": 1.8, "grad_norm": 0.5752051472663879, "learning_rate": 0.00020606909794472386, "loss": 1.7775, "step": 54226 }, { "epoch": 1.8, "grad_norm": 0.5911651849746704, "learning_rate": 0.00020605917142630794, "loss": 1.6886, "step": 54227 }, { "epoch": 1.8, "grad_norm": 0.5777549147605896, "learning_rate": 0.0002060492450219224, "loss": 1.7073, "step": 54228 }, { "epoch": 1.8, "grad_norm": 0.5784733891487122, "learning_rate": 0.00020603931873157923, "loss": 1.7573, "step": 54229 }, { "epoch": 1.8, "grad_norm": 0.5850902199745178, "learning_rate": 0.0002060293925552904, "loss": 1.7404, "step": 54230 }, { "epoch": 1.8, "grad_norm": 0.5661598443984985, "learning_rate": 0.00020601946649306815, "loss": 1.8454, "step": 54231 }, { "epoch": 1.8, "grad_norm": 0.5864656567573547, "learning_rate": 0.00020600954054492436, "loss": 1.7291, "step": 54232 }, { "epoch": 1.8, "grad_norm": 0.5568768978118896, "learning_rate": 0.00020599961471087113, "loss": 1.6916, "step": 54233 }, { "epoch": 1.8, "grad_norm": 0.5739221572875977, "learning_rate": 0.0002059896889909206, "loss": 1.7131, "step": 54234 }, { "epoch": 1.8, "grad_norm": 0.5770162343978882, "learning_rate": 0.00020597976338508465, "loss": 1.7179, "step": 54235 }, { "epoch": 1.8, "grad_norm": 0.5541030764579773, "learning_rate": 0.00020596983789337536, "loss": 1.746, "step": 54236 }, { "epoch": 1.8, "grad_norm": 0.5702577829360962, "learning_rate": 0.00020595991251580491, "loss": 1.8007, "step": 54237 }, { "epoch": 1.8, "grad_norm": 0.5834130644798279, "learning_rate": 0.00020594998725238531, "loss": 1.7497, "step": 54238 }, { "epoch": 1.8, "grad_norm": 0.5814962983131409, "learning_rate": 0.00020594006210312852, "loss": 1.7486, "step": 54239 }, { "epoch": 1.8, "grad_norm": 0.5530962944030762, "learning_rate": 0.00020593013706804664, "loss": 1.7026, "step": 54240 }, { "epoch": 1.8, "grad_norm": 0.5576629042625427, "learning_rate": 0.00020592021214715177, "loss": 1.7945, "step": 54241 }, { "epoch": 1.8, "grad_norm": 0.6041240692138672, "learning_rate": 0.00020591028734045586, "loss": 1.7125, "step": 54242 }, { "epoch": 1.8, "grad_norm": 0.5930864214897156, "learning_rate": 0.00020590036264797086, "loss": 1.8158, "step": 54243 }, { "epoch": 1.8, "grad_norm": 0.6010084748268127, "learning_rate": 0.0002058904380697092, "loss": 1.6658, "step": 54244 }, { "epoch": 1.8, "grad_norm": 0.5513337254524231, "learning_rate": 0.0002058805136056825, "loss": 1.7021, "step": 54245 }, { "epoch": 1.8, "grad_norm": 0.5829140543937683, "learning_rate": 0.00020587058925590303, "loss": 1.6956, "step": 54246 }, { "epoch": 1.8, "grad_norm": 0.5775368213653564, "learning_rate": 0.00020586066502038282, "loss": 1.7252, "step": 54247 }, { "epoch": 1.8, "grad_norm": 0.5703701972961426, "learning_rate": 0.00020585074089913394, "loss": 1.7075, "step": 54248 }, { "epoch": 1.8, "grad_norm": 0.5964449048042297, "learning_rate": 0.00020584081689216832, "loss": 1.7905, "step": 54249 }, { "epoch": 1.8, "grad_norm": 0.5736923813819885, "learning_rate": 0.000205830892999498, "loss": 1.8102, "step": 54250 }, { "epoch": 1.8, "grad_norm": 0.5657874345779419, "learning_rate": 0.00020582096922113532, "loss": 1.7441, "step": 54251 }, { "epoch": 1.8, "grad_norm": 0.5827704071998596, "learning_rate": 0.0002058110455570919, "loss": 1.7582, "step": 54252 }, { "epoch": 1.81, "grad_norm": 0.5564104318618774, "learning_rate": 0.00020580112200738003, "loss": 1.7015, "step": 54253 }, { "epoch": 1.81, "grad_norm": 0.5828976035118103, "learning_rate": 0.00020579119857201176, "loss": 1.6559, "step": 54254 }, { "epoch": 1.81, "grad_norm": 0.5967854857444763, "learning_rate": 0.00020578127525099917, "loss": 1.6864, "step": 54255 }, { "epoch": 1.81, "grad_norm": 0.5803284049034119, "learning_rate": 0.0002057713520443541, "loss": 1.7725, "step": 54256 }, { "epoch": 1.81, "grad_norm": 0.5585548877716064, "learning_rate": 0.00020576142895208877, "loss": 1.7419, "step": 54257 }, { "epoch": 1.81, "grad_norm": 0.5629972815513611, "learning_rate": 0.00020575150597421527, "loss": 1.7463, "step": 54258 }, { "epoch": 1.81, "grad_norm": 0.5659908652305603, "learning_rate": 0.0002057415831107454, "loss": 1.713, "step": 54259 }, { "epoch": 1.81, "grad_norm": 0.5678708553314209, "learning_rate": 0.00020573166036169142, "loss": 1.7206, "step": 54260 }, { "epoch": 1.81, "grad_norm": 0.5640933513641357, "learning_rate": 0.00020572173772706535, "loss": 1.706, "step": 54261 }, { "epoch": 1.81, "grad_norm": 0.5691553950309753, "learning_rate": 0.00020571181520687918, "loss": 1.682, "step": 54262 }, { "epoch": 1.81, "grad_norm": 0.5748832821846008, "learning_rate": 0.00020570189280114497, "loss": 1.7101, "step": 54263 }, { "epoch": 1.81, "grad_norm": 0.5629842877388, "learning_rate": 0.00020569197050987467, "loss": 1.8028, "step": 54264 }, { "epoch": 1.81, "grad_norm": 0.5573194622993469, "learning_rate": 0.00020568204833308067, "loss": 1.7084, "step": 54265 }, { "epoch": 1.81, "grad_norm": 0.5658837556838989, "learning_rate": 0.0002056721262707745, "loss": 1.7617, "step": 54266 }, { "epoch": 1.81, "grad_norm": 0.5816764235496521, "learning_rate": 0.0002056622043229686, "loss": 1.693, "step": 54267 }, { "epoch": 1.81, "grad_norm": 0.5850841403007507, "learning_rate": 0.00020565228248967492, "loss": 1.6852, "step": 54268 }, { "epoch": 1.81, "grad_norm": 0.5670555830001831, "learning_rate": 0.0002056423607709054, "loss": 1.7614, "step": 54269 }, { "epoch": 1.81, "grad_norm": 0.5777071118354797, "learning_rate": 0.00020563243916667216, "loss": 1.7382, "step": 54270 }, { "epoch": 1.81, "grad_norm": 0.5920616388320923, "learning_rate": 0.00020562251767698727, "loss": 1.7108, "step": 54271 }, { "epoch": 1.81, "grad_norm": 0.5530090928077698, "learning_rate": 0.00020561259630186271, "loss": 1.7276, "step": 54272 }, { "epoch": 1.81, "grad_norm": 0.5687338709831238, "learning_rate": 0.0002056026750413105, "loss": 1.7364, "step": 54273 }, { "epoch": 1.81, "grad_norm": 0.5984005928039551, "learning_rate": 0.00020559275389534277, "loss": 1.7466, "step": 54274 }, { "epoch": 1.81, "grad_norm": 0.5673017501831055, "learning_rate": 0.00020558283286397158, "loss": 1.7886, "step": 54275 }, { "epoch": 1.81, "grad_norm": 0.5677646398544312, "learning_rate": 0.00020557291194720889, "loss": 1.6631, "step": 54276 }, { "epoch": 1.81, "grad_norm": 0.5721019506454468, "learning_rate": 0.00020556299114506672, "loss": 1.747, "step": 54277 }, { "epoch": 1.81, "grad_norm": 0.5988075137138367, "learning_rate": 0.00020555307045755727, "loss": 1.7919, "step": 54278 }, { "epoch": 1.81, "grad_norm": 0.5811006426811218, "learning_rate": 0.0002055431498846924, "loss": 1.7368, "step": 54279 }, { "epoch": 1.81, "grad_norm": 0.5853919982910156, "learning_rate": 0.00020553322942648417, "loss": 1.7506, "step": 54280 }, { "epoch": 1.81, "grad_norm": 0.5525928735733032, "learning_rate": 0.00020552330908294486, "loss": 1.735, "step": 54281 }, { "epoch": 1.81, "grad_norm": 0.5793038010597229, "learning_rate": 0.00020551338885408616, "loss": 1.7142, "step": 54282 }, { "epoch": 1.81, "grad_norm": 0.5599401593208313, "learning_rate": 0.0002055034687399203, "loss": 1.7935, "step": 54283 }, { "epoch": 1.81, "grad_norm": 0.5517855286598206, "learning_rate": 0.00020549354874045935, "loss": 1.6886, "step": 54284 }, { "epoch": 1.81, "grad_norm": 0.5683977007865906, "learning_rate": 0.0002054836288557154, "loss": 1.777, "step": 54285 }, { "epoch": 1.81, "grad_norm": 0.5772000551223755, "learning_rate": 0.00020547370908570025, "loss": 1.8069, "step": 54286 }, { "epoch": 1.81, "grad_norm": 0.5570205450057983, "learning_rate": 0.00020546378943042608, "loss": 1.7058, "step": 54287 }, { "epoch": 1.81, "grad_norm": 0.5741298794746399, "learning_rate": 0.00020545386988990514, "loss": 1.7617, "step": 54288 }, { "epoch": 1.81, "grad_norm": 0.5543111562728882, "learning_rate": 0.00020544395046414907, "loss": 1.7805, "step": 54289 }, { "epoch": 1.81, "grad_norm": 0.5881248712539673, "learning_rate": 0.00020543403115317013, "loss": 1.7531, "step": 54290 }, { "epoch": 1.81, "grad_norm": 0.5722891688346863, "learning_rate": 0.00020542411195698045, "loss": 1.7195, "step": 54291 }, { "epoch": 1.81, "grad_norm": 0.5615612268447876, "learning_rate": 0.00020541419287559194, "loss": 1.7022, "step": 54292 }, { "epoch": 1.81, "grad_norm": 0.5652024149894714, "learning_rate": 0.0002054042739090166, "loss": 1.7503, "step": 54293 }, { "epoch": 1.81, "grad_norm": 0.5893179774284363, "learning_rate": 0.00020539435505726648, "loss": 1.7189, "step": 54294 }, { "epoch": 1.81, "grad_norm": 0.5562937259674072, "learning_rate": 0.0002053844363203539, "loss": 1.7089, "step": 54295 }, { "epoch": 1.81, "grad_norm": 0.5824232697486877, "learning_rate": 0.0002053745176982904, "loss": 1.7251, "step": 54296 }, { "epoch": 1.81, "grad_norm": 0.5702622532844543, "learning_rate": 0.00020536459919108846, "loss": 1.6822, "step": 54297 }, { "epoch": 1.81, "grad_norm": 0.582810640335083, "learning_rate": 0.00020535468079875994, "loss": 1.7728, "step": 54298 }, { "epoch": 1.81, "grad_norm": 0.5829558968544006, "learning_rate": 0.00020534476252131686, "loss": 1.6738, "step": 54299 }, { "epoch": 1.81, "grad_norm": 0.5597270727157593, "learning_rate": 0.00020533484435877127, "loss": 1.6859, "step": 54300 }, { "epoch": 1.81, "grad_norm": 0.5532544255256653, "learning_rate": 0.00020532492631113533, "loss": 1.7692, "step": 54301 }, { "epoch": 1.81, "grad_norm": 0.5798559784889221, "learning_rate": 0.00020531500837842092, "loss": 1.6737, "step": 54302 }, { "epoch": 1.81, "grad_norm": 0.5631669759750366, "learning_rate": 0.00020530509056064004, "loss": 1.7494, "step": 54303 }, { "epoch": 1.81, "grad_norm": 0.5714370012283325, "learning_rate": 0.00020529517285780493, "loss": 1.6894, "step": 54304 }, { "epoch": 1.81, "grad_norm": 0.5608581304550171, "learning_rate": 0.00020528525526992752, "loss": 1.7098, "step": 54305 }, { "epoch": 1.81, "grad_norm": 0.5821042656898499, "learning_rate": 0.0002052753377970199, "loss": 1.7803, "step": 54306 }, { "epoch": 1.81, "grad_norm": 0.5804053544998169, "learning_rate": 0.00020526542043909393, "loss": 1.7796, "step": 54307 }, { "epoch": 1.81, "grad_norm": 0.5822831392288208, "learning_rate": 0.00020525550319616193, "loss": 1.7877, "step": 54308 }, { "epoch": 1.81, "grad_norm": 0.5633513331413269, "learning_rate": 0.00020524558606823572, "loss": 1.6961, "step": 54309 }, { "epoch": 1.81, "grad_norm": 0.5663213133811951, "learning_rate": 0.00020523566905532734, "loss": 1.7996, "step": 54310 }, { "epoch": 1.81, "grad_norm": 0.5580500364303589, "learning_rate": 0.00020522575215744905, "loss": 1.7544, "step": 54311 }, { "epoch": 1.81, "grad_norm": 0.5635181069374084, "learning_rate": 0.00020521583537461258, "loss": 1.7129, "step": 54312 }, { "epoch": 1.81, "grad_norm": 0.5661152005195618, "learning_rate": 0.00020520591870683017, "loss": 1.7114, "step": 54313 }, { "epoch": 1.81, "grad_norm": 0.5576295852661133, "learning_rate": 0.00020519600215411382, "loss": 1.7654, "step": 54314 }, { "epoch": 1.81, "grad_norm": 0.5735636949539185, "learning_rate": 0.00020518608571647558, "loss": 1.7071, "step": 54315 }, { "epoch": 1.81, "grad_norm": 0.5640453100204468, "learning_rate": 0.00020517616939392744, "loss": 1.7492, "step": 54316 }, { "epoch": 1.81, "grad_norm": 0.5669143795967102, "learning_rate": 0.0002051662531864814, "loss": 1.7805, "step": 54317 }, { "epoch": 1.81, "grad_norm": 0.5879071950912476, "learning_rate": 0.00020515633709414973, "loss": 1.7236, "step": 54318 }, { "epoch": 1.81, "grad_norm": 0.5809348821640015, "learning_rate": 0.0002051464211169441, "loss": 1.7289, "step": 54319 }, { "epoch": 1.81, "grad_norm": 0.5645474195480347, "learning_rate": 0.0002051365052548768, "loss": 1.7739, "step": 54320 }, { "epoch": 1.81, "grad_norm": 0.5535746216773987, "learning_rate": 0.00020512658950795986, "loss": 1.7084, "step": 54321 }, { "epoch": 1.81, "grad_norm": 0.5747655034065247, "learning_rate": 0.00020511667387620522, "loss": 1.785, "step": 54322 }, { "epoch": 1.81, "grad_norm": 0.5723010897636414, "learning_rate": 0.00020510675835962495, "loss": 1.7374, "step": 54323 }, { "epoch": 1.81, "grad_norm": 0.5597672462463379, "learning_rate": 0.00020509684295823104, "loss": 1.708, "step": 54324 }, { "epoch": 1.81, "grad_norm": 0.5589523911476135, "learning_rate": 0.00020508692767203573, "loss": 1.7966, "step": 54325 }, { "epoch": 1.81, "grad_norm": 0.5667171478271484, "learning_rate": 0.00020507701250105075, "loss": 1.7356, "step": 54326 }, { "epoch": 1.81, "grad_norm": 0.5862335562705994, "learning_rate": 0.00020506709744528832, "loss": 1.7233, "step": 54327 }, { "epoch": 1.81, "grad_norm": 0.5779036283493042, "learning_rate": 0.00020505718250476054, "loss": 1.7684, "step": 54328 }, { "epoch": 1.81, "grad_norm": 0.5792415738105774, "learning_rate": 0.00020504726767947928, "loss": 1.8729, "step": 54329 }, { "epoch": 1.81, "grad_norm": 0.5610368251800537, "learning_rate": 0.00020503735296945665, "loss": 1.7729, "step": 54330 }, { "epoch": 1.81, "grad_norm": 0.5718763470649719, "learning_rate": 0.00020502743837470477, "loss": 1.7535, "step": 54331 }, { "epoch": 1.81, "grad_norm": 0.5823989510536194, "learning_rate": 0.00020501752389523548, "loss": 1.7397, "step": 54332 }, { "epoch": 1.81, "grad_norm": 0.5781251192092896, "learning_rate": 0.00020500760953106085, "loss": 1.6688, "step": 54333 }, { "epoch": 1.81, "grad_norm": 0.5650449991226196, "learning_rate": 0.0002049976952821931, "loss": 1.7833, "step": 54334 }, { "epoch": 1.81, "grad_norm": 0.5527284145355225, "learning_rate": 0.00020498778114864418, "loss": 1.783, "step": 54335 }, { "epoch": 1.81, "grad_norm": 0.5433316826820374, "learning_rate": 0.00020497786713042604, "loss": 1.7603, "step": 54336 }, { "epoch": 1.81, "grad_norm": 1.0431336164474487, "learning_rate": 0.00020496795322755076, "loss": 1.7071, "step": 54337 }, { "epoch": 1.81, "grad_norm": 0.5772705078125, "learning_rate": 0.00020495803944003046, "loss": 1.736, "step": 54338 }, { "epoch": 1.81, "grad_norm": 2.5745797157287598, "learning_rate": 0.00020494812576787703, "loss": 1.8541, "step": 54339 }, { "epoch": 1.81, "grad_norm": 0.5603834986686707, "learning_rate": 0.00020493821221110247, "loss": 1.7054, "step": 54340 }, { "epoch": 1.81, "grad_norm": 0.5616782307624817, "learning_rate": 0.0002049282987697191, "loss": 1.7244, "step": 54341 }, { "epoch": 1.81, "grad_norm": 0.5801649689674377, "learning_rate": 0.00020491838544373865, "loss": 1.7864, "step": 54342 }, { "epoch": 1.81, "grad_norm": 0.5467206239700317, "learning_rate": 0.00020490847223317328, "loss": 1.7563, "step": 54343 }, { "epoch": 1.81, "grad_norm": 0.5602138638496399, "learning_rate": 0.00020489855913803507, "loss": 1.776, "step": 54344 }, { "epoch": 1.81, "grad_norm": 0.5821593999862671, "learning_rate": 0.00020488864615833603, "loss": 1.8012, "step": 54345 }, { "epoch": 1.81, "grad_norm": 0.5582925081253052, "learning_rate": 0.00020487873329408805, "loss": 1.7712, "step": 54346 }, { "epoch": 1.81, "grad_norm": 0.5788779258728027, "learning_rate": 0.00020486882054530326, "loss": 1.7387, "step": 54347 }, { "epoch": 1.81, "grad_norm": 0.5667386651039124, "learning_rate": 0.0002048589079119938, "loss": 1.7494, "step": 54348 }, { "epoch": 1.81, "grad_norm": 0.5565248131752014, "learning_rate": 0.00020484899539417154, "loss": 1.8081, "step": 54349 }, { "epoch": 1.81, "grad_norm": 0.6011777520179749, "learning_rate": 0.00020483908299184862, "loss": 1.7298, "step": 54350 }, { "epoch": 1.81, "grad_norm": 0.5646715760231018, "learning_rate": 0.00020482917070503711, "loss": 1.7403, "step": 54351 }, { "epoch": 1.81, "grad_norm": 0.582118570804596, "learning_rate": 0.00020481925853374884, "loss": 1.707, "step": 54352 }, { "epoch": 1.81, "grad_norm": 0.5667492151260376, "learning_rate": 0.00020480934647799592, "loss": 1.7874, "step": 54353 }, { "epoch": 1.81, "grad_norm": 0.5574175715446472, "learning_rate": 0.0002047994345377905, "loss": 1.748, "step": 54354 }, { "epoch": 1.81, "grad_norm": 0.5650337338447571, "learning_rate": 0.0002047895227131446, "loss": 1.7198, "step": 54355 }, { "epoch": 1.81, "grad_norm": 0.5860371589660645, "learning_rate": 0.00020477961100407013, "loss": 1.7376, "step": 54356 }, { "epoch": 1.81, "grad_norm": 0.5738615989685059, "learning_rate": 0.0002047696994105792, "loss": 1.7141, "step": 54357 }, { "epoch": 1.81, "grad_norm": 0.5910956859588623, "learning_rate": 0.00020475978793268383, "loss": 1.7133, "step": 54358 }, { "epoch": 1.81, "grad_norm": 0.5585570931434631, "learning_rate": 0.00020474987657039605, "loss": 1.7057, "step": 54359 }, { "epoch": 1.81, "grad_norm": 0.5777652263641357, "learning_rate": 0.0002047399653237278, "loss": 1.7219, "step": 54360 }, { "epoch": 1.81, "grad_norm": 0.6049957275390625, "learning_rate": 0.00020473005419269135, "loss": 1.678, "step": 54361 }, { "epoch": 1.81, "grad_norm": 0.5911795496940613, "learning_rate": 0.00020472014317729845, "loss": 1.827, "step": 54362 }, { "epoch": 1.81, "grad_norm": 0.5952838659286499, "learning_rate": 0.00020471023227756126, "loss": 1.6611, "step": 54363 }, { "epoch": 1.81, "grad_norm": 0.5717069506645203, "learning_rate": 0.00020470032149349184, "loss": 1.6998, "step": 54364 }, { "epoch": 1.81, "grad_norm": 0.5889241099357605, "learning_rate": 0.00020469041082510223, "loss": 1.8278, "step": 54365 }, { "epoch": 1.81, "grad_norm": 0.5747382044792175, "learning_rate": 0.0002046805002724044, "loss": 1.7401, "step": 54366 }, { "epoch": 1.81, "grad_norm": 0.5898442268371582, "learning_rate": 0.00020467058983541028, "loss": 1.6924, "step": 54367 }, { "epoch": 1.81, "grad_norm": 0.5795310735702515, "learning_rate": 0.00020466067951413225, "loss": 1.7782, "step": 54368 }, { "epoch": 1.81, "grad_norm": 0.5898537039756775, "learning_rate": 0.00020465076930858186, "loss": 1.6971, "step": 54369 }, { "epoch": 1.81, "grad_norm": 0.5706188678741455, "learning_rate": 0.00020464085921877148, "loss": 1.7435, "step": 54370 }, { "epoch": 1.81, "grad_norm": 0.5856810808181763, "learning_rate": 0.0002046309492447131, "loss": 1.7118, "step": 54371 }, { "epoch": 1.81, "grad_norm": 0.5834046602249146, "learning_rate": 0.0002046210393864187, "loss": 1.7651, "step": 54372 }, { "epoch": 1.81, "grad_norm": 0.5704922080039978, "learning_rate": 0.00020461112964390024, "loss": 1.8335, "step": 54373 }, { "epoch": 1.81, "grad_norm": 0.5569743514060974, "learning_rate": 0.0002046012200171698, "loss": 1.7268, "step": 54374 }, { "epoch": 1.81, "grad_norm": 0.5668307542800903, "learning_rate": 0.00020459131050623957, "loss": 1.7357, "step": 54375 }, { "epoch": 1.81, "grad_norm": 0.5696664452552795, "learning_rate": 0.00020458140111112122, "loss": 1.7134, "step": 54376 }, { "epoch": 1.81, "grad_norm": 0.5590829849243164, "learning_rate": 0.0002045714918318271, "loss": 1.7043, "step": 54377 }, { "epoch": 1.81, "grad_norm": 0.5903774499893188, "learning_rate": 0.0002045615826683692, "loss": 1.7137, "step": 54378 }, { "epoch": 1.81, "grad_norm": 0.5701920390129089, "learning_rate": 0.00020455167362075935, "loss": 1.7249, "step": 54379 }, { "epoch": 1.81, "grad_norm": 0.5711303353309631, "learning_rate": 0.00020454176468900976, "loss": 1.7185, "step": 54380 }, { "epoch": 1.81, "grad_norm": 0.5896598100662231, "learning_rate": 0.00020453185587313233, "loss": 1.7786, "step": 54381 }, { "epoch": 1.81, "grad_norm": 0.5630417466163635, "learning_rate": 0.00020452194717313933, "loss": 1.754, "step": 54382 }, { "epoch": 1.81, "grad_norm": 0.5831962823867798, "learning_rate": 0.00020451203858904245, "loss": 1.722, "step": 54383 }, { "epoch": 1.81, "grad_norm": 0.5663889050483704, "learning_rate": 0.00020450213012085394, "loss": 1.6918, "step": 54384 }, { "epoch": 1.81, "grad_norm": 0.5937162041664124, "learning_rate": 0.00020449222176858583, "loss": 1.8593, "step": 54385 }, { "epoch": 1.81, "grad_norm": 0.56247878074646, "learning_rate": 0.00020448231353225002, "loss": 1.7344, "step": 54386 }, { "epoch": 1.81, "grad_norm": 0.9367072582244873, "learning_rate": 0.0002044724054118586, "loss": 1.736, "step": 54387 }, { "epoch": 1.81, "grad_norm": 0.5622742772102356, "learning_rate": 0.00020446249740742372, "loss": 1.7842, "step": 54388 }, { "epoch": 1.81, "grad_norm": 0.5630329251289368, "learning_rate": 0.00020445258951895718, "loss": 1.7602, "step": 54389 }, { "epoch": 1.81, "grad_norm": 0.5791398286819458, "learning_rate": 0.00020444268174647106, "loss": 1.7612, "step": 54390 }, { "epoch": 1.81, "grad_norm": 0.5857625603675842, "learning_rate": 0.00020443277408997755, "loss": 1.7528, "step": 54391 }, { "epoch": 1.81, "grad_norm": 0.5870813131332397, "learning_rate": 0.0002044228665494886, "loss": 1.6741, "step": 54392 }, { "epoch": 1.81, "grad_norm": 0.5724174380302429, "learning_rate": 0.00020441295912501615, "loss": 1.7264, "step": 54393 }, { "epoch": 1.81, "grad_norm": 0.5882401466369629, "learning_rate": 0.00020440305181657227, "loss": 1.6407, "step": 54394 }, { "epoch": 1.81, "grad_norm": 0.558671236038208, "learning_rate": 0.0002043931446241691, "loss": 1.6958, "step": 54395 }, { "epoch": 1.81, "grad_norm": 0.5492973327636719, "learning_rate": 0.00020438323754781845, "loss": 1.7304, "step": 54396 }, { "epoch": 1.81, "grad_norm": 0.5545548796653748, "learning_rate": 0.00020437333058753244, "loss": 1.7121, "step": 54397 }, { "epoch": 1.81, "grad_norm": 0.5453513264656067, "learning_rate": 0.00020436342374332328, "loss": 1.6615, "step": 54398 }, { "epoch": 1.81, "grad_norm": 0.5837148427963257, "learning_rate": 0.00020435351701520265, "loss": 1.8298, "step": 54399 }, { "epoch": 1.81, "grad_norm": 0.5574028491973877, "learning_rate": 0.0002043436104031828, "loss": 1.6795, "step": 54400 }, { "epoch": 1.81, "grad_norm": 0.5674915909767151, "learning_rate": 0.00020433370390727574, "loss": 1.6673, "step": 54401 }, { "epoch": 1.81, "grad_norm": 0.5697156190872192, "learning_rate": 0.00020432379752749352, "loss": 1.7278, "step": 54402 }, { "epoch": 1.81, "grad_norm": 0.5819458961486816, "learning_rate": 0.00020431389126384806, "loss": 1.7598, "step": 54403 }, { "epoch": 1.81, "grad_norm": 0.5666975378990173, "learning_rate": 0.00020430398511635137, "loss": 1.7606, "step": 54404 }, { "epoch": 1.81, "grad_norm": 0.578924298286438, "learning_rate": 0.00020429407908501576, "loss": 1.7595, "step": 54405 }, { "epoch": 1.81, "grad_norm": 0.5584675073623657, "learning_rate": 0.0002042841731698528, "loss": 1.7845, "step": 54406 }, { "epoch": 1.81, "grad_norm": 0.5735871195793152, "learning_rate": 0.00020427426737087487, "loss": 1.7283, "step": 54407 }, { "epoch": 1.81, "grad_norm": 0.5698040723800659, "learning_rate": 0.0002042643616880939, "loss": 1.7835, "step": 54408 }, { "epoch": 1.81, "grad_norm": 0.5584329962730408, "learning_rate": 0.00020425445612152178, "loss": 1.7756, "step": 54409 }, { "epoch": 1.81, "grad_norm": 0.5728612542152405, "learning_rate": 0.00020424455067117068, "loss": 1.7133, "step": 54410 }, { "epoch": 1.81, "grad_norm": 0.5600970387458801, "learning_rate": 0.00020423464533705256, "loss": 1.7479, "step": 54411 }, { "epoch": 1.81, "grad_norm": 0.5667964220046997, "learning_rate": 0.00020422474011917965, "loss": 1.7204, "step": 54412 }, { "epoch": 1.81, "grad_norm": 0.5694096088409424, "learning_rate": 0.0002042148350175636, "loss": 1.7445, "step": 54413 }, { "epoch": 1.81, "grad_norm": 0.553609311580658, "learning_rate": 0.00020420493003221667, "loss": 1.7608, "step": 54414 }, { "epoch": 1.81, "grad_norm": 0.5502497553825378, "learning_rate": 0.00020419502516315087, "loss": 1.8031, "step": 54415 }, { "epoch": 1.81, "grad_norm": 0.5552939176559448, "learning_rate": 0.00020418512041037819, "loss": 1.8268, "step": 54416 }, { "epoch": 1.81, "grad_norm": 0.5792640447616577, "learning_rate": 0.00020417521577391064, "loss": 1.7858, "step": 54417 }, { "epoch": 1.81, "grad_norm": 0.5515738129615784, "learning_rate": 0.0002041653112537603, "loss": 1.783, "step": 54418 }, { "epoch": 1.81, "grad_norm": 0.5806017518043518, "learning_rate": 0.00020415540684993913, "loss": 1.6858, "step": 54419 }, { "epoch": 1.81, "grad_norm": 0.560436487197876, "learning_rate": 0.0002041455025624591, "loss": 1.7096, "step": 54420 }, { "epoch": 1.81, "grad_norm": 0.5697842240333557, "learning_rate": 0.00020413559839133234, "loss": 1.799, "step": 54421 }, { "epoch": 1.81, "grad_norm": 1.0259977579116821, "learning_rate": 0.00020412569433657092, "loss": 1.7238, "step": 54422 }, { "epoch": 1.81, "grad_norm": 0.5525538325309753, "learning_rate": 0.00020411579039818673, "loss": 1.6673, "step": 54423 }, { "epoch": 1.81, "grad_norm": 0.5633395910263062, "learning_rate": 0.0002041058865761918, "loss": 1.7422, "step": 54424 }, { "epoch": 1.81, "grad_norm": 0.5784299969673157, "learning_rate": 0.0002040959828705983, "loss": 1.7332, "step": 54425 }, { "epoch": 1.81, "grad_norm": 0.5559033751487732, "learning_rate": 0.0002040860792814181, "loss": 1.6867, "step": 54426 }, { "epoch": 1.81, "grad_norm": 0.5521710515022278, "learning_rate": 0.00020407617580866315, "loss": 1.7201, "step": 54427 }, { "epoch": 1.81, "grad_norm": 0.5554550886154175, "learning_rate": 0.00020406627245234582, "loss": 1.8275, "step": 54428 }, { "epoch": 1.81, "grad_norm": 0.5627405047416687, "learning_rate": 0.00020405636921247766, "loss": 1.7447, "step": 54429 }, { "epoch": 1.81, "grad_norm": 0.5779233574867249, "learning_rate": 0.00020404646608907103, "loss": 1.6743, "step": 54430 }, { "epoch": 1.81, "grad_norm": 0.5773008465766907, "learning_rate": 0.00020403656308213785, "loss": 1.7984, "step": 54431 }, { "epoch": 1.81, "grad_norm": 0.5939455032348633, "learning_rate": 0.0002040266601916902, "loss": 1.7484, "step": 54432 }, { "epoch": 1.81, "grad_norm": 0.5594385266304016, "learning_rate": 0.00020401675741774, "loss": 1.7147, "step": 54433 }, { "epoch": 1.81, "grad_norm": 0.5702671408653259, "learning_rate": 0.0002040068547602992, "loss": 1.7347, "step": 54434 }, { "epoch": 1.81, "grad_norm": 0.5823060274124146, "learning_rate": 0.00020399695221938019, "loss": 1.6669, "step": 54435 }, { "epoch": 1.81, "grad_norm": 0.5789060592651367, "learning_rate": 0.0002039870497949945, "loss": 1.7448, "step": 54436 }, { "epoch": 1.81, "grad_norm": 0.5766597390174866, "learning_rate": 0.00020397714748715448, "loss": 1.6839, "step": 54437 }, { "epoch": 1.81, "grad_norm": 0.5866168141365051, "learning_rate": 0.0002039672452958721, "loss": 1.7213, "step": 54438 }, { "epoch": 1.81, "grad_norm": 0.5440179109573364, "learning_rate": 0.00020395734322115928, "loss": 1.6988, "step": 54439 }, { "epoch": 1.81, "grad_norm": 0.5859760046005249, "learning_rate": 0.00020394744126302807, "loss": 1.7246, "step": 54440 }, { "epoch": 1.81, "grad_norm": 0.5920961499214172, "learning_rate": 0.0002039375394214905, "loss": 1.7492, "step": 54441 }, { "epoch": 1.81, "grad_norm": 0.574582040309906, "learning_rate": 0.0002039276376965587, "loss": 1.7149, "step": 54442 }, { "epoch": 1.81, "grad_norm": 0.586707592010498, "learning_rate": 0.00020391773608824452, "loss": 1.6905, "step": 54443 }, { "epoch": 1.81, "grad_norm": 0.5728742480278015, "learning_rate": 0.00020390783459656002, "loss": 1.6489, "step": 54444 }, { "epoch": 1.81, "grad_norm": 0.5630233287811279, "learning_rate": 0.00020389793322151735, "loss": 1.7042, "step": 54445 }, { "epoch": 1.81, "grad_norm": 0.5877054929733276, "learning_rate": 0.00020388803196312838, "loss": 1.7551, "step": 54446 }, { "epoch": 1.81, "grad_norm": 0.5886722803115845, "learning_rate": 0.00020387813082140514, "loss": 1.7469, "step": 54447 }, { "epoch": 1.81, "grad_norm": 0.5718152523040771, "learning_rate": 0.00020386822979635976, "loss": 1.6905, "step": 54448 }, { "epoch": 1.81, "grad_norm": 0.5604156255722046, "learning_rate": 0.00020385832888800416, "loss": 1.7792, "step": 54449 }, { "epoch": 1.81, "grad_norm": 0.5735320448875427, "learning_rate": 0.0002038484280963503, "loss": 1.8193, "step": 54450 }, { "epoch": 1.81, "grad_norm": 0.5547882318496704, "learning_rate": 0.00020383852742141033, "loss": 1.8113, "step": 54451 }, { "epoch": 1.81, "grad_norm": 0.5516731142997742, "learning_rate": 0.0002038286268631963, "loss": 1.7297, "step": 54452 }, { "epoch": 1.81, "grad_norm": 0.5639392137527466, "learning_rate": 0.0002038187264217201, "loss": 1.6781, "step": 54453 }, { "epoch": 1.81, "grad_norm": 0.5809480547904968, "learning_rate": 0.00020380882609699375, "loss": 1.7358, "step": 54454 }, { "epoch": 1.81, "grad_norm": 0.5902621150016785, "learning_rate": 0.0002037989258890294, "loss": 1.7595, "step": 54455 }, { "epoch": 1.81, "grad_norm": 0.5787453055381775, "learning_rate": 0.00020378902579783892, "loss": 1.7537, "step": 54456 }, { "epoch": 1.81, "grad_norm": 0.6012226343154907, "learning_rate": 0.0002037791258234343, "loss": 1.7019, "step": 54457 }, { "epoch": 1.81, "grad_norm": 0.5572245717048645, "learning_rate": 0.00020376922596582778, "loss": 1.6546, "step": 54458 }, { "epoch": 1.81, "grad_norm": 0.5600500702857971, "learning_rate": 0.00020375932622503117, "loss": 1.7754, "step": 54459 }, { "epoch": 1.81, "grad_norm": 0.5979153513908386, "learning_rate": 0.0002037494266010566, "loss": 1.7978, "step": 54460 }, { "epoch": 1.81, "grad_norm": 0.5601426362991333, "learning_rate": 0.00020373952709391597, "loss": 1.7123, "step": 54461 }, { "epoch": 1.81, "grad_norm": 0.576759397983551, "learning_rate": 0.0002037296277036215, "loss": 1.7711, "step": 54462 }, { "epoch": 1.81, "grad_norm": 0.5641526579856873, "learning_rate": 0.00020371972843018492, "loss": 1.7073, "step": 54463 }, { "epoch": 1.81, "grad_norm": 0.5730283856391907, "learning_rate": 0.0002037098292736185, "loss": 1.6805, "step": 54464 }, { "epoch": 1.81, "grad_norm": 0.5750975012779236, "learning_rate": 0.00020369993023393418, "loss": 1.6805, "step": 54465 }, { "epoch": 1.81, "grad_norm": 0.5572534799575806, "learning_rate": 0.00020369003131114391, "loss": 1.8206, "step": 54466 }, { "epoch": 1.81, "grad_norm": 0.5723760724067688, "learning_rate": 0.00020368013250525975, "loss": 1.7156, "step": 54467 }, { "epoch": 1.81, "grad_norm": 0.5646761059761047, "learning_rate": 0.00020367023381629378, "loss": 1.675, "step": 54468 }, { "epoch": 1.81, "grad_norm": 0.5597261786460876, "learning_rate": 0.00020366033524425788, "loss": 1.7657, "step": 54469 }, { "epoch": 1.81, "grad_norm": 0.5856857895851135, "learning_rate": 0.0002036504367891641, "loss": 1.8348, "step": 54470 }, { "epoch": 1.81, "grad_norm": 0.6053418517112732, "learning_rate": 0.00020364053845102454, "loss": 1.7323, "step": 54471 }, { "epoch": 1.81, "grad_norm": 0.5938437581062317, "learning_rate": 0.00020363064022985123, "loss": 1.7291, "step": 54472 }, { "epoch": 1.81, "grad_norm": 0.5564353466033936, "learning_rate": 0.00020362074212565607, "loss": 1.6434, "step": 54473 }, { "epoch": 1.81, "grad_norm": 0.5620351433753967, "learning_rate": 0.00020361084413845113, "loss": 1.6818, "step": 54474 }, { "epoch": 1.81, "grad_norm": 0.5766134262084961, "learning_rate": 0.00020360094626824845, "loss": 1.7278, "step": 54475 }, { "epoch": 1.81, "grad_norm": 0.5604318380355835, "learning_rate": 0.00020359104851505998, "loss": 1.7513, "step": 54476 }, { "epoch": 1.81, "grad_norm": 0.5582157373428345, "learning_rate": 0.00020358115087889772, "loss": 1.7665, "step": 54477 }, { "epoch": 1.81, "grad_norm": 0.5513079166412354, "learning_rate": 0.0002035712533597739, "loss": 1.6862, "step": 54478 }, { "epoch": 1.81, "grad_norm": 0.5720890760421753, "learning_rate": 0.00020356135595770024, "loss": 1.7741, "step": 54479 }, { "epoch": 1.81, "grad_norm": 0.5906085968017578, "learning_rate": 0.00020355145867268892, "loss": 1.7925, "step": 54480 }, { "epoch": 1.81, "grad_norm": 0.5578250288963318, "learning_rate": 0.00020354156150475188, "loss": 1.7301, "step": 54481 }, { "epoch": 1.81, "grad_norm": 0.5727977752685547, "learning_rate": 0.00020353166445390126, "loss": 1.7157, "step": 54482 }, { "epoch": 1.81, "grad_norm": 0.5714685916900635, "learning_rate": 0.00020352176752014894, "loss": 1.7634, "step": 54483 }, { "epoch": 1.81, "grad_norm": 0.5565898418426514, "learning_rate": 0.00020351187070350685, "loss": 1.7721, "step": 54484 }, { "epoch": 1.81, "grad_norm": 0.5725567936897278, "learning_rate": 0.00020350197400398738, "loss": 1.7367, "step": 54485 }, { "epoch": 1.81, "grad_norm": 0.5979335904121399, "learning_rate": 0.00020349207742160208, "loss": 1.7841, "step": 54486 }, { "epoch": 1.81, "grad_norm": 0.5845392942428589, "learning_rate": 0.00020348218095636326, "loss": 1.6841, "step": 54487 }, { "epoch": 1.81, "grad_norm": 0.5798557996749878, "learning_rate": 0.0002034722846082829, "loss": 1.7541, "step": 54488 }, { "epoch": 1.81, "grad_norm": 0.6002058982849121, "learning_rate": 0.00020346238837737294, "loss": 1.7266, "step": 54489 }, { "epoch": 1.81, "grad_norm": 0.5723272562026978, "learning_rate": 0.0002034524922636454, "loss": 1.7455, "step": 54490 }, { "epoch": 1.81, "grad_norm": 0.5605601668357849, "learning_rate": 0.00020344259626711223, "loss": 1.6804, "step": 54491 }, { "epoch": 1.81, "grad_norm": 0.5607483983039856, "learning_rate": 0.00020343270038778572, "loss": 1.8115, "step": 54492 }, { "epoch": 1.81, "grad_norm": 0.5755439400672913, "learning_rate": 0.00020342280462567745, "loss": 1.7369, "step": 54493 }, { "epoch": 1.81, "grad_norm": 0.5615248680114746, "learning_rate": 0.0002034129089807998, "loss": 1.7536, "step": 54494 }, { "epoch": 1.81, "grad_norm": 0.5555457472801208, "learning_rate": 0.00020340301345316464, "loss": 1.751, "step": 54495 }, { "epoch": 1.81, "grad_norm": 0.6007168292999268, "learning_rate": 0.000203393118042784, "loss": 1.7769, "step": 54496 }, { "epoch": 1.81, "grad_norm": 0.57893306016922, "learning_rate": 0.00020338322274966982, "loss": 1.7513, "step": 54497 }, { "epoch": 1.81, "grad_norm": 0.569694995880127, "learning_rate": 0.0002033733275738341, "loss": 1.7154, "step": 54498 }, { "epoch": 1.81, "grad_norm": 0.6783681511878967, "learning_rate": 0.00020336343251528918, "loss": 1.7627, "step": 54499 }, { "epoch": 1.81, "grad_norm": 0.5817838311195374, "learning_rate": 0.0002033535375740466, "loss": 1.7908, "step": 54500 }, { "epoch": 1.81, "grad_norm": 0.5732285976409912, "learning_rate": 0.00020334364275011863, "loss": 1.7191, "step": 54501 }, { "epoch": 1.81, "grad_norm": 0.5958322882652283, "learning_rate": 0.00020333374804351728, "loss": 1.7505, "step": 54502 }, { "epoch": 1.81, "grad_norm": 0.5950497984886169, "learning_rate": 0.00020332385345425447, "loss": 1.8122, "step": 54503 }, { "epoch": 1.81, "grad_norm": 0.5809577107429504, "learning_rate": 0.00020331395898234226, "loss": 1.6698, "step": 54504 }, { "epoch": 1.81, "grad_norm": 0.5548475384712219, "learning_rate": 0.00020330406462779275, "loss": 1.7829, "step": 54505 }, { "epoch": 1.81, "grad_norm": 0.5720657110214233, "learning_rate": 0.00020329417039061776, "loss": 1.749, "step": 54506 }, { "epoch": 1.81, "grad_norm": 0.575617253780365, "learning_rate": 0.00020328427627082935, "loss": 1.7104, "step": 54507 }, { "epoch": 1.81, "grad_norm": 0.5685485601425171, "learning_rate": 0.00020327438226843965, "loss": 1.7369, "step": 54508 }, { "epoch": 1.81, "grad_norm": 0.6137767434120178, "learning_rate": 0.00020326448838346064, "loss": 1.7693, "step": 54509 }, { "epoch": 1.81, "grad_norm": 0.5573219060897827, "learning_rate": 0.00020325459461590425, "loss": 1.6251, "step": 54510 }, { "epoch": 1.81, "grad_norm": 0.5757237076759338, "learning_rate": 0.00020324470096578248, "loss": 1.724, "step": 54511 }, { "epoch": 1.81, "grad_norm": 0.5633412003517151, "learning_rate": 0.00020323480743310752, "loss": 1.7706, "step": 54512 }, { "epoch": 1.81, "grad_norm": 0.5555997490882874, "learning_rate": 0.0002032249140178911, "loss": 1.7092, "step": 54513 }, { "epoch": 1.81, "grad_norm": 0.586393415927887, "learning_rate": 0.00020321502072014535, "loss": 1.8216, "step": 54514 }, { "epoch": 1.81, "grad_norm": 0.5872946977615356, "learning_rate": 0.00020320512753988255, "loss": 1.727, "step": 54515 }, { "epoch": 1.81, "grad_norm": 0.5711510181427002, "learning_rate": 0.00020319523447711422, "loss": 1.7995, "step": 54516 }, { "epoch": 1.81, "grad_norm": 0.5927251577377319, "learning_rate": 0.00020318534153185264, "loss": 1.6873, "step": 54517 }, { "epoch": 1.81, "grad_norm": 0.5687395334243774, "learning_rate": 0.00020317544870410983, "loss": 1.6971, "step": 54518 }, { "epoch": 1.81, "grad_norm": 0.5811471343040466, "learning_rate": 0.0002031655559938978, "loss": 1.7429, "step": 54519 }, { "epoch": 1.81, "grad_norm": 0.5813047885894775, "learning_rate": 0.0002031556634012285, "loss": 1.7322, "step": 54520 }, { "epoch": 1.81, "grad_norm": 0.5776674747467041, "learning_rate": 0.00020314577092611386, "loss": 1.7889, "step": 54521 }, { "epoch": 1.81, "grad_norm": 0.5752160549163818, "learning_rate": 0.0002031358785685662, "loss": 1.6686, "step": 54522 }, { "epoch": 1.81, "grad_norm": 0.5948396325111389, "learning_rate": 0.0002031259863285971, "loss": 1.7675, "step": 54523 }, { "epoch": 1.81, "grad_norm": 0.5684506893157959, "learning_rate": 0.00020311609420621887, "loss": 1.7115, "step": 54524 }, { "epoch": 1.81, "grad_norm": 0.567084014415741, "learning_rate": 0.00020310620220144343, "loss": 1.7139, "step": 54525 }, { "epoch": 1.81, "grad_norm": 0.578579306602478, "learning_rate": 0.00020309631031428278, "loss": 1.7367, "step": 54526 }, { "epoch": 1.81, "grad_norm": 0.581774890422821, "learning_rate": 0.0002030864185447489, "loss": 1.6381, "step": 54527 }, { "epoch": 1.81, "grad_norm": 0.560370147228241, "learning_rate": 0.00020307652689285376, "loss": 1.7465, "step": 54528 }, { "epoch": 1.81, "grad_norm": 0.5719232559204102, "learning_rate": 0.00020306663535860962, "loss": 1.708, "step": 54529 }, { "epoch": 1.81, "grad_norm": 0.5938377380371094, "learning_rate": 0.00020305674394202817, "loss": 1.7217, "step": 54530 }, { "epoch": 1.81, "grad_norm": 0.5904723405838013, "learning_rate": 0.00020304685264312155, "loss": 1.6856, "step": 54531 }, { "epoch": 1.81, "grad_norm": 0.5752065181732178, "learning_rate": 0.00020303696146190185, "loss": 1.7663, "step": 54532 }, { "epoch": 1.81, "grad_norm": 0.5973268747329712, "learning_rate": 0.0002030270703983809, "loss": 1.7224, "step": 54533 }, { "epoch": 1.81, "grad_norm": 0.5765711069107056, "learning_rate": 0.00020301717945257086, "loss": 1.8154, "step": 54534 }, { "epoch": 1.81, "grad_norm": 0.5683965682983398, "learning_rate": 0.00020300728862448366, "loss": 1.6809, "step": 54535 }, { "epoch": 1.81, "grad_norm": 1.0516774654388428, "learning_rate": 0.00020299739791413129, "loss": 1.778, "step": 54536 }, { "epoch": 1.81, "grad_norm": 0.576496422290802, "learning_rate": 0.00020298750732152579, "loss": 1.7089, "step": 54537 }, { "epoch": 1.81, "grad_norm": 0.5645363926887512, "learning_rate": 0.00020297761684667911, "loss": 1.6951, "step": 54538 }, { "epoch": 1.81, "grad_norm": 0.5667282938957214, "learning_rate": 0.00020296772648960347, "loss": 1.6749, "step": 54539 }, { "epoch": 1.81, "grad_norm": 0.5760062336921692, "learning_rate": 0.0002029578362503106, "loss": 1.7977, "step": 54540 }, { "epoch": 1.81, "grad_norm": 0.5719358921051025, "learning_rate": 0.00020294794612881262, "loss": 1.6643, "step": 54541 }, { "epoch": 1.81, "grad_norm": 0.5875155925750732, "learning_rate": 0.00020293805612512161, "loss": 1.6883, "step": 54542 }, { "epoch": 1.81, "grad_norm": 0.553663969039917, "learning_rate": 0.00020292816623924942, "loss": 1.707, "step": 54543 }, { "epoch": 1.81, "grad_norm": 0.5744572877883911, "learning_rate": 0.00020291827647120805, "loss": 1.7236, "step": 54544 }, { "epoch": 1.81, "grad_norm": 0.5615577697753906, "learning_rate": 0.00020290838682100984, "loss": 1.7587, "step": 54545 }, { "epoch": 1.81, "grad_norm": 0.5773535966873169, "learning_rate": 0.00020289849728866632, "loss": 1.7055, "step": 54546 }, { "epoch": 1.81, "grad_norm": 0.551651656627655, "learning_rate": 0.00020288860787418979, "loss": 1.72, "step": 54547 }, { "epoch": 1.81, "grad_norm": 0.5866392254829407, "learning_rate": 0.00020287871857759218, "loss": 1.7135, "step": 54548 }, { "epoch": 1.81, "grad_norm": 0.5924312472343445, "learning_rate": 0.00020286882939888552, "loss": 1.8268, "step": 54549 }, { "epoch": 1.81, "grad_norm": 0.5799105763435364, "learning_rate": 0.00020285894033808175, "loss": 1.7163, "step": 54550 }, { "epoch": 1.81, "grad_norm": 0.5634167194366455, "learning_rate": 0.00020284905139519285, "loss": 1.7496, "step": 54551 }, { "epoch": 1.81, "grad_norm": 0.5969016551971436, "learning_rate": 0.0002028391625702311, "loss": 1.8177, "step": 54552 }, { "epoch": 1.81, "grad_norm": 0.5564060211181641, "learning_rate": 0.0002028292738632081, "loss": 1.6965, "step": 54553 }, { "epoch": 1.82, "grad_norm": 0.5510474443435669, "learning_rate": 0.00020281938527413608, "loss": 1.7115, "step": 54554 }, { "epoch": 1.82, "grad_norm": 0.5784190893173218, "learning_rate": 0.00020280949680302708, "loss": 1.7675, "step": 54555 }, { "epoch": 1.82, "grad_norm": 0.5552302002906799, "learning_rate": 0.00020279960844989296, "loss": 1.7307, "step": 54556 }, { "epoch": 1.82, "grad_norm": 0.5746632218360901, "learning_rate": 0.0002027897202147458, "loss": 1.732, "step": 54557 }, { "epoch": 1.82, "grad_norm": 0.5888399481773376, "learning_rate": 0.00020277983209759755, "loss": 1.6898, "step": 54558 }, { "epoch": 1.82, "grad_norm": 0.5632861852645874, "learning_rate": 0.00020276994409846043, "loss": 1.6592, "step": 54559 }, { "epoch": 1.82, "grad_norm": 0.5538944005966187, "learning_rate": 0.0002027600562173461, "loss": 1.8058, "step": 54560 }, { "epoch": 1.82, "grad_norm": 0.5763612389564514, "learning_rate": 0.00020275016845426678, "loss": 1.7076, "step": 54561 }, { "epoch": 1.82, "grad_norm": 0.5811068415641785, "learning_rate": 0.0002027402808092345, "loss": 1.7408, "step": 54562 }, { "epoch": 1.82, "grad_norm": 0.559921383857727, "learning_rate": 0.00020273039328226112, "loss": 1.7443, "step": 54563 }, { "epoch": 1.82, "grad_norm": 0.5654842853546143, "learning_rate": 0.0002027205058733587, "loss": 1.7452, "step": 54564 }, { "epoch": 1.82, "grad_norm": 0.592975914478302, "learning_rate": 0.00020271061858253932, "loss": 1.7843, "step": 54565 }, { "epoch": 1.82, "grad_norm": 0.5506032109260559, "learning_rate": 0.0002027007314098149, "loss": 1.7429, "step": 54566 }, { "epoch": 1.82, "grad_norm": 0.5679553151130676, "learning_rate": 0.00020269084435519733, "loss": 1.783, "step": 54567 }, { "epoch": 1.82, "grad_norm": 0.5734114050865173, "learning_rate": 0.00020268095741869882, "loss": 1.7106, "step": 54568 }, { "epoch": 1.82, "grad_norm": 0.560620129108429, "learning_rate": 0.00020267107060033132, "loss": 1.7483, "step": 54569 }, { "epoch": 1.82, "grad_norm": 0.5933985710144043, "learning_rate": 0.0002026611839001068, "loss": 1.8071, "step": 54570 }, { "epoch": 1.82, "grad_norm": 0.5852058529853821, "learning_rate": 0.0002026512973180372, "loss": 1.7474, "step": 54571 }, { "epoch": 1.82, "grad_norm": 0.5657840371131897, "learning_rate": 0.0002026414108541347, "loss": 1.6637, "step": 54572 }, { "epoch": 1.82, "grad_norm": 0.5848092436790466, "learning_rate": 0.0002026315245084111, "loss": 1.7929, "step": 54573 }, { "epoch": 1.82, "grad_norm": 0.558587372303009, "learning_rate": 0.0002026216382808784, "loss": 1.6604, "step": 54574 }, { "epoch": 1.82, "grad_norm": 0.5532351732254028, "learning_rate": 0.0002026117521715488, "loss": 1.6718, "step": 54575 }, { "epoch": 1.82, "grad_norm": 0.5609320402145386, "learning_rate": 0.00020260186618043416, "loss": 1.7019, "step": 54576 }, { "epoch": 1.82, "grad_norm": 0.5713015794754028, "learning_rate": 0.00020259198030754647, "loss": 1.744, "step": 54577 }, { "epoch": 1.82, "grad_norm": 0.566410481929779, "learning_rate": 0.00020258209455289777, "loss": 1.7068, "step": 54578 }, { "epoch": 1.82, "grad_norm": 0.5607500076293945, "learning_rate": 0.00020257220891650013, "loss": 1.7388, "step": 54579 }, { "epoch": 1.82, "grad_norm": 0.5689830780029297, "learning_rate": 0.00020256232339836537, "loss": 1.715, "step": 54580 }, { "epoch": 1.82, "grad_norm": 0.5784095525741577, "learning_rate": 0.0002025524379985056, "loss": 1.8271, "step": 54581 }, { "epoch": 1.82, "grad_norm": 0.5851364731788635, "learning_rate": 0.0002025425527169329, "loss": 1.7547, "step": 54582 }, { "epoch": 1.82, "grad_norm": 0.5873983502388, "learning_rate": 0.0002025326675536591, "loss": 1.7015, "step": 54583 }, { "epoch": 1.82, "grad_norm": 0.5630930066108704, "learning_rate": 0.0002025227825086963, "loss": 1.7503, "step": 54584 }, { "epoch": 1.82, "grad_norm": 0.5696470141410828, "learning_rate": 0.00020251289758205655, "loss": 1.7687, "step": 54585 }, { "epoch": 1.82, "grad_norm": 0.5724508762359619, "learning_rate": 0.00020250301277375174, "loss": 1.7227, "step": 54586 }, { "epoch": 1.82, "grad_norm": 0.5799643397331238, "learning_rate": 0.00020249312808379378, "loss": 1.7362, "step": 54587 }, { "epoch": 1.82, "grad_norm": 0.5563346743583679, "learning_rate": 0.00020248324351219488, "loss": 1.6873, "step": 54588 }, { "epoch": 1.82, "grad_norm": 0.5880467295646667, "learning_rate": 0.00020247335905896705, "loss": 1.7269, "step": 54589 }, { "epoch": 1.82, "grad_norm": 0.5607498288154602, "learning_rate": 0.0002024634747241221, "loss": 1.6437, "step": 54590 }, { "epoch": 1.82, "grad_norm": 0.5775014758110046, "learning_rate": 0.00020245359050767215, "loss": 1.7738, "step": 54591 }, { "epoch": 1.82, "grad_norm": 0.576201319694519, "learning_rate": 0.0002024437064096292, "loss": 1.7, "step": 54592 }, { "epoch": 1.82, "grad_norm": 0.5838418006896973, "learning_rate": 0.00020243382243000517, "loss": 1.7187, "step": 54593 }, { "epoch": 1.82, "grad_norm": 0.5755168199539185, "learning_rate": 0.00020242393856881202, "loss": 1.6599, "step": 54594 }, { "epoch": 1.82, "grad_norm": 0.5761715769767761, "learning_rate": 0.00020241405482606206, "loss": 1.7688, "step": 54595 }, { "epoch": 1.82, "grad_norm": 0.5934220552444458, "learning_rate": 0.0002024041712017668, "loss": 1.7029, "step": 54596 }, { "epoch": 1.82, "grad_norm": 0.5665791034698486, "learning_rate": 0.00020239428769593863, "loss": 1.7375, "step": 54597 }, { "epoch": 1.82, "grad_norm": 0.5718841552734375, "learning_rate": 0.00020238440430858938, "loss": 1.8004, "step": 54598 }, { "epoch": 1.82, "grad_norm": 0.5915338397026062, "learning_rate": 0.00020237452103973112, "loss": 1.6656, "step": 54599 }, { "epoch": 1.82, "grad_norm": 0.5938311219215393, "learning_rate": 0.0002023646378893758, "loss": 1.7351, "step": 54600 }, { "epoch": 1.82, "grad_norm": 0.5420477986335754, "learning_rate": 0.0002023547548575353, "loss": 1.6329, "step": 54601 }, { "epoch": 1.82, "grad_norm": 0.5644875764846802, "learning_rate": 0.000202344871944222, "loss": 1.7709, "step": 54602 }, { "epoch": 1.82, "grad_norm": 0.559686005115509, "learning_rate": 0.00020233498914944739, "loss": 1.7655, "step": 54603 }, { "epoch": 1.82, "grad_norm": 0.5693039298057556, "learning_rate": 0.00020232510647322382, "loss": 1.7422, "step": 54604 }, { "epoch": 1.82, "grad_norm": 0.6013681888580322, "learning_rate": 0.0002023152239155632, "loss": 1.8022, "step": 54605 }, { "epoch": 1.82, "grad_norm": 0.5694449543952942, "learning_rate": 0.00020230534147647747, "loss": 1.7114, "step": 54606 }, { "epoch": 1.82, "grad_norm": 0.5965953469276428, "learning_rate": 0.00020229545915597863, "loss": 1.7553, "step": 54607 }, { "epoch": 1.82, "grad_norm": 0.5613579154014587, "learning_rate": 0.0002022855769540787, "loss": 1.733, "step": 54608 }, { "epoch": 1.82, "grad_norm": 0.6092646718025208, "learning_rate": 0.00020227569487078984, "loss": 1.7611, "step": 54609 }, { "epoch": 1.82, "grad_norm": 0.5717152953147888, "learning_rate": 0.00020226581290612366, "loss": 1.8039, "step": 54610 }, { "epoch": 1.82, "grad_norm": 0.569625735282898, "learning_rate": 0.00020225593106009248, "loss": 1.6925, "step": 54611 }, { "epoch": 1.82, "grad_norm": 0.5646869540214539, "learning_rate": 0.00020224604933270825, "loss": 1.6803, "step": 54612 }, { "epoch": 1.82, "grad_norm": 0.5650018453598022, "learning_rate": 0.0002022361677239829, "loss": 1.7284, "step": 54613 }, { "epoch": 1.82, "grad_norm": 0.5754712820053101, "learning_rate": 0.00020222628623392836, "loss": 1.7806, "step": 54614 }, { "epoch": 1.82, "grad_norm": 0.5780076384544373, "learning_rate": 0.0002022164048625567, "loss": 1.7432, "step": 54615 }, { "epoch": 1.82, "grad_norm": 0.5954657196998596, "learning_rate": 0.00020220652360988006, "loss": 1.7439, "step": 54616 }, { "epoch": 1.82, "grad_norm": 0.5688838958740234, "learning_rate": 0.00020219664247591015, "loss": 1.7805, "step": 54617 }, { "epoch": 1.82, "grad_norm": 0.5497856140136719, "learning_rate": 0.00020218676146065913, "loss": 1.7162, "step": 54618 }, { "epoch": 1.82, "grad_norm": 1.2652100324630737, "learning_rate": 0.00020217688056413903, "loss": 1.6933, "step": 54619 }, { "epoch": 1.82, "grad_norm": 0.5717625021934509, "learning_rate": 0.00020216699978636176, "loss": 1.6248, "step": 54620 }, { "epoch": 1.82, "grad_norm": 0.5701650381088257, "learning_rate": 0.0002021571191273393, "loss": 1.7198, "step": 54621 }, { "epoch": 1.82, "grad_norm": 0.5734086036682129, "learning_rate": 0.00020214723858708378, "loss": 1.7667, "step": 54622 }, { "epoch": 1.82, "grad_norm": 0.5451508164405823, "learning_rate": 0.000202137358165607, "loss": 1.7314, "step": 54623 }, { "epoch": 1.82, "grad_norm": 0.5671114325523376, "learning_rate": 0.00020212747786292097, "loss": 1.8128, "step": 54624 }, { "epoch": 1.82, "grad_norm": 0.5730360746383667, "learning_rate": 0.00020211759767903788, "loss": 1.7362, "step": 54625 }, { "epoch": 1.82, "grad_norm": 0.5699487924575806, "learning_rate": 0.0002021077176139696, "loss": 1.688, "step": 54626 }, { "epoch": 1.82, "grad_norm": 0.5776529908180237, "learning_rate": 0.00020209783766772814, "loss": 1.6981, "step": 54627 }, { "epoch": 1.82, "grad_norm": 0.5481054782867432, "learning_rate": 0.0002020879578403254, "loss": 1.7339, "step": 54628 }, { "epoch": 1.82, "grad_norm": 0.5845928192138672, "learning_rate": 0.00020207807813177355, "loss": 1.7256, "step": 54629 }, { "epoch": 1.82, "grad_norm": 0.5663468837738037, "learning_rate": 0.00020206819854208443, "loss": 1.7366, "step": 54630 }, { "epoch": 1.82, "grad_norm": 0.6019048094749451, "learning_rate": 0.00020205831907127002, "loss": 1.8021, "step": 54631 }, { "epoch": 1.82, "grad_norm": 0.5752533078193665, "learning_rate": 0.00020204843971934256, "loss": 1.733, "step": 54632 }, { "epoch": 1.82, "grad_norm": 0.5770128965377808, "learning_rate": 0.00020203856048631373, "loss": 1.7152, "step": 54633 }, { "epoch": 1.82, "grad_norm": 0.601513683795929, "learning_rate": 0.00020202868137219563, "loss": 1.7952, "step": 54634 }, { "epoch": 1.82, "grad_norm": 0.5735640525817871, "learning_rate": 0.00020201880237700033, "loss": 1.6756, "step": 54635 }, { "epoch": 1.82, "grad_norm": 0.6022791862487793, "learning_rate": 0.00020200892350073982, "loss": 1.7621, "step": 54636 }, { "epoch": 1.82, "grad_norm": 0.5812997221946716, "learning_rate": 0.00020199904474342594, "loss": 1.7619, "step": 54637 }, { "epoch": 1.82, "grad_norm": 0.5703936219215393, "learning_rate": 0.00020198916610507075, "loss": 1.7743, "step": 54638 }, { "epoch": 1.82, "grad_norm": 0.5665048956871033, "learning_rate": 0.00020197928758568646, "loss": 1.7848, "step": 54639 }, { "epoch": 1.82, "grad_norm": 0.559860348701477, "learning_rate": 0.0002019694091852847, "loss": 1.7615, "step": 54640 }, { "epoch": 1.82, "grad_norm": 0.5768144130706787, "learning_rate": 0.00020195953090387767, "loss": 1.7001, "step": 54641 }, { "epoch": 1.82, "grad_norm": 0.5881817936897278, "learning_rate": 0.0002019496527414774, "loss": 1.7951, "step": 54642 }, { "epoch": 1.82, "grad_norm": 0.601743221282959, "learning_rate": 0.00020193977469809573, "loss": 1.8072, "step": 54643 }, { "epoch": 1.82, "grad_norm": 0.5927951335906982, "learning_rate": 0.00020192989677374472, "loss": 1.6968, "step": 54644 }, { "epoch": 1.82, "grad_norm": 0.5586831569671631, "learning_rate": 0.00020192001896843632, "loss": 1.7194, "step": 54645 }, { "epoch": 1.82, "grad_norm": 0.5709750056266785, "learning_rate": 0.00020191014128218275, "loss": 1.7658, "step": 54646 }, { "epoch": 1.82, "grad_norm": 0.5727432370185852, "learning_rate": 0.00020190026371499564, "loss": 1.7779, "step": 54647 }, { "epoch": 1.82, "grad_norm": 0.5869582891464233, "learning_rate": 0.00020189038626688716, "loss": 1.7264, "step": 54648 }, { "epoch": 1.82, "grad_norm": 0.5647270083427429, "learning_rate": 0.0002018805089378694, "loss": 1.7659, "step": 54649 }, { "epoch": 1.82, "grad_norm": 0.5812776684761047, "learning_rate": 0.00020187063172795423, "loss": 1.7804, "step": 54650 }, { "epoch": 1.82, "grad_norm": 0.5635288953781128, "learning_rate": 0.00020186075463715357, "loss": 1.6733, "step": 54651 }, { "epoch": 1.82, "grad_norm": 0.6031169295310974, "learning_rate": 0.0002018508776654796, "loss": 1.6558, "step": 54652 }, { "epoch": 1.82, "grad_norm": 0.5630722045898438, "learning_rate": 0.0002018410008129441, "loss": 1.7516, "step": 54653 }, { "epoch": 1.82, "grad_norm": 0.5995416045188904, "learning_rate": 0.00020183112407955918, "loss": 1.6833, "step": 54654 }, { "epoch": 1.82, "grad_norm": 0.5856165289878845, "learning_rate": 0.0002018212474653368, "loss": 1.8, "step": 54655 }, { "epoch": 1.82, "grad_norm": 0.5861607193946838, "learning_rate": 0.00020181137097028903, "loss": 1.7118, "step": 54656 }, { "epoch": 1.82, "grad_norm": 0.581814169883728, "learning_rate": 0.00020180149459442777, "loss": 1.7537, "step": 54657 }, { "epoch": 1.82, "grad_norm": 0.5591261386871338, "learning_rate": 0.00020179161833776502, "loss": 1.7238, "step": 54658 }, { "epoch": 1.82, "grad_norm": 0.5693015456199646, "learning_rate": 0.0002017817422003128, "loss": 1.6697, "step": 54659 }, { "epoch": 1.82, "grad_norm": 0.5543076395988464, "learning_rate": 0.000201771866182083, "loss": 1.7887, "step": 54660 }, { "epoch": 1.82, "grad_norm": 0.5563647747039795, "learning_rate": 0.0002017619902830877, "loss": 1.7413, "step": 54661 }, { "epoch": 1.82, "grad_norm": 0.5743538737297058, "learning_rate": 0.00020175211450333897, "loss": 1.755, "step": 54662 }, { "epoch": 1.82, "grad_norm": 0.5926514863967896, "learning_rate": 0.00020174223884284853, "loss": 1.7764, "step": 54663 }, { "epoch": 1.82, "grad_norm": 0.5798279643058777, "learning_rate": 0.00020173236330162862, "loss": 1.7905, "step": 54664 }, { "epoch": 1.82, "grad_norm": 0.5789015889167786, "learning_rate": 0.00020172248787969115, "loss": 1.7232, "step": 54665 }, { "epoch": 1.82, "grad_norm": 0.5795538425445557, "learning_rate": 0.0002017126125770481, "loss": 1.7771, "step": 54666 }, { "epoch": 1.82, "grad_norm": 0.5724758505821228, "learning_rate": 0.00020170273739371147, "loss": 1.6883, "step": 54667 }, { "epoch": 1.82, "grad_norm": 0.5674079060554504, "learning_rate": 0.00020169286232969315, "loss": 1.707, "step": 54668 }, { "epoch": 1.82, "grad_norm": 0.571387767791748, "learning_rate": 0.0002016829873850054, "loss": 1.7264, "step": 54669 }, { "epoch": 1.82, "grad_norm": 0.5619073510169983, "learning_rate": 0.00020167311255965976, "loss": 1.7844, "step": 54670 }, { "epoch": 1.82, "grad_norm": 0.5510414242744446, "learning_rate": 0.0002016632378536686, "loss": 1.7736, "step": 54671 }, { "epoch": 1.82, "grad_norm": 0.5553814768791199, "learning_rate": 0.00020165336326704385, "loss": 1.7499, "step": 54672 }, { "epoch": 1.82, "grad_norm": 0.5770497918128967, "learning_rate": 0.00020164348879979734, "loss": 1.707, "step": 54673 }, { "epoch": 1.82, "grad_norm": 0.5791002511978149, "learning_rate": 0.00020163361445194118, "loss": 1.7364, "step": 54674 }, { "epoch": 1.82, "grad_norm": 0.559575080871582, "learning_rate": 0.00020162374022348722, "loss": 1.7496, "step": 54675 }, { "epoch": 1.82, "grad_norm": 0.6668707728385925, "learning_rate": 0.00020161386611444775, "loss": 1.7584, "step": 54676 }, { "epoch": 1.82, "grad_norm": 0.5811461806297302, "learning_rate": 0.00020160399212483434, "loss": 1.8143, "step": 54677 }, { "epoch": 1.82, "grad_norm": 0.5651829838752747, "learning_rate": 0.00020159411825465925, "loss": 1.7408, "step": 54678 }, { "epoch": 1.82, "grad_norm": 0.581421971321106, "learning_rate": 0.00020158424450393445, "loss": 1.8987, "step": 54679 }, { "epoch": 1.82, "grad_norm": 0.5766817331314087, "learning_rate": 0.00020157437087267184, "loss": 1.7308, "step": 54680 }, { "epoch": 1.82, "grad_norm": 0.5702363848686218, "learning_rate": 0.00020156449736088343, "loss": 1.6808, "step": 54681 }, { "epoch": 1.82, "grad_norm": 0.5663185715675354, "learning_rate": 0.0002015546239685813, "loss": 1.7539, "step": 54682 }, { "epoch": 1.82, "grad_norm": 0.5643728971481323, "learning_rate": 0.00020154475069577728, "loss": 1.7044, "step": 54683 }, { "epoch": 1.82, "grad_norm": 0.5925458669662476, "learning_rate": 0.00020153487754248335, "loss": 1.7832, "step": 54684 }, { "epoch": 1.82, "grad_norm": 0.5899810194969177, "learning_rate": 0.00020152500450871163, "loss": 1.7899, "step": 54685 }, { "epoch": 1.82, "grad_norm": 0.5630764365196228, "learning_rate": 0.00020151513159447413, "loss": 1.7545, "step": 54686 }, { "epoch": 1.82, "grad_norm": 0.5690654516220093, "learning_rate": 0.00020150525879978267, "loss": 1.6275, "step": 54687 }, { "epoch": 1.82, "grad_norm": 0.6370302438735962, "learning_rate": 0.00020149538612464934, "loss": 1.7233, "step": 54688 }, { "epoch": 1.82, "grad_norm": 0.5698711276054382, "learning_rate": 0.00020148551356908613, "loss": 1.7449, "step": 54689 }, { "epoch": 1.82, "grad_norm": 0.5608827471733093, "learning_rate": 0.00020147564113310494, "loss": 1.7072, "step": 54690 }, { "epoch": 1.82, "grad_norm": 0.570858359336853, "learning_rate": 0.00020146576881671774, "loss": 1.7422, "step": 54691 }, { "epoch": 1.82, "grad_norm": 0.5586537718772888, "learning_rate": 0.0002014558966199367, "loss": 1.6984, "step": 54692 }, { "epoch": 1.82, "grad_norm": 0.5953660607337952, "learning_rate": 0.00020144602454277364, "loss": 1.8104, "step": 54693 }, { "epoch": 1.82, "grad_norm": 0.5712786912918091, "learning_rate": 0.00020143615258524052, "loss": 1.7813, "step": 54694 }, { "epoch": 1.82, "grad_norm": 0.5631561279296875, "learning_rate": 0.00020142628074734946, "loss": 1.7191, "step": 54695 }, { "epoch": 1.82, "grad_norm": 0.5515434741973877, "learning_rate": 0.0002014164090291124, "loss": 1.685, "step": 54696 }, { "epoch": 1.82, "grad_norm": 0.5554938316345215, "learning_rate": 0.00020140653743054118, "loss": 1.7313, "step": 54697 }, { "epoch": 1.82, "grad_norm": 0.569421112537384, "learning_rate": 0.00020139666595164797, "loss": 1.7803, "step": 54698 }, { "epoch": 1.82, "grad_norm": 0.5604045391082764, "learning_rate": 0.00020138679459244472, "loss": 1.7234, "step": 54699 }, { "epoch": 1.82, "grad_norm": 0.6095386743545532, "learning_rate": 0.00020137692335294333, "loss": 1.8493, "step": 54700 }, { "epoch": 1.82, "grad_norm": 0.5848665833473206, "learning_rate": 0.0002013670522331558, "loss": 1.7599, "step": 54701 }, { "epoch": 1.82, "grad_norm": 0.5878427028656006, "learning_rate": 0.0002013571812330942, "loss": 1.7423, "step": 54702 }, { "epoch": 1.82, "grad_norm": 0.5473096966743469, "learning_rate": 0.0002013473103527704, "loss": 1.7122, "step": 54703 }, { "epoch": 1.82, "grad_norm": 0.5523826479911804, "learning_rate": 0.00020133743959219636, "loss": 1.7316, "step": 54704 }, { "epoch": 1.82, "grad_norm": 0.5568631291389465, "learning_rate": 0.00020132756895138417, "loss": 1.7061, "step": 54705 }, { "epoch": 1.82, "grad_norm": 0.5631932616233826, "learning_rate": 0.00020131769843034588, "loss": 1.7908, "step": 54706 }, { "epoch": 1.82, "grad_norm": 0.5791887640953064, "learning_rate": 0.00020130782802909328, "loss": 1.7535, "step": 54707 }, { "epoch": 1.82, "grad_norm": 0.6777456998825073, "learning_rate": 0.00020129795774763843, "loss": 1.6982, "step": 54708 }, { "epoch": 1.82, "grad_norm": 0.5957733392715454, "learning_rate": 0.00020128808758599338, "loss": 1.7683, "step": 54709 }, { "epoch": 1.82, "grad_norm": 0.5663678646087646, "learning_rate": 0.00020127821754417, "loss": 1.7813, "step": 54710 }, { "epoch": 1.82, "grad_norm": 0.5642024874687195, "learning_rate": 0.00020126834762218023, "loss": 1.7021, "step": 54711 }, { "epoch": 1.82, "grad_norm": 0.5665440559387207, "learning_rate": 0.00020125847782003636, "loss": 1.6884, "step": 54712 }, { "epoch": 1.82, "grad_norm": 0.5795692205429077, "learning_rate": 0.00020124860813774994, "loss": 1.7309, "step": 54713 }, { "epoch": 1.82, "grad_norm": 0.5534822344779968, "learning_rate": 0.0002012387385753332, "loss": 1.6835, "step": 54714 }, { "epoch": 1.82, "grad_norm": 0.5582941770553589, "learning_rate": 0.0002012288691327981, "loss": 1.7592, "step": 54715 }, { "epoch": 1.82, "grad_norm": 0.5505369305610657, "learning_rate": 0.0002012189998101567, "loss": 1.7604, "step": 54716 }, { "epoch": 1.82, "grad_norm": 0.5487939715385437, "learning_rate": 0.00020120913060742078, "loss": 1.6806, "step": 54717 }, { "epoch": 1.82, "grad_norm": 0.5678074359893799, "learning_rate": 0.00020119926152460237, "loss": 1.7284, "step": 54718 }, { "epoch": 1.82, "grad_norm": 0.586524486541748, "learning_rate": 0.00020118939256171366, "loss": 1.7526, "step": 54719 }, { "epoch": 1.82, "grad_norm": 0.5839634537696838, "learning_rate": 0.00020117952371876628, "loss": 1.7021, "step": 54720 }, { "epoch": 1.82, "grad_norm": 0.592927098274231, "learning_rate": 0.0002011696549957725, "loss": 1.765, "step": 54721 }, { "epoch": 1.82, "grad_norm": 0.5934789776802063, "learning_rate": 0.00020115978639274423, "loss": 1.7313, "step": 54722 }, { "epoch": 1.82, "grad_norm": 0.5947757959365845, "learning_rate": 0.00020114991790969337, "loss": 1.7724, "step": 54723 }, { "epoch": 1.82, "grad_norm": 0.5792107582092285, "learning_rate": 0.00020114004954663195, "loss": 1.7167, "step": 54724 }, { "epoch": 1.82, "grad_norm": 0.5990595817565918, "learning_rate": 0.00020113018130357184, "loss": 1.7558, "step": 54725 }, { "epoch": 1.82, "grad_norm": 0.5907124876976013, "learning_rate": 0.00020112031318052532, "loss": 1.677, "step": 54726 }, { "epoch": 1.82, "grad_norm": 0.5921989679336548, "learning_rate": 0.00020111044517750402, "loss": 1.6987, "step": 54727 }, { "epoch": 1.82, "grad_norm": 0.5786810517311096, "learning_rate": 0.0002011005772945201, "loss": 1.7237, "step": 54728 }, { "epoch": 1.82, "grad_norm": 0.5713577270507812, "learning_rate": 0.00020109070953158557, "loss": 1.7394, "step": 54729 }, { "epoch": 1.82, "grad_norm": 0.5767842531204224, "learning_rate": 0.0002010808418887123, "loss": 1.6938, "step": 54730 }, { "epoch": 1.82, "grad_norm": 0.588718831539154, "learning_rate": 0.0002010709743659123, "loss": 1.7367, "step": 54731 }, { "epoch": 1.82, "grad_norm": 0.5958362221717834, "learning_rate": 0.00020106110696319765, "loss": 1.7413, "step": 54732 }, { "epoch": 1.82, "grad_norm": 0.6012372374534607, "learning_rate": 0.00020105123968058015, "loss": 1.7448, "step": 54733 }, { "epoch": 1.82, "grad_norm": 0.587910532951355, "learning_rate": 0.0002010413725180718, "loss": 1.8023, "step": 54734 }, { "epoch": 1.82, "grad_norm": 0.5621408224105835, "learning_rate": 0.0002010315054756847, "loss": 1.798, "step": 54735 }, { "epoch": 1.82, "grad_norm": 0.5739943981170654, "learning_rate": 0.00020102163855343087, "loss": 1.8115, "step": 54736 }, { "epoch": 1.82, "grad_norm": 0.588204562664032, "learning_rate": 0.00020101177175132207, "loss": 1.7655, "step": 54737 }, { "epoch": 1.82, "grad_norm": 0.5886622071266174, "learning_rate": 0.0002010019050693704, "loss": 1.8251, "step": 54738 }, { "epoch": 1.82, "grad_norm": 0.612779974937439, "learning_rate": 0.00020099203850758795, "loss": 1.7375, "step": 54739 }, { "epoch": 1.82, "grad_norm": 0.5639687180519104, "learning_rate": 0.0002009821720659865, "loss": 1.7421, "step": 54740 }, { "epoch": 1.82, "grad_norm": 0.5871922373771667, "learning_rate": 0.00020097230574457804, "loss": 1.7602, "step": 54741 }, { "epoch": 1.82, "grad_norm": 0.5628033876419067, "learning_rate": 0.0002009624395433746, "loss": 1.7455, "step": 54742 }, { "epoch": 1.82, "grad_norm": 0.5863658785820007, "learning_rate": 0.0002009525734623883, "loss": 1.7399, "step": 54743 }, { "epoch": 1.82, "grad_norm": 0.5754233598709106, "learning_rate": 0.00020094270750163092, "loss": 1.7449, "step": 54744 }, { "epoch": 1.82, "grad_norm": 0.5718420147895813, "learning_rate": 0.00020093284166111444, "loss": 1.7787, "step": 54745 }, { "epoch": 1.82, "grad_norm": 0.567997932434082, "learning_rate": 0.00020092297594085103, "loss": 1.7316, "step": 54746 }, { "epoch": 1.82, "grad_norm": 0.5637056827545166, "learning_rate": 0.00020091311034085244, "loss": 1.6466, "step": 54747 }, { "epoch": 1.82, "grad_norm": 0.5640249252319336, "learning_rate": 0.00020090324486113064, "loss": 1.6746, "step": 54748 }, { "epoch": 1.82, "grad_norm": 0.5829424858093262, "learning_rate": 0.00020089337950169792, "loss": 1.7174, "step": 54749 }, { "epoch": 1.82, "grad_norm": 0.5776340365409851, "learning_rate": 0.00020088351426256586, "loss": 1.7959, "step": 54750 }, { "epoch": 1.82, "grad_norm": 0.5600442290306091, "learning_rate": 0.0002008736491437466, "loss": 1.7218, "step": 54751 }, { "epoch": 1.82, "grad_norm": 0.5616962313652039, "learning_rate": 0.0002008637841452522, "loss": 1.6197, "step": 54752 }, { "epoch": 1.82, "grad_norm": 0.6039464473724365, "learning_rate": 0.00020085391926709457, "loss": 1.7508, "step": 54753 }, { "epoch": 1.82, "grad_norm": 0.5633490085601807, "learning_rate": 0.00020084405450928567, "loss": 1.7354, "step": 54754 }, { "epoch": 1.82, "grad_norm": 0.5559484362602234, "learning_rate": 0.0002008341898718374, "loss": 1.7093, "step": 54755 }, { "epoch": 1.82, "grad_norm": 0.5697407722473145, "learning_rate": 0.00020082432535476193, "loss": 1.7177, "step": 54756 }, { "epoch": 1.82, "grad_norm": 0.5907840132713318, "learning_rate": 0.00020081446095807098, "loss": 1.7094, "step": 54757 }, { "epoch": 1.82, "grad_norm": 0.5686493515968323, "learning_rate": 0.00020080459668177673, "loss": 1.7167, "step": 54758 }, { "epoch": 1.82, "grad_norm": 0.5847095251083374, "learning_rate": 0.00020079473252589114, "loss": 1.7938, "step": 54759 }, { "epoch": 1.82, "grad_norm": 0.5778390169143677, "learning_rate": 0.00020078486849042608, "loss": 1.6931, "step": 54760 }, { "epoch": 1.82, "grad_norm": 0.5948904752731323, "learning_rate": 0.00020077500457539355, "loss": 1.7773, "step": 54761 }, { "epoch": 1.82, "grad_norm": 0.5633249878883362, "learning_rate": 0.0002007651407808055, "loss": 1.7372, "step": 54762 }, { "epoch": 1.82, "grad_norm": 0.5560188889503479, "learning_rate": 0.00020075527710667409, "loss": 1.7401, "step": 54763 }, { "epoch": 1.82, "grad_norm": 0.568109393119812, "learning_rate": 0.000200745413553011, "loss": 1.6742, "step": 54764 }, { "epoch": 1.82, "grad_norm": 0.5722441077232361, "learning_rate": 0.00020073555011982844, "loss": 1.7712, "step": 54765 }, { "epoch": 1.82, "grad_norm": 0.5591713190078735, "learning_rate": 0.00020072568680713833, "loss": 1.7948, "step": 54766 }, { "epoch": 1.82, "grad_norm": 0.5513641238212585, "learning_rate": 0.00020071582361495254, "loss": 1.7416, "step": 54767 }, { "epoch": 1.82, "grad_norm": 0.5646790266036987, "learning_rate": 0.0002007059605432831, "loss": 1.7309, "step": 54768 }, { "epoch": 1.82, "grad_norm": 0.5820709466934204, "learning_rate": 0.00020069609759214207, "loss": 1.6649, "step": 54769 }, { "epoch": 1.82, "grad_norm": 0.5748605728149414, "learning_rate": 0.0002006862347615413, "loss": 1.7447, "step": 54770 }, { "epoch": 1.82, "grad_norm": 0.5415125489234924, "learning_rate": 0.00020067637205149272, "loss": 1.7862, "step": 54771 }, { "epoch": 1.82, "grad_norm": 0.5818357467651367, "learning_rate": 0.00020066650946200847, "loss": 1.7675, "step": 54772 }, { "epoch": 1.82, "grad_norm": 0.578755259513855, "learning_rate": 0.0002006566469931005, "loss": 1.7161, "step": 54773 }, { "epoch": 1.82, "grad_norm": 0.5777593851089478, "learning_rate": 0.00020064678464478064, "loss": 1.7033, "step": 54774 }, { "epoch": 1.82, "grad_norm": 0.5690386891365051, "learning_rate": 0.00020063692241706097, "loss": 1.825, "step": 54775 }, { "epoch": 1.82, "grad_norm": 0.5930628180503845, "learning_rate": 0.00020062706030995353, "loss": 1.7754, "step": 54776 }, { "epoch": 1.82, "grad_norm": 0.5812004804611206, "learning_rate": 0.0002006171983234701, "loss": 1.7102, "step": 54777 }, { "epoch": 1.82, "grad_norm": 0.5828542113304138, "learning_rate": 0.0002006073364576227, "loss": 1.7353, "step": 54778 }, { "epoch": 1.82, "grad_norm": 0.594218909740448, "learning_rate": 0.00020059747471242348, "loss": 1.7768, "step": 54779 }, { "epoch": 1.82, "grad_norm": 1.3102174997329712, "learning_rate": 0.00020058761308788413, "loss": 1.6735, "step": 54780 }, { "epoch": 1.82, "grad_norm": 0.556172251701355, "learning_rate": 0.0002005777515840168, "loss": 1.6744, "step": 54781 }, { "epoch": 1.82, "grad_norm": 0.5870736837387085, "learning_rate": 0.0002005678902008335, "loss": 1.7501, "step": 54782 }, { "epoch": 1.82, "grad_norm": 0.5759369730949402, "learning_rate": 0.00020055802893834614, "loss": 1.7575, "step": 54783 }, { "epoch": 1.82, "grad_norm": 0.5751792788505554, "learning_rate": 0.00020054816779656664, "loss": 1.6859, "step": 54784 }, { "epoch": 1.82, "grad_norm": 0.5897475481033325, "learning_rate": 0.00020053830677550693, "loss": 1.7786, "step": 54785 }, { "epoch": 1.82, "grad_norm": 0.589116632938385, "learning_rate": 0.00020052844587517925, "loss": 1.7809, "step": 54786 }, { "epoch": 1.82, "grad_norm": 0.57723069190979, "learning_rate": 0.00020051858509559524, "loss": 1.7278, "step": 54787 }, { "epoch": 1.82, "grad_norm": 0.5703102946281433, "learning_rate": 0.00020050872443676702, "loss": 1.7235, "step": 54788 }, { "epoch": 1.82, "grad_norm": 0.5831423401832581, "learning_rate": 0.00020049886389870663, "loss": 1.7339, "step": 54789 }, { "epoch": 1.82, "grad_norm": 0.6147668957710266, "learning_rate": 0.00020048900348142588, "loss": 1.8096, "step": 54790 }, { "epoch": 1.82, "grad_norm": 0.5748673677444458, "learning_rate": 0.00020047914318493685, "loss": 1.7022, "step": 54791 }, { "epoch": 1.82, "grad_norm": 0.5783784985542297, "learning_rate": 0.0002004692830092514, "loss": 1.7876, "step": 54792 }, { "epoch": 1.82, "grad_norm": 0.5924882292747498, "learning_rate": 0.00020045942295438172, "loss": 1.7246, "step": 54793 }, { "epoch": 1.82, "grad_norm": 0.564219057559967, "learning_rate": 0.0002004495630203395, "loss": 1.6928, "step": 54794 }, { "epoch": 1.82, "grad_norm": 0.5922659635543823, "learning_rate": 0.00020043970320713687, "loss": 1.7736, "step": 54795 }, { "epoch": 1.82, "grad_norm": 0.5736767649650574, "learning_rate": 0.00020042984351478587, "loss": 1.7253, "step": 54796 }, { "epoch": 1.82, "grad_norm": 0.5811693072319031, "learning_rate": 0.00020041998394329828, "loss": 1.7385, "step": 54797 }, { "epoch": 1.82, "grad_norm": 0.5816694498062134, "learning_rate": 0.0002004101244926862, "loss": 1.7632, "step": 54798 }, { "epoch": 1.82, "grad_norm": 0.5486319065093994, "learning_rate": 0.00020040026516296154, "loss": 1.7271, "step": 54799 }, { "epoch": 1.82, "grad_norm": 0.5698067545890808, "learning_rate": 0.00020039040595413627, "loss": 1.6742, "step": 54800 }, { "epoch": 1.82, "grad_norm": 0.5545281767845154, "learning_rate": 0.0002003805468662223, "loss": 1.7697, "step": 54801 }, { "epoch": 1.82, "grad_norm": 0.5859848260879517, "learning_rate": 0.00020037068789923175, "loss": 1.7866, "step": 54802 }, { "epoch": 1.82, "grad_norm": 0.5606444478034973, "learning_rate": 0.00020036082905317655, "loss": 1.6718, "step": 54803 }, { "epoch": 1.82, "grad_norm": 0.5820301175117493, "learning_rate": 0.00020035097032806854, "loss": 1.8212, "step": 54804 }, { "epoch": 1.82, "grad_norm": 0.570177435874939, "learning_rate": 0.0002003411117239198, "loss": 1.7184, "step": 54805 }, { "epoch": 1.82, "grad_norm": 0.5835407972335815, "learning_rate": 0.00020033125324074233, "loss": 1.7102, "step": 54806 }, { "epoch": 1.82, "grad_norm": 0.5789136290550232, "learning_rate": 0.00020032139487854788, "loss": 1.7358, "step": 54807 }, { "epoch": 1.82, "grad_norm": 0.589026927947998, "learning_rate": 0.00020031153663734863, "loss": 1.7578, "step": 54808 }, { "epoch": 1.82, "grad_norm": 0.5580958127975464, "learning_rate": 0.00020030167851715658, "loss": 1.7186, "step": 54809 }, { "epoch": 1.82, "grad_norm": 0.5894059538841248, "learning_rate": 0.00020029182051798354, "loss": 1.7014, "step": 54810 }, { "epoch": 1.82, "grad_norm": 0.5629392862319946, "learning_rate": 0.0002002819626398415, "loss": 1.7745, "step": 54811 }, { "epoch": 1.82, "grad_norm": 1.3798331022262573, "learning_rate": 0.00020027210488274247, "loss": 1.7132, "step": 54812 }, { "epoch": 1.82, "grad_norm": 0.5681290030479431, "learning_rate": 0.0002002622472466985, "loss": 1.7044, "step": 54813 }, { "epoch": 1.82, "grad_norm": 0.5764819383621216, "learning_rate": 0.00020025238973172136, "loss": 1.6769, "step": 54814 }, { "epoch": 1.82, "grad_norm": 0.556571900844574, "learning_rate": 0.00020024253233782315, "loss": 1.7264, "step": 54815 }, { "epoch": 1.82, "grad_norm": 0.5755351781845093, "learning_rate": 0.00020023267506501588, "loss": 1.6967, "step": 54816 }, { "epoch": 1.82, "grad_norm": 0.5978530645370483, "learning_rate": 0.00020022281791331136, "loss": 1.7464, "step": 54817 }, { "epoch": 1.82, "grad_norm": 0.5983737707138062, "learning_rate": 0.00020021296088272166, "loss": 1.7078, "step": 54818 }, { "epoch": 1.82, "grad_norm": 0.5886064767837524, "learning_rate": 0.0002002031039732588, "loss": 1.7063, "step": 54819 }, { "epoch": 1.82, "grad_norm": 0.5984825491905212, "learning_rate": 0.0002001932471849346, "loss": 1.7834, "step": 54820 }, { "epoch": 1.82, "grad_norm": 0.5766542553901672, "learning_rate": 0.000200183390517761, "loss": 1.7741, "step": 54821 }, { "epoch": 1.82, "grad_norm": 0.6483868956565857, "learning_rate": 0.0002001735339717501, "loss": 1.7741, "step": 54822 }, { "epoch": 1.82, "grad_norm": 0.5775837302207947, "learning_rate": 0.00020016367754691392, "loss": 1.7479, "step": 54823 }, { "epoch": 1.82, "grad_norm": 0.5733554363250732, "learning_rate": 0.00020015382124326426, "loss": 1.715, "step": 54824 }, { "epoch": 1.82, "grad_norm": 0.5739683508872986, "learning_rate": 0.00020014396506081315, "loss": 1.6833, "step": 54825 }, { "epoch": 1.82, "grad_norm": 0.5655909776687622, "learning_rate": 0.0002001341089995726, "loss": 1.6542, "step": 54826 }, { "epoch": 1.82, "grad_norm": 0.5898109674453735, "learning_rate": 0.0002001242530595545, "loss": 1.7559, "step": 54827 }, { "epoch": 1.82, "grad_norm": 0.5960912108421326, "learning_rate": 0.00020011439724077076, "loss": 1.7484, "step": 54828 }, { "epoch": 1.82, "grad_norm": 0.5805477499961853, "learning_rate": 0.00020010454154323362, "loss": 1.7461, "step": 54829 }, { "epoch": 1.82, "grad_norm": 0.6026013493537903, "learning_rate": 0.00020009468596695465, "loss": 1.8078, "step": 54830 }, { "epoch": 1.82, "grad_norm": 0.555435299873352, "learning_rate": 0.00020008483051194603, "loss": 1.7173, "step": 54831 }, { "epoch": 1.82, "grad_norm": 0.6103540062904358, "learning_rate": 0.0002000749751782198, "loss": 1.7261, "step": 54832 }, { "epoch": 1.82, "grad_norm": 0.6190071105957031, "learning_rate": 0.0002000651199657878, "loss": 1.7098, "step": 54833 }, { "epoch": 1.82, "grad_norm": 0.5785872936248779, "learning_rate": 0.000200055264874662, "loss": 1.6528, "step": 54834 }, { "epoch": 1.82, "grad_norm": 0.5669233202934265, "learning_rate": 0.00020004540990485432, "loss": 1.7715, "step": 54835 }, { "epoch": 1.82, "grad_norm": 0.5837581753730774, "learning_rate": 0.00020003555505637696, "loss": 1.6319, "step": 54836 }, { "epoch": 1.82, "grad_norm": 0.5552621483802795, "learning_rate": 0.00020002570032924154, "loss": 1.7435, "step": 54837 }, { "epoch": 1.82, "grad_norm": 0.5938834547996521, "learning_rate": 0.00020001584572346023, "loss": 1.8169, "step": 54838 }, { "epoch": 1.82, "grad_norm": 0.6962640285491943, "learning_rate": 0.000200005991239045, "loss": 1.6789, "step": 54839 }, { "epoch": 1.82, "grad_norm": 0.5823893547058105, "learning_rate": 0.00019999613687600773, "loss": 1.746, "step": 54840 }, { "epoch": 1.82, "grad_norm": 0.573591947555542, "learning_rate": 0.00019998628263436042, "loss": 1.7924, "step": 54841 }, { "epoch": 1.82, "grad_norm": 0.6210533380508423, "learning_rate": 0.00019997642851411494, "loss": 1.7694, "step": 54842 }, { "epoch": 1.82, "grad_norm": 0.5718938112258911, "learning_rate": 0.00019996657451528353, "loss": 1.7006, "step": 54843 }, { "epoch": 1.82, "grad_norm": 0.5730859637260437, "learning_rate": 0.0001999567206378778, "loss": 1.7622, "step": 54844 }, { "epoch": 1.82, "grad_norm": 0.5566306710243225, "learning_rate": 0.0001999468668819099, "loss": 1.8024, "step": 54845 }, { "epoch": 1.82, "grad_norm": 0.5683873891830444, "learning_rate": 0.00019993701324739184, "loss": 1.7462, "step": 54846 }, { "epoch": 1.82, "grad_norm": 0.5864890217781067, "learning_rate": 0.00019992715973433543, "loss": 1.7115, "step": 54847 }, { "epoch": 1.82, "grad_norm": 0.558681309223175, "learning_rate": 0.00019991730634275268, "loss": 1.7969, "step": 54848 }, { "epoch": 1.82, "grad_norm": 0.5776765942573547, "learning_rate": 0.00019990745307265568, "loss": 1.7828, "step": 54849 }, { "epoch": 1.82, "grad_norm": 0.5788972973823547, "learning_rate": 0.00019989759992405622, "loss": 1.7166, "step": 54850 }, { "epoch": 1.82, "grad_norm": 0.5558459162712097, "learning_rate": 0.00019988774689696625, "loss": 1.7402, "step": 54851 }, { "epoch": 1.82, "grad_norm": 0.5667304992675781, "learning_rate": 0.00019987789399139783, "loss": 1.8127, "step": 54852 }, { "epoch": 1.82, "grad_norm": 0.5842357873916626, "learning_rate": 0.000199868041207363, "loss": 1.7848, "step": 54853 }, { "epoch": 1.82, "grad_norm": 0.575171947479248, "learning_rate": 0.00019985818854487356, "loss": 1.7852, "step": 54854 }, { "epoch": 1.83, "grad_norm": 0.5801572203636169, "learning_rate": 0.00019984833600394147, "loss": 1.7624, "step": 54855 }, { "epoch": 1.83, "grad_norm": 0.5835205912590027, "learning_rate": 0.00019983848358457885, "loss": 1.7672, "step": 54856 }, { "epoch": 1.83, "grad_norm": 0.5786184668540955, "learning_rate": 0.0001998286312867975, "loss": 1.8307, "step": 54857 }, { "epoch": 1.83, "grad_norm": 0.5694364905357361, "learning_rate": 0.00019981877911060938, "loss": 1.68, "step": 54858 }, { "epoch": 1.83, "grad_norm": 0.581307590007782, "learning_rate": 0.0001998089270560265, "loss": 1.7681, "step": 54859 }, { "epoch": 1.83, "grad_norm": 0.5909596085548401, "learning_rate": 0.00019979907512306092, "loss": 1.7653, "step": 54860 }, { "epoch": 1.83, "grad_norm": 0.5750903487205505, "learning_rate": 0.00019978922331172447, "loss": 1.765, "step": 54861 }, { "epoch": 1.83, "grad_norm": 0.5817005038261414, "learning_rate": 0.00019977937162202907, "loss": 1.7841, "step": 54862 }, { "epoch": 1.83, "grad_norm": 0.5736457705497742, "learning_rate": 0.00019976952005398683, "loss": 1.7474, "step": 54863 }, { "epoch": 1.83, "grad_norm": 0.5898492336273193, "learning_rate": 0.00019975966860760958, "loss": 1.6926, "step": 54864 }, { "epoch": 1.83, "grad_norm": 0.5699293613433838, "learning_rate": 0.00019974981728290925, "loss": 1.7667, "step": 54865 }, { "epoch": 1.83, "grad_norm": 0.5762177109718323, "learning_rate": 0.00019973996607989807, "loss": 1.6755, "step": 54866 }, { "epoch": 1.83, "grad_norm": 0.5749096870422363, "learning_rate": 0.00019973011499858762, "loss": 1.6822, "step": 54867 }, { "epoch": 1.83, "grad_norm": 0.6053391695022583, "learning_rate": 0.00019972026403899005, "loss": 1.7265, "step": 54868 }, { "epoch": 1.83, "grad_norm": 0.5738734602928162, "learning_rate": 0.00019971041320111736, "loss": 1.7702, "step": 54869 }, { "epoch": 1.83, "grad_norm": 0.58120197057724, "learning_rate": 0.0001997005624849815, "loss": 1.8003, "step": 54870 }, { "epoch": 1.83, "grad_norm": 0.581775963306427, "learning_rate": 0.00019969071189059428, "loss": 1.7513, "step": 54871 }, { "epoch": 1.83, "grad_norm": 0.5574228763580322, "learning_rate": 0.00019968086141796773, "loss": 1.7602, "step": 54872 }, { "epoch": 1.83, "grad_norm": 0.5920554399490356, "learning_rate": 0.000199671011067114, "loss": 1.7725, "step": 54873 }, { "epoch": 1.83, "grad_norm": 0.585443913936615, "learning_rate": 0.0001996611608380447, "loss": 1.6498, "step": 54874 }, { "epoch": 1.83, "grad_norm": 0.5555479526519775, "learning_rate": 0.00019965131073077202, "loss": 1.741, "step": 54875 }, { "epoch": 1.83, "grad_norm": 0.5816009640693665, "learning_rate": 0.00019964146074530795, "loss": 1.6888, "step": 54876 }, { "epoch": 1.83, "grad_norm": 0.5691388249397278, "learning_rate": 0.00019963161088166426, "loss": 1.7816, "step": 54877 }, { "epoch": 1.83, "grad_norm": 0.5826178789138794, "learning_rate": 0.00019962176113985305, "loss": 1.7744, "step": 54878 }, { "epoch": 1.83, "grad_norm": 0.5776297450065613, "learning_rate": 0.00019961191151988617, "loss": 1.7802, "step": 54879 }, { "epoch": 1.83, "grad_norm": 0.5709717869758606, "learning_rate": 0.00019960206202177577, "loss": 1.7152, "step": 54880 }, { "epoch": 1.83, "grad_norm": 0.5620348453521729, "learning_rate": 0.00019959221264553352, "loss": 1.7426, "step": 54881 }, { "epoch": 1.83, "grad_norm": 0.576504647731781, "learning_rate": 0.0001995823633911716, "loss": 1.7466, "step": 54882 }, { "epoch": 1.83, "grad_norm": 0.5554741621017456, "learning_rate": 0.00019957251425870197, "loss": 1.7034, "step": 54883 }, { "epoch": 1.83, "grad_norm": 0.5802398324012756, "learning_rate": 0.0001995626652481364, "loss": 1.7052, "step": 54884 }, { "epoch": 1.83, "grad_norm": 0.576618492603302, "learning_rate": 0.00019955281635948698, "loss": 1.7355, "step": 54885 }, { "epoch": 1.83, "grad_norm": 0.5674839019775391, "learning_rate": 0.0001995429675927657, "loss": 1.7279, "step": 54886 }, { "epoch": 1.83, "grad_norm": 0.5690516829490662, "learning_rate": 0.0001995331189479844, "loss": 1.7356, "step": 54887 }, { "epoch": 1.83, "grad_norm": 0.5674517154693604, "learning_rate": 0.00019952327042515501, "loss": 1.7844, "step": 54888 }, { "epoch": 1.83, "grad_norm": 0.5755521655082703, "learning_rate": 0.00019951342202428966, "loss": 1.6794, "step": 54889 }, { "epoch": 1.83, "grad_norm": 0.5734595656394958, "learning_rate": 0.00019950357374540027, "loss": 1.7983, "step": 54890 }, { "epoch": 1.83, "grad_norm": 0.5793695449829102, "learning_rate": 0.00019949372558849862, "loss": 1.7511, "step": 54891 }, { "epoch": 1.83, "grad_norm": 0.5786218047142029, "learning_rate": 0.00019948387755359687, "loss": 1.7676, "step": 54892 }, { "epoch": 1.83, "grad_norm": 0.578875720500946, "learning_rate": 0.00019947402964070688, "loss": 1.7923, "step": 54893 }, { "epoch": 1.83, "grad_norm": 0.5751141309738159, "learning_rate": 0.00019946418184984055, "loss": 1.8694, "step": 54894 }, { "epoch": 1.83, "grad_norm": 0.5565658807754517, "learning_rate": 0.00019945433418100983, "loss": 1.7429, "step": 54895 }, { "epoch": 1.83, "grad_norm": 0.5644168853759766, "learning_rate": 0.00019944448663422693, "loss": 1.754, "step": 54896 }, { "epoch": 1.83, "grad_norm": 0.6061076521873474, "learning_rate": 0.00019943463920950343, "loss": 1.7473, "step": 54897 }, { "epoch": 1.83, "grad_norm": 0.5526411533355713, "learning_rate": 0.00019942479190685145, "loss": 1.7213, "step": 54898 }, { "epoch": 1.83, "grad_norm": 0.5857957601547241, "learning_rate": 0.00019941494472628304, "loss": 1.6683, "step": 54899 }, { "epoch": 1.83, "grad_norm": 0.5790213346481323, "learning_rate": 0.00019940509766781008, "loss": 1.7232, "step": 54900 }, { "epoch": 1.83, "grad_norm": 0.580475926399231, "learning_rate": 0.00019939525073144447, "loss": 1.663, "step": 54901 }, { "epoch": 1.83, "grad_norm": 0.575859546661377, "learning_rate": 0.00019938540391719814, "loss": 1.6908, "step": 54902 }, { "epoch": 1.83, "grad_norm": 0.5606285929679871, "learning_rate": 0.0001993755572250833, "loss": 1.7271, "step": 54903 }, { "epoch": 1.83, "grad_norm": 0.5638282299041748, "learning_rate": 0.0001993657106551115, "loss": 1.7483, "step": 54904 }, { "epoch": 1.83, "grad_norm": 0.5693512558937073, "learning_rate": 0.00019935586420729496, "loss": 1.7184, "step": 54905 }, { "epoch": 1.83, "grad_norm": 0.5925362706184387, "learning_rate": 0.00019934601788164564, "loss": 1.7969, "step": 54906 }, { "epoch": 1.83, "grad_norm": 0.5940829515457153, "learning_rate": 0.00019933617167817538, "loss": 1.8291, "step": 54907 }, { "epoch": 1.83, "grad_norm": 0.5801048874855042, "learning_rate": 0.00019932632559689617, "loss": 1.7177, "step": 54908 }, { "epoch": 1.83, "grad_norm": 0.5532715320587158, "learning_rate": 0.00019931647963781989, "loss": 1.6791, "step": 54909 }, { "epoch": 1.83, "grad_norm": 0.5786250829696655, "learning_rate": 0.00019930663380095877, "loss": 1.7554, "step": 54910 }, { "epoch": 1.83, "grad_norm": 0.5901823043823242, "learning_rate": 0.00019929678808632435, "loss": 1.7328, "step": 54911 }, { "epoch": 1.83, "grad_norm": 0.5786012411117554, "learning_rate": 0.00019928694249392886, "loss": 1.7865, "step": 54912 }, { "epoch": 1.83, "grad_norm": 0.5588480234146118, "learning_rate": 0.00019927709702378428, "loss": 1.7161, "step": 54913 }, { "epoch": 1.83, "grad_norm": 0.5714145302772522, "learning_rate": 0.00019926725167590236, "loss": 1.7388, "step": 54914 }, { "epoch": 1.83, "grad_norm": 0.5879669189453125, "learning_rate": 0.00019925740645029515, "loss": 1.7498, "step": 54915 }, { "epoch": 1.83, "grad_norm": 0.5808685421943665, "learning_rate": 0.0001992475613469747, "loss": 1.7866, "step": 54916 }, { "epoch": 1.83, "grad_norm": 0.5777229070663452, "learning_rate": 0.00019923771636595275, "loss": 1.7093, "step": 54917 }, { "epoch": 1.83, "grad_norm": 0.5780338048934937, "learning_rate": 0.00019922787150724138, "loss": 1.7952, "step": 54918 }, { "epoch": 1.83, "grad_norm": 0.5528842210769653, "learning_rate": 0.00019921802677085255, "loss": 1.5848, "step": 54919 }, { "epoch": 1.83, "grad_norm": 0.5661815404891968, "learning_rate": 0.00019920818215679825, "loss": 1.6778, "step": 54920 }, { "epoch": 1.83, "grad_norm": 0.568035364151001, "learning_rate": 0.0001991983376650903, "loss": 1.681, "step": 54921 }, { "epoch": 1.83, "grad_norm": 0.5712311863899231, "learning_rate": 0.0001991884932957407, "loss": 1.7796, "step": 54922 }, { "epoch": 1.83, "grad_norm": 0.5777641534805298, "learning_rate": 0.00019917864904876148, "loss": 1.7792, "step": 54923 }, { "epoch": 1.83, "grad_norm": 0.560662567615509, "learning_rate": 0.00019916880492416443, "loss": 1.6894, "step": 54924 }, { "epoch": 1.83, "grad_norm": 0.5818105340003967, "learning_rate": 0.0001991589609219616, "loss": 1.7621, "step": 54925 }, { "epoch": 1.83, "grad_norm": 0.6005656123161316, "learning_rate": 0.00019914911704216508, "loss": 1.7014, "step": 54926 }, { "epoch": 1.83, "grad_norm": 0.597977876663208, "learning_rate": 0.00019913927328478657, "loss": 1.7424, "step": 54927 }, { "epoch": 1.83, "grad_norm": 0.5764068961143494, "learning_rate": 0.00019912942964983812, "loss": 1.6752, "step": 54928 }, { "epoch": 1.83, "grad_norm": 0.5993903279304504, "learning_rate": 0.00019911958613733167, "loss": 1.7766, "step": 54929 }, { "epoch": 1.83, "grad_norm": 0.5783799886703491, "learning_rate": 0.00019910974274727926, "loss": 1.6819, "step": 54930 }, { "epoch": 1.83, "grad_norm": 0.5722478628158569, "learning_rate": 0.00019909989947969263, "loss": 1.7171, "step": 54931 }, { "epoch": 1.83, "grad_norm": 0.5923408269882202, "learning_rate": 0.0001990900563345839, "loss": 1.8495, "step": 54932 }, { "epoch": 1.83, "grad_norm": 0.5652879476547241, "learning_rate": 0.00019908021331196504, "loss": 1.7633, "step": 54933 }, { "epoch": 1.83, "grad_norm": 0.5627319812774658, "learning_rate": 0.00019907037041184784, "loss": 1.7002, "step": 54934 }, { "epoch": 1.83, "grad_norm": 0.5790238380432129, "learning_rate": 0.0001990605276342444, "loss": 1.7474, "step": 54935 }, { "epoch": 1.83, "grad_norm": 0.5598799586296082, "learning_rate": 0.00019905068497916664, "loss": 1.7875, "step": 54936 }, { "epoch": 1.83, "grad_norm": 0.5666407346725464, "learning_rate": 0.0001990408424466264, "loss": 1.7052, "step": 54937 }, { "epoch": 1.83, "grad_norm": 0.5698826313018799, "learning_rate": 0.00019903100003663564, "loss": 1.7417, "step": 54938 }, { "epoch": 1.83, "grad_norm": 0.585037887096405, "learning_rate": 0.00019902115774920646, "loss": 1.7538, "step": 54939 }, { "epoch": 1.83, "grad_norm": 0.615568995475769, "learning_rate": 0.00019901131558435078, "loss": 1.7788, "step": 54940 }, { "epoch": 1.83, "grad_norm": 0.5681618452072144, "learning_rate": 0.00019900147354208036, "loss": 1.6714, "step": 54941 }, { "epoch": 1.83, "grad_norm": 0.580491840839386, "learning_rate": 0.0001989916316224073, "loss": 1.7278, "step": 54942 }, { "epoch": 1.83, "grad_norm": 0.5904351472854614, "learning_rate": 0.00019898178982534362, "loss": 1.7005, "step": 54943 }, { "epoch": 1.83, "grad_norm": 0.5665604472160339, "learning_rate": 0.00019897194815090104, "loss": 1.7806, "step": 54944 }, { "epoch": 1.83, "grad_norm": 0.5747690200805664, "learning_rate": 0.00019896210659909162, "loss": 1.7387, "step": 54945 }, { "epoch": 1.83, "grad_norm": 0.5757504105567932, "learning_rate": 0.00019895226516992748, "loss": 1.6732, "step": 54946 }, { "epoch": 1.83, "grad_norm": 0.642390251159668, "learning_rate": 0.00019894242386342024, "loss": 1.8199, "step": 54947 }, { "epoch": 1.83, "grad_norm": 0.5787594318389893, "learning_rate": 0.00019893258267958202, "loss": 1.754, "step": 54948 }, { "epoch": 1.83, "grad_norm": 0.5990543365478516, "learning_rate": 0.00019892274161842477, "loss": 1.7232, "step": 54949 }, { "epoch": 1.83, "grad_norm": 0.5853188037872314, "learning_rate": 0.00019891290067996047, "loss": 1.6766, "step": 54950 }, { "epoch": 1.83, "grad_norm": 0.5672383904457092, "learning_rate": 0.000198903059864201, "loss": 1.734, "step": 54951 }, { "epoch": 1.83, "grad_norm": 0.5846010446548462, "learning_rate": 0.00019889321917115823, "loss": 1.758, "step": 54952 }, { "epoch": 1.83, "grad_norm": 0.563847005367279, "learning_rate": 0.00019888337860084435, "loss": 1.681, "step": 54953 }, { "epoch": 1.83, "grad_norm": 0.5846042633056641, "learning_rate": 0.000198873538153271, "loss": 1.7952, "step": 54954 }, { "epoch": 1.83, "grad_norm": 0.572289764881134, "learning_rate": 0.0001988636978284503, "loss": 1.7741, "step": 54955 }, { "epoch": 1.83, "grad_norm": 0.560903787612915, "learning_rate": 0.00019885385762639427, "loss": 1.7459, "step": 54956 }, { "epoch": 1.83, "grad_norm": 0.5908294916152954, "learning_rate": 0.0001988440175471147, "loss": 1.746, "step": 54957 }, { "epoch": 1.83, "grad_norm": 0.5835570096969604, "learning_rate": 0.00019883417759062358, "loss": 1.7049, "step": 54958 }, { "epoch": 1.83, "grad_norm": 0.5892765522003174, "learning_rate": 0.00019882433775693278, "loss": 1.7964, "step": 54959 }, { "epoch": 1.83, "grad_norm": 0.5908539891242981, "learning_rate": 0.0001988144980460545, "loss": 1.6949, "step": 54960 }, { "epoch": 1.83, "grad_norm": 0.5696364641189575, "learning_rate": 0.00019880465845800035, "loss": 1.7034, "step": 54961 }, { "epoch": 1.83, "grad_norm": 0.5865800976753235, "learning_rate": 0.0001987948189927825, "loss": 1.6864, "step": 54962 }, { "epoch": 1.83, "grad_norm": 0.5616492629051208, "learning_rate": 0.0001987849796504129, "loss": 1.6888, "step": 54963 }, { "epoch": 1.83, "grad_norm": 0.5921059846878052, "learning_rate": 0.0001987751404309033, "loss": 1.6569, "step": 54964 }, { "epoch": 1.83, "grad_norm": 0.5910724401473999, "learning_rate": 0.0001987653013342658, "loss": 1.7432, "step": 54965 }, { "epoch": 1.83, "grad_norm": 0.6036646366119385, "learning_rate": 0.00019875546236051237, "loss": 1.7721, "step": 54966 }, { "epoch": 1.83, "grad_norm": 0.5869466066360474, "learning_rate": 0.00019874562350965489, "loss": 1.6785, "step": 54967 }, { "epoch": 1.83, "grad_norm": 0.5837821364402771, "learning_rate": 0.00019873578478170513, "loss": 1.7254, "step": 54968 }, { "epoch": 1.83, "grad_norm": 0.5692365169525146, "learning_rate": 0.00019872594617667536, "loss": 1.7491, "step": 54969 }, { "epoch": 1.83, "grad_norm": 0.5715200901031494, "learning_rate": 0.00019871610769457736, "loss": 1.7625, "step": 54970 }, { "epoch": 1.83, "grad_norm": 0.5680237412452698, "learning_rate": 0.0001987062693354231, "loss": 1.7114, "step": 54971 }, { "epoch": 1.83, "grad_norm": 0.5659439563751221, "learning_rate": 0.00019869643109922443, "loss": 1.6998, "step": 54972 }, { "epoch": 1.83, "grad_norm": 1.3997020721435547, "learning_rate": 0.00019868659298599345, "loss": 1.8069, "step": 54973 }, { "epoch": 1.83, "grad_norm": 0.6139088869094849, "learning_rate": 0.00019867675499574198, "loss": 1.78, "step": 54974 }, { "epoch": 1.83, "grad_norm": 0.5822421908378601, "learning_rate": 0.00019866691712848192, "loss": 1.7165, "step": 54975 }, { "epoch": 1.83, "grad_norm": 0.5850704908370972, "learning_rate": 0.0001986570793842255, "loss": 1.6657, "step": 54976 }, { "epoch": 1.83, "grad_norm": 0.5729446411132812, "learning_rate": 0.00019864724176298424, "loss": 1.7105, "step": 54977 }, { "epoch": 1.83, "grad_norm": 0.5727715492248535, "learning_rate": 0.00019863740426477036, "loss": 1.7397, "step": 54978 }, { "epoch": 1.83, "grad_norm": 0.5831745862960815, "learning_rate": 0.00019862756688959573, "loss": 1.8079, "step": 54979 }, { "epoch": 1.83, "grad_norm": 0.5863505601882935, "learning_rate": 0.00019861772963747237, "loss": 1.7543, "step": 54980 }, { "epoch": 1.83, "grad_norm": 0.5936755537986755, "learning_rate": 0.00019860789250841212, "loss": 1.6757, "step": 54981 }, { "epoch": 1.83, "grad_norm": 0.5608948469161987, "learning_rate": 0.00019859805550242684, "loss": 1.7024, "step": 54982 }, { "epoch": 1.83, "grad_norm": 0.5615212917327881, "learning_rate": 0.00019858821861952876, "loss": 1.6911, "step": 54983 }, { "epoch": 1.83, "grad_norm": 0.5913413166999817, "learning_rate": 0.00019857838185972947, "loss": 1.7511, "step": 54984 }, { "epoch": 1.83, "grad_norm": 0.5977703928947449, "learning_rate": 0.00019856854522304113, "loss": 1.7448, "step": 54985 }, { "epoch": 1.83, "grad_norm": 0.6003766059875488, "learning_rate": 0.00019855870870947565, "loss": 1.7297, "step": 54986 }, { "epoch": 1.83, "grad_norm": 0.5909087061882019, "learning_rate": 0.000198548872319045, "loss": 1.7169, "step": 54987 }, { "epoch": 1.83, "grad_norm": 0.5622125864028931, "learning_rate": 0.00019853903605176107, "loss": 1.6908, "step": 54988 }, { "epoch": 1.83, "grad_norm": 0.584369957447052, "learning_rate": 0.00019852919990763566, "loss": 1.688, "step": 54989 }, { "epoch": 1.83, "grad_norm": 0.5914549231529236, "learning_rate": 0.00019851936388668107, "loss": 1.7647, "step": 54990 }, { "epoch": 1.83, "grad_norm": 0.6113821268081665, "learning_rate": 0.0001985095279889088, "loss": 1.7421, "step": 54991 }, { "epoch": 1.83, "grad_norm": 0.5809487104415894, "learning_rate": 0.00019849969221433114, "loss": 1.7746, "step": 54992 }, { "epoch": 1.83, "grad_norm": 0.5623661875724792, "learning_rate": 0.00019848985656295993, "loss": 1.7543, "step": 54993 }, { "epoch": 1.83, "grad_norm": 0.8214673399925232, "learning_rate": 0.000198480021034807, "loss": 1.6513, "step": 54994 }, { "epoch": 1.83, "grad_norm": 0.5730289816856384, "learning_rate": 0.00019847018562988436, "loss": 1.6808, "step": 54995 }, { "epoch": 1.83, "grad_norm": 0.5900152921676636, "learning_rate": 0.0001984603503482039, "loss": 1.7421, "step": 54996 }, { "epoch": 1.83, "grad_norm": 0.5878956317901611, "learning_rate": 0.00019845051518977783, "loss": 1.7719, "step": 54997 }, { "epoch": 1.83, "grad_norm": 0.557396650314331, "learning_rate": 0.0001984406801546176, "loss": 1.7353, "step": 54998 }, { "epoch": 1.83, "grad_norm": 0.57618248462677, "learning_rate": 0.0001984308452427356, "loss": 1.7695, "step": 54999 }, { "epoch": 1.83, "grad_norm": 0.5618385672569275, "learning_rate": 0.00019842101045414355, "loss": 1.6841, "step": 55000 }, { "epoch": 1.83, "grad_norm": 0.5708776116371155, "learning_rate": 0.0001984111757888534, "loss": 1.7348, "step": 55001 }, { "epoch": 1.83, "grad_norm": 0.566202700138092, "learning_rate": 0.00019840134124687715, "loss": 1.7495, "step": 55002 }, { "epoch": 1.83, "grad_norm": 0.5916502475738525, "learning_rate": 0.0001983915068282267, "loss": 1.6846, "step": 55003 }, { "epoch": 1.83, "grad_norm": 0.5729557871818542, "learning_rate": 0.00019838167253291399, "loss": 1.7062, "step": 55004 }, { "epoch": 1.83, "grad_norm": 0.5839558243751526, "learning_rate": 0.00019837183836095084, "loss": 1.7781, "step": 55005 }, { "epoch": 1.83, "grad_norm": 0.5916597843170166, "learning_rate": 0.0001983620043123494, "loss": 1.7108, "step": 55006 }, { "epoch": 1.83, "grad_norm": 0.5574559569358826, "learning_rate": 0.00019835217038712157, "loss": 1.7839, "step": 55007 }, { "epoch": 1.83, "grad_norm": 0.5940548181533813, "learning_rate": 0.00019834233658527919, "loss": 1.7137, "step": 55008 }, { "epoch": 1.83, "grad_norm": 0.5868220925331116, "learning_rate": 0.0001983325029068342, "loss": 1.7363, "step": 55009 }, { "epoch": 1.83, "grad_norm": 0.5836670994758606, "learning_rate": 0.00019832266935179861, "loss": 1.6979, "step": 55010 }, { "epoch": 1.83, "grad_norm": 0.5739492774009705, "learning_rate": 0.00019831283592018428, "loss": 1.7497, "step": 55011 }, { "epoch": 1.83, "grad_norm": 0.5632876753807068, "learning_rate": 0.00019830300261200316, "loss": 1.7878, "step": 55012 }, { "epoch": 1.83, "grad_norm": 0.5982877612113953, "learning_rate": 0.00019829316942726738, "loss": 1.7211, "step": 55013 }, { "epoch": 1.83, "grad_norm": 0.5922422409057617, "learning_rate": 0.00019828333636598847, "loss": 1.8008, "step": 55014 }, { "epoch": 1.83, "grad_norm": 0.5703334808349609, "learning_rate": 0.0001982735034281787, "loss": 1.7524, "step": 55015 }, { "epoch": 1.83, "grad_norm": 0.5729442834854126, "learning_rate": 0.0001982636706138499, "loss": 1.7259, "step": 55016 }, { "epoch": 1.83, "grad_norm": 0.5957878828048706, "learning_rate": 0.00019825383792301408, "loss": 1.7469, "step": 55017 }, { "epoch": 1.83, "grad_norm": 0.5901333689689636, "learning_rate": 0.00019824400535568305, "loss": 1.6426, "step": 55018 }, { "epoch": 1.83, "grad_norm": 0.5743987560272217, "learning_rate": 0.00019823417291186876, "loss": 1.6729, "step": 55019 }, { "epoch": 1.83, "grad_norm": 0.5641874074935913, "learning_rate": 0.00019822434059158336, "loss": 1.7582, "step": 55020 }, { "epoch": 1.83, "grad_norm": 0.5621989965438843, "learning_rate": 0.00019821450839483843, "loss": 1.7445, "step": 55021 }, { "epoch": 1.83, "grad_norm": 0.5619373321533203, "learning_rate": 0.00019820467632164612, "loss": 1.6941, "step": 55022 }, { "epoch": 1.83, "grad_norm": 0.5730809569358826, "learning_rate": 0.00019819484437201844, "loss": 1.7595, "step": 55023 }, { "epoch": 1.83, "grad_norm": 0.5826541185379028, "learning_rate": 0.00019818501254596714, "loss": 1.6709, "step": 55024 }, { "epoch": 1.83, "grad_norm": 0.5671914219856262, "learning_rate": 0.0001981751808435042, "loss": 1.7441, "step": 55025 }, { "epoch": 1.83, "grad_norm": 0.5643296241760254, "learning_rate": 0.00019816534926464158, "loss": 1.7323, "step": 55026 }, { "epoch": 1.83, "grad_norm": 0.575829267501831, "learning_rate": 0.0001981555178093914, "loss": 1.6763, "step": 55027 }, { "epoch": 1.83, "grad_norm": 0.5725725889205933, "learning_rate": 0.0001981456864777652, "loss": 1.7567, "step": 55028 }, { "epoch": 1.83, "grad_norm": 0.5720559358596802, "learning_rate": 0.00019813585526977522, "loss": 1.7532, "step": 55029 }, { "epoch": 1.83, "grad_norm": 0.571274995803833, "learning_rate": 0.00019812602418543338, "loss": 1.6661, "step": 55030 }, { "epoch": 1.83, "grad_norm": 0.5796090364456177, "learning_rate": 0.0001981161932247514, "loss": 1.6437, "step": 55031 }, { "epoch": 1.83, "grad_norm": 0.5729971528053284, "learning_rate": 0.0001981063623877414, "loss": 1.7293, "step": 55032 }, { "epoch": 1.83, "grad_norm": 0.5662601590156555, "learning_rate": 0.00019809653167441536, "loss": 1.7338, "step": 55033 }, { "epoch": 1.83, "grad_norm": 0.591209352016449, "learning_rate": 0.000198086701084785, "loss": 1.7347, "step": 55034 }, { "epoch": 1.83, "grad_norm": 0.5940573215484619, "learning_rate": 0.00019807687061886231, "loss": 1.7689, "step": 55035 }, { "epoch": 1.83, "grad_norm": 0.5746362805366516, "learning_rate": 0.00019806704027665933, "loss": 1.7519, "step": 55036 }, { "epoch": 1.83, "grad_norm": 0.5960155725479126, "learning_rate": 0.00019805721005818804, "loss": 1.7478, "step": 55037 }, { "epoch": 1.83, "grad_norm": 0.5823906064033508, "learning_rate": 0.00019804737996346024, "loss": 1.6954, "step": 55038 }, { "epoch": 1.83, "grad_norm": 0.6031933426856995, "learning_rate": 0.00019803754999248783, "loss": 1.7465, "step": 55039 }, { "epoch": 1.83, "grad_norm": 0.5939745306968689, "learning_rate": 0.0001980277201452829, "loss": 1.692, "step": 55040 }, { "epoch": 1.83, "grad_norm": 0.5935380458831787, "learning_rate": 0.0001980178904218572, "loss": 1.758, "step": 55041 }, { "epoch": 1.83, "grad_norm": 0.5895292162895203, "learning_rate": 0.0001980080608222228, "loss": 1.7775, "step": 55042 }, { "epoch": 1.83, "grad_norm": 0.5912284851074219, "learning_rate": 0.00019799823134639163, "loss": 1.6977, "step": 55043 }, { "epoch": 1.83, "grad_norm": 0.5870230793952942, "learning_rate": 0.00019798840199437555, "loss": 1.7362, "step": 55044 }, { "epoch": 1.83, "grad_norm": 0.5881434082984924, "learning_rate": 0.00019797857276618647, "loss": 1.7962, "step": 55045 }, { "epoch": 1.83, "grad_norm": 0.5574958324432373, "learning_rate": 0.0001979687436618364, "loss": 1.7005, "step": 55046 }, { "epoch": 1.83, "grad_norm": 0.5957662463188171, "learning_rate": 0.0001979589146813373, "loss": 1.6977, "step": 55047 }, { "epoch": 1.83, "grad_norm": 0.5989259481430054, "learning_rate": 0.00019794908582470091, "loss": 1.7207, "step": 55048 }, { "epoch": 1.83, "grad_norm": 0.5743497014045715, "learning_rate": 0.00019793925709193937, "loss": 1.7217, "step": 55049 }, { "epoch": 1.83, "grad_norm": 0.5622519254684448, "learning_rate": 0.00019792942848306458, "loss": 1.7952, "step": 55050 }, { "epoch": 1.83, "grad_norm": 0.5670391917228699, "learning_rate": 0.00019791959999808836, "loss": 1.7604, "step": 55051 }, { "epoch": 1.83, "grad_norm": 0.5775157809257507, "learning_rate": 0.00019790977163702272, "loss": 1.7219, "step": 55052 }, { "epoch": 1.83, "grad_norm": 0.5647251605987549, "learning_rate": 0.00019789994339987964, "loss": 1.7371, "step": 55053 }, { "epoch": 1.83, "grad_norm": 0.6005327701568604, "learning_rate": 0.0001978901152866709, "loss": 1.7943, "step": 55054 }, { "epoch": 1.83, "grad_norm": 0.5706977248191833, "learning_rate": 0.0001978802872974085, "loss": 1.6958, "step": 55055 }, { "epoch": 1.83, "grad_norm": 0.5646889805793762, "learning_rate": 0.0001978704594321044, "loss": 1.7811, "step": 55056 }, { "epoch": 1.83, "grad_norm": 0.5969448685646057, "learning_rate": 0.0001978606316907706, "loss": 1.8409, "step": 55057 }, { "epoch": 1.83, "grad_norm": 0.6147668957710266, "learning_rate": 0.00019785080407341886, "loss": 1.6977, "step": 55058 }, { "epoch": 1.83, "grad_norm": 0.5868041515350342, "learning_rate": 0.0001978409765800612, "loss": 1.7576, "step": 55059 }, { "epoch": 1.83, "grad_norm": 0.6172440052032471, "learning_rate": 0.00019783114921070964, "loss": 1.7675, "step": 55060 }, { "epoch": 1.83, "grad_norm": 0.5795437693595886, "learning_rate": 0.00019782132196537587, "loss": 1.7874, "step": 55061 }, { "epoch": 1.83, "grad_norm": 0.5719722509384155, "learning_rate": 0.00019781149484407196, "loss": 1.7576, "step": 55062 }, { "epoch": 1.83, "grad_norm": 0.5854763984680176, "learning_rate": 0.00019780166784681005, "loss": 1.7738, "step": 55063 }, { "epoch": 1.83, "grad_norm": 0.6260597109794617, "learning_rate": 0.0001977918409736016, "loss": 1.7817, "step": 55064 }, { "epoch": 1.83, "grad_norm": 0.5916807651519775, "learning_rate": 0.00019778201422445888, "loss": 1.7167, "step": 55065 }, { "epoch": 1.83, "grad_norm": 0.5819199085235596, "learning_rate": 0.00019777218759939376, "loss": 1.7905, "step": 55066 }, { "epoch": 1.83, "grad_norm": 0.5533696413040161, "learning_rate": 0.00019776236109841816, "loss": 1.7106, "step": 55067 }, { "epoch": 1.83, "grad_norm": 0.5979459881782532, "learning_rate": 0.00019775253472154397, "loss": 1.7501, "step": 55068 }, { "epoch": 1.83, "grad_norm": 0.5845086574554443, "learning_rate": 0.00019774270846878306, "loss": 1.7687, "step": 55069 }, { "epoch": 1.83, "grad_norm": 0.6159976720809937, "learning_rate": 0.0001977328823401476, "loss": 1.6953, "step": 55070 }, { "epoch": 1.83, "grad_norm": 0.579690158367157, "learning_rate": 0.00019772305633564922, "loss": 1.7123, "step": 55071 }, { "epoch": 1.83, "grad_norm": 0.5734978914260864, "learning_rate": 0.0001977132304553, "loss": 1.7405, "step": 55072 }, { "epoch": 1.83, "grad_norm": 0.5777733325958252, "learning_rate": 0.00019770340469911192, "loss": 1.7383, "step": 55073 }, { "epoch": 1.83, "grad_norm": 0.5991165041923523, "learning_rate": 0.00019769357906709674, "loss": 1.7241, "step": 55074 }, { "epoch": 1.83, "grad_norm": 0.5844598412513733, "learning_rate": 0.00019768375355926654, "loss": 1.7185, "step": 55075 }, { "epoch": 1.83, "grad_norm": 0.5778494477272034, "learning_rate": 0.0001976739281756331, "loss": 1.653, "step": 55076 }, { "epoch": 1.83, "grad_norm": 0.558252215385437, "learning_rate": 0.00019766410291620863, "loss": 1.7501, "step": 55077 }, { "epoch": 1.83, "grad_norm": 0.5796612501144409, "learning_rate": 0.00019765427778100465, "loss": 1.7488, "step": 55078 }, { "epoch": 1.83, "grad_norm": 0.6010959148406982, "learning_rate": 0.00019764445277003335, "loss": 1.77, "step": 55079 }, { "epoch": 1.83, "grad_norm": 0.577286422252655, "learning_rate": 0.0001976346278833067, "loss": 1.748, "step": 55080 }, { "epoch": 1.83, "grad_norm": 0.5748291015625, "learning_rate": 0.00019762480312083647, "loss": 1.747, "step": 55081 }, { "epoch": 1.83, "grad_norm": 0.5649480223655701, "learning_rate": 0.0001976149784826346, "loss": 1.743, "step": 55082 }, { "epoch": 1.83, "grad_norm": 0.5633136630058289, "learning_rate": 0.00019760515396871313, "loss": 1.7411, "step": 55083 }, { "epoch": 1.83, "grad_norm": 0.5805373787879944, "learning_rate": 0.00019759532957908392, "loss": 1.7209, "step": 55084 }, { "epoch": 1.83, "grad_norm": 0.5806406736373901, "learning_rate": 0.00019758550531375879, "loss": 1.7176, "step": 55085 }, { "epoch": 1.83, "grad_norm": 0.5741958022117615, "learning_rate": 0.00019757568117274982, "loss": 1.7258, "step": 55086 }, { "epoch": 1.83, "grad_norm": 0.7084815502166748, "learning_rate": 0.00019756585715606894, "loss": 1.8101, "step": 55087 }, { "epoch": 1.83, "grad_norm": 0.5798469185829163, "learning_rate": 0.00019755603326372794, "loss": 1.715, "step": 55088 }, { "epoch": 1.83, "grad_norm": 0.6038373708724976, "learning_rate": 0.00019754620949573887, "loss": 1.7938, "step": 55089 }, { "epoch": 1.83, "grad_norm": 0.5615257024765015, "learning_rate": 0.0001975363858521136, "loss": 1.8028, "step": 55090 }, { "epoch": 1.83, "grad_norm": 0.5413444638252258, "learning_rate": 0.00019752656233286406, "loss": 1.7212, "step": 55091 }, { "epoch": 1.83, "grad_norm": 0.5659453272819519, "learning_rate": 0.0001975167389380021, "loss": 1.801, "step": 55092 }, { "epoch": 1.83, "grad_norm": 0.5524485111236572, "learning_rate": 0.00019750691566753988, "loss": 1.7279, "step": 55093 }, { "epoch": 1.83, "grad_norm": 0.5828251242637634, "learning_rate": 0.000197497092521489, "loss": 1.7187, "step": 55094 }, { "epoch": 1.83, "grad_norm": 0.5595085024833679, "learning_rate": 0.0001974872694998616, "loss": 1.7609, "step": 55095 }, { "epoch": 1.83, "grad_norm": 0.5651674866676331, "learning_rate": 0.0001974774466026695, "loss": 1.7829, "step": 55096 }, { "epoch": 1.83, "grad_norm": 0.5572850704193115, "learning_rate": 0.0001974676238299248, "loss": 1.6968, "step": 55097 }, { "epoch": 1.83, "grad_norm": 0.5553366541862488, "learning_rate": 0.00019745780118163924, "loss": 1.66, "step": 55098 }, { "epoch": 1.83, "grad_norm": 0.5887985825538635, "learning_rate": 0.0001974479786578247, "loss": 1.7078, "step": 55099 }, { "epoch": 1.83, "grad_norm": 0.580429196357727, "learning_rate": 0.00019743815625849342, "loss": 1.7727, "step": 55100 }, { "epoch": 1.83, "grad_norm": 0.5847392678260803, "learning_rate": 0.00019742833398365693, "loss": 1.7537, "step": 55101 }, { "epoch": 1.83, "grad_norm": 0.5668267011642456, "learning_rate": 0.00019741851183332733, "loss": 1.7241, "step": 55102 }, { "epoch": 1.83, "grad_norm": 0.5541937947273254, "learning_rate": 0.00019740868980751657, "loss": 1.7236, "step": 55103 }, { "epoch": 1.83, "grad_norm": 0.5798805952072144, "learning_rate": 0.00019739886790623665, "loss": 1.7616, "step": 55104 }, { "epoch": 1.83, "grad_norm": 0.5819372534751892, "learning_rate": 0.0001973890461294993, "loss": 1.7413, "step": 55105 }, { "epoch": 1.83, "grad_norm": 0.5755464434623718, "learning_rate": 0.00019737922447731645, "loss": 1.7079, "step": 55106 }, { "epoch": 1.83, "grad_norm": 0.5693857073783875, "learning_rate": 0.0001973694029497003, "loss": 1.6156, "step": 55107 }, { "epoch": 1.83, "grad_norm": 0.5589621663093567, "learning_rate": 0.0001973595815466624, "loss": 1.7078, "step": 55108 }, { "epoch": 1.83, "grad_norm": 0.5688289999961853, "learning_rate": 0.00019734976026821488, "loss": 1.8305, "step": 55109 }, { "epoch": 1.83, "grad_norm": 0.5708233118057251, "learning_rate": 0.0001973399391143697, "loss": 1.755, "step": 55110 }, { "epoch": 1.83, "grad_norm": 0.5404011011123657, "learning_rate": 0.00019733011808513863, "loss": 1.6849, "step": 55111 }, { "epoch": 1.83, "grad_norm": 0.5754523873329163, "learning_rate": 0.00019732029718053367, "loss": 1.7068, "step": 55112 }, { "epoch": 1.83, "grad_norm": 0.5468531847000122, "learning_rate": 0.00019731047640056672, "loss": 1.6508, "step": 55113 }, { "epoch": 1.83, "grad_norm": 0.5620028972625732, "learning_rate": 0.00019730065574524987, "loss": 1.603, "step": 55114 }, { "epoch": 1.83, "grad_norm": 0.564271092414856, "learning_rate": 0.00019729083521459473, "loss": 1.6712, "step": 55115 }, { "epoch": 1.83, "grad_norm": 0.5771214365959167, "learning_rate": 0.00019728101480861345, "loss": 1.7436, "step": 55116 }, { "epoch": 1.83, "grad_norm": 0.5739903450012207, "learning_rate": 0.0001972711945273179, "loss": 1.8039, "step": 55117 }, { "epoch": 1.83, "grad_norm": 0.597339928150177, "learning_rate": 0.00019726137437071992, "loss": 1.7766, "step": 55118 }, { "epoch": 1.83, "grad_norm": 0.5794937014579773, "learning_rate": 0.00019725155433883155, "loss": 1.746, "step": 55119 }, { "epoch": 1.83, "grad_norm": 0.5680918097496033, "learning_rate": 0.00019724173443166467, "loss": 1.7552, "step": 55120 }, { "epoch": 1.83, "grad_norm": 0.5744741559028625, "learning_rate": 0.00019723191464923113, "loss": 1.68, "step": 55121 }, { "epoch": 1.83, "grad_norm": 0.5614400506019592, "learning_rate": 0.0001972220949915429, "loss": 1.787, "step": 55122 }, { "epoch": 1.83, "grad_norm": 0.5707258582115173, "learning_rate": 0.0001972122754586119, "loss": 1.7854, "step": 55123 }, { "epoch": 1.83, "grad_norm": 0.5685776472091675, "learning_rate": 0.00019720245605045014, "loss": 1.743, "step": 55124 }, { "epoch": 1.83, "grad_norm": 0.5664641857147217, "learning_rate": 0.00019719263676706934, "loss": 1.6969, "step": 55125 }, { "epoch": 1.83, "grad_norm": 0.5866970419883728, "learning_rate": 0.00019718281760848158, "loss": 1.738, "step": 55126 }, { "epoch": 1.83, "grad_norm": 0.5793617963790894, "learning_rate": 0.0001971729985746988, "loss": 1.6953, "step": 55127 }, { "epoch": 1.83, "grad_norm": 0.5714825391769409, "learning_rate": 0.00019716317966573277, "loss": 1.7489, "step": 55128 }, { "epoch": 1.83, "grad_norm": 0.5762069821357727, "learning_rate": 0.0001971533608815954, "loss": 1.7748, "step": 55129 }, { "epoch": 1.83, "grad_norm": 0.5670064091682434, "learning_rate": 0.0001971435422222989, "loss": 1.7859, "step": 55130 }, { "epoch": 1.83, "grad_norm": 0.585809051990509, "learning_rate": 0.0001971337236878548, "loss": 1.8462, "step": 55131 }, { "epoch": 1.83, "grad_norm": 0.5824283957481384, "learning_rate": 0.00019712390527827528, "loss": 1.7244, "step": 55132 }, { "epoch": 1.83, "grad_norm": 0.5662003755569458, "learning_rate": 0.00019711408699357217, "loss": 1.7955, "step": 55133 }, { "epoch": 1.83, "grad_norm": 0.5696725845336914, "learning_rate": 0.00019710426883375748, "loss": 1.7193, "step": 55134 }, { "epoch": 1.83, "grad_norm": 0.5990938544273376, "learning_rate": 0.00019709445079884295, "loss": 1.7329, "step": 55135 }, { "epoch": 1.83, "grad_norm": 0.577796220779419, "learning_rate": 0.00019708463288884055, "loss": 1.7501, "step": 55136 }, { "epoch": 1.83, "grad_norm": 0.5941814184188843, "learning_rate": 0.00019707481510376244, "loss": 1.6548, "step": 55137 }, { "epoch": 1.83, "grad_norm": 0.5738027095794678, "learning_rate": 0.00019706499744362013, "loss": 1.8149, "step": 55138 }, { "epoch": 1.83, "grad_norm": 0.5656404495239258, "learning_rate": 0.0001970551799084258, "loss": 1.6961, "step": 55139 }, { "epoch": 1.83, "grad_norm": 0.557961642742157, "learning_rate": 0.0001970453624981914, "loss": 1.7528, "step": 55140 }, { "epoch": 1.83, "grad_norm": 0.5744898319244385, "learning_rate": 0.0001970355452129287, "loss": 1.7523, "step": 55141 }, { "epoch": 1.83, "grad_norm": 0.5601824522018433, "learning_rate": 0.00019702572805264967, "loss": 1.7156, "step": 55142 }, { "epoch": 1.83, "grad_norm": 0.5553699135780334, "learning_rate": 0.00019701591101736616, "loss": 1.7061, "step": 55143 }, { "epoch": 1.83, "grad_norm": 0.5812720656394958, "learning_rate": 0.00019700609410709037, "loss": 1.799, "step": 55144 }, { "epoch": 1.83, "grad_norm": 0.5577093362808228, "learning_rate": 0.0001969962773218338, "loss": 1.6852, "step": 55145 }, { "epoch": 1.83, "grad_norm": 0.5832073092460632, "learning_rate": 0.00019698646066160865, "loss": 1.7308, "step": 55146 }, { "epoch": 1.83, "grad_norm": 0.5616320967674255, "learning_rate": 0.00019697664412642678, "loss": 1.7331, "step": 55147 }, { "epoch": 1.83, "grad_norm": 0.5732324123382568, "learning_rate": 0.0001969668277163001, "loss": 1.6528, "step": 55148 }, { "epoch": 1.83, "grad_norm": 0.5820398926734924, "learning_rate": 0.0001969570114312404, "loss": 1.71, "step": 55149 }, { "epoch": 1.83, "grad_norm": 0.5704272985458374, "learning_rate": 0.00019694719527125985, "loss": 1.6767, "step": 55150 }, { "epoch": 1.83, "grad_norm": 0.5937227606773376, "learning_rate": 0.00019693737923637017, "loss": 1.779, "step": 55151 }, { "epoch": 1.83, "grad_norm": 0.6175925135612488, "learning_rate": 0.0001969275633265832, "loss": 1.763, "step": 55152 }, { "epoch": 1.83, "grad_norm": 0.5757188200950623, "learning_rate": 0.0001969177475419111, "loss": 1.7132, "step": 55153 }, { "epoch": 1.83, "grad_norm": 0.5763733983039856, "learning_rate": 0.00019690793188236567, "loss": 1.7269, "step": 55154 }, { "epoch": 1.84, "grad_norm": 0.575332522392273, "learning_rate": 0.0001968981163479588, "loss": 1.7427, "step": 55155 }, { "epoch": 1.84, "grad_norm": 0.5973445773124695, "learning_rate": 0.00019688830093870241, "loss": 1.7368, "step": 55156 }, { "epoch": 1.84, "grad_norm": 0.5984387993812561, "learning_rate": 0.00019687848565460854, "loss": 1.6801, "step": 55157 }, { "epoch": 1.84, "grad_norm": 0.598688006401062, "learning_rate": 0.0001968686704956888, "loss": 1.6704, "step": 55158 }, { "epoch": 1.84, "grad_norm": 0.5633633136749268, "learning_rate": 0.0001968588554619554, "loss": 1.6896, "step": 55159 }, { "epoch": 1.84, "grad_norm": 0.5884813070297241, "learning_rate": 0.0001968490405534202, "loss": 1.7453, "step": 55160 }, { "epoch": 1.84, "grad_norm": 0.5569795370101929, "learning_rate": 0.00019683922577009503, "loss": 1.71, "step": 55161 }, { "epoch": 1.84, "grad_norm": 0.5788999795913696, "learning_rate": 0.0001968294111119918, "loss": 1.6866, "step": 55162 }, { "epoch": 1.84, "grad_norm": 0.5850128531455994, "learning_rate": 0.0001968195965791225, "loss": 1.7167, "step": 55163 }, { "epoch": 1.84, "grad_norm": 0.5897790193557739, "learning_rate": 0.00019680978217149906, "loss": 1.7245, "step": 55164 }, { "epoch": 1.84, "grad_norm": 0.575184166431427, "learning_rate": 0.0001967999678891332, "loss": 1.7577, "step": 55165 }, { "epoch": 1.84, "grad_norm": 0.5691772699356079, "learning_rate": 0.00019679015373203702, "loss": 1.703, "step": 55166 }, { "epoch": 1.84, "grad_norm": 0.5944578647613525, "learning_rate": 0.00019678033970022248, "loss": 1.8168, "step": 55167 }, { "epoch": 1.84, "grad_norm": 0.5788830518722534, "learning_rate": 0.00019677052579370133, "loss": 1.8456, "step": 55168 }, { "epoch": 1.84, "grad_norm": 0.5928394794464111, "learning_rate": 0.00019676071201248554, "loss": 1.6766, "step": 55169 }, { "epoch": 1.84, "grad_norm": 0.5725478529930115, "learning_rate": 0.00019675089835658708, "loss": 1.7205, "step": 55170 }, { "epoch": 1.84, "grad_norm": 0.569128692150116, "learning_rate": 0.00019674108482601782, "loss": 1.7945, "step": 55171 }, { "epoch": 1.84, "grad_norm": 0.5630422234535217, "learning_rate": 0.00019673127142078955, "loss": 1.7124, "step": 55172 }, { "epoch": 1.84, "grad_norm": 0.5622739791870117, "learning_rate": 0.00019672145814091438, "loss": 1.7014, "step": 55173 }, { "epoch": 1.84, "grad_norm": 0.5897241830825806, "learning_rate": 0.0001967116449864042, "loss": 1.7674, "step": 55174 }, { "epoch": 1.84, "grad_norm": 0.5604220032691956, "learning_rate": 0.00019670183195727082, "loss": 1.6173, "step": 55175 }, { "epoch": 1.84, "grad_norm": 0.5773169994354248, "learning_rate": 0.00019669201905352617, "loss": 1.7954, "step": 55176 }, { "epoch": 1.84, "grad_norm": 0.5677125453948975, "learning_rate": 0.00019668220627518226, "loss": 1.7742, "step": 55177 }, { "epoch": 1.84, "grad_norm": 0.5652250647544861, "learning_rate": 0.00019667239362225085, "loss": 1.7067, "step": 55178 }, { "epoch": 1.84, "grad_norm": 0.5675665140151978, "learning_rate": 0.00019666258109474392, "loss": 1.6823, "step": 55179 }, { "epoch": 1.84, "grad_norm": 0.5751585960388184, "learning_rate": 0.0001966527686926735, "loss": 1.7833, "step": 55180 }, { "epoch": 1.84, "grad_norm": 0.5786107182502747, "learning_rate": 0.00019664295641605127, "loss": 1.7076, "step": 55181 }, { "epoch": 1.84, "grad_norm": 0.5631251335144043, "learning_rate": 0.0001966331442648893, "loss": 1.689, "step": 55182 }, { "epoch": 1.84, "grad_norm": 0.5847803354263306, "learning_rate": 0.0001966233322391995, "loss": 1.7147, "step": 55183 }, { "epoch": 1.84, "grad_norm": 0.5613805651664734, "learning_rate": 0.00019661352033899373, "loss": 1.7447, "step": 55184 }, { "epoch": 1.84, "grad_norm": 0.5781214237213135, "learning_rate": 0.00019660370856428388, "loss": 1.6317, "step": 55185 }, { "epoch": 1.84, "grad_norm": 0.5506272912025452, "learning_rate": 0.00019659389691508184, "loss": 1.7337, "step": 55186 }, { "epoch": 1.84, "grad_norm": 0.5787830352783203, "learning_rate": 0.00019658408539139976, "loss": 1.7535, "step": 55187 }, { "epoch": 1.84, "grad_norm": 0.5568540692329407, "learning_rate": 0.00019657427399324916, "loss": 1.6878, "step": 55188 }, { "epoch": 1.84, "grad_norm": 0.592334508895874, "learning_rate": 0.00019656446272064225, "loss": 1.7585, "step": 55189 }, { "epoch": 1.84, "grad_norm": 0.5820789933204651, "learning_rate": 0.0001965546515735909, "loss": 1.7613, "step": 55190 }, { "epoch": 1.84, "grad_norm": 0.9174913763999939, "learning_rate": 0.00019654484055210686, "loss": 1.7473, "step": 55191 }, { "epoch": 1.84, "grad_norm": 0.5854901671409607, "learning_rate": 0.00019653502965620218, "loss": 1.7613, "step": 55192 }, { "epoch": 1.84, "grad_norm": 0.5885050892829895, "learning_rate": 0.00019652521888588863, "loss": 1.6427, "step": 55193 }, { "epoch": 1.84, "grad_norm": 0.589535117149353, "learning_rate": 0.0001965154082411784, "loss": 1.6869, "step": 55194 }, { "epoch": 1.84, "grad_norm": 0.5786266922950745, "learning_rate": 0.0001965055977220831, "loss": 1.6587, "step": 55195 }, { "epoch": 1.84, "grad_norm": 1.0338042974472046, "learning_rate": 0.00019649578732861477, "loss": 1.6986, "step": 55196 }, { "epoch": 1.84, "grad_norm": 0.5699564218521118, "learning_rate": 0.00019648597706078537, "loss": 1.745, "step": 55197 }, { "epoch": 1.84, "grad_norm": 0.5775198936462402, "learning_rate": 0.00019647616691860663, "loss": 1.695, "step": 55198 }, { "epoch": 1.84, "grad_norm": 0.6150598526000977, "learning_rate": 0.00019646635690209066, "loss": 1.7792, "step": 55199 }, { "epoch": 1.84, "grad_norm": 0.5860666036605835, "learning_rate": 0.00019645654701124928, "loss": 1.7937, "step": 55200 }, { "epoch": 1.84, "grad_norm": 0.5631522536277771, "learning_rate": 0.00019644673724609436, "loss": 1.7778, "step": 55201 }, { "epoch": 1.84, "grad_norm": 0.5781003832817078, "learning_rate": 0.00019643692760663774, "loss": 1.7697, "step": 55202 }, { "epoch": 1.84, "grad_norm": 0.5812495946884155, "learning_rate": 0.00019642711809289155, "loss": 1.7799, "step": 55203 }, { "epoch": 1.84, "grad_norm": 0.570331871509552, "learning_rate": 0.0001964173087048676, "loss": 1.738, "step": 55204 }, { "epoch": 1.84, "grad_norm": 0.5628408193588257, "learning_rate": 0.00019640749944257774, "loss": 1.7439, "step": 55205 }, { "epoch": 1.84, "grad_norm": 0.5681331753730774, "learning_rate": 0.00019639769030603392, "loss": 1.7648, "step": 55206 }, { "epoch": 1.84, "grad_norm": 0.5598729848861694, "learning_rate": 0.00019638788129524803, "loss": 1.7712, "step": 55207 }, { "epoch": 1.84, "grad_norm": 0.5702282190322876, "learning_rate": 0.000196378072410232, "loss": 1.8095, "step": 55208 }, { "epoch": 1.84, "grad_norm": 0.5849233269691467, "learning_rate": 0.00019636826365099761, "loss": 1.81, "step": 55209 }, { "epoch": 1.84, "grad_norm": 0.5760454535484314, "learning_rate": 0.0001963584550175571, "loss": 1.6764, "step": 55210 }, { "epoch": 1.84, "grad_norm": 0.563660740852356, "learning_rate": 0.00019634864650992196, "loss": 1.6944, "step": 55211 }, { "epoch": 1.84, "grad_norm": 0.5713735818862915, "learning_rate": 0.00019633883812810436, "loss": 1.6946, "step": 55212 }, { "epoch": 1.84, "grad_norm": 0.5800513625144958, "learning_rate": 0.00019632902987211612, "loss": 1.7325, "step": 55213 }, { "epoch": 1.84, "grad_norm": 0.5865816473960876, "learning_rate": 0.00019631922174196922, "loss": 1.7299, "step": 55214 }, { "epoch": 1.84, "grad_norm": 0.5913509130477905, "learning_rate": 0.00019630941373767545, "loss": 1.7362, "step": 55215 }, { "epoch": 1.84, "grad_norm": 0.5982137322425842, "learning_rate": 0.0001962996058592467, "loss": 1.7681, "step": 55216 }, { "epoch": 1.84, "grad_norm": 0.5790061354637146, "learning_rate": 0.0001962897981066952, "loss": 1.7618, "step": 55217 }, { "epoch": 1.84, "grad_norm": 0.5796959400177002, "learning_rate": 0.00019627999048003232, "loss": 1.6411, "step": 55218 }, { "epoch": 1.84, "grad_norm": 0.5996963977813721, "learning_rate": 0.00019627018297927042, "loss": 1.8253, "step": 55219 }, { "epoch": 1.84, "grad_norm": 0.5901894569396973, "learning_rate": 0.00019626037560442115, "loss": 1.6914, "step": 55220 }, { "epoch": 1.84, "grad_norm": 0.5794878602027893, "learning_rate": 0.0001962505683554966, "loss": 1.7495, "step": 55221 }, { "epoch": 1.84, "grad_norm": 0.6041010022163391, "learning_rate": 0.00019624076123250848, "loss": 1.7301, "step": 55222 }, { "epoch": 1.84, "grad_norm": 0.5708182454109192, "learning_rate": 0.00019623095423546873, "loss": 1.7889, "step": 55223 }, { "epoch": 1.84, "grad_norm": 0.5702435374259949, "learning_rate": 0.00019622114736438952, "loss": 1.6249, "step": 55224 }, { "epoch": 1.84, "grad_norm": 0.5665565729141235, "learning_rate": 0.00019621134061928235, "loss": 1.7161, "step": 55225 }, { "epoch": 1.84, "grad_norm": 0.5736704468727112, "learning_rate": 0.0001962015340001594, "loss": 1.7004, "step": 55226 }, { "epoch": 1.84, "grad_norm": 0.5853767395019531, "learning_rate": 0.00019619172750703253, "loss": 1.7353, "step": 55227 }, { "epoch": 1.84, "grad_norm": 0.5745478272438049, "learning_rate": 0.00019618192113991358, "loss": 1.7455, "step": 55228 }, { "epoch": 1.84, "grad_norm": 0.5666413903236389, "learning_rate": 0.00019617211489881444, "loss": 1.6801, "step": 55229 }, { "epoch": 1.84, "grad_norm": 0.5629681944847107, "learning_rate": 0.00019616230878374702, "loss": 1.7606, "step": 55230 }, { "epoch": 1.84, "grad_norm": 0.5827373266220093, "learning_rate": 0.00019615250279472342, "loss": 1.7512, "step": 55231 }, { "epoch": 1.84, "grad_norm": 0.5621340274810791, "learning_rate": 0.0001961426969317552, "loss": 1.7473, "step": 55232 }, { "epoch": 1.84, "grad_norm": 0.5551856756210327, "learning_rate": 0.0001961328911948545, "loss": 1.6956, "step": 55233 }, { "epoch": 1.84, "grad_norm": 0.5586642622947693, "learning_rate": 0.00019612308558403325, "loss": 1.7551, "step": 55234 }, { "epoch": 1.84, "grad_norm": 0.5709241032600403, "learning_rate": 0.0001961132800993032, "loss": 1.7653, "step": 55235 }, { "epoch": 1.84, "grad_norm": 0.582130491733551, "learning_rate": 0.00019610347474067628, "loss": 1.7406, "step": 55236 }, { "epoch": 1.84, "grad_norm": 0.5880312919616699, "learning_rate": 0.00019609366950816456, "loss": 1.7401, "step": 55237 }, { "epoch": 1.84, "grad_norm": 0.5738301277160645, "learning_rate": 0.00019608386440177975, "loss": 1.6031, "step": 55238 }, { "epoch": 1.84, "grad_norm": 0.5631967782974243, "learning_rate": 0.00019607405942153369, "loss": 1.6607, "step": 55239 }, { "epoch": 1.84, "grad_norm": 0.5614783763885498, "learning_rate": 0.00019606425456743853, "loss": 1.7193, "step": 55240 }, { "epoch": 1.84, "grad_norm": 0.5890868902206421, "learning_rate": 0.00019605444983950612, "loss": 1.8131, "step": 55241 }, { "epoch": 1.84, "grad_norm": 0.5756587386131287, "learning_rate": 0.00019604464523774817, "loss": 1.7809, "step": 55242 }, { "epoch": 1.84, "grad_norm": 0.5674015879631042, "learning_rate": 0.00019603484076217674, "loss": 1.7359, "step": 55243 }, { "epoch": 1.84, "grad_norm": 0.5768505930900574, "learning_rate": 0.00019602503641280377, "loss": 1.7116, "step": 55244 }, { "epoch": 1.84, "grad_norm": 0.5787978172302246, "learning_rate": 0.00019601523218964098, "loss": 1.7797, "step": 55245 }, { "epoch": 1.84, "grad_norm": 0.571326732635498, "learning_rate": 0.00019600542809270036, "loss": 1.7287, "step": 55246 }, { "epoch": 1.84, "grad_norm": 0.5477153062820435, "learning_rate": 0.000195995624121994, "loss": 1.7331, "step": 55247 }, { "epoch": 1.84, "grad_norm": 0.5919092893600464, "learning_rate": 0.00019598582027753345, "loss": 1.7364, "step": 55248 }, { "epoch": 1.84, "grad_norm": 0.58888179063797, "learning_rate": 0.00019597601655933083, "loss": 1.7439, "step": 55249 }, { "epoch": 1.84, "grad_norm": 0.5784012675285339, "learning_rate": 0.000195966212967398, "loss": 1.7564, "step": 55250 }, { "epoch": 1.84, "grad_norm": 0.5572781562805176, "learning_rate": 0.00019595640950174694, "loss": 1.7051, "step": 55251 }, { "epoch": 1.84, "grad_norm": 0.5895259976387024, "learning_rate": 0.00019594660616238936, "loss": 1.7382, "step": 55252 }, { "epoch": 1.84, "grad_norm": 0.5709642171859741, "learning_rate": 0.00019593680294933727, "loss": 1.7618, "step": 55253 }, { "epoch": 1.84, "grad_norm": 0.5823925137519836, "learning_rate": 0.00019592699986260273, "loss": 1.6858, "step": 55254 }, { "epoch": 1.84, "grad_norm": 0.5817038416862488, "learning_rate": 0.00019591719690219728, "loss": 1.7598, "step": 55255 }, { "epoch": 1.84, "grad_norm": 0.5738663673400879, "learning_rate": 0.0001959073940681331, "loss": 1.7502, "step": 55256 }, { "epoch": 1.84, "grad_norm": 0.5706446170806885, "learning_rate": 0.00019589759136042205, "loss": 1.7067, "step": 55257 }, { "epoch": 1.84, "grad_norm": 0.5856943130493164, "learning_rate": 0.00019588778877907594, "loss": 1.6728, "step": 55258 }, { "epoch": 1.84, "grad_norm": 0.5635273456573486, "learning_rate": 0.0001958779863241067, "loss": 1.7038, "step": 55259 }, { "epoch": 1.84, "grad_norm": 0.5609838962554932, "learning_rate": 0.0001958681839955262, "loss": 1.6587, "step": 55260 }, { "epoch": 1.84, "grad_norm": 0.5813256502151489, "learning_rate": 0.0001958583817933465, "loss": 1.725, "step": 55261 }, { "epoch": 1.84, "grad_norm": 0.5578122138977051, "learning_rate": 0.00019584857971757927, "loss": 1.7338, "step": 55262 }, { "epoch": 1.84, "grad_norm": 0.5677599310874939, "learning_rate": 0.00019583877776823658, "loss": 1.7067, "step": 55263 }, { "epoch": 1.84, "grad_norm": 0.5657953023910522, "learning_rate": 0.00019582897594533027, "loss": 1.6889, "step": 55264 }, { "epoch": 1.84, "grad_norm": 0.5825560092926025, "learning_rate": 0.0001958191742488722, "loss": 1.774, "step": 55265 }, { "epoch": 1.84, "grad_norm": 0.5561230778694153, "learning_rate": 0.00019580937267887433, "loss": 1.7738, "step": 55266 }, { "epoch": 1.84, "grad_norm": 0.5788899660110474, "learning_rate": 0.00019579957123534855, "loss": 1.78, "step": 55267 }, { "epoch": 1.84, "grad_norm": 0.5988836884498596, "learning_rate": 0.00019578976991830663, "loss": 1.6952, "step": 55268 }, { "epoch": 1.84, "grad_norm": 0.5778411626815796, "learning_rate": 0.00019577996872776067, "loss": 1.7124, "step": 55269 }, { "epoch": 1.84, "grad_norm": 0.5793667435646057, "learning_rate": 0.00019577016766372246, "loss": 1.7698, "step": 55270 }, { "epoch": 1.84, "grad_norm": 0.5793725252151489, "learning_rate": 0.00019576036672620394, "loss": 1.6853, "step": 55271 }, { "epoch": 1.84, "grad_norm": 0.5948573350906372, "learning_rate": 0.00019575056591521688, "loss": 1.7477, "step": 55272 }, { "epoch": 1.84, "grad_norm": 0.5842556953430176, "learning_rate": 0.00019574076523077335, "loss": 1.6888, "step": 55273 }, { "epoch": 1.84, "grad_norm": 0.5769044756889343, "learning_rate": 0.0001957309646728852, "loss": 1.7264, "step": 55274 }, { "epoch": 1.84, "grad_norm": 0.5728225111961365, "learning_rate": 0.00019572116424156416, "loss": 1.7677, "step": 55275 }, { "epoch": 1.84, "grad_norm": 0.5732144117355347, "learning_rate": 0.00019571136393682235, "loss": 1.8034, "step": 55276 }, { "epoch": 1.84, "grad_norm": 0.5880789756774902, "learning_rate": 0.00019570156375867164, "loss": 1.7191, "step": 55277 }, { "epoch": 1.84, "grad_norm": 0.6113569736480713, "learning_rate": 0.00019569176370712375, "loss": 1.809, "step": 55278 }, { "epoch": 1.84, "grad_norm": 0.5634730458259583, "learning_rate": 0.00019568196378219073, "loss": 1.6899, "step": 55279 }, { "epoch": 1.84, "grad_norm": 0.570921003818512, "learning_rate": 0.00019567216398388442, "loss": 1.742, "step": 55280 }, { "epoch": 1.84, "grad_norm": 0.5843457579612732, "learning_rate": 0.00019566236431221683, "loss": 1.8484, "step": 55281 }, { "epoch": 1.84, "grad_norm": 0.6075517535209656, "learning_rate": 0.00019565256476719963, "loss": 1.7824, "step": 55282 }, { "epoch": 1.84, "grad_norm": 0.5972744226455688, "learning_rate": 0.00019564276534884487, "loss": 1.8057, "step": 55283 }, { "epoch": 1.84, "grad_norm": 0.6009905934333801, "learning_rate": 0.0001956329660571645, "loss": 1.7029, "step": 55284 }, { "epoch": 1.84, "grad_norm": 0.6000043153762817, "learning_rate": 0.00019562316689217027, "loss": 1.7862, "step": 55285 }, { "epoch": 1.84, "grad_norm": 0.5650899410247803, "learning_rate": 0.0001956133678538741, "loss": 1.6329, "step": 55286 }, { "epoch": 1.84, "grad_norm": 0.568423330783844, "learning_rate": 0.00019560356894228798, "loss": 1.6904, "step": 55287 }, { "epoch": 1.84, "grad_norm": 0.5832269191741943, "learning_rate": 0.00019559377015742374, "loss": 1.8143, "step": 55288 }, { "epoch": 1.84, "grad_norm": 0.5849069952964783, "learning_rate": 0.0001955839714992932, "loss": 1.7492, "step": 55289 }, { "epoch": 1.84, "grad_norm": 1.641376256942749, "learning_rate": 0.00019557417296790837, "loss": 1.6902, "step": 55290 }, { "epoch": 1.84, "grad_norm": 0.566609799861908, "learning_rate": 0.00019556437456328118, "loss": 1.6684, "step": 55291 }, { "epoch": 1.84, "grad_norm": 0.5696960091590881, "learning_rate": 0.0001955545762854234, "loss": 1.7378, "step": 55292 }, { "epoch": 1.84, "grad_norm": 0.5711424946784973, "learning_rate": 0.000195544778134347, "loss": 1.7273, "step": 55293 }, { "epoch": 1.84, "grad_norm": 0.5723592638969421, "learning_rate": 0.00019553498011006383, "loss": 1.7497, "step": 55294 }, { "epoch": 1.84, "grad_norm": 0.5790995955467224, "learning_rate": 0.0001955251822125858, "loss": 1.7852, "step": 55295 }, { "epoch": 1.84, "grad_norm": 0.5895918011665344, "learning_rate": 0.00019551538444192475, "loss": 1.7587, "step": 55296 }, { "epoch": 1.84, "grad_norm": 0.5633918046951294, "learning_rate": 0.0001955055867980928, "loss": 1.753, "step": 55297 }, { "epoch": 1.84, "grad_norm": 0.5702812671661377, "learning_rate": 0.00019549578928110148, "loss": 1.7812, "step": 55298 }, { "epoch": 1.84, "grad_norm": 0.6256771683692932, "learning_rate": 0.00019548599189096294, "loss": 1.7564, "step": 55299 }, { "epoch": 1.84, "grad_norm": 0.5639370679855347, "learning_rate": 0.00019547619462768898, "loss": 1.7279, "step": 55300 }, { "epoch": 1.84, "grad_norm": 0.557273805141449, "learning_rate": 0.00019546639749129163, "loss": 1.7511, "step": 55301 }, { "epoch": 1.84, "grad_norm": 0.590808629989624, "learning_rate": 0.00019545660048178256, "loss": 1.7012, "step": 55302 }, { "epoch": 1.84, "grad_norm": 0.5913090705871582, "learning_rate": 0.00019544680359917375, "loss": 1.7937, "step": 55303 }, { "epoch": 1.84, "grad_norm": 0.584995448589325, "learning_rate": 0.00019543700684347727, "loss": 1.6572, "step": 55304 }, { "epoch": 1.84, "grad_norm": 0.5872642993927002, "learning_rate": 0.0001954272102147047, "loss": 1.8029, "step": 55305 }, { "epoch": 1.84, "grad_norm": 0.5711658596992493, "learning_rate": 0.0001954174137128681, "loss": 1.7529, "step": 55306 }, { "epoch": 1.84, "grad_norm": 0.5835986733436584, "learning_rate": 0.0001954076173379795, "loss": 1.7169, "step": 55307 }, { "epoch": 1.84, "grad_norm": 0.588388979434967, "learning_rate": 0.0001953978210900505, "loss": 1.72, "step": 55308 }, { "epoch": 1.84, "grad_norm": 0.5718794465065002, "learning_rate": 0.0001953880249690932, "loss": 1.7763, "step": 55309 }, { "epoch": 1.84, "grad_norm": 0.5530903935432434, "learning_rate": 0.0001953782289751193, "loss": 1.6521, "step": 55310 }, { "epoch": 1.84, "grad_norm": 0.5669921636581421, "learning_rate": 0.00019536843310814106, "loss": 1.7744, "step": 55311 }, { "epoch": 1.84, "grad_norm": 0.5687416791915894, "learning_rate": 0.00019535863736816988, "loss": 1.7187, "step": 55312 }, { "epoch": 1.84, "grad_norm": 0.5752577185630798, "learning_rate": 0.00019534884175521797, "loss": 1.7517, "step": 55313 }, { "epoch": 1.84, "grad_norm": 0.6067082285881042, "learning_rate": 0.00019533904626929723, "loss": 1.748, "step": 55314 }, { "epoch": 1.84, "grad_norm": 0.5761491656303406, "learning_rate": 0.00019532925091041942, "loss": 1.7356, "step": 55315 }, { "epoch": 1.84, "grad_norm": 0.5693134069442749, "learning_rate": 0.00019531945567859645, "loss": 1.6765, "step": 55316 }, { "epoch": 1.84, "grad_norm": 0.5594109892845154, "learning_rate": 0.00019530966057384027, "loss": 1.7207, "step": 55317 }, { "epoch": 1.84, "grad_norm": 0.598799467086792, "learning_rate": 0.00019529986559616274, "loss": 1.6914, "step": 55318 }, { "epoch": 1.84, "grad_norm": 0.5925778150558472, "learning_rate": 0.00019529007074557567, "loss": 1.6925, "step": 55319 }, { "epoch": 1.84, "grad_norm": 0.5639840364456177, "learning_rate": 0.00019528027602209105, "loss": 1.7846, "step": 55320 }, { "epoch": 1.84, "grad_norm": 0.6066001653671265, "learning_rate": 0.00019527048142572081, "loss": 1.7682, "step": 55321 }, { "epoch": 1.84, "grad_norm": 0.5773820281028748, "learning_rate": 0.00019526068695647673, "loss": 1.7914, "step": 55322 }, { "epoch": 1.84, "grad_norm": 0.5652663111686707, "learning_rate": 0.00019525089261437074, "loss": 1.6878, "step": 55323 }, { "epoch": 1.84, "grad_norm": 0.627439558506012, "learning_rate": 0.0001952410983994148, "loss": 1.763, "step": 55324 }, { "epoch": 1.84, "grad_norm": 0.5854045748710632, "learning_rate": 0.00019523130431162064, "loss": 1.7594, "step": 55325 }, { "epoch": 1.84, "grad_norm": 0.5619756579399109, "learning_rate": 0.00019522151035100024, "loss": 1.7825, "step": 55326 }, { "epoch": 1.84, "grad_norm": 0.5834640860557556, "learning_rate": 0.00019521171651756562, "loss": 1.7295, "step": 55327 }, { "epoch": 1.84, "grad_norm": 0.5655062198638916, "learning_rate": 0.00019520192281132837, "loss": 1.7841, "step": 55328 }, { "epoch": 1.84, "grad_norm": 0.5719026923179626, "learning_rate": 0.0001951921292323006, "loss": 1.7328, "step": 55329 }, { "epoch": 1.84, "grad_norm": 0.5688188672065735, "learning_rate": 0.00019518233578049416, "loss": 1.6739, "step": 55330 }, { "epoch": 1.84, "grad_norm": 0.5804973244667053, "learning_rate": 0.00019517254245592097, "loss": 1.8375, "step": 55331 }, { "epoch": 1.84, "grad_norm": 0.5866074562072754, "learning_rate": 0.00019516274925859278, "loss": 1.7372, "step": 55332 }, { "epoch": 1.84, "grad_norm": 0.5822543501853943, "learning_rate": 0.00019515295618852154, "loss": 1.6487, "step": 55333 }, { "epoch": 1.84, "grad_norm": 0.5797688961029053, "learning_rate": 0.00019514316324571935, "loss": 1.6893, "step": 55334 }, { "epoch": 1.84, "grad_norm": 0.575226902961731, "learning_rate": 0.00019513337043019767, "loss": 1.6937, "step": 55335 }, { "epoch": 1.84, "grad_norm": 0.5668213963508606, "learning_rate": 0.00019512357774196875, "loss": 1.7414, "step": 55336 }, { "epoch": 1.84, "grad_norm": 0.5654938817024231, "learning_rate": 0.0001951137851810444, "loss": 1.7765, "step": 55337 }, { "epoch": 1.84, "grad_norm": 0.5861535668373108, "learning_rate": 0.00019510399274743639, "loss": 1.7468, "step": 55338 }, { "epoch": 1.84, "grad_norm": 0.561763346195221, "learning_rate": 0.0001950942004411567, "loss": 1.7103, "step": 55339 }, { "epoch": 1.84, "grad_norm": 0.6048444509506226, "learning_rate": 0.00019508440826221707, "loss": 1.8055, "step": 55340 }, { "epoch": 1.84, "grad_norm": 0.5816453695297241, "learning_rate": 0.00019507461621062977, "loss": 1.6958, "step": 55341 }, { "epoch": 1.84, "grad_norm": 0.581446647644043, "learning_rate": 0.0001950648242864062, "loss": 1.7525, "step": 55342 }, { "epoch": 1.84, "grad_norm": 0.5921391248703003, "learning_rate": 0.00019505503248955856, "loss": 1.7287, "step": 55343 }, { "epoch": 1.84, "grad_norm": 0.5966537594795227, "learning_rate": 0.00019504524082009867, "loss": 1.7458, "step": 55344 }, { "epoch": 1.84, "grad_norm": 0.5928130745887756, "learning_rate": 0.00019503544927803836, "loss": 1.8586, "step": 55345 }, { "epoch": 1.84, "grad_norm": 0.5917264819145203, "learning_rate": 0.00019502565786338956, "loss": 1.724, "step": 55346 }, { "epoch": 1.84, "grad_norm": 0.612001895904541, "learning_rate": 0.00019501586657616406, "loss": 1.6905, "step": 55347 }, { "epoch": 1.84, "grad_norm": 0.6050892472267151, "learning_rate": 0.000195006075416374, "loss": 1.7724, "step": 55348 }, { "epoch": 1.84, "grad_norm": 0.5866277813911438, "learning_rate": 0.00019499628438403093, "loss": 1.7843, "step": 55349 }, { "epoch": 1.84, "grad_norm": 0.5920944809913635, "learning_rate": 0.00019498649347914694, "loss": 1.7071, "step": 55350 }, { "epoch": 1.84, "grad_norm": 0.5782763361930847, "learning_rate": 0.00019497670270173396, "loss": 1.7243, "step": 55351 }, { "epoch": 1.84, "grad_norm": 0.5908235907554626, "learning_rate": 0.00019496691205180374, "loss": 1.7312, "step": 55352 }, { "epoch": 1.84, "grad_norm": 0.5800187587738037, "learning_rate": 0.00019495712152936816, "loss": 1.6866, "step": 55353 }, { "epoch": 1.84, "grad_norm": 0.5619417428970337, "learning_rate": 0.0001949473311344393, "loss": 1.6925, "step": 55354 }, { "epoch": 1.84, "grad_norm": 0.5916250348091125, "learning_rate": 0.00019493754086702877, "loss": 1.8054, "step": 55355 }, { "epoch": 1.84, "grad_norm": 0.5904877781867981, "learning_rate": 0.00019492775072714854, "loss": 1.7084, "step": 55356 }, { "epoch": 1.84, "grad_norm": 0.5915987491607666, "learning_rate": 0.0001949179607148106, "loss": 1.7607, "step": 55357 }, { "epoch": 1.84, "grad_norm": 0.5685688257217407, "learning_rate": 0.00019490817083002684, "loss": 1.7429, "step": 55358 }, { "epoch": 1.84, "grad_norm": 0.5666696429252625, "learning_rate": 0.00019489838107280904, "loss": 1.6816, "step": 55359 }, { "epoch": 1.84, "grad_norm": 0.5716186761856079, "learning_rate": 0.00019488859144316907, "loss": 1.7929, "step": 55360 }, { "epoch": 1.84, "grad_norm": 0.5761168599128723, "learning_rate": 0.00019487880194111893, "loss": 1.7721, "step": 55361 }, { "epoch": 1.84, "grad_norm": 0.600842297077179, "learning_rate": 0.0001948690125666704, "loss": 1.7603, "step": 55362 }, { "epoch": 1.84, "grad_norm": 0.5985011458396912, "learning_rate": 0.00019485922331983533, "loss": 1.7741, "step": 55363 }, { "epoch": 1.84, "grad_norm": 0.6017090678215027, "learning_rate": 0.0001948494342006259, "loss": 1.7497, "step": 55364 }, { "epoch": 1.84, "grad_norm": 0.5588511228561401, "learning_rate": 0.00019483964520905348, "loss": 1.746, "step": 55365 }, { "epoch": 1.84, "grad_norm": 0.6407998204231262, "learning_rate": 0.00019482985634513037, "loss": 1.8121, "step": 55366 }, { "epoch": 1.84, "grad_norm": 0.5761517882347107, "learning_rate": 0.00019482006760886827, "loss": 1.7213, "step": 55367 }, { "epoch": 1.84, "grad_norm": 0.5594378113746643, "learning_rate": 0.00019481027900027925, "loss": 1.7632, "step": 55368 }, { "epoch": 1.84, "grad_norm": 0.5704658031463623, "learning_rate": 0.00019480049051937492, "loss": 1.7573, "step": 55369 }, { "epoch": 1.84, "grad_norm": 0.572881817817688, "learning_rate": 0.00019479070216616725, "loss": 1.7176, "step": 55370 }, { "epoch": 1.84, "grad_norm": 0.5819440484046936, "learning_rate": 0.0001947809139406684, "loss": 1.7805, "step": 55371 }, { "epoch": 1.84, "grad_norm": 0.6136332750320435, "learning_rate": 0.00019477112584288976, "loss": 1.771, "step": 55372 }, { "epoch": 1.84, "grad_norm": 0.5976280570030212, "learning_rate": 0.00019476133787284356, "loss": 1.7941, "step": 55373 }, { "epoch": 1.84, "grad_norm": 0.5831419825553894, "learning_rate": 0.00019475155003054163, "loss": 1.7447, "step": 55374 }, { "epoch": 1.84, "grad_norm": 0.5684069991111755, "learning_rate": 0.0001947417623159958, "loss": 1.7465, "step": 55375 }, { "epoch": 1.84, "grad_norm": 0.572333812713623, "learning_rate": 0.00019473197472921793, "loss": 1.7723, "step": 55376 }, { "epoch": 1.84, "grad_norm": 0.5661160349845886, "learning_rate": 0.00019472218727021988, "loss": 1.7511, "step": 55377 }, { "epoch": 1.84, "grad_norm": 0.5875180959701538, "learning_rate": 0.0001947123999390137, "loss": 1.7169, "step": 55378 }, { "epoch": 1.84, "grad_norm": 0.5973072052001953, "learning_rate": 0.00019470261273561105, "loss": 1.6831, "step": 55379 }, { "epoch": 1.84, "grad_norm": 0.5566776990890503, "learning_rate": 0.00019469282566002397, "loss": 1.7925, "step": 55380 }, { "epoch": 1.84, "grad_norm": 0.5774474143981934, "learning_rate": 0.00019468303871226432, "loss": 1.7503, "step": 55381 }, { "epoch": 1.84, "grad_norm": 0.58945631980896, "learning_rate": 0.00019467325189234387, "loss": 1.784, "step": 55382 }, { "epoch": 1.84, "grad_norm": 0.5882489085197449, "learning_rate": 0.00019466346520027457, "loss": 1.7229, "step": 55383 }, { "epoch": 1.84, "grad_norm": 0.5826557874679565, "learning_rate": 0.0001946536786360684, "loss": 1.7579, "step": 55384 }, { "epoch": 1.84, "grad_norm": 0.5830718278884888, "learning_rate": 0.00019464389219973704, "loss": 1.7921, "step": 55385 }, { "epoch": 1.84, "grad_norm": 0.5869216918945312, "learning_rate": 0.00019463410589129249, "loss": 1.6161, "step": 55386 }, { "epoch": 1.84, "grad_norm": 0.6177464127540588, "learning_rate": 0.0001946243197107466, "loss": 1.7416, "step": 55387 }, { "epoch": 1.84, "grad_norm": 0.572412371635437, "learning_rate": 0.00019461453365811132, "loss": 1.8048, "step": 55388 }, { "epoch": 1.84, "grad_norm": 0.8709550499916077, "learning_rate": 0.00019460474773339841, "loss": 1.7369, "step": 55389 }, { "epoch": 1.84, "grad_norm": 0.5828526020050049, "learning_rate": 0.00019459496193661985, "loss": 1.6088, "step": 55390 }, { "epoch": 1.84, "grad_norm": 0.5844572186470032, "learning_rate": 0.00019458517626778752, "loss": 1.7206, "step": 55391 }, { "epoch": 1.84, "grad_norm": 0.5815366506576538, "learning_rate": 0.00019457539072691312, "loss": 1.7372, "step": 55392 }, { "epoch": 1.84, "grad_norm": 0.6041094660758972, "learning_rate": 0.00019456560531400872, "loss": 1.8301, "step": 55393 }, { "epoch": 1.84, "grad_norm": 0.5880311727523804, "learning_rate": 0.0001945558200290862, "loss": 1.7205, "step": 55394 }, { "epoch": 1.84, "grad_norm": 0.5579758286476135, "learning_rate": 0.00019454603487215733, "loss": 1.6675, "step": 55395 }, { "epoch": 1.84, "grad_norm": 0.6009455919265747, "learning_rate": 0.00019453624984323403, "loss": 1.7699, "step": 55396 }, { "epoch": 1.84, "grad_norm": 0.582804799079895, "learning_rate": 0.00019452646494232814, "loss": 1.8114, "step": 55397 }, { "epoch": 1.84, "grad_norm": 0.5741652846336365, "learning_rate": 0.0001945166801694517, "loss": 1.7428, "step": 55398 }, { "epoch": 1.84, "grad_norm": 0.5639420747756958, "learning_rate": 0.00019450689552461635, "loss": 1.6705, "step": 55399 }, { "epoch": 1.84, "grad_norm": 0.5880053639411926, "learning_rate": 0.00019449711100783413, "loss": 1.7357, "step": 55400 }, { "epoch": 1.84, "grad_norm": 0.5902950167655945, "learning_rate": 0.00019448732661911692, "loss": 1.7533, "step": 55401 }, { "epoch": 1.84, "grad_norm": 0.5665630102157593, "learning_rate": 0.0001944775423584765, "loss": 1.8098, "step": 55402 }, { "epoch": 1.84, "grad_norm": 0.5794261693954468, "learning_rate": 0.00019446775822592476, "loss": 1.7762, "step": 55403 }, { "epoch": 1.84, "grad_norm": 0.568026602268219, "learning_rate": 0.00019445797422147371, "loss": 1.7238, "step": 55404 }, { "epoch": 1.84, "grad_norm": 0.5698922276496887, "learning_rate": 0.00019444819034513506, "loss": 1.755, "step": 55405 }, { "epoch": 1.84, "grad_norm": 0.5767945051193237, "learning_rate": 0.0001944384065969207, "loss": 1.8185, "step": 55406 }, { "epoch": 1.84, "grad_norm": 0.6060461401939392, "learning_rate": 0.00019442862297684263, "loss": 1.7957, "step": 55407 }, { "epoch": 1.84, "grad_norm": 0.598648190498352, "learning_rate": 0.00019441883948491273, "loss": 1.7657, "step": 55408 }, { "epoch": 1.84, "grad_norm": 0.5763727426528931, "learning_rate": 0.0001944090561211427, "loss": 1.7006, "step": 55409 }, { "epoch": 1.84, "grad_norm": 0.5576131939888, "learning_rate": 0.0001943992728855445, "loss": 1.6908, "step": 55410 }, { "epoch": 1.84, "grad_norm": 0.5750431418418884, "learning_rate": 0.00019438948977813013, "loss": 1.6915, "step": 55411 }, { "epoch": 1.84, "grad_norm": 0.5778355002403259, "learning_rate": 0.00019437970679891129, "loss": 1.7069, "step": 55412 }, { "epoch": 1.84, "grad_norm": 0.5850872993469238, "learning_rate": 0.0001943699239478999, "loss": 1.7059, "step": 55413 }, { "epoch": 1.84, "grad_norm": 0.5776824951171875, "learning_rate": 0.00019436014122510797, "loss": 1.7866, "step": 55414 }, { "epoch": 1.84, "grad_norm": 0.5873444676399231, "learning_rate": 0.0001943503586305471, "loss": 1.7269, "step": 55415 }, { "epoch": 1.84, "grad_norm": 0.6035696268081665, "learning_rate": 0.00019434057616422938, "loss": 1.7706, "step": 55416 }, { "epoch": 1.84, "grad_norm": 0.5818225145339966, "learning_rate": 0.00019433079382616666, "loss": 1.7653, "step": 55417 }, { "epoch": 1.84, "grad_norm": 0.5960595607757568, "learning_rate": 0.00019432101161637083, "loss": 1.7825, "step": 55418 }, { "epoch": 1.84, "grad_norm": 0.6016345024108887, "learning_rate": 0.0001943112295348537, "loss": 1.6994, "step": 55419 }, { "epoch": 1.84, "grad_norm": 0.5748039484024048, "learning_rate": 0.00019430144758162706, "loss": 1.6973, "step": 55420 }, { "epoch": 1.84, "grad_norm": 0.5834479331970215, "learning_rate": 0.00019429166575670308, "loss": 1.7495, "step": 55421 }, { "epoch": 1.84, "grad_norm": 0.6010476350784302, "learning_rate": 0.00019428188406009323, "loss": 1.7484, "step": 55422 }, { "epoch": 1.84, "grad_norm": 0.5952821969985962, "learning_rate": 0.00019427210249180972, "loss": 1.7355, "step": 55423 }, { "epoch": 1.84, "grad_norm": 0.5766929388046265, "learning_rate": 0.00019426232105186435, "loss": 1.7103, "step": 55424 }, { "epoch": 1.84, "grad_norm": 0.5699343681335449, "learning_rate": 0.00019425253974026887, "loss": 1.7086, "step": 55425 }, { "epoch": 1.84, "grad_norm": 0.5813679099082947, "learning_rate": 0.00019424275855703518, "loss": 1.7694, "step": 55426 }, { "epoch": 1.84, "grad_norm": 0.5961466431617737, "learning_rate": 0.0001942329775021752, "loss": 1.7451, "step": 55427 }, { "epoch": 1.84, "grad_norm": 0.6101808547973633, "learning_rate": 0.00019422319657570093, "loss": 1.7104, "step": 55428 }, { "epoch": 1.84, "grad_norm": 0.5729995369911194, "learning_rate": 0.000194213415777624, "loss": 1.7925, "step": 55429 }, { "epoch": 1.84, "grad_norm": 0.562262237071991, "learning_rate": 0.0001942036351079564, "loss": 1.7377, "step": 55430 }, { "epoch": 1.84, "grad_norm": 0.5912166237831116, "learning_rate": 0.00019419385456671007, "loss": 1.7517, "step": 55431 }, { "epoch": 1.84, "grad_norm": 0.5687295794487, "learning_rate": 0.00019418407415389675, "loss": 1.7818, "step": 55432 }, { "epoch": 1.84, "grad_norm": 0.5998072028160095, "learning_rate": 0.00019417429386952838, "loss": 1.7431, "step": 55433 }, { "epoch": 1.84, "grad_norm": 0.5705221891403198, "learning_rate": 0.0001941645137136169, "loss": 1.7173, "step": 55434 }, { "epoch": 1.84, "grad_norm": 0.5760173201560974, "learning_rate": 0.00019415473368617403, "loss": 1.7446, "step": 55435 }, { "epoch": 1.84, "grad_norm": 0.5724644064903259, "learning_rate": 0.0001941449537872116, "loss": 1.6932, "step": 55436 }, { "epoch": 1.84, "grad_norm": 0.5680205821990967, "learning_rate": 0.00019413517401674176, "loss": 1.7913, "step": 55437 }, { "epoch": 1.84, "grad_norm": 0.5871708393096924, "learning_rate": 0.00019412539437477626, "loss": 1.7573, "step": 55438 }, { "epoch": 1.84, "grad_norm": 0.5958691835403442, "learning_rate": 0.0001941156148613268, "loss": 1.7404, "step": 55439 }, { "epoch": 1.84, "grad_norm": 0.5715280175209045, "learning_rate": 0.00019410583547640548, "loss": 1.8045, "step": 55440 }, { "epoch": 1.84, "grad_norm": 0.5926515460014343, "learning_rate": 0.00019409605622002407, "loss": 1.7707, "step": 55441 }, { "epoch": 1.84, "grad_norm": 0.5995834469795227, "learning_rate": 0.00019408627709219438, "loss": 1.7499, "step": 55442 }, { "epoch": 1.84, "grad_norm": 0.5712005496025085, "learning_rate": 0.0001940764980929283, "loss": 1.729, "step": 55443 }, { "epoch": 1.84, "grad_norm": 0.582849383354187, "learning_rate": 0.00019406671922223793, "loss": 1.6389, "step": 55444 }, { "epoch": 1.84, "grad_norm": 0.5857598185539246, "learning_rate": 0.00019405694048013478, "loss": 1.7811, "step": 55445 }, { "epoch": 1.84, "grad_norm": 0.5860005617141724, "learning_rate": 0.00019404716186663095, "loss": 1.6748, "step": 55446 }, { "epoch": 1.84, "grad_norm": 0.5610078573226929, "learning_rate": 0.00019403738338173823, "loss": 1.781, "step": 55447 }, { "epoch": 1.84, "grad_norm": 0.5960716009140015, "learning_rate": 0.0001940276050254686, "loss": 1.7146, "step": 55448 }, { "epoch": 1.84, "grad_norm": 0.5863560438156128, "learning_rate": 0.00019401782679783378, "loss": 1.7369, "step": 55449 }, { "epoch": 1.84, "grad_norm": 0.5775050520896912, "learning_rate": 0.00019400804869884562, "loss": 1.7159, "step": 55450 }, { "epoch": 1.84, "grad_norm": 0.563932478427887, "learning_rate": 0.0001939982707285163, "loss": 1.6817, "step": 55451 }, { "epoch": 1.84, "grad_norm": 0.5702658891677856, "learning_rate": 0.00019398849288685725, "loss": 1.7743, "step": 55452 }, { "epoch": 1.84, "grad_norm": 0.5993022322654724, "learning_rate": 0.00019397871517388057, "loss": 1.7016, "step": 55453 }, { "epoch": 1.84, "grad_norm": 0.5514323711395264, "learning_rate": 0.00019396893758959823, "loss": 1.6986, "step": 55454 }, { "epoch": 1.84, "grad_norm": 0.5875970721244812, "learning_rate": 0.0001939591601340219, "loss": 1.6812, "step": 55455 }, { "epoch": 1.85, "grad_norm": 0.5934790968894958, "learning_rate": 0.0001939493828071635, "loss": 1.7277, "step": 55456 }, { "epoch": 1.85, "grad_norm": 0.5875672101974487, "learning_rate": 0.0001939396056090349, "loss": 1.8026, "step": 55457 }, { "epoch": 1.85, "grad_norm": 0.5851253271102905, "learning_rate": 0.00019392982853964815, "loss": 1.7721, "step": 55458 }, { "epoch": 1.85, "grad_norm": 0.5865694284439087, "learning_rate": 0.00019392005159901482, "loss": 1.7128, "step": 55459 }, { "epoch": 1.85, "grad_norm": 0.5994710922241211, "learning_rate": 0.0001939102747871469, "loss": 1.7446, "step": 55460 }, { "epoch": 1.85, "grad_norm": 0.566805899143219, "learning_rate": 0.00019390049810405637, "loss": 1.7286, "step": 55461 }, { "epoch": 1.85, "grad_norm": 0.5794178247451782, "learning_rate": 0.00019389072154975495, "loss": 1.7558, "step": 55462 }, { "epoch": 1.85, "grad_norm": 0.5680704116821289, "learning_rate": 0.00019388094512425458, "loss": 1.7235, "step": 55463 }, { "epoch": 1.85, "grad_norm": 0.587272584438324, "learning_rate": 0.00019387116882756703, "loss": 1.7876, "step": 55464 }, { "epoch": 1.85, "grad_norm": 0.5989198684692383, "learning_rate": 0.0001938613926597044, "loss": 1.7424, "step": 55465 }, { "epoch": 1.85, "grad_norm": 0.588252067565918, "learning_rate": 0.00019385161662067825, "loss": 1.7211, "step": 55466 }, { "epoch": 1.85, "grad_norm": 0.5682669878005981, "learning_rate": 0.00019384184071050064, "loss": 1.7034, "step": 55467 }, { "epoch": 1.85, "grad_norm": 0.597480833530426, "learning_rate": 0.00019383206492918346, "loss": 1.7849, "step": 55468 }, { "epoch": 1.85, "grad_norm": 0.5934866666793823, "learning_rate": 0.00019382228927673846, "loss": 1.7757, "step": 55469 }, { "epoch": 1.85, "grad_norm": 0.572209894657135, "learning_rate": 0.00019381251375317753, "loss": 1.6869, "step": 55470 }, { "epoch": 1.85, "grad_norm": 0.5949051976203918, "learning_rate": 0.00019380273835851262, "loss": 1.7074, "step": 55471 }, { "epoch": 1.85, "grad_norm": 0.5849578976631165, "learning_rate": 0.0001937929630927555, "loss": 1.7798, "step": 55472 }, { "epoch": 1.85, "grad_norm": 0.6062964200973511, "learning_rate": 0.00019378318795591805, "loss": 1.769, "step": 55473 }, { "epoch": 1.85, "grad_norm": 0.6010141968727112, "learning_rate": 0.00019377341294801216, "loss": 1.7512, "step": 55474 }, { "epoch": 1.85, "grad_norm": 0.6138074994087219, "learning_rate": 0.00019376363806904978, "loss": 1.7189, "step": 55475 }, { "epoch": 1.85, "grad_norm": 0.5686543583869934, "learning_rate": 0.00019375386331904262, "loss": 1.6883, "step": 55476 }, { "epoch": 1.85, "grad_norm": 0.5774268507957458, "learning_rate": 0.00019374408869800262, "loss": 1.7011, "step": 55477 }, { "epoch": 1.85, "grad_norm": 0.5849278569221497, "learning_rate": 0.0001937343142059417, "loss": 1.7245, "step": 55478 }, { "epoch": 1.85, "grad_norm": 0.5825653672218323, "learning_rate": 0.0001937245398428716, "loss": 1.7375, "step": 55479 }, { "epoch": 1.85, "grad_norm": 0.5757724642753601, "learning_rate": 0.00019371476560880421, "loss": 1.6865, "step": 55480 }, { "epoch": 1.85, "grad_norm": 0.571071445941925, "learning_rate": 0.0001937049915037516, "loss": 1.7287, "step": 55481 }, { "epoch": 1.85, "grad_norm": 0.6022063493728638, "learning_rate": 0.0001936952175277253, "loss": 1.698, "step": 55482 }, { "epoch": 1.85, "grad_norm": 0.5831781625747681, "learning_rate": 0.00019368544368073739, "loss": 1.74, "step": 55483 }, { "epoch": 1.85, "grad_norm": 0.5811159610748291, "learning_rate": 0.00019367566996279968, "loss": 1.6808, "step": 55484 }, { "epoch": 1.85, "grad_norm": 0.5770167708396912, "learning_rate": 0.0001936658963739241, "loss": 1.7709, "step": 55485 }, { "epoch": 1.85, "grad_norm": 0.5714618563652039, "learning_rate": 0.00019365612291412243, "loss": 1.7196, "step": 55486 }, { "epoch": 1.85, "grad_norm": 0.581633448600769, "learning_rate": 0.00019364634958340645, "loss": 1.7424, "step": 55487 }, { "epoch": 1.85, "grad_norm": 0.6094950437545776, "learning_rate": 0.0001936365763817883, "loss": 1.7662, "step": 55488 }, { "epoch": 1.85, "grad_norm": 0.5759963393211365, "learning_rate": 0.00019362680330927958, "loss": 1.6863, "step": 55489 }, { "epoch": 1.85, "grad_norm": 0.5615593791007996, "learning_rate": 0.00019361703036589221, "loss": 1.7924, "step": 55490 }, { "epoch": 1.85, "grad_norm": 0.5728053450584412, "learning_rate": 0.0001936072575516382, "loss": 1.7504, "step": 55491 }, { "epoch": 1.85, "grad_norm": 0.5846853852272034, "learning_rate": 0.00019359748486652925, "loss": 1.7625, "step": 55492 }, { "epoch": 1.85, "grad_norm": 0.5849170684814453, "learning_rate": 0.00019358771231057728, "loss": 1.7538, "step": 55493 }, { "epoch": 1.85, "grad_norm": 0.5910176038742065, "learning_rate": 0.00019357793988379406, "loss": 1.7169, "step": 55494 }, { "epoch": 1.85, "grad_norm": 0.5767419338226318, "learning_rate": 0.00019356816758619172, "loss": 1.7391, "step": 55495 }, { "epoch": 1.85, "grad_norm": 0.5553480982780457, "learning_rate": 0.00019355839541778175, "loss": 1.7274, "step": 55496 }, { "epoch": 1.85, "grad_norm": 0.5694435834884644, "learning_rate": 0.00019354862337857634, "loss": 1.7678, "step": 55497 }, { "epoch": 1.85, "grad_norm": 0.5803172588348389, "learning_rate": 0.0001935388514685872, "loss": 1.7443, "step": 55498 }, { "epoch": 1.85, "grad_norm": 0.5857160687446594, "learning_rate": 0.00019352907968782617, "loss": 1.7005, "step": 55499 }, { "epoch": 1.85, "grad_norm": 0.5964331030845642, "learning_rate": 0.00019351930803630514, "loss": 1.7004, "step": 55500 }, { "epoch": 1.85, "grad_norm": 0.5717232823371887, "learning_rate": 0.00019350953651403605, "loss": 1.6913, "step": 55501 }, { "epoch": 1.85, "grad_norm": 0.5689801573753357, "learning_rate": 0.00019349976512103058, "loss": 1.6587, "step": 55502 }, { "epoch": 1.85, "grad_norm": 0.5522441267967224, "learning_rate": 0.00019348999385730077, "loss": 1.7328, "step": 55503 }, { "epoch": 1.85, "grad_norm": 0.5588070154190063, "learning_rate": 0.0001934802227228584, "loss": 1.6403, "step": 55504 }, { "epoch": 1.85, "grad_norm": 0.6037406921386719, "learning_rate": 0.00019347045171771543, "loss": 1.7284, "step": 55505 }, { "epoch": 1.85, "grad_norm": 0.5950527191162109, "learning_rate": 0.00019346068084188353, "loss": 1.7375, "step": 55506 }, { "epoch": 1.85, "grad_norm": 0.5702548623085022, "learning_rate": 0.0001934509100953747, "loss": 1.7281, "step": 55507 }, { "epoch": 1.85, "grad_norm": 0.5580790638923645, "learning_rate": 0.00019344113947820078, "loss": 1.7061, "step": 55508 }, { "epoch": 1.85, "grad_norm": 0.6108351945877075, "learning_rate": 0.00019343136899037353, "loss": 1.6975, "step": 55509 }, { "epoch": 1.85, "grad_norm": 0.5997098088264465, "learning_rate": 0.000193421598631905, "loss": 1.6814, "step": 55510 }, { "epoch": 1.85, "grad_norm": 0.5783941149711609, "learning_rate": 0.00019341182840280693, "loss": 1.6954, "step": 55511 }, { "epoch": 1.85, "grad_norm": 0.6198548078536987, "learning_rate": 0.00019340205830309116, "loss": 1.7412, "step": 55512 }, { "epoch": 1.85, "grad_norm": 0.6117647290229797, "learning_rate": 0.0001933922883327696, "loss": 1.7229, "step": 55513 }, { "epoch": 1.85, "grad_norm": 0.5789432525634766, "learning_rate": 0.00019338251849185408, "loss": 1.7618, "step": 55514 }, { "epoch": 1.85, "grad_norm": 0.5863460302352905, "learning_rate": 0.00019337274878035654, "loss": 1.7701, "step": 55515 }, { "epoch": 1.85, "grad_norm": 0.5945619940757751, "learning_rate": 0.00019336297919828868, "loss": 1.7786, "step": 55516 }, { "epoch": 1.85, "grad_norm": 0.5832988619804382, "learning_rate": 0.00019335320974566247, "loss": 1.6755, "step": 55517 }, { "epoch": 1.85, "grad_norm": 0.601870059967041, "learning_rate": 0.0001933434404224898, "loss": 1.7728, "step": 55518 }, { "epoch": 1.85, "grad_norm": 0.5797092914581299, "learning_rate": 0.00019333367122878245, "loss": 1.8254, "step": 55519 }, { "epoch": 1.85, "grad_norm": 0.5707264542579651, "learning_rate": 0.00019332390216455228, "loss": 1.7885, "step": 55520 }, { "epoch": 1.85, "grad_norm": 0.562912106513977, "learning_rate": 0.00019331413322981126, "loss": 1.7586, "step": 55521 }, { "epoch": 1.85, "grad_norm": 0.576306939125061, "learning_rate": 0.00019330436442457108, "loss": 1.7577, "step": 55522 }, { "epoch": 1.85, "grad_norm": 0.5814162492752075, "learning_rate": 0.00019329459574884362, "loss": 1.726, "step": 55523 }, { "epoch": 1.85, "grad_norm": 0.5592673420906067, "learning_rate": 0.00019328482720264089, "loss": 1.7388, "step": 55524 }, { "epoch": 1.85, "grad_norm": 0.5796229243278503, "learning_rate": 0.0001932750587859747, "loss": 1.8061, "step": 55525 }, { "epoch": 1.85, "grad_norm": 0.5578200221061707, "learning_rate": 0.00019326529049885682, "loss": 1.7298, "step": 55526 }, { "epoch": 1.85, "grad_norm": 0.6100658178329468, "learning_rate": 0.00019325552234129913, "loss": 1.7038, "step": 55527 }, { "epoch": 1.85, "grad_norm": 0.5726960301399231, "learning_rate": 0.00019324575431331356, "loss": 1.7547, "step": 55528 }, { "epoch": 1.85, "grad_norm": 0.5749787092208862, "learning_rate": 0.00019323598641491187, "loss": 1.7263, "step": 55529 }, { "epoch": 1.85, "grad_norm": 0.5873602032661438, "learning_rate": 0.00019322621864610587, "loss": 1.733, "step": 55530 }, { "epoch": 1.85, "grad_norm": 0.5811951160430908, "learning_rate": 0.00019321645100690775, "loss": 1.7554, "step": 55531 }, { "epoch": 1.85, "grad_norm": 0.5758061408996582, "learning_rate": 0.0001932066834973289, "loss": 1.7682, "step": 55532 }, { "epoch": 1.85, "grad_norm": 0.5689138770103455, "learning_rate": 0.00019319691611738147, "loss": 1.7263, "step": 55533 }, { "epoch": 1.85, "grad_norm": 0.564007043838501, "learning_rate": 0.00019318714886707724, "loss": 1.815, "step": 55534 }, { "epoch": 1.85, "grad_norm": 0.5975037217140198, "learning_rate": 0.00019317738174642814, "loss": 1.7424, "step": 55535 }, { "epoch": 1.85, "grad_norm": 0.5751042366027832, "learning_rate": 0.00019316761475544588, "loss": 1.746, "step": 55536 }, { "epoch": 1.85, "grad_norm": 0.5760000348091125, "learning_rate": 0.00019315784789414238, "loss": 1.7295, "step": 55537 }, { "epoch": 1.85, "grad_norm": 0.5967057943344116, "learning_rate": 0.00019314808116252963, "loss": 1.7425, "step": 55538 }, { "epoch": 1.85, "grad_norm": 0.5991756319999695, "learning_rate": 0.00019313831456061923, "loss": 1.807, "step": 55539 }, { "epoch": 1.85, "grad_norm": 0.5783708691596985, "learning_rate": 0.00019312854808842326, "loss": 1.7542, "step": 55540 }, { "epoch": 1.85, "grad_norm": 0.5764550566673279, "learning_rate": 0.0001931187817459535, "loss": 1.7289, "step": 55541 }, { "epoch": 1.85, "grad_norm": 0.6043315529823303, "learning_rate": 0.00019310901553322175, "loss": 1.7279, "step": 55542 }, { "epoch": 1.85, "grad_norm": 0.595784068107605, "learning_rate": 0.00019309924945023989, "loss": 1.7137, "step": 55543 }, { "epoch": 1.85, "grad_norm": 0.581967830657959, "learning_rate": 0.0001930894834970197, "loss": 1.7717, "step": 55544 }, { "epoch": 1.85, "grad_norm": 0.8278158903121948, "learning_rate": 0.00019307971767357332, "loss": 1.6665, "step": 55545 }, { "epoch": 1.85, "grad_norm": 0.5644451379776001, "learning_rate": 0.00019306995197991225, "loss": 1.7609, "step": 55546 }, { "epoch": 1.85, "grad_norm": 0.5830727219581604, "learning_rate": 0.00019306018641604856, "loss": 1.7263, "step": 55547 }, { "epoch": 1.85, "grad_norm": 0.6003508567810059, "learning_rate": 0.00019305042098199414, "loss": 1.651, "step": 55548 }, { "epoch": 1.85, "grad_norm": 0.5799375176429749, "learning_rate": 0.00019304065567776066, "loss": 1.7584, "step": 55549 }, { "epoch": 1.85, "grad_norm": 0.6026254892349243, "learning_rate": 0.00019303089050336006, "loss": 1.6559, "step": 55550 }, { "epoch": 1.85, "grad_norm": 0.5832222700119019, "learning_rate": 0.00019302112545880427, "loss": 1.77, "step": 55551 }, { "epoch": 1.85, "grad_norm": 0.5679726600646973, "learning_rate": 0.00019301136054410497, "loss": 1.7254, "step": 55552 }, { "epoch": 1.85, "grad_norm": 0.5707369446754456, "learning_rate": 0.0001930015957592741, "loss": 1.7327, "step": 55553 }, { "epoch": 1.85, "grad_norm": 0.6011423468589783, "learning_rate": 0.00019299183110432354, "loss": 1.6903, "step": 55554 }, { "epoch": 1.85, "grad_norm": 0.6350802183151245, "learning_rate": 0.00019298206657926523, "loss": 1.7309, "step": 55555 }, { "epoch": 1.85, "grad_norm": 0.5840566754341125, "learning_rate": 0.00019297230218411087, "loss": 1.7416, "step": 55556 }, { "epoch": 1.85, "grad_norm": 0.5691182613372803, "learning_rate": 0.00019296253791887237, "loss": 1.6077, "step": 55557 }, { "epoch": 1.85, "grad_norm": 0.6281844973564148, "learning_rate": 0.00019295277378356162, "loss": 1.6398, "step": 55558 }, { "epoch": 1.85, "grad_norm": 0.5869784951210022, "learning_rate": 0.00019294300977819033, "loss": 1.7788, "step": 55559 }, { "epoch": 1.85, "grad_norm": 0.6138584017753601, "learning_rate": 0.00019293324590277045, "loss": 1.7224, "step": 55560 }, { "epoch": 1.85, "grad_norm": 0.5839424133300781, "learning_rate": 0.000192923482157314, "loss": 1.7263, "step": 55561 }, { "epoch": 1.85, "grad_norm": 0.5670310854911804, "learning_rate": 0.00019291371854183247, "loss": 1.7653, "step": 55562 }, { "epoch": 1.85, "grad_norm": 0.584050714969635, "learning_rate": 0.00019290395505633796, "loss": 1.6911, "step": 55563 }, { "epoch": 1.85, "grad_norm": 0.5896576046943665, "learning_rate": 0.00019289419170084228, "loss": 1.7496, "step": 55564 }, { "epoch": 1.85, "grad_norm": 0.5890475511550903, "learning_rate": 0.00019288442847535735, "loss": 1.6521, "step": 55565 }, { "epoch": 1.85, "grad_norm": 0.5775442719459534, "learning_rate": 0.00019287466537989487, "loss": 1.6723, "step": 55566 }, { "epoch": 1.85, "grad_norm": 0.5932956337928772, "learning_rate": 0.0001928649024144667, "loss": 1.7114, "step": 55567 }, { "epoch": 1.85, "grad_norm": 0.5804232954978943, "learning_rate": 0.0001928551395790849, "loss": 1.7368, "step": 55568 }, { "epoch": 1.85, "grad_norm": 0.5764424800872803, "learning_rate": 0.00019284537687376107, "loss": 1.7878, "step": 55569 }, { "epoch": 1.85, "grad_norm": 0.5615183711051941, "learning_rate": 0.00019283561429850717, "loss": 1.6677, "step": 55570 }, { "epoch": 1.85, "grad_norm": 0.5992252826690674, "learning_rate": 0.0001928258518533351, "loss": 1.7778, "step": 55571 }, { "epoch": 1.85, "grad_norm": 0.5820267200469971, "learning_rate": 0.00019281608953825662, "loss": 1.7962, "step": 55572 }, { "epoch": 1.85, "grad_norm": 0.5698341727256775, "learning_rate": 0.0001928063273532836, "loss": 1.7186, "step": 55573 }, { "epoch": 1.85, "grad_norm": 0.5862465500831604, "learning_rate": 0.00019279656529842785, "loss": 1.7731, "step": 55574 }, { "epoch": 1.85, "grad_norm": 0.5752369165420532, "learning_rate": 0.00019278680337370146, "loss": 1.7313, "step": 55575 }, { "epoch": 1.85, "grad_norm": 0.5731242299079895, "learning_rate": 0.00019277704157911587, "loss": 1.7618, "step": 55576 }, { "epoch": 1.85, "grad_norm": 0.5808843970298767, "learning_rate": 0.00019276727991468328, "loss": 1.7194, "step": 55577 }, { "epoch": 1.85, "grad_norm": 0.5913830995559692, "learning_rate": 0.00019275751838041544, "loss": 1.7119, "step": 55578 }, { "epoch": 1.85, "grad_norm": 0.5795196294784546, "learning_rate": 0.00019274775697632407, "loss": 1.759, "step": 55579 }, { "epoch": 1.85, "grad_norm": 0.5672891736030579, "learning_rate": 0.00019273799570242116, "loss": 1.6889, "step": 55580 }, { "epoch": 1.85, "grad_norm": 0.5782814621925354, "learning_rate": 0.00019272823455871861, "loss": 1.7222, "step": 55581 }, { "epoch": 1.85, "grad_norm": 0.5635538697242737, "learning_rate": 0.0001927184735452281, "loss": 1.7111, "step": 55582 }, { "epoch": 1.85, "grad_norm": 0.590063750743866, "learning_rate": 0.00019270871266196144, "loss": 1.6106, "step": 55583 }, { "epoch": 1.85, "grad_norm": 0.6024097800254822, "learning_rate": 0.00019269895190893074, "loss": 1.8399, "step": 55584 }, { "epoch": 1.85, "grad_norm": 0.597831130027771, "learning_rate": 0.0001926891912861477, "loss": 1.7334, "step": 55585 }, { "epoch": 1.85, "grad_norm": 0.6006165742874146, "learning_rate": 0.00019267943079362412, "loss": 1.82, "step": 55586 }, { "epoch": 1.85, "grad_norm": 0.5966447591781616, "learning_rate": 0.00019266967043137193, "loss": 1.7632, "step": 55587 }, { "epoch": 1.85, "grad_norm": 0.5636077523231506, "learning_rate": 0.000192659910199403, "loss": 1.7072, "step": 55588 }, { "epoch": 1.85, "grad_norm": 0.596373438835144, "learning_rate": 0.00019265015009772903, "loss": 1.6999, "step": 55589 }, { "epoch": 1.85, "grad_norm": 0.6036047339439392, "learning_rate": 0.0001926403901263619, "loss": 1.7931, "step": 55590 }, { "epoch": 1.85, "grad_norm": 0.5895811915397644, "learning_rate": 0.00019263063028531358, "loss": 1.7643, "step": 55591 }, { "epoch": 1.85, "grad_norm": 0.5789121985435486, "learning_rate": 0.00019262087057459596, "loss": 1.7274, "step": 55592 }, { "epoch": 1.85, "grad_norm": 0.572730541229248, "learning_rate": 0.00019261111099422068, "loss": 1.6934, "step": 55593 }, { "epoch": 1.85, "grad_norm": 0.5833444595336914, "learning_rate": 0.0001926013515441997, "loss": 1.7896, "step": 55594 }, { "epoch": 1.85, "grad_norm": 0.583923876285553, "learning_rate": 0.0001925915922245449, "loss": 1.725, "step": 55595 }, { "epoch": 1.85, "grad_norm": 0.563005805015564, "learning_rate": 0.00019258183303526806, "loss": 1.7519, "step": 55596 }, { "epoch": 1.85, "grad_norm": 0.5890820622444153, "learning_rate": 0.00019257207397638097, "loss": 1.7626, "step": 55597 }, { "epoch": 1.85, "grad_norm": 0.5828685760498047, "learning_rate": 0.00019256231504789572, "loss": 1.7711, "step": 55598 }, { "epoch": 1.85, "grad_norm": 0.5691896677017212, "learning_rate": 0.0001925525562498238, "loss": 1.7196, "step": 55599 }, { "epoch": 1.85, "grad_norm": 0.5673454999923706, "learning_rate": 0.00019254279758217733, "loss": 1.7182, "step": 55600 }, { "epoch": 1.85, "grad_norm": 0.5785312652587891, "learning_rate": 0.00019253303904496805, "loss": 1.7183, "step": 55601 }, { "epoch": 1.85, "grad_norm": 0.718217670917511, "learning_rate": 0.00019252328063820793, "loss": 1.7624, "step": 55602 }, { "epoch": 1.85, "grad_norm": 0.5768158435821533, "learning_rate": 0.0001925135223619086, "loss": 1.7606, "step": 55603 }, { "epoch": 1.85, "grad_norm": 0.5944616794586182, "learning_rate": 0.00019250376421608197, "loss": 1.7928, "step": 55604 }, { "epoch": 1.85, "grad_norm": 0.5638570785522461, "learning_rate": 0.00019249400620074012, "loss": 1.7521, "step": 55605 }, { "epoch": 1.85, "grad_norm": 0.5784111022949219, "learning_rate": 0.0001924842483158945, "loss": 1.6807, "step": 55606 }, { "epoch": 1.85, "grad_norm": 0.5785990953445435, "learning_rate": 0.00019247449056155724, "loss": 1.688, "step": 55607 }, { "epoch": 1.85, "grad_norm": 0.5560647249221802, "learning_rate": 0.00019246473293774017, "loss": 1.7145, "step": 55608 }, { "epoch": 1.85, "grad_norm": 0.5932839512825012, "learning_rate": 0.000192454975444455, "loss": 1.7247, "step": 55609 }, { "epoch": 1.85, "grad_norm": 0.5619474649429321, "learning_rate": 0.0001924452180817137, "loss": 1.6643, "step": 55610 }, { "epoch": 1.85, "grad_norm": 0.5743725895881653, "learning_rate": 0.00019243546084952794, "loss": 1.7184, "step": 55611 }, { "epoch": 1.85, "grad_norm": 0.582698404788971, "learning_rate": 0.00019242570374790983, "loss": 1.7602, "step": 55612 }, { "epoch": 1.85, "grad_norm": 0.5609697699546814, "learning_rate": 0.00019241594677687097, "loss": 1.6455, "step": 55613 }, { "epoch": 1.85, "grad_norm": 0.5547275543212891, "learning_rate": 0.0001924061899364233, "loss": 1.6829, "step": 55614 }, { "epoch": 1.85, "grad_norm": 0.5663841962814331, "learning_rate": 0.00019239643322657875, "loss": 1.7645, "step": 55615 }, { "epoch": 1.85, "grad_norm": 0.5676417350769043, "learning_rate": 0.00019238667664734898, "loss": 1.6727, "step": 55616 }, { "epoch": 1.85, "grad_norm": 0.5642187595367432, "learning_rate": 0.00019237692019874597, "loss": 1.7587, "step": 55617 }, { "epoch": 1.85, "grad_norm": 0.5683274269104004, "learning_rate": 0.00019236716388078157, "loss": 1.7489, "step": 55618 }, { "epoch": 1.85, "grad_norm": 0.5826476216316223, "learning_rate": 0.0001923574076934674, "loss": 1.6748, "step": 55619 }, { "epoch": 1.85, "grad_norm": 0.5675370693206787, "learning_rate": 0.0001923476516368156, "loss": 1.6683, "step": 55620 }, { "epoch": 1.85, "grad_norm": 0.6078250408172607, "learning_rate": 0.0001923378957108379, "loss": 1.7251, "step": 55621 }, { "epoch": 1.85, "grad_norm": 0.5801858305931091, "learning_rate": 0.00019232813991554616, "loss": 1.7475, "step": 55622 }, { "epoch": 1.85, "grad_norm": 0.5844911932945251, "learning_rate": 0.00019231838425095213, "loss": 1.7741, "step": 55623 }, { "epoch": 1.85, "grad_norm": 0.5952900648117065, "learning_rate": 0.00019230862871706772, "loss": 1.7582, "step": 55624 }, { "epoch": 1.85, "grad_norm": 0.5848888158798218, "learning_rate": 0.00019229887331390484, "loss": 1.7437, "step": 55625 }, { "epoch": 1.85, "grad_norm": 0.5572288036346436, "learning_rate": 0.00019228911804147511, "loss": 1.7489, "step": 55626 }, { "epoch": 1.85, "grad_norm": 0.5750771760940552, "learning_rate": 0.0001922793628997906, "loss": 1.7374, "step": 55627 }, { "epoch": 1.85, "grad_norm": 0.5980005860328674, "learning_rate": 0.00019226960788886317, "loss": 1.7191, "step": 55628 }, { "epoch": 1.85, "grad_norm": 0.5697011947631836, "learning_rate": 0.00019225985300870446, "loss": 1.69, "step": 55629 }, { "epoch": 1.85, "grad_norm": 0.5732089877128601, "learning_rate": 0.0001922500982593264, "loss": 1.7332, "step": 55630 }, { "epoch": 1.85, "grad_norm": 0.583134651184082, "learning_rate": 0.00019224034364074085, "loss": 1.7715, "step": 55631 }, { "epoch": 1.85, "grad_norm": 0.5841173529624939, "learning_rate": 0.00019223058915295976, "loss": 1.6233, "step": 55632 }, { "epoch": 1.85, "grad_norm": 0.641961932182312, "learning_rate": 0.00019222083479599467, "loss": 1.8086, "step": 55633 }, { "epoch": 1.85, "grad_norm": 0.5837888121604919, "learning_rate": 0.00019221108056985766, "loss": 1.6734, "step": 55634 }, { "epoch": 1.85, "grad_norm": 0.5805028080940247, "learning_rate": 0.00019220132647456063, "loss": 1.7804, "step": 55635 }, { "epoch": 1.85, "grad_norm": 0.5894066095352173, "learning_rate": 0.00019219157251011522, "loss": 1.7283, "step": 55636 }, { "epoch": 1.85, "grad_norm": 0.5740200877189636, "learning_rate": 0.00019218181867653337, "loss": 1.6893, "step": 55637 }, { "epoch": 1.85, "grad_norm": 0.5793040990829468, "learning_rate": 0.00019217206497382694, "loss": 1.7107, "step": 55638 }, { "epoch": 1.85, "grad_norm": 0.5957147479057312, "learning_rate": 0.00019216231140200765, "loss": 1.8598, "step": 55639 }, { "epoch": 1.85, "grad_norm": 0.5918579697608948, "learning_rate": 0.00019215255796108742, "loss": 1.7035, "step": 55640 }, { "epoch": 1.85, "grad_norm": 0.5815760493278503, "learning_rate": 0.00019214280465107815, "loss": 1.7056, "step": 55641 }, { "epoch": 1.85, "grad_norm": 0.5765462517738342, "learning_rate": 0.0001921330514719917, "loss": 1.7273, "step": 55642 }, { "epoch": 1.85, "grad_norm": 0.583187997341156, "learning_rate": 0.00019212329842383972, "loss": 1.8309, "step": 55643 }, { "epoch": 1.85, "grad_norm": 0.5968294739723206, "learning_rate": 0.00019211354550663416, "loss": 1.8231, "step": 55644 }, { "epoch": 1.85, "grad_norm": 0.5668337941169739, "learning_rate": 0.00019210379272038697, "loss": 1.7128, "step": 55645 }, { "epoch": 1.85, "grad_norm": 0.5796639919281006, "learning_rate": 0.0001920940400651098, "loss": 1.7638, "step": 55646 }, { "epoch": 1.85, "grad_norm": 0.5776998400688171, "learning_rate": 0.00019208428754081445, "loss": 1.6775, "step": 55647 }, { "epoch": 1.85, "grad_norm": 0.5702068209648132, "learning_rate": 0.00019207453514751314, "loss": 1.7331, "step": 55648 }, { "epoch": 1.85, "grad_norm": 0.579056978225708, "learning_rate": 0.00019206478288521722, "loss": 1.6814, "step": 55649 }, { "epoch": 1.85, "grad_norm": 0.606025218963623, "learning_rate": 0.00019205503075393881, "loss": 1.8199, "step": 55650 }, { "epoch": 1.85, "grad_norm": 0.5779667496681213, "learning_rate": 0.0001920452787536897, "loss": 1.7288, "step": 55651 }, { "epoch": 1.85, "grad_norm": 0.5632390975952148, "learning_rate": 0.00019203552688448175, "loss": 1.758, "step": 55652 }, { "epoch": 1.85, "grad_norm": 0.5945680737495422, "learning_rate": 0.0001920257751463267, "loss": 1.8219, "step": 55653 }, { "epoch": 1.85, "grad_norm": 0.5788840651512146, "learning_rate": 0.0001920160235392364, "loss": 1.6937, "step": 55654 }, { "epoch": 1.85, "grad_norm": 0.5868991017341614, "learning_rate": 0.00019200627206322293, "loss": 1.6648, "step": 55655 }, { "epoch": 1.85, "grad_norm": 0.5791304111480713, "learning_rate": 0.0001919965207182978, "loss": 1.6231, "step": 55656 }, { "epoch": 1.85, "grad_norm": 0.6074408888816833, "learning_rate": 0.00019198676950447297, "loss": 1.7825, "step": 55657 }, { "epoch": 1.85, "grad_norm": 0.5749054551124573, "learning_rate": 0.0001919770184217604, "loss": 1.7981, "step": 55658 }, { "epoch": 1.85, "grad_norm": 0.5805556178092957, "learning_rate": 0.0001919672674701717, "loss": 1.7149, "step": 55659 }, { "epoch": 1.85, "grad_norm": 0.5928670167922974, "learning_rate": 0.0001919575166497188, "loss": 1.7371, "step": 55660 }, { "epoch": 1.85, "grad_norm": 0.5959222912788391, "learning_rate": 0.00019194776596041357, "loss": 1.7573, "step": 55661 }, { "epoch": 1.85, "grad_norm": 0.5570241808891296, "learning_rate": 0.00019193801540226798, "loss": 1.7499, "step": 55662 }, { "epoch": 1.85, "grad_norm": 0.5702275037765503, "learning_rate": 0.00019192826497529353, "loss": 1.7225, "step": 55663 }, { "epoch": 1.85, "grad_norm": 0.5888757109642029, "learning_rate": 0.0001919185146795023, "loss": 1.7868, "step": 55664 }, { "epoch": 1.85, "grad_norm": 0.5976489186286926, "learning_rate": 0.00019190876451490613, "loss": 1.698, "step": 55665 }, { "epoch": 1.85, "grad_norm": 0.5803733468055725, "learning_rate": 0.00019189901448151675, "loss": 1.718, "step": 55666 }, { "epoch": 1.85, "grad_norm": 0.5938425064086914, "learning_rate": 0.000191889264579346, "loss": 1.7579, "step": 55667 }, { "epoch": 1.85, "grad_norm": 0.5775147080421448, "learning_rate": 0.00019187951480840586, "loss": 1.7801, "step": 55668 }, { "epoch": 1.85, "grad_norm": 0.5887904763221741, "learning_rate": 0.00019186976516870796, "loss": 1.7048, "step": 55669 }, { "epoch": 1.85, "grad_norm": 0.5833311080932617, "learning_rate": 0.00019186001566026418, "loss": 1.7571, "step": 55670 }, { "epoch": 1.85, "grad_norm": 0.60495525598526, "learning_rate": 0.00019185026628308648, "loss": 1.7541, "step": 55671 }, { "epoch": 1.85, "grad_norm": 0.5880536437034607, "learning_rate": 0.0001918405170371867, "loss": 1.7135, "step": 55672 }, { "epoch": 1.85, "grad_norm": 0.5809547901153564, "learning_rate": 0.0001918307679225765, "loss": 1.7009, "step": 55673 }, { "epoch": 1.85, "grad_norm": 0.592018187046051, "learning_rate": 0.00019182101893926783, "loss": 1.7278, "step": 55674 }, { "epoch": 1.85, "grad_norm": 0.5860899686813354, "learning_rate": 0.00019181127008727257, "loss": 1.7753, "step": 55675 }, { "epoch": 1.85, "grad_norm": 0.5834484696388245, "learning_rate": 0.0001918015213666024, "loss": 1.8377, "step": 55676 }, { "epoch": 1.85, "grad_norm": 0.5806789994239807, "learning_rate": 0.00019179177277726923, "loss": 1.7141, "step": 55677 }, { "epoch": 1.85, "grad_norm": 0.5840474367141724, "learning_rate": 0.00019178202431928504, "loss": 1.7415, "step": 55678 }, { "epoch": 1.85, "grad_norm": 0.5923766493797302, "learning_rate": 0.00019177227599266136, "loss": 1.7111, "step": 55679 }, { "epoch": 1.85, "grad_norm": 0.9052703380584717, "learning_rate": 0.00019176252779741024, "loss": 1.6622, "step": 55680 }, { "epoch": 1.85, "grad_norm": 0.6062953472137451, "learning_rate": 0.00019175277973354347, "loss": 1.7919, "step": 55681 }, { "epoch": 1.85, "grad_norm": 0.5779584646224976, "learning_rate": 0.00019174303180107292, "loss": 1.7693, "step": 55682 }, { "epoch": 1.85, "grad_norm": 0.6024131774902344, "learning_rate": 0.00019173328400001033, "loss": 1.7136, "step": 55683 }, { "epoch": 1.85, "grad_norm": 0.589353084564209, "learning_rate": 0.00019172353633036752, "loss": 1.7976, "step": 55684 }, { "epoch": 1.85, "grad_norm": 0.5834022760391235, "learning_rate": 0.0001917137887921566, "loss": 1.7024, "step": 55685 }, { "epoch": 1.85, "grad_norm": 0.6211400032043457, "learning_rate": 0.00019170404138538896, "loss": 1.7968, "step": 55686 }, { "epoch": 1.85, "grad_norm": 0.5789201855659485, "learning_rate": 0.00019169429411007674, "loss": 1.7444, "step": 55687 }, { "epoch": 1.85, "grad_norm": 0.5855503082275391, "learning_rate": 0.0001916845469662317, "loss": 1.6142, "step": 55688 }, { "epoch": 1.85, "grad_norm": 0.5925629734992981, "learning_rate": 0.0001916747999538657, "loss": 1.6887, "step": 55689 }, { "epoch": 1.85, "grad_norm": 0.5829054713249207, "learning_rate": 0.00019166505307299046, "loss": 1.7942, "step": 55690 }, { "epoch": 1.85, "grad_norm": 0.6095012426376343, "learning_rate": 0.00019165530632361783, "loss": 1.6551, "step": 55691 }, { "epoch": 1.85, "grad_norm": 0.6009039282798767, "learning_rate": 0.0001916455597057599, "loss": 1.7482, "step": 55692 }, { "epoch": 1.85, "grad_norm": 0.5659914016723633, "learning_rate": 0.00019163581321942808, "loss": 1.7168, "step": 55693 }, { "epoch": 1.85, "grad_norm": 0.5873273611068726, "learning_rate": 0.00019162606686463446, "loss": 1.7662, "step": 55694 }, { "epoch": 1.85, "grad_norm": 0.608209490776062, "learning_rate": 0.00019161632064139094, "loss": 1.7276, "step": 55695 }, { "epoch": 1.85, "grad_norm": 0.583854079246521, "learning_rate": 0.00019160657454970917, "loss": 1.7339, "step": 55696 }, { "epoch": 1.85, "grad_norm": 0.5956107974052429, "learning_rate": 0.00019159682858960103, "loss": 1.7659, "step": 55697 }, { "epoch": 1.85, "grad_norm": 0.5813753008842468, "learning_rate": 0.00019158708276107843, "loss": 1.7392, "step": 55698 }, { "epoch": 1.85, "grad_norm": 0.5893614292144775, "learning_rate": 0.00019157733706415308, "loss": 1.7516, "step": 55699 }, { "epoch": 1.85, "grad_norm": 0.5790121555328369, "learning_rate": 0.0001915675914988368, "loss": 1.7872, "step": 55700 }, { "epoch": 1.85, "grad_norm": 0.5815481543540955, "learning_rate": 0.00019155784606514153, "loss": 1.7248, "step": 55701 }, { "epoch": 1.85, "grad_norm": 0.606242835521698, "learning_rate": 0.00019154810076307916, "loss": 1.7303, "step": 55702 }, { "epoch": 1.85, "grad_norm": 0.5811437964439392, "learning_rate": 0.00019153835559266135, "loss": 1.7216, "step": 55703 }, { "epoch": 1.85, "grad_norm": 1.3603837490081787, "learning_rate": 0.00019152861055389997, "loss": 1.7865, "step": 55704 }, { "epoch": 1.85, "grad_norm": 0.576720118522644, "learning_rate": 0.00019151886564680695, "loss": 1.6881, "step": 55705 }, { "epoch": 1.85, "grad_norm": 0.5660404562950134, "learning_rate": 0.00019150912087139397, "loss": 1.7225, "step": 55706 }, { "epoch": 1.85, "grad_norm": 0.5870943665504456, "learning_rate": 0.00019149937622767288, "loss": 1.7461, "step": 55707 }, { "epoch": 1.85, "grad_norm": 0.5548399090766907, "learning_rate": 0.00019148963171565563, "loss": 1.7416, "step": 55708 }, { "epoch": 1.85, "grad_norm": 0.5997445583343506, "learning_rate": 0.000191479887335354, "loss": 1.7292, "step": 55709 }, { "epoch": 1.85, "grad_norm": 0.5745518207550049, "learning_rate": 0.00019147014308677976, "loss": 1.7168, "step": 55710 }, { "epoch": 1.85, "grad_norm": 0.5745096802711487, "learning_rate": 0.00019146039896994482, "loss": 1.7071, "step": 55711 }, { "epoch": 1.85, "grad_norm": 0.5687968730926514, "learning_rate": 0.00019145065498486096, "loss": 1.7989, "step": 55712 }, { "epoch": 1.85, "grad_norm": 0.5786287784576416, "learning_rate": 0.00019144091113154, "loss": 1.7569, "step": 55713 }, { "epoch": 1.85, "grad_norm": 0.5553892254829407, "learning_rate": 0.00019143116740999368, "loss": 1.7011, "step": 55714 }, { "epoch": 1.85, "grad_norm": 0.5797610878944397, "learning_rate": 0.00019142142382023414, "loss": 1.7269, "step": 55715 }, { "epoch": 1.85, "grad_norm": 0.5855925679206848, "learning_rate": 0.0001914116803622728, "loss": 1.7725, "step": 55716 }, { "epoch": 1.85, "grad_norm": 0.5765829086303711, "learning_rate": 0.00019140193703612172, "loss": 1.689, "step": 55717 }, { "epoch": 1.85, "grad_norm": 0.5658690929412842, "learning_rate": 0.0001913921938417927, "loss": 1.7416, "step": 55718 }, { "epoch": 1.85, "grad_norm": 0.5798405408859253, "learning_rate": 0.0001913824507792976, "loss": 1.7309, "step": 55719 }, { "epoch": 1.85, "grad_norm": 0.5769349932670593, "learning_rate": 0.00019137270784864817, "loss": 1.7055, "step": 55720 }, { "epoch": 1.85, "grad_norm": 0.5700228810310364, "learning_rate": 0.0001913629650498562, "loss": 1.7964, "step": 55721 }, { "epoch": 1.85, "grad_norm": 0.5798779726028442, "learning_rate": 0.00019135322238293375, "loss": 1.7452, "step": 55722 }, { "epoch": 1.85, "grad_norm": 0.5817949771881104, "learning_rate": 0.0001913434798478923, "loss": 1.66, "step": 55723 }, { "epoch": 1.85, "grad_norm": 0.5749493837356567, "learning_rate": 0.0001913337374447439, "loss": 1.6879, "step": 55724 }, { "epoch": 1.85, "grad_norm": 0.5939337611198425, "learning_rate": 0.00019132399517350046, "loss": 1.718, "step": 55725 }, { "epoch": 1.85, "grad_norm": 0.5904415249824524, "learning_rate": 0.00019131425303417355, "loss": 1.7633, "step": 55726 }, { "epoch": 1.85, "grad_norm": 0.5920633673667908, "learning_rate": 0.00019130451102677515, "loss": 1.7703, "step": 55727 }, { "epoch": 1.85, "grad_norm": 0.5644052624702454, "learning_rate": 0.00019129476915131697, "loss": 1.6966, "step": 55728 }, { "epoch": 1.85, "grad_norm": 0.5852410197257996, "learning_rate": 0.0001912850274078111, "loss": 1.7665, "step": 55729 }, { "epoch": 1.85, "grad_norm": 0.6043687462806702, "learning_rate": 0.00019127528579626906, "loss": 1.7221, "step": 55730 }, { "epoch": 1.85, "grad_norm": 0.5884010195732117, "learning_rate": 0.00019126554431670285, "loss": 1.7755, "step": 55731 }, { "epoch": 1.85, "grad_norm": 0.6086695790290833, "learning_rate": 0.00019125580296912425, "loss": 1.7979, "step": 55732 }, { "epoch": 1.85, "grad_norm": 0.5623875856399536, "learning_rate": 0.0001912460617535451, "loss": 1.7333, "step": 55733 }, { "epoch": 1.85, "grad_norm": 0.5772852897644043, "learning_rate": 0.00019123632066997718, "loss": 1.6721, "step": 55734 }, { "epoch": 1.85, "grad_norm": 0.5896614193916321, "learning_rate": 0.0001912265797184324, "loss": 1.6902, "step": 55735 }, { "epoch": 1.85, "grad_norm": 0.5831125974655151, "learning_rate": 0.0001912168388989224, "loss": 1.7713, "step": 55736 }, { "epoch": 1.85, "grad_norm": 0.6031892895698547, "learning_rate": 0.0001912070982114592, "loss": 1.7275, "step": 55737 }, { "epoch": 1.85, "grad_norm": 0.6012179255485535, "learning_rate": 0.00019119735765605453, "loss": 1.6961, "step": 55738 }, { "epoch": 1.85, "grad_norm": 0.553027868270874, "learning_rate": 0.00019118761723272028, "loss": 1.6728, "step": 55739 }, { "epoch": 1.85, "grad_norm": 0.595710039138794, "learning_rate": 0.0001911778769414682, "loss": 1.6992, "step": 55740 }, { "epoch": 1.85, "grad_norm": 0.6074065566062927, "learning_rate": 0.00019116813678231017, "loss": 1.7554, "step": 55741 }, { "epoch": 1.85, "grad_norm": 0.5815228819847107, "learning_rate": 0.00019115839675525802, "loss": 1.6845, "step": 55742 }, { "epoch": 1.85, "grad_norm": 0.5871919393539429, "learning_rate": 0.0001911486568603234, "loss": 1.7414, "step": 55743 }, { "epoch": 1.85, "grad_norm": 0.6002357006072998, "learning_rate": 0.00019113891709751835, "loss": 1.7348, "step": 55744 }, { "epoch": 1.85, "grad_norm": 0.581087589263916, "learning_rate": 0.00019112917746685467, "loss": 1.6611, "step": 55745 }, { "epoch": 1.85, "grad_norm": 0.5804200172424316, "learning_rate": 0.00019111943796834406, "loss": 1.7038, "step": 55746 }, { "epoch": 1.85, "grad_norm": 0.5954979062080383, "learning_rate": 0.00019110969860199842, "loss": 1.7229, "step": 55747 }, { "epoch": 1.85, "grad_norm": 0.5646798014640808, "learning_rate": 0.00019109995936782956, "loss": 1.6982, "step": 55748 }, { "epoch": 1.85, "grad_norm": 0.5718303322792053, "learning_rate": 0.00019109022026584938, "loss": 1.7137, "step": 55749 }, { "epoch": 1.85, "grad_norm": 0.5810360908508301, "learning_rate": 0.00019108048129606948, "loss": 1.727, "step": 55750 }, { "epoch": 1.85, "grad_norm": 0.5676327347755432, "learning_rate": 0.00019107074245850193, "loss": 1.7498, "step": 55751 }, { "epoch": 1.85, "grad_norm": 0.588439404964447, "learning_rate": 0.00019106100375315845, "loss": 1.6996, "step": 55752 }, { "epoch": 1.85, "grad_norm": 0.6354576945304871, "learning_rate": 0.00019105126518005085, "loss": 1.6533, "step": 55753 }, { "epoch": 1.85, "grad_norm": 0.5957160592079163, "learning_rate": 0.00019104152673919092, "loss": 1.8063, "step": 55754 }, { "epoch": 1.85, "grad_norm": 0.5885326862335205, "learning_rate": 0.00019103178843059063, "loss": 1.7671, "step": 55755 }, { "epoch": 1.86, "grad_norm": 0.5556403994560242, "learning_rate": 0.00019102205025426156, "loss": 1.6767, "step": 55756 }, { "epoch": 1.86, "grad_norm": 0.5814053416252136, "learning_rate": 0.0001910123122102157, "loss": 1.7789, "step": 55757 }, { "epoch": 1.86, "grad_norm": 0.5818728804588318, "learning_rate": 0.0001910025742984648, "loss": 1.771, "step": 55758 }, { "epoch": 1.86, "grad_norm": 0.5855423808097839, "learning_rate": 0.00019099283651902082, "loss": 1.7289, "step": 55759 }, { "epoch": 1.86, "grad_norm": 0.578927755355835, "learning_rate": 0.00019098309887189544, "loss": 1.7746, "step": 55760 }, { "epoch": 1.86, "grad_norm": 0.5757154226303101, "learning_rate": 0.00019097336135710045, "loss": 1.679, "step": 55761 }, { "epoch": 1.86, "grad_norm": 0.567695677280426, "learning_rate": 0.00019096362397464785, "loss": 1.6524, "step": 55762 }, { "epoch": 1.86, "grad_norm": 0.5681609511375427, "learning_rate": 0.00019095388672454928, "loss": 1.7712, "step": 55763 }, { "epoch": 1.86, "grad_norm": 0.5815587043762207, "learning_rate": 0.00019094414960681654, "loss": 1.7513, "step": 55764 }, { "epoch": 1.86, "grad_norm": 0.5929045677185059, "learning_rate": 0.0001909344126214617, "loss": 1.7342, "step": 55765 }, { "epoch": 1.86, "grad_norm": 0.579643726348877, "learning_rate": 0.00019092467576849626, "loss": 1.6958, "step": 55766 }, { "epoch": 1.86, "grad_norm": 0.5722929239273071, "learning_rate": 0.00019091493904793225, "loss": 1.6264, "step": 55767 }, { "epoch": 1.86, "grad_norm": 0.5942294001579285, "learning_rate": 0.00019090520245978145, "loss": 1.785, "step": 55768 }, { "epoch": 1.86, "grad_norm": 0.5665941834449768, "learning_rate": 0.00019089546600405572, "loss": 1.6951, "step": 55769 }, { "epoch": 1.86, "grad_norm": 0.6131893992424011, "learning_rate": 0.00019088572968076672, "loss": 1.7309, "step": 55770 }, { "epoch": 1.86, "grad_norm": 0.593862771987915, "learning_rate": 0.0001908759934899263, "loss": 1.7555, "step": 55771 }, { "epoch": 1.86, "grad_norm": 0.5826038122177124, "learning_rate": 0.0001908662574315466, "loss": 1.712, "step": 55772 }, { "epoch": 1.86, "grad_norm": 0.6160188317298889, "learning_rate": 0.00019085652150563893, "loss": 1.6795, "step": 55773 }, { "epoch": 1.86, "grad_norm": 0.6047206521034241, "learning_rate": 0.00019084678571221545, "loss": 1.7396, "step": 55774 }, { "epoch": 1.86, "grad_norm": 0.5596440434455872, "learning_rate": 0.00019083705005128795, "loss": 1.7191, "step": 55775 }, { "epoch": 1.86, "grad_norm": 0.5784814953804016, "learning_rate": 0.0001908273145228681, "loss": 1.667, "step": 55776 }, { "epoch": 1.86, "grad_norm": 0.6209843754768372, "learning_rate": 0.00019081757912696784, "loss": 1.7871, "step": 55777 }, { "epoch": 1.86, "grad_norm": 0.5760023593902588, "learning_rate": 0.00019080784386359888, "loss": 1.7412, "step": 55778 }, { "epoch": 1.86, "grad_norm": 0.580808162689209, "learning_rate": 0.0001907981087327733, "loss": 1.7312, "step": 55779 }, { "epoch": 1.86, "grad_norm": 0.5747092366218567, "learning_rate": 0.00019078837373450252, "loss": 1.7567, "step": 55780 }, { "epoch": 1.86, "grad_norm": 0.5903196334838867, "learning_rate": 0.0001907786388687986, "loss": 1.7548, "step": 55781 }, { "epoch": 1.86, "grad_norm": 0.5696196556091309, "learning_rate": 0.00019076890413567343, "loss": 1.8061, "step": 55782 }, { "epoch": 1.86, "grad_norm": 0.5694291591644287, "learning_rate": 0.00019075916953513868, "loss": 1.7378, "step": 55783 }, { "epoch": 1.86, "grad_norm": 0.58619624376297, "learning_rate": 0.00019074943506720615, "loss": 1.6948, "step": 55784 }, { "epoch": 1.86, "grad_norm": 0.5670124292373657, "learning_rate": 0.00019073970073188776, "loss": 1.7157, "step": 55785 }, { "epoch": 1.86, "grad_norm": 0.5817989110946655, "learning_rate": 0.00019072996652919524, "loss": 1.7629, "step": 55786 }, { "epoch": 1.86, "grad_norm": 0.5889257788658142, "learning_rate": 0.00019072023245914036, "loss": 1.8417, "step": 55787 }, { "epoch": 1.86, "grad_norm": 0.5918568968772888, "learning_rate": 0.00019071049852173512, "loss": 1.7259, "step": 55788 }, { "epoch": 1.86, "grad_norm": 0.571004331111908, "learning_rate": 0.00019070076471699126, "loss": 1.7157, "step": 55789 }, { "epoch": 1.86, "grad_norm": 0.5785248279571533, "learning_rate": 0.0001906910310449205, "loss": 1.7405, "step": 55790 }, { "epoch": 1.86, "grad_norm": 0.6098780632019043, "learning_rate": 0.00019068129750553475, "loss": 1.7709, "step": 55791 }, { "epoch": 1.86, "grad_norm": 0.5958356857299805, "learning_rate": 0.00019067156409884585, "loss": 1.7953, "step": 55792 }, { "epoch": 1.86, "grad_norm": 0.586169421672821, "learning_rate": 0.00019066183082486549, "loss": 1.7494, "step": 55793 }, { "epoch": 1.86, "grad_norm": 0.5925601720809937, "learning_rate": 0.00019065209768360548, "loss": 1.7262, "step": 55794 }, { "epoch": 1.86, "grad_norm": 0.5877497792243958, "learning_rate": 0.00019064236467507793, "loss": 1.7054, "step": 55795 }, { "epoch": 1.86, "grad_norm": 0.6175898313522339, "learning_rate": 0.00019063263179929424, "loss": 1.7238, "step": 55796 }, { "epoch": 1.86, "grad_norm": 0.5741641521453857, "learning_rate": 0.0001906228990562665, "loss": 1.7991, "step": 55797 }, { "epoch": 1.86, "grad_norm": 0.5913159847259521, "learning_rate": 0.0001906131664460064, "loss": 1.8083, "step": 55798 }, { "epoch": 1.86, "grad_norm": 0.5657839775085449, "learning_rate": 0.00019060343396852587, "loss": 1.7386, "step": 55799 }, { "epoch": 1.86, "grad_norm": 0.5913501977920532, "learning_rate": 0.00019059370162383664, "loss": 1.7019, "step": 55800 }, { "epoch": 1.86, "grad_norm": 0.5876001715660095, "learning_rate": 0.0001905839694119504, "loss": 1.7754, "step": 55801 }, { "epoch": 1.86, "grad_norm": 0.5947432518005371, "learning_rate": 0.00019057423733287936, "loss": 1.7128, "step": 55802 }, { "epoch": 1.86, "grad_norm": 0.5683826804161072, "learning_rate": 0.0001905645053866348, "loss": 1.6801, "step": 55803 }, { "epoch": 1.86, "grad_norm": 0.5675246119499207, "learning_rate": 0.00019055477357322894, "loss": 1.7525, "step": 55804 }, { "epoch": 1.86, "grad_norm": 0.5807996392250061, "learning_rate": 0.00019054504189267352, "loss": 1.7478, "step": 55805 }, { "epoch": 1.86, "grad_norm": 0.5758967995643616, "learning_rate": 0.0001905353103449802, "loss": 1.786, "step": 55806 }, { "epoch": 1.86, "grad_norm": 0.5658926963806152, "learning_rate": 0.0001905255789301609, "loss": 1.7217, "step": 55807 }, { "epoch": 1.86, "grad_norm": 0.5711744427680969, "learning_rate": 0.00019051584764822737, "loss": 1.7646, "step": 55808 }, { "epoch": 1.86, "grad_norm": 0.5678184032440186, "learning_rate": 0.0001905061164991916, "loss": 1.7216, "step": 55809 }, { "epoch": 1.86, "grad_norm": 0.5753610730171204, "learning_rate": 0.00019049638548306513, "loss": 1.7209, "step": 55810 }, { "epoch": 1.86, "grad_norm": 0.5657088756561279, "learning_rate": 0.00019048665459985993, "loss": 1.743, "step": 55811 }, { "epoch": 1.86, "grad_norm": 0.5638036727905273, "learning_rate": 0.00019047692384958792, "loss": 1.6324, "step": 55812 }, { "epoch": 1.86, "grad_norm": 0.5842374563217163, "learning_rate": 0.0001904671932322607, "loss": 1.7286, "step": 55813 }, { "epoch": 1.86, "grad_norm": 0.5697266459465027, "learning_rate": 0.0001904574627478902, "loss": 1.683, "step": 55814 }, { "epoch": 1.86, "grad_norm": 0.5657174587249756, "learning_rate": 0.00019044773239648818, "loss": 1.6971, "step": 55815 }, { "epoch": 1.86, "grad_norm": 0.5790686011314392, "learning_rate": 0.00019043800217806648, "loss": 1.73, "step": 55816 }, { "epoch": 1.86, "grad_norm": 0.5727512836456299, "learning_rate": 0.0001904282720926368, "loss": 1.7996, "step": 55817 }, { "epoch": 1.86, "grad_norm": 0.567496657371521, "learning_rate": 0.00019041854214021115, "loss": 1.7348, "step": 55818 }, { "epoch": 1.86, "grad_norm": 0.5786210894584656, "learning_rate": 0.00019040881232080127, "loss": 1.6923, "step": 55819 }, { "epoch": 1.86, "grad_norm": 0.5881293416023254, "learning_rate": 0.0001903990826344189, "loss": 1.7421, "step": 55820 }, { "epoch": 1.86, "grad_norm": 0.5645156502723694, "learning_rate": 0.0001903893530810759, "loss": 1.6874, "step": 55821 }, { "epoch": 1.86, "grad_norm": 0.5776078701019287, "learning_rate": 0.00019037962366078414, "loss": 1.6747, "step": 55822 }, { "epoch": 1.86, "grad_norm": 0.5556231141090393, "learning_rate": 0.00019036989437355527, "loss": 1.7752, "step": 55823 }, { "epoch": 1.86, "grad_norm": 0.5974785685539246, "learning_rate": 0.00019036016521940116, "loss": 1.6665, "step": 55824 }, { "epoch": 1.86, "grad_norm": 0.5706562995910645, "learning_rate": 0.00019035043619833385, "loss": 1.7227, "step": 55825 }, { "epoch": 1.86, "grad_norm": 0.5775831341743469, "learning_rate": 0.00019034070731036476, "loss": 1.7665, "step": 55826 }, { "epoch": 1.86, "grad_norm": 0.572065532207489, "learning_rate": 0.00019033097855550597, "loss": 1.7695, "step": 55827 }, { "epoch": 1.86, "grad_norm": 0.5724339485168457, "learning_rate": 0.00019032124993376915, "loss": 1.6816, "step": 55828 }, { "epoch": 1.86, "grad_norm": 0.5718979835510254, "learning_rate": 0.00019031152144516632, "loss": 1.7331, "step": 55829 }, { "epoch": 1.86, "grad_norm": 0.5862047672271729, "learning_rate": 0.000190301793089709, "loss": 1.7748, "step": 55830 }, { "epoch": 1.86, "grad_norm": 1.0941828489303589, "learning_rate": 0.00019029206486740913, "loss": 1.788, "step": 55831 }, { "epoch": 1.86, "grad_norm": 0.5628705620765686, "learning_rate": 0.0001902823367782787, "loss": 1.7467, "step": 55832 }, { "epoch": 1.86, "grad_norm": 0.5690284371376038, "learning_rate": 0.00019027260882232918, "loss": 1.7466, "step": 55833 }, { "epoch": 1.86, "grad_norm": 0.596723735332489, "learning_rate": 0.0001902628809995726, "loss": 1.7067, "step": 55834 }, { "epoch": 1.86, "grad_norm": 0.5750383734703064, "learning_rate": 0.00019025315331002067, "loss": 1.6557, "step": 55835 }, { "epoch": 1.86, "grad_norm": 0.7917083501815796, "learning_rate": 0.00019024342575368533, "loss": 1.6931, "step": 55836 }, { "epoch": 1.86, "grad_norm": 0.5826000571250916, "learning_rate": 0.00019023369833057825, "loss": 1.7398, "step": 55837 }, { "epoch": 1.86, "grad_norm": 0.5979496836662292, "learning_rate": 0.00019022397104071124, "loss": 1.7525, "step": 55838 }, { "epoch": 1.86, "grad_norm": 0.5763856172561646, "learning_rate": 0.0001902142438840963, "loss": 1.73, "step": 55839 }, { "epoch": 1.86, "grad_norm": 0.5924685597419739, "learning_rate": 0.00019020451686074493, "loss": 1.8618, "step": 55840 }, { "epoch": 1.86, "grad_norm": 0.56241774559021, "learning_rate": 0.00019019478997066915, "loss": 1.7878, "step": 55841 }, { "epoch": 1.86, "grad_norm": 0.5674213171005249, "learning_rate": 0.0001901850632138808, "loss": 1.7, "step": 55842 }, { "epoch": 1.86, "grad_norm": 0.5893072485923767, "learning_rate": 0.00019017533659039155, "loss": 1.7652, "step": 55843 }, { "epoch": 1.86, "grad_norm": 0.5648804306983948, "learning_rate": 0.0001901656101002132, "loss": 1.697, "step": 55844 }, { "epoch": 1.86, "grad_norm": 0.5779508948326111, "learning_rate": 0.0001901558837433576, "loss": 1.6795, "step": 55845 }, { "epoch": 1.86, "grad_norm": 0.5694790482521057, "learning_rate": 0.00019014615751983667, "loss": 1.7311, "step": 55846 }, { "epoch": 1.86, "grad_norm": 0.5612656474113464, "learning_rate": 0.00019013643142966205, "loss": 1.6865, "step": 55847 }, { "epoch": 1.86, "grad_norm": 0.6040248870849609, "learning_rate": 0.00019012670547284568, "loss": 1.8017, "step": 55848 }, { "epoch": 1.86, "grad_norm": 0.5985047817230225, "learning_rate": 0.0001901169796493993, "loss": 1.7579, "step": 55849 }, { "epoch": 1.86, "grad_norm": 0.5761737823486328, "learning_rate": 0.00019010725395933467, "loss": 1.6978, "step": 55850 }, { "epoch": 1.86, "grad_norm": 0.5943541526794434, "learning_rate": 0.00019009752840266363, "loss": 1.6301, "step": 55851 }, { "epoch": 1.86, "grad_norm": 0.5675605535507202, "learning_rate": 0.00019008780297939809, "loss": 1.7396, "step": 55852 }, { "epoch": 1.86, "grad_norm": 0.5907601714134216, "learning_rate": 0.0001900780776895496, "loss": 1.7547, "step": 55853 }, { "epoch": 1.86, "grad_norm": 0.5862014293670654, "learning_rate": 0.00019006835253313023, "loss": 1.7784, "step": 55854 }, { "epoch": 1.86, "grad_norm": 0.5624400973320007, "learning_rate": 0.00019005862751015167, "loss": 1.7269, "step": 55855 }, { "epoch": 1.86, "grad_norm": 0.60288006067276, "learning_rate": 0.0001900489026206258, "loss": 1.8027, "step": 55856 }, { "epoch": 1.86, "grad_norm": 0.6038835644721985, "learning_rate": 0.0001900391778645643, "loss": 1.718, "step": 55857 }, { "epoch": 1.86, "grad_norm": 0.5708898305892944, "learning_rate": 0.000190029453241979, "loss": 1.6964, "step": 55858 }, { "epoch": 1.86, "grad_norm": 0.5957065224647522, "learning_rate": 0.00019001972875288184, "loss": 1.7578, "step": 55859 }, { "epoch": 1.86, "grad_norm": 0.58771151304245, "learning_rate": 0.00019001000439728438, "loss": 1.7119, "step": 55860 }, { "epoch": 1.86, "grad_norm": 0.5898857712745667, "learning_rate": 0.00019000028017519864, "loss": 1.7819, "step": 55861 }, { "epoch": 1.86, "grad_norm": 0.5659332275390625, "learning_rate": 0.00018999055608663644, "loss": 1.6769, "step": 55862 }, { "epoch": 1.86, "grad_norm": 0.5890876650810242, "learning_rate": 0.0001899808321316094, "loss": 1.8018, "step": 55863 }, { "epoch": 1.86, "grad_norm": 0.5814082026481628, "learning_rate": 0.00018997110831012946, "loss": 1.68, "step": 55864 }, { "epoch": 1.86, "grad_norm": 0.5904160141944885, "learning_rate": 0.00018996138462220834, "loss": 1.6944, "step": 55865 }, { "epoch": 1.86, "grad_norm": 0.5752964019775391, "learning_rate": 0.00018995166106785795, "loss": 1.7679, "step": 55866 }, { "epoch": 1.86, "grad_norm": 0.5899456739425659, "learning_rate": 0.00018994193764708988, "loss": 1.7957, "step": 55867 }, { "epoch": 1.86, "grad_norm": 0.5977876782417297, "learning_rate": 0.0001899322143599162, "loss": 1.7332, "step": 55868 }, { "epoch": 1.86, "grad_norm": 0.5822991728782654, "learning_rate": 0.00018992249120634863, "loss": 1.7285, "step": 55869 }, { "epoch": 1.86, "grad_norm": 0.5775150060653687, "learning_rate": 0.00018991276818639888, "loss": 1.7503, "step": 55870 }, { "epoch": 1.86, "grad_norm": 0.5729264616966248, "learning_rate": 0.00018990304530007882, "loss": 1.7461, "step": 55871 }, { "epoch": 1.86, "grad_norm": 0.5817567110061646, "learning_rate": 0.00018989332254740025, "loss": 1.7679, "step": 55872 }, { "epoch": 1.86, "grad_norm": 0.5825938582420349, "learning_rate": 0.00018988359992837497, "loss": 1.7442, "step": 55873 }, { "epoch": 1.86, "grad_norm": 0.5725772380828857, "learning_rate": 0.00018987387744301467, "loss": 1.7099, "step": 55874 }, { "epoch": 1.86, "grad_norm": 0.569146990776062, "learning_rate": 0.00018986415509133134, "loss": 1.7856, "step": 55875 }, { "epoch": 1.86, "grad_norm": 0.5634744763374329, "learning_rate": 0.00018985443287333672, "loss": 1.7881, "step": 55876 }, { "epoch": 1.86, "grad_norm": 0.5816077589988708, "learning_rate": 0.00018984471078904258, "loss": 1.7938, "step": 55877 }, { "epoch": 1.86, "grad_norm": 0.5783090591430664, "learning_rate": 0.00018983498883846074, "loss": 1.7524, "step": 55878 }, { "epoch": 1.86, "grad_norm": 0.5809846520423889, "learning_rate": 0.00018982526702160298, "loss": 1.7206, "step": 55879 }, { "epoch": 1.86, "grad_norm": 0.5735915899276733, "learning_rate": 0.00018981554533848113, "loss": 1.7142, "step": 55880 }, { "epoch": 1.86, "grad_norm": 0.5757306814193726, "learning_rate": 0.00018980582378910687, "loss": 1.698, "step": 55881 }, { "epoch": 1.86, "grad_norm": 0.5728392601013184, "learning_rate": 0.00018979610237349228, "loss": 1.8496, "step": 55882 }, { "epoch": 1.86, "grad_norm": 0.6033422946929932, "learning_rate": 0.0001897863810916488, "loss": 1.7641, "step": 55883 }, { "epoch": 1.86, "grad_norm": 0.5628284811973572, "learning_rate": 0.00018977665994358847, "loss": 1.7485, "step": 55884 }, { "epoch": 1.86, "grad_norm": 0.6147921681404114, "learning_rate": 0.00018976693892932306, "loss": 1.7653, "step": 55885 }, { "epoch": 1.86, "grad_norm": 0.5952851176261902, "learning_rate": 0.00018975721804886443, "loss": 1.7462, "step": 55886 }, { "epoch": 1.86, "grad_norm": 0.5741626620292664, "learning_rate": 0.00018974749730222418, "loss": 1.8366, "step": 55887 }, { "epoch": 1.86, "grad_norm": 0.566117525100708, "learning_rate": 0.00018973777668941416, "loss": 1.7279, "step": 55888 }, { "epoch": 1.86, "grad_norm": 0.5943552255630493, "learning_rate": 0.0001897280562104465, "loss": 1.7465, "step": 55889 }, { "epoch": 1.86, "grad_norm": 0.5836990475654602, "learning_rate": 0.00018971833586533247, "loss": 1.6988, "step": 55890 }, { "epoch": 1.86, "grad_norm": 0.5922739505767822, "learning_rate": 0.00018970861565408418, "loss": 1.7116, "step": 55891 }, { "epoch": 1.86, "grad_norm": 0.5876060724258423, "learning_rate": 0.00018969889557671348, "loss": 1.7235, "step": 55892 }, { "epoch": 1.86, "grad_norm": 0.5870820879936218, "learning_rate": 0.000189689175633232, "loss": 1.7059, "step": 55893 }, { "epoch": 1.86, "grad_norm": 0.610869288444519, "learning_rate": 0.00018967945582365164, "loss": 1.729, "step": 55894 }, { "epoch": 1.86, "grad_norm": 0.5639597177505493, "learning_rate": 0.00018966973614798404, "loss": 1.7893, "step": 55895 }, { "epoch": 1.86, "grad_norm": 0.579980731010437, "learning_rate": 0.00018966001660624134, "loss": 1.743, "step": 55896 }, { "epoch": 1.86, "grad_norm": 0.5897050499916077, "learning_rate": 0.00018965029719843494, "loss": 1.7116, "step": 55897 }, { "epoch": 1.86, "grad_norm": 0.5729696154594421, "learning_rate": 0.00018964057792457688, "loss": 1.7361, "step": 55898 }, { "epoch": 1.86, "grad_norm": 1.1447874307632446, "learning_rate": 0.0001896308587846789, "loss": 1.7198, "step": 55899 }, { "epoch": 1.86, "grad_norm": 0.5800186395645142, "learning_rate": 0.00018962113977875284, "loss": 1.6824, "step": 55900 }, { "epoch": 1.86, "grad_norm": 0.6100151538848877, "learning_rate": 0.0001896114209068104, "loss": 1.6916, "step": 55901 }, { "epoch": 1.86, "grad_norm": 0.5929461121559143, "learning_rate": 0.0001896017021688635, "loss": 1.7426, "step": 55902 }, { "epoch": 1.86, "grad_norm": 0.581712543964386, "learning_rate": 0.0001895919835649238, "loss": 1.7285, "step": 55903 }, { "epoch": 1.86, "grad_norm": 0.5708261132240295, "learning_rate": 0.0001895822650950031, "loss": 1.7259, "step": 55904 }, { "epoch": 1.86, "grad_norm": 0.5679064989089966, "learning_rate": 0.0001895725467591133, "loss": 1.7431, "step": 55905 }, { "epoch": 1.86, "grad_norm": 0.5842376947402954, "learning_rate": 0.00018956282855726623, "loss": 1.7401, "step": 55906 }, { "epoch": 1.86, "grad_norm": 0.5765219330787659, "learning_rate": 0.00018955311048947357, "loss": 1.731, "step": 55907 }, { "epoch": 1.86, "grad_norm": 0.5883512496948242, "learning_rate": 0.00018954339255574716, "loss": 1.7293, "step": 55908 }, { "epoch": 1.86, "grad_norm": 0.5836966037750244, "learning_rate": 0.00018953367475609884, "loss": 1.784, "step": 55909 }, { "epoch": 1.86, "grad_norm": 0.5951349139213562, "learning_rate": 0.00018952395709054035, "loss": 1.6964, "step": 55910 }, { "epoch": 1.86, "grad_norm": 0.5746804475784302, "learning_rate": 0.0001895142395590834, "loss": 1.7279, "step": 55911 }, { "epoch": 1.86, "grad_norm": 0.5812745690345764, "learning_rate": 0.00018950452216174004, "loss": 1.8045, "step": 55912 }, { "epoch": 1.86, "grad_norm": 0.5722911953926086, "learning_rate": 0.00018949480489852176, "loss": 1.7303, "step": 55913 }, { "epoch": 1.86, "grad_norm": 0.595069944858551, "learning_rate": 0.0001894850877694406, "loss": 1.6975, "step": 55914 }, { "epoch": 1.86, "grad_norm": 0.5857588648796082, "learning_rate": 0.0001894753707745082, "loss": 1.7585, "step": 55915 }, { "epoch": 1.86, "grad_norm": 0.582760214805603, "learning_rate": 0.0001894656539137365, "loss": 1.7732, "step": 55916 }, { "epoch": 1.86, "grad_norm": 0.5647737383842468, "learning_rate": 0.00018945593718713713, "loss": 1.7225, "step": 55917 }, { "epoch": 1.86, "grad_norm": 0.5557015538215637, "learning_rate": 0.00018944622059472192, "loss": 1.7345, "step": 55918 }, { "epoch": 1.86, "grad_norm": 0.588629961013794, "learning_rate": 0.00018943650413650288, "loss": 1.7312, "step": 55919 }, { "epoch": 1.86, "grad_norm": 0.6026501059532166, "learning_rate": 0.00018942678781249146, "loss": 1.7524, "step": 55920 }, { "epoch": 1.86, "grad_norm": 0.5628169178962708, "learning_rate": 0.00018941707162269973, "loss": 1.6603, "step": 55921 }, { "epoch": 1.86, "grad_norm": 0.5855769515037537, "learning_rate": 0.0001894073555671394, "loss": 1.7174, "step": 55922 }, { "epoch": 1.86, "grad_norm": 0.5684745907783508, "learning_rate": 0.00018939763964582215, "loss": 1.667, "step": 55923 }, { "epoch": 1.86, "grad_norm": 0.5653365254402161, "learning_rate": 0.00018938792385875993, "loss": 1.7108, "step": 55924 }, { "epoch": 1.86, "grad_norm": 0.5703812837600708, "learning_rate": 0.00018937820820596435, "loss": 1.7172, "step": 55925 }, { "epoch": 1.86, "grad_norm": 0.5895806550979614, "learning_rate": 0.00018936849268744757, "loss": 1.8148, "step": 55926 }, { "epoch": 1.86, "grad_norm": 0.5877040028572083, "learning_rate": 0.00018935877730322096, "loss": 1.7023, "step": 55927 }, { "epoch": 1.86, "grad_norm": 0.5713939666748047, "learning_rate": 0.00018934906205329648, "loss": 1.7542, "step": 55928 }, { "epoch": 1.86, "grad_norm": 0.5965158343315125, "learning_rate": 0.00018933934693768606, "loss": 1.7348, "step": 55929 }, { "epoch": 1.86, "grad_norm": 0.5966594815254211, "learning_rate": 0.0001893296319564013, "loss": 1.7403, "step": 55930 }, { "epoch": 1.86, "grad_norm": 0.5701469779014587, "learning_rate": 0.00018931991710945402, "loss": 1.6832, "step": 55931 }, { "epoch": 1.86, "grad_norm": 0.5559270977973938, "learning_rate": 0.00018931020239685615, "loss": 1.747, "step": 55932 }, { "epoch": 1.86, "grad_norm": 0.5889892578125, "learning_rate": 0.00018930048781861931, "loss": 1.7636, "step": 55933 }, { "epoch": 1.86, "grad_norm": 0.5967998504638672, "learning_rate": 0.00018929077337475532, "loss": 1.7733, "step": 55934 }, { "epoch": 1.86, "grad_norm": 0.5758049488067627, "learning_rate": 0.00018928105906527607, "loss": 1.734, "step": 55935 }, { "epoch": 1.86, "grad_norm": 0.5702003240585327, "learning_rate": 0.00018927134489019336, "loss": 1.7333, "step": 55936 }, { "epoch": 1.86, "grad_norm": 0.5935834646224976, "learning_rate": 0.0001892616308495189, "loss": 1.7566, "step": 55937 }, { "epoch": 1.86, "grad_norm": 0.5898721814155579, "learning_rate": 0.00018925191694326444, "loss": 1.7308, "step": 55938 }, { "epoch": 1.86, "grad_norm": 0.5897097587585449, "learning_rate": 0.0001892422031714419, "loss": 1.7683, "step": 55939 }, { "epoch": 1.86, "grad_norm": 0.6104775071144104, "learning_rate": 0.00018923248953406295, "loss": 1.7355, "step": 55940 }, { "epoch": 1.86, "grad_norm": 0.6048377752304077, "learning_rate": 0.00018922277603113943, "loss": 1.6408, "step": 55941 }, { "epoch": 1.86, "grad_norm": 0.5954821109771729, "learning_rate": 0.00018921306266268326, "loss": 1.6846, "step": 55942 }, { "epoch": 1.86, "grad_norm": 0.5912178158760071, "learning_rate": 0.00018920334942870598, "loss": 1.794, "step": 55943 }, { "epoch": 1.86, "grad_norm": 0.5716065168380737, "learning_rate": 0.00018919363632921953, "loss": 1.7787, "step": 55944 }, { "epoch": 1.86, "grad_norm": 0.56490558385849, "learning_rate": 0.00018918392336423568, "loss": 1.7197, "step": 55945 }, { "epoch": 1.86, "grad_norm": 0.5986593961715698, "learning_rate": 0.00018917421053376633, "loss": 1.7072, "step": 55946 }, { "epoch": 1.86, "grad_norm": 0.6149312257766724, "learning_rate": 0.00018916449783782302, "loss": 1.7193, "step": 55947 }, { "epoch": 1.86, "grad_norm": 0.6057956218719482, "learning_rate": 0.00018915478527641765, "loss": 1.6575, "step": 55948 }, { "epoch": 1.86, "grad_norm": 0.5765491127967834, "learning_rate": 0.00018914507284956223, "loss": 1.7822, "step": 55949 }, { "epoch": 1.86, "grad_norm": 0.5788861513137817, "learning_rate": 0.00018913536055726818, "loss": 1.7281, "step": 55950 }, { "epoch": 1.86, "grad_norm": 0.5840628743171692, "learning_rate": 0.0001891256483995475, "loss": 1.7486, "step": 55951 }, { "epoch": 1.86, "grad_norm": 0.6031345129013062, "learning_rate": 0.000189115936376412, "loss": 1.7513, "step": 55952 }, { "epoch": 1.86, "grad_norm": 0.5955947041511536, "learning_rate": 0.00018910622448787341, "loss": 1.7306, "step": 55953 }, { "epoch": 1.86, "grad_norm": 0.5697970390319824, "learning_rate": 0.00018909651273394347, "loss": 1.7154, "step": 55954 }, { "epoch": 1.86, "grad_norm": 0.5975294709205627, "learning_rate": 0.000189086801114634, "loss": 1.7102, "step": 55955 }, { "epoch": 1.86, "grad_norm": 0.572646975517273, "learning_rate": 0.00018907708962995692, "loss": 1.7425, "step": 55956 }, { "epoch": 1.86, "grad_norm": 0.5853056907653809, "learning_rate": 0.00018906737827992383, "loss": 1.7517, "step": 55957 }, { "epoch": 1.86, "grad_norm": 0.5714492201805115, "learning_rate": 0.00018905766706454666, "loss": 1.685, "step": 55958 }, { "epoch": 1.86, "grad_norm": 0.6015294790267944, "learning_rate": 0.00018904795598383714, "loss": 1.702, "step": 55959 }, { "epoch": 1.86, "grad_norm": 0.5912420153617859, "learning_rate": 0.000189038245037807, "loss": 1.7033, "step": 55960 }, { "epoch": 1.86, "grad_norm": 0.5718099474906921, "learning_rate": 0.0001890285342264681, "loss": 1.7385, "step": 55961 }, { "epoch": 1.86, "grad_norm": 0.601494312286377, "learning_rate": 0.00018901882354983214, "loss": 1.7788, "step": 55962 }, { "epoch": 1.86, "grad_norm": 0.6005257964134216, "learning_rate": 0.00018900911300791107, "loss": 1.7791, "step": 55963 }, { "epoch": 1.86, "grad_norm": 0.5882067680358887, "learning_rate": 0.00018899940260071654, "loss": 1.754, "step": 55964 }, { "epoch": 1.86, "grad_norm": 0.5827626585960388, "learning_rate": 0.0001889896923282604, "loss": 1.8116, "step": 55965 }, { "epoch": 1.86, "grad_norm": 0.5711203813552856, "learning_rate": 0.00018897998219055443, "loss": 1.7458, "step": 55966 }, { "epoch": 1.86, "grad_norm": 0.5673489570617676, "learning_rate": 0.0001889702721876104, "loss": 1.7568, "step": 55967 }, { "epoch": 1.86, "grad_norm": 0.5678947567939758, "learning_rate": 0.00018896056231944007, "loss": 1.7669, "step": 55968 }, { "epoch": 1.86, "grad_norm": 0.596502423286438, "learning_rate": 0.00018895085258605532, "loss": 1.719, "step": 55969 }, { "epoch": 1.86, "grad_norm": 0.5602728724479675, "learning_rate": 0.00018894114298746776, "loss": 1.7002, "step": 55970 }, { "epoch": 1.86, "grad_norm": 0.5962790250778198, "learning_rate": 0.00018893143352368932, "loss": 1.6615, "step": 55971 }, { "epoch": 1.86, "grad_norm": 0.5627060532569885, "learning_rate": 0.0001889217241947318, "loss": 1.7383, "step": 55972 }, { "epoch": 1.86, "grad_norm": 0.5774224400520325, "learning_rate": 0.00018891201500060697, "loss": 1.7125, "step": 55973 }, { "epoch": 1.86, "grad_norm": 0.5884135365486145, "learning_rate": 0.00018890230594132651, "loss": 1.7384, "step": 55974 }, { "epoch": 1.86, "grad_norm": 0.578686535358429, "learning_rate": 0.00018889259701690233, "loss": 1.7405, "step": 55975 }, { "epoch": 1.86, "grad_norm": 0.5773381590843201, "learning_rate": 0.0001888828882273462, "loss": 1.7226, "step": 55976 }, { "epoch": 1.86, "grad_norm": 0.5797662734985352, "learning_rate": 0.00018887317957266971, "loss": 1.665, "step": 55977 }, { "epoch": 1.86, "grad_norm": 0.5776336193084717, "learning_rate": 0.00018886347105288496, "loss": 1.735, "step": 55978 }, { "epoch": 1.86, "grad_norm": 0.5869444012641907, "learning_rate": 0.00018885376266800354, "loss": 1.6954, "step": 55979 }, { "epoch": 1.86, "grad_norm": 0.6020947098731995, "learning_rate": 0.00018884405441803727, "loss": 1.7096, "step": 55980 }, { "epoch": 1.86, "grad_norm": 0.5808149576187134, "learning_rate": 0.00018883434630299794, "loss": 1.6283, "step": 55981 }, { "epoch": 1.86, "grad_norm": 0.5824756026268005, "learning_rate": 0.00018882463832289726, "loss": 1.7387, "step": 55982 }, { "epoch": 1.86, "grad_norm": 0.5673008561134338, "learning_rate": 0.00018881493047774733, "loss": 1.6773, "step": 55983 }, { "epoch": 1.86, "grad_norm": 0.5769721865653992, "learning_rate": 0.0001888052227675594, "loss": 1.6853, "step": 55984 }, { "epoch": 1.86, "grad_norm": 0.5806927680969238, "learning_rate": 0.00018879551519234566, "loss": 1.7056, "step": 55985 }, { "epoch": 1.86, "grad_norm": 0.6211197376251221, "learning_rate": 0.00018878580775211787, "loss": 1.7014, "step": 55986 }, { "epoch": 1.86, "grad_norm": 0.5732706189155579, "learning_rate": 0.00018877610044688763, "loss": 1.7527, "step": 55987 }, { "epoch": 1.86, "grad_norm": 0.5862782597541809, "learning_rate": 0.0001887663932766668, "loss": 1.7396, "step": 55988 }, { "epoch": 1.86, "grad_norm": 0.5775646567344666, "learning_rate": 0.00018875668624146726, "loss": 1.6454, "step": 55989 }, { "epoch": 1.86, "grad_norm": 0.5964394211769104, "learning_rate": 0.00018874697934130068, "loss": 1.7343, "step": 55990 }, { "epoch": 1.86, "grad_norm": 0.5915969610214233, "learning_rate": 0.0001887372725761788, "loss": 1.751, "step": 55991 }, { "epoch": 1.86, "grad_norm": 0.6017529368400574, "learning_rate": 0.0001887275659461135, "loss": 1.7363, "step": 55992 }, { "epoch": 1.86, "grad_norm": 0.5597505569458008, "learning_rate": 0.00018871785945111664, "loss": 1.7358, "step": 55993 }, { "epoch": 1.86, "grad_norm": 0.5793968439102173, "learning_rate": 0.00018870815309119982, "loss": 1.7856, "step": 55994 }, { "epoch": 1.86, "grad_norm": 0.5723766684532166, "learning_rate": 0.0001886984468663749, "loss": 1.7899, "step": 55995 }, { "epoch": 1.86, "grad_norm": 0.5921076536178589, "learning_rate": 0.00018868874077665373, "loss": 1.7244, "step": 55996 }, { "epoch": 1.86, "grad_norm": 0.5842415690422058, "learning_rate": 0.000188679034822048, "loss": 1.7324, "step": 55997 }, { "epoch": 1.86, "grad_norm": 0.5743157863616943, "learning_rate": 0.00018866932900256944, "loss": 1.706, "step": 55998 }, { "epoch": 1.86, "grad_norm": 0.5664824843406677, "learning_rate": 0.00018865962331823008, "loss": 1.7253, "step": 55999 }, { "epoch": 1.86, "grad_norm": 0.5696239471435547, "learning_rate": 0.00018864991776904134, "loss": 1.6671, "step": 56000 }, { "epoch": 1.86, "grad_norm": 0.5762404203414917, "learning_rate": 0.00018864021235501526, "loss": 1.7165, "step": 56001 }, { "epoch": 1.86, "grad_norm": 0.5746208429336548, "learning_rate": 0.00018863050707616357, "loss": 1.7747, "step": 56002 }, { "epoch": 1.86, "grad_norm": 0.5661672949790955, "learning_rate": 0.00018862080193249807, "loss": 1.6741, "step": 56003 }, { "epoch": 1.86, "grad_norm": 0.5827441811561584, "learning_rate": 0.00018861109692403047, "loss": 1.7058, "step": 56004 }, { "epoch": 1.86, "grad_norm": 0.5833359956741333, "learning_rate": 0.0001886013920507725, "loss": 1.6578, "step": 56005 }, { "epoch": 1.86, "grad_norm": 0.5740552544593811, "learning_rate": 0.00018859168731273626, "loss": 1.7451, "step": 56006 }, { "epoch": 1.86, "grad_norm": 0.5708730220794678, "learning_rate": 0.0001885819827099331, "loss": 1.7807, "step": 56007 }, { "epoch": 1.86, "grad_norm": 0.5927271842956543, "learning_rate": 0.00018857227824237502, "loss": 1.6873, "step": 56008 }, { "epoch": 1.86, "grad_norm": 0.5897649526596069, "learning_rate": 0.00018856257391007387, "loss": 1.7219, "step": 56009 }, { "epoch": 1.86, "grad_norm": 0.5868104100227356, "learning_rate": 0.00018855286971304126, "loss": 1.7805, "step": 56010 }, { "epoch": 1.86, "grad_norm": 0.5728816390037537, "learning_rate": 0.00018854316565128905, "loss": 1.76, "step": 56011 }, { "epoch": 1.86, "grad_norm": 0.5683140754699707, "learning_rate": 0.00018853346172482892, "loss": 1.6727, "step": 56012 }, { "epoch": 1.86, "grad_norm": 0.5755091309547424, "learning_rate": 0.00018852375793367297, "loss": 1.7137, "step": 56013 }, { "epoch": 1.86, "grad_norm": 0.5835175514221191, "learning_rate": 0.00018851405427783253, "loss": 1.7096, "step": 56014 }, { "epoch": 1.86, "grad_norm": 0.6095649003982544, "learning_rate": 0.00018850435075731968, "loss": 1.7182, "step": 56015 }, { "epoch": 1.86, "grad_norm": 0.570095419883728, "learning_rate": 0.00018849464737214622, "loss": 1.6449, "step": 56016 }, { "epoch": 1.86, "grad_norm": 0.5853554010391235, "learning_rate": 0.00018848494412232374, "loss": 1.7601, "step": 56017 }, { "epoch": 1.86, "grad_norm": 0.5690740942955017, "learning_rate": 0.0001884752410078641, "loss": 1.6871, "step": 56018 }, { "epoch": 1.86, "grad_norm": 0.5729517340660095, "learning_rate": 0.00018846553802877913, "loss": 1.7289, "step": 56019 }, { "epoch": 1.86, "grad_norm": 0.5810453295707703, "learning_rate": 0.00018845583518508054, "loss": 1.7191, "step": 56020 }, { "epoch": 1.86, "grad_norm": 0.5726410150527954, "learning_rate": 0.00018844613247678005, "loss": 1.6915, "step": 56021 }, { "epoch": 1.86, "grad_norm": 0.5859336256980896, "learning_rate": 0.00018843642990388958, "loss": 1.7006, "step": 56022 }, { "epoch": 1.86, "grad_norm": 0.587907612323761, "learning_rate": 0.0001884267274664209, "loss": 1.7497, "step": 56023 }, { "epoch": 1.86, "grad_norm": 0.5697144865989685, "learning_rate": 0.0001884170251643857, "loss": 1.6889, "step": 56024 }, { "epoch": 1.86, "grad_norm": 0.5734257698059082, "learning_rate": 0.00018840732299779577, "loss": 1.7792, "step": 56025 }, { "epoch": 1.86, "grad_norm": 0.5921319127082825, "learning_rate": 0.00018839762096666294, "loss": 1.7479, "step": 56026 }, { "epoch": 1.86, "grad_norm": 0.5863007307052612, "learning_rate": 0.00018838791907099895, "loss": 1.7072, "step": 56027 }, { "epoch": 1.86, "grad_norm": 0.6238715052604675, "learning_rate": 0.00018837821731081548, "loss": 1.7907, "step": 56028 }, { "epoch": 1.86, "grad_norm": 0.5712808966636658, "learning_rate": 0.00018836851568612462, "loss": 1.7419, "step": 56029 }, { "epoch": 1.86, "grad_norm": 0.5742876529693604, "learning_rate": 0.00018835881419693772, "loss": 1.7264, "step": 56030 }, { "epoch": 1.86, "grad_norm": 0.5951271057128906, "learning_rate": 0.0001883491128432668, "loss": 1.649, "step": 56031 }, { "epoch": 1.86, "grad_norm": 0.5909215211868286, "learning_rate": 0.00018833941162512368, "loss": 1.751, "step": 56032 }, { "epoch": 1.86, "grad_norm": 0.5823991894721985, "learning_rate": 0.0001883297105425201, "loss": 1.8445, "step": 56033 }, { "epoch": 1.86, "grad_norm": 0.5826588273048401, "learning_rate": 0.00018832000959546772, "loss": 1.7429, "step": 56034 }, { "epoch": 1.86, "grad_norm": 0.5707458257675171, "learning_rate": 0.00018831030878397836, "loss": 1.7292, "step": 56035 }, { "epoch": 1.86, "grad_norm": 0.5879969000816345, "learning_rate": 0.000188300608108064, "loss": 1.7601, "step": 56036 }, { "epoch": 1.86, "grad_norm": 0.5923907160758972, "learning_rate": 0.00018829090756773605, "loss": 1.7089, "step": 56037 }, { "epoch": 1.86, "grad_norm": 0.5954000949859619, "learning_rate": 0.00018828120716300653, "loss": 1.6826, "step": 56038 }, { "epoch": 1.86, "grad_norm": 0.5734550356864929, "learning_rate": 0.00018827150689388724, "loss": 1.7364, "step": 56039 }, { "epoch": 1.86, "grad_norm": 0.5960898995399475, "learning_rate": 0.0001882618067603898, "loss": 1.7551, "step": 56040 }, { "epoch": 1.86, "grad_norm": 0.585569441318512, "learning_rate": 0.0001882521067625261, "loss": 1.7324, "step": 56041 }, { "epoch": 1.86, "grad_norm": 0.5912767648696899, "learning_rate": 0.0001882424069003078, "loss": 1.7093, "step": 56042 }, { "epoch": 1.86, "grad_norm": 0.5651892423629761, "learning_rate": 0.0001882327071737469, "loss": 1.7157, "step": 56043 }, { "epoch": 1.86, "grad_norm": 0.5673145651817322, "learning_rate": 0.00018822300758285487, "loss": 1.6596, "step": 56044 }, { "epoch": 1.86, "grad_norm": 0.5887587070465088, "learning_rate": 0.00018821330812764372, "loss": 1.7377, "step": 56045 }, { "epoch": 1.86, "grad_norm": 0.6012347936630249, "learning_rate": 0.00018820360880812518, "loss": 1.7489, "step": 56046 }, { "epoch": 1.86, "grad_norm": 0.5699573159217834, "learning_rate": 0.00018819390962431092, "loss": 1.7282, "step": 56047 }, { "epoch": 1.86, "grad_norm": 0.5716428160667419, "learning_rate": 0.0001881842105762128, "loss": 1.6984, "step": 56048 }, { "epoch": 1.86, "grad_norm": 0.5864100456237793, "learning_rate": 0.00018817451166384263, "loss": 1.8323, "step": 56049 }, { "epoch": 1.86, "grad_norm": 0.5618188977241516, "learning_rate": 0.00018816481288721207, "loss": 1.7353, "step": 56050 }, { "epoch": 1.86, "grad_norm": 0.5824953317642212, "learning_rate": 0.00018815511424633287, "loss": 1.7921, "step": 56051 }, { "epoch": 1.86, "grad_norm": 0.5924299359321594, "learning_rate": 0.00018814541574121692, "loss": 1.7734, "step": 56052 }, { "epoch": 1.86, "grad_norm": 0.5768876075744629, "learning_rate": 0.00018813571737187604, "loss": 1.7218, "step": 56053 }, { "epoch": 1.86, "grad_norm": 0.556729793548584, "learning_rate": 0.0001881260191383219, "loss": 1.7017, "step": 56054 }, { "epoch": 1.86, "grad_norm": 0.5727187395095825, "learning_rate": 0.00018811632104056623, "loss": 1.7344, "step": 56055 }, { "epoch": 1.86, "grad_norm": 0.5614892840385437, "learning_rate": 0.00018810662307862096, "loss": 1.7528, "step": 56056 }, { "epoch": 1.87, "grad_norm": 0.5574355721473694, "learning_rate": 0.00018809692525249768, "loss": 1.7247, "step": 56057 }, { "epoch": 1.87, "grad_norm": 0.559528648853302, "learning_rate": 0.00018808722756220818, "loss": 1.7948, "step": 56058 }, { "epoch": 1.87, "grad_norm": 0.575533926486969, "learning_rate": 0.00018807753000776452, "loss": 1.7711, "step": 56059 }, { "epoch": 1.87, "grad_norm": 0.5675163865089417, "learning_rate": 0.000188067832589178, "loss": 1.6533, "step": 56060 }, { "epoch": 1.87, "grad_norm": 0.6029622554779053, "learning_rate": 0.00018805813530646073, "loss": 1.7055, "step": 56061 }, { "epoch": 1.87, "grad_norm": 0.5694456696510315, "learning_rate": 0.0001880484381596244, "loss": 1.7543, "step": 56062 }, { "epoch": 1.87, "grad_norm": 0.5836997032165527, "learning_rate": 0.0001880387411486808, "loss": 1.745, "step": 56063 }, { "epoch": 1.87, "grad_norm": 0.6090084314346313, "learning_rate": 0.00018802904427364168, "loss": 1.7093, "step": 56064 }, { "epoch": 1.87, "grad_norm": 0.5792859196662903, "learning_rate": 0.00018801934753451868, "loss": 1.8574, "step": 56065 }, { "epoch": 1.87, "grad_norm": 0.5739666223526001, "learning_rate": 0.0001880096509313239, "loss": 1.686, "step": 56066 }, { "epoch": 1.87, "grad_norm": 0.5720750093460083, "learning_rate": 0.0001879999544640687, "loss": 1.7345, "step": 56067 }, { "epoch": 1.87, "grad_norm": 0.5729103684425354, "learning_rate": 0.0001879902581327651, "loss": 1.6981, "step": 56068 }, { "epoch": 1.87, "grad_norm": 0.577822208404541, "learning_rate": 0.00018798056193742487, "loss": 1.7647, "step": 56069 }, { "epoch": 1.87, "grad_norm": 0.6090120077133179, "learning_rate": 0.00018797086587805974, "loss": 1.662, "step": 56070 }, { "epoch": 1.87, "grad_norm": 0.576992928981781, "learning_rate": 0.00018796116995468146, "loss": 1.717, "step": 56071 }, { "epoch": 1.87, "grad_norm": 0.5842545032501221, "learning_rate": 0.00018795147416730173, "loss": 1.7239, "step": 56072 }, { "epoch": 1.87, "grad_norm": 0.6033017635345459, "learning_rate": 0.00018794177851593253, "loss": 1.8363, "step": 56073 }, { "epoch": 1.87, "grad_norm": 0.6044117212295532, "learning_rate": 0.0001879320830005854, "loss": 1.7728, "step": 56074 }, { "epoch": 1.87, "grad_norm": 0.5730369687080383, "learning_rate": 0.00018792238762127227, "loss": 1.753, "step": 56075 }, { "epoch": 1.87, "grad_norm": 0.5818361639976501, "learning_rate": 0.00018791269237800488, "loss": 1.7273, "step": 56076 }, { "epoch": 1.87, "grad_norm": 0.5887333750724792, "learning_rate": 0.0001879029972707949, "loss": 1.7064, "step": 56077 }, { "epoch": 1.87, "grad_norm": 0.6372275948524475, "learning_rate": 0.00018789330229965418, "loss": 1.7154, "step": 56078 }, { "epoch": 1.87, "grad_norm": 0.6309067606925964, "learning_rate": 0.00018788360746459438, "loss": 1.7447, "step": 56079 }, { "epoch": 1.87, "grad_norm": 0.5726636648178101, "learning_rate": 0.0001878739127656275, "loss": 1.7603, "step": 56080 }, { "epoch": 1.87, "grad_norm": 0.5845897793769836, "learning_rate": 0.00018786421820276517, "loss": 1.7529, "step": 56081 }, { "epoch": 1.87, "grad_norm": 0.576913595199585, "learning_rate": 0.00018785452377601905, "loss": 1.753, "step": 56082 }, { "epoch": 1.87, "grad_norm": 0.5990855693817139, "learning_rate": 0.00018784482948540113, "loss": 1.7524, "step": 56083 }, { "epoch": 1.87, "grad_norm": 0.575433075428009, "learning_rate": 0.00018783513533092297, "loss": 1.8072, "step": 56084 }, { "epoch": 1.87, "grad_norm": 0.5837537050247192, "learning_rate": 0.00018782544131259652, "loss": 1.7563, "step": 56085 }, { "epoch": 1.87, "grad_norm": 0.5647026896476746, "learning_rate": 0.00018781574743043344, "loss": 1.6614, "step": 56086 }, { "epoch": 1.87, "grad_norm": 0.5888810753822327, "learning_rate": 0.00018780605368444537, "loss": 1.7103, "step": 56087 }, { "epoch": 1.87, "grad_norm": 0.5826219320297241, "learning_rate": 0.00018779636007464434, "loss": 1.6825, "step": 56088 }, { "epoch": 1.87, "grad_norm": 0.5780286192893982, "learning_rate": 0.00018778666660104195, "loss": 1.7611, "step": 56089 }, { "epoch": 1.87, "grad_norm": 0.5760056972503662, "learning_rate": 0.0001877769732636501, "loss": 1.7598, "step": 56090 }, { "epoch": 1.87, "grad_norm": 0.6202883124351501, "learning_rate": 0.0001877672800624804, "loss": 1.8287, "step": 56091 }, { "epoch": 1.87, "grad_norm": 0.5786629319190979, "learning_rate": 0.0001877575869975447, "loss": 1.6817, "step": 56092 }, { "epoch": 1.87, "grad_norm": 0.6045230627059937, "learning_rate": 0.00018774789406885478, "loss": 1.704, "step": 56093 }, { "epoch": 1.87, "grad_norm": 0.5682886242866516, "learning_rate": 0.00018773820127642227, "loss": 1.745, "step": 56094 }, { "epoch": 1.87, "grad_norm": 0.6014961004257202, "learning_rate": 0.00018772850862025913, "loss": 1.697, "step": 56095 }, { "epoch": 1.87, "grad_norm": 0.5746934413909912, "learning_rate": 0.00018771881610037708, "loss": 1.695, "step": 56096 }, { "epoch": 1.87, "grad_norm": 0.5949756503105164, "learning_rate": 0.00018770912371678778, "loss": 1.7737, "step": 56097 }, { "epoch": 1.87, "grad_norm": 0.5831429958343506, "learning_rate": 0.00018769943146950305, "loss": 1.7201, "step": 56098 }, { "epoch": 1.87, "grad_norm": 0.5963223576545715, "learning_rate": 0.00018768973935853463, "loss": 1.7305, "step": 56099 }, { "epoch": 1.87, "grad_norm": 0.5574605464935303, "learning_rate": 0.00018768004738389446, "loss": 1.6981, "step": 56100 }, { "epoch": 1.87, "grad_norm": 0.5887901782989502, "learning_rate": 0.000187670355545594, "loss": 1.7169, "step": 56101 }, { "epoch": 1.87, "grad_norm": 0.6341869235038757, "learning_rate": 0.00018766066384364523, "loss": 1.7437, "step": 56102 }, { "epoch": 1.87, "grad_norm": 0.5962637662887573, "learning_rate": 0.00018765097227805996, "loss": 1.7221, "step": 56103 }, { "epoch": 1.87, "grad_norm": 0.5697005391120911, "learning_rate": 0.00018764128084884974, "loss": 1.7683, "step": 56104 }, { "epoch": 1.87, "grad_norm": 0.6000767350196838, "learning_rate": 0.0001876315895560265, "loss": 1.7446, "step": 56105 }, { "epoch": 1.87, "grad_norm": 0.6106420159339905, "learning_rate": 0.00018762189839960198, "loss": 1.6369, "step": 56106 }, { "epoch": 1.87, "grad_norm": 0.5957557559013367, "learning_rate": 0.0001876122073795879, "loss": 1.7432, "step": 56107 }, { "epoch": 1.87, "grad_norm": 0.5787166357040405, "learning_rate": 0.0001876025164959959, "loss": 1.707, "step": 56108 }, { "epoch": 1.87, "grad_norm": 0.569670557975769, "learning_rate": 0.00018759282574883803, "loss": 1.7449, "step": 56109 }, { "epoch": 1.87, "grad_norm": 0.5748127698898315, "learning_rate": 0.0001875831351381259, "loss": 1.7503, "step": 56110 }, { "epoch": 1.87, "grad_norm": 0.587104320526123, "learning_rate": 0.00018757344466387125, "loss": 1.7009, "step": 56111 }, { "epoch": 1.87, "grad_norm": 0.5759302973747253, "learning_rate": 0.00018756375432608588, "loss": 1.7531, "step": 56112 }, { "epoch": 1.87, "grad_norm": 0.5913043022155762, "learning_rate": 0.00018755406412478162, "loss": 1.7497, "step": 56113 }, { "epoch": 1.87, "grad_norm": 0.5865498781204224, "learning_rate": 0.00018754437405997008, "loss": 1.6984, "step": 56114 }, { "epoch": 1.87, "grad_norm": 0.5861383080482483, "learning_rate": 0.00018753468413166302, "loss": 1.6868, "step": 56115 }, { "epoch": 1.87, "grad_norm": 0.5901643633842468, "learning_rate": 0.0001875249943398725, "loss": 1.8142, "step": 56116 }, { "epoch": 1.87, "grad_norm": 0.5775710344314575, "learning_rate": 0.00018751530468460981, "loss": 1.7163, "step": 56117 }, { "epoch": 1.87, "grad_norm": 0.5551875233650208, "learning_rate": 0.00018750561516588712, "loss": 1.675, "step": 56118 }, { "epoch": 1.87, "grad_norm": 0.5749303102493286, "learning_rate": 0.00018749592578371597, "loss": 1.686, "step": 56119 }, { "epoch": 1.87, "grad_norm": 0.5513176321983337, "learning_rate": 0.00018748623653810833, "loss": 1.6606, "step": 56120 }, { "epoch": 1.87, "grad_norm": 0.6065776944160461, "learning_rate": 0.0001874765474290757, "loss": 1.7778, "step": 56121 }, { "epoch": 1.87, "grad_norm": 0.6062982678413391, "learning_rate": 0.00018746685845662988, "loss": 1.7286, "step": 56122 }, { "epoch": 1.87, "grad_norm": 0.6979096531867981, "learning_rate": 0.00018745716962078292, "loss": 1.7153, "step": 56123 }, { "epoch": 1.87, "grad_norm": 0.5685274600982666, "learning_rate": 0.00018744748092154622, "loss": 1.7621, "step": 56124 }, { "epoch": 1.87, "grad_norm": 0.5834521651268005, "learning_rate": 0.0001874377923589317, "loss": 1.7762, "step": 56125 }, { "epoch": 1.87, "grad_norm": 0.5812793374061584, "learning_rate": 0.00018742810393295123, "loss": 1.7249, "step": 56126 }, { "epoch": 1.87, "grad_norm": 0.5876359343528748, "learning_rate": 0.00018741841564361638, "loss": 1.706, "step": 56127 }, { "epoch": 1.87, "grad_norm": 2.433781385421753, "learning_rate": 0.00018740872749093898, "loss": 1.7216, "step": 56128 }, { "epoch": 1.87, "grad_norm": 0.5785115957260132, "learning_rate": 0.00018739903947493072, "loss": 1.7423, "step": 56129 }, { "epoch": 1.87, "grad_norm": 0.5804533958435059, "learning_rate": 0.00018738935159560362, "loss": 1.7453, "step": 56130 }, { "epoch": 1.87, "grad_norm": 0.5925990343093872, "learning_rate": 0.0001873796638529691, "loss": 1.719, "step": 56131 }, { "epoch": 1.87, "grad_norm": 0.5880982875823975, "learning_rate": 0.0001873699762470391, "loss": 1.7475, "step": 56132 }, { "epoch": 1.87, "grad_norm": 0.5712166428565979, "learning_rate": 0.00018736028877782546, "loss": 1.733, "step": 56133 }, { "epoch": 1.87, "grad_norm": 0.6514493227005005, "learning_rate": 0.00018735060144533974, "loss": 1.7913, "step": 56134 }, { "epoch": 1.87, "grad_norm": 0.5769656896591187, "learning_rate": 0.00018734091424959376, "loss": 1.7761, "step": 56135 }, { "epoch": 1.87, "grad_norm": 0.5942635536193848, "learning_rate": 0.00018733122719059942, "loss": 1.6505, "step": 56136 }, { "epoch": 1.87, "grad_norm": 1.9040451049804688, "learning_rate": 0.0001873215402683683, "loss": 1.7501, "step": 56137 }, { "epoch": 1.87, "grad_norm": 0.6002500653266907, "learning_rate": 0.00018731185348291215, "loss": 1.7818, "step": 56138 }, { "epoch": 1.87, "grad_norm": 0.5867593884468079, "learning_rate": 0.00018730216683424288, "loss": 1.7976, "step": 56139 }, { "epoch": 1.87, "grad_norm": 0.5918721556663513, "learning_rate": 0.00018729248032237222, "loss": 1.7121, "step": 56140 }, { "epoch": 1.87, "grad_norm": 0.6500757336616516, "learning_rate": 0.00018728279394731185, "loss": 1.743, "step": 56141 }, { "epoch": 1.87, "grad_norm": 0.57303386926651, "learning_rate": 0.00018727310770907355, "loss": 1.6844, "step": 56142 }, { "epoch": 1.87, "grad_norm": 0.5906677842140198, "learning_rate": 0.00018726342160766915, "loss": 1.7229, "step": 56143 }, { "epoch": 1.87, "grad_norm": 0.5968078374862671, "learning_rate": 0.00018725373564311025, "loss": 1.7085, "step": 56144 }, { "epoch": 1.87, "grad_norm": 0.6081321835517883, "learning_rate": 0.00018724404981540867, "loss": 1.8376, "step": 56145 }, { "epoch": 1.87, "grad_norm": 0.5808076858520508, "learning_rate": 0.0001872343641245764, "loss": 1.729, "step": 56146 }, { "epoch": 1.87, "grad_norm": 0.5621390342712402, "learning_rate": 0.0001872246785706248, "loss": 1.8007, "step": 56147 }, { "epoch": 1.87, "grad_norm": 0.5851932764053345, "learning_rate": 0.00018721499315356587, "loss": 1.7548, "step": 56148 }, { "epoch": 1.87, "grad_norm": 0.5824102163314819, "learning_rate": 0.0001872053078734113, "loss": 1.7594, "step": 56149 }, { "epoch": 1.87, "grad_norm": 0.5986180901527405, "learning_rate": 0.00018719562273017298, "loss": 1.7576, "step": 56150 }, { "epoch": 1.87, "grad_norm": 0.6088573932647705, "learning_rate": 0.00018718593772386249, "loss": 1.7423, "step": 56151 }, { "epoch": 1.87, "grad_norm": 0.5731558203697205, "learning_rate": 0.00018717625285449155, "loss": 1.6428, "step": 56152 }, { "epoch": 1.87, "grad_norm": 0.5944254398345947, "learning_rate": 0.0001871665681220722, "loss": 1.6535, "step": 56153 }, { "epoch": 1.87, "grad_norm": 0.5850105881690979, "learning_rate": 0.00018715688352661586, "loss": 1.7429, "step": 56154 }, { "epoch": 1.87, "grad_norm": 0.5925225615501404, "learning_rate": 0.00018714719906813448, "loss": 1.7295, "step": 56155 }, { "epoch": 1.87, "grad_norm": 0.5925521850585938, "learning_rate": 0.00018713751474663985, "loss": 1.6907, "step": 56156 }, { "epoch": 1.87, "grad_norm": 0.5887697339057922, "learning_rate": 0.00018712783056214354, "loss": 1.6771, "step": 56157 }, { "epoch": 1.87, "grad_norm": 0.5805655121803284, "learning_rate": 0.00018711814651465746, "loss": 1.6842, "step": 56158 }, { "epoch": 1.87, "grad_norm": 0.5860987901687622, "learning_rate": 0.00018710846260419325, "loss": 1.7479, "step": 56159 }, { "epoch": 1.87, "grad_norm": 0.6121459603309631, "learning_rate": 0.0001870987788307629, "loss": 1.7734, "step": 56160 }, { "epoch": 1.87, "grad_norm": 0.6000816226005554, "learning_rate": 0.0001870890951943778, "loss": 1.7186, "step": 56161 }, { "epoch": 1.87, "grad_norm": 0.6086536645889282, "learning_rate": 0.00018707941169505, "loss": 1.7921, "step": 56162 }, { "epoch": 1.87, "grad_norm": 0.5877619981765747, "learning_rate": 0.0001870697283327912, "loss": 1.6997, "step": 56163 }, { "epoch": 1.87, "grad_norm": 0.5939064621925354, "learning_rate": 0.00018706004510761305, "loss": 1.7039, "step": 56164 }, { "epoch": 1.87, "grad_norm": 0.6147053837776184, "learning_rate": 0.00018705036201952736, "loss": 1.7455, "step": 56165 }, { "epoch": 1.87, "grad_norm": 0.5920335054397583, "learning_rate": 0.00018704067906854597, "loss": 1.6751, "step": 56166 }, { "epoch": 1.87, "grad_norm": 0.5964602828025818, "learning_rate": 0.00018703099625468045, "loss": 1.7469, "step": 56167 }, { "epoch": 1.87, "grad_norm": 0.5693337321281433, "learning_rate": 0.00018702131357794263, "loss": 1.7399, "step": 56168 }, { "epoch": 1.87, "grad_norm": 0.5803206562995911, "learning_rate": 0.00018701163103834436, "loss": 1.7992, "step": 56169 }, { "epoch": 1.87, "grad_norm": 0.5923517942428589, "learning_rate": 0.00018700194863589735, "loss": 1.7384, "step": 56170 }, { "epoch": 1.87, "grad_norm": 0.5823521018028259, "learning_rate": 0.00018699226637061328, "loss": 1.7881, "step": 56171 }, { "epoch": 1.87, "grad_norm": 0.57988440990448, "learning_rate": 0.00018698258424250394, "loss": 1.7927, "step": 56172 }, { "epoch": 1.87, "grad_norm": 0.5898551940917969, "learning_rate": 0.00018697290225158113, "loss": 1.7129, "step": 56173 }, { "epoch": 1.87, "grad_norm": 0.5813463926315308, "learning_rate": 0.00018696322039785657, "loss": 1.7171, "step": 56174 }, { "epoch": 1.87, "grad_norm": 0.584363579750061, "learning_rate": 0.00018695353868134187, "loss": 1.7304, "step": 56175 }, { "epoch": 1.87, "grad_norm": 0.6019347310066223, "learning_rate": 0.00018694385710204913, "loss": 1.6991, "step": 56176 }, { "epoch": 1.87, "grad_norm": 0.556830883026123, "learning_rate": 0.00018693417565998968, "loss": 1.7215, "step": 56177 }, { "epoch": 1.87, "grad_norm": 0.5779988169670105, "learning_rate": 0.0001869244943551756, "loss": 1.8217, "step": 56178 }, { "epoch": 1.87, "grad_norm": 0.5838726162910461, "learning_rate": 0.00018691481318761847, "loss": 1.7402, "step": 56179 }, { "epoch": 1.87, "grad_norm": 0.5860998034477234, "learning_rate": 0.0001869051321573302, "loss": 1.6442, "step": 56180 }, { "epoch": 1.87, "grad_norm": 0.5844873189926147, "learning_rate": 0.00018689545126432232, "loss": 1.7567, "step": 56181 }, { "epoch": 1.87, "grad_norm": 0.5832632184028625, "learning_rate": 0.0001868857705086067, "loss": 1.7698, "step": 56182 }, { "epoch": 1.87, "grad_norm": 0.5758301615715027, "learning_rate": 0.00018687608989019524, "loss": 1.7266, "step": 56183 }, { "epoch": 1.87, "grad_norm": 0.5882052183151245, "learning_rate": 0.00018686640940909934, "loss": 1.6693, "step": 56184 }, { "epoch": 1.87, "grad_norm": 0.5904910564422607, "learning_rate": 0.00018685672906533103, "loss": 1.7095, "step": 56185 }, { "epoch": 1.87, "grad_norm": 0.6002079248428345, "learning_rate": 0.00018684704885890203, "loss": 1.6773, "step": 56186 }, { "epoch": 1.87, "grad_norm": 0.5932360291481018, "learning_rate": 0.00018683736878982398, "loss": 1.8214, "step": 56187 }, { "epoch": 1.87, "grad_norm": 0.6014948487281799, "learning_rate": 0.00018682768885810872, "loss": 1.7439, "step": 56188 }, { "epoch": 1.87, "grad_norm": 0.573660135269165, "learning_rate": 0.00018681800906376788, "loss": 1.7006, "step": 56189 }, { "epoch": 1.87, "grad_norm": 0.5906664729118347, "learning_rate": 0.00018680832940681344, "loss": 1.7592, "step": 56190 }, { "epoch": 1.87, "grad_norm": 0.5995991826057434, "learning_rate": 0.00018679864988725692, "loss": 1.7822, "step": 56191 }, { "epoch": 1.87, "grad_norm": 0.584097146987915, "learning_rate": 0.00018678897050511018, "loss": 1.76, "step": 56192 }, { "epoch": 1.87, "grad_norm": 0.5976606011390686, "learning_rate": 0.000186779291260385, "loss": 1.7247, "step": 56193 }, { "epoch": 1.87, "grad_norm": 0.6314252614974976, "learning_rate": 0.00018676961215309304, "loss": 1.721, "step": 56194 }, { "epoch": 1.87, "grad_norm": 0.5915196537971497, "learning_rate": 0.00018675993318324608, "loss": 1.7978, "step": 56195 }, { "epoch": 1.87, "grad_norm": 0.6080626249313354, "learning_rate": 0.00018675025435085582, "loss": 1.8224, "step": 56196 }, { "epoch": 1.87, "grad_norm": 0.5991207957267761, "learning_rate": 0.00018674057565593415, "loss": 1.7115, "step": 56197 }, { "epoch": 1.87, "grad_norm": 0.5729650855064392, "learning_rate": 0.00018673089709849268, "loss": 1.7499, "step": 56198 }, { "epoch": 1.87, "grad_norm": 0.5805099010467529, "learning_rate": 0.00018672121867854325, "loss": 1.7073, "step": 56199 }, { "epoch": 1.87, "grad_norm": 0.5832701921463013, "learning_rate": 0.0001867115403960976, "loss": 1.7514, "step": 56200 }, { "epoch": 1.87, "grad_norm": 0.5581914186477661, "learning_rate": 0.0001867018622511674, "loss": 1.7274, "step": 56201 }, { "epoch": 1.87, "grad_norm": 2.80611515045166, "learning_rate": 0.00018669218424376447, "loss": 1.7662, "step": 56202 }, { "epoch": 1.87, "grad_norm": 0.5812644362449646, "learning_rate": 0.00018668250637390052, "loss": 1.7463, "step": 56203 }, { "epoch": 1.87, "grad_norm": 0.5767186284065247, "learning_rate": 0.00018667282864158724, "loss": 1.763, "step": 56204 }, { "epoch": 1.87, "grad_norm": 0.592708170413971, "learning_rate": 0.00018666315104683652, "loss": 1.8191, "step": 56205 }, { "epoch": 1.87, "grad_norm": 0.6838870644569397, "learning_rate": 0.00018665347358965998, "loss": 1.6562, "step": 56206 }, { "epoch": 1.87, "grad_norm": 0.626336932182312, "learning_rate": 0.00018664379627006956, "loss": 1.6852, "step": 56207 }, { "epoch": 1.87, "grad_norm": 0.5929291844367981, "learning_rate": 0.00018663411908807678, "loss": 1.7475, "step": 56208 }, { "epoch": 1.87, "grad_norm": 0.6299894452095032, "learning_rate": 0.00018662444204369343, "loss": 1.705, "step": 56209 }, { "epoch": 1.87, "grad_norm": 0.5997822880744934, "learning_rate": 0.0001866147651369314, "loss": 1.7437, "step": 56210 }, { "epoch": 1.87, "grad_norm": 0.5913219451904297, "learning_rate": 0.00018660508836780223, "loss": 1.771, "step": 56211 }, { "epoch": 1.87, "grad_norm": 0.5967040061950684, "learning_rate": 0.00018659541173631782, "loss": 1.7458, "step": 56212 }, { "epoch": 1.87, "grad_norm": 0.5936046242713928, "learning_rate": 0.00018658573524248995, "loss": 1.6763, "step": 56213 }, { "epoch": 1.87, "grad_norm": 0.5980157256126404, "learning_rate": 0.0001865760588863302, "loss": 1.8097, "step": 56214 }, { "epoch": 1.87, "grad_norm": 0.5692189335823059, "learning_rate": 0.0001865663826678504, "loss": 1.7016, "step": 56215 }, { "epoch": 1.87, "grad_norm": 0.5905011892318726, "learning_rate": 0.00018655670658706226, "loss": 1.735, "step": 56216 }, { "epoch": 1.87, "grad_norm": 0.5873668789863586, "learning_rate": 0.00018654703064397769, "loss": 1.6779, "step": 56217 }, { "epoch": 1.87, "grad_norm": 0.5724883079528809, "learning_rate": 0.0001865373548386082, "loss": 1.7257, "step": 56218 }, { "epoch": 1.87, "grad_norm": 0.5507696866989136, "learning_rate": 0.00018652767917096567, "loss": 1.7767, "step": 56219 }, { "epoch": 1.87, "grad_norm": 0.5527318716049194, "learning_rate": 0.00018651800364106186, "loss": 1.7987, "step": 56220 }, { "epoch": 1.87, "grad_norm": 0.5721407532691956, "learning_rate": 0.00018650832824890847, "loss": 1.6378, "step": 56221 }, { "epoch": 1.87, "grad_norm": 0.5917242765426636, "learning_rate": 0.0001864986529945172, "loss": 1.7435, "step": 56222 }, { "epoch": 1.87, "grad_norm": 0.6024255156517029, "learning_rate": 0.0001864889778778999, "loss": 1.7726, "step": 56223 }, { "epoch": 1.87, "grad_norm": 0.5975350737571716, "learning_rate": 0.00018647930289906821, "loss": 1.7464, "step": 56224 }, { "epoch": 1.87, "grad_norm": 0.5831933617591858, "learning_rate": 0.00018646962805803386, "loss": 1.6792, "step": 56225 }, { "epoch": 1.87, "grad_norm": 0.5843349099159241, "learning_rate": 0.0001864599533548087, "loss": 1.7745, "step": 56226 }, { "epoch": 1.87, "grad_norm": 0.6135761141777039, "learning_rate": 0.00018645027878940452, "loss": 1.8515, "step": 56227 }, { "epoch": 1.87, "grad_norm": 0.5839861631393433, "learning_rate": 0.0001864406043618329, "loss": 1.7779, "step": 56228 }, { "epoch": 1.87, "grad_norm": 0.5774019360542297, "learning_rate": 0.00018643093007210564, "loss": 1.7352, "step": 56229 }, { "epoch": 1.87, "grad_norm": 0.5648746490478516, "learning_rate": 0.00018642125592023456, "loss": 1.6671, "step": 56230 }, { "epoch": 1.87, "grad_norm": 0.5812769532203674, "learning_rate": 0.00018641158190623125, "loss": 1.806, "step": 56231 }, { "epoch": 1.87, "grad_norm": 0.5714751482009888, "learning_rate": 0.00018640190803010756, "loss": 1.7993, "step": 56232 }, { "epoch": 1.87, "grad_norm": 0.5998616814613342, "learning_rate": 0.00018639223429187533, "loss": 1.747, "step": 56233 }, { "epoch": 1.87, "grad_norm": 0.5814979076385498, "learning_rate": 0.000186382560691546, "loss": 1.7267, "step": 56234 }, { "epoch": 1.87, "grad_norm": 0.5854993462562561, "learning_rate": 0.0001863728872291316, "loss": 1.6839, "step": 56235 }, { "epoch": 1.87, "grad_norm": 0.5744626522064209, "learning_rate": 0.00018636321390464377, "loss": 1.7301, "step": 56236 }, { "epoch": 1.87, "grad_norm": 0.6215783357620239, "learning_rate": 0.0001863535407180943, "loss": 1.8331, "step": 56237 }, { "epoch": 1.87, "grad_norm": 0.5907004475593567, "learning_rate": 0.00018634386766949485, "loss": 1.6565, "step": 56238 }, { "epoch": 1.87, "grad_norm": 0.5781123638153076, "learning_rate": 0.00018633419475885712, "loss": 1.7295, "step": 56239 }, { "epoch": 1.87, "grad_norm": 0.5657490491867065, "learning_rate": 0.00018632452198619307, "loss": 1.777, "step": 56240 }, { "epoch": 1.87, "grad_norm": 0.5785447955131531, "learning_rate": 0.0001863148493515142, "loss": 1.7678, "step": 56241 }, { "epoch": 1.87, "grad_norm": 0.5984971523284912, "learning_rate": 0.00018630517685483237, "loss": 1.8147, "step": 56242 }, { "epoch": 1.87, "grad_norm": 0.584562361240387, "learning_rate": 0.00018629550449615937, "loss": 1.7984, "step": 56243 }, { "epoch": 1.87, "grad_norm": 0.5674003958702087, "learning_rate": 0.0001862858322755068, "loss": 1.6403, "step": 56244 }, { "epoch": 1.87, "grad_norm": 0.5971959233283997, "learning_rate": 0.0001862761601928865, "loss": 1.7536, "step": 56245 }, { "epoch": 1.87, "grad_norm": 0.7655929327011108, "learning_rate": 0.00018626648824831006, "loss": 1.7143, "step": 56246 }, { "epoch": 1.87, "grad_norm": 0.6063776612281799, "learning_rate": 0.0001862568164417896, "loss": 1.7639, "step": 56247 }, { "epoch": 1.87, "grad_norm": 0.5933383703231812, "learning_rate": 0.00018624714477333642, "loss": 1.7494, "step": 56248 }, { "epoch": 1.87, "grad_norm": 0.5812846422195435, "learning_rate": 0.0001862374732429625, "loss": 1.6785, "step": 56249 }, { "epoch": 1.87, "grad_norm": 0.5926107168197632, "learning_rate": 0.00018622780185067957, "loss": 1.7045, "step": 56250 }, { "epoch": 1.87, "grad_norm": 0.5889684557914734, "learning_rate": 0.00018621813059649926, "loss": 1.7969, "step": 56251 }, { "epoch": 1.87, "grad_norm": 0.5741806030273438, "learning_rate": 0.00018620845948043337, "loss": 1.7227, "step": 56252 }, { "epoch": 1.87, "grad_norm": 0.5855379104614258, "learning_rate": 0.00018619878850249373, "loss": 1.7657, "step": 56253 }, { "epoch": 1.87, "grad_norm": 0.5841487050056458, "learning_rate": 0.00018618911766269192, "loss": 1.7124, "step": 56254 }, { "epoch": 1.87, "grad_norm": 0.567588210105896, "learning_rate": 0.0001861794469610397, "loss": 1.6901, "step": 56255 }, { "epoch": 1.87, "grad_norm": 0.5561389923095703, "learning_rate": 0.0001861697763975489, "loss": 1.6973, "step": 56256 }, { "epoch": 1.87, "grad_norm": 0.5929005146026611, "learning_rate": 0.00018616010597223128, "loss": 1.7268, "step": 56257 }, { "epoch": 1.87, "grad_norm": 0.5871497392654419, "learning_rate": 0.00018615043568509846, "loss": 1.7184, "step": 56258 }, { "epoch": 1.87, "grad_norm": 0.57814621925354, "learning_rate": 0.00018614076553616227, "loss": 1.7415, "step": 56259 }, { "epoch": 1.87, "grad_norm": 0.5699395537376404, "learning_rate": 0.00018613109552543443, "loss": 1.7589, "step": 56260 }, { "epoch": 1.87, "grad_norm": 0.5700502991676331, "learning_rate": 0.00018612142565292664, "loss": 1.7981, "step": 56261 }, { "epoch": 1.87, "grad_norm": 0.5741065740585327, "learning_rate": 0.00018611175591865054, "loss": 1.7629, "step": 56262 }, { "epoch": 1.87, "grad_norm": 0.5870324969291687, "learning_rate": 0.00018610208632261824, "loss": 1.7297, "step": 56263 }, { "epoch": 1.87, "grad_norm": 0.5616134405136108, "learning_rate": 0.000186092416864841, "loss": 1.7274, "step": 56264 }, { "epoch": 1.87, "grad_norm": 0.5828726291656494, "learning_rate": 0.00018608274754533085, "loss": 1.7621, "step": 56265 }, { "epoch": 1.87, "grad_norm": 0.600062906742096, "learning_rate": 0.0001860730783640995, "loss": 1.7037, "step": 56266 }, { "epoch": 1.87, "grad_norm": 0.595464825630188, "learning_rate": 0.00018606340932115866, "loss": 1.7106, "step": 56267 }, { "epoch": 1.87, "grad_norm": 0.5833181738853455, "learning_rate": 0.00018605374041652, "loss": 1.7412, "step": 56268 }, { "epoch": 1.87, "grad_norm": 0.5870839357376099, "learning_rate": 0.00018604407165019528, "loss": 1.7624, "step": 56269 }, { "epoch": 1.87, "grad_norm": 0.6049970984458923, "learning_rate": 0.0001860344030221964, "loss": 1.76, "step": 56270 }, { "epoch": 1.87, "grad_norm": 0.5692600607872009, "learning_rate": 0.0001860247345325348, "loss": 1.7605, "step": 56271 }, { "epoch": 1.87, "grad_norm": 0.579795241355896, "learning_rate": 0.00018601506618122246, "loss": 1.7258, "step": 56272 }, { "epoch": 1.87, "grad_norm": 0.5722761154174805, "learning_rate": 0.00018600539796827104, "loss": 1.7221, "step": 56273 }, { "epoch": 1.87, "grad_norm": 0.5639479756355286, "learning_rate": 0.00018599572989369228, "loss": 1.7918, "step": 56274 }, { "epoch": 1.87, "grad_norm": 0.5807411074638367, "learning_rate": 0.00018598606195749785, "loss": 1.6896, "step": 56275 }, { "epoch": 1.87, "grad_norm": 0.5555434823036194, "learning_rate": 0.00018597639415969953, "loss": 1.6955, "step": 56276 }, { "epoch": 1.87, "grad_norm": 0.5867510437965393, "learning_rate": 0.00018596672650030924, "loss": 1.6803, "step": 56277 }, { "epoch": 1.87, "grad_norm": 0.5834146738052368, "learning_rate": 0.00018595705897933834, "loss": 1.7395, "step": 56278 }, { "epoch": 1.87, "grad_norm": 0.5914411544799805, "learning_rate": 0.00018594739159679882, "loss": 1.7041, "step": 56279 }, { "epoch": 1.87, "grad_norm": 0.6092196106910706, "learning_rate": 0.00018593772435270242, "loss": 1.6571, "step": 56280 }, { "epoch": 1.87, "grad_norm": 0.5646829009056091, "learning_rate": 0.00018592805724706077, "loss": 1.6847, "step": 56281 }, { "epoch": 1.87, "grad_norm": 0.5839349031448364, "learning_rate": 0.0001859183902798857, "loss": 1.7183, "step": 56282 }, { "epoch": 1.87, "grad_norm": 0.5820813179016113, "learning_rate": 0.0001859087234511889, "loss": 1.6815, "step": 56283 }, { "epoch": 1.87, "grad_norm": 0.5871643424034119, "learning_rate": 0.00018589905676098204, "loss": 1.7062, "step": 56284 }, { "epoch": 1.87, "grad_norm": 0.5708918571472168, "learning_rate": 0.00018588939020927685, "loss": 1.7274, "step": 56285 }, { "epoch": 1.87, "grad_norm": 0.6443006992340088, "learning_rate": 0.0001858797237960852, "loss": 1.728, "step": 56286 }, { "epoch": 1.87, "grad_norm": 0.5879496335983276, "learning_rate": 0.0001858700575214188, "loss": 1.6574, "step": 56287 }, { "epoch": 1.87, "grad_norm": 0.5682125091552734, "learning_rate": 0.00018586039138528926, "loss": 1.6765, "step": 56288 }, { "epoch": 1.87, "grad_norm": 0.5752070546150208, "learning_rate": 0.00018585072538770844, "loss": 1.729, "step": 56289 }, { "epoch": 1.87, "grad_norm": 0.5814886689186096, "learning_rate": 0.000185841059528688, "loss": 1.7479, "step": 56290 }, { "epoch": 1.87, "grad_norm": 0.5872571468353271, "learning_rate": 0.00018583139380823972, "loss": 1.7614, "step": 56291 }, { "epoch": 1.87, "grad_norm": 0.5992026925086975, "learning_rate": 0.00018582172822637517, "loss": 1.7807, "step": 56292 }, { "epoch": 1.87, "grad_norm": 0.5590859055519104, "learning_rate": 0.00018581206278310647, "loss": 1.7521, "step": 56293 }, { "epoch": 1.87, "grad_norm": 0.5894873738288879, "learning_rate": 0.00018580239747844487, "loss": 1.7045, "step": 56294 }, { "epoch": 1.87, "grad_norm": 0.5774818062782288, "learning_rate": 0.00018579273231240245, "loss": 1.6311, "step": 56295 }, { "epoch": 1.87, "grad_norm": 0.5689765214920044, "learning_rate": 0.00018578306728499078, "loss": 1.7353, "step": 56296 }, { "epoch": 1.87, "grad_norm": 0.5741506814956665, "learning_rate": 0.00018577340239622168, "loss": 1.7673, "step": 56297 }, { "epoch": 1.87, "grad_norm": 0.5896691083908081, "learning_rate": 0.0001857637376461068, "loss": 1.6943, "step": 56298 }, { "epoch": 1.87, "grad_norm": 0.5750758647918701, "learning_rate": 0.00018575407303465785, "loss": 1.689, "step": 56299 }, { "epoch": 1.87, "grad_norm": 0.6169111132621765, "learning_rate": 0.0001857444085618868, "loss": 1.732, "step": 56300 }, { "epoch": 1.87, "grad_norm": 0.5860762596130371, "learning_rate": 0.00018573474422780504, "loss": 1.7757, "step": 56301 }, { "epoch": 1.87, "grad_norm": 1.2857531309127808, "learning_rate": 0.0001857250800324245, "loss": 1.7929, "step": 56302 }, { "epoch": 1.87, "grad_norm": 0.5810738205909729, "learning_rate": 0.00018571541597575697, "loss": 1.6642, "step": 56303 }, { "epoch": 1.87, "grad_norm": 0.5834296941757202, "learning_rate": 0.00018570575205781399, "loss": 1.7662, "step": 56304 }, { "epoch": 1.87, "grad_norm": 0.5696992874145508, "learning_rate": 0.00018569608827860738, "loss": 1.6653, "step": 56305 }, { "epoch": 1.87, "grad_norm": 0.5981510281562805, "learning_rate": 0.00018568642463814887, "loss": 1.744, "step": 56306 }, { "epoch": 1.87, "grad_norm": 0.5874832272529602, "learning_rate": 0.00018567676113645027, "loss": 1.7676, "step": 56307 }, { "epoch": 1.87, "grad_norm": 0.5846959948539734, "learning_rate": 0.00018566709777352317, "loss": 1.7877, "step": 56308 }, { "epoch": 1.87, "grad_norm": 0.5853227376937866, "learning_rate": 0.00018565743454937938, "loss": 1.7235, "step": 56309 }, { "epoch": 1.87, "grad_norm": 0.5674139857292175, "learning_rate": 0.0001856477714640307, "loss": 1.7124, "step": 56310 }, { "epoch": 1.87, "grad_norm": 0.5885957479476929, "learning_rate": 0.0001856381085174887, "loss": 1.6303, "step": 56311 }, { "epoch": 1.87, "grad_norm": 0.5865092873573303, "learning_rate": 0.0001856284457097652, "loss": 1.6977, "step": 56312 }, { "epoch": 1.87, "grad_norm": 0.5718674063682556, "learning_rate": 0.00018561878304087186, "loss": 1.7171, "step": 56313 }, { "epoch": 1.87, "grad_norm": 0.6124682426452637, "learning_rate": 0.00018560912051082055, "loss": 1.8028, "step": 56314 }, { "epoch": 1.87, "grad_norm": 0.5652614831924438, "learning_rate": 0.00018559945811962285, "loss": 1.792, "step": 56315 }, { "epoch": 1.87, "grad_norm": 0.5789269208908081, "learning_rate": 0.00018558979586729058, "loss": 1.7506, "step": 56316 }, { "epoch": 1.87, "grad_norm": 0.5779136419296265, "learning_rate": 0.00018558013375383548, "loss": 1.7846, "step": 56317 }, { "epoch": 1.87, "grad_norm": 0.5798994302749634, "learning_rate": 0.0001855704717792692, "loss": 1.753, "step": 56318 }, { "epoch": 1.87, "grad_norm": 0.5934127569198608, "learning_rate": 0.0001855608099436035, "loss": 1.7344, "step": 56319 }, { "epoch": 1.87, "grad_norm": 0.5837206244468689, "learning_rate": 0.00018555114824685018, "loss": 1.7416, "step": 56320 }, { "epoch": 1.87, "grad_norm": 0.5874433517456055, "learning_rate": 0.00018554148668902077, "loss": 1.7463, "step": 56321 }, { "epoch": 1.87, "grad_norm": 0.5960611701011658, "learning_rate": 0.00018553182527012718, "loss": 1.7366, "step": 56322 }, { "epoch": 1.87, "grad_norm": 0.5758479237556458, "learning_rate": 0.0001855221639901811, "loss": 1.7315, "step": 56323 }, { "epoch": 1.87, "grad_norm": 0.5733141303062439, "learning_rate": 0.0001855125028491943, "loss": 1.7861, "step": 56324 }, { "epoch": 1.87, "grad_norm": 0.6036107540130615, "learning_rate": 0.0001855028418471784, "loss": 1.6653, "step": 56325 }, { "epoch": 1.87, "grad_norm": 0.5702257752418518, "learning_rate": 0.00018549318098414519, "loss": 1.7467, "step": 56326 }, { "epoch": 1.87, "grad_norm": 0.5884594917297363, "learning_rate": 0.00018548352026010647, "loss": 1.6893, "step": 56327 }, { "epoch": 1.87, "grad_norm": 0.5747120976448059, "learning_rate": 0.00018547385967507369, "loss": 1.7257, "step": 56328 }, { "epoch": 1.87, "grad_norm": 0.5909407138824463, "learning_rate": 0.00018546419922905888, "loss": 1.6794, "step": 56329 }, { "epoch": 1.87, "grad_norm": 0.5758976936340332, "learning_rate": 0.00018545453892207373, "loss": 1.6837, "step": 56330 }, { "epoch": 1.87, "grad_norm": 0.5824741721153259, "learning_rate": 0.00018544487875412976, "loss": 1.703, "step": 56331 }, { "epoch": 1.87, "grad_norm": 0.5951324105262756, "learning_rate": 0.0001854352187252389, "loss": 1.6463, "step": 56332 }, { "epoch": 1.87, "grad_norm": 0.6027312278747559, "learning_rate": 0.00018542555883541273, "loss": 1.6371, "step": 56333 }, { "epoch": 1.87, "grad_norm": 0.6132921576499939, "learning_rate": 0.00018541589908466325, "loss": 1.6841, "step": 56334 }, { "epoch": 1.87, "grad_norm": 0.6010757088661194, "learning_rate": 0.00018540623947300175, "loss": 1.7123, "step": 56335 }, { "epoch": 1.87, "grad_norm": 0.5764088034629822, "learning_rate": 0.00018539658000044025, "loss": 1.7415, "step": 56336 }, { "epoch": 1.87, "grad_norm": 0.5851728320121765, "learning_rate": 0.0001853869206669905, "loss": 1.7809, "step": 56337 }, { "epoch": 1.87, "grad_norm": 0.5680225491523743, "learning_rate": 0.00018537726147266413, "loss": 1.7105, "step": 56338 }, { "epoch": 1.87, "grad_norm": 0.5745171308517456, "learning_rate": 0.00018536760241747281, "loss": 1.7333, "step": 56339 }, { "epoch": 1.87, "grad_norm": 0.5973657369613647, "learning_rate": 0.0001853579435014284, "loss": 1.7788, "step": 56340 }, { "epoch": 1.87, "grad_norm": 0.5781413316726685, "learning_rate": 0.00018534828472454247, "loss": 1.7432, "step": 56341 }, { "epoch": 1.87, "grad_norm": 0.58768630027771, "learning_rate": 0.00018533862608682687, "loss": 1.7951, "step": 56342 }, { "epoch": 1.87, "grad_norm": 0.5860021710395813, "learning_rate": 0.00018532896758829322, "loss": 1.6973, "step": 56343 }, { "epoch": 1.87, "grad_norm": 0.6254052519798279, "learning_rate": 0.00018531930922895346, "loss": 1.7256, "step": 56344 }, { "epoch": 1.87, "grad_norm": 0.597959578037262, "learning_rate": 0.00018530965100881907, "loss": 1.8036, "step": 56345 }, { "epoch": 1.87, "grad_norm": 0.5798053741455078, "learning_rate": 0.00018529999292790186, "loss": 1.7465, "step": 56346 }, { "epoch": 1.87, "grad_norm": 0.5909086465835571, "learning_rate": 0.00018529033498621358, "loss": 1.7626, "step": 56347 }, { "epoch": 1.87, "grad_norm": 0.5635033845901489, "learning_rate": 0.00018528067718376595, "loss": 1.7338, "step": 56348 }, { "epoch": 1.87, "grad_norm": 0.5934145450592041, "learning_rate": 0.00018527101952057055, "loss": 1.706, "step": 56349 }, { "epoch": 1.87, "grad_norm": 0.5679618716239929, "learning_rate": 0.00018526136199663944, "loss": 1.7253, "step": 56350 }, { "epoch": 1.87, "grad_norm": 0.5896379351615906, "learning_rate": 0.00018525170461198393, "loss": 1.7909, "step": 56351 }, { "epoch": 1.87, "grad_norm": 0.6075180768966675, "learning_rate": 0.000185242047366616, "loss": 1.7346, "step": 56352 }, { "epoch": 1.87, "grad_norm": 0.5853242874145508, "learning_rate": 0.00018523239026054737, "loss": 1.7229, "step": 56353 }, { "epoch": 1.87, "grad_norm": 0.5743592381477356, "learning_rate": 0.00018522273329378973, "loss": 1.7222, "step": 56354 }, { "epoch": 1.87, "grad_norm": 0.5784553289413452, "learning_rate": 0.0001852130764663547, "loss": 1.7441, "step": 56355 }, { "epoch": 1.87, "grad_norm": 0.5807310342788696, "learning_rate": 0.00018520341977825404, "loss": 1.7575, "step": 56356 }, { "epoch": 1.88, "grad_norm": 0.5757361054420471, "learning_rate": 0.00018519376322949973, "loss": 1.7535, "step": 56357 }, { "epoch": 1.88, "grad_norm": 0.5869264006614685, "learning_rate": 0.00018518410682010302, "loss": 1.72, "step": 56358 }, { "epoch": 1.88, "grad_norm": 0.5857718586921692, "learning_rate": 0.000185174450550076, "loss": 1.6869, "step": 56359 }, { "epoch": 1.88, "grad_norm": 0.5764046907424927, "learning_rate": 0.00018516479441943034, "loss": 1.6867, "step": 56360 }, { "epoch": 1.88, "grad_norm": 0.5763877034187317, "learning_rate": 0.00018515513842817764, "loss": 1.7115, "step": 56361 }, { "epoch": 1.88, "grad_norm": 0.5752043724060059, "learning_rate": 0.00018514548257632965, "loss": 1.6905, "step": 56362 }, { "epoch": 1.88, "grad_norm": 0.5831003189086914, "learning_rate": 0.0001851358268638981, "loss": 1.6269, "step": 56363 }, { "epoch": 1.88, "grad_norm": 0.5704144835472107, "learning_rate": 0.00018512617129089488, "loss": 1.636, "step": 56364 }, { "epoch": 1.88, "grad_norm": 0.5708524584770203, "learning_rate": 0.00018511651585733141, "loss": 1.6999, "step": 56365 }, { "epoch": 1.88, "grad_norm": 0.5683344602584839, "learning_rate": 0.00018510686056321957, "loss": 1.7241, "step": 56366 }, { "epoch": 1.88, "grad_norm": 0.5779455900192261, "learning_rate": 0.00018509720540857118, "loss": 1.7134, "step": 56367 }, { "epoch": 1.88, "grad_norm": 0.5994879603385925, "learning_rate": 0.00018508755039339775, "loss": 1.7676, "step": 56368 }, { "epoch": 1.88, "grad_norm": 0.5899763703346252, "learning_rate": 0.00018507789551771114, "loss": 1.7236, "step": 56369 }, { "epoch": 1.88, "grad_norm": 0.5981875061988831, "learning_rate": 0.0001850682407815231, "loss": 1.6764, "step": 56370 }, { "epoch": 1.88, "grad_norm": 0.5831061005592346, "learning_rate": 0.0001850585861848452, "loss": 1.7524, "step": 56371 }, { "epoch": 1.88, "grad_norm": 0.5848836898803711, "learning_rate": 0.00018504893172768916, "loss": 1.7172, "step": 56372 }, { "epoch": 1.88, "grad_norm": 0.5710499286651611, "learning_rate": 0.0001850392774100669, "loss": 1.7162, "step": 56373 }, { "epoch": 1.88, "grad_norm": 0.5751835107803345, "learning_rate": 0.00018502962323199004, "loss": 1.6624, "step": 56374 }, { "epoch": 1.88, "grad_norm": 0.5800622701644897, "learning_rate": 0.0001850199691934702, "loss": 1.7296, "step": 56375 }, { "epoch": 1.88, "grad_norm": 0.5907113552093506, "learning_rate": 0.00018501031529451922, "loss": 1.6882, "step": 56376 }, { "epoch": 1.88, "grad_norm": 0.5733291506767273, "learning_rate": 0.0001850006615351488, "loss": 1.7352, "step": 56377 }, { "epoch": 1.88, "grad_norm": 0.5914897918701172, "learning_rate": 0.00018499100791537058, "loss": 1.7643, "step": 56378 }, { "epoch": 1.88, "grad_norm": 0.5894184112548828, "learning_rate": 0.00018498135443519625, "loss": 1.7251, "step": 56379 }, { "epoch": 1.88, "grad_norm": 0.5984609127044678, "learning_rate": 0.00018497170109463786, "loss": 1.7315, "step": 56380 }, { "epoch": 1.88, "grad_norm": 0.6001360416412354, "learning_rate": 0.0001849620478937066, "loss": 1.758, "step": 56381 }, { "epoch": 1.88, "grad_norm": 0.607650637626648, "learning_rate": 0.00018495239483241462, "loss": 1.6365, "step": 56382 }, { "epoch": 1.88, "grad_norm": 0.5916895270347595, "learning_rate": 0.00018494274191077344, "loss": 1.6825, "step": 56383 }, { "epoch": 1.88, "grad_norm": 0.5985897183418274, "learning_rate": 0.00018493308912879484, "loss": 1.7504, "step": 56384 }, { "epoch": 1.88, "grad_norm": 0.6009621620178223, "learning_rate": 0.0001849234364864905, "loss": 1.7202, "step": 56385 }, { "epoch": 1.88, "grad_norm": 0.6035997867584229, "learning_rate": 0.00018491378398387208, "loss": 1.6957, "step": 56386 }, { "epoch": 1.88, "grad_norm": 0.5970312356948853, "learning_rate": 0.00018490413162095155, "loss": 1.7637, "step": 56387 }, { "epoch": 1.88, "grad_norm": 0.5984460115432739, "learning_rate": 0.00018489447939774026, "loss": 1.7589, "step": 56388 }, { "epoch": 1.88, "grad_norm": 0.6101243495941162, "learning_rate": 0.00018488482731425016, "loss": 1.723, "step": 56389 }, { "epoch": 1.88, "grad_norm": 0.5982292294502258, "learning_rate": 0.00018487517537049304, "loss": 1.7156, "step": 56390 }, { "epoch": 1.88, "grad_norm": 0.5804169178009033, "learning_rate": 0.00018486552356648036, "loss": 1.7188, "step": 56391 }, { "epoch": 1.88, "grad_norm": 0.6057165265083313, "learning_rate": 0.000184855871902224, "loss": 1.689, "step": 56392 }, { "epoch": 1.88, "grad_norm": 0.5873373746871948, "learning_rate": 0.00018484622037773558, "loss": 1.6447, "step": 56393 }, { "epoch": 1.88, "grad_norm": 0.6066939830780029, "learning_rate": 0.00018483656899302708, "loss": 1.6826, "step": 56394 }, { "epoch": 1.88, "grad_norm": 0.5993371605873108, "learning_rate": 0.00018482691774810982, "loss": 1.6388, "step": 56395 }, { "epoch": 1.88, "grad_norm": 0.5940225720405579, "learning_rate": 0.00018481726664299579, "loss": 1.7797, "step": 56396 }, { "epoch": 1.88, "grad_norm": 0.6052525043487549, "learning_rate": 0.00018480761567769664, "loss": 1.7654, "step": 56397 }, { "epoch": 1.88, "grad_norm": 0.5786620378494263, "learning_rate": 0.00018479796485222405, "loss": 1.678, "step": 56398 }, { "epoch": 1.88, "grad_norm": 0.5808671116828918, "learning_rate": 0.00018478831416658974, "loss": 1.7284, "step": 56399 }, { "epoch": 1.88, "grad_norm": 0.5948267579078674, "learning_rate": 0.00018477866362080552, "loss": 1.781, "step": 56400 }, { "epoch": 1.88, "grad_norm": 0.5954374074935913, "learning_rate": 0.00018476901321488296, "loss": 1.6764, "step": 56401 }, { "epoch": 1.88, "grad_norm": 0.5879339575767517, "learning_rate": 0.0001847593629488338, "loss": 1.7255, "step": 56402 }, { "epoch": 1.88, "grad_norm": 0.5863478183746338, "learning_rate": 0.00018474971282266984, "loss": 1.7459, "step": 56403 }, { "epoch": 1.88, "grad_norm": 0.5770202875137329, "learning_rate": 0.00018474006283640277, "loss": 1.7448, "step": 56404 }, { "epoch": 1.88, "grad_norm": 0.5742554068565369, "learning_rate": 0.00018473041299004426, "loss": 1.7698, "step": 56405 }, { "epoch": 1.88, "grad_norm": 0.5710617303848267, "learning_rate": 0.000184720763283606, "loss": 1.6616, "step": 56406 }, { "epoch": 1.88, "grad_norm": 0.6039716005325317, "learning_rate": 0.00018471111371709986, "loss": 1.7786, "step": 56407 }, { "epoch": 1.88, "grad_norm": 0.6009865999221802, "learning_rate": 0.0001847014642905374, "loss": 1.7871, "step": 56408 }, { "epoch": 1.88, "grad_norm": 0.6356561779975891, "learning_rate": 0.00018469181500393024, "loss": 1.6863, "step": 56409 }, { "epoch": 1.88, "grad_norm": 0.5857741236686707, "learning_rate": 0.00018468216585729044, "loss": 1.716, "step": 56410 }, { "epoch": 1.88, "grad_norm": 0.5736016631126404, "learning_rate": 0.00018467251685062934, "loss": 1.7687, "step": 56411 }, { "epoch": 1.88, "grad_norm": 0.5821307897567749, "learning_rate": 0.0001846628679839588, "loss": 1.7593, "step": 56412 }, { "epoch": 1.88, "grad_norm": 0.6081649661064148, "learning_rate": 0.00018465321925729064, "loss": 1.7277, "step": 56413 }, { "epoch": 1.88, "grad_norm": 0.5816329717636108, "learning_rate": 0.00018464357067063647, "loss": 1.6865, "step": 56414 }, { "epoch": 1.88, "grad_norm": 0.6162465810775757, "learning_rate": 0.000184633922224008, "loss": 1.767, "step": 56415 }, { "epoch": 1.88, "grad_norm": 0.6029888987541199, "learning_rate": 0.00018462427391741684, "loss": 1.7704, "step": 56416 }, { "epoch": 1.88, "grad_norm": 0.5716372728347778, "learning_rate": 0.000184614625750875, "loss": 1.7018, "step": 56417 }, { "epoch": 1.88, "grad_norm": 0.5888556241989136, "learning_rate": 0.00018460497772439385, "loss": 1.7416, "step": 56418 }, { "epoch": 1.88, "grad_norm": 0.5895245671272278, "learning_rate": 0.00018459532983798534, "loss": 1.7015, "step": 56419 }, { "epoch": 1.88, "grad_norm": 0.6240014433860779, "learning_rate": 0.0001845856820916611, "loss": 1.7049, "step": 56420 }, { "epoch": 1.88, "grad_norm": 0.6084954142570496, "learning_rate": 0.00018457603448543278, "loss": 1.8032, "step": 56421 }, { "epoch": 1.88, "grad_norm": 0.5867515802383423, "learning_rate": 0.00018456638701931215, "loss": 1.7126, "step": 56422 }, { "epoch": 1.88, "grad_norm": 0.5941675901412964, "learning_rate": 0.0001845567396933109, "loss": 1.7744, "step": 56423 }, { "epoch": 1.88, "grad_norm": 0.6043799519538879, "learning_rate": 0.00018454709250744083, "loss": 1.7158, "step": 56424 }, { "epoch": 1.88, "grad_norm": 0.5860251784324646, "learning_rate": 0.00018453744546171355, "loss": 1.7667, "step": 56425 }, { "epoch": 1.88, "grad_norm": 0.6246358752250671, "learning_rate": 0.00018452779855614076, "loss": 1.8343, "step": 56426 }, { "epoch": 1.88, "grad_norm": 0.5869385004043579, "learning_rate": 0.00018451815179073428, "loss": 1.7093, "step": 56427 }, { "epoch": 1.88, "grad_norm": 0.5847687721252441, "learning_rate": 0.00018450850516550573, "loss": 1.7593, "step": 56428 }, { "epoch": 1.88, "grad_norm": 0.5886812210083008, "learning_rate": 0.00018449885868046677, "loss": 1.7884, "step": 56429 }, { "epoch": 1.88, "grad_norm": 0.6030128598213196, "learning_rate": 0.00018448921233562928, "loss": 1.8041, "step": 56430 }, { "epoch": 1.88, "grad_norm": 0.5912336111068726, "learning_rate": 0.00018447956613100474, "loss": 1.7563, "step": 56431 }, { "epoch": 1.88, "grad_norm": 0.643500804901123, "learning_rate": 0.00018446992006660508, "loss": 1.7997, "step": 56432 }, { "epoch": 1.88, "grad_norm": 0.6047309041023254, "learning_rate": 0.00018446027414244185, "loss": 1.702, "step": 56433 }, { "epoch": 1.88, "grad_norm": 0.6107562780380249, "learning_rate": 0.00018445062835852692, "loss": 1.7235, "step": 56434 }, { "epoch": 1.88, "grad_norm": 0.6211362481117249, "learning_rate": 0.00018444098271487187, "loss": 1.736, "step": 56435 }, { "epoch": 1.88, "grad_norm": 0.5647592544555664, "learning_rate": 0.0001844313372114884, "loss": 1.663, "step": 56436 }, { "epoch": 1.88, "grad_norm": 0.5800124406814575, "learning_rate": 0.0001844216918483883, "loss": 1.7544, "step": 56437 }, { "epoch": 1.88, "grad_norm": 0.6208360195159912, "learning_rate": 0.00018441204662558312, "loss": 1.7915, "step": 56438 }, { "epoch": 1.88, "grad_norm": 0.6061001420021057, "learning_rate": 0.0001844024015430848, "loss": 1.7976, "step": 56439 }, { "epoch": 1.88, "grad_norm": 0.6129659414291382, "learning_rate": 0.00018439275660090486, "loss": 1.7414, "step": 56440 }, { "epoch": 1.88, "grad_norm": 0.5928890109062195, "learning_rate": 0.00018438311179905522, "loss": 1.7227, "step": 56441 }, { "epoch": 1.88, "grad_norm": 0.5852135419845581, "learning_rate": 0.00018437346713754733, "loss": 1.7498, "step": 56442 }, { "epoch": 1.88, "grad_norm": 0.5853811502456665, "learning_rate": 0.00018436382261639294, "loss": 1.7921, "step": 56443 }, { "epoch": 1.88, "grad_norm": 0.5964411497116089, "learning_rate": 0.00018435417823560405, "loss": 1.7636, "step": 56444 }, { "epoch": 1.88, "grad_norm": 0.6087105870246887, "learning_rate": 0.00018434453399519195, "loss": 1.758, "step": 56445 }, { "epoch": 1.88, "grad_norm": 0.602852463722229, "learning_rate": 0.00018433488989516865, "loss": 1.6792, "step": 56446 }, { "epoch": 1.88, "grad_norm": 0.6060737371444702, "learning_rate": 0.00018432524593554577, "loss": 1.7728, "step": 56447 }, { "epoch": 1.88, "grad_norm": 0.5671210289001465, "learning_rate": 0.00018431560211633495, "loss": 1.733, "step": 56448 }, { "epoch": 1.88, "grad_norm": 0.575425922870636, "learning_rate": 0.00018430595843754792, "loss": 1.7211, "step": 56449 }, { "epoch": 1.88, "grad_norm": 0.5970407128334045, "learning_rate": 0.00018429631489919637, "loss": 1.8103, "step": 56450 }, { "epoch": 1.88, "grad_norm": 0.562650740146637, "learning_rate": 0.0001842866715012922, "loss": 1.7181, "step": 56451 }, { "epoch": 1.88, "grad_norm": 0.5952248573303223, "learning_rate": 0.00018427702824384682, "loss": 1.6998, "step": 56452 }, { "epoch": 1.88, "grad_norm": 0.5913777351379395, "learning_rate": 0.0001842673851268721, "loss": 1.7551, "step": 56453 }, { "epoch": 1.88, "grad_norm": 0.5797672867774963, "learning_rate": 0.00018425774215037981, "loss": 1.7155, "step": 56454 }, { "epoch": 1.88, "grad_norm": 0.5867234468460083, "learning_rate": 0.0001842480993143815, "loss": 1.6534, "step": 56455 }, { "epoch": 1.88, "grad_norm": 0.5944183468818665, "learning_rate": 0.00018423845661888897, "loss": 1.7483, "step": 56456 }, { "epoch": 1.88, "grad_norm": 0.600573718547821, "learning_rate": 0.00018422881406391387, "loss": 1.6914, "step": 56457 }, { "epoch": 1.88, "grad_norm": 0.5705841183662415, "learning_rate": 0.00018421917164946797, "loss": 1.7981, "step": 56458 }, { "epoch": 1.88, "grad_norm": 0.5884420871734619, "learning_rate": 0.00018420952937556282, "loss": 1.742, "step": 56459 }, { "epoch": 1.88, "grad_norm": 0.5877729654312134, "learning_rate": 0.0001841998872422103, "loss": 1.7017, "step": 56460 }, { "epoch": 1.88, "grad_norm": 0.5760862827301025, "learning_rate": 0.00018419024524942215, "loss": 1.7219, "step": 56461 }, { "epoch": 1.88, "grad_norm": 0.5894151329994202, "learning_rate": 0.00018418060339720987, "loss": 1.6902, "step": 56462 }, { "epoch": 1.88, "grad_norm": 0.5883505940437317, "learning_rate": 0.00018417096168558533, "loss": 1.7699, "step": 56463 }, { "epoch": 1.88, "grad_norm": 0.5762234926223755, "learning_rate": 0.00018416132011456023, "loss": 1.7444, "step": 56464 }, { "epoch": 1.88, "grad_norm": 0.6041070818901062, "learning_rate": 0.00018415167868414612, "loss": 1.7388, "step": 56465 }, { "epoch": 1.88, "grad_norm": 0.5728855729103088, "learning_rate": 0.00018414203739435474, "loss": 1.7778, "step": 56466 }, { "epoch": 1.88, "grad_norm": 0.6335024833679199, "learning_rate": 0.00018413239624519805, "loss": 1.7071, "step": 56467 }, { "epoch": 1.88, "grad_norm": 0.5853342413902283, "learning_rate": 0.0001841227552366874, "loss": 1.7202, "step": 56468 }, { "epoch": 1.88, "grad_norm": 0.589081883430481, "learning_rate": 0.00018411311436883468, "loss": 1.7205, "step": 56469 }, { "epoch": 1.88, "grad_norm": 0.5825109481811523, "learning_rate": 0.00018410347364165158, "loss": 1.8028, "step": 56470 }, { "epoch": 1.88, "grad_norm": 0.5718794465065002, "learning_rate": 0.00018409383305514985, "loss": 1.7815, "step": 56471 }, { "epoch": 1.88, "grad_norm": 0.5805202722549438, "learning_rate": 0.00018408419260934107, "loss": 1.7734, "step": 56472 }, { "epoch": 1.88, "grad_norm": 0.6027653217315674, "learning_rate": 0.00018407455230423693, "loss": 1.7793, "step": 56473 }, { "epoch": 1.88, "grad_norm": 0.588608980178833, "learning_rate": 0.00018406491213984936, "loss": 1.7018, "step": 56474 }, { "epoch": 1.88, "grad_norm": 0.5816527009010315, "learning_rate": 0.00018405527211618977, "loss": 1.6814, "step": 56475 }, { "epoch": 1.88, "grad_norm": 0.5844159722328186, "learning_rate": 0.00018404563223327003, "loss": 1.7737, "step": 56476 }, { "epoch": 1.88, "grad_norm": 0.5701022148132324, "learning_rate": 0.0001840359924911019, "loss": 1.6276, "step": 56477 }, { "epoch": 1.88, "grad_norm": 0.6025455594062805, "learning_rate": 0.00018402635288969688, "loss": 1.7413, "step": 56478 }, { "epoch": 1.88, "grad_norm": 0.6252598166465759, "learning_rate": 0.0001840167134290668, "loss": 1.736, "step": 56479 }, { "epoch": 1.88, "grad_norm": 0.6015552282333374, "learning_rate": 0.00018400707410922332, "loss": 1.739, "step": 56480 }, { "epoch": 1.88, "grad_norm": 0.6068979501724243, "learning_rate": 0.00018399743493017828, "loss": 1.8167, "step": 56481 }, { "epoch": 1.88, "grad_norm": 0.568308413028717, "learning_rate": 0.0001839877958919431, "loss": 1.711, "step": 56482 }, { "epoch": 1.88, "grad_norm": 0.5861597061157227, "learning_rate": 0.00018397815699452971, "loss": 1.7524, "step": 56483 }, { "epoch": 1.88, "grad_norm": 0.6096687912940979, "learning_rate": 0.00018396851823794977, "loss": 1.7555, "step": 56484 }, { "epoch": 1.88, "grad_norm": 0.9179266691207886, "learning_rate": 0.00018395887962221495, "loss": 1.6912, "step": 56485 }, { "epoch": 1.88, "grad_norm": 0.6112377047538757, "learning_rate": 0.00018394924114733695, "loss": 1.7605, "step": 56486 }, { "epoch": 1.88, "grad_norm": 0.593142569065094, "learning_rate": 0.00018393960281332748, "loss": 1.7024, "step": 56487 }, { "epoch": 1.88, "grad_norm": 0.5992514491081238, "learning_rate": 0.00018392996462019822, "loss": 1.7039, "step": 56488 }, { "epoch": 1.88, "grad_norm": 0.5810289978981018, "learning_rate": 0.00018392032656796078, "loss": 1.6877, "step": 56489 }, { "epoch": 1.88, "grad_norm": 0.6010001301765442, "learning_rate": 0.00018391068865662702, "loss": 1.7033, "step": 56490 }, { "epoch": 1.88, "grad_norm": 0.6092328429222107, "learning_rate": 0.0001839010508862087, "loss": 1.7282, "step": 56491 }, { "epoch": 1.88, "grad_norm": 0.5819879174232483, "learning_rate": 0.0001838914132567173, "loss": 1.7545, "step": 56492 }, { "epoch": 1.88, "grad_norm": 0.571226179599762, "learning_rate": 0.0001838817757681646, "loss": 1.6945, "step": 56493 }, { "epoch": 1.88, "grad_norm": 0.5749040246009827, "learning_rate": 0.00018387213842056238, "loss": 1.6764, "step": 56494 }, { "epoch": 1.88, "grad_norm": 0.6406533718109131, "learning_rate": 0.00018386250121392222, "loss": 1.7909, "step": 56495 }, { "epoch": 1.88, "grad_norm": 0.5982289910316467, "learning_rate": 0.00018385286414825583, "loss": 1.6498, "step": 56496 }, { "epoch": 1.88, "grad_norm": 0.5824163556098938, "learning_rate": 0.0001838432272235751, "loss": 1.8029, "step": 56497 }, { "epoch": 1.88, "grad_norm": 0.5872124433517456, "learning_rate": 0.0001838335904398914, "loss": 1.7508, "step": 56498 }, { "epoch": 1.88, "grad_norm": 0.5837256908416748, "learning_rate": 0.0001838239537972167, "loss": 1.7101, "step": 56499 }, { "epoch": 1.88, "grad_norm": 0.5711679458618164, "learning_rate": 0.00018381431729556257, "loss": 1.7494, "step": 56500 }, { "epoch": 1.88, "grad_norm": 0.5717476010322571, "learning_rate": 0.0001838046809349408, "loss": 1.7155, "step": 56501 }, { "epoch": 1.88, "grad_norm": 0.5800724029541016, "learning_rate": 0.00018379504471536297, "loss": 1.7413, "step": 56502 }, { "epoch": 1.88, "grad_norm": 0.5826293230056763, "learning_rate": 0.00018378540863684079, "loss": 1.7487, "step": 56503 }, { "epoch": 1.88, "grad_norm": 0.5953526496887207, "learning_rate": 0.00018377577269938617, "loss": 1.7414, "step": 56504 }, { "epoch": 1.88, "grad_norm": 0.5622990727424622, "learning_rate": 0.00018376613690301044, "loss": 1.711, "step": 56505 }, { "epoch": 1.88, "grad_norm": 0.5808601379394531, "learning_rate": 0.00018375650124772557, "loss": 1.7686, "step": 56506 }, { "epoch": 1.88, "grad_norm": 0.602754533290863, "learning_rate": 0.00018374686573354322, "loss": 1.7858, "step": 56507 }, { "epoch": 1.88, "grad_norm": 0.5766808390617371, "learning_rate": 0.000183737230360475, "loss": 1.7428, "step": 56508 }, { "epoch": 1.88, "grad_norm": 0.5765627026557922, "learning_rate": 0.00018372759512853267, "loss": 1.7284, "step": 56509 }, { "epoch": 1.88, "grad_norm": 0.6032941341400146, "learning_rate": 0.00018371796003772783, "loss": 1.7244, "step": 56510 }, { "epoch": 1.88, "grad_norm": 0.5948770642280579, "learning_rate": 0.00018370832508807244, "loss": 1.7595, "step": 56511 }, { "epoch": 1.88, "grad_norm": 0.6228325963020325, "learning_rate": 0.00018369869027957782, "loss": 1.7607, "step": 56512 }, { "epoch": 1.88, "grad_norm": 0.5942620038986206, "learning_rate": 0.00018368905561225594, "loss": 1.8072, "step": 56513 }, { "epoch": 1.88, "grad_norm": 0.5751489996910095, "learning_rate": 0.00018367942108611844, "loss": 1.7401, "step": 56514 }, { "epoch": 1.88, "grad_norm": 0.5553445219993591, "learning_rate": 0.00018366978670117693, "loss": 1.6752, "step": 56515 }, { "epoch": 1.88, "grad_norm": 1.6747316122055054, "learning_rate": 0.00018366015245744314, "loss": 1.7771, "step": 56516 }, { "epoch": 1.88, "grad_norm": 0.5873481631278992, "learning_rate": 0.00018365051835492885, "loss": 1.8058, "step": 56517 }, { "epoch": 1.88, "grad_norm": 0.5913313031196594, "learning_rate": 0.00018364088439364566, "loss": 1.7116, "step": 56518 }, { "epoch": 1.88, "grad_norm": 0.5721818208694458, "learning_rate": 0.0001836312505736052, "loss": 1.6396, "step": 56519 }, { "epoch": 1.88, "grad_norm": 0.5971009135246277, "learning_rate": 0.00018362161689481935, "loss": 1.7263, "step": 56520 }, { "epoch": 1.88, "grad_norm": 0.609869658946991, "learning_rate": 0.00018361198335729975, "loss": 1.7337, "step": 56521 }, { "epoch": 1.88, "grad_norm": 0.5881155133247375, "learning_rate": 0.00018360234996105798, "loss": 1.7507, "step": 56522 }, { "epoch": 1.88, "grad_norm": 0.5802938342094421, "learning_rate": 0.0001835927167061058, "loss": 1.725, "step": 56523 }, { "epoch": 1.88, "grad_norm": 0.6286677718162537, "learning_rate": 0.000183583083592455, "loss": 1.7463, "step": 56524 }, { "epoch": 1.88, "grad_norm": 0.5831020474433899, "learning_rate": 0.00018357345062011712, "loss": 1.7233, "step": 56525 }, { "epoch": 1.88, "grad_norm": 0.5828679800033569, "learning_rate": 0.00018356381778910383, "loss": 1.75, "step": 56526 }, { "epoch": 1.88, "grad_norm": 0.6023390889167786, "learning_rate": 0.00018355418509942713, "loss": 1.776, "step": 56527 }, { "epoch": 1.88, "grad_norm": 0.6030288934707642, "learning_rate": 0.00018354455255109828, "loss": 1.7561, "step": 56528 }, { "epoch": 1.88, "grad_norm": 0.5909748673439026, "learning_rate": 0.00018353492014412927, "loss": 1.7089, "step": 56529 }, { "epoch": 1.88, "grad_norm": 0.5913355350494385, "learning_rate": 0.00018352528787853167, "loss": 1.7255, "step": 56530 }, { "epoch": 1.88, "grad_norm": 0.5641481280326843, "learning_rate": 0.0001835156557543173, "loss": 1.7101, "step": 56531 }, { "epoch": 1.88, "grad_norm": 0.5957715511322021, "learning_rate": 0.0001835060237714977, "loss": 1.699, "step": 56532 }, { "epoch": 1.88, "grad_norm": 0.5837624073028564, "learning_rate": 0.00018349639193008453, "loss": 1.6521, "step": 56533 }, { "epoch": 1.88, "grad_norm": 0.581762433052063, "learning_rate": 0.00018348676023008972, "loss": 1.65, "step": 56534 }, { "epoch": 1.88, "grad_norm": 0.5769962668418884, "learning_rate": 0.00018347712867152475, "loss": 1.7463, "step": 56535 }, { "epoch": 1.88, "grad_norm": 0.600723147392273, "learning_rate": 0.0001834674972544014, "loss": 1.636, "step": 56536 }, { "epoch": 1.88, "grad_norm": 0.5757995247840881, "learning_rate": 0.00018345786597873134, "loss": 1.7236, "step": 56537 }, { "epoch": 1.88, "grad_norm": 0.58274245262146, "learning_rate": 0.00018344823484452624, "loss": 1.6926, "step": 56538 }, { "epoch": 1.88, "grad_norm": 0.5903875827789307, "learning_rate": 0.00018343860385179782, "loss": 1.7059, "step": 56539 }, { "epoch": 1.88, "grad_norm": 0.6196852922439575, "learning_rate": 0.00018342897300055767, "loss": 1.7481, "step": 56540 }, { "epoch": 1.88, "grad_norm": 0.5909525752067566, "learning_rate": 0.00018341934229081772, "loss": 1.7235, "step": 56541 }, { "epoch": 1.88, "grad_norm": 0.6027225255966187, "learning_rate": 0.00018340971172258944, "loss": 1.693, "step": 56542 }, { "epoch": 1.88, "grad_norm": 0.5889396071434021, "learning_rate": 0.0001834000812958846, "loss": 1.6594, "step": 56543 }, { "epoch": 1.88, "grad_norm": 0.5729928612709045, "learning_rate": 0.0001833904510107149, "loss": 1.7428, "step": 56544 }, { "epoch": 1.88, "grad_norm": 0.593027651309967, "learning_rate": 0.00018338082086709201, "loss": 1.6757, "step": 56545 }, { "epoch": 1.88, "grad_norm": 0.5947793126106262, "learning_rate": 0.00018337119086502755, "loss": 1.6406, "step": 56546 }, { "epoch": 1.88, "grad_norm": 0.6297547221183777, "learning_rate": 0.0001833615610045334, "loss": 1.7548, "step": 56547 }, { "epoch": 1.88, "grad_norm": 0.5872159600257874, "learning_rate": 0.000183351931285621, "loss": 1.7737, "step": 56548 }, { "epoch": 1.88, "grad_norm": 0.5832213759422302, "learning_rate": 0.0001833423017083022, "loss": 1.7019, "step": 56549 }, { "epoch": 1.88, "grad_norm": 0.6050727367401123, "learning_rate": 0.00018333267227258864, "loss": 1.662, "step": 56550 }, { "epoch": 1.88, "grad_norm": 0.6106407642364502, "learning_rate": 0.00018332304297849215, "loss": 1.7465, "step": 56551 }, { "epoch": 1.88, "grad_norm": 0.6292849183082581, "learning_rate": 0.0001833134138260242, "loss": 1.7394, "step": 56552 }, { "epoch": 1.88, "grad_norm": 0.6067069172859192, "learning_rate": 0.00018330378481519658, "loss": 1.7574, "step": 56553 }, { "epoch": 1.88, "grad_norm": 0.5676988363265991, "learning_rate": 0.00018329415594602104, "loss": 1.713, "step": 56554 }, { "epoch": 1.88, "grad_norm": 0.5903072357177734, "learning_rate": 0.00018328452721850906, "loss": 1.7395, "step": 56555 }, { "epoch": 1.88, "grad_norm": 0.639305830001831, "learning_rate": 0.00018327489863267254, "loss": 1.8242, "step": 56556 }, { "epoch": 1.88, "grad_norm": 0.6338085532188416, "learning_rate": 0.0001832652701885231, "loss": 1.7293, "step": 56557 }, { "epoch": 1.88, "grad_norm": 0.5911015868186951, "learning_rate": 0.00018325564188607246, "loss": 1.7093, "step": 56558 }, { "epoch": 1.88, "grad_norm": 0.5984460711479187, "learning_rate": 0.00018324601372533223, "loss": 1.6918, "step": 56559 }, { "epoch": 1.88, "grad_norm": 0.6312419176101685, "learning_rate": 0.00018323638570631406, "loss": 1.7898, "step": 56560 }, { "epoch": 1.88, "grad_norm": 0.586135745048523, "learning_rate": 0.0001832267578290299, "loss": 1.6968, "step": 56561 }, { "epoch": 1.88, "grad_norm": 0.5836191177368164, "learning_rate": 0.00018321713009349108, "loss": 1.7259, "step": 56562 }, { "epoch": 1.88, "grad_norm": 0.5938560962677002, "learning_rate": 0.00018320750249970947, "loss": 1.6667, "step": 56563 }, { "epoch": 1.88, "grad_norm": 0.6078310012817383, "learning_rate": 0.00018319787504769686, "loss": 1.6637, "step": 56564 }, { "epoch": 1.88, "grad_norm": 0.5603449940681458, "learning_rate": 0.00018318824773746472, "loss": 1.6748, "step": 56565 }, { "epoch": 1.88, "grad_norm": 0.5753946304321289, "learning_rate": 0.0001831786205690249, "loss": 1.6995, "step": 56566 }, { "epoch": 1.88, "grad_norm": 0.5823570489883423, "learning_rate": 0.00018316899354238887, "loss": 1.7293, "step": 56567 }, { "epoch": 1.88, "grad_norm": 0.5838419198989868, "learning_rate": 0.00018315936665756869, "loss": 1.7058, "step": 56568 }, { "epoch": 1.88, "grad_norm": 0.598685085773468, "learning_rate": 0.00018314973991457567, "loss": 1.7024, "step": 56569 }, { "epoch": 1.88, "grad_norm": 0.6119007468223572, "learning_rate": 0.0001831401133134217, "loss": 1.8721, "step": 56570 }, { "epoch": 1.88, "grad_norm": 0.5836758017539978, "learning_rate": 0.00018313048685411845, "loss": 1.7032, "step": 56571 }, { "epoch": 1.88, "grad_norm": 0.6066374778747559, "learning_rate": 0.00018312086053667753, "loss": 1.7314, "step": 56572 }, { "epoch": 1.88, "grad_norm": 0.9163810610771179, "learning_rate": 0.00018311123436111067, "loss": 1.7226, "step": 56573 }, { "epoch": 1.88, "grad_norm": 0.6065731644630432, "learning_rate": 0.00018310160832742958, "loss": 1.743, "step": 56574 }, { "epoch": 1.88, "grad_norm": 0.5846014618873596, "learning_rate": 0.00018309198243564586, "loss": 1.6717, "step": 56575 }, { "epoch": 1.88, "grad_norm": 0.5985738039016724, "learning_rate": 0.0001830823566857712, "loss": 1.7457, "step": 56576 }, { "epoch": 1.88, "grad_norm": 0.6082566380500793, "learning_rate": 0.00018307273107781737, "loss": 1.7409, "step": 56577 }, { "epoch": 1.88, "grad_norm": 0.6067689657211304, "learning_rate": 0.0001830631056117961, "loss": 1.7718, "step": 56578 }, { "epoch": 1.88, "grad_norm": 0.5996071100234985, "learning_rate": 0.00018305348028771894, "loss": 1.6667, "step": 56579 }, { "epoch": 1.88, "grad_norm": 0.6098069548606873, "learning_rate": 0.00018304385510559755, "loss": 1.7799, "step": 56580 }, { "epoch": 1.88, "grad_norm": 0.5676109194755554, "learning_rate": 0.00018303423006544382, "loss": 1.726, "step": 56581 }, { "epoch": 1.88, "grad_norm": 0.5896992683410645, "learning_rate": 0.00018302460516726924, "loss": 1.754, "step": 56582 }, { "epoch": 1.88, "grad_norm": 0.5920477509498596, "learning_rate": 0.0001830149804110855, "loss": 1.8086, "step": 56583 }, { "epoch": 1.88, "grad_norm": 0.5957318544387817, "learning_rate": 0.00018300535579690444, "loss": 1.7486, "step": 56584 }, { "epoch": 1.88, "grad_norm": 0.5756929516792297, "learning_rate": 0.00018299573132473754, "loss": 1.7639, "step": 56585 }, { "epoch": 1.88, "grad_norm": 0.594401478767395, "learning_rate": 0.00018298610699459664, "loss": 1.7838, "step": 56586 }, { "epoch": 1.88, "grad_norm": 0.5733397006988525, "learning_rate": 0.0001829764828064933, "loss": 1.6864, "step": 56587 }, { "epoch": 1.88, "grad_norm": 0.6108176708221436, "learning_rate": 0.00018296685876043937, "loss": 1.804, "step": 56588 }, { "epoch": 1.88, "grad_norm": 0.6020001173019409, "learning_rate": 0.0001829572348564464, "loss": 1.7367, "step": 56589 }, { "epoch": 1.88, "grad_norm": 0.573762834072113, "learning_rate": 0.00018294761109452598, "loss": 1.7324, "step": 56590 }, { "epoch": 1.88, "grad_norm": 0.5789722204208374, "learning_rate": 0.00018293798747469014, "loss": 1.7338, "step": 56591 }, { "epoch": 1.88, "grad_norm": 0.5762789249420166, "learning_rate": 0.00018292836399695013, "loss": 1.7074, "step": 56592 }, { "epoch": 1.88, "grad_norm": 0.5847268104553223, "learning_rate": 0.00018291874066131788, "loss": 1.8156, "step": 56593 }, { "epoch": 1.88, "grad_norm": 0.5573068857192993, "learning_rate": 0.00018290911746780513, "loss": 1.6948, "step": 56594 }, { "epoch": 1.88, "grad_norm": 0.5839301347732544, "learning_rate": 0.0001828994944164234, "loss": 1.8272, "step": 56595 }, { "epoch": 1.88, "grad_norm": 0.5887020826339722, "learning_rate": 0.00018288987150718436, "loss": 1.8168, "step": 56596 }, { "epoch": 1.88, "grad_norm": 0.5804385542869568, "learning_rate": 0.0001828802487400998, "loss": 1.744, "step": 56597 }, { "epoch": 1.88, "grad_norm": 0.5749484300613403, "learning_rate": 0.00018287062611518147, "loss": 1.7411, "step": 56598 }, { "epoch": 1.88, "grad_norm": 0.5778012275695801, "learning_rate": 0.0001828610036324408, "loss": 1.6345, "step": 56599 }, { "epoch": 1.88, "grad_norm": 0.5777799487113953, "learning_rate": 0.00018285138129188962, "loss": 1.7438, "step": 56600 }, { "epoch": 1.88, "grad_norm": 0.5900617241859436, "learning_rate": 0.00018284175909353973, "loss": 1.7298, "step": 56601 }, { "epoch": 1.88, "grad_norm": 0.5852850079536438, "learning_rate": 0.0001828321370374026, "loss": 1.7131, "step": 56602 }, { "epoch": 1.88, "grad_norm": 0.5775451064109802, "learning_rate": 0.00018282251512348997, "loss": 1.7686, "step": 56603 }, { "epoch": 1.88, "grad_norm": 0.5813186168670654, "learning_rate": 0.0001828128933518136, "loss": 1.7914, "step": 56604 }, { "epoch": 1.88, "grad_norm": 0.5923166871070862, "learning_rate": 0.00018280327172238503, "loss": 1.7295, "step": 56605 }, { "epoch": 1.88, "grad_norm": 0.5735240578651428, "learning_rate": 0.00018279365023521604, "loss": 1.695, "step": 56606 }, { "epoch": 1.88, "grad_norm": 0.5704202055931091, "learning_rate": 0.0001827840288903183, "loss": 1.6443, "step": 56607 }, { "epoch": 1.88, "grad_norm": 0.588024914264679, "learning_rate": 0.00018277440768770352, "loss": 1.7194, "step": 56608 }, { "epoch": 1.88, "grad_norm": 0.5654937624931335, "learning_rate": 0.0001827647866273833, "loss": 1.7672, "step": 56609 }, { "epoch": 1.88, "grad_norm": 0.5804949402809143, "learning_rate": 0.00018275516570936937, "loss": 1.7255, "step": 56610 }, { "epoch": 1.88, "grad_norm": 0.6002407670021057, "learning_rate": 0.00018274554493367343, "loss": 1.6715, "step": 56611 }, { "epoch": 1.88, "grad_norm": 0.5945295691490173, "learning_rate": 0.00018273592430030706, "loss": 1.7196, "step": 56612 }, { "epoch": 1.88, "grad_norm": 0.6194693446159363, "learning_rate": 0.00018272630380928196, "loss": 1.769, "step": 56613 }, { "epoch": 1.88, "grad_norm": 0.5893226861953735, "learning_rate": 0.00018271668346061, "loss": 1.7528, "step": 56614 }, { "epoch": 1.88, "grad_norm": 0.6160523295402527, "learning_rate": 0.00018270706325430256, "loss": 1.8007, "step": 56615 }, { "epoch": 1.88, "grad_norm": 0.5941541194915771, "learning_rate": 0.0001826974431903715, "loss": 1.7159, "step": 56616 }, { "epoch": 1.88, "grad_norm": 0.5769661664962769, "learning_rate": 0.00018268782326882847, "loss": 1.7084, "step": 56617 }, { "epoch": 1.88, "grad_norm": 0.5589417815208435, "learning_rate": 0.0001826782034896852, "loss": 1.7633, "step": 56618 }, { "epoch": 1.88, "grad_norm": 0.5808897614479065, "learning_rate": 0.00018266858385295321, "loss": 1.7477, "step": 56619 }, { "epoch": 1.88, "grad_norm": 0.6146211624145508, "learning_rate": 0.00018265896435864425, "loss": 1.6846, "step": 56620 }, { "epoch": 1.88, "grad_norm": 0.5858252048492432, "learning_rate": 0.0001826493450067702, "loss": 1.7216, "step": 56621 }, { "epoch": 1.88, "grad_norm": 0.5905061364173889, "learning_rate": 0.00018263972579734235, "loss": 1.8028, "step": 56622 }, { "epoch": 1.88, "grad_norm": 0.5675506591796875, "learning_rate": 0.00018263010673037267, "loss": 1.7194, "step": 56623 }, { "epoch": 1.88, "grad_norm": 0.5795826315879822, "learning_rate": 0.00018262048780587275, "loss": 1.6631, "step": 56624 }, { "epoch": 1.88, "grad_norm": 0.5880404710769653, "learning_rate": 0.00018261086902385428, "loss": 1.7147, "step": 56625 }, { "epoch": 1.88, "grad_norm": 0.5896238684654236, "learning_rate": 0.00018260125038432887, "loss": 1.6852, "step": 56626 }, { "epoch": 1.88, "grad_norm": 0.577852725982666, "learning_rate": 0.00018259163188730814, "loss": 1.7234, "step": 56627 }, { "epoch": 1.88, "grad_norm": 0.5702638626098633, "learning_rate": 0.00018258201353280414, "loss": 1.6713, "step": 56628 }, { "epoch": 1.88, "grad_norm": 0.5774964690208435, "learning_rate": 0.00018257239532082803, "loss": 1.7292, "step": 56629 }, { "epoch": 1.88, "grad_norm": 1.1224573850631714, "learning_rate": 0.00018256277725139182, "loss": 1.8142, "step": 56630 }, { "epoch": 1.88, "grad_norm": 0.5846418738365173, "learning_rate": 0.00018255315932450713, "loss": 1.7152, "step": 56631 }, { "epoch": 1.88, "grad_norm": 0.5872815251350403, "learning_rate": 0.00018254354154018557, "loss": 1.7503, "step": 56632 }, { "epoch": 1.88, "grad_norm": 1.4094023704528809, "learning_rate": 0.00018253392389843884, "loss": 1.7572, "step": 56633 }, { "epoch": 1.88, "grad_norm": 0.5792416334152222, "learning_rate": 0.00018252430639927871, "loss": 1.7157, "step": 56634 }, { "epoch": 1.88, "grad_norm": 0.5845817923545837, "learning_rate": 0.00018251468904271665, "loss": 1.7647, "step": 56635 }, { "epoch": 1.88, "grad_norm": 0.5555691123008728, "learning_rate": 0.00018250507182876441, "loss": 1.7232, "step": 56636 }, { "epoch": 1.88, "grad_norm": 0.5755248665809631, "learning_rate": 0.00018249545475743374, "loss": 1.8461, "step": 56637 }, { "epoch": 1.88, "grad_norm": 0.5832387804985046, "learning_rate": 0.00018248583782873635, "loss": 1.6682, "step": 56638 }, { "epoch": 1.88, "grad_norm": 0.6071819067001343, "learning_rate": 0.0001824762210426838, "loss": 1.8229, "step": 56639 }, { "epoch": 1.88, "grad_norm": 0.575736939907074, "learning_rate": 0.00018246660439928778, "loss": 1.7268, "step": 56640 }, { "epoch": 1.88, "grad_norm": 0.589167058467865, "learning_rate": 0.00018245698789856, "loss": 1.784, "step": 56641 }, { "epoch": 1.88, "grad_norm": 0.5668550133705139, "learning_rate": 0.00018244737154051214, "loss": 1.7283, "step": 56642 }, { "epoch": 1.88, "grad_norm": 0.5706480145454407, "learning_rate": 0.00018243775532515575, "loss": 1.8079, "step": 56643 }, { "epoch": 1.88, "grad_norm": 0.5771066546440125, "learning_rate": 0.00018242813925250275, "loss": 1.65, "step": 56644 }, { "epoch": 1.88, "grad_norm": 0.5790541768074036, "learning_rate": 0.00018241852332256454, "loss": 1.7585, "step": 56645 }, { "epoch": 1.88, "grad_norm": 0.598914384841919, "learning_rate": 0.00018240890753535294, "loss": 1.7256, "step": 56646 }, { "epoch": 1.88, "grad_norm": 0.585189700126648, "learning_rate": 0.0001823992918908796, "loss": 1.7375, "step": 56647 }, { "epoch": 1.88, "grad_norm": 0.567844569683075, "learning_rate": 0.0001823896763891563, "loss": 1.6891, "step": 56648 }, { "epoch": 1.88, "grad_norm": 0.5959212779998779, "learning_rate": 0.00018238006103019451, "loss": 1.7273, "step": 56649 }, { "epoch": 1.88, "grad_norm": 0.5975249409675598, "learning_rate": 0.00018237044581400594, "loss": 1.7066, "step": 56650 }, { "epoch": 1.88, "grad_norm": 0.5715746879577637, "learning_rate": 0.00018236083074060243, "loss": 1.7768, "step": 56651 }, { "epoch": 1.88, "grad_norm": 0.6145306825637817, "learning_rate": 0.0001823512158099955, "loss": 1.8095, "step": 56652 }, { "epoch": 1.88, "grad_norm": 0.5638794302940369, "learning_rate": 0.00018234160102219684, "loss": 1.7659, "step": 56653 }, { "epoch": 1.88, "grad_norm": 0.5881473422050476, "learning_rate": 0.0001823319863772182, "loss": 1.7618, "step": 56654 }, { "epoch": 1.88, "grad_norm": 0.5721683502197266, "learning_rate": 0.0001823223718750711, "loss": 1.7762, "step": 56655 }, { "epoch": 1.88, "grad_norm": 0.5958300828933716, "learning_rate": 0.00018231275751576736, "loss": 1.7836, "step": 56656 }, { "epoch": 1.88, "grad_norm": 0.5710697174072266, "learning_rate": 0.00018230314329931852, "loss": 1.7434, "step": 56657 }, { "epoch": 1.89, "grad_norm": 0.5868313312530518, "learning_rate": 0.0001822935292257364, "loss": 1.7521, "step": 56658 }, { "epoch": 1.89, "grad_norm": 0.5856460332870483, "learning_rate": 0.00018228391529503257, "loss": 1.7264, "step": 56659 }, { "epoch": 1.89, "grad_norm": 0.5820747017860413, "learning_rate": 0.00018227430150721872, "loss": 1.695, "step": 56660 }, { "epoch": 1.89, "grad_norm": 0.5966435670852661, "learning_rate": 0.00018226468786230657, "loss": 1.6914, "step": 56661 }, { "epoch": 1.89, "grad_norm": 0.5892274975776672, "learning_rate": 0.00018225507436030772, "loss": 1.7023, "step": 56662 }, { "epoch": 1.89, "grad_norm": 0.5688251256942749, "learning_rate": 0.00018224546100123378, "loss": 1.7014, "step": 56663 }, { "epoch": 1.89, "grad_norm": 0.5763199329376221, "learning_rate": 0.00018223584778509662, "loss": 1.6544, "step": 56664 }, { "epoch": 1.89, "grad_norm": 0.574965238571167, "learning_rate": 0.00018222623471190764, "loss": 1.7155, "step": 56665 }, { "epoch": 1.89, "grad_norm": 0.5620882511138916, "learning_rate": 0.00018221662178167876, "loss": 1.7277, "step": 56666 }, { "epoch": 1.89, "grad_norm": 0.5692005157470703, "learning_rate": 0.0001822070089944215, "loss": 1.7459, "step": 56667 }, { "epoch": 1.89, "grad_norm": 0.6135169267654419, "learning_rate": 0.00018219739635014765, "loss": 1.7041, "step": 56668 }, { "epoch": 1.89, "grad_norm": 0.5808886885643005, "learning_rate": 0.00018218778384886876, "loss": 1.7089, "step": 56669 }, { "epoch": 1.89, "grad_norm": 0.5825655460357666, "learning_rate": 0.00018217817149059653, "loss": 1.8195, "step": 56670 }, { "epoch": 1.89, "grad_norm": 0.600978672504425, "learning_rate": 0.00018216855927534272, "loss": 1.7524, "step": 56671 }, { "epoch": 1.89, "grad_norm": 0.5992704033851624, "learning_rate": 0.00018215894720311876, "loss": 1.6962, "step": 56672 }, { "epoch": 1.89, "grad_norm": 0.5894690752029419, "learning_rate": 0.0001821493352739366, "loss": 1.7726, "step": 56673 }, { "epoch": 1.89, "grad_norm": 0.5938388705253601, "learning_rate": 0.00018213972348780778, "loss": 1.7401, "step": 56674 }, { "epoch": 1.89, "grad_norm": 0.5836588144302368, "learning_rate": 0.0001821301118447439, "loss": 1.7538, "step": 56675 }, { "epoch": 1.89, "grad_norm": 0.5901992917060852, "learning_rate": 0.00018212050034475673, "loss": 1.6919, "step": 56676 }, { "epoch": 1.89, "grad_norm": 0.5844406485557556, "learning_rate": 0.00018211088898785783, "loss": 1.6171, "step": 56677 }, { "epoch": 1.89, "grad_norm": 0.5885781049728394, "learning_rate": 0.00018210127777405915, "loss": 1.7605, "step": 56678 }, { "epoch": 1.89, "grad_norm": 0.5795716643333435, "learning_rate": 0.00018209166670337193, "loss": 1.7377, "step": 56679 }, { "epoch": 1.89, "grad_norm": 0.5932682156562805, "learning_rate": 0.00018208205577580812, "loss": 1.7138, "step": 56680 }, { "epoch": 1.89, "grad_norm": 0.5923343896865845, "learning_rate": 0.0001820724449913794, "loss": 1.7432, "step": 56681 }, { "epoch": 1.89, "grad_norm": 0.5855081677436829, "learning_rate": 0.00018206283435009726, "loss": 1.7174, "step": 56682 }, { "epoch": 1.89, "grad_norm": 0.5813297033309937, "learning_rate": 0.0001820532238519735, "loss": 1.7971, "step": 56683 }, { "epoch": 1.89, "grad_norm": 0.565902829170227, "learning_rate": 0.00018204361349701966, "loss": 1.7262, "step": 56684 }, { "epoch": 1.89, "grad_norm": 0.5858670473098755, "learning_rate": 0.0001820340032852477, "loss": 1.6701, "step": 56685 }, { "epoch": 1.89, "grad_norm": 0.5771957635879517, "learning_rate": 0.00018202439321666892, "loss": 1.7353, "step": 56686 }, { "epoch": 1.89, "grad_norm": 0.6017306447029114, "learning_rate": 0.00018201478329129514, "loss": 1.8005, "step": 56687 }, { "epoch": 1.89, "grad_norm": 0.6170523166656494, "learning_rate": 0.0001820051735091381, "loss": 1.7612, "step": 56688 }, { "epoch": 1.89, "grad_norm": 0.5943900346755981, "learning_rate": 0.00018199556387020935, "loss": 1.746, "step": 56689 }, { "epoch": 1.89, "grad_norm": 0.590505063533783, "learning_rate": 0.00018198595437452064, "loss": 1.7409, "step": 56690 }, { "epoch": 1.89, "grad_norm": 0.5495378375053406, "learning_rate": 0.0001819763450220836, "loss": 1.6836, "step": 56691 }, { "epoch": 1.89, "grad_norm": 0.5579556822776794, "learning_rate": 0.00018196673581290983, "loss": 1.6958, "step": 56692 }, { "epoch": 1.89, "grad_norm": 0.5705546140670776, "learning_rate": 0.000181957126747011, "loss": 1.7402, "step": 56693 }, { "epoch": 1.89, "grad_norm": 0.5923300385475159, "learning_rate": 0.00018194751782439888, "loss": 1.7728, "step": 56694 }, { "epoch": 1.89, "grad_norm": 0.5868980884552002, "learning_rate": 0.00018193790904508515, "loss": 1.7513, "step": 56695 }, { "epoch": 1.89, "grad_norm": 0.5865055918693542, "learning_rate": 0.00018192830040908133, "loss": 1.7677, "step": 56696 }, { "epoch": 1.89, "grad_norm": 0.6173808574676514, "learning_rate": 0.00018191869191639916, "loss": 1.8296, "step": 56697 }, { "epoch": 1.89, "grad_norm": 0.5765331387519836, "learning_rate": 0.00018190908356705037, "loss": 1.7559, "step": 56698 }, { "epoch": 1.89, "grad_norm": 0.5909487009048462, "learning_rate": 0.0001818994753610465, "loss": 1.7516, "step": 56699 }, { "epoch": 1.89, "grad_norm": 0.5977370738983154, "learning_rate": 0.00018188986729839916, "loss": 1.8195, "step": 56700 }, { "epoch": 1.89, "grad_norm": 0.5813042521476746, "learning_rate": 0.00018188025937912035, "loss": 1.6406, "step": 56701 }, { "epoch": 1.89, "grad_norm": 0.5862544178962708, "learning_rate": 0.00018187065160322132, "loss": 1.7222, "step": 56702 }, { "epoch": 1.89, "grad_norm": 0.5893439650535583, "learning_rate": 0.00018186104397071394, "loss": 1.7303, "step": 56703 }, { "epoch": 1.89, "grad_norm": 0.5783624649047852, "learning_rate": 0.00018185143648160988, "loss": 1.6713, "step": 56704 }, { "epoch": 1.89, "grad_norm": 0.5657280087471008, "learning_rate": 0.0001818418291359208, "loss": 1.7533, "step": 56705 }, { "epoch": 1.89, "grad_norm": 0.5790467262268066, "learning_rate": 0.00018183222193365826, "loss": 1.6875, "step": 56706 }, { "epoch": 1.89, "grad_norm": 0.6215606331825256, "learning_rate": 0.00018182261487483393, "loss": 1.7985, "step": 56707 }, { "epoch": 1.89, "grad_norm": 0.5839459896087646, "learning_rate": 0.00018181300795945977, "loss": 1.6498, "step": 56708 }, { "epoch": 1.89, "grad_norm": 0.5815013647079468, "learning_rate": 0.000181803401187547, "loss": 1.7276, "step": 56709 }, { "epoch": 1.89, "grad_norm": 0.5749348402023315, "learning_rate": 0.0001817937945591075, "loss": 1.7187, "step": 56710 }, { "epoch": 1.89, "grad_norm": 0.5833757519721985, "learning_rate": 0.00018178418807415303, "loss": 1.7442, "step": 56711 }, { "epoch": 1.89, "grad_norm": 0.5859989523887634, "learning_rate": 0.00018177458173269505, "loss": 1.7513, "step": 56712 }, { "epoch": 1.89, "grad_norm": 0.5849945545196533, "learning_rate": 0.0001817649755347453, "loss": 1.7723, "step": 56713 }, { "epoch": 1.89, "grad_norm": 0.5942628979682922, "learning_rate": 0.0001817553694803154, "loss": 1.6684, "step": 56714 }, { "epoch": 1.89, "grad_norm": 0.5845986604690552, "learning_rate": 0.00018174576356941728, "loss": 1.7327, "step": 56715 }, { "epoch": 1.89, "grad_norm": 0.5898423790931702, "learning_rate": 0.00018173615780206213, "loss": 1.7038, "step": 56716 }, { "epoch": 1.89, "grad_norm": 0.5794076919555664, "learning_rate": 0.00018172655217826196, "loss": 1.7523, "step": 56717 }, { "epoch": 1.89, "grad_norm": 0.5972775816917419, "learning_rate": 0.00018171694669802835, "loss": 1.6763, "step": 56718 }, { "epoch": 1.89, "grad_norm": 0.5893380045890808, "learning_rate": 0.00018170734136137293, "loss": 1.6851, "step": 56719 }, { "epoch": 1.89, "grad_norm": 0.605324923992157, "learning_rate": 0.00018169773616830736, "loss": 1.7471, "step": 56720 }, { "epoch": 1.89, "grad_norm": 0.5883374810218811, "learning_rate": 0.00018168813111884332, "loss": 1.7016, "step": 56721 }, { "epoch": 1.89, "grad_norm": 0.5804440975189209, "learning_rate": 0.00018167852621299246, "loss": 1.7442, "step": 56722 }, { "epoch": 1.89, "grad_norm": 0.5851049423217773, "learning_rate": 0.00018166892145076634, "loss": 1.7255, "step": 56723 }, { "epoch": 1.89, "grad_norm": 0.5669289231300354, "learning_rate": 0.0001816593168321768, "loss": 1.6913, "step": 56724 }, { "epoch": 1.89, "grad_norm": 0.6117409467697144, "learning_rate": 0.0001816497123572354, "loss": 1.7549, "step": 56725 }, { "epoch": 1.89, "grad_norm": 0.5901790261268616, "learning_rate": 0.0001816401080259538, "loss": 1.7809, "step": 56726 }, { "epoch": 1.89, "grad_norm": 0.5935847163200378, "learning_rate": 0.00018163050383834367, "loss": 1.7514, "step": 56727 }, { "epoch": 1.89, "grad_norm": 0.5930980443954468, "learning_rate": 0.00018162089979441675, "loss": 1.7456, "step": 56728 }, { "epoch": 1.89, "grad_norm": 0.5897578001022339, "learning_rate": 0.00018161129589418454, "loss": 1.7859, "step": 56729 }, { "epoch": 1.89, "grad_norm": 0.5693085193634033, "learning_rate": 0.0001816016921376587, "loss": 1.7392, "step": 56730 }, { "epoch": 1.89, "grad_norm": 0.6042793393135071, "learning_rate": 0.00018159208852485116, "loss": 1.7357, "step": 56731 }, { "epoch": 1.89, "grad_norm": 0.6073909997940063, "learning_rate": 0.0001815824850557732, "loss": 1.7002, "step": 56732 }, { "epoch": 1.89, "grad_norm": 0.583446741104126, "learning_rate": 0.00018157288173043667, "loss": 1.737, "step": 56733 }, { "epoch": 1.89, "grad_norm": 0.5660433173179626, "learning_rate": 0.00018156327854885324, "loss": 1.749, "step": 56734 }, { "epoch": 1.89, "grad_norm": 0.5876251459121704, "learning_rate": 0.00018155367551103463, "loss": 1.7011, "step": 56735 }, { "epoch": 1.89, "grad_norm": 0.5561180114746094, "learning_rate": 0.0001815440726169923, "loss": 1.7437, "step": 56736 }, { "epoch": 1.89, "grad_norm": 0.5823013782501221, "learning_rate": 0.00018153446986673798, "loss": 1.6952, "step": 56737 }, { "epoch": 1.89, "grad_norm": 0.5872265100479126, "learning_rate": 0.00018152486726028352, "loss": 1.8135, "step": 56738 }, { "epoch": 1.89, "grad_norm": 0.5697723031044006, "learning_rate": 0.00018151526479764024, "loss": 1.6245, "step": 56739 }, { "epoch": 1.89, "grad_norm": 0.5727819800376892, "learning_rate": 0.00018150566247882, "loss": 1.7546, "step": 56740 }, { "epoch": 1.89, "grad_norm": 0.5760576725006104, "learning_rate": 0.00018149606030383456, "loss": 1.6913, "step": 56741 }, { "epoch": 1.89, "grad_norm": 0.6109605431556702, "learning_rate": 0.00018148645827269534, "loss": 1.7044, "step": 56742 }, { "epoch": 1.89, "grad_norm": 0.5924643874168396, "learning_rate": 0.0001814768563854141, "loss": 1.746, "step": 56743 }, { "epoch": 1.89, "grad_norm": 0.5908953547477722, "learning_rate": 0.00018146725464200243, "loss": 1.7232, "step": 56744 }, { "epoch": 1.89, "grad_norm": 0.582723081111908, "learning_rate": 0.00018145765304247222, "loss": 1.7119, "step": 56745 }, { "epoch": 1.89, "grad_norm": 0.5765039920806885, "learning_rate": 0.00018144805158683478, "loss": 1.7237, "step": 56746 }, { "epoch": 1.89, "grad_norm": 0.5960153937339783, "learning_rate": 0.000181438450275102, "loss": 1.7377, "step": 56747 }, { "epoch": 1.89, "grad_norm": 0.5905638337135315, "learning_rate": 0.00018142884910728557, "loss": 1.7764, "step": 56748 }, { "epoch": 1.89, "grad_norm": 0.6038795113563538, "learning_rate": 0.00018141924808339694, "loss": 1.7232, "step": 56749 }, { "epoch": 1.89, "grad_norm": 0.6030593514442444, "learning_rate": 0.0001814096472034479, "loss": 1.7852, "step": 56750 }, { "epoch": 1.89, "grad_norm": 0.5920888781547546, "learning_rate": 0.00018140004646745008, "loss": 1.7798, "step": 56751 }, { "epoch": 1.89, "grad_norm": 0.6295003294944763, "learning_rate": 0.00018139044587541512, "loss": 1.7667, "step": 56752 }, { "epoch": 1.89, "grad_norm": 0.5820667743682861, "learning_rate": 0.0001813808454273546, "loss": 1.6981, "step": 56753 }, { "epoch": 1.89, "grad_norm": 0.5760704874992371, "learning_rate": 0.00018137124512328034, "loss": 1.7198, "step": 56754 }, { "epoch": 1.89, "grad_norm": 0.5584575533866882, "learning_rate": 0.0001813616449632039, "loss": 1.7301, "step": 56755 }, { "epoch": 1.89, "grad_norm": 0.5991696715354919, "learning_rate": 0.00018135204494713696, "loss": 1.7746, "step": 56756 }, { "epoch": 1.89, "grad_norm": 0.5782868266105652, "learning_rate": 0.00018134244507509112, "loss": 1.6703, "step": 56757 }, { "epoch": 1.89, "grad_norm": 0.5883470773696899, "learning_rate": 0.00018133284534707815, "loss": 1.7029, "step": 56758 }, { "epoch": 1.89, "grad_norm": 0.5830247402191162, "learning_rate": 0.00018132324576310954, "loss": 1.7135, "step": 56759 }, { "epoch": 1.89, "grad_norm": 0.5806632041931152, "learning_rate": 0.00018131364632319698, "loss": 1.6812, "step": 56760 }, { "epoch": 1.89, "grad_norm": 0.5708046555519104, "learning_rate": 0.00018130404702735228, "loss": 1.6552, "step": 56761 }, { "epoch": 1.89, "grad_norm": 0.5982905030250549, "learning_rate": 0.0001812944478755869, "loss": 1.7402, "step": 56762 }, { "epoch": 1.89, "grad_norm": 0.5892536640167236, "learning_rate": 0.0001812848488679126, "loss": 1.7527, "step": 56763 }, { "epoch": 1.89, "grad_norm": 0.626794695854187, "learning_rate": 0.000181275250004341, "loss": 1.7189, "step": 56764 }, { "epoch": 1.89, "grad_norm": 0.5745231509208679, "learning_rate": 0.0001812656512848838, "loss": 1.7422, "step": 56765 }, { "epoch": 1.89, "grad_norm": 0.5827659964561462, "learning_rate": 0.00018125605270955254, "loss": 1.6912, "step": 56766 }, { "epoch": 1.89, "grad_norm": 0.5889241099357605, "learning_rate": 0.0001812464542783589, "loss": 1.764, "step": 56767 }, { "epoch": 1.89, "grad_norm": 0.5832980275154114, "learning_rate": 0.00018123685599131465, "loss": 1.7181, "step": 56768 }, { "epoch": 1.89, "grad_norm": 0.5964111685752869, "learning_rate": 0.00018122725784843133, "loss": 1.76, "step": 56769 }, { "epoch": 1.89, "grad_norm": 0.5996780395507812, "learning_rate": 0.00018121765984972058, "loss": 1.6618, "step": 56770 }, { "epoch": 1.89, "grad_norm": 0.5992722511291504, "learning_rate": 0.00018120806199519417, "loss": 1.7368, "step": 56771 }, { "epoch": 1.89, "grad_norm": 0.5955555438995361, "learning_rate": 0.00018119846428486363, "loss": 1.7187, "step": 56772 }, { "epoch": 1.89, "grad_norm": 0.5987145900726318, "learning_rate": 0.00018118886671874066, "loss": 1.8002, "step": 56773 }, { "epoch": 1.89, "grad_norm": 0.5812615156173706, "learning_rate": 0.00018117926929683676, "loss": 1.7492, "step": 56774 }, { "epoch": 1.89, "grad_norm": 0.5878258943557739, "learning_rate": 0.00018116967201916394, "loss": 1.6998, "step": 56775 }, { "epoch": 1.89, "grad_norm": 0.6011333465576172, "learning_rate": 0.0001811600748857335, "loss": 1.8026, "step": 56776 }, { "epoch": 1.89, "grad_norm": 0.5964089035987854, "learning_rate": 0.00018115047789655722, "loss": 1.7433, "step": 56777 }, { "epoch": 1.89, "grad_norm": 0.610048234462738, "learning_rate": 0.00018114088105164686, "loss": 1.7508, "step": 56778 }, { "epoch": 1.89, "grad_norm": 0.6078264117240906, "learning_rate": 0.00018113128435101383, "loss": 1.7339, "step": 56779 }, { "epoch": 1.89, "grad_norm": 0.7746304869651794, "learning_rate": 0.00018112168779466998, "loss": 1.714, "step": 56780 }, { "epoch": 1.89, "grad_norm": 0.5871615409851074, "learning_rate": 0.0001811120913826269, "loss": 1.7673, "step": 56781 }, { "epoch": 1.89, "grad_norm": 0.5839587450027466, "learning_rate": 0.0001811024951148961, "loss": 1.6433, "step": 56782 }, { "epoch": 1.89, "grad_norm": 0.5902548432350159, "learning_rate": 0.00018109289899148943, "loss": 1.7416, "step": 56783 }, { "epoch": 1.89, "grad_norm": 0.5953676700592041, "learning_rate": 0.00018108330301241845, "loss": 1.7267, "step": 56784 }, { "epoch": 1.89, "grad_norm": 0.5808927416801453, "learning_rate": 0.00018107370717769484, "loss": 1.7639, "step": 56785 }, { "epoch": 1.89, "grad_norm": 0.5861726403236389, "learning_rate": 0.00018106411148733023, "loss": 1.7481, "step": 56786 }, { "epoch": 1.89, "grad_norm": 0.5810124278068542, "learning_rate": 0.00018105451594133617, "loss": 1.7253, "step": 56787 }, { "epoch": 1.89, "grad_norm": 0.5949678421020508, "learning_rate": 0.00018104492053972463, "loss": 1.7533, "step": 56788 }, { "epoch": 1.89, "grad_norm": 0.5865489840507507, "learning_rate": 0.00018103532528250679, "loss": 1.7222, "step": 56789 }, { "epoch": 1.89, "grad_norm": 0.570462703704834, "learning_rate": 0.00018102573016969464, "loss": 1.8136, "step": 56790 }, { "epoch": 1.89, "grad_norm": 0.6034653782844543, "learning_rate": 0.00018101613520129977, "loss": 1.6872, "step": 56791 }, { "epoch": 1.89, "grad_norm": 0.5974816679954529, "learning_rate": 0.0001810065403773337, "loss": 1.7179, "step": 56792 }, { "epoch": 1.89, "grad_norm": 0.5997535586357117, "learning_rate": 0.00018099694569780818, "loss": 1.6797, "step": 56793 }, { "epoch": 1.89, "grad_norm": 0.5832249522209167, "learning_rate": 0.00018098735116273476, "loss": 1.7242, "step": 56794 }, { "epoch": 1.89, "grad_norm": 0.5982587337493896, "learning_rate": 0.00018097775677212535, "loss": 1.6867, "step": 56795 }, { "epoch": 1.89, "grad_norm": 0.5898826718330383, "learning_rate": 0.00018096816252599122, "loss": 1.7456, "step": 56796 }, { "epoch": 1.89, "grad_norm": 0.6219818592071533, "learning_rate": 0.0001809585684243443, "loss": 1.7852, "step": 56797 }, { "epoch": 1.89, "grad_norm": 0.579517126083374, "learning_rate": 0.00018094897446719612, "loss": 1.7092, "step": 56798 }, { "epoch": 1.89, "grad_norm": 0.5931597352027893, "learning_rate": 0.0001809393806545583, "loss": 1.721, "step": 56799 }, { "epoch": 1.89, "grad_norm": 0.5898218750953674, "learning_rate": 0.0001809297869864426, "loss": 1.7271, "step": 56800 }, { "epoch": 1.89, "grad_norm": 0.6033375263214111, "learning_rate": 0.00018092019346286047, "loss": 1.721, "step": 56801 }, { "epoch": 1.89, "grad_norm": 0.6050302982330322, "learning_rate": 0.0001809106000838239, "loss": 1.7998, "step": 56802 }, { "epoch": 1.89, "grad_norm": 0.5812905430793762, "learning_rate": 0.00018090100684934408, "loss": 1.7157, "step": 56803 }, { "epoch": 1.89, "grad_norm": 0.5857701301574707, "learning_rate": 0.000180891413759433, "loss": 1.6592, "step": 56804 }, { "epoch": 1.89, "grad_norm": 0.5868096351623535, "learning_rate": 0.00018088182081410224, "loss": 1.6708, "step": 56805 }, { "epoch": 1.89, "grad_norm": 0.5884625911712646, "learning_rate": 0.00018087222801336332, "loss": 1.8346, "step": 56806 }, { "epoch": 1.89, "grad_norm": 0.5619899034500122, "learning_rate": 0.000180862635357228, "loss": 1.7308, "step": 56807 }, { "epoch": 1.89, "grad_norm": 0.5756580829620361, "learning_rate": 0.0001808530428457079, "loss": 1.7341, "step": 56808 }, { "epoch": 1.89, "grad_norm": 0.5799835324287415, "learning_rate": 0.00018084345047881463, "loss": 1.663, "step": 56809 }, { "epoch": 1.89, "grad_norm": 0.6141290664672852, "learning_rate": 0.00018083385825655977, "loss": 1.6906, "step": 56810 }, { "epoch": 1.89, "grad_norm": 0.5920719504356384, "learning_rate": 0.00018082426617895513, "loss": 1.7397, "step": 56811 }, { "epoch": 1.89, "grad_norm": 0.5612922310829163, "learning_rate": 0.0001808146742460123, "loss": 1.7083, "step": 56812 }, { "epoch": 1.89, "grad_norm": 0.5914310812950134, "learning_rate": 0.00018080508245774286, "loss": 1.6787, "step": 56813 }, { "epoch": 1.89, "grad_norm": 0.5944854617118835, "learning_rate": 0.00018079549081415847, "loss": 1.7543, "step": 56814 }, { "epoch": 1.89, "grad_norm": 0.6166165471076965, "learning_rate": 0.00018078589931527086, "loss": 1.7133, "step": 56815 }, { "epoch": 1.89, "grad_norm": 0.5849422812461853, "learning_rate": 0.00018077630796109156, "loss": 1.6689, "step": 56816 }, { "epoch": 1.89, "grad_norm": 0.5987993478775024, "learning_rate": 0.00018076671675163215, "loss": 1.7365, "step": 56817 }, { "epoch": 1.89, "grad_norm": 0.5845490097999573, "learning_rate": 0.0001807571256869046, "loss": 1.7516, "step": 56818 }, { "epoch": 1.89, "grad_norm": 0.5788844227790833, "learning_rate": 0.00018074753476692013, "loss": 1.6549, "step": 56819 }, { "epoch": 1.89, "grad_norm": 0.6159206628799438, "learning_rate": 0.00018073794399169066, "loss": 1.7264, "step": 56820 }, { "epoch": 1.89, "grad_norm": 0.5917778611183167, "learning_rate": 0.00018072835336122774, "loss": 1.6633, "step": 56821 }, { "epoch": 1.89, "grad_norm": 0.6044912338256836, "learning_rate": 0.00018071876287554308, "loss": 1.6802, "step": 56822 }, { "epoch": 1.89, "grad_norm": 0.5843770503997803, "learning_rate": 0.00018070917253464822, "loss": 1.8103, "step": 56823 }, { "epoch": 1.89, "grad_norm": 0.5879012942314148, "learning_rate": 0.00018069958233855478, "loss": 1.7327, "step": 56824 }, { "epoch": 1.89, "grad_norm": 0.5953748226165771, "learning_rate": 0.00018068999228727464, "loss": 1.7575, "step": 56825 }, { "epoch": 1.89, "grad_norm": 0.5958318114280701, "learning_rate": 0.0001806804023808191, "loss": 1.7489, "step": 56826 }, { "epoch": 1.89, "grad_norm": 0.5849892497062683, "learning_rate": 0.00018067081261920003, "loss": 1.7301, "step": 56827 }, { "epoch": 1.89, "grad_norm": 0.6077801585197449, "learning_rate": 0.00018066122300242906, "loss": 1.7283, "step": 56828 }, { "epoch": 1.89, "grad_norm": 0.6032097339630127, "learning_rate": 0.00018065163353051772, "loss": 1.718, "step": 56829 }, { "epoch": 1.89, "grad_norm": 0.6162337064743042, "learning_rate": 0.00018064204420347777, "loss": 1.7202, "step": 56830 }, { "epoch": 1.89, "grad_norm": 0.5906444787979126, "learning_rate": 0.00018063245502132063, "loss": 1.6998, "step": 56831 }, { "epoch": 1.89, "grad_norm": 0.5851576924324036, "learning_rate": 0.00018062286598405835, "loss": 1.6987, "step": 56832 }, { "epoch": 1.89, "grad_norm": 0.5904026031494141, "learning_rate": 0.00018061327709170214, "loss": 1.6776, "step": 56833 }, { "epoch": 1.89, "grad_norm": 0.6010310649871826, "learning_rate": 0.00018060368834426388, "loss": 1.7936, "step": 56834 }, { "epoch": 1.89, "grad_norm": 0.5650426149368286, "learning_rate": 0.0001805940997417552, "loss": 1.7602, "step": 56835 }, { "epoch": 1.89, "grad_norm": 0.5932319760322571, "learning_rate": 0.00018058451128418764, "loss": 1.7888, "step": 56836 }, { "epoch": 1.89, "grad_norm": 0.5725540518760681, "learning_rate": 0.0001805749229715729, "loss": 1.7219, "step": 56837 }, { "epoch": 1.89, "grad_norm": 0.5868086814880371, "learning_rate": 0.00018056533480392264, "loss": 1.7653, "step": 56838 }, { "epoch": 1.89, "grad_norm": 0.5992282032966614, "learning_rate": 0.00018055574678124838, "loss": 1.708, "step": 56839 }, { "epoch": 1.89, "grad_norm": 0.5927542448043823, "learning_rate": 0.00018054615890356182, "loss": 1.7633, "step": 56840 }, { "epoch": 1.89, "grad_norm": 0.5784516930580139, "learning_rate": 0.00018053657117087464, "loss": 1.7313, "step": 56841 }, { "epoch": 1.89, "grad_norm": 0.5884691476821899, "learning_rate": 0.00018052698358319858, "loss": 1.7012, "step": 56842 }, { "epoch": 1.89, "grad_norm": 0.6152672171592712, "learning_rate": 0.0001805173961405451, "loss": 1.673, "step": 56843 }, { "epoch": 1.89, "grad_norm": 0.5879809260368347, "learning_rate": 0.00018050780884292585, "loss": 1.7528, "step": 56844 }, { "epoch": 1.89, "grad_norm": 0.5976151823997498, "learning_rate": 0.00018049822169035262, "loss": 1.6823, "step": 56845 }, { "epoch": 1.89, "grad_norm": 0.5783298015594482, "learning_rate": 0.00018048863468283683, "loss": 1.6585, "step": 56846 }, { "epoch": 1.89, "grad_norm": 0.6064713597297668, "learning_rate": 0.0001804790478203902, "loss": 1.7357, "step": 56847 }, { "epoch": 1.89, "grad_norm": 0.6434983611106873, "learning_rate": 0.00018046946110302457, "loss": 1.7082, "step": 56848 }, { "epoch": 1.89, "grad_norm": 0.5769110321998596, "learning_rate": 0.00018045987453075123, "loss": 1.743, "step": 56849 }, { "epoch": 1.89, "grad_norm": 0.5842919945716858, "learning_rate": 0.00018045028810358203, "loss": 1.7483, "step": 56850 }, { "epoch": 1.89, "grad_norm": 0.6263072490692139, "learning_rate": 0.0001804407018215286, "loss": 1.8608, "step": 56851 }, { "epoch": 1.89, "grad_norm": 0.5822681188583374, "learning_rate": 0.00018043111568460254, "loss": 1.7285, "step": 56852 }, { "epoch": 1.89, "grad_norm": 0.6022370457649231, "learning_rate": 0.0001804215296928155, "loss": 1.727, "step": 56853 }, { "epoch": 1.89, "grad_norm": 0.6347350478172302, "learning_rate": 0.00018041194384617898, "loss": 1.7637, "step": 56854 }, { "epoch": 1.89, "grad_norm": 0.5840421915054321, "learning_rate": 0.00018040235814470494, "loss": 1.6407, "step": 56855 }, { "epoch": 1.89, "grad_norm": 0.5678278207778931, "learning_rate": 0.00018039277258840466, "loss": 1.6885, "step": 56856 }, { "epoch": 1.89, "grad_norm": 0.5820144414901733, "learning_rate": 0.00018038318717729, "loss": 1.7864, "step": 56857 }, { "epoch": 1.89, "grad_norm": 0.6027579307556152, "learning_rate": 0.00018037360191137253, "loss": 1.6309, "step": 56858 }, { "epoch": 1.89, "grad_norm": 0.6216725707054138, "learning_rate": 0.00018036401679066385, "loss": 1.7058, "step": 56859 }, { "epoch": 1.89, "grad_norm": 0.594617486000061, "learning_rate": 0.00018035443181517566, "loss": 1.6884, "step": 56860 }, { "epoch": 1.89, "grad_norm": 0.5767571330070496, "learning_rate": 0.00018034484698491946, "loss": 1.6836, "step": 56861 }, { "epoch": 1.89, "grad_norm": 0.5830777883529663, "learning_rate": 0.00018033526229990716, "loss": 1.793, "step": 56862 }, { "epoch": 1.89, "grad_norm": 0.5760355591773987, "learning_rate": 0.00018032567776015006, "loss": 1.7253, "step": 56863 }, { "epoch": 1.89, "grad_norm": 0.5953825116157532, "learning_rate": 0.00018031609336566, "loss": 1.7302, "step": 56864 }, { "epoch": 1.89, "grad_norm": 0.5860743522644043, "learning_rate": 0.00018030650911644866, "loss": 1.8018, "step": 56865 }, { "epoch": 1.89, "grad_norm": 0.5737281441688538, "learning_rate": 0.0001802969250125275, "loss": 1.6657, "step": 56866 }, { "epoch": 1.89, "grad_norm": 0.5935390591621399, "learning_rate": 0.0001802873410539082, "loss": 1.7517, "step": 56867 }, { "epoch": 1.89, "grad_norm": 0.5970631837844849, "learning_rate": 0.00018027775724060256, "loss": 1.7606, "step": 56868 }, { "epoch": 1.89, "grad_norm": 0.6027324795722961, "learning_rate": 0.000180268173572622, "loss": 1.7455, "step": 56869 }, { "epoch": 1.89, "grad_norm": 0.5823255777359009, "learning_rate": 0.00018025859004997815, "loss": 1.7086, "step": 56870 }, { "epoch": 1.89, "grad_norm": 0.5887976884841919, "learning_rate": 0.00018024900667268278, "loss": 1.6704, "step": 56871 }, { "epoch": 1.89, "grad_norm": 0.5887711048126221, "learning_rate": 0.0001802394234407476, "loss": 1.717, "step": 56872 }, { "epoch": 1.89, "grad_norm": 0.5921943187713623, "learning_rate": 0.00018022984035418396, "loss": 1.7112, "step": 56873 }, { "epoch": 1.89, "grad_norm": 0.5747694373130798, "learning_rate": 0.0001802202574130037, "loss": 1.7402, "step": 56874 }, { "epoch": 1.89, "grad_norm": 0.5997486114501953, "learning_rate": 0.00018021067461721846, "loss": 1.7606, "step": 56875 }, { "epoch": 1.89, "grad_norm": 0.581780731678009, "learning_rate": 0.00018020109196683975, "loss": 1.7548, "step": 56876 }, { "epoch": 1.89, "grad_norm": 0.5915253162384033, "learning_rate": 0.0001801915094618792, "loss": 1.7379, "step": 56877 }, { "epoch": 1.89, "grad_norm": 0.5847592949867249, "learning_rate": 0.00018018192710234863, "loss": 1.7686, "step": 56878 }, { "epoch": 1.89, "grad_norm": 0.6034730076789856, "learning_rate": 0.00018017234488825943, "loss": 1.6995, "step": 56879 }, { "epoch": 1.89, "grad_norm": 0.5803274512290955, "learning_rate": 0.0001801627628196234, "loss": 1.6944, "step": 56880 }, { "epoch": 1.89, "grad_norm": 0.5892190933227539, "learning_rate": 0.0001801531808964521, "loss": 1.7223, "step": 56881 }, { "epoch": 1.89, "grad_norm": 0.608289897441864, "learning_rate": 0.00018014359911875728, "loss": 1.6755, "step": 56882 }, { "epoch": 1.89, "grad_norm": 0.6129747033119202, "learning_rate": 0.00018013401748655035, "loss": 1.7559, "step": 56883 }, { "epoch": 1.89, "grad_norm": 0.5976985096931458, "learning_rate": 0.00018012443599984307, "loss": 1.7218, "step": 56884 }, { "epoch": 1.89, "grad_norm": 0.5926783680915833, "learning_rate": 0.00018011485465864715, "loss": 1.704, "step": 56885 }, { "epoch": 1.89, "grad_norm": 0.5926001667976379, "learning_rate": 0.00018010527346297404, "loss": 1.7594, "step": 56886 }, { "epoch": 1.89, "grad_norm": 0.6092845797538757, "learning_rate": 0.0001800956924128355, "loss": 1.7022, "step": 56887 }, { "epoch": 1.89, "grad_norm": 0.6060206294059753, "learning_rate": 0.0001800861115082432, "loss": 1.7668, "step": 56888 }, { "epoch": 1.89, "grad_norm": 0.5940741896629333, "learning_rate": 0.00018007653074920857, "loss": 1.7589, "step": 56889 }, { "epoch": 1.89, "grad_norm": 0.5962247848510742, "learning_rate": 0.0001800669501357434, "loss": 1.7704, "step": 56890 }, { "epoch": 1.89, "grad_norm": 0.5865519046783447, "learning_rate": 0.00018005736966785924, "loss": 1.7566, "step": 56891 }, { "epoch": 1.89, "grad_norm": 0.562759518623352, "learning_rate": 0.00018004778934556782, "loss": 1.7319, "step": 56892 }, { "epoch": 1.89, "grad_norm": 0.5808831453323364, "learning_rate": 0.00018003820916888072, "loss": 1.8332, "step": 56893 }, { "epoch": 1.89, "grad_norm": 0.5834660530090332, "learning_rate": 0.00018002862913780952, "loss": 1.7718, "step": 56894 }, { "epoch": 1.89, "grad_norm": 0.5649910569190979, "learning_rate": 0.00018001904925236595, "loss": 1.7355, "step": 56895 }, { "epoch": 1.89, "grad_norm": 0.594129204750061, "learning_rate": 0.0001800094695125615, "loss": 1.7558, "step": 56896 }, { "epoch": 1.89, "grad_norm": 0.5971303582191467, "learning_rate": 0.0001799998899184079, "loss": 1.7209, "step": 56897 }, { "epoch": 1.89, "grad_norm": 0.5912209749221802, "learning_rate": 0.00017999031046991683, "loss": 1.7483, "step": 56898 }, { "epoch": 1.89, "grad_norm": 0.6028876304626465, "learning_rate": 0.00017998073116709968, "loss": 1.7614, "step": 56899 }, { "epoch": 1.89, "grad_norm": 0.577932596206665, "learning_rate": 0.0001799711520099683, "loss": 1.6318, "step": 56900 }, { "epoch": 1.89, "grad_norm": 0.5987112522125244, "learning_rate": 0.0001799615729985343, "loss": 1.7241, "step": 56901 }, { "epoch": 1.89, "grad_norm": 0.5926230549812317, "learning_rate": 0.0001799519941328093, "loss": 1.7543, "step": 56902 }, { "epoch": 1.89, "grad_norm": 0.5788482427597046, "learning_rate": 0.0001799424154128048, "loss": 1.7496, "step": 56903 }, { "epoch": 1.89, "grad_norm": 0.5913141369819641, "learning_rate": 0.0001799328368385325, "loss": 1.6916, "step": 56904 }, { "epoch": 1.89, "grad_norm": 0.5858167409896851, "learning_rate": 0.0001799232584100042, "loss": 1.711, "step": 56905 }, { "epoch": 1.89, "grad_norm": 0.5814462304115295, "learning_rate": 0.0001799136801272312, "loss": 1.7053, "step": 56906 }, { "epoch": 1.89, "grad_norm": 0.5978341698646545, "learning_rate": 0.0001799041019902254, "loss": 1.7222, "step": 56907 }, { "epoch": 1.89, "grad_norm": 0.6001561284065247, "learning_rate": 0.0001798945239989983, "loss": 1.735, "step": 56908 }, { "epoch": 1.89, "grad_norm": 0.5869896411895752, "learning_rate": 0.00017988494615356156, "loss": 1.7519, "step": 56909 }, { "epoch": 1.89, "grad_norm": 0.635566234588623, "learning_rate": 0.00017987536845392676, "loss": 1.8132, "step": 56910 }, { "epoch": 1.89, "grad_norm": 0.5856479406356812, "learning_rate": 0.0001798657909001055, "loss": 1.7056, "step": 56911 }, { "epoch": 1.89, "grad_norm": 0.5850477814674377, "learning_rate": 0.00017985621349210965, "loss": 1.793, "step": 56912 }, { "epoch": 1.89, "grad_norm": 0.5820454955101013, "learning_rate": 0.00017984663622995053, "loss": 1.7516, "step": 56913 }, { "epoch": 1.89, "grad_norm": 0.5962101817131042, "learning_rate": 0.0001798370591136399, "loss": 1.7994, "step": 56914 }, { "epoch": 1.89, "grad_norm": 0.6013028621673584, "learning_rate": 0.00017982748214318944, "loss": 1.7, "step": 56915 }, { "epoch": 1.89, "grad_norm": 0.5873754620552063, "learning_rate": 0.00017981790531861064, "loss": 1.7517, "step": 56916 }, { "epoch": 1.89, "grad_norm": 0.5956289172172546, "learning_rate": 0.00017980832863991525, "loss": 1.7097, "step": 56917 }, { "epoch": 1.89, "grad_norm": 0.5993118286132812, "learning_rate": 0.00017979875210711482, "loss": 1.7576, "step": 56918 }, { "epoch": 1.89, "grad_norm": 0.5690764784812927, "learning_rate": 0.00017978917572022098, "loss": 1.7082, "step": 56919 }, { "epoch": 1.89, "grad_norm": 0.5942898392677307, "learning_rate": 0.00017977959947924527, "loss": 1.7767, "step": 56920 }, { "epoch": 1.89, "grad_norm": 0.5579756498336792, "learning_rate": 0.00017977002338419955, "loss": 1.74, "step": 56921 }, { "epoch": 1.89, "grad_norm": 0.5901393294334412, "learning_rate": 0.00017976044743509527, "loss": 1.6941, "step": 56922 }, { "epoch": 1.89, "grad_norm": 0.5833348631858826, "learning_rate": 0.00017975087163194412, "loss": 1.7009, "step": 56923 }, { "epoch": 1.89, "grad_norm": 0.6166220307350159, "learning_rate": 0.00017974129597475761, "loss": 1.6857, "step": 56924 }, { "epoch": 1.89, "grad_norm": 0.6164832711219788, "learning_rate": 0.0001797317204635476, "loss": 1.7221, "step": 56925 }, { "epoch": 1.89, "grad_norm": 0.5927384495735168, "learning_rate": 0.00017972214509832542, "loss": 1.741, "step": 56926 }, { "epoch": 1.89, "grad_norm": 0.5921661257743835, "learning_rate": 0.00017971256987910284, "loss": 1.667, "step": 56927 }, { "epoch": 1.89, "grad_norm": 0.6052416563034058, "learning_rate": 0.00017970299480589148, "loss": 1.7513, "step": 56928 }, { "epoch": 1.89, "grad_norm": 0.6008642315864563, "learning_rate": 0.00017969341987870304, "loss": 1.8165, "step": 56929 }, { "epoch": 1.89, "grad_norm": 0.6002892851829529, "learning_rate": 0.000179683845097549, "loss": 1.7884, "step": 56930 }, { "epoch": 1.89, "grad_norm": 0.5771270394325256, "learning_rate": 0.00017967427046244105, "loss": 1.6871, "step": 56931 }, { "epoch": 1.89, "grad_norm": 0.5907043218612671, "learning_rate": 0.00017966469597339085, "loss": 1.7613, "step": 56932 }, { "epoch": 1.89, "grad_norm": 0.5930061340332031, "learning_rate": 0.00017965512163040995, "loss": 1.7501, "step": 56933 }, { "epoch": 1.89, "grad_norm": 0.5786327123641968, "learning_rate": 0.00017964554743350993, "loss": 1.7129, "step": 56934 }, { "epoch": 1.89, "grad_norm": 0.588440477848053, "learning_rate": 0.00017963597338270266, "loss": 1.7236, "step": 56935 }, { "epoch": 1.89, "grad_norm": 0.6037921905517578, "learning_rate": 0.00017962639947799938, "loss": 1.8142, "step": 56936 }, { "epoch": 1.89, "grad_norm": 0.5856491923332214, "learning_rate": 0.00017961682571941202, "loss": 1.8087, "step": 56937 }, { "epoch": 1.89, "grad_norm": 0.5833706855773926, "learning_rate": 0.00017960725210695204, "loss": 1.7084, "step": 56938 }, { "epoch": 1.89, "grad_norm": 0.5804842710494995, "learning_rate": 0.00017959767864063122, "loss": 1.7744, "step": 56939 }, { "epoch": 1.89, "grad_norm": 0.5795536637306213, "learning_rate": 0.000179588105320461, "loss": 1.7436, "step": 56940 }, { "epoch": 1.89, "grad_norm": 0.5880717635154724, "learning_rate": 0.00017957853214645302, "loss": 1.6842, "step": 56941 }, { "epoch": 1.89, "grad_norm": 0.5887648463249207, "learning_rate": 0.00017956895911861917, "loss": 1.7634, "step": 56942 }, { "epoch": 1.89, "grad_norm": 0.5981624126434326, "learning_rate": 0.00017955938623697064, "loss": 1.7746, "step": 56943 }, { "epoch": 1.89, "grad_norm": 0.5772214531898499, "learning_rate": 0.00017954981350151933, "loss": 1.7633, "step": 56944 }, { "epoch": 1.89, "grad_norm": 0.5655730366706848, "learning_rate": 0.00017954024091227687, "loss": 1.7023, "step": 56945 }, { "epoch": 1.89, "grad_norm": 0.5851355791091919, "learning_rate": 0.00017953066846925473, "loss": 1.6986, "step": 56946 }, { "epoch": 1.89, "grad_norm": 0.5850818753242493, "learning_rate": 0.00017952109617246466, "loss": 1.6893, "step": 56947 }, { "epoch": 1.89, "grad_norm": 0.5844089984893799, "learning_rate": 0.00017951152402191813, "loss": 1.7419, "step": 56948 }, { "epoch": 1.89, "grad_norm": 0.5968867540359497, "learning_rate": 0.000179501952017627, "loss": 1.7553, "step": 56949 }, { "epoch": 1.89, "grad_norm": 0.5886403918266296, "learning_rate": 0.00017949238015960262, "loss": 1.7884, "step": 56950 }, { "epoch": 1.89, "grad_norm": 0.5800895094871521, "learning_rate": 0.0001794828084478568, "loss": 1.7845, "step": 56951 }, { "epoch": 1.89, "grad_norm": 0.5841960310935974, "learning_rate": 0.00017947323688240113, "loss": 1.7076, "step": 56952 }, { "epoch": 1.89, "grad_norm": 0.6116596460342407, "learning_rate": 0.00017946366546324713, "loss": 1.7367, "step": 56953 }, { "epoch": 1.89, "grad_norm": 0.6047293543815613, "learning_rate": 0.00017945409419040648, "loss": 1.653, "step": 56954 }, { "epoch": 1.89, "grad_norm": 0.6106657981872559, "learning_rate": 0.00017944452306389088, "loss": 1.7317, "step": 56955 }, { "epoch": 1.89, "grad_norm": 0.5912730097770691, "learning_rate": 0.00017943495208371177, "loss": 1.6885, "step": 56956 }, { "epoch": 1.89, "grad_norm": 0.5751062631607056, "learning_rate": 0.00017942538124988085, "loss": 1.7176, "step": 56957 }, { "epoch": 1.89, "grad_norm": 0.6067967414855957, "learning_rate": 0.00017941581056240977, "loss": 1.7536, "step": 56958 }, { "epoch": 1.9, "grad_norm": 0.588830828666687, "learning_rate": 0.0001794062400213102, "loss": 1.758, "step": 56959 }, { "epoch": 1.9, "grad_norm": 0.5843462347984314, "learning_rate": 0.00017939666962659364, "loss": 1.7193, "step": 56960 }, { "epoch": 1.9, "grad_norm": 0.6163128614425659, "learning_rate": 0.00017938709937827173, "loss": 1.7313, "step": 56961 }, { "epoch": 1.9, "grad_norm": 0.6229811310768127, "learning_rate": 0.00017937752927635618, "loss": 1.7693, "step": 56962 }, { "epoch": 1.9, "grad_norm": 0.5950708389282227, "learning_rate": 0.00017936795932085847, "loss": 1.6656, "step": 56963 }, { "epoch": 1.9, "grad_norm": 0.5832328796386719, "learning_rate": 0.0001793583895117902, "loss": 1.7596, "step": 56964 }, { "epoch": 1.9, "grad_norm": 0.5935361385345459, "learning_rate": 0.0001793488198491633, "loss": 1.7245, "step": 56965 }, { "epoch": 1.9, "grad_norm": 0.6060996651649475, "learning_rate": 0.00017933925033298894, "loss": 1.7706, "step": 56966 }, { "epoch": 1.9, "grad_norm": 0.5879074335098267, "learning_rate": 0.00017932968096327905, "loss": 1.7518, "step": 56967 }, { "epoch": 1.9, "grad_norm": 0.6134430170059204, "learning_rate": 0.0001793201117400451, "loss": 1.7632, "step": 56968 }, { "epoch": 1.9, "grad_norm": 0.6064471006393433, "learning_rate": 0.00017931054266329886, "loss": 1.6973, "step": 56969 }, { "epoch": 1.9, "grad_norm": 0.5793341994285583, "learning_rate": 0.00017930097373305176, "loss": 1.6717, "step": 56970 }, { "epoch": 1.9, "grad_norm": 0.5923931002616882, "learning_rate": 0.00017929140494931544, "loss": 1.7467, "step": 56971 }, { "epoch": 1.9, "grad_norm": 0.5979226231575012, "learning_rate": 0.00017928183631210173, "loss": 1.7374, "step": 56972 }, { "epoch": 1.9, "grad_norm": 0.6019798517227173, "learning_rate": 0.0001792722678214219, "loss": 1.7562, "step": 56973 }, { "epoch": 1.9, "grad_norm": 0.6082244515419006, "learning_rate": 0.00017926269947728782, "loss": 1.6785, "step": 56974 }, { "epoch": 1.9, "grad_norm": 0.5700640678405762, "learning_rate": 0.00017925313127971107, "loss": 1.7307, "step": 56975 }, { "epoch": 1.9, "grad_norm": 0.5669920444488525, "learning_rate": 0.00017924356322870322, "loss": 1.7247, "step": 56976 }, { "epoch": 1.9, "grad_norm": 0.5861692428588867, "learning_rate": 0.00017923399532427585, "loss": 1.7518, "step": 56977 }, { "epoch": 1.9, "grad_norm": 0.5867334008216858, "learning_rate": 0.00017922442756644058, "loss": 1.7721, "step": 56978 }, { "epoch": 1.9, "grad_norm": 0.6075064539909363, "learning_rate": 0.00017921485995520922, "loss": 1.6598, "step": 56979 }, { "epoch": 1.9, "grad_norm": 0.5994271636009216, "learning_rate": 0.00017920529249059307, "loss": 1.7376, "step": 56980 }, { "epoch": 1.9, "grad_norm": 0.6023844480514526, "learning_rate": 0.00017919572517260393, "loss": 1.7969, "step": 56981 }, { "epoch": 1.9, "grad_norm": 0.58258056640625, "learning_rate": 0.00017918615800125343, "loss": 1.799, "step": 56982 }, { "epoch": 1.9, "grad_norm": 0.5869125127792358, "learning_rate": 0.00017917659097655312, "loss": 1.7772, "step": 56983 }, { "epoch": 1.9, "grad_norm": 0.6412777900695801, "learning_rate": 0.0001791670240985146, "loss": 1.75, "step": 56984 }, { "epoch": 1.9, "grad_norm": 0.599647045135498, "learning_rate": 0.00017915745736714957, "loss": 1.8372, "step": 56985 }, { "epoch": 1.9, "grad_norm": 0.6006563901901245, "learning_rate": 0.00017914789078246946, "loss": 1.7061, "step": 56986 }, { "epoch": 1.9, "grad_norm": 0.608715295791626, "learning_rate": 0.00017913832434448602, "loss": 1.7103, "step": 56987 }, { "epoch": 1.9, "grad_norm": 0.606868326663971, "learning_rate": 0.00017912875805321088, "loss": 1.8029, "step": 56988 }, { "epoch": 1.9, "grad_norm": 0.5666092038154602, "learning_rate": 0.0001791191919086557, "loss": 1.7179, "step": 56989 }, { "epoch": 1.9, "grad_norm": 0.5716628432273865, "learning_rate": 0.00017910962591083192, "loss": 1.7208, "step": 56990 }, { "epoch": 1.9, "grad_norm": 0.6019060015678406, "learning_rate": 0.00017910006005975127, "loss": 1.6876, "step": 56991 }, { "epoch": 1.9, "grad_norm": 0.5919181704521179, "learning_rate": 0.00017909049435542538, "loss": 1.7501, "step": 56992 }, { "epoch": 1.9, "grad_norm": 0.5966299176216125, "learning_rate": 0.00017908092879786577, "loss": 1.7477, "step": 56993 }, { "epoch": 1.9, "grad_norm": 0.5780283808708191, "learning_rate": 0.000179071363387084, "loss": 1.683, "step": 56994 }, { "epoch": 1.9, "grad_norm": 0.5712892413139343, "learning_rate": 0.00017906179812309195, "loss": 1.6937, "step": 56995 }, { "epoch": 1.9, "grad_norm": 0.6165528893470764, "learning_rate": 0.00017905223300590097, "loss": 1.678, "step": 56996 }, { "epoch": 1.9, "grad_norm": 0.5893334150314331, "learning_rate": 0.00017904266803552273, "loss": 1.7426, "step": 56997 }, { "epoch": 1.9, "grad_norm": 0.597599983215332, "learning_rate": 0.0001790331032119689, "loss": 1.6632, "step": 56998 }, { "epoch": 1.9, "grad_norm": 0.5850951671600342, "learning_rate": 0.00017902353853525114, "loss": 1.748, "step": 56999 }, { "epoch": 1.9, "grad_norm": 0.579435408115387, "learning_rate": 0.00017901397400538085, "loss": 1.7398, "step": 57000 }, { "epoch": 1.9, "grad_norm": 0.6067100167274475, "learning_rate": 0.00017900440962236977, "loss": 1.7023, "step": 57001 }, { "epoch": 1.9, "grad_norm": 0.5958970189094543, "learning_rate": 0.0001789948453862296, "loss": 1.7524, "step": 57002 }, { "epoch": 1.9, "grad_norm": 0.594710648059845, "learning_rate": 0.0001789852812969718, "loss": 1.7991, "step": 57003 }, { "epoch": 1.9, "grad_norm": 0.5881949663162231, "learning_rate": 0.00017897571735460806, "loss": 1.6794, "step": 57004 }, { "epoch": 1.9, "grad_norm": 0.5850266814231873, "learning_rate": 0.00017896615355915004, "loss": 1.7523, "step": 57005 }, { "epoch": 1.9, "grad_norm": 0.5928528308868408, "learning_rate": 0.00017895658991060913, "loss": 1.7159, "step": 57006 }, { "epoch": 1.9, "grad_norm": 0.5994041562080383, "learning_rate": 0.00017894702640899716, "loss": 1.7696, "step": 57007 }, { "epoch": 1.9, "grad_norm": 0.5628020763397217, "learning_rate": 0.0001789374630543256, "loss": 1.7317, "step": 57008 }, { "epoch": 1.9, "grad_norm": 0.5700189471244812, "learning_rate": 0.0001789278998466062, "loss": 1.7399, "step": 57009 }, { "epoch": 1.9, "grad_norm": 0.597157895565033, "learning_rate": 0.00017891833678585045, "loss": 1.7681, "step": 57010 }, { "epoch": 1.9, "grad_norm": 0.5682005882263184, "learning_rate": 0.00017890877387207005, "loss": 1.742, "step": 57011 }, { "epoch": 1.9, "grad_norm": 0.5806123614311218, "learning_rate": 0.0001788992111052766, "loss": 1.7739, "step": 57012 }, { "epoch": 1.9, "grad_norm": 0.5897178649902344, "learning_rate": 0.00017888964848548154, "loss": 1.7088, "step": 57013 }, { "epoch": 1.9, "grad_norm": 0.5702993869781494, "learning_rate": 0.00017888008601269663, "loss": 1.7471, "step": 57014 }, { "epoch": 1.9, "grad_norm": 0.5714563131332397, "learning_rate": 0.00017887052368693355, "loss": 1.7501, "step": 57015 }, { "epoch": 1.9, "grad_norm": 0.6065906286239624, "learning_rate": 0.00017886096150820366, "loss": 1.7567, "step": 57016 }, { "epoch": 1.9, "grad_norm": 0.5751425623893738, "learning_rate": 0.00017885139947651873, "loss": 1.662, "step": 57017 }, { "epoch": 1.9, "grad_norm": 0.5602611303329468, "learning_rate": 0.00017884183759189038, "loss": 1.6866, "step": 57018 }, { "epoch": 1.9, "grad_norm": 0.5812093615531921, "learning_rate": 0.00017883227585433026, "loss": 1.7009, "step": 57019 }, { "epoch": 1.9, "grad_norm": 0.5957129597663879, "learning_rate": 0.0001788227142638499, "loss": 1.763, "step": 57020 }, { "epoch": 1.9, "grad_norm": 0.5805513262748718, "learning_rate": 0.0001788131528204608, "loss": 1.7494, "step": 57021 }, { "epoch": 1.9, "grad_norm": 0.5950107574462891, "learning_rate": 0.0001788035915241748, "loss": 1.6607, "step": 57022 }, { "epoch": 1.9, "grad_norm": 0.5818864703178406, "learning_rate": 0.0001787940303750033, "loss": 1.6896, "step": 57023 }, { "epoch": 1.9, "grad_norm": 0.5892800688743591, "learning_rate": 0.00017878446937295793, "loss": 1.7716, "step": 57024 }, { "epoch": 1.9, "grad_norm": 0.5849766135215759, "learning_rate": 0.00017877490851805048, "loss": 1.7568, "step": 57025 }, { "epoch": 1.9, "grad_norm": 0.5906940698623657, "learning_rate": 0.00017876534781029242, "loss": 1.7468, "step": 57026 }, { "epoch": 1.9, "grad_norm": 0.6040204167366028, "learning_rate": 0.00017875578724969532, "loss": 1.703, "step": 57027 }, { "epoch": 1.9, "grad_norm": 0.5775268077850342, "learning_rate": 0.00017874622683627077, "loss": 1.7076, "step": 57028 }, { "epoch": 1.9, "grad_norm": 0.5950050354003906, "learning_rate": 0.0001787366665700306, "loss": 1.7822, "step": 57029 }, { "epoch": 1.9, "grad_norm": 0.563259482383728, "learning_rate": 0.0001787271064509861, "loss": 1.7902, "step": 57030 }, { "epoch": 1.9, "grad_norm": 0.6055042743682861, "learning_rate": 0.00017871754647914907, "loss": 1.7447, "step": 57031 }, { "epoch": 1.9, "grad_norm": 0.579195499420166, "learning_rate": 0.0001787079866545311, "loss": 1.7582, "step": 57032 }, { "epoch": 1.9, "grad_norm": 0.6029013395309448, "learning_rate": 0.00017869842697714372, "loss": 1.6826, "step": 57033 }, { "epoch": 1.9, "grad_norm": 0.5762181878089905, "learning_rate": 0.00017868886744699857, "loss": 1.7562, "step": 57034 }, { "epoch": 1.9, "grad_norm": 0.5644401907920837, "learning_rate": 0.00017867930806410737, "loss": 1.7811, "step": 57035 }, { "epoch": 1.9, "grad_norm": 0.596316397190094, "learning_rate": 0.00017866974882848152, "loss": 1.748, "step": 57036 }, { "epoch": 1.9, "grad_norm": 0.6000397205352783, "learning_rate": 0.00017866018974013262, "loss": 1.7312, "step": 57037 }, { "epoch": 1.9, "grad_norm": 0.5876913070678711, "learning_rate": 0.00017865063079907246, "loss": 1.7119, "step": 57038 }, { "epoch": 1.9, "grad_norm": 0.5744946599006653, "learning_rate": 0.00017864107200531265, "loss": 1.7158, "step": 57039 }, { "epoch": 1.9, "grad_norm": 0.5585148930549622, "learning_rate": 0.0001786315133588646, "loss": 1.6638, "step": 57040 }, { "epoch": 1.9, "grad_norm": 0.5863620638847351, "learning_rate": 0.00017862195485974003, "loss": 1.7874, "step": 57041 }, { "epoch": 1.9, "grad_norm": 0.5872727036476135, "learning_rate": 0.00017861239650795058, "loss": 1.7299, "step": 57042 }, { "epoch": 1.9, "grad_norm": 0.5983611941337585, "learning_rate": 0.00017860283830350773, "loss": 1.7269, "step": 57043 }, { "epoch": 1.9, "grad_norm": 0.5915403962135315, "learning_rate": 0.00017859328024642305, "loss": 1.7258, "step": 57044 }, { "epoch": 1.9, "grad_norm": 0.5925348997116089, "learning_rate": 0.00017858372233670836, "loss": 1.7532, "step": 57045 }, { "epoch": 1.9, "grad_norm": 0.5977879762649536, "learning_rate": 0.00017857416457437515, "loss": 1.7776, "step": 57046 }, { "epoch": 1.9, "grad_norm": 0.5761361718177795, "learning_rate": 0.00017856460695943498, "loss": 1.6529, "step": 57047 }, { "epoch": 1.9, "grad_norm": 0.563480019569397, "learning_rate": 0.0001785550494918995, "loss": 1.756, "step": 57048 }, { "epoch": 1.9, "grad_norm": 0.5572279095649719, "learning_rate": 0.00017854549217178033, "loss": 1.7094, "step": 57049 }, { "epoch": 1.9, "grad_norm": 0.5691276788711548, "learning_rate": 0.00017853593499908896, "loss": 1.7114, "step": 57050 }, { "epoch": 1.9, "grad_norm": 0.5731202363967896, "learning_rate": 0.00017852637797383705, "loss": 1.7151, "step": 57051 }, { "epoch": 1.9, "grad_norm": 0.5986870527267456, "learning_rate": 0.0001785168210960364, "loss": 1.7501, "step": 57052 }, { "epoch": 1.9, "grad_norm": 0.5879186391830444, "learning_rate": 0.00017850726436569823, "loss": 1.7783, "step": 57053 }, { "epoch": 1.9, "grad_norm": 0.5885493755340576, "learning_rate": 0.00017849770778283436, "loss": 1.6863, "step": 57054 }, { "epoch": 1.9, "grad_norm": 0.5825567245483398, "learning_rate": 0.00017848815134745643, "loss": 1.803, "step": 57055 }, { "epoch": 1.9, "grad_norm": 0.5801718235015869, "learning_rate": 0.00017847859505957606, "loss": 1.725, "step": 57056 }, { "epoch": 1.9, "grad_norm": 0.6087987422943115, "learning_rate": 0.00017846903891920464, "loss": 1.6947, "step": 57057 }, { "epoch": 1.9, "grad_norm": 0.5818895697593689, "learning_rate": 0.0001784594829263539, "loss": 1.7738, "step": 57058 }, { "epoch": 1.9, "grad_norm": 0.614581823348999, "learning_rate": 0.00017844992708103557, "loss": 1.7784, "step": 57059 }, { "epoch": 1.9, "grad_norm": 0.5621945261955261, "learning_rate": 0.00017844037138326096, "loss": 1.6984, "step": 57060 }, { "epoch": 1.9, "grad_norm": 0.6078688502311707, "learning_rate": 0.00017843081583304187, "loss": 1.6942, "step": 57061 }, { "epoch": 1.9, "grad_norm": 0.6021113395690918, "learning_rate": 0.00017842126043038993, "loss": 1.7647, "step": 57062 }, { "epoch": 1.9, "grad_norm": 0.5800351500511169, "learning_rate": 0.00017841170517531664, "loss": 1.7508, "step": 57063 }, { "epoch": 1.9, "grad_norm": 0.5931163430213928, "learning_rate": 0.0001784021500678336, "loss": 1.7597, "step": 57064 }, { "epoch": 1.9, "grad_norm": 0.5960102081298828, "learning_rate": 0.00017839259510795235, "loss": 1.7665, "step": 57065 }, { "epoch": 1.9, "grad_norm": 0.5984082221984863, "learning_rate": 0.0001783830402956848, "loss": 1.8244, "step": 57066 }, { "epoch": 1.9, "grad_norm": 0.5961835980415344, "learning_rate": 0.0001783734856310421, "loss": 1.705, "step": 57067 }, { "epoch": 1.9, "grad_norm": 0.6161410212516785, "learning_rate": 0.00017836393111403612, "loss": 1.7012, "step": 57068 }, { "epoch": 1.9, "grad_norm": 0.5995670557022095, "learning_rate": 0.00017835437674467848, "loss": 1.7353, "step": 57069 }, { "epoch": 1.9, "grad_norm": 0.6021544933319092, "learning_rate": 0.00017834482252298067, "loss": 1.6811, "step": 57070 }, { "epoch": 1.9, "grad_norm": 0.5645151734352112, "learning_rate": 0.0001783352684489543, "loss": 1.6927, "step": 57071 }, { "epoch": 1.9, "grad_norm": 0.5864941477775574, "learning_rate": 0.00017832571452261104, "loss": 1.7177, "step": 57072 }, { "epoch": 1.9, "grad_norm": 0.6000954508781433, "learning_rate": 0.0001783161607439624, "loss": 1.7568, "step": 57073 }, { "epoch": 1.9, "grad_norm": 1.0446809530258179, "learning_rate": 0.0001783066071130199, "loss": 1.7599, "step": 57074 }, { "epoch": 1.9, "grad_norm": 0.617447555065155, "learning_rate": 0.00017829705362979532, "loss": 1.7647, "step": 57075 }, { "epoch": 1.9, "grad_norm": 0.5690435171127319, "learning_rate": 0.0001782875002943003, "loss": 1.6818, "step": 57076 }, { "epoch": 1.9, "grad_norm": 0.5878141522407532, "learning_rate": 0.0001782779471065462, "loss": 1.662, "step": 57077 }, { "epoch": 1.9, "grad_norm": 0.5893089175224304, "learning_rate": 0.00017826839406654478, "loss": 1.7177, "step": 57078 }, { "epoch": 1.9, "grad_norm": 0.57850581407547, "learning_rate": 0.0001782588411743076, "loss": 1.7033, "step": 57079 }, { "epoch": 1.9, "grad_norm": 0.6160012483596802, "learning_rate": 0.00017824928842984626, "loss": 1.7756, "step": 57080 }, { "epoch": 1.9, "grad_norm": 2.3386294841766357, "learning_rate": 0.00017823973583317227, "loss": 1.7403, "step": 57081 }, { "epoch": 1.9, "grad_norm": 0.589108407497406, "learning_rate": 0.00017823018338429742, "loss": 1.7264, "step": 57082 }, { "epoch": 1.9, "grad_norm": 0.5747376084327698, "learning_rate": 0.00017822063108323305, "loss": 1.6848, "step": 57083 }, { "epoch": 1.9, "grad_norm": 0.5896065831184387, "learning_rate": 0.00017821107892999097, "loss": 1.6516, "step": 57084 }, { "epoch": 1.9, "grad_norm": 0.6075263619422913, "learning_rate": 0.00017820152692458264, "loss": 1.8024, "step": 57085 }, { "epoch": 1.9, "grad_norm": 0.5851593613624573, "learning_rate": 0.00017819197506701982, "loss": 1.7389, "step": 57086 }, { "epoch": 1.9, "grad_norm": 0.5927895307540894, "learning_rate": 0.0001781824233573139, "loss": 1.7146, "step": 57087 }, { "epoch": 1.9, "grad_norm": 0.5943758487701416, "learning_rate": 0.00017817287179547654, "loss": 1.7222, "step": 57088 }, { "epoch": 1.9, "grad_norm": 0.5737580060958862, "learning_rate": 0.00017816332038151953, "loss": 1.7103, "step": 57089 }, { "epoch": 1.9, "grad_norm": 0.5859928131103516, "learning_rate": 0.0001781537691154541, "loss": 1.783, "step": 57090 }, { "epoch": 1.9, "grad_norm": 0.6009300947189331, "learning_rate": 0.00017814421799729213, "loss": 1.7419, "step": 57091 }, { "epoch": 1.9, "grad_norm": 0.589562177658081, "learning_rate": 0.00017813466702704518, "loss": 1.7691, "step": 57092 }, { "epoch": 1.9, "grad_norm": 0.5920814275741577, "learning_rate": 0.0001781251162047247, "loss": 1.68, "step": 57093 }, { "epoch": 1.9, "grad_norm": 0.5922302007675171, "learning_rate": 0.0001781155655303424, "loss": 1.6761, "step": 57094 }, { "epoch": 1.9, "grad_norm": 0.5749721527099609, "learning_rate": 0.00017810601500390977, "loss": 1.721, "step": 57095 }, { "epoch": 1.9, "grad_norm": 0.575628936290741, "learning_rate": 0.00017809646462543864, "loss": 1.7349, "step": 57096 }, { "epoch": 1.9, "grad_norm": 0.6048937439918518, "learning_rate": 0.0001780869143949403, "loss": 1.7772, "step": 57097 }, { "epoch": 1.9, "grad_norm": 0.5786409974098206, "learning_rate": 0.00017807736431242654, "loss": 1.7905, "step": 57098 }, { "epoch": 1.9, "grad_norm": 0.5741573572158813, "learning_rate": 0.0001780678143779089, "loss": 1.7144, "step": 57099 }, { "epoch": 1.9, "grad_norm": 0.5907508134841919, "learning_rate": 0.000178058264591399, "loss": 1.6968, "step": 57100 }, { "epoch": 1.9, "grad_norm": 0.5943389534950256, "learning_rate": 0.00017804871495290836, "loss": 1.675, "step": 57101 }, { "epoch": 1.9, "grad_norm": 0.5838918685913086, "learning_rate": 0.00017803916546244863, "loss": 1.7058, "step": 57102 }, { "epoch": 1.9, "grad_norm": 0.6176891326904297, "learning_rate": 0.00017802961612003134, "loss": 1.7849, "step": 57103 }, { "epoch": 1.9, "grad_norm": 0.6204861402511597, "learning_rate": 0.0001780200669256681, "loss": 1.7161, "step": 57104 }, { "epoch": 1.9, "grad_norm": 0.5895558595657349, "learning_rate": 0.00017801051787937056, "loss": 1.7337, "step": 57105 }, { "epoch": 1.9, "grad_norm": 0.6171831488609314, "learning_rate": 0.00017800096898115035, "loss": 1.7426, "step": 57106 }, { "epoch": 1.9, "grad_norm": 0.6002967953681946, "learning_rate": 0.00017799142023101888, "loss": 1.784, "step": 57107 }, { "epoch": 1.9, "grad_norm": 0.571208119392395, "learning_rate": 0.0001779818716289879, "loss": 1.7014, "step": 57108 }, { "epoch": 1.9, "grad_norm": 0.6012839674949646, "learning_rate": 0.00017797232317506902, "loss": 1.8108, "step": 57109 }, { "epoch": 1.9, "grad_norm": 0.5948771238327026, "learning_rate": 0.00017796277486927366, "loss": 1.7003, "step": 57110 }, { "epoch": 1.9, "grad_norm": 0.5973734855651855, "learning_rate": 0.00017795322671161347, "loss": 1.7132, "step": 57111 }, { "epoch": 1.9, "grad_norm": 0.582659125328064, "learning_rate": 0.00017794367870210018, "loss": 1.7942, "step": 57112 }, { "epoch": 1.9, "grad_norm": 0.6078625321388245, "learning_rate": 0.00017793413084074523, "loss": 1.6864, "step": 57113 }, { "epoch": 1.9, "grad_norm": 0.6001207232475281, "learning_rate": 0.0001779245831275603, "loss": 1.775, "step": 57114 }, { "epoch": 1.9, "grad_norm": 0.57610023021698, "learning_rate": 0.0001779150355625569, "loss": 1.7194, "step": 57115 }, { "epoch": 1.9, "grad_norm": 0.5947416424751282, "learning_rate": 0.0001779054881457467, "loss": 1.7402, "step": 57116 }, { "epoch": 1.9, "grad_norm": 0.6364041566848755, "learning_rate": 0.00017789594087714122, "loss": 1.7402, "step": 57117 }, { "epoch": 1.9, "grad_norm": 0.6165585517883301, "learning_rate": 0.00017788639375675201, "loss": 1.7686, "step": 57118 }, { "epoch": 1.9, "grad_norm": 0.5932832956314087, "learning_rate": 0.00017787684678459087, "loss": 1.7009, "step": 57119 }, { "epoch": 1.9, "grad_norm": 0.588913083076477, "learning_rate": 0.00017786729996066913, "loss": 1.6875, "step": 57120 }, { "epoch": 1.9, "grad_norm": 0.5934475660324097, "learning_rate": 0.00017785775328499855, "loss": 1.7482, "step": 57121 }, { "epoch": 1.9, "grad_norm": 0.5942955613136292, "learning_rate": 0.00017784820675759069, "loss": 1.7498, "step": 57122 }, { "epoch": 1.9, "grad_norm": 0.5813034176826477, "learning_rate": 0.000177838660378457, "loss": 1.6791, "step": 57123 }, { "epoch": 1.9, "grad_norm": 0.575192391872406, "learning_rate": 0.00017782911414760923, "loss": 1.6477, "step": 57124 }, { "epoch": 1.9, "grad_norm": 0.6042748689651489, "learning_rate": 0.00017781956806505887, "loss": 1.7716, "step": 57125 }, { "epoch": 1.9, "grad_norm": 0.581071674823761, "learning_rate": 0.00017781002213081764, "loss": 1.7593, "step": 57126 }, { "epoch": 1.9, "grad_norm": 0.5822622179985046, "learning_rate": 0.00017780047634489697, "loss": 1.6874, "step": 57127 }, { "epoch": 1.9, "grad_norm": 0.575006902217865, "learning_rate": 0.00017779093070730858, "loss": 1.7147, "step": 57128 }, { "epoch": 1.9, "grad_norm": 0.5609384179115295, "learning_rate": 0.00017778138521806397, "loss": 1.643, "step": 57129 }, { "epoch": 1.9, "grad_norm": 0.5967103838920593, "learning_rate": 0.00017777183987717474, "loss": 1.7092, "step": 57130 }, { "epoch": 1.9, "grad_norm": 0.5843383073806763, "learning_rate": 0.00017776229468465248, "loss": 1.7813, "step": 57131 }, { "epoch": 1.9, "grad_norm": 0.571414589881897, "learning_rate": 0.00017775274964050886, "loss": 1.7274, "step": 57132 }, { "epoch": 1.9, "grad_norm": 0.5740437507629395, "learning_rate": 0.00017774320474475523, "loss": 1.7091, "step": 57133 }, { "epoch": 1.9, "grad_norm": 0.6222213506698608, "learning_rate": 0.00017773365999740345, "loss": 1.6976, "step": 57134 }, { "epoch": 1.9, "grad_norm": 0.5929864645004272, "learning_rate": 0.00017772411539846493, "loss": 1.6613, "step": 57135 }, { "epoch": 1.9, "grad_norm": 0.6209601759910583, "learning_rate": 0.00017771457094795142, "loss": 1.7533, "step": 57136 }, { "epoch": 1.9, "grad_norm": 0.6093383431434631, "learning_rate": 0.00017770502664587434, "loss": 1.7688, "step": 57137 }, { "epoch": 1.9, "grad_norm": 0.5769264698028564, "learning_rate": 0.00017769548249224525, "loss": 1.7528, "step": 57138 }, { "epoch": 1.9, "grad_norm": 0.6043710112571716, "learning_rate": 0.00017768593848707602, "loss": 1.7487, "step": 57139 }, { "epoch": 1.9, "grad_norm": 0.5682295560836792, "learning_rate": 0.0001776763946303779, "loss": 1.6731, "step": 57140 }, { "epoch": 1.9, "grad_norm": 0.5837966799736023, "learning_rate": 0.0001776668509221626, "loss": 1.7479, "step": 57141 }, { "epoch": 1.9, "grad_norm": 0.577851414680481, "learning_rate": 0.00017765730736244182, "loss": 1.7227, "step": 57142 }, { "epoch": 1.9, "grad_norm": 0.601196825504303, "learning_rate": 0.00017764776395122697, "loss": 1.7407, "step": 57143 }, { "epoch": 1.9, "grad_norm": 0.6099095344543457, "learning_rate": 0.00017763822068852972, "loss": 1.7227, "step": 57144 }, { "epoch": 1.9, "grad_norm": 0.5840960741043091, "learning_rate": 0.00017762867757436154, "loss": 1.7403, "step": 57145 }, { "epoch": 1.9, "grad_norm": 0.5870208144187927, "learning_rate": 0.00017761913460873435, "loss": 1.7438, "step": 57146 }, { "epoch": 1.9, "grad_norm": 0.5848580002784729, "learning_rate": 0.00017760959179165928, "loss": 1.6742, "step": 57147 }, { "epoch": 1.9, "grad_norm": 0.5795612931251526, "learning_rate": 0.00017760004912314824, "loss": 1.7858, "step": 57148 }, { "epoch": 1.9, "grad_norm": 0.5979260206222534, "learning_rate": 0.00017759050660321273, "loss": 1.7157, "step": 57149 }, { "epoch": 1.9, "grad_norm": 0.586078405380249, "learning_rate": 0.00017758096423186424, "loss": 1.7125, "step": 57150 }, { "epoch": 1.9, "grad_norm": 0.5957487225532532, "learning_rate": 0.0001775714220091144, "loss": 1.7089, "step": 57151 }, { "epoch": 1.9, "grad_norm": 0.5823975205421448, "learning_rate": 0.00017756187993497494, "loss": 1.8064, "step": 57152 }, { "epoch": 1.9, "grad_norm": 0.5982845425605774, "learning_rate": 0.0001775523380094572, "loss": 1.8253, "step": 57153 }, { "epoch": 1.9, "grad_norm": 0.5937276482582092, "learning_rate": 0.00017754279623257284, "loss": 1.7245, "step": 57154 }, { "epoch": 1.9, "grad_norm": 0.60105299949646, "learning_rate": 0.00017753325460433358, "loss": 1.7126, "step": 57155 }, { "epoch": 1.9, "grad_norm": 0.5760327577590942, "learning_rate": 0.00017752371312475092, "loss": 1.7306, "step": 57156 }, { "epoch": 1.9, "grad_norm": 0.5668760538101196, "learning_rate": 0.00017751417179383638, "loss": 1.7034, "step": 57157 }, { "epoch": 1.9, "grad_norm": 0.5747125744819641, "learning_rate": 0.00017750463061160158, "loss": 1.7266, "step": 57158 }, { "epoch": 1.9, "grad_norm": 0.5698007345199585, "learning_rate": 0.00017749508957805817, "loss": 1.7257, "step": 57159 }, { "epoch": 1.9, "grad_norm": 0.5797914862632751, "learning_rate": 0.0001774855486932176, "loss": 1.7101, "step": 57160 }, { "epoch": 1.9, "grad_norm": 0.5849871039390564, "learning_rate": 0.0001774760079570915, "loss": 1.7061, "step": 57161 }, { "epoch": 1.9, "grad_norm": 0.6026488542556763, "learning_rate": 0.00017746646736969152, "loss": 1.6893, "step": 57162 }, { "epoch": 1.9, "grad_norm": 0.5687332153320312, "learning_rate": 0.00017745692693102924, "loss": 1.6387, "step": 57163 }, { "epoch": 1.9, "grad_norm": 0.5797294974327087, "learning_rate": 0.00017744738664111613, "loss": 1.7149, "step": 57164 }, { "epoch": 1.9, "grad_norm": 0.5718696117401123, "learning_rate": 0.00017743784649996386, "loss": 1.6726, "step": 57165 }, { "epoch": 1.9, "grad_norm": 0.5684768557548523, "learning_rate": 0.00017742830650758403, "loss": 1.6815, "step": 57166 }, { "epoch": 1.9, "grad_norm": 0.5904301404953003, "learning_rate": 0.00017741876666398811, "loss": 1.7054, "step": 57167 }, { "epoch": 1.9, "grad_norm": 0.616966962814331, "learning_rate": 0.0001774092269691877, "loss": 1.7639, "step": 57168 }, { "epoch": 1.9, "grad_norm": 0.5828335285186768, "learning_rate": 0.00017739968742319458, "loss": 1.7262, "step": 57169 }, { "epoch": 1.9, "grad_norm": 0.6022710204124451, "learning_rate": 0.00017739014802602, "loss": 1.6759, "step": 57170 }, { "epoch": 1.9, "grad_norm": 0.5703551769256592, "learning_rate": 0.00017738060877767583, "loss": 1.7021, "step": 57171 }, { "epoch": 1.9, "grad_norm": 0.5995365381240845, "learning_rate": 0.00017737106967817348, "loss": 1.72, "step": 57172 }, { "epoch": 1.9, "grad_norm": 0.6052312254905701, "learning_rate": 0.00017736153072752463, "loss": 1.7838, "step": 57173 }, { "epoch": 1.9, "grad_norm": 0.5899354219436646, "learning_rate": 0.00017735199192574078, "loss": 1.6721, "step": 57174 }, { "epoch": 1.9, "grad_norm": 0.5726131200790405, "learning_rate": 0.00017734245327283353, "loss": 1.7209, "step": 57175 }, { "epoch": 1.9, "grad_norm": 0.6022435426712036, "learning_rate": 0.00017733291476881456, "loss": 1.8111, "step": 57176 }, { "epoch": 1.9, "grad_norm": 0.5879961848258972, "learning_rate": 0.0001773233764136952, "loss": 1.8176, "step": 57177 }, { "epoch": 1.9, "grad_norm": 2.0659379959106445, "learning_rate": 0.00017731383820748727, "loss": 1.7737, "step": 57178 }, { "epoch": 1.9, "grad_norm": 0.600654661655426, "learning_rate": 0.00017730430015020228, "loss": 1.723, "step": 57179 }, { "epoch": 1.9, "grad_norm": 0.5808038115501404, "learning_rate": 0.00017729476224185182, "loss": 1.7753, "step": 57180 }, { "epoch": 1.9, "grad_norm": 0.5959522128105164, "learning_rate": 0.00017728522448244737, "loss": 1.6883, "step": 57181 }, { "epoch": 1.9, "grad_norm": 0.6006289124488831, "learning_rate": 0.00017727568687200053, "loss": 1.7622, "step": 57182 }, { "epoch": 1.9, "grad_norm": 0.5928208827972412, "learning_rate": 0.0001772661494105231, "loss": 1.7567, "step": 57183 }, { "epoch": 1.9, "grad_norm": 0.5868100523948669, "learning_rate": 0.00017725661209802628, "loss": 1.7241, "step": 57184 }, { "epoch": 1.9, "grad_norm": 0.5726208686828613, "learning_rate": 0.00017724707493452194, "loss": 1.6851, "step": 57185 }, { "epoch": 1.9, "grad_norm": 0.5739598870277405, "learning_rate": 0.00017723753792002163, "loss": 1.7249, "step": 57186 }, { "epoch": 1.9, "grad_norm": 0.6118518710136414, "learning_rate": 0.00017722800105453675, "loss": 1.773, "step": 57187 }, { "epoch": 1.9, "grad_norm": 0.6011248826980591, "learning_rate": 0.00017721846433807906, "loss": 1.689, "step": 57188 }, { "epoch": 1.9, "grad_norm": 0.6132439970970154, "learning_rate": 0.00017720892777066006, "loss": 1.6953, "step": 57189 }, { "epoch": 1.9, "grad_norm": 0.5801923274993896, "learning_rate": 0.00017719939135229132, "loss": 1.7213, "step": 57190 }, { "epoch": 1.9, "grad_norm": 0.608776330947876, "learning_rate": 0.00017718985508298433, "loss": 1.8473, "step": 57191 }, { "epoch": 1.9, "grad_norm": 0.5696070194244385, "learning_rate": 0.00017718031896275083, "loss": 1.7301, "step": 57192 }, { "epoch": 1.9, "grad_norm": 0.5843757390975952, "learning_rate": 0.00017717078299160242, "loss": 1.7819, "step": 57193 }, { "epoch": 1.9, "grad_norm": 0.5844491720199585, "learning_rate": 0.00017716124716955046, "loss": 1.7938, "step": 57194 }, { "epoch": 1.9, "grad_norm": 0.5868029594421387, "learning_rate": 0.0001771517114966067, "loss": 1.7075, "step": 57195 }, { "epoch": 1.9, "grad_norm": 0.5945838689804077, "learning_rate": 0.00017714217597278268, "loss": 1.7711, "step": 57196 }, { "epoch": 1.9, "grad_norm": 0.5810437202453613, "learning_rate": 0.0001771326405980899, "loss": 1.6941, "step": 57197 }, { "epoch": 1.9, "grad_norm": 0.5835089683532715, "learning_rate": 0.00017712310537253996, "loss": 1.7362, "step": 57198 }, { "epoch": 1.9, "grad_norm": 0.5751386284828186, "learning_rate": 0.00017711357029614463, "loss": 1.7364, "step": 57199 }, { "epoch": 1.9, "grad_norm": 0.5717137455940247, "learning_rate": 0.00017710403536891513, "loss": 1.7583, "step": 57200 }, { "epoch": 1.9, "grad_norm": 0.5900229811668396, "learning_rate": 0.00017709450059086329, "loss": 1.7606, "step": 57201 }, { "epoch": 1.9, "grad_norm": 0.61770099401474, "learning_rate": 0.00017708496596200061, "loss": 1.716, "step": 57202 }, { "epoch": 1.9, "grad_norm": 0.5903173685073853, "learning_rate": 0.00017707543148233877, "loss": 1.8202, "step": 57203 }, { "epoch": 1.9, "grad_norm": 0.5929604768753052, "learning_rate": 0.00017706589715188914, "loss": 1.6907, "step": 57204 }, { "epoch": 1.9, "grad_norm": 0.5901339650154114, "learning_rate": 0.00017705636297066333, "loss": 1.7582, "step": 57205 }, { "epoch": 1.9, "grad_norm": 0.5744034647941589, "learning_rate": 0.00017704682893867322, "loss": 1.7106, "step": 57206 }, { "epoch": 1.9, "grad_norm": 0.597104012966156, "learning_rate": 0.00017703729505592986, "loss": 1.6882, "step": 57207 }, { "epoch": 1.9, "grad_norm": 0.5920062065124512, "learning_rate": 0.00017702776132244528, "loss": 1.6879, "step": 57208 }, { "epoch": 1.9, "grad_norm": 0.5803871154785156, "learning_rate": 0.00017701822773823087, "loss": 1.8163, "step": 57209 }, { "epoch": 1.9, "grad_norm": 0.5831092596054077, "learning_rate": 0.0001770086943032982, "loss": 1.6763, "step": 57210 }, { "epoch": 1.9, "grad_norm": 0.5847409963607788, "learning_rate": 0.00017699916101765882, "loss": 1.7506, "step": 57211 }, { "epoch": 1.9, "grad_norm": 0.6039308905601501, "learning_rate": 0.00017698962788132428, "loss": 1.7615, "step": 57212 }, { "epoch": 1.9, "grad_norm": 0.5870593190193176, "learning_rate": 0.00017698009489430636, "loss": 1.7275, "step": 57213 }, { "epoch": 1.9, "grad_norm": 0.5832770466804504, "learning_rate": 0.00017697056205661632, "loss": 1.7173, "step": 57214 }, { "epoch": 1.9, "grad_norm": 0.565937876701355, "learning_rate": 0.00017696102936826596, "loss": 1.723, "step": 57215 }, { "epoch": 1.9, "grad_norm": 0.5978323221206665, "learning_rate": 0.0001769514968292668, "loss": 1.8064, "step": 57216 }, { "epoch": 1.9, "grad_norm": 0.5851625800132751, "learning_rate": 0.00017694196443963038, "loss": 1.7375, "step": 57217 }, { "epoch": 1.9, "grad_norm": 0.5837740898132324, "learning_rate": 0.0001769324321993683, "loss": 1.7419, "step": 57218 }, { "epoch": 1.9, "grad_norm": 0.6181831955909729, "learning_rate": 0.00017692290010849215, "loss": 1.7776, "step": 57219 }, { "epoch": 1.9, "grad_norm": 0.581476628780365, "learning_rate": 0.00017691336816701337, "loss": 1.7251, "step": 57220 }, { "epoch": 1.9, "grad_norm": 0.6199582815170288, "learning_rate": 0.0001769038363749436, "loss": 1.7528, "step": 57221 }, { "epoch": 1.9, "grad_norm": 0.5758575201034546, "learning_rate": 0.00017689430473229452, "loss": 1.6934, "step": 57222 }, { "epoch": 1.9, "grad_norm": 0.5905568599700928, "learning_rate": 0.00017688477323907762, "loss": 1.7563, "step": 57223 }, { "epoch": 1.9, "grad_norm": 0.5867756605148315, "learning_rate": 0.00017687524189530444, "loss": 1.7146, "step": 57224 }, { "epoch": 1.9, "grad_norm": 0.5844408273696899, "learning_rate": 0.00017686571070098654, "loss": 1.7079, "step": 57225 }, { "epoch": 1.9, "grad_norm": 0.6065728068351746, "learning_rate": 0.00017685617965613562, "loss": 1.7528, "step": 57226 }, { "epoch": 1.9, "grad_norm": 0.585331916809082, "learning_rate": 0.0001768466487607631, "loss": 1.7385, "step": 57227 }, { "epoch": 1.9, "grad_norm": 0.5994712114334106, "learning_rate": 0.0001768371180148805, "loss": 1.7656, "step": 57228 }, { "epoch": 1.9, "grad_norm": 0.6006796360015869, "learning_rate": 0.00017682758741849965, "loss": 1.7565, "step": 57229 }, { "epoch": 1.9, "grad_norm": 0.5826152563095093, "learning_rate": 0.0001768180569716319, "loss": 1.6789, "step": 57230 }, { "epoch": 1.9, "grad_norm": 0.596554696559906, "learning_rate": 0.00017680852667428888, "loss": 1.7138, "step": 57231 }, { "epoch": 1.9, "grad_norm": 0.6362544894218445, "learning_rate": 0.0001767989965264821, "loss": 1.7681, "step": 57232 }, { "epoch": 1.9, "grad_norm": 0.6049484610557556, "learning_rate": 0.0001767894665282233, "loss": 1.7768, "step": 57233 }, { "epoch": 1.9, "grad_norm": 0.5889469981193542, "learning_rate": 0.00017677993667952386, "loss": 1.7235, "step": 57234 }, { "epoch": 1.9, "grad_norm": 0.592491626739502, "learning_rate": 0.00017677040698039535, "loss": 1.6862, "step": 57235 }, { "epoch": 1.9, "grad_norm": 0.606217622756958, "learning_rate": 0.00017676087743084956, "loss": 1.7373, "step": 57236 }, { "epoch": 1.9, "grad_norm": 1.3442847728729248, "learning_rate": 0.00017675134803089783, "loss": 1.7131, "step": 57237 }, { "epoch": 1.9, "grad_norm": 0.6360631585121155, "learning_rate": 0.0001767418187805518, "loss": 1.7638, "step": 57238 }, { "epoch": 1.9, "grad_norm": 0.6086997389793396, "learning_rate": 0.00017673228967982313, "loss": 1.6944, "step": 57239 }, { "epoch": 1.9, "grad_norm": 0.5750600695610046, "learning_rate": 0.0001767227607287232, "loss": 1.7408, "step": 57240 }, { "epoch": 1.9, "grad_norm": 0.6002359390258789, "learning_rate": 0.0001767132319272637, "loss": 1.5972, "step": 57241 }, { "epoch": 1.9, "grad_norm": 0.5918363332748413, "learning_rate": 0.00017670370327545608, "loss": 1.6761, "step": 57242 }, { "epoch": 1.9, "grad_norm": 0.617622435092926, "learning_rate": 0.00017669417477331211, "loss": 1.7178, "step": 57243 }, { "epoch": 1.9, "grad_norm": 0.6332434415817261, "learning_rate": 0.00017668464642084324, "loss": 1.6619, "step": 57244 }, { "epoch": 1.9, "grad_norm": 0.6043184399604797, "learning_rate": 0.00017667511821806102, "loss": 1.7294, "step": 57245 }, { "epoch": 1.9, "grad_norm": 0.6127467751502991, "learning_rate": 0.00017666559016497703, "loss": 1.7884, "step": 57246 }, { "epoch": 1.9, "grad_norm": 0.6005125045776367, "learning_rate": 0.00017665606226160285, "loss": 1.7503, "step": 57247 }, { "epoch": 1.9, "grad_norm": 0.5998417735099792, "learning_rate": 0.00017664653450795, "loss": 1.6432, "step": 57248 }, { "epoch": 1.9, "grad_norm": 0.575567901134491, "learning_rate": 0.0001766370069040302, "loss": 1.7445, "step": 57249 }, { "epoch": 1.9, "grad_norm": 0.6216451525688171, "learning_rate": 0.0001766274794498547, "loss": 1.769, "step": 57250 }, { "epoch": 1.9, "grad_norm": 0.6168114542961121, "learning_rate": 0.0001766179521454354, "loss": 1.7445, "step": 57251 }, { "epoch": 1.9, "grad_norm": 0.575204610824585, "learning_rate": 0.00017660842499078367, "loss": 1.6922, "step": 57252 }, { "epoch": 1.9, "grad_norm": 0.5875694751739502, "learning_rate": 0.0001765988979859112, "loss": 1.8336, "step": 57253 }, { "epoch": 1.9, "grad_norm": 0.5777204632759094, "learning_rate": 0.00017658937113082944, "loss": 1.7353, "step": 57254 }, { "epoch": 1.9, "grad_norm": 0.59480881690979, "learning_rate": 0.00017657984442554994, "loss": 1.6929, "step": 57255 }, { "epoch": 1.9, "grad_norm": 0.5832011103630066, "learning_rate": 0.0001765703178700845, "loss": 1.7228, "step": 57256 }, { "epoch": 1.9, "grad_norm": 0.5936520099639893, "learning_rate": 0.0001765607914644443, "loss": 1.6714, "step": 57257 }, { "epoch": 1.9, "grad_norm": 0.6111495494842529, "learning_rate": 0.00017655126520864123, "loss": 1.7166, "step": 57258 }, { "epoch": 1.91, "grad_norm": 0.6013599634170532, "learning_rate": 0.00017654173910268674, "loss": 1.7068, "step": 57259 }, { "epoch": 1.91, "grad_norm": 0.5847983360290527, "learning_rate": 0.00017653221314659239, "loss": 1.6684, "step": 57260 }, { "epoch": 1.91, "grad_norm": 0.590036928653717, "learning_rate": 0.0001765226873403697, "loss": 1.7365, "step": 57261 }, { "epoch": 1.91, "grad_norm": 0.5794296860694885, "learning_rate": 0.00017651316168403017, "loss": 1.719, "step": 57262 }, { "epoch": 1.91, "grad_norm": 0.5799947381019592, "learning_rate": 0.00017650363617758568, "loss": 1.6783, "step": 57263 }, { "epoch": 1.91, "grad_norm": 0.5793039798736572, "learning_rate": 0.00017649411082104743, "loss": 1.709, "step": 57264 }, { "epoch": 1.91, "grad_norm": 0.6010639667510986, "learning_rate": 0.00017648458561442718, "loss": 1.7246, "step": 57265 }, { "epoch": 1.91, "grad_norm": 0.5839925408363342, "learning_rate": 0.0001764750605577365, "loss": 1.7849, "step": 57266 }, { "epoch": 1.91, "grad_norm": 0.5760990977287292, "learning_rate": 0.00017646553565098683, "loss": 1.6546, "step": 57267 }, { "epoch": 1.91, "grad_norm": 0.5716334581375122, "learning_rate": 0.00017645601089418979, "loss": 1.6944, "step": 57268 }, { "epoch": 1.91, "grad_norm": 0.5672783255577087, "learning_rate": 0.00017644648628735703, "loss": 1.7396, "step": 57269 }, { "epoch": 1.91, "grad_norm": 0.6251777410507202, "learning_rate": 0.00017643696183049996, "loss": 1.7393, "step": 57270 }, { "epoch": 1.91, "grad_norm": 0.6075776815414429, "learning_rate": 0.00017642743752363014, "loss": 1.7676, "step": 57271 }, { "epoch": 1.91, "grad_norm": 0.6101520657539368, "learning_rate": 0.0001764179133667593, "loss": 1.7408, "step": 57272 }, { "epoch": 1.91, "grad_norm": 0.5763036608695984, "learning_rate": 0.00017640838935989892, "loss": 1.722, "step": 57273 }, { "epoch": 1.91, "grad_norm": 0.5934880971908569, "learning_rate": 0.00017639886550306052, "loss": 1.7254, "step": 57274 }, { "epoch": 1.91, "grad_norm": 0.5953794121742249, "learning_rate": 0.00017638934179625568, "loss": 1.6551, "step": 57275 }, { "epoch": 1.91, "grad_norm": 0.581135094165802, "learning_rate": 0.000176379818239496, "loss": 1.7657, "step": 57276 }, { "epoch": 1.91, "grad_norm": 0.613375186920166, "learning_rate": 0.00017637029483279297, "loss": 1.764, "step": 57277 }, { "epoch": 1.91, "grad_norm": 0.5973495841026306, "learning_rate": 0.0001763607715761581, "loss": 1.7157, "step": 57278 }, { "epoch": 1.91, "grad_norm": 0.5869041681289673, "learning_rate": 0.00017635124846960326, "loss": 1.6022, "step": 57279 }, { "epoch": 1.91, "grad_norm": 0.5822877287864685, "learning_rate": 0.00017634172551313955, "loss": 1.7212, "step": 57280 }, { "epoch": 1.91, "grad_norm": 0.5866519212722778, "learning_rate": 0.00017633220270677887, "loss": 1.7226, "step": 57281 }, { "epoch": 1.91, "grad_norm": 0.5934329032897949, "learning_rate": 0.00017632268005053265, "loss": 1.7099, "step": 57282 }, { "epoch": 1.91, "grad_norm": 0.5874645709991455, "learning_rate": 0.00017631315754441256, "loss": 1.8021, "step": 57283 }, { "epoch": 1.91, "grad_norm": 0.5815119743347168, "learning_rate": 0.00017630363518842995, "loss": 1.8082, "step": 57284 }, { "epoch": 1.91, "grad_norm": 0.5983582139015198, "learning_rate": 0.00017629411298259652, "loss": 1.7562, "step": 57285 }, { "epoch": 1.91, "grad_norm": 0.589551568031311, "learning_rate": 0.00017628459092692398, "loss": 1.6915, "step": 57286 }, { "epoch": 1.91, "grad_norm": 0.5964634418487549, "learning_rate": 0.0001762750690214235, "loss": 1.7152, "step": 57287 }, { "epoch": 1.91, "grad_norm": 0.6059650182723999, "learning_rate": 0.0001762655472661069, "loss": 1.7761, "step": 57288 }, { "epoch": 1.91, "grad_norm": 0.5751703977584839, "learning_rate": 0.00017625602566098572, "loss": 1.6637, "step": 57289 }, { "epoch": 1.91, "grad_norm": 0.611743152141571, "learning_rate": 0.00017624650420607155, "loss": 1.6905, "step": 57290 }, { "epoch": 1.91, "grad_norm": 0.5915838479995728, "learning_rate": 0.00017623698290137585, "loss": 1.7101, "step": 57291 }, { "epoch": 1.91, "grad_norm": 0.5936067700386047, "learning_rate": 0.00017622746174691015, "loss": 1.804, "step": 57292 }, { "epoch": 1.91, "grad_norm": 0.5880687236785889, "learning_rate": 0.00017621794074268625, "loss": 1.6441, "step": 57293 }, { "epoch": 1.91, "grad_norm": 0.5880429148674011, "learning_rate": 0.00017620841988871534, "loss": 1.7346, "step": 57294 }, { "epoch": 1.91, "grad_norm": 0.6406511664390564, "learning_rate": 0.00017619889918500927, "loss": 1.7788, "step": 57295 }, { "epoch": 1.91, "grad_norm": 0.5757653713226318, "learning_rate": 0.0001761893786315795, "loss": 1.7372, "step": 57296 }, { "epoch": 1.91, "grad_norm": 0.61673903465271, "learning_rate": 0.00017617985822843753, "loss": 1.7602, "step": 57297 }, { "epoch": 1.91, "grad_norm": 0.5625714063644409, "learning_rate": 0.000176170337975595, "loss": 1.6319, "step": 57298 }, { "epoch": 1.91, "grad_norm": 0.6062814593315125, "learning_rate": 0.00017616081787306335, "loss": 1.7439, "step": 57299 }, { "epoch": 1.91, "grad_norm": 0.5879337787628174, "learning_rate": 0.00017615129792085442, "loss": 1.695, "step": 57300 }, { "epoch": 1.91, "grad_norm": 0.568520724773407, "learning_rate": 0.00017614177811897937, "loss": 1.6783, "step": 57301 }, { "epoch": 1.91, "grad_norm": 0.6313791275024414, "learning_rate": 0.00017613225846745004, "loss": 1.7578, "step": 57302 }, { "epoch": 1.91, "grad_norm": 0.5798341631889343, "learning_rate": 0.0001761227389662779, "loss": 1.7281, "step": 57303 }, { "epoch": 1.91, "grad_norm": 0.6017122864723206, "learning_rate": 0.00017611321961547447, "loss": 1.6811, "step": 57304 }, { "epoch": 1.91, "grad_norm": 0.5730801224708557, "learning_rate": 0.00017610370041505136, "loss": 1.7282, "step": 57305 }, { "epoch": 1.91, "grad_norm": 0.5846092700958252, "learning_rate": 0.00017609418136502013, "loss": 1.7551, "step": 57306 }, { "epoch": 1.91, "grad_norm": 0.5783010721206665, "learning_rate": 0.00017608466246539227, "loss": 1.7473, "step": 57307 }, { "epoch": 1.91, "grad_norm": 0.5871214270591736, "learning_rate": 0.0001760751437161793, "loss": 1.6753, "step": 57308 }, { "epoch": 1.91, "grad_norm": 0.5810860991477966, "learning_rate": 0.0001760656251173929, "loss": 1.7843, "step": 57309 }, { "epoch": 1.91, "grad_norm": 0.5752798914909363, "learning_rate": 0.00017605610666904462, "loss": 1.7529, "step": 57310 }, { "epoch": 1.91, "grad_norm": 0.5844531059265137, "learning_rate": 0.00017604658837114595, "loss": 1.7012, "step": 57311 }, { "epoch": 1.91, "grad_norm": 0.5917012095451355, "learning_rate": 0.00017603707022370842, "loss": 1.7403, "step": 57312 }, { "epoch": 1.91, "grad_norm": 0.5816587209701538, "learning_rate": 0.0001760275522267437, "loss": 1.7066, "step": 57313 }, { "epoch": 1.91, "grad_norm": 0.6138940453529358, "learning_rate": 0.0001760180343802632, "loss": 1.7456, "step": 57314 }, { "epoch": 1.91, "grad_norm": 0.5950987935066223, "learning_rate": 0.00017600851668427846, "loss": 1.6959, "step": 57315 }, { "epoch": 1.91, "grad_norm": 0.5871778726577759, "learning_rate": 0.00017599899913880133, "loss": 1.6792, "step": 57316 }, { "epoch": 1.91, "grad_norm": 0.5957356691360474, "learning_rate": 0.00017598948174384294, "loss": 1.6893, "step": 57317 }, { "epoch": 1.91, "grad_norm": 0.5705294609069824, "learning_rate": 0.0001759799644994151, "loss": 1.7285, "step": 57318 }, { "epoch": 1.91, "grad_norm": 0.5794587731361389, "learning_rate": 0.0001759704474055293, "loss": 1.7459, "step": 57319 }, { "epoch": 1.91, "grad_norm": 0.5711456537246704, "learning_rate": 0.00017596093046219722, "loss": 1.6962, "step": 57320 }, { "epoch": 1.91, "grad_norm": 0.5845057368278503, "learning_rate": 0.00017595141366943018, "loss": 1.796, "step": 57321 }, { "epoch": 1.91, "grad_norm": 0.5897882580757141, "learning_rate": 0.00017594189702723978, "loss": 1.8095, "step": 57322 }, { "epoch": 1.91, "grad_norm": 0.5897399187088013, "learning_rate": 0.00017593238053563787, "loss": 1.7861, "step": 57323 }, { "epoch": 1.91, "grad_norm": 0.5821155905723572, "learning_rate": 0.00017592286419463552, "loss": 1.7096, "step": 57324 }, { "epoch": 1.91, "grad_norm": 0.6017127633094788, "learning_rate": 0.00017591334800424462, "loss": 1.7795, "step": 57325 }, { "epoch": 1.91, "grad_norm": 0.5959872603416443, "learning_rate": 0.00017590383196447673, "loss": 1.7155, "step": 57326 }, { "epoch": 1.91, "grad_norm": 0.5729191899299622, "learning_rate": 0.00017589431607534323, "loss": 1.715, "step": 57327 }, { "epoch": 1.91, "grad_norm": 0.6137257218360901, "learning_rate": 0.00017588480033685572, "loss": 1.725, "step": 57328 }, { "epoch": 1.91, "grad_norm": 0.5775505900382996, "learning_rate": 0.00017587528474902572, "loss": 1.7308, "step": 57329 }, { "epoch": 1.91, "grad_norm": 0.6231070160865784, "learning_rate": 0.000175865769311865, "loss": 1.6491, "step": 57330 }, { "epoch": 1.91, "grad_norm": 0.611429750919342, "learning_rate": 0.0001758562540253848, "loss": 1.6711, "step": 57331 }, { "epoch": 1.91, "grad_norm": 0.5839609503746033, "learning_rate": 0.00017584673888959687, "loss": 1.7955, "step": 57332 }, { "epoch": 1.91, "grad_norm": 0.6167100667953491, "learning_rate": 0.00017583722390451275, "loss": 1.7535, "step": 57333 }, { "epoch": 1.91, "grad_norm": 0.5860170722007751, "learning_rate": 0.00017582770907014392, "loss": 1.6717, "step": 57334 }, { "epoch": 1.91, "grad_norm": 0.5927215218544006, "learning_rate": 0.00017581819438650188, "loss": 1.7757, "step": 57335 }, { "epoch": 1.91, "grad_norm": 0.5904407501220703, "learning_rate": 0.00017580867985359837, "loss": 1.7322, "step": 57336 }, { "epoch": 1.91, "grad_norm": 0.6253118515014648, "learning_rate": 0.00017579916547144474, "loss": 1.7614, "step": 57337 }, { "epoch": 1.91, "grad_norm": 0.6110461354255676, "learning_rate": 0.00017578965124005261, "loss": 1.7649, "step": 57338 }, { "epoch": 1.91, "grad_norm": 0.5938661694526672, "learning_rate": 0.00017578013715943355, "loss": 1.7062, "step": 57339 }, { "epoch": 1.91, "grad_norm": 0.6057673096656799, "learning_rate": 0.0001757706232295992, "loss": 1.7593, "step": 57340 }, { "epoch": 1.91, "grad_norm": 0.6964871883392334, "learning_rate": 0.00017576110945056095, "loss": 1.795, "step": 57341 }, { "epoch": 1.91, "grad_norm": 0.5818383097648621, "learning_rate": 0.00017575159582233036, "loss": 1.699, "step": 57342 }, { "epoch": 1.91, "grad_norm": 0.5989533066749573, "learning_rate": 0.0001757420823449191, "loss": 1.7618, "step": 57343 }, { "epoch": 1.91, "grad_norm": 0.7845578193664551, "learning_rate": 0.0001757325690183386, "loss": 1.7718, "step": 57344 }, { "epoch": 1.91, "grad_norm": 0.5950974225997925, "learning_rate": 0.00017572305584260043, "loss": 1.7098, "step": 57345 }, { "epoch": 1.91, "grad_norm": 0.9601626396179199, "learning_rate": 0.0001757135428177162, "loss": 1.748, "step": 57346 }, { "epoch": 1.91, "grad_norm": 0.6034885048866272, "learning_rate": 0.00017570402994369742, "loss": 1.7862, "step": 57347 }, { "epoch": 1.91, "grad_norm": 0.5889760851860046, "learning_rate": 0.00017569451722055559, "loss": 1.7245, "step": 57348 }, { "epoch": 1.91, "grad_norm": 0.585323691368103, "learning_rate": 0.00017568500464830233, "loss": 1.7601, "step": 57349 }, { "epoch": 1.91, "grad_norm": 0.5833515524864197, "learning_rate": 0.0001756754922269492, "loss": 1.7591, "step": 57350 }, { "epoch": 1.91, "grad_norm": 0.5848813056945801, "learning_rate": 0.00017566597995650766, "loss": 1.7445, "step": 57351 }, { "epoch": 1.91, "grad_norm": 0.5910346508026123, "learning_rate": 0.0001756564678369892, "loss": 1.7338, "step": 57352 }, { "epoch": 1.91, "grad_norm": 0.5773630738258362, "learning_rate": 0.00017564695586840562, "loss": 1.7279, "step": 57353 }, { "epoch": 1.91, "grad_norm": 0.6296313405036926, "learning_rate": 0.00017563744405076827, "loss": 1.7317, "step": 57354 }, { "epoch": 1.91, "grad_norm": 0.5936127305030823, "learning_rate": 0.00017562793238408872, "loss": 1.7475, "step": 57355 }, { "epoch": 1.91, "grad_norm": 0.6118401288986206, "learning_rate": 0.00017561842086837858, "loss": 1.77, "step": 57356 }, { "epoch": 1.91, "grad_norm": 0.5775331854820251, "learning_rate": 0.00017560890950364932, "loss": 1.6599, "step": 57357 }, { "epoch": 1.91, "grad_norm": 0.6101458072662354, "learning_rate": 0.00017559939828991247, "loss": 1.7928, "step": 57358 }, { "epoch": 1.91, "grad_norm": 0.6113364100456238, "learning_rate": 0.0001755898872271796, "loss": 1.6941, "step": 57359 }, { "epoch": 1.91, "grad_norm": 0.6234070062637329, "learning_rate": 0.00017558037631546243, "loss": 1.7339, "step": 57360 }, { "epoch": 1.91, "grad_norm": 0.6061148047447205, "learning_rate": 0.00017557086555477223, "loss": 1.6774, "step": 57361 }, { "epoch": 1.91, "grad_norm": 0.5872924327850342, "learning_rate": 0.00017556135494512068, "loss": 1.6893, "step": 57362 }, { "epoch": 1.91, "grad_norm": 0.5773197412490845, "learning_rate": 0.00017555184448651934, "loss": 1.7179, "step": 57363 }, { "epoch": 1.91, "grad_norm": 0.5779038071632385, "learning_rate": 0.00017554233417897973, "loss": 1.7586, "step": 57364 }, { "epoch": 1.91, "grad_norm": 0.6107012033462524, "learning_rate": 0.00017553282402251328, "loss": 1.7344, "step": 57365 }, { "epoch": 1.91, "grad_norm": 0.613116979598999, "learning_rate": 0.00017552331401713182, "loss": 1.7862, "step": 57366 }, { "epoch": 1.91, "grad_norm": 0.6235618591308594, "learning_rate": 0.00017551380416284658, "loss": 1.7536, "step": 57367 }, { "epoch": 1.91, "grad_norm": 0.9748354554176331, "learning_rate": 0.00017550429445966926, "loss": 1.8226, "step": 57368 }, { "epoch": 1.91, "grad_norm": 0.5887970924377441, "learning_rate": 0.0001754947849076114, "loss": 1.7139, "step": 57369 }, { "epoch": 1.91, "grad_norm": 0.5982781648635864, "learning_rate": 0.0001754852755066846, "loss": 1.7722, "step": 57370 }, { "epoch": 1.91, "grad_norm": 0.571043848991394, "learning_rate": 0.00017547576625690024, "loss": 1.7179, "step": 57371 }, { "epoch": 1.91, "grad_norm": 0.5906434059143066, "learning_rate": 0.0001754662571582699, "loss": 1.7809, "step": 57372 }, { "epoch": 1.91, "grad_norm": 0.6063562035560608, "learning_rate": 0.00017545674821080537, "loss": 1.7323, "step": 57373 }, { "epoch": 1.91, "grad_norm": 0.5849772691726685, "learning_rate": 0.00017544723941451777, "loss": 1.7594, "step": 57374 }, { "epoch": 1.91, "grad_norm": 0.5941389799118042, "learning_rate": 0.000175437730769419, "loss": 1.7239, "step": 57375 }, { "epoch": 1.91, "grad_norm": 0.5975131392478943, "learning_rate": 0.0001754282222755205, "loss": 1.7445, "step": 57376 }, { "epoch": 1.91, "grad_norm": 0.5831208825111389, "learning_rate": 0.0001754187139328337, "loss": 1.7323, "step": 57377 }, { "epoch": 1.91, "grad_norm": 0.5717445611953735, "learning_rate": 0.00017540920574137024, "loss": 1.7008, "step": 57378 }, { "epoch": 1.91, "grad_norm": 0.5979856848716736, "learning_rate": 0.00017539969770114156, "loss": 1.7013, "step": 57379 }, { "epoch": 1.91, "grad_norm": 0.5843128561973572, "learning_rate": 0.00017539018981215952, "loss": 1.7487, "step": 57380 }, { "epoch": 1.91, "grad_norm": 0.585483729839325, "learning_rate": 0.00017538068207443525, "loss": 1.7575, "step": 57381 }, { "epoch": 1.91, "grad_norm": 0.6042578816413879, "learning_rate": 0.0001753711744879805, "loss": 1.6896, "step": 57382 }, { "epoch": 1.91, "grad_norm": 0.5909408926963806, "learning_rate": 0.00017536166705280686, "loss": 1.708, "step": 57383 }, { "epoch": 1.91, "grad_norm": 0.599144458770752, "learning_rate": 0.0001753521597689257, "loss": 1.7823, "step": 57384 }, { "epoch": 1.91, "grad_norm": 0.6136866211891174, "learning_rate": 0.0001753426526363487, "loss": 1.7199, "step": 57385 }, { "epoch": 1.91, "grad_norm": 0.5982818603515625, "learning_rate": 0.00017533314565508737, "loss": 1.7164, "step": 57386 }, { "epoch": 1.91, "grad_norm": 0.6247634291648865, "learning_rate": 0.0001753236388251532, "loss": 1.7985, "step": 57387 }, { "epoch": 1.91, "grad_norm": 0.6043142080307007, "learning_rate": 0.00017531413214655765, "loss": 1.7285, "step": 57388 }, { "epoch": 1.91, "grad_norm": 0.6105950474739075, "learning_rate": 0.0001753046256193125, "loss": 1.7403, "step": 57389 }, { "epoch": 1.91, "grad_norm": 0.6150440573692322, "learning_rate": 0.00017529511924342919, "loss": 1.7272, "step": 57390 }, { "epoch": 1.91, "grad_norm": 0.6178696155548096, "learning_rate": 0.0001752856130189192, "loss": 1.7126, "step": 57391 }, { "epoch": 1.91, "grad_norm": 0.5782614350318909, "learning_rate": 0.00017527610694579405, "loss": 1.7612, "step": 57392 }, { "epoch": 1.91, "grad_norm": 1.6075397729873657, "learning_rate": 0.00017526660102406544, "loss": 1.8632, "step": 57393 }, { "epoch": 1.91, "grad_norm": 0.6041785478591919, "learning_rate": 0.00017525709525374468, "loss": 1.7217, "step": 57394 }, { "epoch": 1.91, "grad_norm": 0.6235259175300598, "learning_rate": 0.0001752475896348434, "loss": 1.8434, "step": 57395 }, { "epoch": 1.91, "grad_norm": 0.6126490235328674, "learning_rate": 0.00017523808416737333, "loss": 1.7688, "step": 57396 }, { "epoch": 1.91, "grad_norm": 0.5875255465507507, "learning_rate": 0.0001752285788513457, "loss": 1.7199, "step": 57397 }, { "epoch": 1.91, "grad_norm": 0.5774497389793396, "learning_rate": 0.0001752190736867722, "loss": 1.6646, "step": 57398 }, { "epoch": 1.91, "grad_norm": 0.5903023481369019, "learning_rate": 0.0001752095686736644, "loss": 1.7322, "step": 57399 }, { "epoch": 1.91, "grad_norm": 0.5969825983047485, "learning_rate": 0.00017520006381203384, "loss": 1.7783, "step": 57400 }, { "epoch": 1.91, "grad_norm": 0.6091139912605286, "learning_rate": 0.00017519055910189194, "loss": 1.7714, "step": 57401 }, { "epoch": 1.91, "grad_norm": 0.6134060621261597, "learning_rate": 0.00017518105454325026, "loss": 1.7171, "step": 57402 }, { "epoch": 1.91, "grad_norm": 0.5776241421699524, "learning_rate": 0.0001751715501361206, "loss": 1.7327, "step": 57403 }, { "epoch": 1.91, "grad_norm": 0.5872099995613098, "learning_rate": 0.00017516204588051407, "loss": 1.7935, "step": 57404 }, { "epoch": 1.91, "grad_norm": 0.5883595943450928, "learning_rate": 0.0001751525417764425, "loss": 1.6919, "step": 57405 }, { "epoch": 1.91, "grad_norm": 0.5644225478172302, "learning_rate": 0.0001751430378239173, "loss": 1.7079, "step": 57406 }, { "epoch": 1.91, "grad_norm": 0.5908227562904358, "learning_rate": 0.00017513353402295013, "loss": 1.7588, "step": 57407 }, { "epoch": 1.91, "grad_norm": 0.5784258246421814, "learning_rate": 0.0001751240303735525, "loss": 1.7561, "step": 57408 }, { "epoch": 1.91, "grad_norm": 0.6003746390342712, "learning_rate": 0.00017511452687573575, "loss": 1.6988, "step": 57409 }, { "epoch": 1.91, "grad_norm": 0.6041626334190369, "learning_rate": 0.00017510502352951164, "loss": 1.6846, "step": 57410 }, { "epoch": 1.91, "grad_norm": 0.5994849801063538, "learning_rate": 0.00017509552033489169, "loss": 1.7233, "step": 57411 }, { "epoch": 1.91, "grad_norm": 0.5844740867614746, "learning_rate": 0.00017508601729188724, "loss": 1.784, "step": 57412 }, { "epoch": 1.91, "grad_norm": 0.5892377495765686, "learning_rate": 0.00017507651440051012, "loss": 1.7883, "step": 57413 }, { "epoch": 1.91, "grad_norm": 0.5883052349090576, "learning_rate": 0.00017506701166077155, "loss": 1.7501, "step": 57414 }, { "epoch": 1.91, "grad_norm": 0.5978346467018127, "learning_rate": 0.00017505750907268334, "loss": 1.696, "step": 57415 }, { "epoch": 1.91, "grad_norm": 0.6230738759040833, "learning_rate": 0.00017504800663625675, "loss": 1.7459, "step": 57416 }, { "epoch": 1.91, "grad_norm": 0.6009508967399597, "learning_rate": 0.00017503850435150365, "loss": 1.7621, "step": 57417 }, { "epoch": 1.91, "grad_norm": 0.599470317363739, "learning_rate": 0.00017502900221843536, "loss": 1.7208, "step": 57418 }, { "epoch": 1.91, "grad_norm": 0.6009764075279236, "learning_rate": 0.00017501950023706333, "loss": 1.7745, "step": 57419 }, { "epoch": 1.91, "grad_norm": 0.5751432776451111, "learning_rate": 0.00017500999840739935, "loss": 1.7084, "step": 57420 }, { "epoch": 1.91, "grad_norm": 0.5806718468666077, "learning_rate": 0.00017500049672945466, "loss": 1.7765, "step": 57421 }, { "epoch": 1.91, "grad_norm": 0.6156861186027527, "learning_rate": 0.0001749909952032411, "loss": 1.6872, "step": 57422 }, { "epoch": 1.91, "grad_norm": 0.5853085517883301, "learning_rate": 0.00017498149382877006, "loss": 1.7203, "step": 57423 }, { "epoch": 1.91, "grad_norm": 0.5947005152702332, "learning_rate": 0.00017497199260605294, "loss": 1.7503, "step": 57424 }, { "epoch": 1.91, "grad_norm": 0.5885940194129944, "learning_rate": 0.00017496249153510146, "loss": 1.714, "step": 57425 }, { "epoch": 1.91, "grad_norm": 0.5916581153869629, "learning_rate": 0.00017495299061592704, "loss": 1.6662, "step": 57426 }, { "epoch": 1.91, "grad_norm": 0.5839270353317261, "learning_rate": 0.00017494348984854136, "loss": 1.7914, "step": 57427 }, { "epoch": 1.91, "grad_norm": 0.5907347798347473, "learning_rate": 0.00017493398923295575, "loss": 1.7201, "step": 57428 }, { "epoch": 1.91, "grad_norm": 0.5869433283805847, "learning_rate": 0.00017492448876918191, "loss": 1.7725, "step": 57429 }, { "epoch": 1.91, "grad_norm": 0.5901448726654053, "learning_rate": 0.0001749149884572314, "loss": 1.606, "step": 57430 }, { "epoch": 1.91, "grad_norm": 0.5868546962738037, "learning_rate": 0.0001749054882971155, "loss": 1.755, "step": 57431 }, { "epoch": 1.91, "grad_norm": 0.5523740649223328, "learning_rate": 0.00017489598828884602, "loss": 1.6354, "step": 57432 }, { "epoch": 1.91, "grad_norm": 0.5899949073791504, "learning_rate": 0.00017488648843243438, "loss": 1.6983, "step": 57433 }, { "epoch": 1.91, "grad_norm": 0.5903195142745972, "learning_rate": 0.000174876988727892, "loss": 1.7176, "step": 57434 }, { "epoch": 1.91, "grad_norm": 0.5820114612579346, "learning_rate": 0.00017486748917523053, "loss": 1.6808, "step": 57435 }, { "epoch": 1.91, "grad_norm": 0.5927003026008606, "learning_rate": 0.0001748579897744616, "loss": 1.6809, "step": 57436 }, { "epoch": 1.91, "grad_norm": 0.5995044708251953, "learning_rate": 0.00017484849052559664, "loss": 1.7208, "step": 57437 }, { "epoch": 1.91, "grad_norm": 0.5895245671272278, "learning_rate": 0.00017483899142864702, "loss": 1.8233, "step": 57438 }, { "epoch": 1.91, "grad_norm": 0.5848045945167542, "learning_rate": 0.00017482949248362456, "loss": 1.7673, "step": 57439 }, { "epoch": 1.91, "grad_norm": 0.6013477444648743, "learning_rate": 0.00017481999369054065, "loss": 1.8043, "step": 57440 }, { "epoch": 1.91, "grad_norm": 0.5917900800704956, "learning_rate": 0.00017481049504940673, "loss": 1.6746, "step": 57441 }, { "epoch": 1.91, "grad_norm": 0.5813334584236145, "learning_rate": 0.0001748009965602344, "loss": 1.7224, "step": 57442 }, { "epoch": 1.91, "grad_norm": 0.5742650032043457, "learning_rate": 0.00017479149822303546, "loss": 1.7191, "step": 57443 }, { "epoch": 1.91, "grad_norm": 0.5653868317604065, "learning_rate": 0.00017478200003782094, "loss": 1.7353, "step": 57444 }, { "epoch": 1.91, "grad_norm": 0.5876443386077881, "learning_rate": 0.00017477250200460261, "loss": 1.7334, "step": 57445 }, { "epoch": 1.91, "grad_norm": 0.6038182973861694, "learning_rate": 0.00017476300412339217, "loss": 1.7203, "step": 57446 }, { "epoch": 1.91, "grad_norm": 0.6083919405937195, "learning_rate": 0.00017475350639420097, "loss": 1.7038, "step": 57447 }, { "epoch": 1.91, "grad_norm": 0.5909854769706726, "learning_rate": 0.00017474400881704045, "loss": 1.6949, "step": 57448 }, { "epoch": 1.91, "grad_norm": 0.581718385219574, "learning_rate": 0.00017473451139192223, "loss": 1.7412, "step": 57449 }, { "epoch": 1.91, "grad_norm": 2.1034939289093018, "learning_rate": 0.0001747250141188581, "loss": 1.7893, "step": 57450 }, { "epoch": 1.91, "grad_norm": 1.1384344100952148, "learning_rate": 0.00017471551699785908, "loss": 1.7701, "step": 57451 }, { "epoch": 1.91, "grad_norm": 0.5953465104103088, "learning_rate": 0.000174706020028937, "loss": 1.6925, "step": 57452 }, { "epoch": 1.91, "grad_norm": 0.6080805659294128, "learning_rate": 0.00017469652321210348, "loss": 1.6785, "step": 57453 }, { "epoch": 1.91, "grad_norm": 0.5727748274803162, "learning_rate": 0.00017468702654736988, "loss": 1.6481, "step": 57454 }, { "epoch": 1.91, "grad_norm": 0.5927760004997253, "learning_rate": 0.00017467753003474768, "loss": 1.7251, "step": 57455 }, { "epoch": 1.91, "grad_norm": 0.6029953956604004, "learning_rate": 0.00017466803367424848, "loss": 1.7548, "step": 57456 }, { "epoch": 1.91, "grad_norm": 0.5763439536094666, "learning_rate": 0.00017465853746588406, "loss": 1.7029, "step": 57457 }, { "epoch": 1.91, "grad_norm": 0.5748143196105957, "learning_rate": 0.0001746490414096655, "loss": 1.7495, "step": 57458 }, { "epoch": 1.91, "grad_norm": 0.574651837348938, "learning_rate": 0.00017463954550560448, "loss": 1.6983, "step": 57459 }, { "epoch": 1.91, "grad_norm": 0.5918043255805969, "learning_rate": 0.00017463004975371268, "loss": 1.7759, "step": 57460 }, { "epoch": 1.91, "grad_norm": 0.6174436807632446, "learning_rate": 0.00017462055415400158, "loss": 1.7761, "step": 57461 }, { "epoch": 1.91, "grad_norm": 0.5836385488510132, "learning_rate": 0.00017461105870648255, "loss": 1.7174, "step": 57462 }, { "epoch": 1.91, "grad_norm": 0.6007050275802612, "learning_rate": 0.0001746015634111673, "loss": 1.7537, "step": 57463 }, { "epoch": 1.91, "grad_norm": 0.5796338319778442, "learning_rate": 0.00017459206826806728, "loss": 1.8009, "step": 57464 }, { "epoch": 1.91, "grad_norm": 0.6155681610107422, "learning_rate": 0.0001745825732771939, "loss": 1.7405, "step": 57465 }, { "epoch": 1.91, "grad_norm": 0.596003532409668, "learning_rate": 0.00017457307843855881, "loss": 1.7705, "step": 57466 }, { "epoch": 1.91, "grad_norm": 0.5841793417930603, "learning_rate": 0.00017456358375217364, "loss": 1.6168, "step": 57467 }, { "epoch": 1.91, "grad_norm": 0.5977972149848938, "learning_rate": 0.00017455408921804975, "loss": 1.7733, "step": 57468 }, { "epoch": 1.91, "grad_norm": 0.6042376756668091, "learning_rate": 0.00017454459483619864, "loss": 1.6829, "step": 57469 }, { "epoch": 1.91, "grad_norm": 0.576898992061615, "learning_rate": 0.00017453510060663202, "loss": 1.744, "step": 57470 }, { "epoch": 1.91, "grad_norm": 0.5833827257156372, "learning_rate": 0.0001745256065293613, "loss": 1.8267, "step": 57471 }, { "epoch": 1.91, "grad_norm": 0.5988215804100037, "learning_rate": 0.00017451611260439786, "loss": 1.7221, "step": 57472 }, { "epoch": 1.91, "grad_norm": 0.5732517242431641, "learning_rate": 0.0001745066188317535, "loss": 1.7196, "step": 57473 }, { "epoch": 1.91, "grad_norm": 0.598501443862915, "learning_rate": 0.00017449712521143952, "loss": 1.7509, "step": 57474 }, { "epoch": 1.91, "grad_norm": 0.592081606388092, "learning_rate": 0.00017448763174346765, "loss": 1.775, "step": 57475 }, { "epoch": 1.91, "grad_norm": 0.6164749264717102, "learning_rate": 0.00017447813842784915, "loss": 1.6954, "step": 57476 }, { "epoch": 1.91, "grad_norm": 0.5815890431404114, "learning_rate": 0.00017446864526459582, "loss": 1.7652, "step": 57477 }, { "epoch": 1.91, "grad_norm": 0.603428304195404, "learning_rate": 0.00017445915225371912, "loss": 1.7869, "step": 57478 }, { "epoch": 1.91, "grad_norm": 0.584631621837616, "learning_rate": 0.00017444965939523034, "loss": 1.7549, "step": 57479 }, { "epoch": 1.91, "grad_norm": 0.6110125780105591, "learning_rate": 0.00017444016668914132, "loss": 1.6802, "step": 57480 }, { "epoch": 1.91, "grad_norm": 0.5825884938240051, "learning_rate": 0.00017443067413546327, "loss": 1.7763, "step": 57481 }, { "epoch": 1.91, "grad_norm": 0.6040770411491394, "learning_rate": 0.00017442118173420805, "loss": 1.7255, "step": 57482 }, { "epoch": 1.91, "grad_norm": 0.5914794206619263, "learning_rate": 0.000174411689485387, "loss": 1.7412, "step": 57483 }, { "epoch": 1.91, "grad_norm": 0.5844026207923889, "learning_rate": 0.0001744021973890115, "loss": 1.6904, "step": 57484 }, { "epoch": 1.91, "grad_norm": 0.5845785140991211, "learning_rate": 0.0001743927054450934, "loss": 1.812, "step": 57485 }, { "epoch": 1.91, "grad_norm": 0.591246485710144, "learning_rate": 0.0001743832136536439, "loss": 1.7287, "step": 57486 }, { "epoch": 1.91, "grad_norm": 0.5876204967498779, "learning_rate": 0.00017437372201467477, "loss": 1.7388, "step": 57487 }, { "epoch": 1.91, "grad_norm": 0.5833049416542053, "learning_rate": 0.00017436423052819732, "loss": 1.6552, "step": 57488 }, { "epoch": 1.91, "grad_norm": 0.5788847804069519, "learning_rate": 0.00017435473919422335, "loss": 1.6623, "step": 57489 }, { "epoch": 1.91, "grad_norm": 0.6262891292572021, "learning_rate": 0.0001743452480127642, "loss": 1.7211, "step": 57490 }, { "epoch": 1.91, "grad_norm": 0.5753971338272095, "learning_rate": 0.0001743357569838312, "loss": 1.7746, "step": 57491 }, { "epoch": 1.91, "grad_norm": 0.5848342776298523, "learning_rate": 0.0001743262661074363, "loss": 1.6748, "step": 57492 }, { "epoch": 1.91, "grad_norm": 0.6007567644119263, "learning_rate": 0.00017431677538359074, "loss": 1.7114, "step": 57493 }, { "epoch": 1.91, "grad_norm": 0.6023811101913452, "learning_rate": 0.00017430728481230602, "loss": 1.7325, "step": 57494 }, { "epoch": 1.91, "grad_norm": 0.5827851891517639, "learning_rate": 0.0001742977943935937, "loss": 1.6887, "step": 57495 }, { "epoch": 1.91, "grad_norm": 0.5808696150779724, "learning_rate": 0.00017428830412746544, "loss": 1.7015, "step": 57496 }, { "epoch": 1.91, "grad_norm": 0.5854523777961731, "learning_rate": 0.0001742788140139327, "loss": 1.6737, "step": 57497 }, { "epoch": 1.91, "grad_norm": 0.579369843006134, "learning_rate": 0.0001742693240530068, "loss": 1.684, "step": 57498 }, { "epoch": 1.91, "grad_norm": 0.5705263614654541, "learning_rate": 0.00017425983424469957, "loss": 1.7007, "step": 57499 }, { "epoch": 1.91, "grad_norm": 0.5956050157546997, "learning_rate": 0.00017425034458902236, "loss": 1.7108, "step": 57500 }, { "epoch": 1.91, "grad_norm": 0.6150107383728027, "learning_rate": 0.00017424085508598657, "loss": 1.6534, "step": 57501 }, { "epoch": 1.91, "grad_norm": 0.6033648252487183, "learning_rate": 0.00017423136573560386, "loss": 1.7785, "step": 57502 }, { "epoch": 1.91, "grad_norm": 0.6220500469207764, "learning_rate": 0.000174221876537886, "loss": 1.703, "step": 57503 }, { "epoch": 1.91, "grad_norm": 0.5974249839782715, "learning_rate": 0.00017421238749284397, "loss": 1.7936, "step": 57504 }, { "epoch": 1.91, "grad_norm": 0.587881326675415, "learning_rate": 0.00017420289860048957, "loss": 1.7951, "step": 57505 }, { "epoch": 1.91, "grad_norm": 0.6064293384552002, "learning_rate": 0.00017419340986083448, "loss": 1.7116, "step": 57506 }, { "epoch": 1.91, "grad_norm": 0.6054440140724182, "learning_rate": 0.00017418392127389, "loss": 1.7519, "step": 57507 }, { "epoch": 1.91, "grad_norm": 0.6018663644790649, "learning_rate": 0.00017417443283966762, "loss": 1.7182, "step": 57508 }, { "epoch": 1.91, "grad_norm": 0.5919182300567627, "learning_rate": 0.00017416494455817894, "loss": 1.6444, "step": 57509 }, { "epoch": 1.91, "grad_norm": 0.5958579778671265, "learning_rate": 0.0001741554564294357, "loss": 1.7405, "step": 57510 }, { "epoch": 1.91, "grad_norm": 0.5833181142807007, "learning_rate": 0.00017414596845344895, "loss": 1.7208, "step": 57511 }, { "epoch": 1.91, "grad_norm": 0.5829282402992249, "learning_rate": 0.00017413648063023044, "loss": 1.7338, "step": 57512 }, { "epoch": 1.91, "grad_norm": 1.0027801990509033, "learning_rate": 0.00017412699295979184, "loss": 1.7564, "step": 57513 }, { "epoch": 1.91, "grad_norm": 0.6293995976448059, "learning_rate": 0.00017411750544214455, "loss": 1.7668, "step": 57514 }, { "epoch": 1.91, "grad_norm": 0.5824753642082214, "learning_rate": 0.0001741080180772999, "loss": 1.692, "step": 57515 }, { "epoch": 1.91, "grad_norm": 0.5906266570091248, "learning_rate": 0.0001740985308652696, "loss": 1.735, "step": 57516 }, { "epoch": 1.91, "grad_norm": 0.5987841486930847, "learning_rate": 0.00017408904380606532, "loss": 1.7533, "step": 57517 }, { "epoch": 1.91, "grad_norm": 0.5975958704948425, "learning_rate": 0.00017407955689969818, "loss": 1.7235, "step": 57518 }, { "epoch": 1.91, "grad_norm": 0.6093045473098755, "learning_rate": 0.00017407007014617993, "loss": 1.7321, "step": 57519 }, { "epoch": 1.91, "grad_norm": 0.5897626876831055, "learning_rate": 0.00017406058354552215, "loss": 1.6994, "step": 57520 }, { "epoch": 1.91, "grad_norm": 0.5996557474136353, "learning_rate": 0.00017405109709773625, "loss": 1.6902, "step": 57521 }, { "epoch": 1.91, "grad_norm": 0.6060288548469543, "learning_rate": 0.00017404161080283365, "loss": 1.7203, "step": 57522 }, { "epoch": 1.91, "grad_norm": 0.5665626525878906, "learning_rate": 0.0001740321246608261, "loss": 1.7286, "step": 57523 }, { "epoch": 1.91, "grad_norm": 0.5782188773155212, "learning_rate": 0.0001740226386717249, "loss": 1.711, "step": 57524 }, { "epoch": 1.91, "grad_norm": 0.5782407522201538, "learning_rate": 0.00017401315283554174, "loss": 1.6542, "step": 57525 }, { "epoch": 1.91, "grad_norm": 0.5915840864181519, "learning_rate": 0.00017400366715228794, "loss": 1.7151, "step": 57526 }, { "epoch": 1.91, "grad_norm": 0.5808777809143066, "learning_rate": 0.00017399418162197523, "loss": 1.7089, "step": 57527 }, { "epoch": 1.91, "grad_norm": 0.5930199027061462, "learning_rate": 0.00017398469624461507, "loss": 1.6598, "step": 57528 }, { "epoch": 1.91, "grad_norm": 0.5785400867462158, "learning_rate": 0.00017397521102021873, "loss": 1.6483, "step": 57529 }, { "epoch": 1.91, "grad_norm": 0.5661877989768982, "learning_rate": 0.00017396572594879806, "loss": 1.6725, "step": 57530 }, { "epoch": 1.91, "grad_norm": 0.5694593787193298, "learning_rate": 0.00017395624103036433, "loss": 1.6701, "step": 57531 }, { "epoch": 1.91, "grad_norm": 0.5845058560371399, "learning_rate": 0.00017394675626492928, "loss": 1.7255, "step": 57532 }, { "epoch": 1.91, "grad_norm": 0.6169214844703674, "learning_rate": 0.0001739372716525041, "loss": 1.753, "step": 57533 }, { "epoch": 1.91, "grad_norm": 0.5845785140991211, "learning_rate": 0.0001739277871931007, "loss": 1.7339, "step": 57534 }, { "epoch": 1.91, "grad_norm": 0.6107629537582397, "learning_rate": 0.00017391830288673036, "loss": 1.8563, "step": 57535 }, { "epoch": 1.91, "grad_norm": 0.5775308609008789, "learning_rate": 0.00017390881873340447, "loss": 1.8028, "step": 57536 }, { "epoch": 1.91, "grad_norm": 0.6214895248413086, "learning_rate": 0.00017389933473313486, "loss": 1.7701, "step": 57537 }, { "epoch": 1.91, "grad_norm": 0.6453900933265686, "learning_rate": 0.00017388985088593272, "loss": 1.7906, "step": 57538 }, { "epoch": 1.91, "grad_norm": 0.5982986092567444, "learning_rate": 0.0001738803671918099, "loss": 1.7463, "step": 57539 }, { "epoch": 1.91, "grad_norm": 0.6055513620376587, "learning_rate": 0.0001738708836507777, "loss": 1.8017, "step": 57540 }, { "epoch": 1.91, "grad_norm": 0.5916498899459839, "learning_rate": 0.00017386140026284753, "loss": 1.7639, "step": 57541 }, { "epoch": 1.91, "grad_norm": 0.6048485040664673, "learning_rate": 0.00017385191702803113, "loss": 1.6814, "step": 57542 }, { "epoch": 1.91, "grad_norm": 0.605006217956543, "learning_rate": 0.00017384243394633983, "loss": 1.6783, "step": 57543 }, { "epoch": 1.91, "grad_norm": 0.6309384107589722, "learning_rate": 0.00017383295101778537, "loss": 1.8098, "step": 57544 }, { "epoch": 1.91, "grad_norm": 0.6055126786231995, "learning_rate": 0.00017382346824237899, "loss": 1.7066, "step": 57545 }, { "epoch": 1.91, "grad_norm": 0.5974142551422119, "learning_rate": 0.00017381398562013242, "loss": 1.6879, "step": 57546 }, { "epoch": 1.91, "grad_norm": 0.6204231977462769, "learning_rate": 0.00017380450315105706, "loss": 1.7638, "step": 57547 }, { "epoch": 1.91, "grad_norm": 0.5734055042266846, "learning_rate": 0.00017379502083516438, "loss": 1.6643, "step": 57548 }, { "epoch": 1.91, "grad_norm": 0.598029375076294, "learning_rate": 0.00017378553867246604, "loss": 1.7447, "step": 57549 }, { "epoch": 1.91, "grad_norm": 0.6143290996551514, "learning_rate": 0.00017377605666297344, "loss": 1.7803, "step": 57550 }, { "epoch": 1.91, "grad_norm": 0.5937637090682983, "learning_rate": 0.000173766574806698, "loss": 1.7266, "step": 57551 }, { "epoch": 1.91, "grad_norm": 0.6025023460388184, "learning_rate": 0.00017375709310365136, "loss": 1.6797, "step": 57552 }, { "epoch": 1.91, "grad_norm": 0.5895469188690186, "learning_rate": 0.0001737476115538451, "loss": 1.7502, "step": 57553 }, { "epoch": 1.91, "grad_norm": 0.63545823097229, "learning_rate": 0.00017373813015729068, "loss": 1.8796, "step": 57554 }, { "epoch": 1.91, "grad_norm": 0.5922671556472778, "learning_rate": 0.0001737286489139994, "loss": 1.7125, "step": 57555 }, { "epoch": 1.91, "grad_norm": 0.5780951976776123, "learning_rate": 0.00017371916782398309, "loss": 1.7136, "step": 57556 }, { "epoch": 1.91, "grad_norm": 0.6022705435752869, "learning_rate": 0.00017370968688725308, "loss": 1.7221, "step": 57557 }, { "epoch": 1.91, "grad_norm": 0.5864066481590271, "learning_rate": 0.00017370020610382077, "loss": 1.7325, "step": 57558 }, { "epoch": 1.91, "grad_norm": 0.6045368313789368, "learning_rate": 0.00017369072547369782, "loss": 1.8153, "step": 57559 }, { "epoch": 1.92, "grad_norm": 0.6020723581314087, "learning_rate": 0.00017368124499689595, "loss": 1.6419, "step": 57560 }, { "epoch": 1.92, "grad_norm": 0.5950978398323059, "learning_rate": 0.00017367176467342622, "loss": 1.7894, "step": 57561 }, { "epoch": 1.92, "grad_norm": 2.3452939987182617, "learning_rate": 0.0001736622845033003, "loss": 1.809, "step": 57562 }, { "epoch": 1.92, "grad_norm": 0.5843443274497986, "learning_rate": 0.0001736528044865299, "loss": 1.7562, "step": 57563 }, { "epoch": 1.92, "grad_norm": 0.595836341381073, "learning_rate": 0.00017364332462312644, "loss": 1.7191, "step": 57564 }, { "epoch": 1.92, "grad_norm": 0.5846635103225708, "learning_rate": 0.00017363384491310118, "loss": 1.7662, "step": 57565 }, { "epoch": 1.92, "grad_norm": 0.5879071950912476, "learning_rate": 0.00017362436535646583, "loss": 1.6962, "step": 57566 }, { "epoch": 1.92, "grad_norm": 0.6040590405464172, "learning_rate": 0.00017361488595323213, "loss": 1.7116, "step": 57567 }, { "epoch": 1.92, "grad_norm": 0.5993356704711914, "learning_rate": 0.00017360540670341106, "loss": 1.7255, "step": 57568 }, { "epoch": 1.92, "grad_norm": 0.6036584973335266, "learning_rate": 0.00017359592760701441, "loss": 1.7201, "step": 57569 }, { "epoch": 1.92, "grad_norm": 0.5992217063903809, "learning_rate": 0.00017358644866405384, "loss": 1.803, "step": 57570 }, { "epoch": 1.92, "grad_norm": 0.6147229671478271, "learning_rate": 0.00017357696987454067, "loss": 1.6475, "step": 57571 }, { "epoch": 1.92, "grad_norm": 0.8991996049880981, "learning_rate": 0.00017356749123848632, "loss": 1.79, "step": 57572 }, { "epoch": 1.92, "grad_norm": 0.6030752062797546, "learning_rate": 0.00017355801275590238, "loss": 1.7601, "step": 57573 }, { "epoch": 1.92, "grad_norm": 0.6094349026679993, "learning_rate": 0.00017354853442680065, "loss": 1.6947, "step": 57574 }, { "epoch": 1.92, "grad_norm": 0.6118741631507874, "learning_rate": 0.00017353905625119208, "loss": 1.7275, "step": 57575 }, { "epoch": 1.92, "grad_norm": 0.5949523448944092, "learning_rate": 0.00017352957822908854, "loss": 1.7336, "step": 57576 }, { "epoch": 1.92, "grad_norm": 0.6091657280921936, "learning_rate": 0.00017352010036050154, "loss": 1.6864, "step": 57577 }, { "epoch": 1.92, "grad_norm": 0.6091935038566589, "learning_rate": 0.00017351062264544246, "loss": 1.8304, "step": 57578 }, { "epoch": 1.92, "grad_norm": 0.6367812752723694, "learning_rate": 0.0001735011450839228, "loss": 1.708, "step": 57579 }, { "epoch": 1.92, "grad_norm": 0.5943264961242676, "learning_rate": 0.0001734916676759542, "loss": 1.7904, "step": 57580 }, { "epoch": 1.92, "grad_norm": 0.5836728811264038, "learning_rate": 0.00017348219042154808, "loss": 1.6607, "step": 57581 }, { "epoch": 1.92, "grad_norm": 0.6024505496025085, "learning_rate": 0.00017347271332071582, "loss": 1.6867, "step": 57582 }, { "epoch": 1.92, "grad_norm": 0.6127176284790039, "learning_rate": 0.000173463236373469, "loss": 1.7243, "step": 57583 }, { "epoch": 1.92, "grad_norm": 0.6064069271087646, "learning_rate": 0.00017345375957981933, "loss": 1.6994, "step": 57584 }, { "epoch": 1.92, "grad_norm": 0.5969660878181458, "learning_rate": 0.00017344428293977816, "loss": 1.7493, "step": 57585 }, { "epoch": 1.92, "grad_norm": 0.6098523736000061, "learning_rate": 0.00017343480645335685, "loss": 1.7754, "step": 57586 }, { "epoch": 1.92, "grad_norm": 0.5939704775810242, "learning_rate": 0.0001734253301205671, "loss": 1.7165, "step": 57587 }, { "epoch": 1.92, "grad_norm": 0.5967133045196533, "learning_rate": 0.0001734158539414204, "loss": 1.6502, "step": 57588 }, { "epoch": 1.92, "grad_norm": 0.6012501120567322, "learning_rate": 0.00017340637791592812, "loss": 1.6935, "step": 57589 }, { "epoch": 1.92, "grad_norm": 0.6124951839447021, "learning_rate": 0.00017339690204410193, "loss": 1.7989, "step": 57590 }, { "epoch": 1.92, "grad_norm": 0.6066516041755676, "learning_rate": 0.00017338742632595314, "loss": 1.7079, "step": 57591 }, { "epoch": 1.92, "grad_norm": 0.5743032097816467, "learning_rate": 0.00017337795076149346, "loss": 1.6728, "step": 57592 }, { "epoch": 1.92, "grad_norm": 0.5983273386955261, "learning_rate": 0.00017336847535073417, "loss": 1.7155, "step": 57593 }, { "epoch": 1.92, "grad_norm": 0.6770734786987305, "learning_rate": 0.00017335900009368699, "loss": 1.7661, "step": 57594 }, { "epoch": 1.92, "grad_norm": 0.5936694741249084, "learning_rate": 0.00017334952499036338, "loss": 1.7743, "step": 57595 }, { "epoch": 1.92, "grad_norm": 0.5984781384468079, "learning_rate": 0.0001733400500407746, "loss": 1.7348, "step": 57596 }, { "epoch": 1.92, "grad_norm": 0.5911133289337158, "learning_rate": 0.0001733305752449325, "loss": 1.6725, "step": 57597 }, { "epoch": 1.92, "grad_norm": 0.5927721858024597, "learning_rate": 0.0001733211006028483, "loss": 1.7387, "step": 57598 }, { "epoch": 1.92, "grad_norm": 0.587360680103302, "learning_rate": 0.00017331162611453375, "loss": 1.7048, "step": 57599 }, { "epoch": 1.92, "grad_norm": 0.5705351233482361, "learning_rate": 0.0001733021517800002, "loss": 1.7388, "step": 57600 }, { "epoch": 1.92, "grad_norm": 0.5945654511451721, "learning_rate": 0.00017329267759925903, "loss": 1.7165, "step": 57601 }, { "epoch": 1.92, "grad_norm": 0.5768080949783325, "learning_rate": 0.00017328320357232205, "loss": 1.7236, "step": 57602 }, { "epoch": 1.92, "grad_norm": 0.5787280201911926, "learning_rate": 0.00017327372969920044, "loss": 1.7871, "step": 57603 }, { "epoch": 1.92, "grad_norm": 0.6021968722343445, "learning_rate": 0.00017326425597990596, "loss": 1.7115, "step": 57604 }, { "epoch": 1.92, "grad_norm": 0.5723342299461365, "learning_rate": 0.00017325478241444987, "loss": 1.7603, "step": 57605 }, { "epoch": 1.92, "grad_norm": 0.5816193222999573, "learning_rate": 0.00017324530900284397, "loss": 1.7358, "step": 57606 }, { "epoch": 1.92, "grad_norm": 0.5874594449996948, "learning_rate": 0.00017323583574509953, "loss": 1.7735, "step": 57607 }, { "epoch": 1.92, "grad_norm": 0.577950656414032, "learning_rate": 0.000173226362641228, "loss": 1.7566, "step": 57608 }, { "epoch": 1.92, "grad_norm": 0.6059481501579285, "learning_rate": 0.00017321688969124113, "loss": 1.7584, "step": 57609 }, { "epoch": 1.92, "grad_norm": 0.6218649744987488, "learning_rate": 0.00017320741689515027, "loss": 1.713, "step": 57610 }, { "epoch": 1.92, "grad_norm": 0.6094450354576111, "learning_rate": 0.00017319794425296682, "loss": 1.7412, "step": 57611 }, { "epoch": 1.92, "grad_norm": 0.6043369770050049, "learning_rate": 0.00017318847176470232, "loss": 1.8155, "step": 57612 }, { "epoch": 1.92, "grad_norm": 0.5699974894523621, "learning_rate": 0.0001731789994303685, "loss": 1.6906, "step": 57613 }, { "epoch": 1.92, "grad_norm": 0.5956308245658875, "learning_rate": 0.0001731695272499767, "loss": 1.714, "step": 57614 }, { "epoch": 1.92, "grad_norm": 0.5932124853134155, "learning_rate": 0.00017316005522353823, "loss": 1.7684, "step": 57615 }, { "epoch": 1.92, "grad_norm": 0.6002540588378906, "learning_rate": 0.00017315058335106494, "loss": 1.7342, "step": 57616 }, { "epoch": 1.92, "grad_norm": 0.6036220192909241, "learning_rate": 0.00017314111163256815, "loss": 1.7373, "step": 57617 }, { "epoch": 1.92, "grad_norm": 0.6231599450111389, "learning_rate": 0.0001731316400680592, "loss": 1.7423, "step": 57618 }, { "epoch": 1.92, "grad_norm": 0.5851954221725464, "learning_rate": 0.00017312216865754979, "loss": 1.7483, "step": 57619 }, { "epoch": 1.92, "grad_norm": 0.5746564269065857, "learning_rate": 0.0001731126974010516, "loss": 1.7349, "step": 57620 }, { "epoch": 1.92, "grad_norm": 0.5831764340400696, "learning_rate": 0.00017310322629857564, "loss": 1.7228, "step": 57621 }, { "epoch": 1.92, "grad_norm": 0.5645751953125, "learning_rate": 0.00017309375535013366, "loss": 1.7764, "step": 57622 }, { "epoch": 1.92, "grad_norm": 0.5828089118003845, "learning_rate": 0.00017308428455573732, "loss": 1.7377, "step": 57623 }, { "epoch": 1.92, "grad_norm": 0.604864239692688, "learning_rate": 0.00017307481391539792, "loss": 1.7139, "step": 57624 }, { "epoch": 1.92, "grad_norm": 0.6269627213478088, "learning_rate": 0.00017306534342912688, "loss": 1.7756, "step": 57625 }, { "epoch": 1.92, "grad_norm": 0.6027111411094666, "learning_rate": 0.00017305587309693582, "loss": 1.8014, "step": 57626 }, { "epoch": 1.92, "grad_norm": 0.5942143797874451, "learning_rate": 0.00017304640291883647, "loss": 1.7006, "step": 57627 }, { "epoch": 1.92, "grad_norm": 0.5773611068725586, "learning_rate": 0.00017303693289483986, "loss": 1.7203, "step": 57628 }, { "epoch": 1.92, "grad_norm": 0.5989490151405334, "learning_rate": 0.00017302746302495767, "loss": 1.6776, "step": 57629 }, { "epoch": 1.92, "grad_norm": 0.5947979092597961, "learning_rate": 0.00017301799330920155, "loss": 1.7658, "step": 57630 }, { "epoch": 1.92, "grad_norm": 0.5858624577522278, "learning_rate": 0.0001730085237475829, "loss": 1.719, "step": 57631 }, { "epoch": 1.92, "grad_norm": 0.6191486120223999, "learning_rate": 0.00017299905434011305, "loss": 1.7721, "step": 57632 }, { "epoch": 1.92, "grad_norm": 0.6039458513259888, "learning_rate": 0.00017298958508680365, "loss": 1.6957, "step": 57633 }, { "epoch": 1.92, "grad_norm": 0.6003793478012085, "learning_rate": 0.00017298011598766643, "loss": 1.6946, "step": 57634 }, { "epoch": 1.92, "grad_norm": 0.5756292939186096, "learning_rate": 0.0001729706470427124, "loss": 1.7545, "step": 57635 }, { "epoch": 1.92, "grad_norm": 0.5901249647140503, "learning_rate": 0.00017296117825195325, "loss": 1.7525, "step": 57636 }, { "epoch": 1.92, "grad_norm": 0.5984951853752136, "learning_rate": 0.00017295170961540064, "loss": 1.7093, "step": 57637 }, { "epoch": 1.92, "grad_norm": 0.5827994346618652, "learning_rate": 0.00017294224113306595, "loss": 1.6877, "step": 57638 }, { "epoch": 1.92, "grad_norm": 0.6141384840011597, "learning_rate": 0.00017293277280496053, "loss": 1.6573, "step": 57639 }, { "epoch": 1.92, "grad_norm": 0.5994029641151428, "learning_rate": 0.00017292330463109613, "loss": 1.7852, "step": 57640 }, { "epoch": 1.92, "grad_norm": 0.6130708456039429, "learning_rate": 0.00017291383661148396, "loss": 1.716, "step": 57641 }, { "epoch": 1.92, "grad_norm": 0.6235649585723877, "learning_rate": 0.00017290436874613585, "loss": 1.6826, "step": 57642 }, { "epoch": 1.92, "grad_norm": 0.6083130836486816, "learning_rate": 0.00017289490103506293, "loss": 1.7343, "step": 57643 }, { "epoch": 1.92, "grad_norm": 0.5708426237106323, "learning_rate": 0.00017288543347827705, "loss": 1.6938, "step": 57644 }, { "epoch": 1.92, "grad_norm": 0.5874281525611877, "learning_rate": 0.00017287596607578948, "loss": 1.7036, "step": 57645 }, { "epoch": 1.92, "grad_norm": 0.6080272793769836, "learning_rate": 0.00017286649882761166, "loss": 1.7255, "step": 57646 }, { "epoch": 1.92, "grad_norm": 0.6143455505371094, "learning_rate": 0.00017285703173375527, "loss": 1.7126, "step": 57647 }, { "epoch": 1.92, "grad_norm": 0.596648633480072, "learning_rate": 0.0001728475647942316, "loss": 1.761, "step": 57648 }, { "epoch": 1.92, "grad_norm": 0.6157499551773071, "learning_rate": 0.0001728380980090524, "loss": 1.7404, "step": 57649 }, { "epoch": 1.92, "grad_norm": 0.616073727607727, "learning_rate": 0.0001728286313782288, "loss": 1.8083, "step": 57650 }, { "epoch": 1.92, "grad_norm": 0.5946497321128845, "learning_rate": 0.00017281916490177274, "loss": 1.7611, "step": 57651 }, { "epoch": 1.92, "grad_norm": 0.59651780128479, "learning_rate": 0.00017280969857969542, "loss": 1.6889, "step": 57652 }, { "epoch": 1.92, "grad_norm": 0.5951414108276367, "learning_rate": 0.00017280023241200828, "loss": 1.7771, "step": 57653 }, { "epoch": 1.92, "grad_norm": 0.6671054363250732, "learning_rate": 0.00017279076639872303, "loss": 1.7632, "step": 57654 }, { "epoch": 1.92, "grad_norm": 0.6008744239807129, "learning_rate": 0.00017278130053985092, "loss": 1.756, "step": 57655 }, { "epoch": 1.92, "grad_norm": 2.4266719818115234, "learning_rate": 0.0001727718348354037, "loss": 1.6576, "step": 57656 }, { "epoch": 1.92, "grad_norm": 0.590990424156189, "learning_rate": 0.00017276236928539268, "loss": 1.7114, "step": 57657 }, { "epoch": 1.92, "grad_norm": 0.5854730606079102, "learning_rate": 0.00017275290388982933, "loss": 1.6991, "step": 57658 }, { "epoch": 1.92, "grad_norm": 0.6060678958892822, "learning_rate": 0.0001727434386487253, "loss": 1.806, "step": 57659 }, { "epoch": 1.92, "grad_norm": 0.5795003771781921, "learning_rate": 0.00017273397356209192, "loss": 1.7029, "step": 57660 }, { "epoch": 1.92, "grad_norm": 0.6012661457061768, "learning_rate": 0.0001727245086299408, "loss": 1.6726, "step": 57661 }, { "epoch": 1.92, "grad_norm": 0.5964775681495667, "learning_rate": 0.00017271504385228326, "loss": 1.7063, "step": 57662 }, { "epoch": 1.92, "grad_norm": 0.6057835817337036, "learning_rate": 0.00017270557922913104, "loss": 1.7576, "step": 57663 }, { "epoch": 1.92, "grad_norm": 0.5818461179733276, "learning_rate": 0.0001726961147604955, "loss": 1.7748, "step": 57664 }, { "epoch": 1.92, "grad_norm": 0.5900220274925232, "learning_rate": 0.000172686650446388, "loss": 1.7044, "step": 57665 }, { "epoch": 1.92, "grad_norm": 0.5697636604309082, "learning_rate": 0.00017267718628682024, "loss": 1.6887, "step": 57666 }, { "epoch": 1.92, "grad_norm": 0.5885265469551086, "learning_rate": 0.00017266772228180364, "loss": 1.6962, "step": 57667 }, { "epoch": 1.92, "grad_norm": 0.5812827348709106, "learning_rate": 0.00017265825843134952, "loss": 1.7508, "step": 57668 }, { "epoch": 1.92, "grad_norm": 0.5686988234519958, "learning_rate": 0.0001726487947354695, "loss": 1.7559, "step": 57669 }, { "epoch": 1.92, "grad_norm": 0.5956637263298035, "learning_rate": 0.00017263933119417523, "loss": 1.7269, "step": 57670 }, { "epoch": 1.92, "grad_norm": 0.5816628932952881, "learning_rate": 0.00017262986780747802, "loss": 1.6891, "step": 57671 }, { "epoch": 1.92, "grad_norm": 0.5871933102607727, "learning_rate": 0.0001726204045753893, "loss": 1.734, "step": 57672 }, { "epoch": 1.92, "grad_norm": 0.5694074034690857, "learning_rate": 0.0001726109414979207, "loss": 1.7412, "step": 57673 }, { "epoch": 1.92, "grad_norm": 0.6049656867980957, "learning_rate": 0.00017260147857508373, "loss": 1.7068, "step": 57674 }, { "epoch": 1.92, "grad_norm": 0.6027247905731201, "learning_rate": 0.00017259201580688963, "loss": 1.6691, "step": 57675 }, { "epoch": 1.92, "grad_norm": 0.5680949091911316, "learning_rate": 0.00017258255319335005, "loss": 1.7128, "step": 57676 }, { "epoch": 1.92, "grad_norm": 0.568350076675415, "learning_rate": 0.0001725730907344767, "loss": 1.7327, "step": 57677 }, { "epoch": 1.92, "grad_norm": 0.5828070640563965, "learning_rate": 0.00017256362843028067, "loss": 1.7382, "step": 57678 }, { "epoch": 1.92, "grad_norm": 0.5817134976387024, "learning_rate": 0.00017255416628077355, "loss": 1.7688, "step": 57679 }, { "epoch": 1.92, "grad_norm": 0.5824872255325317, "learning_rate": 0.000172544704285967, "loss": 1.6998, "step": 57680 }, { "epoch": 1.92, "grad_norm": 0.58518385887146, "learning_rate": 0.00017253524244587243, "loss": 1.7586, "step": 57681 }, { "epoch": 1.92, "grad_norm": 0.608892023563385, "learning_rate": 0.00017252578076050118, "loss": 1.7569, "step": 57682 }, { "epoch": 1.92, "grad_norm": 0.5966166853904724, "learning_rate": 0.00017251631922986487, "loss": 1.7303, "step": 57683 }, { "epoch": 1.92, "grad_norm": 0.5903090834617615, "learning_rate": 0.0001725068578539752, "loss": 1.7312, "step": 57684 }, { "epoch": 1.92, "grad_norm": 0.5898618102073669, "learning_rate": 0.00017249739663284313, "loss": 1.7354, "step": 57685 }, { "epoch": 1.92, "grad_norm": 0.6034262776374817, "learning_rate": 0.00017248793556648046, "loss": 1.8019, "step": 57686 }, { "epoch": 1.92, "grad_norm": 0.6067445874214172, "learning_rate": 0.00017247847465489882, "loss": 1.6738, "step": 57687 }, { "epoch": 1.92, "grad_norm": 0.5988931655883789, "learning_rate": 0.00017246901389810943, "loss": 1.7844, "step": 57688 }, { "epoch": 1.92, "grad_norm": 0.5971362590789795, "learning_rate": 0.0001724595532961238, "loss": 1.8174, "step": 57689 }, { "epoch": 1.92, "grad_norm": 0.5838363170623779, "learning_rate": 0.0001724500928489535, "loss": 1.7097, "step": 57690 }, { "epoch": 1.92, "grad_norm": 0.6003477573394775, "learning_rate": 0.0001724406325566102, "loss": 1.8098, "step": 57691 }, { "epoch": 1.92, "grad_norm": 0.5878250002861023, "learning_rate": 0.00017243117241910492, "loss": 1.748, "step": 57692 }, { "epoch": 1.92, "grad_norm": 0.5991796851158142, "learning_rate": 0.00017242171243644943, "loss": 1.7447, "step": 57693 }, { "epoch": 1.92, "grad_norm": 0.5959569811820984, "learning_rate": 0.0001724122526086553, "loss": 1.7907, "step": 57694 }, { "epoch": 1.92, "grad_norm": 0.6167302131652832, "learning_rate": 0.00017240279293573386, "loss": 1.7474, "step": 57695 }, { "epoch": 1.92, "grad_norm": 0.5830578207969666, "learning_rate": 0.0001723933334176966, "loss": 1.7201, "step": 57696 }, { "epoch": 1.92, "grad_norm": 0.5782203078269958, "learning_rate": 0.00017238387405455507, "loss": 1.749, "step": 57697 }, { "epoch": 1.92, "grad_norm": 0.6156362891197205, "learning_rate": 0.00017237441484632075, "loss": 1.6842, "step": 57698 }, { "epoch": 1.92, "grad_norm": 0.6077248454093933, "learning_rate": 0.00017236495579300495, "loss": 1.6951, "step": 57699 }, { "epoch": 1.92, "grad_norm": 0.5831353664398193, "learning_rate": 0.00017235549689461927, "loss": 1.7818, "step": 57700 }, { "epoch": 1.92, "grad_norm": 0.5882256031036377, "learning_rate": 0.00017234603815117534, "loss": 1.7444, "step": 57701 }, { "epoch": 1.92, "grad_norm": 0.6078891754150391, "learning_rate": 0.00017233657956268453, "loss": 1.6445, "step": 57702 }, { "epoch": 1.92, "grad_norm": 0.5821759104728699, "learning_rate": 0.00017232712112915817, "loss": 1.7205, "step": 57703 }, { "epoch": 1.92, "grad_norm": 0.5662293434143066, "learning_rate": 0.00017231766285060796, "loss": 1.7278, "step": 57704 }, { "epoch": 1.92, "grad_norm": 0.5875909924507141, "learning_rate": 0.00017230820472704535, "loss": 1.7409, "step": 57705 }, { "epoch": 1.92, "grad_norm": 0.6166378259658813, "learning_rate": 0.00017229874675848164, "loss": 1.7071, "step": 57706 }, { "epoch": 1.92, "grad_norm": 0.6060569286346436, "learning_rate": 0.00017228928894492853, "loss": 1.7175, "step": 57707 }, { "epoch": 1.92, "grad_norm": 0.599075973033905, "learning_rate": 0.0001722798312863973, "loss": 1.7822, "step": 57708 }, { "epoch": 1.92, "grad_norm": 0.5958131551742554, "learning_rate": 0.00017227037378289965, "loss": 1.7204, "step": 57709 }, { "epoch": 1.92, "grad_norm": 0.5956994891166687, "learning_rate": 0.00017226091643444684, "loss": 1.771, "step": 57710 }, { "epoch": 1.92, "grad_norm": 0.6015878915786743, "learning_rate": 0.00017225145924105057, "loss": 1.7725, "step": 57711 }, { "epoch": 1.92, "grad_norm": 0.5777137279510498, "learning_rate": 0.00017224200220272218, "loss": 1.6679, "step": 57712 }, { "epoch": 1.92, "grad_norm": 0.5864192843437195, "learning_rate": 0.00017223254531947306, "loss": 1.7299, "step": 57713 }, { "epoch": 1.92, "grad_norm": 0.6131381988525391, "learning_rate": 0.00017222308859131497, "loss": 1.7194, "step": 57714 }, { "epoch": 1.92, "grad_norm": 0.5989353060722351, "learning_rate": 0.00017221363201825904, "loss": 1.7257, "step": 57715 }, { "epoch": 1.92, "grad_norm": 0.5965222120285034, "learning_rate": 0.00017220417560031713, "loss": 1.7824, "step": 57716 }, { "epoch": 1.92, "grad_norm": 0.620402455329895, "learning_rate": 0.00017219471933750045, "loss": 1.6565, "step": 57717 }, { "epoch": 1.92, "grad_norm": 0.5669071078300476, "learning_rate": 0.00017218526322982045, "loss": 1.7254, "step": 57718 }, { "epoch": 1.92, "grad_norm": 0.5928134322166443, "learning_rate": 0.00017217580727728885, "loss": 1.7503, "step": 57719 }, { "epoch": 1.92, "grad_norm": 0.6109541654586792, "learning_rate": 0.00017216635147991683, "loss": 1.7985, "step": 57720 }, { "epoch": 1.92, "grad_norm": 0.6047946214675903, "learning_rate": 0.00017215689583771618, "loss": 1.7663, "step": 57721 }, { "epoch": 1.92, "grad_norm": 0.6137033700942993, "learning_rate": 0.00017214744035069806, "loss": 1.7121, "step": 57722 }, { "epoch": 1.92, "grad_norm": 0.576099693775177, "learning_rate": 0.00017213798501887427, "loss": 1.774, "step": 57723 }, { "epoch": 1.92, "grad_norm": 0.5822563767433167, "learning_rate": 0.0001721285298422561, "loss": 1.741, "step": 57724 }, { "epoch": 1.92, "grad_norm": 0.5920455455780029, "learning_rate": 0.00017211907482085495, "loss": 1.7094, "step": 57725 }, { "epoch": 1.92, "grad_norm": 0.6042883992195129, "learning_rate": 0.00017210961995468252, "loss": 1.6714, "step": 57726 }, { "epoch": 1.92, "grad_norm": 0.5831521153450012, "learning_rate": 0.00017210016524375017, "loss": 1.7776, "step": 57727 }, { "epoch": 1.92, "grad_norm": 0.614262580871582, "learning_rate": 0.0001720907106880692, "loss": 1.7084, "step": 57728 }, { "epoch": 1.92, "grad_norm": 0.5898951888084412, "learning_rate": 0.00017208125628765132, "loss": 1.6543, "step": 57729 }, { "epoch": 1.92, "grad_norm": 0.6363778710365295, "learning_rate": 0.00017207180204250808, "loss": 1.7066, "step": 57730 }, { "epoch": 1.92, "grad_norm": 0.6070788502693176, "learning_rate": 0.0001720623479526508, "loss": 1.8235, "step": 57731 }, { "epoch": 1.92, "grad_norm": 0.5765327215194702, "learning_rate": 0.00017205289401809085, "loss": 1.6707, "step": 57732 }, { "epoch": 1.92, "grad_norm": 0.5866740345954895, "learning_rate": 0.00017204344023883996, "loss": 1.7902, "step": 57733 }, { "epoch": 1.92, "grad_norm": 0.5993163585662842, "learning_rate": 0.0001720339866149095, "loss": 1.8404, "step": 57734 }, { "epoch": 1.92, "grad_norm": 0.5848888158798218, "learning_rate": 0.00017202453314631074, "loss": 1.7373, "step": 57735 }, { "epoch": 1.92, "grad_norm": 0.6067484021186829, "learning_rate": 0.00017201507983305542, "loss": 1.6878, "step": 57736 }, { "epoch": 1.92, "grad_norm": 0.6125907897949219, "learning_rate": 0.00017200562667515513, "loss": 1.7365, "step": 57737 }, { "epoch": 1.92, "grad_norm": 0.5870140790939331, "learning_rate": 0.0001719961736726209, "loss": 1.7201, "step": 57738 }, { "epoch": 1.92, "grad_norm": 0.6061384677886963, "learning_rate": 0.0001719867208254645, "loss": 1.7237, "step": 57739 }, { "epoch": 1.92, "grad_norm": 0.6027132868766785, "learning_rate": 0.00017197726813369745, "loss": 1.7612, "step": 57740 }, { "epoch": 1.92, "grad_norm": 0.605911374092102, "learning_rate": 0.00017196781559733116, "loss": 1.7887, "step": 57741 }, { "epoch": 1.92, "grad_norm": 0.60367751121521, "learning_rate": 0.00017195836321637692, "loss": 1.7838, "step": 57742 }, { "epoch": 1.92, "grad_norm": 0.5984861254692078, "learning_rate": 0.00017194891099084637, "loss": 1.7623, "step": 57743 }, { "epoch": 1.92, "grad_norm": 0.5853146314620972, "learning_rate": 0.00017193945892075122, "loss": 1.7244, "step": 57744 }, { "epoch": 1.92, "grad_norm": 0.6011695861816406, "learning_rate": 0.00017193000700610247, "loss": 1.6944, "step": 57745 }, { "epoch": 1.92, "grad_norm": 0.6246993541717529, "learning_rate": 0.0001719205552469118, "loss": 1.6824, "step": 57746 }, { "epoch": 1.92, "grad_norm": 0.591025173664093, "learning_rate": 0.00017191110364319083, "loss": 1.7082, "step": 57747 }, { "epoch": 1.92, "grad_norm": 0.5753006339073181, "learning_rate": 0.00017190165219495092, "loss": 1.649, "step": 57748 }, { "epoch": 1.92, "grad_norm": 0.6008917689323425, "learning_rate": 0.0001718922009022034, "loss": 1.7403, "step": 57749 }, { "epoch": 1.92, "grad_norm": 0.6091225743293762, "learning_rate": 0.00017188274976495988, "loss": 1.8049, "step": 57750 }, { "epoch": 1.92, "grad_norm": 0.5820744037628174, "learning_rate": 0.00017187329878323195, "loss": 1.7073, "step": 57751 }, { "epoch": 1.92, "grad_norm": 0.6028335094451904, "learning_rate": 0.00017186384795703092, "loss": 1.7525, "step": 57752 }, { "epoch": 1.92, "grad_norm": 0.6067817211151123, "learning_rate": 0.0001718543972863682, "loss": 1.7918, "step": 57753 }, { "epoch": 1.92, "grad_norm": 0.5983718633651733, "learning_rate": 0.0001718449467712555, "loss": 1.6992, "step": 57754 }, { "epoch": 1.92, "grad_norm": 0.6181626915931702, "learning_rate": 0.00017183549641170415, "loss": 1.8208, "step": 57755 }, { "epoch": 1.92, "grad_norm": 0.6117380261421204, "learning_rate": 0.00017182604620772548, "loss": 1.7403, "step": 57756 }, { "epoch": 1.92, "grad_norm": 0.5801809430122375, "learning_rate": 0.00017181659615933122, "loss": 1.7231, "step": 57757 }, { "epoch": 1.92, "grad_norm": 0.6239596009254456, "learning_rate": 0.00017180714626653263, "loss": 1.792, "step": 57758 }, { "epoch": 1.92, "grad_norm": 0.5977470874786377, "learning_rate": 0.00017179769652934137, "loss": 1.7045, "step": 57759 }, { "epoch": 1.92, "grad_norm": 0.6299168467521667, "learning_rate": 0.00017178824694776874, "loss": 1.7075, "step": 57760 }, { "epoch": 1.92, "grad_norm": 0.6172895431518555, "learning_rate": 0.00017177879752182634, "loss": 1.7374, "step": 57761 }, { "epoch": 1.92, "grad_norm": 0.5827767252922058, "learning_rate": 0.00017176934825152565, "loss": 1.7105, "step": 57762 }, { "epoch": 1.92, "grad_norm": 0.6126000881195068, "learning_rate": 0.00017175989913687794, "loss": 1.72, "step": 57763 }, { "epoch": 1.92, "grad_norm": 0.624698281288147, "learning_rate": 0.00017175045017789496, "loss": 1.7989, "step": 57764 }, { "epoch": 1.92, "grad_norm": 0.6066815257072449, "learning_rate": 0.00017174100137458783, "loss": 1.66, "step": 57765 }, { "epoch": 1.92, "grad_norm": 0.5964369177818298, "learning_rate": 0.00017173155272696842, "loss": 1.7405, "step": 57766 }, { "epoch": 1.92, "grad_norm": 0.5849480032920837, "learning_rate": 0.000171722104235048, "loss": 1.7987, "step": 57767 }, { "epoch": 1.92, "grad_norm": 0.6049372553825378, "learning_rate": 0.00017171265589883793, "loss": 1.7498, "step": 57768 }, { "epoch": 1.92, "grad_norm": 0.6082398295402527, "learning_rate": 0.00017170320771834986, "loss": 1.6598, "step": 57769 }, { "epoch": 1.92, "grad_norm": 0.6053235530853271, "learning_rate": 0.0001716937596935951, "loss": 1.7181, "step": 57770 }, { "epoch": 1.92, "grad_norm": 0.5962575078010559, "learning_rate": 0.00017168431182458538, "loss": 1.7229, "step": 57771 }, { "epoch": 1.92, "grad_norm": 0.6272128820419312, "learning_rate": 0.00017167486411133186, "loss": 1.6411, "step": 57772 }, { "epoch": 1.92, "grad_norm": 0.6019316911697388, "learning_rate": 0.0001716654165538462, "loss": 1.6933, "step": 57773 }, { "epoch": 1.92, "grad_norm": 0.589957058429718, "learning_rate": 0.00017165596915213988, "loss": 1.705, "step": 57774 }, { "epoch": 1.92, "grad_norm": 0.6021490097045898, "learning_rate": 0.00017164652190622415, "loss": 1.7562, "step": 57775 }, { "epoch": 1.92, "grad_norm": 0.6242413520812988, "learning_rate": 0.00017163707481611076, "loss": 1.7312, "step": 57776 }, { "epoch": 1.92, "grad_norm": 0.6000858545303345, "learning_rate": 0.00017162762788181093, "loss": 1.7237, "step": 57777 }, { "epoch": 1.92, "grad_norm": 0.5951230525970459, "learning_rate": 0.00017161818110333638, "loss": 1.7536, "step": 57778 }, { "epoch": 1.92, "grad_norm": 0.5928602814674377, "learning_rate": 0.0001716087344806983, "loss": 1.7788, "step": 57779 }, { "epoch": 1.92, "grad_norm": 0.5719766020774841, "learning_rate": 0.00017159928801390847, "loss": 1.7005, "step": 57780 }, { "epoch": 1.92, "grad_norm": 0.573073148727417, "learning_rate": 0.0001715898417029781, "loss": 1.7475, "step": 57781 }, { "epoch": 1.92, "grad_norm": 0.6046662926673889, "learning_rate": 0.0001715803955479187, "loss": 1.7675, "step": 57782 }, { "epoch": 1.92, "grad_norm": 0.5802448391914368, "learning_rate": 0.00017157094954874184, "loss": 1.6369, "step": 57783 }, { "epoch": 1.92, "grad_norm": 0.6110067963600159, "learning_rate": 0.00017156150370545897, "loss": 1.6927, "step": 57784 }, { "epoch": 1.92, "grad_norm": 0.5928812623023987, "learning_rate": 0.00017155205801808136, "loss": 1.6862, "step": 57785 }, { "epoch": 1.92, "grad_norm": 0.6057575941085815, "learning_rate": 0.00017154261248662064, "loss": 1.6533, "step": 57786 }, { "epoch": 1.92, "grad_norm": 0.6043040156364441, "learning_rate": 0.00017153316711108837, "loss": 1.798, "step": 57787 }, { "epoch": 1.92, "grad_norm": 0.5951259136199951, "learning_rate": 0.00017152372189149589, "loss": 1.641, "step": 57788 }, { "epoch": 1.92, "grad_norm": 0.5930480360984802, "learning_rate": 0.0001715142768278546, "loss": 1.7558, "step": 57789 }, { "epoch": 1.92, "grad_norm": 0.6075461506843567, "learning_rate": 0.00017150483192017612, "loss": 1.7086, "step": 57790 }, { "epoch": 1.92, "grad_norm": 0.5988262891769409, "learning_rate": 0.0001714953871684719, "loss": 1.6849, "step": 57791 }, { "epoch": 1.92, "grad_norm": 0.5861021280288696, "learning_rate": 0.0001714859425727532, "loss": 1.6699, "step": 57792 }, { "epoch": 1.92, "grad_norm": 0.5947000980377197, "learning_rate": 0.00017147649813303162, "loss": 1.7131, "step": 57793 }, { "epoch": 1.92, "grad_norm": 0.597618579864502, "learning_rate": 0.00017146705384931887, "loss": 1.674, "step": 57794 }, { "epoch": 1.92, "grad_norm": 0.592189610004425, "learning_rate": 0.00017145760972162597, "loss": 1.7447, "step": 57795 }, { "epoch": 1.92, "grad_norm": 0.611375629901886, "learning_rate": 0.00017144816574996455, "loss": 1.8352, "step": 57796 }, { "epoch": 1.92, "grad_norm": 0.5896768569946289, "learning_rate": 0.00017143872193434626, "loss": 1.6166, "step": 57797 }, { "epoch": 1.92, "grad_norm": 0.6048051714897156, "learning_rate": 0.00017142927827478244, "loss": 1.6874, "step": 57798 }, { "epoch": 1.92, "grad_norm": 0.5970196723937988, "learning_rate": 0.00017141983477128437, "loss": 1.6513, "step": 57799 }, { "epoch": 1.92, "grad_norm": 0.5973109602928162, "learning_rate": 0.00017141039142386369, "loss": 1.7081, "step": 57800 }, { "epoch": 1.92, "grad_norm": 0.5930357575416565, "learning_rate": 0.0001714009482325321, "loss": 1.7262, "step": 57801 }, { "epoch": 1.92, "grad_norm": 0.6176626682281494, "learning_rate": 0.00017139150519730056, "loss": 1.7728, "step": 57802 }, { "epoch": 1.92, "grad_norm": 0.6094842553138733, "learning_rate": 0.0001713820623181808, "loss": 1.7617, "step": 57803 }, { "epoch": 1.92, "grad_norm": 0.5971918106079102, "learning_rate": 0.00017137261959518433, "loss": 1.7401, "step": 57804 }, { "epoch": 1.92, "grad_norm": 0.612745463848114, "learning_rate": 0.0001713631770283226, "loss": 1.6722, "step": 57805 }, { "epoch": 1.92, "grad_norm": 0.5951665043830872, "learning_rate": 0.00017135373461760686, "loss": 1.7352, "step": 57806 }, { "epoch": 1.92, "grad_norm": 0.6140020489692688, "learning_rate": 0.00017134429236304876, "loss": 1.784, "step": 57807 }, { "epoch": 1.92, "grad_norm": 0.5950144529342651, "learning_rate": 0.00017133485026466, "loss": 1.7377, "step": 57808 }, { "epoch": 1.92, "grad_norm": 0.5912507176399231, "learning_rate": 0.0001713254083224515, "loss": 1.7195, "step": 57809 }, { "epoch": 1.92, "grad_norm": 0.6159191131591797, "learning_rate": 0.000171315966536435, "loss": 1.6323, "step": 57810 }, { "epoch": 1.92, "grad_norm": 0.6051453351974487, "learning_rate": 0.00017130652490662205, "loss": 1.7364, "step": 57811 }, { "epoch": 1.92, "grad_norm": 0.6121286749839783, "learning_rate": 0.00017129708343302405, "loss": 1.7332, "step": 57812 }, { "epoch": 1.92, "grad_norm": 0.5991272330284119, "learning_rate": 0.0001712876421156523, "loss": 1.7198, "step": 57813 }, { "epoch": 1.92, "grad_norm": 0.5966050624847412, "learning_rate": 0.0001712782009545185, "loss": 1.7568, "step": 57814 }, { "epoch": 1.92, "grad_norm": 0.7647677659988403, "learning_rate": 0.000171268759949634, "loss": 1.7451, "step": 57815 }, { "epoch": 1.92, "grad_norm": 0.60027676820755, "learning_rate": 0.00017125931910101015, "loss": 1.7803, "step": 57816 }, { "epoch": 1.92, "grad_norm": 0.6130263209342957, "learning_rate": 0.0001712498784086585, "loss": 1.8313, "step": 57817 }, { "epoch": 1.92, "grad_norm": 0.5919727683067322, "learning_rate": 0.0001712404378725907, "loss": 1.7537, "step": 57818 }, { "epoch": 1.92, "grad_norm": 0.5968126058578491, "learning_rate": 0.000171230997492818, "loss": 1.8637, "step": 57819 }, { "epoch": 1.92, "grad_norm": 0.6136329770088196, "learning_rate": 0.00017122155726935172, "loss": 1.7546, "step": 57820 }, { "epoch": 1.92, "grad_norm": 0.6078302264213562, "learning_rate": 0.00017121211720220366, "loss": 1.8029, "step": 57821 }, { "epoch": 1.92, "grad_norm": 0.5966590046882629, "learning_rate": 0.00017120267729138513, "loss": 1.6655, "step": 57822 }, { "epoch": 1.92, "grad_norm": 0.6057316660881042, "learning_rate": 0.00017119323753690745, "loss": 1.7902, "step": 57823 }, { "epoch": 1.92, "grad_norm": 0.6051257848739624, "learning_rate": 0.00017118379793878231, "loss": 1.6682, "step": 57824 }, { "epoch": 1.92, "grad_norm": 0.5826564431190491, "learning_rate": 0.00017117435849702095, "loss": 1.7314, "step": 57825 }, { "epoch": 1.92, "grad_norm": 0.5956423878669739, "learning_rate": 0.00017116491921163502, "loss": 1.7638, "step": 57826 }, { "epoch": 1.92, "grad_norm": 0.5915115475654602, "learning_rate": 0.0001711554800826358, "loss": 1.6848, "step": 57827 }, { "epoch": 1.92, "grad_norm": 0.8784452080726624, "learning_rate": 0.000171146041110035, "loss": 1.7053, "step": 57828 }, { "epoch": 1.92, "grad_norm": 0.5951642990112305, "learning_rate": 0.00017113660229384385, "loss": 1.7374, "step": 57829 }, { "epoch": 1.92, "grad_norm": 0.5921217799186707, "learning_rate": 0.0001711271636340738, "loss": 1.8066, "step": 57830 }, { "epoch": 1.92, "grad_norm": 0.5822176933288574, "learning_rate": 0.0001711177251307365, "loss": 1.6657, "step": 57831 }, { "epoch": 1.92, "grad_norm": 0.5951647162437439, "learning_rate": 0.00017110828678384318, "loss": 1.6943, "step": 57832 }, { "epoch": 1.92, "grad_norm": 0.5873976945877075, "learning_rate": 0.0001710988485934055, "loss": 1.706, "step": 57833 }, { "epoch": 1.92, "grad_norm": 0.5994910597801208, "learning_rate": 0.00017108941055943484, "loss": 1.7369, "step": 57834 }, { "epoch": 1.92, "grad_norm": 0.5924538373947144, "learning_rate": 0.00017107997268194254, "loss": 1.7142, "step": 57835 }, { "epoch": 1.92, "grad_norm": 0.5749821662902832, "learning_rate": 0.00017107053496094027, "loss": 1.6706, "step": 57836 }, { "epoch": 1.92, "grad_norm": 0.580062747001648, "learning_rate": 0.00017106109739643924, "loss": 1.633, "step": 57837 }, { "epoch": 1.92, "grad_norm": 0.5776596069335938, "learning_rate": 0.00017105165998845115, "loss": 1.7063, "step": 57838 }, { "epoch": 1.92, "grad_norm": 1.8650048971176147, "learning_rate": 0.00017104222273698727, "loss": 1.7646, "step": 57839 }, { "epoch": 1.92, "grad_norm": 0.5985437035560608, "learning_rate": 0.00017103278564205922, "loss": 1.7461, "step": 57840 }, { "epoch": 1.92, "grad_norm": 0.5880322456359863, "learning_rate": 0.00017102334870367836, "loss": 1.7041, "step": 57841 }, { "epoch": 1.92, "grad_norm": 0.5905121564865112, "learning_rate": 0.00017101391192185606, "loss": 1.8125, "step": 57842 }, { "epoch": 1.92, "grad_norm": 0.5877067446708679, "learning_rate": 0.00017100447529660393, "loss": 1.7159, "step": 57843 }, { "epoch": 1.92, "grad_norm": 0.5913257002830505, "learning_rate": 0.0001709950388279334, "loss": 1.6706, "step": 57844 }, { "epoch": 1.92, "grad_norm": 0.5929746031761169, "learning_rate": 0.00017098560251585577, "loss": 1.7629, "step": 57845 }, { "epoch": 1.92, "grad_norm": 0.5878273844718933, "learning_rate": 0.0001709761663603826, "loss": 1.7091, "step": 57846 }, { "epoch": 1.92, "grad_norm": 0.5995277166366577, "learning_rate": 0.00017096673036152545, "loss": 1.7755, "step": 57847 }, { "epoch": 1.92, "grad_norm": 0.6066105961799622, "learning_rate": 0.0001709572945192957, "loss": 1.727, "step": 57848 }, { "epoch": 1.92, "grad_norm": 0.593451738357544, "learning_rate": 0.0001709478588337047, "loss": 1.7927, "step": 57849 }, { "epoch": 1.92, "grad_norm": 0.5898224115371704, "learning_rate": 0.00017093842330476403, "loss": 1.7457, "step": 57850 }, { "epoch": 1.92, "grad_norm": 0.5857393741607666, "learning_rate": 0.00017092898793248516, "loss": 1.6771, "step": 57851 }, { "epoch": 1.92, "grad_norm": 0.5742534399032593, "learning_rate": 0.0001709195527168793, "loss": 1.7517, "step": 57852 }, { "epoch": 1.92, "grad_norm": 0.5771057605743408, "learning_rate": 0.0001709101176579581, "loss": 1.7284, "step": 57853 }, { "epoch": 1.92, "grad_norm": 0.604351282119751, "learning_rate": 0.0001709006827557333, "loss": 1.7318, "step": 57854 }, { "epoch": 1.92, "grad_norm": 0.5929930806159973, "learning_rate": 0.00017089124801021572, "loss": 1.7705, "step": 57855 }, { "epoch": 1.92, "grad_norm": 0.6041619181632996, "learning_rate": 0.00017088181342141717, "loss": 1.7213, "step": 57856 }, { "epoch": 1.92, "grad_norm": 0.5752113461494446, "learning_rate": 0.00017087237898934923, "loss": 1.7359, "step": 57857 }, { "epoch": 1.92, "grad_norm": 0.6021581888198853, "learning_rate": 0.0001708629447140232, "loss": 1.6128, "step": 57858 }, { "epoch": 1.92, "grad_norm": 0.6175550222396851, "learning_rate": 0.00017085351059545036, "loss": 1.7076, "step": 57859 }, { "epoch": 1.93, "grad_norm": 0.5838400721549988, "learning_rate": 0.00017084407663364235, "loss": 1.752, "step": 57860 }, { "epoch": 1.93, "grad_norm": 0.6058401465415955, "learning_rate": 0.00017083464282861087, "loss": 1.818, "step": 57861 }, { "epoch": 1.93, "grad_norm": 0.6153745651245117, "learning_rate": 0.0001708252091803668, "loss": 1.7313, "step": 57862 }, { "epoch": 1.93, "grad_norm": 0.6055343747138977, "learning_rate": 0.00017081577568892195, "loss": 1.7113, "step": 57863 }, { "epoch": 1.93, "grad_norm": 0.5829718708992004, "learning_rate": 0.00017080634235428779, "loss": 1.697, "step": 57864 }, { "epoch": 1.93, "grad_norm": 0.5982956290245056, "learning_rate": 0.0001707969091764757, "loss": 1.7572, "step": 57865 }, { "epoch": 1.93, "grad_norm": 0.6029433012008667, "learning_rate": 0.00017078747615549705, "loss": 1.7218, "step": 57866 }, { "epoch": 1.93, "grad_norm": 0.6009949445724487, "learning_rate": 0.00017077804329136332, "loss": 1.6642, "step": 57867 }, { "epoch": 1.93, "grad_norm": 0.607154369354248, "learning_rate": 0.00017076861058408613, "loss": 1.7331, "step": 57868 }, { "epoch": 1.93, "grad_norm": 0.5829648971557617, "learning_rate": 0.0001707591780336768, "loss": 1.657, "step": 57869 }, { "epoch": 1.93, "grad_norm": 0.621749222278595, "learning_rate": 0.00017074974564014673, "loss": 1.7051, "step": 57870 }, { "epoch": 1.93, "grad_norm": 0.6075372695922852, "learning_rate": 0.00017074031340350747, "loss": 1.7667, "step": 57871 }, { "epoch": 1.93, "grad_norm": 0.6266674399375916, "learning_rate": 0.00017073088132377046, "loss": 1.7429, "step": 57872 }, { "epoch": 1.93, "grad_norm": 0.6092589497566223, "learning_rate": 0.00017072144940094696, "loss": 1.7329, "step": 57873 }, { "epoch": 1.93, "grad_norm": 0.6018900871276855, "learning_rate": 0.00017071201763504872, "loss": 1.699, "step": 57874 }, { "epoch": 1.93, "grad_norm": 0.6166496276855469, "learning_rate": 0.00017070258602608693, "loss": 1.7292, "step": 57875 }, { "epoch": 1.93, "grad_norm": 0.5778695940971375, "learning_rate": 0.00017069315457407327, "loss": 1.7516, "step": 57876 }, { "epoch": 1.93, "grad_norm": 0.5918533205986023, "learning_rate": 0.00017068372327901893, "loss": 1.7078, "step": 57877 }, { "epoch": 1.93, "grad_norm": 0.6113030314445496, "learning_rate": 0.0001706742921409356, "loss": 1.7182, "step": 57878 }, { "epoch": 1.93, "grad_norm": 0.6099157333374023, "learning_rate": 0.00017066486115983464, "loss": 1.655, "step": 57879 }, { "epoch": 1.93, "grad_norm": 0.6041949987411499, "learning_rate": 0.0001706554303357274, "loss": 1.6912, "step": 57880 }, { "epoch": 1.93, "grad_norm": 0.6099101305007935, "learning_rate": 0.0001706459996686255, "loss": 1.7327, "step": 57881 }, { "epoch": 1.93, "grad_norm": 0.5832850337028503, "learning_rate": 0.00017063656915854015, "loss": 1.7092, "step": 57882 }, { "epoch": 1.93, "grad_norm": 0.5757212042808533, "learning_rate": 0.00017062713880548308, "loss": 1.7311, "step": 57883 }, { "epoch": 1.93, "grad_norm": 0.5718792080879211, "learning_rate": 0.0001706177086094656, "loss": 1.5903, "step": 57884 }, { "epoch": 1.93, "grad_norm": 0.5856162905693054, "learning_rate": 0.00017060827857049904, "loss": 1.7434, "step": 57885 }, { "epoch": 1.93, "grad_norm": 0.5863436460494995, "learning_rate": 0.0001705988486885951, "loss": 1.7549, "step": 57886 }, { "epoch": 1.93, "grad_norm": 0.6187267899513245, "learning_rate": 0.00017058941896376498, "loss": 1.7607, "step": 57887 }, { "epoch": 1.93, "grad_norm": 0.6113333702087402, "learning_rate": 0.00017057998939602035, "loss": 1.6807, "step": 57888 }, { "epoch": 1.93, "grad_norm": 0.6103758811950684, "learning_rate": 0.00017057055998537238, "loss": 1.7183, "step": 57889 }, { "epoch": 1.93, "grad_norm": 0.5936592221260071, "learning_rate": 0.00017056113073183284, "loss": 1.728, "step": 57890 }, { "epoch": 1.93, "grad_norm": 0.5823115110397339, "learning_rate": 0.00017055170163541303, "loss": 1.6774, "step": 57891 }, { "epoch": 1.93, "grad_norm": 0.5859315991401672, "learning_rate": 0.0001705422726961242, "loss": 1.759, "step": 57892 }, { "epoch": 1.93, "grad_norm": 0.5859346985816956, "learning_rate": 0.00017053284391397815, "loss": 1.6761, "step": 57893 }, { "epoch": 1.93, "grad_norm": 0.6012164354324341, "learning_rate": 0.00017052341528898601, "loss": 1.7352, "step": 57894 }, { "epoch": 1.93, "grad_norm": 0.6462993025779724, "learning_rate": 0.00017051398682115952, "loss": 1.7752, "step": 57895 }, { "epoch": 1.93, "grad_norm": 0.6031238436698914, "learning_rate": 0.0001705045585105098, "loss": 1.6847, "step": 57896 }, { "epoch": 1.93, "grad_norm": 0.5949090123176575, "learning_rate": 0.00017049513035704864, "loss": 1.7405, "step": 57897 }, { "epoch": 1.93, "grad_norm": 0.5987764000892639, "learning_rate": 0.00017048570236078727, "loss": 1.6505, "step": 57898 }, { "epoch": 1.93, "grad_norm": 0.6582611799240112, "learning_rate": 0.00017047627452173705, "loss": 1.7236, "step": 57899 }, { "epoch": 1.93, "grad_norm": 0.620860755443573, "learning_rate": 0.0001704668468399097, "loss": 1.6198, "step": 57900 }, { "epoch": 1.93, "grad_norm": 0.5966668725013733, "learning_rate": 0.00017045741931531652, "loss": 1.787, "step": 57901 }, { "epoch": 1.93, "grad_norm": 0.5987091064453125, "learning_rate": 0.00017044799194796878, "loss": 1.7654, "step": 57902 }, { "epoch": 1.93, "grad_norm": 0.5999448895454407, "learning_rate": 0.00017043856473787812, "loss": 1.7003, "step": 57903 }, { "epoch": 1.93, "grad_norm": 0.5928992629051208, "learning_rate": 0.00017042913768505603, "loss": 1.6805, "step": 57904 }, { "epoch": 1.93, "grad_norm": 0.5753490924835205, "learning_rate": 0.0001704197107895139, "loss": 1.7937, "step": 57905 }, { "epoch": 1.93, "grad_norm": 0.5913754105567932, "learning_rate": 0.00017041028405126305, "loss": 1.7409, "step": 57906 }, { "epoch": 1.93, "grad_norm": 0.5742378830909729, "learning_rate": 0.00017040085747031515, "loss": 1.6802, "step": 57907 }, { "epoch": 1.93, "grad_norm": 0.5838748216629028, "learning_rate": 0.0001703914310466815, "loss": 1.7193, "step": 57908 }, { "epoch": 1.93, "grad_norm": 0.6026648879051208, "learning_rate": 0.00017038200478037337, "loss": 1.7095, "step": 57909 }, { "epoch": 1.93, "grad_norm": 0.580410897731781, "learning_rate": 0.00017037257867140248, "loss": 1.7889, "step": 57910 }, { "epoch": 1.93, "grad_norm": 0.5867766737937927, "learning_rate": 0.00017036315271978038, "loss": 1.7161, "step": 57911 }, { "epoch": 1.93, "grad_norm": 0.5886825323104858, "learning_rate": 0.00017035372692551808, "loss": 1.756, "step": 57912 }, { "epoch": 1.93, "grad_norm": 0.5896472930908203, "learning_rate": 0.00017034430128862725, "loss": 1.7672, "step": 57913 }, { "epoch": 1.93, "grad_norm": 0.6088935732841492, "learning_rate": 0.00017033487580911947, "loss": 1.7223, "step": 57914 }, { "epoch": 1.93, "grad_norm": 0.587331235408783, "learning_rate": 0.00017032545048700602, "loss": 1.6688, "step": 57915 }, { "epoch": 1.93, "grad_norm": 0.651586651802063, "learning_rate": 0.00017031602532229824, "loss": 1.7164, "step": 57916 }, { "epoch": 1.93, "grad_norm": 0.5940775275230408, "learning_rate": 0.0001703066003150077, "loss": 1.6904, "step": 57917 }, { "epoch": 1.93, "grad_norm": 0.5897067189216614, "learning_rate": 0.0001702971754651461, "loss": 1.7089, "step": 57918 }, { "epoch": 1.93, "grad_norm": 0.6044797897338867, "learning_rate": 0.00017028775077272438, "loss": 1.7543, "step": 57919 }, { "epoch": 1.93, "grad_norm": 0.6061624884605408, "learning_rate": 0.0001702783262377542, "loss": 1.6862, "step": 57920 }, { "epoch": 1.93, "grad_norm": 0.609920859336853, "learning_rate": 0.00017026890186024716, "loss": 1.7561, "step": 57921 }, { "epoch": 1.93, "grad_norm": 0.6043735146522522, "learning_rate": 0.00017025947764021451, "loss": 1.7763, "step": 57922 }, { "epoch": 1.93, "grad_norm": 0.5900044441223145, "learning_rate": 0.00017025005357766766, "loss": 1.7158, "step": 57923 }, { "epoch": 1.93, "grad_norm": 0.5786848664283752, "learning_rate": 0.00017024062967261813, "loss": 1.7073, "step": 57924 }, { "epoch": 1.93, "grad_norm": 0.649087131023407, "learning_rate": 0.0001702312059250776, "loss": 1.7171, "step": 57925 }, { "epoch": 1.93, "grad_norm": 0.6211104393005371, "learning_rate": 0.00017022178233505698, "loss": 1.7301, "step": 57926 }, { "epoch": 1.93, "grad_norm": 0.6110102534294128, "learning_rate": 0.00017021235890256803, "loss": 1.5802, "step": 57927 }, { "epoch": 1.93, "grad_norm": 0.6082298755645752, "learning_rate": 0.00017020293562762227, "loss": 1.7965, "step": 57928 }, { "epoch": 1.93, "grad_norm": 0.6015006899833679, "learning_rate": 0.00017019351251023105, "loss": 1.7372, "step": 57929 }, { "epoch": 1.93, "grad_norm": 0.5764721632003784, "learning_rate": 0.00017018408955040562, "loss": 1.739, "step": 57930 }, { "epoch": 1.93, "grad_norm": 0.5970853567123413, "learning_rate": 0.00017017466674815768, "loss": 1.7527, "step": 57931 }, { "epoch": 1.93, "grad_norm": 0.6152924299240112, "learning_rate": 0.0001701652441034986, "loss": 1.7501, "step": 57932 }, { "epoch": 1.93, "grad_norm": 0.6081960797309875, "learning_rate": 0.00017015582161643964, "loss": 1.7582, "step": 57933 }, { "epoch": 1.93, "grad_norm": 0.5940033793449402, "learning_rate": 0.00017014639928699242, "loss": 1.7721, "step": 57934 }, { "epoch": 1.93, "grad_norm": 0.5959121584892273, "learning_rate": 0.0001701369771151684, "loss": 1.6814, "step": 57935 }, { "epoch": 1.93, "grad_norm": 0.5986502170562744, "learning_rate": 0.000170127555100979, "loss": 1.7828, "step": 57936 }, { "epoch": 1.93, "grad_norm": 0.6638129949569702, "learning_rate": 0.0001701181332444355, "loss": 1.8447, "step": 57937 }, { "epoch": 1.93, "grad_norm": 0.5985199213027954, "learning_rate": 0.00017010871154554954, "loss": 1.6656, "step": 57938 }, { "epoch": 1.93, "grad_norm": 0.6135350465774536, "learning_rate": 0.00017009929000433248, "loss": 1.7192, "step": 57939 }, { "epoch": 1.93, "grad_norm": 0.6244401335716248, "learning_rate": 0.00017008986862079563, "loss": 1.7283, "step": 57940 }, { "epoch": 1.93, "grad_norm": 0.6220287084579468, "learning_rate": 0.00017008044739495065, "loss": 1.7637, "step": 57941 }, { "epoch": 1.93, "grad_norm": 0.6159228086471558, "learning_rate": 0.00017007102632680873, "loss": 1.8237, "step": 57942 }, { "epoch": 1.93, "grad_norm": 0.5794140100479126, "learning_rate": 0.00017006160541638157, "loss": 1.7008, "step": 57943 }, { "epoch": 1.93, "grad_norm": 0.6030516028404236, "learning_rate": 0.00017005218466368033, "loss": 1.7625, "step": 57944 }, { "epoch": 1.93, "grad_norm": 0.5835751295089722, "learning_rate": 0.00017004276406871674, "loss": 1.7791, "step": 57945 }, { "epoch": 1.93, "grad_norm": 0.5840494632720947, "learning_rate": 0.00017003334363150212, "loss": 1.7366, "step": 57946 }, { "epoch": 1.93, "grad_norm": 0.5992707014083862, "learning_rate": 0.00017002392335204768, "loss": 1.7144, "step": 57947 }, { "epoch": 1.93, "grad_norm": 0.5959812998771667, "learning_rate": 0.00017001450323036522, "loss": 1.7513, "step": 57948 }, { "epoch": 1.93, "grad_norm": 0.603875458240509, "learning_rate": 0.00017000508326646586, "loss": 1.782, "step": 57949 }, { "epoch": 1.93, "grad_norm": 0.6275442838668823, "learning_rate": 0.0001699956634603613, "loss": 1.8315, "step": 57950 }, { "epoch": 1.93, "grad_norm": 0.6012515425682068, "learning_rate": 0.00016998624381206283, "loss": 1.7358, "step": 57951 }, { "epoch": 1.93, "grad_norm": 0.6040508151054382, "learning_rate": 0.00016997682432158182, "loss": 1.7744, "step": 57952 }, { "epoch": 1.93, "grad_norm": 0.6026250720024109, "learning_rate": 0.00016996740498892988, "loss": 1.7344, "step": 57953 }, { "epoch": 1.93, "grad_norm": 0.5860159993171692, "learning_rate": 0.00016995798581411823, "loss": 1.7342, "step": 57954 }, { "epoch": 1.93, "grad_norm": 0.6138126254081726, "learning_rate": 0.00016994856679715858, "loss": 1.7275, "step": 57955 }, { "epoch": 1.93, "grad_norm": 0.6360256671905518, "learning_rate": 0.00016993914793806206, "loss": 1.7009, "step": 57956 }, { "epoch": 1.93, "grad_norm": 0.5855801105499268, "learning_rate": 0.00016992972923684035, "loss": 1.6575, "step": 57957 }, { "epoch": 1.93, "grad_norm": 0.6190091967582703, "learning_rate": 0.00016992031069350478, "loss": 1.7385, "step": 57958 }, { "epoch": 1.93, "grad_norm": 0.5798891186714172, "learning_rate": 0.0001699108923080667, "loss": 1.7248, "step": 57959 }, { "epoch": 1.93, "grad_norm": 0.6073750853538513, "learning_rate": 0.00016990147408053776, "loss": 1.7305, "step": 57960 }, { "epoch": 1.93, "grad_norm": 0.5989829897880554, "learning_rate": 0.00016989205601092923, "loss": 1.7556, "step": 57961 }, { "epoch": 1.93, "grad_norm": 0.5999404788017273, "learning_rate": 0.00016988263809925244, "loss": 1.6828, "step": 57962 }, { "epoch": 1.93, "grad_norm": 0.5973706245422363, "learning_rate": 0.000169873220345519, "loss": 1.7069, "step": 57963 }, { "epoch": 1.93, "grad_norm": 0.5784508585929871, "learning_rate": 0.0001698638027497404, "loss": 1.7075, "step": 57964 }, { "epoch": 1.93, "grad_norm": 0.6129603385925293, "learning_rate": 0.000169854385311928, "loss": 1.6699, "step": 57965 }, { "epoch": 1.93, "grad_norm": 0.614157497882843, "learning_rate": 0.000169844968032093, "loss": 1.7242, "step": 57966 }, { "epoch": 1.93, "grad_norm": 0.5995820164680481, "learning_rate": 0.00016983555091024718, "loss": 1.7662, "step": 57967 }, { "epoch": 1.93, "grad_norm": 0.5793509483337402, "learning_rate": 0.00016982613394640189, "loss": 1.7759, "step": 57968 }, { "epoch": 1.93, "grad_norm": 0.5998114347457886, "learning_rate": 0.0001698167171405683, "loss": 1.6826, "step": 57969 }, { "epoch": 1.93, "grad_norm": 0.6358934640884399, "learning_rate": 0.00016980730049275805, "loss": 1.689, "step": 57970 }, { "epoch": 1.93, "grad_norm": 0.5674774050712585, "learning_rate": 0.0001697978840029828, "loss": 1.7795, "step": 57971 }, { "epoch": 1.93, "grad_norm": 0.5868914127349854, "learning_rate": 0.0001697884676712535, "loss": 1.7363, "step": 57972 }, { "epoch": 1.93, "grad_norm": 0.5873768329620361, "learning_rate": 0.0001697790514975818, "loss": 1.6979, "step": 57973 }, { "epoch": 1.93, "grad_norm": 0.6142832040786743, "learning_rate": 0.0001697696354819793, "loss": 1.7443, "step": 57974 }, { "epoch": 1.93, "grad_norm": 0.6089946031570435, "learning_rate": 0.00016976021962445724, "loss": 1.672, "step": 57975 }, { "epoch": 1.93, "grad_norm": 0.5916361808776855, "learning_rate": 0.00016975080392502695, "loss": 1.7577, "step": 57976 }, { "epoch": 1.93, "grad_norm": 0.6095548868179321, "learning_rate": 0.0001697413883837, "loss": 1.7023, "step": 57977 }, { "epoch": 1.93, "grad_norm": 0.5749329328536987, "learning_rate": 0.0001697319730004881, "loss": 1.7685, "step": 57978 }, { "epoch": 1.93, "grad_norm": 0.5830926895141602, "learning_rate": 0.0001697225577754021, "loss": 1.7383, "step": 57979 }, { "epoch": 1.93, "grad_norm": 0.5852459073066711, "learning_rate": 0.00016971314270845374, "loss": 1.7067, "step": 57980 }, { "epoch": 1.93, "grad_norm": 0.6069285869598389, "learning_rate": 0.00016970372779965458, "loss": 1.7808, "step": 57981 }, { "epoch": 1.93, "grad_norm": 0.6048991084098816, "learning_rate": 0.00016969431304901582, "loss": 1.7306, "step": 57982 }, { "epoch": 1.93, "grad_norm": 0.6177142262458801, "learning_rate": 0.0001696848984565489, "loss": 1.7263, "step": 57983 }, { "epoch": 1.93, "grad_norm": 0.5957144498825073, "learning_rate": 0.00016967548402226532, "loss": 1.7612, "step": 57984 }, { "epoch": 1.93, "grad_norm": 0.5991164445877075, "learning_rate": 0.00016966606974617657, "loss": 1.7446, "step": 57985 }, { "epoch": 1.93, "grad_norm": 0.6072734594345093, "learning_rate": 0.00016965665562829405, "loss": 1.7345, "step": 57986 }, { "epoch": 1.93, "grad_norm": 0.5748928189277649, "learning_rate": 0.00016964724166862897, "loss": 1.7038, "step": 57987 }, { "epoch": 1.93, "grad_norm": 0.5936296582221985, "learning_rate": 0.00016963782786719314, "loss": 1.7348, "step": 57988 }, { "epoch": 1.93, "grad_norm": 0.5963783264160156, "learning_rate": 0.00016962841422399772, "loss": 1.7479, "step": 57989 }, { "epoch": 1.93, "grad_norm": 0.6299612522125244, "learning_rate": 0.00016961900073905406, "loss": 1.7573, "step": 57990 }, { "epoch": 1.93, "grad_norm": 0.5792394280433655, "learning_rate": 0.0001696095874123739, "loss": 1.6703, "step": 57991 }, { "epoch": 1.93, "grad_norm": 0.6036450266838074, "learning_rate": 0.00016960017424396832, "loss": 1.7373, "step": 57992 }, { "epoch": 1.93, "grad_norm": 0.5940293073654175, "learning_rate": 0.00016959076123384905, "loss": 1.7757, "step": 57993 }, { "epoch": 1.93, "grad_norm": 0.5869153738021851, "learning_rate": 0.00016958134838202725, "loss": 1.6867, "step": 57994 }, { "epoch": 1.93, "grad_norm": 0.5920790433883667, "learning_rate": 0.00016957193568851463, "loss": 1.7662, "step": 57995 }, { "epoch": 1.93, "grad_norm": 0.5816903114318848, "learning_rate": 0.00016956252315332245, "loss": 1.7114, "step": 57996 }, { "epoch": 1.93, "grad_norm": 0.5780184864997864, "learning_rate": 0.000169553110776462, "loss": 1.7323, "step": 57997 }, { "epoch": 1.93, "grad_norm": 0.5753734111785889, "learning_rate": 0.000169543698557945, "loss": 1.7049, "step": 57998 }, { "epoch": 1.93, "grad_norm": 0.6166741847991943, "learning_rate": 0.00016953428649778263, "loss": 1.7731, "step": 57999 }, { "epoch": 1.93, "grad_norm": 0.5960728526115417, "learning_rate": 0.0001695248745959865, "loss": 1.7308, "step": 58000 }, { "epoch": 1.93, "grad_norm": 0.6711774468421936, "learning_rate": 0.00016951546285256795, "loss": 1.7446, "step": 58001 }, { "epoch": 1.93, "grad_norm": 0.5945992469787598, "learning_rate": 0.00016950605126753827, "loss": 1.7292, "step": 58002 }, { "epoch": 1.93, "grad_norm": 0.5923960208892822, "learning_rate": 0.00016949663984090914, "loss": 1.7418, "step": 58003 }, { "epoch": 1.93, "grad_norm": 0.6184524893760681, "learning_rate": 0.00016948722857269174, "loss": 1.7085, "step": 58004 }, { "epoch": 1.93, "grad_norm": 0.5894601941108704, "learning_rate": 0.00016947781746289776, "loss": 1.7221, "step": 58005 }, { "epoch": 1.93, "grad_norm": 0.5888702273368835, "learning_rate": 0.00016946840651153834, "loss": 1.7275, "step": 58006 }, { "epoch": 1.93, "grad_norm": 0.6036027073860168, "learning_rate": 0.0001694589957186252, "loss": 1.7049, "step": 58007 }, { "epoch": 1.93, "grad_norm": 2.173314332962036, "learning_rate": 0.00016944958508416955, "loss": 1.7225, "step": 58008 }, { "epoch": 1.93, "grad_norm": 0.5970633029937744, "learning_rate": 0.00016944017460818276, "loss": 1.7091, "step": 58009 }, { "epoch": 1.93, "grad_norm": 0.6045898795127869, "learning_rate": 0.00016943076429067646, "loss": 1.6544, "step": 58010 }, { "epoch": 1.93, "grad_norm": 0.6375357508659363, "learning_rate": 0.0001694213541316619, "loss": 1.6907, "step": 58011 }, { "epoch": 1.93, "grad_norm": 0.6498501300811768, "learning_rate": 0.00016941194413115068, "loss": 1.6882, "step": 58012 }, { "epoch": 1.93, "grad_norm": 0.6175432801246643, "learning_rate": 0.000169402534289154, "loss": 1.7895, "step": 58013 }, { "epoch": 1.93, "grad_norm": 0.5869237780570984, "learning_rate": 0.00016939312460568351, "loss": 1.7124, "step": 58014 }, { "epoch": 1.93, "grad_norm": 0.5819641351699829, "learning_rate": 0.00016938371508075055, "loss": 1.7446, "step": 58015 }, { "epoch": 1.93, "grad_norm": 0.6184810400009155, "learning_rate": 0.00016937430571436636, "loss": 1.7678, "step": 58016 }, { "epoch": 1.93, "grad_norm": 0.6195046305656433, "learning_rate": 0.00016936489650654264, "loss": 1.8037, "step": 58017 }, { "epoch": 1.93, "grad_norm": 0.613010585308075, "learning_rate": 0.00016935548745729067, "loss": 1.7802, "step": 58018 }, { "epoch": 1.93, "grad_norm": 0.6266846060752869, "learning_rate": 0.00016934607856662184, "loss": 1.699, "step": 58019 }, { "epoch": 1.93, "grad_norm": 0.5950621962547302, "learning_rate": 0.00016933666983454758, "loss": 1.741, "step": 58020 }, { "epoch": 1.93, "grad_norm": 0.6111023426055908, "learning_rate": 0.00016932726126107946, "loss": 1.7399, "step": 58021 }, { "epoch": 1.93, "grad_norm": 0.6084415316581726, "learning_rate": 0.0001693178528462288, "loss": 1.7778, "step": 58022 }, { "epoch": 1.93, "grad_norm": 0.6150413155555725, "learning_rate": 0.00016930844459000688, "loss": 1.65, "step": 58023 }, { "epoch": 1.93, "grad_norm": 0.6434804201126099, "learning_rate": 0.0001692990364924254, "loss": 1.6777, "step": 58024 }, { "epoch": 1.93, "grad_norm": 0.566830039024353, "learning_rate": 0.0001692896285534956, "loss": 1.7511, "step": 58025 }, { "epoch": 1.93, "grad_norm": 0.6005077362060547, "learning_rate": 0.00016928022077322883, "loss": 1.7184, "step": 58026 }, { "epoch": 1.93, "grad_norm": 0.6455740928649902, "learning_rate": 0.0001692708131516366, "loss": 1.7065, "step": 58027 }, { "epoch": 1.93, "grad_norm": 0.5987151265144348, "learning_rate": 0.0001692614056887306, "loss": 1.7484, "step": 58028 }, { "epoch": 1.93, "grad_norm": 0.6126673221588135, "learning_rate": 0.0001692519983845217, "loss": 1.8406, "step": 58029 }, { "epoch": 1.93, "grad_norm": 0.5856159329414368, "learning_rate": 0.0001692425912390217, "loss": 1.7348, "step": 58030 }, { "epoch": 1.93, "grad_norm": 0.6090537905693054, "learning_rate": 0.00016923318425224196, "loss": 1.7624, "step": 58031 }, { "epoch": 1.93, "grad_norm": 0.6011387705802917, "learning_rate": 0.0001692237774241939, "loss": 1.777, "step": 58032 }, { "epoch": 1.93, "grad_norm": 0.5943302512168884, "learning_rate": 0.0001692143707548888, "loss": 1.686, "step": 58033 }, { "epoch": 1.93, "grad_norm": 0.6534861922264099, "learning_rate": 0.00016920496424433816, "loss": 1.8112, "step": 58034 }, { "epoch": 1.93, "grad_norm": 0.5982413291931152, "learning_rate": 0.00016919555789255366, "loss": 1.6767, "step": 58035 }, { "epoch": 1.93, "grad_norm": 0.5867146849632263, "learning_rate": 0.00016918615169954628, "loss": 1.7539, "step": 58036 }, { "epoch": 1.93, "grad_norm": 0.6049354672431946, "learning_rate": 0.0001691767456653276, "loss": 1.7014, "step": 58037 }, { "epoch": 1.93, "grad_norm": 0.5929229259490967, "learning_rate": 0.0001691673397899092, "loss": 1.7698, "step": 58038 }, { "epoch": 1.93, "grad_norm": 0.6147065162658691, "learning_rate": 0.00016915793407330237, "loss": 1.7653, "step": 58039 }, { "epoch": 1.93, "grad_norm": 0.6126933693885803, "learning_rate": 0.00016914852851551842, "loss": 1.7385, "step": 58040 }, { "epoch": 1.93, "grad_norm": 0.586064338684082, "learning_rate": 0.00016913912311656887, "loss": 1.7511, "step": 58041 }, { "epoch": 1.93, "grad_norm": 0.6014925241470337, "learning_rate": 0.0001691297178764654, "loss": 1.7256, "step": 58042 }, { "epoch": 1.93, "grad_norm": 0.5956340432167053, "learning_rate": 0.0001691203127952189, "loss": 1.7212, "step": 58043 }, { "epoch": 1.93, "grad_norm": 0.6515806913375854, "learning_rate": 0.00016911090787284104, "loss": 1.7199, "step": 58044 }, { "epoch": 1.93, "grad_norm": 0.6156630516052246, "learning_rate": 0.00016910150310934344, "loss": 1.7306, "step": 58045 }, { "epoch": 1.93, "grad_norm": 0.6136452555656433, "learning_rate": 0.00016909209850473727, "loss": 1.7744, "step": 58046 }, { "epoch": 1.93, "grad_norm": 0.5901323556900024, "learning_rate": 0.00016908269405903394, "loss": 1.6969, "step": 58047 }, { "epoch": 1.93, "grad_norm": 0.5796850919723511, "learning_rate": 0.000169073289772245, "loss": 1.6955, "step": 58048 }, { "epoch": 1.93, "grad_norm": 0.6205594539642334, "learning_rate": 0.0001690638856443818, "loss": 1.7726, "step": 58049 }, { "epoch": 1.93, "grad_norm": 0.6011289954185486, "learning_rate": 0.00016905448167545562, "loss": 1.7029, "step": 58050 }, { "epoch": 1.93, "grad_norm": 0.6319125294685364, "learning_rate": 0.00016904507786547805, "loss": 1.7853, "step": 58051 }, { "epoch": 1.93, "grad_norm": 0.6022225022315979, "learning_rate": 0.00016903567421446054, "loss": 1.7101, "step": 58052 }, { "epoch": 1.93, "grad_norm": 0.5998311042785645, "learning_rate": 0.00016902627072241445, "loss": 1.7486, "step": 58053 }, { "epoch": 1.93, "grad_norm": 0.6321349143981934, "learning_rate": 0.00016901686738935104, "loss": 1.746, "step": 58054 }, { "epoch": 1.93, "grad_norm": 0.636432945728302, "learning_rate": 0.00016900746421528195, "loss": 1.7177, "step": 58055 }, { "epoch": 1.93, "grad_norm": 0.5910929441452026, "learning_rate": 0.00016899806120021854, "loss": 1.7228, "step": 58056 }, { "epoch": 1.93, "grad_norm": 0.6062920093536377, "learning_rate": 0.00016898865834417205, "loss": 1.7155, "step": 58057 }, { "epoch": 1.93, "grad_norm": 0.5886253714561462, "learning_rate": 0.00016897925564715414, "loss": 1.7518, "step": 58058 }, { "epoch": 1.93, "grad_norm": 0.5855057239532471, "learning_rate": 0.00016896985310917598, "loss": 1.7346, "step": 58059 }, { "epoch": 1.93, "grad_norm": 0.6005046367645264, "learning_rate": 0.00016896045073024922, "loss": 1.7541, "step": 58060 }, { "epoch": 1.93, "grad_norm": 0.593064546585083, "learning_rate": 0.00016895104851038508, "loss": 1.7253, "step": 58061 }, { "epoch": 1.93, "grad_norm": 0.6299785375595093, "learning_rate": 0.00016894164644959517, "loss": 1.7519, "step": 58062 }, { "epoch": 1.93, "grad_norm": 0.608843982219696, "learning_rate": 0.00016893224454789078, "loss": 1.8022, "step": 58063 }, { "epoch": 1.93, "grad_norm": 0.6008834838867188, "learning_rate": 0.0001689228428052832, "loss": 1.7637, "step": 58064 }, { "epoch": 1.93, "grad_norm": 0.5634847283363342, "learning_rate": 0.0001689134412217841, "loss": 1.7317, "step": 58065 }, { "epoch": 1.93, "grad_norm": 0.601999819278717, "learning_rate": 0.00016890403979740468, "loss": 1.7369, "step": 58066 }, { "epoch": 1.93, "grad_norm": 0.642402172088623, "learning_rate": 0.00016889463853215654, "loss": 1.7636, "step": 58067 }, { "epoch": 1.93, "grad_norm": 0.6206887364387512, "learning_rate": 0.00016888523742605095, "loss": 1.787, "step": 58068 }, { "epoch": 1.93, "grad_norm": 0.6005672216415405, "learning_rate": 0.00016887583647909934, "loss": 1.654, "step": 58069 }, { "epoch": 1.93, "grad_norm": 0.5889714360237122, "learning_rate": 0.00016886643569131318, "loss": 1.7489, "step": 58070 }, { "epoch": 1.93, "grad_norm": 0.622143030166626, "learning_rate": 0.00016885703506270376, "loss": 1.6816, "step": 58071 }, { "epoch": 1.93, "grad_norm": 0.5845379829406738, "learning_rate": 0.00016884763459328267, "loss": 1.7275, "step": 58072 }, { "epoch": 1.93, "grad_norm": 0.5966262221336365, "learning_rate": 0.00016883823428306112, "loss": 1.7599, "step": 58073 }, { "epoch": 1.93, "grad_norm": 0.6085964441299438, "learning_rate": 0.00016882883413205075, "loss": 1.7912, "step": 58074 }, { "epoch": 1.93, "grad_norm": 0.5920599699020386, "learning_rate": 0.00016881943414026284, "loss": 1.7362, "step": 58075 }, { "epoch": 1.93, "grad_norm": 0.6088549494743347, "learning_rate": 0.00016881003430770872, "loss": 1.7632, "step": 58076 }, { "epoch": 1.93, "grad_norm": 0.5864195823669434, "learning_rate": 0.0001688006346344, "loss": 1.7789, "step": 58077 }, { "epoch": 1.93, "grad_norm": 0.5961506366729736, "learning_rate": 0.00016879123512034797, "loss": 1.7759, "step": 58078 }, { "epoch": 1.93, "grad_norm": 0.6045230031013489, "learning_rate": 0.00016878183576556392, "loss": 1.7448, "step": 58079 }, { "epoch": 1.93, "grad_norm": 0.6036795973777771, "learning_rate": 0.00016877243657005938, "loss": 1.7023, "step": 58080 }, { "epoch": 1.93, "grad_norm": 0.5789700150489807, "learning_rate": 0.0001687630375338459, "loss": 1.7736, "step": 58081 }, { "epoch": 1.93, "grad_norm": 0.6144149899482727, "learning_rate": 0.00016875363865693474, "loss": 1.6358, "step": 58082 }, { "epoch": 1.93, "grad_norm": 0.6157846450805664, "learning_rate": 0.00016874423993933722, "loss": 1.7574, "step": 58083 }, { "epoch": 1.93, "grad_norm": 0.6306575536727905, "learning_rate": 0.00016873484138106497, "loss": 1.758, "step": 58084 }, { "epoch": 1.93, "grad_norm": 0.5953798890113831, "learning_rate": 0.0001687254429821293, "loss": 1.768, "step": 58085 }, { "epoch": 1.93, "grad_norm": 0.5899891257286072, "learning_rate": 0.00016871604474254145, "loss": 1.6511, "step": 58086 }, { "epoch": 1.93, "grad_norm": 0.5858210921287537, "learning_rate": 0.000168706646662313, "loss": 1.7472, "step": 58087 }, { "epoch": 1.93, "grad_norm": 0.6002244353294373, "learning_rate": 0.00016869724874145556, "loss": 1.7428, "step": 58088 }, { "epoch": 1.93, "grad_norm": 0.6097825765609741, "learning_rate": 0.00016868785097998008, "loss": 1.7824, "step": 58089 }, { "epoch": 1.93, "grad_norm": 0.5780349373817444, "learning_rate": 0.0001686784533778982, "loss": 1.7242, "step": 58090 }, { "epoch": 1.93, "grad_norm": 0.5968165397644043, "learning_rate": 0.00016866905593522144, "loss": 1.7068, "step": 58091 }, { "epoch": 1.93, "grad_norm": 0.6140719652175903, "learning_rate": 0.00016865965865196108, "loss": 1.7529, "step": 58092 }, { "epoch": 1.93, "grad_norm": 0.5852494835853577, "learning_rate": 0.00016865026152812848, "loss": 1.6957, "step": 58093 }, { "epoch": 1.93, "grad_norm": 0.5861707329750061, "learning_rate": 0.00016864086456373507, "loss": 1.7356, "step": 58094 }, { "epoch": 1.93, "grad_norm": 0.5808255076408386, "learning_rate": 0.00016863146775879256, "loss": 1.6884, "step": 58095 }, { "epoch": 1.93, "grad_norm": 0.5946029424667358, "learning_rate": 0.00016862207111331183, "loss": 1.7085, "step": 58096 }, { "epoch": 1.93, "grad_norm": 0.6093810200691223, "learning_rate": 0.00016861267462730455, "loss": 1.8047, "step": 58097 }, { "epoch": 1.93, "grad_norm": 0.5887668132781982, "learning_rate": 0.00016860327830078224, "loss": 1.7101, "step": 58098 }, { "epoch": 1.93, "grad_norm": 0.6124067902565002, "learning_rate": 0.0001685938821337562, "loss": 1.7006, "step": 58099 }, { "epoch": 1.93, "grad_norm": 0.5963624715805054, "learning_rate": 0.0001685844861262377, "loss": 1.8275, "step": 58100 }, { "epoch": 1.93, "grad_norm": 0.5876996517181396, "learning_rate": 0.00016857509027823825, "loss": 1.6877, "step": 58101 }, { "epoch": 1.93, "grad_norm": 0.6073026657104492, "learning_rate": 0.0001685656945897694, "loss": 1.7011, "step": 58102 }, { "epoch": 1.93, "grad_norm": 0.5963665843009949, "learning_rate": 0.00016855629906084246, "loss": 1.7603, "step": 58103 }, { "epoch": 1.93, "grad_norm": 0.6145041584968567, "learning_rate": 0.0001685469036914687, "loss": 1.7076, "step": 58104 }, { "epoch": 1.93, "grad_norm": 0.6060776710510254, "learning_rate": 0.0001685375084816597, "loss": 1.8119, "step": 58105 }, { "epoch": 1.93, "grad_norm": 0.594473123550415, "learning_rate": 0.00016852811343142686, "loss": 1.7357, "step": 58106 }, { "epoch": 1.93, "grad_norm": 0.6088912487030029, "learning_rate": 0.00016851871854078133, "loss": 1.6622, "step": 58107 }, { "epoch": 1.93, "grad_norm": 0.6094327569007874, "learning_rate": 0.00016850932380973484, "loss": 1.6961, "step": 58108 }, { "epoch": 1.93, "grad_norm": 0.5954654216766357, "learning_rate": 0.00016849992923829855, "loss": 1.726, "step": 58109 }, { "epoch": 1.93, "grad_norm": 0.5893466472625732, "learning_rate": 0.00016849053482648408, "loss": 1.6706, "step": 58110 }, { "epoch": 1.93, "grad_norm": 0.6150326728820801, "learning_rate": 0.0001684811405743026, "loss": 1.7286, "step": 58111 }, { "epoch": 1.93, "grad_norm": 0.5901166200637817, "learning_rate": 0.00016847174648176576, "loss": 1.6869, "step": 58112 }, { "epoch": 1.93, "grad_norm": 0.5883081555366516, "learning_rate": 0.00016846235254888486, "loss": 1.7557, "step": 58113 }, { "epoch": 1.93, "grad_norm": 0.6064667105674744, "learning_rate": 0.00016845295877567117, "loss": 1.7606, "step": 58114 }, { "epoch": 1.93, "grad_norm": 0.6437761187553406, "learning_rate": 0.0001684435651621363, "loss": 1.7822, "step": 58115 }, { "epoch": 1.93, "grad_norm": 0.5699495077133179, "learning_rate": 0.00016843417170829143, "loss": 1.6683, "step": 58116 }, { "epoch": 1.93, "grad_norm": 0.6139613389968872, "learning_rate": 0.00016842477841414824, "loss": 1.7866, "step": 58117 }, { "epoch": 1.93, "grad_norm": 0.6107682585716248, "learning_rate": 0.00016841538527971795, "loss": 1.7606, "step": 58118 }, { "epoch": 1.93, "grad_norm": 0.5935943722724915, "learning_rate": 0.0001684059923050119, "loss": 1.7333, "step": 58119 }, { "epoch": 1.93, "grad_norm": 0.5862687230110168, "learning_rate": 0.00016839659949004167, "loss": 1.7247, "step": 58120 }, { "epoch": 1.93, "grad_norm": 0.5983774662017822, "learning_rate": 0.00016838720683481852, "loss": 1.7385, "step": 58121 }, { "epoch": 1.93, "grad_norm": 0.5770798325538635, "learning_rate": 0.00016837781433935394, "loss": 1.715, "step": 58122 }, { "epoch": 1.93, "grad_norm": 0.5933457612991333, "learning_rate": 0.00016836842200365927, "loss": 1.6985, "step": 58123 }, { "epoch": 1.93, "grad_norm": 0.5976809859275818, "learning_rate": 0.00016835902982774602, "loss": 1.6773, "step": 58124 }, { "epoch": 1.93, "grad_norm": 0.6099990606307983, "learning_rate": 0.0001683496378116255, "loss": 1.7141, "step": 58125 }, { "epoch": 1.93, "grad_norm": 0.5735923647880554, "learning_rate": 0.00016834024595530902, "loss": 1.7451, "step": 58126 }, { "epoch": 1.93, "grad_norm": 0.5798499584197998, "learning_rate": 0.00016833085425880818, "loss": 1.7346, "step": 58127 }, { "epoch": 1.93, "grad_norm": 0.6034507751464844, "learning_rate": 0.00016832146272213432, "loss": 1.6841, "step": 58128 }, { "epoch": 1.93, "grad_norm": 0.5945076942443848, "learning_rate": 0.00016831207134529864, "loss": 1.6642, "step": 58129 }, { "epoch": 1.93, "grad_norm": 0.5910331010818481, "learning_rate": 0.00016830268012831275, "loss": 1.7302, "step": 58130 }, { "epoch": 1.93, "grad_norm": 0.5907127857208252, "learning_rate": 0.0001682932890711881, "loss": 1.7107, "step": 58131 }, { "epoch": 1.93, "grad_norm": 0.5964946746826172, "learning_rate": 0.00016828389817393597, "loss": 1.7607, "step": 58132 }, { "epoch": 1.93, "grad_norm": 0.6009799838066101, "learning_rate": 0.0001682745074365677, "loss": 1.6943, "step": 58133 }, { "epoch": 1.93, "grad_norm": 0.6153888702392578, "learning_rate": 0.00016826511685909487, "loss": 1.7127, "step": 58134 }, { "epoch": 1.93, "grad_norm": 0.6009283661842346, "learning_rate": 0.00016825572644152878, "loss": 1.6845, "step": 58135 }, { "epoch": 1.93, "grad_norm": 0.6044597029685974, "learning_rate": 0.0001682463361838807, "loss": 1.7314, "step": 58136 }, { "epoch": 1.93, "grad_norm": 0.6151572465896606, "learning_rate": 0.00016823694608616215, "loss": 1.7125, "step": 58137 }, { "epoch": 1.93, "grad_norm": 0.5854911804199219, "learning_rate": 0.00016822755614838463, "loss": 1.7117, "step": 58138 }, { "epoch": 1.93, "grad_norm": 0.594418466091156, "learning_rate": 0.00016821816637055948, "loss": 1.7341, "step": 58139 }, { "epoch": 1.93, "grad_norm": 0.5927445292472839, "learning_rate": 0.00016820877675269793, "loss": 1.8224, "step": 58140 }, { "epoch": 1.93, "grad_norm": 0.6028257608413696, "learning_rate": 0.00016819938729481163, "loss": 1.7776, "step": 58141 }, { "epoch": 1.93, "grad_norm": 0.6130520105361938, "learning_rate": 0.00016818999799691185, "loss": 1.6902, "step": 58142 }, { "epoch": 1.93, "grad_norm": 0.5938624739646912, "learning_rate": 0.00016818060885900985, "loss": 1.8199, "step": 58143 }, { "epoch": 1.93, "grad_norm": 0.5855697989463806, "learning_rate": 0.00016817121988111725, "loss": 1.7331, "step": 58144 }, { "epoch": 1.93, "grad_norm": 2.5069589614868164, "learning_rate": 0.00016816183106324552, "loss": 1.7818, "step": 58145 }, { "epoch": 1.93, "grad_norm": 0.6097783446311951, "learning_rate": 0.00016815244240540568, "loss": 1.6847, "step": 58146 }, { "epoch": 1.93, "grad_norm": 0.6053882837295532, "learning_rate": 0.00016814305390760935, "loss": 1.7555, "step": 58147 }, { "epoch": 1.93, "grad_norm": 0.597714900970459, "learning_rate": 0.00016813366556986802, "loss": 1.7689, "step": 58148 }, { "epoch": 1.93, "grad_norm": 0.576012909412384, "learning_rate": 0.00016812427739219303, "loss": 1.73, "step": 58149 }, { "epoch": 1.93, "grad_norm": 0.5939261317253113, "learning_rate": 0.0001681148893745956, "loss": 1.6945, "step": 58150 }, { "epoch": 1.93, "grad_norm": 0.5909332633018494, "learning_rate": 0.0001681055015170873, "loss": 1.7137, "step": 58151 }, { "epoch": 1.93, "grad_norm": 0.6356024742126465, "learning_rate": 0.00016809611381967969, "loss": 1.7208, "step": 58152 }, { "epoch": 1.93, "grad_norm": 0.5917854905128479, "learning_rate": 0.00016808672628238372, "loss": 1.7183, "step": 58153 }, { "epoch": 1.93, "grad_norm": 0.6282084584236145, "learning_rate": 0.00016807733890521103, "loss": 1.6926, "step": 58154 }, { "epoch": 1.93, "grad_norm": 0.590305507183075, "learning_rate": 0.0001680679516881731, "loss": 1.7328, "step": 58155 }, { "epoch": 1.93, "grad_norm": 0.5982357859611511, "learning_rate": 0.0001680585646312813, "loss": 1.7394, "step": 58156 }, { "epoch": 1.93, "grad_norm": 0.5942020416259766, "learning_rate": 0.0001680491777345468, "loss": 1.672, "step": 58157 }, { "epoch": 1.93, "grad_norm": 0.5976910591125488, "learning_rate": 0.00016803979099798118, "loss": 1.7276, "step": 58158 }, { "epoch": 1.93, "grad_norm": 0.61982262134552, "learning_rate": 0.00016803040442159607, "loss": 1.7046, "step": 58159 }, { "epoch": 1.93, "grad_norm": 0.5779327750205994, "learning_rate": 0.00016802101800540231, "loss": 1.7466, "step": 58160 }, { "epoch": 1.94, "grad_norm": 0.582404375076294, "learning_rate": 0.00016801163174941163, "loss": 1.692, "step": 58161 }, { "epoch": 1.94, "grad_norm": 0.576524019241333, "learning_rate": 0.00016800224565363548, "loss": 1.7041, "step": 58162 }, { "epoch": 1.94, "grad_norm": 0.5739064812660217, "learning_rate": 0.00016799285971808516, "loss": 1.704, "step": 58163 }, { "epoch": 1.94, "grad_norm": 0.6008076667785645, "learning_rate": 0.00016798347394277196, "loss": 1.7729, "step": 58164 }, { "epoch": 1.94, "grad_norm": 0.604681670665741, "learning_rate": 0.00016797408832770744, "loss": 1.6774, "step": 58165 }, { "epoch": 1.94, "grad_norm": 0.5861013531684875, "learning_rate": 0.00016796470287290297, "loss": 1.7061, "step": 58166 }, { "epoch": 1.94, "grad_norm": 0.5802745223045349, "learning_rate": 0.00016795531757836978, "loss": 1.692, "step": 58167 }, { "epoch": 1.94, "grad_norm": 0.586018979549408, "learning_rate": 0.00016794593244411936, "loss": 1.6901, "step": 58168 }, { "epoch": 1.94, "grad_norm": 0.6124733090400696, "learning_rate": 0.0001679365474701632, "loss": 1.7441, "step": 58169 }, { "epoch": 1.94, "grad_norm": 0.6153123378753662, "learning_rate": 0.00016792716265651264, "loss": 1.7959, "step": 58170 }, { "epoch": 1.94, "grad_norm": 0.5856764912605286, "learning_rate": 0.00016791777800317898, "loss": 1.761, "step": 58171 }, { "epoch": 1.94, "grad_norm": 0.6162322163581848, "learning_rate": 0.00016790839351017375, "loss": 1.7691, "step": 58172 }, { "epoch": 1.94, "grad_norm": 0.5874679684638977, "learning_rate": 0.00016789900917750825, "loss": 1.6899, "step": 58173 }, { "epoch": 1.94, "grad_norm": 0.5808966755867004, "learning_rate": 0.00016788962500519382, "loss": 1.7524, "step": 58174 }, { "epoch": 1.94, "grad_norm": 0.6000489592552185, "learning_rate": 0.000167880240993242, "loss": 1.7539, "step": 58175 }, { "epoch": 1.94, "grad_norm": 0.5984749794006348, "learning_rate": 0.000167870857141664, "loss": 1.7108, "step": 58176 }, { "epoch": 1.94, "grad_norm": 0.5957679152488708, "learning_rate": 0.00016786147345047137, "loss": 1.7488, "step": 58177 }, { "epoch": 1.94, "grad_norm": 0.6177102327346802, "learning_rate": 0.0001678520899196754, "loss": 1.8068, "step": 58178 }, { "epoch": 1.94, "grad_norm": 0.5998175144195557, "learning_rate": 0.0001678427065492876, "loss": 1.6891, "step": 58179 }, { "epoch": 1.94, "grad_norm": 0.6024454832077026, "learning_rate": 0.00016783332333931929, "loss": 1.755, "step": 58180 }, { "epoch": 1.94, "grad_norm": 0.5929647088050842, "learning_rate": 0.00016782394028978174, "loss": 1.7526, "step": 58181 }, { "epoch": 1.94, "grad_norm": 0.600823700428009, "learning_rate": 0.00016781455740068658, "loss": 1.7709, "step": 58182 }, { "epoch": 1.94, "grad_norm": 0.5987376570701599, "learning_rate": 0.00016780517467204496, "loss": 1.7382, "step": 58183 }, { "epoch": 1.94, "grad_norm": 0.5999256372451782, "learning_rate": 0.00016779579210386846, "loss": 1.7675, "step": 58184 }, { "epoch": 1.94, "grad_norm": 0.592894434928894, "learning_rate": 0.00016778640969616838, "loss": 1.7692, "step": 58185 }, { "epoch": 1.94, "grad_norm": 0.5758771300315857, "learning_rate": 0.00016777702744895603, "loss": 1.7219, "step": 58186 }, { "epoch": 1.94, "grad_norm": 0.6122571229934692, "learning_rate": 0.000167767645362243, "loss": 1.709, "step": 58187 }, { "epoch": 1.94, "grad_norm": 0.5934750437736511, "learning_rate": 0.00016775826343604045, "loss": 1.7503, "step": 58188 }, { "epoch": 1.94, "grad_norm": 0.6095139980316162, "learning_rate": 0.00016774888167036, "loss": 1.7149, "step": 58189 }, { "epoch": 1.94, "grad_norm": 0.5785990953445435, "learning_rate": 0.00016773950006521282, "loss": 1.7393, "step": 58190 }, { "epoch": 1.94, "grad_norm": 0.6058868169784546, "learning_rate": 0.0001677301186206105, "loss": 1.782, "step": 58191 }, { "epoch": 1.94, "grad_norm": 0.588152289390564, "learning_rate": 0.00016772073733656434, "loss": 1.7767, "step": 58192 }, { "epoch": 1.94, "grad_norm": 0.612800657749176, "learning_rate": 0.00016771135621308558, "loss": 1.652, "step": 58193 }, { "epoch": 1.94, "grad_norm": 0.6021871566772461, "learning_rate": 0.00016770197525018584, "loss": 1.7653, "step": 58194 }, { "epoch": 1.94, "grad_norm": 0.6045895218849182, "learning_rate": 0.00016769259444787644, "loss": 1.7644, "step": 58195 }, { "epoch": 1.94, "grad_norm": 0.5844131708145142, "learning_rate": 0.00016768321380616864, "loss": 1.7848, "step": 58196 }, { "epoch": 1.94, "grad_norm": 0.5984638333320618, "learning_rate": 0.00016767383332507387, "loss": 1.7554, "step": 58197 }, { "epoch": 1.94, "grad_norm": 0.5934352278709412, "learning_rate": 0.0001676644530046037, "loss": 1.7208, "step": 58198 }, { "epoch": 1.94, "grad_norm": 0.5942202806472778, "learning_rate": 0.00016765507284476942, "loss": 1.6773, "step": 58199 }, { "epoch": 1.94, "grad_norm": 0.5875250101089478, "learning_rate": 0.00016764569284558224, "loss": 1.7086, "step": 58200 }, { "epoch": 1.94, "grad_norm": 0.5873530507087708, "learning_rate": 0.0001676363130070538, "loss": 1.7725, "step": 58201 }, { "epoch": 1.94, "grad_norm": 0.5986053347587585, "learning_rate": 0.00016762693332919535, "loss": 1.7182, "step": 58202 }, { "epoch": 1.94, "grad_norm": 0.5793585777282715, "learning_rate": 0.0001676175538120182, "loss": 1.667, "step": 58203 }, { "epoch": 1.94, "grad_norm": 0.6055647730827332, "learning_rate": 0.00016760817445553384, "loss": 1.7426, "step": 58204 }, { "epoch": 1.94, "grad_norm": 0.6112313866615295, "learning_rate": 0.00016759879525975391, "loss": 1.6723, "step": 58205 }, { "epoch": 1.94, "grad_norm": 0.6028800010681152, "learning_rate": 0.0001675894162246893, "loss": 1.71, "step": 58206 }, { "epoch": 1.94, "grad_norm": 0.5798163414001465, "learning_rate": 0.0001675800373503516, "loss": 1.7141, "step": 58207 }, { "epoch": 1.94, "grad_norm": 0.5798397660255432, "learning_rate": 0.0001675706586367523, "loss": 1.7935, "step": 58208 }, { "epoch": 1.94, "grad_norm": 0.6047534942626953, "learning_rate": 0.00016756128008390278, "loss": 1.7091, "step": 58209 }, { "epoch": 1.94, "grad_norm": 0.591087818145752, "learning_rate": 0.0001675519016918142, "loss": 1.7454, "step": 58210 }, { "epoch": 1.94, "grad_norm": 0.6068152785301208, "learning_rate": 0.0001675425234604981, "loss": 1.7946, "step": 58211 }, { "epoch": 1.94, "grad_norm": 0.6057793498039246, "learning_rate": 0.000167533145389966, "loss": 1.7033, "step": 58212 }, { "epoch": 1.94, "grad_norm": 0.5833024978637695, "learning_rate": 0.0001675237674802291, "loss": 1.7633, "step": 58213 }, { "epoch": 1.94, "grad_norm": 0.6018728613853455, "learning_rate": 0.00016751438973129876, "loss": 1.7349, "step": 58214 }, { "epoch": 1.94, "grad_norm": 0.5988460779190063, "learning_rate": 0.00016750501214318652, "loss": 1.7284, "step": 58215 }, { "epoch": 1.94, "grad_norm": 0.5886886119842529, "learning_rate": 0.00016749563471590365, "loss": 1.7394, "step": 58216 }, { "epoch": 1.94, "grad_norm": 0.61855548620224, "learning_rate": 0.00016748625744946144, "loss": 1.7105, "step": 58217 }, { "epoch": 1.94, "grad_norm": 0.5983774662017822, "learning_rate": 0.00016747688034387143, "loss": 1.6989, "step": 58218 }, { "epoch": 1.94, "grad_norm": 0.5957253575325012, "learning_rate": 0.00016746750339914507, "loss": 1.7243, "step": 58219 }, { "epoch": 1.94, "grad_norm": 0.6076456904411316, "learning_rate": 0.00016745812661529363, "loss": 1.6469, "step": 58220 }, { "epoch": 1.94, "grad_norm": 0.6141288876533508, "learning_rate": 0.00016744874999232836, "loss": 1.7842, "step": 58221 }, { "epoch": 1.94, "grad_norm": 0.6104076504707336, "learning_rate": 0.00016743937353026092, "loss": 1.743, "step": 58222 }, { "epoch": 1.94, "grad_norm": 0.5798232555389404, "learning_rate": 0.00016742999722910252, "loss": 1.7571, "step": 58223 }, { "epoch": 1.94, "grad_norm": 0.5975107550621033, "learning_rate": 0.00016742062108886447, "loss": 1.7798, "step": 58224 }, { "epoch": 1.94, "grad_norm": 0.5827193260192871, "learning_rate": 0.00016741124510955837, "loss": 1.6262, "step": 58225 }, { "epoch": 1.94, "grad_norm": 0.5880056619644165, "learning_rate": 0.00016740186929119536, "loss": 1.7174, "step": 58226 }, { "epoch": 1.94, "grad_norm": 0.6245437264442444, "learning_rate": 0.00016739249363378707, "loss": 1.6995, "step": 58227 }, { "epoch": 1.94, "grad_norm": 0.6194352507591248, "learning_rate": 0.00016738311813734459, "loss": 1.7221, "step": 58228 }, { "epoch": 1.94, "grad_norm": 0.5799506306648254, "learning_rate": 0.00016737374280187964, "loss": 1.761, "step": 58229 }, { "epoch": 1.94, "grad_norm": 0.5836091041564941, "learning_rate": 0.0001673643676274034, "loss": 1.72, "step": 58230 }, { "epoch": 1.94, "grad_norm": 0.5861541032791138, "learning_rate": 0.00016735499261392715, "loss": 1.7025, "step": 58231 }, { "epoch": 1.94, "grad_norm": 0.6037291884422302, "learning_rate": 0.00016734561776146257, "loss": 1.7931, "step": 58232 }, { "epoch": 1.94, "grad_norm": 0.6095672249794006, "learning_rate": 0.00016733624307002065, "loss": 1.7694, "step": 58233 }, { "epoch": 1.94, "grad_norm": 0.6025161147117615, "learning_rate": 0.00016732686853961315, "loss": 1.7228, "step": 58234 }, { "epoch": 1.94, "grad_norm": 0.6012619137763977, "learning_rate": 0.00016731749417025132, "loss": 1.6956, "step": 58235 }, { "epoch": 1.94, "grad_norm": 0.577451765537262, "learning_rate": 0.00016730811996194632, "loss": 1.6967, "step": 58236 }, { "epoch": 1.94, "grad_norm": 0.6350842118263245, "learning_rate": 0.00016729874591470985, "loss": 1.735, "step": 58237 }, { "epoch": 1.94, "grad_norm": 0.6415469646453857, "learning_rate": 0.00016728937202855303, "loss": 1.7003, "step": 58238 }, { "epoch": 1.94, "grad_norm": 0.6124021410942078, "learning_rate": 0.0001672799983034875, "loss": 1.7615, "step": 58239 }, { "epoch": 1.94, "grad_norm": 0.5994206070899963, "learning_rate": 0.0001672706247395244, "loss": 1.7067, "step": 58240 }, { "epoch": 1.94, "grad_norm": 0.600063681602478, "learning_rate": 0.00016726125133667525, "loss": 1.7075, "step": 58241 }, { "epoch": 1.94, "grad_norm": 0.598653256893158, "learning_rate": 0.00016725187809495145, "loss": 1.7971, "step": 58242 }, { "epoch": 1.94, "grad_norm": 0.6017600297927856, "learning_rate": 0.00016724250501436416, "loss": 1.698, "step": 58243 }, { "epoch": 1.94, "grad_norm": 0.5996886491775513, "learning_rate": 0.00016723313209492503, "loss": 1.6998, "step": 58244 }, { "epoch": 1.94, "grad_norm": 0.5989423990249634, "learning_rate": 0.0001672237593366453, "loss": 1.7414, "step": 58245 }, { "epoch": 1.94, "grad_norm": 0.6132335662841797, "learning_rate": 0.00016721438673953623, "loss": 1.7238, "step": 58246 }, { "epoch": 1.94, "grad_norm": 0.608579695224762, "learning_rate": 0.00016720501430360937, "loss": 1.7576, "step": 58247 }, { "epoch": 1.94, "grad_norm": 0.5864393711090088, "learning_rate": 0.0001671956420288762, "loss": 1.685, "step": 58248 }, { "epoch": 1.94, "grad_norm": 0.6000611186027527, "learning_rate": 0.00016718626991534788, "loss": 1.655, "step": 58249 }, { "epoch": 1.94, "grad_norm": 0.5931009650230408, "learning_rate": 0.0001671768979630358, "loss": 1.7434, "step": 58250 }, { "epoch": 1.94, "grad_norm": 0.598408043384552, "learning_rate": 0.0001671675261719515, "loss": 1.7556, "step": 58251 }, { "epoch": 1.94, "grad_norm": 0.5868601202964783, "learning_rate": 0.0001671581545421062, "loss": 1.6939, "step": 58252 }, { "epoch": 1.94, "grad_norm": 0.7234808802604675, "learning_rate": 0.00016714878307351125, "loss": 1.6584, "step": 58253 }, { "epoch": 1.94, "grad_norm": 0.6097123622894287, "learning_rate": 0.0001671394117661781, "loss": 1.7475, "step": 58254 }, { "epoch": 1.94, "grad_norm": 0.5687872171401978, "learning_rate": 0.00016713004062011828, "loss": 1.7132, "step": 58255 }, { "epoch": 1.94, "grad_norm": 0.614915132522583, "learning_rate": 0.00016712066963534298, "loss": 1.7579, "step": 58256 }, { "epoch": 1.94, "grad_norm": 0.6049394607543945, "learning_rate": 0.0001671112988118635, "loss": 1.7039, "step": 58257 }, { "epoch": 1.94, "grad_norm": 0.5911660194396973, "learning_rate": 0.00016710192814969142, "loss": 1.7662, "step": 58258 }, { "epoch": 1.94, "grad_norm": 0.6536353826522827, "learning_rate": 0.00016709255764883803, "loss": 1.8163, "step": 58259 }, { "epoch": 1.94, "grad_norm": 0.6538627743721008, "learning_rate": 0.00016708318730931457, "loss": 1.7509, "step": 58260 }, { "epoch": 1.94, "grad_norm": 0.6063726544380188, "learning_rate": 0.00016707381713113254, "loss": 1.719, "step": 58261 }, { "epoch": 1.94, "grad_norm": 0.5943247675895691, "learning_rate": 0.00016706444711430353, "loss": 1.6499, "step": 58262 }, { "epoch": 1.94, "grad_norm": 0.614595353603363, "learning_rate": 0.00016705507725883848, "loss": 1.7773, "step": 58263 }, { "epoch": 1.94, "grad_norm": 0.5859324336051941, "learning_rate": 0.000167045707564749, "loss": 1.734, "step": 58264 }, { "epoch": 1.94, "grad_norm": 0.5833689570426941, "learning_rate": 0.0001670363380320465, "loss": 1.6559, "step": 58265 }, { "epoch": 1.94, "grad_norm": 0.6119612455368042, "learning_rate": 0.00016702696866074233, "loss": 1.7313, "step": 58266 }, { "epoch": 1.94, "grad_norm": 0.6047775149345398, "learning_rate": 0.00016701759945084773, "loss": 1.7398, "step": 58267 }, { "epoch": 1.94, "grad_norm": 0.592353105545044, "learning_rate": 0.00016700823040237414, "loss": 1.697, "step": 58268 }, { "epoch": 1.94, "grad_norm": 0.5781882405281067, "learning_rate": 0.0001669988615153332, "loss": 1.7607, "step": 58269 }, { "epoch": 1.94, "grad_norm": 0.6127947568893433, "learning_rate": 0.00016698949278973579, "loss": 1.7513, "step": 58270 }, { "epoch": 1.94, "grad_norm": 0.6038485765457153, "learning_rate": 0.0001669801242255936, "loss": 1.7148, "step": 58271 }, { "epoch": 1.94, "grad_norm": 0.5815765261650085, "learning_rate": 0.000166970755822918, "loss": 1.7726, "step": 58272 }, { "epoch": 1.94, "grad_norm": 0.5869405269622803, "learning_rate": 0.0001669613875817203, "loss": 1.7153, "step": 58273 }, { "epoch": 1.94, "grad_norm": 0.6012016534805298, "learning_rate": 0.0001669520195020118, "loss": 1.7192, "step": 58274 }, { "epoch": 1.94, "grad_norm": 0.6287211775779724, "learning_rate": 0.00016694265158380392, "loss": 1.7048, "step": 58275 }, { "epoch": 1.94, "grad_norm": 0.5981878638267517, "learning_rate": 0.0001669332838271083, "loss": 1.6961, "step": 58276 }, { "epoch": 1.94, "grad_norm": 0.6176261901855469, "learning_rate": 0.0001669239162319358, "loss": 1.7406, "step": 58277 }, { "epoch": 1.94, "grad_norm": 0.6220760941505432, "learning_rate": 0.0001669145487982981, "loss": 1.794, "step": 58278 }, { "epoch": 1.94, "grad_norm": 0.6222420334815979, "learning_rate": 0.00016690518152620663, "loss": 1.7505, "step": 58279 }, { "epoch": 1.94, "grad_norm": 0.6090993881225586, "learning_rate": 0.00016689581441567264, "loss": 1.7423, "step": 58280 }, { "epoch": 1.94, "grad_norm": 0.8328428864479065, "learning_rate": 0.00016688644746670742, "loss": 1.6543, "step": 58281 }, { "epoch": 1.94, "grad_norm": 0.6258827447891235, "learning_rate": 0.00016687708067932255, "loss": 1.7883, "step": 58282 }, { "epoch": 1.94, "grad_norm": 0.5988135933876038, "learning_rate": 0.00016686771405352932, "loss": 1.754, "step": 58283 }, { "epoch": 1.94, "grad_norm": 0.595806896686554, "learning_rate": 0.00016685834758933888, "loss": 1.7223, "step": 58284 }, { "epoch": 1.94, "grad_norm": 0.6129022836685181, "learning_rate": 0.00016684898128676285, "loss": 1.759, "step": 58285 }, { "epoch": 1.94, "grad_norm": 0.5887205600738525, "learning_rate": 0.00016683961514581269, "loss": 1.7217, "step": 58286 }, { "epoch": 1.94, "grad_norm": 0.6123367547988892, "learning_rate": 0.00016683024916649953, "loss": 1.7367, "step": 58287 }, { "epoch": 1.94, "grad_norm": 0.599850058555603, "learning_rate": 0.00016682088334883473, "loss": 1.7322, "step": 58288 }, { "epoch": 1.94, "grad_norm": 0.580754816532135, "learning_rate": 0.0001668115176928299, "loss": 1.741, "step": 58289 }, { "epoch": 1.94, "grad_norm": 0.5975124835968018, "learning_rate": 0.00016680215219849626, "loss": 1.6399, "step": 58290 }, { "epoch": 1.94, "grad_norm": 0.5982988476753235, "learning_rate": 0.00016679278686584502, "loss": 1.75, "step": 58291 }, { "epoch": 1.94, "grad_norm": 1.1150983572006226, "learning_rate": 0.0001667834216948879, "loss": 1.7529, "step": 58292 }, { "epoch": 1.94, "grad_norm": 0.59731525182724, "learning_rate": 0.0001667740566856359, "loss": 1.7384, "step": 58293 }, { "epoch": 1.94, "grad_norm": 2.0943892002105713, "learning_rate": 0.00016676469183810072, "loss": 1.7761, "step": 58294 }, { "epoch": 1.94, "grad_norm": 0.5896283984184265, "learning_rate": 0.00016675532715229344, "loss": 1.7023, "step": 58295 }, { "epoch": 1.94, "grad_norm": 0.6296567320823669, "learning_rate": 0.00016674596262822568, "loss": 1.7234, "step": 58296 }, { "epoch": 1.94, "grad_norm": 0.6028828024864197, "learning_rate": 0.00016673659826590867, "loss": 1.7559, "step": 58297 }, { "epoch": 1.94, "grad_norm": 0.5876752138137817, "learning_rate": 0.0001667272340653537, "loss": 1.7398, "step": 58298 }, { "epoch": 1.94, "grad_norm": 0.6161116361618042, "learning_rate": 0.00016671787002657233, "loss": 1.7624, "step": 58299 }, { "epoch": 1.94, "grad_norm": 2.610138416290283, "learning_rate": 0.0001667085061495757, "loss": 1.6599, "step": 58300 }, { "epoch": 1.94, "grad_norm": 0.6311700940132141, "learning_rate": 0.00016669914243437546, "loss": 1.7476, "step": 58301 }, { "epoch": 1.94, "grad_norm": 0.592970073223114, "learning_rate": 0.00016668977888098282, "loss": 1.6914, "step": 58302 }, { "epoch": 1.94, "grad_norm": 0.6238303184509277, "learning_rate": 0.00016668041548940902, "loss": 1.7547, "step": 58303 }, { "epoch": 1.94, "grad_norm": 0.6142381429672241, "learning_rate": 0.00016667105225966566, "loss": 1.7215, "step": 58304 }, { "epoch": 1.94, "grad_norm": 0.5817909240722656, "learning_rate": 0.00016666168919176388, "loss": 1.7038, "step": 58305 }, { "epoch": 1.94, "grad_norm": 0.5971181988716125, "learning_rate": 0.0001666523262857153, "loss": 1.7693, "step": 58306 }, { "epoch": 1.94, "grad_norm": 0.5911939144134521, "learning_rate": 0.00016664296354153098, "loss": 1.7212, "step": 58307 }, { "epoch": 1.94, "grad_norm": 0.5858710408210754, "learning_rate": 0.00016663360095922259, "loss": 1.7147, "step": 58308 }, { "epoch": 1.94, "grad_norm": 0.5863339304924011, "learning_rate": 0.00016662423853880135, "loss": 1.6628, "step": 58309 }, { "epoch": 1.94, "grad_norm": 0.5857802629470825, "learning_rate": 0.00016661487628027855, "loss": 1.7574, "step": 58310 }, { "epoch": 1.94, "grad_norm": 0.6009950041770935, "learning_rate": 0.00016660551418366576, "loss": 1.7362, "step": 58311 }, { "epoch": 1.94, "grad_norm": 0.6006565690040588, "learning_rate": 0.00016659615224897415, "loss": 1.6984, "step": 58312 }, { "epoch": 1.94, "grad_norm": 0.600865364074707, "learning_rate": 0.00016658679047621512, "loss": 1.7646, "step": 58313 }, { "epoch": 1.94, "grad_norm": 0.5836856365203857, "learning_rate": 0.00016657742886540004, "loss": 1.7508, "step": 58314 }, { "epoch": 1.94, "grad_norm": 0.5970569849014282, "learning_rate": 0.00016656806741654042, "loss": 1.712, "step": 58315 }, { "epoch": 1.94, "grad_norm": 0.567976176738739, "learning_rate": 0.0001665587061296475, "loss": 1.6623, "step": 58316 }, { "epoch": 1.94, "grad_norm": 0.6196128129959106, "learning_rate": 0.00016654934500473254, "loss": 1.6499, "step": 58317 }, { "epoch": 1.94, "grad_norm": 0.5855448246002197, "learning_rate": 0.00016653998404180713, "loss": 1.735, "step": 58318 }, { "epoch": 1.94, "grad_norm": 0.598443329334259, "learning_rate": 0.00016653062324088248, "loss": 1.7132, "step": 58319 }, { "epoch": 1.94, "grad_norm": 0.5923877954483032, "learning_rate": 0.00016652126260196991, "loss": 1.7119, "step": 58320 }, { "epoch": 1.94, "grad_norm": 0.595719039440155, "learning_rate": 0.00016651190212508085, "loss": 1.6979, "step": 58321 }, { "epoch": 1.94, "grad_norm": 0.6063315868377686, "learning_rate": 0.0001665025418102269, "loss": 1.7295, "step": 58322 }, { "epoch": 1.94, "grad_norm": 0.58650141954422, "learning_rate": 0.00016649318165741896, "loss": 1.6513, "step": 58323 }, { "epoch": 1.94, "grad_norm": 0.6228632926940918, "learning_rate": 0.00016648382166666864, "loss": 1.735, "step": 58324 }, { "epoch": 1.94, "grad_norm": 0.6027542948722839, "learning_rate": 0.00016647446183798737, "loss": 1.7426, "step": 58325 }, { "epoch": 1.94, "grad_norm": 0.605077862739563, "learning_rate": 0.00016646510217138648, "loss": 1.6616, "step": 58326 }, { "epoch": 1.94, "grad_norm": 0.6064828038215637, "learning_rate": 0.00016645574266687714, "loss": 1.7511, "step": 58327 }, { "epoch": 1.94, "grad_norm": 0.5739050507545471, "learning_rate": 0.00016644638332447085, "loss": 1.6156, "step": 58328 }, { "epoch": 1.94, "grad_norm": 0.5930579304695129, "learning_rate": 0.00016643702414417908, "loss": 1.6747, "step": 58329 }, { "epoch": 1.94, "grad_norm": 0.5880841612815857, "learning_rate": 0.00016642766512601307, "loss": 1.7174, "step": 58330 }, { "epoch": 1.94, "grad_norm": 0.5919061303138733, "learning_rate": 0.00016641830626998414, "loss": 1.762, "step": 58331 }, { "epoch": 1.94, "grad_norm": 0.5960403680801392, "learning_rate": 0.00016640894757610373, "loss": 1.6943, "step": 58332 }, { "epoch": 1.94, "grad_norm": 0.5758919715881348, "learning_rate": 0.00016639958904438325, "loss": 1.6557, "step": 58333 }, { "epoch": 1.94, "grad_norm": 0.6089361310005188, "learning_rate": 0.00016639023067483382, "loss": 1.6848, "step": 58334 }, { "epoch": 1.94, "grad_norm": 0.5707613229751587, "learning_rate": 0.000166380872467467, "loss": 1.7495, "step": 58335 }, { "epoch": 1.94, "grad_norm": 0.6188065409660339, "learning_rate": 0.00016637151442229421, "loss": 1.7592, "step": 58336 }, { "epoch": 1.94, "grad_norm": 0.5961630344390869, "learning_rate": 0.0001663621565393267, "loss": 1.7351, "step": 58337 }, { "epoch": 1.94, "grad_norm": 0.6068735718727112, "learning_rate": 0.00016635279881857575, "loss": 1.8032, "step": 58338 }, { "epoch": 1.94, "grad_norm": 0.6080620288848877, "learning_rate": 0.00016634344126005294, "loss": 1.7107, "step": 58339 }, { "epoch": 1.94, "grad_norm": 0.5973990559577942, "learning_rate": 0.00016633408386376947, "loss": 1.6912, "step": 58340 }, { "epoch": 1.94, "grad_norm": 0.6152263283729553, "learning_rate": 0.00016632472662973658, "loss": 1.7544, "step": 58341 }, { "epoch": 1.94, "grad_norm": 0.6219906806945801, "learning_rate": 0.00016631536955796598, "loss": 1.7244, "step": 58342 }, { "epoch": 1.94, "grad_norm": 0.6101881265640259, "learning_rate": 0.00016630601264846863, "loss": 1.6838, "step": 58343 }, { "epoch": 1.94, "grad_norm": 0.6021406054496765, "learning_rate": 0.00016629665590125624, "loss": 1.6708, "step": 58344 }, { "epoch": 1.94, "grad_norm": 0.6013993620872498, "learning_rate": 0.00016628729931633988, "loss": 1.6694, "step": 58345 }, { "epoch": 1.94, "grad_norm": 0.5911078453063965, "learning_rate": 0.00016627794289373113, "loss": 1.7389, "step": 58346 }, { "epoch": 1.94, "grad_norm": 0.6025649905204773, "learning_rate": 0.00016626858663344127, "loss": 1.7546, "step": 58347 }, { "epoch": 1.94, "grad_norm": 0.5897293090820312, "learning_rate": 0.00016625923053548156, "loss": 1.7296, "step": 58348 }, { "epoch": 1.94, "grad_norm": 0.6124734878540039, "learning_rate": 0.00016624987459986352, "loss": 1.655, "step": 58349 }, { "epoch": 1.94, "grad_norm": 0.5846604704856873, "learning_rate": 0.0001662405188265983, "loss": 1.7532, "step": 58350 }, { "epoch": 1.94, "grad_norm": 0.5961676836013794, "learning_rate": 0.00016623116321569752, "loss": 1.694, "step": 58351 }, { "epoch": 1.94, "grad_norm": 0.5914062261581421, "learning_rate": 0.00016622180776717238, "loss": 1.6931, "step": 58352 }, { "epoch": 1.94, "grad_norm": 0.6027711033821106, "learning_rate": 0.0001662124524810342, "loss": 1.7415, "step": 58353 }, { "epoch": 1.94, "grad_norm": 0.5998326539993286, "learning_rate": 0.00016620309735729442, "loss": 1.7044, "step": 58354 }, { "epoch": 1.94, "grad_norm": 0.5986221432685852, "learning_rate": 0.00016619374239596426, "loss": 1.7355, "step": 58355 }, { "epoch": 1.94, "grad_norm": 0.6052870750427246, "learning_rate": 0.00016618438759705536, "loss": 1.6974, "step": 58356 }, { "epoch": 1.94, "grad_norm": 0.5942937731742859, "learning_rate": 0.0001661750329605787, "loss": 1.7324, "step": 58357 }, { "epoch": 1.94, "grad_norm": 0.637951672077179, "learning_rate": 0.00016616567848654602, "loss": 1.7779, "step": 58358 }, { "epoch": 1.94, "grad_norm": 0.5899222493171692, "learning_rate": 0.00016615632417496846, "loss": 1.7001, "step": 58359 }, { "epoch": 1.94, "grad_norm": 0.6119886636734009, "learning_rate": 0.0001661469700258573, "loss": 1.7153, "step": 58360 }, { "epoch": 1.94, "grad_norm": 0.5845491886138916, "learning_rate": 0.0001661376160392241, "loss": 1.6905, "step": 58361 }, { "epoch": 1.94, "grad_norm": 0.5931934714317322, "learning_rate": 0.00016612826221508009, "loss": 1.7619, "step": 58362 }, { "epoch": 1.94, "grad_norm": 0.6035275459289551, "learning_rate": 0.00016611890855343654, "loss": 1.7329, "step": 58363 }, { "epoch": 1.94, "grad_norm": 0.6086992025375366, "learning_rate": 0.00016610955505430488, "loss": 1.7005, "step": 58364 }, { "epoch": 1.94, "grad_norm": 0.5950561165809631, "learning_rate": 0.00016610020171769666, "loss": 1.7, "step": 58365 }, { "epoch": 1.94, "grad_norm": 0.5843966603279114, "learning_rate": 0.00016609084854362304, "loss": 1.6929, "step": 58366 }, { "epoch": 1.94, "grad_norm": 0.6410692930221558, "learning_rate": 0.0001660814955320953, "loss": 1.727, "step": 58367 }, { "epoch": 1.94, "grad_norm": 0.5985875129699707, "learning_rate": 0.00016607214268312502, "loss": 1.7324, "step": 58368 }, { "epoch": 1.94, "grad_norm": 0.5742983222007751, "learning_rate": 0.00016606278999672336, "loss": 1.6908, "step": 58369 }, { "epoch": 1.94, "grad_norm": 0.5895459651947021, "learning_rate": 0.00016605343747290167, "loss": 1.7096, "step": 58370 }, { "epoch": 1.94, "grad_norm": 0.5945616960525513, "learning_rate": 0.00016604408511167136, "loss": 1.7305, "step": 58371 }, { "epoch": 1.94, "grad_norm": 0.5982688069343567, "learning_rate": 0.00016603473291304405, "loss": 1.6862, "step": 58372 }, { "epoch": 1.94, "grad_norm": 1.458150863647461, "learning_rate": 0.00016602538087703055, "loss": 1.7447, "step": 58373 }, { "epoch": 1.94, "grad_norm": 0.5920405387878418, "learning_rate": 0.00016601602900364254, "loss": 1.6859, "step": 58374 }, { "epoch": 1.94, "grad_norm": 0.6111009120941162, "learning_rate": 0.00016600667729289146, "loss": 1.7157, "step": 58375 }, { "epoch": 1.94, "grad_norm": 0.5996288657188416, "learning_rate": 0.00016599732574478851, "loss": 1.7367, "step": 58376 }, { "epoch": 1.94, "grad_norm": 0.5791721940040588, "learning_rate": 0.00016598797435934495, "loss": 1.7248, "step": 58377 }, { "epoch": 1.94, "grad_norm": 0.6162506937980652, "learning_rate": 0.0001659786231365722, "loss": 1.7252, "step": 58378 }, { "epoch": 1.94, "grad_norm": 0.6059128642082214, "learning_rate": 0.00016596927207648197, "loss": 1.741, "step": 58379 }, { "epoch": 1.94, "grad_norm": 0.6131691932678223, "learning_rate": 0.000165959921179085, "loss": 1.6991, "step": 58380 }, { "epoch": 1.94, "grad_norm": 0.6154579520225525, "learning_rate": 0.00016595057044439294, "loss": 1.7317, "step": 58381 }, { "epoch": 1.94, "grad_norm": 0.5926600098609924, "learning_rate": 0.00016594121987241727, "loss": 1.8006, "step": 58382 }, { "epoch": 1.94, "grad_norm": 0.5891928672790527, "learning_rate": 0.00016593186946316923, "loss": 1.6615, "step": 58383 }, { "epoch": 1.94, "grad_norm": 0.6067362427711487, "learning_rate": 0.00016592251921665997, "loss": 1.7516, "step": 58384 }, { "epoch": 1.94, "grad_norm": 0.6005886197090149, "learning_rate": 0.00016591316913290108, "loss": 1.767, "step": 58385 }, { "epoch": 1.94, "grad_norm": 0.6044664978981018, "learning_rate": 0.00016590381921190403, "loss": 1.7967, "step": 58386 }, { "epoch": 1.94, "grad_norm": 0.599040687084198, "learning_rate": 0.0001658944694536798, "loss": 1.716, "step": 58387 }, { "epoch": 1.94, "grad_norm": 0.597389280796051, "learning_rate": 0.00016588511985823987, "loss": 1.6784, "step": 58388 }, { "epoch": 1.94, "grad_norm": 0.5919012427330017, "learning_rate": 0.00016587577042559578, "loss": 1.6901, "step": 58389 }, { "epoch": 1.94, "grad_norm": 0.5840058326721191, "learning_rate": 0.00016586642115575873, "loss": 1.7361, "step": 58390 }, { "epoch": 1.94, "grad_norm": 0.5932948589324951, "learning_rate": 0.00016585707204874, "loss": 1.6577, "step": 58391 }, { "epoch": 1.94, "grad_norm": 0.5910874009132385, "learning_rate": 0.00016584772310455098, "loss": 1.7139, "step": 58392 }, { "epoch": 1.94, "grad_norm": 0.5851888656616211, "learning_rate": 0.00016583837432320331, "loss": 1.6643, "step": 58393 }, { "epoch": 1.94, "grad_norm": 0.6120976209640503, "learning_rate": 0.00016582902570470785, "loss": 1.6764, "step": 58394 }, { "epoch": 1.94, "grad_norm": 0.6057092547416687, "learning_rate": 0.00016581967724907618, "loss": 1.7712, "step": 58395 }, { "epoch": 1.94, "grad_norm": 0.6036198139190674, "learning_rate": 0.00016581032895631983, "loss": 1.7902, "step": 58396 }, { "epoch": 1.94, "grad_norm": 0.5968679785728455, "learning_rate": 0.0001658009808264499, "loss": 1.6787, "step": 58397 }, { "epoch": 1.94, "grad_norm": 0.6172785758972168, "learning_rate": 0.00016579163285947767, "loss": 1.779, "step": 58398 }, { "epoch": 1.94, "grad_norm": 0.6099433898925781, "learning_rate": 0.0001657822850554148, "loss": 1.6892, "step": 58399 }, { "epoch": 1.94, "grad_norm": 0.5834097266197205, "learning_rate": 0.00016577293741427244, "loss": 1.7414, "step": 58400 }, { "epoch": 1.94, "grad_norm": 0.5983850955963135, "learning_rate": 0.0001657635899360618, "loss": 1.7134, "step": 58401 }, { "epoch": 1.94, "grad_norm": 0.6280667781829834, "learning_rate": 0.00016575424262079444, "loss": 1.7697, "step": 58402 }, { "epoch": 1.94, "grad_norm": 0.6415380239486694, "learning_rate": 0.00016574489546848173, "loss": 1.7071, "step": 58403 }, { "epoch": 1.94, "grad_norm": 0.6105301976203918, "learning_rate": 0.00016573554847913496, "loss": 1.7118, "step": 58404 }, { "epoch": 1.94, "grad_norm": 0.6045137047767639, "learning_rate": 0.0001657262016527653, "loss": 1.7165, "step": 58405 }, { "epoch": 1.94, "grad_norm": 0.6101857423782349, "learning_rate": 0.00016571685498938438, "loss": 1.7657, "step": 58406 }, { "epoch": 1.94, "grad_norm": 0.592353880405426, "learning_rate": 0.00016570750848900344, "loss": 1.8283, "step": 58407 }, { "epoch": 1.94, "grad_norm": 0.6035131216049194, "learning_rate": 0.00016569816215163366, "loss": 1.6808, "step": 58408 }, { "epoch": 1.94, "grad_norm": 0.5901581645011902, "learning_rate": 0.00016568881597728665, "loss": 1.755, "step": 58409 }, { "epoch": 1.94, "grad_norm": 0.6000032424926758, "learning_rate": 0.00016567946996597348, "loss": 1.777, "step": 58410 }, { "epoch": 1.94, "grad_norm": 0.6082091331481934, "learning_rate": 0.00016567012411770578, "loss": 1.6828, "step": 58411 }, { "epoch": 1.94, "grad_norm": 0.6057386994361877, "learning_rate": 0.00016566077843249466, "loss": 1.7025, "step": 58412 }, { "epoch": 1.94, "grad_norm": 0.6072537899017334, "learning_rate": 0.00016565143291035164, "loss": 1.7135, "step": 58413 }, { "epoch": 1.94, "grad_norm": 0.6216581463813782, "learning_rate": 0.000165642087551288, "loss": 1.7925, "step": 58414 }, { "epoch": 1.94, "grad_norm": 0.5927900671958923, "learning_rate": 0.00016563274235531497, "loss": 1.7181, "step": 58415 }, { "epoch": 1.94, "grad_norm": 0.6106728315353394, "learning_rate": 0.0001656233973224441, "loss": 1.7425, "step": 58416 }, { "epoch": 1.94, "grad_norm": 0.6008104085922241, "learning_rate": 0.0001656140524526865, "loss": 1.7686, "step": 58417 }, { "epoch": 1.94, "grad_norm": 0.6033191680908203, "learning_rate": 0.00016560470774605373, "loss": 1.7174, "step": 58418 }, { "epoch": 1.94, "grad_norm": 0.5663428902626038, "learning_rate": 0.00016559536320255708, "loss": 1.6719, "step": 58419 }, { "epoch": 1.94, "grad_norm": 0.5803486704826355, "learning_rate": 0.00016558601882220775, "loss": 1.7238, "step": 58420 }, { "epoch": 1.94, "grad_norm": 0.5888548493385315, "learning_rate": 0.00016557667460501726, "loss": 1.7874, "step": 58421 }, { "epoch": 1.94, "grad_norm": 0.5952700972557068, "learning_rate": 0.0001655673305509968, "loss": 1.7737, "step": 58422 }, { "epoch": 1.94, "grad_norm": 0.6312333345413208, "learning_rate": 0.0001655579866601579, "loss": 1.7732, "step": 58423 }, { "epoch": 1.94, "grad_norm": 0.6148368716239929, "learning_rate": 0.00016554864293251168, "loss": 1.7176, "step": 58424 }, { "epoch": 1.94, "grad_norm": 0.5998666286468506, "learning_rate": 0.0001655392993680697, "loss": 1.7481, "step": 58425 }, { "epoch": 1.94, "grad_norm": 0.6011976003646851, "learning_rate": 0.0001655299559668432, "loss": 1.7598, "step": 58426 }, { "epoch": 1.94, "grad_norm": 0.6022152304649353, "learning_rate": 0.0001655206127288434, "loss": 1.744, "step": 58427 }, { "epoch": 1.94, "grad_norm": 0.599391758441925, "learning_rate": 0.0001655112696540819, "loss": 1.7019, "step": 58428 }, { "epoch": 1.94, "grad_norm": 0.592864453792572, "learning_rate": 0.0001655019267425699, "loss": 1.6312, "step": 58429 }, { "epoch": 1.94, "grad_norm": 0.5830758810043335, "learning_rate": 0.00016549258399431864, "loss": 1.731, "step": 58430 }, { "epoch": 1.94, "grad_norm": 0.5810918807983398, "learning_rate": 0.00016548324140933953, "loss": 1.7512, "step": 58431 }, { "epoch": 1.94, "grad_norm": 0.5793787240982056, "learning_rate": 0.00016547389898764409, "loss": 1.7319, "step": 58432 }, { "epoch": 1.94, "grad_norm": 0.600265622138977, "learning_rate": 0.00016546455672924348, "loss": 1.693, "step": 58433 }, { "epoch": 1.94, "grad_norm": 0.6117245554924011, "learning_rate": 0.00016545521463414898, "loss": 1.7018, "step": 58434 }, { "epoch": 1.94, "grad_norm": 0.6104939579963684, "learning_rate": 0.00016544587270237216, "loss": 1.7534, "step": 58435 }, { "epoch": 1.94, "grad_norm": 0.5923904180526733, "learning_rate": 0.00016543653093392422, "loss": 1.7036, "step": 58436 }, { "epoch": 1.94, "grad_norm": 0.5787227749824524, "learning_rate": 0.00016542718932881635, "loss": 1.7543, "step": 58437 }, { "epoch": 1.94, "grad_norm": 0.595404863357544, "learning_rate": 0.00016541784788706007, "loss": 1.7435, "step": 58438 }, { "epoch": 1.94, "grad_norm": 0.6145595908164978, "learning_rate": 0.00016540850660866694, "loss": 1.7154, "step": 58439 }, { "epoch": 1.94, "grad_norm": 0.5928691029548645, "learning_rate": 0.0001653991654936478, "loss": 1.6982, "step": 58440 }, { "epoch": 1.94, "grad_norm": 0.5938001275062561, "learning_rate": 0.00016538982454201423, "loss": 1.7627, "step": 58441 }, { "epoch": 1.94, "grad_norm": 0.5870346426963806, "learning_rate": 0.00016538048375377772, "loss": 1.707, "step": 58442 }, { "epoch": 1.94, "grad_norm": 0.5908302068710327, "learning_rate": 0.00016537114312894946, "loss": 1.6787, "step": 58443 }, { "epoch": 1.94, "grad_norm": 0.6376904845237732, "learning_rate": 0.00016536180266754066, "loss": 1.7204, "step": 58444 }, { "epoch": 1.94, "grad_norm": 0.5989342927932739, "learning_rate": 0.00016535246236956281, "loss": 1.7732, "step": 58445 }, { "epoch": 1.94, "grad_norm": 0.6349623203277588, "learning_rate": 0.00016534312223502734, "loss": 1.7159, "step": 58446 }, { "epoch": 1.94, "grad_norm": 0.6029857397079468, "learning_rate": 0.00016533378226394547, "loss": 1.7415, "step": 58447 }, { "epoch": 1.94, "grad_norm": 0.6040247082710266, "learning_rate": 0.00016532444245632847, "loss": 1.6663, "step": 58448 }, { "epoch": 1.94, "grad_norm": 0.6364268064498901, "learning_rate": 0.00016531510281218783, "loss": 1.6864, "step": 58449 }, { "epoch": 1.94, "grad_norm": 0.6167617440223694, "learning_rate": 0.00016530576333153484, "loss": 1.7501, "step": 58450 }, { "epoch": 1.94, "grad_norm": 0.6076183319091797, "learning_rate": 0.00016529642401438068, "loss": 1.6471, "step": 58451 }, { "epoch": 1.94, "grad_norm": 0.5784868597984314, "learning_rate": 0.00016528708486073678, "loss": 1.6665, "step": 58452 }, { "epoch": 1.94, "grad_norm": 2.2917532920837402, "learning_rate": 0.0001652777458706147, "loss": 1.6765, "step": 58453 }, { "epoch": 1.94, "grad_norm": 0.5875594019889832, "learning_rate": 0.0001652684070440255, "loss": 1.6976, "step": 58454 }, { "epoch": 1.94, "grad_norm": 0.5891395807266235, "learning_rate": 0.00016525906838098055, "loss": 1.7315, "step": 58455 }, { "epoch": 1.94, "grad_norm": 0.5898757576942444, "learning_rate": 0.00016524972988149134, "loss": 1.7063, "step": 58456 }, { "epoch": 1.94, "grad_norm": 0.5980557203292847, "learning_rate": 0.00016524039154556906, "loss": 1.7041, "step": 58457 }, { "epoch": 1.94, "grad_norm": 0.5777699947357178, "learning_rate": 0.000165231053373225, "loss": 1.7088, "step": 58458 }, { "epoch": 1.94, "grad_norm": 0.5894610285758972, "learning_rate": 0.00016522171536447074, "loss": 1.7716, "step": 58459 }, { "epoch": 1.94, "grad_norm": 0.615750789642334, "learning_rate": 0.00016521237751931732, "loss": 1.8186, "step": 58460 }, { "epoch": 1.95, "grad_norm": 0.6115266680717468, "learning_rate": 0.00016520303983777633, "loss": 1.7549, "step": 58461 }, { "epoch": 1.95, "grad_norm": 0.5780505537986755, "learning_rate": 0.00016519370231985887, "loss": 1.667, "step": 58462 }, { "epoch": 1.95, "grad_norm": 0.5835447907447815, "learning_rate": 0.0001651843649655765, "loss": 1.7121, "step": 58463 }, { "epoch": 1.95, "grad_norm": 0.5864070653915405, "learning_rate": 0.00016517502777494043, "loss": 1.6886, "step": 58464 }, { "epoch": 1.95, "grad_norm": 0.5954453945159912, "learning_rate": 0.00016516569074796192, "loss": 1.7355, "step": 58465 }, { "epoch": 1.95, "grad_norm": 0.6253460049629211, "learning_rate": 0.0001651563538846525, "loss": 1.8102, "step": 58466 }, { "epoch": 1.95, "grad_norm": 0.5895305275917053, "learning_rate": 0.0001651470171850233, "loss": 1.7155, "step": 58467 }, { "epoch": 1.95, "grad_norm": 0.5846362113952637, "learning_rate": 0.00016513768064908586, "loss": 1.7647, "step": 58468 }, { "epoch": 1.95, "grad_norm": 0.5859235525131226, "learning_rate": 0.0001651283442768514, "loss": 1.7832, "step": 58469 }, { "epoch": 1.95, "grad_norm": 0.6209906935691833, "learning_rate": 0.0001651190080683311, "loss": 1.6693, "step": 58470 }, { "epoch": 1.95, "grad_norm": 0.6106436252593994, "learning_rate": 0.0001651096720235366, "loss": 1.7563, "step": 58471 }, { "epoch": 1.95, "grad_norm": 0.5924012064933777, "learning_rate": 0.00016510033614247898, "loss": 1.7088, "step": 58472 }, { "epoch": 1.95, "grad_norm": 0.616266131401062, "learning_rate": 0.0001650910004251698, "loss": 1.6576, "step": 58473 }, { "epoch": 1.95, "grad_norm": 0.6117256283760071, "learning_rate": 0.00016508166487162016, "loss": 1.7374, "step": 58474 }, { "epoch": 1.95, "grad_norm": 0.5840488076210022, "learning_rate": 0.00016507232948184154, "loss": 1.75, "step": 58475 }, { "epoch": 1.95, "grad_norm": 0.574043333530426, "learning_rate": 0.00016506299425584532, "loss": 1.7503, "step": 58476 }, { "epoch": 1.95, "grad_norm": 0.5747308135032654, "learning_rate": 0.0001650536591936426, "loss": 1.7367, "step": 58477 }, { "epoch": 1.95, "grad_norm": 0.5778113603591919, "learning_rate": 0.00016504432429524494, "loss": 1.6818, "step": 58478 }, { "epoch": 1.95, "grad_norm": 0.6082818508148193, "learning_rate": 0.00016503498956066363, "loss": 1.7174, "step": 58479 }, { "epoch": 1.95, "grad_norm": 0.585757851600647, "learning_rate": 0.00016502565498990982, "loss": 1.7197, "step": 58480 }, { "epoch": 1.95, "grad_norm": 0.5866622924804688, "learning_rate": 0.00016501632058299492, "loss": 1.7359, "step": 58481 }, { "epoch": 1.95, "grad_norm": 0.581890344619751, "learning_rate": 0.0001650069863399305, "loss": 1.7223, "step": 58482 }, { "epoch": 1.95, "grad_norm": 0.5774770379066467, "learning_rate": 0.00016499765226072772, "loss": 1.7338, "step": 58483 }, { "epoch": 1.95, "grad_norm": 0.6090145707130432, "learning_rate": 0.00016498831834539776, "loss": 1.6911, "step": 58484 }, { "epoch": 1.95, "grad_norm": 0.5798969268798828, "learning_rate": 0.0001649789845939522, "loss": 1.7289, "step": 58485 }, { "epoch": 1.95, "grad_norm": 0.5876364707946777, "learning_rate": 0.00016496965100640224, "loss": 1.6437, "step": 58486 }, { "epoch": 1.95, "grad_norm": 0.6560931205749512, "learning_rate": 0.00016496031758275916, "loss": 1.734, "step": 58487 }, { "epoch": 1.95, "grad_norm": 0.5955608487129211, "learning_rate": 0.00016495098432303432, "loss": 1.6185, "step": 58488 }, { "epoch": 1.95, "grad_norm": 0.5848280787467957, "learning_rate": 0.00016494165122723934, "loss": 1.7216, "step": 58489 }, { "epoch": 1.95, "grad_norm": 0.5945940613746643, "learning_rate": 0.00016493231829538504, "loss": 1.7417, "step": 58490 }, { "epoch": 1.95, "grad_norm": 0.5846675038337708, "learning_rate": 0.00016492298552748302, "loss": 1.6659, "step": 58491 }, { "epoch": 1.95, "grad_norm": 0.6373893022537231, "learning_rate": 0.0001649136529235447, "loss": 1.7144, "step": 58492 }, { "epoch": 1.95, "grad_norm": 0.6321582198143005, "learning_rate": 0.0001649043204835813, "loss": 1.6811, "step": 58493 }, { "epoch": 1.95, "grad_norm": 0.5908052325248718, "learning_rate": 0.00016489498820760405, "loss": 1.7217, "step": 58494 }, { "epoch": 1.95, "grad_norm": 0.6042709350585938, "learning_rate": 0.00016488565609562434, "loss": 1.6637, "step": 58495 }, { "epoch": 1.95, "grad_norm": 0.600865364074707, "learning_rate": 0.00016487632414765384, "loss": 1.7826, "step": 58496 }, { "epoch": 1.95, "grad_norm": 0.6146101355552673, "learning_rate": 0.00016486699236370326, "loss": 1.6956, "step": 58497 }, { "epoch": 1.95, "grad_norm": 0.6123342514038086, "learning_rate": 0.00016485766074378428, "loss": 1.6728, "step": 58498 }, { "epoch": 1.95, "grad_norm": 0.6312100291252136, "learning_rate": 0.00016484832928790832, "loss": 1.6614, "step": 58499 }, { "epoch": 1.95, "grad_norm": 0.5815719962120056, "learning_rate": 0.00016483899799608654, "loss": 1.7165, "step": 58500 }, { "epoch": 1.95, "grad_norm": 0.5866883397102356, "learning_rate": 0.00016482966686833021, "loss": 1.7021, "step": 58501 }, { "epoch": 1.95, "grad_norm": 0.6432433724403381, "learning_rate": 0.0001648203359046507, "loss": 1.7379, "step": 58502 }, { "epoch": 1.95, "grad_norm": 0.612682044506073, "learning_rate": 0.00016481100510505966, "loss": 1.6656, "step": 58503 }, { "epoch": 1.95, "grad_norm": 0.6228473782539368, "learning_rate": 0.00016480167446956796, "loss": 1.7906, "step": 58504 }, { "epoch": 1.95, "grad_norm": 0.6204705834388733, "learning_rate": 0.00016479234399818704, "loss": 1.8162, "step": 58505 }, { "epoch": 1.95, "grad_norm": 0.6169923543930054, "learning_rate": 0.0001647830136909284, "loss": 1.7318, "step": 58506 }, { "epoch": 1.95, "grad_norm": 0.5996835231781006, "learning_rate": 0.00016477368354780327, "loss": 1.6565, "step": 58507 }, { "epoch": 1.95, "grad_norm": 0.5945860743522644, "learning_rate": 0.00016476435356882287, "loss": 1.7469, "step": 58508 }, { "epoch": 1.95, "grad_norm": 0.5947964191436768, "learning_rate": 0.0001647550237539986, "loss": 1.7309, "step": 58509 }, { "epoch": 1.95, "grad_norm": 0.6025248169898987, "learning_rate": 0.00016474569410334202, "loss": 1.7307, "step": 58510 }, { "epoch": 1.95, "grad_norm": 0.6111776828765869, "learning_rate": 0.00016473636461686403, "loss": 1.7376, "step": 58511 }, { "epoch": 1.95, "grad_norm": 0.6293194890022278, "learning_rate": 0.00016472703529457616, "loss": 1.6937, "step": 58512 }, { "epoch": 1.95, "grad_norm": 0.5917171835899353, "learning_rate": 0.00016471770613648985, "loss": 1.7749, "step": 58513 }, { "epoch": 1.95, "grad_norm": 0.6034731864929199, "learning_rate": 0.00016470837714261633, "loss": 1.7051, "step": 58514 }, { "epoch": 1.95, "grad_norm": 0.592896580696106, "learning_rate": 0.00016469904831296678, "loss": 1.7321, "step": 58515 }, { "epoch": 1.95, "grad_norm": 0.610305666923523, "learning_rate": 0.00016468971964755274, "loss": 1.7537, "step": 58516 }, { "epoch": 1.95, "grad_norm": 0.6134997606277466, "learning_rate": 0.00016468039114638544, "loss": 1.7809, "step": 58517 }, { "epoch": 1.95, "grad_norm": 0.618141770362854, "learning_rate": 0.00016467106280947612, "loss": 1.7772, "step": 58518 }, { "epoch": 1.95, "grad_norm": 0.6178396940231323, "learning_rate": 0.0001646617346368362, "loss": 1.7199, "step": 58519 }, { "epoch": 1.95, "grad_norm": 0.6074080467224121, "learning_rate": 0.0001646524066284771, "loss": 1.7714, "step": 58520 }, { "epoch": 1.95, "grad_norm": 0.5924749970436096, "learning_rate": 0.00016464307878441003, "loss": 1.6985, "step": 58521 }, { "epoch": 1.95, "grad_norm": 0.6445873975753784, "learning_rate": 0.0001646337511046462, "loss": 1.6977, "step": 58522 }, { "epoch": 1.95, "grad_norm": 0.6027913093566895, "learning_rate": 0.00016462442358919718, "loss": 1.7273, "step": 58523 }, { "epoch": 1.95, "grad_norm": 0.5997010469436646, "learning_rate": 0.0001646150962380741, "loss": 1.7353, "step": 58524 }, { "epoch": 1.95, "grad_norm": 0.6124644875526428, "learning_rate": 0.00016460576905128832, "loss": 1.7242, "step": 58525 }, { "epoch": 1.95, "grad_norm": 0.6163075566291809, "learning_rate": 0.00016459644202885127, "loss": 1.7447, "step": 58526 }, { "epoch": 1.95, "grad_norm": 0.5844713449478149, "learning_rate": 0.00016458711517077404, "loss": 1.6509, "step": 58527 }, { "epoch": 1.95, "grad_norm": 0.5839309692382812, "learning_rate": 0.00016457778847706826, "loss": 1.7348, "step": 58528 }, { "epoch": 1.95, "grad_norm": 0.6048457622528076, "learning_rate": 0.00016456846194774495, "loss": 1.7309, "step": 58529 }, { "epoch": 1.95, "grad_norm": 0.6166725158691406, "learning_rate": 0.0001645591355828157, "loss": 1.7308, "step": 58530 }, { "epoch": 1.95, "grad_norm": 0.600312352180481, "learning_rate": 0.00016454980938229168, "loss": 1.6995, "step": 58531 }, { "epoch": 1.95, "grad_norm": 0.6135219931602478, "learning_rate": 0.00016454048334618412, "loss": 1.756, "step": 58532 }, { "epoch": 1.95, "grad_norm": 0.5897256135940552, "learning_rate": 0.00016453115747450461, "loss": 1.8299, "step": 58533 }, { "epoch": 1.95, "grad_norm": 0.5732020735740662, "learning_rate": 0.0001645218317672642, "loss": 1.6881, "step": 58534 }, { "epoch": 1.95, "grad_norm": 0.6184999346733093, "learning_rate": 0.0001645125062244744, "loss": 1.7405, "step": 58535 }, { "epoch": 1.95, "grad_norm": 0.6043917536735535, "learning_rate": 0.00016450318084614647, "loss": 1.6931, "step": 58536 }, { "epoch": 1.95, "grad_norm": 0.5838714241981506, "learning_rate": 0.0001644938556322916, "loss": 1.6838, "step": 58537 }, { "epoch": 1.95, "grad_norm": 0.6145036220550537, "learning_rate": 0.00016448453058292135, "loss": 1.7487, "step": 58538 }, { "epoch": 1.95, "grad_norm": 0.6004245281219482, "learning_rate": 0.00016447520569804676, "loss": 1.7082, "step": 58539 }, { "epoch": 1.95, "grad_norm": 0.6489274501800537, "learning_rate": 0.00016446588097767943, "loss": 1.8283, "step": 58540 }, { "epoch": 1.95, "grad_norm": 0.6315536499023438, "learning_rate": 0.00016445655642183043, "loss": 1.6794, "step": 58541 }, { "epoch": 1.95, "grad_norm": 0.6032244563102722, "learning_rate": 0.00016444723203051133, "loss": 1.7706, "step": 58542 }, { "epoch": 1.95, "grad_norm": 0.6382471323013306, "learning_rate": 0.00016443790780373336, "loss": 1.7316, "step": 58543 }, { "epoch": 1.95, "grad_norm": 0.5924931764602661, "learning_rate": 0.00016442858374150762, "loss": 1.744, "step": 58544 }, { "epoch": 1.95, "grad_norm": 0.589400589466095, "learning_rate": 0.00016441925984384573, "loss": 1.7127, "step": 58545 }, { "epoch": 1.95, "grad_norm": 0.6397104263305664, "learning_rate": 0.0001644099361107589, "loss": 1.7664, "step": 58546 }, { "epoch": 1.95, "grad_norm": 0.6004810333251953, "learning_rate": 0.0001644006125422583, "loss": 1.7016, "step": 58547 }, { "epoch": 1.95, "grad_norm": 0.6342341899871826, "learning_rate": 0.00016439128913835535, "loss": 1.714, "step": 58548 }, { "epoch": 1.95, "grad_norm": 0.5957645177841187, "learning_rate": 0.00016438196589906154, "loss": 1.7681, "step": 58549 }, { "epoch": 1.95, "grad_norm": 0.5915950536727905, "learning_rate": 0.00016437264282438807, "loss": 1.7165, "step": 58550 }, { "epoch": 1.95, "grad_norm": 0.6008519530296326, "learning_rate": 0.00016436331991434607, "loss": 1.7436, "step": 58551 }, { "epoch": 1.95, "grad_norm": 0.5791670680046082, "learning_rate": 0.00016435399716894716, "loss": 1.7431, "step": 58552 }, { "epoch": 1.95, "grad_norm": 0.6217405796051025, "learning_rate": 0.00016434467458820246, "loss": 1.764, "step": 58553 }, { "epoch": 1.95, "grad_norm": 0.6273528933525085, "learning_rate": 0.00016433535217212327, "loss": 1.7657, "step": 58554 }, { "epoch": 1.95, "grad_norm": 0.5999072194099426, "learning_rate": 0.00016432602992072094, "loss": 1.7663, "step": 58555 }, { "epoch": 1.95, "grad_norm": 0.6131035685539246, "learning_rate": 0.000164316707834007, "loss": 1.685, "step": 58556 }, { "epoch": 1.95, "grad_norm": 0.600776195526123, "learning_rate": 0.0001643073859119925, "loss": 1.6525, "step": 58557 }, { "epoch": 1.95, "grad_norm": 0.6093694567680359, "learning_rate": 0.00016429806415468875, "loss": 1.7588, "step": 58558 }, { "epoch": 1.95, "grad_norm": 0.5967537760734558, "learning_rate": 0.0001642887425621073, "loss": 1.6938, "step": 58559 }, { "epoch": 1.95, "grad_norm": 0.6047354340553284, "learning_rate": 0.00016427942113425933, "loss": 1.7025, "step": 58560 }, { "epoch": 1.95, "grad_norm": 0.5854408740997314, "learning_rate": 0.000164270099871156, "loss": 1.7542, "step": 58561 }, { "epoch": 1.95, "grad_norm": 0.6167820692062378, "learning_rate": 0.00016426077877280875, "loss": 1.7517, "step": 58562 }, { "epoch": 1.95, "grad_norm": 0.6345483064651489, "learning_rate": 0.00016425145783922906, "loss": 1.6659, "step": 58563 }, { "epoch": 1.95, "grad_norm": 0.6034674644470215, "learning_rate": 0.0001642421370704281, "loss": 1.6911, "step": 58564 }, { "epoch": 1.95, "grad_norm": 0.6030392646789551, "learning_rate": 0.00016423281646641704, "loss": 1.775, "step": 58565 }, { "epoch": 1.95, "grad_norm": 0.6227540373802185, "learning_rate": 0.00016422349602720747, "loss": 1.7638, "step": 58566 }, { "epoch": 1.95, "grad_norm": 0.6036365628242493, "learning_rate": 0.0001642141757528106, "loss": 1.6768, "step": 58567 }, { "epoch": 1.95, "grad_norm": 0.6204359531402588, "learning_rate": 0.00016420485564323754, "loss": 1.7517, "step": 58568 }, { "epoch": 1.95, "grad_norm": 0.6091482043266296, "learning_rate": 0.0001641955356984998, "loss": 1.7988, "step": 58569 }, { "epoch": 1.95, "grad_norm": 0.6024636030197144, "learning_rate": 0.0001641862159186088, "loss": 1.6844, "step": 58570 }, { "epoch": 1.95, "grad_norm": 0.6042703986167908, "learning_rate": 0.00016417689630357572, "loss": 1.7511, "step": 58571 }, { "epoch": 1.95, "grad_norm": 0.614621102809906, "learning_rate": 0.00016416757685341175, "loss": 1.7104, "step": 58572 }, { "epoch": 1.95, "grad_norm": 0.5917660593986511, "learning_rate": 0.00016415825756812844, "loss": 1.6874, "step": 58573 }, { "epoch": 1.95, "grad_norm": 0.6090906858444214, "learning_rate": 0.000164148938447737, "loss": 1.7452, "step": 58574 }, { "epoch": 1.95, "grad_norm": 0.5938366055488586, "learning_rate": 0.00016413961949224862, "loss": 1.7688, "step": 58575 }, { "epoch": 1.95, "grad_norm": 0.5895509123802185, "learning_rate": 0.0001641303007016748, "loss": 1.6821, "step": 58576 }, { "epoch": 1.95, "grad_norm": 0.5968580842018127, "learning_rate": 0.00016412098207602672, "loss": 1.6779, "step": 58577 }, { "epoch": 1.95, "grad_norm": 0.5942137241363525, "learning_rate": 0.00016411166361531584, "loss": 1.6797, "step": 58578 }, { "epoch": 1.95, "grad_norm": 0.6089525818824768, "learning_rate": 0.00016410234531955326, "loss": 1.7, "step": 58579 }, { "epoch": 1.95, "grad_norm": 0.5947006940841675, "learning_rate": 0.0001640930271887505, "loss": 1.7043, "step": 58580 }, { "epoch": 1.95, "grad_norm": 0.6274215579032898, "learning_rate": 0.00016408370922291882, "loss": 1.6392, "step": 58581 }, { "epoch": 1.95, "grad_norm": 0.5968395471572876, "learning_rate": 0.00016407439142206935, "loss": 1.7095, "step": 58582 }, { "epoch": 1.95, "grad_norm": 0.6126542687416077, "learning_rate": 0.00016406507378621368, "loss": 1.7347, "step": 58583 }, { "epoch": 1.95, "grad_norm": 0.596580445766449, "learning_rate": 0.00016405575631536285, "loss": 1.6535, "step": 58584 }, { "epoch": 1.95, "grad_norm": 0.589138925075531, "learning_rate": 0.0001640464390095284, "loss": 1.7301, "step": 58585 }, { "epoch": 1.95, "grad_norm": 0.6159675717353821, "learning_rate": 0.00016403712186872156, "loss": 1.7244, "step": 58586 }, { "epoch": 1.95, "grad_norm": 0.606218159198761, "learning_rate": 0.0001640278048929535, "loss": 1.7418, "step": 58587 }, { "epoch": 1.95, "grad_norm": 0.6194138526916504, "learning_rate": 0.00016401848808223577, "loss": 1.6691, "step": 58588 }, { "epoch": 1.95, "grad_norm": 0.6000748872756958, "learning_rate": 0.00016400917143657943, "loss": 1.7101, "step": 58589 }, { "epoch": 1.95, "grad_norm": 0.5905672907829285, "learning_rate": 0.00016399985495599607, "loss": 1.683, "step": 58590 }, { "epoch": 1.95, "grad_norm": 0.5695036053657532, "learning_rate": 0.00016399053864049671, "loss": 1.6443, "step": 58591 }, { "epoch": 1.95, "grad_norm": 0.6100351214408875, "learning_rate": 0.0001639812224900929, "loss": 1.6909, "step": 58592 }, { "epoch": 1.95, "grad_norm": 0.6194343566894531, "learning_rate": 0.00016397190650479588, "loss": 1.7587, "step": 58593 }, { "epoch": 1.95, "grad_norm": 0.6004598140716553, "learning_rate": 0.00016396259068461675, "loss": 1.7596, "step": 58594 }, { "epoch": 1.95, "grad_norm": 0.6079794764518738, "learning_rate": 0.00016395327502956712, "loss": 1.6734, "step": 58595 }, { "epoch": 1.95, "grad_norm": 0.5897533893585205, "learning_rate": 0.0001639439595396582, "loss": 1.799, "step": 58596 }, { "epoch": 1.95, "grad_norm": 0.5978493690490723, "learning_rate": 0.00016393464421490117, "loss": 1.757, "step": 58597 }, { "epoch": 1.95, "grad_norm": 0.5991159081459045, "learning_rate": 0.00016392532905530738, "loss": 1.6721, "step": 58598 }, { "epoch": 1.95, "grad_norm": 0.609765887260437, "learning_rate": 0.00016391601406088834, "loss": 1.7997, "step": 58599 }, { "epoch": 1.95, "grad_norm": 0.6287640929222107, "learning_rate": 0.0001639066992316552, "loss": 1.7721, "step": 58600 }, { "epoch": 1.95, "grad_norm": 0.6023027300834656, "learning_rate": 0.00016389738456761913, "loss": 1.7082, "step": 58601 }, { "epoch": 1.95, "grad_norm": 0.5972956418991089, "learning_rate": 0.00016388807006879175, "loss": 1.6677, "step": 58602 }, { "epoch": 1.95, "grad_norm": 0.6039788126945496, "learning_rate": 0.00016387875573518415, "loss": 1.7556, "step": 58603 }, { "epoch": 1.95, "grad_norm": 0.596771240234375, "learning_rate": 0.00016386944156680756, "loss": 1.6883, "step": 58604 }, { "epoch": 1.95, "grad_norm": 0.5841633081436157, "learning_rate": 0.00016386012756367348, "loss": 1.68, "step": 58605 }, { "epoch": 1.95, "grad_norm": 0.6161471009254456, "learning_rate": 0.00016385081372579334, "loss": 1.6608, "step": 58606 }, { "epoch": 1.95, "grad_norm": 0.6078146696090698, "learning_rate": 0.000163841500053178, "loss": 1.7997, "step": 58607 }, { "epoch": 1.95, "grad_norm": 0.590787410736084, "learning_rate": 0.000163832186545839, "loss": 1.7355, "step": 58608 }, { "epoch": 1.95, "grad_norm": 0.5941102504730225, "learning_rate": 0.00016382287320378783, "loss": 1.8336, "step": 58609 }, { "epoch": 1.95, "grad_norm": 0.6087741851806641, "learning_rate": 0.00016381356002703564, "loss": 1.7321, "step": 58610 }, { "epoch": 1.95, "grad_norm": 0.6013451218605042, "learning_rate": 0.00016380424701559353, "loss": 1.6964, "step": 58611 }, { "epoch": 1.95, "grad_norm": 0.5774562954902649, "learning_rate": 0.00016379493416947307, "loss": 1.7021, "step": 58612 }, { "epoch": 1.95, "grad_norm": 0.5980967879295349, "learning_rate": 0.0001637856214886857, "loss": 1.7466, "step": 58613 }, { "epoch": 1.95, "grad_norm": 0.6014178991317749, "learning_rate": 0.00016377630897324223, "loss": 1.7557, "step": 58614 }, { "epoch": 1.95, "grad_norm": 0.5931845307350159, "learning_rate": 0.00016376699662315431, "loss": 1.7689, "step": 58615 }, { "epoch": 1.95, "grad_norm": 0.5828981399536133, "learning_rate": 0.0001637576844384333, "loss": 1.7893, "step": 58616 }, { "epoch": 1.95, "grad_norm": 0.6135703921318054, "learning_rate": 0.00016374837241909035, "loss": 1.7537, "step": 58617 }, { "epoch": 1.95, "grad_norm": 0.6033428907394409, "learning_rate": 0.0001637390605651367, "loss": 1.791, "step": 58618 }, { "epoch": 1.95, "grad_norm": 0.5849841833114624, "learning_rate": 0.00016372974887658378, "loss": 1.6878, "step": 58619 }, { "epoch": 1.95, "grad_norm": 0.5757637023925781, "learning_rate": 0.00016372043735344308, "loss": 1.7257, "step": 58620 }, { "epoch": 1.95, "grad_norm": 0.5959864258766174, "learning_rate": 0.00016371112599572547, "loss": 1.769, "step": 58621 }, { "epoch": 1.95, "grad_norm": 0.605188250541687, "learning_rate": 0.00016370181480344242, "loss": 1.7374, "step": 58622 }, { "epoch": 1.95, "grad_norm": 0.6192870140075684, "learning_rate": 0.00016369250377660545, "loss": 1.6904, "step": 58623 }, { "epoch": 1.95, "grad_norm": 0.5994483828544617, "learning_rate": 0.0001636831929152257, "loss": 1.7254, "step": 58624 }, { "epoch": 1.95, "grad_norm": 0.5860302448272705, "learning_rate": 0.0001636738822193143, "loss": 1.7625, "step": 58625 }, { "epoch": 1.95, "grad_norm": 0.6025925278663635, "learning_rate": 0.00016366457168888278, "loss": 1.7075, "step": 58626 }, { "epoch": 1.95, "grad_norm": 0.5928526520729065, "learning_rate": 0.0001636552613239426, "loss": 1.6086, "step": 58627 }, { "epoch": 1.95, "grad_norm": 0.6036989688873291, "learning_rate": 0.0001636459511245046, "loss": 1.7723, "step": 58628 }, { "epoch": 1.95, "grad_norm": 0.602512001991272, "learning_rate": 0.00016363664109058032, "loss": 1.8036, "step": 58629 }, { "epoch": 1.95, "grad_norm": 0.5932621359825134, "learning_rate": 0.00016362733122218118, "loss": 1.6841, "step": 58630 }, { "epoch": 1.95, "grad_norm": 0.6080980896949768, "learning_rate": 0.0001636180215193184, "loss": 1.7353, "step": 58631 }, { "epoch": 1.95, "grad_norm": 0.5958166718482971, "learning_rate": 0.0001636087119820031, "loss": 1.7298, "step": 58632 }, { "epoch": 1.95, "grad_norm": 0.5925736427307129, "learning_rate": 0.00016359940261024682, "loss": 1.7406, "step": 58633 }, { "epoch": 1.95, "grad_norm": 0.5878015160560608, "learning_rate": 0.0001635900934040608, "loss": 1.662, "step": 58634 }, { "epoch": 1.95, "grad_norm": 0.595611035823822, "learning_rate": 0.00016358078436345621, "loss": 1.724, "step": 58635 }, { "epoch": 1.95, "grad_norm": 0.6044238805770874, "learning_rate": 0.00016357147548844438, "loss": 1.8144, "step": 58636 }, { "epoch": 1.95, "grad_norm": 0.5938586592674255, "learning_rate": 0.00016356216677903686, "loss": 1.7132, "step": 58637 }, { "epoch": 1.95, "grad_norm": 0.5878064632415771, "learning_rate": 0.00016355285823524475, "loss": 1.7905, "step": 58638 }, { "epoch": 1.95, "grad_norm": 0.6031472086906433, "learning_rate": 0.00016354354985707924, "loss": 1.683, "step": 58639 }, { "epoch": 1.95, "grad_norm": 0.6032316088676453, "learning_rate": 0.0001635342416445519, "loss": 1.7251, "step": 58640 }, { "epoch": 1.95, "grad_norm": 0.6088300347328186, "learning_rate": 0.00016352493359767382, "loss": 1.6891, "step": 58641 }, { "epoch": 1.95, "grad_norm": 0.6037794947624207, "learning_rate": 0.00016351562571645628, "loss": 1.6767, "step": 58642 }, { "epoch": 1.95, "grad_norm": 0.5888869762420654, "learning_rate": 0.00016350631800091082, "loss": 1.7169, "step": 58643 }, { "epoch": 1.95, "grad_norm": 0.6306205987930298, "learning_rate": 0.00016349701045104836, "loss": 1.694, "step": 58644 }, { "epoch": 1.95, "grad_norm": 0.5821361541748047, "learning_rate": 0.00016348770306688062, "loss": 1.6417, "step": 58645 }, { "epoch": 1.95, "grad_norm": 0.5878260135650635, "learning_rate": 0.00016347839584841852, "loss": 1.7174, "step": 58646 }, { "epoch": 1.95, "grad_norm": 0.5761451125144958, "learning_rate": 0.0001634690887956737, "loss": 1.7199, "step": 58647 }, { "epoch": 1.95, "grad_norm": 0.5852448344230652, "learning_rate": 0.00016345978190865728, "loss": 1.7429, "step": 58648 }, { "epoch": 1.95, "grad_norm": 0.596809983253479, "learning_rate": 0.00016345047518738045, "loss": 1.7353, "step": 58649 }, { "epoch": 1.95, "grad_norm": 0.6325275301933289, "learning_rate": 0.00016344116863185472, "loss": 1.7701, "step": 58650 }, { "epoch": 1.95, "grad_norm": 0.5961843729019165, "learning_rate": 0.00016343186224209116, "loss": 1.7405, "step": 58651 }, { "epoch": 1.95, "grad_norm": 0.60906583070755, "learning_rate": 0.00016342255601810133, "loss": 1.6611, "step": 58652 }, { "epoch": 1.95, "grad_norm": 0.6064466834068298, "learning_rate": 0.00016341324995989645, "loss": 1.675, "step": 58653 }, { "epoch": 1.95, "grad_norm": 0.5903513431549072, "learning_rate": 0.00016340394406748758, "loss": 1.7121, "step": 58654 }, { "epoch": 1.95, "grad_norm": 0.6060833930969238, "learning_rate": 0.0001633946383408863, "loss": 1.7067, "step": 58655 }, { "epoch": 1.95, "grad_norm": 0.6335659027099609, "learning_rate": 0.00016338533278010376, "loss": 1.6995, "step": 58656 }, { "epoch": 1.95, "grad_norm": 0.6259622573852539, "learning_rate": 0.00016337602738515132, "loss": 1.7628, "step": 58657 }, { "epoch": 1.95, "grad_norm": 0.6132369637489319, "learning_rate": 0.00016336672215604017, "loss": 1.7469, "step": 58658 }, { "epoch": 1.95, "grad_norm": 0.5903415083885193, "learning_rate": 0.00016335741709278185, "loss": 1.7273, "step": 58659 }, { "epoch": 1.95, "grad_norm": 0.6090483069419861, "learning_rate": 0.00016334811219538746, "loss": 1.6857, "step": 58660 }, { "epoch": 1.95, "grad_norm": 0.6077914834022522, "learning_rate": 0.00016333880746386822, "loss": 1.716, "step": 58661 }, { "epoch": 1.95, "grad_norm": 0.6069769859313965, "learning_rate": 0.0001633295028982356, "loss": 1.8283, "step": 58662 }, { "epoch": 1.95, "grad_norm": 0.6222810745239258, "learning_rate": 0.00016332019849850084, "loss": 1.746, "step": 58663 }, { "epoch": 1.95, "grad_norm": 0.599109411239624, "learning_rate": 0.00016331089426467518, "loss": 1.7231, "step": 58664 }, { "epoch": 1.95, "grad_norm": 0.6248614192008972, "learning_rate": 0.00016330159019676985, "loss": 1.6732, "step": 58665 }, { "epoch": 1.95, "grad_norm": 0.5866126418113708, "learning_rate": 0.0001632922862947964, "loss": 1.6134, "step": 58666 }, { "epoch": 1.95, "grad_norm": 0.6053285002708435, "learning_rate": 0.000163282982558766, "loss": 1.7572, "step": 58667 }, { "epoch": 1.95, "grad_norm": 0.604083776473999, "learning_rate": 0.00016327367898868978, "loss": 1.7655, "step": 58668 }, { "epoch": 1.95, "grad_norm": 0.5859062671661377, "learning_rate": 0.0001632643755845793, "loss": 1.7139, "step": 58669 }, { "epoch": 1.95, "grad_norm": 0.6096214056015015, "learning_rate": 0.00016325507234644574, "loss": 1.7175, "step": 58670 }, { "epoch": 1.95, "grad_norm": 0.5852081179618835, "learning_rate": 0.0001632457692743002, "loss": 1.7962, "step": 58671 }, { "epoch": 1.95, "grad_norm": 0.5887547731399536, "learning_rate": 0.00016323646636815417, "loss": 1.777, "step": 58672 }, { "epoch": 1.95, "grad_norm": 0.5986962914466858, "learning_rate": 0.00016322716362801904, "loss": 1.7151, "step": 58673 }, { "epoch": 1.95, "grad_norm": 0.6028904318809509, "learning_rate": 0.00016321786105390594, "loss": 1.8105, "step": 58674 }, { "epoch": 1.95, "grad_norm": 0.6292505264282227, "learning_rate": 0.00016320855864582616, "loss": 1.7941, "step": 58675 }, { "epoch": 1.95, "grad_norm": 0.5899873375892639, "learning_rate": 0.0001631992564037911, "loss": 1.7237, "step": 58676 }, { "epoch": 1.95, "grad_norm": 0.5982253551483154, "learning_rate": 0.000163189954327812, "loss": 1.703, "step": 58677 }, { "epoch": 1.95, "grad_norm": 0.5919378995895386, "learning_rate": 0.00016318065241790002, "loss": 1.7062, "step": 58678 }, { "epoch": 1.95, "grad_norm": 0.604966402053833, "learning_rate": 0.00016317135067406657, "loss": 1.7768, "step": 58679 }, { "epoch": 1.95, "grad_norm": 0.6312280893325806, "learning_rate": 0.00016316204909632304, "loss": 1.758, "step": 58680 }, { "epoch": 1.95, "grad_norm": 0.6128849387168884, "learning_rate": 0.00016315274768468065, "loss": 1.7666, "step": 58681 }, { "epoch": 1.95, "grad_norm": 0.5964341759681702, "learning_rate": 0.00016314344643915055, "loss": 1.7604, "step": 58682 }, { "epoch": 1.95, "grad_norm": 0.6153475642204285, "learning_rate": 0.0001631341453597442, "loss": 1.7269, "step": 58683 }, { "epoch": 1.95, "grad_norm": 0.6098747849464417, "learning_rate": 0.00016312484444647288, "loss": 1.7562, "step": 58684 }, { "epoch": 1.95, "grad_norm": 0.5964288115501404, "learning_rate": 0.00016311554369934772, "loss": 1.7206, "step": 58685 }, { "epoch": 1.95, "grad_norm": 0.6018314957618713, "learning_rate": 0.00016310624311838008, "loss": 1.7527, "step": 58686 }, { "epoch": 1.95, "grad_norm": 0.5873985886573792, "learning_rate": 0.00016309694270358145, "loss": 1.6817, "step": 58687 }, { "epoch": 1.95, "grad_norm": 0.5929102897644043, "learning_rate": 0.00016308764245496295, "loss": 1.769, "step": 58688 }, { "epoch": 1.95, "grad_norm": 0.6073485612869263, "learning_rate": 0.00016307834237253572, "loss": 1.7585, "step": 58689 }, { "epoch": 1.95, "grad_norm": 0.6174802184104919, "learning_rate": 0.0001630690424563114, "loss": 1.7521, "step": 58690 }, { "epoch": 1.95, "grad_norm": 0.600056529045105, "learning_rate": 0.00016305974270630104, "loss": 1.7528, "step": 58691 }, { "epoch": 1.95, "grad_norm": 0.571298360824585, "learning_rate": 0.00016305044312251582, "loss": 1.664, "step": 58692 }, { "epoch": 1.95, "grad_norm": 0.5951361060142517, "learning_rate": 0.00016304114370496739, "loss": 1.689, "step": 58693 }, { "epoch": 1.95, "grad_norm": 0.5840413570404053, "learning_rate": 0.00016303184445366664, "loss": 1.8648, "step": 58694 }, { "epoch": 1.95, "grad_norm": 0.6114392876625061, "learning_rate": 0.00016302254536862523, "loss": 1.667, "step": 58695 }, { "epoch": 1.95, "grad_norm": 0.5887253284454346, "learning_rate": 0.0001630132464498541, "loss": 1.7344, "step": 58696 }, { "epoch": 1.95, "grad_norm": 0.6098701357841492, "learning_rate": 0.00016300394769736484, "loss": 1.7729, "step": 58697 }, { "epoch": 1.95, "grad_norm": 0.6068621873855591, "learning_rate": 0.0001629946491111686, "loss": 1.6535, "step": 58698 }, { "epoch": 1.95, "grad_norm": 0.5817847847938538, "learning_rate": 0.00016298535069127654, "loss": 1.6797, "step": 58699 }, { "epoch": 1.95, "grad_norm": 0.5973995923995972, "learning_rate": 0.0001629760524377002, "loss": 1.7534, "step": 58700 }, { "epoch": 1.95, "grad_norm": 0.5885140895843506, "learning_rate": 0.00016296675435045063, "loss": 1.771, "step": 58701 }, { "epoch": 1.95, "grad_norm": 0.5904308557510376, "learning_rate": 0.00016295745642953938, "loss": 1.7553, "step": 58702 }, { "epoch": 1.95, "grad_norm": 0.6056292653083801, "learning_rate": 0.00016294815867497753, "loss": 1.7018, "step": 58703 }, { "epoch": 1.95, "grad_norm": 0.6143082976341248, "learning_rate": 0.00016293886108677632, "loss": 1.6699, "step": 58704 }, { "epoch": 1.95, "grad_norm": 0.6042358875274658, "learning_rate": 0.00016292956366494726, "loss": 1.6901, "step": 58705 }, { "epoch": 1.95, "grad_norm": 0.6006438136100769, "learning_rate": 0.0001629202664095014, "loss": 1.6917, "step": 58706 }, { "epoch": 1.95, "grad_norm": 0.636818528175354, "learning_rate": 0.00016291096932045026, "loss": 1.7703, "step": 58707 }, { "epoch": 1.95, "grad_norm": 0.6072841882705688, "learning_rate": 0.00016290167239780488, "loss": 1.7122, "step": 58708 }, { "epoch": 1.95, "grad_norm": 0.6402614116668701, "learning_rate": 0.0001628923756415768, "loss": 1.7732, "step": 58709 }, { "epoch": 1.95, "grad_norm": 0.61561119556427, "learning_rate": 0.00016288307905177715, "loss": 1.8011, "step": 58710 }, { "epoch": 1.95, "grad_norm": 0.5980082154273987, "learning_rate": 0.00016287378262841713, "loss": 1.6232, "step": 58711 }, { "epoch": 1.95, "grad_norm": 0.5806463360786438, "learning_rate": 0.00016286448637150826, "loss": 1.7733, "step": 58712 }, { "epoch": 1.95, "grad_norm": 0.607831597328186, "learning_rate": 0.0001628551902810617, "loss": 1.7104, "step": 58713 }, { "epoch": 1.95, "grad_norm": 0.6270090937614441, "learning_rate": 0.00016284589435708858, "loss": 1.6754, "step": 58714 }, { "epoch": 1.95, "grad_norm": 0.6019377112388611, "learning_rate": 0.00016283659859960036, "loss": 1.7435, "step": 58715 }, { "epoch": 1.95, "grad_norm": 0.5798974633216858, "learning_rate": 0.00016282730300860843, "loss": 1.6541, "step": 58716 }, { "epoch": 1.95, "grad_norm": 0.5873888731002808, "learning_rate": 0.00016281800758412396, "loss": 1.7754, "step": 58717 }, { "epoch": 1.95, "grad_norm": 0.5998296737670898, "learning_rate": 0.000162808712326158, "loss": 1.6752, "step": 58718 }, { "epoch": 1.95, "grad_norm": 0.6060011982917786, "learning_rate": 0.00016279941723472226, "loss": 1.6365, "step": 58719 }, { "epoch": 1.95, "grad_norm": 0.634467601776123, "learning_rate": 0.0001627901223098278, "loss": 1.8158, "step": 58720 }, { "epoch": 1.95, "grad_norm": 0.6019042134284973, "learning_rate": 0.0001627808275514858, "loss": 1.712, "step": 58721 }, { "epoch": 1.95, "grad_norm": 0.6062589883804321, "learning_rate": 0.00016277153295970762, "loss": 1.7541, "step": 58722 }, { "epoch": 1.95, "grad_norm": 0.6181173920631409, "learning_rate": 0.00016276223853450484, "loss": 1.6941, "step": 58723 }, { "epoch": 1.95, "grad_norm": 0.6338357925415039, "learning_rate": 0.00016275294427588828, "loss": 1.8066, "step": 58724 }, { "epoch": 1.95, "grad_norm": 0.6520909070968628, "learning_rate": 0.00016274365018386933, "loss": 1.7235, "step": 58725 }, { "epoch": 1.95, "grad_norm": 0.6038656830787659, "learning_rate": 0.00016273435625845955, "loss": 1.7751, "step": 58726 }, { "epoch": 1.95, "grad_norm": 0.5851969122886658, "learning_rate": 0.00016272506249967006, "loss": 1.6977, "step": 58727 }, { "epoch": 1.95, "grad_norm": 0.6195117235183716, "learning_rate": 0.00016271576890751195, "loss": 1.7169, "step": 58728 }, { "epoch": 1.95, "grad_norm": 0.590401291847229, "learning_rate": 0.00016270647548199667, "loss": 1.6345, "step": 58729 }, { "epoch": 1.95, "grad_norm": 2.1275904178619385, "learning_rate": 0.00016269718222313575, "loss": 1.7178, "step": 58730 }, { "epoch": 1.95, "grad_norm": 0.608303427696228, "learning_rate": 0.00016268788913093997, "loss": 1.7473, "step": 58731 }, { "epoch": 1.95, "grad_norm": 0.5926012992858887, "learning_rate": 0.00016267859620542087, "loss": 1.7131, "step": 58732 }, { "epoch": 1.95, "grad_norm": 0.6009145379066467, "learning_rate": 0.00016266930344658985, "loss": 1.8379, "step": 58733 }, { "epoch": 1.95, "grad_norm": 0.5960937142372131, "learning_rate": 0.00016266001085445806, "loss": 1.7655, "step": 58734 }, { "epoch": 1.95, "grad_norm": 0.6191814541816711, "learning_rate": 0.00016265071842903666, "loss": 1.7442, "step": 58735 }, { "epoch": 1.95, "grad_norm": 0.5881415605545044, "learning_rate": 0.00016264142617033705, "loss": 1.658, "step": 58736 }, { "epoch": 1.95, "grad_norm": 0.5864989161491394, "learning_rate": 0.00016263213407837076, "loss": 1.802, "step": 58737 }, { "epoch": 1.95, "grad_norm": 0.6083377599716187, "learning_rate": 0.00016262284215314856, "loss": 1.7394, "step": 58738 }, { "epoch": 1.95, "grad_norm": 0.6223579049110413, "learning_rate": 0.000162613550394682, "loss": 1.7465, "step": 58739 }, { "epoch": 1.95, "grad_norm": 0.5929783582687378, "learning_rate": 0.00016260425880298244, "loss": 1.7039, "step": 58740 }, { "epoch": 1.95, "grad_norm": 0.591880202293396, "learning_rate": 0.00016259496737806107, "loss": 1.724, "step": 58741 }, { "epoch": 1.95, "grad_norm": 0.608304500579834, "learning_rate": 0.00016258567611992906, "loss": 1.7781, "step": 58742 }, { "epoch": 1.95, "grad_norm": 0.6057959198951721, "learning_rate": 0.0001625763850285978, "loss": 1.7117, "step": 58743 }, { "epoch": 1.95, "grad_norm": 0.6084893345832825, "learning_rate": 0.00016256709410407877, "loss": 1.6915, "step": 58744 }, { "epoch": 1.95, "grad_norm": 0.604763388633728, "learning_rate": 0.00016255780334638282, "loss": 1.7235, "step": 58745 }, { "epoch": 1.95, "grad_norm": 0.6071876287460327, "learning_rate": 0.00016254851275552144, "loss": 1.7105, "step": 58746 }, { "epoch": 1.95, "grad_norm": 0.5934662222862244, "learning_rate": 0.00016253922233150608, "loss": 1.6495, "step": 58747 }, { "epoch": 1.95, "grad_norm": 0.6011900305747986, "learning_rate": 0.00016252993207434776, "loss": 1.6806, "step": 58748 }, { "epoch": 1.95, "grad_norm": 0.6084989309310913, "learning_rate": 0.00016252064198405777, "loss": 1.7484, "step": 58749 }, { "epoch": 1.95, "grad_norm": 0.6092692017555237, "learning_rate": 0.00016251135206064762, "loss": 1.6929, "step": 58750 }, { "epoch": 1.95, "grad_norm": 0.6117172837257385, "learning_rate": 0.0001625020623041284, "loss": 1.7284, "step": 58751 }, { "epoch": 1.95, "grad_norm": 0.592736542224884, "learning_rate": 0.00016249277271451128, "loss": 1.7354, "step": 58752 }, { "epoch": 1.95, "grad_norm": 0.5820085406303406, "learning_rate": 0.0001624834832918077, "loss": 1.755, "step": 58753 }, { "epoch": 1.95, "grad_norm": 0.5957657098770142, "learning_rate": 0.00016247419403602901, "loss": 1.6743, "step": 58754 }, { "epoch": 1.95, "grad_norm": 0.6167011260986328, "learning_rate": 0.00016246490494718638, "loss": 1.7915, "step": 58755 }, { "epoch": 1.95, "grad_norm": 0.5855933427810669, "learning_rate": 0.00016245561602529102, "loss": 1.6524, "step": 58756 }, { "epoch": 1.95, "grad_norm": 0.6005947589874268, "learning_rate": 0.00016244632727035433, "loss": 1.6984, "step": 58757 }, { "epoch": 1.95, "grad_norm": 0.5798401832580566, "learning_rate": 0.00016243703868238756, "loss": 1.7651, "step": 58758 }, { "epoch": 1.95, "grad_norm": 0.5985009074211121, "learning_rate": 0.00016242775026140187, "loss": 1.7475, "step": 58759 }, { "epoch": 1.95, "grad_norm": 0.5841178894042969, "learning_rate": 0.00016241846200740868, "loss": 1.7426, "step": 58760 }, { "epoch": 1.95, "grad_norm": 0.608305811882019, "learning_rate": 0.0001624091739204191, "loss": 1.7788, "step": 58761 }, { "epoch": 1.96, "grad_norm": 0.6078950762748718, "learning_rate": 0.00016239988600044464, "loss": 1.7126, "step": 58762 }, { "epoch": 1.96, "grad_norm": 0.5805719494819641, "learning_rate": 0.00016239059824749635, "loss": 1.7289, "step": 58763 }, { "epoch": 1.96, "grad_norm": 0.6152112483978271, "learning_rate": 0.0001623813106615857, "loss": 1.7385, "step": 58764 }, { "epoch": 1.96, "grad_norm": 0.6326860785484314, "learning_rate": 0.00016237202324272384, "loss": 1.7813, "step": 58765 }, { "epoch": 1.96, "grad_norm": 0.5953282117843628, "learning_rate": 0.00016236273599092198, "loss": 1.7289, "step": 58766 }, { "epoch": 1.96, "grad_norm": 0.6136124134063721, "learning_rate": 0.00016235344890619155, "loss": 1.8146, "step": 58767 }, { "epoch": 1.96, "grad_norm": 0.6165659427642822, "learning_rate": 0.0001623441619885437, "loss": 1.715, "step": 58768 }, { "epoch": 1.96, "grad_norm": 0.5909519791603088, "learning_rate": 0.0001623348752379898, "loss": 1.6803, "step": 58769 }, { "epoch": 1.96, "grad_norm": 0.5974538326263428, "learning_rate": 0.00016232558865454113, "loss": 1.7534, "step": 58770 }, { "epoch": 1.96, "grad_norm": 0.6068386435508728, "learning_rate": 0.0001623163022382088, "loss": 1.6908, "step": 58771 }, { "epoch": 1.96, "grad_norm": 0.6392160058021545, "learning_rate": 0.00016230701598900427, "loss": 1.8012, "step": 58772 }, { "epoch": 1.96, "grad_norm": 0.5895939469337463, "learning_rate": 0.00016229772990693862, "loss": 1.6927, "step": 58773 }, { "epoch": 1.96, "grad_norm": 0.5831875801086426, "learning_rate": 0.00016228844399202337, "loss": 1.685, "step": 58774 }, { "epoch": 1.96, "grad_norm": 0.6086000800132751, "learning_rate": 0.0001622791582442695, "loss": 1.7123, "step": 58775 }, { "epoch": 1.96, "grad_norm": 0.5875905156135559, "learning_rate": 0.00016226987266368864, "loss": 1.6654, "step": 58776 }, { "epoch": 1.96, "grad_norm": 0.5935607552528381, "learning_rate": 0.00016226058725029183, "loss": 1.7447, "step": 58777 }, { "epoch": 1.96, "grad_norm": 0.6179724335670471, "learning_rate": 0.00016225130200409022, "loss": 1.7207, "step": 58778 }, { "epoch": 1.96, "grad_norm": 0.5858446955680847, "learning_rate": 0.00016224201692509538, "loss": 1.7133, "step": 58779 }, { "epoch": 1.96, "grad_norm": 0.5916761755943298, "learning_rate": 0.00016223273201331842, "loss": 1.6575, "step": 58780 }, { "epoch": 1.96, "grad_norm": 0.6159848570823669, "learning_rate": 0.0001622234472687705, "loss": 1.7622, "step": 58781 }, { "epoch": 1.96, "grad_norm": 0.5921335220336914, "learning_rate": 0.00016221416269146305, "loss": 1.7384, "step": 58782 }, { "epoch": 1.96, "grad_norm": 0.5998979806900024, "learning_rate": 0.00016220487828140737, "loss": 1.6763, "step": 58783 }, { "epoch": 1.96, "grad_norm": 0.5923091769218445, "learning_rate": 0.0001621955940386147, "loss": 1.7661, "step": 58784 }, { "epoch": 1.96, "grad_norm": 0.5946678519248962, "learning_rate": 0.00016218630996309613, "loss": 1.6776, "step": 58785 }, { "epoch": 1.96, "grad_norm": 0.5901941657066345, "learning_rate": 0.0001621770260548632, "loss": 1.6138, "step": 58786 }, { "epoch": 1.96, "grad_norm": 0.6082900762557983, "learning_rate": 0.00016216774231392708, "loss": 1.7134, "step": 58787 }, { "epoch": 1.96, "grad_norm": 0.5996742248535156, "learning_rate": 0.00016215845874029892, "loss": 1.6917, "step": 58788 }, { "epoch": 1.96, "grad_norm": 0.6027754545211792, "learning_rate": 0.00016214917533399003, "loss": 1.7308, "step": 58789 }, { "epoch": 1.96, "grad_norm": 0.6181665062904358, "learning_rate": 0.00016213989209501185, "loss": 1.6877, "step": 58790 }, { "epoch": 1.96, "grad_norm": 0.584134042263031, "learning_rate": 0.0001621306090233755, "loss": 1.7713, "step": 58791 }, { "epoch": 1.96, "grad_norm": 0.5829663872718811, "learning_rate": 0.00016212132611909222, "loss": 1.6514, "step": 58792 }, { "epoch": 1.96, "grad_norm": 0.5990311503410339, "learning_rate": 0.0001621120433821734, "loss": 1.7336, "step": 58793 }, { "epoch": 1.96, "grad_norm": 0.5960900783538818, "learning_rate": 0.0001621027608126303, "loss": 1.7065, "step": 58794 }, { "epoch": 1.96, "grad_norm": 0.6077897548675537, "learning_rate": 0.00016209347841047398, "loss": 1.6536, "step": 58795 }, { "epoch": 1.96, "grad_norm": 0.6271606683731079, "learning_rate": 0.00016208419617571586, "loss": 1.7239, "step": 58796 }, { "epoch": 1.96, "grad_norm": 0.5975467562675476, "learning_rate": 0.00016207491410836732, "loss": 1.765, "step": 58797 }, { "epoch": 1.96, "grad_norm": 0.6186724305152893, "learning_rate": 0.0001620656322084395, "loss": 1.7131, "step": 58798 }, { "epoch": 1.96, "grad_norm": 0.6063388586044312, "learning_rate": 0.00016205635047594356, "loss": 1.7481, "step": 58799 }, { "epoch": 1.96, "grad_norm": 0.596669614315033, "learning_rate": 0.00016204706891089103, "loss": 1.7701, "step": 58800 }, { "epoch": 1.96, "grad_norm": 0.6571149826049805, "learning_rate": 0.00016203778751329301, "loss": 1.6275, "step": 58801 }, { "epoch": 1.96, "grad_norm": 0.6050436496734619, "learning_rate": 0.0001620285062831607, "loss": 1.7245, "step": 58802 }, { "epoch": 1.96, "grad_norm": 0.5910789370536804, "learning_rate": 0.00016201922522050542, "loss": 1.7561, "step": 58803 }, { "epoch": 1.96, "grad_norm": 0.6162364482879639, "learning_rate": 0.00016200994432533862, "loss": 1.7051, "step": 58804 }, { "epoch": 1.96, "grad_norm": 0.6177548766136169, "learning_rate": 0.00016200066359767142, "loss": 1.7078, "step": 58805 }, { "epoch": 1.96, "grad_norm": 0.6465985774993896, "learning_rate": 0.0001619913830375149, "loss": 1.7987, "step": 58806 }, { "epoch": 1.96, "grad_norm": 0.6205759644508362, "learning_rate": 0.00016198210264488068, "loss": 1.6342, "step": 58807 }, { "epoch": 1.96, "grad_norm": 0.6452734470367432, "learning_rate": 0.00016197282241977982, "loss": 1.6956, "step": 58808 }, { "epoch": 1.96, "grad_norm": 0.6172245740890503, "learning_rate": 0.00016196354236222354, "loss": 1.7209, "step": 58809 }, { "epoch": 1.96, "grad_norm": 0.591235876083374, "learning_rate": 0.00016195426247222325, "loss": 1.7134, "step": 58810 }, { "epoch": 1.96, "grad_norm": 0.5800077319145203, "learning_rate": 0.00016194498274979004, "loss": 1.7858, "step": 58811 }, { "epoch": 1.96, "grad_norm": 0.5931660532951355, "learning_rate": 0.00016193570319493545, "loss": 1.727, "step": 58812 }, { "epoch": 1.96, "grad_norm": 0.6596641540527344, "learning_rate": 0.0001619264238076704, "loss": 1.7891, "step": 58813 }, { "epoch": 1.96, "grad_norm": 0.5965840816497803, "learning_rate": 0.00016191714458800645, "loss": 1.613, "step": 58814 }, { "epoch": 1.96, "grad_norm": 0.6179895401000977, "learning_rate": 0.00016190786553595476, "loss": 1.7467, "step": 58815 }, { "epoch": 1.96, "grad_norm": 0.6086971759796143, "learning_rate": 0.00016189858665152643, "loss": 1.6982, "step": 58816 }, { "epoch": 1.96, "grad_norm": 0.6008155345916748, "learning_rate": 0.00016188930793473302, "loss": 1.7773, "step": 58817 }, { "epoch": 1.96, "grad_norm": 0.6192253828048706, "learning_rate": 0.0001618800293855855, "loss": 1.7221, "step": 58818 }, { "epoch": 1.96, "grad_norm": 0.6370538473129272, "learning_rate": 0.00016187075100409539, "loss": 1.7537, "step": 58819 }, { "epoch": 1.96, "grad_norm": 0.6697754859924316, "learning_rate": 0.00016186147279027385, "loss": 1.7901, "step": 58820 }, { "epoch": 1.96, "grad_norm": 0.5851079225540161, "learning_rate": 0.000161852194744132, "loss": 1.6458, "step": 58821 }, { "epoch": 1.96, "grad_norm": 0.5993281006813049, "learning_rate": 0.00016184291686568135, "loss": 1.7226, "step": 58822 }, { "epoch": 1.96, "grad_norm": 0.6138100624084473, "learning_rate": 0.00016183363915493295, "loss": 1.6566, "step": 58823 }, { "epoch": 1.96, "grad_norm": 0.6134365797042847, "learning_rate": 0.0001618243616118982, "loss": 1.7188, "step": 58824 }, { "epoch": 1.96, "grad_norm": 0.6023145914077759, "learning_rate": 0.0001618150842365883, "loss": 1.678, "step": 58825 }, { "epoch": 1.96, "grad_norm": 0.6251529455184937, "learning_rate": 0.00016180580702901458, "loss": 1.7073, "step": 58826 }, { "epoch": 1.96, "grad_norm": 0.619570791721344, "learning_rate": 0.00016179652998918827, "loss": 1.7994, "step": 58827 }, { "epoch": 1.96, "grad_norm": 0.6119441390037537, "learning_rate": 0.00016178725311712046, "loss": 1.7625, "step": 58828 }, { "epoch": 1.96, "grad_norm": 0.6059136986732483, "learning_rate": 0.00016177797641282266, "loss": 1.7809, "step": 58829 }, { "epoch": 1.96, "grad_norm": 1.012633204460144, "learning_rate": 0.00016176869987630612, "loss": 1.7861, "step": 58830 }, { "epoch": 1.96, "grad_norm": 0.6111462712287903, "learning_rate": 0.00016175942350758182, "loss": 1.7643, "step": 58831 }, { "epoch": 1.96, "grad_norm": 0.5964064598083496, "learning_rate": 0.00016175014730666123, "loss": 1.742, "step": 58832 }, { "epoch": 1.96, "grad_norm": 0.5964031219482422, "learning_rate": 0.00016174087127355572, "loss": 1.7647, "step": 58833 }, { "epoch": 1.96, "grad_norm": 0.5951940417289734, "learning_rate": 0.0001617315954082764, "loss": 1.7814, "step": 58834 }, { "epoch": 1.96, "grad_norm": 0.6073440909385681, "learning_rate": 0.00016172231971083443, "loss": 1.8026, "step": 58835 }, { "epoch": 1.96, "grad_norm": 0.6512185335159302, "learning_rate": 0.0001617130441812413, "loss": 1.6985, "step": 58836 }, { "epoch": 1.96, "grad_norm": 0.5876273512840271, "learning_rate": 0.00016170376881950824, "loss": 1.7831, "step": 58837 }, { "epoch": 1.96, "grad_norm": 0.6074648499488831, "learning_rate": 0.0001616944936256462, "loss": 1.6626, "step": 58838 }, { "epoch": 1.96, "grad_norm": 0.5954795479774475, "learning_rate": 0.00016168521859966678, "loss": 1.7353, "step": 58839 }, { "epoch": 1.96, "grad_norm": 0.5886780023574829, "learning_rate": 0.0001616759437415813, "loss": 1.7501, "step": 58840 }, { "epoch": 1.96, "grad_norm": 0.6065228581428528, "learning_rate": 0.00016166666905140055, "loss": 1.7611, "step": 58841 }, { "epoch": 1.96, "grad_norm": 0.5772038102149963, "learning_rate": 0.00016165739452913617, "loss": 1.7277, "step": 58842 }, { "epoch": 1.96, "grad_norm": 0.6099864840507507, "learning_rate": 0.0001616481201747994, "loss": 1.6617, "step": 58843 }, { "epoch": 1.96, "grad_norm": 0.6140049695968628, "learning_rate": 0.00016163884598840143, "loss": 1.7236, "step": 58844 }, { "epoch": 1.96, "grad_norm": 0.5992186069488525, "learning_rate": 0.00016162957196995344, "loss": 1.7483, "step": 58845 }, { "epoch": 1.96, "grad_norm": 0.629123866558075, "learning_rate": 0.00016162029811946668, "loss": 1.784, "step": 58846 }, { "epoch": 1.96, "grad_norm": 0.6144829392433167, "learning_rate": 0.00016161102443695277, "loss": 1.8006, "step": 58847 }, { "epoch": 1.96, "grad_norm": 0.592796266078949, "learning_rate": 0.00016160175092242248, "loss": 1.7026, "step": 58848 }, { "epoch": 1.96, "grad_norm": 0.602118730545044, "learning_rate": 0.00016159247757588724, "loss": 1.7165, "step": 58849 }, { "epoch": 1.96, "grad_norm": 0.6083784699440002, "learning_rate": 0.00016158320439735847, "loss": 1.7019, "step": 58850 }, { "epoch": 1.96, "grad_norm": 0.6137606501579285, "learning_rate": 0.00016157393138684727, "loss": 1.6867, "step": 58851 }, { "epoch": 1.96, "grad_norm": 0.6338527202606201, "learning_rate": 0.00016156465854436485, "loss": 1.7584, "step": 58852 }, { "epoch": 1.96, "grad_norm": 0.5879619717597961, "learning_rate": 0.0001615553858699225, "loss": 1.8319, "step": 58853 }, { "epoch": 1.96, "grad_norm": 0.5943074226379395, "learning_rate": 0.00016154611336353174, "loss": 1.7684, "step": 58854 }, { "epoch": 1.96, "grad_norm": 0.5795511603355408, "learning_rate": 0.00016153684102520336, "loss": 1.706, "step": 58855 }, { "epoch": 1.96, "grad_norm": 0.6090835332870483, "learning_rate": 0.00016152756885494892, "loss": 1.6551, "step": 58856 }, { "epoch": 1.96, "grad_norm": 0.6111650466918945, "learning_rate": 0.00016151829685277967, "loss": 1.7539, "step": 58857 }, { "epoch": 1.96, "grad_norm": 0.6053131222724915, "learning_rate": 0.0001615090250187068, "loss": 1.6944, "step": 58858 }, { "epoch": 1.96, "grad_norm": 0.583863377571106, "learning_rate": 0.00016149975335274147, "loss": 1.7773, "step": 58859 }, { "epoch": 1.96, "grad_norm": 0.5836477279663086, "learning_rate": 0.00016149048185489504, "loss": 1.8041, "step": 58860 }, { "epoch": 1.96, "grad_norm": 0.6051970720291138, "learning_rate": 0.00016148121052517896, "loss": 1.7318, "step": 58861 }, { "epoch": 1.96, "grad_norm": 0.6066388487815857, "learning_rate": 0.00016147193936360402, "loss": 1.7655, "step": 58862 }, { "epoch": 1.96, "grad_norm": 0.6071018576622009, "learning_rate": 0.0001614626683701818, "loss": 1.7319, "step": 58863 }, { "epoch": 1.96, "grad_norm": 0.5788999199867249, "learning_rate": 0.0001614533975449236, "loss": 1.6941, "step": 58864 }, { "epoch": 1.96, "grad_norm": 0.5988565683364868, "learning_rate": 0.00016144412688784052, "loss": 1.8047, "step": 58865 }, { "epoch": 1.96, "grad_norm": 0.6152496337890625, "learning_rate": 0.00016143485639894377, "loss": 1.7924, "step": 58866 }, { "epoch": 1.96, "grad_norm": 0.6019349694252014, "learning_rate": 0.0001614255860782448, "loss": 1.7679, "step": 58867 }, { "epoch": 1.96, "grad_norm": 0.5991809368133545, "learning_rate": 0.00016141631592575471, "loss": 1.7655, "step": 58868 }, { "epoch": 1.96, "grad_norm": 0.6191893815994263, "learning_rate": 0.00016140704594148471, "loss": 1.7001, "step": 58869 }, { "epoch": 1.96, "grad_norm": 0.5904421806335449, "learning_rate": 0.00016139777612544616, "loss": 1.6267, "step": 58870 }, { "epoch": 1.96, "grad_norm": 0.6033770442008972, "learning_rate": 0.0001613885064776504, "loss": 1.6807, "step": 58871 }, { "epoch": 1.96, "grad_norm": 0.6032602787017822, "learning_rate": 0.00016137923699810852, "loss": 1.6466, "step": 58872 }, { "epoch": 1.96, "grad_norm": 2.3767216205596924, "learning_rate": 0.00016136996768683175, "loss": 1.7288, "step": 58873 }, { "epoch": 1.96, "grad_norm": 0.614142656326294, "learning_rate": 0.00016136069854383152, "loss": 1.7305, "step": 58874 }, { "epoch": 1.96, "grad_norm": 0.6049841642379761, "learning_rate": 0.00016135142956911897, "loss": 1.7083, "step": 58875 }, { "epoch": 1.96, "grad_norm": 0.5971150994300842, "learning_rate": 0.00016134216076270523, "loss": 1.7511, "step": 58876 }, { "epoch": 1.96, "grad_norm": 0.6086962819099426, "learning_rate": 0.00016133289212460182, "loss": 1.7363, "step": 58877 }, { "epoch": 1.96, "grad_norm": 0.5893570780754089, "learning_rate": 0.0001613236236548197, "loss": 1.7743, "step": 58878 }, { "epoch": 1.96, "grad_norm": 0.5972610712051392, "learning_rate": 0.00016131435535337042, "loss": 1.6692, "step": 58879 }, { "epoch": 1.96, "grad_norm": 0.5928539037704468, "learning_rate": 0.0001613050872202649, "loss": 1.677, "step": 58880 }, { "epoch": 1.96, "grad_norm": 0.5756288766860962, "learning_rate": 0.00016129581925551474, "loss": 1.712, "step": 58881 }, { "epoch": 1.96, "grad_norm": 0.599818766117096, "learning_rate": 0.000161286551459131, "loss": 1.6952, "step": 58882 }, { "epoch": 1.96, "grad_norm": 0.5798447132110596, "learning_rate": 0.00016127728383112487, "loss": 1.7434, "step": 58883 }, { "epoch": 1.96, "grad_norm": 0.6459836363792419, "learning_rate": 0.00016126801637150775, "loss": 1.7935, "step": 58884 }, { "epoch": 1.96, "grad_norm": 0.5865105390548706, "learning_rate": 0.0001612587490802907, "loss": 1.7178, "step": 58885 }, { "epoch": 1.96, "grad_norm": 0.5868324041366577, "learning_rate": 0.0001612494819574852, "loss": 1.7346, "step": 58886 }, { "epoch": 1.96, "grad_norm": 0.5887354612350464, "learning_rate": 0.0001612402150031024, "loss": 1.7796, "step": 58887 }, { "epoch": 1.96, "grad_norm": 0.5851125717163086, "learning_rate": 0.00016123094821715342, "loss": 1.6975, "step": 58888 }, { "epoch": 1.96, "grad_norm": 0.5839484333992004, "learning_rate": 0.00016122168159964973, "loss": 1.7546, "step": 58889 }, { "epoch": 1.96, "grad_norm": 0.5963109135627747, "learning_rate": 0.00016121241515060236, "loss": 1.7234, "step": 58890 }, { "epoch": 1.96, "grad_norm": 0.6208257079124451, "learning_rate": 0.0001612031488700228, "loss": 1.7484, "step": 58891 }, { "epoch": 1.96, "grad_norm": 0.5931599140167236, "learning_rate": 0.000161193882757922, "loss": 1.6388, "step": 58892 }, { "epoch": 1.96, "grad_norm": 0.5840097069740295, "learning_rate": 0.00016118461681431155, "loss": 1.7235, "step": 58893 }, { "epoch": 1.96, "grad_norm": 0.6013891100883484, "learning_rate": 0.00016117535103920248, "loss": 1.6725, "step": 58894 }, { "epoch": 1.96, "grad_norm": 0.597270131111145, "learning_rate": 0.00016116608543260597, "loss": 1.7142, "step": 58895 }, { "epoch": 1.96, "grad_norm": 0.5887192487716675, "learning_rate": 0.0001611568199945335, "loss": 1.704, "step": 58896 }, { "epoch": 1.96, "grad_norm": 0.602630078792572, "learning_rate": 0.0001611475547249962, "loss": 1.7079, "step": 58897 }, { "epoch": 1.96, "grad_norm": 0.6088754534721375, "learning_rate": 0.00016113828962400519, "loss": 1.7476, "step": 58898 }, { "epoch": 1.96, "grad_norm": 0.5999466180801392, "learning_rate": 0.0001611290246915718, "loss": 1.76, "step": 58899 }, { "epoch": 1.96, "grad_norm": 0.5864920616149902, "learning_rate": 0.00016111975992770747, "loss": 1.6917, "step": 58900 }, { "epoch": 1.96, "grad_norm": 0.599664568901062, "learning_rate": 0.0001611104953324233, "loss": 1.7347, "step": 58901 }, { "epoch": 1.96, "grad_norm": 0.6146872043609619, "learning_rate": 0.00016110123090573038, "loss": 1.805, "step": 58902 }, { "epoch": 1.96, "grad_norm": 0.6265632510185242, "learning_rate": 0.00016109196664764025, "loss": 1.6924, "step": 58903 }, { "epoch": 1.96, "grad_norm": 0.6010378003120422, "learning_rate": 0.00016108270255816396, "loss": 1.6942, "step": 58904 }, { "epoch": 1.96, "grad_norm": 0.6282969117164612, "learning_rate": 0.0001610734386373127, "loss": 1.7407, "step": 58905 }, { "epoch": 1.96, "grad_norm": 0.6033785939216614, "learning_rate": 0.0001610641748850978, "loss": 1.6972, "step": 58906 }, { "epoch": 1.96, "grad_norm": 0.5963643789291382, "learning_rate": 0.00016105491130153068, "loss": 1.7182, "step": 58907 }, { "epoch": 1.96, "grad_norm": 0.6016436219215393, "learning_rate": 0.00016104564788662235, "loss": 1.6603, "step": 58908 }, { "epoch": 1.96, "grad_norm": 0.5889533758163452, "learning_rate": 0.00016103638464038409, "loss": 1.7156, "step": 58909 }, { "epoch": 1.96, "grad_norm": 0.5851845145225525, "learning_rate": 0.0001610271215628273, "loss": 1.7834, "step": 58910 }, { "epoch": 1.96, "grad_norm": 0.5781211853027344, "learning_rate": 0.00016101785865396304, "loss": 1.7222, "step": 58911 }, { "epoch": 1.96, "grad_norm": 0.5961679220199585, "learning_rate": 0.00016100859591380257, "loss": 1.6453, "step": 58912 }, { "epoch": 1.96, "grad_norm": 0.5955862998962402, "learning_rate": 0.00016099933334235716, "loss": 1.7125, "step": 58913 }, { "epoch": 1.96, "grad_norm": 0.610074520111084, "learning_rate": 0.0001609900709396382, "loss": 1.6969, "step": 58914 }, { "epoch": 1.96, "grad_norm": 0.5875881314277649, "learning_rate": 0.00016098080870565682, "loss": 1.6738, "step": 58915 }, { "epoch": 1.96, "grad_norm": 0.5864881277084351, "learning_rate": 0.0001609715466404241, "loss": 1.6781, "step": 58916 }, { "epoch": 1.96, "grad_norm": 0.6278621554374695, "learning_rate": 0.00016096228474395157, "loss": 1.8562, "step": 58917 }, { "epoch": 1.96, "grad_norm": 0.5828605890274048, "learning_rate": 0.00016095302301625037, "loss": 1.758, "step": 58918 }, { "epoch": 1.96, "grad_norm": 0.5736246109008789, "learning_rate": 0.00016094376145733155, "loss": 1.6826, "step": 58919 }, { "epoch": 1.96, "grad_norm": 0.5736446380615234, "learning_rate": 0.00016093450006720656, "loss": 1.6858, "step": 58920 }, { "epoch": 1.96, "grad_norm": 0.5887590050697327, "learning_rate": 0.0001609252388458867, "loss": 1.7119, "step": 58921 }, { "epoch": 1.96, "grad_norm": 0.615202784538269, "learning_rate": 0.00016091597779338307, "loss": 1.7087, "step": 58922 }, { "epoch": 1.96, "grad_norm": 0.5872875452041626, "learning_rate": 0.00016090671690970689, "loss": 1.685, "step": 58923 }, { "epoch": 1.96, "grad_norm": 0.6005867719650269, "learning_rate": 0.00016089745619486955, "loss": 1.7144, "step": 58924 }, { "epoch": 1.96, "grad_norm": 0.5922450423240662, "learning_rate": 0.0001608881956488822, "loss": 1.7542, "step": 58925 }, { "epoch": 1.96, "grad_norm": 0.6818374395370483, "learning_rate": 0.00016087893527175594, "loss": 1.7198, "step": 58926 }, { "epoch": 1.96, "grad_norm": 0.6034347414970398, "learning_rate": 0.00016086967506350234, "loss": 1.6831, "step": 58927 }, { "epoch": 1.96, "grad_norm": 0.5966142416000366, "learning_rate": 0.0001608604150241323, "loss": 1.6198, "step": 58928 }, { "epoch": 1.96, "grad_norm": 0.5769391059875488, "learning_rate": 0.0001608511551536573, "loss": 1.7157, "step": 58929 }, { "epoch": 1.96, "grad_norm": 0.5856105089187622, "learning_rate": 0.00016084189545208846, "loss": 1.6999, "step": 58930 }, { "epoch": 1.96, "grad_norm": 0.6142396926879883, "learning_rate": 0.0001608326359194371, "loss": 1.7085, "step": 58931 }, { "epoch": 1.96, "grad_norm": 0.5915361046791077, "learning_rate": 0.00016082337655571441, "loss": 1.7696, "step": 58932 }, { "epoch": 1.96, "grad_norm": 0.5892436504364014, "learning_rate": 0.00016081411736093158, "loss": 1.7152, "step": 58933 }, { "epoch": 1.96, "grad_norm": 0.5835627913475037, "learning_rate": 0.00016080485833509998, "loss": 1.7245, "step": 58934 }, { "epoch": 1.96, "grad_norm": 0.6121525764465332, "learning_rate": 0.00016079559947823065, "loss": 1.7782, "step": 58935 }, { "epoch": 1.96, "grad_norm": 0.6026487350463867, "learning_rate": 0.00016078634079033508, "loss": 1.7338, "step": 58936 }, { "epoch": 1.96, "grad_norm": 0.5930898189544678, "learning_rate": 0.0001607770822714244, "loss": 1.7263, "step": 58937 }, { "epoch": 1.96, "grad_norm": 0.5929427742958069, "learning_rate": 0.00016076782392150966, "loss": 1.6974, "step": 58938 }, { "epoch": 1.96, "grad_norm": 0.619672417640686, "learning_rate": 0.00016075856574060242, "loss": 1.6907, "step": 58939 }, { "epoch": 1.96, "grad_norm": 0.6107420921325684, "learning_rate": 0.00016074930772871365, "loss": 1.7389, "step": 58940 }, { "epoch": 1.96, "grad_norm": 0.6090030670166016, "learning_rate": 0.00016074004988585482, "loss": 1.7427, "step": 58941 }, { "epoch": 1.96, "grad_norm": 0.5861012935638428, "learning_rate": 0.0001607307922120369, "loss": 1.7106, "step": 58942 }, { "epoch": 1.96, "grad_norm": 0.5990402102470398, "learning_rate": 0.00016072153470727144, "loss": 1.6694, "step": 58943 }, { "epoch": 1.96, "grad_norm": 0.5930945873260498, "learning_rate": 0.0001607122773715695, "loss": 1.6505, "step": 58944 }, { "epoch": 1.96, "grad_norm": 0.5789595246315002, "learning_rate": 0.0001607030202049422, "loss": 1.6925, "step": 58945 }, { "epoch": 1.96, "grad_norm": 0.6094095706939697, "learning_rate": 0.00016069376320740103, "loss": 1.809, "step": 58946 }, { "epoch": 1.96, "grad_norm": 0.6209654808044434, "learning_rate": 0.00016068450637895713, "loss": 1.7046, "step": 58947 }, { "epoch": 1.96, "grad_norm": 0.6061144471168518, "learning_rate": 0.00016067524971962157, "loss": 1.7248, "step": 58948 }, { "epoch": 1.96, "grad_norm": 0.5872981548309326, "learning_rate": 0.00016066599322940572, "loss": 1.7623, "step": 58949 }, { "epoch": 1.96, "grad_norm": 0.635263979434967, "learning_rate": 0.00016065673690832095, "loss": 1.7485, "step": 58950 }, { "epoch": 1.96, "grad_norm": 0.7615755200386047, "learning_rate": 0.0001606474807563784, "loss": 1.7031, "step": 58951 }, { "epoch": 1.96, "grad_norm": 0.5818032622337341, "learning_rate": 0.00016063822477358915, "loss": 1.7598, "step": 58952 }, { "epoch": 1.96, "grad_norm": 0.5972574353218079, "learning_rate": 0.00016062896895996465, "loss": 1.6993, "step": 58953 }, { "epoch": 1.96, "grad_norm": 0.5962674021720886, "learning_rate": 0.00016061971331551606, "loss": 1.6746, "step": 58954 }, { "epoch": 1.96, "grad_norm": 0.6024505496025085, "learning_rate": 0.00016061045784025445, "loss": 1.6964, "step": 58955 }, { "epoch": 1.96, "grad_norm": 0.6000965237617493, "learning_rate": 0.00016060120253419128, "loss": 1.6947, "step": 58956 }, { "epoch": 1.96, "grad_norm": 0.5944385528564453, "learning_rate": 0.00016059194739733788, "loss": 1.6977, "step": 58957 }, { "epoch": 1.96, "grad_norm": 0.5852182507514954, "learning_rate": 0.00016058269242970514, "loss": 1.6565, "step": 58958 }, { "epoch": 1.96, "grad_norm": 0.5979974269866943, "learning_rate": 0.00016057343763130444, "loss": 1.7117, "step": 58959 }, { "epoch": 1.96, "grad_norm": 0.6018124222755432, "learning_rate": 0.00016056418300214716, "loss": 1.7795, "step": 58960 }, { "epoch": 1.96, "grad_norm": 0.6207270622253418, "learning_rate": 0.0001605549285422444, "loss": 1.7671, "step": 58961 }, { "epoch": 1.96, "grad_norm": 0.5966742038726807, "learning_rate": 0.00016054567425160727, "loss": 1.7237, "step": 58962 }, { "epoch": 1.96, "grad_norm": 0.6039363145828247, "learning_rate": 0.00016053642013024718, "loss": 1.6887, "step": 58963 }, { "epoch": 1.96, "grad_norm": 0.6043458580970764, "learning_rate": 0.00016052716617817557, "loss": 1.6802, "step": 58964 }, { "epoch": 1.96, "grad_norm": 0.5919878482818604, "learning_rate": 0.0001605179123954032, "loss": 1.6984, "step": 58965 }, { "epoch": 1.96, "grad_norm": 0.587070107460022, "learning_rate": 0.0001605086587819415, "loss": 1.6969, "step": 58966 }, { "epoch": 1.96, "grad_norm": 0.5954676866531372, "learning_rate": 0.00016049940533780188, "loss": 1.6754, "step": 58967 }, { "epoch": 1.96, "grad_norm": 0.5947849750518799, "learning_rate": 0.00016049015206299542, "loss": 1.6608, "step": 58968 }, { "epoch": 1.96, "grad_norm": 0.6043015122413635, "learning_rate": 0.00016048089895753323, "loss": 1.7988, "step": 58969 }, { "epoch": 1.96, "grad_norm": 0.593524694442749, "learning_rate": 0.00016047164602142673, "loss": 1.7404, "step": 58970 }, { "epoch": 1.96, "grad_norm": 0.594228982925415, "learning_rate": 0.00016046239325468727, "loss": 1.6501, "step": 58971 }, { "epoch": 1.96, "grad_norm": 0.5943349599838257, "learning_rate": 0.00016045314065732565, "loss": 1.7508, "step": 58972 }, { "epoch": 1.96, "grad_norm": 0.603004515171051, "learning_rate": 0.0001604438882293534, "loss": 1.7337, "step": 58973 }, { "epoch": 1.96, "grad_norm": 0.6187727451324463, "learning_rate": 0.00016043463597078183, "loss": 1.688, "step": 58974 }, { "epoch": 1.96, "grad_norm": 0.5745676755905151, "learning_rate": 0.00016042538388162203, "loss": 1.6896, "step": 58975 }, { "epoch": 1.96, "grad_norm": 0.5974113941192627, "learning_rate": 0.0001604161319618851, "loss": 1.7515, "step": 58976 }, { "epoch": 1.96, "grad_norm": 0.6090691685676575, "learning_rate": 0.00016040688021158256, "loss": 1.739, "step": 58977 }, { "epoch": 1.96, "grad_norm": 0.5836783647537231, "learning_rate": 0.00016039762863072552, "loss": 1.7307, "step": 58978 }, { "epoch": 1.96, "grad_norm": 0.6088492274284363, "learning_rate": 0.00016038837721932508, "loss": 1.7465, "step": 58979 }, { "epoch": 1.96, "grad_norm": 0.6182153820991516, "learning_rate": 0.00016037912597739254, "loss": 1.7537, "step": 58980 }, { "epoch": 1.96, "grad_norm": 0.5705564618110657, "learning_rate": 0.0001603698749049393, "loss": 1.7276, "step": 58981 }, { "epoch": 1.96, "grad_norm": 0.6009879112243652, "learning_rate": 0.00016036062400197646, "loss": 1.7808, "step": 58982 }, { "epoch": 1.96, "grad_norm": 0.5941376686096191, "learning_rate": 0.00016035137326851515, "loss": 1.7447, "step": 58983 }, { "epoch": 1.96, "grad_norm": 0.6134685277938843, "learning_rate": 0.00016034212270456675, "loss": 1.7278, "step": 58984 }, { "epoch": 1.96, "grad_norm": 0.5943081974983215, "learning_rate": 0.0001603328723101425, "loss": 1.745, "step": 58985 }, { "epoch": 1.96, "grad_norm": 0.5762192010879517, "learning_rate": 0.00016032362208525346, "loss": 1.6726, "step": 58986 }, { "epoch": 1.96, "grad_norm": 0.616766095161438, "learning_rate": 0.00016031437202991092, "loss": 1.7566, "step": 58987 }, { "epoch": 1.96, "grad_norm": 0.6012428402900696, "learning_rate": 0.0001603051221441263, "loss": 1.7055, "step": 58988 }, { "epoch": 1.96, "grad_norm": 0.5910431146621704, "learning_rate": 0.00016029587242791064, "loss": 1.7134, "step": 58989 }, { "epoch": 1.96, "grad_norm": 0.6084408164024353, "learning_rate": 0.0001602866228812751, "loss": 1.7976, "step": 58990 }, { "epoch": 1.96, "grad_norm": 0.6139047741889954, "learning_rate": 0.0001602773735042311, "loss": 1.7117, "step": 58991 }, { "epoch": 1.96, "grad_norm": 0.5901423096656799, "learning_rate": 0.00016026812429678982, "loss": 1.7337, "step": 58992 }, { "epoch": 1.96, "grad_norm": 0.6098308563232422, "learning_rate": 0.00016025887525896235, "loss": 1.7289, "step": 58993 }, { "epoch": 1.96, "grad_norm": 0.6133909821510315, "learning_rate": 0.00016024962639076012, "loss": 1.7559, "step": 58994 }, { "epoch": 1.96, "grad_norm": 0.5789414048194885, "learning_rate": 0.0001602403776921941, "loss": 1.7626, "step": 58995 }, { "epoch": 1.96, "grad_norm": 0.6135308742523193, "learning_rate": 0.00016023112916327588, "loss": 1.6436, "step": 58996 }, { "epoch": 1.96, "grad_norm": 0.6439493298530579, "learning_rate": 0.00016022188080401625, "loss": 1.7879, "step": 58997 }, { "epoch": 1.96, "grad_norm": 0.5951915383338928, "learning_rate": 0.00016021263261442684, "loss": 1.7889, "step": 58998 }, { "epoch": 1.96, "grad_norm": 0.5873783826828003, "learning_rate": 0.00016020338459451868, "loss": 1.7199, "step": 58999 }, { "epoch": 1.96, "grad_norm": 0.609636127948761, "learning_rate": 0.0001601941367443029, "loss": 1.756, "step": 59000 }, { "epoch": 1.96, "grad_norm": 0.6041679978370667, "learning_rate": 0.00016018488906379094, "loss": 1.7223, "step": 59001 }, { "epoch": 1.96, "grad_norm": 0.6231062412261963, "learning_rate": 0.00016017564155299384, "loss": 1.7311, "step": 59002 }, { "epoch": 1.96, "grad_norm": 0.6017203330993652, "learning_rate": 0.00016016639421192298, "loss": 1.7554, "step": 59003 }, { "epoch": 1.96, "grad_norm": 0.5970062613487244, "learning_rate": 0.00016015714704058955, "loss": 1.8025, "step": 59004 }, { "epoch": 1.96, "grad_norm": 0.5948940515518188, "learning_rate": 0.00016014790003900459, "loss": 1.7771, "step": 59005 }, { "epoch": 1.96, "grad_norm": 0.6006175875663757, "learning_rate": 0.0001601386532071796, "loss": 1.7075, "step": 59006 }, { "epoch": 1.96, "grad_norm": 0.6012393236160278, "learning_rate": 0.00016012940654512556, "loss": 1.7454, "step": 59007 }, { "epoch": 1.96, "grad_norm": 0.5989058613777161, "learning_rate": 0.00016012016005285398, "loss": 1.6791, "step": 59008 }, { "epoch": 1.96, "grad_norm": 0.5953278541564941, "learning_rate": 0.00016011091373037574, "loss": 1.7534, "step": 59009 }, { "epoch": 1.96, "grad_norm": 0.6200664639472961, "learning_rate": 0.00016010166757770235, "loss": 1.6534, "step": 59010 }, { "epoch": 1.96, "grad_norm": 0.6092997193336487, "learning_rate": 0.00016009242159484494, "loss": 1.6457, "step": 59011 }, { "epoch": 1.96, "grad_norm": 0.5759836435317993, "learning_rate": 0.0001600831757818146, "loss": 1.6603, "step": 59012 }, { "epoch": 1.96, "grad_norm": 0.6108725070953369, "learning_rate": 0.00016007393013862283, "loss": 1.7659, "step": 59013 }, { "epoch": 1.96, "grad_norm": 0.628017008304596, "learning_rate": 0.00016006468466528065, "loss": 1.8371, "step": 59014 }, { "epoch": 1.96, "grad_norm": 0.6028887033462524, "learning_rate": 0.00016005543936179918, "loss": 1.6886, "step": 59015 }, { "epoch": 1.96, "grad_norm": 0.6225523352622986, "learning_rate": 0.00016004619422818983, "loss": 1.7854, "step": 59016 }, { "epoch": 1.96, "grad_norm": 0.6052026748657227, "learning_rate": 0.0001600369492644639, "loss": 1.6979, "step": 59017 }, { "epoch": 1.96, "grad_norm": 0.5924788117408752, "learning_rate": 0.00016002770447063241, "loss": 1.7103, "step": 59018 }, { "epoch": 1.96, "grad_norm": 0.6467716097831726, "learning_rate": 0.00016001845984670664, "loss": 1.8236, "step": 59019 }, { "epoch": 1.96, "grad_norm": 0.6217010021209717, "learning_rate": 0.00016000921539269792, "loss": 1.7132, "step": 59020 }, { "epoch": 1.96, "grad_norm": 0.6229653358459473, "learning_rate": 0.0001599999711086174, "loss": 1.6571, "step": 59021 }, { "epoch": 1.96, "grad_norm": 0.6084604263305664, "learning_rate": 0.00015999072699447614, "loss": 1.6293, "step": 59022 }, { "epoch": 1.96, "grad_norm": 0.593810498714447, "learning_rate": 0.00015998148305028552, "loss": 1.7188, "step": 59023 }, { "epoch": 1.96, "grad_norm": 0.590535044670105, "learning_rate": 0.00015997223927605687, "loss": 1.7061, "step": 59024 }, { "epoch": 1.96, "grad_norm": 0.5865451097488403, "learning_rate": 0.0001599629956718013, "loss": 1.6738, "step": 59025 }, { "epoch": 1.96, "grad_norm": 0.6165003180503845, "learning_rate": 0.00015995375223752987, "loss": 1.7407, "step": 59026 }, { "epoch": 1.96, "grad_norm": 0.5908771753311157, "learning_rate": 0.00015994450897325409, "loss": 1.7481, "step": 59027 }, { "epoch": 1.96, "grad_norm": 0.6137229800224304, "learning_rate": 0.00015993526587898503, "loss": 1.7882, "step": 59028 }, { "epoch": 1.96, "grad_norm": 0.5988976955413818, "learning_rate": 0.00015992602295473386, "loss": 1.6679, "step": 59029 }, { "epoch": 1.96, "grad_norm": 0.5978711247444153, "learning_rate": 0.00015991678020051177, "loss": 1.7253, "step": 59030 }, { "epoch": 1.96, "grad_norm": 0.6042655110359192, "learning_rate": 0.00015990753761633024, "loss": 1.7255, "step": 59031 }, { "epoch": 1.96, "grad_norm": 0.6061018705368042, "learning_rate": 0.0001598982952022003, "loss": 1.6768, "step": 59032 }, { "epoch": 1.96, "grad_norm": 0.5944395065307617, "learning_rate": 0.00015988905295813308, "loss": 1.7422, "step": 59033 }, { "epoch": 1.96, "grad_norm": 0.5994961857795715, "learning_rate": 0.00015987981088414, "loss": 1.6915, "step": 59034 }, { "epoch": 1.96, "grad_norm": 0.61988765001297, "learning_rate": 0.0001598705689802322, "loss": 1.7624, "step": 59035 }, { "epoch": 1.96, "grad_norm": 0.5936785340309143, "learning_rate": 0.00015986132724642074, "loss": 1.7558, "step": 59036 }, { "epoch": 1.96, "grad_norm": 0.6168842315673828, "learning_rate": 0.00015985208568271704, "loss": 1.7193, "step": 59037 }, { "epoch": 1.96, "grad_norm": 0.5863493084907532, "learning_rate": 0.00015984284428913233, "loss": 1.6846, "step": 59038 }, { "epoch": 1.96, "grad_norm": 0.5854707360267639, "learning_rate": 0.00015983360306567776, "loss": 1.7629, "step": 59039 }, { "epoch": 1.96, "grad_norm": 0.6041215062141418, "learning_rate": 0.00015982436201236443, "loss": 1.7385, "step": 59040 }, { "epoch": 1.96, "grad_norm": 0.5971999168395996, "learning_rate": 0.0001598151211292038, "loss": 1.7277, "step": 59041 }, { "epoch": 1.96, "grad_norm": 0.6282555460929871, "learning_rate": 0.00015980588041620693, "loss": 1.763, "step": 59042 }, { "epoch": 1.96, "grad_norm": 0.5919246077537537, "learning_rate": 0.00015979663987338496, "loss": 1.7777, "step": 59043 }, { "epoch": 1.96, "grad_norm": 0.5897965431213379, "learning_rate": 0.00015978739950074934, "loss": 1.7149, "step": 59044 }, { "epoch": 1.96, "grad_norm": 0.6089423298835754, "learning_rate": 0.00015977815929831105, "loss": 1.7961, "step": 59045 }, { "epoch": 1.96, "grad_norm": 0.5901129245758057, "learning_rate": 0.0001597689192660815, "loss": 1.8091, "step": 59046 }, { "epoch": 1.96, "grad_norm": 0.6178951859474182, "learning_rate": 0.0001597596794040717, "loss": 1.7666, "step": 59047 }, { "epoch": 1.96, "grad_norm": 0.5952200889587402, "learning_rate": 0.0001597504397122931, "loss": 1.7665, "step": 59048 }, { "epoch": 1.96, "grad_norm": 0.5932700037956238, "learning_rate": 0.00015974120019075685, "loss": 1.6704, "step": 59049 }, { "epoch": 1.96, "grad_norm": 0.5944938659667969, "learning_rate": 0.00015973196083947396, "loss": 1.6884, "step": 59050 }, { "epoch": 1.96, "grad_norm": 0.580103874206543, "learning_rate": 0.00015972272165845592, "loss": 1.7253, "step": 59051 }, { "epoch": 1.96, "grad_norm": 0.5985608696937561, "learning_rate": 0.00015971348264771373, "loss": 1.7547, "step": 59052 }, { "epoch": 1.96, "grad_norm": 0.5879887342453003, "learning_rate": 0.00015970424380725882, "loss": 1.7142, "step": 59053 }, { "epoch": 1.96, "grad_norm": 0.6005063652992249, "learning_rate": 0.00015969500513710229, "loss": 1.6644, "step": 59054 }, { "epoch": 1.96, "grad_norm": 0.6239652633666992, "learning_rate": 0.00015968576663725518, "loss": 1.7495, "step": 59055 }, { "epoch": 1.96, "grad_norm": 0.6068224906921387, "learning_rate": 0.00015967652830772906, "loss": 1.8139, "step": 59056 }, { "epoch": 1.96, "grad_norm": 0.5966732501983643, "learning_rate": 0.0001596672901485348, "loss": 1.7581, "step": 59057 }, { "epoch": 1.96, "grad_norm": 0.5831787586212158, "learning_rate": 0.0001596580521596839, "loss": 1.6776, "step": 59058 }, { "epoch": 1.96, "grad_norm": 0.6210506558418274, "learning_rate": 0.00015964881434118734, "loss": 1.8089, "step": 59059 }, { "epoch": 1.96, "grad_norm": 0.6075058579444885, "learning_rate": 0.00015963957669305656, "loss": 1.7461, "step": 59060 }, { "epoch": 1.96, "grad_norm": 0.6254838109016418, "learning_rate": 0.00015963033921530264, "loss": 1.681, "step": 59061 }, { "epoch": 1.96, "grad_norm": 0.614041805267334, "learning_rate": 0.0001596211019079367, "loss": 1.7195, "step": 59062 }, { "epoch": 1.97, "grad_norm": 0.6021468639373779, "learning_rate": 0.00015961186477097018, "loss": 1.7034, "step": 59063 }, { "epoch": 1.97, "grad_norm": 0.5962874293327332, "learning_rate": 0.00015960262780441412, "loss": 1.732, "step": 59064 }, { "epoch": 1.97, "grad_norm": 0.5993381142616272, "learning_rate": 0.0001595933910082797, "loss": 1.7566, "step": 59065 }, { "epoch": 1.97, "grad_norm": 0.5808029174804688, "learning_rate": 0.00015958415438257822, "loss": 1.6869, "step": 59066 }, { "epoch": 1.97, "grad_norm": 0.6063328385353088, "learning_rate": 0.000159574917927321, "loss": 1.7393, "step": 59067 }, { "epoch": 1.97, "grad_norm": 0.5933148860931396, "learning_rate": 0.00015956568164251912, "loss": 1.6822, "step": 59068 }, { "epoch": 1.97, "grad_norm": 0.6134757995605469, "learning_rate": 0.00015955644552818372, "loss": 1.7163, "step": 59069 }, { "epoch": 1.97, "grad_norm": 0.6201207041740417, "learning_rate": 0.0001595472095843262, "loss": 1.7332, "step": 59070 }, { "epoch": 1.97, "grad_norm": 0.6078055500984192, "learning_rate": 0.00015953797381095772, "loss": 1.6619, "step": 59071 }, { "epoch": 1.97, "grad_norm": 0.6294675469398499, "learning_rate": 0.00015952873820808928, "loss": 1.7219, "step": 59072 }, { "epoch": 1.97, "grad_norm": 0.5953217148780823, "learning_rate": 0.0001595195027757323, "loss": 1.7054, "step": 59073 }, { "epoch": 1.97, "grad_norm": 0.5987063646316528, "learning_rate": 0.0001595102675138981, "loss": 1.7561, "step": 59074 }, { "epoch": 1.97, "grad_norm": 0.6054058074951172, "learning_rate": 0.00015950103242259756, "loss": 1.7635, "step": 59075 }, { "epoch": 1.97, "grad_norm": 0.5932121872901917, "learning_rate": 0.00015949179750184204, "loss": 1.6657, "step": 59076 }, { "epoch": 1.97, "grad_norm": 0.5807180404663086, "learning_rate": 0.0001594825627516429, "loss": 1.7507, "step": 59077 }, { "epoch": 1.97, "grad_norm": 0.6122241616249084, "learning_rate": 0.00015947332817201125, "loss": 1.7196, "step": 59078 }, { "epoch": 1.97, "grad_norm": 0.629037082195282, "learning_rate": 0.00015946409376295812, "loss": 1.762, "step": 59079 }, { "epoch": 1.97, "grad_norm": 0.6048666834831238, "learning_rate": 0.00015945485952449488, "loss": 1.7589, "step": 59080 }, { "epoch": 1.97, "grad_norm": 0.5967500805854797, "learning_rate": 0.000159445625456633, "loss": 1.7246, "step": 59081 }, { "epoch": 1.97, "grad_norm": 0.6045476198196411, "learning_rate": 0.00015943639155938317, "loss": 1.7337, "step": 59082 }, { "epoch": 1.97, "grad_norm": 0.6255638599395752, "learning_rate": 0.00015942715783275684, "loss": 1.759, "step": 59083 }, { "epoch": 1.97, "grad_norm": 0.6163645386695862, "learning_rate": 0.0001594179242767653, "loss": 1.7223, "step": 59084 }, { "epoch": 1.97, "grad_norm": 0.5828008651733398, "learning_rate": 0.00015940869089141975, "loss": 1.7035, "step": 59085 }, { "epoch": 1.97, "grad_norm": 0.6194522976875305, "learning_rate": 0.0001593994576767312, "loss": 1.7557, "step": 59086 }, { "epoch": 1.97, "grad_norm": 0.6052390933036804, "learning_rate": 0.00015939022463271098, "loss": 1.7246, "step": 59087 }, { "epoch": 1.97, "grad_norm": 0.6161068677902222, "learning_rate": 0.00015938099175937057, "loss": 1.729, "step": 59088 }, { "epoch": 1.97, "grad_norm": 0.5998123288154602, "learning_rate": 0.0001593717590567207, "loss": 1.6929, "step": 59089 }, { "epoch": 1.97, "grad_norm": 0.6041457653045654, "learning_rate": 0.00015936252652477275, "loss": 1.7258, "step": 59090 }, { "epoch": 1.97, "grad_norm": 0.8228172063827515, "learning_rate": 0.0001593532941635381, "loss": 1.7784, "step": 59091 }, { "epoch": 1.97, "grad_norm": 0.6036468744277954, "learning_rate": 0.00015934406197302782, "loss": 1.7629, "step": 59092 }, { "epoch": 1.97, "grad_norm": 0.6049376726150513, "learning_rate": 0.000159334829953253, "loss": 1.7111, "step": 59093 }, { "epoch": 1.97, "grad_norm": 0.6118543744087219, "learning_rate": 0.0001593255981042251, "loss": 1.6935, "step": 59094 }, { "epoch": 1.97, "grad_norm": 0.61404949426651, "learning_rate": 0.0001593163664259552, "loss": 1.714, "step": 59095 }, { "epoch": 1.97, "grad_norm": 0.5893808603286743, "learning_rate": 0.00015930713491845442, "loss": 1.7026, "step": 59096 }, { "epoch": 1.97, "grad_norm": 0.6080076694488525, "learning_rate": 0.000159297903581734, "loss": 1.8246, "step": 59097 }, { "epoch": 1.97, "grad_norm": 0.5862051248550415, "learning_rate": 0.00015928867241580532, "loss": 1.7987, "step": 59098 }, { "epoch": 1.97, "grad_norm": 0.6158999800682068, "learning_rate": 0.00015927944142067943, "loss": 1.6669, "step": 59099 }, { "epoch": 1.97, "grad_norm": 0.5933249592781067, "learning_rate": 0.0001592702105963675, "loss": 1.7187, "step": 59100 }, { "epoch": 1.97, "grad_norm": 0.618704080581665, "learning_rate": 0.00015926097994288087, "loss": 1.6952, "step": 59101 }, { "epoch": 1.97, "grad_norm": 0.5986279249191284, "learning_rate": 0.0001592517494602307, "loss": 1.6775, "step": 59102 }, { "epoch": 1.97, "grad_norm": 0.5964927077293396, "learning_rate": 0.00015924251914842805, "loss": 1.7244, "step": 59103 }, { "epoch": 1.97, "grad_norm": 0.5937132835388184, "learning_rate": 0.00015923328900748426, "loss": 1.6993, "step": 59104 }, { "epoch": 1.97, "grad_norm": 0.5958700776100159, "learning_rate": 0.0001592240590374106, "loss": 1.7455, "step": 59105 }, { "epoch": 1.97, "grad_norm": 0.6204421520233154, "learning_rate": 0.0001592148292382182, "loss": 1.7185, "step": 59106 }, { "epoch": 1.97, "grad_norm": 0.6164533495903015, "learning_rate": 0.00015920559960991813, "loss": 1.7379, "step": 59107 }, { "epoch": 1.97, "grad_norm": 0.6176408529281616, "learning_rate": 0.00015919637015252185, "loss": 1.7357, "step": 59108 }, { "epoch": 1.97, "grad_norm": 0.602114737033844, "learning_rate": 0.00015918714086604046, "loss": 1.6244, "step": 59109 }, { "epoch": 1.97, "grad_norm": 0.6223181486129761, "learning_rate": 0.00015917791175048494, "loss": 1.7324, "step": 59110 }, { "epoch": 1.97, "grad_norm": 0.6103357076644897, "learning_rate": 0.00015916868280586687, "loss": 1.7417, "step": 59111 }, { "epoch": 1.97, "grad_norm": 0.6179816722869873, "learning_rate": 0.00015915945403219712, "loss": 1.7287, "step": 59112 }, { "epoch": 1.97, "grad_norm": 0.5851530432701111, "learning_rate": 0.00015915022542948716, "loss": 1.6666, "step": 59113 }, { "epoch": 1.97, "grad_norm": 0.6186284422874451, "learning_rate": 0.00015914099699774795, "loss": 1.6438, "step": 59114 }, { "epoch": 1.97, "grad_norm": 0.6186733841896057, "learning_rate": 0.0001591317687369909, "loss": 1.7244, "step": 59115 }, { "epoch": 1.97, "grad_norm": 0.6007137298583984, "learning_rate": 0.00015912254064722722, "loss": 1.7847, "step": 59116 }, { "epoch": 1.97, "grad_norm": 0.5876394510269165, "learning_rate": 0.00015911331272846786, "loss": 1.6989, "step": 59117 }, { "epoch": 1.97, "grad_norm": 0.5989842414855957, "learning_rate": 0.00015910408498072425, "loss": 1.7761, "step": 59118 }, { "epoch": 1.97, "grad_norm": 0.6205424666404724, "learning_rate": 0.00015909485740400747, "loss": 1.7987, "step": 59119 }, { "epoch": 1.97, "grad_norm": 0.6143836379051208, "learning_rate": 0.00015908562999832887, "loss": 1.7374, "step": 59120 }, { "epoch": 1.97, "grad_norm": 0.6026760339736938, "learning_rate": 0.00015907640276369947, "loss": 1.689, "step": 59121 }, { "epoch": 1.97, "grad_norm": 0.5961484909057617, "learning_rate": 0.00015906717570013054, "loss": 1.6982, "step": 59122 }, { "epoch": 1.97, "grad_norm": 0.5953457951545715, "learning_rate": 0.00015905794880763336, "loss": 1.6467, "step": 59123 }, { "epoch": 1.97, "grad_norm": 0.6115416288375854, "learning_rate": 0.00015904872208621898, "loss": 1.6198, "step": 59124 }, { "epoch": 1.97, "grad_norm": 0.6244437098503113, "learning_rate": 0.00015903949553589875, "loss": 1.7305, "step": 59125 }, { "epoch": 1.97, "grad_norm": 0.5909857749938965, "learning_rate": 0.00015903026915668373, "loss": 1.6385, "step": 59126 }, { "epoch": 1.97, "grad_norm": 0.5992751121520996, "learning_rate": 0.00015902104294858533, "loss": 1.742, "step": 59127 }, { "epoch": 1.97, "grad_norm": 0.5996500253677368, "learning_rate": 0.00015901181691161455, "loss": 1.6231, "step": 59128 }, { "epoch": 1.97, "grad_norm": 0.5804639458656311, "learning_rate": 0.0001590025910457826, "loss": 1.6762, "step": 59129 }, { "epoch": 1.97, "grad_norm": 0.59676194190979, "learning_rate": 0.0001589933653511008, "loss": 1.6952, "step": 59130 }, { "epoch": 1.97, "grad_norm": 0.5805032253265381, "learning_rate": 0.00015898413982758027, "loss": 1.7197, "step": 59131 }, { "epoch": 1.97, "grad_norm": 0.6068198680877686, "learning_rate": 0.00015897491447523212, "loss": 1.6859, "step": 59132 }, { "epoch": 1.97, "grad_norm": 0.6026418805122375, "learning_rate": 0.00015896568929406766, "loss": 1.7375, "step": 59133 }, { "epoch": 1.97, "grad_norm": 0.5890988111495972, "learning_rate": 0.00015895646428409815, "loss": 1.7011, "step": 59134 }, { "epoch": 1.97, "grad_norm": 0.593773603439331, "learning_rate": 0.00015894723944533473, "loss": 1.6698, "step": 59135 }, { "epoch": 1.97, "grad_norm": 0.603874146938324, "learning_rate": 0.0001589380147777885, "loss": 1.7364, "step": 59136 }, { "epoch": 1.97, "grad_norm": 0.5881920456886292, "learning_rate": 0.0001589287902814708, "loss": 1.7262, "step": 59137 }, { "epoch": 1.97, "grad_norm": 0.6069552302360535, "learning_rate": 0.00015891956595639277, "loss": 1.7676, "step": 59138 }, { "epoch": 1.97, "grad_norm": 0.6187874674797058, "learning_rate": 0.00015891034180256548, "loss": 1.7184, "step": 59139 }, { "epoch": 1.97, "grad_norm": 0.5841575264930725, "learning_rate": 0.00015890111782000026, "loss": 1.6988, "step": 59140 }, { "epoch": 1.97, "grad_norm": 0.6006975173950195, "learning_rate": 0.0001588918940087084, "loss": 1.7436, "step": 59141 }, { "epoch": 1.97, "grad_norm": 0.6098339557647705, "learning_rate": 0.00015888267036870104, "loss": 1.7318, "step": 59142 }, { "epoch": 1.97, "grad_norm": 0.6059287190437317, "learning_rate": 0.00015887344689998915, "loss": 1.7266, "step": 59143 }, { "epoch": 1.97, "grad_norm": 0.6025897264480591, "learning_rate": 0.00015886422360258422, "loss": 1.6674, "step": 59144 }, { "epoch": 1.97, "grad_norm": 0.6046391129493713, "learning_rate": 0.00015885500047649735, "loss": 1.72, "step": 59145 }, { "epoch": 1.97, "grad_norm": 0.6110021471977234, "learning_rate": 0.00015884577752173958, "loss": 1.8122, "step": 59146 }, { "epoch": 1.97, "grad_norm": 0.6437134146690369, "learning_rate": 0.00015883655473832226, "loss": 1.7901, "step": 59147 }, { "epoch": 1.97, "grad_norm": 0.5945891737937927, "learning_rate": 0.00015882733212625665, "loss": 1.8005, "step": 59148 }, { "epoch": 1.97, "grad_norm": 0.6235275864601135, "learning_rate": 0.0001588181096855539, "loss": 1.6997, "step": 59149 }, { "epoch": 1.97, "grad_norm": 0.5773077011108398, "learning_rate": 0.000158808887416225, "loss": 1.7834, "step": 59150 }, { "epoch": 1.97, "grad_norm": 0.5911509990692139, "learning_rate": 0.00015879966531828145, "loss": 1.7585, "step": 59151 }, { "epoch": 1.97, "grad_norm": 0.6196814179420471, "learning_rate": 0.0001587904433917343, "loss": 1.7311, "step": 59152 }, { "epoch": 1.97, "grad_norm": 0.6180190443992615, "learning_rate": 0.00015878122163659464, "loss": 1.7075, "step": 59153 }, { "epoch": 1.97, "grad_norm": 0.5952947735786438, "learning_rate": 0.00015877200005287372, "loss": 1.7023, "step": 59154 }, { "epoch": 1.97, "grad_norm": 0.6984022259712219, "learning_rate": 0.00015876277864058295, "loss": 1.7017, "step": 59155 }, { "epoch": 1.97, "grad_norm": 0.5887889862060547, "learning_rate": 0.00015875355739973333, "loss": 1.6932, "step": 59156 }, { "epoch": 1.97, "grad_norm": 0.6115857362747192, "learning_rate": 0.00015874433633033597, "loss": 1.7081, "step": 59157 }, { "epoch": 1.97, "grad_norm": 0.6107307076454163, "learning_rate": 0.0001587351154324023, "loss": 1.7188, "step": 59158 }, { "epoch": 1.97, "grad_norm": 0.6085805892944336, "learning_rate": 0.00015872589470594336, "loss": 1.7689, "step": 59159 }, { "epoch": 1.97, "grad_norm": 0.6235459446907043, "learning_rate": 0.00015871667415097025, "loss": 1.7349, "step": 59160 }, { "epoch": 1.97, "grad_norm": 0.597675085067749, "learning_rate": 0.00015870745376749446, "loss": 1.7545, "step": 59161 }, { "epoch": 1.97, "grad_norm": 0.6033005118370056, "learning_rate": 0.0001586982335555268, "loss": 1.7365, "step": 59162 }, { "epoch": 1.97, "grad_norm": 0.597917914390564, "learning_rate": 0.00015868901351507884, "loss": 1.7071, "step": 59163 }, { "epoch": 1.97, "grad_norm": 0.6155480146408081, "learning_rate": 0.0001586797936461615, "loss": 1.7268, "step": 59164 }, { "epoch": 1.97, "grad_norm": 0.5932039022445679, "learning_rate": 0.00015867057394878612, "loss": 1.7027, "step": 59165 }, { "epoch": 1.97, "grad_norm": 0.5981885194778442, "learning_rate": 0.00015866135442296384, "loss": 1.7007, "step": 59166 }, { "epoch": 1.97, "grad_norm": 0.6192299723625183, "learning_rate": 0.00015865213506870577, "loss": 1.759, "step": 59167 }, { "epoch": 1.97, "grad_norm": 0.6148778200149536, "learning_rate": 0.00015864291588602326, "loss": 1.7294, "step": 59168 }, { "epoch": 1.97, "grad_norm": 0.5980664491653442, "learning_rate": 0.00015863369687492735, "loss": 1.6994, "step": 59169 }, { "epoch": 1.97, "grad_norm": 0.6458309888839722, "learning_rate": 0.00015862447803542944, "loss": 1.8352, "step": 59170 }, { "epoch": 1.97, "grad_norm": 0.6155471801757812, "learning_rate": 0.00015861525936754052, "loss": 1.7193, "step": 59171 }, { "epoch": 1.97, "grad_norm": 0.6134781837463379, "learning_rate": 0.00015860604087127175, "loss": 1.7886, "step": 59172 }, { "epoch": 1.97, "grad_norm": 0.5991750359535217, "learning_rate": 0.00015859682254663456, "loss": 1.6825, "step": 59173 }, { "epoch": 1.97, "grad_norm": 0.6350336074829102, "learning_rate": 0.0001585876043936398, "loss": 1.7135, "step": 59174 }, { "epoch": 1.97, "grad_norm": 0.6187660694122314, "learning_rate": 0.00015857838641229906, "loss": 1.7313, "step": 59175 }, { "epoch": 1.97, "grad_norm": 0.611359179019928, "learning_rate": 0.00015856916860262313, "loss": 1.6579, "step": 59176 }, { "epoch": 1.97, "grad_norm": 0.6059446334838867, "learning_rate": 0.00015855995096462355, "loss": 1.7003, "step": 59177 }, { "epoch": 1.97, "grad_norm": 0.5914077162742615, "learning_rate": 0.00015855073349831133, "loss": 1.7526, "step": 59178 }, { "epoch": 1.97, "grad_norm": 0.6281887888908386, "learning_rate": 0.00015854151620369757, "loss": 1.7358, "step": 59179 }, { "epoch": 1.97, "grad_norm": 0.6310235857963562, "learning_rate": 0.00015853229908079368, "loss": 1.7894, "step": 59180 }, { "epoch": 1.97, "grad_norm": 0.5934430956840515, "learning_rate": 0.00015852308212961074, "loss": 1.7419, "step": 59181 }, { "epoch": 1.97, "grad_norm": 0.6175618171691895, "learning_rate": 0.0001585138653501598, "loss": 1.7551, "step": 59182 }, { "epoch": 1.97, "grad_norm": 0.5881667733192444, "learning_rate": 0.0001585046487424522, "loss": 1.692, "step": 59183 }, { "epoch": 1.97, "grad_norm": 0.6601538062095642, "learning_rate": 0.00015849543230649923, "loss": 1.6727, "step": 59184 }, { "epoch": 1.97, "grad_norm": 0.6210839152336121, "learning_rate": 0.00015848621604231194, "loss": 1.7723, "step": 59185 }, { "epoch": 1.97, "grad_norm": 0.5948190689086914, "learning_rate": 0.0001584769999499014, "loss": 1.7356, "step": 59186 }, { "epoch": 1.97, "grad_norm": 0.5923687219619751, "learning_rate": 0.00015846778402927905, "loss": 1.7427, "step": 59187 }, { "epoch": 1.97, "grad_norm": 0.5917041897773743, "learning_rate": 0.000158458568280456, "loss": 1.7573, "step": 59188 }, { "epoch": 1.97, "grad_norm": 0.6057139039039612, "learning_rate": 0.00015844935270344323, "loss": 1.6916, "step": 59189 }, { "epoch": 1.97, "grad_norm": 0.6006188988685608, "learning_rate": 0.00015844013729825212, "loss": 1.7388, "step": 59190 }, { "epoch": 1.97, "grad_norm": 0.5815427303314209, "learning_rate": 0.00015843092206489404, "loss": 1.6798, "step": 59191 }, { "epoch": 1.97, "grad_norm": 0.6215246319770813, "learning_rate": 0.00015842170700337973, "loss": 1.7693, "step": 59192 }, { "epoch": 1.97, "grad_norm": 0.6147106885910034, "learning_rate": 0.00015841249211372065, "loss": 1.7704, "step": 59193 }, { "epoch": 1.97, "grad_norm": 0.6113464832305908, "learning_rate": 0.00015840327739592797, "loss": 1.6448, "step": 59194 }, { "epoch": 1.97, "grad_norm": 0.6165294647216797, "learning_rate": 0.00015839406285001294, "loss": 1.7599, "step": 59195 }, { "epoch": 1.97, "grad_norm": 0.6023716330528259, "learning_rate": 0.00015838484847598648, "loss": 1.7781, "step": 59196 }, { "epoch": 1.97, "grad_norm": 0.6015070676803589, "learning_rate": 0.00015837563427385996, "loss": 1.7276, "step": 59197 }, { "epoch": 1.97, "grad_norm": 0.5978748798370361, "learning_rate": 0.0001583664202436448, "loss": 1.7537, "step": 59198 }, { "epoch": 1.97, "grad_norm": 0.6127643585205078, "learning_rate": 0.00015835720638535168, "loss": 1.7533, "step": 59199 }, { "epoch": 1.97, "grad_norm": 0.5884910821914673, "learning_rate": 0.00015834799269899205, "loss": 1.7555, "step": 59200 }, { "epoch": 1.97, "grad_norm": 0.6107850670814514, "learning_rate": 0.0001583387791845772, "loss": 1.7666, "step": 59201 }, { "epoch": 1.97, "grad_norm": 0.6197155117988586, "learning_rate": 0.00015832956584211822, "loss": 1.7547, "step": 59202 }, { "epoch": 1.97, "grad_norm": 0.6079310774803162, "learning_rate": 0.00015832035267162613, "loss": 1.7097, "step": 59203 }, { "epoch": 1.97, "grad_norm": 0.6025208830833435, "learning_rate": 0.00015831113967311228, "loss": 1.7542, "step": 59204 }, { "epoch": 1.97, "grad_norm": 0.6030377745628357, "learning_rate": 0.00015830192684658805, "loss": 1.6539, "step": 59205 }, { "epoch": 1.97, "grad_norm": 0.6149246096611023, "learning_rate": 0.00015829271419206417, "loss": 1.7349, "step": 59206 }, { "epoch": 1.97, "grad_norm": 0.6216812133789062, "learning_rate": 0.00015828350170955205, "loss": 1.8423, "step": 59207 }, { "epoch": 1.97, "grad_norm": 0.6130452156066895, "learning_rate": 0.000158274289399063, "loss": 1.7741, "step": 59208 }, { "epoch": 1.97, "grad_norm": 0.6224838495254517, "learning_rate": 0.00015826507726060808, "loss": 1.7048, "step": 59209 }, { "epoch": 1.97, "grad_norm": 0.6022628545761108, "learning_rate": 0.00015825586529419835, "loss": 1.6655, "step": 59210 }, { "epoch": 1.97, "grad_norm": 0.6021583676338196, "learning_rate": 0.00015824665349984526, "loss": 1.7174, "step": 59211 }, { "epoch": 1.97, "grad_norm": 0.6114369034767151, "learning_rate": 0.0001582374418775598, "loss": 1.6085, "step": 59212 }, { "epoch": 1.97, "grad_norm": 0.5976195931434631, "learning_rate": 0.00015822823042735312, "loss": 1.7219, "step": 59213 }, { "epoch": 1.97, "grad_norm": 0.6234501004219055, "learning_rate": 0.00015821901914923647, "loss": 1.7106, "step": 59214 }, { "epoch": 1.97, "grad_norm": 0.6081475019454956, "learning_rate": 0.00015820980804322115, "loss": 1.7418, "step": 59215 }, { "epoch": 1.97, "grad_norm": 0.5835367441177368, "learning_rate": 0.0001582005971093182, "loss": 1.7373, "step": 59216 }, { "epoch": 1.97, "grad_norm": 0.5982725620269775, "learning_rate": 0.00015819138634753878, "loss": 1.7547, "step": 59217 }, { "epoch": 1.97, "grad_norm": 0.6166829466819763, "learning_rate": 0.0001581821757578942, "loss": 1.747, "step": 59218 }, { "epoch": 1.97, "grad_norm": 0.5797961354255676, "learning_rate": 0.00015817296534039558, "loss": 1.7194, "step": 59219 }, { "epoch": 1.97, "grad_norm": 0.6352839469909668, "learning_rate": 0.00015816375509505393, "loss": 1.7933, "step": 59220 }, { "epoch": 1.97, "grad_norm": 0.6144431829452515, "learning_rate": 0.00015815454502188074, "loss": 1.7253, "step": 59221 }, { "epoch": 1.97, "grad_norm": 0.5896070599555969, "learning_rate": 0.0001581453351208869, "loss": 1.6914, "step": 59222 }, { "epoch": 1.97, "grad_norm": 0.6086485385894775, "learning_rate": 0.00015813612539208383, "loss": 1.7368, "step": 59223 }, { "epoch": 1.97, "grad_norm": 0.6095778346061707, "learning_rate": 0.0001581269158354825, "loss": 1.7114, "step": 59224 }, { "epoch": 1.97, "grad_norm": 0.6219330430030823, "learning_rate": 0.00015811770645109428, "loss": 1.7061, "step": 59225 }, { "epoch": 1.97, "grad_norm": 0.6144116520881653, "learning_rate": 0.0001581084972389303, "loss": 1.7623, "step": 59226 }, { "epoch": 1.97, "grad_norm": 0.5864135026931763, "learning_rate": 0.00015809928819900156, "loss": 1.6902, "step": 59227 }, { "epoch": 1.97, "grad_norm": 0.5848524570465088, "learning_rate": 0.00015809007933131948, "loss": 1.7118, "step": 59228 }, { "epoch": 1.97, "grad_norm": 0.5838872790336609, "learning_rate": 0.00015808087063589503, "loss": 1.6946, "step": 59229 }, { "epoch": 1.97, "grad_norm": 0.5969866514205933, "learning_rate": 0.0001580716621127396, "loss": 1.6929, "step": 59230 }, { "epoch": 1.97, "grad_norm": 0.6066684126853943, "learning_rate": 0.0001580624537618642, "loss": 1.7412, "step": 59231 }, { "epoch": 1.97, "grad_norm": 0.5936357378959656, "learning_rate": 0.0001580532455832801, "loss": 1.749, "step": 59232 }, { "epoch": 1.97, "grad_norm": 0.5964764356613159, "learning_rate": 0.0001580440375769985, "loss": 1.7615, "step": 59233 }, { "epoch": 1.97, "grad_norm": 0.621105968952179, "learning_rate": 0.00015803482974303037, "loss": 1.737, "step": 59234 }, { "epoch": 1.97, "grad_norm": 0.6290982365608215, "learning_rate": 0.0001580256220813872, "loss": 1.7709, "step": 59235 }, { "epoch": 1.97, "grad_norm": 0.5916589498519897, "learning_rate": 0.00015801641459207985, "loss": 1.6302, "step": 59236 }, { "epoch": 1.97, "grad_norm": 0.5927661061286926, "learning_rate": 0.00015800720727511978, "loss": 1.7672, "step": 59237 }, { "epoch": 1.97, "grad_norm": 0.5907096862792969, "learning_rate": 0.00015799800013051803, "loss": 1.7142, "step": 59238 }, { "epoch": 1.97, "grad_norm": 0.6032069325447083, "learning_rate": 0.00015798879315828568, "loss": 1.7131, "step": 59239 }, { "epoch": 1.97, "grad_norm": 0.6284040212631226, "learning_rate": 0.00015797958635843415, "loss": 1.7278, "step": 59240 }, { "epoch": 1.97, "grad_norm": 0.5955398678779602, "learning_rate": 0.0001579703797309743, "loss": 1.6863, "step": 59241 }, { "epoch": 1.97, "grad_norm": 0.5968207716941833, "learning_rate": 0.00015796117327591765, "loss": 1.6837, "step": 59242 }, { "epoch": 1.97, "grad_norm": 0.6063620448112488, "learning_rate": 0.0001579519669932751, "loss": 1.7286, "step": 59243 }, { "epoch": 1.97, "grad_norm": 0.6003033518791199, "learning_rate": 0.00015794276088305803, "loss": 1.7449, "step": 59244 }, { "epoch": 1.97, "grad_norm": 0.6000701189041138, "learning_rate": 0.00015793355494527747, "loss": 1.7027, "step": 59245 }, { "epoch": 1.97, "grad_norm": 0.6002342104911804, "learning_rate": 0.00015792434917994458, "loss": 1.7093, "step": 59246 }, { "epoch": 1.97, "grad_norm": 0.5910143852233887, "learning_rate": 0.0001579151435870707, "loss": 1.7389, "step": 59247 }, { "epoch": 1.97, "grad_norm": 0.6053318977355957, "learning_rate": 0.0001579059381666669, "loss": 1.7484, "step": 59248 }, { "epoch": 1.97, "grad_norm": 0.6075477004051208, "learning_rate": 0.00015789673291874426, "loss": 1.7258, "step": 59249 }, { "epoch": 1.97, "grad_norm": 0.606942892074585, "learning_rate": 0.000157887527843314, "loss": 1.7655, "step": 59250 }, { "epoch": 1.97, "grad_norm": 0.620491623878479, "learning_rate": 0.00015787832294038748, "loss": 1.7049, "step": 59251 }, { "epoch": 1.97, "grad_norm": 0.5822573304176331, "learning_rate": 0.00015786911820997576, "loss": 1.7211, "step": 59252 }, { "epoch": 1.97, "grad_norm": 0.6069583296775818, "learning_rate": 0.00015785991365208982, "loss": 1.7404, "step": 59253 }, { "epoch": 1.97, "grad_norm": 0.6190529465675354, "learning_rate": 0.00015785070926674117, "loss": 1.7923, "step": 59254 }, { "epoch": 1.97, "grad_norm": 0.6191276907920837, "learning_rate": 0.0001578415050539408, "loss": 1.687, "step": 59255 }, { "epoch": 1.97, "grad_norm": 0.6066632270812988, "learning_rate": 0.00015783230101369978, "loss": 1.6535, "step": 59256 }, { "epoch": 1.97, "grad_norm": 0.5823377966880798, "learning_rate": 0.00015782309714602936, "loss": 1.7078, "step": 59257 }, { "epoch": 1.97, "grad_norm": 0.6068274974822998, "learning_rate": 0.00015781389345094092, "loss": 1.7292, "step": 59258 }, { "epoch": 1.97, "grad_norm": 0.5942962765693665, "learning_rate": 0.00015780468992844545, "loss": 1.7248, "step": 59259 }, { "epoch": 1.97, "grad_norm": 0.6133533716201782, "learning_rate": 0.00015779548657855402, "loss": 1.7948, "step": 59260 }, { "epoch": 1.97, "grad_norm": 0.6130629777908325, "learning_rate": 0.00015778628340127801, "loss": 1.7989, "step": 59261 }, { "epoch": 1.97, "grad_norm": 0.6046206951141357, "learning_rate": 0.00015777708039662854, "loss": 1.711, "step": 59262 }, { "epoch": 1.97, "grad_norm": 0.6034048795700073, "learning_rate": 0.00015776787756461655, "loss": 1.7802, "step": 59263 }, { "epoch": 1.97, "grad_norm": 0.6016395688056946, "learning_rate": 0.00015775867490525346, "loss": 1.7325, "step": 59264 }, { "epoch": 1.97, "grad_norm": 0.6074799299240112, "learning_rate": 0.00015774947241855048, "loss": 1.7095, "step": 59265 }, { "epoch": 1.97, "grad_norm": 0.6061127185821533, "learning_rate": 0.00015774027010451868, "loss": 1.699, "step": 59266 }, { "epoch": 1.97, "grad_norm": 0.617065966129303, "learning_rate": 0.0001577310679631691, "loss": 1.6694, "step": 59267 }, { "epoch": 1.97, "grad_norm": 0.6201308369636536, "learning_rate": 0.0001577218659945132, "loss": 1.6309, "step": 59268 }, { "epoch": 1.97, "grad_norm": 0.6337616443634033, "learning_rate": 0.00015771266419856197, "loss": 1.7563, "step": 59269 }, { "epoch": 1.97, "grad_norm": 0.6015503406524658, "learning_rate": 0.00015770346257532646, "loss": 1.741, "step": 59270 }, { "epoch": 1.97, "grad_norm": 0.5870888829231262, "learning_rate": 0.000157694261124818, "loss": 1.6301, "step": 59271 }, { "epoch": 1.97, "grad_norm": 0.6278638243675232, "learning_rate": 0.00015768505984704788, "loss": 1.7722, "step": 59272 }, { "epoch": 1.97, "grad_norm": 0.5969273447990417, "learning_rate": 0.00015767585874202708, "loss": 1.656, "step": 59273 }, { "epoch": 1.97, "grad_norm": 0.6163796186447144, "learning_rate": 0.00015766665780976675, "loss": 1.6681, "step": 59274 }, { "epoch": 1.97, "grad_norm": 0.6019055843353271, "learning_rate": 0.00015765745705027822, "loss": 1.7178, "step": 59275 }, { "epoch": 1.97, "grad_norm": 0.6062941551208496, "learning_rate": 0.00015764825646357255, "loss": 1.7313, "step": 59276 }, { "epoch": 1.97, "grad_norm": 0.6094810962677002, "learning_rate": 0.00015763905604966082, "loss": 1.6798, "step": 59277 }, { "epoch": 1.97, "grad_norm": 0.58687824010849, "learning_rate": 0.00015762985580855443, "loss": 1.6468, "step": 59278 }, { "epoch": 1.97, "grad_norm": 0.6048545837402344, "learning_rate": 0.00015762065574026428, "loss": 1.7494, "step": 59279 }, { "epoch": 1.97, "grad_norm": 0.5897431969642639, "learning_rate": 0.00015761145584480179, "loss": 1.8361, "step": 59280 }, { "epoch": 1.97, "grad_norm": 0.5929194092750549, "learning_rate": 0.0001576022561221779, "loss": 1.7485, "step": 59281 }, { "epoch": 1.97, "grad_norm": 0.6017695069313049, "learning_rate": 0.000157593056572404, "loss": 1.7432, "step": 59282 }, { "epoch": 1.97, "grad_norm": 0.6204313039779663, "learning_rate": 0.00015758385719549117, "loss": 1.7644, "step": 59283 }, { "epoch": 1.97, "grad_norm": 0.6103656888008118, "learning_rate": 0.0001575746579914504, "loss": 1.6971, "step": 59284 }, { "epoch": 1.97, "grad_norm": 0.5909843444824219, "learning_rate": 0.00015756545896029316, "loss": 1.7046, "step": 59285 }, { "epoch": 1.97, "grad_norm": 0.5917575359344482, "learning_rate": 0.0001575562601020303, "loss": 1.7152, "step": 59286 }, { "epoch": 1.97, "grad_norm": 0.5956612229347229, "learning_rate": 0.00015754706141667336, "loss": 1.7395, "step": 59287 }, { "epoch": 1.97, "grad_norm": 0.6154516935348511, "learning_rate": 0.0001575378629042332, "loss": 1.7187, "step": 59288 }, { "epoch": 1.97, "grad_norm": 0.5881394743919373, "learning_rate": 0.00015752866456472103, "loss": 1.6934, "step": 59289 }, { "epoch": 1.97, "grad_norm": 4.480369567871094, "learning_rate": 0.0001575194663981482, "loss": 1.899, "step": 59290 }, { "epoch": 1.97, "grad_norm": 0.6012954115867615, "learning_rate": 0.00015751026840452558, "loss": 1.661, "step": 59291 }, { "epoch": 1.97, "grad_norm": 0.5847117304801941, "learning_rate": 0.00015750107058386462, "loss": 1.6926, "step": 59292 }, { "epoch": 1.97, "grad_norm": 0.5994306802749634, "learning_rate": 0.00015749187293617625, "loss": 1.6917, "step": 59293 }, { "epoch": 1.97, "grad_norm": 0.5942597985267639, "learning_rate": 0.00015748267546147185, "loss": 1.6875, "step": 59294 }, { "epoch": 1.97, "grad_norm": 0.5823104977607727, "learning_rate": 0.00015747347815976253, "loss": 1.6346, "step": 59295 }, { "epoch": 1.97, "grad_norm": 0.6162524223327637, "learning_rate": 0.00015746428103105927, "loss": 1.6844, "step": 59296 }, { "epoch": 1.97, "grad_norm": 0.609048068523407, "learning_rate": 0.00015745508407537348, "loss": 1.758, "step": 59297 }, { "epoch": 1.97, "grad_norm": 0.6137356162071228, "learning_rate": 0.00015744588729271622, "loss": 1.659, "step": 59298 }, { "epoch": 1.97, "grad_norm": 0.618789553642273, "learning_rate": 0.00015743669068309853, "loss": 1.7666, "step": 59299 }, { "epoch": 1.97, "grad_norm": 0.5997784733772278, "learning_rate": 0.00015742749424653172, "loss": 1.7723, "step": 59300 }, { "epoch": 1.97, "grad_norm": 0.5895174145698547, "learning_rate": 0.000157418297983027, "loss": 1.7319, "step": 59301 }, { "epoch": 1.97, "grad_norm": 0.6056285500526428, "learning_rate": 0.00015740910189259545, "loss": 1.7512, "step": 59302 }, { "epoch": 1.97, "grad_norm": 0.6006742715835571, "learning_rate": 0.00015739990597524818, "loss": 1.7395, "step": 59303 }, { "epoch": 1.97, "grad_norm": 0.584178626537323, "learning_rate": 0.00015739071023099647, "loss": 1.7029, "step": 59304 }, { "epoch": 1.97, "grad_norm": 0.6223104596138, "learning_rate": 0.00015738151465985144, "loss": 1.6926, "step": 59305 }, { "epoch": 1.97, "grad_norm": 0.6242539882659912, "learning_rate": 0.00015737231926182417, "loss": 1.732, "step": 59306 }, { "epoch": 1.97, "grad_norm": 0.588690459728241, "learning_rate": 0.00015736312403692585, "loss": 1.7217, "step": 59307 }, { "epoch": 1.97, "grad_norm": 0.5992349982261658, "learning_rate": 0.00015735392898516792, "loss": 1.6819, "step": 59308 }, { "epoch": 1.97, "grad_norm": 0.5941495895385742, "learning_rate": 0.00015734473410656104, "loss": 1.741, "step": 59309 }, { "epoch": 1.97, "grad_norm": 0.653588593006134, "learning_rate": 0.00015733553940111664, "loss": 1.7235, "step": 59310 }, { "epoch": 1.97, "grad_norm": 0.5916978716850281, "learning_rate": 0.00015732634486884602, "loss": 1.7512, "step": 59311 }, { "epoch": 1.97, "grad_norm": 0.6233444213867188, "learning_rate": 0.00015731715050976023, "loss": 1.7578, "step": 59312 }, { "epoch": 1.97, "grad_norm": 0.6225733757019043, "learning_rate": 0.0001573079563238702, "loss": 1.7889, "step": 59313 }, { "epoch": 1.97, "grad_norm": 0.5922598242759705, "learning_rate": 0.00015729876231118734, "loss": 1.6747, "step": 59314 }, { "epoch": 1.97, "grad_norm": 0.6006033420562744, "learning_rate": 0.00015728956847172295, "loss": 1.7228, "step": 59315 }, { "epoch": 1.97, "grad_norm": 0.6033174395561218, "learning_rate": 0.0001572803748054878, "loss": 1.742, "step": 59316 }, { "epoch": 1.97, "grad_norm": 0.6336559057235718, "learning_rate": 0.0001572711813124932, "loss": 1.6781, "step": 59317 }, { "epoch": 1.97, "grad_norm": 0.606873631477356, "learning_rate": 0.00015726198799275053, "loss": 1.7366, "step": 59318 }, { "epoch": 1.97, "grad_norm": 0.5732390284538269, "learning_rate": 0.00015725279484627073, "loss": 1.6613, "step": 59319 }, { "epoch": 1.97, "grad_norm": 0.5931658744812012, "learning_rate": 0.00015724360187306492, "loss": 1.8041, "step": 59320 }, { "epoch": 1.97, "grad_norm": 0.5940274000167847, "learning_rate": 0.0001572344090731443, "loss": 1.7325, "step": 59321 }, { "epoch": 1.97, "grad_norm": 0.5955715179443359, "learning_rate": 0.00015722521644652033, "loss": 1.758, "step": 59322 }, { "epoch": 1.97, "grad_norm": 0.5882154703140259, "learning_rate": 0.00015721602399320373, "loss": 1.7549, "step": 59323 }, { "epoch": 1.97, "grad_norm": 0.6243512034416199, "learning_rate": 0.00015720683171320575, "loss": 1.6793, "step": 59324 }, { "epoch": 1.97, "grad_norm": 0.5986009836196899, "learning_rate": 0.0001571976396065378, "loss": 1.7711, "step": 59325 }, { "epoch": 1.97, "grad_norm": 0.6153962016105652, "learning_rate": 0.0001571884476732109, "loss": 1.6163, "step": 59326 }, { "epoch": 1.97, "grad_norm": 0.5926675200462341, "learning_rate": 0.000157179255913236, "loss": 1.7888, "step": 59327 }, { "epoch": 1.97, "grad_norm": 0.605423092842102, "learning_rate": 0.00015717006432662458, "loss": 1.7751, "step": 59328 }, { "epoch": 1.97, "grad_norm": 0.6025916337966919, "learning_rate": 0.0001571608729133877, "loss": 1.6283, "step": 59329 }, { "epoch": 1.97, "grad_norm": 0.6190840005874634, "learning_rate": 0.0001571516816735363, "loss": 1.7533, "step": 59330 }, { "epoch": 1.97, "grad_norm": 0.6366637945175171, "learning_rate": 0.00015714249060708174, "loss": 1.6621, "step": 59331 }, { "epoch": 1.97, "grad_norm": 0.604315459728241, "learning_rate": 0.00015713329971403528, "loss": 1.6665, "step": 59332 }, { "epoch": 1.97, "grad_norm": 0.6071586608886719, "learning_rate": 0.0001571241089944079, "loss": 1.7227, "step": 59333 }, { "epoch": 1.97, "grad_norm": 0.6249037384986877, "learning_rate": 0.00015711491844821071, "loss": 1.7446, "step": 59334 }, { "epoch": 1.97, "grad_norm": 0.5983213186264038, "learning_rate": 0.00015710572807545506, "loss": 1.6291, "step": 59335 }, { "epoch": 1.97, "grad_norm": 0.6077702045440674, "learning_rate": 0.000157096537876152, "loss": 1.7364, "step": 59336 }, { "epoch": 1.97, "grad_norm": 0.5984311699867249, "learning_rate": 0.0001570873478503126, "loss": 1.6828, "step": 59337 }, { "epoch": 1.97, "grad_norm": 0.6033621430397034, "learning_rate": 0.0001570781579979482, "loss": 1.8145, "step": 59338 }, { "epoch": 1.97, "grad_norm": 0.6245523691177368, "learning_rate": 0.0001570689683190697, "loss": 1.6684, "step": 59339 }, { "epoch": 1.97, "grad_norm": 0.588861346244812, "learning_rate": 0.00015705977881368862, "loss": 1.7257, "step": 59340 }, { "epoch": 1.97, "grad_norm": 0.614647388458252, "learning_rate": 0.0001570505894818157, "loss": 1.7276, "step": 59341 }, { "epoch": 1.97, "grad_norm": 0.589457631111145, "learning_rate": 0.00015704140032346247, "loss": 1.6582, "step": 59342 }, { "epoch": 1.97, "grad_norm": 0.6016380190849304, "learning_rate": 0.00015703221133863988, "loss": 1.7686, "step": 59343 }, { "epoch": 1.97, "grad_norm": 0.6112847924232483, "learning_rate": 0.00015702302252735905, "loss": 1.695, "step": 59344 }, { "epoch": 1.97, "grad_norm": 0.596335768699646, "learning_rate": 0.00015701383388963132, "loss": 1.6906, "step": 59345 }, { "epoch": 1.97, "grad_norm": 1.0970901250839233, "learning_rate": 0.00015700464542546758, "loss": 1.765, "step": 59346 }, { "epoch": 1.97, "grad_norm": 0.6038473844528198, "learning_rate": 0.00015699545713487926, "loss": 1.7169, "step": 59347 }, { "epoch": 1.97, "grad_norm": 0.5997029542922974, "learning_rate": 0.00015698626901787722, "loss": 1.7827, "step": 59348 }, { "epoch": 1.97, "grad_norm": 0.5914803743362427, "learning_rate": 0.00015697708107447297, "loss": 1.7992, "step": 59349 }, { "epoch": 1.97, "grad_norm": 0.6024496555328369, "learning_rate": 0.00015696789330467743, "loss": 1.7159, "step": 59350 }, { "epoch": 1.97, "grad_norm": 0.5982974171638489, "learning_rate": 0.0001569587057085017, "loss": 1.7662, "step": 59351 }, { "epoch": 1.97, "grad_norm": 0.6017321944236755, "learning_rate": 0.0001569495182859571, "loss": 1.6862, "step": 59352 }, { "epoch": 1.97, "grad_norm": 0.6171180605888367, "learning_rate": 0.00015694033103705462, "loss": 1.7625, "step": 59353 }, { "epoch": 1.97, "grad_norm": 0.6094034314155579, "learning_rate": 0.00015693114396180563, "loss": 1.7737, "step": 59354 }, { "epoch": 1.97, "grad_norm": 0.5929202437400818, "learning_rate": 0.0001569219570602211, "loss": 1.7014, "step": 59355 }, { "epoch": 1.97, "grad_norm": 0.6045965552330017, "learning_rate": 0.00015691277033231217, "loss": 1.7576, "step": 59356 }, { "epoch": 1.97, "grad_norm": 0.6103060245513916, "learning_rate": 0.00015690358377809014, "loss": 1.6891, "step": 59357 }, { "epoch": 1.97, "grad_norm": 0.5909797549247742, "learning_rate": 0.00015689439739756598, "loss": 1.7497, "step": 59358 }, { "epoch": 1.97, "grad_norm": 0.608571469783783, "learning_rate": 0.00015688521119075106, "loss": 1.7807, "step": 59359 }, { "epoch": 1.97, "grad_norm": 0.5926345586776733, "learning_rate": 0.00015687602515765625, "loss": 1.702, "step": 59360 }, { "epoch": 1.97, "grad_norm": 0.6026214957237244, "learning_rate": 0.000156866839298293, "loss": 1.6957, "step": 59361 }, { "epoch": 1.97, "grad_norm": 0.6171843409538269, "learning_rate": 0.00015685765361267233, "loss": 1.6376, "step": 59362 }, { "epoch": 1.98, "grad_norm": 0.6372520923614502, "learning_rate": 0.0001568484681008052, "loss": 1.6604, "step": 59363 }, { "epoch": 1.98, "grad_norm": 0.6112825870513916, "learning_rate": 0.00015683928276270315, "loss": 1.7601, "step": 59364 }, { "epoch": 1.98, "grad_norm": 0.5939033627510071, "learning_rate": 0.00015683009759837703, "loss": 1.6471, "step": 59365 }, { "epoch": 1.98, "grad_norm": 0.6016491055488586, "learning_rate": 0.000156820912607838, "loss": 1.7152, "step": 59366 }, { "epoch": 1.98, "grad_norm": 0.597515881061554, "learning_rate": 0.0001568117277910973, "loss": 1.7344, "step": 59367 }, { "epoch": 1.98, "grad_norm": 0.6010398864746094, "learning_rate": 0.00015680254314816617, "loss": 1.6946, "step": 59368 }, { "epoch": 1.98, "grad_norm": 0.6018692255020142, "learning_rate": 0.0001567933586790556, "loss": 1.768, "step": 59369 }, { "epoch": 1.98, "grad_norm": 0.6010898351669312, "learning_rate": 0.00015678417438377674, "loss": 1.7103, "step": 59370 }, { "epoch": 1.98, "grad_norm": 0.6125553250312805, "learning_rate": 0.00015677499026234086, "loss": 1.7035, "step": 59371 }, { "epoch": 1.98, "grad_norm": 0.5957149863243103, "learning_rate": 0.0001567658063147591, "loss": 1.6804, "step": 59372 }, { "epoch": 1.98, "grad_norm": 0.5987799763679504, "learning_rate": 0.0001567566225410424, "loss": 1.7758, "step": 59373 }, { "epoch": 1.98, "grad_norm": 0.6098708510398865, "learning_rate": 0.00015674743894120204, "loss": 1.6966, "step": 59374 }, { "epoch": 1.98, "grad_norm": 0.6412744522094727, "learning_rate": 0.00015673825551524933, "loss": 1.6889, "step": 59375 }, { "epoch": 1.98, "grad_norm": 0.6013413071632385, "learning_rate": 0.0001567290722631952, "loss": 1.7086, "step": 59376 }, { "epoch": 1.98, "grad_norm": 0.6232564449310303, "learning_rate": 0.00015671988918505083, "loss": 1.6908, "step": 59377 }, { "epoch": 1.98, "grad_norm": 0.5866588950157166, "learning_rate": 0.00015671070628082748, "loss": 1.618, "step": 59378 }, { "epoch": 1.98, "grad_norm": 0.6123679280281067, "learning_rate": 0.0001567015235505362, "loss": 1.7965, "step": 59379 }, { "epoch": 1.98, "grad_norm": 0.6021289229393005, "learning_rate": 0.00015669234099418807, "loss": 1.6371, "step": 59380 }, { "epoch": 1.98, "grad_norm": 0.5993831157684326, "learning_rate": 0.00015668315861179434, "loss": 1.7798, "step": 59381 }, { "epoch": 1.98, "grad_norm": 0.598918080329895, "learning_rate": 0.00015667397640336622, "loss": 1.6841, "step": 59382 }, { "epoch": 1.98, "grad_norm": 0.6077523231506348, "learning_rate": 0.0001566647943689148, "loss": 1.7468, "step": 59383 }, { "epoch": 1.98, "grad_norm": 0.6012629866600037, "learning_rate": 0.0001566556125084511, "loss": 1.6915, "step": 59384 }, { "epoch": 1.98, "grad_norm": 0.604893147945404, "learning_rate": 0.00015664643082198643, "loss": 1.7888, "step": 59385 }, { "epoch": 1.98, "grad_norm": 0.6010993719100952, "learning_rate": 0.0001566372493095319, "loss": 1.7515, "step": 59386 }, { "epoch": 1.98, "grad_norm": 0.6043745875358582, "learning_rate": 0.0001566280679710985, "loss": 1.6931, "step": 59387 }, { "epoch": 1.98, "grad_norm": 0.6129938364028931, "learning_rate": 0.00015661888680669752, "loss": 1.7222, "step": 59388 }, { "epoch": 1.98, "grad_norm": 0.5979718565940857, "learning_rate": 0.00015660970581634015, "loss": 1.8103, "step": 59389 }, { "epoch": 1.98, "grad_norm": 0.5943294167518616, "learning_rate": 0.00015660052500003751, "loss": 1.6815, "step": 59390 }, { "epoch": 1.98, "grad_norm": 0.599104642868042, "learning_rate": 0.0001565913443578006, "loss": 1.7975, "step": 59391 }, { "epoch": 1.98, "grad_norm": 0.601303219795227, "learning_rate": 0.00015658216388964076, "loss": 1.8157, "step": 59392 }, { "epoch": 1.98, "grad_norm": 0.5936409831047058, "learning_rate": 0.00015657298359556907, "loss": 1.6901, "step": 59393 }, { "epoch": 1.98, "grad_norm": 0.6014497876167297, "learning_rate": 0.00015656380347559647, "loss": 1.6767, "step": 59394 }, { "epoch": 1.98, "grad_norm": 0.6127963662147522, "learning_rate": 0.00015655462352973446, "loss": 1.7032, "step": 59395 }, { "epoch": 1.98, "grad_norm": 0.6050546765327454, "learning_rate": 0.00015654544375799385, "loss": 1.6541, "step": 59396 }, { "epoch": 1.98, "grad_norm": 0.6053736209869385, "learning_rate": 0.00015653626416038606, "loss": 1.7616, "step": 59397 }, { "epoch": 1.98, "grad_norm": 0.629185676574707, "learning_rate": 0.000156527084736922, "loss": 1.739, "step": 59398 }, { "epoch": 1.98, "grad_norm": 0.6020424962043762, "learning_rate": 0.000156517905487613, "loss": 1.7538, "step": 59399 }, { "epoch": 1.98, "grad_norm": 0.6003460884094238, "learning_rate": 0.00015650872641247016, "loss": 1.6908, "step": 59400 }, { "epoch": 1.98, "grad_norm": 0.58826744556427, "learning_rate": 0.00015649954751150447, "loss": 1.654, "step": 59401 }, { "epoch": 1.98, "grad_norm": 0.6076999306678772, "learning_rate": 0.0001564903687847273, "loss": 1.7563, "step": 59402 }, { "epoch": 1.98, "grad_norm": 0.5899958610534668, "learning_rate": 0.0001564811902321495, "loss": 1.7172, "step": 59403 }, { "epoch": 1.98, "grad_norm": 0.5880734920501709, "learning_rate": 0.00015647201185378258, "loss": 1.8092, "step": 59404 }, { "epoch": 1.98, "grad_norm": 0.6061639189720154, "learning_rate": 0.00015646283364963747, "loss": 1.7239, "step": 59405 }, { "epoch": 1.98, "grad_norm": 0.6150164008140564, "learning_rate": 0.0001564536556197252, "loss": 1.7895, "step": 59406 }, { "epoch": 1.98, "grad_norm": 0.6059725284576416, "learning_rate": 0.00015644447776405715, "loss": 1.734, "step": 59407 }, { "epoch": 1.98, "grad_norm": 0.6238056421279907, "learning_rate": 0.00015643530008264424, "loss": 1.7817, "step": 59408 }, { "epoch": 1.98, "grad_norm": 0.6081591844558716, "learning_rate": 0.00015642612257549786, "loss": 1.7004, "step": 59409 }, { "epoch": 1.98, "grad_norm": 0.6030012369155884, "learning_rate": 0.0001564169452426289, "loss": 1.7839, "step": 59410 }, { "epoch": 1.98, "grad_norm": 0.6061607599258423, "learning_rate": 0.00015640776808404868, "loss": 1.7718, "step": 59411 }, { "epoch": 1.98, "grad_norm": 0.6014842391014099, "learning_rate": 0.00015639859109976834, "loss": 1.7561, "step": 59412 }, { "epoch": 1.98, "grad_norm": 0.5860603451728821, "learning_rate": 0.00015638941428979878, "loss": 1.7456, "step": 59413 }, { "epoch": 1.98, "grad_norm": 0.6177551746368408, "learning_rate": 0.00015638023765415145, "loss": 1.7269, "step": 59414 }, { "epoch": 1.98, "grad_norm": 0.6073908805847168, "learning_rate": 0.00015637106119283735, "loss": 1.7147, "step": 59415 }, { "epoch": 1.98, "grad_norm": 0.6186800599098206, "learning_rate": 0.00015636188490586748, "loss": 1.8263, "step": 59416 }, { "epoch": 1.98, "grad_norm": 0.6501563787460327, "learning_rate": 0.00015635270879325316, "loss": 1.7534, "step": 59417 }, { "epoch": 1.98, "grad_norm": 0.6108757257461548, "learning_rate": 0.00015634353285500557, "loss": 1.691, "step": 59418 }, { "epoch": 1.98, "grad_norm": 0.6261948943138123, "learning_rate": 0.00015633435709113577, "loss": 1.7543, "step": 59419 }, { "epoch": 1.98, "grad_norm": 0.5949761271476746, "learning_rate": 0.0001563251815016548, "loss": 1.6997, "step": 59420 }, { "epoch": 1.98, "grad_norm": 0.6153793334960938, "learning_rate": 0.00015631600608657397, "loss": 1.7079, "step": 59421 }, { "epoch": 1.98, "grad_norm": 0.6124382615089417, "learning_rate": 0.00015630683084590433, "loss": 1.7377, "step": 59422 }, { "epoch": 1.98, "grad_norm": 0.5843077898025513, "learning_rate": 0.000156297655779657, "loss": 1.6757, "step": 59423 }, { "epoch": 1.98, "grad_norm": 0.5904276371002197, "learning_rate": 0.00015628848088784303, "loss": 1.7924, "step": 59424 }, { "epoch": 1.98, "grad_norm": 0.6097971796989441, "learning_rate": 0.00015627930617047396, "loss": 1.7028, "step": 59425 }, { "epoch": 1.98, "grad_norm": 0.6089911460876465, "learning_rate": 0.0001562701316275604, "loss": 1.6808, "step": 59426 }, { "epoch": 1.98, "grad_norm": 0.6209830641746521, "learning_rate": 0.00015626095725911372, "loss": 1.7692, "step": 59427 }, { "epoch": 1.98, "grad_norm": 0.6013623476028442, "learning_rate": 0.00015625178306514516, "loss": 1.7089, "step": 59428 }, { "epoch": 1.98, "grad_norm": 0.5963939428329468, "learning_rate": 0.00015624260904566575, "loss": 1.5968, "step": 59429 }, { "epoch": 1.98, "grad_norm": 0.6021506190299988, "learning_rate": 0.00015623343520068653, "loss": 1.7241, "step": 59430 }, { "epoch": 1.98, "grad_norm": 0.9030590653419495, "learning_rate": 0.00015622426153021873, "loss": 1.7327, "step": 59431 }, { "epoch": 1.98, "grad_norm": 0.6308261752128601, "learning_rate": 0.00015621508803427374, "loss": 1.7642, "step": 59432 }, { "epoch": 1.98, "grad_norm": 0.5903941988945007, "learning_rate": 0.0001562059147128622, "loss": 1.7219, "step": 59433 }, { "epoch": 1.98, "grad_norm": 0.644117534160614, "learning_rate": 0.0001561967415659955, "loss": 1.8386, "step": 59434 }, { "epoch": 1.98, "grad_norm": 0.5969755053520203, "learning_rate": 0.0001561875685936849, "loss": 1.7418, "step": 59435 }, { "epoch": 1.98, "grad_norm": 0.5934677720069885, "learning_rate": 0.00015617839579594139, "loss": 1.766, "step": 59436 }, { "epoch": 1.98, "grad_norm": 0.6179055571556091, "learning_rate": 0.00015616922317277597, "loss": 1.7627, "step": 59437 }, { "epoch": 1.98, "grad_norm": 0.6093106269836426, "learning_rate": 0.00015616005072419996, "loss": 1.7526, "step": 59438 }, { "epoch": 1.98, "grad_norm": 0.6164962649345398, "learning_rate": 0.00015615087845022467, "loss": 1.6278, "step": 59439 }, { "epoch": 1.98, "grad_norm": 0.6041269898414612, "learning_rate": 0.00015614170635086077, "loss": 1.7294, "step": 59440 }, { "epoch": 1.98, "grad_norm": 0.60634446144104, "learning_rate": 0.0001561325344261197, "loss": 1.6917, "step": 59441 }, { "epoch": 1.98, "grad_norm": 0.5714605450630188, "learning_rate": 0.00015612336267601258, "loss": 1.743, "step": 59442 }, { "epoch": 1.98, "grad_norm": 0.5968811511993408, "learning_rate": 0.00015611419110055053, "loss": 1.7297, "step": 59443 }, { "epoch": 1.98, "grad_norm": 0.6093752980232239, "learning_rate": 0.00015610501969974452, "loss": 1.6976, "step": 59444 }, { "epoch": 1.98, "grad_norm": 0.6116952300071716, "learning_rate": 0.00015609584847360597, "loss": 1.7554, "step": 59445 }, { "epoch": 1.98, "grad_norm": 0.5930830836296082, "learning_rate": 0.0001560866774221458, "loss": 1.7325, "step": 59446 }, { "epoch": 1.98, "grad_norm": 0.5922375321388245, "learning_rate": 0.00015607750654537513, "loss": 1.7221, "step": 59447 }, { "epoch": 1.98, "grad_norm": 0.5843313932418823, "learning_rate": 0.00015606833584330514, "loss": 1.7976, "step": 59448 }, { "epoch": 1.98, "grad_norm": 0.5872946381568909, "learning_rate": 0.0001560591653159471, "loss": 1.6645, "step": 59449 }, { "epoch": 1.98, "grad_norm": 0.6081438660621643, "learning_rate": 0.00015604999496331205, "loss": 1.688, "step": 59450 }, { "epoch": 1.98, "grad_norm": 0.608371376991272, "learning_rate": 0.00015604082478541095, "loss": 1.7446, "step": 59451 }, { "epoch": 1.98, "grad_norm": 0.588836133480072, "learning_rate": 0.00015603165478225521, "loss": 1.6697, "step": 59452 }, { "epoch": 1.98, "grad_norm": 0.6274052262306213, "learning_rate": 0.00015602248495385582, "loss": 1.7356, "step": 59453 }, { "epoch": 1.98, "grad_norm": 0.6242036819458008, "learning_rate": 0.0001560133153002238, "loss": 1.6649, "step": 59454 }, { "epoch": 1.98, "grad_norm": 0.6373134255409241, "learning_rate": 0.00015600414582137053, "loss": 1.757, "step": 59455 }, { "epoch": 1.98, "grad_norm": 0.613774299621582, "learning_rate": 0.00015599497651730691, "loss": 1.7882, "step": 59456 }, { "epoch": 1.98, "grad_norm": 0.607113242149353, "learning_rate": 0.00015598580738804426, "loss": 1.76, "step": 59457 }, { "epoch": 1.98, "grad_norm": 0.5911158323287964, "learning_rate": 0.00015597663843359348, "loss": 1.7027, "step": 59458 }, { "epoch": 1.98, "grad_norm": 0.6088117361068726, "learning_rate": 0.00015596746965396597, "loss": 1.7716, "step": 59459 }, { "epoch": 1.98, "grad_norm": 0.6149047017097473, "learning_rate": 0.00015595830104917275, "loss": 1.6693, "step": 59460 }, { "epoch": 1.98, "grad_norm": 0.6270379424095154, "learning_rate": 0.0001559491326192248, "loss": 1.7807, "step": 59461 }, { "epoch": 1.98, "grad_norm": 0.5903343558311462, "learning_rate": 0.00015593996436413348, "loss": 1.799, "step": 59462 }, { "epoch": 1.98, "grad_norm": 0.6386378407478333, "learning_rate": 0.00015593079628390975, "loss": 1.7585, "step": 59463 }, { "epoch": 1.98, "grad_norm": 0.6190114617347717, "learning_rate": 0.00015592162837856487, "loss": 1.6917, "step": 59464 }, { "epoch": 1.98, "grad_norm": 0.6121678352355957, "learning_rate": 0.00015591246064810993, "loss": 1.6853, "step": 59465 }, { "epoch": 1.98, "grad_norm": 0.6073219776153564, "learning_rate": 0.0001559032930925559, "loss": 1.6902, "step": 59466 }, { "epoch": 1.98, "grad_norm": 0.6127270460128784, "learning_rate": 0.00015589412571191417, "loss": 1.7408, "step": 59467 }, { "epoch": 1.98, "grad_norm": 0.6104156970977783, "learning_rate": 0.00015588495850619565, "loss": 1.7579, "step": 59468 }, { "epoch": 1.98, "grad_norm": 0.6065676808357239, "learning_rate": 0.0001558757914754116, "loss": 1.7207, "step": 59469 }, { "epoch": 1.98, "grad_norm": 0.5957702994346619, "learning_rate": 0.00015586662461957305, "loss": 1.7576, "step": 59470 }, { "epoch": 1.98, "grad_norm": 0.6162128448486328, "learning_rate": 0.00015585745793869128, "loss": 1.7142, "step": 59471 }, { "epoch": 1.98, "grad_norm": 0.5879386067390442, "learning_rate": 0.00015584829143277731, "loss": 1.7036, "step": 59472 }, { "epoch": 1.98, "grad_norm": 0.5996005535125732, "learning_rate": 0.00015583912510184214, "loss": 1.7164, "step": 59473 }, { "epoch": 1.98, "grad_norm": 0.5868703126907349, "learning_rate": 0.0001558299589458972, "loss": 1.6691, "step": 59474 }, { "epoch": 1.98, "grad_norm": 0.598720908164978, "learning_rate": 0.0001558207929649533, "loss": 1.8142, "step": 59475 }, { "epoch": 1.98, "grad_norm": 0.6034210920333862, "learning_rate": 0.0001558116271590218, "loss": 1.6466, "step": 59476 }, { "epoch": 1.98, "grad_norm": 0.5820876955986023, "learning_rate": 0.00015580246152811368, "loss": 1.7385, "step": 59477 }, { "epoch": 1.98, "grad_norm": 0.6120288968086243, "learning_rate": 0.00015579329607224023, "loss": 1.7, "step": 59478 }, { "epoch": 1.98, "grad_norm": 0.6037392020225525, "learning_rate": 0.0001557841307914124, "loss": 1.7108, "step": 59479 }, { "epoch": 1.98, "grad_norm": 0.6097264289855957, "learning_rate": 0.00015577496568564136, "loss": 1.7868, "step": 59480 }, { "epoch": 1.98, "grad_norm": 0.5822895169258118, "learning_rate": 0.00015576580075493833, "loss": 1.7276, "step": 59481 }, { "epoch": 1.98, "grad_norm": 0.5951581597328186, "learning_rate": 0.00015575663599931442, "loss": 1.7495, "step": 59482 }, { "epoch": 1.98, "grad_norm": 0.6036105155944824, "learning_rate": 0.0001557474714187805, "loss": 1.7034, "step": 59483 }, { "epoch": 1.98, "grad_norm": 0.5935340523719788, "learning_rate": 0.00015573830701334795, "loss": 1.6932, "step": 59484 }, { "epoch": 1.98, "grad_norm": 0.5879199504852295, "learning_rate": 0.00015572914278302795, "loss": 1.7426, "step": 59485 }, { "epoch": 1.98, "grad_norm": 0.601736843585968, "learning_rate": 0.0001557199787278315, "loss": 1.7265, "step": 59486 }, { "epoch": 1.98, "grad_norm": 0.6087328195571899, "learning_rate": 0.00015571081484776962, "loss": 1.6419, "step": 59487 }, { "epoch": 1.98, "grad_norm": 0.6009359359741211, "learning_rate": 0.00015570165114285367, "loss": 1.7093, "step": 59488 }, { "epoch": 1.98, "grad_norm": 0.6065216064453125, "learning_rate": 0.00015569248761309468, "loss": 1.7017, "step": 59489 }, { "epoch": 1.98, "grad_norm": 0.5990334153175354, "learning_rate": 0.00015568332425850359, "loss": 1.6763, "step": 59490 }, { "epoch": 1.98, "grad_norm": 0.5952928066253662, "learning_rate": 0.00015567416107909173, "loss": 1.7045, "step": 59491 }, { "epoch": 1.98, "grad_norm": 0.6026208996772766, "learning_rate": 0.00015566499807487025, "loss": 1.8146, "step": 59492 }, { "epoch": 1.98, "grad_norm": 0.6199430227279663, "learning_rate": 0.00015565583524585023, "loss": 1.7274, "step": 59493 }, { "epoch": 1.98, "grad_norm": 0.6332609057426453, "learning_rate": 0.0001556466725920426, "loss": 1.7771, "step": 59494 }, { "epoch": 1.98, "grad_norm": 0.58991539478302, "learning_rate": 0.00015563751011345874, "loss": 1.6643, "step": 59495 }, { "epoch": 1.98, "grad_norm": 0.5996133685112, "learning_rate": 0.00015562834781010973, "loss": 1.7638, "step": 59496 }, { "epoch": 1.98, "grad_norm": 0.6316794157028198, "learning_rate": 0.00015561918568200646, "loss": 1.803, "step": 59497 }, { "epoch": 1.98, "grad_norm": 0.6012080311775208, "learning_rate": 0.00015561002372916027, "loss": 1.7091, "step": 59498 }, { "epoch": 1.98, "grad_norm": 0.5967634916305542, "learning_rate": 0.00015560086195158234, "loss": 1.7349, "step": 59499 }, { "epoch": 1.98, "grad_norm": 0.6036257147789001, "learning_rate": 0.00015559170034928367, "loss": 1.6971, "step": 59500 }, { "epoch": 1.98, "grad_norm": 0.6091745495796204, "learning_rate": 0.00015558253892227532, "loss": 1.6823, "step": 59501 }, { "epoch": 1.98, "grad_norm": 0.6019291281700134, "learning_rate": 0.00015557337767056862, "loss": 1.6833, "step": 59502 }, { "epoch": 1.98, "grad_norm": 0.600549578666687, "learning_rate": 0.0001555642165941745, "loss": 1.7598, "step": 59503 }, { "epoch": 1.98, "grad_norm": 0.6085272431373596, "learning_rate": 0.00015555505569310403, "loss": 1.6571, "step": 59504 }, { "epoch": 1.98, "grad_norm": 0.6055076122283936, "learning_rate": 0.0001555458949673685, "loss": 1.6701, "step": 59505 }, { "epoch": 1.98, "grad_norm": 0.6036688685417175, "learning_rate": 0.000155536734416979, "loss": 1.6817, "step": 59506 }, { "epoch": 1.98, "grad_norm": 0.6100262999534607, "learning_rate": 0.00015552757404194666, "loss": 1.7993, "step": 59507 }, { "epoch": 1.98, "grad_norm": 0.5847475528717041, "learning_rate": 0.00015551841384228246, "loss": 1.684, "step": 59508 }, { "epoch": 1.98, "grad_norm": 0.598029375076294, "learning_rate": 0.00015550925381799773, "loss": 1.721, "step": 59509 }, { "epoch": 1.98, "grad_norm": 0.5982574224472046, "learning_rate": 0.00015550009396910345, "loss": 1.7108, "step": 59510 }, { "epoch": 1.98, "grad_norm": 0.6544528603553772, "learning_rate": 0.00015549093429561065, "loss": 1.7168, "step": 59511 }, { "epoch": 1.98, "grad_norm": 0.577490508556366, "learning_rate": 0.0001554817747975307, "loss": 1.7048, "step": 59512 }, { "epoch": 1.98, "grad_norm": 0.5983541011810303, "learning_rate": 0.00015547261547487447, "loss": 1.7223, "step": 59513 }, { "epoch": 1.98, "grad_norm": 0.5871177911758423, "learning_rate": 0.00015546345632765326, "loss": 1.7972, "step": 59514 }, { "epoch": 1.98, "grad_norm": 0.5958232283592224, "learning_rate": 0.00015545429735587806, "loss": 1.6685, "step": 59515 }, { "epoch": 1.98, "grad_norm": 0.6076377630233765, "learning_rate": 0.0001554451385595601, "loss": 1.6406, "step": 59516 }, { "epoch": 1.98, "grad_norm": 0.5904266834259033, "learning_rate": 0.0001554359799387105, "loss": 1.6977, "step": 59517 }, { "epoch": 1.98, "grad_norm": 0.6216689348220825, "learning_rate": 0.0001554268214933402, "loss": 1.7706, "step": 59518 }, { "epoch": 1.98, "grad_norm": 0.628475546836853, "learning_rate": 0.00015541766322346053, "loss": 1.6817, "step": 59519 }, { "epoch": 1.98, "grad_norm": 0.6072902083396912, "learning_rate": 0.00015540850512908236, "loss": 1.7336, "step": 59520 }, { "epoch": 1.98, "grad_norm": 0.6080593466758728, "learning_rate": 0.00015539934721021714, "loss": 1.7408, "step": 59521 }, { "epoch": 1.98, "grad_norm": 0.6016520261764526, "learning_rate": 0.00015539018946687577, "loss": 1.6707, "step": 59522 }, { "epoch": 1.98, "grad_norm": 0.6220414042472839, "learning_rate": 0.00015538103189906933, "loss": 1.7044, "step": 59523 }, { "epoch": 1.98, "grad_norm": 0.6118638515472412, "learning_rate": 0.00015537187450680908, "loss": 1.7255, "step": 59524 }, { "epoch": 1.98, "grad_norm": 0.6241231560707092, "learning_rate": 0.00015536271729010595, "loss": 1.751, "step": 59525 }, { "epoch": 1.98, "grad_norm": 0.6167783737182617, "learning_rate": 0.0001553535602489713, "loss": 1.7633, "step": 59526 }, { "epoch": 1.98, "grad_norm": 0.6088589429855347, "learning_rate": 0.00015534440338341602, "loss": 1.697, "step": 59527 }, { "epoch": 1.98, "grad_norm": 0.598507821559906, "learning_rate": 0.0001553352466934514, "loss": 1.7551, "step": 59528 }, { "epoch": 1.98, "grad_norm": 0.60817950963974, "learning_rate": 0.0001553260901790885, "loss": 1.7314, "step": 59529 }, { "epoch": 1.98, "grad_norm": 0.6067612171173096, "learning_rate": 0.0001553169338403383, "loss": 1.7589, "step": 59530 }, { "epoch": 1.98, "grad_norm": 0.5896853804588318, "learning_rate": 0.0001553077776772121, "loss": 1.7099, "step": 59531 }, { "epoch": 1.98, "grad_norm": 0.606769859790802, "learning_rate": 0.00015529862168972095, "loss": 1.648, "step": 59532 }, { "epoch": 1.98, "grad_norm": 0.6115221977233887, "learning_rate": 0.00015528946587787588, "loss": 1.7316, "step": 59533 }, { "epoch": 1.98, "grad_norm": 0.6020097136497498, "learning_rate": 0.00015528031024168802, "loss": 1.7054, "step": 59534 }, { "epoch": 1.98, "grad_norm": 0.5922266244888306, "learning_rate": 0.0001552711547811687, "loss": 1.7697, "step": 59535 }, { "epoch": 1.98, "grad_norm": 0.5989289283752441, "learning_rate": 0.00015526199949632884, "loss": 1.755, "step": 59536 }, { "epoch": 1.98, "grad_norm": 0.6199634075164795, "learning_rate": 0.0001552528443871795, "loss": 1.7464, "step": 59537 }, { "epoch": 1.98, "grad_norm": 0.5952683687210083, "learning_rate": 0.00015524368945373196, "loss": 1.7596, "step": 59538 }, { "epoch": 1.98, "grad_norm": 0.6088180541992188, "learning_rate": 0.00015523453469599727, "loss": 1.7098, "step": 59539 }, { "epoch": 1.98, "grad_norm": 0.6205762028694153, "learning_rate": 0.00015522538011398642, "loss": 1.6986, "step": 59540 }, { "epoch": 1.98, "grad_norm": 0.628042995929718, "learning_rate": 0.00015521622570771065, "loss": 1.7233, "step": 59541 }, { "epoch": 1.98, "grad_norm": 0.5858150124549866, "learning_rate": 0.00015520707147718122, "loss": 1.763, "step": 59542 }, { "epoch": 1.98, "grad_norm": 0.6117535829544067, "learning_rate": 0.00015519791742240886, "loss": 1.7031, "step": 59543 }, { "epoch": 1.98, "grad_norm": 0.6282062530517578, "learning_rate": 0.0001551887635434049, "loss": 1.7514, "step": 59544 }, { "epoch": 1.98, "grad_norm": 0.6071929335594177, "learning_rate": 0.00015517960984018055, "loss": 1.7171, "step": 59545 }, { "epoch": 1.98, "grad_norm": 0.6223006844520569, "learning_rate": 0.00015517045631274686, "loss": 1.7088, "step": 59546 }, { "epoch": 1.98, "grad_norm": 0.6315589547157288, "learning_rate": 0.00015516130296111476, "loss": 1.7554, "step": 59547 }, { "epoch": 1.98, "grad_norm": 0.6034726500511169, "learning_rate": 0.00015515214978529548, "loss": 1.6883, "step": 59548 }, { "epoch": 1.98, "grad_norm": 0.6230876445770264, "learning_rate": 0.00015514299678530036, "loss": 1.7542, "step": 59549 }, { "epoch": 1.98, "grad_norm": 0.5965253114700317, "learning_rate": 0.00015513384396114008, "loss": 1.7266, "step": 59550 }, { "epoch": 1.98, "grad_norm": 0.6006882190704346, "learning_rate": 0.000155124691312826, "loss": 1.7549, "step": 59551 }, { "epoch": 1.98, "grad_norm": 0.6115830540657043, "learning_rate": 0.00015511553884036932, "loss": 1.6316, "step": 59552 }, { "epoch": 1.98, "grad_norm": 0.6200066208839417, "learning_rate": 0.00015510638654378097, "loss": 1.7497, "step": 59553 }, { "epoch": 1.98, "grad_norm": 0.6213765740394592, "learning_rate": 0.00015509723442307203, "loss": 1.6728, "step": 59554 }, { "epoch": 1.98, "grad_norm": 0.6156272292137146, "learning_rate": 0.0001550880824782537, "loss": 1.7491, "step": 59555 }, { "epoch": 1.98, "grad_norm": 0.6379172801971436, "learning_rate": 0.00015507893070933735, "loss": 1.693, "step": 59556 }, { "epoch": 1.98, "grad_norm": 0.5970625281333923, "learning_rate": 0.00015506977911633354, "loss": 1.7086, "step": 59557 }, { "epoch": 1.98, "grad_norm": 0.6140509247779846, "learning_rate": 0.00015506062769925368, "loss": 1.7553, "step": 59558 }, { "epoch": 1.98, "grad_norm": 0.6129535436630249, "learning_rate": 0.000155051476458109, "loss": 1.6806, "step": 59559 }, { "epoch": 1.98, "grad_norm": 0.6133803725242615, "learning_rate": 0.00015504232539291046, "loss": 1.778, "step": 59560 }, { "epoch": 1.98, "grad_norm": 0.5873034000396729, "learning_rate": 0.00015503317450366903, "loss": 1.6334, "step": 59561 }, { "epoch": 1.98, "grad_norm": 0.5997750163078308, "learning_rate": 0.0001550240237903961, "loss": 1.699, "step": 59562 }, { "epoch": 1.98, "grad_norm": 0.872737467288971, "learning_rate": 0.0001550148732531027, "loss": 1.7599, "step": 59563 }, { "epoch": 1.98, "grad_norm": 0.6099601984024048, "learning_rate": 0.0001550057228917997, "loss": 1.7212, "step": 59564 }, { "epoch": 1.98, "grad_norm": 0.6255617737770081, "learning_rate": 0.00015499657270649837, "loss": 1.765, "step": 59565 }, { "epoch": 1.98, "grad_norm": 0.5951480865478516, "learning_rate": 0.00015498742269721, "loss": 1.7843, "step": 59566 }, { "epoch": 1.98, "grad_norm": 0.5908306837081909, "learning_rate": 0.00015497827286394553, "loss": 1.7902, "step": 59567 }, { "epoch": 1.98, "grad_norm": 0.5880757570266724, "learning_rate": 0.00015496912320671593, "loss": 1.7184, "step": 59568 }, { "epoch": 1.98, "grad_norm": 0.6129833459854126, "learning_rate": 0.00015495997372553258, "loss": 1.7668, "step": 59569 }, { "epoch": 1.98, "grad_norm": 0.5921768546104431, "learning_rate": 0.00015495082442040645, "loss": 1.684, "step": 59570 }, { "epoch": 1.98, "grad_norm": 0.6159849762916565, "learning_rate": 0.00015494167529134852, "loss": 1.7304, "step": 59571 }, { "epoch": 1.98, "grad_norm": 0.5978758335113525, "learning_rate": 0.00015493252633837015, "loss": 1.738, "step": 59572 }, { "epoch": 1.98, "grad_norm": 0.5803540349006653, "learning_rate": 0.00015492337756148222, "loss": 1.6851, "step": 59573 }, { "epoch": 1.98, "grad_norm": 0.6269450187683105, "learning_rate": 0.000154914228960696, "loss": 1.6742, "step": 59574 }, { "epoch": 1.98, "grad_norm": 0.6059013605117798, "learning_rate": 0.00015490508053602245, "loss": 1.7839, "step": 59575 }, { "epoch": 1.98, "grad_norm": 0.6061713695526123, "learning_rate": 0.00015489593228747283, "loss": 1.8035, "step": 59576 }, { "epoch": 1.98, "grad_norm": 0.6004797220230103, "learning_rate": 0.00015488678421505826, "loss": 1.6608, "step": 59577 }, { "epoch": 1.98, "grad_norm": 0.5694141387939453, "learning_rate": 0.00015487763631878956, "loss": 1.7124, "step": 59578 }, { "epoch": 1.98, "grad_norm": 0.6067284941673279, "learning_rate": 0.00015486848859867815, "loss": 1.7414, "step": 59579 }, { "epoch": 1.98, "grad_norm": 0.6052488088607788, "learning_rate": 0.00015485934105473488, "loss": 1.6558, "step": 59580 }, { "epoch": 1.98, "grad_norm": 0.6144862174987793, "learning_rate": 0.0001548501936869711, "loss": 1.6948, "step": 59581 }, { "epoch": 1.98, "grad_norm": 0.6161826252937317, "learning_rate": 0.00015484104649539785, "loss": 1.6728, "step": 59582 }, { "epoch": 1.98, "grad_norm": 0.6175665855407715, "learning_rate": 0.00015483189948002602, "loss": 1.7245, "step": 59583 }, { "epoch": 1.98, "grad_norm": 0.6136493682861328, "learning_rate": 0.00015482275264086702, "loss": 1.73, "step": 59584 }, { "epoch": 1.98, "grad_norm": 0.6128588318824768, "learning_rate": 0.0001548136059779317, "loss": 1.7643, "step": 59585 }, { "epoch": 1.98, "grad_norm": 0.6103055477142334, "learning_rate": 0.00015480445949123134, "loss": 1.7235, "step": 59586 }, { "epoch": 1.98, "grad_norm": 0.6133304834365845, "learning_rate": 0.00015479531318077688, "loss": 1.736, "step": 59587 }, { "epoch": 1.98, "grad_norm": 0.6136863827705383, "learning_rate": 0.00015478616704657963, "loss": 1.7035, "step": 59588 }, { "epoch": 1.98, "grad_norm": 0.5881713032722473, "learning_rate": 0.0001547770210886506, "loss": 1.7057, "step": 59589 }, { "epoch": 1.98, "grad_norm": 0.6044982671737671, "learning_rate": 0.00015476787530700073, "loss": 1.7965, "step": 59590 }, { "epoch": 1.98, "grad_norm": 0.5936207175254822, "learning_rate": 0.00015475872970164135, "loss": 1.6726, "step": 59591 }, { "epoch": 1.98, "grad_norm": 0.5905076861381531, "learning_rate": 0.00015474958427258336, "loss": 1.7757, "step": 59592 }, { "epoch": 1.98, "grad_norm": 0.5930196642875671, "learning_rate": 0.00015474043901983814, "loss": 1.6829, "step": 59593 }, { "epoch": 1.98, "grad_norm": 0.6182404160499573, "learning_rate": 0.00015473129394341648, "loss": 1.6734, "step": 59594 }, { "epoch": 1.98, "grad_norm": 0.6036377549171448, "learning_rate": 0.00015472214904332975, "loss": 1.7657, "step": 59595 }, { "epoch": 1.98, "grad_norm": 0.6113919615745544, "learning_rate": 0.0001547130043195889, "loss": 1.7485, "step": 59596 }, { "epoch": 1.98, "grad_norm": 0.6226925849914551, "learning_rate": 0.00015470385977220494, "loss": 1.6969, "step": 59597 }, { "epoch": 1.98, "grad_norm": 0.5917689800262451, "learning_rate": 0.00015469471540118925, "loss": 1.7009, "step": 59598 }, { "epoch": 1.98, "grad_norm": 0.6235986948013306, "learning_rate": 0.00015468557120655273, "loss": 1.6599, "step": 59599 }, { "epoch": 1.98, "grad_norm": 0.6027570366859436, "learning_rate": 0.00015467642718830637, "loss": 1.7392, "step": 59600 }, { "epoch": 1.98, "grad_norm": 0.5816159844398499, "learning_rate": 0.0001546672833464615, "loss": 1.7555, "step": 59601 }, { "epoch": 1.98, "grad_norm": 0.5935675501823425, "learning_rate": 0.00015465813968102918, "loss": 1.6863, "step": 59602 }, { "epoch": 1.98, "grad_norm": 0.6146324276924133, "learning_rate": 0.00015464899619202046, "loss": 1.7401, "step": 59603 }, { "epoch": 1.98, "grad_norm": 0.6189820766448975, "learning_rate": 0.00015463985287944633, "loss": 1.7148, "step": 59604 }, { "epoch": 1.98, "grad_norm": 0.5994885563850403, "learning_rate": 0.00015463070974331816, "loss": 1.6716, "step": 59605 }, { "epoch": 1.98, "grad_norm": 0.6048521995544434, "learning_rate": 0.00015462156678364687, "loss": 1.7164, "step": 59606 }, { "epoch": 1.98, "grad_norm": 0.6040774583816528, "learning_rate": 0.00015461242400044344, "loss": 1.6813, "step": 59607 }, { "epoch": 1.98, "grad_norm": 0.6146764755249023, "learning_rate": 0.00015460328139371913, "loss": 1.7451, "step": 59608 }, { "epoch": 1.98, "grad_norm": 0.5963579416275024, "learning_rate": 0.00015459413896348512, "loss": 1.7045, "step": 59609 }, { "epoch": 1.98, "grad_norm": 0.617486834526062, "learning_rate": 0.0001545849967097524, "loss": 1.6422, "step": 59610 }, { "epoch": 1.98, "grad_norm": 0.6357702016830444, "learning_rate": 0.00015457585463253196, "loss": 1.7524, "step": 59611 }, { "epoch": 1.98, "grad_norm": 0.6068212389945984, "learning_rate": 0.0001545667127318351, "loss": 1.7687, "step": 59612 }, { "epoch": 1.98, "grad_norm": 0.6171939373016357, "learning_rate": 0.00015455757100767283, "loss": 1.6726, "step": 59613 }, { "epoch": 1.98, "grad_norm": 0.6029164791107178, "learning_rate": 0.00015454842946005612, "loss": 1.7312, "step": 59614 }, { "epoch": 1.98, "grad_norm": 0.6004990339279175, "learning_rate": 0.0001545392880889962, "loss": 1.7254, "step": 59615 }, { "epoch": 1.98, "grad_norm": 0.6064716577529907, "learning_rate": 0.0001545301468945043, "loss": 1.7824, "step": 59616 }, { "epoch": 1.98, "grad_norm": 0.6266048550605774, "learning_rate": 0.0001545210058765913, "loss": 1.785, "step": 59617 }, { "epoch": 1.98, "grad_norm": 0.5874868035316467, "learning_rate": 0.00015451186503526827, "loss": 1.7969, "step": 59618 }, { "epoch": 1.98, "grad_norm": 0.640666127204895, "learning_rate": 0.00015450272437054653, "loss": 1.704, "step": 59619 }, { "epoch": 1.98, "grad_norm": 0.6168372631072998, "learning_rate": 0.00015449358388243704, "loss": 1.6847, "step": 59620 }, { "epoch": 1.98, "grad_norm": 0.5774304270744324, "learning_rate": 0.00015448444357095078, "loss": 1.6953, "step": 59621 }, { "epoch": 1.98, "grad_norm": 0.6159971952438354, "learning_rate": 0.00015447530343609897, "loss": 1.7121, "step": 59622 }, { "epoch": 1.98, "grad_norm": 0.6215145587921143, "learning_rate": 0.0001544661634778928, "loss": 1.8079, "step": 59623 }, { "epoch": 1.98, "grad_norm": 0.5917137265205383, "learning_rate": 0.0001544570236963433, "loss": 1.6731, "step": 59624 }, { "epoch": 1.98, "grad_norm": 0.6360725164413452, "learning_rate": 0.00015444788409146138, "loss": 1.7375, "step": 59625 }, { "epoch": 1.98, "grad_norm": 0.6420245170593262, "learning_rate": 0.0001544387446632584, "loss": 1.688, "step": 59626 }, { "epoch": 1.98, "grad_norm": 0.6045045852661133, "learning_rate": 0.00015442960541174535, "loss": 1.741, "step": 59627 }, { "epoch": 1.98, "grad_norm": 0.5991430282592773, "learning_rate": 0.00015442046633693316, "loss": 1.7078, "step": 59628 }, { "epoch": 1.98, "grad_norm": 0.6036050915718079, "learning_rate": 0.00015441132743883323, "loss": 1.7311, "step": 59629 }, { "epoch": 1.98, "grad_norm": 0.6155804395675659, "learning_rate": 0.00015440218871745637, "loss": 1.712, "step": 59630 }, { "epoch": 1.98, "grad_norm": 0.6084995865821838, "learning_rate": 0.00015439305017281389, "loss": 1.7236, "step": 59631 }, { "epoch": 1.98, "grad_norm": 0.6238564848899841, "learning_rate": 0.00015438391180491672, "loss": 1.7377, "step": 59632 }, { "epoch": 1.98, "grad_norm": 0.6222984790802002, "learning_rate": 0.0001543747736137761, "loss": 1.7355, "step": 59633 }, { "epoch": 1.98, "grad_norm": 0.6201426386833191, "learning_rate": 0.0001543656355994031, "loss": 1.6776, "step": 59634 }, { "epoch": 1.98, "grad_norm": 0.5792083740234375, "learning_rate": 0.00015435649776180863, "loss": 1.6654, "step": 59635 }, { "epoch": 1.98, "grad_norm": 0.6037546396255493, "learning_rate": 0.00015434736010100397, "loss": 1.7401, "step": 59636 }, { "epoch": 1.98, "grad_norm": 0.6014629602432251, "learning_rate": 0.0001543382226170001, "loss": 1.7652, "step": 59637 }, { "epoch": 1.98, "grad_norm": 0.6073052883148193, "learning_rate": 0.00015432908530980824, "loss": 1.788, "step": 59638 }, { "epoch": 1.98, "grad_norm": 0.6077693700790405, "learning_rate": 0.00015431994817943946, "loss": 1.7276, "step": 59639 }, { "epoch": 1.98, "grad_norm": 0.6210217475891113, "learning_rate": 0.00015431081122590463, "loss": 1.7174, "step": 59640 }, { "epoch": 1.98, "grad_norm": 0.6051003932952881, "learning_rate": 0.00015430167444921517, "loss": 1.6762, "step": 59641 }, { "epoch": 1.98, "grad_norm": 0.5995690822601318, "learning_rate": 0.00015429253784938186, "loss": 1.722, "step": 59642 }, { "epoch": 1.98, "grad_norm": 0.5991703271865845, "learning_rate": 0.00015428340142641605, "loss": 1.683, "step": 59643 }, { "epoch": 1.98, "grad_norm": 0.6279022693634033, "learning_rate": 0.00015427426518032863, "loss": 1.7083, "step": 59644 }, { "epoch": 1.98, "grad_norm": 0.5988302826881409, "learning_rate": 0.00015426512911113086, "loss": 1.7522, "step": 59645 }, { "epoch": 1.98, "grad_norm": 0.6062426567077637, "learning_rate": 0.0001542559932188338, "loss": 1.7848, "step": 59646 }, { "epoch": 1.98, "grad_norm": 0.6057500243186951, "learning_rate": 0.00015424685750344835, "loss": 1.6772, "step": 59647 }, { "epoch": 1.98, "grad_norm": 0.5986740589141846, "learning_rate": 0.00015423772196498586, "loss": 1.7381, "step": 59648 }, { "epoch": 1.98, "grad_norm": 0.599094033241272, "learning_rate": 0.0001542285866034573, "loss": 1.6689, "step": 59649 }, { "epoch": 1.98, "grad_norm": 0.6057148575782776, "learning_rate": 0.00015421945141887362, "loss": 1.6909, "step": 59650 }, { "epoch": 1.98, "grad_norm": 0.6320880055427551, "learning_rate": 0.00015421031641124606, "loss": 1.7797, "step": 59651 }, { "epoch": 1.98, "grad_norm": 0.6210245490074158, "learning_rate": 0.00015420118158058582, "loss": 1.7439, "step": 59652 }, { "epoch": 1.98, "grad_norm": 0.6277243494987488, "learning_rate": 0.00015419204692690387, "loss": 1.7605, "step": 59653 }, { "epoch": 1.98, "grad_norm": 0.5890801548957825, "learning_rate": 0.0001541829124502111, "loss": 1.6671, "step": 59654 }, { "epoch": 1.98, "grad_norm": 0.6166292428970337, "learning_rate": 0.00015417377815051903, "loss": 1.6071, "step": 59655 }, { "epoch": 1.98, "grad_norm": 0.6071001887321472, "learning_rate": 0.00015416464402783842, "loss": 1.8061, "step": 59656 }, { "epoch": 1.98, "grad_norm": 0.6320834755897522, "learning_rate": 0.00015415551008218036, "loss": 1.7152, "step": 59657 }, { "epoch": 1.98, "grad_norm": 0.5980127453804016, "learning_rate": 0.00015414637631355602, "loss": 1.692, "step": 59658 }, { "epoch": 1.98, "grad_norm": 0.6223247051239014, "learning_rate": 0.0001541372427219767, "loss": 1.7526, "step": 59659 }, { "epoch": 1.98, "grad_norm": 0.6324347853660583, "learning_rate": 0.00015412810930745308, "loss": 1.7419, "step": 59660 }, { "epoch": 1.98, "grad_norm": 0.6257874369621277, "learning_rate": 0.0001541189760699964, "loss": 1.7475, "step": 59661 }, { "epoch": 1.98, "grad_norm": 0.5961077213287354, "learning_rate": 0.0001541098430096179, "loss": 1.7847, "step": 59662 }, { "epoch": 1.98, "grad_norm": 0.6474069952964783, "learning_rate": 0.0001541007101263286, "loss": 1.7023, "step": 59663 }, { "epoch": 1.99, "grad_norm": 0.5947747826576233, "learning_rate": 0.00015409157742013937, "loss": 1.7634, "step": 59664 }, { "epoch": 1.99, "grad_norm": 0.5945212841033936, "learning_rate": 0.00015408244489106154, "loss": 1.6918, "step": 59665 }, { "epoch": 1.99, "grad_norm": 0.629510223865509, "learning_rate": 0.0001540733125391063, "loss": 1.8162, "step": 59666 }, { "epoch": 1.99, "grad_norm": 0.644848108291626, "learning_rate": 0.00015406418036428433, "loss": 1.7229, "step": 59667 }, { "epoch": 1.99, "grad_norm": 0.6065182089805603, "learning_rate": 0.00015405504836660697, "loss": 1.8073, "step": 59668 }, { "epoch": 1.99, "grad_norm": 0.6096965074539185, "learning_rate": 0.00015404591654608537, "loss": 1.7247, "step": 59669 }, { "epoch": 1.99, "grad_norm": 0.6188319325447083, "learning_rate": 0.00015403678490273054, "loss": 1.7378, "step": 59670 }, { "epoch": 1.99, "grad_norm": 0.6177109479904175, "learning_rate": 0.0001540276534365534, "loss": 1.7695, "step": 59671 }, { "epoch": 1.99, "grad_norm": 0.6112204194068909, "learning_rate": 0.0001540185221475652, "loss": 1.7847, "step": 59672 }, { "epoch": 1.99, "grad_norm": 0.6037759184837341, "learning_rate": 0.00015400939103577723, "loss": 1.6406, "step": 59673 }, { "epoch": 1.99, "grad_norm": 0.6300584077835083, "learning_rate": 0.00015400026010120013, "loss": 1.7017, "step": 59674 }, { "epoch": 1.99, "grad_norm": 0.5997665524482727, "learning_rate": 0.00015399112934384517, "loss": 1.7148, "step": 59675 }, { "epoch": 1.99, "grad_norm": 0.5876434445381165, "learning_rate": 0.0001539819987637236, "loss": 1.7052, "step": 59676 }, { "epoch": 1.99, "grad_norm": 0.5942957401275635, "learning_rate": 0.00015397286836084639, "loss": 1.7039, "step": 59677 }, { "epoch": 1.99, "grad_norm": 0.6046779155731201, "learning_rate": 0.00015396373813522445, "loss": 1.7571, "step": 59678 }, { "epoch": 1.99, "grad_norm": 0.599814772605896, "learning_rate": 0.00015395460808686917, "loss": 1.709, "step": 59679 }, { "epoch": 1.99, "grad_norm": 0.6130501627922058, "learning_rate": 0.00015394547821579146, "loss": 1.7154, "step": 59680 }, { "epoch": 1.99, "grad_norm": 0.6166996359825134, "learning_rate": 0.00015393634852200227, "loss": 1.6756, "step": 59681 }, { "epoch": 1.99, "grad_norm": 0.5993776321411133, "learning_rate": 0.00015392721900551285, "loss": 1.6991, "step": 59682 }, { "epoch": 1.99, "grad_norm": 0.6319126486778259, "learning_rate": 0.00015391808966633443, "loss": 1.6796, "step": 59683 }, { "epoch": 1.99, "grad_norm": 0.5928047299385071, "learning_rate": 0.0001539089605044779, "loss": 1.7989, "step": 59684 }, { "epoch": 1.99, "grad_norm": 0.6346765756607056, "learning_rate": 0.00015389983151995422, "loss": 1.7027, "step": 59685 }, { "epoch": 1.99, "grad_norm": 0.6015229821205139, "learning_rate": 0.00015389070271277472, "loss": 1.7619, "step": 59686 }, { "epoch": 1.99, "grad_norm": 0.6757721900939941, "learning_rate": 0.00015388157408295042, "loss": 1.7359, "step": 59687 }, { "epoch": 1.99, "grad_norm": 0.5980561971664429, "learning_rate": 0.00015387244563049224, "loss": 1.756, "step": 59688 }, { "epoch": 1.99, "grad_norm": 0.6060286164283752, "learning_rate": 0.00015386331735541147, "loss": 1.708, "step": 59689 }, { "epoch": 1.99, "grad_norm": 0.6258297562599182, "learning_rate": 0.00015385418925771902, "loss": 1.7363, "step": 59690 }, { "epoch": 1.99, "grad_norm": 0.6143187284469604, "learning_rate": 0.00015384506133742613, "loss": 1.672, "step": 59691 }, { "epoch": 1.99, "grad_norm": 0.6226704120635986, "learning_rate": 0.00015383593359454375, "loss": 1.752, "step": 59692 }, { "epoch": 1.99, "grad_norm": 0.5972923636436462, "learning_rate": 0.00015382680602908305, "loss": 1.691, "step": 59693 }, { "epoch": 1.99, "grad_norm": 0.6004873514175415, "learning_rate": 0.0001538176786410551, "loss": 1.7227, "step": 59694 }, { "epoch": 1.99, "grad_norm": 0.5966812968254089, "learning_rate": 0.00015380855143047083, "loss": 1.7412, "step": 59695 }, { "epoch": 1.99, "grad_norm": 0.5923440456390381, "learning_rate": 0.00015379942439734155, "loss": 1.678, "step": 59696 }, { "epoch": 1.99, "grad_norm": 0.5910901427268982, "learning_rate": 0.00015379029754167812, "loss": 1.7074, "step": 59697 }, { "epoch": 1.99, "grad_norm": 0.7065436244010925, "learning_rate": 0.00015378117086349186, "loss": 1.7059, "step": 59698 }, { "epoch": 1.99, "grad_norm": 0.6225166320800781, "learning_rate": 0.0001537720443627937, "loss": 1.7221, "step": 59699 }, { "epoch": 1.99, "grad_norm": 0.6149229407310486, "learning_rate": 0.0001537629180395946, "loss": 1.6618, "step": 59700 }, { "epoch": 1.99, "grad_norm": 0.591920793056488, "learning_rate": 0.0001537537918939059, "loss": 1.7392, "step": 59701 }, { "epoch": 1.99, "grad_norm": 0.6361244916915894, "learning_rate": 0.00015374466592573842, "loss": 1.7985, "step": 59702 }, { "epoch": 1.99, "grad_norm": 0.6309322118759155, "learning_rate": 0.00015373554013510348, "loss": 1.7522, "step": 59703 }, { "epoch": 1.99, "grad_norm": 0.6124933362007141, "learning_rate": 0.00015372641452201193, "loss": 1.712, "step": 59704 }, { "epoch": 1.99, "grad_norm": 0.6204445958137512, "learning_rate": 0.00015371728908647513, "loss": 1.7868, "step": 59705 }, { "epoch": 1.99, "grad_norm": 0.6114707589149475, "learning_rate": 0.00015370816382850392, "loss": 1.7467, "step": 59706 }, { "epoch": 1.99, "grad_norm": 0.5991960763931274, "learning_rate": 0.00015369903874810936, "loss": 1.7514, "step": 59707 }, { "epoch": 1.99, "grad_norm": 0.6248981952667236, "learning_rate": 0.0001536899138453027, "loss": 1.7352, "step": 59708 }, { "epoch": 1.99, "grad_norm": 0.5897312760353088, "learning_rate": 0.00015368078912009496, "loss": 1.7415, "step": 59709 }, { "epoch": 1.99, "grad_norm": 0.618974506855011, "learning_rate": 0.00015367166457249702, "loss": 1.6706, "step": 59710 }, { "epoch": 1.99, "grad_norm": 0.5985434651374817, "learning_rate": 0.0001536625402025202, "loss": 1.7885, "step": 59711 }, { "epoch": 1.99, "grad_norm": 0.5928726196289062, "learning_rate": 0.00015365341601017551, "loss": 1.8053, "step": 59712 }, { "epoch": 1.99, "grad_norm": 0.606899082660675, "learning_rate": 0.00015364429199547409, "loss": 1.7948, "step": 59713 }, { "epoch": 1.99, "grad_norm": 0.58879554271698, "learning_rate": 0.00015363516815842677, "loss": 1.6992, "step": 59714 }, { "epoch": 1.99, "grad_norm": 0.811860203742981, "learning_rate": 0.00015362604449904493, "loss": 1.7557, "step": 59715 }, { "epoch": 1.99, "grad_norm": 0.607701301574707, "learning_rate": 0.0001536169210173395, "loss": 1.7427, "step": 59716 }, { "epoch": 1.99, "grad_norm": 0.6237592101097107, "learning_rate": 0.00015360779771332145, "loss": 1.6827, "step": 59717 }, { "epoch": 1.99, "grad_norm": 0.6129240989685059, "learning_rate": 0.00015359867458700197, "loss": 1.7326, "step": 59718 }, { "epoch": 1.99, "grad_norm": 0.6177148222923279, "learning_rate": 0.00015358955163839223, "loss": 1.6618, "step": 59719 }, { "epoch": 1.99, "grad_norm": 0.5976301431655884, "learning_rate": 0.00015358042886750317, "loss": 1.7727, "step": 59720 }, { "epoch": 1.99, "grad_norm": 0.6269221305847168, "learning_rate": 0.0001535713062743458, "loss": 1.8345, "step": 59721 }, { "epoch": 1.99, "grad_norm": 0.6400700807571411, "learning_rate": 0.0001535621838589314, "loss": 1.7468, "step": 59722 }, { "epoch": 1.99, "grad_norm": 0.6097226142883301, "learning_rate": 0.00015355306162127098, "loss": 1.6702, "step": 59723 }, { "epoch": 1.99, "grad_norm": 0.6226852536201477, "learning_rate": 0.00015354393956137541, "loss": 1.7314, "step": 59724 }, { "epoch": 1.99, "grad_norm": 0.6120319962501526, "learning_rate": 0.00015353481767925594, "loss": 1.7455, "step": 59725 }, { "epoch": 1.99, "grad_norm": 0.5999785661697388, "learning_rate": 0.00015352569597492372, "loss": 1.7206, "step": 59726 }, { "epoch": 1.99, "grad_norm": 0.6541019678115845, "learning_rate": 0.00015351657444838973, "loss": 1.6764, "step": 59727 }, { "epoch": 1.99, "grad_norm": 0.630500853061676, "learning_rate": 0.0001535074530996649, "loss": 1.8272, "step": 59728 }, { "epoch": 1.99, "grad_norm": 0.6347726583480835, "learning_rate": 0.0001534983319287606, "loss": 1.7868, "step": 59729 }, { "epoch": 1.99, "grad_norm": 0.659437894821167, "learning_rate": 0.00015348921093568768, "loss": 1.6874, "step": 59730 }, { "epoch": 1.99, "grad_norm": 0.6192141771316528, "learning_rate": 0.00015348009012045716, "loss": 1.7467, "step": 59731 }, { "epoch": 1.99, "grad_norm": 0.6146800518035889, "learning_rate": 0.00015347096948308027, "loss": 1.6284, "step": 59732 }, { "epoch": 1.99, "grad_norm": 0.5976855754852295, "learning_rate": 0.00015346184902356814, "loss": 1.7239, "step": 59733 }, { "epoch": 1.99, "grad_norm": 0.6405776143074036, "learning_rate": 0.00015345272874193167, "loss": 1.7633, "step": 59734 }, { "epoch": 1.99, "grad_norm": 0.621357262134552, "learning_rate": 0.00015344360863818194, "loss": 1.7435, "step": 59735 }, { "epoch": 1.99, "grad_norm": 0.6543195843696594, "learning_rate": 0.00015343448871233016, "loss": 1.8069, "step": 59736 }, { "epoch": 1.99, "grad_norm": 0.615181565284729, "learning_rate": 0.00015342536896438734, "loss": 1.7568, "step": 59737 }, { "epoch": 1.99, "grad_norm": 0.6192304491996765, "learning_rate": 0.00015341624939436442, "loss": 1.6971, "step": 59738 }, { "epoch": 1.99, "grad_norm": 0.6230681538581848, "learning_rate": 0.00015340713000227253, "loss": 1.7608, "step": 59739 }, { "epoch": 1.99, "grad_norm": 0.6076130270957947, "learning_rate": 0.00015339801078812296, "loss": 1.7363, "step": 59740 }, { "epoch": 1.99, "grad_norm": 0.6036407947540283, "learning_rate": 0.00015338889175192658, "loss": 1.6395, "step": 59741 }, { "epoch": 1.99, "grad_norm": 0.5930737853050232, "learning_rate": 0.0001533797728936943, "loss": 1.6996, "step": 59742 }, { "epoch": 1.99, "grad_norm": 0.6013799905776978, "learning_rate": 0.00015337065421343758, "loss": 1.7272, "step": 59743 }, { "epoch": 1.99, "grad_norm": 0.6091524362564087, "learning_rate": 0.00015336153571116724, "loss": 1.7366, "step": 59744 }, { "epoch": 1.99, "grad_norm": 0.6065428256988525, "learning_rate": 0.00015335241738689427, "loss": 1.7436, "step": 59745 }, { "epoch": 1.99, "grad_norm": 0.5903762578964233, "learning_rate": 0.00015334329924063, "loss": 1.6915, "step": 59746 }, { "epoch": 1.99, "grad_norm": 0.6137210130691528, "learning_rate": 0.0001533341812723852, "loss": 1.7677, "step": 59747 }, { "epoch": 1.99, "grad_norm": 0.5972310900688171, "learning_rate": 0.00015332506348217126, "loss": 1.7505, "step": 59748 }, { "epoch": 1.99, "grad_norm": 0.6147395968437195, "learning_rate": 0.00015331594586999895, "loss": 1.6857, "step": 59749 }, { "epoch": 1.99, "grad_norm": 0.5992609262466431, "learning_rate": 0.00015330682843587955, "loss": 1.6612, "step": 59750 }, { "epoch": 1.99, "grad_norm": 4.425258636474609, "learning_rate": 0.0001532977111798241, "loss": 1.6979, "step": 59751 }, { "epoch": 1.99, "grad_norm": 0.6085082292556763, "learning_rate": 0.00015328859410184347, "loss": 1.6476, "step": 59752 }, { "epoch": 1.99, "grad_norm": 0.6091211438179016, "learning_rate": 0.00015327947720194897, "loss": 1.7629, "step": 59753 }, { "epoch": 1.99, "grad_norm": 0.6139100193977356, "learning_rate": 0.00015327036048015148, "loss": 1.779, "step": 59754 }, { "epoch": 1.99, "grad_norm": 0.6110336184501648, "learning_rate": 0.00015326124393646225, "loss": 1.7072, "step": 59755 }, { "epoch": 1.99, "grad_norm": 0.6060741543769836, "learning_rate": 0.00015325212757089223, "loss": 1.7752, "step": 59756 }, { "epoch": 1.99, "grad_norm": 0.607900857925415, "learning_rate": 0.00015324301138345243, "loss": 1.8416, "step": 59757 }, { "epoch": 1.99, "grad_norm": 0.619258463382721, "learning_rate": 0.00015323389537415407, "loss": 1.708, "step": 59758 }, { "epoch": 1.99, "grad_norm": 0.6328524947166443, "learning_rate": 0.00015322477954300803, "loss": 1.7996, "step": 59759 }, { "epoch": 1.99, "grad_norm": 0.6036967635154724, "learning_rate": 0.0001532156638900256, "loss": 1.6966, "step": 59760 }, { "epoch": 1.99, "grad_norm": 0.5864855647087097, "learning_rate": 0.00015320654841521764, "loss": 1.7212, "step": 59761 }, { "epoch": 1.99, "grad_norm": 0.6083919405937195, "learning_rate": 0.0001531974331185954, "loss": 1.727, "step": 59762 }, { "epoch": 1.99, "grad_norm": 0.6153901219367981, "learning_rate": 0.00015318831800016983, "loss": 1.7632, "step": 59763 }, { "epoch": 1.99, "grad_norm": 0.6033831238746643, "learning_rate": 0.0001531792030599519, "loss": 1.7874, "step": 59764 }, { "epoch": 1.99, "grad_norm": 0.6087221503257751, "learning_rate": 0.00015317008829795293, "loss": 1.7289, "step": 59765 }, { "epoch": 1.99, "grad_norm": 0.5979778170585632, "learning_rate": 0.00015316097371418378, "loss": 1.712, "step": 59766 }, { "epoch": 1.99, "grad_norm": 0.6113152503967285, "learning_rate": 0.00015315185930865548, "loss": 1.7459, "step": 59767 }, { "epoch": 1.99, "grad_norm": 0.5852436423301697, "learning_rate": 0.00015314274508137918, "loss": 1.7314, "step": 59768 }, { "epoch": 1.99, "grad_norm": 0.5840221047401428, "learning_rate": 0.00015313363103236608, "loss": 1.6365, "step": 59769 }, { "epoch": 1.99, "grad_norm": 0.5842517614364624, "learning_rate": 0.00015312451716162711, "loss": 1.6915, "step": 59770 }, { "epoch": 1.99, "grad_norm": 0.664970874786377, "learning_rate": 0.00015311540346917322, "loss": 1.7194, "step": 59771 }, { "epoch": 1.99, "grad_norm": 0.6023766994476318, "learning_rate": 0.00015310628995501572, "loss": 1.6623, "step": 59772 }, { "epoch": 1.99, "grad_norm": 0.5996789932250977, "learning_rate": 0.00015309717661916552, "loss": 1.8058, "step": 59773 }, { "epoch": 1.99, "grad_norm": 0.625644862651825, "learning_rate": 0.00015308806346163358, "loss": 1.7955, "step": 59774 }, { "epoch": 1.99, "grad_norm": 0.6328991651535034, "learning_rate": 0.00015307895048243108, "loss": 1.7497, "step": 59775 }, { "epoch": 1.99, "grad_norm": 0.5964829325675964, "learning_rate": 0.00015306983768156935, "loss": 1.7913, "step": 59776 }, { "epoch": 1.99, "grad_norm": 0.6032319664955139, "learning_rate": 0.00015306072505905894, "loss": 1.6614, "step": 59777 }, { "epoch": 1.99, "grad_norm": 0.6253906488418579, "learning_rate": 0.00015305161261491116, "loss": 1.6943, "step": 59778 }, { "epoch": 1.99, "grad_norm": 0.6099511384963989, "learning_rate": 0.0001530425003491372, "loss": 1.7005, "step": 59779 }, { "epoch": 1.99, "grad_norm": 0.6288910508155823, "learning_rate": 0.00015303338826174796, "loss": 1.7682, "step": 59780 }, { "epoch": 1.99, "grad_norm": 0.6062506437301636, "learning_rate": 0.00015302427635275443, "loss": 1.6683, "step": 59781 }, { "epoch": 1.99, "grad_norm": 0.590472936630249, "learning_rate": 0.00015301516462216778, "loss": 1.7293, "step": 59782 }, { "epoch": 1.99, "grad_norm": 0.6228043437004089, "learning_rate": 0.0001530060530699993, "loss": 1.7457, "step": 59783 }, { "epoch": 1.99, "grad_norm": 0.5999060273170471, "learning_rate": 0.00015299694169625954, "loss": 1.6718, "step": 59784 }, { "epoch": 1.99, "grad_norm": 0.6114014387130737, "learning_rate": 0.00015298783050095988, "loss": 1.6458, "step": 59785 }, { "epoch": 1.99, "grad_norm": 0.615250825881958, "learning_rate": 0.00015297871948411143, "loss": 1.7477, "step": 59786 }, { "epoch": 1.99, "grad_norm": 0.5980390310287476, "learning_rate": 0.00015296960864572513, "loss": 1.7201, "step": 59787 }, { "epoch": 1.99, "grad_norm": 0.596322238445282, "learning_rate": 0.00015296049798581198, "loss": 1.7209, "step": 59788 }, { "epoch": 1.99, "grad_norm": 0.626858115196228, "learning_rate": 0.00015295138750438312, "loss": 1.746, "step": 59789 }, { "epoch": 1.99, "grad_norm": 0.6104669570922852, "learning_rate": 0.00015294227720144982, "loss": 1.6929, "step": 59790 }, { "epoch": 1.99, "grad_norm": 0.6397727727890015, "learning_rate": 0.0001529331670770227, "loss": 1.7881, "step": 59791 }, { "epoch": 1.99, "grad_norm": 0.6236669421195984, "learning_rate": 0.00015292405713111307, "loss": 1.7166, "step": 59792 }, { "epoch": 1.99, "grad_norm": 0.5987226963043213, "learning_rate": 0.00015291494736373207, "loss": 1.6703, "step": 59793 }, { "epoch": 1.99, "grad_norm": 0.6253364086151123, "learning_rate": 0.00015290583777489064, "loss": 1.8024, "step": 59794 }, { "epoch": 1.99, "grad_norm": 0.6097957491874695, "learning_rate": 0.00015289672836459975, "loss": 1.6847, "step": 59795 }, { "epoch": 1.99, "grad_norm": 0.6039647459983826, "learning_rate": 0.0001528876191328706, "loss": 1.6553, "step": 59796 }, { "epoch": 1.99, "grad_norm": 0.6594818830490112, "learning_rate": 0.00015287851007971432, "loss": 1.765, "step": 59797 }, { "epoch": 1.99, "grad_norm": 0.5966539978981018, "learning_rate": 0.00015286940120514165, "loss": 1.7751, "step": 59798 }, { "epoch": 1.99, "grad_norm": 0.6111415028572083, "learning_rate": 0.0001528602925091639, "loss": 1.7393, "step": 59799 }, { "epoch": 1.99, "grad_norm": 0.6403798460960388, "learning_rate": 0.00015285118399179214, "loss": 1.7324, "step": 59800 }, { "epoch": 1.99, "grad_norm": 0.6349837779998779, "learning_rate": 0.0001528420756530374, "loss": 1.7523, "step": 59801 }, { "epoch": 1.99, "grad_norm": 0.608503520488739, "learning_rate": 0.00015283296749291056, "loss": 1.8393, "step": 59802 }, { "epoch": 1.99, "grad_norm": 0.610174298286438, "learning_rate": 0.00015282385951142296, "loss": 1.6603, "step": 59803 }, { "epoch": 1.99, "grad_norm": 0.6027151942253113, "learning_rate": 0.0001528147517085855, "loss": 1.7226, "step": 59804 }, { "epoch": 1.99, "grad_norm": 0.6254961490631104, "learning_rate": 0.00015280564408440912, "loss": 1.688, "step": 59805 }, { "epoch": 1.99, "grad_norm": 0.603091835975647, "learning_rate": 0.0001527965366389051, "loss": 1.7175, "step": 59806 }, { "epoch": 1.99, "grad_norm": 0.604857325553894, "learning_rate": 0.0001527874293720843, "loss": 1.7229, "step": 59807 }, { "epoch": 1.99, "grad_norm": 0.6261650323867798, "learning_rate": 0.000152778322283958, "loss": 1.7441, "step": 59808 }, { "epoch": 1.99, "grad_norm": 0.5994437336921692, "learning_rate": 0.00015276921537453694, "loss": 1.7063, "step": 59809 }, { "epoch": 1.99, "grad_norm": 0.6425709128379822, "learning_rate": 0.00015276010864383256, "loss": 1.7505, "step": 59810 }, { "epoch": 1.99, "grad_norm": 0.6212832927703857, "learning_rate": 0.00015275100209185565, "loss": 1.7546, "step": 59811 }, { "epoch": 1.99, "grad_norm": 0.6072742938995361, "learning_rate": 0.00015274189571861725, "loss": 1.6977, "step": 59812 }, { "epoch": 1.99, "grad_norm": 0.6158303618431091, "learning_rate": 0.0001527327895241286, "loss": 1.8257, "step": 59813 }, { "epoch": 1.99, "grad_norm": 0.6245070695877075, "learning_rate": 0.00015272368350840052, "loss": 1.7517, "step": 59814 }, { "epoch": 1.99, "grad_norm": 0.582664430141449, "learning_rate": 0.00015271457767144432, "loss": 1.7227, "step": 59815 }, { "epoch": 1.99, "grad_norm": 0.6147477626800537, "learning_rate": 0.0001527054720132709, "loss": 1.7499, "step": 59816 }, { "epoch": 1.99, "grad_norm": 0.5945436358451843, "learning_rate": 0.00015269636653389123, "loss": 1.7389, "step": 59817 }, { "epoch": 1.99, "grad_norm": 0.628189206123352, "learning_rate": 0.0001526872612333166, "loss": 1.7602, "step": 59818 }, { "epoch": 1.99, "grad_norm": 0.6291849613189697, "learning_rate": 0.00015267815611155777, "loss": 1.6858, "step": 59819 }, { "epoch": 1.99, "grad_norm": 0.6276566386222839, "learning_rate": 0.00015266905116862608, "loss": 1.7429, "step": 59820 }, { "epoch": 1.99, "grad_norm": 0.6129144430160522, "learning_rate": 0.00015265994640453236, "loss": 1.7416, "step": 59821 }, { "epoch": 1.99, "grad_norm": 0.6208592057228088, "learning_rate": 0.00015265084181928786, "loss": 1.8146, "step": 59822 }, { "epoch": 1.99, "grad_norm": 0.5938396453857422, "learning_rate": 0.00015264173741290354, "loss": 1.784, "step": 59823 }, { "epoch": 1.99, "grad_norm": 0.5943357944488525, "learning_rate": 0.0001526326331853903, "loss": 1.7272, "step": 59824 }, { "epoch": 1.99, "grad_norm": 0.6077789664268494, "learning_rate": 0.00015262352913675945, "loss": 1.7318, "step": 59825 }, { "epoch": 1.99, "grad_norm": 0.6514522433280945, "learning_rate": 0.00015261442526702193, "loss": 1.6796, "step": 59826 }, { "epoch": 1.99, "grad_norm": 0.6153094172477722, "learning_rate": 0.00015260532157618869, "loss": 1.6631, "step": 59827 }, { "epoch": 1.99, "grad_norm": 0.6065728664398193, "learning_rate": 0.00015259621806427084, "loss": 1.7545, "step": 59828 }, { "epoch": 1.99, "grad_norm": 0.6209034323692322, "learning_rate": 0.00015258711473127955, "loss": 1.7631, "step": 59829 }, { "epoch": 1.99, "grad_norm": 0.6065047979354858, "learning_rate": 0.00015257801157722583, "loss": 1.7944, "step": 59830 }, { "epoch": 1.99, "grad_norm": 0.617239773273468, "learning_rate": 0.00015256890860212058, "loss": 1.6917, "step": 59831 }, { "epoch": 1.99, "grad_norm": 0.6533868312835693, "learning_rate": 0.000152559805805975, "loss": 1.7753, "step": 59832 }, { "epoch": 1.99, "grad_norm": 0.6117622256278992, "learning_rate": 0.00015255070318880015, "loss": 1.7772, "step": 59833 }, { "epoch": 1.99, "grad_norm": 0.651084303855896, "learning_rate": 0.0001525416007506069, "loss": 1.7414, "step": 59834 }, { "epoch": 1.99, "grad_norm": 0.6068913340568542, "learning_rate": 0.00015253249849140642, "loss": 1.7471, "step": 59835 }, { "epoch": 1.99, "grad_norm": 0.6013659834861755, "learning_rate": 0.00015252339641120985, "loss": 1.7417, "step": 59836 }, { "epoch": 1.99, "grad_norm": 0.6376093626022339, "learning_rate": 0.00015251429451002812, "loss": 1.7928, "step": 59837 }, { "epoch": 1.99, "grad_norm": 0.6190723180770874, "learning_rate": 0.00015250519278787225, "loss": 1.7672, "step": 59838 }, { "epoch": 1.99, "grad_norm": 0.6328273415565491, "learning_rate": 0.00015249609124475344, "loss": 1.8002, "step": 59839 }, { "epoch": 1.99, "grad_norm": 0.6378727555274963, "learning_rate": 0.00015248698988068265, "loss": 1.7398, "step": 59840 }, { "epoch": 1.99, "grad_norm": 0.6006180644035339, "learning_rate": 0.00015247788869567075, "loss": 1.7717, "step": 59841 }, { "epoch": 1.99, "grad_norm": 0.5852500796318054, "learning_rate": 0.000152468787689729, "loss": 1.6749, "step": 59842 }, { "epoch": 1.99, "grad_norm": 0.6482147574424744, "learning_rate": 0.00015245968686286856, "loss": 1.7301, "step": 59843 }, { "epoch": 1.99, "grad_norm": 0.6068700551986694, "learning_rate": 0.00015245058621510024, "loss": 1.6415, "step": 59844 }, { "epoch": 1.99, "grad_norm": 0.5886829495429993, "learning_rate": 0.00015244148574643509, "loss": 1.699, "step": 59845 }, { "epoch": 1.99, "grad_norm": 0.602959394454956, "learning_rate": 0.00015243238545688435, "loss": 1.7558, "step": 59846 }, { "epoch": 1.99, "grad_norm": 0.6040030121803284, "learning_rate": 0.00015242328534645897, "loss": 1.6627, "step": 59847 }, { "epoch": 1.99, "grad_norm": 0.5795344710350037, "learning_rate": 0.00015241418541516984, "loss": 1.6612, "step": 59848 }, { "epoch": 1.99, "grad_norm": 0.6076956391334534, "learning_rate": 0.00015240508566302815, "loss": 1.7218, "step": 59849 }, { "epoch": 1.99, "grad_norm": 0.5952850580215454, "learning_rate": 0.00015239598609004503, "loss": 1.7067, "step": 59850 }, { "epoch": 1.99, "grad_norm": 0.6072521805763245, "learning_rate": 0.00015238688669623148, "loss": 1.7428, "step": 59851 }, { "epoch": 1.99, "grad_norm": 0.6253664493560791, "learning_rate": 0.00015237778748159836, "loss": 1.7018, "step": 59852 }, { "epoch": 1.99, "grad_norm": 1.5622614622116089, "learning_rate": 0.00015236868844615696, "loss": 1.8314, "step": 59853 }, { "epoch": 1.99, "grad_norm": 0.595112681388855, "learning_rate": 0.00015235958958991825, "loss": 1.7177, "step": 59854 }, { "epoch": 1.99, "grad_norm": 0.5952828526496887, "learning_rate": 0.0001523504909128931, "loss": 1.7171, "step": 59855 }, { "epoch": 1.99, "grad_norm": 0.6157467365264893, "learning_rate": 0.0001523413924150927, "loss": 1.6337, "step": 59856 }, { "epoch": 1.99, "grad_norm": 0.5978900790214539, "learning_rate": 0.0001523322940965282, "loss": 1.6166, "step": 59857 }, { "epoch": 1.99, "grad_norm": 0.5847402215003967, "learning_rate": 0.00015232319595721058, "loss": 1.7443, "step": 59858 }, { "epoch": 1.99, "grad_norm": 0.5967628955841064, "learning_rate": 0.0001523140979971507, "loss": 1.7387, "step": 59859 }, { "epoch": 1.99, "grad_norm": 0.6113021969795227, "learning_rate": 0.00015230500021635987, "loss": 1.7635, "step": 59860 }, { "epoch": 1.99, "grad_norm": 0.6225453019142151, "learning_rate": 0.00015229590261484896, "loss": 1.7812, "step": 59861 }, { "epoch": 1.99, "grad_norm": 0.5978413820266724, "learning_rate": 0.000152286805192629, "loss": 1.7108, "step": 59862 }, { "epoch": 1.99, "grad_norm": 0.6011012196540833, "learning_rate": 0.0001522777079497112, "loss": 1.7746, "step": 59863 }, { "epoch": 1.99, "grad_norm": 0.6224221587181091, "learning_rate": 0.00015226861088610635, "loss": 1.7449, "step": 59864 }, { "epoch": 1.99, "grad_norm": 0.5919440984725952, "learning_rate": 0.0001522595140018258, "loss": 1.6626, "step": 59865 }, { "epoch": 1.99, "grad_norm": 0.6019123196601868, "learning_rate": 0.00015225041729688028, "loss": 1.759, "step": 59866 }, { "epoch": 1.99, "grad_norm": 0.6172223687171936, "learning_rate": 0.00015224132077128115, "loss": 1.6459, "step": 59867 }, { "epoch": 1.99, "grad_norm": 0.6046431660652161, "learning_rate": 0.00015223222442503924, "loss": 1.731, "step": 59868 }, { "epoch": 1.99, "grad_norm": 0.6068094968795776, "learning_rate": 0.0001522231282581655, "loss": 1.7456, "step": 59869 }, { "epoch": 1.99, "grad_norm": 0.6090803146362305, "learning_rate": 0.00015221403227067126, "loss": 1.7225, "step": 59870 }, { "epoch": 1.99, "grad_norm": 0.5980326533317566, "learning_rate": 0.0001522049364625673, "loss": 1.6999, "step": 59871 }, { "epoch": 1.99, "grad_norm": 0.6187777519226074, "learning_rate": 0.00015219584083386487, "loss": 1.7047, "step": 59872 }, { "epoch": 1.99, "grad_norm": 0.6080006957054138, "learning_rate": 0.00015218674538457493, "loss": 1.6359, "step": 59873 }, { "epoch": 1.99, "grad_norm": 0.5889986157417297, "learning_rate": 0.00015217765011470835, "loss": 1.6912, "step": 59874 }, { "epoch": 1.99, "grad_norm": 0.6255099773406982, "learning_rate": 0.00015216855502427648, "loss": 1.6979, "step": 59875 }, { "epoch": 1.99, "grad_norm": 0.6030710339546204, "learning_rate": 0.00015215946011329007, "loss": 1.7196, "step": 59876 }, { "epoch": 1.99, "grad_norm": 0.6045045852661133, "learning_rate": 0.0001521503653817604, "loss": 1.7275, "step": 59877 }, { "epoch": 1.99, "grad_norm": 0.6322401762008667, "learning_rate": 0.0001521412708296983, "loss": 1.7181, "step": 59878 }, { "epoch": 1.99, "grad_norm": 0.5882013440132141, "learning_rate": 0.000152132176457115, "loss": 1.7018, "step": 59879 }, { "epoch": 1.99, "grad_norm": 2.80438494682312, "learning_rate": 0.00015212308226402146, "loss": 1.7065, "step": 59880 }, { "epoch": 1.99, "grad_norm": 0.6093392372131348, "learning_rate": 0.0001521139882504286, "loss": 1.7426, "step": 59881 }, { "epoch": 1.99, "grad_norm": 0.6087260842323303, "learning_rate": 0.0001521048944163477, "loss": 1.7367, "step": 59882 }, { "epoch": 1.99, "grad_norm": 0.5919033288955688, "learning_rate": 0.00015209580076178965, "loss": 1.7244, "step": 59883 }, { "epoch": 1.99, "grad_norm": 0.6054392457008362, "learning_rate": 0.00015208670728676538, "loss": 1.6241, "step": 59884 }, { "epoch": 1.99, "grad_norm": 0.611976146697998, "learning_rate": 0.0001520776139912861, "loss": 1.6908, "step": 59885 }, { "epoch": 1.99, "grad_norm": 0.603916585445404, "learning_rate": 0.00015206852087536286, "loss": 1.7757, "step": 59886 }, { "epoch": 1.99, "grad_norm": 0.6123679280281067, "learning_rate": 0.00015205942793900665, "loss": 1.6839, "step": 59887 }, { "epoch": 1.99, "grad_norm": 0.5921382308006287, "learning_rate": 0.0001520503351822284, "loss": 1.7066, "step": 59888 }, { "epoch": 1.99, "grad_norm": 0.5990267992019653, "learning_rate": 0.00015204124260503936, "loss": 1.8066, "step": 59889 }, { "epoch": 1.99, "grad_norm": 0.5869538187980652, "learning_rate": 0.00015203215020745042, "loss": 1.7992, "step": 59890 }, { "epoch": 1.99, "grad_norm": 0.575925886631012, "learning_rate": 0.00015202305798947254, "loss": 1.7497, "step": 59891 }, { "epoch": 1.99, "grad_norm": 0.6117789149284363, "learning_rate": 0.0001520139659511169, "loss": 1.7173, "step": 59892 }, { "epoch": 1.99, "grad_norm": 0.6040624380111694, "learning_rate": 0.00015200487409239473, "loss": 1.7161, "step": 59893 }, { "epoch": 1.99, "grad_norm": 0.5973864793777466, "learning_rate": 0.00015199578241331657, "loss": 1.722, "step": 59894 }, { "epoch": 1.99, "grad_norm": 0.5944528579711914, "learning_rate": 0.00015198669091389373, "loss": 1.7311, "step": 59895 }, { "epoch": 1.99, "grad_norm": 0.6052952408790588, "learning_rate": 0.00015197759959413736, "loss": 1.7485, "step": 59896 }, { "epoch": 1.99, "grad_norm": 0.603898286819458, "learning_rate": 0.0001519685084540584, "loss": 1.6847, "step": 59897 }, { "epoch": 1.99, "grad_norm": 0.6279658675193787, "learning_rate": 0.0001519594174936677, "loss": 1.7641, "step": 59898 }, { "epoch": 1.99, "grad_norm": 0.6027080416679382, "learning_rate": 0.00015195032671297648, "loss": 1.7264, "step": 59899 }, { "epoch": 1.99, "grad_norm": 0.6288487911224365, "learning_rate": 0.00015194123611199598, "loss": 1.725, "step": 59900 }, { "epoch": 1.99, "grad_norm": 0.6037801504135132, "learning_rate": 0.00015193214569073676, "loss": 1.7497, "step": 59901 }, { "epoch": 1.99, "grad_norm": 0.5938424468040466, "learning_rate": 0.00015192305544921007, "loss": 1.7204, "step": 59902 }, { "epoch": 1.99, "grad_norm": 0.5730493068695068, "learning_rate": 0.00015191396538742716, "loss": 1.7172, "step": 59903 }, { "epoch": 1.99, "grad_norm": 0.6208409667015076, "learning_rate": 0.00015190487550539885, "loss": 1.6483, "step": 59904 }, { "epoch": 1.99, "grad_norm": 0.6028932929039001, "learning_rate": 0.00015189578580313608, "loss": 1.7084, "step": 59905 }, { "epoch": 1.99, "grad_norm": 0.6147067546844482, "learning_rate": 0.00015188669628065, "loss": 1.8352, "step": 59906 }, { "epoch": 1.99, "grad_norm": 0.6080551743507385, "learning_rate": 0.00015187760693795185, "loss": 1.7607, "step": 59907 }, { "epoch": 1.99, "grad_norm": 0.6462409496307373, "learning_rate": 0.00015186851777505226, "loss": 1.7326, "step": 59908 }, { "epoch": 1.99, "grad_norm": 0.6144606471061707, "learning_rate": 0.00015185942879196245, "loss": 1.7114, "step": 59909 }, { "epoch": 1.99, "grad_norm": 0.608143150806427, "learning_rate": 0.00015185033998869362, "loss": 1.7877, "step": 59910 }, { "epoch": 1.99, "grad_norm": 0.618529200553894, "learning_rate": 0.0001518412513652566, "loss": 1.6465, "step": 59911 }, { "epoch": 1.99, "grad_norm": 0.6222443580627441, "learning_rate": 0.0001518321629216624, "loss": 1.7603, "step": 59912 }, { "epoch": 1.99, "grad_norm": 0.6030773520469666, "learning_rate": 0.0001518230746579222, "loss": 1.7447, "step": 59913 }, { "epoch": 1.99, "grad_norm": 0.6165046095848083, "learning_rate": 0.00015181398657404698, "loss": 1.7146, "step": 59914 }, { "epoch": 1.99, "grad_norm": 0.6329008936882019, "learning_rate": 0.0001518048986700476, "loss": 1.7328, "step": 59915 }, { "epoch": 1.99, "grad_norm": 0.6067973971366882, "learning_rate": 0.00015179581094593525, "loss": 1.7203, "step": 59916 }, { "epoch": 1.99, "grad_norm": 0.6115908622741699, "learning_rate": 0.0001517867234017211, "loss": 1.7119, "step": 59917 }, { "epoch": 1.99, "grad_norm": 0.6332417726516724, "learning_rate": 0.00015177763603741598, "loss": 1.7928, "step": 59918 }, { "epoch": 1.99, "grad_norm": 0.6192002892494202, "learning_rate": 0.00015176854885303088, "loss": 1.7688, "step": 59919 }, { "epoch": 1.99, "grad_norm": 0.6017873883247375, "learning_rate": 0.00015175946184857702, "loss": 1.7604, "step": 59920 }, { "epoch": 1.99, "grad_norm": 0.6228309869766235, "learning_rate": 0.00015175037502406533, "loss": 1.7408, "step": 59921 }, { "epoch": 1.99, "grad_norm": 0.5937238931655884, "learning_rate": 0.00015174128837950674, "loss": 1.7696, "step": 59922 }, { "epoch": 1.99, "grad_norm": 0.5873517394065857, "learning_rate": 0.00015173220191491254, "loss": 1.7593, "step": 59923 }, { "epoch": 1.99, "grad_norm": 0.618162214756012, "learning_rate": 0.00015172311563029344, "loss": 1.8577, "step": 59924 }, { "epoch": 1.99, "grad_norm": 0.6122264862060547, "learning_rate": 0.00015171402952566073, "loss": 1.6889, "step": 59925 }, { "epoch": 1.99, "grad_norm": 0.6179199814796448, "learning_rate": 0.0001517049436010252, "loss": 1.7899, "step": 59926 }, { "epoch": 1.99, "grad_norm": 0.610069990158081, "learning_rate": 0.00015169585785639823, "loss": 1.7397, "step": 59927 }, { "epoch": 1.99, "grad_norm": 0.6152095198631287, "learning_rate": 0.00015168677229179055, "loss": 1.7826, "step": 59928 }, { "epoch": 1.99, "grad_norm": 1.3013875484466553, "learning_rate": 0.0001516776869072132, "loss": 1.7696, "step": 59929 }, { "epoch": 1.99, "grad_norm": 0.6088608503341675, "learning_rate": 0.00015166860170267736, "loss": 1.741, "step": 59930 }, { "epoch": 1.99, "grad_norm": 0.5913773775100708, "learning_rate": 0.0001516595166781939, "loss": 1.6605, "step": 59931 }, { "epoch": 1.99, "grad_norm": 0.6293506026268005, "learning_rate": 0.0001516504318337741, "loss": 1.7765, "step": 59932 }, { "epoch": 1.99, "grad_norm": 0.6240285634994507, "learning_rate": 0.00015164134716942872, "loss": 1.6911, "step": 59933 }, { "epoch": 1.99, "grad_norm": 0.6106643676757812, "learning_rate": 0.0001516322626851688, "loss": 1.6929, "step": 59934 }, { "epoch": 1.99, "grad_norm": 0.5951381325721741, "learning_rate": 0.00015162317838100559, "loss": 1.6659, "step": 59935 }, { "epoch": 1.99, "grad_norm": 0.6175088882446289, "learning_rate": 0.00015161409425694982, "loss": 1.7314, "step": 59936 }, { "epoch": 1.99, "grad_norm": 0.6003652215003967, "learning_rate": 0.0001516050103130128, "loss": 1.6955, "step": 59937 }, { "epoch": 1.99, "grad_norm": 0.5881338715553284, "learning_rate": 0.00015159592654920536, "loss": 1.7169, "step": 59938 }, { "epoch": 1.99, "grad_norm": 0.6391560435295105, "learning_rate": 0.0001515868429655387, "loss": 1.7239, "step": 59939 }, { "epoch": 1.99, "grad_norm": 0.6051037311553955, "learning_rate": 0.00015157775956202373, "loss": 1.736, "step": 59940 }, { "epoch": 1.99, "grad_norm": 0.6135575175285339, "learning_rate": 0.0001515686763386714, "loss": 1.6867, "step": 59941 }, { "epoch": 1.99, "grad_norm": 0.6024516820907593, "learning_rate": 0.0001515595932954929, "loss": 1.6606, "step": 59942 }, { "epoch": 1.99, "grad_norm": 0.5895205736160278, "learning_rate": 0.00015155051043249922, "loss": 1.7042, "step": 59943 }, { "epoch": 1.99, "grad_norm": 0.6117933988571167, "learning_rate": 0.0001515414277497012, "loss": 1.7315, "step": 59944 }, { "epoch": 1.99, "grad_norm": 0.610114574432373, "learning_rate": 0.00015153234524711007, "loss": 1.6631, "step": 59945 }, { "epoch": 1.99, "grad_norm": 0.6139802932739258, "learning_rate": 0.00015152326292473687, "loss": 1.7113, "step": 59946 }, { "epoch": 1.99, "grad_norm": 0.6497924327850342, "learning_rate": 0.00015151418078259254, "loss": 1.7627, "step": 59947 }, { "epoch": 1.99, "grad_norm": 0.6031321287155151, "learning_rate": 0.000151505098820688, "loss": 1.7514, "step": 59948 }, { "epoch": 1.99, "grad_norm": 0.5781720876693726, "learning_rate": 0.00015149601703903453, "loss": 1.71, "step": 59949 }, { "epoch": 1.99, "grad_norm": 0.6283918619155884, "learning_rate": 0.000151486935437643, "loss": 1.8071, "step": 59950 }, { "epoch": 1.99, "grad_norm": 0.6223295331001282, "learning_rate": 0.00015147785401652437, "loss": 1.7666, "step": 59951 }, { "epoch": 1.99, "grad_norm": 0.6128430366516113, "learning_rate": 0.0001514687727756897, "loss": 1.7965, "step": 59952 }, { "epoch": 1.99, "grad_norm": 0.5895830392837524, "learning_rate": 0.00015145969171515017, "loss": 1.7638, "step": 59953 }, { "epoch": 1.99, "grad_norm": 0.6234570741653442, "learning_rate": 0.0001514506108349167, "loss": 1.8227, "step": 59954 }, { "epoch": 1.99, "grad_norm": 0.6151039600372314, "learning_rate": 0.0001514415301350002, "loss": 1.746, "step": 59955 }, { "epoch": 1.99, "grad_norm": 0.6076898574829102, "learning_rate": 0.0001514324496154119, "loss": 1.7859, "step": 59956 }, { "epoch": 1.99, "grad_norm": 0.6062235832214355, "learning_rate": 0.00015142336927616268, "loss": 1.7649, "step": 59957 }, { "epoch": 1.99, "grad_norm": 0.5964785218238831, "learning_rate": 0.00015141428911726352, "loss": 1.6865, "step": 59958 }, { "epoch": 1.99, "grad_norm": 0.6120803952217102, "learning_rate": 0.0001514052091387255, "loss": 1.7564, "step": 59959 }, { "epoch": 1.99, "grad_norm": 0.6138738989830017, "learning_rate": 0.0001513961293405598, "loss": 1.7134, "step": 59960 }, { "epoch": 1.99, "grad_norm": 0.6027318835258484, "learning_rate": 0.00015138704972277728, "loss": 1.7547, "step": 59961 }, { "epoch": 1.99, "grad_norm": 0.602347195148468, "learning_rate": 0.00015137797028538888, "loss": 1.7275, "step": 59962 }, { "epoch": 1.99, "grad_norm": 0.6114287972450256, "learning_rate": 0.00015136889102840582, "loss": 1.6553, "step": 59963 }, { "epoch": 2.0, "grad_norm": 0.595673680305481, "learning_rate": 0.00015135981195183907, "loss": 1.8212, "step": 59964 }, { "epoch": 2.0, "grad_norm": 0.6290754675865173, "learning_rate": 0.00015135073305569944, "loss": 1.7882, "step": 59965 }, { "epoch": 2.0, "grad_norm": 0.5890951752662659, "learning_rate": 0.00015134165433999816, "loss": 1.7918, "step": 59966 }, { "epoch": 2.0, "grad_norm": 0.6162616014480591, "learning_rate": 0.00015133257580474634, "loss": 1.7446, "step": 59967 }, { "epoch": 2.0, "grad_norm": 0.5992260575294495, "learning_rate": 0.00015132349744995482, "loss": 1.7131, "step": 59968 }, { "epoch": 2.0, "grad_norm": 0.6015775203704834, "learning_rate": 0.00015131441927563455, "loss": 1.6658, "step": 59969 }, { "epoch": 2.0, "grad_norm": 0.6044572591781616, "learning_rate": 0.00015130534128179684, "loss": 1.7353, "step": 59970 }, { "epoch": 2.0, "grad_norm": 0.6108770370483398, "learning_rate": 0.00015129626346845247, "loss": 1.7805, "step": 59971 }, { "epoch": 2.0, "grad_norm": 0.5992788076400757, "learning_rate": 0.00015128718583561245, "loss": 1.6776, "step": 59972 }, { "epoch": 2.0, "grad_norm": 0.6156718134880066, "learning_rate": 0.00015127810838328786, "loss": 1.7248, "step": 59973 }, { "epoch": 2.0, "grad_norm": 0.5991492867469788, "learning_rate": 0.00015126903111148987, "loss": 1.7404, "step": 59974 }, { "epoch": 2.0, "grad_norm": 0.6198238134384155, "learning_rate": 0.00015125995402022932, "loss": 1.714, "step": 59975 }, { "epoch": 2.0, "grad_norm": 0.6160191893577576, "learning_rate": 0.0001512508771095172, "loss": 1.7183, "step": 59976 }, { "epoch": 2.0, "grad_norm": 0.5861296057701111, "learning_rate": 0.00015124180037936468, "loss": 1.7151, "step": 59977 }, { "epoch": 2.0, "grad_norm": 0.5961844325065613, "learning_rate": 0.0001512327238297827, "loss": 1.6707, "step": 59978 }, { "epoch": 2.0, "grad_norm": 0.6272200345993042, "learning_rate": 0.0001512236474607822, "loss": 1.7826, "step": 59979 }, { "epoch": 2.0, "grad_norm": 0.580485463142395, "learning_rate": 0.00015121457127237434, "loss": 1.6577, "step": 59980 }, { "epoch": 2.0, "grad_norm": 0.6101312637329102, "learning_rate": 0.00015120549526456998, "loss": 1.7342, "step": 59981 }, { "epoch": 2.0, "grad_norm": 0.5991806983947754, "learning_rate": 0.00015119641943738032, "loss": 1.6885, "step": 59982 }, { "epoch": 2.0, "grad_norm": 0.6142104268074036, "learning_rate": 0.00015118734379081616, "loss": 1.6467, "step": 59983 }, { "epoch": 2.0, "grad_norm": 0.6204186081886292, "learning_rate": 0.00015117826832488875, "loss": 1.7283, "step": 59984 }, { "epoch": 2.0, "grad_norm": 0.5904033780097961, "learning_rate": 0.00015116919303960903, "loss": 1.741, "step": 59985 }, { "epoch": 2.0, "grad_norm": 0.6043970584869385, "learning_rate": 0.00015116011793498782, "loss": 1.7636, "step": 59986 }, { "epoch": 2.0, "grad_norm": 0.5919092893600464, "learning_rate": 0.00015115104301103647, "loss": 1.7085, "step": 59987 }, { "epoch": 2.0, "grad_norm": 0.6073580384254456, "learning_rate": 0.00015114196826776564, "loss": 1.7251, "step": 59988 }, { "epoch": 2.0, "grad_norm": 0.6137391328811646, "learning_rate": 0.00015113289370518665, "loss": 1.7614, "step": 59989 }, { "epoch": 2.0, "grad_norm": 0.613213837146759, "learning_rate": 0.00015112381932331045, "loss": 1.7113, "step": 59990 }, { "epoch": 2.0, "grad_norm": 0.6264559626579285, "learning_rate": 0.00015111474512214786, "loss": 1.7208, "step": 59991 }, { "epoch": 2.0, "grad_norm": 0.6018765568733215, "learning_rate": 0.00015110567110171013, "loss": 1.7439, "step": 59992 }, { "epoch": 2.0, "grad_norm": 0.6223489046096802, "learning_rate": 0.00015109659726200808, "loss": 1.5967, "step": 59993 }, { "epoch": 2.0, "grad_norm": 0.6056574583053589, "learning_rate": 0.0001510875236030529, "loss": 1.6553, "step": 59994 }, { "epoch": 2.0, "grad_norm": 0.6023274064064026, "learning_rate": 0.00015107845012485543, "loss": 1.7215, "step": 59995 }, { "epoch": 2.0, "grad_norm": 0.63282710313797, "learning_rate": 0.00015106937682742691, "loss": 1.7788, "step": 59996 }, { "epoch": 2.0, "grad_norm": 0.622143566608429, "learning_rate": 0.00015106030371077824, "loss": 1.7058, "step": 59997 }, { "epoch": 2.0, "grad_norm": 0.5918899178504944, "learning_rate": 0.00015105123077492025, "loss": 1.7242, "step": 59998 }, { "epoch": 2.0, "grad_norm": 0.584381639957428, "learning_rate": 0.00015104215801986428, "loss": 1.6967, "step": 59999 }, { "epoch": 2.0, "grad_norm": 0.621743381023407, "learning_rate": 0.00015103308544562113, "loss": 1.7215, "step": 60000 }, { "epoch": 2.0, "grad_norm": 0.5934115052223206, "learning_rate": 0.0001510240130522018, "loss": 1.6859, "step": 60001 }, { "epoch": 2.0, "grad_norm": 0.5934209227561951, "learning_rate": 0.00015101494083961736, "loss": 1.6674, "step": 60002 }, { "epoch": 2.0, "grad_norm": 0.6231840252876282, "learning_rate": 0.00015100586880787892, "loss": 1.6868, "step": 60003 }, { "epoch": 2.0, "grad_norm": 0.6087573766708374, "learning_rate": 0.00015099679695699741, "loss": 1.7305, "step": 60004 }, { "epoch": 2.0, "grad_norm": 0.6212121248245239, "learning_rate": 0.00015098772528698373, "loss": 1.729, "step": 60005 }, { "epoch": 2.0, "grad_norm": 0.6005017757415771, "learning_rate": 0.0001509786537978491, "loss": 1.7789, "step": 60006 }, { "epoch": 2.0, "grad_norm": 0.6128495335578918, "learning_rate": 0.0001509695824896044, "loss": 1.7021, "step": 60007 }, { "epoch": 2.0, "grad_norm": 0.6151067018508911, "learning_rate": 0.00015096051136226062, "loss": 1.7649, "step": 60008 }, { "epoch": 2.0, "grad_norm": 0.5997098088264465, "learning_rate": 0.00015095144041582875, "loss": 1.7365, "step": 60009 }, { "epoch": 2.0, "grad_norm": 0.932295560836792, "learning_rate": 0.00015094236965032014, "loss": 1.748, "step": 60010 }, { "epoch": 2.0, "grad_norm": 0.6226831078529358, "learning_rate": 0.0001509332990657453, "loss": 1.7343, "step": 60011 }, { "epoch": 2.0, "grad_norm": 0.613437294960022, "learning_rate": 0.00015092422866211548, "loss": 1.7013, "step": 60012 }, { "epoch": 2.0, "grad_norm": 0.6020416021347046, "learning_rate": 0.00015091515843944174, "loss": 1.7379, "step": 60013 }, { "epoch": 2.0, "grad_norm": 0.6181198358535767, "learning_rate": 0.00015090608839773512, "loss": 1.7318, "step": 60014 }, { "epoch": 2.0, "grad_norm": 0.6113527417182922, "learning_rate": 0.00015089701853700636, "loss": 1.6981, "step": 60015 }, { "epoch": 2.0, "grad_norm": 0.5896982550621033, "learning_rate": 0.0001508879488572667, "loss": 1.7758, "step": 60016 }, { "epoch": 2.0, "grad_norm": 0.6052857041358948, "learning_rate": 0.00015087887935852728, "loss": 1.7244, "step": 60017 }, { "epoch": 2.0, "grad_norm": 0.5885829925537109, "learning_rate": 0.00015086981004079871, "loss": 1.7103, "step": 60018 }, { "epoch": 2.0, "grad_norm": 0.5905928015708923, "learning_rate": 0.00015086074090409223, "loss": 1.6323, "step": 60019 }, { "epoch": 2.0, "grad_norm": 0.5892515778541565, "learning_rate": 0.00015085167194841894, "loss": 1.7135, "step": 60020 }, { "epoch": 2.0, "grad_norm": 0.6157000064849854, "learning_rate": 0.00015084260317378977, "loss": 1.7424, "step": 60021 }, { "epoch": 2.0, "grad_norm": 0.5881361365318298, "learning_rate": 0.00015083353458021555, "loss": 1.6572, "step": 60022 }, { "epoch": 2.0, "grad_norm": 0.6226689219474792, "learning_rate": 0.00015082446616770741, "loss": 1.6883, "step": 60023 }, { "epoch": 2.0, "grad_norm": 0.6241903901100159, "learning_rate": 0.0001508153979362767, "loss": 1.6289, "step": 60024 }, { "epoch": 2.0, "grad_norm": 0.6085041761398315, "learning_rate": 0.00015080632988593383, "loss": 1.8, "step": 60025 }, { "epoch": 2.0, "grad_norm": 2.1129798889160156, "learning_rate": 0.00015079726201669012, "loss": 1.8225, "step": 60026 }, { "epoch": 2.0, "grad_norm": 0.5971353054046631, "learning_rate": 0.00015078819432855667, "loss": 1.7065, "step": 60027 }, { "epoch": 2.0, "grad_norm": 0.6061505079269409, "learning_rate": 0.00015077912682154437, "loss": 1.7001, "step": 60028 }, { "epoch": 2.0, "grad_norm": 0.5953531265258789, "learning_rate": 0.0001507700594956641, "loss": 1.8005, "step": 60029 }, { "epoch": 2.0, "grad_norm": 0.6131647229194641, "learning_rate": 0.00015076099235092707, "loss": 1.7054, "step": 60030 }, { "epoch": 2.0, "grad_norm": 0.5894837379455566, "learning_rate": 0.00015075192538734427, "loss": 1.6929, "step": 60031 }, { "epoch": 2.0, "grad_norm": 0.6199395060539246, "learning_rate": 0.00015074285860492648, "loss": 1.6894, "step": 60032 }, { "epoch": 2.0, "grad_norm": 0.6117421388626099, "learning_rate": 0.00015073379200368487, "loss": 1.727, "step": 60033 }, { "epoch": 2.0, "grad_norm": 0.6050604581832886, "learning_rate": 0.00015072472558363053, "loss": 1.7066, "step": 60034 }, { "epoch": 2.0, "grad_norm": 0.5857786536216736, "learning_rate": 0.00015071565934477444, "loss": 1.669, "step": 60035 }, { "epoch": 2.0, "grad_norm": 0.5847418308258057, "learning_rate": 0.0001507065932871274, "loss": 1.6745, "step": 60036 }, { "epoch": 2.0, "grad_norm": 0.5978551506996155, "learning_rate": 0.00015069752741070068, "loss": 1.7087, "step": 60037 }, { "epoch": 2.0, "grad_norm": 0.6030404567718506, "learning_rate": 0.00015068846171550516, "loss": 1.6895, "step": 60038 }, { "epoch": 2.0, "grad_norm": 0.6373453736305237, "learning_rate": 0.00015067939620155174, "loss": 1.7727, "step": 60039 }, { "epoch": 2.0, "grad_norm": 0.6172735691070557, "learning_rate": 0.00015067033086885162, "loss": 1.7953, "step": 60040 }, { "epoch": 2.0, "grad_norm": 0.5830224752426147, "learning_rate": 0.0001506612657174156, "loss": 1.658, "step": 60041 }, { "epoch": 2.0, "grad_norm": 0.6120754480361938, "learning_rate": 0.00015065220074725495, "loss": 1.7161, "step": 60042 }, { "epoch": 2.0, "grad_norm": 0.595112144947052, "learning_rate": 0.00015064313595838043, "loss": 1.7487, "step": 60043 }, { "epoch": 2.0, "grad_norm": 0.6184566617012024, "learning_rate": 0.00015063407135080322, "loss": 1.7462, "step": 60044 }, { "epoch": 2.0, "grad_norm": 0.5962333083152771, "learning_rate": 0.0001506250069245342, "loss": 1.7241, "step": 60045 }, { "epoch": 2.0, "grad_norm": 0.5933297276496887, "learning_rate": 0.00015061594267958434, "loss": 1.7803, "step": 60046 }, { "epoch": 2.0, "grad_norm": 0.5849518179893494, "learning_rate": 0.0001506068786159648, "loss": 1.7492, "step": 60047 }, { "epoch": 2.0, "grad_norm": 0.6097337007522583, "learning_rate": 0.00015059781473368642, "loss": 1.7611, "step": 60048 }, { "epoch": 2.0, "grad_norm": 0.597716212272644, "learning_rate": 0.00015058875103276034, "loss": 1.777, "step": 60049 }, { "epoch": 2.0, "grad_norm": 0.6018591523170471, "learning_rate": 0.00015057968751319755, "loss": 1.7152, "step": 60050 }, { "epoch": 2.0, "grad_norm": 0.6345246434211731, "learning_rate": 0.00015057062417500887, "loss": 1.6793, "step": 60051 }, { "epoch": 2.0, "grad_norm": 0.6181885600090027, "learning_rate": 0.00015056156101820556, "loss": 1.8464, "step": 60052 }, { "epoch": 2.0, "grad_norm": 0.6149420738220215, "learning_rate": 0.0001505524980427984, "loss": 1.7085, "step": 60053 }, { "epoch": 2.0, "grad_norm": 0.6241090297698975, "learning_rate": 0.00015054343524879856, "loss": 1.7563, "step": 60054 }, { "epoch": 2.0, "grad_norm": 0.5931819081306458, "learning_rate": 0.0001505343726362169, "loss": 1.7049, "step": 60055 }, { "epoch": 2.0, "grad_norm": 0.6105698347091675, "learning_rate": 0.00015052531020506456, "loss": 1.7256, "step": 60056 }, { "epoch": 2.0, "grad_norm": 0.5975303649902344, "learning_rate": 0.0001505162479553525, "loss": 1.7396, "step": 60057 }, { "epoch": 2.0, "grad_norm": 0.6074296236038208, "learning_rate": 0.00015050718588709155, "loss": 1.6652, "step": 60058 }, { "epoch": 2.0, "grad_norm": 0.6442152261734009, "learning_rate": 0.00015049812400029297, "loss": 1.7277, "step": 60059 }, { "epoch": 2.0, "grad_norm": 0.6017526984214783, "learning_rate": 0.00015048906229496765, "loss": 1.6692, "step": 60060 }, { "epoch": 2.0, "grad_norm": 0.619910478591919, "learning_rate": 0.00015048000077112647, "loss": 1.7483, "step": 60061 }, { "epoch": 2.0, "grad_norm": 0.6182032227516174, "learning_rate": 0.0001504709394287805, "loss": 1.7254, "step": 60062 }, { "epoch": 2.0, "grad_norm": 0.5987672209739685, "learning_rate": 0.0001504618782679409, "loss": 1.6331, "step": 60063 }, { "epoch": 2.0, "grad_norm": 0.6105146408081055, "learning_rate": 0.0001504528172886186, "loss": 1.6942, "step": 60064 }, { "epoch": 2.0, "grad_norm": 0.6141668558120728, "learning_rate": 0.0001504437564908244, "loss": 1.7494, "step": 60065 }, { "epoch": 2.0, "grad_norm": 0.621819019317627, "learning_rate": 0.00015043469587456955, "loss": 1.7382, "step": 60066 }, { "epoch": 2.0, "grad_norm": 0.614029049873352, "learning_rate": 0.00015042563543986496, "loss": 1.7722, "step": 60067 }, { "epoch": 2.0, "grad_norm": 0.598307728767395, "learning_rate": 0.00015041657518672147, "loss": 1.7548, "step": 60068 }, { "epoch": 2.0, "grad_norm": 0.6363444328308105, "learning_rate": 0.0001504075151151502, "loss": 1.7556, "step": 60069 }, { "epoch": 2.0, "grad_norm": 0.6200292706489563, "learning_rate": 0.00015039845522516233, "loss": 1.797, "step": 60070 }, { "epoch": 2.0, "grad_norm": 0.619893491268158, "learning_rate": 0.00015038939551676868, "loss": 1.7324, "step": 60071 }, { "epoch": 2.0, "grad_norm": 0.6110194325447083, "learning_rate": 0.00015038033598998012, "loss": 1.6801, "step": 60072 }, { "epoch": 2.0, "grad_norm": 0.6041280031204224, "learning_rate": 0.00015037127664480795, "loss": 1.7023, "step": 60073 }, { "epoch": 2.0, "grad_norm": 0.6107800006866455, "learning_rate": 0.00015036221748126295, "loss": 1.7355, "step": 60074 }, { "epoch": 2.0, "grad_norm": 0.6197571158409119, "learning_rate": 0.00015035315849935607, "loss": 1.7922, "step": 60075 }, { "epoch": 2.0, "grad_norm": 0.6032947301864624, "learning_rate": 0.0001503440996990984, "loss": 1.7823, "step": 60076 }, { "epoch": 2.0, "grad_norm": 0.5958849191665649, "learning_rate": 0.0001503350410805011, "loss": 1.7473, "step": 60077 }, { "epoch": 2.0, "grad_norm": 0.6333444118499756, "learning_rate": 0.00015032598264357497, "loss": 1.7055, "step": 60078 }, { "epoch": 2.0, "grad_norm": 0.6022598147392273, "learning_rate": 0.00015031692438833094, "loss": 1.7243, "step": 60079 }, { "epoch": 2.0, "grad_norm": 0.6050827503204346, "learning_rate": 0.00015030786631478022, "loss": 1.818, "step": 60080 }, { "epoch": 2.0, "grad_norm": 0.6079522371292114, "learning_rate": 0.00015029880842293367, "loss": 1.7146, "step": 60081 }, { "epoch": 2.0, "grad_norm": 0.6033141016960144, "learning_rate": 0.00015028975071280224, "loss": 1.6843, "step": 60082 }, { "epoch": 2.0, "grad_norm": 0.6161516904830933, "learning_rate": 0.00015028069318439696, "loss": 1.736, "step": 60083 }, { "epoch": 2.0, "grad_norm": 0.628969132900238, "learning_rate": 0.000150271635837729, "loss": 1.7257, "step": 60084 }, { "epoch": 2.0, "grad_norm": 0.6061666011810303, "learning_rate": 0.00015026257867280918, "loss": 1.6995, "step": 60085 }, { "epoch": 2.0, "grad_norm": 0.5882505178451538, "learning_rate": 0.00015025352168964847, "loss": 1.6983, "step": 60086 }, { "epoch": 2.0, "grad_norm": 0.6153950095176697, "learning_rate": 0.00015024446488825797, "loss": 1.6768, "step": 60087 }, { "epoch": 2.0, "grad_norm": 0.5964565873146057, "learning_rate": 0.00015023540826864865, "loss": 1.72, "step": 60088 }, { "epoch": 2.0, "grad_norm": 0.6110820770263672, "learning_rate": 0.00015022635183083134, "loss": 1.6779, "step": 60089 }, { "epoch": 2.0, "grad_norm": 0.6181241273880005, "learning_rate": 0.00015021729557481724, "loss": 1.6835, "step": 60090 }, { "epoch": 2.0, "grad_norm": 0.6190640926361084, "learning_rate": 0.00015020823950061732, "loss": 1.6799, "step": 60091 }, { "epoch": 2.0, "grad_norm": 0.6191475987434387, "learning_rate": 0.00015019918360824257, "loss": 1.7047, "step": 60092 }, { "epoch": 2.0, "grad_norm": 0.6110884547233582, "learning_rate": 0.0001501901278977038, "loss": 1.7226, "step": 60093 }, { "epoch": 2.0, "grad_norm": 0.5971030592918396, "learning_rate": 0.00015018107236901222, "loss": 1.7207, "step": 60094 }, { "epoch": 2.0, "grad_norm": 0.6230428814888, "learning_rate": 0.0001501720170221788, "loss": 1.744, "step": 60095 }, { "epoch": 2.0, "grad_norm": 0.6199617981910706, "learning_rate": 0.00015016296185721434, "loss": 1.7966, "step": 60096 }, { "epoch": 2.0, "grad_norm": 0.6242964863777161, "learning_rate": 0.00015015390687413008, "loss": 1.7413, "step": 60097 }, { "epoch": 2.0, "grad_norm": 0.5908533334732056, "learning_rate": 0.00015014485207293682, "loss": 1.749, "step": 60098 }, { "epoch": 2.0, "grad_norm": 0.6224748492240906, "learning_rate": 0.00015013579745364568, "loss": 1.6964, "step": 60099 }, { "epoch": 2.0, "grad_norm": 0.6069356799125671, "learning_rate": 0.00015012674301626754, "loss": 1.7717, "step": 60100 }, { "epoch": 2.0, "grad_norm": 0.6287937760353088, "learning_rate": 0.00015011768876081353, "loss": 1.7073, "step": 60101 }, { "epoch": 2.0, "grad_norm": 0.6177534461021423, "learning_rate": 0.00015010863468729456, "loss": 1.7609, "step": 60102 }, { "epoch": 2.0, "grad_norm": 0.5996343493461609, "learning_rate": 0.00015009958079572155, "loss": 1.6883, "step": 60103 }, { "epoch": 2.0, "grad_norm": 0.6014593243598938, "learning_rate": 0.00015009052708610563, "loss": 1.6764, "step": 60104 }, { "epoch": 2.0, "grad_norm": 0.60184645652771, "learning_rate": 0.00015008147355845763, "loss": 1.7268, "step": 60105 }, { "epoch": 2.0, "grad_norm": 0.6251314878463745, "learning_rate": 0.00015007242021278876, "loss": 1.6935, "step": 60106 }, { "epoch": 2.0, "grad_norm": 0.5961307287216187, "learning_rate": 0.00015006336704910983, "loss": 1.7502, "step": 60107 }, { "epoch": 2.0, "grad_norm": 0.624173104763031, "learning_rate": 0.0001500543140674318, "loss": 1.7034, "step": 60108 }, { "epoch": 2.0, "grad_norm": 0.6011382341384888, "learning_rate": 0.00015004526126776588, "loss": 1.7504, "step": 60109 }, { "epoch": 2.0, "grad_norm": 0.6221648454666138, "learning_rate": 0.00015003620865012277, "loss": 1.7755, "step": 60110 }, { "epoch": 2.0, "grad_norm": 0.5936391353607178, "learning_rate": 0.00015002715621451372, "loss": 1.7167, "step": 60111 }, { "epoch": 2.0, "grad_norm": 0.610750138759613, "learning_rate": 0.0001500181039609495, "loss": 1.6713, "step": 60112 }, { "epoch": 2.0, "grad_norm": 0.5899613499641418, "learning_rate": 0.00015000905188944133, "loss": 1.7178, "step": 60113 }, { "epoch": 2.0, "grad_norm": 1.2584894895553589, "learning_rate": 0.00015000000000000004, "loss": 1.7065, "step": 60114 }, { "epoch": 2.0, "grad_norm": 0.6030653715133667, "learning_rate": 0.00014999094829263657, "loss": 1.7622, "step": 60115 }, { "epoch": 2.0, "grad_norm": 0.6319400668144226, "learning_rate": 0.00014998189676736207, "loss": 1.7111, "step": 60116 }, { "epoch": 2.0, "grad_norm": 0.620395839214325, "learning_rate": 0.00014997284542418745, "loss": 1.6926, "step": 60117 }, { "epoch": 2.0, "grad_norm": 0.6103312373161316, "learning_rate": 0.0001499637942631236, "loss": 1.7972, "step": 60118 }, { "epoch": 2.0, "grad_norm": 0.6136842370033264, "learning_rate": 0.00014995474328418157, "loss": 1.7932, "step": 60119 }, { "epoch": 2.0, "grad_norm": 0.606467068195343, "learning_rate": 0.00014994569248737247, "loss": 1.7558, "step": 60120 }, { "epoch": 2.0, "grad_norm": 0.6071985960006714, "learning_rate": 0.00014993664187270723, "loss": 1.7307, "step": 60121 }, { "epoch": 2.0, "grad_norm": 0.5933546423912048, "learning_rate": 0.00014992759144019665, "loss": 1.708, "step": 60122 }, { "epoch": 2.0, "grad_norm": 0.5978216528892517, "learning_rate": 0.00014991854118985202, "loss": 1.6998, "step": 60123 }, { "epoch": 2.0, "grad_norm": 0.5974336862564087, "learning_rate": 0.00014990949112168414, "loss": 1.7267, "step": 60124 }, { "epoch": 2.0, "grad_norm": 0.6189903020858765, "learning_rate": 0.0001499004412357039, "loss": 1.7807, "step": 60125 }, { "epoch": 2.0, "grad_norm": 0.608330249786377, "learning_rate": 0.00014989139153192242, "loss": 1.7313, "step": 60126 }, { "epoch": 2.0, "grad_norm": 0.6126210689544678, "learning_rate": 0.0001498823420103509, "loss": 1.691, "step": 60127 }, { "epoch": 2.0, "grad_norm": 0.611642599105835, "learning_rate": 0.00014987329267099987, "loss": 1.7632, "step": 60128 }, { "epoch": 2.0, "grad_norm": 0.6090025305747986, "learning_rate": 0.00014986424351388052, "loss": 1.7073, "step": 60129 }, { "epoch": 2.0, "grad_norm": 0.6185542345046997, "learning_rate": 0.000149855194539004, "loss": 1.7136, "step": 60130 }, { "epoch": 2.0, "grad_norm": 0.6074901819229126, "learning_rate": 0.00014984614574638114, "loss": 1.6193, "step": 60131 }, { "epoch": 2.0, "grad_norm": 0.5970445275306702, "learning_rate": 0.00014983709713602285, "loss": 1.7235, "step": 60132 }, { "epoch": 2.0, "grad_norm": 0.6135126352310181, "learning_rate": 0.00014982804870794015, "loss": 1.6514, "step": 60133 }, { "epoch": 2.0, "grad_norm": 0.6152249574661255, "learning_rate": 0.00014981900046214433, "loss": 1.8003, "step": 60134 }, { "epoch": 2.0, "grad_norm": 0.6560746431350708, "learning_rate": 0.00014980995239864588, "loss": 1.7017, "step": 60135 }, { "epoch": 2.0, "grad_norm": 0.5930824279785156, "learning_rate": 0.000149800904517456, "loss": 1.7496, "step": 60136 }, { "epoch": 2.0, "grad_norm": 0.61673903465271, "learning_rate": 0.00014979185681858584, "loss": 1.6592, "step": 60137 }, { "epoch": 2.0, "grad_norm": 0.630612313747406, "learning_rate": 0.00014978280930204625, "loss": 1.7492, "step": 60138 }, { "epoch": 2.0, "grad_norm": 0.599679172039032, "learning_rate": 0.00014977376196784802, "loss": 1.642, "step": 60139 }, { "epoch": 2.0, "grad_norm": 0.6294643878936768, "learning_rate": 0.00014976471481600233, "loss": 1.741, "step": 60140 }, { "epoch": 2.0, "grad_norm": 0.6131872534751892, "learning_rate": 0.00014975566784652036, "loss": 1.6871, "step": 60141 }, { "epoch": 2.0, "grad_norm": 0.6585743427276611, "learning_rate": 0.0001497466210594127, "loss": 1.6627, "step": 60142 }, { "epoch": 2.0, "grad_norm": 0.6111752390861511, "learning_rate": 0.00014973757445469043, "loss": 1.7765, "step": 60143 }, { "epoch": 2.0, "grad_norm": 0.5904842019081116, "learning_rate": 0.00014972852803236478, "loss": 1.6972, "step": 60144 }, { "epoch": 2.0, "grad_norm": 0.6021046042442322, "learning_rate": 0.00014971948179244652, "loss": 1.6767, "step": 60145 }, { "epoch": 2.0, "grad_norm": 0.5996476411819458, "learning_rate": 0.00014971043573494655, "loss": 1.6631, "step": 60146 }, { "epoch": 2.0, "grad_norm": 0.6160239577293396, "learning_rate": 0.00014970138985987607, "loss": 1.7019, "step": 60147 }, { "epoch": 2.0, "grad_norm": 0.6201492547988892, "learning_rate": 0.00014969234416724602, "loss": 1.6967, "step": 60148 }, { "epoch": 2.0, "grad_norm": 0.6073055863380432, "learning_rate": 0.00014968329865706716, "loss": 1.6967, "step": 60149 }, { "epoch": 2.0, "grad_norm": 0.6002058982849121, "learning_rate": 0.0001496742533293506, "loss": 1.6483, "step": 60150 }, { "epoch": 2.0, "grad_norm": 0.613933801651001, "learning_rate": 0.00014966520818410748, "loss": 1.7859, "step": 60151 }, { "epoch": 2.0, "grad_norm": 0.6191068887710571, "learning_rate": 0.00014965616322134867, "loss": 1.7566, "step": 60152 }, { "epoch": 2.0, "grad_norm": 0.6224455833435059, "learning_rate": 0.00014964711844108497, "loss": 1.7918, "step": 60153 }, { "epoch": 2.0, "grad_norm": 0.5994123816490173, "learning_rate": 0.00014963807384332767, "loss": 1.7752, "step": 60154 }, { "epoch": 2.0, "grad_norm": 0.6062115430831909, "learning_rate": 0.0001496290294280876, "loss": 1.7152, "step": 60155 }, { "epoch": 2.0, "grad_norm": 0.6306790113449097, "learning_rate": 0.0001496199851953756, "loss": 1.6981, "step": 60156 }, { "epoch": 2.0, "grad_norm": 0.6056991815567017, "learning_rate": 0.00014961094114520287, "loss": 1.7311, "step": 60157 }, { "epoch": 2.0, "grad_norm": 0.6398627758026123, "learning_rate": 0.00014960189727758022, "loss": 1.771, "step": 60158 }, { "epoch": 2.0, "grad_norm": 0.6601909399032593, "learning_rate": 0.0001495928535925188, "loss": 1.7051, "step": 60159 }, { "epoch": 2.0, "grad_norm": 0.6285353899002075, "learning_rate": 0.00014958381009002938, "loss": 1.749, "step": 60160 }, { "epoch": 2.0, "grad_norm": 0.6141253113746643, "learning_rate": 0.0001495747667701232, "loss": 1.6876, "step": 60161 }, { "epoch": 2.0, "grad_norm": 0.6063979268074036, "learning_rate": 0.00014956572363281103, "loss": 1.704, "step": 60162 }, { "epoch": 2.0, "grad_norm": 0.8056572675704956, "learning_rate": 0.00014955668067810382, "loss": 1.7856, "step": 60163 }, { "epoch": 2.0, "grad_norm": 0.6187211275100708, "learning_rate": 0.00014954763790601277, "loss": 1.6986, "step": 60164 }, { "epoch": 2.0, "grad_norm": 0.6134055256843567, "learning_rate": 0.00014953859531654858, "loss": 1.7382, "step": 60165 }, { "epoch": 2.0, "grad_norm": 0.6520835757255554, "learning_rate": 0.0001495295529097225, "loss": 1.668, "step": 60166 }, { "epoch": 2.0, "grad_norm": 0.5929803252220154, "learning_rate": 0.00014952051068554536, "loss": 1.7348, "step": 60167 }, { "epoch": 2.0, "grad_norm": 0.6227035522460938, "learning_rate": 0.00014951146864402803, "loss": 1.7006, "step": 60168 }, { "epoch": 2.0, "grad_norm": 0.6169750690460205, "learning_rate": 0.00014950242678518172, "loss": 1.6319, "step": 60169 }, { "epoch": 2.0, "grad_norm": 0.6042098999023438, "learning_rate": 0.00014949338510901716, "loss": 1.745, "step": 60170 }, { "epoch": 2.0, "grad_norm": 0.6119475364685059, "learning_rate": 0.0001494843436155456, "loss": 1.7854, "step": 60171 }, { "epoch": 2.0, "grad_norm": 0.6022999286651611, "learning_rate": 0.00014947530230477772, "loss": 1.7232, "step": 60172 }, { "epoch": 2.0, "grad_norm": 0.6434705257415771, "learning_rate": 0.0001494662611767248, "loss": 1.6922, "step": 60173 }, { "epoch": 2.0, "grad_norm": 0.6088129281997681, "learning_rate": 0.0001494572202313976, "loss": 1.7518, "step": 60174 }, { "epoch": 2.0, "grad_norm": 0.605659008026123, "learning_rate": 0.00014944817946880708, "loss": 1.7328, "step": 60175 }, { "epoch": 2.0, "grad_norm": 0.6006207466125488, "learning_rate": 0.0001494391388889644, "loss": 1.7176, "step": 60176 }, { "epoch": 2.0, "grad_norm": 0.6185194849967957, "learning_rate": 0.00014943009849188043, "loss": 1.7124, "step": 60177 }, { "epoch": 2.0, "grad_norm": 0.6059390902519226, "learning_rate": 0.000149421058277566, "loss": 1.6866, "step": 60178 }, { "epoch": 2.0, "grad_norm": 0.6285667419433594, "learning_rate": 0.00014941201824603226, "loss": 1.7062, "step": 60179 }, { "epoch": 2.0, "grad_norm": 0.6239555478096008, "learning_rate": 0.00014940297839729022, "loss": 1.6941, "step": 60180 }, { "epoch": 2.0, "grad_norm": 0.6132259368896484, "learning_rate": 0.00014939393873135076, "loss": 1.762, "step": 60181 }, { "epoch": 2.0, "grad_norm": 0.6122101545333862, "learning_rate": 0.00014938489924822476, "loss": 1.7827, "step": 60182 }, { "epoch": 2.0, "grad_norm": 0.6089259386062622, "learning_rate": 0.00014937585994792343, "loss": 1.6422, "step": 60183 }, { "epoch": 2.0, "grad_norm": 0.6067856550216675, "learning_rate": 0.0001493668208304576, "loss": 1.6789, "step": 60184 }, { "epoch": 2.0, "grad_norm": 0.6073519587516785, "learning_rate": 0.00014935778189583817, "loss": 1.6751, "step": 60185 }, { "epoch": 2.0, "grad_norm": 0.5964182615280151, "learning_rate": 0.00014934874314407616, "loss": 1.7619, "step": 60186 }, { "epoch": 2.0, "grad_norm": 0.6118429899215698, "learning_rate": 0.00014933970457518272, "loss": 1.7005, "step": 60187 }, { "epoch": 2.0, "grad_norm": 0.6269901990890503, "learning_rate": 0.00014933066618916864, "loss": 1.7463, "step": 60188 }, { "epoch": 2.0, "grad_norm": 0.60689777135849, "learning_rate": 0.00014932162798604487, "loss": 1.6873, "step": 60189 }, { "epoch": 2.0, "grad_norm": 0.6182310581207275, "learning_rate": 0.00014931258996582253, "loss": 1.7758, "step": 60190 }, { "epoch": 2.0, "grad_norm": 0.6086856126785278, "learning_rate": 0.0001493035521285125, "loss": 1.6604, "step": 60191 }, { "epoch": 2.0, "grad_norm": 0.623370885848999, "learning_rate": 0.00014929451447412567, "loss": 1.7487, "step": 60192 }, { "epoch": 2.0, "grad_norm": 0.6077513694763184, "learning_rate": 0.00014928547700267307, "loss": 1.7137, "step": 60193 }, { "epoch": 2.0, "grad_norm": 0.6029098629951477, "learning_rate": 0.0001492764397141658, "loss": 1.7648, "step": 60194 }, { "epoch": 2.0, "grad_norm": 0.5866281390190125, "learning_rate": 0.00014926740260861473, "loss": 1.7142, "step": 60195 }, { "epoch": 2.0, "grad_norm": 0.6168428063392639, "learning_rate": 0.00014925836568603068, "loss": 1.7609, "step": 60196 }, { "epoch": 2.0, "grad_norm": 0.6645855903625488, "learning_rate": 0.00014924932894642493, "loss": 1.7624, "step": 60197 }, { "epoch": 2.0, "grad_norm": 0.618373692035675, "learning_rate": 0.00014924029238980828, "loss": 1.6577, "step": 60198 }, { "epoch": 2.0, "grad_norm": 0.6035346984863281, "learning_rate": 0.00014923125601619153, "loss": 1.7007, "step": 60199 }, { "epoch": 2.0, "grad_norm": 0.5853749513626099, "learning_rate": 0.00014922221982558586, "loss": 1.7203, "step": 60200 }, { "epoch": 2.0, "grad_norm": 0.6096802949905396, "learning_rate": 0.0001492131838180023, "loss": 1.7794, "step": 60201 }, { "epoch": 2.0, "grad_norm": 0.6197864413261414, "learning_rate": 0.00014920414799345175, "loss": 1.716, "step": 60202 }, { "epoch": 2.0, "grad_norm": 0.6020680069923401, "learning_rate": 0.00014919511235194502, "loss": 1.7639, "step": 60203 }, { "epoch": 2.0, "grad_norm": 0.5929385423660278, "learning_rate": 0.0001491860768934933, "loss": 1.6973, "step": 60204 }, { "epoch": 2.0, "grad_norm": 0.6245373487472534, "learning_rate": 0.00014917704161810746, "loss": 1.6878, "step": 60205 }, { "epoch": 2.0, "grad_norm": 0.5896468162536621, "learning_rate": 0.00014916800652579835, "loss": 1.6931, "step": 60206 }, { "epoch": 2.0, "grad_norm": 0.6003813147544861, "learning_rate": 0.00014915897161657708, "loss": 1.7329, "step": 60207 }, { "epoch": 2.0, "grad_norm": 0.6151282787322998, "learning_rate": 0.00014914993689045468, "loss": 1.732, "step": 60208 }, { "epoch": 2.0, "grad_norm": 0.6048913598060608, "learning_rate": 0.00014914090234744207, "loss": 1.6535, "step": 60209 }, { "epoch": 2.0, "grad_norm": 0.5978078246116638, "learning_rate": 0.00014913186798755005, "loss": 1.6469, "step": 60210 }, { "epoch": 2.0, "grad_norm": 0.6184185147285461, "learning_rate": 0.00014912283381078982, "loss": 1.6736, "step": 60211 }, { "epoch": 2.0, "grad_norm": 0.6096803545951843, "learning_rate": 0.00014911379981717224, "loss": 1.6853, "step": 60212 }, { "epoch": 2.0, "grad_norm": 0.6047434210777283, "learning_rate": 0.00014910476600670815, "loss": 1.6938, "step": 60213 }, { "epoch": 2.0, "grad_norm": 0.6065720915794373, "learning_rate": 0.00014909573237940874, "loss": 1.7672, "step": 60214 }, { "epoch": 2.0, "grad_norm": 0.6090170741081238, "learning_rate": 0.0001490866989352848, "loss": 1.7096, "step": 60215 }, { "epoch": 2.0, "grad_norm": 0.6030942797660828, "learning_rate": 0.00014907766567434744, "loss": 1.7446, "step": 60216 }, { "epoch": 2.0, "grad_norm": 0.5885596871376038, "learning_rate": 0.0001490686325966075, "loss": 1.6884, "step": 60217 }, { "epoch": 2.0, "grad_norm": 0.6238536834716797, "learning_rate": 0.00014905959970207608, "loss": 1.6771, "step": 60218 }, { "epoch": 2.0, "grad_norm": 0.591677188873291, "learning_rate": 0.00014905056699076406, "loss": 1.707, "step": 60219 }, { "epoch": 2.0, "grad_norm": 0.6397533416748047, "learning_rate": 0.00014904153446268232, "loss": 1.7406, "step": 60220 }, { "epoch": 2.0, "grad_norm": 0.6099369525909424, "learning_rate": 0.00014903250211784202, "loss": 1.7886, "step": 60221 }, { "epoch": 2.0, "grad_norm": 0.6077999472618103, "learning_rate": 0.0001490234699562539, "loss": 1.6875, "step": 60222 }, { "epoch": 2.0, "grad_norm": 0.6281943917274475, "learning_rate": 0.00014901443797792915, "loss": 1.7405, "step": 60223 }, { "epoch": 2.0, "grad_norm": 0.6143932342529297, "learning_rate": 0.00014900540618287863, "loss": 1.679, "step": 60224 }, { "epoch": 2.0, "grad_norm": 0.6210618019104004, "learning_rate": 0.0001489963745711132, "loss": 1.8106, "step": 60225 }, { "epoch": 2.0, "grad_norm": 0.6409868001937866, "learning_rate": 0.00014898734314264402, "loss": 1.698, "step": 60226 }, { "epoch": 2.0, "grad_norm": 0.6005218029022217, "learning_rate": 0.00014897831189748185, "loss": 1.6565, "step": 60227 }, { "epoch": 2.0, "grad_norm": 0.6150743961334229, "learning_rate": 0.00014896928083563788, "loss": 1.6875, "step": 60228 }, { "epoch": 2.0, "grad_norm": 0.6152004599571228, "learning_rate": 0.0001489602499571228, "loss": 1.7735, "step": 60229 }, { "epoch": 2.0, "grad_norm": 0.6220003962516785, "learning_rate": 0.00014895121926194788, "loss": 1.6605, "step": 60230 }, { "epoch": 2.0, "grad_norm": 0.622404932975769, "learning_rate": 0.00014894218875012387, "loss": 1.6889, "step": 60231 }, { "epoch": 2.0, "grad_norm": 0.5998366475105286, "learning_rate": 0.0001489331584216617, "loss": 1.7927, "step": 60232 }, { "epoch": 2.0, "grad_norm": 0.6054990887641907, "learning_rate": 0.00014892412827657256, "loss": 1.7094, "step": 60233 }, { "epoch": 2.0, "grad_norm": 0.6198086142539978, "learning_rate": 0.0001489150983148672, "loss": 1.6896, "step": 60234 }, { "epoch": 2.0, "grad_norm": 0.6376445889472961, "learning_rate": 0.0001489060685365566, "loss": 1.677, "step": 60235 }, { "epoch": 2.0, "grad_norm": 0.5926403999328613, "learning_rate": 0.00014889703894165176, "loss": 1.6617, "step": 60236 }, { "epoch": 2.0, "grad_norm": 0.6150608658790588, "learning_rate": 0.00014888800953016376, "loss": 1.7588, "step": 60237 }, { "epoch": 2.0, "grad_norm": 0.6148839592933655, "learning_rate": 0.00014887898030210343, "loss": 1.611, "step": 60238 }, { "epoch": 2.0, "grad_norm": 0.6249314546585083, "learning_rate": 0.00014886995125748165, "loss": 1.6969, "step": 60239 }, { "epoch": 2.0, "grad_norm": 0.6378170251846313, "learning_rate": 0.00014886092239630962, "loss": 1.7784, "step": 60240 }, { "epoch": 2.0, "grad_norm": 0.6341256499290466, "learning_rate": 0.00014885189371859813, "loss": 1.6782, "step": 60241 }, { "epoch": 2.0, "grad_norm": 0.6082748770713806, "learning_rate": 0.00014884286522435806, "loss": 1.8018, "step": 60242 }, { "epoch": 2.0, "grad_norm": 0.6006911993026733, "learning_rate": 0.00014883383691360048, "loss": 1.6754, "step": 60243 }, { "epoch": 2.0, "grad_norm": 0.6191865801811218, "learning_rate": 0.00014882480878633658, "loss": 1.6145, "step": 60244 }, { "epoch": 2.0, "grad_norm": 0.6245487928390503, "learning_rate": 0.00014881578084257685, "loss": 1.6911, "step": 60245 }, { "epoch": 2.0, "grad_norm": 0.606043815612793, "learning_rate": 0.00014880675308233252, "loss": 1.6708, "step": 60246 }, { "epoch": 2.0, "grad_norm": 0.6446478366851807, "learning_rate": 0.0001487977255056146, "loss": 1.7769, "step": 60247 }, { "epoch": 2.0, "grad_norm": 0.6029661297798157, "learning_rate": 0.00014878869811243398, "loss": 1.7817, "step": 60248 }, { "epoch": 2.0, "grad_norm": 0.6124480962753296, "learning_rate": 0.00014877967090280147, "loss": 1.7348, "step": 60249 }, { "epoch": 2.0, "grad_norm": 0.5902868509292603, "learning_rate": 0.00014877064387672817, "loss": 1.713, "step": 60250 }, { "epoch": 2.0, "grad_norm": 0.5827146768569946, "learning_rate": 0.00014876161703422522, "loss": 1.6782, "step": 60251 }, { "epoch": 2.0, "grad_norm": 0.6446282863616943, "learning_rate": 0.00014875259037530318, "loss": 1.7086, "step": 60252 }, { "epoch": 2.0, "grad_norm": 0.6189299821853638, "learning_rate": 0.0001487435638999732, "loss": 1.7059, "step": 60253 }, { "epoch": 2.0, "grad_norm": 0.6071868538856506, "learning_rate": 0.00014873453760824636, "loss": 1.7489, "step": 60254 }, { "epoch": 2.0, "grad_norm": 0.6038219332695007, "learning_rate": 0.00014872551150013353, "loss": 1.7197, "step": 60255 }, { "epoch": 2.0, "grad_norm": 0.6350604295730591, "learning_rate": 0.00014871648557564552, "loss": 1.7255, "step": 60256 }, { "epoch": 2.0, "grad_norm": 0.6172757744789124, "learning_rate": 0.00014870745983479338, "loss": 1.7264, "step": 60257 }, { "epoch": 2.0, "grad_norm": 0.6150712966918945, "learning_rate": 0.00014869843427758834, "loss": 1.6449, "step": 60258 }, { "epoch": 2.0, "grad_norm": 0.5957980155944824, "learning_rate": 0.0001486894089040409, "loss": 1.701, "step": 60259 }, { "epoch": 2.0, "grad_norm": 0.5993431806564331, "learning_rate": 0.0001486803837141622, "loss": 1.7335, "step": 60260 }, { "epoch": 2.0, "grad_norm": 0.620184063911438, "learning_rate": 0.0001486713587079633, "loss": 1.7084, "step": 60261 }, { "epoch": 2.0, "grad_norm": 0.6139060854911804, "learning_rate": 0.00014866233388545513, "loss": 1.7203, "step": 60262 }, { "epoch": 2.0, "grad_norm": 0.6113861203193665, "learning_rate": 0.00014865330924664845, "loss": 1.7477, "step": 60263 }, { "epoch": 2.0, "grad_norm": 0.589525580406189, "learning_rate": 0.00014864428479155443, "loss": 1.69, "step": 60264 }, { "epoch": 2.01, "grad_norm": 0.612396240234375, "learning_rate": 0.000148635260520184, "loss": 1.7754, "step": 60265 }, { "epoch": 2.01, "grad_norm": 0.6204644441604614, "learning_rate": 0.00014862623643254792, "loss": 1.7309, "step": 60266 }, { "epoch": 2.01, "grad_norm": 0.6180413365364075, "learning_rate": 0.0001486172125286573, "loss": 1.6879, "step": 60267 }, { "epoch": 2.01, "grad_norm": 0.5925174951553345, "learning_rate": 0.0001486081888085232, "loss": 1.7853, "step": 60268 }, { "epoch": 2.01, "grad_norm": 0.6171659231185913, "learning_rate": 0.00014859916527215644, "loss": 1.809, "step": 60269 }, { "epoch": 2.01, "grad_norm": 0.589039146900177, "learning_rate": 0.00014859014191956788, "loss": 1.6991, "step": 60270 }, { "epoch": 2.01, "grad_norm": 0.6233271956443787, "learning_rate": 0.00014858111875076866, "loss": 1.7078, "step": 60271 }, { "epoch": 2.01, "grad_norm": 0.6003165245056152, "learning_rate": 0.00014857209576576966, "loss": 1.7106, "step": 60272 }, { "epoch": 2.01, "grad_norm": 0.6420077681541443, "learning_rate": 0.00014856307296458174, "loss": 1.7159, "step": 60273 }, { "epoch": 2.01, "grad_norm": 0.6245397329330444, "learning_rate": 0.00014855405034721605, "loss": 1.7148, "step": 60274 }, { "epoch": 2.01, "grad_norm": 0.6285946369171143, "learning_rate": 0.00014854502791368328, "loss": 1.7286, "step": 60275 }, { "epoch": 2.01, "grad_norm": 0.6271462440490723, "learning_rate": 0.00014853600566399467, "loss": 1.7316, "step": 60276 }, { "epoch": 2.01, "grad_norm": 0.5972735285758972, "learning_rate": 0.0001485269835981609, "loss": 1.6274, "step": 60277 }, { "epoch": 2.01, "grad_norm": 0.6134470701217651, "learning_rate": 0.00014851796171619317, "loss": 1.6431, "step": 60278 }, { "epoch": 2.01, "grad_norm": 0.6206455230712891, "learning_rate": 0.00014850894001810236, "loss": 1.7386, "step": 60279 }, { "epoch": 2.01, "grad_norm": 0.6053020358085632, "learning_rate": 0.00014849991850389923, "loss": 1.7184, "step": 60280 }, { "epoch": 2.01, "grad_norm": 0.6157089471817017, "learning_rate": 0.00014849089717359497, "loss": 1.7188, "step": 60281 }, { "epoch": 2.01, "grad_norm": 0.6245776414871216, "learning_rate": 0.00014848187602720033, "loss": 1.7735, "step": 60282 }, { "epoch": 2.01, "grad_norm": 0.6189479827880859, "learning_rate": 0.0001484728550647265, "loss": 1.7677, "step": 60283 }, { "epoch": 2.01, "grad_norm": 0.6135355830192566, "learning_rate": 0.0001484638342861843, "loss": 1.7465, "step": 60284 }, { "epoch": 2.01, "grad_norm": 0.5844075679779053, "learning_rate": 0.00014845481369158453, "loss": 1.6896, "step": 60285 }, { "epoch": 2.01, "grad_norm": 0.5991296768188477, "learning_rate": 0.00014844579328093842, "loss": 1.7502, "step": 60286 }, { "epoch": 2.01, "grad_norm": 0.5989100337028503, "learning_rate": 0.00014843677305425668, "loss": 1.6365, "step": 60287 }, { "epoch": 2.01, "grad_norm": 0.6072575449943542, "learning_rate": 0.0001484277530115505, "loss": 1.6967, "step": 60288 }, { "epoch": 2.01, "grad_norm": 0.6108738780021667, "learning_rate": 0.00014841873315283053, "loss": 1.7598, "step": 60289 }, { "epoch": 2.01, "grad_norm": 0.6193929314613342, "learning_rate": 0.000148409713478108, "loss": 1.7651, "step": 60290 }, { "epoch": 2.01, "grad_norm": 0.6332147121429443, "learning_rate": 0.00014840069398739379, "loss": 1.6695, "step": 60291 }, { "epoch": 2.01, "grad_norm": 0.6149330735206604, "learning_rate": 0.00014839167468069865, "loss": 1.7255, "step": 60292 }, { "epoch": 2.01, "grad_norm": 0.5922834277153015, "learning_rate": 0.0001483826555580338, "loss": 1.7312, "step": 60293 }, { "epoch": 2.01, "grad_norm": 0.6322404146194458, "learning_rate": 0.00014837363661941009, "loss": 1.7063, "step": 60294 }, { "epoch": 2.01, "grad_norm": 0.6011108160018921, "learning_rate": 0.0001483646178648383, "loss": 1.7275, "step": 60295 }, { "epoch": 2.01, "grad_norm": 0.628080427646637, "learning_rate": 0.00014835559929432952, "loss": 1.6861, "step": 60296 }, { "epoch": 2.01, "grad_norm": 0.611453652381897, "learning_rate": 0.00014834658090789483, "loss": 1.7374, "step": 60297 }, { "epoch": 2.01, "grad_norm": 0.5942494869232178, "learning_rate": 0.00014833756270554504, "loss": 1.6312, "step": 60298 }, { "epoch": 2.01, "grad_norm": 0.5971953272819519, "learning_rate": 0.000148328544687291, "loss": 1.7432, "step": 60299 }, { "epoch": 2.01, "grad_norm": 0.6201603412628174, "learning_rate": 0.00014831952685314385, "loss": 1.6621, "step": 60300 }, { "epoch": 2.01, "grad_norm": 0.6263854503631592, "learning_rate": 0.00014831050920311446, "loss": 1.61, "step": 60301 }, { "epoch": 2.01, "grad_norm": 0.6151107549667358, "learning_rate": 0.00014830149173721365, "loss": 1.7708, "step": 60302 }, { "epoch": 2.01, "grad_norm": 0.6376286745071411, "learning_rate": 0.00014829247445545247, "loss": 1.6929, "step": 60303 }, { "epoch": 2.01, "grad_norm": 0.5998795032501221, "learning_rate": 0.000148283457357842, "loss": 1.6792, "step": 60304 }, { "epoch": 2.01, "grad_norm": 0.595421314239502, "learning_rate": 0.00014827444044439303, "loss": 1.6493, "step": 60305 }, { "epoch": 2.01, "grad_norm": 0.6200767159461975, "learning_rate": 0.00014826542371511645, "loss": 1.767, "step": 60306 }, { "epoch": 2.01, "grad_norm": 0.6401851773262024, "learning_rate": 0.00014825640717002338, "loss": 1.7057, "step": 60307 }, { "epoch": 2.01, "grad_norm": 0.6123217940330505, "learning_rate": 0.00014824739080912468, "loss": 1.7223, "step": 60308 }, { "epoch": 2.01, "grad_norm": 0.610363245010376, "learning_rate": 0.00014823837463243117, "loss": 1.7096, "step": 60309 }, { "epoch": 2.01, "grad_norm": 0.6273322105407715, "learning_rate": 0.0001482293586399539, "loss": 1.7792, "step": 60310 }, { "epoch": 2.01, "grad_norm": 0.6095405220985413, "learning_rate": 0.00014822034283170398, "loss": 1.7259, "step": 60311 }, { "epoch": 2.01, "grad_norm": 0.6135423183441162, "learning_rate": 0.00014821132720769216, "loss": 1.688, "step": 60312 }, { "epoch": 2.01, "grad_norm": 0.6770018935203552, "learning_rate": 0.00014820231176792935, "loss": 1.7761, "step": 60313 }, { "epoch": 2.01, "grad_norm": 0.6108980774879456, "learning_rate": 0.00014819329651242664, "loss": 1.7365, "step": 60314 }, { "epoch": 2.01, "grad_norm": 0.6028031706809998, "learning_rate": 0.00014818428144119494, "loss": 1.7493, "step": 60315 }, { "epoch": 2.01, "grad_norm": 0.6051914095878601, "learning_rate": 0.00014817526655424502, "loss": 1.8329, "step": 60316 }, { "epoch": 2.01, "grad_norm": 0.6247576475143433, "learning_rate": 0.00014816625185158796, "loss": 1.7037, "step": 60317 }, { "epoch": 2.01, "grad_norm": 0.6240453124046326, "learning_rate": 0.0001481572373332348, "loss": 1.7264, "step": 60318 }, { "epoch": 2.01, "grad_norm": 0.6236699223518372, "learning_rate": 0.00014814822299919638, "loss": 1.748, "step": 60319 }, { "epoch": 2.01, "grad_norm": 0.6160011887550354, "learning_rate": 0.00014813920884948356, "loss": 1.7235, "step": 60320 }, { "epoch": 2.01, "grad_norm": 0.6099808216094971, "learning_rate": 0.00014813019488410745, "loss": 1.7394, "step": 60321 }, { "epoch": 2.01, "grad_norm": 0.6223258972167969, "learning_rate": 0.0001481211811030789, "loss": 1.7233, "step": 60322 }, { "epoch": 2.01, "grad_norm": 0.6221323609352112, "learning_rate": 0.00014811216750640875, "loss": 1.723, "step": 60323 }, { "epoch": 2.01, "grad_norm": 0.5978306531906128, "learning_rate": 0.00014810315409410807, "loss": 1.7085, "step": 60324 }, { "epoch": 2.01, "grad_norm": 0.6361507177352905, "learning_rate": 0.00014809414086618786, "loss": 1.7216, "step": 60325 }, { "epoch": 2.01, "grad_norm": 0.6133407950401306, "learning_rate": 0.00014808512782265904, "loss": 1.7183, "step": 60326 }, { "epoch": 2.01, "grad_norm": 0.6105668544769287, "learning_rate": 0.0001480761149635323, "loss": 1.7411, "step": 60327 }, { "epoch": 2.01, "grad_norm": 0.6110548377037048, "learning_rate": 0.00014806710228881893, "loss": 1.7379, "step": 60328 }, { "epoch": 2.01, "grad_norm": 0.6440240144729614, "learning_rate": 0.0001480580897985297, "loss": 1.668, "step": 60329 }, { "epoch": 2.01, "grad_norm": 0.6596327424049377, "learning_rate": 0.00014804907749267548, "loss": 1.707, "step": 60330 }, { "epoch": 2.01, "grad_norm": 0.5943066477775574, "learning_rate": 0.00014804006537126737, "loss": 1.7676, "step": 60331 }, { "epoch": 2.01, "grad_norm": 0.6304259896278381, "learning_rate": 0.00014803105343431613, "loss": 1.733, "step": 60332 }, { "epoch": 2.01, "grad_norm": 0.6216108202934265, "learning_rate": 0.0001480220416818329, "loss": 1.7859, "step": 60333 }, { "epoch": 2.01, "grad_norm": 0.5967682600021362, "learning_rate": 0.0001480130301138284, "loss": 1.6451, "step": 60334 }, { "epoch": 2.01, "grad_norm": 0.6200982332229614, "learning_rate": 0.00014800401873031382, "loss": 1.7544, "step": 60335 }, { "epoch": 2.01, "grad_norm": 0.6034727096557617, "learning_rate": 0.00014799500753129996, "loss": 1.7058, "step": 60336 }, { "epoch": 2.01, "grad_norm": 0.6136420965194702, "learning_rate": 0.00014798599651679766, "loss": 1.7039, "step": 60337 }, { "epoch": 2.01, "grad_norm": 0.6307217478752136, "learning_rate": 0.00014797698568681805, "loss": 1.7163, "step": 60338 }, { "epoch": 2.01, "grad_norm": 0.6153732538223267, "learning_rate": 0.00014796797504137188, "loss": 1.6305, "step": 60339 }, { "epoch": 2.01, "grad_norm": 0.634103536605835, "learning_rate": 0.0001479589645804703, "loss": 1.7108, "step": 60340 }, { "epoch": 2.01, "grad_norm": 0.6114563345909119, "learning_rate": 0.00014794995430412414, "loss": 1.7744, "step": 60341 }, { "epoch": 2.01, "grad_norm": 0.6450275182723999, "learning_rate": 0.0001479409442123442, "loss": 1.7234, "step": 60342 }, { "epoch": 2.01, "grad_norm": 0.6199347972869873, "learning_rate": 0.00014793193430514168, "loss": 1.7146, "step": 60343 }, { "epoch": 2.01, "grad_norm": 0.6135603785514832, "learning_rate": 0.00014792292458252728, "loss": 1.7255, "step": 60344 }, { "epoch": 2.01, "grad_norm": 0.6327192187309265, "learning_rate": 0.00014791391504451215, "loss": 1.6974, "step": 60345 }, { "epoch": 2.01, "grad_norm": 0.6095327138900757, "learning_rate": 0.000147904905691107, "loss": 1.6523, "step": 60346 }, { "epoch": 2.01, "grad_norm": 0.59876948595047, "learning_rate": 0.000147895896522323, "loss": 1.7269, "step": 60347 }, { "epoch": 2.01, "grad_norm": 0.6237714886665344, "learning_rate": 0.00014788688753817095, "loss": 1.7266, "step": 60348 }, { "epoch": 2.01, "grad_norm": 0.6345177292823792, "learning_rate": 0.0001478778787386617, "loss": 1.693, "step": 60349 }, { "epoch": 2.01, "grad_norm": 0.6332368850708008, "learning_rate": 0.00014786887012380642, "loss": 1.7181, "step": 60350 }, { "epoch": 2.01, "grad_norm": 0.6362684369087219, "learning_rate": 0.0001478598616936159, "loss": 1.692, "step": 60351 }, { "epoch": 2.01, "grad_norm": 0.6245947480201721, "learning_rate": 0.00014785085344810102, "loss": 1.7398, "step": 60352 }, { "epoch": 2.01, "grad_norm": 0.6139755845069885, "learning_rate": 0.00014784184538727275, "loss": 1.7153, "step": 60353 }, { "epoch": 2.01, "grad_norm": 0.617918074131012, "learning_rate": 0.00014783283751114217, "loss": 1.673, "step": 60354 }, { "epoch": 2.01, "grad_norm": 0.6079279184341431, "learning_rate": 0.00014782382981972012, "loss": 1.7785, "step": 60355 }, { "epoch": 2.01, "grad_norm": 0.6050735712051392, "learning_rate": 0.0001478148223130174, "loss": 1.7342, "step": 60356 }, { "epoch": 2.01, "grad_norm": 0.5851110816001892, "learning_rate": 0.00014780581499104518, "loss": 1.6531, "step": 60357 }, { "epoch": 2.01, "grad_norm": 0.610306441783905, "learning_rate": 0.00014779680785381426, "loss": 1.8025, "step": 60358 }, { "epoch": 2.01, "grad_norm": 0.6156531572341919, "learning_rate": 0.00014778780090133546, "loss": 1.6876, "step": 60359 }, { "epoch": 2.01, "grad_norm": 0.6248275637626648, "learning_rate": 0.0001477787941336199, "loss": 1.7132, "step": 60360 }, { "epoch": 2.01, "grad_norm": 0.5853433609008789, "learning_rate": 0.00014776978755067863, "loss": 1.7002, "step": 60361 }, { "epoch": 2.01, "grad_norm": 0.6249309182167053, "learning_rate": 0.0001477607811525222, "loss": 1.7193, "step": 60362 }, { "epoch": 2.01, "grad_norm": 0.6005472540855408, "learning_rate": 0.00014775177493916175, "loss": 1.6919, "step": 60363 }, { "epoch": 2.01, "grad_norm": 0.5927996039390564, "learning_rate": 0.00014774276891060833, "loss": 1.6801, "step": 60364 }, { "epoch": 2.01, "grad_norm": 0.6273278594017029, "learning_rate": 0.00014773376306687275, "loss": 1.7295, "step": 60365 }, { "epoch": 2.01, "grad_norm": 0.6018630266189575, "learning_rate": 0.00014772475740796584, "loss": 1.6295, "step": 60366 }, { "epoch": 2.01, "grad_norm": 0.6051371693611145, "learning_rate": 0.00014771575193389861, "loss": 1.6735, "step": 60367 }, { "epoch": 2.01, "grad_norm": 0.6288729310035706, "learning_rate": 0.0001477067466446823, "loss": 1.7218, "step": 60368 }, { "epoch": 2.01, "grad_norm": 0.5980788469314575, "learning_rate": 0.0001476977415403273, "loss": 1.6755, "step": 60369 }, { "epoch": 2.01, "grad_norm": 0.6157421469688416, "learning_rate": 0.0001476887366208448, "loss": 1.7464, "step": 60370 }, { "epoch": 2.01, "grad_norm": 0.6070852279663086, "learning_rate": 0.00014767973188624584, "loss": 1.7241, "step": 60371 }, { "epoch": 2.01, "grad_norm": 0.6278901696205139, "learning_rate": 0.00014767072733654123, "loss": 1.6992, "step": 60372 }, { "epoch": 2.01, "grad_norm": 0.6131814122200012, "learning_rate": 0.00014766172297174182, "loss": 1.6815, "step": 60373 }, { "epoch": 2.01, "grad_norm": 0.6088966131210327, "learning_rate": 0.00014765271879185862, "loss": 1.7208, "step": 60374 }, { "epoch": 2.01, "grad_norm": 0.6077744364738464, "learning_rate": 0.00014764371479690283, "loss": 1.7943, "step": 60375 }, { "epoch": 2.01, "grad_norm": 0.610322117805481, "learning_rate": 0.00014763471098688488, "loss": 1.7516, "step": 60376 }, { "epoch": 2.01, "grad_norm": 0.6034087538719177, "learning_rate": 0.00014762570736181593, "loss": 1.6584, "step": 60377 }, { "epoch": 2.01, "grad_norm": 0.612743079662323, "learning_rate": 0.00014761670392170705, "loss": 1.7687, "step": 60378 }, { "epoch": 2.01, "grad_norm": 0.615617036819458, "learning_rate": 0.000147607700666569, "loss": 1.7688, "step": 60379 }, { "epoch": 2.01, "grad_norm": 0.6040959358215332, "learning_rate": 0.00014759869759641266, "loss": 1.7664, "step": 60380 }, { "epoch": 2.01, "grad_norm": 0.5871499180793762, "learning_rate": 0.00014758969471124917, "loss": 1.7077, "step": 60381 }, { "epoch": 2.01, "grad_norm": 0.6103892922401428, "learning_rate": 0.00014758069201108932, "loss": 1.6793, "step": 60382 }, { "epoch": 2.01, "grad_norm": 0.5908783674240112, "learning_rate": 0.00014757168949594394, "loss": 1.6924, "step": 60383 }, { "epoch": 2.01, "grad_norm": 0.6175392866134644, "learning_rate": 0.0001475626871658241, "loss": 1.6878, "step": 60384 }, { "epoch": 2.01, "grad_norm": 0.6157854795455933, "learning_rate": 0.00014755368502074076, "loss": 1.712, "step": 60385 }, { "epoch": 2.01, "grad_norm": 0.6074619293212891, "learning_rate": 0.0001475446830607048, "loss": 1.6713, "step": 60386 }, { "epoch": 2.01, "grad_norm": 0.6270627379417419, "learning_rate": 0.00014753568128572701, "loss": 1.7961, "step": 60387 }, { "epoch": 2.01, "grad_norm": 0.5982743501663208, "learning_rate": 0.0001475266796958186, "loss": 1.7867, "step": 60388 }, { "epoch": 2.01, "grad_norm": 0.6013206839561462, "learning_rate": 0.00014751767829099027, "loss": 1.6676, "step": 60389 }, { "epoch": 2.01, "grad_norm": 0.608886182308197, "learning_rate": 0.00014750867707125296, "loss": 1.7014, "step": 60390 }, { "epoch": 2.01, "grad_norm": 0.615382969379425, "learning_rate": 0.00014749967603661774, "loss": 1.7198, "step": 60391 }, { "epoch": 2.01, "grad_norm": 0.6251940727233887, "learning_rate": 0.0001474906751870953, "loss": 1.6833, "step": 60392 }, { "epoch": 2.01, "grad_norm": 0.597141444683075, "learning_rate": 0.00014748167452269687, "loss": 1.7368, "step": 60393 }, { "epoch": 2.01, "grad_norm": 0.6053937077522278, "learning_rate": 0.00014747267404343305, "loss": 1.6973, "step": 60394 }, { "epoch": 2.01, "grad_norm": 0.6058710813522339, "learning_rate": 0.0001474636737493151, "loss": 1.7217, "step": 60395 }, { "epoch": 2.01, "grad_norm": 0.6071389317512512, "learning_rate": 0.00014745467364035372, "loss": 1.7066, "step": 60396 }, { "epoch": 2.01, "grad_norm": 0.6016531586647034, "learning_rate": 0.00014744567371655982, "loss": 1.6885, "step": 60397 }, { "epoch": 2.01, "grad_norm": 0.6152533888816833, "learning_rate": 0.00014743667397794447, "loss": 1.7019, "step": 60398 }, { "epoch": 2.01, "grad_norm": 0.6291232109069824, "learning_rate": 0.00014742767442451843, "loss": 1.7031, "step": 60399 }, { "epoch": 2.01, "grad_norm": 0.6233746409416199, "learning_rate": 0.0001474186750562928, "loss": 1.6389, "step": 60400 }, { "epoch": 2.01, "grad_norm": 0.603408932685852, "learning_rate": 0.00014740967587327844, "loss": 1.6582, "step": 60401 }, { "epoch": 2.01, "grad_norm": 0.614058792591095, "learning_rate": 0.00014740067687548614, "loss": 1.7168, "step": 60402 }, { "epoch": 2.01, "grad_norm": 0.6073086261749268, "learning_rate": 0.000147391678062927, "loss": 1.7791, "step": 60403 }, { "epoch": 2.01, "grad_norm": 0.6136860251426697, "learning_rate": 0.00014738267943561182, "loss": 1.6225, "step": 60404 }, { "epoch": 2.01, "grad_norm": 0.600477397441864, "learning_rate": 0.00014737368099355168, "loss": 1.628, "step": 60405 }, { "epoch": 2.01, "grad_norm": 0.6589958071708679, "learning_rate": 0.0001473646827367573, "loss": 1.6639, "step": 60406 }, { "epoch": 2.01, "grad_norm": 0.6405128240585327, "learning_rate": 0.00014735568466523981, "loss": 1.7392, "step": 60407 }, { "epoch": 2.01, "grad_norm": 0.6224762797355652, "learning_rate": 0.00014734668677901, "loss": 1.692, "step": 60408 }, { "epoch": 2.01, "grad_norm": 0.5966238379478455, "learning_rate": 0.00014733768907807877, "loss": 1.6389, "step": 60409 }, { "epoch": 2.01, "grad_norm": 0.6142339110374451, "learning_rate": 0.00014732869156245713, "loss": 1.7043, "step": 60410 }, { "epoch": 2.01, "grad_norm": 0.5943561792373657, "learning_rate": 0.000147319694232156, "loss": 1.7095, "step": 60411 }, { "epoch": 2.01, "grad_norm": 0.6146330237388611, "learning_rate": 0.00014731069708718615, "loss": 1.6962, "step": 60412 }, { "epoch": 2.01, "grad_norm": 0.6025080680847168, "learning_rate": 0.00014730170012755862, "loss": 1.664, "step": 60413 }, { "epoch": 2.01, "grad_norm": 0.6318098306655884, "learning_rate": 0.00014729270335328445, "loss": 1.7165, "step": 60414 }, { "epoch": 2.01, "grad_norm": 0.6331232190132141, "learning_rate": 0.0001472837067643744, "loss": 1.7148, "step": 60415 }, { "epoch": 2.01, "grad_norm": 0.5943846702575684, "learning_rate": 0.00014727471036083932, "loss": 1.7388, "step": 60416 }, { "epoch": 2.01, "grad_norm": 0.6204037666320801, "learning_rate": 0.0001472657141426904, "loss": 1.8112, "step": 60417 }, { "epoch": 2.01, "grad_norm": 0.6373065114021301, "learning_rate": 0.00014725671810993836, "loss": 1.6868, "step": 60418 }, { "epoch": 2.01, "grad_norm": 0.6500932574272156, "learning_rate": 0.00014724772226259406, "loss": 1.7638, "step": 60419 }, { "epoch": 2.01, "grad_norm": 0.6098122000694275, "learning_rate": 0.00014723872660066852, "loss": 1.7761, "step": 60420 }, { "epoch": 2.01, "grad_norm": 0.6608915328979492, "learning_rate": 0.00014722973112417276, "loss": 1.6765, "step": 60421 }, { "epoch": 2.01, "grad_norm": 0.6345483660697937, "learning_rate": 0.0001472207358331176, "loss": 1.7365, "step": 60422 }, { "epoch": 2.01, "grad_norm": 0.6482385993003845, "learning_rate": 0.00014721174072751381, "loss": 1.7039, "step": 60423 }, { "epoch": 2.01, "grad_norm": 0.6455329060554504, "learning_rate": 0.00014720274580737262, "loss": 1.7, "step": 60424 }, { "epoch": 2.01, "grad_norm": 0.6455720663070679, "learning_rate": 0.0001471937510727048, "loss": 1.818, "step": 60425 }, { "epoch": 2.01, "grad_norm": 0.6359795928001404, "learning_rate": 0.00014718475652352104, "loss": 1.7014, "step": 60426 }, { "epoch": 2.01, "grad_norm": 0.6034011840820312, "learning_rate": 0.00014717576215983255, "loss": 1.7613, "step": 60427 }, { "epoch": 2.01, "grad_norm": 0.6458808779716492, "learning_rate": 0.00014716676798165029, "loss": 1.7786, "step": 60428 }, { "epoch": 2.01, "grad_norm": 0.6106569766998291, "learning_rate": 0.00014715777398898502, "loss": 1.7477, "step": 60429 }, { "epoch": 2.01, "grad_norm": 0.6237738728523254, "learning_rate": 0.0001471487801818476, "loss": 1.6691, "step": 60430 }, { "epoch": 2.01, "grad_norm": 0.6117565631866455, "learning_rate": 0.0001471397865602491, "loss": 1.7506, "step": 60431 }, { "epoch": 2.01, "grad_norm": 0.6136085391044617, "learning_rate": 0.0001471307931242004, "loss": 1.6958, "step": 60432 }, { "epoch": 2.01, "grad_norm": 0.6104464530944824, "learning_rate": 0.0001471217998737123, "loss": 1.7608, "step": 60433 }, { "epoch": 2.01, "grad_norm": 0.5906792283058167, "learning_rate": 0.00014711280680879582, "loss": 1.6494, "step": 60434 }, { "epoch": 2.01, "grad_norm": 0.641895592212677, "learning_rate": 0.00014710381392946196, "loss": 1.6557, "step": 60435 }, { "epoch": 2.01, "grad_norm": 0.5998769402503967, "learning_rate": 0.00014709482123572153, "loss": 1.7409, "step": 60436 }, { "epoch": 2.01, "grad_norm": 0.6080667972564697, "learning_rate": 0.00014708582872758537, "loss": 1.7541, "step": 60437 }, { "epoch": 2.01, "grad_norm": 0.5990254282951355, "learning_rate": 0.0001470768364050646, "loss": 1.6917, "step": 60438 }, { "epoch": 2.01, "grad_norm": 0.6136438250541687, "learning_rate": 0.00014706784426816996, "loss": 1.6974, "step": 60439 }, { "epoch": 2.01, "grad_norm": 0.5931515097618103, "learning_rate": 0.00014705885231691237, "loss": 1.7148, "step": 60440 }, { "epoch": 2.01, "grad_norm": 0.6112851500511169, "learning_rate": 0.00014704986055130278, "loss": 1.7142, "step": 60441 }, { "epoch": 2.01, "grad_norm": 0.6243736743927002, "learning_rate": 0.00014704086897135223, "loss": 1.7389, "step": 60442 }, { "epoch": 2.01, "grad_norm": 0.6012459993362427, "learning_rate": 0.00014703187757707153, "loss": 1.7287, "step": 60443 }, { "epoch": 2.01, "grad_norm": 0.5929033160209656, "learning_rate": 0.00014702288636847147, "loss": 1.7694, "step": 60444 }, { "epoch": 2.01, "grad_norm": 0.6000803709030151, "learning_rate": 0.00014701389534556322, "loss": 1.7081, "step": 60445 }, { "epoch": 2.01, "grad_norm": 0.6254243850708008, "learning_rate": 0.00014700490450835757, "loss": 1.7342, "step": 60446 }, { "epoch": 2.01, "grad_norm": 0.6093995571136475, "learning_rate": 0.0001469959138568653, "loss": 1.7014, "step": 60447 }, { "epoch": 2.01, "grad_norm": 0.6050164103507996, "learning_rate": 0.00014698692339109754, "loss": 1.7249, "step": 60448 }, { "epoch": 2.01, "grad_norm": 0.5936728119850159, "learning_rate": 0.00014697793311106502, "loss": 1.6998, "step": 60449 }, { "epoch": 2.01, "grad_norm": 0.6127805113792419, "learning_rate": 0.00014696894301677885, "loss": 1.6878, "step": 60450 }, { "epoch": 2.01, "grad_norm": 0.6265658736228943, "learning_rate": 0.0001469599531082497, "loss": 1.7191, "step": 60451 }, { "epoch": 2.01, "grad_norm": 0.6205087304115295, "learning_rate": 0.00014695096338548873, "loss": 1.7612, "step": 60452 }, { "epoch": 2.01, "grad_norm": 0.6116853356361389, "learning_rate": 0.00014694197384850676, "loss": 1.714, "step": 60453 }, { "epoch": 2.01, "grad_norm": 0.6076205372810364, "learning_rate": 0.00014693298449731457, "loss": 1.7242, "step": 60454 }, { "epoch": 2.01, "grad_norm": 0.6312188506126404, "learning_rate": 0.0001469239953319233, "loss": 1.7022, "step": 60455 }, { "epoch": 2.01, "grad_norm": 0.6309304237365723, "learning_rate": 0.0001469150063523436, "loss": 1.647, "step": 60456 }, { "epoch": 2.01, "grad_norm": 0.6110554933547974, "learning_rate": 0.0001469060175585867, "loss": 1.6356, "step": 60457 }, { "epoch": 2.01, "grad_norm": 0.6288540363311768, "learning_rate": 0.00014689702895066326, "loss": 1.6595, "step": 60458 }, { "epoch": 2.01, "grad_norm": 0.6097049117088318, "learning_rate": 0.0001468880405285842, "loss": 1.7124, "step": 60459 }, { "epoch": 2.01, "grad_norm": 0.6206814050674438, "learning_rate": 0.00014687905229236064, "loss": 1.6846, "step": 60460 }, { "epoch": 2.01, "grad_norm": 0.6047213077545166, "learning_rate": 0.0001468700642420032, "loss": 1.7386, "step": 60461 }, { "epoch": 2.01, "grad_norm": 0.640042781829834, "learning_rate": 0.00014686107637752304, "loss": 1.7411, "step": 60462 }, { "epoch": 2.01, "grad_norm": 0.6096276640892029, "learning_rate": 0.00014685208869893087, "loss": 1.61, "step": 60463 }, { "epoch": 2.01, "grad_norm": 0.6135927438735962, "learning_rate": 0.00014684310120623782, "loss": 1.683, "step": 60464 }, { "epoch": 2.01, "grad_norm": 0.6109227538108826, "learning_rate": 0.00014683411389945466, "loss": 1.7041, "step": 60465 }, { "epoch": 2.01, "grad_norm": 0.581023633480072, "learning_rate": 0.00014682512677859224, "loss": 1.6089, "step": 60466 }, { "epoch": 2.01, "grad_norm": 0.6206515431404114, "learning_rate": 0.00014681613984366163, "loss": 1.7426, "step": 60467 }, { "epoch": 2.01, "grad_norm": 0.62091463804245, "learning_rate": 0.00014680715309467365, "loss": 1.8411, "step": 60468 }, { "epoch": 2.01, "grad_norm": 0.6121993064880371, "learning_rate": 0.0001467981665316391, "loss": 1.7431, "step": 60469 }, { "epoch": 2.01, "grad_norm": 0.591030478477478, "learning_rate": 0.00014678918015456903, "loss": 1.7327, "step": 60470 }, { "epoch": 2.01, "grad_norm": 0.6088941097259521, "learning_rate": 0.00014678019396347443, "loss": 1.7434, "step": 60471 }, { "epoch": 2.01, "grad_norm": 0.6226982474327087, "learning_rate": 0.00014677120795836607, "loss": 1.6385, "step": 60472 }, { "epoch": 2.01, "grad_norm": 0.6407784223556519, "learning_rate": 0.00014676222213925481, "loss": 1.6908, "step": 60473 }, { "epoch": 2.01, "grad_norm": 0.6189733147621155, "learning_rate": 0.00014675323650615172, "loss": 1.7346, "step": 60474 }, { "epoch": 2.01, "grad_norm": 0.5933812260627747, "learning_rate": 0.00014674425105906764, "loss": 1.652, "step": 60475 }, { "epoch": 2.01, "grad_norm": 0.6292269825935364, "learning_rate": 0.00014673526579801332, "loss": 1.6758, "step": 60476 }, { "epoch": 2.01, "grad_norm": 0.6272045969963074, "learning_rate": 0.00014672628072299982, "loss": 1.7252, "step": 60477 }, { "epoch": 2.01, "grad_norm": 0.6135754585266113, "learning_rate": 0.00014671729583403827, "loss": 1.7544, "step": 60478 }, { "epoch": 2.01, "grad_norm": 0.6254025101661682, "learning_rate": 0.0001467083111311391, "loss": 1.6708, "step": 60479 }, { "epoch": 2.01, "grad_norm": 0.623251736164093, "learning_rate": 0.00014669932661431345, "loss": 1.6129, "step": 60480 }, { "epoch": 2.01, "grad_norm": 0.6146725416183472, "learning_rate": 0.00014669034228357233, "loss": 1.6251, "step": 60481 }, { "epoch": 2.01, "grad_norm": 0.5992367267608643, "learning_rate": 0.00014668135813892657, "loss": 1.6938, "step": 60482 }, { "epoch": 2.01, "grad_norm": 0.5795066952705383, "learning_rate": 0.0001466723741803869, "loss": 1.7039, "step": 60483 }, { "epoch": 2.01, "grad_norm": 0.6415151357650757, "learning_rate": 0.00014666339040796442, "loss": 1.7599, "step": 60484 }, { "epoch": 2.01, "grad_norm": 0.6334680318832397, "learning_rate": 0.0001466544068216702, "loss": 1.7148, "step": 60485 }, { "epoch": 2.01, "grad_norm": 0.6000737547874451, "learning_rate": 0.00014664542342151472, "loss": 1.7606, "step": 60486 }, { "epoch": 2.01, "grad_norm": 0.6075112223625183, "learning_rate": 0.00014663644020750907, "loss": 1.7296, "step": 60487 }, { "epoch": 2.01, "grad_norm": 0.626249372959137, "learning_rate": 0.00014662745717966433, "loss": 1.7022, "step": 60488 }, { "epoch": 2.01, "grad_norm": 0.6255540251731873, "learning_rate": 0.00014661847433799127, "loss": 1.7342, "step": 60489 }, { "epoch": 2.01, "grad_norm": 0.6124805212020874, "learning_rate": 0.00014660949168250064, "loss": 1.7743, "step": 60490 }, { "epoch": 2.01, "grad_norm": 0.6014910936355591, "learning_rate": 0.0001466005092132035, "loss": 1.6952, "step": 60491 }, { "epoch": 2.01, "grad_norm": 0.6428966522216797, "learning_rate": 0.00014659152693011096, "loss": 1.703, "step": 60492 }, { "epoch": 2.01, "grad_norm": 0.6302376985549927, "learning_rate": 0.00014658254483323343, "loss": 1.6994, "step": 60493 }, { "epoch": 2.01, "grad_norm": 0.6055392622947693, "learning_rate": 0.00014657356292258216, "loss": 1.7345, "step": 60494 }, { "epoch": 2.01, "grad_norm": 0.614715576171875, "learning_rate": 0.00014656458119816806, "loss": 1.6103, "step": 60495 }, { "epoch": 2.01, "grad_norm": 0.6107013821601868, "learning_rate": 0.00014655559966000195, "loss": 1.6353, "step": 60496 }, { "epoch": 2.01, "grad_norm": 0.6283382773399353, "learning_rate": 0.00014654661830809464, "loss": 1.748, "step": 60497 }, { "epoch": 2.01, "grad_norm": 0.5969927310943604, "learning_rate": 0.0001465376371424572, "loss": 1.7336, "step": 60498 }, { "epoch": 2.01, "grad_norm": 0.6105113625526428, "learning_rate": 0.0001465286561631005, "loss": 1.6551, "step": 60499 }, { "epoch": 2.01, "grad_norm": 0.5993727445602417, "learning_rate": 0.00014651967537003522, "loss": 1.6946, "step": 60500 }, { "epoch": 2.01, "grad_norm": 0.623948872089386, "learning_rate": 0.00014651069476327253, "loss": 1.7096, "step": 60501 }, { "epoch": 2.01, "grad_norm": 0.6173028945922852, "learning_rate": 0.00014650171434282328, "loss": 1.6741, "step": 60502 }, { "epoch": 2.01, "grad_norm": 0.6047273874282837, "learning_rate": 0.00014649273410869836, "loss": 1.7418, "step": 60503 }, { "epoch": 2.01, "grad_norm": 0.6312038898468018, "learning_rate": 0.00014648375406090854, "loss": 1.7081, "step": 60504 }, { "epoch": 2.01, "grad_norm": 0.6200631260871887, "learning_rate": 0.0001464747741994649, "loss": 1.6834, "step": 60505 }, { "epoch": 2.01, "grad_norm": 0.61164790391922, "learning_rate": 0.0001464657945243783, "loss": 1.6956, "step": 60506 }, { "epoch": 2.01, "grad_norm": 0.6091893315315247, "learning_rate": 0.00014645681503565948, "loss": 1.6967, "step": 60507 }, { "epoch": 2.01, "grad_norm": 0.6442001461982727, "learning_rate": 0.0001464478357333196, "loss": 1.7549, "step": 60508 }, { "epoch": 2.01, "grad_norm": 0.5814266800880432, "learning_rate": 0.00014643885661736928, "loss": 1.6672, "step": 60509 }, { "epoch": 2.01, "grad_norm": 0.5906035304069519, "learning_rate": 0.00014642987768781967, "loss": 1.6475, "step": 60510 }, { "epoch": 2.01, "grad_norm": 0.5932087898254395, "learning_rate": 0.00014642089894468144, "loss": 1.7204, "step": 60511 }, { "epoch": 2.01, "grad_norm": 0.6038262844085693, "learning_rate": 0.00014641192038796573, "loss": 1.6942, "step": 60512 }, { "epoch": 2.01, "grad_norm": 0.5878545045852661, "learning_rate": 0.00014640294201768333, "loss": 1.6777, "step": 60513 }, { "epoch": 2.01, "grad_norm": 0.6104402542114258, "learning_rate": 0.00014639396383384502, "loss": 1.7026, "step": 60514 }, { "epoch": 2.01, "grad_norm": 0.6197922825813293, "learning_rate": 0.00014638498583646192, "loss": 1.7348, "step": 60515 }, { "epoch": 2.01, "grad_norm": 0.6396465301513672, "learning_rate": 0.0001463760080255447, "loss": 1.7138, "step": 60516 }, { "epoch": 2.01, "grad_norm": 0.6125813126564026, "learning_rate": 0.00014636703040110443, "loss": 1.7223, "step": 60517 }, { "epoch": 2.01, "grad_norm": 0.6477968692779541, "learning_rate": 0.000146358052963152, "loss": 1.7259, "step": 60518 }, { "epoch": 2.01, "grad_norm": 0.6053777933120728, "learning_rate": 0.00014634907571169816, "loss": 1.7013, "step": 60519 }, { "epoch": 2.01, "grad_norm": 0.6122144460678101, "learning_rate": 0.000146340098646754, "loss": 1.6783, "step": 60520 }, { "epoch": 2.01, "grad_norm": 0.5889448523521423, "learning_rate": 0.00014633112176833018, "loss": 1.7018, "step": 60521 }, { "epoch": 2.01, "grad_norm": 0.6245903968811035, "learning_rate": 0.00014632214507643787, "loss": 1.7303, "step": 60522 }, { "epoch": 2.01, "grad_norm": 0.6070051193237305, "learning_rate": 0.0001463131685710877, "loss": 1.6857, "step": 60523 }, { "epoch": 2.01, "grad_norm": 0.608675479888916, "learning_rate": 0.00014630419225229084, "loss": 1.7509, "step": 60524 }, { "epoch": 2.01, "grad_norm": 0.6267934441566467, "learning_rate": 0.000146295216120058, "loss": 1.6853, "step": 60525 }, { "epoch": 2.01, "grad_norm": 0.5988243222236633, "learning_rate": 0.00014628624017440006, "loss": 1.6862, "step": 60526 }, { "epoch": 2.01, "grad_norm": 0.6287843585014343, "learning_rate": 0.0001462772644153281, "loss": 1.7135, "step": 60527 }, { "epoch": 2.01, "grad_norm": 0.6193876266479492, "learning_rate": 0.0001462682888428528, "loss": 1.748, "step": 60528 }, { "epoch": 2.01, "grad_norm": 0.63154536485672, "learning_rate": 0.00014625931345698512, "loss": 1.7193, "step": 60529 }, { "epoch": 2.01, "grad_norm": 0.6365899443626404, "learning_rate": 0.00014625033825773595, "loss": 1.6973, "step": 60530 }, { "epoch": 2.01, "grad_norm": 0.6278895139694214, "learning_rate": 0.00014624136324511633, "loss": 1.7011, "step": 60531 }, { "epoch": 2.01, "grad_norm": 0.6281079053878784, "learning_rate": 0.00014623238841913704, "loss": 1.6925, "step": 60532 }, { "epoch": 2.01, "grad_norm": 0.6181545853614807, "learning_rate": 0.00014622341377980888, "loss": 1.7012, "step": 60533 }, { "epoch": 2.01, "grad_norm": 0.6327733993530273, "learning_rate": 0.00014621443932714292, "loss": 1.7547, "step": 60534 }, { "epoch": 2.01, "grad_norm": 0.6275696754455566, "learning_rate": 0.00014620546506115, "loss": 1.694, "step": 60535 }, { "epoch": 2.01, "grad_norm": 0.6074400544166565, "learning_rate": 0.00014619649098184082, "loss": 1.6513, "step": 60536 }, { "epoch": 2.01, "grad_norm": 0.6116673946380615, "learning_rate": 0.0001461875170892265, "loss": 1.774, "step": 60537 }, { "epoch": 2.01, "grad_norm": 0.6372818350791931, "learning_rate": 0.00014617854338331792, "loss": 1.6932, "step": 60538 }, { "epoch": 2.01, "grad_norm": 0.6659752130508423, "learning_rate": 0.000146169569864126, "loss": 1.7587, "step": 60539 }, { "epoch": 2.01, "grad_norm": 0.6378899216651917, "learning_rate": 0.00014616059653166139, "loss": 1.6364, "step": 60540 }, { "epoch": 2.01, "grad_norm": 0.6107573509216309, "learning_rate": 0.00014615162338593528, "loss": 1.7027, "step": 60541 }, { "epoch": 2.01, "grad_norm": 0.6054028272628784, "learning_rate": 0.00014614265042695843, "loss": 1.6804, "step": 60542 }, { "epoch": 2.01, "grad_norm": 0.6077181696891785, "learning_rate": 0.00014613367765474163, "loss": 1.6963, "step": 60543 }, { "epoch": 2.01, "grad_norm": 0.6199588775634766, "learning_rate": 0.00014612470506929586, "loss": 1.6515, "step": 60544 }, { "epoch": 2.01, "grad_norm": 0.6329836845397949, "learning_rate": 0.0001461157326706321, "loss": 1.6122, "step": 60545 }, { "epoch": 2.01, "grad_norm": 0.6037861704826355, "learning_rate": 0.00014610676045876122, "loss": 1.7111, "step": 60546 }, { "epoch": 2.01, "grad_norm": 0.6215133666992188, "learning_rate": 0.00014609778843369394, "loss": 1.7054, "step": 60547 }, { "epoch": 2.01, "grad_norm": 0.6134605407714844, "learning_rate": 0.00014608881659544136, "loss": 1.6702, "step": 60548 }, { "epoch": 2.01, "grad_norm": 0.6631802916526794, "learning_rate": 0.0001460798449440143, "loss": 1.6517, "step": 60549 }, { "epoch": 2.01, "grad_norm": 0.6175206303596497, "learning_rate": 0.00014607087347942352, "loss": 1.6682, "step": 60550 }, { "epoch": 2.01, "grad_norm": 0.6275662779808044, "learning_rate": 0.00014606190220168, "loss": 1.6988, "step": 60551 }, { "epoch": 2.01, "grad_norm": 0.5947750210762024, "learning_rate": 0.00014605293111079478, "loss": 1.6508, "step": 60552 }, { "epoch": 2.01, "grad_norm": 0.6108496189117432, "learning_rate": 0.0001460439602067786, "loss": 1.7657, "step": 60553 }, { "epoch": 2.01, "grad_norm": 0.6336315274238586, "learning_rate": 0.00014603498948964223, "loss": 1.7235, "step": 60554 }, { "epoch": 2.01, "grad_norm": 0.6063607335090637, "learning_rate": 0.00014602601895939685, "loss": 1.6614, "step": 60555 }, { "epoch": 2.01, "grad_norm": 0.6226791739463806, "learning_rate": 0.00014601704861605318, "loss": 1.6848, "step": 60556 }, { "epoch": 2.01, "grad_norm": 0.6049418449401855, "learning_rate": 0.00014600807845962203, "loss": 1.6745, "step": 60557 }, { "epoch": 2.01, "grad_norm": 0.6129810214042664, "learning_rate": 0.00014599910849011453, "loss": 1.6613, "step": 60558 }, { "epoch": 2.01, "grad_norm": 0.6519216895103455, "learning_rate": 0.00014599013870754125, "loss": 1.6948, "step": 60559 }, { "epoch": 2.01, "grad_norm": 0.6082926988601685, "learning_rate": 0.00014598116911191338, "loss": 1.7417, "step": 60560 }, { "epoch": 2.01, "grad_norm": 0.6328594088554382, "learning_rate": 0.00014597219970324156, "loss": 1.691, "step": 60561 }, { "epoch": 2.01, "grad_norm": 0.6228665113449097, "learning_rate": 0.00014596323048153686, "loss": 1.6955, "step": 60562 }, { "epoch": 2.01, "grad_norm": 0.6661983728408813, "learning_rate": 0.0001459542614468102, "loss": 1.7774, "step": 60563 }, { "epoch": 2.01, "grad_norm": 0.6053240299224854, "learning_rate": 0.00014594529259907217, "loss": 1.7258, "step": 60564 }, { "epoch": 2.02, "grad_norm": 0.6044825315475464, "learning_rate": 0.00014593632393833402, "loss": 1.6447, "step": 60565 }, { "epoch": 2.02, "grad_norm": 0.6128706932067871, "learning_rate": 0.00014592735546460633, "loss": 1.6872, "step": 60566 }, { "epoch": 2.02, "grad_norm": 0.5948845744132996, "learning_rate": 0.00014591838717790026, "loss": 1.7079, "step": 60567 }, { "epoch": 2.02, "grad_norm": 0.6261129379272461, "learning_rate": 0.00014590941907822644, "loss": 1.7216, "step": 60568 }, { "epoch": 2.02, "grad_norm": 0.6058521270751953, "learning_rate": 0.00014590045116559603, "loss": 1.668, "step": 60569 }, { "epoch": 2.02, "grad_norm": 0.6152607798576355, "learning_rate": 0.00014589148344001972, "loss": 1.7206, "step": 60570 }, { "epoch": 2.02, "grad_norm": 0.6261875629425049, "learning_rate": 0.00014588251590150837, "loss": 1.7197, "step": 60571 }, { "epoch": 2.02, "grad_norm": 0.6330010890960693, "learning_rate": 0.00014587354855007302, "loss": 1.7672, "step": 60572 }, { "epoch": 2.02, "grad_norm": 0.6223087906837463, "learning_rate": 0.00014586458138572436, "loss": 1.7753, "step": 60573 }, { "epoch": 2.02, "grad_norm": 0.6240429282188416, "learning_rate": 0.0001458556144084735, "loss": 1.6761, "step": 60574 }, { "epoch": 2.02, "grad_norm": 0.6091040968894958, "learning_rate": 0.00014584664761833124, "loss": 1.6785, "step": 60575 }, { "epoch": 2.02, "grad_norm": 0.6074760556221008, "learning_rate": 0.0001458376810153083, "loss": 1.6081, "step": 60576 }, { "epoch": 2.02, "grad_norm": 0.5876896381378174, "learning_rate": 0.00014582871459941583, "loss": 1.7259, "step": 60577 }, { "epoch": 2.02, "grad_norm": 0.6049466729164124, "learning_rate": 0.00014581974837066445, "loss": 1.6986, "step": 60578 }, { "epoch": 2.02, "grad_norm": 0.6311745047569275, "learning_rate": 0.00014581078232906531, "loss": 1.7597, "step": 60579 }, { "epoch": 2.02, "grad_norm": 0.6463485956192017, "learning_rate": 0.00014580181647462902, "loss": 1.7199, "step": 60580 }, { "epoch": 2.02, "grad_norm": 0.6305893659591675, "learning_rate": 0.00014579285080736675, "loss": 1.8058, "step": 60581 }, { "epoch": 2.02, "grad_norm": 0.6332371234893799, "learning_rate": 0.0001457838853272892, "loss": 1.7911, "step": 60582 }, { "epoch": 2.02, "grad_norm": 0.6517937183380127, "learning_rate": 0.00014577492003440724, "loss": 1.6379, "step": 60583 }, { "epoch": 2.02, "grad_norm": 0.6124114394187927, "learning_rate": 0.00014576595492873188, "loss": 1.7514, "step": 60584 }, { "epoch": 2.02, "grad_norm": 0.6232169270515442, "learning_rate": 0.00014575699001027393, "loss": 1.7223, "step": 60585 }, { "epoch": 2.02, "grad_norm": 0.630304217338562, "learning_rate": 0.00014574802527904415, "loss": 1.7507, "step": 60586 }, { "epoch": 2.02, "grad_norm": 0.6220560669898987, "learning_rate": 0.0001457390607350535, "loss": 1.712, "step": 60587 }, { "epoch": 2.02, "grad_norm": 0.6111673712730408, "learning_rate": 0.00014573009637831305, "loss": 1.673, "step": 60588 }, { "epoch": 2.02, "grad_norm": 0.6130720376968384, "learning_rate": 0.00014572113220883356, "loss": 1.7535, "step": 60589 }, { "epoch": 2.02, "grad_norm": 0.6280719637870789, "learning_rate": 0.00014571216822662573, "loss": 1.7118, "step": 60590 }, { "epoch": 2.02, "grad_norm": 0.6042279005050659, "learning_rate": 0.00014570320443170072, "loss": 1.7327, "step": 60591 }, { "epoch": 2.02, "grad_norm": 0.6152588129043579, "learning_rate": 0.0001456942408240693, "loss": 1.7425, "step": 60592 }, { "epoch": 2.02, "grad_norm": 0.6226147413253784, "learning_rate": 0.0001456852774037422, "loss": 1.7272, "step": 60593 }, { "epoch": 2.02, "grad_norm": 0.5991748571395874, "learning_rate": 0.00014567631417073047, "loss": 1.6985, "step": 60594 }, { "epoch": 2.02, "grad_norm": 0.6147009134292603, "learning_rate": 0.00014566735112504515, "loss": 1.7774, "step": 60595 }, { "epoch": 2.02, "grad_norm": 0.6093457341194153, "learning_rate": 0.00014565838826669671, "loss": 1.7851, "step": 60596 }, { "epoch": 2.02, "grad_norm": 0.6072591543197632, "learning_rate": 0.00014564942559569625, "loss": 1.7558, "step": 60597 }, { "epoch": 2.02, "grad_norm": 0.6600787043571472, "learning_rate": 0.00014564046311205475, "loss": 1.7498, "step": 60598 }, { "epoch": 2.02, "grad_norm": 0.6055343747138977, "learning_rate": 0.00014563150081578298, "loss": 1.7189, "step": 60599 }, { "epoch": 2.02, "grad_norm": 0.6028220653533936, "learning_rate": 0.0001456225387068917, "loss": 1.7051, "step": 60600 }, { "epoch": 2.02, "grad_norm": 0.6056771278381348, "learning_rate": 0.00014561357678539194, "loss": 1.8059, "step": 60601 }, { "epoch": 2.02, "grad_norm": 0.6101837158203125, "learning_rate": 0.00014560461505129475, "loss": 1.7096, "step": 60602 }, { "epoch": 2.02, "grad_norm": 0.6343199610710144, "learning_rate": 0.0001455956535046106, "loss": 1.7479, "step": 60603 }, { "epoch": 2.02, "grad_norm": 0.6241602301597595, "learning_rate": 0.0001455866921453506, "loss": 1.704, "step": 60604 }, { "epoch": 2.02, "grad_norm": 0.6131064891815186, "learning_rate": 0.00014557773097352567, "loss": 1.7074, "step": 60605 }, { "epoch": 2.02, "grad_norm": 0.6158153414726257, "learning_rate": 0.00014556876998914663, "loss": 1.7365, "step": 60606 }, { "epoch": 2.02, "grad_norm": 0.621156632900238, "learning_rate": 0.00014555980919222423, "loss": 1.7414, "step": 60607 }, { "epoch": 2.02, "grad_norm": 0.620746910572052, "learning_rate": 0.0001455508485827695, "loss": 1.7605, "step": 60608 }, { "epoch": 2.02, "grad_norm": 0.6163447499275208, "learning_rate": 0.0001455418881607935, "loss": 1.7107, "step": 60609 }, { "epoch": 2.02, "grad_norm": 0.594828724861145, "learning_rate": 0.00014553292792630665, "loss": 1.6789, "step": 60610 }, { "epoch": 2.02, "grad_norm": 0.6237765550613403, "learning_rate": 0.00014552396787932006, "loss": 1.7063, "step": 60611 }, { "epoch": 2.02, "grad_norm": 0.6061657667160034, "learning_rate": 0.00014551500801984475, "loss": 1.7748, "step": 60612 }, { "epoch": 2.02, "grad_norm": 0.6067057251930237, "learning_rate": 0.00014550604834789144, "loss": 1.619, "step": 60613 }, { "epoch": 2.02, "grad_norm": 0.5969440937042236, "learning_rate": 0.0001454970888634709, "loss": 1.7361, "step": 60614 }, { "epoch": 2.02, "grad_norm": 0.603704035282135, "learning_rate": 0.00014548812956659426, "loss": 1.6214, "step": 60615 }, { "epoch": 2.02, "grad_norm": 0.6137762069702148, "learning_rate": 0.00014547917045727227, "loss": 1.7327, "step": 60616 }, { "epoch": 2.02, "grad_norm": 0.6056119203567505, "learning_rate": 0.00014547021153551567, "loss": 1.6693, "step": 60617 }, { "epoch": 2.02, "grad_norm": 0.6108656525611877, "learning_rate": 0.00014546125280133547, "loss": 1.7358, "step": 60618 }, { "epoch": 2.02, "grad_norm": 0.6217585206031799, "learning_rate": 0.00014545229425474263, "loss": 1.6991, "step": 60619 }, { "epoch": 2.02, "grad_norm": 0.6302139759063721, "learning_rate": 0.000145443335895748, "loss": 1.7126, "step": 60620 }, { "epoch": 2.02, "grad_norm": 0.6557661294937134, "learning_rate": 0.0001454343777243622, "loss": 1.7791, "step": 60621 }, { "epoch": 2.02, "grad_norm": 0.6036756634712219, "learning_rate": 0.00014542541974059644, "loss": 1.7033, "step": 60622 }, { "epoch": 2.02, "grad_norm": 0.6405375003814697, "learning_rate": 0.00014541646194446146, "loss": 1.6943, "step": 60623 }, { "epoch": 2.02, "grad_norm": 0.6281511187553406, "learning_rate": 0.00014540750433596797, "loss": 1.6675, "step": 60624 }, { "epoch": 2.02, "grad_norm": 0.6073472499847412, "learning_rate": 0.0001453985469151271, "loss": 1.6664, "step": 60625 }, { "epoch": 2.02, "grad_norm": 0.6381626129150391, "learning_rate": 0.00014538958968194954, "loss": 1.7728, "step": 60626 }, { "epoch": 2.02, "grad_norm": 0.6120712757110596, "learning_rate": 0.00014538063263644632, "loss": 1.7063, "step": 60627 }, { "epoch": 2.02, "grad_norm": 0.6294214725494385, "learning_rate": 0.00014537167577862814, "loss": 1.7199, "step": 60628 }, { "epoch": 2.02, "grad_norm": 0.6116907596588135, "learning_rate": 0.00014536271910850604, "loss": 1.7165, "step": 60629 }, { "epoch": 2.02, "grad_norm": 0.6249227523803711, "learning_rate": 0.00014535376262609086, "loss": 1.7271, "step": 60630 }, { "epoch": 2.02, "grad_norm": 0.5977962613105774, "learning_rate": 0.00014534480633139328, "loss": 1.7528, "step": 60631 }, { "epoch": 2.02, "grad_norm": 0.6110115051269531, "learning_rate": 0.00014533585022442446, "loss": 1.6917, "step": 60632 }, { "epoch": 2.02, "grad_norm": 0.6196854710578918, "learning_rate": 0.00014532689430519499, "loss": 1.7199, "step": 60633 }, { "epoch": 2.02, "grad_norm": 0.6171793937683105, "learning_rate": 0.00014531793857371599, "loss": 1.7452, "step": 60634 }, { "epoch": 2.02, "grad_norm": 0.6303761601448059, "learning_rate": 0.00014530898302999822, "loss": 1.7301, "step": 60635 }, { "epoch": 2.02, "grad_norm": 0.6262273192405701, "learning_rate": 0.00014530002767405248, "loss": 1.7599, "step": 60636 }, { "epoch": 2.02, "grad_norm": 0.6311631798744202, "learning_rate": 0.0001452910725058898, "loss": 1.6389, "step": 60637 }, { "epoch": 2.02, "grad_norm": 0.61784428358078, "learning_rate": 0.00014528211752552083, "loss": 1.6975, "step": 60638 }, { "epoch": 2.02, "grad_norm": 0.5945881605148315, "learning_rate": 0.0001452731627329567, "loss": 1.6822, "step": 60639 }, { "epoch": 2.02, "grad_norm": 0.6312041878700256, "learning_rate": 0.00014526420812820804, "loss": 1.7617, "step": 60640 }, { "epoch": 2.02, "grad_norm": 0.6281608939170837, "learning_rate": 0.00014525525371128595, "loss": 1.69, "step": 60641 }, { "epoch": 2.02, "grad_norm": 0.6170132756233215, "learning_rate": 0.0001452462994822012, "loss": 1.7302, "step": 60642 }, { "epoch": 2.02, "grad_norm": 0.6321436166763306, "learning_rate": 0.00014523734544096448, "loss": 1.7641, "step": 60643 }, { "epoch": 2.02, "grad_norm": 0.5989545583724976, "learning_rate": 0.00014522839158758697, "loss": 1.684, "step": 60644 }, { "epoch": 2.02, "grad_norm": 0.6168293356895447, "learning_rate": 0.00014521943792207938, "loss": 1.7258, "step": 60645 }, { "epoch": 2.02, "grad_norm": 0.6058930158615112, "learning_rate": 0.00014521048444445246, "loss": 1.7572, "step": 60646 }, { "epoch": 2.02, "grad_norm": 0.5981738567352295, "learning_rate": 0.00014520153115471724, "loss": 1.6371, "step": 60647 }, { "epoch": 2.02, "grad_norm": 0.6347239017486572, "learning_rate": 0.00014519257805288463, "loss": 1.7123, "step": 60648 }, { "epoch": 2.02, "grad_norm": 0.6256354451179504, "learning_rate": 0.00014518362513896542, "loss": 1.7427, "step": 60649 }, { "epoch": 2.02, "grad_norm": 0.5988695025444031, "learning_rate": 0.00014517467241297039, "loss": 1.7119, "step": 60650 }, { "epoch": 2.02, "grad_norm": 0.6206066012382507, "learning_rate": 0.00014516571987491057, "loss": 1.7223, "step": 60651 }, { "epoch": 2.02, "grad_norm": 0.6268531084060669, "learning_rate": 0.00014515676752479677, "loss": 1.7117, "step": 60652 }, { "epoch": 2.02, "grad_norm": 0.6130306124687195, "learning_rate": 0.00014514781536263975, "loss": 1.6213, "step": 60653 }, { "epoch": 2.02, "grad_norm": 0.6166168451309204, "learning_rate": 0.00014513886338845045, "loss": 1.7609, "step": 60654 }, { "epoch": 2.02, "grad_norm": 0.5957102179527283, "learning_rate": 0.00014512991160223985, "loss": 1.7317, "step": 60655 }, { "epoch": 2.02, "grad_norm": 0.617323100566864, "learning_rate": 0.00014512096000401874, "loss": 1.7689, "step": 60656 }, { "epoch": 2.02, "grad_norm": 0.6122122406959534, "learning_rate": 0.00014511200859379785, "loss": 1.7093, "step": 60657 }, { "epoch": 2.02, "grad_norm": 0.6034095883369446, "learning_rate": 0.00014510305737158827, "loss": 1.7005, "step": 60658 }, { "epoch": 2.02, "grad_norm": 0.5981813669204712, "learning_rate": 0.00014509410633740073, "loss": 1.6524, "step": 60659 }, { "epoch": 2.02, "grad_norm": 0.5975528359413147, "learning_rate": 0.00014508515549124603, "loss": 1.6925, "step": 60660 }, { "epoch": 2.02, "grad_norm": 0.5999813079833984, "learning_rate": 0.00014507620483313513, "loss": 1.6745, "step": 60661 }, { "epoch": 2.02, "grad_norm": 0.5989865660667419, "learning_rate": 0.00014506725436307902, "loss": 1.6455, "step": 60662 }, { "epoch": 2.02, "grad_norm": 0.6137352585792542, "learning_rate": 0.0001450583040810884, "loss": 1.6314, "step": 60663 }, { "epoch": 2.02, "grad_norm": 0.6031184196472168, "learning_rate": 0.00014504935398717406, "loss": 1.7537, "step": 60664 }, { "epoch": 2.02, "grad_norm": 0.6198256015777588, "learning_rate": 0.00014504040408134708, "loss": 1.7051, "step": 60665 }, { "epoch": 2.02, "grad_norm": 0.6270689964294434, "learning_rate": 0.00014503145436361824, "loss": 1.7293, "step": 60666 }, { "epoch": 2.02, "grad_norm": 0.6145403981208801, "learning_rate": 0.00014502250483399825, "loss": 1.6931, "step": 60667 }, { "epoch": 2.02, "grad_norm": 0.604289710521698, "learning_rate": 0.00014501355549249813, "loss": 1.7612, "step": 60668 }, { "epoch": 2.02, "grad_norm": 0.6264117956161499, "learning_rate": 0.0001450046063391288, "loss": 1.7266, "step": 60669 }, { "epoch": 2.02, "grad_norm": 0.6417695879936218, "learning_rate": 0.00014499565737390109, "loss": 1.7268, "step": 60670 }, { "epoch": 2.02, "grad_norm": 0.610005259513855, "learning_rate": 0.00014498670859682566, "loss": 1.7375, "step": 60671 }, { "epoch": 2.02, "grad_norm": 0.6407153606414795, "learning_rate": 0.00014497776000791364, "loss": 1.7499, "step": 60672 }, { "epoch": 2.02, "grad_norm": 0.6182010173797607, "learning_rate": 0.00014496881160717576, "loss": 1.699, "step": 60673 }, { "epoch": 2.02, "grad_norm": 0.6295932531356812, "learning_rate": 0.00014495986339462284, "loss": 1.6977, "step": 60674 }, { "epoch": 2.02, "grad_norm": 0.6127429008483887, "learning_rate": 0.00014495091537026586, "loss": 1.6539, "step": 60675 }, { "epoch": 2.02, "grad_norm": 0.6180431246757507, "learning_rate": 0.00014494196753411554, "loss": 1.7137, "step": 60676 }, { "epoch": 2.02, "grad_norm": 0.5954732298851013, "learning_rate": 0.00014493301988618293, "loss": 1.6779, "step": 60677 }, { "epoch": 2.02, "grad_norm": 0.6278916597366333, "learning_rate": 0.00014492407242647868, "loss": 1.7167, "step": 60678 }, { "epoch": 2.02, "grad_norm": 0.6520236134529114, "learning_rate": 0.00014491512515501387, "loss": 1.7023, "step": 60679 }, { "epoch": 2.02, "grad_norm": 0.6100153923034668, "learning_rate": 0.00014490617807179923, "loss": 1.7247, "step": 60680 }, { "epoch": 2.02, "grad_norm": 0.6220183968544006, "learning_rate": 0.00014489723117684552, "loss": 1.7053, "step": 60681 }, { "epoch": 2.02, "grad_norm": 0.6073545813560486, "learning_rate": 0.00014488828447016387, "loss": 1.7127, "step": 60682 }, { "epoch": 2.02, "grad_norm": 0.589449942111969, "learning_rate": 0.00014487933795176485, "loss": 1.7055, "step": 60683 }, { "epoch": 2.02, "grad_norm": 0.5980774760246277, "learning_rate": 0.0001448703916216596, "loss": 1.6985, "step": 60684 }, { "epoch": 2.02, "grad_norm": 0.5904732942581177, "learning_rate": 0.00014486144547985866, "loss": 1.7491, "step": 60685 }, { "epoch": 2.02, "grad_norm": 0.6152763366699219, "learning_rate": 0.00014485249952637323, "loss": 1.755, "step": 60686 }, { "epoch": 2.02, "grad_norm": 0.6006457209587097, "learning_rate": 0.000144843553761214, "loss": 1.7958, "step": 60687 }, { "epoch": 2.02, "grad_norm": 0.6323761940002441, "learning_rate": 0.0001448346081843917, "loss": 1.6623, "step": 60688 }, { "epoch": 2.02, "grad_norm": 0.6042920351028442, "learning_rate": 0.00014482566279591744, "loss": 1.688, "step": 60689 }, { "epoch": 2.02, "grad_norm": 0.6228876709938049, "learning_rate": 0.00014481671759580188, "loss": 1.7686, "step": 60690 }, { "epoch": 2.02, "grad_norm": 0.598809003829956, "learning_rate": 0.00014480777258405603, "loss": 1.6878, "step": 60691 }, { "epoch": 2.02, "grad_norm": 0.6049461960792542, "learning_rate": 0.0001447988277606907, "loss": 1.62, "step": 60692 }, { "epoch": 2.02, "grad_norm": 0.6049755215644836, "learning_rate": 0.0001447898831257166, "loss": 1.6877, "step": 60693 }, { "epoch": 2.02, "grad_norm": 0.6278064250946045, "learning_rate": 0.00014478093867914486, "loss": 1.7921, "step": 60694 }, { "epoch": 2.02, "grad_norm": 0.6290305852890015, "learning_rate": 0.00014477199442098607, "loss": 1.7578, "step": 60695 }, { "epoch": 2.02, "grad_norm": 0.6360822916030884, "learning_rate": 0.00014476305035125128, "loss": 1.7046, "step": 60696 }, { "epoch": 2.02, "grad_norm": 0.6059794425964355, "learning_rate": 0.00014475410646995119, "loss": 1.65, "step": 60697 }, { "epoch": 2.02, "grad_norm": 0.6111857891082764, "learning_rate": 0.00014474516277709687, "loss": 1.6864, "step": 60698 }, { "epoch": 2.02, "grad_norm": 0.6198762655258179, "learning_rate": 0.000144736219272699, "loss": 1.7941, "step": 60699 }, { "epoch": 2.02, "grad_norm": 0.5792697668075562, "learning_rate": 0.0001447272759567684, "loss": 1.6766, "step": 60700 }, { "epoch": 2.02, "grad_norm": 0.6329138875007629, "learning_rate": 0.00014471833282931614, "loss": 1.7245, "step": 60701 }, { "epoch": 2.02, "grad_norm": 0.6164165735244751, "learning_rate": 0.0001447093898903529, "loss": 1.7192, "step": 60702 }, { "epoch": 2.02, "grad_norm": 0.6209880113601685, "learning_rate": 0.00014470044713988948, "loss": 1.6843, "step": 60703 }, { "epoch": 2.02, "grad_norm": 0.5884515047073364, "learning_rate": 0.00014469150457793685, "loss": 1.69, "step": 60704 }, { "epoch": 2.02, "grad_norm": 0.6209537386894226, "learning_rate": 0.00014468256220450597, "loss": 1.6923, "step": 60705 }, { "epoch": 2.02, "grad_norm": 0.5978740453720093, "learning_rate": 0.00014467362001960754, "loss": 1.6142, "step": 60706 }, { "epoch": 2.02, "grad_norm": 0.6166635751724243, "learning_rate": 0.00014466467802325234, "loss": 1.7699, "step": 60707 }, { "epoch": 2.02, "grad_norm": 0.6122793555259705, "learning_rate": 0.00014465573621545147, "loss": 1.6888, "step": 60708 }, { "epoch": 2.02, "grad_norm": 0.6192600727081299, "learning_rate": 0.00014464679459621563, "loss": 1.7581, "step": 60709 }, { "epoch": 2.02, "grad_norm": 0.6170936822891235, "learning_rate": 0.0001446378531655556, "loss": 1.7439, "step": 60710 }, { "epoch": 2.02, "grad_norm": 0.6260588765144348, "learning_rate": 0.00014462891192348234, "loss": 1.6546, "step": 60711 }, { "epoch": 2.02, "grad_norm": 0.6199353337287903, "learning_rate": 0.00014461997087000686, "loss": 1.7945, "step": 60712 }, { "epoch": 2.02, "grad_norm": 0.6111093759536743, "learning_rate": 0.00014461103000513965, "loss": 1.6903, "step": 60713 }, { "epoch": 2.02, "grad_norm": 0.6242801547050476, "learning_rate": 0.00014460208932889175, "loss": 1.6628, "step": 60714 }, { "epoch": 2.02, "grad_norm": 0.6089634299278259, "learning_rate": 0.00014459314884127412, "loss": 1.709, "step": 60715 }, { "epoch": 2.02, "grad_norm": 0.5994100570678711, "learning_rate": 0.0001445842085422975, "loss": 1.7039, "step": 60716 }, { "epoch": 2.02, "grad_norm": 0.628002405166626, "learning_rate": 0.00014457526843197267, "loss": 1.6604, "step": 60717 }, { "epoch": 2.02, "grad_norm": 0.6227778196334839, "learning_rate": 0.00014456632851031056, "loss": 1.6945, "step": 60718 }, { "epoch": 2.02, "grad_norm": 0.6585351228713989, "learning_rate": 0.0001445573887773222, "loss": 1.7174, "step": 60719 }, { "epoch": 2.02, "grad_norm": 0.621638298034668, "learning_rate": 0.0001445484492330181, "loss": 1.7719, "step": 60720 }, { "epoch": 2.02, "grad_norm": 0.6404769420623779, "learning_rate": 0.00014453950987740923, "loss": 1.7206, "step": 60721 }, { "epoch": 2.02, "grad_norm": 0.6619137525558472, "learning_rate": 0.00014453057071050663, "loss": 1.7711, "step": 60722 }, { "epoch": 2.02, "grad_norm": 0.6508383750915527, "learning_rate": 0.00014452163173232102, "loss": 1.7669, "step": 60723 }, { "epoch": 2.02, "grad_norm": 0.6252899169921875, "learning_rate": 0.00014451269294286313, "loss": 1.7109, "step": 60724 }, { "epoch": 2.02, "grad_norm": 0.6369677782058716, "learning_rate": 0.0001445037543421439, "loss": 1.7778, "step": 60725 }, { "epoch": 2.02, "grad_norm": 0.6357350945472717, "learning_rate": 0.00014449481593017445, "loss": 1.7381, "step": 60726 }, { "epoch": 2.02, "grad_norm": 0.627324104309082, "learning_rate": 0.0001444858777069651, "loss": 1.7526, "step": 60727 }, { "epoch": 2.02, "grad_norm": 0.6237763166427612, "learning_rate": 0.00014447693967252704, "loss": 1.6861, "step": 60728 }, { "epoch": 2.02, "grad_norm": 0.625942587852478, "learning_rate": 0.00014446800182687115, "loss": 1.7238, "step": 60729 }, { "epoch": 2.02, "grad_norm": 0.6390668749809265, "learning_rate": 0.0001444590641700082, "loss": 1.7447, "step": 60730 }, { "epoch": 2.02, "grad_norm": 0.6756964325904846, "learning_rate": 0.0001444501267019489, "loss": 1.7553, "step": 60731 }, { "epoch": 2.02, "grad_norm": 0.6247581839561462, "learning_rate": 0.00014444118942270434, "loss": 1.7354, "step": 60732 }, { "epoch": 2.02, "grad_norm": 0.6129499673843384, "learning_rate": 0.00014443225233228525, "loss": 1.706, "step": 60733 }, { "epoch": 2.02, "grad_norm": 0.6144128441810608, "learning_rate": 0.0001444233154307024, "loss": 1.7327, "step": 60734 }, { "epoch": 2.02, "grad_norm": 0.6317787170410156, "learning_rate": 0.00014441437871796667, "loss": 1.7408, "step": 60735 }, { "epoch": 2.02, "grad_norm": 0.6409733295440674, "learning_rate": 0.0001444054421940891, "loss": 1.7914, "step": 60736 }, { "epoch": 2.02, "grad_norm": 0.6468479633331299, "learning_rate": 0.00014439650585908043, "loss": 1.7583, "step": 60737 }, { "epoch": 2.02, "grad_norm": 0.6485216617584229, "learning_rate": 0.00014438756971295135, "loss": 1.6871, "step": 60738 }, { "epoch": 2.02, "grad_norm": 0.6445097923278809, "learning_rate": 0.0001443786337557129, "loss": 1.736, "step": 60739 }, { "epoch": 2.02, "grad_norm": 0.6228011846542358, "learning_rate": 0.0001443696979873759, "loss": 1.7228, "step": 60740 }, { "epoch": 2.02, "grad_norm": 0.6037638187408447, "learning_rate": 0.00014436076240795104, "loss": 1.6955, "step": 60741 }, { "epoch": 2.02, "grad_norm": 0.6203622221946716, "learning_rate": 0.00014435182701744937, "loss": 1.7761, "step": 60742 }, { "epoch": 2.02, "grad_norm": 0.5998157262802124, "learning_rate": 0.00014434289181588155, "loss": 1.6611, "step": 60743 }, { "epoch": 2.02, "grad_norm": 0.6092838048934937, "learning_rate": 0.00014433395680325865, "loss": 1.6934, "step": 60744 }, { "epoch": 2.02, "grad_norm": 0.6520600318908691, "learning_rate": 0.00014432502197959127, "loss": 1.6877, "step": 60745 }, { "epoch": 2.02, "grad_norm": 0.6077320575714111, "learning_rate": 0.0001443160873448905, "loss": 1.7455, "step": 60746 }, { "epoch": 2.02, "grad_norm": 0.6247098445892334, "learning_rate": 0.00014430715289916703, "loss": 1.7173, "step": 60747 }, { "epoch": 2.02, "grad_norm": 0.617123544216156, "learning_rate": 0.00014429821864243167, "loss": 1.7161, "step": 60748 }, { "epoch": 2.02, "grad_norm": 0.6186127662658691, "learning_rate": 0.00014428928457469538, "loss": 1.6722, "step": 60749 }, { "epoch": 2.02, "grad_norm": 0.6090624928474426, "learning_rate": 0.0001442803506959689, "loss": 1.6922, "step": 60750 }, { "epoch": 2.02, "grad_norm": 0.6033574938774109, "learning_rate": 0.00014427141700626322, "loss": 1.6993, "step": 60751 }, { "epoch": 2.02, "grad_norm": 0.6162459254264832, "learning_rate": 0.00014426248350558906, "loss": 1.7037, "step": 60752 }, { "epoch": 2.02, "grad_norm": 0.5910478234291077, "learning_rate": 0.00014425355019395724, "loss": 1.6641, "step": 60753 }, { "epoch": 2.02, "grad_norm": 0.604797899723053, "learning_rate": 0.00014424461707137875, "loss": 1.6738, "step": 60754 }, { "epoch": 2.02, "grad_norm": 0.5938661694526672, "learning_rate": 0.00014423568413786424, "loss": 1.7023, "step": 60755 }, { "epoch": 2.02, "grad_norm": 0.6134892702102661, "learning_rate": 0.0001442267513934248, "loss": 1.6656, "step": 60756 }, { "epoch": 2.02, "grad_norm": 0.6002718210220337, "learning_rate": 0.00014421781883807095, "loss": 1.6232, "step": 60757 }, { "epoch": 2.02, "grad_norm": 0.6001912355422974, "learning_rate": 0.00014420888647181387, "loss": 1.6504, "step": 60758 }, { "epoch": 2.02, "grad_norm": 0.6457329988479614, "learning_rate": 0.00014419995429466425, "loss": 1.6717, "step": 60759 }, { "epoch": 2.02, "grad_norm": 0.6194918751716614, "learning_rate": 0.0001441910223066328, "loss": 1.6114, "step": 60760 }, { "epoch": 2.02, "grad_norm": 0.6140524744987488, "learning_rate": 0.00014418209050773062, "loss": 1.7407, "step": 60761 }, { "epoch": 2.02, "grad_norm": 0.6132692694664001, "learning_rate": 0.0001441731588979684, "loss": 1.7793, "step": 60762 }, { "epoch": 2.02, "grad_norm": 0.6140428781509399, "learning_rate": 0.00014416422747735692, "loss": 1.6837, "step": 60763 }, { "epoch": 2.02, "grad_norm": 0.6405277848243713, "learning_rate": 0.00014415529624590707, "loss": 1.6847, "step": 60764 }, { "epoch": 2.02, "grad_norm": 0.6135900020599365, "learning_rate": 0.00014414636520362987, "loss": 1.7246, "step": 60765 }, { "epoch": 2.02, "grad_norm": 0.6213713884353638, "learning_rate": 0.00014413743435053603, "loss": 1.6946, "step": 60766 }, { "epoch": 2.02, "grad_norm": 0.6161884069442749, "learning_rate": 0.00014412850368663625, "loss": 1.7516, "step": 60767 }, { "epoch": 2.02, "grad_norm": 0.6141983270645142, "learning_rate": 0.0001441195732119416, "loss": 1.7162, "step": 60768 }, { "epoch": 2.02, "grad_norm": 0.6331758499145508, "learning_rate": 0.00014411064292646283, "loss": 1.7005, "step": 60769 }, { "epoch": 2.02, "grad_norm": 0.6193728446960449, "learning_rate": 0.00014410171283021068, "loss": 1.6979, "step": 60770 }, { "epoch": 2.02, "grad_norm": 0.6266379952430725, "learning_rate": 0.00014409278292319607, "loss": 1.6544, "step": 60771 }, { "epoch": 2.02, "grad_norm": 0.6119965314865112, "learning_rate": 0.00014408385320542993, "loss": 1.6929, "step": 60772 }, { "epoch": 2.02, "grad_norm": 0.6026610136032104, "learning_rate": 0.00014407492367692304, "loss": 1.7419, "step": 60773 }, { "epoch": 2.02, "grad_norm": 0.6312024593353271, "learning_rate": 0.0001440659943376861, "loss": 1.7289, "step": 60774 }, { "epoch": 2.02, "grad_norm": 0.6275114417076111, "learning_rate": 0.0001440570651877302, "loss": 1.7515, "step": 60775 }, { "epoch": 2.02, "grad_norm": 0.7248068451881409, "learning_rate": 0.00014404813622706603, "loss": 1.7451, "step": 60776 }, { "epoch": 2.02, "grad_norm": 0.6164492964744568, "learning_rate": 0.00014403920745570436, "loss": 1.6924, "step": 60777 }, { "epoch": 2.02, "grad_norm": 0.617242157459259, "learning_rate": 0.00014403027887365604, "loss": 1.6914, "step": 60778 }, { "epoch": 2.02, "grad_norm": 0.5971062183380127, "learning_rate": 0.00014402135048093215, "loss": 1.7432, "step": 60779 }, { "epoch": 2.02, "grad_norm": 0.612659752368927, "learning_rate": 0.00014401242227754337, "loss": 1.6747, "step": 60780 }, { "epoch": 2.02, "grad_norm": 0.6118119955062866, "learning_rate": 0.0001440034942635004, "loss": 1.7512, "step": 60781 }, { "epoch": 2.02, "grad_norm": 0.6044161915779114, "learning_rate": 0.0001439945664388143, "loss": 1.6849, "step": 60782 }, { "epoch": 2.02, "grad_norm": 0.6075129508972168, "learning_rate": 0.00014398563880349582, "loss": 1.5644, "step": 60783 }, { "epoch": 2.02, "grad_norm": 0.616538405418396, "learning_rate": 0.00014397671135755566, "loss": 1.7546, "step": 60784 }, { "epoch": 2.02, "grad_norm": 0.5924496650695801, "learning_rate": 0.0001439677841010048, "loss": 1.6953, "step": 60785 }, { "epoch": 2.02, "grad_norm": 0.6003491878509521, "learning_rate": 0.00014395885703385417, "loss": 1.7626, "step": 60786 }, { "epoch": 2.02, "grad_norm": 0.6269001364707947, "learning_rate": 0.00014394993015611447, "loss": 1.7793, "step": 60787 }, { "epoch": 2.02, "grad_norm": 0.6122819185256958, "learning_rate": 0.00014394100346779647, "loss": 1.6669, "step": 60788 }, { "epoch": 2.02, "grad_norm": 0.6064428687095642, "learning_rate": 0.00014393207696891122, "loss": 1.7791, "step": 60789 }, { "epoch": 2.02, "grad_norm": 0.6088005900382996, "learning_rate": 0.00014392315065946945, "loss": 1.6826, "step": 60790 }, { "epoch": 2.02, "grad_norm": 0.6188067197799683, "learning_rate": 0.00014391422453948182, "loss": 1.7222, "step": 60791 }, { "epoch": 2.02, "grad_norm": 0.603646993637085, "learning_rate": 0.00014390529860895944, "loss": 1.7254, "step": 60792 }, { "epoch": 2.02, "grad_norm": 0.6298602819442749, "learning_rate": 0.00014389637286791294, "loss": 1.6699, "step": 60793 }, { "epoch": 2.02, "grad_norm": 0.6044743061065674, "learning_rate": 0.00014388744731635338, "loss": 1.6765, "step": 60794 }, { "epoch": 2.02, "grad_norm": 0.6114811301231384, "learning_rate": 0.0001438785219542913, "loss": 1.7128, "step": 60795 }, { "epoch": 2.02, "grad_norm": 0.5877650380134583, "learning_rate": 0.00014386959678173782, "loss": 1.6858, "step": 60796 }, { "epoch": 2.02, "grad_norm": 0.5898182392120361, "learning_rate": 0.00014386067179870363, "loss": 1.7763, "step": 60797 }, { "epoch": 2.02, "grad_norm": 0.622943639755249, "learning_rate": 0.00014385174700519947, "loss": 1.7382, "step": 60798 }, { "epoch": 2.02, "grad_norm": 0.6220338344573975, "learning_rate": 0.0001438428224012364, "loss": 1.6784, "step": 60799 }, { "epoch": 2.02, "grad_norm": 0.6075100898742676, "learning_rate": 0.000143833897986825, "loss": 1.7092, "step": 60800 }, { "epoch": 2.02, "grad_norm": 0.608830988407135, "learning_rate": 0.00014382497376197638, "loss": 1.6699, "step": 60801 }, { "epoch": 2.02, "grad_norm": 0.6199405789375305, "learning_rate": 0.00014381604972670114, "loss": 1.7525, "step": 60802 }, { "epoch": 2.02, "grad_norm": 0.6151498556137085, "learning_rate": 0.00014380712588101026, "loss": 1.7047, "step": 60803 }, { "epoch": 2.02, "grad_norm": 0.6152432560920715, "learning_rate": 0.00014379820222491457, "loss": 1.7515, "step": 60804 }, { "epoch": 2.02, "grad_norm": 0.6275086998939514, "learning_rate": 0.00014378927875842467, "loss": 1.7871, "step": 60805 }, { "epoch": 2.02, "grad_norm": 0.6234519481658936, "learning_rate": 0.00014378035548155173, "loss": 1.7395, "step": 60806 }, { "epoch": 2.02, "grad_norm": 0.5968338847160339, "learning_rate": 0.00014377143239430632, "loss": 1.6218, "step": 60807 }, { "epoch": 2.02, "grad_norm": 0.60019850730896, "learning_rate": 0.00014376250949669944, "loss": 1.7051, "step": 60808 }, { "epoch": 2.02, "grad_norm": 0.6319738626480103, "learning_rate": 0.00014375358678874188, "loss": 1.7576, "step": 60809 }, { "epoch": 2.02, "grad_norm": 0.6060092449188232, "learning_rate": 0.00014374466427044433, "loss": 1.7098, "step": 60810 }, { "epoch": 2.02, "grad_norm": 0.6100035309791565, "learning_rate": 0.00014373574194181783, "loss": 1.6751, "step": 60811 }, { "epoch": 2.02, "grad_norm": 0.6087980270385742, "learning_rate": 0.000143726819802873, "loss": 1.6917, "step": 60812 }, { "epoch": 2.02, "grad_norm": 0.6050625443458557, "learning_rate": 0.00014371789785362095, "loss": 1.6916, "step": 60813 }, { "epoch": 2.02, "grad_norm": 0.6165248155593872, "learning_rate": 0.00014370897609407217, "loss": 1.7278, "step": 60814 }, { "epoch": 2.02, "grad_norm": 0.5959529280662537, "learning_rate": 0.0001437000545242378, "loss": 1.7561, "step": 60815 }, { "epoch": 2.02, "grad_norm": 0.610294759273529, "learning_rate": 0.00014369113314412855, "loss": 1.6676, "step": 60816 }, { "epoch": 2.02, "grad_norm": 0.6286206841468811, "learning_rate": 0.00014368221195375512, "loss": 1.6966, "step": 60817 }, { "epoch": 2.02, "grad_norm": 0.6303411722183228, "learning_rate": 0.00014367329095312854, "loss": 1.6908, "step": 60818 }, { "epoch": 2.02, "grad_norm": 0.6203510165214539, "learning_rate": 0.00014366437014225956, "loss": 1.7117, "step": 60819 }, { "epoch": 2.02, "grad_norm": 0.6121566295623779, "learning_rate": 0.0001436554495211589, "loss": 1.715, "step": 60820 }, { "epoch": 2.02, "grad_norm": 0.6301658749580383, "learning_rate": 0.00014364652908983747, "loss": 1.6094, "step": 60821 }, { "epoch": 2.02, "grad_norm": 0.6025936603546143, "learning_rate": 0.0001436376088483062, "loss": 1.681, "step": 60822 }, { "epoch": 2.02, "grad_norm": 0.6181861162185669, "learning_rate": 0.00014362868879657588, "loss": 1.7532, "step": 60823 }, { "epoch": 2.02, "grad_norm": 0.6284273266792297, "learning_rate": 0.00014361976893465715, "loss": 1.8162, "step": 60824 }, { "epoch": 2.02, "grad_norm": 0.6367691159248352, "learning_rate": 0.00014361084926256114, "loss": 1.6922, "step": 60825 }, { "epoch": 2.02, "grad_norm": 0.6363062858581543, "learning_rate": 0.0001436019297802985, "loss": 1.6477, "step": 60826 }, { "epoch": 2.02, "grad_norm": 0.6292522549629211, "learning_rate": 0.00014359301048787995, "loss": 1.7189, "step": 60827 }, { "epoch": 2.02, "grad_norm": 0.638230562210083, "learning_rate": 0.0001435840913853164, "loss": 1.7581, "step": 60828 }, { "epoch": 2.02, "grad_norm": 1.4019125699996948, "learning_rate": 0.00014357517247261898, "loss": 1.7155, "step": 60829 }, { "epoch": 2.02, "grad_norm": 0.6178274750709534, "learning_rate": 0.00014356625374979806, "loss": 1.7512, "step": 60830 }, { "epoch": 2.02, "grad_norm": 0.6201173067092896, "learning_rate": 0.00014355733521686462, "loss": 1.6852, "step": 60831 }, { "epoch": 2.02, "grad_norm": 0.6124610304832458, "learning_rate": 0.0001435484168738296, "loss": 1.6702, "step": 60832 }, { "epoch": 2.02, "grad_norm": 0.624919056892395, "learning_rate": 0.00014353949872070382, "loss": 1.6692, "step": 60833 }, { "epoch": 2.02, "grad_norm": 0.6277567744255066, "learning_rate": 0.0001435305807574979, "loss": 1.7553, "step": 60834 }, { "epoch": 2.02, "grad_norm": 0.6421692371368408, "learning_rate": 0.0001435216629842228, "loss": 1.6851, "step": 60835 }, { "epoch": 2.02, "grad_norm": 0.627508282661438, "learning_rate": 0.00014351274540088955, "loss": 1.7212, "step": 60836 }, { "epoch": 2.02, "grad_norm": 0.6224376559257507, "learning_rate": 0.00014350382800750854, "loss": 1.7695, "step": 60837 }, { "epoch": 2.02, "grad_norm": 0.6201963424682617, "learning_rate": 0.00014349491080409084, "loss": 1.767, "step": 60838 }, { "epoch": 2.02, "grad_norm": 0.6783586740493774, "learning_rate": 0.00014348599379064738, "loss": 1.7359, "step": 60839 }, { "epoch": 2.02, "grad_norm": 0.634520411491394, "learning_rate": 0.00014347707696718887, "loss": 1.6568, "step": 60840 }, { "epoch": 2.02, "grad_norm": 0.6238270998001099, "learning_rate": 0.00014346816033372598, "loss": 1.796, "step": 60841 }, { "epoch": 2.02, "grad_norm": 0.6037254333496094, "learning_rate": 0.0001434592438902697, "loss": 1.7143, "step": 60842 }, { "epoch": 2.02, "grad_norm": 0.6092997789382935, "learning_rate": 0.00014345032763683107, "loss": 1.6488, "step": 60843 }, { "epoch": 2.02, "grad_norm": 0.6152649521827698, "learning_rate": 0.00014344141157342043, "loss": 1.6875, "step": 60844 }, { "epoch": 2.02, "grad_norm": 0.602121889591217, "learning_rate": 0.00014343249570004886, "loss": 1.605, "step": 60845 }, { "epoch": 2.02, "grad_norm": 0.6198570132255554, "learning_rate": 0.00014342358001672727, "loss": 1.6942, "step": 60846 }, { "epoch": 2.02, "grad_norm": 0.6280208230018616, "learning_rate": 0.00014341466452346638, "loss": 1.6842, "step": 60847 }, { "epoch": 2.02, "grad_norm": 0.6172690987586975, "learning_rate": 0.00014340574922027692, "loss": 1.6838, "step": 60848 }, { "epoch": 2.02, "grad_norm": 0.6428211331367493, "learning_rate": 0.00014339683410716992, "loss": 1.7179, "step": 60849 }, { "epoch": 2.02, "grad_norm": 0.6139693856239319, "learning_rate": 0.0001433879191841561, "loss": 1.7256, "step": 60850 }, { "epoch": 2.02, "grad_norm": 0.6275315284729004, "learning_rate": 0.00014337900445124616, "loss": 1.6972, "step": 60851 }, { "epoch": 2.02, "grad_norm": 0.6228654980659485, "learning_rate": 0.000143370089908451, "loss": 1.8127, "step": 60852 }, { "epoch": 2.02, "grad_norm": 0.6180043816566467, "learning_rate": 0.0001433611755557816, "loss": 1.6891, "step": 60853 }, { "epoch": 2.02, "grad_norm": 0.6086583733558655, "learning_rate": 0.00014335226139324865, "loss": 1.661, "step": 60854 }, { "epoch": 2.02, "grad_norm": 0.6150997877120972, "learning_rate": 0.00014334334742086284, "loss": 1.6815, "step": 60855 }, { "epoch": 2.02, "grad_norm": 0.620100200176239, "learning_rate": 0.0001433344336386353, "loss": 1.7123, "step": 60856 }, { "epoch": 2.02, "grad_norm": 0.6198456883430481, "learning_rate": 0.00014332552004657663, "loss": 1.6906, "step": 60857 }, { "epoch": 2.02, "grad_norm": 0.6219033002853394, "learning_rate": 0.00014331660664469758, "loss": 1.7871, "step": 60858 }, { "epoch": 2.02, "grad_norm": 0.6150419116020203, "learning_rate": 0.0001433076934330092, "loss": 1.6508, "step": 60859 }, { "epoch": 2.02, "grad_norm": 0.5986428260803223, "learning_rate": 0.0001432987804115221, "loss": 1.6951, "step": 60860 }, { "epoch": 2.02, "grad_norm": 0.6284772157669067, "learning_rate": 0.00014328986758024726, "loss": 1.7099, "step": 60861 }, { "epoch": 2.02, "grad_norm": 0.621356189250946, "learning_rate": 0.00014328095493919534, "loss": 1.6985, "step": 60862 }, { "epoch": 2.02, "grad_norm": 0.6395602822303772, "learning_rate": 0.00014327204248837735, "loss": 1.7187, "step": 60863 }, { "epoch": 2.02, "grad_norm": 0.6243987083435059, "learning_rate": 0.00014326313022780402, "loss": 1.6936, "step": 60864 }, { "epoch": 2.02, "grad_norm": 0.631527304649353, "learning_rate": 0.00014325421815748608, "loss": 1.7673, "step": 60865 }, { "epoch": 2.03, "grad_norm": 0.6148160696029663, "learning_rate": 0.00014324530627743447, "loss": 1.7448, "step": 60866 }, { "epoch": 2.03, "grad_norm": 0.618100106716156, "learning_rate": 0.0001432363945876599, "loss": 1.6866, "step": 60867 }, { "epoch": 2.03, "grad_norm": 0.6399084329605103, "learning_rate": 0.00014322748308817334, "loss": 1.703, "step": 60868 }, { "epoch": 2.03, "grad_norm": 0.637987494468689, "learning_rate": 0.0001432185717789855, "loss": 1.7005, "step": 60869 }, { "epoch": 2.03, "grad_norm": 0.6007722616195679, "learning_rate": 0.00014320966066010715, "loss": 1.7957, "step": 60870 }, { "epoch": 2.03, "grad_norm": 0.6144461035728455, "learning_rate": 0.00014320074973154923, "loss": 1.7042, "step": 60871 }, { "epoch": 2.03, "grad_norm": 0.6000798344612122, "learning_rate": 0.00014319183899332244, "loss": 1.6692, "step": 60872 }, { "epoch": 2.03, "grad_norm": 0.6159224510192871, "learning_rate": 0.00014318292844543772, "loss": 1.7374, "step": 60873 }, { "epoch": 2.03, "grad_norm": 0.6257740259170532, "learning_rate": 0.0001431740180879057, "loss": 1.7131, "step": 60874 }, { "epoch": 2.03, "grad_norm": 0.6249258518218994, "learning_rate": 0.00014316510792073745, "loss": 1.7734, "step": 60875 }, { "epoch": 2.03, "grad_norm": 0.609449028968811, "learning_rate": 0.00014315619794394365, "loss": 1.7168, "step": 60876 }, { "epoch": 2.03, "grad_norm": 0.6048433780670166, "learning_rate": 0.000143147288157535, "loss": 1.686, "step": 60877 }, { "epoch": 2.03, "grad_norm": 0.5980913043022156, "learning_rate": 0.00014313837856152253, "loss": 1.6823, "step": 60878 }, { "epoch": 2.03, "grad_norm": 0.6197661757469177, "learning_rate": 0.00014312946915591697, "loss": 1.6744, "step": 60879 }, { "epoch": 2.03, "grad_norm": 0.62659752368927, "learning_rate": 0.000143120559940729, "loss": 1.6821, "step": 60880 }, { "epoch": 2.03, "grad_norm": 0.598761796951294, "learning_rate": 0.00014311165091596958, "loss": 1.7336, "step": 60881 }, { "epoch": 2.03, "grad_norm": 0.5986248850822449, "learning_rate": 0.0001431027420816496, "loss": 1.6514, "step": 60882 }, { "epoch": 2.03, "grad_norm": 0.6398320198059082, "learning_rate": 0.00014309383343777976, "loss": 1.7114, "step": 60883 }, { "epoch": 2.03, "grad_norm": 0.6162963509559631, "learning_rate": 0.00014308492498437078, "loss": 1.7517, "step": 60884 }, { "epoch": 2.03, "grad_norm": 0.6237890124320984, "learning_rate": 0.00014307601672143368, "loss": 1.8166, "step": 60885 }, { "epoch": 2.03, "grad_norm": 0.6221484541893005, "learning_rate": 0.00014306710864897922, "loss": 1.7037, "step": 60886 }, { "epoch": 2.03, "grad_norm": 0.6226851940155029, "learning_rate": 0.00014305820076701803, "loss": 1.5862, "step": 60887 }, { "epoch": 2.03, "grad_norm": 0.6158646941184998, "learning_rate": 0.00014304929307556104, "loss": 1.7134, "step": 60888 }, { "epoch": 2.03, "grad_norm": 0.5896250605583191, "learning_rate": 0.00014304038557461917, "loss": 1.7516, "step": 60889 }, { "epoch": 2.03, "grad_norm": 0.6564757823944092, "learning_rate": 0.0001430314782642032, "loss": 1.778, "step": 60890 }, { "epoch": 2.03, "grad_norm": 0.612852931022644, "learning_rate": 0.00014302257114432378, "loss": 1.6896, "step": 60891 }, { "epoch": 2.03, "grad_norm": 0.6322700381278992, "learning_rate": 0.00014301366421499192, "loss": 1.705, "step": 60892 }, { "epoch": 2.03, "grad_norm": 0.611642062664032, "learning_rate": 0.00014300475747621835, "loss": 1.6826, "step": 60893 }, { "epoch": 2.03, "grad_norm": 0.603517472743988, "learning_rate": 0.00014299585092801374, "loss": 1.6764, "step": 60894 }, { "epoch": 2.03, "grad_norm": 0.6219421625137329, "learning_rate": 0.00014298694457038903, "loss": 1.6802, "step": 60895 }, { "epoch": 2.03, "grad_norm": 0.6149545311927795, "learning_rate": 0.0001429780384033552, "loss": 1.7376, "step": 60896 }, { "epoch": 2.03, "grad_norm": 0.6106319427490234, "learning_rate": 0.00014296913242692281, "loss": 1.7173, "step": 60897 }, { "epoch": 2.03, "grad_norm": 0.609856367111206, "learning_rate": 0.00014296022664110272, "loss": 1.7874, "step": 60898 }, { "epoch": 2.03, "grad_norm": 0.6376019716262817, "learning_rate": 0.00014295132104590584, "loss": 1.6685, "step": 60899 }, { "epoch": 2.03, "grad_norm": 0.6198615431785583, "learning_rate": 0.00014294241564134292, "loss": 1.7553, "step": 60900 }, { "epoch": 2.03, "grad_norm": 0.6015761494636536, "learning_rate": 0.00014293351042742467, "loss": 1.6482, "step": 60901 }, { "epoch": 2.03, "grad_norm": 0.6049268841743469, "learning_rate": 0.000142924605404162, "loss": 1.6548, "step": 60902 }, { "epoch": 2.03, "grad_norm": 0.613227903842926, "learning_rate": 0.00014291570057156582, "loss": 1.7608, "step": 60903 }, { "epoch": 2.03, "grad_norm": 0.6377643942832947, "learning_rate": 0.00014290679592964683, "loss": 1.7267, "step": 60904 }, { "epoch": 2.03, "grad_norm": 0.5977379083633423, "learning_rate": 0.00014289789147841573, "loss": 1.681, "step": 60905 }, { "epoch": 2.03, "grad_norm": 0.6110206246376038, "learning_rate": 0.00014288898721788356, "loss": 1.6366, "step": 60906 }, { "epoch": 2.03, "grad_norm": 0.6127700209617615, "learning_rate": 0.00014288008314806104, "loss": 1.6904, "step": 60907 }, { "epoch": 2.03, "grad_norm": 0.6370914578437805, "learning_rate": 0.0001428711792689588, "loss": 1.7838, "step": 60908 }, { "epoch": 2.03, "grad_norm": 0.6361835598945618, "learning_rate": 0.0001428622755805879, "loss": 1.6629, "step": 60909 }, { "epoch": 2.03, "grad_norm": 0.6515944004058838, "learning_rate": 0.00014285337208295898, "loss": 1.6692, "step": 60910 }, { "epoch": 2.03, "grad_norm": 0.6182467341423035, "learning_rate": 0.00014284446877608296, "loss": 1.7753, "step": 60911 }, { "epoch": 2.03, "grad_norm": 0.6179164052009583, "learning_rate": 0.00014283556565997053, "loss": 1.6813, "step": 60912 }, { "epoch": 2.03, "grad_norm": 0.6184332966804504, "learning_rate": 0.00014282666273463267, "loss": 1.7714, "step": 60913 }, { "epoch": 2.03, "grad_norm": 0.6077275276184082, "learning_rate": 0.00014281776000008007, "loss": 1.6891, "step": 60914 }, { "epoch": 2.03, "grad_norm": 0.6178060173988342, "learning_rate": 0.00014280885745632344, "loss": 1.697, "step": 60915 }, { "epoch": 2.03, "grad_norm": 0.6307972073554993, "learning_rate": 0.00014279995510337384, "loss": 1.725, "step": 60916 }, { "epoch": 2.03, "grad_norm": 0.6324244737625122, "learning_rate": 0.00014279105294124177, "loss": 1.8092, "step": 60917 }, { "epoch": 2.03, "grad_norm": 0.6061687469482422, "learning_rate": 0.00014278215096993834, "loss": 1.5854, "step": 60918 }, { "epoch": 2.03, "grad_norm": 0.6267591714859009, "learning_rate": 0.00014277324918947421, "loss": 1.6343, "step": 60919 }, { "epoch": 2.03, "grad_norm": 0.6106981635093689, "learning_rate": 0.0001427643475998601, "loss": 1.6539, "step": 60920 }, { "epoch": 2.03, "grad_norm": 0.6178103089332581, "learning_rate": 0.00014275544620110699, "loss": 1.7164, "step": 60921 }, { "epoch": 2.03, "grad_norm": 0.6173655390739441, "learning_rate": 0.00014274654499322548, "loss": 1.74, "step": 60922 }, { "epoch": 2.03, "grad_norm": 0.5885648727416992, "learning_rate": 0.0001427376439762266, "loss": 1.7138, "step": 60923 }, { "epoch": 2.03, "grad_norm": 0.6271238923072815, "learning_rate": 0.00014272874315012098, "loss": 1.7466, "step": 60924 }, { "epoch": 2.03, "grad_norm": 0.6244236826896667, "learning_rate": 0.0001427198425149196, "loss": 1.6735, "step": 60925 }, { "epoch": 2.03, "grad_norm": 0.6130804419517517, "learning_rate": 0.00014271094207063309, "loss": 1.6506, "step": 60926 }, { "epoch": 2.03, "grad_norm": 0.5901846289634705, "learning_rate": 0.00014270204181727228, "loss": 1.7149, "step": 60927 }, { "epoch": 2.03, "grad_norm": 0.9564165472984314, "learning_rate": 0.00014269314175484812, "loss": 1.7278, "step": 60928 }, { "epoch": 2.03, "grad_norm": 0.6210466027259827, "learning_rate": 0.00014268424188337115, "loss": 1.709, "step": 60929 }, { "epoch": 2.03, "grad_norm": 0.6360110640525818, "learning_rate": 0.0001426753422028525, "loss": 1.696, "step": 60930 }, { "epoch": 2.03, "grad_norm": 0.6581920385360718, "learning_rate": 0.00014266644271330263, "loss": 1.7925, "step": 60931 }, { "epoch": 2.03, "grad_norm": 0.5995151400566101, "learning_rate": 0.00014265754341473265, "loss": 1.6664, "step": 60932 }, { "epoch": 2.03, "grad_norm": 0.6527132987976074, "learning_rate": 0.00014264864430715325, "loss": 1.7685, "step": 60933 }, { "epoch": 2.03, "grad_norm": 0.6382923126220703, "learning_rate": 0.0001426397453905751, "loss": 1.7433, "step": 60934 }, { "epoch": 2.03, "grad_norm": 0.6205087900161743, "learning_rate": 0.00014263084666500921, "loss": 1.6758, "step": 60935 }, { "epoch": 2.03, "grad_norm": 0.6037775874137878, "learning_rate": 0.00014262194813046628, "loss": 1.696, "step": 60936 }, { "epoch": 2.03, "grad_norm": 0.6104315519332886, "learning_rate": 0.000142613049786957, "loss": 1.7245, "step": 60937 }, { "epoch": 2.03, "grad_norm": 0.6310052275657654, "learning_rate": 0.00014260415163449232, "loss": 1.7135, "step": 60938 }, { "epoch": 2.03, "grad_norm": 0.6344286203384399, "learning_rate": 0.0001425952536730831, "loss": 1.7551, "step": 60939 }, { "epoch": 2.03, "grad_norm": 0.6100786328315735, "learning_rate": 0.00014258635590274005, "loss": 1.7118, "step": 60940 }, { "epoch": 2.03, "grad_norm": 0.6023684144020081, "learning_rate": 0.00014257745832347386, "loss": 1.775, "step": 60941 }, { "epoch": 2.03, "grad_norm": 0.6315997242927551, "learning_rate": 0.00014256856093529554, "loss": 1.728, "step": 60942 }, { "epoch": 2.03, "grad_norm": 0.5904486775398254, "learning_rate": 0.0001425596637382158, "loss": 1.6614, "step": 60943 }, { "epoch": 2.03, "grad_norm": 0.6144890189170837, "learning_rate": 0.00014255076673224534, "loss": 1.6419, "step": 60944 }, { "epoch": 2.03, "grad_norm": 0.6092205047607422, "learning_rate": 0.000142541869917395, "loss": 1.7486, "step": 60945 }, { "epoch": 2.03, "grad_norm": 0.6096169948577881, "learning_rate": 0.00014253297329367592, "loss": 1.8021, "step": 60946 }, { "epoch": 2.03, "grad_norm": 0.5982688069343567, "learning_rate": 0.00014252407686109832, "loss": 1.7263, "step": 60947 }, { "epoch": 2.03, "grad_norm": 0.6442797183990479, "learning_rate": 0.00014251518061967334, "loss": 1.7336, "step": 60948 }, { "epoch": 2.03, "grad_norm": 0.6023520231246948, "learning_rate": 0.0001425062845694118, "loss": 1.6596, "step": 60949 }, { "epoch": 2.03, "grad_norm": 0.6268136501312256, "learning_rate": 0.00014249738871032445, "loss": 1.7672, "step": 60950 }, { "epoch": 2.03, "grad_norm": 0.625484824180603, "learning_rate": 0.00014248849304242193, "loss": 1.6793, "step": 60951 }, { "epoch": 2.03, "grad_norm": 0.6374257802963257, "learning_rate": 0.0001424795975657152, "loss": 1.7563, "step": 60952 }, { "epoch": 2.03, "grad_norm": 0.6166850924491882, "learning_rate": 0.00014247070228021522, "loss": 1.6531, "step": 60953 }, { "epoch": 2.03, "grad_norm": 0.6122030019760132, "learning_rate": 0.00014246180718593238, "loss": 1.7321, "step": 60954 }, { "epoch": 2.03, "grad_norm": 0.634773313999176, "learning_rate": 0.0001424529122828777, "loss": 1.6739, "step": 60955 }, { "epoch": 2.03, "grad_norm": 0.5981944799423218, "learning_rate": 0.0001424440175710621, "loss": 1.6689, "step": 60956 }, { "epoch": 2.03, "grad_norm": 0.6139915585517883, "learning_rate": 0.00014243512305049623, "loss": 1.6888, "step": 60957 }, { "epoch": 2.03, "grad_norm": 0.6548444628715515, "learning_rate": 0.00014242622872119077, "loss": 1.7142, "step": 60958 }, { "epoch": 2.03, "grad_norm": 0.5984450578689575, "learning_rate": 0.0001424173345831567, "loss": 1.6529, "step": 60959 }, { "epoch": 2.03, "grad_norm": 0.6497200131416321, "learning_rate": 0.00014240844063640498, "loss": 1.6436, "step": 60960 }, { "epoch": 2.03, "grad_norm": 0.6124367117881775, "learning_rate": 0.00014239954688094594, "loss": 1.6364, "step": 60961 }, { "epoch": 2.03, "grad_norm": 0.6127820611000061, "learning_rate": 0.00014239065331679066, "loss": 1.6075, "step": 60962 }, { "epoch": 2.03, "grad_norm": 0.6305793523788452, "learning_rate": 0.00014238175994395, "loss": 1.8144, "step": 60963 }, { "epoch": 2.03, "grad_norm": 0.6219968199729919, "learning_rate": 0.00014237286676243468, "loss": 1.6681, "step": 60964 }, { "epoch": 2.03, "grad_norm": 0.6225371360778809, "learning_rate": 0.0001423639737722553, "loss": 1.7314, "step": 60965 }, { "epoch": 2.03, "grad_norm": 0.6272487044334412, "learning_rate": 0.000142355080973423, "loss": 1.7318, "step": 60966 }, { "epoch": 2.03, "grad_norm": 0.6025276184082031, "learning_rate": 0.00014234618836594836, "loss": 1.7305, "step": 60967 }, { "epoch": 2.03, "grad_norm": 0.6175521016120911, "learning_rate": 0.0001423372959498422, "loss": 1.6358, "step": 60968 }, { "epoch": 2.03, "grad_norm": 0.6027630567550659, "learning_rate": 0.00014232840372511524, "loss": 1.7056, "step": 60969 }, { "epoch": 2.03, "grad_norm": 0.6271932721138, "learning_rate": 0.0001423195116917785, "loss": 1.7112, "step": 60970 }, { "epoch": 2.03, "grad_norm": 0.6589077115058899, "learning_rate": 0.00014231061984984263, "loss": 1.7032, "step": 60971 }, { "epoch": 2.03, "grad_norm": 0.6534666419029236, "learning_rate": 0.00014230172819931834, "loss": 1.7926, "step": 60972 }, { "epoch": 2.03, "grad_norm": 0.6152136921882629, "learning_rate": 0.00014229283674021664, "loss": 1.6937, "step": 60973 }, { "epoch": 2.03, "grad_norm": 0.6547189950942993, "learning_rate": 0.00014228394547254817, "loss": 1.7726, "step": 60974 }, { "epoch": 2.03, "grad_norm": 0.6362757086753845, "learning_rate": 0.00014227505439632366, "loss": 1.737, "step": 60975 }, { "epoch": 2.03, "grad_norm": 0.6031624674797058, "learning_rate": 0.00014226616351155406, "loss": 1.7275, "step": 60976 }, { "epoch": 2.03, "grad_norm": 0.6354076266288757, "learning_rate": 0.00014225727281825005, "loss": 1.7268, "step": 60977 }, { "epoch": 2.03, "grad_norm": 0.6289883255958557, "learning_rate": 0.00014224838231642254, "loss": 1.7192, "step": 60978 }, { "epoch": 2.03, "grad_norm": 0.6345782279968262, "learning_rate": 0.00014223949200608217, "loss": 1.7427, "step": 60979 }, { "epoch": 2.03, "grad_norm": 0.6317718625068665, "learning_rate": 0.0001422306018872399, "loss": 1.6659, "step": 60980 }, { "epoch": 2.03, "grad_norm": 0.6315056085586548, "learning_rate": 0.00014222171195990644, "loss": 1.6745, "step": 60981 }, { "epoch": 2.03, "grad_norm": 0.6149632930755615, "learning_rate": 0.00014221282222409248, "loss": 1.6733, "step": 60982 }, { "epoch": 2.03, "grad_norm": 0.6163378357887268, "learning_rate": 0.000142203932679809, "loss": 1.6683, "step": 60983 }, { "epoch": 2.03, "grad_norm": 0.6505472660064697, "learning_rate": 0.0001421950433270666, "loss": 1.7277, "step": 60984 }, { "epoch": 2.03, "grad_norm": 0.6370960474014282, "learning_rate": 0.00014218615416587623, "loss": 1.7416, "step": 60985 }, { "epoch": 2.03, "grad_norm": 0.6583722233772278, "learning_rate": 0.00014217726519624865, "loss": 1.8132, "step": 60986 }, { "epoch": 2.03, "grad_norm": 0.6228563785552979, "learning_rate": 0.0001421683764181945, "loss": 1.7668, "step": 60987 }, { "epoch": 2.03, "grad_norm": 0.6145220398902893, "learning_rate": 0.0001421594878317248, "loss": 1.7175, "step": 60988 }, { "epoch": 2.03, "grad_norm": 0.6219715476036072, "learning_rate": 0.0001421505994368501, "loss": 1.6358, "step": 60989 }, { "epoch": 2.03, "grad_norm": 0.5999311804771423, "learning_rate": 0.00014214171123358142, "loss": 1.7129, "step": 60990 }, { "epoch": 2.03, "grad_norm": 0.6099991798400879, "learning_rate": 0.00014213282322192935, "loss": 1.7646, "step": 60991 }, { "epoch": 2.03, "grad_norm": 0.6319248080253601, "learning_rate": 0.0001421239354019049, "loss": 1.6805, "step": 60992 }, { "epoch": 2.03, "grad_norm": 0.5958771705627441, "learning_rate": 0.0001421150477735187, "loss": 1.7116, "step": 60993 }, { "epoch": 2.03, "grad_norm": 0.5999693274497986, "learning_rate": 0.00014210616033678147, "loss": 1.7564, "step": 60994 }, { "epoch": 2.03, "grad_norm": 0.6076668500900269, "learning_rate": 0.00014209727309170422, "loss": 1.7659, "step": 60995 }, { "epoch": 2.03, "grad_norm": 0.5994969010353088, "learning_rate": 0.00014208838603829761, "loss": 1.7371, "step": 60996 }, { "epoch": 2.03, "grad_norm": 0.6068323850631714, "learning_rate": 0.0001420794991765723, "loss": 1.6502, "step": 60997 }, { "epoch": 2.03, "grad_norm": 0.6314089298248291, "learning_rate": 0.00014207061250653922, "loss": 1.7708, "step": 60998 }, { "epoch": 2.03, "grad_norm": 0.6114450097084045, "learning_rate": 0.00014206172602820925, "loss": 1.6204, "step": 60999 }, { "epoch": 2.03, "grad_norm": 0.6086073517799377, "learning_rate": 0.00014205283974159312, "loss": 1.6831, "step": 61000 }, { "epoch": 2.03, "grad_norm": 0.6351736187934875, "learning_rate": 0.00014204395364670138, "loss": 1.6908, "step": 61001 }, { "epoch": 2.03, "grad_norm": 0.5993108153343201, "learning_rate": 0.00014203506774354517, "loss": 1.7519, "step": 61002 }, { "epoch": 2.03, "grad_norm": 0.6350833773612976, "learning_rate": 0.00014202618203213512, "loss": 1.7129, "step": 61003 }, { "epoch": 2.03, "grad_norm": 0.6049622297286987, "learning_rate": 0.0001420172965124819, "loss": 1.5903, "step": 61004 }, { "epoch": 2.03, "grad_norm": 0.638819694519043, "learning_rate": 0.00014200841118459635, "loss": 1.6855, "step": 61005 }, { "epoch": 2.03, "grad_norm": 0.6014401316642761, "learning_rate": 0.00014199952604848946, "loss": 1.75, "step": 61006 }, { "epoch": 2.03, "grad_norm": 0.5959612727165222, "learning_rate": 0.00014199064110417185, "loss": 1.7204, "step": 61007 }, { "epoch": 2.03, "grad_norm": 0.6120784282684326, "learning_rate": 0.00014198175635165425, "loss": 1.7491, "step": 61008 }, { "epoch": 2.03, "grad_norm": 0.6206096410751343, "learning_rate": 0.0001419728717909476, "loss": 1.7228, "step": 61009 }, { "epoch": 2.03, "grad_norm": 0.6476597189903259, "learning_rate": 0.00014196398742206257, "loss": 1.7728, "step": 61010 }, { "epoch": 2.03, "grad_norm": 0.6158069372177124, "learning_rate": 0.00014195510324500988, "loss": 1.6894, "step": 61011 }, { "epoch": 2.03, "grad_norm": 0.6324895620346069, "learning_rate": 0.00014194621925980042, "loss": 1.7146, "step": 61012 }, { "epoch": 2.03, "grad_norm": 0.6353959441184998, "learning_rate": 0.0001419373354664451, "loss": 1.6374, "step": 61013 }, { "epoch": 2.03, "grad_norm": 0.6188151240348816, "learning_rate": 0.0001419284518649545, "loss": 1.7142, "step": 61014 }, { "epoch": 2.03, "grad_norm": 0.6353424191474915, "learning_rate": 0.0001419195684553394, "loss": 1.6947, "step": 61015 }, { "epoch": 2.03, "grad_norm": 0.6099164485931396, "learning_rate": 0.00014191068523761077, "loss": 1.7358, "step": 61016 }, { "epoch": 2.03, "grad_norm": 0.6125006079673767, "learning_rate": 0.00014190180221177926, "loss": 1.7357, "step": 61017 }, { "epoch": 2.03, "grad_norm": 0.603527843952179, "learning_rate": 0.00014189291937785555, "loss": 1.7946, "step": 61018 }, { "epoch": 2.03, "grad_norm": 0.6372543573379517, "learning_rate": 0.00014188403673585053, "loss": 1.6947, "step": 61019 }, { "epoch": 2.03, "grad_norm": 0.639190673828125, "learning_rate": 0.00014187515428577515, "loss": 1.6713, "step": 61020 }, { "epoch": 2.03, "grad_norm": 0.6359873414039612, "learning_rate": 0.00014186627202763998, "loss": 1.7122, "step": 61021 }, { "epoch": 2.03, "grad_norm": 0.6418070197105408, "learning_rate": 0.00014185738996145577, "loss": 1.7197, "step": 61022 }, { "epoch": 2.03, "grad_norm": 0.618230402469635, "learning_rate": 0.0001418485080872335, "loss": 1.7614, "step": 61023 }, { "epoch": 2.03, "grad_norm": 0.6052609086036682, "learning_rate": 0.00014183962640498384, "loss": 1.7082, "step": 61024 }, { "epoch": 2.03, "grad_norm": 0.5945993661880493, "learning_rate": 0.00014183074491471746, "loss": 1.7083, "step": 61025 }, { "epoch": 2.03, "grad_norm": 0.6155524849891663, "learning_rate": 0.00014182186361644535, "loss": 1.7764, "step": 61026 }, { "epoch": 2.03, "grad_norm": 0.6405133605003357, "learning_rate": 0.0001418129825101781, "loss": 1.609, "step": 61027 }, { "epoch": 2.03, "grad_norm": 0.6216168999671936, "learning_rate": 0.00014180410159592664, "loss": 1.7134, "step": 61028 }, { "epoch": 2.03, "grad_norm": 0.6217516660690308, "learning_rate": 0.00014179522087370163, "loss": 1.7266, "step": 61029 }, { "epoch": 2.03, "grad_norm": 0.6075387597084045, "learning_rate": 0.000141786340343514, "loss": 1.7375, "step": 61030 }, { "epoch": 2.03, "grad_norm": 0.6031447052955627, "learning_rate": 0.00014177746000537447, "loss": 1.712, "step": 61031 }, { "epoch": 2.03, "grad_norm": 0.6150335669517517, "learning_rate": 0.00014176857985929365, "loss": 1.7166, "step": 61032 }, { "epoch": 2.03, "grad_norm": 0.6142870783805847, "learning_rate": 0.00014175969990528258, "loss": 1.6752, "step": 61033 }, { "epoch": 2.03, "grad_norm": 0.6283630728721619, "learning_rate": 0.00014175082014335184, "loss": 1.6826, "step": 61034 }, { "epoch": 2.03, "grad_norm": 1.722518801689148, "learning_rate": 0.00014174194057351236, "loss": 1.7464, "step": 61035 }, { "epoch": 2.03, "grad_norm": 0.6481301188468933, "learning_rate": 0.00014173306119577483, "loss": 1.6866, "step": 61036 }, { "epoch": 2.03, "grad_norm": 0.611924946308136, "learning_rate": 0.00014172418201014998, "loss": 1.7071, "step": 61037 }, { "epoch": 2.03, "grad_norm": 0.614555299282074, "learning_rate": 0.00014171530301664874, "loss": 1.6661, "step": 61038 }, { "epoch": 2.03, "grad_norm": 0.6258671879768372, "learning_rate": 0.00014170642421528168, "loss": 1.7779, "step": 61039 }, { "epoch": 2.03, "grad_norm": 0.6163763403892517, "learning_rate": 0.00014169754560605983, "loss": 1.6499, "step": 61040 }, { "epoch": 2.03, "grad_norm": 0.6395906209945679, "learning_rate": 0.0001416886671889937, "loss": 1.682, "step": 61041 }, { "epoch": 2.03, "grad_norm": 0.6158575415611267, "learning_rate": 0.00014167978896409433, "loss": 1.7086, "step": 61042 }, { "epoch": 2.03, "grad_norm": 0.6434216499328613, "learning_rate": 0.00014167091093137237, "loss": 1.7191, "step": 61043 }, { "epoch": 2.03, "grad_norm": 0.6235684156417847, "learning_rate": 0.00014166203309083845, "loss": 1.6499, "step": 61044 }, { "epoch": 2.03, "grad_norm": 0.6124048233032227, "learning_rate": 0.00014165315544250367, "loss": 1.7218, "step": 61045 }, { "epoch": 2.03, "grad_norm": 0.6483035683631897, "learning_rate": 0.00014164427798637845, "loss": 1.6802, "step": 61046 }, { "epoch": 2.03, "grad_norm": 0.6208265423774719, "learning_rate": 0.00014163540072247388, "loss": 1.7509, "step": 61047 }, { "epoch": 2.03, "grad_norm": 0.6097694039344788, "learning_rate": 0.0001416265236508005, "loss": 1.7347, "step": 61048 }, { "epoch": 2.03, "grad_norm": 0.6233323812484741, "learning_rate": 0.00014161764677136929, "loss": 1.7073, "step": 61049 }, { "epoch": 2.03, "grad_norm": 0.6168429255485535, "learning_rate": 0.0001416087700841909, "loss": 1.6729, "step": 61050 }, { "epoch": 2.03, "grad_norm": 0.6052649617195129, "learning_rate": 0.00014159989358927604, "loss": 1.7327, "step": 61051 }, { "epoch": 2.03, "grad_norm": 0.6259661912918091, "learning_rate": 0.00014159101728663569, "loss": 1.7295, "step": 61052 }, { "epoch": 2.03, "grad_norm": 0.6044672131538391, "learning_rate": 0.0001415821411762805, "loss": 1.7505, "step": 61053 }, { "epoch": 2.03, "grad_norm": 0.6166931390762329, "learning_rate": 0.0001415732652582211, "loss": 1.7066, "step": 61054 }, { "epoch": 2.03, "grad_norm": 0.6051786541938782, "learning_rate": 0.00014156438953246846, "loss": 1.6973, "step": 61055 }, { "epoch": 2.03, "grad_norm": 0.6029382348060608, "learning_rate": 0.00014155551399903338, "loss": 1.6878, "step": 61056 }, { "epoch": 2.03, "grad_norm": 0.6045696139335632, "learning_rate": 0.00014154663865792658, "loss": 1.7052, "step": 61057 }, { "epoch": 2.03, "grad_norm": 0.611148476600647, "learning_rate": 0.00014153776350915868, "loss": 1.7002, "step": 61058 }, { "epoch": 2.03, "grad_norm": 0.6240487694740295, "learning_rate": 0.00014152888855274072, "loss": 1.6676, "step": 61059 }, { "epoch": 2.03, "grad_norm": 0.6198828220367432, "learning_rate": 0.00014152001378868334, "loss": 1.7334, "step": 61060 }, { "epoch": 2.03, "grad_norm": 0.6025763154029846, "learning_rate": 0.0001415111392169972, "loss": 1.6968, "step": 61061 }, { "epoch": 2.03, "grad_norm": 0.609804630279541, "learning_rate": 0.00014150226483769317, "loss": 1.7131, "step": 61062 }, { "epoch": 2.03, "grad_norm": 0.6299407482147217, "learning_rate": 0.0001414933906507823, "loss": 1.6791, "step": 61063 }, { "epoch": 2.03, "grad_norm": 0.6108198165893555, "learning_rate": 0.00014148451665627485, "loss": 1.8067, "step": 61064 }, { "epoch": 2.03, "grad_norm": 0.6097611784934998, "learning_rate": 0.00014147564285418182, "loss": 1.7785, "step": 61065 }, { "epoch": 2.03, "grad_norm": 0.6047524213790894, "learning_rate": 0.00014146676924451415, "loss": 1.747, "step": 61066 }, { "epoch": 2.03, "grad_norm": 0.6210091710090637, "learning_rate": 0.00014145789582728244, "loss": 1.7027, "step": 61067 }, { "epoch": 2.03, "grad_norm": 0.6250130534172058, "learning_rate": 0.00014144902260249735, "loss": 1.7403, "step": 61068 }, { "epoch": 2.03, "grad_norm": 0.6313573718070984, "learning_rate": 0.00014144014957016984, "loss": 1.7084, "step": 61069 }, { "epoch": 2.03, "grad_norm": 0.623507559299469, "learning_rate": 0.00014143127673031083, "loss": 1.7421, "step": 61070 }, { "epoch": 2.03, "grad_norm": 0.6043781042098999, "learning_rate": 0.00014142240408293065, "loss": 1.6902, "step": 61071 }, { "epoch": 2.03, "grad_norm": 0.6290656924247742, "learning_rate": 0.00014141353162804032, "loss": 1.7305, "step": 61072 }, { "epoch": 2.03, "grad_norm": 0.6144677400588989, "learning_rate": 0.0001414046593656507, "loss": 1.7767, "step": 61073 }, { "epoch": 2.03, "grad_norm": 0.6088259220123291, "learning_rate": 0.00014139578729577244, "loss": 1.71, "step": 61074 }, { "epoch": 2.03, "grad_norm": 0.6300002336502075, "learning_rate": 0.00014138691541841625, "loss": 1.6637, "step": 61075 }, { "epoch": 2.03, "grad_norm": 0.6006218791007996, "learning_rate": 0.00014137804373359295, "loss": 1.7497, "step": 61076 }, { "epoch": 2.03, "grad_norm": 0.6202319264411926, "learning_rate": 0.00014136917224131355, "loss": 1.7097, "step": 61077 }, { "epoch": 2.03, "grad_norm": 0.6196519732475281, "learning_rate": 0.0001413603009415884, "loss": 1.6747, "step": 61078 }, { "epoch": 2.03, "grad_norm": 0.6037138104438782, "learning_rate": 0.00014135142983442842, "loss": 1.7143, "step": 61079 }, { "epoch": 2.03, "grad_norm": 0.6061475872993469, "learning_rate": 0.0001413425589198446, "loss": 1.6569, "step": 61080 }, { "epoch": 2.03, "grad_norm": 0.5912663340568542, "learning_rate": 0.0001413336881978475, "loss": 1.6553, "step": 61081 }, { "epoch": 2.03, "grad_norm": 0.6286709308624268, "learning_rate": 0.00014132481766844783, "loss": 1.6505, "step": 61082 }, { "epoch": 2.03, "grad_norm": 0.6079201698303223, "learning_rate": 0.00014131594733165657, "loss": 1.6964, "step": 61083 }, { "epoch": 2.03, "grad_norm": 0.6175034642219543, "learning_rate": 0.00014130707718748437, "loss": 1.6583, "step": 61084 }, { "epoch": 2.03, "grad_norm": 0.6157909035682678, "learning_rate": 0.00014129820723594188, "loss": 1.7553, "step": 61085 }, { "epoch": 2.03, "grad_norm": 0.619205117225647, "learning_rate": 0.00014128933747703995, "loss": 1.6977, "step": 61086 }, { "epoch": 2.03, "grad_norm": 0.6296625137329102, "learning_rate": 0.00014128046791078952, "loss": 1.6953, "step": 61087 }, { "epoch": 2.03, "grad_norm": 0.6246616840362549, "learning_rate": 0.00014127159853720122, "loss": 1.6527, "step": 61088 }, { "epoch": 2.03, "grad_norm": 0.6191772818565369, "learning_rate": 0.0001412627293562857, "loss": 1.7012, "step": 61089 }, { "epoch": 2.03, "grad_norm": 0.6298670768737793, "learning_rate": 0.00014125386036805394, "loss": 1.7094, "step": 61090 }, { "epoch": 2.03, "grad_norm": 0.6286022663116455, "learning_rate": 0.00014124499157251662, "loss": 1.7637, "step": 61091 }, { "epoch": 2.03, "grad_norm": 0.6219828724861145, "learning_rate": 0.00014123612296968434, "loss": 1.6396, "step": 61092 }, { "epoch": 2.03, "grad_norm": 0.6222254037857056, "learning_rate": 0.00014122725455956815, "loss": 1.7235, "step": 61093 }, { "epoch": 2.03, "grad_norm": 0.6163574457168579, "learning_rate": 0.00014121838634217855, "loss": 1.7591, "step": 61094 }, { "epoch": 2.03, "grad_norm": 0.6282122135162354, "learning_rate": 0.0001412095183175266, "loss": 1.7649, "step": 61095 }, { "epoch": 2.03, "grad_norm": 0.6292837262153625, "learning_rate": 0.0001412006504856227, "loss": 1.6606, "step": 61096 }, { "epoch": 2.03, "grad_norm": 0.6341550350189209, "learning_rate": 0.00014119178284647797, "loss": 1.728, "step": 61097 }, { "epoch": 2.03, "grad_norm": 0.5985755324363708, "learning_rate": 0.00014118291540010303, "loss": 1.7237, "step": 61098 }, { "epoch": 2.03, "grad_norm": 0.6164702773094177, "learning_rate": 0.0001411740481465085, "loss": 1.6598, "step": 61099 }, { "epoch": 2.03, "grad_norm": 0.6215617060661316, "learning_rate": 0.00014116518108570539, "loss": 1.6296, "step": 61100 }, { "epoch": 2.03, "grad_norm": 0.6070733666419983, "learning_rate": 0.00014115631421770427, "loss": 1.7106, "step": 61101 }, { "epoch": 2.03, "grad_norm": 0.6228760480880737, "learning_rate": 0.00014114744754251604, "loss": 1.727, "step": 61102 }, { "epoch": 2.03, "grad_norm": 0.6159365773200989, "learning_rate": 0.0001411385810601514, "loss": 1.6489, "step": 61103 }, { "epoch": 2.03, "grad_norm": 0.6261276602745056, "learning_rate": 0.00014112971477062104, "loss": 1.7039, "step": 61104 }, { "epoch": 2.03, "grad_norm": 0.6084238886833191, "learning_rate": 0.0001411208486739359, "loss": 1.7748, "step": 61105 }, { "epoch": 2.03, "grad_norm": 0.5786847472190857, "learning_rate": 0.00014111198277010653, "loss": 1.6966, "step": 61106 }, { "epoch": 2.03, "grad_norm": 0.5956570506095886, "learning_rate": 0.0001411031170591439, "loss": 1.6886, "step": 61107 }, { "epoch": 2.03, "grad_norm": 0.6176058650016785, "learning_rate": 0.00014109425154105857, "loss": 1.7376, "step": 61108 }, { "epoch": 2.03, "grad_norm": 0.623116672039032, "learning_rate": 0.0001410853862158615, "loss": 1.6717, "step": 61109 }, { "epoch": 2.03, "grad_norm": 0.611707329750061, "learning_rate": 0.00014107652108356339, "loss": 1.6952, "step": 61110 }, { "epoch": 2.03, "grad_norm": 0.6238450407981873, "learning_rate": 0.0001410676561441748, "loss": 1.6496, "step": 61111 }, { "epoch": 2.03, "grad_norm": 0.629623532295227, "learning_rate": 0.00014105879139770684, "loss": 1.7392, "step": 61112 }, { "epoch": 2.03, "grad_norm": 0.6204876899719238, "learning_rate": 0.00014104992684417004, "loss": 1.7306, "step": 61113 }, { "epoch": 2.03, "grad_norm": 0.6132418513298035, "learning_rate": 0.00014104106248357512, "loss": 1.6983, "step": 61114 }, { "epoch": 2.03, "grad_norm": 0.6091408133506775, "learning_rate": 0.00014103219831593293, "loss": 1.6534, "step": 61115 }, { "epoch": 2.03, "grad_norm": 0.6054028868675232, "learning_rate": 0.0001410233343412543, "loss": 1.6743, "step": 61116 }, { "epoch": 2.03, "grad_norm": 0.6107643246650696, "learning_rate": 0.00014101447055954997, "loss": 1.7016, "step": 61117 }, { "epoch": 2.03, "grad_norm": 0.6148720979690552, "learning_rate": 0.0001410056069708305, "loss": 1.7233, "step": 61118 }, { "epoch": 2.03, "grad_norm": 0.623927116394043, "learning_rate": 0.00014099674357510695, "loss": 1.6556, "step": 61119 }, { "epoch": 2.03, "grad_norm": 0.6015140414237976, "learning_rate": 0.0001409878803723899, "loss": 1.7324, "step": 61120 }, { "epoch": 2.03, "grad_norm": 0.6215668320655823, "learning_rate": 0.00014097901736269, "loss": 1.7124, "step": 61121 }, { "epoch": 2.03, "grad_norm": 0.6111153960227966, "learning_rate": 0.00014097015454601815, "loss": 1.7348, "step": 61122 }, { "epoch": 2.03, "grad_norm": 0.6221988797187805, "learning_rate": 0.0001409612919223852, "loss": 1.6897, "step": 61123 }, { "epoch": 2.03, "grad_norm": 0.625353217124939, "learning_rate": 0.00014095242949180187, "loss": 1.7781, "step": 61124 }, { "epoch": 2.03, "grad_norm": 0.6389385461807251, "learning_rate": 0.0001409435672542787, "loss": 1.7448, "step": 61125 }, { "epoch": 2.03, "grad_norm": 0.6128767728805542, "learning_rate": 0.0001409347052098267, "loss": 1.7086, "step": 61126 }, { "epoch": 2.03, "grad_norm": 0.636130154132843, "learning_rate": 0.00014092584335845655, "loss": 1.6604, "step": 61127 }, { "epoch": 2.03, "grad_norm": 0.6140454411506653, "learning_rate": 0.0001409169817001789, "loss": 1.7771, "step": 61128 }, { "epoch": 2.03, "grad_norm": 0.6122501492500305, "learning_rate": 0.00014090812023500457, "loss": 1.6838, "step": 61129 }, { "epoch": 2.03, "grad_norm": 0.6058634519577026, "learning_rate": 0.00014089925896294448, "loss": 1.7121, "step": 61130 }, { "epoch": 2.03, "grad_norm": 0.6461003422737122, "learning_rate": 0.0001408903978840092, "loss": 1.683, "step": 61131 }, { "epoch": 2.03, "grad_norm": 0.6131126284599304, "learning_rate": 0.00014088153699820944, "loss": 1.7213, "step": 61132 }, { "epoch": 2.03, "grad_norm": 0.6289128065109253, "learning_rate": 0.00014087267630555616, "loss": 1.7565, "step": 61133 }, { "epoch": 2.03, "grad_norm": 0.6615027189254761, "learning_rate": 0.00014086381580606002, "loss": 1.7349, "step": 61134 }, { "epoch": 2.03, "grad_norm": 0.6193097829818726, "learning_rate": 0.00014085495549973163, "loss": 1.6795, "step": 61135 }, { "epoch": 2.03, "grad_norm": 0.611447811126709, "learning_rate": 0.0001408460953865819, "loss": 1.7796, "step": 61136 }, { "epoch": 2.03, "grad_norm": 0.6302499771118164, "learning_rate": 0.00014083723546662164, "loss": 1.7008, "step": 61137 }, { "epoch": 2.03, "grad_norm": 0.6178863048553467, "learning_rate": 0.00014082837573986155, "loss": 1.7208, "step": 61138 }, { "epoch": 2.03, "grad_norm": 0.6225202679634094, "learning_rate": 0.00014081951620631221, "loss": 1.7137, "step": 61139 }, { "epoch": 2.03, "grad_norm": 0.6205957531929016, "learning_rate": 0.00014081065686598468, "loss": 1.7176, "step": 61140 }, { "epoch": 2.03, "grad_norm": 0.6229216456413269, "learning_rate": 0.00014080179771888956, "loss": 1.6395, "step": 61141 }, { "epoch": 2.03, "grad_norm": 0.6056917309761047, "learning_rate": 0.00014079293876503746, "loss": 1.7054, "step": 61142 }, { "epoch": 2.03, "grad_norm": 0.6248993873596191, "learning_rate": 0.00014078408000443938, "loss": 1.6969, "step": 61143 }, { "epoch": 2.03, "grad_norm": 0.6138153672218323, "learning_rate": 0.00014077522143710587, "loss": 1.7768, "step": 61144 }, { "epoch": 2.03, "grad_norm": 0.616322934627533, "learning_rate": 0.0001407663630630479, "loss": 1.7427, "step": 61145 }, { "epoch": 2.03, "grad_norm": 0.6504879593849182, "learning_rate": 0.00014075750488227597, "loss": 1.7435, "step": 61146 }, { "epoch": 2.03, "grad_norm": 0.6359161138534546, "learning_rate": 0.00014074864689480107, "loss": 1.7197, "step": 61147 }, { "epoch": 2.03, "grad_norm": 0.6032006144523621, "learning_rate": 0.0001407397891006339, "loss": 1.7202, "step": 61148 }, { "epoch": 2.03, "grad_norm": 0.6163203716278076, "learning_rate": 0.000140730931499785, "loss": 1.7641, "step": 61149 }, { "epoch": 2.03, "grad_norm": 0.6471628546714783, "learning_rate": 0.00014072207409226542, "loss": 1.7358, "step": 61150 }, { "epoch": 2.03, "grad_norm": 0.6366732716560364, "learning_rate": 0.00014071321687808566, "loss": 1.6976, "step": 61151 }, { "epoch": 2.03, "grad_norm": 0.625891387462616, "learning_rate": 0.00014070435985725666, "loss": 1.7136, "step": 61152 }, { "epoch": 2.03, "grad_norm": 0.6101082563400269, "learning_rate": 0.00014069550302978914, "loss": 1.6697, "step": 61153 }, { "epoch": 2.03, "grad_norm": 0.6314375400543213, "learning_rate": 0.0001406866463956937, "loss": 1.8319, "step": 61154 }, { "epoch": 2.03, "grad_norm": 0.6168882846832275, "learning_rate": 0.00014067778995498128, "loss": 1.6694, "step": 61155 }, { "epoch": 2.03, "grad_norm": 0.6203089356422424, "learning_rate": 0.00014066893370766245, "loss": 1.7578, "step": 61156 }, { "epoch": 2.03, "grad_norm": 0.631712794303894, "learning_rate": 0.00014066007765374815, "loss": 1.7602, "step": 61157 }, { "epoch": 2.03, "grad_norm": 0.620177686214447, "learning_rate": 0.00014065122179324896, "loss": 1.692, "step": 61158 }, { "epoch": 2.03, "grad_norm": 0.5993893146514893, "learning_rate": 0.00014064236612617578, "loss": 1.6455, "step": 61159 }, { "epoch": 2.03, "grad_norm": 0.6160155534744263, "learning_rate": 0.00014063351065253933, "loss": 1.6443, "step": 61160 }, { "epoch": 2.03, "grad_norm": 0.6202662587165833, "learning_rate": 0.00014062465537235017, "loss": 1.7256, "step": 61161 }, { "epoch": 2.03, "grad_norm": 0.6170555949211121, "learning_rate": 0.00014061580028561932, "loss": 1.7243, "step": 61162 }, { "epoch": 2.03, "grad_norm": 0.6368217468261719, "learning_rate": 0.00014060694539235743, "loss": 1.7143, "step": 61163 }, { "epoch": 2.03, "grad_norm": 0.645521879196167, "learning_rate": 0.00014059809069257506, "loss": 1.7685, "step": 61164 }, { "epoch": 2.03, "grad_norm": 0.6497113108634949, "learning_rate": 0.00014058923618628314, "loss": 1.7322, "step": 61165 }, { "epoch": 2.04, "grad_norm": 0.6278460025787354, "learning_rate": 0.00014058038187349253, "loss": 1.7263, "step": 61166 }, { "epoch": 2.04, "grad_norm": 0.6364409923553467, "learning_rate": 0.00014057152775421382, "loss": 1.7094, "step": 61167 }, { "epoch": 2.04, "grad_norm": 0.6462271809577942, "learning_rate": 0.0001405626738284577, "loss": 1.6668, "step": 61168 }, { "epoch": 2.04, "grad_norm": 0.598829448223114, "learning_rate": 0.0001405538200962351, "loss": 1.6602, "step": 61169 }, { "epoch": 2.04, "grad_norm": 0.6404990553855896, "learning_rate": 0.0001405449665575567, "loss": 1.7153, "step": 61170 }, { "epoch": 2.04, "grad_norm": 0.634161114692688, "learning_rate": 0.00014053611321243308, "loss": 1.8052, "step": 61171 }, { "epoch": 2.04, "grad_norm": 0.6275418400764465, "learning_rate": 0.0001405272600608751, "loss": 1.7811, "step": 61172 }, { "epoch": 2.04, "grad_norm": 0.6167843341827393, "learning_rate": 0.00014051840710289364, "loss": 1.7038, "step": 61173 }, { "epoch": 2.04, "grad_norm": 0.6320014595985413, "learning_rate": 0.00014050955433849937, "loss": 1.6721, "step": 61174 }, { "epoch": 2.04, "grad_norm": 0.6072850227355957, "learning_rate": 0.0001405007017677029, "loss": 1.7345, "step": 61175 }, { "epoch": 2.04, "grad_norm": 0.6053739786148071, "learning_rate": 0.00014049184939051514, "loss": 1.7651, "step": 61176 }, { "epoch": 2.04, "grad_norm": 0.6213034391403198, "learning_rate": 0.0001404829972069468, "loss": 1.6404, "step": 61177 }, { "epoch": 2.04, "grad_norm": 0.6080091595649719, "learning_rate": 0.00014047414521700847, "loss": 1.7247, "step": 61178 }, { "epoch": 2.04, "grad_norm": 0.5962039828300476, "learning_rate": 0.00014046529342071103, "loss": 1.6755, "step": 61179 }, { "epoch": 2.04, "grad_norm": 0.6297922730445862, "learning_rate": 0.00014045644181806543, "loss": 1.7324, "step": 61180 }, { "epoch": 2.04, "grad_norm": 0.6383582353591919, "learning_rate": 0.000140447590409082, "loss": 1.6299, "step": 61181 }, { "epoch": 2.04, "grad_norm": 0.6323023438453674, "learning_rate": 0.00014043873919377168, "loss": 1.6689, "step": 61182 }, { "epoch": 2.04, "grad_norm": 0.6155735850334167, "learning_rate": 0.00014042988817214533, "loss": 1.6865, "step": 61183 }, { "epoch": 2.04, "grad_norm": 0.6083195209503174, "learning_rate": 0.0001404210373442136, "loss": 1.7328, "step": 61184 }, { "epoch": 2.04, "grad_norm": 0.633310079574585, "learning_rate": 0.0001404121867099871, "loss": 1.7592, "step": 61185 }, { "epoch": 2.04, "grad_norm": 0.6074907779693604, "learning_rate": 0.00014040333626947666, "loss": 1.6778, "step": 61186 }, { "epoch": 2.04, "grad_norm": 0.6362143754959106, "learning_rate": 0.0001403944860226933, "loss": 1.7332, "step": 61187 }, { "epoch": 2.04, "grad_norm": 0.6119740605354309, "learning_rate": 0.00014038563596964728, "loss": 1.6867, "step": 61188 }, { "epoch": 2.04, "grad_norm": 0.6132382750511169, "learning_rate": 0.0001403767861103496, "loss": 1.756, "step": 61189 }, { "epoch": 2.04, "grad_norm": 0.619629979133606, "learning_rate": 0.0001403679364448111, "loss": 1.6943, "step": 61190 }, { "epoch": 2.04, "grad_norm": 0.6054676175117493, "learning_rate": 0.0001403590869730424, "loss": 1.6673, "step": 61191 }, { "epoch": 2.04, "grad_norm": 0.6264855861663818, "learning_rate": 0.00014035023769505412, "loss": 1.7369, "step": 61192 }, { "epoch": 2.04, "grad_norm": 0.6019007563591003, "learning_rate": 0.00014034138861085716, "loss": 1.6936, "step": 61193 }, { "epoch": 2.04, "grad_norm": 0.636080801486969, "learning_rate": 0.00014033253972046243, "loss": 1.714, "step": 61194 }, { "epoch": 2.04, "grad_norm": 0.617986261844635, "learning_rate": 0.0001403236910238802, "loss": 1.7487, "step": 61195 }, { "epoch": 2.04, "grad_norm": 0.6317929625511169, "learning_rate": 0.00014031484252112155, "loss": 1.7478, "step": 61196 }, { "epoch": 2.04, "grad_norm": 0.6187304854393005, "learning_rate": 0.00014030599421219726, "loss": 1.6651, "step": 61197 }, { "epoch": 2.04, "grad_norm": 0.6321940422058105, "learning_rate": 0.00014029714609711794, "loss": 1.7595, "step": 61198 }, { "epoch": 2.04, "grad_norm": 0.6039813160896301, "learning_rate": 0.00014028829817589426, "loss": 1.671, "step": 61199 }, { "epoch": 2.04, "grad_norm": 0.6210909485816956, "learning_rate": 0.00014027945044853713, "loss": 1.7771, "step": 61200 }, { "epoch": 2.04, "grad_norm": 0.6253042221069336, "learning_rate": 0.00014027060291505725, "loss": 1.6993, "step": 61201 }, { "epoch": 2.04, "grad_norm": 0.6153061389923096, "learning_rate": 0.0001402617555754652, "loss": 1.6792, "step": 61202 }, { "epoch": 2.04, "grad_norm": 0.6104673743247986, "learning_rate": 0.0001402529084297718, "loss": 1.6698, "step": 61203 }, { "epoch": 2.04, "grad_norm": 0.6319364309310913, "learning_rate": 0.00014024406147798802, "loss": 1.7476, "step": 61204 }, { "epoch": 2.04, "grad_norm": 0.6227009892463684, "learning_rate": 0.00014023521472012439, "loss": 1.7479, "step": 61205 }, { "epoch": 2.04, "grad_norm": 0.6263406872749329, "learning_rate": 0.0001402263681561915, "loss": 1.667, "step": 61206 }, { "epoch": 2.04, "grad_norm": 0.6096834540367126, "learning_rate": 0.00014021752178620042, "loss": 1.5886, "step": 61207 }, { "epoch": 2.04, "grad_norm": 0.6334256529808044, "learning_rate": 0.0001402086756101617, "loss": 1.7364, "step": 61208 }, { "epoch": 2.04, "grad_norm": 0.6314287781715393, "learning_rate": 0.00014019982962808601, "loss": 1.6422, "step": 61209 }, { "epoch": 2.04, "grad_norm": 0.6040219664573669, "learning_rate": 0.0001401909838399843, "loss": 1.711, "step": 61210 }, { "epoch": 2.04, "grad_norm": 0.6257721781730652, "learning_rate": 0.00014018213824586707, "loss": 1.694, "step": 61211 }, { "epoch": 2.04, "grad_norm": 0.630980372428894, "learning_rate": 0.00014017329284574527, "loss": 1.6503, "step": 61212 }, { "epoch": 2.04, "grad_norm": 0.6188051700592041, "learning_rate": 0.00014016444763962946, "loss": 1.7014, "step": 61213 }, { "epoch": 2.04, "grad_norm": 0.635687530040741, "learning_rate": 0.00014015560262753058, "loss": 1.7199, "step": 61214 }, { "epoch": 2.04, "grad_norm": 0.625234067440033, "learning_rate": 0.0001401467578094592, "loss": 1.6893, "step": 61215 }, { "epoch": 2.04, "grad_norm": 0.641323983669281, "learning_rate": 0.00014013791318542604, "loss": 1.7478, "step": 61216 }, { "epoch": 2.04, "grad_norm": 0.6236904859542847, "learning_rate": 0.00014012906875544197, "loss": 1.7268, "step": 61217 }, { "epoch": 2.04, "grad_norm": 0.6197914481163025, "learning_rate": 0.00014012022451951757, "loss": 1.718, "step": 61218 }, { "epoch": 2.04, "grad_norm": 0.6104041934013367, "learning_rate": 0.00014011138047766375, "loss": 1.7016, "step": 61219 }, { "epoch": 2.04, "grad_norm": 0.6154066920280457, "learning_rate": 0.00014010253662989118, "loss": 1.7108, "step": 61220 }, { "epoch": 2.04, "grad_norm": 0.6316078305244446, "learning_rate": 0.00014009369297621045, "loss": 1.7455, "step": 61221 }, { "epoch": 2.04, "grad_norm": 0.6079638600349426, "learning_rate": 0.0001400848495166325, "loss": 1.7053, "step": 61222 }, { "epoch": 2.04, "grad_norm": 0.6170667409896851, "learning_rate": 0.0001400760062511679, "loss": 1.7592, "step": 61223 }, { "epoch": 2.04, "grad_norm": 0.633985698223114, "learning_rate": 0.00014006716317982757, "loss": 1.7864, "step": 61224 }, { "epoch": 2.04, "grad_norm": 0.6064211130142212, "learning_rate": 0.00014005832030262202, "loss": 1.6562, "step": 61225 }, { "epoch": 2.04, "grad_norm": 0.595893383026123, "learning_rate": 0.00014004947761956225, "loss": 1.7598, "step": 61226 }, { "epoch": 2.04, "grad_norm": 0.6301383972167969, "learning_rate": 0.00014004063513065878, "loss": 1.6944, "step": 61227 }, { "epoch": 2.04, "grad_norm": 0.6271547079086304, "learning_rate": 0.00014003179283592234, "loss": 1.7388, "step": 61228 }, { "epoch": 2.04, "grad_norm": 0.6127680540084839, "learning_rate": 0.00014002295073536387, "loss": 1.7909, "step": 61229 }, { "epoch": 2.04, "grad_norm": 0.6024599671363831, "learning_rate": 0.00014001410882899393, "loss": 1.6759, "step": 61230 }, { "epoch": 2.04, "grad_norm": 0.6328742504119873, "learning_rate": 0.0001400052671168232, "loss": 1.6697, "step": 61231 }, { "epoch": 2.04, "grad_norm": 0.61390620470047, "learning_rate": 0.00013999642559886245, "loss": 1.7398, "step": 61232 }, { "epoch": 2.04, "grad_norm": 0.6241106390953064, "learning_rate": 0.00013998758427512261, "loss": 1.6718, "step": 61233 }, { "epoch": 2.04, "grad_norm": 0.6204619407653809, "learning_rate": 0.0001399787431456143, "loss": 1.7804, "step": 61234 }, { "epoch": 2.04, "grad_norm": 0.6125386953353882, "learning_rate": 0.00013996990221034806, "loss": 1.7184, "step": 61235 }, { "epoch": 2.04, "grad_norm": 0.6256560683250427, "learning_rate": 0.00013996106146933488, "loss": 1.6637, "step": 61236 }, { "epoch": 2.04, "grad_norm": 0.6049728989601135, "learning_rate": 0.00013995222092258545, "loss": 1.7133, "step": 61237 }, { "epoch": 2.04, "grad_norm": 0.6098934412002563, "learning_rate": 0.00013994338057011027, "loss": 1.6747, "step": 61238 }, { "epoch": 2.04, "grad_norm": 0.6137485504150391, "learning_rate": 0.00013993454041192027, "loss": 1.6972, "step": 61239 }, { "epoch": 2.04, "grad_norm": 0.6205629706382751, "learning_rate": 0.00013992570044802627, "loss": 1.7565, "step": 61240 }, { "epoch": 2.04, "grad_norm": 0.6320013999938965, "learning_rate": 0.00013991686067843889, "loss": 1.7248, "step": 61241 }, { "epoch": 2.04, "grad_norm": 0.5983147621154785, "learning_rate": 0.0001399080211031687, "loss": 1.7055, "step": 61242 }, { "epoch": 2.04, "grad_norm": 0.6502641439437866, "learning_rate": 0.00013989918172222673, "loss": 1.7369, "step": 61243 }, { "epoch": 2.04, "grad_norm": 0.6506111025810242, "learning_rate": 0.00013989034253562358, "loss": 1.7011, "step": 61244 }, { "epoch": 2.04, "grad_norm": 0.6280636191368103, "learning_rate": 0.0001398815035433698, "loss": 1.7877, "step": 61245 }, { "epoch": 2.04, "grad_norm": 0.6008032560348511, "learning_rate": 0.0001398726647454763, "loss": 1.7331, "step": 61246 }, { "epoch": 2.04, "grad_norm": 0.61738121509552, "learning_rate": 0.00013986382614195395, "loss": 1.7857, "step": 61247 }, { "epoch": 2.04, "grad_norm": 0.6162024736404419, "learning_rate": 0.00013985498773281328, "loss": 1.7379, "step": 61248 }, { "epoch": 2.04, "grad_norm": 0.650431215763092, "learning_rate": 0.00013984614951806495, "loss": 1.7291, "step": 61249 }, { "epoch": 2.04, "grad_norm": 0.6098279356956482, "learning_rate": 0.0001398373114977199, "loss": 1.7567, "step": 61250 }, { "epoch": 2.04, "grad_norm": 0.607636034488678, "learning_rate": 0.0001398284736717888, "loss": 1.7398, "step": 61251 }, { "epoch": 2.04, "grad_norm": 0.609703779220581, "learning_rate": 0.00013981963604028217, "loss": 1.7063, "step": 61252 }, { "epoch": 2.04, "grad_norm": 0.6346032023429871, "learning_rate": 0.0001398107986032109, "loss": 1.7282, "step": 61253 }, { "epoch": 2.04, "grad_norm": 0.631340503692627, "learning_rate": 0.00013980196136058586, "loss": 1.7542, "step": 61254 }, { "epoch": 2.04, "grad_norm": 0.6039698719978333, "learning_rate": 0.00013979312431241763, "loss": 1.7244, "step": 61255 }, { "epoch": 2.04, "grad_norm": 0.6245028972625732, "learning_rate": 0.0001397842874587168, "loss": 1.7457, "step": 61256 }, { "epoch": 2.04, "grad_norm": 0.6169653534889221, "learning_rate": 0.00013977545079949438, "loss": 1.7051, "step": 61257 }, { "epoch": 2.04, "grad_norm": 0.6342935562133789, "learning_rate": 0.00013976661433476095, "loss": 1.6636, "step": 61258 }, { "epoch": 2.04, "grad_norm": 0.6222278475761414, "learning_rate": 0.00013975777806452712, "loss": 1.6754, "step": 61259 }, { "epoch": 2.04, "grad_norm": 0.6098350882530212, "learning_rate": 0.00013974894198880386, "loss": 1.6574, "step": 61260 }, { "epoch": 2.04, "grad_norm": 0.6314998865127563, "learning_rate": 0.00013974010610760167, "loss": 1.7256, "step": 61261 }, { "epoch": 2.04, "grad_norm": 0.6599200367927551, "learning_rate": 0.00013973127042093148, "loss": 1.6917, "step": 61262 }, { "epoch": 2.04, "grad_norm": 0.6346707344055176, "learning_rate": 0.00013972243492880378, "loss": 1.7197, "step": 61263 }, { "epoch": 2.04, "grad_norm": 0.6317821145057678, "learning_rate": 0.00013971359963122954, "loss": 1.7385, "step": 61264 }, { "epoch": 2.04, "grad_norm": 0.6161850690841675, "learning_rate": 0.00013970476452821942, "loss": 1.7081, "step": 61265 }, { "epoch": 2.04, "grad_norm": 0.6100655198097229, "learning_rate": 0.00013969592961978394, "loss": 1.7308, "step": 61266 }, { "epoch": 2.04, "grad_norm": 0.5933393836021423, "learning_rate": 0.00013968709490593412, "loss": 1.6642, "step": 61267 }, { "epoch": 2.04, "grad_norm": 0.6102612018585205, "learning_rate": 0.00013967826038668044, "loss": 1.7608, "step": 61268 }, { "epoch": 2.04, "grad_norm": 0.5919176936149597, "learning_rate": 0.0001396694260620338, "loss": 1.6784, "step": 61269 }, { "epoch": 2.04, "grad_norm": 0.6069028377532959, "learning_rate": 0.00013966059193200488, "loss": 1.712, "step": 61270 }, { "epoch": 2.04, "grad_norm": 0.6230412125587463, "learning_rate": 0.00013965175799660425, "loss": 1.6998, "step": 61271 }, { "epoch": 2.04, "grad_norm": 0.6308858394622803, "learning_rate": 0.0001396429242558429, "loss": 1.7302, "step": 61272 }, { "epoch": 2.04, "grad_norm": 0.6229353547096252, "learning_rate": 0.00013963409070973128, "loss": 1.7102, "step": 61273 }, { "epoch": 2.04, "grad_norm": 0.6181745529174805, "learning_rate": 0.00013962525735828035, "loss": 1.7549, "step": 61274 }, { "epoch": 2.04, "grad_norm": 0.63592928647995, "learning_rate": 0.00013961642420150063, "loss": 1.7792, "step": 61275 }, { "epoch": 2.04, "grad_norm": 0.6144134402275085, "learning_rate": 0.00013960759123940304, "loss": 1.7261, "step": 61276 }, { "epoch": 2.04, "grad_norm": 0.6080965995788574, "learning_rate": 0.00013959875847199823, "loss": 1.6952, "step": 61277 }, { "epoch": 2.04, "grad_norm": 0.6110870838165283, "learning_rate": 0.00013958992589929676, "loss": 1.7335, "step": 61278 }, { "epoch": 2.04, "grad_norm": 0.6150442361831665, "learning_rate": 0.00013958109352130957, "loss": 1.7126, "step": 61279 }, { "epoch": 2.04, "grad_norm": 0.6096490025520325, "learning_rate": 0.00013957226133804736, "loss": 1.7267, "step": 61280 }, { "epoch": 2.04, "grad_norm": 0.6099240183830261, "learning_rate": 0.00013956342934952064, "loss": 1.6979, "step": 61281 }, { "epoch": 2.04, "grad_norm": 0.628925621509552, "learning_rate": 0.00013955459755574025, "loss": 1.7064, "step": 61282 }, { "epoch": 2.04, "grad_norm": 0.6334477066993713, "learning_rate": 0.00013954576595671709, "loss": 1.7237, "step": 61283 }, { "epoch": 2.04, "grad_norm": 0.6204904317855835, "learning_rate": 0.0001395369345524617, "loss": 1.6927, "step": 61284 }, { "epoch": 2.04, "grad_norm": 0.6231476664543152, "learning_rate": 0.00013952810334298473, "loss": 1.6568, "step": 61285 }, { "epoch": 2.04, "grad_norm": 0.5951319932937622, "learning_rate": 0.00013951927232829712, "loss": 1.615, "step": 61286 }, { "epoch": 2.04, "grad_norm": 0.6300860643386841, "learning_rate": 0.00013951044150840945, "loss": 1.6971, "step": 61287 }, { "epoch": 2.04, "grad_norm": 0.6234089136123657, "learning_rate": 0.00013950161088333233, "loss": 1.7449, "step": 61288 }, { "epoch": 2.04, "grad_norm": 0.6168287396430969, "learning_rate": 0.00013949278045307666, "loss": 1.6747, "step": 61289 }, { "epoch": 2.04, "grad_norm": 0.6188153624534607, "learning_rate": 0.00013948395021765316, "loss": 1.6693, "step": 61290 }, { "epoch": 2.04, "grad_norm": 0.5950708985328674, "learning_rate": 0.00013947512017707255, "loss": 1.7011, "step": 61291 }, { "epoch": 2.04, "grad_norm": 0.6159440279006958, "learning_rate": 0.00013946629033134532, "loss": 1.7361, "step": 61292 }, { "epoch": 2.04, "grad_norm": 0.6132069826126099, "learning_rate": 0.00013945746068048253, "loss": 1.6197, "step": 61293 }, { "epoch": 2.04, "grad_norm": 0.6344912052154541, "learning_rate": 0.00013944863122449468, "loss": 1.7386, "step": 61294 }, { "epoch": 2.04, "grad_norm": 0.6159380078315735, "learning_rate": 0.00013943980196339244, "loss": 1.675, "step": 61295 }, { "epoch": 2.04, "grad_norm": 0.6229506134986877, "learning_rate": 0.00013943097289718663, "loss": 1.6994, "step": 61296 }, { "epoch": 2.04, "grad_norm": 0.6073604226112366, "learning_rate": 0.0001394221440258882, "loss": 1.6992, "step": 61297 }, { "epoch": 2.04, "grad_norm": 0.6320779323577881, "learning_rate": 0.00013941331534950737, "loss": 1.6693, "step": 61298 }, { "epoch": 2.04, "grad_norm": 0.6399348378181458, "learning_rate": 0.00013940448686805513, "loss": 1.8473, "step": 61299 }, { "epoch": 2.04, "grad_norm": 0.5861917734146118, "learning_rate": 0.0001393956585815423, "loss": 1.7129, "step": 61300 }, { "epoch": 2.04, "grad_norm": 0.6191909909248352, "learning_rate": 0.00013938683048997946, "loss": 1.7185, "step": 61301 }, { "epoch": 2.04, "grad_norm": 0.60291987657547, "learning_rate": 0.00013937800259337723, "loss": 1.6611, "step": 61302 }, { "epoch": 2.04, "grad_norm": 0.6346415281295776, "learning_rate": 0.00013936917489174643, "loss": 1.6909, "step": 61303 }, { "epoch": 2.04, "grad_norm": 0.6224215030670166, "learning_rate": 0.000139360347385098, "loss": 1.674, "step": 61304 }, { "epoch": 2.04, "grad_norm": 0.6376472115516663, "learning_rate": 0.00013935152007344228, "loss": 1.7097, "step": 61305 }, { "epoch": 2.04, "grad_norm": 0.6143127679824829, "learning_rate": 0.0001393426929567901, "loss": 1.7557, "step": 61306 }, { "epoch": 2.04, "grad_norm": 0.6402751207351685, "learning_rate": 0.0001393338660351523, "loss": 1.7241, "step": 61307 }, { "epoch": 2.04, "grad_norm": 0.6033194065093994, "learning_rate": 0.00013932503930853956, "loss": 1.7507, "step": 61308 }, { "epoch": 2.04, "grad_norm": 0.6355686187744141, "learning_rate": 0.00013931621277696242, "loss": 1.7747, "step": 61309 }, { "epoch": 2.04, "grad_norm": 0.637388288974762, "learning_rate": 0.00013930738644043174, "loss": 1.7185, "step": 61310 }, { "epoch": 2.04, "grad_norm": 0.6246333718299866, "learning_rate": 0.00013929856029895843, "loss": 1.6619, "step": 61311 }, { "epoch": 2.04, "grad_norm": 0.6154969930648804, "learning_rate": 0.00013928973435255277, "loss": 1.7091, "step": 61312 }, { "epoch": 2.04, "grad_norm": 0.6140713095664978, "learning_rate": 0.00013928090860122565, "loss": 1.7128, "step": 61313 }, { "epoch": 2.04, "grad_norm": 0.6557966470718384, "learning_rate": 0.00013927208304498798, "loss": 1.6788, "step": 61314 }, { "epoch": 2.04, "grad_norm": 0.6095607876777649, "learning_rate": 0.00013926325768385032, "loss": 1.7514, "step": 61315 }, { "epoch": 2.04, "grad_norm": 0.6009148955345154, "learning_rate": 0.00013925443251782327, "loss": 1.6457, "step": 61316 }, { "epoch": 2.04, "grad_norm": 0.6144835948944092, "learning_rate": 0.00013924560754691775, "loss": 1.6861, "step": 61317 }, { "epoch": 2.04, "grad_norm": 0.6352554559707642, "learning_rate": 0.0001392367827711444, "loss": 1.7508, "step": 61318 }, { "epoch": 2.04, "grad_norm": 0.6137653589248657, "learning_rate": 0.00013922795819051376, "loss": 1.7696, "step": 61319 }, { "epoch": 2.04, "grad_norm": 0.6196277737617493, "learning_rate": 0.00013921913380503668, "loss": 1.7131, "step": 61320 }, { "epoch": 2.04, "grad_norm": 0.6025391817092896, "learning_rate": 0.00013921030961472404, "loss": 1.617, "step": 61321 }, { "epoch": 2.04, "grad_norm": 0.6164255142211914, "learning_rate": 0.00013920148561958636, "loss": 1.6969, "step": 61322 }, { "epoch": 2.04, "grad_norm": 0.6273407340049744, "learning_rate": 0.00013919266181963428, "loss": 1.6793, "step": 61323 }, { "epoch": 2.04, "grad_norm": 0.6076944470405579, "learning_rate": 0.00013918383821487874, "loss": 1.7309, "step": 61324 }, { "epoch": 2.04, "grad_norm": 0.6350786685943604, "learning_rate": 0.00013917501480533034, "loss": 1.6685, "step": 61325 }, { "epoch": 2.04, "grad_norm": 0.6092838048934937, "learning_rate": 0.00013916619159099963, "loss": 1.6574, "step": 61326 }, { "epoch": 2.04, "grad_norm": 0.6523579359054565, "learning_rate": 0.0001391573685718976, "loss": 1.6967, "step": 61327 }, { "epoch": 2.04, "grad_norm": 0.6186392307281494, "learning_rate": 0.0001391485457480347, "loss": 1.66, "step": 61328 }, { "epoch": 2.04, "grad_norm": 0.6179606318473816, "learning_rate": 0.00013913972311942188, "loss": 1.7574, "step": 61329 }, { "epoch": 2.04, "grad_norm": 0.6145197153091431, "learning_rate": 0.00013913090068606962, "loss": 1.6736, "step": 61330 }, { "epoch": 2.04, "grad_norm": 0.627255380153656, "learning_rate": 0.00013912207844798887, "loss": 1.7022, "step": 61331 }, { "epoch": 2.04, "grad_norm": 0.6267418265342712, "learning_rate": 0.0001391132564051902, "loss": 1.6806, "step": 61332 }, { "epoch": 2.04, "grad_norm": 0.647893488407135, "learning_rate": 0.00013910443455768422, "loss": 1.6972, "step": 61333 }, { "epoch": 2.04, "grad_norm": 0.627498209476471, "learning_rate": 0.00013909561290548187, "loss": 1.7454, "step": 61334 }, { "epoch": 2.04, "grad_norm": 0.603476881980896, "learning_rate": 0.00013908679144859358, "loss": 1.6962, "step": 61335 }, { "epoch": 2.04, "grad_norm": 0.6372290253639221, "learning_rate": 0.0001390779701870304, "loss": 1.6762, "step": 61336 }, { "epoch": 2.04, "grad_norm": 0.6235145926475525, "learning_rate": 0.0001390691491208028, "loss": 1.6604, "step": 61337 }, { "epoch": 2.04, "grad_norm": 0.6163303256034851, "learning_rate": 0.00013906032824992148, "loss": 1.6409, "step": 61338 }, { "epoch": 2.04, "grad_norm": 0.6303847432136536, "learning_rate": 0.0001390515075743973, "loss": 1.787, "step": 61339 }, { "epoch": 2.04, "grad_norm": 0.6144389510154724, "learning_rate": 0.00013904268709424072, "loss": 1.6844, "step": 61340 }, { "epoch": 2.04, "grad_norm": 0.6247105598449707, "learning_rate": 0.00013903386680946274, "loss": 1.7135, "step": 61341 }, { "epoch": 2.04, "grad_norm": 0.6172444224357605, "learning_rate": 0.0001390250467200738, "loss": 1.7454, "step": 61342 }, { "epoch": 2.04, "grad_norm": 0.6395789980888367, "learning_rate": 0.00013901622682608488, "loss": 1.8367, "step": 61343 }, { "epoch": 2.04, "grad_norm": 0.613781213760376, "learning_rate": 0.00013900740712750656, "loss": 1.6183, "step": 61344 }, { "epoch": 2.04, "grad_norm": 0.6328322291374207, "learning_rate": 0.00013899858762434937, "loss": 1.695, "step": 61345 }, { "epoch": 2.04, "grad_norm": 0.6301019191741943, "learning_rate": 0.00013898976831662428, "loss": 1.6797, "step": 61346 }, { "epoch": 2.04, "grad_norm": 0.6290013194084167, "learning_rate": 0.00013898094920434192, "loss": 1.6883, "step": 61347 }, { "epoch": 2.04, "grad_norm": 0.6126603484153748, "learning_rate": 0.00013897213028751283, "loss": 1.8073, "step": 61348 }, { "epoch": 2.04, "grad_norm": 0.6477445363998413, "learning_rate": 0.00013896331156614789, "loss": 1.712, "step": 61349 }, { "epoch": 2.04, "grad_norm": 0.6130208969116211, "learning_rate": 0.00013895449304025782, "loss": 1.672, "step": 61350 }, { "epoch": 2.04, "grad_norm": 0.6376668810844421, "learning_rate": 0.00013894567470985327, "loss": 1.6785, "step": 61351 }, { "epoch": 2.04, "grad_norm": 0.6198559999465942, "learning_rate": 0.00013893685657494486, "loss": 1.7274, "step": 61352 }, { "epoch": 2.04, "grad_norm": 0.6478403210639954, "learning_rate": 0.00013892803863554346, "loss": 1.7395, "step": 61353 }, { "epoch": 2.04, "grad_norm": 0.5995394587516785, "learning_rate": 0.0001389192208916597, "loss": 1.72, "step": 61354 }, { "epoch": 2.04, "grad_norm": 0.6384313702583313, "learning_rate": 0.00013891040334330413, "loss": 1.6692, "step": 61355 }, { "epoch": 2.04, "grad_norm": 0.6138201951980591, "learning_rate": 0.00013890158599048766, "loss": 1.7186, "step": 61356 }, { "epoch": 2.04, "grad_norm": 0.6450520157814026, "learning_rate": 0.000138892768833221, "loss": 1.7387, "step": 61357 }, { "epoch": 2.04, "grad_norm": 0.653313934803009, "learning_rate": 0.0001388839518715148, "loss": 1.7307, "step": 61358 }, { "epoch": 2.04, "grad_norm": 0.6167369484901428, "learning_rate": 0.00013887513510537958, "loss": 1.6738, "step": 61359 }, { "epoch": 2.04, "grad_norm": 0.6186559200286865, "learning_rate": 0.00013886631853482633, "loss": 1.6993, "step": 61360 }, { "epoch": 2.04, "grad_norm": 0.644775927066803, "learning_rate": 0.00013885750215986565, "loss": 1.75, "step": 61361 }, { "epoch": 2.04, "grad_norm": 0.630986750125885, "learning_rate": 0.00013884868598050814, "loss": 1.723, "step": 61362 }, { "epoch": 2.04, "grad_norm": 0.6382575035095215, "learning_rate": 0.00013883986999676455, "loss": 1.6848, "step": 61363 }, { "epoch": 2.04, "grad_norm": 0.5962796211242676, "learning_rate": 0.0001388310542086457, "loss": 1.6506, "step": 61364 }, { "epoch": 2.04, "grad_norm": 0.66949462890625, "learning_rate": 0.00013882223861616224, "loss": 1.8014, "step": 61365 }, { "epoch": 2.04, "grad_norm": 0.6248634457588196, "learning_rate": 0.00013881342321932468, "loss": 1.718, "step": 61366 }, { "epoch": 2.04, "grad_norm": 0.6302432417869568, "learning_rate": 0.00013880460801814402, "loss": 1.6996, "step": 61367 }, { "epoch": 2.04, "grad_norm": 0.6111756563186646, "learning_rate": 0.0001387957930126308, "loss": 1.6356, "step": 61368 }, { "epoch": 2.04, "grad_norm": 0.6293747425079346, "learning_rate": 0.00013878697820279563, "loss": 1.7221, "step": 61369 }, { "epoch": 2.04, "grad_norm": 0.6034970879554749, "learning_rate": 0.00013877816358864935, "loss": 1.7017, "step": 61370 }, { "epoch": 2.04, "grad_norm": 0.6275407075881958, "learning_rate": 0.00013876934917020267, "loss": 1.6967, "step": 61371 }, { "epoch": 2.04, "grad_norm": 0.6260722279548645, "learning_rate": 0.0001387605349474663, "loss": 1.7105, "step": 61372 }, { "epoch": 2.04, "grad_norm": 0.6310840249061584, "learning_rate": 0.00013875172092045075, "loss": 1.7139, "step": 61373 }, { "epoch": 2.04, "grad_norm": 0.641385555267334, "learning_rate": 0.00013874290708916693, "loss": 1.703, "step": 61374 }, { "epoch": 2.04, "grad_norm": 0.6698122620582581, "learning_rate": 0.00013873409345362548, "loss": 1.7731, "step": 61375 }, { "epoch": 2.04, "grad_norm": 0.649436354637146, "learning_rate": 0.00013872528001383698, "loss": 1.6933, "step": 61376 }, { "epoch": 2.04, "grad_norm": 0.6319220066070557, "learning_rate": 0.00013871646676981233, "loss": 1.8243, "step": 61377 }, { "epoch": 2.04, "grad_norm": 0.6285793781280518, "learning_rate": 0.00013870765372156203, "loss": 1.7343, "step": 61378 }, { "epoch": 2.04, "grad_norm": 0.6164472103118896, "learning_rate": 0.00013869884086909696, "loss": 1.7238, "step": 61379 }, { "epoch": 2.04, "grad_norm": 0.6273540258407593, "learning_rate": 0.00013869002821242766, "loss": 1.725, "step": 61380 }, { "epoch": 2.04, "grad_norm": 0.6017805933952332, "learning_rate": 0.00013868121575156498, "loss": 1.6142, "step": 61381 }, { "epoch": 2.04, "grad_norm": 0.6098361015319824, "learning_rate": 0.00013867240348651954, "loss": 1.7331, "step": 61382 }, { "epoch": 2.04, "grad_norm": 0.642096996307373, "learning_rate": 0.00013866359141730193, "loss": 1.7522, "step": 61383 }, { "epoch": 2.04, "grad_norm": 0.6221994161605835, "learning_rate": 0.00013865477954392306, "loss": 1.7007, "step": 61384 }, { "epoch": 2.04, "grad_norm": 0.6253177523612976, "learning_rate": 0.00013864596786639338, "loss": 1.7567, "step": 61385 }, { "epoch": 2.04, "grad_norm": 0.6198129057884216, "learning_rate": 0.00013863715638472382, "loss": 1.7581, "step": 61386 }, { "epoch": 2.04, "grad_norm": 0.6101395487785339, "learning_rate": 0.00013862834509892504, "loss": 1.6976, "step": 61387 }, { "epoch": 2.04, "grad_norm": 0.611907958984375, "learning_rate": 0.0001386195340090075, "loss": 1.7794, "step": 61388 }, { "epoch": 2.04, "grad_norm": 0.6111746430397034, "learning_rate": 0.0001386107231149822, "loss": 1.6165, "step": 61389 }, { "epoch": 2.04, "grad_norm": 0.6072239875793457, "learning_rate": 0.0001386019124168596, "loss": 1.6547, "step": 61390 }, { "epoch": 2.04, "grad_norm": 0.6385168433189392, "learning_rate": 0.00013859310191465058, "loss": 1.774, "step": 61391 }, { "epoch": 2.04, "grad_norm": 0.6194650530815125, "learning_rate": 0.00013858429160836566, "loss": 1.6171, "step": 61392 }, { "epoch": 2.04, "grad_norm": 0.6074290871620178, "learning_rate": 0.00013857548149801574, "loss": 1.7187, "step": 61393 }, { "epoch": 2.04, "grad_norm": 0.6229881644248962, "learning_rate": 0.0001385666715836114, "loss": 1.7735, "step": 61394 }, { "epoch": 2.04, "grad_norm": 0.6251237392425537, "learning_rate": 0.00013855786186516323, "loss": 1.7294, "step": 61395 }, { "epoch": 2.04, "grad_norm": 0.6028616428375244, "learning_rate": 0.00013854905234268213, "loss": 1.7661, "step": 61396 }, { "epoch": 2.04, "grad_norm": 0.6386591792106628, "learning_rate": 0.0001385402430161787, "loss": 1.8005, "step": 61397 }, { "epoch": 2.04, "grad_norm": 0.631493866443634, "learning_rate": 0.0001385314338856635, "loss": 1.6584, "step": 61398 }, { "epoch": 2.04, "grad_norm": 0.6276713013648987, "learning_rate": 0.00013852262495114736, "loss": 1.8202, "step": 61399 }, { "epoch": 2.04, "grad_norm": 0.6009230017662048, "learning_rate": 0.00013851381621264106, "loss": 1.669, "step": 61400 }, { "epoch": 2.04, "grad_norm": 0.6277291178703308, "learning_rate": 0.0001385050076701552, "loss": 1.739, "step": 61401 }, { "epoch": 2.04, "grad_norm": 0.5978251099586487, "learning_rate": 0.00013849619932370034, "loss": 1.6808, "step": 61402 }, { "epoch": 2.04, "grad_norm": 0.6319586634635925, "learning_rate": 0.00013848739117328744, "loss": 1.6973, "step": 61403 }, { "epoch": 2.04, "grad_norm": 0.6137429475784302, "learning_rate": 0.00013847858321892702, "loss": 1.7759, "step": 61404 }, { "epoch": 2.04, "grad_norm": 0.6220341324806213, "learning_rate": 0.00013846977546062972, "loss": 1.7368, "step": 61405 }, { "epoch": 2.04, "grad_norm": 0.6025591492652893, "learning_rate": 0.00013846096789840628, "loss": 1.6912, "step": 61406 }, { "epoch": 2.04, "grad_norm": 0.6036941409111023, "learning_rate": 0.00013845216053226766, "loss": 1.73, "step": 61407 }, { "epoch": 2.04, "grad_norm": 0.6192564368247986, "learning_rate": 0.0001384433533622241, "loss": 1.5729, "step": 61408 }, { "epoch": 2.04, "grad_norm": 0.6554059982299805, "learning_rate": 0.0001384345463882865, "loss": 1.7711, "step": 61409 }, { "epoch": 2.04, "grad_norm": 0.6174576282501221, "learning_rate": 0.00013842573961046566, "loss": 1.7732, "step": 61410 }, { "epoch": 2.04, "grad_norm": 0.6069210171699524, "learning_rate": 0.00013841693302877217, "loss": 1.6731, "step": 61411 }, { "epoch": 2.04, "grad_norm": 0.60733962059021, "learning_rate": 0.0001384081266432166, "loss": 1.6752, "step": 61412 }, { "epoch": 2.04, "grad_norm": 0.621351420879364, "learning_rate": 0.00013839932045380974, "loss": 1.697, "step": 61413 }, { "epoch": 2.04, "grad_norm": 0.6450603008270264, "learning_rate": 0.00013839051446056256, "loss": 1.7176, "step": 61414 }, { "epoch": 2.04, "grad_norm": 0.6202074885368347, "learning_rate": 0.00013838170866348524, "loss": 1.6586, "step": 61415 }, { "epoch": 2.04, "grad_norm": 0.6096070408821106, "learning_rate": 0.00013837290306258868, "loss": 1.6883, "step": 61416 }, { "epoch": 2.04, "grad_norm": 0.6231719851493835, "learning_rate": 0.00013836409765788374, "loss": 1.6725, "step": 61417 }, { "epoch": 2.04, "grad_norm": 0.6324372887611389, "learning_rate": 0.00013835529244938098, "loss": 1.7444, "step": 61418 }, { "epoch": 2.04, "grad_norm": 0.6178560256958008, "learning_rate": 0.00013834648743709098, "loss": 1.6848, "step": 61419 }, { "epoch": 2.04, "grad_norm": 0.6085560321807861, "learning_rate": 0.0001383376826210245, "loss": 1.6268, "step": 61420 }, { "epoch": 2.04, "grad_norm": 0.6177966594696045, "learning_rate": 0.00013832887800119246, "loss": 1.7105, "step": 61421 }, { "epoch": 2.04, "grad_norm": 0.6283639669418335, "learning_rate": 0.00013832007357760513, "loss": 1.6509, "step": 61422 }, { "epoch": 2.04, "grad_norm": 0.6136783957481384, "learning_rate": 0.0001383112693502734, "loss": 1.724, "step": 61423 }, { "epoch": 2.04, "grad_norm": 0.6136823892593384, "learning_rate": 0.0001383024653192081, "loss": 1.7153, "step": 61424 }, { "epoch": 2.04, "grad_norm": 0.6297089457511902, "learning_rate": 0.00013829366148441978, "loss": 1.7532, "step": 61425 }, { "epoch": 2.04, "grad_norm": 0.6194643378257751, "learning_rate": 0.000138284857845919, "loss": 1.7634, "step": 61426 }, { "epoch": 2.04, "grad_norm": 0.633213460445404, "learning_rate": 0.0001382760544037166, "loss": 1.6805, "step": 61427 }, { "epoch": 2.04, "grad_norm": 0.6370744109153748, "learning_rate": 0.00013826725115782343, "loss": 1.7021, "step": 61428 }, { "epoch": 2.04, "grad_norm": 0.6396992206573486, "learning_rate": 0.0001382584481082498, "loss": 1.6473, "step": 61429 }, { "epoch": 2.04, "grad_norm": 0.6064879894256592, "learning_rate": 0.00013824964525500658, "loss": 1.7292, "step": 61430 }, { "epoch": 2.04, "grad_norm": 0.6464502811431885, "learning_rate": 0.00013824084259810455, "loss": 1.7878, "step": 61431 }, { "epoch": 2.04, "grad_norm": 0.6202075481414795, "learning_rate": 0.0001382320401375543, "loss": 1.7237, "step": 61432 }, { "epoch": 2.04, "grad_norm": 0.6068635582923889, "learning_rate": 0.0001382232378733664, "loss": 1.6495, "step": 61433 }, { "epoch": 2.04, "grad_norm": 0.6121695637702942, "learning_rate": 0.0001382144358055518, "loss": 1.7021, "step": 61434 }, { "epoch": 2.04, "grad_norm": 0.603674054145813, "learning_rate": 0.000138205633934121, "loss": 1.6879, "step": 61435 }, { "epoch": 2.04, "grad_norm": 0.6317718625068665, "learning_rate": 0.0001381968322590846, "loss": 1.7146, "step": 61436 }, { "epoch": 2.04, "grad_norm": 0.6218113899230957, "learning_rate": 0.00013818803078045346, "loss": 1.6921, "step": 61437 }, { "epoch": 2.04, "grad_norm": 0.6469023823738098, "learning_rate": 0.0001381792294982383, "loss": 1.6768, "step": 61438 }, { "epoch": 2.04, "grad_norm": 0.6344637870788574, "learning_rate": 0.00013817042841244968, "loss": 1.7616, "step": 61439 }, { "epoch": 2.04, "grad_norm": 0.6186712384223938, "learning_rate": 0.00013816162752309822, "loss": 1.6708, "step": 61440 }, { "epoch": 2.04, "grad_norm": 0.6149179339408875, "learning_rate": 0.0001381528268301948, "loss": 1.7104, "step": 61441 }, { "epoch": 2.04, "grad_norm": 0.6039031147956848, "learning_rate": 0.00013814402633375002, "loss": 1.7008, "step": 61442 }, { "epoch": 2.04, "grad_norm": 0.6293326616287231, "learning_rate": 0.00013813522603377447, "loss": 1.7848, "step": 61443 }, { "epoch": 2.04, "grad_norm": 0.6325868964195251, "learning_rate": 0.00013812642593027895, "loss": 1.7409, "step": 61444 }, { "epoch": 2.04, "grad_norm": 0.6224324703216553, "learning_rate": 0.00013811762602327404, "loss": 1.7278, "step": 61445 }, { "epoch": 2.04, "grad_norm": 0.6229913830757141, "learning_rate": 0.00013810882631277055, "loss": 1.7329, "step": 61446 }, { "epoch": 2.04, "grad_norm": 0.6313900947570801, "learning_rate": 0.00013810002679877898, "loss": 1.6446, "step": 61447 }, { "epoch": 2.04, "grad_norm": 0.6197430491447449, "learning_rate": 0.00013809122748131025, "loss": 1.7429, "step": 61448 }, { "epoch": 2.04, "grad_norm": 0.6131081581115723, "learning_rate": 0.00013808242836037493, "loss": 1.6297, "step": 61449 }, { "epoch": 2.04, "grad_norm": 0.650459349155426, "learning_rate": 0.00013807362943598356, "loss": 1.6864, "step": 61450 }, { "epoch": 2.04, "grad_norm": 0.6132329702377319, "learning_rate": 0.00013806483070814704, "loss": 1.6462, "step": 61451 }, { "epoch": 2.04, "grad_norm": 0.6292087435722351, "learning_rate": 0.00013805603217687586, "loss": 1.6477, "step": 61452 }, { "epoch": 2.04, "grad_norm": 0.6284645795822144, "learning_rate": 0.00013804723384218092, "loss": 1.7337, "step": 61453 }, { "epoch": 2.04, "grad_norm": 0.6291024684906006, "learning_rate": 0.00013803843570407275, "loss": 1.696, "step": 61454 }, { "epoch": 2.04, "grad_norm": 0.6312735080718994, "learning_rate": 0.00013802963776256193, "loss": 1.7273, "step": 61455 }, { "epoch": 2.04, "grad_norm": 0.6286826729774475, "learning_rate": 0.00013802084001765942, "loss": 1.726, "step": 61456 }, { "epoch": 2.04, "grad_norm": 0.6154383420944214, "learning_rate": 0.0001380120424693756, "loss": 1.7089, "step": 61457 }, { "epoch": 2.04, "grad_norm": 0.5994606018066406, "learning_rate": 0.0001380032451177214, "loss": 1.6535, "step": 61458 }, { "epoch": 2.04, "grad_norm": 0.6146202087402344, "learning_rate": 0.0001379944479627073, "loss": 1.6796, "step": 61459 }, { "epoch": 2.04, "grad_norm": 0.6075558662414551, "learning_rate": 0.00013798565100434418, "loss": 1.7277, "step": 61460 }, { "epoch": 2.04, "grad_norm": 0.656729519367218, "learning_rate": 0.0001379768542426426, "loss": 1.6412, "step": 61461 }, { "epoch": 2.04, "grad_norm": 0.6416419744491577, "learning_rate": 0.00013796805767761315, "loss": 1.7823, "step": 61462 }, { "epoch": 2.04, "grad_norm": 0.6340447664260864, "learning_rate": 0.0001379592613092667, "loss": 1.6451, "step": 61463 }, { "epoch": 2.04, "grad_norm": 0.648942768573761, "learning_rate": 0.00013795046513761383, "loss": 1.6677, "step": 61464 }, { "epoch": 2.04, "grad_norm": 0.6179187893867493, "learning_rate": 0.0001379416691626651, "loss": 1.699, "step": 61465 }, { "epoch": 2.04, "grad_norm": 0.6109229326248169, "learning_rate": 0.00013793287338443135, "loss": 1.725, "step": 61466 }, { "epoch": 2.05, "grad_norm": 0.6426629424095154, "learning_rate": 0.00013792407780292327, "loss": 1.7175, "step": 61467 }, { "epoch": 2.05, "grad_norm": 0.6390286087989807, "learning_rate": 0.0001379152824181515, "loss": 1.744, "step": 61468 }, { "epoch": 2.05, "grad_norm": 0.6269792318344116, "learning_rate": 0.00013790648723012658, "loss": 1.6848, "step": 61469 }, { "epoch": 2.05, "grad_norm": 0.6284006834030151, "learning_rate": 0.00013789769223885944, "loss": 1.7035, "step": 61470 }, { "epoch": 2.05, "grad_norm": 0.6305555105209351, "learning_rate": 0.00013788889744436057, "loss": 1.6672, "step": 61471 }, { "epoch": 2.05, "grad_norm": 0.6237515807151794, "learning_rate": 0.0001378801028466406, "loss": 1.7, "step": 61472 }, { "epoch": 2.05, "grad_norm": 0.6169845461845398, "learning_rate": 0.0001378713084457103, "loss": 1.7594, "step": 61473 }, { "epoch": 2.05, "grad_norm": 0.6026621460914612, "learning_rate": 0.00013786251424158046, "loss": 1.7214, "step": 61474 }, { "epoch": 2.05, "grad_norm": 0.6065531969070435, "learning_rate": 0.00013785372023426164, "loss": 1.6621, "step": 61475 }, { "epoch": 2.05, "grad_norm": 0.6242952942848206, "learning_rate": 0.0001378449264237644, "loss": 1.6967, "step": 61476 }, { "epoch": 2.05, "grad_norm": 0.6132022738456726, "learning_rate": 0.00013783613281009963, "loss": 1.7745, "step": 61477 }, { "epoch": 2.05, "grad_norm": 0.6190557479858398, "learning_rate": 0.0001378273393932779, "loss": 1.7848, "step": 61478 }, { "epoch": 2.05, "grad_norm": 0.6466349363327026, "learning_rate": 0.00013781854617330976, "loss": 1.6923, "step": 61479 }, { "epoch": 2.05, "grad_norm": 0.612770140171051, "learning_rate": 0.00013780975315020607, "loss": 1.6692, "step": 61480 }, { "epoch": 2.05, "grad_norm": 0.6333711743354797, "learning_rate": 0.00013780096032397747, "loss": 1.7038, "step": 61481 }, { "epoch": 2.05, "grad_norm": 0.6325703859329224, "learning_rate": 0.00013779216769463465, "loss": 1.7084, "step": 61482 }, { "epoch": 2.05, "grad_norm": 0.6431586742401123, "learning_rate": 0.00013778337526218815, "loss": 1.7292, "step": 61483 }, { "epoch": 2.05, "grad_norm": 0.6247663497924805, "learning_rate": 0.00013777458302664878, "loss": 1.7216, "step": 61484 }, { "epoch": 2.05, "grad_norm": 0.6263726353645325, "learning_rate": 0.00013776579098802723, "loss": 1.6301, "step": 61485 }, { "epoch": 2.05, "grad_norm": 0.6533176302909851, "learning_rate": 0.00013775699914633395, "loss": 1.7109, "step": 61486 }, { "epoch": 2.05, "grad_norm": 0.6689257025718689, "learning_rate": 0.00013774820750157978, "loss": 1.7394, "step": 61487 }, { "epoch": 2.05, "grad_norm": 0.6173778772354126, "learning_rate": 0.0001377394160537755, "loss": 1.7205, "step": 61488 }, { "epoch": 2.05, "grad_norm": 0.6374589204788208, "learning_rate": 0.00013773062480293164, "loss": 1.7653, "step": 61489 }, { "epoch": 2.05, "grad_norm": 0.6370372176170349, "learning_rate": 0.00013772183374905877, "loss": 1.8208, "step": 61490 }, { "epoch": 2.05, "grad_norm": 0.6018237471580505, "learning_rate": 0.00013771304289216785, "loss": 1.6836, "step": 61491 }, { "epoch": 2.05, "grad_norm": 0.6301659941673279, "learning_rate": 0.00013770425223226935, "loss": 1.6962, "step": 61492 }, { "epoch": 2.05, "grad_norm": 0.6463118195533752, "learning_rate": 0.00013769546176937384, "loss": 1.718, "step": 61493 }, { "epoch": 2.05, "grad_norm": 0.6220232844352722, "learning_rate": 0.00013768667150349225, "loss": 1.7305, "step": 61494 }, { "epoch": 2.05, "grad_norm": 0.6135868430137634, "learning_rate": 0.00013767788143463503, "loss": 1.723, "step": 61495 }, { "epoch": 2.05, "grad_norm": 0.6140295267105103, "learning_rate": 0.00013766909156281306, "loss": 1.6981, "step": 61496 }, { "epoch": 2.05, "grad_norm": 0.6331613659858704, "learning_rate": 0.0001376603018880368, "loss": 1.6572, "step": 61497 }, { "epoch": 2.05, "grad_norm": 0.6213550567626953, "learning_rate": 0.00013765151241031706, "loss": 1.6589, "step": 61498 }, { "epoch": 2.05, "grad_norm": 0.6202613115310669, "learning_rate": 0.00013764272312966451, "loss": 1.6748, "step": 61499 }, { "epoch": 2.05, "grad_norm": 0.6173386573791504, "learning_rate": 0.00013763393404608967, "loss": 1.6275, "step": 61500 }, { "epoch": 2.05, "grad_norm": 0.6107855439186096, "learning_rate": 0.00013762514515960338, "loss": 1.6997, "step": 61501 }, { "epoch": 2.05, "grad_norm": 0.6437028050422668, "learning_rate": 0.00013761635647021618, "loss": 1.7328, "step": 61502 }, { "epoch": 2.05, "grad_norm": 0.6130414009094238, "learning_rate": 0.0001376075679779389, "loss": 1.7034, "step": 61503 }, { "epoch": 2.05, "grad_norm": 0.6164382100105286, "learning_rate": 0.00013759877968278205, "loss": 1.7461, "step": 61504 }, { "epoch": 2.05, "grad_norm": 0.6358962059020996, "learning_rate": 0.00013758999158475628, "loss": 1.6558, "step": 61505 }, { "epoch": 2.05, "grad_norm": 0.6210237145423889, "learning_rate": 0.00013758120368387243, "loss": 1.7393, "step": 61506 }, { "epoch": 2.05, "grad_norm": 0.6274365186691284, "learning_rate": 0.00013757241598014096, "loss": 1.6826, "step": 61507 }, { "epoch": 2.05, "grad_norm": 0.6170324683189392, "learning_rate": 0.00013756362847357277, "loss": 1.7739, "step": 61508 }, { "epoch": 2.05, "grad_norm": 0.6218661665916443, "learning_rate": 0.0001375548411641783, "loss": 1.7723, "step": 61509 }, { "epoch": 2.05, "grad_norm": 0.6189107298851013, "learning_rate": 0.00013754605405196841, "loss": 1.73, "step": 61510 }, { "epoch": 2.05, "grad_norm": 0.6210997700691223, "learning_rate": 0.00013753726713695367, "loss": 1.7193, "step": 61511 }, { "epoch": 2.05, "grad_norm": 0.5907073616981506, "learning_rate": 0.0001375284804191446, "loss": 1.7371, "step": 61512 }, { "epoch": 2.05, "grad_norm": 0.611165463924408, "learning_rate": 0.0001375196938985522, "loss": 1.7291, "step": 61513 }, { "epoch": 2.05, "grad_norm": 0.5990627408027649, "learning_rate": 0.00013751090757518694, "loss": 1.6969, "step": 61514 }, { "epoch": 2.05, "grad_norm": 0.6175311207771301, "learning_rate": 0.00013750212144905935, "loss": 1.6518, "step": 61515 }, { "epoch": 2.05, "grad_norm": 0.6247674226760864, "learning_rate": 0.00013749333552018025, "loss": 1.746, "step": 61516 }, { "epoch": 2.05, "grad_norm": 0.6214916706085205, "learning_rate": 0.0001374845497885604, "loss": 1.726, "step": 61517 }, { "epoch": 2.05, "grad_norm": 0.6090380549430847, "learning_rate": 0.0001374757642542104, "loss": 1.7145, "step": 61518 }, { "epoch": 2.05, "grad_norm": 0.6119027137756348, "learning_rate": 0.00013746697891714072, "loss": 1.7068, "step": 61519 }, { "epoch": 2.05, "grad_norm": 0.6194946765899658, "learning_rate": 0.00013745819377736232, "loss": 1.7736, "step": 61520 }, { "epoch": 2.05, "grad_norm": 0.6038391590118408, "learning_rate": 0.00013744940883488572, "loss": 1.7523, "step": 61521 }, { "epoch": 2.05, "grad_norm": 0.5920557379722595, "learning_rate": 0.00013744062408972148, "loss": 1.7022, "step": 61522 }, { "epoch": 2.05, "grad_norm": 0.6285304427146912, "learning_rate": 0.00013743183954188033, "loss": 1.703, "step": 61523 }, { "epoch": 2.05, "grad_norm": 0.6149886250495911, "learning_rate": 0.00013742305519137324, "loss": 1.7131, "step": 61524 }, { "epoch": 2.05, "grad_norm": 0.609845757484436, "learning_rate": 0.00013741427103821039, "loss": 1.7457, "step": 61525 }, { "epoch": 2.05, "grad_norm": 0.6247430443763733, "learning_rate": 0.00013740548708240263, "loss": 1.6769, "step": 61526 }, { "epoch": 2.05, "grad_norm": 0.6071978807449341, "learning_rate": 0.00013739670332396075, "loss": 1.7067, "step": 61527 }, { "epoch": 2.05, "grad_norm": 0.60244220495224, "learning_rate": 0.00013738791976289534, "loss": 1.6596, "step": 61528 }, { "epoch": 2.05, "grad_norm": 0.6112425923347473, "learning_rate": 0.00013737913639921693, "loss": 1.6722, "step": 61529 }, { "epoch": 2.05, "grad_norm": 0.6119443774223328, "learning_rate": 0.00013737035323293628, "loss": 1.6806, "step": 61530 }, { "epoch": 2.05, "grad_norm": 0.6264909505844116, "learning_rate": 0.00013736157026406426, "loss": 1.7098, "step": 61531 }, { "epoch": 2.05, "grad_norm": 0.617344081401825, "learning_rate": 0.00013735278749261114, "loss": 1.7001, "step": 61532 }, { "epoch": 2.05, "grad_norm": 0.6401188373565674, "learning_rate": 0.00013734400491858775, "loss": 1.7844, "step": 61533 }, { "epoch": 2.05, "grad_norm": 0.6363831758499146, "learning_rate": 0.0001373352225420049, "loss": 1.732, "step": 61534 }, { "epoch": 2.05, "grad_norm": 0.6012490391731262, "learning_rate": 0.0001373264403628731, "loss": 1.7491, "step": 61535 }, { "epoch": 2.05, "grad_norm": 0.6065024137496948, "learning_rate": 0.00013731765838120296, "loss": 1.7271, "step": 61536 }, { "epoch": 2.05, "grad_norm": 0.6028480529785156, "learning_rate": 0.00013730887659700516, "loss": 1.7605, "step": 61537 }, { "epoch": 2.05, "grad_norm": 0.6173416376113892, "learning_rate": 0.00013730009501029065, "loss": 1.7564, "step": 61538 }, { "epoch": 2.05, "grad_norm": 0.6713539361953735, "learning_rate": 0.00013729131362106965, "loss": 1.6635, "step": 61539 }, { "epoch": 2.05, "grad_norm": 0.6104961037635803, "learning_rate": 0.000137282532429353, "loss": 1.6709, "step": 61540 }, { "epoch": 2.05, "grad_norm": 0.620245635509491, "learning_rate": 0.0001372737514351515, "loss": 1.7146, "step": 61541 }, { "epoch": 2.05, "grad_norm": 0.6146896481513977, "learning_rate": 0.00013726497063847571, "loss": 1.6208, "step": 61542 }, { "epoch": 2.05, "grad_norm": 0.6160085201263428, "learning_rate": 0.00013725619003933614, "loss": 1.7048, "step": 61543 }, { "epoch": 2.05, "grad_norm": 0.6076108813285828, "learning_rate": 0.00013724740963774358, "loss": 1.6892, "step": 61544 }, { "epoch": 2.05, "grad_norm": 0.633874773979187, "learning_rate": 0.0001372386294337089, "loss": 1.7224, "step": 61545 }, { "epoch": 2.05, "grad_norm": 0.6164913773536682, "learning_rate": 0.00013722984942724233, "loss": 1.67, "step": 61546 }, { "epoch": 2.05, "grad_norm": 0.6034560203552246, "learning_rate": 0.0001372210696183547, "loss": 1.6335, "step": 61547 }, { "epoch": 2.05, "grad_norm": 0.6284170150756836, "learning_rate": 0.0001372122900070569, "loss": 1.7403, "step": 61548 }, { "epoch": 2.05, "grad_norm": 0.6299524903297424, "learning_rate": 0.0001372035105933593, "loss": 1.771, "step": 61549 }, { "epoch": 2.05, "grad_norm": 0.6253650784492493, "learning_rate": 0.00013719473137727262, "loss": 1.802, "step": 61550 }, { "epoch": 2.05, "grad_norm": 0.6066194772720337, "learning_rate": 0.00013718595235880763, "loss": 1.6703, "step": 61551 }, { "epoch": 2.05, "grad_norm": 0.6449807286262512, "learning_rate": 0.00013717717353797486, "loss": 1.7433, "step": 61552 }, { "epoch": 2.05, "grad_norm": 0.6373516321182251, "learning_rate": 0.00013716839491478497, "loss": 1.651, "step": 61553 }, { "epoch": 2.05, "grad_norm": 0.6008410453796387, "learning_rate": 0.00013715961648924863, "loss": 1.7307, "step": 61554 }, { "epoch": 2.05, "grad_norm": 0.5986825823783875, "learning_rate": 0.00013715083826137663, "loss": 1.6765, "step": 61555 }, { "epoch": 2.05, "grad_norm": 0.6199740767478943, "learning_rate": 0.00013714206023117948, "loss": 1.7085, "step": 61556 }, { "epoch": 2.05, "grad_norm": 0.6213842034339905, "learning_rate": 0.00013713328239866782, "loss": 1.7072, "step": 61557 }, { "epoch": 2.05, "grad_norm": 0.6854100227355957, "learning_rate": 0.0001371245047638524, "loss": 1.7176, "step": 61558 }, { "epoch": 2.05, "grad_norm": 0.6241161227226257, "learning_rate": 0.0001371157273267439, "loss": 1.7658, "step": 61559 }, { "epoch": 2.05, "grad_norm": 0.6402220726013184, "learning_rate": 0.00013710695008735277, "loss": 1.6543, "step": 61560 }, { "epoch": 2.05, "grad_norm": 0.6065995693206787, "learning_rate": 0.0001370981730456899, "loss": 1.7066, "step": 61561 }, { "epoch": 2.05, "grad_norm": 0.6360622644424438, "learning_rate": 0.00013708939620176577, "loss": 1.6626, "step": 61562 }, { "epoch": 2.05, "grad_norm": 0.613292932510376, "learning_rate": 0.00013708061955559122, "loss": 1.6638, "step": 61563 }, { "epoch": 2.05, "grad_norm": 0.5983847379684448, "learning_rate": 0.00013707184310717666, "loss": 1.6237, "step": 61564 }, { "epoch": 2.05, "grad_norm": 0.611248791217804, "learning_rate": 0.000137063066856533, "loss": 1.6604, "step": 61565 }, { "epoch": 2.05, "grad_norm": 0.640778124332428, "learning_rate": 0.00013705429080367076, "loss": 1.7184, "step": 61566 }, { "epoch": 2.05, "grad_norm": 0.6496175527572632, "learning_rate": 0.00013704551494860047, "loss": 1.7949, "step": 61567 }, { "epoch": 2.05, "grad_norm": 0.6240695714950562, "learning_rate": 0.00013703673929133303, "loss": 1.712, "step": 61568 }, { "epoch": 2.05, "grad_norm": 0.6243255138397217, "learning_rate": 0.0001370279638318789, "loss": 1.6926, "step": 61569 }, { "epoch": 2.05, "grad_norm": 0.6262256503105164, "learning_rate": 0.0001370191885702489, "loss": 1.6387, "step": 61570 }, { "epoch": 2.05, "grad_norm": 0.6120246648788452, "learning_rate": 0.0001370104135064536, "loss": 1.653, "step": 61571 }, { "epoch": 2.05, "grad_norm": 0.6248162984848022, "learning_rate": 0.0001370016386405035, "loss": 1.7401, "step": 61572 }, { "epoch": 2.05, "grad_norm": 0.6433102488517761, "learning_rate": 0.00013699286397240954, "loss": 1.7147, "step": 61573 }, { "epoch": 2.05, "grad_norm": 0.6115557551383972, "learning_rate": 0.0001369840895021821, "loss": 1.625, "step": 61574 }, { "epoch": 2.05, "grad_norm": 0.6211625337600708, "learning_rate": 0.00013697531522983207, "loss": 1.7747, "step": 61575 }, { "epoch": 2.05, "grad_norm": 0.6507084965705872, "learning_rate": 0.00013696654115536987, "loss": 1.6772, "step": 61576 }, { "epoch": 2.05, "grad_norm": 0.6142850518226624, "learning_rate": 0.0001369577672788064, "loss": 1.7748, "step": 61577 }, { "epoch": 2.05, "grad_norm": 0.6227501034736633, "learning_rate": 0.00013694899360015218, "loss": 1.7381, "step": 61578 }, { "epoch": 2.05, "grad_norm": 0.6262158751487732, "learning_rate": 0.00013694022011941773, "loss": 1.6746, "step": 61579 }, { "epoch": 2.05, "grad_norm": 0.6517915725708008, "learning_rate": 0.0001369314468366139, "loss": 1.6916, "step": 61580 }, { "epoch": 2.05, "grad_norm": 0.6310909390449524, "learning_rate": 0.0001369226737517513, "loss": 1.7039, "step": 61581 }, { "epoch": 2.05, "grad_norm": 0.5962733030319214, "learning_rate": 0.00013691390086484044, "loss": 1.7182, "step": 61582 }, { "epoch": 2.05, "grad_norm": 0.6251683235168457, "learning_rate": 0.0001369051281758921, "loss": 1.6624, "step": 61583 }, { "epoch": 2.05, "grad_norm": 0.6203468441963196, "learning_rate": 0.00013689635568491697, "loss": 1.6587, "step": 61584 }, { "epoch": 2.05, "grad_norm": 0.627922534942627, "learning_rate": 0.00013688758339192565, "loss": 1.6738, "step": 61585 }, { "epoch": 2.05, "grad_norm": 0.6533862352371216, "learning_rate": 0.00013687881129692867, "loss": 1.8413, "step": 61586 }, { "epoch": 2.05, "grad_norm": 0.6149023771286011, "learning_rate": 0.00013687003939993688, "loss": 1.7794, "step": 61587 }, { "epoch": 2.05, "grad_norm": 0.6382749676704407, "learning_rate": 0.00013686126770096082, "loss": 1.6919, "step": 61588 }, { "epoch": 2.05, "grad_norm": 0.6224200129508972, "learning_rate": 0.00013685249620001104, "loss": 1.6355, "step": 61589 }, { "epoch": 2.05, "grad_norm": 0.6430201530456543, "learning_rate": 0.00013684372489709828, "loss": 1.7111, "step": 61590 }, { "epoch": 2.05, "grad_norm": 0.6560434103012085, "learning_rate": 0.0001368349537922333, "loss": 1.6875, "step": 61591 }, { "epoch": 2.05, "grad_norm": 0.6009636521339417, "learning_rate": 0.00013682618288542669, "loss": 1.7082, "step": 61592 }, { "epoch": 2.05, "grad_norm": 0.6255137920379639, "learning_rate": 0.0001368174121766889, "loss": 1.7241, "step": 61593 }, { "epoch": 2.05, "grad_norm": 0.5922872424125671, "learning_rate": 0.00013680864166603084, "loss": 1.7141, "step": 61594 }, { "epoch": 2.05, "grad_norm": 0.6163794994354248, "learning_rate": 0.00013679987135346305, "loss": 1.6932, "step": 61595 }, { "epoch": 2.05, "grad_norm": 0.6218802332878113, "learning_rate": 0.00013679110123899607, "loss": 1.67, "step": 61596 }, { "epoch": 2.05, "grad_norm": 0.6505094766616821, "learning_rate": 0.00013678233132264066, "loss": 1.6761, "step": 61597 }, { "epoch": 2.05, "grad_norm": 0.619185209274292, "learning_rate": 0.0001367735616044075, "loss": 1.6791, "step": 61598 }, { "epoch": 2.05, "grad_norm": 0.6205735802650452, "learning_rate": 0.00013676479208430726, "loss": 1.7196, "step": 61599 }, { "epoch": 2.05, "grad_norm": 0.6331912875175476, "learning_rate": 0.00013675602276235036, "loss": 1.6368, "step": 61600 }, { "epoch": 2.05, "grad_norm": 0.6356785893440247, "learning_rate": 0.0001367472536385477, "loss": 1.6406, "step": 61601 }, { "epoch": 2.05, "grad_norm": 0.6335084438323975, "learning_rate": 0.00013673848471290986, "loss": 1.7457, "step": 61602 }, { "epoch": 2.05, "grad_norm": 0.6198650002479553, "learning_rate": 0.00013672971598544727, "loss": 1.7426, "step": 61603 }, { "epoch": 2.05, "grad_norm": 0.6426188349723816, "learning_rate": 0.00013672094745617082, "loss": 1.6571, "step": 61604 }, { "epoch": 2.05, "grad_norm": 0.6176185607910156, "learning_rate": 0.00013671217912509113, "loss": 1.7363, "step": 61605 }, { "epoch": 2.05, "grad_norm": 0.6131662130355835, "learning_rate": 0.00013670341099221884, "loss": 1.6971, "step": 61606 }, { "epoch": 2.05, "grad_norm": 0.6254614591598511, "learning_rate": 0.00013669464305756442, "loss": 1.7442, "step": 61607 }, { "epoch": 2.05, "grad_norm": 0.6416682004928589, "learning_rate": 0.00013668587532113873, "loss": 1.6914, "step": 61608 }, { "epoch": 2.05, "grad_norm": 0.6346734762191772, "learning_rate": 0.00013667710778295233, "loss": 1.7068, "step": 61609 }, { "epoch": 2.05, "grad_norm": 0.6213590502738953, "learning_rate": 0.0001366683404430158, "loss": 1.7208, "step": 61610 }, { "epoch": 2.05, "grad_norm": 0.6217200756072998, "learning_rate": 0.00013665957330133988, "loss": 1.696, "step": 61611 }, { "epoch": 2.05, "grad_norm": 0.6648373007774353, "learning_rate": 0.00013665080635793508, "loss": 1.6496, "step": 61612 }, { "epoch": 2.05, "grad_norm": 0.6342737078666687, "learning_rate": 0.00013664203961281227, "loss": 1.6888, "step": 61613 }, { "epoch": 2.05, "grad_norm": 0.6154128313064575, "learning_rate": 0.00013663327306598185, "loss": 1.6512, "step": 61614 }, { "epoch": 2.05, "grad_norm": 0.6172235608100891, "learning_rate": 0.00013662450671745463, "loss": 1.6507, "step": 61615 }, { "epoch": 2.05, "grad_norm": 0.6464677453041077, "learning_rate": 0.00013661574056724122, "loss": 1.8078, "step": 61616 }, { "epoch": 2.05, "grad_norm": 0.6408476829528809, "learning_rate": 0.0001366069746153521, "loss": 1.7508, "step": 61617 }, { "epoch": 2.05, "grad_norm": 0.6264316439628601, "learning_rate": 0.00013659820886179817, "loss": 1.7222, "step": 61618 }, { "epoch": 2.05, "grad_norm": 0.6306285858154297, "learning_rate": 0.0001365894433065898, "loss": 1.7028, "step": 61619 }, { "epoch": 2.05, "grad_norm": 0.6186371445655823, "learning_rate": 0.00013658067794973788, "loss": 1.6992, "step": 61620 }, { "epoch": 2.05, "grad_norm": 0.632200300693512, "learning_rate": 0.00013657191279125295, "loss": 1.7111, "step": 61621 }, { "epoch": 2.05, "grad_norm": 0.6902598142623901, "learning_rate": 0.00013656314783114555, "loss": 1.6814, "step": 61622 }, { "epoch": 2.05, "grad_norm": 0.6577616930007935, "learning_rate": 0.0001365543830694265, "loss": 1.6712, "step": 61623 }, { "epoch": 2.05, "grad_norm": 0.6321958899497986, "learning_rate": 0.00013654561850610624, "loss": 1.714, "step": 61624 }, { "epoch": 2.05, "grad_norm": 0.612922728061676, "learning_rate": 0.0001365368541411956, "loss": 1.7196, "step": 61625 }, { "epoch": 2.05, "grad_norm": 0.6246379613876343, "learning_rate": 0.00013652808997470506, "loss": 1.6905, "step": 61626 }, { "epoch": 2.05, "grad_norm": 0.6064888834953308, "learning_rate": 0.0001365193260066454, "loss": 1.7556, "step": 61627 }, { "epoch": 2.05, "grad_norm": 0.6064164042472839, "learning_rate": 0.0001365105622370272, "loss": 1.6979, "step": 61628 }, { "epoch": 2.05, "grad_norm": 0.6386668682098389, "learning_rate": 0.00013650179866586104, "loss": 1.6693, "step": 61629 }, { "epoch": 2.05, "grad_norm": 0.6493942737579346, "learning_rate": 0.00013649303529315762, "loss": 1.7379, "step": 61630 }, { "epoch": 2.05, "grad_norm": 0.6159563064575195, "learning_rate": 0.00013648427211892763, "loss": 1.7082, "step": 61631 }, { "epoch": 2.05, "grad_norm": 0.6173943281173706, "learning_rate": 0.00013647550914318152, "loss": 1.7016, "step": 61632 }, { "epoch": 2.05, "grad_norm": 0.6065173745155334, "learning_rate": 0.00013646674636593004, "loss": 1.7145, "step": 61633 }, { "epoch": 2.05, "grad_norm": 0.6328535079956055, "learning_rate": 0.00013645798378718394, "loss": 1.7226, "step": 61634 }, { "epoch": 2.05, "grad_norm": 0.6169157028198242, "learning_rate": 0.00013644922140695375, "loss": 1.7052, "step": 61635 }, { "epoch": 2.05, "grad_norm": 0.6453319191932678, "learning_rate": 0.00013644045922525002, "loss": 1.7048, "step": 61636 }, { "epoch": 2.05, "grad_norm": 0.6346915364265442, "learning_rate": 0.00013643169724208355, "loss": 1.7593, "step": 61637 }, { "epoch": 2.05, "grad_norm": 0.6273993849754333, "learning_rate": 0.0001364229354574649, "loss": 1.7526, "step": 61638 }, { "epoch": 2.05, "grad_norm": 0.6213563084602356, "learning_rate": 0.00013641417387140462, "loss": 1.7171, "step": 61639 }, { "epoch": 2.05, "grad_norm": 0.6416969895362854, "learning_rate": 0.00013640541248391343, "loss": 1.7397, "step": 61640 }, { "epoch": 2.05, "grad_norm": 0.6311931610107422, "learning_rate": 0.0001363966512950022, "loss": 1.6859, "step": 61641 }, { "epoch": 2.05, "grad_norm": 0.7150188088417053, "learning_rate": 0.00013638789030468105, "loss": 1.7414, "step": 61642 }, { "epoch": 2.05, "grad_norm": 0.6090793013572693, "learning_rate": 0.00013637912951296093, "loss": 1.681, "step": 61643 }, { "epoch": 2.05, "grad_norm": 0.6161657571792603, "learning_rate": 0.00013637036891985256, "loss": 1.7537, "step": 61644 }, { "epoch": 2.05, "grad_norm": 0.6219984889030457, "learning_rate": 0.00013636160852536643, "loss": 1.6334, "step": 61645 }, { "epoch": 2.05, "grad_norm": 0.6160393953323364, "learning_rate": 0.00013635284832951312, "loss": 1.7068, "step": 61646 }, { "epoch": 2.05, "grad_norm": 0.6265818476676941, "learning_rate": 0.0001363440883323033, "loss": 1.634, "step": 61647 }, { "epoch": 2.05, "grad_norm": 0.6246123313903809, "learning_rate": 0.00013633532853374786, "loss": 1.6424, "step": 61648 }, { "epoch": 2.05, "grad_norm": 0.6047483682632446, "learning_rate": 0.00013632656893385704, "loss": 1.7129, "step": 61649 }, { "epoch": 2.05, "grad_norm": 0.6131422519683838, "learning_rate": 0.0001363178095326416, "loss": 1.7368, "step": 61650 }, { "epoch": 2.05, "grad_norm": 0.6298206448554993, "learning_rate": 0.00013630905033011238, "loss": 1.7072, "step": 61651 }, { "epoch": 2.05, "grad_norm": 0.6251435875892639, "learning_rate": 0.0001363002913262798, "loss": 1.722, "step": 61652 }, { "epoch": 2.05, "grad_norm": 0.6113038659095764, "learning_rate": 0.00013629153252115448, "loss": 1.712, "step": 61653 }, { "epoch": 2.05, "grad_norm": 0.6196500062942505, "learning_rate": 0.00013628277391474711, "loss": 1.6961, "step": 61654 }, { "epoch": 2.05, "grad_norm": 0.6358391642570496, "learning_rate": 0.00013627401550706853, "loss": 1.686, "step": 61655 }, { "epoch": 2.05, "grad_norm": 0.6164013147354126, "learning_rate": 0.00013626525729812898, "loss": 1.7006, "step": 61656 }, { "epoch": 2.05, "grad_norm": 0.6300088167190552, "learning_rate": 0.00013625649928793927, "loss": 1.7383, "step": 61657 }, { "epoch": 2.05, "grad_norm": 0.6311336159706116, "learning_rate": 0.00013624774147651014, "loss": 1.763, "step": 61658 }, { "epoch": 2.05, "grad_norm": 0.6299490332603455, "learning_rate": 0.00013623898386385216, "loss": 1.7454, "step": 61659 }, { "epoch": 2.05, "grad_norm": 0.6092640161514282, "learning_rate": 0.00013623022644997578, "loss": 1.6271, "step": 61660 }, { "epoch": 2.05, "grad_norm": 0.6315196752548218, "learning_rate": 0.00013622146923489176, "loss": 1.6887, "step": 61661 }, { "epoch": 2.05, "grad_norm": 0.6129171848297119, "learning_rate": 0.00013621271221861102, "loss": 1.6362, "step": 61662 }, { "epoch": 2.05, "grad_norm": 0.6182136535644531, "learning_rate": 0.00013620395540114364, "loss": 1.6479, "step": 61663 }, { "epoch": 2.05, "grad_norm": 0.6428191661834717, "learning_rate": 0.00013619519878250053, "loss": 1.7071, "step": 61664 }, { "epoch": 2.05, "grad_norm": 0.6430352926254272, "learning_rate": 0.00013618644236269246, "loss": 1.6937, "step": 61665 }, { "epoch": 2.05, "grad_norm": 0.616722822189331, "learning_rate": 0.00013617768614172988, "loss": 1.7144, "step": 61666 }, { "epoch": 2.05, "grad_norm": 0.6433179378509521, "learning_rate": 0.00013616893011962334, "loss": 1.7944, "step": 61667 }, { "epoch": 2.05, "grad_norm": 0.6184707283973694, "learning_rate": 0.00013616017429638375, "loss": 1.7508, "step": 61668 }, { "epoch": 2.05, "grad_norm": 0.6344327330589294, "learning_rate": 0.0001361514186720215, "loss": 1.7086, "step": 61669 }, { "epoch": 2.05, "grad_norm": 0.6268168091773987, "learning_rate": 0.0001361426632465472, "loss": 1.7541, "step": 61670 }, { "epoch": 2.05, "grad_norm": 0.6295334696769714, "learning_rate": 0.00013613390801997156, "loss": 1.78, "step": 61671 }, { "epoch": 2.05, "grad_norm": 0.6613619923591614, "learning_rate": 0.0001361251529923053, "loss": 1.6749, "step": 61672 }, { "epoch": 2.05, "grad_norm": 0.6243694424629211, "learning_rate": 0.000136116398163559, "loss": 1.7732, "step": 61673 }, { "epoch": 2.05, "grad_norm": 0.64205402135849, "learning_rate": 0.0001361076435337431, "loss": 1.6669, "step": 61674 }, { "epoch": 2.05, "grad_norm": 0.6032131314277649, "learning_rate": 0.0001360988891028685, "loss": 1.7246, "step": 61675 }, { "epoch": 2.05, "grad_norm": 0.610607385635376, "learning_rate": 0.0001360901348709457, "loss": 1.6905, "step": 61676 }, { "epoch": 2.05, "grad_norm": 0.6554216146469116, "learning_rate": 0.0001360813808379852, "loss": 1.7138, "step": 61677 }, { "epoch": 2.05, "grad_norm": 0.6364830732345581, "learning_rate": 0.0001360726270039979, "loss": 1.7462, "step": 61678 }, { "epoch": 2.05, "grad_norm": 0.6369485855102539, "learning_rate": 0.00013606387336899414, "loss": 1.7026, "step": 61679 }, { "epoch": 2.05, "grad_norm": 0.6059752106666565, "learning_rate": 0.00013605511993298476, "loss": 1.7349, "step": 61680 }, { "epoch": 2.05, "grad_norm": 0.6256052255630493, "learning_rate": 0.00013604636669598024, "loss": 1.73, "step": 61681 }, { "epoch": 2.05, "grad_norm": 0.6167227029800415, "learning_rate": 0.0001360376136579914, "loss": 1.6889, "step": 61682 }, { "epoch": 2.05, "grad_norm": 0.6343370079994202, "learning_rate": 0.0001360288608190287, "loss": 1.658, "step": 61683 }, { "epoch": 2.05, "grad_norm": 0.6331462860107422, "learning_rate": 0.0001360201081791027, "loss": 1.7497, "step": 61684 }, { "epoch": 2.05, "grad_norm": 0.6481809020042419, "learning_rate": 0.00013601135573822426, "loss": 1.6842, "step": 61685 }, { "epoch": 2.05, "grad_norm": 0.6501690745353699, "learning_rate": 0.00013600260349640373, "loss": 1.692, "step": 61686 }, { "epoch": 2.05, "grad_norm": 0.6029707789421082, "learning_rate": 0.000135993851453652, "loss": 1.6497, "step": 61687 }, { "epoch": 2.05, "grad_norm": 0.6351339817047119, "learning_rate": 0.00013598509960997956, "loss": 1.6875, "step": 61688 }, { "epoch": 2.05, "grad_norm": 0.6406311392784119, "learning_rate": 0.00013597634796539698, "loss": 1.7601, "step": 61689 }, { "epoch": 2.05, "grad_norm": 0.6482413411140442, "learning_rate": 0.000135967596519915, "loss": 1.6679, "step": 61690 }, { "epoch": 2.05, "grad_norm": 0.6314672827720642, "learning_rate": 0.00013595884527354407, "loss": 1.6549, "step": 61691 }, { "epoch": 2.05, "grad_norm": 0.6406851410865784, "learning_rate": 0.0001359500942262951, "loss": 1.7432, "step": 61692 }, { "epoch": 2.05, "grad_norm": 0.6258600354194641, "learning_rate": 0.0001359413433781784, "loss": 1.7155, "step": 61693 }, { "epoch": 2.05, "grad_norm": 0.6359564661979675, "learning_rate": 0.00013593259272920484, "loss": 1.732, "step": 61694 }, { "epoch": 2.05, "grad_norm": 0.6385771632194519, "learning_rate": 0.00013592384227938497, "loss": 1.6508, "step": 61695 }, { "epoch": 2.05, "grad_norm": 0.6233561635017395, "learning_rate": 0.00013591509202872922, "loss": 1.6426, "step": 61696 }, { "epoch": 2.05, "grad_norm": 0.6381032466888428, "learning_rate": 0.0001359063419772485, "loss": 1.8041, "step": 61697 }, { "epoch": 2.05, "grad_norm": 0.6224628686904907, "learning_rate": 0.00013589759212495332, "loss": 1.7059, "step": 61698 }, { "epoch": 2.05, "grad_norm": 0.6109148263931274, "learning_rate": 0.00013588884247185418, "loss": 1.5938, "step": 61699 }, { "epoch": 2.05, "grad_norm": 0.6028047204017639, "learning_rate": 0.00013588009301796178, "loss": 1.703, "step": 61700 }, { "epoch": 2.05, "grad_norm": 0.6225947141647339, "learning_rate": 0.00013587134376328688, "loss": 1.6427, "step": 61701 }, { "epoch": 2.05, "grad_norm": 0.6177605986595154, "learning_rate": 0.00013586259470784, "loss": 1.6149, "step": 61702 }, { "epoch": 2.05, "grad_norm": 0.6221736073493958, "learning_rate": 0.0001358538458516316, "loss": 1.7595, "step": 61703 }, { "epoch": 2.05, "grad_norm": 0.636317789554596, "learning_rate": 0.00013584509719467258, "loss": 1.7359, "step": 61704 }, { "epoch": 2.05, "grad_norm": 0.6293550133705139, "learning_rate": 0.00013583634873697345, "loss": 1.7422, "step": 61705 }, { "epoch": 2.05, "grad_norm": 0.6338822245597839, "learning_rate": 0.00013582760047854464, "loss": 1.7231, "step": 61706 }, { "epoch": 2.05, "grad_norm": 0.6039565205574036, "learning_rate": 0.00013581885241939698, "loss": 1.6874, "step": 61707 }, { "epoch": 2.05, "grad_norm": 0.6554745435714722, "learning_rate": 0.00013581010455954114, "loss": 1.7947, "step": 61708 }, { "epoch": 2.05, "grad_norm": 0.630708634853363, "learning_rate": 0.00013580135689898767, "loss": 1.7344, "step": 61709 }, { "epoch": 2.05, "grad_norm": 0.6052767634391785, "learning_rate": 0.000135792609437747, "loss": 1.7323, "step": 61710 }, { "epoch": 2.05, "grad_norm": 0.6067220568656921, "learning_rate": 0.0001357838621758299, "loss": 1.7331, "step": 61711 }, { "epoch": 2.05, "grad_norm": 0.6074087023735046, "learning_rate": 0.00013577511511324726, "loss": 1.7407, "step": 61712 }, { "epoch": 2.05, "grad_norm": 0.622890830039978, "learning_rate": 0.00013576636825000918, "loss": 1.6675, "step": 61713 }, { "epoch": 2.05, "grad_norm": 0.6543210744857788, "learning_rate": 0.00013575762158612653, "loss": 1.7658, "step": 61714 }, { "epoch": 2.05, "grad_norm": 0.6207115650177002, "learning_rate": 0.00013574887512161007, "loss": 1.6188, "step": 61715 }, { "epoch": 2.05, "grad_norm": 0.6166830658912659, "learning_rate": 0.00013574012885647025, "loss": 1.695, "step": 61716 }, { "epoch": 2.05, "grad_norm": 0.6253591775894165, "learning_rate": 0.0001357313827907176, "loss": 1.6805, "step": 61717 }, { "epoch": 2.05, "grad_norm": 0.6057742238044739, "learning_rate": 0.00013572263692436295, "loss": 1.6754, "step": 61718 }, { "epoch": 2.05, "grad_norm": 0.6214630603790283, "learning_rate": 0.00013571389125741686, "loss": 1.6377, "step": 61719 }, { "epoch": 2.05, "grad_norm": 0.6599559783935547, "learning_rate": 0.00013570514578988973, "loss": 1.7145, "step": 61720 }, { "epoch": 2.05, "grad_norm": 0.6301336884498596, "learning_rate": 0.00013569640052179236, "loss": 1.7178, "step": 61721 }, { "epoch": 2.05, "grad_norm": 0.6148961186408997, "learning_rate": 0.00013568765545313548, "loss": 1.7226, "step": 61722 }, { "epoch": 2.05, "grad_norm": 0.6360344886779785, "learning_rate": 0.00013567891058392957, "loss": 1.7294, "step": 61723 }, { "epoch": 2.05, "grad_norm": 0.6331379413604736, "learning_rate": 0.00013567016591418512, "loss": 1.8184, "step": 61724 }, { "epoch": 2.05, "grad_norm": 0.6634219288825989, "learning_rate": 0.000135661421443913, "loss": 1.7278, "step": 61725 }, { "epoch": 2.05, "grad_norm": 0.6320810914039612, "learning_rate": 0.00013565267717312372, "loss": 1.6522, "step": 61726 }, { "epoch": 2.05, "grad_norm": 0.6263916492462158, "learning_rate": 0.00013564393310182775, "loss": 1.6672, "step": 61727 }, { "epoch": 2.05, "grad_norm": 0.6103459596633911, "learning_rate": 0.00013563518923003594, "loss": 1.6497, "step": 61728 }, { "epoch": 2.05, "grad_norm": 0.6208166480064392, "learning_rate": 0.00013562644555775868, "loss": 1.748, "step": 61729 }, { "epoch": 2.05, "grad_norm": 0.6203702092170715, "learning_rate": 0.0001356177020850068, "loss": 1.7062, "step": 61730 }, { "epoch": 2.05, "grad_norm": 0.619722306728363, "learning_rate": 0.00013560895881179069, "loss": 1.6969, "step": 61731 }, { "epoch": 2.05, "grad_norm": 0.6341168880462646, "learning_rate": 0.0001356002157381212, "loss": 1.788, "step": 61732 }, { "epoch": 2.05, "grad_norm": 0.6477423310279846, "learning_rate": 0.00013559147286400886, "loss": 1.7229, "step": 61733 }, { "epoch": 2.05, "grad_norm": 0.613460123538971, "learning_rate": 0.00013558273018946407, "loss": 1.6658, "step": 61734 }, { "epoch": 2.05, "grad_norm": 0.648709237575531, "learning_rate": 0.00013557398771449778, "loss": 1.6946, "step": 61735 }, { "epoch": 2.05, "grad_norm": 0.612334668636322, "learning_rate": 0.00013556524543912034, "loss": 1.6171, "step": 61736 }, { "epoch": 2.05, "grad_norm": 0.6053209900856018, "learning_rate": 0.00013555650336334257, "loss": 1.7142, "step": 61737 }, { "epoch": 2.05, "grad_norm": 0.6093460917472839, "learning_rate": 0.00013554776148717493, "loss": 1.7181, "step": 61738 }, { "epoch": 2.05, "grad_norm": 0.6294494867324829, "learning_rate": 0.00013553901981062802, "loss": 1.7671, "step": 61739 }, { "epoch": 2.05, "grad_norm": 0.6449854373931885, "learning_rate": 0.0001355302783337126, "loss": 1.7037, "step": 61740 }, { "epoch": 2.05, "grad_norm": 0.6260998845100403, "learning_rate": 0.00013552153705643905, "loss": 1.7098, "step": 61741 }, { "epoch": 2.05, "grad_norm": 0.6287221908569336, "learning_rate": 0.00013551279597881824, "loss": 1.7423, "step": 61742 }, { "epoch": 2.05, "grad_norm": 0.6132826805114746, "learning_rate": 0.00013550405510086058, "loss": 1.6985, "step": 61743 }, { "epoch": 2.05, "grad_norm": 0.6445779204368591, "learning_rate": 0.00013549531442257683, "loss": 1.7104, "step": 61744 }, { "epoch": 2.05, "grad_norm": 0.6376983523368835, "learning_rate": 0.00013548657394397756, "loss": 1.7155, "step": 61745 }, { "epoch": 2.05, "grad_norm": 0.6274529695510864, "learning_rate": 0.00013547783366507323, "loss": 1.6421, "step": 61746 }, { "epoch": 2.05, "grad_norm": 0.6306628584861755, "learning_rate": 0.00013546909358587469, "loss": 1.7609, "step": 61747 }, { "epoch": 2.05, "grad_norm": 0.623254656791687, "learning_rate": 0.0001354603537063924, "loss": 1.6832, "step": 61748 }, { "epoch": 2.05, "grad_norm": 0.6136996150016785, "learning_rate": 0.0001354516140266369, "loss": 1.6768, "step": 61749 }, { "epoch": 2.05, "grad_norm": 0.6237747669219971, "learning_rate": 0.0001354428745466189, "loss": 1.6806, "step": 61750 }, { "epoch": 2.05, "grad_norm": 0.626338541507721, "learning_rate": 0.0001354341352663491, "loss": 1.686, "step": 61751 }, { "epoch": 2.05, "grad_norm": 0.6340632438659668, "learning_rate": 0.00013542539618583807, "loss": 1.6898, "step": 61752 }, { "epoch": 2.05, "grad_norm": 0.623911440372467, "learning_rate": 0.0001354166573050962, "loss": 1.6484, "step": 61753 }, { "epoch": 2.05, "grad_norm": 0.6226978302001953, "learning_rate": 0.00013540791862413437, "loss": 1.617, "step": 61754 }, { "epoch": 2.05, "grad_norm": 0.6377989649772644, "learning_rate": 0.00013539918014296307, "loss": 1.739, "step": 61755 }, { "epoch": 2.05, "grad_norm": 0.6110935211181641, "learning_rate": 0.00013539044186159282, "loss": 1.7087, "step": 61756 }, { "epoch": 2.05, "grad_norm": 0.6278270483016968, "learning_rate": 0.0001353817037800343, "loss": 1.802, "step": 61757 }, { "epoch": 2.05, "grad_norm": 0.6205071210861206, "learning_rate": 0.00013537296589829836, "loss": 1.7958, "step": 61758 }, { "epoch": 2.05, "grad_norm": 0.7894445061683655, "learning_rate": 0.00013536422821639518, "loss": 1.7191, "step": 61759 }, { "epoch": 2.05, "grad_norm": 0.6086149215698242, "learning_rate": 0.00013535549073433553, "loss": 1.6989, "step": 61760 }, { "epoch": 2.05, "grad_norm": 0.6165265440940857, "learning_rate": 0.0001353467534521302, "loss": 1.7108, "step": 61761 }, { "epoch": 2.05, "grad_norm": 0.6342741250991821, "learning_rate": 0.00013533801636978965, "loss": 1.7297, "step": 61762 }, { "epoch": 2.05, "grad_norm": 0.6174923181533813, "learning_rate": 0.00013532927948732437, "loss": 1.7047, "step": 61763 }, { "epoch": 2.05, "grad_norm": 0.6093345880508423, "learning_rate": 0.00013532054280474508, "loss": 1.665, "step": 61764 }, { "epoch": 2.05, "grad_norm": 0.6056756377220154, "learning_rate": 0.0001353118063220626, "loss": 1.6726, "step": 61765 }, { "epoch": 2.05, "grad_norm": 0.6318561434745789, "learning_rate": 0.00013530307003928714, "loss": 1.7276, "step": 61766 }, { "epoch": 2.05, "grad_norm": 0.6342734098434448, "learning_rate": 0.00013529433395642945, "loss": 1.7387, "step": 61767 }, { "epoch": 2.06, "grad_norm": 0.6028304696083069, "learning_rate": 0.00013528559807350027, "loss": 1.7177, "step": 61768 }, { "epoch": 2.06, "grad_norm": 0.6172543168067932, "learning_rate": 0.00013527686239051012, "loss": 1.683, "step": 61769 }, { "epoch": 2.06, "grad_norm": 0.6550960540771484, "learning_rate": 0.0001352681269074695, "loss": 1.7436, "step": 61770 }, { "epoch": 2.06, "grad_norm": 0.6223489046096802, "learning_rate": 0.00013525939162438905, "loss": 1.7766, "step": 61771 }, { "epoch": 2.06, "grad_norm": 0.6305251121520996, "learning_rate": 0.00013525065654127968, "loss": 1.6456, "step": 61772 }, { "epoch": 2.06, "grad_norm": 0.6395177245140076, "learning_rate": 0.00013524192165815152, "loss": 1.6935, "step": 61773 }, { "epoch": 2.06, "grad_norm": 0.6111776828765869, "learning_rate": 0.0001352331869750154, "loss": 1.7071, "step": 61774 }, { "epoch": 2.06, "grad_norm": 0.6233773231506348, "learning_rate": 0.000135224452491882, "loss": 1.6565, "step": 61775 }, { "epoch": 2.06, "grad_norm": 0.6213949918746948, "learning_rate": 0.00013521571820876186, "loss": 1.6844, "step": 61776 }, { "epoch": 2.06, "grad_norm": 0.629490077495575, "learning_rate": 0.00013520698412566542, "loss": 1.6667, "step": 61777 }, { "epoch": 2.06, "grad_norm": 0.6364595890045166, "learning_rate": 0.00013519825024260344, "loss": 1.7198, "step": 61778 }, { "epoch": 2.06, "grad_norm": 0.631079375743866, "learning_rate": 0.00013518951655958671, "loss": 1.7089, "step": 61779 }, { "epoch": 2.06, "grad_norm": 0.6227938532829285, "learning_rate": 0.0001351807830766254, "loss": 1.7489, "step": 61780 }, { "epoch": 2.06, "grad_norm": 0.6560292840003967, "learning_rate": 0.00013517204979373034, "loss": 1.7406, "step": 61781 }, { "epoch": 2.06, "grad_norm": 0.6293596625328064, "learning_rate": 0.00013516331671091224, "loss": 1.6341, "step": 61782 }, { "epoch": 2.06, "grad_norm": 0.6228736042976379, "learning_rate": 0.0001351545838281816, "loss": 1.6695, "step": 61783 }, { "epoch": 2.06, "grad_norm": 0.6332147121429443, "learning_rate": 0.00013514585114554888, "loss": 1.762, "step": 61784 }, { "epoch": 2.06, "grad_norm": 0.6399898529052734, "learning_rate": 0.00013513711866302493, "loss": 1.674, "step": 61785 }, { "epoch": 2.06, "grad_norm": 0.6050206422805786, "learning_rate": 0.0001351283863806202, "loss": 1.6372, "step": 61786 }, { "epoch": 2.06, "grad_norm": 0.6069988012313843, "learning_rate": 0.00013511965429834522, "loss": 1.6673, "step": 61787 }, { "epoch": 2.06, "grad_norm": 0.6062056422233582, "learning_rate": 0.0001351109224162107, "loss": 1.753, "step": 61788 }, { "epoch": 2.06, "grad_norm": 0.619976818561554, "learning_rate": 0.00013510219073422728, "loss": 1.6237, "step": 61789 }, { "epoch": 2.06, "grad_norm": 0.6536538600921631, "learning_rate": 0.00013509345925240556, "loss": 1.6357, "step": 61790 }, { "epoch": 2.06, "grad_norm": 0.6363202333450317, "learning_rate": 0.00013508472797075596, "loss": 1.8051, "step": 61791 }, { "epoch": 2.06, "grad_norm": 0.6070480942726135, "learning_rate": 0.00013507599688928931, "loss": 1.7165, "step": 61792 }, { "epoch": 2.06, "grad_norm": 0.6320441961288452, "learning_rate": 0.0001350672660080161, "loss": 1.7238, "step": 61793 }, { "epoch": 2.06, "grad_norm": 0.6486245393753052, "learning_rate": 0.00013505853532694683, "loss": 1.7624, "step": 61794 }, { "epoch": 2.06, "grad_norm": 0.6293184757232666, "learning_rate": 0.00013504980484609227, "loss": 1.7714, "step": 61795 }, { "epoch": 2.06, "grad_norm": 0.6354429125785828, "learning_rate": 0.00013504107456546285, "loss": 1.7497, "step": 61796 }, { "epoch": 2.06, "grad_norm": 0.6205618381500244, "learning_rate": 0.0001350323444850694, "loss": 1.6512, "step": 61797 }, { "epoch": 2.06, "grad_norm": 0.634669840335846, "learning_rate": 0.00013502361460492222, "loss": 1.6327, "step": 61798 }, { "epoch": 2.06, "grad_norm": 0.6291672587394714, "learning_rate": 0.00013501488492503218, "loss": 1.7489, "step": 61799 }, { "epoch": 2.06, "grad_norm": 0.6340786814689636, "learning_rate": 0.0001350061554454098, "loss": 1.7705, "step": 61800 }, { "epoch": 2.06, "grad_norm": 0.6167036294937134, "learning_rate": 0.0001349974261660655, "loss": 1.7581, "step": 61801 }, { "epoch": 2.06, "grad_norm": 0.6191506385803223, "learning_rate": 0.0001349886970870101, "loss": 1.7086, "step": 61802 }, { "epoch": 2.06, "grad_norm": 0.6371389627456665, "learning_rate": 0.00013497996820825405, "loss": 1.7472, "step": 61803 }, { "epoch": 2.06, "grad_norm": 0.6325555443763733, "learning_rate": 0.00013497123952980808, "loss": 1.6933, "step": 61804 }, { "epoch": 2.06, "grad_norm": 0.6162337064743042, "learning_rate": 0.0001349625110516827, "loss": 1.6716, "step": 61805 }, { "epoch": 2.06, "grad_norm": 0.6298668384552002, "learning_rate": 0.0001349537827738884, "loss": 1.7067, "step": 61806 }, { "epoch": 2.06, "grad_norm": 0.6212170720100403, "learning_rate": 0.00013494505469643602, "loss": 1.7028, "step": 61807 }, { "epoch": 2.06, "grad_norm": 0.6045317053794861, "learning_rate": 0.00013493632681933591, "loss": 1.7291, "step": 61808 }, { "epoch": 2.06, "grad_norm": 0.6187089681625366, "learning_rate": 0.0001349275991425989, "loss": 1.771, "step": 61809 }, { "epoch": 2.06, "grad_norm": 0.6348762512207031, "learning_rate": 0.00013491887166623535, "loss": 1.6974, "step": 61810 }, { "epoch": 2.06, "grad_norm": 0.6234301328659058, "learning_rate": 0.00013491014439025607, "loss": 1.7362, "step": 61811 }, { "epoch": 2.06, "grad_norm": 0.6216906309127808, "learning_rate": 0.00013490141731467155, "loss": 1.7162, "step": 61812 }, { "epoch": 2.06, "grad_norm": 0.6099600195884705, "learning_rate": 0.0001348926904394923, "loss": 1.6912, "step": 61813 }, { "epoch": 2.06, "grad_norm": 0.6170092225074768, "learning_rate": 0.00013488396376472908, "loss": 1.7764, "step": 61814 }, { "epoch": 2.06, "grad_norm": 0.6020862460136414, "learning_rate": 0.00013487523729039234, "loss": 1.762, "step": 61815 }, { "epoch": 2.06, "grad_norm": 0.6014295220375061, "learning_rate": 0.00013486651101649272, "loss": 1.667, "step": 61816 }, { "epoch": 2.06, "grad_norm": 0.6111703515052795, "learning_rate": 0.00013485778494304077, "loss": 1.7471, "step": 61817 }, { "epoch": 2.06, "grad_norm": 0.634280800819397, "learning_rate": 0.00013484905907004728, "loss": 1.6588, "step": 61818 }, { "epoch": 2.06, "grad_norm": 0.6307386159896851, "learning_rate": 0.00013484033339752267, "loss": 1.6509, "step": 61819 }, { "epoch": 2.06, "grad_norm": 0.6482769250869751, "learning_rate": 0.00013483160792547745, "loss": 1.7615, "step": 61820 }, { "epoch": 2.06, "grad_norm": 0.6280612349510193, "learning_rate": 0.00013482288265392246, "loss": 1.7538, "step": 61821 }, { "epoch": 2.06, "grad_norm": 0.6300206184387207, "learning_rate": 0.00013481415758286811, "loss": 1.7236, "step": 61822 }, { "epoch": 2.06, "grad_norm": 0.6336271166801453, "learning_rate": 0.00013480543271232498, "loss": 1.7506, "step": 61823 }, { "epoch": 2.06, "grad_norm": 1.8816417455673218, "learning_rate": 0.00013479670804230365, "loss": 1.7531, "step": 61824 }, { "epoch": 2.06, "grad_norm": 0.6351212859153748, "learning_rate": 0.00013478798357281495, "loss": 1.6839, "step": 61825 }, { "epoch": 2.06, "grad_norm": 0.6034071445465088, "learning_rate": 0.00013477925930386925, "loss": 1.7903, "step": 61826 }, { "epoch": 2.06, "grad_norm": 0.6122825145721436, "learning_rate": 0.00013477053523547707, "loss": 1.7165, "step": 61827 }, { "epoch": 2.06, "grad_norm": 0.6287263035774231, "learning_rate": 0.00013476181136764915, "loss": 1.6472, "step": 61828 }, { "epoch": 2.06, "grad_norm": 0.6188594102859497, "learning_rate": 0.00013475308770039622, "loss": 1.6951, "step": 61829 }, { "epoch": 2.06, "grad_norm": 0.6273409128189087, "learning_rate": 0.0001347443642337285, "loss": 1.7438, "step": 61830 }, { "epoch": 2.06, "grad_norm": 0.6228238940238953, "learning_rate": 0.00013473564096765678, "loss": 1.7244, "step": 61831 }, { "epoch": 2.06, "grad_norm": 0.6332061290740967, "learning_rate": 0.00013472691790219173, "loss": 1.6435, "step": 61832 }, { "epoch": 2.06, "grad_norm": 0.6220325231552124, "learning_rate": 0.00013471819503734387, "loss": 1.6622, "step": 61833 }, { "epoch": 2.06, "grad_norm": 0.6261084079742432, "learning_rate": 0.00013470947237312366, "loss": 1.7456, "step": 61834 }, { "epoch": 2.06, "grad_norm": 0.6237117052078247, "learning_rate": 0.0001347007499095419, "loss": 1.7215, "step": 61835 }, { "epoch": 2.06, "grad_norm": 0.6250741481781006, "learning_rate": 0.00013469202764660905, "loss": 1.666, "step": 61836 }, { "epoch": 2.06, "grad_norm": 0.5936030745506287, "learning_rate": 0.00013468330558433567, "loss": 1.6378, "step": 61837 }, { "epoch": 2.06, "grad_norm": 0.6306096315383911, "learning_rate": 0.00013467458372273236, "loss": 1.756, "step": 61838 }, { "epoch": 2.06, "grad_norm": 0.631337583065033, "learning_rate": 0.00013466586206180986, "loss": 1.7405, "step": 61839 }, { "epoch": 2.06, "grad_norm": 0.6635193228721619, "learning_rate": 0.00013465714060157863, "loss": 1.6862, "step": 61840 }, { "epoch": 2.06, "grad_norm": 0.6209350228309631, "learning_rate": 0.00013464841934204917, "loss": 1.6844, "step": 61841 }, { "epoch": 2.06, "grad_norm": 0.6223675608634949, "learning_rate": 0.00013463969828323227, "loss": 1.7142, "step": 61842 }, { "epoch": 2.06, "grad_norm": 0.6151314973831177, "learning_rate": 0.00013463097742513842, "loss": 1.7521, "step": 61843 }, { "epoch": 2.06, "grad_norm": 0.6438694596290588, "learning_rate": 0.00013462225676777806, "loss": 1.7022, "step": 61844 }, { "epoch": 2.06, "grad_norm": 0.613716721534729, "learning_rate": 0.00013461353631116205, "loss": 1.7377, "step": 61845 }, { "epoch": 2.06, "grad_norm": 0.6306077241897583, "learning_rate": 0.00013460481605530075, "loss": 1.7928, "step": 61846 }, { "epoch": 2.06, "grad_norm": 0.6365132927894592, "learning_rate": 0.00013459609600020492, "loss": 1.6844, "step": 61847 }, { "epoch": 2.06, "grad_norm": 0.6522067785263062, "learning_rate": 0.00013458737614588495, "loss": 1.7016, "step": 61848 }, { "epoch": 2.06, "grad_norm": 0.6239535808563232, "learning_rate": 0.00013457865649235164, "loss": 1.7716, "step": 61849 }, { "epoch": 2.06, "grad_norm": 0.6224475502967834, "learning_rate": 0.00013456993703961546, "loss": 1.6884, "step": 61850 }, { "epoch": 2.06, "grad_norm": 0.6299344897270203, "learning_rate": 0.0001345612177876869, "loss": 1.7713, "step": 61851 }, { "epoch": 2.06, "grad_norm": 0.6463457345962524, "learning_rate": 0.00013455249873657674, "loss": 1.7578, "step": 61852 }, { "epoch": 2.06, "grad_norm": 0.6553384065628052, "learning_rate": 0.0001345437798862954, "loss": 1.675, "step": 61853 }, { "epoch": 2.06, "grad_norm": 0.6083621382713318, "learning_rate": 0.00013453506123685359, "loss": 1.6876, "step": 61854 }, { "epoch": 2.06, "grad_norm": 0.6069186329841614, "learning_rate": 0.00013452634278826188, "loss": 1.7133, "step": 61855 }, { "epoch": 2.06, "grad_norm": 0.6323696970939636, "learning_rate": 0.00013451762454053066, "loss": 1.7652, "step": 61856 }, { "epoch": 2.06, "grad_norm": 0.6276090741157532, "learning_rate": 0.0001345089064936708, "loss": 1.6431, "step": 61857 }, { "epoch": 2.06, "grad_norm": 0.6097904443740845, "learning_rate": 0.00013450018864769262, "loss": 1.6459, "step": 61858 }, { "epoch": 2.06, "grad_norm": 0.6455184817314148, "learning_rate": 0.0001344914710026069, "loss": 1.7494, "step": 61859 }, { "epoch": 2.06, "grad_norm": 0.6113212704658508, "learning_rate": 0.0001344827535584241, "loss": 1.799, "step": 61860 }, { "epoch": 2.06, "grad_norm": 0.6074082851409912, "learning_rate": 0.00013447403631515494, "loss": 1.6488, "step": 61861 }, { "epoch": 2.06, "grad_norm": 0.6218075752258301, "learning_rate": 0.0001344653192728099, "loss": 1.7403, "step": 61862 }, { "epoch": 2.06, "grad_norm": 0.6201559901237488, "learning_rate": 0.00013445660243139947, "loss": 1.6461, "step": 61863 }, { "epoch": 2.06, "grad_norm": 0.6096519231796265, "learning_rate": 0.00013444788579093444, "loss": 1.6088, "step": 61864 }, { "epoch": 2.06, "grad_norm": 0.6404334306716919, "learning_rate": 0.0001344391693514253, "loss": 1.7632, "step": 61865 }, { "epoch": 2.06, "grad_norm": 0.6234031915664673, "learning_rate": 0.00013443045311288247, "loss": 1.8065, "step": 61866 }, { "epoch": 2.06, "grad_norm": 0.6256292462348938, "learning_rate": 0.00013442173707531674, "loss": 1.6635, "step": 61867 }, { "epoch": 2.06, "grad_norm": 1.3404452800750732, "learning_rate": 0.00013441302123873866, "loss": 1.7534, "step": 61868 }, { "epoch": 2.06, "grad_norm": 0.6131122708320618, "learning_rate": 0.00013440430560315882, "loss": 1.7208, "step": 61869 }, { "epoch": 2.06, "grad_norm": 0.6067322492599487, "learning_rate": 0.0001343955901685876, "loss": 1.7078, "step": 61870 }, { "epoch": 2.06, "grad_norm": 0.6128248572349548, "learning_rate": 0.00013438687493503592, "loss": 1.6686, "step": 61871 }, { "epoch": 2.06, "grad_norm": 0.6379319429397583, "learning_rate": 0.0001343781599025141, "loss": 1.7192, "step": 61872 }, { "epoch": 2.06, "grad_norm": 0.6354076266288757, "learning_rate": 0.0001343694450710327, "loss": 1.7189, "step": 61873 }, { "epoch": 2.06, "grad_norm": 0.6159617304801941, "learning_rate": 0.00013436073044060238, "loss": 1.736, "step": 61874 }, { "epoch": 2.06, "grad_norm": 0.6074877381324768, "learning_rate": 0.00013435201601123397, "loss": 1.6512, "step": 61875 }, { "epoch": 2.06, "grad_norm": 0.6330360770225525, "learning_rate": 0.00013434330178293757, "loss": 1.7003, "step": 61876 }, { "epoch": 2.06, "grad_norm": 0.6311817169189453, "learning_rate": 0.000134334587755724, "loss": 1.6525, "step": 61877 }, { "epoch": 2.06, "grad_norm": 0.6156623363494873, "learning_rate": 0.00013432587392960392, "loss": 1.7368, "step": 61878 }, { "epoch": 2.06, "grad_norm": 0.6268989443778992, "learning_rate": 0.00013431716030458786, "loss": 1.7506, "step": 61879 }, { "epoch": 2.06, "grad_norm": 0.6257323026657104, "learning_rate": 0.00013430844688068624, "loss": 1.716, "step": 61880 }, { "epoch": 2.06, "grad_norm": 0.6308696269989014, "learning_rate": 0.0001342997336579097, "loss": 1.6726, "step": 61881 }, { "epoch": 2.06, "grad_norm": 0.6096304059028625, "learning_rate": 0.00013429102063626912, "loss": 1.7221, "step": 61882 }, { "epoch": 2.06, "grad_norm": 0.6204456090927124, "learning_rate": 0.0001342823078157746, "loss": 1.7335, "step": 61883 }, { "epoch": 2.06, "grad_norm": 0.6329290270805359, "learning_rate": 0.00013427359519643697, "loss": 1.7457, "step": 61884 }, { "epoch": 2.06, "grad_norm": 0.6103780269622803, "learning_rate": 0.00013426488277826687, "loss": 1.7308, "step": 61885 }, { "epoch": 2.06, "grad_norm": 0.6438834071159363, "learning_rate": 0.00013425617056127476, "loss": 1.7319, "step": 61886 }, { "epoch": 2.06, "grad_norm": 0.6335570812225342, "learning_rate": 0.00013424745854547117, "loss": 1.6403, "step": 61887 }, { "epoch": 2.06, "grad_norm": 0.6510605216026306, "learning_rate": 0.0001342387467308667, "loss": 1.6496, "step": 61888 }, { "epoch": 2.06, "grad_norm": 0.6046063899993896, "learning_rate": 0.0001342300351174722, "loss": 1.6675, "step": 61889 }, { "epoch": 2.06, "grad_norm": 0.6334733963012695, "learning_rate": 0.0001342213237052978, "loss": 1.699, "step": 61890 }, { "epoch": 2.06, "grad_norm": 0.6384310126304626, "learning_rate": 0.0001342126124943543, "loss": 1.746, "step": 61891 }, { "epoch": 2.06, "grad_norm": 0.6386129260063171, "learning_rate": 0.00013420390148465234, "loss": 1.7425, "step": 61892 }, { "epoch": 2.06, "grad_norm": 0.6055551171302795, "learning_rate": 0.00013419519067620243, "loss": 1.7598, "step": 61893 }, { "epoch": 2.06, "grad_norm": 0.6132940053939819, "learning_rate": 0.000134186480069015, "loss": 1.6934, "step": 61894 }, { "epoch": 2.06, "grad_norm": 0.6275312900543213, "learning_rate": 0.0001341777696631008, "loss": 1.7509, "step": 61895 }, { "epoch": 2.06, "grad_norm": 0.619704008102417, "learning_rate": 0.00013416905945847055, "loss": 1.7625, "step": 61896 }, { "epoch": 2.06, "grad_norm": 0.6321040391921997, "learning_rate": 0.0001341603494551344, "loss": 1.7659, "step": 61897 }, { "epoch": 2.06, "grad_norm": 0.6266348958015442, "learning_rate": 0.00013415163965310316, "loss": 1.8228, "step": 61898 }, { "epoch": 2.06, "grad_norm": 0.6163597702980042, "learning_rate": 0.0001341429300523875, "loss": 1.6988, "step": 61899 }, { "epoch": 2.06, "grad_norm": 0.6323023438453674, "learning_rate": 0.00013413422065299788, "loss": 1.619, "step": 61900 }, { "epoch": 2.06, "grad_norm": 0.6427395939826965, "learning_rate": 0.00013412551145494474, "loss": 1.674, "step": 61901 }, { "epoch": 2.06, "grad_norm": 0.6054390072822571, "learning_rate": 0.00013411680245823892, "loss": 1.6645, "step": 61902 }, { "epoch": 2.06, "grad_norm": 0.6115884780883789, "learning_rate": 0.00013410809366289086, "loss": 1.7075, "step": 61903 }, { "epoch": 2.06, "grad_norm": 0.628891110420227, "learning_rate": 0.00013409938506891105, "loss": 1.7524, "step": 61904 }, { "epoch": 2.06, "grad_norm": 1.3466265201568604, "learning_rate": 0.00013409067667631013, "loss": 1.6941, "step": 61905 }, { "epoch": 2.06, "grad_norm": 0.6400847434997559, "learning_rate": 0.00013408196848509876, "loss": 1.7238, "step": 61906 }, { "epoch": 2.06, "grad_norm": 0.6126829981803894, "learning_rate": 0.00013407326049528747, "loss": 1.738, "step": 61907 }, { "epoch": 2.06, "grad_norm": 0.6052166223526001, "learning_rate": 0.00013406455270688667, "loss": 1.6465, "step": 61908 }, { "epoch": 2.06, "grad_norm": 0.6216317415237427, "learning_rate": 0.00013405584511990716, "loss": 1.7271, "step": 61909 }, { "epoch": 2.06, "grad_norm": 0.6365792155265808, "learning_rate": 0.0001340471377343594, "loss": 1.6809, "step": 61910 }, { "epoch": 2.06, "grad_norm": 0.6595121026039124, "learning_rate": 0.00013403843055025388, "loss": 1.7411, "step": 61911 }, { "epoch": 2.06, "grad_norm": 0.6235899329185486, "learning_rate": 0.00013402972356760134, "loss": 1.6929, "step": 61912 }, { "epoch": 2.06, "grad_norm": 0.6175367832183838, "learning_rate": 0.00013402101678641214, "loss": 1.6935, "step": 61913 }, { "epoch": 2.06, "grad_norm": 0.6199659705162048, "learning_rate": 0.00013401231020669708, "loss": 1.6745, "step": 61914 }, { "epoch": 2.06, "grad_norm": 0.6096675992012024, "learning_rate": 0.00013400360382846653, "loss": 1.7043, "step": 61915 }, { "epoch": 2.06, "grad_norm": 0.6216724514961243, "learning_rate": 0.00013399489765173126, "loss": 1.7096, "step": 61916 }, { "epoch": 2.06, "grad_norm": 0.6192544102668762, "learning_rate": 0.00013398619167650168, "loss": 1.6848, "step": 61917 }, { "epoch": 2.06, "grad_norm": 0.6511402130126953, "learning_rate": 0.00013397748590278835, "loss": 1.6672, "step": 61918 }, { "epoch": 2.06, "grad_norm": 0.6352983713150024, "learning_rate": 0.000133968780330602, "loss": 1.7048, "step": 61919 }, { "epoch": 2.06, "grad_norm": 0.614105224609375, "learning_rate": 0.00013396007495995293, "loss": 1.6699, "step": 61920 }, { "epoch": 2.06, "grad_norm": 0.6196790933609009, "learning_rate": 0.000133951369790852, "loss": 1.7083, "step": 61921 }, { "epoch": 2.06, "grad_norm": 0.6154392957687378, "learning_rate": 0.00013394266482330964, "loss": 1.6757, "step": 61922 }, { "epoch": 2.06, "grad_norm": 0.6371930837631226, "learning_rate": 0.00013393396005733632, "loss": 1.7244, "step": 61923 }, { "epoch": 2.06, "grad_norm": 0.6362491250038147, "learning_rate": 0.00013392525549294276, "loss": 1.6984, "step": 61924 }, { "epoch": 2.06, "grad_norm": 0.6457179188728333, "learning_rate": 0.0001339165511301394, "loss": 1.6829, "step": 61925 }, { "epoch": 2.06, "grad_norm": 0.6320897340774536, "learning_rate": 0.000133907846968937, "loss": 1.668, "step": 61926 }, { "epoch": 2.06, "grad_norm": 0.6136990189552307, "learning_rate": 0.00013389914300934586, "loss": 1.7132, "step": 61927 }, { "epoch": 2.06, "grad_norm": 0.6302836537361145, "learning_rate": 0.00013389043925137682, "loss": 1.6845, "step": 61928 }, { "epoch": 2.06, "grad_norm": 0.6637894511222839, "learning_rate": 0.0001338817356950403, "loss": 1.6664, "step": 61929 }, { "epoch": 2.06, "grad_norm": 0.6145538687705994, "learning_rate": 0.00013387303234034677, "loss": 1.6798, "step": 61930 }, { "epoch": 2.06, "grad_norm": 0.6117485761642456, "learning_rate": 0.000133864329187307, "loss": 1.6998, "step": 61931 }, { "epoch": 2.06, "grad_norm": 0.6372912526130676, "learning_rate": 0.0001338556262359315, "loss": 1.676, "step": 61932 }, { "epoch": 2.06, "grad_norm": 0.6356120705604553, "learning_rate": 0.00013384692348623062, "loss": 1.7775, "step": 61933 }, { "epoch": 2.06, "grad_norm": 0.6186434030532837, "learning_rate": 0.00013383822093821513, "loss": 1.6957, "step": 61934 }, { "epoch": 2.06, "grad_norm": 0.6188610196113586, "learning_rate": 0.00013382951859189566, "loss": 1.8031, "step": 61935 }, { "epoch": 2.06, "grad_norm": 0.6122562289237976, "learning_rate": 0.00013382081644728265, "loss": 1.7343, "step": 61936 }, { "epoch": 2.06, "grad_norm": 0.620697557926178, "learning_rate": 0.0001338121145043866, "loss": 1.653, "step": 61937 }, { "epoch": 2.06, "grad_norm": 0.6144194006919861, "learning_rate": 0.00013380341276321824, "loss": 1.6692, "step": 61938 }, { "epoch": 2.06, "grad_norm": 0.6342363357543945, "learning_rate": 0.00013379471122378807, "loss": 1.7596, "step": 61939 }, { "epoch": 2.06, "grad_norm": 0.6446253657341003, "learning_rate": 0.00013378600988610652, "loss": 1.7451, "step": 61940 }, { "epoch": 2.06, "grad_norm": 0.632095217704773, "learning_rate": 0.00013377730875018424, "loss": 1.7742, "step": 61941 }, { "epoch": 2.06, "grad_norm": 0.6385939717292786, "learning_rate": 0.00013376860781603197, "loss": 1.6712, "step": 61942 }, { "epoch": 2.06, "grad_norm": 0.6230031847953796, "learning_rate": 0.0001337599070836601, "loss": 1.669, "step": 61943 }, { "epoch": 2.06, "grad_norm": 0.608121931552887, "learning_rate": 0.00013375120655307907, "loss": 1.6534, "step": 61944 }, { "epoch": 2.06, "grad_norm": 0.6073524951934814, "learning_rate": 0.0001337425062242996, "loss": 1.769, "step": 61945 }, { "epoch": 2.06, "grad_norm": 0.6252589821815491, "learning_rate": 0.00013373380609733244, "loss": 1.72, "step": 61946 }, { "epoch": 2.06, "grad_norm": 0.6253929138183594, "learning_rate": 0.00013372510617218775, "loss": 1.6541, "step": 61947 }, { "epoch": 2.06, "grad_norm": 0.6391361355781555, "learning_rate": 0.00013371640644887624, "loss": 1.6261, "step": 61948 }, { "epoch": 2.06, "grad_norm": 0.6269542574882507, "learning_rate": 0.00013370770692740867, "loss": 1.7358, "step": 61949 }, { "epoch": 2.06, "grad_norm": 0.6574408411979675, "learning_rate": 0.0001336990076077954, "loss": 1.6568, "step": 61950 }, { "epoch": 2.06, "grad_norm": 0.6128338575363159, "learning_rate": 0.00013369030849004696, "loss": 1.7093, "step": 61951 }, { "epoch": 2.06, "grad_norm": 0.6255796551704407, "learning_rate": 0.00013368160957417407, "loss": 1.7392, "step": 61952 }, { "epoch": 2.06, "grad_norm": 0.6352143287658691, "learning_rate": 0.0001336729108601872, "loss": 1.7796, "step": 61953 }, { "epoch": 2.06, "grad_norm": 0.6441231369972229, "learning_rate": 0.0001336642123480968, "loss": 1.682, "step": 61954 }, { "epoch": 2.06, "grad_norm": 0.6203439831733704, "learning_rate": 0.00013365551403791352, "loss": 1.7339, "step": 61955 }, { "epoch": 2.06, "grad_norm": 0.6493792533874512, "learning_rate": 0.00013364681592964808, "loss": 1.7861, "step": 61956 }, { "epoch": 2.06, "grad_norm": 0.6253163814544678, "learning_rate": 0.00013363811802331087, "loss": 1.7204, "step": 61957 }, { "epoch": 2.06, "grad_norm": 0.6376144886016846, "learning_rate": 0.00013362942031891237, "loss": 1.7493, "step": 61958 }, { "epoch": 2.06, "grad_norm": 0.6194290518760681, "learning_rate": 0.00013362072281646335, "loss": 1.7286, "step": 61959 }, { "epoch": 2.06, "grad_norm": 0.6314106583595276, "learning_rate": 0.00013361202551597423, "loss": 1.6574, "step": 61960 }, { "epoch": 2.06, "grad_norm": 0.6332298517227173, "learning_rate": 0.00013360332841745554, "loss": 1.6593, "step": 61961 }, { "epoch": 2.06, "grad_norm": 0.6297563910484314, "learning_rate": 0.00013359463152091798, "loss": 1.6908, "step": 61962 }, { "epoch": 2.06, "grad_norm": 0.620669424533844, "learning_rate": 0.00013358593482637191, "loss": 1.7537, "step": 61963 }, { "epoch": 2.06, "grad_norm": 0.619594156742096, "learning_rate": 0.00013357723833382808, "loss": 1.7353, "step": 61964 }, { "epoch": 2.06, "grad_norm": 0.6186349987983704, "learning_rate": 0.00013356854204329686, "loss": 1.6671, "step": 61965 }, { "epoch": 2.06, "grad_norm": 0.6379572153091431, "learning_rate": 0.00013355984595478905, "loss": 1.655, "step": 61966 }, { "epoch": 2.06, "grad_norm": 0.6254261136054993, "learning_rate": 0.00013355115006831504, "loss": 1.7232, "step": 61967 }, { "epoch": 2.06, "grad_norm": 0.651310920715332, "learning_rate": 0.0001335424543838853, "loss": 1.7146, "step": 61968 }, { "epoch": 2.06, "grad_norm": 0.6050337553024292, "learning_rate": 0.0001335337589015106, "loss": 1.6634, "step": 61969 }, { "epoch": 2.06, "grad_norm": 0.6448848247528076, "learning_rate": 0.00013352506362120128, "loss": 1.6996, "step": 61970 }, { "epoch": 2.06, "grad_norm": 0.635611891746521, "learning_rate": 0.00013351636854296812, "loss": 1.6489, "step": 61971 }, { "epoch": 2.06, "grad_norm": 0.6260899901390076, "learning_rate": 0.00013350767366682154, "loss": 1.6809, "step": 61972 }, { "epoch": 2.06, "grad_norm": 0.6327772736549377, "learning_rate": 0.000133498978992772, "loss": 1.7684, "step": 61973 }, { "epoch": 2.06, "grad_norm": 0.6437767148017883, "learning_rate": 0.00013349028452083031, "loss": 1.6595, "step": 61974 }, { "epoch": 2.06, "grad_norm": 0.6160812377929688, "learning_rate": 0.00013348159025100675, "loss": 1.7391, "step": 61975 }, { "epoch": 2.06, "grad_norm": 0.6326379776000977, "learning_rate": 0.00013347289618331212, "loss": 1.6129, "step": 61976 }, { "epoch": 2.06, "grad_norm": 0.6339937448501587, "learning_rate": 0.00013346420231775675, "loss": 1.7586, "step": 61977 }, { "epoch": 2.06, "grad_norm": 0.6275767087936401, "learning_rate": 0.00013345550865435143, "loss": 1.6845, "step": 61978 }, { "epoch": 2.06, "grad_norm": 0.6535049080848694, "learning_rate": 0.00013344681519310655, "loss": 1.659, "step": 61979 }, { "epoch": 2.06, "grad_norm": 0.6307316422462463, "learning_rate": 0.00013343812193403258, "loss": 1.7555, "step": 61980 }, { "epoch": 2.06, "grad_norm": 0.6362243294715881, "learning_rate": 0.0001334294288771403, "loss": 1.656, "step": 61981 }, { "epoch": 2.06, "grad_norm": 0.6329204440116882, "learning_rate": 0.00013342073602244019, "loss": 1.7193, "step": 61982 }, { "epoch": 2.06, "grad_norm": 0.6234325170516968, "learning_rate": 0.00013341204336994264, "loss": 1.7232, "step": 61983 }, { "epoch": 2.06, "grad_norm": 0.6193152070045471, "learning_rate": 0.0001334033509196583, "loss": 1.6426, "step": 61984 }, { "epoch": 2.06, "grad_norm": 0.6206735372543335, "learning_rate": 0.00013339465867159785, "loss": 1.6401, "step": 61985 }, { "epoch": 2.06, "grad_norm": 0.6310892105102539, "learning_rate": 0.00013338596662577175, "loss": 1.6928, "step": 61986 }, { "epoch": 2.06, "grad_norm": 0.6405479907989502, "learning_rate": 0.00013337727478219044, "loss": 1.7407, "step": 61987 }, { "epoch": 2.06, "grad_norm": 0.6319171190261841, "learning_rate": 0.00013336858314086467, "loss": 1.7164, "step": 61988 }, { "epoch": 2.06, "grad_norm": 0.6317494511604309, "learning_rate": 0.00013335989170180486, "loss": 1.744, "step": 61989 }, { "epoch": 2.06, "grad_norm": 0.6366816163063049, "learning_rate": 0.0001333512004650215, "loss": 1.6497, "step": 61990 }, { "epoch": 2.06, "grad_norm": 0.619796097278595, "learning_rate": 0.00013334250943052524, "loss": 1.6638, "step": 61991 }, { "epoch": 2.06, "grad_norm": 0.6145424246788025, "learning_rate": 0.00013333381859832684, "loss": 1.706, "step": 61992 }, { "epoch": 2.06, "grad_norm": 0.6241456866264343, "learning_rate": 0.0001333251279684364, "loss": 1.7601, "step": 61993 }, { "epoch": 2.06, "grad_norm": 0.5993616580963135, "learning_rate": 0.00013331643754086467, "loss": 1.7055, "step": 61994 }, { "epoch": 2.06, "grad_norm": 0.626559853553772, "learning_rate": 0.00013330774731562235, "loss": 1.7761, "step": 61995 }, { "epoch": 2.06, "grad_norm": 0.6286891102790833, "learning_rate": 0.00013329905729271988, "loss": 1.6456, "step": 61996 }, { "epoch": 2.06, "grad_norm": 0.6292126774787903, "learning_rate": 0.0001332903674721677, "loss": 1.6264, "step": 61997 }, { "epoch": 2.06, "grad_norm": 0.6115646362304688, "learning_rate": 0.00013328167785397641, "loss": 1.6145, "step": 61998 }, { "epoch": 2.06, "grad_norm": 0.6287336349487305, "learning_rate": 0.00013327298843815682, "loss": 1.7502, "step": 61999 }, { "epoch": 2.06, "grad_norm": 0.6380103826522827, "learning_rate": 0.00013326429922471908, "loss": 1.6845, "step": 62000 }, { "epoch": 2.06, "grad_norm": 0.6074896454811096, "learning_rate": 0.00013325561021367385, "loss": 1.7713, "step": 62001 }, { "epoch": 2.06, "grad_norm": 0.637254536151886, "learning_rate": 0.0001332469214050319, "loss": 1.6526, "step": 62002 }, { "epoch": 2.06, "grad_norm": 0.6494620442390442, "learning_rate": 0.00013323823279880358, "loss": 1.7479, "step": 62003 }, { "epoch": 2.06, "grad_norm": 0.6498000025749207, "learning_rate": 0.00013322954439499938, "loss": 1.7341, "step": 62004 }, { "epoch": 2.06, "grad_norm": 0.6022520661354065, "learning_rate": 0.00013322085619362997, "loss": 1.6716, "step": 62005 }, { "epoch": 2.06, "grad_norm": 0.6260820031166077, "learning_rate": 0.00013321216819470606, "loss": 1.6468, "step": 62006 }, { "epoch": 2.06, "grad_norm": 0.6519027352333069, "learning_rate": 0.00013320348039823777, "loss": 1.8001, "step": 62007 }, { "epoch": 2.06, "grad_norm": 0.6222720146179199, "learning_rate": 0.0001331947928042359, "loss": 1.6944, "step": 62008 }, { "epoch": 2.06, "grad_norm": 0.6464110016822815, "learning_rate": 0.00013318610541271107, "loss": 1.6716, "step": 62009 }, { "epoch": 2.06, "grad_norm": 0.6401143074035645, "learning_rate": 0.0001331774182236737, "loss": 1.804, "step": 62010 }, { "epoch": 2.06, "grad_norm": 0.6157622933387756, "learning_rate": 0.00013316873123713432, "loss": 1.7566, "step": 62011 }, { "epoch": 2.06, "grad_norm": 0.6538980603218079, "learning_rate": 0.0001331600444531036, "loss": 1.724, "step": 62012 }, { "epoch": 2.06, "grad_norm": 0.6118985414505005, "learning_rate": 0.000133151357871592, "loss": 1.6558, "step": 62013 }, { "epoch": 2.06, "grad_norm": 0.6047289371490479, "learning_rate": 0.00013314267149260994, "loss": 1.6993, "step": 62014 }, { "epoch": 2.06, "grad_norm": 0.6300864219665527, "learning_rate": 0.00013313398531616813, "loss": 1.7082, "step": 62015 }, { "epoch": 2.06, "grad_norm": 0.6223859190940857, "learning_rate": 0.00013312529934227717, "loss": 1.7092, "step": 62016 }, { "epoch": 2.06, "grad_norm": 0.6313668489456177, "learning_rate": 0.0001331166135709475, "loss": 1.6592, "step": 62017 }, { "epoch": 2.06, "grad_norm": 0.622472882270813, "learning_rate": 0.00013310792800218956, "loss": 1.72, "step": 62018 }, { "epoch": 2.06, "grad_norm": 0.6476857662200928, "learning_rate": 0.0001330992426360141, "loss": 1.6251, "step": 62019 }, { "epoch": 2.06, "grad_norm": 0.6380345821380615, "learning_rate": 0.0001330905574724316, "loss": 1.7199, "step": 62020 }, { "epoch": 2.06, "grad_norm": 0.6286443471908569, "learning_rate": 0.00013308187251145247, "loss": 1.754, "step": 62021 }, { "epoch": 2.06, "grad_norm": 0.6138399243354797, "learning_rate": 0.00013307318775308732, "loss": 1.5949, "step": 62022 }, { "epoch": 2.06, "grad_norm": 0.6235526204109192, "learning_rate": 0.00013306450319734684, "loss": 1.7249, "step": 62023 }, { "epoch": 2.06, "grad_norm": 0.6416633725166321, "learning_rate": 0.00013305581884424147, "loss": 1.7411, "step": 62024 }, { "epoch": 2.06, "grad_norm": 0.6203577518463135, "learning_rate": 0.00013304713469378164, "loss": 1.6857, "step": 62025 }, { "epoch": 2.06, "grad_norm": 0.6043232083320618, "learning_rate": 0.00013303845074597804, "loss": 1.7109, "step": 62026 }, { "epoch": 2.06, "grad_norm": 0.6081408262252808, "learning_rate": 0.00013302976700084123, "loss": 1.6472, "step": 62027 }, { "epoch": 2.06, "grad_norm": 0.6156632304191589, "learning_rate": 0.00013302108345838155, "loss": 1.7343, "step": 62028 }, { "epoch": 2.06, "grad_norm": 0.6482535004615784, "learning_rate": 0.00013301240011860975, "loss": 1.7434, "step": 62029 }, { "epoch": 2.06, "grad_norm": 0.639315664768219, "learning_rate": 0.00013300371698153624, "loss": 1.752, "step": 62030 }, { "epoch": 2.06, "grad_norm": 0.6280415654182434, "learning_rate": 0.00013299503404717164, "loss": 1.7136, "step": 62031 }, { "epoch": 2.06, "grad_norm": 0.6268008351325989, "learning_rate": 0.0001329863513155264, "loss": 1.7413, "step": 62032 }, { "epoch": 2.06, "grad_norm": 0.6235957145690918, "learning_rate": 0.00013297766878661124, "loss": 1.716, "step": 62033 }, { "epoch": 2.06, "grad_norm": 0.6288557648658752, "learning_rate": 0.00013296898646043658, "loss": 1.6674, "step": 62034 }, { "epoch": 2.06, "grad_norm": 0.6408005952835083, "learning_rate": 0.0001329603043370128, "loss": 1.6724, "step": 62035 }, { "epoch": 2.06, "grad_norm": 0.612846314907074, "learning_rate": 0.00013295162241635077, "loss": 1.7168, "step": 62036 }, { "epoch": 2.06, "grad_norm": 0.639443039894104, "learning_rate": 0.00013294294069846074, "loss": 1.6879, "step": 62037 }, { "epoch": 2.06, "grad_norm": 0.6467657685279846, "learning_rate": 0.00013293425918335343, "loss": 1.7651, "step": 62038 }, { "epoch": 2.06, "grad_norm": 0.6469812989234924, "learning_rate": 0.00013292557787103937, "loss": 1.6722, "step": 62039 }, { "epoch": 2.06, "grad_norm": 0.6189086437225342, "learning_rate": 0.00013291689676152893, "loss": 1.6901, "step": 62040 }, { "epoch": 2.06, "grad_norm": 0.6402640342712402, "learning_rate": 0.0001329082158548328, "loss": 1.7106, "step": 62041 }, { "epoch": 2.06, "grad_norm": 0.640751838684082, "learning_rate": 0.0001328995351509614, "loss": 1.7229, "step": 62042 }, { "epoch": 2.06, "grad_norm": 0.6315170526504517, "learning_rate": 0.00013289085464992547, "loss": 1.735, "step": 62043 }, { "epoch": 2.06, "grad_norm": 0.6421251893043518, "learning_rate": 0.0001328821743517353, "loss": 1.6749, "step": 62044 }, { "epoch": 2.06, "grad_norm": 0.6129945516586304, "learning_rate": 0.00013287349425640166, "loss": 1.7239, "step": 62045 }, { "epoch": 2.06, "grad_norm": 0.6159884929656982, "learning_rate": 0.000132864814363935, "loss": 1.7332, "step": 62046 }, { "epoch": 2.06, "grad_norm": 0.6267679929733276, "learning_rate": 0.0001328561346743457, "loss": 1.6495, "step": 62047 }, { "epoch": 2.06, "grad_norm": 0.6567927002906799, "learning_rate": 0.0001328474551876445, "loss": 1.7251, "step": 62048 }, { "epoch": 2.06, "grad_norm": 0.6325790286064148, "learning_rate": 0.0001328387759038419, "loss": 1.7109, "step": 62049 }, { "epoch": 2.06, "grad_norm": 0.6438435316085815, "learning_rate": 0.00013283009682294827, "loss": 1.7623, "step": 62050 }, { "epoch": 2.06, "grad_norm": 0.6088228225708008, "learning_rate": 0.0001328214179449743, "loss": 1.7645, "step": 62051 }, { "epoch": 2.06, "grad_norm": 0.6152975559234619, "learning_rate": 0.00013281273926993057, "loss": 1.7108, "step": 62052 }, { "epoch": 2.06, "grad_norm": 0.6390643119812012, "learning_rate": 0.00013280406079782756, "loss": 1.6741, "step": 62053 }, { "epoch": 2.06, "grad_norm": 0.6483087539672852, "learning_rate": 0.00013279538252867565, "loss": 1.7599, "step": 62054 }, { "epoch": 2.06, "grad_norm": 0.6426154971122742, "learning_rate": 0.00013278670446248563, "loss": 1.6816, "step": 62055 }, { "epoch": 2.06, "grad_norm": 0.6436241865158081, "learning_rate": 0.00013277802659926794, "loss": 1.7005, "step": 62056 }, { "epoch": 2.06, "grad_norm": 0.5959004759788513, "learning_rate": 0.00013276934893903292, "loss": 1.7245, "step": 62057 }, { "epoch": 2.06, "grad_norm": 0.6292836666107178, "learning_rate": 0.00013276067148179134, "loss": 1.732, "step": 62058 }, { "epoch": 2.06, "grad_norm": 0.6464440822601318, "learning_rate": 0.0001327519942275537, "loss": 1.7221, "step": 62059 }, { "epoch": 2.06, "grad_norm": 0.6517719626426697, "learning_rate": 0.00013274331717633056, "loss": 1.7431, "step": 62060 }, { "epoch": 2.06, "grad_norm": 0.6317165493965149, "learning_rate": 0.00013273464032813223, "loss": 1.6649, "step": 62061 }, { "epoch": 2.06, "grad_norm": 0.616118848323822, "learning_rate": 0.00013272596368296943, "loss": 1.6832, "step": 62062 }, { "epoch": 2.06, "grad_norm": 0.6182806491851807, "learning_rate": 0.00013271728724085287, "loss": 1.7154, "step": 62063 }, { "epoch": 2.06, "grad_norm": 0.6307541131973267, "learning_rate": 0.00013270861100179266, "loss": 1.6751, "step": 62064 }, { "epoch": 2.06, "grad_norm": 0.6426118612289429, "learning_rate": 0.00013269993496579956, "loss": 1.7721, "step": 62065 }, { "epoch": 2.06, "grad_norm": 0.6340188384056091, "learning_rate": 0.0001326912591328842, "loss": 1.6584, "step": 62066 }, { "epoch": 2.06, "grad_norm": 0.6244276762008667, "learning_rate": 0.00013268258350305698, "loss": 1.7491, "step": 62067 }, { "epoch": 2.07, "grad_norm": 0.6378016471862793, "learning_rate": 0.00013267390807632836, "loss": 1.748, "step": 62068 }, { "epoch": 2.07, "grad_norm": 0.6252539753913879, "learning_rate": 0.00013266523285270906, "loss": 1.6565, "step": 62069 }, { "epoch": 2.07, "grad_norm": 0.6326756477355957, "learning_rate": 0.0001326565578322095, "loss": 1.8423, "step": 62070 }, { "epoch": 2.07, "grad_norm": 0.6432397961616516, "learning_rate": 0.00013264788301484013, "loss": 1.797, "step": 62071 }, { "epoch": 2.07, "grad_norm": 0.6477609276771545, "learning_rate": 0.00013263920840061156, "loss": 1.6903, "step": 62072 }, { "epoch": 2.07, "grad_norm": 0.6354021430015564, "learning_rate": 0.00013263053398953446, "loss": 1.784, "step": 62073 }, { "epoch": 2.07, "grad_norm": 0.6194789409637451, "learning_rate": 0.00013262185978161922, "loss": 1.7536, "step": 62074 }, { "epoch": 2.07, "grad_norm": 0.6272941827774048, "learning_rate": 0.00013261318577687624, "loss": 1.7066, "step": 62075 }, { "epoch": 2.07, "grad_norm": 0.6301218867301941, "learning_rate": 0.00013260451197531634, "loss": 1.7259, "step": 62076 }, { "epoch": 2.07, "grad_norm": 0.6258700489997864, "learning_rate": 0.00013259583837694987, "loss": 1.7647, "step": 62077 }, { "epoch": 2.07, "grad_norm": 0.6507979035377502, "learning_rate": 0.00013258716498178733, "loss": 1.7907, "step": 62078 }, { "epoch": 2.07, "grad_norm": 0.6345500349998474, "learning_rate": 0.00013257849178983935, "loss": 1.7274, "step": 62079 }, { "epoch": 2.07, "grad_norm": 0.6513128876686096, "learning_rate": 0.00013256981880111634, "loss": 1.7013, "step": 62080 }, { "epoch": 2.07, "grad_norm": 0.6191998720169067, "learning_rate": 0.00013256114601562898, "loss": 1.712, "step": 62081 }, { "epoch": 2.07, "grad_norm": 0.6195080280303955, "learning_rate": 0.00013255247343338763, "loss": 1.7205, "step": 62082 }, { "epoch": 2.07, "grad_norm": 0.6229941844940186, "learning_rate": 0.000132543801054403, "loss": 1.725, "step": 62083 }, { "epoch": 2.07, "grad_norm": 0.6357453465461731, "learning_rate": 0.00013253512887868553, "loss": 1.6995, "step": 62084 }, { "epoch": 2.07, "grad_norm": 0.616942822933197, "learning_rate": 0.00013252645690624567, "loss": 1.6794, "step": 62085 }, { "epoch": 2.07, "grad_norm": 0.6349778771400452, "learning_rate": 0.00013251778513709406, "loss": 1.6926, "step": 62086 }, { "epoch": 2.07, "grad_norm": 0.6326594352722168, "learning_rate": 0.00013250911357124112, "loss": 1.675, "step": 62087 }, { "epoch": 2.07, "grad_norm": 0.6027352213859558, "learning_rate": 0.0001325004422086975, "loss": 1.7276, "step": 62088 }, { "epoch": 2.07, "grad_norm": 0.6163780689239502, "learning_rate": 0.0001324917710494737, "loss": 1.7443, "step": 62089 }, { "epoch": 2.07, "grad_norm": 0.6293909549713135, "learning_rate": 0.0001324831000935801, "loss": 1.6561, "step": 62090 }, { "epoch": 2.07, "grad_norm": 0.6305345296859741, "learning_rate": 0.00013247442934102743, "loss": 1.6193, "step": 62091 }, { "epoch": 2.07, "grad_norm": 0.6539681553840637, "learning_rate": 0.00013246575879182602, "loss": 1.7203, "step": 62092 }, { "epoch": 2.07, "grad_norm": 0.6265504956245422, "learning_rate": 0.00013245708844598661, "loss": 1.781, "step": 62093 }, { "epoch": 2.07, "grad_norm": 0.6124368906021118, "learning_rate": 0.00013244841830351947, "loss": 1.6875, "step": 62094 }, { "epoch": 2.07, "grad_norm": 0.6267955899238586, "learning_rate": 0.00013243974836443537, "loss": 1.6844, "step": 62095 }, { "epoch": 2.07, "grad_norm": 0.6182248592376709, "learning_rate": 0.00013243107862874476, "loss": 1.6375, "step": 62096 }, { "epoch": 2.07, "grad_norm": 0.6319238543510437, "learning_rate": 0.00013242240909645798, "loss": 1.7064, "step": 62097 }, { "epoch": 2.07, "grad_norm": 0.6239112019538879, "learning_rate": 0.00013241373976758587, "loss": 1.7209, "step": 62098 }, { "epoch": 2.07, "grad_norm": 0.6206913590431213, "learning_rate": 0.00013240507064213872, "loss": 1.7451, "step": 62099 }, { "epoch": 2.07, "grad_norm": 0.633826494216919, "learning_rate": 0.00013239640172012706, "loss": 1.7879, "step": 62100 }, { "epoch": 2.07, "grad_norm": 0.6223515868186951, "learning_rate": 0.00013238773300156147, "loss": 1.6898, "step": 62101 }, { "epoch": 2.07, "grad_norm": 0.6430574059486389, "learning_rate": 0.00013237906448645257, "loss": 1.7625, "step": 62102 }, { "epoch": 2.07, "grad_norm": 0.6518206000328064, "learning_rate": 0.0001323703961748108, "loss": 1.6886, "step": 62103 }, { "epoch": 2.07, "grad_norm": 0.6234949827194214, "learning_rate": 0.00013236172806664655, "loss": 1.6832, "step": 62104 }, { "epoch": 2.07, "grad_norm": 0.5950300693511963, "learning_rate": 0.00013235306016197055, "loss": 1.7373, "step": 62105 }, { "epoch": 2.07, "grad_norm": 0.6144975423812866, "learning_rate": 0.00013234439246079325, "loss": 1.7201, "step": 62106 }, { "epoch": 2.07, "grad_norm": 0.6418056488037109, "learning_rate": 0.00013233572496312507, "loss": 1.7143, "step": 62107 }, { "epoch": 2.07, "grad_norm": 0.6653344631195068, "learning_rate": 0.00013232705766897662, "loss": 1.7103, "step": 62108 }, { "epoch": 2.07, "grad_norm": 0.6321195960044861, "learning_rate": 0.00013231839057835859, "loss": 1.6638, "step": 62109 }, { "epoch": 2.07, "grad_norm": 0.6086052060127258, "learning_rate": 0.00013230972369128113, "loss": 1.7206, "step": 62110 }, { "epoch": 2.07, "grad_norm": 0.6518712043762207, "learning_rate": 0.00013230105700775497, "loss": 1.7528, "step": 62111 }, { "epoch": 2.07, "grad_norm": 0.6321803331375122, "learning_rate": 0.0001322923905277907, "loss": 1.6999, "step": 62112 }, { "epoch": 2.07, "grad_norm": 0.621522843837738, "learning_rate": 0.00013228372425139878, "loss": 1.6885, "step": 62113 }, { "epoch": 2.07, "grad_norm": 0.6210629343986511, "learning_rate": 0.00013227505817858958, "loss": 1.7569, "step": 62114 }, { "epoch": 2.07, "grad_norm": 0.6403946876525879, "learning_rate": 0.00013226639230937376, "loss": 1.7888, "step": 62115 }, { "epoch": 2.07, "grad_norm": 0.639781653881073, "learning_rate": 0.00013225772664376205, "loss": 1.692, "step": 62116 }, { "epoch": 2.07, "grad_norm": 0.6364551186561584, "learning_rate": 0.00013224906118176452, "loss": 1.711, "step": 62117 }, { "epoch": 2.07, "grad_norm": 0.6265953779220581, "learning_rate": 0.0001322403959233919, "loss": 1.6819, "step": 62118 }, { "epoch": 2.07, "grad_norm": 0.6250408887863159, "learning_rate": 0.00013223173086865484, "loss": 1.6519, "step": 62119 }, { "epoch": 2.07, "grad_norm": 0.6198500990867615, "learning_rate": 0.00013222306601756375, "loss": 1.6403, "step": 62120 }, { "epoch": 2.07, "grad_norm": 0.6454575061798096, "learning_rate": 0.00013221440137012902, "loss": 1.6874, "step": 62121 }, { "epoch": 2.07, "grad_norm": 0.6353134512901306, "learning_rate": 0.00013220573692636128, "loss": 1.6176, "step": 62122 }, { "epoch": 2.07, "grad_norm": 0.6294753551483154, "learning_rate": 0.00013219707268627128, "loss": 1.7452, "step": 62123 }, { "epoch": 2.07, "grad_norm": 0.6181919574737549, "learning_rate": 0.00013218840864986909, "loss": 1.7852, "step": 62124 }, { "epoch": 2.07, "grad_norm": 0.6361633539199829, "learning_rate": 0.00013217974481716542, "loss": 1.653, "step": 62125 }, { "epoch": 2.07, "grad_norm": 0.6303602457046509, "learning_rate": 0.00013217108118817095, "loss": 1.698, "step": 62126 }, { "epoch": 2.07, "grad_norm": 0.639509916305542, "learning_rate": 0.00013216241776289604, "loss": 1.6849, "step": 62127 }, { "epoch": 2.07, "grad_norm": 0.630561888217926, "learning_rate": 0.00013215375454135112, "loss": 1.7237, "step": 62128 }, { "epoch": 2.07, "grad_norm": 0.6146813035011292, "learning_rate": 0.00013214509152354692, "loss": 1.6018, "step": 62129 }, { "epoch": 2.07, "grad_norm": 0.6152480840682983, "learning_rate": 0.00013213642870949386, "loss": 1.6789, "step": 62130 }, { "epoch": 2.07, "grad_norm": 0.6058117151260376, "learning_rate": 0.0001321277660992023, "loss": 1.6431, "step": 62131 }, { "epoch": 2.07, "grad_norm": 0.6655490398406982, "learning_rate": 0.00013211910369268295, "loss": 1.7853, "step": 62132 }, { "epoch": 2.07, "grad_norm": 0.6332143545150757, "learning_rate": 0.00013211044148994636, "loss": 1.7073, "step": 62133 }, { "epoch": 2.07, "grad_norm": 0.6230263710021973, "learning_rate": 0.00013210177949100294, "loss": 1.7235, "step": 62134 }, { "epoch": 2.07, "grad_norm": 0.6260277628898621, "learning_rate": 0.00013209311769586313, "loss": 1.7416, "step": 62135 }, { "epoch": 2.07, "grad_norm": 0.6305209994316101, "learning_rate": 0.00013208445610453763, "loss": 1.624, "step": 62136 }, { "epoch": 2.07, "grad_norm": 0.6263151168823242, "learning_rate": 0.0001320757947170369, "loss": 1.7395, "step": 62137 }, { "epoch": 2.07, "grad_norm": 0.6179726719856262, "learning_rate": 0.00013206713353337124, "loss": 1.7376, "step": 62138 }, { "epoch": 2.07, "grad_norm": 0.6142584681510925, "learning_rate": 0.00013205847255355133, "loss": 1.6851, "step": 62139 }, { "epoch": 2.07, "grad_norm": 0.6216129660606384, "learning_rate": 0.00013204981177758788, "loss": 1.7178, "step": 62140 }, { "epoch": 2.07, "grad_norm": 0.6301337480545044, "learning_rate": 0.00013204115120549114, "loss": 1.7143, "step": 62141 }, { "epoch": 2.07, "grad_norm": 0.6029447913169861, "learning_rate": 0.0001320324908372716, "loss": 1.7332, "step": 62142 }, { "epoch": 2.07, "grad_norm": 0.6221742630004883, "learning_rate": 0.00013202383067294, "loss": 1.6828, "step": 62143 }, { "epoch": 2.07, "grad_norm": 0.6104903817176819, "learning_rate": 0.00013201517071250671, "loss": 1.7789, "step": 62144 }, { "epoch": 2.07, "grad_norm": 0.6175349354743958, "learning_rate": 0.00013200651095598214, "loss": 1.7273, "step": 62145 }, { "epoch": 2.07, "grad_norm": 0.6286017298698425, "learning_rate": 0.00013199785140337702, "loss": 1.7219, "step": 62146 }, { "epoch": 2.07, "grad_norm": 0.6009697318077087, "learning_rate": 0.00013198919205470165, "loss": 1.7723, "step": 62147 }, { "epoch": 2.07, "grad_norm": 0.6432344913482666, "learning_rate": 0.00013198053290996677, "loss": 1.7364, "step": 62148 }, { "epoch": 2.07, "grad_norm": 0.620914101600647, "learning_rate": 0.00013197187396918263, "loss": 1.6411, "step": 62149 }, { "epoch": 2.07, "grad_norm": 0.6267585754394531, "learning_rate": 0.00013196321523236, "loss": 1.7507, "step": 62150 }, { "epoch": 2.07, "grad_norm": 0.5919775366783142, "learning_rate": 0.00013195455669950926, "loss": 1.7172, "step": 62151 }, { "epoch": 2.07, "grad_norm": 0.6353681683540344, "learning_rate": 0.00013194589837064083, "loss": 1.6657, "step": 62152 }, { "epoch": 2.07, "grad_norm": 0.6397610306739807, "learning_rate": 0.00013193724024576541, "loss": 1.693, "step": 62153 }, { "epoch": 2.07, "grad_norm": 0.6129492521286011, "learning_rate": 0.00013192858232489332, "loss": 1.7644, "step": 62154 }, { "epoch": 2.07, "grad_norm": 0.6136062145233154, "learning_rate": 0.00013191992460803527, "loss": 1.7389, "step": 62155 }, { "epoch": 2.07, "grad_norm": 0.6443483829498291, "learning_rate": 0.0001319112670952017, "loss": 1.6892, "step": 62156 }, { "epoch": 2.07, "grad_norm": 0.6154688000679016, "learning_rate": 0.0001319026097864029, "loss": 1.7689, "step": 62157 }, { "epoch": 2.07, "grad_norm": 0.6174061894416809, "learning_rate": 0.00013189395268164975, "loss": 1.6969, "step": 62158 }, { "epoch": 2.07, "grad_norm": 0.6122552156448364, "learning_rate": 0.00013188529578095243, "loss": 1.6813, "step": 62159 }, { "epoch": 2.07, "grad_norm": 0.6034148931503296, "learning_rate": 0.0001318766390843217, "loss": 1.694, "step": 62160 }, { "epoch": 2.07, "grad_norm": 0.6069318652153015, "learning_rate": 0.00013186798259176783, "loss": 1.6648, "step": 62161 }, { "epoch": 2.07, "grad_norm": 0.6397261619567871, "learning_rate": 0.00013185932630330158, "loss": 1.7224, "step": 62162 }, { "epoch": 2.07, "grad_norm": 0.6492828726768494, "learning_rate": 0.00013185067021893334, "loss": 1.6732, "step": 62163 }, { "epoch": 2.07, "grad_norm": 0.6422369480133057, "learning_rate": 0.00013184201433867352, "loss": 1.7531, "step": 62164 }, { "epoch": 2.07, "grad_norm": 0.61929851770401, "learning_rate": 0.0001318333586625328, "loss": 1.778, "step": 62165 }, { "epoch": 2.07, "grad_norm": 0.6104023456573486, "learning_rate": 0.0001318247031905216, "loss": 1.6363, "step": 62166 }, { "epoch": 2.07, "grad_norm": 0.6188828349113464, "learning_rate": 0.00013181604792265034, "loss": 1.7122, "step": 62167 }, { "epoch": 2.07, "grad_norm": 0.6281861066818237, "learning_rate": 0.00013180739285892959, "loss": 1.7353, "step": 62168 }, { "epoch": 2.07, "grad_norm": 0.613885760307312, "learning_rate": 0.00013179873799937003, "loss": 1.6962, "step": 62169 }, { "epoch": 2.07, "grad_norm": 0.6128590106964111, "learning_rate": 0.000131790083343982, "loss": 1.7154, "step": 62170 }, { "epoch": 2.07, "grad_norm": 0.6000019311904907, "learning_rate": 0.0001317814288927759, "loss": 1.607, "step": 62171 }, { "epoch": 2.07, "grad_norm": 0.6135883927345276, "learning_rate": 0.00013177277464576238, "loss": 1.7633, "step": 62172 }, { "epoch": 2.07, "grad_norm": 0.6394283175468445, "learning_rate": 0.00013176412060295214, "loss": 1.7149, "step": 62173 }, { "epoch": 2.07, "grad_norm": 0.6188950538635254, "learning_rate": 0.00013175546676435528, "loss": 1.6993, "step": 62174 }, { "epoch": 2.07, "grad_norm": 0.929794192314148, "learning_rate": 0.00013174681312998245, "loss": 1.7128, "step": 62175 }, { "epoch": 2.07, "grad_norm": 0.6301672458648682, "learning_rate": 0.0001317381596998443, "loss": 1.727, "step": 62176 }, { "epoch": 2.07, "grad_norm": 0.6205496191978455, "learning_rate": 0.00013172950647395126, "loss": 1.7835, "step": 62177 }, { "epoch": 2.07, "grad_norm": 0.5990840792655945, "learning_rate": 0.0001317208534523137, "loss": 1.7592, "step": 62178 }, { "epoch": 2.07, "grad_norm": 2.5917487144470215, "learning_rate": 0.0001317122006349422, "loss": 1.7189, "step": 62179 }, { "epoch": 2.07, "grad_norm": 0.6305195093154907, "learning_rate": 0.00013170354802184752, "loss": 1.6972, "step": 62180 }, { "epoch": 2.07, "grad_norm": 0.6268097758293152, "learning_rate": 0.00013169489561303972, "loss": 1.6913, "step": 62181 }, { "epoch": 2.07, "grad_norm": 0.6284063458442688, "learning_rate": 0.00013168624340852955, "loss": 1.6994, "step": 62182 }, { "epoch": 2.07, "grad_norm": 0.6124011278152466, "learning_rate": 0.00013167759140832758, "loss": 1.6739, "step": 62183 }, { "epoch": 2.07, "grad_norm": 0.6287640333175659, "learning_rate": 0.00013166893961244418, "loss": 1.7448, "step": 62184 }, { "epoch": 2.07, "grad_norm": 0.6169742345809937, "learning_rate": 0.00013166028802088977, "loss": 1.6681, "step": 62185 }, { "epoch": 2.07, "grad_norm": 0.6070547103881836, "learning_rate": 0.00013165163663367513, "loss": 1.755, "step": 62186 }, { "epoch": 2.07, "grad_norm": 0.6458856463432312, "learning_rate": 0.00013164298545081054, "loss": 1.7376, "step": 62187 }, { "epoch": 2.07, "grad_norm": 0.6305891275405884, "learning_rate": 0.00013163433447230648, "loss": 1.7342, "step": 62188 }, { "epoch": 2.07, "grad_norm": 0.6266566514968872, "learning_rate": 0.00013162568369817353, "loss": 1.5924, "step": 62189 }, { "epoch": 2.07, "grad_norm": 0.6305639147758484, "learning_rate": 0.0001316170331284223, "loss": 1.6977, "step": 62190 }, { "epoch": 2.07, "grad_norm": 0.6415489315986633, "learning_rate": 0.00013160838276306318, "loss": 1.7678, "step": 62191 }, { "epoch": 2.07, "grad_norm": 0.6146013736724854, "learning_rate": 0.00013159973260210657, "loss": 1.7311, "step": 62192 }, { "epoch": 2.07, "grad_norm": 0.644376277923584, "learning_rate": 0.00013159108264556316, "loss": 1.7194, "step": 62193 }, { "epoch": 2.07, "grad_norm": 0.6091004610061646, "learning_rate": 0.00013158243289344338, "loss": 1.7317, "step": 62194 }, { "epoch": 2.07, "grad_norm": 0.6240668296813965, "learning_rate": 0.0001315737833457576, "loss": 1.6871, "step": 62195 }, { "epoch": 2.07, "grad_norm": 0.6223350167274475, "learning_rate": 0.00013156513400251654, "loss": 1.6918, "step": 62196 }, { "epoch": 2.07, "grad_norm": 0.6231013536453247, "learning_rate": 0.00013155648486373045, "loss": 1.6471, "step": 62197 }, { "epoch": 2.07, "grad_norm": 0.6128875017166138, "learning_rate": 0.00013154783592941012, "loss": 1.7076, "step": 62198 }, { "epoch": 2.07, "grad_norm": 0.6282804608345032, "learning_rate": 0.00013153918719956576, "loss": 1.698, "step": 62199 }, { "epoch": 2.07, "grad_norm": 0.6256665587425232, "learning_rate": 0.00013153053867420814, "loss": 1.7335, "step": 62200 }, { "epoch": 2.07, "grad_norm": 0.6033344864845276, "learning_rate": 0.0001315218903533476, "loss": 1.6696, "step": 62201 }, { "epoch": 2.07, "grad_norm": 0.6217260956764221, "learning_rate": 0.00013151324223699458, "loss": 1.7035, "step": 62202 }, { "epoch": 2.07, "grad_norm": 0.6536509394645691, "learning_rate": 0.00013150459432515973, "loss": 1.7193, "step": 62203 }, { "epoch": 2.07, "grad_norm": 0.625670850276947, "learning_rate": 0.00013149594661785339, "loss": 1.703, "step": 62204 }, { "epoch": 2.07, "grad_norm": 0.6578965187072754, "learning_rate": 0.00013148729911508625, "loss": 1.7159, "step": 62205 }, { "epoch": 2.07, "grad_norm": 0.6721459627151489, "learning_rate": 0.00013147865181686869, "loss": 1.6528, "step": 62206 }, { "epoch": 2.07, "grad_norm": 0.6236878037452698, "learning_rate": 0.00013147000472321112, "loss": 1.6812, "step": 62207 }, { "epoch": 2.07, "grad_norm": 0.6274235844612122, "learning_rate": 0.00013146135783412424, "loss": 1.7943, "step": 62208 }, { "epoch": 2.07, "grad_norm": 0.6368746161460876, "learning_rate": 0.00013145271114961833, "loss": 1.7265, "step": 62209 }, { "epoch": 2.07, "grad_norm": 0.6626419425010681, "learning_rate": 0.0001314440646697041, "loss": 1.7227, "step": 62210 }, { "epoch": 2.07, "grad_norm": 0.6111258864402771, "learning_rate": 0.00013143541839439184, "loss": 1.6936, "step": 62211 }, { "epoch": 2.07, "grad_norm": 0.6364099383354187, "learning_rate": 0.00013142677232369225, "loss": 1.7101, "step": 62212 }, { "epoch": 2.07, "grad_norm": 0.6112748384475708, "learning_rate": 0.0001314181264576157, "loss": 1.6747, "step": 62213 }, { "epoch": 2.07, "grad_norm": 0.61310875415802, "learning_rate": 0.0001314094807961726, "loss": 1.7026, "step": 62214 }, { "epoch": 2.07, "grad_norm": 0.6285282373428345, "learning_rate": 0.00013140083533937368, "loss": 1.7388, "step": 62215 }, { "epoch": 2.07, "grad_norm": 0.6398983597755432, "learning_rate": 0.0001313921900872293, "loss": 1.8076, "step": 62216 }, { "epoch": 2.07, "grad_norm": 0.6114543080329895, "learning_rate": 0.00013138354503974982, "loss": 1.6607, "step": 62217 }, { "epoch": 2.07, "grad_norm": 0.606107771396637, "learning_rate": 0.00013137490019694592, "loss": 1.6177, "step": 62218 }, { "epoch": 2.07, "grad_norm": 0.6261700391769409, "learning_rate": 0.00013136625555882812, "loss": 1.6897, "step": 62219 }, { "epoch": 2.07, "grad_norm": 0.6328455805778503, "learning_rate": 0.00013135761112540684, "loss": 1.7434, "step": 62220 }, { "epoch": 2.07, "grad_norm": 0.6303315758705139, "learning_rate": 0.00013134896689669248, "loss": 1.7508, "step": 62221 }, { "epoch": 2.07, "grad_norm": 0.6427447199821472, "learning_rate": 0.00013134032287269573, "loss": 1.7337, "step": 62222 }, { "epoch": 2.07, "grad_norm": 0.6215280890464783, "learning_rate": 0.00013133167905342698, "loss": 1.7223, "step": 62223 }, { "epoch": 2.07, "grad_norm": 0.6342480182647705, "learning_rate": 0.00013132303543889658, "loss": 1.7111, "step": 62224 }, { "epoch": 2.07, "grad_norm": 0.6191211342811584, "learning_rate": 0.00013131439202911517, "loss": 1.6954, "step": 62225 }, { "epoch": 2.07, "grad_norm": 0.6186383962631226, "learning_rate": 0.0001313057488240935, "loss": 1.6969, "step": 62226 }, { "epoch": 2.07, "grad_norm": 0.6090342998504639, "learning_rate": 0.00013129710582384152, "loss": 1.7803, "step": 62227 }, { "epoch": 2.07, "grad_norm": 0.629127025604248, "learning_rate": 0.00013128846302837, "loss": 1.6874, "step": 62228 }, { "epoch": 2.07, "grad_norm": 0.6202609539031982, "learning_rate": 0.00013127982043768956, "loss": 1.6819, "step": 62229 }, { "epoch": 2.07, "grad_norm": 0.6238913536071777, "learning_rate": 0.0001312711780518106, "loss": 1.7683, "step": 62230 }, { "epoch": 2.07, "grad_norm": 0.6235190033912659, "learning_rate": 0.00013126253587074339, "loss": 1.6848, "step": 62231 }, { "epoch": 2.07, "grad_norm": 0.6325408816337585, "learning_rate": 0.00013125389389449864, "loss": 1.6834, "step": 62232 }, { "epoch": 2.07, "grad_norm": 0.6270888447761536, "learning_rate": 0.00013124525212308697, "loss": 1.7182, "step": 62233 }, { "epoch": 2.07, "grad_norm": 0.6194561719894409, "learning_rate": 0.0001312366105565185, "loss": 1.6305, "step": 62234 }, { "epoch": 2.07, "grad_norm": 0.6174989342689514, "learning_rate": 0.00013122796919480396, "loss": 1.6763, "step": 62235 }, { "epoch": 2.07, "grad_norm": 0.6213933229446411, "learning_rate": 0.00013121932803795386, "loss": 1.7352, "step": 62236 }, { "epoch": 2.07, "grad_norm": 0.6253203749656677, "learning_rate": 0.00013121068708597865, "loss": 1.8233, "step": 62237 }, { "epoch": 2.07, "grad_norm": 0.6198300123214722, "learning_rate": 0.00013120204633888867, "loss": 1.7085, "step": 62238 }, { "epoch": 2.07, "grad_norm": 0.6373725533485413, "learning_rate": 0.00013119340579669455, "loss": 1.6771, "step": 62239 }, { "epoch": 2.07, "grad_norm": 0.6251411437988281, "learning_rate": 0.00013118476545940697, "loss": 1.751, "step": 62240 }, { "epoch": 2.07, "grad_norm": 0.6369213461875916, "learning_rate": 0.00013117612532703599, "loss": 1.693, "step": 62241 }, { "epoch": 2.07, "grad_norm": 0.6132652759552002, "learning_rate": 0.00013116748539959233, "loss": 1.7213, "step": 62242 }, { "epoch": 2.07, "grad_norm": 0.6483410596847534, "learning_rate": 0.00013115884567708658, "loss": 1.7099, "step": 62243 }, { "epoch": 2.07, "grad_norm": 0.6699226498603821, "learning_rate": 0.0001311502061595291, "loss": 1.7694, "step": 62244 }, { "epoch": 2.07, "grad_norm": 0.620335578918457, "learning_rate": 0.0001311415668469303, "loss": 1.736, "step": 62245 }, { "epoch": 2.07, "grad_norm": 0.6273351311683655, "learning_rate": 0.00013113292773930086, "loss": 1.6664, "step": 62246 }, { "epoch": 2.07, "grad_norm": 0.604306697845459, "learning_rate": 0.00013112428883665117, "loss": 1.686, "step": 62247 }, { "epoch": 2.07, "grad_norm": 0.6300435066223145, "learning_rate": 0.0001311156501389916, "loss": 1.7182, "step": 62248 }, { "epoch": 2.07, "grad_norm": 0.6332311034202576, "learning_rate": 0.0001311070116463327, "loss": 1.723, "step": 62249 }, { "epoch": 2.07, "grad_norm": 0.6406014561653137, "learning_rate": 0.00013109837335868515, "loss": 1.7887, "step": 62250 }, { "epoch": 2.07, "grad_norm": 0.6231949925422668, "learning_rate": 0.0001310897352760593, "loss": 1.7303, "step": 62251 }, { "epoch": 2.07, "grad_norm": 0.6660370826721191, "learning_rate": 0.0001310810973984655, "loss": 1.7022, "step": 62252 }, { "epoch": 2.07, "grad_norm": 0.6487094759941101, "learning_rate": 0.00013107245972591446, "loss": 1.7853, "step": 62253 }, { "epoch": 2.07, "grad_norm": 0.6418164968490601, "learning_rate": 0.00013106382225841658, "loss": 1.7037, "step": 62254 }, { "epoch": 2.07, "grad_norm": 0.6069366931915283, "learning_rate": 0.00013105518499598222, "loss": 1.6761, "step": 62255 }, { "epoch": 2.07, "grad_norm": 0.5842803120613098, "learning_rate": 0.00013104654793862205, "loss": 1.6684, "step": 62256 }, { "epoch": 2.07, "grad_norm": 0.633564293384552, "learning_rate": 0.00013103791108634642, "loss": 1.7374, "step": 62257 }, { "epoch": 2.07, "grad_norm": 0.6335132122039795, "learning_rate": 0.00013102927443916593, "loss": 1.7473, "step": 62258 }, { "epoch": 2.07, "grad_norm": 0.6213709115982056, "learning_rate": 0.00013102063799709095, "loss": 1.6742, "step": 62259 }, { "epoch": 2.07, "grad_norm": 0.6121247410774231, "learning_rate": 0.00013101200176013206, "loss": 1.6859, "step": 62260 }, { "epoch": 2.07, "grad_norm": 0.6204112768173218, "learning_rate": 0.00013100336572829976, "loss": 1.7149, "step": 62261 }, { "epoch": 2.07, "grad_norm": 0.6295069456100464, "learning_rate": 0.00013099472990160433, "loss": 1.6712, "step": 62262 }, { "epoch": 2.07, "grad_norm": 0.6525688171386719, "learning_rate": 0.0001309860942800565, "loss": 1.6839, "step": 62263 }, { "epoch": 2.07, "grad_norm": 0.6027906537055969, "learning_rate": 0.00013097745886366655, "loss": 1.6908, "step": 62264 }, { "epoch": 2.07, "grad_norm": 0.6246898770332336, "learning_rate": 0.00013096882365244518, "loss": 1.7003, "step": 62265 }, { "epoch": 2.07, "grad_norm": 0.6349974870681763, "learning_rate": 0.00013096018864640265, "loss": 1.7699, "step": 62266 }, { "epoch": 2.07, "grad_norm": 0.6442150473594666, "learning_rate": 0.0001309515538455496, "loss": 1.7638, "step": 62267 }, { "epoch": 2.07, "grad_norm": 0.6290051341056824, "learning_rate": 0.00013094291924989655, "loss": 1.6476, "step": 62268 }, { "epoch": 2.07, "grad_norm": 0.6105611324310303, "learning_rate": 0.0001309342848594537, "loss": 1.6247, "step": 62269 }, { "epoch": 2.07, "grad_norm": 0.6307856440544128, "learning_rate": 0.00013092565067423186, "loss": 1.69, "step": 62270 }, { "epoch": 2.07, "grad_norm": 0.6358948945999146, "learning_rate": 0.00013091701669424124, "loss": 1.7744, "step": 62271 }, { "epoch": 2.07, "grad_norm": 0.6062924265861511, "learning_rate": 0.00013090838291949256, "loss": 1.6081, "step": 62272 }, { "epoch": 2.07, "grad_norm": 0.6181859970092773, "learning_rate": 0.00013089974934999622, "loss": 1.6924, "step": 62273 }, { "epoch": 2.07, "grad_norm": 0.6317038536071777, "learning_rate": 0.0001308911159857625, "loss": 1.6987, "step": 62274 }, { "epoch": 2.07, "grad_norm": 0.6281235814094543, "learning_rate": 0.0001308824828268022, "loss": 1.7327, "step": 62275 }, { "epoch": 2.07, "grad_norm": 0.6465333104133606, "learning_rate": 0.00013087384987312553, "loss": 1.7166, "step": 62276 }, { "epoch": 2.07, "grad_norm": 0.6184455156326294, "learning_rate": 0.00013086521712474318, "loss": 1.6923, "step": 62277 }, { "epoch": 2.07, "grad_norm": 0.6576915383338928, "learning_rate": 0.00013085658458166546, "loss": 1.734, "step": 62278 }, { "epoch": 2.07, "grad_norm": 0.6239171624183655, "learning_rate": 0.00013084795224390302, "loss": 1.638, "step": 62279 }, { "epoch": 2.07, "grad_norm": 0.6392784118652344, "learning_rate": 0.00013083932011146622, "loss": 1.762, "step": 62280 }, { "epoch": 2.07, "grad_norm": 0.6229586601257324, "learning_rate": 0.00013083068818436547, "loss": 1.638, "step": 62281 }, { "epoch": 2.07, "grad_norm": 0.6466792225837708, "learning_rate": 0.00013082205646261147, "loss": 1.6793, "step": 62282 }, { "epoch": 2.07, "grad_norm": 0.6318241953849792, "learning_rate": 0.00013081342494621455, "loss": 1.7346, "step": 62283 }, { "epoch": 2.07, "grad_norm": 0.6387602686882019, "learning_rate": 0.00013080479363518507, "loss": 1.7686, "step": 62284 }, { "epoch": 2.07, "grad_norm": 0.6425466537475586, "learning_rate": 0.00013079616252953364, "loss": 1.7313, "step": 62285 }, { "epoch": 2.07, "grad_norm": 0.621100127696991, "learning_rate": 0.0001307875316292709, "loss": 1.6794, "step": 62286 }, { "epoch": 2.07, "grad_norm": 0.6267958879470825, "learning_rate": 0.00013077890093440712, "loss": 1.686, "step": 62287 }, { "epoch": 2.07, "grad_norm": 0.6362444758415222, "learning_rate": 0.0001307702704449527, "loss": 1.7339, "step": 62288 }, { "epoch": 2.07, "grad_norm": 0.6251396536827087, "learning_rate": 0.00013076164016091827, "loss": 1.6882, "step": 62289 }, { "epoch": 2.07, "grad_norm": 0.6093906164169312, "learning_rate": 0.00013075301008231449, "loss": 1.7383, "step": 62290 }, { "epoch": 2.07, "grad_norm": 0.646195113658905, "learning_rate": 0.00013074438020915137, "loss": 1.7502, "step": 62291 }, { "epoch": 2.07, "grad_norm": 0.6296769380569458, "learning_rate": 0.00013073575054143964, "loss": 1.6709, "step": 62292 }, { "epoch": 2.07, "grad_norm": 0.6094210147857666, "learning_rate": 0.0001307271210791899, "loss": 1.7656, "step": 62293 }, { "epoch": 2.07, "grad_norm": 0.6017264723777771, "learning_rate": 0.00013071849182241249, "loss": 1.6895, "step": 62294 }, { "epoch": 2.07, "grad_norm": 0.6111893057823181, "learning_rate": 0.0001307098627711178, "loss": 1.6599, "step": 62295 }, { "epoch": 2.07, "grad_norm": 0.7579348683357239, "learning_rate": 0.00013070123392531634, "loss": 1.7164, "step": 62296 }, { "epoch": 2.07, "grad_norm": 0.6498633623123169, "learning_rate": 0.0001306926052850189, "loss": 1.7016, "step": 62297 }, { "epoch": 2.07, "grad_norm": 0.6505122780799866, "learning_rate": 0.00013068397685023545, "loss": 1.724, "step": 62298 }, { "epoch": 2.07, "grad_norm": 0.6267780065536499, "learning_rate": 0.0001306753486209767, "loss": 1.7794, "step": 62299 }, { "epoch": 2.07, "grad_norm": 0.6296454668045044, "learning_rate": 0.00013066672059725327, "loss": 1.6347, "step": 62300 }, { "epoch": 2.07, "grad_norm": 0.600012481212616, "learning_rate": 0.00013065809277907546, "loss": 1.6612, "step": 62301 }, { "epoch": 2.07, "grad_norm": 0.6468856334686279, "learning_rate": 0.00013064946516645366, "loss": 1.7082, "step": 62302 }, { "epoch": 2.07, "grad_norm": 0.630781888961792, "learning_rate": 0.00013064083775939857, "loss": 1.7652, "step": 62303 }, { "epoch": 2.07, "grad_norm": 0.6221902370452881, "learning_rate": 0.00013063221055792056, "loss": 1.6606, "step": 62304 }, { "epoch": 2.07, "grad_norm": 0.6289313435554504, "learning_rate": 0.00013062358356202998, "loss": 1.7182, "step": 62305 }, { "epoch": 2.07, "grad_norm": 0.6089164614677429, "learning_rate": 0.00013061495677173742, "loss": 1.6543, "step": 62306 }, { "epoch": 2.07, "grad_norm": 0.6190308928489685, "learning_rate": 0.00013060633018705344, "loss": 1.7244, "step": 62307 }, { "epoch": 2.07, "grad_norm": 0.6179214715957642, "learning_rate": 0.00013059770380798843, "loss": 1.7633, "step": 62308 }, { "epoch": 2.07, "grad_norm": 0.609580934047699, "learning_rate": 0.00013058907763455275, "loss": 1.731, "step": 62309 }, { "epoch": 2.07, "grad_norm": 0.6106346845626831, "learning_rate": 0.00013058045166675701, "loss": 1.7363, "step": 62310 }, { "epoch": 2.07, "grad_norm": 0.6228243112564087, "learning_rate": 0.00013057182590461172, "loss": 1.7674, "step": 62311 }, { "epoch": 2.07, "grad_norm": 0.6198481321334839, "learning_rate": 0.0001305632003481271, "loss": 1.697, "step": 62312 }, { "epoch": 2.07, "grad_norm": 0.6297184824943542, "learning_rate": 0.00013055457499731396, "loss": 1.6873, "step": 62313 }, { "epoch": 2.07, "grad_norm": 0.6272503137588501, "learning_rate": 0.00013054594985218245, "loss": 1.789, "step": 62314 }, { "epoch": 2.07, "grad_norm": 0.6332128047943115, "learning_rate": 0.0001305373249127433, "loss": 1.6936, "step": 62315 }, { "epoch": 2.07, "grad_norm": 0.6336835622787476, "learning_rate": 0.00013052870017900677, "loss": 1.6858, "step": 62316 }, { "epoch": 2.07, "grad_norm": 0.6274914741516113, "learning_rate": 0.00013052007565098351, "loss": 1.7385, "step": 62317 }, { "epoch": 2.07, "grad_norm": 0.6375693678855896, "learning_rate": 0.00013051145132868394, "loss": 1.6655, "step": 62318 }, { "epoch": 2.07, "grad_norm": 0.6367275714874268, "learning_rate": 0.00013050282721211836, "loss": 1.676, "step": 62319 }, { "epoch": 2.07, "grad_norm": 0.6149873733520508, "learning_rate": 0.0001304942033012975, "loss": 1.6006, "step": 62320 }, { "epoch": 2.07, "grad_norm": 0.6145528554916382, "learning_rate": 0.00013048557959623159, "loss": 1.6535, "step": 62321 }, { "epoch": 2.07, "grad_norm": 0.6325986385345459, "learning_rate": 0.0001304769560969313, "loss": 1.6816, "step": 62322 }, { "epoch": 2.07, "grad_norm": 0.633328378200531, "learning_rate": 0.00013046833280340705, "loss": 1.7432, "step": 62323 }, { "epoch": 2.07, "grad_norm": 0.6200571656227112, "learning_rate": 0.00013045970971566913, "loss": 1.6954, "step": 62324 }, { "epoch": 2.07, "grad_norm": 0.6110100746154785, "learning_rate": 0.00013045108683372827, "loss": 1.6726, "step": 62325 }, { "epoch": 2.07, "grad_norm": 0.6223573088645935, "learning_rate": 0.00013044246415759464, "loss": 1.7018, "step": 62326 }, { "epoch": 2.07, "grad_norm": 0.6089856028556824, "learning_rate": 0.00013043384168727904, "loss": 1.6685, "step": 62327 }, { "epoch": 2.07, "grad_norm": 0.6268588900566101, "learning_rate": 0.00013042521942279166, "loss": 1.7741, "step": 62328 }, { "epoch": 2.07, "grad_norm": 0.643115758895874, "learning_rate": 0.00013041659736414318, "loss": 1.7956, "step": 62329 }, { "epoch": 2.07, "grad_norm": 0.6187031269073486, "learning_rate": 0.00013040797551134393, "loss": 1.7553, "step": 62330 }, { "epoch": 2.07, "grad_norm": 0.622891902923584, "learning_rate": 0.00013039935386440431, "loss": 1.7149, "step": 62331 }, { "epoch": 2.07, "grad_norm": 0.6298221349716187, "learning_rate": 0.00013039073242333504, "loss": 1.7841, "step": 62332 }, { "epoch": 2.07, "grad_norm": 0.6353055238723755, "learning_rate": 0.00013038211118814638, "loss": 1.7836, "step": 62333 }, { "epoch": 2.07, "grad_norm": 0.6598727107048035, "learning_rate": 0.0001303734901588488, "loss": 1.7218, "step": 62334 }, { "epoch": 2.07, "grad_norm": 0.6533183455467224, "learning_rate": 0.00013036486933545275, "loss": 1.7033, "step": 62335 }, { "epoch": 2.07, "grad_norm": 0.6534799933433533, "learning_rate": 0.0001303562487179689, "loss": 1.7084, "step": 62336 }, { "epoch": 2.07, "grad_norm": 0.6347053647041321, "learning_rate": 0.00013034762830640753, "loss": 1.7312, "step": 62337 }, { "epoch": 2.07, "grad_norm": 0.6172410249710083, "learning_rate": 0.00013033900810077907, "loss": 1.7536, "step": 62338 }, { "epoch": 2.07, "grad_norm": 0.6402463316917419, "learning_rate": 0.00013033038810109413, "loss": 1.771, "step": 62339 }, { "epoch": 2.07, "grad_norm": 0.6480158567428589, "learning_rate": 0.00013032176830736313, "loss": 1.7281, "step": 62340 }, { "epoch": 2.07, "grad_norm": 0.6326571106910706, "learning_rate": 0.00013031314871959635, "loss": 1.7924, "step": 62341 }, { "epoch": 2.07, "grad_norm": 0.6269838809967041, "learning_rate": 0.00013030452933780444, "loss": 1.6892, "step": 62342 }, { "epoch": 2.07, "grad_norm": 0.6177950501441956, "learning_rate": 0.00013029591016199807, "loss": 1.742, "step": 62343 }, { "epoch": 2.07, "grad_norm": 0.6419463157653809, "learning_rate": 0.0001302872911921872, "loss": 1.7529, "step": 62344 }, { "epoch": 2.07, "grad_norm": 0.6420416235923767, "learning_rate": 0.00013027867242838256, "loss": 1.697, "step": 62345 }, { "epoch": 2.07, "grad_norm": 0.6572357416152954, "learning_rate": 0.0001302700538705947, "loss": 1.7359, "step": 62346 }, { "epoch": 2.07, "grad_norm": 0.6431333422660828, "learning_rate": 0.00013026143551883403, "loss": 1.6425, "step": 62347 }, { "epoch": 2.07, "grad_norm": 0.6440539956092834, "learning_rate": 0.00013025281737311084, "loss": 1.6523, "step": 62348 }, { "epoch": 2.07, "grad_norm": 0.6538920998573303, "learning_rate": 0.0001302441994334357, "loss": 1.7026, "step": 62349 }, { "epoch": 2.07, "grad_norm": 0.641565203666687, "learning_rate": 0.0001302355816998193, "loss": 1.7739, "step": 62350 }, { "epoch": 2.07, "grad_norm": 0.6441964507102966, "learning_rate": 0.00013022696417227172, "loss": 1.788, "step": 62351 }, { "epoch": 2.07, "grad_norm": 0.6466280221939087, "learning_rate": 0.00013021834685080356, "loss": 1.6876, "step": 62352 }, { "epoch": 2.07, "grad_norm": 0.6191727519035339, "learning_rate": 0.0001302097297354254, "loss": 1.7141, "step": 62353 }, { "epoch": 2.07, "grad_norm": 0.6719415187835693, "learning_rate": 0.00013020111282614766, "loss": 1.6641, "step": 62354 }, { "epoch": 2.07, "grad_norm": 0.6730623245239258, "learning_rate": 0.00013019249612298062, "loss": 1.7199, "step": 62355 }, { "epoch": 2.07, "grad_norm": 0.6281522512435913, "learning_rate": 0.00013018387962593486, "loss": 1.6981, "step": 62356 }, { "epoch": 2.07, "grad_norm": 0.6442573666572571, "learning_rate": 0.00013017526333502106, "loss": 1.7325, "step": 62357 }, { "epoch": 2.07, "grad_norm": 0.6733773946762085, "learning_rate": 0.00013016664725024927, "loss": 1.7086, "step": 62358 }, { "epoch": 2.07, "grad_norm": 0.6427796483039856, "learning_rate": 0.00013015803137163013, "loss": 1.7346, "step": 62359 }, { "epoch": 2.07, "grad_norm": 0.6216902136802673, "learning_rate": 0.00013014941569917422, "loss": 1.7293, "step": 62360 }, { "epoch": 2.07, "grad_norm": 0.6233334541320801, "learning_rate": 0.00013014080023289194, "loss": 1.7298, "step": 62361 }, { "epoch": 2.07, "grad_norm": 0.6205969452857971, "learning_rate": 0.00013013218497279355, "loss": 1.7337, "step": 62362 }, { "epoch": 2.07, "grad_norm": 0.627429187297821, "learning_rate": 0.00013012356991888975, "loss": 1.6837, "step": 62363 }, { "epoch": 2.07, "grad_norm": 0.64081871509552, "learning_rate": 0.00013011495507119094, "loss": 1.7459, "step": 62364 }, { "epoch": 2.07, "grad_norm": 0.6157658100128174, "learning_rate": 0.0001301063404297074, "loss": 1.7087, "step": 62365 }, { "epoch": 2.07, "grad_norm": 0.6060009002685547, "learning_rate": 0.00013009772599444978, "loss": 1.6922, "step": 62366 }, { "epoch": 2.07, "grad_norm": 0.6216638684272766, "learning_rate": 0.00013008911176542854, "loss": 1.6612, "step": 62367 }, { "epoch": 2.07, "grad_norm": 0.6332569718360901, "learning_rate": 0.00013008049774265414, "loss": 1.7049, "step": 62368 }, { "epoch": 2.08, "grad_norm": 0.6235708594322205, "learning_rate": 0.00013007188392613684, "loss": 1.7098, "step": 62369 }, { "epoch": 2.08, "grad_norm": 0.626953661441803, "learning_rate": 0.00013006327031588735, "loss": 1.7278, "step": 62370 }, { "epoch": 2.08, "grad_norm": 0.6286094188690186, "learning_rate": 0.000130054656911916, "loss": 1.7411, "step": 62371 }, { "epoch": 2.08, "grad_norm": 0.6228538155555725, "learning_rate": 0.00013004604371423317, "loss": 1.6904, "step": 62372 }, { "epoch": 2.08, "grad_norm": 0.6387779712677002, "learning_rate": 0.00013003743072284948, "loss": 1.6653, "step": 62373 }, { "epoch": 2.08, "grad_norm": 0.6085131764411926, "learning_rate": 0.00013002881793777523, "loss": 1.7177, "step": 62374 }, { "epoch": 2.08, "grad_norm": 0.6235620975494385, "learning_rate": 0.00013002020535902105, "loss": 1.6522, "step": 62375 }, { "epoch": 2.08, "grad_norm": 0.6354684233665466, "learning_rate": 0.00013001159298659716, "loss": 1.6944, "step": 62376 }, { "epoch": 2.08, "grad_norm": 0.6167551279067993, "learning_rate": 0.0001300029808205143, "loss": 1.6411, "step": 62377 }, { "epoch": 2.08, "grad_norm": 0.6538599729537964, "learning_rate": 0.00012999436886078274, "loss": 1.7545, "step": 62378 }, { "epoch": 2.08, "grad_norm": 0.6429638862609863, "learning_rate": 0.00012998575710741287, "loss": 1.7501, "step": 62379 }, { "epoch": 2.08, "grad_norm": 0.6340513229370117, "learning_rate": 0.00012997714556041537, "loss": 1.6218, "step": 62380 }, { "epoch": 2.08, "grad_norm": 0.6277589797973633, "learning_rate": 0.00012996853421980042, "loss": 1.7975, "step": 62381 }, { "epoch": 2.08, "grad_norm": 0.6118720769882202, "learning_rate": 0.00012995992308557873, "loss": 1.7279, "step": 62382 }, { "epoch": 2.08, "grad_norm": 0.6597282290458679, "learning_rate": 0.00012995131215776055, "loss": 1.7551, "step": 62383 }, { "epoch": 2.08, "grad_norm": 0.6425527334213257, "learning_rate": 0.0001299427014363565, "loss": 1.7173, "step": 62384 }, { "epoch": 2.08, "grad_norm": 0.6466565132141113, "learning_rate": 0.000129934090921377, "loss": 1.7325, "step": 62385 }, { "epoch": 2.08, "grad_norm": 0.6172950267791748, "learning_rate": 0.0001299254806128323, "loss": 1.7762, "step": 62386 }, { "epoch": 2.08, "grad_norm": 0.6547548174858093, "learning_rate": 0.00012991687051073315, "loss": 1.7908, "step": 62387 }, { "epoch": 2.08, "grad_norm": 0.6366536617279053, "learning_rate": 0.00012990826061508973, "loss": 1.8119, "step": 62388 }, { "epoch": 2.08, "grad_norm": 0.6339670419692993, "learning_rate": 0.00012989965092591272, "loss": 1.7054, "step": 62389 }, { "epoch": 2.08, "grad_norm": 0.6285591125488281, "learning_rate": 0.0001298910414432125, "loss": 1.7075, "step": 62390 }, { "epoch": 2.08, "grad_norm": 0.6509697437286377, "learning_rate": 0.00012988243216699937, "loss": 1.7248, "step": 62391 }, { "epoch": 2.08, "grad_norm": 0.6403716206550598, "learning_rate": 0.00012987382309728398, "loss": 1.6811, "step": 62392 }, { "epoch": 2.08, "grad_norm": 0.6541900038719177, "learning_rate": 0.00012986521423407666, "loss": 1.708, "step": 62393 }, { "epoch": 2.08, "grad_norm": 0.6211304068565369, "learning_rate": 0.00012985660557738796, "loss": 1.7683, "step": 62394 }, { "epoch": 2.08, "grad_norm": 0.6504876017570496, "learning_rate": 0.00012984799712722816, "loss": 1.7631, "step": 62395 }, { "epoch": 2.08, "grad_norm": 0.6237545013427734, "learning_rate": 0.00012983938888360796, "loss": 1.7874, "step": 62396 }, { "epoch": 2.08, "grad_norm": 0.6240800023078918, "learning_rate": 0.00012983078084653765, "loss": 1.7509, "step": 62397 }, { "epoch": 2.08, "grad_norm": 0.6442459225654602, "learning_rate": 0.0001298221730160276, "loss": 1.6154, "step": 62398 }, { "epoch": 2.08, "grad_norm": 0.6207448244094849, "learning_rate": 0.00012981356539208845, "loss": 1.7167, "step": 62399 }, { "epoch": 2.08, "grad_norm": 0.6298891305923462, "learning_rate": 0.0001298049579747306, "loss": 1.7794, "step": 62400 }, { "epoch": 2.08, "grad_norm": 0.6388211250305176, "learning_rate": 0.0001297963507639643, "loss": 1.6549, "step": 62401 }, { "epoch": 2.08, "grad_norm": 0.6507696509361267, "learning_rate": 0.00012978774375980014, "loss": 1.7052, "step": 62402 }, { "epoch": 2.08, "grad_norm": 0.6317746639251709, "learning_rate": 0.00012977913696224873, "loss": 1.7077, "step": 62403 }, { "epoch": 2.08, "grad_norm": 0.6015254855155945, "learning_rate": 0.00012977053037132035, "loss": 1.6627, "step": 62404 }, { "epoch": 2.08, "grad_norm": 0.6351994872093201, "learning_rate": 0.00012976192398702534, "loss": 1.6664, "step": 62405 }, { "epoch": 2.08, "grad_norm": 0.6350446939468384, "learning_rate": 0.00012975331780937428, "loss": 1.7202, "step": 62406 }, { "epoch": 2.08, "grad_norm": 0.6148601174354553, "learning_rate": 0.00012974471183837782, "loss": 1.7679, "step": 62407 }, { "epoch": 2.08, "grad_norm": 0.6359113454818726, "learning_rate": 0.000129736106074046, "loss": 1.6732, "step": 62408 }, { "epoch": 2.08, "grad_norm": 0.6087930798530579, "learning_rate": 0.00012972750051638943, "loss": 1.6881, "step": 62409 }, { "epoch": 2.08, "grad_norm": 0.6178702712059021, "learning_rate": 0.00012971889516541873, "loss": 1.6736, "step": 62410 }, { "epoch": 2.08, "grad_norm": 0.6417747139930725, "learning_rate": 0.00012971029002114415, "loss": 1.7458, "step": 62411 }, { "epoch": 2.08, "grad_norm": 0.6217993497848511, "learning_rate": 0.00012970168508357613, "loss": 1.6933, "step": 62412 }, { "epoch": 2.08, "grad_norm": 0.6361322999000549, "learning_rate": 0.00012969308035272516, "loss": 1.6972, "step": 62413 }, { "epoch": 2.08, "grad_norm": 0.5928803086280823, "learning_rate": 0.00012968447582860192, "loss": 1.6306, "step": 62414 }, { "epoch": 2.08, "grad_norm": 0.6525281667709351, "learning_rate": 0.00012967587151121638, "loss": 1.7286, "step": 62415 }, { "epoch": 2.08, "grad_norm": 0.6682435274124146, "learning_rate": 0.00012966726740057925, "loss": 1.6869, "step": 62416 }, { "epoch": 2.08, "grad_norm": 0.5970247387886047, "learning_rate": 0.0001296586634967011, "loss": 1.7245, "step": 62417 }, { "epoch": 2.08, "grad_norm": 0.6203390955924988, "learning_rate": 0.0001296500597995922, "loss": 1.6872, "step": 62418 }, { "epoch": 2.08, "grad_norm": 0.6406183242797852, "learning_rate": 0.00012964145630926292, "loss": 1.6562, "step": 62419 }, { "epoch": 2.08, "grad_norm": 0.6446515321731567, "learning_rate": 0.00012963285302572395, "loss": 1.7269, "step": 62420 }, { "epoch": 2.08, "grad_norm": 0.6461597084999084, "learning_rate": 0.00012962424994898553, "loss": 1.7437, "step": 62421 }, { "epoch": 2.08, "grad_norm": 0.6312035918235779, "learning_rate": 0.0001296156470790581, "loss": 1.717, "step": 62422 }, { "epoch": 2.08, "grad_norm": 0.6351891160011292, "learning_rate": 0.0001296070444159522, "loss": 1.6789, "step": 62423 }, { "epoch": 2.08, "grad_norm": 0.6506574749946594, "learning_rate": 0.0001295984419596783, "loss": 1.7799, "step": 62424 }, { "epoch": 2.08, "grad_norm": 0.630267322063446, "learning_rate": 0.0001295898397102468, "loss": 1.7077, "step": 62425 }, { "epoch": 2.08, "grad_norm": 0.652498185634613, "learning_rate": 0.00012958123766766802, "loss": 1.7214, "step": 62426 }, { "epoch": 2.08, "grad_norm": 0.6271788477897644, "learning_rate": 0.00012957263583195265, "loss": 1.6878, "step": 62427 }, { "epoch": 2.08, "grad_norm": 0.6352306604385376, "learning_rate": 0.00012956403420311095, "loss": 1.7057, "step": 62428 }, { "epoch": 2.08, "grad_norm": 0.6129403710365295, "learning_rate": 0.00012955543278115328, "loss": 1.7214, "step": 62429 }, { "epoch": 2.08, "grad_norm": 0.6030014753341675, "learning_rate": 0.00012954683156609034, "loss": 1.6606, "step": 62430 }, { "epoch": 2.08, "grad_norm": 0.6224569082260132, "learning_rate": 0.00012953823055793232, "loss": 1.7251, "step": 62431 }, { "epoch": 2.08, "grad_norm": 0.6253851652145386, "learning_rate": 0.00012952962975668983, "loss": 1.7361, "step": 62432 }, { "epoch": 2.08, "grad_norm": 0.6199462413787842, "learning_rate": 0.0001295210291623732, "loss": 1.6993, "step": 62433 }, { "epoch": 2.08, "grad_norm": 0.6021706461906433, "learning_rate": 0.000129512428774993, "loss": 1.7707, "step": 62434 }, { "epoch": 2.08, "grad_norm": 0.6499665379524231, "learning_rate": 0.00012950382859455963, "loss": 1.7107, "step": 62435 }, { "epoch": 2.08, "grad_norm": 0.6319631338119507, "learning_rate": 0.00012949522862108333, "loss": 1.6821, "step": 62436 }, { "epoch": 2.08, "grad_norm": 0.6121941208839417, "learning_rate": 0.00012948662885457483, "loss": 1.7182, "step": 62437 }, { "epoch": 2.08, "grad_norm": 0.6244613528251648, "learning_rate": 0.00012947802929504437, "loss": 1.7292, "step": 62438 }, { "epoch": 2.08, "grad_norm": 0.615744948387146, "learning_rate": 0.0001294694299425025, "loss": 1.7007, "step": 62439 }, { "epoch": 2.08, "grad_norm": 0.6231760382652283, "learning_rate": 0.00012946083079695965, "loss": 1.7476, "step": 62440 }, { "epoch": 2.08, "grad_norm": 0.6281002163887024, "learning_rate": 0.00012945223185842613, "loss": 1.7439, "step": 62441 }, { "epoch": 2.08, "grad_norm": 0.6308625936508179, "learning_rate": 0.00012944363312691255, "loss": 1.6959, "step": 62442 }, { "epoch": 2.08, "grad_norm": 0.6547336578369141, "learning_rate": 0.00012943503460242917, "loss": 1.7051, "step": 62443 }, { "epoch": 2.08, "grad_norm": 0.628929078578949, "learning_rate": 0.00012942643628498663, "loss": 1.7166, "step": 62444 }, { "epoch": 2.08, "grad_norm": 0.6219997406005859, "learning_rate": 0.00012941783817459516, "loss": 1.7486, "step": 62445 }, { "epoch": 2.08, "grad_norm": 0.6508774757385254, "learning_rate": 0.00012940924027126543, "loss": 1.6835, "step": 62446 }, { "epoch": 2.08, "grad_norm": 0.6277532577514648, "learning_rate": 0.00012940064257500772, "loss": 1.6901, "step": 62447 }, { "epoch": 2.08, "grad_norm": 0.6308816075325012, "learning_rate": 0.0001293920450858324, "loss": 1.6452, "step": 62448 }, { "epoch": 2.08, "grad_norm": 0.6350952386856079, "learning_rate": 0.00012938344780375009, "loss": 1.675, "step": 62449 }, { "epoch": 2.08, "grad_norm": 0.6441677808761597, "learning_rate": 0.00012937485072877115, "loss": 1.6387, "step": 62450 }, { "epoch": 2.08, "grad_norm": 0.6166480779647827, "learning_rate": 0.0001293662538609059, "loss": 1.6717, "step": 62451 }, { "epoch": 2.08, "grad_norm": 0.6471231579780579, "learning_rate": 0.00012935765720016487, "loss": 1.6736, "step": 62452 }, { "epoch": 2.08, "grad_norm": 0.6191932559013367, "learning_rate": 0.00012934906074655857, "loss": 1.6603, "step": 62453 }, { "epoch": 2.08, "grad_norm": 0.6360173225402832, "learning_rate": 0.0001293404645000974, "loss": 1.683, "step": 62454 }, { "epoch": 2.08, "grad_norm": 0.6404964923858643, "learning_rate": 0.00012933186846079164, "loss": 1.7672, "step": 62455 }, { "epoch": 2.08, "grad_norm": 0.607438862323761, "learning_rate": 0.000129323272628652, "loss": 1.7301, "step": 62456 }, { "epoch": 2.08, "grad_norm": 0.6247460842132568, "learning_rate": 0.0001293146770036887, "loss": 1.683, "step": 62457 }, { "epoch": 2.08, "grad_norm": 0.6248064637184143, "learning_rate": 0.00012930608158591216, "loss": 1.7504, "step": 62458 }, { "epoch": 2.08, "grad_norm": 0.6261265873908997, "learning_rate": 0.0001292974863753329, "loss": 1.7019, "step": 62459 }, { "epoch": 2.08, "grad_norm": 0.6195431351661682, "learning_rate": 0.0001292888913719615, "loss": 1.7252, "step": 62460 }, { "epoch": 2.08, "grad_norm": 0.6432557106018066, "learning_rate": 0.00012928029657580806, "loss": 1.6548, "step": 62461 }, { "epoch": 2.08, "grad_norm": 0.6423700451850891, "learning_rate": 0.00012927170198688318, "loss": 1.739, "step": 62462 }, { "epoch": 2.08, "grad_norm": 0.6249111294746399, "learning_rate": 0.0001292631076051974, "loss": 1.6676, "step": 62463 }, { "epoch": 2.08, "grad_norm": 0.6335757970809937, "learning_rate": 0.00012925451343076105, "loss": 1.7855, "step": 62464 }, { "epoch": 2.08, "grad_norm": 0.6338487863540649, "learning_rate": 0.00012924591946358445, "loss": 1.7133, "step": 62465 }, { "epoch": 2.08, "grad_norm": 0.6412407755851746, "learning_rate": 0.00012923732570367815, "loss": 1.6495, "step": 62466 }, { "epoch": 2.08, "grad_norm": 0.6268599033355713, "learning_rate": 0.0001292287321510528, "loss": 1.6597, "step": 62467 }, { "epoch": 2.08, "grad_norm": 0.6307459473609924, "learning_rate": 0.00012922013880571837, "loss": 1.6682, "step": 62468 }, { "epoch": 2.08, "grad_norm": 0.6363348960876465, "learning_rate": 0.00012921154566768552, "loss": 1.6206, "step": 62469 }, { "epoch": 2.08, "grad_norm": 0.6222668886184692, "learning_rate": 0.00012920295273696482, "loss": 1.64, "step": 62470 }, { "epoch": 2.08, "grad_norm": 0.6380704641342163, "learning_rate": 0.00012919436001356658, "loss": 1.668, "step": 62471 }, { "epoch": 2.08, "grad_norm": 0.636197566986084, "learning_rate": 0.0001291857674975011, "loss": 1.7485, "step": 62472 }, { "epoch": 2.08, "grad_norm": 0.6264415383338928, "learning_rate": 0.00012917717518877892, "loss": 1.7987, "step": 62473 }, { "epoch": 2.08, "grad_norm": 0.6305333971977234, "learning_rate": 0.00012916858308741072, "loss": 1.716, "step": 62474 }, { "epoch": 2.08, "grad_norm": 0.6312220096588135, "learning_rate": 0.00012915999119340643, "loss": 1.681, "step": 62475 }, { "epoch": 2.08, "grad_norm": 0.6491034030914307, "learning_rate": 0.0001291513995067768, "loss": 1.709, "step": 62476 }, { "epoch": 2.08, "grad_norm": 0.6271453499794006, "learning_rate": 0.00012914280802753226, "loss": 1.6605, "step": 62477 }, { "epoch": 2.08, "grad_norm": 0.6565467119216919, "learning_rate": 0.00012913421675568319, "loss": 1.7363, "step": 62478 }, { "epoch": 2.08, "grad_norm": 0.6210536956787109, "learning_rate": 0.00012912562569123988, "loss": 1.7396, "step": 62479 }, { "epoch": 2.08, "grad_norm": 0.6427831053733826, "learning_rate": 0.000129117034834213, "loss": 1.7399, "step": 62480 }, { "epoch": 2.08, "grad_norm": 0.6223671436309814, "learning_rate": 0.00012910844418461288, "loss": 1.7848, "step": 62481 }, { "epoch": 2.08, "grad_norm": 0.6047990918159485, "learning_rate": 0.00012909985374244981, "loss": 1.6617, "step": 62482 }, { "epoch": 2.08, "grad_norm": 0.6356945633888245, "learning_rate": 0.00012909126350773434, "loss": 1.6856, "step": 62483 }, { "epoch": 2.08, "grad_norm": 0.6178338527679443, "learning_rate": 0.000129082673480477, "loss": 1.642, "step": 62484 }, { "epoch": 2.08, "grad_norm": 0.6176305413246155, "learning_rate": 0.0001290740836606881, "loss": 1.7118, "step": 62485 }, { "epoch": 2.08, "grad_norm": 0.6264008283615112, "learning_rate": 0.00012906549404837796, "loss": 1.7153, "step": 62486 }, { "epoch": 2.08, "grad_norm": 0.6590904593467712, "learning_rate": 0.00012905690464355728, "loss": 1.7131, "step": 62487 }, { "epoch": 2.08, "grad_norm": 0.6182131767272949, "learning_rate": 0.00012904831544623629, "loss": 1.6839, "step": 62488 }, { "epoch": 2.08, "grad_norm": 0.6185646653175354, "learning_rate": 0.00012903972645642538, "loss": 1.7183, "step": 62489 }, { "epoch": 2.08, "grad_norm": 0.6516335606575012, "learning_rate": 0.00012903113767413513, "loss": 1.7518, "step": 62490 }, { "epoch": 2.08, "grad_norm": 0.6095467805862427, "learning_rate": 0.0001290225490993758, "loss": 1.6727, "step": 62491 }, { "epoch": 2.08, "grad_norm": 0.6526399254798889, "learning_rate": 0.00012901396073215798, "loss": 1.7568, "step": 62492 }, { "epoch": 2.08, "grad_norm": 0.6075724363327026, "learning_rate": 0.00012900537257249192, "loss": 1.6888, "step": 62493 }, { "epoch": 2.08, "grad_norm": 0.651831328868866, "learning_rate": 0.00012899678462038828, "loss": 1.6961, "step": 62494 }, { "epoch": 2.08, "grad_norm": 0.6164539456367493, "learning_rate": 0.0001289881968758573, "loss": 1.624, "step": 62495 }, { "epoch": 2.08, "grad_norm": 0.6356621384620667, "learning_rate": 0.0001289796093389094, "loss": 1.7052, "step": 62496 }, { "epoch": 2.08, "grad_norm": 0.6410440802574158, "learning_rate": 0.00012897102200955512, "loss": 1.8043, "step": 62497 }, { "epoch": 2.08, "grad_norm": 0.6282525062561035, "learning_rate": 0.0001289624348878047, "loss": 1.6795, "step": 62498 }, { "epoch": 2.08, "grad_norm": 0.6414344906806946, "learning_rate": 0.00012895384797366883, "loss": 1.7554, "step": 62499 }, { "epoch": 2.08, "grad_norm": 0.6475931406021118, "learning_rate": 0.00012894526126715778, "loss": 1.7111, "step": 62500 }, { "epoch": 2.08, "grad_norm": 0.6355317831039429, "learning_rate": 0.00012893667476828187, "loss": 1.6857, "step": 62501 }, { "epoch": 2.08, "grad_norm": 0.6205508708953857, "learning_rate": 0.00012892808847705174, "loss": 1.7222, "step": 62502 }, { "epoch": 2.08, "grad_norm": 0.6095629930496216, "learning_rate": 0.00012891950239347759, "loss": 1.685, "step": 62503 }, { "epoch": 2.08, "grad_norm": 0.6254705786705017, "learning_rate": 0.00012891091651757007, "loss": 1.7023, "step": 62504 }, { "epoch": 2.08, "grad_norm": 0.6447629332542419, "learning_rate": 0.00012890233084933937, "loss": 1.7414, "step": 62505 }, { "epoch": 2.08, "grad_norm": 0.654297947883606, "learning_rate": 0.00012889374538879616, "loss": 1.7227, "step": 62506 }, { "epoch": 2.08, "grad_norm": 0.6549244523048401, "learning_rate": 0.00012888516013595072, "loss": 1.7772, "step": 62507 }, { "epoch": 2.08, "grad_norm": 0.6325559020042419, "learning_rate": 0.00012887657509081337, "loss": 1.6835, "step": 62508 }, { "epoch": 2.08, "grad_norm": 0.6278195381164551, "learning_rate": 0.00012886799025339476, "loss": 1.7485, "step": 62509 }, { "epoch": 2.08, "grad_norm": 0.6152299642562866, "learning_rate": 0.00012885940562370508, "loss": 1.6844, "step": 62510 }, { "epoch": 2.08, "grad_norm": 0.610673725605011, "learning_rate": 0.00012885082120175494, "loss": 1.7195, "step": 62511 }, { "epoch": 2.08, "grad_norm": 0.6299711465835571, "learning_rate": 0.0001288422369875546, "loss": 1.7664, "step": 62512 }, { "epoch": 2.08, "grad_norm": 0.630638599395752, "learning_rate": 0.00012883365298111473, "loss": 1.6805, "step": 62513 }, { "epoch": 2.08, "grad_norm": 0.6619216799736023, "learning_rate": 0.00012882506918244553, "loss": 1.7396, "step": 62514 }, { "epoch": 2.08, "grad_norm": 0.6209167838096619, "learning_rate": 0.00012881648559155736, "loss": 1.7295, "step": 62515 }, { "epoch": 2.08, "grad_norm": 0.6476319432258606, "learning_rate": 0.0001288079022084608, "loss": 1.682, "step": 62516 }, { "epoch": 2.08, "grad_norm": 0.6412091851234436, "learning_rate": 0.00012879931903316637, "loss": 1.6256, "step": 62517 }, { "epoch": 2.08, "grad_norm": 0.6156301498413086, "learning_rate": 0.00012879073606568417, "loss": 1.7139, "step": 62518 }, { "epoch": 2.08, "grad_norm": 0.647270917892456, "learning_rate": 0.00012878215330602478, "loss": 1.59, "step": 62519 }, { "epoch": 2.08, "grad_norm": 0.6699581146240234, "learning_rate": 0.00012877357075419875, "loss": 1.6917, "step": 62520 }, { "epoch": 2.08, "grad_norm": 0.6351213455200195, "learning_rate": 0.00012876498841021637, "loss": 1.825, "step": 62521 }, { "epoch": 2.08, "grad_norm": 0.6664126515388489, "learning_rate": 0.00012875640627408794, "loss": 1.7665, "step": 62522 }, { "epoch": 2.08, "grad_norm": 0.6430489420890808, "learning_rate": 0.00012874782434582402, "loss": 1.724, "step": 62523 }, { "epoch": 2.08, "grad_norm": 0.6354692578315735, "learning_rate": 0.0001287392426254352, "loss": 1.7429, "step": 62524 }, { "epoch": 2.08, "grad_norm": 0.629406750202179, "learning_rate": 0.00012873066111293154, "loss": 1.7238, "step": 62525 }, { "epoch": 2.08, "grad_norm": 0.643791675567627, "learning_rate": 0.00012872207980832356, "loss": 1.7442, "step": 62526 }, { "epoch": 2.08, "grad_norm": 0.6371656060218811, "learning_rate": 0.00012871349871162188, "loss": 1.6777, "step": 62527 }, { "epoch": 2.08, "grad_norm": 0.6268234848976135, "learning_rate": 0.00012870491782283677, "loss": 1.6915, "step": 62528 }, { "epoch": 2.08, "grad_norm": 0.636721670627594, "learning_rate": 0.00012869633714197854, "loss": 1.7056, "step": 62529 }, { "epoch": 2.08, "grad_norm": 0.6277191042900085, "learning_rate": 0.00012868775666905773, "loss": 1.7428, "step": 62530 }, { "epoch": 2.08, "grad_norm": 0.6650472283363342, "learning_rate": 0.00012867917640408495, "loss": 1.5907, "step": 62531 }, { "epoch": 2.08, "grad_norm": 0.6312346458435059, "learning_rate": 0.00012867059634707017, "loss": 1.7238, "step": 62532 }, { "epoch": 2.08, "grad_norm": 0.6240113973617554, "learning_rate": 0.00012866201649802407, "loss": 1.6923, "step": 62533 }, { "epoch": 2.08, "grad_norm": 0.6265661716461182, "learning_rate": 0.00012865343685695713, "loss": 1.712, "step": 62534 }, { "epoch": 2.08, "grad_norm": 0.6350154280662537, "learning_rate": 0.00012864485742387967, "loss": 1.7512, "step": 62535 }, { "epoch": 2.08, "grad_norm": 0.6357619166374207, "learning_rate": 0.000128636278198802, "loss": 1.7671, "step": 62536 }, { "epoch": 2.08, "grad_norm": 0.6095552444458008, "learning_rate": 0.00012862769918173478, "loss": 1.707, "step": 62537 }, { "epoch": 2.08, "grad_norm": 0.630224883556366, "learning_rate": 0.00012861912037268823, "loss": 1.7486, "step": 62538 }, { "epoch": 2.08, "grad_norm": 0.6208720803260803, "learning_rate": 0.00012861054177167275, "loss": 1.7944, "step": 62539 }, { "epoch": 2.08, "grad_norm": 0.624516487121582, "learning_rate": 0.00012860196337869878, "loss": 1.7166, "step": 62540 }, { "epoch": 2.08, "grad_norm": 0.62126624584198, "learning_rate": 0.00012859338519377692, "loss": 1.6891, "step": 62541 }, { "epoch": 2.08, "grad_norm": 0.6276116371154785, "learning_rate": 0.0001285848072169174, "loss": 1.6865, "step": 62542 }, { "epoch": 2.08, "grad_norm": 0.6365664005279541, "learning_rate": 0.00012857622944813056, "loss": 1.686, "step": 62543 }, { "epoch": 2.08, "grad_norm": 0.6609490513801575, "learning_rate": 0.00012856765188742705, "loss": 1.7721, "step": 62544 }, { "epoch": 2.08, "grad_norm": 0.6352582573890686, "learning_rate": 0.00012855907453481713, "loss": 1.6328, "step": 62545 }, { "epoch": 2.08, "grad_norm": 0.6138311624526978, "learning_rate": 0.00012855049739031113, "loss": 1.6771, "step": 62546 }, { "epoch": 2.08, "grad_norm": 0.6155233979225159, "learning_rate": 0.00012854192045391968, "loss": 1.7106, "step": 62547 }, { "epoch": 2.08, "grad_norm": 0.6193318963050842, "learning_rate": 0.00012853334372565299, "loss": 1.653, "step": 62548 }, { "epoch": 2.08, "grad_norm": 0.6034110188484192, "learning_rate": 0.00012852476720552164, "loss": 1.7289, "step": 62549 }, { "epoch": 2.08, "grad_norm": 0.6478765606880188, "learning_rate": 0.00012851619089353583, "loss": 1.6738, "step": 62550 }, { "epoch": 2.08, "grad_norm": 0.6619967222213745, "learning_rate": 0.0001285076147897062, "loss": 1.6517, "step": 62551 }, { "epoch": 2.08, "grad_norm": 0.6086904406547546, "learning_rate": 0.00012849903889404311, "loss": 1.7111, "step": 62552 }, { "epoch": 2.08, "grad_norm": 0.649864137172699, "learning_rate": 0.00012849046320655678, "loss": 1.6712, "step": 62553 }, { "epoch": 2.08, "grad_norm": 0.6565210819244385, "learning_rate": 0.00012848188772725787, "loss": 1.7464, "step": 62554 }, { "epoch": 2.08, "grad_norm": 0.6294956207275391, "learning_rate": 0.00012847331245615656, "loss": 1.6694, "step": 62555 }, { "epoch": 2.08, "grad_norm": 0.6447566151618958, "learning_rate": 0.0001284647373932635, "loss": 1.6984, "step": 62556 }, { "epoch": 2.08, "grad_norm": 0.6221912503242493, "learning_rate": 0.00012845616253858897, "loss": 1.685, "step": 62557 }, { "epoch": 2.08, "grad_norm": 0.6368712186813354, "learning_rate": 0.00012844758789214324, "loss": 1.6959, "step": 62558 }, { "epoch": 2.08, "grad_norm": 0.6324202418327332, "learning_rate": 0.000128439013453937, "loss": 1.6459, "step": 62559 }, { "epoch": 2.08, "grad_norm": 0.6213163733482361, "learning_rate": 0.00012843043922398042, "loss": 1.7223, "step": 62560 }, { "epoch": 2.08, "grad_norm": 0.6235828399658203, "learning_rate": 0.0001284218652022841, "loss": 1.7515, "step": 62561 }, { "epoch": 2.08, "grad_norm": 0.6226080060005188, "learning_rate": 0.00012841329138885824, "loss": 1.7504, "step": 62562 }, { "epoch": 2.08, "grad_norm": 0.6257824301719666, "learning_rate": 0.0001284047177837135, "loss": 1.8011, "step": 62563 }, { "epoch": 2.08, "grad_norm": 0.6411236524581909, "learning_rate": 0.0001283961443868601, "loss": 1.7126, "step": 62564 }, { "epoch": 2.08, "grad_norm": 0.6253783702850342, "learning_rate": 0.00012838757119830845, "loss": 1.732, "step": 62565 }, { "epoch": 2.08, "grad_norm": 0.6358214616775513, "learning_rate": 0.00012837899821806906, "loss": 1.7215, "step": 62566 }, { "epoch": 2.08, "grad_norm": 0.6176518797874451, "learning_rate": 0.00012837042544615229, "loss": 1.7542, "step": 62567 }, { "epoch": 2.08, "grad_norm": 0.624377429485321, "learning_rate": 0.00012836185288256846, "loss": 1.7469, "step": 62568 }, { "epoch": 2.08, "grad_norm": 0.6454678177833557, "learning_rate": 0.000128353280527328, "loss": 1.6898, "step": 62569 }, { "epoch": 2.08, "grad_norm": 0.6273975968360901, "learning_rate": 0.0001283447083804415, "loss": 1.7475, "step": 62570 }, { "epoch": 2.08, "grad_norm": 0.6442764401435852, "learning_rate": 0.00012833613644191926, "loss": 1.7968, "step": 62571 }, { "epoch": 2.08, "grad_norm": 0.609808623790741, "learning_rate": 0.0001283275647117715, "loss": 1.6461, "step": 62572 }, { "epoch": 2.08, "grad_norm": 0.6044608950614929, "learning_rate": 0.0001283189931900089, "loss": 1.695, "step": 62573 }, { "epoch": 2.08, "grad_norm": 0.6061815619468689, "learning_rate": 0.00012831042187664177, "loss": 1.6801, "step": 62574 }, { "epoch": 2.08, "grad_norm": 0.6107978224754333, "learning_rate": 0.00012830185077168036, "loss": 1.6758, "step": 62575 }, { "epoch": 2.08, "grad_norm": 0.627400279045105, "learning_rate": 0.0001282932798751352, "loss": 1.6747, "step": 62576 }, { "epoch": 2.08, "grad_norm": 0.6280922293663025, "learning_rate": 0.00012828470918701697, "loss": 1.748, "step": 62577 }, { "epoch": 2.08, "grad_norm": 0.632111668586731, "learning_rate": 0.0001282761387073355, "loss": 1.686, "step": 62578 }, { "epoch": 2.08, "grad_norm": 0.6399752497673035, "learning_rate": 0.00012826756843610154, "loss": 1.6576, "step": 62579 }, { "epoch": 2.08, "grad_norm": 0.6286644339561462, "learning_rate": 0.00012825899837332554, "loss": 1.672, "step": 62580 }, { "epoch": 2.08, "grad_norm": 0.6484835743904114, "learning_rate": 0.00012825042851901785, "loss": 1.6487, "step": 62581 }, { "epoch": 2.08, "grad_norm": 0.6601134538650513, "learning_rate": 0.0001282418588731887, "loss": 1.765, "step": 62582 }, { "epoch": 2.08, "grad_norm": 0.6075013279914856, "learning_rate": 0.00012823328943584864, "loss": 1.6694, "step": 62583 }, { "epoch": 2.08, "grad_norm": 0.6465925574302673, "learning_rate": 0.00012822472020700826, "loss": 1.7916, "step": 62584 }, { "epoch": 2.08, "grad_norm": 0.6354616284370422, "learning_rate": 0.00012821615118667755, "loss": 1.6738, "step": 62585 }, { "epoch": 2.08, "grad_norm": 0.6397051811218262, "learning_rate": 0.0001282075823748671, "loss": 1.6913, "step": 62586 }, { "epoch": 2.08, "grad_norm": 0.6363564729690552, "learning_rate": 0.00012819901377158745, "loss": 1.7007, "step": 62587 }, { "epoch": 2.08, "grad_norm": 0.6213715672492981, "learning_rate": 0.0001281904453768489, "loss": 1.6792, "step": 62588 }, { "epoch": 2.08, "grad_norm": 0.6324080228805542, "learning_rate": 0.0001281818771906617, "loss": 1.7179, "step": 62589 }, { "epoch": 2.08, "grad_norm": 0.6453632712364197, "learning_rate": 0.00012817330921303642, "loss": 1.7847, "step": 62590 }, { "epoch": 2.08, "grad_norm": 0.6194164752960205, "learning_rate": 0.00012816474144398363, "loss": 1.6432, "step": 62591 }, { "epoch": 2.08, "grad_norm": 0.6478091478347778, "learning_rate": 0.00012815617388351328, "loss": 1.6588, "step": 62592 }, { "epoch": 2.08, "grad_norm": 0.639841616153717, "learning_rate": 0.00012814760653163606, "loss": 1.7617, "step": 62593 }, { "epoch": 2.08, "grad_norm": 0.6183687448501587, "learning_rate": 0.0001281390393883624, "loss": 1.7133, "step": 62594 }, { "epoch": 2.08, "grad_norm": 0.6263116002082825, "learning_rate": 0.00012813047245370263, "loss": 1.7348, "step": 62595 }, { "epoch": 2.08, "grad_norm": 0.6354742646217346, "learning_rate": 0.00012812190572766705, "loss": 1.6791, "step": 62596 }, { "epoch": 2.08, "grad_norm": 0.623262345790863, "learning_rate": 0.00012811333921026622, "loss": 1.7604, "step": 62597 }, { "epoch": 2.08, "grad_norm": 0.6096886396408081, "learning_rate": 0.0001281047729015105, "loss": 1.6801, "step": 62598 }, { "epoch": 2.08, "grad_norm": 0.6465634107589722, "learning_rate": 0.00012809620680141019, "loss": 1.7484, "step": 62599 }, { "epoch": 2.08, "grad_norm": 0.6379911303520203, "learning_rate": 0.0001280876409099757, "loss": 1.6554, "step": 62600 }, { "epoch": 2.08, "grad_norm": 0.6269594430923462, "learning_rate": 0.0001280790752272176, "loss": 1.7346, "step": 62601 }, { "epoch": 2.08, "grad_norm": 0.6521380543708801, "learning_rate": 0.00012807050975314617, "loss": 1.7711, "step": 62602 }, { "epoch": 2.08, "grad_norm": 0.6146534085273743, "learning_rate": 0.00012806194448777172, "loss": 1.6961, "step": 62603 }, { "epoch": 2.08, "grad_norm": 0.6293632984161377, "learning_rate": 0.00012805337943110482, "loss": 1.6842, "step": 62604 }, { "epoch": 2.08, "grad_norm": 0.6195942759513855, "learning_rate": 0.0001280448145831558, "loss": 1.7075, "step": 62605 }, { "epoch": 2.08, "grad_norm": 0.6419922113418579, "learning_rate": 0.00012803624994393493, "loss": 1.697, "step": 62606 }, { "epoch": 2.08, "grad_norm": 0.6404669284820557, "learning_rate": 0.0001280276855134528, "loss": 1.7327, "step": 62607 }, { "epoch": 2.08, "grad_norm": 0.6534096002578735, "learning_rate": 0.00012801912129171964, "loss": 1.7443, "step": 62608 }, { "epoch": 2.08, "grad_norm": 0.6234692931175232, "learning_rate": 0.00012801055727874603, "loss": 1.6561, "step": 62609 }, { "epoch": 2.08, "grad_norm": 0.6236074566841125, "learning_rate": 0.00012800199347454214, "loss": 1.7204, "step": 62610 }, { "epoch": 2.08, "grad_norm": 0.6135203838348389, "learning_rate": 0.0001279934298791186, "loss": 1.6955, "step": 62611 }, { "epoch": 2.08, "grad_norm": 0.6288707852363586, "learning_rate": 0.0001279848664924857, "loss": 1.6815, "step": 62612 }, { "epoch": 2.08, "grad_norm": 0.6090608239173889, "learning_rate": 0.00012797630331465372, "loss": 1.6743, "step": 62613 }, { "epoch": 2.08, "grad_norm": 0.6565486788749695, "learning_rate": 0.00012796774034563326, "loss": 1.688, "step": 62614 }, { "epoch": 2.08, "grad_norm": 0.6400554776191711, "learning_rate": 0.0001279591775854345, "loss": 1.7904, "step": 62615 }, { "epoch": 2.08, "grad_norm": 0.6264108419418335, "learning_rate": 0.00012795061503406805, "loss": 1.7558, "step": 62616 }, { "epoch": 2.08, "grad_norm": 0.643235445022583, "learning_rate": 0.00012794205269154427, "loss": 1.6741, "step": 62617 }, { "epoch": 2.08, "grad_norm": 0.6443129181861877, "learning_rate": 0.0001279334905578733, "loss": 1.6604, "step": 62618 }, { "epoch": 2.08, "grad_norm": 0.6082738637924194, "learning_rate": 0.00012792492863306586, "loss": 1.6953, "step": 62619 }, { "epoch": 2.08, "grad_norm": 0.6246596574783325, "learning_rate": 0.0001279163669171321, "loss": 1.7415, "step": 62620 }, { "epoch": 2.08, "grad_norm": 0.6158429980278015, "learning_rate": 0.0001279078054100826, "loss": 1.6759, "step": 62621 }, { "epoch": 2.08, "grad_norm": 0.6263644099235535, "learning_rate": 0.0001278992441119276, "loss": 1.6922, "step": 62622 }, { "epoch": 2.08, "grad_norm": 0.6214290261268616, "learning_rate": 0.00012789068302267764, "loss": 1.7193, "step": 62623 }, { "epoch": 2.08, "grad_norm": 0.6390082836151123, "learning_rate": 0.000127882122142343, "loss": 1.698, "step": 62624 }, { "epoch": 2.08, "grad_norm": 0.6166698336601257, "learning_rate": 0.00012787356147093406, "loss": 1.7007, "step": 62625 }, { "epoch": 2.08, "grad_norm": 0.6316641569137573, "learning_rate": 0.00012786500100846134, "loss": 1.7297, "step": 62626 }, { "epoch": 2.08, "grad_norm": 0.6169258952140808, "learning_rate": 0.00012785644075493502, "loss": 1.6642, "step": 62627 }, { "epoch": 2.08, "grad_norm": 0.6161374449729919, "learning_rate": 0.00012784788071036574, "loss": 1.7296, "step": 62628 }, { "epoch": 2.08, "grad_norm": 0.6266903281211853, "learning_rate": 0.00012783932087476364, "loss": 1.6842, "step": 62629 }, { "epoch": 2.08, "grad_norm": 0.6259377598762512, "learning_rate": 0.00012783076124813937, "loss": 1.7743, "step": 62630 }, { "epoch": 2.08, "grad_norm": 0.6252423524856567, "learning_rate": 0.0001278222018305032, "loss": 1.6532, "step": 62631 }, { "epoch": 2.08, "grad_norm": 0.6143120527267456, "learning_rate": 0.00012781364262186534, "loss": 1.6688, "step": 62632 }, { "epoch": 2.08, "grad_norm": 0.6350704431533813, "learning_rate": 0.0001278050836222364, "loss": 1.677, "step": 62633 }, { "epoch": 2.08, "grad_norm": 0.617486834526062, "learning_rate": 0.00012779652483162692, "loss": 1.7121, "step": 62634 }, { "epoch": 2.08, "grad_norm": 0.6457192301750183, "learning_rate": 0.00012778796625004685, "loss": 1.7116, "step": 62635 }, { "epoch": 2.08, "grad_norm": 0.6600388288497925, "learning_rate": 0.00012777940787750685, "loss": 1.7102, "step": 62636 }, { "epoch": 2.08, "grad_norm": 0.628354012966156, "learning_rate": 0.00012777084971401736, "loss": 1.7302, "step": 62637 }, { "epoch": 2.08, "grad_norm": 0.6245472431182861, "learning_rate": 0.00012776229175958867, "loss": 1.67, "step": 62638 }, { "epoch": 2.08, "grad_norm": 0.6231604218482971, "learning_rate": 0.00012775373401423108, "loss": 1.6765, "step": 62639 }, { "epoch": 2.08, "grad_norm": 0.6261289715766907, "learning_rate": 0.00012774517647795512, "loss": 1.6933, "step": 62640 }, { "epoch": 2.08, "grad_norm": 0.6213179230690002, "learning_rate": 0.0001277366191507713, "loss": 1.6957, "step": 62641 }, { "epoch": 2.08, "grad_norm": 0.608814001083374, "learning_rate": 0.00012772806203268966, "loss": 1.7424, "step": 62642 }, { "epoch": 2.08, "grad_norm": 0.6651068925857544, "learning_rate": 0.00012771950512372075, "loss": 1.7245, "step": 62643 }, { "epoch": 2.08, "grad_norm": 0.639438807964325, "learning_rate": 0.0001277109484238751, "loss": 1.7358, "step": 62644 }, { "epoch": 2.08, "grad_norm": 0.6197793483734131, "learning_rate": 0.00012770239193316294, "loss": 1.6953, "step": 62645 }, { "epoch": 2.08, "grad_norm": 0.6205458045005798, "learning_rate": 0.0001276938356515946, "loss": 1.7618, "step": 62646 }, { "epoch": 2.08, "grad_norm": 0.6174835562705994, "learning_rate": 0.00012768527957918055, "loss": 1.6962, "step": 62647 }, { "epoch": 2.08, "grad_norm": 0.6138362288475037, "learning_rate": 0.0001276767237159314, "loss": 1.6683, "step": 62648 }, { "epoch": 2.08, "grad_norm": 0.6429693698883057, "learning_rate": 0.0001276681680618571, "loss": 1.7157, "step": 62649 }, { "epoch": 2.08, "grad_norm": 0.6161854863166809, "learning_rate": 0.00012765961261696827, "loss": 1.7301, "step": 62650 }, { "epoch": 2.08, "grad_norm": 0.6454472541809082, "learning_rate": 0.00012765105738127538, "loss": 1.7082, "step": 62651 }, { "epoch": 2.08, "grad_norm": 0.6232296824455261, "learning_rate": 0.0001276425023547887, "loss": 1.6809, "step": 62652 }, { "epoch": 2.08, "grad_norm": 0.6332927346229553, "learning_rate": 0.00012763394753751852, "loss": 1.6979, "step": 62653 }, { "epoch": 2.08, "grad_norm": 0.6166622638702393, "learning_rate": 0.00012762539292947544, "loss": 1.6035, "step": 62654 }, { "epoch": 2.08, "grad_norm": 0.6130394339561462, "learning_rate": 0.00012761683853066972, "loss": 1.6807, "step": 62655 }, { "epoch": 2.08, "grad_norm": 0.609515905380249, "learning_rate": 0.0001276082843411117, "loss": 1.7399, "step": 62656 }, { "epoch": 2.08, "grad_norm": 0.6077887415885925, "learning_rate": 0.00012759973036081178, "loss": 1.6966, "step": 62657 }, { "epoch": 2.08, "grad_norm": 0.6178797483444214, "learning_rate": 0.0001275911765897805, "loss": 1.5871, "step": 62658 }, { "epoch": 2.08, "grad_norm": 0.6241347193717957, "learning_rate": 0.00012758262302802815, "loss": 1.62, "step": 62659 }, { "epoch": 2.08, "grad_norm": 0.6216477155685425, "learning_rate": 0.00012757406967556494, "loss": 1.7548, "step": 62660 }, { "epoch": 2.08, "grad_norm": 0.6107281446456909, "learning_rate": 0.00012756551653240154, "loss": 1.6794, "step": 62661 }, { "epoch": 2.08, "grad_norm": 0.6226685643196106, "learning_rate": 0.0001275569635985482, "loss": 1.6806, "step": 62662 }, { "epoch": 2.08, "grad_norm": 0.6115248203277588, "learning_rate": 0.00012754841087401516, "loss": 1.6936, "step": 62663 }, { "epoch": 2.08, "grad_norm": 0.6231907606124878, "learning_rate": 0.0001275398583588131, "loss": 1.763, "step": 62664 }, { "epoch": 2.08, "grad_norm": 0.6477262377738953, "learning_rate": 0.0001275313060529521, "loss": 1.7492, "step": 62665 }, { "epoch": 2.08, "grad_norm": 0.6360146403312683, "learning_rate": 0.00012752275395644275, "loss": 1.6859, "step": 62666 }, { "epoch": 2.08, "grad_norm": 0.6130068302154541, "learning_rate": 0.00012751420206929534, "loss": 1.7132, "step": 62667 }, { "epoch": 2.08, "grad_norm": 0.632502019405365, "learning_rate": 0.0001275056503915203, "loss": 1.6886, "step": 62668 }, { "epoch": 2.09, "grad_norm": 0.6417232751846313, "learning_rate": 0.00012749709892312803, "loss": 1.708, "step": 62669 }, { "epoch": 2.09, "grad_norm": 0.6224957704544067, "learning_rate": 0.00012748854766412877, "loss": 1.7212, "step": 62670 }, { "epoch": 2.09, "grad_norm": 0.6999971270561218, "learning_rate": 0.00012747999661453306, "loss": 1.7247, "step": 62671 }, { "epoch": 2.09, "grad_norm": 0.6505081653594971, "learning_rate": 0.00012747144577435115, "loss": 1.7672, "step": 62672 }, { "epoch": 2.09, "grad_norm": 0.6091455221176147, "learning_rate": 0.00012746289514359356, "loss": 1.6972, "step": 62673 }, { "epoch": 2.09, "grad_norm": 0.6151720285415649, "learning_rate": 0.0001274543447222706, "loss": 1.6333, "step": 62674 }, { "epoch": 2.09, "grad_norm": 0.6413775682449341, "learning_rate": 0.00012744579451039255, "loss": 1.7375, "step": 62675 }, { "epoch": 2.09, "grad_norm": 0.6287107467651367, "learning_rate": 0.00012743724450796996, "loss": 1.6856, "step": 62676 }, { "epoch": 2.09, "grad_norm": 0.6267780065536499, "learning_rate": 0.00012742869471501304, "loss": 1.7403, "step": 62677 }, { "epoch": 2.09, "grad_norm": 0.6414621472358704, "learning_rate": 0.00012742014513153236, "loss": 1.7743, "step": 62678 }, { "epoch": 2.09, "grad_norm": 1.1349676847457886, "learning_rate": 0.00012741159575753813, "loss": 1.8149, "step": 62679 }, { "epoch": 2.09, "grad_norm": 0.6222999691963196, "learning_rate": 0.00012740304659304087, "loss": 1.7416, "step": 62680 }, { "epoch": 2.09, "grad_norm": 0.6138064861297607, "learning_rate": 0.00012739449763805085, "loss": 1.6768, "step": 62681 }, { "epoch": 2.09, "grad_norm": 0.6240801811218262, "learning_rate": 0.0001273859488925784, "loss": 1.6758, "step": 62682 }, { "epoch": 2.09, "grad_norm": 0.632124125957489, "learning_rate": 0.00012737740035663407, "loss": 1.7098, "step": 62683 }, { "epoch": 2.09, "grad_norm": 0.6284591555595398, "learning_rate": 0.00012736885203022815, "loss": 1.7032, "step": 62684 }, { "epoch": 2.09, "grad_norm": 0.6297531127929688, "learning_rate": 0.0001273603039133709, "loss": 1.7004, "step": 62685 }, { "epoch": 2.09, "grad_norm": 0.6068007349967957, "learning_rate": 0.00012735175600607282, "loss": 1.7309, "step": 62686 }, { "epoch": 2.09, "grad_norm": 0.6253106594085693, "learning_rate": 0.00012734320830834432, "loss": 1.6757, "step": 62687 }, { "epoch": 2.09, "grad_norm": 0.6293894648551941, "learning_rate": 0.0001273346608201958, "loss": 1.747, "step": 62688 }, { "epoch": 2.09, "grad_norm": 0.6451623439788818, "learning_rate": 0.0001273261135416374, "loss": 1.7441, "step": 62689 }, { "epoch": 2.09, "grad_norm": 0.6384947299957275, "learning_rate": 0.00012731756647267977, "loss": 1.7495, "step": 62690 }, { "epoch": 2.09, "grad_norm": 0.6153325438499451, "learning_rate": 0.00012730901961333318, "loss": 1.7022, "step": 62691 }, { "epoch": 2.09, "grad_norm": 0.6348295211791992, "learning_rate": 0.0001273004729636079, "loss": 1.6804, "step": 62692 }, { "epoch": 2.09, "grad_norm": 0.6199361085891724, "learning_rate": 0.00012729192652351438, "loss": 1.7393, "step": 62693 }, { "epoch": 2.09, "grad_norm": 0.6203721761703491, "learning_rate": 0.00012728338029306322, "loss": 1.7378, "step": 62694 }, { "epoch": 2.09, "grad_norm": 0.6490267515182495, "learning_rate": 0.0001272748342722644, "loss": 1.6932, "step": 62695 }, { "epoch": 2.09, "grad_norm": 0.6306833624839783, "learning_rate": 0.00012726628846112848, "loss": 1.7765, "step": 62696 }, { "epoch": 2.09, "grad_norm": 0.6271101832389832, "learning_rate": 0.00012725774285966593, "loss": 1.621, "step": 62697 }, { "epoch": 2.09, "grad_norm": 0.6315069794654846, "learning_rate": 0.00012724919746788703, "loss": 1.7382, "step": 62698 }, { "epoch": 2.09, "grad_norm": 0.6401116251945496, "learning_rate": 0.00012724065228580204, "loss": 1.6952, "step": 62699 }, { "epoch": 2.09, "grad_norm": 0.6344788670539856, "learning_rate": 0.00012723210731342146, "loss": 1.714, "step": 62700 }, { "epoch": 2.09, "grad_norm": 0.6213399767875671, "learning_rate": 0.00012722356255075583, "loss": 1.6759, "step": 62701 }, { "epoch": 2.09, "grad_norm": 0.6620875597000122, "learning_rate": 0.00012721501799781515, "loss": 1.6499, "step": 62702 }, { "epoch": 2.09, "grad_norm": 0.6325422525405884, "learning_rate": 0.00012720647365461, "loss": 1.6607, "step": 62703 }, { "epoch": 2.09, "grad_norm": 0.6203202605247498, "learning_rate": 0.00012719792952115076, "loss": 1.6789, "step": 62704 }, { "epoch": 2.09, "grad_norm": 0.6297844052314758, "learning_rate": 0.00012718938559744784, "loss": 1.6713, "step": 62705 }, { "epoch": 2.09, "grad_norm": 0.6288349032402039, "learning_rate": 0.0001271808418835114, "loss": 1.7059, "step": 62706 }, { "epoch": 2.09, "grad_norm": 0.6292918920516968, "learning_rate": 0.00012717229837935198, "loss": 1.7849, "step": 62707 }, { "epoch": 2.09, "grad_norm": 0.6355923414230347, "learning_rate": 0.00012716375508498013, "loss": 1.7087, "step": 62708 }, { "epoch": 2.09, "grad_norm": 0.621624231338501, "learning_rate": 0.0001271552120004058, "loss": 1.7027, "step": 62709 }, { "epoch": 2.09, "grad_norm": 0.6294960379600525, "learning_rate": 0.00012714666912563957, "loss": 1.7425, "step": 62710 }, { "epoch": 2.09, "grad_norm": 0.624287486076355, "learning_rate": 0.00012713812646069194, "loss": 1.7739, "step": 62711 }, { "epoch": 2.09, "grad_norm": 0.6305856108665466, "learning_rate": 0.00012712958400557314, "loss": 1.7315, "step": 62712 }, { "epoch": 2.09, "grad_norm": 0.6231840252876282, "learning_rate": 0.00012712104176029344, "loss": 1.7251, "step": 62713 }, { "epoch": 2.09, "grad_norm": 0.6291132569313049, "learning_rate": 0.00012711249972486346, "loss": 1.7792, "step": 62714 }, { "epoch": 2.09, "grad_norm": 0.6109665632247925, "learning_rate": 0.0001271039578992934, "loss": 1.6505, "step": 62715 }, { "epoch": 2.09, "grad_norm": 0.6187404990196228, "learning_rate": 0.00012709541628359356, "loss": 1.7133, "step": 62716 }, { "epoch": 2.09, "grad_norm": 0.6426220536231995, "learning_rate": 0.00012708687487777442, "loss": 1.7009, "step": 62717 }, { "epoch": 2.09, "grad_norm": 0.6360853314399719, "learning_rate": 0.00012707833368184643, "loss": 1.6354, "step": 62718 }, { "epoch": 2.09, "grad_norm": 0.6206450462341309, "learning_rate": 0.0001270697926958199, "loss": 1.7491, "step": 62719 }, { "epoch": 2.09, "grad_norm": 0.6310251355171204, "learning_rate": 0.000127061251919705, "loss": 1.7266, "step": 62720 }, { "epoch": 2.09, "grad_norm": 0.6276689767837524, "learning_rate": 0.0001270527113535124, "loss": 1.7494, "step": 62721 }, { "epoch": 2.09, "grad_norm": 0.6256211996078491, "learning_rate": 0.0001270441709972523, "loss": 1.6823, "step": 62722 }, { "epoch": 2.09, "grad_norm": 0.6151105165481567, "learning_rate": 0.000127035630850935, "loss": 1.6268, "step": 62723 }, { "epoch": 2.09, "grad_norm": 0.6134063601493835, "learning_rate": 0.00012702709091457108, "loss": 1.7084, "step": 62724 }, { "epoch": 2.09, "grad_norm": 0.6267198324203491, "learning_rate": 0.00012701855118817064, "loss": 1.6971, "step": 62725 }, { "epoch": 2.09, "grad_norm": 0.6290532350540161, "learning_rate": 0.00012701001167174437, "loss": 1.6992, "step": 62726 }, { "epoch": 2.09, "grad_norm": 0.6280941367149353, "learning_rate": 0.00012700147236530227, "loss": 1.6544, "step": 62727 }, { "epoch": 2.09, "grad_norm": 0.6370776295661926, "learning_rate": 0.00012699293326885503, "loss": 1.63, "step": 62728 }, { "epoch": 2.09, "grad_norm": 0.6201872825622559, "learning_rate": 0.00012698439438241292, "loss": 1.7239, "step": 62729 }, { "epoch": 2.09, "grad_norm": 0.6444845199584961, "learning_rate": 0.0001269758557059861, "loss": 1.6267, "step": 62730 }, { "epoch": 2.09, "grad_norm": 0.6358699202537537, "learning_rate": 0.0001269673172395852, "loss": 1.7241, "step": 62731 }, { "epoch": 2.09, "grad_norm": 0.6457214951515198, "learning_rate": 0.00012695877898322042, "loss": 1.6343, "step": 62732 }, { "epoch": 2.09, "grad_norm": 0.6144652366638184, "learning_rate": 0.00012695024093690226, "loss": 1.7534, "step": 62733 }, { "epoch": 2.09, "grad_norm": 0.6215337514877319, "learning_rate": 0.000126941703100641, "loss": 1.7042, "step": 62734 }, { "epoch": 2.09, "grad_norm": 0.6306843161582947, "learning_rate": 0.00012693316547444694, "loss": 1.7422, "step": 62735 }, { "epoch": 2.09, "grad_norm": 0.6376821994781494, "learning_rate": 0.0001269246280583306, "loss": 1.685, "step": 62736 }, { "epoch": 2.09, "grad_norm": 0.6264235973358154, "learning_rate": 0.0001269160908523022, "loss": 1.7326, "step": 62737 }, { "epoch": 2.09, "grad_norm": 0.6363928318023682, "learning_rate": 0.00012690755385637222, "loss": 1.6602, "step": 62738 }, { "epoch": 2.09, "grad_norm": 0.635423481464386, "learning_rate": 0.00012689901707055087, "loss": 1.7088, "step": 62739 }, { "epoch": 2.09, "grad_norm": 0.6404872536659241, "learning_rate": 0.00012689048049484872, "loss": 1.7235, "step": 62740 }, { "epoch": 2.09, "grad_norm": 0.6262311339378357, "learning_rate": 0.000126881944129276, "loss": 1.7101, "step": 62741 }, { "epoch": 2.09, "grad_norm": 0.6391382217407227, "learning_rate": 0.000126873407973843, "loss": 1.7096, "step": 62742 }, { "epoch": 2.09, "grad_norm": 0.6585995554924011, "learning_rate": 0.0001268648720285603, "loss": 1.7123, "step": 62743 }, { "epoch": 2.09, "grad_norm": 0.6331838369369507, "learning_rate": 0.00012685633629343798, "loss": 1.6524, "step": 62744 }, { "epoch": 2.09, "grad_norm": 0.6396482586860657, "learning_rate": 0.0001268478007684867, "loss": 1.7437, "step": 62745 }, { "epoch": 2.09, "grad_norm": 0.6287227869033813, "learning_rate": 0.00012683926545371658, "loss": 1.6418, "step": 62746 }, { "epoch": 2.09, "grad_norm": 0.6233346462249756, "learning_rate": 0.00012683073034913813, "loss": 1.6532, "step": 62747 }, { "epoch": 2.09, "grad_norm": 0.6346569657325745, "learning_rate": 0.00012682219545476174, "loss": 1.725, "step": 62748 }, { "epoch": 2.09, "grad_norm": 0.6226746439933777, "learning_rate": 0.0001268136607705975, "loss": 1.7243, "step": 62749 }, { "epoch": 2.09, "grad_norm": 0.6106583476066589, "learning_rate": 0.000126805126296656, "loss": 1.6989, "step": 62750 }, { "epoch": 2.09, "grad_norm": 0.6320390105247498, "learning_rate": 0.0001267965920329478, "loss": 1.6746, "step": 62751 }, { "epoch": 2.09, "grad_norm": 0.6277555227279663, "learning_rate": 0.00012678805797948275, "loss": 1.6956, "step": 62752 }, { "epoch": 2.09, "grad_norm": 0.6399627923965454, "learning_rate": 0.00012677952413627148, "loss": 1.7092, "step": 62753 }, { "epoch": 2.09, "grad_norm": 0.6588811874389648, "learning_rate": 0.00012677099050332447, "loss": 1.6882, "step": 62754 }, { "epoch": 2.09, "grad_norm": 0.6310097575187683, "learning_rate": 0.00012676245708065196, "loss": 1.7765, "step": 62755 }, { "epoch": 2.09, "grad_norm": 0.6300686001777649, "learning_rate": 0.00012675392386826418, "loss": 1.6621, "step": 62756 }, { "epoch": 2.09, "grad_norm": 0.637151300907135, "learning_rate": 0.00012674539086617162, "loss": 1.6944, "step": 62757 }, { "epoch": 2.09, "grad_norm": 0.6207499504089355, "learning_rate": 0.00012673685807438484, "loss": 1.6711, "step": 62758 }, { "epoch": 2.09, "grad_norm": 0.6432602405548096, "learning_rate": 0.00012672832549291376, "loss": 1.7676, "step": 62759 }, { "epoch": 2.09, "grad_norm": 0.6309822797775269, "learning_rate": 0.00012671979312176896, "loss": 1.742, "step": 62760 }, { "epoch": 2.09, "grad_norm": 0.63311767578125, "learning_rate": 0.00012671126096096093, "loss": 1.7202, "step": 62761 }, { "epoch": 2.09, "grad_norm": 0.6260786056518555, "learning_rate": 0.0001267027290104999, "loss": 1.6493, "step": 62762 }, { "epoch": 2.09, "grad_norm": 0.6636794805526733, "learning_rate": 0.0001266941972703961, "loss": 1.7805, "step": 62763 }, { "epoch": 2.09, "grad_norm": 0.648815393447876, "learning_rate": 0.00012668566574066, "loss": 1.7764, "step": 62764 }, { "epoch": 2.09, "grad_norm": 0.6231383681297302, "learning_rate": 0.0001266771344213022, "loss": 1.6837, "step": 62765 }, { "epoch": 2.09, "grad_norm": 0.6232311725616455, "learning_rate": 0.00012666860331233257, "loss": 1.6304, "step": 62766 }, { "epoch": 2.09, "grad_norm": 0.6232823133468628, "learning_rate": 0.00012666007241376174, "loss": 1.702, "step": 62767 }, { "epoch": 2.09, "grad_norm": 0.615283727645874, "learning_rate": 0.00012665154172560015, "loss": 1.7155, "step": 62768 }, { "epoch": 2.09, "grad_norm": 0.6421821117401123, "learning_rate": 0.00012664301124785807, "loss": 1.7144, "step": 62769 }, { "epoch": 2.09, "grad_norm": 0.6236143112182617, "learning_rate": 0.00012663448098054568, "loss": 1.6958, "step": 62770 }, { "epoch": 2.09, "grad_norm": 0.6261329650878906, "learning_rate": 0.00012662595092367363, "loss": 1.663, "step": 62771 }, { "epoch": 2.09, "grad_norm": 0.6181532740592957, "learning_rate": 0.00012661742107725212, "loss": 1.7661, "step": 62772 }, { "epoch": 2.09, "grad_norm": 0.622606098651886, "learning_rate": 0.00012660889144129137, "loss": 1.6891, "step": 62773 }, { "epoch": 2.09, "grad_norm": 0.6209208369255066, "learning_rate": 0.00012660036201580193, "loss": 1.7229, "step": 62774 }, { "epoch": 2.09, "grad_norm": 0.632085919380188, "learning_rate": 0.00012659183280079418, "loss": 1.7263, "step": 62775 }, { "epoch": 2.09, "grad_norm": 0.6388049721717834, "learning_rate": 0.00012658330379627838, "loss": 1.6831, "step": 62776 }, { "epoch": 2.09, "grad_norm": 0.6291918158531189, "learning_rate": 0.00012657477500226482, "loss": 1.7991, "step": 62777 }, { "epoch": 2.09, "grad_norm": 0.6403204202651978, "learning_rate": 0.00012656624641876405, "loss": 1.6583, "step": 62778 }, { "epoch": 2.09, "grad_norm": 0.6252939701080322, "learning_rate": 0.00012655771804578628, "loss": 1.6819, "step": 62779 }, { "epoch": 2.09, "grad_norm": 0.6345338821411133, "learning_rate": 0.0001265491898833418, "loss": 1.6844, "step": 62780 }, { "epoch": 2.09, "grad_norm": 0.6140950918197632, "learning_rate": 0.0001265406619314411, "loss": 1.6861, "step": 62781 }, { "epoch": 2.09, "grad_norm": 0.6345421671867371, "learning_rate": 0.0001265321341900944, "loss": 1.7595, "step": 62782 }, { "epoch": 2.09, "grad_norm": 0.6602627635002136, "learning_rate": 0.00012652360665931224, "loss": 1.7106, "step": 62783 }, { "epoch": 2.09, "grad_norm": 0.6482725143432617, "learning_rate": 0.00012651507933910476, "loss": 1.7413, "step": 62784 }, { "epoch": 2.09, "grad_norm": 0.63629549741745, "learning_rate": 0.00012650655222948252, "loss": 1.7156, "step": 62785 }, { "epoch": 2.09, "grad_norm": 0.6274031400680542, "learning_rate": 0.00012649802533045576, "loss": 1.6985, "step": 62786 }, { "epoch": 2.09, "grad_norm": 0.6450579762458801, "learning_rate": 0.0001264894986420347, "loss": 1.7347, "step": 62787 }, { "epoch": 2.09, "grad_norm": 0.6144134998321533, "learning_rate": 0.00012648097216422997, "loss": 1.664, "step": 62788 }, { "epoch": 2.09, "grad_norm": 0.6313647627830505, "learning_rate": 0.00012647244589705165, "loss": 1.6902, "step": 62789 }, { "epoch": 2.09, "grad_norm": 0.6387535333633423, "learning_rate": 0.00012646391984051032, "loss": 1.6983, "step": 62790 }, { "epoch": 2.09, "grad_norm": 0.6282404065132141, "learning_rate": 0.0001264553939946162, "loss": 1.7001, "step": 62791 }, { "epoch": 2.09, "grad_norm": 0.6876360177993774, "learning_rate": 0.0001264468683593796, "loss": 1.7444, "step": 62792 }, { "epoch": 2.09, "grad_norm": 0.6138893365859985, "learning_rate": 0.00012643834293481099, "loss": 1.7122, "step": 62793 }, { "epoch": 2.09, "grad_norm": 0.6198500394821167, "learning_rate": 0.00012642981772092058, "loss": 1.7203, "step": 62794 }, { "epoch": 2.09, "grad_norm": 0.6295443773269653, "learning_rate": 0.0001264212927177189, "loss": 1.7193, "step": 62795 }, { "epoch": 2.09, "grad_norm": 0.6155924201011658, "learning_rate": 0.00012641276792521607, "loss": 1.7109, "step": 62796 }, { "epoch": 2.09, "grad_norm": 0.6255429983139038, "learning_rate": 0.0001264042433434227, "loss": 1.7271, "step": 62797 }, { "epoch": 2.09, "grad_norm": 0.6114926338195801, "learning_rate": 0.00012639571897234898, "loss": 1.7065, "step": 62798 }, { "epoch": 2.09, "grad_norm": 0.6187950372695923, "learning_rate": 0.00012638719481200517, "loss": 1.7073, "step": 62799 }, { "epoch": 2.09, "grad_norm": 0.6365979909896851, "learning_rate": 0.00012637867086240183, "loss": 1.6996, "step": 62800 }, { "epoch": 2.09, "grad_norm": 0.6409075856208801, "learning_rate": 0.00012637014712354924, "loss": 1.7616, "step": 62801 }, { "epoch": 2.09, "grad_norm": 0.6582432985305786, "learning_rate": 0.00012636162359545754, "loss": 1.6389, "step": 62802 }, { "epoch": 2.09, "grad_norm": 0.6344300508499146, "learning_rate": 0.00012635310027813728, "loss": 1.7058, "step": 62803 }, { "epoch": 2.09, "grad_norm": 0.6237556338310242, "learning_rate": 0.00012634457717159887, "loss": 1.7221, "step": 62804 }, { "epoch": 2.09, "grad_norm": 0.6380411982536316, "learning_rate": 0.0001263360542758526, "loss": 1.6874, "step": 62805 }, { "epoch": 2.09, "grad_norm": 0.6201279759407043, "learning_rate": 0.0001263275315909086, "loss": 1.7075, "step": 62806 }, { "epoch": 2.09, "grad_norm": 0.6638434529304504, "learning_rate": 0.00012631900911677752, "loss": 1.7433, "step": 62807 }, { "epoch": 2.09, "grad_norm": 0.6126746535301208, "learning_rate": 0.00012631048685346957, "loss": 1.6562, "step": 62808 }, { "epoch": 2.09, "grad_norm": 0.6130210757255554, "learning_rate": 0.00012630196480099498, "loss": 1.743, "step": 62809 }, { "epoch": 2.09, "grad_norm": 0.6285949945449829, "learning_rate": 0.0001262934429593642, "loss": 1.6827, "step": 62810 }, { "epoch": 2.09, "grad_norm": 0.6346195340156555, "learning_rate": 0.00012628492132858786, "loss": 1.7471, "step": 62811 }, { "epoch": 2.09, "grad_norm": 0.6231746077537537, "learning_rate": 0.00012627639990867576, "loss": 1.6813, "step": 62812 }, { "epoch": 2.09, "grad_norm": 0.628976047039032, "learning_rate": 0.00012626787869963853, "loss": 1.7773, "step": 62813 }, { "epoch": 2.09, "grad_norm": 0.6185082197189331, "learning_rate": 0.0001262593577014866, "loss": 1.776, "step": 62814 }, { "epoch": 2.09, "grad_norm": 0.6558392643928528, "learning_rate": 0.00012625083691423023, "loss": 1.7085, "step": 62815 }, { "epoch": 2.09, "grad_norm": 0.6439152956008911, "learning_rate": 0.0001262423163378796, "loss": 1.6715, "step": 62816 }, { "epoch": 2.09, "grad_norm": 0.6244376301765442, "learning_rate": 0.00012623379597244522, "loss": 1.7551, "step": 62817 }, { "epoch": 2.09, "grad_norm": 0.6241918802261353, "learning_rate": 0.00012622527581793763, "loss": 1.7686, "step": 62818 }, { "epoch": 2.09, "grad_norm": 0.6286998987197876, "learning_rate": 0.00012621675587436675, "loss": 1.7684, "step": 62819 }, { "epoch": 2.09, "grad_norm": 0.6492622494697571, "learning_rate": 0.0001262082361417431, "loss": 1.693, "step": 62820 }, { "epoch": 2.09, "grad_norm": 0.6504084467887878, "learning_rate": 0.00012619971662007712, "loss": 1.7353, "step": 62821 }, { "epoch": 2.09, "grad_norm": 0.6434333920478821, "learning_rate": 0.00012619119730937912, "loss": 1.7215, "step": 62822 }, { "epoch": 2.09, "grad_norm": 0.645380973815918, "learning_rate": 0.00012618267820965927, "loss": 1.7297, "step": 62823 }, { "epoch": 2.09, "grad_norm": 0.640080451965332, "learning_rate": 0.00012617415932092806, "loss": 1.6563, "step": 62824 }, { "epoch": 2.09, "grad_norm": 0.6286851763725281, "learning_rate": 0.000126165640643196, "loss": 1.729, "step": 62825 }, { "epoch": 2.09, "grad_norm": 0.6345338225364685, "learning_rate": 0.00012615712217647305, "loss": 1.7753, "step": 62826 }, { "epoch": 2.09, "grad_norm": 0.6234797835350037, "learning_rate": 0.0001261486039207697, "loss": 1.7596, "step": 62827 }, { "epoch": 2.09, "grad_norm": 0.6242600679397583, "learning_rate": 0.00012614008587609644, "loss": 1.6603, "step": 62828 }, { "epoch": 2.09, "grad_norm": 0.622920036315918, "learning_rate": 0.00012613156804246352, "loss": 1.6119, "step": 62829 }, { "epoch": 2.09, "grad_norm": 0.6402145028114319, "learning_rate": 0.00012612305041988115, "loss": 1.7529, "step": 62830 }, { "epoch": 2.09, "grad_norm": 0.6373133063316345, "learning_rate": 0.0001261145330083599, "loss": 1.6775, "step": 62831 }, { "epoch": 2.09, "grad_norm": 0.6277307271957397, "learning_rate": 0.00012610601580790996, "loss": 1.7502, "step": 62832 }, { "epoch": 2.09, "grad_norm": 0.6270638704299927, "learning_rate": 0.00012609749881854158, "loss": 1.6834, "step": 62833 }, { "epoch": 2.09, "grad_norm": 0.621699333190918, "learning_rate": 0.00012608898204026522, "loss": 1.7613, "step": 62834 }, { "epoch": 2.09, "grad_norm": 0.6135820746421814, "learning_rate": 0.00012608046547309132, "loss": 1.6576, "step": 62835 }, { "epoch": 2.09, "grad_norm": 0.6010799407958984, "learning_rate": 0.00012607194911703015, "loss": 1.693, "step": 62836 }, { "epoch": 2.09, "grad_norm": 0.6348611116409302, "learning_rate": 0.00012606343297209187, "loss": 1.6936, "step": 62837 }, { "epoch": 2.09, "grad_norm": 0.6196300387382507, "learning_rate": 0.00012605491703828703, "loss": 1.6725, "step": 62838 }, { "epoch": 2.09, "grad_norm": 0.6307969689369202, "learning_rate": 0.00012604640131562593, "loss": 1.7317, "step": 62839 }, { "epoch": 2.09, "grad_norm": 0.6342654824256897, "learning_rate": 0.00012603788580411874, "loss": 1.6354, "step": 62840 }, { "epoch": 2.09, "grad_norm": 0.6232984662055969, "learning_rate": 0.00012602937050377604, "loss": 1.662, "step": 62841 }, { "epoch": 2.09, "grad_norm": 0.6440419554710388, "learning_rate": 0.00012602085541460795, "loss": 1.6671, "step": 62842 }, { "epoch": 2.09, "grad_norm": 0.6093063354492188, "learning_rate": 0.00012601234053662502, "loss": 1.6228, "step": 62843 }, { "epoch": 2.09, "grad_norm": 0.6068936586380005, "learning_rate": 0.00012600382586983733, "loss": 1.6738, "step": 62844 }, { "epoch": 2.09, "grad_norm": 0.6181118488311768, "learning_rate": 0.0001259953114142555, "loss": 1.6914, "step": 62845 }, { "epoch": 2.09, "grad_norm": 0.6153128743171692, "learning_rate": 0.00012598679716988972, "loss": 1.6415, "step": 62846 }, { "epoch": 2.09, "grad_norm": 0.6459154486656189, "learning_rate": 0.0001259782831367502, "loss": 1.7674, "step": 62847 }, { "epoch": 2.09, "grad_norm": 0.6345862746238708, "learning_rate": 0.00012596976931484753, "loss": 1.6862, "step": 62848 }, { "epoch": 2.09, "grad_norm": 0.6166757345199585, "learning_rate": 0.0001259612557041918, "loss": 1.6912, "step": 62849 }, { "epoch": 2.09, "grad_norm": 0.6274350881576538, "learning_rate": 0.00012595274230479356, "loss": 1.7384, "step": 62850 }, { "epoch": 2.09, "grad_norm": 0.6618857383728027, "learning_rate": 0.00012594422911666307, "loss": 1.7369, "step": 62851 }, { "epoch": 2.09, "grad_norm": 0.6132516264915466, "learning_rate": 0.00012593571613981052, "loss": 1.6644, "step": 62852 }, { "epoch": 2.09, "grad_norm": 0.6376177072525024, "learning_rate": 0.00012592720337424647, "loss": 1.8116, "step": 62853 }, { "epoch": 2.09, "grad_norm": 0.6385114192962646, "learning_rate": 0.00012591869081998104, "loss": 1.7675, "step": 62854 }, { "epoch": 2.09, "grad_norm": 0.6208165884017944, "learning_rate": 0.00012591017847702476, "loss": 1.6965, "step": 62855 }, { "epoch": 2.09, "grad_norm": 0.6567952036857605, "learning_rate": 0.00012590166634538782, "loss": 1.7357, "step": 62856 }, { "epoch": 2.09, "grad_norm": 0.6427664160728455, "learning_rate": 0.00012589315442508066, "loss": 1.6334, "step": 62857 }, { "epoch": 2.09, "grad_norm": 0.6326628923416138, "learning_rate": 0.00012588464271611358, "loss": 1.7269, "step": 62858 }, { "epoch": 2.09, "grad_norm": 0.6332632303237915, "learning_rate": 0.0001258761312184968, "loss": 1.6926, "step": 62859 }, { "epoch": 2.09, "grad_norm": 0.646794855594635, "learning_rate": 0.00012586761993224082, "loss": 1.7292, "step": 62860 }, { "epoch": 2.09, "grad_norm": 0.6649464964866638, "learning_rate": 0.00012585910885735588, "loss": 1.6723, "step": 62861 }, { "epoch": 2.09, "grad_norm": 0.634127140045166, "learning_rate": 0.0001258505979938522, "loss": 1.702, "step": 62862 }, { "epoch": 2.09, "grad_norm": 0.6373319625854492, "learning_rate": 0.00012584208734174032, "loss": 1.7065, "step": 62863 }, { "epoch": 2.09, "grad_norm": 0.625683069229126, "learning_rate": 0.00012583357690103053, "loss": 1.6336, "step": 62864 }, { "epoch": 2.09, "grad_norm": 0.6217206120491028, "learning_rate": 0.0001258250666717331, "loss": 1.734, "step": 62865 }, { "epoch": 2.09, "grad_norm": 0.6066542267799377, "learning_rate": 0.0001258165566538583, "loss": 1.7201, "step": 62866 }, { "epoch": 2.09, "grad_norm": 0.647564709186554, "learning_rate": 0.00012580804684741653, "loss": 1.7734, "step": 62867 }, { "epoch": 2.09, "grad_norm": 0.6299809217453003, "learning_rate": 0.00012579953725241834, "loss": 1.6479, "step": 62868 }, { "epoch": 2.09, "grad_norm": 0.6470022797584534, "learning_rate": 0.0001257910278688736, "loss": 1.7171, "step": 62869 }, { "epoch": 2.09, "grad_norm": 0.6394937038421631, "learning_rate": 0.0001257825186967929, "loss": 1.666, "step": 62870 }, { "epoch": 2.09, "grad_norm": 0.6263404488563538, "learning_rate": 0.0001257740097361867, "loss": 1.6845, "step": 62871 }, { "epoch": 2.09, "grad_norm": 0.6165645718574524, "learning_rate": 0.00012576550098706516, "loss": 1.7407, "step": 62872 }, { "epoch": 2.09, "grad_norm": 0.6327574849128723, "learning_rate": 0.0001257569924494385, "loss": 1.724, "step": 62873 }, { "epoch": 2.09, "grad_norm": 0.641758382320404, "learning_rate": 0.00012574848412331722, "loss": 1.7323, "step": 62874 }, { "epoch": 2.09, "grad_norm": 0.6360152363777161, "learning_rate": 0.00012573997600871177, "loss": 1.7057, "step": 62875 }, { "epoch": 2.09, "grad_norm": 0.6167809963226318, "learning_rate": 0.00012573146810563213, "loss": 1.7231, "step": 62876 }, { "epoch": 2.09, "grad_norm": 0.6284648180007935, "learning_rate": 0.0001257229604140888, "loss": 1.7347, "step": 62877 }, { "epoch": 2.09, "grad_norm": 0.6240683197975159, "learning_rate": 0.00012571445293409222, "loss": 1.7736, "step": 62878 }, { "epoch": 2.09, "grad_norm": 0.6090338230133057, "learning_rate": 0.00012570594566565263, "loss": 1.6613, "step": 62879 }, { "epoch": 2.09, "grad_norm": 0.6587561964988708, "learning_rate": 0.0001256974386087802, "loss": 1.6512, "step": 62880 }, { "epoch": 2.09, "grad_norm": 0.6164034008979797, "learning_rate": 0.00012568893176348543, "loss": 1.672, "step": 62881 }, { "epoch": 2.09, "grad_norm": 0.6089452505111694, "learning_rate": 0.00012568042512977882, "loss": 1.73, "step": 62882 }, { "epoch": 2.09, "grad_norm": 0.6275842785835266, "learning_rate": 0.0001256719187076703, "loss": 1.8102, "step": 62883 }, { "epoch": 2.09, "grad_norm": 0.6275513768196106, "learning_rate": 0.00012566341249717035, "loss": 1.7119, "step": 62884 }, { "epoch": 2.09, "grad_norm": 0.6309109330177307, "learning_rate": 0.00012565490649828944, "loss": 1.6673, "step": 62885 }, { "epoch": 2.09, "grad_norm": 0.6404201984405518, "learning_rate": 0.0001256464007110378, "loss": 1.7221, "step": 62886 }, { "epoch": 2.09, "grad_norm": 0.6174923181533813, "learning_rate": 0.0001256378951354256, "loss": 1.6875, "step": 62887 }, { "epoch": 2.09, "grad_norm": 0.6293957233428955, "learning_rate": 0.00012562938977146344, "loss": 1.6626, "step": 62888 }, { "epoch": 2.09, "grad_norm": 0.6290311217308044, "learning_rate": 0.0001256208846191615, "loss": 1.7271, "step": 62889 }, { "epoch": 2.09, "grad_norm": 0.6582825183868408, "learning_rate": 0.00012561237967853, "loss": 1.7077, "step": 62890 }, { "epoch": 2.09, "grad_norm": 0.6354908347129822, "learning_rate": 0.00012560387494957938, "loss": 1.6568, "step": 62891 }, { "epoch": 2.09, "grad_norm": 0.6139757633209229, "learning_rate": 0.0001255953704323201, "loss": 1.6946, "step": 62892 }, { "epoch": 2.09, "grad_norm": 0.6337154507637024, "learning_rate": 0.00012558686612676228, "loss": 1.6907, "step": 62893 }, { "epoch": 2.09, "grad_norm": 0.6393325924873352, "learning_rate": 0.00012557836203291626, "loss": 1.6987, "step": 62894 }, { "epoch": 2.09, "grad_norm": 0.6186642050743103, "learning_rate": 0.0001255698581507925, "loss": 1.7638, "step": 62895 }, { "epoch": 2.09, "grad_norm": 0.6212435364723206, "learning_rate": 0.00012556135448040122, "loss": 1.6845, "step": 62896 }, { "epoch": 2.09, "grad_norm": 0.6157922744750977, "learning_rate": 0.00012555285102175263, "loss": 1.7149, "step": 62897 }, { "epoch": 2.09, "grad_norm": 0.6193056106567383, "learning_rate": 0.00012554434777485734, "loss": 1.6736, "step": 62898 }, { "epoch": 2.09, "grad_norm": 0.6487174034118652, "learning_rate": 0.00012553584473972532, "loss": 1.7078, "step": 62899 }, { "epoch": 2.09, "grad_norm": 0.6113443374633789, "learning_rate": 0.00012552734191636721, "loss": 1.6815, "step": 62900 }, { "epoch": 2.09, "grad_norm": 0.6403807997703552, "learning_rate": 0.00012551883930479315, "loss": 1.7261, "step": 62901 }, { "epoch": 2.09, "grad_norm": 0.63843834400177, "learning_rate": 0.00012551033690501355, "loss": 1.6813, "step": 62902 }, { "epoch": 2.09, "grad_norm": 0.6257122755050659, "learning_rate": 0.0001255018347170387, "loss": 1.6937, "step": 62903 }, { "epoch": 2.09, "grad_norm": 0.6335577368736267, "learning_rate": 0.00012549333274087884, "loss": 1.7448, "step": 62904 }, { "epoch": 2.09, "grad_norm": 0.6279811859130859, "learning_rate": 0.00012548483097654447, "loss": 1.6884, "step": 62905 }, { "epoch": 2.09, "grad_norm": 0.6482640504837036, "learning_rate": 0.00012547632942404565, "loss": 1.7017, "step": 62906 }, { "epoch": 2.09, "grad_norm": 0.6296238303184509, "learning_rate": 0.000125467828083393, "loss": 1.7204, "step": 62907 }, { "epoch": 2.09, "grad_norm": 0.6376578211784363, "learning_rate": 0.00012545932695459666, "loss": 1.6958, "step": 62908 }, { "epoch": 2.09, "grad_norm": 0.6349919438362122, "learning_rate": 0.00012545082603766686, "loss": 1.7086, "step": 62909 }, { "epoch": 2.09, "grad_norm": 0.6279785633087158, "learning_rate": 0.0001254423253326142, "loss": 1.703, "step": 62910 }, { "epoch": 2.09, "grad_norm": 0.6132824420928955, "learning_rate": 0.00012543382483944869, "loss": 1.695, "step": 62911 }, { "epoch": 2.09, "grad_norm": 0.6431005597114563, "learning_rate": 0.00012542532455818092, "loss": 1.7681, "step": 62912 }, { "epoch": 2.09, "grad_norm": 0.6370111703872681, "learning_rate": 0.00012541682448882096, "loss": 1.7567, "step": 62913 }, { "epoch": 2.09, "grad_norm": 0.6406170725822449, "learning_rate": 0.00012540832463137937, "loss": 1.6199, "step": 62914 }, { "epoch": 2.09, "grad_norm": 0.6442936658859253, "learning_rate": 0.00012539982498586635, "loss": 1.6587, "step": 62915 }, { "epoch": 2.09, "grad_norm": 0.6474650502204895, "learning_rate": 0.0001253913255522921, "loss": 1.6713, "step": 62916 }, { "epoch": 2.09, "grad_norm": 0.6536749601364136, "learning_rate": 0.00012538282633066717, "loss": 1.6685, "step": 62917 }, { "epoch": 2.09, "grad_norm": 0.6411370038986206, "learning_rate": 0.00012537432732100177, "loss": 1.6791, "step": 62918 }, { "epoch": 2.09, "grad_norm": 0.6518673300743103, "learning_rate": 0.00012536582852330606, "loss": 1.651, "step": 62919 }, { "epoch": 2.09, "grad_norm": 0.6369161605834961, "learning_rate": 0.00012535732993759053, "loss": 1.7237, "step": 62920 }, { "epoch": 2.09, "grad_norm": 0.6360087394714355, "learning_rate": 0.0001253488315638656, "loss": 1.7171, "step": 62921 }, { "epoch": 2.09, "grad_norm": 0.6409900784492493, "learning_rate": 0.00012534033340214144, "loss": 1.6776, "step": 62922 }, { "epoch": 2.09, "grad_norm": 0.6266482472419739, "learning_rate": 0.00012533183545242825, "loss": 1.7529, "step": 62923 }, { "epoch": 2.09, "grad_norm": 0.6415392160415649, "learning_rate": 0.0001253233377147366, "loss": 1.721, "step": 62924 }, { "epoch": 2.09, "grad_norm": 0.6722650527954102, "learning_rate": 0.0001253148401890767, "loss": 1.66, "step": 62925 }, { "epoch": 2.09, "grad_norm": 0.6633744835853577, "learning_rate": 0.00012530634287545868, "loss": 1.7394, "step": 62926 }, { "epoch": 2.09, "grad_norm": 0.6280845999717712, "learning_rate": 0.00012529784577389305, "loss": 1.7253, "step": 62927 }, { "epoch": 2.09, "grad_norm": 0.6321609020233154, "learning_rate": 0.00012528934888439032, "loss": 1.7047, "step": 62928 }, { "epoch": 2.09, "grad_norm": 0.6492643356323242, "learning_rate": 0.00012528085220696038, "loss": 1.7206, "step": 62929 }, { "epoch": 2.09, "grad_norm": 0.647257387638092, "learning_rate": 0.0001252723557416137, "loss": 1.7091, "step": 62930 }, { "epoch": 2.09, "grad_norm": 0.6221999526023865, "learning_rate": 0.00012526385948836074, "loss": 1.7538, "step": 62931 }, { "epoch": 2.09, "grad_norm": 0.6634195446968079, "learning_rate": 0.00012525536344721173, "loss": 1.752, "step": 62932 }, { "epoch": 2.09, "grad_norm": 0.6323288679122925, "learning_rate": 0.0001252468676181769, "loss": 1.7919, "step": 62933 }, { "epoch": 2.09, "grad_norm": 0.6254991292953491, "learning_rate": 0.00012523837200126655, "loss": 1.6373, "step": 62934 }, { "epoch": 2.09, "grad_norm": 0.6397138237953186, "learning_rate": 0.0001252298765964913, "loss": 1.6833, "step": 62935 }, { "epoch": 2.09, "grad_norm": 0.6209978461265564, "learning_rate": 0.00012522138140386106, "loss": 1.7222, "step": 62936 }, { "epoch": 2.09, "grad_norm": 0.6374308466911316, "learning_rate": 0.00012521288642338625, "loss": 1.5981, "step": 62937 }, { "epoch": 2.09, "grad_norm": 0.6296473145484924, "learning_rate": 0.0001252043916550774, "loss": 1.7051, "step": 62938 }, { "epoch": 2.09, "grad_norm": 0.6460304856300354, "learning_rate": 0.00012519589709894462, "loss": 1.6426, "step": 62939 }, { "epoch": 2.09, "grad_norm": 0.6306740045547485, "learning_rate": 0.00012518740275499815, "loss": 1.7352, "step": 62940 }, { "epoch": 2.09, "grad_norm": 0.6487283110618591, "learning_rate": 0.00012517890862324846, "loss": 1.6861, "step": 62941 }, { "epoch": 2.09, "grad_norm": 0.632392168045044, "learning_rate": 0.000125170414703706, "loss": 1.6477, "step": 62942 }, { "epoch": 2.09, "grad_norm": 0.622254490852356, "learning_rate": 0.00012516192099638069, "loss": 1.7316, "step": 62943 }, { "epoch": 2.09, "grad_norm": 0.6438558101654053, "learning_rate": 0.00012515342750128304, "loss": 1.714, "step": 62944 }, { "epoch": 2.09, "grad_norm": 0.6325715184211731, "learning_rate": 0.0001251449342184235, "loss": 1.7469, "step": 62945 }, { "epoch": 2.09, "grad_norm": 0.6280980706214905, "learning_rate": 0.00012513644114781224, "loss": 1.6708, "step": 62946 }, { "epoch": 2.09, "grad_norm": 0.641018271446228, "learning_rate": 0.00012512794828945946, "loss": 1.7122, "step": 62947 }, { "epoch": 2.09, "grad_norm": 0.6075816750526428, "learning_rate": 0.00012511945564337568, "loss": 1.7011, "step": 62948 }, { "epoch": 2.09, "grad_norm": 0.6471604704856873, "learning_rate": 0.00012511096320957113, "loss": 1.6994, "step": 62949 }, { "epoch": 2.09, "grad_norm": 0.6220157146453857, "learning_rate": 0.000125102470988056, "loss": 1.6928, "step": 62950 }, { "epoch": 2.09, "grad_norm": 0.6205911636352539, "learning_rate": 0.00012509397897884068, "loss": 1.6923, "step": 62951 }, { "epoch": 2.09, "grad_norm": 0.6348642110824585, "learning_rate": 0.00012508548718193562, "loss": 1.6827, "step": 62952 }, { "epoch": 2.09, "grad_norm": 0.6208551526069641, "learning_rate": 0.00012507699559735101, "loss": 1.6309, "step": 62953 }, { "epoch": 2.09, "grad_norm": 0.6459080576896667, "learning_rate": 0.00012506850422509702, "loss": 1.6661, "step": 62954 }, { "epoch": 2.09, "grad_norm": 0.6427393555641174, "learning_rate": 0.00012506001306518421, "loss": 1.7742, "step": 62955 }, { "epoch": 2.09, "grad_norm": 0.6476610898971558, "learning_rate": 0.00012505152211762278, "loss": 1.6913, "step": 62956 }, { "epoch": 2.09, "grad_norm": 0.6629873514175415, "learning_rate": 0.00012504303138242292, "loss": 1.8373, "step": 62957 }, { "epoch": 2.09, "grad_norm": 0.6184988021850586, "learning_rate": 0.00012503454085959512, "loss": 1.69, "step": 62958 }, { "epoch": 2.09, "grad_norm": 0.6230586171150208, "learning_rate": 0.0001250260505491495, "loss": 1.6992, "step": 62959 }, { "epoch": 2.09, "grad_norm": 0.6369688510894775, "learning_rate": 0.0001250175604510966, "loss": 1.743, "step": 62960 }, { "epoch": 2.09, "grad_norm": 0.6300853490829468, "learning_rate": 0.00012500907056544653, "loss": 1.7339, "step": 62961 }, { "epoch": 2.09, "grad_norm": 0.6236990094184875, "learning_rate": 0.00012500058089220972, "loss": 1.7361, "step": 62962 }, { "epoch": 2.09, "grad_norm": 0.6437469720840454, "learning_rate": 0.00012499209143139646, "loss": 1.6633, "step": 62963 }, { "epoch": 2.09, "grad_norm": 0.6416844725608826, "learning_rate": 0.00012498360218301686, "loss": 1.6827, "step": 62964 }, { "epoch": 2.09, "grad_norm": 0.6197940707206726, "learning_rate": 0.00012497511314708148, "loss": 1.6843, "step": 62965 }, { "epoch": 2.09, "grad_norm": 0.6332178711891174, "learning_rate": 0.00012496662432360048, "loss": 1.7547, "step": 62966 }, { "epoch": 2.09, "grad_norm": 0.6253712773323059, "learning_rate": 0.00012495813571258428, "loss": 1.7272, "step": 62967 }, { "epoch": 2.09, "grad_norm": 0.6420027613639832, "learning_rate": 0.00012494964731404312, "loss": 1.7305, "step": 62968 }, { "epoch": 2.09, "grad_norm": 0.6335229277610779, "learning_rate": 0.00012494115912798715, "loss": 1.7007, "step": 62969 }, { "epoch": 2.1, "grad_norm": 0.6203688979148865, "learning_rate": 0.00012493267115442697, "loss": 1.7548, "step": 62970 }, { "epoch": 2.1, "grad_norm": 0.631003737449646, "learning_rate": 0.0001249241833933726, "loss": 1.6936, "step": 62971 }, { "epoch": 2.1, "grad_norm": 0.6238890886306763, "learning_rate": 0.0001249156958448346, "loss": 1.6961, "step": 62972 }, { "epoch": 2.1, "grad_norm": 0.6163986325263977, "learning_rate": 0.00012490720850882303, "loss": 1.7212, "step": 62973 }, { "epoch": 2.1, "grad_norm": 0.8920132517814636, "learning_rate": 0.00012489872138534843, "loss": 1.7491, "step": 62974 }, { "epoch": 2.1, "grad_norm": 0.6392580270767212, "learning_rate": 0.00012489023447442098, "loss": 1.6617, "step": 62975 }, { "epoch": 2.1, "grad_norm": 0.6256873607635498, "learning_rate": 0.00012488174777605085, "loss": 1.7794, "step": 62976 }, { "epoch": 2.1, "grad_norm": 0.6376219987869263, "learning_rate": 0.0001248732612902485, "loss": 1.7584, "step": 62977 }, { "epoch": 2.1, "grad_norm": 0.6249892115592957, "learning_rate": 0.0001248647750170244, "loss": 1.717, "step": 62978 }, { "epoch": 2.1, "grad_norm": 0.6231803894042969, "learning_rate": 0.00012485628895638848, "loss": 1.796, "step": 62979 }, { "epoch": 2.1, "grad_norm": 0.645415186882019, "learning_rate": 0.0001248478031083512, "loss": 1.7041, "step": 62980 }, { "epoch": 2.1, "grad_norm": 0.6498185992240906, "learning_rate": 0.000124839317472923, "loss": 1.7178, "step": 62981 }, { "epoch": 2.1, "grad_norm": 0.6143671870231628, "learning_rate": 0.00012483083205011404, "loss": 1.6858, "step": 62982 }, { "epoch": 2.1, "grad_norm": 0.6452741026878357, "learning_rate": 0.00012482234683993456, "loss": 1.692, "step": 62983 }, { "epoch": 2.1, "grad_norm": 0.6542780995368958, "learning_rate": 0.0001248138618423949, "loss": 1.7055, "step": 62984 }, { "epoch": 2.1, "grad_norm": 0.6539480090141296, "learning_rate": 0.00012480537705750566, "loss": 1.7081, "step": 62985 }, { "epoch": 2.1, "grad_norm": 0.6366807222366333, "learning_rate": 0.00012479689248527662, "loss": 1.7368, "step": 62986 }, { "epoch": 2.1, "grad_norm": 0.6524569988250732, "learning_rate": 0.00012478840812571833, "loss": 1.7447, "step": 62987 }, { "epoch": 2.1, "grad_norm": 0.6834621429443359, "learning_rate": 0.00012477992397884124, "loss": 1.7213, "step": 62988 }, { "epoch": 2.1, "grad_norm": 0.6425999999046326, "learning_rate": 0.0001247714400446555, "loss": 1.7735, "step": 62989 }, { "epoch": 2.1, "grad_norm": 0.6475998163223267, "learning_rate": 0.0001247629563231713, "loss": 1.5865, "step": 62990 }, { "epoch": 2.1, "grad_norm": 0.6938510537147522, "learning_rate": 0.00012475447281439907, "loss": 1.7628, "step": 62991 }, { "epoch": 2.1, "grad_norm": 0.652573823928833, "learning_rate": 0.00012474598951834926, "loss": 1.794, "step": 62992 }, { "epoch": 2.1, "grad_norm": 0.6305559873580933, "learning_rate": 0.0001247375064350318, "loss": 1.6817, "step": 62993 }, { "epoch": 2.1, "grad_norm": 0.6699714660644531, "learning_rate": 0.00012472902356445718, "loss": 1.7804, "step": 62994 }, { "epoch": 2.1, "grad_norm": 0.6409665942192078, "learning_rate": 0.00012472054090663582, "loss": 1.6745, "step": 62995 }, { "epoch": 2.1, "grad_norm": 0.6608452200889587, "learning_rate": 0.0001247120584615779, "loss": 1.6831, "step": 62996 }, { "epoch": 2.1, "grad_norm": 0.63490229845047, "learning_rate": 0.0001247035762292936, "loss": 1.7412, "step": 62997 }, { "epoch": 2.1, "grad_norm": 0.634185254573822, "learning_rate": 0.00012469509420979334, "loss": 1.7262, "step": 62998 }, { "epoch": 2.1, "grad_norm": 0.6407965421676636, "learning_rate": 0.0001246866124030876, "loss": 1.8069, "step": 62999 }, { "epoch": 2.1, "grad_norm": 0.6482874155044556, "learning_rate": 0.0001246781308091863, "loss": 1.7781, "step": 63000 }, { "epoch": 2.1, "grad_norm": 0.6334132552146912, "learning_rate": 0.00012466964942809986, "loss": 1.6851, "step": 63001 }, { "epoch": 2.1, "grad_norm": 0.6235538721084595, "learning_rate": 0.0001246611682598388, "loss": 1.6645, "step": 63002 }, { "epoch": 2.1, "grad_norm": 0.6262897849082947, "learning_rate": 0.00012465268730441322, "loss": 1.6756, "step": 63003 }, { "epoch": 2.1, "grad_norm": 0.6157209873199463, "learning_rate": 0.00012464420656183332, "loss": 1.6701, "step": 63004 }, { "epoch": 2.1, "grad_norm": 0.6543464064598083, "learning_rate": 0.00012463572603210965, "loss": 1.7339, "step": 63005 }, { "epoch": 2.1, "grad_norm": 0.6054736971855164, "learning_rate": 0.00012462724571525236, "loss": 1.6926, "step": 63006 }, { "epoch": 2.1, "grad_norm": 0.6466373801231384, "learning_rate": 0.00012461876561127162, "loss": 1.6749, "step": 63007 }, { "epoch": 2.1, "grad_norm": 0.6606988906860352, "learning_rate": 0.0001246102857201779, "loss": 1.8015, "step": 63008 }, { "epoch": 2.1, "grad_norm": 0.6100978851318359, "learning_rate": 0.00012460180604198151, "loss": 1.7334, "step": 63009 }, { "epoch": 2.1, "grad_norm": 0.6388208270072937, "learning_rate": 0.00012459332657669274, "loss": 1.7418, "step": 63010 }, { "epoch": 2.1, "grad_norm": 0.6234754920005798, "learning_rate": 0.0001245848473243217, "loss": 1.6449, "step": 63011 }, { "epoch": 2.1, "grad_norm": 0.6364652514457703, "learning_rate": 0.00012457636828487888, "loss": 1.7006, "step": 63012 }, { "epoch": 2.1, "grad_norm": 0.6361355781555176, "learning_rate": 0.00012456788945837454, "loss": 1.7044, "step": 63013 }, { "epoch": 2.1, "grad_norm": 0.6145954728126526, "learning_rate": 0.00012455941084481883, "loss": 1.675, "step": 63014 }, { "epoch": 2.1, "grad_norm": 0.6304358839988708, "learning_rate": 0.00012455093244422222, "loss": 1.7018, "step": 63015 }, { "epoch": 2.1, "grad_norm": 0.6283884048461914, "learning_rate": 0.00012454245425659485, "loss": 1.7417, "step": 63016 }, { "epoch": 2.1, "grad_norm": 0.6306331157684326, "learning_rate": 0.00012453397628194718, "loss": 1.714, "step": 63017 }, { "epoch": 2.1, "grad_norm": 0.6376404762268066, "learning_rate": 0.00012452549852028932, "loss": 1.7425, "step": 63018 }, { "epoch": 2.1, "grad_norm": 0.6359145641326904, "learning_rate": 0.0001245170209716317, "loss": 1.7371, "step": 63019 }, { "epoch": 2.1, "grad_norm": 0.612112820148468, "learning_rate": 0.00012450854363598463, "loss": 1.6297, "step": 63020 }, { "epoch": 2.1, "grad_norm": 0.6423841714859009, "learning_rate": 0.00012450006651335818, "loss": 1.7106, "step": 63021 }, { "epoch": 2.1, "grad_norm": 0.6224169731140137, "learning_rate": 0.00012449158960376293, "loss": 1.6649, "step": 63022 }, { "epoch": 2.1, "grad_norm": 0.6178708076477051, "learning_rate": 0.0001244831129072089, "loss": 1.663, "step": 63023 }, { "epoch": 2.1, "grad_norm": 0.6398038864135742, "learning_rate": 0.00012447463642370664, "loss": 1.7505, "step": 63024 }, { "epoch": 2.1, "grad_norm": 0.6200108528137207, "learning_rate": 0.00012446616015326627, "loss": 1.7536, "step": 63025 }, { "epoch": 2.1, "grad_norm": 0.6322819590568542, "learning_rate": 0.00012445768409589806, "loss": 1.7683, "step": 63026 }, { "epoch": 2.1, "grad_norm": 0.6599460244178772, "learning_rate": 0.0001244492082516124, "loss": 1.6961, "step": 63027 }, { "epoch": 2.1, "grad_norm": 0.6396234035491943, "learning_rate": 0.00012444073262041946, "loss": 1.7149, "step": 63028 }, { "epoch": 2.1, "grad_norm": 0.636760950088501, "learning_rate": 0.00012443225720232972, "loss": 1.7236, "step": 63029 }, { "epoch": 2.1, "grad_norm": 0.6342809796333313, "learning_rate": 0.00012442378199735326, "loss": 1.6418, "step": 63030 }, { "epoch": 2.1, "grad_norm": 0.6488075852394104, "learning_rate": 0.00012441530700550054, "loss": 1.6411, "step": 63031 }, { "epoch": 2.1, "grad_norm": 0.6422026753425598, "learning_rate": 0.00012440683222678177, "loss": 1.638, "step": 63032 }, { "epoch": 2.1, "grad_norm": 0.6254748702049255, "learning_rate": 0.0001243983576612071, "loss": 1.6397, "step": 63033 }, { "epoch": 2.1, "grad_norm": 0.6487403512001038, "learning_rate": 0.00012438988330878712, "loss": 1.7185, "step": 63034 }, { "epoch": 2.1, "grad_norm": 0.6346417665481567, "learning_rate": 0.0001243814091695319, "loss": 1.6631, "step": 63035 }, { "epoch": 2.1, "grad_norm": 0.6301175951957703, "learning_rate": 0.00012437293524345167, "loss": 1.6946, "step": 63036 }, { "epoch": 2.1, "grad_norm": 0.6231750249862671, "learning_rate": 0.00012436446153055682, "loss": 1.6687, "step": 63037 }, { "epoch": 2.1, "grad_norm": 0.623519241809845, "learning_rate": 0.00012435598803085774, "loss": 1.7716, "step": 63038 }, { "epoch": 2.1, "grad_norm": 0.6181240677833557, "learning_rate": 0.00012434751474436464, "loss": 1.7269, "step": 63039 }, { "epoch": 2.1, "grad_norm": 1.9791327714920044, "learning_rate": 0.00012433904167108765, "loss": 1.7271, "step": 63040 }, { "epoch": 2.1, "grad_norm": 0.6336736083030701, "learning_rate": 0.00012433056881103728, "loss": 1.7264, "step": 63041 }, { "epoch": 2.1, "grad_norm": 0.614454984664917, "learning_rate": 0.00012432209616422373, "loss": 1.656, "step": 63042 }, { "epoch": 2.1, "grad_norm": 0.6235505938529968, "learning_rate": 0.00012431362373065713, "loss": 1.6758, "step": 63043 }, { "epoch": 2.1, "grad_norm": 0.6184641122817993, "learning_rate": 0.00012430515151034795, "loss": 1.7935, "step": 63044 }, { "epoch": 2.1, "grad_norm": 0.6380554437637329, "learning_rate": 0.00012429667950330663, "loss": 1.6749, "step": 63045 }, { "epoch": 2.1, "grad_norm": 0.6354856491088867, "learning_rate": 0.00012428820770954302, "loss": 1.6537, "step": 63046 }, { "epoch": 2.1, "grad_norm": 0.6121655702590942, "learning_rate": 0.00012427973612906765, "loss": 1.7436, "step": 63047 }, { "epoch": 2.1, "grad_norm": 0.6221966743469238, "learning_rate": 0.00012427126476189087, "loss": 1.6989, "step": 63048 }, { "epoch": 2.1, "grad_norm": 0.6373509168624878, "learning_rate": 0.00012426279360802295, "loss": 1.7478, "step": 63049 }, { "epoch": 2.1, "grad_norm": 0.6265418529510498, "learning_rate": 0.00012425432266747392, "loss": 1.7099, "step": 63050 }, { "epoch": 2.1, "grad_norm": 0.6828973889350891, "learning_rate": 0.00012424585194025428, "loss": 1.7695, "step": 63051 }, { "epoch": 2.1, "grad_norm": 0.6580021381378174, "learning_rate": 0.0001242373814263745, "loss": 1.7282, "step": 63052 }, { "epoch": 2.1, "grad_norm": 0.6364734172821045, "learning_rate": 0.0001242289111258444, "loss": 1.6836, "step": 63053 }, { "epoch": 2.1, "grad_norm": 0.6270712614059448, "learning_rate": 0.00012422044103867455, "loss": 1.7029, "step": 63054 }, { "epoch": 2.1, "grad_norm": 0.6350755095481873, "learning_rate": 0.00012421197116487526, "loss": 1.6567, "step": 63055 }, { "epoch": 2.1, "grad_norm": 0.5979878902435303, "learning_rate": 0.00012420350150445675, "loss": 1.7475, "step": 63056 }, { "epoch": 2.1, "grad_norm": 0.6255877017974854, "learning_rate": 0.00012419503205742915, "loss": 1.6932, "step": 63057 }, { "epoch": 2.1, "grad_norm": 0.6266286969184875, "learning_rate": 0.0001241865628238029, "loss": 1.7206, "step": 63058 }, { "epoch": 2.1, "grad_norm": 0.6350488066673279, "learning_rate": 0.00012417809380358848, "loss": 1.7669, "step": 63059 }, { "epoch": 2.1, "grad_norm": 0.6492012739181519, "learning_rate": 0.0001241696249967957, "loss": 1.5944, "step": 63060 }, { "epoch": 2.1, "grad_norm": 0.6336605548858643, "learning_rate": 0.00012416115640343514, "loss": 1.6948, "step": 63061 }, { "epoch": 2.1, "grad_norm": 0.6402438879013062, "learning_rate": 0.0001241526880235171, "loss": 1.6671, "step": 63062 }, { "epoch": 2.1, "grad_norm": 0.639432430267334, "learning_rate": 0.0001241442198570518, "loss": 1.6532, "step": 63063 }, { "epoch": 2.1, "grad_norm": 0.6333227753639221, "learning_rate": 0.0001241357519040494, "loss": 1.7897, "step": 63064 }, { "epoch": 2.1, "grad_norm": 0.618828535079956, "learning_rate": 0.00012412728416452036, "loss": 1.7065, "step": 63065 }, { "epoch": 2.1, "grad_norm": 0.6226261258125305, "learning_rate": 0.00012411881663847494, "loss": 1.6455, "step": 63066 }, { "epoch": 2.1, "grad_norm": 0.6261612176895142, "learning_rate": 0.0001241103493259232, "loss": 1.6697, "step": 63067 }, { "epoch": 2.1, "grad_norm": 0.6270849704742432, "learning_rate": 0.00012410188222687562, "loss": 1.6901, "step": 63068 }, { "epoch": 2.1, "grad_norm": 0.6234721541404724, "learning_rate": 0.00012409341534134252, "loss": 1.6607, "step": 63069 }, { "epoch": 2.1, "grad_norm": 0.6450979709625244, "learning_rate": 0.0001240849486693341, "loss": 1.743, "step": 63070 }, { "epoch": 2.1, "grad_norm": 0.6307287216186523, "learning_rate": 0.00012407648221086053, "loss": 1.737, "step": 63071 }, { "epoch": 2.1, "grad_norm": 0.6333625912666321, "learning_rate": 0.0001240680159659323, "loss": 1.701, "step": 63072 }, { "epoch": 2.1, "grad_norm": 0.6449541449546814, "learning_rate": 0.00012405954993455957, "loss": 1.7091, "step": 63073 }, { "epoch": 2.1, "grad_norm": 0.6376273036003113, "learning_rate": 0.0001240510841167525, "loss": 1.7126, "step": 63074 }, { "epoch": 2.1, "grad_norm": 0.6349923014640808, "learning_rate": 0.00012404261851252162, "loss": 1.7653, "step": 63075 }, { "epoch": 2.1, "grad_norm": 0.6287829875946045, "learning_rate": 0.00012403415312187697, "loss": 1.7549, "step": 63076 }, { "epoch": 2.1, "grad_norm": 0.609273374080658, "learning_rate": 0.00012402568794482903, "loss": 1.6989, "step": 63077 }, { "epoch": 2.1, "grad_norm": 0.6161729693412781, "learning_rate": 0.00012401722298138786, "loss": 1.7246, "step": 63078 }, { "epoch": 2.1, "grad_norm": 0.6443277597427368, "learning_rate": 0.00012400875823156398, "loss": 1.7613, "step": 63079 }, { "epoch": 2.1, "grad_norm": 0.6441117525100708, "learning_rate": 0.0001240002936953675, "loss": 1.6586, "step": 63080 }, { "epoch": 2.1, "grad_norm": 0.650904655456543, "learning_rate": 0.00012399182937280866, "loss": 1.7267, "step": 63081 }, { "epoch": 2.1, "grad_norm": 0.6323397755622864, "learning_rate": 0.0001239833652638979, "loss": 1.6993, "step": 63082 }, { "epoch": 2.1, "grad_norm": 0.6386244297027588, "learning_rate": 0.00012397490136864528, "loss": 1.6358, "step": 63083 }, { "epoch": 2.1, "grad_norm": 0.6294991970062256, "learning_rate": 0.00012396643768706133, "loss": 1.6738, "step": 63084 }, { "epoch": 2.1, "grad_norm": 0.6403635740280151, "learning_rate": 0.00012395797421915614, "loss": 1.6977, "step": 63085 }, { "epoch": 2.1, "grad_norm": 0.6175007224082947, "learning_rate": 0.00012394951096493997, "loss": 1.6896, "step": 63086 }, { "epoch": 2.1, "grad_norm": 0.629371166229248, "learning_rate": 0.00012394104792442324, "loss": 1.6536, "step": 63087 }, { "epoch": 2.1, "grad_norm": 0.6400466561317444, "learning_rate": 0.00012393258509761602, "loss": 1.7901, "step": 63088 }, { "epoch": 2.1, "grad_norm": 0.6410954594612122, "learning_rate": 0.00012392412248452884, "loss": 1.7106, "step": 63089 }, { "epoch": 2.1, "grad_norm": 0.6313101649284363, "learning_rate": 0.00012391566008517168, "loss": 1.7013, "step": 63090 }, { "epoch": 2.1, "grad_norm": 0.6401991248130798, "learning_rate": 0.00012390719789955512, "loss": 1.7158, "step": 63091 }, { "epoch": 2.1, "grad_norm": 0.6407359838485718, "learning_rate": 0.00012389873592768923, "loss": 1.7073, "step": 63092 }, { "epoch": 2.1, "grad_norm": 0.6228740811347961, "learning_rate": 0.00012389027416958426, "loss": 1.719, "step": 63093 }, { "epoch": 2.1, "grad_norm": 0.6530517935752869, "learning_rate": 0.00012388181262525054, "loss": 1.6681, "step": 63094 }, { "epoch": 2.1, "grad_norm": 0.658807635307312, "learning_rate": 0.00012387335129469856, "loss": 1.7451, "step": 63095 }, { "epoch": 2.1, "grad_norm": 0.630752444267273, "learning_rate": 0.00012386489017793815, "loss": 1.7105, "step": 63096 }, { "epoch": 2.1, "grad_norm": 0.6288892030715942, "learning_rate": 0.0001238564292749798, "loss": 1.6915, "step": 63097 }, { "epoch": 2.1, "grad_norm": 0.6167214512825012, "learning_rate": 0.00012384796858583392, "loss": 1.6097, "step": 63098 }, { "epoch": 2.1, "grad_norm": 0.6241296529769897, "learning_rate": 0.00012383950811051066, "loss": 1.6494, "step": 63099 }, { "epoch": 2.1, "grad_norm": 0.6108671426773071, "learning_rate": 0.00012383104784902016, "loss": 1.7132, "step": 63100 }, { "epoch": 2.1, "grad_norm": 0.6486726403236389, "learning_rate": 0.00012382258780137284, "loss": 1.6666, "step": 63101 }, { "epoch": 2.1, "grad_norm": 0.60415118932724, "learning_rate": 0.00012381412796757914, "loss": 1.7196, "step": 63102 }, { "epoch": 2.1, "grad_norm": 0.6317774653434753, "learning_rate": 0.0001238056683476489, "loss": 1.7137, "step": 63103 }, { "epoch": 2.1, "grad_norm": 0.6352390050888062, "learning_rate": 0.0001237972089415926, "loss": 1.6506, "step": 63104 }, { "epoch": 2.1, "grad_norm": 0.6182838678359985, "learning_rate": 0.00012378874974942067, "loss": 1.6688, "step": 63105 }, { "epoch": 2.1, "grad_norm": 0.6281622648239136, "learning_rate": 0.0001237802907711432, "loss": 1.7396, "step": 63106 }, { "epoch": 2.1, "grad_norm": 0.6381036639213562, "learning_rate": 0.00012377183200677042, "loss": 1.6869, "step": 63107 }, { "epoch": 2.1, "grad_norm": 0.6655173897743225, "learning_rate": 0.00012376337345631266, "loss": 1.7954, "step": 63108 }, { "epoch": 2.1, "grad_norm": 0.638462483882904, "learning_rate": 0.0001237549151197804, "loss": 1.8077, "step": 63109 }, { "epoch": 2.1, "grad_norm": 0.6327744722366333, "learning_rate": 0.0001237464569971835, "loss": 1.7231, "step": 63110 }, { "epoch": 2.1, "grad_norm": 0.6549596786499023, "learning_rate": 0.00012373799908853243, "loss": 1.7345, "step": 63111 }, { "epoch": 2.1, "grad_norm": 0.6495568156242371, "learning_rate": 0.00012372954139383758, "loss": 1.717, "step": 63112 }, { "epoch": 2.1, "grad_norm": 0.6346675157546997, "learning_rate": 0.00012372108391310905, "loss": 1.6048, "step": 63113 }, { "epoch": 2.1, "grad_norm": 0.64805668592453, "learning_rate": 0.00012371262664635706, "loss": 1.7525, "step": 63114 }, { "epoch": 2.1, "grad_norm": 0.614219069480896, "learning_rate": 0.00012370416959359197, "loss": 1.7008, "step": 63115 }, { "epoch": 2.1, "grad_norm": 0.6312708258628845, "learning_rate": 0.0001236957127548243, "loss": 1.7152, "step": 63116 }, { "epoch": 2.1, "grad_norm": 0.6382371783256531, "learning_rate": 0.00012368725613006378, "loss": 1.7024, "step": 63117 }, { "epoch": 2.1, "grad_norm": 0.6117929816246033, "learning_rate": 0.00012367879971932102, "loss": 1.7417, "step": 63118 }, { "epoch": 2.1, "grad_norm": 0.6331681609153748, "learning_rate": 0.00012367034352260626, "loss": 1.7562, "step": 63119 }, { "epoch": 2.1, "grad_norm": 0.6332924962043762, "learning_rate": 0.00012366188753992974, "loss": 1.703, "step": 63120 }, { "epoch": 2.1, "grad_norm": 0.6491342782974243, "learning_rate": 0.00012365343177130162, "loss": 1.7247, "step": 63121 }, { "epoch": 2.1, "grad_norm": 0.6368427276611328, "learning_rate": 0.00012364497621673233, "loss": 1.6653, "step": 63122 }, { "epoch": 2.1, "grad_norm": 0.6611168384552002, "learning_rate": 0.00012363652087623209, "loss": 1.7655, "step": 63123 }, { "epoch": 2.1, "grad_norm": 0.6368465423583984, "learning_rate": 0.00012362806574981098, "loss": 1.7445, "step": 63124 }, { "epoch": 2.1, "grad_norm": 0.6473009586334229, "learning_rate": 0.00012361961083747942, "loss": 1.7363, "step": 63125 }, { "epoch": 2.1, "grad_norm": 0.6394450664520264, "learning_rate": 0.0001236111561392478, "loss": 1.7757, "step": 63126 }, { "epoch": 2.1, "grad_norm": 0.6148187518119812, "learning_rate": 0.00012360270165512623, "loss": 1.685, "step": 63127 }, { "epoch": 2.1, "grad_norm": 0.6274693012237549, "learning_rate": 0.00012359424738512487, "loss": 1.7616, "step": 63128 }, { "epoch": 2.1, "grad_norm": 0.6323553919792175, "learning_rate": 0.00012358579332925424, "loss": 1.7221, "step": 63129 }, { "epoch": 2.1, "grad_norm": 0.6040836572647095, "learning_rate": 0.00012357733948752445, "loss": 1.7533, "step": 63130 }, { "epoch": 2.1, "grad_norm": 0.6473385095596313, "learning_rate": 0.00012356888585994563, "loss": 1.7672, "step": 63131 }, { "epoch": 2.1, "grad_norm": 0.638290524482727, "learning_rate": 0.0001235604324465283, "loss": 1.765, "step": 63132 }, { "epoch": 2.1, "grad_norm": 0.6390402913093567, "learning_rate": 0.00012355197924728253, "loss": 1.6175, "step": 63133 }, { "epoch": 2.1, "grad_norm": 0.6240751147270203, "learning_rate": 0.0001235435262622188, "loss": 1.6782, "step": 63134 }, { "epoch": 2.1, "grad_norm": 0.6261131763458252, "learning_rate": 0.00012353507349134707, "loss": 1.6666, "step": 63135 }, { "epoch": 2.1, "grad_norm": 0.6088261604309082, "learning_rate": 0.00012352662093467788, "loss": 1.7146, "step": 63136 }, { "epoch": 2.1, "grad_norm": 0.6321735382080078, "learning_rate": 0.00012351816859222138, "loss": 1.7186, "step": 63137 }, { "epoch": 2.1, "grad_norm": 0.6302587389945984, "learning_rate": 0.00012350971646398767, "loss": 1.703, "step": 63138 }, { "epoch": 2.1, "grad_norm": 0.6387844681739807, "learning_rate": 0.00012350126454998734, "loss": 1.7568, "step": 63139 }, { "epoch": 2.1, "grad_norm": 0.6474905610084534, "learning_rate": 0.00012349281285023027, "loss": 1.7829, "step": 63140 }, { "epoch": 2.1, "grad_norm": 0.6554089188575745, "learning_rate": 0.0001234843613647271, "loss": 1.6926, "step": 63141 }, { "epoch": 2.1, "grad_norm": 0.6445611119270325, "learning_rate": 0.00012347591009348788, "loss": 1.711, "step": 63142 }, { "epoch": 2.1, "grad_norm": 0.6335849761962891, "learning_rate": 0.00012346745903652278, "loss": 1.7101, "step": 63143 }, { "epoch": 2.1, "grad_norm": 0.6583284139633179, "learning_rate": 0.0001234590081938423, "loss": 1.722, "step": 63144 }, { "epoch": 2.1, "grad_norm": 0.6608732342720032, "learning_rate": 0.00012345055756545644, "loss": 1.7206, "step": 63145 }, { "epoch": 2.1, "grad_norm": 0.6350666880607605, "learning_rate": 0.00012344210715137575, "loss": 1.6956, "step": 63146 }, { "epoch": 2.1, "grad_norm": 0.6667432188987732, "learning_rate": 0.00012343365695161019, "loss": 1.6657, "step": 63147 }, { "epoch": 2.1, "grad_norm": 0.6301184296607971, "learning_rate": 0.00012342520696617025, "loss": 1.6407, "step": 63148 }, { "epoch": 2.1, "grad_norm": 0.6376528739929199, "learning_rate": 0.00012341675719506611, "loss": 1.6251, "step": 63149 }, { "epoch": 2.1, "grad_norm": 0.625020444393158, "learning_rate": 0.00012340830763830788, "loss": 1.6689, "step": 63150 }, { "epoch": 2.1, "grad_norm": 0.6236244440078735, "learning_rate": 0.00012339985829590606, "loss": 1.7404, "step": 63151 }, { "epoch": 2.1, "grad_norm": 0.652033269405365, "learning_rate": 0.0001233914091678708, "loss": 1.719, "step": 63152 }, { "epoch": 2.1, "grad_norm": 0.6373589038848877, "learning_rate": 0.00012338296025421222, "loss": 1.672, "step": 63153 }, { "epoch": 2.1, "grad_norm": 0.6197296977043152, "learning_rate": 0.00012337451155494073, "loss": 1.694, "step": 63154 }, { "epoch": 2.1, "grad_norm": 0.6506905555725098, "learning_rate": 0.00012336606307006664, "loss": 1.7117, "step": 63155 }, { "epoch": 2.1, "grad_norm": 0.6142472624778748, "learning_rate": 0.00012335761479960014, "loss": 1.6457, "step": 63156 }, { "epoch": 2.1, "grad_norm": 0.618624746799469, "learning_rate": 0.00012334916674355134, "loss": 1.6491, "step": 63157 }, { "epoch": 2.1, "grad_norm": 0.617720365524292, "learning_rate": 0.00012334071890193074, "loss": 1.7373, "step": 63158 }, { "epoch": 2.1, "grad_norm": 0.6321735978126526, "learning_rate": 0.00012333227127474852, "loss": 1.72, "step": 63159 }, { "epoch": 2.1, "grad_norm": 0.6388806104660034, "learning_rate": 0.0001233238238620147, "loss": 1.6391, "step": 63160 }, { "epoch": 2.1, "grad_norm": 0.6318081617355347, "learning_rate": 0.00012331537666373978, "loss": 1.6199, "step": 63161 }, { "epoch": 2.1, "grad_norm": 0.6343351006507874, "learning_rate": 0.00012330692967993422, "loss": 1.7159, "step": 63162 }, { "epoch": 2.1, "grad_norm": 0.6211177110671997, "learning_rate": 0.00012329848291060773, "loss": 1.7474, "step": 63163 }, { "epoch": 2.1, "grad_norm": 0.6282010674476624, "learning_rate": 0.00012329003635577087, "loss": 1.7251, "step": 63164 }, { "epoch": 2.1, "grad_norm": 0.6442350149154663, "learning_rate": 0.00012328159001543396, "loss": 1.7555, "step": 63165 }, { "epoch": 2.1, "grad_norm": 0.6189399361610413, "learning_rate": 0.00012327314388960715, "loss": 1.6837, "step": 63166 }, { "epoch": 2.1, "grad_norm": 0.6424304842948914, "learning_rate": 0.0001232646979783006, "loss": 1.7071, "step": 63167 }, { "epoch": 2.1, "grad_norm": 0.6149339079856873, "learning_rate": 0.00012325625228152472, "loss": 1.6231, "step": 63168 }, { "epoch": 2.1, "grad_norm": 0.6368152499198914, "learning_rate": 0.0001232478067992899, "loss": 1.6947, "step": 63169 }, { "epoch": 2.1, "grad_norm": 0.6189168095588684, "learning_rate": 0.00012323936153160595, "loss": 1.6579, "step": 63170 }, { "epoch": 2.1, "grad_norm": 0.6491994261741638, "learning_rate": 0.00012323091647848338, "loss": 1.7323, "step": 63171 }, { "epoch": 2.1, "grad_norm": 0.6142435073852539, "learning_rate": 0.00012322247163993257, "loss": 1.676, "step": 63172 }, { "epoch": 2.1, "grad_norm": 0.6223691701889038, "learning_rate": 0.0001232140270159636, "loss": 1.6481, "step": 63173 }, { "epoch": 2.1, "grad_norm": 0.6613650918006897, "learning_rate": 0.00012320558260658666, "loss": 1.7116, "step": 63174 }, { "epoch": 2.1, "grad_norm": 0.6456679701805115, "learning_rate": 0.00012319713841181212, "loss": 1.6803, "step": 63175 }, { "epoch": 2.1, "grad_norm": 0.6222097873687744, "learning_rate": 0.00012318869443165035, "loss": 1.7038, "step": 63176 }, { "epoch": 2.1, "grad_norm": 0.6221948862075806, "learning_rate": 0.0001231802506661113, "loss": 1.798, "step": 63177 }, { "epoch": 2.1, "grad_norm": 0.6242426037788391, "learning_rate": 0.00012317180711520536, "loss": 1.6505, "step": 63178 }, { "epoch": 2.1, "grad_norm": 0.6176607608795166, "learning_rate": 0.00012316336377894287, "loss": 1.715, "step": 63179 }, { "epoch": 2.1, "grad_norm": 0.620485246181488, "learning_rate": 0.000123154920657334, "loss": 1.6376, "step": 63180 }, { "epoch": 2.1, "grad_norm": 0.6358604431152344, "learning_rate": 0.00012314647775038895, "loss": 1.6838, "step": 63181 }, { "epoch": 2.1, "grad_norm": 0.5984359979629517, "learning_rate": 0.00012313803505811807, "loss": 1.6967, "step": 63182 }, { "epoch": 2.1, "grad_norm": 0.6432055830955505, "learning_rate": 0.00012312959258053158, "loss": 1.6129, "step": 63183 }, { "epoch": 2.1, "grad_norm": 0.6482066512107849, "learning_rate": 0.0001231211503176396, "loss": 1.702, "step": 63184 }, { "epoch": 2.1, "grad_norm": 0.6387411952018738, "learning_rate": 0.00012311270826945246, "loss": 1.6743, "step": 63185 }, { "epoch": 2.1, "grad_norm": 0.622284471988678, "learning_rate": 0.00012310426643598056, "loss": 1.7797, "step": 63186 }, { "epoch": 2.1, "grad_norm": 0.6344974040985107, "learning_rate": 0.000123095824817234, "loss": 1.7586, "step": 63187 }, { "epoch": 2.1, "grad_norm": 0.6663739085197449, "learning_rate": 0.00012308738341322293, "loss": 1.6316, "step": 63188 }, { "epoch": 2.1, "grad_norm": 0.6351519823074341, "learning_rate": 0.00012307894222395784, "loss": 1.6708, "step": 63189 }, { "epoch": 2.1, "grad_norm": 0.6136402487754822, "learning_rate": 0.0001230705012494488, "loss": 1.6653, "step": 63190 }, { "epoch": 2.1, "grad_norm": 0.6749123930931091, "learning_rate": 0.00012306206048970605, "loss": 1.7193, "step": 63191 }, { "epoch": 2.1, "grad_norm": 0.6221038699150085, "learning_rate": 0.00012305361994474, "loss": 1.6863, "step": 63192 }, { "epoch": 2.1, "grad_norm": 0.6646670699119568, "learning_rate": 0.00012304517961456062, "loss": 1.6432, "step": 63193 }, { "epoch": 2.1, "grad_norm": 0.6274534463882446, "learning_rate": 0.00012303673949917845, "loss": 1.6364, "step": 63194 }, { "epoch": 2.1, "grad_norm": 0.6452450156211853, "learning_rate": 0.0001230282995986035, "loss": 1.6909, "step": 63195 }, { "epoch": 2.1, "grad_norm": 0.6401393413543701, "learning_rate": 0.00012301985991284626, "loss": 1.7199, "step": 63196 }, { "epoch": 2.1, "grad_norm": 0.6142489314079285, "learning_rate": 0.0001230114204419168, "loss": 1.6523, "step": 63197 }, { "epoch": 2.1, "grad_norm": 0.629856526851654, "learning_rate": 0.00012300298118582526, "loss": 1.6868, "step": 63198 }, { "epoch": 2.1, "grad_norm": 0.6305352449417114, "learning_rate": 0.00012299454214458218, "loss": 1.705, "step": 63199 }, { "epoch": 2.1, "grad_norm": 0.6488809585571289, "learning_rate": 0.0001229861033181975, "loss": 1.7255, "step": 63200 }, { "epoch": 2.1, "grad_norm": 0.6365092396736145, "learning_rate": 0.00012297766470668176, "loss": 1.6759, "step": 63201 }, { "epoch": 2.1, "grad_norm": 0.6587585806846619, "learning_rate": 0.000122969226310045, "loss": 1.7207, "step": 63202 }, { "epoch": 2.1, "grad_norm": 0.6348106265068054, "learning_rate": 0.00012296078812829746, "loss": 1.744, "step": 63203 }, { "epoch": 2.1, "grad_norm": 2.771125555038452, "learning_rate": 0.00012295235016144952, "loss": 1.749, "step": 63204 }, { "epoch": 2.1, "grad_norm": 0.628106951713562, "learning_rate": 0.00012294391240951123, "loss": 1.6839, "step": 63205 }, { "epoch": 2.1, "grad_norm": 0.6509712338447571, "learning_rate": 0.00012293547487249304, "loss": 1.6985, "step": 63206 }, { "epoch": 2.1, "grad_norm": 0.6523686051368713, "learning_rate": 0.000122927037550405, "loss": 1.7472, "step": 63207 }, { "epoch": 2.1, "grad_norm": 0.6549367308616638, "learning_rate": 0.00012291860044325755, "loss": 1.7755, "step": 63208 }, { "epoch": 2.1, "grad_norm": 0.6455658674240112, "learning_rate": 0.00012291016355106082, "loss": 1.7228, "step": 63209 }, { "epoch": 2.1, "grad_norm": 0.6211928725242615, "learning_rate": 0.00012290172687382499, "loss": 1.6409, "step": 63210 }, { "epoch": 2.1, "grad_norm": 0.6235781908035278, "learning_rate": 0.00012289329041156034, "loss": 1.7427, "step": 63211 }, { "epoch": 2.1, "grad_norm": 0.6460360288619995, "learning_rate": 0.00012288485416427734, "loss": 1.6794, "step": 63212 }, { "epoch": 2.1, "grad_norm": 0.6157165169715881, "learning_rate": 0.00012287641813198583, "loss": 1.7765, "step": 63213 }, { "epoch": 2.1, "grad_norm": 0.6392773985862732, "learning_rate": 0.00012286798231469625, "loss": 1.7516, "step": 63214 }, { "epoch": 2.1, "grad_norm": 0.6340175271034241, "learning_rate": 0.00012285954671241895, "loss": 1.6924, "step": 63215 }, { "epoch": 2.1, "grad_norm": 0.6155986785888672, "learning_rate": 0.0001228511113251641, "loss": 1.6613, "step": 63216 }, { "epoch": 2.1, "grad_norm": 0.6565324068069458, "learning_rate": 0.0001228426761529417, "loss": 1.7356, "step": 63217 }, { "epoch": 2.1, "grad_norm": 0.6449633240699768, "learning_rate": 0.00012283424119576225, "loss": 1.7749, "step": 63218 }, { "epoch": 2.1, "grad_norm": 0.6151316165924072, "learning_rate": 0.00012282580645363612, "loss": 1.7624, "step": 63219 }, { "epoch": 2.1, "grad_norm": 0.6284418702125549, "learning_rate": 0.00012281737192657317, "loss": 1.6579, "step": 63220 }, { "epoch": 2.1, "grad_norm": 0.6209912300109863, "learning_rate": 0.0001228089376145838, "loss": 1.7147, "step": 63221 }, { "epoch": 2.1, "grad_norm": 0.6247105002403259, "learning_rate": 0.00012280050351767837, "loss": 1.7668, "step": 63222 }, { "epoch": 2.1, "grad_norm": 0.6318837404251099, "learning_rate": 0.00012279206963586705, "loss": 1.6646, "step": 63223 }, { "epoch": 2.1, "grad_norm": 0.6321563720703125, "learning_rate": 0.0001227836359691599, "loss": 1.6328, "step": 63224 }, { "epoch": 2.1, "grad_norm": 0.6429620981216431, "learning_rate": 0.00012277520251756735, "loss": 1.7708, "step": 63225 }, { "epoch": 2.1, "grad_norm": 0.6329768896102905, "learning_rate": 0.0001227667692810998, "loss": 1.6841, "step": 63226 }, { "epoch": 2.1, "grad_norm": 0.6252687573432922, "learning_rate": 0.00012275833625976704, "loss": 1.6435, "step": 63227 }, { "epoch": 2.1, "grad_norm": 0.6341965794563293, "learning_rate": 0.00012274990345357954, "loss": 1.6826, "step": 63228 }, { "epoch": 2.1, "grad_norm": 0.6480289697647095, "learning_rate": 0.00012274147086254765, "loss": 1.777, "step": 63229 }, { "epoch": 2.1, "grad_norm": 0.6282421350479126, "learning_rate": 0.0001227330384866815, "loss": 1.7163, "step": 63230 }, { "epoch": 2.1, "grad_norm": 0.658912181854248, "learning_rate": 0.00012272460632599123, "loss": 1.6996, "step": 63231 }, { "epoch": 2.1, "grad_norm": 0.6378553509712219, "learning_rate": 0.00012271617438048716, "loss": 1.6909, "step": 63232 }, { "epoch": 2.1, "grad_norm": 0.6217338442802429, "learning_rate": 0.00012270774265017977, "loss": 1.687, "step": 63233 }, { "epoch": 2.1, "grad_norm": 0.6316652894020081, "learning_rate": 0.00012269931113507879, "loss": 1.7247, "step": 63234 }, { "epoch": 2.1, "grad_norm": 0.6672648787498474, "learning_rate": 0.00012269087983519478, "loss": 1.7415, "step": 63235 }, { "epoch": 2.1, "grad_norm": 0.6357765197753906, "learning_rate": 0.00012268244875053797, "loss": 1.7613, "step": 63236 }, { "epoch": 2.1, "grad_norm": 0.6429575681686401, "learning_rate": 0.00012267401788111857, "loss": 1.7516, "step": 63237 }, { "epoch": 2.1, "grad_norm": 0.6291240453720093, "learning_rate": 0.00012266558722694667, "loss": 1.735, "step": 63238 }, { "epoch": 2.1, "grad_norm": 0.6419636607170105, "learning_rate": 0.00012265715678803273, "loss": 1.6798, "step": 63239 }, { "epoch": 2.1, "grad_norm": 0.6419161558151245, "learning_rate": 0.00012264872656438688, "loss": 1.6187, "step": 63240 }, { "epoch": 2.1, "grad_norm": 0.6281778216362, "learning_rate": 0.00012264029655601922, "loss": 1.7077, "step": 63241 }, { "epoch": 2.1, "grad_norm": 0.6357265710830688, "learning_rate": 0.00012263186676294015, "loss": 1.7314, "step": 63242 }, { "epoch": 2.1, "grad_norm": 0.6404989361763, "learning_rate": 0.0001226234371851599, "loss": 1.703, "step": 63243 }, { "epoch": 2.1, "grad_norm": 0.6354804635047913, "learning_rate": 0.0001226150078226887, "loss": 1.6626, "step": 63244 }, { "epoch": 2.1, "grad_norm": 0.6516255736351013, "learning_rate": 0.00012260657867553665, "loss": 1.6775, "step": 63245 }, { "epoch": 2.1, "grad_norm": 0.6150158643722534, "learning_rate": 0.0001225981497437141, "loss": 1.7413, "step": 63246 }, { "epoch": 2.1, "grad_norm": 0.6328858137130737, "learning_rate": 0.00012258972102723137, "loss": 1.7296, "step": 63247 }, { "epoch": 2.1, "grad_norm": 0.6349713802337646, "learning_rate": 0.0001225812925260984, "loss": 1.688, "step": 63248 }, { "epoch": 2.1, "grad_norm": 0.6352963447570801, "learning_rate": 0.00012257286424032573, "loss": 1.7086, "step": 63249 }, { "epoch": 2.1, "grad_norm": 0.6383205056190491, "learning_rate": 0.00012256443616992334, "loss": 1.7228, "step": 63250 }, { "epoch": 2.1, "grad_norm": 0.6328099370002747, "learning_rate": 0.00012255600831490173, "loss": 1.717, "step": 63251 }, { "epoch": 2.1, "grad_norm": 0.6114974021911621, "learning_rate": 0.00012254758067527084, "loss": 1.6822, "step": 63252 }, { "epoch": 2.1, "grad_norm": 0.626348078250885, "learning_rate": 0.00012253915325104113, "loss": 1.7007, "step": 63253 }, { "epoch": 2.1, "grad_norm": 0.642603874206543, "learning_rate": 0.0001225307260422228, "loss": 1.7659, "step": 63254 }, { "epoch": 2.1, "grad_norm": 0.6343302130699158, "learning_rate": 0.00012252229904882587, "loss": 1.7006, "step": 63255 }, { "epoch": 2.1, "grad_norm": 0.6323164701461792, "learning_rate": 0.0001225138722708608, "loss": 1.6378, "step": 63256 }, { "epoch": 2.1, "grad_norm": 0.6305569410324097, "learning_rate": 0.00012250544570833766, "loss": 1.6899, "step": 63257 }, { "epoch": 2.1, "grad_norm": 0.6630139350891113, "learning_rate": 0.00012249701936126688, "loss": 1.7596, "step": 63258 }, { "epoch": 2.1, "grad_norm": 0.6217658519744873, "learning_rate": 0.00012248859322965854, "loss": 1.6824, "step": 63259 }, { "epoch": 2.1, "grad_norm": 0.6638656258583069, "learning_rate": 0.0001224801673135228, "loss": 1.6839, "step": 63260 }, { "epoch": 2.1, "grad_norm": 0.6373670697212219, "learning_rate": 0.00012247174161287007, "loss": 1.6437, "step": 63261 }, { "epoch": 2.1, "grad_norm": 0.6607341170310974, "learning_rate": 0.0001224633161277104, "loss": 1.8025, "step": 63262 }, { "epoch": 2.1, "grad_norm": 0.6790103316307068, "learning_rate": 0.0001224548908580542, "loss": 1.7254, "step": 63263 }, { "epoch": 2.1, "grad_norm": 0.6465797424316406, "learning_rate": 0.00012244646580391151, "loss": 1.7085, "step": 63264 }, { "epoch": 2.1, "grad_norm": 0.6621467471122742, "learning_rate": 0.00012243804096529273, "loss": 1.6458, "step": 63265 }, { "epoch": 2.1, "grad_norm": 0.616553783416748, "learning_rate": 0.00012242961634220803, "loss": 1.7342, "step": 63266 }, { "epoch": 2.1, "grad_norm": 0.6555047631263733, "learning_rate": 0.00012242119193466749, "loss": 1.7842, "step": 63267 }, { "epoch": 2.1, "grad_norm": 0.6289803385734558, "learning_rate": 0.00012241276774268157, "loss": 1.7275, "step": 63268 }, { "epoch": 2.1, "grad_norm": 0.6126735806465149, "learning_rate": 0.00012240434376626041, "loss": 1.7021, "step": 63269 }, { "epoch": 2.11, "grad_norm": 0.6197786927223206, "learning_rate": 0.00012239592000541408, "loss": 1.7486, "step": 63270 }, { "epoch": 2.11, "grad_norm": 0.6298990249633789, "learning_rate": 0.00012238749646015294, "loss": 1.654, "step": 63271 }, { "epoch": 2.11, "grad_norm": 0.6225448250770569, "learning_rate": 0.0001223790731304873, "loss": 1.7321, "step": 63272 }, { "epoch": 2.11, "grad_norm": 0.6470564603805542, "learning_rate": 0.00012237065001642728, "loss": 1.6801, "step": 63273 }, { "epoch": 2.11, "grad_norm": 0.640082836151123, "learning_rate": 0.00012236222711798308, "loss": 1.7586, "step": 63274 }, { "epoch": 2.11, "grad_norm": 0.6183261275291443, "learning_rate": 0.000122353804435165, "loss": 1.6412, "step": 63275 }, { "epoch": 2.11, "grad_norm": 0.6538365483283997, "learning_rate": 0.00012234538196798325, "loss": 1.7333, "step": 63276 }, { "epoch": 2.11, "grad_norm": 0.63584303855896, "learning_rate": 0.00012233695971644793, "loss": 1.739, "step": 63277 }, { "epoch": 2.11, "grad_norm": 0.6301296353340149, "learning_rate": 0.00012232853768056936, "loss": 1.773, "step": 63278 }, { "epoch": 2.11, "grad_norm": 0.6374945044517517, "learning_rate": 0.000122320115860358, "loss": 1.63, "step": 63279 }, { "epoch": 2.11, "grad_norm": 0.617927074432373, "learning_rate": 0.00012231169425582357, "loss": 1.6372, "step": 63280 }, { "epoch": 2.11, "grad_norm": 0.6359766721725464, "learning_rate": 0.0001223032728669766, "loss": 1.7868, "step": 63281 }, { "epoch": 2.11, "grad_norm": 0.6282639503479004, "learning_rate": 0.00012229485169382737, "loss": 1.7078, "step": 63282 }, { "epoch": 2.11, "grad_norm": 0.6417881846427917, "learning_rate": 0.000122286430736386, "loss": 1.6511, "step": 63283 }, { "epoch": 2.11, "grad_norm": 0.6201448440551758, "learning_rate": 0.00012227800999466262, "loss": 1.659, "step": 63284 }, { "epoch": 2.11, "grad_norm": 0.643669605255127, "learning_rate": 0.00012226958946866758, "loss": 1.7387, "step": 63285 }, { "epoch": 2.11, "grad_norm": 0.646815836429596, "learning_rate": 0.00012226116915841126, "loss": 1.8127, "step": 63286 }, { "epoch": 2.11, "grad_norm": 0.6363437175750732, "learning_rate": 0.00012225274906390346, "loss": 1.6203, "step": 63287 }, { "epoch": 2.11, "grad_norm": 0.643412709236145, "learning_rate": 0.00012224432918515467, "loss": 1.7898, "step": 63288 }, { "epoch": 2.11, "grad_norm": 0.6338469982147217, "learning_rate": 0.00012223590952217516, "loss": 1.6786, "step": 63289 }, { "epoch": 2.11, "grad_norm": 0.6327105760574341, "learning_rate": 0.00012222749007497507, "loss": 1.7077, "step": 63290 }, { "epoch": 2.11, "grad_norm": 0.6132076382637024, "learning_rate": 0.00012221907084356448, "loss": 1.7736, "step": 63291 }, { "epoch": 2.11, "grad_norm": 0.6516522169113159, "learning_rate": 0.00012221065182795377, "loss": 1.7001, "step": 63292 }, { "epoch": 2.11, "grad_norm": 0.6452181339263916, "learning_rate": 0.00012220223302815335, "loss": 1.7301, "step": 63293 }, { "epoch": 2.11, "grad_norm": 0.6407424211502075, "learning_rate": 0.00012219381444417298, "loss": 1.7182, "step": 63294 }, { "epoch": 2.11, "grad_norm": 0.6408933401107788, "learning_rate": 0.00012218539607602313, "loss": 1.7484, "step": 63295 }, { "epoch": 2.11, "grad_norm": 0.6244024634361267, "learning_rate": 0.00012217697792371414, "loss": 1.6856, "step": 63296 }, { "epoch": 2.11, "grad_norm": 0.6529175639152527, "learning_rate": 0.00012216855998725608, "loss": 1.6907, "step": 63297 }, { "epoch": 2.11, "grad_norm": 0.600578784942627, "learning_rate": 0.00012216014226665907, "loss": 1.6308, "step": 63298 }, { "epoch": 2.11, "grad_norm": 0.6140075922012329, "learning_rate": 0.00012215172476193358, "loss": 1.7071, "step": 63299 }, { "epoch": 2.11, "grad_norm": 0.6329989433288574, "learning_rate": 0.00012214330747308967, "loss": 1.7349, "step": 63300 }, { "epoch": 2.11, "grad_norm": 0.617726743221283, "learning_rate": 0.00012213489040013747, "loss": 1.7058, "step": 63301 }, { "epoch": 2.11, "grad_norm": 0.6285961270332336, "learning_rate": 0.0001221264735430873, "loss": 1.7191, "step": 63302 }, { "epoch": 2.11, "grad_norm": 0.6450424790382385, "learning_rate": 0.00012211805690194954, "loss": 1.6966, "step": 63303 }, { "epoch": 2.11, "grad_norm": 0.6619349718093872, "learning_rate": 0.0001221096404767342, "loss": 1.6117, "step": 63304 }, { "epoch": 2.11, "grad_norm": 0.6593354940414429, "learning_rate": 0.00012210122426745145, "loss": 1.7551, "step": 63305 }, { "epoch": 2.11, "grad_norm": 0.6736496686935425, "learning_rate": 0.0001220928082741117, "loss": 1.755, "step": 63306 }, { "epoch": 2.11, "grad_norm": 0.6430701613426208, "learning_rate": 0.00012208439249672507, "loss": 1.7373, "step": 63307 }, { "epoch": 2.11, "grad_norm": 0.6515089869499207, "learning_rate": 0.0001220759769353017, "loss": 1.7001, "step": 63308 }, { "epoch": 2.11, "grad_norm": 0.6755569577217102, "learning_rate": 0.00012206756158985194, "loss": 1.7741, "step": 63309 }, { "epoch": 2.11, "grad_norm": 0.6338649988174438, "learning_rate": 0.00012205914646038584, "loss": 1.6911, "step": 63310 }, { "epoch": 2.11, "grad_norm": 0.6242109537124634, "learning_rate": 0.00012205073154691385, "loss": 1.7336, "step": 63311 }, { "epoch": 2.11, "grad_norm": 0.635665237903595, "learning_rate": 0.00012204231684944593, "loss": 1.7196, "step": 63312 }, { "epoch": 2.11, "grad_norm": 0.6206697821617126, "learning_rate": 0.00012203390236799252, "loss": 1.7023, "step": 63313 }, { "epoch": 2.11, "grad_norm": 0.6560699343681335, "learning_rate": 0.00012202548810256374, "loss": 1.7047, "step": 63314 }, { "epoch": 2.11, "grad_norm": 0.6422571539878845, "learning_rate": 0.00012201707405316968, "loss": 1.7201, "step": 63315 }, { "epoch": 2.11, "grad_norm": 0.6425846815109253, "learning_rate": 0.00012200866021982078, "loss": 1.7149, "step": 63316 }, { "epoch": 2.11, "grad_norm": 0.6265373229980469, "learning_rate": 0.00012200024660252704, "loss": 1.636, "step": 63317 }, { "epoch": 2.11, "grad_norm": 0.6515790820121765, "learning_rate": 0.00012199183320129887, "loss": 1.6726, "step": 63318 }, { "epoch": 2.11, "grad_norm": 0.6464956998825073, "learning_rate": 0.00012198342001614639, "loss": 1.6768, "step": 63319 }, { "epoch": 2.11, "grad_norm": 0.641947865486145, "learning_rate": 0.0001219750070470797, "loss": 1.721, "step": 63320 }, { "epoch": 2.11, "grad_norm": 0.6278810501098633, "learning_rate": 0.00012196659429410911, "loss": 1.7281, "step": 63321 }, { "epoch": 2.11, "grad_norm": 0.6475393772125244, "learning_rate": 0.00012195818175724496, "loss": 1.7344, "step": 63322 }, { "epoch": 2.11, "grad_norm": 0.6925879120826721, "learning_rate": 0.00012194976943649735, "loss": 1.7334, "step": 63323 }, { "epoch": 2.11, "grad_norm": 0.6201292276382446, "learning_rate": 0.00012194135733187637, "loss": 1.6325, "step": 63324 }, { "epoch": 2.11, "grad_norm": 0.6329718232154846, "learning_rate": 0.00012193294544339248, "loss": 1.6845, "step": 63325 }, { "epoch": 2.11, "grad_norm": 0.6135035157203674, "learning_rate": 0.0001219245337710557, "loss": 1.7121, "step": 63326 }, { "epoch": 2.11, "grad_norm": 0.6366643905639648, "learning_rate": 0.00012191612231487625, "loss": 1.7627, "step": 63327 }, { "epoch": 2.11, "grad_norm": 0.6448333263397217, "learning_rate": 0.00012190771107486435, "loss": 1.6908, "step": 63328 }, { "epoch": 2.11, "grad_norm": 0.6219983696937561, "learning_rate": 0.00012189930005103047, "loss": 1.7131, "step": 63329 }, { "epoch": 2.11, "grad_norm": 0.6570113301277161, "learning_rate": 0.00012189088924338439, "loss": 1.7167, "step": 63330 }, { "epoch": 2.11, "grad_norm": 0.6331431865692139, "learning_rate": 0.00012188247865193652, "loss": 1.7077, "step": 63331 }, { "epoch": 2.11, "grad_norm": 0.6252874732017517, "learning_rate": 0.00012187406827669721, "loss": 1.6715, "step": 63332 }, { "epoch": 2.11, "grad_norm": 0.6541569828987122, "learning_rate": 0.00012186565811767651, "loss": 1.7789, "step": 63333 }, { "epoch": 2.11, "grad_norm": 0.6681420207023621, "learning_rate": 0.00012185724817488454, "loss": 1.6709, "step": 63334 }, { "epoch": 2.11, "grad_norm": 0.6762945055961609, "learning_rate": 0.00012184883844833164, "loss": 1.7872, "step": 63335 }, { "epoch": 2.11, "grad_norm": 0.6456632018089294, "learning_rate": 0.00012184042893802823, "loss": 1.6809, "step": 63336 }, { "epoch": 2.11, "grad_norm": 0.6356169581413269, "learning_rate": 0.00012183201964398405, "loss": 1.6831, "step": 63337 }, { "epoch": 2.11, "grad_norm": 0.6691874265670776, "learning_rate": 0.00012182361056620955, "loss": 1.6706, "step": 63338 }, { "epoch": 2.11, "grad_norm": 0.628934919834137, "learning_rate": 0.00012181520170471502, "loss": 1.6527, "step": 63339 }, { "epoch": 2.11, "grad_norm": 0.6426869034767151, "learning_rate": 0.0001218067930595106, "loss": 1.7102, "step": 63340 }, { "epoch": 2.11, "grad_norm": 0.6358240246772766, "learning_rate": 0.00012179838463060638, "loss": 1.756, "step": 63341 }, { "epoch": 2.11, "grad_norm": 0.6315943002700806, "learning_rate": 0.00012178997641801267, "loss": 1.6316, "step": 63342 }, { "epoch": 2.11, "grad_norm": 0.6476730108261108, "learning_rate": 0.00012178156842173984, "loss": 1.6964, "step": 63343 }, { "epoch": 2.11, "grad_norm": 0.640336811542511, "learning_rate": 0.00012177316064179778, "loss": 1.7823, "step": 63344 }, { "epoch": 2.11, "grad_norm": 0.6645477414131165, "learning_rate": 0.0001217647530781968, "loss": 1.7993, "step": 63345 }, { "epoch": 2.11, "grad_norm": 0.6383113265037537, "learning_rate": 0.00012175634573094725, "loss": 1.6923, "step": 63346 }, { "epoch": 2.11, "grad_norm": 0.6570906043052673, "learning_rate": 0.00012174793860005926, "loss": 1.7523, "step": 63347 }, { "epoch": 2.11, "grad_norm": 0.6356687545776367, "learning_rate": 0.00012173953168554289, "loss": 1.7633, "step": 63348 }, { "epoch": 2.11, "grad_norm": 0.6460525989532471, "learning_rate": 0.00012173112498740857, "loss": 1.7078, "step": 63349 }, { "epoch": 2.11, "grad_norm": 0.6406911015510559, "learning_rate": 0.00012172271850566638, "loss": 1.7404, "step": 63350 }, { "epoch": 2.11, "grad_norm": 0.6398073434829712, "learning_rate": 0.00012171431224032646, "loss": 1.6979, "step": 63351 }, { "epoch": 2.11, "grad_norm": 0.638252317905426, "learning_rate": 0.00012170590619139906, "loss": 1.7426, "step": 63352 }, { "epoch": 2.11, "grad_norm": 0.6598655581474304, "learning_rate": 0.00012169750035889457, "loss": 1.7185, "step": 63353 }, { "epoch": 2.11, "grad_norm": 0.6307492256164551, "learning_rate": 0.00012168909474282302, "loss": 1.6283, "step": 63354 }, { "epoch": 2.11, "grad_norm": 0.6613825559616089, "learning_rate": 0.00012168068934319453, "loss": 1.7124, "step": 63355 }, { "epoch": 2.11, "grad_norm": 0.622877299785614, "learning_rate": 0.00012167228416001951, "loss": 1.7162, "step": 63356 }, { "epoch": 2.11, "grad_norm": 0.6129649877548218, "learning_rate": 0.00012166387919330809, "loss": 1.6456, "step": 63357 }, { "epoch": 2.11, "grad_norm": 0.634272038936615, "learning_rate": 0.00012165547444307031, "loss": 1.7081, "step": 63358 }, { "epoch": 2.11, "grad_norm": 0.6436749696731567, "learning_rate": 0.00012164706990931653, "loss": 1.6855, "step": 63359 }, { "epoch": 2.11, "grad_norm": 0.6830742955207825, "learning_rate": 0.000121638665592057, "loss": 1.7034, "step": 63360 }, { "epoch": 2.11, "grad_norm": 0.6799157857894897, "learning_rate": 0.00012163026149130186, "loss": 1.7177, "step": 63361 }, { "epoch": 2.11, "grad_norm": 0.6432413458824158, "learning_rate": 0.00012162185760706122, "loss": 1.7438, "step": 63362 }, { "epoch": 2.11, "grad_norm": 0.611774742603302, "learning_rate": 0.00012161345393934545, "loss": 1.7488, "step": 63363 }, { "epoch": 2.11, "grad_norm": 0.6444394588470459, "learning_rate": 0.00012160505048816467, "loss": 1.722, "step": 63364 }, { "epoch": 2.11, "grad_norm": 0.6257690787315369, "learning_rate": 0.00012159664725352896, "loss": 1.6633, "step": 63365 }, { "epoch": 2.11, "grad_norm": 0.645636260509491, "learning_rate": 0.00012158824423544873, "loss": 1.696, "step": 63366 }, { "epoch": 2.11, "grad_norm": 0.6447984576225281, "learning_rate": 0.00012157984143393401, "loss": 1.6633, "step": 63367 }, { "epoch": 2.11, "grad_norm": 0.6203197240829468, "learning_rate": 0.00012157143884899514, "loss": 1.6906, "step": 63368 }, { "epoch": 2.11, "grad_norm": 0.6550727486610413, "learning_rate": 0.00012156303648064218, "loss": 1.758, "step": 63369 }, { "epoch": 2.11, "grad_norm": 0.649368166923523, "learning_rate": 0.0001215546343288855, "loss": 1.7996, "step": 63370 }, { "epoch": 2.11, "grad_norm": 0.6288108229637146, "learning_rate": 0.00012154623239373519, "loss": 1.705, "step": 63371 }, { "epoch": 2.11, "grad_norm": 0.6283801198005676, "learning_rate": 0.00012153783067520137, "loss": 1.7005, "step": 63372 }, { "epoch": 2.11, "grad_norm": 0.6451046466827393, "learning_rate": 0.00012152942917329439, "loss": 1.7145, "step": 63373 }, { "epoch": 2.11, "grad_norm": 0.6403344869613647, "learning_rate": 0.00012152102788802432, "loss": 1.6694, "step": 63374 }, { "epoch": 2.11, "grad_norm": 0.6343215703964233, "learning_rate": 0.00012151262681940153, "loss": 1.661, "step": 63375 }, { "epoch": 2.11, "grad_norm": 0.6591005325317383, "learning_rate": 0.00012150422596743612, "loss": 1.7087, "step": 63376 }, { "epoch": 2.11, "grad_norm": 0.6257131099700928, "learning_rate": 0.00012149582533213816, "loss": 1.7532, "step": 63377 }, { "epoch": 2.11, "grad_norm": 0.644001841545105, "learning_rate": 0.00012148742491351808, "loss": 1.7167, "step": 63378 }, { "epoch": 2.11, "grad_norm": 0.623008668422699, "learning_rate": 0.00012147902471158584, "loss": 1.7347, "step": 63379 }, { "epoch": 2.11, "grad_norm": 0.6098794937133789, "learning_rate": 0.00012147062472635188, "loss": 1.6233, "step": 63380 }, { "epoch": 2.11, "grad_norm": 0.6487095355987549, "learning_rate": 0.00012146222495782615, "loss": 1.7032, "step": 63381 }, { "epoch": 2.11, "grad_norm": 0.6595009565353394, "learning_rate": 0.00012145382540601912, "loss": 1.6965, "step": 63382 }, { "epoch": 2.11, "grad_norm": 0.6317155361175537, "learning_rate": 0.0001214454260709408, "loss": 1.6417, "step": 63383 }, { "epoch": 2.11, "grad_norm": 0.6575537919998169, "learning_rate": 0.00012143702695260133, "loss": 1.6623, "step": 63384 }, { "epoch": 2.11, "grad_norm": 0.6304742693901062, "learning_rate": 0.00012142862805101112, "loss": 1.6972, "step": 63385 }, { "epoch": 2.11, "grad_norm": 0.6221243739128113, "learning_rate": 0.00012142022936618022, "loss": 1.7552, "step": 63386 }, { "epoch": 2.11, "grad_norm": 0.6325609683990479, "learning_rate": 0.00012141183089811876, "loss": 1.6595, "step": 63387 }, { "epoch": 2.11, "grad_norm": 0.6071624159812927, "learning_rate": 0.00012140343264683702, "loss": 1.685, "step": 63388 }, { "epoch": 2.11, "grad_norm": 0.6562658548355103, "learning_rate": 0.0001213950346123453, "loss": 1.6746, "step": 63389 }, { "epoch": 2.11, "grad_norm": 0.6511408090591431, "learning_rate": 0.00012138663679465369, "loss": 1.6889, "step": 63390 }, { "epoch": 2.11, "grad_norm": 0.6652127504348755, "learning_rate": 0.00012137823919377228, "loss": 1.6454, "step": 63391 }, { "epoch": 2.11, "grad_norm": 0.6491014957427979, "learning_rate": 0.00012136984180971148, "loss": 1.7502, "step": 63392 }, { "epoch": 2.11, "grad_norm": 0.6153392195701599, "learning_rate": 0.00012136144464248138, "loss": 1.6985, "step": 63393 }, { "epoch": 2.11, "grad_norm": 0.6313032507896423, "learning_rate": 0.00012135304769209204, "loss": 1.7284, "step": 63394 }, { "epoch": 2.11, "grad_norm": 0.6459189057350159, "learning_rate": 0.00012134465095855381, "loss": 1.7066, "step": 63395 }, { "epoch": 2.11, "grad_norm": 0.6528508067131042, "learning_rate": 0.00012133625444187702, "loss": 1.6858, "step": 63396 }, { "epoch": 2.11, "grad_norm": 0.6865084767341614, "learning_rate": 0.0001213278581420715, "loss": 1.5844, "step": 63397 }, { "epoch": 2.11, "grad_norm": 0.627702534198761, "learning_rate": 0.00012131946205914765, "loss": 1.6554, "step": 63398 }, { "epoch": 2.11, "grad_norm": 0.6332451105117798, "learning_rate": 0.00012131106619311572, "loss": 1.7021, "step": 63399 }, { "epoch": 2.11, "grad_norm": 0.6135438084602356, "learning_rate": 0.00012130267054398587, "loss": 1.6505, "step": 63400 }, { "epoch": 2.11, "grad_norm": 0.6430573463439941, "learning_rate": 0.00012129427511176812, "loss": 1.76, "step": 63401 }, { "epoch": 2.11, "grad_norm": 0.6610058546066284, "learning_rate": 0.00012128587989647277, "loss": 1.6835, "step": 63402 }, { "epoch": 2.11, "grad_norm": 0.6355692148208618, "learning_rate": 0.00012127748489811023, "loss": 1.7343, "step": 63403 }, { "epoch": 2.11, "grad_norm": 0.6409355998039246, "learning_rate": 0.00012126909011669032, "loss": 1.6763, "step": 63404 }, { "epoch": 2.11, "grad_norm": 0.6304514408111572, "learning_rate": 0.00012126069555222338, "loss": 1.6553, "step": 63405 }, { "epoch": 2.11, "grad_norm": 0.6283400058746338, "learning_rate": 0.00012125230120471973, "loss": 1.7033, "step": 63406 }, { "epoch": 2.11, "grad_norm": 0.6295274496078491, "learning_rate": 0.00012124390707418943, "loss": 1.7357, "step": 63407 }, { "epoch": 2.11, "grad_norm": 0.65384441614151, "learning_rate": 0.0001212355131606426, "loss": 1.7703, "step": 63408 }, { "epoch": 2.11, "grad_norm": 0.6141164898872375, "learning_rate": 0.00012122711946408952, "loss": 1.6631, "step": 63409 }, { "epoch": 2.11, "grad_norm": 0.6644152402877808, "learning_rate": 0.0001212187259845406, "loss": 1.7337, "step": 63410 }, { "epoch": 2.11, "grad_norm": 0.6326597929000854, "learning_rate": 0.00012121033272200557, "loss": 1.6918, "step": 63411 }, { "epoch": 2.11, "grad_norm": 0.6514625549316406, "learning_rate": 0.00012120193967649488, "loss": 1.7316, "step": 63412 }, { "epoch": 2.11, "grad_norm": 0.6430734992027283, "learning_rate": 0.00012119354684801879, "loss": 1.7203, "step": 63413 }, { "epoch": 2.11, "grad_norm": 0.6456831097602844, "learning_rate": 0.00012118515423658737, "loss": 1.5978, "step": 63414 }, { "epoch": 2.11, "grad_norm": 0.6680451035499573, "learning_rate": 0.00012117676184221073, "loss": 1.6781, "step": 63415 }, { "epoch": 2.11, "grad_norm": 0.6160480976104736, "learning_rate": 0.00012116836966489924, "loss": 1.6646, "step": 63416 }, { "epoch": 2.11, "grad_norm": 0.6476497650146484, "learning_rate": 0.00012115997770466302, "loss": 1.6974, "step": 63417 }, { "epoch": 2.11, "grad_norm": 0.633927047252655, "learning_rate": 0.00012115158596151212, "loss": 1.7884, "step": 63418 }, { "epoch": 2.11, "grad_norm": 0.6222127676010132, "learning_rate": 0.00012114319443545682, "loss": 1.7252, "step": 63419 }, { "epoch": 2.11, "grad_norm": 0.6616502404212952, "learning_rate": 0.0001211348031265075, "loss": 1.6828, "step": 63420 }, { "epoch": 2.11, "grad_norm": 0.6043034195899963, "learning_rate": 0.00012112641203467413, "loss": 1.6968, "step": 63421 }, { "epoch": 2.11, "grad_norm": 0.6139912605285645, "learning_rate": 0.00012111802115996683, "loss": 1.6699, "step": 63422 }, { "epoch": 2.11, "grad_norm": 0.6223362684249878, "learning_rate": 0.00012110963050239599, "loss": 1.6473, "step": 63423 }, { "epoch": 2.11, "grad_norm": 0.6480838060379028, "learning_rate": 0.00012110124006197174, "loss": 1.7608, "step": 63424 }, { "epoch": 2.11, "grad_norm": 0.6053759455680847, "learning_rate": 0.0001210928498387041, "loss": 1.7147, "step": 63425 }, { "epoch": 2.11, "grad_norm": 0.6249138116836548, "learning_rate": 0.0001210844598326035, "loss": 1.7016, "step": 63426 }, { "epoch": 2.11, "grad_norm": 0.6699004173278809, "learning_rate": 0.00012107607004367987, "loss": 1.7395, "step": 63427 }, { "epoch": 2.11, "grad_norm": 0.6278579235076904, "learning_rate": 0.00012106768047194366, "loss": 1.6746, "step": 63428 }, { "epoch": 2.11, "grad_norm": 0.6140955686569214, "learning_rate": 0.0001210592911174048, "loss": 1.7624, "step": 63429 }, { "epoch": 2.11, "grad_norm": 0.6578510403633118, "learning_rate": 0.00012105090198007368, "loss": 1.7856, "step": 63430 }, { "epoch": 2.11, "grad_norm": 0.6691341400146484, "learning_rate": 0.00012104251305996044, "loss": 1.7517, "step": 63431 }, { "epoch": 2.11, "grad_norm": 0.636782705783844, "learning_rate": 0.00012103412435707507, "loss": 1.7208, "step": 63432 }, { "epoch": 2.11, "grad_norm": 0.6436432003974915, "learning_rate": 0.00012102573587142804, "loss": 1.6433, "step": 63433 }, { "epoch": 2.11, "grad_norm": 0.6341239809989929, "learning_rate": 0.0001210173476030293, "loss": 1.7063, "step": 63434 }, { "epoch": 2.11, "grad_norm": 0.6402558088302612, "learning_rate": 0.00012100895955188922, "loss": 1.7127, "step": 63435 }, { "epoch": 2.11, "grad_norm": 0.6299099922180176, "learning_rate": 0.00012100057171801789, "loss": 1.7526, "step": 63436 }, { "epoch": 2.11, "grad_norm": 0.6300966143608093, "learning_rate": 0.00012099218410142537, "loss": 1.7383, "step": 63437 }, { "epoch": 2.11, "grad_norm": 0.6131842732429504, "learning_rate": 0.00012098379670212198, "loss": 1.6447, "step": 63438 }, { "epoch": 2.11, "grad_norm": 0.6360828280448914, "learning_rate": 0.00012097540952011801, "loss": 1.7465, "step": 63439 }, { "epoch": 2.11, "grad_norm": 0.6278889775276184, "learning_rate": 0.0001209670225554235, "loss": 1.6921, "step": 63440 }, { "epoch": 2.11, "grad_norm": 0.627187192440033, "learning_rate": 0.00012095863580804852, "loss": 1.6745, "step": 63441 }, { "epoch": 2.11, "grad_norm": 0.6421858072280884, "learning_rate": 0.0001209502492780035, "loss": 1.7522, "step": 63442 }, { "epoch": 2.11, "grad_norm": 0.6352816820144653, "learning_rate": 0.00012094186296529848, "loss": 1.7512, "step": 63443 }, { "epoch": 2.11, "grad_norm": 0.6301189661026001, "learning_rate": 0.00012093347686994357, "loss": 1.6943, "step": 63444 }, { "epoch": 2.11, "grad_norm": 0.6203352212905884, "learning_rate": 0.00012092509099194904, "loss": 1.7072, "step": 63445 }, { "epoch": 2.11, "grad_norm": 0.6170910000801086, "learning_rate": 0.00012091670533132525, "loss": 1.725, "step": 63446 }, { "epoch": 2.11, "grad_norm": 0.6139204502105713, "learning_rate": 0.00012090831988808197, "loss": 1.7491, "step": 63447 }, { "epoch": 2.11, "grad_norm": 0.603937566280365, "learning_rate": 0.00012089993466222963, "loss": 1.6751, "step": 63448 }, { "epoch": 2.11, "grad_norm": 0.6494714021682739, "learning_rate": 0.0001208915496537785, "loss": 1.7002, "step": 63449 }, { "epoch": 2.11, "grad_norm": 0.6321950554847717, "learning_rate": 0.00012088316486273862, "loss": 1.668, "step": 63450 }, { "epoch": 2.11, "grad_norm": 0.6519387364387512, "learning_rate": 0.00012087478028912008, "loss": 1.6175, "step": 63451 }, { "epoch": 2.11, "grad_norm": 0.6527784466743469, "learning_rate": 0.00012086639593293315, "loss": 1.7338, "step": 63452 }, { "epoch": 2.11, "grad_norm": 0.6468055844306946, "learning_rate": 0.00012085801179418825, "loss": 1.7686, "step": 63453 }, { "epoch": 2.11, "grad_norm": 0.6274418830871582, "learning_rate": 0.00012084962787289511, "loss": 1.6367, "step": 63454 }, { "epoch": 2.11, "grad_norm": 0.6264486312866211, "learning_rate": 0.00012084124416906411, "loss": 1.7053, "step": 63455 }, { "epoch": 2.11, "grad_norm": 0.6518028974533081, "learning_rate": 0.00012083286068270559, "loss": 1.7246, "step": 63456 }, { "epoch": 2.11, "grad_norm": 0.6495382785797119, "learning_rate": 0.00012082447741382953, "loss": 1.6924, "step": 63457 }, { "epoch": 2.11, "grad_norm": 0.644410252571106, "learning_rate": 0.00012081609436244607, "loss": 1.776, "step": 63458 }, { "epoch": 2.11, "grad_norm": 0.6719818711280823, "learning_rate": 0.00012080771152856549, "loss": 1.7686, "step": 63459 }, { "epoch": 2.11, "grad_norm": 0.6430016160011292, "learning_rate": 0.00012079932891219814, "loss": 1.7042, "step": 63460 }, { "epoch": 2.11, "grad_norm": 0.6490839123725891, "learning_rate": 0.00012079094651335377, "loss": 1.6434, "step": 63461 }, { "epoch": 2.11, "grad_norm": 0.6253182291984558, "learning_rate": 0.00012078256433204282, "loss": 1.6769, "step": 63462 }, { "epoch": 2.11, "grad_norm": 0.604225754737854, "learning_rate": 0.00012077418236827552, "loss": 1.6431, "step": 63463 }, { "epoch": 2.11, "grad_norm": 0.6226206421852112, "learning_rate": 0.00012076580062206196, "loss": 1.6977, "step": 63464 }, { "epoch": 2.11, "grad_norm": 0.6581053733825684, "learning_rate": 0.00012075741909341216, "loss": 1.7019, "step": 63465 }, { "epoch": 2.11, "grad_norm": 0.6415883302688599, "learning_rate": 0.0001207490377823366, "loss": 1.7062, "step": 63466 }, { "epoch": 2.11, "grad_norm": 0.6213707327842712, "learning_rate": 0.00012074065668884528, "loss": 1.6443, "step": 63467 }, { "epoch": 2.11, "grad_norm": 0.6437134742736816, "learning_rate": 0.00012073227581294831, "loss": 1.6103, "step": 63468 }, { "epoch": 2.11, "grad_norm": 0.6398991942405701, "learning_rate": 0.0001207238951546559, "loss": 1.7274, "step": 63469 }, { "epoch": 2.11, "grad_norm": 0.6535762548446655, "learning_rate": 0.00012071551471397839, "loss": 1.7533, "step": 63470 }, { "epoch": 2.11, "grad_norm": 0.6721140742301941, "learning_rate": 0.00012070713449092582, "loss": 1.7007, "step": 63471 }, { "epoch": 2.11, "grad_norm": 0.6219107508659363, "learning_rate": 0.0001206987544855083, "loss": 1.6746, "step": 63472 }, { "epoch": 2.11, "grad_norm": 0.6340690851211548, "learning_rate": 0.00012069037469773616, "loss": 1.6477, "step": 63473 }, { "epoch": 2.11, "grad_norm": 0.6669308543205261, "learning_rate": 0.00012068199512761947, "loss": 1.7292, "step": 63474 }, { "epoch": 2.11, "grad_norm": 0.6567535400390625, "learning_rate": 0.00012067361577516832, "loss": 1.757, "step": 63475 }, { "epoch": 2.11, "grad_norm": 0.6457029581069946, "learning_rate": 0.000120665236640393, "loss": 1.7265, "step": 63476 }, { "epoch": 2.11, "grad_norm": 0.6386926174163818, "learning_rate": 0.00012065685772330375, "loss": 1.6341, "step": 63477 }, { "epoch": 2.11, "grad_norm": 0.6496116518974304, "learning_rate": 0.00012064847902391061, "loss": 1.7186, "step": 63478 }, { "epoch": 2.11, "grad_norm": 0.647183358669281, "learning_rate": 0.00012064010054222374, "loss": 1.7906, "step": 63479 }, { "epoch": 2.11, "grad_norm": 0.6342317461967468, "learning_rate": 0.00012063172227825345, "loss": 1.6517, "step": 63480 }, { "epoch": 2.11, "grad_norm": 0.6242513656616211, "learning_rate": 0.00012062334423200981, "loss": 1.7106, "step": 63481 }, { "epoch": 2.11, "grad_norm": 0.654555082321167, "learning_rate": 0.00012061496640350291, "loss": 1.7308, "step": 63482 }, { "epoch": 2.11, "grad_norm": 0.6333873271942139, "learning_rate": 0.0001206065887927431, "loss": 1.7235, "step": 63483 }, { "epoch": 2.11, "grad_norm": 0.6387994885444641, "learning_rate": 0.00012059821139974036, "loss": 1.7184, "step": 63484 }, { "epoch": 2.11, "grad_norm": 0.6307002902030945, "learning_rate": 0.00012058983422450506, "loss": 1.6557, "step": 63485 }, { "epoch": 2.11, "grad_norm": 0.6265392899513245, "learning_rate": 0.00012058145726704716, "loss": 1.7155, "step": 63486 }, { "epoch": 2.11, "grad_norm": 0.6383879780769348, "learning_rate": 0.00012057308052737706, "loss": 1.6712, "step": 63487 }, { "epoch": 2.11, "grad_norm": 0.630879282951355, "learning_rate": 0.00012056470400550475, "loss": 1.7357, "step": 63488 }, { "epoch": 2.11, "grad_norm": 0.6476627588272095, "learning_rate": 0.0001205563277014404, "loss": 1.7067, "step": 63489 }, { "epoch": 2.11, "grad_norm": 0.6382145285606384, "learning_rate": 0.00012054795161519433, "loss": 1.7143, "step": 63490 }, { "epoch": 2.11, "grad_norm": 0.646535336971283, "learning_rate": 0.00012053957574677645, "loss": 1.6823, "step": 63491 }, { "epoch": 2.11, "grad_norm": 0.6334092617034912, "learning_rate": 0.00012053120009619725, "loss": 1.707, "step": 63492 }, { "epoch": 2.11, "grad_norm": 0.6341106295585632, "learning_rate": 0.0001205228246634667, "loss": 1.7441, "step": 63493 }, { "epoch": 2.11, "grad_norm": 0.660912275314331, "learning_rate": 0.00012051444944859487, "loss": 1.695, "step": 63494 }, { "epoch": 2.11, "grad_norm": 0.6210737228393555, "learning_rate": 0.0001205060744515922, "loss": 1.6963, "step": 63495 }, { "epoch": 2.11, "grad_norm": 0.6517672538757324, "learning_rate": 0.00012049769967246859, "loss": 1.753, "step": 63496 }, { "epoch": 2.11, "grad_norm": 0.6309105753898621, "learning_rate": 0.0001204893251112344, "loss": 1.7396, "step": 63497 }, { "epoch": 2.11, "grad_norm": 0.6483434438705444, "learning_rate": 0.00012048095076789966, "loss": 1.6851, "step": 63498 }, { "epoch": 2.11, "grad_norm": 0.6366896629333496, "learning_rate": 0.00012047257664247466, "loss": 1.6669, "step": 63499 }, { "epoch": 2.11, "grad_norm": 0.6343241333961487, "learning_rate": 0.00012046420273496955, "loss": 1.6499, "step": 63500 }, { "epoch": 2.11, "grad_norm": 0.6503344178199768, "learning_rate": 0.00012045582904539428, "loss": 1.7203, "step": 63501 }, { "epoch": 2.11, "grad_norm": 0.6346984505653381, "learning_rate": 0.00012044745557375931, "loss": 1.7138, "step": 63502 }, { "epoch": 2.11, "grad_norm": 0.6408447027206421, "learning_rate": 0.0001204390823200747, "loss": 1.6838, "step": 63503 }, { "epoch": 2.11, "grad_norm": 0.6532934904098511, "learning_rate": 0.00012043070928435043, "loss": 1.7725, "step": 63504 }, { "epoch": 2.11, "grad_norm": 0.6344611048698425, "learning_rate": 0.00012042233646659689, "loss": 1.7335, "step": 63505 }, { "epoch": 2.11, "grad_norm": 0.6145246028900146, "learning_rate": 0.00012041396386682422, "loss": 1.756, "step": 63506 }, { "epoch": 2.11, "grad_norm": 0.6538660526275635, "learning_rate": 0.00012040559148504257, "loss": 1.7961, "step": 63507 }, { "epoch": 2.11, "grad_norm": 0.640262246131897, "learning_rate": 0.00012039721932126192, "loss": 1.6984, "step": 63508 }, { "epoch": 2.11, "grad_norm": 0.6416515707969666, "learning_rate": 0.00012038884737549272, "loss": 1.6905, "step": 63509 }, { "epoch": 2.11, "grad_norm": 0.658886730670929, "learning_rate": 0.00012038047564774498, "loss": 1.717, "step": 63510 }, { "epoch": 2.11, "grad_norm": 0.6485720276832581, "learning_rate": 0.0001203721041380288, "loss": 1.6686, "step": 63511 }, { "epoch": 2.11, "grad_norm": 0.6452507376670837, "learning_rate": 0.00012036373284635441, "loss": 1.7892, "step": 63512 }, { "epoch": 2.11, "grad_norm": 0.6430637836456299, "learning_rate": 0.00012035536177273216, "loss": 1.738, "step": 63513 }, { "epoch": 2.11, "grad_norm": 0.6358440518379211, "learning_rate": 0.00012034699091717185, "loss": 1.698, "step": 63514 }, { "epoch": 2.11, "grad_norm": 0.6413361430168152, "learning_rate": 0.00012033862027968383, "loss": 1.6691, "step": 63515 }, { "epoch": 2.11, "grad_norm": 0.6515567898750305, "learning_rate": 0.00012033024986027832, "loss": 1.6969, "step": 63516 }, { "epoch": 2.11, "grad_norm": 0.6467527747154236, "learning_rate": 0.00012032187965896544, "loss": 1.7283, "step": 63517 }, { "epoch": 2.11, "grad_norm": 0.6315562129020691, "learning_rate": 0.00012031350967575521, "loss": 1.7285, "step": 63518 }, { "epoch": 2.11, "grad_norm": 0.6568049192428589, "learning_rate": 0.00012030513991065792, "loss": 1.6737, "step": 63519 }, { "epoch": 2.11, "grad_norm": 0.6460630893707275, "learning_rate": 0.00012029677036368391, "loss": 1.7059, "step": 63520 }, { "epoch": 2.11, "grad_norm": 0.6062464118003845, "learning_rate": 0.00012028840103484295, "loss": 1.6812, "step": 63521 }, { "epoch": 2.11, "grad_norm": 0.6544901132583618, "learning_rate": 0.00012028003192414535, "loss": 1.6464, "step": 63522 }, { "epoch": 2.11, "grad_norm": 0.6329675316810608, "learning_rate": 0.00012027166303160147, "loss": 1.7312, "step": 63523 }, { "epoch": 2.11, "grad_norm": 0.62895268201828, "learning_rate": 0.00012026329435722128, "loss": 1.69, "step": 63524 }, { "epoch": 2.11, "grad_norm": 0.6379907727241516, "learning_rate": 0.00012025492590101485, "loss": 1.7073, "step": 63525 }, { "epoch": 2.11, "grad_norm": 0.6334419250488281, "learning_rate": 0.0001202465576629925, "loss": 1.659, "step": 63526 }, { "epoch": 2.11, "grad_norm": 0.6642849445343018, "learning_rate": 0.00012023818964316453, "loss": 1.6718, "step": 63527 }, { "epoch": 2.11, "grad_norm": 0.6635530591011047, "learning_rate": 0.00012022982184154068, "loss": 1.7595, "step": 63528 }, { "epoch": 2.11, "grad_norm": 0.673833966255188, "learning_rate": 0.00012022145425813135, "loss": 1.6999, "step": 63529 }, { "epoch": 2.11, "grad_norm": 0.6464807391166687, "learning_rate": 0.0001202130868929468, "loss": 1.752, "step": 63530 }, { "epoch": 2.11, "grad_norm": 0.6290116906166077, "learning_rate": 0.00012020471974599707, "loss": 1.7219, "step": 63531 }, { "epoch": 2.11, "grad_norm": 0.636702299118042, "learning_rate": 0.0001201963528172922, "loss": 1.7421, "step": 63532 }, { "epoch": 2.11, "grad_norm": 0.6372058987617493, "learning_rate": 0.00012018798610684256, "loss": 1.7338, "step": 63533 }, { "epoch": 2.11, "grad_norm": 0.6250330209732056, "learning_rate": 0.00012017961961465825, "loss": 1.619, "step": 63534 }, { "epoch": 2.11, "grad_norm": 0.6656450629234314, "learning_rate": 0.00012017125334074926, "loss": 1.7659, "step": 63535 }, { "epoch": 2.11, "grad_norm": 0.6461783647537231, "learning_rate": 0.0001201628872851259, "loss": 1.7168, "step": 63536 }, { "epoch": 2.11, "grad_norm": 0.6184322834014893, "learning_rate": 0.00012015452144779834, "loss": 1.6776, "step": 63537 }, { "epoch": 2.11, "grad_norm": 0.6650526523590088, "learning_rate": 0.00012014615582877673, "loss": 1.7279, "step": 63538 }, { "epoch": 2.11, "grad_norm": 0.6413251757621765, "learning_rate": 0.00012013779042807109, "loss": 1.6739, "step": 63539 }, { "epoch": 2.11, "grad_norm": 0.636752188205719, "learning_rate": 0.00012012942524569177, "loss": 1.6674, "step": 63540 }, { "epoch": 2.11, "grad_norm": 0.6396476626396179, "learning_rate": 0.00012012106028164882, "loss": 1.6471, "step": 63541 }, { "epoch": 2.11, "grad_norm": 0.6365363001823425, "learning_rate": 0.0001201126955359523, "loss": 1.7177, "step": 63542 }, { "epoch": 2.11, "grad_norm": 0.6507905125617981, "learning_rate": 0.00012010433100861257, "loss": 1.7038, "step": 63543 }, { "epoch": 2.11, "grad_norm": 0.6577805876731873, "learning_rate": 0.00012009596669963956, "loss": 1.7238, "step": 63544 }, { "epoch": 2.11, "grad_norm": 0.6589013934135437, "learning_rate": 0.00012008760260904367, "loss": 1.6891, "step": 63545 }, { "epoch": 2.11, "grad_norm": 0.6367880702018738, "learning_rate": 0.00012007923873683479, "loss": 1.7396, "step": 63546 }, { "epoch": 2.11, "grad_norm": 0.6555919051170349, "learning_rate": 0.0001200708750830233, "loss": 1.7944, "step": 63547 }, { "epoch": 2.11, "grad_norm": 0.6284776926040649, "learning_rate": 0.00012006251164761927, "loss": 1.6628, "step": 63548 }, { "epoch": 2.11, "grad_norm": 0.6770424246788025, "learning_rate": 0.00012005414843063273, "loss": 1.7128, "step": 63549 }, { "epoch": 2.11, "grad_norm": 0.6235517859458923, "learning_rate": 0.00012004578543207404, "loss": 1.7425, "step": 63550 }, { "epoch": 2.11, "grad_norm": 0.6362491250038147, "learning_rate": 0.00012003742265195318, "loss": 1.7294, "step": 63551 }, { "epoch": 2.11, "grad_norm": 0.6506075263023376, "learning_rate": 0.00012002906009028043, "loss": 1.7144, "step": 63552 }, { "epoch": 2.11, "grad_norm": 0.6460460424423218, "learning_rate": 0.00012002069774706591, "loss": 1.697, "step": 63553 }, { "epoch": 2.11, "grad_norm": 0.6348323822021484, "learning_rate": 0.00012001233562231964, "loss": 1.6846, "step": 63554 }, { "epoch": 2.11, "grad_norm": 0.665373682975769, "learning_rate": 0.00012000397371605188, "loss": 1.6637, "step": 63555 }, { "epoch": 2.11, "grad_norm": 0.6486231684684753, "learning_rate": 0.00011999561202827286, "loss": 1.6715, "step": 63556 }, { "epoch": 2.11, "grad_norm": 0.6333622336387634, "learning_rate": 0.00011998725055899264, "loss": 1.7253, "step": 63557 }, { "epoch": 2.11, "grad_norm": 0.6365450024604797, "learning_rate": 0.00011997888930822128, "loss": 1.7072, "step": 63558 }, { "epoch": 2.11, "grad_norm": 0.6774013638496399, "learning_rate": 0.00011997052827596913, "loss": 1.6826, "step": 63559 }, { "epoch": 2.11, "grad_norm": 0.640113115310669, "learning_rate": 0.00011996216746224625, "loss": 1.696, "step": 63560 }, { "epoch": 2.11, "grad_norm": 0.6121644973754883, "learning_rate": 0.00011995380686706265, "loss": 1.7463, "step": 63561 }, { "epoch": 2.11, "grad_norm": 0.6304810047149658, "learning_rate": 0.0001199454464904286, "loss": 1.6845, "step": 63562 }, { "epoch": 2.11, "grad_norm": 0.6524349451065063, "learning_rate": 0.00011993708633235445, "loss": 1.7092, "step": 63563 }, { "epoch": 2.11, "grad_norm": 0.6386783123016357, "learning_rate": 0.00011992872639284996, "loss": 1.7116, "step": 63564 }, { "epoch": 2.11, "grad_norm": 0.6499601006507874, "learning_rate": 0.00011992036667192542, "loss": 1.7085, "step": 63565 }, { "epoch": 2.11, "grad_norm": 0.6114594340324402, "learning_rate": 0.00011991200716959117, "loss": 1.6634, "step": 63566 }, { "epoch": 2.11, "grad_norm": 0.6219593286514282, "learning_rate": 0.0001199036478858572, "loss": 1.7352, "step": 63567 }, { "epoch": 2.11, "grad_norm": 0.6165978312492371, "learning_rate": 0.00011989528882073354, "loss": 1.676, "step": 63568 }, { "epoch": 2.11, "grad_norm": 0.6560126543045044, "learning_rate": 0.00011988692997423047, "loss": 1.7536, "step": 63569 }, { "epoch": 2.11, "grad_norm": 0.6263629794120789, "learning_rate": 0.00011987857134635836, "loss": 1.704, "step": 63570 }, { "epoch": 2.12, "grad_norm": 0.6479006409645081, "learning_rate": 0.0001198702129371269, "loss": 1.745, "step": 63571 }, { "epoch": 2.12, "grad_norm": 0.6244933009147644, "learning_rate": 0.00011986185474654646, "loss": 1.647, "step": 63572 }, { "epoch": 2.12, "grad_norm": 0.6319838762283325, "learning_rate": 0.0001198534967746273, "loss": 1.7453, "step": 63573 }, { "epoch": 2.12, "grad_norm": 0.6134430766105652, "learning_rate": 0.00011984513902137943, "loss": 1.6922, "step": 63574 }, { "epoch": 2.12, "grad_norm": 0.6413850784301758, "learning_rate": 0.00011983678148681292, "loss": 1.7209, "step": 63575 }, { "epoch": 2.12, "grad_norm": 0.640539824962616, "learning_rate": 0.000119828424170938, "loss": 1.7776, "step": 63576 }, { "epoch": 2.12, "grad_norm": 0.6442137956619263, "learning_rate": 0.0001198200670737651, "loss": 1.6529, "step": 63577 }, { "epoch": 2.12, "grad_norm": 0.6423588991165161, "learning_rate": 0.00011981171019530382, "loss": 1.6373, "step": 63578 }, { "epoch": 2.12, "grad_norm": 0.635594367980957, "learning_rate": 0.00011980335353556456, "loss": 1.7093, "step": 63579 }, { "epoch": 2.12, "grad_norm": 0.6307634711265564, "learning_rate": 0.00011979499709455763, "loss": 1.7535, "step": 63580 }, { "epoch": 2.12, "grad_norm": 0.6266970634460449, "learning_rate": 0.000119786640872293, "loss": 1.7609, "step": 63581 }, { "epoch": 2.12, "grad_norm": 0.6271200180053711, "learning_rate": 0.00011977828486878074, "loss": 1.6723, "step": 63582 }, { "epoch": 2.12, "grad_norm": 0.640572726726532, "learning_rate": 0.00011976992908403116, "loss": 1.6814, "step": 63583 }, { "epoch": 2.12, "grad_norm": 0.6424252390861511, "learning_rate": 0.00011976157351805437, "loss": 1.6999, "step": 63584 }, { "epoch": 2.12, "grad_norm": 0.6189002990722656, "learning_rate": 0.00011975321817086036, "loss": 1.6507, "step": 63585 }, { "epoch": 2.12, "grad_norm": 0.6276265978813171, "learning_rate": 0.00011974486304245935, "loss": 1.6504, "step": 63586 }, { "epoch": 2.12, "grad_norm": 0.6115384101867676, "learning_rate": 0.00011973650813286165, "loss": 1.7578, "step": 63587 }, { "epoch": 2.12, "grad_norm": 0.6276840567588806, "learning_rate": 0.00011972815344207728, "loss": 1.6974, "step": 63588 }, { "epoch": 2.12, "grad_norm": 1.0223416090011597, "learning_rate": 0.00011971979897011626, "loss": 1.7709, "step": 63589 }, { "epoch": 2.12, "grad_norm": 0.6207275986671448, "learning_rate": 0.00011971144471698894, "loss": 1.6528, "step": 63590 }, { "epoch": 2.12, "grad_norm": 0.6301619410514832, "learning_rate": 0.00011970309068270537, "loss": 1.6625, "step": 63591 }, { "epoch": 2.12, "grad_norm": 0.6343346834182739, "learning_rate": 0.00011969473686727555, "loss": 1.7731, "step": 63592 }, { "epoch": 2.12, "grad_norm": 0.6499739289283752, "learning_rate": 0.00011968638327070988, "loss": 1.734, "step": 63593 }, { "epoch": 2.12, "grad_norm": 0.6210792660713196, "learning_rate": 0.00011967802989301825, "loss": 1.8401, "step": 63594 }, { "epoch": 2.12, "grad_norm": 0.6224662661552429, "learning_rate": 0.00011966967673421106, "loss": 1.6728, "step": 63595 }, { "epoch": 2.12, "grad_norm": 0.6075523495674133, "learning_rate": 0.00011966132379429818, "loss": 1.6342, "step": 63596 }, { "epoch": 2.12, "grad_norm": 0.6537590622901917, "learning_rate": 0.00011965297107328999, "loss": 1.6695, "step": 63597 }, { "epoch": 2.12, "grad_norm": 0.6149469017982483, "learning_rate": 0.00011964461857119652, "loss": 1.6934, "step": 63598 }, { "epoch": 2.12, "grad_norm": 0.6357336640357971, "learning_rate": 0.0001196362662880278, "loss": 1.7327, "step": 63599 }, { "epoch": 2.12, "grad_norm": 0.6168409585952759, "learning_rate": 0.00011962791422379421, "loss": 1.7243, "step": 63600 }, { "epoch": 2.12, "grad_norm": 0.6313285827636719, "learning_rate": 0.00011961956237850563, "loss": 1.7039, "step": 63601 }, { "epoch": 2.12, "grad_norm": 0.6431690454483032, "learning_rate": 0.00011961121075217243, "loss": 1.7123, "step": 63602 }, { "epoch": 2.12, "grad_norm": 0.6354314088821411, "learning_rate": 0.00011960285934480454, "loss": 1.7656, "step": 63603 }, { "epoch": 2.12, "grad_norm": 0.6390007138252258, "learning_rate": 0.00011959450815641229, "loss": 1.7085, "step": 63604 }, { "epoch": 2.12, "grad_norm": 0.6476592421531677, "learning_rate": 0.00011958615718700572, "loss": 1.7799, "step": 63605 }, { "epoch": 2.12, "grad_norm": 0.6354652047157288, "learning_rate": 0.0001195778064365949, "loss": 1.6962, "step": 63606 }, { "epoch": 2.12, "grad_norm": 0.6501832604408264, "learning_rate": 0.00011956945590519014, "loss": 1.7129, "step": 63607 }, { "epoch": 2.12, "grad_norm": 0.6252106428146362, "learning_rate": 0.00011956110559280135, "loss": 1.6415, "step": 63608 }, { "epoch": 2.12, "grad_norm": 0.638883113861084, "learning_rate": 0.0001195527554994389, "loss": 1.706, "step": 63609 }, { "epoch": 2.12, "grad_norm": 0.6222396492958069, "learning_rate": 0.00011954440562511281, "loss": 1.6399, "step": 63610 }, { "epoch": 2.12, "grad_norm": 0.6483104825019836, "learning_rate": 0.00011953605596983313, "loss": 1.7163, "step": 63611 }, { "epoch": 2.12, "grad_norm": 0.6287535429000854, "learning_rate": 0.00011952770653361018, "loss": 1.7315, "step": 63612 }, { "epoch": 2.12, "grad_norm": 0.6354935765266418, "learning_rate": 0.00011951935731645394, "loss": 1.6637, "step": 63613 }, { "epoch": 2.12, "grad_norm": 0.642437756061554, "learning_rate": 0.00011951100831837468, "loss": 1.8216, "step": 63614 }, { "epoch": 2.12, "grad_norm": 0.6475939750671387, "learning_rate": 0.00011950265953938235, "loss": 1.7117, "step": 63615 }, { "epoch": 2.12, "grad_norm": 0.6350905299186707, "learning_rate": 0.00011949431097948731, "loss": 1.7994, "step": 63616 }, { "epoch": 2.12, "grad_norm": 0.643124520778656, "learning_rate": 0.00011948596263869957, "loss": 1.728, "step": 63617 }, { "epoch": 2.12, "grad_norm": 0.6218314170837402, "learning_rate": 0.00011947761451702916, "loss": 1.672, "step": 63618 }, { "epoch": 2.12, "grad_norm": 0.6309219598770142, "learning_rate": 0.00011946926661448647, "loss": 1.6724, "step": 63619 }, { "epoch": 2.12, "grad_norm": 0.6248065829277039, "learning_rate": 0.00011946091893108148, "loss": 1.7629, "step": 63620 }, { "epoch": 2.12, "grad_norm": 0.6393972635269165, "learning_rate": 0.0001194525714668242, "loss": 1.6267, "step": 63621 }, { "epoch": 2.12, "grad_norm": 0.6425939202308655, "learning_rate": 0.00011944422422172494, "loss": 1.7865, "step": 63622 }, { "epoch": 2.12, "grad_norm": 0.6305299997329712, "learning_rate": 0.00011943587719579386, "loss": 1.6641, "step": 63623 }, { "epoch": 2.12, "grad_norm": 0.6553437113761902, "learning_rate": 0.00011942753038904103, "loss": 1.7602, "step": 63624 }, { "epoch": 2.12, "grad_norm": 0.6347674131393433, "learning_rate": 0.00011941918380147646, "loss": 1.7292, "step": 63625 }, { "epoch": 2.12, "grad_norm": 0.6429327130317688, "learning_rate": 0.00011941083743311052, "loss": 1.7622, "step": 63626 }, { "epoch": 2.12, "grad_norm": 0.6470013856887817, "learning_rate": 0.0001194024912839532, "loss": 1.7149, "step": 63627 }, { "epoch": 2.12, "grad_norm": 0.6450084447860718, "learning_rate": 0.00011939414535401451, "loss": 1.7388, "step": 63628 }, { "epoch": 2.12, "grad_norm": 0.6309012770652771, "learning_rate": 0.00011938579964330477, "loss": 1.7448, "step": 63629 }, { "epoch": 2.12, "grad_norm": 0.6199851036071777, "learning_rate": 0.00011937745415183425, "loss": 1.7245, "step": 63630 }, { "epoch": 2.12, "grad_norm": 0.6145531535148621, "learning_rate": 0.00011936910887961266, "loss": 1.7114, "step": 63631 }, { "epoch": 2.12, "grad_norm": 0.6164025068283081, "learning_rate": 0.00011936076382665036, "loss": 1.6986, "step": 63632 }, { "epoch": 2.12, "grad_norm": 0.6644461750984192, "learning_rate": 0.00011935241899295758, "loss": 1.7455, "step": 63633 }, { "epoch": 2.12, "grad_norm": 0.6410293579101562, "learning_rate": 0.00011934407437854438, "loss": 1.7159, "step": 63634 }, { "epoch": 2.12, "grad_norm": 1.0422221422195435, "learning_rate": 0.00011933572998342071, "loss": 1.7286, "step": 63635 }, { "epoch": 2.12, "grad_norm": 0.6418756246566772, "learning_rate": 0.00011932738580759685, "loss": 1.7154, "step": 63636 }, { "epoch": 2.12, "grad_norm": 0.6340610980987549, "learning_rate": 0.00011931904185108316, "loss": 1.6569, "step": 63637 }, { "epoch": 2.12, "grad_norm": 0.6449454426765442, "learning_rate": 0.0001193106981138893, "loss": 1.6696, "step": 63638 }, { "epoch": 2.12, "grad_norm": 0.6467761397361755, "learning_rate": 0.00011930235459602563, "loss": 1.7659, "step": 63639 }, { "epoch": 2.12, "grad_norm": 0.6442511677742004, "learning_rate": 0.0001192940112975024, "loss": 1.7443, "step": 63640 }, { "epoch": 2.12, "grad_norm": 0.6167856454849243, "learning_rate": 0.00011928566821832958, "loss": 1.723, "step": 63641 }, { "epoch": 2.12, "grad_norm": 0.6261381506919861, "learning_rate": 0.00011927732535851724, "loss": 1.7592, "step": 63642 }, { "epoch": 2.12, "grad_norm": 0.615606963634491, "learning_rate": 0.00011926898271807561, "loss": 1.6878, "step": 63643 }, { "epoch": 2.12, "grad_norm": 0.6302647590637207, "learning_rate": 0.00011926064029701501, "loss": 1.6964, "step": 63644 }, { "epoch": 2.12, "grad_norm": 0.6549134254455566, "learning_rate": 0.00011925229809534516, "loss": 1.7322, "step": 63645 }, { "epoch": 2.12, "grad_norm": 0.6657555103302002, "learning_rate": 0.00011924395611307638, "loss": 1.7553, "step": 63646 }, { "epoch": 2.12, "grad_norm": 0.6332710385322571, "learning_rate": 0.00011923561435021891, "loss": 1.6344, "step": 63647 }, { "epoch": 2.12, "grad_norm": 0.6201314330101013, "learning_rate": 0.0001192272728067828, "loss": 1.7508, "step": 63648 }, { "epoch": 2.12, "grad_norm": 0.625008225440979, "learning_rate": 0.00011921893148277798, "loss": 1.7172, "step": 63649 }, { "epoch": 2.12, "grad_norm": 0.622243344783783, "learning_rate": 0.0001192105903782149, "loss": 1.6172, "step": 63650 }, { "epoch": 2.12, "grad_norm": 0.6242600083351135, "learning_rate": 0.00011920224949310348, "loss": 1.6645, "step": 63651 }, { "epoch": 2.12, "grad_norm": 0.6241230368614197, "learning_rate": 0.0001191939088274538, "loss": 1.7017, "step": 63652 }, { "epoch": 2.12, "grad_norm": 0.6325385570526123, "learning_rate": 0.00011918556838127609, "loss": 1.6791, "step": 63653 }, { "epoch": 2.12, "grad_norm": 0.6228674054145813, "learning_rate": 0.00011917722815458056, "loss": 1.7215, "step": 63654 }, { "epoch": 2.12, "grad_norm": 0.6218200325965881, "learning_rate": 0.00011916888814737721, "loss": 1.6983, "step": 63655 }, { "epoch": 2.12, "grad_norm": 0.644123375415802, "learning_rate": 0.00011916054835967608, "loss": 1.7237, "step": 63656 }, { "epoch": 2.12, "grad_norm": 0.6605099439620972, "learning_rate": 0.00011915220879148753, "loss": 1.74, "step": 63657 }, { "epoch": 2.12, "grad_norm": 0.6417322754859924, "learning_rate": 0.00011914386944282155, "loss": 1.708, "step": 63658 }, { "epoch": 2.12, "grad_norm": 0.6358529329299927, "learning_rate": 0.00011913553031368815, "loss": 1.7081, "step": 63659 }, { "epoch": 2.12, "grad_norm": 0.6451885104179382, "learning_rate": 0.00011912719140409768, "loss": 1.7145, "step": 63660 }, { "epoch": 2.12, "grad_norm": 0.6382375955581665, "learning_rate": 0.00011911885271406004, "loss": 1.733, "step": 63661 }, { "epoch": 2.12, "grad_norm": 0.6368594169616699, "learning_rate": 0.00011911051424358554, "loss": 1.7364, "step": 63662 }, { "epoch": 2.12, "grad_norm": 0.6471229791641235, "learning_rate": 0.00011910217599268417, "loss": 1.6827, "step": 63663 }, { "epoch": 2.12, "grad_norm": 0.6317417025566101, "learning_rate": 0.00011909383796136618, "loss": 1.5886, "step": 63664 }, { "epoch": 2.12, "grad_norm": 0.6449378132820129, "learning_rate": 0.0001190855001496416, "loss": 1.7259, "step": 63665 }, { "epoch": 2.12, "grad_norm": 0.651414155960083, "learning_rate": 0.00011907716255752049, "loss": 1.635, "step": 63666 }, { "epoch": 2.12, "grad_norm": 0.6221434473991394, "learning_rate": 0.00011906882518501314, "loss": 1.7375, "step": 63667 }, { "epoch": 2.12, "grad_norm": 0.6189209222793579, "learning_rate": 0.00011906048803212948, "loss": 1.7027, "step": 63668 }, { "epoch": 2.12, "grad_norm": 0.6549643278121948, "learning_rate": 0.00011905215109887983, "loss": 1.6844, "step": 63669 }, { "epoch": 2.12, "grad_norm": 0.64476478099823, "learning_rate": 0.00011904381438527417, "loss": 1.7809, "step": 63670 }, { "epoch": 2.12, "grad_norm": 0.6332094073295593, "learning_rate": 0.00011903547789132259, "loss": 1.7344, "step": 63671 }, { "epoch": 2.12, "grad_norm": 0.6135591268539429, "learning_rate": 0.00011902714161703529, "loss": 1.6604, "step": 63672 }, { "epoch": 2.12, "grad_norm": 0.6398953795433044, "learning_rate": 0.00011901880556242242, "loss": 1.6845, "step": 63673 }, { "epoch": 2.12, "grad_norm": 0.6304867267608643, "learning_rate": 0.00011901046972749411, "loss": 1.7638, "step": 63674 }, { "epoch": 2.12, "grad_norm": 0.6573461294174194, "learning_rate": 0.00011900213411226029, "loss": 1.6939, "step": 63675 }, { "epoch": 2.12, "grad_norm": 0.6794555187225342, "learning_rate": 0.00011899379871673135, "loss": 1.7098, "step": 63676 }, { "epoch": 2.12, "grad_norm": 0.6665827631950378, "learning_rate": 0.00011898546354091723, "loss": 1.7495, "step": 63677 }, { "epoch": 2.12, "grad_norm": 0.6575623154640198, "learning_rate": 0.00011897712858482798, "loss": 1.7913, "step": 63678 }, { "epoch": 2.12, "grad_norm": 0.6360629200935364, "learning_rate": 0.00011896879384847386, "loss": 1.7606, "step": 63679 }, { "epoch": 2.12, "grad_norm": 0.6398917436599731, "learning_rate": 0.00011896045933186512, "loss": 1.6965, "step": 63680 }, { "epoch": 2.12, "grad_norm": 0.6439223289489746, "learning_rate": 0.00011895212503501154, "loss": 1.7259, "step": 63681 }, { "epoch": 2.12, "grad_norm": 0.6737189888954163, "learning_rate": 0.00011894379095792336, "loss": 1.7538, "step": 63682 }, { "epoch": 2.12, "grad_norm": 0.6667077541351318, "learning_rate": 0.00011893545710061087, "loss": 1.7432, "step": 63683 }, { "epoch": 2.12, "grad_norm": 0.6687096357345581, "learning_rate": 0.00011892712346308406, "loss": 1.6223, "step": 63684 }, { "epoch": 2.12, "grad_norm": 0.6717377305030823, "learning_rate": 0.00011891879004535291, "loss": 1.6856, "step": 63685 }, { "epoch": 2.12, "grad_norm": 0.6574465036392212, "learning_rate": 0.0001189104568474277, "loss": 1.7191, "step": 63686 }, { "epoch": 2.12, "grad_norm": 0.6323922276496887, "learning_rate": 0.0001189021238693187, "loss": 1.6556, "step": 63687 }, { "epoch": 2.12, "grad_norm": 0.6449010372161865, "learning_rate": 0.00011889379111103565, "loss": 1.7959, "step": 63688 }, { "epoch": 2.12, "grad_norm": 0.6168363690376282, "learning_rate": 0.00011888545857258882, "loss": 1.6624, "step": 63689 }, { "epoch": 2.12, "grad_norm": 0.6399067640304565, "learning_rate": 0.0001188771262539885, "loss": 1.6725, "step": 63690 }, { "epoch": 2.12, "grad_norm": 0.6264187097549438, "learning_rate": 0.00011886879415524465, "loss": 1.6809, "step": 63691 }, { "epoch": 2.12, "grad_norm": 0.6368676424026489, "learning_rate": 0.00011886046227636727, "loss": 1.6581, "step": 63692 }, { "epoch": 2.12, "grad_norm": 0.6552048325538635, "learning_rate": 0.00011885213061736662, "loss": 1.7221, "step": 63693 }, { "epoch": 2.12, "grad_norm": 0.6376426815986633, "learning_rate": 0.00011884379917825298, "loss": 1.7065, "step": 63694 }, { "epoch": 2.12, "grad_norm": 0.6224479675292969, "learning_rate": 0.00011883546795903611, "loss": 1.7684, "step": 63695 }, { "epoch": 2.12, "grad_norm": 0.6343033909797668, "learning_rate": 0.00011882713695972627, "loss": 1.7007, "step": 63696 }, { "epoch": 2.12, "grad_norm": 0.6308680772781372, "learning_rate": 0.00011881880618033367, "loss": 1.6655, "step": 63697 }, { "epoch": 2.12, "grad_norm": 0.6376525163650513, "learning_rate": 0.00011881047562086836, "loss": 1.7144, "step": 63698 }, { "epoch": 2.12, "grad_norm": 0.6504107713699341, "learning_rate": 0.00011880214528134034, "loss": 1.7005, "step": 63699 }, { "epoch": 2.12, "grad_norm": 0.6670672297477722, "learning_rate": 0.00011879381516175991, "loss": 1.7319, "step": 63700 }, { "epoch": 2.12, "grad_norm": 0.6356406211853027, "learning_rate": 0.00011878548526213711, "loss": 1.6934, "step": 63701 }, { "epoch": 2.12, "grad_norm": 0.6500291228294373, "learning_rate": 0.00011877715558248188, "loss": 1.7745, "step": 63702 }, { "epoch": 2.12, "grad_norm": 0.6768766045570374, "learning_rate": 0.00011876882612280452, "loss": 1.682, "step": 63703 }, { "epoch": 2.12, "grad_norm": 0.6436851024627686, "learning_rate": 0.00011876049688311519, "loss": 1.7415, "step": 63704 }, { "epoch": 2.12, "grad_norm": 0.6511238217353821, "learning_rate": 0.0001187521678634239, "loss": 1.6849, "step": 63705 }, { "epoch": 2.12, "grad_norm": 0.647497296333313, "learning_rate": 0.00011874383906374071, "loss": 1.7047, "step": 63706 }, { "epoch": 2.12, "grad_norm": 0.643021285533905, "learning_rate": 0.00011873551048407588, "loss": 1.6991, "step": 63707 }, { "epoch": 2.12, "grad_norm": 0.6535636782646179, "learning_rate": 0.00011872718212443941, "loss": 1.7545, "step": 63708 }, { "epoch": 2.12, "grad_norm": 0.6577938795089722, "learning_rate": 0.00011871885398484134, "loss": 1.6961, "step": 63709 }, { "epoch": 2.12, "grad_norm": 0.6335391998291016, "learning_rate": 0.000118710526065292, "loss": 1.7771, "step": 63710 }, { "epoch": 2.12, "grad_norm": 0.656447172164917, "learning_rate": 0.00011870219836580123, "loss": 1.7468, "step": 63711 }, { "epoch": 2.12, "grad_norm": 0.6340792179107666, "learning_rate": 0.0001186938708863794, "loss": 1.6901, "step": 63712 }, { "epoch": 2.12, "grad_norm": 0.6419306397438049, "learning_rate": 0.00011868554362703639, "loss": 1.7671, "step": 63713 }, { "epoch": 2.12, "grad_norm": 0.6291486024856567, "learning_rate": 0.0001186772165877825, "loss": 1.7158, "step": 63714 }, { "epoch": 2.12, "grad_norm": 0.6227237582206726, "learning_rate": 0.00011866888976862778, "loss": 1.6051, "step": 63715 }, { "epoch": 2.12, "grad_norm": 0.7456849813461304, "learning_rate": 0.00011866056316958218, "loss": 1.8457, "step": 63716 }, { "epoch": 2.12, "grad_norm": 0.642559289932251, "learning_rate": 0.00011865223679065607, "loss": 1.6733, "step": 63717 }, { "epoch": 2.12, "grad_norm": 0.6342856287956238, "learning_rate": 0.00011864391063185932, "loss": 1.7942, "step": 63718 }, { "epoch": 2.12, "grad_norm": 0.6669901013374329, "learning_rate": 0.0001186355846932022, "loss": 1.7367, "step": 63719 }, { "epoch": 2.12, "grad_norm": 0.657436728477478, "learning_rate": 0.00011862725897469468, "loss": 1.7052, "step": 63720 }, { "epoch": 2.12, "grad_norm": 0.6217700839042664, "learning_rate": 0.00011861893347634705, "loss": 1.7026, "step": 63721 }, { "epoch": 2.12, "grad_norm": 0.6557795405387878, "learning_rate": 0.00011861060819816935, "loss": 1.7386, "step": 63722 }, { "epoch": 2.12, "grad_norm": 0.6824068427085876, "learning_rate": 0.00011860228314017148, "loss": 1.7047, "step": 63723 }, { "epoch": 2.12, "grad_norm": 0.6475536823272705, "learning_rate": 0.00011859395830236384, "loss": 1.668, "step": 63724 }, { "epoch": 2.12, "grad_norm": 0.6632737517356873, "learning_rate": 0.00011858563368475628, "loss": 1.6776, "step": 63725 }, { "epoch": 2.12, "grad_norm": 0.6587268114089966, "learning_rate": 0.00011857730928735919, "loss": 1.7151, "step": 63726 }, { "epoch": 2.12, "grad_norm": 0.6196150779724121, "learning_rate": 0.00011856898511018247, "loss": 1.731, "step": 63727 }, { "epoch": 2.12, "grad_norm": 0.6319526433944702, "learning_rate": 0.00011856066115323618, "loss": 1.6745, "step": 63728 }, { "epoch": 2.12, "grad_norm": 0.644447922706604, "learning_rate": 0.00011855233741653059, "loss": 1.7674, "step": 63729 }, { "epoch": 2.12, "grad_norm": 0.6331623792648315, "learning_rate": 0.00011854401390007564, "loss": 1.6705, "step": 63730 }, { "epoch": 2.12, "grad_norm": 0.6748111248016357, "learning_rate": 0.00011853569060388163, "loss": 1.69, "step": 63731 }, { "epoch": 2.12, "grad_norm": 0.665891706943512, "learning_rate": 0.00011852736752795848, "loss": 1.7534, "step": 63732 }, { "epoch": 2.12, "grad_norm": 0.6581810116767883, "learning_rate": 0.00011851904467231643, "loss": 1.7298, "step": 63733 }, { "epoch": 2.12, "grad_norm": 0.6322426795959473, "learning_rate": 0.00011851072203696554, "loss": 1.6911, "step": 63734 }, { "epoch": 2.12, "grad_norm": 0.6284381151199341, "learning_rate": 0.00011850239962191577, "loss": 1.7184, "step": 63735 }, { "epoch": 2.12, "grad_norm": 0.6258386373519897, "learning_rate": 0.00011849407742717746, "loss": 1.6667, "step": 63736 }, { "epoch": 2.12, "grad_norm": 0.635358989238739, "learning_rate": 0.00011848575545276058, "loss": 1.6688, "step": 63737 }, { "epoch": 2.12, "grad_norm": 0.64727383852005, "learning_rate": 0.00011847743369867518, "loss": 1.7206, "step": 63738 }, { "epoch": 2.12, "grad_norm": 0.6242323517799377, "learning_rate": 0.0001184691121649314, "loss": 1.7201, "step": 63739 }, { "epoch": 2.12, "grad_norm": 0.6470685601234436, "learning_rate": 0.00011846079085153949, "loss": 1.7878, "step": 63740 }, { "epoch": 2.12, "grad_norm": 0.6282480955123901, "learning_rate": 0.0001184524697585094, "loss": 1.7564, "step": 63741 }, { "epoch": 2.12, "grad_norm": 0.6348286867141724, "learning_rate": 0.00011844414888585118, "loss": 1.6833, "step": 63742 }, { "epoch": 2.12, "grad_norm": 0.6185595393180847, "learning_rate": 0.00011843582823357512, "loss": 1.7098, "step": 63743 }, { "epoch": 2.12, "grad_norm": 0.6410976648330688, "learning_rate": 0.00011842750780169121, "loss": 1.6608, "step": 63744 }, { "epoch": 2.12, "grad_norm": 0.659652054309845, "learning_rate": 0.00011841918759020946, "loss": 1.666, "step": 63745 }, { "epoch": 2.12, "grad_norm": 0.6583878397941589, "learning_rate": 0.00011841086759914003, "loss": 1.7724, "step": 63746 }, { "epoch": 2.12, "grad_norm": 0.6208137273788452, "learning_rate": 0.00011840254782849329, "loss": 1.6446, "step": 63747 }, { "epoch": 2.12, "grad_norm": 0.6137274503707886, "learning_rate": 0.00011839422827827888, "loss": 1.6806, "step": 63748 }, { "epoch": 2.12, "grad_norm": 0.6180790662765503, "learning_rate": 0.00011838590894850713, "loss": 1.7774, "step": 63749 }, { "epoch": 2.12, "grad_norm": 0.6183565855026245, "learning_rate": 0.00011837758983918822, "loss": 1.7051, "step": 63750 }, { "epoch": 2.12, "grad_norm": 0.6321848034858704, "learning_rate": 0.00011836927095033214, "loss": 1.6865, "step": 63751 }, { "epoch": 2.12, "grad_norm": 0.6293230056762695, "learning_rate": 0.0001183609522819489, "loss": 1.7658, "step": 63752 }, { "epoch": 2.12, "grad_norm": 0.6461612582206726, "learning_rate": 0.00011835263383404873, "loss": 1.7584, "step": 63753 }, { "epoch": 2.12, "grad_norm": 0.6662540435791016, "learning_rate": 0.0001183443156066419, "loss": 1.7316, "step": 63754 }, { "epoch": 2.12, "grad_norm": 0.6366286873817444, "learning_rate": 0.0001183359975997381, "loss": 1.7334, "step": 63755 }, { "epoch": 2.12, "grad_norm": 0.6367861032485962, "learning_rate": 0.0001183276798133476, "loss": 1.721, "step": 63756 }, { "epoch": 2.12, "grad_norm": 0.6335288286209106, "learning_rate": 0.00011831936224748066, "loss": 1.6793, "step": 63757 }, { "epoch": 2.12, "grad_norm": 0.6366311311721802, "learning_rate": 0.00011831104490214723, "loss": 1.7799, "step": 63758 }, { "epoch": 2.12, "grad_norm": 0.645257294178009, "learning_rate": 0.00011830272777735735, "loss": 1.7047, "step": 63759 }, { "epoch": 2.12, "grad_norm": 0.6457765698432922, "learning_rate": 0.00011829441087312113, "loss": 1.6583, "step": 63760 }, { "epoch": 2.12, "grad_norm": 0.6246399283409119, "learning_rate": 0.00011828609418944896, "loss": 1.6716, "step": 63761 }, { "epoch": 2.12, "grad_norm": 0.6557360887527466, "learning_rate": 0.00011827777772635049, "loss": 1.7237, "step": 63762 }, { "epoch": 2.12, "grad_norm": 0.6185245513916016, "learning_rate": 0.00011826946148383602, "loss": 1.714, "step": 63763 }, { "epoch": 2.12, "grad_norm": 0.7799285054206848, "learning_rate": 0.00011826114546191577, "loss": 1.6844, "step": 63764 }, { "epoch": 2.12, "grad_norm": 0.6273802518844604, "learning_rate": 0.0001182528296605997, "loss": 1.646, "step": 63765 }, { "epoch": 2.12, "grad_norm": 0.6494832634925842, "learning_rate": 0.0001182445140798978, "loss": 1.687, "step": 63766 }, { "epoch": 2.12, "grad_norm": 0.642205536365509, "learning_rate": 0.0001182361987198204, "loss": 1.7087, "step": 63767 }, { "epoch": 2.12, "grad_norm": 0.6551015973091125, "learning_rate": 0.00011822788358037743, "loss": 1.6902, "step": 63768 }, { "epoch": 2.12, "grad_norm": 0.6317152976989746, "learning_rate": 0.00011821956866157898, "loss": 1.7554, "step": 63769 }, { "epoch": 2.12, "grad_norm": 0.6478295922279358, "learning_rate": 0.00011821125396343514, "loss": 1.712, "step": 63770 }, { "epoch": 2.12, "grad_norm": 0.6207090616226196, "learning_rate": 0.00011820293948595621, "loss": 1.6396, "step": 63771 }, { "epoch": 2.12, "grad_norm": 0.6339889168739319, "learning_rate": 0.00011819462522915208, "loss": 1.7048, "step": 63772 }, { "epoch": 2.12, "grad_norm": 0.6327976584434509, "learning_rate": 0.0001181863111930328, "loss": 1.7645, "step": 63773 }, { "epoch": 2.12, "grad_norm": 0.6442064046859741, "learning_rate": 0.00011817799737760865, "loss": 1.7092, "step": 63774 }, { "epoch": 2.12, "grad_norm": 0.6407288312911987, "learning_rate": 0.00011816968378288963, "loss": 1.6936, "step": 63775 }, { "epoch": 2.12, "grad_norm": 0.6450719833374023, "learning_rate": 0.0001181613704088857, "loss": 1.6569, "step": 63776 }, { "epoch": 2.12, "grad_norm": 0.6367694735527039, "learning_rate": 0.0001181530572556072, "loss": 1.7046, "step": 63777 }, { "epoch": 2.12, "grad_norm": 2.565941333770752, "learning_rate": 0.00011814474432306398, "loss": 1.7726, "step": 63778 }, { "epoch": 2.12, "grad_norm": 0.652211606502533, "learning_rate": 0.00011813643161126638, "loss": 1.6713, "step": 63779 }, { "epoch": 2.12, "grad_norm": 0.6339805126190186, "learning_rate": 0.0001181281191202242, "loss": 1.7426, "step": 63780 }, { "epoch": 2.12, "grad_norm": 0.6214369535446167, "learning_rate": 0.00011811980684994781, "loss": 1.6385, "step": 63781 }, { "epoch": 2.12, "grad_norm": 0.6134688854217529, "learning_rate": 0.00011811149480044706, "loss": 1.6404, "step": 63782 }, { "epoch": 2.12, "grad_norm": 0.6591307520866394, "learning_rate": 0.00011810318297173229, "loss": 1.8246, "step": 63783 }, { "epoch": 2.12, "grad_norm": 0.633830726146698, "learning_rate": 0.00011809487136381343, "loss": 1.6965, "step": 63784 }, { "epoch": 2.12, "grad_norm": 0.6268784403800964, "learning_rate": 0.00011808655997670049, "loss": 1.6642, "step": 63785 }, { "epoch": 2.12, "grad_norm": 0.6151180863380432, "learning_rate": 0.00011807824881040378, "loss": 1.6369, "step": 63786 }, { "epoch": 2.12, "grad_norm": 0.6518924236297607, "learning_rate": 0.00011806993786493325, "loss": 1.7177, "step": 63787 }, { "epoch": 2.12, "grad_norm": 0.6466712951660156, "learning_rate": 0.00011806162714029894, "loss": 1.6243, "step": 63788 }, { "epoch": 2.12, "grad_norm": 0.637657880783081, "learning_rate": 0.00011805331663651097, "loss": 1.6455, "step": 63789 }, { "epoch": 2.12, "grad_norm": 0.6483870148658752, "learning_rate": 0.00011804500635357961, "loss": 1.7542, "step": 63790 }, { "epoch": 2.12, "grad_norm": 0.6266157031059265, "learning_rate": 0.00011803669629151477, "loss": 1.8031, "step": 63791 }, { "epoch": 2.12, "grad_norm": 0.6407210826873779, "learning_rate": 0.00011802838645032647, "loss": 1.7436, "step": 63792 }, { "epoch": 2.12, "grad_norm": 0.6422076225280762, "learning_rate": 0.00011802007683002499, "loss": 1.7073, "step": 63793 }, { "epoch": 2.12, "grad_norm": 0.6643180847167969, "learning_rate": 0.00011801176743062035, "loss": 1.6858, "step": 63794 }, { "epoch": 2.12, "grad_norm": 0.6076605319976807, "learning_rate": 0.00011800345825212249, "loss": 1.7538, "step": 63795 }, { "epoch": 2.12, "grad_norm": 0.6344445943832397, "learning_rate": 0.00011799514929454161, "loss": 1.7181, "step": 63796 }, { "epoch": 2.12, "grad_norm": 2.030949831008911, "learning_rate": 0.00011798684055788801, "loss": 1.7436, "step": 63797 }, { "epoch": 2.12, "grad_norm": 0.6384409070014954, "learning_rate": 0.00011797853204217139, "loss": 1.7334, "step": 63798 }, { "epoch": 2.12, "grad_norm": 0.6424784660339355, "learning_rate": 0.00011797022374740199, "loss": 1.6807, "step": 63799 }, { "epoch": 2.12, "grad_norm": 0.6433277130126953, "learning_rate": 0.00011796191567359002, "loss": 1.7135, "step": 63800 }, { "epoch": 2.12, "grad_norm": 0.6184304356575012, "learning_rate": 0.00011795360782074548, "loss": 1.7094, "step": 63801 }, { "epoch": 2.12, "grad_norm": 0.6323108673095703, "learning_rate": 0.00011794530018887831, "loss": 1.676, "step": 63802 }, { "epoch": 2.12, "grad_norm": 0.6139794588088989, "learning_rate": 0.0001179369927779987, "loss": 1.6798, "step": 63803 }, { "epoch": 2.12, "grad_norm": 0.6354068517684937, "learning_rate": 0.00011792868558811703, "loss": 1.7102, "step": 63804 }, { "epoch": 2.12, "grad_norm": 0.6330972909927368, "learning_rate": 0.00011792037861924285, "loss": 1.6966, "step": 63805 }, { "epoch": 2.12, "grad_norm": 0.6306551694869995, "learning_rate": 0.00011791207187138652, "loss": 1.7922, "step": 63806 }, { "epoch": 2.12, "grad_norm": 0.6307851672172546, "learning_rate": 0.00011790376534455818, "loss": 1.6876, "step": 63807 }, { "epoch": 2.12, "grad_norm": 0.6605713963508606, "learning_rate": 0.00011789545903876786, "loss": 1.7887, "step": 63808 }, { "epoch": 2.12, "grad_norm": 0.613089919090271, "learning_rate": 0.00011788715295402552, "loss": 1.793, "step": 63809 }, { "epoch": 2.12, "grad_norm": 0.6386929154396057, "learning_rate": 0.00011787884709034135, "loss": 1.704, "step": 63810 }, { "epoch": 2.12, "grad_norm": 0.6303347945213318, "learning_rate": 0.00011787054144772563, "loss": 1.6911, "step": 63811 }, { "epoch": 2.12, "grad_norm": 0.6397626996040344, "learning_rate": 0.00011786223602618805, "loss": 1.7282, "step": 63812 }, { "epoch": 2.12, "grad_norm": 0.6255177855491638, "learning_rate": 0.00011785393082573885, "loss": 1.8163, "step": 63813 }, { "epoch": 2.12, "grad_norm": 0.6494067907333374, "learning_rate": 0.00011784562584638822, "loss": 1.6534, "step": 63814 }, { "epoch": 2.12, "grad_norm": 0.6465438008308411, "learning_rate": 0.00011783732108814619, "loss": 1.6248, "step": 63815 }, { "epoch": 2.12, "grad_norm": 0.6265502572059631, "learning_rate": 0.00011782901655102272, "loss": 1.7133, "step": 63816 }, { "epoch": 2.12, "grad_norm": 0.6306397318840027, "learning_rate": 0.00011782071223502806, "loss": 1.7691, "step": 63817 }, { "epoch": 2.12, "grad_norm": 0.6250466704368591, "learning_rate": 0.00011781240814017222, "loss": 1.7321, "step": 63818 }, { "epoch": 2.12, "grad_norm": 0.6227020025253296, "learning_rate": 0.0001178041042664652, "loss": 1.7124, "step": 63819 }, { "epoch": 2.12, "grad_norm": 0.6129958033561707, "learning_rate": 0.0001177958006139171, "loss": 1.639, "step": 63820 }, { "epoch": 2.12, "grad_norm": 0.6540390849113464, "learning_rate": 0.0001177874971825382, "loss": 1.8192, "step": 63821 }, { "epoch": 2.12, "grad_norm": 0.6427711248397827, "learning_rate": 0.0001177791939723384, "loss": 1.7563, "step": 63822 }, { "epoch": 2.12, "grad_norm": 0.6426864862442017, "learning_rate": 0.00011777089098332773, "loss": 1.7039, "step": 63823 }, { "epoch": 2.12, "grad_norm": 0.6391104459762573, "learning_rate": 0.00011776258821551642, "loss": 1.6146, "step": 63824 }, { "epoch": 2.12, "grad_norm": 0.6389747262001038, "learning_rate": 0.00011775428566891449, "loss": 1.6549, "step": 63825 }, { "epoch": 2.12, "grad_norm": 0.6373469233512878, "learning_rate": 0.00011774598334353192, "loss": 1.7181, "step": 63826 }, { "epoch": 2.12, "grad_norm": 0.6275612115859985, "learning_rate": 0.00011773768123937897, "loss": 1.7405, "step": 63827 }, { "epoch": 2.12, "grad_norm": 0.6360265016555786, "learning_rate": 0.00011772937935646548, "loss": 1.7037, "step": 63828 }, { "epoch": 2.12, "grad_norm": 0.6402831673622131, "learning_rate": 0.00011772107769480181, "loss": 1.7704, "step": 63829 }, { "epoch": 2.12, "grad_norm": 0.6396111845970154, "learning_rate": 0.00011771277625439778, "loss": 1.7303, "step": 63830 }, { "epoch": 2.12, "grad_norm": 0.6232145428657532, "learning_rate": 0.0001177044750352637, "loss": 1.6591, "step": 63831 }, { "epoch": 2.12, "grad_norm": 0.6498866081237793, "learning_rate": 0.0001176961740374095, "loss": 1.6935, "step": 63832 }, { "epoch": 2.12, "grad_norm": 0.6370056867599487, "learning_rate": 0.0001176878732608452, "loss": 1.686, "step": 63833 }, { "epoch": 2.12, "grad_norm": 0.6559202671051025, "learning_rate": 0.00011767957270558103, "loss": 1.7717, "step": 63834 }, { "epoch": 2.12, "grad_norm": 0.6422150731086731, "learning_rate": 0.00011767127237162694, "loss": 1.7072, "step": 63835 }, { "epoch": 2.12, "grad_norm": 0.6532477736473083, "learning_rate": 0.00011766297225899315, "loss": 1.7192, "step": 63836 }, { "epoch": 2.12, "grad_norm": 0.6289487481117249, "learning_rate": 0.0001176546723676895, "loss": 1.6818, "step": 63837 }, { "epoch": 2.12, "grad_norm": 0.6267196536064148, "learning_rate": 0.00011764637269772638, "loss": 1.7538, "step": 63838 }, { "epoch": 2.12, "grad_norm": 0.6258310079574585, "learning_rate": 0.00011763807324911366, "loss": 1.6391, "step": 63839 }, { "epoch": 2.12, "grad_norm": 0.6605279445648193, "learning_rate": 0.00011762977402186132, "loss": 1.7411, "step": 63840 }, { "epoch": 2.12, "grad_norm": 0.6307114362716675, "learning_rate": 0.00011762147501597972, "loss": 1.6974, "step": 63841 }, { "epoch": 2.12, "grad_norm": 0.6232866048812866, "learning_rate": 0.00011761317623147863, "loss": 1.7141, "step": 63842 }, { "epoch": 2.12, "grad_norm": 0.6352821588516235, "learning_rate": 0.0001176048776683684, "loss": 1.7547, "step": 63843 }, { "epoch": 2.12, "grad_norm": 0.6339907050132751, "learning_rate": 0.00011759657932665894, "loss": 1.6445, "step": 63844 }, { "epoch": 2.12, "grad_norm": 0.6333194971084595, "learning_rate": 0.00011758828120636031, "loss": 1.6726, "step": 63845 }, { "epoch": 2.12, "grad_norm": 0.6472806334495544, "learning_rate": 0.0001175799833074827, "loss": 1.7535, "step": 63846 }, { "epoch": 2.12, "grad_norm": 0.661919355392456, "learning_rate": 0.00011757168563003598, "loss": 1.7613, "step": 63847 }, { "epoch": 2.12, "grad_norm": 0.6240410804748535, "learning_rate": 0.0001175633881740305, "loss": 1.6809, "step": 63848 }, { "epoch": 2.12, "grad_norm": 0.6559758186340332, "learning_rate": 0.00011755509093947608, "loss": 1.6875, "step": 63849 }, { "epoch": 2.12, "grad_norm": 0.6397803425788879, "learning_rate": 0.00011754679392638297, "loss": 1.6946, "step": 63850 }, { "epoch": 2.12, "grad_norm": 0.6689490079879761, "learning_rate": 0.00011753849713476121, "loss": 1.6704, "step": 63851 }, { "epoch": 2.12, "grad_norm": 0.667925238609314, "learning_rate": 0.00011753020056462072, "loss": 1.7035, "step": 63852 }, { "epoch": 2.12, "grad_norm": 0.6373001933097839, "learning_rate": 0.00011752190421597174, "loss": 1.687, "step": 63853 }, { "epoch": 2.12, "grad_norm": 0.6226780414581299, "learning_rate": 0.00011751360808882431, "loss": 1.7199, "step": 63854 }, { "epoch": 2.12, "grad_norm": 0.6483103036880493, "learning_rate": 0.0001175053121831884, "loss": 1.6815, "step": 63855 }, { "epoch": 2.12, "grad_norm": 0.6264051795005798, "learning_rate": 0.00011749701649907412, "loss": 1.7148, "step": 63856 }, { "epoch": 2.12, "grad_norm": 0.6423889398574829, "learning_rate": 0.00011748872103649167, "loss": 1.7157, "step": 63857 }, { "epoch": 2.12, "grad_norm": 0.6859326958656311, "learning_rate": 0.00011748042579545104, "loss": 1.7218, "step": 63858 }, { "epoch": 2.12, "grad_norm": 0.6216353178024292, "learning_rate": 0.00011747213077596215, "loss": 1.6517, "step": 63859 }, { "epoch": 2.12, "grad_norm": 0.6402207016944885, "learning_rate": 0.00011746383597803533, "loss": 1.6551, "step": 63860 }, { "epoch": 2.12, "grad_norm": 0.6193740963935852, "learning_rate": 0.00011745554140168049, "loss": 1.7061, "step": 63861 }, { "epoch": 2.12, "grad_norm": 0.638943612575531, "learning_rate": 0.00011744724704690766, "loss": 1.6865, "step": 63862 }, { "epoch": 2.12, "grad_norm": 0.6387432813644409, "learning_rate": 0.00011743895291372693, "loss": 1.6967, "step": 63863 }, { "epoch": 2.12, "grad_norm": 0.6303592324256897, "learning_rate": 0.00011743065900214866, "loss": 1.8074, "step": 63864 }, { "epoch": 2.12, "grad_norm": 0.6106823682785034, "learning_rate": 0.00011742236531218243, "loss": 1.7008, "step": 63865 }, { "epoch": 2.12, "grad_norm": 0.637535810470581, "learning_rate": 0.00011741407184383855, "loss": 1.6785, "step": 63866 }, { "epoch": 2.12, "grad_norm": 0.6336334347724915, "learning_rate": 0.00011740577859712722, "loss": 1.6915, "step": 63867 }, { "epoch": 2.12, "grad_norm": 0.6213611960411072, "learning_rate": 0.00011739748557205833, "loss": 1.6648, "step": 63868 }, { "epoch": 2.12, "grad_norm": 0.6547256112098694, "learning_rate": 0.0001173891927686419, "loss": 1.8768, "step": 63869 }, { "epoch": 2.12, "grad_norm": 0.6239242553710938, "learning_rate": 0.00011738090018688809, "loss": 1.6879, "step": 63870 }, { "epoch": 2.12, "grad_norm": 0.6732184886932373, "learning_rate": 0.00011737260782680718, "loss": 1.731, "step": 63871 }, { "epoch": 2.13, "grad_norm": 0.6252590417861938, "learning_rate": 0.0001173643156884088, "loss": 1.6732, "step": 63872 }, { "epoch": 2.13, "grad_norm": 0.6397578120231628, "learning_rate": 0.0001173560237717032, "loss": 1.7097, "step": 63873 }, { "epoch": 2.13, "grad_norm": 0.6240492463111877, "learning_rate": 0.0001173477320767006, "loss": 1.6458, "step": 63874 }, { "epoch": 2.13, "grad_norm": 0.6374194622039795, "learning_rate": 0.00011733944060341094, "loss": 1.7194, "step": 63875 }, { "epoch": 2.13, "grad_norm": 0.6325097680091858, "learning_rate": 0.00011733114935184418, "loss": 1.7642, "step": 63876 }, { "epoch": 2.13, "grad_norm": 0.6112052202224731, "learning_rate": 0.0001173228583220105, "loss": 1.7015, "step": 63877 }, { "epoch": 2.13, "grad_norm": 0.6436132788658142, "learning_rate": 0.00011731456751392014, "loss": 1.7107, "step": 63878 }, { "epoch": 2.13, "grad_norm": 0.617961049079895, "learning_rate": 0.0001173062769275828, "loss": 1.7788, "step": 63879 }, { "epoch": 2.13, "grad_norm": 0.6397677659988403, "learning_rate": 0.0001172979865630087, "loss": 1.7568, "step": 63880 }, { "epoch": 2.13, "grad_norm": 0.6482418179512024, "learning_rate": 0.00011728969642020802, "loss": 1.7654, "step": 63881 }, { "epoch": 2.13, "grad_norm": 0.64335697889328, "learning_rate": 0.00011728140649919074, "loss": 1.7195, "step": 63882 }, { "epoch": 2.13, "grad_norm": 0.6586174964904785, "learning_rate": 0.00011727311679996682, "loss": 1.638, "step": 63883 }, { "epoch": 2.13, "grad_norm": 0.6227993369102478, "learning_rate": 0.0001172648273225465, "loss": 1.6564, "step": 63884 }, { "epoch": 2.13, "grad_norm": 0.6316463351249695, "learning_rate": 0.00011725653806693978, "loss": 1.7105, "step": 63885 }, { "epoch": 2.13, "grad_norm": 0.6405825018882751, "learning_rate": 0.00011724824903315655, "loss": 1.6038, "step": 63886 }, { "epoch": 2.13, "grad_norm": 0.638037919998169, "learning_rate": 0.00011723996022120704, "loss": 1.6345, "step": 63887 }, { "epoch": 2.13, "grad_norm": 0.6380301713943481, "learning_rate": 0.0001172316716311014, "loss": 1.6685, "step": 63888 }, { "epoch": 2.13, "grad_norm": 0.6131252646446228, "learning_rate": 0.00011722338326284958, "loss": 1.6518, "step": 63889 }, { "epoch": 2.13, "grad_norm": 0.6296740770339966, "learning_rate": 0.00011721509511646155, "loss": 1.6824, "step": 63890 }, { "epoch": 2.13, "grad_norm": 0.6420280933380127, "learning_rate": 0.00011720680719194752, "loss": 1.7111, "step": 63891 }, { "epoch": 2.13, "grad_norm": 0.6536555290222168, "learning_rate": 0.00011719851948931753, "loss": 1.6344, "step": 63892 }, { "epoch": 2.13, "grad_norm": 0.6521454453468323, "learning_rate": 0.00011719023200858149, "loss": 1.787, "step": 63893 }, { "epoch": 2.13, "grad_norm": 0.6475874781608582, "learning_rate": 0.00011718194474974968, "loss": 1.6223, "step": 63894 }, { "epoch": 2.13, "grad_norm": 0.6516855955123901, "learning_rate": 0.00011717365771283196, "loss": 1.7699, "step": 63895 }, { "epoch": 2.13, "grad_norm": 0.6303783655166626, "learning_rate": 0.00011716537089783855, "loss": 1.682, "step": 63896 }, { "epoch": 2.13, "grad_norm": 0.6231922507286072, "learning_rate": 0.00011715708430477937, "loss": 1.6913, "step": 63897 }, { "epoch": 2.13, "grad_norm": 0.6503998637199402, "learning_rate": 0.00011714879793366464, "loss": 1.7294, "step": 63898 }, { "epoch": 2.13, "grad_norm": 0.6653122305870056, "learning_rate": 0.00011714051178450423, "loss": 1.6545, "step": 63899 }, { "epoch": 2.13, "grad_norm": 0.6246384382247925, "learning_rate": 0.00011713222585730838, "loss": 1.7456, "step": 63900 }, { "epoch": 2.13, "grad_norm": 0.6431546807289124, "learning_rate": 0.00011712394015208708, "loss": 1.7128, "step": 63901 }, { "epoch": 2.13, "grad_norm": 0.6176387071609497, "learning_rate": 0.00011711565466885028, "loss": 1.6818, "step": 63902 }, { "epoch": 2.13, "grad_norm": 0.6316055059432983, "learning_rate": 0.0001171073694076082, "loss": 1.6646, "step": 63903 }, { "epoch": 2.13, "grad_norm": 0.6369510889053345, "learning_rate": 0.00011709908436837084, "loss": 1.7795, "step": 63904 }, { "epoch": 2.13, "grad_norm": 0.6810557246208191, "learning_rate": 0.00011709079955114814, "loss": 1.7111, "step": 63905 }, { "epoch": 2.13, "grad_norm": 0.6440154910087585, "learning_rate": 0.00011708251495595025, "loss": 1.7201, "step": 63906 }, { "epoch": 2.13, "grad_norm": 0.6498458385467529, "learning_rate": 0.00011707423058278737, "loss": 1.7387, "step": 63907 }, { "epoch": 2.13, "grad_norm": 0.6498518586158752, "learning_rate": 0.00011706594643166938, "loss": 1.7098, "step": 63908 }, { "epoch": 2.13, "grad_norm": 0.6496348977088928, "learning_rate": 0.00011705766250260631, "loss": 1.7125, "step": 63909 }, { "epoch": 2.13, "grad_norm": 0.6523679494857788, "learning_rate": 0.00011704937879560836, "loss": 1.6071, "step": 63910 }, { "epoch": 2.13, "grad_norm": 0.6132100820541382, "learning_rate": 0.00011704109531068551, "loss": 1.7235, "step": 63911 }, { "epoch": 2.13, "grad_norm": 0.6321336627006531, "learning_rate": 0.00011703281204784773, "loss": 1.6546, "step": 63912 }, { "epoch": 2.13, "grad_norm": 0.6206198334693909, "learning_rate": 0.00011702452900710514, "loss": 1.7001, "step": 63913 }, { "epoch": 2.13, "grad_norm": 0.6305674314498901, "learning_rate": 0.00011701624618846801, "loss": 1.7377, "step": 63914 }, { "epoch": 2.13, "grad_norm": 0.6121771931648254, "learning_rate": 0.00011700796359194601, "loss": 1.707, "step": 63915 }, { "epoch": 2.13, "grad_norm": 0.6303988695144653, "learning_rate": 0.00011699968121754935, "loss": 1.7165, "step": 63916 }, { "epoch": 2.13, "grad_norm": 0.6337670087814331, "learning_rate": 0.00011699139906528825, "loss": 1.6953, "step": 63917 }, { "epoch": 2.13, "grad_norm": 0.6430150866508484, "learning_rate": 0.00011698311713517262, "loss": 1.7973, "step": 63918 }, { "epoch": 2.13, "grad_norm": 0.651747465133667, "learning_rate": 0.0001169748354272124, "loss": 1.6441, "step": 63919 }, { "epoch": 2.13, "grad_norm": 0.6411638259887695, "learning_rate": 0.00011696655394141778, "loss": 1.6855, "step": 63920 }, { "epoch": 2.13, "grad_norm": 0.6341305375099182, "learning_rate": 0.00011695827267779899, "loss": 1.6858, "step": 63921 }, { "epoch": 2.13, "grad_norm": 0.6183634996414185, "learning_rate": 0.00011694999163636568, "loss": 1.6864, "step": 63922 }, { "epoch": 2.13, "grad_norm": 0.6345937848091125, "learning_rate": 0.00011694171081712813, "loss": 1.7727, "step": 63923 }, { "epoch": 2.13, "grad_norm": 0.621722400188446, "learning_rate": 0.00011693343022009643, "loss": 1.6971, "step": 63924 }, { "epoch": 2.13, "grad_norm": 0.6194835901260376, "learning_rate": 0.00011692514984528062, "loss": 1.6672, "step": 63925 }, { "epoch": 2.13, "grad_norm": 0.6208500862121582, "learning_rate": 0.00011691686969269058, "loss": 1.7049, "step": 63926 }, { "epoch": 2.13, "grad_norm": 0.6382849216461182, "learning_rate": 0.00011690858976233646, "loss": 1.731, "step": 63927 }, { "epoch": 2.13, "grad_norm": 0.6375912427902222, "learning_rate": 0.00011690031005422856, "loss": 1.6896, "step": 63928 }, { "epoch": 2.13, "grad_norm": 0.6360033750534058, "learning_rate": 0.00011689203056837652, "loss": 1.67, "step": 63929 }, { "epoch": 2.13, "grad_norm": 0.6232947111129761, "learning_rate": 0.00011688375130479054, "loss": 1.6731, "step": 63930 }, { "epoch": 2.13, "grad_norm": 0.6387973427772522, "learning_rate": 0.00011687547226348083, "loss": 1.6901, "step": 63931 }, { "epoch": 2.13, "grad_norm": 0.6050901412963867, "learning_rate": 0.0001168671934444573, "loss": 1.7266, "step": 63932 }, { "epoch": 2.13, "grad_norm": 0.6319769620895386, "learning_rate": 0.00011685891484772993, "loss": 1.6325, "step": 63933 }, { "epoch": 2.13, "grad_norm": 0.6449350714683533, "learning_rate": 0.00011685063647330896, "loss": 1.6695, "step": 63934 }, { "epoch": 2.13, "grad_norm": 0.6357609033584595, "learning_rate": 0.00011684235832120434, "loss": 1.7251, "step": 63935 }, { "epoch": 2.13, "grad_norm": 0.6332690119743347, "learning_rate": 0.00011683408039142595, "loss": 1.7427, "step": 63936 }, { "epoch": 2.13, "grad_norm": 0.67190021276474, "learning_rate": 0.00011682580268398407, "loss": 1.8054, "step": 63937 }, { "epoch": 2.13, "grad_norm": 0.6453516483306885, "learning_rate": 0.00011681752519888871, "loss": 1.774, "step": 63938 }, { "epoch": 2.13, "grad_norm": 0.6257822513580322, "learning_rate": 0.00011680924793614992, "loss": 1.7036, "step": 63939 }, { "epoch": 2.13, "grad_norm": 0.6253867149353027, "learning_rate": 0.00011680097089577763, "loss": 1.6873, "step": 63940 }, { "epoch": 2.13, "grad_norm": 0.6235050559043884, "learning_rate": 0.00011679269407778201, "loss": 1.7089, "step": 63941 }, { "epoch": 2.13, "grad_norm": 0.6227855682373047, "learning_rate": 0.00011678441748217311, "loss": 1.6683, "step": 63942 }, { "epoch": 2.13, "grad_norm": 0.6591367721557617, "learning_rate": 0.00011677614110896084, "loss": 1.6793, "step": 63943 }, { "epoch": 2.13, "grad_norm": 0.6220057010650635, "learning_rate": 0.0001167678649581554, "loss": 1.7065, "step": 63944 }, { "epoch": 2.13, "grad_norm": 0.648006021976471, "learning_rate": 0.0001167595890297667, "loss": 1.6961, "step": 63945 }, { "epoch": 2.13, "grad_norm": 0.6324922442436218, "learning_rate": 0.00011675131332380496, "loss": 1.7591, "step": 63946 }, { "epoch": 2.13, "grad_norm": 0.6545921564102173, "learning_rate": 0.00011674303784028001, "loss": 1.7605, "step": 63947 }, { "epoch": 2.13, "grad_norm": 0.6366949677467346, "learning_rate": 0.00011673476257920213, "loss": 1.7212, "step": 63948 }, { "epoch": 2.13, "grad_norm": 0.6419044137001038, "learning_rate": 0.00011672648754058124, "loss": 1.6829, "step": 63949 }, { "epoch": 2.13, "grad_norm": 0.647025465965271, "learning_rate": 0.00011671821272442726, "loss": 1.6743, "step": 63950 }, { "epoch": 2.13, "grad_norm": 0.6375580430030823, "learning_rate": 0.0001167099381307505, "loss": 1.663, "step": 63951 }, { "epoch": 2.13, "grad_norm": 0.6448701024055481, "learning_rate": 0.00011670166375956075, "loss": 1.6839, "step": 63952 }, { "epoch": 2.13, "grad_norm": 0.6242424249649048, "learning_rate": 0.00011669338961086828, "loss": 1.6944, "step": 63953 }, { "epoch": 2.13, "grad_norm": 0.6077324748039246, "learning_rate": 0.00011668511568468302, "loss": 1.7093, "step": 63954 }, { "epoch": 2.13, "grad_norm": 0.6466198563575745, "learning_rate": 0.00011667684198101494, "loss": 1.72, "step": 63955 }, { "epoch": 2.13, "grad_norm": 0.6550317406654358, "learning_rate": 0.00011666856849987423, "loss": 1.6862, "step": 63956 }, { "epoch": 2.13, "grad_norm": 0.6478221416473389, "learning_rate": 0.0001166602952412708, "loss": 1.7043, "step": 63957 }, { "epoch": 2.13, "grad_norm": 0.6577361822128296, "learning_rate": 0.00011665202220521485, "loss": 1.7814, "step": 63958 }, { "epoch": 2.13, "grad_norm": 0.6595866084098816, "learning_rate": 0.0001166437493917162, "loss": 1.7276, "step": 63959 }, { "epoch": 2.13, "grad_norm": 0.6338669061660767, "learning_rate": 0.00011663547680078516, "loss": 1.7201, "step": 63960 }, { "epoch": 2.13, "grad_norm": 0.6108055710792542, "learning_rate": 0.0001166272044324316, "loss": 1.6594, "step": 63961 }, { "epoch": 2.13, "grad_norm": 0.6547881364822388, "learning_rate": 0.00011661893228666552, "loss": 1.6425, "step": 63962 }, { "epoch": 2.13, "grad_norm": 0.6579234600067139, "learning_rate": 0.00011661066036349712, "loss": 1.6713, "step": 63963 }, { "epoch": 2.13, "grad_norm": 0.6400306820869446, "learning_rate": 0.00011660238866293625, "loss": 1.7273, "step": 63964 }, { "epoch": 2.13, "grad_norm": 0.6236164569854736, "learning_rate": 0.00011659411718499317, "loss": 1.6943, "step": 63965 }, { "epoch": 2.13, "grad_norm": 0.6539838910102844, "learning_rate": 0.00011658584592967772, "loss": 1.7711, "step": 63966 }, { "epoch": 2.13, "grad_norm": 0.6359229683876038, "learning_rate": 0.00011657757489700013, "loss": 1.7598, "step": 63967 }, { "epoch": 2.13, "grad_norm": 0.6291882991790771, "learning_rate": 0.00011656930408697034, "loss": 1.6843, "step": 63968 }, { "epoch": 2.13, "grad_norm": 0.6109620928764343, "learning_rate": 0.00011656103349959825, "loss": 1.6653, "step": 63969 }, { "epoch": 2.13, "grad_norm": 0.6265296339988708, "learning_rate": 0.00011655276313489416, "loss": 1.7017, "step": 63970 }, { "epoch": 2.13, "grad_norm": 0.6790220737457275, "learning_rate": 0.000116544492992868, "loss": 1.724, "step": 63971 }, { "epoch": 2.13, "grad_norm": 0.660240113735199, "learning_rate": 0.00011653622307352967, "loss": 1.7446, "step": 63972 }, { "epoch": 2.13, "grad_norm": 0.6421242952346802, "learning_rate": 0.00011652795337688936, "loss": 1.6553, "step": 63973 }, { "epoch": 2.13, "grad_norm": 1.0478310585021973, "learning_rate": 0.00011651968390295713, "loss": 1.7235, "step": 63974 }, { "epoch": 2.13, "grad_norm": 0.6147271394729614, "learning_rate": 0.00011651141465174303, "loss": 1.6715, "step": 63975 }, { "epoch": 2.13, "grad_norm": 0.6248312592506409, "learning_rate": 0.00011650314562325692, "loss": 1.683, "step": 63976 }, { "epoch": 2.13, "grad_norm": 0.6583056449890137, "learning_rate": 0.00011649487681750905, "loss": 1.7017, "step": 63977 }, { "epoch": 2.13, "grad_norm": 0.6306301355361938, "learning_rate": 0.00011648660823450934, "loss": 1.7297, "step": 63978 }, { "epoch": 2.13, "grad_norm": 0.6319660544395447, "learning_rate": 0.00011647833987426777, "loss": 1.6544, "step": 63979 }, { "epoch": 2.13, "grad_norm": 0.6311959624290466, "learning_rate": 0.00011647007173679446, "loss": 1.652, "step": 63980 }, { "epoch": 2.13, "grad_norm": 0.6196425557136536, "learning_rate": 0.00011646180382209964, "loss": 1.7241, "step": 63981 }, { "epoch": 2.13, "grad_norm": 0.6322503089904785, "learning_rate": 0.00011645353613019291, "loss": 1.6766, "step": 63982 }, { "epoch": 2.13, "grad_norm": 0.6407742500305176, "learning_rate": 0.00011644526866108456, "loss": 1.6648, "step": 63983 }, { "epoch": 2.13, "grad_norm": 0.6465764045715332, "learning_rate": 0.00011643700141478476, "loss": 1.6689, "step": 63984 }, { "epoch": 2.13, "grad_norm": 0.6368240118026733, "learning_rate": 0.00011642873439130335, "loss": 1.7399, "step": 63985 }, { "epoch": 2.13, "grad_norm": 0.6384724974632263, "learning_rate": 0.0001164204675906503, "loss": 1.7556, "step": 63986 }, { "epoch": 2.13, "grad_norm": 0.6312910318374634, "learning_rate": 0.00011641220101283575, "loss": 1.6863, "step": 63987 }, { "epoch": 2.13, "grad_norm": 0.6364914774894714, "learning_rate": 0.00011640393465786998, "loss": 1.648, "step": 63988 }, { "epoch": 2.13, "grad_norm": 0.6383266448974609, "learning_rate": 0.00011639566852576254, "loss": 1.6899, "step": 63989 }, { "epoch": 2.13, "grad_norm": 0.6262656450271606, "learning_rate": 0.00011638740261652373, "loss": 1.6719, "step": 63990 }, { "epoch": 2.13, "grad_norm": 0.6443034410476685, "learning_rate": 0.00011637913693016362, "loss": 1.7273, "step": 63991 }, { "epoch": 2.13, "grad_norm": 0.6298379302024841, "learning_rate": 0.00011637087146669224, "loss": 1.6423, "step": 63992 }, { "epoch": 2.13, "grad_norm": 0.6703752279281616, "learning_rate": 0.00011636260622611942, "loss": 1.7005, "step": 63993 }, { "epoch": 2.13, "grad_norm": 0.6659619212150574, "learning_rate": 0.00011635434120845535, "loss": 1.7429, "step": 63994 }, { "epoch": 2.13, "grad_norm": 0.6405571699142456, "learning_rate": 0.00011634607641371027, "loss": 1.6404, "step": 63995 }, { "epoch": 2.13, "grad_norm": 0.6174845695495605, "learning_rate": 0.0001163378118418938, "loss": 1.6136, "step": 63996 }, { "epoch": 2.13, "grad_norm": 0.6560362577438354, "learning_rate": 0.00011632954749301612, "loss": 1.7607, "step": 63997 }, { "epoch": 2.13, "grad_norm": 0.6286706924438477, "learning_rate": 0.00011632128336708743, "loss": 1.7656, "step": 63998 }, { "epoch": 2.13, "grad_norm": 0.6505202651023865, "learning_rate": 0.00011631301946411764, "loss": 1.7219, "step": 63999 }, { "epoch": 2.13, "grad_norm": 0.6548977494239807, "learning_rate": 0.00011630475578411669, "loss": 1.6986, "step": 64000 }, { "epoch": 2.13, "grad_norm": 0.6745439767837524, "learning_rate": 0.00011629649232709477, "loss": 1.7249, "step": 64001 }, { "epoch": 2.13, "grad_norm": 0.6556320190429688, "learning_rate": 0.00011628822909306189, "loss": 1.7301, "step": 64002 }, { "epoch": 2.13, "grad_norm": 0.6483166217803955, "learning_rate": 0.00011627996608202788, "loss": 1.6658, "step": 64003 }, { "epoch": 2.13, "grad_norm": 0.6568607091903687, "learning_rate": 0.00011627170329400295, "loss": 1.6427, "step": 64004 }, { "epoch": 2.13, "grad_norm": 0.656753659248352, "learning_rate": 0.0001162634407289972, "loss": 1.7458, "step": 64005 }, { "epoch": 2.13, "grad_norm": 0.6606135368347168, "learning_rate": 0.00011625517838702053, "loss": 1.729, "step": 64006 }, { "epoch": 2.13, "grad_norm": 0.6257666945457458, "learning_rate": 0.00011624691626808293, "loss": 1.6953, "step": 64007 }, { "epoch": 2.13, "grad_norm": 0.6363507509231567, "learning_rate": 0.0001162386543721946, "loss": 1.6705, "step": 64008 }, { "epoch": 2.13, "grad_norm": 0.6479242444038391, "learning_rate": 0.00011623039269936547, "loss": 1.733, "step": 64009 }, { "epoch": 2.13, "grad_norm": 0.6605752110481262, "learning_rate": 0.00011622213124960545, "loss": 1.6769, "step": 64010 }, { "epoch": 2.13, "grad_norm": 0.6234572529792786, "learning_rate": 0.00011621387002292478, "loss": 1.7857, "step": 64011 }, { "epoch": 2.13, "grad_norm": 0.6585731506347656, "learning_rate": 0.00011620560901933331, "loss": 1.7573, "step": 64012 }, { "epoch": 2.13, "grad_norm": 0.6322683691978455, "learning_rate": 0.00011619734823884123, "loss": 1.7183, "step": 64013 }, { "epoch": 2.13, "grad_norm": 0.6326274275779724, "learning_rate": 0.0001161890876814584, "loss": 1.7498, "step": 64014 }, { "epoch": 2.13, "grad_norm": 0.6407779455184937, "learning_rate": 0.00011618082734719504, "loss": 1.686, "step": 64015 }, { "epoch": 2.13, "grad_norm": 0.6532644033432007, "learning_rate": 0.00011617256723606095, "loss": 1.6123, "step": 64016 }, { "epoch": 2.13, "grad_norm": 0.6587754487991333, "learning_rate": 0.00011616430734806641, "loss": 1.7103, "step": 64017 }, { "epoch": 2.13, "grad_norm": 0.6352819800376892, "learning_rate": 0.0001161560476832213, "loss": 1.6539, "step": 64018 }, { "epoch": 2.13, "grad_norm": 0.6207473278045654, "learning_rate": 0.00011614778824153554, "loss": 1.741, "step": 64019 }, { "epoch": 2.13, "grad_norm": 0.6256035566329956, "learning_rate": 0.00011613952902301941, "loss": 1.6561, "step": 64020 }, { "epoch": 2.13, "grad_norm": 0.657553493976593, "learning_rate": 0.00011613127002768278, "loss": 1.6912, "step": 64021 }, { "epoch": 2.13, "grad_norm": 0.6299335956573486, "learning_rate": 0.00011612301125553562, "loss": 1.7082, "step": 64022 }, { "epoch": 2.13, "grad_norm": 0.6565772891044617, "learning_rate": 0.00011611475270658798, "loss": 1.7672, "step": 64023 }, { "epoch": 2.13, "grad_norm": 0.6373128294944763, "learning_rate": 0.00011610649438085009, "loss": 1.6915, "step": 64024 }, { "epoch": 2.13, "grad_norm": 0.6300690770149231, "learning_rate": 0.00011609823627833178, "loss": 1.7373, "step": 64025 }, { "epoch": 2.13, "grad_norm": 0.6367238759994507, "learning_rate": 0.00011608997839904304, "loss": 1.7724, "step": 64026 }, { "epoch": 2.13, "grad_norm": 0.646111249923706, "learning_rate": 0.00011608172074299406, "loss": 1.7263, "step": 64027 }, { "epoch": 2.13, "grad_norm": 0.6177177429199219, "learning_rate": 0.00011607346331019479, "loss": 1.668, "step": 64028 }, { "epoch": 2.13, "grad_norm": 0.6606717109680176, "learning_rate": 0.0001160652061006551, "loss": 1.7326, "step": 64029 }, { "epoch": 2.13, "grad_norm": 0.6481027007102966, "learning_rate": 0.00011605694911438515, "loss": 1.7524, "step": 64030 }, { "epoch": 2.13, "grad_norm": 0.636536180973053, "learning_rate": 0.0001160486923513952, "loss": 1.6767, "step": 64031 }, { "epoch": 2.13, "grad_norm": 0.6195096969604492, "learning_rate": 0.0001160404358116948, "loss": 1.7129, "step": 64032 }, { "epoch": 2.13, "grad_norm": 1.3625872135162354, "learning_rate": 0.00011603217949529424, "loss": 1.7371, "step": 64033 }, { "epoch": 2.13, "grad_norm": 0.6599123477935791, "learning_rate": 0.00011602392340220359, "loss": 1.7035, "step": 64034 }, { "epoch": 2.13, "grad_norm": 0.6245085000991821, "learning_rate": 0.00011601566753243277, "loss": 1.7024, "step": 64035 }, { "epoch": 2.13, "grad_norm": 0.6474822163581848, "learning_rate": 0.00011600741188599173, "loss": 1.6983, "step": 64036 }, { "epoch": 2.13, "grad_norm": 0.6464003920555115, "learning_rate": 0.00011599915646289058, "loss": 1.7579, "step": 64037 }, { "epoch": 2.13, "grad_norm": 0.650841236114502, "learning_rate": 0.00011599090126313955, "loss": 1.7673, "step": 64038 }, { "epoch": 2.13, "grad_norm": 0.6527811288833618, "learning_rate": 0.00011598264628674827, "loss": 1.7356, "step": 64039 }, { "epoch": 2.13, "grad_norm": 0.6155880093574524, "learning_rate": 0.0001159743915337269, "loss": 1.7264, "step": 64040 }, { "epoch": 2.13, "grad_norm": 0.636804461479187, "learning_rate": 0.00011596613700408566, "loss": 1.6999, "step": 64041 }, { "epoch": 2.13, "grad_norm": 0.6428743004798889, "learning_rate": 0.00011595788269783438, "loss": 1.681, "step": 64042 }, { "epoch": 2.13, "grad_norm": 0.6210783123970032, "learning_rate": 0.00011594962861498301, "loss": 1.7195, "step": 64043 }, { "epoch": 2.13, "grad_norm": 0.6399785876274109, "learning_rate": 0.00011594137475554168, "loss": 1.6778, "step": 64044 }, { "epoch": 2.13, "grad_norm": 0.643438994884491, "learning_rate": 0.00011593312111952061, "loss": 1.7048, "step": 64045 }, { "epoch": 2.13, "grad_norm": 0.6404476165771484, "learning_rate": 0.00011592486770692938, "loss": 1.7179, "step": 64046 }, { "epoch": 2.13, "grad_norm": 0.6616342663764954, "learning_rate": 0.00011591661451777825, "loss": 1.7283, "step": 64047 }, { "epoch": 2.13, "grad_norm": 0.6271497011184692, "learning_rate": 0.00011590836155207734, "loss": 1.6295, "step": 64048 }, { "epoch": 2.13, "grad_norm": 0.6586237549781799, "learning_rate": 0.00011590010880983656, "loss": 1.7765, "step": 64049 }, { "epoch": 2.13, "grad_norm": 0.6285706758499146, "learning_rate": 0.00011589185629106581, "loss": 1.623, "step": 64050 }, { "epoch": 2.13, "grad_norm": 0.6366919875144958, "learning_rate": 0.00011588360399577533, "loss": 1.6739, "step": 64051 }, { "epoch": 2.13, "grad_norm": 0.6151952743530273, "learning_rate": 0.00011587535192397501, "loss": 1.6422, "step": 64052 }, { "epoch": 2.13, "grad_norm": 0.6417872309684753, "learning_rate": 0.00011586710007567478, "loss": 1.6879, "step": 64053 }, { "epoch": 2.13, "grad_norm": 0.6346861124038696, "learning_rate": 0.00011585884845088479, "loss": 1.7207, "step": 64054 }, { "epoch": 2.13, "grad_norm": 0.6370694637298584, "learning_rate": 0.00011585059704961511, "loss": 1.7787, "step": 64055 }, { "epoch": 2.13, "grad_norm": 0.6236859560012817, "learning_rate": 0.00011584234587187568, "loss": 1.7499, "step": 64056 }, { "epoch": 2.13, "grad_norm": 0.6379460692405701, "learning_rate": 0.00011583409491767637, "loss": 1.6294, "step": 64057 }, { "epoch": 2.13, "grad_norm": 0.6415701508522034, "learning_rate": 0.00011582584418702748, "loss": 1.7521, "step": 64058 }, { "epoch": 2.13, "grad_norm": 0.6375263929367065, "learning_rate": 0.00011581759367993885, "loss": 1.7081, "step": 64059 }, { "epoch": 2.13, "grad_norm": 0.6841588616371155, "learning_rate": 0.00011580934339642042, "loss": 1.7723, "step": 64060 }, { "epoch": 2.13, "grad_norm": 0.6388629078865051, "learning_rate": 0.00011580109333648245, "loss": 1.7634, "step": 64061 }, { "epoch": 2.13, "grad_norm": 0.6597045063972473, "learning_rate": 0.00011579284350013464, "loss": 1.7484, "step": 64062 }, { "epoch": 2.13, "grad_norm": 0.644170880317688, "learning_rate": 0.00011578459388738732, "loss": 1.7569, "step": 64063 }, { "epoch": 2.13, "grad_norm": 0.6264175772666931, "learning_rate": 0.00011577634449825026, "loss": 1.7069, "step": 64064 }, { "epoch": 2.13, "grad_norm": 0.6478546857833862, "learning_rate": 0.00011576809533273368, "loss": 1.6538, "step": 64065 }, { "epoch": 2.13, "grad_norm": 0.6477378606796265, "learning_rate": 0.0001157598463908475, "loss": 1.7411, "step": 64066 }, { "epoch": 2.13, "grad_norm": 0.6499363780021667, "learning_rate": 0.00011575159767260158, "loss": 1.7035, "step": 64067 }, { "epoch": 2.13, "grad_norm": 0.6447346210479736, "learning_rate": 0.00011574334917800619, "loss": 1.7219, "step": 64068 }, { "epoch": 2.13, "grad_norm": 0.6517519354820251, "learning_rate": 0.00011573510090707113, "loss": 1.6988, "step": 64069 }, { "epoch": 2.13, "grad_norm": 0.6243858933448792, "learning_rate": 0.00011572685285980662, "loss": 1.6954, "step": 64070 }, { "epoch": 2.13, "grad_norm": 0.6268067955970764, "learning_rate": 0.00011571860503622254, "loss": 1.6435, "step": 64071 }, { "epoch": 2.13, "grad_norm": 0.6388691067695618, "learning_rate": 0.0001157103574363288, "loss": 1.7201, "step": 64072 }, { "epoch": 2.13, "grad_norm": 0.6256643533706665, "learning_rate": 0.00011570211006013569, "loss": 1.6916, "step": 64073 }, { "epoch": 2.13, "grad_norm": 0.6335269808769226, "learning_rate": 0.00011569386290765293, "loss": 1.6701, "step": 64074 }, { "epoch": 2.13, "grad_norm": 0.648015558719635, "learning_rate": 0.00011568561597889074, "loss": 1.7056, "step": 64075 }, { "epoch": 2.13, "grad_norm": 0.6567534804344177, "learning_rate": 0.00011567736927385898, "loss": 1.7448, "step": 64076 }, { "epoch": 2.13, "grad_norm": 0.6179929375648499, "learning_rate": 0.00011566912279256784, "loss": 1.7379, "step": 64077 }, { "epoch": 2.13, "grad_norm": 0.6356794834136963, "learning_rate": 0.0001156608765350272, "loss": 1.6805, "step": 64078 }, { "epoch": 2.13, "grad_norm": 0.6388513445854187, "learning_rate": 0.00011565263050124703, "loss": 1.6879, "step": 64079 }, { "epoch": 2.13, "grad_norm": 0.6258000135421753, "learning_rate": 0.0001156443846912375, "loss": 1.6352, "step": 64080 }, { "epoch": 2.13, "grad_norm": 0.6454208493232727, "learning_rate": 0.0001156361391050084, "loss": 1.6271, "step": 64081 }, { "epoch": 2.13, "grad_norm": 0.6252293586730957, "learning_rate": 0.00011562789374256998, "loss": 1.6907, "step": 64082 }, { "epoch": 2.13, "grad_norm": 0.629924476146698, "learning_rate": 0.000115619648603932, "loss": 1.7109, "step": 64083 }, { "epoch": 2.13, "grad_norm": 0.6248866319656372, "learning_rate": 0.00011561140368910473, "loss": 1.7297, "step": 64084 }, { "epoch": 2.13, "grad_norm": 0.6252842545509338, "learning_rate": 0.00011560315899809805, "loss": 1.6724, "step": 64085 }, { "epoch": 2.13, "grad_norm": 0.6138225793838501, "learning_rate": 0.00011559491453092186, "loss": 1.7286, "step": 64086 }, { "epoch": 2.13, "grad_norm": 0.6666294932365417, "learning_rate": 0.00011558667028758635, "loss": 1.748, "step": 64087 }, { "epoch": 2.13, "grad_norm": 0.6399202942848206, "learning_rate": 0.00011557842626810149, "loss": 1.71, "step": 64088 }, { "epoch": 2.13, "grad_norm": 0.6277528405189514, "learning_rate": 0.00011557018247247714, "loss": 1.7025, "step": 64089 }, { "epoch": 2.13, "grad_norm": 0.6464217901229858, "learning_rate": 0.00011556193890072339, "loss": 1.7446, "step": 64090 }, { "epoch": 2.13, "grad_norm": 0.6536722183227539, "learning_rate": 0.00011555369555285039, "loss": 1.7123, "step": 64091 }, { "epoch": 2.13, "grad_norm": 0.6343750953674316, "learning_rate": 0.00011554545242886803, "loss": 1.6508, "step": 64092 }, { "epoch": 2.13, "grad_norm": 0.6307540535926819, "learning_rate": 0.0001155372095287862, "loss": 1.7552, "step": 64093 }, { "epoch": 2.13, "grad_norm": 0.6517623662948608, "learning_rate": 0.00011552896685261511, "loss": 1.6252, "step": 64094 }, { "epoch": 2.13, "grad_norm": 0.6397890448570251, "learning_rate": 0.00011552072440036468, "loss": 1.6561, "step": 64095 }, { "epoch": 2.13, "grad_norm": 0.609718918800354, "learning_rate": 0.00011551248217204479, "loss": 1.7819, "step": 64096 }, { "epoch": 2.13, "grad_norm": 0.6691725254058838, "learning_rate": 0.0001155042401676656, "loss": 1.6937, "step": 64097 }, { "epoch": 2.13, "grad_norm": 0.6728287935256958, "learning_rate": 0.00011549599838723728, "loss": 1.6869, "step": 64098 }, { "epoch": 2.13, "grad_norm": 0.6508171558380127, "learning_rate": 0.0001154877568307694, "loss": 1.6663, "step": 64099 }, { "epoch": 2.13, "grad_norm": 0.6374871730804443, "learning_rate": 0.00011547951549827223, "loss": 1.7237, "step": 64100 }, { "epoch": 2.13, "grad_norm": 0.6517813205718994, "learning_rate": 0.00011547127438975582, "loss": 1.6588, "step": 64101 }, { "epoch": 2.13, "grad_norm": 0.645763099193573, "learning_rate": 0.0001154630335052301, "loss": 1.6845, "step": 64102 }, { "epoch": 2.13, "grad_norm": 0.632756769657135, "learning_rate": 0.00011545479284470495, "loss": 1.6182, "step": 64103 }, { "epoch": 2.13, "grad_norm": 0.6219310164451599, "learning_rate": 0.0001154465524081905, "loss": 1.6784, "step": 64104 }, { "epoch": 2.13, "grad_norm": 0.6381940841674805, "learning_rate": 0.00011543831219569695, "loss": 1.7038, "step": 64105 }, { "epoch": 2.13, "grad_norm": 0.6613773107528687, "learning_rate": 0.0001154300722072339, "loss": 1.6776, "step": 64106 }, { "epoch": 2.13, "grad_norm": 0.655116081237793, "learning_rate": 0.00011542183244281152, "loss": 1.7197, "step": 64107 }, { "epoch": 2.13, "grad_norm": 0.6223410367965698, "learning_rate": 0.00011541359290243997, "loss": 1.6348, "step": 64108 }, { "epoch": 2.13, "grad_norm": 0.7881081104278564, "learning_rate": 0.00011540535358612911, "loss": 1.7076, "step": 64109 }, { "epoch": 2.13, "grad_norm": 0.621985912322998, "learning_rate": 0.00011539711449388885, "loss": 1.6698, "step": 64110 }, { "epoch": 2.13, "grad_norm": 0.66783207654953, "learning_rate": 0.00011538887562572931, "loss": 1.7531, "step": 64111 }, { "epoch": 2.13, "grad_norm": 0.6764554977416992, "learning_rate": 0.00011538063698166065, "loss": 1.7583, "step": 64112 }, { "epoch": 2.13, "grad_norm": 0.6565667390823364, "learning_rate": 0.0001153723985616925, "loss": 1.6719, "step": 64113 }, { "epoch": 2.13, "grad_norm": 0.6374695897102356, "learning_rate": 0.00011536416036583507, "loss": 1.6604, "step": 64114 }, { "epoch": 2.13, "grad_norm": 0.6603772044181824, "learning_rate": 0.00011535592239409845, "loss": 1.6877, "step": 64115 }, { "epoch": 2.13, "grad_norm": 0.6291075348854065, "learning_rate": 0.00011534768464649249, "loss": 1.6579, "step": 64116 }, { "epoch": 2.13, "grad_norm": 0.6468717455863953, "learning_rate": 0.00011533944712302718, "loss": 1.6419, "step": 64117 }, { "epoch": 2.13, "grad_norm": 0.6687736511230469, "learning_rate": 0.00011533120982371263, "loss": 1.6477, "step": 64118 }, { "epoch": 2.13, "grad_norm": 0.6238109469413757, "learning_rate": 0.00011532297274855883, "loss": 1.6646, "step": 64119 }, { "epoch": 2.13, "grad_norm": 0.6314833760261536, "learning_rate": 0.00011531473589757558, "loss": 1.724, "step": 64120 }, { "epoch": 2.13, "grad_norm": 0.6439820528030396, "learning_rate": 0.00011530649927077305, "loss": 1.7022, "step": 64121 }, { "epoch": 2.13, "grad_norm": 0.6262089014053345, "learning_rate": 0.00011529826286816134, "loss": 1.7081, "step": 64122 }, { "epoch": 2.13, "grad_norm": 0.6395482420921326, "learning_rate": 0.00011529002668975029, "loss": 1.692, "step": 64123 }, { "epoch": 2.13, "grad_norm": 0.6655665040016174, "learning_rate": 0.00011528179073554983, "loss": 1.7173, "step": 64124 }, { "epoch": 2.13, "grad_norm": 0.6525301337242126, "learning_rate": 0.00011527355500557021, "loss": 1.7017, "step": 64125 }, { "epoch": 2.13, "grad_norm": 0.6912898421287537, "learning_rate": 0.00011526531949982112, "loss": 1.78, "step": 64126 }, { "epoch": 2.13, "grad_norm": 0.6346611976623535, "learning_rate": 0.00011525708421831282, "loss": 1.737, "step": 64127 }, { "epoch": 2.13, "grad_norm": 0.6365346312522888, "learning_rate": 0.00011524884916105525, "loss": 1.7763, "step": 64128 }, { "epoch": 2.13, "grad_norm": 0.6564444899559021, "learning_rate": 0.00011524061432805818, "loss": 1.7365, "step": 64129 }, { "epoch": 2.13, "grad_norm": 0.6380736231803894, "learning_rate": 0.00011523237971933194, "loss": 1.6821, "step": 64130 }, { "epoch": 2.13, "grad_norm": 0.6697516441345215, "learning_rate": 0.00011522414533488624, "loss": 1.6759, "step": 64131 }, { "epoch": 2.13, "grad_norm": 0.6462276577949524, "learning_rate": 0.00011521591117473133, "loss": 1.6595, "step": 64132 }, { "epoch": 2.13, "grad_norm": 0.6199610233306885, "learning_rate": 0.00011520767723887691, "loss": 1.7251, "step": 64133 }, { "epoch": 2.13, "grad_norm": 0.6310585737228394, "learning_rate": 0.00011519944352733332, "loss": 1.7139, "step": 64134 }, { "epoch": 2.13, "grad_norm": 0.638505220413208, "learning_rate": 0.00011519121004011033, "loss": 1.7019, "step": 64135 }, { "epoch": 2.13, "grad_norm": 0.658174455165863, "learning_rate": 0.00011518297677721789, "loss": 1.6668, "step": 64136 }, { "epoch": 2.13, "grad_norm": 0.6481996178627014, "learning_rate": 0.00011517474373866621, "loss": 1.6979, "step": 64137 }, { "epoch": 2.13, "grad_norm": 0.6185612678527832, "learning_rate": 0.00011516651092446515, "loss": 1.6231, "step": 64138 }, { "epoch": 2.13, "grad_norm": 0.6307878494262695, "learning_rate": 0.00011515827833462457, "loss": 1.6937, "step": 64139 }, { "epoch": 2.13, "grad_norm": 0.6226508021354675, "learning_rate": 0.00011515004596915465, "loss": 1.7142, "step": 64140 }, { "epoch": 2.13, "grad_norm": 0.6392741799354553, "learning_rate": 0.0001151418138280654, "loss": 1.6744, "step": 64141 }, { "epoch": 2.13, "grad_norm": 0.6226838827133179, "learning_rate": 0.00011513358191136677, "loss": 1.7111, "step": 64142 }, { "epoch": 2.13, "grad_norm": 0.6355123519897461, "learning_rate": 0.00011512535021906865, "loss": 1.6827, "step": 64143 }, { "epoch": 2.13, "grad_norm": 0.6540481448173523, "learning_rate": 0.0001151171187511812, "loss": 1.6728, "step": 64144 }, { "epoch": 2.13, "grad_norm": 0.6146589517593384, "learning_rate": 0.00011510888750771434, "loss": 1.7639, "step": 64145 }, { "epoch": 2.13, "grad_norm": 0.6217461228370667, "learning_rate": 0.00011510065648867792, "loss": 1.7004, "step": 64146 }, { "epoch": 2.13, "grad_norm": 0.6296672821044922, "learning_rate": 0.00011509242569408206, "loss": 1.6615, "step": 64147 }, { "epoch": 2.13, "grad_norm": 0.6231836676597595, "learning_rate": 0.00011508419512393697, "loss": 1.7392, "step": 64148 }, { "epoch": 2.13, "grad_norm": 0.6629961729049683, "learning_rate": 0.00011507596477825223, "loss": 1.7296, "step": 64149 }, { "epoch": 2.13, "grad_norm": 0.6242939829826355, "learning_rate": 0.00011506773465703799, "loss": 1.6994, "step": 64150 }, { "epoch": 2.13, "grad_norm": 0.6280528903007507, "learning_rate": 0.00011505950476030439, "loss": 1.6917, "step": 64151 }, { "epoch": 2.13, "grad_norm": 0.6215589046478271, "learning_rate": 0.00011505127508806132, "loss": 1.7291, "step": 64152 }, { "epoch": 2.13, "grad_norm": 0.658839225769043, "learning_rate": 0.00011504304564031862, "loss": 1.7297, "step": 64153 }, { "epoch": 2.13, "grad_norm": 0.661098301410675, "learning_rate": 0.00011503481641708641, "loss": 1.6934, "step": 64154 }, { "epoch": 2.13, "grad_norm": 0.6775213479995728, "learning_rate": 0.0001150265874183749, "loss": 1.7621, "step": 64155 }, { "epoch": 2.13, "grad_norm": 0.6240637898445129, "learning_rate": 0.00011501835864419364, "loss": 1.6185, "step": 64156 }, { "epoch": 2.13, "grad_norm": 0.6550336480140686, "learning_rate": 0.00011501013009455285, "loss": 1.7613, "step": 64157 }, { "epoch": 2.13, "grad_norm": 0.6232014894485474, "learning_rate": 0.00011500190176946263, "loss": 1.688, "step": 64158 }, { "epoch": 2.13, "grad_norm": 0.6239668726921082, "learning_rate": 0.0001149936736689328, "loss": 1.7378, "step": 64159 }, { "epoch": 2.13, "grad_norm": 0.6323040723800659, "learning_rate": 0.00011498544579297332, "loss": 1.7162, "step": 64160 }, { "epoch": 2.13, "grad_norm": 0.6379273533821106, "learning_rate": 0.00011497721814159422, "loss": 1.651, "step": 64161 }, { "epoch": 2.13, "grad_norm": 0.6239676475524902, "learning_rate": 0.00011496899071480573, "loss": 1.742, "step": 64162 }, { "epoch": 2.13, "grad_norm": 0.6359822154045105, "learning_rate": 0.00011496076351261741, "loss": 1.6689, "step": 64163 }, { "epoch": 2.13, "grad_norm": 0.6694557666778564, "learning_rate": 0.00011495253653503947, "loss": 1.6618, "step": 64164 }, { "epoch": 2.13, "grad_norm": 0.623224139213562, "learning_rate": 0.000114944309782082, "loss": 1.7095, "step": 64165 }, { "epoch": 2.13, "grad_norm": 0.6246106028556824, "learning_rate": 0.00011493608325375485, "loss": 1.7153, "step": 64166 }, { "epoch": 2.13, "grad_norm": 0.6147491335868835, "learning_rate": 0.00011492785695006795, "loss": 1.6721, "step": 64167 }, { "epoch": 2.13, "grad_norm": 0.6490445137023926, "learning_rate": 0.00011491963087103143, "loss": 1.7034, "step": 64168 }, { "epoch": 2.13, "grad_norm": 0.6458133459091187, "learning_rate": 0.00011491140501665521, "loss": 1.7085, "step": 64169 }, { "epoch": 2.13, "grad_norm": 0.6364882588386536, "learning_rate": 0.0001149031793869492, "loss": 1.749, "step": 64170 }, { "epoch": 2.13, "grad_norm": 0.6263108253479004, "learning_rate": 0.00011489495398192345, "loss": 1.7015, "step": 64171 }, { "epoch": 2.14, "grad_norm": 0.6427097916603088, "learning_rate": 0.00011488672880158805, "loss": 1.6405, "step": 64172 }, { "epoch": 2.14, "grad_norm": 0.6292046308517456, "learning_rate": 0.00011487850384595287, "loss": 1.7077, "step": 64173 }, { "epoch": 2.14, "grad_norm": 0.6287356019020081, "learning_rate": 0.00011487027911502782, "loss": 1.6641, "step": 64174 }, { "epoch": 2.14, "grad_norm": 0.6394410729408264, "learning_rate": 0.00011486205460882308, "loss": 1.7084, "step": 64175 }, { "epoch": 2.14, "grad_norm": 0.6123428344726562, "learning_rate": 0.00011485383032734852, "loss": 1.7046, "step": 64176 }, { "epoch": 2.14, "grad_norm": 0.6349093914031982, "learning_rate": 0.00011484560627061404, "loss": 1.7242, "step": 64177 }, { "epoch": 2.14, "grad_norm": 0.6615522503852844, "learning_rate": 0.00011483738243862981, "loss": 1.7993, "step": 64178 }, { "epoch": 2.14, "grad_norm": 0.6417121291160583, "learning_rate": 0.00011482915883140558, "loss": 1.6741, "step": 64179 }, { "epoch": 2.14, "grad_norm": 0.6361244320869446, "learning_rate": 0.00011482093544895163, "loss": 1.6956, "step": 64180 }, { "epoch": 2.14, "grad_norm": 0.6267308592796326, "learning_rate": 0.00011481271229127765, "loss": 1.7403, "step": 64181 }, { "epoch": 2.14, "grad_norm": 0.6163235306739807, "learning_rate": 0.00011480448935839383, "loss": 1.756, "step": 64182 }, { "epoch": 2.14, "grad_norm": 0.6145355105400085, "learning_rate": 0.00011479626665031011, "loss": 1.7204, "step": 64183 }, { "epoch": 2.14, "grad_norm": 0.6326925754547119, "learning_rate": 0.00011478804416703628, "loss": 1.7158, "step": 64184 }, { "epoch": 2.14, "grad_norm": 0.6362618803977966, "learning_rate": 0.00011477982190858262, "loss": 1.7424, "step": 64185 }, { "epoch": 2.14, "grad_norm": 0.6495897769927979, "learning_rate": 0.00011477159987495886, "loss": 1.7055, "step": 64186 }, { "epoch": 2.14, "grad_norm": 0.6581894755363464, "learning_rate": 0.0001147633780661752, "loss": 1.7413, "step": 64187 }, { "epoch": 2.14, "grad_norm": 0.6513819694519043, "learning_rate": 0.00011475515648224147, "loss": 1.6825, "step": 64188 }, { "epoch": 2.14, "grad_norm": 0.6209508180618286, "learning_rate": 0.00011474693512316761, "loss": 1.604, "step": 64189 }, { "epoch": 2.14, "grad_norm": 0.6371029019355774, "learning_rate": 0.00011473871398896378, "loss": 1.7674, "step": 64190 }, { "epoch": 2.14, "grad_norm": 0.6435036659240723, "learning_rate": 0.00011473049307963973, "loss": 1.747, "step": 64191 }, { "epoch": 2.14, "grad_norm": 0.6235638856887817, "learning_rate": 0.00011472227239520568, "loss": 1.6436, "step": 64192 }, { "epoch": 2.14, "grad_norm": 0.624728262424469, "learning_rate": 0.00011471405193567137, "loss": 1.7713, "step": 64193 }, { "epoch": 2.14, "grad_norm": 0.632256031036377, "learning_rate": 0.00011470583170104702, "loss": 1.6386, "step": 64194 }, { "epoch": 2.14, "grad_norm": 0.6483126878738403, "learning_rate": 0.0001146976116913425, "loss": 1.6909, "step": 64195 }, { "epoch": 2.14, "grad_norm": 0.6198863387107849, "learning_rate": 0.00011468939190656764, "loss": 1.7152, "step": 64196 }, { "epoch": 2.14, "grad_norm": 0.6456180214881897, "learning_rate": 0.00011468117234673268, "loss": 1.7345, "step": 64197 }, { "epoch": 2.14, "grad_norm": 0.6448021531105042, "learning_rate": 0.00011467295301184747, "loss": 1.749, "step": 64198 }, { "epoch": 2.14, "grad_norm": 0.6405766606330872, "learning_rate": 0.00011466473390192189, "loss": 1.7068, "step": 64199 }, { "epoch": 2.14, "grad_norm": 0.6205247640609741, "learning_rate": 0.000114656515016966, "loss": 1.769, "step": 64200 }, { "epoch": 2.14, "grad_norm": 0.6430550813674927, "learning_rate": 0.00011464829635698992, "loss": 1.6428, "step": 64201 }, { "epoch": 2.14, "grad_norm": 0.654059886932373, "learning_rate": 0.00011464007792200347, "loss": 1.7046, "step": 64202 }, { "epoch": 2.14, "grad_norm": 0.6559520363807678, "learning_rate": 0.00011463185971201658, "loss": 1.8476, "step": 64203 }, { "epoch": 2.14, "grad_norm": 0.6335240602493286, "learning_rate": 0.00011462364172703938, "loss": 1.6959, "step": 64204 }, { "epoch": 2.14, "grad_norm": 0.6484379768371582, "learning_rate": 0.00011461542396708176, "loss": 1.7246, "step": 64205 }, { "epoch": 2.14, "grad_norm": 0.6498313546180725, "learning_rate": 0.0001146072064321536, "loss": 1.646, "step": 64206 }, { "epoch": 2.14, "grad_norm": 0.6322322487831116, "learning_rate": 0.000114598989122265, "loss": 1.6848, "step": 64207 }, { "epoch": 2.14, "grad_norm": 0.6461678147315979, "learning_rate": 0.00011459077203742599, "loss": 1.6541, "step": 64208 }, { "epoch": 2.14, "grad_norm": 0.6257516145706177, "learning_rate": 0.00011458255517764646, "loss": 1.7106, "step": 64209 }, { "epoch": 2.14, "grad_norm": 0.6352956295013428, "learning_rate": 0.00011457433854293631, "loss": 1.7542, "step": 64210 }, { "epoch": 2.14, "grad_norm": 0.6486543416976929, "learning_rate": 0.00011456612213330567, "loss": 1.7432, "step": 64211 }, { "epoch": 2.14, "grad_norm": 0.6348186731338501, "learning_rate": 0.00011455790594876445, "loss": 1.741, "step": 64212 }, { "epoch": 2.14, "grad_norm": 0.6299421191215515, "learning_rate": 0.0001145496899893225, "loss": 1.6622, "step": 64213 }, { "epoch": 2.14, "grad_norm": 0.6381835341453552, "learning_rate": 0.00011454147425498988, "loss": 1.7051, "step": 64214 }, { "epoch": 2.14, "grad_norm": 0.6399009823799133, "learning_rate": 0.00011453325874577682, "loss": 1.6726, "step": 64215 }, { "epoch": 2.14, "grad_norm": 0.6484602093696594, "learning_rate": 0.00011452504346169284, "loss": 1.6541, "step": 64216 }, { "epoch": 2.14, "grad_norm": 0.606935441493988, "learning_rate": 0.00011451682840274814, "loss": 1.6562, "step": 64217 }, { "epoch": 2.14, "grad_norm": 0.6244409680366516, "learning_rate": 0.00011450861356895276, "loss": 1.654, "step": 64218 }, { "epoch": 2.14, "grad_norm": 0.6505177021026611, "learning_rate": 0.00011450039896031662, "loss": 1.6252, "step": 64219 }, { "epoch": 2.14, "grad_norm": 0.631350040435791, "learning_rate": 0.00011449218457684956, "loss": 1.7164, "step": 64220 }, { "epoch": 2.14, "grad_norm": 0.6358551383018494, "learning_rate": 0.00011448397041856164, "loss": 1.6652, "step": 64221 }, { "epoch": 2.14, "grad_norm": 0.6496862769126892, "learning_rate": 0.00011447575648546305, "loss": 1.7796, "step": 64222 }, { "epoch": 2.14, "grad_norm": 0.6452537178993225, "learning_rate": 0.00011446754277756335, "loss": 1.7235, "step": 64223 }, { "epoch": 2.14, "grad_norm": 0.6236796975135803, "learning_rate": 0.0001144593292948727, "loss": 1.7119, "step": 64224 }, { "epoch": 2.14, "grad_norm": 0.6429697275161743, "learning_rate": 0.00011445111603740124, "loss": 1.7006, "step": 64225 }, { "epoch": 2.14, "grad_norm": 0.6523550748825073, "learning_rate": 0.00011444290300515874, "loss": 1.6553, "step": 64226 }, { "epoch": 2.14, "grad_norm": 0.6677262783050537, "learning_rate": 0.00011443469019815513, "loss": 1.7051, "step": 64227 }, { "epoch": 2.14, "grad_norm": 0.6278210878372192, "learning_rate": 0.00011442647761640044, "loss": 1.7063, "step": 64228 }, { "epoch": 2.14, "grad_norm": 0.6471679210662842, "learning_rate": 0.0001144182652599049, "loss": 1.7843, "step": 64229 }, { "epoch": 2.14, "grad_norm": 0.6231229305267334, "learning_rate": 0.00011441005312867804, "loss": 1.6654, "step": 64230 }, { "epoch": 2.14, "grad_norm": 0.6515024900436401, "learning_rate": 0.00011440184122272999, "loss": 1.7013, "step": 64231 }, { "epoch": 2.14, "grad_norm": 0.6425850987434387, "learning_rate": 0.00011439362954207091, "loss": 1.6848, "step": 64232 }, { "epoch": 2.14, "grad_norm": 0.6284764409065247, "learning_rate": 0.00011438541808671061, "loss": 1.7795, "step": 64233 }, { "epoch": 2.14, "grad_norm": 0.6272414922714233, "learning_rate": 0.00011437720685665894, "loss": 1.7345, "step": 64234 }, { "epoch": 2.14, "grad_norm": 0.6471743583679199, "learning_rate": 0.0001143689958519261, "loss": 1.629, "step": 64235 }, { "epoch": 2.14, "grad_norm": 0.6501975059509277, "learning_rate": 0.00011436078507252191, "loss": 1.7735, "step": 64236 }, { "epoch": 2.14, "grad_norm": 0.6389163732528687, "learning_rate": 0.0001143525745184563, "loss": 1.6756, "step": 64237 }, { "epoch": 2.14, "grad_norm": 0.6572193503379822, "learning_rate": 0.00011434436418973935, "loss": 1.7028, "step": 64238 }, { "epoch": 2.14, "grad_norm": 0.6487857103347778, "learning_rate": 0.00011433615408638105, "loss": 1.7719, "step": 64239 }, { "epoch": 2.14, "grad_norm": 0.6230828762054443, "learning_rate": 0.00011432794420839132, "loss": 1.7276, "step": 64240 }, { "epoch": 2.14, "grad_norm": 0.7005200982093811, "learning_rate": 0.00011431973455577999, "loss": 1.731, "step": 64241 }, { "epoch": 2.14, "grad_norm": 0.617792546749115, "learning_rate": 0.00011431152512855724, "loss": 1.6479, "step": 64242 }, { "epoch": 2.14, "grad_norm": 0.6217479705810547, "learning_rate": 0.00011430331592673285, "loss": 1.722, "step": 64243 }, { "epoch": 2.14, "grad_norm": 0.6633934378623962, "learning_rate": 0.00011429510695031696, "loss": 1.7345, "step": 64244 }, { "epoch": 2.14, "grad_norm": 0.6649240255355835, "learning_rate": 0.00011428689819931946, "loss": 1.7404, "step": 64245 }, { "epoch": 2.14, "grad_norm": 0.6315767168998718, "learning_rate": 0.00011427868967375019, "loss": 1.6758, "step": 64246 }, { "epoch": 2.14, "grad_norm": 0.6449462175369263, "learning_rate": 0.00011427048137361937, "loss": 1.679, "step": 64247 }, { "epoch": 2.14, "grad_norm": 0.6325246095657349, "learning_rate": 0.00011426227329893666, "loss": 1.7361, "step": 64248 }, { "epoch": 2.14, "grad_norm": 0.6478357911109924, "learning_rate": 0.00011425406544971231, "loss": 1.6628, "step": 64249 }, { "epoch": 2.14, "grad_norm": 0.6563084721565247, "learning_rate": 0.00011424585782595606, "loss": 1.6423, "step": 64250 }, { "epoch": 2.14, "grad_norm": 0.6286344528198242, "learning_rate": 0.00011423765042767806, "loss": 1.6842, "step": 64251 }, { "epoch": 2.14, "grad_norm": 0.6375771164894104, "learning_rate": 0.00011422944325488819, "loss": 1.7389, "step": 64252 }, { "epoch": 2.14, "grad_norm": 0.6379299163818359, "learning_rate": 0.0001142212363075963, "loss": 1.6935, "step": 64253 }, { "epoch": 2.14, "grad_norm": 0.6508601307868958, "learning_rate": 0.00011421302958581256, "loss": 1.6866, "step": 64254 }, { "epoch": 2.14, "grad_norm": 0.6401705741882324, "learning_rate": 0.00011420482308954682, "loss": 1.6996, "step": 64255 }, { "epoch": 2.14, "grad_norm": 0.6411434412002563, "learning_rate": 0.00011419661681880898, "loss": 1.6919, "step": 64256 }, { "epoch": 2.14, "grad_norm": 0.640722393989563, "learning_rate": 0.00011418841077360901, "loss": 1.7368, "step": 64257 }, { "epoch": 2.14, "grad_norm": 0.6456212997436523, "learning_rate": 0.00011418020495395708, "loss": 1.7709, "step": 64258 }, { "epoch": 2.14, "grad_norm": 0.6494461894035339, "learning_rate": 0.00011417199935986297, "loss": 1.7155, "step": 64259 }, { "epoch": 2.14, "grad_norm": 0.6273016333580017, "learning_rate": 0.00011416379399133661, "loss": 1.6391, "step": 64260 }, { "epoch": 2.14, "grad_norm": 0.6589719653129578, "learning_rate": 0.0001141555888483881, "loss": 1.7821, "step": 64261 }, { "epoch": 2.14, "grad_norm": 0.6391521692276001, "learning_rate": 0.00011414738393102733, "loss": 1.6664, "step": 64262 }, { "epoch": 2.14, "grad_norm": 0.644298791885376, "learning_rate": 0.00011413917923926414, "loss": 1.7385, "step": 64263 }, { "epoch": 2.14, "grad_norm": 0.6509617567062378, "learning_rate": 0.0001141309747731086, "loss": 1.6956, "step": 64264 }, { "epoch": 2.14, "grad_norm": 0.6399683952331543, "learning_rate": 0.00011412277053257087, "loss": 1.682, "step": 64265 }, { "epoch": 2.14, "grad_norm": 0.6496461033821106, "learning_rate": 0.00011411456651766053, "loss": 1.7081, "step": 64266 }, { "epoch": 2.14, "grad_norm": 0.6438987851142883, "learning_rate": 0.00011410636272838771, "loss": 1.7093, "step": 64267 }, { "epoch": 2.14, "grad_norm": 0.6872451305389404, "learning_rate": 0.00011409815916476247, "loss": 1.7825, "step": 64268 }, { "epoch": 2.14, "grad_norm": 0.6089912056922913, "learning_rate": 0.00011408995582679465, "loss": 1.7654, "step": 64269 }, { "epoch": 2.14, "grad_norm": 0.6354473233222961, "learning_rate": 0.00011408175271449416, "loss": 1.7228, "step": 64270 }, { "epoch": 2.14, "grad_norm": 0.6457308530807495, "learning_rate": 0.00011407354982787103, "loss": 1.7051, "step": 64271 }, { "epoch": 2.14, "grad_norm": 0.6707041263580322, "learning_rate": 0.00011406534716693542, "loss": 1.6695, "step": 64272 }, { "epoch": 2.14, "grad_norm": 0.6525685787200928, "learning_rate": 0.00011405714473169686, "loss": 1.722, "step": 64273 }, { "epoch": 2.14, "grad_norm": 0.644878089427948, "learning_rate": 0.00011404894252216552, "loss": 1.6842, "step": 64274 }, { "epoch": 2.14, "grad_norm": 0.64909428358078, "learning_rate": 0.00011404074053835149, "loss": 1.7285, "step": 64275 }, { "epoch": 2.14, "grad_norm": 0.6520754098892212, "learning_rate": 0.0001140325387802646, "loss": 1.7341, "step": 64276 }, { "epoch": 2.14, "grad_norm": 0.662151575088501, "learning_rate": 0.00011402433724791471, "loss": 1.7322, "step": 64277 }, { "epoch": 2.14, "grad_norm": 0.6431730389595032, "learning_rate": 0.00011401613594131184, "loss": 1.7107, "step": 64278 }, { "epoch": 2.14, "grad_norm": 0.6282607316970825, "learning_rate": 0.00011400793486046622, "loss": 1.7137, "step": 64279 }, { "epoch": 2.14, "grad_norm": 0.6391915678977966, "learning_rate": 0.00011399973400538736, "loss": 1.7043, "step": 64280 }, { "epoch": 2.14, "grad_norm": 0.6320086717605591, "learning_rate": 0.00011399153337608536, "loss": 1.6973, "step": 64281 }, { "epoch": 2.14, "grad_norm": 0.6710820198059082, "learning_rate": 0.0001139833329725704, "loss": 1.7378, "step": 64282 }, { "epoch": 2.14, "grad_norm": 0.6234803795814514, "learning_rate": 0.00011397513279485224, "loss": 1.6751, "step": 64283 }, { "epoch": 2.14, "grad_norm": 0.6240094304084778, "learning_rate": 0.00011396693284294076, "loss": 1.62, "step": 64284 }, { "epoch": 2.14, "grad_norm": 0.6362857818603516, "learning_rate": 0.00011395873311684612, "loss": 1.6951, "step": 64285 }, { "epoch": 2.14, "grad_norm": 0.6723791360855103, "learning_rate": 0.00011395053361657815, "loss": 1.7337, "step": 64286 }, { "epoch": 2.14, "grad_norm": 0.6506255269050598, "learning_rate": 0.00011394233434214672, "loss": 1.7575, "step": 64287 }, { "epoch": 2.14, "grad_norm": 0.6679556965827942, "learning_rate": 0.00011393413529356187, "loss": 1.7625, "step": 64288 }, { "epoch": 2.14, "grad_norm": 0.6327464580535889, "learning_rate": 0.00011392593647083368, "loss": 1.7448, "step": 64289 }, { "epoch": 2.14, "grad_norm": 0.6369790434837341, "learning_rate": 0.000113917737873972, "loss": 1.6667, "step": 64290 }, { "epoch": 2.14, "grad_norm": 0.6322360038757324, "learning_rate": 0.00011390953950298665, "loss": 1.7069, "step": 64291 }, { "epoch": 2.14, "grad_norm": 0.633751392364502, "learning_rate": 0.0001139013413578878, "loss": 1.6792, "step": 64292 }, { "epoch": 2.14, "grad_norm": 2.3789987564086914, "learning_rate": 0.00011389314343868532, "loss": 1.7795, "step": 64293 }, { "epoch": 2.14, "grad_norm": 0.6596028208732605, "learning_rate": 0.00011388494574538904, "loss": 1.6821, "step": 64294 }, { "epoch": 2.14, "grad_norm": 0.6296790242195129, "learning_rate": 0.0001138767482780091, "loss": 1.6447, "step": 64295 }, { "epoch": 2.14, "grad_norm": 0.6460643410682678, "learning_rate": 0.00011386855103655524, "loss": 1.6833, "step": 64296 }, { "epoch": 2.14, "grad_norm": 0.6320388913154602, "learning_rate": 0.00011386035402103766, "loss": 1.7216, "step": 64297 }, { "epoch": 2.14, "grad_norm": 0.6369601488113403, "learning_rate": 0.0001138521572314661, "loss": 1.781, "step": 64298 }, { "epoch": 2.14, "grad_norm": 0.6223374605178833, "learning_rate": 0.00011384396066785066, "loss": 1.6207, "step": 64299 }, { "epoch": 2.14, "grad_norm": 0.6197826266288757, "learning_rate": 0.00011383576433020127, "loss": 1.6515, "step": 64300 }, { "epoch": 2.14, "grad_norm": 0.6169255375862122, "learning_rate": 0.00011382756821852768, "loss": 1.6624, "step": 64301 }, { "epoch": 2.14, "grad_norm": 0.6624554395675659, "learning_rate": 0.00011381937233284013, "loss": 1.7219, "step": 64302 }, { "epoch": 2.14, "grad_norm": 0.6391879916191101, "learning_rate": 0.00011381117667314829, "loss": 1.7098, "step": 64303 }, { "epoch": 2.14, "grad_norm": 0.644143283367157, "learning_rate": 0.00011380298123946237, "loss": 1.7247, "step": 64304 }, { "epoch": 2.14, "grad_norm": 0.6493722200393677, "learning_rate": 0.00011379478603179223, "loss": 1.6952, "step": 64305 }, { "epoch": 2.14, "grad_norm": 0.6337885856628418, "learning_rate": 0.00011378659105014765, "loss": 1.6517, "step": 64306 }, { "epoch": 2.14, "grad_norm": 0.6185256838798523, "learning_rate": 0.00011377839629453879, "loss": 1.6585, "step": 64307 }, { "epoch": 2.14, "grad_norm": 0.6549766659736633, "learning_rate": 0.00011377020176497544, "loss": 1.7293, "step": 64308 }, { "epoch": 2.14, "grad_norm": 0.6536165475845337, "learning_rate": 0.00011376200746146774, "loss": 1.6976, "step": 64309 }, { "epoch": 2.14, "grad_norm": 0.6378613710403442, "learning_rate": 0.00011375381338402541, "loss": 1.7149, "step": 64310 }, { "epoch": 2.14, "grad_norm": 0.6474434733390808, "learning_rate": 0.00011374561953265859, "loss": 1.7557, "step": 64311 }, { "epoch": 2.14, "grad_norm": 0.6314258575439453, "learning_rate": 0.00011373742590737717, "loss": 1.706, "step": 64312 }, { "epoch": 2.14, "grad_norm": 0.6356308460235596, "learning_rate": 0.00011372923250819095, "loss": 1.7011, "step": 64313 }, { "epoch": 2.14, "grad_norm": 0.6568168997764587, "learning_rate": 0.0001137210393351101, "loss": 1.7391, "step": 64314 }, { "epoch": 2.14, "grad_norm": 0.6354544162750244, "learning_rate": 0.00011371284638814448, "loss": 1.6406, "step": 64315 }, { "epoch": 2.14, "grad_norm": 0.640393078327179, "learning_rate": 0.0001137046536673039, "loss": 1.7184, "step": 64316 }, { "epoch": 2.14, "grad_norm": 0.6641966104507446, "learning_rate": 0.00011369646117259844, "loss": 1.7002, "step": 64317 }, { "epoch": 2.14, "grad_norm": 0.6302587985992432, "learning_rate": 0.00011368826890403812, "loss": 1.7268, "step": 64318 }, { "epoch": 2.14, "grad_norm": 0.6278825998306274, "learning_rate": 0.00011368007686163275, "loss": 1.6258, "step": 64319 }, { "epoch": 2.14, "grad_norm": 0.6375747919082642, "learning_rate": 0.00011367188504539224, "loss": 1.6446, "step": 64320 }, { "epoch": 2.14, "grad_norm": 0.6581600904464722, "learning_rate": 0.00011366369345532674, "loss": 1.7032, "step": 64321 }, { "epoch": 2.14, "grad_norm": 0.6372297406196594, "learning_rate": 0.00011365550209144601, "loss": 1.7206, "step": 64322 }, { "epoch": 2.14, "grad_norm": 0.6484408974647522, "learning_rate": 0.00011364731095375998, "loss": 1.6796, "step": 64323 }, { "epoch": 2.14, "grad_norm": 0.6346555352210999, "learning_rate": 0.00011363912004227864, "loss": 1.7102, "step": 64324 }, { "epoch": 2.14, "grad_norm": 0.6644982099533081, "learning_rate": 0.00011363092935701205, "loss": 1.666, "step": 64325 }, { "epoch": 2.14, "grad_norm": 0.6368158459663391, "learning_rate": 0.00011362273889797006, "loss": 1.7881, "step": 64326 }, { "epoch": 2.14, "grad_norm": 0.6597943305969238, "learning_rate": 0.0001136145486651625, "loss": 1.6647, "step": 64327 }, { "epoch": 2.14, "grad_norm": 0.6334744095802307, "learning_rate": 0.00011360635865859953, "loss": 1.6578, "step": 64328 }, { "epoch": 2.14, "grad_norm": 0.6276025176048279, "learning_rate": 0.00011359816887829094, "loss": 1.682, "step": 64329 }, { "epoch": 2.14, "grad_norm": 0.6347182989120483, "learning_rate": 0.00011358997932424663, "loss": 1.6782, "step": 64330 }, { "epoch": 2.14, "grad_norm": 0.6950379610061646, "learning_rate": 0.00011358178999647661, "loss": 1.6939, "step": 64331 }, { "epoch": 2.14, "grad_norm": 0.6423162817955017, "learning_rate": 0.00011357360089499105, "loss": 1.7259, "step": 64332 }, { "epoch": 2.14, "grad_norm": 0.6424093842506409, "learning_rate": 0.00011356541201979945, "loss": 1.7247, "step": 64333 }, { "epoch": 2.14, "grad_norm": 0.6395741701126099, "learning_rate": 0.00011355722337091199, "loss": 1.679, "step": 64334 }, { "epoch": 2.14, "grad_norm": 0.6373746991157532, "learning_rate": 0.00011354903494833866, "loss": 1.6376, "step": 64335 }, { "epoch": 2.14, "grad_norm": 0.635597288608551, "learning_rate": 0.00011354084675208936, "loss": 1.7358, "step": 64336 }, { "epoch": 2.14, "grad_norm": 0.6301660537719727, "learning_rate": 0.00011353265878217387, "loss": 1.6316, "step": 64337 }, { "epoch": 2.14, "grad_norm": 0.6428801417350769, "learning_rate": 0.00011352447103860225, "loss": 1.5702, "step": 64338 }, { "epoch": 2.14, "grad_norm": 0.6095076203346252, "learning_rate": 0.00011351628352138469, "loss": 1.7016, "step": 64339 }, { "epoch": 2.14, "grad_norm": 0.6427554488182068, "learning_rate": 0.00011350809623053068, "loss": 1.7104, "step": 64340 }, { "epoch": 2.14, "grad_norm": 0.6470354795455933, "learning_rate": 0.00011349990916605031, "loss": 1.7082, "step": 64341 }, { "epoch": 2.14, "grad_norm": 0.6330886483192444, "learning_rate": 0.00011349172232795373, "loss": 1.7411, "step": 64342 }, { "epoch": 2.14, "grad_norm": 0.6182287931442261, "learning_rate": 0.0001134835357162507, "loss": 1.6693, "step": 64343 }, { "epoch": 2.14, "grad_norm": 0.6466121673583984, "learning_rate": 0.00011347534933095105, "loss": 1.6583, "step": 64344 }, { "epoch": 2.14, "grad_norm": 0.6564297080039978, "learning_rate": 0.00011346716317206489, "loss": 1.6649, "step": 64345 }, { "epoch": 2.14, "grad_norm": 0.6446847915649414, "learning_rate": 0.00011345897723960228, "loss": 1.7099, "step": 64346 }, { "epoch": 2.14, "grad_norm": 0.6415489315986633, "learning_rate": 0.00011345079153357277, "loss": 1.6473, "step": 64347 }, { "epoch": 2.14, "grad_norm": 0.659066915512085, "learning_rate": 0.00011344260605398653, "loss": 1.7705, "step": 64348 }, { "epoch": 2.14, "grad_norm": 0.6387782096862793, "learning_rate": 0.00011343442080085358, "loss": 1.761, "step": 64349 }, { "epoch": 2.14, "grad_norm": 0.6599301695823669, "learning_rate": 0.00011342623577418377, "loss": 1.7302, "step": 64350 }, { "epoch": 2.14, "grad_norm": 0.6363856196403503, "learning_rate": 0.0001134180509739869, "loss": 1.6802, "step": 64351 }, { "epoch": 2.14, "grad_norm": 0.6385701298713684, "learning_rate": 0.00011340986640027314, "loss": 1.6488, "step": 64352 }, { "epoch": 2.14, "grad_norm": 0.6345522999763489, "learning_rate": 0.0001134016820530523, "loss": 1.6499, "step": 64353 }, { "epoch": 2.14, "grad_norm": 0.6355538964271545, "learning_rate": 0.00011339349793233422, "loss": 1.7371, "step": 64354 }, { "epoch": 2.14, "grad_norm": 0.6352439522743225, "learning_rate": 0.00011338531403812893, "loss": 1.6806, "step": 64355 }, { "epoch": 2.14, "grad_norm": 0.655601441860199, "learning_rate": 0.00011337713037044648, "loss": 1.7363, "step": 64356 }, { "epoch": 2.14, "grad_norm": 0.6275938749313354, "learning_rate": 0.00011336894692929668, "loss": 1.6541, "step": 64357 }, { "epoch": 2.14, "grad_norm": 0.6136118173599243, "learning_rate": 0.00011336076371468942, "loss": 1.7251, "step": 64358 }, { "epoch": 2.14, "grad_norm": 0.6321021914482117, "learning_rate": 0.00011335258072663476, "loss": 1.7342, "step": 64359 }, { "epoch": 2.14, "grad_norm": 0.611037015914917, "learning_rate": 0.00011334439796514247, "loss": 1.7268, "step": 64360 }, { "epoch": 2.14, "grad_norm": 0.6407113671302795, "learning_rate": 0.00011333621543022269, "loss": 1.7379, "step": 64361 }, { "epoch": 2.14, "grad_norm": 0.6338363885879517, "learning_rate": 0.00011332803312188525, "loss": 1.7116, "step": 64362 }, { "epoch": 2.14, "grad_norm": 0.6452631950378418, "learning_rate": 0.00011331985104013995, "loss": 1.7055, "step": 64363 }, { "epoch": 2.14, "grad_norm": 0.6426517963409424, "learning_rate": 0.00011331166918499695, "loss": 1.776, "step": 64364 }, { "epoch": 2.14, "grad_norm": 0.6270748376846313, "learning_rate": 0.000113303487556466, "loss": 1.6219, "step": 64365 }, { "epoch": 2.14, "grad_norm": 0.6256220936775208, "learning_rate": 0.00011329530615455717, "loss": 1.6577, "step": 64366 }, { "epoch": 2.14, "grad_norm": 0.6199750304222107, "learning_rate": 0.00011328712497928024, "loss": 1.6665, "step": 64367 }, { "epoch": 2.14, "grad_norm": 0.6287906169891357, "learning_rate": 0.00011327894403064534, "loss": 1.7188, "step": 64368 }, { "epoch": 2.14, "grad_norm": 0.6230790019035339, "learning_rate": 0.00011327076330866228, "loss": 1.6848, "step": 64369 }, { "epoch": 2.14, "grad_norm": 0.6629548072814941, "learning_rate": 0.00011326258281334091, "loss": 1.7087, "step": 64370 }, { "epoch": 2.14, "grad_norm": 0.6270881295204163, "learning_rate": 0.00011325440254469134, "loss": 1.7262, "step": 64371 }, { "epoch": 2.14, "grad_norm": 0.6427192687988281, "learning_rate": 0.00011324622250272343, "loss": 1.7274, "step": 64372 }, { "epoch": 2.14, "grad_norm": 0.6381996273994446, "learning_rate": 0.00011323804268744698, "loss": 1.7348, "step": 64373 }, { "epoch": 2.14, "grad_norm": 0.6409730911254883, "learning_rate": 0.000113229863098872, "loss": 1.6827, "step": 64374 }, { "epoch": 2.14, "grad_norm": 0.6739356517791748, "learning_rate": 0.00011322168373700856, "loss": 1.6279, "step": 64375 }, { "epoch": 2.14, "grad_norm": 0.6642686128616333, "learning_rate": 0.00011321350460186649, "loss": 1.7065, "step": 64376 }, { "epoch": 2.14, "grad_norm": 0.6574414372444153, "learning_rate": 0.00011320532569345561, "loss": 1.6598, "step": 64377 }, { "epoch": 2.14, "grad_norm": 0.6358475685119629, "learning_rate": 0.00011319714701178604, "loss": 1.7123, "step": 64378 }, { "epoch": 2.14, "grad_norm": 0.6343142986297607, "learning_rate": 0.00011318896855686758, "loss": 1.7463, "step": 64379 }, { "epoch": 2.14, "grad_norm": 0.6197128891944885, "learning_rate": 0.00011318079032871012, "loss": 1.6079, "step": 64380 }, { "epoch": 2.14, "grad_norm": 0.6471425890922546, "learning_rate": 0.00011317261232732365, "loss": 1.6912, "step": 64381 }, { "epoch": 2.14, "grad_norm": 0.6623093485832214, "learning_rate": 0.00011316443455271829, "loss": 1.7512, "step": 64382 }, { "epoch": 2.14, "grad_norm": 0.6252831816673279, "learning_rate": 0.00011315625700490355, "loss": 1.6557, "step": 64383 }, { "epoch": 2.14, "grad_norm": 0.6463759541511536, "learning_rate": 0.00011314807968388962, "loss": 1.6811, "step": 64384 }, { "epoch": 2.14, "grad_norm": 0.6411784291267395, "learning_rate": 0.00011313990258968647, "loss": 1.6756, "step": 64385 }, { "epoch": 2.14, "grad_norm": 0.6946739554405212, "learning_rate": 0.00011313172572230397, "loss": 1.8631, "step": 64386 }, { "epoch": 2.14, "grad_norm": 0.6369454860687256, "learning_rate": 0.0001131235490817519, "loss": 1.7341, "step": 64387 }, { "epoch": 2.14, "grad_norm": 0.6850215792655945, "learning_rate": 0.00011311537266804031, "loss": 1.7057, "step": 64388 }, { "epoch": 2.14, "grad_norm": 0.6384050846099854, "learning_rate": 0.00011310719648117936, "loss": 1.6086, "step": 64389 }, { "epoch": 2.14, "grad_norm": 0.6106681823730469, "learning_rate": 0.0001130990205211785, "loss": 1.7447, "step": 64390 }, { "epoch": 2.14, "grad_norm": 0.6092139482498169, "learning_rate": 0.00011309084478804787, "loss": 1.7074, "step": 64391 }, { "epoch": 2.14, "grad_norm": 0.6375264525413513, "learning_rate": 0.00011308266928179756, "loss": 1.7735, "step": 64392 }, { "epoch": 2.14, "grad_norm": 0.6365552544593811, "learning_rate": 0.00011307449400243731, "loss": 1.7536, "step": 64393 }, { "epoch": 2.14, "grad_norm": 0.634202778339386, "learning_rate": 0.000113066318949977, "loss": 1.6991, "step": 64394 }, { "epoch": 2.14, "grad_norm": 0.6412591934204102, "learning_rate": 0.00011305814412442664, "loss": 1.7406, "step": 64395 }, { "epoch": 2.14, "grad_norm": 0.6565550565719604, "learning_rate": 0.00011304996952579634, "loss": 1.7525, "step": 64396 }, { "epoch": 2.14, "grad_norm": 0.6320338249206543, "learning_rate": 0.00011304179515409563, "loss": 1.7006, "step": 64397 }, { "epoch": 2.14, "grad_norm": 0.6792269349098206, "learning_rate": 0.00011303362100933464, "loss": 1.7215, "step": 64398 }, { "epoch": 2.14, "grad_norm": 0.6360599398612976, "learning_rate": 0.0001130254470915234, "loss": 1.7257, "step": 64399 }, { "epoch": 2.14, "grad_norm": 0.6490557789802551, "learning_rate": 0.00011301727340067168, "loss": 1.7542, "step": 64400 }, { "epoch": 2.14, "grad_norm": 0.6349518299102783, "learning_rate": 0.00011300909993678937, "loss": 1.705, "step": 64401 }, { "epoch": 2.14, "grad_norm": 0.623467206954956, "learning_rate": 0.00011300092669988657, "loss": 1.6524, "step": 64402 }, { "epoch": 2.14, "grad_norm": 0.6440834403038025, "learning_rate": 0.00011299275368997305, "loss": 1.758, "step": 64403 }, { "epoch": 2.14, "grad_norm": 0.6326814889907837, "learning_rate": 0.00011298458090705872, "loss": 1.7109, "step": 64404 }, { "epoch": 2.14, "grad_norm": 0.640717625617981, "learning_rate": 0.00011297640835115351, "loss": 1.6991, "step": 64405 }, { "epoch": 2.14, "grad_norm": 0.606295645236969, "learning_rate": 0.00011296823602226752, "loss": 1.6748, "step": 64406 }, { "epoch": 2.14, "grad_norm": 0.6316699981689453, "learning_rate": 0.0001129600639204105, "loss": 1.7245, "step": 64407 }, { "epoch": 2.14, "grad_norm": 0.67977374792099, "learning_rate": 0.00011295189204559234, "loss": 1.724, "step": 64408 }, { "epoch": 2.14, "grad_norm": 0.6452290415763855, "learning_rate": 0.0001129437203978231, "loss": 1.7287, "step": 64409 }, { "epoch": 2.14, "grad_norm": 0.6497899889945984, "learning_rate": 0.0001129355489771126, "loss": 1.7157, "step": 64410 }, { "epoch": 2.14, "grad_norm": 0.6561964750289917, "learning_rate": 0.00011292737778347072, "loss": 1.6998, "step": 64411 }, { "epoch": 2.14, "grad_norm": 0.6289269924163818, "learning_rate": 0.00011291920681690753, "loss": 1.7442, "step": 64412 }, { "epoch": 2.14, "grad_norm": 0.6257585883140564, "learning_rate": 0.00011291103607743277, "loss": 1.7255, "step": 64413 }, { "epoch": 2.14, "grad_norm": 0.6570000648498535, "learning_rate": 0.00011290286556505652, "loss": 1.7417, "step": 64414 }, { "epoch": 2.14, "grad_norm": 0.6564992666244507, "learning_rate": 0.00011289469527978852, "loss": 1.6827, "step": 64415 }, { "epoch": 2.14, "grad_norm": 1.8138036727905273, "learning_rate": 0.0001128865252216389, "loss": 1.616, "step": 64416 }, { "epoch": 2.14, "grad_norm": 0.6257514953613281, "learning_rate": 0.0001128783553906175, "loss": 1.6608, "step": 64417 }, { "epoch": 2.14, "grad_norm": 0.6418931484222412, "learning_rate": 0.00011287018578673408, "loss": 1.7869, "step": 64418 }, { "epoch": 2.14, "grad_norm": 0.6600105166435242, "learning_rate": 0.00011286201640999876, "loss": 1.7465, "step": 64419 }, { "epoch": 2.14, "grad_norm": 0.6437127590179443, "learning_rate": 0.0001128538472604213, "loss": 1.6988, "step": 64420 }, { "epoch": 2.14, "grad_norm": 0.6229603290557861, "learning_rate": 0.00011284567833801181, "loss": 1.7537, "step": 64421 }, { "epoch": 2.14, "grad_norm": 0.6349884271621704, "learning_rate": 0.00011283750964278008, "loss": 1.6739, "step": 64422 }, { "epoch": 2.14, "grad_norm": 0.6296643018722534, "learning_rate": 0.00011282934117473591, "loss": 1.6196, "step": 64423 }, { "epoch": 2.14, "grad_norm": 0.6584874391555786, "learning_rate": 0.00011282117293388947, "loss": 1.7345, "step": 64424 }, { "epoch": 2.14, "grad_norm": 0.6447892785072327, "learning_rate": 0.00011281300492025045, "loss": 1.7458, "step": 64425 }, { "epoch": 2.14, "grad_norm": 0.6092079281806946, "learning_rate": 0.00011280483713382894, "loss": 1.7475, "step": 64426 }, { "epoch": 2.14, "grad_norm": 0.6349844932556152, "learning_rate": 0.00011279666957463467, "loss": 1.6715, "step": 64427 }, { "epoch": 2.14, "grad_norm": 0.6450216770172119, "learning_rate": 0.00011278850224267779, "loss": 1.7165, "step": 64428 }, { "epoch": 2.14, "grad_norm": 0.641899824142456, "learning_rate": 0.00011278033513796806, "loss": 1.6784, "step": 64429 }, { "epoch": 2.14, "grad_norm": 0.6212630867958069, "learning_rate": 0.00011277216826051533, "loss": 1.7558, "step": 64430 }, { "epoch": 2.14, "grad_norm": 0.6315792202949524, "learning_rate": 0.00011276400161032969, "loss": 1.6956, "step": 64431 }, { "epoch": 2.14, "grad_norm": 0.6467799544334412, "learning_rate": 0.00011275583518742099, "loss": 1.7014, "step": 64432 }, { "epoch": 2.14, "grad_norm": 0.6858559250831604, "learning_rate": 0.000112747668991799, "loss": 1.7854, "step": 64433 }, { "epoch": 2.14, "grad_norm": 0.6467240452766418, "learning_rate": 0.00011273950302347374, "loss": 1.6752, "step": 64434 }, { "epoch": 2.14, "grad_norm": 0.627564013004303, "learning_rate": 0.00011273133728245524, "loss": 1.7721, "step": 64435 }, { "epoch": 2.14, "grad_norm": 0.634629487991333, "learning_rate": 0.00011272317176875331, "loss": 1.6996, "step": 64436 }, { "epoch": 2.14, "grad_norm": 0.6886643171310425, "learning_rate": 0.00011271500648237777, "loss": 1.6718, "step": 64437 }, { "epoch": 2.14, "grad_norm": 0.6314207315444946, "learning_rate": 0.00011270684142333868, "loss": 1.7, "step": 64438 }, { "epoch": 2.14, "grad_norm": 0.6318165063858032, "learning_rate": 0.00011269867659164589, "loss": 1.7103, "step": 64439 }, { "epoch": 2.14, "grad_norm": 0.6265606880187988, "learning_rate": 0.00011269051198730924, "loss": 1.6377, "step": 64440 }, { "epoch": 2.14, "grad_norm": 0.6503956913948059, "learning_rate": 0.0001126823476103387, "loss": 1.6688, "step": 64441 }, { "epoch": 2.14, "grad_norm": 0.6341687440872192, "learning_rate": 0.0001126741834607444, "loss": 1.6854, "step": 64442 }, { "epoch": 2.14, "grad_norm": 0.6514054536819458, "learning_rate": 0.0001126660195385358, "loss": 1.7324, "step": 64443 }, { "epoch": 2.14, "grad_norm": 0.6341586709022522, "learning_rate": 0.00011265785584372308, "loss": 1.7168, "step": 64444 }, { "epoch": 2.14, "grad_norm": 0.620048463344574, "learning_rate": 0.00011264969237631622, "loss": 1.7344, "step": 64445 }, { "epoch": 2.14, "grad_norm": 0.6255214810371399, "learning_rate": 0.000112641529136325, "loss": 1.7242, "step": 64446 }, { "epoch": 2.14, "grad_norm": 0.6623878479003906, "learning_rate": 0.00011263336612375931, "loss": 1.6951, "step": 64447 }, { "epoch": 2.14, "grad_norm": 0.644214391708374, "learning_rate": 0.00011262520333862908, "loss": 1.7196, "step": 64448 }, { "epoch": 2.14, "grad_norm": 0.645081102848053, "learning_rate": 0.00011261704078094441, "loss": 1.6738, "step": 64449 }, { "epoch": 2.14, "grad_norm": 0.6686267852783203, "learning_rate": 0.0001126088784507149, "loss": 1.7085, "step": 64450 }, { "epoch": 2.14, "grad_norm": 0.649325430393219, "learning_rate": 0.00011260071634795058, "loss": 1.6228, "step": 64451 }, { "epoch": 2.14, "grad_norm": 0.6354203820228577, "learning_rate": 0.00011259255447266145, "loss": 1.7358, "step": 64452 }, { "epoch": 2.14, "grad_norm": 0.6400789022445679, "learning_rate": 0.00011258439282485738, "loss": 1.5889, "step": 64453 }, { "epoch": 2.14, "grad_norm": 0.6256904602050781, "learning_rate": 0.00011257623140454811, "loss": 1.674, "step": 64454 }, { "epoch": 2.14, "grad_norm": 0.6472252607345581, "learning_rate": 0.00011256807021174372, "loss": 1.7044, "step": 64455 }, { "epoch": 2.14, "grad_norm": 0.6544479131698608, "learning_rate": 0.00011255990924645427, "loss": 1.7378, "step": 64456 }, { "epoch": 2.14, "grad_norm": 0.6482055187225342, "learning_rate": 0.00011255174850868925, "loss": 1.7154, "step": 64457 }, { "epoch": 2.14, "grad_norm": 0.6387206315994263, "learning_rate": 0.0001125435879984588, "loss": 1.6819, "step": 64458 }, { "epoch": 2.14, "grad_norm": 0.6437341570854187, "learning_rate": 0.00011253542771577292, "loss": 1.7101, "step": 64459 }, { "epoch": 2.14, "grad_norm": 0.6266692876815796, "learning_rate": 0.00011252726766064139, "loss": 1.6955, "step": 64460 }, { "epoch": 2.14, "grad_norm": 0.6638854146003723, "learning_rate": 0.00011251910783307405, "loss": 1.6878, "step": 64461 }, { "epoch": 2.14, "grad_norm": 0.6344428062438965, "learning_rate": 0.00011251094823308087, "loss": 1.7147, "step": 64462 }, { "epoch": 2.14, "grad_norm": 0.6383979916572571, "learning_rate": 0.00011250278886067199, "loss": 1.6495, "step": 64463 }, { "epoch": 2.14, "grad_norm": 0.6469175815582275, "learning_rate": 0.0001124946297158569, "loss": 1.6668, "step": 64464 }, { "epoch": 2.14, "grad_norm": 0.6498557329177856, "learning_rate": 0.0001124864707986457, "loss": 1.7584, "step": 64465 }, { "epoch": 2.14, "grad_norm": 0.6368793249130249, "learning_rate": 0.00011247831210904838, "loss": 1.7665, "step": 64466 }, { "epoch": 2.14, "grad_norm": 0.6366866827011108, "learning_rate": 0.00011247015364707478, "loss": 1.6848, "step": 64467 }, { "epoch": 2.14, "grad_norm": 0.6447522044181824, "learning_rate": 0.00011246199541273467, "loss": 1.72, "step": 64468 }, { "epoch": 2.14, "grad_norm": 0.6463358998298645, "learning_rate": 0.00011245383740603816, "loss": 1.7109, "step": 64469 }, { "epoch": 2.14, "grad_norm": 0.6447787880897522, "learning_rate": 0.00011244567962699498, "loss": 1.6736, "step": 64470 }, { "epoch": 2.14, "grad_norm": 0.6629824638366699, "learning_rate": 0.00011243752207561518, "loss": 1.6613, "step": 64471 }, { "epoch": 2.14, "grad_norm": 0.6347697973251343, "learning_rate": 0.00011242936475190853, "loss": 1.7368, "step": 64472 }, { "epoch": 2.15, "grad_norm": 0.6543457508087158, "learning_rate": 0.00011242120765588505, "loss": 1.6722, "step": 64473 }, { "epoch": 2.15, "grad_norm": 0.6498515605926514, "learning_rate": 0.00011241305078755462, "loss": 1.6909, "step": 64474 }, { "epoch": 2.15, "grad_norm": 0.6321117877960205, "learning_rate": 0.000112404894146927, "loss": 1.7197, "step": 64475 }, { "epoch": 2.15, "grad_norm": 0.6408000588417053, "learning_rate": 0.00011239673773401229, "loss": 1.7156, "step": 64476 }, { "epoch": 2.15, "grad_norm": 0.6710917353630066, "learning_rate": 0.00011238858154882019, "loss": 1.6821, "step": 64477 }, { "epoch": 2.15, "grad_norm": 0.6336916089057922, "learning_rate": 0.00011238042559136082, "loss": 1.7465, "step": 64478 }, { "epoch": 2.15, "grad_norm": 0.6748185157775879, "learning_rate": 0.00011237226986164398, "loss": 1.7737, "step": 64479 }, { "epoch": 2.15, "grad_norm": 0.6586846113204956, "learning_rate": 0.00011236411435967945, "loss": 1.6001, "step": 64480 }, { "epoch": 2.15, "grad_norm": 0.6538149118423462, "learning_rate": 0.00011235595908547734, "loss": 1.6555, "step": 64481 }, { "epoch": 2.15, "grad_norm": 0.6548097133636475, "learning_rate": 0.00011234780403904732, "loss": 1.6405, "step": 64482 }, { "epoch": 2.15, "grad_norm": 0.6354917883872986, "learning_rate": 0.00011233964922039958, "loss": 1.6856, "step": 64483 }, { "epoch": 2.15, "grad_norm": 0.6344751119613647, "learning_rate": 0.00011233149462954371, "loss": 1.7571, "step": 64484 }, { "epoch": 2.15, "grad_norm": 0.6744551062583923, "learning_rate": 0.00011232334026648985, "loss": 1.7714, "step": 64485 }, { "epoch": 2.15, "grad_norm": 0.698937714099884, "learning_rate": 0.00011231518613124782, "loss": 1.6991, "step": 64486 }, { "epoch": 2.15, "grad_norm": 0.6309660077095032, "learning_rate": 0.0001123070322238274, "loss": 1.6626, "step": 64487 }, { "epoch": 2.15, "grad_norm": 0.6559773087501526, "learning_rate": 0.00011229887854423866, "loss": 1.768, "step": 64488 }, { "epoch": 2.15, "grad_norm": 0.6499228477478027, "learning_rate": 0.00011229072509249146, "loss": 1.7716, "step": 64489 }, { "epoch": 2.15, "grad_norm": 0.641652524471283, "learning_rate": 0.00011228257186859553, "loss": 1.6891, "step": 64490 }, { "epoch": 2.15, "grad_norm": 0.6442229747772217, "learning_rate": 0.00011227441887256091, "loss": 1.737, "step": 64491 }, { "epoch": 2.15, "grad_norm": 0.6502395868301392, "learning_rate": 0.0001122662661043976, "loss": 1.6717, "step": 64492 }, { "epoch": 2.15, "grad_norm": 0.6529041528701782, "learning_rate": 0.00011225811356411538, "loss": 1.7355, "step": 64493 }, { "epoch": 2.15, "grad_norm": 0.6423230767250061, "learning_rate": 0.00011224996125172403, "loss": 1.5962, "step": 64494 }, { "epoch": 2.15, "grad_norm": 0.6314644813537598, "learning_rate": 0.00011224180916723368, "loss": 1.7795, "step": 64495 }, { "epoch": 2.15, "grad_norm": 0.6342262029647827, "learning_rate": 0.00011223365731065412, "loss": 1.7194, "step": 64496 }, { "epoch": 2.15, "grad_norm": 0.6221365332603455, "learning_rate": 0.0001122255056819951, "loss": 1.6809, "step": 64497 }, { "epoch": 2.15, "grad_norm": 0.643267810344696, "learning_rate": 0.00011221735428126666, "loss": 1.703, "step": 64498 }, { "epoch": 2.15, "grad_norm": 0.6685413718223572, "learning_rate": 0.00011220920310847888, "loss": 1.7352, "step": 64499 }, { "epoch": 2.15, "grad_norm": 0.6430477499961853, "learning_rate": 0.00011220105216364126, "loss": 1.7406, "step": 64500 }, { "epoch": 2.15, "grad_norm": 0.6425957083702087, "learning_rate": 0.0001121929014467639, "loss": 1.6402, "step": 64501 }, { "epoch": 2.15, "grad_norm": 0.6698027849197388, "learning_rate": 0.00011218475095785678, "loss": 1.6898, "step": 64502 }, { "epoch": 2.15, "grad_norm": 0.6716304421424866, "learning_rate": 0.00011217660069692973, "loss": 1.7278, "step": 64503 }, { "epoch": 2.15, "grad_norm": 0.6480791568756104, "learning_rate": 0.00011216845066399247, "loss": 1.7323, "step": 64504 }, { "epoch": 2.15, "grad_norm": 0.6374173760414124, "learning_rate": 0.00011216030085905506, "loss": 1.6884, "step": 64505 }, { "epoch": 2.15, "grad_norm": 0.6560141444206238, "learning_rate": 0.00011215215128212756, "loss": 1.6666, "step": 64506 }, { "epoch": 2.15, "grad_norm": 0.6223787665367126, "learning_rate": 0.00011214400193321947, "loss": 1.7161, "step": 64507 }, { "epoch": 2.15, "grad_norm": 0.6453838348388672, "learning_rate": 0.00011213585281234087, "loss": 1.7116, "step": 64508 }, { "epoch": 2.15, "grad_norm": 0.6477162837982178, "learning_rate": 0.00011212770391950177, "loss": 1.7004, "step": 64509 }, { "epoch": 2.15, "grad_norm": 0.6174057126045227, "learning_rate": 0.00011211955525471196, "loss": 1.6605, "step": 64510 }, { "epoch": 2.15, "grad_norm": 0.7019975781440735, "learning_rate": 0.00011211140681798124, "loss": 1.6979, "step": 64511 }, { "epoch": 2.15, "grad_norm": 0.6604467630386353, "learning_rate": 0.00011210325860931958, "loss": 1.7857, "step": 64512 }, { "epoch": 2.15, "grad_norm": 0.6366599202156067, "learning_rate": 0.00011209511062873708, "loss": 1.7618, "step": 64513 }, { "epoch": 2.15, "grad_norm": 0.6236822009086609, "learning_rate": 0.00011208696287624321, "loss": 1.6637, "step": 64514 }, { "epoch": 2.15, "grad_norm": 0.6341508030891418, "learning_rate": 0.0001120788153518481, "loss": 1.6922, "step": 64515 }, { "epoch": 2.15, "grad_norm": 0.6533125042915344, "learning_rate": 0.0001120706680555617, "loss": 1.7111, "step": 64516 }, { "epoch": 2.15, "grad_norm": 0.642807126045227, "learning_rate": 0.00011206252098739385, "loss": 1.6407, "step": 64517 }, { "epoch": 2.15, "grad_norm": 0.650073766708374, "learning_rate": 0.00011205437414735426, "loss": 1.7423, "step": 64518 }, { "epoch": 2.15, "grad_norm": 2.6454854011535645, "learning_rate": 0.00011204622753545309, "loss": 1.7751, "step": 64519 }, { "epoch": 2.15, "grad_norm": 0.6379122138023376, "learning_rate": 0.00011203808115170012, "loss": 1.6768, "step": 64520 }, { "epoch": 2.15, "grad_norm": 0.8053463697433472, "learning_rate": 0.00011202993499610514, "loss": 1.7071, "step": 64521 }, { "epoch": 2.15, "grad_norm": 0.6320326924324036, "learning_rate": 0.00011202178906867808, "loss": 1.7165, "step": 64522 }, { "epoch": 2.15, "grad_norm": 0.6956852674484253, "learning_rate": 0.000112013643369429, "loss": 1.7148, "step": 64523 }, { "epoch": 2.15, "grad_norm": 0.6793968677520752, "learning_rate": 0.00011200549789836765, "loss": 1.8249, "step": 64524 }, { "epoch": 2.15, "grad_norm": 0.6454137563705444, "learning_rate": 0.00011199735265550383, "loss": 1.7517, "step": 64525 }, { "epoch": 2.15, "grad_norm": 0.6372925639152527, "learning_rate": 0.00011198920764084762, "loss": 1.7469, "step": 64526 }, { "epoch": 2.15, "grad_norm": 0.6817904710769653, "learning_rate": 0.0001119810628544088, "loss": 1.7018, "step": 64527 }, { "epoch": 2.15, "grad_norm": 0.6528896689414978, "learning_rate": 0.00011197291829619717, "loss": 1.7654, "step": 64528 }, { "epoch": 2.15, "grad_norm": 0.6380854249000549, "learning_rate": 0.00011196477396622282, "loss": 1.6939, "step": 64529 }, { "epoch": 2.15, "grad_norm": 0.6365036368370056, "learning_rate": 0.00011195662986449542, "loss": 1.7198, "step": 64530 }, { "epoch": 2.15, "grad_norm": 0.6276071071624756, "learning_rate": 0.0001119484859910251, "loss": 1.6855, "step": 64531 }, { "epoch": 2.15, "grad_norm": 0.6458083987236023, "learning_rate": 0.00011194034234582144, "loss": 1.6735, "step": 64532 }, { "epoch": 2.15, "grad_norm": 0.6282960772514343, "learning_rate": 0.00011193219892889465, "loss": 1.7314, "step": 64533 }, { "epoch": 2.15, "grad_norm": 0.6502975821495056, "learning_rate": 0.00011192405574025446, "loss": 1.6684, "step": 64534 }, { "epoch": 2.15, "grad_norm": 0.6525968909263611, "learning_rate": 0.00011191591277991064, "loss": 1.6698, "step": 64535 }, { "epoch": 2.15, "grad_norm": 0.635335385799408, "learning_rate": 0.00011190777004787327, "loss": 1.688, "step": 64536 }, { "epoch": 2.15, "grad_norm": 0.6311815977096558, "learning_rate": 0.0001118996275441521, "loss": 1.7522, "step": 64537 }, { "epoch": 2.15, "grad_norm": 0.626436710357666, "learning_rate": 0.0001118914852687571, "loss": 1.6847, "step": 64538 }, { "epoch": 2.15, "grad_norm": 0.6532641053199768, "learning_rate": 0.00011188334322169817, "loss": 1.6941, "step": 64539 }, { "epoch": 2.15, "grad_norm": 0.6487674713134766, "learning_rate": 0.00011187520140298505, "loss": 1.7206, "step": 64540 }, { "epoch": 2.15, "grad_norm": 0.6598730087280273, "learning_rate": 0.0001118670598126278, "loss": 1.7525, "step": 64541 }, { "epoch": 2.15, "grad_norm": 0.6145220994949341, "learning_rate": 0.00011185891845063613, "loss": 1.7409, "step": 64542 }, { "epoch": 2.15, "grad_norm": 0.6570978164672852, "learning_rate": 0.00011185077731702009, "loss": 1.6755, "step": 64543 }, { "epoch": 2.15, "grad_norm": 0.6386669278144836, "learning_rate": 0.00011184263641178937, "loss": 1.7117, "step": 64544 }, { "epoch": 2.15, "grad_norm": 0.631803035736084, "learning_rate": 0.0001118344957349541, "loss": 1.6425, "step": 64545 }, { "epoch": 2.15, "grad_norm": 0.6306102275848389, "learning_rate": 0.00011182635528652403, "loss": 1.6955, "step": 64546 }, { "epoch": 2.15, "grad_norm": 0.6652001142501831, "learning_rate": 0.00011181821506650892, "loss": 1.7034, "step": 64547 }, { "epoch": 2.15, "grad_norm": 0.6322733759880066, "learning_rate": 0.00011181007507491889, "loss": 1.7277, "step": 64548 }, { "epoch": 2.15, "grad_norm": 0.665208637714386, "learning_rate": 0.00011180193531176368, "loss": 1.7393, "step": 64549 }, { "epoch": 2.15, "grad_norm": 0.67818284034729, "learning_rate": 0.00011179379577705308, "loss": 1.6692, "step": 64550 }, { "epoch": 2.15, "grad_norm": 0.6237277388572693, "learning_rate": 0.00011178565647079709, "loss": 1.7653, "step": 64551 }, { "epoch": 2.15, "grad_norm": 0.6381058096885681, "learning_rate": 0.00011177751739300573, "loss": 1.6653, "step": 64552 }, { "epoch": 2.15, "grad_norm": 0.6360390186309814, "learning_rate": 0.00011176937854368866, "loss": 1.6654, "step": 64553 }, { "epoch": 2.15, "grad_norm": 0.6247064471244812, "learning_rate": 0.00011176123992285576, "loss": 1.7239, "step": 64554 }, { "epoch": 2.15, "grad_norm": 0.6360538005828857, "learning_rate": 0.00011175310153051709, "loss": 1.6961, "step": 64555 }, { "epoch": 2.15, "grad_norm": 0.6484143137931824, "learning_rate": 0.0001117449633666824, "loss": 1.7199, "step": 64556 }, { "epoch": 2.15, "grad_norm": 0.6214093565940857, "learning_rate": 0.00011173682543136152, "loss": 1.6729, "step": 64557 }, { "epoch": 2.15, "grad_norm": 0.6367716193199158, "learning_rate": 0.00011172868772456432, "loss": 1.7108, "step": 64558 }, { "epoch": 2.15, "grad_norm": 0.6660841703414917, "learning_rate": 0.00011172055024630103, "loss": 1.6951, "step": 64559 }, { "epoch": 2.15, "grad_norm": 0.6352400779724121, "learning_rate": 0.00011171241299658103, "loss": 1.7547, "step": 64560 }, { "epoch": 2.15, "grad_norm": 0.6469696164131165, "learning_rate": 0.0001117042759754144, "loss": 1.6783, "step": 64561 }, { "epoch": 2.15, "grad_norm": 0.6304528117179871, "learning_rate": 0.00011169613918281113, "loss": 1.7212, "step": 64562 }, { "epoch": 2.15, "grad_norm": 0.6527869701385498, "learning_rate": 0.00011168800261878102, "loss": 1.7205, "step": 64563 }, { "epoch": 2.15, "grad_norm": 0.6362940669059753, "learning_rate": 0.0001116798662833338, "loss": 1.6965, "step": 64564 }, { "epoch": 2.15, "grad_norm": 0.6639881134033203, "learning_rate": 0.0001116717301764795, "loss": 1.7166, "step": 64565 }, { "epoch": 2.15, "grad_norm": 0.6671647429466248, "learning_rate": 0.0001116635942982282, "loss": 1.8405, "step": 64566 }, { "epoch": 2.15, "grad_norm": 0.6404116749763489, "learning_rate": 0.0001116554586485893, "loss": 1.6849, "step": 64567 }, { "epoch": 2.15, "grad_norm": 0.640950083732605, "learning_rate": 0.00011164732322757293, "loss": 1.7486, "step": 64568 }, { "epoch": 2.15, "grad_norm": 0.620540201663971, "learning_rate": 0.00011163918803518907, "loss": 1.6637, "step": 64569 }, { "epoch": 2.15, "grad_norm": 0.654827892780304, "learning_rate": 0.0001116310530714475, "loss": 1.6863, "step": 64570 }, { "epoch": 2.15, "grad_norm": 0.6347596049308777, "learning_rate": 0.00011162291833635792, "loss": 1.6725, "step": 64571 }, { "epoch": 2.15, "grad_norm": 0.6410075426101685, "learning_rate": 0.00011161478382993041, "loss": 1.7489, "step": 64572 }, { "epoch": 2.15, "grad_norm": 0.6479308605194092, "learning_rate": 0.00011160664955217497, "loss": 1.6868, "step": 64573 }, { "epoch": 2.15, "grad_norm": 0.6385155916213989, "learning_rate": 0.00011159851550310111, "loss": 1.7662, "step": 64574 }, { "epoch": 2.15, "grad_norm": 0.6608477234840393, "learning_rate": 0.00011159038168271889, "loss": 1.7712, "step": 64575 }, { "epoch": 2.15, "grad_norm": 0.6379209756851196, "learning_rate": 0.00011158224809103829, "loss": 1.663, "step": 64576 }, { "epoch": 2.15, "grad_norm": 0.6137112975120544, "learning_rate": 0.00011157411472806906, "loss": 1.6499, "step": 64577 }, { "epoch": 2.15, "grad_norm": 0.6412487626075745, "learning_rate": 0.00011156598159382102, "loss": 1.6933, "step": 64578 }, { "epoch": 2.15, "grad_norm": 0.6533039808273315, "learning_rate": 0.00011155784868830411, "loss": 1.7076, "step": 64579 }, { "epoch": 2.15, "grad_norm": 0.6514553427696228, "learning_rate": 0.00011154971601152839, "loss": 1.7947, "step": 64580 }, { "epoch": 2.15, "grad_norm": 0.6444240808486938, "learning_rate": 0.00011154158356350336, "loss": 1.7097, "step": 64581 }, { "epoch": 2.15, "grad_norm": 0.621641218662262, "learning_rate": 0.00011153345134423906, "loss": 1.7649, "step": 64582 }, { "epoch": 2.15, "grad_norm": 0.6033585667610168, "learning_rate": 0.00011152531935374552, "loss": 1.6374, "step": 64583 }, { "epoch": 2.15, "grad_norm": 0.6288999319076538, "learning_rate": 0.00011151718759203243, "loss": 1.7496, "step": 64584 }, { "epoch": 2.15, "grad_norm": 0.6578649878501892, "learning_rate": 0.00011150905605910962, "loss": 1.6915, "step": 64585 }, { "epoch": 2.15, "grad_norm": 0.6545814275741577, "learning_rate": 0.00011150092475498716, "loss": 1.6128, "step": 64586 }, { "epoch": 2.15, "grad_norm": 0.6493937373161316, "learning_rate": 0.00011149279367967466, "loss": 1.7132, "step": 64587 }, { "epoch": 2.15, "grad_norm": 0.6776854991912842, "learning_rate": 0.00011148466283318227, "loss": 1.6962, "step": 64588 }, { "epoch": 2.15, "grad_norm": 0.6372990012168884, "learning_rate": 0.0001114765322155196, "loss": 1.8046, "step": 64589 }, { "epoch": 2.15, "grad_norm": 0.6310349702835083, "learning_rate": 0.00011146840182669678, "loss": 1.7204, "step": 64590 }, { "epoch": 2.15, "grad_norm": 0.6301400065422058, "learning_rate": 0.0001114602716667235, "loss": 1.7599, "step": 64591 }, { "epoch": 2.15, "grad_norm": 0.6381927728652954, "learning_rate": 0.00011145214173560961, "loss": 1.7503, "step": 64592 }, { "epoch": 2.15, "grad_norm": 0.6367765665054321, "learning_rate": 0.00011144401203336511, "loss": 1.6635, "step": 64593 }, { "epoch": 2.15, "grad_norm": 0.6526839137077332, "learning_rate": 0.00011143588255999968, "loss": 1.7242, "step": 64594 }, { "epoch": 2.15, "grad_norm": 0.6586007475852966, "learning_rate": 0.00011142775331552342, "loss": 1.7078, "step": 64595 }, { "epoch": 2.15, "grad_norm": 0.6564663052558899, "learning_rate": 0.00011141962429994612, "loss": 1.703, "step": 64596 }, { "epoch": 2.15, "grad_norm": 0.6234537959098816, "learning_rate": 0.00011141149551327746, "loss": 1.6887, "step": 64597 }, { "epoch": 2.15, "grad_norm": 0.6493406295776367, "learning_rate": 0.00011140336695552756, "loss": 1.7616, "step": 64598 }, { "epoch": 2.15, "grad_norm": 0.6320676803588867, "learning_rate": 0.00011139523862670611, "loss": 1.7467, "step": 64599 }, { "epoch": 2.15, "grad_norm": 0.6255652904510498, "learning_rate": 0.00011138711052682315, "loss": 1.7176, "step": 64600 }, { "epoch": 2.15, "grad_norm": 0.6241717338562012, "learning_rate": 0.00011137898265588833, "loss": 1.7098, "step": 64601 }, { "epoch": 2.15, "grad_norm": 0.6455335021018982, "learning_rate": 0.00011137085501391175, "loss": 1.6951, "step": 64602 }, { "epoch": 2.15, "grad_norm": 0.6441836357116699, "learning_rate": 0.00011136272760090316, "loss": 1.6984, "step": 64603 }, { "epoch": 2.15, "grad_norm": 0.6352394819259644, "learning_rate": 0.0001113546004168723, "loss": 1.6996, "step": 64604 }, { "epoch": 2.15, "grad_norm": 0.6290537714958191, "learning_rate": 0.00011134647346182927, "loss": 1.7744, "step": 64605 }, { "epoch": 2.15, "grad_norm": 0.6464682817459106, "learning_rate": 0.00011133834673578383, "loss": 1.7067, "step": 64606 }, { "epoch": 2.15, "grad_norm": 0.6416446566581726, "learning_rate": 0.00011133022023874573, "loss": 1.6937, "step": 64607 }, { "epoch": 2.15, "grad_norm": 0.631644606590271, "learning_rate": 0.00011132209397072496, "loss": 1.7074, "step": 64608 }, { "epoch": 2.15, "grad_norm": 0.6478639245033264, "learning_rate": 0.00011131396793173144, "loss": 1.6936, "step": 64609 }, { "epoch": 2.15, "grad_norm": 0.6231815814971924, "learning_rate": 0.000111305842121775, "loss": 1.6618, "step": 64610 }, { "epoch": 2.15, "grad_norm": 0.6419661641120911, "learning_rate": 0.00011129771654086534, "loss": 1.6511, "step": 64611 }, { "epoch": 2.15, "grad_norm": 0.6426059007644653, "learning_rate": 0.00011128959118901256, "loss": 1.6735, "step": 64612 }, { "epoch": 2.15, "grad_norm": 0.6569552421569824, "learning_rate": 0.0001112814660662264, "loss": 1.6777, "step": 64613 }, { "epoch": 2.15, "grad_norm": 0.6277894973754883, "learning_rate": 0.00011127334117251665, "loss": 1.6762, "step": 64614 }, { "epoch": 2.15, "grad_norm": 0.6557334065437317, "learning_rate": 0.00011126521650789324, "loss": 1.6935, "step": 64615 }, { "epoch": 2.15, "grad_norm": 0.6477593183517456, "learning_rate": 0.00011125709207236623, "loss": 1.7201, "step": 64616 }, { "epoch": 2.15, "grad_norm": 0.6527528762817383, "learning_rate": 0.00011124896786594513, "loss": 1.8252, "step": 64617 }, { "epoch": 2.15, "grad_norm": 0.62621009349823, "learning_rate": 0.00011124084388863997, "loss": 1.7286, "step": 64618 }, { "epoch": 2.15, "grad_norm": 0.6534952521324158, "learning_rate": 0.00011123272014046071, "loss": 1.7541, "step": 64619 }, { "epoch": 2.15, "grad_norm": 0.6444231271743774, "learning_rate": 0.0001112245966214171, "loss": 1.7914, "step": 64620 }, { "epoch": 2.15, "grad_norm": 0.6358441114425659, "learning_rate": 0.00011121647333151892, "loss": 1.7407, "step": 64621 }, { "epoch": 2.15, "grad_norm": 0.6488077640533447, "learning_rate": 0.00011120835027077614, "loss": 1.7066, "step": 64622 }, { "epoch": 2.15, "grad_norm": 0.6442327499389648, "learning_rate": 0.00011120022743919881, "loss": 1.6636, "step": 64623 }, { "epoch": 2.15, "grad_norm": 0.6401707530021667, "learning_rate": 0.00011119210483679636, "loss": 1.6353, "step": 64624 }, { "epoch": 2.15, "grad_norm": 0.6464952826499939, "learning_rate": 0.00011118398246357888, "loss": 1.6537, "step": 64625 }, { "epoch": 2.15, "grad_norm": 0.6485065817832947, "learning_rate": 0.00011117586031955636, "loss": 1.6921, "step": 64626 }, { "epoch": 2.15, "grad_norm": 0.6445828080177307, "learning_rate": 0.00011116773840473846, "loss": 1.7126, "step": 64627 }, { "epoch": 2.15, "grad_norm": 0.6259200572967529, "learning_rate": 0.00011115961671913505, "loss": 1.6537, "step": 64628 }, { "epoch": 2.15, "grad_norm": 0.6373433470726013, "learning_rate": 0.00011115149526275605, "loss": 1.7091, "step": 64629 }, { "epoch": 2.15, "grad_norm": 0.6307209730148315, "learning_rate": 0.00011114337403561147, "loss": 1.6611, "step": 64630 }, { "epoch": 2.15, "grad_norm": 0.6351798176765442, "learning_rate": 0.00011113525303771082, "loss": 1.6258, "step": 64631 }, { "epoch": 2.15, "grad_norm": 0.662449300289154, "learning_rate": 0.00011112713226906416, "loss": 1.7374, "step": 64632 }, { "epoch": 2.15, "grad_norm": 0.6379021406173706, "learning_rate": 0.00011111901172968143, "loss": 1.7063, "step": 64633 }, { "epoch": 2.15, "grad_norm": 0.6417059302330017, "learning_rate": 0.0001111108914195724, "loss": 1.7218, "step": 64634 }, { "epoch": 2.15, "grad_norm": 0.6502596735954285, "learning_rate": 0.0001111027713387468, "loss": 1.7338, "step": 64635 }, { "epoch": 2.15, "grad_norm": 0.6442095637321472, "learning_rate": 0.00011109465148721468, "loss": 1.7595, "step": 64636 }, { "epoch": 2.15, "grad_norm": 0.6441658139228821, "learning_rate": 0.00011108653186498584, "loss": 1.7733, "step": 64637 }, { "epoch": 2.15, "grad_norm": 0.650839626789093, "learning_rate": 0.00011107841247207003, "loss": 1.7792, "step": 64638 }, { "epoch": 2.15, "grad_norm": 0.6250425577163696, "learning_rate": 0.00011107029330847719, "loss": 1.636, "step": 64639 }, { "epoch": 2.15, "grad_norm": 0.635985791683197, "learning_rate": 0.00011106217437421726, "loss": 1.7135, "step": 64640 }, { "epoch": 2.15, "grad_norm": 0.6217367053031921, "learning_rate": 0.00011105405566930002, "loss": 1.6918, "step": 64641 }, { "epoch": 2.15, "grad_norm": 0.6314323544502258, "learning_rate": 0.0001110459371937352, "loss": 1.6645, "step": 64642 }, { "epoch": 2.15, "grad_norm": 0.6261072158813477, "learning_rate": 0.00011103781894753289, "loss": 1.6949, "step": 64643 }, { "epoch": 2.15, "grad_norm": 0.6489126086235046, "learning_rate": 0.00011102970093070283, "loss": 1.6488, "step": 64644 }, { "epoch": 2.15, "grad_norm": 0.6422672271728516, "learning_rate": 0.00011102158314325475, "loss": 1.7025, "step": 64645 }, { "epoch": 2.15, "grad_norm": 0.6440290808677673, "learning_rate": 0.00011101346558519873, "loss": 1.6475, "step": 64646 }, { "epoch": 2.15, "grad_norm": 0.6424493193626404, "learning_rate": 0.00011100534825654442, "loss": 1.6788, "step": 64647 }, { "epoch": 2.15, "grad_norm": 0.6267181634902954, "learning_rate": 0.00011099723115730187, "loss": 1.7211, "step": 64648 }, { "epoch": 2.15, "grad_norm": 0.6307163834571838, "learning_rate": 0.00011098911428748073, "loss": 1.6878, "step": 64649 }, { "epoch": 2.15, "grad_norm": 0.6378067135810852, "learning_rate": 0.00011098099764709106, "loss": 1.7047, "step": 64650 }, { "epoch": 2.15, "grad_norm": 0.6232114434242249, "learning_rate": 0.00011097288123614259, "loss": 1.6732, "step": 64651 }, { "epoch": 2.15, "grad_norm": 0.6418118476867676, "learning_rate": 0.00011096476505464513, "loss": 1.7936, "step": 64652 }, { "epoch": 2.15, "grad_norm": 0.665942907333374, "learning_rate": 0.00011095664910260866, "loss": 1.7046, "step": 64653 }, { "epoch": 2.15, "grad_norm": 0.6608579754829407, "learning_rate": 0.00011094853338004289, "loss": 1.6429, "step": 64654 }, { "epoch": 2.15, "grad_norm": 0.6426955461502075, "learning_rate": 0.0001109404178869578, "loss": 1.6698, "step": 64655 }, { "epoch": 2.15, "grad_norm": 0.6326848864555359, "learning_rate": 0.0001109323026233632, "loss": 1.7263, "step": 64656 }, { "epoch": 2.15, "grad_norm": 0.6573845744132996, "learning_rate": 0.00011092418758926884, "loss": 1.7086, "step": 64657 }, { "epoch": 2.15, "grad_norm": 0.6289997100830078, "learning_rate": 0.00011091607278468478, "loss": 1.7428, "step": 64658 }, { "epoch": 2.15, "grad_norm": 0.6528629064559937, "learning_rate": 0.00011090795820962063, "loss": 1.6717, "step": 64659 }, { "epoch": 2.15, "grad_norm": 0.6350796222686768, "learning_rate": 0.00011089984386408643, "loss": 1.6763, "step": 64660 }, { "epoch": 2.15, "grad_norm": 0.6505107283592224, "learning_rate": 0.00011089172974809189, "loss": 1.7562, "step": 64661 }, { "epoch": 2.15, "grad_norm": 0.6594658493995667, "learning_rate": 0.00011088361586164704, "loss": 1.764, "step": 64662 }, { "epoch": 2.15, "grad_norm": 0.6412069201469421, "learning_rate": 0.00011087550220476158, "loss": 1.6849, "step": 64663 }, { "epoch": 2.15, "grad_norm": 0.6686384081840515, "learning_rate": 0.00011086738877744529, "loss": 1.6813, "step": 64664 }, { "epoch": 2.15, "grad_norm": 0.6170542240142822, "learning_rate": 0.00011085927557970825, "loss": 1.7294, "step": 64665 }, { "epoch": 2.15, "grad_norm": 0.6664729118347168, "learning_rate": 0.00011085116261156016, "loss": 1.7137, "step": 64666 }, { "epoch": 2.15, "grad_norm": 0.6502447128295898, "learning_rate": 0.00011084304987301081, "loss": 1.6852, "step": 64667 }, { "epoch": 2.15, "grad_norm": 0.6649304628372192, "learning_rate": 0.0001108349373640701, "loss": 1.7515, "step": 64668 }, { "epoch": 2.15, "grad_norm": 0.6591559052467346, "learning_rate": 0.00011082682508474804, "loss": 1.6884, "step": 64669 }, { "epoch": 2.15, "grad_norm": 0.6115130186080933, "learning_rate": 0.0001108187130350543, "loss": 1.714, "step": 64670 }, { "epoch": 2.15, "grad_norm": 0.6479175686836243, "learning_rate": 0.0001108106012149987, "loss": 1.7036, "step": 64671 }, { "epoch": 2.15, "grad_norm": 0.6269969940185547, "learning_rate": 0.00011080248962459122, "loss": 1.6453, "step": 64672 }, { "epoch": 2.15, "grad_norm": 0.6333402395248413, "learning_rate": 0.00011079437826384169, "loss": 1.6972, "step": 64673 }, { "epoch": 2.15, "grad_norm": 0.6226426362991333, "learning_rate": 0.00011078626713275977, "loss": 1.6863, "step": 64674 }, { "epoch": 2.15, "grad_norm": 0.6351363658905029, "learning_rate": 0.00011077815623135545, "loss": 1.6876, "step": 64675 }, { "epoch": 2.15, "grad_norm": 0.6999414563179016, "learning_rate": 0.00011077004555963877, "loss": 1.763, "step": 64676 }, { "epoch": 2.15, "grad_norm": 0.6302885413169861, "learning_rate": 0.00011076193511761915, "loss": 1.6892, "step": 64677 }, { "epoch": 2.15, "grad_norm": 0.6306769251823425, "learning_rate": 0.0001107538249053067, "loss": 1.6949, "step": 64678 }, { "epoch": 2.15, "grad_norm": 0.6377978324890137, "learning_rate": 0.00011074571492271128, "loss": 1.7487, "step": 64679 }, { "epoch": 2.15, "grad_norm": 0.6433026194572449, "learning_rate": 0.0001107376051698427, "loss": 1.7478, "step": 64680 }, { "epoch": 2.15, "grad_norm": 0.6471424698829651, "learning_rate": 0.00011072949564671069, "loss": 1.6833, "step": 64681 }, { "epoch": 2.15, "grad_norm": 0.6615245342254639, "learning_rate": 0.00011072138635332516, "loss": 1.7434, "step": 64682 }, { "epoch": 2.15, "grad_norm": 0.6650222539901733, "learning_rate": 0.00011071327728969622, "loss": 1.7528, "step": 64683 }, { "epoch": 2.15, "grad_norm": 0.62921541929245, "learning_rate": 0.00011070516845583323, "loss": 1.6739, "step": 64684 }, { "epoch": 2.15, "grad_norm": 0.6263558268547058, "learning_rate": 0.0001106970598517463, "loss": 1.7105, "step": 64685 }, { "epoch": 2.15, "grad_norm": 0.6594997644424438, "learning_rate": 0.0001106889514774453, "loss": 1.6962, "step": 64686 }, { "epoch": 2.15, "grad_norm": 0.6169931888580322, "learning_rate": 0.00011068084333294007, "loss": 1.6567, "step": 64687 }, { "epoch": 2.15, "grad_norm": 0.6352435350418091, "learning_rate": 0.00011067273541824028, "loss": 1.8086, "step": 64688 }, { "epoch": 2.15, "grad_norm": 0.637525200843811, "learning_rate": 0.00011066462773335592, "loss": 1.7409, "step": 64689 }, { "epoch": 2.15, "grad_norm": 0.6874117255210876, "learning_rate": 0.00011065652027829697, "loss": 1.6999, "step": 64690 }, { "epoch": 2.15, "grad_norm": 0.6545160412788391, "learning_rate": 0.00011064841305307292, "loss": 1.7835, "step": 64691 }, { "epoch": 2.15, "grad_norm": 0.6519598960876465, "learning_rate": 0.00011064030605769378, "loss": 1.7165, "step": 64692 }, { "epoch": 2.15, "grad_norm": 0.6362059116363525, "learning_rate": 0.00011063219929216954, "loss": 1.7044, "step": 64693 }, { "epoch": 2.15, "grad_norm": 0.6329993605613708, "learning_rate": 0.0001106240927565099, "loss": 1.7181, "step": 64694 }, { "epoch": 2.15, "grad_norm": 0.6603808999061584, "learning_rate": 0.00011061598645072461, "loss": 1.6756, "step": 64695 }, { "epoch": 2.15, "grad_norm": 0.6404622793197632, "learning_rate": 0.00011060788037482359, "loss": 1.7607, "step": 64696 }, { "epoch": 2.15, "grad_norm": 0.6230103969573975, "learning_rate": 0.00011059977452881691, "loss": 1.7419, "step": 64697 }, { "epoch": 2.15, "grad_norm": 0.6752079725265503, "learning_rate": 0.000110591668912714, "loss": 1.6612, "step": 64698 }, { "epoch": 2.15, "grad_norm": 0.6470519304275513, "learning_rate": 0.00011058356352652486, "loss": 1.6355, "step": 64699 }, { "epoch": 2.15, "grad_norm": 0.6343635320663452, "learning_rate": 0.00011057545837025949, "loss": 1.6968, "step": 64700 }, { "epoch": 2.15, "grad_norm": 0.633741557598114, "learning_rate": 0.00011056735344392761, "loss": 1.7128, "step": 64701 }, { "epoch": 2.15, "grad_norm": 0.6355466246604919, "learning_rate": 0.00011055924874753896, "loss": 1.6562, "step": 64702 }, { "epoch": 2.15, "grad_norm": 0.6429546475410461, "learning_rate": 0.00011055114428110354, "loss": 1.6718, "step": 64703 }, { "epoch": 2.15, "grad_norm": 0.6559239029884338, "learning_rate": 0.00011054304004463104, "loss": 1.6949, "step": 64704 }, { "epoch": 2.15, "grad_norm": 0.6631672978401184, "learning_rate": 0.00011053493603813147, "loss": 1.7733, "step": 64705 }, { "epoch": 2.15, "grad_norm": 0.6274310350418091, "learning_rate": 0.00011052683226161448, "loss": 1.6983, "step": 64706 }, { "epoch": 2.15, "grad_norm": 0.6585783362388611, "learning_rate": 0.00011051872871509006, "loss": 1.6654, "step": 64707 }, { "epoch": 2.15, "grad_norm": 0.6338374018669128, "learning_rate": 0.00011051062539856804, "loss": 1.7426, "step": 64708 }, { "epoch": 2.15, "grad_norm": 0.6470146179199219, "learning_rate": 0.00011050252231205806, "loss": 1.6782, "step": 64709 }, { "epoch": 2.15, "grad_norm": 0.6558733582496643, "learning_rate": 0.00011049441945557022, "loss": 1.7844, "step": 64710 }, { "epoch": 2.15, "grad_norm": 0.6393581628799438, "learning_rate": 0.00011048631682911409, "loss": 1.7239, "step": 64711 }, { "epoch": 2.15, "grad_norm": 0.6315011978149414, "learning_rate": 0.0001104782144326998, "loss": 1.7152, "step": 64712 }, { "epoch": 2.15, "grad_norm": 0.6518221497535706, "learning_rate": 0.000110470112266337, "loss": 1.7096, "step": 64713 }, { "epoch": 2.15, "grad_norm": 0.6350388526916504, "learning_rate": 0.00011046201033003549, "loss": 1.7396, "step": 64714 }, { "epoch": 2.15, "grad_norm": 0.6309176087379456, "learning_rate": 0.00011045390862380523, "loss": 1.6852, "step": 64715 }, { "epoch": 2.15, "grad_norm": 0.6460835933685303, "learning_rate": 0.00011044580714765593, "loss": 1.7019, "step": 64716 }, { "epoch": 2.15, "grad_norm": 0.6437836289405823, "learning_rate": 0.00011043770590159758, "loss": 1.669, "step": 64717 }, { "epoch": 2.15, "grad_norm": 0.6298677325248718, "learning_rate": 0.00011042960488563982, "loss": 1.7154, "step": 64718 }, { "epoch": 2.15, "grad_norm": 0.6371027827262878, "learning_rate": 0.0001104215040997927, "loss": 1.6878, "step": 64719 }, { "epoch": 2.15, "grad_norm": 0.6226949095726013, "learning_rate": 0.00011041340354406594, "loss": 1.7106, "step": 64720 }, { "epoch": 2.15, "grad_norm": 0.6194712519645691, "learning_rate": 0.00011040530321846925, "loss": 1.7154, "step": 64721 }, { "epoch": 2.15, "grad_norm": 0.6198995113372803, "learning_rate": 0.0001103972031230127, "loss": 1.7921, "step": 64722 }, { "epoch": 2.15, "grad_norm": 0.6225258111953735, "learning_rate": 0.00011038910325770603, "loss": 1.6847, "step": 64723 }, { "epoch": 2.15, "grad_norm": 0.6430913209915161, "learning_rate": 0.00011038100362255892, "loss": 1.7191, "step": 64724 }, { "epoch": 2.15, "grad_norm": 0.6576118469238281, "learning_rate": 0.00011037290421758135, "loss": 1.76, "step": 64725 }, { "epoch": 2.15, "grad_norm": 0.6388260126113892, "learning_rate": 0.00011036480504278323, "loss": 1.6627, "step": 64726 }, { "epoch": 2.15, "grad_norm": 0.6434230208396912, "learning_rate": 0.0001103567060981743, "loss": 1.6846, "step": 64727 }, { "epoch": 2.15, "grad_norm": 0.6272926926612854, "learning_rate": 0.00011034860738376426, "loss": 1.664, "step": 64728 }, { "epoch": 2.15, "grad_norm": 0.6518182754516602, "learning_rate": 0.00011034050889956319, "loss": 1.7047, "step": 64729 }, { "epoch": 2.15, "grad_norm": 0.6531379222869873, "learning_rate": 0.00011033241064558081, "loss": 1.6965, "step": 64730 }, { "epoch": 2.15, "grad_norm": 0.6493589282035828, "learning_rate": 0.00011032431262182682, "loss": 1.7683, "step": 64731 }, { "epoch": 2.15, "grad_norm": 0.6414583325386047, "learning_rate": 0.00011031621482831118, "loss": 1.7434, "step": 64732 }, { "epoch": 2.15, "grad_norm": 0.6250656247138977, "learning_rate": 0.0001103081172650439, "loss": 1.6928, "step": 64733 }, { "epoch": 2.15, "grad_norm": 0.6488673090934753, "learning_rate": 0.00011030001993203439, "loss": 1.6777, "step": 64734 }, { "epoch": 2.15, "grad_norm": 0.6622880697250366, "learning_rate": 0.00011029192282929272, "loss": 1.6325, "step": 64735 }, { "epoch": 2.15, "grad_norm": 0.6313623785972595, "learning_rate": 0.00011028382595682883, "loss": 1.6455, "step": 64736 }, { "epoch": 2.15, "grad_norm": 0.6594017148017883, "learning_rate": 0.00011027572931465241, "loss": 1.6929, "step": 64737 }, { "epoch": 2.15, "grad_norm": 0.6409728527069092, "learning_rate": 0.00011026763290277318, "loss": 1.6529, "step": 64738 }, { "epoch": 2.15, "grad_norm": 0.6346012949943542, "learning_rate": 0.00011025953672120108, "loss": 1.7086, "step": 64739 }, { "epoch": 2.15, "grad_norm": 0.6495639085769653, "learning_rate": 0.00011025144076994619, "loss": 1.7156, "step": 64740 }, { "epoch": 2.15, "grad_norm": 0.6719672679901123, "learning_rate": 0.00011024334504901785, "loss": 1.6858, "step": 64741 }, { "epoch": 2.15, "grad_norm": 0.6648728251457214, "learning_rate": 0.00011023524955842616, "loss": 1.6964, "step": 64742 }, { "epoch": 2.15, "grad_norm": 0.6337031722068787, "learning_rate": 0.00011022715429818099, "loss": 1.6371, "step": 64743 }, { "epoch": 2.15, "grad_norm": 0.6668931245803833, "learning_rate": 0.00011021905926829211, "loss": 1.7195, "step": 64744 }, { "epoch": 2.15, "grad_norm": 0.639570415019989, "learning_rate": 0.00011021096446876923, "loss": 1.7102, "step": 64745 }, { "epoch": 2.15, "grad_norm": 0.6633663177490234, "learning_rate": 0.00011020286989962227, "loss": 1.7722, "step": 64746 }, { "epoch": 2.15, "grad_norm": 0.6375348567962646, "learning_rate": 0.00011019477556086127, "loss": 1.7293, "step": 64747 }, { "epoch": 2.15, "grad_norm": 0.6582533717155457, "learning_rate": 0.00011018668145249562, "loss": 1.7153, "step": 64748 }, { "epoch": 2.15, "grad_norm": 0.6328884363174438, "learning_rate": 0.00011017858757453539, "loss": 1.6394, "step": 64749 }, { "epoch": 2.15, "grad_norm": 0.6290428042411804, "learning_rate": 0.00011017049392699049, "loss": 1.6833, "step": 64750 }, { "epoch": 2.15, "grad_norm": 0.632004976272583, "learning_rate": 0.00011016240050987064, "loss": 1.6907, "step": 64751 }, { "epoch": 2.15, "grad_norm": 0.6800612807273865, "learning_rate": 0.00011015430732318554, "loss": 1.8168, "step": 64752 }, { "epoch": 2.15, "grad_norm": 0.6557099223136902, "learning_rate": 0.00011014621436694528, "loss": 1.716, "step": 64753 }, { "epoch": 2.15, "grad_norm": 0.643895149230957, "learning_rate": 0.00011013812164115952, "loss": 1.7249, "step": 64754 }, { "epoch": 2.15, "grad_norm": 0.6503926515579224, "learning_rate": 0.000110130029145838, "loss": 1.7218, "step": 64755 }, { "epoch": 2.15, "grad_norm": 0.6422532796859741, "learning_rate": 0.00011012193688099065, "loss": 1.7589, "step": 64756 }, { "epoch": 2.15, "grad_norm": 0.6271899938583374, "learning_rate": 0.0001101138448466274, "loss": 1.7235, "step": 64757 }, { "epoch": 2.15, "grad_norm": 0.640528678894043, "learning_rate": 0.00011010575304275797, "loss": 1.7352, "step": 64758 }, { "epoch": 2.15, "grad_norm": 0.6627888083457947, "learning_rate": 0.00011009766146939206, "loss": 1.7221, "step": 64759 }, { "epoch": 2.15, "grad_norm": 0.6579465270042419, "learning_rate": 0.0001100895701265397, "loss": 1.7665, "step": 64760 }, { "epoch": 2.15, "grad_norm": 0.6344079971313477, "learning_rate": 0.00011008147901421067, "loss": 1.6838, "step": 64761 }, { "epoch": 2.15, "grad_norm": 0.6521207690238953, "learning_rate": 0.00011007338813241458, "loss": 1.7142, "step": 64762 }, { "epoch": 2.15, "grad_norm": 0.6713144779205322, "learning_rate": 0.00011006529748116153, "loss": 1.6989, "step": 64763 }, { "epoch": 2.15, "grad_norm": 0.6423065662384033, "learning_rate": 0.00011005720706046115, "loss": 1.6498, "step": 64764 }, { "epoch": 2.15, "grad_norm": 0.618666410446167, "learning_rate": 0.00011004911687032341, "loss": 1.7136, "step": 64765 }, { "epoch": 2.15, "grad_norm": 0.6403965353965759, "learning_rate": 0.00011004102691075796, "loss": 1.6599, "step": 64766 }, { "epoch": 2.15, "grad_norm": 0.655486524105072, "learning_rate": 0.00011003293718177479, "loss": 1.6323, "step": 64767 }, { "epoch": 2.15, "grad_norm": 0.6266669034957886, "learning_rate": 0.00011002484768338368, "loss": 1.73, "step": 64768 }, { "epoch": 2.15, "grad_norm": 0.6697186231613159, "learning_rate": 0.0001100167584155943, "loss": 1.623, "step": 64769 }, { "epoch": 2.15, "grad_norm": 0.637234091758728, "learning_rate": 0.00011000866937841668, "loss": 1.7269, "step": 64770 }, { "epoch": 2.15, "grad_norm": 0.6609978079795837, "learning_rate": 0.00011000058057186045, "loss": 1.7248, "step": 64771 }, { "epoch": 2.15, "grad_norm": 0.6733059287071228, "learning_rate": 0.00010999249199593561, "loss": 1.6671, "step": 64772 }, { "epoch": 2.16, "grad_norm": 0.6246360540390015, "learning_rate": 0.0001099844036506519, "loss": 1.723, "step": 64773 }, { "epoch": 2.16, "grad_norm": 1.101712942123413, "learning_rate": 0.00010997631553601902, "loss": 1.7435, "step": 64774 }, { "epoch": 2.16, "grad_norm": 0.6514136791229248, "learning_rate": 0.00010996822765204701, "loss": 1.6986, "step": 64775 }, { "epoch": 2.16, "grad_norm": 0.6420896649360657, "learning_rate": 0.00010996013999874544, "loss": 1.7025, "step": 64776 }, { "epoch": 2.16, "grad_norm": 0.652875542640686, "learning_rate": 0.00010995205257612437, "loss": 1.7171, "step": 64777 }, { "epoch": 2.16, "grad_norm": 0.6746114492416382, "learning_rate": 0.00010994396538419342, "loss": 1.7181, "step": 64778 }, { "epoch": 2.16, "grad_norm": 0.6222919225692749, "learning_rate": 0.00010993587842296258, "loss": 1.732, "step": 64779 }, { "epoch": 2.16, "grad_norm": 0.7626917362213135, "learning_rate": 0.00010992779169244156, "loss": 1.7162, "step": 64780 }, { "epoch": 2.16, "grad_norm": 0.6432373523712158, "learning_rate": 0.00010991970519264011, "loss": 1.6161, "step": 64781 }, { "epoch": 2.16, "grad_norm": 0.6182140111923218, "learning_rate": 0.00010991161892356825, "loss": 1.7184, "step": 64782 }, { "epoch": 2.16, "grad_norm": 0.6462531685829163, "learning_rate": 0.00010990353288523566, "loss": 1.7027, "step": 64783 }, { "epoch": 2.16, "grad_norm": 0.633043646812439, "learning_rate": 0.00010989544707765207, "loss": 1.7938, "step": 64784 }, { "epoch": 2.16, "grad_norm": 0.6271365880966187, "learning_rate": 0.00010988736150082743, "loss": 1.7201, "step": 64785 }, { "epoch": 2.16, "grad_norm": 0.6420279145240784, "learning_rate": 0.00010987927615477159, "loss": 1.7039, "step": 64786 }, { "epoch": 2.16, "grad_norm": 0.6639403104782104, "learning_rate": 0.00010987119103949432, "loss": 1.7444, "step": 64787 }, { "epoch": 2.16, "grad_norm": 0.632000744342804, "learning_rate": 0.00010986310615500525, "loss": 1.6903, "step": 64788 }, { "epoch": 2.16, "grad_norm": 0.6617299914360046, "learning_rate": 0.00010985502150131451, "loss": 1.7477, "step": 64789 }, { "epoch": 2.16, "grad_norm": 0.6429959535598755, "learning_rate": 0.00010984693707843174, "loss": 1.6886, "step": 64790 }, { "epoch": 2.16, "grad_norm": 0.6726977229118347, "learning_rate": 0.00010983885288636666, "loss": 1.6617, "step": 64791 }, { "epoch": 2.16, "grad_norm": 0.6462862491607666, "learning_rate": 0.00010983076892512921, "loss": 1.6618, "step": 64792 }, { "epoch": 2.16, "grad_norm": 0.6629438996315002, "learning_rate": 0.00010982268519472939, "loss": 1.7811, "step": 64793 }, { "epoch": 2.16, "grad_norm": 0.622317373752594, "learning_rate": 0.0001098146016951766, "loss": 1.65, "step": 64794 }, { "epoch": 2.16, "grad_norm": 0.6524220108985901, "learning_rate": 0.00010980651842648087, "loss": 1.7183, "step": 64795 }, { "epoch": 2.16, "grad_norm": 0.6557086706161499, "learning_rate": 0.00010979843538865207, "loss": 1.7654, "step": 64796 }, { "epoch": 2.16, "grad_norm": 0.6504511833190918, "learning_rate": 0.00010979035258169999, "loss": 1.7081, "step": 64797 }, { "epoch": 2.16, "grad_norm": 0.6401693820953369, "learning_rate": 0.00010978227000563425, "loss": 1.6459, "step": 64798 }, { "epoch": 2.16, "grad_norm": 0.6955028772354126, "learning_rate": 0.00010977418766046485, "loss": 1.7167, "step": 64799 }, { "epoch": 2.16, "grad_norm": 0.6208294034004211, "learning_rate": 0.00010976610554620175, "loss": 1.6592, "step": 64800 }, { "epoch": 2.16, "grad_norm": 0.628459095954895, "learning_rate": 0.00010975802366285435, "loss": 1.7092, "step": 64801 }, { "epoch": 2.16, "grad_norm": 0.643621027469635, "learning_rate": 0.00010974994201043268, "loss": 1.673, "step": 64802 }, { "epoch": 2.16, "grad_norm": 0.6469494700431824, "learning_rate": 0.00010974186058894664, "loss": 1.7458, "step": 64803 }, { "epoch": 2.16, "grad_norm": 0.6370836496353149, "learning_rate": 0.00010973377939840596, "loss": 1.71, "step": 64804 }, { "epoch": 2.16, "grad_norm": 0.6441069841384888, "learning_rate": 0.00010972569843882034, "loss": 1.7057, "step": 64805 }, { "epoch": 2.16, "grad_norm": 0.6369728446006775, "learning_rate": 0.0001097176177101997, "loss": 1.6475, "step": 64806 }, { "epoch": 2.16, "grad_norm": 0.6638950705528259, "learning_rate": 0.00010970953721255404, "loss": 1.6801, "step": 64807 }, { "epoch": 2.16, "grad_norm": 0.6423776745796204, "learning_rate": 0.00010970145694589273, "loss": 1.6553, "step": 64808 }, { "epoch": 2.16, "grad_norm": 0.6409444808959961, "learning_rate": 0.00010969337691022582, "loss": 1.6644, "step": 64809 }, { "epoch": 2.16, "grad_norm": 0.6411526799201965, "learning_rate": 0.00010968529710556322, "loss": 1.6231, "step": 64810 }, { "epoch": 2.16, "grad_norm": 0.6235461235046387, "learning_rate": 0.00010967721753191463, "loss": 1.667, "step": 64811 }, { "epoch": 2.16, "grad_norm": 0.9448285698890686, "learning_rate": 0.00010966913818928975, "loss": 1.73, "step": 64812 }, { "epoch": 2.16, "grad_norm": 0.658235490322113, "learning_rate": 0.00010966105907769848, "loss": 1.715, "step": 64813 }, { "epoch": 2.16, "grad_norm": 0.64589923620224, "learning_rate": 0.00010965298019715086, "loss": 1.714, "step": 64814 }, { "epoch": 2.16, "grad_norm": 0.6413772702217102, "learning_rate": 0.00010964490154765625, "loss": 1.7248, "step": 64815 }, { "epoch": 2.16, "grad_norm": 0.6174655556678772, "learning_rate": 0.00010963682312922473, "loss": 1.6914, "step": 64816 }, { "epoch": 2.16, "grad_norm": 0.6215618252754211, "learning_rate": 0.00010962874494186612, "loss": 1.7144, "step": 64817 }, { "epoch": 2.16, "grad_norm": 0.6702437996864319, "learning_rate": 0.00010962066698559017, "loss": 1.727, "step": 64818 }, { "epoch": 2.16, "grad_norm": 0.644403874874115, "learning_rate": 0.00010961258926040659, "loss": 1.7712, "step": 64819 }, { "epoch": 2.16, "grad_norm": 0.660564661026001, "learning_rate": 0.00010960451176632535, "loss": 1.6874, "step": 64820 }, { "epoch": 2.16, "grad_norm": 0.6570870876312256, "learning_rate": 0.00010959643450335612, "loss": 1.6516, "step": 64821 }, { "epoch": 2.16, "grad_norm": 0.6453903913497925, "learning_rate": 0.00010958835747150882, "loss": 1.7577, "step": 64822 }, { "epoch": 2.16, "grad_norm": 0.6341419816017151, "learning_rate": 0.00010958028067079312, "loss": 1.6679, "step": 64823 }, { "epoch": 2.16, "grad_norm": 0.6553687453269958, "learning_rate": 0.00010957220410121899, "loss": 1.7758, "step": 64824 }, { "epoch": 2.16, "grad_norm": 0.6373105645179749, "learning_rate": 0.00010956412776279615, "loss": 1.674, "step": 64825 }, { "epoch": 2.16, "grad_norm": 0.6271929740905762, "learning_rate": 0.00010955605165553431, "loss": 1.6732, "step": 64826 }, { "epoch": 2.16, "grad_norm": 0.6443453431129456, "learning_rate": 0.00010954797577944348, "loss": 1.6282, "step": 64827 }, { "epoch": 2.16, "grad_norm": 0.6381743550300598, "learning_rate": 0.00010953990013453322, "loss": 1.7263, "step": 64828 }, { "epoch": 2.16, "grad_norm": 0.6373418569564819, "learning_rate": 0.00010953182472081354, "loss": 1.7243, "step": 64829 }, { "epoch": 2.16, "grad_norm": 0.6539525985717773, "learning_rate": 0.0001095237495382942, "loss": 1.6927, "step": 64830 }, { "epoch": 2.16, "grad_norm": 0.6303071975708008, "learning_rate": 0.00010951567458698482, "loss": 1.6919, "step": 64831 }, { "epoch": 2.16, "grad_norm": 0.6280308961868286, "learning_rate": 0.00010950759986689549, "loss": 1.6891, "step": 64832 }, { "epoch": 2.16, "grad_norm": 0.6444993615150452, "learning_rate": 0.00010949952537803573, "loss": 1.7497, "step": 64833 }, { "epoch": 2.16, "grad_norm": 0.6253268122673035, "learning_rate": 0.00010949145112041559, "loss": 1.656, "step": 64834 }, { "epoch": 2.16, "grad_norm": 0.6449616551399231, "learning_rate": 0.00010948337709404467, "loss": 1.7772, "step": 64835 }, { "epoch": 2.16, "grad_norm": 0.6278563141822815, "learning_rate": 0.00010947530329893296, "loss": 1.7103, "step": 64836 }, { "epoch": 2.16, "grad_norm": 0.6447813510894775, "learning_rate": 0.00010946722973509012, "loss": 1.6105, "step": 64837 }, { "epoch": 2.16, "grad_norm": 0.6314682960510254, "learning_rate": 0.00010945915640252591, "loss": 1.6534, "step": 64838 }, { "epoch": 2.16, "grad_norm": 0.6606996655464172, "learning_rate": 0.00010945108330125033, "loss": 1.6364, "step": 64839 }, { "epoch": 2.16, "grad_norm": 0.6244741678237915, "learning_rate": 0.00010944301043127306, "loss": 1.6739, "step": 64840 }, { "epoch": 2.16, "grad_norm": 0.6348716020584106, "learning_rate": 0.0001094349377926038, "loss": 1.7069, "step": 64841 }, { "epoch": 2.16, "grad_norm": 0.669830322265625, "learning_rate": 0.0001094268653852524, "loss": 1.7428, "step": 64842 }, { "epoch": 2.16, "grad_norm": 0.6699134707450867, "learning_rate": 0.00010941879320922885, "loss": 1.7484, "step": 64843 }, { "epoch": 2.16, "grad_norm": 0.6391950249671936, "learning_rate": 0.00010941072126454279, "loss": 1.6044, "step": 64844 }, { "epoch": 2.16, "grad_norm": 0.6418435573577881, "learning_rate": 0.00010940264955120394, "loss": 1.6959, "step": 64845 }, { "epoch": 2.16, "grad_norm": 0.6295208930969238, "learning_rate": 0.00010939457806922228, "loss": 1.7401, "step": 64846 }, { "epoch": 2.16, "grad_norm": 0.636044442653656, "learning_rate": 0.00010938650681860752, "loss": 1.7049, "step": 64847 }, { "epoch": 2.16, "grad_norm": 0.6412264704704285, "learning_rate": 0.00010937843579936935, "loss": 1.7673, "step": 64848 }, { "epoch": 2.16, "grad_norm": 0.6393185257911682, "learning_rate": 0.00010937036501151768, "loss": 1.6278, "step": 64849 }, { "epoch": 2.16, "grad_norm": 0.6344767808914185, "learning_rate": 0.00010936229445506252, "loss": 1.6794, "step": 64850 }, { "epoch": 2.16, "grad_norm": 0.6362974047660828, "learning_rate": 0.00010935422413001321, "loss": 1.7141, "step": 64851 }, { "epoch": 2.16, "grad_norm": 0.6468308568000793, "learning_rate": 0.0001093461540363798, "loss": 1.7275, "step": 64852 }, { "epoch": 2.16, "grad_norm": 0.6472748517990112, "learning_rate": 0.00010933808417417215, "loss": 1.6765, "step": 64853 }, { "epoch": 2.16, "grad_norm": 0.6178920865058899, "learning_rate": 0.00010933001454339998, "loss": 1.676, "step": 64854 }, { "epoch": 2.16, "grad_norm": 0.6526771187782288, "learning_rate": 0.00010932194514407298, "loss": 1.6981, "step": 64855 }, { "epoch": 2.16, "grad_norm": 0.6624688506126404, "learning_rate": 0.00010931387597620104, "loss": 1.7277, "step": 64856 }, { "epoch": 2.16, "grad_norm": 0.6202918887138367, "learning_rate": 0.00010930580703979416, "loss": 1.6774, "step": 64857 }, { "epoch": 2.16, "grad_norm": 0.6480621099472046, "learning_rate": 0.00010929773833486178, "loss": 1.7049, "step": 64858 }, { "epoch": 2.16, "grad_norm": 0.6355927586555481, "learning_rate": 0.00010928966986141378, "loss": 1.7258, "step": 64859 }, { "epoch": 2.16, "grad_norm": 0.6563752889633179, "learning_rate": 0.00010928160161946014, "loss": 1.6577, "step": 64860 }, { "epoch": 2.16, "grad_norm": 0.6436594128608704, "learning_rate": 0.00010927353360901054, "loss": 1.6596, "step": 64861 }, { "epoch": 2.16, "grad_norm": 0.6452718377113342, "learning_rate": 0.00010926546583007466, "loss": 1.7191, "step": 64862 }, { "epoch": 2.16, "grad_norm": 0.6565884947776794, "learning_rate": 0.00010925739828266241, "loss": 1.714, "step": 64863 }, { "epoch": 2.16, "grad_norm": 0.6659584641456604, "learning_rate": 0.00010924933096678379, "loss": 1.7121, "step": 64864 }, { "epoch": 2.16, "grad_norm": 0.6482638120651245, "learning_rate": 0.00010924126388244815, "loss": 1.6919, "step": 64865 }, { "epoch": 2.16, "grad_norm": 0.6256135106086731, "learning_rate": 0.00010923319702966552, "loss": 1.6573, "step": 64866 }, { "epoch": 2.16, "grad_norm": 0.6424244046211243, "learning_rate": 0.0001092251304084458, "loss": 1.6784, "step": 64867 }, { "epoch": 2.16, "grad_norm": 0.6635797023773193, "learning_rate": 0.00010921706401879863, "loss": 1.7099, "step": 64868 }, { "epoch": 2.16, "grad_norm": 0.6247556805610657, "learning_rate": 0.00010920899786073376, "loss": 1.6638, "step": 64869 }, { "epoch": 2.16, "grad_norm": 0.6602555513381958, "learning_rate": 0.00010920093193426111, "loss": 1.7004, "step": 64870 }, { "epoch": 2.16, "grad_norm": 0.6311010718345642, "learning_rate": 0.00010919286623939045, "loss": 1.6808, "step": 64871 }, { "epoch": 2.16, "grad_norm": 0.6384832859039307, "learning_rate": 0.00010918480077613146, "loss": 1.6618, "step": 64872 }, { "epoch": 2.16, "grad_norm": 0.6499321460723877, "learning_rate": 0.00010917673554449398, "loss": 1.7022, "step": 64873 }, { "epoch": 2.16, "grad_norm": 0.6314674615859985, "learning_rate": 0.00010916867054448792, "loss": 1.676, "step": 64874 }, { "epoch": 2.16, "grad_norm": 0.6402528882026672, "learning_rate": 0.00010916060577612295, "loss": 1.7166, "step": 64875 }, { "epoch": 2.16, "grad_norm": 0.6456084251403809, "learning_rate": 0.00010915254123940883, "loss": 1.7301, "step": 64876 }, { "epoch": 2.16, "grad_norm": 0.6219509243965149, "learning_rate": 0.00010914447693435549, "loss": 1.6812, "step": 64877 }, { "epoch": 2.16, "grad_norm": 0.6326143145561218, "learning_rate": 0.00010913641286097264, "loss": 1.6569, "step": 64878 }, { "epoch": 2.16, "grad_norm": 0.647529661655426, "learning_rate": 0.00010912834901926996, "loss": 1.7793, "step": 64879 }, { "epoch": 2.16, "grad_norm": 0.637450098991394, "learning_rate": 0.00010912028540925745, "loss": 1.7192, "step": 64880 }, { "epoch": 2.16, "grad_norm": 0.6346060037612915, "learning_rate": 0.00010911222203094468, "loss": 1.7216, "step": 64881 }, { "epoch": 2.16, "grad_norm": 0.6630461812019348, "learning_rate": 0.00010910415888434164, "loss": 1.7059, "step": 64882 }, { "epoch": 2.16, "grad_norm": 0.6581446528434753, "learning_rate": 0.00010909609596945793, "loss": 1.6737, "step": 64883 }, { "epoch": 2.16, "grad_norm": 0.64609295129776, "learning_rate": 0.00010908803328630353, "loss": 1.7708, "step": 64884 }, { "epoch": 2.16, "grad_norm": 0.6461005806922913, "learning_rate": 0.00010907997083488811, "loss": 1.6835, "step": 64885 }, { "epoch": 2.16, "grad_norm": 0.6647524833679199, "learning_rate": 0.0001090719086152214, "loss": 1.6682, "step": 64886 }, { "epoch": 2.16, "grad_norm": 0.6400347948074341, "learning_rate": 0.00010906384662731335, "loss": 1.6426, "step": 64887 }, { "epoch": 2.16, "grad_norm": 0.6427531242370605, "learning_rate": 0.00010905578487117355, "loss": 1.7594, "step": 64888 }, { "epoch": 2.16, "grad_norm": 0.6375173330307007, "learning_rate": 0.00010904772334681203, "loss": 1.6664, "step": 64889 }, { "epoch": 2.16, "grad_norm": 0.6481874585151672, "learning_rate": 0.0001090396620542384, "loss": 1.6454, "step": 64890 }, { "epoch": 2.16, "grad_norm": 0.6228539943695068, "learning_rate": 0.00010903160099346241, "loss": 1.6276, "step": 64891 }, { "epoch": 2.16, "grad_norm": 0.7101825475692749, "learning_rate": 0.00010902354016449399, "loss": 1.7829, "step": 64892 }, { "epoch": 2.16, "grad_norm": 0.6379523277282715, "learning_rate": 0.00010901547956734277, "loss": 1.7034, "step": 64893 }, { "epoch": 2.16, "grad_norm": 0.6491634845733643, "learning_rate": 0.00010900741920201871, "loss": 1.6715, "step": 64894 }, { "epoch": 2.16, "grad_norm": 0.6434823870658875, "learning_rate": 0.00010899935906853141, "loss": 1.7279, "step": 64895 }, { "epoch": 2.16, "grad_norm": 0.632969081401825, "learning_rate": 0.00010899129916689085, "loss": 1.7248, "step": 64896 }, { "epoch": 2.16, "grad_norm": 0.6338464021682739, "learning_rate": 0.00010898323949710667, "loss": 1.7574, "step": 64897 }, { "epoch": 2.16, "grad_norm": 0.6481916308403015, "learning_rate": 0.00010897518005918863, "loss": 1.7275, "step": 64898 }, { "epoch": 2.16, "grad_norm": 0.6448917388916016, "learning_rate": 0.00010896712085314668, "loss": 1.6551, "step": 64899 }, { "epoch": 2.16, "grad_norm": 0.6218259334564209, "learning_rate": 0.00010895906187899047, "loss": 1.7708, "step": 64900 }, { "epoch": 2.16, "grad_norm": 0.6605492234230042, "learning_rate": 0.00010895100313672973, "loss": 1.6516, "step": 64901 }, { "epoch": 2.16, "grad_norm": 0.6533161401748657, "learning_rate": 0.00010894294462637432, "loss": 1.7193, "step": 64902 }, { "epoch": 2.16, "grad_norm": 0.6629765033721924, "learning_rate": 0.00010893488634793412, "loss": 1.7015, "step": 64903 }, { "epoch": 2.16, "grad_norm": 0.6153842210769653, "learning_rate": 0.00010892682830141885, "loss": 1.6594, "step": 64904 }, { "epoch": 2.16, "grad_norm": 0.6706284284591675, "learning_rate": 0.0001089187704868381, "loss": 1.674, "step": 64905 }, { "epoch": 2.16, "grad_norm": 0.6391380429267883, "learning_rate": 0.00010891071290420193, "loss": 1.7063, "step": 64906 }, { "epoch": 2.16, "grad_norm": 0.6567094922065735, "learning_rate": 0.00010890265555352, "loss": 1.6867, "step": 64907 }, { "epoch": 2.16, "grad_norm": 0.6484729647636414, "learning_rate": 0.00010889459843480199, "loss": 1.7017, "step": 64908 }, { "epoch": 2.16, "grad_norm": 0.6264581084251404, "learning_rate": 0.0001088865415480578, "loss": 1.6523, "step": 64909 }, { "epoch": 2.16, "grad_norm": 0.6529916524887085, "learning_rate": 0.00010887848489329736, "loss": 1.702, "step": 64910 }, { "epoch": 2.16, "grad_norm": 0.644713819026947, "learning_rate": 0.00010887042847053009, "loss": 1.7293, "step": 64911 }, { "epoch": 2.16, "grad_norm": 0.6498860716819763, "learning_rate": 0.00010886237227976595, "loss": 1.6934, "step": 64912 }, { "epoch": 2.16, "grad_norm": 0.6498456001281738, "learning_rate": 0.00010885431632101483, "loss": 1.7085, "step": 64913 }, { "epoch": 2.16, "grad_norm": 0.6306748390197754, "learning_rate": 0.00010884626059428642, "loss": 1.7316, "step": 64914 }, { "epoch": 2.16, "grad_norm": 0.6626954674720764, "learning_rate": 0.00010883820509959039, "loss": 1.7371, "step": 64915 }, { "epoch": 2.16, "grad_norm": 0.6703392267227173, "learning_rate": 0.00010883014983693659, "loss": 1.7642, "step": 64916 }, { "epoch": 2.16, "grad_norm": 0.6422877907752991, "learning_rate": 0.00010882209480633502, "loss": 1.6749, "step": 64917 }, { "epoch": 2.16, "grad_norm": 0.6414510607719421, "learning_rate": 0.00010881404000779507, "loss": 1.7416, "step": 64918 }, { "epoch": 2.16, "grad_norm": 0.6376394033432007, "learning_rate": 0.00010880598544132671, "loss": 1.7286, "step": 64919 }, { "epoch": 2.16, "grad_norm": 0.6639971733093262, "learning_rate": 0.0001087979311069398, "loss": 1.6985, "step": 64920 }, { "epoch": 2.16, "grad_norm": 0.646259605884552, "learning_rate": 0.00010878987700464403, "loss": 1.7228, "step": 64921 }, { "epoch": 2.16, "grad_norm": 0.6413798332214355, "learning_rate": 0.00010878182313444907, "loss": 1.7694, "step": 64922 }, { "epoch": 2.16, "grad_norm": 0.6357279419898987, "learning_rate": 0.00010877376949636482, "loss": 1.6975, "step": 64923 }, { "epoch": 2.16, "grad_norm": 0.6186638474464417, "learning_rate": 0.00010876571609040124, "loss": 1.6688, "step": 64924 }, { "epoch": 2.16, "grad_norm": 0.6417349576950073, "learning_rate": 0.00010875766291656768, "loss": 1.6064, "step": 64925 }, { "epoch": 2.16, "grad_norm": 0.6654760241508484, "learning_rate": 0.00010874960997487417, "loss": 1.7711, "step": 64926 }, { "epoch": 2.16, "grad_norm": 0.6581688523292542, "learning_rate": 0.00010874155726533053, "loss": 1.6639, "step": 64927 }, { "epoch": 2.16, "grad_norm": 0.6385937333106995, "learning_rate": 0.00010873350478794647, "loss": 1.6796, "step": 64928 }, { "epoch": 2.16, "grad_norm": 0.6310034394264221, "learning_rate": 0.00010872545254273165, "loss": 1.6502, "step": 64929 }, { "epoch": 2.16, "grad_norm": 0.6671936511993408, "learning_rate": 0.00010871740052969593, "loss": 1.7472, "step": 64930 }, { "epoch": 2.16, "grad_norm": 0.6467107534408569, "learning_rate": 0.00010870934874884924, "loss": 1.6888, "step": 64931 }, { "epoch": 2.16, "grad_norm": 0.6357900500297546, "learning_rate": 0.0001087012972002012, "loss": 1.6843, "step": 64932 }, { "epoch": 2.16, "grad_norm": 0.6265302300453186, "learning_rate": 0.00010869324588376148, "loss": 1.6455, "step": 64933 }, { "epoch": 2.16, "grad_norm": 0.6447551846504211, "learning_rate": 0.00010868519479954007, "loss": 1.6099, "step": 64934 }, { "epoch": 2.16, "grad_norm": 0.6477961540222168, "learning_rate": 0.00010867714394754668, "loss": 1.7674, "step": 64935 }, { "epoch": 2.16, "grad_norm": 0.6578699946403503, "learning_rate": 0.0001086690933277909, "loss": 1.721, "step": 64936 }, { "epoch": 2.16, "grad_norm": 0.6537878513336182, "learning_rate": 0.00010866104294028278, "loss": 1.7468, "step": 64937 }, { "epoch": 2.16, "grad_norm": 0.6275776624679565, "learning_rate": 0.00010865299278503188, "loss": 1.6702, "step": 64938 }, { "epoch": 2.16, "grad_norm": 0.6332516670227051, "learning_rate": 0.0001086449428620481, "loss": 1.6517, "step": 64939 }, { "epoch": 2.16, "grad_norm": 0.6482363939285278, "learning_rate": 0.00010863689317134111, "loss": 1.6983, "step": 64940 }, { "epoch": 2.16, "grad_norm": 0.6521512269973755, "learning_rate": 0.00010862884371292084, "loss": 1.7479, "step": 64941 }, { "epoch": 2.16, "grad_norm": 0.6620273590087891, "learning_rate": 0.00010862079448679691, "loss": 1.7304, "step": 64942 }, { "epoch": 2.16, "grad_norm": 0.6381518840789795, "learning_rate": 0.00010861274549297909, "loss": 1.6697, "step": 64943 }, { "epoch": 2.16, "grad_norm": 0.6416782736778259, "learning_rate": 0.00010860469673147726, "loss": 1.6522, "step": 64944 }, { "epoch": 2.16, "grad_norm": 0.6261588335037231, "learning_rate": 0.00010859664820230103, "loss": 1.6756, "step": 64945 }, { "epoch": 2.16, "grad_norm": 0.6615686416625977, "learning_rate": 0.00010858859990546039, "loss": 1.7506, "step": 64946 }, { "epoch": 2.16, "grad_norm": 0.6303713917732239, "learning_rate": 0.00010858055184096494, "loss": 1.6939, "step": 64947 }, { "epoch": 2.16, "grad_norm": 0.6424956321716309, "learning_rate": 0.00010857250400882445, "loss": 1.7132, "step": 64948 }, { "epoch": 2.16, "grad_norm": 0.6310818195343018, "learning_rate": 0.0001085644564090488, "loss": 1.6396, "step": 64949 }, { "epoch": 2.16, "grad_norm": 0.6370071768760681, "learning_rate": 0.00010855640904164762, "loss": 1.7534, "step": 64950 }, { "epoch": 2.16, "grad_norm": 0.6382655501365662, "learning_rate": 0.00010854836190663085, "loss": 1.7025, "step": 64951 }, { "epoch": 2.16, "grad_norm": 0.6518677473068237, "learning_rate": 0.00010854031500400804, "loss": 1.6642, "step": 64952 }, { "epoch": 2.16, "grad_norm": 0.6387780904769897, "learning_rate": 0.00010853226833378916, "loss": 1.724, "step": 64953 }, { "epoch": 2.16, "grad_norm": 0.6775373220443726, "learning_rate": 0.00010852422189598395, "loss": 1.6959, "step": 64954 }, { "epoch": 2.16, "grad_norm": 0.6500796675682068, "learning_rate": 0.00010851617569060198, "loss": 1.6109, "step": 64955 }, { "epoch": 2.16, "grad_norm": 0.6495608687400818, "learning_rate": 0.00010850812971765327, "loss": 1.7152, "step": 64956 }, { "epoch": 2.16, "grad_norm": 0.647040605545044, "learning_rate": 0.00010850008397714747, "loss": 1.6922, "step": 64957 }, { "epoch": 2.16, "grad_norm": 0.6740280389785767, "learning_rate": 0.00010849203846909426, "loss": 1.706, "step": 64958 }, { "epoch": 2.16, "grad_norm": 0.6715750098228455, "learning_rate": 0.0001084839931935035, "loss": 1.7359, "step": 64959 }, { "epoch": 2.16, "grad_norm": 0.6724088191986084, "learning_rate": 0.00010847594815038504, "loss": 1.6992, "step": 64960 }, { "epoch": 2.16, "grad_norm": 0.6407166123390198, "learning_rate": 0.00010846790333974855, "loss": 1.6592, "step": 64961 }, { "epoch": 2.16, "grad_norm": 0.666800320148468, "learning_rate": 0.00010845985876160372, "loss": 1.7964, "step": 64962 }, { "epoch": 2.16, "grad_norm": 0.6134241819381714, "learning_rate": 0.00010845181441596052, "loss": 1.674, "step": 64963 }, { "epoch": 2.16, "grad_norm": 0.6281288266181946, "learning_rate": 0.00010844377030282858, "loss": 1.6745, "step": 64964 }, { "epoch": 2.16, "grad_norm": 0.6762239336967468, "learning_rate": 0.00010843572642221755, "loss": 1.7235, "step": 64965 }, { "epoch": 2.16, "grad_norm": 0.6419856548309326, "learning_rate": 0.00010842768277413734, "loss": 1.6682, "step": 64966 }, { "epoch": 2.16, "grad_norm": 0.633927047252655, "learning_rate": 0.00010841963935859793, "loss": 1.6961, "step": 64967 }, { "epoch": 2.16, "grad_norm": 0.6552302837371826, "learning_rate": 0.00010841159617560863, "loss": 1.6906, "step": 64968 }, { "epoch": 2.16, "grad_norm": 0.6353012323379517, "learning_rate": 0.0001084035532251794, "loss": 1.8053, "step": 64969 }, { "epoch": 2.16, "grad_norm": 0.6410965919494629, "learning_rate": 0.00010839551050732013, "loss": 1.6943, "step": 64970 }, { "epoch": 2.16, "grad_norm": 0.655889093875885, "learning_rate": 0.00010838746802204052, "loss": 1.7014, "step": 64971 }, { "epoch": 2.16, "grad_norm": 0.7082205414772034, "learning_rate": 0.00010837942576935014, "loss": 1.7044, "step": 64972 }, { "epoch": 2.16, "grad_norm": 0.6464101076126099, "learning_rate": 0.00010837138374925893, "loss": 1.681, "step": 64973 }, { "epoch": 2.16, "grad_norm": 0.649191677570343, "learning_rate": 0.00010836334196177683, "loss": 1.716, "step": 64974 }, { "epoch": 2.16, "grad_norm": 0.6439705491065979, "learning_rate": 0.00010835530040691316, "loss": 1.7625, "step": 64975 }, { "epoch": 2.16, "grad_norm": 0.6386271119117737, "learning_rate": 0.00010834725908467794, "loss": 1.746, "step": 64976 }, { "epoch": 2.16, "grad_norm": 0.6370332837104797, "learning_rate": 0.00010833921799508103, "loss": 1.6641, "step": 64977 }, { "epoch": 2.16, "grad_norm": 0.6247832179069519, "learning_rate": 0.00010833117713813205, "loss": 1.6784, "step": 64978 }, { "epoch": 2.16, "grad_norm": 0.6719650030136108, "learning_rate": 0.00010832313651384066, "loss": 1.7182, "step": 64979 }, { "epoch": 2.16, "grad_norm": 0.630782425403595, "learning_rate": 0.00010831509612221678, "loss": 1.7084, "step": 64980 }, { "epoch": 2.16, "grad_norm": 0.6223143339157104, "learning_rate": 0.00010830705596327032, "loss": 1.7435, "step": 64981 }, { "epoch": 2.16, "grad_norm": 0.704862117767334, "learning_rate": 0.00010829901603701063, "loss": 1.6726, "step": 64982 }, { "epoch": 2.16, "grad_norm": 0.6562325358390808, "learning_rate": 0.00010829097634344771, "loss": 1.7011, "step": 64983 }, { "epoch": 2.16, "grad_norm": 0.6385786533355713, "learning_rate": 0.00010828293688259138, "loss": 1.7726, "step": 64984 }, { "epoch": 2.16, "grad_norm": 0.6297286152839661, "learning_rate": 0.00010827489765445131, "loss": 1.7032, "step": 64985 }, { "epoch": 2.16, "grad_norm": 0.6568287014961243, "learning_rate": 0.00010826685865903721, "loss": 1.6279, "step": 64986 }, { "epoch": 2.16, "grad_norm": 0.6451336145401001, "learning_rate": 0.00010825881989635896, "loss": 1.7314, "step": 64987 }, { "epoch": 2.16, "grad_norm": 0.8384917378425598, "learning_rate": 0.00010825078136642625, "loss": 1.7435, "step": 64988 }, { "epoch": 2.16, "grad_norm": 0.6371577978134155, "learning_rate": 0.00010824274306924874, "loss": 1.7245, "step": 64989 }, { "epoch": 2.16, "grad_norm": 0.6322133541107178, "learning_rate": 0.00010823470500483627, "loss": 1.7056, "step": 64990 }, { "epoch": 2.16, "grad_norm": 0.654037356376648, "learning_rate": 0.00010822666717319872, "loss": 1.6991, "step": 64991 }, { "epoch": 2.16, "grad_norm": 0.6525024771690369, "learning_rate": 0.00010821862957434577, "loss": 1.6801, "step": 64992 }, { "epoch": 2.16, "grad_norm": 0.6347719430923462, "learning_rate": 0.000108210592208287, "loss": 1.7219, "step": 64993 }, { "epoch": 2.16, "grad_norm": 0.6725878119468689, "learning_rate": 0.00010820255507503245, "loss": 1.7468, "step": 64994 }, { "epoch": 2.16, "grad_norm": 0.636091947555542, "learning_rate": 0.00010819451817459172, "loss": 1.7298, "step": 64995 }, { "epoch": 2.16, "grad_norm": 0.6355144381523132, "learning_rate": 0.0001081864815069745, "loss": 1.7382, "step": 64996 }, { "epoch": 2.16, "grad_norm": 0.6365163922309875, "learning_rate": 0.00010817844507219072, "loss": 1.656, "step": 64997 }, { "epoch": 2.16, "grad_norm": 0.6544215083122253, "learning_rate": 0.00010817040887024996, "loss": 1.7412, "step": 64998 }, { "epoch": 2.16, "grad_norm": 0.6395999193191528, "learning_rate": 0.00010816237290116214, "loss": 1.6476, "step": 64999 }, { "epoch": 2.16, "grad_norm": 0.6489384770393372, "learning_rate": 0.00010815433716493683, "loss": 1.7009, "step": 65000 }, { "epoch": 2.16, "grad_norm": 0.6724028587341309, "learning_rate": 0.000108146301661584, "loss": 1.7188, "step": 65001 }, { "epoch": 2.16, "grad_norm": 0.6372383832931519, "learning_rate": 0.00010813826639111326, "loss": 1.7556, "step": 65002 }, { "epoch": 2.16, "grad_norm": 0.6286243796348572, "learning_rate": 0.00010813023135353434, "loss": 1.5962, "step": 65003 }, { "epoch": 2.16, "grad_norm": 0.6276435852050781, "learning_rate": 0.00010812219654885711, "loss": 1.708, "step": 65004 }, { "epoch": 2.16, "grad_norm": 0.639366626739502, "learning_rate": 0.00010811416197709118, "loss": 1.7363, "step": 65005 }, { "epoch": 2.16, "grad_norm": 0.656535267829895, "learning_rate": 0.00010810612763824649, "loss": 1.6671, "step": 65006 }, { "epoch": 2.16, "grad_norm": 0.7090985178947449, "learning_rate": 0.00010809809353233267, "loss": 1.7287, "step": 65007 }, { "epoch": 2.16, "grad_norm": 0.6309010982513428, "learning_rate": 0.00010809005965935939, "loss": 1.7093, "step": 65008 }, { "epoch": 2.16, "grad_norm": 0.6345393061637878, "learning_rate": 0.00010808202601933664, "loss": 1.6416, "step": 65009 }, { "epoch": 2.16, "grad_norm": 0.6564882397651672, "learning_rate": 0.00010807399261227388, "loss": 1.698, "step": 65010 }, { "epoch": 2.16, "grad_norm": 0.6514339447021484, "learning_rate": 0.00010806595943818115, "loss": 1.6555, "step": 65011 }, { "epoch": 2.16, "grad_norm": 0.6313648223876953, "learning_rate": 0.00010805792649706796, "loss": 1.6703, "step": 65012 }, { "epoch": 2.16, "grad_norm": 0.6524590849876404, "learning_rate": 0.00010804989378894423, "loss": 1.7437, "step": 65013 }, { "epoch": 2.16, "grad_norm": 0.6344342231750488, "learning_rate": 0.00010804186131381968, "loss": 1.6388, "step": 65014 }, { "epoch": 2.16, "grad_norm": 0.6510899066925049, "learning_rate": 0.00010803382907170392, "loss": 1.7886, "step": 65015 }, { "epoch": 2.16, "grad_norm": 0.6378697752952576, "learning_rate": 0.00010802579706260694, "loss": 1.6237, "step": 65016 }, { "epoch": 2.16, "grad_norm": 0.6498932242393494, "learning_rate": 0.0001080177652865383, "loss": 1.695, "step": 65017 }, { "epoch": 2.16, "grad_norm": 0.6546392440795898, "learning_rate": 0.00010800973374350773, "loss": 1.7268, "step": 65018 }, { "epoch": 2.16, "grad_norm": 0.6379793286323547, "learning_rate": 0.00010800170243352504, "loss": 1.6885, "step": 65019 }, { "epoch": 2.16, "grad_norm": 0.6212782859802246, "learning_rate": 0.00010799367135660012, "loss": 1.686, "step": 65020 }, { "epoch": 2.16, "grad_norm": 0.6127384305000305, "learning_rate": 0.0001079856405127426, "loss": 1.7021, "step": 65021 }, { "epoch": 2.16, "grad_norm": 0.6579448580741882, "learning_rate": 0.00010797760990196206, "loss": 1.6499, "step": 65022 }, { "epoch": 2.16, "grad_norm": 0.6394723653793335, "learning_rate": 0.00010796957952426856, "loss": 1.7544, "step": 65023 }, { "epoch": 2.16, "grad_norm": 0.6767576336860657, "learning_rate": 0.00010796154937967168, "loss": 1.6718, "step": 65024 }, { "epoch": 2.16, "grad_norm": 0.6363019943237305, "learning_rate": 0.00010795351946818112, "loss": 1.6553, "step": 65025 }, { "epoch": 2.16, "grad_norm": 0.6211232542991638, "learning_rate": 0.00010794548978980664, "loss": 1.7389, "step": 65026 }, { "epoch": 2.16, "grad_norm": 0.6514908671379089, "learning_rate": 0.00010793746034455824, "loss": 1.6823, "step": 65027 }, { "epoch": 2.16, "grad_norm": 0.631738007068634, "learning_rate": 0.0001079294311324453, "loss": 1.715, "step": 65028 }, { "epoch": 2.16, "grad_norm": 0.672024130821228, "learning_rate": 0.00010792140215347773, "loss": 1.6314, "step": 65029 }, { "epoch": 2.16, "grad_norm": 0.6522988080978394, "learning_rate": 0.00010791337340766534, "loss": 1.6696, "step": 65030 }, { "epoch": 2.16, "grad_norm": 0.6464220881462097, "learning_rate": 0.00010790534489501784, "loss": 1.7467, "step": 65031 }, { "epoch": 2.16, "grad_norm": 0.6574890613555908, "learning_rate": 0.00010789731661554483, "loss": 1.6436, "step": 65032 }, { "epoch": 2.16, "grad_norm": 0.614600658416748, "learning_rate": 0.00010788928856925619, "loss": 1.7053, "step": 65033 }, { "epoch": 2.16, "grad_norm": 0.6512948274612427, "learning_rate": 0.00010788126075616184, "loss": 1.738, "step": 65034 }, { "epoch": 2.16, "grad_norm": 0.6526303887367249, "learning_rate": 0.00010787323317627111, "loss": 1.7133, "step": 65035 }, { "epoch": 2.16, "grad_norm": 0.6493434906005859, "learning_rate": 0.00010786520582959397, "loss": 1.6897, "step": 65036 }, { "epoch": 2.16, "grad_norm": 0.68634432554245, "learning_rate": 0.0001078571787161403, "loss": 1.8015, "step": 65037 }, { "epoch": 2.16, "grad_norm": 0.6474424600601196, "learning_rate": 0.00010784915183591967, "loss": 1.7676, "step": 65038 }, { "epoch": 2.16, "grad_norm": 0.6444979310035706, "learning_rate": 0.00010784112518894176, "loss": 1.6826, "step": 65039 }, { "epoch": 2.16, "grad_norm": 0.6348983645439148, "learning_rate": 0.00010783309877521644, "loss": 1.7127, "step": 65040 }, { "epoch": 2.16, "grad_norm": 0.636569082736969, "learning_rate": 0.00010782507259475358, "loss": 1.7501, "step": 65041 }, { "epoch": 2.16, "grad_norm": 0.6423043012619019, "learning_rate": 0.0001078170466475626, "loss": 1.6476, "step": 65042 }, { "epoch": 2.16, "grad_norm": 0.6488098502159119, "learning_rate": 0.00010780902093365338, "loss": 1.7113, "step": 65043 }, { "epoch": 2.16, "grad_norm": 0.6650853157043457, "learning_rate": 0.00010780099545303582, "loss": 1.7749, "step": 65044 }, { "epoch": 2.16, "grad_norm": 0.6473982930183411, "learning_rate": 0.00010779297020571952, "loss": 1.7785, "step": 65045 }, { "epoch": 2.16, "grad_norm": 0.6419059038162231, "learning_rate": 0.00010778494519171413, "loss": 1.7493, "step": 65046 }, { "epoch": 2.16, "grad_norm": 0.6558548212051392, "learning_rate": 0.00010777692041102961, "loss": 1.7013, "step": 65047 }, { "epoch": 2.16, "grad_norm": 0.6350533366203308, "learning_rate": 0.00010776889586367548, "loss": 1.7039, "step": 65048 }, { "epoch": 2.16, "grad_norm": 0.6498600840568542, "learning_rate": 0.00010776087154966165, "loss": 1.7195, "step": 65049 }, { "epoch": 2.16, "grad_norm": 0.6743763089179993, "learning_rate": 0.00010775284746899774, "loss": 1.7201, "step": 65050 }, { "epoch": 2.16, "grad_norm": 0.6539546251296997, "learning_rate": 0.00010774482362169362, "loss": 1.6586, "step": 65051 }, { "epoch": 2.16, "grad_norm": 0.6398411989212036, "learning_rate": 0.000107736800007759, "loss": 1.6855, "step": 65052 }, { "epoch": 2.16, "grad_norm": 0.6436398029327393, "learning_rate": 0.00010772877662720345, "loss": 1.7201, "step": 65053 }, { "epoch": 2.16, "grad_norm": 0.6363444924354553, "learning_rate": 0.00010772075348003691, "loss": 1.6324, "step": 65054 }, { "epoch": 2.16, "grad_norm": 0.6270835399627686, "learning_rate": 0.00010771273056626898, "loss": 1.69, "step": 65055 }, { "epoch": 2.16, "grad_norm": 0.6546576619148254, "learning_rate": 0.00010770470788590954, "loss": 1.7455, "step": 65056 }, { "epoch": 2.16, "grad_norm": 0.6442671418190002, "learning_rate": 0.00010769668543896818, "loss": 1.7613, "step": 65057 }, { "epoch": 2.16, "grad_norm": 0.6590513586997986, "learning_rate": 0.00010768866322545477, "loss": 1.7307, "step": 65058 }, { "epoch": 2.16, "grad_norm": 0.6393681168556213, "learning_rate": 0.00010768064124537902, "loss": 1.6581, "step": 65059 }, { "epoch": 2.16, "grad_norm": 0.6377359628677368, "learning_rate": 0.0001076726194987505, "loss": 1.6938, "step": 65060 }, { "epoch": 2.16, "grad_norm": 0.6544098854064941, "learning_rate": 0.0001076645979855792, "loss": 1.6817, "step": 65061 }, { "epoch": 2.16, "grad_norm": 0.6500393748283386, "learning_rate": 0.00010765657670587465, "loss": 1.703, "step": 65062 }, { "epoch": 2.16, "grad_norm": 0.6433366537094116, "learning_rate": 0.00010764855565964678, "loss": 1.7674, "step": 65063 }, { "epoch": 2.16, "grad_norm": 0.6284173727035522, "learning_rate": 0.00010764053484690523, "loss": 1.7208, "step": 65064 }, { "epoch": 2.16, "grad_norm": 0.6383954882621765, "learning_rate": 0.0001076325142676596, "loss": 1.7142, "step": 65065 }, { "epoch": 2.16, "grad_norm": 0.6344938278198242, "learning_rate": 0.00010762449392191986, "loss": 1.7363, "step": 65066 }, { "epoch": 2.16, "grad_norm": 0.6322655081748962, "learning_rate": 0.00010761647380969552, "loss": 1.6222, "step": 65067 }, { "epoch": 2.16, "grad_norm": 0.6612281203269958, "learning_rate": 0.00010760845393099657, "loss": 1.7334, "step": 65068 }, { "epoch": 2.16, "grad_norm": 0.6462553143501282, "learning_rate": 0.00010760043428583249, "loss": 1.7318, "step": 65069 }, { "epoch": 2.16, "grad_norm": 0.6264997124671936, "learning_rate": 0.00010759241487421323, "loss": 1.7212, "step": 65070 }, { "epoch": 2.16, "grad_norm": 0.6517712473869324, "learning_rate": 0.00010758439569614843, "loss": 1.7346, "step": 65071 }, { "epoch": 2.16, "grad_norm": 0.6463004946708679, "learning_rate": 0.00010757637675164776, "loss": 1.7075, "step": 65072 }, { "epoch": 2.16, "grad_norm": 0.6514096856117249, "learning_rate": 0.00010756835804072106, "loss": 1.6734, "step": 65073 }, { "epoch": 2.17, "grad_norm": 0.6337398290634155, "learning_rate": 0.00010756033956337805, "loss": 1.7752, "step": 65074 }, { "epoch": 2.17, "grad_norm": 0.6647716760635376, "learning_rate": 0.00010755232131962832, "loss": 1.7198, "step": 65075 }, { "epoch": 2.17, "grad_norm": 0.6665955781936646, "learning_rate": 0.00010754430330948172, "loss": 1.6977, "step": 65076 }, { "epoch": 2.17, "grad_norm": 0.628684937953949, "learning_rate": 0.0001075362855329481, "loss": 1.6487, "step": 65077 }, { "epoch": 2.17, "grad_norm": 0.622769832611084, "learning_rate": 0.00010752826799003705, "loss": 1.7052, "step": 65078 }, { "epoch": 2.17, "grad_norm": 0.6440925002098083, "learning_rate": 0.00010752025068075823, "loss": 1.6946, "step": 65079 }, { "epoch": 2.17, "grad_norm": 0.6355393528938293, "learning_rate": 0.00010751223360512157, "loss": 1.6946, "step": 65080 }, { "epoch": 2.17, "grad_norm": 0.6292645931243896, "learning_rate": 0.00010750421676313668, "loss": 1.713, "step": 65081 }, { "epoch": 2.17, "grad_norm": 0.6566026210784912, "learning_rate": 0.0001074962001548132, "loss": 1.7281, "step": 65082 }, { "epoch": 2.17, "grad_norm": 0.6237800121307373, "learning_rate": 0.00010748818378016102, "loss": 1.7262, "step": 65083 }, { "epoch": 2.17, "grad_norm": 0.8026023507118225, "learning_rate": 0.00010748016763918997, "loss": 1.6798, "step": 65084 }, { "epoch": 2.17, "grad_norm": 0.6577298641204834, "learning_rate": 0.00010747215173190946, "loss": 1.7891, "step": 65085 }, { "epoch": 2.17, "grad_norm": 0.6679889559745789, "learning_rate": 0.00010746413605832937, "loss": 1.7476, "step": 65086 }, { "epoch": 2.17, "grad_norm": 0.6310563087463379, "learning_rate": 0.00010745612061845954, "loss": 1.6083, "step": 65087 }, { "epoch": 2.17, "grad_norm": 0.6487143039703369, "learning_rate": 0.00010744810541230965, "loss": 1.6581, "step": 65088 }, { "epoch": 2.17, "grad_norm": 0.6911807656288147, "learning_rate": 0.00010744009043988926, "loss": 1.5835, "step": 65089 }, { "epoch": 2.17, "grad_norm": 0.6525706648826599, "learning_rate": 0.00010743207570120823, "loss": 1.7141, "step": 65090 }, { "epoch": 2.17, "grad_norm": 0.6473953723907471, "learning_rate": 0.00010742406119627649, "loss": 1.7431, "step": 65091 }, { "epoch": 2.17, "grad_norm": 0.6231892108917236, "learning_rate": 0.00010741604692510336, "loss": 1.6823, "step": 65092 }, { "epoch": 2.17, "grad_norm": 0.6385924220085144, "learning_rate": 0.00010740803288769878, "loss": 1.6975, "step": 65093 }, { "epoch": 2.17, "grad_norm": 0.6443174481391907, "learning_rate": 0.00010740001908407257, "loss": 1.7095, "step": 65094 }, { "epoch": 2.17, "grad_norm": 0.6574185490608215, "learning_rate": 0.00010739200551423434, "loss": 1.7172, "step": 65095 }, { "epoch": 2.17, "grad_norm": 0.6364244222640991, "learning_rate": 0.00010738399217819374, "loss": 1.6247, "step": 65096 }, { "epoch": 2.17, "grad_norm": 0.6490427851676941, "learning_rate": 0.00010737597907596063, "loss": 1.7276, "step": 65097 }, { "epoch": 2.17, "grad_norm": 0.6564835906028748, "learning_rate": 0.00010736796620754487, "loss": 1.7804, "step": 65098 }, { "epoch": 2.17, "grad_norm": 0.6339844465255737, "learning_rate": 0.00010735995357295581, "loss": 1.6912, "step": 65099 }, { "epoch": 2.17, "grad_norm": 0.6353603005409241, "learning_rate": 0.0001073519411722034, "loss": 1.7463, "step": 65100 }, { "epoch": 2.17, "grad_norm": 0.6347240805625916, "learning_rate": 0.00010734392900529743, "loss": 1.7055, "step": 65101 }, { "epoch": 2.17, "grad_norm": 0.6482712030410767, "learning_rate": 0.00010733591707224757, "loss": 1.6751, "step": 65102 }, { "epoch": 2.17, "grad_norm": 0.658149778842926, "learning_rate": 0.0001073279053730634, "loss": 1.7383, "step": 65103 }, { "epoch": 2.17, "grad_norm": 0.6215232014656067, "learning_rate": 0.00010731989390775487, "loss": 1.6526, "step": 65104 }, { "epoch": 2.17, "grad_norm": 0.6665868759155273, "learning_rate": 0.00010731188267633159, "loss": 1.6934, "step": 65105 }, { "epoch": 2.17, "grad_norm": 0.6617581248283386, "learning_rate": 0.0001073038716788032, "loss": 1.6584, "step": 65106 }, { "epoch": 2.17, "grad_norm": 0.6382997632026672, "learning_rate": 0.00010729586091517951, "loss": 1.7087, "step": 65107 }, { "epoch": 2.17, "grad_norm": 0.6815709471702576, "learning_rate": 0.00010728785038547035, "loss": 1.693, "step": 65108 }, { "epoch": 2.17, "grad_norm": 0.6462951302528381, "learning_rate": 0.00010727984008968537, "loss": 1.7234, "step": 65109 }, { "epoch": 2.17, "grad_norm": 0.6349969506263733, "learning_rate": 0.00010727183002783414, "loss": 1.6897, "step": 65110 }, { "epoch": 2.17, "grad_norm": 0.6489465236663818, "learning_rate": 0.00010726382019992663, "loss": 1.6552, "step": 65111 }, { "epoch": 2.17, "grad_norm": 0.6364222168922424, "learning_rate": 0.00010725581060597244, "loss": 1.7023, "step": 65112 }, { "epoch": 2.17, "grad_norm": 0.6235946416854858, "learning_rate": 0.00010724780124598118, "loss": 1.7242, "step": 65113 }, { "epoch": 2.17, "grad_norm": 0.6490333676338196, "learning_rate": 0.00010723979211996282, "loss": 1.6759, "step": 65114 }, { "epoch": 2.17, "grad_norm": 0.6506946086883545, "learning_rate": 0.00010723178322792683, "loss": 1.6947, "step": 65115 }, { "epoch": 2.17, "grad_norm": 0.6403470039367676, "learning_rate": 0.00010722377456988315, "loss": 1.64, "step": 65116 }, { "epoch": 2.17, "grad_norm": 0.6187722682952881, "learning_rate": 0.0001072157661458413, "loss": 1.6471, "step": 65117 }, { "epoch": 2.17, "grad_norm": 0.6579684615135193, "learning_rate": 0.00010720775795581124, "loss": 1.7707, "step": 65118 }, { "epoch": 2.17, "grad_norm": 0.6564855575561523, "learning_rate": 0.0001071997499998025, "loss": 1.7725, "step": 65119 }, { "epoch": 2.17, "grad_norm": 1.307591438293457, "learning_rate": 0.00010719174227782479, "loss": 1.7709, "step": 65120 }, { "epoch": 2.17, "grad_norm": 0.6397584080696106, "learning_rate": 0.000107183734789888, "loss": 1.7656, "step": 65121 }, { "epoch": 2.17, "grad_norm": 0.6247067451477051, "learning_rate": 0.00010717572753600162, "loss": 1.6758, "step": 65122 }, { "epoch": 2.17, "grad_norm": 0.6622755527496338, "learning_rate": 0.00010716772051617561, "loss": 1.698, "step": 65123 }, { "epoch": 2.17, "grad_norm": 0.6559242606163025, "learning_rate": 0.00010715971373041957, "loss": 1.7213, "step": 65124 }, { "epoch": 2.17, "grad_norm": 0.6436184644699097, "learning_rate": 0.00010715170717874315, "loss": 1.7131, "step": 65125 }, { "epoch": 2.17, "grad_norm": 0.6397982835769653, "learning_rate": 0.00010714370086115621, "loss": 1.7089, "step": 65126 }, { "epoch": 2.17, "grad_norm": 0.6523116230964661, "learning_rate": 0.00010713569477766831, "loss": 1.6964, "step": 65127 }, { "epoch": 2.17, "grad_norm": 0.6705203652381897, "learning_rate": 0.00010712768892828934, "loss": 1.7743, "step": 65128 }, { "epoch": 2.17, "grad_norm": 1.8730388879776, "learning_rate": 0.00010711968331302887, "loss": 1.7397, "step": 65129 }, { "epoch": 2.17, "grad_norm": 0.6639014482498169, "learning_rate": 0.00010711167793189678, "loss": 1.6765, "step": 65130 }, { "epoch": 2.17, "grad_norm": 0.6411364674568176, "learning_rate": 0.0001071036727849027, "loss": 1.6973, "step": 65131 }, { "epoch": 2.17, "grad_norm": 0.6585567593574524, "learning_rate": 0.0001070956678720562, "loss": 1.7238, "step": 65132 }, { "epoch": 2.17, "grad_norm": 0.6507400274276733, "learning_rate": 0.00010708766319336729, "loss": 1.7363, "step": 65133 }, { "epoch": 2.17, "grad_norm": 0.6172429323196411, "learning_rate": 0.0001070796587488455, "loss": 1.6639, "step": 65134 }, { "epoch": 2.17, "grad_norm": 0.6317700147628784, "learning_rate": 0.00010707165453850046, "loss": 1.7598, "step": 65135 }, { "epoch": 2.17, "grad_norm": 0.6521359086036682, "learning_rate": 0.00010706365056234204, "loss": 1.6727, "step": 65136 }, { "epoch": 2.17, "grad_norm": 0.6383662819862366, "learning_rate": 0.00010705564682038001, "loss": 1.7539, "step": 65137 }, { "epoch": 2.17, "grad_norm": 0.6539492011070251, "learning_rate": 0.00010704764331262398, "loss": 1.7298, "step": 65138 }, { "epoch": 2.17, "grad_norm": 0.6407076120376587, "learning_rate": 0.00010703964003908363, "loss": 1.6792, "step": 65139 }, { "epoch": 2.17, "grad_norm": 0.6831831932067871, "learning_rate": 0.00010703163699976877, "loss": 1.7174, "step": 65140 }, { "epoch": 2.17, "grad_norm": 0.684003472328186, "learning_rate": 0.00010702363419468908, "loss": 1.7104, "step": 65141 }, { "epoch": 2.17, "grad_norm": 0.6587038636207581, "learning_rate": 0.00010701563162385418, "loss": 1.6625, "step": 65142 }, { "epoch": 2.17, "grad_norm": 0.652938961982727, "learning_rate": 0.00010700762928727386, "loss": 1.6922, "step": 65143 }, { "epoch": 2.17, "grad_norm": 0.6429492831230164, "learning_rate": 0.00010699962718495806, "loss": 1.6928, "step": 65144 }, { "epoch": 2.17, "grad_norm": 0.6586573123931885, "learning_rate": 0.00010699162531691605, "loss": 1.7181, "step": 65145 }, { "epoch": 2.17, "grad_norm": 0.6430620551109314, "learning_rate": 0.00010698362368315777, "loss": 1.672, "step": 65146 }, { "epoch": 2.17, "grad_norm": 0.663930356502533, "learning_rate": 0.00010697562228369304, "loss": 1.7388, "step": 65147 }, { "epoch": 2.17, "grad_norm": 0.6303261518478394, "learning_rate": 0.00010696762111853145, "loss": 1.6778, "step": 65148 }, { "epoch": 2.17, "grad_norm": 0.6584806442260742, "learning_rate": 0.00010695962018768267, "loss": 1.729, "step": 65149 }, { "epoch": 2.17, "grad_norm": 0.6617085933685303, "learning_rate": 0.00010695161949115641, "loss": 1.7502, "step": 65150 }, { "epoch": 2.17, "grad_norm": 0.635887861251831, "learning_rate": 0.00010694361902896267, "loss": 1.7742, "step": 65151 }, { "epoch": 2.17, "grad_norm": 0.6367804408073425, "learning_rate": 0.00010693561880111075, "loss": 1.6676, "step": 65152 }, { "epoch": 2.17, "grad_norm": 0.6550871729850769, "learning_rate": 0.0001069276188076105, "loss": 1.6263, "step": 65153 }, { "epoch": 2.17, "grad_norm": 0.6494579315185547, "learning_rate": 0.00010691961904847181, "loss": 1.6526, "step": 65154 }, { "epoch": 2.17, "grad_norm": 0.6228771209716797, "learning_rate": 0.00010691161952370422, "loss": 1.6952, "step": 65155 }, { "epoch": 2.17, "grad_norm": 0.6363996863365173, "learning_rate": 0.0001069036202333174, "loss": 1.6762, "step": 65156 }, { "epoch": 2.17, "grad_norm": 0.6587578654289246, "learning_rate": 0.00010689562117732113, "loss": 1.71, "step": 65157 }, { "epoch": 2.17, "grad_norm": 0.6458075642585754, "learning_rate": 0.00010688762235572531, "loss": 1.676, "step": 65158 }, { "epoch": 2.17, "grad_norm": 0.6370593905448914, "learning_rate": 0.00010687962376853925, "loss": 1.7903, "step": 65159 }, { "epoch": 2.17, "grad_norm": 0.6534955501556396, "learning_rate": 0.00010687162541577292, "loss": 1.725, "step": 65160 }, { "epoch": 2.17, "grad_norm": 0.6457342505455017, "learning_rate": 0.00010686362729743602, "loss": 1.6973, "step": 65161 }, { "epoch": 2.17, "grad_norm": 0.646198570728302, "learning_rate": 0.00010685562941353826, "loss": 1.7498, "step": 65162 }, { "epoch": 2.17, "grad_norm": 0.6388343572616577, "learning_rate": 0.0001068476317640892, "loss": 1.6642, "step": 65163 }, { "epoch": 2.17, "grad_norm": 0.6445721983909607, "learning_rate": 0.00010683963434909876, "loss": 1.6873, "step": 65164 }, { "epoch": 2.17, "grad_norm": 0.6303476095199585, "learning_rate": 0.00010683163716857645, "loss": 1.728, "step": 65165 }, { "epoch": 2.17, "grad_norm": 0.6438179612159729, "learning_rate": 0.00010682364022253216, "loss": 1.6677, "step": 65166 }, { "epoch": 2.17, "grad_norm": 0.6354956030845642, "learning_rate": 0.0001068156435109754, "loss": 1.6998, "step": 65167 }, { "epoch": 2.17, "grad_norm": 0.645423173904419, "learning_rate": 0.00010680764703391608, "loss": 1.6652, "step": 65168 }, { "epoch": 2.17, "grad_norm": 0.6504349112510681, "learning_rate": 0.00010679965079136381, "loss": 1.717, "step": 65169 }, { "epoch": 2.17, "grad_norm": 0.6819482445716858, "learning_rate": 0.00010679165478332821, "loss": 1.7791, "step": 65170 }, { "epoch": 2.17, "grad_norm": 0.63140469789505, "learning_rate": 0.00010678365900981919, "loss": 1.6971, "step": 65171 }, { "epoch": 2.17, "grad_norm": 0.6408975720405579, "learning_rate": 0.00010677566347084622, "loss": 1.6475, "step": 65172 }, { "epoch": 2.17, "grad_norm": 0.6359089016914368, "learning_rate": 0.00010676766816641922, "loss": 1.6588, "step": 65173 }, { "epoch": 2.17, "grad_norm": 0.6454825401306152, "learning_rate": 0.00010675967309654768, "loss": 1.6969, "step": 65174 }, { "epoch": 2.17, "grad_norm": 0.6448075175285339, "learning_rate": 0.00010675167826124157, "loss": 1.7642, "step": 65175 }, { "epoch": 2.17, "grad_norm": 0.6707038879394531, "learning_rate": 0.00010674368366051041, "loss": 1.7713, "step": 65176 }, { "epoch": 2.17, "grad_norm": 0.6425778269767761, "learning_rate": 0.00010673568929436388, "loss": 1.7139, "step": 65177 }, { "epoch": 2.17, "grad_norm": 0.6497896909713745, "learning_rate": 0.00010672769516281182, "loss": 1.7342, "step": 65178 }, { "epoch": 2.17, "grad_norm": 0.6425251960754395, "learning_rate": 0.00010671970126586377, "loss": 1.7488, "step": 65179 }, { "epoch": 2.17, "grad_norm": 0.6688380837440491, "learning_rate": 0.0001067117076035296, "loss": 1.7852, "step": 65180 }, { "epoch": 2.17, "grad_norm": 0.6745931506156921, "learning_rate": 0.00010670371417581897, "loss": 1.7159, "step": 65181 }, { "epoch": 2.17, "grad_norm": 0.6434001922607422, "learning_rate": 0.00010669572098274144, "loss": 1.6944, "step": 65182 }, { "epoch": 2.17, "grad_norm": 0.6304253339767456, "learning_rate": 0.00010668772802430691, "loss": 1.6626, "step": 65183 }, { "epoch": 2.17, "grad_norm": 0.6331098079681396, "learning_rate": 0.00010667973530052492, "loss": 1.74, "step": 65184 }, { "epoch": 2.17, "grad_norm": 0.6472117304801941, "learning_rate": 0.00010667174281140533, "loss": 1.7329, "step": 65185 }, { "epoch": 2.17, "grad_norm": 0.6431252360343933, "learning_rate": 0.00010666375055695767, "loss": 1.6715, "step": 65186 }, { "epoch": 2.17, "grad_norm": 0.6387264132499695, "learning_rate": 0.00010665575853719183, "loss": 1.7049, "step": 65187 }, { "epoch": 2.17, "grad_norm": 0.6441521048545837, "learning_rate": 0.00010664776675211738, "loss": 1.7015, "step": 65188 }, { "epoch": 2.17, "grad_norm": 0.6491292715072632, "learning_rate": 0.00010663977520174399, "loss": 1.7716, "step": 65189 }, { "epoch": 2.17, "grad_norm": 0.6348248720169067, "learning_rate": 0.0001066317838860815, "loss": 1.682, "step": 65190 }, { "epoch": 2.17, "grad_norm": 0.653906524181366, "learning_rate": 0.00010662379280513953, "loss": 1.7924, "step": 65191 }, { "epoch": 2.17, "grad_norm": 0.6493869423866272, "learning_rate": 0.0001066158019589277, "loss": 1.783, "step": 65192 }, { "epoch": 2.17, "grad_norm": 0.6545364260673523, "learning_rate": 0.0001066078113474558, "loss": 1.6939, "step": 65193 }, { "epoch": 2.17, "grad_norm": 0.6477441191673279, "learning_rate": 0.0001065998209707336, "loss": 1.6307, "step": 65194 }, { "epoch": 2.17, "grad_norm": 0.6546785831451416, "learning_rate": 0.00010659183082877073, "loss": 1.7153, "step": 65195 }, { "epoch": 2.17, "grad_norm": 0.6363094449043274, "learning_rate": 0.00010658384092157676, "loss": 1.6991, "step": 65196 }, { "epoch": 2.17, "grad_norm": 0.654854416847229, "learning_rate": 0.00010657585124916164, "loss": 1.721, "step": 65197 }, { "epoch": 2.17, "grad_norm": 0.6284076571464539, "learning_rate": 0.00010656786181153492, "loss": 1.6907, "step": 65198 }, { "epoch": 2.17, "grad_norm": 0.6211332678794861, "learning_rate": 0.00010655987260870623, "loss": 1.6976, "step": 65199 }, { "epoch": 2.17, "grad_norm": 0.6520222425460815, "learning_rate": 0.00010655188364068534, "loss": 1.7537, "step": 65200 }, { "epoch": 2.17, "grad_norm": 0.6515670418739319, "learning_rate": 0.00010654389490748216, "loss": 1.6931, "step": 65201 }, { "epoch": 2.17, "grad_norm": 0.6779091954231262, "learning_rate": 0.00010653590640910598, "loss": 1.7773, "step": 65202 }, { "epoch": 2.17, "grad_norm": 0.6992472410202026, "learning_rate": 0.00010652791814556674, "loss": 1.6833, "step": 65203 }, { "epoch": 2.17, "grad_norm": 0.6411461234092712, "learning_rate": 0.00010651993011687416, "loss": 1.622, "step": 65204 }, { "epoch": 2.17, "grad_norm": 0.6295434832572937, "learning_rate": 0.00010651194232303789, "loss": 1.6735, "step": 65205 }, { "epoch": 2.17, "grad_norm": 0.6290529370307922, "learning_rate": 0.00010650395476406751, "loss": 1.6735, "step": 65206 }, { "epoch": 2.17, "grad_norm": 0.6394067406654358, "learning_rate": 0.00010649596743997282, "loss": 1.6864, "step": 65207 }, { "epoch": 2.17, "grad_norm": 0.6575030088424683, "learning_rate": 0.00010648798035076373, "loss": 1.706, "step": 65208 }, { "epoch": 2.17, "grad_norm": 0.6440751552581787, "learning_rate": 0.00010647999349644949, "loss": 1.6782, "step": 65209 }, { "epoch": 2.17, "grad_norm": 0.8822689056396484, "learning_rate": 0.00010647200687704004, "loss": 1.6684, "step": 65210 }, { "epoch": 2.17, "grad_norm": 0.646284282207489, "learning_rate": 0.00010646402049254518, "loss": 1.6976, "step": 65211 }, { "epoch": 2.17, "grad_norm": 0.6361141204833984, "learning_rate": 0.00010645603434297447, "loss": 1.7173, "step": 65212 }, { "epoch": 2.17, "grad_norm": 0.6775155663490295, "learning_rate": 0.00010644804842833748, "loss": 1.723, "step": 65213 }, { "epoch": 2.17, "grad_norm": 0.6728388071060181, "learning_rate": 0.00010644006274864407, "loss": 1.7131, "step": 65214 }, { "epoch": 2.17, "grad_norm": 0.6560539603233337, "learning_rate": 0.00010643207730390412, "loss": 1.7564, "step": 65215 }, { "epoch": 2.17, "grad_norm": 0.6374527812004089, "learning_rate": 0.00010642409209412688, "loss": 1.6671, "step": 65216 }, { "epoch": 2.17, "grad_norm": 0.6223060488700867, "learning_rate": 0.00010641610711932227, "loss": 1.6716, "step": 65217 }, { "epoch": 2.17, "grad_norm": 0.6459227800369263, "learning_rate": 0.00010640812237950012, "loss": 1.6812, "step": 65218 }, { "epoch": 2.17, "grad_norm": 0.6392678022384644, "learning_rate": 0.00010640013787466994, "loss": 1.7447, "step": 65219 }, { "epoch": 2.17, "grad_norm": 0.6307410001754761, "learning_rate": 0.00010639215360484139, "loss": 1.6504, "step": 65220 }, { "epoch": 2.17, "grad_norm": 0.6601330637931824, "learning_rate": 0.00010638416957002435, "loss": 1.7541, "step": 65221 }, { "epoch": 2.17, "grad_norm": 0.6211273074150085, "learning_rate": 0.00010637618577022837, "loss": 1.6639, "step": 65222 }, { "epoch": 2.17, "grad_norm": 0.648156464099884, "learning_rate": 0.00010636820220546309, "loss": 1.6882, "step": 65223 }, { "epoch": 2.17, "grad_norm": 0.6406368613243103, "learning_rate": 0.00010636021887573827, "loss": 1.766, "step": 65224 }, { "epoch": 2.17, "grad_norm": 0.6605027318000793, "learning_rate": 0.00010635223578106373, "loss": 1.7536, "step": 65225 }, { "epoch": 2.17, "grad_norm": 0.630534827709198, "learning_rate": 0.00010634425292144901, "loss": 1.7643, "step": 65226 }, { "epoch": 2.17, "grad_norm": 0.6789512634277344, "learning_rate": 0.00010633627029690378, "loss": 1.7813, "step": 65227 }, { "epoch": 2.17, "grad_norm": 0.6457812786102295, "learning_rate": 0.00010632828790743784, "loss": 1.6644, "step": 65228 }, { "epoch": 2.17, "grad_norm": 0.6447781920433044, "learning_rate": 0.00010632030575306083, "loss": 1.6659, "step": 65229 }, { "epoch": 2.17, "grad_norm": 0.6490470170974731, "learning_rate": 0.00010631232383378238, "loss": 1.755, "step": 65230 }, { "epoch": 2.17, "grad_norm": 0.6430725455284119, "learning_rate": 0.00010630434214961228, "loss": 1.7486, "step": 65231 }, { "epoch": 2.17, "grad_norm": 0.6471828818321228, "learning_rate": 0.00010629636070056006, "loss": 1.6912, "step": 65232 }, { "epoch": 2.17, "grad_norm": 0.6388542056083679, "learning_rate": 0.00010628837948663567, "loss": 1.5878, "step": 65233 }, { "epoch": 2.17, "grad_norm": 0.6360466480255127, "learning_rate": 0.0001062803985078485, "loss": 1.732, "step": 65234 }, { "epoch": 2.17, "grad_norm": 0.6360151767730713, "learning_rate": 0.00010627241776420852, "loss": 1.7307, "step": 65235 }, { "epoch": 2.17, "grad_norm": 0.6347790956497192, "learning_rate": 0.00010626443725572524, "loss": 1.69, "step": 65236 }, { "epoch": 2.17, "grad_norm": 0.6268565058708191, "learning_rate": 0.0001062564569824083, "loss": 1.6331, "step": 65237 }, { "epoch": 2.17, "grad_norm": 0.6433207988739014, "learning_rate": 0.00010624847694426758, "loss": 1.6918, "step": 65238 }, { "epoch": 2.17, "grad_norm": 0.6639734506607056, "learning_rate": 0.00010624049714131256, "loss": 1.6665, "step": 65239 }, { "epoch": 2.17, "grad_norm": 0.6521316170692444, "learning_rate": 0.00010623251757355314, "loss": 1.6744, "step": 65240 }, { "epoch": 2.17, "grad_norm": 0.6581486463546753, "learning_rate": 0.00010622453824099888, "loss": 1.5814, "step": 65241 }, { "epoch": 2.17, "grad_norm": 0.6411075592041016, "learning_rate": 0.0001062165591436594, "loss": 1.7293, "step": 65242 }, { "epoch": 2.17, "grad_norm": 0.6494767665863037, "learning_rate": 0.00010620858028154452, "loss": 1.7327, "step": 65243 }, { "epoch": 2.17, "grad_norm": 0.6349315047264099, "learning_rate": 0.00010620060165466381, "loss": 1.664, "step": 65244 }, { "epoch": 2.17, "grad_norm": 0.6284913420677185, "learning_rate": 0.00010619262326302709, "loss": 1.7189, "step": 65245 }, { "epoch": 2.17, "grad_norm": 0.7537500262260437, "learning_rate": 0.00010618464510664388, "loss": 1.6893, "step": 65246 }, { "epoch": 2.17, "grad_norm": 0.6924753189086914, "learning_rate": 0.00010617666718552405, "loss": 1.7207, "step": 65247 }, { "epoch": 2.17, "grad_norm": 0.6439677476882935, "learning_rate": 0.00010616868949967719, "loss": 1.7211, "step": 65248 }, { "epoch": 2.17, "grad_norm": 0.6531688570976257, "learning_rate": 0.0001061607120491129, "loss": 1.7097, "step": 65249 }, { "epoch": 2.17, "grad_norm": 0.6705819964408875, "learning_rate": 0.00010615273483384102, "loss": 1.6909, "step": 65250 }, { "epoch": 2.17, "grad_norm": 0.6572697758674622, "learning_rate": 0.00010614475785387118, "loss": 1.6736, "step": 65251 }, { "epoch": 2.17, "grad_norm": 0.6660965085029602, "learning_rate": 0.00010613678110921292, "loss": 1.6532, "step": 65252 }, { "epoch": 2.17, "grad_norm": 0.6342602968215942, "learning_rate": 0.00010612880459987604, "loss": 1.6969, "step": 65253 }, { "epoch": 2.17, "grad_norm": 0.6644073724746704, "learning_rate": 0.00010612082832587032, "loss": 1.6322, "step": 65254 }, { "epoch": 2.17, "grad_norm": 0.6520786285400391, "learning_rate": 0.00010611285228720535, "loss": 1.7147, "step": 65255 }, { "epoch": 2.17, "grad_norm": 0.6439322829246521, "learning_rate": 0.00010610487648389075, "loss": 1.7378, "step": 65256 }, { "epoch": 2.17, "grad_norm": 0.6470199227333069, "learning_rate": 0.00010609690091593632, "loss": 1.7065, "step": 65257 }, { "epoch": 2.17, "grad_norm": 0.6670951843261719, "learning_rate": 0.00010608892558335168, "loss": 1.7036, "step": 65258 }, { "epoch": 2.17, "grad_norm": 0.6353113055229187, "learning_rate": 0.00010608095048614646, "loss": 1.6936, "step": 65259 }, { "epoch": 2.17, "grad_norm": 0.6380375027656555, "learning_rate": 0.00010607297562433033, "loss": 1.7149, "step": 65260 }, { "epoch": 2.17, "grad_norm": 0.6361600756645203, "learning_rate": 0.00010606500099791328, "loss": 1.7537, "step": 65261 }, { "epoch": 2.17, "grad_norm": 0.6390131711959839, "learning_rate": 0.00010605702660690452, "loss": 1.6839, "step": 65262 }, { "epoch": 2.17, "grad_norm": 0.6407687067985535, "learning_rate": 0.00010604905245131395, "loss": 1.741, "step": 65263 }, { "epoch": 2.17, "grad_norm": 0.6591219902038574, "learning_rate": 0.00010604107853115139, "loss": 1.7265, "step": 65264 }, { "epoch": 2.17, "grad_norm": 0.6423380374908447, "learning_rate": 0.00010603310484642633, "loss": 1.7469, "step": 65265 }, { "epoch": 2.17, "grad_norm": 0.6461402773857117, "learning_rate": 0.00010602513139714843, "loss": 1.719, "step": 65266 }, { "epoch": 2.17, "grad_norm": 0.6560499668121338, "learning_rate": 0.00010601715818332745, "loss": 1.6994, "step": 65267 }, { "epoch": 2.17, "grad_norm": 0.6400919556617737, "learning_rate": 0.00010600918520497326, "loss": 1.7548, "step": 65268 }, { "epoch": 2.17, "grad_norm": 0.6551983952522278, "learning_rate": 0.0001060012124620951, "loss": 1.7108, "step": 65269 }, { "epoch": 2.17, "grad_norm": 0.6479334831237793, "learning_rate": 0.0001059932399547029, "loss": 1.7285, "step": 65270 }, { "epoch": 2.17, "grad_norm": 0.6636032462120056, "learning_rate": 0.00010598526768280643, "loss": 1.7529, "step": 65271 }, { "epoch": 2.17, "grad_norm": 0.6500352025032043, "learning_rate": 0.00010597729564641527, "loss": 1.6237, "step": 65272 }, { "epoch": 2.17, "grad_norm": 0.6475721597671509, "learning_rate": 0.00010596932384553896, "loss": 1.6956, "step": 65273 }, { "epoch": 2.17, "grad_norm": 0.6781564354896545, "learning_rate": 0.0001059613522801873, "loss": 1.6756, "step": 65274 }, { "epoch": 2.17, "grad_norm": 0.638174831867218, "learning_rate": 0.00010595338095037008, "loss": 1.6325, "step": 65275 }, { "epoch": 2.17, "grad_norm": 0.6401684284210205, "learning_rate": 0.00010594540985609687, "loss": 1.6344, "step": 65276 }, { "epoch": 2.17, "grad_norm": 0.6583329439163208, "learning_rate": 0.00010593743899737724, "loss": 1.7303, "step": 65277 }, { "epoch": 2.17, "grad_norm": 0.630770742893219, "learning_rate": 0.00010592946837422105, "loss": 1.6781, "step": 65278 }, { "epoch": 2.17, "grad_norm": 0.6398777365684509, "learning_rate": 0.00010592149798663792, "loss": 1.7066, "step": 65279 }, { "epoch": 2.17, "grad_norm": 0.6485889554023743, "learning_rate": 0.00010591352783463737, "loss": 1.6967, "step": 65280 }, { "epoch": 2.17, "grad_norm": 0.6464017033576965, "learning_rate": 0.0001059055579182293, "loss": 1.7449, "step": 65281 }, { "epoch": 2.17, "grad_norm": 0.6419976353645325, "learning_rate": 0.00010589758823742317, "loss": 1.6732, "step": 65282 }, { "epoch": 2.17, "grad_norm": 0.6754840016365051, "learning_rate": 0.0001058896187922289, "loss": 1.627, "step": 65283 }, { "epoch": 2.17, "grad_norm": 0.6424427628517151, "learning_rate": 0.00010588164958265592, "loss": 1.7276, "step": 65284 }, { "epoch": 2.17, "grad_norm": 0.6478937864303589, "learning_rate": 0.00010587368060871409, "loss": 1.6842, "step": 65285 }, { "epoch": 2.17, "grad_norm": 0.6540886759757996, "learning_rate": 0.00010586571187041304, "loss": 1.704, "step": 65286 }, { "epoch": 2.17, "grad_norm": 0.6382516026496887, "learning_rate": 0.00010585774336776231, "loss": 1.6465, "step": 65287 }, { "epoch": 2.17, "grad_norm": 0.6338322758674622, "learning_rate": 0.00010584977510077178, "loss": 1.7005, "step": 65288 }, { "epoch": 2.17, "grad_norm": 0.673774778842926, "learning_rate": 0.00010584180706945091, "loss": 1.7501, "step": 65289 }, { "epoch": 2.17, "grad_norm": 0.6579136252403259, "learning_rate": 0.00010583383927380956, "loss": 1.7709, "step": 65290 }, { "epoch": 2.17, "grad_norm": 0.6658544540405273, "learning_rate": 0.00010582587171385732, "loss": 1.7227, "step": 65291 }, { "epoch": 2.17, "grad_norm": 0.669880211353302, "learning_rate": 0.00010581790438960378, "loss": 1.7049, "step": 65292 }, { "epoch": 2.17, "grad_norm": 0.6463152170181274, "learning_rate": 0.00010580993730105879, "loss": 1.784, "step": 65293 }, { "epoch": 2.17, "grad_norm": 0.6358594298362732, "learning_rate": 0.00010580197044823181, "loss": 1.6884, "step": 65294 }, { "epoch": 2.17, "grad_norm": 0.6489802002906799, "learning_rate": 0.00010579400383113273, "loss": 1.7361, "step": 65295 }, { "epoch": 2.17, "grad_norm": 0.6392239332199097, "learning_rate": 0.00010578603744977102, "loss": 1.7036, "step": 65296 }, { "epoch": 2.17, "grad_norm": 0.6281838417053223, "learning_rate": 0.00010577807130415657, "loss": 1.6616, "step": 65297 }, { "epoch": 2.17, "grad_norm": 0.6578385233879089, "learning_rate": 0.00010577010539429889, "loss": 1.6408, "step": 65298 }, { "epoch": 2.17, "grad_norm": 0.6246962547302246, "learning_rate": 0.00010576213972020758, "loss": 1.6715, "step": 65299 }, { "epoch": 2.17, "grad_norm": 0.6308326125144958, "learning_rate": 0.00010575417428189255, "loss": 1.7505, "step": 65300 }, { "epoch": 2.17, "grad_norm": 0.6354165077209473, "learning_rate": 0.0001057462090793632, "loss": 1.6246, "step": 65301 }, { "epoch": 2.17, "grad_norm": 0.6390684843063354, "learning_rate": 0.00010573824411262944, "loss": 1.6551, "step": 65302 }, { "epoch": 2.17, "grad_norm": 0.6333647966384888, "learning_rate": 0.00010573027938170071, "loss": 1.6505, "step": 65303 }, { "epoch": 2.17, "grad_norm": 0.6619501709938049, "learning_rate": 0.00010572231488658693, "loss": 1.6462, "step": 65304 }, { "epoch": 2.17, "grad_norm": 0.634903609752655, "learning_rate": 0.0001057143506272976, "loss": 1.6518, "step": 65305 }, { "epoch": 2.17, "grad_norm": 0.6737716197967529, "learning_rate": 0.00010570638660384232, "loss": 1.6244, "step": 65306 }, { "epoch": 2.17, "grad_norm": 0.658664882183075, "learning_rate": 0.00010569842281623099, "loss": 1.6687, "step": 65307 }, { "epoch": 2.17, "grad_norm": 0.6670061945915222, "learning_rate": 0.00010569045926447313, "loss": 1.7184, "step": 65308 }, { "epoch": 2.17, "grad_norm": 0.6444719433784485, "learning_rate": 0.00010568249594857833, "loss": 1.7289, "step": 65309 }, { "epoch": 2.17, "grad_norm": 0.6473847031593323, "learning_rate": 0.00010567453286855632, "loss": 1.7488, "step": 65310 }, { "epoch": 2.17, "grad_norm": 0.6456215381622314, "learning_rate": 0.00010566657002441692, "loss": 1.7366, "step": 65311 }, { "epoch": 2.17, "grad_norm": 0.6551437973976135, "learning_rate": 0.0001056586074161697, "loss": 1.6877, "step": 65312 }, { "epoch": 2.17, "grad_norm": 0.6340730786323547, "learning_rate": 0.00010565064504382413, "loss": 1.7137, "step": 65313 }, { "epoch": 2.17, "grad_norm": 0.646070659160614, "learning_rate": 0.0001056426829073902, "loss": 1.6697, "step": 65314 }, { "epoch": 2.17, "grad_norm": 0.6453714966773987, "learning_rate": 0.00010563472100687737, "loss": 1.6711, "step": 65315 }, { "epoch": 2.17, "grad_norm": 0.644020676612854, "learning_rate": 0.00010562675934229527, "loss": 1.7087, "step": 65316 }, { "epoch": 2.17, "grad_norm": 0.6400461196899414, "learning_rate": 0.00010561879791365365, "loss": 1.6653, "step": 65317 }, { "epoch": 2.17, "grad_norm": 0.6593153476715088, "learning_rate": 0.00010561083672096236, "loss": 1.7931, "step": 65318 }, { "epoch": 2.17, "grad_norm": 0.6336478590965271, "learning_rate": 0.00010560287576423066, "loss": 1.6681, "step": 65319 }, { "epoch": 2.17, "grad_norm": 0.6503645777702332, "learning_rate": 0.00010559491504346844, "loss": 1.7511, "step": 65320 }, { "epoch": 2.17, "grad_norm": 0.6463659405708313, "learning_rate": 0.00010558695455868546, "loss": 1.7544, "step": 65321 }, { "epoch": 2.17, "grad_norm": 0.6577848792076111, "learning_rate": 0.00010557899430989126, "loss": 1.6965, "step": 65322 }, { "epoch": 2.17, "grad_norm": 0.6327127814292908, "learning_rate": 0.00010557103429709542, "loss": 1.7174, "step": 65323 }, { "epoch": 2.17, "grad_norm": 0.6368548274040222, "learning_rate": 0.0001055630745203077, "loss": 1.796, "step": 65324 }, { "epoch": 2.17, "grad_norm": 0.6577869653701782, "learning_rate": 0.00010555511497953795, "loss": 1.64, "step": 65325 }, { "epoch": 2.17, "grad_norm": 0.6448683142662048, "learning_rate": 0.00010554715567479543, "loss": 1.6868, "step": 65326 }, { "epoch": 2.17, "grad_norm": 0.6432574987411499, "learning_rate": 0.00010553919660609003, "loss": 1.812, "step": 65327 }, { "epoch": 2.17, "grad_norm": 0.6295223832130432, "learning_rate": 0.00010553123777343148, "loss": 1.6582, "step": 65328 }, { "epoch": 2.17, "grad_norm": 0.6409332156181335, "learning_rate": 0.00010552327917682937, "loss": 1.6412, "step": 65329 }, { "epoch": 2.17, "grad_norm": 0.6304104328155518, "learning_rate": 0.00010551532081629323, "loss": 1.6868, "step": 65330 }, { "epoch": 2.17, "grad_norm": 0.6656284332275391, "learning_rate": 0.00010550736269183284, "loss": 1.7022, "step": 65331 }, { "epoch": 2.17, "grad_norm": 0.6877418160438538, "learning_rate": 0.00010549940480345805, "loss": 1.6454, "step": 65332 }, { "epoch": 2.17, "grad_norm": 0.6648380160331726, "learning_rate": 0.00010549144715117811, "loss": 1.7461, "step": 65333 }, { "epoch": 2.17, "grad_norm": 0.6325523257255554, "learning_rate": 0.00010548348973500294, "loss": 1.7271, "step": 65334 }, { "epoch": 2.17, "grad_norm": 0.6386557817459106, "learning_rate": 0.0001054755325549422, "loss": 1.6704, "step": 65335 }, { "epoch": 2.17, "grad_norm": 0.644644558429718, "learning_rate": 0.00010546757561100554, "loss": 1.7175, "step": 65336 }, { "epoch": 2.17, "grad_norm": 0.6466041803359985, "learning_rate": 0.00010545961890320249, "loss": 1.7425, "step": 65337 }, { "epoch": 2.17, "grad_norm": 0.6872232556343079, "learning_rate": 0.00010545166243154286, "loss": 1.6889, "step": 65338 }, { "epoch": 2.17, "grad_norm": 0.6316468715667725, "learning_rate": 0.00010544370619603627, "loss": 1.6545, "step": 65339 }, { "epoch": 2.17, "grad_norm": 0.6328554749488831, "learning_rate": 0.00010543575019669224, "loss": 1.6985, "step": 65340 }, { "epoch": 2.17, "grad_norm": 0.6459369659423828, "learning_rate": 0.00010542779443352055, "loss": 1.7101, "step": 65341 }, { "epoch": 2.17, "grad_norm": 0.6446433663368225, "learning_rate": 0.00010541983890653094, "loss": 1.7147, "step": 65342 }, { "epoch": 2.17, "grad_norm": 0.6526457667350769, "learning_rate": 0.00010541188361573296, "loss": 1.7328, "step": 65343 }, { "epoch": 2.17, "grad_norm": 0.6428157091140747, "learning_rate": 0.0001054039285611362, "loss": 1.7226, "step": 65344 }, { "epoch": 2.17, "grad_norm": 0.6498385667800903, "learning_rate": 0.00010539597374275051, "loss": 1.7317, "step": 65345 }, { "epoch": 2.17, "grad_norm": 0.6333255171775818, "learning_rate": 0.00010538801916058545, "loss": 1.7614, "step": 65346 }, { "epoch": 2.17, "grad_norm": 0.6428662538528442, "learning_rate": 0.00010538006481465052, "loss": 1.7373, "step": 65347 }, { "epoch": 2.17, "grad_norm": 0.6368095874786377, "learning_rate": 0.00010537211070495563, "loss": 1.7146, "step": 65348 }, { "epoch": 2.17, "grad_norm": 0.6395599246025085, "learning_rate": 0.00010536415683151021, "loss": 1.7361, "step": 65349 }, { "epoch": 2.17, "grad_norm": 0.6465008854866028, "learning_rate": 0.00010535620319432414, "loss": 1.7317, "step": 65350 }, { "epoch": 2.17, "grad_norm": 0.6624149680137634, "learning_rate": 0.00010534824979340686, "loss": 1.685, "step": 65351 }, { "epoch": 2.17, "grad_norm": 0.6597599387168884, "learning_rate": 0.00010534029662876822, "loss": 1.7001, "step": 65352 }, { "epoch": 2.17, "grad_norm": 0.646447479724884, "learning_rate": 0.00010533234370041779, "loss": 1.7327, "step": 65353 }, { "epoch": 2.17, "grad_norm": 0.6568617224693298, "learning_rate": 0.0001053243910083651, "loss": 1.6575, "step": 65354 }, { "epoch": 2.17, "grad_norm": 0.6490552425384521, "learning_rate": 0.00010531643855262, "loss": 1.769, "step": 65355 }, { "epoch": 2.17, "grad_norm": 0.6473581194877625, "learning_rate": 0.00010530848633319198, "loss": 1.6614, "step": 65356 }, { "epoch": 2.17, "grad_norm": 0.6373421549797058, "learning_rate": 0.00010530053435009087, "loss": 1.7874, "step": 65357 }, { "epoch": 2.17, "grad_norm": 0.6420662999153137, "learning_rate": 0.00010529258260332621, "loss": 1.7743, "step": 65358 }, { "epoch": 2.17, "grad_norm": 0.6489583253860474, "learning_rate": 0.00010528463109290758, "loss": 1.661, "step": 65359 }, { "epoch": 2.17, "grad_norm": 0.6411041021347046, "learning_rate": 0.00010527667981884481, "loss": 1.6504, "step": 65360 }, { "epoch": 2.17, "grad_norm": 0.6272825002670288, "learning_rate": 0.00010526872878114738, "loss": 1.6688, "step": 65361 }, { "epoch": 2.17, "grad_norm": 0.6408196687698364, "learning_rate": 0.00010526077797982508, "loss": 1.6908, "step": 65362 }, { "epoch": 2.17, "grad_norm": 0.6465418934822083, "learning_rate": 0.00010525282741488745, "loss": 1.7447, "step": 65363 }, { "epoch": 2.17, "grad_norm": 0.6387848258018494, "learning_rate": 0.00010524487708634426, "loss": 1.7163, "step": 65364 }, { "epoch": 2.17, "grad_norm": 1.001613974571228, "learning_rate": 0.00010523692699420509, "loss": 1.6753, "step": 65365 }, { "epoch": 2.17, "grad_norm": 0.6384919285774231, "learning_rate": 0.00010522897713847951, "loss": 1.6807, "step": 65366 }, { "epoch": 2.17, "grad_norm": 0.654119074344635, "learning_rate": 0.00010522102751917737, "loss": 1.6739, "step": 65367 }, { "epoch": 2.17, "grad_norm": 0.6527838706970215, "learning_rate": 0.00010521307813630817, "loss": 1.6924, "step": 65368 }, { "epoch": 2.17, "grad_norm": 0.6664118766784668, "learning_rate": 0.0001052051289898815, "loss": 1.7567, "step": 65369 }, { "epoch": 2.17, "grad_norm": 0.6585663557052612, "learning_rate": 0.0001051971800799071, "loss": 1.7563, "step": 65370 }, { "epoch": 2.17, "grad_norm": 0.636807382106781, "learning_rate": 0.00010518923140639474, "loss": 1.7007, "step": 65371 }, { "epoch": 2.17, "grad_norm": 0.6528151631355286, "learning_rate": 0.00010518128296935392, "loss": 1.7434, "step": 65372 }, { "epoch": 2.17, "grad_norm": 0.6420140266418457, "learning_rate": 0.00010517333476879423, "loss": 1.7113, "step": 65373 }, { "epoch": 2.18, "grad_norm": 0.6314871907234192, "learning_rate": 0.00010516538680472551, "loss": 1.6717, "step": 65374 }, { "epoch": 2.18, "grad_norm": 0.6382696628570557, "learning_rate": 0.00010515743907715732, "loss": 1.7005, "step": 65375 }, { "epoch": 2.18, "grad_norm": 0.6530048847198486, "learning_rate": 0.00010514949158609916, "loss": 1.6489, "step": 65376 }, { "epoch": 2.18, "grad_norm": 0.6620505452156067, "learning_rate": 0.00010514154433156084, "loss": 1.6537, "step": 65377 }, { "epoch": 2.18, "grad_norm": 0.6352149844169617, "learning_rate": 0.00010513359731355215, "loss": 1.714, "step": 65378 }, { "epoch": 2.18, "grad_norm": 0.6419814825057983, "learning_rate": 0.00010512565053208234, "loss": 1.6268, "step": 65379 }, { "epoch": 2.18, "grad_norm": 0.662899911403656, "learning_rate": 0.00010511770398716132, "loss": 1.7395, "step": 65380 }, { "epoch": 2.18, "grad_norm": 0.6570858359336853, "learning_rate": 0.00010510975767879877, "loss": 1.6816, "step": 65381 }, { "epoch": 2.18, "grad_norm": 0.6599147915840149, "learning_rate": 0.00010510181160700425, "loss": 1.7293, "step": 65382 }, { "epoch": 2.18, "grad_norm": 0.7932364344596863, "learning_rate": 0.00010509386577178733, "loss": 1.7696, "step": 65383 }, { "epoch": 2.18, "grad_norm": 0.6280003786087036, "learning_rate": 0.00010508592017315775, "loss": 1.7242, "step": 65384 }, { "epoch": 2.18, "grad_norm": 0.6418247818946838, "learning_rate": 0.0001050779748111253, "loss": 1.7238, "step": 65385 }, { "epoch": 2.18, "grad_norm": 0.6221802234649658, "learning_rate": 0.00010507002968569931, "loss": 1.6791, "step": 65386 }, { "epoch": 2.18, "grad_norm": 0.6383702754974365, "learning_rate": 0.00010506208479688957, "loss": 1.6428, "step": 65387 }, { "epoch": 2.18, "grad_norm": 0.670916736125946, "learning_rate": 0.00010505414014470585, "loss": 1.7568, "step": 65388 }, { "epoch": 2.18, "grad_norm": 0.6674716472625732, "learning_rate": 0.00010504619572915766, "loss": 1.7354, "step": 65389 }, { "epoch": 2.18, "grad_norm": 0.6708225607872009, "learning_rate": 0.00010503825155025456, "loss": 1.732, "step": 65390 }, { "epoch": 2.18, "grad_norm": 0.6844077110290527, "learning_rate": 0.00010503030760800632, "loss": 1.7235, "step": 65391 }, { "epoch": 2.18, "grad_norm": 0.6393288969993591, "learning_rate": 0.00010502236390242263, "loss": 1.6514, "step": 65392 }, { "epoch": 2.18, "grad_norm": 0.6454498171806335, "learning_rate": 0.00010501442043351309, "loss": 1.7182, "step": 65393 }, { "epoch": 2.18, "grad_norm": 0.6695637702941895, "learning_rate": 0.00010500647720128717, "loss": 1.6478, "step": 65394 }, { "epoch": 2.18, "grad_norm": 0.6571775078773499, "learning_rate": 0.0001049985342057548, "loss": 1.6886, "step": 65395 }, { "epoch": 2.18, "grad_norm": 0.6577553153038025, "learning_rate": 0.00010499059144692545, "loss": 1.6621, "step": 65396 }, { "epoch": 2.18, "grad_norm": 0.6541407108306885, "learning_rate": 0.00010498264892480872, "loss": 1.7117, "step": 65397 }, { "epoch": 2.18, "grad_norm": 0.6727221012115479, "learning_rate": 0.0001049747066394144, "loss": 1.7584, "step": 65398 }, { "epoch": 2.18, "grad_norm": 0.654668927192688, "learning_rate": 0.00010496676459075194, "loss": 1.737, "step": 65399 }, { "epoch": 2.18, "grad_norm": 0.6491138935089111, "learning_rate": 0.00010495882277883121, "loss": 1.7619, "step": 65400 }, { "epoch": 2.18, "grad_norm": 0.6640270352363586, "learning_rate": 0.00010495088120366162, "loss": 1.7417, "step": 65401 }, { "epoch": 2.18, "grad_norm": 0.6583123207092285, "learning_rate": 0.00010494293986525303, "loss": 1.7921, "step": 65402 }, { "epoch": 2.18, "grad_norm": 0.6513846516609192, "learning_rate": 0.00010493499876361497, "loss": 1.6997, "step": 65403 }, { "epoch": 2.18, "grad_norm": 0.6384277939796448, "learning_rate": 0.00010492705789875696, "loss": 1.6882, "step": 65404 }, { "epoch": 2.18, "grad_norm": 0.670173704624176, "learning_rate": 0.00010491911727068888, "loss": 1.6965, "step": 65405 }, { "epoch": 2.18, "grad_norm": 0.6416956186294556, "learning_rate": 0.00010491117687942014, "loss": 1.6847, "step": 65406 }, { "epoch": 2.18, "grad_norm": 0.6396077275276184, "learning_rate": 0.00010490323672496062, "loss": 1.7017, "step": 65407 }, { "epoch": 2.18, "grad_norm": 0.6478593349456787, "learning_rate": 0.00010489529680731979, "loss": 1.6885, "step": 65408 }, { "epoch": 2.18, "grad_norm": 0.6476489305496216, "learning_rate": 0.00010488735712650721, "loss": 1.6625, "step": 65409 }, { "epoch": 2.18, "grad_norm": 0.6758708357810974, "learning_rate": 0.00010487941768253275, "loss": 1.762, "step": 65410 }, { "epoch": 2.18, "grad_norm": 0.6589094400405884, "learning_rate": 0.0001048714784754058, "loss": 1.744, "step": 65411 }, { "epoch": 2.18, "grad_norm": 0.6310391426086426, "learning_rate": 0.00010486353950513625, "loss": 1.7063, "step": 65412 }, { "epoch": 2.18, "grad_norm": 0.6279330849647522, "learning_rate": 0.00010485560077173353, "loss": 1.6895, "step": 65413 }, { "epoch": 2.18, "grad_norm": 0.6699091196060181, "learning_rate": 0.00010484766227520742, "loss": 1.6533, "step": 65414 }, { "epoch": 2.18, "grad_norm": 0.6507762670516968, "learning_rate": 0.00010483972401556753, "loss": 1.6882, "step": 65415 }, { "epoch": 2.18, "grad_norm": 0.6536126732826233, "learning_rate": 0.00010483178599282333, "loss": 1.7687, "step": 65416 }, { "epoch": 2.18, "grad_norm": 0.6679401397705078, "learning_rate": 0.00010482384820698468, "loss": 1.6729, "step": 65417 }, { "epoch": 2.18, "grad_norm": 0.6379944086074829, "learning_rate": 0.00010481591065806104, "loss": 1.7017, "step": 65418 }, { "epoch": 2.18, "grad_norm": 0.6314027905464172, "learning_rate": 0.00010480797334606224, "loss": 1.6399, "step": 65419 }, { "epoch": 2.18, "grad_norm": 0.6497007012367249, "learning_rate": 0.00010480003627099764, "loss": 1.7092, "step": 65420 }, { "epoch": 2.18, "grad_norm": 0.6345862746238708, "learning_rate": 0.00010479209943287718, "loss": 1.7199, "step": 65421 }, { "epoch": 2.18, "grad_norm": 0.6513299345970154, "learning_rate": 0.00010478416283171035, "loss": 1.6918, "step": 65422 }, { "epoch": 2.18, "grad_norm": 0.6437274217605591, "learning_rate": 0.0001047762264675067, "loss": 1.7198, "step": 65423 }, { "epoch": 2.18, "grad_norm": 0.6413167119026184, "learning_rate": 0.00010476829034027602, "loss": 1.6511, "step": 65424 }, { "epoch": 2.18, "grad_norm": 0.6429770588874817, "learning_rate": 0.00010476035445002787, "loss": 1.6283, "step": 65425 }, { "epoch": 2.18, "grad_norm": 0.6636187434196472, "learning_rate": 0.00010475241879677179, "loss": 1.7408, "step": 65426 }, { "epoch": 2.18, "grad_norm": 0.6451936960220337, "learning_rate": 0.00010474448338051748, "loss": 1.6482, "step": 65427 }, { "epoch": 2.18, "grad_norm": 0.6566078662872314, "learning_rate": 0.00010473654820127473, "loss": 1.7576, "step": 65428 }, { "epoch": 2.18, "grad_norm": 0.6396980285644531, "learning_rate": 0.00010472861325905301, "loss": 1.7082, "step": 65429 }, { "epoch": 2.18, "grad_norm": 0.6510742902755737, "learning_rate": 0.00010472067855386192, "loss": 1.6886, "step": 65430 }, { "epoch": 2.18, "grad_norm": 0.6772415637969971, "learning_rate": 0.00010471274408571121, "loss": 1.7085, "step": 65431 }, { "epoch": 2.18, "grad_norm": 0.6573366522789001, "learning_rate": 0.00010470480985461049, "loss": 1.6848, "step": 65432 }, { "epoch": 2.18, "grad_norm": 0.6693254709243774, "learning_rate": 0.00010469687586056923, "loss": 1.6904, "step": 65433 }, { "epoch": 2.18, "grad_norm": 0.6322686672210693, "learning_rate": 0.00010468894210359721, "loss": 1.7312, "step": 65434 }, { "epoch": 2.18, "grad_norm": 0.6603307723999023, "learning_rate": 0.0001046810085837042, "loss": 1.7272, "step": 65435 }, { "epoch": 2.18, "grad_norm": 0.6407734751701355, "learning_rate": 0.00010467307530089949, "loss": 1.7252, "step": 65436 }, { "epoch": 2.18, "grad_norm": 0.6562560200691223, "learning_rate": 0.00010466514225519291, "loss": 1.7473, "step": 65437 }, { "epoch": 2.18, "grad_norm": 0.6547955274581909, "learning_rate": 0.00010465720944659414, "loss": 1.8219, "step": 65438 }, { "epoch": 2.18, "grad_norm": 0.6390478014945984, "learning_rate": 0.00010464927687511271, "loss": 1.6531, "step": 65439 }, { "epoch": 2.18, "grad_norm": 0.6465243697166443, "learning_rate": 0.00010464134454075822, "loss": 1.7588, "step": 65440 }, { "epoch": 2.18, "grad_norm": 0.6381946206092834, "learning_rate": 0.00010463341244354033, "loss": 1.696, "step": 65441 }, { "epoch": 2.18, "grad_norm": 0.6973491311073303, "learning_rate": 0.00010462548058346892, "loss": 1.784, "step": 65442 }, { "epoch": 2.18, "grad_norm": 0.6615398526191711, "learning_rate": 0.00010461754896055315, "loss": 1.6718, "step": 65443 }, { "epoch": 2.18, "grad_norm": 0.6483706831932068, "learning_rate": 0.00010460961757480292, "loss": 1.7162, "step": 65444 }, { "epoch": 2.18, "grad_norm": 0.6304868459701538, "learning_rate": 0.0001046016864262279, "loss": 1.7483, "step": 65445 }, { "epoch": 2.18, "grad_norm": 0.6459845900535583, "learning_rate": 0.00010459375551483764, "loss": 1.6908, "step": 65446 }, { "epoch": 2.18, "grad_norm": 0.6365971565246582, "learning_rate": 0.00010458582484064169, "loss": 1.714, "step": 65447 }, { "epoch": 2.18, "grad_norm": 0.6580587029457092, "learning_rate": 0.00010457789440364975, "loss": 1.6604, "step": 65448 }, { "epoch": 2.18, "grad_norm": 0.6302230358123779, "learning_rate": 0.00010456996420387163, "loss": 1.6815, "step": 65449 }, { "epoch": 2.18, "grad_norm": 0.6494223475456238, "learning_rate": 0.00010456203424131659, "loss": 1.7792, "step": 65450 }, { "epoch": 2.18, "grad_norm": 0.6614875197410583, "learning_rate": 0.00010455410451599445, "loss": 1.6557, "step": 65451 }, { "epoch": 2.18, "grad_norm": 0.6625015735626221, "learning_rate": 0.00010454617502791493, "loss": 1.7632, "step": 65452 }, { "epoch": 2.18, "grad_norm": 0.6495508551597595, "learning_rate": 0.00010453824577708757, "loss": 1.7249, "step": 65453 }, { "epoch": 2.18, "grad_norm": 0.6484564542770386, "learning_rate": 0.00010453031676352188, "loss": 1.6493, "step": 65454 }, { "epoch": 2.18, "grad_norm": 0.6655233502388, "learning_rate": 0.00010452238798722766, "loss": 1.6242, "step": 65455 }, { "epoch": 2.18, "grad_norm": 0.6449052691459656, "learning_rate": 0.00010451445944821447, "loss": 1.6904, "step": 65456 }, { "epoch": 2.18, "grad_norm": 0.6519249677658081, "learning_rate": 0.0001045065311464918, "loss": 1.6443, "step": 65457 }, { "epoch": 2.18, "grad_norm": 0.6662757396697998, "learning_rate": 0.00010449860308206945, "loss": 1.6964, "step": 65458 }, { "epoch": 2.18, "grad_norm": 0.6339479684829712, "learning_rate": 0.00010449067525495702, "loss": 1.6812, "step": 65459 }, { "epoch": 2.18, "grad_norm": 0.664459228515625, "learning_rate": 0.00010448274766516416, "loss": 1.7569, "step": 65460 }, { "epoch": 2.18, "grad_norm": 0.6638408303260803, "learning_rate": 0.0001044748203127003, "loss": 1.6961, "step": 65461 }, { "epoch": 2.18, "grad_norm": 0.6775611042976379, "learning_rate": 0.00010446689319757532, "loss": 1.7291, "step": 65462 }, { "epoch": 2.18, "grad_norm": 0.6311269402503967, "learning_rate": 0.00010445896631979871, "loss": 1.7046, "step": 65463 }, { "epoch": 2.18, "grad_norm": 0.6591721177101135, "learning_rate": 0.00010445103967938003, "loss": 1.728, "step": 65464 }, { "epoch": 2.18, "grad_norm": 0.653311014175415, "learning_rate": 0.00010444311327632905, "loss": 1.7401, "step": 65465 }, { "epoch": 2.18, "grad_norm": 0.6276276111602783, "learning_rate": 0.00010443518711065522, "loss": 1.8092, "step": 65466 }, { "epoch": 2.18, "grad_norm": 0.6279314160346985, "learning_rate": 0.00010442726118236836, "loss": 1.705, "step": 65467 }, { "epoch": 2.18, "grad_norm": 0.6268622279167175, "learning_rate": 0.00010441933549147788, "loss": 1.7037, "step": 65468 }, { "epoch": 2.18, "grad_norm": 0.6356616020202637, "learning_rate": 0.00010441141003799362, "loss": 1.745, "step": 65469 }, { "epoch": 2.18, "grad_norm": 0.6650793552398682, "learning_rate": 0.00010440348482192508, "loss": 1.6718, "step": 65470 }, { "epoch": 2.18, "grad_norm": 0.6310399770736694, "learning_rate": 0.00010439555984328178, "loss": 1.6801, "step": 65471 }, { "epoch": 2.18, "grad_norm": 0.6582794189453125, "learning_rate": 0.0001043876351020736, "loss": 1.6916, "step": 65472 }, { "epoch": 2.18, "grad_norm": 0.6266674399375916, "learning_rate": 0.00010437971059830987, "loss": 1.6758, "step": 65473 }, { "epoch": 2.18, "grad_norm": 0.6434153914451599, "learning_rate": 0.00010437178633200044, "loss": 1.7798, "step": 65474 }, { "epoch": 2.18, "grad_norm": 0.6325783729553223, "learning_rate": 0.00010436386230315486, "loss": 1.6693, "step": 65475 }, { "epoch": 2.18, "grad_norm": 0.6475895047187805, "learning_rate": 0.00010435593851178261, "loss": 1.6921, "step": 65476 }, { "epoch": 2.18, "grad_norm": 0.663078248500824, "learning_rate": 0.00010434801495789354, "loss": 1.6759, "step": 65477 }, { "epoch": 2.18, "grad_norm": 0.6723420023918152, "learning_rate": 0.00010434009164149701, "loss": 1.7512, "step": 65478 }, { "epoch": 2.18, "grad_norm": 0.6412007808685303, "learning_rate": 0.00010433216856260291, "loss": 1.7489, "step": 65479 }, { "epoch": 2.18, "grad_norm": 0.6519474983215332, "learning_rate": 0.00010432424572122062, "loss": 1.6584, "step": 65480 }, { "epoch": 2.18, "grad_norm": 0.6385083794593811, "learning_rate": 0.00010431632311736, "loss": 1.6425, "step": 65481 }, { "epoch": 2.18, "grad_norm": 0.6401997208595276, "learning_rate": 0.00010430840075103051, "loss": 1.6564, "step": 65482 }, { "epoch": 2.18, "grad_norm": 0.6597621440887451, "learning_rate": 0.00010430047862224167, "loss": 1.7468, "step": 65483 }, { "epoch": 2.18, "grad_norm": 0.6986271739006042, "learning_rate": 0.00010429255673100331, "loss": 1.6708, "step": 65484 }, { "epoch": 2.18, "grad_norm": 0.6366750597953796, "learning_rate": 0.00010428463507732499, "loss": 1.7302, "step": 65485 }, { "epoch": 2.18, "grad_norm": 0.6437788605690002, "learning_rate": 0.00010427671366121613, "loss": 1.755, "step": 65486 }, { "epoch": 2.18, "grad_norm": 0.6626904010772705, "learning_rate": 0.00010426879248268653, "loss": 1.7404, "step": 65487 }, { "epoch": 2.18, "grad_norm": 0.6799260973930359, "learning_rate": 0.00010426087154174587, "loss": 1.7279, "step": 65488 }, { "epoch": 2.18, "grad_norm": 0.6579970121383667, "learning_rate": 0.00010425295083840369, "loss": 1.7381, "step": 65489 }, { "epoch": 2.18, "grad_norm": 0.6338252425193787, "learning_rate": 0.00010424503037266948, "loss": 1.6827, "step": 65490 }, { "epoch": 2.18, "grad_norm": 0.6313182711601257, "learning_rate": 0.00010423711014455306, "loss": 1.6251, "step": 65491 }, { "epoch": 2.18, "grad_norm": 0.6387666463851929, "learning_rate": 0.00010422919015406393, "loss": 1.6926, "step": 65492 }, { "epoch": 2.18, "grad_norm": 0.649063229560852, "learning_rate": 0.00010422127040121163, "loss": 1.7147, "step": 65493 }, { "epoch": 2.18, "grad_norm": 0.636932909488678, "learning_rate": 0.00010421335088600584, "loss": 1.723, "step": 65494 }, { "epoch": 2.18, "grad_norm": 0.6534720063209534, "learning_rate": 0.00010420543160845641, "loss": 1.6426, "step": 65495 }, { "epoch": 2.18, "grad_norm": 0.6378759741783142, "learning_rate": 0.00010419751256857254, "loss": 1.664, "step": 65496 }, { "epoch": 2.18, "grad_norm": 0.6554065942764282, "learning_rate": 0.00010418959376636404, "loss": 1.6957, "step": 65497 }, { "epoch": 2.18, "grad_norm": 0.6766220331192017, "learning_rate": 0.00010418167520184062, "loss": 1.744, "step": 65498 }, { "epoch": 2.18, "grad_norm": 0.6839724183082581, "learning_rate": 0.0001041737568750118, "loss": 1.7387, "step": 65499 }, { "epoch": 2.18, "grad_norm": 0.674439549446106, "learning_rate": 0.00010416583878588709, "loss": 1.6662, "step": 65500 }, { "epoch": 2.18, "grad_norm": 0.6525957584381104, "learning_rate": 0.00010415792093447617, "loss": 1.5942, "step": 65501 }, { "epoch": 2.18, "grad_norm": 0.6379313468933105, "learning_rate": 0.00010415000332078889, "loss": 1.714, "step": 65502 }, { "epoch": 2.18, "grad_norm": 0.6367925405502319, "learning_rate": 0.00010414208594483448, "loss": 1.7134, "step": 65503 }, { "epoch": 2.18, "grad_norm": 0.6650841236114502, "learning_rate": 0.00010413416880662268, "loss": 1.7516, "step": 65504 }, { "epoch": 2.18, "grad_norm": 0.6398169994354248, "learning_rate": 0.00010412625190616325, "loss": 1.7181, "step": 65505 }, { "epoch": 2.18, "grad_norm": 0.6634626984596252, "learning_rate": 0.00010411833524346569, "loss": 1.7631, "step": 65506 }, { "epoch": 2.18, "grad_norm": 0.6399111747741699, "learning_rate": 0.00010411041881853949, "loss": 1.6815, "step": 65507 }, { "epoch": 2.18, "grad_norm": 0.6438046097755432, "learning_rate": 0.0001041025026313944, "loss": 1.669, "step": 65508 }, { "epoch": 2.18, "grad_norm": 0.6557700634002686, "learning_rate": 0.00010409458668204013, "loss": 1.7686, "step": 65509 }, { "epoch": 2.18, "grad_norm": 0.644952654838562, "learning_rate": 0.00010408667097048613, "loss": 1.6877, "step": 65510 }, { "epoch": 2.18, "grad_norm": 0.6346755027770996, "learning_rate": 0.00010407875549674198, "loss": 1.6574, "step": 65511 }, { "epoch": 2.18, "grad_norm": 0.644494891166687, "learning_rate": 0.00010407084026081741, "loss": 1.7708, "step": 65512 }, { "epoch": 2.18, "grad_norm": 0.6362259387969971, "learning_rate": 0.00010406292526272201, "loss": 1.6442, "step": 65513 }, { "epoch": 2.18, "grad_norm": 0.6624689698219299, "learning_rate": 0.00010405501050246522, "loss": 1.7831, "step": 65514 }, { "epoch": 2.18, "grad_norm": 0.6488574147224426, "learning_rate": 0.00010404709598005687, "loss": 1.6413, "step": 65515 }, { "epoch": 2.18, "grad_norm": 0.6479779481887817, "learning_rate": 0.00010403918169550639, "loss": 1.6708, "step": 65516 }, { "epoch": 2.18, "grad_norm": 0.6853601932525635, "learning_rate": 0.00010403126764882356, "loss": 1.7404, "step": 65517 }, { "epoch": 2.18, "grad_norm": 0.6646432876586914, "learning_rate": 0.00010402335384001784, "loss": 1.7181, "step": 65518 }, { "epoch": 2.18, "grad_norm": 0.6341077089309692, "learning_rate": 0.00010401544026909894, "loss": 1.7159, "step": 65519 }, { "epoch": 2.18, "grad_norm": 0.6522809863090515, "learning_rate": 0.00010400752693607645, "loss": 1.66, "step": 65520 }, { "epoch": 2.18, "grad_norm": 0.6801909804344177, "learning_rate": 0.00010399961384095981, "loss": 1.6955, "step": 65521 }, { "epoch": 2.18, "grad_norm": 0.624992847442627, "learning_rate": 0.00010399170098375891, "loss": 1.6114, "step": 65522 }, { "epoch": 2.18, "grad_norm": 0.6510463356971741, "learning_rate": 0.00010398378836448308, "loss": 1.7207, "step": 65523 }, { "epoch": 2.18, "grad_norm": 0.6354666948318481, "learning_rate": 0.00010397587598314212, "loss": 1.6473, "step": 65524 }, { "epoch": 2.18, "grad_norm": 0.6411395072937012, "learning_rate": 0.0001039679638397456, "loss": 1.7043, "step": 65525 }, { "epoch": 2.18, "grad_norm": 0.6595699191093445, "learning_rate": 0.000103960051934303, "loss": 1.7168, "step": 65526 }, { "epoch": 2.18, "grad_norm": 0.6452122330665588, "learning_rate": 0.00010395214026682407, "loss": 1.6638, "step": 65527 }, { "epoch": 2.18, "grad_norm": 0.638161301612854, "learning_rate": 0.00010394422883731826, "loss": 1.7184, "step": 65528 }, { "epoch": 2.18, "grad_norm": 0.640616774559021, "learning_rate": 0.00010393631764579537, "loss": 1.8046, "step": 65529 }, { "epoch": 2.18, "grad_norm": 0.7869766354560852, "learning_rate": 0.00010392840669226482, "loss": 1.6732, "step": 65530 }, { "epoch": 2.18, "grad_norm": 0.6284933090209961, "learning_rate": 0.00010392049597673641, "loss": 1.7436, "step": 65531 }, { "epoch": 2.18, "grad_norm": 0.639910101890564, "learning_rate": 0.00010391258549921959, "loss": 1.7362, "step": 65532 }, { "epoch": 2.18, "grad_norm": 0.6382462978363037, "learning_rate": 0.00010390467525972394, "loss": 1.5975, "step": 65533 }, { "epoch": 2.18, "grad_norm": 0.6502986550331116, "learning_rate": 0.00010389676525825917, "loss": 1.6288, "step": 65534 }, { "epoch": 2.18, "grad_norm": 0.653445839881897, "learning_rate": 0.00010388885549483477, "loss": 1.6851, "step": 65535 }, { "epoch": 2.18, "grad_norm": 0.6322970986366272, "learning_rate": 0.00010388094596946051, "loss": 1.6897, "step": 65536 }, { "epoch": 2.18, "grad_norm": 0.6577155590057373, "learning_rate": 0.00010387303668214575, "loss": 1.7216, "step": 65537 }, { "epoch": 2.18, "grad_norm": 0.6711282730102539, "learning_rate": 0.00010386512763290035, "loss": 1.761, "step": 65538 }, { "epoch": 2.18, "grad_norm": 0.6243945956230164, "learning_rate": 0.00010385721882173376, "loss": 1.7382, "step": 65539 }, { "epoch": 2.18, "grad_norm": 0.651974618434906, "learning_rate": 0.00010384931024865554, "loss": 1.7226, "step": 65540 }, { "epoch": 2.18, "grad_norm": 0.6070218682289124, "learning_rate": 0.00010384140191367542, "loss": 1.6712, "step": 65541 }, { "epoch": 2.18, "grad_norm": 0.6557172536849976, "learning_rate": 0.00010383349381680298, "loss": 1.7515, "step": 65542 }, { "epoch": 2.18, "grad_norm": 0.6551075577735901, "learning_rate": 0.00010382558595804763, "loss": 1.7353, "step": 65543 }, { "epoch": 2.18, "grad_norm": 0.6565145254135132, "learning_rate": 0.00010381767833741913, "loss": 1.6765, "step": 65544 }, { "epoch": 2.18, "grad_norm": 0.6422243118286133, "learning_rate": 0.00010380977095492717, "loss": 1.6845, "step": 65545 }, { "epoch": 2.18, "grad_norm": 0.643130898475647, "learning_rate": 0.00010380186381058126, "loss": 1.6849, "step": 65546 }, { "epoch": 2.18, "grad_norm": 0.6513677835464478, "learning_rate": 0.00010379395690439086, "loss": 1.7456, "step": 65547 }, { "epoch": 2.18, "grad_norm": 0.645696759223938, "learning_rate": 0.00010378605023636575, "loss": 1.7142, "step": 65548 }, { "epoch": 2.18, "grad_norm": 0.6494081020355225, "learning_rate": 0.00010377814380651553, "loss": 1.7449, "step": 65549 }, { "epoch": 2.18, "grad_norm": 0.646439790725708, "learning_rate": 0.00010377023761484958, "loss": 1.7099, "step": 65550 }, { "epoch": 2.18, "grad_norm": 0.6411290168762207, "learning_rate": 0.00010376233166137766, "loss": 1.7339, "step": 65551 }, { "epoch": 2.18, "grad_norm": 0.6401147842407227, "learning_rate": 0.00010375442594610957, "loss": 1.686, "step": 65552 }, { "epoch": 2.18, "grad_norm": 0.6302374601364136, "learning_rate": 0.00010374652046905448, "loss": 1.7173, "step": 65553 }, { "epoch": 2.18, "grad_norm": 0.6368882656097412, "learning_rate": 0.00010373861523022221, "loss": 1.7931, "step": 65554 }, { "epoch": 2.18, "grad_norm": 0.6307058930397034, "learning_rate": 0.00010373071022962244, "loss": 1.6335, "step": 65555 }, { "epoch": 2.18, "grad_norm": 0.6520921587944031, "learning_rate": 0.00010372280546726466, "loss": 1.6758, "step": 65556 }, { "epoch": 2.18, "grad_norm": 0.6481490135192871, "learning_rate": 0.00010371490094315839, "loss": 1.6537, "step": 65557 }, { "epoch": 2.18, "grad_norm": 0.6625548601150513, "learning_rate": 0.00010370699665731328, "loss": 1.7291, "step": 65558 }, { "epoch": 2.18, "grad_norm": 0.645476758480072, "learning_rate": 0.00010369909260973917, "loss": 1.6883, "step": 65559 }, { "epoch": 2.18, "grad_norm": 0.6704649925231934, "learning_rate": 0.00010369118880044524, "loss": 1.6937, "step": 65560 }, { "epoch": 2.18, "grad_norm": 0.6438248157501221, "learning_rate": 0.00010368328522944128, "loss": 1.7312, "step": 65561 }, { "epoch": 2.18, "grad_norm": 0.6321197748184204, "learning_rate": 0.000103675381896737, "loss": 1.6565, "step": 65562 }, { "epoch": 2.18, "grad_norm": 0.6615287661552429, "learning_rate": 0.00010366747880234186, "loss": 1.6615, "step": 65563 }, { "epoch": 2.18, "grad_norm": 0.6359157562255859, "learning_rate": 0.00010365957594626538, "loss": 1.6762, "step": 65564 }, { "epoch": 2.18, "grad_norm": 0.6358926296234131, "learning_rate": 0.00010365167332851725, "loss": 1.7408, "step": 65565 }, { "epoch": 2.18, "grad_norm": 0.6473016142845154, "learning_rate": 0.00010364377094910724, "loss": 1.6346, "step": 65566 }, { "epoch": 2.18, "grad_norm": 0.6519724726676941, "learning_rate": 0.00010363586880804457, "loss": 1.7235, "step": 65567 }, { "epoch": 2.18, "grad_norm": 0.6480927467346191, "learning_rate": 0.00010362796690533904, "loss": 1.6693, "step": 65568 }, { "epoch": 2.18, "grad_norm": 0.6684699654579163, "learning_rate": 0.0001036200652410003, "loss": 1.7858, "step": 65569 }, { "epoch": 2.18, "grad_norm": 0.657720148563385, "learning_rate": 0.00010361216381503788, "loss": 1.6362, "step": 65570 }, { "epoch": 2.18, "grad_norm": 0.6398521661758423, "learning_rate": 0.00010360426262746125, "loss": 1.6802, "step": 65571 }, { "epoch": 2.18, "grad_norm": 0.6514142751693726, "learning_rate": 0.00010359636167828021, "loss": 1.6573, "step": 65572 }, { "epoch": 2.18, "grad_norm": 0.659706711769104, "learning_rate": 0.00010358846096750422, "loss": 1.6939, "step": 65573 }, { "epoch": 2.18, "grad_norm": 0.6674554347991943, "learning_rate": 0.00010358056049514283, "loss": 1.8255, "step": 65574 }, { "epoch": 2.18, "grad_norm": 0.6490519046783447, "learning_rate": 0.0001035726602612057, "loss": 1.7209, "step": 65575 }, { "epoch": 2.18, "grad_norm": 0.6573572754859924, "learning_rate": 0.00010356476026570252, "loss": 1.6881, "step": 65576 }, { "epoch": 2.18, "grad_norm": 0.6319682002067566, "learning_rate": 0.00010355686050864276, "loss": 1.7629, "step": 65577 }, { "epoch": 2.18, "grad_norm": 0.6408226490020752, "learning_rate": 0.00010354896099003594, "loss": 1.74, "step": 65578 }, { "epoch": 2.18, "grad_norm": 0.6432692408561707, "learning_rate": 0.00010354106170989185, "loss": 1.6459, "step": 65579 }, { "epoch": 2.18, "grad_norm": 0.6154656410217285, "learning_rate": 0.00010353316266821993, "loss": 1.6858, "step": 65580 }, { "epoch": 2.18, "grad_norm": 0.6772193908691406, "learning_rate": 0.0001035252638650297, "loss": 1.7214, "step": 65581 }, { "epoch": 2.18, "grad_norm": 0.6474997997283936, "learning_rate": 0.00010351736530033098, "loss": 1.647, "step": 65582 }, { "epoch": 2.18, "grad_norm": 0.6336764693260193, "learning_rate": 0.00010350946697413314, "loss": 1.7642, "step": 65583 }, { "epoch": 2.18, "grad_norm": 0.6513354182243347, "learning_rate": 0.00010350156888644591, "loss": 1.6992, "step": 65584 }, { "epoch": 2.18, "grad_norm": 0.6519773006439209, "learning_rate": 0.00010349367103727874, "loss": 1.662, "step": 65585 }, { "epoch": 2.18, "grad_norm": 0.6529841423034668, "learning_rate": 0.00010348577342664142, "loss": 1.7185, "step": 65586 }, { "epoch": 2.18, "grad_norm": 0.6458284854888916, "learning_rate": 0.0001034778760545434, "loss": 1.7323, "step": 65587 }, { "epoch": 2.18, "grad_norm": 0.6868685483932495, "learning_rate": 0.00010346997892099417, "loss": 1.7696, "step": 65588 }, { "epoch": 2.18, "grad_norm": 0.6433203220367432, "learning_rate": 0.00010346208202600356, "loss": 1.6449, "step": 65589 }, { "epoch": 2.18, "grad_norm": 0.6546640396118164, "learning_rate": 0.0001034541853695809, "loss": 1.783, "step": 65590 }, { "epoch": 2.18, "grad_norm": 0.6643638014793396, "learning_rate": 0.000103446288951736, "loss": 1.7595, "step": 65591 }, { "epoch": 2.18, "grad_norm": 0.662422239780426, "learning_rate": 0.00010343839277247835, "loss": 1.7088, "step": 65592 }, { "epoch": 2.18, "grad_norm": 0.6359772682189941, "learning_rate": 0.00010343049683181744, "loss": 1.7308, "step": 65593 }, { "epoch": 2.18, "grad_norm": 0.6476801633834839, "learning_rate": 0.00010342260112976303, "loss": 1.6808, "step": 65594 }, { "epoch": 2.18, "grad_norm": 0.643366813659668, "learning_rate": 0.00010341470566632452, "loss": 1.7257, "step": 65595 }, { "epoch": 2.18, "grad_norm": 0.6495909690856934, "learning_rate": 0.0001034068104415117, "loss": 1.7173, "step": 65596 }, { "epoch": 2.18, "grad_norm": 0.6584223508834839, "learning_rate": 0.00010339891545533392, "loss": 1.6435, "step": 65597 }, { "epoch": 2.18, "grad_norm": 0.6367965936660767, "learning_rate": 0.000103391020707801, "loss": 1.7372, "step": 65598 }, { "epoch": 2.18, "grad_norm": 0.6154040098190308, "learning_rate": 0.00010338312619892243, "loss": 1.7167, "step": 65599 }, { "epoch": 2.18, "grad_norm": 0.66399747133255, "learning_rate": 0.00010337523192870766, "loss": 1.7365, "step": 65600 }, { "epoch": 2.18, "grad_norm": 0.6278302073478699, "learning_rate": 0.00010336733789716649, "loss": 1.6988, "step": 65601 }, { "epoch": 2.18, "grad_norm": 0.6301536560058594, "learning_rate": 0.00010335944410430839, "loss": 1.721, "step": 65602 }, { "epoch": 2.18, "grad_norm": 0.6390878558158875, "learning_rate": 0.00010335155055014288, "loss": 1.7672, "step": 65603 }, { "epoch": 2.18, "grad_norm": 0.6706554293632507, "learning_rate": 0.0001033436572346796, "loss": 1.7733, "step": 65604 }, { "epoch": 2.18, "grad_norm": 0.648996889591217, "learning_rate": 0.00010333576415792823, "loss": 1.7508, "step": 65605 }, { "epoch": 2.18, "grad_norm": 0.6166841387748718, "learning_rate": 0.00010332787131989825, "loss": 1.6311, "step": 65606 }, { "epoch": 2.18, "grad_norm": 0.6423142552375793, "learning_rate": 0.0001033199787205992, "loss": 1.7192, "step": 65607 }, { "epoch": 2.18, "grad_norm": 0.6661468744277954, "learning_rate": 0.0001033120863600408, "loss": 1.7604, "step": 65608 }, { "epoch": 2.18, "grad_norm": 0.6462807059288025, "learning_rate": 0.00010330419423823253, "loss": 1.7365, "step": 65609 }, { "epoch": 2.18, "grad_norm": 0.6601662635803223, "learning_rate": 0.00010329630235518391, "loss": 1.6659, "step": 65610 }, { "epoch": 2.18, "grad_norm": 0.6530010104179382, "learning_rate": 0.00010328841071090458, "loss": 1.708, "step": 65611 }, { "epoch": 2.18, "grad_norm": 0.6393709778785706, "learning_rate": 0.00010328051930540432, "loss": 1.7255, "step": 65612 }, { "epoch": 2.18, "grad_norm": 0.6394449472427368, "learning_rate": 0.00010327262813869234, "loss": 1.7211, "step": 65613 }, { "epoch": 2.18, "grad_norm": 0.6417199969291687, "learning_rate": 0.00010326473721077843, "loss": 1.6992, "step": 65614 }, { "epoch": 2.18, "grad_norm": 0.6557499766349792, "learning_rate": 0.0001032568465216722, "loss": 1.7599, "step": 65615 }, { "epoch": 2.18, "grad_norm": 0.6359962224960327, "learning_rate": 0.0001032489560713832, "loss": 1.7098, "step": 65616 }, { "epoch": 2.18, "grad_norm": 0.6372656226158142, "learning_rate": 0.00010324106585992087, "loss": 1.7474, "step": 65617 }, { "epoch": 2.18, "grad_norm": 0.6434640288352966, "learning_rate": 0.0001032331758872949, "loss": 1.6473, "step": 65618 }, { "epoch": 2.18, "grad_norm": 0.6527852416038513, "learning_rate": 0.00010322528615351503, "loss": 1.7237, "step": 65619 }, { "epoch": 2.18, "grad_norm": 0.649539053440094, "learning_rate": 0.0001032173966585905, "loss": 1.6413, "step": 65620 }, { "epoch": 2.18, "grad_norm": 0.64189612865448, "learning_rate": 0.00010320950740253104, "loss": 1.6919, "step": 65621 }, { "epoch": 2.18, "grad_norm": 0.6507749557495117, "learning_rate": 0.00010320161838534635, "loss": 1.6466, "step": 65622 }, { "epoch": 2.18, "grad_norm": 0.6604596972465515, "learning_rate": 0.00010319372960704588, "loss": 1.7772, "step": 65623 }, { "epoch": 2.18, "grad_norm": 0.6355461478233337, "learning_rate": 0.00010318584106763915, "loss": 1.664, "step": 65624 }, { "epoch": 2.18, "grad_norm": 0.6468254923820496, "learning_rate": 0.00010317795276713582, "loss": 1.6343, "step": 65625 }, { "epoch": 2.18, "grad_norm": 0.6520206332206726, "learning_rate": 0.00010317006470554554, "loss": 1.6714, "step": 65626 }, { "epoch": 2.18, "grad_norm": 0.6444025635719299, "learning_rate": 0.00010316217688287778, "loss": 1.759, "step": 65627 }, { "epoch": 2.18, "grad_norm": 0.6380919814109802, "learning_rate": 0.00010315428929914205, "loss": 1.6892, "step": 65628 }, { "epoch": 2.18, "grad_norm": 0.6474652886390686, "learning_rate": 0.00010314640195434813, "loss": 1.7136, "step": 65629 }, { "epoch": 2.18, "grad_norm": 0.6451388001441956, "learning_rate": 0.00010313851484850544, "loss": 1.7015, "step": 65630 }, { "epoch": 2.18, "grad_norm": 0.6388351917266846, "learning_rate": 0.00010313062798162349, "loss": 1.7119, "step": 65631 }, { "epoch": 2.18, "grad_norm": 0.649103045463562, "learning_rate": 0.00010312274135371205, "loss": 1.6751, "step": 65632 }, { "epoch": 2.18, "grad_norm": 0.6451873183250427, "learning_rate": 0.00010311485496478052, "loss": 1.6829, "step": 65633 }, { "epoch": 2.18, "grad_norm": 0.6490252614021301, "learning_rate": 0.00010310696881483865, "loss": 1.7472, "step": 65634 }, { "epoch": 2.18, "grad_norm": 0.6571379899978638, "learning_rate": 0.00010309908290389577, "loss": 1.759, "step": 65635 }, { "epoch": 2.18, "grad_norm": 0.6444180011749268, "learning_rate": 0.00010309119723196173, "loss": 1.7384, "step": 65636 }, { "epoch": 2.18, "grad_norm": 0.6329737305641174, "learning_rate": 0.00010308331179904594, "loss": 1.7581, "step": 65637 }, { "epoch": 2.18, "grad_norm": 0.6487410068511963, "learning_rate": 0.0001030754266051579, "loss": 1.6621, "step": 65638 }, { "epoch": 2.18, "grad_norm": 0.6218752861022949, "learning_rate": 0.00010306754165030738, "loss": 1.7228, "step": 65639 }, { "epoch": 2.18, "grad_norm": 0.6274771094322205, "learning_rate": 0.00010305965693450372, "loss": 1.7081, "step": 65640 }, { "epoch": 2.18, "grad_norm": 0.6455463767051697, "learning_rate": 0.00010305177245775676, "loss": 1.6827, "step": 65641 }, { "epoch": 2.18, "grad_norm": 0.6153810024261475, "learning_rate": 0.00010304388822007591, "loss": 1.7039, "step": 65642 }, { "epoch": 2.18, "grad_norm": 0.6447104811668396, "learning_rate": 0.00010303600422147065, "loss": 1.7458, "step": 65643 }, { "epoch": 2.18, "grad_norm": 0.6360307931900024, "learning_rate": 0.00010302812046195077, "loss": 1.7321, "step": 65644 }, { "epoch": 2.18, "grad_norm": 0.6522197127342224, "learning_rate": 0.00010302023694152563, "loss": 1.7348, "step": 65645 }, { "epoch": 2.18, "grad_norm": 0.6500458121299744, "learning_rate": 0.000103012353660205, "loss": 1.802, "step": 65646 }, { "epoch": 2.18, "grad_norm": 0.6567806005477905, "learning_rate": 0.00010300447061799827, "loss": 1.7399, "step": 65647 }, { "epoch": 2.18, "grad_norm": 0.6461440920829773, "learning_rate": 0.00010299658781491515, "loss": 1.7268, "step": 65648 }, { "epoch": 2.18, "grad_norm": 0.6273655891418457, "learning_rate": 0.00010298870525096515, "loss": 1.6782, "step": 65649 }, { "epoch": 2.18, "grad_norm": 0.6264531016349792, "learning_rate": 0.00010298082292615775, "loss": 1.6462, "step": 65650 }, { "epoch": 2.18, "grad_norm": 0.6373111605644226, "learning_rate": 0.00010297294084050269, "loss": 1.6526, "step": 65651 }, { "epoch": 2.18, "grad_norm": 0.6221089363098145, "learning_rate": 0.00010296505899400945, "loss": 1.7023, "step": 65652 }, { "epoch": 2.18, "grad_norm": 0.6508859395980835, "learning_rate": 0.00010295717738668749, "loss": 1.7065, "step": 65653 }, { "epoch": 2.18, "grad_norm": 0.6692060828208923, "learning_rate": 0.00010294929601854651, "loss": 1.7111, "step": 65654 }, { "epoch": 2.18, "grad_norm": 0.6439793109893799, "learning_rate": 0.00010294141488959612, "loss": 1.7232, "step": 65655 }, { "epoch": 2.18, "grad_norm": 0.6440397500991821, "learning_rate": 0.00010293353399984585, "loss": 1.6657, "step": 65656 }, { "epoch": 2.18, "grad_norm": 0.6613205671310425, "learning_rate": 0.0001029256533493051, "loss": 1.8092, "step": 65657 }, { "epoch": 2.18, "grad_norm": 0.6398440003395081, "learning_rate": 0.00010291777293798371, "loss": 1.6835, "step": 65658 }, { "epoch": 2.18, "grad_norm": 0.618074357509613, "learning_rate": 0.00010290989276589105, "loss": 1.7175, "step": 65659 }, { "epoch": 2.18, "grad_norm": 0.6599798202514648, "learning_rate": 0.0001029020128330367, "loss": 1.7732, "step": 65660 }, { "epoch": 2.18, "grad_norm": 0.6535094380378723, "learning_rate": 0.00010289413313943022, "loss": 1.6948, "step": 65661 }, { "epoch": 2.18, "grad_norm": 0.6723485589027405, "learning_rate": 0.00010288625368508133, "loss": 1.687, "step": 65662 }, { "epoch": 2.18, "grad_norm": 0.64570552110672, "learning_rate": 0.00010287837446999948, "loss": 1.5847, "step": 65663 }, { "epoch": 2.18, "grad_norm": 0.6316831707954407, "learning_rate": 0.00010287049549419416, "loss": 1.6944, "step": 65664 }, { "epoch": 2.18, "grad_norm": 0.6645311117172241, "learning_rate": 0.0001028626167576751, "loss": 1.6035, "step": 65665 }, { "epoch": 2.18, "grad_norm": 0.6643527150154114, "learning_rate": 0.00010285473826045178, "loss": 1.7046, "step": 65666 }, { "epoch": 2.18, "grad_norm": 0.6572355628013611, "learning_rate": 0.00010284686000253365, "loss": 1.7105, "step": 65667 }, { "epoch": 2.18, "grad_norm": 0.6484959721565247, "learning_rate": 0.00010283898198393042, "loss": 1.7606, "step": 65668 }, { "epoch": 2.18, "grad_norm": 0.6548752784729004, "learning_rate": 0.00010283110420465178, "loss": 1.7049, "step": 65669 }, { "epoch": 2.18, "grad_norm": 0.66965252161026, "learning_rate": 0.00010282322666470692, "loss": 1.7312, "step": 65670 }, { "epoch": 2.18, "grad_norm": 0.6687576174736023, "learning_rate": 0.00010281534936410564, "loss": 1.787, "step": 65671 }, { "epoch": 2.18, "grad_norm": 0.6457635164260864, "learning_rate": 0.00010280747230285753, "loss": 1.7049, "step": 65672 }, { "epoch": 2.18, "grad_norm": 0.6219426989555359, "learning_rate": 0.00010279959548097212, "loss": 1.7147, "step": 65673 }, { "epoch": 2.18, "grad_norm": 0.6444395780563354, "learning_rate": 0.00010279171889845887, "loss": 1.7129, "step": 65674 }, { "epoch": 2.19, "grad_norm": 0.6733295321464539, "learning_rate": 0.0001027838425553274, "loss": 1.6956, "step": 65675 }, { "epoch": 2.19, "grad_norm": 0.6469715237617493, "learning_rate": 0.00010277596645158745, "loss": 1.7102, "step": 65676 }, { "epoch": 2.19, "grad_norm": 0.6423653364181519, "learning_rate": 0.00010276809058724825, "loss": 1.629, "step": 65677 }, { "epoch": 2.19, "grad_norm": 0.657113254070282, "learning_rate": 0.00010276021496231951, "loss": 1.6235, "step": 65678 }, { "epoch": 2.19, "grad_norm": 0.6361027956008911, "learning_rate": 0.00010275233957681092, "loss": 1.6406, "step": 65679 }, { "epoch": 2.19, "grad_norm": 0.6392182111740112, "learning_rate": 0.00010274446443073193, "loss": 1.7535, "step": 65680 }, { "epoch": 2.19, "grad_norm": 0.6607216596603394, "learning_rate": 0.00010273658952409198, "loss": 1.6814, "step": 65681 }, { "epoch": 2.19, "grad_norm": 0.6773547530174255, "learning_rate": 0.00010272871485690076, "loss": 1.714, "step": 65682 }, { "epoch": 2.19, "grad_norm": 0.6721291542053223, "learning_rate": 0.00010272084042916802, "loss": 1.686, "step": 65683 }, { "epoch": 2.19, "grad_norm": 0.6774510145187378, "learning_rate": 0.00010271296624090291, "loss": 1.6923, "step": 65684 }, { "epoch": 2.19, "grad_norm": 0.6914032697677612, "learning_rate": 0.0001027050922921152, "loss": 1.7522, "step": 65685 }, { "epoch": 2.19, "grad_norm": 0.6440914869308472, "learning_rate": 0.0001026972185828145, "loss": 1.6855, "step": 65686 }, { "epoch": 2.19, "grad_norm": 0.6558615565299988, "learning_rate": 0.00010268934511301033, "loss": 1.6499, "step": 65687 }, { "epoch": 2.19, "grad_norm": 0.6772076487541199, "learning_rate": 0.00010268147188271212, "loss": 1.7866, "step": 65688 }, { "epoch": 2.19, "grad_norm": 0.6948404908180237, "learning_rate": 0.0001026735988919296, "loss": 1.6885, "step": 65689 }, { "epoch": 2.19, "grad_norm": 0.6557911038398743, "learning_rate": 0.0001026657261406723, "loss": 1.7056, "step": 65690 }, { "epoch": 2.19, "grad_norm": 0.6407127976417542, "learning_rate": 0.00010265785362894962, "loss": 1.7076, "step": 65691 }, { "epoch": 2.19, "grad_norm": 0.6303101181983948, "learning_rate": 0.00010264998135677122, "loss": 1.6529, "step": 65692 }, { "epoch": 2.19, "grad_norm": 0.6611578464508057, "learning_rate": 0.00010264210932414677, "loss": 1.7434, "step": 65693 }, { "epoch": 2.19, "grad_norm": 0.6376841068267822, "learning_rate": 0.00010263423753108574, "loss": 1.7, "step": 65694 }, { "epoch": 2.19, "grad_norm": 0.664447009563446, "learning_rate": 0.00010262636597759751, "loss": 1.7106, "step": 65695 }, { "epoch": 2.19, "grad_norm": 0.648780882358551, "learning_rate": 0.00010261849466369194, "loss": 1.7732, "step": 65696 }, { "epoch": 2.19, "grad_norm": 0.6399521231651306, "learning_rate": 0.00010261062358937841, "loss": 1.6682, "step": 65697 }, { "epoch": 2.19, "grad_norm": 0.6731547713279724, "learning_rate": 0.00010260275275466643, "loss": 1.7171, "step": 65698 }, { "epoch": 2.19, "grad_norm": 0.6906112432479858, "learning_rate": 0.0001025948821595657, "loss": 1.6851, "step": 65699 }, { "epoch": 2.19, "grad_norm": 0.6321195363998413, "learning_rate": 0.00010258701180408561, "loss": 1.6893, "step": 65700 }, { "epoch": 2.19, "grad_norm": 0.6640874743461609, "learning_rate": 0.00010257914168823586, "loss": 1.7638, "step": 65701 }, { "epoch": 2.19, "grad_norm": 0.6426985859870911, "learning_rate": 0.00010257127181202588, "loss": 1.6898, "step": 65702 }, { "epoch": 2.19, "grad_norm": 0.7567998766899109, "learning_rate": 0.00010256340217546538, "loss": 1.6997, "step": 65703 }, { "epoch": 2.19, "grad_norm": 0.6473933458328247, "learning_rate": 0.00010255553277856382, "loss": 1.6715, "step": 65704 }, { "epoch": 2.19, "grad_norm": 0.6461325287818909, "learning_rate": 0.00010254766362133065, "loss": 1.7419, "step": 65705 }, { "epoch": 2.19, "grad_norm": 0.6675934791564941, "learning_rate": 0.00010253979470377563, "loss": 1.65, "step": 65706 }, { "epoch": 2.19, "grad_norm": 0.6485311388969421, "learning_rate": 0.00010253192602590808, "loss": 1.6872, "step": 65707 }, { "epoch": 2.19, "grad_norm": 0.6340094804763794, "learning_rate": 0.0001025240575877378, "loss": 1.6792, "step": 65708 }, { "epoch": 2.19, "grad_norm": 0.6661874055862427, "learning_rate": 0.0001025161893892742, "loss": 1.766, "step": 65709 }, { "epoch": 2.19, "grad_norm": 0.6575098037719727, "learning_rate": 0.00010250832143052677, "loss": 1.6853, "step": 65710 }, { "epoch": 2.19, "grad_norm": 0.6744932532310486, "learning_rate": 0.00010250045371150523, "loss": 1.7326, "step": 65711 }, { "epoch": 2.19, "grad_norm": 0.634009063243866, "learning_rate": 0.00010249258623221894, "loss": 1.6294, "step": 65712 }, { "epoch": 2.19, "grad_norm": 0.6498371362686157, "learning_rate": 0.00010248471899267765, "loss": 1.6946, "step": 65713 }, { "epoch": 2.19, "grad_norm": 0.6396075487136841, "learning_rate": 0.00010247685199289068, "loss": 1.6835, "step": 65714 }, { "epoch": 2.19, "grad_norm": 0.6409270167350769, "learning_rate": 0.00010246898523286785, "loss": 1.6795, "step": 65715 }, { "epoch": 2.19, "grad_norm": 0.6470938324928284, "learning_rate": 0.00010246111871261855, "loss": 1.7474, "step": 65716 }, { "epoch": 2.19, "grad_norm": 0.6543645262718201, "learning_rate": 0.00010245325243215224, "loss": 1.7425, "step": 65717 }, { "epoch": 2.19, "grad_norm": 0.6621806621551514, "learning_rate": 0.00010244538639147868, "loss": 1.6385, "step": 65718 }, { "epoch": 2.19, "grad_norm": 0.6523528695106506, "learning_rate": 0.0001024375205906073, "loss": 1.756, "step": 65719 }, { "epoch": 2.19, "grad_norm": 0.643821656703949, "learning_rate": 0.00010242965502954756, "loss": 1.7015, "step": 65720 }, { "epoch": 2.19, "grad_norm": 0.6516991853713989, "learning_rate": 0.00010242178970830912, "loss": 1.7512, "step": 65721 }, { "epoch": 2.19, "grad_norm": 0.6619833707809448, "learning_rate": 0.00010241392462690164, "loss": 1.7207, "step": 65722 }, { "epoch": 2.19, "grad_norm": 0.6581994891166687, "learning_rate": 0.00010240605978533453, "loss": 1.6328, "step": 65723 }, { "epoch": 2.19, "grad_norm": 0.6373807787895203, "learning_rate": 0.00010239819518361723, "loss": 1.6839, "step": 65724 }, { "epoch": 2.19, "grad_norm": 0.6588411927223206, "learning_rate": 0.00010239033082175949, "loss": 1.7355, "step": 65725 }, { "epoch": 2.19, "grad_norm": 0.6462858319282532, "learning_rate": 0.00010238246669977081, "loss": 1.6609, "step": 65726 }, { "epoch": 2.19, "grad_norm": 0.6547935009002686, "learning_rate": 0.0001023746028176606, "loss": 1.7195, "step": 65727 }, { "epoch": 2.19, "grad_norm": 0.6497928500175476, "learning_rate": 0.00010236673917543847, "loss": 1.679, "step": 65728 }, { "epoch": 2.19, "grad_norm": 0.6796637773513794, "learning_rate": 0.00010235887577311422, "loss": 1.6428, "step": 65729 }, { "epoch": 2.19, "grad_norm": 0.6762769222259521, "learning_rate": 0.00010235101261069697, "loss": 1.796, "step": 65730 }, { "epoch": 2.19, "grad_norm": 0.6282497048377991, "learning_rate": 0.00010234314968819647, "loss": 1.6932, "step": 65731 }, { "epoch": 2.19, "grad_norm": 0.6514167189598083, "learning_rate": 0.00010233528700562234, "loss": 1.7449, "step": 65732 }, { "epoch": 2.19, "grad_norm": 0.9819656610488892, "learning_rate": 0.00010232742456298406, "loss": 1.7246, "step": 65733 }, { "epoch": 2.19, "grad_norm": 0.6643403172492981, "learning_rate": 0.00010231956236029107, "loss": 1.6784, "step": 65734 }, { "epoch": 2.19, "grad_norm": 0.644036591053009, "learning_rate": 0.00010231170039755301, "loss": 1.6224, "step": 65735 }, { "epoch": 2.19, "grad_norm": 0.619373083114624, "learning_rate": 0.00010230383867477953, "loss": 1.6616, "step": 65736 }, { "epoch": 2.19, "grad_norm": 0.6423523426055908, "learning_rate": 0.00010229597719198001, "loss": 1.6985, "step": 65737 }, { "epoch": 2.19, "grad_norm": 0.6447542905807495, "learning_rate": 0.000102288115949164, "loss": 1.6657, "step": 65738 }, { "epoch": 2.19, "grad_norm": 0.6484699249267578, "learning_rate": 0.00010228025494634115, "loss": 1.693, "step": 65739 }, { "epoch": 2.19, "grad_norm": 0.6593667268753052, "learning_rate": 0.00010227239418352096, "loss": 1.7977, "step": 65740 }, { "epoch": 2.19, "grad_norm": 0.6265245079994202, "learning_rate": 0.00010226453366071284, "loss": 1.7127, "step": 65741 }, { "epoch": 2.19, "grad_norm": 0.65009605884552, "learning_rate": 0.00010225667337792645, "loss": 1.6137, "step": 65742 }, { "epoch": 2.19, "grad_norm": 0.6255070567131042, "learning_rate": 0.00010224881333517139, "loss": 1.6768, "step": 65743 }, { "epoch": 2.19, "grad_norm": 0.6521070003509521, "learning_rate": 0.00010224095353245715, "loss": 1.7446, "step": 65744 }, { "epoch": 2.19, "grad_norm": 0.6423099637031555, "learning_rate": 0.00010223309396979315, "loss": 1.6339, "step": 65745 }, { "epoch": 2.19, "grad_norm": 0.6511532068252563, "learning_rate": 0.00010222523464718918, "loss": 1.6495, "step": 65746 }, { "epoch": 2.19, "grad_norm": 0.6411308646202087, "learning_rate": 0.00010221737556465459, "loss": 1.7303, "step": 65747 }, { "epoch": 2.19, "grad_norm": 0.6280441880226135, "learning_rate": 0.00010220951672219885, "loss": 1.6499, "step": 65748 }, { "epoch": 2.19, "grad_norm": 0.6510460376739502, "learning_rate": 0.00010220165811983176, "loss": 1.7207, "step": 65749 }, { "epoch": 2.19, "grad_norm": 0.6480408310890198, "learning_rate": 0.00010219379975756259, "loss": 1.6777, "step": 65750 }, { "epoch": 2.19, "grad_norm": 0.6601958870887756, "learning_rate": 0.00010218594163540108, "loss": 1.7375, "step": 65751 }, { "epoch": 2.19, "grad_norm": 0.6411158442497253, "learning_rate": 0.00010217808375335664, "loss": 1.7606, "step": 65752 }, { "epoch": 2.19, "grad_norm": 0.6824747323989868, "learning_rate": 0.0001021702261114389, "loss": 1.6848, "step": 65753 }, { "epoch": 2.19, "grad_norm": 0.6429836750030518, "learning_rate": 0.00010216236870965739, "loss": 1.6888, "step": 65754 }, { "epoch": 2.19, "grad_norm": 0.6520289778709412, "learning_rate": 0.0001021545115480215, "loss": 1.7093, "step": 65755 }, { "epoch": 2.19, "grad_norm": 0.6455608010292053, "learning_rate": 0.00010214665462654101, "loss": 1.7506, "step": 65756 }, { "epoch": 2.19, "grad_norm": 0.6343677639961243, "learning_rate": 0.00010213879794522521, "loss": 1.7523, "step": 65757 }, { "epoch": 2.19, "grad_norm": 0.6368318796157837, "learning_rate": 0.00010213094150408387, "loss": 1.6909, "step": 65758 }, { "epoch": 2.19, "grad_norm": 0.6242920756340027, "learning_rate": 0.00010212308530312639, "loss": 1.7291, "step": 65759 }, { "epoch": 2.19, "grad_norm": 0.6435689926147461, "learning_rate": 0.00010211522934236223, "loss": 1.6682, "step": 65760 }, { "epoch": 2.19, "grad_norm": 0.6504405736923218, "learning_rate": 0.00010210737362180112, "loss": 1.7221, "step": 65761 }, { "epoch": 2.19, "grad_norm": 0.6489588022232056, "learning_rate": 0.0001020995181414524, "loss": 1.7379, "step": 65762 }, { "epoch": 2.19, "grad_norm": 0.6758259534835815, "learning_rate": 0.00010209166290132584, "loss": 1.6767, "step": 65763 }, { "epoch": 2.19, "grad_norm": 0.6287974119186401, "learning_rate": 0.00010208380790143071, "loss": 1.7244, "step": 65764 }, { "epoch": 2.19, "grad_norm": 0.662124514579773, "learning_rate": 0.00010207595314177681, "loss": 1.7674, "step": 65765 }, { "epoch": 2.19, "grad_norm": 0.6534469723701477, "learning_rate": 0.00010206809862237349, "loss": 1.703, "step": 65766 }, { "epoch": 2.19, "grad_norm": 0.6629295945167542, "learning_rate": 0.00010206024434323026, "loss": 1.7084, "step": 65767 }, { "epoch": 2.19, "grad_norm": 0.6537965536117554, "learning_rate": 0.00010205239030435682, "loss": 1.6573, "step": 65768 }, { "epoch": 2.19, "grad_norm": 0.6584192514419556, "learning_rate": 0.00010204453650576265, "loss": 1.7213, "step": 65769 }, { "epoch": 2.19, "grad_norm": 0.6273858547210693, "learning_rate": 0.0001020366829474571, "loss": 1.7316, "step": 65770 }, { "epoch": 2.19, "grad_norm": 0.6639775037765503, "learning_rate": 0.00010202882962944988, "loss": 1.6897, "step": 65771 }, { "epoch": 2.19, "grad_norm": 0.6335755586624146, "learning_rate": 0.00010202097655175055, "loss": 1.6915, "step": 65772 }, { "epoch": 2.19, "grad_norm": 0.6511983275413513, "learning_rate": 0.00010201312371436861, "loss": 1.7073, "step": 65773 }, { "epoch": 2.19, "grad_norm": 0.6489986181259155, "learning_rate": 0.00010200527111731348, "loss": 1.6988, "step": 65774 }, { "epoch": 2.19, "grad_norm": 0.6383740901947021, "learning_rate": 0.00010199741876059486, "loss": 1.6099, "step": 65775 }, { "epoch": 2.19, "grad_norm": 0.6383628249168396, "learning_rate": 0.00010198956664422219, "loss": 1.7333, "step": 65776 }, { "epoch": 2.19, "grad_norm": 0.6221656203269958, "learning_rate": 0.00010198171476820491, "loss": 1.7086, "step": 65777 }, { "epoch": 2.19, "grad_norm": 0.6389585733413696, "learning_rate": 0.00010197386313255268, "loss": 1.668, "step": 65778 }, { "epoch": 2.19, "grad_norm": 0.6480564475059509, "learning_rate": 0.00010196601173727507, "loss": 1.6881, "step": 65779 }, { "epoch": 2.19, "grad_norm": 0.6546148657798767, "learning_rate": 0.00010195816058238157, "loss": 1.6549, "step": 65780 }, { "epoch": 2.19, "grad_norm": 0.6567779779434204, "learning_rate": 0.00010195030966788156, "loss": 1.6774, "step": 65781 }, { "epoch": 2.19, "grad_norm": 0.6218969225883484, "learning_rate": 0.00010194245899378481, "loss": 1.7292, "step": 65782 }, { "epoch": 2.19, "grad_norm": 0.6575678586959839, "learning_rate": 0.00010193460856010073, "loss": 1.6415, "step": 65783 }, { "epoch": 2.19, "grad_norm": 0.6639870405197144, "learning_rate": 0.00010192675836683875, "loss": 1.7984, "step": 65784 }, { "epoch": 2.19, "grad_norm": 0.6485878229141235, "learning_rate": 0.0001019189084140085, "loss": 1.6504, "step": 65785 }, { "epoch": 2.19, "grad_norm": 0.6382336020469666, "learning_rate": 0.00010191105870161969, "loss": 1.6678, "step": 65786 }, { "epoch": 2.19, "grad_norm": 0.6442287564277649, "learning_rate": 0.00010190320922968149, "loss": 1.6683, "step": 65787 }, { "epoch": 2.19, "grad_norm": 0.6273040175437927, "learning_rate": 0.00010189535999820361, "loss": 1.7874, "step": 65788 }, { "epoch": 2.19, "grad_norm": 0.644045352935791, "learning_rate": 0.00010188751100719566, "loss": 1.7143, "step": 65789 }, { "epoch": 2.19, "grad_norm": 0.6263516545295715, "learning_rate": 0.0001018796622566671, "loss": 1.6474, "step": 65790 }, { "epoch": 2.19, "grad_norm": 0.6187499165534973, "learning_rate": 0.00010187181374662732, "loss": 1.6602, "step": 65791 }, { "epoch": 2.19, "grad_norm": 0.6594284176826477, "learning_rate": 0.00010186396547708596, "loss": 1.713, "step": 65792 }, { "epoch": 2.19, "grad_norm": 0.6230263113975525, "learning_rate": 0.00010185611744805278, "loss": 1.7022, "step": 65793 }, { "epoch": 2.19, "grad_norm": 0.6323202252388, "learning_rate": 0.00010184826965953685, "loss": 1.7241, "step": 65794 }, { "epoch": 2.19, "grad_norm": 0.6380891799926758, "learning_rate": 0.00010184042211154795, "loss": 1.6918, "step": 65795 }, { "epoch": 2.19, "grad_norm": 0.6506502628326416, "learning_rate": 0.00010183257480409566, "loss": 1.6195, "step": 65796 }, { "epoch": 2.19, "grad_norm": 0.6742892861366272, "learning_rate": 0.00010182472773718943, "loss": 1.7223, "step": 65797 }, { "epoch": 2.19, "grad_norm": 0.6432538628578186, "learning_rate": 0.00010181688091083869, "loss": 1.7295, "step": 65798 }, { "epoch": 2.19, "grad_norm": 0.660811185836792, "learning_rate": 0.00010180903432505303, "loss": 1.7225, "step": 65799 }, { "epoch": 2.19, "grad_norm": 0.647491455078125, "learning_rate": 0.00010180118797984223, "loss": 1.7098, "step": 65800 }, { "epoch": 2.19, "grad_norm": 0.6230000853538513, "learning_rate": 0.00010179334187521536, "loss": 1.6347, "step": 65801 }, { "epoch": 2.19, "grad_norm": 0.6297378540039062, "learning_rate": 0.00010178549601118216, "loss": 1.6837, "step": 65802 }, { "epoch": 2.19, "grad_norm": 0.6413159966468811, "learning_rate": 0.0001017776503877523, "loss": 1.7222, "step": 65803 }, { "epoch": 2.19, "grad_norm": 0.645012617111206, "learning_rate": 0.00010176980500493514, "loss": 1.7535, "step": 65804 }, { "epoch": 2.19, "grad_norm": 0.9433248043060303, "learning_rate": 0.00010176195986274014, "loss": 1.7424, "step": 65805 }, { "epoch": 2.19, "grad_norm": 0.6638512015342712, "learning_rate": 0.00010175411496117701, "loss": 1.7256, "step": 65806 }, { "epoch": 2.19, "grad_norm": 0.624770998954773, "learning_rate": 0.0001017462703002552, "loss": 1.657, "step": 65807 }, { "epoch": 2.19, "grad_norm": 0.644218921661377, "learning_rate": 0.00010173842587998408, "loss": 1.7398, "step": 65808 }, { "epoch": 2.19, "grad_norm": 0.6405953764915466, "learning_rate": 0.00010173058170037328, "loss": 1.7188, "step": 65809 }, { "epoch": 2.19, "grad_norm": 0.6599988341331482, "learning_rate": 0.00010172273776143247, "loss": 1.6842, "step": 65810 }, { "epoch": 2.19, "grad_norm": 0.6385497450828552, "learning_rate": 0.000101714894063171, "loss": 1.7202, "step": 65811 }, { "epoch": 2.19, "grad_norm": 0.6568212509155273, "learning_rate": 0.00010170705060559837, "loss": 1.7507, "step": 65812 }, { "epoch": 2.19, "grad_norm": 0.6520181894302368, "learning_rate": 0.00010169920738872427, "loss": 1.6661, "step": 65813 }, { "epoch": 2.19, "grad_norm": 0.6607931852340698, "learning_rate": 0.00010169136441255808, "loss": 1.6943, "step": 65814 }, { "epoch": 2.19, "grad_norm": 0.6488666534423828, "learning_rate": 0.00010168352167710928, "loss": 1.6498, "step": 65815 }, { "epoch": 2.19, "grad_norm": 0.6487502455711365, "learning_rate": 0.00010167567918238758, "loss": 1.7227, "step": 65816 }, { "epoch": 2.19, "grad_norm": 0.6505704522132874, "learning_rate": 0.00010166783692840225, "loss": 1.7617, "step": 65817 }, { "epoch": 2.19, "grad_norm": 0.6698721051216125, "learning_rate": 0.00010165999491516307, "loss": 1.7948, "step": 65818 }, { "epoch": 2.19, "grad_norm": 0.6562557816505432, "learning_rate": 0.0001016521531426793, "loss": 1.6825, "step": 65819 }, { "epoch": 2.19, "grad_norm": 0.6522994637489319, "learning_rate": 0.00010164431161096071, "loss": 1.6796, "step": 65820 }, { "epoch": 2.19, "grad_norm": 0.6313385963439941, "learning_rate": 0.0001016364703200167, "loss": 1.7438, "step": 65821 }, { "epoch": 2.19, "grad_norm": 0.6600678563117981, "learning_rate": 0.0001016286292698567, "loss": 1.73, "step": 65822 }, { "epoch": 2.19, "grad_norm": 0.6269173622131348, "learning_rate": 0.00010162078846049039, "loss": 1.6919, "step": 65823 }, { "epoch": 2.19, "grad_norm": 0.650820791721344, "learning_rate": 0.00010161294789192712, "loss": 1.7022, "step": 65824 }, { "epoch": 2.19, "grad_norm": 0.6570278406143188, "learning_rate": 0.00010160510756417659, "loss": 1.6545, "step": 65825 }, { "epoch": 2.19, "grad_norm": 0.6503477692604065, "learning_rate": 0.00010159726747724826, "loss": 1.7094, "step": 65826 }, { "epoch": 2.19, "grad_norm": 0.6420509219169617, "learning_rate": 0.00010158942763115147, "loss": 1.6818, "step": 65827 }, { "epoch": 2.19, "grad_norm": 0.640398383140564, "learning_rate": 0.00010158158802589602, "loss": 1.74, "step": 65828 }, { "epoch": 2.19, "grad_norm": 0.6526151299476624, "learning_rate": 0.00010157374866149118, "loss": 1.7526, "step": 65829 }, { "epoch": 2.19, "grad_norm": 0.676224410533905, "learning_rate": 0.00010156590953794666, "loss": 1.6581, "step": 65830 }, { "epoch": 2.19, "grad_norm": 0.665221631526947, "learning_rate": 0.00010155807065527179, "loss": 1.7169, "step": 65831 }, { "epoch": 2.19, "grad_norm": 0.6623745560646057, "learning_rate": 0.00010155023201347629, "loss": 1.6475, "step": 65832 }, { "epoch": 2.19, "grad_norm": 0.6575045585632324, "learning_rate": 0.00010154239361256955, "loss": 1.7374, "step": 65833 }, { "epoch": 2.19, "grad_norm": 0.6724136471748352, "learning_rate": 0.00010153455545256104, "loss": 1.7897, "step": 65834 }, { "epoch": 2.19, "grad_norm": 0.6837338805198669, "learning_rate": 0.00010152671753346039, "loss": 1.7335, "step": 65835 }, { "epoch": 2.19, "grad_norm": 0.6727361083030701, "learning_rate": 0.0001015188798552771, "loss": 1.5991, "step": 65836 }, { "epoch": 2.19, "grad_norm": 0.6470555067062378, "learning_rate": 0.00010151104241802054, "loss": 1.7728, "step": 65837 }, { "epoch": 2.19, "grad_norm": 0.6601004600524902, "learning_rate": 0.00010150320522170029, "loss": 1.6289, "step": 65838 }, { "epoch": 2.19, "grad_norm": 0.6829264760017395, "learning_rate": 0.00010149536826632603, "loss": 1.6752, "step": 65839 }, { "epoch": 2.19, "grad_norm": 0.6775108575820923, "learning_rate": 0.00010148753155190717, "loss": 1.6921, "step": 65840 }, { "epoch": 2.19, "grad_norm": 0.6414090394973755, "learning_rate": 0.00010147969507845306, "loss": 1.726, "step": 65841 }, { "epoch": 2.19, "grad_norm": 0.6638811230659485, "learning_rate": 0.00010147185884597346, "loss": 1.6396, "step": 65842 }, { "epoch": 2.19, "grad_norm": 0.6622814536094666, "learning_rate": 0.00010146402285447778, "loss": 1.6849, "step": 65843 }, { "epoch": 2.19, "grad_norm": 0.642136812210083, "learning_rate": 0.00010145618710397543, "loss": 1.7213, "step": 65844 }, { "epoch": 2.19, "grad_norm": 0.6544574499130249, "learning_rate": 0.000101448351594476, "loss": 1.6699, "step": 65845 }, { "epoch": 2.19, "grad_norm": 0.6232779026031494, "learning_rate": 0.00010144051632598921, "loss": 1.6175, "step": 65846 }, { "epoch": 2.19, "grad_norm": 0.6481201648712158, "learning_rate": 0.00010143268129852419, "loss": 1.696, "step": 65847 }, { "epoch": 2.19, "grad_norm": 0.6268712878227234, "learning_rate": 0.00010142484651209063, "loss": 1.6013, "step": 65848 }, { "epoch": 2.19, "grad_norm": 0.6455004215240479, "learning_rate": 0.00010141701196669812, "loss": 1.7225, "step": 65849 }, { "epoch": 2.19, "grad_norm": 0.662487268447876, "learning_rate": 0.00010140917766235613, "loss": 1.7377, "step": 65850 }, { "epoch": 2.19, "grad_norm": 0.65531986951828, "learning_rate": 0.00010140134359907405, "loss": 1.7863, "step": 65851 }, { "epoch": 2.19, "grad_norm": 0.6516252756118774, "learning_rate": 0.00010139350977686145, "loss": 1.6758, "step": 65852 }, { "epoch": 2.19, "grad_norm": 0.6384181976318359, "learning_rate": 0.000101385676195728, "loss": 1.7345, "step": 65853 }, { "epoch": 2.19, "grad_norm": 0.6574490070343018, "learning_rate": 0.00010137784285568305, "loss": 1.715, "step": 65854 }, { "epoch": 2.19, "grad_norm": 0.6377255320549011, "learning_rate": 0.00010137000975673604, "loss": 1.7099, "step": 65855 }, { "epoch": 2.19, "grad_norm": 0.6373767852783203, "learning_rate": 0.00010136217689889666, "loss": 1.685, "step": 65856 }, { "epoch": 2.19, "grad_norm": 0.6672680377960205, "learning_rate": 0.00010135434428217432, "loss": 1.7783, "step": 65857 }, { "epoch": 2.19, "grad_norm": 0.6407818794250488, "learning_rate": 0.00010134651190657845, "loss": 1.706, "step": 65858 }, { "epoch": 2.19, "grad_norm": 0.6459872126579285, "learning_rate": 0.00010133867977211867, "loss": 1.6747, "step": 65859 }, { "epoch": 2.19, "grad_norm": 0.6507032513618469, "learning_rate": 0.00010133084787880453, "loss": 1.7503, "step": 65860 }, { "epoch": 2.19, "grad_norm": 0.6547020673751831, "learning_rate": 0.00010132301622664547, "loss": 1.7262, "step": 65861 }, { "epoch": 2.19, "grad_norm": 0.659665048122406, "learning_rate": 0.0001013151848156509, "loss": 1.7771, "step": 65862 }, { "epoch": 2.19, "grad_norm": 0.6668440103530884, "learning_rate": 0.00010130735364583053, "loss": 1.7087, "step": 65863 }, { "epoch": 2.19, "grad_norm": 0.6603355407714844, "learning_rate": 0.00010129952271719374, "loss": 1.739, "step": 65864 }, { "epoch": 2.19, "grad_norm": 0.6487072110176086, "learning_rate": 0.00010129169202974997, "loss": 1.7094, "step": 65865 }, { "epoch": 2.19, "grad_norm": 0.6798749566078186, "learning_rate": 0.00010128386158350888, "loss": 1.6746, "step": 65866 }, { "epoch": 2.19, "grad_norm": 0.6363469362258911, "learning_rate": 0.00010127603137847982, "loss": 1.672, "step": 65867 }, { "epoch": 2.19, "grad_norm": 0.6465025544166565, "learning_rate": 0.00010126820141467249, "loss": 1.6949, "step": 65868 }, { "epoch": 2.19, "grad_norm": 0.6470922231674194, "learning_rate": 0.00010126037169209616, "loss": 1.669, "step": 65869 }, { "epoch": 2.19, "grad_norm": 0.662706196308136, "learning_rate": 0.00010125254221076057, "loss": 1.7257, "step": 65870 }, { "epoch": 2.19, "grad_norm": 0.6498910188674927, "learning_rate": 0.00010124471297067513, "loss": 1.77, "step": 65871 }, { "epoch": 2.19, "grad_norm": 0.6492206454277039, "learning_rate": 0.00010123688397184919, "loss": 1.7602, "step": 65872 }, { "epoch": 2.19, "grad_norm": 0.642147958278656, "learning_rate": 0.0001012290552142925, "loss": 1.6727, "step": 65873 }, { "epoch": 2.19, "grad_norm": 0.6399387121200562, "learning_rate": 0.00010122122669801434, "loss": 1.7452, "step": 65874 }, { "epoch": 2.19, "grad_norm": 0.6517333388328552, "learning_rate": 0.00010121339842302443, "loss": 1.679, "step": 65875 }, { "epoch": 2.19, "grad_norm": 0.6472288370132446, "learning_rate": 0.00010120557038933214, "loss": 1.6874, "step": 65876 }, { "epoch": 2.19, "grad_norm": 0.6671860814094543, "learning_rate": 0.00010119774259694692, "loss": 1.6434, "step": 65877 }, { "epoch": 2.19, "grad_norm": 0.6422061920166016, "learning_rate": 0.00010118991504587846, "loss": 1.6553, "step": 65878 }, { "epoch": 2.19, "grad_norm": 0.6513165235519409, "learning_rate": 0.00010118208773613605, "loss": 1.6116, "step": 65879 }, { "epoch": 2.19, "grad_norm": 0.6504409313201904, "learning_rate": 0.00010117426066772936, "loss": 1.6712, "step": 65880 }, { "epoch": 2.19, "grad_norm": 0.6555099487304688, "learning_rate": 0.00010116643384066774, "loss": 1.7287, "step": 65881 }, { "epoch": 2.19, "grad_norm": 0.6620624661445618, "learning_rate": 0.00010115860725496087, "loss": 1.7171, "step": 65882 }, { "epoch": 2.19, "grad_norm": 0.6479513645172119, "learning_rate": 0.00010115078091061814, "loss": 1.7359, "step": 65883 }, { "epoch": 2.19, "grad_norm": 0.6482674479484558, "learning_rate": 0.000101142954807649, "loss": 1.6527, "step": 65884 }, { "epoch": 2.19, "grad_norm": 0.6575446724891663, "learning_rate": 0.00010113512894606307, "loss": 1.6738, "step": 65885 }, { "epoch": 2.19, "grad_norm": 0.6404712796211243, "learning_rate": 0.00010112730332586982, "loss": 1.6846, "step": 65886 }, { "epoch": 2.19, "grad_norm": 0.6698447465896606, "learning_rate": 0.00010111947794707862, "loss": 1.6365, "step": 65887 }, { "epoch": 2.19, "grad_norm": 0.6593686938285828, "learning_rate": 0.00010111165280969905, "loss": 1.6722, "step": 65888 }, { "epoch": 2.19, "grad_norm": 0.6225177049636841, "learning_rate": 0.00010110382791374074, "loss": 1.7021, "step": 65889 }, { "epoch": 2.19, "grad_norm": 0.6388472318649292, "learning_rate": 0.00010109600325921309, "loss": 1.7248, "step": 65890 }, { "epoch": 2.19, "grad_norm": 0.6684455275535583, "learning_rate": 0.00010108817884612548, "loss": 1.7318, "step": 65891 }, { "epoch": 2.19, "grad_norm": 0.6564123630523682, "learning_rate": 0.0001010803546744876, "loss": 1.7115, "step": 65892 }, { "epoch": 2.19, "grad_norm": 0.6480308771133423, "learning_rate": 0.00010107253074430886, "loss": 1.6121, "step": 65893 }, { "epoch": 2.19, "grad_norm": 0.6574090719223022, "learning_rate": 0.00010106470705559869, "loss": 1.6724, "step": 65894 }, { "epoch": 2.19, "grad_norm": 0.6503986716270447, "learning_rate": 0.0001010568836083666, "loss": 1.7091, "step": 65895 }, { "epoch": 2.19, "grad_norm": 0.6523553729057312, "learning_rate": 0.00010104906040262239, "loss": 1.674, "step": 65896 }, { "epoch": 2.19, "grad_norm": 0.6495644450187683, "learning_rate": 0.00010104123743837506, "loss": 1.7335, "step": 65897 }, { "epoch": 2.19, "grad_norm": 0.6716663837432861, "learning_rate": 0.00010103341471563437, "loss": 1.6775, "step": 65898 }, { "epoch": 2.19, "grad_norm": 0.6357101798057556, "learning_rate": 0.0001010255922344099, "loss": 1.6181, "step": 65899 }, { "epoch": 2.19, "grad_norm": 0.6686229705810547, "learning_rate": 0.00010101776999471106, "loss": 1.7466, "step": 65900 }, { "epoch": 2.19, "grad_norm": 0.6368430852890015, "learning_rate": 0.00010100994799654721, "loss": 1.7316, "step": 65901 }, { "epoch": 2.19, "grad_norm": 0.6647369265556335, "learning_rate": 0.00010100212623992798, "loss": 1.6937, "step": 65902 }, { "epoch": 2.19, "grad_norm": 0.6600374579429626, "learning_rate": 0.00010099430472486303, "loss": 1.6568, "step": 65903 }, { "epoch": 2.19, "grad_norm": 0.6957284212112427, "learning_rate": 0.00010098648345136152, "loss": 1.7231, "step": 65904 }, { "epoch": 2.19, "grad_norm": 0.6871126890182495, "learning_rate": 0.00010097866241943302, "loss": 1.6905, "step": 65905 }, { "epoch": 2.19, "grad_norm": 0.6513567566871643, "learning_rate": 0.00010097084162908725, "loss": 1.6905, "step": 65906 }, { "epoch": 2.19, "grad_norm": 0.6642938256263733, "learning_rate": 0.00010096302108033353, "loss": 1.6871, "step": 65907 }, { "epoch": 2.19, "grad_norm": 0.6446695923805237, "learning_rate": 0.00010095520077318128, "loss": 1.6905, "step": 65908 }, { "epoch": 2.19, "grad_norm": 0.6421328186988831, "learning_rate": 0.00010094738070764009, "loss": 1.7279, "step": 65909 }, { "epoch": 2.19, "grad_norm": 0.6631367206573486, "learning_rate": 0.00010093956088371967, "loss": 1.6923, "step": 65910 }, { "epoch": 2.19, "grad_norm": 0.6235644817352295, "learning_rate": 0.00010093174130142908, "loss": 1.7277, "step": 65911 }, { "epoch": 2.19, "grad_norm": 0.6438444256782532, "learning_rate": 0.00010092392196077801, "loss": 1.7204, "step": 65912 }, { "epoch": 2.19, "grad_norm": 0.631614089012146, "learning_rate": 0.00010091610286177607, "loss": 1.7708, "step": 65913 }, { "epoch": 2.19, "grad_norm": 0.6638270616531372, "learning_rate": 0.00010090828400443266, "loss": 1.7318, "step": 65914 }, { "epoch": 2.19, "grad_norm": 0.6414390802383423, "learning_rate": 0.00010090046538875717, "loss": 1.6224, "step": 65915 }, { "epoch": 2.19, "grad_norm": 0.6579257845878601, "learning_rate": 0.00010089264701475915, "loss": 1.7526, "step": 65916 }, { "epoch": 2.19, "grad_norm": 0.6774085164070129, "learning_rate": 0.00010088482888244835, "loss": 1.707, "step": 65917 }, { "epoch": 2.19, "grad_norm": 0.6344614028930664, "learning_rate": 0.00010087701099183379, "loss": 1.746, "step": 65918 }, { "epoch": 2.19, "grad_norm": 0.6685661673545837, "learning_rate": 0.00010086919334292523, "loss": 1.7786, "step": 65919 }, { "epoch": 2.19, "grad_norm": 0.6396817564964294, "learning_rate": 0.00010086137593573222, "loss": 1.7374, "step": 65920 }, { "epoch": 2.19, "grad_norm": 0.6539793014526367, "learning_rate": 0.00010085355877026415, "loss": 1.6701, "step": 65921 }, { "epoch": 2.19, "grad_norm": 0.6453092098236084, "learning_rate": 0.00010084574184653041, "loss": 1.6754, "step": 65922 }, { "epoch": 2.19, "grad_norm": 0.6438486576080322, "learning_rate": 0.00010083792516454073, "loss": 1.7165, "step": 65923 }, { "epoch": 2.19, "grad_norm": 0.6654400825500488, "learning_rate": 0.00010083010872430444, "loss": 1.6551, "step": 65924 }, { "epoch": 2.19, "grad_norm": 0.6290121674537659, "learning_rate": 0.00010082229252583096, "loss": 1.6558, "step": 65925 }, { "epoch": 2.19, "grad_norm": 0.6600940227508545, "learning_rate": 0.00010081447656912986, "loss": 1.673, "step": 65926 }, { "epoch": 2.19, "grad_norm": 0.6367186903953552, "learning_rate": 0.00010080666085421075, "loss": 1.7015, "step": 65927 }, { "epoch": 2.19, "grad_norm": 0.6374725699424744, "learning_rate": 0.00010079884538108297, "loss": 1.6501, "step": 65928 }, { "epoch": 2.19, "grad_norm": 0.621799111366272, "learning_rate": 0.00010079103014975597, "loss": 1.6237, "step": 65929 }, { "epoch": 2.19, "grad_norm": 0.6488654613494873, "learning_rate": 0.0001007832151602394, "loss": 1.7514, "step": 65930 }, { "epoch": 2.19, "grad_norm": 0.6270895004272461, "learning_rate": 0.00010077540041254268, "loss": 1.694, "step": 65931 }, { "epoch": 2.19, "grad_norm": 0.6339349746704102, "learning_rate": 0.00010076758590667514, "loss": 1.7093, "step": 65932 }, { "epoch": 2.19, "grad_norm": 0.6508980393409729, "learning_rate": 0.0001007597716426465, "loss": 1.738, "step": 65933 }, { "epoch": 2.19, "grad_norm": 0.6641063094139099, "learning_rate": 0.00010075195762046602, "loss": 1.6857, "step": 65934 }, { "epoch": 2.19, "grad_norm": 0.6441766619682312, "learning_rate": 0.00010074414384014345, "loss": 1.6217, "step": 65935 }, { "epoch": 2.19, "grad_norm": 0.683214545249939, "learning_rate": 0.000100736330301688, "loss": 1.7275, "step": 65936 }, { "epoch": 2.19, "grad_norm": 0.637867271900177, "learning_rate": 0.0001007285170051094, "loss": 1.7393, "step": 65937 }, { "epoch": 2.19, "grad_norm": 0.6698017716407776, "learning_rate": 0.00010072070395041701, "loss": 1.7834, "step": 65938 }, { "epoch": 2.19, "grad_norm": 0.6764196753501892, "learning_rate": 0.00010071289113762023, "loss": 1.7001, "step": 65939 }, { "epoch": 2.19, "grad_norm": 0.6509032249450684, "learning_rate": 0.00010070507856672872, "loss": 1.7219, "step": 65940 }, { "epoch": 2.19, "grad_norm": 0.634850025177002, "learning_rate": 0.00010069726623775179, "loss": 1.643, "step": 65941 }, { "epoch": 2.19, "grad_norm": 0.6510412693023682, "learning_rate": 0.0001006894541506991, "loss": 1.7542, "step": 65942 }, { "epoch": 2.19, "grad_norm": 0.6307615041732788, "learning_rate": 0.00010068164230558005, "loss": 1.6939, "step": 65943 }, { "epoch": 2.19, "grad_norm": 0.6302950978279114, "learning_rate": 0.00010067383070240404, "loss": 1.6891, "step": 65944 }, { "epoch": 2.19, "grad_norm": 0.6534287333488464, "learning_rate": 0.00010066601934118073, "loss": 1.686, "step": 65945 }, { "epoch": 2.19, "grad_norm": 0.6452478766441345, "learning_rate": 0.00010065820822191939, "loss": 1.6711, "step": 65946 }, { "epoch": 2.19, "grad_norm": 0.6465466022491455, "learning_rate": 0.00010065039734462973, "loss": 1.6251, "step": 65947 }, { "epoch": 2.19, "grad_norm": 0.6396175622940063, "learning_rate": 0.00010064258670932097, "loss": 1.7322, "step": 65948 }, { "epoch": 2.19, "grad_norm": 0.6400958299636841, "learning_rate": 0.00010063477631600287, "loss": 1.658, "step": 65949 }, { "epoch": 2.19, "grad_norm": 0.6669199466705322, "learning_rate": 0.00010062696616468476, "loss": 1.6575, "step": 65950 }, { "epoch": 2.19, "grad_norm": 0.654082715511322, "learning_rate": 0.00010061915625537606, "loss": 1.6529, "step": 65951 }, { "epoch": 2.19, "grad_norm": 0.6523648500442505, "learning_rate": 0.0001006113465880864, "loss": 1.7329, "step": 65952 }, { "epoch": 2.19, "grad_norm": 0.6381587982177734, "learning_rate": 0.0001006035371628252, "loss": 1.6854, "step": 65953 }, { "epoch": 2.19, "grad_norm": 0.6326565146446228, "learning_rate": 0.00010059572797960183, "loss": 1.7068, "step": 65954 }, { "epoch": 2.19, "grad_norm": 1.1912236213684082, "learning_rate": 0.00010058791903842588, "loss": 1.7515, "step": 65955 }, { "epoch": 2.19, "grad_norm": 0.6589077115058899, "learning_rate": 0.0001005801103393069, "loss": 1.7706, "step": 65956 }, { "epoch": 2.19, "grad_norm": 0.6707414388656616, "learning_rate": 0.00010057230188225429, "loss": 1.7503, "step": 65957 }, { "epoch": 2.19, "grad_norm": 0.6747937798500061, "learning_rate": 0.0001005644936672774, "loss": 1.7112, "step": 65958 }, { "epoch": 2.19, "grad_norm": 0.6353434920310974, "learning_rate": 0.00010055668569438595, "loss": 1.6874, "step": 65959 }, { "epoch": 2.19, "grad_norm": 0.681077778339386, "learning_rate": 0.0001005488779635893, "loss": 1.6551, "step": 65960 }, { "epoch": 2.19, "grad_norm": 0.616944432258606, "learning_rate": 0.0001005410704748968, "loss": 1.6433, "step": 65961 }, { "epoch": 2.19, "grad_norm": 0.6426705121994019, "learning_rate": 0.00010053326322831804, "loss": 1.7315, "step": 65962 }, { "epoch": 2.19, "grad_norm": 0.6556696891784668, "learning_rate": 0.00010052545622386273, "loss": 1.6909, "step": 65963 }, { "epoch": 2.19, "grad_norm": 0.6573834419250488, "learning_rate": 0.00010051764946153994, "loss": 1.6933, "step": 65964 }, { "epoch": 2.19, "grad_norm": 0.668361246585846, "learning_rate": 0.00010050984294135931, "loss": 1.6952, "step": 65965 }, { "epoch": 2.19, "grad_norm": 0.6488593220710754, "learning_rate": 0.00010050203666333046, "loss": 1.736, "step": 65966 }, { "epoch": 2.19, "grad_norm": 0.6506154537200928, "learning_rate": 0.00010049423062746273, "loss": 1.619, "step": 65967 }, { "epoch": 2.19, "grad_norm": 0.652283251285553, "learning_rate": 0.00010048642483376548, "loss": 1.7417, "step": 65968 }, { "epoch": 2.19, "grad_norm": 0.6550622582435608, "learning_rate": 0.00010047861928224835, "loss": 1.742, "step": 65969 }, { "epoch": 2.19, "grad_norm": 0.6808457374572754, "learning_rate": 0.00010047081397292088, "loss": 1.6634, "step": 65970 }, { "epoch": 2.19, "grad_norm": 0.6594038605690002, "learning_rate": 0.00010046300890579243, "loss": 1.6872, "step": 65971 }, { "epoch": 2.19, "grad_norm": 0.6492330431938171, "learning_rate": 0.00010045520408087239, "loss": 1.706, "step": 65972 }, { "epoch": 2.19, "grad_norm": 0.656402051448822, "learning_rate": 0.00010044739949817041, "loss": 1.7144, "step": 65973 }, { "epoch": 2.19, "grad_norm": 0.6394080519676208, "learning_rate": 0.0001004395951576959, "loss": 1.6669, "step": 65974 }, { "epoch": 2.19, "grad_norm": 0.6507351994514465, "learning_rate": 0.00010043179105945823, "loss": 1.6993, "step": 65975 }, { "epoch": 2.2, "grad_norm": 0.6569844484329224, "learning_rate": 0.00010042398720346695, "loss": 1.7464, "step": 65976 }, { "epoch": 2.2, "grad_norm": 0.653791606426239, "learning_rate": 0.00010041618358973164, "loss": 1.6662, "step": 65977 }, { "epoch": 2.2, "grad_norm": 0.646579921245575, "learning_rate": 0.00010040838021826169, "loss": 1.7013, "step": 65978 }, { "epoch": 2.2, "grad_norm": 0.6582754254341125, "learning_rate": 0.00010040057708906646, "loss": 1.7342, "step": 65979 }, { "epoch": 2.2, "grad_norm": 0.6495039463043213, "learning_rate": 0.00010039277420215559, "loss": 1.6601, "step": 65980 }, { "epoch": 2.2, "grad_norm": 0.6502013802528381, "learning_rate": 0.0001003849715575385, "loss": 1.6535, "step": 65981 }, { "epoch": 2.2, "grad_norm": 0.6571050882339478, "learning_rate": 0.00010037716915522454, "loss": 1.6684, "step": 65982 }, { "epoch": 2.2, "grad_norm": 0.6463527679443359, "learning_rate": 0.0001003693669952234, "loss": 1.7195, "step": 65983 }, { "epoch": 2.2, "grad_norm": 0.6576893329620361, "learning_rate": 0.0001003615650775443, "loss": 1.6322, "step": 65984 }, { "epoch": 2.2, "grad_norm": 0.6568703651428223, "learning_rate": 0.00010035376340219697, "loss": 1.7075, "step": 65985 }, { "epoch": 2.2, "grad_norm": 0.6503039598464966, "learning_rate": 0.00010034596196919064, "loss": 1.7087, "step": 65986 }, { "epoch": 2.2, "grad_norm": 0.6340512037277222, "learning_rate": 0.000100338160778535, "loss": 1.6703, "step": 65987 }, { "epoch": 2.2, "grad_norm": 0.6657364964485168, "learning_rate": 0.00010033035983023944, "loss": 1.7642, "step": 65988 }, { "epoch": 2.2, "grad_norm": 1.804195761680603, "learning_rate": 0.00010032255912431326, "loss": 1.6806, "step": 65989 }, { "epoch": 2.2, "grad_norm": 0.6799106597900391, "learning_rate": 0.00010031475866076621, "loss": 1.6462, "step": 65990 }, { "epoch": 2.2, "grad_norm": 0.6888753175735474, "learning_rate": 0.00010030695843960749, "loss": 1.6772, "step": 65991 }, { "epoch": 2.2, "grad_norm": 0.6491397023200989, "learning_rate": 0.0001002991584608468, "loss": 1.7673, "step": 65992 }, { "epoch": 2.2, "grad_norm": 0.6376940011978149, "learning_rate": 0.00010029135872449352, "loss": 1.7137, "step": 65993 }, { "epoch": 2.2, "grad_norm": 0.6816470623016357, "learning_rate": 0.00010028355923055702, "loss": 1.641, "step": 65994 }, { "epoch": 2.2, "grad_norm": 0.6438845992088318, "learning_rate": 0.00010027575997904694, "loss": 1.7486, "step": 65995 }, { "epoch": 2.2, "grad_norm": 0.644054651260376, "learning_rate": 0.00010026796096997255, "loss": 1.7128, "step": 65996 }, { "epoch": 2.2, "grad_norm": 0.6663632392883301, "learning_rate": 0.00010026016220334354, "loss": 1.6625, "step": 65997 }, { "epoch": 2.2, "grad_norm": 0.6684395670890808, "learning_rate": 0.00010025236367916914, "loss": 1.6565, "step": 65998 }, { "epoch": 2.2, "grad_norm": 0.6421328783035278, "learning_rate": 0.00010024456539745906, "loss": 1.7126, "step": 65999 }, { "epoch": 2.2, "grad_norm": 1.0127851963043213, "learning_rate": 0.00010023676735822265, "loss": 1.7119, "step": 66000 }, { "epoch": 2.2, "grad_norm": 0.641666829586029, "learning_rate": 0.00010022896956146924, "loss": 1.6314, "step": 66001 }, { "epoch": 2.2, "grad_norm": 0.6360046863555908, "learning_rate": 0.00010022117200720857, "loss": 1.6718, "step": 66002 }, { "epoch": 2.2, "grad_norm": 0.6462119817733765, "learning_rate": 0.00010021337469544993, "loss": 1.6647, "step": 66003 }, { "epoch": 2.2, "grad_norm": 0.6860544681549072, "learning_rate": 0.00010020557762620272, "loss": 1.7122, "step": 66004 }, { "epoch": 2.2, "grad_norm": 0.6440476775169373, "learning_rate": 0.0001001977807994765, "loss": 1.755, "step": 66005 }, { "epoch": 2.2, "grad_norm": 0.6646468639373779, "learning_rate": 0.00010018998421528085, "loss": 1.7616, "step": 66006 }, { "epoch": 2.2, "grad_norm": 0.6672528982162476, "learning_rate": 0.00010018218787362509, "loss": 1.6683, "step": 66007 }, { "epoch": 2.2, "grad_norm": 0.6278814673423767, "learning_rate": 0.00010017439177451864, "loss": 1.6704, "step": 66008 }, { "epoch": 2.2, "grad_norm": 0.656182587146759, "learning_rate": 0.00010016659591797113, "loss": 1.8229, "step": 66009 }, { "epoch": 2.2, "grad_norm": 0.6478111147880554, "learning_rate": 0.00010015880030399191, "loss": 1.7441, "step": 66010 }, { "epoch": 2.2, "grad_norm": 0.6641599535942078, "learning_rate": 0.00010015100493259036, "loss": 1.6698, "step": 66011 }, { "epoch": 2.2, "grad_norm": 0.6502562761306763, "learning_rate": 0.00010014320980377606, "loss": 1.7716, "step": 66012 }, { "epoch": 2.2, "grad_norm": 0.6606882214546204, "learning_rate": 0.00010013541491755866, "loss": 1.7473, "step": 66013 }, { "epoch": 2.2, "grad_norm": 0.6792407631874084, "learning_rate": 0.00010012762027394721, "loss": 1.6664, "step": 66014 }, { "epoch": 2.2, "grad_norm": 0.6201740503311157, "learning_rate": 0.00010011982587295137, "loss": 1.6516, "step": 66015 }, { "epoch": 2.2, "grad_norm": 0.6616283059120178, "learning_rate": 0.00010011203171458073, "loss": 1.7548, "step": 66016 }, { "epoch": 2.2, "grad_norm": 0.6496513485908508, "learning_rate": 0.00010010423779884464, "loss": 1.7108, "step": 66017 }, { "epoch": 2.2, "grad_norm": 0.6320799589157104, "learning_rate": 0.00010009644412575244, "loss": 1.7137, "step": 66018 }, { "epoch": 2.2, "grad_norm": 0.6683024764060974, "learning_rate": 0.00010008865069531368, "loss": 1.6953, "step": 66019 }, { "epoch": 2.2, "grad_norm": 0.6451866626739502, "learning_rate": 0.00010008085750753807, "loss": 1.6889, "step": 66020 }, { "epoch": 2.2, "grad_norm": 0.6416363716125488, "learning_rate": 0.00010007306456243461, "loss": 1.6876, "step": 66021 }, { "epoch": 2.2, "grad_norm": 0.6378628611564636, "learning_rate": 0.00010006527186001303, "loss": 1.7507, "step": 66022 }, { "epoch": 2.2, "grad_norm": 0.6472862958908081, "learning_rate": 0.00010005747940028281, "loss": 1.7025, "step": 66023 }, { "epoch": 2.2, "grad_norm": 0.6580198407173157, "learning_rate": 0.00010004968718325339, "loss": 1.7452, "step": 66024 }, { "epoch": 2.2, "grad_norm": 0.6446789503097534, "learning_rate": 0.00010004189520893408, "loss": 1.6659, "step": 66025 }, { "epoch": 2.2, "grad_norm": 0.6336398720741272, "learning_rate": 0.00010003410347733443, "loss": 1.7419, "step": 66026 }, { "epoch": 2.2, "grad_norm": 0.6349062919616699, "learning_rate": 0.00010002631198846414, "loss": 1.6679, "step": 66027 }, { "epoch": 2.2, "grad_norm": 0.6361519694328308, "learning_rate": 0.00010001852074233221, "loss": 1.674, "step": 66028 }, { "epoch": 2.2, "grad_norm": 0.6381352543830872, "learning_rate": 0.00010001072973894833, "loss": 1.6454, "step": 66029 }, { "epoch": 2.2, "grad_norm": 0.6428213715553284, "learning_rate": 0.00010000293897832208, "loss": 1.6582, "step": 66030 }, { "epoch": 2.2, "grad_norm": 0.6491595506668091, "learning_rate": 9.999514846046279e-05, "loss": 1.6679, "step": 66031 }, { "epoch": 2.2, "grad_norm": 0.6521590352058411, "learning_rate": 9.998735818537984e-05, "loss": 1.7039, "step": 66032 }, { "epoch": 2.2, "grad_norm": 0.6309777498245239, "learning_rate": 9.997956815308274e-05, "loss": 1.6803, "step": 66033 }, { "epoch": 2.2, "grad_norm": 0.6237483620643616, "learning_rate": 9.997177836358117e-05, "loss": 1.6768, "step": 66034 }, { "epoch": 2.2, "grad_norm": 0.652772843837738, "learning_rate": 9.99639888168842e-05, "loss": 1.7752, "step": 66035 }, { "epoch": 2.2, "grad_norm": 0.638771116733551, "learning_rate": 9.995619951300147e-05, "loss": 1.6833, "step": 66036 }, { "epoch": 2.2, "grad_norm": 0.6641961932182312, "learning_rate": 9.994841045194253e-05, "loss": 1.7056, "step": 66037 }, { "epoch": 2.2, "grad_norm": 0.6432130932807922, "learning_rate": 9.994062163371675e-05, "loss": 1.7443, "step": 66038 }, { "epoch": 2.2, "grad_norm": 0.6513537764549255, "learning_rate": 9.993283305833348e-05, "loss": 1.7326, "step": 66039 }, { "epoch": 2.2, "grad_norm": 0.6544753313064575, "learning_rate": 9.992504472580237e-05, "loss": 1.7585, "step": 66040 }, { "epoch": 2.2, "grad_norm": 0.6508422493934631, "learning_rate": 9.991725663613277e-05, "loss": 1.6871, "step": 66041 }, { "epoch": 2.2, "grad_norm": 0.6417076587677002, "learning_rate": 9.990946878933406e-05, "loss": 1.6398, "step": 66042 }, { "epoch": 2.2, "grad_norm": 0.6349560618400574, "learning_rate": 9.990168118541576e-05, "loss": 1.7185, "step": 66043 }, { "epoch": 2.2, "grad_norm": 0.647849977016449, "learning_rate": 9.989389382438744e-05, "loss": 1.6421, "step": 66044 }, { "epoch": 2.2, "grad_norm": 0.6645526885986328, "learning_rate": 9.988610670625845e-05, "loss": 1.7205, "step": 66045 }, { "epoch": 2.2, "grad_norm": 0.6407400965690613, "learning_rate": 9.987831983103813e-05, "loss": 1.6885, "step": 66046 }, { "epoch": 2.2, "grad_norm": 0.6584017872810364, "learning_rate": 9.987053319873614e-05, "loss": 1.6791, "step": 66047 }, { "epoch": 2.2, "grad_norm": 0.6600825190544128, "learning_rate": 9.986274680936184e-05, "loss": 1.6803, "step": 66048 }, { "epoch": 2.2, "grad_norm": 0.631472647190094, "learning_rate": 9.985496066292459e-05, "loss": 1.7314, "step": 66049 }, { "epoch": 2.2, "grad_norm": 0.637873649597168, "learning_rate": 9.984717475943402e-05, "loss": 1.709, "step": 66050 }, { "epoch": 2.2, "grad_norm": 0.677905261516571, "learning_rate": 9.983938909889939e-05, "loss": 1.7362, "step": 66051 }, { "epoch": 2.2, "grad_norm": 0.6622077822685242, "learning_rate": 9.983160368133032e-05, "loss": 1.729, "step": 66052 }, { "epoch": 2.2, "grad_norm": 0.6407917737960815, "learning_rate": 9.982381850673613e-05, "loss": 1.6615, "step": 66053 }, { "epoch": 2.2, "grad_norm": 0.6290119290351868, "learning_rate": 9.981603357512641e-05, "loss": 1.6596, "step": 66054 }, { "epoch": 2.2, "grad_norm": 0.6294857263565063, "learning_rate": 9.980824888651054e-05, "loss": 1.6043, "step": 66055 }, { "epoch": 2.2, "grad_norm": 0.6800932884216309, "learning_rate": 9.980046444089786e-05, "loss": 1.7213, "step": 66056 }, { "epoch": 2.2, "grad_norm": 0.6650084257125854, "learning_rate": 9.979268023829799e-05, "loss": 1.7148, "step": 66057 }, { "epoch": 2.2, "grad_norm": 0.6513927578926086, "learning_rate": 9.978489627872026e-05, "loss": 1.7806, "step": 66058 }, { "epoch": 2.2, "grad_norm": 0.6360049247741699, "learning_rate": 9.977711256217423e-05, "loss": 1.7277, "step": 66059 }, { "epoch": 2.2, "grad_norm": 0.6471287608146667, "learning_rate": 9.976932908866927e-05, "loss": 1.7189, "step": 66060 }, { "epoch": 2.2, "grad_norm": 0.6288798451423645, "learning_rate": 9.976154585821476e-05, "loss": 1.6882, "step": 66061 }, { "epoch": 2.2, "grad_norm": 0.6661720275878906, "learning_rate": 9.975376287082033e-05, "loss": 1.7447, "step": 66062 }, { "epoch": 2.2, "grad_norm": 0.6645998358726501, "learning_rate": 9.974598012649522e-05, "loss": 1.7515, "step": 66063 }, { "epoch": 2.2, "grad_norm": 0.6414833068847656, "learning_rate": 9.973819762524912e-05, "loss": 1.6939, "step": 66064 }, { "epoch": 2.2, "grad_norm": 0.659212052822113, "learning_rate": 9.973041536709118e-05, "loss": 1.7412, "step": 66065 }, { "epoch": 2.2, "grad_norm": 0.6352084875106812, "learning_rate": 9.972263335203117e-05, "loss": 1.6658, "step": 66066 }, { "epoch": 2.2, "grad_norm": 0.6453691720962524, "learning_rate": 9.971485158007835e-05, "loss": 1.7838, "step": 66067 }, { "epoch": 2.2, "grad_norm": 0.6411565542221069, "learning_rate": 9.970707005124206e-05, "loss": 1.6846, "step": 66068 }, { "epoch": 2.2, "grad_norm": 0.6475999355316162, "learning_rate": 9.969928876553202e-05, "loss": 1.6803, "step": 66069 }, { "epoch": 2.2, "grad_norm": 0.6503974199295044, "learning_rate": 9.969150772295749e-05, "loss": 1.7347, "step": 66070 }, { "epoch": 2.2, "grad_norm": 0.6819591522216797, "learning_rate": 9.968372692352786e-05, "loss": 1.7641, "step": 66071 }, { "epoch": 2.2, "grad_norm": 0.6522319912910461, "learning_rate": 9.967594636725271e-05, "loss": 1.6478, "step": 66072 }, { "epoch": 2.2, "grad_norm": 0.6408806443214417, "learning_rate": 9.96681660541415e-05, "loss": 1.7403, "step": 66073 }, { "epoch": 2.2, "grad_norm": 0.6391364932060242, "learning_rate": 9.966038598420364e-05, "loss": 1.7595, "step": 66074 }, { "epoch": 2.2, "grad_norm": 0.6244757175445557, "learning_rate": 9.965260615744845e-05, "loss": 1.6736, "step": 66075 }, { "epoch": 2.2, "grad_norm": 0.684425413608551, "learning_rate": 9.964482657388557e-05, "loss": 1.7267, "step": 66076 }, { "epoch": 2.2, "grad_norm": 0.6448599100112915, "learning_rate": 9.963704723352438e-05, "loss": 1.6488, "step": 66077 }, { "epoch": 2.2, "grad_norm": 0.6542539000511169, "learning_rate": 9.962926813637418e-05, "loss": 1.6925, "step": 66078 }, { "epoch": 2.2, "grad_norm": 0.6802955865859985, "learning_rate": 9.962148928244452e-05, "loss": 1.7528, "step": 66079 }, { "epoch": 2.2, "grad_norm": 0.6569627523422241, "learning_rate": 9.961371067174492e-05, "loss": 1.7, "step": 66080 }, { "epoch": 2.2, "grad_norm": 0.672595202922821, "learning_rate": 9.960593230428475e-05, "loss": 1.6637, "step": 66081 }, { "epoch": 2.2, "grad_norm": 0.6582250595092773, "learning_rate": 9.959815418007338e-05, "loss": 1.6611, "step": 66082 }, { "epoch": 2.2, "grad_norm": 0.6636221408843994, "learning_rate": 9.959037629912042e-05, "loss": 1.755, "step": 66083 }, { "epoch": 2.2, "grad_norm": 0.6367795467376709, "learning_rate": 9.958259866143521e-05, "loss": 1.6763, "step": 66084 }, { "epoch": 2.2, "grad_norm": 0.6592910885810852, "learning_rate": 9.957482126702709e-05, "loss": 1.7192, "step": 66085 }, { "epoch": 2.2, "grad_norm": 0.6628260612487793, "learning_rate": 9.956704411590562e-05, "loss": 1.6347, "step": 66086 }, { "epoch": 2.2, "grad_norm": 0.6515822410583496, "learning_rate": 9.95592672080803e-05, "loss": 1.6812, "step": 66087 }, { "epoch": 2.2, "grad_norm": 0.6409459114074707, "learning_rate": 9.955149054356053e-05, "loss": 1.6637, "step": 66088 }, { "epoch": 2.2, "grad_norm": 0.6767871379852295, "learning_rate": 9.954371412235559e-05, "loss": 1.7308, "step": 66089 }, { "epoch": 2.2, "grad_norm": 0.695176899433136, "learning_rate": 9.953593794447517e-05, "loss": 1.6828, "step": 66090 }, { "epoch": 2.2, "grad_norm": 0.675145149230957, "learning_rate": 9.952816200992858e-05, "loss": 1.7348, "step": 66091 }, { "epoch": 2.2, "grad_norm": 0.6504316329956055, "learning_rate": 9.952038631872513e-05, "loss": 1.6176, "step": 66092 }, { "epoch": 2.2, "grad_norm": 0.645982563495636, "learning_rate": 9.951261087087442e-05, "loss": 1.6653, "step": 66093 }, { "epoch": 2.2, "grad_norm": 0.6632314920425415, "learning_rate": 9.950483566638594e-05, "loss": 1.7203, "step": 66094 }, { "epoch": 2.2, "grad_norm": 0.6499263048171997, "learning_rate": 9.949706070526909e-05, "loss": 1.663, "step": 66095 }, { "epoch": 2.2, "grad_norm": 0.6510930061340332, "learning_rate": 9.948928598753314e-05, "loss": 1.6784, "step": 66096 }, { "epoch": 2.2, "grad_norm": 0.6645875573158264, "learning_rate": 9.948151151318774e-05, "loss": 1.7372, "step": 66097 }, { "epoch": 2.2, "grad_norm": 0.6497928500175476, "learning_rate": 9.947373728224224e-05, "loss": 1.6639, "step": 66098 }, { "epoch": 2.2, "grad_norm": 0.6407074332237244, "learning_rate": 9.9465963294706e-05, "loss": 1.7128, "step": 66099 }, { "epoch": 2.2, "grad_norm": 0.6741877198219299, "learning_rate": 9.945818955058861e-05, "loss": 1.7504, "step": 66100 }, { "epoch": 2.2, "grad_norm": 0.6472254991531372, "learning_rate": 9.945041604989933e-05, "loss": 1.6686, "step": 66101 }, { "epoch": 2.2, "grad_norm": 0.6635649800300598, "learning_rate": 9.944264279264782e-05, "loss": 1.7309, "step": 66102 }, { "epoch": 2.2, "grad_norm": 0.6659704446792603, "learning_rate": 9.943486977884328e-05, "loss": 1.6613, "step": 66103 }, { "epoch": 2.2, "grad_norm": 0.6531034708023071, "learning_rate": 9.942709700849535e-05, "loss": 1.6902, "step": 66104 }, { "epoch": 2.2, "grad_norm": 0.6609233021736145, "learning_rate": 9.941932448161338e-05, "loss": 1.7044, "step": 66105 }, { "epoch": 2.2, "grad_norm": 0.651912271976471, "learning_rate": 9.941155219820666e-05, "loss": 1.6721, "step": 66106 }, { "epoch": 2.2, "grad_norm": 0.641449511051178, "learning_rate": 9.94037801582849e-05, "loss": 1.7529, "step": 66107 }, { "epoch": 2.2, "grad_norm": 0.6611088514328003, "learning_rate": 9.939600836185728e-05, "loss": 1.6492, "step": 66108 }, { "epoch": 2.2, "grad_norm": 0.6408100128173828, "learning_rate": 9.938823680893346e-05, "loss": 1.6851, "step": 66109 }, { "epoch": 2.2, "grad_norm": 0.6411873698234558, "learning_rate": 9.938046549952277e-05, "loss": 1.6662, "step": 66110 }, { "epoch": 2.2, "grad_norm": 0.6399454474449158, "learning_rate": 9.937269443363452e-05, "loss": 1.7193, "step": 66111 }, { "epoch": 2.2, "grad_norm": 0.65745609998703, "learning_rate": 9.936492361127833e-05, "loss": 1.72, "step": 66112 }, { "epoch": 2.2, "grad_norm": 0.6592267155647278, "learning_rate": 9.935715303246348e-05, "loss": 1.665, "step": 66113 }, { "epoch": 2.2, "grad_norm": 0.6493494510650635, "learning_rate": 9.93493826971996e-05, "loss": 1.705, "step": 66114 }, { "epoch": 2.2, "grad_norm": 0.6412525773048401, "learning_rate": 9.934161260549591e-05, "loss": 1.786, "step": 66115 }, { "epoch": 2.2, "grad_norm": 0.6460675001144409, "learning_rate": 9.933384275736203e-05, "loss": 1.6356, "step": 66116 }, { "epoch": 2.2, "grad_norm": 0.6610525846481323, "learning_rate": 9.932607315280726e-05, "loss": 1.705, "step": 66117 }, { "epoch": 2.2, "grad_norm": 0.6653791666030884, "learning_rate": 9.931830379184101e-05, "loss": 1.7255, "step": 66118 }, { "epoch": 2.2, "grad_norm": 0.6802162528038025, "learning_rate": 9.931053467447288e-05, "loss": 1.8106, "step": 66119 }, { "epoch": 2.2, "grad_norm": 0.6670374870300293, "learning_rate": 9.930276580071214e-05, "loss": 1.6426, "step": 66120 }, { "epoch": 2.2, "grad_norm": 0.6678249835968018, "learning_rate": 9.929499717056822e-05, "loss": 1.7332, "step": 66121 }, { "epoch": 2.2, "grad_norm": 0.6396048069000244, "learning_rate": 9.928722878405057e-05, "loss": 1.702, "step": 66122 }, { "epoch": 2.2, "grad_norm": 0.655655562877655, "learning_rate": 9.927946064116875e-05, "loss": 1.6906, "step": 66123 }, { "epoch": 2.2, "grad_norm": 0.6503990292549133, "learning_rate": 9.92716927419321e-05, "loss": 1.6492, "step": 66124 }, { "epoch": 2.2, "grad_norm": 0.6298402547836304, "learning_rate": 9.926392508634996e-05, "loss": 1.7501, "step": 66125 }, { "epoch": 2.2, "grad_norm": 0.6487192511558533, "learning_rate": 9.925615767443194e-05, "loss": 1.6686, "step": 66126 }, { "epoch": 2.2, "grad_norm": 0.6534335017204285, "learning_rate": 9.924839050618732e-05, "loss": 1.665, "step": 66127 }, { "epoch": 2.2, "grad_norm": 0.6611361503601074, "learning_rate": 9.924062358162552e-05, "loss": 1.738, "step": 66128 }, { "epoch": 2.2, "grad_norm": 0.6698451042175293, "learning_rate": 9.923285690075599e-05, "loss": 1.8034, "step": 66129 }, { "epoch": 2.2, "grad_norm": 0.645416259765625, "learning_rate": 9.92250904635884e-05, "loss": 1.7316, "step": 66130 }, { "epoch": 2.2, "grad_norm": 0.6614234447479248, "learning_rate": 9.921732427013178e-05, "loss": 1.6414, "step": 66131 }, { "epoch": 2.2, "grad_norm": 0.6644194722175598, "learning_rate": 9.920955832039573e-05, "loss": 1.7782, "step": 66132 }, { "epoch": 2.2, "grad_norm": 0.6516732573509216, "learning_rate": 9.920179261438979e-05, "loss": 1.6711, "step": 66133 }, { "epoch": 2.2, "grad_norm": 0.6819943785667419, "learning_rate": 9.91940271521233e-05, "loss": 1.7653, "step": 66134 }, { "epoch": 2.2, "grad_norm": 0.6608541011810303, "learning_rate": 9.918626193360557e-05, "loss": 1.7285, "step": 66135 }, { "epoch": 2.2, "grad_norm": 0.6452525854110718, "learning_rate": 9.917849695884612e-05, "loss": 1.677, "step": 66136 }, { "epoch": 2.2, "grad_norm": 0.6341838240623474, "learning_rate": 9.917073222785458e-05, "loss": 1.728, "step": 66137 }, { "epoch": 2.2, "grad_norm": 0.6503412127494812, "learning_rate": 9.916296774064e-05, "loss": 1.6675, "step": 66138 }, { "epoch": 2.2, "grad_norm": 0.6503691077232361, "learning_rate": 9.915520349721198e-05, "loss": 1.654, "step": 66139 }, { "epoch": 2.2, "grad_norm": 0.6544477343559265, "learning_rate": 9.914743949758004e-05, "loss": 1.708, "step": 66140 }, { "epoch": 2.2, "grad_norm": 0.6592679619789124, "learning_rate": 9.913967574175352e-05, "loss": 1.6677, "step": 66141 }, { "epoch": 2.2, "grad_norm": 0.7148124575614929, "learning_rate": 9.913191222974175e-05, "loss": 1.7232, "step": 66142 }, { "epoch": 2.2, "grad_norm": 0.6456906795501709, "learning_rate": 9.912414896155421e-05, "loss": 1.6725, "step": 66143 }, { "epoch": 2.2, "grad_norm": 0.6453155279159546, "learning_rate": 9.91163859372006e-05, "loss": 1.6401, "step": 66144 }, { "epoch": 2.2, "grad_norm": 0.6562414169311523, "learning_rate": 9.910862315668986e-05, "loss": 1.7517, "step": 66145 }, { "epoch": 2.2, "grad_norm": 0.645757257938385, "learning_rate": 9.910086062003167e-05, "loss": 1.6869, "step": 66146 }, { "epoch": 2.2, "grad_norm": 0.6653872728347778, "learning_rate": 9.909309832723555e-05, "loss": 1.7017, "step": 66147 }, { "epoch": 2.2, "grad_norm": 0.6586810946464539, "learning_rate": 9.908533627831077e-05, "loss": 1.6798, "step": 66148 }, { "epoch": 2.2, "grad_norm": 0.643485426902771, "learning_rate": 9.907757447326671e-05, "loss": 1.7156, "step": 66149 }, { "epoch": 2.2, "grad_norm": 0.6602867841720581, "learning_rate": 9.906981291211285e-05, "loss": 1.6585, "step": 66150 }, { "epoch": 2.2, "grad_norm": 0.6514474749565125, "learning_rate": 9.906205159485887e-05, "loss": 1.6367, "step": 66151 }, { "epoch": 2.2, "grad_norm": 0.6600486040115356, "learning_rate": 9.905429052151371e-05, "loss": 1.6647, "step": 66152 }, { "epoch": 2.2, "grad_norm": 0.643996000289917, "learning_rate": 9.904652969208707e-05, "loss": 1.7409, "step": 66153 }, { "epoch": 2.2, "grad_norm": 0.682036280632019, "learning_rate": 9.903876910658842e-05, "loss": 1.7251, "step": 66154 }, { "epoch": 2.2, "grad_norm": 0.6454877257347107, "learning_rate": 9.903100876502708e-05, "loss": 1.6848, "step": 66155 }, { "epoch": 2.2, "grad_norm": 0.661130428314209, "learning_rate": 9.902324866741239e-05, "loss": 1.5614, "step": 66156 }, { "epoch": 2.2, "grad_norm": 0.6469355821609497, "learning_rate": 9.901548881375399e-05, "loss": 1.6823, "step": 66157 }, { "epoch": 2.2, "grad_norm": 0.6535384654998779, "learning_rate": 9.900772920406113e-05, "loss": 1.6795, "step": 66158 }, { "epoch": 2.2, "grad_norm": 0.6441418528556824, "learning_rate": 9.899996983834321e-05, "loss": 1.7228, "step": 66159 }, { "epoch": 2.2, "grad_norm": 0.6744873523712158, "learning_rate": 9.899221071660969e-05, "loss": 1.8022, "step": 66160 }, { "epoch": 2.2, "grad_norm": 0.632266104221344, "learning_rate": 9.898445183887012e-05, "loss": 1.6721, "step": 66161 }, { "epoch": 2.2, "grad_norm": 0.6390666365623474, "learning_rate": 9.897669320513378e-05, "loss": 1.6694, "step": 66162 }, { "epoch": 2.2, "grad_norm": 0.6603644490242004, "learning_rate": 9.896893481541005e-05, "loss": 1.7005, "step": 66163 }, { "epoch": 2.2, "grad_norm": 0.6487095952033997, "learning_rate": 9.896117666970849e-05, "loss": 1.6347, "step": 66164 }, { "epoch": 2.2, "grad_norm": 0.6301453709602356, "learning_rate": 9.895341876803845e-05, "loss": 1.6605, "step": 66165 }, { "epoch": 2.2, "grad_norm": 0.6827780604362488, "learning_rate": 9.894566111040924e-05, "loss": 1.7129, "step": 66166 }, { "epoch": 2.2, "grad_norm": 0.6568929553031921, "learning_rate": 9.893790369683047e-05, "loss": 1.626, "step": 66167 }, { "epoch": 2.2, "grad_norm": 0.6440914869308472, "learning_rate": 9.893014652731136e-05, "loss": 1.6229, "step": 66168 }, { "epoch": 2.2, "grad_norm": 0.6886963844299316, "learning_rate": 9.89223896018615e-05, "loss": 1.7645, "step": 66169 }, { "epoch": 2.2, "grad_norm": 0.6277087926864624, "learning_rate": 9.891463292049017e-05, "loss": 1.688, "step": 66170 }, { "epoch": 2.2, "grad_norm": 0.6568511724472046, "learning_rate": 9.890687648320694e-05, "loss": 1.771, "step": 66171 }, { "epoch": 2.2, "grad_norm": 0.6332147717475891, "learning_rate": 9.889912029002112e-05, "loss": 1.6473, "step": 66172 }, { "epoch": 2.2, "grad_norm": 0.6566804051399231, "learning_rate": 9.889136434094205e-05, "loss": 1.6272, "step": 66173 }, { "epoch": 2.2, "grad_norm": 0.6752902269363403, "learning_rate": 9.888360863597932e-05, "loss": 1.6836, "step": 66174 }, { "epoch": 2.2, "grad_norm": 0.6701644659042358, "learning_rate": 9.887585317514217e-05, "loss": 1.783, "step": 66175 }, { "epoch": 2.2, "grad_norm": 0.6640825867652893, "learning_rate": 9.886809795844018e-05, "loss": 1.8041, "step": 66176 }, { "epoch": 2.2, "grad_norm": 0.6488948464393616, "learning_rate": 9.88603429858827e-05, "loss": 1.6554, "step": 66177 }, { "epoch": 2.2, "grad_norm": 0.6469985842704773, "learning_rate": 9.8852588257479e-05, "loss": 1.7015, "step": 66178 }, { "epoch": 2.2, "grad_norm": 0.6501981019973755, "learning_rate": 9.884483377323874e-05, "loss": 1.6701, "step": 66179 }, { "epoch": 2.2, "grad_norm": 0.6676663756370544, "learning_rate": 9.883707953317112e-05, "loss": 1.6957, "step": 66180 }, { "epoch": 2.2, "grad_norm": 0.6435070633888245, "learning_rate": 9.882932553728575e-05, "loss": 1.6263, "step": 66181 }, { "epoch": 2.2, "grad_norm": 0.663770318031311, "learning_rate": 9.882157178559181e-05, "loss": 1.6302, "step": 66182 }, { "epoch": 2.2, "grad_norm": 0.6529266238212585, "learning_rate": 9.881381827809897e-05, "loss": 1.7162, "step": 66183 }, { "epoch": 2.2, "grad_norm": 0.6490017175674438, "learning_rate": 9.88060650148165e-05, "loss": 1.6898, "step": 66184 }, { "epoch": 2.2, "grad_norm": 0.6599785089492798, "learning_rate": 9.879831199575372e-05, "loss": 1.669, "step": 66185 }, { "epoch": 2.2, "grad_norm": 0.6593306064605713, "learning_rate": 9.879055922092024e-05, "loss": 1.6819, "step": 66186 }, { "epoch": 2.2, "grad_norm": 0.6787288784980774, "learning_rate": 9.878280669032538e-05, "loss": 1.7247, "step": 66187 }, { "epoch": 2.2, "grad_norm": 0.6445782780647278, "learning_rate": 9.877505440397846e-05, "loss": 1.7103, "step": 66188 }, { "epoch": 2.2, "grad_norm": 0.6583380103111267, "learning_rate": 9.876730236188894e-05, "loss": 1.7208, "step": 66189 }, { "epoch": 2.2, "grad_norm": 0.646476149559021, "learning_rate": 9.875955056406636e-05, "loss": 1.697, "step": 66190 }, { "epoch": 2.2, "grad_norm": 0.6611199378967285, "learning_rate": 9.875179901052009e-05, "loss": 1.7237, "step": 66191 }, { "epoch": 2.2, "grad_norm": 0.6614996194839478, "learning_rate": 9.874404770125934e-05, "loss": 1.6665, "step": 66192 }, { "epoch": 2.2, "grad_norm": 0.6534422039985657, "learning_rate": 9.873629663629377e-05, "loss": 1.6777, "step": 66193 }, { "epoch": 2.2, "grad_norm": 0.6238384246826172, "learning_rate": 9.872854581563269e-05, "loss": 1.7099, "step": 66194 }, { "epoch": 2.2, "grad_norm": 0.6643041372299194, "learning_rate": 9.87207952392854e-05, "loss": 1.7082, "step": 66195 }, { "epoch": 2.2, "grad_norm": 0.655604362487793, "learning_rate": 9.871304490726141e-05, "loss": 1.7954, "step": 66196 }, { "epoch": 2.2, "grad_norm": 0.6792319416999817, "learning_rate": 9.87052948195702e-05, "loss": 1.6817, "step": 66197 }, { "epoch": 2.2, "grad_norm": 0.6489170789718628, "learning_rate": 9.869754497622111e-05, "loss": 1.7152, "step": 66198 }, { "epoch": 2.2, "grad_norm": 0.6426340937614441, "learning_rate": 9.868979537722345e-05, "loss": 1.682, "step": 66199 }, { "epoch": 2.2, "grad_norm": 0.6667107343673706, "learning_rate": 9.86820460225868e-05, "loss": 1.7069, "step": 66200 }, { "epoch": 2.2, "grad_norm": 0.6551187634468079, "learning_rate": 9.86742969123205e-05, "loss": 1.7077, "step": 66201 }, { "epoch": 2.2, "grad_norm": 0.6732543706893921, "learning_rate": 9.866654804643386e-05, "loss": 1.7469, "step": 66202 }, { "epoch": 2.2, "grad_norm": 0.6583605408668518, "learning_rate": 9.865879942493635e-05, "loss": 1.6411, "step": 66203 }, { "epoch": 2.2, "grad_norm": 0.6529622673988342, "learning_rate": 9.865105104783748e-05, "loss": 1.6591, "step": 66204 }, { "epoch": 2.2, "grad_norm": 0.6615090370178223, "learning_rate": 9.864330291514658e-05, "loss": 1.6895, "step": 66205 }, { "epoch": 2.2, "grad_norm": 0.6480081081390381, "learning_rate": 9.863555502687291e-05, "loss": 1.6401, "step": 66206 }, { "epoch": 2.2, "grad_norm": 0.6500263810157776, "learning_rate": 9.862780738302613e-05, "loss": 1.6916, "step": 66207 }, { "epoch": 2.2, "grad_norm": 0.677405834197998, "learning_rate": 9.862005998361553e-05, "loss": 1.6911, "step": 66208 }, { "epoch": 2.2, "grad_norm": 0.6670881509780884, "learning_rate": 9.861231282865039e-05, "loss": 1.6874, "step": 66209 }, { "epoch": 2.2, "grad_norm": 0.6736336946487427, "learning_rate": 9.860456591814024e-05, "loss": 1.7494, "step": 66210 }, { "epoch": 2.2, "grad_norm": 0.6739354133605957, "learning_rate": 9.859681925209457e-05, "loss": 1.7012, "step": 66211 }, { "epoch": 2.2, "grad_norm": 0.6438397169113159, "learning_rate": 9.858907283052266e-05, "loss": 1.6962, "step": 66212 }, { "epoch": 2.2, "grad_norm": 0.6534072756767273, "learning_rate": 9.85813266534339e-05, "loss": 1.6739, "step": 66213 }, { "epoch": 2.2, "grad_norm": 0.6630507707595825, "learning_rate": 9.857358072083777e-05, "loss": 1.6566, "step": 66214 }, { "epoch": 2.2, "grad_norm": 0.6822433471679688, "learning_rate": 9.856583503274368e-05, "loss": 1.7467, "step": 66215 }, { "epoch": 2.2, "grad_norm": 0.6222450733184814, "learning_rate": 9.855808958916084e-05, "loss": 1.7032, "step": 66216 }, { "epoch": 2.2, "grad_norm": 0.6776727437973022, "learning_rate": 9.855034439009893e-05, "loss": 1.687, "step": 66217 }, { "epoch": 2.2, "grad_norm": 0.6335405707359314, "learning_rate": 9.854259943556712e-05, "loss": 1.708, "step": 66218 }, { "epoch": 2.2, "grad_norm": 0.6508786678314209, "learning_rate": 9.853485472557501e-05, "loss": 1.7226, "step": 66219 }, { "epoch": 2.2, "grad_norm": 0.6491057872772217, "learning_rate": 9.852711026013181e-05, "loss": 1.6222, "step": 66220 }, { "epoch": 2.2, "grad_norm": 0.6762176752090454, "learning_rate": 9.851936603924707e-05, "loss": 1.7357, "step": 66221 }, { "epoch": 2.2, "grad_norm": 0.6860509514808655, "learning_rate": 9.851162206293017e-05, "loss": 1.6615, "step": 66222 }, { "epoch": 2.2, "grad_norm": 0.6263336539268494, "learning_rate": 9.850387833119036e-05, "loss": 1.6426, "step": 66223 }, { "epoch": 2.2, "grad_norm": 0.6427426338195801, "learning_rate": 9.849613484403728e-05, "loss": 1.7182, "step": 66224 }, { "epoch": 2.2, "grad_norm": 0.6390058398246765, "learning_rate": 9.848839160148007e-05, "loss": 1.7183, "step": 66225 }, { "epoch": 2.2, "grad_norm": 0.6352601051330566, "learning_rate": 9.848064860352835e-05, "loss": 1.6816, "step": 66226 }, { "epoch": 2.2, "grad_norm": 0.6604129076004028, "learning_rate": 9.847290585019148e-05, "loss": 1.6757, "step": 66227 }, { "epoch": 2.2, "grad_norm": 0.6441400051116943, "learning_rate": 9.846516334147865e-05, "loss": 1.7315, "step": 66228 }, { "epoch": 2.2, "grad_norm": 0.656470775604248, "learning_rate": 9.845742107739956e-05, "loss": 1.6809, "step": 66229 }, { "epoch": 2.2, "grad_norm": 0.6697624325752258, "learning_rate": 9.844967905796336e-05, "loss": 1.7207, "step": 66230 }, { "epoch": 2.2, "grad_norm": 0.6693149209022522, "learning_rate": 9.844193728317962e-05, "loss": 1.6742, "step": 66231 }, { "epoch": 2.2, "grad_norm": 0.6499252319335938, "learning_rate": 9.84341957530576e-05, "loss": 1.606, "step": 66232 }, { "epoch": 2.2, "grad_norm": 0.661916196346283, "learning_rate": 9.842645446760687e-05, "loss": 1.7574, "step": 66233 }, { "epoch": 2.2, "grad_norm": 0.6714871525764465, "learning_rate": 9.841871342683671e-05, "loss": 1.6856, "step": 66234 }, { "epoch": 2.2, "grad_norm": 0.6500107049942017, "learning_rate": 9.841097263075644e-05, "loss": 1.7302, "step": 66235 }, { "epoch": 2.2, "grad_norm": 0.6360374689102173, "learning_rate": 9.840323207937565e-05, "loss": 1.6328, "step": 66236 }, { "epoch": 2.2, "grad_norm": 0.6836323142051697, "learning_rate": 9.839549177270361e-05, "loss": 1.7706, "step": 66237 }, { "epoch": 2.2, "grad_norm": 0.6699687838554382, "learning_rate": 9.838775171074964e-05, "loss": 1.6786, "step": 66238 }, { "epoch": 2.2, "grad_norm": 0.6688021421432495, "learning_rate": 9.838001189352325e-05, "loss": 1.6828, "step": 66239 }, { "epoch": 2.2, "grad_norm": 0.6369900107383728, "learning_rate": 9.837227232103391e-05, "loss": 1.612, "step": 66240 }, { "epoch": 2.2, "grad_norm": 0.641464114189148, "learning_rate": 9.836453299329095e-05, "loss": 1.742, "step": 66241 }, { "epoch": 2.2, "grad_norm": 0.6232810616493225, "learning_rate": 9.835679391030359e-05, "loss": 1.7228, "step": 66242 }, { "epoch": 2.2, "grad_norm": 0.6393019556999207, "learning_rate": 9.83490550720815e-05, "loss": 1.7304, "step": 66243 }, { "epoch": 2.2, "grad_norm": 0.6579306721687317, "learning_rate": 9.834131647863392e-05, "loss": 1.6632, "step": 66244 }, { "epoch": 2.2, "grad_norm": 0.6442331075668335, "learning_rate": 9.83335781299702e-05, "loss": 1.7042, "step": 66245 }, { "epoch": 2.2, "grad_norm": 0.658572793006897, "learning_rate": 9.83258400260998e-05, "loss": 1.77, "step": 66246 }, { "epoch": 2.2, "grad_norm": 0.6530411839485168, "learning_rate": 9.831810216703231e-05, "loss": 1.7561, "step": 66247 }, { "epoch": 2.2, "grad_norm": 0.6518682241439819, "learning_rate": 9.831036455277672e-05, "loss": 1.6921, "step": 66248 }, { "epoch": 2.2, "grad_norm": 0.6426253318786621, "learning_rate": 9.830262718334264e-05, "loss": 1.7652, "step": 66249 }, { "epoch": 2.2, "grad_norm": 0.6704853773117065, "learning_rate": 9.829489005873952e-05, "loss": 1.6781, "step": 66250 }, { "epoch": 2.2, "grad_norm": 0.6455252766609192, "learning_rate": 9.828715317897673e-05, "loss": 1.7389, "step": 66251 }, { "epoch": 2.2, "grad_norm": 0.6719194054603577, "learning_rate": 9.82794165440635e-05, "loss": 1.6896, "step": 66252 }, { "epoch": 2.2, "grad_norm": 0.6302348375320435, "learning_rate": 9.827168015400931e-05, "loss": 1.7533, "step": 66253 }, { "epoch": 2.2, "grad_norm": 0.6507050395011902, "learning_rate": 9.826394400882379e-05, "loss": 1.7398, "step": 66254 }, { "epoch": 2.2, "grad_norm": 0.6517130136489868, "learning_rate": 9.825620810851595e-05, "loss": 1.785, "step": 66255 }, { "epoch": 2.2, "grad_norm": 0.6733784675598145, "learning_rate": 9.824847245309533e-05, "loss": 1.5963, "step": 66256 }, { "epoch": 2.2, "grad_norm": 0.6497204899787903, "learning_rate": 9.82407370425714e-05, "loss": 1.6898, "step": 66257 }, { "epoch": 2.2, "grad_norm": 0.6496380567550659, "learning_rate": 9.823300187695354e-05, "loss": 1.6939, "step": 66258 }, { "epoch": 2.2, "grad_norm": 0.632536768913269, "learning_rate": 9.822526695625097e-05, "loss": 1.7142, "step": 66259 }, { "epoch": 2.2, "grad_norm": 0.653991162776947, "learning_rate": 9.821753228047318e-05, "loss": 1.6421, "step": 66260 }, { "epoch": 2.2, "grad_norm": 0.6499489545822144, "learning_rate": 9.82097978496298e-05, "loss": 1.7263, "step": 66261 }, { "epoch": 2.2, "grad_norm": 0.663882851600647, "learning_rate": 9.820206366372977e-05, "loss": 1.77, "step": 66262 }, { "epoch": 2.2, "grad_norm": 0.6561007499694824, "learning_rate": 9.81943297227827e-05, "loss": 1.678, "step": 66263 }, { "epoch": 2.2, "grad_norm": 0.6712610721588135, "learning_rate": 9.818659602679809e-05, "loss": 1.7045, "step": 66264 }, { "epoch": 2.2, "grad_norm": 0.6464877128601074, "learning_rate": 9.817886257578522e-05, "loss": 1.6992, "step": 66265 }, { "epoch": 2.2, "grad_norm": 0.6508388519287109, "learning_rate": 9.817112936975336e-05, "loss": 1.6515, "step": 66266 }, { "epoch": 2.2, "grad_norm": 0.6534252166748047, "learning_rate": 9.816339640871201e-05, "loss": 1.754, "step": 66267 }, { "epoch": 2.2, "grad_norm": 0.6557474732398987, "learning_rate": 9.815566369267077e-05, "loss": 1.6995, "step": 66268 }, { "epoch": 2.2, "grad_norm": 0.6781654953956604, "learning_rate": 9.814793122163863e-05, "loss": 1.6878, "step": 66269 }, { "epoch": 2.2, "grad_norm": 0.657569408416748, "learning_rate": 9.814019899562512e-05, "loss": 1.7247, "step": 66270 }, { "epoch": 2.2, "grad_norm": 0.6336402893066406, "learning_rate": 9.813246701463978e-05, "loss": 1.7065, "step": 66271 }, { "epoch": 2.2, "grad_norm": 0.6583701372146606, "learning_rate": 9.812473527869191e-05, "loss": 1.6536, "step": 66272 }, { "epoch": 2.2, "grad_norm": 0.6697074770927429, "learning_rate": 9.811700378779074e-05, "loss": 1.7088, "step": 66273 }, { "epoch": 2.2, "grad_norm": 0.6772878766059875, "learning_rate": 9.810927254194589e-05, "loss": 1.7815, "step": 66274 }, { "epoch": 2.2, "grad_norm": 0.6525053381919861, "learning_rate": 9.810154154116663e-05, "loss": 1.6368, "step": 66275 }, { "epoch": 2.21, "grad_norm": 0.6475625038146973, "learning_rate": 9.809381078546223e-05, "loss": 1.7404, "step": 66276 }, { "epoch": 2.21, "grad_norm": 0.6811238527297974, "learning_rate": 9.808608027484223e-05, "loss": 1.6967, "step": 66277 }, { "epoch": 2.21, "grad_norm": 0.664840042591095, "learning_rate": 9.807835000931606e-05, "loss": 1.657, "step": 66278 }, { "epoch": 2.21, "grad_norm": 0.636343240737915, "learning_rate": 9.8070619988893e-05, "loss": 1.6301, "step": 66279 }, { "epoch": 2.21, "grad_norm": 0.6483921408653259, "learning_rate": 9.806289021358241e-05, "loss": 1.7021, "step": 66280 }, { "epoch": 2.21, "grad_norm": 0.6516885161399841, "learning_rate": 9.805516068339378e-05, "loss": 1.7547, "step": 66281 }, { "epoch": 2.21, "grad_norm": 0.6325523853302002, "learning_rate": 9.804743139833642e-05, "loss": 1.6705, "step": 66282 }, { "epoch": 2.21, "grad_norm": 0.6302069425582886, "learning_rate": 9.803970235841968e-05, "loss": 1.7455, "step": 66283 }, { "epoch": 2.21, "grad_norm": 0.6510455012321472, "learning_rate": 9.803197356365302e-05, "loss": 1.7853, "step": 66284 }, { "epoch": 2.21, "grad_norm": 0.6580858826637268, "learning_rate": 9.80242450140457e-05, "loss": 1.6433, "step": 66285 }, { "epoch": 2.21, "grad_norm": 0.7473524808883667, "learning_rate": 9.801651670960729e-05, "loss": 1.671, "step": 66286 }, { "epoch": 2.21, "grad_norm": 0.6419034004211426, "learning_rate": 9.800878865034697e-05, "loss": 1.6361, "step": 66287 }, { "epoch": 2.21, "grad_norm": 0.6506040692329407, "learning_rate": 9.800106083627434e-05, "loss": 1.7301, "step": 66288 }, { "epoch": 2.21, "grad_norm": 0.6437307000160217, "learning_rate": 9.799333326739862e-05, "loss": 1.7152, "step": 66289 }, { "epoch": 2.21, "grad_norm": 0.6326209306716919, "learning_rate": 9.798560594372916e-05, "loss": 1.7342, "step": 66290 }, { "epoch": 2.21, "grad_norm": 0.6565349698066711, "learning_rate": 9.797787886527549e-05, "loss": 1.7827, "step": 66291 }, { "epoch": 2.21, "grad_norm": 0.6360813975334167, "learning_rate": 9.797015203204681e-05, "loss": 1.716, "step": 66292 }, { "epoch": 2.21, "grad_norm": 0.6549431085586548, "learning_rate": 9.796242544405273e-05, "loss": 1.609, "step": 66293 }, { "epoch": 2.21, "grad_norm": 0.6426548957824707, "learning_rate": 9.795469910130247e-05, "loss": 1.5982, "step": 66294 }, { "epoch": 2.21, "grad_norm": 0.6242008209228516, "learning_rate": 9.794697300380533e-05, "loss": 1.6505, "step": 66295 }, { "epoch": 2.21, "grad_norm": 0.6830727458000183, "learning_rate": 9.79392471515709e-05, "loss": 1.6944, "step": 66296 }, { "epoch": 2.21, "grad_norm": 0.7016481161117554, "learning_rate": 9.793152154460834e-05, "loss": 1.6829, "step": 66297 }, { "epoch": 2.21, "grad_norm": 0.6397261619567871, "learning_rate": 9.792379618292728e-05, "loss": 1.6787, "step": 66298 }, { "epoch": 2.21, "grad_norm": 0.6523007154464722, "learning_rate": 9.79160710665368e-05, "loss": 1.6822, "step": 66299 }, { "epoch": 2.21, "grad_norm": 0.6334179043769836, "learning_rate": 9.790834619544656e-05, "loss": 1.6917, "step": 66300 }, { "epoch": 2.21, "grad_norm": 0.6884452104568481, "learning_rate": 9.790062156966584e-05, "loss": 1.7724, "step": 66301 }, { "epoch": 2.21, "grad_norm": 0.6344766020774841, "learning_rate": 9.789289718920385e-05, "loss": 1.7029, "step": 66302 }, { "epoch": 2.21, "grad_norm": 0.6593713760375977, "learning_rate": 9.788517305407021e-05, "loss": 1.7189, "step": 66303 }, { "epoch": 2.21, "grad_norm": 0.634824812412262, "learning_rate": 9.787744916427419e-05, "loss": 1.6716, "step": 66304 }, { "epoch": 2.21, "grad_norm": 0.6816598176956177, "learning_rate": 9.786972551982508e-05, "loss": 1.6831, "step": 66305 }, { "epoch": 2.21, "grad_norm": 0.6890680193901062, "learning_rate": 9.78620021207323e-05, "loss": 1.6509, "step": 66306 }, { "epoch": 2.21, "grad_norm": 0.6499656438827515, "learning_rate": 9.785427896700542e-05, "loss": 1.7485, "step": 66307 }, { "epoch": 2.21, "grad_norm": 0.6720394492149353, "learning_rate": 9.784655605865364e-05, "loss": 1.6965, "step": 66308 }, { "epoch": 2.21, "grad_norm": 0.6398126482963562, "learning_rate": 9.783883339568625e-05, "loss": 1.7517, "step": 66309 }, { "epoch": 2.21, "grad_norm": 0.6651354432106018, "learning_rate": 9.783111097811281e-05, "loss": 1.6961, "step": 66310 }, { "epoch": 2.21, "grad_norm": 0.6444082260131836, "learning_rate": 9.782338880594264e-05, "loss": 1.7273, "step": 66311 }, { "epoch": 2.21, "grad_norm": 0.6470184922218323, "learning_rate": 9.781566687918498e-05, "loss": 1.7315, "step": 66312 }, { "epoch": 2.21, "grad_norm": 0.642217218875885, "learning_rate": 9.780794519784932e-05, "loss": 1.702, "step": 66313 }, { "epoch": 2.21, "grad_norm": 0.66166090965271, "learning_rate": 9.780022376194512e-05, "loss": 1.7365, "step": 66314 }, { "epoch": 2.21, "grad_norm": 0.6506202220916748, "learning_rate": 9.779250257148162e-05, "loss": 1.6272, "step": 66315 }, { "epoch": 2.21, "grad_norm": 0.6638926863670349, "learning_rate": 9.778478162646819e-05, "loss": 1.6635, "step": 66316 }, { "epoch": 2.21, "grad_norm": 0.6503148674964905, "learning_rate": 9.777706092691431e-05, "loss": 1.7236, "step": 66317 }, { "epoch": 2.21, "grad_norm": 0.6640267968177795, "learning_rate": 9.776934047282927e-05, "loss": 1.7275, "step": 66318 }, { "epoch": 2.21, "grad_norm": 0.6625652313232422, "learning_rate": 9.776162026422234e-05, "loss": 1.7771, "step": 66319 }, { "epoch": 2.21, "grad_norm": 0.6375136971473694, "learning_rate": 9.775390030110303e-05, "loss": 1.6847, "step": 66320 }, { "epoch": 2.21, "grad_norm": 0.6492217779159546, "learning_rate": 9.77461805834808e-05, "loss": 1.6961, "step": 66321 }, { "epoch": 2.21, "grad_norm": 0.6482125520706177, "learning_rate": 9.77384611113649e-05, "loss": 1.7096, "step": 66322 }, { "epoch": 2.21, "grad_norm": 0.6506245732307434, "learning_rate": 9.773074188476457e-05, "loss": 1.6675, "step": 66323 }, { "epoch": 2.21, "grad_norm": 0.6437025666236877, "learning_rate": 9.772302290368946e-05, "loss": 1.7329, "step": 66324 }, { "epoch": 2.21, "grad_norm": 0.6297351121902466, "learning_rate": 9.771530416814875e-05, "loss": 1.7375, "step": 66325 }, { "epoch": 2.21, "grad_norm": 0.649503231048584, "learning_rate": 9.770758567815181e-05, "loss": 1.7076, "step": 66326 }, { "epoch": 2.21, "grad_norm": 0.6517939567565918, "learning_rate": 9.769986743370801e-05, "loss": 1.6662, "step": 66327 }, { "epoch": 2.21, "grad_norm": 0.6313934326171875, "learning_rate": 9.769214943482688e-05, "loss": 1.6478, "step": 66328 }, { "epoch": 2.21, "grad_norm": 0.6521166563034058, "learning_rate": 9.768443168151769e-05, "loss": 1.6624, "step": 66329 }, { "epoch": 2.21, "grad_norm": 0.6721993684768677, "learning_rate": 9.767671417378968e-05, "loss": 1.7539, "step": 66330 }, { "epoch": 2.21, "grad_norm": 0.6475777626037598, "learning_rate": 9.76689969116524e-05, "loss": 1.7895, "step": 66331 }, { "epoch": 2.21, "grad_norm": 0.6361050605773926, "learning_rate": 9.766127989511515e-05, "loss": 1.6727, "step": 66332 }, { "epoch": 2.21, "grad_norm": 0.6235324144363403, "learning_rate": 9.765356312418722e-05, "loss": 1.6778, "step": 66333 }, { "epoch": 2.21, "grad_norm": 0.6390722393989563, "learning_rate": 9.764584659887814e-05, "loss": 1.7287, "step": 66334 }, { "epoch": 2.21, "grad_norm": 0.6606811881065369, "learning_rate": 9.763813031919709e-05, "loss": 1.6847, "step": 66335 }, { "epoch": 2.21, "grad_norm": 0.6539565920829773, "learning_rate": 9.763041428515365e-05, "loss": 1.7541, "step": 66336 }, { "epoch": 2.21, "grad_norm": 0.6565287113189697, "learning_rate": 9.762269849675694e-05, "loss": 1.6792, "step": 66337 }, { "epoch": 2.21, "grad_norm": 0.6183711886405945, "learning_rate": 9.761498295401654e-05, "loss": 1.7091, "step": 66338 }, { "epoch": 2.21, "grad_norm": 0.6309576630592346, "learning_rate": 9.760726765694175e-05, "loss": 1.7476, "step": 66339 }, { "epoch": 2.21, "grad_norm": 0.646348774433136, "learning_rate": 9.759955260554185e-05, "loss": 1.679, "step": 66340 }, { "epoch": 2.21, "grad_norm": 0.6471198797225952, "learning_rate": 9.759183779982632e-05, "loss": 1.7214, "step": 66341 }, { "epoch": 2.21, "grad_norm": 0.6625316739082336, "learning_rate": 9.758412323980442e-05, "loss": 1.7494, "step": 66342 }, { "epoch": 2.21, "grad_norm": 0.6507276296615601, "learning_rate": 9.757640892548561e-05, "loss": 1.7133, "step": 66343 }, { "epoch": 2.21, "grad_norm": 0.6446573138237, "learning_rate": 9.756869485687925e-05, "loss": 1.7465, "step": 66344 }, { "epoch": 2.21, "grad_norm": 0.6595309972763062, "learning_rate": 9.756098103399458e-05, "loss": 1.648, "step": 66345 }, { "epoch": 2.21, "grad_norm": 0.6528318524360657, "learning_rate": 9.755326745684113e-05, "loss": 1.6927, "step": 66346 }, { "epoch": 2.21, "grad_norm": 0.6527049541473389, "learning_rate": 9.754555412542812e-05, "loss": 1.729, "step": 66347 }, { "epoch": 2.21, "grad_norm": 0.6358283162117004, "learning_rate": 9.753784103976505e-05, "loss": 1.739, "step": 66348 }, { "epoch": 2.21, "grad_norm": 0.6235843896865845, "learning_rate": 9.75301281998611e-05, "loss": 1.6629, "step": 66349 }, { "epoch": 2.21, "grad_norm": 0.6296330690383911, "learning_rate": 9.752241560572585e-05, "loss": 1.6619, "step": 66350 }, { "epoch": 2.21, "grad_norm": 0.621589183807373, "learning_rate": 9.751470325736855e-05, "loss": 1.6623, "step": 66351 }, { "epoch": 2.21, "grad_norm": 0.6498218774795532, "learning_rate": 9.750699115479849e-05, "loss": 1.7272, "step": 66352 }, { "epoch": 2.21, "grad_norm": 0.6677510142326355, "learning_rate": 9.749927929802517e-05, "loss": 1.7273, "step": 66353 }, { "epoch": 2.21, "grad_norm": 0.6526870131492615, "learning_rate": 9.749156768705792e-05, "loss": 1.7269, "step": 66354 }, { "epoch": 2.21, "grad_norm": 0.6574613451957703, "learning_rate": 9.748385632190595e-05, "loss": 1.6908, "step": 66355 }, { "epoch": 2.21, "grad_norm": 0.6578575968742371, "learning_rate": 9.747614520257875e-05, "loss": 1.6882, "step": 66356 }, { "epoch": 2.21, "grad_norm": 0.6473146677017212, "learning_rate": 9.746843432908577e-05, "loss": 1.7426, "step": 66357 }, { "epoch": 2.21, "grad_norm": 0.643794059753418, "learning_rate": 9.746072370143623e-05, "loss": 1.7691, "step": 66358 }, { "epoch": 2.21, "grad_norm": 0.6734840869903564, "learning_rate": 9.745301331963946e-05, "loss": 1.6774, "step": 66359 }, { "epoch": 2.21, "grad_norm": 0.6622915863990784, "learning_rate": 9.744530318370499e-05, "loss": 1.7287, "step": 66360 }, { "epoch": 2.21, "grad_norm": 0.682590663433075, "learning_rate": 9.743759329364208e-05, "loss": 1.7482, "step": 66361 }, { "epoch": 2.21, "grad_norm": 0.673644483089447, "learning_rate": 9.742988364945998e-05, "loss": 1.652, "step": 66362 }, { "epoch": 2.21, "grad_norm": 0.6475186347961426, "learning_rate": 9.742217425116816e-05, "loss": 1.6478, "step": 66363 }, { "epoch": 2.21, "grad_norm": 0.6690562963485718, "learning_rate": 9.741446509877617e-05, "loss": 1.7201, "step": 66364 }, { "epoch": 2.21, "grad_norm": 0.6682055592536926, "learning_rate": 9.740675619229298e-05, "loss": 1.7476, "step": 66365 }, { "epoch": 2.21, "grad_norm": 0.656175971031189, "learning_rate": 9.73990475317281e-05, "loss": 1.6858, "step": 66366 }, { "epoch": 2.21, "grad_norm": 0.6476162075996399, "learning_rate": 9.739133911709107e-05, "loss": 1.6437, "step": 66367 }, { "epoch": 2.21, "grad_norm": 0.6556480526924133, "learning_rate": 9.738363094839108e-05, "loss": 1.6456, "step": 66368 }, { "epoch": 2.21, "grad_norm": 0.6735356450080872, "learning_rate": 9.737592302563738e-05, "loss": 1.5788, "step": 66369 }, { "epoch": 2.21, "grad_norm": 0.6471542119979858, "learning_rate": 9.73682153488395e-05, "loss": 1.6633, "step": 66370 }, { "epoch": 2.21, "grad_norm": 0.6536978483200073, "learning_rate": 9.736050791800692e-05, "loss": 1.714, "step": 66371 }, { "epoch": 2.21, "grad_norm": 0.6561408638954163, "learning_rate": 9.735280073314862e-05, "loss": 1.7055, "step": 66372 }, { "epoch": 2.21, "grad_norm": 0.626973569393158, "learning_rate": 9.734509379427421e-05, "loss": 1.668, "step": 66373 }, { "epoch": 2.21, "grad_norm": 0.6372559070587158, "learning_rate": 9.733738710139305e-05, "loss": 1.7776, "step": 66374 }, { "epoch": 2.21, "grad_norm": 0.6630235910415649, "learning_rate": 9.732968065451447e-05, "loss": 1.692, "step": 66375 }, { "epoch": 2.21, "grad_norm": 0.6508293151855469, "learning_rate": 9.732197445364768e-05, "loss": 1.6704, "step": 66376 }, { "epoch": 2.21, "grad_norm": 0.6610071063041687, "learning_rate": 9.731426849880215e-05, "loss": 1.6607, "step": 66377 }, { "epoch": 2.21, "grad_norm": 0.6417245268821716, "learning_rate": 9.730656278998742e-05, "loss": 1.711, "step": 66378 }, { "epoch": 2.21, "grad_norm": 0.6470199823379517, "learning_rate": 9.729885732721248e-05, "loss": 1.7437, "step": 66379 }, { "epoch": 2.21, "grad_norm": 0.6585112810134888, "learning_rate": 9.729115211048685e-05, "loss": 1.702, "step": 66380 }, { "epoch": 2.21, "grad_norm": 0.6824153065681458, "learning_rate": 9.728344713981999e-05, "loss": 1.7461, "step": 66381 }, { "epoch": 2.21, "grad_norm": 0.6642021536827087, "learning_rate": 9.727574241522117e-05, "loss": 1.6563, "step": 66382 }, { "epoch": 2.21, "grad_norm": 0.6413729190826416, "learning_rate": 9.726803793669964e-05, "loss": 1.6693, "step": 66383 }, { "epoch": 2.21, "grad_norm": 0.6626297235488892, "learning_rate": 9.726033370426493e-05, "loss": 1.637, "step": 66384 }, { "epoch": 2.21, "grad_norm": 0.6376610994338989, "learning_rate": 9.725262971792629e-05, "loss": 1.6569, "step": 66385 }, { "epoch": 2.21, "grad_norm": 0.6703832149505615, "learning_rate": 9.7244925977693e-05, "loss": 1.7526, "step": 66386 }, { "epoch": 2.21, "grad_norm": 0.6312707662582397, "learning_rate": 9.723722248357446e-05, "loss": 1.6351, "step": 66387 }, { "epoch": 2.21, "grad_norm": 0.6703101992607117, "learning_rate": 9.722951923558016e-05, "loss": 1.6785, "step": 66388 }, { "epoch": 2.21, "grad_norm": 0.6572514772415161, "learning_rate": 9.722181623371939e-05, "loss": 1.6679, "step": 66389 }, { "epoch": 2.21, "grad_norm": 0.6655354499816895, "learning_rate": 9.721411347800134e-05, "loss": 1.8144, "step": 66390 }, { "epoch": 2.21, "grad_norm": 0.6495212316513062, "learning_rate": 9.720641096843557e-05, "loss": 1.6967, "step": 66391 }, { "epoch": 2.21, "grad_norm": 0.6550660133361816, "learning_rate": 9.719870870503132e-05, "loss": 1.69, "step": 66392 }, { "epoch": 2.21, "grad_norm": 0.6279439330101013, "learning_rate": 9.71910066877979e-05, "loss": 1.7101, "step": 66393 }, { "epoch": 2.21, "grad_norm": 0.6787326335906982, "learning_rate": 9.718330491674468e-05, "loss": 1.6815, "step": 66394 }, { "epoch": 2.21, "grad_norm": 0.649013102054596, "learning_rate": 9.717560339188114e-05, "loss": 1.6569, "step": 66395 }, { "epoch": 2.21, "grad_norm": 0.6485865116119385, "learning_rate": 9.716790211321655e-05, "loss": 1.7713, "step": 66396 }, { "epoch": 2.21, "grad_norm": 0.6697196364402771, "learning_rate": 9.716020108076015e-05, "loss": 1.7191, "step": 66397 }, { "epoch": 2.21, "grad_norm": 0.6535560488700867, "learning_rate": 9.715250029452148e-05, "loss": 1.7131, "step": 66398 }, { "epoch": 2.21, "grad_norm": 0.6572616696357727, "learning_rate": 9.714479975450976e-05, "loss": 1.6488, "step": 66399 }, { "epoch": 2.21, "grad_norm": 0.6665886044502258, "learning_rate": 9.713709946073427e-05, "loss": 1.7094, "step": 66400 }, { "epoch": 2.21, "grad_norm": 0.6381272673606873, "learning_rate": 9.712939941320456e-05, "loss": 1.6658, "step": 66401 }, { "epoch": 2.21, "grad_norm": 0.6486685276031494, "learning_rate": 9.712169961192978e-05, "loss": 1.6944, "step": 66402 }, { "epoch": 2.21, "grad_norm": 0.6545125842094421, "learning_rate": 9.711400005691945e-05, "loss": 1.7295, "step": 66403 }, { "epoch": 2.21, "grad_norm": 0.6609176397323608, "learning_rate": 9.710630074818272e-05, "loss": 1.7162, "step": 66404 }, { "epoch": 2.21, "grad_norm": 0.6648814678192139, "learning_rate": 9.709860168572915e-05, "loss": 1.7705, "step": 66405 }, { "epoch": 2.21, "grad_norm": 0.6698096990585327, "learning_rate": 9.7090902869568e-05, "loss": 1.6862, "step": 66406 }, { "epoch": 2.21, "grad_norm": 0.6385520100593567, "learning_rate": 9.708320429970846e-05, "loss": 1.7064, "step": 66407 }, { "epoch": 2.21, "grad_norm": 0.652917742729187, "learning_rate": 9.707550597616015e-05, "loss": 1.6642, "step": 66408 }, { "epoch": 2.21, "grad_norm": 0.6385378241539001, "learning_rate": 9.706780789893216e-05, "loss": 1.6853, "step": 66409 }, { "epoch": 2.21, "grad_norm": 0.6495837569236755, "learning_rate": 9.706011006803404e-05, "loss": 1.6648, "step": 66410 }, { "epoch": 2.21, "grad_norm": 0.6448388695716858, "learning_rate": 9.705241248347509e-05, "loss": 1.8099, "step": 66411 }, { "epoch": 2.21, "grad_norm": 0.64849853515625, "learning_rate": 9.704471514526446e-05, "loss": 1.6476, "step": 66412 }, { "epoch": 2.21, "grad_norm": 0.6340789794921875, "learning_rate": 9.703701805341174e-05, "loss": 1.7166, "step": 66413 }, { "epoch": 2.21, "grad_norm": 0.7435329556465149, "learning_rate": 9.702932120792611e-05, "loss": 1.7918, "step": 66414 }, { "epoch": 2.21, "grad_norm": 0.6646929383277893, "learning_rate": 9.702162460881704e-05, "loss": 1.7317, "step": 66415 }, { "epoch": 2.21, "grad_norm": 0.6773720979690552, "learning_rate": 9.701392825609374e-05, "loss": 1.6837, "step": 66416 }, { "epoch": 2.21, "grad_norm": 0.646892249584198, "learning_rate": 9.700623214976572e-05, "loss": 1.6895, "step": 66417 }, { "epoch": 2.21, "grad_norm": 0.6591985821723938, "learning_rate": 9.699853628984223e-05, "loss": 1.7501, "step": 66418 }, { "epoch": 2.21, "grad_norm": 0.651734471321106, "learning_rate": 9.699084067633248e-05, "loss": 1.7167, "step": 66419 }, { "epoch": 2.21, "grad_norm": 0.6623138785362244, "learning_rate": 9.698314530924606e-05, "loss": 1.7932, "step": 66420 }, { "epoch": 2.21, "grad_norm": 0.6710879802703857, "learning_rate": 9.697545018859217e-05, "loss": 1.7094, "step": 66421 }, { "epoch": 2.21, "grad_norm": 0.6764693260192871, "learning_rate": 9.696775531438006e-05, "loss": 1.6632, "step": 66422 }, { "epoch": 2.21, "grad_norm": 0.6494616866111755, "learning_rate": 9.69600606866192e-05, "loss": 1.7321, "step": 66423 }, { "epoch": 2.21, "grad_norm": 0.6240350008010864, "learning_rate": 9.695236630531903e-05, "loss": 1.666, "step": 66424 }, { "epoch": 2.21, "grad_norm": 0.6629384160041809, "learning_rate": 9.694467217048873e-05, "loss": 1.7305, "step": 66425 }, { "epoch": 2.21, "grad_norm": 0.6550397872924805, "learning_rate": 9.693697828213761e-05, "loss": 1.8004, "step": 66426 }, { "epoch": 2.21, "grad_norm": 0.6521532535552979, "learning_rate": 9.692928464027516e-05, "loss": 1.6834, "step": 66427 }, { "epoch": 2.21, "grad_norm": 0.6796285510063171, "learning_rate": 9.692159124491064e-05, "loss": 1.694, "step": 66428 }, { "epoch": 2.21, "grad_norm": 0.6641263365745544, "learning_rate": 9.69138980960533e-05, "loss": 1.6642, "step": 66429 }, { "epoch": 2.21, "grad_norm": 0.6262801885604858, "learning_rate": 9.690620519371254e-05, "loss": 1.7013, "step": 66430 }, { "epoch": 2.21, "grad_norm": 0.6567983627319336, "learning_rate": 9.689851253789782e-05, "loss": 1.7581, "step": 66431 }, { "epoch": 2.21, "grad_norm": 0.6668062210083008, "learning_rate": 9.68908201286184e-05, "loss": 1.7466, "step": 66432 }, { "epoch": 2.21, "grad_norm": 0.6430646777153015, "learning_rate": 9.68831279658835e-05, "loss": 1.6016, "step": 66433 }, { "epoch": 2.21, "grad_norm": 0.6397263407707214, "learning_rate": 9.687543604970262e-05, "loss": 1.7279, "step": 66434 }, { "epoch": 2.21, "grad_norm": 0.6330968737602234, "learning_rate": 9.686774438008507e-05, "loss": 1.6846, "step": 66435 }, { "epoch": 2.21, "grad_norm": 0.6615708470344543, "learning_rate": 9.686005295704003e-05, "loss": 1.7409, "step": 66436 }, { "epoch": 2.21, "grad_norm": 0.666235089302063, "learning_rate": 9.685236178057699e-05, "loss": 1.691, "step": 66437 }, { "epoch": 2.21, "grad_norm": 0.6479648351669312, "learning_rate": 9.684467085070529e-05, "loss": 1.708, "step": 66438 }, { "epoch": 2.21, "grad_norm": 0.6720579266548157, "learning_rate": 9.683698016743427e-05, "loss": 1.6862, "step": 66439 }, { "epoch": 2.21, "grad_norm": 0.6236212849617004, "learning_rate": 9.682928973077312e-05, "loss": 1.62, "step": 66440 }, { "epoch": 2.21, "grad_norm": 0.6810553669929504, "learning_rate": 9.682159954073135e-05, "loss": 1.6662, "step": 66441 }, { "epoch": 2.21, "grad_norm": 0.6879072189331055, "learning_rate": 9.681390959731824e-05, "loss": 1.6985, "step": 66442 }, { "epoch": 2.21, "grad_norm": 0.6722579598426819, "learning_rate": 9.6806219900543e-05, "loss": 1.6729, "step": 66443 }, { "epoch": 2.21, "grad_norm": 0.6605449914932251, "learning_rate": 9.67985304504151e-05, "loss": 1.675, "step": 66444 }, { "epoch": 2.21, "grad_norm": 0.6368291974067688, "learning_rate": 9.679084124694389e-05, "loss": 1.6937, "step": 66445 }, { "epoch": 2.21, "grad_norm": 0.667222797870636, "learning_rate": 9.678315229013871e-05, "loss": 1.7459, "step": 66446 }, { "epoch": 2.21, "grad_norm": 0.666530966758728, "learning_rate": 9.677546358000871e-05, "loss": 1.7246, "step": 66447 }, { "epoch": 2.21, "grad_norm": 0.6819785833358765, "learning_rate": 9.676777511656344e-05, "loss": 1.6385, "step": 66448 }, { "epoch": 2.21, "grad_norm": 0.6484602093696594, "learning_rate": 9.676008689981218e-05, "loss": 1.7582, "step": 66449 }, { "epoch": 2.21, "grad_norm": 0.6699519157409668, "learning_rate": 9.67523989297641e-05, "loss": 1.6966, "step": 66450 }, { "epoch": 2.21, "grad_norm": 0.65834641456604, "learning_rate": 9.674471120642877e-05, "loss": 1.7082, "step": 66451 }, { "epoch": 2.21, "grad_norm": 0.661690890789032, "learning_rate": 9.673702372981532e-05, "loss": 1.6908, "step": 66452 }, { "epoch": 2.21, "grad_norm": 0.6811721920967102, "learning_rate": 9.672933649993328e-05, "loss": 1.7162, "step": 66453 }, { "epoch": 2.21, "grad_norm": 0.6568586230278015, "learning_rate": 9.672164951679175e-05, "loss": 1.6775, "step": 66454 }, { "epoch": 2.21, "grad_norm": 0.6551854014396667, "learning_rate": 9.671396278040029e-05, "loss": 1.7553, "step": 66455 }, { "epoch": 2.21, "grad_norm": 0.6489095687866211, "learning_rate": 9.670627629076814e-05, "loss": 1.6812, "step": 66456 }, { "epoch": 2.21, "grad_norm": 0.6470688581466675, "learning_rate": 9.669859004790453e-05, "loss": 1.7505, "step": 66457 }, { "epoch": 2.21, "grad_norm": 0.66248619556427, "learning_rate": 9.669090405181895e-05, "loss": 1.6348, "step": 66458 }, { "epoch": 2.21, "grad_norm": 0.6389203071594238, "learning_rate": 9.668321830252059e-05, "loss": 1.6672, "step": 66459 }, { "epoch": 2.21, "grad_norm": 0.6551252007484436, "learning_rate": 9.667553280001891e-05, "loss": 1.7492, "step": 66460 }, { "epoch": 2.21, "grad_norm": 0.6553294062614441, "learning_rate": 9.666784754432318e-05, "loss": 1.6327, "step": 66461 }, { "epoch": 2.21, "grad_norm": 0.6451868414878845, "learning_rate": 9.666016253544261e-05, "loss": 1.7188, "step": 66462 }, { "epoch": 2.21, "grad_norm": 0.6455569863319397, "learning_rate": 9.665247777338677e-05, "loss": 1.6718, "step": 66463 }, { "epoch": 2.21, "grad_norm": 0.6633388996124268, "learning_rate": 9.664479325816475e-05, "loss": 1.6743, "step": 66464 }, { "epoch": 2.21, "grad_norm": 0.7033964991569519, "learning_rate": 9.663710898978609e-05, "loss": 1.7087, "step": 66465 }, { "epoch": 2.21, "grad_norm": 0.6819592714309692, "learning_rate": 9.662942496825992e-05, "loss": 1.6944, "step": 66466 }, { "epoch": 2.21, "grad_norm": 0.6482305526733398, "learning_rate": 9.662174119359575e-05, "loss": 1.7548, "step": 66467 }, { "epoch": 2.21, "grad_norm": 0.6582058668136597, "learning_rate": 9.661405766580283e-05, "loss": 1.7015, "step": 66468 }, { "epoch": 2.21, "grad_norm": 0.67646723985672, "learning_rate": 9.660637438489037e-05, "loss": 1.6869, "step": 66469 }, { "epoch": 2.21, "grad_norm": 0.6511855721473694, "learning_rate": 9.659869135086792e-05, "loss": 1.6999, "step": 66470 }, { "epoch": 2.21, "grad_norm": 0.6818530559539795, "learning_rate": 9.659100856374467e-05, "loss": 1.7262, "step": 66471 }, { "epoch": 2.21, "grad_norm": 0.6628114581108093, "learning_rate": 9.658332602352988e-05, "loss": 1.7003, "step": 66472 }, { "epoch": 2.21, "grad_norm": 0.6637195944786072, "learning_rate": 9.657564373023297e-05, "loss": 1.6812, "step": 66473 }, { "epoch": 2.21, "grad_norm": 0.700620174407959, "learning_rate": 9.65679616838633e-05, "loss": 1.7209, "step": 66474 }, { "epoch": 2.21, "grad_norm": 0.6718577742576599, "learning_rate": 9.656027988443022e-05, "loss": 1.6773, "step": 66475 }, { "epoch": 2.21, "grad_norm": 0.660455584526062, "learning_rate": 9.655259833194282e-05, "loss": 1.7014, "step": 66476 }, { "epoch": 2.21, "grad_norm": 0.6408825516700745, "learning_rate": 9.654491702641072e-05, "loss": 1.6258, "step": 66477 }, { "epoch": 2.21, "grad_norm": 0.6508885025978088, "learning_rate": 9.653723596784313e-05, "loss": 1.7044, "step": 66478 }, { "epoch": 2.21, "grad_norm": 0.6704232692718506, "learning_rate": 9.652955515624926e-05, "loss": 1.7075, "step": 66479 }, { "epoch": 2.21, "grad_norm": 0.6654738187789917, "learning_rate": 9.65218745916385e-05, "loss": 1.7183, "step": 66480 }, { "epoch": 2.21, "grad_norm": 0.6646055579185486, "learning_rate": 9.651419427402038e-05, "loss": 1.6612, "step": 66481 }, { "epoch": 2.21, "grad_norm": 0.6647700667381287, "learning_rate": 9.650651420340388e-05, "loss": 1.702, "step": 66482 }, { "epoch": 2.21, "grad_norm": 0.6498508453369141, "learning_rate": 9.64988343797985e-05, "loss": 1.6935, "step": 66483 }, { "epoch": 2.21, "grad_norm": 0.6604196429252625, "learning_rate": 9.649115480321365e-05, "loss": 1.6393, "step": 66484 }, { "epoch": 2.21, "grad_norm": 0.6349970698356628, "learning_rate": 9.648347547365852e-05, "loss": 1.6562, "step": 66485 }, { "epoch": 2.21, "grad_norm": 0.6480123400688171, "learning_rate": 9.647579639114239e-05, "loss": 1.7241, "step": 66486 }, { "epoch": 2.21, "grad_norm": 0.6597715020179749, "learning_rate": 9.646811755567468e-05, "loss": 1.6999, "step": 66487 }, { "epoch": 2.21, "grad_norm": 0.6463211178779602, "learning_rate": 9.646043896726484e-05, "loss": 1.6276, "step": 66488 }, { "epoch": 2.21, "grad_norm": 0.6557837128639221, "learning_rate": 9.645276062592186e-05, "loss": 1.7393, "step": 66489 }, { "epoch": 2.21, "grad_norm": 0.6422760486602783, "learning_rate": 9.644508253165523e-05, "loss": 1.7361, "step": 66490 }, { "epoch": 2.21, "grad_norm": 0.6651342511177063, "learning_rate": 9.643740468447438e-05, "loss": 1.6702, "step": 66491 }, { "epoch": 2.21, "grad_norm": 0.6502783298492432, "learning_rate": 9.642972708438851e-05, "loss": 1.7366, "step": 66492 }, { "epoch": 2.21, "grad_norm": 0.65929114818573, "learning_rate": 9.642204973140686e-05, "loss": 1.6548, "step": 66493 }, { "epoch": 2.21, "grad_norm": 0.6467944979667664, "learning_rate": 9.641437262553888e-05, "loss": 1.652, "step": 66494 }, { "epoch": 2.21, "grad_norm": 0.6322922110557556, "learning_rate": 9.640669576679402e-05, "loss": 1.7519, "step": 66495 }, { "epoch": 2.21, "grad_norm": 0.6649563312530518, "learning_rate": 9.639901915518126e-05, "loss": 1.6765, "step": 66496 }, { "epoch": 2.21, "grad_norm": 0.6485690474510193, "learning_rate": 9.639134279071006e-05, "loss": 1.7474, "step": 66497 }, { "epoch": 2.21, "grad_norm": 0.669520378112793, "learning_rate": 9.638366667338988e-05, "loss": 1.6762, "step": 66498 }, { "epoch": 2.21, "grad_norm": 0.6353362202644348, "learning_rate": 9.637599080322996e-05, "loss": 1.6115, "step": 66499 }, { "epoch": 2.21, "grad_norm": 0.6441549062728882, "learning_rate": 9.636831518023946e-05, "loss": 1.6812, "step": 66500 }, { "epoch": 2.21, "grad_norm": 0.650814950466156, "learning_rate": 9.636063980442791e-05, "loss": 1.7064, "step": 66501 }, { "epoch": 2.21, "grad_norm": 0.6700350642204285, "learning_rate": 9.635296467580456e-05, "loss": 1.6603, "step": 66502 }, { "epoch": 2.21, "grad_norm": 0.6881127953529358, "learning_rate": 9.634528979437857e-05, "loss": 1.6758, "step": 66503 }, { "epoch": 2.21, "grad_norm": 0.6458722949028015, "learning_rate": 9.633761516015943e-05, "loss": 1.7035, "step": 66504 }, { "epoch": 2.21, "grad_norm": 0.6244745850563049, "learning_rate": 9.632994077315651e-05, "loss": 1.6477, "step": 66505 }, { "epoch": 2.21, "grad_norm": 0.6373531818389893, "learning_rate": 9.6322266633379e-05, "loss": 1.6361, "step": 66506 }, { "epoch": 2.21, "grad_norm": 0.6595832109451294, "learning_rate": 9.631459274083618e-05, "loss": 1.6966, "step": 66507 }, { "epoch": 2.21, "grad_norm": 0.6629855632781982, "learning_rate": 9.630691909553753e-05, "loss": 1.7791, "step": 66508 }, { "epoch": 2.21, "grad_norm": 0.6375837922096252, "learning_rate": 9.629924569749225e-05, "loss": 1.6008, "step": 66509 }, { "epoch": 2.21, "grad_norm": 0.6457625031471252, "learning_rate": 9.629157254670959e-05, "loss": 1.7039, "step": 66510 }, { "epoch": 2.21, "grad_norm": 0.6447165012359619, "learning_rate": 9.628389964319894e-05, "loss": 1.6819, "step": 66511 }, { "epoch": 2.21, "grad_norm": 0.6710841655731201, "learning_rate": 9.62762269869697e-05, "loss": 1.749, "step": 66512 }, { "epoch": 2.21, "grad_norm": 0.6383293867111206, "learning_rate": 9.626855457803113e-05, "loss": 1.7593, "step": 66513 }, { "epoch": 2.21, "grad_norm": 0.6574957370758057, "learning_rate": 9.626088241639244e-05, "loss": 1.7081, "step": 66514 }, { "epoch": 2.21, "grad_norm": 0.6643832325935364, "learning_rate": 9.625321050206309e-05, "loss": 1.7916, "step": 66515 }, { "epoch": 2.21, "grad_norm": 0.6645674705505371, "learning_rate": 9.62455388350523e-05, "loss": 1.6597, "step": 66516 }, { "epoch": 2.21, "grad_norm": 0.6355512738227844, "learning_rate": 9.623786741536933e-05, "loss": 1.6768, "step": 66517 }, { "epoch": 2.21, "grad_norm": 0.6575948596000671, "learning_rate": 9.623019624302365e-05, "loss": 1.6629, "step": 66518 }, { "epoch": 2.21, "grad_norm": 0.6446154713630676, "learning_rate": 9.62225253180244e-05, "loss": 1.7001, "step": 66519 }, { "epoch": 2.21, "grad_norm": 0.6368927955627441, "learning_rate": 9.621485464038107e-05, "loss": 1.6786, "step": 66520 }, { "epoch": 2.21, "grad_norm": 0.6644281148910522, "learning_rate": 9.62071842101028e-05, "loss": 1.6537, "step": 66521 }, { "epoch": 2.21, "grad_norm": 0.6419290900230408, "learning_rate": 9.619951402719905e-05, "loss": 1.734, "step": 66522 }, { "epoch": 2.21, "grad_norm": 0.6328554153442383, "learning_rate": 9.619184409167908e-05, "loss": 1.6593, "step": 66523 }, { "epoch": 2.21, "grad_norm": 0.6279025673866272, "learning_rate": 9.618417440355208e-05, "loss": 1.623, "step": 66524 }, { "epoch": 2.21, "grad_norm": 0.6664051413536072, "learning_rate": 9.617650496282758e-05, "loss": 1.7675, "step": 66525 }, { "epoch": 2.21, "grad_norm": 0.6646650433540344, "learning_rate": 9.616883576951464e-05, "loss": 1.6391, "step": 66526 }, { "epoch": 2.21, "grad_norm": 0.6490832567214966, "learning_rate": 9.616116682362282e-05, "loss": 1.6706, "step": 66527 }, { "epoch": 2.21, "grad_norm": 0.6364113092422485, "learning_rate": 9.61534981251613e-05, "loss": 1.7029, "step": 66528 }, { "epoch": 2.21, "grad_norm": 0.6336820125579834, "learning_rate": 9.614582967413932e-05, "loss": 1.6388, "step": 66529 }, { "epoch": 2.21, "grad_norm": 0.6478126645088196, "learning_rate": 9.613816147056634e-05, "loss": 1.7075, "step": 66530 }, { "epoch": 2.21, "grad_norm": 0.6553949117660522, "learning_rate": 9.61304935144515e-05, "loss": 1.7099, "step": 66531 }, { "epoch": 2.21, "grad_norm": 0.6529918909072876, "learning_rate": 9.612282580580429e-05, "loss": 1.6777, "step": 66532 }, { "epoch": 2.21, "grad_norm": 0.6328454613685608, "learning_rate": 9.611515834463383e-05, "loss": 1.7302, "step": 66533 }, { "epoch": 2.21, "grad_norm": 0.6554214358329773, "learning_rate": 9.610749113094965e-05, "loss": 1.7607, "step": 66534 }, { "epoch": 2.21, "grad_norm": 0.6558917760848999, "learning_rate": 9.609982416476094e-05, "loss": 1.6994, "step": 66535 }, { "epoch": 2.21, "grad_norm": 0.660538911819458, "learning_rate": 9.609215744607689e-05, "loss": 1.7333, "step": 66536 }, { "epoch": 2.21, "grad_norm": 0.6500020027160645, "learning_rate": 9.6084490974907e-05, "loss": 1.6679, "step": 66537 }, { "epoch": 2.21, "grad_norm": 0.6383295655250549, "learning_rate": 9.60768247512605e-05, "loss": 1.7241, "step": 66538 }, { "epoch": 2.21, "grad_norm": 0.6442558169364929, "learning_rate": 9.60691587751466e-05, "loss": 1.6467, "step": 66539 }, { "epoch": 2.21, "grad_norm": 0.6753605604171753, "learning_rate": 9.606149304657468e-05, "loss": 1.6579, "step": 66540 }, { "epoch": 2.21, "grad_norm": 0.6314078569412231, "learning_rate": 9.605382756555414e-05, "loss": 1.6971, "step": 66541 }, { "epoch": 2.21, "grad_norm": 0.6593027114868164, "learning_rate": 9.604616233209424e-05, "loss": 1.7579, "step": 66542 }, { "epoch": 2.21, "grad_norm": 0.6437370777130127, "learning_rate": 9.603849734620414e-05, "loss": 1.6979, "step": 66543 }, { "epoch": 2.21, "grad_norm": 0.6847325563430786, "learning_rate": 9.603083260789334e-05, "loss": 1.6768, "step": 66544 }, { "epoch": 2.21, "grad_norm": 0.6460233926773071, "learning_rate": 9.602316811717105e-05, "loss": 1.5964, "step": 66545 }, { "epoch": 2.21, "grad_norm": 0.6519311666488647, "learning_rate": 9.60155038740465e-05, "loss": 1.7185, "step": 66546 }, { "epoch": 2.21, "grad_norm": 0.6472446918487549, "learning_rate": 9.600783987852905e-05, "loss": 1.5902, "step": 66547 }, { "epoch": 2.21, "grad_norm": 0.6531407833099365, "learning_rate": 9.600017613062816e-05, "loss": 1.7091, "step": 66548 }, { "epoch": 2.21, "grad_norm": 0.6642814874649048, "learning_rate": 9.599251263035296e-05, "loss": 1.6446, "step": 66549 }, { "epoch": 2.21, "grad_norm": 0.6703311204910278, "learning_rate": 9.59848493777127e-05, "loss": 1.6789, "step": 66550 }, { "epoch": 2.21, "grad_norm": 0.6358094215393066, "learning_rate": 9.597718637271688e-05, "loss": 1.6177, "step": 66551 }, { "epoch": 2.21, "grad_norm": 0.651975691318512, "learning_rate": 9.59695236153747e-05, "loss": 1.6952, "step": 66552 }, { "epoch": 2.21, "grad_norm": 0.6268793940544128, "learning_rate": 9.596186110569537e-05, "loss": 1.6836, "step": 66553 }, { "epoch": 2.21, "grad_norm": 0.6750993132591248, "learning_rate": 9.595419884368826e-05, "loss": 1.6243, "step": 66554 }, { "epoch": 2.21, "grad_norm": 0.6575257182121277, "learning_rate": 9.594653682936279e-05, "loss": 1.6538, "step": 66555 }, { "epoch": 2.21, "grad_norm": 0.6403027772903442, "learning_rate": 9.593887506272815e-05, "loss": 1.6491, "step": 66556 }, { "epoch": 2.21, "grad_norm": 0.6603425145149231, "learning_rate": 9.593121354379354e-05, "loss": 1.7504, "step": 66557 }, { "epoch": 2.21, "grad_norm": 0.6756948232650757, "learning_rate": 9.592355227256848e-05, "loss": 1.6556, "step": 66558 }, { "epoch": 2.21, "grad_norm": 0.6474677324295044, "learning_rate": 9.591589124906213e-05, "loss": 1.7209, "step": 66559 }, { "epoch": 2.21, "grad_norm": 0.664999783039093, "learning_rate": 9.590823047328376e-05, "loss": 1.7378, "step": 66560 }, { "epoch": 2.21, "grad_norm": 0.6358121037483215, "learning_rate": 9.590056994524272e-05, "loss": 1.6666, "step": 66561 }, { "epoch": 2.21, "grad_norm": 0.6466672420501709, "learning_rate": 9.58929096649484e-05, "loss": 1.6212, "step": 66562 }, { "epoch": 2.21, "grad_norm": 0.6418358087539673, "learning_rate": 9.588524963241003e-05, "loss": 1.7496, "step": 66563 }, { "epoch": 2.21, "grad_norm": 0.6611606478691101, "learning_rate": 9.58775898476368e-05, "loss": 1.6775, "step": 66564 }, { "epoch": 2.21, "grad_norm": 0.6506626009941101, "learning_rate": 9.586993031063818e-05, "loss": 1.7117, "step": 66565 }, { "epoch": 2.21, "grad_norm": 0.6609035134315491, "learning_rate": 9.586227102142337e-05, "loss": 1.7451, "step": 66566 }, { "epoch": 2.21, "grad_norm": 0.6531594395637512, "learning_rate": 9.585461198000162e-05, "loss": 1.6532, "step": 66567 }, { "epoch": 2.21, "grad_norm": 0.6650193333625793, "learning_rate": 9.58469531863824e-05, "loss": 1.735, "step": 66568 }, { "epoch": 2.21, "grad_norm": 0.6430389881134033, "learning_rate": 9.583929464057476e-05, "loss": 1.7099, "step": 66569 }, { "epoch": 2.21, "grad_norm": 0.6487313508987427, "learning_rate": 9.583163634258825e-05, "loss": 1.8023, "step": 66570 }, { "epoch": 2.21, "grad_norm": 0.6433577537536621, "learning_rate": 9.582397829243195e-05, "loss": 1.6845, "step": 66571 }, { "epoch": 2.21, "grad_norm": 0.6528821587562561, "learning_rate": 9.581632049011535e-05, "loss": 1.7031, "step": 66572 }, { "epoch": 2.21, "grad_norm": 0.6294700503349304, "learning_rate": 9.580866293564766e-05, "loss": 1.7159, "step": 66573 }, { "epoch": 2.21, "grad_norm": 0.6550005078315735, "learning_rate": 9.580100562903807e-05, "loss": 1.6443, "step": 66574 }, { "epoch": 2.21, "grad_norm": 0.6402396559715271, "learning_rate": 9.579334857029608e-05, "loss": 1.6788, "step": 66575 }, { "epoch": 2.21, "grad_norm": 0.6612702012062073, "learning_rate": 9.578569175943078e-05, "loss": 1.7446, "step": 66576 }, { "epoch": 2.22, "grad_norm": 0.6646876335144043, "learning_rate": 9.577803519645165e-05, "loss": 1.6278, "step": 66577 }, { "epoch": 2.22, "grad_norm": 0.6555589437484741, "learning_rate": 9.577037888136788e-05, "loss": 1.7331, "step": 66578 }, { "epoch": 2.22, "grad_norm": 0.6475701332092285, "learning_rate": 9.576272281418868e-05, "loss": 1.647, "step": 66579 }, { "epoch": 2.22, "grad_norm": 0.6478427052497864, "learning_rate": 9.575506699492355e-05, "loss": 1.702, "step": 66580 }, { "epoch": 2.22, "grad_norm": 0.6565282344818115, "learning_rate": 9.57474114235816e-05, "loss": 1.6851, "step": 66581 }, { "epoch": 2.22, "grad_norm": 0.6442283987998962, "learning_rate": 9.573975610017223e-05, "loss": 1.7762, "step": 66582 }, { "epoch": 2.22, "grad_norm": 0.6522282958030701, "learning_rate": 9.573210102470466e-05, "loss": 1.6861, "step": 66583 }, { "epoch": 2.22, "grad_norm": 0.6831120848655701, "learning_rate": 9.57244461971883e-05, "loss": 1.7334, "step": 66584 }, { "epoch": 2.22, "grad_norm": 0.6821805834770203, "learning_rate": 9.571679161763236e-05, "loss": 1.7017, "step": 66585 }, { "epoch": 2.22, "grad_norm": 0.6613685488700867, "learning_rate": 9.570913728604607e-05, "loss": 1.6908, "step": 66586 }, { "epoch": 2.22, "grad_norm": 0.6813000440597534, "learning_rate": 9.570148320243885e-05, "loss": 1.6868, "step": 66587 }, { "epoch": 2.22, "grad_norm": 0.6327166557312012, "learning_rate": 9.569382936681992e-05, "loss": 1.7628, "step": 66588 }, { "epoch": 2.22, "grad_norm": 0.6420964598655701, "learning_rate": 9.568617577919852e-05, "loss": 1.6915, "step": 66589 }, { "epoch": 2.22, "grad_norm": 0.6271716356277466, "learning_rate": 9.567852243958396e-05, "loss": 1.719, "step": 66590 }, { "epoch": 2.22, "grad_norm": 0.6606418490409851, "learning_rate": 9.567086934798567e-05, "loss": 1.7232, "step": 66591 }, { "epoch": 2.22, "grad_norm": 0.6400014162063599, "learning_rate": 9.566321650441286e-05, "loss": 1.6832, "step": 66592 }, { "epoch": 2.22, "grad_norm": 0.6371477246284485, "learning_rate": 9.565556390887468e-05, "loss": 1.6841, "step": 66593 }, { "epoch": 2.22, "grad_norm": 0.6859681010246277, "learning_rate": 9.564791156138065e-05, "loss": 1.701, "step": 66594 }, { "epoch": 2.22, "grad_norm": 0.6392362713813782, "learning_rate": 9.564025946193996e-05, "loss": 1.6907, "step": 66595 }, { "epoch": 2.22, "grad_norm": 0.6394190788269043, "learning_rate": 9.563260761056177e-05, "loss": 1.6397, "step": 66596 }, { "epoch": 2.22, "grad_norm": 0.6558093428611755, "learning_rate": 9.562495600725547e-05, "loss": 1.7596, "step": 66597 }, { "epoch": 2.22, "grad_norm": 0.65534907579422, "learning_rate": 9.561730465203053e-05, "loss": 1.6855, "step": 66598 }, { "epoch": 2.22, "grad_norm": 0.6346668601036072, "learning_rate": 9.560965354489592e-05, "loss": 1.6799, "step": 66599 }, { "epoch": 2.22, "grad_norm": 0.6742308735847473, "learning_rate": 9.560200268586107e-05, "loss": 1.7084, "step": 66600 }, { "epoch": 2.22, "grad_norm": 0.675681471824646, "learning_rate": 9.559435207493535e-05, "loss": 1.7575, "step": 66601 }, { "epoch": 2.22, "grad_norm": 0.6480420827865601, "learning_rate": 9.558670171212797e-05, "loss": 1.7249, "step": 66602 }, { "epoch": 2.22, "grad_norm": 0.6320854425430298, "learning_rate": 9.557905159744813e-05, "loss": 1.6617, "step": 66603 }, { "epoch": 2.22, "grad_norm": 0.6270684599876404, "learning_rate": 9.557140173090521e-05, "loss": 1.6542, "step": 66604 }, { "epoch": 2.22, "grad_norm": 0.6281620860099792, "learning_rate": 9.556375211250865e-05, "loss": 1.6337, "step": 66605 }, { "epoch": 2.22, "grad_norm": 0.6440256237983704, "learning_rate": 9.555610274226743e-05, "loss": 1.6316, "step": 66606 }, { "epoch": 2.22, "grad_norm": 0.644218921661377, "learning_rate": 9.554845362019092e-05, "loss": 1.7029, "step": 66607 }, { "epoch": 2.22, "grad_norm": 0.6673896312713623, "learning_rate": 9.55408047462886e-05, "loss": 1.7423, "step": 66608 }, { "epoch": 2.22, "grad_norm": 0.657052755355835, "learning_rate": 9.553315612056959e-05, "loss": 1.6745, "step": 66609 }, { "epoch": 2.22, "grad_norm": 0.6495680809020996, "learning_rate": 9.552550774304314e-05, "loss": 1.7572, "step": 66610 }, { "epoch": 2.22, "grad_norm": 0.6572737097740173, "learning_rate": 9.551785961371855e-05, "loss": 1.7249, "step": 66611 }, { "epoch": 2.22, "grad_norm": 0.6591264009475708, "learning_rate": 9.551021173260538e-05, "loss": 1.6894, "step": 66612 }, { "epoch": 2.22, "grad_norm": 0.6551892757415771, "learning_rate": 9.550256409971249e-05, "loss": 1.6612, "step": 66613 }, { "epoch": 2.22, "grad_norm": 0.6360670924186707, "learning_rate": 9.549491671504933e-05, "loss": 1.7086, "step": 66614 }, { "epoch": 2.22, "grad_norm": 0.6530922055244446, "learning_rate": 9.548726957862531e-05, "loss": 1.6524, "step": 66615 }, { "epoch": 2.22, "grad_norm": 0.6475019454956055, "learning_rate": 9.547962269044963e-05, "loss": 1.7637, "step": 66616 }, { "epoch": 2.22, "grad_norm": 0.6504343152046204, "learning_rate": 9.547197605053144e-05, "loss": 1.68, "step": 66617 }, { "epoch": 2.22, "grad_norm": 0.6597347855567932, "learning_rate": 9.546432965888022e-05, "loss": 1.6626, "step": 66618 }, { "epoch": 2.22, "grad_norm": 0.6594337821006775, "learning_rate": 9.545668351550521e-05, "loss": 1.7858, "step": 66619 }, { "epoch": 2.22, "grad_norm": 0.6302176713943481, "learning_rate": 9.544903762041552e-05, "loss": 1.6009, "step": 66620 }, { "epoch": 2.22, "grad_norm": 0.6679631471633911, "learning_rate": 9.544139197362058e-05, "loss": 1.7292, "step": 66621 }, { "epoch": 2.22, "grad_norm": 0.6592594981193542, "learning_rate": 9.543374657512971e-05, "loss": 1.7184, "step": 66622 }, { "epoch": 2.22, "grad_norm": 0.6524413824081421, "learning_rate": 9.542610142495215e-05, "loss": 1.6878, "step": 66623 }, { "epoch": 2.22, "grad_norm": 0.6580374240875244, "learning_rate": 9.541845652309705e-05, "loss": 1.6836, "step": 66624 }, { "epoch": 2.22, "grad_norm": 0.6647682189941406, "learning_rate": 9.541081186957394e-05, "loss": 1.7769, "step": 66625 }, { "epoch": 2.22, "grad_norm": 0.658783495426178, "learning_rate": 9.540316746439192e-05, "loss": 1.763, "step": 66626 }, { "epoch": 2.22, "grad_norm": 0.6523791551589966, "learning_rate": 9.539552330756023e-05, "loss": 1.6684, "step": 66627 }, { "epoch": 2.22, "grad_norm": 0.6615627408027649, "learning_rate": 9.53878793990882e-05, "loss": 1.7737, "step": 66628 }, { "epoch": 2.22, "grad_norm": 0.6892818808555603, "learning_rate": 9.538023573898528e-05, "loss": 1.7722, "step": 66629 }, { "epoch": 2.22, "grad_norm": 0.6308024525642395, "learning_rate": 9.537259232726057e-05, "loss": 1.7434, "step": 66630 }, { "epoch": 2.22, "grad_norm": 0.6569337248802185, "learning_rate": 9.53649491639233e-05, "loss": 1.7638, "step": 66631 }, { "epoch": 2.22, "grad_norm": 0.6292827725410461, "learning_rate": 9.53573062489829e-05, "loss": 1.6318, "step": 66632 }, { "epoch": 2.22, "grad_norm": 0.6480131149291992, "learning_rate": 9.53496635824486e-05, "loss": 1.7746, "step": 66633 }, { "epoch": 2.22, "grad_norm": 0.6797618865966797, "learning_rate": 9.534202116432955e-05, "loss": 1.6343, "step": 66634 }, { "epoch": 2.22, "grad_norm": 0.6363646388053894, "learning_rate": 9.533437899463525e-05, "loss": 1.7117, "step": 66635 }, { "epoch": 2.22, "grad_norm": 0.6356507539749146, "learning_rate": 9.532673707337476e-05, "loss": 1.6869, "step": 66636 }, { "epoch": 2.22, "grad_norm": 0.6551089882850647, "learning_rate": 9.531909540055753e-05, "loss": 1.7164, "step": 66637 }, { "epoch": 2.22, "grad_norm": 0.6461840867996216, "learning_rate": 9.531145397619267e-05, "loss": 1.6731, "step": 66638 }, { "epoch": 2.22, "grad_norm": 0.6610152125358582, "learning_rate": 9.530381280028964e-05, "loss": 1.7464, "step": 66639 }, { "epoch": 2.22, "grad_norm": 0.6602885723114014, "learning_rate": 9.529617187285761e-05, "loss": 1.7113, "step": 66640 }, { "epoch": 2.22, "grad_norm": 0.6434887647628784, "learning_rate": 9.528853119390581e-05, "loss": 1.6673, "step": 66641 }, { "epoch": 2.22, "grad_norm": 0.6507732272148132, "learning_rate": 9.528089076344363e-05, "loss": 1.7157, "step": 66642 }, { "epoch": 2.22, "grad_norm": 0.6345840096473694, "learning_rate": 9.527325058148017e-05, "loss": 1.6437, "step": 66643 }, { "epoch": 2.22, "grad_norm": 0.6496020555496216, "learning_rate": 9.526561064802497e-05, "loss": 1.6527, "step": 66644 }, { "epoch": 2.22, "grad_norm": 0.6842479109764099, "learning_rate": 9.52579709630871e-05, "loss": 1.7874, "step": 66645 }, { "epoch": 2.22, "grad_norm": 0.6532102227210999, "learning_rate": 9.525033152667583e-05, "loss": 1.7286, "step": 66646 }, { "epoch": 2.22, "grad_norm": 0.6600731611251831, "learning_rate": 9.524269233880057e-05, "loss": 1.702, "step": 66647 }, { "epoch": 2.22, "grad_norm": 0.6351181268692017, "learning_rate": 9.523505339947041e-05, "loss": 1.6441, "step": 66648 }, { "epoch": 2.22, "grad_norm": 0.6302167773246765, "learning_rate": 9.522741470869482e-05, "loss": 1.6908, "step": 66649 }, { "epoch": 2.22, "grad_norm": 0.6494839787483215, "learning_rate": 9.521977626648288e-05, "loss": 1.6636, "step": 66650 }, { "epoch": 2.22, "grad_norm": 0.6422512531280518, "learning_rate": 9.521213807284407e-05, "loss": 1.7293, "step": 66651 }, { "epoch": 2.22, "grad_norm": 0.635796308517456, "learning_rate": 9.520450012778754e-05, "loss": 1.6995, "step": 66652 }, { "epoch": 2.22, "grad_norm": 0.652360200881958, "learning_rate": 9.51968624313225e-05, "loss": 1.7001, "step": 66653 }, { "epoch": 2.22, "grad_norm": 0.62885981798172, "learning_rate": 9.518922498345836e-05, "loss": 1.6803, "step": 66654 }, { "epoch": 2.22, "grad_norm": 0.6683905124664307, "learning_rate": 9.518158778420432e-05, "loss": 1.6772, "step": 66655 }, { "epoch": 2.22, "grad_norm": 0.6516064405441284, "learning_rate": 9.517395083356957e-05, "loss": 1.6611, "step": 66656 }, { "epoch": 2.22, "grad_norm": 0.63948655128479, "learning_rate": 9.516631413156344e-05, "loss": 1.7376, "step": 66657 }, { "epoch": 2.22, "grad_norm": 0.640755832195282, "learning_rate": 9.515867767819535e-05, "loss": 1.7282, "step": 66658 }, { "epoch": 2.22, "grad_norm": 0.6791035532951355, "learning_rate": 9.515104147347443e-05, "loss": 1.7187, "step": 66659 }, { "epoch": 2.22, "grad_norm": 0.6432515978813171, "learning_rate": 9.514340551740987e-05, "loss": 1.6892, "step": 66660 }, { "epoch": 2.22, "grad_norm": 0.6675350069999695, "learning_rate": 9.513576981001114e-05, "loss": 1.7067, "step": 66661 }, { "epoch": 2.22, "grad_norm": 0.6849474310874939, "learning_rate": 9.51281343512874e-05, "loss": 1.6942, "step": 66662 }, { "epoch": 2.22, "grad_norm": 0.6420687437057495, "learning_rate": 9.512049914124781e-05, "loss": 1.688, "step": 66663 }, { "epoch": 2.22, "grad_norm": 0.634558379650116, "learning_rate": 9.511286417990173e-05, "loss": 1.6458, "step": 66664 }, { "epoch": 2.22, "grad_norm": 0.6570700407028198, "learning_rate": 9.510522946725857e-05, "loss": 1.7292, "step": 66665 }, { "epoch": 2.22, "grad_norm": 0.6526119112968445, "learning_rate": 9.509759500332743e-05, "loss": 1.7756, "step": 66666 }, { "epoch": 2.22, "grad_norm": 0.6509016752243042, "learning_rate": 9.508996078811757e-05, "loss": 1.6627, "step": 66667 }, { "epoch": 2.22, "grad_norm": 0.6598028540611267, "learning_rate": 9.508232682163838e-05, "loss": 1.6617, "step": 66668 }, { "epoch": 2.22, "grad_norm": 0.6857271194458008, "learning_rate": 9.507469310389904e-05, "loss": 1.6951, "step": 66669 }, { "epoch": 2.22, "grad_norm": 0.6700880527496338, "learning_rate": 9.506705963490878e-05, "loss": 1.7373, "step": 66670 }, { "epoch": 2.22, "grad_norm": 0.6603504419326782, "learning_rate": 9.505942641467687e-05, "loss": 1.7227, "step": 66671 }, { "epoch": 2.22, "grad_norm": 0.6828653812408447, "learning_rate": 9.505179344321272e-05, "loss": 1.8331, "step": 66672 }, { "epoch": 2.22, "grad_norm": 0.6843070387840271, "learning_rate": 9.504416072052549e-05, "loss": 1.6698, "step": 66673 }, { "epoch": 2.22, "grad_norm": 0.6252308487892151, "learning_rate": 9.503652824662437e-05, "loss": 1.714, "step": 66674 }, { "epoch": 2.22, "grad_norm": 0.6242550611495972, "learning_rate": 9.502889602151882e-05, "loss": 1.595, "step": 66675 }, { "epoch": 2.22, "grad_norm": 0.6557124257087708, "learning_rate": 9.502126404521795e-05, "loss": 1.6084, "step": 66676 }, { "epoch": 2.22, "grad_norm": 0.6730135679244995, "learning_rate": 9.501363231773098e-05, "loss": 1.7129, "step": 66677 }, { "epoch": 2.22, "grad_norm": 0.6598637700080872, "learning_rate": 9.500600083906728e-05, "loss": 1.6767, "step": 66678 }, { "epoch": 2.22, "grad_norm": 0.6274039149284363, "learning_rate": 9.499836960923616e-05, "loss": 1.6868, "step": 66679 }, { "epoch": 2.22, "grad_norm": 0.6393670439720154, "learning_rate": 9.499073862824682e-05, "loss": 1.7323, "step": 66680 }, { "epoch": 2.22, "grad_norm": 0.6682111024856567, "learning_rate": 9.498310789610842e-05, "loss": 1.7203, "step": 66681 }, { "epoch": 2.22, "grad_norm": 0.6494622230529785, "learning_rate": 9.497547741283042e-05, "loss": 1.6592, "step": 66682 }, { "epoch": 2.22, "grad_norm": 0.7118916511535645, "learning_rate": 9.496784717842198e-05, "loss": 1.7434, "step": 66683 }, { "epoch": 2.22, "grad_norm": 0.6463485956192017, "learning_rate": 9.496021719289226e-05, "loss": 1.6709, "step": 66684 }, { "epoch": 2.22, "grad_norm": 0.6515669822692871, "learning_rate": 9.495258745625071e-05, "loss": 1.7063, "step": 66685 }, { "epoch": 2.22, "grad_norm": 0.6430738568305969, "learning_rate": 9.494495796850645e-05, "loss": 1.6597, "step": 66686 }, { "epoch": 2.22, "grad_norm": 0.6729795336723328, "learning_rate": 9.493732872966885e-05, "loss": 1.7275, "step": 66687 }, { "epoch": 2.22, "grad_norm": 0.6427829265594482, "learning_rate": 9.492969973974706e-05, "loss": 1.6967, "step": 66688 }, { "epoch": 2.22, "grad_norm": 0.6784283518791199, "learning_rate": 9.492207099875048e-05, "loss": 1.6677, "step": 66689 }, { "epoch": 2.22, "grad_norm": 0.6617840528488159, "learning_rate": 9.491444250668828e-05, "loss": 1.6472, "step": 66690 }, { "epoch": 2.22, "grad_norm": 0.6643577814102173, "learning_rate": 9.490681426356965e-05, "loss": 1.7118, "step": 66691 }, { "epoch": 2.22, "grad_norm": 0.6589558124542236, "learning_rate": 9.4899186269404e-05, "loss": 1.7446, "step": 66692 }, { "epoch": 2.22, "grad_norm": 0.6650592684745789, "learning_rate": 9.489155852420042e-05, "loss": 1.656, "step": 66693 }, { "epoch": 2.22, "grad_norm": 0.6458162069320679, "learning_rate": 9.48839310279684e-05, "loss": 1.7285, "step": 66694 }, { "epoch": 2.22, "grad_norm": 0.6512877345085144, "learning_rate": 9.487630378071706e-05, "loss": 1.7416, "step": 66695 }, { "epoch": 2.22, "grad_norm": 0.6626591086387634, "learning_rate": 9.486867678245553e-05, "loss": 1.725, "step": 66696 }, { "epoch": 2.22, "grad_norm": 0.6383534669876099, "learning_rate": 9.486105003319333e-05, "loss": 1.6628, "step": 66697 }, { "epoch": 2.22, "grad_norm": 0.6412660479545593, "learning_rate": 9.485342353293948e-05, "loss": 1.7291, "step": 66698 }, { "epoch": 2.22, "grad_norm": 0.6206352710723877, "learning_rate": 9.484579728170343e-05, "loss": 1.6638, "step": 66699 }, { "epoch": 2.22, "grad_norm": 0.6599981188774109, "learning_rate": 9.483817127949428e-05, "loss": 1.6527, "step": 66700 }, { "epoch": 2.22, "grad_norm": 0.6505518555641174, "learning_rate": 9.483054552632146e-05, "loss": 1.7241, "step": 66701 }, { "epoch": 2.22, "grad_norm": 0.6779561638832092, "learning_rate": 9.482292002219411e-05, "loss": 1.7293, "step": 66702 }, { "epoch": 2.22, "grad_norm": 0.6487877368927002, "learning_rate": 9.481529476712142e-05, "loss": 1.6259, "step": 66703 }, { "epoch": 2.22, "grad_norm": 0.6746079921722412, "learning_rate": 9.480766976111282e-05, "loss": 1.6638, "step": 66704 }, { "epoch": 2.22, "grad_norm": 0.6649724245071411, "learning_rate": 9.48000450041775e-05, "loss": 1.6662, "step": 66705 }, { "epoch": 2.22, "grad_norm": 0.661837637424469, "learning_rate": 9.479242049632455e-05, "loss": 1.6986, "step": 66706 }, { "epoch": 2.22, "grad_norm": 0.6517599821090698, "learning_rate": 9.478479623756338e-05, "loss": 1.7519, "step": 66707 }, { "epoch": 2.22, "grad_norm": 0.6292675733566284, "learning_rate": 9.477717222790333e-05, "loss": 1.7268, "step": 66708 }, { "epoch": 2.22, "grad_norm": 0.6492536664009094, "learning_rate": 9.476954846735358e-05, "loss": 1.7472, "step": 66709 }, { "epoch": 2.22, "grad_norm": 0.6367638111114502, "learning_rate": 9.476192495592324e-05, "loss": 1.6821, "step": 66710 }, { "epoch": 2.22, "grad_norm": 0.6707761883735657, "learning_rate": 9.475430169362175e-05, "loss": 1.713, "step": 66711 }, { "epoch": 2.22, "grad_norm": 0.6535598635673523, "learning_rate": 9.474667868045835e-05, "loss": 1.61, "step": 66712 }, { "epoch": 2.22, "grad_norm": 0.6664580702781677, "learning_rate": 9.473905591644212e-05, "loss": 1.696, "step": 66713 }, { "epoch": 2.22, "grad_norm": 0.6700422763824463, "learning_rate": 9.473143340158242e-05, "loss": 1.6635, "step": 66714 }, { "epoch": 2.22, "grad_norm": 0.6656559109687805, "learning_rate": 9.472381113588873e-05, "loss": 1.7268, "step": 66715 }, { "epoch": 2.22, "grad_norm": 0.6521692276000977, "learning_rate": 9.47161891193699e-05, "loss": 1.6612, "step": 66716 }, { "epoch": 2.22, "grad_norm": 0.643482506275177, "learning_rate": 9.470856735203532e-05, "loss": 1.7005, "step": 66717 }, { "epoch": 2.22, "grad_norm": 0.6507243514060974, "learning_rate": 9.470094583389443e-05, "loss": 1.699, "step": 66718 }, { "epoch": 2.22, "grad_norm": 0.6454470753669739, "learning_rate": 9.469332456495631e-05, "loss": 1.7172, "step": 66719 }, { "epoch": 2.22, "grad_norm": 0.6599940657615662, "learning_rate": 9.468570354523019e-05, "loss": 1.7564, "step": 66720 }, { "epoch": 2.22, "grad_norm": 0.6533907055854797, "learning_rate": 9.467808277472535e-05, "loss": 1.719, "step": 66721 }, { "epoch": 2.22, "grad_norm": 0.6310375332832336, "learning_rate": 9.467046225345127e-05, "loss": 1.6151, "step": 66722 }, { "epoch": 2.22, "grad_norm": 0.6692225337028503, "learning_rate": 9.46628419814168e-05, "loss": 1.7837, "step": 66723 }, { "epoch": 2.22, "grad_norm": 0.6317923069000244, "learning_rate": 9.465522195863137e-05, "loss": 1.6793, "step": 66724 }, { "epoch": 2.22, "grad_norm": 0.6360732316970825, "learning_rate": 9.464760218510436e-05, "loss": 1.7101, "step": 66725 }, { "epoch": 2.22, "grad_norm": 0.6473205089569092, "learning_rate": 9.463998266084488e-05, "loss": 1.6174, "step": 66726 }, { "epoch": 2.22, "grad_norm": 0.682779848575592, "learning_rate": 9.46323633858621e-05, "loss": 1.7697, "step": 66727 }, { "epoch": 2.22, "grad_norm": 0.6361193656921387, "learning_rate": 9.462474436016539e-05, "loss": 1.7009, "step": 66728 }, { "epoch": 2.22, "grad_norm": 0.6424698829650879, "learning_rate": 9.461712558376419e-05, "loss": 1.7061, "step": 66729 }, { "epoch": 2.22, "grad_norm": 0.6887864470481873, "learning_rate": 9.460950705666731e-05, "loss": 1.7167, "step": 66730 }, { "epoch": 2.22, "grad_norm": 0.6543805003166199, "learning_rate": 9.460188877888425e-05, "loss": 1.6752, "step": 66731 }, { "epoch": 2.22, "grad_norm": 0.6509044766426086, "learning_rate": 9.459427075042434e-05, "loss": 1.6406, "step": 66732 }, { "epoch": 2.22, "grad_norm": 0.6982157826423645, "learning_rate": 9.458665297129671e-05, "loss": 1.7314, "step": 66733 }, { "epoch": 2.22, "grad_norm": 0.6818252801895142, "learning_rate": 9.457903544151049e-05, "loss": 1.7178, "step": 66734 }, { "epoch": 2.22, "grad_norm": 0.6362784504890442, "learning_rate": 9.457141816107517e-05, "loss": 1.67, "step": 66735 }, { "epoch": 2.22, "grad_norm": 0.6379858255386353, "learning_rate": 9.45638011299999e-05, "loss": 1.7349, "step": 66736 }, { "epoch": 2.22, "grad_norm": 0.6534304618835449, "learning_rate": 9.455618434829377e-05, "loss": 1.6498, "step": 66737 }, { "epoch": 2.22, "grad_norm": 0.6478326916694641, "learning_rate": 9.45485678159662e-05, "loss": 1.6413, "step": 66738 }, { "epoch": 2.22, "grad_norm": 0.6476686596870422, "learning_rate": 9.454095153302647e-05, "loss": 1.6123, "step": 66739 }, { "epoch": 2.22, "grad_norm": 0.6772103309631348, "learning_rate": 9.453333549948378e-05, "loss": 1.7083, "step": 66740 }, { "epoch": 2.22, "grad_norm": 0.6717978715896606, "learning_rate": 9.452571971534722e-05, "loss": 1.6909, "step": 66741 }, { "epoch": 2.22, "grad_norm": 0.6627500653266907, "learning_rate": 9.451810418062625e-05, "loss": 1.6457, "step": 66742 }, { "epoch": 2.22, "grad_norm": 0.647858738899231, "learning_rate": 9.451048889533006e-05, "loss": 1.718, "step": 66743 }, { "epoch": 2.22, "grad_norm": 0.6410207152366638, "learning_rate": 9.450287385946775e-05, "loss": 1.6855, "step": 66744 }, { "epoch": 2.22, "grad_norm": 0.6428818106651306, "learning_rate": 9.449525907304876e-05, "loss": 1.6813, "step": 66745 }, { "epoch": 2.22, "grad_norm": 0.6739776134490967, "learning_rate": 9.448764453608216e-05, "loss": 1.7883, "step": 66746 }, { "epoch": 2.22, "grad_norm": 0.6517673134803772, "learning_rate": 9.448003024857736e-05, "loss": 1.7322, "step": 66747 }, { "epoch": 2.22, "grad_norm": 0.6567949056625366, "learning_rate": 9.447241621054342e-05, "loss": 1.7068, "step": 66748 }, { "epoch": 2.22, "grad_norm": 0.672426700592041, "learning_rate": 9.44648024219898e-05, "loss": 1.732, "step": 66749 }, { "epoch": 2.22, "grad_norm": 0.6355026364326477, "learning_rate": 9.445718888292562e-05, "loss": 1.7356, "step": 66750 }, { "epoch": 2.22, "grad_norm": 0.6840841770172119, "learning_rate": 9.444957559336005e-05, "loss": 1.6506, "step": 66751 }, { "epoch": 2.22, "grad_norm": 0.6726331114768982, "learning_rate": 9.444196255330248e-05, "loss": 1.6957, "step": 66752 }, { "epoch": 2.22, "grad_norm": 0.6456204652786255, "learning_rate": 9.443434976276196e-05, "loss": 1.6566, "step": 66753 }, { "epoch": 2.22, "grad_norm": 0.6756398677825928, "learning_rate": 9.442673722174801e-05, "loss": 1.7026, "step": 66754 }, { "epoch": 2.22, "grad_norm": 0.6810798048973083, "learning_rate": 9.441912493026957e-05, "loss": 1.7331, "step": 66755 }, { "epoch": 2.22, "grad_norm": 0.6564079523086548, "learning_rate": 9.441151288833613e-05, "loss": 1.7733, "step": 66756 }, { "epoch": 2.22, "grad_norm": 0.6738218069076538, "learning_rate": 9.440390109595683e-05, "loss": 1.7003, "step": 66757 }, { "epoch": 2.22, "grad_norm": 0.6272814273834229, "learning_rate": 9.439628955314079e-05, "loss": 1.6954, "step": 66758 }, { "epoch": 2.22, "grad_norm": 0.6757652163505554, "learning_rate": 9.438867825989746e-05, "loss": 1.757, "step": 66759 }, { "epoch": 2.22, "grad_norm": 0.6500956416130066, "learning_rate": 9.438106721623588e-05, "loss": 1.7873, "step": 66760 }, { "epoch": 2.22, "grad_norm": 0.6567636728286743, "learning_rate": 9.437345642216547e-05, "loss": 1.7018, "step": 66761 }, { "epoch": 2.22, "grad_norm": 0.6783140897750854, "learning_rate": 9.436584587769541e-05, "loss": 1.6868, "step": 66762 }, { "epoch": 2.22, "grad_norm": 0.6509516835212708, "learning_rate": 9.435823558283482e-05, "loss": 1.7046, "step": 66763 }, { "epoch": 2.22, "grad_norm": 0.6638467907905579, "learning_rate": 9.435062553759312e-05, "loss": 1.7645, "step": 66764 }, { "epoch": 2.22, "grad_norm": 0.6863393187522888, "learning_rate": 9.434301574197936e-05, "loss": 1.6321, "step": 66765 }, { "epoch": 2.22, "grad_norm": 0.662970781326294, "learning_rate": 9.433540619600299e-05, "loss": 1.659, "step": 66766 }, { "epoch": 2.22, "grad_norm": 0.6431214809417725, "learning_rate": 9.432779689967302e-05, "loss": 1.6416, "step": 66767 }, { "epoch": 2.22, "grad_norm": 0.6733641028404236, "learning_rate": 9.432018785299888e-05, "loss": 1.6995, "step": 66768 }, { "epoch": 2.22, "grad_norm": 0.6526457071304321, "learning_rate": 9.431257905598976e-05, "loss": 1.6291, "step": 66769 }, { "epoch": 2.22, "grad_norm": 0.6665232181549072, "learning_rate": 9.430497050865476e-05, "loss": 1.6756, "step": 66770 }, { "epoch": 2.22, "grad_norm": 0.6620458364486694, "learning_rate": 9.42973622110033e-05, "loss": 1.7113, "step": 66771 }, { "epoch": 2.22, "grad_norm": 0.6855408549308777, "learning_rate": 9.428975416304455e-05, "loss": 1.6603, "step": 66772 }, { "epoch": 2.22, "grad_norm": 0.647392749786377, "learning_rate": 9.42821463647876e-05, "loss": 1.7134, "step": 66773 }, { "epoch": 2.22, "grad_norm": 0.6654552817344666, "learning_rate": 9.427453881624185e-05, "loss": 1.6947, "step": 66774 }, { "epoch": 2.22, "grad_norm": 0.6607987284660339, "learning_rate": 9.426693151741658e-05, "loss": 1.6698, "step": 66775 }, { "epoch": 2.22, "grad_norm": 1.197353482246399, "learning_rate": 9.425932446832094e-05, "loss": 1.7112, "step": 66776 }, { "epoch": 2.22, "grad_norm": 0.6687313318252563, "learning_rate": 9.425171766896407e-05, "loss": 1.7433, "step": 66777 }, { "epoch": 2.22, "grad_norm": 0.6800441145896912, "learning_rate": 9.424411111935542e-05, "loss": 1.6171, "step": 66778 }, { "epoch": 2.22, "grad_norm": 0.6896860599517822, "learning_rate": 9.423650481950408e-05, "loss": 1.733, "step": 66779 }, { "epoch": 2.22, "grad_norm": 0.6493080258369446, "learning_rate": 9.422889876941918e-05, "loss": 1.6969, "step": 66780 }, { "epoch": 2.22, "grad_norm": 0.6555883884429932, "learning_rate": 9.422129296911012e-05, "loss": 1.6744, "step": 66781 }, { "epoch": 2.22, "grad_norm": 0.6494835615158081, "learning_rate": 9.421368741858618e-05, "loss": 1.7416, "step": 66782 }, { "epoch": 2.22, "grad_norm": 1.0986143350601196, "learning_rate": 9.42060821178565e-05, "loss": 1.7071, "step": 66783 }, { "epoch": 2.22, "grad_norm": 0.680479884147644, "learning_rate": 9.419847706693019e-05, "loss": 1.6912, "step": 66784 }, { "epoch": 2.22, "grad_norm": 0.6379312872886658, "learning_rate": 9.419087226581672e-05, "loss": 1.6625, "step": 66785 }, { "epoch": 2.22, "grad_norm": 0.6438450813293457, "learning_rate": 9.41832677145252e-05, "loss": 1.759, "step": 66786 }, { "epoch": 2.22, "grad_norm": 0.6569888591766357, "learning_rate": 9.41756634130648e-05, "loss": 1.6908, "step": 66787 }, { "epoch": 2.22, "grad_norm": 0.674944281578064, "learning_rate": 9.416805936144479e-05, "loss": 1.7775, "step": 66788 }, { "epoch": 2.22, "grad_norm": 0.6602540016174316, "learning_rate": 9.416045555967456e-05, "loss": 1.7449, "step": 66789 }, { "epoch": 2.22, "grad_norm": 0.6336407661437988, "learning_rate": 9.415285200776317e-05, "loss": 1.6145, "step": 66790 }, { "epoch": 2.22, "grad_norm": 0.6642689108848572, "learning_rate": 9.41452487057198e-05, "loss": 1.7189, "step": 66791 }, { "epoch": 2.22, "grad_norm": 0.6532904505729675, "learning_rate": 9.413764565355388e-05, "loss": 1.686, "step": 66792 }, { "epoch": 2.22, "grad_norm": 0.6630693674087524, "learning_rate": 9.413004285127451e-05, "loss": 1.7145, "step": 66793 }, { "epoch": 2.22, "grad_norm": 0.6604706645011902, "learning_rate": 9.412244029889084e-05, "loss": 1.6592, "step": 66794 }, { "epoch": 2.22, "grad_norm": 0.7195526957511902, "learning_rate": 9.411483799641217e-05, "loss": 1.7312, "step": 66795 }, { "epoch": 2.22, "grad_norm": 0.6913943290710449, "learning_rate": 9.410723594384789e-05, "loss": 1.6631, "step": 66796 }, { "epoch": 2.22, "grad_norm": 0.6335946321487427, "learning_rate": 9.409963414120706e-05, "loss": 1.6669, "step": 66797 }, { "epoch": 2.22, "grad_norm": 0.6985100507736206, "learning_rate": 9.409203258849887e-05, "loss": 1.8001, "step": 66798 }, { "epoch": 2.22, "grad_norm": 0.6758937239646912, "learning_rate": 9.408443128573267e-05, "loss": 1.6931, "step": 66799 }, { "epoch": 2.22, "grad_norm": 0.6421881914138794, "learning_rate": 9.407683023291764e-05, "loss": 1.6632, "step": 66800 }, { "epoch": 2.22, "grad_norm": 0.6510065197944641, "learning_rate": 9.406922943006293e-05, "loss": 1.6844, "step": 66801 }, { "epoch": 2.22, "grad_norm": 0.6360290050506592, "learning_rate": 9.406162887717789e-05, "loss": 1.715, "step": 66802 }, { "epoch": 2.22, "grad_norm": 0.662794291973114, "learning_rate": 9.405402857427163e-05, "loss": 1.6622, "step": 66803 }, { "epoch": 2.22, "grad_norm": 0.6249379515647888, "learning_rate": 9.40464285213535e-05, "loss": 1.5923, "step": 66804 }, { "epoch": 2.22, "grad_norm": 0.6465754508972168, "learning_rate": 9.403882871843259e-05, "loss": 1.7204, "step": 66805 }, { "epoch": 2.22, "grad_norm": 0.6368574500083923, "learning_rate": 9.403122916551826e-05, "loss": 1.67, "step": 66806 }, { "epoch": 2.22, "grad_norm": 0.6623319387435913, "learning_rate": 9.402362986261971e-05, "loss": 1.7259, "step": 66807 }, { "epoch": 2.22, "grad_norm": 0.6508044004440308, "learning_rate": 9.401603080974599e-05, "loss": 1.6704, "step": 66808 }, { "epoch": 2.22, "grad_norm": 0.6796478033065796, "learning_rate": 9.400843200690656e-05, "loss": 1.6864, "step": 66809 }, { "epoch": 2.22, "grad_norm": 0.6804772019386292, "learning_rate": 9.400083345411046e-05, "loss": 1.8236, "step": 66810 }, { "epoch": 2.22, "grad_norm": 0.6463830471038818, "learning_rate": 9.39932351513671e-05, "loss": 1.7021, "step": 66811 }, { "epoch": 2.22, "grad_norm": 0.6510531306266785, "learning_rate": 9.398563709868558e-05, "loss": 1.7185, "step": 66812 }, { "epoch": 2.22, "grad_norm": 0.6535899043083191, "learning_rate": 9.397803929607505e-05, "loss": 1.6558, "step": 66813 }, { "epoch": 2.22, "grad_norm": 0.6550969481468201, "learning_rate": 9.397044174354491e-05, "loss": 1.684, "step": 66814 }, { "epoch": 2.22, "grad_norm": 0.6344460844993591, "learning_rate": 9.396284444110422e-05, "loss": 1.6049, "step": 66815 }, { "epoch": 2.22, "grad_norm": 0.6544799208641052, "learning_rate": 9.395524738876238e-05, "loss": 1.7026, "step": 66816 }, { "epoch": 2.22, "grad_norm": 0.6689544320106506, "learning_rate": 9.39476505865284e-05, "loss": 1.6635, "step": 66817 }, { "epoch": 2.22, "grad_norm": 0.6397832632064819, "learning_rate": 9.394005403441171e-05, "loss": 1.7227, "step": 66818 }, { "epoch": 2.22, "grad_norm": 0.6614128947257996, "learning_rate": 9.393245773242142e-05, "loss": 1.6931, "step": 66819 }, { "epoch": 2.22, "grad_norm": 0.6466425061225891, "learning_rate": 9.392486168056669e-05, "loss": 1.7254, "step": 66820 }, { "epoch": 2.22, "grad_norm": 0.6442713737487793, "learning_rate": 9.391726587885688e-05, "loss": 1.7356, "step": 66821 }, { "epoch": 2.22, "grad_norm": 0.6558425426483154, "learning_rate": 9.390967032730117e-05, "loss": 1.6642, "step": 66822 }, { "epoch": 2.22, "grad_norm": 0.6544618606567383, "learning_rate": 9.390207502590867e-05, "loss": 1.7054, "step": 66823 }, { "epoch": 2.22, "grad_norm": 0.6458001732826233, "learning_rate": 9.389447997468866e-05, "loss": 1.6954, "step": 66824 }, { "epoch": 2.22, "grad_norm": 0.6732293963432312, "learning_rate": 9.38868851736505e-05, "loss": 1.6773, "step": 66825 }, { "epoch": 2.22, "grad_norm": 0.629206657409668, "learning_rate": 9.387929062280326e-05, "loss": 1.6804, "step": 66826 }, { "epoch": 2.22, "grad_norm": 0.6603828072547913, "learning_rate": 9.387169632215611e-05, "loss": 1.7492, "step": 66827 }, { "epoch": 2.22, "grad_norm": 0.6304147839546204, "learning_rate": 9.386410227171847e-05, "loss": 1.649, "step": 66828 }, { "epoch": 2.22, "grad_norm": 0.6927210688591003, "learning_rate": 9.385650847149939e-05, "loss": 1.6693, "step": 66829 }, { "epoch": 2.22, "grad_norm": 0.6297959685325623, "learning_rate": 9.384891492150809e-05, "loss": 1.7255, "step": 66830 }, { "epoch": 2.22, "grad_norm": 0.662189245223999, "learning_rate": 9.384132162175379e-05, "loss": 1.7127, "step": 66831 }, { "epoch": 2.22, "grad_norm": 0.6641258597373962, "learning_rate": 9.383372857224597e-05, "loss": 1.7819, "step": 66832 }, { "epoch": 2.22, "grad_norm": 0.680632472038269, "learning_rate": 9.38261357729934e-05, "loss": 1.7929, "step": 66833 }, { "epoch": 2.22, "grad_norm": 0.6656672954559326, "learning_rate": 9.381854322400555e-05, "loss": 1.7157, "step": 66834 }, { "epoch": 2.22, "grad_norm": 0.6514290571212769, "learning_rate": 9.38109509252917e-05, "loss": 1.6634, "step": 66835 }, { "epoch": 2.22, "grad_norm": 0.6730652451515198, "learning_rate": 9.380335887686096e-05, "loss": 1.7673, "step": 66836 }, { "epoch": 2.22, "grad_norm": 0.6533994674682617, "learning_rate": 9.37957670787225e-05, "loss": 1.6511, "step": 66837 }, { "epoch": 2.22, "grad_norm": 0.6708981990814209, "learning_rate": 9.378817553088557e-05, "loss": 1.6937, "step": 66838 }, { "epoch": 2.22, "grad_norm": 0.6341673731803894, "learning_rate": 9.378058423335961e-05, "loss": 1.736, "step": 66839 }, { "epoch": 2.22, "grad_norm": 0.6428684592247009, "learning_rate": 9.377299318615346e-05, "loss": 1.6163, "step": 66840 }, { "epoch": 2.22, "grad_norm": 0.667907178401947, "learning_rate": 9.376540238927649e-05, "loss": 1.7793, "step": 66841 }, { "epoch": 2.22, "grad_norm": 0.6734390258789062, "learning_rate": 9.3757811842738e-05, "loss": 1.7203, "step": 66842 }, { "epoch": 2.22, "grad_norm": 0.6419651508331299, "learning_rate": 9.37502215465472e-05, "loss": 1.6907, "step": 66843 }, { "epoch": 2.22, "grad_norm": 0.6534835696220398, "learning_rate": 9.374263150071309e-05, "loss": 1.7656, "step": 66844 }, { "epoch": 2.22, "grad_norm": 0.6201545000076294, "learning_rate": 9.373504170524509e-05, "loss": 1.6127, "step": 66845 }, { "epoch": 2.22, "grad_norm": 0.6461543440818787, "learning_rate": 9.372745216015249e-05, "loss": 1.6801, "step": 66846 }, { "epoch": 2.22, "grad_norm": 0.6315028667449951, "learning_rate": 9.371986286544422e-05, "loss": 1.6702, "step": 66847 }, { "epoch": 2.22, "grad_norm": 0.6579766869544983, "learning_rate": 9.371227382112964e-05, "loss": 1.6751, "step": 66848 }, { "epoch": 2.22, "grad_norm": 0.6287710666656494, "learning_rate": 9.370468502721804e-05, "loss": 1.6974, "step": 66849 }, { "epoch": 2.22, "grad_norm": 0.6588811874389648, "learning_rate": 9.369709648371859e-05, "loss": 1.6833, "step": 66850 }, { "epoch": 2.22, "grad_norm": 0.6517373919487, "learning_rate": 9.368950819064033e-05, "loss": 1.681, "step": 66851 }, { "epoch": 2.22, "grad_norm": 0.6598880887031555, "learning_rate": 9.368192014799274e-05, "loss": 1.7702, "step": 66852 }, { "epoch": 2.22, "grad_norm": 0.6557010412216187, "learning_rate": 9.367433235578489e-05, "loss": 1.7264, "step": 66853 }, { "epoch": 2.22, "grad_norm": 0.6492440104484558, "learning_rate": 9.366674481402589e-05, "loss": 1.687, "step": 66854 }, { "epoch": 2.22, "grad_norm": 0.6305115818977356, "learning_rate": 9.365915752272505e-05, "loss": 1.6617, "step": 66855 }, { "epoch": 2.22, "grad_norm": 0.669940173625946, "learning_rate": 9.365157048189171e-05, "loss": 1.7675, "step": 66856 }, { "epoch": 2.22, "grad_norm": 0.6503625512123108, "learning_rate": 9.364398369153495e-05, "loss": 1.6696, "step": 66857 }, { "epoch": 2.22, "grad_norm": 0.6383498311042786, "learning_rate": 9.36363971516639e-05, "loss": 1.6364, "step": 66858 }, { "epoch": 2.22, "grad_norm": 0.6830548644065857, "learning_rate": 9.362881086228794e-05, "loss": 1.7469, "step": 66859 }, { "epoch": 2.22, "grad_norm": 0.670859694480896, "learning_rate": 9.362122482341624e-05, "loss": 1.6562, "step": 66860 }, { "epoch": 2.22, "grad_norm": 0.6475529074668884, "learning_rate": 9.361363903505783e-05, "loss": 1.682, "step": 66861 }, { "epoch": 2.22, "grad_norm": 0.6434668898582458, "learning_rate": 9.360605349722214e-05, "loss": 1.704, "step": 66862 }, { "epoch": 2.22, "grad_norm": 0.6695492267608643, "learning_rate": 9.35984682099182e-05, "loss": 1.6592, "step": 66863 }, { "epoch": 2.22, "grad_norm": 0.6411577463150024, "learning_rate": 9.359088317315542e-05, "loss": 1.746, "step": 66864 }, { "epoch": 2.22, "grad_norm": 0.6565945744514465, "learning_rate": 9.358329838694279e-05, "loss": 1.6513, "step": 66865 }, { "epoch": 2.22, "grad_norm": 0.6474418044090271, "learning_rate": 9.357571385128972e-05, "loss": 1.6389, "step": 66866 }, { "epoch": 2.22, "grad_norm": 0.6693803668022156, "learning_rate": 9.35681295662053e-05, "loss": 1.6445, "step": 66867 }, { "epoch": 2.22, "grad_norm": 0.6625841856002808, "learning_rate": 9.356054553169869e-05, "loss": 1.7031, "step": 66868 }, { "epoch": 2.22, "grad_norm": 0.6568336486816406, "learning_rate": 9.355296174777924e-05, "loss": 1.6721, "step": 66869 }, { "epoch": 2.22, "grad_norm": 0.6795457005500793, "learning_rate": 9.354537821445597e-05, "loss": 1.6847, "step": 66870 }, { "epoch": 2.22, "grad_norm": 0.6806455254554749, "learning_rate": 9.35377949317383e-05, "loss": 1.7649, "step": 66871 }, { "epoch": 2.22, "grad_norm": 0.6640210151672363, "learning_rate": 9.353021189963522e-05, "loss": 1.7014, "step": 66872 }, { "epoch": 2.22, "grad_norm": 0.6930014491081238, "learning_rate": 9.352262911815616e-05, "loss": 1.7424, "step": 66873 }, { "epoch": 2.22, "grad_norm": 0.6640735268592834, "learning_rate": 9.35150465873102e-05, "loss": 1.6778, "step": 66874 }, { "epoch": 2.22, "grad_norm": 0.6611359715461731, "learning_rate": 9.350746430710643e-05, "loss": 1.6801, "step": 66875 }, { "epoch": 2.22, "grad_norm": 0.65569669008255, "learning_rate": 9.349988227755426e-05, "loss": 1.7583, "step": 66876 }, { "epoch": 2.23, "grad_norm": 0.6670072078704834, "learning_rate": 9.349230049866276e-05, "loss": 1.6398, "step": 66877 }, { "epoch": 2.23, "grad_norm": 0.6782692074775696, "learning_rate": 9.348471897044124e-05, "loss": 1.6688, "step": 66878 }, { "epoch": 2.23, "grad_norm": 0.642950713634491, "learning_rate": 9.347713769289886e-05, "loss": 1.7516, "step": 66879 }, { "epoch": 2.23, "grad_norm": 0.655742883682251, "learning_rate": 9.346955666604468e-05, "loss": 1.7348, "step": 66880 }, { "epoch": 2.23, "grad_norm": 0.704684317111969, "learning_rate": 9.346197588988816e-05, "loss": 1.6185, "step": 66881 }, { "epoch": 2.23, "grad_norm": 0.6527701616287231, "learning_rate": 9.345439536443824e-05, "loss": 1.7538, "step": 66882 }, { "epoch": 2.23, "grad_norm": 0.6782297492027283, "learning_rate": 9.344681508970436e-05, "loss": 1.7961, "step": 66883 }, { "epoch": 2.23, "grad_norm": 0.6820260882377625, "learning_rate": 9.343923506569553e-05, "loss": 1.6704, "step": 66884 }, { "epoch": 2.23, "grad_norm": 0.6556706428527832, "learning_rate": 9.343165529242112e-05, "loss": 1.7183, "step": 66885 }, { "epoch": 2.23, "grad_norm": 0.6692537665367126, "learning_rate": 9.342407576989025e-05, "loss": 1.6888, "step": 66886 }, { "epoch": 2.23, "grad_norm": 0.6840450763702393, "learning_rate": 9.3416496498112e-05, "loss": 1.7497, "step": 66887 }, { "epoch": 2.23, "grad_norm": 0.6749669313430786, "learning_rate": 9.340891747709581e-05, "loss": 1.6631, "step": 66888 }, { "epoch": 2.23, "grad_norm": 0.6820290088653564, "learning_rate": 9.340133870685074e-05, "loss": 1.6956, "step": 66889 }, { "epoch": 2.23, "grad_norm": 0.6651394367218018, "learning_rate": 9.339376018738589e-05, "loss": 1.7816, "step": 66890 }, { "epoch": 2.23, "grad_norm": 0.6411508917808533, "learning_rate": 9.33861819187106e-05, "loss": 1.6844, "step": 66891 }, { "epoch": 2.23, "grad_norm": 0.6553289294242859, "learning_rate": 9.337860390083412e-05, "loss": 1.655, "step": 66892 }, { "epoch": 2.23, "grad_norm": 0.650165855884552, "learning_rate": 9.337102613376556e-05, "loss": 1.6982, "step": 66893 }, { "epoch": 2.23, "grad_norm": 0.673501193523407, "learning_rate": 9.336344861751406e-05, "loss": 1.6865, "step": 66894 }, { "epoch": 2.23, "grad_norm": 0.6462398767471313, "learning_rate": 9.335587135208898e-05, "loss": 1.6395, "step": 66895 }, { "epoch": 2.23, "grad_norm": 0.6360803246498108, "learning_rate": 9.33482943374994e-05, "loss": 1.6282, "step": 66896 }, { "epoch": 2.23, "grad_norm": 0.6566627025604248, "learning_rate": 9.334071757375448e-05, "loss": 1.6618, "step": 66897 }, { "epoch": 2.23, "grad_norm": 0.6582985520362854, "learning_rate": 9.333314106086345e-05, "loss": 1.6696, "step": 66898 }, { "epoch": 2.23, "grad_norm": 0.6477686762809753, "learning_rate": 9.332556479883564e-05, "loss": 1.6355, "step": 66899 }, { "epoch": 2.23, "grad_norm": 0.662879228591919, "learning_rate": 9.331798878768015e-05, "loss": 1.6628, "step": 66900 }, { "epoch": 2.23, "grad_norm": 0.678327202796936, "learning_rate": 9.331041302740609e-05, "loss": 1.6166, "step": 66901 }, { "epoch": 2.23, "grad_norm": 0.6550473570823669, "learning_rate": 9.330283751802278e-05, "loss": 1.6446, "step": 66902 }, { "epoch": 2.23, "grad_norm": 0.6575614213943481, "learning_rate": 9.32952622595394e-05, "loss": 1.5845, "step": 66903 }, { "epoch": 2.23, "grad_norm": 0.6578876376152039, "learning_rate": 9.328768725196503e-05, "loss": 1.7104, "step": 66904 }, { "epoch": 2.23, "grad_norm": 0.6878015398979187, "learning_rate": 9.328011249530894e-05, "loss": 1.7614, "step": 66905 }, { "epoch": 2.23, "grad_norm": 0.6582425832748413, "learning_rate": 9.327253798958041e-05, "loss": 1.7433, "step": 66906 }, { "epoch": 2.23, "grad_norm": 0.6789857745170593, "learning_rate": 9.326496373478858e-05, "loss": 1.6965, "step": 66907 }, { "epoch": 2.23, "grad_norm": 0.6577920913696289, "learning_rate": 9.325738973094255e-05, "loss": 1.6948, "step": 66908 }, { "epoch": 2.23, "grad_norm": 0.6796480417251587, "learning_rate": 9.324981597805163e-05, "loss": 1.7291, "step": 66909 }, { "epoch": 2.23, "grad_norm": 0.6546857953071594, "learning_rate": 9.324224247612501e-05, "loss": 1.6929, "step": 66910 }, { "epoch": 2.23, "grad_norm": 0.6964234113693237, "learning_rate": 9.323466922517172e-05, "loss": 1.762, "step": 66911 }, { "epoch": 2.23, "grad_norm": 0.6499143242835999, "learning_rate": 9.32270962252011e-05, "loss": 1.7049, "step": 66912 }, { "epoch": 2.23, "grad_norm": 0.6883138418197632, "learning_rate": 9.321952347622241e-05, "loss": 1.7345, "step": 66913 }, { "epoch": 2.23, "grad_norm": 0.6823033690452576, "learning_rate": 9.321195097824475e-05, "loss": 1.6846, "step": 66914 }, { "epoch": 2.23, "grad_norm": 0.6835720539093018, "learning_rate": 9.320437873127721e-05, "loss": 1.6659, "step": 66915 }, { "epoch": 2.23, "grad_norm": 0.6591936349868774, "learning_rate": 9.31968067353292e-05, "loss": 1.7397, "step": 66916 }, { "epoch": 2.23, "grad_norm": 0.6830201148986816, "learning_rate": 9.318923499040977e-05, "loss": 1.7221, "step": 66917 }, { "epoch": 2.23, "grad_norm": 0.655907928943634, "learning_rate": 9.318166349652805e-05, "loss": 1.7508, "step": 66918 }, { "epoch": 2.23, "grad_norm": 0.6571567058563232, "learning_rate": 9.31740922536934e-05, "loss": 1.653, "step": 66919 }, { "epoch": 2.23, "grad_norm": 0.6592218279838562, "learning_rate": 9.316652126191487e-05, "loss": 1.7461, "step": 66920 }, { "epoch": 2.23, "grad_norm": 0.6895955204963684, "learning_rate": 9.315895052120176e-05, "loss": 1.5776, "step": 66921 }, { "epoch": 2.23, "grad_norm": 0.679172694683075, "learning_rate": 9.315138003156312e-05, "loss": 1.7913, "step": 66922 }, { "epoch": 2.23, "grad_norm": 0.6697997450828552, "learning_rate": 9.314380979300832e-05, "loss": 1.7201, "step": 66923 }, { "epoch": 2.23, "grad_norm": 0.6759961843490601, "learning_rate": 9.313623980554648e-05, "loss": 1.7046, "step": 66924 }, { "epoch": 2.23, "grad_norm": 0.6640107035636902, "learning_rate": 9.312867006918668e-05, "loss": 1.7295, "step": 66925 }, { "epoch": 2.23, "grad_norm": 0.6423943638801575, "learning_rate": 9.312110058393825e-05, "loss": 1.7146, "step": 66926 }, { "epoch": 2.23, "grad_norm": 0.6716606020927429, "learning_rate": 9.311353134981026e-05, "loss": 1.7037, "step": 66927 }, { "epoch": 2.23, "grad_norm": 0.6583346128463745, "learning_rate": 9.310596236681204e-05, "loss": 1.6919, "step": 66928 }, { "epoch": 2.23, "grad_norm": 0.6918336749076843, "learning_rate": 9.309839363495269e-05, "loss": 1.6966, "step": 66929 }, { "epoch": 2.23, "grad_norm": 0.6763047575950623, "learning_rate": 9.30908251542413e-05, "loss": 1.6994, "step": 66930 }, { "epoch": 2.23, "grad_norm": 0.6446914076805115, "learning_rate": 9.308325692468727e-05, "loss": 1.7521, "step": 66931 }, { "epoch": 2.23, "grad_norm": 0.6601552963256836, "learning_rate": 9.307568894629961e-05, "loss": 1.666, "step": 66932 }, { "epoch": 2.23, "grad_norm": 0.6906133890151978, "learning_rate": 9.306812121908762e-05, "loss": 1.7543, "step": 66933 }, { "epoch": 2.23, "grad_norm": 0.6530947685241699, "learning_rate": 9.306055374306038e-05, "loss": 1.7092, "step": 66934 }, { "epoch": 2.23, "grad_norm": 0.6805368661880493, "learning_rate": 9.305298651822722e-05, "loss": 1.6946, "step": 66935 }, { "epoch": 2.23, "grad_norm": 0.6623538136482239, "learning_rate": 9.304541954459724e-05, "loss": 1.7256, "step": 66936 }, { "epoch": 2.23, "grad_norm": 0.6638235449790955, "learning_rate": 9.303785282217956e-05, "loss": 1.803, "step": 66937 }, { "epoch": 2.23, "grad_norm": 0.6838203072547913, "learning_rate": 9.30302863509835e-05, "loss": 1.7003, "step": 66938 }, { "epoch": 2.23, "grad_norm": 0.6515955328941345, "learning_rate": 9.302272013101816e-05, "loss": 1.6396, "step": 66939 }, { "epoch": 2.23, "grad_norm": 0.6711707711219788, "learning_rate": 9.301515416229268e-05, "loss": 1.6449, "step": 66940 }, { "epoch": 2.23, "grad_norm": 0.6743425130844116, "learning_rate": 9.30075884448163e-05, "loss": 1.705, "step": 66941 }, { "epoch": 2.23, "grad_norm": 0.6355077624320984, "learning_rate": 9.30000229785983e-05, "loss": 1.6836, "step": 66942 }, { "epoch": 2.23, "grad_norm": 0.6534762978553772, "learning_rate": 9.299245776364774e-05, "loss": 1.6857, "step": 66943 }, { "epoch": 2.23, "grad_norm": 0.628767728805542, "learning_rate": 9.298489279997379e-05, "loss": 1.6533, "step": 66944 }, { "epoch": 2.23, "grad_norm": 0.6789646744728088, "learning_rate": 9.297732808758576e-05, "loss": 1.6598, "step": 66945 }, { "epoch": 2.23, "grad_norm": 0.6541240215301514, "learning_rate": 9.296976362649271e-05, "loss": 1.6961, "step": 66946 }, { "epoch": 2.23, "grad_norm": 0.6797055006027222, "learning_rate": 9.296219941670381e-05, "loss": 1.6507, "step": 66947 }, { "epoch": 2.23, "grad_norm": 0.653687596321106, "learning_rate": 9.295463545822828e-05, "loss": 1.7141, "step": 66948 }, { "epoch": 2.23, "grad_norm": 0.6605112552642822, "learning_rate": 9.29470717510755e-05, "loss": 1.6624, "step": 66949 }, { "epoch": 2.23, "grad_norm": 0.6519722938537598, "learning_rate": 9.293950829525429e-05, "loss": 1.7338, "step": 66950 }, { "epoch": 2.23, "grad_norm": 0.6670356392860413, "learning_rate": 9.2931945090774e-05, "loss": 1.6409, "step": 66951 }, { "epoch": 2.23, "grad_norm": 0.675359845161438, "learning_rate": 9.292438213764394e-05, "loss": 1.7004, "step": 66952 }, { "epoch": 2.23, "grad_norm": 0.6542338132858276, "learning_rate": 9.291681943587315e-05, "loss": 1.7031, "step": 66953 }, { "epoch": 2.23, "grad_norm": 0.6417067050933838, "learning_rate": 9.290925698547072e-05, "loss": 1.7021, "step": 66954 }, { "epoch": 2.23, "grad_norm": 0.665205180644989, "learning_rate": 9.290169478644593e-05, "loss": 1.7174, "step": 66955 }, { "epoch": 2.23, "grad_norm": 0.6680684685707092, "learning_rate": 9.28941328388082e-05, "loss": 1.6889, "step": 66956 }, { "epoch": 2.23, "grad_norm": 0.6308503150939941, "learning_rate": 9.288657114256627e-05, "loss": 1.6295, "step": 66957 }, { "epoch": 2.23, "grad_norm": 0.6601619720458984, "learning_rate": 9.28790096977295e-05, "loss": 1.756, "step": 66958 }, { "epoch": 2.23, "grad_norm": 0.6616829633712769, "learning_rate": 9.287144850430722e-05, "loss": 1.701, "step": 66959 }, { "epoch": 2.23, "grad_norm": 0.6574331521987915, "learning_rate": 9.286388756230846e-05, "loss": 1.7539, "step": 66960 }, { "epoch": 2.23, "grad_norm": 0.6516222357749939, "learning_rate": 9.285632687174234e-05, "loss": 1.6852, "step": 66961 }, { "epoch": 2.23, "grad_norm": 0.647836446762085, "learning_rate": 9.284876643261811e-05, "loss": 1.6347, "step": 66962 }, { "epoch": 2.23, "grad_norm": 0.6510177254676819, "learning_rate": 9.284120624494515e-05, "loss": 1.7103, "step": 66963 }, { "epoch": 2.23, "grad_norm": 0.6536359786987305, "learning_rate": 9.283364630873228e-05, "loss": 1.6594, "step": 66964 }, { "epoch": 2.23, "grad_norm": 0.6391551494598389, "learning_rate": 9.282608662398878e-05, "loss": 1.7624, "step": 66965 }, { "epoch": 2.23, "grad_norm": 0.6481332182884216, "learning_rate": 9.281852719072402e-05, "loss": 1.6844, "step": 66966 }, { "epoch": 2.23, "grad_norm": 0.6505879163742065, "learning_rate": 9.281096800894703e-05, "loss": 1.7646, "step": 66967 }, { "epoch": 2.23, "grad_norm": 0.6421589851379395, "learning_rate": 9.280340907866692e-05, "loss": 1.6773, "step": 66968 }, { "epoch": 2.23, "grad_norm": 0.6669626235961914, "learning_rate": 9.279585039989304e-05, "loss": 1.7118, "step": 66969 }, { "epoch": 2.23, "grad_norm": 0.6301852464675903, "learning_rate": 9.278829197263442e-05, "loss": 1.699, "step": 66970 }, { "epoch": 2.23, "grad_norm": 0.635095477104187, "learning_rate": 9.278073379690024e-05, "loss": 1.7369, "step": 66971 }, { "epoch": 2.23, "grad_norm": 0.6420301198959351, "learning_rate": 9.277317587269969e-05, "loss": 1.8112, "step": 66972 }, { "epoch": 2.23, "grad_norm": 0.637673020362854, "learning_rate": 9.276561820004206e-05, "loss": 1.6699, "step": 66973 }, { "epoch": 2.23, "grad_norm": 0.6765775680541992, "learning_rate": 9.275806077893644e-05, "loss": 1.7433, "step": 66974 }, { "epoch": 2.23, "grad_norm": 0.6524280905723572, "learning_rate": 9.275050360939192e-05, "loss": 1.7394, "step": 66975 }, { "epoch": 2.23, "grad_norm": 0.6664429306983948, "learning_rate": 9.274294669141785e-05, "loss": 1.7848, "step": 66976 }, { "epoch": 2.23, "grad_norm": 0.6667299866676331, "learning_rate": 9.273539002502328e-05, "loss": 1.6764, "step": 66977 }, { "epoch": 2.23, "grad_norm": 0.6547771692276001, "learning_rate": 9.272783361021731e-05, "loss": 1.7597, "step": 66978 }, { "epoch": 2.23, "grad_norm": 0.6680107712745667, "learning_rate": 9.272027744700934e-05, "loss": 1.5916, "step": 66979 }, { "epoch": 2.23, "grad_norm": 0.6682561635971069, "learning_rate": 9.271272153540827e-05, "loss": 1.7157, "step": 66980 }, { "epoch": 2.23, "grad_norm": 0.6620821952819824, "learning_rate": 9.270516587542353e-05, "loss": 1.7117, "step": 66981 }, { "epoch": 2.23, "grad_norm": 0.6480442881584167, "learning_rate": 9.269761046706409e-05, "loss": 1.6588, "step": 66982 }, { "epoch": 2.23, "grad_norm": 0.6310163140296936, "learning_rate": 9.269005531033928e-05, "loss": 1.7135, "step": 66983 }, { "epoch": 2.23, "grad_norm": 0.6580989956855774, "learning_rate": 9.268250040525822e-05, "loss": 1.6835, "step": 66984 }, { "epoch": 2.23, "grad_norm": 0.6644477248191833, "learning_rate": 9.267494575182996e-05, "loss": 1.7482, "step": 66985 }, { "epoch": 2.23, "grad_norm": 0.6578698754310608, "learning_rate": 9.266739135006389e-05, "loss": 1.6994, "step": 66986 }, { "epoch": 2.23, "grad_norm": 0.6626939177513123, "learning_rate": 9.265983719996889e-05, "loss": 1.5966, "step": 66987 }, { "epoch": 2.23, "grad_norm": 0.6539584994316101, "learning_rate": 9.265228330155444e-05, "loss": 1.8106, "step": 66988 }, { "epoch": 2.23, "grad_norm": 0.6430093050003052, "learning_rate": 9.264472965482956e-05, "loss": 1.6438, "step": 66989 }, { "epoch": 2.23, "grad_norm": 0.6458104848861694, "learning_rate": 9.263717625980334e-05, "loss": 1.7457, "step": 66990 }, { "epoch": 2.23, "grad_norm": 0.6496412754058838, "learning_rate": 9.262962311648512e-05, "loss": 1.7685, "step": 66991 }, { "epoch": 2.23, "grad_norm": 0.6481248736381531, "learning_rate": 9.26220702248839e-05, "loss": 1.7082, "step": 66992 }, { "epoch": 2.23, "grad_norm": 0.6515290141105652, "learning_rate": 9.2614517585009e-05, "loss": 1.674, "step": 66993 }, { "epoch": 2.23, "grad_norm": 0.64260333776474, "learning_rate": 9.260696519686945e-05, "loss": 1.7272, "step": 66994 }, { "epoch": 2.23, "grad_norm": 0.6529881358146667, "learning_rate": 9.259941306047458e-05, "loss": 1.6358, "step": 66995 }, { "epoch": 2.23, "grad_norm": 0.674608588218689, "learning_rate": 9.259186117583344e-05, "loss": 1.7023, "step": 66996 }, { "epoch": 2.23, "grad_norm": 0.649605393409729, "learning_rate": 9.258430954295514e-05, "loss": 1.7276, "step": 66997 }, { "epoch": 2.23, "grad_norm": 0.6596592664718628, "learning_rate": 9.257675816184903e-05, "loss": 1.6836, "step": 66998 }, { "epoch": 2.23, "grad_norm": 0.6799114942550659, "learning_rate": 9.256920703252405e-05, "loss": 1.7606, "step": 66999 }, { "epoch": 2.23, "grad_norm": 0.653437077999115, "learning_rate": 9.25616561549896e-05, "loss": 1.6956, "step": 67000 }, { "epoch": 2.23, "grad_norm": 0.6778172254562378, "learning_rate": 9.255410552925465e-05, "loss": 1.7546, "step": 67001 }, { "epoch": 2.23, "grad_norm": 0.641227662563324, "learning_rate": 9.254655515532855e-05, "loss": 1.6963, "step": 67002 }, { "epoch": 2.23, "grad_norm": 0.6643278002738953, "learning_rate": 9.253900503322033e-05, "loss": 1.7052, "step": 67003 }, { "epoch": 2.23, "grad_norm": 0.6795483827590942, "learning_rate": 9.253145516293914e-05, "loss": 1.7479, "step": 67004 }, { "epoch": 2.23, "grad_norm": 0.6407430171966553, "learning_rate": 9.252390554449424e-05, "loss": 1.6767, "step": 67005 }, { "epoch": 2.23, "grad_norm": 0.6472794413566589, "learning_rate": 9.251635617789477e-05, "loss": 1.6672, "step": 67006 }, { "epoch": 2.23, "grad_norm": 0.6564396619796753, "learning_rate": 9.250880706314979e-05, "loss": 1.6968, "step": 67007 }, { "epoch": 2.23, "grad_norm": 0.6710674166679382, "learning_rate": 9.250125820026854e-05, "loss": 1.6975, "step": 67008 }, { "epoch": 2.23, "grad_norm": 0.6753432154655457, "learning_rate": 9.24937095892603e-05, "loss": 1.7307, "step": 67009 }, { "epoch": 2.23, "grad_norm": 0.6522603631019592, "learning_rate": 9.248616123013411e-05, "loss": 1.6969, "step": 67010 }, { "epoch": 2.23, "grad_norm": 0.6465099453926086, "learning_rate": 9.247861312289904e-05, "loss": 1.7841, "step": 67011 }, { "epoch": 2.23, "grad_norm": 0.640342116355896, "learning_rate": 9.247106526756445e-05, "loss": 1.6471, "step": 67012 }, { "epoch": 2.23, "grad_norm": 0.681258499622345, "learning_rate": 9.24635176641394e-05, "loss": 1.6838, "step": 67013 }, { "epoch": 2.23, "grad_norm": 0.6366971731185913, "learning_rate": 9.245597031263301e-05, "loss": 1.6484, "step": 67014 }, { "epoch": 2.23, "grad_norm": 0.6442946195602417, "learning_rate": 9.244842321305445e-05, "loss": 1.7652, "step": 67015 }, { "epoch": 2.23, "grad_norm": 0.6764789819717407, "learning_rate": 9.244087636541305e-05, "loss": 1.6918, "step": 67016 }, { "epoch": 2.23, "grad_norm": 0.6392192244529724, "learning_rate": 9.243332976971779e-05, "loss": 1.6008, "step": 67017 }, { "epoch": 2.23, "grad_norm": 0.6619320511817932, "learning_rate": 9.242578342597783e-05, "loss": 1.8031, "step": 67018 }, { "epoch": 2.23, "grad_norm": 0.6502761840820312, "learning_rate": 9.241823733420248e-05, "loss": 1.7206, "step": 67019 }, { "epoch": 2.23, "grad_norm": 0.6534677743911743, "learning_rate": 9.241069149440078e-05, "loss": 1.6844, "step": 67020 }, { "epoch": 2.23, "grad_norm": 0.6863647699356079, "learning_rate": 9.240314590658185e-05, "loss": 1.7056, "step": 67021 }, { "epoch": 2.23, "grad_norm": 0.6765372157096863, "learning_rate": 9.23956005707549e-05, "loss": 1.6635, "step": 67022 }, { "epoch": 2.23, "grad_norm": 0.6317496299743652, "learning_rate": 9.238805548692918e-05, "loss": 1.6731, "step": 67023 }, { "epoch": 2.23, "grad_norm": 0.6454585790634155, "learning_rate": 9.238051065511381e-05, "loss": 1.7261, "step": 67024 }, { "epoch": 2.23, "grad_norm": 0.6578784584999084, "learning_rate": 9.237296607531777e-05, "loss": 1.7633, "step": 67025 }, { "epoch": 2.23, "grad_norm": 0.6589150428771973, "learning_rate": 9.23654217475505e-05, "loss": 1.6928, "step": 67026 }, { "epoch": 2.23, "grad_norm": 0.6648765802383423, "learning_rate": 9.235787767182097e-05, "loss": 1.7662, "step": 67027 }, { "epoch": 2.23, "grad_norm": 0.6462273597717285, "learning_rate": 9.235033384813828e-05, "loss": 1.6919, "step": 67028 }, { "epoch": 2.23, "grad_norm": 0.6382602453231812, "learning_rate": 9.234279027651173e-05, "loss": 1.6635, "step": 67029 }, { "epoch": 2.23, "grad_norm": 0.658106803894043, "learning_rate": 9.23352469569505e-05, "loss": 1.772, "step": 67030 }, { "epoch": 2.23, "grad_norm": 0.6522846221923828, "learning_rate": 9.232770388946371e-05, "loss": 1.7235, "step": 67031 }, { "epoch": 2.23, "grad_norm": 0.649810791015625, "learning_rate": 9.232016107406038e-05, "loss": 1.6677, "step": 67032 }, { "epoch": 2.23, "grad_norm": 0.6635028123855591, "learning_rate": 9.231261851074985e-05, "loss": 1.728, "step": 67033 }, { "epoch": 2.23, "grad_norm": 0.638484537601471, "learning_rate": 9.230507619954123e-05, "loss": 1.6313, "step": 67034 }, { "epoch": 2.23, "grad_norm": 0.6542633175849915, "learning_rate": 9.229753414044355e-05, "loss": 1.625, "step": 67035 }, { "epoch": 2.23, "grad_norm": 0.6525758504867554, "learning_rate": 9.228999233346611e-05, "loss": 1.6535, "step": 67036 }, { "epoch": 2.23, "grad_norm": 0.6373032331466675, "learning_rate": 9.228245077861797e-05, "loss": 1.687, "step": 67037 }, { "epoch": 2.23, "grad_norm": 0.6597750186920166, "learning_rate": 9.227490947590839e-05, "loss": 1.6198, "step": 67038 }, { "epoch": 2.23, "grad_norm": 0.6223646402359009, "learning_rate": 9.22673684253464e-05, "loss": 1.7327, "step": 67039 }, { "epoch": 2.23, "grad_norm": 0.6524644494056702, "learning_rate": 9.225982762694129e-05, "loss": 1.6907, "step": 67040 }, { "epoch": 2.23, "grad_norm": 0.6433753967285156, "learning_rate": 9.225228708070214e-05, "loss": 1.5938, "step": 67041 }, { "epoch": 2.23, "grad_norm": 0.6450948715209961, "learning_rate": 9.224474678663801e-05, "loss": 1.7289, "step": 67042 }, { "epoch": 2.23, "grad_norm": 0.7132592797279358, "learning_rate": 9.223720674475825e-05, "loss": 1.7355, "step": 67043 }, { "epoch": 2.23, "grad_norm": 0.6595054864883423, "learning_rate": 9.222966695507182e-05, "loss": 1.6954, "step": 67044 }, { "epoch": 2.23, "grad_norm": 0.669800877571106, "learning_rate": 9.222212741758805e-05, "loss": 1.6947, "step": 67045 }, { "epoch": 2.23, "grad_norm": 0.6722135543823242, "learning_rate": 9.221458813231601e-05, "loss": 1.6953, "step": 67046 }, { "epoch": 2.23, "grad_norm": 0.651668906211853, "learning_rate": 9.220704909926473e-05, "loss": 1.6303, "step": 67047 }, { "epoch": 2.23, "grad_norm": 0.6600198149681091, "learning_rate": 9.219951031844357e-05, "loss": 1.7599, "step": 67048 }, { "epoch": 2.23, "grad_norm": 0.6631591320037842, "learning_rate": 9.219197178986148e-05, "loss": 1.6745, "step": 67049 }, { "epoch": 2.23, "grad_norm": 0.6401826739311218, "learning_rate": 9.218443351352783e-05, "loss": 1.7263, "step": 67050 }, { "epoch": 2.23, "grad_norm": 0.6727221608161926, "learning_rate": 9.217689548945156e-05, "loss": 1.6865, "step": 67051 }, { "epoch": 2.23, "grad_norm": 0.6456313133239746, "learning_rate": 9.216935771764204e-05, "loss": 1.6978, "step": 67052 }, { "epoch": 2.23, "grad_norm": 0.6435641646385193, "learning_rate": 9.216182019810827e-05, "loss": 1.7329, "step": 67053 }, { "epoch": 2.23, "grad_norm": 0.6566575169563293, "learning_rate": 9.215428293085934e-05, "loss": 1.715, "step": 67054 }, { "epoch": 2.23, "grad_norm": 0.6325819492340088, "learning_rate": 9.214674591590455e-05, "loss": 1.6963, "step": 67055 }, { "epoch": 2.23, "grad_norm": 0.6390388607978821, "learning_rate": 9.213920915325302e-05, "loss": 1.6689, "step": 67056 }, { "epoch": 2.23, "grad_norm": 0.6917533278465271, "learning_rate": 9.213167264291374e-05, "loss": 1.6745, "step": 67057 }, { "epoch": 2.23, "grad_norm": 0.6540092825889587, "learning_rate": 9.212413638489601e-05, "loss": 1.6498, "step": 67058 }, { "epoch": 2.23, "grad_norm": 0.6530516147613525, "learning_rate": 9.211660037920906e-05, "loss": 1.699, "step": 67059 }, { "epoch": 2.23, "grad_norm": 0.6638699173927307, "learning_rate": 9.21090646258619e-05, "loss": 1.6662, "step": 67060 }, { "epoch": 2.23, "grad_norm": 0.6572045087814331, "learning_rate": 9.21015291248636e-05, "loss": 1.7393, "step": 67061 }, { "epoch": 2.23, "grad_norm": 0.6445173621177673, "learning_rate": 9.209399387622351e-05, "loss": 1.6484, "step": 67062 }, { "epoch": 2.23, "grad_norm": 0.653647780418396, "learning_rate": 9.208645887995069e-05, "loss": 1.706, "step": 67063 }, { "epoch": 2.23, "grad_norm": 0.6493120789527893, "learning_rate": 9.207892413605418e-05, "loss": 1.7579, "step": 67064 }, { "epoch": 2.23, "grad_norm": 0.6460145115852356, "learning_rate": 9.207138964454321e-05, "loss": 1.7508, "step": 67065 }, { "epoch": 2.23, "grad_norm": 0.6627793908119202, "learning_rate": 9.206385540542712e-05, "loss": 1.6831, "step": 67066 }, { "epoch": 2.23, "grad_norm": 0.6631844639778137, "learning_rate": 9.20563214187147e-05, "loss": 1.7076, "step": 67067 }, { "epoch": 2.23, "grad_norm": 0.6471582651138306, "learning_rate": 9.204878768441526e-05, "loss": 1.7453, "step": 67068 }, { "epoch": 2.23, "grad_norm": 0.6468716263771057, "learning_rate": 9.204125420253804e-05, "loss": 1.6273, "step": 67069 }, { "epoch": 2.23, "grad_norm": 0.6411563754081726, "learning_rate": 9.203372097309209e-05, "loss": 1.7018, "step": 67070 }, { "epoch": 2.23, "grad_norm": 0.6568232774734497, "learning_rate": 9.202618799608649e-05, "loss": 1.74, "step": 67071 }, { "epoch": 2.23, "grad_norm": 0.6405043601989746, "learning_rate": 9.201865527153044e-05, "loss": 1.7163, "step": 67072 }, { "epoch": 2.23, "grad_norm": 0.6566328406333923, "learning_rate": 9.201112279943328e-05, "loss": 1.7133, "step": 67073 }, { "epoch": 2.23, "grad_norm": 0.6689518094062805, "learning_rate": 9.20035905798038e-05, "loss": 1.6584, "step": 67074 }, { "epoch": 2.23, "grad_norm": 0.6498159170150757, "learning_rate": 9.199605861265132e-05, "loss": 1.7493, "step": 67075 }, { "epoch": 2.23, "grad_norm": 0.6434775590896606, "learning_rate": 9.198852689798506e-05, "loss": 1.7318, "step": 67076 }, { "epoch": 2.23, "grad_norm": 0.6747940182685852, "learning_rate": 9.198099543581407e-05, "loss": 1.6485, "step": 67077 }, { "epoch": 2.23, "grad_norm": 0.6802186965942383, "learning_rate": 9.19734642261474e-05, "loss": 1.7436, "step": 67078 }, { "epoch": 2.23, "grad_norm": 0.6455711126327515, "learning_rate": 9.19659332689943e-05, "loss": 1.6244, "step": 67079 }, { "epoch": 2.23, "grad_norm": 0.6722133159637451, "learning_rate": 9.195840256436412e-05, "loss": 1.7323, "step": 67080 }, { "epoch": 2.23, "grad_norm": 0.6466652154922485, "learning_rate": 9.195087211226555e-05, "loss": 1.717, "step": 67081 }, { "epoch": 2.23, "grad_norm": 0.6676120758056641, "learning_rate": 9.1943341912708e-05, "loss": 1.6803, "step": 67082 }, { "epoch": 2.23, "grad_norm": 0.6655153036117554, "learning_rate": 9.193581196570067e-05, "loss": 1.7325, "step": 67083 }, { "epoch": 2.23, "grad_norm": 0.6430627703666687, "learning_rate": 9.192828227125261e-05, "loss": 1.6915, "step": 67084 }, { "epoch": 2.23, "grad_norm": 0.6356659531593323, "learning_rate": 9.192075282937283e-05, "loss": 1.6781, "step": 67085 }, { "epoch": 2.23, "grad_norm": 0.6536091566085815, "learning_rate": 9.191322364007075e-05, "loss": 1.7284, "step": 67086 }, { "epoch": 2.23, "grad_norm": 0.6619912981987, "learning_rate": 9.19056947033553e-05, "loss": 1.7486, "step": 67087 }, { "epoch": 2.23, "grad_norm": 0.6382305026054382, "learning_rate": 9.189816601923561e-05, "loss": 1.6025, "step": 67088 }, { "epoch": 2.23, "grad_norm": 0.6439371705055237, "learning_rate": 9.189063758772088e-05, "loss": 1.6834, "step": 67089 }, { "epoch": 2.23, "grad_norm": 0.6664964556694031, "learning_rate": 9.188310940882036e-05, "loss": 1.7371, "step": 67090 }, { "epoch": 2.23, "grad_norm": 0.6527601480484009, "learning_rate": 9.187558148254305e-05, "loss": 1.6923, "step": 67091 }, { "epoch": 2.23, "grad_norm": 0.6680915951728821, "learning_rate": 9.186805380889802e-05, "loss": 1.7099, "step": 67092 }, { "epoch": 2.23, "grad_norm": 0.6452056765556335, "learning_rate": 9.186052638789464e-05, "loss": 1.6778, "step": 67093 }, { "epoch": 2.23, "grad_norm": 0.6436508893966675, "learning_rate": 9.18529992195419e-05, "loss": 1.7301, "step": 67094 }, { "epoch": 2.23, "grad_norm": 0.6526397466659546, "learning_rate": 9.184547230384883e-05, "loss": 1.6878, "step": 67095 }, { "epoch": 2.23, "grad_norm": 0.6880199909210205, "learning_rate": 9.18379456408248e-05, "loss": 1.6397, "step": 67096 }, { "epoch": 2.23, "grad_norm": 0.645072877407074, "learning_rate": 9.183041923047874e-05, "loss": 1.7584, "step": 67097 }, { "epoch": 2.23, "grad_norm": 0.6524534225463867, "learning_rate": 9.182289307281998e-05, "loss": 1.6858, "step": 67098 }, { "epoch": 2.23, "grad_norm": 0.6483839750289917, "learning_rate": 9.181536716785745e-05, "loss": 1.6753, "step": 67099 }, { "epoch": 2.23, "grad_norm": 0.6524243950843811, "learning_rate": 9.180784151560049e-05, "loss": 1.6732, "step": 67100 }, { "epoch": 2.23, "grad_norm": 0.6615806221961975, "learning_rate": 9.18003161160581e-05, "loss": 1.8186, "step": 67101 }, { "epoch": 2.23, "grad_norm": 0.6619986295700073, "learning_rate": 9.17927909692394e-05, "loss": 1.6669, "step": 67102 }, { "epoch": 2.23, "grad_norm": 0.6704433560371399, "learning_rate": 9.178526607515365e-05, "loss": 1.6449, "step": 67103 }, { "epoch": 2.23, "grad_norm": 0.6554110050201416, "learning_rate": 9.17777414338098e-05, "loss": 1.6387, "step": 67104 }, { "epoch": 2.23, "grad_norm": 0.6684459447860718, "learning_rate": 9.17702170452172e-05, "loss": 1.718, "step": 67105 }, { "epoch": 2.23, "grad_norm": 0.6613635420799255, "learning_rate": 9.176269290938489e-05, "loss": 1.6595, "step": 67106 }, { "epoch": 2.23, "grad_norm": 0.6411466002464294, "learning_rate": 9.175516902632189e-05, "loss": 1.7043, "step": 67107 }, { "epoch": 2.23, "grad_norm": 0.6645491719245911, "learning_rate": 9.17476453960375e-05, "loss": 1.7558, "step": 67108 }, { "epoch": 2.23, "grad_norm": 0.6711471676826477, "learning_rate": 9.174012201854072e-05, "loss": 1.7221, "step": 67109 }, { "epoch": 2.23, "grad_norm": 0.6682482361793518, "learning_rate": 9.173259889384085e-05, "loss": 1.7122, "step": 67110 }, { "epoch": 2.23, "grad_norm": 0.6782364845275879, "learning_rate": 9.172507602194679e-05, "loss": 1.6261, "step": 67111 }, { "epoch": 2.23, "grad_norm": 0.6539660692214966, "learning_rate": 9.17175534028679e-05, "loss": 1.681, "step": 67112 }, { "epoch": 2.23, "grad_norm": 0.6402863264083862, "learning_rate": 9.171003103661324e-05, "loss": 1.6985, "step": 67113 }, { "epoch": 2.23, "grad_norm": 0.6485304832458496, "learning_rate": 9.170250892319178e-05, "loss": 1.7032, "step": 67114 }, { "epoch": 2.23, "grad_norm": 0.6685776710510254, "learning_rate": 9.169498706261289e-05, "loss": 1.6794, "step": 67115 }, { "epoch": 2.23, "grad_norm": 0.6576310396194458, "learning_rate": 9.16874654548855e-05, "loss": 1.6585, "step": 67116 }, { "epoch": 2.23, "grad_norm": 0.6538485884666443, "learning_rate": 9.167994410001891e-05, "loss": 1.752, "step": 67117 }, { "epoch": 2.23, "grad_norm": 0.6449653506278992, "learning_rate": 9.16724229980221e-05, "loss": 1.699, "step": 67118 }, { "epoch": 2.23, "grad_norm": 0.6393939256668091, "learning_rate": 9.166490214890436e-05, "loss": 1.7174, "step": 67119 }, { "epoch": 2.23, "grad_norm": 0.6507375836372375, "learning_rate": 9.165738155267476e-05, "loss": 1.6731, "step": 67120 }, { "epoch": 2.23, "grad_norm": 0.6482495665550232, "learning_rate": 9.164986120934229e-05, "loss": 1.662, "step": 67121 }, { "epoch": 2.23, "grad_norm": 0.6243899464607239, "learning_rate": 9.164234111891625e-05, "loss": 1.6843, "step": 67122 }, { "epoch": 2.23, "grad_norm": 0.6557731628417969, "learning_rate": 9.163482128140577e-05, "loss": 1.6191, "step": 67123 }, { "epoch": 2.23, "grad_norm": 0.656915009021759, "learning_rate": 9.162730169681977e-05, "loss": 1.6579, "step": 67124 }, { "epoch": 2.23, "grad_norm": 0.672918438911438, "learning_rate": 9.161978236516754e-05, "loss": 1.7329, "step": 67125 }, { "epoch": 2.23, "grad_norm": 0.6390811204910278, "learning_rate": 9.161226328645827e-05, "loss": 1.665, "step": 67126 }, { "epoch": 2.23, "grad_norm": 0.6647419929504395, "learning_rate": 9.160474446070104e-05, "loss": 1.8019, "step": 67127 }, { "epoch": 2.23, "grad_norm": 0.6715587377548218, "learning_rate": 9.159722588790483e-05, "loss": 1.7125, "step": 67128 }, { "epoch": 2.23, "grad_norm": 0.6557509303092957, "learning_rate": 9.158970756807899e-05, "loss": 1.6957, "step": 67129 }, { "epoch": 2.23, "grad_norm": 0.6676366925239563, "learning_rate": 9.158218950123251e-05, "loss": 1.7285, "step": 67130 }, { "epoch": 2.23, "grad_norm": 0.6349871158599854, "learning_rate": 9.157467168737448e-05, "loss": 1.6657, "step": 67131 }, { "epoch": 2.23, "grad_norm": 0.6787590384483337, "learning_rate": 9.156715412651407e-05, "loss": 1.7375, "step": 67132 }, { "epoch": 2.23, "grad_norm": 0.6494470238685608, "learning_rate": 9.155963681866053e-05, "loss": 1.8104, "step": 67133 }, { "epoch": 2.23, "grad_norm": 0.645688533782959, "learning_rate": 9.155211976382284e-05, "loss": 1.783, "step": 67134 }, { "epoch": 2.23, "grad_norm": 0.6637363433837891, "learning_rate": 9.154460296201012e-05, "loss": 1.7101, "step": 67135 }, { "epoch": 2.23, "grad_norm": 0.6902775764465332, "learning_rate": 9.153708641323162e-05, "loss": 1.7127, "step": 67136 }, { "epoch": 2.23, "grad_norm": 0.6715272068977356, "learning_rate": 9.152957011749638e-05, "loss": 1.6743, "step": 67137 }, { "epoch": 2.23, "grad_norm": 0.6886820793151855, "learning_rate": 9.152205407481342e-05, "loss": 1.7644, "step": 67138 }, { "epoch": 2.23, "grad_norm": 0.6773836016654968, "learning_rate": 9.151453828519195e-05, "loss": 1.7276, "step": 67139 }, { "epoch": 2.23, "grad_norm": 0.6845135688781738, "learning_rate": 9.150702274864122e-05, "loss": 1.712, "step": 67140 }, { "epoch": 2.23, "grad_norm": 0.6669657230377197, "learning_rate": 9.149950746517025e-05, "loss": 1.6128, "step": 67141 }, { "epoch": 2.23, "grad_norm": 0.6573415994644165, "learning_rate": 9.149199243478806e-05, "loss": 1.7296, "step": 67142 }, { "epoch": 2.23, "grad_norm": 0.689466118812561, "learning_rate": 9.148447765750398e-05, "loss": 1.751, "step": 67143 }, { "epoch": 2.23, "grad_norm": 0.6404415965080261, "learning_rate": 9.1476963133327e-05, "loss": 1.691, "step": 67144 }, { "epoch": 2.23, "grad_norm": 0.6772433519363403, "learning_rate": 9.146944886226619e-05, "loss": 1.7232, "step": 67145 }, { "epoch": 2.23, "grad_norm": 0.6470115184783936, "learning_rate": 9.146193484433073e-05, "loss": 1.7075, "step": 67146 }, { "epoch": 2.23, "grad_norm": 0.6499647498130798, "learning_rate": 9.145442107952986e-05, "loss": 1.7398, "step": 67147 }, { "epoch": 2.23, "grad_norm": 0.7021316289901733, "learning_rate": 9.144690756787257e-05, "loss": 1.6122, "step": 67148 }, { "epoch": 2.23, "grad_norm": 0.6435546875, "learning_rate": 9.143939430936793e-05, "loss": 1.6201, "step": 67149 }, { "epoch": 2.23, "grad_norm": 0.6318437457084656, "learning_rate": 9.143188130402521e-05, "loss": 1.6602, "step": 67150 }, { "epoch": 2.23, "grad_norm": 0.628855288028717, "learning_rate": 9.142436855185348e-05, "loss": 1.6101, "step": 67151 }, { "epoch": 2.23, "grad_norm": 0.6615539193153381, "learning_rate": 9.141685605286176e-05, "loss": 1.6677, "step": 67152 }, { "epoch": 2.23, "grad_norm": 0.6164644956588745, "learning_rate": 9.14093438070593e-05, "loss": 1.7344, "step": 67153 }, { "epoch": 2.23, "grad_norm": 0.6555280685424805, "learning_rate": 9.140183181445507e-05, "loss": 1.7056, "step": 67154 }, { "epoch": 2.23, "grad_norm": 0.6635609865188599, "learning_rate": 9.139432007505838e-05, "loss": 1.735, "step": 67155 }, { "epoch": 2.23, "grad_norm": 0.6696484684944153, "learning_rate": 9.138680858887814e-05, "loss": 1.6748, "step": 67156 }, { "epoch": 2.23, "grad_norm": 0.6476843953132629, "learning_rate": 9.137929735592369e-05, "loss": 1.5985, "step": 67157 }, { "epoch": 2.23, "grad_norm": 0.6679110527038574, "learning_rate": 9.137178637620405e-05, "loss": 1.7712, "step": 67158 }, { "epoch": 2.23, "grad_norm": 0.6309242248535156, "learning_rate": 9.13642756497282e-05, "loss": 1.667, "step": 67159 }, { "epoch": 2.23, "grad_norm": 0.638386070728302, "learning_rate": 9.135676517650549e-05, "loss": 1.6722, "step": 67160 }, { "epoch": 2.23, "grad_norm": 0.6552043557167053, "learning_rate": 9.134925495654479e-05, "loss": 1.6621, "step": 67161 }, { "epoch": 2.23, "grad_norm": 0.671769917011261, "learning_rate": 9.134174498985551e-05, "loss": 1.7339, "step": 67162 }, { "epoch": 2.23, "grad_norm": 0.6629074215888977, "learning_rate": 9.133423527644655e-05, "loss": 1.664, "step": 67163 }, { "epoch": 2.23, "grad_norm": 0.653523325920105, "learning_rate": 9.1326725816327e-05, "loss": 1.6554, "step": 67164 }, { "epoch": 2.23, "grad_norm": 0.6380869746208191, "learning_rate": 9.131921660950618e-05, "loss": 1.6736, "step": 67165 }, { "epoch": 2.23, "grad_norm": 0.6402992606163025, "learning_rate": 9.131170765599297e-05, "loss": 1.7069, "step": 67166 }, { "epoch": 2.23, "grad_norm": 0.6333885192871094, "learning_rate": 9.13041989557967e-05, "loss": 1.7095, "step": 67167 }, { "epoch": 2.23, "grad_norm": 0.6723651885986328, "learning_rate": 9.129669050892627e-05, "loss": 1.786, "step": 67168 }, { "epoch": 2.23, "grad_norm": 0.6543976664543152, "learning_rate": 9.128918231539104e-05, "loss": 1.7581, "step": 67169 }, { "epoch": 2.23, "grad_norm": 0.6460017561912537, "learning_rate": 9.128167437519995e-05, "loss": 1.7069, "step": 67170 }, { "epoch": 2.23, "grad_norm": 0.7153372764587402, "learning_rate": 9.127416668836207e-05, "loss": 1.8127, "step": 67171 }, { "epoch": 2.23, "grad_norm": 0.6266164183616638, "learning_rate": 9.126665925488671e-05, "loss": 1.7163, "step": 67172 }, { "epoch": 2.23, "grad_norm": 0.6553505063056946, "learning_rate": 9.125915207478286e-05, "loss": 1.765, "step": 67173 }, { "epoch": 2.23, "grad_norm": 0.6352282762527466, "learning_rate": 9.125164514805955e-05, "loss": 1.6606, "step": 67174 }, { "epoch": 2.23, "grad_norm": 0.6563735604286194, "learning_rate": 9.124413847472598e-05, "loss": 1.7364, "step": 67175 }, { "epoch": 2.23, "grad_norm": 0.6597782373428345, "learning_rate": 9.123663205479135e-05, "loss": 1.6487, "step": 67176 }, { "epoch": 2.23, "grad_norm": 0.6781095862388611, "learning_rate": 9.122912588826471e-05, "loss": 1.7121, "step": 67177 }, { "epoch": 2.24, "grad_norm": 0.677548348903656, "learning_rate": 9.122161997515506e-05, "loss": 1.7574, "step": 67178 }, { "epoch": 2.24, "grad_norm": 0.6731469631195068, "learning_rate": 9.121411431547169e-05, "loss": 1.656, "step": 67179 }, { "epoch": 2.24, "grad_norm": 0.6605966687202454, "learning_rate": 9.120660890922359e-05, "loss": 1.7352, "step": 67180 }, { "epoch": 2.24, "grad_norm": 0.6855555772781372, "learning_rate": 9.119910375641986e-05, "loss": 1.7305, "step": 67181 }, { "epoch": 2.24, "grad_norm": 0.6501133441925049, "learning_rate": 9.119159885706962e-05, "loss": 1.6885, "step": 67182 }, { "epoch": 2.24, "grad_norm": 0.654207706451416, "learning_rate": 9.118409421118219e-05, "loss": 1.6152, "step": 67183 }, { "epoch": 2.24, "grad_norm": 0.6528034806251526, "learning_rate": 9.117658981876635e-05, "loss": 1.6979, "step": 67184 }, { "epoch": 2.24, "grad_norm": 0.6639583110809326, "learning_rate": 9.116908567983133e-05, "loss": 1.6888, "step": 67185 }, { "epoch": 2.24, "grad_norm": 0.6720895171165466, "learning_rate": 9.116158179438638e-05, "loss": 1.734, "step": 67186 }, { "epoch": 2.24, "grad_norm": 0.6595007181167603, "learning_rate": 9.11540781624405e-05, "loss": 1.7426, "step": 67187 }, { "epoch": 2.24, "grad_norm": 0.6664846539497375, "learning_rate": 9.114657478400269e-05, "loss": 1.6847, "step": 67188 }, { "epoch": 2.24, "grad_norm": 0.6616460680961609, "learning_rate": 9.113907165908217e-05, "loss": 1.7212, "step": 67189 }, { "epoch": 2.24, "grad_norm": 0.6676132082939148, "learning_rate": 9.113156878768823e-05, "loss": 1.7066, "step": 67190 }, { "epoch": 2.24, "grad_norm": 0.6750701069831848, "learning_rate": 9.112406616982963e-05, "loss": 1.7403, "step": 67191 }, { "epoch": 2.24, "grad_norm": 0.6369988322257996, "learning_rate": 9.11165638055156e-05, "loss": 1.7174, "step": 67192 }, { "epoch": 2.24, "grad_norm": 0.6264224052429199, "learning_rate": 9.110906169475538e-05, "loss": 1.6758, "step": 67193 }, { "epoch": 2.24, "grad_norm": 0.6435515284538269, "learning_rate": 9.1101559837558e-05, "loss": 1.7372, "step": 67194 }, { "epoch": 2.24, "grad_norm": 0.667253851890564, "learning_rate": 9.109405823393245e-05, "loss": 1.5966, "step": 67195 }, { "epoch": 2.24, "grad_norm": 0.6709103584289551, "learning_rate": 9.108655688388792e-05, "loss": 1.7577, "step": 67196 }, { "epoch": 2.24, "grad_norm": 0.6668424606323242, "learning_rate": 9.107905578743371e-05, "loss": 1.707, "step": 67197 }, { "epoch": 2.24, "grad_norm": 0.6582529544830322, "learning_rate": 9.107155494457858e-05, "loss": 1.7273, "step": 67198 }, { "epoch": 2.24, "grad_norm": 0.6573972702026367, "learning_rate": 9.106405435533178e-05, "loss": 1.6647, "step": 67199 }, { "epoch": 2.24, "grad_norm": 0.6346091032028198, "learning_rate": 9.105655401970254e-05, "loss": 1.6261, "step": 67200 }, { "epoch": 2.24, "grad_norm": 0.6695037484169006, "learning_rate": 9.104905393769986e-05, "loss": 1.7727, "step": 67201 }, { "epoch": 2.24, "grad_norm": 0.6774589419364929, "learning_rate": 9.104155410933274e-05, "loss": 1.6805, "step": 67202 }, { "epoch": 2.24, "grad_norm": 0.6614418029785156, "learning_rate": 9.103405453461048e-05, "loss": 1.7334, "step": 67203 }, { "epoch": 2.24, "grad_norm": 0.6669934391975403, "learning_rate": 9.102655521354207e-05, "loss": 1.7437, "step": 67204 }, { "epoch": 2.24, "grad_norm": 0.6766651272773743, "learning_rate": 9.101905614613656e-05, "loss": 1.7101, "step": 67205 }, { "epoch": 2.24, "grad_norm": 0.6733022928237915, "learning_rate": 9.101155733240313e-05, "loss": 1.681, "step": 67206 }, { "epoch": 2.24, "grad_norm": 0.6434391140937805, "learning_rate": 9.100405877235094e-05, "loss": 1.6652, "step": 67207 }, { "epoch": 2.24, "grad_norm": 0.6651489734649658, "learning_rate": 9.099656046598906e-05, "loss": 1.6921, "step": 67208 }, { "epoch": 2.24, "grad_norm": 0.6537764668464661, "learning_rate": 9.098906241332643e-05, "loss": 1.6844, "step": 67209 }, { "epoch": 2.24, "grad_norm": 0.6618688702583313, "learning_rate": 9.098156461437239e-05, "loss": 1.7128, "step": 67210 }, { "epoch": 2.24, "grad_norm": 0.6920727491378784, "learning_rate": 9.097406706913597e-05, "loss": 1.716, "step": 67211 }, { "epoch": 2.24, "grad_norm": 0.6678143739700317, "learning_rate": 9.096656977762609e-05, "loss": 1.7213, "step": 67212 }, { "epoch": 2.24, "grad_norm": 0.6550546884536743, "learning_rate": 9.09590727398521e-05, "loss": 1.7071, "step": 67213 }, { "epoch": 2.24, "grad_norm": 0.6672112345695496, "learning_rate": 9.095157595582294e-05, "loss": 1.7518, "step": 67214 }, { "epoch": 2.24, "grad_norm": 0.6769420504570007, "learning_rate": 9.09440794255478e-05, "loss": 1.698, "step": 67215 }, { "epoch": 2.24, "grad_norm": 0.6466715335845947, "learning_rate": 9.09365831490357e-05, "loss": 1.6942, "step": 67216 }, { "epoch": 2.24, "grad_norm": 0.6469137668609619, "learning_rate": 9.092908712629584e-05, "loss": 1.6273, "step": 67217 }, { "epoch": 2.24, "grad_norm": 0.6496803164482117, "learning_rate": 9.092159135733728e-05, "loss": 1.6765, "step": 67218 }, { "epoch": 2.24, "grad_norm": 0.6756195425987244, "learning_rate": 9.091409584216903e-05, "loss": 1.8161, "step": 67219 }, { "epoch": 2.24, "grad_norm": 0.6599705219268799, "learning_rate": 9.090660058080034e-05, "loss": 1.7687, "step": 67220 }, { "epoch": 2.24, "grad_norm": 0.6723955869674683, "learning_rate": 9.089910557324012e-05, "loss": 1.641, "step": 67221 }, { "epoch": 2.24, "grad_norm": 0.6706035733222961, "learning_rate": 9.089161081949766e-05, "loss": 1.6934, "step": 67222 }, { "epoch": 2.24, "grad_norm": 0.6630877256393433, "learning_rate": 9.0884116319582e-05, "loss": 1.7213, "step": 67223 }, { "epoch": 2.24, "grad_norm": 0.6514922380447388, "learning_rate": 9.087662207350209e-05, "loss": 1.7217, "step": 67224 }, { "epoch": 2.24, "grad_norm": 0.6692861318588257, "learning_rate": 9.086912808126726e-05, "loss": 1.6391, "step": 67225 }, { "epoch": 2.24, "grad_norm": 0.6673650145530701, "learning_rate": 9.08616343428864e-05, "loss": 1.6865, "step": 67226 }, { "epoch": 2.24, "grad_norm": 0.6599757671356201, "learning_rate": 9.08541408583688e-05, "loss": 1.6938, "step": 67227 }, { "epoch": 2.24, "grad_norm": 0.6869102120399475, "learning_rate": 9.084664762772337e-05, "loss": 1.7912, "step": 67228 }, { "epoch": 2.24, "grad_norm": 0.6416403651237488, "learning_rate": 9.083915465095937e-05, "loss": 1.7122, "step": 67229 }, { "epoch": 2.24, "grad_norm": 0.663760244846344, "learning_rate": 9.083166192808583e-05, "loss": 1.7402, "step": 67230 }, { "epoch": 2.24, "grad_norm": 0.6592212915420532, "learning_rate": 9.082416945911172e-05, "loss": 1.6953, "step": 67231 }, { "epoch": 2.24, "grad_norm": 0.6555798053741455, "learning_rate": 9.081667724404637e-05, "loss": 1.7192, "step": 67232 }, { "epoch": 2.24, "grad_norm": 0.6699122786521912, "learning_rate": 9.080918528289873e-05, "loss": 1.6609, "step": 67233 }, { "epoch": 2.24, "grad_norm": 0.6517705321311951, "learning_rate": 9.080169357567782e-05, "loss": 1.7343, "step": 67234 }, { "epoch": 2.24, "grad_norm": 0.6680379509925842, "learning_rate": 9.079420212239284e-05, "loss": 1.7204, "step": 67235 }, { "epoch": 2.24, "grad_norm": 0.6505037546157837, "learning_rate": 9.078671092305295e-05, "loss": 1.6684, "step": 67236 }, { "epoch": 2.24, "grad_norm": 0.6883147358894348, "learning_rate": 9.077921997766719e-05, "loss": 1.7156, "step": 67237 }, { "epoch": 2.24, "grad_norm": 0.6459822654724121, "learning_rate": 9.077172928624451e-05, "loss": 1.7283, "step": 67238 }, { "epoch": 2.24, "grad_norm": 0.7155942916870117, "learning_rate": 9.076423884879426e-05, "loss": 1.7089, "step": 67239 }, { "epoch": 2.24, "grad_norm": 0.6740630865097046, "learning_rate": 9.075674866532537e-05, "loss": 1.6685, "step": 67240 }, { "epoch": 2.24, "grad_norm": 0.6466357111930847, "learning_rate": 9.074925873584687e-05, "loss": 1.6664, "step": 67241 }, { "epoch": 2.24, "grad_norm": 0.6585659980773926, "learning_rate": 9.074176906036792e-05, "loss": 1.695, "step": 67242 }, { "epoch": 2.24, "grad_norm": 0.6435315012931824, "learning_rate": 9.073427963889772e-05, "loss": 1.6597, "step": 67243 }, { "epoch": 2.24, "grad_norm": 0.660837709903717, "learning_rate": 9.07267904714453e-05, "loss": 1.6792, "step": 67244 }, { "epoch": 2.24, "grad_norm": 0.6715956926345825, "learning_rate": 9.071930155801961e-05, "loss": 1.7166, "step": 67245 }, { "epoch": 2.24, "grad_norm": 0.6618725061416626, "learning_rate": 9.071181289862994e-05, "loss": 1.6322, "step": 67246 }, { "epoch": 2.24, "grad_norm": 0.6781863570213318, "learning_rate": 9.07043244932853e-05, "loss": 1.6329, "step": 67247 }, { "epoch": 2.24, "grad_norm": 0.6542229652404785, "learning_rate": 9.069683634199466e-05, "loss": 1.6561, "step": 67248 }, { "epoch": 2.24, "grad_norm": 0.6349906325340271, "learning_rate": 9.068934844476726e-05, "loss": 1.6626, "step": 67249 }, { "epoch": 2.24, "grad_norm": 0.6557881832122803, "learning_rate": 9.068186080161222e-05, "loss": 1.7894, "step": 67250 }, { "epoch": 2.24, "grad_norm": 0.6796156764030457, "learning_rate": 9.067437341253855e-05, "loss": 1.6018, "step": 67251 }, { "epoch": 2.24, "grad_norm": 0.6703108549118042, "learning_rate": 9.066688627755528e-05, "loss": 1.706, "step": 67252 }, { "epoch": 2.24, "grad_norm": 0.6571254134178162, "learning_rate": 9.065939939667163e-05, "loss": 1.7381, "step": 67253 }, { "epoch": 2.24, "grad_norm": 0.6626761555671692, "learning_rate": 9.065191276989664e-05, "loss": 1.7642, "step": 67254 }, { "epoch": 2.24, "grad_norm": 0.6782208681106567, "learning_rate": 9.064442639723932e-05, "loss": 1.7466, "step": 67255 }, { "epoch": 2.24, "grad_norm": 0.6494907736778259, "learning_rate": 9.063694027870877e-05, "loss": 1.6903, "step": 67256 }, { "epoch": 2.24, "grad_norm": 0.6968988180160522, "learning_rate": 9.062945441431424e-05, "loss": 1.7621, "step": 67257 }, { "epoch": 2.24, "grad_norm": 0.6639376878738403, "learning_rate": 9.062196880406468e-05, "loss": 1.6387, "step": 67258 }, { "epoch": 2.24, "grad_norm": 0.6764939427375793, "learning_rate": 9.061448344796915e-05, "loss": 1.647, "step": 67259 }, { "epoch": 2.24, "grad_norm": 0.6513738036155701, "learning_rate": 9.060699834603685e-05, "loss": 1.678, "step": 67260 }, { "epoch": 2.24, "grad_norm": 0.6764964461326599, "learning_rate": 9.05995134982768e-05, "loss": 1.7246, "step": 67261 }, { "epoch": 2.24, "grad_norm": 0.656826376914978, "learning_rate": 9.059202890469799e-05, "loss": 1.6746, "step": 67262 }, { "epoch": 2.24, "grad_norm": 0.7023667097091675, "learning_rate": 9.058454456530962e-05, "loss": 1.8794, "step": 67263 }, { "epoch": 2.24, "grad_norm": 0.6549003720283508, "learning_rate": 9.057706048012083e-05, "loss": 1.7294, "step": 67264 }, { "epoch": 2.24, "grad_norm": 0.6590198278427124, "learning_rate": 9.056957664914068e-05, "loss": 1.7701, "step": 67265 }, { "epoch": 2.24, "grad_norm": 0.6506575345993042, "learning_rate": 9.056209307237806e-05, "loss": 1.6735, "step": 67266 }, { "epoch": 2.24, "grad_norm": 0.6601083278656006, "learning_rate": 9.055460974984232e-05, "loss": 1.6962, "step": 67267 }, { "epoch": 2.24, "grad_norm": 0.6449461579322815, "learning_rate": 9.054712668154241e-05, "loss": 1.6257, "step": 67268 }, { "epoch": 2.24, "grad_norm": 0.6557754278182983, "learning_rate": 9.053964386748733e-05, "loss": 1.6943, "step": 67269 }, { "epoch": 2.24, "grad_norm": 0.6287152767181396, "learning_rate": 9.053216130768637e-05, "loss": 1.6398, "step": 67270 }, { "epoch": 2.24, "grad_norm": 0.664434015750885, "learning_rate": 9.052467900214839e-05, "loss": 1.614, "step": 67271 }, { "epoch": 2.24, "grad_norm": 0.6481924653053284, "learning_rate": 9.05171969508827e-05, "loss": 1.6967, "step": 67272 }, { "epoch": 2.24, "grad_norm": 0.6654587388038635, "learning_rate": 9.050971515389815e-05, "loss": 1.7366, "step": 67273 }, { "epoch": 2.24, "grad_norm": 0.6460856795310974, "learning_rate": 9.050223361120404e-05, "loss": 1.6938, "step": 67274 }, { "epoch": 2.24, "grad_norm": 0.6668816208839417, "learning_rate": 9.049475232280936e-05, "loss": 1.6993, "step": 67275 }, { "epoch": 2.24, "grad_norm": 0.6493935585021973, "learning_rate": 9.048727128872308e-05, "loss": 1.735, "step": 67276 }, { "epoch": 2.24, "grad_norm": 0.649757981300354, "learning_rate": 9.047979050895448e-05, "loss": 1.6725, "step": 67277 }, { "epoch": 2.24, "grad_norm": 0.6685886979103088, "learning_rate": 9.047230998351246e-05, "loss": 1.6733, "step": 67278 }, { "epoch": 2.24, "grad_norm": 0.6643531322479248, "learning_rate": 9.046482971240626e-05, "loss": 1.7704, "step": 67279 }, { "epoch": 2.24, "grad_norm": 0.6418153047561646, "learning_rate": 9.045734969564488e-05, "loss": 1.6966, "step": 67280 }, { "epoch": 2.24, "grad_norm": 0.6582273244857788, "learning_rate": 9.044986993323729e-05, "loss": 1.7163, "step": 67281 }, { "epoch": 2.24, "grad_norm": 0.6595516204833984, "learning_rate": 9.044239042519281e-05, "loss": 1.7263, "step": 67282 }, { "epoch": 2.24, "grad_norm": 0.6562345027923584, "learning_rate": 9.043491117152032e-05, "loss": 1.7348, "step": 67283 }, { "epoch": 2.24, "grad_norm": 0.6911417841911316, "learning_rate": 9.042743217222901e-05, "loss": 1.7638, "step": 67284 }, { "epoch": 2.24, "grad_norm": 0.6684426665306091, "learning_rate": 9.041995342732784e-05, "loss": 1.6973, "step": 67285 }, { "epoch": 2.24, "grad_norm": 0.6513581275939941, "learning_rate": 9.041247493682607e-05, "loss": 1.7321, "step": 67286 }, { "epoch": 2.24, "grad_norm": 0.662692666053772, "learning_rate": 9.04049967007327e-05, "loss": 1.734, "step": 67287 }, { "epoch": 2.24, "grad_norm": 0.6505835652351379, "learning_rate": 9.039751871905665e-05, "loss": 1.7127, "step": 67288 }, { "epoch": 2.24, "grad_norm": 0.6505473852157593, "learning_rate": 9.039004099180725e-05, "loss": 1.7083, "step": 67289 }, { "epoch": 2.24, "grad_norm": 0.653805673122406, "learning_rate": 9.038256351899341e-05, "loss": 1.624, "step": 67290 }, { "epoch": 2.24, "grad_norm": 0.7063213586807251, "learning_rate": 9.037508630062417e-05, "loss": 1.6698, "step": 67291 }, { "epoch": 2.24, "grad_norm": 0.6543799042701721, "learning_rate": 9.036760933670873e-05, "loss": 1.7485, "step": 67292 }, { "epoch": 2.24, "grad_norm": 0.6523095965385437, "learning_rate": 9.036013262725618e-05, "loss": 1.6809, "step": 67293 }, { "epoch": 2.24, "grad_norm": 0.6930317878723145, "learning_rate": 9.035265617227556e-05, "loss": 1.7456, "step": 67294 }, { "epoch": 2.24, "grad_norm": 0.6669008135795593, "learning_rate": 9.034517997177582e-05, "loss": 1.7145, "step": 67295 }, { "epoch": 2.24, "grad_norm": 0.6428194642066956, "learning_rate": 9.033770402576623e-05, "loss": 1.6783, "step": 67296 }, { "epoch": 2.24, "grad_norm": 0.6432039737701416, "learning_rate": 9.033022833425581e-05, "loss": 1.734, "step": 67297 }, { "epoch": 2.24, "grad_norm": 0.671085774898529, "learning_rate": 9.032275289725346e-05, "loss": 1.605, "step": 67298 }, { "epoch": 2.24, "grad_norm": 0.6548609137535095, "learning_rate": 9.031527771476842e-05, "loss": 1.622, "step": 67299 }, { "epoch": 2.24, "grad_norm": 0.6544411182403564, "learning_rate": 9.030780278680992e-05, "loss": 1.6799, "step": 67300 }, { "epoch": 2.24, "grad_norm": 0.6301583647727966, "learning_rate": 9.030032811338667e-05, "loss": 1.6391, "step": 67301 }, { "epoch": 2.24, "grad_norm": 0.6441365480422974, "learning_rate": 9.02928536945079e-05, "loss": 1.6582, "step": 67302 }, { "epoch": 2.24, "grad_norm": 0.6694656014442444, "learning_rate": 9.028537953018283e-05, "loss": 1.6642, "step": 67303 }, { "epoch": 2.24, "grad_norm": 0.6794537901878357, "learning_rate": 9.027790562042037e-05, "loss": 1.7528, "step": 67304 }, { "epoch": 2.24, "grad_norm": 0.671441912651062, "learning_rate": 9.027043196522959e-05, "loss": 1.779, "step": 67305 }, { "epoch": 2.24, "grad_norm": 0.655892014503479, "learning_rate": 9.026295856461956e-05, "loss": 1.688, "step": 67306 }, { "epoch": 2.24, "grad_norm": 0.6589196920394897, "learning_rate": 9.02554854185996e-05, "loss": 1.7418, "step": 67307 }, { "epoch": 2.24, "grad_norm": 0.6583516001701355, "learning_rate": 9.02480125271784e-05, "loss": 1.6924, "step": 67308 }, { "epoch": 2.24, "grad_norm": 0.6260934472084045, "learning_rate": 9.024053989036519e-05, "loss": 1.6438, "step": 67309 }, { "epoch": 2.24, "grad_norm": 0.6528894901275635, "learning_rate": 9.023306750816915e-05, "loss": 1.7251, "step": 67310 }, { "epoch": 2.24, "grad_norm": 0.6673011779785156, "learning_rate": 9.022559538059927e-05, "loss": 1.7287, "step": 67311 }, { "epoch": 2.24, "grad_norm": 0.6758144497871399, "learning_rate": 9.02181235076645e-05, "loss": 1.6968, "step": 67312 }, { "epoch": 2.24, "grad_norm": 0.6419022083282471, "learning_rate": 9.021065188937403e-05, "loss": 1.7065, "step": 67313 }, { "epoch": 2.24, "grad_norm": 0.6618267297744751, "learning_rate": 9.020318052573709e-05, "loss": 1.7359, "step": 67314 }, { "epoch": 2.24, "grad_norm": 0.6746249794960022, "learning_rate": 9.019570941676242e-05, "loss": 1.7295, "step": 67315 }, { "epoch": 2.24, "grad_norm": 0.6414068937301636, "learning_rate": 9.018823856245923e-05, "loss": 1.6544, "step": 67316 }, { "epoch": 2.24, "grad_norm": 0.6840071082115173, "learning_rate": 9.018076796283668e-05, "loss": 1.7061, "step": 67317 }, { "epoch": 2.24, "grad_norm": 0.6719145178794861, "learning_rate": 9.01732976179038e-05, "loss": 1.6159, "step": 67318 }, { "epoch": 2.24, "grad_norm": 0.6793173551559448, "learning_rate": 9.016582752766948e-05, "loss": 1.7869, "step": 67319 }, { "epoch": 2.24, "grad_norm": 0.6592001914978027, "learning_rate": 9.015835769214305e-05, "loss": 1.6984, "step": 67320 }, { "epoch": 2.24, "grad_norm": 0.6618760228157043, "learning_rate": 9.015088811133347e-05, "loss": 1.724, "step": 67321 }, { "epoch": 2.24, "grad_norm": 0.6678452491760254, "learning_rate": 9.014341878524965e-05, "loss": 1.7164, "step": 67322 }, { "epoch": 2.24, "grad_norm": 0.6443194150924683, "learning_rate": 9.013594971390084e-05, "loss": 1.7622, "step": 67323 }, { "epoch": 2.24, "grad_norm": 0.676609218120575, "learning_rate": 9.012848089729616e-05, "loss": 1.7369, "step": 67324 }, { "epoch": 2.24, "grad_norm": 0.6794833540916443, "learning_rate": 9.012101233544457e-05, "loss": 1.6468, "step": 67325 }, { "epoch": 2.24, "grad_norm": 0.6783757209777832, "learning_rate": 9.011354402835505e-05, "loss": 1.7065, "step": 67326 }, { "epoch": 2.24, "grad_norm": 0.6653958559036255, "learning_rate": 9.010607597603683e-05, "loss": 1.7612, "step": 67327 }, { "epoch": 2.24, "grad_norm": 0.654116153717041, "learning_rate": 9.009860817849894e-05, "loss": 1.702, "step": 67328 }, { "epoch": 2.24, "grad_norm": 0.6436172723770142, "learning_rate": 9.009114063575031e-05, "loss": 1.7284, "step": 67329 }, { "epoch": 2.24, "grad_norm": 0.6677980422973633, "learning_rate": 9.00836733478002e-05, "loss": 1.6626, "step": 67330 }, { "epoch": 2.24, "grad_norm": 0.683690071105957, "learning_rate": 9.007620631465749e-05, "loss": 1.6953, "step": 67331 }, { "epoch": 2.24, "grad_norm": 0.7171522378921509, "learning_rate": 9.006873953633143e-05, "loss": 1.7024, "step": 67332 }, { "epoch": 2.24, "grad_norm": 0.6677789092063904, "learning_rate": 9.006127301283088e-05, "loss": 1.7496, "step": 67333 }, { "epoch": 2.24, "grad_norm": 0.6530668139457703, "learning_rate": 9.005380674416512e-05, "loss": 1.6725, "step": 67334 }, { "epoch": 2.24, "grad_norm": 0.6733962893486023, "learning_rate": 9.004634073034311e-05, "loss": 1.824, "step": 67335 }, { "epoch": 2.24, "grad_norm": 0.672104001045227, "learning_rate": 9.00388749713738e-05, "loss": 1.753, "step": 67336 }, { "epoch": 2.24, "grad_norm": 0.6896296143531799, "learning_rate": 9.003140946726647e-05, "loss": 1.6944, "step": 67337 }, { "epoch": 2.24, "grad_norm": 0.642957866191864, "learning_rate": 9.002394421802995e-05, "loss": 1.7179, "step": 67338 }, { "epoch": 2.24, "grad_norm": 0.6615301966667175, "learning_rate": 9.001647922367355e-05, "loss": 1.7099, "step": 67339 }, { "epoch": 2.24, "grad_norm": 0.6677721738815308, "learning_rate": 9.00090144842062e-05, "loss": 1.6194, "step": 67340 }, { "epoch": 2.24, "grad_norm": 0.6701845526695251, "learning_rate": 9.000154999963685e-05, "loss": 1.6648, "step": 67341 }, { "epoch": 2.24, "grad_norm": 0.6763238906860352, "learning_rate": 8.999408576997479e-05, "loss": 1.6031, "step": 67342 }, { "epoch": 2.24, "grad_norm": 0.6553159356117249, "learning_rate": 8.998662179522885e-05, "loss": 1.6723, "step": 67343 }, { "epoch": 2.24, "grad_norm": 0.6611455082893372, "learning_rate": 8.997915807540833e-05, "loss": 1.7061, "step": 67344 }, { "epoch": 2.24, "grad_norm": 0.6517753005027771, "learning_rate": 8.997169461052205e-05, "loss": 1.6824, "step": 67345 }, { "epoch": 2.24, "grad_norm": 0.6804881691932678, "learning_rate": 8.996423140057926e-05, "loss": 1.6493, "step": 67346 }, { "epoch": 2.24, "grad_norm": 0.6384062170982361, "learning_rate": 8.995676844558901e-05, "loss": 1.5795, "step": 67347 }, { "epoch": 2.24, "grad_norm": 0.6721616387367249, "learning_rate": 8.994930574556014e-05, "loss": 1.7649, "step": 67348 }, { "epoch": 2.24, "grad_norm": 0.6744563579559326, "learning_rate": 8.994184330050198e-05, "loss": 1.7113, "step": 67349 }, { "epoch": 2.24, "grad_norm": 0.6609976887702942, "learning_rate": 8.993438111042349e-05, "loss": 1.764, "step": 67350 }, { "epoch": 2.24, "grad_norm": 0.8851824402809143, "learning_rate": 8.99269191753336e-05, "loss": 1.6741, "step": 67351 }, { "epoch": 2.24, "grad_norm": 0.6550806164741516, "learning_rate": 8.991945749524147e-05, "loss": 1.6508, "step": 67352 }, { "epoch": 2.24, "grad_norm": 0.6649104356765747, "learning_rate": 8.991199607015627e-05, "loss": 1.6738, "step": 67353 }, { "epoch": 2.24, "grad_norm": 0.6928192973136902, "learning_rate": 8.990453490008694e-05, "loss": 1.6949, "step": 67354 }, { "epoch": 2.24, "grad_norm": 0.6413894891738892, "learning_rate": 8.989707398504245e-05, "loss": 1.6972, "step": 67355 }, { "epoch": 2.24, "grad_norm": 0.6952759027481079, "learning_rate": 8.988961332503207e-05, "loss": 1.6687, "step": 67356 }, { "epoch": 2.24, "grad_norm": 0.6440027952194214, "learning_rate": 8.988215292006474e-05, "loss": 1.707, "step": 67357 }, { "epoch": 2.24, "grad_norm": 0.6614993214607239, "learning_rate": 8.987469277014939e-05, "loss": 1.7299, "step": 67358 }, { "epoch": 2.24, "grad_norm": 0.645950198173523, "learning_rate": 8.986723287529525e-05, "loss": 1.6834, "step": 67359 }, { "epoch": 2.24, "grad_norm": 0.6471849083900452, "learning_rate": 8.98597732355114e-05, "loss": 1.6916, "step": 67360 }, { "epoch": 2.24, "grad_norm": 0.6833574771881104, "learning_rate": 8.985231385080681e-05, "loss": 1.7275, "step": 67361 }, { "epoch": 2.24, "grad_norm": 0.6715906858444214, "learning_rate": 8.984485472119047e-05, "loss": 1.6636, "step": 67362 }, { "epoch": 2.24, "grad_norm": 0.6592933535575867, "learning_rate": 8.983739584667162e-05, "loss": 1.6153, "step": 67363 }, { "epoch": 2.24, "grad_norm": 0.6420199275016785, "learning_rate": 8.982993722725915e-05, "loss": 1.7109, "step": 67364 }, { "epoch": 2.24, "grad_norm": 0.6488577723503113, "learning_rate": 8.982247886296212e-05, "loss": 1.7858, "step": 67365 }, { "epoch": 2.24, "grad_norm": 0.6503697037696838, "learning_rate": 8.981502075378962e-05, "loss": 1.7497, "step": 67366 }, { "epoch": 2.24, "grad_norm": 0.6515017747879028, "learning_rate": 8.980756289975082e-05, "loss": 1.7311, "step": 67367 }, { "epoch": 2.24, "grad_norm": 0.6437965035438538, "learning_rate": 8.980010530085467e-05, "loss": 1.6576, "step": 67368 }, { "epoch": 2.24, "grad_norm": 0.6644859910011292, "learning_rate": 8.979264795711011e-05, "loss": 1.6474, "step": 67369 }, { "epoch": 2.24, "grad_norm": 0.6450459957122803, "learning_rate": 8.978519086852641e-05, "loss": 1.7097, "step": 67370 }, { "epoch": 2.24, "grad_norm": 0.6578546166419983, "learning_rate": 8.97777340351125e-05, "loss": 1.793, "step": 67371 }, { "epoch": 2.24, "grad_norm": 0.6460728049278259, "learning_rate": 8.977027745687737e-05, "loss": 1.7232, "step": 67372 }, { "epoch": 2.24, "grad_norm": 0.6602182984352112, "learning_rate": 8.97628211338301e-05, "loss": 1.7242, "step": 67373 }, { "epoch": 2.24, "grad_norm": 0.655492901802063, "learning_rate": 8.975536506597994e-05, "loss": 1.7128, "step": 67374 }, { "epoch": 2.24, "grad_norm": 0.6823680400848389, "learning_rate": 8.974790925333577e-05, "loss": 1.7054, "step": 67375 }, { "epoch": 2.24, "grad_norm": 0.6371309757232666, "learning_rate": 8.974045369590657e-05, "loss": 1.722, "step": 67376 }, { "epoch": 2.24, "grad_norm": 0.6787735819816589, "learning_rate": 8.973299839370156e-05, "loss": 1.72, "step": 67377 }, { "epoch": 2.24, "grad_norm": 0.6512468457221985, "learning_rate": 8.972554334672971e-05, "loss": 1.7462, "step": 67378 }, { "epoch": 2.24, "grad_norm": 0.6664214730262756, "learning_rate": 8.971808855499999e-05, "loss": 1.6376, "step": 67379 }, { "epoch": 2.24, "grad_norm": 0.6565613746643066, "learning_rate": 8.971063401852151e-05, "loss": 1.6547, "step": 67380 }, { "epoch": 2.24, "grad_norm": 0.6722459197044373, "learning_rate": 8.970317973730343e-05, "loss": 1.7457, "step": 67381 }, { "epoch": 2.24, "grad_norm": 0.6445344686508179, "learning_rate": 8.969572571135471e-05, "loss": 1.6488, "step": 67382 }, { "epoch": 2.24, "grad_norm": 0.6640079617500305, "learning_rate": 8.96882719406843e-05, "loss": 1.7494, "step": 67383 }, { "epoch": 2.24, "grad_norm": 0.6508907079696655, "learning_rate": 8.968081842530144e-05, "loss": 1.7077, "step": 67384 }, { "epoch": 2.24, "grad_norm": 0.6418095827102661, "learning_rate": 8.967336516521506e-05, "loss": 1.7194, "step": 67385 }, { "epoch": 2.24, "grad_norm": 0.6637773513793945, "learning_rate": 8.966591216043413e-05, "loss": 1.7568, "step": 67386 }, { "epoch": 2.24, "grad_norm": 0.6475440859794617, "learning_rate": 8.96584594109679e-05, "loss": 1.7181, "step": 67387 }, { "epoch": 2.24, "grad_norm": 0.7013192772865295, "learning_rate": 8.965100691682523e-05, "loss": 1.6187, "step": 67388 }, { "epoch": 2.24, "grad_norm": 0.6758149266242981, "learning_rate": 8.964355467801532e-05, "loss": 1.7193, "step": 67389 }, { "epoch": 2.24, "grad_norm": 0.665654718875885, "learning_rate": 8.963610269454702e-05, "loss": 1.6775, "step": 67390 }, { "epoch": 2.24, "grad_norm": 0.6697131395339966, "learning_rate": 8.962865096642962e-05, "loss": 1.6673, "step": 67391 }, { "epoch": 2.24, "grad_norm": 0.6700288653373718, "learning_rate": 8.962119949367203e-05, "loss": 1.7691, "step": 67392 }, { "epoch": 2.24, "grad_norm": 0.6613683700561523, "learning_rate": 8.96137482762832e-05, "loss": 1.638, "step": 67393 }, { "epoch": 2.24, "grad_norm": 0.6568856835365295, "learning_rate": 8.960629731427243e-05, "loss": 1.7156, "step": 67394 }, { "epoch": 2.24, "grad_norm": 0.6399388909339905, "learning_rate": 8.959884660764845e-05, "loss": 1.7338, "step": 67395 }, { "epoch": 2.24, "grad_norm": 0.6900070905685425, "learning_rate": 8.959139615642059e-05, "loss": 1.8058, "step": 67396 }, { "epoch": 2.24, "grad_norm": 0.6776256561279297, "learning_rate": 8.958394596059779e-05, "loss": 1.7417, "step": 67397 }, { "epoch": 2.24, "grad_norm": 0.6673113703727722, "learning_rate": 8.957649602018898e-05, "loss": 1.6441, "step": 67398 }, { "epoch": 2.24, "grad_norm": 0.690653920173645, "learning_rate": 8.956904633520338e-05, "loss": 1.6877, "step": 67399 }, { "epoch": 2.24, "grad_norm": 0.6481242775917053, "learning_rate": 8.956159690564983e-05, "loss": 1.7451, "step": 67400 }, { "epoch": 2.24, "grad_norm": 0.6544805765151978, "learning_rate": 8.955414773153765e-05, "loss": 1.7099, "step": 67401 }, { "epoch": 2.24, "grad_norm": 0.673683762550354, "learning_rate": 8.95466988128756e-05, "loss": 1.7301, "step": 67402 }, { "epoch": 2.24, "grad_norm": 0.679006814956665, "learning_rate": 8.953925014967292e-05, "loss": 1.7175, "step": 67403 }, { "epoch": 2.24, "grad_norm": 0.642710268497467, "learning_rate": 8.953180174193863e-05, "loss": 1.7119, "step": 67404 }, { "epoch": 2.24, "grad_norm": 0.64576256275177, "learning_rate": 8.952435358968159e-05, "loss": 1.6926, "step": 67405 }, { "epoch": 2.24, "grad_norm": 0.6662808060646057, "learning_rate": 8.95169056929111e-05, "loss": 1.6975, "step": 67406 }, { "epoch": 2.24, "grad_norm": 0.7004960775375366, "learning_rate": 8.950945805163604e-05, "loss": 1.6919, "step": 67407 }, { "epoch": 2.24, "grad_norm": 0.6617724299430847, "learning_rate": 8.950201066586539e-05, "loss": 1.7518, "step": 67408 }, { "epoch": 2.24, "grad_norm": 0.647479772567749, "learning_rate": 8.94945635356083e-05, "loss": 1.6641, "step": 67409 }, { "epoch": 2.24, "grad_norm": 0.6637608408927917, "learning_rate": 8.94871166608739e-05, "loss": 1.6476, "step": 67410 }, { "epoch": 2.24, "grad_norm": 0.7138959765434265, "learning_rate": 8.94796700416711e-05, "loss": 1.7168, "step": 67411 }, { "epoch": 2.24, "grad_norm": 0.6588625907897949, "learning_rate": 8.947222367800885e-05, "loss": 1.7101, "step": 67412 }, { "epoch": 2.24, "grad_norm": 0.6634307503700256, "learning_rate": 8.946477756989643e-05, "loss": 1.6955, "step": 67413 }, { "epoch": 2.24, "grad_norm": 0.6739336252212524, "learning_rate": 8.945733171734274e-05, "loss": 1.6546, "step": 67414 }, { "epoch": 2.24, "grad_norm": 0.6557222604751587, "learning_rate": 8.944988612035676e-05, "loss": 1.6961, "step": 67415 }, { "epoch": 2.24, "grad_norm": 0.6664282083511353, "learning_rate": 8.944244077894755e-05, "loss": 1.7291, "step": 67416 }, { "epoch": 2.24, "grad_norm": 0.6555705666542053, "learning_rate": 8.943499569312441e-05, "loss": 1.6824, "step": 67417 }, { "epoch": 2.24, "grad_norm": 0.6506645083427429, "learning_rate": 8.942755086289594e-05, "loss": 1.6423, "step": 67418 }, { "epoch": 2.24, "grad_norm": 0.6514649391174316, "learning_rate": 8.942010628827142e-05, "loss": 1.709, "step": 67419 }, { "epoch": 2.24, "grad_norm": 0.6893534660339355, "learning_rate": 8.941266196925996e-05, "loss": 1.6843, "step": 67420 }, { "epoch": 2.24, "grad_norm": 0.6495453119277954, "learning_rate": 8.940521790587049e-05, "loss": 1.641, "step": 67421 }, { "epoch": 2.24, "grad_norm": 0.6659688353538513, "learning_rate": 8.939777409811196e-05, "loss": 1.6811, "step": 67422 }, { "epoch": 2.24, "grad_norm": 0.6674648523330688, "learning_rate": 8.939033054599351e-05, "loss": 1.6553, "step": 67423 }, { "epoch": 2.24, "grad_norm": 0.6476718187332153, "learning_rate": 8.938288724952433e-05, "loss": 1.6581, "step": 67424 }, { "epoch": 2.24, "grad_norm": 0.6666948199272156, "learning_rate": 8.937544420871315e-05, "loss": 1.7394, "step": 67425 }, { "epoch": 2.24, "grad_norm": 0.675347089767456, "learning_rate": 8.936800142356912e-05, "loss": 1.7223, "step": 67426 }, { "epoch": 2.24, "grad_norm": 0.6354129910469055, "learning_rate": 8.936055889410141e-05, "loss": 1.7394, "step": 67427 }, { "epoch": 2.24, "grad_norm": 0.6869890093803406, "learning_rate": 8.93531166203189e-05, "loss": 1.686, "step": 67428 }, { "epoch": 2.24, "grad_norm": 0.6829537749290466, "learning_rate": 8.934567460223062e-05, "loss": 1.7635, "step": 67429 }, { "epoch": 2.24, "grad_norm": 0.6901609301567078, "learning_rate": 8.933823283984563e-05, "loss": 1.6482, "step": 67430 }, { "epoch": 2.24, "grad_norm": 0.6660495400428772, "learning_rate": 8.93307913331732e-05, "loss": 1.6886, "step": 67431 }, { "epoch": 2.24, "grad_norm": 0.6806210279464722, "learning_rate": 8.932335008222194e-05, "loss": 1.7793, "step": 67432 }, { "epoch": 2.24, "grad_norm": 0.6530744433403015, "learning_rate": 8.931590908700109e-05, "loss": 1.7297, "step": 67433 }, { "epoch": 2.24, "grad_norm": 0.6500023007392883, "learning_rate": 8.93084683475198e-05, "loss": 1.6991, "step": 67434 }, { "epoch": 2.24, "grad_norm": 0.6348996758460999, "learning_rate": 8.930102786378697e-05, "loss": 1.6488, "step": 67435 }, { "epoch": 2.24, "grad_norm": 0.6645455956459045, "learning_rate": 8.929358763581155e-05, "loss": 1.6717, "step": 67436 }, { "epoch": 2.24, "grad_norm": 0.6595005393028259, "learning_rate": 8.928614766360274e-05, "loss": 1.7171, "step": 67437 }, { "epoch": 2.24, "grad_norm": 0.6718816161155701, "learning_rate": 8.927870794716952e-05, "loss": 1.6337, "step": 67438 }, { "epoch": 2.24, "grad_norm": 0.6411465406417847, "learning_rate": 8.927126848652084e-05, "loss": 1.7025, "step": 67439 }, { "epoch": 2.24, "grad_norm": 0.6393585801124573, "learning_rate": 8.926382928166575e-05, "loss": 1.6892, "step": 67440 }, { "epoch": 2.24, "grad_norm": 0.6689009666442871, "learning_rate": 8.925639033261343e-05, "loss": 1.7748, "step": 67441 }, { "epoch": 2.24, "grad_norm": 0.6499971747398376, "learning_rate": 8.924895163937278e-05, "loss": 1.5948, "step": 67442 }, { "epoch": 2.24, "grad_norm": 0.6667881011962891, "learning_rate": 8.924151320195276e-05, "loss": 1.6845, "step": 67443 }, { "epoch": 2.24, "grad_norm": 0.6558709740638733, "learning_rate": 8.923407502036261e-05, "loss": 1.6758, "step": 67444 }, { "epoch": 2.24, "grad_norm": 0.6667668223381042, "learning_rate": 8.922663709461123e-05, "loss": 1.716, "step": 67445 }, { "epoch": 2.24, "grad_norm": 0.6523773670196533, "learning_rate": 8.921919942470757e-05, "loss": 1.6517, "step": 67446 }, { "epoch": 2.24, "grad_norm": 0.6733600497245789, "learning_rate": 8.92117620106608e-05, "loss": 1.625, "step": 67447 }, { "epoch": 2.24, "grad_norm": 0.6635129451751709, "learning_rate": 8.920432485247983e-05, "loss": 1.6906, "step": 67448 }, { "epoch": 2.24, "grad_norm": 0.6746050119400024, "learning_rate": 8.919688795017383e-05, "loss": 1.6915, "step": 67449 }, { "epoch": 2.24, "grad_norm": 0.679744303226471, "learning_rate": 8.918945130375166e-05, "loss": 1.8085, "step": 67450 }, { "epoch": 2.24, "grad_norm": 0.6730838418006897, "learning_rate": 8.918201491322253e-05, "loss": 1.6562, "step": 67451 }, { "epoch": 2.24, "grad_norm": 0.6563521027565002, "learning_rate": 8.917457877859537e-05, "loss": 1.7234, "step": 67452 }, { "epoch": 2.24, "grad_norm": 0.6512070298194885, "learning_rate": 8.916714289987911e-05, "loss": 1.6514, "step": 67453 }, { "epoch": 2.24, "grad_norm": 0.6659674048423767, "learning_rate": 8.915970727708299e-05, "loss": 1.7304, "step": 67454 }, { "epoch": 2.24, "grad_norm": 0.6599081158638, "learning_rate": 8.91522719102158e-05, "loss": 1.7344, "step": 67455 }, { "epoch": 2.24, "grad_norm": 0.6737382411956787, "learning_rate": 8.914483679928679e-05, "loss": 1.6881, "step": 67456 }, { "epoch": 2.24, "grad_norm": 0.6587768197059631, "learning_rate": 8.913740194430489e-05, "loss": 1.7576, "step": 67457 }, { "epoch": 2.24, "grad_norm": 0.6671046614646912, "learning_rate": 8.912996734527901e-05, "loss": 1.6907, "step": 67458 }, { "epoch": 2.24, "grad_norm": 0.6514306664466858, "learning_rate": 8.912253300221835e-05, "loss": 1.7006, "step": 67459 }, { "epoch": 2.24, "grad_norm": 0.7809747457504272, "learning_rate": 8.911509891513179e-05, "loss": 1.7215, "step": 67460 }, { "epoch": 2.24, "grad_norm": 0.6651226878166199, "learning_rate": 8.910766508402854e-05, "loss": 1.5163, "step": 67461 }, { "epoch": 2.24, "grad_norm": 0.6749561429023743, "learning_rate": 8.910023150891738e-05, "loss": 1.69, "step": 67462 }, { "epoch": 2.24, "grad_norm": 0.6399242281913757, "learning_rate": 8.90927981898076e-05, "loss": 1.7795, "step": 67463 }, { "epoch": 2.24, "grad_norm": 0.6343634128570557, "learning_rate": 8.908536512670803e-05, "loss": 1.6141, "step": 67464 }, { "epoch": 2.24, "grad_norm": 0.6397666335105896, "learning_rate": 8.907793231962772e-05, "loss": 1.693, "step": 67465 }, { "epoch": 2.24, "grad_norm": 0.6376230120658875, "learning_rate": 8.907049976857577e-05, "loss": 1.7055, "step": 67466 }, { "epoch": 2.24, "grad_norm": 0.661601722240448, "learning_rate": 8.906306747356117e-05, "loss": 1.7307, "step": 67467 }, { "epoch": 2.24, "grad_norm": 0.6611648797988892, "learning_rate": 8.905563543459285e-05, "loss": 1.6778, "step": 67468 }, { "epoch": 2.24, "grad_norm": 0.6786252856254578, "learning_rate": 8.904820365167989e-05, "loss": 1.763, "step": 67469 }, { "epoch": 2.24, "grad_norm": 0.6490945219993591, "learning_rate": 8.904077212483141e-05, "loss": 1.7098, "step": 67470 }, { "epoch": 2.24, "grad_norm": 0.649290144443512, "learning_rate": 8.903334085405638e-05, "loss": 1.7281, "step": 67471 }, { "epoch": 2.24, "grad_norm": 0.6632068157196045, "learning_rate": 8.902590983936366e-05, "loss": 1.6226, "step": 67472 }, { "epoch": 2.24, "grad_norm": 0.6441580057144165, "learning_rate": 8.901847908076252e-05, "loss": 1.6522, "step": 67473 }, { "epoch": 2.24, "grad_norm": 0.6563649773597717, "learning_rate": 8.901104857826186e-05, "loss": 1.7044, "step": 67474 }, { "epoch": 2.24, "grad_norm": 0.6628249287605286, "learning_rate": 8.900361833187061e-05, "loss": 1.7259, "step": 67475 }, { "epoch": 2.24, "grad_norm": 0.6577169895172119, "learning_rate": 8.899618834159784e-05, "loss": 1.7008, "step": 67476 }, { "epoch": 2.24, "grad_norm": 0.6691953539848328, "learning_rate": 8.898875860745273e-05, "loss": 1.6618, "step": 67477 }, { "epoch": 2.25, "grad_norm": 0.6449027061462402, "learning_rate": 8.898132912944419e-05, "loss": 1.6984, "step": 67478 }, { "epoch": 2.25, "grad_norm": 0.6661170125007629, "learning_rate": 8.897389990758109e-05, "loss": 1.7205, "step": 67479 }, { "epoch": 2.25, "grad_norm": 0.6616851091384888, "learning_rate": 8.896647094187273e-05, "loss": 1.6418, "step": 67480 }, { "epoch": 2.25, "grad_norm": 0.6555190086364746, "learning_rate": 8.895904223232792e-05, "loss": 1.7839, "step": 67481 }, { "epoch": 2.25, "grad_norm": 0.6539582014083862, "learning_rate": 8.895161377895569e-05, "loss": 1.7465, "step": 67482 }, { "epoch": 2.25, "grad_norm": 0.6596643924713135, "learning_rate": 8.894418558176506e-05, "loss": 1.6809, "step": 67483 }, { "epoch": 2.25, "grad_norm": 0.6673921942710876, "learning_rate": 8.893675764076522e-05, "loss": 1.6863, "step": 67484 }, { "epoch": 2.25, "grad_norm": 0.6510820984840393, "learning_rate": 8.892932995596507e-05, "loss": 1.703, "step": 67485 }, { "epoch": 2.25, "grad_norm": 0.6520156264305115, "learning_rate": 8.89219025273735e-05, "loss": 1.7141, "step": 67486 }, { "epoch": 2.25, "grad_norm": 0.6539450287818909, "learning_rate": 8.891447535499972e-05, "loss": 1.7374, "step": 67487 }, { "epoch": 2.25, "grad_norm": 0.6554569005966187, "learning_rate": 8.890704843885267e-05, "loss": 1.712, "step": 67488 }, { "epoch": 2.25, "grad_norm": 0.6628333926200867, "learning_rate": 8.889962177894129e-05, "loss": 1.6805, "step": 67489 }, { "epoch": 2.25, "grad_norm": 0.6792041659355164, "learning_rate": 8.889219537527464e-05, "loss": 1.7231, "step": 67490 }, { "epoch": 2.25, "grad_norm": 0.6458348035812378, "learning_rate": 8.888476922786187e-05, "loss": 1.6878, "step": 67491 }, { "epoch": 2.25, "grad_norm": 0.65485680103302, "learning_rate": 8.887734333671187e-05, "loss": 1.6529, "step": 67492 }, { "epoch": 2.25, "grad_norm": 0.6555177569389343, "learning_rate": 8.886991770183358e-05, "loss": 1.6895, "step": 67493 }, { "epoch": 2.25, "grad_norm": 0.6615035533905029, "learning_rate": 8.886249232323619e-05, "loss": 1.5889, "step": 67494 }, { "epoch": 2.25, "grad_norm": 0.6543704271316528, "learning_rate": 8.885506720092865e-05, "loss": 1.7057, "step": 67495 }, { "epoch": 2.25, "grad_norm": 0.6596720814704895, "learning_rate": 8.884764233491983e-05, "loss": 1.7386, "step": 67496 }, { "epoch": 2.25, "grad_norm": 0.6454231142997742, "learning_rate": 8.884021772521886e-05, "loss": 1.7177, "step": 67497 }, { "epoch": 2.25, "grad_norm": 0.6717092990875244, "learning_rate": 8.883279337183484e-05, "loss": 1.7355, "step": 67498 }, { "epoch": 2.25, "grad_norm": 0.6770217418670654, "learning_rate": 8.882536927477672e-05, "loss": 1.6803, "step": 67499 }, { "epoch": 2.25, "grad_norm": 0.6325024366378784, "learning_rate": 8.881794543405337e-05, "loss": 1.71, "step": 67500 }, { "epoch": 2.25, "grad_norm": 0.6752529144287109, "learning_rate": 8.881052184967401e-05, "loss": 1.714, "step": 67501 }, { "epoch": 2.25, "grad_norm": 0.6541352272033691, "learning_rate": 8.88030985216476e-05, "loss": 1.684, "step": 67502 }, { "epoch": 2.25, "grad_norm": 0.6566112041473389, "learning_rate": 8.879567544998298e-05, "loss": 1.6696, "step": 67503 }, { "epoch": 2.25, "grad_norm": 0.6561301350593567, "learning_rate": 8.878825263468939e-05, "loss": 1.7986, "step": 67504 }, { "epoch": 2.25, "grad_norm": 0.6499984264373779, "learning_rate": 8.878083007577563e-05, "loss": 1.6443, "step": 67505 }, { "epoch": 2.25, "grad_norm": 0.6563148498535156, "learning_rate": 8.877340777325093e-05, "loss": 1.6933, "step": 67506 }, { "epoch": 2.25, "grad_norm": 0.6438465714454651, "learning_rate": 8.87659857271241e-05, "loss": 1.6726, "step": 67507 }, { "epoch": 2.25, "grad_norm": 0.6515691876411438, "learning_rate": 8.875856393740434e-05, "loss": 1.7077, "step": 67508 }, { "epoch": 2.25, "grad_norm": 0.65030437707901, "learning_rate": 8.875114240410053e-05, "loss": 1.6737, "step": 67509 }, { "epoch": 2.25, "grad_norm": 0.670103907585144, "learning_rate": 8.874372112722163e-05, "loss": 1.7759, "step": 67510 }, { "epoch": 2.25, "grad_norm": 0.6537710428237915, "learning_rate": 8.873630010677684e-05, "loss": 1.6532, "step": 67511 }, { "epoch": 2.25, "grad_norm": 0.6789921522140503, "learning_rate": 8.872887934277493e-05, "loss": 1.7728, "step": 67512 }, { "epoch": 2.25, "grad_norm": 0.6537970900535583, "learning_rate": 8.872145883522514e-05, "loss": 1.7068, "step": 67513 }, { "epoch": 2.25, "grad_norm": 0.6677110195159912, "learning_rate": 8.871403858413637e-05, "loss": 1.7012, "step": 67514 }, { "epoch": 2.25, "grad_norm": 0.6449652910232544, "learning_rate": 8.870661858951753e-05, "loss": 1.678, "step": 67515 }, { "epoch": 2.25, "grad_norm": 0.6404314637184143, "learning_rate": 8.86991988513778e-05, "loss": 1.6268, "step": 67516 }, { "epoch": 2.25, "grad_norm": 0.6648371815681458, "learning_rate": 8.869177936972605e-05, "loss": 1.7099, "step": 67517 }, { "epoch": 2.25, "grad_norm": 0.6711599826812744, "learning_rate": 8.868436014457141e-05, "loss": 1.6838, "step": 67518 }, { "epoch": 2.25, "grad_norm": 0.6663622856140137, "learning_rate": 8.867694117592272e-05, "loss": 1.729, "step": 67519 }, { "epoch": 2.25, "grad_norm": 0.6334539651870728, "learning_rate": 8.86695224637892e-05, "loss": 1.6593, "step": 67520 }, { "epoch": 2.25, "grad_norm": 0.6813942193984985, "learning_rate": 8.866210400817974e-05, "loss": 1.6965, "step": 67521 }, { "epoch": 2.25, "grad_norm": 0.6766799092292786, "learning_rate": 8.865468580910325e-05, "loss": 1.6974, "step": 67522 }, { "epoch": 2.25, "grad_norm": 0.6604564189910889, "learning_rate": 8.864726786656893e-05, "loss": 1.7821, "step": 67523 }, { "epoch": 2.25, "grad_norm": 0.6572933793067932, "learning_rate": 8.863985018058568e-05, "loss": 1.6751, "step": 67524 }, { "epoch": 2.25, "grad_norm": 0.6688015460968018, "learning_rate": 8.863243275116241e-05, "loss": 1.7215, "step": 67525 }, { "epoch": 2.25, "grad_norm": 0.6648604273796082, "learning_rate": 8.862501557830823e-05, "loss": 1.7006, "step": 67526 }, { "epoch": 2.25, "grad_norm": 0.6809027194976807, "learning_rate": 8.861759866203221e-05, "loss": 1.7548, "step": 67527 }, { "epoch": 2.25, "grad_norm": 0.6404210329055786, "learning_rate": 8.861018200234332e-05, "loss": 1.7524, "step": 67528 }, { "epoch": 2.25, "grad_norm": 0.6501778960227966, "learning_rate": 8.860276559925041e-05, "loss": 1.6718, "step": 67529 }, { "epoch": 2.25, "grad_norm": 0.6379556655883789, "learning_rate": 8.859534945276265e-05, "loss": 1.6536, "step": 67530 }, { "epoch": 2.25, "grad_norm": 0.6572752594947815, "learning_rate": 8.858793356288903e-05, "loss": 1.6998, "step": 67531 }, { "epoch": 2.25, "grad_norm": 0.663689136505127, "learning_rate": 8.858051792963841e-05, "loss": 1.7258, "step": 67532 }, { "epoch": 2.25, "grad_norm": 0.6829795241355896, "learning_rate": 8.857310255301989e-05, "loss": 1.7888, "step": 67533 }, { "epoch": 2.25, "grad_norm": 0.6831724047660828, "learning_rate": 8.856568743304265e-05, "loss": 1.6704, "step": 67534 }, { "epoch": 2.25, "grad_norm": 0.6855221390724182, "learning_rate": 8.855827256971532e-05, "loss": 1.7139, "step": 67535 }, { "epoch": 2.25, "grad_norm": 0.697510302066803, "learning_rate": 8.855085796304711e-05, "loss": 1.7356, "step": 67536 }, { "epoch": 2.25, "grad_norm": 0.6603670120239258, "learning_rate": 8.854344361304708e-05, "loss": 1.7113, "step": 67537 }, { "epoch": 2.25, "grad_norm": 0.6655805110931396, "learning_rate": 8.853602951972417e-05, "loss": 1.6752, "step": 67538 }, { "epoch": 2.25, "grad_norm": 0.6588432192802429, "learning_rate": 8.852861568308727e-05, "loss": 1.7069, "step": 67539 }, { "epoch": 2.25, "grad_norm": 0.7154786586761475, "learning_rate": 8.852120210314546e-05, "loss": 1.7279, "step": 67540 }, { "epoch": 2.25, "grad_norm": 0.6924679279327393, "learning_rate": 8.851378877990795e-05, "loss": 1.7286, "step": 67541 }, { "epoch": 2.25, "grad_norm": 0.6728090643882751, "learning_rate": 8.850637571338332e-05, "loss": 1.7322, "step": 67542 }, { "epoch": 2.25, "grad_norm": 0.6634790301322937, "learning_rate": 8.849896290358082e-05, "loss": 1.7245, "step": 67543 }, { "epoch": 2.25, "grad_norm": 0.666415810585022, "learning_rate": 8.849155035050947e-05, "loss": 1.7428, "step": 67544 }, { "epoch": 2.25, "grad_norm": 0.6652450561523438, "learning_rate": 8.848413805417825e-05, "loss": 1.6939, "step": 67545 }, { "epoch": 2.25, "grad_norm": 0.6507782340049744, "learning_rate": 8.847672601459603e-05, "loss": 1.7327, "step": 67546 }, { "epoch": 2.25, "grad_norm": 0.6874887943267822, "learning_rate": 8.84693142317719e-05, "loss": 1.7845, "step": 67547 }, { "epoch": 2.25, "grad_norm": 0.6599017381668091, "learning_rate": 8.846190270571502e-05, "loss": 1.6763, "step": 67548 }, { "epoch": 2.25, "grad_norm": 0.6720980405807495, "learning_rate": 8.845449143643404e-05, "loss": 1.643, "step": 67549 }, { "epoch": 2.25, "grad_norm": 0.6553157567977905, "learning_rate": 8.844708042393817e-05, "loss": 1.7194, "step": 67550 }, { "epoch": 2.25, "grad_norm": 0.6543638706207275, "learning_rate": 8.843966966823643e-05, "loss": 1.7645, "step": 67551 }, { "epoch": 2.25, "grad_norm": 0.6701517701148987, "learning_rate": 8.843225916933775e-05, "loss": 1.7197, "step": 67552 }, { "epoch": 2.25, "grad_norm": 0.6831341981887817, "learning_rate": 8.842484892725107e-05, "loss": 1.685, "step": 67553 }, { "epoch": 2.25, "grad_norm": 0.6548680663108826, "learning_rate": 8.841743894198552e-05, "loss": 1.7267, "step": 67554 }, { "epoch": 2.25, "grad_norm": 0.6521584987640381, "learning_rate": 8.841002921355007e-05, "loss": 1.6946, "step": 67555 }, { "epoch": 2.25, "grad_norm": 0.6579105257987976, "learning_rate": 8.840261974195351e-05, "loss": 1.6749, "step": 67556 }, { "epoch": 2.25, "grad_norm": 0.6717765927314758, "learning_rate": 8.839521052720501e-05, "loss": 1.6724, "step": 67557 }, { "epoch": 2.25, "grad_norm": 0.6653682589530945, "learning_rate": 8.838780156931366e-05, "loss": 1.732, "step": 67558 }, { "epoch": 2.25, "grad_norm": 0.6739997267723083, "learning_rate": 8.838039286828835e-05, "loss": 1.7065, "step": 67559 }, { "epoch": 2.25, "grad_norm": 0.6592697501182556, "learning_rate": 8.837298442413794e-05, "loss": 1.7501, "step": 67560 }, { "epoch": 2.25, "grad_norm": 0.6494042873382568, "learning_rate": 8.836557623687164e-05, "loss": 1.6561, "step": 67561 }, { "epoch": 2.25, "grad_norm": 0.6731928586959839, "learning_rate": 8.835816830649837e-05, "loss": 1.6864, "step": 67562 }, { "epoch": 2.25, "grad_norm": 0.7626650929450989, "learning_rate": 8.835076063302699e-05, "loss": 1.6696, "step": 67563 }, { "epoch": 2.25, "grad_norm": 0.695189356803894, "learning_rate": 8.834335321646669e-05, "loss": 1.8109, "step": 67564 }, { "epoch": 2.25, "grad_norm": 0.661558210849762, "learning_rate": 8.83359460568263e-05, "loss": 1.7475, "step": 67565 }, { "epoch": 2.25, "grad_norm": 0.672299861907959, "learning_rate": 8.832853915411496e-05, "loss": 1.6456, "step": 67566 }, { "epoch": 2.25, "grad_norm": 0.6464542150497437, "learning_rate": 8.83211325083415e-05, "loss": 1.6799, "step": 67567 }, { "epoch": 2.25, "grad_norm": 0.6498428583145142, "learning_rate": 8.83137261195151e-05, "loss": 1.6844, "step": 67568 }, { "epoch": 2.25, "grad_norm": 0.6804976463317871, "learning_rate": 8.830631998764461e-05, "loss": 1.7118, "step": 67569 }, { "epoch": 2.25, "grad_norm": 0.6527295112609863, "learning_rate": 8.829891411273898e-05, "loss": 1.6313, "step": 67570 }, { "epoch": 2.25, "grad_norm": 0.6754300594329834, "learning_rate": 8.829150849480736e-05, "loss": 1.7194, "step": 67571 }, { "epoch": 2.25, "grad_norm": 0.6667048931121826, "learning_rate": 8.828410313385861e-05, "loss": 1.7877, "step": 67572 }, { "epoch": 2.25, "grad_norm": 0.6540094614028931, "learning_rate": 8.827669802990182e-05, "loss": 1.6901, "step": 67573 }, { "epoch": 2.25, "grad_norm": 0.6416999101638794, "learning_rate": 8.826929318294592e-05, "loss": 1.7218, "step": 67574 }, { "epoch": 2.25, "grad_norm": 0.658463180065155, "learning_rate": 8.826188859299979e-05, "loss": 1.6519, "step": 67575 }, { "epoch": 2.25, "grad_norm": 0.6603708267211914, "learning_rate": 8.825448426007266e-05, "loss": 1.6523, "step": 67576 }, { "epoch": 2.25, "grad_norm": 0.6569890975952148, "learning_rate": 8.824708018417328e-05, "loss": 1.7171, "step": 67577 }, { "epoch": 2.25, "grad_norm": 0.6534214019775391, "learning_rate": 8.823967636531083e-05, "loss": 1.6535, "step": 67578 }, { "epoch": 2.25, "grad_norm": 0.6793627738952637, "learning_rate": 8.823227280349411e-05, "loss": 1.732, "step": 67579 }, { "epoch": 2.25, "grad_norm": 0.6763233542442322, "learning_rate": 8.822486949873235e-05, "loss": 1.6466, "step": 67580 }, { "epoch": 2.25, "grad_norm": 0.6846471428871155, "learning_rate": 8.821746645103435e-05, "loss": 1.7022, "step": 67581 }, { "epoch": 2.25, "grad_norm": 0.6639742851257324, "learning_rate": 8.821006366040904e-05, "loss": 1.741, "step": 67582 }, { "epoch": 2.25, "grad_norm": 0.6738713383674622, "learning_rate": 8.820266112686564e-05, "loss": 1.6999, "step": 67583 }, { "epoch": 2.25, "grad_norm": 0.6228777766227722, "learning_rate": 8.819525885041298e-05, "loss": 1.6847, "step": 67584 }, { "epoch": 2.25, "grad_norm": 0.6746460199356079, "learning_rate": 8.818785683105996e-05, "loss": 1.7275, "step": 67585 }, { "epoch": 2.25, "grad_norm": 0.6570313572883606, "learning_rate": 8.81804550688157e-05, "loss": 1.7152, "step": 67586 }, { "epoch": 2.25, "grad_norm": 0.6840406656265259, "learning_rate": 8.817305356368925e-05, "loss": 1.7374, "step": 67587 }, { "epoch": 2.25, "grad_norm": 0.6753702163696289, "learning_rate": 8.816565231568947e-05, "loss": 1.7464, "step": 67588 }, { "epoch": 2.25, "grad_norm": 0.655777633190155, "learning_rate": 8.815825132482531e-05, "loss": 1.7209, "step": 67589 }, { "epoch": 2.25, "grad_norm": 0.6541397571563721, "learning_rate": 8.815085059110589e-05, "loss": 1.7239, "step": 67590 }, { "epoch": 2.25, "grad_norm": 0.6656556129455566, "learning_rate": 8.814345011454014e-05, "loss": 1.7456, "step": 67591 }, { "epoch": 2.25, "grad_norm": 0.6412654519081116, "learning_rate": 8.813604989513694e-05, "loss": 1.6086, "step": 67592 }, { "epoch": 2.25, "grad_norm": 0.6503221392631531, "learning_rate": 8.812864993290535e-05, "loss": 1.6211, "step": 67593 }, { "epoch": 2.25, "grad_norm": 0.6515571475028992, "learning_rate": 8.812125022785446e-05, "loss": 1.7332, "step": 67594 }, { "epoch": 2.25, "grad_norm": 0.6717115044593811, "learning_rate": 8.811385077999316e-05, "loss": 1.7138, "step": 67595 }, { "epoch": 2.25, "grad_norm": 0.6556624174118042, "learning_rate": 8.810645158933032e-05, "loss": 1.6648, "step": 67596 }, { "epoch": 2.25, "grad_norm": 0.6673696041107178, "learning_rate": 8.80990526558751e-05, "loss": 1.7113, "step": 67597 }, { "epoch": 2.25, "grad_norm": 0.6640153527259827, "learning_rate": 8.809165397963643e-05, "loss": 1.6993, "step": 67598 }, { "epoch": 2.25, "grad_norm": 0.6630139946937561, "learning_rate": 8.808425556062318e-05, "loss": 1.5845, "step": 67599 }, { "epoch": 2.25, "grad_norm": 0.6817341446876526, "learning_rate": 8.807685739884442e-05, "loss": 1.7251, "step": 67600 }, { "epoch": 2.25, "grad_norm": 0.6812131404876709, "learning_rate": 8.806945949430922e-05, "loss": 1.6823, "step": 67601 }, { "epoch": 2.25, "grad_norm": 0.6593843698501587, "learning_rate": 8.806206184702647e-05, "loss": 1.708, "step": 67602 }, { "epoch": 2.25, "grad_norm": 0.64113849401474, "learning_rate": 8.805466445700504e-05, "loss": 1.5937, "step": 67603 }, { "epoch": 2.25, "grad_norm": 0.6806064248085022, "learning_rate": 8.80472673242541e-05, "loss": 1.6868, "step": 67604 }, { "epoch": 2.25, "grad_norm": 0.6867426037788391, "learning_rate": 8.803987044878261e-05, "loss": 1.6634, "step": 67605 }, { "epoch": 2.25, "grad_norm": 0.6801446080207825, "learning_rate": 8.803247383059932e-05, "loss": 1.7065, "step": 67606 }, { "epoch": 2.25, "grad_norm": 0.6908642053604126, "learning_rate": 8.802507746971344e-05, "loss": 1.8141, "step": 67607 }, { "epoch": 2.25, "grad_norm": 0.6933754086494446, "learning_rate": 8.801768136613392e-05, "loss": 1.7467, "step": 67608 }, { "epoch": 2.25, "grad_norm": 0.6492396593093872, "learning_rate": 8.801028551986974e-05, "loss": 1.7285, "step": 67609 }, { "epoch": 2.25, "grad_norm": 0.6331712603569031, "learning_rate": 8.800288993092972e-05, "loss": 1.6674, "step": 67610 }, { "epoch": 2.25, "grad_norm": 0.6446406245231628, "learning_rate": 8.799549459932306e-05, "loss": 1.7112, "step": 67611 }, { "epoch": 2.25, "grad_norm": 0.6665260195732117, "learning_rate": 8.798809952505864e-05, "loss": 1.7329, "step": 67612 }, { "epoch": 2.25, "grad_norm": 0.661720335483551, "learning_rate": 8.798070470814533e-05, "loss": 1.6787, "step": 67613 }, { "epoch": 2.25, "grad_norm": 0.6623813509941101, "learning_rate": 8.797331014859219e-05, "loss": 1.7056, "step": 67614 }, { "epoch": 2.25, "grad_norm": 0.6693978309631348, "learning_rate": 8.79659158464083e-05, "loss": 1.7537, "step": 67615 }, { "epoch": 2.25, "grad_norm": 0.649298369884491, "learning_rate": 8.795852180160258e-05, "loss": 1.7509, "step": 67616 }, { "epoch": 2.25, "grad_norm": 0.6465483903884888, "learning_rate": 8.795112801418386e-05, "loss": 1.7399, "step": 67617 }, { "epoch": 2.25, "grad_norm": 0.67591792345047, "learning_rate": 8.794373448416131e-05, "loss": 1.7199, "step": 67618 }, { "epoch": 2.25, "grad_norm": 0.6518844962120056, "learning_rate": 8.793634121154386e-05, "loss": 1.6759, "step": 67619 }, { "epoch": 2.25, "grad_norm": 0.6782634258270264, "learning_rate": 8.79289481963403e-05, "loss": 1.6919, "step": 67620 }, { "epoch": 2.25, "grad_norm": 0.6749255061149597, "learning_rate": 8.792155543855989e-05, "loss": 1.6853, "step": 67621 }, { "epoch": 2.25, "grad_norm": 0.6645480990409851, "learning_rate": 8.791416293821136e-05, "loss": 1.6919, "step": 67622 }, { "epoch": 2.25, "grad_norm": 0.6778430938720703, "learning_rate": 8.790677069530385e-05, "loss": 1.6691, "step": 67623 }, { "epoch": 2.25, "grad_norm": 0.6476856470108032, "learning_rate": 8.789937870984618e-05, "loss": 1.6838, "step": 67624 }, { "epoch": 2.25, "grad_norm": 0.6514532566070557, "learning_rate": 8.789198698184753e-05, "loss": 1.7509, "step": 67625 }, { "epoch": 2.25, "grad_norm": 0.6476175785064697, "learning_rate": 8.788459551131676e-05, "loss": 1.6993, "step": 67626 }, { "epoch": 2.25, "grad_norm": 0.6325324773788452, "learning_rate": 8.787720429826273e-05, "loss": 1.661, "step": 67627 }, { "epoch": 2.25, "grad_norm": 0.662686288356781, "learning_rate": 8.78698133426946e-05, "loss": 1.7441, "step": 67628 }, { "epoch": 2.25, "grad_norm": 0.6696124076843262, "learning_rate": 8.786242264462118e-05, "loss": 1.6086, "step": 67629 }, { "epoch": 2.25, "grad_norm": 0.6914870142936707, "learning_rate": 8.78550322040516e-05, "loss": 1.6908, "step": 67630 }, { "epoch": 2.25, "grad_norm": 0.6580824255943298, "learning_rate": 8.784764202099479e-05, "loss": 1.7224, "step": 67631 }, { "epoch": 2.25, "grad_norm": 0.6445720791816711, "learning_rate": 8.784025209545955e-05, "loss": 1.6876, "step": 67632 }, { "epoch": 2.25, "grad_norm": 0.6959875822067261, "learning_rate": 8.78328624274551e-05, "loss": 1.6744, "step": 67633 }, { "epoch": 2.25, "grad_norm": 0.6669769287109375, "learning_rate": 8.78254730169902e-05, "loss": 1.6848, "step": 67634 }, { "epoch": 2.25, "grad_norm": 0.6756772398948669, "learning_rate": 8.781808386407399e-05, "loss": 1.597, "step": 67635 }, { "epoch": 2.25, "grad_norm": 0.6908811330795288, "learning_rate": 8.78106949687153e-05, "loss": 1.6812, "step": 67636 }, { "epoch": 2.25, "grad_norm": 0.6761718988418579, "learning_rate": 8.780330633092323e-05, "loss": 1.723, "step": 67637 }, { "epoch": 2.25, "grad_norm": 0.6601964235305786, "learning_rate": 8.77959179507067e-05, "loss": 1.7247, "step": 67638 }, { "epoch": 2.25, "grad_norm": 0.6901352405548096, "learning_rate": 8.778852982807451e-05, "loss": 1.7088, "step": 67639 }, { "epoch": 2.25, "grad_norm": 0.6578130125999451, "learning_rate": 8.778114196303592e-05, "loss": 1.6261, "step": 67640 }, { "epoch": 2.25, "grad_norm": 0.6980199217796326, "learning_rate": 8.777375435559976e-05, "loss": 1.7294, "step": 67641 }, { "epoch": 2.25, "grad_norm": 0.6868962049484253, "learning_rate": 8.776636700577489e-05, "loss": 1.7286, "step": 67642 }, { "epoch": 2.25, "grad_norm": 0.6610652208328247, "learning_rate": 8.775897991357035e-05, "loss": 1.6727, "step": 67643 }, { "epoch": 2.25, "grad_norm": 0.663812518119812, "learning_rate": 8.775159307899528e-05, "loss": 1.7202, "step": 67644 }, { "epoch": 2.25, "grad_norm": 0.653724730014801, "learning_rate": 8.774420650205847e-05, "loss": 1.7273, "step": 67645 }, { "epoch": 2.25, "grad_norm": 0.6498996615409851, "learning_rate": 8.773682018276883e-05, "loss": 1.6708, "step": 67646 }, { "epoch": 2.25, "grad_norm": 0.6613936424255371, "learning_rate": 8.772943412113552e-05, "loss": 1.671, "step": 67647 }, { "epoch": 2.25, "grad_norm": 0.8048550486564636, "learning_rate": 8.772204831716738e-05, "loss": 1.6821, "step": 67648 }, { "epoch": 2.25, "grad_norm": 0.6452651023864746, "learning_rate": 8.771466277087333e-05, "loss": 1.6331, "step": 67649 }, { "epoch": 2.25, "grad_norm": 0.6660327911376953, "learning_rate": 8.770727748226237e-05, "loss": 1.58, "step": 67650 }, { "epoch": 2.25, "grad_norm": 0.6589877605438232, "learning_rate": 8.769989245134372e-05, "loss": 1.7831, "step": 67651 }, { "epoch": 2.25, "grad_norm": 0.6475326418876648, "learning_rate": 8.769250767812588e-05, "loss": 1.7096, "step": 67652 }, { "epoch": 2.25, "grad_norm": 1.4290926456451416, "learning_rate": 8.76851231626181e-05, "loss": 1.759, "step": 67653 }, { "epoch": 2.25, "grad_norm": 0.6490217447280884, "learning_rate": 8.767773890482937e-05, "loss": 1.7093, "step": 67654 }, { "epoch": 2.25, "grad_norm": 0.6694661378860474, "learning_rate": 8.767035490476857e-05, "loss": 1.7447, "step": 67655 }, { "epoch": 2.25, "grad_norm": 0.6743045449256897, "learning_rate": 8.76629711624446e-05, "loss": 1.7064, "step": 67656 }, { "epoch": 2.25, "grad_norm": 0.6582611203193665, "learning_rate": 8.765558767786651e-05, "loss": 1.7265, "step": 67657 }, { "epoch": 2.25, "grad_norm": 0.6807824373245239, "learning_rate": 8.764820445104341e-05, "loss": 1.7811, "step": 67658 }, { "epoch": 2.25, "grad_norm": 0.6337264180183411, "learning_rate": 8.76408214819839e-05, "loss": 1.7401, "step": 67659 }, { "epoch": 2.25, "grad_norm": 0.6553393006324768, "learning_rate": 8.763343877069716e-05, "loss": 1.6965, "step": 67660 }, { "epoch": 2.25, "grad_norm": 0.6497437357902527, "learning_rate": 8.762605631719225e-05, "loss": 1.7473, "step": 67661 }, { "epoch": 2.25, "grad_norm": 0.6474480032920837, "learning_rate": 8.7618674121478e-05, "loss": 1.6993, "step": 67662 }, { "epoch": 2.25, "grad_norm": 0.6560683250427246, "learning_rate": 8.761129218356329e-05, "loss": 1.6583, "step": 67663 }, { "epoch": 2.25, "grad_norm": 0.6392645835876465, "learning_rate": 8.760391050345718e-05, "loss": 1.706, "step": 67664 }, { "epoch": 2.25, "grad_norm": 0.6547321081161499, "learning_rate": 8.759652908116882e-05, "loss": 1.7275, "step": 67665 }, { "epoch": 2.25, "grad_norm": 0.6631388664245605, "learning_rate": 8.758914791670679e-05, "loss": 1.6896, "step": 67666 }, { "epoch": 2.25, "grad_norm": 0.68283611536026, "learning_rate": 8.758176701008019e-05, "loss": 1.6641, "step": 67667 }, { "epoch": 2.25, "grad_norm": 0.6509641408920288, "learning_rate": 8.757438636129819e-05, "loss": 1.7818, "step": 67668 }, { "epoch": 2.25, "grad_norm": 0.6608383655548096, "learning_rate": 8.756700597036955e-05, "loss": 1.7168, "step": 67669 }, { "epoch": 2.25, "grad_norm": 0.649750292301178, "learning_rate": 8.755962583730318e-05, "loss": 1.7459, "step": 67670 }, { "epoch": 2.25, "grad_norm": 0.670365035533905, "learning_rate": 8.75522459621082e-05, "loss": 1.7113, "step": 67671 }, { "epoch": 2.25, "grad_norm": 0.6978457570075989, "learning_rate": 8.754486634479351e-05, "loss": 1.6634, "step": 67672 }, { "epoch": 2.25, "grad_norm": 0.6808773875236511, "learning_rate": 8.753748698536795e-05, "loss": 1.6965, "step": 67673 }, { "epoch": 2.25, "grad_norm": 0.6615576148033142, "learning_rate": 8.753010788384057e-05, "loss": 1.6831, "step": 67674 }, { "epoch": 2.25, "grad_norm": 0.6574928164482117, "learning_rate": 8.752272904022045e-05, "loss": 1.7561, "step": 67675 }, { "epoch": 2.25, "grad_norm": 0.6461440920829773, "learning_rate": 8.751535045451639e-05, "loss": 1.6981, "step": 67676 }, { "epoch": 2.25, "grad_norm": 0.6509563326835632, "learning_rate": 8.750797212673733e-05, "loss": 1.7736, "step": 67677 }, { "epoch": 2.25, "grad_norm": 0.6834269165992737, "learning_rate": 8.750059405689235e-05, "loss": 1.7454, "step": 67678 }, { "epoch": 2.25, "grad_norm": 0.6428908109664917, "learning_rate": 8.749321624499035e-05, "loss": 1.8078, "step": 67679 }, { "epoch": 2.25, "grad_norm": 0.6430286169052124, "learning_rate": 8.748583869104017e-05, "loss": 1.6804, "step": 67680 }, { "epoch": 2.25, "grad_norm": 0.6525773406028748, "learning_rate": 8.747846139505097e-05, "loss": 1.6637, "step": 67681 }, { "epoch": 2.25, "grad_norm": 0.6737834811210632, "learning_rate": 8.747108435703152e-05, "loss": 1.7311, "step": 67682 }, { "epoch": 2.25, "grad_norm": 0.6754626631736755, "learning_rate": 8.746370757699094e-05, "loss": 1.7738, "step": 67683 }, { "epoch": 2.25, "grad_norm": 0.6468178629875183, "learning_rate": 8.745633105493802e-05, "loss": 1.7237, "step": 67684 }, { "epoch": 2.25, "grad_norm": 0.6810010075569153, "learning_rate": 8.74489547908819e-05, "loss": 1.787, "step": 67685 }, { "epoch": 2.25, "grad_norm": 0.6650921702384949, "learning_rate": 8.744157878483143e-05, "loss": 1.7619, "step": 67686 }, { "epoch": 2.25, "grad_norm": 0.6462055444717407, "learning_rate": 8.743420303679543e-05, "loss": 1.7137, "step": 67687 }, { "epoch": 2.25, "grad_norm": 0.6689923405647278, "learning_rate": 8.742682754678313e-05, "loss": 1.7363, "step": 67688 }, { "epoch": 2.25, "grad_norm": 0.6593518853187561, "learning_rate": 8.741945231480319e-05, "loss": 1.7259, "step": 67689 }, { "epoch": 2.25, "grad_norm": 0.6860777735710144, "learning_rate": 8.741207734086486e-05, "loss": 1.6769, "step": 67690 }, { "epoch": 2.25, "grad_norm": 0.6622947454452515, "learning_rate": 8.740470262497692e-05, "loss": 1.7969, "step": 67691 }, { "epoch": 2.25, "grad_norm": 0.6418219804763794, "learning_rate": 8.739732816714826e-05, "loss": 1.7255, "step": 67692 }, { "epoch": 2.25, "grad_norm": 0.6518304347991943, "learning_rate": 8.738995396738801e-05, "loss": 1.6459, "step": 67693 }, { "epoch": 2.25, "grad_norm": 0.6508644819259644, "learning_rate": 8.738258002570494e-05, "loss": 1.6702, "step": 67694 }, { "epoch": 2.25, "grad_norm": 0.6451919674873352, "learning_rate": 8.737520634210819e-05, "loss": 1.6534, "step": 67695 }, { "epoch": 2.25, "grad_norm": 0.6608737111091614, "learning_rate": 8.73678329166065e-05, "loss": 1.6619, "step": 67696 }, { "epoch": 2.25, "grad_norm": 0.6692385673522949, "learning_rate": 8.736045974920903e-05, "loss": 1.7434, "step": 67697 }, { "epoch": 2.25, "grad_norm": 0.6544137001037598, "learning_rate": 8.735308683992463e-05, "loss": 1.7238, "step": 67698 }, { "epoch": 2.25, "grad_norm": 0.63763028383255, "learning_rate": 8.734571418876216e-05, "loss": 1.707, "step": 67699 }, { "epoch": 2.25, "grad_norm": 0.659697413444519, "learning_rate": 8.733834179573077e-05, "loss": 1.7042, "step": 67700 }, { "epoch": 2.25, "grad_norm": 0.6651282906532288, "learning_rate": 8.733096966083926e-05, "loss": 1.6701, "step": 67701 }, { "epoch": 2.25, "grad_norm": 0.6486502885818481, "learning_rate": 8.732359778409655e-05, "loss": 1.7439, "step": 67702 }, { "epoch": 2.25, "grad_norm": 0.6716213822364807, "learning_rate": 8.731622616551165e-05, "loss": 1.7291, "step": 67703 }, { "epoch": 2.25, "grad_norm": 0.654316782951355, "learning_rate": 8.730885480509364e-05, "loss": 1.7419, "step": 67704 }, { "epoch": 2.25, "grad_norm": 0.6567128300666809, "learning_rate": 8.730148370285129e-05, "loss": 1.7277, "step": 67705 }, { "epoch": 2.25, "grad_norm": 0.6575873494148254, "learning_rate": 8.729411285879354e-05, "loss": 1.6995, "step": 67706 }, { "epoch": 2.25, "grad_norm": 0.6718683838844299, "learning_rate": 8.728674227292947e-05, "loss": 1.7271, "step": 67707 }, { "epoch": 2.25, "grad_norm": 0.6406164169311523, "learning_rate": 8.727937194526795e-05, "loss": 1.6759, "step": 67708 }, { "epoch": 2.25, "grad_norm": 0.6583399176597595, "learning_rate": 8.727200187581785e-05, "loss": 1.6668, "step": 67709 }, { "epoch": 2.25, "grad_norm": 0.6804593205451965, "learning_rate": 8.72646320645882e-05, "loss": 1.6827, "step": 67710 }, { "epoch": 2.25, "grad_norm": 0.654200553894043, "learning_rate": 8.725726251158802e-05, "loss": 1.6514, "step": 67711 }, { "epoch": 2.25, "grad_norm": 0.6503140330314636, "learning_rate": 8.724989321682618e-05, "loss": 1.6416, "step": 67712 }, { "epoch": 2.25, "grad_norm": 0.657659649848938, "learning_rate": 8.724252418031154e-05, "loss": 1.6513, "step": 67713 }, { "epoch": 2.25, "grad_norm": 0.6529702544212341, "learning_rate": 8.72351554020532e-05, "loss": 1.7211, "step": 67714 }, { "epoch": 2.25, "grad_norm": 0.6599230766296387, "learning_rate": 8.722778688206004e-05, "loss": 1.7794, "step": 67715 }, { "epoch": 2.25, "grad_norm": 0.6470601558685303, "learning_rate": 8.72204186203409e-05, "loss": 1.6534, "step": 67716 }, { "epoch": 2.25, "grad_norm": 0.6697881817817688, "learning_rate": 8.721305061690484e-05, "loss": 1.7212, "step": 67717 }, { "epoch": 2.25, "grad_norm": 0.6384996771812439, "learning_rate": 8.720568287176089e-05, "loss": 1.7013, "step": 67718 }, { "epoch": 2.25, "grad_norm": 0.6435303092002869, "learning_rate": 8.719831538491785e-05, "loss": 1.6773, "step": 67719 }, { "epoch": 2.25, "grad_norm": 0.6540141701698303, "learning_rate": 8.719094815638462e-05, "loss": 1.6781, "step": 67720 }, { "epoch": 2.25, "grad_norm": 0.7347079515457153, "learning_rate": 8.71835811861703e-05, "loss": 1.7544, "step": 67721 }, { "epoch": 2.25, "grad_norm": 0.6600019335746765, "learning_rate": 8.717621447428379e-05, "loss": 1.6349, "step": 67722 }, { "epoch": 2.25, "grad_norm": 0.6409026980400085, "learning_rate": 8.716884802073388e-05, "loss": 1.6791, "step": 67723 }, { "epoch": 2.25, "grad_norm": 0.6585866212844849, "learning_rate": 8.716148182552963e-05, "loss": 1.6237, "step": 67724 }, { "epoch": 2.25, "grad_norm": 0.6833605766296387, "learning_rate": 8.715411588868007e-05, "loss": 1.6927, "step": 67725 }, { "epoch": 2.25, "grad_norm": 0.6583793759346008, "learning_rate": 8.714675021019407e-05, "loss": 1.6703, "step": 67726 }, { "epoch": 2.25, "grad_norm": 0.6521756052970886, "learning_rate": 8.713938479008045e-05, "loss": 1.651, "step": 67727 }, { "epoch": 2.25, "grad_norm": 0.6713514924049377, "learning_rate": 8.713201962834835e-05, "loss": 1.734, "step": 67728 }, { "epoch": 2.25, "grad_norm": 0.646975040435791, "learning_rate": 8.712465472500659e-05, "loss": 1.6547, "step": 67729 }, { "epoch": 2.25, "grad_norm": 0.6892087459564209, "learning_rate": 8.711729008006407e-05, "loss": 1.7372, "step": 67730 }, { "epoch": 2.25, "grad_norm": 0.642726480960846, "learning_rate": 8.710992569352979e-05, "loss": 1.7546, "step": 67731 }, { "epoch": 2.25, "grad_norm": 0.6603137850761414, "learning_rate": 8.710256156541278e-05, "loss": 1.6659, "step": 67732 }, { "epoch": 2.25, "grad_norm": 0.6575596928596497, "learning_rate": 8.709519769572189e-05, "loss": 1.7028, "step": 67733 }, { "epoch": 2.25, "grad_norm": 0.6941921710968018, "learning_rate": 8.708783408446597e-05, "loss": 1.7536, "step": 67734 }, { "epoch": 2.25, "grad_norm": 0.630831241607666, "learning_rate": 8.708047073165412e-05, "loss": 1.6643, "step": 67735 }, { "epoch": 2.25, "grad_norm": 0.6990235447883606, "learning_rate": 8.707310763729522e-05, "loss": 1.7299, "step": 67736 }, { "epoch": 2.25, "grad_norm": 0.6415250301361084, "learning_rate": 8.706574480139812e-05, "loss": 1.6794, "step": 67737 }, { "epoch": 2.25, "grad_norm": 0.6590680480003357, "learning_rate": 8.705838222397192e-05, "loss": 1.7088, "step": 67738 }, { "epoch": 2.25, "grad_norm": 0.6512815356254578, "learning_rate": 8.705101990502536e-05, "loss": 1.8009, "step": 67739 }, { "epoch": 2.25, "grad_norm": 0.6779231429100037, "learning_rate": 8.704365784456757e-05, "loss": 1.6945, "step": 67740 }, { "epoch": 2.25, "grad_norm": 0.6612052321434021, "learning_rate": 8.703629604260734e-05, "loss": 1.7547, "step": 67741 }, { "epoch": 2.25, "grad_norm": 0.6537585854530334, "learning_rate": 8.702893449915374e-05, "loss": 1.7033, "step": 67742 }, { "epoch": 2.25, "grad_norm": 0.6540617346763611, "learning_rate": 8.702157321421563e-05, "loss": 1.7481, "step": 67743 }, { "epoch": 2.25, "grad_norm": 0.6650224924087524, "learning_rate": 8.701421218780185e-05, "loss": 1.6934, "step": 67744 }, { "epoch": 2.25, "grad_norm": 0.6625133156776428, "learning_rate": 8.700685141992157e-05, "loss": 1.6434, "step": 67745 }, { "epoch": 2.25, "grad_norm": 0.6692245006561279, "learning_rate": 8.699949091058345e-05, "loss": 1.7223, "step": 67746 }, { "epoch": 2.25, "grad_norm": 0.6493300795555115, "learning_rate": 8.69921306597967e-05, "loss": 1.6553, "step": 67747 }, { "epoch": 2.25, "grad_norm": 0.6763388514518738, "learning_rate": 8.698477066757009e-05, "loss": 1.7233, "step": 67748 }, { "epoch": 2.25, "grad_norm": 0.6717395782470703, "learning_rate": 8.697741093391249e-05, "loss": 1.7028, "step": 67749 }, { "epoch": 2.25, "grad_norm": 0.6889677047729492, "learning_rate": 8.6970051458833e-05, "loss": 1.7053, "step": 67750 }, { "epoch": 2.25, "grad_norm": 0.6444013714790344, "learning_rate": 8.696269224234041e-05, "loss": 1.6629, "step": 67751 }, { "epoch": 2.25, "grad_norm": 0.6591476798057556, "learning_rate": 8.695533328444382e-05, "loss": 1.7191, "step": 67752 }, { "epoch": 2.25, "grad_norm": 0.6645468473434448, "learning_rate": 8.694797458515197e-05, "loss": 1.7042, "step": 67753 }, { "epoch": 2.25, "grad_norm": 0.6360172033309937, "learning_rate": 8.694061614447395e-05, "loss": 1.7023, "step": 67754 }, { "epoch": 2.25, "grad_norm": 0.6734024882316589, "learning_rate": 8.693325796241865e-05, "loss": 1.6696, "step": 67755 }, { "epoch": 2.25, "grad_norm": 0.629728376865387, "learning_rate": 8.692590003899491e-05, "loss": 1.6925, "step": 67756 }, { "epoch": 2.25, "grad_norm": 0.6747012734413147, "learning_rate": 8.691854237421181e-05, "loss": 1.6751, "step": 67757 }, { "epoch": 2.25, "grad_norm": 0.6437209248542786, "learning_rate": 8.691118496807818e-05, "loss": 1.6966, "step": 67758 }, { "epoch": 2.25, "grad_norm": 0.6620343327522278, "learning_rate": 8.69038278206029e-05, "loss": 1.6576, "step": 67759 }, { "epoch": 2.25, "grad_norm": 0.6795786619186401, "learning_rate": 8.689647093179499e-05, "loss": 1.695, "step": 67760 }, { "epoch": 2.25, "grad_norm": 0.6533709168434143, "learning_rate": 8.688911430166345e-05, "loss": 1.7165, "step": 67761 }, { "epoch": 2.25, "grad_norm": 0.6721394658088684, "learning_rate": 8.68817579302171e-05, "loss": 1.7122, "step": 67762 }, { "epoch": 2.25, "grad_norm": 0.6468176245689392, "learning_rate": 8.687440181746481e-05, "loss": 1.7071, "step": 67763 }, { "epoch": 2.25, "grad_norm": 0.6803855299949646, "learning_rate": 8.686704596341571e-05, "loss": 1.6796, "step": 67764 }, { "epoch": 2.25, "grad_norm": 0.672691822052002, "learning_rate": 8.685969036807861e-05, "loss": 1.692, "step": 67765 }, { "epoch": 2.25, "grad_norm": 0.6359020471572876, "learning_rate": 8.685233503146235e-05, "loss": 1.608, "step": 67766 }, { "epoch": 2.25, "grad_norm": 0.6759123206138611, "learning_rate": 8.68449799535759e-05, "loss": 1.682, "step": 67767 }, { "epoch": 2.25, "grad_norm": 0.6670752763748169, "learning_rate": 8.683762513442846e-05, "loss": 1.6809, "step": 67768 }, { "epoch": 2.25, "grad_norm": 0.6799579858779907, "learning_rate": 8.683027057402858e-05, "loss": 1.6503, "step": 67769 }, { "epoch": 2.25, "grad_norm": 0.6427988409996033, "learning_rate": 8.68229162723853e-05, "loss": 1.653, "step": 67770 }, { "epoch": 2.25, "grad_norm": 0.6629639267921448, "learning_rate": 8.681556222950768e-05, "loss": 1.7414, "step": 67771 }, { "epoch": 2.25, "grad_norm": 0.6793753504753113, "learning_rate": 8.680820844540459e-05, "loss": 1.5499, "step": 67772 }, { "epoch": 2.25, "grad_norm": 0.6700473427772522, "learning_rate": 8.680085492008478e-05, "loss": 1.6787, "step": 67773 }, { "epoch": 2.25, "grad_norm": 0.6678780317306519, "learning_rate": 8.679350165355738e-05, "loss": 1.7027, "step": 67774 }, { "epoch": 2.25, "grad_norm": 0.6913321018218994, "learning_rate": 8.678614864583138e-05, "loss": 1.7588, "step": 67775 }, { "epoch": 2.25, "grad_norm": 0.6479504108428955, "learning_rate": 8.677879589691542e-05, "loss": 1.6368, "step": 67776 }, { "epoch": 2.25, "grad_norm": 0.642371654510498, "learning_rate": 8.67714434068186e-05, "loss": 1.6564, "step": 67777 }, { "epoch": 2.25, "grad_norm": 0.6705040335655212, "learning_rate": 8.67640911755499e-05, "loss": 1.6235, "step": 67778 }, { "epoch": 2.26, "grad_norm": 0.6530058979988098, "learning_rate": 8.675673920311818e-05, "loss": 1.6383, "step": 67779 }, { "epoch": 2.26, "grad_norm": 0.6865783333778381, "learning_rate": 8.674938748953229e-05, "loss": 1.7237, "step": 67780 }, { "epoch": 2.26, "grad_norm": 0.6640508770942688, "learning_rate": 8.674203603480119e-05, "loss": 1.6354, "step": 67781 }, { "epoch": 2.26, "grad_norm": 0.6544429659843445, "learning_rate": 8.673468483893406e-05, "loss": 1.6958, "step": 67782 }, { "epoch": 2.26, "grad_norm": 0.6513630747795105, "learning_rate": 8.672733390193938e-05, "loss": 1.7212, "step": 67783 }, { "epoch": 2.26, "grad_norm": 0.6955794095993042, "learning_rate": 8.671998322382628e-05, "loss": 1.7208, "step": 67784 }, { "epoch": 2.26, "grad_norm": 0.6772878766059875, "learning_rate": 8.671263280460384e-05, "loss": 1.6643, "step": 67785 }, { "epoch": 2.26, "grad_norm": 0.6952431797981262, "learning_rate": 8.670528264428079e-05, "loss": 1.7207, "step": 67786 }, { "epoch": 2.26, "grad_norm": 0.672152042388916, "learning_rate": 8.669793274286603e-05, "loss": 1.5846, "step": 67787 }, { "epoch": 2.26, "grad_norm": 0.635562002658844, "learning_rate": 8.669058310036863e-05, "loss": 1.7374, "step": 67788 }, { "epoch": 2.26, "grad_norm": 0.6514346599578857, "learning_rate": 8.668323371679744e-05, "loss": 1.729, "step": 67789 }, { "epoch": 2.26, "grad_norm": 0.6546377539634705, "learning_rate": 8.667588459216129e-05, "loss": 1.6364, "step": 67790 }, { "epoch": 2.26, "grad_norm": 0.6488440036773682, "learning_rate": 8.666853572646919e-05, "loss": 1.6399, "step": 67791 }, { "epoch": 2.26, "grad_norm": 0.6532530784606934, "learning_rate": 8.666118711973009e-05, "loss": 1.7076, "step": 67792 }, { "epoch": 2.26, "grad_norm": 0.6552013754844666, "learning_rate": 8.665383877195292e-05, "loss": 1.6544, "step": 67793 }, { "epoch": 2.26, "grad_norm": 0.673268735408783, "learning_rate": 8.664649068314647e-05, "loss": 1.6602, "step": 67794 }, { "epoch": 2.26, "grad_norm": 0.6739696860313416, "learning_rate": 8.663914285331984e-05, "loss": 1.6747, "step": 67795 }, { "epoch": 2.26, "grad_norm": 0.6348545551300049, "learning_rate": 8.663179528248181e-05, "loss": 1.6971, "step": 67796 }, { "epoch": 2.26, "grad_norm": 0.6473267078399658, "learning_rate": 8.662444797064128e-05, "loss": 1.7135, "step": 67797 }, { "epoch": 2.26, "grad_norm": 0.6543669700622559, "learning_rate": 8.661710091780734e-05, "loss": 1.7558, "step": 67798 }, { "epoch": 2.26, "grad_norm": 0.6751269698143005, "learning_rate": 8.660975412398869e-05, "loss": 1.6641, "step": 67799 }, { "epoch": 2.26, "grad_norm": 0.6626496911048889, "learning_rate": 8.660240758919446e-05, "loss": 1.7177, "step": 67800 }, { "epoch": 2.26, "grad_norm": 0.6653062105178833, "learning_rate": 8.659506131343334e-05, "loss": 1.6648, "step": 67801 }, { "epoch": 2.26, "grad_norm": 0.6681859493255615, "learning_rate": 8.65877152967145e-05, "loss": 1.6903, "step": 67802 }, { "epoch": 2.26, "grad_norm": 0.6405090093612671, "learning_rate": 8.658036953904672e-05, "loss": 1.7311, "step": 67803 }, { "epoch": 2.26, "grad_norm": 0.6724728941917419, "learning_rate": 8.657302404043881e-05, "loss": 1.716, "step": 67804 }, { "epoch": 2.26, "grad_norm": 0.6615179181098938, "learning_rate": 8.656567880089991e-05, "loss": 1.658, "step": 67805 }, { "epoch": 2.26, "grad_norm": 0.6720753908157349, "learning_rate": 8.655833382043873e-05, "loss": 1.7009, "step": 67806 }, { "epoch": 2.26, "grad_norm": 0.6442099213600159, "learning_rate": 8.655098909906437e-05, "loss": 1.6646, "step": 67807 }, { "epoch": 2.26, "grad_norm": 0.6493822336196899, "learning_rate": 8.65436446367857e-05, "loss": 1.7032, "step": 67808 }, { "epoch": 2.26, "grad_norm": 0.6799026131629944, "learning_rate": 8.653630043361145e-05, "loss": 1.6665, "step": 67809 }, { "epoch": 2.26, "grad_norm": 0.6655771732330322, "learning_rate": 8.652895648955081e-05, "loss": 1.7272, "step": 67810 }, { "epoch": 2.26, "grad_norm": 0.6845632791519165, "learning_rate": 8.652161280461245e-05, "loss": 1.7061, "step": 67811 }, { "epoch": 2.26, "grad_norm": 0.6552718877792358, "learning_rate": 8.651426937880548e-05, "loss": 1.7053, "step": 67812 }, { "epoch": 2.26, "grad_norm": 0.6728638410568237, "learning_rate": 8.650692621213864e-05, "loss": 1.7193, "step": 67813 }, { "epoch": 2.26, "grad_norm": 0.6793079972267151, "learning_rate": 8.649958330462106e-05, "loss": 1.7241, "step": 67814 }, { "epoch": 2.26, "grad_norm": 0.6691493988037109, "learning_rate": 8.649224065626149e-05, "loss": 1.6714, "step": 67815 }, { "epoch": 2.26, "grad_norm": 0.6570130586624146, "learning_rate": 8.648489826706881e-05, "loss": 1.6524, "step": 67816 }, { "epoch": 2.26, "grad_norm": 0.6532003283500671, "learning_rate": 8.647755613705212e-05, "loss": 1.7338, "step": 67817 }, { "epoch": 2.26, "grad_norm": 0.6349666118621826, "learning_rate": 8.647021426622017e-05, "loss": 1.6386, "step": 67818 }, { "epoch": 2.26, "grad_norm": 0.6702631115913391, "learning_rate": 8.646287265458184e-05, "loss": 1.7213, "step": 67819 }, { "epoch": 2.26, "grad_norm": 0.6631014347076416, "learning_rate": 8.645553130214614e-05, "loss": 1.6771, "step": 67820 }, { "epoch": 2.26, "grad_norm": 0.6580609083175659, "learning_rate": 8.644819020892203e-05, "loss": 1.6701, "step": 67821 }, { "epoch": 2.26, "grad_norm": 0.6506808400154114, "learning_rate": 8.644084937491836e-05, "loss": 1.7037, "step": 67822 }, { "epoch": 2.26, "grad_norm": 0.6408371925354004, "learning_rate": 8.643350880014393e-05, "loss": 1.6644, "step": 67823 }, { "epoch": 2.26, "grad_norm": 0.6622330546379089, "learning_rate": 8.642616848460787e-05, "loss": 1.7691, "step": 67824 }, { "epoch": 2.26, "grad_norm": 0.6582657694816589, "learning_rate": 8.641882842831893e-05, "loss": 1.5782, "step": 67825 }, { "epoch": 2.26, "grad_norm": 0.6583994030952454, "learning_rate": 8.641148863128601e-05, "loss": 1.7183, "step": 67826 }, { "epoch": 2.26, "grad_norm": 0.658828854560852, "learning_rate": 8.640414909351806e-05, "loss": 1.7116, "step": 67827 }, { "epoch": 2.26, "grad_norm": 0.6643600463867188, "learning_rate": 8.639680981502408e-05, "loss": 1.703, "step": 67828 }, { "epoch": 2.26, "grad_norm": 0.6600903868675232, "learning_rate": 8.638947079581293e-05, "loss": 1.6482, "step": 67829 }, { "epoch": 2.26, "grad_norm": 0.6521375179290771, "learning_rate": 8.638213203589337e-05, "loss": 1.6028, "step": 67830 }, { "epoch": 2.26, "grad_norm": 0.6443482637405396, "learning_rate": 8.637479353527453e-05, "loss": 1.6862, "step": 67831 }, { "epoch": 2.26, "grad_norm": 0.6397392153739929, "learning_rate": 8.636745529396522e-05, "loss": 1.639, "step": 67832 }, { "epoch": 2.26, "grad_norm": 0.663978636264801, "learning_rate": 8.636011731197424e-05, "loss": 1.7047, "step": 67833 }, { "epoch": 2.26, "grad_norm": 0.7032607197761536, "learning_rate": 8.635277958931059e-05, "loss": 1.7275, "step": 67834 }, { "epoch": 2.26, "grad_norm": 0.6389769315719604, "learning_rate": 8.63454421259833e-05, "loss": 1.7604, "step": 67835 }, { "epoch": 2.26, "grad_norm": 0.6630716323852539, "learning_rate": 8.633810492200115e-05, "loss": 1.6653, "step": 67836 }, { "epoch": 2.26, "grad_norm": 0.6508886218070984, "learning_rate": 8.633076797737297e-05, "loss": 1.6172, "step": 67837 }, { "epoch": 2.26, "grad_norm": 0.6677260398864746, "learning_rate": 8.632343129210783e-05, "loss": 1.7353, "step": 67838 }, { "epoch": 2.26, "grad_norm": 0.6830987334251404, "learning_rate": 8.631609486621457e-05, "loss": 1.7239, "step": 67839 }, { "epoch": 2.26, "grad_norm": 0.6457784175872803, "learning_rate": 8.6308758699702e-05, "loss": 1.6367, "step": 67840 }, { "epoch": 2.26, "grad_norm": 0.6850337386131287, "learning_rate": 8.630142279257913e-05, "loss": 1.7448, "step": 67841 }, { "epoch": 2.26, "grad_norm": 0.6818116903305054, "learning_rate": 8.62940871448549e-05, "loss": 1.7477, "step": 67842 }, { "epoch": 2.26, "grad_norm": 0.6657283306121826, "learning_rate": 8.628675175653821e-05, "loss": 1.6558, "step": 67843 }, { "epoch": 2.26, "grad_norm": 0.6559831500053406, "learning_rate": 8.627941662763776e-05, "loss": 1.737, "step": 67844 }, { "epoch": 2.26, "grad_norm": 0.6636196970939636, "learning_rate": 8.627208175816274e-05, "loss": 1.6429, "step": 67845 }, { "epoch": 2.26, "grad_norm": 0.6805362105369568, "learning_rate": 8.626474714812192e-05, "loss": 1.7586, "step": 67846 }, { "epoch": 2.26, "grad_norm": 0.6971693634986877, "learning_rate": 8.625741279752409e-05, "loss": 1.6418, "step": 67847 }, { "epoch": 2.26, "grad_norm": 0.6896815896034241, "learning_rate": 8.62500787063783e-05, "loss": 1.7065, "step": 67848 }, { "epoch": 2.26, "grad_norm": 0.6737297773361206, "learning_rate": 8.624274487469349e-05, "loss": 1.6993, "step": 67849 }, { "epoch": 2.26, "grad_norm": 0.6826018691062927, "learning_rate": 8.62354113024785e-05, "loss": 1.6591, "step": 67850 }, { "epoch": 2.26, "grad_norm": 0.6553046703338623, "learning_rate": 8.622807798974211e-05, "loss": 1.6755, "step": 67851 }, { "epoch": 2.26, "grad_norm": 0.6579445600509644, "learning_rate": 8.622074493649344e-05, "loss": 1.6857, "step": 67852 }, { "epoch": 2.26, "grad_norm": 0.6719959378242493, "learning_rate": 8.621341214274132e-05, "loss": 1.755, "step": 67853 }, { "epoch": 2.26, "grad_norm": 0.6715086698532104, "learning_rate": 8.62060796084945e-05, "loss": 1.6087, "step": 67854 }, { "epoch": 2.26, "grad_norm": 0.6709321737289429, "learning_rate": 8.619874733376208e-05, "loss": 1.6796, "step": 67855 }, { "epoch": 2.26, "grad_norm": 0.6499730944633484, "learning_rate": 8.619141531855278e-05, "loss": 1.6559, "step": 67856 }, { "epoch": 2.26, "grad_norm": 0.6684376001358032, "learning_rate": 8.618408356287571e-05, "loss": 1.735, "step": 67857 }, { "epoch": 2.26, "grad_norm": 0.6990349292755127, "learning_rate": 8.617675206673958e-05, "loss": 1.6709, "step": 67858 }, { "epoch": 2.26, "grad_norm": 0.6950114965438843, "learning_rate": 8.616942083015345e-05, "loss": 1.6242, "step": 67859 }, { "epoch": 2.26, "grad_norm": 0.6461578011512756, "learning_rate": 8.616208985312611e-05, "loss": 1.7215, "step": 67860 }, { "epoch": 2.26, "grad_norm": 0.6555246710777283, "learning_rate": 8.615475913566641e-05, "loss": 1.6553, "step": 67861 }, { "epoch": 2.26, "grad_norm": 0.658589243888855, "learning_rate": 8.614742867778342e-05, "loss": 1.6978, "step": 67862 }, { "epoch": 2.26, "grad_norm": 0.6697700023651123, "learning_rate": 8.614009847948584e-05, "loss": 1.6794, "step": 67863 }, { "epoch": 2.26, "grad_norm": 0.6394913792610168, "learning_rate": 8.613276854078275e-05, "loss": 1.6708, "step": 67864 }, { "epoch": 2.26, "grad_norm": 0.6881392598152161, "learning_rate": 8.612543886168303e-05, "loss": 1.6051, "step": 67865 }, { "epoch": 2.26, "grad_norm": 0.6657823324203491, "learning_rate": 8.611810944219536e-05, "loss": 1.6318, "step": 67866 }, { "epoch": 2.26, "grad_norm": 0.6748838424682617, "learning_rate": 8.611078028232892e-05, "loss": 1.6321, "step": 67867 }, { "epoch": 2.26, "grad_norm": 0.6505945920944214, "learning_rate": 8.610345138209236e-05, "loss": 1.7126, "step": 67868 }, { "epoch": 2.26, "grad_norm": 0.6937795877456665, "learning_rate": 8.609612274149478e-05, "loss": 1.6905, "step": 67869 }, { "epoch": 2.26, "grad_norm": 0.6484214663505554, "learning_rate": 8.60887943605449e-05, "loss": 1.6406, "step": 67870 }, { "epoch": 2.26, "grad_norm": 0.6767059564590454, "learning_rate": 8.608146623925181e-05, "loss": 1.7102, "step": 67871 }, { "epoch": 2.26, "grad_norm": 2.062601089477539, "learning_rate": 8.607413837762433e-05, "loss": 1.7528, "step": 67872 }, { "epoch": 2.26, "grad_norm": 0.7238207459449768, "learning_rate": 8.606681077567118e-05, "loss": 1.7639, "step": 67873 }, { "epoch": 2.26, "grad_norm": 0.6778305172920227, "learning_rate": 8.605948343340152e-05, "loss": 1.6588, "step": 67874 }, { "epoch": 2.26, "grad_norm": 0.6856905221939087, "learning_rate": 8.605215635082412e-05, "loss": 1.6756, "step": 67875 }, { "epoch": 2.26, "grad_norm": 0.681054949760437, "learning_rate": 8.604482952794776e-05, "loss": 1.7241, "step": 67876 }, { "epoch": 2.26, "grad_norm": 0.6765204071998596, "learning_rate": 8.603750296478148e-05, "loss": 1.6478, "step": 67877 }, { "epoch": 2.26, "grad_norm": 0.6677535176277161, "learning_rate": 8.603017666133424e-05, "loss": 1.7159, "step": 67878 }, { "epoch": 2.26, "grad_norm": 0.6621405482292175, "learning_rate": 8.602285061761484e-05, "loss": 1.6522, "step": 67879 }, { "epoch": 2.26, "grad_norm": 0.6425075531005859, "learning_rate": 8.601552483363206e-05, "loss": 1.6593, "step": 67880 }, { "epoch": 2.26, "grad_norm": 0.653614342212677, "learning_rate": 8.600819930939502e-05, "loss": 1.6681, "step": 67881 }, { "epoch": 2.26, "grad_norm": 0.6738015413284302, "learning_rate": 8.60008740449125e-05, "loss": 1.7144, "step": 67882 }, { "epoch": 2.26, "grad_norm": 0.6722585558891296, "learning_rate": 8.599354904019328e-05, "loss": 1.7017, "step": 67883 }, { "epoch": 2.26, "grad_norm": 0.6686745882034302, "learning_rate": 8.598622429524637e-05, "loss": 1.7545, "step": 67884 }, { "epoch": 2.26, "grad_norm": 0.6635109782218933, "learning_rate": 8.597889981008081e-05, "loss": 1.6642, "step": 67885 }, { "epoch": 2.26, "grad_norm": 0.6759204864501953, "learning_rate": 8.597157558470515e-05, "loss": 1.6833, "step": 67886 }, { "epoch": 2.26, "grad_norm": 0.6674875617027283, "learning_rate": 8.596425161912848e-05, "loss": 1.719, "step": 67887 }, { "epoch": 2.26, "grad_norm": 0.6568344235420227, "learning_rate": 8.595692791335974e-05, "loss": 1.7143, "step": 67888 }, { "epoch": 2.26, "grad_norm": 0.6568841338157654, "learning_rate": 8.594960446740777e-05, "loss": 1.784, "step": 67889 }, { "epoch": 2.26, "grad_norm": 0.6613935828208923, "learning_rate": 8.594228128128137e-05, "loss": 1.7207, "step": 67890 }, { "epoch": 2.26, "grad_norm": 0.6743065118789673, "learning_rate": 8.593495835498945e-05, "loss": 1.7501, "step": 67891 }, { "epoch": 2.26, "grad_norm": 0.6748632192611694, "learning_rate": 8.592763568854117e-05, "loss": 1.7526, "step": 67892 }, { "epoch": 2.26, "grad_norm": 0.6856489777565002, "learning_rate": 8.592031328194501e-05, "loss": 1.7173, "step": 67893 }, { "epoch": 2.26, "grad_norm": 0.6684057116508484, "learning_rate": 8.591299113521004e-05, "loss": 1.6879, "step": 67894 }, { "epoch": 2.26, "grad_norm": 0.6553094387054443, "learning_rate": 8.590566924834523e-05, "loss": 1.6506, "step": 67895 }, { "epoch": 2.26, "grad_norm": 0.7029025554656982, "learning_rate": 8.589834762135944e-05, "loss": 1.708, "step": 67896 }, { "epoch": 2.26, "grad_norm": 0.6548824906349182, "learning_rate": 8.589102625426136e-05, "loss": 1.6932, "step": 67897 }, { "epoch": 2.26, "grad_norm": 0.6948407888412476, "learning_rate": 8.588370514706005e-05, "loss": 1.7092, "step": 67898 }, { "epoch": 2.26, "grad_norm": 0.6709111332893372, "learning_rate": 8.587638429976458e-05, "loss": 1.7534, "step": 67899 }, { "epoch": 2.26, "grad_norm": 0.6732251048088074, "learning_rate": 8.586906371238342e-05, "loss": 1.7295, "step": 67900 }, { "epoch": 2.26, "grad_norm": 0.647864818572998, "learning_rate": 8.586174338492569e-05, "loss": 1.6517, "step": 67901 }, { "epoch": 2.26, "grad_norm": 0.6475382447242737, "learning_rate": 8.585442331740032e-05, "loss": 1.7196, "step": 67902 }, { "epoch": 2.26, "grad_norm": 0.6560649275779724, "learning_rate": 8.584710350981612e-05, "loss": 1.6875, "step": 67903 }, { "epoch": 2.26, "grad_norm": 0.6458000540733337, "learning_rate": 8.583978396218193e-05, "loss": 1.6728, "step": 67904 }, { "epoch": 2.26, "grad_norm": 0.6447703242301941, "learning_rate": 8.583246467450675e-05, "loss": 1.6388, "step": 67905 }, { "epoch": 2.26, "grad_norm": 0.6783707141876221, "learning_rate": 8.582514564679942e-05, "loss": 1.7744, "step": 67906 }, { "epoch": 2.26, "grad_norm": 0.6675601005554199, "learning_rate": 8.58178268790687e-05, "loss": 1.6923, "step": 67907 }, { "epoch": 2.26, "grad_norm": 0.6666930317878723, "learning_rate": 8.581050837132357e-05, "loss": 1.6931, "step": 67908 }, { "epoch": 2.26, "grad_norm": 0.6854367256164551, "learning_rate": 8.5803190123573e-05, "loss": 1.6916, "step": 67909 }, { "epoch": 2.26, "grad_norm": 0.6559238433837891, "learning_rate": 8.579587213582583e-05, "loss": 1.6759, "step": 67910 }, { "epoch": 2.26, "grad_norm": 0.6533538699150085, "learning_rate": 8.578855440809083e-05, "loss": 1.7104, "step": 67911 }, { "epoch": 2.26, "grad_norm": 0.6495934724807739, "learning_rate": 8.578123694037702e-05, "loss": 1.68, "step": 67912 }, { "epoch": 2.26, "grad_norm": 0.6741368174552917, "learning_rate": 8.577391973269322e-05, "loss": 1.6787, "step": 67913 }, { "epoch": 2.26, "grad_norm": 0.6617467999458313, "learning_rate": 8.576660278504825e-05, "loss": 1.7388, "step": 67914 }, { "epoch": 2.26, "grad_norm": 0.6765410900115967, "learning_rate": 8.575928609745117e-05, "loss": 1.7115, "step": 67915 }, { "epoch": 2.26, "grad_norm": 0.635668158531189, "learning_rate": 8.575196966991061e-05, "loss": 1.6471, "step": 67916 }, { "epoch": 2.26, "grad_norm": 0.6615046858787537, "learning_rate": 8.574465350243573e-05, "loss": 1.6823, "step": 67917 }, { "epoch": 2.26, "grad_norm": 0.6482369899749756, "learning_rate": 8.573733759503515e-05, "loss": 1.6306, "step": 67918 }, { "epoch": 2.26, "grad_norm": 0.6646454334259033, "learning_rate": 8.573002194771796e-05, "loss": 1.6709, "step": 67919 }, { "epoch": 2.26, "grad_norm": 0.6701990962028503, "learning_rate": 8.572270656049296e-05, "loss": 1.7285, "step": 67920 }, { "epoch": 2.26, "grad_norm": 0.6456156969070435, "learning_rate": 8.571539143336893e-05, "loss": 1.6904, "step": 67921 }, { "epoch": 2.26, "grad_norm": 0.6635771989822388, "learning_rate": 8.570807656635495e-05, "loss": 1.6844, "step": 67922 }, { "epoch": 2.26, "grad_norm": 0.6642471551895142, "learning_rate": 8.570076195945968e-05, "loss": 1.6658, "step": 67923 }, { "epoch": 2.26, "grad_norm": 0.6742767095565796, "learning_rate": 8.569344761269221e-05, "loss": 1.6118, "step": 67924 }, { "epoch": 2.26, "grad_norm": 0.6509345173835754, "learning_rate": 8.56861335260613e-05, "loss": 1.6899, "step": 67925 }, { "epoch": 2.26, "grad_norm": 0.6432571411132812, "learning_rate": 8.567881969957576e-05, "loss": 1.6756, "step": 67926 }, { "epoch": 2.26, "grad_norm": 0.6729819774627686, "learning_rate": 8.567150613324468e-05, "loss": 1.7244, "step": 67927 }, { "epoch": 2.26, "grad_norm": 0.6491644382476807, "learning_rate": 8.566419282707668e-05, "loss": 1.6593, "step": 67928 }, { "epoch": 2.26, "grad_norm": 0.645544171333313, "learning_rate": 8.565687978108088e-05, "loss": 1.6419, "step": 67929 }, { "epoch": 2.26, "grad_norm": 0.655314028263092, "learning_rate": 8.564956699526592e-05, "loss": 1.7622, "step": 67930 }, { "epoch": 2.26, "grad_norm": 0.6516783833503723, "learning_rate": 8.564225446964092e-05, "loss": 1.6611, "step": 67931 }, { "epoch": 2.26, "grad_norm": 0.642889142036438, "learning_rate": 8.563494220421466e-05, "loss": 1.665, "step": 67932 }, { "epoch": 2.26, "grad_norm": 0.7016693353652954, "learning_rate": 8.562763019899589e-05, "loss": 1.6143, "step": 67933 }, { "epoch": 2.26, "grad_norm": 0.682624876499176, "learning_rate": 8.562031845399367e-05, "loss": 1.6664, "step": 67934 }, { "epoch": 2.26, "grad_norm": 0.690725564956665, "learning_rate": 8.561300696921681e-05, "loss": 1.7733, "step": 67935 }, { "epoch": 2.26, "grad_norm": 0.6762173771858215, "learning_rate": 8.560569574467406e-05, "loss": 1.7097, "step": 67936 }, { "epoch": 2.26, "grad_norm": 0.6860135793685913, "learning_rate": 8.559838478037441e-05, "loss": 1.7226, "step": 67937 }, { "epoch": 2.26, "grad_norm": 0.6664994955062866, "learning_rate": 8.559107407632683e-05, "loss": 1.7658, "step": 67938 }, { "epoch": 2.26, "grad_norm": 0.6573269963264465, "learning_rate": 8.558376363254012e-05, "loss": 1.6338, "step": 67939 }, { "epoch": 2.26, "grad_norm": 0.6689097285270691, "learning_rate": 8.5576453449023e-05, "loss": 1.6912, "step": 67940 }, { "epoch": 2.26, "grad_norm": 0.6694304943084717, "learning_rate": 8.556914352578458e-05, "loss": 1.7592, "step": 67941 }, { "epoch": 2.26, "grad_norm": 0.6445568203926086, "learning_rate": 8.556183386283362e-05, "loss": 1.7298, "step": 67942 }, { "epoch": 2.26, "grad_norm": 0.6690592765808105, "learning_rate": 8.555452446017887e-05, "loss": 1.7082, "step": 67943 }, { "epoch": 2.26, "grad_norm": 0.6482202410697937, "learning_rate": 8.554721531782938e-05, "loss": 1.7024, "step": 67944 }, { "epoch": 2.26, "grad_norm": 0.6479154229164124, "learning_rate": 8.553990643579405e-05, "loss": 1.7217, "step": 67945 }, { "epoch": 2.26, "grad_norm": 0.657101571559906, "learning_rate": 8.553259781408168e-05, "loss": 1.6903, "step": 67946 }, { "epoch": 2.26, "grad_norm": 0.678645670413971, "learning_rate": 8.552528945270104e-05, "loss": 1.719, "step": 67947 }, { "epoch": 2.26, "grad_norm": 0.6844944357872009, "learning_rate": 8.551798135166118e-05, "loss": 1.6773, "step": 67948 }, { "epoch": 2.26, "grad_norm": 0.6577805280685425, "learning_rate": 8.55106735109709e-05, "loss": 1.6944, "step": 67949 }, { "epoch": 2.26, "grad_norm": 0.6507204174995422, "learning_rate": 8.550336593063895e-05, "loss": 1.7534, "step": 67950 }, { "epoch": 2.26, "grad_norm": 0.6373680830001831, "learning_rate": 8.549605861067432e-05, "loss": 1.718, "step": 67951 }, { "epoch": 2.26, "grad_norm": 0.6749556064605713, "learning_rate": 8.548875155108599e-05, "loss": 1.7454, "step": 67952 }, { "epoch": 2.26, "grad_norm": 0.6491624712944031, "learning_rate": 8.548144475188266e-05, "loss": 1.7165, "step": 67953 }, { "epoch": 2.26, "grad_norm": 0.6379276514053345, "learning_rate": 8.547413821307319e-05, "loss": 1.7065, "step": 67954 }, { "epoch": 2.26, "grad_norm": 0.659284234046936, "learning_rate": 8.546683193466657e-05, "loss": 1.6403, "step": 67955 }, { "epoch": 2.26, "grad_norm": 0.6616148948669434, "learning_rate": 8.545952591667164e-05, "loss": 1.7741, "step": 67956 }, { "epoch": 2.26, "grad_norm": 0.6793968081474304, "learning_rate": 8.545222015909711e-05, "loss": 1.7421, "step": 67957 }, { "epoch": 2.26, "grad_norm": 0.6495852470397949, "learning_rate": 8.544491466195198e-05, "loss": 1.7488, "step": 67958 }, { "epoch": 2.26, "grad_norm": 0.656883955001831, "learning_rate": 8.543760942524523e-05, "loss": 1.6805, "step": 67959 }, { "epoch": 2.26, "grad_norm": 0.629289984703064, "learning_rate": 8.54303044489856e-05, "loss": 1.7003, "step": 67960 }, { "epoch": 2.26, "grad_norm": 0.6536992192268372, "learning_rate": 8.542299973318183e-05, "loss": 1.7857, "step": 67961 }, { "epoch": 2.26, "grad_norm": 0.6566248536109924, "learning_rate": 8.541569527784304e-05, "loss": 1.6767, "step": 67962 }, { "epoch": 2.26, "grad_norm": 0.6463611125946045, "learning_rate": 8.5408391082978e-05, "loss": 1.6954, "step": 67963 }, { "epoch": 2.26, "grad_norm": 0.6685559749603271, "learning_rate": 8.540108714859546e-05, "loss": 1.6927, "step": 67964 }, { "epoch": 2.26, "grad_norm": 0.6405389308929443, "learning_rate": 8.539378347470434e-05, "loss": 1.7053, "step": 67965 }, { "epoch": 2.26, "grad_norm": 0.6429306864738464, "learning_rate": 8.538648006131368e-05, "loss": 1.721, "step": 67966 }, { "epoch": 2.26, "grad_norm": 0.6576282978057861, "learning_rate": 8.537917690843221e-05, "loss": 1.7211, "step": 67967 }, { "epoch": 2.26, "grad_norm": 0.6711556315422058, "learning_rate": 8.537187401606867e-05, "loss": 1.7397, "step": 67968 }, { "epoch": 2.26, "grad_norm": 0.6402332782745361, "learning_rate": 8.536457138423217e-05, "loss": 1.6701, "step": 67969 }, { "epoch": 2.26, "grad_norm": 0.664169430732727, "learning_rate": 8.535726901293147e-05, "loss": 1.6953, "step": 67970 }, { "epoch": 2.26, "grad_norm": 0.6560293436050415, "learning_rate": 8.534996690217527e-05, "loss": 1.6531, "step": 67971 }, { "epoch": 2.26, "grad_norm": 0.6586257815361023, "learning_rate": 8.534266505197273e-05, "loss": 1.6725, "step": 67972 }, { "epoch": 2.26, "grad_norm": 0.672792375087738, "learning_rate": 8.533536346233245e-05, "loss": 1.6767, "step": 67973 }, { "epoch": 2.26, "grad_norm": 0.6598551273345947, "learning_rate": 8.532806213326351e-05, "loss": 1.7219, "step": 67974 }, { "epoch": 2.26, "grad_norm": 0.6529189944267273, "learning_rate": 8.532076106477459e-05, "loss": 1.6116, "step": 67975 }, { "epoch": 2.26, "grad_norm": 0.6679372787475586, "learning_rate": 8.531346025687469e-05, "loss": 1.6461, "step": 67976 }, { "epoch": 2.26, "grad_norm": 0.6533814072608948, "learning_rate": 8.530615970957266e-05, "loss": 1.6992, "step": 67977 }, { "epoch": 2.26, "grad_norm": 0.6610302329063416, "learning_rate": 8.52988594228772e-05, "loss": 1.7191, "step": 67978 }, { "epoch": 2.26, "grad_norm": 0.6583394408226013, "learning_rate": 8.529155939679738e-05, "loss": 1.6809, "step": 67979 }, { "epoch": 2.26, "grad_norm": 0.656526505947113, "learning_rate": 8.528425963134188e-05, "loss": 1.7155, "step": 67980 }, { "epoch": 2.26, "grad_norm": 0.6387256979942322, "learning_rate": 8.527696012651975e-05, "loss": 1.7268, "step": 67981 }, { "epoch": 2.26, "grad_norm": 0.6739604473114014, "learning_rate": 8.526966088233978e-05, "loss": 1.7915, "step": 67982 }, { "epoch": 2.26, "grad_norm": 0.6613127589225769, "learning_rate": 8.526236189881065e-05, "loss": 1.7109, "step": 67983 }, { "epoch": 2.26, "grad_norm": 0.6566740870475769, "learning_rate": 8.525506317594151e-05, "loss": 1.7335, "step": 67984 }, { "epoch": 2.26, "grad_norm": 0.663956880569458, "learning_rate": 8.524776471374101e-05, "loss": 1.7157, "step": 67985 }, { "epoch": 2.26, "grad_norm": 0.6821216344833374, "learning_rate": 8.524046651221811e-05, "loss": 1.6439, "step": 67986 }, { "epoch": 2.26, "grad_norm": 0.6701095104217529, "learning_rate": 8.52331685713816e-05, "loss": 1.732, "step": 67987 }, { "epoch": 2.26, "grad_norm": 0.6614826321601868, "learning_rate": 8.522587089124046e-05, "loss": 1.7399, "step": 67988 }, { "epoch": 2.26, "grad_norm": 0.6718109846115112, "learning_rate": 8.521857347180346e-05, "loss": 1.7299, "step": 67989 }, { "epoch": 2.26, "grad_norm": 0.6558293700218201, "learning_rate": 8.521127631307936e-05, "loss": 1.5924, "step": 67990 }, { "epoch": 2.26, "grad_norm": 0.6697332262992859, "learning_rate": 8.520397941507723e-05, "loss": 1.628, "step": 67991 }, { "epoch": 2.26, "grad_norm": 0.6549789905548096, "learning_rate": 8.51966827778058e-05, "loss": 1.7141, "step": 67992 }, { "epoch": 2.26, "grad_norm": 0.6858490109443665, "learning_rate": 8.518938640127388e-05, "loss": 1.6971, "step": 67993 }, { "epoch": 2.26, "grad_norm": 0.6286478042602539, "learning_rate": 8.51820902854904e-05, "loss": 1.7666, "step": 67994 }, { "epoch": 2.26, "grad_norm": 0.6516639590263367, "learning_rate": 8.517479443046428e-05, "loss": 1.7112, "step": 67995 }, { "epoch": 2.26, "grad_norm": 0.6695886254310608, "learning_rate": 8.516749883620432e-05, "loss": 1.6503, "step": 67996 }, { "epoch": 2.26, "grad_norm": 0.685352623462677, "learning_rate": 8.516020350271928e-05, "loss": 1.7145, "step": 67997 }, { "epoch": 2.26, "grad_norm": 0.6416923999786377, "learning_rate": 8.515290843001817e-05, "loss": 1.717, "step": 67998 }, { "epoch": 2.26, "grad_norm": 0.6512342691421509, "learning_rate": 8.514561361810976e-05, "loss": 1.695, "step": 67999 }, { "epoch": 2.26, "grad_norm": 0.646141529083252, "learning_rate": 8.513831906700285e-05, "loss": 1.6406, "step": 68000 }, { "epoch": 2.26, "grad_norm": 0.6762406826019287, "learning_rate": 8.513102477670637e-05, "loss": 1.7496, "step": 68001 }, { "epoch": 2.26, "grad_norm": 0.6688406467437744, "learning_rate": 8.512373074722932e-05, "loss": 1.7269, "step": 68002 }, { "epoch": 2.26, "grad_norm": 0.6943718791007996, "learning_rate": 8.511643697858024e-05, "loss": 1.7639, "step": 68003 }, { "epoch": 2.26, "grad_norm": 0.6696546077728271, "learning_rate": 8.510914347076813e-05, "loss": 1.7347, "step": 68004 }, { "epoch": 2.26, "grad_norm": 0.6617594361305237, "learning_rate": 8.510185022380198e-05, "loss": 1.7616, "step": 68005 }, { "epoch": 2.26, "grad_norm": 0.6749681830406189, "learning_rate": 8.50945572376905e-05, "loss": 1.746, "step": 68006 }, { "epoch": 2.26, "grad_norm": 0.636772632598877, "learning_rate": 8.508726451244245e-05, "loss": 1.6882, "step": 68007 }, { "epoch": 2.26, "grad_norm": 0.6536433100700378, "learning_rate": 8.507997204806681e-05, "loss": 1.7214, "step": 68008 }, { "epoch": 2.26, "grad_norm": 0.6750732660293579, "learning_rate": 8.50726798445726e-05, "loss": 1.7057, "step": 68009 }, { "epoch": 2.26, "grad_norm": 0.6562913656234741, "learning_rate": 8.506538790196833e-05, "loss": 1.6757, "step": 68010 }, { "epoch": 2.26, "grad_norm": 0.6581668853759766, "learning_rate": 8.5058096220263e-05, "loss": 1.6751, "step": 68011 }, { "epoch": 2.26, "grad_norm": 0.6773770451545715, "learning_rate": 8.505080479946552e-05, "loss": 1.6972, "step": 68012 }, { "epoch": 2.26, "grad_norm": 0.6467081904411316, "learning_rate": 8.504351363958474e-05, "loss": 1.627, "step": 68013 }, { "epoch": 2.26, "grad_norm": 0.6822237968444824, "learning_rate": 8.503622274062933e-05, "loss": 1.6578, "step": 68014 }, { "epoch": 2.26, "grad_norm": 0.6654007434844971, "learning_rate": 8.50289321026083e-05, "loss": 1.6625, "step": 68015 }, { "epoch": 2.26, "grad_norm": 0.6900103688240051, "learning_rate": 8.502164172553068e-05, "loss": 1.729, "step": 68016 }, { "epoch": 2.26, "grad_norm": 0.6507806777954102, "learning_rate": 8.501435160940488e-05, "loss": 1.7127, "step": 68017 }, { "epoch": 2.26, "grad_norm": 0.6754721403121948, "learning_rate": 8.500706175424e-05, "loss": 1.721, "step": 68018 }, { "epoch": 2.26, "grad_norm": 0.6552218794822693, "learning_rate": 8.499977216004496e-05, "loss": 1.6705, "step": 68019 }, { "epoch": 2.26, "grad_norm": 0.6914384961128235, "learning_rate": 8.499248282682853e-05, "loss": 1.7682, "step": 68020 }, { "epoch": 2.26, "grad_norm": 0.6644124984741211, "learning_rate": 8.498519375459943e-05, "loss": 1.7067, "step": 68021 }, { "epoch": 2.26, "grad_norm": 0.6386499404907227, "learning_rate": 8.497790494336675e-05, "loss": 1.6633, "step": 68022 }, { "epoch": 2.26, "grad_norm": 0.6435558199882507, "learning_rate": 8.497061639313916e-05, "loss": 1.7371, "step": 68023 }, { "epoch": 2.26, "grad_norm": 0.6674818992614746, "learning_rate": 8.49633281039255e-05, "loss": 1.7593, "step": 68024 }, { "epoch": 2.26, "grad_norm": 0.6742479801177979, "learning_rate": 8.495604007573466e-05, "loss": 1.7156, "step": 68025 }, { "epoch": 2.26, "grad_norm": 0.6554338335990906, "learning_rate": 8.494875230857562e-05, "loss": 1.7477, "step": 68026 }, { "epoch": 2.26, "grad_norm": 0.6535934209823608, "learning_rate": 8.494146480245709e-05, "loss": 1.7208, "step": 68027 }, { "epoch": 2.26, "grad_norm": 0.6540684700012207, "learning_rate": 8.493417755738786e-05, "loss": 1.5959, "step": 68028 }, { "epoch": 2.26, "grad_norm": 0.6631020307540894, "learning_rate": 8.492689057337692e-05, "loss": 1.6996, "step": 68029 }, { "epoch": 2.26, "grad_norm": 0.6496742367744446, "learning_rate": 8.491960385043305e-05, "loss": 1.736, "step": 68030 }, { "epoch": 2.26, "grad_norm": 0.6507296562194824, "learning_rate": 8.4912317388565e-05, "loss": 1.7077, "step": 68031 }, { "epoch": 2.26, "grad_norm": 0.6834688782691956, "learning_rate": 8.490503118778182e-05, "loss": 1.7272, "step": 68032 }, { "epoch": 2.26, "grad_norm": 0.6415811777114868, "learning_rate": 8.489774524809212e-05, "loss": 1.7248, "step": 68033 }, { "epoch": 2.26, "grad_norm": 0.6729198098182678, "learning_rate": 8.489045956950495e-05, "loss": 1.613, "step": 68034 }, { "epoch": 2.26, "grad_norm": 1.2244361639022827, "learning_rate": 8.488317415202899e-05, "loss": 1.6258, "step": 68035 }, { "epoch": 2.26, "grad_norm": 0.6462373733520508, "learning_rate": 8.487588899567325e-05, "loss": 1.6658, "step": 68036 }, { "epoch": 2.26, "grad_norm": 0.6588713526725769, "learning_rate": 8.486860410044649e-05, "loss": 1.7468, "step": 68037 }, { "epoch": 2.26, "grad_norm": 0.6601830124855042, "learning_rate": 8.486131946635745e-05, "loss": 1.7317, "step": 68038 }, { "epoch": 2.26, "grad_norm": 0.6458725929260254, "learning_rate": 8.485403509341514e-05, "loss": 1.6762, "step": 68039 }, { "epoch": 2.26, "grad_norm": 0.6522109508514404, "learning_rate": 8.484675098162826e-05, "loss": 1.623, "step": 68040 }, { "epoch": 2.26, "grad_norm": 0.6789877414703369, "learning_rate": 8.483946713100581e-05, "loss": 1.7347, "step": 68041 }, { "epoch": 2.26, "grad_norm": 0.675006091594696, "learning_rate": 8.483218354155658e-05, "loss": 1.7514, "step": 68042 }, { "epoch": 2.26, "grad_norm": 0.6488205790519714, "learning_rate": 8.482490021328925e-05, "loss": 1.7145, "step": 68043 }, { "epoch": 2.26, "grad_norm": 0.6671719551086426, "learning_rate": 8.481761714621286e-05, "loss": 1.6424, "step": 68044 }, { "epoch": 2.26, "grad_norm": 0.6478277444839478, "learning_rate": 8.481033434033609e-05, "loss": 1.7065, "step": 68045 }, { "epoch": 2.26, "grad_norm": 0.6561183333396912, "learning_rate": 8.480305179566798e-05, "loss": 1.7107, "step": 68046 }, { "epoch": 2.26, "grad_norm": 0.6490657329559326, "learning_rate": 8.479576951221716e-05, "loss": 1.7578, "step": 68047 }, { "epoch": 2.26, "grad_norm": 0.6615824699401855, "learning_rate": 8.478848748999267e-05, "loss": 1.6921, "step": 68048 }, { "epoch": 2.26, "grad_norm": 0.6736177802085876, "learning_rate": 8.478120572900322e-05, "loss": 1.7278, "step": 68049 }, { "epoch": 2.26, "grad_norm": 0.6413908004760742, "learning_rate": 8.477392422925763e-05, "loss": 1.6841, "step": 68050 }, { "epoch": 2.26, "grad_norm": 0.6480048894882202, "learning_rate": 8.476664299076482e-05, "loss": 1.7339, "step": 68051 }, { "epoch": 2.26, "grad_norm": 0.6841177344322205, "learning_rate": 8.475936201353363e-05, "loss": 1.6633, "step": 68052 }, { "epoch": 2.26, "grad_norm": 0.6516571640968323, "learning_rate": 8.475208129757274e-05, "loss": 1.6303, "step": 68053 }, { "epoch": 2.26, "grad_norm": 0.6363581418991089, "learning_rate": 8.474480084289111e-05, "loss": 1.6824, "step": 68054 }, { "epoch": 2.26, "grad_norm": 0.6565631628036499, "learning_rate": 8.473752064949772e-05, "loss": 1.7144, "step": 68055 }, { "epoch": 2.26, "grad_norm": 0.6590595245361328, "learning_rate": 8.47302407174012e-05, "loss": 1.7768, "step": 68056 }, { "epoch": 2.26, "grad_norm": 0.6728976964950562, "learning_rate": 8.47229610466104e-05, "loss": 1.6896, "step": 68057 }, { "epoch": 2.26, "grad_norm": 0.6559234857559204, "learning_rate": 8.471568163713426e-05, "loss": 1.7419, "step": 68058 }, { "epoch": 2.26, "grad_norm": 0.6429536938667297, "learning_rate": 8.470840248898159e-05, "loss": 1.7034, "step": 68059 }, { "epoch": 2.26, "grad_norm": 0.6718592047691345, "learning_rate": 8.470112360216107e-05, "loss": 1.6427, "step": 68060 }, { "epoch": 2.26, "grad_norm": 0.6794374585151672, "learning_rate": 8.46938449766817e-05, "loss": 1.7255, "step": 68061 }, { "epoch": 2.26, "grad_norm": 0.6747080087661743, "learning_rate": 8.468656661255238e-05, "loss": 1.7997, "step": 68062 }, { "epoch": 2.26, "grad_norm": 0.6770954132080078, "learning_rate": 8.467928850978181e-05, "loss": 1.7512, "step": 68063 }, { "epoch": 2.26, "grad_norm": 0.6515551209449768, "learning_rate": 8.467201066837876e-05, "loss": 1.7328, "step": 68064 }, { "epoch": 2.26, "grad_norm": 0.6507164239883423, "learning_rate": 8.466473308835227e-05, "loss": 1.7164, "step": 68065 }, { "epoch": 2.26, "grad_norm": 0.6581115126609802, "learning_rate": 8.465745576971107e-05, "loss": 1.6179, "step": 68066 }, { "epoch": 2.26, "grad_norm": 0.6715026497840881, "learning_rate": 8.465017871246389e-05, "loss": 1.6822, "step": 68067 }, { "epoch": 2.26, "grad_norm": 0.6783203482627869, "learning_rate": 8.464290191661967e-05, "loss": 1.7196, "step": 68068 }, { "epoch": 2.26, "grad_norm": 0.652410626411438, "learning_rate": 8.463562538218734e-05, "loss": 1.686, "step": 68069 }, { "epoch": 2.26, "grad_norm": 0.6555097103118896, "learning_rate": 8.462834910917561e-05, "loss": 1.7409, "step": 68070 }, { "epoch": 2.26, "grad_norm": 0.6611681580543518, "learning_rate": 8.462107309759325e-05, "loss": 1.7084, "step": 68071 }, { "epoch": 2.26, "grad_norm": 0.6582741141319275, "learning_rate": 8.461379734744925e-05, "loss": 1.7092, "step": 68072 }, { "epoch": 2.26, "grad_norm": 0.6837310791015625, "learning_rate": 8.460652185875235e-05, "loss": 1.7185, "step": 68073 }, { "epoch": 2.26, "grad_norm": 0.6597787141799927, "learning_rate": 8.459924663151134e-05, "loss": 1.648, "step": 68074 }, { "epoch": 2.26, "grad_norm": 0.6455392241477966, "learning_rate": 8.459197166573509e-05, "loss": 1.6873, "step": 68075 }, { "epoch": 2.26, "grad_norm": 0.6586031913757324, "learning_rate": 8.458469696143253e-05, "loss": 1.6656, "step": 68076 }, { "epoch": 2.26, "grad_norm": 0.652855396270752, "learning_rate": 8.457742251861245e-05, "loss": 1.6241, "step": 68077 }, { "epoch": 2.26, "grad_norm": 0.6434887051582336, "learning_rate": 8.45701483372835e-05, "loss": 1.6146, "step": 68078 }, { "epoch": 2.26, "grad_norm": 0.6508170366287231, "learning_rate": 8.456287441745477e-05, "loss": 1.6695, "step": 68079 }, { "epoch": 2.27, "grad_norm": 0.6557242274284363, "learning_rate": 8.455560075913495e-05, "loss": 1.6541, "step": 68080 }, { "epoch": 2.27, "grad_norm": 0.6623911261558533, "learning_rate": 8.454832736233276e-05, "loss": 1.7291, "step": 68081 }, { "epoch": 2.27, "grad_norm": 0.6570848822593689, "learning_rate": 8.45410542270573e-05, "loss": 1.7414, "step": 68082 }, { "epoch": 2.27, "grad_norm": 0.6539439558982849, "learning_rate": 8.453378135331714e-05, "loss": 1.7253, "step": 68083 }, { "epoch": 2.27, "grad_norm": 0.6853592991828918, "learning_rate": 8.452650874112134e-05, "loss": 1.7422, "step": 68084 }, { "epoch": 2.27, "grad_norm": 0.6547788977622986, "learning_rate": 8.451923639047849e-05, "loss": 1.6842, "step": 68085 }, { "epoch": 2.27, "grad_norm": 0.6583983302116394, "learning_rate": 8.451196430139765e-05, "loss": 1.7023, "step": 68086 }, { "epoch": 2.27, "grad_norm": 0.6612293720245361, "learning_rate": 8.45046924738875e-05, "loss": 1.6627, "step": 68087 }, { "epoch": 2.27, "grad_norm": 0.6568589210510254, "learning_rate": 8.449742090795685e-05, "loss": 1.7097, "step": 68088 }, { "epoch": 2.27, "grad_norm": 0.6571260690689087, "learning_rate": 8.449014960361464e-05, "loss": 1.6925, "step": 68089 }, { "epoch": 2.27, "grad_norm": 0.6675710082054138, "learning_rate": 8.448287856086954e-05, "loss": 1.6127, "step": 68090 }, { "epoch": 2.27, "grad_norm": 0.6495318412780762, "learning_rate": 8.447560777973059e-05, "loss": 1.6863, "step": 68091 }, { "epoch": 2.27, "grad_norm": 0.6859996914863586, "learning_rate": 8.44683372602064e-05, "loss": 1.7289, "step": 68092 }, { "epoch": 2.27, "grad_norm": 0.677781343460083, "learning_rate": 8.446106700230596e-05, "loss": 1.7251, "step": 68093 }, { "epoch": 2.27, "grad_norm": 0.6412175893783569, "learning_rate": 8.445379700603804e-05, "loss": 1.7121, "step": 68094 }, { "epoch": 2.27, "grad_norm": 0.6787336468696594, "learning_rate": 8.444652727141136e-05, "loss": 1.7173, "step": 68095 }, { "epoch": 2.27, "grad_norm": 0.6660965085029602, "learning_rate": 8.443925779843494e-05, "loss": 1.714, "step": 68096 }, { "epoch": 2.27, "grad_norm": 0.6464067101478577, "learning_rate": 8.443198858711739e-05, "loss": 1.6899, "step": 68097 }, { "epoch": 2.27, "grad_norm": 0.6628112196922302, "learning_rate": 8.442471963746773e-05, "loss": 1.5776, "step": 68098 }, { "epoch": 2.27, "grad_norm": 0.6818189024925232, "learning_rate": 8.441745094949471e-05, "loss": 1.6993, "step": 68099 }, { "epoch": 2.27, "grad_norm": 0.6424992084503174, "learning_rate": 8.441018252320704e-05, "loss": 1.7108, "step": 68100 }, { "epoch": 2.27, "grad_norm": 0.654818058013916, "learning_rate": 8.440291435861376e-05, "loss": 1.7104, "step": 68101 }, { "epoch": 2.27, "grad_norm": 0.6767638921737671, "learning_rate": 8.439564645572345e-05, "loss": 1.7292, "step": 68102 }, { "epoch": 2.27, "grad_norm": 0.6607844829559326, "learning_rate": 8.438837881454518e-05, "loss": 1.6821, "step": 68103 }, { "epoch": 2.27, "grad_norm": 0.6748250722885132, "learning_rate": 8.438111143508752e-05, "loss": 1.8185, "step": 68104 }, { "epoch": 2.27, "grad_norm": 0.6531767249107361, "learning_rate": 8.437384431735954e-05, "loss": 1.6657, "step": 68105 }, { "epoch": 2.27, "grad_norm": 0.65365070104599, "learning_rate": 8.436657746136993e-05, "loss": 1.7322, "step": 68106 }, { "epoch": 2.27, "grad_norm": 0.6550590991973877, "learning_rate": 8.435931086712743e-05, "loss": 1.7257, "step": 68107 }, { "epoch": 2.27, "grad_norm": 0.6526464223861694, "learning_rate": 8.435204453464105e-05, "loss": 1.7473, "step": 68108 }, { "epoch": 2.27, "grad_norm": 0.6644406914710999, "learning_rate": 8.434477846391951e-05, "loss": 1.7132, "step": 68109 }, { "epoch": 2.27, "grad_norm": 0.6624637246131897, "learning_rate": 8.433751265497155e-05, "loss": 1.7975, "step": 68110 }, { "epoch": 2.27, "grad_norm": 0.6629056930541992, "learning_rate": 8.433024710780609e-05, "loss": 1.67, "step": 68111 }, { "epoch": 2.27, "grad_norm": 0.6685530543327332, "learning_rate": 8.4322981822432e-05, "loss": 1.6742, "step": 68112 }, { "epoch": 2.27, "grad_norm": 0.6688644289970398, "learning_rate": 8.431571679885803e-05, "loss": 1.6681, "step": 68113 }, { "epoch": 2.27, "grad_norm": 0.6445484757423401, "learning_rate": 8.43084520370929e-05, "loss": 1.6936, "step": 68114 }, { "epoch": 2.27, "grad_norm": 0.6551038026809692, "learning_rate": 8.430118753714565e-05, "loss": 1.7638, "step": 68115 }, { "epoch": 2.27, "grad_norm": 0.6345536708831787, "learning_rate": 8.429392329902495e-05, "loss": 1.6622, "step": 68116 }, { "epoch": 2.27, "grad_norm": 0.6554244160652161, "learning_rate": 8.428665932273958e-05, "loss": 1.6505, "step": 68117 }, { "epoch": 2.27, "grad_norm": 0.6555989980697632, "learning_rate": 8.427939560829838e-05, "loss": 1.6915, "step": 68118 }, { "epoch": 2.27, "grad_norm": 0.6349273920059204, "learning_rate": 8.427213215571043e-05, "loss": 1.697, "step": 68119 }, { "epoch": 2.27, "grad_norm": 0.6504302024841309, "learning_rate": 8.426486896498413e-05, "loss": 1.6625, "step": 68120 }, { "epoch": 2.27, "grad_norm": 0.672299861907959, "learning_rate": 8.42576060361285e-05, "loss": 1.7301, "step": 68121 }, { "epoch": 2.27, "grad_norm": 0.653694212436676, "learning_rate": 8.425034336915242e-05, "loss": 1.6891, "step": 68122 }, { "epoch": 2.27, "grad_norm": 0.6580367684364319, "learning_rate": 8.424308096406465e-05, "loss": 1.659, "step": 68123 }, { "epoch": 2.27, "grad_norm": 0.6699126958847046, "learning_rate": 8.423581882087388e-05, "loss": 1.7258, "step": 68124 }, { "epoch": 2.27, "grad_norm": 0.6477235555648804, "learning_rate": 8.422855693958905e-05, "loss": 1.6695, "step": 68125 }, { "epoch": 2.27, "grad_norm": 0.6818850040435791, "learning_rate": 8.422129532021913e-05, "loss": 1.6842, "step": 68126 }, { "epoch": 2.27, "grad_norm": 0.6742884516716003, "learning_rate": 8.42140339627726e-05, "loss": 1.6899, "step": 68127 }, { "epoch": 2.27, "grad_norm": 0.6405312418937683, "learning_rate": 8.420677286725845e-05, "loss": 1.6579, "step": 68128 }, { "epoch": 2.27, "grad_norm": 0.6544185876846313, "learning_rate": 8.419951203368552e-05, "loss": 1.7371, "step": 68129 }, { "epoch": 2.27, "grad_norm": 0.6539970636367798, "learning_rate": 8.419225146206264e-05, "loss": 1.7304, "step": 68130 }, { "epoch": 2.27, "grad_norm": 0.6796048879623413, "learning_rate": 8.418499115239846e-05, "loss": 1.72, "step": 68131 }, { "epoch": 2.27, "grad_norm": 0.6727964878082275, "learning_rate": 8.417773110470187e-05, "loss": 1.6889, "step": 68132 }, { "epoch": 2.27, "grad_norm": 0.659134030342102, "learning_rate": 8.417047131898196e-05, "loss": 1.6673, "step": 68133 }, { "epoch": 2.27, "grad_norm": 0.6693152189254761, "learning_rate": 8.416321179524707e-05, "loss": 1.6917, "step": 68134 }, { "epoch": 2.27, "grad_norm": 0.6734355092048645, "learning_rate": 8.415595253350625e-05, "loss": 1.7773, "step": 68135 }, { "epoch": 2.27, "grad_norm": 0.6759632229804993, "learning_rate": 8.414869353376839e-05, "loss": 1.6904, "step": 68136 }, { "epoch": 2.27, "grad_norm": 0.6575962901115417, "learning_rate": 8.41414347960422e-05, "loss": 1.7461, "step": 68137 }, { "epoch": 2.27, "grad_norm": 0.651031494140625, "learning_rate": 8.413417632033641e-05, "loss": 1.6802, "step": 68138 }, { "epoch": 2.27, "grad_norm": 0.6645841598510742, "learning_rate": 8.412691810666006e-05, "loss": 1.7627, "step": 68139 }, { "epoch": 2.27, "grad_norm": 0.6453666090965271, "learning_rate": 8.411966015502177e-05, "loss": 1.6143, "step": 68140 }, { "epoch": 2.27, "grad_norm": 0.6719521284103394, "learning_rate": 8.411240246543035e-05, "loss": 1.71, "step": 68141 }, { "epoch": 2.27, "grad_norm": 0.6594683527946472, "learning_rate": 8.410514503789465e-05, "loss": 1.656, "step": 68142 }, { "epoch": 2.27, "grad_norm": 0.6446643471717834, "learning_rate": 8.409788787242359e-05, "loss": 1.769, "step": 68143 }, { "epoch": 2.27, "grad_norm": 0.684806227684021, "learning_rate": 8.409063096902585e-05, "loss": 1.6967, "step": 68144 }, { "epoch": 2.27, "grad_norm": 0.6559819579124451, "learning_rate": 8.40833743277102e-05, "loss": 1.7035, "step": 68145 }, { "epoch": 2.27, "grad_norm": 0.6788343787193298, "learning_rate": 8.407611794848564e-05, "loss": 1.6385, "step": 68146 }, { "epoch": 2.27, "grad_norm": 0.6481772065162659, "learning_rate": 8.406886183136083e-05, "loss": 1.6674, "step": 68147 }, { "epoch": 2.27, "grad_norm": 0.6436737775802612, "learning_rate": 8.406160597634451e-05, "loss": 1.6978, "step": 68148 }, { "epoch": 2.27, "grad_norm": 0.6697660088539124, "learning_rate": 8.405435038344568e-05, "loss": 1.6898, "step": 68149 }, { "epoch": 2.27, "grad_norm": 0.6329725980758667, "learning_rate": 8.404709505267297e-05, "loss": 1.7414, "step": 68150 }, { "epoch": 2.27, "grad_norm": 0.6937257051467896, "learning_rate": 8.403983998403536e-05, "loss": 1.7011, "step": 68151 }, { "epoch": 2.27, "grad_norm": 0.6791201829910278, "learning_rate": 8.403258517754147e-05, "loss": 1.6424, "step": 68152 }, { "epoch": 2.27, "grad_norm": 0.6510958075523376, "learning_rate": 8.402533063320028e-05, "loss": 1.7082, "step": 68153 }, { "epoch": 2.27, "grad_norm": 0.6590568423271179, "learning_rate": 8.401807635102053e-05, "loss": 1.6712, "step": 68154 }, { "epoch": 2.27, "grad_norm": 0.6656221151351929, "learning_rate": 8.401082233101088e-05, "loss": 1.7178, "step": 68155 }, { "epoch": 2.27, "grad_norm": 0.6553454399108887, "learning_rate": 8.400356857318043e-05, "loss": 1.642, "step": 68156 }, { "epoch": 2.27, "grad_norm": 0.6563551425933838, "learning_rate": 8.399631507753767e-05, "loss": 1.6948, "step": 68157 }, { "epoch": 2.27, "grad_norm": 0.6593999862670898, "learning_rate": 8.398906184409168e-05, "loss": 1.6436, "step": 68158 }, { "epoch": 2.27, "grad_norm": 0.6415066719055176, "learning_rate": 8.398180887285114e-05, "loss": 1.6364, "step": 68159 }, { "epoch": 2.27, "grad_norm": 0.6622612476348877, "learning_rate": 8.397455616382476e-05, "loss": 1.6023, "step": 68160 }, { "epoch": 2.27, "grad_norm": 0.6749289631843567, "learning_rate": 8.396730371702152e-05, "loss": 1.7369, "step": 68161 }, { "epoch": 2.27, "grad_norm": 0.6788172125816345, "learning_rate": 8.396005153245005e-05, "loss": 1.8249, "step": 68162 }, { "epoch": 2.27, "grad_norm": 0.6624492406845093, "learning_rate": 8.395279961011934e-05, "loss": 1.6616, "step": 68163 }, { "epoch": 2.27, "grad_norm": 0.6611297726631165, "learning_rate": 8.394554795003801e-05, "loss": 1.6738, "step": 68164 }, { "epoch": 2.27, "grad_norm": 0.6553288698196411, "learning_rate": 8.393829655221504e-05, "loss": 1.6205, "step": 68165 }, { "epoch": 2.27, "grad_norm": 0.6573755145072937, "learning_rate": 8.393104541665914e-05, "loss": 1.7093, "step": 68166 }, { "epoch": 2.27, "grad_norm": 0.6587351560592651, "learning_rate": 8.392379454337905e-05, "loss": 1.6801, "step": 68167 }, { "epoch": 2.27, "grad_norm": 0.6678561568260193, "learning_rate": 8.391654393238369e-05, "loss": 1.6879, "step": 68168 }, { "epoch": 2.27, "grad_norm": 0.662067174911499, "learning_rate": 8.390929358368183e-05, "loss": 1.7607, "step": 68169 }, { "epoch": 2.27, "grad_norm": 0.6588732600212097, "learning_rate": 8.390204349728214e-05, "loss": 1.7224, "step": 68170 }, { "epoch": 2.27, "grad_norm": 0.6459206938743591, "learning_rate": 8.389479367319354e-05, "loss": 1.6371, "step": 68171 }, { "epoch": 2.27, "grad_norm": 0.6634896397590637, "learning_rate": 8.388754411142491e-05, "loss": 1.6732, "step": 68172 }, { "epoch": 2.27, "grad_norm": 0.6482558846473694, "learning_rate": 8.388029481198497e-05, "loss": 1.6965, "step": 68173 }, { "epoch": 2.27, "grad_norm": 0.639427900314331, "learning_rate": 8.387304577488238e-05, "loss": 1.7624, "step": 68174 }, { "epoch": 2.27, "grad_norm": 0.6444180607795715, "learning_rate": 8.386579700012621e-05, "loss": 1.6803, "step": 68175 }, { "epoch": 2.27, "grad_norm": 0.6791486144065857, "learning_rate": 8.385854848772507e-05, "loss": 1.6013, "step": 68176 }, { "epoch": 2.27, "grad_norm": 0.6587221026420593, "learning_rate": 8.385130023768773e-05, "loss": 1.752, "step": 68177 }, { "epoch": 2.27, "grad_norm": 0.643643319606781, "learning_rate": 8.384405225002308e-05, "loss": 1.7565, "step": 68178 }, { "epoch": 2.27, "grad_norm": 0.6867715120315552, "learning_rate": 8.383680452473999e-05, "loss": 1.7428, "step": 68179 }, { "epoch": 2.27, "grad_norm": 0.6552557349205017, "learning_rate": 8.382955706184716e-05, "loss": 1.6847, "step": 68180 }, { "epoch": 2.27, "grad_norm": 0.6642086505889893, "learning_rate": 8.38223098613533e-05, "loss": 1.6405, "step": 68181 }, { "epoch": 2.27, "grad_norm": 0.679425060749054, "learning_rate": 8.381506292326738e-05, "loss": 1.6893, "step": 68182 }, { "epoch": 2.27, "grad_norm": 0.6628691554069519, "learning_rate": 8.380781624759815e-05, "loss": 1.6788, "step": 68183 }, { "epoch": 2.27, "grad_norm": 0.6798029541969299, "learning_rate": 8.38005698343543e-05, "loss": 1.646, "step": 68184 }, { "epoch": 2.27, "grad_norm": 0.6523341536521912, "learning_rate": 8.379332368354465e-05, "loss": 1.766, "step": 68185 }, { "epoch": 2.27, "grad_norm": 0.670182466506958, "learning_rate": 8.378607779517816e-05, "loss": 1.7427, "step": 68186 }, { "epoch": 2.27, "grad_norm": 0.6491047739982605, "learning_rate": 8.377883216926355e-05, "loss": 1.662, "step": 68187 }, { "epoch": 2.27, "grad_norm": 0.6542953848838806, "learning_rate": 8.377158680580944e-05, "loss": 1.7334, "step": 68188 }, { "epoch": 2.27, "grad_norm": 0.672351598739624, "learning_rate": 8.376434170482488e-05, "loss": 1.6602, "step": 68189 }, { "epoch": 2.27, "grad_norm": 0.6826393604278564, "learning_rate": 8.375709686631855e-05, "loss": 1.6782, "step": 68190 }, { "epoch": 2.27, "grad_norm": 0.6583148241043091, "learning_rate": 8.374985229029913e-05, "loss": 1.6482, "step": 68191 }, { "epoch": 2.27, "grad_norm": 0.6592034697532654, "learning_rate": 8.374260797677557e-05, "loss": 1.6636, "step": 68192 }, { "epoch": 2.27, "grad_norm": 0.6608721017837524, "learning_rate": 8.373536392575666e-05, "loss": 1.7199, "step": 68193 }, { "epoch": 2.27, "grad_norm": 0.6864396929740906, "learning_rate": 8.372812013725116e-05, "loss": 1.6711, "step": 68194 }, { "epoch": 2.27, "grad_norm": 0.6491414308547974, "learning_rate": 8.37208766112678e-05, "loss": 1.7042, "step": 68195 }, { "epoch": 2.27, "grad_norm": 0.6798188090324402, "learning_rate": 8.371363334781551e-05, "loss": 1.756, "step": 68196 }, { "epoch": 2.27, "grad_norm": 0.6723765134811401, "learning_rate": 8.370639034690297e-05, "loss": 1.7354, "step": 68197 }, { "epoch": 2.27, "grad_norm": 0.6518180966377258, "learning_rate": 8.369914760853896e-05, "loss": 1.7582, "step": 68198 }, { "epoch": 2.27, "grad_norm": 0.6777063608169556, "learning_rate": 8.369190513273234e-05, "loss": 1.7779, "step": 68199 }, { "epoch": 2.27, "grad_norm": 0.6566444635391235, "learning_rate": 8.368466291949182e-05, "loss": 1.678, "step": 68200 }, { "epoch": 2.27, "grad_norm": 0.6570569276809692, "learning_rate": 8.367742096882635e-05, "loss": 1.6184, "step": 68201 }, { "epoch": 2.27, "grad_norm": 0.6460276246070862, "learning_rate": 8.367017928074452e-05, "loss": 1.7287, "step": 68202 }, { "epoch": 2.27, "grad_norm": 0.655913770198822, "learning_rate": 8.36629378552553e-05, "loss": 1.7489, "step": 68203 }, { "epoch": 2.27, "grad_norm": 0.6674963235855103, "learning_rate": 8.36556966923674e-05, "loss": 1.7413, "step": 68204 }, { "epoch": 2.27, "grad_norm": 0.6519378423690796, "learning_rate": 8.364845579208953e-05, "loss": 1.7117, "step": 68205 }, { "epoch": 2.27, "grad_norm": 0.6603009104728699, "learning_rate": 8.364121515443062e-05, "loss": 1.6573, "step": 68206 }, { "epoch": 2.27, "grad_norm": 0.6719756126403809, "learning_rate": 8.363397477939933e-05, "loss": 1.7391, "step": 68207 }, { "epoch": 2.27, "grad_norm": 0.6430643200874329, "learning_rate": 8.362673466700462e-05, "loss": 1.7442, "step": 68208 }, { "epoch": 2.27, "grad_norm": 0.6610300540924072, "learning_rate": 8.361949481725504e-05, "loss": 1.6416, "step": 68209 }, { "epoch": 2.27, "grad_norm": 0.662039577960968, "learning_rate": 8.361225523015962e-05, "loss": 1.6528, "step": 68210 }, { "epoch": 2.27, "grad_norm": 0.657728374004364, "learning_rate": 8.360501590572705e-05, "loss": 1.7134, "step": 68211 }, { "epoch": 2.27, "grad_norm": 0.6497195363044739, "learning_rate": 8.359777684396598e-05, "loss": 1.6573, "step": 68212 }, { "epoch": 2.27, "grad_norm": 0.6705560684204102, "learning_rate": 8.359053804488546e-05, "loss": 1.6867, "step": 68213 }, { "epoch": 2.27, "grad_norm": 0.6558547616004944, "learning_rate": 8.358329950849404e-05, "loss": 1.7382, "step": 68214 }, { "epoch": 2.27, "grad_norm": 0.6737160682678223, "learning_rate": 8.357606123480068e-05, "loss": 1.7645, "step": 68215 }, { "epoch": 2.27, "grad_norm": 0.7596786022186279, "learning_rate": 8.356882322381408e-05, "loss": 1.7338, "step": 68216 }, { "epoch": 2.27, "grad_norm": 0.6744853854179382, "learning_rate": 8.356158547554297e-05, "loss": 1.6313, "step": 68217 }, { "epoch": 2.27, "grad_norm": 0.6383585333824158, "learning_rate": 8.35543479899963e-05, "loss": 1.6252, "step": 68218 }, { "epoch": 2.27, "grad_norm": 0.6720869541168213, "learning_rate": 8.354711076718266e-05, "loss": 1.6186, "step": 68219 }, { "epoch": 2.27, "grad_norm": 0.6538152098655701, "learning_rate": 8.353987380711105e-05, "loss": 1.6465, "step": 68220 }, { "epoch": 2.27, "grad_norm": 0.6610581278800964, "learning_rate": 8.353263710979003e-05, "loss": 1.6617, "step": 68221 }, { "epoch": 2.27, "grad_norm": 0.6745734214782715, "learning_rate": 8.352540067522855e-05, "loss": 1.7169, "step": 68222 }, { "epoch": 2.27, "grad_norm": 0.6377021670341492, "learning_rate": 8.351816450343538e-05, "loss": 1.6975, "step": 68223 }, { "epoch": 2.27, "grad_norm": 0.6775597333908081, "learning_rate": 8.351092859441915e-05, "loss": 1.7704, "step": 68224 }, { "epoch": 2.27, "grad_norm": 0.6897112131118774, "learning_rate": 8.350369294818888e-05, "loss": 1.7104, "step": 68225 }, { "epoch": 2.27, "grad_norm": 0.6595854759216309, "learning_rate": 8.349645756475318e-05, "loss": 1.688, "step": 68226 }, { "epoch": 2.27, "grad_norm": 0.6742855906486511, "learning_rate": 8.348922244412081e-05, "loss": 1.6485, "step": 68227 }, { "epoch": 2.27, "grad_norm": 0.6953350901603699, "learning_rate": 8.348198758630063e-05, "loss": 1.7063, "step": 68228 }, { "epoch": 2.27, "grad_norm": 0.6856691241264343, "learning_rate": 8.34747529913015e-05, "loss": 1.7112, "step": 68229 }, { "epoch": 2.27, "grad_norm": 0.6475366353988647, "learning_rate": 8.346751865913211e-05, "loss": 1.6218, "step": 68230 }, { "epoch": 2.27, "grad_norm": 0.6835659742355347, "learning_rate": 8.346028458980115e-05, "loss": 1.6971, "step": 68231 }, { "epoch": 2.27, "grad_norm": 0.6623568534851074, "learning_rate": 8.345305078331763e-05, "loss": 1.7654, "step": 68232 }, { "epoch": 2.27, "grad_norm": 0.6392787098884583, "learning_rate": 8.344581723969014e-05, "loss": 1.6506, "step": 68233 }, { "epoch": 2.27, "grad_norm": 0.6574806571006775, "learning_rate": 8.34385839589275e-05, "loss": 1.6909, "step": 68234 }, { "epoch": 2.27, "grad_norm": 0.6575404405593872, "learning_rate": 8.343135094103847e-05, "loss": 1.6463, "step": 68235 }, { "epoch": 2.27, "grad_norm": 0.6687424182891846, "learning_rate": 8.342411818603205e-05, "loss": 1.6914, "step": 68236 }, { "epoch": 2.27, "grad_norm": 0.6429957151412964, "learning_rate": 8.341688569391665e-05, "loss": 1.6594, "step": 68237 }, { "epoch": 2.27, "grad_norm": 0.667578935623169, "learning_rate": 8.340965346470124e-05, "loss": 1.7655, "step": 68238 }, { "epoch": 2.27, "grad_norm": 0.6468195915222168, "learning_rate": 8.34024214983947e-05, "loss": 1.6588, "step": 68239 }, { "epoch": 2.27, "grad_norm": 0.6698285341262817, "learning_rate": 8.339518979500572e-05, "loss": 1.7351, "step": 68240 }, { "epoch": 2.27, "grad_norm": 0.6605395674705505, "learning_rate": 8.338795835454293e-05, "loss": 1.6857, "step": 68241 }, { "epoch": 2.27, "grad_norm": 0.6741886734962463, "learning_rate": 8.338072717701528e-05, "loss": 1.6109, "step": 68242 }, { "epoch": 2.27, "grad_norm": 0.6932233572006226, "learning_rate": 8.337349626243168e-05, "loss": 1.7277, "step": 68243 }, { "epoch": 2.27, "grad_norm": 0.655518651008606, "learning_rate": 8.336626561080057e-05, "loss": 1.7369, "step": 68244 }, { "epoch": 2.27, "grad_norm": 0.6369410157203674, "learning_rate": 8.335903522213088e-05, "loss": 1.6164, "step": 68245 }, { "epoch": 2.27, "grad_norm": 0.6593170166015625, "learning_rate": 8.335180509643151e-05, "loss": 1.7634, "step": 68246 }, { "epoch": 2.27, "grad_norm": 0.6744604110717773, "learning_rate": 8.33445752337111e-05, "loss": 1.6955, "step": 68247 }, { "epoch": 2.27, "grad_norm": 0.6636767387390137, "learning_rate": 8.333734563397835e-05, "loss": 1.7142, "step": 68248 }, { "epoch": 2.27, "grad_norm": 0.6827753186225891, "learning_rate": 8.333011629724217e-05, "loss": 1.743, "step": 68249 }, { "epoch": 2.27, "grad_norm": 0.6698746681213379, "learning_rate": 8.332288722351148e-05, "loss": 1.7152, "step": 68250 }, { "epoch": 2.27, "grad_norm": 0.645710825920105, "learning_rate": 8.33156584127947e-05, "loss": 1.6606, "step": 68251 }, { "epoch": 2.27, "grad_norm": 0.6708332896232605, "learning_rate": 8.330842986510077e-05, "loss": 1.8156, "step": 68252 }, { "epoch": 2.27, "grad_norm": 0.6944006681442261, "learning_rate": 8.330120158043856e-05, "loss": 1.7436, "step": 68253 }, { "epoch": 2.27, "grad_norm": 0.6599299311637878, "learning_rate": 8.329397355881679e-05, "loss": 1.5984, "step": 68254 }, { "epoch": 2.27, "grad_norm": 0.6933302283287048, "learning_rate": 8.328674580024411e-05, "loss": 1.7176, "step": 68255 }, { "epoch": 2.27, "grad_norm": 0.6525965332984924, "learning_rate": 8.327951830472947e-05, "loss": 1.7009, "step": 68256 }, { "epoch": 2.27, "grad_norm": 0.6486023664474487, "learning_rate": 8.327229107228158e-05, "loss": 1.7163, "step": 68257 }, { "epoch": 2.27, "grad_norm": 0.6578798294067383, "learning_rate": 8.326506410290907e-05, "loss": 1.7023, "step": 68258 }, { "epoch": 2.27, "grad_norm": 0.6760056614875793, "learning_rate": 8.325783739662087e-05, "loss": 1.7243, "step": 68259 }, { "epoch": 2.27, "grad_norm": 0.6722086668014526, "learning_rate": 8.32506109534258e-05, "loss": 1.7154, "step": 68260 }, { "epoch": 2.27, "grad_norm": 0.6529382467269897, "learning_rate": 8.324338477333253e-05, "loss": 1.6906, "step": 68261 }, { "epoch": 2.27, "grad_norm": 0.6860573291778564, "learning_rate": 8.32361588563498e-05, "loss": 1.6627, "step": 68262 }, { "epoch": 2.27, "grad_norm": 0.659034013748169, "learning_rate": 8.322893320248649e-05, "loss": 1.6842, "step": 68263 }, { "epoch": 2.27, "grad_norm": 0.6553316712379456, "learning_rate": 8.322170781175133e-05, "loss": 1.7289, "step": 68264 }, { "epoch": 2.27, "grad_norm": 0.6810088753700256, "learning_rate": 8.321448268415299e-05, "loss": 1.6763, "step": 68265 }, { "epoch": 2.27, "grad_norm": 0.688213050365448, "learning_rate": 8.320725781970042e-05, "loss": 1.7256, "step": 68266 }, { "epoch": 2.27, "grad_norm": 0.6724199056625366, "learning_rate": 8.320003321840222e-05, "loss": 1.6809, "step": 68267 }, { "epoch": 2.27, "grad_norm": 0.6739126443862915, "learning_rate": 8.31928088802673e-05, "loss": 1.6052, "step": 68268 }, { "epoch": 2.27, "grad_norm": 0.6596925258636475, "learning_rate": 8.318558480530428e-05, "loss": 1.6766, "step": 68269 }, { "epoch": 2.27, "grad_norm": 0.6454671621322632, "learning_rate": 8.317836099352213e-05, "loss": 1.6709, "step": 68270 }, { "epoch": 2.27, "grad_norm": 0.6652897000312805, "learning_rate": 8.317113744492947e-05, "loss": 1.6932, "step": 68271 }, { "epoch": 2.27, "grad_norm": 0.6523949503898621, "learning_rate": 8.316391415953502e-05, "loss": 1.7296, "step": 68272 }, { "epoch": 2.27, "grad_norm": 0.6639260649681091, "learning_rate": 8.315669113734773e-05, "loss": 1.7362, "step": 68273 }, { "epoch": 2.27, "grad_norm": 0.6543814539909363, "learning_rate": 8.314946837837614e-05, "loss": 1.7176, "step": 68274 }, { "epoch": 2.27, "grad_norm": 0.6665144562721252, "learning_rate": 8.314224588262926e-05, "loss": 1.7888, "step": 68275 }, { "epoch": 2.27, "grad_norm": 0.663453996181488, "learning_rate": 8.313502365011576e-05, "loss": 1.6851, "step": 68276 }, { "epoch": 2.27, "grad_norm": 0.6785975098609924, "learning_rate": 8.312780168084425e-05, "loss": 1.7462, "step": 68277 }, { "epoch": 2.27, "grad_norm": 0.6648164987564087, "learning_rate": 8.312057997482376e-05, "loss": 1.7694, "step": 68278 }, { "epoch": 2.27, "grad_norm": 0.6543151140213013, "learning_rate": 8.311335853206286e-05, "loss": 1.722, "step": 68279 }, { "epoch": 2.27, "grad_norm": 0.6564416289329529, "learning_rate": 8.310613735257043e-05, "loss": 1.6707, "step": 68280 }, { "epoch": 2.27, "grad_norm": 0.649099588394165, "learning_rate": 8.309891643635512e-05, "loss": 1.7265, "step": 68281 }, { "epoch": 2.27, "grad_norm": 0.6879578828811646, "learning_rate": 8.309169578342585e-05, "loss": 1.6714, "step": 68282 }, { "epoch": 2.27, "grad_norm": 0.6535007953643799, "learning_rate": 8.308447539379133e-05, "loss": 1.7027, "step": 68283 }, { "epoch": 2.27, "grad_norm": 0.6884190440177917, "learning_rate": 8.307725526746018e-05, "loss": 1.6906, "step": 68284 }, { "epoch": 2.27, "grad_norm": 0.6661592125892639, "learning_rate": 8.307003540444138e-05, "loss": 1.6511, "step": 68285 }, { "epoch": 2.27, "grad_norm": 0.6626129150390625, "learning_rate": 8.306281580474358e-05, "loss": 1.6853, "step": 68286 }, { "epoch": 2.27, "grad_norm": 0.6826077699661255, "learning_rate": 8.305559646837546e-05, "loss": 1.6728, "step": 68287 }, { "epoch": 2.27, "grad_norm": 0.6695525050163269, "learning_rate": 8.30483773953459e-05, "loss": 1.7386, "step": 68288 }, { "epoch": 2.27, "grad_norm": 0.654034435749054, "learning_rate": 8.304115858566375e-05, "loss": 1.6625, "step": 68289 }, { "epoch": 2.27, "grad_norm": 0.6756961345672607, "learning_rate": 8.303394003933763e-05, "loss": 1.6576, "step": 68290 }, { "epoch": 2.27, "grad_norm": 0.6536584496498108, "learning_rate": 8.302672175637628e-05, "loss": 1.6968, "step": 68291 }, { "epoch": 2.27, "grad_norm": 0.664867103099823, "learning_rate": 8.301950373678858e-05, "loss": 1.6531, "step": 68292 }, { "epoch": 2.27, "grad_norm": 0.6705491542816162, "learning_rate": 8.301228598058323e-05, "loss": 1.7094, "step": 68293 }, { "epoch": 2.27, "grad_norm": 0.6633336544036865, "learning_rate": 8.300506848776893e-05, "loss": 1.6496, "step": 68294 }, { "epoch": 2.27, "grad_norm": 0.6435166597366333, "learning_rate": 8.29978512583545e-05, "loss": 1.7305, "step": 68295 }, { "epoch": 2.27, "grad_norm": 0.6455866694450378, "learning_rate": 8.299063429234879e-05, "loss": 1.6693, "step": 68296 }, { "epoch": 2.27, "grad_norm": 0.6692110896110535, "learning_rate": 8.298341758976047e-05, "loss": 1.6825, "step": 68297 }, { "epoch": 2.27, "grad_norm": 0.6575652360916138, "learning_rate": 8.297620115059823e-05, "loss": 1.6151, "step": 68298 }, { "epoch": 2.27, "grad_norm": 0.6549721956253052, "learning_rate": 8.296898497487096e-05, "loss": 1.6568, "step": 68299 }, { "epoch": 2.27, "grad_norm": 0.6856171488761902, "learning_rate": 8.29617690625874e-05, "loss": 1.624, "step": 68300 }, { "epoch": 2.27, "grad_norm": 0.6647468209266663, "learning_rate": 8.295455341375617e-05, "loss": 1.7182, "step": 68301 }, { "epoch": 2.27, "grad_norm": 0.653053343296051, "learning_rate": 8.294733802838614e-05, "loss": 1.6429, "step": 68302 }, { "epoch": 2.27, "grad_norm": 0.6753480434417725, "learning_rate": 8.294012290648614e-05, "loss": 1.7181, "step": 68303 }, { "epoch": 2.27, "grad_norm": 0.6857851147651672, "learning_rate": 8.293290804806484e-05, "loss": 1.654, "step": 68304 }, { "epoch": 2.27, "grad_norm": 0.6621295809745789, "learning_rate": 8.292569345313093e-05, "loss": 1.7011, "step": 68305 }, { "epoch": 2.27, "grad_norm": 0.6726353764533997, "learning_rate": 8.291847912169332e-05, "loss": 1.7476, "step": 68306 }, { "epoch": 2.27, "grad_norm": 0.6615129113197327, "learning_rate": 8.291126505376074e-05, "loss": 1.6777, "step": 68307 }, { "epoch": 2.27, "grad_norm": 0.6583254337310791, "learning_rate": 8.290405124934176e-05, "loss": 1.6597, "step": 68308 }, { "epoch": 2.27, "grad_norm": 0.6489161252975464, "learning_rate": 8.289683770844527e-05, "loss": 1.7317, "step": 68309 }, { "epoch": 2.27, "grad_norm": 0.6668170690536499, "learning_rate": 8.288962443108012e-05, "loss": 1.7346, "step": 68310 }, { "epoch": 2.27, "grad_norm": 0.6567747592926025, "learning_rate": 8.2882411417255e-05, "loss": 1.6566, "step": 68311 }, { "epoch": 2.27, "grad_norm": 0.6598874926567078, "learning_rate": 8.287519866697856e-05, "loss": 1.7306, "step": 68312 }, { "epoch": 2.27, "grad_norm": 0.6335043907165527, "learning_rate": 8.286798618025969e-05, "loss": 1.654, "step": 68313 }, { "epoch": 2.27, "grad_norm": 0.6647760272026062, "learning_rate": 8.286077395710713e-05, "loss": 1.7633, "step": 68314 }, { "epoch": 2.27, "grad_norm": 0.6541990637779236, "learning_rate": 8.285356199752949e-05, "loss": 1.7217, "step": 68315 }, { "epoch": 2.27, "grad_norm": 0.6595700979232788, "learning_rate": 8.284635030153572e-05, "loss": 1.7178, "step": 68316 }, { "epoch": 2.27, "grad_norm": 0.6749798655509949, "learning_rate": 8.283913886913438e-05, "loss": 1.7807, "step": 68317 }, { "epoch": 2.27, "grad_norm": 0.6773290634155273, "learning_rate": 8.283192770033446e-05, "loss": 1.7396, "step": 68318 }, { "epoch": 2.27, "grad_norm": 0.6759583950042725, "learning_rate": 8.282471679514443e-05, "loss": 1.7279, "step": 68319 }, { "epoch": 2.27, "grad_norm": 0.6786765456199646, "learning_rate": 8.281750615357334e-05, "loss": 1.7033, "step": 68320 }, { "epoch": 2.27, "grad_norm": 0.6736944317817688, "learning_rate": 8.281029577562979e-05, "loss": 1.7005, "step": 68321 }, { "epoch": 2.27, "grad_norm": 0.6496702432632446, "learning_rate": 8.280308566132242e-05, "loss": 1.6996, "step": 68322 }, { "epoch": 2.27, "grad_norm": 0.6781213283538818, "learning_rate": 8.27958758106602e-05, "loss": 1.648, "step": 68323 }, { "epoch": 2.27, "grad_norm": 0.6550607085227966, "learning_rate": 8.278866622365171e-05, "loss": 1.7082, "step": 68324 }, { "epoch": 2.27, "grad_norm": 0.6891876459121704, "learning_rate": 8.278145690030583e-05, "loss": 1.7092, "step": 68325 }, { "epoch": 2.27, "grad_norm": 0.6820909380912781, "learning_rate": 8.277424784063128e-05, "loss": 1.7198, "step": 68326 }, { "epoch": 2.27, "grad_norm": 0.6617918014526367, "learning_rate": 8.276703904463671e-05, "loss": 1.6752, "step": 68327 }, { "epoch": 2.27, "grad_norm": 0.6444997787475586, "learning_rate": 8.275983051233104e-05, "loss": 1.6985, "step": 68328 }, { "epoch": 2.27, "grad_norm": 0.6638164520263672, "learning_rate": 8.27526222437228e-05, "loss": 1.7288, "step": 68329 }, { "epoch": 2.27, "grad_norm": 0.6437587141990662, "learning_rate": 8.274541423882099e-05, "loss": 1.6715, "step": 68330 }, { "epoch": 2.27, "grad_norm": 0.6410965323448181, "learning_rate": 8.273820649763412e-05, "loss": 1.6865, "step": 68331 }, { "epoch": 2.27, "grad_norm": 0.6673417091369629, "learning_rate": 8.273099902017116e-05, "loss": 1.707, "step": 68332 }, { "epoch": 2.27, "grad_norm": 0.6728695034980774, "learning_rate": 8.272379180644074e-05, "loss": 1.6659, "step": 68333 }, { "epoch": 2.27, "grad_norm": 0.6735289096832275, "learning_rate": 8.271658485645156e-05, "loss": 1.7523, "step": 68334 }, { "epoch": 2.27, "grad_norm": 0.6709733605384827, "learning_rate": 8.270937817021251e-05, "loss": 1.63, "step": 68335 }, { "epoch": 2.27, "grad_norm": 0.6590901017189026, "learning_rate": 8.270217174773216e-05, "loss": 1.6864, "step": 68336 }, { "epoch": 2.27, "grad_norm": 0.6957172751426697, "learning_rate": 8.269496558901945e-05, "loss": 1.6717, "step": 68337 }, { "epoch": 2.27, "grad_norm": 0.6697651147842407, "learning_rate": 8.268775969408294e-05, "loss": 1.6825, "step": 68338 }, { "epoch": 2.27, "grad_norm": 0.6838488578796387, "learning_rate": 8.268055406293158e-05, "loss": 1.756, "step": 68339 }, { "epoch": 2.27, "grad_norm": 0.6741592884063721, "learning_rate": 8.267334869557399e-05, "loss": 1.7115, "step": 68340 }, { "epoch": 2.27, "grad_norm": 0.655569851398468, "learning_rate": 8.266614359201883e-05, "loss": 1.7493, "step": 68341 }, { "epoch": 2.27, "grad_norm": 0.6639487147331238, "learning_rate": 8.265893875227505e-05, "loss": 1.7253, "step": 68342 }, { "epoch": 2.27, "grad_norm": 0.6667748689651489, "learning_rate": 8.26517341763513e-05, "loss": 1.6708, "step": 68343 }, { "epoch": 2.27, "grad_norm": 0.6624025106430054, "learning_rate": 8.264452986425625e-05, "loss": 1.6661, "step": 68344 }, { "epoch": 2.27, "grad_norm": 0.6729460954666138, "learning_rate": 8.263732581599868e-05, "loss": 1.7459, "step": 68345 }, { "epoch": 2.27, "grad_norm": 0.6927664279937744, "learning_rate": 8.263012203158746e-05, "loss": 1.738, "step": 68346 }, { "epoch": 2.27, "grad_norm": 0.6602259874343872, "learning_rate": 8.262291851103127e-05, "loss": 1.6897, "step": 68347 }, { "epoch": 2.27, "grad_norm": 0.677285373210907, "learning_rate": 8.261571525433872e-05, "loss": 1.6822, "step": 68348 }, { "epoch": 2.27, "grad_norm": 0.6718111038208008, "learning_rate": 8.260851226151875e-05, "loss": 1.6587, "step": 68349 }, { "epoch": 2.27, "grad_norm": 0.6534702181816101, "learning_rate": 8.260130953258003e-05, "loss": 1.7355, "step": 68350 }, { "epoch": 2.27, "grad_norm": 0.6597887873649597, "learning_rate": 8.25941070675312e-05, "loss": 1.7578, "step": 68351 }, { "epoch": 2.27, "grad_norm": 0.6995380520820618, "learning_rate": 8.258690486638105e-05, "loss": 1.6706, "step": 68352 }, { "epoch": 2.27, "grad_norm": 0.6741098761558533, "learning_rate": 8.257970292913859e-05, "loss": 1.7319, "step": 68353 }, { "epoch": 2.27, "grad_norm": 0.6665050387382507, "learning_rate": 8.257250125581213e-05, "loss": 1.6263, "step": 68354 }, { "epoch": 2.27, "grad_norm": 0.6573184132575989, "learning_rate": 8.256529984641062e-05, "loss": 1.6625, "step": 68355 }, { "epoch": 2.27, "grad_norm": 0.6574833393096924, "learning_rate": 8.25580987009429e-05, "loss": 1.6405, "step": 68356 }, { "epoch": 2.27, "grad_norm": 0.6726834774017334, "learning_rate": 8.255089781941761e-05, "loss": 1.7507, "step": 68357 }, { "epoch": 2.27, "grad_norm": 0.6536079049110413, "learning_rate": 8.254369720184338e-05, "loss": 1.713, "step": 68358 }, { "epoch": 2.27, "grad_norm": 0.6893723011016846, "learning_rate": 8.25364968482291e-05, "loss": 1.6448, "step": 68359 }, { "epoch": 2.27, "grad_norm": 0.7132706642150879, "learning_rate": 8.252929675858359e-05, "loss": 1.709, "step": 68360 }, { "epoch": 2.27, "grad_norm": 0.7111812829971313, "learning_rate": 8.252209693291534e-05, "loss": 1.7151, "step": 68361 }, { "epoch": 2.27, "grad_norm": 0.6367796659469604, "learning_rate": 8.251489737123322e-05, "loss": 1.6044, "step": 68362 }, { "epoch": 2.27, "grad_norm": 0.6775102019309998, "learning_rate": 8.250769807354605e-05, "loss": 1.7959, "step": 68363 }, { "epoch": 2.27, "grad_norm": 0.6605399250984192, "learning_rate": 8.250049903986247e-05, "loss": 1.7099, "step": 68364 }, { "epoch": 2.27, "grad_norm": 0.6555690169334412, "learning_rate": 8.249330027019117e-05, "loss": 1.675, "step": 68365 }, { "epoch": 2.27, "grad_norm": 0.6835110187530518, "learning_rate": 8.248610176454094e-05, "loss": 1.7877, "step": 68366 }, { "epoch": 2.27, "grad_norm": 0.6522065997123718, "learning_rate": 8.247890352292074e-05, "loss": 1.7003, "step": 68367 }, { "epoch": 2.27, "grad_norm": 0.6818841099739075, "learning_rate": 8.24717055453389e-05, "loss": 1.6691, "step": 68368 }, { "epoch": 2.27, "grad_norm": 0.6628771424293518, "learning_rate": 8.246450783180433e-05, "loss": 1.6846, "step": 68369 }, { "epoch": 2.27, "grad_norm": 0.6262552738189697, "learning_rate": 8.245731038232592e-05, "loss": 1.6368, "step": 68370 }, { "epoch": 2.27, "grad_norm": 0.6474269032478333, "learning_rate": 8.245011319691228e-05, "loss": 1.7318, "step": 68371 }, { "epoch": 2.27, "grad_norm": 0.6741622090339661, "learning_rate": 8.244291627557207e-05, "loss": 1.7143, "step": 68372 }, { "epoch": 2.27, "grad_norm": 0.6530267596244812, "learning_rate": 8.243571961831418e-05, "loss": 1.7561, "step": 68373 }, { "epoch": 2.27, "grad_norm": 0.6506625413894653, "learning_rate": 8.242852322514727e-05, "loss": 1.6581, "step": 68374 }, { "epoch": 2.27, "grad_norm": 0.6433461308479309, "learning_rate": 8.242132709607997e-05, "loss": 1.6248, "step": 68375 }, { "epoch": 2.27, "grad_norm": 0.6710754632949829, "learning_rate": 8.241413123112114e-05, "loss": 1.6251, "step": 68376 }, { "epoch": 2.27, "grad_norm": 0.6588595509529114, "learning_rate": 8.240693563027958e-05, "loss": 1.6654, "step": 68377 }, { "epoch": 2.27, "grad_norm": 0.6540567278862, "learning_rate": 8.239974029356393e-05, "loss": 1.7213, "step": 68378 }, { "epoch": 2.27, "grad_norm": 0.6618739366531372, "learning_rate": 8.239254522098286e-05, "loss": 1.7166, "step": 68379 }, { "epoch": 2.28, "grad_norm": 0.6635239124298096, "learning_rate": 8.238535041254526e-05, "loss": 1.7001, "step": 68380 }, { "epoch": 2.28, "grad_norm": 0.6482658386230469, "learning_rate": 8.237815586825976e-05, "loss": 1.7166, "step": 68381 }, { "epoch": 2.28, "grad_norm": 0.6623616814613342, "learning_rate": 8.237096158813506e-05, "loss": 1.676, "step": 68382 }, { "epoch": 2.28, "grad_norm": 0.6955479979515076, "learning_rate": 8.236376757218002e-05, "loss": 1.6841, "step": 68383 }, { "epoch": 2.28, "grad_norm": 0.6229341626167297, "learning_rate": 8.235657382040322e-05, "loss": 1.5866, "step": 68384 }, { "epoch": 2.28, "grad_norm": 0.6592655777931213, "learning_rate": 8.234938033281355e-05, "loss": 1.7019, "step": 68385 }, { "epoch": 2.28, "grad_norm": 0.6889229416847229, "learning_rate": 8.234218710941956e-05, "loss": 1.6953, "step": 68386 }, { "epoch": 2.28, "grad_norm": 0.6582229137420654, "learning_rate": 8.233499415023018e-05, "loss": 1.6692, "step": 68387 }, { "epoch": 2.28, "grad_norm": 0.661202073097229, "learning_rate": 8.232780145525406e-05, "loss": 1.7006, "step": 68388 }, { "epoch": 2.28, "grad_norm": 0.6583821177482605, "learning_rate": 8.232060902449982e-05, "loss": 1.6843, "step": 68389 }, { "epoch": 2.28, "grad_norm": 0.697665274143219, "learning_rate": 8.231341685797638e-05, "loss": 1.6604, "step": 68390 }, { "epoch": 2.28, "grad_norm": 0.7179632186889648, "learning_rate": 8.230622495569227e-05, "loss": 1.648, "step": 68391 }, { "epoch": 2.28, "grad_norm": 0.6584532856941223, "learning_rate": 8.229903331765645e-05, "loss": 1.7625, "step": 68392 }, { "epoch": 2.28, "grad_norm": 0.6616755127906799, "learning_rate": 8.22918419438775e-05, "loss": 1.6999, "step": 68393 }, { "epoch": 2.28, "grad_norm": 0.6690821647644043, "learning_rate": 8.228465083436409e-05, "loss": 1.6739, "step": 68394 }, { "epoch": 2.28, "grad_norm": 0.6585128903388977, "learning_rate": 8.227745998912513e-05, "loss": 1.6854, "step": 68395 }, { "epoch": 2.28, "grad_norm": 0.6702834963798523, "learning_rate": 8.227026940816916e-05, "loss": 1.6889, "step": 68396 }, { "epoch": 2.28, "grad_norm": 0.648708701133728, "learning_rate": 8.22630790915051e-05, "loss": 1.7116, "step": 68397 }, { "epoch": 2.28, "grad_norm": 0.6465179324150085, "learning_rate": 8.22558890391415e-05, "loss": 1.6778, "step": 68398 }, { "epoch": 2.28, "grad_norm": 0.6848645806312561, "learning_rate": 8.224869925108721e-05, "loss": 1.7224, "step": 68399 }, { "epoch": 2.28, "grad_norm": 0.6548559069633484, "learning_rate": 8.224150972735096e-05, "loss": 1.6445, "step": 68400 }, { "epoch": 2.28, "grad_norm": 0.6842987537384033, "learning_rate": 8.223432046794134e-05, "loss": 1.6915, "step": 68401 }, { "epoch": 2.28, "grad_norm": 0.6674466729164124, "learning_rate": 8.222713147286723e-05, "loss": 1.7383, "step": 68402 }, { "epoch": 2.28, "grad_norm": 0.6560254096984863, "learning_rate": 8.221994274213732e-05, "loss": 1.7357, "step": 68403 }, { "epoch": 2.28, "grad_norm": 0.6777857542037964, "learning_rate": 8.221275427576022e-05, "loss": 1.6596, "step": 68404 }, { "epoch": 2.28, "grad_norm": 0.7431074976921082, "learning_rate": 8.220556607374474e-05, "loss": 1.7333, "step": 68405 }, { "epoch": 2.28, "grad_norm": 0.6692717671394348, "learning_rate": 8.219837813609972e-05, "loss": 1.7009, "step": 68406 }, { "epoch": 2.28, "grad_norm": 0.6403694748878479, "learning_rate": 8.219119046283375e-05, "loss": 1.7465, "step": 68407 }, { "epoch": 2.28, "grad_norm": 0.641930103302002, "learning_rate": 8.218400305395548e-05, "loss": 1.6543, "step": 68408 }, { "epoch": 2.28, "grad_norm": 1.414243459701538, "learning_rate": 8.217681590947386e-05, "loss": 1.7135, "step": 68409 }, { "epoch": 2.28, "grad_norm": 0.6471904516220093, "learning_rate": 8.21696290293975e-05, "loss": 1.6768, "step": 68410 }, { "epoch": 2.28, "grad_norm": 0.6744279265403748, "learning_rate": 8.216244241373499e-05, "loss": 1.6489, "step": 68411 }, { "epoch": 2.28, "grad_norm": 0.7003679871559143, "learning_rate": 8.215525606249521e-05, "loss": 1.6863, "step": 68412 }, { "epoch": 2.28, "grad_norm": 0.6523569822311401, "learning_rate": 8.214806997568691e-05, "loss": 1.5885, "step": 68413 }, { "epoch": 2.28, "grad_norm": 0.6833613514900208, "learning_rate": 8.214088415331877e-05, "loss": 1.7578, "step": 68414 }, { "epoch": 2.28, "grad_norm": 0.6440742015838623, "learning_rate": 8.213369859539939e-05, "loss": 1.7632, "step": 68415 }, { "epoch": 2.28, "grad_norm": 0.6778720617294312, "learning_rate": 8.21265133019377e-05, "loss": 1.6823, "step": 68416 }, { "epoch": 2.28, "grad_norm": 0.6927266120910645, "learning_rate": 8.211932827294234e-05, "loss": 1.7047, "step": 68417 }, { "epoch": 2.28, "grad_norm": 0.6899758577346802, "learning_rate": 8.21121435084219e-05, "loss": 1.7109, "step": 68418 }, { "epoch": 2.28, "grad_norm": 0.6826296448707581, "learning_rate": 8.210495900838524e-05, "loss": 1.5191, "step": 68419 }, { "epoch": 2.28, "grad_norm": 0.6833358407020569, "learning_rate": 8.209777477284111e-05, "loss": 1.6602, "step": 68420 }, { "epoch": 2.28, "grad_norm": 0.6503860950469971, "learning_rate": 8.20905908017982e-05, "loss": 1.6862, "step": 68421 }, { "epoch": 2.28, "grad_norm": 0.6743040680885315, "learning_rate": 8.20834070952651e-05, "loss": 1.7298, "step": 68422 }, { "epoch": 2.28, "grad_norm": 0.6578790545463562, "learning_rate": 8.207622365325074e-05, "loss": 1.706, "step": 68423 }, { "epoch": 2.28, "grad_norm": 0.6866675615310669, "learning_rate": 8.206904047576371e-05, "loss": 1.7976, "step": 68424 }, { "epoch": 2.28, "grad_norm": 0.6812354326248169, "learning_rate": 8.206185756281269e-05, "loss": 1.6857, "step": 68425 }, { "epoch": 2.28, "grad_norm": 0.6719329953193665, "learning_rate": 8.205467491440644e-05, "loss": 1.7288, "step": 68426 }, { "epoch": 2.28, "grad_norm": 0.665160059928894, "learning_rate": 8.20474925305538e-05, "loss": 1.7423, "step": 68427 }, { "epoch": 2.28, "grad_norm": 0.6834405064582825, "learning_rate": 8.20403104112634e-05, "loss": 1.6896, "step": 68428 }, { "epoch": 2.28, "grad_norm": 0.6469281315803528, "learning_rate": 8.203312855654385e-05, "loss": 1.7196, "step": 68429 }, { "epoch": 2.28, "grad_norm": 0.6840150952339172, "learning_rate": 8.202594696640408e-05, "loss": 1.7053, "step": 68430 }, { "epoch": 2.28, "grad_norm": 0.6820002794265747, "learning_rate": 8.201876564085265e-05, "loss": 1.7477, "step": 68431 }, { "epoch": 2.28, "grad_norm": 0.7056079506874084, "learning_rate": 8.201158457989825e-05, "loss": 1.6585, "step": 68432 }, { "epoch": 2.28, "grad_norm": 0.6665732264518738, "learning_rate": 8.200440378354978e-05, "loss": 1.6378, "step": 68433 }, { "epoch": 2.28, "grad_norm": 0.6749706864356995, "learning_rate": 8.199722325181574e-05, "loss": 1.6904, "step": 68434 }, { "epoch": 2.28, "grad_norm": 0.6752891540527344, "learning_rate": 8.199004298470502e-05, "loss": 1.7097, "step": 68435 }, { "epoch": 2.28, "grad_norm": 0.6541999578475952, "learning_rate": 8.198286298222618e-05, "loss": 1.6581, "step": 68436 }, { "epoch": 2.28, "grad_norm": 0.6683974266052246, "learning_rate": 8.197568324438812e-05, "loss": 1.7063, "step": 68437 }, { "epoch": 2.28, "grad_norm": 0.6535316705703735, "learning_rate": 8.196850377119946e-05, "loss": 1.6723, "step": 68438 }, { "epoch": 2.28, "grad_norm": 0.6772965788841248, "learning_rate": 8.196132456266881e-05, "loss": 1.6555, "step": 68439 }, { "epoch": 2.28, "grad_norm": 0.6638545393943787, "learning_rate": 8.19541456188051e-05, "loss": 1.6786, "step": 68440 }, { "epoch": 2.28, "grad_norm": 0.6532092690467834, "learning_rate": 8.194696693961681e-05, "loss": 1.6426, "step": 68441 }, { "epoch": 2.28, "grad_norm": 0.8900650143623352, "learning_rate": 8.19397885251129e-05, "loss": 1.7325, "step": 68442 }, { "epoch": 2.28, "grad_norm": 0.6606150269508362, "learning_rate": 8.193261037530191e-05, "loss": 1.7405, "step": 68443 }, { "epoch": 2.28, "grad_norm": 0.6694097518920898, "learning_rate": 8.192543249019256e-05, "loss": 1.6303, "step": 68444 }, { "epoch": 2.28, "grad_norm": 0.6932217478752136, "learning_rate": 8.191825486979366e-05, "loss": 1.7072, "step": 68445 }, { "epoch": 2.28, "grad_norm": 0.6707713603973389, "learning_rate": 8.191107751411376e-05, "loss": 1.7093, "step": 68446 }, { "epoch": 2.28, "grad_norm": 0.6518877744674683, "learning_rate": 8.190390042316178e-05, "loss": 1.6583, "step": 68447 }, { "epoch": 2.28, "grad_norm": 0.6536567807197571, "learning_rate": 8.189672359694627e-05, "loss": 1.6826, "step": 68448 }, { "epoch": 2.28, "grad_norm": 0.643582820892334, "learning_rate": 8.188954703547608e-05, "loss": 1.7055, "step": 68449 }, { "epoch": 2.28, "grad_norm": 1.2058271169662476, "learning_rate": 8.188237073875984e-05, "loss": 1.6461, "step": 68450 }, { "epoch": 2.28, "grad_norm": 0.6775113344192505, "learning_rate": 8.187519470680615e-05, "loss": 1.7022, "step": 68451 }, { "epoch": 2.28, "grad_norm": 0.6642634868621826, "learning_rate": 8.186801893962395e-05, "loss": 1.7724, "step": 68452 }, { "epoch": 2.28, "grad_norm": 0.6778433322906494, "learning_rate": 8.186084343722174e-05, "loss": 1.7188, "step": 68453 }, { "epoch": 2.28, "grad_norm": 0.6645260453224182, "learning_rate": 8.18536681996084e-05, "loss": 1.7603, "step": 68454 }, { "epoch": 2.28, "grad_norm": 0.6456133127212524, "learning_rate": 8.184649322679249e-05, "loss": 1.6713, "step": 68455 }, { "epoch": 2.28, "grad_norm": 0.6572784781455994, "learning_rate": 8.183931851878288e-05, "loss": 1.6987, "step": 68456 }, { "epoch": 2.28, "grad_norm": 0.6517999768257141, "learning_rate": 8.183214407558821e-05, "loss": 1.6948, "step": 68457 }, { "epoch": 2.28, "grad_norm": 0.674419641494751, "learning_rate": 8.182496989721705e-05, "loss": 1.6928, "step": 68458 }, { "epoch": 2.28, "grad_norm": 0.6605576872825623, "learning_rate": 8.181779598367832e-05, "loss": 1.6241, "step": 68459 }, { "epoch": 2.28, "grad_norm": 0.6644905805587769, "learning_rate": 8.181062233498066e-05, "loss": 1.6568, "step": 68460 }, { "epoch": 2.28, "grad_norm": 0.6639658212661743, "learning_rate": 8.180344895113266e-05, "loss": 1.6957, "step": 68461 }, { "epoch": 2.28, "grad_norm": 0.6313356161117554, "learning_rate": 8.179627583214314e-05, "loss": 1.6278, "step": 68462 }, { "epoch": 2.28, "grad_norm": 0.6762924790382385, "learning_rate": 8.178910297802084e-05, "loss": 1.6842, "step": 68463 }, { "epoch": 2.28, "grad_norm": 0.6757323145866394, "learning_rate": 8.178193038877444e-05, "loss": 1.7141, "step": 68464 }, { "epoch": 2.28, "grad_norm": 0.6566967368125916, "learning_rate": 8.177475806441255e-05, "loss": 1.6433, "step": 68465 }, { "epoch": 2.28, "grad_norm": 0.6641429662704468, "learning_rate": 8.176758600494402e-05, "loss": 1.6619, "step": 68466 }, { "epoch": 2.28, "grad_norm": 0.6649459600448608, "learning_rate": 8.176041421037748e-05, "loss": 1.7206, "step": 68467 }, { "epoch": 2.28, "grad_norm": 0.652890682220459, "learning_rate": 8.175324268072155e-05, "loss": 1.6331, "step": 68468 }, { "epoch": 2.28, "grad_norm": 0.6702255606651306, "learning_rate": 8.174607141598505e-05, "loss": 1.7028, "step": 68469 }, { "epoch": 2.28, "grad_norm": 0.6883994340896606, "learning_rate": 8.173890041617682e-05, "loss": 1.7298, "step": 68470 }, { "epoch": 2.28, "grad_norm": 0.6913949251174927, "learning_rate": 8.173172968130524e-05, "loss": 1.7266, "step": 68471 }, { "epoch": 2.28, "grad_norm": 0.6816820502281189, "learning_rate": 8.172455921137918e-05, "loss": 1.7415, "step": 68472 }, { "epoch": 2.28, "grad_norm": 0.6708110570907593, "learning_rate": 8.171738900640741e-05, "loss": 1.7685, "step": 68473 }, { "epoch": 2.28, "grad_norm": 0.6641024351119995, "learning_rate": 8.171021906639861e-05, "loss": 1.7008, "step": 68474 }, { "epoch": 2.28, "grad_norm": 0.6712726354598999, "learning_rate": 8.170304939136131e-05, "loss": 1.7404, "step": 68475 }, { "epoch": 2.28, "grad_norm": 0.6718147993087769, "learning_rate": 8.169587998130435e-05, "loss": 1.6341, "step": 68476 }, { "epoch": 2.28, "grad_norm": 0.6638949513435364, "learning_rate": 8.168871083623661e-05, "loss": 1.625, "step": 68477 }, { "epoch": 2.28, "grad_norm": 0.6613550186157227, "learning_rate": 8.168154195616645e-05, "loss": 1.7441, "step": 68478 }, { "epoch": 2.28, "grad_norm": 0.695982038974762, "learning_rate": 8.167437334110272e-05, "loss": 1.6629, "step": 68479 }, { "epoch": 2.28, "grad_norm": 0.6470503211021423, "learning_rate": 8.16672049910542e-05, "loss": 1.6933, "step": 68480 }, { "epoch": 2.28, "grad_norm": 0.6622291803359985, "learning_rate": 8.166003690602953e-05, "loss": 1.7265, "step": 68481 }, { "epoch": 2.28, "grad_norm": 0.6688748002052307, "learning_rate": 8.165286908603735e-05, "loss": 1.6784, "step": 68482 }, { "epoch": 2.28, "grad_norm": 0.6656200289726257, "learning_rate": 8.164570153108635e-05, "loss": 1.6978, "step": 68483 }, { "epoch": 2.28, "grad_norm": 0.6923982501029968, "learning_rate": 8.163853424118554e-05, "loss": 1.7611, "step": 68484 }, { "epoch": 2.28, "grad_norm": 0.6868564486503601, "learning_rate": 8.163136721634317e-05, "loss": 1.6341, "step": 68485 }, { "epoch": 2.28, "grad_norm": 0.6536439061164856, "learning_rate": 8.162420045656814e-05, "loss": 1.66, "step": 68486 }, { "epoch": 2.28, "grad_norm": 0.669100821018219, "learning_rate": 8.161703396186922e-05, "loss": 1.6408, "step": 68487 }, { "epoch": 2.28, "grad_norm": 0.6416935324668884, "learning_rate": 8.160986773225508e-05, "loss": 1.6582, "step": 68488 }, { "epoch": 2.28, "grad_norm": 0.6809805035591125, "learning_rate": 8.16027017677343e-05, "loss": 1.7506, "step": 68489 }, { "epoch": 2.28, "grad_norm": 0.6638930439949036, "learning_rate": 8.159553606831573e-05, "loss": 1.6535, "step": 68490 }, { "epoch": 2.28, "grad_norm": 0.6671826839447021, "learning_rate": 8.1588370634008e-05, "loss": 1.6616, "step": 68491 }, { "epoch": 2.28, "grad_norm": 0.6730926632881165, "learning_rate": 8.15812054648197e-05, "loss": 1.5922, "step": 68492 }, { "epoch": 2.28, "grad_norm": 0.644070029258728, "learning_rate": 8.157404056075968e-05, "loss": 1.751, "step": 68493 }, { "epoch": 2.28, "grad_norm": 0.6486333608627319, "learning_rate": 8.156687592183663e-05, "loss": 1.7064, "step": 68494 }, { "epoch": 2.28, "grad_norm": 0.6741830706596375, "learning_rate": 8.155971154805924e-05, "loss": 1.684, "step": 68495 }, { "epoch": 2.28, "grad_norm": 0.6586658358573914, "learning_rate": 8.155254743943606e-05, "loss": 1.6863, "step": 68496 }, { "epoch": 2.28, "grad_norm": 0.6861704587936401, "learning_rate": 8.154538359597602e-05, "loss": 1.6209, "step": 68497 }, { "epoch": 2.28, "grad_norm": 0.6914258003234863, "learning_rate": 8.15382200176877e-05, "loss": 1.7075, "step": 68498 }, { "epoch": 2.28, "grad_norm": 0.66750168800354, "learning_rate": 8.153105670457967e-05, "loss": 1.6278, "step": 68499 }, { "epoch": 2.28, "grad_norm": 0.6685692667961121, "learning_rate": 8.152389365666086e-05, "loss": 1.6929, "step": 68500 }, { "epoch": 2.28, "grad_norm": 0.6494131684303284, "learning_rate": 8.151673087393975e-05, "loss": 1.7129, "step": 68501 }, { "epoch": 2.28, "grad_norm": 0.6760596632957458, "learning_rate": 8.150956835642523e-05, "loss": 1.7592, "step": 68502 }, { "epoch": 2.28, "grad_norm": 0.6549782156944275, "learning_rate": 8.150240610412581e-05, "loss": 1.6876, "step": 68503 }, { "epoch": 2.28, "grad_norm": 0.6494491100311279, "learning_rate": 8.149524411705039e-05, "loss": 1.7571, "step": 68504 }, { "epoch": 2.28, "grad_norm": 0.6801132559776306, "learning_rate": 8.148808239520752e-05, "loss": 1.7609, "step": 68505 }, { "epoch": 2.28, "grad_norm": 0.6962268352508545, "learning_rate": 8.148092093860585e-05, "loss": 1.6843, "step": 68506 }, { "epoch": 2.28, "grad_norm": 0.6617067456245422, "learning_rate": 8.147375974725421e-05, "loss": 1.615, "step": 68507 }, { "epoch": 2.28, "grad_norm": 0.668995201587677, "learning_rate": 8.146659882116115e-05, "loss": 1.7065, "step": 68508 }, { "epoch": 2.28, "grad_norm": 0.6605190634727478, "learning_rate": 8.145943816033552e-05, "loss": 1.6927, "step": 68509 }, { "epoch": 2.28, "grad_norm": 0.6682078242301941, "learning_rate": 8.145227776478596e-05, "loss": 1.7276, "step": 68510 }, { "epoch": 2.28, "grad_norm": 0.6753337383270264, "learning_rate": 8.144511763452101e-05, "loss": 1.6173, "step": 68511 }, { "epoch": 2.28, "grad_norm": 0.6814128756523132, "learning_rate": 8.14379577695496e-05, "loss": 1.7126, "step": 68512 }, { "epoch": 2.28, "grad_norm": 0.655529797077179, "learning_rate": 8.14307981698802e-05, "loss": 1.6686, "step": 68513 }, { "epoch": 2.28, "grad_norm": 0.7120715975761414, "learning_rate": 8.142363883552172e-05, "loss": 1.7657, "step": 68514 }, { "epoch": 2.28, "grad_norm": 0.6577312350273132, "learning_rate": 8.14164797664826e-05, "loss": 1.7361, "step": 68515 }, { "epoch": 2.28, "grad_norm": 0.6581096053123474, "learning_rate": 8.14093209627718e-05, "loss": 1.6078, "step": 68516 }, { "epoch": 2.28, "grad_norm": 0.660253643989563, "learning_rate": 8.140216242439787e-05, "loss": 1.7105, "step": 68517 }, { "epoch": 2.28, "grad_norm": 0.7721772193908691, "learning_rate": 8.139500415136941e-05, "loss": 1.7389, "step": 68518 }, { "epoch": 2.28, "grad_norm": 0.6368515491485596, "learning_rate": 8.138784614369528e-05, "loss": 1.6496, "step": 68519 }, { "epoch": 2.28, "grad_norm": 0.6659452319145203, "learning_rate": 8.138068840138412e-05, "loss": 1.7106, "step": 68520 }, { "epoch": 2.28, "grad_norm": 0.6707718372344971, "learning_rate": 8.137353092444449e-05, "loss": 1.6914, "step": 68521 }, { "epoch": 2.28, "grad_norm": 0.6828243732452393, "learning_rate": 8.136637371288518e-05, "loss": 1.6993, "step": 68522 }, { "epoch": 2.28, "grad_norm": 0.6741027235984802, "learning_rate": 8.135921676671496e-05, "loss": 1.6628, "step": 68523 }, { "epoch": 2.28, "grad_norm": 0.6607577204704285, "learning_rate": 8.135206008594242e-05, "loss": 1.6527, "step": 68524 }, { "epoch": 2.28, "grad_norm": 0.6727169156074524, "learning_rate": 8.134490367057622e-05, "loss": 1.6943, "step": 68525 }, { "epoch": 2.28, "grad_norm": 0.6676621437072754, "learning_rate": 8.133774752062514e-05, "loss": 1.5805, "step": 68526 }, { "epoch": 2.28, "grad_norm": 0.6673310399055481, "learning_rate": 8.133059163609784e-05, "loss": 1.7108, "step": 68527 }, { "epoch": 2.28, "grad_norm": 0.6652416586875916, "learning_rate": 8.132343601700289e-05, "loss": 1.6615, "step": 68528 }, { "epoch": 2.28, "grad_norm": 0.6368184685707092, "learning_rate": 8.131628066334907e-05, "loss": 1.6489, "step": 68529 }, { "epoch": 2.28, "grad_norm": 0.6692124605178833, "learning_rate": 8.130912557514513e-05, "loss": 1.7117, "step": 68530 }, { "epoch": 2.28, "grad_norm": 0.6809502243995667, "learning_rate": 8.13019707523997e-05, "loss": 1.713, "step": 68531 }, { "epoch": 2.28, "grad_norm": 0.6613130569458008, "learning_rate": 8.129481619512139e-05, "loss": 1.6501, "step": 68532 }, { "epoch": 2.28, "grad_norm": 0.6919170022010803, "learning_rate": 8.1287661903319e-05, "loss": 1.7083, "step": 68533 }, { "epoch": 2.28, "grad_norm": 0.6548581123352051, "learning_rate": 8.128050787700119e-05, "loss": 1.6783, "step": 68534 }, { "epoch": 2.28, "grad_norm": 0.6664806604385376, "learning_rate": 8.12733541161765e-05, "loss": 1.7556, "step": 68535 }, { "epoch": 2.28, "grad_norm": 0.6675732135772705, "learning_rate": 8.126620062085377e-05, "loss": 1.7107, "step": 68536 }, { "epoch": 2.28, "grad_norm": 0.6655478477478027, "learning_rate": 8.125904739104168e-05, "loss": 1.7116, "step": 68537 }, { "epoch": 2.28, "grad_norm": 0.6800522804260254, "learning_rate": 8.12518944267489e-05, "loss": 1.768, "step": 68538 }, { "epoch": 2.28, "grad_norm": 0.6784929037094116, "learning_rate": 8.124474172798403e-05, "loss": 1.7557, "step": 68539 }, { "epoch": 2.28, "grad_norm": 0.6413190960884094, "learning_rate": 8.123758929475588e-05, "loss": 1.6656, "step": 68540 }, { "epoch": 2.28, "grad_norm": 0.6551745533943176, "learning_rate": 8.123043712707304e-05, "loss": 1.7333, "step": 68541 }, { "epoch": 2.28, "grad_norm": 0.651019275188446, "learning_rate": 8.122328522494416e-05, "loss": 1.6584, "step": 68542 }, { "epoch": 2.28, "grad_norm": 0.6660894751548767, "learning_rate": 8.121613358837798e-05, "loss": 1.6752, "step": 68543 }, { "epoch": 2.28, "grad_norm": 0.6614242792129517, "learning_rate": 8.120898221738322e-05, "loss": 1.7188, "step": 68544 }, { "epoch": 2.28, "grad_norm": 0.6654122471809387, "learning_rate": 8.120183111196856e-05, "loss": 1.6894, "step": 68545 }, { "epoch": 2.28, "grad_norm": 0.681169867515564, "learning_rate": 8.119468027214255e-05, "loss": 1.666, "step": 68546 }, { "epoch": 2.28, "grad_norm": 0.6574280261993408, "learning_rate": 8.118752969791402e-05, "loss": 1.6253, "step": 68547 }, { "epoch": 2.28, "grad_norm": 0.6794100403785706, "learning_rate": 8.118037938929162e-05, "loss": 1.7443, "step": 68548 }, { "epoch": 2.28, "grad_norm": 0.6477599143981934, "learning_rate": 8.11732293462839e-05, "loss": 1.6981, "step": 68549 }, { "epoch": 2.28, "grad_norm": 0.6518650054931641, "learning_rate": 8.116607956889969e-05, "loss": 1.6959, "step": 68550 }, { "epoch": 2.28, "grad_norm": 0.7224121689796448, "learning_rate": 8.115893005714756e-05, "loss": 1.7552, "step": 68551 }, { "epoch": 2.28, "grad_norm": 0.6720744967460632, "learning_rate": 8.115178081103635e-05, "loss": 1.6896, "step": 68552 }, { "epoch": 2.28, "grad_norm": 0.6688198447227478, "learning_rate": 8.114463183057451e-05, "loss": 1.6679, "step": 68553 }, { "epoch": 2.28, "grad_norm": 0.6670268774032593, "learning_rate": 8.113748311577095e-05, "loss": 1.7038, "step": 68554 }, { "epoch": 2.28, "grad_norm": 0.6825261116027832, "learning_rate": 8.113033466663423e-05, "loss": 1.6391, "step": 68555 }, { "epoch": 2.28, "grad_norm": 0.6814910173416138, "learning_rate": 8.112318648317294e-05, "loss": 1.7844, "step": 68556 }, { "epoch": 2.28, "grad_norm": 0.6863689422607422, "learning_rate": 8.111603856539594e-05, "loss": 1.7362, "step": 68557 }, { "epoch": 2.28, "grad_norm": 0.6559017896652222, "learning_rate": 8.110889091331174e-05, "loss": 1.6665, "step": 68558 }, { "epoch": 2.28, "grad_norm": 0.6827260255813599, "learning_rate": 8.110174352692915e-05, "loss": 1.6664, "step": 68559 }, { "epoch": 2.28, "grad_norm": 0.6585990190505981, "learning_rate": 8.109459640625683e-05, "loss": 1.6476, "step": 68560 }, { "epoch": 2.28, "grad_norm": 0.6568198800086975, "learning_rate": 8.108744955130332e-05, "loss": 1.7469, "step": 68561 }, { "epoch": 2.28, "grad_norm": 0.6507607102394104, "learning_rate": 8.108030296207748e-05, "loss": 1.6083, "step": 68562 }, { "epoch": 2.28, "grad_norm": 0.6720832586288452, "learning_rate": 8.107315663858778e-05, "loss": 1.7721, "step": 68563 }, { "epoch": 2.28, "grad_norm": 0.6834905743598938, "learning_rate": 8.106601058084313e-05, "loss": 1.7142, "step": 68564 }, { "epoch": 2.28, "grad_norm": 0.6860237717628479, "learning_rate": 8.105886478885197e-05, "loss": 1.7325, "step": 68565 }, { "epoch": 2.28, "grad_norm": 0.6619342565536499, "learning_rate": 8.105171926262322e-05, "loss": 1.7814, "step": 68566 }, { "epoch": 2.28, "grad_norm": 0.6922250986099243, "learning_rate": 8.104457400216538e-05, "loss": 1.7384, "step": 68567 }, { "epoch": 2.28, "grad_norm": 0.6471533179283142, "learning_rate": 8.10374290074871e-05, "loss": 1.6408, "step": 68568 }, { "epoch": 2.28, "grad_norm": 0.6677718162536621, "learning_rate": 8.103028427859719e-05, "loss": 1.6976, "step": 68569 }, { "epoch": 2.28, "grad_norm": 0.6648744940757751, "learning_rate": 8.102313981550419e-05, "loss": 1.6961, "step": 68570 }, { "epoch": 2.28, "grad_norm": 0.6455657482147217, "learning_rate": 8.10159956182169e-05, "loss": 1.679, "step": 68571 }, { "epoch": 2.28, "grad_norm": 0.6498533487319946, "learning_rate": 8.100885168674384e-05, "loss": 1.7089, "step": 68572 }, { "epoch": 2.28, "grad_norm": 0.6538558602333069, "learning_rate": 8.100170802109385e-05, "loss": 1.7636, "step": 68573 }, { "epoch": 2.28, "grad_norm": 0.6506146788597107, "learning_rate": 8.099456462127553e-05, "loss": 1.6913, "step": 68574 }, { "epoch": 2.28, "grad_norm": 0.6441987752914429, "learning_rate": 8.098742148729745e-05, "loss": 1.7021, "step": 68575 }, { "epoch": 2.28, "grad_norm": 0.6788768172264099, "learning_rate": 8.098027861916847e-05, "loss": 1.6858, "step": 68576 }, { "epoch": 2.28, "grad_norm": 0.6511895060539246, "learning_rate": 8.097313601689714e-05, "loss": 1.7777, "step": 68577 }, { "epoch": 2.28, "grad_norm": 0.7233544588088989, "learning_rate": 8.096599368049206e-05, "loss": 1.6783, "step": 68578 }, { "epoch": 2.28, "grad_norm": 0.6613255143165588, "learning_rate": 8.0958851609962e-05, "loss": 1.5975, "step": 68579 }, { "epoch": 2.28, "grad_norm": 0.6743981242179871, "learning_rate": 8.095170980531574e-05, "loss": 1.7163, "step": 68580 }, { "epoch": 2.28, "grad_norm": 0.6396211981773376, "learning_rate": 8.094456826656177e-05, "loss": 1.6668, "step": 68581 }, { "epoch": 2.28, "grad_norm": 0.6771194934844971, "learning_rate": 8.093742699370877e-05, "loss": 1.7443, "step": 68582 }, { "epoch": 2.28, "grad_norm": 0.6943303346633911, "learning_rate": 8.09302859867655e-05, "loss": 1.7398, "step": 68583 }, { "epoch": 2.28, "grad_norm": 0.6944748759269714, "learning_rate": 8.092314524574063e-05, "loss": 1.7262, "step": 68584 }, { "epoch": 2.28, "grad_norm": 0.6474280953407288, "learning_rate": 8.09160047706427e-05, "loss": 1.6921, "step": 68585 }, { "epoch": 2.28, "grad_norm": 0.6581088900566101, "learning_rate": 8.090886456148044e-05, "loss": 1.7096, "step": 68586 }, { "epoch": 2.28, "grad_norm": 0.6706995368003845, "learning_rate": 8.090172461826271e-05, "loss": 1.6939, "step": 68587 }, { "epoch": 2.28, "grad_norm": 0.6643849015235901, "learning_rate": 8.089458494099781e-05, "loss": 1.7129, "step": 68588 }, { "epoch": 2.28, "grad_norm": 0.6444917321205139, "learning_rate": 8.088744552969463e-05, "loss": 1.6783, "step": 68589 }, { "epoch": 2.28, "grad_norm": 0.683428168296814, "learning_rate": 8.088030638436189e-05, "loss": 1.7293, "step": 68590 }, { "epoch": 2.28, "grad_norm": 0.6540361642837524, "learning_rate": 8.087316750500816e-05, "loss": 1.6104, "step": 68591 }, { "epoch": 2.28, "grad_norm": 0.6616336107254028, "learning_rate": 8.086602889164203e-05, "loss": 1.6411, "step": 68592 }, { "epoch": 2.28, "grad_norm": 0.6738890409469604, "learning_rate": 8.085889054427225e-05, "loss": 1.7029, "step": 68593 }, { "epoch": 2.28, "grad_norm": 0.6535592079162598, "learning_rate": 8.08517524629077e-05, "loss": 1.7404, "step": 68594 }, { "epoch": 2.28, "grad_norm": 0.7496111989021301, "learning_rate": 8.084461464755662e-05, "loss": 1.7805, "step": 68595 }, { "epoch": 2.28, "grad_norm": 0.6958768963813782, "learning_rate": 8.08374770982279e-05, "loss": 1.7104, "step": 68596 }, { "epoch": 2.28, "grad_norm": 0.6669980883598328, "learning_rate": 8.083033981493027e-05, "loss": 1.7239, "step": 68597 }, { "epoch": 2.28, "grad_norm": 0.6697171330451965, "learning_rate": 8.082320279767233e-05, "loss": 1.6864, "step": 68598 }, { "epoch": 2.28, "grad_norm": 0.6613072752952576, "learning_rate": 8.081606604646263e-05, "loss": 1.7147, "step": 68599 }, { "epoch": 2.28, "grad_norm": 0.6582798957824707, "learning_rate": 8.080892956130993e-05, "loss": 1.6419, "step": 68600 }, { "epoch": 2.28, "grad_norm": 0.6587552428245544, "learning_rate": 8.080179334222308e-05, "loss": 1.7034, "step": 68601 }, { "epoch": 2.28, "grad_norm": 0.6792221665382385, "learning_rate": 8.079465738921036e-05, "loss": 1.653, "step": 68602 }, { "epoch": 2.28, "grad_norm": 0.6719014644622803, "learning_rate": 8.078752170228064e-05, "loss": 1.6652, "step": 68603 }, { "epoch": 2.28, "grad_norm": 0.6690443158149719, "learning_rate": 8.078038628144266e-05, "loss": 1.6399, "step": 68604 }, { "epoch": 2.28, "grad_norm": 0.669904887676239, "learning_rate": 8.077325112670497e-05, "loss": 1.7354, "step": 68605 }, { "epoch": 2.28, "grad_norm": 0.6768159866333008, "learning_rate": 8.076611623807619e-05, "loss": 1.7371, "step": 68606 }, { "epoch": 2.28, "grad_norm": 0.6475257873535156, "learning_rate": 8.07589816155651e-05, "loss": 1.6791, "step": 68607 }, { "epoch": 2.28, "grad_norm": 0.6632171273231506, "learning_rate": 8.075184725918035e-05, "loss": 1.6346, "step": 68608 }, { "epoch": 2.28, "grad_norm": 0.6567333340644836, "learning_rate": 8.074471316893043e-05, "loss": 1.6883, "step": 68609 }, { "epoch": 2.28, "grad_norm": 0.678359866142273, "learning_rate": 8.073757934482412e-05, "loss": 1.6235, "step": 68610 }, { "epoch": 2.28, "grad_norm": 0.6699395179748535, "learning_rate": 8.073044578687018e-05, "loss": 1.6855, "step": 68611 }, { "epoch": 2.28, "grad_norm": 0.6645106077194214, "learning_rate": 8.072331249507716e-05, "loss": 1.7139, "step": 68612 }, { "epoch": 2.28, "grad_norm": 0.6489744186401367, "learning_rate": 8.071617946945367e-05, "loss": 1.6654, "step": 68613 }, { "epoch": 2.28, "grad_norm": 0.6611147522926331, "learning_rate": 8.070904671000849e-05, "loss": 1.6421, "step": 68614 }, { "epoch": 2.28, "grad_norm": 0.6481674909591675, "learning_rate": 8.070191421675021e-05, "loss": 1.7305, "step": 68615 }, { "epoch": 2.28, "grad_norm": 0.6741325855255127, "learning_rate": 8.069478198968742e-05, "loss": 1.6996, "step": 68616 }, { "epoch": 2.28, "grad_norm": 0.6920178532600403, "learning_rate": 8.068765002882895e-05, "loss": 1.7357, "step": 68617 }, { "epoch": 2.28, "grad_norm": 0.6643650531768799, "learning_rate": 8.068051833418324e-05, "loss": 1.6438, "step": 68618 }, { "epoch": 2.28, "grad_norm": 0.6619073748588562, "learning_rate": 8.067338690575919e-05, "loss": 1.6507, "step": 68619 }, { "epoch": 2.28, "grad_norm": 0.6729556918144226, "learning_rate": 8.066625574356523e-05, "loss": 1.7954, "step": 68620 }, { "epoch": 2.28, "grad_norm": 0.6779742240905762, "learning_rate": 8.065912484761021e-05, "loss": 1.6392, "step": 68621 }, { "epoch": 2.28, "grad_norm": 0.662547767162323, "learning_rate": 8.065199421790268e-05, "loss": 1.7192, "step": 68622 }, { "epoch": 2.28, "grad_norm": 0.6855168342590332, "learning_rate": 8.064486385445124e-05, "loss": 1.6313, "step": 68623 }, { "epoch": 2.28, "grad_norm": 0.665753960609436, "learning_rate": 8.063773375726467e-05, "loss": 1.7186, "step": 68624 }, { "epoch": 2.28, "grad_norm": 0.6713811755180359, "learning_rate": 8.06306039263515e-05, "loss": 1.7774, "step": 68625 }, { "epoch": 2.28, "grad_norm": 0.6993029713630676, "learning_rate": 8.062347436172056e-05, "loss": 1.7087, "step": 68626 }, { "epoch": 2.28, "grad_norm": 0.6495392322540283, "learning_rate": 8.061634506338038e-05, "loss": 1.633, "step": 68627 }, { "epoch": 2.28, "grad_norm": 0.6609627604484558, "learning_rate": 8.060921603133955e-05, "loss": 1.7087, "step": 68628 }, { "epoch": 2.28, "grad_norm": 0.6661223769187927, "learning_rate": 8.06020872656069e-05, "loss": 1.7626, "step": 68629 }, { "epoch": 2.28, "grad_norm": 0.6654872894287109, "learning_rate": 8.059495876619088e-05, "loss": 1.672, "step": 68630 }, { "epoch": 2.28, "grad_norm": 0.6659023761749268, "learning_rate": 8.058783053310035e-05, "loss": 1.6786, "step": 68631 }, { "epoch": 2.28, "grad_norm": 0.6655197143554688, "learning_rate": 8.058070256634378e-05, "loss": 1.7528, "step": 68632 }, { "epoch": 2.28, "grad_norm": 0.6496713161468506, "learning_rate": 8.057357486593002e-05, "loss": 1.744, "step": 68633 }, { "epoch": 2.28, "grad_norm": 0.6644665002822876, "learning_rate": 8.056644743186757e-05, "loss": 1.6712, "step": 68634 }, { "epoch": 2.28, "grad_norm": 0.6507882475852966, "learning_rate": 8.055932026416507e-05, "loss": 1.6681, "step": 68635 }, { "epoch": 2.28, "grad_norm": 0.6578561067581177, "learning_rate": 8.055219336283128e-05, "loss": 1.7184, "step": 68636 }, { "epoch": 2.28, "grad_norm": 0.6742339730262756, "learning_rate": 8.054506672787481e-05, "loss": 1.8038, "step": 68637 }, { "epoch": 2.28, "grad_norm": 0.6659455299377441, "learning_rate": 8.053794035930418e-05, "loss": 1.6898, "step": 68638 }, { "epoch": 2.28, "grad_norm": 0.6779133677482605, "learning_rate": 8.053081425712817e-05, "loss": 1.7226, "step": 68639 }, { "epoch": 2.28, "grad_norm": 0.6627377867698669, "learning_rate": 8.052368842135549e-05, "loss": 1.7653, "step": 68640 }, { "epoch": 2.28, "grad_norm": 0.6718488335609436, "learning_rate": 8.051656285199473e-05, "loss": 1.7382, "step": 68641 }, { "epoch": 2.28, "grad_norm": 0.6656506657600403, "learning_rate": 8.050943754905444e-05, "loss": 1.7085, "step": 68642 }, { "epoch": 2.28, "grad_norm": 0.7053202986717224, "learning_rate": 8.050231251254344e-05, "loss": 1.7582, "step": 68643 }, { "epoch": 2.28, "grad_norm": 0.6891701221466064, "learning_rate": 8.049518774247029e-05, "loss": 1.7295, "step": 68644 }, { "epoch": 2.28, "grad_norm": 0.6503357291221619, "learning_rate": 8.048806323884354e-05, "loss": 1.6667, "step": 68645 }, { "epoch": 2.28, "grad_norm": 0.658611536026001, "learning_rate": 8.048093900167194e-05, "loss": 1.7168, "step": 68646 }, { "epoch": 2.28, "grad_norm": 0.6619861721992493, "learning_rate": 8.047381503096423e-05, "loss": 1.7086, "step": 68647 }, { "epoch": 2.28, "grad_norm": 0.6723524928092957, "learning_rate": 8.046669132672896e-05, "loss": 1.6804, "step": 68648 }, { "epoch": 2.28, "grad_norm": 0.6700056195259094, "learning_rate": 8.045956788897469e-05, "loss": 1.7593, "step": 68649 }, { "epoch": 2.28, "grad_norm": 0.674889326095581, "learning_rate": 8.045244471771026e-05, "loss": 1.6929, "step": 68650 }, { "epoch": 2.28, "grad_norm": 0.673057496547699, "learning_rate": 8.044532181294419e-05, "loss": 1.7116, "step": 68651 }, { "epoch": 2.28, "grad_norm": 0.6573209166526794, "learning_rate": 8.043819917468509e-05, "loss": 1.7461, "step": 68652 }, { "epoch": 2.28, "grad_norm": 0.6478462219238281, "learning_rate": 8.043107680294164e-05, "loss": 1.6391, "step": 68653 }, { "epoch": 2.28, "grad_norm": 0.6640925407409668, "learning_rate": 8.04239546977226e-05, "loss": 1.7063, "step": 68654 }, { "epoch": 2.28, "grad_norm": 0.6460399031639099, "learning_rate": 8.041683285903654e-05, "loss": 1.6705, "step": 68655 }, { "epoch": 2.28, "grad_norm": 0.6449494957923889, "learning_rate": 8.040971128689197e-05, "loss": 1.7217, "step": 68656 }, { "epoch": 2.28, "grad_norm": 0.6620397567749023, "learning_rate": 8.040258998129779e-05, "loss": 1.6971, "step": 68657 }, { "epoch": 2.28, "grad_norm": 0.6644219160079956, "learning_rate": 8.039546894226248e-05, "loss": 1.6314, "step": 68658 }, { "epoch": 2.28, "grad_norm": 0.6528568863868713, "learning_rate": 8.038834816979465e-05, "loss": 1.7067, "step": 68659 }, { "epoch": 2.28, "grad_norm": 0.6652393937110901, "learning_rate": 8.038122766390296e-05, "loss": 1.7122, "step": 68660 }, { "epoch": 2.28, "grad_norm": 0.6703981161117554, "learning_rate": 8.037410742459625e-05, "loss": 1.6799, "step": 68661 }, { "epoch": 2.28, "grad_norm": 0.6688852906227112, "learning_rate": 8.036698745188297e-05, "loss": 1.737, "step": 68662 }, { "epoch": 2.28, "grad_norm": 0.6910305619239807, "learning_rate": 8.035986774577173e-05, "loss": 1.697, "step": 68663 }, { "epoch": 2.28, "grad_norm": 0.6686617732048035, "learning_rate": 8.035274830627134e-05, "loss": 1.657, "step": 68664 }, { "epoch": 2.28, "grad_norm": 0.6675445437431335, "learning_rate": 8.034562913339035e-05, "loss": 1.7363, "step": 68665 }, { "epoch": 2.28, "grad_norm": 0.6906754970550537, "learning_rate": 8.03385102271373e-05, "loss": 1.723, "step": 68666 }, { "epoch": 2.28, "grad_norm": 0.6601088047027588, "learning_rate": 8.033139158752102e-05, "loss": 1.6512, "step": 68667 }, { "epoch": 2.28, "grad_norm": 0.6930287480354309, "learning_rate": 8.032427321454998e-05, "loss": 1.7608, "step": 68668 }, { "epoch": 2.28, "grad_norm": 0.6694241762161255, "learning_rate": 8.031715510823302e-05, "loss": 1.7203, "step": 68669 }, { "epoch": 2.28, "grad_norm": 0.6518929600715637, "learning_rate": 8.031003726857856e-05, "loss": 1.7025, "step": 68670 }, { "epoch": 2.28, "grad_norm": 0.6706956624984741, "learning_rate": 8.030291969559544e-05, "loss": 1.6671, "step": 68671 }, { "epoch": 2.28, "grad_norm": 0.6558821201324463, "learning_rate": 8.029580238929219e-05, "loss": 1.6903, "step": 68672 }, { "epoch": 2.28, "grad_norm": 0.6757892966270447, "learning_rate": 8.028868534967737e-05, "loss": 1.745, "step": 68673 }, { "epoch": 2.28, "grad_norm": 0.6736860871315002, "learning_rate": 8.02815685767598e-05, "loss": 1.7594, "step": 68674 }, { "epoch": 2.28, "grad_norm": 0.6625013947486877, "learning_rate": 8.027445207054797e-05, "loss": 1.7481, "step": 68675 }, { "epoch": 2.28, "grad_norm": 0.6595113277435303, "learning_rate": 8.026733583105063e-05, "loss": 1.6363, "step": 68676 }, { "epoch": 2.28, "grad_norm": 0.6431320905685425, "learning_rate": 8.02602198582764e-05, "loss": 1.6843, "step": 68677 }, { "epoch": 2.28, "grad_norm": 0.6658888459205627, "learning_rate": 8.02531041522338e-05, "loss": 1.653, "step": 68678 }, { "epoch": 2.28, "grad_norm": 0.6311323642730713, "learning_rate": 8.024598871293161e-05, "loss": 1.6778, "step": 68679 }, { "epoch": 2.28, "grad_norm": 0.6472885012626648, "learning_rate": 8.023887354037836e-05, "loss": 1.671, "step": 68680 }, { "epoch": 2.29, "grad_norm": 0.6484357714653015, "learning_rate": 8.02317586345828e-05, "loss": 1.6539, "step": 68681 }, { "epoch": 2.29, "grad_norm": 0.673835813999176, "learning_rate": 8.022464399555343e-05, "loss": 1.7374, "step": 68682 }, { "epoch": 2.29, "grad_norm": 0.6887497901916504, "learning_rate": 8.021752962329904e-05, "loss": 1.6491, "step": 68683 }, { "epoch": 2.29, "grad_norm": 0.6619551777839661, "learning_rate": 8.02104155178282e-05, "loss": 1.7367, "step": 68684 }, { "epoch": 2.29, "grad_norm": 0.6791368126869202, "learning_rate": 8.02033016791494e-05, "loss": 1.743, "step": 68685 }, { "epoch": 2.29, "grad_norm": 0.6851757764816284, "learning_rate": 8.019618810727156e-05, "loss": 1.6997, "step": 68686 }, { "epoch": 2.29, "grad_norm": 0.6518288850784302, "learning_rate": 8.018907480220312e-05, "loss": 1.6581, "step": 68687 }, { "epoch": 2.29, "grad_norm": 0.6800130009651184, "learning_rate": 8.018196176395267e-05, "loss": 1.6809, "step": 68688 }, { "epoch": 2.29, "grad_norm": 0.669564962387085, "learning_rate": 8.017484899252893e-05, "loss": 1.6725, "step": 68689 }, { "epoch": 2.29, "grad_norm": 0.6619601845741272, "learning_rate": 8.01677364879406e-05, "loss": 1.6729, "step": 68690 }, { "epoch": 2.29, "grad_norm": 0.6843238472938538, "learning_rate": 8.01606242501963e-05, "loss": 1.666, "step": 68691 }, { "epoch": 2.29, "grad_norm": 0.6622939109802246, "learning_rate": 8.015351227930448e-05, "loss": 1.6648, "step": 68692 }, { "epoch": 2.29, "grad_norm": 0.6525432467460632, "learning_rate": 8.014640057527402e-05, "loss": 1.7351, "step": 68693 }, { "epoch": 2.29, "grad_norm": 0.6615538001060486, "learning_rate": 8.013928913811341e-05, "loss": 1.694, "step": 68694 }, { "epoch": 2.29, "grad_norm": 0.6761714816093445, "learning_rate": 8.013217796783121e-05, "loss": 1.7611, "step": 68695 }, { "epoch": 2.29, "grad_norm": 0.6737644672393799, "learning_rate": 8.01250670644362e-05, "loss": 1.7124, "step": 68696 }, { "epoch": 2.29, "grad_norm": 0.6758472919464111, "learning_rate": 8.011795642793703e-05, "loss": 1.7188, "step": 68697 }, { "epoch": 2.29, "grad_norm": 0.6371036171913147, "learning_rate": 8.011084605834224e-05, "loss": 1.6, "step": 68698 }, { "epoch": 2.29, "grad_norm": 0.6634746193885803, "learning_rate": 8.01037359556604e-05, "loss": 1.659, "step": 68699 }, { "epoch": 2.29, "grad_norm": 0.6483270525932312, "learning_rate": 8.009662611990033e-05, "loss": 1.6689, "step": 68700 }, { "epoch": 2.29, "grad_norm": 0.650130569934845, "learning_rate": 8.008951655107054e-05, "loss": 1.6271, "step": 68701 }, { "epoch": 2.29, "grad_norm": 0.6427738070487976, "learning_rate": 8.00824072491796e-05, "loss": 1.6913, "step": 68702 }, { "epoch": 2.29, "grad_norm": 0.6703851819038391, "learning_rate": 8.00752982142362e-05, "loss": 1.7282, "step": 68703 }, { "epoch": 2.29, "grad_norm": 0.6877861618995667, "learning_rate": 8.006818944624918e-05, "loss": 1.6808, "step": 68704 }, { "epoch": 2.29, "grad_norm": 0.6663175821304321, "learning_rate": 8.006108094522676e-05, "loss": 1.6143, "step": 68705 }, { "epoch": 2.29, "grad_norm": 0.6792553067207336, "learning_rate": 8.005397271117783e-05, "loss": 1.7043, "step": 68706 }, { "epoch": 2.29, "grad_norm": 0.6405026912689209, "learning_rate": 8.004686474411103e-05, "loss": 1.5805, "step": 68707 }, { "epoch": 2.29, "grad_norm": 0.6757012009620667, "learning_rate": 8.003975704403492e-05, "loss": 1.7817, "step": 68708 }, { "epoch": 2.29, "grad_norm": 0.672410786151886, "learning_rate": 8.003264961095804e-05, "loss": 1.7274, "step": 68709 }, { "epoch": 2.29, "grad_norm": 0.6820260286331177, "learning_rate": 8.002554244488914e-05, "loss": 1.7275, "step": 68710 }, { "epoch": 2.29, "grad_norm": 0.6770475506782532, "learning_rate": 8.001843554583699e-05, "loss": 1.7174, "step": 68711 }, { "epoch": 2.29, "grad_norm": 0.6799195408821106, "learning_rate": 8.001132891380987e-05, "loss": 1.7158, "step": 68712 }, { "epoch": 2.29, "grad_norm": 0.6753739714622498, "learning_rate": 8.000422254881658e-05, "loss": 1.778, "step": 68713 }, { "epoch": 2.29, "grad_norm": 0.6892753839492798, "learning_rate": 7.999711645086584e-05, "loss": 1.7234, "step": 68714 }, { "epoch": 2.29, "grad_norm": 0.6677719354629517, "learning_rate": 7.999001061996618e-05, "loss": 1.6889, "step": 68715 }, { "epoch": 2.29, "grad_norm": 0.6452611684799194, "learning_rate": 7.998290505612613e-05, "loss": 1.5944, "step": 68716 }, { "epoch": 2.29, "grad_norm": 0.6576154828071594, "learning_rate": 7.997579975935442e-05, "loss": 1.6547, "step": 68717 }, { "epoch": 2.29, "grad_norm": 0.667456328868866, "learning_rate": 7.996869472965987e-05, "loss": 1.6433, "step": 68718 }, { "epoch": 2.29, "grad_norm": 0.6477304697036743, "learning_rate": 7.996158996705069e-05, "loss": 1.731, "step": 68719 }, { "epoch": 2.29, "grad_norm": 0.6707839965820312, "learning_rate": 7.995448547153575e-05, "loss": 1.689, "step": 68720 }, { "epoch": 2.29, "grad_norm": 0.6451644897460938, "learning_rate": 7.994738124312373e-05, "loss": 1.6944, "step": 68721 }, { "epoch": 2.29, "grad_norm": 0.6782204508781433, "learning_rate": 7.994027728182315e-05, "loss": 1.6869, "step": 68722 }, { "epoch": 2.29, "grad_norm": 0.6449088454246521, "learning_rate": 7.993317358764256e-05, "loss": 1.7245, "step": 68723 }, { "epoch": 2.29, "grad_norm": 0.6563904881477356, "learning_rate": 7.992607016059079e-05, "loss": 1.6977, "step": 68724 }, { "epoch": 2.29, "grad_norm": 0.6661878228187561, "learning_rate": 7.99189670006763e-05, "loss": 1.6562, "step": 68725 }, { "epoch": 2.29, "grad_norm": 0.650214433670044, "learning_rate": 7.991186410790772e-05, "loss": 1.6518, "step": 68726 }, { "epoch": 2.29, "grad_norm": 0.6692284941673279, "learning_rate": 7.990476148229367e-05, "loss": 1.7114, "step": 68727 }, { "epoch": 2.29, "grad_norm": 0.6561877727508545, "learning_rate": 7.98976591238429e-05, "loss": 1.6721, "step": 68728 }, { "epoch": 2.29, "grad_norm": 0.6624100208282471, "learning_rate": 7.989055703256396e-05, "loss": 1.7439, "step": 68729 }, { "epoch": 2.29, "grad_norm": 0.6647828221321106, "learning_rate": 7.988345520846533e-05, "loss": 1.6912, "step": 68730 }, { "epoch": 2.29, "grad_norm": 0.6786960363388062, "learning_rate": 7.987635365155587e-05, "loss": 1.6762, "step": 68731 }, { "epoch": 2.29, "grad_norm": 0.669369101524353, "learning_rate": 7.986925236184407e-05, "loss": 1.6424, "step": 68732 }, { "epoch": 2.29, "grad_norm": 0.675432562828064, "learning_rate": 7.98621513393385e-05, "loss": 1.6913, "step": 68733 }, { "epoch": 2.29, "grad_norm": 0.6602277159690857, "learning_rate": 7.985505058404791e-05, "loss": 1.6217, "step": 68734 }, { "epoch": 2.29, "grad_norm": 0.6517259478569031, "learning_rate": 7.984795009598077e-05, "loss": 1.7096, "step": 68735 }, { "epoch": 2.29, "grad_norm": 0.6440019011497498, "learning_rate": 7.984084987514585e-05, "loss": 1.6848, "step": 68736 }, { "epoch": 2.29, "grad_norm": 0.6620466709136963, "learning_rate": 7.983374992155163e-05, "loss": 1.7301, "step": 68737 }, { "epoch": 2.29, "grad_norm": 0.6600611805915833, "learning_rate": 7.982665023520688e-05, "loss": 1.6848, "step": 68738 }, { "epoch": 2.29, "grad_norm": 0.6519050002098083, "learning_rate": 7.981955081612013e-05, "loss": 1.6613, "step": 68739 }, { "epoch": 2.29, "grad_norm": 0.6741927266120911, "learning_rate": 7.981245166429994e-05, "loss": 1.7129, "step": 68740 }, { "epoch": 2.29, "grad_norm": 0.6920887231826782, "learning_rate": 7.980535277975507e-05, "loss": 1.6899, "step": 68741 }, { "epoch": 2.29, "grad_norm": 0.6717084646224976, "learning_rate": 7.979825416249397e-05, "loss": 1.7193, "step": 68742 }, { "epoch": 2.29, "grad_norm": 0.6777190566062927, "learning_rate": 7.979115581252542e-05, "loss": 1.7223, "step": 68743 }, { "epoch": 2.29, "grad_norm": 0.6624969244003296, "learning_rate": 7.978405772985796e-05, "loss": 1.6596, "step": 68744 }, { "epoch": 2.29, "grad_norm": 0.6602094173431396, "learning_rate": 7.977695991450013e-05, "loss": 1.6632, "step": 68745 }, { "epoch": 2.29, "grad_norm": 0.6557979583740234, "learning_rate": 7.97698623664607e-05, "loss": 1.668, "step": 68746 }, { "epoch": 2.29, "grad_norm": 0.658598780632019, "learning_rate": 7.97627650857481e-05, "loss": 1.7488, "step": 68747 }, { "epoch": 2.29, "grad_norm": 0.6768956184387207, "learning_rate": 7.975566807237116e-05, "loss": 1.6904, "step": 68748 }, { "epoch": 2.29, "grad_norm": 0.6695831418037415, "learning_rate": 7.974857132633833e-05, "loss": 1.6186, "step": 68749 }, { "epoch": 2.29, "grad_norm": 0.6530405282974243, "learning_rate": 7.974147484765834e-05, "loss": 1.668, "step": 68750 }, { "epoch": 2.29, "grad_norm": 0.7080450057983398, "learning_rate": 7.973437863633975e-05, "loss": 1.772, "step": 68751 }, { "epoch": 2.29, "grad_norm": 0.6741956472396851, "learning_rate": 7.972728269239108e-05, "loss": 1.612, "step": 68752 }, { "epoch": 2.29, "grad_norm": 0.6527479887008667, "learning_rate": 7.97201870158211e-05, "loss": 1.7048, "step": 68753 }, { "epoch": 2.29, "grad_norm": 0.6584790945053101, "learning_rate": 7.97130916066384e-05, "loss": 1.7198, "step": 68754 }, { "epoch": 2.29, "grad_norm": 0.699487566947937, "learning_rate": 7.970599646485141e-05, "loss": 1.6957, "step": 68755 }, { "epoch": 2.29, "grad_norm": 0.6682114601135254, "learning_rate": 7.969890159046892e-05, "loss": 1.6945, "step": 68756 }, { "epoch": 2.29, "grad_norm": 0.717060387134552, "learning_rate": 7.969180698349958e-05, "loss": 1.666, "step": 68757 }, { "epoch": 2.29, "grad_norm": 0.6545818448066711, "learning_rate": 7.968471264395193e-05, "loss": 1.677, "step": 68758 }, { "epoch": 2.29, "grad_norm": 0.6669384837150574, "learning_rate": 7.967761857183448e-05, "loss": 1.6441, "step": 68759 }, { "epoch": 2.29, "grad_norm": 0.6845594048500061, "learning_rate": 7.967052476715604e-05, "loss": 1.668, "step": 68760 }, { "epoch": 2.29, "grad_norm": 0.6706119775772095, "learning_rate": 7.96634312299251e-05, "loss": 1.7647, "step": 68761 }, { "epoch": 2.29, "grad_norm": 0.668738842010498, "learning_rate": 7.965633796015022e-05, "loss": 1.7644, "step": 68762 }, { "epoch": 2.29, "grad_norm": 0.6680020093917847, "learning_rate": 7.964924495784009e-05, "loss": 1.7525, "step": 68763 }, { "epoch": 2.29, "grad_norm": 0.6808536052703857, "learning_rate": 7.964215222300336e-05, "loss": 1.7442, "step": 68764 }, { "epoch": 2.29, "grad_norm": 0.6474676728248596, "learning_rate": 7.963505975564861e-05, "loss": 1.7431, "step": 68765 }, { "epoch": 2.29, "grad_norm": 0.6753844022750854, "learning_rate": 7.962796755578435e-05, "loss": 1.6879, "step": 68766 }, { "epoch": 2.29, "grad_norm": 0.6680313348770142, "learning_rate": 7.962087562341933e-05, "loss": 1.7709, "step": 68767 }, { "epoch": 2.29, "grad_norm": 0.6458974480628967, "learning_rate": 7.96137839585621e-05, "loss": 1.6952, "step": 68768 }, { "epoch": 2.29, "grad_norm": 0.653052806854248, "learning_rate": 7.960669256122119e-05, "loss": 1.6906, "step": 68769 }, { "epoch": 2.29, "grad_norm": 0.6526381969451904, "learning_rate": 7.95996014314053e-05, "loss": 1.6549, "step": 68770 }, { "epoch": 2.29, "grad_norm": 0.6943322420120239, "learning_rate": 7.959251056912308e-05, "loss": 1.6792, "step": 68771 }, { "epoch": 2.29, "grad_norm": 0.7039907574653625, "learning_rate": 7.958541997438309e-05, "loss": 1.6422, "step": 68772 }, { "epoch": 2.29, "grad_norm": 0.680334746837616, "learning_rate": 7.957832964719382e-05, "loss": 1.7094, "step": 68773 }, { "epoch": 2.29, "grad_norm": 0.6921325325965881, "learning_rate": 7.957123958756409e-05, "loss": 1.6923, "step": 68774 }, { "epoch": 2.29, "grad_norm": 0.6615431308746338, "learning_rate": 7.956414979550235e-05, "loss": 1.6594, "step": 68775 }, { "epoch": 2.29, "grad_norm": 0.6726435422897339, "learning_rate": 7.955706027101722e-05, "loss": 1.691, "step": 68776 }, { "epoch": 2.29, "grad_norm": 0.6501391530036926, "learning_rate": 7.954997101411729e-05, "loss": 1.6675, "step": 68777 }, { "epoch": 2.29, "grad_norm": 0.669921338558197, "learning_rate": 7.954288202481132e-05, "loss": 1.71, "step": 68778 }, { "epoch": 2.29, "grad_norm": 0.6744577884674072, "learning_rate": 7.953579330310783e-05, "loss": 1.7306, "step": 68779 }, { "epoch": 2.29, "grad_norm": 0.6727780699729919, "learning_rate": 7.95287048490153e-05, "loss": 1.6536, "step": 68780 }, { "epoch": 2.29, "grad_norm": 0.6708210110664368, "learning_rate": 7.952161666254254e-05, "loss": 1.6943, "step": 68781 }, { "epoch": 2.29, "grad_norm": 0.6604080200195312, "learning_rate": 7.951452874369802e-05, "loss": 1.67, "step": 68782 }, { "epoch": 2.29, "grad_norm": 0.6869389414787292, "learning_rate": 7.950744109249032e-05, "loss": 1.7235, "step": 68783 }, { "epoch": 2.29, "grad_norm": 0.6777874231338501, "learning_rate": 7.950035370892815e-05, "loss": 1.7406, "step": 68784 }, { "epoch": 2.29, "grad_norm": 0.6787222623825073, "learning_rate": 7.949326659301998e-05, "loss": 1.7558, "step": 68785 }, { "epoch": 2.29, "grad_norm": 0.6494364738464355, "learning_rate": 7.94861797447746e-05, "loss": 1.6675, "step": 68786 }, { "epoch": 2.29, "grad_norm": 0.6836659908294678, "learning_rate": 7.947909316420041e-05, "loss": 1.7536, "step": 68787 }, { "epoch": 2.29, "grad_norm": 0.6560415029525757, "learning_rate": 7.94720068513062e-05, "loss": 1.6939, "step": 68788 }, { "epoch": 2.29, "grad_norm": 0.6793166995048523, "learning_rate": 7.94649208061005e-05, "loss": 1.7433, "step": 68789 }, { "epoch": 2.29, "grad_norm": 0.6519662141799927, "learning_rate": 7.945783502859179e-05, "loss": 1.7227, "step": 68790 }, { "epoch": 2.29, "grad_norm": 0.6705617904663086, "learning_rate": 7.945074951878884e-05, "loss": 1.6732, "step": 68791 }, { "epoch": 2.29, "grad_norm": 0.6796368956565857, "learning_rate": 7.94436642767001e-05, "loss": 1.648, "step": 68792 }, { "epoch": 2.29, "grad_norm": 0.6552628874778748, "learning_rate": 7.943657930233438e-05, "loss": 1.6827, "step": 68793 }, { "epoch": 2.29, "grad_norm": 0.6674970388412476, "learning_rate": 7.942949459570013e-05, "loss": 1.6509, "step": 68794 }, { "epoch": 2.29, "grad_norm": 0.6622787714004517, "learning_rate": 7.942241015680587e-05, "loss": 1.6793, "step": 68795 }, { "epoch": 2.29, "grad_norm": 0.7155224680900574, "learning_rate": 7.94153259856604e-05, "loss": 1.6561, "step": 68796 }, { "epoch": 2.29, "grad_norm": 0.6507984399795532, "learning_rate": 7.940824208227215e-05, "loss": 1.7674, "step": 68797 }, { "epoch": 2.29, "grad_norm": 0.6791896820068359, "learning_rate": 7.940115844664985e-05, "loss": 1.7443, "step": 68798 }, { "epoch": 2.29, "grad_norm": 0.670319676399231, "learning_rate": 7.939407507880197e-05, "loss": 1.6949, "step": 68799 }, { "epoch": 2.29, "grad_norm": 0.6652643084526062, "learning_rate": 7.938699197873727e-05, "loss": 1.6535, "step": 68800 }, { "epoch": 2.29, "grad_norm": 0.6347383856773376, "learning_rate": 7.937990914646425e-05, "loss": 1.7284, "step": 68801 }, { "epoch": 2.29, "grad_norm": 0.6675995588302612, "learning_rate": 7.937282658199145e-05, "loss": 1.6946, "step": 68802 }, { "epoch": 2.29, "grad_norm": 0.6466683745384216, "learning_rate": 7.936574428532758e-05, "loss": 1.668, "step": 68803 }, { "epoch": 2.29, "grad_norm": 0.6753049492835999, "learning_rate": 7.935866225648122e-05, "loss": 1.7409, "step": 68804 }, { "epoch": 2.29, "grad_norm": 0.6618684530258179, "learning_rate": 7.935158049546082e-05, "loss": 1.6648, "step": 68805 }, { "epoch": 2.29, "grad_norm": 0.6708834171295166, "learning_rate": 7.93444990022751e-05, "loss": 1.7238, "step": 68806 }, { "epoch": 2.29, "grad_norm": 0.6583544611930847, "learning_rate": 7.933741777693275e-05, "loss": 1.7227, "step": 68807 }, { "epoch": 2.29, "grad_norm": 0.6556130051612854, "learning_rate": 7.933033681944225e-05, "loss": 1.6559, "step": 68808 }, { "epoch": 2.29, "grad_norm": 0.6560711860656738, "learning_rate": 7.93232561298121e-05, "loss": 1.6793, "step": 68809 }, { "epoch": 2.29, "grad_norm": 0.6661691665649414, "learning_rate": 7.931617570805109e-05, "loss": 1.6969, "step": 68810 }, { "epoch": 2.29, "grad_norm": 0.6639741659164429, "learning_rate": 7.930909555416775e-05, "loss": 1.6716, "step": 68811 }, { "epoch": 2.29, "grad_norm": 0.6410910487174988, "learning_rate": 7.930201566817056e-05, "loss": 1.7054, "step": 68812 }, { "epoch": 2.29, "grad_norm": 0.688329815864563, "learning_rate": 7.92949360500682e-05, "loss": 1.6491, "step": 68813 }, { "epoch": 2.29, "grad_norm": 0.6896111369132996, "learning_rate": 7.928785669986934e-05, "loss": 1.7432, "step": 68814 }, { "epoch": 2.29, "grad_norm": 0.6633313894271851, "learning_rate": 7.928077761758255e-05, "loss": 1.6949, "step": 68815 }, { "epoch": 2.29, "grad_norm": 0.655556321144104, "learning_rate": 7.927369880321623e-05, "loss": 1.7019, "step": 68816 }, { "epoch": 2.29, "grad_norm": 0.6450619697570801, "learning_rate": 7.926662025677923e-05, "loss": 1.7275, "step": 68817 }, { "epoch": 2.29, "grad_norm": 0.6756938099861145, "learning_rate": 7.925954197828004e-05, "loss": 1.6765, "step": 68818 }, { "epoch": 2.29, "grad_norm": 0.6528926491737366, "learning_rate": 7.925246396772715e-05, "loss": 1.7282, "step": 68819 }, { "epoch": 2.29, "grad_norm": 0.709036648273468, "learning_rate": 7.924538622512923e-05, "loss": 1.6978, "step": 68820 }, { "epoch": 2.29, "grad_norm": 0.6683735847473145, "learning_rate": 7.923830875049505e-05, "loss": 1.7113, "step": 68821 }, { "epoch": 2.29, "grad_norm": 0.6866047382354736, "learning_rate": 7.923123154383287e-05, "loss": 1.7068, "step": 68822 }, { "epoch": 2.29, "grad_norm": 0.6773840188980103, "learning_rate": 7.922415460515147e-05, "loss": 1.6567, "step": 68823 }, { "epoch": 2.29, "grad_norm": 0.6459092497825623, "learning_rate": 7.921707793445949e-05, "loss": 1.6749, "step": 68824 }, { "epoch": 2.29, "grad_norm": 0.6612375378608704, "learning_rate": 7.921000153176545e-05, "loss": 1.6081, "step": 68825 }, { "epoch": 2.29, "grad_norm": 0.6790255904197693, "learning_rate": 7.92029253970778e-05, "loss": 1.7495, "step": 68826 }, { "epoch": 2.29, "grad_norm": 0.6585986614227295, "learning_rate": 7.919584953040532e-05, "loss": 1.6771, "step": 68827 }, { "epoch": 2.29, "grad_norm": 0.6764109134674072, "learning_rate": 7.918877393175674e-05, "loss": 1.6305, "step": 68828 }, { "epoch": 2.29, "grad_norm": 0.688477098941803, "learning_rate": 7.91816986011402e-05, "loss": 1.5894, "step": 68829 }, { "epoch": 2.29, "grad_norm": 0.688389241695404, "learning_rate": 7.917462353856461e-05, "loss": 1.6837, "step": 68830 }, { "epoch": 2.29, "grad_norm": 0.6730842590332031, "learning_rate": 7.916754874403855e-05, "loss": 1.6953, "step": 68831 }, { "epoch": 2.29, "grad_norm": 0.709100067615509, "learning_rate": 7.916047421757055e-05, "loss": 1.7007, "step": 68832 }, { "epoch": 2.29, "grad_norm": 0.68885338306427, "learning_rate": 7.91533999591691e-05, "loss": 1.7809, "step": 68833 }, { "epoch": 2.29, "grad_norm": 0.6834609508514404, "learning_rate": 7.914632596884289e-05, "loss": 1.6639, "step": 68834 }, { "epoch": 2.29, "grad_norm": 0.6592054963111877, "learning_rate": 7.91392522466007e-05, "loss": 1.7137, "step": 68835 }, { "epoch": 2.29, "grad_norm": 0.662684977054596, "learning_rate": 7.91321787924507e-05, "loss": 1.6713, "step": 68836 }, { "epoch": 2.29, "grad_norm": 0.6964635848999023, "learning_rate": 7.912510560640173e-05, "loss": 1.5864, "step": 68837 }, { "epoch": 2.29, "grad_norm": 0.6824750900268555, "learning_rate": 7.91180326884624e-05, "loss": 1.712, "step": 68838 }, { "epoch": 2.29, "grad_norm": 0.6898748874664307, "learning_rate": 7.911096003864123e-05, "loss": 1.705, "step": 68839 }, { "epoch": 2.29, "grad_norm": 0.6723537445068359, "learning_rate": 7.91038876569467e-05, "loss": 1.7048, "step": 68840 }, { "epoch": 2.29, "grad_norm": 0.6718546152114868, "learning_rate": 7.909681554338764e-05, "loss": 1.6798, "step": 68841 }, { "epoch": 2.29, "grad_norm": 0.6797055602073669, "learning_rate": 7.908974369797247e-05, "loss": 1.6752, "step": 68842 }, { "epoch": 2.29, "grad_norm": 0.6703909039497375, "learning_rate": 7.908267212070971e-05, "loss": 1.6566, "step": 68843 }, { "epoch": 2.29, "grad_norm": 0.6737510561943054, "learning_rate": 7.907560081160804e-05, "loss": 1.7477, "step": 68844 }, { "epoch": 2.29, "grad_norm": 0.6759116053581238, "learning_rate": 7.906852977067612e-05, "loss": 1.7392, "step": 68845 }, { "epoch": 2.29, "grad_norm": 0.658911406993866, "learning_rate": 7.90614589979225e-05, "loss": 1.6666, "step": 68846 }, { "epoch": 2.29, "grad_norm": 0.6966065764427185, "learning_rate": 7.905438849335556e-05, "loss": 1.7226, "step": 68847 }, { "epoch": 2.29, "grad_norm": 0.6679847836494446, "learning_rate": 7.904731825698417e-05, "loss": 1.6957, "step": 68848 }, { "epoch": 2.29, "grad_norm": 0.6691756844520569, "learning_rate": 7.904024828881675e-05, "loss": 1.7098, "step": 68849 }, { "epoch": 2.29, "grad_norm": 0.660159170627594, "learning_rate": 7.903317858886184e-05, "loss": 1.6735, "step": 68850 }, { "epoch": 2.29, "grad_norm": 0.6939495801925659, "learning_rate": 7.902610915712819e-05, "loss": 1.6433, "step": 68851 }, { "epoch": 2.29, "grad_norm": 0.6919548511505127, "learning_rate": 7.901903999362419e-05, "loss": 1.7683, "step": 68852 }, { "epoch": 2.29, "grad_norm": 0.6823973059654236, "learning_rate": 7.901197109835858e-05, "loss": 1.6888, "step": 68853 }, { "epoch": 2.29, "grad_norm": 0.6583756804466248, "learning_rate": 7.900490247133981e-05, "loss": 1.6565, "step": 68854 }, { "epoch": 2.29, "grad_norm": 0.6675283312797546, "learning_rate": 7.89978341125766e-05, "loss": 1.6854, "step": 68855 }, { "epoch": 2.29, "grad_norm": 0.6572569012641907, "learning_rate": 7.899076602207749e-05, "loss": 1.6946, "step": 68856 }, { "epoch": 2.29, "grad_norm": 0.6520794034004211, "learning_rate": 7.898369819985091e-05, "loss": 1.6722, "step": 68857 }, { "epoch": 2.29, "grad_norm": 0.6921247839927673, "learning_rate": 7.897663064590565e-05, "loss": 1.6334, "step": 68858 }, { "epoch": 2.29, "grad_norm": 0.6817846298217773, "learning_rate": 7.89695633602501e-05, "loss": 1.7345, "step": 68859 }, { "epoch": 2.29, "grad_norm": 0.6785825490951538, "learning_rate": 7.896249634289302e-05, "loss": 1.7129, "step": 68860 }, { "epoch": 2.29, "grad_norm": 0.6610215306282043, "learning_rate": 7.895542959384293e-05, "loss": 1.7498, "step": 68861 }, { "epoch": 2.29, "grad_norm": 0.6734563708305359, "learning_rate": 7.894836311310826e-05, "loss": 1.6807, "step": 68862 }, { "epoch": 2.29, "grad_norm": 0.6551342010498047, "learning_rate": 7.89412969006978e-05, "loss": 1.658, "step": 68863 }, { "epoch": 2.29, "grad_norm": 0.6588216423988342, "learning_rate": 7.893423095661994e-05, "loss": 1.6952, "step": 68864 }, { "epoch": 2.29, "grad_norm": 0.6668676137924194, "learning_rate": 7.892716528088345e-05, "loss": 1.7442, "step": 68865 }, { "epoch": 2.29, "grad_norm": 0.6528079509735107, "learning_rate": 7.892009987349673e-05, "loss": 1.6413, "step": 68866 }, { "epoch": 2.29, "grad_norm": 0.6607257723808289, "learning_rate": 7.891303473446848e-05, "loss": 1.7289, "step": 68867 }, { "epoch": 2.29, "grad_norm": 0.6860809326171875, "learning_rate": 7.89059698638073e-05, "loss": 1.6687, "step": 68868 }, { "epoch": 2.29, "grad_norm": 0.6846100091934204, "learning_rate": 7.889890526152156e-05, "loss": 1.6943, "step": 68869 }, { "epoch": 2.29, "grad_norm": 0.7006542682647705, "learning_rate": 7.889184092762005e-05, "loss": 1.6334, "step": 68870 }, { "epoch": 2.29, "grad_norm": 0.6651097536087036, "learning_rate": 7.88847768621113e-05, "loss": 1.7211, "step": 68871 }, { "epoch": 2.29, "grad_norm": 0.6733551621437073, "learning_rate": 7.887771306500373e-05, "loss": 1.7191, "step": 68872 }, { "epoch": 2.29, "grad_norm": 0.6693034768104553, "learning_rate": 7.887064953630604e-05, "loss": 1.7074, "step": 68873 }, { "epoch": 2.29, "grad_norm": 0.6935181617736816, "learning_rate": 7.886358627602692e-05, "loss": 1.6446, "step": 68874 }, { "epoch": 2.29, "grad_norm": 0.6834434866905212, "learning_rate": 7.88565232841748e-05, "loss": 1.7252, "step": 68875 }, { "epoch": 2.29, "grad_norm": 0.6756871938705444, "learning_rate": 7.88494605607582e-05, "loss": 1.7011, "step": 68876 }, { "epoch": 2.29, "grad_norm": 0.6796073913574219, "learning_rate": 7.884239810578585e-05, "loss": 1.6775, "step": 68877 }, { "epoch": 2.29, "grad_norm": 0.6822332739830017, "learning_rate": 7.883533591926624e-05, "loss": 1.7635, "step": 68878 }, { "epoch": 2.29, "grad_norm": 0.6614527702331543, "learning_rate": 7.882827400120784e-05, "loss": 1.665, "step": 68879 }, { "epoch": 2.29, "grad_norm": 0.6817358732223511, "learning_rate": 7.882121235161937e-05, "loss": 1.6384, "step": 68880 }, { "epoch": 2.29, "grad_norm": 0.7047045230865479, "learning_rate": 7.881415097050939e-05, "loss": 1.751, "step": 68881 }, { "epoch": 2.29, "grad_norm": 0.6673747897148132, "learning_rate": 7.880708985788649e-05, "loss": 1.6495, "step": 68882 }, { "epoch": 2.29, "grad_norm": 0.6887462735176086, "learning_rate": 7.88000290137591e-05, "loss": 1.7503, "step": 68883 }, { "epoch": 2.29, "grad_norm": 0.6562209129333496, "learning_rate": 7.879296843813595e-05, "loss": 1.7397, "step": 68884 }, { "epoch": 2.29, "grad_norm": 0.6433155536651611, "learning_rate": 7.878590813102555e-05, "loss": 1.7045, "step": 68885 }, { "epoch": 2.29, "grad_norm": 0.6580029129981995, "learning_rate": 7.877884809243638e-05, "loss": 1.731, "step": 68886 }, { "epoch": 2.29, "grad_norm": 0.6416071653366089, "learning_rate": 7.87717883223771e-05, "loss": 1.668, "step": 68887 }, { "epoch": 2.29, "grad_norm": 0.6706300973892212, "learning_rate": 7.876472882085633e-05, "loss": 1.6687, "step": 68888 }, { "epoch": 2.29, "grad_norm": 0.6783105731010437, "learning_rate": 7.87576695878826e-05, "loss": 1.6776, "step": 68889 }, { "epoch": 2.29, "grad_norm": 0.6569905281066895, "learning_rate": 7.875061062346437e-05, "loss": 1.6971, "step": 68890 }, { "epoch": 2.29, "grad_norm": 0.6717066168785095, "learning_rate": 7.874355192761041e-05, "loss": 1.6717, "step": 68891 }, { "epoch": 2.29, "grad_norm": 0.7365010976791382, "learning_rate": 7.873649350032916e-05, "loss": 1.693, "step": 68892 }, { "epoch": 2.29, "grad_norm": 0.7180126905441284, "learning_rate": 7.872943534162912e-05, "loss": 1.783, "step": 68893 }, { "epoch": 2.29, "grad_norm": 0.6863071322441101, "learning_rate": 7.872237745151894e-05, "loss": 1.7148, "step": 68894 }, { "epoch": 2.29, "grad_norm": 0.6847429871559143, "learning_rate": 7.871531983000727e-05, "loss": 1.7602, "step": 68895 }, { "epoch": 2.29, "grad_norm": 0.6692780256271362, "learning_rate": 7.870826247710259e-05, "loss": 1.6012, "step": 68896 }, { "epoch": 2.29, "grad_norm": 0.7102013230323792, "learning_rate": 7.870120539281341e-05, "loss": 1.6684, "step": 68897 }, { "epoch": 2.29, "grad_norm": 0.6565093994140625, "learning_rate": 7.869414857714843e-05, "loss": 1.6556, "step": 68898 }, { "epoch": 2.29, "grad_norm": 0.6778841614723206, "learning_rate": 7.868709203011617e-05, "loss": 1.736, "step": 68899 }, { "epoch": 2.29, "grad_norm": 0.6525428891181946, "learning_rate": 7.868003575172507e-05, "loss": 1.745, "step": 68900 }, { "epoch": 2.29, "grad_norm": 0.6628764867782593, "learning_rate": 7.867297974198388e-05, "loss": 1.6447, "step": 68901 }, { "epoch": 2.29, "grad_norm": 0.6683583855628967, "learning_rate": 7.866592400090099e-05, "loss": 1.6439, "step": 68902 }, { "epoch": 2.29, "grad_norm": 0.6565384268760681, "learning_rate": 7.865886852848514e-05, "loss": 1.769, "step": 68903 }, { "epoch": 2.29, "grad_norm": 0.6595692038536072, "learning_rate": 7.865181332474471e-05, "loss": 1.6363, "step": 68904 }, { "epoch": 2.29, "grad_norm": 0.6990853548049927, "learning_rate": 7.86447583896885e-05, "loss": 1.7348, "step": 68905 }, { "epoch": 2.29, "grad_norm": 0.680192768573761, "learning_rate": 7.863770372332492e-05, "loss": 1.6989, "step": 68906 }, { "epoch": 2.29, "grad_norm": 0.6836395859718323, "learning_rate": 7.863064932566244e-05, "loss": 1.7151, "step": 68907 }, { "epoch": 2.29, "grad_norm": 0.6879419088363647, "learning_rate": 7.862359519670983e-05, "loss": 1.6997, "step": 68908 }, { "epoch": 2.29, "grad_norm": 0.671579897403717, "learning_rate": 7.861654133647548e-05, "loss": 1.6419, "step": 68909 }, { "epoch": 2.29, "grad_norm": 0.6782053709030151, "learning_rate": 7.860948774496813e-05, "loss": 1.7031, "step": 68910 }, { "epoch": 2.29, "grad_norm": 0.6409127116203308, "learning_rate": 7.860243442219623e-05, "loss": 1.7239, "step": 68911 }, { "epoch": 2.29, "grad_norm": 0.67830491065979, "learning_rate": 7.859538136816825e-05, "loss": 1.6942, "step": 68912 }, { "epoch": 2.29, "grad_norm": 0.6759284138679504, "learning_rate": 7.858832858289295e-05, "loss": 1.7102, "step": 68913 }, { "epoch": 2.29, "grad_norm": 0.6657087206840515, "learning_rate": 7.858127606637873e-05, "loss": 1.704, "step": 68914 }, { "epoch": 2.29, "grad_norm": 0.6863714456558228, "learning_rate": 7.857422381863425e-05, "loss": 1.6486, "step": 68915 }, { "epoch": 2.29, "grad_norm": 0.7052949070930481, "learning_rate": 7.8567171839668e-05, "loss": 1.7479, "step": 68916 }, { "epoch": 2.29, "grad_norm": 0.6583994030952454, "learning_rate": 7.856012012948865e-05, "loss": 1.6712, "step": 68917 }, { "epoch": 2.29, "grad_norm": 0.6789965629577637, "learning_rate": 7.855306868810469e-05, "loss": 1.7203, "step": 68918 }, { "epoch": 2.29, "grad_norm": 0.6663657426834106, "learning_rate": 7.854601751552457e-05, "loss": 1.8116, "step": 68919 }, { "epoch": 2.29, "grad_norm": 0.6701831221580505, "learning_rate": 7.853896661175707e-05, "loss": 1.7773, "step": 68920 }, { "epoch": 2.29, "grad_norm": 0.6924307346343994, "learning_rate": 7.85319159768106e-05, "loss": 1.6446, "step": 68921 }, { "epoch": 2.29, "grad_norm": 0.660556972026825, "learning_rate": 7.852486561069369e-05, "loss": 1.6193, "step": 68922 }, { "epoch": 2.29, "grad_norm": 0.6480885744094849, "learning_rate": 7.851781551341498e-05, "loss": 1.6693, "step": 68923 }, { "epoch": 2.29, "grad_norm": 0.6702725887298584, "learning_rate": 7.851076568498306e-05, "loss": 1.6894, "step": 68924 }, { "epoch": 2.29, "grad_norm": 0.6503720283508301, "learning_rate": 7.850371612540643e-05, "loss": 1.7436, "step": 68925 }, { "epoch": 2.29, "grad_norm": 0.6763402819633484, "learning_rate": 7.849666683469361e-05, "loss": 1.685, "step": 68926 }, { "epoch": 2.29, "grad_norm": 0.6801265478134155, "learning_rate": 7.848961781285324e-05, "loss": 1.7051, "step": 68927 }, { "epoch": 2.29, "grad_norm": 0.65591961145401, "learning_rate": 7.848256905989386e-05, "loss": 1.7286, "step": 68928 }, { "epoch": 2.29, "grad_norm": 0.6566713452339172, "learning_rate": 7.847552057582393e-05, "loss": 1.677, "step": 68929 }, { "epoch": 2.29, "grad_norm": 0.6823563575744629, "learning_rate": 7.846847236065203e-05, "loss": 1.6774, "step": 68930 }, { "epoch": 2.29, "grad_norm": 0.6639346480369568, "learning_rate": 7.846142441438698e-05, "loss": 1.7372, "step": 68931 }, { "epoch": 2.29, "grad_norm": 0.6802164912223816, "learning_rate": 7.845437673703694e-05, "loss": 1.8097, "step": 68932 }, { "epoch": 2.29, "grad_norm": 0.6589710712432861, "learning_rate": 7.844732932861061e-05, "loss": 1.7839, "step": 68933 }, { "epoch": 2.29, "grad_norm": 0.6962615847587585, "learning_rate": 7.844028218911669e-05, "loss": 1.7357, "step": 68934 }, { "epoch": 2.29, "grad_norm": 0.6529698371887207, "learning_rate": 7.843323531856364e-05, "loss": 1.7217, "step": 68935 }, { "epoch": 2.29, "grad_norm": 0.664742112159729, "learning_rate": 7.842618871695988e-05, "loss": 1.741, "step": 68936 }, { "epoch": 2.29, "grad_norm": 0.660317063331604, "learning_rate": 7.841914238431408e-05, "loss": 1.7481, "step": 68937 }, { "epoch": 2.29, "grad_norm": 0.6959502696990967, "learning_rate": 7.841209632063496e-05, "loss": 1.7135, "step": 68938 }, { "epoch": 2.29, "grad_norm": 0.6756817698478699, "learning_rate": 7.840505052593076e-05, "loss": 1.5687, "step": 68939 }, { "epoch": 2.29, "grad_norm": 0.6871145963668823, "learning_rate": 7.839800500021016e-05, "loss": 1.8088, "step": 68940 }, { "epoch": 2.29, "grad_norm": 0.6618093252182007, "learning_rate": 7.839095974348184e-05, "loss": 1.7059, "step": 68941 }, { "epoch": 2.29, "grad_norm": 0.6705506443977356, "learning_rate": 7.838391475575422e-05, "loss": 1.6033, "step": 68942 }, { "epoch": 2.29, "grad_norm": 0.7076234817504883, "learning_rate": 7.837687003703578e-05, "loss": 1.7333, "step": 68943 }, { "epoch": 2.29, "grad_norm": 0.6881687045097351, "learning_rate": 7.836982558733517e-05, "loss": 1.6784, "step": 68944 }, { "epoch": 2.29, "grad_norm": 0.6741336584091187, "learning_rate": 7.836278140666114e-05, "loss": 1.6574, "step": 68945 }, { "epoch": 2.29, "grad_norm": 0.6500250697135925, "learning_rate": 7.835573749502184e-05, "loss": 1.7417, "step": 68946 }, { "epoch": 2.29, "grad_norm": 0.6510699391365051, "learning_rate": 7.8348693852426e-05, "loss": 1.7134, "step": 68947 }, { "epoch": 2.29, "grad_norm": 0.6672316789627075, "learning_rate": 7.83416504788823e-05, "loss": 1.7179, "step": 68948 }, { "epoch": 2.29, "grad_norm": 0.6425108313560486, "learning_rate": 7.833460737439916e-05, "loss": 1.7198, "step": 68949 }, { "epoch": 2.29, "grad_norm": 0.6700587272644043, "learning_rate": 7.832756453898507e-05, "loss": 1.6663, "step": 68950 }, { "epoch": 2.29, "grad_norm": 0.6829683780670166, "learning_rate": 7.832052197264863e-05, "loss": 1.7461, "step": 68951 }, { "epoch": 2.29, "grad_norm": 0.6642186045646667, "learning_rate": 7.831347967539861e-05, "loss": 1.667, "step": 68952 }, { "epoch": 2.29, "grad_norm": 0.6612218618392944, "learning_rate": 7.830643764724321e-05, "loss": 1.7961, "step": 68953 }, { "epoch": 2.29, "grad_norm": 0.6937727928161621, "learning_rate": 7.829939588819109e-05, "loss": 1.6398, "step": 68954 }, { "epoch": 2.29, "grad_norm": 0.6702602505683899, "learning_rate": 7.829235439825095e-05, "loss": 1.6857, "step": 68955 }, { "epoch": 2.29, "grad_norm": 0.6743577718734741, "learning_rate": 7.828531317743121e-05, "loss": 1.6427, "step": 68956 }, { "epoch": 2.29, "grad_norm": 0.6665194034576416, "learning_rate": 7.827827222574033e-05, "loss": 1.704, "step": 68957 }, { "epoch": 2.29, "grad_norm": 0.7019391059875488, "learning_rate": 7.82712315431871e-05, "loss": 1.7359, "step": 68958 }, { "epoch": 2.29, "grad_norm": 0.6780133247375488, "learning_rate": 7.826419112977986e-05, "loss": 1.7425, "step": 68959 }, { "epoch": 2.29, "grad_norm": 0.6498469710350037, "learning_rate": 7.825715098552716e-05, "loss": 1.7003, "step": 68960 }, { "epoch": 2.29, "grad_norm": 0.7017802000045776, "learning_rate": 7.825011111043761e-05, "loss": 1.7209, "step": 68961 }, { "epoch": 2.29, "grad_norm": 0.6844788193702698, "learning_rate": 7.824307150451986e-05, "loss": 1.6472, "step": 68962 }, { "epoch": 2.29, "grad_norm": 0.6860355734825134, "learning_rate": 7.823603216778231e-05, "loss": 1.6178, "step": 68963 }, { "epoch": 2.29, "grad_norm": 0.697460412979126, "learning_rate": 7.822899310023347e-05, "loss": 1.7139, "step": 68964 }, { "epoch": 2.29, "grad_norm": 0.6565197706222534, "learning_rate": 7.822195430188206e-05, "loss": 1.7469, "step": 68965 }, { "epoch": 2.29, "grad_norm": 0.6563364267349243, "learning_rate": 7.821491577273651e-05, "loss": 1.6908, "step": 68966 }, { "epoch": 2.29, "grad_norm": 0.6877678632736206, "learning_rate": 7.820787751280528e-05, "loss": 1.7257, "step": 68967 }, { "epoch": 2.29, "grad_norm": 0.6643067002296448, "learning_rate": 7.820083952209708e-05, "loss": 1.7033, "step": 68968 }, { "epoch": 2.29, "grad_norm": 0.6843221783638, "learning_rate": 7.81938018006203e-05, "loss": 1.8012, "step": 68969 }, { "epoch": 2.29, "grad_norm": 0.6519225239753723, "learning_rate": 7.818676434838367e-05, "loss": 1.7068, "step": 68970 }, { "epoch": 2.29, "grad_norm": 0.6565988063812256, "learning_rate": 7.81797271653955e-05, "loss": 1.7122, "step": 68971 }, { "epoch": 2.29, "grad_norm": 0.67067551612854, "learning_rate": 7.817269025166458e-05, "loss": 1.6456, "step": 68972 }, { "epoch": 2.29, "grad_norm": 0.6752849817276001, "learning_rate": 7.816565360719931e-05, "loss": 1.6857, "step": 68973 }, { "epoch": 2.29, "grad_norm": 0.683043897151947, "learning_rate": 7.815861723200817e-05, "loss": 1.6705, "step": 68974 }, { "epoch": 2.29, "grad_norm": 0.6670751571655273, "learning_rate": 7.81515811260999e-05, "loss": 1.6497, "step": 68975 }, { "epoch": 2.29, "grad_norm": 0.6562387347221375, "learning_rate": 7.814454528948277e-05, "loss": 1.7668, "step": 68976 }, { "epoch": 2.29, "grad_norm": 0.6775594353675842, "learning_rate": 7.81375097221656e-05, "loss": 1.7126, "step": 68977 }, { "epoch": 2.29, "grad_norm": 0.6574579477310181, "learning_rate": 7.81304744241568e-05, "loss": 1.6854, "step": 68978 }, { "epoch": 2.29, "grad_norm": 0.6421493887901306, "learning_rate": 7.812343939546485e-05, "loss": 1.6769, "step": 68979 }, { "epoch": 2.29, "grad_norm": 0.6317456364631653, "learning_rate": 7.811640463609841e-05, "loss": 1.6902, "step": 68980 }, { "epoch": 2.3, "grad_norm": 0.6765704154968262, "learning_rate": 7.810937014606588e-05, "loss": 1.6771, "step": 68981 }, { "epoch": 2.3, "grad_norm": 0.6423788666725159, "learning_rate": 7.810233592537598e-05, "loss": 1.6189, "step": 68982 }, { "epoch": 2.3, "grad_norm": 0.6771969795227051, "learning_rate": 7.809530197403703e-05, "loss": 1.7017, "step": 68983 }, { "epoch": 2.3, "grad_norm": 0.646051824092865, "learning_rate": 7.808826829205782e-05, "loss": 1.6326, "step": 68984 }, { "epoch": 2.3, "grad_norm": 0.6623479723930359, "learning_rate": 7.808123487944672e-05, "loss": 1.7364, "step": 68985 }, { "epoch": 2.3, "grad_norm": 0.6888946294784546, "learning_rate": 7.807420173621222e-05, "loss": 1.6958, "step": 68986 }, { "epoch": 2.3, "grad_norm": 0.6826068758964539, "learning_rate": 7.806716886236303e-05, "loss": 1.6451, "step": 68987 }, { "epoch": 2.3, "grad_norm": 0.6690409779548645, "learning_rate": 7.806013625790761e-05, "loss": 1.7061, "step": 68988 }, { "epoch": 2.3, "grad_norm": 1.7028223276138306, "learning_rate": 7.805310392285436e-05, "loss": 1.6954, "step": 68989 }, { "epoch": 2.3, "grad_norm": 0.6496422290802002, "learning_rate": 7.804607185721198e-05, "loss": 1.6809, "step": 68990 }, { "epoch": 2.3, "grad_norm": 0.6742938160896301, "learning_rate": 7.803904006098905e-05, "loss": 1.7064, "step": 68991 }, { "epoch": 2.3, "grad_norm": 0.6892266273498535, "learning_rate": 7.8032008534194e-05, "loss": 1.6572, "step": 68992 }, { "epoch": 2.3, "grad_norm": 0.6645545363426208, "learning_rate": 7.802497727683531e-05, "loss": 1.7986, "step": 68993 }, { "epoch": 2.3, "grad_norm": 0.6706962585449219, "learning_rate": 7.801794628892166e-05, "loss": 1.6698, "step": 68994 }, { "epoch": 2.3, "grad_norm": 0.6976994276046753, "learning_rate": 7.801091557046153e-05, "loss": 1.7808, "step": 68995 }, { "epoch": 2.3, "grad_norm": 0.6730644106864929, "learning_rate": 7.800388512146338e-05, "loss": 1.7119, "step": 68996 }, { "epoch": 2.3, "grad_norm": 0.6601681709289551, "learning_rate": 7.799685494193574e-05, "loss": 1.6918, "step": 68997 }, { "epoch": 2.3, "grad_norm": 0.6898031234741211, "learning_rate": 7.798982503188736e-05, "loss": 1.6757, "step": 68998 }, { "epoch": 2.3, "grad_norm": 0.6634576916694641, "learning_rate": 7.798279539132659e-05, "loss": 1.6983, "step": 68999 }, { "epoch": 2.3, "grad_norm": 0.6514307856559753, "learning_rate": 7.797576602026192e-05, "loss": 1.6338, "step": 69000 }, { "epoch": 2.3, "grad_norm": 0.658417284488678, "learning_rate": 7.796873691870201e-05, "loss": 1.6486, "step": 69001 }, { "epoch": 2.3, "grad_norm": 0.6925870776176453, "learning_rate": 7.796170808665539e-05, "loss": 1.76, "step": 69002 }, { "epoch": 2.3, "grad_norm": 0.688284158706665, "learning_rate": 7.795467952413043e-05, "loss": 1.7009, "step": 69003 }, { "epoch": 2.3, "grad_norm": 0.674852192401886, "learning_rate": 7.794765123113573e-05, "loss": 1.7352, "step": 69004 }, { "epoch": 2.3, "grad_norm": 0.670558750629425, "learning_rate": 7.794062320768e-05, "loss": 1.6888, "step": 69005 }, { "epoch": 2.3, "grad_norm": 0.663375198841095, "learning_rate": 7.793359545377165e-05, "loss": 1.5836, "step": 69006 }, { "epoch": 2.3, "grad_norm": 0.6765588521957397, "learning_rate": 7.792656796941907e-05, "loss": 1.752, "step": 69007 }, { "epoch": 2.3, "grad_norm": 0.6620312929153442, "learning_rate": 7.7919540754631e-05, "loss": 1.6443, "step": 69008 }, { "epoch": 2.3, "grad_norm": 0.6771858930587769, "learning_rate": 7.79125138094159e-05, "loss": 1.6596, "step": 69009 }, { "epoch": 2.3, "grad_norm": 0.6690634489059448, "learning_rate": 7.790548713378221e-05, "loss": 1.739, "step": 69010 }, { "epoch": 2.3, "grad_norm": 0.6701712608337402, "learning_rate": 7.789846072773852e-05, "loss": 1.6777, "step": 69011 }, { "epoch": 2.3, "grad_norm": 0.6534011363983154, "learning_rate": 7.789143459129347e-05, "loss": 1.7447, "step": 69012 }, { "epoch": 2.3, "grad_norm": 0.6739612817764282, "learning_rate": 7.788440872445549e-05, "loss": 1.6757, "step": 69013 }, { "epoch": 2.3, "grad_norm": 0.66493821144104, "learning_rate": 7.7877383127233e-05, "loss": 1.7416, "step": 69014 }, { "epoch": 2.3, "grad_norm": 0.6678255796432495, "learning_rate": 7.787035779963473e-05, "loss": 1.6785, "step": 69015 }, { "epoch": 2.3, "grad_norm": 0.6699726581573486, "learning_rate": 7.786333274166915e-05, "loss": 1.7306, "step": 69016 }, { "epoch": 2.3, "grad_norm": 0.6709919571876526, "learning_rate": 7.785630795334465e-05, "loss": 1.6035, "step": 69017 }, { "epoch": 2.3, "grad_norm": 0.6709030866622925, "learning_rate": 7.784928343466995e-05, "loss": 1.7459, "step": 69018 }, { "epoch": 2.3, "grad_norm": 0.6666418313980103, "learning_rate": 7.784225918565337e-05, "loss": 1.6803, "step": 69019 }, { "epoch": 2.3, "grad_norm": 0.6790169477462769, "learning_rate": 7.783523520630368e-05, "loss": 1.6967, "step": 69020 }, { "epoch": 2.3, "grad_norm": 0.6819022297859192, "learning_rate": 7.782821149662918e-05, "loss": 1.708, "step": 69021 }, { "epoch": 2.3, "grad_norm": 0.6562234163284302, "learning_rate": 7.782118805663858e-05, "loss": 1.8033, "step": 69022 }, { "epoch": 2.3, "grad_norm": 0.6817912459373474, "learning_rate": 7.781416488634032e-05, "loss": 1.6949, "step": 69023 }, { "epoch": 2.3, "grad_norm": 0.6697134971618652, "learning_rate": 7.780714198574283e-05, "loss": 1.6531, "step": 69024 }, { "epoch": 2.3, "grad_norm": 0.6453388333320618, "learning_rate": 7.780011935485485e-05, "loss": 1.6632, "step": 69025 }, { "epoch": 2.3, "grad_norm": 0.6899716258049011, "learning_rate": 7.779309699368466e-05, "loss": 1.7155, "step": 69026 }, { "epoch": 2.3, "grad_norm": 0.6783720850944519, "learning_rate": 7.778607490224103e-05, "loss": 1.6835, "step": 69027 }, { "epoch": 2.3, "grad_norm": 0.6577891707420349, "learning_rate": 7.777905308053235e-05, "loss": 1.709, "step": 69028 }, { "epoch": 2.3, "grad_norm": 0.6605452299118042, "learning_rate": 7.777203152856706e-05, "loss": 1.6715, "step": 69029 }, { "epoch": 2.3, "grad_norm": 0.7261083126068115, "learning_rate": 7.77650102463539e-05, "loss": 1.7277, "step": 69030 }, { "epoch": 2.3, "grad_norm": 0.6779824495315552, "learning_rate": 7.775798923390113e-05, "loss": 1.6753, "step": 69031 }, { "epoch": 2.3, "grad_norm": 0.6495617032051086, "learning_rate": 7.775096849121754e-05, "loss": 1.7174, "step": 69032 }, { "epoch": 2.3, "grad_norm": 0.6758791208267212, "learning_rate": 7.774394801831143e-05, "loss": 1.6435, "step": 69033 }, { "epoch": 2.3, "grad_norm": 0.6705774068832397, "learning_rate": 7.773692781519151e-05, "loss": 1.7319, "step": 69034 }, { "epoch": 2.3, "grad_norm": 0.6515374779701233, "learning_rate": 7.772990788186624e-05, "loss": 1.7088, "step": 69035 }, { "epoch": 2.3, "grad_norm": 0.6731999516487122, "learning_rate": 7.772288821834398e-05, "loss": 1.7044, "step": 69036 }, { "epoch": 2.3, "grad_norm": 0.6615786552429199, "learning_rate": 7.77158688246335e-05, "loss": 1.7451, "step": 69037 }, { "epoch": 2.3, "grad_norm": 0.6715066432952881, "learning_rate": 7.770884970074318e-05, "loss": 1.7368, "step": 69038 }, { "epoch": 2.3, "grad_norm": 0.6578203439712524, "learning_rate": 7.770183084668149e-05, "loss": 1.716, "step": 69039 }, { "epoch": 2.3, "grad_norm": 0.6747311353683472, "learning_rate": 7.769481226245702e-05, "loss": 1.651, "step": 69040 }, { "epoch": 2.3, "grad_norm": 0.6832695603370667, "learning_rate": 7.768779394807836e-05, "loss": 1.6381, "step": 69041 }, { "epoch": 2.3, "grad_norm": 0.6568847298622131, "learning_rate": 7.7680775903554e-05, "loss": 1.7434, "step": 69042 }, { "epoch": 2.3, "grad_norm": 0.6638641357421875, "learning_rate": 7.767375812889232e-05, "loss": 1.6848, "step": 69043 }, { "epoch": 2.3, "grad_norm": 0.6666342616081238, "learning_rate": 7.766674062410199e-05, "loss": 1.7678, "step": 69044 }, { "epoch": 2.3, "grad_norm": 0.6572787761688232, "learning_rate": 7.765972338919151e-05, "loss": 1.6945, "step": 69045 }, { "epoch": 2.3, "grad_norm": 0.684471845626831, "learning_rate": 7.765270642416928e-05, "loss": 1.6601, "step": 69046 }, { "epoch": 2.3, "grad_norm": 0.6677155494689941, "learning_rate": 7.764568972904392e-05, "loss": 1.5939, "step": 69047 }, { "epoch": 2.3, "grad_norm": 0.6564080119132996, "learning_rate": 7.763867330382409e-05, "loss": 1.7092, "step": 69048 }, { "epoch": 2.3, "grad_norm": 0.6780679225921631, "learning_rate": 7.763165714851795e-05, "loss": 1.7377, "step": 69049 }, { "epoch": 2.3, "grad_norm": 0.6789808869361877, "learning_rate": 7.762464126313423e-05, "loss": 1.6806, "step": 69050 }, { "epoch": 2.3, "grad_norm": 0.660618007183075, "learning_rate": 7.761762564768152e-05, "loss": 1.6995, "step": 69051 }, { "epoch": 2.3, "grad_norm": 0.6595162153244019, "learning_rate": 7.761061030216824e-05, "loss": 1.7173, "step": 69052 }, { "epoch": 2.3, "grad_norm": 0.6585654020309448, "learning_rate": 7.760359522660284e-05, "loss": 1.7089, "step": 69053 }, { "epoch": 2.3, "grad_norm": 0.6763661503791809, "learning_rate": 7.759658042099388e-05, "loss": 1.6336, "step": 69054 }, { "epoch": 2.3, "grad_norm": 0.6785005927085876, "learning_rate": 7.758956588535011e-05, "loss": 1.7734, "step": 69055 }, { "epoch": 2.3, "grad_norm": 0.6507670283317566, "learning_rate": 7.758255161967963e-05, "loss": 1.6903, "step": 69056 }, { "epoch": 2.3, "grad_norm": 0.6701053977012634, "learning_rate": 7.757553762399117e-05, "loss": 1.6523, "step": 69057 }, { "epoch": 2.3, "grad_norm": 0.6854022145271301, "learning_rate": 7.756852389829335e-05, "loss": 1.7346, "step": 69058 }, { "epoch": 2.3, "grad_norm": 0.6800985336303711, "learning_rate": 7.756151044259451e-05, "loss": 1.647, "step": 69059 }, { "epoch": 2.3, "grad_norm": 0.6444809436798096, "learning_rate": 7.75544972569032e-05, "loss": 1.7465, "step": 69060 }, { "epoch": 2.3, "grad_norm": 0.6491861939430237, "learning_rate": 7.754748434122791e-05, "loss": 1.6493, "step": 69061 }, { "epoch": 2.3, "grad_norm": 0.6653786301612854, "learning_rate": 7.754047169557738e-05, "loss": 1.643, "step": 69062 }, { "epoch": 2.3, "grad_norm": 0.6760609149932861, "learning_rate": 7.753345931995977e-05, "loss": 1.6199, "step": 69063 }, { "epoch": 2.3, "grad_norm": 0.6449179649353027, "learning_rate": 7.752644721438375e-05, "loss": 1.5749, "step": 69064 }, { "epoch": 2.3, "grad_norm": 0.6662307977676392, "learning_rate": 7.751943537885794e-05, "loss": 1.655, "step": 69065 }, { "epoch": 2.3, "grad_norm": 0.6780767440795898, "learning_rate": 7.751242381339078e-05, "loss": 1.6877, "step": 69066 }, { "epoch": 2.3, "grad_norm": 0.6826218366622925, "learning_rate": 7.750541251799066e-05, "loss": 1.6722, "step": 69067 }, { "epoch": 2.3, "grad_norm": 0.6850367784500122, "learning_rate": 7.749840149266615e-05, "loss": 1.7201, "step": 69068 }, { "epoch": 2.3, "grad_norm": 0.6504600644111633, "learning_rate": 7.749139073742597e-05, "loss": 1.6592, "step": 69069 }, { "epoch": 2.3, "grad_norm": 0.6465774178504944, "learning_rate": 7.74843802522783e-05, "loss": 1.6949, "step": 69070 }, { "epoch": 2.3, "grad_norm": 0.6615808010101318, "learning_rate": 7.747737003723182e-05, "loss": 1.669, "step": 69071 }, { "epoch": 2.3, "grad_norm": 0.6664015054702759, "learning_rate": 7.747036009229511e-05, "loss": 1.7475, "step": 69072 }, { "epoch": 2.3, "grad_norm": 0.6931419968605042, "learning_rate": 7.746335041747657e-05, "loss": 1.7152, "step": 69073 }, { "epoch": 2.3, "grad_norm": 0.6647897958755493, "learning_rate": 7.745634101278467e-05, "loss": 1.7483, "step": 69074 }, { "epoch": 2.3, "grad_norm": 0.6673557162284851, "learning_rate": 7.744933187822809e-05, "loss": 1.6912, "step": 69075 }, { "epoch": 2.3, "grad_norm": 0.6644348502159119, "learning_rate": 7.74423230138152e-05, "loss": 1.7156, "step": 69076 }, { "epoch": 2.3, "grad_norm": 0.6589382886886597, "learning_rate": 7.743531441955445e-05, "loss": 1.6737, "step": 69077 }, { "epoch": 2.3, "grad_norm": 0.6678570508956909, "learning_rate": 7.742830609545443e-05, "loss": 1.681, "step": 69078 }, { "epoch": 2.3, "grad_norm": 0.6873465776443481, "learning_rate": 7.742129804152373e-05, "loss": 1.7321, "step": 69079 }, { "epoch": 2.3, "grad_norm": 0.6935213804244995, "learning_rate": 7.741429025777081e-05, "loss": 1.783, "step": 69080 }, { "epoch": 2.3, "grad_norm": 0.6963638663291931, "learning_rate": 7.740728274420405e-05, "loss": 1.7188, "step": 69081 }, { "epoch": 2.3, "grad_norm": 0.6640093326568604, "learning_rate": 7.74002755008321e-05, "loss": 1.7459, "step": 69082 }, { "epoch": 2.3, "grad_norm": 0.680226743221283, "learning_rate": 7.73932685276635e-05, "loss": 1.6476, "step": 69083 }, { "epoch": 2.3, "grad_norm": 0.6610888242721558, "learning_rate": 7.738626182470652e-05, "loss": 1.6971, "step": 69084 }, { "epoch": 2.3, "grad_norm": 0.6485505700111389, "learning_rate": 7.73792553919699e-05, "loss": 1.6995, "step": 69085 }, { "epoch": 2.3, "grad_norm": 0.663925290107727, "learning_rate": 7.737224922946201e-05, "loss": 1.6434, "step": 69086 }, { "epoch": 2.3, "grad_norm": 0.6834679245948792, "learning_rate": 7.736524333719146e-05, "loss": 1.7093, "step": 69087 }, { "epoch": 2.3, "grad_norm": 0.6435698866844177, "learning_rate": 7.735823771516664e-05, "loss": 1.7468, "step": 69088 }, { "epoch": 2.3, "grad_norm": 0.676216185092926, "learning_rate": 7.73512323633962e-05, "loss": 1.8203, "step": 69089 }, { "epoch": 2.3, "grad_norm": 0.6736063957214355, "learning_rate": 7.734422728188854e-05, "loss": 1.7201, "step": 69090 }, { "epoch": 2.3, "grad_norm": 0.6454417705535889, "learning_rate": 7.733722247065211e-05, "loss": 1.6264, "step": 69091 }, { "epoch": 2.3, "grad_norm": 0.6598174571990967, "learning_rate": 7.733021792969556e-05, "loss": 1.6429, "step": 69092 }, { "epoch": 2.3, "grad_norm": 0.6656048893928528, "learning_rate": 7.732321365902723e-05, "loss": 1.7022, "step": 69093 }, { "epoch": 2.3, "grad_norm": 0.644615113735199, "learning_rate": 7.731620965865581e-05, "loss": 1.7141, "step": 69094 }, { "epoch": 2.3, "grad_norm": 0.6529628038406372, "learning_rate": 7.73092059285897e-05, "loss": 1.6716, "step": 69095 }, { "epoch": 2.3, "grad_norm": 0.6759204268455505, "learning_rate": 7.730220246883731e-05, "loss": 1.6596, "step": 69096 }, { "epoch": 2.3, "grad_norm": 0.6520978212356567, "learning_rate": 7.729519927940732e-05, "loss": 1.7056, "step": 69097 }, { "epoch": 2.3, "grad_norm": 0.654100775718689, "learning_rate": 7.728819636030804e-05, "loss": 1.6695, "step": 69098 }, { "epoch": 2.3, "grad_norm": 0.659110963344574, "learning_rate": 7.728119371154818e-05, "loss": 1.739, "step": 69099 }, { "epoch": 2.3, "grad_norm": 0.6761096119880676, "learning_rate": 7.727419133313604e-05, "loss": 1.6762, "step": 69100 }, { "epoch": 2.3, "grad_norm": 0.6592967510223389, "learning_rate": 7.726718922508032e-05, "loss": 1.6684, "step": 69101 }, { "epoch": 2.3, "grad_norm": 0.6516509056091309, "learning_rate": 7.726018738738938e-05, "loss": 1.6196, "step": 69102 }, { "epoch": 2.3, "grad_norm": 0.6852713823318481, "learning_rate": 7.725318582007169e-05, "loss": 1.7748, "step": 69103 }, { "epoch": 2.3, "grad_norm": 0.6721149682998657, "learning_rate": 7.724618452313592e-05, "loss": 1.7678, "step": 69104 }, { "epoch": 2.3, "grad_norm": 0.6510961055755615, "learning_rate": 7.723918349659042e-05, "loss": 1.6663, "step": 69105 }, { "epoch": 2.3, "grad_norm": 0.6712250113487244, "learning_rate": 7.723218274044368e-05, "loss": 1.684, "step": 69106 }, { "epoch": 2.3, "grad_norm": 0.6943160891532898, "learning_rate": 7.722518225470421e-05, "loss": 1.6969, "step": 69107 }, { "epoch": 2.3, "grad_norm": 0.7076291441917419, "learning_rate": 7.721818203938064e-05, "loss": 1.5885, "step": 69108 }, { "epoch": 2.3, "grad_norm": 0.650930643081665, "learning_rate": 7.721118209448138e-05, "loss": 1.6256, "step": 69109 }, { "epoch": 2.3, "grad_norm": 0.6611592173576355, "learning_rate": 7.720418242001483e-05, "loss": 1.6765, "step": 69110 }, { "epoch": 2.3, "grad_norm": 0.6527900099754333, "learning_rate": 7.719718301598966e-05, "loss": 1.6541, "step": 69111 }, { "epoch": 2.3, "grad_norm": 0.6627085208892822, "learning_rate": 7.719018388241428e-05, "loss": 1.7015, "step": 69112 }, { "epoch": 2.3, "grad_norm": 0.6617439389228821, "learning_rate": 7.718318501929712e-05, "loss": 1.6582, "step": 69113 }, { "epoch": 2.3, "grad_norm": 0.6474210619926453, "learning_rate": 7.717618642664671e-05, "loss": 1.6735, "step": 69114 }, { "epoch": 2.3, "grad_norm": 0.6850467324256897, "learning_rate": 7.716918810447168e-05, "loss": 1.7368, "step": 69115 }, { "epoch": 2.3, "grad_norm": 0.6889856457710266, "learning_rate": 7.716219005278044e-05, "loss": 1.6556, "step": 69116 }, { "epoch": 2.3, "grad_norm": 0.669624388217926, "learning_rate": 7.715519227158138e-05, "loss": 1.6932, "step": 69117 }, { "epoch": 2.3, "grad_norm": 0.6496508121490479, "learning_rate": 7.714819476088314e-05, "loss": 1.71, "step": 69118 }, { "epoch": 2.3, "grad_norm": 0.6788831949234009, "learning_rate": 7.714119752069416e-05, "loss": 1.6363, "step": 69119 }, { "epoch": 2.3, "grad_norm": 0.680761456489563, "learning_rate": 7.713420055102287e-05, "loss": 1.7654, "step": 69120 }, { "epoch": 2.3, "grad_norm": 0.6670989394187927, "learning_rate": 7.712720385187778e-05, "loss": 1.7101, "step": 69121 }, { "epoch": 2.3, "grad_norm": 0.6975262761116028, "learning_rate": 7.712020742326756e-05, "loss": 1.729, "step": 69122 }, { "epoch": 2.3, "grad_norm": 0.6773616671562195, "learning_rate": 7.711321126520054e-05, "loss": 1.7245, "step": 69123 }, { "epoch": 2.3, "grad_norm": 0.6796225905418396, "learning_rate": 7.710621537768516e-05, "loss": 1.6756, "step": 69124 }, { "epoch": 2.3, "grad_norm": 0.6675506830215454, "learning_rate": 7.709921976073007e-05, "loss": 1.6931, "step": 69125 }, { "epoch": 2.3, "grad_norm": 0.6695587635040283, "learning_rate": 7.709222441434372e-05, "loss": 1.7105, "step": 69126 }, { "epoch": 2.3, "grad_norm": 0.6616403460502625, "learning_rate": 7.708522933853442e-05, "loss": 1.7382, "step": 69127 }, { "epoch": 2.3, "grad_norm": 0.6789913177490234, "learning_rate": 7.707823453331086e-05, "loss": 1.6499, "step": 69128 }, { "epoch": 2.3, "grad_norm": 0.6676638126373291, "learning_rate": 7.707123999868153e-05, "loss": 1.6442, "step": 69129 }, { "epoch": 2.3, "grad_norm": 0.6980292201042175, "learning_rate": 7.706424573465488e-05, "loss": 1.7689, "step": 69130 }, { "epoch": 2.3, "grad_norm": 0.6869869828224182, "learning_rate": 7.70572517412393e-05, "loss": 1.6348, "step": 69131 }, { "epoch": 2.3, "grad_norm": 0.6652300953865051, "learning_rate": 7.705025801844347e-05, "loss": 1.6653, "step": 69132 }, { "epoch": 2.3, "grad_norm": 0.6583881378173828, "learning_rate": 7.704326456627578e-05, "loss": 1.6243, "step": 69133 }, { "epoch": 2.3, "grad_norm": 0.6636567711830139, "learning_rate": 7.703627138474459e-05, "loss": 1.7096, "step": 69134 }, { "epoch": 2.3, "grad_norm": 0.6598514914512634, "learning_rate": 7.702927847385863e-05, "loss": 1.7586, "step": 69135 }, { "epoch": 2.3, "grad_norm": 0.6676415205001831, "learning_rate": 7.702228583362619e-05, "loss": 1.7, "step": 69136 }, { "epoch": 2.3, "grad_norm": 0.6847565174102783, "learning_rate": 7.701529346405589e-05, "loss": 1.6723, "step": 69137 }, { "epoch": 2.3, "grad_norm": 0.6994097232818604, "learning_rate": 7.700830136515612e-05, "loss": 1.7473, "step": 69138 }, { "epoch": 2.3, "grad_norm": 0.6549386978149414, "learning_rate": 7.70013095369355e-05, "loss": 1.7205, "step": 69139 }, { "epoch": 2.3, "grad_norm": 0.6963460445404053, "learning_rate": 7.699431797940241e-05, "loss": 1.7943, "step": 69140 }, { "epoch": 2.3, "grad_norm": 0.659244954586029, "learning_rate": 7.69873266925653e-05, "loss": 1.6849, "step": 69141 }, { "epoch": 2.3, "grad_norm": 0.6664732694625854, "learning_rate": 7.698033567643279e-05, "loss": 1.733, "step": 69142 }, { "epoch": 2.3, "grad_norm": 0.6763632893562317, "learning_rate": 7.697334493101322e-05, "loss": 1.7065, "step": 69143 }, { "epoch": 2.3, "grad_norm": 0.7015426754951477, "learning_rate": 7.696635445631522e-05, "loss": 1.6842, "step": 69144 }, { "epoch": 2.3, "grad_norm": 0.6786611676216125, "learning_rate": 7.695936425234719e-05, "loss": 1.6705, "step": 69145 }, { "epoch": 2.3, "grad_norm": 0.6545842885971069, "learning_rate": 7.695237431911756e-05, "loss": 1.6372, "step": 69146 }, { "epoch": 2.3, "grad_norm": 0.665995717048645, "learning_rate": 7.694538465663496e-05, "loss": 1.6845, "step": 69147 }, { "epoch": 2.3, "grad_norm": 0.670335590839386, "learning_rate": 7.69383952649077e-05, "loss": 1.7287, "step": 69148 }, { "epoch": 2.3, "grad_norm": 0.6535234451293945, "learning_rate": 7.693140614394446e-05, "loss": 1.6696, "step": 69149 }, { "epoch": 2.3, "grad_norm": 0.660170316696167, "learning_rate": 7.692441729375356e-05, "loss": 1.6965, "step": 69150 }, { "epoch": 2.3, "grad_norm": 0.680454671382904, "learning_rate": 7.691742871434362e-05, "loss": 1.7105, "step": 69151 }, { "epoch": 2.3, "grad_norm": 0.6753246188163757, "learning_rate": 7.691044040572303e-05, "loss": 1.674, "step": 69152 }, { "epoch": 2.3, "grad_norm": 0.6808702945709229, "learning_rate": 7.690345236790024e-05, "loss": 1.6826, "step": 69153 }, { "epoch": 2.3, "grad_norm": 0.6845106482505798, "learning_rate": 7.689646460088385e-05, "loss": 1.8323, "step": 69154 }, { "epoch": 2.3, "grad_norm": 0.688995361328125, "learning_rate": 7.68894771046823e-05, "loss": 1.7343, "step": 69155 }, { "epoch": 2.3, "grad_norm": 0.684616208076477, "learning_rate": 7.688248987930395e-05, "loss": 1.6955, "step": 69156 }, { "epoch": 2.3, "grad_norm": 0.6667613983154297, "learning_rate": 7.68755029247574e-05, "loss": 1.699, "step": 69157 }, { "epoch": 2.3, "grad_norm": 0.6750370860099792, "learning_rate": 7.686851624105118e-05, "loss": 1.6935, "step": 69158 }, { "epoch": 2.3, "grad_norm": 0.6740528345108032, "learning_rate": 7.686152982819371e-05, "loss": 1.7134, "step": 69159 }, { "epoch": 2.3, "grad_norm": 0.6848844885826111, "learning_rate": 7.685454368619338e-05, "loss": 1.6926, "step": 69160 }, { "epoch": 2.3, "grad_norm": 0.6705750226974487, "learning_rate": 7.684755781505885e-05, "loss": 1.7076, "step": 69161 }, { "epoch": 2.3, "grad_norm": 0.6625788807868958, "learning_rate": 7.68405722147985e-05, "loss": 1.7151, "step": 69162 }, { "epoch": 2.3, "grad_norm": 0.6644406318664551, "learning_rate": 7.68335868854207e-05, "loss": 1.648, "step": 69163 }, { "epoch": 2.3, "grad_norm": 0.673793613910675, "learning_rate": 7.682660182693409e-05, "loss": 1.6699, "step": 69164 }, { "epoch": 2.3, "grad_norm": 0.6937678456306458, "learning_rate": 7.681961703934724e-05, "loss": 1.6703, "step": 69165 }, { "epoch": 2.3, "grad_norm": 0.6763142347335815, "learning_rate": 7.681263252266835e-05, "loss": 1.711, "step": 69166 }, { "epoch": 2.3, "grad_norm": 0.7351628541946411, "learning_rate": 7.680564827690599e-05, "loss": 1.7227, "step": 69167 }, { "epoch": 2.3, "grad_norm": 0.6662293672561646, "learning_rate": 7.67986643020688e-05, "loss": 1.7133, "step": 69168 }, { "epoch": 2.3, "grad_norm": 0.6845105290412903, "learning_rate": 7.679168059816516e-05, "loss": 1.7104, "step": 69169 }, { "epoch": 2.3, "grad_norm": 0.6496326327323914, "learning_rate": 7.678469716520342e-05, "loss": 1.7639, "step": 69170 }, { "epoch": 2.3, "grad_norm": 0.6664610505104065, "learning_rate": 7.677771400319218e-05, "loss": 1.7061, "step": 69171 }, { "epoch": 2.3, "grad_norm": 0.6702706217765808, "learning_rate": 7.677073111214009e-05, "loss": 1.7081, "step": 69172 }, { "epoch": 2.3, "grad_norm": 0.6870294213294983, "learning_rate": 7.676374849205525e-05, "loss": 1.6626, "step": 69173 }, { "epoch": 2.3, "grad_norm": 0.66781085729599, "learning_rate": 7.675676614294636e-05, "loss": 1.6223, "step": 69174 }, { "epoch": 2.3, "grad_norm": 0.6919966340065002, "learning_rate": 7.674978406482189e-05, "loss": 1.7694, "step": 69175 }, { "epoch": 2.3, "grad_norm": 0.6847772002220154, "learning_rate": 7.674280225769032e-05, "loss": 1.6457, "step": 69176 }, { "epoch": 2.3, "grad_norm": 0.7093471884727478, "learning_rate": 7.673582072156001e-05, "loss": 1.7177, "step": 69177 }, { "epoch": 2.3, "grad_norm": 0.6512342691421509, "learning_rate": 7.672883945643952e-05, "loss": 1.6813, "step": 69178 }, { "epoch": 2.3, "grad_norm": 0.6936864852905273, "learning_rate": 7.67218584623375e-05, "loss": 1.7722, "step": 69179 }, { "epoch": 2.3, "grad_norm": 0.6985405087471008, "learning_rate": 7.671487773926208e-05, "loss": 1.6853, "step": 69180 }, { "epoch": 2.3, "grad_norm": 0.6621295213699341, "learning_rate": 7.670789728722184e-05, "loss": 1.6994, "step": 69181 }, { "epoch": 2.3, "grad_norm": 0.6784495115280151, "learning_rate": 7.670091710622547e-05, "loss": 1.6486, "step": 69182 }, { "epoch": 2.3, "grad_norm": 0.7486075758934021, "learning_rate": 7.669393719628124e-05, "loss": 1.7069, "step": 69183 }, { "epoch": 2.3, "grad_norm": 0.665109395980835, "learning_rate": 7.668695755739758e-05, "loss": 1.6648, "step": 69184 }, { "epoch": 2.3, "grad_norm": 0.6789559125900269, "learning_rate": 7.667997818958307e-05, "loss": 1.7187, "step": 69185 }, { "epoch": 2.3, "grad_norm": 0.6558392643928528, "learning_rate": 7.667299909284634e-05, "loss": 1.7023, "step": 69186 }, { "epoch": 2.3, "grad_norm": 0.683110773563385, "learning_rate": 7.66660202671955e-05, "loss": 1.6006, "step": 69187 }, { "epoch": 2.3, "grad_norm": 0.6652448773384094, "learning_rate": 7.665904171263921e-05, "loss": 1.7282, "step": 69188 }, { "epoch": 2.3, "grad_norm": 0.7136622071266174, "learning_rate": 7.6652063429186e-05, "loss": 1.783, "step": 69189 }, { "epoch": 2.3, "grad_norm": 0.6725921034812927, "learning_rate": 7.664508541684432e-05, "loss": 1.7043, "step": 69190 }, { "epoch": 2.3, "grad_norm": 0.6634565591812134, "learning_rate": 7.663810767562245e-05, "loss": 1.7159, "step": 69191 }, { "epoch": 2.3, "grad_norm": 0.6821064949035645, "learning_rate": 7.663113020552914e-05, "loss": 1.8074, "step": 69192 }, { "epoch": 2.3, "grad_norm": 0.6824425458908081, "learning_rate": 7.662415300657272e-05, "loss": 1.6772, "step": 69193 }, { "epoch": 2.3, "grad_norm": 0.6637356877326965, "learning_rate": 7.661717607876157e-05, "loss": 1.667, "step": 69194 }, { "epoch": 2.3, "grad_norm": 0.64095538854599, "learning_rate": 7.661019942210425e-05, "loss": 1.6522, "step": 69195 }, { "epoch": 2.3, "grad_norm": 0.677219569683075, "learning_rate": 7.660322303660932e-05, "loss": 1.6609, "step": 69196 }, { "epoch": 2.3, "grad_norm": 0.6851373314857483, "learning_rate": 7.659624692228517e-05, "loss": 1.6864, "step": 69197 }, { "epoch": 2.3, "grad_norm": 0.6561012268066406, "learning_rate": 7.658927107914018e-05, "loss": 1.7098, "step": 69198 }, { "epoch": 2.3, "grad_norm": 0.6688981056213379, "learning_rate": 7.658229550718295e-05, "loss": 1.6775, "step": 69199 }, { "epoch": 2.3, "grad_norm": 0.6720656752586365, "learning_rate": 7.657532020642193e-05, "loss": 1.6407, "step": 69200 }, { "epoch": 2.3, "grad_norm": 0.668708324432373, "learning_rate": 7.656834517686543e-05, "loss": 1.7116, "step": 69201 }, { "epoch": 2.3, "grad_norm": 0.6779087781906128, "learning_rate": 7.656137041852215e-05, "loss": 1.6589, "step": 69202 }, { "epoch": 2.3, "grad_norm": 0.6791961789131165, "learning_rate": 7.655439593140038e-05, "loss": 1.7515, "step": 69203 }, { "epoch": 2.3, "grad_norm": 0.6624453663825989, "learning_rate": 7.654742171550869e-05, "loss": 1.6816, "step": 69204 }, { "epoch": 2.3, "grad_norm": 0.6575378179550171, "learning_rate": 7.654044777085545e-05, "loss": 1.6702, "step": 69205 }, { "epoch": 2.3, "grad_norm": 0.644301176071167, "learning_rate": 7.653347409744924e-05, "loss": 1.7633, "step": 69206 }, { "epoch": 2.3, "grad_norm": 0.6626636385917664, "learning_rate": 7.652650069529849e-05, "loss": 1.6983, "step": 69207 }, { "epoch": 2.3, "grad_norm": 0.678404688835144, "learning_rate": 7.651952756441154e-05, "loss": 1.7576, "step": 69208 }, { "epoch": 2.3, "grad_norm": 0.682344913482666, "learning_rate": 7.651255470479707e-05, "loss": 1.6357, "step": 69209 }, { "epoch": 2.3, "grad_norm": 0.6781530380249023, "learning_rate": 7.650558211646332e-05, "loss": 1.676, "step": 69210 }, { "epoch": 2.3, "grad_norm": 0.6882519125938416, "learning_rate": 7.649860979941894e-05, "loss": 1.7034, "step": 69211 }, { "epoch": 2.3, "grad_norm": 0.7157109975814819, "learning_rate": 7.649163775367234e-05, "loss": 1.7346, "step": 69212 }, { "epoch": 2.3, "grad_norm": 0.7051880955696106, "learning_rate": 7.648466597923185e-05, "loss": 1.758, "step": 69213 }, { "epoch": 2.3, "grad_norm": 0.7006043195724487, "learning_rate": 7.647769447610613e-05, "loss": 1.7211, "step": 69214 }, { "epoch": 2.3, "grad_norm": 0.6418178677558899, "learning_rate": 7.647072324430348e-05, "loss": 1.6723, "step": 69215 }, { "epoch": 2.3, "grad_norm": 0.6693629026412964, "learning_rate": 7.646375228383251e-05, "loss": 1.6992, "step": 69216 }, { "epoch": 2.3, "grad_norm": 0.6876605153083801, "learning_rate": 7.645678159470153e-05, "loss": 1.663, "step": 69217 }, { "epoch": 2.3, "grad_norm": 0.6618098020553589, "learning_rate": 7.64498111769192e-05, "loss": 1.6972, "step": 69218 }, { "epoch": 2.3, "grad_norm": 0.6554477214813232, "learning_rate": 7.644284103049383e-05, "loss": 1.7612, "step": 69219 }, { "epoch": 2.3, "grad_norm": 0.6568798422813416, "learning_rate": 7.64358711554338e-05, "loss": 1.6381, "step": 69220 }, { "epoch": 2.3, "grad_norm": 0.6585827469825745, "learning_rate": 7.642890155174781e-05, "loss": 1.6812, "step": 69221 }, { "epoch": 2.3, "grad_norm": 0.6682611703872681, "learning_rate": 7.642193221944415e-05, "loss": 1.7069, "step": 69222 }, { "epoch": 2.3, "grad_norm": 0.6528204083442688, "learning_rate": 7.641496315853126e-05, "loss": 1.7522, "step": 69223 }, { "epoch": 2.3, "grad_norm": 0.7044804692268372, "learning_rate": 7.640799436901768e-05, "loss": 1.769, "step": 69224 }, { "epoch": 2.3, "grad_norm": 0.6657451391220093, "learning_rate": 7.64010258509119e-05, "loss": 1.7344, "step": 69225 }, { "epoch": 2.3, "grad_norm": 0.6525421738624573, "learning_rate": 7.639405760422236e-05, "loss": 1.6315, "step": 69226 }, { "epoch": 2.3, "grad_norm": 0.657780647277832, "learning_rate": 7.638708962895737e-05, "loss": 1.6976, "step": 69227 }, { "epoch": 2.3, "grad_norm": 0.6774312257766724, "learning_rate": 7.63801219251256e-05, "loss": 1.7262, "step": 69228 }, { "epoch": 2.3, "grad_norm": 0.6604026556015015, "learning_rate": 7.637315449273543e-05, "loss": 1.6192, "step": 69229 }, { "epoch": 2.3, "grad_norm": 0.6817427277565002, "learning_rate": 7.63661873317952e-05, "loss": 1.7254, "step": 69230 }, { "epoch": 2.3, "grad_norm": 0.6510111093521118, "learning_rate": 7.635922044231347e-05, "loss": 1.7025, "step": 69231 }, { "epoch": 2.3, "grad_norm": 0.694660484790802, "learning_rate": 7.635225382429881e-05, "loss": 1.7799, "step": 69232 }, { "epoch": 2.3, "grad_norm": 0.7141372561454773, "learning_rate": 7.634528747775951e-05, "loss": 1.7261, "step": 69233 }, { "epoch": 2.3, "grad_norm": 0.6501029133796692, "learning_rate": 7.633832140270401e-05, "loss": 1.7559, "step": 69234 }, { "epoch": 2.3, "grad_norm": 0.6744928359985352, "learning_rate": 7.633135559914086e-05, "loss": 1.7574, "step": 69235 }, { "epoch": 2.3, "grad_norm": 0.6532846093177795, "learning_rate": 7.632439006707865e-05, "loss": 1.7215, "step": 69236 }, { "epoch": 2.3, "grad_norm": 0.6731659173965454, "learning_rate": 7.631742480652545e-05, "loss": 1.7129, "step": 69237 }, { "epoch": 2.3, "grad_norm": 0.7027130722999573, "learning_rate": 7.631045981749e-05, "loss": 1.7131, "step": 69238 }, { "epoch": 2.3, "grad_norm": 0.6776280999183655, "learning_rate": 7.630349509998075e-05, "loss": 1.7114, "step": 69239 }, { "epoch": 2.3, "grad_norm": 0.6951122879981995, "learning_rate": 7.629653065400609e-05, "loss": 1.7159, "step": 69240 }, { "epoch": 2.3, "grad_norm": 0.6716629266738892, "learning_rate": 7.628956647957443e-05, "loss": 1.732, "step": 69241 }, { "epoch": 2.3, "grad_norm": 0.7159867882728577, "learning_rate": 7.628260257669434e-05, "loss": 1.6529, "step": 69242 }, { "epoch": 2.3, "grad_norm": 0.7077267169952393, "learning_rate": 7.62756389453742e-05, "loss": 1.7009, "step": 69243 }, { "epoch": 2.3, "grad_norm": 0.7151367664337158, "learning_rate": 7.626867558562238e-05, "loss": 1.6635, "step": 69244 }, { "epoch": 2.3, "grad_norm": 0.644761323928833, "learning_rate": 7.626171249744744e-05, "loss": 1.7623, "step": 69245 }, { "epoch": 2.3, "grad_norm": 0.650054931640625, "learning_rate": 7.625474968085787e-05, "loss": 1.6645, "step": 69246 }, { "epoch": 2.3, "grad_norm": 0.6612115502357483, "learning_rate": 7.62477871358621e-05, "loss": 1.6544, "step": 69247 }, { "epoch": 2.3, "grad_norm": 0.6642464995384216, "learning_rate": 7.624082486246845e-05, "loss": 1.7015, "step": 69248 }, { "epoch": 2.3, "grad_norm": 0.6611165404319763, "learning_rate": 7.623386286068558e-05, "loss": 1.6879, "step": 69249 }, { "epoch": 2.3, "grad_norm": 0.6654640436172485, "learning_rate": 7.62269011305218e-05, "loss": 1.7299, "step": 69250 }, { "epoch": 2.3, "grad_norm": 0.6765157580375671, "learning_rate": 7.621993967198551e-05, "loss": 1.7754, "step": 69251 }, { "epoch": 2.3, "grad_norm": 0.6945796608924866, "learning_rate": 7.621297848508532e-05, "loss": 1.688, "step": 69252 }, { "epoch": 2.3, "grad_norm": 0.6923818588256836, "learning_rate": 7.620601756982952e-05, "loss": 1.7624, "step": 69253 }, { "epoch": 2.3, "grad_norm": 0.671408474445343, "learning_rate": 7.619905692622672e-05, "loss": 1.638, "step": 69254 }, { "epoch": 2.3, "grad_norm": 0.6642983555793762, "learning_rate": 7.619209655428522e-05, "loss": 1.6404, "step": 69255 }, { "epoch": 2.3, "grad_norm": 0.6647756099700928, "learning_rate": 7.618513645401362e-05, "loss": 1.7279, "step": 69256 }, { "epoch": 2.3, "grad_norm": 0.6940454244613647, "learning_rate": 7.617817662542028e-05, "loss": 1.6508, "step": 69257 }, { "epoch": 2.3, "grad_norm": 0.6614733934402466, "learning_rate": 7.617121706851357e-05, "loss": 1.8134, "step": 69258 }, { "epoch": 2.3, "grad_norm": 0.666861355304718, "learning_rate": 7.616425778330214e-05, "loss": 1.743, "step": 69259 }, { "epoch": 2.3, "grad_norm": 0.6817865371704102, "learning_rate": 7.615729876979418e-05, "loss": 1.6681, "step": 69260 }, { "epoch": 2.3, "grad_norm": 0.67011559009552, "learning_rate": 7.615034002799839e-05, "loss": 1.6085, "step": 69261 }, { "epoch": 2.3, "grad_norm": 0.651496171951294, "learning_rate": 7.614338155792311e-05, "loss": 1.6206, "step": 69262 }, { "epoch": 2.3, "grad_norm": 0.6545154452323914, "learning_rate": 7.613642335957668e-05, "loss": 1.56, "step": 69263 }, { "epoch": 2.3, "grad_norm": 0.6713347434997559, "learning_rate": 7.612946543296776e-05, "loss": 1.712, "step": 69264 }, { "epoch": 2.3, "grad_norm": 0.6728603839874268, "learning_rate": 7.612250777810458e-05, "loss": 1.7155, "step": 69265 }, { "epoch": 2.3, "grad_norm": 0.6721819043159485, "learning_rate": 7.611555039499575e-05, "loss": 1.7397, "step": 69266 }, { "epoch": 2.3, "grad_norm": 0.6813029050827026, "learning_rate": 7.61085932836496e-05, "loss": 1.7016, "step": 69267 }, { "epoch": 2.3, "grad_norm": 0.6622116565704346, "learning_rate": 7.610163644407472e-05, "loss": 1.7342, "step": 69268 }, { "epoch": 2.3, "grad_norm": 0.6659064888954163, "learning_rate": 7.609467987627944e-05, "loss": 1.7514, "step": 69269 }, { "epoch": 2.3, "grad_norm": 0.7329012751579285, "learning_rate": 7.608772358027215e-05, "loss": 1.7273, "step": 69270 }, { "epoch": 2.3, "grad_norm": 0.6777167916297913, "learning_rate": 7.608076755606145e-05, "loss": 1.7347, "step": 69271 }, { "epoch": 2.3, "grad_norm": 0.6888683438301086, "learning_rate": 7.607381180365573e-05, "loss": 1.6601, "step": 69272 }, { "epoch": 2.3, "grad_norm": 0.682551920413971, "learning_rate": 7.606685632306331e-05, "loss": 1.6828, "step": 69273 }, { "epoch": 2.3, "grad_norm": 0.6394988298416138, "learning_rate": 7.605990111429272e-05, "loss": 1.6485, "step": 69274 }, { "epoch": 2.3, "grad_norm": 0.6655908823013306, "learning_rate": 7.605294617735253e-05, "loss": 1.6947, "step": 69275 }, { "epoch": 2.3, "grad_norm": 0.7034375667572021, "learning_rate": 7.604599151225101e-05, "loss": 1.7377, "step": 69276 }, { "epoch": 2.3, "grad_norm": 0.6627148985862732, "learning_rate": 7.603903711899662e-05, "loss": 1.7047, "step": 69277 }, { "epoch": 2.3, "grad_norm": 0.658613383769989, "learning_rate": 7.603208299759792e-05, "loss": 1.6954, "step": 69278 }, { "epoch": 2.3, "grad_norm": 0.6647243499755859, "learning_rate": 7.602512914806326e-05, "loss": 1.7945, "step": 69279 }, { "epoch": 2.3, "grad_norm": 0.6722697615623474, "learning_rate": 7.601817557040102e-05, "loss": 1.7713, "step": 69280 }, { "epoch": 2.3, "grad_norm": 0.6709471344947815, "learning_rate": 7.60112222646197e-05, "loss": 1.712, "step": 69281 }, { "epoch": 2.31, "grad_norm": 0.6661292314529419, "learning_rate": 7.600426923072792e-05, "loss": 1.7155, "step": 69282 }, { "epoch": 2.31, "grad_norm": 0.6882764101028442, "learning_rate": 7.599731646873378e-05, "loss": 1.7149, "step": 69283 }, { "epoch": 2.31, "grad_norm": 0.653271496295929, "learning_rate": 7.599036397864591e-05, "loss": 1.6799, "step": 69284 }, { "epoch": 2.31, "grad_norm": 0.6692624688148499, "learning_rate": 7.598341176047283e-05, "loss": 1.7154, "step": 69285 }, { "epoch": 2.31, "grad_norm": 0.6853432059288025, "learning_rate": 7.597645981422286e-05, "loss": 1.7943, "step": 69286 }, { "epoch": 2.31, "grad_norm": 0.6840733885765076, "learning_rate": 7.596950813990437e-05, "loss": 1.6747, "step": 69287 }, { "epoch": 2.31, "grad_norm": 0.6594786047935486, "learning_rate": 7.596255673752589e-05, "loss": 1.6698, "step": 69288 }, { "epoch": 2.31, "grad_norm": 0.6856210231781006, "learning_rate": 7.595560560709602e-05, "loss": 1.7821, "step": 69289 }, { "epoch": 2.31, "grad_norm": 0.6683951616287231, "learning_rate": 7.594865474862286e-05, "loss": 1.6496, "step": 69290 }, { "epoch": 2.31, "grad_norm": 0.6519428491592407, "learning_rate": 7.594170416211505e-05, "loss": 1.694, "step": 69291 }, { "epoch": 2.31, "grad_norm": 0.6784886121749878, "learning_rate": 7.593475384758105e-05, "loss": 1.7311, "step": 69292 }, { "epoch": 2.31, "grad_norm": 0.6641359329223633, "learning_rate": 7.592780380502927e-05, "loss": 1.6974, "step": 69293 }, { "epoch": 2.31, "grad_norm": 0.657742977142334, "learning_rate": 7.5920854034468e-05, "loss": 1.727, "step": 69294 }, { "epoch": 2.31, "grad_norm": 0.6725345849990845, "learning_rate": 7.591390453590583e-05, "loss": 1.6719, "step": 69295 }, { "epoch": 2.31, "grad_norm": 0.6405019164085388, "learning_rate": 7.590695530935132e-05, "loss": 1.7021, "step": 69296 }, { "epoch": 2.31, "grad_norm": 0.6765229105949402, "learning_rate": 7.59000063548126e-05, "loss": 1.7459, "step": 69297 }, { "epoch": 2.31, "grad_norm": 0.6502485871315002, "learning_rate": 7.589305767229822e-05, "loss": 1.6829, "step": 69298 }, { "epoch": 2.31, "grad_norm": 0.6492429375648499, "learning_rate": 7.588610926181677e-05, "loss": 1.7053, "step": 69299 }, { "epoch": 2.31, "grad_norm": 0.6898348927497864, "learning_rate": 7.587916112337653e-05, "loss": 1.63, "step": 69300 }, { "epoch": 2.31, "grad_norm": 0.6668274998664856, "learning_rate": 7.587221325698588e-05, "loss": 1.6597, "step": 69301 }, { "epoch": 2.31, "grad_norm": 0.6638326644897461, "learning_rate": 7.586526566265334e-05, "loss": 1.6291, "step": 69302 }, { "epoch": 2.31, "grad_norm": 0.7052292823791504, "learning_rate": 7.585831834038753e-05, "loss": 1.6722, "step": 69303 }, { "epoch": 2.31, "grad_norm": 0.677426815032959, "learning_rate": 7.585137129019652e-05, "loss": 1.6988, "step": 69304 }, { "epoch": 2.31, "grad_norm": 0.6334586143493652, "learning_rate": 7.584442451208891e-05, "loss": 1.7006, "step": 69305 }, { "epoch": 2.31, "grad_norm": 0.6673204898834229, "learning_rate": 7.583747800607324e-05, "loss": 1.6588, "step": 69306 }, { "epoch": 2.31, "grad_norm": 0.7015858292579651, "learning_rate": 7.583053177215784e-05, "loss": 1.7264, "step": 69307 }, { "epoch": 2.31, "grad_norm": 0.6398734450340271, "learning_rate": 7.582358581035106e-05, "loss": 1.6584, "step": 69308 }, { "epoch": 2.31, "grad_norm": 0.6731863021850586, "learning_rate": 7.581664012066148e-05, "loss": 1.6709, "step": 69309 }, { "epoch": 2.31, "grad_norm": 0.6696283221244812, "learning_rate": 7.580969470309748e-05, "loss": 1.6997, "step": 69310 }, { "epoch": 2.31, "grad_norm": 0.6645180583000183, "learning_rate": 7.580274955766739e-05, "loss": 1.7204, "step": 69311 }, { "epoch": 2.31, "grad_norm": 0.6779946088790894, "learning_rate": 7.579580468437973e-05, "loss": 1.715, "step": 69312 }, { "epoch": 2.31, "grad_norm": 0.6426515579223633, "learning_rate": 7.578886008324302e-05, "loss": 1.6819, "step": 69313 }, { "epoch": 2.31, "grad_norm": 0.6707026958465576, "learning_rate": 7.57819157542656e-05, "loss": 1.7298, "step": 69314 }, { "epoch": 2.31, "grad_norm": 0.6564664244651794, "learning_rate": 7.57749716974558e-05, "loss": 1.7366, "step": 69315 }, { "epoch": 2.31, "grad_norm": 0.6872625946998596, "learning_rate": 7.576802791282225e-05, "loss": 1.7919, "step": 69316 }, { "epoch": 2.31, "grad_norm": 0.6752256155014038, "learning_rate": 7.576108440037327e-05, "loss": 1.6537, "step": 69317 }, { "epoch": 2.31, "grad_norm": 0.6715435981750488, "learning_rate": 7.57541411601172e-05, "loss": 1.6636, "step": 69318 }, { "epoch": 2.31, "grad_norm": 0.6560841798782349, "learning_rate": 7.574719819206262e-05, "loss": 1.6861, "step": 69319 }, { "epoch": 2.31, "grad_norm": 0.6955401301383972, "learning_rate": 7.574025549621785e-05, "loss": 1.747, "step": 69320 }, { "epoch": 2.31, "grad_norm": 0.6840122938156128, "learning_rate": 7.573331307259145e-05, "loss": 1.7247, "step": 69321 }, { "epoch": 2.31, "grad_norm": 0.6712920069694519, "learning_rate": 7.572637092119168e-05, "loss": 1.7245, "step": 69322 }, { "epoch": 2.31, "grad_norm": 0.6577792763710022, "learning_rate": 7.571942904202711e-05, "loss": 1.7528, "step": 69323 }, { "epoch": 2.31, "grad_norm": 0.6539747714996338, "learning_rate": 7.571248743510614e-05, "loss": 1.7625, "step": 69324 }, { "epoch": 2.31, "grad_norm": 0.6797444224357605, "learning_rate": 7.570554610043706e-05, "loss": 1.6648, "step": 69325 }, { "epoch": 2.31, "grad_norm": 0.6885807514190674, "learning_rate": 7.569860503802849e-05, "loss": 1.7248, "step": 69326 }, { "epoch": 2.31, "grad_norm": 0.6522706747055054, "learning_rate": 7.569166424788869e-05, "loss": 1.6424, "step": 69327 }, { "epoch": 2.31, "grad_norm": 0.6786211133003235, "learning_rate": 7.568472373002621e-05, "loss": 1.6623, "step": 69328 }, { "epoch": 2.31, "grad_norm": 0.6792954802513123, "learning_rate": 7.567778348444945e-05, "loss": 1.7679, "step": 69329 }, { "epoch": 2.31, "grad_norm": 0.6501284837722778, "learning_rate": 7.56708435111667e-05, "loss": 1.7004, "step": 69330 }, { "epoch": 2.31, "grad_norm": 0.6654266119003296, "learning_rate": 7.56639038101866e-05, "loss": 1.6111, "step": 69331 }, { "epoch": 2.31, "grad_norm": 0.6501404643058777, "learning_rate": 7.565696438151737e-05, "loss": 1.7408, "step": 69332 }, { "epoch": 2.31, "grad_norm": 0.6758410930633545, "learning_rate": 7.565002522516764e-05, "loss": 1.6531, "step": 69333 }, { "epoch": 2.31, "grad_norm": 0.7085152864456177, "learning_rate": 7.564308634114561e-05, "loss": 1.7108, "step": 69334 }, { "epoch": 2.31, "grad_norm": 0.6808856129646301, "learning_rate": 7.563614772945992e-05, "loss": 1.6031, "step": 69335 }, { "epoch": 2.31, "grad_norm": 0.6576151847839355, "learning_rate": 7.56292093901189e-05, "loss": 1.6749, "step": 69336 }, { "epoch": 2.31, "grad_norm": 0.668733537197113, "learning_rate": 7.562227132313083e-05, "loss": 1.7094, "step": 69337 }, { "epoch": 2.31, "grad_norm": 0.6417344212532043, "learning_rate": 7.561533352850438e-05, "loss": 1.6651, "step": 69338 }, { "epoch": 2.31, "grad_norm": 0.6555690169334412, "learning_rate": 7.560839600624784e-05, "loss": 1.6577, "step": 69339 }, { "epoch": 2.31, "grad_norm": 0.6740647554397583, "learning_rate": 7.560145875636956e-05, "loss": 1.645, "step": 69340 }, { "epoch": 2.31, "grad_norm": 0.6707607507705688, "learning_rate": 7.559452177887806e-05, "loss": 1.6501, "step": 69341 }, { "epoch": 2.31, "grad_norm": 0.6929783821105957, "learning_rate": 7.55875850737818e-05, "loss": 1.7663, "step": 69342 }, { "epoch": 2.31, "grad_norm": 0.6729027628898621, "learning_rate": 7.558064864108917e-05, "loss": 1.6785, "step": 69343 }, { "epoch": 2.31, "grad_norm": 0.6911768913269043, "learning_rate": 7.55737124808085e-05, "loss": 1.7028, "step": 69344 }, { "epoch": 2.31, "grad_norm": 0.6872034072875977, "learning_rate": 7.556677659294832e-05, "loss": 1.7367, "step": 69345 }, { "epoch": 2.31, "grad_norm": 0.6721389889717102, "learning_rate": 7.555984097751702e-05, "loss": 1.7299, "step": 69346 }, { "epoch": 2.31, "grad_norm": 0.6586378812789917, "learning_rate": 7.555290563452292e-05, "loss": 1.6402, "step": 69347 }, { "epoch": 2.31, "grad_norm": 0.6681941151618958, "learning_rate": 7.554597056397452e-05, "loss": 1.6417, "step": 69348 }, { "epoch": 2.31, "grad_norm": 0.6961587071418762, "learning_rate": 7.553903576588035e-05, "loss": 1.6561, "step": 69349 }, { "epoch": 2.31, "grad_norm": 0.6943386793136597, "learning_rate": 7.55321012402487e-05, "loss": 1.6682, "step": 69350 }, { "epoch": 2.31, "grad_norm": 0.6855641007423401, "learning_rate": 7.552516698708789e-05, "loss": 1.6834, "step": 69351 }, { "epoch": 2.31, "grad_norm": 0.6785277128219604, "learning_rate": 7.551823300640649e-05, "loss": 1.7047, "step": 69352 }, { "epoch": 2.31, "grad_norm": 0.6753189563751221, "learning_rate": 7.551129929821302e-05, "loss": 1.7569, "step": 69353 }, { "epoch": 2.31, "grad_norm": 0.6514090895652771, "learning_rate": 7.550436586251562e-05, "loss": 1.6636, "step": 69354 }, { "epoch": 2.31, "grad_norm": 0.686486542224884, "learning_rate": 7.549743269932282e-05, "loss": 1.6973, "step": 69355 }, { "epoch": 2.31, "grad_norm": 0.6628768444061279, "learning_rate": 7.549049980864315e-05, "loss": 1.7615, "step": 69356 }, { "epoch": 2.31, "grad_norm": 0.692497193813324, "learning_rate": 7.548356719048494e-05, "loss": 1.7521, "step": 69357 }, { "epoch": 2.31, "grad_norm": 0.6871027946472168, "learning_rate": 7.54766348448565e-05, "loss": 1.738, "step": 69358 }, { "epoch": 2.31, "grad_norm": 0.6852932572364807, "learning_rate": 7.546970277176645e-05, "loss": 1.6556, "step": 69359 }, { "epoch": 2.31, "grad_norm": 0.6593133211135864, "learning_rate": 7.546277097122305e-05, "loss": 1.5985, "step": 69360 }, { "epoch": 2.31, "grad_norm": 0.6752845644950867, "learning_rate": 7.545583944323473e-05, "loss": 1.6977, "step": 69361 }, { "epoch": 2.31, "grad_norm": 0.6773269772529602, "learning_rate": 7.544890818780992e-05, "loss": 1.6577, "step": 69362 }, { "epoch": 2.31, "grad_norm": 0.704894483089447, "learning_rate": 7.54419772049571e-05, "loss": 1.7297, "step": 69363 }, { "epoch": 2.31, "grad_norm": 0.6682701706886292, "learning_rate": 7.543504649468469e-05, "loss": 1.6285, "step": 69364 }, { "epoch": 2.31, "grad_norm": 0.674767255783081, "learning_rate": 7.542811605700091e-05, "loss": 1.6487, "step": 69365 }, { "epoch": 2.31, "grad_norm": 0.6920630931854248, "learning_rate": 7.542118589191442e-05, "loss": 1.7069, "step": 69366 }, { "epoch": 2.31, "grad_norm": 0.6802851557731628, "learning_rate": 7.541425599943353e-05, "loss": 1.6708, "step": 69367 }, { "epoch": 2.31, "grad_norm": 0.6428323984146118, "learning_rate": 7.540732637956654e-05, "loss": 1.6515, "step": 69368 }, { "epoch": 2.31, "grad_norm": 0.6683449149131775, "learning_rate": 7.540039703232203e-05, "loss": 1.6963, "step": 69369 }, { "epoch": 2.31, "grad_norm": 0.6560266017913818, "learning_rate": 7.539346795770828e-05, "loss": 1.6687, "step": 69370 }, { "epoch": 2.31, "grad_norm": 0.6733516454696655, "learning_rate": 7.538653915573385e-05, "loss": 1.7184, "step": 69371 }, { "epoch": 2.31, "grad_norm": 0.6539842486381531, "learning_rate": 7.537961062640698e-05, "loss": 1.7188, "step": 69372 }, { "epoch": 2.31, "grad_norm": 0.6818822622299194, "learning_rate": 7.537268236973629e-05, "loss": 1.6211, "step": 69373 }, { "epoch": 2.31, "grad_norm": 0.6688960194587708, "learning_rate": 7.536575438573e-05, "loss": 1.7621, "step": 69374 }, { "epoch": 2.31, "grad_norm": 0.6814238429069519, "learning_rate": 7.535882667439655e-05, "loss": 1.8094, "step": 69375 }, { "epoch": 2.31, "grad_norm": 0.7099916934967041, "learning_rate": 7.535189923574442e-05, "loss": 1.6539, "step": 69376 }, { "epoch": 2.31, "grad_norm": 0.6334294080734253, "learning_rate": 7.534497206978196e-05, "loss": 1.6752, "step": 69377 }, { "epoch": 2.31, "grad_norm": 0.6608376502990723, "learning_rate": 7.533804517651764e-05, "loss": 1.7433, "step": 69378 }, { "epoch": 2.31, "grad_norm": 0.6887698173522949, "learning_rate": 7.533111855595985e-05, "loss": 1.6768, "step": 69379 }, { "epoch": 2.31, "grad_norm": 0.6687930822372437, "learning_rate": 7.532419220811691e-05, "loss": 1.6992, "step": 69380 }, { "epoch": 2.31, "grad_norm": 0.68399578332901, "learning_rate": 7.531726613299735e-05, "loss": 1.7349, "step": 69381 }, { "epoch": 2.31, "grad_norm": 0.6624309420585632, "learning_rate": 7.531034033060945e-05, "loss": 1.7549, "step": 69382 }, { "epoch": 2.31, "grad_norm": 0.6644994020462036, "learning_rate": 7.530341480096179e-05, "loss": 1.6657, "step": 69383 }, { "epoch": 2.31, "grad_norm": 0.6717998385429382, "learning_rate": 7.529648954406256e-05, "loss": 1.6505, "step": 69384 }, { "epoch": 2.31, "grad_norm": 0.7067827582359314, "learning_rate": 7.52895645599204e-05, "loss": 1.6194, "step": 69385 }, { "epoch": 2.31, "grad_norm": 0.7044452428817749, "learning_rate": 7.528263984854359e-05, "loss": 1.6929, "step": 69386 }, { "epoch": 2.31, "grad_norm": 0.6725279688835144, "learning_rate": 7.527571540994045e-05, "loss": 1.6162, "step": 69387 }, { "epoch": 2.31, "grad_norm": 0.6521136164665222, "learning_rate": 7.526879124411958e-05, "loss": 1.7292, "step": 69388 }, { "epoch": 2.31, "grad_norm": 0.6639534831047058, "learning_rate": 7.526186735108925e-05, "loss": 1.6619, "step": 69389 }, { "epoch": 2.31, "grad_norm": 0.6937118768692017, "learning_rate": 7.525494373085785e-05, "loss": 1.7536, "step": 69390 }, { "epoch": 2.31, "grad_norm": 0.6468437314033508, "learning_rate": 7.524802038343383e-05, "loss": 1.7217, "step": 69391 }, { "epoch": 2.31, "grad_norm": 0.6515213847160339, "learning_rate": 7.524109730882568e-05, "loss": 1.6839, "step": 69392 }, { "epoch": 2.31, "grad_norm": 0.6928199529647827, "learning_rate": 7.523417450704172e-05, "loss": 1.6412, "step": 69393 }, { "epoch": 2.31, "grad_norm": 0.6845470070838928, "learning_rate": 7.522725197809028e-05, "loss": 1.7725, "step": 69394 }, { "epoch": 2.31, "grad_norm": 0.6798139214515686, "learning_rate": 7.522032972197989e-05, "loss": 1.5848, "step": 69395 }, { "epoch": 2.31, "grad_norm": 0.6564814448356628, "learning_rate": 7.521340773871891e-05, "loss": 1.6261, "step": 69396 }, { "epoch": 2.31, "grad_norm": 0.6483152508735657, "learning_rate": 7.520648602831567e-05, "loss": 1.662, "step": 69397 }, { "epoch": 2.31, "grad_norm": 0.6835882663726807, "learning_rate": 7.519956459077863e-05, "loss": 1.7175, "step": 69398 }, { "epoch": 2.31, "grad_norm": 0.7000153064727783, "learning_rate": 7.519264342611634e-05, "loss": 1.7003, "step": 69399 }, { "epoch": 2.31, "grad_norm": 0.6686622500419617, "learning_rate": 7.51857225343369e-05, "loss": 1.7035, "step": 69400 }, { "epoch": 2.31, "grad_norm": 0.6748051047325134, "learning_rate": 7.517880191544887e-05, "loss": 1.7257, "step": 69401 }, { "epoch": 2.31, "grad_norm": 0.6483080983161926, "learning_rate": 7.517188156946074e-05, "loss": 1.7002, "step": 69402 }, { "epoch": 2.31, "grad_norm": 0.6537312269210815, "learning_rate": 7.516496149638082e-05, "loss": 1.6603, "step": 69403 }, { "epoch": 2.31, "grad_norm": 0.6626138091087341, "learning_rate": 7.51580416962174e-05, "loss": 1.6821, "step": 69404 }, { "epoch": 2.31, "grad_norm": 0.704931378364563, "learning_rate": 7.515112216897902e-05, "loss": 1.7145, "step": 69405 }, { "epoch": 2.31, "grad_norm": 0.6575223207473755, "learning_rate": 7.51442029146742e-05, "loss": 1.6571, "step": 69406 }, { "epoch": 2.31, "grad_norm": 0.6925482749938965, "learning_rate": 7.513728393331102e-05, "loss": 1.6657, "step": 69407 }, { "epoch": 2.31, "grad_norm": 0.7004901766777039, "learning_rate": 7.513036522489808e-05, "loss": 1.6498, "step": 69408 }, { "epoch": 2.31, "grad_norm": 0.6835612654685974, "learning_rate": 7.512344678944377e-05, "loss": 1.7321, "step": 69409 }, { "epoch": 2.31, "grad_norm": 0.6740134358406067, "learning_rate": 7.511652862695649e-05, "loss": 1.6594, "step": 69410 }, { "epoch": 2.31, "grad_norm": 0.6570438146591187, "learning_rate": 7.510961073744452e-05, "loss": 1.7039, "step": 69411 }, { "epoch": 2.31, "grad_norm": 0.668815016746521, "learning_rate": 7.510269312091636e-05, "loss": 1.64, "step": 69412 }, { "epoch": 2.31, "grad_norm": 0.6859540343284607, "learning_rate": 7.509577577738055e-05, "loss": 1.6984, "step": 69413 }, { "epoch": 2.31, "grad_norm": 0.6906561851501465, "learning_rate": 7.508885870684517e-05, "loss": 1.6033, "step": 69414 }, { "epoch": 2.31, "grad_norm": 0.6659241914749146, "learning_rate": 7.508194190931874e-05, "loss": 1.7399, "step": 69415 }, { "epoch": 2.31, "grad_norm": 0.6801967024803162, "learning_rate": 7.507502538480981e-05, "loss": 1.7119, "step": 69416 }, { "epoch": 2.31, "grad_norm": 0.6450138688087463, "learning_rate": 7.506810913332665e-05, "loss": 1.6436, "step": 69417 }, { "epoch": 2.31, "grad_norm": 0.7033281326293945, "learning_rate": 7.506119315487758e-05, "loss": 1.6819, "step": 69418 }, { "epoch": 2.31, "grad_norm": 0.6928002834320068, "learning_rate": 7.505427744947108e-05, "loss": 1.6844, "step": 69419 }, { "epoch": 2.31, "grad_norm": 0.6655753254890442, "learning_rate": 7.504736201711572e-05, "loss": 1.7184, "step": 69420 }, { "epoch": 2.31, "grad_norm": 0.6733188033103943, "learning_rate": 7.504044685781953e-05, "loss": 1.7238, "step": 69421 }, { "epoch": 2.31, "grad_norm": 0.7399253249168396, "learning_rate": 7.503353197159112e-05, "loss": 1.7305, "step": 69422 }, { "epoch": 2.31, "grad_norm": 0.66545569896698, "learning_rate": 7.50266173584389e-05, "loss": 1.6659, "step": 69423 }, { "epoch": 2.31, "grad_norm": 0.6740188002586365, "learning_rate": 7.501970301837127e-05, "loss": 1.7013, "step": 69424 }, { "epoch": 2.31, "grad_norm": 0.666943371295929, "learning_rate": 7.501278895139645e-05, "loss": 1.683, "step": 69425 }, { "epoch": 2.31, "grad_norm": 0.6464475393295288, "learning_rate": 7.500587515752305e-05, "loss": 1.6973, "step": 69426 }, { "epoch": 2.31, "grad_norm": 0.6711908578872681, "learning_rate": 7.499896163675933e-05, "loss": 1.6902, "step": 69427 }, { "epoch": 2.31, "grad_norm": 0.6790719628334045, "learning_rate": 7.49920483891137e-05, "loss": 1.6891, "step": 69428 }, { "epoch": 2.31, "grad_norm": 0.6874180436134338, "learning_rate": 7.49851354145945e-05, "loss": 1.7001, "step": 69429 }, { "epoch": 2.31, "grad_norm": 0.664499044418335, "learning_rate": 7.497822271321033e-05, "loss": 1.6534, "step": 69430 }, { "epoch": 2.31, "grad_norm": 0.6813308000564575, "learning_rate": 7.497131028496943e-05, "loss": 1.7031, "step": 69431 }, { "epoch": 2.31, "grad_norm": 0.6816094517707825, "learning_rate": 7.496439812988009e-05, "loss": 1.692, "step": 69432 }, { "epoch": 2.31, "grad_norm": 0.707619845867157, "learning_rate": 7.495748624795091e-05, "loss": 1.7021, "step": 69433 }, { "epoch": 2.31, "grad_norm": 0.6621691584587097, "learning_rate": 7.49505746391902e-05, "loss": 1.7695, "step": 69434 }, { "epoch": 2.31, "grad_norm": 0.661297619342804, "learning_rate": 7.494366330360626e-05, "loss": 1.6463, "step": 69435 }, { "epoch": 2.31, "grad_norm": 0.6896641850471497, "learning_rate": 7.49367522412076e-05, "loss": 1.7784, "step": 69436 }, { "epoch": 2.31, "grad_norm": 0.6594379544258118, "learning_rate": 7.49298414520025e-05, "loss": 1.7742, "step": 69437 }, { "epoch": 2.31, "grad_norm": 0.6604921221733093, "learning_rate": 7.492293093599947e-05, "loss": 1.7077, "step": 69438 }, { "epoch": 2.31, "grad_norm": 0.7010830640792847, "learning_rate": 7.491602069320679e-05, "loss": 1.7124, "step": 69439 }, { "epoch": 2.31, "grad_norm": 0.6626594066619873, "learning_rate": 7.490911072363296e-05, "loss": 1.6458, "step": 69440 }, { "epoch": 2.31, "grad_norm": 0.6603862643241882, "learning_rate": 7.49022010272863e-05, "loss": 1.6842, "step": 69441 }, { "epoch": 2.31, "grad_norm": 0.6647091507911682, "learning_rate": 7.489529160417511e-05, "loss": 1.7063, "step": 69442 }, { "epoch": 2.31, "grad_norm": 0.6827968955039978, "learning_rate": 7.488838245430799e-05, "loss": 1.7279, "step": 69443 }, { "epoch": 2.31, "grad_norm": 0.6842189431190491, "learning_rate": 7.488147357769307e-05, "loss": 1.7426, "step": 69444 }, { "epoch": 2.31, "grad_norm": 0.6791355013847351, "learning_rate": 7.487456497433902e-05, "loss": 1.6878, "step": 69445 }, { "epoch": 2.31, "grad_norm": 0.6887349486351013, "learning_rate": 7.486765664425403e-05, "loss": 1.7328, "step": 69446 }, { "epoch": 2.31, "grad_norm": 0.6856434941291809, "learning_rate": 7.486074858744644e-05, "loss": 1.6311, "step": 69447 }, { "epoch": 2.31, "grad_norm": 0.6694656610488892, "learning_rate": 7.485384080392485e-05, "loss": 1.7618, "step": 69448 }, { "epoch": 2.31, "grad_norm": 0.6443885564804077, "learning_rate": 7.48469332936974e-05, "loss": 1.7109, "step": 69449 }, { "epoch": 2.31, "grad_norm": 0.652579665184021, "learning_rate": 7.484002605677271e-05, "loss": 1.7756, "step": 69450 }, { "epoch": 2.31, "grad_norm": 0.6618010401725769, "learning_rate": 7.483311909315897e-05, "loss": 1.7152, "step": 69451 }, { "epoch": 2.31, "grad_norm": 0.6721124053001404, "learning_rate": 7.482621240286471e-05, "loss": 1.8243, "step": 69452 }, { "epoch": 2.31, "grad_norm": 0.6655829548835754, "learning_rate": 7.48193059858983e-05, "loss": 1.7205, "step": 69453 }, { "epoch": 2.31, "grad_norm": 0.6704586148262024, "learning_rate": 7.481239984226793e-05, "loss": 1.6919, "step": 69454 }, { "epoch": 2.31, "grad_norm": 0.7046385407447815, "learning_rate": 7.480549397198223e-05, "loss": 1.7524, "step": 69455 }, { "epoch": 2.31, "grad_norm": 0.6691304445266724, "learning_rate": 7.479858837504949e-05, "loss": 1.6935, "step": 69456 }, { "epoch": 2.31, "grad_norm": 0.6615681648254395, "learning_rate": 7.479168305147796e-05, "loss": 1.6813, "step": 69457 }, { "epoch": 2.31, "grad_norm": 0.657687783241272, "learning_rate": 7.478477800127622e-05, "loss": 1.717, "step": 69458 }, { "epoch": 2.31, "grad_norm": 0.7066760659217834, "learning_rate": 7.477787322445258e-05, "loss": 1.6129, "step": 69459 }, { "epoch": 2.31, "grad_norm": 0.6646301746368408, "learning_rate": 7.477096872101545e-05, "loss": 1.7446, "step": 69460 }, { "epoch": 2.31, "grad_norm": 0.6730381846427917, "learning_rate": 7.476406449097311e-05, "loss": 1.7112, "step": 69461 }, { "epoch": 2.31, "grad_norm": 0.6581345200538635, "learning_rate": 7.475716053433409e-05, "loss": 1.6961, "step": 69462 }, { "epoch": 2.31, "grad_norm": 0.6671173572540283, "learning_rate": 7.475025685110667e-05, "loss": 1.6987, "step": 69463 }, { "epoch": 2.31, "grad_norm": 0.6639359593391418, "learning_rate": 7.474335344129918e-05, "loss": 1.6813, "step": 69464 }, { "epoch": 2.31, "grad_norm": 0.6710245013237, "learning_rate": 7.473645030492004e-05, "loss": 1.6749, "step": 69465 }, { "epoch": 2.31, "grad_norm": 0.6649526357650757, "learning_rate": 7.472954744197777e-05, "loss": 1.7203, "step": 69466 }, { "epoch": 2.31, "grad_norm": 0.6689477562904358, "learning_rate": 7.472264485248061e-05, "loss": 1.6863, "step": 69467 }, { "epoch": 2.31, "grad_norm": 0.6727199554443359, "learning_rate": 7.471574253643692e-05, "loss": 1.7606, "step": 69468 }, { "epoch": 2.31, "grad_norm": 0.6407718658447266, "learning_rate": 7.470884049385509e-05, "loss": 1.6247, "step": 69469 }, { "epoch": 2.31, "grad_norm": 0.6562116742134094, "learning_rate": 7.470193872474373e-05, "loss": 1.6793, "step": 69470 }, { "epoch": 2.31, "grad_norm": 0.6786647439002991, "learning_rate": 7.469503722911086e-05, "loss": 1.7106, "step": 69471 }, { "epoch": 2.31, "grad_norm": 0.6569265127182007, "learning_rate": 7.468813600696497e-05, "loss": 1.6185, "step": 69472 }, { "epoch": 2.31, "grad_norm": 0.6802319884300232, "learning_rate": 7.468123505831461e-05, "loss": 1.738, "step": 69473 }, { "epoch": 2.31, "grad_norm": 0.6824068427085876, "learning_rate": 7.467433438316802e-05, "loss": 1.7571, "step": 69474 }, { "epoch": 2.31, "grad_norm": 0.6942462921142578, "learning_rate": 7.466743398153353e-05, "loss": 1.7261, "step": 69475 }, { "epoch": 2.31, "grad_norm": 0.6666555404663086, "learning_rate": 7.466053385341961e-05, "loss": 1.6624, "step": 69476 }, { "epoch": 2.31, "grad_norm": 0.6550948619842529, "learning_rate": 7.465363399883461e-05, "loss": 1.673, "step": 69477 }, { "epoch": 2.31, "grad_norm": 0.667336106300354, "learning_rate": 7.464673441778683e-05, "loss": 1.707, "step": 69478 }, { "epoch": 2.31, "grad_norm": 0.6557058095932007, "learning_rate": 7.463983511028469e-05, "loss": 1.7222, "step": 69479 }, { "epoch": 2.31, "grad_norm": 0.691781759262085, "learning_rate": 7.46329360763367e-05, "loss": 1.7192, "step": 69480 }, { "epoch": 2.31, "grad_norm": 0.669867217540741, "learning_rate": 7.462603731595107e-05, "loss": 1.7176, "step": 69481 }, { "epoch": 2.31, "grad_norm": 0.6561959981918335, "learning_rate": 7.461913882913619e-05, "loss": 1.7778, "step": 69482 }, { "epoch": 2.31, "grad_norm": 0.6620262265205383, "learning_rate": 7.461224061590052e-05, "loss": 1.7717, "step": 69483 }, { "epoch": 2.31, "grad_norm": 0.6698635220527649, "learning_rate": 7.460534267625241e-05, "loss": 1.6929, "step": 69484 }, { "epoch": 2.31, "grad_norm": 0.6539694666862488, "learning_rate": 7.459844501020008e-05, "loss": 1.6078, "step": 69485 }, { "epoch": 2.31, "grad_norm": 0.6533085703849792, "learning_rate": 7.459154761775212e-05, "loss": 1.6458, "step": 69486 }, { "epoch": 2.31, "grad_norm": 0.6667933464050293, "learning_rate": 7.458465049891677e-05, "loss": 1.7236, "step": 69487 }, { "epoch": 2.31, "grad_norm": 0.6903983354568481, "learning_rate": 7.457775365370246e-05, "loss": 1.6878, "step": 69488 }, { "epoch": 2.31, "grad_norm": 0.6743470430374146, "learning_rate": 7.457085708211749e-05, "loss": 1.7434, "step": 69489 }, { "epoch": 2.31, "grad_norm": 0.6499454975128174, "learning_rate": 7.456396078417039e-05, "loss": 1.6927, "step": 69490 }, { "epoch": 2.31, "grad_norm": 0.6434259414672852, "learning_rate": 7.45570647598694e-05, "loss": 1.7048, "step": 69491 }, { "epoch": 2.31, "grad_norm": 0.6637998819351196, "learning_rate": 7.455016900922283e-05, "loss": 1.6736, "step": 69492 }, { "epoch": 2.31, "grad_norm": 0.660169780254364, "learning_rate": 7.454327353223923e-05, "loss": 1.658, "step": 69493 }, { "epoch": 2.31, "grad_norm": 0.684521496295929, "learning_rate": 7.453637832892677e-05, "loss": 1.6475, "step": 69494 }, { "epoch": 2.31, "grad_norm": 0.6727616190910339, "learning_rate": 7.452948339929403e-05, "loss": 1.6958, "step": 69495 }, { "epoch": 2.31, "grad_norm": 0.6988009214401245, "learning_rate": 7.452258874334927e-05, "loss": 1.8026, "step": 69496 }, { "epoch": 2.31, "grad_norm": 0.6711229085922241, "learning_rate": 7.45156943611008e-05, "loss": 1.7919, "step": 69497 }, { "epoch": 2.31, "grad_norm": 0.6911099553108215, "learning_rate": 7.450880025255712e-05, "loss": 1.7048, "step": 69498 }, { "epoch": 2.31, "grad_norm": 0.6642613410949707, "learning_rate": 7.450190641772644e-05, "loss": 1.6397, "step": 69499 }, { "epoch": 2.31, "grad_norm": 0.6587739586830139, "learning_rate": 7.449501285661733e-05, "loss": 1.6945, "step": 69500 }, { "epoch": 2.31, "grad_norm": 0.678488552570343, "learning_rate": 7.448811956923792e-05, "loss": 1.7221, "step": 69501 }, { "epoch": 2.31, "grad_norm": 0.665298342704773, "learning_rate": 7.448122655559681e-05, "loss": 1.6266, "step": 69502 }, { "epoch": 2.31, "grad_norm": 0.681668758392334, "learning_rate": 7.447433381570229e-05, "loss": 1.7281, "step": 69503 }, { "epoch": 2.31, "grad_norm": 0.658582329750061, "learning_rate": 7.44674413495626e-05, "loss": 1.6909, "step": 69504 }, { "epoch": 2.31, "grad_norm": 0.6848015785217285, "learning_rate": 7.446054915718628e-05, "loss": 1.7237, "step": 69505 }, { "epoch": 2.31, "grad_norm": 0.6623595952987671, "learning_rate": 7.445365723858159e-05, "loss": 1.7173, "step": 69506 }, { "epoch": 2.31, "grad_norm": 0.6734790205955505, "learning_rate": 7.444676559375689e-05, "loss": 1.6484, "step": 69507 }, { "epoch": 2.31, "grad_norm": 0.6683502197265625, "learning_rate": 7.443987422272057e-05, "loss": 1.6633, "step": 69508 }, { "epoch": 2.31, "grad_norm": 0.6548526883125305, "learning_rate": 7.44329831254811e-05, "loss": 1.7283, "step": 69509 }, { "epoch": 2.31, "grad_norm": 0.6552712321281433, "learning_rate": 7.442609230204674e-05, "loss": 1.749, "step": 69510 }, { "epoch": 2.31, "grad_norm": 0.678003191947937, "learning_rate": 7.441920175242578e-05, "loss": 1.7164, "step": 69511 }, { "epoch": 2.31, "grad_norm": 0.6693801879882812, "learning_rate": 7.441231147662676e-05, "loss": 1.7192, "step": 69512 }, { "epoch": 2.31, "grad_norm": 0.6650835275650024, "learning_rate": 7.440542147465797e-05, "loss": 1.8306, "step": 69513 }, { "epoch": 2.31, "grad_norm": 0.6669185757637024, "learning_rate": 7.439853174652766e-05, "loss": 1.7061, "step": 69514 }, { "epoch": 2.31, "grad_norm": 0.6839003562927246, "learning_rate": 7.439164229224429e-05, "loss": 1.6788, "step": 69515 }, { "epoch": 2.31, "grad_norm": 0.6587603092193604, "learning_rate": 7.438475311181642e-05, "loss": 1.6461, "step": 69516 }, { "epoch": 2.31, "grad_norm": 0.694412350654602, "learning_rate": 7.437786420525203e-05, "loss": 1.7266, "step": 69517 }, { "epoch": 2.31, "grad_norm": 0.6988202929496765, "learning_rate": 7.437097557255966e-05, "loss": 1.7178, "step": 69518 }, { "epoch": 2.31, "grad_norm": 0.6394757032394409, "learning_rate": 7.436408721374777e-05, "loss": 1.6798, "step": 69519 }, { "epoch": 2.31, "grad_norm": 0.654560387134552, "learning_rate": 7.435719912882464e-05, "loss": 1.7249, "step": 69520 }, { "epoch": 2.31, "grad_norm": 0.7031927108764648, "learning_rate": 7.43503113177985e-05, "loss": 1.6947, "step": 69521 }, { "epoch": 2.31, "grad_norm": 0.6977279186248779, "learning_rate": 7.434342378067787e-05, "loss": 1.6576, "step": 69522 }, { "epoch": 2.31, "grad_norm": 0.6941291689872742, "learning_rate": 7.433653651747125e-05, "loss": 1.7444, "step": 69523 }, { "epoch": 2.31, "grad_norm": 0.6627734899520874, "learning_rate": 7.432964952818666e-05, "loss": 1.6968, "step": 69524 }, { "epoch": 2.31, "grad_norm": 0.6738213896751404, "learning_rate": 7.432276281283258e-05, "loss": 1.6685, "step": 69525 }, { "epoch": 2.31, "grad_norm": 0.6657659411430359, "learning_rate": 7.431587637141754e-05, "loss": 1.69, "step": 69526 }, { "epoch": 2.31, "grad_norm": 0.680650532245636, "learning_rate": 7.430899020394975e-05, "loss": 1.6774, "step": 69527 }, { "epoch": 2.31, "grad_norm": 0.6712794899940491, "learning_rate": 7.430210431043753e-05, "loss": 1.6366, "step": 69528 }, { "epoch": 2.31, "grad_norm": 0.6626068949699402, "learning_rate": 7.42952186908893e-05, "loss": 1.6926, "step": 69529 }, { "epoch": 2.31, "grad_norm": 0.6685624718666077, "learning_rate": 7.428833334531358e-05, "loss": 1.6961, "step": 69530 }, { "epoch": 2.31, "grad_norm": 0.6812264919281006, "learning_rate": 7.428144827371839e-05, "loss": 1.6538, "step": 69531 }, { "epoch": 2.31, "grad_norm": 0.6912310123443604, "learning_rate": 7.427456347611227e-05, "loss": 1.7103, "step": 69532 }, { "epoch": 2.31, "grad_norm": 0.6869014501571655, "learning_rate": 7.426767895250363e-05, "loss": 1.7055, "step": 69533 }, { "epoch": 2.31, "grad_norm": 0.6687384247779846, "learning_rate": 7.42607947029008e-05, "loss": 1.7201, "step": 69534 }, { "epoch": 2.31, "grad_norm": 0.6724081635475159, "learning_rate": 7.425391072731203e-05, "loss": 1.7005, "step": 69535 }, { "epoch": 2.31, "grad_norm": 0.7037259936332703, "learning_rate": 7.42470270257458e-05, "loss": 1.7586, "step": 69536 }, { "epoch": 2.31, "grad_norm": 0.6629962921142578, "learning_rate": 7.424014359821044e-05, "loss": 1.6891, "step": 69537 }, { "epoch": 2.31, "grad_norm": 0.6819376349449158, "learning_rate": 7.42332604447142e-05, "loss": 1.7074, "step": 69538 }, { "epoch": 2.31, "grad_norm": 1.9747179746627808, "learning_rate": 7.422637756526547e-05, "loss": 1.7076, "step": 69539 }, { "epoch": 2.31, "grad_norm": 0.6584625840187073, "learning_rate": 7.42194949598728e-05, "loss": 1.6896, "step": 69540 }, { "epoch": 2.31, "grad_norm": 0.6894662380218506, "learning_rate": 7.421261262854436e-05, "loss": 1.6413, "step": 69541 }, { "epoch": 2.31, "grad_norm": 0.6950590014457703, "learning_rate": 7.420573057128844e-05, "loss": 1.7762, "step": 69542 }, { "epoch": 2.31, "grad_norm": 0.6856142282485962, "learning_rate": 7.41988487881136e-05, "loss": 1.6853, "step": 69543 }, { "epoch": 2.31, "grad_norm": 0.6735659837722778, "learning_rate": 7.41919672790281e-05, "loss": 1.7672, "step": 69544 }, { "epoch": 2.31, "grad_norm": 0.694293200969696, "learning_rate": 7.418508604404017e-05, "loss": 1.7127, "step": 69545 }, { "epoch": 2.31, "grad_norm": 0.7283831238746643, "learning_rate": 7.41782050831583e-05, "loss": 1.7475, "step": 69546 }, { "epoch": 2.31, "grad_norm": 0.6807085275650024, "learning_rate": 7.417132439639087e-05, "loss": 1.6702, "step": 69547 }, { "epoch": 2.31, "grad_norm": 0.6785603165626526, "learning_rate": 7.416444398374621e-05, "loss": 1.7799, "step": 69548 }, { "epoch": 2.31, "grad_norm": 0.6672756671905518, "learning_rate": 7.415756384523254e-05, "loss": 1.6933, "step": 69549 }, { "epoch": 2.31, "grad_norm": 0.6773629784584045, "learning_rate": 7.41506839808584e-05, "loss": 1.6393, "step": 69550 }, { "epoch": 2.31, "grad_norm": 0.6552760004997253, "learning_rate": 7.414380439063205e-05, "loss": 1.6674, "step": 69551 }, { "epoch": 2.31, "grad_norm": 0.670820415019989, "learning_rate": 7.413692507456175e-05, "loss": 1.6801, "step": 69552 }, { "epoch": 2.31, "grad_norm": 0.6788712739944458, "learning_rate": 7.413004603265604e-05, "loss": 1.7111, "step": 69553 }, { "epoch": 2.31, "grad_norm": 0.6785101294517517, "learning_rate": 7.412316726492308e-05, "loss": 1.7062, "step": 69554 }, { "epoch": 2.31, "grad_norm": 0.6679659485816956, "learning_rate": 7.411628877137142e-05, "loss": 1.7122, "step": 69555 }, { "epoch": 2.31, "grad_norm": 0.6618748903274536, "learning_rate": 7.410941055200917e-05, "loss": 1.73, "step": 69556 }, { "epoch": 2.31, "grad_norm": 0.6680005788803101, "learning_rate": 7.410253260684493e-05, "loss": 1.7233, "step": 69557 }, { "epoch": 2.31, "grad_norm": 0.6781295537948608, "learning_rate": 7.409565493588695e-05, "loss": 1.7036, "step": 69558 }, { "epoch": 2.31, "grad_norm": 0.6802852749824524, "learning_rate": 7.408877753914345e-05, "loss": 1.7654, "step": 69559 }, { "epoch": 2.31, "grad_norm": 0.6833111047744751, "learning_rate": 7.408190041662296e-05, "loss": 1.6934, "step": 69560 }, { "epoch": 2.31, "grad_norm": 0.6583298444747925, "learning_rate": 7.407502356833368e-05, "loss": 1.6575, "step": 69561 }, { "epoch": 2.31, "grad_norm": 0.6960464715957642, "learning_rate": 7.406814699428413e-05, "loss": 1.6569, "step": 69562 }, { "epoch": 2.31, "grad_norm": 0.7033420205116272, "learning_rate": 7.406127069448257e-05, "loss": 1.713, "step": 69563 }, { "epoch": 2.31, "grad_norm": 0.6789068579673767, "learning_rate": 7.405439466893723e-05, "loss": 1.6516, "step": 69564 }, { "epoch": 2.31, "grad_norm": 0.6875231266021729, "learning_rate": 7.404751891765665e-05, "loss": 1.6809, "step": 69565 }, { "epoch": 2.31, "grad_norm": 0.6701607704162598, "learning_rate": 7.404064344064899e-05, "loss": 1.699, "step": 69566 }, { "epoch": 2.31, "grad_norm": 0.6767802834510803, "learning_rate": 7.403376823792284e-05, "loss": 1.6921, "step": 69567 }, { "epoch": 2.31, "grad_norm": 0.6579075455665588, "learning_rate": 7.402689330948625e-05, "loss": 1.7626, "step": 69568 }, { "epoch": 2.31, "grad_norm": 0.6650828719139099, "learning_rate": 7.402001865534783e-05, "loss": 1.7105, "step": 69569 }, { "epoch": 2.31, "grad_norm": 0.6600725650787354, "learning_rate": 7.40131442755158e-05, "loss": 1.6486, "step": 69570 }, { "epoch": 2.31, "grad_norm": 0.7045615315437317, "learning_rate": 7.400627016999844e-05, "loss": 1.6295, "step": 69571 }, { "epoch": 2.31, "grad_norm": 0.6886991262435913, "learning_rate": 7.399939633880425e-05, "loss": 1.7224, "step": 69572 }, { "epoch": 2.31, "grad_norm": 0.6650855541229248, "learning_rate": 7.39925227819415e-05, "loss": 1.7281, "step": 69573 }, { "epoch": 2.31, "grad_norm": 0.672698974609375, "learning_rate": 7.398564949941842e-05, "loss": 1.7291, "step": 69574 }, { "epoch": 2.31, "grad_norm": 0.6661219000816345, "learning_rate": 7.39787764912435e-05, "loss": 1.7208, "step": 69575 }, { "epoch": 2.31, "grad_norm": 0.6703844666481018, "learning_rate": 7.397190375742507e-05, "loss": 1.6516, "step": 69576 }, { "epoch": 2.31, "grad_norm": 0.7094109654426575, "learning_rate": 7.396503129797149e-05, "loss": 1.7027, "step": 69577 }, { "epoch": 2.31, "grad_norm": 0.6520063877105713, "learning_rate": 7.395815911289095e-05, "loss": 1.6926, "step": 69578 }, { "epoch": 2.31, "grad_norm": 0.6645346283912659, "learning_rate": 7.395128720219201e-05, "loss": 1.7145, "step": 69579 }, { "epoch": 2.31, "grad_norm": 0.6778964996337891, "learning_rate": 7.39444155658829e-05, "loss": 1.7269, "step": 69580 }, { "epoch": 2.31, "grad_norm": 0.6890037655830383, "learning_rate": 7.393754420397184e-05, "loss": 1.6974, "step": 69581 }, { "epoch": 2.32, "grad_norm": 0.6770414113998413, "learning_rate": 7.393067311646734e-05, "loss": 1.657, "step": 69582 }, { "epoch": 2.32, "grad_norm": 0.6692687273025513, "learning_rate": 7.392380230337774e-05, "loss": 1.6839, "step": 69583 }, { "epoch": 2.32, "grad_norm": 0.675678551197052, "learning_rate": 7.391693176471134e-05, "loss": 1.7416, "step": 69584 }, { "epoch": 2.32, "grad_norm": 0.680923581123352, "learning_rate": 7.391006150047639e-05, "loss": 1.6647, "step": 69585 }, { "epoch": 2.32, "grad_norm": 0.6536204218864441, "learning_rate": 7.390319151068131e-05, "loss": 1.638, "step": 69586 }, { "epoch": 2.32, "grad_norm": 0.670498251914978, "learning_rate": 7.389632179533464e-05, "loss": 1.6844, "step": 69587 }, { "epoch": 2.32, "grad_norm": 0.6830155253410339, "learning_rate": 7.388945235444432e-05, "loss": 1.7009, "step": 69588 }, { "epoch": 2.32, "grad_norm": 0.6858097314834595, "learning_rate": 7.388258318801891e-05, "loss": 1.7631, "step": 69589 }, { "epoch": 2.32, "grad_norm": 0.6748361587524414, "learning_rate": 7.387571429606677e-05, "loss": 1.7005, "step": 69590 }, { "epoch": 2.32, "grad_norm": 0.7040454745292664, "learning_rate": 7.386884567859626e-05, "loss": 1.6792, "step": 69591 }, { "epoch": 2.32, "grad_norm": 0.6727368831634521, "learning_rate": 7.386197733561553e-05, "loss": 1.6959, "step": 69592 }, { "epoch": 2.32, "grad_norm": 0.661240816116333, "learning_rate": 7.385510926713313e-05, "loss": 1.6532, "step": 69593 }, { "epoch": 2.32, "grad_norm": 0.6854987144470215, "learning_rate": 7.384824147315731e-05, "loss": 1.7984, "step": 69594 }, { "epoch": 2.32, "grad_norm": 0.6995242834091187, "learning_rate": 7.384137395369631e-05, "loss": 1.7116, "step": 69595 }, { "epoch": 2.32, "grad_norm": 0.721642255783081, "learning_rate": 7.383450670875853e-05, "loss": 1.7211, "step": 69596 }, { "epoch": 2.32, "grad_norm": 0.6528022885322571, "learning_rate": 7.382763973835246e-05, "loss": 1.6519, "step": 69597 }, { "epoch": 2.32, "grad_norm": 0.6713138818740845, "learning_rate": 7.382077304248631e-05, "loss": 1.7099, "step": 69598 }, { "epoch": 2.32, "grad_norm": 0.6818338632583618, "learning_rate": 7.381390662116832e-05, "loss": 1.7524, "step": 69599 }, { "epoch": 2.32, "grad_norm": 0.6686740517616272, "learning_rate": 7.3807040474407e-05, "loss": 1.6714, "step": 69600 }, { "epoch": 2.32, "grad_norm": 0.6559739112854004, "learning_rate": 7.380017460221061e-05, "loss": 1.7866, "step": 69601 }, { "epoch": 2.32, "grad_norm": 0.676589846611023, "learning_rate": 7.37933090045874e-05, "loss": 1.6722, "step": 69602 }, { "epoch": 2.32, "grad_norm": 0.698910653591156, "learning_rate": 7.378644368154586e-05, "loss": 1.6708, "step": 69603 }, { "epoch": 2.32, "grad_norm": 0.6817053556442261, "learning_rate": 7.37795786330942e-05, "loss": 1.7025, "step": 69604 }, { "epoch": 2.32, "grad_norm": 0.6908451318740845, "learning_rate": 7.377271385924083e-05, "loss": 1.6549, "step": 69605 }, { "epoch": 2.32, "grad_norm": 0.7575733661651611, "learning_rate": 7.3765849359994e-05, "loss": 1.6945, "step": 69606 }, { "epoch": 2.32, "grad_norm": 0.6726092100143433, "learning_rate": 7.375898513536217e-05, "loss": 1.6813, "step": 69607 }, { "epoch": 2.32, "grad_norm": 0.698084831237793, "learning_rate": 7.37521211853536e-05, "loss": 1.7222, "step": 69608 }, { "epoch": 2.32, "grad_norm": 0.7083401083946228, "learning_rate": 7.374525750997651e-05, "loss": 1.7334, "step": 69609 }, { "epoch": 2.32, "grad_norm": 0.6503215432167053, "learning_rate": 7.373839410923943e-05, "loss": 1.6453, "step": 69610 }, { "epoch": 2.32, "grad_norm": 0.6748056411743164, "learning_rate": 7.373153098315056e-05, "loss": 1.6521, "step": 69611 }, { "epoch": 2.32, "grad_norm": 0.6700615286827087, "learning_rate": 7.372466813171831e-05, "loss": 1.6836, "step": 69612 }, { "epoch": 2.32, "grad_norm": 0.7359029650688171, "learning_rate": 7.371780555495099e-05, "loss": 1.8122, "step": 69613 }, { "epoch": 2.32, "grad_norm": 0.6684862971305847, "learning_rate": 7.371094325285683e-05, "loss": 1.6967, "step": 69614 }, { "epoch": 2.32, "grad_norm": 0.6682072281837463, "learning_rate": 7.370408122544434e-05, "loss": 1.6756, "step": 69615 }, { "epoch": 2.32, "grad_norm": 0.6726616024971008, "learning_rate": 7.369721947272165e-05, "loss": 1.685, "step": 69616 }, { "epoch": 2.32, "grad_norm": 0.6684789657592773, "learning_rate": 7.369035799469726e-05, "loss": 1.6762, "step": 69617 }, { "epoch": 2.32, "grad_norm": 0.6726901531219482, "learning_rate": 7.368349679137936e-05, "loss": 1.7179, "step": 69618 }, { "epoch": 2.32, "grad_norm": 0.6530443429946899, "learning_rate": 7.367663586277644e-05, "loss": 1.6602, "step": 69619 }, { "epoch": 2.32, "grad_norm": 0.6574265956878662, "learning_rate": 7.366977520889674e-05, "loss": 1.6481, "step": 69620 }, { "epoch": 2.32, "grad_norm": 0.6995061635971069, "learning_rate": 7.366291482974851e-05, "loss": 1.6507, "step": 69621 }, { "epoch": 2.32, "grad_norm": 0.6440767645835876, "learning_rate": 7.365605472534021e-05, "loss": 1.6878, "step": 69622 }, { "epoch": 2.32, "grad_norm": 0.6782245635986328, "learning_rate": 7.364919489568013e-05, "loss": 1.7533, "step": 69623 }, { "epoch": 2.32, "grad_norm": 0.6670722365379333, "learning_rate": 7.364233534077647e-05, "loss": 1.6881, "step": 69624 }, { "epoch": 2.32, "grad_norm": 0.664997398853302, "learning_rate": 7.363547606063768e-05, "loss": 1.7122, "step": 69625 }, { "epoch": 2.32, "grad_norm": 0.6999405026435852, "learning_rate": 7.362861705527215e-05, "loss": 1.7056, "step": 69626 }, { "epoch": 2.32, "grad_norm": 0.6430759429931641, "learning_rate": 7.362175832468812e-05, "loss": 1.6944, "step": 69627 }, { "epoch": 2.32, "grad_norm": 0.6881546974182129, "learning_rate": 7.361489986889384e-05, "loss": 1.761, "step": 69628 }, { "epoch": 2.32, "grad_norm": 0.6676532030105591, "learning_rate": 7.360804168789781e-05, "loss": 1.7266, "step": 69629 }, { "epoch": 2.32, "grad_norm": 0.6575233340263367, "learning_rate": 7.360118378170824e-05, "loss": 1.7457, "step": 69630 }, { "epoch": 2.32, "grad_norm": 0.6620076298713684, "learning_rate": 7.359432615033341e-05, "loss": 1.6582, "step": 69631 }, { "epoch": 2.32, "grad_norm": 0.6595398783683777, "learning_rate": 7.358746879378169e-05, "loss": 1.7281, "step": 69632 }, { "epoch": 2.32, "grad_norm": 0.6681061387062073, "learning_rate": 7.358061171206159e-05, "loss": 1.6226, "step": 69633 }, { "epoch": 2.32, "grad_norm": 0.6862999200820923, "learning_rate": 7.35737549051811e-05, "loss": 1.6591, "step": 69634 }, { "epoch": 2.32, "grad_norm": 0.6466134786605835, "learning_rate": 7.356689837314873e-05, "loss": 1.7203, "step": 69635 }, { "epoch": 2.32, "grad_norm": 0.6954038143157959, "learning_rate": 7.356004211597287e-05, "loss": 1.758, "step": 69636 }, { "epoch": 2.32, "grad_norm": 0.6654911041259766, "learning_rate": 7.355318613366173e-05, "loss": 1.6667, "step": 69637 }, { "epoch": 2.32, "grad_norm": 0.6858718991279602, "learning_rate": 7.354633042622358e-05, "loss": 1.701, "step": 69638 }, { "epoch": 2.32, "grad_norm": 0.6957414746284485, "learning_rate": 7.353947499366681e-05, "loss": 1.6963, "step": 69639 }, { "epoch": 2.32, "grad_norm": 0.6528611183166504, "learning_rate": 7.353261983599992e-05, "loss": 1.6447, "step": 69640 }, { "epoch": 2.32, "grad_norm": 0.666839063167572, "learning_rate": 7.352576495323091e-05, "loss": 1.7111, "step": 69641 }, { "epoch": 2.32, "grad_norm": 0.6895331740379333, "learning_rate": 7.351891034536823e-05, "loss": 1.7079, "step": 69642 }, { "epoch": 2.32, "grad_norm": 0.6795361042022705, "learning_rate": 7.351205601242031e-05, "loss": 1.6504, "step": 69643 }, { "epoch": 2.32, "grad_norm": 0.6976212859153748, "learning_rate": 7.35052019543954e-05, "loss": 1.7352, "step": 69644 }, { "epoch": 2.32, "grad_norm": 0.6920146346092224, "learning_rate": 7.34983481713017e-05, "loss": 1.6892, "step": 69645 }, { "epoch": 2.32, "grad_norm": 0.6876194477081299, "learning_rate": 7.349149466314765e-05, "loss": 1.7367, "step": 69646 }, { "epoch": 2.32, "grad_norm": 0.6938529014587402, "learning_rate": 7.348464142994172e-05, "loss": 1.7798, "step": 69647 }, { "epoch": 2.32, "grad_norm": 0.6659574508666992, "learning_rate": 7.347778847169188e-05, "loss": 1.6155, "step": 69648 }, { "epoch": 2.32, "grad_norm": 0.7000331878662109, "learning_rate": 7.347093578840663e-05, "loss": 1.6668, "step": 69649 }, { "epoch": 2.32, "grad_norm": 0.6928669214248657, "learning_rate": 7.346408338009435e-05, "loss": 1.7228, "step": 69650 }, { "epoch": 2.32, "grad_norm": 0.6683256030082703, "learning_rate": 7.345723124676331e-05, "loss": 1.7443, "step": 69651 }, { "epoch": 2.32, "grad_norm": 0.6617957353591919, "learning_rate": 7.345037938842174e-05, "loss": 1.7095, "step": 69652 }, { "epoch": 2.32, "grad_norm": 0.6840882897377014, "learning_rate": 7.34435278050781e-05, "loss": 1.6477, "step": 69653 }, { "epoch": 2.32, "grad_norm": 0.6564921140670776, "learning_rate": 7.343667649674067e-05, "loss": 1.6898, "step": 69654 }, { "epoch": 2.32, "grad_norm": 0.6511775851249695, "learning_rate": 7.342982546341758e-05, "loss": 1.6516, "step": 69655 }, { "epoch": 2.32, "grad_norm": 0.6742187738418579, "learning_rate": 7.342297470511736e-05, "loss": 1.6779, "step": 69656 }, { "epoch": 2.32, "grad_norm": 0.6843641996383667, "learning_rate": 7.341612422184832e-05, "loss": 1.66, "step": 69657 }, { "epoch": 2.32, "grad_norm": 0.6632072329521179, "learning_rate": 7.340927401361872e-05, "loss": 1.6517, "step": 69658 }, { "epoch": 2.32, "grad_norm": 0.6516849398612976, "learning_rate": 7.340242408043678e-05, "loss": 1.6768, "step": 69659 }, { "epoch": 2.32, "grad_norm": 0.6654109358787537, "learning_rate": 7.339557442231103e-05, "loss": 1.784, "step": 69660 }, { "epoch": 2.32, "grad_norm": 0.665851354598999, "learning_rate": 7.338872503924964e-05, "loss": 1.7116, "step": 69661 }, { "epoch": 2.32, "grad_norm": 0.6718408465385437, "learning_rate": 7.338187593126087e-05, "loss": 1.612, "step": 69662 }, { "epoch": 2.32, "grad_norm": 0.6794055700302124, "learning_rate": 7.33750270983531e-05, "loss": 1.6817, "step": 69663 }, { "epoch": 2.32, "grad_norm": 0.6601856350898743, "learning_rate": 7.336817854053475e-05, "loss": 1.6086, "step": 69664 }, { "epoch": 2.32, "grad_norm": 0.6916635036468506, "learning_rate": 7.336133025781405e-05, "loss": 1.772, "step": 69665 }, { "epoch": 2.32, "grad_norm": 0.684578537940979, "learning_rate": 7.335448225019921e-05, "loss": 1.728, "step": 69666 }, { "epoch": 2.32, "grad_norm": 0.6673687696456909, "learning_rate": 7.33476345176987e-05, "loss": 1.6329, "step": 69667 }, { "epoch": 2.32, "grad_norm": 0.6753963232040405, "learning_rate": 7.334078706032079e-05, "loss": 1.6926, "step": 69668 }, { "epoch": 2.32, "grad_norm": 0.6831983923912048, "learning_rate": 7.333393987807367e-05, "loss": 1.7744, "step": 69669 }, { "epoch": 2.32, "grad_norm": 0.6764410138130188, "learning_rate": 7.332709297096582e-05, "loss": 1.697, "step": 69670 }, { "epoch": 2.32, "grad_norm": 0.6662101745605469, "learning_rate": 7.332024633900541e-05, "loss": 1.6783, "step": 69671 }, { "epoch": 2.32, "grad_norm": 0.6829288601875305, "learning_rate": 7.331339998220093e-05, "loss": 1.6922, "step": 69672 }, { "epoch": 2.32, "grad_norm": 0.6915881037712097, "learning_rate": 7.330655390056047e-05, "loss": 1.7006, "step": 69673 }, { "epoch": 2.32, "grad_norm": 0.6835058927536011, "learning_rate": 7.329970809409253e-05, "loss": 1.6892, "step": 69674 }, { "epoch": 2.32, "grad_norm": 0.6827027797698975, "learning_rate": 7.329286256280535e-05, "loss": 1.7608, "step": 69675 }, { "epoch": 2.32, "grad_norm": 0.6891587972640991, "learning_rate": 7.328601730670716e-05, "loss": 1.7028, "step": 69676 }, { "epoch": 2.32, "grad_norm": 0.6596076488494873, "learning_rate": 7.32791723258064e-05, "loss": 1.7012, "step": 69677 }, { "epoch": 2.32, "grad_norm": 0.6822904944419861, "learning_rate": 7.327232762011127e-05, "loss": 1.6974, "step": 69678 }, { "epoch": 2.32, "grad_norm": 0.6701393723487854, "learning_rate": 7.326548318963019e-05, "loss": 1.7695, "step": 69679 }, { "epoch": 2.32, "grad_norm": 0.659283459186554, "learning_rate": 7.32586390343714e-05, "loss": 1.6781, "step": 69680 }, { "epoch": 2.32, "grad_norm": 0.6700065732002258, "learning_rate": 7.325179515434313e-05, "loss": 1.6752, "step": 69681 }, { "epoch": 2.32, "grad_norm": 0.676564633846283, "learning_rate": 7.324495154955387e-05, "loss": 1.6377, "step": 69682 }, { "epoch": 2.32, "grad_norm": 0.6887492537498474, "learning_rate": 7.323810822001172e-05, "loss": 1.6442, "step": 69683 }, { "epoch": 2.32, "grad_norm": 0.6767662167549133, "learning_rate": 7.323126516572521e-05, "loss": 1.6876, "step": 69684 }, { "epoch": 2.32, "grad_norm": 0.6580715179443359, "learning_rate": 7.322442238670243e-05, "loss": 1.6807, "step": 69685 }, { "epoch": 2.32, "grad_norm": 0.6964899301528931, "learning_rate": 7.321757988295186e-05, "loss": 1.7376, "step": 69686 }, { "epoch": 2.32, "grad_norm": 0.6894420981407166, "learning_rate": 7.321073765448175e-05, "loss": 1.7676, "step": 69687 }, { "epoch": 2.32, "grad_norm": 0.6700429916381836, "learning_rate": 7.32038957013003e-05, "loss": 1.6781, "step": 69688 }, { "epoch": 2.32, "grad_norm": 0.6819249391555786, "learning_rate": 7.319705402341597e-05, "loss": 1.7419, "step": 69689 }, { "epoch": 2.32, "grad_norm": 0.686933696269989, "learning_rate": 7.319021262083703e-05, "loss": 1.6897, "step": 69690 }, { "epoch": 2.32, "grad_norm": 0.6715543866157532, "learning_rate": 7.318337149357166e-05, "loss": 1.658, "step": 69691 }, { "epoch": 2.32, "grad_norm": 0.6547971367835999, "learning_rate": 7.317653064162825e-05, "loss": 1.6905, "step": 69692 }, { "epoch": 2.32, "grad_norm": 0.6563193798065186, "learning_rate": 7.316969006501518e-05, "loss": 1.6798, "step": 69693 }, { "epoch": 2.32, "grad_norm": 0.6760439276695251, "learning_rate": 7.316284976374071e-05, "loss": 1.7317, "step": 69694 }, { "epoch": 2.32, "grad_norm": 0.6642313599586487, "learning_rate": 7.315600973781303e-05, "loss": 1.6849, "step": 69695 }, { "epoch": 2.32, "grad_norm": 0.6591037511825562, "learning_rate": 7.314916998724054e-05, "loss": 1.6631, "step": 69696 }, { "epoch": 2.32, "grad_norm": 0.6828562021255493, "learning_rate": 7.314233051203168e-05, "loss": 1.7413, "step": 69697 }, { "epoch": 2.32, "grad_norm": 0.6686449646949768, "learning_rate": 7.313549131219443e-05, "loss": 1.6899, "step": 69698 }, { "epoch": 2.32, "grad_norm": 0.6606838703155518, "learning_rate": 7.312865238773728e-05, "loss": 1.6589, "step": 69699 }, { "epoch": 2.32, "grad_norm": 0.6915448307991028, "learning_rate": 7.312181373866862e-05, "loss": 1.5741, "step": 69700 }, { "epoch": 2.32, "grad_norm": 0.6831052303314209, "learning_rate": 7.311497536499663e-05, "loss": 1.7334, "step": 69701 }, { "epoch": 2.32, "grad_norm": 0.6514256596565247, "learning_rate": 7.310813726672954e-05, "loss": 1.6226, "step": 69702 }, { "epoch": 2.32, "grad_norm": 0.6999958753585815, "learning_rate": 7.310129944387573e-05, "loss": 1.7813, "step": 69703 }, { "epoch": 2.32, "grad_norm": 0.7268997430801392, "learning_rate": 7.309446189644373e-05, "loss": 1.7495, "step": 69704 }, { "epoch": 2.32, "grad_norm": 0.6914916634559631, "learning_rate": 7.308762462444142e-05, "loss": 1.6002, "step": 69705 }, { "epoch": 2.32, "grad_norm": 0.6944830417633057, "learning_rate": 7.308078762787729e-05, "loss": 1.6977, "step": 69706 }, { "epoch": 2.32, "grad_norm": 0.6575426459312439, "learning_rate": 7.307395090675973e-05, "loss": 1.6435, "step": 69707 }, { "epoch": 2.32, "grad_norm": 0.6763426661491394, "learning_rate": 7.306711446109698e-05, "loss": 1.6357, "step": 69708 }, { "epoch": 2.32, "grad_norm": 0.6877605319023132, "learning_rate": 7.30602782908972e-05, "loss": 1.6252, "step": 69709 }, { "epoch": 2.32, "grad_norm": 0.7113043665885925, "learning_rate": 7.305344239616893e-05, "loss": 1.6784, "step": 69710 }, { "epoch": 2.32, "grad_norm": 0.6667446494102478, "learning_rate": 7.304660677692033e-05, "loss": 1.6918, "step": 69711 }, { "epoch": 2.32, "grad_norm": 0.7016770839691162, "learning_rate": 7.303977143315958e-05, "loss": 1.6364, "step": 69712 }, { "epoch": 2.32, "grad_norm": 0.6751582026481628, "learning_rate": 7.303293636489514e-05, "loss": 1.7226, "step": 69713 }, { "epoch": 2.32, "grad_norm": 0.6714907288551331, "learning_rate": 7.302610157213539e-05, "loss": 1.7037, "step": 69714 }, { "epoch": 2.32, "grad_norm": 0.6744486093521118, "learning_rate": 7.301926705488847e-05, "loss": 1.6947, "step": 69715 }, { "epoch": 2.32, "grad_norm": 0.6818380355834961, "learning_rate": 7.301243281316264e-05, "loss": 1.6294, "step": 69716 }, { "epoch": 2.32, "grad_norm": 0.6843639016151428, "learning_rate": 7.300559884696636e-05, "loss": 1.7558, "step": 69717 }, { "epoch": 2.32, "grad_norm": 0.7016394138336182, "learning_rate": 7.299876515630784e-05, "loss": 1.7187, "step": 69718 }, { "epoch": 2.32, "grad_norm": 0.6773377656936646, "learning_rate": 7.299193174119529e-05, "loss": 1.7288, "step": 69719 }, { "epoch": 2.32, "grad_norm": 0.6591259837150574, "learning_rate": 7.298509860163714e-05, "loss": 1.7733, "step": 69720 }, { "epoch": 2.32, "grad_norm": 0.6671193242073059, "learning_rate": 7.297826573764156e-05, "loss": 1.7032, "step": 69721 }, { "epoch": 2.32, "grad_norm": 0.6635066866874695, "learning_rate": 7.297143314921702e-05, "loss": 1.7766, "step": 69722 }, { "epoch": 2.32, "grad_norm": 0.6998394727706909, "learning_rate": 7.296460083637164e-05, "loss": 1.717, "step": 69723 }, { "epoch": 2.32, "grad_norm": 0.6484650373458862, "learning_rate": 7.295776879911383e-05, "loss": 1.6935, "step": 69724 }, { "epoch": 2.32, "grad_norm": 0.6794099807739258, "learning_rate": 7.295093703745185e-05, "loss": 1.6665, "step": 69725 }, { "epoch": 2.32, "grad_norm": 0.6753855347633362, "learning_rate": 7.294410555139388e-05, "loss": 1.7184, "step": 69726 }, { "epoch": 2.32, "grad_norm": 0.6547800898551941, "learning_rate": 7.293727434094839e-05, "loss": 1.7033, "step": 69727 }, { "epoch": 2.32, "grad_norm": 0.6934386491775513, "learning_rate": 7.293044340612354e-05, "loss": 1.7084, "step": 69728 }, { "epoch": 2.32, "grad_norm": 0.6680655479431152, "learning_rate": 7.292361274692773e-05, "loss": 1.7235, "step": 69729 }, { "epoch": 2.32, "grad_norm": 0.6859361529350281, "learning_rate": 7.291678236336922e-05, "loss": 1.6779, "step": 69730 }, { "epoch": 2.32, "grad_norm": 0.6906914114952087, "learning_rate": 7.290995225545617e-05, "loss": 1.7099, "step": 69731 }, { "epoch": 2.32, "grad_norm": 0.665416955947876, "learning_rate": 7.290312242319708e-05, "loss": 1.6975, "step": 69732 }, { "epoch": 2.32, "grad_norm": 0.6757408976554871, "learning_rate": 7.289629286660003e-05, "loss": 1.7284, "step": 69733 }, { "epoch": 2.32, "grad_norm": 0.673233151435852, "learning_rate": 7.288946358567354e-05, "loss": 1.7065, "step": 69734 }, { "epoch": 2.32, "grad_norm": 0.6536674499511719, "learning_rate": 7.288263458042568e-05, "loss": 1.6912, "step": 69735 }, { "epoch": 2.32, "grad_norm": 0.7062587738037109, "learning_rate": 7.28758058508649e-05, "loss": 1.6551, "step": 69736 }, { "epoch": 2.32, "grad_norm": 0.6815698742866516, "learning_rate": 7.286897739699946e-05, "loss": 1.7453, "step": 69737 }, { "epoch": 2.32, "grad_norm": 0.6689736843109131, "learning_rate": 7.286214921883749e-05, "loss": 1.7068, "step": 69738 }, { "epoch": 2.32, "grad_norm": 0.6966752409934998, "learning_rate": 7.285532131638753e-05, "loss": 1.6657, "step": 69739 }, { "epoch": 2.32, "grad_norm": 0.68841552734375, "learning_rate": 7.284849368965774e-05, "loss": 1.5956, "step": 69740 }, { "epoch": 2.32, "grad_norm": 0.6706007719039917, "learning_rate": 7.284166633865629e-05, "loss": 1.6758, "step": 69741 }, { "epoch": 2.32, "grad_norm": 0.6754469871520996, "learning_rate": 7.283483926339161e-05, "loss": 1.625, "step": 69742 }, { "epoch": 2.32, "grad_norm": 0.6575438380241394, "learning_rate": 7.282801246387206e-05, "loss": 1.7047, "step": 69743 }, { "epoch": 2.32, "grad_norm": 0.6798757314682007, "learning_rate": 7.282118594010579e-05, "loss": 1.7406, "step": 69744 }, { "epoch": 2.32, "grad_norm": 0.6857707500457764, "learning_rate": 7.281435969210107e-05, "loss": 1.7671, "step": 69745 }, { "epoch": 2.32, "grad_norm": 0.6833897829055786, "learning_rate": 7.280753371986631e-05, "loss": 1.6387, "step": 69746 }, { "epoch": 2.32, "grad_norm": 0.6790921092033386, "learning_rate": 7.280070802340977e-05, "loss": 1.6929, "step": 69747 }, { "epoch": 2.32, "grad_norm": 0.6758668422698975, "learning_rate": 7.279388260273958e-05, "loss": 1.7476, "step": 69748 }, { "epoch": 2.32, "grad_norm": 0.6507033109664917, "learning_rate": 7.278705745786412e-05, "loss": 1.6388, "step": 69749 }, { "epoch": 2.32, "grad_norm": 0.6577296853065491, "learning_rate": 7.27802325887919e-05, "loss": 1.7364, "step": 69750 }, { "epoch": 2.32, "grad_norm": 0.671266496181488, "learning_rate": 7.277340799553082e-05, "loss": 1.6849, "step": 69751 }, { "epoch": 2.32, "grad_norm": 0.6766329407691956, "learning_rate": 7.276658367808934e-05, "loss": 1.7111, "step": 69752 }, { "epoch": 2.32, "grad_norm": 0.6476696133613586, "learning_rate": 7.275975963647582e-05, "loss": 1.7783, "step": 69753 }, { "epoch": 2.32, "grad_norm": 0.6706881523132324, "learning_rate": 7.275293587069845e-05, "loss": 1.6749, "step": 69754 }, { "epoch": 2.32, "grad_norm": 0.6680461168289185, "learning_rate": 7.274611238076551e-05, "loss": 1.6917, "step": 69755 }, { "epoch": 2.32, "grad_norm": 0.6957931518554688, "learning_rate": 7.273928916668526e-05, "loss": 1.6906, "step": 69756 }, { "epoch": 2.32, "grad_norm": 0.6819889545440674, "learning_rate": 7.273246622846619e-05, "loss": 1.6907, "step": 69757 }, { "epoch": 2.32, "grad_norm": 0.6511648893356323, "learning_rate": 7.272564356611627e-05, "loss": 1.695, "step": 69758 }, { "epoch": 2.32, "grad_norm": 0.6619096994400024, "learning_rate": 7.271882117964395e-05, "loss": 1.6849, "step": 69759 }, { "epoch": 2.32, "grad_norm": 0.6830075979232788, "learning_rate": 7.271199906905753e-05, "loss": 1.7233, "step": 69760 }, { "epoch": 2.32, "grad_norm": 0.6805529594421387, "learning_rate": 7.27051772343653e-05, "loss": 1.7349, "step": 69761 }, { "epoch": 2.32, "grad_norm": 0.6965463757514954, "learning_rate": 7.269835567557539e-05, "loss": 1.6558, "step": 69762 }, { "epoch": 2.32, "grad_norm": 0.6800440549850464, "learning_rate": 7.269153439269618e-05, "loss": 1.7307, "step": 69763 }, { "epoch": 2.32, "grad_norm": 0.6592473387718201, "learning_rate": 7.268471338573613e-05, "loss": 1.6594, "step": 69764 }, { "epoch": 2.32, "grad_norm": 0.66032874584198, "learning_rate": 7.267789265470319e-05, "loss": 1.7602, "step": 69765 }, { "epoch": 2.32, "grad_norm": 0.6528748273849487, "learning_rate": 7.267107219960575e-05, "loss": 1.721, "step": 69766 }, { "epoch": 2.32, "grad_norm": 0.6783784627914429, "learning_rate": 7.266425202045226e-05, "loss": 1.6788, "step": 69767 }, { "epoch": 2.32, "grad_norm": 0.6544397473335266, "learning_rate": 7.265743211725084e-05, "loss": 1.669, "step": 69768 }, { "epoch": 2.32, "grad_norm": 0.6731351017951965, "learning_rate": 7.265061249000972e-05, "loss": 1.7664, "step": 69769 }, { "epoch": 2.32, "grad_norm": 0.6547130942344666, "learning_rate": 7.264379313873739e-05, "loss": 1.729, "step": 69770 }, { "epoch": 2.32, "grad_norm": 0.6690683364868164, "learning_rate": 7.263697406344197e-05, "loss": 1.6852, "step": 69771 }, { "epoch": 2.32, "grad_norm": 0.6403623223304749, "learning_rate": 7.263015526413164e-05, "loss": 1.6705, "step": 69772 }, { "epoch": 2.32, "grad_norm": 0.6614845395088196, "learning_rate": 7.262333674081484e-05, "loss": 1.6439, "step": 69773 }, { "epoch": 2.32, "grad_norm": 0.6842538118362427, "learning_rate": 7.26165184934999e-05, "loss": 1.7334, "step": 69774 }, { "epoch": 2.32, "grad_norm": 0.6495895981788635, "learning_rate": 7.260970052219498e-05, "loss": 1.7478, "step": 69775 }, { "epoch": 2.32, "grad_norm": 0.713983416557312, "learning_rate": 7.260288282690828e-05, "loss": 1.6835, "step": 69776 }, { "epoch": 2.32, "grad_norm": 0.6653509140014648, "learning_rate": 7.25960654076483e-05, "loss": 1.6586, "step": 69777 }, { "epoch": 2.32, "grad_norm": 0.703788697719574, "learning_rate": 7.258924826442317e-05, "loss": 1.734, "step": 69778 }, { "epoch": 2.32, "grad_norm": 0.676532506942749, "learning_rate": 7.258243139724111e-05, "loss": 1.7938, "step": 69779 }, { "epoch": 2.32, "grad_norm": 0.7091175317764282, "learning_rate": 7.257561480611051e-05, "loss": 1.6936, "step": 69780 }, { "epoch": 2.32, "grad_norm": 0.662007749080658, "learning_rate": 7.256879849103957e-05, "loss": 1.6695, "step": 69781 }, { "epoch": 2.32, "grad_norm": 0.6645493507385254, "learning_rate": 7.256198245203666e-05, "loss": 1.6022, "step": 69782 }, { "epoch": 2.32, "grad_norm": 0.7954791188240051, "learning_rate": 7.255516668910992e-05, "loss": 1.6953, "step": 69783 }, { "epoch": 2.32, "grad_norm": 0.6969115734100342, "learning_rate": 7.254835120226779e-05, "loss": 1.7116, "step": 69784 }, { "epoch": 2.32, "grad_norm": 0.6609595417976379, "learning_rate": 7.25415359915184e-05, "loss": 1.6893, "step": 69785 }, { "epoch": 2.32, "grad_norm": 0.6559557914733887, "learning_rate": 7.253472105687004e-05, "loss": 1.7092, "step": 69786 }, { "epoch": 2.32, "grad_norm": 0.6707503199577332, "learning_rate": 7.252790639833107e-05, "loss": 1.6968, "step": 69787 }, { "epoch": 2.32, "grad_norm": 0.660615861415863, "learning_rate": 7.252109201590963e-05, "loss": 1.6926, "step": 69788 }, { "epoch": 2.32, "grad_norm": 0.6829190850257874, "learning_rate": 7.251427790961415e-05, "loss": 1.7344, "step": 69789 }, { "epoch": 2.32, "grad_norm": 0.6582846641540527, "learning_rate": 7.250746407945271e-05, "loss": 1.7387, "step": 69790 }, { "epoch": 2.32, "grad_norm": 0.6817640662193298, "learning_rate": 7.250065052543379e-05, "loss": 1.6886, "step": 69791 }, { "epoch": 2.32, "grad_norm": 0.6710681319236755, "learning_rate": 7.249383724756557e-05, "loss": 1.6522, "step": 69792 }, { "epoch": 2.32, "grad_norm": 0.6636635661125183, "learning_rate": 7.248702424585624e-05, "loss": 1.6928, "step": 69793 }, { "epoch": 2.32, "grad_norm": 0.6892021298408508, "learning_rate": 7.24802115203142e-05, "loss": 1.7243, "step": 69794 }, { "epoch": 2.32, "grad_norm": 0.6996560096740723, "learning_rate": 7.247339907094757e-05, "loss": 1.7423, "step": 69795 }, { "epoch": 2.32, "grad_norm": 0.6463165879249573, "learning_rate": 7.24665868977648e-05, "loss": 1.7028, "step": 69796 }, { "epoch": 2.32, "grad_norm": 0.6531454920768738, "learning_rate": 7.245977500077409e-05, "loss": 1.6748, "step": 69797 }, { "epoch": 2.32, "grad_norm": 0.6749696135520935, "learning_rate": 7.245296337998358e-05, "loss": 1.8058, "step": 69798 }, { "epoch": 2.32, "grad_norm": 0.6765046119689941, "learning_rate": 7.244615203540171e-05, "loss": 1.7102, "step": 69799 }, { "epoch": 2.32, "grad_norm": 0.6688131093978882, "learning_rate": 7.243934096703664e-05, "loss": 1.6829, "step": 69800 }, { "epoch": 2.32, "grad_norm": 0.6630829572677612, "learning_rate": 7.243253017489674e-05, "loss": 1.6605, "step": 69801 }, { "epoch": 2.32, "grad_norm": 0.6314286589622498, "learning_rate": 7.242571965899014e-05, "loss": 1.7192, "step": 69802 }, { "epoch": 2.32, "grad_norm": 0.6816861629486084, "learning_rate": 7.241890941932528e-05, "loss": 1.7144, "step": 69803 }, { "epoch": 2.32, "grad_norm": 0.6658392548561096, "learning_rate": 7.241209945591033e-05, "loss": 1.6456, "step": 69804 }, { "epoch": 2.32, "grad_norm": 0.6612275242805481, "learning_rate": 7.240528976875348e-05, "loss": 1.6954, "step": 69805 }, { "epoch": 2.32, "grad_norm": 0.6454083323478699, "learning_rate": 7.239848035786313e-05, "loss": 1.6665, "step": 69806 }, { "epoch": 2.32, "grad_norm": 0.6899014115333557, "learning_rate": 7.239167122324751e-05, "loss": 1.6829, "step": 69807 }, { "epoch": 2.32, "grad_norm": 0.6771800518035889, "learning_rate": 7.238486236491475e-05, "loss": 1.738, "step": 69808 }, { "epoch": 2.32, "grad_norm": 0.6782634854316711, "learning_rate": 7.237805378287326e-05, "loss": 1.7176, "step": 69809 }, { "epoch": 2.32, "grad_norm": 0.6647753119468689, "learning_rate": 7.237124547713134e-05, "loss": 1.698, "step": 69810 }, { "epoch": 2.32, "grad_norm": 0.6680547595024109, "learning_rate": 7.23644374476972e-05, "loss": 1.6608, "step": 69811 }, { "epoch": 2.32, "grad_norm": 0.6544538140296936, "learning_rate": 7.235762969457902e-05, "loss": 1.7137, "step": 69812 }, { "epoch": 2.32, "grad_norm": 0.6580049991607666, "learning_rate": 7.23508222177851e-05, "loss": 1.7362, "step": 69813 }, { "epoch": 2.32, "grad_norm": 0.6812182068824768, "learning_rate": 7.234401501732397e-05, "loss": 1.6251, "step": 69814 }, { "epoch": 2.32, "grad_norm": 0.6534826755523682, "learning_rate": 7.233720809320343e-05, "loss": 1.6436, "step": 69815 }, { "epoch": 2.32, "grad_norm": 0.6823962926864624, "learning_rate": 7.233040144543198e-05, "loss": 1.6954, "step": 69816 }, { "epoch": 2.32, "grad_norm": 0.6540572643280029, "learning_rate": 7.232359507401801e-05, "loss": 1.6192, "step": 69817 }, { "epoch": 2.32, "grad_norm": 0.6931971311569214, "learning_rate": 7.231678897896962e-05, "loss": 1.6973, "step": 69818 }, { "epoch": 2.32, "grad_norm": 0.642166793346405, "learning_rate": 7.2309983160295e-05, "loss": 1.6473, "step": 69819 }, { "epoch": 2.32, "grad_norm": 0.6703218221664429, "learning_rate": 7.230317761800256e-05, "loss": 1.6755, "step": 69820 }, { "epoch": 2.32, "grad_norm": 0.6972442865371704, "learning_rate": 7.229637235210062e-05, "loss": 1.7428, "step": 69821 }, { "epoch": 2.32, "grad_norm": 0.6451143622398376, "learning_rate": 7.228956736259718e-05, "loss": 1.7037, "step": 69822 }, { "epoch": 2.32, "grad_norm": 0.6600569486618042, "learning_rate": 7.228276264950069e-05, "loss": 1.7254, "step": 69823 }, { "epoch": 2.32, "grad_norm": 0.6838065385818481, "learning_rate": 7.227595821281946e-05, "loss": 1.6412, "step": 69824 }, { "epoch": 2.32, "grad_norm": 0.6811420321464539, "learning_rate": 7.226915405256162e-05, "loss": 1.6831, "step": 69825 }, { "epoch": 2.32, "grad_norm": 0.6660743951797485, "learning_rate": 7.226235016873542e-05, "loss": 1.6752, "step": 69826 }, { "epoch": 2.32, "grad_norm": 0.67467200756073, "learning_rate": 7.22555465613492e-05, "loss": 1.6969, "step": 69827 }, { "epoch": 2.32, "grad_norm": 0.7052812576293945, "learning_rate": 7.224874323041126e-05, "loss": 1.5842, "step": 69828 }, { "epoch": 2.32, "grad_norm": 0.6728183031082153, "learning_rate": 7.224194017592968e-05, "loss": 1.6924, "step": 69829 }, { "epoch": 2.32, "grad_norm": 0.6931726932525635, "learning_rate": 7.22351373979128e-05, "loss": 1.7211, "step": 69830 }, { "epoch": 2.32, "grad_norm": 0.6824046969413757, "learning_rate": 7.222833489636902e-05, "loss": 1.744, "step": 69831 }, { "epoch": 2.32, "grad_norm": 0.669131338596344, "learning_rate": 7.222153267130648e-05, "loss": 1.6665, "step": 69832 }, { "epoch": 2.32, "grad_norm": 0.6684670448303223, "learning_rate": 7.221473072273335e-05, "loss": 1.6702, "step": 69833 }, { "epoch": 2.32, "grad_norm": 0.6650885343551636, "learning_rate": 7.220792905065806e-05, "loss": 1.6788, "step": 69834 }, { "epoch": 2.32, "grad_norm": 0.6608079075813293, "learning_rate": 7.220112765508878e-05, "loss": 1.7247, "step": 69835 }, { "epoch": 2.32, "grad_norm": 0.6668345928192139, "learning_rate": 7.219432653603364e-05, "loss": 1.7005, "step": 69836 }, { "epoch": 2.32, "grad_norm": 0.6788925528526306, "learning_rate": 7.218752569350115e-05, "loss": 1.7043, "step": 69837 }, { "epoch": 2.32, "grad_norm": 0.6924726963043213, "learning_rate": 7.218072512749935e-05, "loss": 1.6824, "step": 69838 }, { "epoch": 2.32, "grad_norm": 0.66236811876297, "learning_rate": 7.217392483803668e-05, "loss": 1.6321, "step": 69839 }, { "epoch": 2.32, "grad_norm": 0.6528077721595764, "learning_rate": 7.216712482512119e-05, "loss": 1.649, "step": 69840 }, { "epoch": 2.32, "grad_norm": 0.6668387055397034, "learning_rate": 7.216032508876132e-05, "loss": 1.6849, "step": 69841 }, { "epoch": 2.32, "grad_norm": 0.6560762524604797, "learning_rate": 7.215352562896525e-05, "loss": 1.6484, "step": 69842 }, { "epoch": 2.32, "grad_norm": 0.6570529341697693, "learning_rate": 7.214672644574114e-05, "loss": 1.6878, "step": 69843 }, { "epoch": 2.32, "grad_norm": 0.6835379600524902, "learning_rate": 7.213992753909743e-05, "loss": 1.6838, "step": 69844 }, { "epoch": 2.32, "grad_norm": 0.6654751300811768, "learning_rate": 7.213312890904221e-05, "loss": 1.691, "step": 69845 }, { "epoch": 2.32, "grad_norm": 0.6711724400520325, "learning_rate": 7.212633055558382e-05, "loss": 1.679, "step": 69846 }, { "epoch": 2.32, "grad_norm": 0.6692785620689392, "learning_rate": 7.211953247873055e-05, "loss": 1.7381, "step": 69847 }, { "epoch": 2.32, "grad_norm": 0.6684110760688782, "learning_rate": 7.211273467849046e-05, "loss": 1.7146, "step": 69848 }, { "epoch": 2.32, "grad_norm": 0.6848031282424927, "learning_rate": 7.210593715487204e-05, "loss": 1.7803, "step": 69849 }, { "epoch": 2.32, "grad_norm": 0.6973855495452881, "learning_rate": 7.209913990788335e-05, "loss": 1.6284, "step": 69850 }, { "epoch": 2.32, "grad_norm": 0.6760455965995789, "learning_rate": 7.20923429375328e-05, "loss": 1.6775, "step": 69851 }, { "epoch": 2.32, "grad_norm": 0.7090253829956055, "learning_rate": 7.208554624382849e-05, "loss": 1.7177, "step": 69852 }, { "epoch": 2.32, "grad_norm": 0.676657497882843, "learning_rate": 7.207874982677883e-05, "loss": 1.6916, "step": 69853 }, { "epoch": 2.32, "grad_norm": 0.684017539024353, "learning_rate": 7.207195368639198e-05, "loss": 1.6847, "step": 69854 }, { "epoch": 2.32, "grad_norm": 0.6678899526596069, "learning_rate": 7.20651578226761e-05, "loss": 1.6434, "step": 69855 }, { "epoch": 2.32, "grad_norm": 0.6821092963218689, "learning_rate": 7.205836223563966e-05, "loss": 1.7224, "step": 69856 }, { "epoch": 2.32, "grad_norm": 0.6547538638114929, "learning_rate": 7.205156692529073e-05, "loss": 1.7074, "step": 69857 }, { "epoch": 2.32, "grad_norm": 0.6701412796974182, "learning_rate": 7.204477189163755e-05, "loss": 1.6746, "step": 69858 }, { "epoch": 2.32, "grad_norm": 0.6712517142295837, "learning_rate": 7.203797713468843e-05, "loss": 1.6796, "step": 69859 }, { "epoch": 2.32, "grad_norm": 0.6802529096603394, "learning_rate": 7.203118265445169e-05, "loss": 1.7222, "step": 69860 }, { "epoch": 2.32, "grad_norm": 0.680664598941803, "learning_rate": 7.202438845093553e-05, "loss": 1.5467, "step": 69861 }, { "epoch": 2.32, "grad_norm": 0.6815648674964905, "learning_rate": 7.201759452414808e-05, "loss": 1.7608, "step": 69862 }, { "epoch": 2.32, "grad_norm": 0.6730800271034241, "learning_rate": 7.201080087409776e-05, "loss": 1.679, "step": 69863 }, { "epoch": 2.32, "grad_norm": 0.667069137096405, "learning_rate": 7.200400750079274e-05, "loss": 1.6713, "step": 69864 }, { "epoch": 2.32, "grad_norm": 0.6761020421981812, "learning_rate": 7.199721440424113e-05, "loss": 1.6938, "step": 69865 }, { "epoch": 2.32, "grad_norm": 0.6796251535415649, "learning_rate": 7.199042158445137e-05, "loss": 1.7091, "step": 69866 }, { "epoch": 2.32, "grad_norm": 0.6531728506088257, "learning_rate": 7.19836290414318e-05, "loss": 1.6494, "step": 69867 }, { "epoch": 2.32, "grad_norm": 0.6557530164718628, "learning_rate": 7.19768367751903e-05, "loss": 1.6118, "step": 69868 }, { "epoch": 2.32, "grad_norm": 0.6716735363006592, "learning_rate": 7.197004478573535e-05, "loss": 1.6414, "step": 69869 }, { "epoch": 2.32, "grad_norm": 0.6544086933135986, "learning_rate": 7.196325307307525e-05, "loss": 1.6909, "step": 69870 }, { "epoch": 2.32, "grad_norm": 0.6596884727478027, "learning_rate": 7.195646163721816e-05, "loss": 1.7115, "step": 69871 }, { "epoch": 2.32, "grad_norm": 0.6568273901939392, "learning_rate": 7.194967047817223e-05, "loss": 1.7062, "step": 69872 }, { "epoch": 2.32, "grad_norm": 0.6800783276557922, "learning_rate": 7.194287959594583e-05, "loss": 1.7474, "step": 69873 }, { "epoch": 2.32, "grad_norm": 0.6740914583206177, "learning_rate": 7.19360889905473e-05, "loss": 1.7287, "step": 69874 }, { "epoch": 2.32, "grad_norm": 0.6781560778617859, "learning_rate": 7.19292986619846e-05, "loss": 1.7335, "step": 69875 }, { "epoch": 2.32, "grad_norm": 0.6659664511680603, "learning_rate": 7.192250861026615e-05, "loss": 1.6326, "step": 69876 }, { "epoch": 2.32, "grad_norm": 0.6719493865966797, "learning_rate": 7.19157188354002e-05, "loss": 1.7357, "step": 69877 }, { "epoch": 2.32, "grad_norm": 0.6629418134689331, "learning_rate": 7.190892933739503e-05, "loss": 1.6748, "step": 69878 }, { "epoch": 2.32, "grad_norm": 0.6514456272125244, "learning_rate": 7.190214011625867e-05, "loss": 1.6449, "step": 69879 }, { "epoch": 2.32, "grad_norm": 0.6852027773857117, "learning_rate": 7.189535117199952e-05, "loss": 1.7337, "step": 69880 }, { "epoch": 2.32, "grad_norm": 0.6876885294914246, "learning_rate": 7.188856250462601e-05, "loss": 1.633, "step": 69881 }, { "epoch": 2.32, "grad_norm": 0.6961389183998108, "learning_rate": 7.188177411414597e-05, "loss": 1.7643, "step": 69882 }, { "epoch": 2.33, "grad_norm": 0.6793784499168396, "learning_rate": 7.187498600056782e-05, "loss": 1.7259, "step": 69883 }, { "epoch": 2.33, "grad_norm": 0.6594370007514954, "learning_rate": 7.186819816389995e-05, "loss": 1.7027, "step": 69884 }, { "epoch": 2.33, "grad_norm": 0.6487503051757812, "learning_rate": 7.186141060415046e-05, "loss": 1.703, "step": 69885 }, { "epoch": 2.33, "grad_norm": 0.658736526966095, "learning_rate": 7.185462332132752e-05, "loss": 1.633, "step": 69886 }, { "epoch": 2.33, "grad_norm": 0.7020322680473328, "learning_rate": 7.184783631543955e-05, "loss": 1.7047, "step": 69887 }, { "epoch": 2.33, "grad_norm": 0.6688903570175171, "learning_rate": 7.184104958649467e-05, "loss": 1.7189, "step": 69888 }, { "epoch": 2.33, "grad_norm": 0.7002971768379211, "learning_rate": 7.183426313450106e-05, "loss": 1.6827, "step": 69889 }, { "epoch": 2.33, "grad_norm": 0.6947028636932373, "learning_rate": 7.182747695946704e-05, "loss": 1.7053, "step": 69890 }, { "epoch": 2.33, "grad_norm": 0.6642224192619324, "learning_rate": 7.182069106140092e-05, "loss": 1.7004, "step": 69891 }, { "epoch": 2.33, "grad_norm": 0.6641750931739807, "learning_rate": 7.181390544031086e-05, "loss": 1.6799, "step": 69892 }, { "epoch": 2.33, "grad_norm": 0.782917857170105, "learning_rate": 7.1807120096205e-05, "loss": 1.7048, "step": 69893 }, { "epoch": 2.33, "grad_norm": 0.6692203879356384, "learning_rate": 7.180033502909179e-05, "loss": 1.7178, "step": 69894 }, { "epoch": 2.33, "grad_norm": 0.6669324040412903, "learning_rate": 7.179355023897935e-05, "loss": 1.6708, "step": 69895 }, { "epoch": 2.33, "grad_norm": 0.6623506546020508, "learning_rate": 7.17867657258758e-05, "loss": 1.6424, "step": 69896 }, { "epoch": 2.33, "grad_norm": 0.6973429918289185, "learning_rate": 7.177998148978958e-05, "loss": 1.6488, "step": 69897 }, { "epoch": 2.33, "grad_norm": 0.6701316833496094, "learning_rate": 7.177319753072877e-05, "loss": 1.7157, "step": 69898 }, { "epoch": 2.33, "grad_norm": 0.659140408039093, "learning_rate": 7.176641384870176e-05, "loss": 1.6751, "step": 69899 }, { "epoch": 2.33, "grad_norm": 0.6563760042190552, "learning_rate": 7.175963044371657e-05, "loss": 1.662, "step": 69900 }, { "epoch": 2.33, "grad_norm": 0.6559550166130066, "learning_rate": 7.17528473157817e-05, "loss": 1.6158, "step": 69901 }, { "epoch": 2.33, "grad_norm": 0.6823359727859497, "learning_rate": 7.17460644649052e-05, "loss": 1.7119, "step": 69902 }, { "epoch": 2.33, "grad_norm": 0.6766685843467712, "learning_rate": 7.173928189109528e-05, "loss": 1.6962, "step": 69903 }, { "epoch": 2.33, "grad_norm": 0.6934537887573242, "learning_rate": 7.173249959436032e-05, "loss": 1.711, "step": 69904 }, { "epoch": 2.33, "grad_norm": 0.6668582558631897, "learning_rate": 7.172571757470837e-05, "loss": 1.6785, "step": 69905 }, { "epoch": 2.33, "grad_norm": 0.6703732013702393, "learning_rate": 7.171893583214788e-05, "loss": 1.6605, "step": 69906 }, { "epoch": 2.33, "grad_norm": 0.6856200098991394, "learning_rate": 7.171215436668687e-05, "loss": 1.6699, "step": 69907 }, { "epoch": 2.33, "grad_norm": 0.6785461902618408, "learning_rate": 7.170537317833375e-05, "loss": 1.6549, "step": 69908 }, { "epoch": 2.33, "grad_norm": 0.6855640411376953, "learning_rate": 7.169859226709667e-05, "loss": 1.6817, "step": 69909 }, { "epoch": 2.33, "grad_norm": 0.6580796241760254, "learning_rate": 7.169181163298375e-05, "loss": 1.7252, "step": 69910 }, { "epoch": 2.33, "grad_norm": 0.6611640453338623, "learning_rate": 7.168503127600345e-05, "loss": 1.7312, "step": 69911 }, { "epoch": 2.33, "grad_norm": 0.6719117760658264, "learning_rate": 7.167825119616378e-05, "loss": 1.7777, "step": 69912 }, { "epoch": 2.33, "grad_norm": 0.6588821411132812, "learning_rate": 7.167147139347319e-05, "loss": 1.6064, "step": 69913 }, { "epoch": 2.33, "grad_norm": 0.6947842240333557, "learning_rate": 7.166469186793977e-05, "loss": 1.8309, "step": 69914 }, { "epoch": 2.33, "grad_norm": 0.6813724040985107, "learning_rate": 7.165791261957165e-05, "loss": 1.7116, "step": 69915 }, { "epoch": 2.33, "grad_norm": 0.6863773465156555, "learning_rate": 7.16511336483773e-05, "loss": 1.6652, "step": 69916 }, { "epoch": 2.33, "grad_norm": 0.6512177586555481, "learning_rate": 7.164435495436472e-05, "loss": 1.6751, "step": 69917 }, { "epoch": 2.33, "grad_norm": 0.6653397083282471, "learning_rate": 7.163757653754235e-05, "loss": 1.672, "step": 69918 }, { "epoch": 2.33, "grad_norm": 0.6783446073532104, "learning_rate": 7.163079839791824e-05, "loss": 1.7363, "step": 69919 }, { "epoch": 2.33, "grad_norm": 0.6895179152488708, "learning_rate": 7.162402053550075e-05, "loss": 1.6682, "step": 69920 }, { "epoch": 2.33, "grad_norm": 0.6832457184791565, "learning_rate": 7.161724295029809e-05, "loss": 1.7156, "step": 69921 }, { "epoch": 2.33, "grad_norm": 0.6912273168563843, "learning_rate": 7.161046564231834e-05, "loss": 1.7337, "step": 69922 }, { "epoch": 2.33, "grad_norm": 0.6712809205055237, "learning_rate": 7.160368861156992e-05, "loss": 1.6859, "step": 69923 }, { "epoch": 2.33, "grad_norm": 0.6898476481437683, "learning_rate": 7.159691185806094e-05, "loss": 1.7345, "step": 69924 }, { "epoch": 2.33, "grad_norm": 0.6932213306427002, "learning_rate": 7.159013538179961e-05, "loss": 1.7494, "step": 69925 }, { "epoch": 2.33, "grad_norm": 0.6588371992111206, "learning_rate": 7.158335918279417e-05, "loss": 1.6698, "step": 69926 }, { "epoch": 2.33, "grad_norm": 0.6572786569595337, "learning_rate": 7.157658326105296e-05, "loss": 1.7004, "step": 69927 }, { "epoch": 2.33, "grad_norm": 0.6633723974227905, "learning_rate": 7.156980761658417e-05, "loss": 1.7328, "step": 69928 }, { "epoch": 2.33, "grad_norm": 0.6659142374992371, "learning_rate": 7.156303224939584e-05, "loss": 1.7134, "step": 69929 }, { "epoch": 2.33, "grad_norm": 0.6684718132019043, "learning_rate": 7.155625715949634e-05, "loss": 1.6507, "step": 69930 }, { "epoch": 2.33, "grad_norm": 0.6999568343162537, "learning_rate": 7.154948234689409e-05, "loss": 1.6674, "step": 69931 }, { "epoch": 2.33, "grad_norm": 0.6774222254753113, "learning_rate": 7.154270781159689e-05, "loss": 1.7467, "step": 69932 }, { "epoch": 2.33, "grad_norm": 0.6596530079841614, "learning_rate": 7.153593355361322e-05, "loss": 1.6914, "step": 69933 }, { "epoch": 2.33, "grad_norm": 0.6808717846870422, "learning_rate": 7.152915957295133e-05, "loss": 1.7051, "step": 69934 }, { "epoch": 2.33, "grad_norm": 0.6909112334251404, "learning_rate": 7.152238586961938e-05, "loss": 1.656, "step": 69935 }, { "epoch": 2.33, "grad_norm": 0.6712298393249512, "learning_rate": 7.151561244362549e-05, "loss": 1.7012, "step": 69936 }, { "epoch": 2.33, "grad_norm": 0.6665903329849243, "learning_rate": 7.150883929497798e-05, "loss": 1.6964, "step": 69937 }, { "epoch": 2.33, "grad_norm": 0.6697871088981628, "learning_rate": 7.150206642368527e-05, "loss": 1.7503, "step": 69938 }, { "epoch": 2.33, "grad_norm": 0.6581571698188782, "learning_rate": 7.149529382975521e-05, "loss": 1.6855, "step": 69939 }, { "epoch": 2.33, "grad_norm": 0.6516086459159851, "learning_rate": 7.148852151319616e-05, "loss": 1.6615, "step": 69940 }, { "epoch": 2.33, "grad_norm": 0.6804205775260925, "learning_rate": 7.14817494740165e-05, "loss": 1.7202, "step": 69941 }, { "epoch": 2.33, "grad_norm": 0.672605574131012, "learning_rate": 7.147497771222429e-05, "loss": 1.6712, "step": 69942 }, { "epoch": 2.33, "grad_norm": 0.7012714743614197, "learning_rate": 7.146820622782776e-05, "loss": 1.5969, "step": 69943 }, { "epoch": 2.33, "grad_norm": 0.6791547536849976, "learning_rate": 7.146143502083518e-05, "loss": 1.6854, "step": 69944 }, { "epoch": 2.33, "grad_norm": 0.6803920269012451, "learning_rate": 7.145466409125476e-05, "loss": 1.6409, "step": 69945 }, { "epoch": 2.33, "grad_norm": 0.6961107850074768, "learning_rate": 7.144789343909461e-05, "loss": 1.751, "step": 69946 }, { "epoch": 2.33, "grad_norm": 0.7065935730934143, "learning_rate": 7.144112306436306e-05, "loss": 1.7054, "step": 69947 }, { "epoch": 2.33, "grad_norm": 0.6726147532463074, "learning_rate": 7.143435296706838e-05, "loss": 1.6654, "step": 69948 }, { "epoch": 2.33, "grad_norm": 0.6684651374816895, "learning_rate": 7.142758314721875e-05, "loss": 1.6895, "step": 69949 }, { "epoch": 2.33, "grad_norm": 0.6616992950439453, "learning_rate": 7.142081360482223e-05, "loss": 1.7487, "step": 69950 }, { "epoch": 2.33, "grad_norm": 0.6836766600608826, "learning_rate": 7.141404433988728e-05, "loss": 1.777, "step": 69951 }, { "epoch": 2.33, "grad_norm": 0.6586421132087708, "learning_rate": 7.140727535242196e-05, "loss": 1.6907, "step": 69952 }, { "epoch": 2.33, "grad_norm": 0.6719857454299927, "learning_rate": 7.14005066424345e-05, "loss": 1.7647, "step": 69953 }, { "epoch": 2.33, "grad_norm": 0.6739609241485596, "learning_rate": 7.139373820993319e-05, "loss": 1.6703, "step": 69954 }, { "epoch": 2.33, "grad_norm": 0.7035039663314819, "learning_rate": 7.138697005492611e-05, "loss": 1.6266, "step": 69955 }, { "epoch": 2.33, "grad_norm": 0.664258599281311, "learning_rate": 7.138020217742167e-05, "loss": 1.7476, "step": 69956 }, { "epoch": 2.33, "grad_norm": 0.6572619676589966, "learning_rate": 7.137343457742786e-05, "loss": 1.7649, "step": 69957 }, { "epoch": 2.33, "grad_norm": 0.6622671484947205, "learning_rate": 7.136666725495312e-05, "loss": 1.6943, "step": 69958 }, { "epoch": 2.33, "grad_norm": 0.6959935426712036, "learning_rate": 7.135990021000556e-05, "loss": 1.7167, "step": 69959 }, { "epoch": 2.33, "grad_norm": 0.6864829063415527, "learning_rate": 7.135313344259329e-05, "loss": 1.7058, "step": 69960 }, { "epoch": 2.33, "grad_norm": 0.663430392742157, "learning_rate": 7.134636695272475e-05, "loss": 1.6686, "step": 69961 }, { "epoch": 2.33, "grad_norm": 0.6529693603515625, "learning_rate": 7.133960074040792e-05, "loss": 1.6892, "step": 69962 }, { "epoch": 2.33, "grad_norm": 0.672074019908905, "learning_rate": 7.133283480565119e-05, "loss": 1.6937, "step": 69963 }, { "epoch": 2.33, "grad_norm": 0.673236072063446, "learning_rate": 7.132606914846273e-05, "loss": 1.7028, "step": 69964 }, { "epoch": 2.33, "grad_norm": 0.6789001822471619, "learning_rate": 7.131930376885064e-05, "loss": 1.6699, "step": 69965 }, { "epoch": 2.33, "grad_norm": 0.6954069137573242, "learning_rate": 7.131253866682329e-05, "loss": 1.6981, "step": 69966 }, { "epoch": 2.33, "grad_norm": 0.6783321499824524, "learning_rate": 7.130577384238876e-05, "loss": 1.7079, "step": 69967 }, { "epoch": 2.33, "grad_norm": 0.6551175117492676, "learning_rate": 7.129900929555542e-05, "loss": 1.7243, "step": 69968 }, { "epoch": 2.33, "grad_norm": 0.6720168590545654, "learning_rate": 7.129224502633128e-05, "loss": 1.7586, "step": 69969 }, { "epoch": 2.33, "grad_norm": 0.6765527129173279, "learning_rate": 7.128548103472473e-05, "loss": 1.6363, "step": 69970 }, { "epoch": 2.33, "grad_norm": 0.7024419903755188, "learning_rate": 7.127871732074393e-05, "loss": 1.7512, "step": 69971 }, { "epoch": 2.33, "grad_norm": 0.6711735725402832, "learning_rate": 7.127195388439694e-05, "loss": 1.7726, "step": 69972 }, { "epoch": 2.33, "grad_norm": 0.6970489621162415, "learning_rate": 7.126519072569224e-05, "loss": 1.6522, "step": 69973 }, { "epoch": 2.33, "grad_norm": 0.6740589141845703, "learning_rate": 7.125842784463784e-05, "loss": 1.6924, "step": 69974 }, { "epoch": 2.33, "grad_norm": 0.6588112711906433, "learning_rate": 7.125166524124197e-05, "loss": 1.6724, "step": 69975 }, { "epoch": 2.33, "grad_norm": 0.6709555983543396, "learning_rate": 7.124490291551286e-05, "loss": 1.6859, "step": 69976 }, { "epoch": 2.33, "grad_norm": 0.6896436214447021, "learning_rate": 7.12381408674588e-05, "loss": 1.6541, "step": 69977 }, { "epoch": 2.33, "grad_norm": 0.6704282760620117, "learning_rate": 7.123137909708793e-05, "loss": 1.6799, "step": 69978 }, { "epoch": 2.33, "grad_norm": 0.6997508406639099, "learning_rate": 7.122461760440839e-05, "loss": 1.6834, "step": 69979 }, { "epoch": 2.33, "grad_norm": 0.6786247491836548, "learning_rate": 7.121785638942853e-05, "loss": 1.745, "step": 69980 }, { "epoch": 2.33, "grad_norm": 0.6663986444473267, "learning_rate": 7.12110954521565e-05, "loss": 1.7504, "step": 69981 }, { "epoch": 2.33, "grad_norm": 0.6642389297485352, "learning_rate": 7.120433479260039e-05, "loss": 1.6557, "step": 69982 }, { "epoch": 2.33, "grad_norm": 0.6700199246406555, "learning_rate": 7.11975744107685e-05, "loss": 1.6437, "step": 69983 }, { "epoch": 2.33, "grad_norm": 0.661823034286499, "learning_rate": 7.119081430666921e-05, "loss": 1.7208, "step": 69984 }, { "epoch": 2.33, "grad_norm": 0.68585205078125, "learning_rate": 7.11840544803104e-05, "loss": 1.6301, "step": 69985 }, { "epoch": 2.33, "grad_norm": 0.6613783240318298, "learning_rate": 7.117729493170043e-05, "loss": 1.6597, "step": 69986 }, { "epoch": 2.33, "grad_norm": 0.7047449350357056, "learning_rate": 7.11705356608476e-05, "loss": 1.6263, "step": 69987 }, { "epoch": 2.33, "grad_norm": 0.6696192622184753, "learning_rate": 7.116377666776001e-05, "loss": 1.7174, "step": 69988 }, { "epoch": 2.33, "grad_norm": 0.66650390625, "learning_rate": 7.115701795244583e-05, "loss": 1.6485, "step": 69989 }, { "epoch": 2.33, "grad_norm": 0.6756527423858643, "learning_rate": 7.115025951491328e-05, "loss": 1.6787, "step": 69990 }, { "epoch": 2.33, "grad_norm": 0.682460367679596, "learning_rate": 7.114350135517075e-05, "loss": 1.713, "step": 69991 }, { "epoch": 2.33, "grad_norm": 0.6489788293838501, "learning_rate": 7.113674347322614e-05, "loss": 1.6506, "step": 69992 }, { "epoch": 2.33, "grad_norm": 0.6557816863059998, "learning_rate": 7.112998586908779e-05, "loss": 1.6833, "step": 69993 }, { "epoch": 2.33, "grad_norm": 0.7072104811668396, "learning_rate": 7.112322854276402e-05, "loss": 1.7164, "step": 69994 }, { "epoch": 2.33, "grad_norm": 0.645346999168396, "learning_rate": 7.111647149426293e-05, "loss": 1.6725, "step": 69995 }, { "epoch": 2.33, "grad_norm": 0.6719633340835571, "learning_rate": 7.110971472359259e-05, "loss": 1.6652, "step": 69996 }, { "epoch": 2.33, "grad_norm": 0.6631192564964294, "learning_rate": 7.110295823076138e-05, "loss": 1.6719, "step": 69997 }, { "epoch": 2.33, "grad_norm": 0.680705726146698, "learning_rate": 7.10962020157776e-05, "loss": 1.6866, "step": 69998 }, { "epoch": 2.33, "grad_norm": 0.6633315086364746, "learning_rate": 7.108944607864914e-05, "loss": 1.7756, "step": 69999 }, { "epoch": 2.33, "grad_norm": 0.6703959703445435, "learning_rate": 7.108269041938434e-05, "loss": 1.7454, "step": 70000 }, { "epoch": 2.33, "grad_norm": 0.6996346712112427, "learning_rate": 7.107593503799153e-05, "loss": 1.7396, "step": 70001 }, { "epoch": 2.33, "grad_norm": 0.6835711002349854, "learning_rate": 7.106917993447881e-05, "loss": 1.6583, "step": 70002 }, { "epoch": 2.33, "grad_norm": 0.6656843423843384, "learning_rate": 7.106242510885429e-05, "loss": 1.6821, "step": 70003 }, { "epoch": 2.33, "grad_norm": 0.6779067516326904, "learning_rate": 7.105567056112632e-05, "loss": 1.7585, "step": 70004 }, { "epoch": 2.33, "grad_norm": 0.6802244186401367, "learning_rate": 7.104891629130305e-05, "loss": 1.6633, "step": 70005 }, { "epoch": 2.33, "grad_norm": 0.6491613388061523, "learning_rate": 7.104216229939257e-05, "loss": 1.6814, "step": 70006 }, { "epoch": 2.33, "grad_norm": 0.6954956650733948, "learning_rate": 7.103540858540317e-05, "loss": 1.6606, "step": 70007 }, { "epoch": 2.33, "grad_norm": 0.6817407011985779, "learning_rate": 7.102865514934313e-05, "loss": 1.6982, "step": 70008 }, { "epoch": 2.33, "grad_norm": 0.6776136755943298, "learning_rate": 7.102190199122059e-05, "loss": 1.7312, "step": 70009 }, { "epoch": 2.33, "grad_norm": 0.6788330078125, "learning_rate": 7.101514911104362e-05, "loss": 1.7617, "step": 70010 }, { "epoch": 2.33, "grad_norm": 0.6752944588661194, "learning_rate": 7.100839650882059e-05, "loss": 1.6768, "step": 70011 }, { "epoch": 2.33, "grad_norm": 0.6756331920623779, "learning_rate": 7.100164418455964e-05, "loss": 1.7145, "step": 70012 }, { "epoch": 2.33, "grad_norm": 0.6787240505218506, "learning_rate": 7.099489213826885e-05, "loss": 1.77, "step": 70013 }, { "epoch": 2.33, "grad_norm": 0.6680572628974915, "learning_rate": 7.098814036995662e-05, "loss": 1.685, "step": 70014 }, { "epoch": 2.33, "grad_norm": 0.685278058052063, "learning_rate": 7.098138887963097e-05, "loss": 1.7221, "step": 70015 }, { "epoch": 2.33, "grad_norm": 0.6708487272262573, "learning_rate": 7.097463766730023e-05, "loss": 1.655, "step": 70016 }, { "epoch": 2.33, "grad_norm": 0.6669837832450867, "learning_rate": 7.096788673297246e-05, "loss": 1.6234, "step": 70017 }, { "epoch": 2.33, "grad_norm": 0.680615246295929, "learning_rate": 7.096113607665602e-05, "loss": 1.634, "step": 70018 }, { "epoch": 2.33, "grad_norm": 0.6678019762039185, "learning_rate": 7.095438569835902e-05, "loss": 1.7464, "step": 70019 }, { "epoch": 2.33, "grad_norm": 0.6443772912025452, "learning_rate": 7.094763559808955e-05, "loss": 1.6378, "step": 70020 }, { "epoch": 2.33, "grad_norm": 0.6536929607391357, "learning_rate": 7.094088577585597e-05, "loss": 1.7495, "step": 70021 }, { "epoch": 2.33, "grad_norm": 0.674971878528595, "learning_rate": 7.093413623166635e-05, "loss": 1.6946, "step": 70022 }, { "epoch": 2.33, "grad_norm": 1.9337024688720703, "learning_rate": 7.0927386965529e-05, "loss": 1.6562, "step": 70023 }, { "epoch": 2.33, "grad_norm": 0.662619948387146, "learning_rate": 7.092063797745208e-05, "loss": 1.684, "step": 70024 }, { "epoch": 2.33, "grad_norm": 0.6756419539451599, "learning_rate": 7.091388926744365e-05, "loss": 1.7222, "step": 70025 }, { "epoch": 2.33, "grad_norm": 0.6528597474098206, "learning_rate": 7.090714083551208e-05, "loss": 1.6644, "step": 70026 }, { "epoch": 2.33, "grad_norm": 0.6558844447135925, "learning_rate": 7.09003926816654e-05, "loss": 1.648, "step": 70027 }, { "epoch": 2.33, "grad_norm": 0.6415347456932068, "learning_rate": 7.0893644805912e-05, "loss": 1.6926, "step": 70028 }, { "epoch": 2.33, "grad_norm": 0.6747815012931824, "learning_rate": 7.088689720825985e-05, "loss": 1.7457, "step": 70029 }, { "epoch": 2.33, "grad_norm": 0.6648867726325989, "learning_rate": 7.088014988871732e-05, "loss": 1.8068, "step": 70030 }, { "epoch": 2.33, "grad_norm": 0.6385846138000488, "learning_rate": 7.087340284729256e-05, "loss": 1.6921, "step": 70031 }, { "epoch": 2.33, "grad_norm": 0.6740384101867676, "learning_rate": 7.086665608399362e-05, "loss": 1.6959, "step": 70032 }, { "epoch": 2.33, "grad_norm": 0.6896957159042358, "learning_rate": 7.085990959882887e-05, "loss": 1.6547, "step": 70033 }, { "epoch": 2.33, "grad_norm": 0.6801503300666809, "learning_rate": 7.085316339180639e-05, "loss": 1.7509, "step": 70034 }, { "epoch": 2.33, "grad_norm": 0.6724837422370911, "learning_rate": 7.084641746293445e-05, "loss": 1.661, "step": 70035 }, { "epoch": 2.33, "grad_norm": 0.64725661277771, "learning_rate": 7.083967181222113e-05, "loss": 1.6359, "step": 70036 }, { "epoch": 2.33, "grad_norm": 0.6765491962432861, "learning_rate": 7.083292643967477e-05, "loss": 1.6586, "step": 70037 }, { "epoch": 2.33, "grad_norm": 0.6542341709136963, "learning_rate": 7.082618134530347e-05, "loss": 1.6539, "step": 70038 }, { "epoch": 2.33, "grad_norm": 0.700374186038971, "learning_rate": 7.08194365291153e-05, "loss": 1.7233, "step": 70039 }, { "epoch": 2.33, "grad_norm": 0.6534773111343384, "learning_rate": 7.081269199111861e-05, "loss": 1.7162, "step": 70040 }, { "epoch": 2.33, "grad_norm": 0.6510029435157776, "learning_rate": 7.080594773132171e-05, "loss": 1.5986, "step": 70041 }, { "epoch": 2.33, "grad_norm": 0.6500925421714783, "learning_rate": 7.079920374973244e-05, "loss": 1.618, "step": 70042 }, { "epoch": 2.33, "grad_norm": 0.6776102781295776, "learning_rate": 7.079246004635914e-05, "loss": 1.7482, "step": 70043 }, { "epoch": 2.33, "grad_norm": 0.6999083161354065, "learning_rate": 7.078571662121013e-05, "loss": 1.766, "step": 70044 }, { "epoch": 2.33, "grad_norm": 0.6821057200431824, "learning_rate": 7.077897347429349e-05, "loss": 1.7094, "step": 70045 }, { "epoch": 2.33, "grad_norm": 0.6738216280937195, "learning_rate": 7.07722306056173e-05, "loss": 1.6896, "step": 70046 }, { "epoch": 2.33, "grad_norm": 0.6758201718330383, "learning_rate": 7.076548801518985e-05, "loss": 1.739, "step": 70047 }, { "epoch": 2.33, "grad_norm": 0.672598659992218, "learning_rate": 7.075874570301952e-05, "loss": 1.721, "step": 70048 }, { "epoch": 2.33, "grad_norm": 0.660048246383667, "learning_rate": 7.075200366911411e-05, "loss": 1.6687, "step": 70049 }, { "epoch": 2.33, "grad_norm": 0.6653650999069214, "learning_rate": 7.074526191348197e-05, "loss": 1.6566, "step": 70050 }, { "epoch": 2.33, "grad_norm": 0.6578222513198853, "learning_rate": 7.07385204361314e-05, "loss": 1.7092, "step": 70051 }, { "epoch": 2.33, "grad_norm": 0.697241485118866, "learning_rate": 7.073177923707051e-05, "loss": 1.7601, "step": 70052 }, { "epoch": 2.33, "grad_norm": 0.6825003623962402, "learning_rate": 7.072503831630736e-05, "loss": 1.7285, "step": 70053 }, { "epoch": 2.33, "grad_norm": 0.6935949921607971, "learning_rate": 7.071829767385022e-05, "loss": 1.6668, "step": 70054 }, { "epoch": 2.33, "grad_norm": 0.6553190350532532, "learning_rate": 7.071155730970745e-05, "loss": 1.6689, "step": 70055 }, { "epoch": 2.33, "grad_norm": 0.7579284310340881, "learning_rate": 7.070481722388689e-05, "loss": 1.7486, "step": 70056 }, { "epoch": 2.33, "grad_norm": 0.668185830116272, "learning_rate": 7.069807741639691e-05, "loss": 1.6572, "step": 70057 }, { "epoch": 2.33, "grad_norm": 0.7216396331787109, "learning_rate": 7.069133788724579e-05, "loss": 1.6909, "step": 70058 }, { "epoch": 2.33, "grad_norm": 0.6773349046707153, "learning_rate": 7.068459863644154e-05, "loss": 1.6526, "step": 70059 }, { "epoch": 2.33, "grad_norm": 0.6595581769943237, "learning_rate": 7.067785966399238e-05, "loss": 1.7439, "step": 70060 }, { "epoch": 2.33, "grad_norm": 0.6754871010780334, "learning_rate": 7.067112096990654e-05, "loss": 1.6942, "step": 70061 }, { "epoch": 2.33, "grad_norm": 0.6702487468719482, "learning_rate": 7.06643825541922e-05, "loss": 1.6652, "step": 70062 }, { "epoch": 2.33, "grad_norm": 0.6754562258720398, "learning_rate": 7.065764441685743e-05, "loss": 1.6912, "step": 70063 }, { "epoch": 2.33, "grad_norm": 0.650418758392334, "learning_rate": 7.065090655791044e-05, "loss": 1.6445, "step": 70064 }, { "epoch": 2.33, "grad_norm": 0.6631258726119995, "learning_rate": 7.064416897735957e-05, "loss": 1.7088, "step": 70065 }, { "epoch": 2.33, "grad_norm": 0.6548927426338196, "learning_rate": 7.063743167521289e-05, "loss": 1.7214, "step": 70066 }, { "epoch": 2.33, "grad_norm": 0.6801847219467163, "learning_rate": 7.063069465147852e-05, "loss": 1.7879, "step": 70067 }, { "epoch": 2.33, "grad_norm": 0.6940276622772217, "learning_rate": 7.062395790616471e-05, "loss": 1.7596, "step": 70068 }, { "epoch": 2.33, "grad_norm": 0.6642524600028992, "learning_rate": 7.061722143927966e-05, "loss": 1.628, "step": 70069 }, { "epoch": 2.33, "grad_norm": 0.6758113503456116, "learning_rate": 7.061048525083142e-05, "loss": 1.7355, "step": 70070 }, { "epoch": 2.33, "grad_norm": 0.6590225100517273, "learning_rate": 7.060374934082832e-05, "loss": 1.6927, "step": 70071 }, { "epoch": 2.33, "grad_norm": 0.7028586268424988, "learning_rate": 7.059701370927837e-05, "loss": 1.7312, "step": 70072 }, { "epoch": 2.33, "grad_norm": 0.6660143733024597, "learning_rate": 7.059027835618997e-05, "loss": 1.7304, "step": 70073 }, { "epoch": 2.33, "grad_norm": 0.6795339584350586, "learning_rate": 7.058354328157107e-05, "loss": 1.7278, "step": 70074 }, { "epoch": 2.33, "grad_norm": 0.6887714862823486, "learning_rate": 7.057680848543002e-05, "loss": 1.6534, "step": 70075 }, { "epoch": 2.33, "grad_norm": 0.669452428817749, "learning_rate": 7.057007396777492e-05, "loss": 1.6821, "step": 70076 }, { "epoch": 2.33, "grad_norm": 0.6818097829818726, "learning_rate": 7.056333972861387e-05, "loss": 1.6395, "step": 70077 }, { "epoch": 2.33, "grad_norm": 0.6665604710578918, "learning_rate": 7.055660576795519e-05, "loss": 1.6867, "step": 70078 }, { "epoch": 2.33, "grad_norm": 0.673992395401001, "learning_rate": 7.054987208580691e-05, "loss": 1.733, "step": 70079 }, { "epoch": 2.33, "grad_norm": 0.687373161315918, "learning_rate": 7.054313868217737e-05, "loss": 1.7022, "step": 70080 }, { "epoch": 2.33, "grad_norm": 0.66084223985672, "learning_rate": 7.053640555707465e-05, "loss": 1.7649, "step": 70081 }, { "epoch": 2.33, "grad_norm": 0.6803345680236816, "learning_rate": 7.052967271050681e-05, "loss": 1.7489, "step": 70082 }, { "epoch": 2.33, "grad_norm": 0.6991196274757385, "learning_rate": 7.052294014248227e-05, "loss": 1.7292, "step": 70083 }, { "epoch": 2.33, "grad_norm": 0.6590745449066162, "learning_rate": 7.051620785300893e-05, "loss": 1.6391, "step": 70084 }, { "epoch": 2.33, "grad_norm": 0.6550124287605286, "learning_rate": 7.05094758420952e-05, "loss": 1.7185, "step": 70085 }, { "epoch": 2.33, "grad_norm": 0.6748224496841431, "learning_rate": 7.050274410974912e-05, "loss": 1.668, "step": 70086 }, { "epoch": 2.33, "grad_norm": 0.6546005010604858, "learning_rate": 7.049601265597892e-05, "loss": 1.6833, "step": 70087 }, { "epoch": 2.33, "grad_norm": 0.6573641300201416, "learning_rate": 7.048928148079276e-05, "loss": 1.6876, "step": 70088 }, { "epoch": 2.33, "grad_norm": 0.6784356832504272, "learning_rate": 7.048255058419871e-05, "loss": 1.6439, "step": 70089 }, { "epoch": 2.33, "grad_norm": 0.6640127301216125, "learning_rate": 7.047581996620513e-05, "loss": 1.6772, "step": 70090 }, { "epoch": 2.33, "grad_norm": 0.6709509491920471, "learning_rate": 7.046908962682005e-05, "loss": 1.7273, "step": 70091 }, { "epoch": 2.33, "grad_norm": 0.6792411804199219, "learning_rate": 7.046235956605165e-05, "loss": 1.6953, "step": 70092 }, { "epoch": 2.33, "grad_norm": 0.6829485297203064, "learning_rate": 7.045562978390805e-05, "loss": 1.6924, "step": 70093 }, { "epoch": 2.33, "grad_norm": 0.6443085670471191, "learning_rate": 7.044890028039761e-05, "loss": 1.6702, "step": 70094 }, { "epoch": 2.33, "grad_norm": 0.6775398850440979, "learning_rate": 7.044217105552841e-05, "loss": 1.7346, "step": 70095 }, { "epoch": 2.33, "grad_norm": 0.674646258354187, "learning_rate": 7.043544210930846e-05, "loss": 1.6475, "step": 70096 }, { "epoch": 2.33, "grad_norm": 0.6704899072647095, "learning_rate": 7.042871344174616e-05, "loss": 1.7095, "step": 70097 }, { "epoch": 2.33, "grad_norm": 0.66161048412323, "learning_rate": 7.04219850528496e-05, "loss": 1.6988, "step": 70098 }, { "epoch": 2.33, "grad_norm": 0.6345804929733276, "learning_rate": 7.041525694262681e-05, "loss": 1.6545, "step": 70099 }, { "epoch": 2.33, "grad_norm": 0.6765342950820923, "learning_rate": 7.04085291110861e-05, "loss": 1.6738, "step": 70100 }, { "epoch": 2.33, "grad_norm": 0.7033709287643433, "learning_rate": 7.040180155823577e-05, "loss": 1.6734, "step": 70101 }, { "epoch": 2.33, "grad_norm": 0.6720380783081055, "learning_rate": 7.039507428408362e-05, "loss": 1.756, "step": 70102 }, { "epoch": 2.33, "grad_norm": 0.6895883679389954, "learning_rate": 7.038834728863804e-05, "loss": 1.646, "step": 70103 }, { "epoch": 2.33, "grad_norm": 0.6560059189796448, "learning_rate": 7.038162057190727e-05, "loss": 1.6656, "step": 70104 }, { "epoch": 2.33, "grad_norm": 0.6628512740135193, "learning_rate": 7.037489413389935e-05, "loss": 1.6679, "step": 70105 }, { "epoch": 2.33, "grad_norm": 0.6765477061271667, "learning_rate": 7.03681679746224e-05, "loss": 1.743, "step": 70106 }, { "epoch": 2.33, "grad_norm": 0.6790502071380615, "learning_rate": 7.036144209408469e-05, "loss": 1.7067, "step": 70107 }, { "epoch": 2.33, "grad_norm": 0.7096343040466309, "learning_rate": 7.035471649229449e-05, "loss": 1.6919, "step": 70108 }, { "epoch": 2.33, "grad_norm": 0.6798043251037598, "learning_rate": 7.034799116925969e-05, "loss": 1.668, "step": 70109 }, { "epoch": 2.33, "grad_norm": 0.6607118248939514, "learning_rate": 7.034126612498857e-05, "loss": 1.6425, "step": 70110 }, { "epoch": 2.33, "grad_norm": 0.6515316963195801, "learning_rate": 7.03345413594894e-05, "loss": 1.6523, "step": 70111 }, { "epoch": 2.33, "grad_norm": 0.6797797679901123, "learning_rate": 7.032781687277025e-05, "loss": 1.704, "step": 70112 }, { "epoch": 2.33, "grad_norm": 0.6879101991653442, "learning_rate": 7.032109266483921e-05, "loss": 1.707, "step": 70113 }, { "epoch": 2.33, "grad_norm": 0.6723957061767578, "learning_rate": 7.03143687357045e-05, "loss": 1.6056, "step": 70114 }, { "epoch": 2.33, "grad_norm": 0.7095988392829895, "learning_rate": 7.030764508537451e-05, "loss": 1.7329, "step": 70115 }, { "epoch": 2.33, "grad_norm": 0.6631011962890625, "learning_rate": 7.030092171385701e-05, "loss": 1.6773, "step": 70116 }, { "epoch": 2.33, "grad_norm": 0.6911921501159668, "learning_rate": 7.029419862116036e-05, "loss": 1.7222, "step": 70117 }, { "epoch": 2.33, "grad_norm": 0.6656025052070618, "learning_rate": 7.028747580729279e-05, "loss": 1.6938, "step": 70118 }, { "epoch": 2.33, "grad_norm": 0.6819450259208679, "learning_rate": 7.028075327226238e-05, "loss": 1.7598, "step": 70119 }, { "epoch": 2.33, "grad_norm": 0.6947942972183228, "learning_rate": 7.027403101607718e-05, "loss": 1.6962, "step": 70120 }, { "epoch": 2.33, "grad_norm": 0.6800366044044495, "learning_rate": 7.026730903874556e-05, "loss": 1.7084, "step": 70121 }, { "epoch": 2.33, "grad_norm": 0.659122884273529, "learning_rate": 7.026058734027556e-05, "loss": 1.6815, "step": 70122 }, { "epoch": 2.33, "grad_norm": 0.6590970754623413, "learning_rate": 7.02538659206753e-05, "loss": 1.7013, "step": 70123 }, { "epoch": 2.33, "grad_norm": 0.6673339009284973, "learning_rate": 7.024714477995295e-05, "loss": 1.6756, "step": 70124 }, { "epoch": 2.33, "grad_norm": 0.6666715145111084, "learning_rate": 7.024042391811686e-05, "loss": 1.7418, "step": 70125 }, { "epoch": 2.33, "grad_norm": 0.6732969284057617, "learning_rate": 7.0233703335175e-05, "loss": 1.6635, "step": 70126 }, { "epoch": 2.33, "grad_norm": 0.6768971681594849, "learning_rate": 7.022698303113548e-05, "loss": 1.6529, "step": 70127 }, { "epoch": 2.33, "grad_norm": 0.6924381256103516, "learning_rate": 7.022026300600666e-05, "loss": 1.7081, "step": 70128 }, { "epoch": 2.33, "grad_norm": 0.6664384603500366, "learning_rate": 7.021354325979655e-05, "loss": 1.6866, "step": 70129 }, { "epoch": 2.33, "grad_norm": 0.6729956269264221, "learning_rate": 7.02068237925133e-05, "loss": 1.715, "step": 70130 }, { "epoch": 2.33, "grad_norm": 0.6402990221977234, "learning_rate": 7.020010460416519e-05, "loss": 1.5842, "step": 70131 }, { "epoch": 2.33, "grad_norm": 0.6759763360023499, "learning_rate": 7.019338569476017e-05, "loss": 1.6368, "step": 70132 }, { "epoch": 2.33, "grad_norm": 0.6776532530784607, "learning_rate": 7.018666706430662e-05, "loss": 1.6872, "step": 70133 }, { "epoch": 2.33, "grad_norm": 0.6685036420822144, "learning_rate": 7.017994871281253e-05, "loss": 1.6786, "step": 70134 }, { "epoch": 2.33, "grad_norm": 0.6833504438400269, "learning_rate": 7.017323064028619e-05, "loss": 1.7309, "step": 70135 }, { "epoch": 2.33, "grad_norm": 0.7086710929870605, "learning_rate": 7.016651284673568e-05, "loss": 1.7319, "step": 70136 }, { "epoch": 2.33, "grad_norm": 0.6630386114120483, "learning_rate": 7.015979533216905e-05, "loss": 1.6989, "step": 70137 }, { "epoch": 2.33, "grad_norm": 0.6711061000823975, "learning_rate": 7.015307809659468e-05, "loss": 1.7316, "step": 70138 }, { "epoch": 2.33, "grad_norm": 0.6961122155189514, "learning_rate": 7.014636114002052e-05, "loss": 1.7287, "step": 70139 }, { "epoch": 2.33, "grad_norm": 0.6826039552688599, "learning_rate": 7.013964446245489e-05, "loss": 1.6992, "step": 70140 }, { "epoch": 2.33, "grad_norm": 0.6687937378883362, "learning_rate": 7.013292806390588e-05, "loss": 1.743, "step": 70141 }, { "epoch": 2.33, "grad_norm": 0.6670112013816833, "learning_rate": 7.012621194438152e-05, "loss": 1.6669, "step": 70142 }, { "epoch": 2.33, "grad_norm": 0.6484026312828064, "learning_rate": 7.011949610389016e-05, "loss": 1.7224, "step": 70143 }, { "epoch": 2.33, "grad_norm": 0.6823536157608032, "learning_rate": 7.011278054243976e-05, "loss": 1.7595, "step": 70144 }, { "epoch": 2.33, "grad_norm": 0.6637585163116455, "learning_rate": 7.010606526003867e-05, "loss": 1.8081, "step": 70145 }, { "epoch": 2.33, "grad_norm": 0.6748284101486206, "learning_rate": 7.009935025669487e-05, "loss": 1.6277, "step": 70146 }, { "epoch": 2.33, "grad_norm": 0.7006492018699646, "learning_rate": 7.009263553241664e-05, "loss": 1.6796, "step": 70147 }, { "epoch": 2.33, "grad_norm": 0.6775485873222351, "learning_rate": 7.008592108721211e-05, "loss": 1.7539, "step": 70148 }, { "epoch": 2.33, "grad_norm": 0.6691137552261353, "learning_rate": 7.007920692108929e-05, "loss": 1.6339, "step": 70149 }, { "epoch": 2.33, "grad_norm": 0.6824433207511902, "learning_rate": 7.007249303405654e-05, "loss": 1.7125, "step": 70150 }, { "epoch": 2.33, "grad_norm": 0.6793045401573181, "learning_rate": 7.006577942612178e-05, "loss": 1.6986, "step": 70151 }, { "epoch": 2.33, "grad_norm": 0.6656481623649597, "learning_rate": 7.00590660972934e-05, "loss": 1.6556, "step": 70152 }, { "epoch": 2.33, "grad_norm": 0.6655527949333191, "learning_rate": 7.005235304757936e-05, "loss": 1.7083, "step": 70153 }, { "epoch": 2.33, "grad_norm": 0.6774038076400757, "learning_rate": 7.004564027698793e-05, "loss": 1.732, "step": 70154 }, { "epoch": 2.33, "grad_norm": 0.6623148918151855, "learning_rate": 7.003892778552723e-05, "loss": 1.6638, "step": 70155 }, { "epoch": 2.33, "grad_norm": 0.6649682521820068, "learning_rate": 7.00322155732053e-05, "loss": 1.5919, "step": 70156 }, { "epoch": 2.33, "grad_norm": 0.6647933721542358, "learning_rate": 7.002550364003039e-05, "loss": 1.711, "step": 70157 }, { "epoch": 2.33, "grad_norm": 0.6716594099998474, "learning_rate": 7.00187919860108e-05, "loss": 1.7015, "step": 70158 }, { "epoch": 2.33, "grad_norm": 0.6577878594398499, "learning_rate": 7.001208061115434e-05, "loss": 1.6919, "step": 70159 }, { "epoch": 2.33, "grad_norm": 0.6796582937240601, "learning_rate": 7.000536951546933e-05, "loss": 1.7322, "step": 70160 }, { "epoch": 2.33, "grad_norm": 0.6690893769264221, "learning_rate": 6.999865869896397e-05, "loss": 1.709, "step": 70161 }, { "epoch": 2.33, "grad_norm": 0.6845124959945679, "learning_rate": 6.999194816164638e-05, "loss": 1.6921, "step": 70162 }, { "epoch": 2.33, "grad_norm": 0.6769061088562012, "learning_rate": 6.99852379035246e-05, "loss": 1.7143, "step": 70163 }, { "epoch": 2.33, "grad_norm": 0.6949217915534973, "learning_rate": 6.997852792460683e-05, "loss": 1.6878, "step": 70164 }, { "epoch": 2.33, "grad_norm": 0.6637023687362671, "learning_rate": 6.997181822490139e-05, "loss": 1.6948, "step": 70165 }, { "epoch": 2.33, "grad_norm": 0.6745495200157166, "learning_rate": 6.996510880441613e-05, "loss": 1.6577, "step": 70166 }, { "epoch": 2.33, "grad_norm": 0.6962636113166809, "learning_rate": 6.995839966315931e-05, "loss": 1.7492, "step": 70167 }, { "epoch": 2.33, "grad_norm": 0.6620151996612549, "learning_rate": 6.995169080113919e-05, "loss": 1.7232, "step": 70168 }, { "epoch": 2.33, "grad_norm": 0.6703862547874451, "learning_rate": 6.994498221836382e-05, "loss": 1.6519, "step": 70169 }, { "epoch": 2.33, "grad_norm": 0.6500848531723022, "learning_rate": 6.993827391484124e-05, "loss": 1.6738, "step": 70170 }, { "epoch": 2.33, "grad_norm": 0.7081627249717712, "learning_rate": 6.993156589057972e-05, "loss": 1.7168, "step": 70171 }, { "epoch": 2.33, "grad_norm": 0.673759937286377, "learning_rate": 6.992485814558757e-05, "loss": 1.6217, "step": 70172 }, { "epoch": 2.33, "grad_norm": 0.6801245808601379, "learning_rate": 6.991815067987255e-05, "loss": 1.709, "step": 70173 }, { "epoch": 2.33, "grad_norm": 0.7012721300125122, "learning_rate": 6.9911443493443e-05, "loss": 1.6836, "step": 70174 }, { "epoch": 2.33, "grad_norm": 0.6588613390922546, "learning_rate": 6.990473658630713e-05, "loss": 1.6743, "step": 70175 }, { "epoch": 2.33, "grad_norm": 0.6679482460021973, "learning_rate": 6.989802995847301e-05, "loss": 1.6658, "step": 70176 }, { "epoch": 2.33, "grad_norm": 0.7094952464103699, "learning_rate": 6.98913236099487e-05, "loss": 1.6227, "step": 70177 }, { "epoch": 2.33, "grad_norm": 0.653323233127594, "learning_rate": 6.98846175407425e-05, "loss": 1.6558, "step": 70178 }, { "epoch": 2.33, "grad_norm": 0.6724932789802551, "learning_rate": 6.987791175086248e-05, "loss": 1.6882, "step": 70179 }, { "epoch": 2.33, "grad_norm": 0.6522018909454346, "learning_rate": 6.987120624031666e-05, "loss": 1.6189, "step": 70180 }, { "epoch": 2.33, "grad_norm": 0.6584543585777283, "learning_rate": 6.986450100911329e-05, "loss": 1.6701, "step": 70181 }, { "epoch": 2.33, "grad_norm": 0.6894383430480957, "learning_rate": 6.985779605726062e-05, "loss": 1.6963, "step": 70182 }, { "epoch": 2.33, "grad_norm": 0.6744678616523743, "learning_rate": 6.985109138476665e-05, "loss": 1.7291, "step": 70183 }, { "epoch": 2.34, "grad_norm": 0.69992595911026, "learning_rate": 6.984438699163946e-05, "loss": 1.6174, "step": 70184 }, { "epoch": 2.34, "grad_norm": 0.6934646964073181, "learning_rate": 6.983768287788735e-05, "loss": 1.7316, "step": 70185 }, { "epoch": 2.34, "grad_norm": 0.6813637018203735, "learning_rate": 6.983097904351839e-05, "loss": 1.7174, "step": 70186 }, { "epoch": 2.34, "grad_norm": 0.6783444285392761, "learning_rate": 6.982427548854063e-05, "loss": 1.6531, "step": 70187 }, { "epoch": 2.34, "grad_norm": 0.650729775428772, "learning_rate": 6.981757221296236e-05, "loss": 1.6424, "step": 70188 }, { "epoch": 2.34, "grad_norm": 0.67730712890625, "learning_rate": 6.981086921679157e-05, "loss": 1.6365, "step": 70189 }, { "epoch": 2.34, "grad_norm": 0.6817059516906738, "learning_rate": 6.980416650003655e-05, "loss": 1.7452, "step": 70190 }, { "epoch": 2.34, "grad_norm": 0.6685672998428345, "learning_rate": 6.979746406270528e-05, "loss": 1.704, "step": 70191 }, { "epoch": 2.34, "grad_norm": 0.6844300031661987, "learning_rate": 6.979076190480602e-05, "loss": 1.7096, "step": 70192 }, { "epoch": 2.34, "grad_norm": 0.6601671576499939, "learning_rate": 6.978406002634688e-05, "loss": 1.6571, "step": 70193 }, { "epoch": 2.34, "grad_norm": 0.665937066078186, "learning_rate": 6.977735842733586e-05, "loss": 1.7643, "step": 70194 }, { "epoch": 2.34, "grad_norm": 0.6707269549369812, "learning_rate": 6.977065710778132e-05, "loss": 1.6591, "step": 70195 }, { "epoch": 2.34, "grad_norm": 0.6483122706413269, "learning_rate": 6.976395606769118e-05, "loss": 1.7104, "step": 70196 }, { "epoch": 2.34, "grad_norm": 0.6573939919471741, "learning_rate": 6.975725530707377e-05, "loss": 1.66, "step": 70197 }, { "epoch": 2.34, "grad_norm": 0.6958025693893433, "learning_rate": 6.975055482593713e-05, "loss": 1.6391, "step": 70198 }, { "epoch": 2.34, "grad_norm": 0.6557103395462036, "learning_rate": 6.974385462428929e-05, "loss": 1.6457, "step": 70199 }, { "epoch": 2.34, "grad_norm": 0.6722313761711121, "learning_rate": 6.973715470213859e-05, "loss": 1.7197, "step": 70200 }, { "epoch": 2.34, "grad_norm": 0.6548227071762085, "learning_rate": 6.973045505949293e-05, "loss": 1.6889, "step": 70201 }, { "epoch": 2.34, "grad_norm": 0.6747954487800598, "learning_rate": 6.972375569636068e-05, "loss": 1.6806, "step": 70202 }, { "epoch": 2.34, "grad_norm": 0.7006929516792297, "learning_rate": 6.971705661274978e-05, "loss": 1.6964, "step": 70203 }, { "epoch": 2.34, "grad_norm": 0.6815433502197266, "learning_rate": 6.971035780866854e-05, "loss": 1.6375, "step": 70204 }, { "epoch": 2.34, "grad_norm": 0.6952953338623047, "learning_rate": 6.970365928412495e-05, "loss": 1.6536, "step": 70205 }, { "epoch": 2.34, "grad_norm": 0.6743609309196472, "learning_rate": 6.969696103912714e-05, "loss": 1.6796, "step": 70206 }, { "epoch": 2.34, "grad_norm": 0.6587972044944763, "learning_rate": 6.969026307368335e-05, "loss": 1.7097, "step": 70207 }, { "epoch": 2.34, "grad_norm": 0.7061967253684998, "learning_rate": 6.968356538780167e-05, "loss": 1.7485, "step": 70208 }, { "epoch": 2.34, "grad_norm": 0.6657378673553467, "learning_rate": 6.96768679814901e-05, "loss": 1.6777, "step": 70209 }, { "epoch": 2.34, "grad_norm": 0.6638857126235962, "learning_rate": 6.967017085475689e-05, "loss": 1.7072, "step": 70210 }, { "epoch": 2.34, "grad_norm": 0.6738337874412537, "learning_rate": 6.966347400761024e-05, "loss": 1.7309, "step": 70211 }, { "epoch": 2.34, "grad_norm": 2.2144997119903564, "learning_rate": 6.965677744005818e-05, "loss": 1.7209, "step": 70212 }, { "epoch": 2.34, "grad_norm": 0.6593724489212036, "learning_rate": 6.965008115210878e-05, "loss": 1.6347, "step": 70213 }, { "epoch": 2.34, "grad_norm": 0.7036938667297363, "learning_rate": 6.964338514377031e-05, "loss": 1.7014, "step": 70214 }, { "epoch": 2.34, "grad_norm": 0.6837742328643799, "learning_rate": 6.963668941505084e-05, "loss": 1.705, "step": 70215 }, { "epoch": 2.34, "grad_norm": 0.702233612537384, "learning_rate": 6.962999396595839e-05, "loss": 1.7141, "step": 70216 }, { "epoch": 2.34, "grad_norm": 0.7047635316848755, "learning_rate": 6.962329879650122e-05, "loss": 1.6491, "step": 70217 }, { "epoch": 2.34, "grad_norm": 0.6975042819976807, "learning_rate": 6.961660390668755e-05, "loss": 1.7799, "step": 70218 }, { "epoch": 2.34, "grad_norm": 0.685073733329773, "learning_rate": 6.960990929652522e-05, "loss": 1.7021, "step": 70219 }, { "epoch": 2.34, "grad_norm": 0.6702264547348022, "learning_rate": 6.960321496602253e-05, "loss": 1.658, "step": 70220 }, { "epoch": 2.34, "grad_norm": 0.6726056933403015, "learning_rate": 6.959652091518764e-05, "loss": 1.7002, "step": 70221 }, { "epoch": 2.34, "grad_norm": 0.6757780909538269, "learning_rate": 6.958982714402866e-05, "loss": 1.6638, "step": 70222 }, { "epoch": 2.34, "grad_norm": 0.6753149032592773, "learning_rate": 6.958313365255357e-05, "loss": 1.7122, "step": 70223 }, { "epoch": 2.34, "grad_norm": 0.6806361079216003, "learning_rate": 6.957644044077062e-05, "loss": 1.7198, "step": 70224 }, { "epoch": 2.34, "grad_norm": 0.6852676272392273, "learning_rate": 6.95697475086881e-05, "loss": 1.7558, "step": 70225 }, { "epoch": 2.34, "grad_norm": 0.6886175274848938, "learning_rate": 6.956305485631377e-05, "loss": 1.6946, "step": 70226 }, { "epoch": 2.34, "grad_norm": 0.678598940372467, "learning_rate": 6.955636248365592e-05, "loss": 1.6625, "step": 70227 }, { "epoch": 2.34, "grad_norm": 0.6559703946113586, "learning_rate": 6.954967039072279e-05, "loss": 1.7439, "step": 70228 }, { "epoch": 2.34, "grad_norm": 0.6849305033683777, "learning_rate": 6.954297857752242e-05, "loss": 1.6965, "step": 70229 }, { "epoch": 2.34, "grad_norm": 0.6810115575790405, "learning_rate": 6.953628704406283e-05, "loss": 1.6181, "step": 70230 }, { "epoch": 2.34, "grad_norm": 0.6589859127998352, "learning_rate": 6.952959579035221e-05, "loss": 1.6798, "step": 70231 }, { "epoch": 2.34, "grad_norm": 1.2096948623657227, "learning_rate": 6.952290481639888e-05, "loss": 1.6944, "step": 70232 }, { "epoch": 2.34, "grad_norm": 0.6611722707748413, "learning_rate": 6.951621412221063e-05, "loss": 1.5925, "step": 70233 }, { "epoch": 2.34, "grad_norm": 0.7136799097061157, "learning_rate": 6.950952370779572e-05, "loss": 1.6899, "step": 70234 }, { "epoch": 2.34, "grad_norm": 0.6679527163505554, "learning_rate": 6.950283357316241e-05, "loss": 1.6965, "step": 70235 }, { "epoch": 2.34, "grad_norm": 0.6630276441574097, "learning_rate": 6.949614371831866e-05, "loss": 1.7091, "step": 70236 }, { "epoch": 2.34, "grad_norm": 0.6722449660301208, "learning_rate": 6.948945414327255e-05, "loss": 1.6873, "step": 70237 }, { "epoch": 2.34, "grad_norm": 0.6898381114006042, "learning_rate": 6.948276484803236e-05, "loss": 1.73, "step": 70238 }, { "epoch": 2.34, "grad_norm": 0.7008534073829651, "learning_rate": 6.947607583260614e-05, "loss": 1.7793, "step": 70239 }, { "epoch": 2.34, "grad_norm": 0.6780565977096558, "learning_rate": 6.946938709700188e-05, "loss": 1.7416, "step": 70240 }, { "epoch": 2.34, "grad_norm": 0.6744874119758606, "learning_rate": 6.946269864122786e-05, "loss": 1.6392, "step": 70241 }, { "epoch": 2.34, "grad_norm": 0.6641340255737305, "learning_rate": 6.945601046529221e-05, "loss": 1.6538, "step": 70242 }, { "epoch": 2.34, "grad_norm": 0.6807974576950073, "learning_rate": 6.944932256920302e-05, "loss": 1.6114, "step": 70243 }, { "epoch": 2.34, "grad_norm": 0.6799083948135376, "learning_rate": 6.944263495296826e-05, "loss": 1.7222, "step": 70244 }, { "epoch": 2.34, "grad_norm": 0.660250186920166, "learning_rate": 6.94359476165963e-05, "loss": 1.6988, "step": 70245 }, { "epoch": 2.34, "grad_norm": 0.6738383173942566, "learning_rate": 6.94292605600951e-05, "loss": 1.7047, "step": 70246 }, { "epoch": 2.34, "grad_norm": 0.6879265904426575, "learning_rate": 6.942257378347272e-05, "loss": 1.7116, "step": 70247 }, { "epoch": 2.34, "grad_norm": 0.6751678586006165, "learning_rate": 6.941588728673746e-05, "loss": 1.6822, "step": 70248 }, { "epoch": 2.34, "grad_norm": 0.7348781824111938, "learning_rate": 6.940920106989724e-05, "loss": 1.7432, "step": 70249 }, { "epoch": 2.34, "grad_norm": 0.6907110810279846, "learning_rate": 6.940251513296037e-05, "loss": 1.6948, "step": 70250 }, { "epoch": 2.34, "grad_norm": 0.6812077760696411, "learning_rate": 6.939582947593478e-05, "loss": 1.6728, "step": 70251 }, { "epoch": 2.34, "grad_norm": 0.6749293804168701, "learning_rate": 6.938914409882874e-05, "loss": 1.6923, "step": 70252 }, { "epoch": 2.34, "grad_norm": 0.6827946901321411, "learning_rate": 6.938245900165035e-05, "loss": 1.728, "step": 70253 }, { "epoch": 2.34, "grad_norm": 0.68174147605896, "learning_rate": 6.937577418440756e-05, "loss": 1.6981, "step": 70254 }, { "epoch": 2.34, "grad_norm": 0.6835006475448608, "learning_rate": 6.936908964710867e-05, "loss": 1.6079, "step": 70255 }, { "epoch": 2.34, "grad_norm": 0.6660280227661133, "learning_rate": 6.936240538976165e-05, "loss": 1.7246, "step": 70256 }, { "epoch": 2.34, "grad_norm": 0.6735342144966125, "learning_rate": 6.935572141237479e-05, "loss": 1.7205, "step": 70257 }, { "epoch": 2.34, "grad_norm": 0.677276074886322, "learning_rate": 6.934903771495607e-05, "loss": 1.6734, "step": 70258 }, { "epoch": 2.34, "grad_norm": 0.6783795356750488, "learning_rate": 6.934235429751356e-05, "loss": 1.6499, "step": 70259 }, { "epoch": 2.34, "grad_norm": 0.6745539307594299, "learning_rate": 6.933567116005555e-05, "loss": 1.7414, "step": 70260 }, { "epoch": 2.34, "grad_norm": 0.6865347027778625, "learning_rate": 6.932898830258994e-05, "loss": 1.6533, "step": 70261 }, { "epoch": 2.34, "grad_norm": 0.6658583283424377, "learning_rate": 6.932230572512505e-05, "loss": 1.7196, "step": 70262 }, { "epoch": 2.34, "grad_norm": 0.6893906593322754, "learning_rate": 6.931562342766878e-05, "loss": 1.6883, "step": 70263 }, { "epoch": 2.34, "grad_norm": 0.7010388374328613, "learning_rate": 6.930894141022948e-05, "loss": 1.7312, "step": 70264 }, { "epoch": 2.34, "grad_norm": 0.6716845631599426, "learning_rate": 6.930225967281509e-05, "loss": 1.6815, "step": 70265 }, { "epoch": 2.34, "grad_norm": 0.6847183108329773, "learning_rate": 6.929557821543372e-05, "loss": 1.6979, "step": 70266 }, { "epoch": 2.34, "grad_norm": 0.6666721105575562, "learning_rate": 6.928889703809358e-05, "loss": 1.6959, "step": 70267 }, { "epoch": 2.34, "grad_norm": 0.6668241024017334, "learning_rate": 6.928221614080266e-05, "loss": 1.6581, "step": 70268 }, { "epoch": 2.34, "grad_norm": 0.6863619685173035, "learning_rate": 6.927553552356922e-05, "loss": 1.6665, "step": 70269 }, { "epoch": 2.34, "grad_norm": 0.6827451586723328, "learning_rate": 6.926885518640117e-05, "loss": 1.735, "step": 70270 }, { "epoch": 2.34, "grad_norm": 0.6913702487945557, "learning_rate": 6.926217512930686e-05, "loss": 1.6413, "step": 70271 }, { "epoch": 2.34, "grad_norm": 0.6777383089065552, "learning_rate": 6.925549535229424e-05, "loss": 1.7024, "step": 70272 }, { "epoch": 2.34, "grad_norm": 0.6887423396110535, "learning_rate": 6.924881585537138e-05, "loss": 1.7155, "step": 70273 }, { "epoch": 2.34, "grad_norm": 0.6472834348678589, "learning_rate": 6.924213663854645e-05, "loss": 1.6906, "step": 70274 }, { "epoch": 2.34, "grad_norm": 0.6672959327697754, "learning_rate": 6.923545770182773e-05, "loss": 1.6826, "step": 70275 }, { "epoch": 2.34, "grad_norm": 0.6872593760490417, "learning_rate": 6.922877904522302e-05, "loss": 1.663, "step": 70276 }, { "epoch": 2.34, "grad_norm": 0.7146255373954773, "learning_rate": 6.922210066874055e-05, "loss": 1.7067, "step": 70277 }, { "epoch": 2.34, "grad_norm": 0.6637256145477295, "learning_rate": 6.921542257238855e-05, "loss": 1.669, "step": 70278 }, { "epoch": 2.34, "grad_norm": 0.7366980910301208, "learning_rate": 6.920874475617499e-05, "loss": 1.8046, "step": 70279 }, { "epoch": 2.34, "grad_norm": 0.674626350402832, "learning_rate": 6.920206722010796e-05, "loss": 1.6356, "step": 70280 }, { "epoch": 2.34, "grad_norm": 0.7012506723403931, "learning_rate": 6.919538996419557e-05, "loss": 1.6994, "step": 70281 }, { "epoch": 2.34, "grad_norm": 0.6745249629020691, "learning_rate": 6.918871298844617e-05, "loss": 1.6608, "step": 70282 }, { "epoch": 2.34, "grad_norm": 0.663749635219574, "learning_rate": 6.918203629286749e-05, "loss": 1.6425, "step": 70283 }, { "epoch": 2.34, "grad_norm": 0.687925398349762, "learning_rate": 6.917535987746779e-05, "loss": 1.6861, "step": 70284 }, { "epoch": 2.34, "grad_norm": 0.6708000898361206, "learning_rate": 6.91686837422553e-05, "loss": 1.6794, "step": 70285 }, { "epoch": 2.34, "grad_norm": 0.6988491415977478, "learning_rate": 6.916200788723801e-05, "loss": 1.7432, "step": 70286 }, { "epoch": 2.34, "grad_norm": 0.6447927951812744, "learning_rate": 6.915533231242395e-05, "loss": 1.6305, "step": 70287 }, { "epoch": 2.34, "grad_norm": 0.6808748841285706, "learning_rate": 6.91486570178213e-05, "loss": 1.6698, "step": 70288 }, { "epoch": 2.34, "grad_norm": 0.6663604378700256, "learning_rate": 6.914198200343834e-05, "loss": 1.7598, "step": 70289 }, { "epoch": 2.34, "grad_norm": 0.6583914756774902, "learning_rate": 6.913530726928282e-05, "loss": 1.6952, "step": 70290 }, { "epoch": 2.34, "grad_norm": 0.659862220287323, "learning_rate": 6.912863281536304e-05, "loss": 1.6805, "step": 70291 }, { "epoch": 2.34, "grad_norm": 0.6754146218299866, "learning_rate": 6.912195864168714e-05, "loss": 1.7337, "step": 70292 }, { "epoch": 2.34, "grad_norm": 0.670401930809021, "learning_rate": 6.911528474826318e-05, "loss": 1.7431, "step": 70293 }, { "epoch": 2.34, "grad_norm": 0.6923179626464844, "learning_rate": 6.910861113509915e-05, "loss": 1.7434, "step": 70294 }, { "epoch": 2.34, "grad_norm": 0.6620553731918335, "learning_rate": 6.910193780220334e-05, "loss": 1.7236, "step": 70295 }, { "epoch": 2.34, "grad_norm": 0.6624078750610352, "learning_rate": 6.909526474958374e-05, "loss": 1.7387, "step": 70296 }, { "epoch": 2.34, "grad_norm": 0.6846193075180054, "learning_rate": 6.908859197724839e-05, "loss": 1.7038, "step": 70297 }, { "epoch": 2.34, "grad_norm": 0.7016839981079102, "learning_rate": 6.908191948520546e-05, "loss": 1.6848, "step": 70298 }, { "epoch": 2.34, "grad_norm": 0.6757246255874634, "learning_rate": 6.907524727346315e-05, "loss": 1.7077, "step": 70299 }, { "epoch": 2.34, "grad_norm": 0.6891770362854004, "learning_rate": 6.906857534202948e-05, "loss": 1.7463, "step": 70300 }, { "epoch": 2.34, "grad_norm": 0.6873725056648254, "learning_rate": 6.906190369091242e-05, "loss": 1.7153, "step": 70301 }, { "epoch": 2.34, "grad_norm": 0.6790255308151245, "learning_rate": 6.905523232012029e-05, "loss": 1.722, "step": 70302 }, { "epoch": 2.34, "grad_norm": 0.6923869252204895, "learning_rate": 6.904856122966109e-05, "loss": 1.6481, "step": 70303 }, { "epoch": 2.34, "grad_norm": 0.6679004430770874, "learning_rate": 6.904189041954279e-05, "loss": 1.7569, "step": 70304 }, { "epoch": 2.34, "grad_norm": 0.6540600061416626, "learning_rate": 6.903521988977371e-05, "loss": 1.7068, "step": 70305 }, { "epoch": 2.34, "grad_norm": 0.6810978651046753, "learning_rate": 6.902854964036172e-05, "loss": 1.722, "step": 70306 }, { "epoch": 2.34, "grad_norm": 0.6561368107795715, "learning_rate": 6.902187967131519e-05, "loss": 1.6087, "step": 70307 }, { "epoch": 2.34, "grad_norm": 0.6654022932052612, "learning_rate": 6.901520998264193e-05, "loss": 1.632, "step": 70308 }, { "epoch": 2.34, "grad_norm": 0.6497035622596741, "learning_rate": 6.900854057435026e-05, "loss": 1.6685, "step": 70309 }, { "epoch": 2.34, "grad_norm": 0.6849953532218933, "learning_rate": 6.900187144644819e-05, "loss": 1.7283, "step": 70310 }, { "epoch": 2.34, "grad_norm": 0.6727485656738281, "learning_rate": 6.899520259894374e-05, "loss": 1.6468, "step": 70311 }, { "epoch": 2.34, "grad_norm": 0.6967663764953613, "learning_rate": 6.898853403184515e-05, "loss": 1.7825, "step": 70312 }, { "epoch": 2.34, "grad_norm": 0.6715755462646484, "learning_rate": 6.898186574516036e-05, "loss": 1.7003, "step": 70313 }, { "epoch": 2.34, "grad_norm": 0.6724503636360168, "learning_rate": 6.897519773889765e-05, "loss": 1.6832, "step": 70314 }, { "epoch": 2.34, "grad_norm": 0.6921048760414124, "learning_rate": 6.896853001306498e-05, "loss": 1.7129, "step": 70315 }, { "epoch": 2.34, "grad_norm": 0.6817864179611206, "learning_rate": 6.896186256767043e-05, "loss": 1.7463, "step": 70316 }, { "epoch": 2.34, "grad_norm": 0.6977475881576538, "learning_rate": 6.895519540272217e-05, "loss": 1.6916, "step": 70317 }, { "epoch": 2.34, "grad_norm": 0.6831485629081726, "learning_rate": 6.89485285182282e-05, "loss": 1.7289, "step": 70318 }, { "epoch": 2.34, "grad_norm": 0.6633800864219666, "learning_rate": 6.894186191419676e-05, "loss": 1.7483, "step": 70319 }, { "epoch": 2.34, "grad_norm": 0.698276162147522, "learning_rate": 6.893519559063575e-05, "loss": 1.7474, "step": 70320 }, { "epoch": 2.34, "grad_norm": 0.693781316280365, "learning_rate": 6.892852954755349e-05, "loss": 1.6483, "step": 70321 }, { "epoch": 2.34, "grad_norm": 0.6558303833007812, "learning_rate": 6.892186378495793e-05, "loss": 1.656, "step": 70322 }, { "epoch": 2.34, "grad_norm": 0.6820727586746216, "learning_rate": 6.891519830285707e-05, "loss": 1.6922, "step": 70323 }, { "epoch": 2.34, "grad_norm": 0.6838721632957458, "learning_rate": 6.890853310125922e-05, "loss": 1.6086, "step": 70324 }, { "epoch": 2.34, "grad_norm": 0.6685498356819153, "learning_rate": 6.890186818017236e-05, "loss": 1.6537, "step": 70325 }, { "epoch": 2.34, "grad_norm": 0.6778278946876526, "learning_rate": 6.88952035396045e-05, "loss": 1.7278, "step": 70326 }, { "epoch": 2.34, "grad_norm": 0.6716809272766113, "learning_rate": 6.888853917956378e-05, "loss": 1.6994, "step": 70327 }, { "epoch": 2.34, "grad_norm": 0.681566059589386, "learning_rate": 6.888187510005846e-05, "loss": 1.6436, "step": 70328 }, { "epoch": 2.34, "grad_norm": 0.6666823625564575, "learning_rate": 6.887521130109646e-05, "loss": 1.706, "step": 70329 }, { "epoch": 2.34, "grad_norm": 0.6895546317100525, "learning_rate": 6.886854778268581e-05, "loss": 1.6889, "step": 70330 }, { "epoch": 2.34, "grad_norm": 0.6806294918060303, "learning_rate": 6.886188454483476e-05, "loss": 1.6935, "step": 70331 }, { "epoch": 2.34, "grad_norm": 0.6954676508903503, "learning_rate": 6.885522158755134e-05, "loss": 1.6658, "step": 70332 }, { "epoch": 2.34, "grad_norm": 0.6971844434738159, "learning_rate": 6.884855891084357e-05, "loss": 1.8024, "step": 70333 }, { "epoch": 2.34, "grad_norm": 0.6963695883750916, "learning_rate": 6.884189651471954e-05, "loss": 1.684, "step": 70334 }, { "epoch": 2.34, "grad_norm": 0.6890650391578674, "learning_rate": 6.88352343991876e-05, "loss": 1.6645, "step": 70335 }, { "epoch": 2.34, "grad_norm": 0.6635776162147522, "learning_rate": 6.882857256425543e-05, "loss": 1.68, "step": 70336 }, { "epoch": 2.34, "grad_norm": 0.6862834692001343, "learning_rate": 6.882191100993131e-05, "loss": 1.7247, "step": 70337 }, { "epoch": 2.34, "grad_norm": 0.6738330125808716, "learning_rate": 6.881524973622341e-05, "loss": 1.6753, "step": 70338 }, { "epoch": 2.34, "grad_norm": 0.6821627020835876, "learning_rate": 6.880858874313974e-05, "loss": 1.6228, "step": 70339 }, { "epoch": 2.34, "grad_norm": 0.6929669380187988, "learning_rate": 6.880192803068831e-05, "loss": 1.6387, "step": 70340 }, { "epoch": 2.34, "grad_norm": 0.6860125064849854, "learning_rate": 6.879526759887727e-05, "loss": 1.6512, "step": 70341 }, { "epoch": 2.34, "grad_norm": 0.6754756569862366, "learning_rate": 6.878860744771486e-05, "loss": 1.7533, "step": 70342 }, { "epoch": 2.34, "grad_norm": 0.6804783940315247, "learning_rate": 6.878194757720886e-05, "loss": 1.7231, "step": 70343 }, { "epoch": 2.34, "grad_norm": 0.6790239214897156, "learning_rate": 6.877528798736746e-05, "loss": 1.6872, "step": 70344 }, { "epoch": 2.34, "grad_norm": 0.6700199246406555, "learning_rate": 6.876862867819894e-05, "loss": 1.7224, "step": 70345 }, { "epoch": 2.34, "grad_norm": 0.670559823513031, "learning_rate": 6.87619696497112e-05, "loss": 1.7598, "step": 70346 }, { "epoch": 2.34, "grad_norm": 0.6739064455032349, "learning_rate": 6.87553109019123e-05, "loss": 1.6479, "step": 70347 }, { "epoch": 2.34, "grad_norm": 0.692503035068512, "learning_rate": 6.874865243481035e-05, "loss": 1.7736, "step": 70348 }, { "epoch": 2.34, "grad_norm": 0.6811544299125671, "learning_rate": 6.874199424841366e-05, "loss": 1.6554, "step": 70349 }, { "epoch": 2.34, "grad_norm": 0.6857577562332153, "learning_rate": 6.873533634272992e-05, "loss": 1.6632, "step": 70350 }, { "epoch": 2.34, "grad_norm": 0.6751648783683777, "learning_rate": 6.87286787177674e-05, "loss": 1.7274, "step": 70351 }, { "epoch": 2.34, "grad_norm": 0.6565871238708496, "learning_rate": 6.872202137353431e-05, "loss": 1.7037, "step": 70352 }, { "epoch": 2.34, "grad_norm": 0.6929782032966614, "learning_rate": 6.871536431003859e-05, "loss": 1.6674, "step": 70353 }, { "epoch": 2.34, "grad_norm": 0.6679164171218872, "learning_rate": 6.870870752728823e-05, "loss": 1.6501, "step": 70354 }, { "epoch": 2.34, "grad_norm": 0.6844735741615295, "learning_rate": 6.870205102529153e-05, "loss": 1.6912, "step": 70355 }, { "epoch": 2.34, "grad_norm": 0.6622695922851562, "learning_rate": 6.869539480405646e-05, "loss": 1.639, "step": 70356 }, { "epoch": 2.34, "grad_norm": 0.7074647545814514, "learning_rate": 6.868873886359101e-05, "loss": 1.6809, "step": 70357 }, { "epoch": 2.34, "grad_norm": 0.6742560267448425, "learning_rate": 6.868208320390334e-05, "loss": 1.7117, "step": 70358 }, { "epoch": 2.34, "grad_norm": 0.6867061853408813, "learning_rate": 6.867542782500165e-05, "loss": 1.718, "step": 70359 }, { "epoch": 2.34, "grad_norm": 0.6771720051765442, "learning_rate": 6.866877272689388e-05, "loss": 1.7321, "step": 70360 }, { "epoch": 2.34, "grad_norm": 0.6650780439376831, "learning_rate": 6.866211790958806e-05, "loss": 1.6348, "step": 70361 }, { "epoch": 2.34, "grad_norm": 0.6670962572097778, "learning_rate": 6.865546337309242e-05, "loss": 1.7094, "step": 70362 }, { "epoch": 2.34, "grad_norm": 0.6618964672088623, "learning_rate": 6.864880911741498e-05, "loss": 1.651, "step": 70363 }, { "epoch": 2.34, "grad_norm": 0.689561665058136, "learning_rate": 6.864215514256371e-05, "loss": 1.7139, "step": 70364 }, { "epoch": 2.34, "grad_norm": 0.6904877424240112, "learning_rate": 6.863550144854685e-05, "loss": 1.7238, "step": 70365 }, { "epoch": 2.34, "grad_norm": 0.6669685244560242, "learning_rate": 6.86288480353723e-05, "loss": 1.6965, "step": 70366 }, { "epoch": 2.34, "grad_norm": 0.684870183467865, "learning_rate": 6.862219490304834e-05, "loss": 1.6535, "step": 70367 }, { "epoch": 2.34, "grad_norm": 0.645598292350769, "learning_rate": 6.861554205158285e-05, "loss": 1.6647, "step": 70368 }, { "epoch": 2.34, "grad_norm": 0.6725159883499146, "learning_rate": 6.860888948098412e-05, "loss": 1.7115, "step": 70369 }, { "epoch": 2.34, "grad_norm": 0.6804308891296387, "learning_rate": 6.860223719126005e-05, "loss": 1.7126, "step": 70370 }, { "epoch": 2.34, "grad_norm": 0.6642897129058838, "learning_rate": 6.859558518241872e-05, "loss": 1.6629, "step": 70371 }, { "epoch": 2.34, "grad_norm": 0.6692205667495728, "learning_rate": 6.858893345446835e-05, "loss": 1.6868, "step": 70372 }, { "epoch": 2.34, "grad_norm": 0.6616582274436951, "learning_rate": 6.85822820074168e-05, "loss": 1.7022, "step": 70373 }, { "epoch": 2.34, "grad_norm": 0.6796743869781494, "learning_rate": 6.857563084127235e-05, "loss": 1.6978, "step": 70374 }, { "epoch": 2.34, "grad_norm": 0.6635839939117432, "learning_rate": 6.8568979956043e-05, "loss": 1.6927, "step": 70375 }, { "epoch": 2.34, "grad_norm": 0.6620864868164062, "learning_rate": 6.85623293517367e-05, "loss": 1.6956, "step": 70376 }, { "epoch": 2.34, "grad_norm": 0.6883438229560852, "learning_rate": 6.855567902836175e-05, "loss": 1.6883, "step": 70377 }, { "epoch": 2.34, "grad_norm": 0.6918959021568298, "learning_rate": 6.854902898592599e-05, "loss": 1.6987, "step": 70378 }, { "epoch": 2.34, "grad_norm": 0.6576507687568665, "learning_rate": 6.854237922443771e-05, "loss": 1.6258, "step": 70379 }, { "epoch": 2.34, "grad_norm": 0.6744668483734131, "learning_rate": 6.853572974390478e-05, "loss": 1.6612, "step": 70380 }, { "epoch": 2.34, "grad_norm": 0.6751282215118408, "learning_rate": 6.852908054433546e-05, "loss": 1.7564, "step": 70381 }, { "epoch": 2.34, "grad_norm": 0.6984401345252991, "learning_rate": 6.852243162573773e-05, "loss": 1.6609, "step": 70382 }, { "epoch": 2.34, "grad_norm": 0.6903322339057922, "learning_rate": 6.851578298811959e-05, "loss": 1.7425, "step": 70383 }, { "epoch": 2.34, "grad_norm": 0.6684999465942383, "learning_rate": 6.850913463148926e-05, "loss": 1.6968, "step": 70384 }, { "epoch": 2.34, "grad_norm": 0.6745896339416504, "learning_rate": 6.850248655585473e-05, "loss": 1.7042, "step": 70385 }, { "epoch": 2.34, "grad_norm": 0.7215116620063782, "learning_rate": 6.849583876122398e-05, "loss": 1.7576, "step": 70386 }, { "epoch": 2.34, "grad_norm": 0.707139253616333, "learning_rate": 6.84891912476052e-05, "loss": 1.6008, "step": 70387 }, { "epoch": 2.34, "grad_norm": 0.6530727744102478, "learning_rate": 6.848254401500646e-05, "loss": 1.7023, "step": 70388 }, { "epoch": 2.34, "grad_norm": 0.6961984634399414, "learning_rate": 6.847589706343585e-05, "loss": 1.7253, "step": 70389 }, { "epoch": 2.34, "grad_norm": 0.6868338584899902, "learning_rate": 6.846925039290125e-05, "loss": 1.7123, "step": 70390 }, { "epoch": 2.34, "grad_norm": 0.6609653830528259, "learning_rate": 6.846260400341091e-05, "loss": 1.6592, "step": 70391 }, { "epoch": 2.34, "grad_norm": 0.661724865436554, "learning_rate": 6.845595789497303e-05, "loss": 1.5521, "step": 70392 }, { "epoch": 2.34, "grad_norm": 0.6774110794067383, "learning_rate": 6.844931206759531e-05, "loss": 1.672, "step": 70393 }, { "epoch": 2.34, "grad_norm": 0.6772276759147644, "learning_rate": 6.844266652128599e-05, "loss": 1.7084, "step": 70394 }, { "epoch": 2.34, "grad_norm": 0.6845558881759644, "learning_rate": 6.843602125605328e-05, "loss": 1.6595, "step": 70395 }, { "epoch": 2.34, "grad_norm": 0.6627902984619141, "learning_rate": 6.842937627190508e-05, "loss": 1.7206, "step": 70396 }, { "epoch": 2.34, "grad_norm": 0.7123055458068848, "learning_rate": 6.842273156884943e-05, "loss": 1.708, "step": 70397 }, { "epoch": 2.34, "grad_norm": 0.6721345782279968, "learning_rate": 6.841608714689445e-05, "loss": 1.6727, "step": 70398 }, { "epoch": 2.34, "grad_norm": 0.6701685190200806, "learning_rate": 6.840944300604843e-05, "loss": 1.6591, "step": 70399 }, { "epoch": 2.34, "grad_norm": 0.6658905148506165, "learning_rate": 6.840279914631902e-05, "loss": 1.6491, "step": 70400 }, { "epoch": 2.34, "grad_norm": 0.6859164237976074, "learning_rate": 6.83961555677145e-05, "loss": 1.6761, "step": 70401 }, { "epoch": 2.34, "grad_norm": 0.7014260292053223, "learning_rate": 6.8389512270243e-05, "loss": 1.5954, "step": 70402 }, { "epoch": 2.34, "grad_norm": 0.6578824520111084, "learning_rate": 6.838286925391249e-05, "loss": 1.5918, "step": 70403 }, { "epoch": 2.34, "grad_norm": 0.6708853840827942, "learning_rate": 6.8376226518731e-05, "loss": 1.6519, "step": 70404 }, { "epoch": 2.34, "grad_norm": 0.6809231638908386, "learning_rate": 6.836958406470662e-05, "loss": 1.6869, "step": 70405 }, { "epoch": 2.34, "grad_norm": 0.6508090496063232, "learning_rate": 6.836294189184759e-05, "loss": 1.6608, "step": 70406 }, { "epoch": 2.34, "grad_norm": 0.682951807975769, "learning_rate": 6.835630000016168e-05, "loss": 1.7298, "step": 70407 }, { "epoch": 2.34, "grad_norm": 0.6446153521537781, "learning_rate": 6.834965838965708e-05, "loss": 1.7068, "step": 70408 }, { "epoch": 2.34, "grad_norm": 0.6701116561889648, "learning_rate": 6.834301706034196e-05, "loss": 1.6636, "step": 70409 }, { "epoch": 2.34, "grad_norm": 0.6782161593437195, "learning_rate": 6.833637601222426e-05, "loss": 1.7029, "step": 70410 }, { "epoch": 2.34, "grad_norm": 0.6730907559394836, "learning_rate": 6.832973524531198e-05, "loss": 1.7467, "step": 70411 }, { "epoch": 2.34, "grad_norm": 0.6896733045578003, "learning_rate": 6.832309475961337e-05, "loss": 1.6735, "step": 70412 }, { "epoch": 2.34, "grad_norm": 0.6887034773826599, "learning_rate": 6.831645455513637e-05, "loss": 1.7119, "step": 70413 }, { "epoch": 2.34, "grad_norm": 0.7004873156547546, "learning_rate": 6.830981463188898e-05, "loss": 1.7609, "step": 70414 }, { "epoch": 2.34, "grad_norm": 0.6646037101745605, "learning_rate": 6.830317498987934e-05, "loss": 1.6777, "step": 70415 }, { "epoch": 2.34, "grad_norm": 0.677967369556427, "learning_rate": 6.82965356291156e-05, "loss": 1.6387, "step": 70416 }, { "epoch": 2.34, "grad_norm": 1.1594542264938354, "learning_rate": 6.828989654960571e-05, "loss": 1.7102, "step": 70417 }, { "epoch": 2.34, "grad_norm": 0.6575915813446045, "learning_rate": 6.828325775135765e-05, "loss": 1.7076, "step": 70418 }, { "epoch": 2.34, "grad_norm": 0.6804689764976501, "learning_rate": 6.827661923437968e-05, "loss": 1.6846, "step": 70419 }, { "epoch": 2.34, "grad_norm": 0.7215809226036072, "learning_rate": 6.826998099867976e-05, "loss": 1.7072, "step": 70420 }, { "epoch": 2.34, "grad_norm": 0.699643611907959, "learning_rate": 6.826334304426581e-05, "loss": 1.6995, "step": 70421 }, { "epoch": 2.34, "grad_norm": 0.6574147343635559, "learning_rate": 6.825670537114614e-05, "loss": 1.6482, "step": 70422 }, { "epoch": 2.34, "grad_norm": 0.6547489762306213, "learning_rate": 6.82500679793286e-05, "loss": 1.64, "step": 70423 }, { "epoch": 2.34, "grad_norm": 0.678358793258667, "learning_rate": 6.824343086882142e-05, "loss": 1.7076, "step": 70424 }, { "epoch": 2.34, "grad_norm": 0.6726613640785217, "learning_rate": 6.823679403963246e-05, "loss": 1.6811, "step": 70425 }, { "epoch": 2.34, "grad_norm": 0.6471437811851501, "learning_rate": 6.823015749176997e-05, "loss": 1.6739, "step": 70426 }, { "epoch": 2.34, "grad_norm": 0.6879230737686157, "learning_rate": 6.822352122524192e-05, "loss": 1.7484, "step": 70427 }, { "epoch": 2.34, "grad_norm": 0.6720215678215027, "learning_rate": 6.821688524005628e-05, "loss": 1.7832, "step": 70428 }, { "epoch": 2.34, "grad_norm": 0.6809163689613342, "learning_rate": 6.821024953622129e-05, "loss": 1.7065, "step": 70429 }, { "epoch": 2.34, "grad_norm": 0.6609817147254944, "learning_rate": 6.820361411374479e-05, "loss": 1.6922, "step": 70430 }, { "epoch": 2.34, "grad_norm": 0.6795348525047302, "learning_rate": 6.819697897263508e-05, "loss": 1.761, "step": 70431 }, { "epoch": 2.34, "grad_norm": 0.6533272862434387, "learning_rate": 6.819034411290005e-05, "loss": 1.7343, "step": 70432 }, { "epoch": 2.34, "grad_norm": 0.6508637070655823, "learning_rate": 6.818370953454769e-05, "loss": 1.6898, "step": 70433 }, { "epoch": 2.34, "grad_norm": 0.6630853414535522, "learning_rate": 6.817707523758626e-05, "loss": 1.6265, "step": 70434 }, { "epoch": 2.34, "grad_norm": 0.6447948813438416, "learning_rate": 6.817044122202359e-05, "loss": 1.7337, "step": 70435 }, { "epoch": 2.34, "grad_norm": 0.6829617619514465, "learning_rate": 6.816380748786797e-05, "loss": 1.6985, "step": 70436 }, { "epoch": 2.34, "grad_norm": 0.6629890203475952, "learning_rate": 6.815717403512724e-05, "loss": 1.6308, "step": 70437 }, { "epoch": 2.34, "grad_norm": 0.6943267583847046, "learning_rate": 6.81505408638096e-05, "loss": 1.6328, "step": 70438 }, { "epoch": 2.34, "grad_norm": 0.6741565465927124, "learning_rate": 6.814390797392305e-05, "loss": 1.6737, "step": 70439 }, { "epoch": 2.34, "grad_norm": 0.6676470041275024, "learning_rate": 6.813727536547555e-05, "loss": 1.741, "step": 70440 }, { "epoch": 2.34, "grad_norm": 0.6753451824188232, "learning_rate": 6.813064303847532e-05, "loss": 1.6609, "step": 70441 }, { "epoch": 2.34, "grad_norm": 0.6483107209205627, "learning_rate": 6.812401099293031e-05, "loss": 1.6958, "step": 70442 }, { "epoch": 2.34, "grad_norm": 0.7028931379318237, "learning_rate": 6.811737922884852e-05, "loss": 1.7566, "step": 70443 }, { "epoch": 2.34, "grad_norm": 0.683778703212738, "learning_rate": 6.811074774623808e-05, "loss": 1.7066, "step": 70444 }, { "epoch": 2.34, "grad_norm": 0.6983907222747803, "learning_rate": 6.810411654510709e-05, "loss": 1.71, "step": 70445 }, { "epoch": 2.34, "grad_norm": 0.6676183938980103, "learning_rate": 6.809748562546352e-05, "loss": 1.6982, "step": 70446 }, { "epoch": 2.34, "grad_norm": 0.6604045033454895, "learning_rate": 6.809085498731537e-05, "loss": 1.6555, "step": 70447 }, { "epoch": 2.34, "grad_norm": 0.6779599189758301, "learning_rate": 6.808422463067082e-05, "loss": 1.6533, "step": 70448 }, { "epoch": 2.34, "grad_norm": 0.6962524652481079, "learning_rate": 6.807759455553788e-05, "loss": 1.7767, "step": 70449 }, { "epoch": 2.34, "grad_norm": 0.6842361092567444, "learning_rate": 6.807096476192445e-05, "loss": 1.7412, "step": 70450 }, { "epoch": 2.34, "grad_norm": 0.6713696718215942, "learning_rate": 6.80643352498387e-05, "loss": 1.6973, "step": 70451 }, { "epoch": 2.34, "grad_norm": 0.6902101039886475, "learning_rate": 6.805770601928884e-05, "loss": 1.6781, "step": 70452 }, { "epoch": 2.34, "grad_norm": 0.6923673152923584, "learning_rate": 6.805107707028259e-05, "loss": 1.7074, "step": 70453 }, { "epoch": 2.34, "grad_norm": 0.686580240726471, "learning_rate": 6.804444840282816e-05, "loss": 1.7256, "step": 70454 }, { "epoch": 2.34, "grad_norm": 0.678642988204956, "learning_rate": 6.80378200169337e-05, "loss": 1.6624, "step": 70455 }, { "epoch": 2.34, "grad_norm": 0.6771993637084961, "learning_rate": 6.80311919126071e-05, "loss": 1.7256, "step": 70456 }, { "epoch": 2.34, "grad_norm": 0.6735681295394897, "learning_rate": 6.802456408985642e-05, "loss": 1.7024, "step": 70457 }, { "epoch": 2.34, "grad_norm": 0.6613994240760803, "learning_rate": 6.801793654868967e-05, "loss": 1.666, "step": 70458 }, { "epoch": 2.34, "grad_norm": 0.6786672472953796, "learning_rate": 6.801130928911519e-05, "loss": 1.7364, "step": 70459 }, { "epoch": 2.34, "grad_norm": 0.6888095140457153, "learning_rate": 6.80046823111406e-05, "loss": 1.7485, "step": 70460 }, { "epoch": 2.34, "grad_norm": 0.6757000088691711, "learning_rate": 6.799805561477417e-05, "loss": 1.6975, "step": 70461 }, { "epoch": 2.34, "grad_norm": 0.686307966709137, "learning_rate": 6.7991429200024e-05, "loss": 1.7305, "step": 70462 }, { "epoch": 2.34, "grad_norm": 0.6729172468185425, "learning_rate": 6.798480306689805e-05, "loss": 1.7343, "step": 70463 }, { "epoch": 2.34, "grad_norm": 0.6952815651893616, "learning_rate": 6.797817721540424e-05, "loss": 1.6515, "step": 70464 }, { "epoch": 2.34, "grad_norm": 0.6723931431770325, "learning_rate": 6.797155164555078e-05, "loss": 1.6714, "step": 70465 }, { "epoch": 2.34, "grad_norm": 0.6802548766136169, "learning_rate": 6.796492635734583e-05, "loss": 1.6116, "step": 70466 }, { "epoch": 2.34, "grad_norm": 0.6682947278022766, "learning_rate": 6.795830135079707e-05, "loss": 1.7049, "step": 70467 }, { "epoch": 2.34, "grad_norm": 0.6736214756965637, "learning_rate": 6.795167662591277e-05, "loss": 1.7159, "step": 70468 }, { "epoch": 2.34, "grad_norm": 0.690721869468689, "learning_rate": 6.794505218270103e-05, "loss": 1.7052, "step": 70469 }, { "epoch": 2.34, "grad_norm": 0.6680997014045715, "learning_rate": 6.79384280211698e-05, "loss": 1.7457, "step": 70470 }, { "epoch": 2.34, "grad_norm": 0.6640671491622925, "learning_rate": 6.793180414132704e-05, "loss": 1.7003, "step": 70471 }, { "epoch": 2.34, "grad_norm": 0.690263032913208, "learning_rate": 6.792518054318095e-05, "loss": 1.6804, "step": 70472 }, { "epoch": 2.34, "grad_norm": 0.6804250478744507, "learning_rate": 6.791855722673952e-05, "loss": 1.7134, "step": 70473 }, { "epoch": 2.34, "grad_norm": 0.6552919745445251, "learning_rate": 6.791193419201066e-05, "loss": 1.6414, "step": 70474 }, { "epoch": 2.34, "grad_norm": 0.6605801582336426, "learning_rate": 6.790531143900252e-05, "loss": 1.6823, "step": 70475 }, { "epoch": 2.34, "grad_norm": 0.6711851358413696, "learning_rate": 6.789868896772324e-05, "loss": 1.7104, "step": 70476 }, { "epoch": 2.34, "grad_norm": 0.6919717788696289, "learning_rate": 6.789206677818072e-05, "loss": 1.7429, "step": 70477 }, { "epoch": 2.34, "grad_norm": 0.7004440426826477, "learning_rate": 6.788544487038298e-05, "loss": 1.664, "step": 70478 }, { "epoch": 2.34, "grad_norm": 0.6622969508171082, "learning_rate": 6.787882324433817e-05, "loss": 1.6821, "step": 70479 }, { "epoch": 2.34, "grad_norm": 0.6590949296951294, "learning_rate": 6.787220190005427e-05, "loss": 1.684, "step": 70480 }, { "epoch": 2.34, "grad_norm": 0.6642938852310181, "learning_rate": 6.786558083753921e-05, "loss": 1.6631, "step": 70481 }, { "epoch": 2.34, "grad_norm": 0.6671555638313293, "learning_rate": 6.785896005680126e-05, "loss": 1.767, "step": 70482 }, { "epoch": 2.34, "grad_norm": 0.6790105104446411, "learning_rate": 6.78523395578482e-05, "loss": 1.7249, "step": 70483 }, { "epoch": 2.35, "grad_norm": 0.7071031928062439, "learning_rate": 6.784571934068833e-05, "loss": 1.6793, "step": 70484 }, { "epoch": 2.35, "grad_norm": 0.6959486603736877, "learning_rate": 6.783909940532941e-05, "loss": 1.693, "step": 70485 }, { "epoch": 2.35, "grad_norm": 0.6878326535224915, "learning_rate": 6.783247975177974e-05, "loss": 1.6648, "step": 70486 }, { "epoch": 2.35, "grad_norm": 0.638328492641449, "learning_rate": 6.782586038004723e-05, "loss": 1.7241, "step": 70487 }, { "epoch": 2.35, "grad_norm": 0.7039691209793091, "learning_rate": 6.781924129013981e-05, "loss": 1.6837, "step": 70488 }, { "epoch": 2.35, "grad_norm": 0.6888648271560669, "learning_rate": 6.781262248206572e-05, "loss": 1.7449, "step": 70489 }, { "epoch": 2.35, "grad_norm": 0.6834619045257568, "learning_rate": 6.780600395583282e-05, "loss": 1.7448, "step": 70490 }, { "epoch": 2.35, "grad_norm": 0.6763005256652832, "learning_rate": 6.779938571144926e-05, "loss": 1.7329, "step": 70491 }, { "epoch": 2.35, "grad_norm": 0.6633971929550171, "learning_rate": 6.779276774892307e-05, "loss": 1.6159, "step": 70492 }, { "epoch": 2.35, "grad_norm": 0.6826887130737305, "learning_rate": 6.778615006826218e-05, "loss": 1.7013, "step": 70493 }, { "epoch": 2.35, "grad_norm": 0.6763401627540588, "learning_rate": 6.777953266947475e-05, "loss": 1.7296, "step": 70494 }, { "epoch": 2.35, "grad_norm": 0.664415180683136, "learning_rate": 6.777291555256863e-05, "loss": 1.6853, "step": 70495 }, { "epoch": 2.35, "grad_norm": 0.6725483536720276, "learning_rate": 6.776629871755209e-05, "loss": 1.6908, "step": 70496 }, { "epoch": 2.35, "grad_norm": 0.6791509389877319, "learning_rate": 6.7759682164433e-05, "loss": 1.6823, "step": 70497 }, { "epoch": 2.35, "grad_norm": 0.6659116148948669, "learning_rate": 6.775306589321946e-05, "loss": 1.6851, "step": 70498 }, { "epoch": 2.35, "grad_norm": 0.6838709115982056, "learning_rate": 6.774644990391952e-05, "loss": 1.715, "step": 70499 }, { "epoch": 2.35, "grad_norm": 0.6709114909172058, "learning_rate": 6.773983419654106e-05, "loss": 1.6875, "step": 70500 }, { "epoch": 2.35, "grad_norm": 0.6836764216423035, "learning_rate": 6.773321877109222e-05, "loss": 1.6835, "step": 70501 }, { "epoch": 2.35, "grad_norm": 0.6891177892684937, "learning_rate": 6.772660362758122e-05, "loss": 1.6716, "step": 70502 }, { "epoch": 2.35, "grad_norm": 0.6912072896957397, "learning_rate": 6.771998876601569e-05, "loss": 1.7379, "step": 70503 }, { "epoch": 2.35, "grad_norm": 0.6639572978019714, "learning_rate": 6.771337418640391e-05, "loss": 1.7115, "step": 70504 }, { "epoch": 2.35, "grad_norm": 0.7060593366622925, "learning_rate": 6.770675988875392e-05, "loss": 1.6226, "step": 70505 }, { "epoch": 2.35, "grad_norm": 0.685588002204895, "learning_rate": 6.770014587307372e-05, "loss": 1.6927, "step": 70506 }, { "epoch": 2.35, "grad_norm": 0.6815664768218994, "learning_rate": 6.769353213937123e-05, "loss": 1.6511, "step": 70507 }, { "epoch": 2.35, "grad_norm": 0.6825448274612427, "learning_rate": 6.768691868765456e-05, "loss": 1.6667, "step": 70508 }, { "epoch": 2.35, "grad_norm": 0.7156257033348083, "learning_rate": 6.768030551793189e-05, "loss": 1.6497, "step": 70509 }, { "epoch": 2.35, "grad_norm": 0.6887568831443787, "learning_rate": 6.767369263021094e-05, "loss": 1.6709, "step": 70510 }, { "epoch": 2.35, "grad_norm": 0.6561089754104614, "learning_rate": 6.766708002449991e-05, "loss": 1.7124, "step": 70511 }, { "epoch": 2.35, "grad_norm": 0.659197211265564, "learning_rate": 6.766046770080689e-05, "loss": 1.7292, "step": 70512 }, { "epoch": 2.35, "grad_norm": 0.6920866966247559, "learning_rate": 6.765385565913983e-05, "loss": 1.7876, "step": 70513 }, { "epoch": 2.35, "grad_norm": 0.673360288143158, "learning_rate": 6.764724389950668e-05, "loss": 1.7129, "step": 70514 }, { "epoch": 2.35, "grad_norm": 0.6674591898918152, "learning_rate": 6.76406324219155e-05, "loss": 1.6737, "step": 70515 }, { "epoch": 2.35, "grad_norm": 1.491329312324524, "learning_rate": 6.763402122637456e-05, "loss": 1.721, "step": 70516 }, { "epoch": 2.35, "grad_norm": 0.6805742383003235, "learning_rate": 6.762741031289151e-05, "loss": 1.6949, "step": 70517 }, { "epoch": 2.35, "grad_norm": 0.666532576084137, "learning_rate": 6.762079968147454e-05, "loss": 1.68, "step": 70518 }, { "epoch": 2.35, "grad_norm": 0.6679123640060425, "learning_rate": 6.761418933213178e-05, "loss": 1.6178, "step": 70519 }, { "epoch": 2.35, "grad_norm": 0.6577938199043274, "learning_rate": 6.760757926487113e-05, "loss": 1.604, "step": 70520 }, { "epoch": 2.35, "grad_norm": 0.6684242486953735, "learning_rate": 6.760096947970056e-05, "loss": 1.6861, "step": 70521 }, { "epoch": 2.35, "grad_norm": 0.6733555793762207, "learning_rate": 6.759435997662818e-05, "loss": 1.6836, "step": 70522 }, { "epoch": 2.35, "grad_norm": 0.6909534335136414, "learning_rate": 6.758775075566216e-05, "loss": 1.6751, "step": 70523 }, { "epoch": 2.35, "grad_norm": 0.6596463918685913, "learning_rate": 6.758114181681018e-05, "loss": 1.659, "step": 70524 }, { "epoch": 2.35, "grad_norm": 0.6670727729797363, "learning_rate": 6.757453316008048e-05, "loss": 1.7298, "step": 70525 }, { "epoch": 2.35, "grad_norm": 0.6875656247138977, "learning_rate": 6.756792478548112e-05, "loss": 1.6788, "step": 70526 }, { "epoch": 2.35, "grad_norm": 0.6794123649597168, "learning_rate": 6.756131669302006e-05, "loss": 1.6617, "step": 70527 }, { "epoch": 2.35, "grad_norm": 0.6802810430526733, "learning_rate": 6.75547088827052e-05, "loss": 1.6928, "step": 70528 }, { "epoch": 2.35, "grad_norm": 0.6739634275436401, "learning_rate": 6.754810135454479e-05, "loss": 1.652, "step": 70529 }, { "epoch": 2.35, "grad_norm": 0.6996047496795654, "learning_rate": 6.754149410854673e-05, "loss": 1.7534, "step": 70530 }, { "epoch": 2.35, "grad_norm": 0.687376081943512, "learning_rate": 6.753488714471897e-05, "loss": 1.7481, "step": 70531 }, { "epoch": 2.35, "grad_norm": 0.6931371092796326, "learning_rate": 6.75282804630696e-05, "loss": 1.633, "step": 70532 }, { "epoch": 2.35, "grad_norm": 0.6698626279830933, "learning_rate": 6.752167406360672e-05, "loss": 1.703, "step": 70533 }, { "epoch": 2.35, "grad_norm": 0.6817542314529419, "learning_rate": 6.751506794633827e-05, "loss": 1.7113, "step": 70534 }, { "epoch": 2.35, "grad_norm": 0.7074635624885559, "learning_rate": 6.750846211127219e-05, "loss": 1.7219, "step": 70535 }, { "epoch": 2.35, "grad_norm": 0.6747807264328003, "learning_rate": 6.750185655841669e-05, "loss": 1.6538, "step": 70536 }, { "epoch": 2.35, "grad_norm": 0.6898552179336548, "learning_rate": 6.749525128777964e-05, "loss": 1.7484, "step": 70537 }, { "epoch": 2.35, "grad_norm": 0.6810497641563416, "learning_rate": 6.748864629936903e-05, "loss": 1.7082, "step": 70538 }, { "epoch": 2.35, "grad_norm": 0.6738022565841675, "learning_rate": 6.748204159319303e-05, "loss": 1.6703, "step": 70539 }, { "epoch": 2.35, "grad_norm": 0.664510190486908, "learning_rate": 6.747543716925948e-05, "loss": 1.7277, "step": 70540 }, { "epoch": 2.35, "grad_norm": 0.6776996850967407, "learning_rate": 6.746883302757659e-05, "loss": 1.692, "step": 70541 }, { "epoch": 2.35, "grad_norm": 0.6699886918067932, "learning_rate": 6.746222916815219e-05, "loss": 1.6609, "step": 70542 }, { "epoch": 2.35, "grad_norm": 0.6866307854652405, "learning_rate": 6.745562559099446e-05, "loss": 1.683, "step": 70543 }, { "epoch": 2.35, "grad_norm": 0.675527036190033, "learning_rate": 6.744902229611131e-05, "loss": 1.704, "step": 70544 }, { "epoch": 2.35, "grad_norm": 0.7001286149024963, "learning_rate": 6.744241928351075e-05, "loss": 1.7234, "step": 70545 }, { "epoch": 2.35, "grad_norm": 0.6692569851875305, "learning_rate": 6.743581655320088e-05, "loss": 1.7479, "step": 70546 }, { "epoch": 2.35, "grad_norm": 0.6896395087242126, "learning_rate": 6.742921410518957e-05, "loss": 1.7571, "step": 70547 }, { "epoch": 2.35, "grad_norm": 0.6829906702041626, "learning_rate": 6.742261193948503e-05, "loss": 1.6569, "step": 70548 }, { "epoch": 2.35, "grad_norm": 0.6602693796157837, "learning_rate": 6.741601005609516e-05, "loss": 1.7133, "step": 70549 }, { "epoch": 2.35, "grad_norm": 0.6824947595596313, "learning_rate": 6.740940845502789e-05, "loss": 1.7116, "step": 70550 }, { "epoch": 2.35, "grad_norm": 0.6665943264961243, "learning_rate": 6.740280713629141e-05, "loss": 1.6938, "step": 70551 }, { "epoch": 2.35, "grad_norm": 0.6748249530792236, "learning_rate": 6.739620609989358e-05, "loss": 1.716, "step": 70552 }, { "epoch": 2.35, "grad_norm": 0.6622952222824097, "learning_rate": 6.738960534584257e-05, "loss": 1.7112, "step": 70553 }, { "epoch": 2.35, "grad_norm": 0.6836511492729187, "learning_rate": 6.738300487414624e-05, "loss": 1.7323, "step": 70554 }, { "epoch": 2.35, "grad_norm": 0.7029377818107605, "learning_rate": 6.73764046848127e-05, "loss": 1.612, "step": 70555 }, { "epoch": 2.35, "grad_norm": 0.6806557774543762, "learning_rate": 6.736980477784995e-05, "loss": 1.7142, "step": 70556 }, { "epoch": 2.35, "grad_norm": 0.6763397455215454, "learning_rate": 6.736320515326593e-05, "loss": 1.6795, "step": 70557 }, { "epoch": 2.35, "grad_norm": 0.6605532169342041, "learning_rate": 6.735660581106875e-05, "loss": 1.7064, "step": 70558 }, { "epoch": 2.35, "grad_norm": 0.700997531414032, "learning_rate": 6.735000675126639e-05, "loss": 1.6601, "step": 70559 }, { "epoch": 2.35, "grad_norm": 0.6487166285514832, "learning_rate": 6.734340797386675e-05, "loss": 1.6663, "step": 70560 }, { "epoch": 2.35, "grad_norm": 0.6830310225486755, "learning_rate": 6.733680947887794e-05, "loss": 1.6397, "step": 70561 }, { "epoch": 2.35, "grad_norm": 0.6640309691429138, "learning_rate": 6.733021126630802e-05, "loss": 1.7048, "step": 70562 }, { "epoch": 2.35, "grad_norm": 0.6666194796562195, "learning_rate": 6.732361333616498e-05, "loss": 1.7755, "step": 70563 }, { "epoch": 2.35, "grad_norm": 0.6832863092422485, "learning_rate": 6.731701568845668e-05, "loss": 1.6794, "step": 70564 }, { "epoch": 2.35, "grad_norm": 0.6932674050331116, "learning_rate": 6.731041832319133e-05, "loss": 1.7187, "step": 70565 }, { "epoch": 2.35, "grad_norm": 0.6723293662071228, "learning_rate": 6.730382124037687e-05, "loss": 1.7407, "step": 70566 }, { "epoch": 2.35, "grad_norm": 0.6923912763595581, "learning_rate": 6.729722444002116e-05, "loss": 1.6859, "step": 70567 }, { "epoch": 2.35, "grad_norm": 0.6574324369430542, "learning_rate": 6.729062792213233e-05, "loss": 1.7068, "step": 70568 }, { "epoch": 2.35, "grad_norm": 0.6690729856491089, "learning_rate": 6.728403168671859e-05, "loss": 1.6669, "step": 70569 }, { "epoch": 2.35, "grad_norm": 0.6837642192840576, "learning_rate": 6.727743573378758e-05, "loss": 1.6945, "step": 70570 }, { "epoch": 2.35, "grad_norm": 0.6892737150192261, "learning_rate": 6.727084006334747e-05, "loss": 1.6211, "step": 70571 }, { "epoch": 2.35, "grad_norm": 0.6668340563774109, "learning_rate": 6.72642446754063e-05, "loss": 1.6064, "step": 70572 }, { "epoch": 2.35, "grad_norm": 0.6998191475868225, "learning_rate": 6.725764956997211e-05, "loss": 1.6555, "step": 70573 }, { "epoch": 2.35, "grad_norm": 0.6778591871261597, "learning_rate": 6.725105474705274e-05, "loss": 1.7189, "step": 70574 }, { "epoch": 2.35, "grad_norm": 0.6671352982521057, "learning_rate": 6.72444602066563e-05, "loss": 1.6911, "step": 70575 }, { "epoch": 2.35, "grad_norm": 0.6682555079460144, "learning_rate": 6.723786594879094e-05, "loss": 1.6973, "step": 70576 }, { "epoch": 2.35, "grad_norm": 0.673369824886322, "learning_rate": 6.723127197346435e-05, "loss": 1.6652, "step": 70577 }, { "epoch": 2.35, "grad_norm": 0.6870406270027161, "learning_rate": 6.72246782806847e-05, "loss": 1.7263, "step": 70578 }, { "epoch": 2.35, "grad_norm": 0.6700717210769653, "learning_rate": 6.721808487046007e-05, "loss": 1.6853, "step": 70579 }, { "epoch": 2.35, "grad_norm": 0.6700600981712341, "learning_rate": 6.721149174279841e-05, "loss": 1.7213, "step": 70580 }, { "epoch": 2.35, "grad_norm": 0.652340292930603, "learning_rate": 6.720489889770759e-05, "loss": 1.6655, "step": 70581 }, { "epoch": 2.35, "grad_norm": 0.6673822999000549, "learning_rate": 6.719830633519575e-05, "loss": 1.6208, "step": 70582 }, { "epoch": 2.35, "grad_norm": 0.6571322083473206, "learning_rate": 6.719171405527101e-05, "loss": 1.7362, "step": 70583 }, { "epoch": 2.35, "grad_norm": 0.6800106763839722, "learning_rate": 6.718512205794104e-05, "loss": 1.6753, "step": 70584 }, { "epoch": 2.35, "grad_norm": 0.664754331111908, "learning_rate": 6.717853034321406e-05, "loss": 1.6846, "step": 70585 }, { "epoch": 2.35, "grad_norm": 0.686820387840271, "learning_rate": 6.717193891109813e-05, "loss": 1.7076, "step": 70586 }, { "epoch": 2.35, "grad_norm": 0.6807702779769897, "learning_rate": 6.716534776160114e-05, "loss": 1.6337, "step": 70587 }, { "epoch": 2.35, "grad_norm": 0.6831629276275635, "learning_rate": 6.715875689473104e-05, "loss": 1.6982, "step": 70588 }, { "epoch": 2.35, "grad_norm": 0.6739213466644287, "learning_rate": 6.715216631049598e-05, "loss": 1.7485, "step": 70589 }, { "epoch": 2.35, "grad_norm": 0.691717803478241, "learning_rate": 6.714557600890387e-05, "loss": 1.7461, "step": 70590 }, { "epoch": 2.35, "grad_norm": 0.6660445928573608, "learning_rate": 6.713898598996266e-05, "loss": 1.7338, "step": 70591 }, { "epoch": 2.35, "grad_norm": 0.6720037460327148, "learning_rate": 6.713239625368037e-05, "loss": 1.6721, "step": 70592 }, { "epoch": 2.35, "grad_norm": 0.6596761345863342, "learning_rate": 6.712580680006518e-05, "loss": 1.6781, "step": 70593 }, { "epoch": 2.35, "grad_norm": 0.6917004585266113, "learning_rate": 6.71192176291249e-05, "loss": 1.6907, "step": 70594 }, { "epoch": 2.35, "grad_norm": 0.6870828866958618, "learning_rate": 6.711262874086752e-05, "loss": 1.6367, "step": 70595 }, { "epoch": 2.35, "grad_norm": 0.6815049648284912, "learning_rate": 6.710604013530119e-05, "loss": 1.7307, "step": 70596 }, { "epoch": 2.35, "grad_norm": 0.6829401254653931, "learning_rate": 6.70994518124338e-05, "loss": 1.6775, "step": 70597 }, { "epoch": 2.35, "grad_norm": 0.6766334176063538, "learning_rate": 6.709286377227326e-05, "loss": 1.7143, "step": 70598 }, { "epoch": 2.35, "grad_norm": 0.6715425848960876, "learning_rate": 6.708627601482773e-05, "loss": 1.6622, "step": 70599 }, { "epoch": 2.35, "grad_norm": 0.682196855545044, "learning_rate": 6.70796885401051e-05, "loss": 1.7681, "step": 70600 }, { "epoch": 2.35, "grad_norm": 0.6751968860626221, "learning_rate": 6.70731013481135e-05, "loss": 1.7253, "step": 70601 }, { "epoch": 2.35, "grad_norm": 0.6982427835464478, "learning_rate": 6.706651443886072e-05, "loss": 1.7182, "step": 70602 }, { "epoch": 2.35, "grad_norm": 0.6661491990089417, "learning_rate": 6.705992781235498e-05, "loss": 1.7214, "step": 70603 }, { "epoch": 2.35, "grad_norm": 0.6837435960769653, "learning_rate": 6.705334146860412e-05, "loss": 1.7439, "step": 70604 }, { "epoch": 2.35, "grad_norm": 0.6933509111404419, "learning_rate": 6.704675540761613e-05, "loss": 1.6798, "step": 70605 }, { "epoch": 2.35, "grad_norm": 0.6779419183731079, "learning_rate": 6.704016962939913e-05, "loss": 1.7357, "step": 70606 }, { "epoch": 2.35, "grad_norm": 0.6907424330711365, "learning_rate": 6.703358413396096e-05, "loss": 1.7671, "step": 70607 }, { "epoch": 2.35, "grad_norm": 0.6882215142250061, "learning_rate": 6.702699892130978e-05, "loss": 1.6656, "step": 70608 }, { "epoch": 2.35, "grad_norm": 0.6883649230003357, "learning_rate": 6.70204139914535e-05, "loss": 1.6749, "step": 70609 }, { "epoch": 2.35, "grad_norm": 0.6833286285400391, "learning_rate": 6.701382934440002e-05, "loss": 1.7208, "step": 70610 }, { "epoch": 2.35, "grad_norm": 0.6706572771072388, "learning_rate": 6.700724498015752e-05, "loss": 1.6787, "step": 70611 }, { "epoch": 2.35, "grad_norm": 0.6749560832977295, "learning_rate": 6.700066089873379e-05, "loss": 1.6127, "step": 70612 }, { "epoch": 2.35, "grad_norm": 0.6918689012527466, "learning_rate": 6.699407710013701e-05, "loss": 1.7292, "step": 70613 }, { "epoch": 2.35, "grad_norm": 0.674214243888855, "learning_rate": 6.698749358437503e-05, "loss": 1.7824, "step": 70614 }, { "epoch": 2.35, "grad_norm": 0.6746870279312134, "learning_rate": 6.698091035145596e-05, "loss": 1.6489, "step": 70615 }, { "epoch": 2.35, "grad_norm": 0.6770282983779907, "learning_rate": 6.697432740138774e-05, "loss": 1.6791, "step": 70616 }, { "epoch": 2.35, "grad_norm": 0.6788907051086426, "learning_rate": 6.696774473417825e-05, "loss": 1.7443, "step": 70617 }, { "epoch": 2.35, "grad_norm": 0.6914290189743042, "learning_rate": 6.69611623498356e-05, "loss": 1.7227, "step": 70618 }, { "epoch": 2.35, "grad_norm": 0.6629165410995483, "learning_rate": 6.695458024836793e-05, "loss": 1.6361, "step": 70619 }, { "epoch": 2.35, "grad_norm": 0.6629367470741272, "learning_rate": 6.694799842978288e-05, "loss": 1.7107, "step": 70620 }, { "epoch": 2.35, "grad_norm": 0.6671880483627319, "learning_rate": 6.694141689408864e-05, "loss": 1.7048, "step": 70621 }, { "epoch": 2.35, "grad_norm": 0.6603164076805115, "learning_rate": 6.693483564129327e-05, "loss": 1.6573, "step": 70622 }, { "epoch": 2.35, "grad_norm": 0.7185260653495789, "learning_rate": 6.692825467140463e-05, "loss": 1.6714, "step": 70623 }, { "epoch": 2.35, "grad_norm": 0.670066237449646, "learning_rate": 6.692167398443073e-05, "loss": 1.6865, "step": 70624 }, { "epoch": 2.35, "grad_norm": 0.6828510165214539, "learning_rate": 6.69150935803795e-05, "loss": 1.6976, "step": 70625 }, { "epoch": 2.35, "grad_norm": 0.6657986044883728, "learning_rate": 6.690851345925922e-05, "loss": 1.6431, "step": 70626 }, { "epoch": 2.35, "grad_norm": 0.6884158849716187, "learning_rate": 6.690193362107748e-05, "loss": 1.6608, "step": 70627 }, { "epoch": 2.35, "grad_norm": 0.6855793595314026, "learning_rate": 6.689535406584243e-05, "loss": 1.6212, "step": 70628 }, { "epoch": 2.35, "grad_norm": 0.6927211880683899, "learning_rate": 6.688877479356218e-05, "loss": 1.6526, "step": 70629 }, { "epoch": 2.35, "grad_norm": 0.6997360587120056, "learning_rate": 6.688219580424462e-05, "loss": 1.7279, "step": 70630 }, { "epoch": 2.35, "grad_norm": 0.7010329365730286, "learning_rate": 6.68756170978976e-05, "loss": 1.6675, "step": 70631 }, { "epoch": 2.35, "grad_norm": 0.6671410202980042, "learning_rate": 6.686903867452926e-05, "loss": 1.6454, "step": 70632 }, { "epoch": 2.35, "grad_norm": 0.6934192776679993, "learning_rate": 6.686246053414772e-05, "loss": 1.7162, "step": 70633 }, { "epoch": 2.35, "grad_norm": 0.678632378578186, "learning_rate": 6.685588267676064e-05, "loss": 1.7092, "step": 70634 }, { "epoch": 2.35, "grad_norm": 0.6991989016532898, "learning_rate": 6.684930510237618e-05, "loss": 1.7586, "step": 70635 }, { "epoch": 2.35, "grad_norm": 0.6648381352424622, "learning_rate": 6.684272781100238e-05, "loss": 1.7121, "step": 70636 }, { "epoch": 2.35, "grad_norm": 0.6871770620346069, "learning_rate": 6.683615080264714e-05, "loss": 1.6813, "step": 70637 }, { "epoch": 2.35, "grad_norm": 0.6898527145385742, "learning_rate": 6.682957407731838e-05, "loss": 1.7256, "step": 70638 }, { "epoch": 2.35, "grad_norm": 0.6774721741676331, "learning_rate": 6.682299763502417e-05, "loss": 1.7585, "step": 70639 }, { "epoch": 2.35, "grad_norm": 0.7031062245368958, "learning_rate": 6.681642147577265e-05, "loss": 1.6504, "step": 70640 }, { "epoch": 2.35, "grad_norm": 0.6948556303977966, "learning_rate": 6.680984559957146e-05, "loss": 1.6901, "step": 70641 }, { "epoch": 2.35, "grad_norm": 0.6803315281867981, "learning_rate": 6.680327000642877e-05, "loss": 1.6404, "step": 70642 }, { "epoch": 2.35, "grad_norm": 0.6750794649124146, "learning_rate": 6.679669469635261e-05, "loss": 1.7161, "step": 70643 }, { "epoch": 2.35, "grad_norm": 0.710830569267273, "learning_rate": 6.679011966935093e-05, "loss": 1.6815, "step": 70644 }, { "epoch": 2.35, "grad_norm": 0.6710562705993652, "learning_rate": 6.67835449254316e-05, "loss": 1.6176, "step": 70645 }, { "epoch": 2.35, "grad_norm": 0.6593977808952332, "learning_rate": 6.677697046460275e-05, "loss": 1.686, "step": 70646 }, { "epoch": 2.35, "grad_norm": 0.6483095288276672, "learning_rate": 6.677039628687228e-05, "loss": 1.6666, "step": 70647 }, { "epoch": 2.35, "grad_norm": 0.6875761151313782, "learning_rate": 6.676382239224812e-05, "loss": 1.6738, "step": 70648 }, { "epoch": 2.35, "grad_norm": 0.6842188239097595, "learning_rate": 6.675724878073833e-05, "loss": 1.7228, "step": 70649 }, { "epoch": 2.35, "grad_norm": 0.6932002902030945, "learning_rate": 6.675067545235092e-05, "loss": 1.783, "step": 70650 }, { "epoch": 2.35, "grad_norm": 0.7113778591156006, "learning_rate": 6.674410240709387e-05, "loss": 1.6923, "step": 70651 }, { "epoch": 2.35, "grad_norm": 0.6572993397712708, "learning_rate": 6.6737529644975e-05, "loss": 1.7308, "step": 70652 }, { "epoch": 2.35, "grad_norm": 0.6899173259735107, "learning_rate": 6.673095716600247e-05, "loss": 1.6222, "step": 70653 }, { "epoch": 2.35, "grad_norm": 0.6658564209938049, "learning_rate": 6.67243849701842e-05, "loss": 1.6968, "step": 70654 }, { "epoch": 2.35, "grad_norm": 0.677801251411438, "learning_rate": 6.671781305752809e-05, "loss": 1.793, "step": 70655 }, { "epoch": 2.35, "grad_norm": 0.6953245401382446, "learning_rate": 6.671124142804224e-05, "loss": 1.6761, "step": 70656 }, { "epoch": 2.35, "grad_norm": 0.6888645887374878, "learning_rate": 6.67046700817345e-05, "loss": 1.6479, "step": 70657 }, { "epoch": 2.35, "grad_norm": 0.6698185205459595, "learning_rate": 6.669809901861297e-05, "loss": 1.6699, "step": 70658 }, { "epoch": 2.35, "grad_norm": 0.6639438271522522, "learning_rate": 6.669152823868554e-05, "loss": 1.6994, "step": 70659 }, { "epoch": 2.35, "grad_norm": 0.6829246878623962, "learning_rate": 6.668495774196027e-05, "loss": 1.6388, "step": 70660 }, { "epoch": 2.35, "grad_norm": 0.6699774861335754, "learning_rate": 6.66783875284451e-05, "loss": 1.6441, "step": 70661 }, { "epoch": 2.35, "grad_norm": 0.6906465888023376, "learning_rate": 6.667181759814787e-05, "loss": 1.7075, "step": 70662 }, { "epoch": 2.35, "grad_norm": 0.6935908198356628, "learning_rate": 6.666524795107678e-05, "loss": 1.7337, "step": 70663 }, { "epoch": 2.35, "grad_norm": 0.7037323713302612, "learning_rate": 6.665867858723963e-05, "loss": 1.7026, "step": 70664 }, { "epoch": 2.35, "grad_norm": 0.6465513110160828, "learning_rate": 6.665210950664454e-05, "loss": 1.7003, "step": 70665 }, { "epoch": 2.35, "grad_norm": 0.6792938113212585, "learning_rate": 6.664554070929938e-05, "loss": 1.6358, "step": 70666 }, { "epoch": 2.35, "grad_norm": 0.6590924859046936, "learning_rate": 6.66389721952121e-05, "loss": 1.6499, "step": 70667 }, { "epoch": 2.35, "grad_norm": 0.6642559766769409, "learning_rate": 6.66324039643908e-05, "loss": 1.6708, "step": 70668 }, { "epoch": 2.35, "grad_norm": 0.7108471393585205, "learning_rate": 6.662583601684327e-05, "loss": 1.6836, "step": 70669 }, { "epoch": 2.35, "grad_norm": 0.6457345485687256, "learning_rate": 6.66192683525777e-05, "loss": 1.7639, "step": 70670 }, { "epoch": 2.35, "grad_norm": 0.6689757108688354, "learning_rate": 6.661270097160187e-05, "loss": 1.643, "step": 70671 }, { "epoch": 2.35, "grad_norm": 0.6596865057945251, "learning_rate": 6.66061338739239e-05, "loss": 1.7092, "step": 70672 }, { "epoch": 2.35, "grad_norm": 0.6625781059265137, "learning_rate": 6.659956705955169e-05, "loss": 1.6791, "step": 70673 }, { "epoch": 2.35, "grad_norm": 0.6602451205253601, "learning_rate": 6.659300052849316e-05, "loss": 1.7035, "step": 70674 }, { "epoch": 2.35, "grad_norm": 0.6602333784103394, "learning_rate": 6.658643428075638e-05, "loss": 1.6888, "step": 70675 }, { "epoch": 2.35, "grad_norm": 0.7009931206703186, "learning_rate": 6.657986831634931e-05, "loss": 1.7561, "step": 70676 }, { "epoch": 2.35, "grad_norm": 0.6602759957313538, "learning_rate": 6.657330263527978e-05, "loss": 1.6615, "step": 70677 }, { "epoch": 2.35, "grad_norm": 0.6472738981246948, "learning_rate": 6.65667372375559e-05, "loss": 1.7265, "step": 70678 }, { "epoch": 2.35, "grad_norm": 0.6887980103492737, "learning_rate": 6.656017212318566e-05, "loss": 1.721, "step": 70679 }, { "epoch": 2.35, "grad_norm": 0.6765803098678589, "learning_rate": 6.6553607292177e-05, "loss": 1.6996, "step": 70680 }, { "epoch": 2.35, "grad_norm": 0.6826043128967285, "learning_rate": 6.654704274453775e-05, "loss": 1.7038, "step": 70681 }, { "epoch": 2.35, "grad_norm": 0.678411602973938, "learning_rate": 6.65404784802761e-05, "loss": 1.7265, "step": 70682 }, { "epoch": 2.35, "grad_norm": 0.6741970181465149, "learning_rate": 6.65339144993999e-05, "loss": 1.6071, "step": 70683 }, { "epoch": 2.35, "grad_norm": 0.6998465657234192, "learning_rate": 6.652735080191702e-05, "loss": 1.7319, "step": 70684 }, { "epoch": 2.35, "grad_norm": 0.6853179335594177, "learning_rate": 6.652078738783556e-05, "loss": 1.7272, "step": 70685 }, { "epoch": 2.35, "grad_norm": 0.6652125716209412, "learning_rate": 6.651422425716363e-05, "loss": 1.651, "step": 70686 }, { "epoch": 2.35, "grad_norm": 0.6592922210693359, "learning_rate": 6.650766140990889e-05, "loss": 1.6369, "step": 70687 }, { "epoch": 2.35, "grad_norm": 0.6788039803504944, "learning_rate": 6.65010988460794e-05, "loss": 1.745, "step": 70688 }, { "epoch": 2.35, "grad_norm": 0.6715269088745117, "learning_rate": 6.649453656568327e-05, "loss": 1.7499, "step": 70689 }, { "epoch": 2.35, "grad_norm": 0.6685081124305725, "learning_rate": 6.648797456872836e-05, "loss": 1.741, "step": 70690 }, { "epoch": 2.35, "grad_norm": 0.6820861101150513, "learning_rate": 6.648141285522259e-05, "loss": 1.6824, "step": 70691 }, { "epoch": 2.35, "grad_norm": 0.6591001152992249, "learning_rate": 6.647485142517396e-05, "loss": 1.5723, "step": 70692 }, { "epoch": 2.35, "grad_norm": 0.6733226776123047, "learning_rate": 6.646829027859062e-05, "loss": 1.722, "step": 70693 }, { "epoch": 2.35, "grad_norm": 0.687092125415802, "learning_rate": 6.646172941548019e-05, "loss": 1.6277, "step": 70694 }, { "epoch": 2.35, "grad_norm": 0.6759595274925232, "learning_rate": 6.64551688358508e-05, "loss": 1.6984, "step": 70695 }, { "epoch": 2.35, "grad_norm": 0.6718267202377319, "learning_rate": 6.644860853971055e-05, "loss": 1.7003, "step": 70696 }, { "epoch": 2.35, "grad_norm": 0.6790860295295715, "learning_rate": 6.644204852706725e-05, "loss": 1.6428, "step": 70697 }, { "epoch": 2.35, "grad_norm": 0.665291428565979, "learning_rate": 6.64354887979288e-05, "loss": 1.6386, "step": 70698 }, { "epoch": 2.35, "grad_norm": 0.7166988849639893, "learning_rate": 6.642892935230324e-05, "loss": 1.6766, "step": 70699 }, { "epoch": 2.35, "grad_norm": 2.562837839126587, "learning_rate": 6.642237019019875e-05, "loss": 1.6344, "step": 70700 }, { "epoch": 2.35, "grad_norm": 0.6817258596420288, "learning_rate": 6.641581131162288e-05, "loss": 1.6269, "step": 70701 }, { "epoch": 2.35, "grad_norm": 0.6709006428718567, "learning_rate": 6.640925271658383e-05, "loss": 1.6673, "step": 70702 }, { "epoch": 2.35, "grad_norm": 0.6589273810386658, "learning_rate": 6.640269440508959e-05, "loss": 1.6708, "step": 70703 }, { "epoch": 2.35, "grad_norm": 0.6502949595451355, "learning_rate": 6.639613637714809e-05, "loss": 1.6595, "step": 70704 }, { "epoch": 2.35, "grad_norm": 0.6990914940834045, "learning_rate": 6.638957863276714e-05, "loss": 1.7035, "step": 70705 }, { "epoch": 2.35, "grad_norm": 0.6614093780517578, "learning_rate": 6.638302117195491e-05, "loss": 1.6038, "step": 70706 }, { "epoch": 2.35, "grad_norm": 0.6676499843597412, "learning_rate": 6.63764639947193e-05, "loss": 1.6636, "step": 70707 }, { "epoch": 2.35, "grad_norm": 0.6751676797866821, "learning_rate": 6.636990710106813e-05, "loss": 1.6523, "step": 70708 }, { "epoch": 2.35, "grad_norm": 0.650797426700592, "learning_rate": 6.636335049100948e-05, "loss": 1.7006, "step": 70709 }, { "epoch": 2.35, "grad_norm": 0.6932926774024963, "learning_rate": 6.635679416455138e-05, "loss": 1.7031, "step": 70710 }, { "epoch": 2.35, "grad_norm": 0.6541619300842285, "learning_rate": 6.63502381217017e-05, "loss": 1.7571, "step": 70711 }, { "epoch": 2.35, "grad_norm": 0.6932557225227356, "learning_rate": 6.634368236246833e-05, "loss": 1.6939, "step": 70712 }, { "epoch": 2.35, "grad_norm": 0.6699866652488708, "learning_rate": 6.633712688685938e-05, "loss": 1.6556, "step": 70713 }, { "epoch": 2.35, "grad_norm": 0.694988489151001, "learning_rate": 6.633057169488276e-05, "loss": 1.7303, "step": 70714 }, { "epoch": 2.35, "grad_norm": 0.6865009665489197, "learning_rate": 6.632401678654626e-05, "loss": 1.7312, "step": 70715 }, { "epoch": 2.35, "grad_norm": 0.6946394443511963, "learning_rate": 6.631746216185808e-05, "loss": 1.7035, "step": 70716 }, { "epoch": 2.35, "grad_norm": 0.6554173231124878, "learning_rate": 6.6310907820826e-05, "loss": 1.6458, "step": 70717 }, { "epoch": 2.35, "grad_norm": 0.6763100028038025, "learning_rate": 6.630435376345811e-05, "loss": 1.658, "step": 70718 }, { "epoch": 2.35, "grad_norm": 0.672432005405426, "learning_rate": 6.62977999897622e-05, "loss": 1.7155, "step": 70719 }, { "epoch": 2.35, "grad_norm": 0.6712763905525208, "learning_rate": 6.629124649974644e-05, "loss": 1.6998, "step": 70720 }, { "epoch": 2.35, "grad_norm": 0.6700024008750916, "learning_rate": 6.628469329341869e-05, "loss": 1.6341, "step": 70721 }, { "epoch": 2.35, "grad_norm": 0.6934841275215149, "learning_rate": 6.627814037078678e-05, "loss": 1.6606, "step": 70722 }, { "epoch": 2.35, "grad_norm": 0.670642077922821, "learning_rate": 6.627158773185884e-05, "loss": 1.6078, "step": 70723 }, { "epoch": 2.35, "grad_norm": 0.6730918288230896, "learning_rate": 6.626503537664267e-05, "loss": 1.7659, "step": 70724 }, { "epoch": 2.35, "grad_norm": 0.6701595187187195, "learning_rate": 6.625848330514642e-05, "loss": 1.6513, "step": 70725 }, { "epoch": 2.35, "grad_norm": 0.682282030582428, "learning_rate": 6.62519315173779e-05, "loss": 1.6036, "step": 70726 }, { "epoch": 2.35, "grad_norm": 0.6399899125099182, "learning_rate": 6.624538001334502e-05, "loss": 1.6394, "step": 70727 }, { "epoch": 2.35, "grad_norm": 0.696557343006134, "learning_rate": 6.62388287930559e-05, "loss": 1.6943, "step": 70728 }, { "epoch": 2.35, "grad_norm": 0.6692465543746948, "learning_rate": 6.62322778565183e-05, "loss": 1.6344, "step": 70729 }, { "epoch": 2.35, "grad_norm": 0.6497412919998169, "learning_rate": 6.622572720374035e-05, "loss": 1.6347, "step": 70730 }, { "epoch": 2.35, "grad_norm": 0.6680923700332642, "learning_rate": 6.621917683472988e-05, "loss": 1.7278, "step": 70731 }, { "epoch": 2.35, "grad_norm": 0.6825607419013977, "learning_rate": 6.621262674949495e-05, "loss": 1.6998, "step": 70732 }, { "epoch": 2.35, "grad_norm": 0.70738285779953, "learning_rate": 6.62060769480434e-05, "loss": 1.6581, "step": 70733 }, { "epoch": 2.35, "grad_norm": 0.6519646644592285, "learning_rate": 6.619952743038319e-05, "loss": 1.6736, "step": 70734 }, { "epoch": 2.35, "grad_norm": 0.6647912263870239, "learning_rate": 6.61929781965223e-05, "loss": 1.6763, "step": 70735 }, { "epoch": 2.35, "grad_norm": 0.6772922873497009, "learning_rate": 6.618642924646885e-05, "loss": 1.6723, "step": 70736 }, { "epoch": 2.35, "grad_norm": 0.6931342482566833, "learning_rate": 6.617988058023043e-05, "loss": 1.6966, "step": 70737 }, { "epoch": 2.35, "grad_norm": 0.6882607936859131, "learning_rate": 6.617333219781521e-05, "loss": 1.6431, "step": 70738 }, { "epoch": 2.35, "grad_norm": 0.6936154961585999, "learning_rate": 6.616678409923118e-05, "loss": 1.7082, "step": 70739 }, { "epoch": 2.35, "grad_norm": 0.6792358756065369, "learning_rate": 6.616023628448622e-05, "loss": 1.6846, "step": 70740 }, { "epoch": 2.35, "grad_norm": 0.6913638114929199, "learning_rate": 6.615368875358818e-05, "loss": 1.6309, "step": 70741 }, { "epoch": 2.35, "grad_norm": 0.6833249926567078, "learning_rate": 6.614714150654516e-05, "loss": 1.6819, "step": 70742 }, { "epoch": 2.35, "grad_norm": 0.6651692986488342, "learning_rate": 6.614059454336518e-05, "loss": 1.6918, "step": 70743 }, { "epoch": 2.35, "grad_norm": 0.6881719827651978, "learning_rate": 6.613404786405589e-05, "loss": 1.6452, "step": 70744 }, { "epoch": 2.35, "grad_norm": 0.6751539707183838, "learning_rate": 6.612750146862544e-05, "loss": 1.6928, "step": 70745 }, { "epoch": 2.35, "grad_norm": 0.6754113435745239, "learning_rate": 6.612095535708179e-05, "loss": 1.698, "step": 70746 }, { "epoch": 2.35, "grad_norm": 0.6720584630966187, "learning_rate": 6.611440952943286e-05, "loss": 1.7148, "step": 70747 }, { "epoch": 2.35, "grad_norm": 0.6590926051139832, "learning_rate": 6.610786398568648e-05, "loss": 1.6907, "step": 70748 }, { "epoch": 2.35, "grad_norm": 0.6869525909423828, "learning_rate": 6.61013187258507e-05, "loss": 1.6599, "step": 70749 }, { "epoch": 2.35, "grad_norm": 0.6709631681442261, "learning_rate": 6.609477374993363e-05, "loss": 1.7005, "step": 70750 }, { "epoch": 2.35, "grad_norm": 0.6721300482749939, "learning_rate": 6.608822905794288e-05, "loss": 1.6754, "step": 70751 }, { "epoch": 2.35, "grad_norm": 0.6781942844390869, "learning_rate": 6.608168464988654e-05, "loss": 1.6763, "step": 70752 }, { "epoch": 2.35, "grad_norm": 0.6814386248588562, "learning_rate": 6.607514052577263e-05, "loss": 1.6141, "step": 70753 }, { "epoch": 2.35, "grad_norm": 0.6776683926582336, "learning_rate": 6.606859668560905e-05, "loss": 1.7054, "step": 70754 }, { "epoch": 2.35, "grad_norm": 0.675404965877533, "learning_rate": 6.606205312940367e-05, "loss": 1.6687, "step": 70755 }, { "epoch": 2.35, "grad_norm": 0.693659782409668, "learning_rate": 6.605550985716447e-05, "loss": 1.7302, "step": 70756 }, { "epoch": 2.35, "grad_norm": 0.6450294852256775, "learning_rate": 6.604896686889955e-05, "loss": 1.693, "step": 70757 }, { "epoch": 2.35, "grad_norm": 0.6922902464866638, "learning_rate": 6.604242416461659e-05, "loss": 1.7822, "step": 70758 }, { "epoch": 2.35, "grad_norm": 0.7132601141929626, "learning_rate": 6.60358817443236e-05, "loss": 1.7193, "step": 70759 }, { "epoch": 2.35, "grad_norm": 0.677926778793335, "learning_rate": 6.602933960802871e-05, "loss": 1.7165, "step": 70760 }, { "epoch": 2.35, "grad_norm": 0.6839826703071594, "learning_rate": 6.60227977557397e-05, "loss": 1.6168, "step": 70761 }, { "epoch": 2.35, "grad_norm": 0.688758909702301, "learning_rate": 6.601625618746447e-05, "loss": 1.7644, "step": 70762 }, { "epoch": 2.35, "grad_norm": 0.6763109564781189, "learning_rate": 6.60097149032111e-05, "loss": 1.7294, "step": 70763 }, { "epoch": 2.35, "grad_norm": 0.6887253522872925, "learning_rate": 6.600317390298748e-05, "loss": 1.7679, "step": 70764 }, { "epoch": 2.35, "grad_norm": 0.6662988066673279, "learning_rate": 6.599663318680145e-05, "loss": 1.65, "step": 70765 }, { "epoch": 2.35, "grad_norm": 0.6919485926628113, "learning_rate": 6.5990092754661e-05, "loss": 1.6534, "step": 70766 }, { "epoch": 2.35, "grad_norm": 0.6655316352844238, "learning_rate": 6.598355260657421e-05, "loss": 1.6188, "step": 70767 }, { "epoch": 2.35, "grad_norm": 0.6719538569450378, "learning_rate": 6.597701274254889e-05, "loss": 1.7623, "step": 70768 }, { "epoch": 2.35, "grad_norm": 0.7178521156311035, "learning_rate": 6.59704731625929e-05, "loss": 1.6975, "step": 70769 }, { "epoch": 2.35, "grad_norm": 0.6826587915420532, "learning_rate": 6.596393386671436e-05, "loss": 1.7125, "step": 70770 }, { "epoch": 2.35, "grad_norm": 0.7057735323905945, "learning_rate": 6.595739485492113e-05, "loss": 1.7474, "step": 70771 }, { "epoch": 2.35, "grad_norm": 0.6835790276527405, "learning_rate": 6.595085612722108e-05, "loss": 1.7324, "step": 70772 }, { "epoch": 2.35, "grad_norm": 0.6878500580787659, "learning_rate": 6.594431768362222e-05, "loss": 1.6192, "step": 70773 }, { "epoch": 2.35, "grad_norm": 0.6932693123817444, "learning_rate": 6.593777952413245e-05, "loss": 1.7241, "step": 70774 }, { "epoch": 2.35, "grad_norm": 0.6732823252677917, "learning_rate": 6.59312416487598e-05, "loss": 1.6917, "step": 70775 }, { "epoch": 2.35, "grad_norm": 0.674903392791748, "learning_rate": 6.592470405751204e-05, "loss": 1.6689, "step": 70776 }, { "epoch": 2.35, "grad_norm": 0.6948966979980469, "learning_rate": 6.591816675039728e-05, "loss": 1.6907, "step": 70777 }, { "epoch": 2.35, "grad_norm": 0.6533263325691223, "learning_rate": 6.591162972742338e-05, "loss": 1.6948, "step": 70778 }, { "epoch": 2.35, "grad_norm": 0.6632914543151855, "learning_rate": 6.590509298859819e-05, "loss": 1.7359, "step": 70779 }, { "epoch": 2.35, "grad_norm": 0.6900112628936768, "learning_rate": 6.589855653392982e-05, "loss": 1.7004, "step": 70780 }, { "epoch": 2.35, "grad_norm": 0.666563868522644, "learning_rate": 6.589202036342599e-05, "loss": 1.7794, "step": 70781 }, { "epoch": 2.35, "grad_norm": 0.655639111995697, "learning_rate": 6.588548447709487e-05, "loss": 1.6691, "step": 70782 }, { "epoch": 2.35, "grad_norm": 0.6850444078445435, "learning_rate": 6.587894887494427e-05, "loss": 1.6949, "step": 70783 }, { "epoch": 2.35, "grad_norm": 0.6766670942306519, "learning_rate": 6.587241355698204e-05, "loss": 1.6953, "step": 70784 }, { "epoch": 2.36, "grad_norm": 0.6518408060073853, "learning_rate": 6.586587852321627e-05, "loss": 1.6024, "step": 70785 }, { "epoch": 2.36, "grad_norm": 0.6818084716796875, "learning_rate": 6.585934377365477e-05, "loss": 1.6639, "step": 70786 }, { "epoch": 2.36, "grad_norm": 0.6704835295677185, "learning_rate": 6.585280930830563e-05, "loss": 1.6865, "step": 70787 }, { "epoch": 2.36, "grad_norm": 0.6886935234069824, "learning_rate": 6.584627512717655e-05, "loss": 1.7325, "step": 70788 }, { "epoch": 2.36, "grad_norm": 0.703846275806427, "learning_rate": 6.583974123027569e-05, "loss": 1.7159, "step": 70789 }, { "epoch": 2.36, "grad_norm": 0.6767656207084656, "learning_rate": 6.583320761761089e-05, "loss": 1.6792, "step": 70790 }, { "epoch": 2.36, "grad_norm": 0.6906090974807739, "learning_rate": 6.582667428918996e-05, "loss": 1.788, "step": 70791 }, { "epoch": 2.36, "grad_norm": 0.663467288017273, "learning_rate": 6.582014124502105e-05, "loss": 1.6472, "step": 70792 }, { "epoch": 2.36, "grad_norm": 0.7037175297737122, "learning_rate": 6.581360848511199e-05, "loss": 1.6909, "step": 70793 }, { "epoch": 2.36, "grad_norm": 0.6878485083580017, "learning_rate": 6.580707600947061e-05, "loss": 1.7824, "step": 70794 }, { "epoch": 2.36, "grad_norm": 0.7058743238449097, "learning_rate": 6.580054381810495e-05, "loss": 1.7572, "step": 70795 }, { "epoch": 2.36, "grad_norm": 0.6790579557418823, "learning_rate": 6.5794011911023e-05, "loss": 1.6377, "step": 70796 }, { "epoch": 2.36, "grad_norm": 0.672089159488678, "learning_rate": 6.57874802882326e-05, "loss": 1.7408, "step": 70797 }, { "epoch": 2.36, "grad_norm": 0.7099246978759766, "learning_rate": 6.578094894974159e-05, "loss": 1.7082, "step": 70798 }, { "epoch": 2.36, "grad_norm": 0.6898541450500488, "learning_rate": 6.577441789555813e-05, "loss": 1.6478, "step": 70799 }, { "epoch": 2.36, "grad_norm": 0.672909677028656, "learning_rate": 6.576788712568996e-05, "loss": 1.711, "step": 70800 }, { "epoch": 2.36, "grad_norm": 0.6776171922683716, "learning_rate": 6.5761356640145e-05, "loss": 1.7317, "step": 70801 }, { "epoch": 2.36, "grad_norm": 0.6741582751274109, "learning_rate": 6.575482643893127e-05, "loss": 1.6983, "step": 70802 }, { "epoch": 2.36, "grad_norm": 0.6685841679573059, "learning_rate": 6.574829652205681e-05, "loss": 1.7422, "step": 70803 }, { "epoch": 2.36, "grad_norm": 0.6796391010284424, "learning_rate": 6.574176688952924e-05, "loss": 1.6704, "step": 70804 }, { "epoch": 2.36, "grad_norm": 0.6801228523254395, "learning_rate": 6.573523754135663e-05, "loss": 1.7344, "step": 70805 }, { "epoch": 2.36, "grad_norm": 0.6962468028068542, "learning_rate": 6.572870847754703e-05, "loss": 1.639, "step": 70806 }, { "epoch": 2.36, "grad_norm": 0.6810994148254395, "learning_rate": 6.572217969810822e-05, "loss": 1.6683, "step": 70807 }, { "epoch": 2.36, "grad_norm": 0.6894555687904358, "learning_rate": 6.571565120304814e-05, "loss": 1.7482, "step": 70808 }, { "epoch": 2.36, "grad_norm": 0.6628261804580688, "learning_rate": 6.57091229923747e-05, "loss": 1.6461, "step": 70809 }, { "epoch": 2.36, "grad_norm": 0.6810396909713745, "learning_rate": 6.570259506609604e-05, "loss": 1.6607, "step": 70810 }, { "epoch": 2.36, "grad_norm": 0.6831748485565186, "learning_rate": 6.569606742421973e-05, "loss": 1.7206, "step": 70811 }, { "epoch": 2.36, "grad_norm": 0.6584187746047974, "learning_rate": 6.568954006675389e-05, "loss": 1.6567, "step": 70812 }, { "epoch": 2.36, "grad_norm": 0.6707037687301636, "learning_rate": 6.568301299370648e-05, "loss": 1.6112, "step": 70813 }, { "epoch": 2.36, "grad_norm": 0.675213634967804, "learning_rate": 6.567648620508542e-05, "loss": 1.6928, "step": 70814 }, { "epoch": 2.36, "grad_norm": 0.6990328431129456, "learning_rate": 6.566995970089846e-05, "loss": 1.7364, "step": 70815 }, { "epoch": 2.36, "grad_norm": 0.6837902069091797, "learning_rate": 6.566343348115365e-05, "loss": 1.7332, "step": 70816 }, { "epoch": 2.36, "grad_norm": 0.6697742938995361, "learning_rate": 6.565690754585906e-05, "loss": 1.6671, "step": 70817 }, { "epoch": 2.36, "grad_norm": 0.6691696047782898, "learning_rate": 6.56503818950223e-05, "loss": 1.664, "step": 70818 }, { "epoch": 2.36, "grad_norm": 0.699289083480835, "learning_rate": 6.564385652865141e-05, "loss": 1.7357, "step": 70819 }, { "epoch": 2.36, "grad_norm": 0.6703025698661804, "learning_rate": 6.563733144675448e-05, "loss": 1.7533, "step": 70820 }, { "epoch": 2.36, "grad_norm": 0.6811458468437195, "learning_rate": 6.563080664933929e-05, "loss": 1.7021, "step": 70821 }, { "epoch": 2.36, "grad_norm": 0.672294020652771, "learning_rate": 6.562428213641366e-05, "loss": 1.6521, "step": 70822 }, { "epoch": 2.36, "grad_norm": 0.6781478524208069, "learning_rate": 6.561775790798571e-05, "loss": 1.6363, "step": 70823 }, { "epoch": 2.36, "grad_norm": 0.6816621422767639, "learning_rate": 6.56112339640633e-05, "loss": 1.7037, "step": 70824 }, { "epoch": 2.36, "grad_norm": 0.6938405632972717, "learning_rate": 6.560471030465418e-05, "loss": 1.6828, "step": 70825 }, { "epoch": 2.36, "grad_norm": 0.6553366780281067, "learning_rate": 6.559818692976644e-05, "loss": 1.6894, "step": 70826 }, { "epoch": 2.36, "grad_norm": 0.6711395382881165, "learning_rate": 6.559166383940805e-05, "loss": 1.7445, "step": 70827 }, { "epoch": 2.36, "grad_norm": 0.6708055734634399, "learning_rate": 6.55851410335868e-05, "loss": 1.6502, "step": 70828 }, { "epoch": 2.36, "grad_norm": 0.6783725619316101, "learning_rate": 6.557861851231062e-05, "loss": 1.6847, "step": 70829 }, { "epoch": 2.36, "grad_norm": 0.6676564812660217, "learning_rate": 6.557209627558752e-05, "loss": 1.7152, "step": 70830 }, { "epoch": 2.36, "grad_norm": 0.7022721171379089, "learning_rate": 6.556557432342535e-05, "loss": 1.7063, "step": 70831 }, { "epoch": 2.36, "grad_norm": 0.6946166753768921, "learning_rate": 6.555905265583195e-05, "loss": 1.6995, "step": 70832 }, { "epoch": 2.36, "grad_norm": 0.6635881662368774, "learning_rate": 6.555253127281538e-05, "loss": 1.7011, "step": 70833 }, { "epoch": 2.36, "grad_norm": 0.6846454739570618, "learning_rate": 6.554601017438344e-05, "loss": 1.7291, "step": 70834 }, { "epoch": 2.36, "grad_norm": 0.686366081237793, "learning_rate": 6.553948936054419e-05, "loss": 1.7205, "step": 70835 }, { "epoch": 2.36, "grad_norm": 0.6732193827629089, "learning_rate": 6.553296883130534e-05, "loss": 1.634, "step": 70836 }, { "epoch": 2.36, "grad_norm": 0.656694233417511, "learning_rate": 6.552644858667499e-05, "loss": 1.6652, "step": 70837 }, { "epoch": 2.36, "grad_norm": 0.6586231589317322, "learning_rate": 6.5519928626661e-05, "loss": 1.7382, "step": 70838 }, { "epoch": 2.36, "grad_norm": 0.683864414691925, "learning_rate": 6.551340895127121e-05, "loss": 1.6451, "step": 70839 }, { "epoch": 2.36, "grad_norm": 0.698716402053833, "learning_rate": 6.550688956051364e-05, "loss": 1.7623, "step": 70840 }, { "epoch": 2.36, "grad_norm": 0.6779512763023376, "learning_rate": 6.55003704543961e-05, "loss": 1.6978, "step": 70841 }, { "epoch": 2.36, "grad_norm": 0.6724063158035278, "learning_rate": 6.54938516329266e-05, "loss": 1.7106, "step": 70842 }, { "epoch": 2.36, "grad_norm": 0.6819230318069458, "learning_rate": 6.548733309611305e-05, "loss": 1.6253, "step": 70843 }, { "epoch": 2.36, "grad_norm": 0.697577953338623, "learning_rate": 6.548081484396325e-05, "loss": 1.7387, "step": 70844 }, { "epoch": 2.36, "grad_norm": 0.6938396692276001, "learning_rate": 6.547429687648516e-05, "loss": 1.7283, "step": 70845 }, { "epoch": 2.36, "grad_norm": 0.6956601142883301, "learning_rate": 6.54677791936868e-05, "loss": 1.6588, "step": 70846 }, { "epoch": 2.36, "grad_norm": 0.6785479187965393, "learning_rate": 6.5461261795576e-05, "loss": 1.7022, "step": 70847 }, { "epoch": 2.36, "grad_norm": 0.6937329173088074, "learning_rate": 6.545474468216061e-05, "loss": 1.7495, "step": 70848 }, { "epoch": 2.36, "grad_norm": 0.6898714303970337, "learning_rate": 6.544822785344865e-05, "loss": 1.6845, "step": 70849 }, { "epoch": 2.36, "grad_norm": 0.6793370842933655, "learning_rate": 6.5441711309448e-05, "loss": 1.7243, "step": 70850 }, { "epoch": 2.36, "grad_norm": 0.6757007241249084, "learning_rate": 6.543519505016646e-05, "loss": 1.7203, "step": 70851 }, { "epoch": 2.36, "grad_norm": 0.658592164516449, "learning_rate": 6.542867907561205e-05, "loss": 1.6934, "step": 70852 }, { "epoch": 2.36, "grad_norm": 0.6875184774398804, "learning_rate": 6.542216338579282e-05, "loss": 1.6777, "step": 70853 }, { "epoch": 2.36, "grad_norm": 0.6974737644195557, "learning_rate": 6.541564798071634e-05, "loss": 1.7055, "step": 70854 }, { "epoch": 2.36, "grad_norm": 0.6961257457733154, "learning_rate": 6.540913286039074e-05, "loss": 1.6439, "step": 70855 }, { "epoch": 2.36, "grad_norm": 0.6730908155441284, "learning_rate": 6.540261802482391e-05, "loss": 1.6706, "step": 70856 }, { "epoch": 2.36, "grad_norm": 0.6630436182022095, "learning_rate": 6.539610347402378e-05, "loss": 1.684, "step": 70857 }, { "epoch": 2.36, "grad_norm": 0.6841787099838257, "learning_rate": 6.53895892079981e-05, "loss": 1.6817, "step": 70858 }, { "epoch": 2.36, "grad_norm": 0.6606841087341309, "learning_rate": 6.538307522675493e-05, "loss": 1.6213, "step": 70859 }, { "epoch": 2.36, "grad_norm": 0.660871684551239, "learning_rate": 6.537656153030226e-05, "loss": 1.6491, "step": 70860 }, { "epoch": 2.36, "grad_norm": 0.6495400667190552, "learning_rate": 6.537004811864772e-05, "loss": 1.7006, "step": 70861 }, { "epoch": 2.36, "grad_norm": 0.6882480978965759, "learning_rate": 6.53635349917994e-05, "loss": 1.6788, "step": 70862 }, { "epoch": 2.36, "grad_norm": 0.6835237741470337, "learning_rate": 6.535702214976522e-05, "loss": 1.6649, "step": 70863 }, { "epoch": 2.36, "grad_norm": 0.6775705814361572, "learning_rate": 6.535050959255308e-05, "loss": 1.6861, "step": 70864 }, { "epoch": 2.36, "grad_norm": 0.6707541942596436, "learning_rate": 6.534399732017073e-05, "loss": 1.7406, "step": 70865 }, { "epoch": 2.36, "grad_norm": 0.6852197647094727, "learning_rate": 6.533748533262622e-05, "loss": 1.6499, "step": 70866 }, { "epoch": 2.36, "grad_norm": 0.675134539604187, "learning_rate": 6.533097362992758e-05, "loss": 1.6227, "step": 70867 }, { "epoch": 2.36, "grad_norm": 0.6951914429664612, "learning_rate": 6.53244622120824e-05, "loss": 1.7292, "step": 70868 }, { "epoch": 2.36, "grad_norm": 0.7196072936058044, "learning_rate": 6.531795107909872e-05, "loss": 1.6635, "step": 70869 }, { "epoch": 2.36, "grad_norm": 0.684107780456543, "learning_rate": 6.531144023098457e-05, "loss": 1.6117, "step": 70870 }, { "epoch": 2.36, "grad_norm": 0.6651514172554016, "learning_rate": 6.530492966774777e-05, "loss": 1.6622, "step": 70871 }, { "epoch": 2.36, "grad_norm": 0.7013066411018372, "learning_rate": 6.529841938939609e-05, "loss": 1.7413, "step": 70872 }, { "epoch": 2.36, "grad_norm": 0.700232207775116, "learning_rate": 6.529190939593766e-05, "loss": 1.7016, "step": 70873 }, { "epoch": 2.36, "grad_norm": 0.6900221109390259, "learning_rate": 6.528539968738026e-05, "loss": 1.6851, "step": 70874 }, { "epoch": 2.36, "grad_norm": 0.6657750010490417, "learning_rate": 6.527889026373169e-05, "loss": 1.7257, "step": 70875 }, { "epoch": 2.36, "grad_norm": 0.6890202164649963, "learning_rate": 6.5272381125e-05, "loss": 1.6855, "step": 70876 }, { "epoch": 2.36, "grad_norm": 0.6706585884094238, "learning_rate": 6.526587227119311e-05, "loss": 1.7086, "step": 70877 }, { "epoch": 2.36, "grad_norm": 0.6686874628067017, "learning_rate": 6.525936370231889e-05, "loss": 1.7136, "step": 70878 }, { "epoch": 2.36, "grad_norm": 0.6653090715408325, "learning_rate": 6.525285541838511e-05, "loss": 1.7787, "step": 70879 }, { "epoch": 2.36, "grad_norm": 0.6934044361114502, "learning_rate": 6.524634741939985e-05, "loss": 1.6935, "step": 70880 }, { "epoch": 2.36, "grad_norm": 0.6662282347679138, "learning_rate": 6.523983970537097e-05, "loss": 1.7496, "step": 70881 }, { "epoch": 2.36, "grad_norm": 0.6857186555862427, "learning_rate": 6.523333227630623e-05, "loss": 1.7499, "step": 70882 }, { "epoch": 2.36, "grad_norm": 0.7264041304588318, "learning_rate": 6.522682513221363e-05, "loss": 1.7504, "step": 70883 }, { "epoch": 2.36, "grad_norm": 0.6710600852966309, "learning_rate": 6.522031827310118e-05, "loss": 1.6771, "step": 70884 }, { "epoch": 2.36, "grad_norm": 0.6784122586250305, "learning_rate": 6.521381169897666e-05, "loss": 1.7103, "step": 70885 }, { "epoch": 2.36, "grad_norm": 0.6856346726417542, "learning_rate": 6.520730540984787e-05, "loss": 1.7045, "step": 70886 }, { "epoch": 2.36, "grad_norm": 0.6619009375572205, "learning_rate": 6.520079940572293e-05, "loss": 1.7335, "step": 70887 }, { "epoch": 2.36, "grad_norm": 0.6993603706359863, "learning_rate": 6.51942936866096e-05, "loss": 1.7397, "step": 70888 }, { "epoch": 2.36, "grad_norm": 0.6493710875511169, "learning_rate": 6.518778825251575e-05, "loss": 1.697, "step": 70889 }, { "epoch": 2.36, "grad_norm": 0.6598383188247681, "learning_rate": 6.518128310344937e-05, "loss": 1.663, "step": 70890 }, { "epoch": 2.36, "grad_norm": 0.6778459548950195, "learning_rate": 6.517477823941824e-05, "loss": 1.7192, "step": 70891 }, { "epoch": 2.36, "grad_norm": 0.669301450252533, "learning_rate": 6.516827366043041e-05, "loss": 1.7026, "step": 70892 }, { "epoch": 2.36, "grad_norm": 0.6685788035392761, "learning_rate": 6.516176936649363e-05, "loss": 1.7044, "step": 70893 }, { "epoch": 2.36, "grad_norm": 0.6618372201919556, "learning_rate": 6.515526535761596e-05, "loss": 1.7559, "step": 70894 }, { "epoch": 2.36, "grad_norm": 0.6464790105819702, "learning_rate": 6.514876163380516e-05, "loss": 1.7003, "step": 70895 }, { "epoch": 2.36, "grad_norm": 0.6799013614654541, "learning_rate": 6.514225819506909e-05, "loss": 1.6649, "step": 70896 }, { "epoch": 2.36, "grad_norm": 0.6974552273750305, "learning_rate": 6.51357550414158e-05, "loss": 1.6616, "step": 70897 }, { "epoch": 2.36, "grad_norm": 0.6891523599624634, "learning_rate": 6.512925217285301e-05, "loss": 1.6607, "step": 70898 }, { "epoch": 2.36, "grad_norm": 0.6768320798873901, "learning_rate": 6.512274958938879e-05, "loss": 1.7268, "step": 70899 }, { "epoch": 2.36, "grad_norm": 0.6589791178703308, "learning_rate": 6.51162472910309e-05, "loss": 1.6335, "step": 70900 }, { "epoch": 2.36, "grad_norm": 0.675128161907196, "learning_rate": 6.510974527778727e-05, "loss": 1.677, "step": 70901 }, { "epoch": 2.36, "grad_norm": 0.6904438138008118, "learning_rate": 6.510324354966584e-05, "loss": 1.783, "step": 70902 }, { "epoch": 2.36, "grad_norm": 0.6637989282608032, "learning_rate": 6.509674210667437e-05, "loss": 1.6231, "step": 70903 }, { "epoch": 2.36, "grad_norm": 0.6715393662452698, "learning_rate": 6.509024094882094e-05, "loss": 1.7124, "step": 70904 }, { "epoch": 2.36, "grad_norm": 0.7497840523719788, "learning_rate": 6.508374007611327e-05, "loss": 1.7572, "step": 70905 }, { "epoch": 2.36, "grad_norm": 0.6414051055908203, "learning_rate": 6.507723948855942e-05, "loss": 1.6594, "step": 70906 }, { "epoch": 2.36, "grad_norm": 0.7291562557220459, "learning_rate": 6.507073918616716e-05, "loss": 1.7038, "step": 70907 }, { "epoch": 2.36, "grad_norm": 0.6944060921669006, "learning_rate": 6.506423916894433e-05, "loss": 1.7469, "step": 70908 }, { "epoch": 2.36, "grad_norm": 0.6610246300697327, "learning_rate": 6.505773943689898e-05, "loss": 1.68, "step": 70909 }, { "epoch": 2.36, "grad_norm": 0.7045897245407104, "learning_rate": 6.505123999003891e-05, "loss": 1.7704, "step": 70910 }, { "epoch": 2.36, "grad_norm": 0.68778395652771, "learning_rate": 6.504474082837196e-05, "loss": 1.6461, "step": 70911 }, { "epoch": 2.36, "grad_norm": 0.6615793704986572, "learning_rate": 6.503824195190606e-05, "loss": 1.7287, "step": 70912 }, { "epoch": 2.36, "grad_norm": 0.6714078187942505, "learning_rate": 6.503174336064921e-05, "loss": 1.6029, "step": 70913 }, { "epoch": 2.36, "grad_norm": 0.7055664658546448, "learning_rate": 6.502524505460917e-05, "loss": 1.669, "step": 70914 }, { "epoch": 2.36, "grad_norm": 0.688591480255127, "learning_rate": 6.50187470337938e-05, "loss": 1.6802, "step": 70915 }, { "epoch": 2.36, "grad_norm": 0.7076578736305237, "learning_rate": 6.501224929821111e-05, "loss": 1.7668, "step": 70916 }, { "epoch": 2.36, "grad_norm": 0.6711744666099548, "learning_rate": 6.500575184786896e-05, "loss": 1.5714, "step": 70917 }, { "epoch": 2.36, "grad_norm": 0.6892172694206238, "learning_rate": 6.499925468277513e-05, "loss": 1.7043, "step": 70918 }, { "epoch": 2.36, "grad_norm": 0.6765839457511902, "learning_rate": 6.499275780293755e-05, "loss": 1.6618, "step": 70919 }, { "epoch": 2.36, "grad_norm": 0.6650390625, "learning_rate": 6.498626120836428e-05, "loss": 1.6749, "step": 70920 }, { "epoch": 2.36, "grad_norm": 0.6842740178108215, "learning_rate": 6.497976489906291e-05, "loss": 1.6744, "step": 70921 }, { "epoch": 2.36, "grad_norm": 0.6968966722488403, "learning_rate": 6.497326887504152e-05, "loss": 1.6739, "step": 70922 }, { "epoch": 2.36, "grad_norm": 0.6561848521232605, "learning_rate": 6.496677313630799e-05, "loss": 1.7434, "step": 70923 }, { "epoch": 2.36, "grad_norm": 0.683432936668396, "learning_rate": 6.496027768287016e-05, "loss": 1.7184, "step": 70924 }, { "epoch": 2.36, "grad_norm": 0.7173377871513367, "learning_rate": 6.495378251473587e-05, "loss": 1.7218, "step": 70925 }, { "epoch": 2.36, "grad_norm": 0.7017505168914795, "learning_rate": 6.494728763191304e-05, "loss": 1.732, "step": 70926 }, { "epoch": 2.36, "grad_norm": 0.6712237000465393, "learning_rate": 6.494079303440975e-05, "loss": 1.7266, "step": 70927 }, { "epoch": 2.36, "grad_norm": 0.7144883871078491, "learning_rate": 6.493429872223351e-05, "loss": 1.6564, "step": 70928 }, { "epoch": 2.36, "grad_norm": 0.6774385571479797, "learning_rate": 6.49278046953924e-05, "loss": 1.6739, "step": 70929 }, { "epoch": 2.36, "grad_norm": 0.6846566796302795, "learning_rate": 6.492131095389441e-05, "loss": 1.7018, "step": 70930 }, { "epoch": 2.36, "grad_norm": 0.6690402626991272, "learning_rate": 6.49148174977473e-05, "loss": 1.6211, "step": 70931 }, { "epoch": 2.36, "grad_norm": 0.7008204460144043, "learning_rate": 6.490832432695888e-05, "loss": 1.7147, "step": 70932 }, { "epoch": 2.36, "grad_norm": 0.6789502501487732, "learning_rate": 6.49018314415371e-05, "loss": 1.7274, "step": 70933 }, { "epoch": 2.36, "grad_norm": 0.6983966827392578, "learning_rate": 6.489533884149004e-05, "loss": 1.7594, "step": 70934 }, { "epoch": 2.36, "grad_norm": 0.709088921546936, "learning_rate": 6.488884652682523e-05, "loss": 1.7967, "step": 70935 }, { "epoch": 2.36, "grad_norm": 0.6832635402679443, "learning_rate": 6.48823544975507e-05, "loss": 1.6555, "step": 70936 }, { "epoch": 2.36, "grad_norm": 0.6916623115539551, "learning_rate": 6.487586275367444e-05, "loss": 1.7063, "step": 70937 }, { "epoch": 2.36, "grad_norm": 0.6788966655731201, "learning_rate": 6.486937129520425e-05, "loss": 1.7069, "step": 70938 }, { "epoch": 2.36, "grad_norm": 0.660315990447998, "learning_rate": 6.486288012214792e-05, "loss": 1.6885, "step": 70939 }, { "epoch": 2.36, "grad_norm": 0.6843134164810181, "learning_rate": 6.485638923451347e-05, "loss": 1.6529, "step": 70940 }, { "epoch": 2.36, "grad_norm": 0.6650189161300659, "learning_rate": 6.48498986323087e-05, "loss": 1.708, "step": 70941 }, { "epoch": 2.36, "grad_norm": 0.6763247847557068, "learning_rate": 6.484340831554148e-05, "loss": 1.7487, "step": 70942 }, { "epoch": 2.36, "grad_norm": 0.6620909571647644, "learning_rate": 6.483691828421965e-05, "loss": 1.6427, "step": 70943 }, { "epoch": 2.36, "grad_norm": 0.6861982345581055, "learning_rate": 6.483042853835129e-05, "loss": 1.6479, "step": 70944 }, { "epoch": 2.36, "grad_norm": 0.6740814447402954, "learning_rate": 6.482393907794411e-05, "loss": 1.7149, "step": 70945 }, { "epoch": 2.36, "grad_norm": 0.680614709854126, "learning_rate": 6.481744990300595e-05, "loss": 1.6735, "step": 70946 }, { "epoch": 2.36, "grad_norm": 0.6874133348464966, "learning_rate": 6.481096101354482e-05, "loss": 1.706, "step": 70947 }, { "epoch": 2.36, "grad_norm": 0.664188802242279, "learning_rate": 6.480447240956855e-05, "loss": 1.7032, "step": 70948 }, { "epoch": 2.36, "grad_norm": 0.7191332578659058, "learning_rate": 6.479798409108489e-05, "loss": 1.7374, "step": 70949 }, { "epoch": 2.36, "grad_norm": 0.6858954429626465, "learning_rate": 6.479149605810189e-05, "loss": 1.6834, "step": 70950 }, { "epoch": 2.36, "grad_norm": 0.6679924130439758, "learning_rate": 6.47850083106273e-05, "loss": 1.6956, "step": 70951 }, { "epoch": 2.36, "grad_norm": 0.6799892783164978, "learning_rate": 6.477852084866913e-05, "loss": 1.6836, "step": 70952 }, { "epoch": 2.36, "grad_norm": 0.674003005027771, "learning_rate": 6.477203367223511e-05, "loss": 1.6614, "step": 70953 }, { "epoch": 2.36, "grad_norm": 0.6788150668144226, "learning_rate": 6.476554678133323e-05, "loss": 1.6781, "step": 70954 }, { "epoch": 2.36, "grad_norm": 0.6752691864967346, "learning_rate": 6.475906017597135e-05, "loss": 1.6867, "step": 70955 }, { "epoch": 2.36, "grad_norm": 0.6845225095748901, "learning_rate": 6.475257385615723e-05, "loss": 1.7745, "step": 70956 }, { "epoch": 2.36, "grad_norm": 0.6814644932746887, "learning_rate": 6.474608782189889e-05, "loss": 1.6879, "step": 70957 }, { "epoch": 2.36, "grad_norm": 0.6913136839866638, "learning_rate": 6.473960207320404e-05, "loss": 1.7343, "step": 70958 }, { "epoch": 2.36, "grad_norm": 0.6691348552703857, "learning_rate": 6.473311661008074e-05, "loss": 1.6898, "step": 70959 }, { "epoch": 2.36, "grad_norm": 0.6635196208953857, "learning_rate": 6.47266314325368e-05, "loss": 1.6435, "step": 70960 }, { "epoch": 2.36, "grad_norm": 0.6867383718490601, "learning_rate": 6.472014654057993e-05, "loss": 1.7721, "step": 70961 }, { "epoch": 2.36, "grad_norm": 0.6976369023323059, "learning_rate": 6.471366193421819e-05, "loss": 1.7249, "step": 70962 }, { "epoch": 2.36, "grad_norm": 0.6954238414764404, "learning_rate": 6.470717761345945e-05, "loss": 1.6684, "step": 70963 }, { "epoch": 2.36, "grad_norm": 0.6621844172477722, "learning_rate": 6.470069357831151e-05, "loss": 1.6316, "step": 70964 }, { "epoch": 2.36, "grad_norm": 0.675171434879303, "learning_rate": 6.469420982878221e-05, "loss": 1.6986, "step": 70965 }, { "epoch": 2.36, "grad_norm": 0.6632347106933594, "learning_rate": 6.468772636487953e-05, "loss": 1.6376, "step": 70966 }, { "epoch": 2.36, "grad_norm": 0.7067388892173767, "learning_rate": 6.468124318661128e-05, "loss": 1.7019, "step": 70967 }, { "epoch": 2.36, "grad_norm": 0.6638220548629761, "learning_rate": 6.467476029398526e-05, "loss": 1.6911, "step": 70968 }, { "epoch": 2.36, "grad_norm": 0.6772392988204956, "learning_rate": 6.466827768700941e-05, "loss": 1.7215, "step": 70969 }, { "epoch": 2.36, "grad_norm": 0.6990489363670349, "learning_rate": 6.466179536569175e-05, "loss": 1.7646, "step": 70970 }, { "epoch": 2.36, "grad_norm": 0.6672965884208679, "learning_rate": 6.465531333003982e-05, "loss": 1.6646, "step": 70971 }, { "epoch": 2.36, "grad_norm": 0.6745613217353821, "learning_rate": 6.464883158006167e-05, "loss": 1.7538, "step": 70972 }, { "epoch": 2.36, "grad_norm": 0.664830207824707, "learning_rate": 6.464235011576526e-05, "loss": 1.6318, "step": 70973 }, { "epoch": 2.36, "grad_norm": 0.6866607069969177, "learning_rate": 6.463586893715833e-05, "loss": 1.6723, "step": 70974 }, { "epoch": 2.36, "grad_norm": 0.6737415194511414, "learning_rate": 6.462938804424873e-05, "loss": 1.6644, "step": 70975 }, { "epoch": 2.36, "grad_norm": 0.6974294185638428, "learning_rate": 6.462290743704436e-05, "loss": 1.704, "step": 70976 }, { "epoch": 2.36, "grad_norm": 0.6827050447463989, "learning_rate": 6.461642711555323e-05, "loss": 1.6697, "step": 70977 }, { "epoch": 2.36, "grad_norm": 0.6823550462722778, "learning_rate": 6.460994707978297e-05, "loss": 1.665, "step": 70978 }, { "epoch": 2.36, "grad_norm": 0.6887650489807129, "learning_rate": 6.460346732974149e-05, "loss": 1.6985, "step": 70979 }, { "epoch": 2.36, "grad_norm": 0.6954146027565002, "learning_rate": 6.459698786543683e-05, "loss": 1.6696, "step": 70980 }, { "epoch": 2.36, "grad_norm": 0.6973555684089661, "learning_rate": 6.459050868687674e-05, "loss": 1.6267, "step": 70981 }, { "epoch": 2.36, "grad_norm": 0.6746311187744141, "learning_rate": 6.458402979406902e-05, "loss": 1.7235, "step": 70982 }, { "epoch": 2.36, "grad_norm": 0.7509378790855408, "learning_rate": 6.457755118702158e-05, "loss": 1.7985, "step": 70983 }, { "epoch": 2.36, "grad_norm": 0.7226863503456116, "learning_rate": 6.457107286574245e-05, "loss": 1.7348, "step": 70984 }, { "epoch": 2.36, "grad_norm": 0.7051160931587219, "learning_rate": 6.45645948302392e-05, "loss": 1.7366, "step": 70985 }, { "epoch": 2.36, "grad_norm": 0.6863049268722534, "learning_rate": 6.455811708051988e-05, "loss": 1.6835, "step": 70986 }, { "epoch": 2.36, "grad_norm": 0.6691994071006775, "learning_rate": 6.455163961659234e-05, "loss": 1.6993, "step": 70987 }, { "epoch": 2.36, "grad_norm": 0.663628339767456, "learning_rate": 6.454516243846445e-05, "loss": 1.7074, "step": 70988 }, { "epoch": 2.36, "grad_norm": 0.662999153137207, "learning_rate": 6.453868554614395e-05, "loss": 1.6848, "step": 70989 }, { "epoch": 2.36, "grad_norm": 0.6957917809486389, "learning_rate": 6.453220893963889e-05, "loss": 1.6873, "step": 70990 }, { "epoch": 2.36, "grad_norm": 0.6925169825553894, "learning_rate": 6.452573261895702e-05, "loss": 1.7345, "step": 70991 }, { "epoch": 2.36, "grad_norm": 0.7012401223182678, "learning_rate": 6.451925658410613e-05, "loss": 1.8222, "step": 70992 }, { "epoch": 2.36, "grad_norm": 0.7023640275001526, "learning_rate": 6.451278083509418e-05, "loss": 1.6591, "step": 70993 }, { "epoch": 2.36, "grad_norm": 0.6831458806991577, "learning_rate": 6.450630537192907e-05, "loss": 1.7214, "step": 70994 }, { "epoch": 2.36, "grad_norm": 0.6659188866615295, "learning_rate": 6.449983019461862e-05, "loss": 1.6573, "step": 70995 }, { "epoch": 2.36, "grad_norm": 0.6974627375602722, "learning_rate": 6.449335530317063e-05, "loss": 1.6941, "step": 70996 }, { "epoch": 2.36, "grad_norm": 0.6886827349662781, "learning_rate": 6.448688069759305e-05, "loss": 1.6681, "step": 70997 }, { "epoch": 2.36, "grad_norm": 0.6688555479049683, "learning_rate": 6.448040637789369e-05, "loss": 1.6643, "step": 70998 }, { "epoch": 2.36, "grad_norm": 0.6589800715446472, "learning_rate": 6.447393234408035e-05, "loss": 1.6237, "step": 70999 }, { "epoch": 2.36, "grad_norm": 0.668923020362854, "learning_rate": 6.446745859616094e-05, "loss": 1.6931, "step": 71000 }, { "epoch": 2.36, "grad_norm": 0.7051671743392944, "learning_rate": 6.446098513414345e-05, "loss": 1.7203, "step": 71001 }, { "epoch": 2.36, "grad_norm": 0.6903584003448486, "learning_rate": 6.445451195803558e-05, "loss": 1.6438, "step": 71002 }, { "epoch": 2.36, "grad_norm": 0.6910995841026306, "learning_rate": 6.444803906784517e-05, "loss": 1.6436, "step": 71003 }, { "epoch": 2.36, "grad_norm": 0.6869896650314331, "learning_rate": 6.444156646358017e-05, "loss": 1.6867, "step": 71004 }, { "epoch": 2.36, "grad_norm": 0.6603415012359619, "learning_rate": 6.443509414524846e-05, "loss": 1.7266, "step": 71005 }, { "epoch": 2.36, "grad_norm": 0.6698656678199768, "learning_rate": 6.44286221128577e-05, "loss": 1.6772, "step": 71006 }, { "epoch": 2.36, "grad_norm": 0.6778311729431152, "learning_rate": 6.442215036641603e-05, "loss": 1.7006, "step": 71007 }, { "epoch": 2.36, "grad_norm": 0.6510865092277527, "learning_rate": 6.441567890593104e-05, "loss": 1.6011, "step": 71008 }, { "epoch": 2.36, "grad_norm": 0.6621429920196533, "learning_rate": 6.440920773141077e-05, "loss": 1.6487, "step": 71009 }, { "epoch": 2.36, "grad_norm": 0.6842737793922424, "learning_rate": 6.440273684286294e-05, "loss": 1.6474, "step": 71010 }, { "epoch": 2.36, "grad_norm": 0.7239274382591248, "learning_rate": 6.439626624029554e-05, "loss": 1.774, "step": 71011 }, { "epoch": 2.36, "grad_norm": 0.6894868016242981, "learning_rate": 6.438979592371639e-05, "loss": 1.6594, "step": 71012 }, { "epoch": 2.36, "grad_norm": 0.6637936234474182, "learning_rate": 6.438332589313324e-05, "loss": 1.6241, "step": 71013 }, { "epoch": 2.36, "grad_norm": 0.6795846223831177, "learning_rate": 6.437685614855405e-05, "loss": 1.6282, "step": 71014 }, { "epoch": 2.36, "grad_norm": 0.711950421333313, "learning_rate": 6.43703866899866e-05, "loss": 1.73, "step": 71015 }, { "epoch": 2.36, "grad_norm": 0.6703311800956726, "learning_rate": 6.436391751743881e-05, "loss": 1.6621, "step": 71016 }, { "epoch": 2.36, "grad_norm": 0.6848331093788147, "learning_rate": 6.435744863091857e-05, "loss": 1.664, "step": 71017 }, { "epoch": 2.36, "grad_norm": 0.6999494433403015, "learning_rate": 6.435098003043353e-05, "loss": 1.7279, "step": 71018 }, { "epoch": 2.36, "grad_norm": 0.6734522581100464, "learning_rate": 6.434451171599181e-05, "loss": 1.6978, "step": 71019 }, { "epoch": 2.36, "grad_norm": 0.6783169507980347, "learning_rate": 6.433804368760103e-05, "loss": 1.6902, "step": 71020 }, { "epoch": 2.36, "grad_norm": 0.6710871458053589, "learning_rate": 6.433157594526922e-05, "loss": 1.6911, "step": 71021 }, { "epoch": 2.36, "grad_norm": 0.689202606678009, "learning_rate": 6.432510848900405e-05, "loss": 1.6621, "step": 71022 }, { "epoch": 2.36, "grad_norm": 0.6776507496833801, "learning_rate": 6.431864131881358e-05, "loss": 1.7208, "step": 71023 }, { "epoch": 2.36, "grad_norm": 0.6753641963005066, "learning_rate": 6.431217443470553e-05, "loss": 1.7228, "step": 71024 }, { "epoch": 2.36, "grad_norm": 0.684501051902771, "learning_rate": 6.430570783668773e-05, "loss": 1.6692, "step": 71025 }, { "epoch": 2.36, "grad_norm": 0.666841447353363, "learning_rate": 6.429924152476812e-05, "loss": 1.6432, "step": 71026 }, { "epoch": 2.36, "grad_norm": 0.6948508620262146, "learning_rate": 6.429277549895454e-05, "loss": 1.6584, "step": 71027 }, { "epoch": 2.36, "grad_norm": 0.6927906274795532, "learning_rate": 6.428630975925469e-05, "loss": 1.6595, "step": 71028 }, { "epoch": 2.36, "grad_norm": 0.6886269450187683, "learning_rate": 6.42798443056765e-05, "loss": 1.7286, "step": 71029 }, { "epoch": 2.36, "grad_norm": 0.6661954522132874, "learning_rate": 6.427337913822797e-05, "loss": 1.711, "step": 71030 }, { "epoch": 2.36, "grad_norm": 0.6843231916427612, "learning_rate": 6.426691425691682e-05, "loss": 1.6925, "step": 71031 }, { "epoch": 2.36, "grad_norm": 0.6903651356697083, "learning_rate": 6.426044966175084e-05, "loss": 1.6856, "step": 71032 }, { "epoch": 2.36, "grad_norm": 0.6732891201972961, "learning_rate": 6.4253985352738e-05, "loss": 1.7251, "step": 71033 }, { "epoch": 2.36, "grad_norm": 0.6906391978263855, "learning_rate": 6.424752132988609e-05, "loss": 1.7481, "step": 71034 }, { "epoch": 2.36, "grad_norm": 0.6887584328651428, "learning_rate": 6.424105759320285e-05, "loss": 1.6922, "step": 71035 }, { "epoch": 2.36, "grad_norm": 0.6801353096961975, "learning_rate": 6.423459414269625e-05, "loss": 1.7478, "step": 71036 }, { "epoch": 2.36, "grad_norm": 0.683141827583313, "learning_rate": 6.422813097837429e-05, "loss": 1.7778, "step": 71037 }, { "epoch": 2.36, "grad_norm": 0.6740371584892273, "learning_rate": 6.422166810024445e-05, "loss": 1.6821, "step": 71038 }, { "epoch": 2.36, "grad_norm": 0.6739537119865417, "learning_rate": 6.421520550831479e-05, "loss": 1.651, "step": 71039 }, { "epoch": 2.36, "grad_norm": 0.653562605381012, "learning_rate": 6.420874320259317e-05, "loss": 1.6627, "step": 71040 }, { "epoch": 2.36, "grad_norm": 0.6716279983520508, "learning_rate": 6.420228118308741e-05, "loss": 1.7502, "step": 71041 }, { "epoch": 2.36, "grad_norm": 0.6721587181091309, "learning_rate": 6.41958194498053e-05, "loss": 1.7046, "step": 71042 }, { "epoch": 2.36, "grad_norm": 0.6910228729248047, "learning_rate": 6.418935800275466e-05, "loss": 1.6715, "step": 71043 }, { "epoch": 2.36, "grad_norm": 0.6955382227897644, "learning_rate": 6.418289684194358e-05, "loss": 1.7122, "step": 71044 }, { "epoch": 2.36, "grad_norm": 0.6609858870506287, "learning_rate": 6.417643596737956e-05, "loss": 1.6944, "step": 71045 }, { "epoch": 2.36, "grad_norm": 0.6810064315795898, "learning_rate": 6.416997537907059e-05, "loss": 1.7082, "step": 71046 }, { "epoch": 2.36, "grad_norm": 0.6772581338882446, "learning_rate": 6.416351507702464e-05, "loss": 1.7034, "step": 71047 }, { "epoch": 2.36, "grad_norm": 0.6894906163215637, "learning_rate": 6.41570550612494e-05, "loss": 1.7072, "step": 71048 }, { "epoch": 2.36, "grad_norm": 0.7017652988433838, "learning_rate": 6.415059533175265e-05, "loss": 1.6587, "step": 71049 }, { "epoch": 2.36, "grad_norm": 0.673968493938446, "learning_rate": 6.414413588854236e-05, "loss": 1.7689, "step": 71050 }, { "epoch": 2.36, "grad_norm": 0.6537520885467529, "learning_rate": 6.413767673162648e-05, "loss": 1.6658, "step": 71051 }, { "epoch": 2.36, "grad_norm": 0.6769891381263733, "learning_rate": 6.413121786101257e-05, "loss": 1.6782, "step": 71052 }, { "epoch": 2.36, "grad_norm": 0.6920233368873596, "learning_rate": 6.412475927670856e-05, "loss": 1.7005, "step": 71053 }, { "epoch": 2.36, "grad_norm": 0.6894964575767517, "learning_rate": 6.411830097872245e-05, "loss": 1.6702, "step": 71054 }, { "epoch": 2.36, "grad_norm": 0.6789833307266235, "learning_rate": 6.411184296706195e-05, "loss": 1.759, "step": 71055 }, { "epoch": 2.36, "grad_norm": 0.8802772164344788, "learning_rate": 6.410538524173487e-05, "loss": 1.6591, "step": 71056 }, { "epoch": 2.36, "grad_norm": 0.6731581687927246, "learning_rate": 6.409892780274915e-05, "loss": 1.6554, "step": 71057 }, { "epoch": 2.36, "grad_norm": 0.7009127736091614, "learning_rate": 6.409247065011259e-05, "loss": 1.7573, "step": 71058 }, { "epoch": 2.36, "grad_norm": 0.668834388256073, "learning_rate": 6.408601378383292e-05, "loss": 1.6972, "step": 71059 }, { "epoch": 2.36, "grad_norm": 0.7095414400100708, "learning_rate": 6.407955720391806e-05, "loss": 1.7053, "step": 71060 }, { "epoch": 2.36, "grad_norm": 0.6780463457107544, "learning_rate": 6.407310091037594e-05, "loss": 1.7736, "step": 71061 }, { "epoch": 2.36, "grad_norm": 0.6664674282073975, "learning_rate": 6.40666449032143e-05, "loss": 1.6446, "step": 71062 }, { "epoch": 2.36, "grad_norm": 0.6622733473777771, "learning_rate": 6.406018918244095e-05, "loss": 1.7052, "step": 71063 }, { "epoch": 2.36, "grad_norm": 0.6660478711128235, "learning_rate": 6.405373374806383e-05, "loss": 1.6633, "step": 71064 }, { "epoch": 2.36, "grad_norm": 0.6662007570266724, "learning_rate": 6.404727860009072e-05, "loss": 1.6084, "step": 71065 }, { "epoch": 2.36, "grad_norm": 0.6602208018302917, "learning_rate": 6.404082373852932e-05, "loss": 1.7317, "step": 71066 }, { "epoch": 2.36, "grad_norm": 0.6843432188034058, "learning_rate": 6.403436916338772e-05, "loss": 1.6949, "step": 71067 }, { "epoch": 2.36, "grad_norm": 0.6866182088851929, "learning_rate": 6.402791487467354e-05, "loss": 1.7941, "step": 71068 }, { "epoch": 2.36, "grad_norm": 0.691434919834137, "learning_rate": 6.402146087239476e-05, "loss": 1.6258, "step": 71069 }, { "epoch": 2.36, "grad_norm": 0.67960125207901, "learning_rate": 6.401500715655912e-05, "loss": 1.6883, "step": 71070 }, { "epoch": 2.36, "grad_norm": 0.6833994388580322, "learning_rate": 6.400855372717453e-05, "loss": 1.7454, "step": 71071 }, { "epoch": 2.36, "grad_norm": 0.6735154390335083, "learning_rate": 6.400210058424878e-05, "loss": 1.6968, "step": 71072 }, { "epoch": 2.36, "grad_norm": 0.6579906344413757, "learning_rate": 6.399564772778964e-05, "loss": 1.6677, "step": 71073 }, { "epoch": 2.36, "grad_norm": 0.6913429498672485, "learning_rate": 6.398919515780512e-05, "loss": 1.6277, "step": 71074 }, { "epoch": 2.36, "grad_norm": 0.6787445545196533, "learning_rate": 6.398274287430283e-05, "loss": 1.6441, "step": 71075 }, { "epoch": 2.36, "grad_norm": 1.9116231203079224, "learning_rate": 6.39762908772908e-05, "loss": 1.6819, "step": 71076 }, { "epoch": 2.36, "grad_norm": 0.6863624453544617, "learning_rate": 6.396983916677681e-05, "loss": 1.7214, "step": 71077 }, { "epoch": 2.36, "grad_norm": 0.6773795485496521, "learning_rate": 6.396338774276853e-05, "loss": 1.6939, "step": 71078 }, { "epoch": 2.36, "grad_norm": 0.6799852848052979, "learning_rate": 6.395693660527392e-05, "loss": 1.6624, "step": 71079 }, { "epoch": 2.36, "grad_norm": 0.6954818964004517, "learning_rate": 6.395048575430093e-05, "loss": 1.763, "step": 71080 }, { "epoch": 2.36, "grad_norm": 0.6885209083557129, "learning_rate": 6.394403518985724e-05, "loss": 1.7152, "step": 71081 }, { "epoch": 2.36, "grad_norm": 0.6884462237358093, "learning_rate": 6.393758491195063e-05, "loss": 1.6775, "step": 71082 }, { "epoch": 2.36, "grad_norm": 0.6638272404670715, "learning_rate": 6.393113492058909e-05, "loss": 1.677, "step": 71083 }, { "epoch": 2.36, "grad_norm": 0.672241747379303, "learning_rate": 6.392468521578039e-05, "loss": 1.7568, "step": 71084 }, { "epoch": 2.37, "grad_norm": 0.6984990835189819, "learning_rate": 6.391823579753224e-05, "loss": 1.6761, "step": 71085 }, { "epoch": 2.37, "grad_norm": 0.718172013759613, "learning_rate": 6.391178666585255e-05, "loss": 1.7428, "step": 71086 }, { "epoch": 2.37, "grad_norm": 0.6825510263442993, "learning_rate": 6.390533782074935e-05, "loss": 1.7185, "step": 71087 }, { "epoch": 2.37, "grad_norm": 0.6595200300216675, "learning_rate": 6.389888926223013e-05, "loss": 1.7236, "step": 71088 }, { "epoch": 2.37, "grad_norm": 0.6825301051139832, "learning_rate": 6.389244099030285e-05, "loss": 1.7218, "step": 71089 }, { "epoch": 2.37, "grad_norm": 0.6767314076423645, "learning_rate": 6.388599300497544e-05, "loss": 1.6112, "step": 71090 }, { "epoch": 2.37, "grad_norm": 0.6512355208396912, "learning_rate": 6.387954530625567e-05, "loss": 1.6579, "step": 71091 }, { "epoch": 2.37, "grad_norm": 0.6672871708869934, "learning_rate": 6.387309789415123e-05, "loss": 1.6534, "step": 71092 }, { "epoch": 2.37, "grad_norm": 0.6660187244415283, "learning_rate": 6.386665076867009e-05, "loss": 1.6765, "step": 71093 }, { "epoch": 2.37, "grad_norm": 0.6700853109359741, "learning_rate": 6.386020392982015e-05, "loss": 1.6944, "step": 71094 }, { "epoch": 2.37, "grad_norm": 0.671424150466919, "learning_rate": 6.3853757377609e-05, "loss": 1.7143, "step": 71095 }, { "epoch": 2.37, "grad_norm": 0.7275594472885132, "learning_rate": 6.38473111120446e-05, "loss": 1.7131, "step": 71096 }, { "epoch": 2.37, "grad_norm": 0.6761154532432556, "learning_rate": 6.384086513313486e-05, "loss": 1.6635, "step": 71097 }, { "epoch": 2.37, "grad_norm": 0.6525746583938599, "learning_rate": 6.383441944088747e-05, "loss": 1.6797, "step": 71098 }, { "epoch": 2.37, "grad_norm": 0.6655209064483643, "learning_rate": 6.382797403531025e-05, "loss": 1.646, "step": 71099 }, { "epoch": 2.37, "grad_norm": 0.6811620593070984, "learning_rate": 6.382152891641107e-05, "loss": 1.6567, "step": 71100 }, { "epoch": 2.37, "grad_norm": 0.7075982093811035, "learning_rate": 6.381508408419794e-05, "loss": 1.7419, "step": 71101 }, { "epoch": 2.37, "grad_norm": 0.6734347343444824, "learning_rate": 6.380863953867829e-05, "loss": 1.6948, "step": 71102 }, { "epoch": 2.37, "grad_norm": 0.6835880279541016, "learning_rate": 6.380219527986016e-05, "loss": 1.7012, "step": 71103 }, { "epoch": 2.37, "grad_norm": 0.6861925721168518, "learning_rate": 6.379575130775148e-05, "loss": 1.658, "step": 71104 }, { "epoch": 2.37, "grad_norm": 0.6788316965103149, "learning_rate": 6.378930762235991e-05, "loss": 1.6871, "step": 71105 }, { "epoch": 2.37, "grad_norm": 0.6718581914901733, "learning_rate": 6.378286422369325e-05, "loss": 1.7158, "step": 71106 }, { "epoch": 2.37, "grad_norm": 0.6907307505607605, "learning_rate": 6.377642111175947e-05, "loss": 1.7274, "step": 71107 }, { "epoch": 2.37, "grad_norm": 0.6858506202697754, "learning_rate": 6.376997828656632e-05, "loss": 1.6427, "step": 71108 }, { "epoch": 2.37, "grad_norm": 0.6667365431785583, "learning_rate": 6.376353574812152e-05, "loss": 1.6721, "step": 71109 }, { "epoch": 2.37, "grad_norm": 0.6750615835189819, "learning_rate": 6.375709349643297e-05, "loss": 1.7737, "step": 71110 }, { "epoch": 2.37, "grad_norm": 0.6813092231750488, "learning_rate": 6.375065153150857e-05, "loss": 1.7067, "step": 71111 }, { "epoch": 2.37, "grad_norm": 0.6880699992179871, "learning_rate": 6.374420985335612e-05, "loss": 1.7332, "step": 71112 }, { "epoch": 2.37, "grad_norm": 0.6773402094841003, "learning_rate": 6.373776846198326e-05, "loss": 1.7454, "step": 71113 }, { "epoch": 2.37, "grad_norm": 0.6783040165901184, "learning_rate": 6.373132735739803e-05, "loss": 1.7526, "step": 71114 }, { "epoch": 2.37, "grad_norm": 0.6468865275382996, "learning_rate": 6.372488653960819e-05, "loss": 1.6958, "step": 71115 }, { "epoch": 2.37, "grad_norm": 0.6669132113456726, "learning_rate": 6.37184460086214e-05, "loss": 1.7067, "step": 71116 }, { "epoch": 2.37, "grad_norm": 0.6826682686805725, "learning_rate": 6.37120057644457e-05, "loss": 1.742, "step": 71117 }, { "epoch": 2.37, "grad_norm": 0.6869639754295349, "learning_rate": 6.370556580708872e-05, "loss": 1.746, "step": 71118 }, { "epoch": 2.37, "grad_norm": 0.6581038236618042, "learning_rate": 6.369912613655846e-05, "loss": 1.7015, "step": 71119 }, { "epoch": 2.37, "grad_norm": 0.6763637661933899, "learning_rate": 6.369268675286257e-05, "loss": 1.6698, "step": 71120 }, { "epoch": 2.37, "grad_norm": 0.6660858988761902, "learning_rate": 6.368624765600898e-05, "loss": 1.7417, "step": 71121 }, { "epoch": 2.37, "grad_norm": 0.6906229257583618, "learning_rate": 6.367980884600551e-05, "loss": 1.6989, "step": 71122 }, { "epoch": 2.37, "grad_norm": 0.6551165580749512, "learning_rate": 6.367337032285985e-05, "loss": 1.6895, "step": 71123 }, { "epoch": 2.37, "grad_norm": 0.6851291060447693, "learning_rate": 6.366693208657994e-05, "loss": 1.6094, "step": 71124 }, { "epoch": 2.37, "grad_norm": 0.6813949942588806, "learning_rate": 6.366049413717349e-05, "loss": 1.6495, "step": 71125 }, { "epoch": 2.37, "grad_norm": 0.6648778319358826, "learning_rate": 6.365405647464847e-05, "loss": 1.7094, "step": 71126 }, { "epoch": 2.37, "grad_norm": 0.6638083457946777, "learning_rate": 6.364761909901252e-05, "loss": 1.6338, "step": 71127 }, { "epoch": 2.37, "grad_norm": 0.6690003871917725, "learning_rate": 6.36411820102736e-05, "loss": 1.7348, "step": 71128 }, { "epoch": 2.37, "grad_norm": 0.6737115979194641, "learning_rate": 6.363474520843947e-05, "loss": 1.6934, "step": 71129 }, { "epoch": 2.37, "grad_norm": 0.6670787334442139, "learning_rate": 6.362830869351784e-05, "loss": 1.716, "step": 71130 }, { "epoch": 2.37, "grad_norm": 0.6767109632492065, "learning_rate": 6.362187246551672e-05, "loss": 1.6529, "step": 71131 }, { "epoch": 2.37, "grad_norm": 0.6836492419242859, "learning_rate": 6.361543652444371e-05, "loss": 1.5967, "step": 71132 }, { "epoch": 2.37, "grad_norm": 0.7044146060943604, "learning_rate": 6.360900087030683e-05, "loss": 1.78, "step": 71133 }, { "epoch": 2.37, "grad_norm": 0.670642614364624, "learning_rate": 6.36025655031138e-05, "loss": 1.8137, "step": 71134 }, { "epoch": 2.37, "grad_norm": 0.6838593482971191, "learning_rate": 6.359613042287232e-05, "loss": 1.6983, "step": 71135 }, { "epoch": 2.37, "grad_norm": 0.6648064255714417, "learning_rate": 6.35896956295904e-05, "loss": 1.7175, "step": 71136 }, { "epoch": 2.37, "grad_norm": 0.6634492874145508, "learning_rate": 6.358326112327568e-05, "loss": 1.6513, "step": 71137 }, { "epoch": 2.37, "grad_norm": 0.7003809809684753, "learning_rate": 6.357682690393611e-05, "loss": 1.7446, "step": 71138 }, { "epoch": 2.37, "grad_norm": 0.6723616123199463, "learning_rate": 6.357039297157937e-05, "loss": 1.6705, "step": 71139 }, { "epoch": 2.37, "grad_norm": 0.6558026671409607, "learning_rate": 6.356395932621341e-05, "loss": 1.676, "step": 71140 }, { "epoch": 2.37, "grad_norm": 0.6766956448554993, "learning_rate": 6.355752596784598e-05, "loss": 1.7043, "step": 71141 }, { "epoch": 2.37, "grad_norm": 0.6679011583328247, "learning_rate": 6.355109289648476e-05, "loss": 1.7609, "step": 71142 }, { "epoch": 2.37, "grad_norm": 0.6666237711906433, "learning_rate": 6.354466011213776e-05, "loss": 1.6965, "step": 71143 }, { "epoch": 2.37, "grad_norm": 0.6683038473129272, "learning_rate": 6.353822761481274e-05, "loss": 1.7319, "step": 71144 }, { "epoch": 2.37, "grad_norm": 0.6757073998451233, "learning_rate": 6.353179540451738e-05, "loss": 1.6603, "step": 71145 }, { "epoch": 2.37, "grad_norm": 0.6884713172912598, "learning_rate": 6.352536348125955e-05, "loss": 1.7289, "step": 71146 }, { "epoch": 2.37, "grad_norm": 0.6695882081985474, "learning_rate": 6.351893184504721e-05, "loss": 1.6835, "step": 71147 }, { "epoch": 2.37, "grad_norm": 0.6791935563087463, "learning_rate": 6.3512500495888e-05, "loss": 1.6788, "step": 71148 }, { "epoch": 2.37, "grad_norm": 0.657659649848938, "learning_rate": 6.350606943378972e-05, "loss": 1.684, "step": 71149 }, { "epoch": 2.37, "grad_norm": 0.6662650108337402, "learning_rate": 6.34996386587603e-05, "loss": 1.6971, "step": 71150 }, { "epoch": 2.37, "grad_norm": 0.6831878423690796, "learning_rate": 6.349320817080748e-05, "loss": 1.6899, "step": 71151 }, { "epoch": 2.37, "grad_norm": 0.6679356098175049, "learning_rate": 6.348677796993894e-05, "loss": 1.6479, "step": 71152 }, { "epoch": 2.37, "grad_norm": 0.7056305408477783, "learning_rate": 6.348034805616262e-05, "loss": 1.7107, "step": 71153 }, { "epoch": 2.37, "grad_norm": 0.6811766624450684, "learning_rate": 6.347391842948648e-05, "loss": 1.6272, "step": 71154 }, { "epoch": 2.37, "grad_norm": 0.6551052331924438, "learning_rate": 6.346748908991797e-05, "loss": 1.7174, "step": 71155 }, { "epoch": 2.37, "grad_norm": 0.674723744392395, "learning_rate": 6.34610600374651e-05, "loss": 1.707, "step": 71156 }, { "epoch": 2.37, "grad_norm": 0.6899634599685669, "learning_rate": 6.345463127213573e-05, "loss": 1.7268, "step": 71157 }, { "epoch": 2.37, "grad_norm": 0.7089807987213135, "learning_rate": 6.34482027939376e-05, "loss": 1.7709, "step": 71158 }, { "epoch": 2.37, "grad_norm": 0.6712193489074707, "learning_rate": 6.344177460287839e-05, "loss": 1.7316, "step": 71159 }, { "epoch": 2.37, "grad_norm": 0.6662580966949463, "learning_rate": 6.343534669896601e-05, "loss": 1.6311, "step": 71160 }, { "epoch": 2.37, "grad_norm": 0.6853064894676208, "learning_rate": 6.342891908220843e-05, "loss": 1.6532, "step": 71161 }, { "epoch": 2.37, "grad_norm": 0.6643474102020264, "learning_rate": 6.342249175261311e-05, "loss": 1.7156, "step": 71162 }, { "epoch": 2.37, "grad_norm": 0.6851431727409363, "learning_rate": 6.341606471018806e-05, "loss": 1.6825, "step": 71163 }, { "epoch": 2.37, "grad_norm": 0.6862425208091736, "learning_rate": 6.34096379549411e-05, "loss": 1.7017, "step": 71164 }, { "epoch": 2.37, "grad_norm": 0.6844144463539124, "learning_rate": 6.340321148688e-05, "loss": 1.7265, "step": 71165 }, { "epoch": 2.37, "grad_norm": 0.6991479992866516, "learning_rate": 6.339678530601243e-05, "loss": 1.7004, "step": 71166 }, { "epoch": 2.37, "grad_norm": 0.675081729888916, "learning_rate": 6.339035941234632e-05, "loss": 1.6687, "step": 71167 }, { "epoch": 2.37, "grad_norm": 0.673531711101532, "learning_rate": 6.338393380588964e-05, "loss": 1.6173, "step": 71168 }, { "epoch": 2.37, "grad_norm": 0.6819557547569275, "learning_rate": 6.337750848664981e-05, "loss": 1.6722, "step": 71169 }, { "epoch": 2.37, "grad_norm": 0.6742660403251648, "learning_rate": 6.337108345463482e-05, "loss": 1.6478, "step": 71170 }, { "epoch": 2.37, "grad_norm": 0.6936395764350891, "learning_rate": 6.336465870985253e-05, "loss": 1.622, "step": 71171 }, { "epoch": 2.37, "grad_norm": 0.6931592226028442, "learning_rate": 6.335823425231072e-05, "loss": 1.6799, "step": 71172 }, { "epoch": 2.37, "grad_norm": 0.7021223902702332, "learning_rate": 6.335181008201705e-05, "loss": 1.767, "step": 71173 }, { "epoch": 2.37, "grad_norm": 0.6976950764656067, "learning_rate": 6.334538619897948e-05, "loss": 1.6974, "step": 71174 }, { "epoch": 2.37, "grad_norm": 0.7102379202842712, "learning_rate": 6.333896260320575e-05, "loss": 1.6806, "step": 71175 }, { "epoch": 2.37, "grad_norm": 0.6517854928970337, "learning_rate": 6.333253929470355e-05, "loss": 1.7127, "step": 71176 }, { "epoch": 2.37, "grad_norm": 0.6819347143173218, "learning_rate": 6.332611627348083e-05, "loss": 1.7136, "step": 71177 }, { "epoch": 2.37, "grad_norm": 0.6788418889045715, "learning_rate": 6.331969353954537e-05, "loss": 1.736, "step": 71178 }, { "epoch": 2.37, "grad_norm": 0.6681925058364868, "learning_rate": 6.331327109290493e-05, "loss": 1.745, "step": 71179 }, { "epoch": 2.37, "grad_norm": 0.6680647134780884, "learning_rate": 6.330684893356724e-05, "loss": 1.7025, "step": 71180 }, { "epoch": 2.37, "grad_norm": 0.672994077205658, "learning_rate": 6.330042706154022e-05, "loss": 1.6588, "step": 71181 }, { "epoch": 2.37, "grad_norm": 0.6988616585731506, "learning_rate": 6.329400547683163e-05, "loss": 1.6572, "step": 71182 }, { "epoch": 2.37, "grad_norm": 0.6882978677749634, "learning_rate": 6.328758417944915e-05, "loss": 1.7299, "step": 71183 }, { "epoch": 2.37, "grad_norm": 0.6823664307594299, "learning_rate": 6.328116316940075e-05, "loss": 1.7398, "step": 71184 }, { "epoch": 2.37, "grad_norm": 0.6712600588798523, "learning_rate": 6.327474244669404e-05, "loss": 1.6643, "step": 71185 }, { "epoch": 2.37, "grad_norm": 0.6618586182594299, "learning_rate": 6.3268322011337e-05, "loss": 1.6815, "step": 71186 }, { "epoch": 2.37, "grad_norm": 0.6510380506515503, "learning_rate": 6.326190186333727e-05, "loss": 1.6383, "step": 71187 }, { "epoch": 2.37, "grad_norm": 0.6632482409477234, "learning_rate": 6.325548200270277e-05, "loss": 1.6261, "step": 71188 }, { "epoch": 2.37, "grad_norm": 0.6953151226043701, "learning_rate": 6.324906242944116e-05, "loss": 1.6528, "step": 71189 }, { "epoch": 2.37, "grad_norm": 0.6870141625404358, "learning_rate": 6.324264314356041e-05, "loss": 1.7289, "step": 71190 }, { "epoch": 2.37, "grad_norm": 0.6652382016181946, "learning_rate": 6.323622414506818e-05, "loss": 1.7007, "step": 71191 }, { "epoch": 2.37, "grad_norm": 0.6860936880111694, "learning_rate": 6.32298054339722e-05, "loss": 1.719, "step": 71192 }, { "epoch": 2.37, "grad_norm": 0.6863981485366821, "learning_rate": 6.322338701028043e-05, "loss": 1.6904, "step": 71193 }, { "epoch": 2.37, "grad_norm": 0.6716554164886475, "learning_rate": 6.321696887400063e-05, "loss": 1.7093, "step": 71194 }, { "epoch": 2.37, "grad_norm": 0.6714033484458923, "learning_rate": 6.32105510251404e-05, "loss": 1.6507, "step": 71195 }, { "epoch": 2.37, "grad_norm": 0.6947078704833984, "learning_rate": 6.320413346370771e-05, "loss": 1.6877, "step": 71196 }, { "epoch": 2.37, "grad_norm": 0.6775548458099365, "learning_rate": 6.319771618971037e-05, "loss": 1.7602, "step": 71197 }, { "epoch": 2.37, "grad_norm": 0.674383282661438, "learning_rate": 6.319129920315612e-05, "loss": 1.6523, "step": 71198 }, { "epoch": 2.37, "grad_norm": 0.690362811088562, "learning_rate": 6.318488250405266e-05, "loss": 1.6818, "step": 71199 }, { "epoch": 2.37, "grad_norm": 0.6924415230751038, "learning_rate": 6.317846609240795e-05, "loss": 1.6639, "step": 71200 }, { "epoch": 2.37, "grad_norm": 0.6686732769012451, "learning_rate": 6.317204996822967e-05, "loss": 1.6881, "step": 71201 }, { "epoch": 2.37, "grad_norm": 0.673339307308197, "learning_rate": 6.316563413152555e-05, "loss": 1.7078, "step": 71202 }, { "epoch": 2.37, "grad_norm": 0.7005097270011902, "learning_rate": 6.315921858230344e-05, "loss": 1.6849, "step": 71203 }, { "epoch": 2.37, "grad_norm": 0.7050191164016724, "learning_rate": 6.315280332057134e-05, "loss": 1.6836, "step": 71204 }, { "epoch": 2.37, "grad_norm": 0.6635643243789673, "learning_rate": 6.314638834633666e-05, "loss": 1.6817, "step": 71205 }, { "epoch": 2.37, "grad_norm": 0.70497727394104, "learning_rate": 6.313997365960737e-05, "loss": 1.7562, "step": 71206 }, { "epoch": 2.37, "grad_norm": 0.6598089337348938, "learning_rate": 6.313355926039131e-05, "loss": 1.7002, "step": 71207 }, { "epoch": 2.37, "grad_norm": 0.6579983234405518, "learning_rate": 6.312714514869625e-05, "loss": 1.6797, "step": 71208 }, { "epoch": 2.37, "grad_norm": 0.6939378976821899, "learning_rate": 6.31207313245298e-05, "loss": 1.7488, "step": 71209 }, { "epoch": 2.37, "grad_norm": 0.6884072422981262, "learning_rate": 6.311431778789992e-05, "loss": 1.6639, "step": 71210 }, { "epoch": 2.37, "grad_norm": 0.679850161075592, "learning_rate": 6.31079045388145e-05, "loss": 1.6799, "step": 71211 }, { "epoch": 2.37, "grad_norm": 0.6619691252708435, "learning_rate": 6.310149157728103e-05, "loss": 1.687, "step": 71212 }, { "epoch": 2.37, "grad_norm": 0.6801174879074097, "learning_rate": 6.309507890330746e-05, "loss": 1.7538, "step": 71213 }, { "epoch": 2.37, "grad_norm": 0.6990949511528015, "learning_rate": 6.30886665169016e-05, "loss": 1.748, "step": 71214 }, { "epoch": 2.37, "grad_norm": 0.7195329070091248, "learning_rate": 6.30822544180712e-05, "loss": 1.7199, "step": 71215 }, { "epoch": 2.37, "grad_norm": 0.6674160361289978, "learning_rate": 6.307584260682396e-05, "loss": 1.6393, "step": 71216 }, { "epoch": 2.37, "grad_norm": 0.6682848930358887, "learning_rate": 6.306943108316777e-05, "loss": 1.7067, "step": 71217 }, { "epoch": 2.37, "grad_norm": 0.6876789331436157, "learning_rate": 6.306301984711049e-05, "loss": 1.6575, "step": 71218 }, { "epoch": 2.37, "grad_norm": 0.6824896335601807, "learning_rate": 6.305660889865964e-05, "loss": 1.6206, "step": 71219 }, { "epoch": 2.37, "grad_norm": 0.698256254196167, "learning_rate": 6.305019823782317e-05, "loss": 1.6859, "step": 71220 }, { "epoch": 2.37, "grad_norm": 0.6701540946960449, "learning_rate": 6.304378786460895e-05, "loss": 1.7189, "step": 71221 }, { "epoch": 2.37, "grad_norm": 0.6868758201599121, "learning_rate": 6.303737777902463e-05, "loss": 1.786, "step": 71222 }, { "epoch": 2.37, "grad_norm": 0.6701191067695618, "learning_rate": 6.303096798107794e-05, "loss": 1.6603, "step": 71223 }, { "epoch": 2.37, "grad_norm": 0.6626608967781067, "learning_rate": 6.302455847077684e-05, "loss": 1.5657, "step": 71224 }, { "epoch": 2.37, "grad_norm": 0.6642107367515564, "learning_rate": 6.301814924812896e-05, "loss": 1.6167, "step": 71225 }, { "epoch": 2.37, "grad_norm": 0.6595741510391235, "learning_rate": 6.301174031314208e-05, "loss": 1.6444, "step": 71226 }, { "epoch": 2.37, "grad_norm": 0.6625230312347412, "learning_rate": 6.300533166582402e-05, "loss": 1.6737, "step": 71227 }, { "epoch": 2.37, "grad_norm": 0.6741511821746826, "learning_rate": 6.299892330618269e-05, "loss": 1.6006, "step": 71228 }, { "epoch": 2.37, "grad_norm": 0.6907823085784912, "learning_rate": 6.299251523422568e-05, "loss": 1.7337, "step": 71229 }, { "epoch": 2.37, "grad_norm": 0.6780388355255127, "learning_rate": 6.298610744996079e-05, "loss": 1.7204, "step": 71230 }, { "epoch": 2.37, "grad_norm": 0.6975113153457642, "learning_rate": 6.297969995339592e-05, "loss": 1.7126, "step": 71231 }, { "epoch": 2.37, "grad_norm": 0.6999894380569458, "learning_rate": 6.297329274453878e-05, "loss": 1.6955, "step": 71232 }, { "epoch": 2.37, "grad_norm": 0.6535891890525818, "learning_rate": 6.296688582339706e-05, "loss": 1.7497, "step": 71233 }, { "epoch": 2.37, "grad_norm": 0.69004887342453, "learning_rate": 6.296047918997868e-05, "loss": 1.6656, "step": 71234 }, { "epoch": 2.37, "grad_norm": 0.6579642295837402, "learning_rate": 6.295407284429128e-05, "loss": 1.7419, "step": 71235 }, { "epoch": 2.37, "grad_norm": 0.6682910323143005, "learning_rate": 6.294766678634278e-05, "loss": 1.6793, "step": 71236 }, { "epoch": 2.37, "grad_norm": 0.6580164432525635, "learning_rate": 6.29412610161408e-05, "loss": 1.7075, "step": 71237 }, { "epoch": 2.37, "grad_norm": 0.6959155201911926, "learning_rate": 6.29348555336933e-05, "loss": 1.716, "step": 71238 }, { "epoch": 2.37, "grad_norm": 0.6796907186508179, "learning_rate": 6.292845033900792e-05, "loss": 1.6478, "step": 71239 }, { "epoch": 2.37, "grad_norm": 0.7142941951751709, "learning_rate": 6.292204543209243e-05, "loss": 1.6336, "step": 71240 }, { "epoch": 2.37, "grad_norm": 0.6707723736763, "learning_rate": 6.291564081295473e-05, "loss": 1.6567, "step": 71241 }, { "epoch": 2.37, "grad_norm": 0.660386323928833, "learning_rate": 6.290923648160238e-05, "loss": 1.7744, "step": 71242 }, { "epoch": 2.37, "grad_norm": 0.7029860615730286, "learning_rate": 6.29028324380434e-05, "loss": 1.6381, "step": 71243 }, { "epoch": 2.37, "grad_norm": 0.7001712322235107, "learning_rate": 6.289642868228538e-05, "loss": 1.6934, "step": 71244 }, { "epoch": 2.37, "grad_norm": 0.7261571288108826, "learning_rate": 6.289002521433621e-05, "loss": 1.693, "step": 71245 }, { "epoch": 2.37, "grad_norm": 0.703471302986145, "learning_rate": 6.288362203420363e-05, "loss": 1.7772, "step": 71246 }, { "epoch": 2.37, "grad_norm": 0.6551519632339478, "learning_rate": 6.28772191418953e-05, "loss": 1.7104, "step": 71247 }, { "epoch": 2.37, "grad_norm": 0.6585498452186584, "learning_rate": 6.287081653741918e-05, "loss": 1.6869, "step": 71248 }, { "epoch": 2.37, "grad_norm": 0.6477579474449158, "learning_rate": 6.286441422078286e-05, "loss": 1.6614, "step": 71249 }, { "epoch": 2.37, "grad_norm": 0.6625267267227173, "learning_rate": 6.28580121919943e-05, "loss": 1.6884, "step": 71250 }, { "epoch": 2.37, "grad_norm": 0.6860112547874451, "learning_rate": 6.285161045106115e-05, "loss": 1.6751, "step": 71251 }, { "epoch": 2.37, "grad_norm": 0.7240399122238159, "learning_rate": 6.284520899799115e-05, "loss": 1.7192, "step": 71252 }, { "epoch": 2.37, "grad_norm": 0.7179771661758423, "learning_rate": 6.283880783279218e-05, "loss": 1.671, "step": 71253 }, { "epoch": 2.37, "grad_norm": 0.6509878635406494, "learning_rate": 6.283240695547187e-05, "loss": 1.6577, "step": 71254 }, { "epoch": 2.37, "grad_norm": 0.6961643695831299, "learning_rate": 6.282600636603817e-05, "loss": 1.6337, "step": 71255 }, { "epoch": 2.37, "grad_norm": 0.6688390970230103, "learning_rate": 6.281960606449865e-05, "loss": 1.6856, "step": 71256 }, { "epoch": 2.37, "grad_norm": 0.6672095060348511, "learning_rate": 6.281320605086127e-05, "loss": 1.6717, "step": 71257 }, { "epoch": 2.37, "grad_norm": 0.7081947922706604, "learning_rate": 6.28068063251337e-05, "loss": 1.752, "step": 71258 }, { "epoch": 2.37, "grad_norm": 0.6870444416999817, "learning_rate": 6.280040688732367e-05, "loss": 1.7012, "step": 71259 }, { "epoch": 2.37, "grad_norm": 0.7235538959503174, "learning_rate": 6.279400773743906e-05, "loss": 1.6057, "step": 71260 }, { "epoch": 2.37, "grad_norm": 0.6840735077857971, "learning_rate": 6.278760887548758e-05, "loss": 1.6793, "step": 71261 }, { "epoch": 2.37, "grad_norm": 0.7063086628913879, "learning_rate": 6.278121030147691e-05, "loss": 1.729, "step": 71262 }, { "epoch": 2.37, "grad_norm": 0.6838746666908264, "learning_rate": 6.277481201541488e-05, "loss": 1.6955, "step": 71263 }, { "epoch": 2.37, "grad_norm": 0.6715373992919922, "learning_rate": 6.276841401730936e-05, "loss": 1.7168, "step": 71264 }, { "epoch": 2.37, "grad_norm": 0.6804662346839905, "learning_rate": 6.276201630716804e-05, "loss": 1.6984, "step": 71265 }, { "epoch": 2.37, "grad_norm": 0.7160717248916626, "learning_rate": 6.275561888499859e-05, "loss": 1.7044, "step": 71266 }, { "epoch": 2.37, "grad_norm": 0.6785166263580322, "learning_rate": 6.274922175080895e-05, "loss": 1.733, "step": 71267 }, { "epoch": 2.37, "grad_norm": 0.6705172657966614, "learning_rate": 6.274282490460681e-05, "loss": 1.6993, "step": 71268 }, { "epoch": 2.37, "grad_norm": 0.6823148727416992, "learning_rate": 6.273642834639983e-05, "loss": 1.7185, "step": 71269 }, { "epoch": 2.37, "grad_norm": 0.6928181648254395, "learning_rate": 6.273003207619584e-05, "loss": 1.6727, "step": 71270 }, { "epoch": 2.37, "grad_norm": 0.6815682649612427, "learning_rate": 6.272363609400284e-05, "loss": 1.708, "step": 71271 }, { "epoch": 2.37, "grad_norm": 0.6998008489608765, "learning_rate": 6.271724039982821e-05, "loss": 1.6815, "step": 71272 }, { "epoch": 2.37, "grad_norm": 0.6968122720718384, "learning_rate": 6.271084499367985e-05, "loss": 1.74, "step": 71273 }, { "epoch": 2.37, "grad_norm": 0.6562255024909973, "learning_rate": 6.270444987556568e-05, "loss": 1.7001, "step": 71274 }, { "epoch": 2.37, "grad_norm": 0.6727304458618164, "learning_rate": 6.269805504549334e-05, "loss": 1.7724, "step": 71275 }, { "epoch": 2.37, "grad_norm": 0.6547583341598511, "learning_rate": 6.269166050347051e-05, "loss": 1.6809, "step": 71276 }, { "epoch": 2.37, "grad_norm": 0.6697019338607788, "learning_rate": 6.268526624950503e-05, "loss": 1.6236, "step": 71277 }, { "epoch": 2.37, "grad_norm": 0.6990829706192017, "learning_rate": 6.267887228360486e-05, "loss": 1.7415, "step": 71278 }, { "epoch": 2.37, "grad_norm": 0.6826696395874023, "learning_rate": 6.267247860577738e-05, "loss": 1.7325, "step": 71279 }, { "epoch": 2.37, "grad_norm": 0.6801258325576782, "learning_rate": 6.266608521603052e-05, "loss": 1.7631, "step": 71280 }, { "epoch": 2.37, "grad_norm": 0.6722784638404846, "learning_rate": 6.26596921143722e-05, "loss": 1.6166, "step": 71281 }, { "epoch": 2.37, "grad_norm": 0.6968961358070374, "learning_rate": 6.265329930081e-05, "loss": 1.743, "step": 71282 }, { "epoch": 2.37, "grad_norm": 0.6715367436408997, "learning_rate": 6.264690677535167e-05, "loss": 1.6937, "step": 71283 }, { "epoch": 2.37, "grad_norm": 0.6803194880485535, "learning_rate": 6.264051453800503e-05, "loss": 1.7287, "step": 71284 }, { "epoch": 2.37, "grad_norm": 0.6849067211151123, "learning_rate": 6.263412258877798e-05, "loss": 1.7644, "step": 71285 }, { "epoch": 2.37, "grad_norm": 0.6723965406417847, "learning_rate": 6.262773092767796e-05, "loss": 1.6818, "step": 71286 }, { "epoch": 2.37, "grad_norm": 0.6937147378921509, "learning_rate": 6.262133955471292e-05, "loss": 1.6458, "step": 71287 }, { "epoch": 2.37, "grad_norm": 0.6733120679855347, "learning_rate": 6.261494846989065e-05, "loss": 1.7204, "step": 71288 }, { "epoch": 2.37, "grad_norm": 0.69987553358078, "learning_rate": 6.260855767321885e-05, "loss": 1.7831, "step": 71289 }, { "epoch": 2.37, "grad_norm": 0.6978967785835266, "learning_rate": 6.260216716470522e-05, "loss": 1.7145, "step": 71290 }, { "epoch": 2.37, "grad_norm": 0.7134397625923157, "learning_rate": 6.259577694435768e-05, "loss": 1.7306, "step": 71291 }, { "epoch": 2.37, "grad_norm": 0.6848180890083313, "learning_rate": 6.258938701218385e-05, "loss": 1.6702, "step": 71292 }, { "epoch": 2.37, "grad_norm": 0.6596300005912781, "learning_rate": 6.258299736819149e-05, "loss": 1.6685, "step": 71293 }, { "epoch": 2.37, "grad_norm": 0.6728029251098633, "learning_rate": 6.257660801238834e-05, "loss": 1.5562, "step": 71294 }, { "epoch": 2.37, "grad_norm": 0.6730569005012512, "learning_rate": 6.257021894478234e-05, "loss": 1.6495, "step": 71295 }, { "epoch": 2.37, "grad_norm": 0.6826782822608948, "learning_rate": 6.256383016538106e-05, "loss": 1.6906, "step": 71296 }, { "epoch": 2.37, "grad_norm": 0.6711717247962952, "learning_rate": 6.255744167419223e-05, "loss": 1.6369, "step": 71297 }, { "epoch": 2.37, "grad_norm": 0.7045289874076843, "learning_rate": 6.255105347122379e-05, "loss": 1.7501, "step": 71298 }, { "epoch": 2.37, "grad_norm": 0.6785727143287659, "learning_rate": 6.254466555648338e-05, "loss": 1.7085, "step": 71299 }, { "epoch": 2.37, "grad_norm": 0.696426510810852, "learning_rate": 6.253827792997865e-05, "loss": 1.7309, "step": 71300 }, { "epoch": 2.37, "grad_norm": 0.7160394191741943, "learning_rate": 6.253189059171758e-05, "loss": 1.6954, "step": 71301 }, { "epoch": 2.37, "grad_norm": 0.6739112138748169, "learning_rate": 6.25255035417077e-05, "loss": 1.6415, "step": 71302 }, { "epoch": 2.37, "grad_norm": 0.6822503805160522, "learning_rate": 6.251911677995694e-05, "loss": 1.7072, "step": 71303 }, { "epoch": 2.37, "grad_norm": 0.6505061984062195, "learning_rate": 6.251273030647292e-05, "loss": 1.6606, "step": 71304 }, { "epoch": 2.37, "grad_norm": 0.6617459654808044, "learning_rate": 6.250634412126354e-05, "loss": 1.7181, "step": 71305 }, { "epoch": 2.37, "grad_norm": 0.6775673031806946, "learning_rate": 6.249995822433637e-05, "loss": 1.6907, "step": 71306 }, { "epoch": 2.37, "grad_norm": 0.6788175702095032, "learning_rate": 6.249357261569935e-05, "loss": 1.6541, "step": 71307 }, { "epoch": 2.37, "grad_norm": 0.6623323559761047, "learning_rate": 6.24871872953601e-05, "loss": 1.6552, "step": 71308 }, { "epoch": 2.37, "grad_norm": 0.6685382127761841, "learning_rate": 6.248080226332638e-05, "loss": 1.7182, "step": 71309 }, { "epoch": 2.37, "grad_norm": 0.6875825524330139, "learning_rate": 6.247441751960602e-05, "loss": 1.7065, "step": 71310 }, { "epoch": 2.37, "grad_norm": 0.6790186166763306, "learning_rate": 6.246803306420674e-05, "loss": 1.7072, "step": 71311 }, { "epoch": 2.37, "grad_norm": 0.66105055809021, "learning_rate": 6.246164889713614e-05, "loss": 1.678, "step": 71312 }, { "epoch": 2.37, "grad_norm": 0.6771470904350281, "learning_rate": 6.245526501840213e-05, "loss": 1.7079, "step": 71313 }, { "epoch": 2.37, "grad_norm": 0.6709005832672119, "learning_rate": 6.244888142801254e-05, "loss": 1.7516, "step": 71314 }, { "epoch": 2.37, "grad_norm": 0.6829728484153748, "learning_rate": 6.244249812597497e-05, "loss": 1.6217, "step": 71315 }, { "epoch": 2.37, "grad_norm": 0.6909798383712769, "learning_rate": 6.243611511229715e-05, "loss": 1.6481, "step": 71316 }, { "epoch": 2.37, "grad_norm": 0.6738146543502808, "learning_rate": 6.242973238698693e-05, "loss": 1.6136, "step": 71317 }, { "epoch": 2.37, "grad_norm": 0.6844955086708069, "learning_rate": 6.242334995005203e-05, "loss": 1.7435, "step": 71318 }, { "epoch": 2.37, "grad_norm": 0.7016176581382751, "learning_rate": 6.24169678015001e-05, "loss": 1.6963, "step": 71319 }, { "epoch": 2.37, "grad_norm": 0.6940577626228333, "learning_rate": 6.241058594133893e-05, "loss": 1.7305, "step": 71320 }, { "epoch": 2.37, "grad_norm": 0.6668905019760132, "learning_rate": 6.24042043695765e-05, "loss": 1.6502, "step": 71321 }, { "epoch": 2.37, "grad_norm": 0.6610596179962158, "learning_rate": 6.239782308622017e-05, "loss": 1.6768, "step": 71322 }, { "epoch": 2.37, "grad_norm": 0.6800017356872559, "learning_rate": 6.239144209127791e-05, "loss": 1.7084, "step": 71323 }, { "epoch": 2.37, "grad_norm": 0.6921785473823547, "learning_rate": 6.23850613847575e-05, "loss": 1.6863, "step": 71324 }, { "epoch": 2.37, "grad_norm": 0.68312668800354, "learning_rate": 6.237868096666659e-05, "loss": 1.6461, "step": 71325 }, { "epoch": 2.37, "grad_norm": 0.6942148804664612, "learning_rate": 6.23723008370129e-05, "loss": 1.719, "step": 71326 }, { "epoch": 2.37, "grad_norm": 0.680199146270752, "learning_rate": 6.236592099580421e-05, "loss": 1.7309, "step": 71327 }, { "epoch": 2.37, "grad_norm": 0.6780976057052612, "learning_rate": 6.235954144304844e-05, "loss": 1.691, "step": 71328 }, { "epoch": 2.37, "grad_norm": 0.6999646425247192, "learning_rate": 6.2353162178753e-05, "loss": 1.7855, "step": 71329 }, { "epoch": 2.37, "grad_norm": 0.6973094344139099, "learning_rate": 6.234678320292582e-05, "loss": 1.6968, "step": 71330 }, { "epoch": 2.37, "grad_norm": 0.7017170190811157, "learning_rate": 6.23404045155747e-05, "loss": 1.6458, "step": 71331 }, { "epoch": 2.37, "grad_norm": 0.6816926598548889, "learning_rate": 6.233402611670731e-05, "loss": 1.6766, "step": 71332 }, { "epoch": 2.37, "grad_norm": 0.6972777843475342, "learning_rate": 6.232764800633133e-05, "loss": 1.6804, "step": 71333 }, { "epoch": 2.37, "grad_norm": 0.6782028675079346, "learning_rate": 6.232127018445454e-05, "loss": 1.656, "step": 71334 }, { "epoch": 2.37, "grad_norm": 0.676374077796936, "learning_rate": 6.23148926510849e-05, "loss": 1.724, "step": 71335 }, { "epoch": 2.37, "grad_norm": 0.6719958186149597, "learning_rate": 6.230851540622978e-05, "loss": 1.6044, "step": 71336 }, { "epoch": 2.37, "grad_norm": 0.6845924854278564, "learning_rate": 6.230213844989708e-05, "loss": 1.6859, "step": 71337 }, { "epoch": 2.37, "grad_norm": 0.7307280898094177, "learning_rate": 6.229576178209467e-05, "loss": 1.7922, "step": 71338 }, { "epoch": 2.37, "grad_norm": 0.6639063358306885, "learning_rate": 6.228938540283017e-05, "loss": 1.6113, "step": 71339 }, { "epoch": 2.37, "grad_norm": 0.6950157284736633, "learning_rate": 6.228300931211126e-05, "loss": 1.6802, "step": 71340 }, { "epoch": 2.37, "grad_norm": 0.6874424815177917, "learning_rate": 6.227663350994583e-05, "loss": 1.712, "step": 71341 }, { "epoch": 2.37, "grad_norm": 0.6726032495498657, "learning_rate": 6.227025799634152e-05, "loss": 1.6828, "step": 71342 }, { "epoch": 2.37, "grad_norm": 0.6664901375770569, "learning_rate": 6.226388277130604e-05, "loss": 1.7328, "step": 71343 }, { "epoch": 2.37, "grad_norm": 0.6663029789924622, "learning_rate": 6.225750783484716e-05, "loss": 1.7283, "step": 71344 }, { "epoch": 2.37, "grad_norm": 0.6902987360954285, "learning_rate": 6.225113318697272e-05, "loss": 1.7353, "step": 71345 }, { "epoch": 2.37, "grad_norm": 0.6837220788002014, "learning_rate": 6.224475882769038e-05, "loss": 1.742, "step": 71346 }, { "epoch": 2.37, "grad_norm": 0.6898494958877563, "learning_rate": 6.223838475700777e-05, "loss": 1.5947, "step": 71347 }, { "epoch": 2.37, "grad_norm": 0.6793261170387268, "learning_rate": 6.22320109749328e-05, "loss": 1.7268, "step": 71348 }, { "epoch": 2.37, "grad_norm": 0.6701958775520325, "learning_rate": 6.222563748147318e-05, "loss": 1.7172, "step": 71349 }, { "epoch": 2.37, "grad_norm": 0.6759685277938843, "learning_rate": 6.22192642766365e-05, "loss": 1.6943, "step": 71350 }, { "epoch": 2.37, "grad_norm": 0.6596452593803406, "learning_rate": 6.221289136043065e-05, "loss": 1.6948, "step": 71351 }, { "epoch": 2.37, "grad_norm": 0.6759518384933472, "learning_rate": 6.220651873286327e-05, "loss": 1.6216, "step": 71352 }, { "epoch": 2.37, "grad_norm": 0.6747883558273315, "learning_rate": 6.220014639394221e-05, "loss": 1.745, "step": 71353 }, { "epoch": 2.37, "grad_norm": 0.662708044052124, "learning_rate": 6.219377434367504e-05, "loss": 1.6418, "step": 71354 }, { "epoch": 2.37, "grad_norm": 0.6887882351875305, "learning_rate": 6.21874025820697e-05, "loss": 1.6865, "step": 71355 }, { "epoch": 2.37, "grad_norm": 0.6832444667816162, "learning_rate": 6.218103110913379e-05, "loss": 1.6224, "step": 71356 }, { "epoch": 2.37, "grad_norm": 0.7111386060714722, "learning_rate": 6.217465992487498e-05, "loss": 1.6752, "step": 71357 }, { "epoch": 2.37, "grad_norm": 0.6741393804550171, "learning_rate": 6.216828902930119e-05, "loss": 1.7586, "step": 71358 }, { "epoch": 2.37, "grad_norm": 0.6850892305374146, "learning_rate": 6.216191842241996e-05, "loss": 1.7178, "step": 71359 }, { "epoch": 2.37, "grad_norm": 0.6728832721710205, "learning_rate": 6.21555481042392e-05, "loss": 1.6501, "step": 71360 }, { "epoch": 2.37, "grad_norm": 0.6642481088638306, "learning_rate": 6.21491780747665e-05, "loss": 1.7298, "step": 71361 }, { "epoch": 2.37, "grad_norm": 0.6518962383270264, "learning_rate": 6.214280833400972e-05, "loss": 1.6919, "step": 71362 }, { "epoch": 2.37, "grad_norm": 0.6781507730484009, "learning_rate": 6.213643888197651e-05, "loss": 1.7821, "step": 71363 }, { "epoch": 2.37, "grad_norm": 0.6635746359825134, "learning_rate": 6.213006971867455e-05, "loss": 1.6968, "step": 71364 }, { "epoch": 2.37, "grad_norm": 0.6779213547706604, "learning_rate": 6.212370084411171e-05, "loss": 1.7431, "step": 71365 }, { "epoch": 2.37, "grad_norm": 0.691678524017334, "learning_rate": 6.211733225829557e-05, "loss": 1.6822, "step": 71366 }, { "epoch": 2.37, "grad_norm": 0.6896770000457764, "learning_rate": 6.211096396123402e-05, "loss": 1.7281, "step": 71367 }, { "epoch": 2.37, "grad_norm": 0.694989800453186, "learning_rate": 6.21045959529347e-05, "loss": 1.7397, "step": 71368 }, { "epoch": 2.37, "grad_norm": 0.7286500930786133, "learning_rate": 6.209822823340528e-05, "loss": 1.789, "step": 71369 }, { "epoch": 2.37, "grad_norm": 0.6778861284255981, "learning_rate": 6.209186080265364e-05, "loss": 1.634, "step": 71370 }, { "epoch": 2.37, "grad_norm": 0.712669849395752, "learning_rate": 6.208549366068735e-05, "loss": 1.8143, "step": 71371 }, { "epoch": 2.37, "grad_norm": 0.6718863844871521, "learning_rate": 6.207912680751426e-05, "loss": 1.7625, "step": 71372 }, { "epoch": 2.37, "grad_norm": 0.7079694271087646, "learning_rate": 6.207276024314201e-05, "loss": 1.7376, "step": 71373 }, { "epoch": 2.37, "grad_norm": 0.7133803963661194, "learning_rate": 6.206639396757845e-05, "loss": 1.6919, "step": 71374 }, { "epoch": 2.37, "grad_norm": 0.685291051864624, "learning_rate": 6.20600279808312e-05, "loss": 1.6775, "step": 71375 }, { "epoch": 2.37, "grad_norm": 0.6794518828392029, "learning_rate": 6.205366228290796e-05, "loss": 1.6743, "step": 71376 }, { "epoch": 2.37, "grad_norm": 0.6664503812789917, "learning_rate": 6.20472968738166e-05, "loss": 1.6981, "step": 71377 }, { "epoch": 2.37, "grad_norm": 0.6804360151290894, "learning_rate": 6.204093175356476e-05, "loss": 1.7206, "step": 71378 }, { "epoch": 2.37, "grad_norm": 0.6799718141555786, "learning_rate": 6.203456692216006e-05, "loss": 1.7524, "step": 71379 }, { "epoch": 2.37, "grad_norm": 0.6978227496147156, "learning_rate": 6.202820237961038e-05, "loss": 1.6953, "step": 71380 }, { "epoch": 2.37, "grad_norm": 0.6947340369224548, "learning_rate": 6.202183812592344e-05, "loss": 1.5638, "step": 71381 }, { "epoch": 2.37, "grad_norm": 0.6840501427650452, "learning_rate": 6.201547416110694e-05, "loss": 1.6553, "step": 71382 }, { "epoch": 2.37, "grad_norm": 0.6772567629814148, "learning_rate": 6.20091104851685e-05, "loss": 1.689, "step": 71383 }, { "epoch": 2.37, "grad_norm": 0.6941981315612793, "learning_rate": 6.200274709811602e-05, "loss": 1.6774, "step": 71384 }, { "epoch": 2.37, "grad_norm": 0.6990275979042053, "learning_rate": 6.199638399995714e-05, "loss": 1.7048, "step": 71385 }, { "epoch": 2.38, "grad_norm": 0.6607150435447693, "learning_rate": 6.199002119069949e-05, "loss": 1.6754, "step": 71386 }, { "epoch": 2.38, "grad_norm": 0.7183406949043274, "learning_rate": 6.19836586703509e-05, "loss": 1.7755, "step": 71387 }, { "epoch": 2.38, "grad_norm": 0.6871196031570435, "learning_rate": 6.197729643891925e-05, "loss": 1.6982, "step": 71388 }, { "epoch": 2.38, "grad_norm": 0.6638486385345459, "learning_rate": 6.197093449641191e-05, "loss": 1.7189, "step": 71389 }, { "epoch": 2.38, "grad_norm": 0.6634590029716492, "learning_rate": 6.19645728428368e-05, "loss": 1.654, "step": 71390 }, { "epoch": 2.38, "grad_norm": 0.7000441551208496, "learning_rate": 6.195821147820167e-05, "loss": 1.7538, "step": 71391 }, { "epoch": 2.38, "grad_norm": 0.6886866688728333, "learning_rate": 6.195185040251421e-05, "loss": 1.5984, "step": 71392 }, { "epoch": 2.38, "grad_norm": 0.7035004496574402, "learning_rate": 6.194548961578209e-05, "loss": 1.6697, "step": 71393 }, { "epoch": 2.38, "grad_norm": 0.688967764377594, "learning_rate": 6.193912911801301e-05, "loss": 1.6368, "step": 71394 }, { "epoch": 2.38, "grad_norm": 0.6887997388839722, "learning_rate": 6.193276890921495e-05, "loss": 1.755, "step": 71395 }, { "epoch": 2.38, "grad_norm": 0.6805905103683472, "learning_rate": 6.192640898939526e-05, "loss": 1.7251, "step": 71396 }, { "epoch": 2.38, "grad_norm": 0.6674099564552307, "learning_rate": 6.192004935856184e-05, "loss": 1.6531, "step": 71397 }, { "epoch": 2.38, "grad_norm": 0.6726887226104736, "learning_rate": 6.191369001672245e-05, "loss": 1.6393, "step": 71398 }, { "epoch": 2.38, "grad_norm": 0.6697680950164795, "learning_rate": 6.190733096388481e-05, "loss": 1.6779, "step": 71399 }, { "epoch": 2.38, "grad_norm": 0.6648988723754883, "learning_rate": 6.190097220005646e-05, "loss": 1.6873, "step": 71400 }, { "epoch": 2.38, "grad_norm": 0.673322319984436, "learning_rate": 6.18946137252453e-05, "loss": 1.7617, "step": 71401 }, { "epoch": 2.38, "grad_norm": 0.6685529351234436, "learning_rate": 6.188825553945911e-05, "loss": 1.6702, "step": 71402 }, { "epoch": 2.38, "grad_norm": 0.6812736392021179, "learning_rate": 6.188189764270535e-05, "loss": 1.7279, "step": 71403 }, { "epoch": 2.38, "grad_norm": 0.6946887373924255, "learning_rate": 6.187554003499186e-05, "loss": 1.72, "step": 71404 }, { "epoch": 2.38, "grad_norm": 0.681033730506897, "learning_rate": 6.18691827163265e-05, "loss": 1.6567, "step": 71405 }, { "epoch": 2.38, "grad_norm": 0.6543143391609192, "learning_rate": 6.186282568671682e-05, "loss": 1.6363, "step": 71406 }, { "epoch": 2.38, "grad_norm": 0.6662018895149231, "learning_rate": 6.185646894617055e-05, "loss": 1.6622, "step": 71407 }, { "epoch": 2.38, "grad_norm": 0.7169166207313538, "learning_rate": 6.185011249469547e-05, "loss": 1.7522, "step": 71408 }, { "epoch": 2.38, "grad_norm": 0.673332154750824, "learning_rate": 6.184375633229929e-05, "loss": 1.6792, "step": 71409 }, { "epoch": 2.38, "grad_norm": 0.6746198534965515, "learning_rate": 6.183740045898963e-05, "loss": 1.6757, "step": 71410 }, { "epoch": 2.38, "grad_norm": 0.6702951788902283, "learning_rate": 6.183104487477425e-05, "loss": 1.7201, "step": 71411 }, { "epoch": 2.38, "grad_norm": 0.6886223554611206, "learning_rate": 6.182468957966097e-05, "loss": 1.723, "step": 71412 }, { "epoch": 2.38, "grad_norm": 0.7130357027053833, "learning_rate": 6.181833457365744e-05, "loss": 1.6881, "step": 71413 }, { "epoch": 2.38, "grad_norm": 0.7273791432380676, "learning_rate": 6.181197985677128e-05, "loss": 1.7168, "step": 71414 }, { "epoch": 2.38, "grad_norm": 0.6668329238891602, "learning_rate": 6.180562542901033e-05, "loss": 1.6625, "step": 71415 }, { "epoch": 2.38, "grad_norm": 0.6836138367652893, "learning_rate": 6.17992712903823e-05, "loss": 1.6121, "step": 71416 }, { "epoch": 2.38, "grad_norm": 0.6745050549507141, "learning_rate": 6.179291744089475e-05, "loss": 1.7146, "step": 71417 }, { "epoch": 2.38, "grad_norm": 0.6886972784996033, "learning_rate": 6.178656388055558e-05, "loss": 1.6182, "step": 71418 }, { "epoch": 2.38, "grad_norm": 0.6670315265655518, "learning_rate": 6.178021060937233e-05, "loss": 1.6702, "step": 71419 }, { "epoch": 2.38, "grad_norm": 0.6832674145698547, "learning_rate": 6.17738576273529e-05, "loss": 1.6977, "step": 71420 }, { "epoch": 2.38, "grad_norm": 0.6781669855117798, "learning_rate": 6.176750493450485e-05, "loss": 1.6663, "step": 71421 }, { "epoch": 2.38, "grad_norm": 0.673252522945404, "learning_rate": 6.176115253083599e-05, "loss": 1.7385, "step": 71422 }, { "epoch": 2.38, "grad_norm": 0.6681503057479858, "learning_rate": 6.175480041635392e-05, "loss": 1.6284, "step": 71423 }, { "epoch": 2.38, "grad_norm": 0.6891573667526245, "learning_rate": 6.174844859106651e-05, "loss": 1.6535, "step": 71424 }, { "epoch": 2.38, "grad_norm": 0.688487708568573, "learning_rate": 6.174209705498135e-05, "loss": 1.6786, "step": 71425 }, { "epoch": 2.38, "grad_norm": 0.6710906028747559, "learning_rate": 6.173574580810613e-05, "loss": 1.7251, "step": 71426 }, { "epoch": 2.38, "grad_norm": 0.6815837025642395, "learning_rate": 6.172939485044866e-05, "loss": 1.701, "step": 71427 }, { "epoch": 2.38, "grad_norm": 0.6785048246383667, "learning_rate": 6.172304418201663e-05, "loss": 1.6679, "step": 71428 }, { "epoch": 2.38, "grad_norm": 0.6790913939476013, "learning_rate": 6.17166938028176e-05, "loss": 1.6977, "step": 71429 }, { "epoch": 2.38, "grad_norm": 0.7020979523658752, "learning_rate": 6.171034371285941e-05, "loss": 1.7061, "step": 71430 }, { "epoch": 2.38, "grad_norm": 0.679046630859375, "learning_rate": 6.170399391214982e-05, "loss": 1.755, "step": 71431 }, { "epoch": 2.38, "grad_norm": 0.6894256472587585, "learning_rate": 6.169764440069652e-05, "loss": 1.7036, "step": 71432 }, { "epoch": 2.38, "grad_norm": 0.6554141640663147, "learning_rate": 6.169129517850703e-05, "loss": 1.632, "step": 71433 }, { "epoch": 2.38, "grad_norm": 0.6751498579978943, "learning_rate": 6.16849462455893e-05, "loss": 1.716, "step": 71434 }, { "epoch": 2.38, "grad_norm": 0.6840209364891052, "learning_rate": 6.16785976019509e-05, "loss": 1.7204, "step": 71435 }, { "epoch": 2.38, "grad_norm": 0.675692617893219, "learning_rate": 6.167224924759953e-05, "loss": 1.659, "step": 71436 }, { "epoch": 2.38, "grad_norm": 0.6806461215019226, "learning_rate": 6.166590118254292e-05, "loss": 1.621, "step": 71437 }, { "epoch": 2.38, "grad_norm": 0.6875280141830444, "learning_rate": 6.165955340678896e-05, "loss": 1.6732, "step": 71438 }, { "epoch": 2.38, "grad_norm": 0.6934026479721069, "learning_rate": 6.1653205920345e-05, "loss": 1.6853, "step": 71439 }, { "epoch": 2.38, "grad_norm": 0.6923276782035828, "learning_rate": 6.164685872321894e-05, "loss": 1.6455, "step": 71440 }, { "epoch": 2.38, "grad_norm": 0.6782961487770081, "learning_rate": 6.164051181541855e-05, "loss": 1.6871, "step": 71441 }, { "epoch": 2.38, "grad_norm": 0.6723740100860596, "learning_rate": 6.163416519695148e-05, "loss": 1.675, "step": 71442 }, { "epoch": 2.38, "grad_norm": 0.7004165053367615, "learning_rate": 6.16278188678253e-05, "loss": 1.6678, "step": 71443 }, { "epoch": 2.38, "grad_norm": 0.6902281641960144, "learning_rate": 6.162147282804786e-05, "loss": 1.7095, "step": 71444 }, { "epoch": 2.38, "grad_norm": 0.6563383936882019, "learning_rate": 6.161512707762696e-05, "loss": 1.6855, "step": 71445 }, { "epoch": 2.38, "grad_norm": 0.6613191366195679, "learning_rate": 6.160878161657002e-05, "loss": 1.7202, "step": 71446 }, { "epoch": 2.38, "grad_norm": 0.6839028000831604, "learning_rate": 6.160243644488488e-05, "loss": 1.647, "step": 71447 }, { "epoch": 2.38, "grad_norm": 0.6940810680389404, "learning_rate": 6.159609156257936e-05, "loss": 1.6541, "step": 71448 }, { "epoch": 2.38, "grad_norm": 0.6788944602012634, "learning_rate": 6.158974696966102e-05, "loss": 1.7638, "step": 71449 }, { "epoch": 2.38, "grad_norm": 0.6885144114494324, "learning_rate": 6.158340266613756e-05, "loss": 1.6941, "step": 71450 }, { "epoch": 2.38, "grad_norm": 0.6827159523963928, "learning_rate": 6.15770586520167e-05, "loss": 1.6624, "step": 71451 }, { "epoch": 2.38, "grad_norm": 0.6572591662406921, "learning_rate": 6.157071492730633e-05, "loss": 1.5913, "step": 71452 }, { "epoch": 2.38, "grad_norm": 0.6816606521606445, "learning_rate": 6.156437149201382e-05, "loss": 1.6781, "step": 71453 }, { "epoch": 2.38, "grad_norm": 0.694770097732544, "learning_rate": 6.155802834614703e-05, "loss": 1.7234, "step": 71454 }, { "epoch": 2.38, "grad_norm": 0.6862425804138184, "learning_rate": 6.155168548971375e-05, "loss": 1.6081, "step": 71455 }, { "epoch": 2.38, "grad_norm": 0.6965488195419312, "learning_rate": 6.154534292272156e-05, "loss": 1.7269, "step": 71456 }, { "epoch": 2.38, "grad_norm": 0.6788491606712341, "learning_rate": 6.153900064517815e-05, "loss": 1.7092, "step": 71457 }, { "epoch": 2.38, "grad_norm": 0.6824740171432495, "learning_rate": 6.153265865709133e-05, "loss": 1.6879, "step": 71458 }, { "epoch": 2.38, "grad_norm": 0.6751720905303955, "learning_rate": 6.152631695846869e-05, "loss": 1.6799, "step": 71459 }, { "epoch": 2.38, "grad_norm": 0.6736737489700317, "learning_rate": 6.15199755493179e-05, "loss": 1.6227, "step": 71460 }, { "epoch": 2.38, "grad_norm": 0.6749522686004639, "learning_rate": 6.151363442964675e-05, "loss": 1.7279, "step": 71461 }, { "epoch": 2.38, "grad_norm": 0.6764702200889587, "learning_rate": 6.150729359946297e-05, "loss": 1.6216, "step": 71462 }, { "epoch": 2.38, "grad_norm": 0.6676497459411621, "learning_rate": 6.150095305877418e-05, "loss": 1.681, "step": 71463 }, { "epoch": 2.38, "grad_norm": 0.6778423190116882, "learning_rate": 6.149461280758802e-05, "loss": 1.5884, "step": 71464 }, { "epoch": 2.38, "grad_norm": 0.6986841559410095, "learning_rate": 6.148827284591233e-05, "loss": 1.6627, "step": 71465 }, { "epoch": 2.38, "grad_norm": 0.671887218952179, "learning_rate": 6.148193317375476e-05, "loss": 1.7262, "step": 71466 }, { "epoch": 2.38, "grad_norm": 0.6996923685073853, "learning_rate": 6.14755937911229e-05, "loss": 1.7164, "step": 71467 }, { "epoch": 2.38, "grad_norm": 0.7015647888183594, "learning_rate": 6.146925469802458e-05, "loss": 1.7608, "step": 71468 }, { "epoch": 2.38, "grad_norm": 0.6651967167854309, "learning_rate": 6.146291589446737e-05, "loss": 1.7004, "step": 71469 }, { "epoch": 2.38, "grad_norm": 0.6746165752410889, "learning_rate": 6.14565773804591e-05, "loss": 1.6645, "step": 71470 }, { "epoch": 2.38, "grad_norm": 0.6775458455085754, "learning_rate": 6.145023915600732e-05, "loss": 1.7044, "step": 71471 }, { "epoch": 2.38, "grad_norm": 0.6863371729850769, "learning_rate": 6.144390122111991e-05, "loss": 1.6533, "step": 71472 }, { "epoch": 2.38, "grad_norm": 0.6802451014518738, "learning_rate": 6.143756357580442e-05, "loss": 1.645, "step": 71473 }, { "epoch": 2.38, "grad_norm": 0.6673213243484497, "learning_rate": 6.14312262200685e-05, "loss": 1.6889, "step": 71474 }, { "epoch": 2.38, "grad_norm": 0.7020355463027954, "learning_rate": 6.142488915392005e-05, "loss": 1.7009, "step": 71475 }, { "epoch": 2.38, "grad_norm": 0.6580154895782471, "learning_rate": 6.141855237736649e-05, "loss": 1.7019, "step": 71476 }, { "epoch": 2.38, "grad_norm": 0.671174168586731, "learning_rate": 6.141221589041575e-05, "loss": 1.7294, "step": 71477 }, { "epoch": 2.38, "grad_norm": 0.6865543723106384, "learning_rate": 6.140587969307545e-05, "loss": 1.7185, "step": 71478 }, { "epoch": 2.38, "grad_norm": 0.6673986911773682, "learning_rate": 6.139954378535313e-05, "loss": 1.6783, "step": 71479 }, { "epoch": 2.38, "grad_norm": 0.681948184967041, "learning_rate": 6.139320816725675e-05, "loss": 1.6762, "step": 71480 }, { "epoch": 2.38, "grad_norm": 0.7052032351493835, "learning_rate": 6.138687283879374e-05, "loss": 1.7078, "step": 71481 }, { "epoch": 2.38, "grad_norm": 0.7139750719070435, "learning_rate": 6.138053779997198e-05, "loss": 1.7358, "step": 71482 }, { "epoch": 2.38, "grad_norm": 0.6838428974151611, "learning_rate": 6.137420305079903e-05, "loss": 1.6708, "step": 71483 }, { "epoch": 2.38, "grad_norm": 0.7141676545143127, "learning_rate": 6.13678685912827e-05, "loss": 1.712, "step": 71484 }, { "epoch": 2.38, "grad_norm": 0.658430814743042, "learning_rate": 6.136153442143065e-05, "loss": 1.6932, "step": 71485 }, { "epoch": 2.38, "grad_norm": 0.661780834197998, "learning_rate": 6.13552005412504e-05, "loss": 1.724, "step": 71486 }, { "epoch": 2.38, "grad_norm": 0.6911699175834656, "learning_rate": 6.134886695074991e-05, "loss": 1.6009, "step": 71487 }, { "epoch": 2.38, "grad_norm": 0.6698541045188904, "learning_rate": 6.134253364993661e-05, "loss": 1.6702, "step": 71488 }, { "epoch": 2.38, "grad_norm": 0.6927961111068726, "learning_rate": 6.13362006388184e-05, "loss": 1.6214, "step": 71489 }, { "epoch": 2.38, "grad_norm": 0.6965411305427551, "learning_rate": 6.13298679174028e-05, "loss": 1.6858, "step": 71490 }, { "epoch": 2.38, "grad_norm": 0.677126407623291, "learning_rate": 6.132353548569768e-05, "loss": 1.7104, "step": 71491 }, { "epoch": 2.38, "grad_norm": 0.6874426007270813, "learning_rate": 6.131720334371059e-05, "loss": 1.7521, "step": 71492 }, { "epoch": 2.38, "grad_norm": 0.6692609190940857, "learning_rate": 6.131087149144918e-05, "loss": 1.7012, "step": 71493 }, { "epoch": 2.38, "grad_norm": 0.6666254997253418, "learning_rate": 6.130453992892127e-05, "loss": 1.6644, "step": 71494 }, { "epoch": 2.38, "grad_norm": 0.6949228644371033, "learning_rate": 6.129820865613449e-05, "loss": 1.7168, "step": 71495 }, { "epoch": 2.38, "grad_norm": 0.696430504322052, "learning_rate": 6.129187767309645e-05, "loss": 1.7241, "step": 71496 }, { "epoch": 2.38, "grad_norm": 0.6837843060493469, "learning_rate": 6.128554697981488e-05, "loss": 1.674, "step": 71497 }, { "epoch": 2.38, "grad_norm": 0.6782296895980835, "learning_rate": 6.127921657629757e-05, "loss": 1.7236, "step": 71498 }, { "epoch": 2.38, "grad_norm": 0.6636602878570557, "learning_rate": 6.127288646255208e-05, "loss": 1.7203, "step": 71499 }, { "epoch": 2.38, "grad_norm": 0.7146576642990112, "learning_rate": 6.12665566385861e-05, "loss": 1.7652, "step": 71500 }, { "epoch": 2.38, "grad_norm": 0.6801743507385254, "learning_rate": 6.126022710440739e-05, "loss": 1.6612, "step": 71501 }, { "epoch": 2.38, "grad_norm": 0.6859909892082214, "learning_rate": 6.12538978600236e-05, "loss": 1.7143, "step": 71502 }, { "epoch": 2.38, "grad_norm": 0.6830132007598877, "learning_rate": 6.124756890544235e-05, "loss": 1.7168, "step": 71503 }, { "epoch": 2.38, "grad_norm": 0.6673746109008789, "learning_rate": 6.124124024067134e-05, "loss": 1.7118, "step": 71504 }, { "epoch": 2.38, "grad_norm": 0.7045357823371887, "learning_rate": 6.123491186571844e-05, "loss": 1.6851, "step": 71505 }, { "epoch": 2.38, "grad_norm": 0.6812227368354797, "learning_rate": 6.122858378059104e-05, "loss": 1.6629, "step": 71506 }, { "epoch": 2.38, "grad_norm": 0.6953685283660889, "learning_rate": 6.122225598529692e-05, "loss": 1.7074, "step": 71507 }, { "epoch": 2.38, "grad_norm": 0.6749856472015381, "learning_rate": 6.121592847984393e-05, "loss": 1.7251, "step": 71508 }, { "epoch": 2.38, "grad_norm": 0.6840947270393372, "learning_rate": 6.120960126423957e-05, "loss": 1.6922, "step": 71509 }, { "epoch": 2.38, "grad_norm": 0.6725656390190125, "learning_rate": 6.120327433849153e-05, "loss": 1.6662, "step": 71510 }, { "epoch": 2.38, "grad_norm": 0.6763378381729126, "learning_rate": 6.119694770260751e-05, "loss": 1.648, "step": 71511 }, { "epoch": 2.38, "grad_norm": 0.6660362482070923, "learning_rate": 6.119062135659536e-05, "loss": 1.657, "step": 71512 }, { "epoch": 2.38, "grad_norm": 0.674862802028656, "learning_rate": 6.118429530046246e-05, "loss": 1.6877, "step": 71513 }, { "epoch": 2.38, "grad_norm": 0.6977204084396362, "learning_rate": 6.117796953421664e-05, "loss": 1.7617, "step": 71514 }, { "epoch": 2.38, "grad_norm": 0.6957376003265381, "learning_rate": 6.117164405786566e-05, "loss": 1.664, "step": 71515 }, { "epoch": 2.38, "grad_norm": 0.6898164749145508, "learning_rate": 6.116531887141712e-05, "loss": 1.7299, "step": 71516 }, { "epoch": 2.38, "grad_norm": 0.6489320993423462, "learning_rate": 6.115899397487861e-05, "loss": 1.6672, "step": 71517 }, { "epoch": 2.38, "grad_norm": 0.6763089895248413, "learning_rate": 6.115266936825786e-05, "loss": 1.6037, "step": 71518 }, { "epoch": 2.38, "grad_norm": 0.7775094509124756, "learning_rate": 6.114634505156279e-05, "loss": 1.7304, "step": 71519 }, { "epoch": 2.38, "grad_norm": 0.6897211670875549, "learning_rate": 6.114002102480066e-05, "loss": 1.7792, "step": 71520 }, { "epoch": 2.38, "grad_norm": 0.7108171582221985, "learning_rate": 6.113369728797936e-05, "loss": 1.7441, "step": 71521 }, { "epoch": 2.38, "grad_norm": 1.4430948495864868, "learning_rate": 6.112737384110664e-05, "loss": 1.6453, "step": 71522 }, { "epoch": 2.38, "grad_norm": 0.6810451745986938, "learning_rate": 6.112105068419008e-05, "loss": 1.7211, "step": 71523 }, { "epoch": 2.38, "grad_norm": 0.6812729835510254, "learning_rate": 6.11147278172373e-05, "loss": 1.6766, "step": 71524 }, { "epoch": 2.38, "grad_norm": 0.682593047618866, "learning_rate": 6.110840524025613e-05, "loss": 1.7227, "step": 71525 }, { "epoch": 2.38, "grad_norm": 0.6860989332199097, "learning_rate": 6.110208295325414e-05, "loss": 1.7453, "step": 71526 }, { "epoch": 2.38, "grad_norm": 0.7139569520950317, "learning_rate": 6.109576095623895e-05, "loss": 1.7437, "step": 71527 }, { "epoch": 2.38, "grad_norm": 0.6756665110588074, "learning_rate": 6.108943924921832e-05, "loss": 1.6924, "step": 71528 }, { "epoch": 2.38, "grad_norm": 0.6855809688568115, "learning_rate": 6.108311783219994e-05, "loss": 1.6711, "step": 71529 }, { "epoch": 2.38, "grad_norm": 0.6792817711830139, "learning_rate": 6.107679670519152e-05, "loss": 1.698, "step": 71530 }, { "epoch": 2.38, "grad_norm": 0.6733080744743347, "learning_rate": 6.107047586820054e-05, "loss": 1.7179, "step": 71531 }, { "epoch": 2.38, "grad_norm": 0.6784918904304504, "learning_rate": 6.10641553212349e-05, "loss": 1.6683, "step": 71532 }, { "epoch": 2.38, "grad_norm": 0.6639528274536133, "learning_rate": 6.105783506430215e-05, "loss": 1.6222, "step": 71533 }, { "epoch": 2.38, "grad_norm": 0.6744014620780945, "learning_rate": 6.10515150974099e-05, "loss": 1.6834, "step": 71534 }, { "epoch": 2.38, "grad_norm": 0.6720602512359619, "learning_rate": 6.1045195420566e-05, "loss": 1.6644, "step": 71535 }, { "epoch": 2.38, "grad_norm": 0.6972343921661377, "learning_rate": 6.103887603377794e-05, "loss": 1.6654, "step": 71536 }, { "epoch": 2.38, "grad_norm": 0.6940224170684814, "learning_rate": 6.103255693705356e-05, "loss": 1.6992, "step": 71537 }, { "epoch": 2.38, "grad_norm": 0.6734300851821899, "learning_rate": 6.102623813040034e-05, "loss": 1.5927, "step": 71538 }, { "epoch": 2.38, "grad_norm": 0.6948874592781067, "learning_rate": 6.101991961382615e-05, "loss": 1.7412, "step": 71539 }, { "epoch": 2.38, "grad_norm": 0.6774733662605286, "learning_rate": 6.1013601387338515e-05, "loss": 1.6343, "step": 71540 }, { "epoch": 2.38, "grad_norm": 0.6858793497085571, "learning_rate": 6.10072834509452e-05, "loss": 1.7289, "step": 71541 }, { "epoch": 2.38, "grad_norm": 1.3187282085418701, "learning_rate": 6.100096580465382e-05, "loss": 1.7373, "step": 71542 }, { "epoch": 2.38, "grad_norm": 0.6769437789916992, "learning_rate": 6.0994648448472014e-05, "loss": 1.6698, "step": 71543 }, { "epoch": 2.38, "grad_norm": 0.6744922995567322, "learning_rate": 6.098833138240754e-05, "loss": 1.7247, "step": 71544 }, { "epoch": 2.38, "grad_norm": 0.6782088875770569, "learning_rate": 6.098201460646802e-05, "loss": 1.6767, "step": 71545 }, { "epoch": 2.38, "grad_norm": 0.684842050075531, "learning_rate": 6.097569812066106e-05, "loss": 1.6582, "step": 71546 }, { "epoch": 2.38, "grad_norm": 0.662884533405304, "learning_rate": 6.0969381924994335e-05, "loss": 1.5999, "step": 71547 }, { "epoch": 2.38, "grad_norm": 0.6664551496505737, "learning_rate": 6.096306601947568e-05, "loss": 1.6849, "step": 71548 }, { "epoch": 2.38, "grad_norm": 0.6851146221160889, "learning_rate": 6.095675040411263e-05, "loss": 1.6945, "step": 71549 }, { "epoch": 2.38, "grad_norm": 0.6755996346473694, "learning_rate": 6.0950435078912775e-05, "loss": 1.7163, "step": 71550 }, { "epoch": 2.38, "grad_norm": 0.6755624413490295, "learning_rate": 6.094412004388397e-05, "loss": 1.5805, "step": 71551 }, { "epoch": 2.38, "grad_norm": 0.6692385077476501, "learning_rate": 6.0937805299033745e-05, "loss": 1.6746, "step": 71552 }, { "epoch": 2.38, "grad_norm": 0.6884328126907349, "learning_rate": 6.093149084436977e-05, "loss": 1.7898, "step": 71553 }, { "epoch": 2.38, "grad_norm": 0.6805040240287781, "learning_rate": 6.092517667989969e-05, "loss": 1.6769, "step": 71554 }, { "epoch": 2.38, "grad_norm": 0.6952272057533264, "learning_rate": 6.0918862805631406e-05, "loss": 1.7374, "step": 71555 }, { "epoch": 2.38, "grad_norm": 0.692454993724823, "learning_rate": 6.091254922157225e-05, "loss": 1.7355, "step": 71556 }, { "epoch": 2.38, "grad_norm": 0.6987366676330566, "learning_rate": 6.0906235927730005e-05, "loss": 1.655, "step": 71557 }, { "epoch": 2.38, "grad_norm": 0.7012375593185425, "learning_rate": 6.089992292411244e-05, "loss": 1.6397, "step": 71558 }, { "epoch": 2.38, "grad_norm": 0.6741788983345032, "learning_rate": 6.089361021072715e-05, "loss": 1.7417, "step": 71559 }, { "epoch": 2.38, "grad_norm": 0.7025333046913147, "learning_rate": 6.0887297787581725e-05, "loss": 1.6267, "step": 71560 }, { "epoch": 2.38, "grad_norm": 0.7057846784591675, "learning_rate": 6.0880985654683855e-05, "loss": 1.6407, "step": 71561 }, { "epoch": 2.38, "grad_norm": 0.6801089644432068, "learning_rate": 6.087467381204141e-05, "loss": 1.6675, "step": 71562 }, { "epoch": 2.38, "grad_norm": 0.6542300581932068, "learning_rate": 6.086836225966171e-05, "loss": 1.6073, "step": 71563 }, { "epoch": 2.38, "grad_norm": 0.6734619736671448, "learning_rate": 6.086205099755261e-05, "loss": 1.6732, "step": 71564 }, { "epoch": 2.38, "grad_norm": 0.672760009765625, "learning_rate": 6.0855740025721785e-05, "loss": 1.6194, "step": 71565 }, { "epoch": 2.38, "grad_norm": 0.6615210771560669, "learning_rate": 6.084942934417689e-05, "loss": 1.7825, "step": 71566 }, { "epoch": 2.38, "grad_norm": 0.6764661073684692, "learning_rate": 6.084311895292545e-05, "loss": 1.6903, "step": 71567 }, { "epoch": 2.38, "grad_norm": 0.6800678968429565, "learning_rate": 6.083680885197523e-05, "loss": 1.6483, "step": 71568 }, { "epoch": 2.38, "grad_norm": 0.668156623840332, "learning_rate": 6.0830499041334046e-05, "loss": 1.6654, "step": 71569 }, { "epoch": 2.38, "grad_norm": 0.6605668067932129, "learning_rate": 6.082418952100924e-05, "loss": 1.6316, "step": 71570 }, { "epoch": 2.38, "grad_norm": 0.6845632195472717, "learning_rate": 6.081788029100862e-05, "loss": 1.7289, "step": 71571 }, { "epoch": 2.38, "grad_norm": 0.6934350728988647, "learning_rate": 6.0811571351339935e-05, "loss": 1.7179, "step": 71572 }, { "epoch": 2.38, "grad_norm": 0.7035102248191833, "learning_rate": 6.080526270201077e-05, "loss": 1.7267, "step": 71573 }, { "epoch": 2.38, "grad_norm": 0.6886879205703735, "learning_rate": 6.0798954343028685e-05, "loss": 1.6566, "step": 71574 }, { "epoch": 2.38, "grad_norm": 0.7001107335090637, "learning_rate": 6.0792646274401504e-05, "loss": 1.6345, "step": 71575 }, { "epoch": 2.38, "grad_norm": 0.714989423751831, "learning_rate": 6.0786338496136825e-05, "loss": 1.7324, "step": 71576 }, { "epoch": 2.38, "grad_norm": 0.6978871822357178, "learning_rate": 6.078003100824217e-05, "loss": 1.6933, "step": 71577 }, { "epoch": 2.38, "grad_norm": 0.6790592074394226, "learning_rate": 6.077372381072531e-05, "loss": 1.6864, "step": 71578 }, { "epoch": 2.38, "grad_norm": 0.6526617407798767, "learning_rate": 6.076741690359399e-05, "loss": 1.6797, "step": 71579 }, { "epoch": 2.38, "grad_norm": 0.6871216297149658, "learning_rate": 6.076111028685578e-05, "loss": 1.673, "step": 71580 }, { "epoch": 2.38, "grad_norm": 0.701475977897644, "learning_rate": 6.0754803960518284e-05, "loss": 1.7626, "step": 71581 }, { "epoch": 2.38, "grad_norm": 0.6636090874671936, "learning_rate": 6.0748497924589244e-05, "loss": 1.7026, "step": 71582 }, { "epoch": 2.38, "grad_norm": 0.7060530185699463, "learning_rate": 6.0742192179076264e-05, "loss": 1.712, "step": 71583 }, { "epoch": 2.38, "grad_norm": 0.6833032965660095, "learning_rate": 6.073588672398697e-05, "loss": 1.6614, "step": 71584 }, { "epoch": 2.38, "grad_norm": 0.7132930159568787, "learning_rate": 6.072958155932912e-05, "loss": 1.6277, "step": 71585 }, { "epoch": 2.38, "grad_norm": 0.6748723387718201, "learning_rate": 6.072327668511021e-05, "loss": 1.6774, "step": 71586 }, { "epoch": 2.38, "grad_norm": 0.6827928423881531, "learning_rate": 6.071697210133807e-05, "loss": 1.695, "step": 71587 }, { "epoch": 2.38, "grad_norm": 0.6914902329444885, "learning_rate": 6.071066780802022e-05, "loss": 1.716, "step": 71588 }, { "epoch": 2.38, "grad_norm": 0.6697193384170532, "learning_rate": 6.070436380516439e-05, "loss": 1.6775, "step": 71589 }, { "epoch": 2.38, "grad_norm": 0.6976723074913025, "learning_rate": 6.069806009277825e-05, "loss": 1.6546, "step": 71590 }, { "epoch": 2.38, "grad_norm": 0.7143540382385254, "learning_rate": 6.0691756670869305e-05, "loss": 1.7739, "step": 71591 }, { "epoch": 2.38, "grad_norm": 0.6771304607391357, "learning_rate": 6.068545353944537e-05, "loss": 1.6757, "step": 71592 }, { "epoch": 2.38, "grad_norm": 0.6790348291397095, "learning_rate": 6.0679150698514004e-05, "loss": 1.721, "step": 71593 }, { "epoch": 2.38, "grad_norm": 0.6828675270080566, "learning_rate": 6.067284814808292e-05, "loss": 1.6797, "step": 71594 }, { "epoch": 2.38, "grad_norm": 0.6892120838165283, "learning_rate": 6.0666545888159754e-05, "loss": 1.7123, "step": 71595 }, { "epoch": 2.38, "grad_norm": 0.683819055557251, "learning_rate": 6.066024391875204e-05, "loss": 1.7117, "step": 71596 }, { "epoch": 2.38, "grad_norm": 0.6976901292800903, "learning_rate": 6.065394223986763e-05, "loss": 1.721, "step": 71597 }, { "epoch": 2.38, "grad_norm": 0.6692615747451782, "learning_rate": 6.0647640851513976e-05, "loss": 1.6489, "step": 71598 }, { "epoch": 2.38, "grad_norm": 0.6778823137283325, "learning_rate": 6.064133975369889e-05, "loss": 1.6833, "step": 71599 }, { "epoch": 2.38, "grad_norm": 0.6875209212303162, "learning_rate": 6.0635038946429894e-05, "loss": 1.6519, "step": 71600 }, { "epoch": 2.38, "grad_norm": 0.678847074508667, "learning_rate": 6.062873842971474e-05, "loss": 1.6707, "step": 71601 }, { "epoch": 2.38, "grad_norm": 0.68228679895401, "learning_rate": 6.062243820356103e-05, "loss": 1.6868, "step": 71602 }, { "epoch": 2.38, "grad_norm": 0.6762679219245911, "learning_rate": 6.061613826797636e-05, "loss": 1.7555, "step": 71603 }, { "epoch": 2.38, "grad_norm": 0.673659086227417, "learning_rate": 6.0609838622968475e-05, "loss": 1.6877, "step": 71604 }, { "epoch": 2.38, "grad_norm": 0.6810163259506226, "learning_rate": 6.060353926854492e-05, "loss": 1.6993, "step": 71605 }, { "epoch": 2.38, "grad_norm": 0.6653671860694885, "learning_rate": 6.059724020471345e-05, "loss": 1.6559, "step": 71606 }, { "epoch": 2.38, "grad_norm": 0.6740565299987793, "learning_rate": 6.059094143148159e-05, "loss": 1.6882, "step": 71607 }, { "epoch": 2.38, "grad_norm": 0.6642544269561768, "learning_rate": 6.0584642948857133e-05, "loss": 1.6314, "step": 71608 }, { "epoch": 2.38, "grad_norm": 0.6951282024383545, "learning_rate": 6.057834475684764e-05, "loss": 1.672, "step": 71609 }, { "epoch": 2.38, "grad_norm": 0.6771844029426575, "learning_rate": 6.0572046855460676e-05, "loss": 1.6717, "step": 71610 }, { "epoch": 2.38, "grad_norm": 0.6982851028442383, "learning_rate": 6.056574924470407e-05, "loss": 1.6843, "step": 71611 }, { "epoch": 2.38, "grad_norm": 0.6967743039131165, "learning_rate": 6.0559451924585333e-05, "loss": 1.6673, "step": 71612 }, { "epoch": 2.38, "grad_norm": 0.6876711249351501, "learning_rate": 6.055315489511211e-05, "loss": 1.6927, "step": 71613 }, { "epoch": 2.38, "grad_norm": 0.6846895813941956, "learning_rate": 6.054685815629202e-05, "loss": 1.7007, "step": 71614 }, { "epoch": 2.38, "grad_norm": 0.6945952773094177, "learning_rate": 6.054056170813286e-05, "loss": 1.6819, "step": 71615 }, { "epoch": 2.38, "grad_norm": 0.6666216850280762, "learning_rate": 6.053426555064219e-05, "loss": 1.7077, "step": 71616 }, { "epoch": 2.38, "grad_norm": 0.6824678778648376, "learning_rate": 6.052796968382757e-05, "loss": 1.6696, "step": 71617 }, { "epoch": 2.38, "grad_norm": 0.68204265832901, "learning_rate": 6.052167410769676e-05, "loss": 1.6927, "step": 71618 }, { "epoch": 2.38, "grad_norm": 0.7007718086242676, "learning_rate": 6.0515378822257355e-05, "loss": 1.6808, "step": 71619 }, { "epoch": 2.38, "grad_norm": 0.6713977456092834, "learning_rate": 6.0509083827516915e-05, "loss": 1.7308, "step": 71620 }, { "epoch": 2.38, "grad_norm": 0.6899281740188599, "learning_rate": 6.050278912348316e-05, "loss": 1.7196, "step": 71621 }, { "epoch": 2.38, "grad_norm": 0.6777617931365967, "learning_rate": 6.04964947101639e-05, "loss": 1.7185, "step": 71622 }, { "epoch": 2.38, "grad_norm": 0.7595450282096863, "learning_rate": 6.049020058756645e-05, "loss": 1.6565, "step": 71623 }, { "epoch": 2.38, "grad_norm": 0.6776482462882996, "learning_rate": 6.0483906755698606e-05, "loss": 1.7212, "step": 71624 }, { "epoch": 2.38, "grad_norm": 0.6893647909164429, "learning_rate": 6.047761321456806e-05, "loss": 1.7664, "step": 71625 }, { "epoch": 2.38, "grad_norm": 0.7015304565429688, "learning_rate": 6.047131996418245e-05, "loss": 1.6232, "step": 71626 }, { "epoch": 2.38, "grad_norm": 0.6914121508598328, "learning_rate": 6.046502700454926e-05, "loss": 1.6573, "step": 71627 }, { "epoch": 2.38, "grad_norm": 0.6701902747154236, "learning_rate": 6.045873433567622e-05, "loss": 1.6209, "step": 71628 }, { "epoch": 2.38, "grad_norm": 0.681158721446991, "learning_rate": 6.045244195757115e-05, "loss": 1.6657, "step": 71629 }, { "epoch": 2.38, "grad_norm": 0.6963729858398438, "learning_rate": 6.0446149870241356e-05, "loss": 1.7864, "step": 71630 }, { "epoch": 2.38, "grad_norm": 0.6818429231643677, "learning_rate": 6.043985807369466e-05, "loss": 1.7021, "step": 71631 }, { "epoch": 2.38, "grad_norm": 0.684776246547699, "learning_rate": 6.043356656793872e-05, "loss": 1.7335, "step": 71632 }, { "epoch": 2.38, "grad_norm": 0.6924291253089905, "learning_rate": 6.042727535298116e-05, "loss": 1.7178, "step": 71633 }, { "epoch": 2.38, "grad_norm": 0.6850313544273376, "learning_rate": 6.042098442882949e-05, "loss": 1.6612, "step": 71634 }, { "epoch": 2.38, "grad_norm": 0.6806919574737549, "learning_rate": 6.0414693795491484e-05, "loss": 1.7291, "step": 71635 }, { "epoch": 2.38, "grad_norm": 0.656964898109436, "learning_rate": 6.0408403452974854e-05, "loss": 1.633, "step": 71636 }, { "epoch": 2.38, "grad_norm": 0.6744130849838257, "learning_rate": 6.040211340128698e-05, "loss": 1.7189, "step": 71637 }, { "epoch": 2.38, "grad_norm": 0.6840488910675049, "learning_rate": 6.0395823640435626e-05, "loss": 1.6923, "step": 71638 }, { "epoch": 2.38, "grad_norm": 0.6808194518089294, "learning_rate": 6.038953417042855e-05, "loss": 1.7072, "step": 71639 }, { "epoch": 2.38, "grad_norm": 0.685044527053833, "learning_rate": 6.038324499127323e-05, "loss": 1.7156, "step": 71640 }, { "epoch": 2.38, "grad_norm": 0.6992125511169434, "learning_rate": 6.03769561029773e-05, "loss": 1.789, "step": 71641 }, { "epoch": 2.38, "grad_norm": 0.6899488568305969, "learning_rate": 6.037066750554851e-05, "loss": 1.633, "step": 71642 }, { "epoch": 2.38, "grad_norm": 0.7150007486343384, "learning_rate": 6.03643791989944e-05, "loss": 1.7689, "step": 71643 }, { "epoch": 2.38, "grad_norm": 0.6704362630844116, "learning_rate": 6.0358091183322585e-05, "loss": 1.6598, "step": 71644 }, { "epoch": 2.38, "grad_norm": 0.9060319066047668, "learning_rate": 6.035180345854071e-05, "loss": 1.6369, "step": 71645 }, { "epoch": 2.38, "grad_norm": 0.6979373097419739, "learning_rate": 6.0345516024656516e-05, "loss": 1.6442, "step": 71646 }, { "epoch": 2.38, "grad_norm": 0.707166850566864, "learning_rate": 6.033922888167755e-05, "loss": 1.7626, "step": 71647 }, { "epoch": 2.38, "grad_norm": 0.6773834228515625, "learning_rate": 6.033294202961138e-05, "loss": 1.6889, "step": 71648 }, { "epoch": 2.38, "grad_norm": 0.6940035223960876, "learning_rate": 6.032665546846579e-05, "loss": 1.7101, "step": 71649 }, { "epoch": 2.38, "grad_norm": 0.7008198499679565, "learning_rate": 6.032036919824822e-05, "loss": 1.6793, "step": 71650 }, { "epoch": 2.38, "grad_norm": 0.6870750784873962, "learning_rate": 6.031408321896653e-05, "loss": 1.6491, "step": 71651 }, { "epoch": 2.38, "grad_norm": 0.6725442409515381, "learning_rate": 6.0307797530628165e-05, "loss": 1.6889, "step": 71652 }, { "epoch": 2.38, "grad_norm": 0.6651763319969177, "learning_rate": 6.030151213324077e-05, "loss": 1.7067, "step": 71653 }, { "epoch": 2.38, "grad_norm": 0.6961055397987366, "learning_rate": 6.029522702681213e-05, "loss": 1.6855, "step": 71654 }, { "epoch": 2.38, "grad_norm": 0.6522596478462219, "learning_rate": 6.028894221134964e-05, "loss": 1.6312, "step": 71655 }, { "epoch": 2.38, "grad_norm": 0.6761699914932251, "learning_rate": 6.028265768686117e-05, "loss": 1.6628, "step": 71656 }, { "epoch": 2.38, "grad_norm": 0.6883699297904968, "learning_rate": 6.0276373453354135e-05, "loss": 1.6777, "step": 71657 }, { "epoch": 2.38, "grad_norm": 0.6843618750572205, "learning_rate": 6.027008951083633e-05, "loss": 1.7293, "step": 71658 }, { "epoch": 2.38, "grad_norm": 0.6797903776168823, "learning_rate": 6.0263805859315316e-05, "loss": 1.6899, "step": 71659 }, { "epoch": 2.38, "grad_norm": 0.6827651858329773, "learning_rate": 6.025752249879866e-05, "loss": 1.7504, "step": 71660 }, { "epoch": 2.38, "grad_norm": 0.6609264612197876, "learning_rate": 6.0251239429294116e-05, "loss": 1.6607, "step": 71661 }, { "epoch": 2.38, "grad_norm": 0.6635218262672424, "learning_rate": 6.024495665080925e-05, "loss": 1.5828, "step": 71662 }, { "epoch": 2.38, "grad_norm": 0.6912730932235718, "learning_rate": 6.023867416335158e-05, "loss": 1.7203, "step": 71663 }, { "epoch": 2.38, "grad_norm": 0.7304447889328003, "learning_rate": 6.023239196692885e-05, "loss": 1.7452, "step": 71664 }, { "epoch": 2.38, "grad_norm": 0.6799823641777039, "learning_rate": 6.022611006154874e-05, "loss": 1.7303, "step": 71665 }, { "epoch": 2.38, "grad_norm": 0.6945880651473999, "learning_rate": 6.021982844721881e-05, "loss": 1.6125, "step": 71666 }, { "epoch": 2.38, "grad_norm": 0.6655780076980591, "learning_rate": 6.021354712394659e-05, "loss": 1.6714, "step": 71667 }, { "epoch": 2.38, "grad_norm": 0.668494701385498, "learning_rate": 6.020726609173988e-05, "loss": 1.6882, "step": 71668 }, { "epoch": 2.38, "grad_norm": 0.6930245161056519, "learning_rate": 6.02009853506062e-05, "loss": 1.7527, "step": 71669 }, { "epoch": 2.38, "grad_norm": 0.7041540741920471, "learning_rate": 6.019470490055315e-05, "loss": 1.6411, "step": 71670 }, { "epoch": 2.38, "grad_norm": 0.6942465901374817, "learning_rate": 6.018842474158835e-05, "loss": 1.6466, "step": 71671 }, { "epoch": 2.38, "grad_norm": 0.7119631171226501, "learning_rate": 6.0182144873719664e-05, "loss": 1.6433, "step": 71672 }, { "epoch": 2.38, "grad_norm": 0.685114860534668, "learning_rate": 6.0175865296954354e-05, "loss": 1.719, "step": 71673 }, { "epoch": 2.38, "grad_norm": 0.6645106673240662, "learning_rate": 6.0169586011300174e-05, "loss": 1.7253, "step": 71674 }, { "epoch": 2.38, "grad_norm": 0.7076685428619385, "learning_rate": 6.0163307016764896e-05, "loss": 1.6994, "step": 71675 }, { "epoch": 2.38, "grad_norm": 0.6837010383605957, "learning_rate": 6.015702831335601e-05, "loss": 1.7275, "step": 71676 }, { "epoch": 2.38, "grad_norm": 0.6766318082809448, "learning_rate": 6.015074990108106e-05, "loss": 1.626, "step": 71677 }, { "epoch": 2.38, "grad_norm": 0.6871198415756226, "learning_rate": 6.014447177994777e-05, "loss": 1.676, "step": 71678 }, { "epoch": 2.38, "grad_norm": 0.7028926610946655, "learning_rate": 6.013819394996391e-05, "loss": 1.7674, "step": 71679 }, { "epoch": 2.38, "grad_norm": 0.6625123620033264, "learning_rate": 6.0131916411136785e-05, "loss": 1.6638, "step": 71680 }, { "epoch": 2.38, "grad_norm": 0.7822774648666382, "learning_rate": 6.012563916347418e-05, "loss": 1.7491, "step": 71681 }, { "epoch": 2.38, "grad_norm": 0.6745038628578186, "learning_rate": 6.0119362206983765e-05, "loss": 1.6357, "step": 71682 }, { "epoch": 2.38, "grad_norm": 0.6591256856918335, "learning_rate": 6.01130855416731e-05, "loss": 1.6837, "step": 71683 }, { "epoch": 2.38, "grad_norm": 0.6873741149902344, "learning_rate": 6.010680916754974e-05, "loss": 1.7455, "step": 71684 }, { "epoch": 2.38, "grad_norm": 0.6871071457862854, "learning_rate": 6.010053308462135e-05, "loss": 1.6245, "step": 71685 }, { "epoch": 2.39, "grad_norm": 0.6989098191261292, "learning_rate": 6.0094257292895754e-05, "loss": 1.7444, "step": 71686 }, { "epoch": 2.39, "grad_norm": 0.6768741011619568, "learning_rate": 6.008798179238018e-05, "loss": 1.727, "step": 71687 }, { "epoch": 2.39, "grad_norm": 0.7134240865707397, "learning_rate": 6.008170658308249e-05, "loss": 1.6821, "step": 71688 }, { "epoch": 2.39, "grad_norm": 0.6825284957885742, "learning_rate": 6.007543166501031e-05, "loss": 1.6761, "step": 71689 }, { "epoch": 2.39, "grad_norm": 0.6712568998336792, "learning_rate": 6.00691570381712e-05, "loss": 1.65, "step": 71690 }, { "epoch": 2.39, "grad_norm": 0.6742451786994934, "learning_rate": 6.0062882702572726e-05, "loss": 1.7061, "step": 71691 }, { "epoch": 2.39, "grad_norm": 0.7008320689201355, "learning_rate": 6.005660865822261e-05, "loss": 1.7449, "step": 71692 }, { "epoch": 2.39, "grad_norm": 0.6655954718589783, "learning_rate": 6.005033490512844e-05, "loss": 1.6608, "step": 71693 }, { "epoch": 2.39, "grad_norm": 0.6720772385597229, "learning_rate": 6.004406144329772e-05, "loss": 1.6782, "step": 71694 }, { "epoch": 2.39, "grad_norm": 0.6698313355445862, "learning_rate": 6.003778827273814e-05, "loss": 1.6278, "step": 71695 }, { "epoch": 2.39, "grad_norm": 0.6858245730400085, "learning_rate": 6.003151539345743e-05, "loss": 1.6549, "step": 71696 }, { "epoch": 2.39, "grad_norm": 0.6911961436271667, "learning_rate": 6.002524280546308e-05, "loss": 1.6747, "step": 71697 }, { "epoch": 2.39, "grad_norm": 0.6794778108596802, "learning_rate": 6.001897050876269e-05, "loss": 1.6582, "step": 71698 }, { "epoch": 2.39, "grad_norm": 0.6813070178031921, "learning_rate": 6.001269850336394e-05, "loss": 1.5847, "step": 71699 }, { "epoch": 2.39, "grad_norm": 0.682741105556488, "learning_rate": 6.000642678927444e-05, "loss": 1.6973, "step": 71700 }, { "epoch": 2.39, "grad_norm": 0.675605058670044, "learning_rate": 6.000015536650168e-05, "loss": 1.6447, "step": 71701 }, { "epoch": 2.39, "grad_norm": 0.6889244914054871, "learning_rate": 5.999388423505348e-05, "loss": 1.7729, "step": 71702 }, { "epoch": 2.39, "grad_norm": 0.6927638053894043, "learning_rate": 5.998761339493724e-05, "loss": 1.7586, "step": 71703 }, { "epoch": 2.39, "grad_norm": 0.6694613099098206, "learning_rate": 5.998134284616075e-05, "loss": 1.6934, "step": 71704 }, { "epoch": 2.39, "grad_norm": 0.690269410610199, "learning_rate": 5.9975072588731474e-05, "loss": 1.6704, "step": 71705 }, { "epoch": 2.39, "grad_norm": 0.6965827345848083, "learning_rate": 5.996880262265717e-05, "loss": 1.6904, "step": 71706 }, { "epoch": 2.39, "grad_norm": 0.6725305318832397, "learning_rate": 5.996253294794537e-05, "loss": 1.7175, "step": 71707 }, { "epoch": 2.39, "grad_norm": 0.6831306219100952, "learning_rate": 5.99562635646036e-05, "loss": 1.5681, "step": 71708 }, { "epoch": 2.39, "grad_norm": 0.7259474396705627, "learning_rate": 5.994999447263965e-05, "loss": 1.7316, "step": 71709 }, { "epoch": 2.39, "grad_norm": 0.6856969594955444, "learning_rate": 5.9943725672060947e-05, "loss": 1.7467, "step": 71710 }, { "epoch": 2.39, "grad_norm": 0.6610938310623169, "learning_rate": 5.993745716287529e-05, "loss": 1.6796, "step": 71711 }, { "epoch": 2.39, "grad_norm": 0.6568908095359802, "learning_rate": 5.993118894509017e-05, "loss": 1.6769, "step": 71712 }, { "epoch": 2.39, "grad_norm": 0.7055304646492004, "learning_rate": 5.9924921018713143e-05, "loss": 1.656, "step": 71713 }, { "epoch": 2.39, "grad_norm": 0.6992990970611572, "learning_rate": 5.991865338375198e-05, "loss": 1.5976, "step": 71714 }, { "epoch": 2.39, "grad_norm": 0.6708656549453735, "learning_rate": 5.99123860402141e-05, "loss": 1.6764, "step": 71715 }, { "epoch": 2.39, "grad_norm": 0.6804572343826294, "learning_rate": 5.990611898810727e-05, "loss": 1.7286, "step": 71716 }, { "epoch": 2.39, "grad_norm": 0.6946213841438293, "learning_rate": 5.989985222743897e-05, "loss": 1.7278, "step": 71717 }, { "epoch": 2.39, "grad_norm": 0.6944178342819214, "learning_rate": 5.989358575821698e-05, "loss": 1.6954, "step": 71718 }, { "epoch": 2.39, "grad_norm": 0.7197678685188293, "learning_rate": 5.988731958044878e-05, "loss": 1.705, "step": 71719 }, { "epoch": 2.39, "grad_norm": 0.693139910697937, "learning_rate": 5.98810536941419e-05, "loss": 1.6953, "step": 71720 }, { "epoch": 2.39, "grad_norm": 0.6864455938339233, "learning_rate": 5.9874788099304136e-05, "loss": 1.7255, "step": 71721 }, { "epoch": 2.39, "grad_norm": 0.6709205508232117, "learning_rate": 5.986852279594299e-05, "loss": 1.6803, "step": 71722 }, { "epoch": 2.39, "grad_norm": 0.6801770329475403, "learning_rate": 5.9862257784066e-05, "loss": 1.6449, "step": 71723 }, { "epoch": 2.39, "grad_norm": 0.7265896797180176, "learning_rate": 5.985599306368085e-05, "loss": 1.7559, "step": 71724 }, { "epoch": 2.39, "grad_norm": 0.7017147541046143, "learning_rate": 5.984972863479522e-05, "loss": 1.7341, "step": 71725 }, { "epoch": 2.39, "grad_norm": 0.6829649209976196, "learning_rate": 5.984346449741665e-05, "loss": 1.6854, "step": 71726 }, { "epoch": 2.39, "grad_norm": 0.6913394331932068, "learning_rate": 5.983720065155262e-05, "loss": 1.7422, "step": 71727 }, { "epoch": 2.39, "grad_norm": 0.6862437129020691, "learning_rate": 5.983093709721093e-05, "loss": 1.6674, "step": 71728 }, { "epoch": 2.39, "grad_norm": 0.6771267652511597, "learning_rate": 5.982467383439911e-05, "loss": 1.6773, "step": 71729 }, { "epoch": 2.39, "grad_norm": 0.6909403204917908, "learning_rate": 5.9818410863124646e-05, "loss": 1.7286, "step": 71730 }, { "epoch": 2.39, "grad_norm": 0.678402841091156, "learning_rate": 5.981214818339527e-05, "loss": 1.6412, "step": 71731 }, { "epoch": 2.39, "grad_norm": 0.6746028065681458, "learning_rate": 5.980588579521861e-05, "loss": 1.6903, "step": 71732 }, { "epoch": 2.39, "grad_norm": 0.7057658433914185, "learning_rate": 5.9799623698602226e-05, "loss": 1.6199, "step": 71733 }, { "epoch": 2.39, "grad_norm": 0.6585760712623596, "learning_rate": 5.979336189355361e-05, "loss": 1.6745, "step": 71734 }, { "epoch": 2.39, "grad_norm": 0.6843583583831787, "learning_rate": 5.9787100380080565e-05, "loss": 1.7369, "step": 71735 }, { "epoch": 2.39, "grad_norm": 0.6778461933135986, "learning_rate": 5.9780839158190574e-05, "loss": 1.6722, "step": 71736 }, { "epoch": 2.39, "grad_norm": 0.6956380009651184, "learning_rate": 5.977457822789117e-05, "loss": 1.7324, "step": 71737 }, { "epoch": 2.39, "grad_norm": 0.6854609847068787, "learning_rate": 5.976831758919004e-05, "loss": 1.7016, "step": 71738 }, { "epoch": 2.39, "grad_norm": 0.6740881204605103, "learning_rate": 5.9762057242094916e-05, "loss": 1.6996, "step": 71739 }, { "epoch": 2.39, "grad_norm": 0.6917807459831238, "learning_rate": 5.975579718661313e-05, "loss": 1.645, "step": 71740 }, { "epoch": 2.39, "grad_norm": 0.6711934208869934, "learning_rate": 5.974953742275237e-05, "loss": 1.6292, "step": 71741 }, { "epoch": 2.39, "grad_norm": 0.6809974908828735, "learning_rate": 5.9743277950520395e-05, "loss": 1.7204, "step": 71742 }, { "epoch": 2.39, "grad_norm": 0.6858918070793152, "learning_rate": 5.973701876992464e-05, "loss": 1.6403, "step": 71743 }, { "epoch": 2.39, "grad_norm": 0.6605222821235657, "learning_rate": 5.973075988097269e-05, "loss": 1.6769, "step": 71744 }, { "epoch": 2.39, "grad_norm": 0.7353506088256836, "learning_rate": 5.972450128367218e-05, "loss": 1.6484, "step": 71745 }, { "epoch": 2.39, "grad_norm": 0.6771205067634583, "learning_rate": 5.97182429780309e-05, "loss": 1.6909, "step": 71746 }, { "epoch": 2.39, "grad_norm": 0.6809995174407959, "learning_rate": 5.971198496405607e-05, "loss": 1.7115, "step": 71747 }, { "epoch": 2.39, "grad_norm": 0.6773003339767456, "learning_rate": 5.970572724175553e-05, "loss": 1.6243, "step": 71748 }, { "epoch": 2.39, "grad_norm": 0.6725454330444336, "learning_rate": 5.96994698111369e-05, "loss": 1.6635, "step": 71749 }, { "epoch": 2.39, "grad_norm": 0.69050532579422, "learning_rate": 5.969321267220768e-05, "loss": 1.6486, "step": 71750 }, { "epoch": 2.39, "grad_norm": 0.6796182990074158, "learning_rate": 5.9686955824975426e-05, "loss": 1.6343, "step": 71751 }, { "epoch": 2.39, "grad_norm": 0.6499950289726257, "learning_rate": 5.968069926944783e-05, "loss": 1.6859, "step": 71752 }, { "epoch": 2.39, "grad_norm": 0.6807407736778259, "learning_rate": 5.967444300563259e-05, "loss": 1.6707, "step": 71753 }, { "epoch": 2.39, "grad_norm": 0.69825279712677, "learning_rate": 5.9668187033537e-05, "loss": 1.7134, "step": 71754 }, { "epoch": 2.39, "grad_norm": 0.6631638407707214, "learning_rate": 5.966193135316881e-05, "loss": 1.7079, "step": 71755 }, { "epoch": 2.39, "grad_norm": 0.659536600112915, "learning_rate": 5.965567596453573e-05, "loss": 1.7697, "step": 71756 }, { "epoch": 2.39, "grad_norm": 0.668264627456665, "learning_rate": 5.964942086764525e-05, "loss": 1.7088, "step": 71757 }, { "epoch": 2.39, "grad_norm": 0.6601323485374451, "learning_rate": 5.964316606250486e-05, "loss": 1.7328, "step": 71758 }, { "epoch": 2.39, "grad_norm": 0.6898815035820007, "learning_rate": 5.963691154912235e-05, "loss": 1.7187, "step": 71759 }, { "epoch": 2.39, "grad_norm": 0.6715462803840637, "learning_rate": 5.963065732750518e-05, "loss": 1.6397, "step": 71760 }, { "epoch": 2.39, "grad_norm": 0.698646605014801, "learning_rate": 5.962440339766093e-05, "loss": 1.7611, "step": 71761 }, { "epoch": 2.39, "grad_norm": 0.6831979751586914, "learning_rate": 5.961814975959723e-05, "loss": 1.6868, "step": 71762 }, { "epoch": 2.39, "grad_norm": 0.7130995392799377, "learning_rate": 5.9611896413321734e-05, "loss": 1.7739, "step": 71763 }, { "epoch": 2.39, "grad_norm": 0.7005122900009155, "learning_rate": 5.960564335884199e-05, "loss": 1.6674, "step": 71764 }, { "epoch": 2.39, "grad_norm": 0.7184401750564575, "learning_rate": 5.9599390596165495e-05, "loss": 1.7305, "step": 71765 }, { "epoch": 2.39, "grad_norm": 0.6938607096672058, "learning_rate": 5.959313812530001e-05, "loss": 1.7669, "step": 71766 }, { "epoch": 2.39, "grad_norm": 0.7153581976890564, "learning_rate": 5.9586885946252926e-05, "loss": 1.7685, "step": 71767 }, { "epoch": 2.39, "grad_norm": 0.6860643029212952, "learning_rate": 5.958063405903204e-05, "loss": 1.747, "step": 71768 }, { "epoch": 2.39, "grad_norm": 0.7006340622901917, "learning_rate": 5.957438246364484e-05, "loss": 1.7985, "step": 71769 }, { "epoch": 2.39, "grad_norm": 0.6836153864860535, "learning_rate": 5.956813116009883e-05, "loss": 1.6894, "step": 71770 }, { "epoch": 2.39, "grad_norm": 0.6763994097709656, "learning_rate": 5.9561880148401767e-05, "loss": 1.722, "step": 71771 }, { "epoch": 2.39, "grad_norm": 0.682338535785675, "learning_rate": 5.955562942856107e-05, "loss": 1.7572, "step": 71772 }, { "epoch": 2.39, "grad_norm": 0.7040789127349854, "learning_rate": 5.9549379000584506e-05, "loss": 1.8117, "step": 71773 }, { "epoch": 2.39, "grad_norm": 0.6823924779891968, "learning_rate": 5.9543128864479474e-05, "loss": 1.6771, "step": 71774 }, { "epoch": 2.39, "grad_norm": 0.6704514026641846, "learning_rate": 5.953687902025372e-05, "loss": 1.6891, "step": 71775 }, { "epoch": 2.39, "grad_norm": 0.6668508648872375, "learning_rate": 5.953062946791479e-05, "loss": 1.7368, "step": 71776 }, { "epoch": 2.39, "grad_norm": 0.6916138529777527, "learning_rate": 5.952438020747017e-05, "loss": 1.6936, "step": 71777 }, { "epoch": 2.39, "grad_norm": 0.6728166341781616, "learning_rate": 5.951813123892758e-05, "loss": 1.6607, "step": 71778 }, { "epoch": 2.39, "grad_norm": 0.7022762894630432, "learning_rate": 5.951188256229452e-05, "loss": 1.6494, "step": 71779 }, { "epoch": 2.39, "grad_norm": 0.6821744441986084, "learning_rate": 5.9505634177578555e-05, "loss": 1.7263, "step": 71780 }, { "epoch": 2.39, "grad_norm": 0.699862003326416, "learning_rate": 5.9499386084787303e-05, "loss": 1.7106, "step": 71781 }, { "epoch": 2.39, "grad_norm": 0.720426082611084, "learning_rate": 5.9493138283928465e-05, "loss": 1.6847, "step": 71782 }, { "epoch": 2.39, "grad_norm": 0.6775804162025452, "learning_rate": 5.94868907750095e-05, "loss": 1.6842, "step": 71783 }, { "epoch": 2.39, "grad_norm": 0.6780641674995422, "learning_rate": 5.9480643558037935e-05, "loss": 1.6346, "step": 71784 }, { "epoch": 2.39, "grad_norm": 0.6808710098266602, "learning_rate": 5.947439663302149e-05, "loss": 1.7674, "step": 71785 }, { "epoch": 2.39, "grad_norm": 0.700108528137207, "learning_rate": 5.946814999996771e-05, "loss": 1.7017, "step": 71786 }, { "epoch": 2.39, "grad_norm": 0.6807137131690979, "learning_rate": 5.946190365888407e-05, "loss": 1.6923, "step": 71787 }, { "epoch": 2.39, "grad_norm": 0.7098481059074402, "learning_rate": 5.945565760977824e-05, "loss": 1.7137, "step": 71788 }, { "epoch": 2.39, "grad_norm": 0.6763221621513367, "learning_rate": 5.944941185265795e-05, "loss": 1.7223, "step": 71789 }, { "epoch": 2.39, "grad_norm": 0.6800724267959595, "learning_rate": 5.944316638753048e-05, "loss": 1.6938, "step": 71790 }, { "epoch": 2.39, "grad_norm": 0.6812893748283386, "learning_rate": 5.943692121440358e-05, "loss": 1.648, "step": 71791 }, { "epoch": 2.39, "grad_norm": 0.6655184626579285, "learning_rate": 5.9430676333284853e-05, "loss": 1.6641, "step": 71792 }, { "epoch": 2.39, "grad_norm": 0.6728538870811462, "learning_rate": 5.942443174418184e-05, "loss": 1.6355, "step": 71793 }, { "epoch": 2.39, "grad_norm": 0.72957843542099, "learning_rate": 5.941818744710207e-05, "loss": 1.7122, "step": 71794 }, { "epoch": 2.39, "grad_norm": 0.6771979928016663, "learning_rate": 5.941194344205317e-05, "loss": 1.769, "step": 71795 }, { "epoch": 2.39, "grad_norm": 0.677780032157898, "learning_rate": 5.9405699729042854e-05, "loss": 1.6968, "step": 71796 }, { "epoch": 2.39, "grad_norm": 0.6582326292991638, "learning_rate": 5.939945630807843e-05, "loss": 1.5959, "step": 71797 }, { "epoch": 2.39, "grad_norm": 0.6938561201095581, "learning_rate": 5.939321317916762e-05, "loss": 1.65, "step": 71798 }, { "epoch": 2.39, "grad_norm": 0.6837087273597717, "learning_rate": 5.938697034231805e-05, "loss": 1.6776, "step": 71799 }, { "epoch": 2.39, "grad_norm": 0.6856551170349121, "learning_rate": 5.938072779753729e-05, "loss": 1.7311, "step": 71800 }, { "epoch": 2.39, "grad_norm": 0.664656400680542, "learning_rate": 5.937448554483275e-05, "loss": 1.7007, "step": 71801 }, { "epoch": 2.39, "grad_norm": 0.6597775220870972, "learning_rate": 5.936824358421214e-05, "loss": 1.6908, "step": 71802 }, { "epoch": 2.39, "grad_norm": 0.6908348798751831, "learning_rate": 5.9362001915683214e-05, "loss": 1.7449, "step": 71803 }, { "epoch": 2.39, "grad_norm": 0.7534580826759338, "learning_rate": 5.935576053925316e-05, "loss": 1.7292, "step": 71804 }, { "epoch": 2.39, "grad_norm": 0.6788901686668396, "learning_rate": 5.9349519454929785e-05, "loss": 1.608, "step": 71805 }, { "epoch": 2.39, "grad_norm": 0.6748631596565247, "learning_rate": 5.9343278662720704e-05, "loss": 1.6205, "step": 71806 }, { "epoch": 2.39, "grad_norm": 0.7022029161453247, "learning_rate": 5.9337038162633425e-05, "loss": 1.6989, "step": 71807 }, { "epoch": 2.39, "grad_norm": 0.6892557740211487, "learning_rate": 5.9330797954675456e-05, "loss": 1.6701, "step": 71808 }, { "epoch": 2.39, "grad_norm": 0.6992717385292053, "learning_rate": 5.9324558038854475e-05, "loss": 1.7644, "step": 71809 }, { "epoch": 2.39, "grad_norm": 0.6762574315071106, "learning_rate": 5.9318318415178065e-05, "loss": 1.7258, "step": 71810 }, { "epoch": 2.39, "grad_norm": 0.689928412437439, "learning_rate": 5.9312079083653665e-05, "loss": 1.6923, "step": 71811 }, { "epoch": 2.39, "grad_norm": 0.6729199886322021, "learning_rate": 5.930584004428892e-05, "loss": 1.7112, "step": 71812 }, { "epoch": 2.39, "grad_norm": 0.6904407143592834, "learning_rate": 5.92996012970915e-05, "loss": 1.6567, "step": 71813 }, { "epoch": 2.39, "grad_norm": 0.7106376886367798, "learning_rate": 5.92933628420689e-05, "loss": 1.6862, "step": 71814 }, { "epoch": 2.39, "grad_norm": 0.6820429563522339, "learning_rate": 5.9287124679228615e-05, "loss": 1.8012, "step": 71815 }, { "epoch": 2.39, "grad_norm": 0.6688070893287659, "learning_rate": 5.9280886808578395e-05, "loss": 1.7321, "step": 71816 }, { "epoch": 2.39, "grad_norm": 0.6690786480903625, "learning_rate": 5.927464923012568e-05, "loss": 1.7365, "step": 71817 }, { "epoch": 2.39, "grad_norm": 0.6737565398216248, "learning_rate": 5.9268411943878026e-05, "loss": 1.7016, "step": 71818 }, { "epoch": 2.39, "grad_norm": 0.674771249294281, "learning_rate": 5.926217494984309e-05, "loss": 1.7454, "step": 71819 }, { "epoch": 2.39, "grad_norm": 0.6913813352584839, "learning_rate": 5.925593824802834e-05, "loss": 1.7009, "step": 71820 }, { "epoch": 2.39, "grad_norm": 0.6933068633079529, "learning_rate": 5.924970183844149e-05, "loss": 1.6872, "step": 71821 }, { "epoch": 2.39, "grad_norm": 0.6926448345184326, "learning_rate": 5.924346572108994e-05, "loss": 1.7749, "step": 71822 }, { "epoch": 2.39, "grad_norm": 0.7181902527809143, "learning_rate": 5.923722989598143e-05, "loss": 1.7168, "step": 71823 }, { "epoch": 2.39, "grad_norm": 0.6672325134277344, "learning_rate": 5.923099436312347e-05, "loss": 1.7523, "step": 71824 }, { "epoch": 2.39, "grad_norm": 0.6696755290031433, "learning_rate": 5.92247591225235e-05, "loss": 1.6545, "step": 71825 }, { "epoch": 2.39, "grad_norm": 0.6757427453994751, "learning_rate": 5.9218524174189305e-05, "loss": 1.6515, "step": 71826 }, { "epoch": 2.39, "grad_norm": 0.6875499486923218, "learning_rate": 5.921228951812825e-05, "loss": 1.7648, "step": 71827 }, { "epoch": 2.39, "grad_norm": 0.6748717427253723, "learning_rate": 5.9206055154348064e-05, "loss": 1.6782, "step": 71828 }, { "epoch": 2.39, "grad_norm": 0.653514564037323, "learning_rate": 5.919982108285627e-05, "loss": 1.6624, "step": 71829 }, { "epoch": 2.39, "grad_norm": 0.7087410092353821, "learning_rate": 5.9193587303660295e-05, "loss": 1.737, "step": 71830 }, { "epoch": 2.39, "grad_norm": 0.6778170466423035, "learning_rate": 5.918735381676794e-05, "loss": 1.6811, "step": 71831 }, { "epoch": 2.39, "grad_norm": 0.679396390914917, "learning_rate": 5.9181120622186584e-05, "loss": 1.6925, "step": 71832 }, { "epoch": 2.39, "grad_norm": 0.6631454229354858, "learning_rate": 5.91748877199239e-05, "loss": 1.7333, "step": 71833 }, { "epoch": 2.39, "grad_norm": 0.6763246655464172, "learning_rate": 5.916865510998738e-05, "loss": 1.6511, "step": 71834 }, { "epoch": 2.39, "grad_norm": 0.6756078004837036, "learning_rate": 5.916242279238469e-05, "loss": 1.7085, "step": 71835 }, { "epoch": 2.39, "grad_norm": 0.6926558613777161, "learning_rate": 5.9156190767123345e-05, "loss": 1.6529, "step": 71836 }, { "epoch": 2.39, "grad_norm": 0.6860952377319336, "learning_rate": 5.914995903421078e-05, "loss": 1.6658, "step": 71837 }, { "epoch": 2.39, "grad_norm": 0.6548267006874084, "learning_rate": 5.9143727593654786e-05, "loss": 1.6851, "step": 71838 }, { "epoch": 2.39, "grad_norm": 0.6852246522903442, "learning_rate": 5.913749644546283e-05, "loss": 1.7625, "step": 71839 }, { "epoch": 2.39, "grad_norm": 0.6803756356239319, "learning_rate": 5.913126558964236e-05, "loss": 1.7241, "step": 71840 }, { "epoch": 2.39, "grad_norm": 0.6658334136009216, "learning_rate": 5.912503502620105e-05, "loss": 1.7016, "step": 71841 }, { "epoch": 2.39, "grad_norm": 0.6792535185813904, "learning_rate": 5.9118804755146554e-05, "loss": 1.5964, "step": 71842 }, { "epoch": 2.39, "grad_norm": 0.6653602719306946, "learning_rate": 5.9112574776486334e-05, "loss": 1.6648, "step": 71843 }, { "epoch": 2.39, "grad_norm": 0.6667295694351196, "learning_rate": 5.9106345090227844e-05, "loss": 1.6735, "step": 71844 }, { "epoch": 2.39, "grad_norm": 0.6796128749847412, "learning_rate": 5.9100115696378855e-05, "loss": 1.6136, "step": 71845 }, { "epoch": 2.39, "grad_norm": 0.674839437007904, "learning_rate": 5.909388659494682e-05, "loss": 1.7418, "step": 71846 }, { "epoch": 2.39, "grad_norm": 0.706859290599823, "learning_rate": 5.908765778593928e-05, "loss": 1.8126, "step": 71847 }, { "epoch": 2.39, "grad_norm": 0.6699540019035339, "learning_rate": 5.908142926936378e-05, "loss": 1.6929, "step": 71848 }, { "epoch": 2.39, "grad_norm": 0.6898793578147888, "learning_rate": 5.907520104522802e-05, "loss": 1.6735, "step": 71849 }, { "epoch": 2.39, "grad_norm": 0.7127878665924072, "learning_rate": 5.906897311353945e-05, "loss": 1.6341, "step": 71850 }, { "epoch": 2.39, "grad_norm": 0.6872362494468689, "learning_rate": 5.906274547430558e-05, "loss": 1.7041, "step": 71851 }, { "epoch": 2.39, "grad_norm": 0.6901790499687195, "learning_rate": 5.9056518127534134e-05, "loss": 1.6965, "step": 71852 }, { "epoch": 2.39, "grad_norm": 0.6806679368019104, "learning_rate": 5.905029107323254e-05, "loss": 1.6879, "step": 71853 }, { "epoch": 2.39, "grad_norm": 0.6840148568153381, "learning_rate": 5.9044064311408344e-05, "loss": 1.7088, "step": 71854 }, { "epoch": 2.39, "grad_norm": 0.6649169325828552, "learning_rate": 5.9037837842069124e-05, "loss": 1.6771, "step": 71855 }, { "epoch": 2.39, "grad_norm": 0.7104311585426331, "learning_rate": 5.9031611665222633e-05, "loss": 1.7064, "step": 71856 }, { "epoch": 2.39, "grad_norm": 0.6755023002624512, "learning_rate": 5.90253857808761e-05, "loss": 1.6545, "step": 71857 }, { "epoch": 2.39, "grad_norm": 0.6960051655769348, "learning_rate": 5.901916018903725e-05, "loss": 1.6924, "step": 71858 }, { "epoch": 2.39, "grad_norm": 0.7038958072662354, "learning_rate": 5.9012934889713716e-05, "loss": 1.6273, "step": 71859 }, { "epoch": 2.39, "grad_norm": 0.6697421669960022, "learning_rate": 5.900670988291295e-05, "loss": 1.7379, "step": 71860 }, { "epoch": 2.39, "grad_norm": 0.6647953987121582, "learning_rate": 5.900048516864249e-05, "loss": 1.7439, "step": 71861 }, { "epoch": 2.39, "grad_norm": 0.6802079677581787, "learning_rate": 5.899426074690988e-05, "loss": 1.7037, "step": 71862 }, { "epoch": 2.39, "grad_norm": 0.6662869453430176, "learning_rate": 5.898803661772293e-05, "loss": 1.6951, "step": 71863 }, { "epoch": 2.39, "grad_norm": 0.6800721287727356, "learning_rate": 5.89818127810888e-05, "loss": 1.7018, "step": 71864 }, { "epoch": 2.39, "grad_norm": 0.7093119621276855, "learning_rate": 5.897558923701528e-05, "loss": 1.7426, "step": 71865 }, { "epoch": 2.39, "grad_norm": 0.6696145534515381, "learning_rate": 5.896936598550992e-05, "loss": 1.726, "step": 71866 }, { "epoch": 2.39, "grad_norm": 0.6984213590621948, "learning_rate": 5.896314302658023e-05, "loss": 1.7471, "step": 71867 }, { "epoch": 2.39, "grad_norm": 0.6771557331085205, "learning_rate": 5.895692036023373e-05, "loss": 1.6842, "step": 71868 }, { "epoch": 2.39, "grad_norm": 0.6760336756706238, "learning_rate": 5.895069798647798e-05, "loss": 1.7602, "step": 71869 }, { "epoch": 2.39, "grad_norm": 0.6818860769271851, "learning_rate": 5.894447590532074e-05, "loss": 1.6741, "step": 71870 }, { "epoch": 2.39, "grad_norm": 0.6655493974685669, "learning_rate": 5.893825411676921e-05, "loss": 1.7338, "step": 71871 }, { "epoch": 2.39, "grad_norm": 0.6905746459960938, "learning_rate": 5.893203262083115e-05, "loss": 1.7414, "step": 71872 }, { "epoch": 2.39, "grad_norm": 0.6725509762763977, "learning_rate": 5.892581141751415e-05, "loss": 1.7103, "step": 71873 }, { "epoch": 2.39, "grad_norm": 0.6810537576675415, "learning_rate": 5.8919590506825664e-05, "loss": 1.6887, "step": 71874 }, { "epoch": 2.39, "grad_norm": 0.6647794246673584, "learning_rate": 5.891336988877323e-05, "loss": 1.6853, "step": 71875 }, { "epoch": 2.39, "grad_norm": 0.685375988483429, "learning_rate": 5.890714956336451e-05, "loss": 1.7231, "step": 71876 }, { "epoch": 2.39, "grad_norm": 0.6945877075195312, "learning_rate": 5.890092953060699e-05, "loss": 1.7572, "step": 71877 }, { "epoch": 2.39, "grad_norm": 0.6651846766471863, "learning_rate": 5.889470979050813e-05, "loss": 1.7083, "step": 71878 }, { "epoch": 2.39, "grad_norm": 0.7252999544143677, "learning_rate": 5.88884903430756e-05, "loss": 1.7642, "step": 71879 }, { "epoch": 2.39, "grad_norm": 0.6878933310508728, "learning_rate": 5.888227118831696e-05, "loss": 1.7133, "step": 71880 }, { "epoch": 2.39, "grad_norm": 0.678998589515686, "learning_rate": 5.887605232623972e-05, "loss": 1.6778, "step": 71881 }, { "epoch": 2.39, "grad_norm": 0.6803290247917175, "learning_rate": 5.886983375685136e-05, "loss": 1.7195, "step": 71882 }, { "epoch": 2.39, "grad_norm": 0.6770721673965454, "learning_rate": 5.8863615480159563e-05, "loss": 1.6354, "step": 71883 }, { "epoch": 2.39, "grad_norm": 0.6753054857254028, "learning_rate": 5.885739749617173e-05, "loss": 1.6855, "step": 71884 }, { "epoch": 2.39, "grad_norm": 0.6675550937652588, "learning_rate": 5.885117980489555e-05, "loss": 1.6675, "step": 71885 }, { "epoch": 2.39, "grad_norm": 0.6811652183532715, "learning_rate": 5.884496240633852e-05, "loss": 1.706, "step": 71886 }, { "epoch": 2.39, "grad_norm": 0.6891486644744873, "learning_rate": 5.8838745300508105e-05, "loss": 1.6746, "step": 71887 }, { "epoch": 2.39, "grad_norm": 0.6806421279907227, "learning_rate": 5.883252848741199e-05, "loss": 1.6746, "step": 71888 }, { "epoch": 2.39, "grad_norm": 0.6679842472076416, "learning_rate": 5.882631196705757e-05, "loss": 1.745, "step": 71889 }, { "epoch": 2.39, "grad_norm": 0.7182466983795166, "learning_rate": 5.882009573945254e-05, "loss": 1.705, "step": 71890 }, { "epoch": 2.39, "grad_norm": 0.6968885660171509, "learning_rate": 5.8813879804604334e-05, "loss": 1.7018, "step": 71891 }, { "epoch": 2.39, "grad_norm": 0.6719973683357239, "learning_rate": 5.880766416252061e-05, "loss": 1.6957, "step": 71892 }, { "epoch": 2.39, "grad_norm": 0.7107454538345337, "learning_rate": 5.8801448813208815e-05, "loss": 1.7521, "step": 71893 }, { "epoch": 2.39, "grad_norm": 0.6834982633590698, "learning_rate": 5.8795233756676466e-05, "loss": 1.7137, "step": 71894 }, { "epoch": 2.39, "grad_norm": 0.6892536282539368, "learning_rate": 5.878901899293123e-05, "loss": 1.6702, "step": 71895 }, { "epoch": 2.39, "grad_norm": 0.6862752437591553, "learning_rate": 5.8782804521980564e-05, "loss": 1.6669, "step": 71896 }, { "epoch": 2.39, "grad_norm": 0.6641758680343628, "learning_rate": 5.877659034383197e-05, "loss": 1.6339, "step": 71897 }, { "epoch": 2.39, "grad_norm": 0.6712467074394226, "learning_rate": 5.8770376458493097e-05, "loss": 1.7139, "step": 71898 }, { "epoch": 2.39, "grad_norm": 0.6828805804252625, "learning_rate": 5.876416286597145e-05, "loss": 1.6663, "step": 71899 }, { "epoch": 2.39, "grad_norm": 0.662151038646698, "learning_rate": 5.875794956627461e-05, "loss": 1.744, "step": 71900 }, { "epoch": 2.39, "grad_norm": 0.6715749502182007, "learning_rate": 5.875173655940999e-05, "loss": 1.6288, "step": 71901 }, { "epoch": 2.39, "grad_norm": 0.6897994875907898, "learning_rate": 5.874552384538529e-05, "loss": 1.7479, "step": 71902 }, { "epoch": 2.39, "grad_norm": 0.6730288863182068, "learning_rate": 5.873931142420797e-05, "loss": 1.6359, "step": 71903 }, { "epoch": 2.39, "grad_norm": 0.6778740286827087, "learning_rate": 5.873309929588552e-05, "loss": 1.7154, "step": 71904 }, { "epoch": 2.39, "grad_norm": 0.7058508992195129, "learning_rate": 5.872688746042551e-05, "loss": 1.676, "step": 71905 }, { "epoch": 2.39, "grad_norm": 0.6602106094360352, "learning_rate": 5.872067591783569e-05, "loss": 1.7564, "step": 71906 }, { "epoch": 2.39, "grad_norm": 0.6762633323669434, "learning_rate": 5.871446466812326e-05, "loss": 1.6431, "step": 71907 }, { "epoch": 2.39, "grad_norm": 0.7125304937362671, "learning_rate": 5.870825371129588e-05, "loss": 1.7112, "step": 71908 }, { "epoch": 2.39, "grad_norm": 0.6682937741279602, "learning_rate": 5.870204304736125e-05, "loss": 1.6676, "step": 71909 }, { "epoch": 2.39, "grad_norm": 0.6655334830284119, "learning_rate": 5.869583267632678e-05, "loss": 1.647, "step": 71910 }, { "epoch": 2.39, "grad_norm": 0.697735607624054, "learning_rate": 5.868962259819992e-05, "loss": 1.6953, "step": 71911 }, { "epoch": 2.39, "grad_norm": 0.6821675896644592, "learning_rate": 5.868341281298831e-05, "loss": 1.696, "step": 71912 }, { "epoch": 2.39, "grad_norm": 0.7098984122276306, "learning_rate": 5.867720332069965e-05, "loss": 1.7157, "step": 71913 }, { "epoch": 2.39, "grad_norm": 0.7138335108757019, "learning_rate": 5.867099412134111e-05, "loss": 1.7126, "step": 71914 }, { "epoch": 2.39, "grad_norm": 0.7234495282173157, "learning_rate": 5.8664785214920475e-05, "loss": 1.6978, "step": 71915 }, { "epoch": 2.39, "grad_norm": 0.7032002806663513, "learning_rate": 5.8658576601445274e-05, "loss": 1.6967, "step": 71916 }, { "epoch": 2.39, "grad_norm": 0.6924949288368225, "learning_rate": 5.865236828092304e-05, "loss": 1.6537, "step": 71917 }, { "epoch": 2.39, "grad_norm": 0.7103112936019897, "learning_rate": 5.864616025336115e-05, "loss": 1.6641, "step": 71918 }, { "epoch": 2.39, "grad_norm": 0.6673470735549927, "learning_rate": 5.863995251876728e-05, "loss": 1.6596, "step": 71919 }, { "epoch": 2.39, "grad_norm": 0.6903407573699951, "learning_rate": 5.86337450771491e-05, "loss": 1.7264, "step": 71920 }, { "epoch": 2.39, "grad_norm": 0.7017632126808167, "learning_rate": 5.862753792851381e-05, "loss": 1.7738, "step": 71921 }, { "epoch": 2.39, "grad_norm": 0.7148709297180176, "learning_rate": 5.862133107286916e-05, "loss": 1.7616, "step": 71922 }, { "epoch": 2.39, "grad_norm": 0.6814277172088623, "learning_rate": 5.86151245102227e-05, "loss": 1.7861, "step": 71923 }, { "epoch": 2.39, "grad_norm": 0.6987935304641724, "learning_rate": 5.8608918240581924e-05, "loss": 1.653, "step": 71924 }, { "epoch": 2.39, "grad_norm": 0.6639611721038818, "learning_rate": 5.86027122639543e-05, "loss": 1.6206, "step": 71925 }, { "epoch": 2.39, "grad_norm": 0.6799583435058594, "learning_rate": 5.859650658034748e-05, "loss": 1.6378, "step": 71926 }, { "epoch": 2.39, "grad_norm": 0.6808759570121765, "learning_rate": 5.85903011897689e-05, "loss": 1.6436, "step": 71927 }, { "epoch": 2.39, "grad_norm": 0.7083286643028259, "learning_rate": 5.8584096092226075e-05, "loss": 1.7401, "step": 71928 }, { "epoch": 2.39, "grad_norm": 0.6879473924636841, "learning_rate": 5.857789128772658e-05, "loss": 1.7909, "step": 71929 }, { "epoch": 2.39, "grad_norm": 0.7017524242401123, "learning_rate": 5.8571686776278024e-05, "loss": 1.6961, "step": 71930 }, { "epoch": 2.39, "grad_norm": 0.6648162007331848, "learning_rate": 5.856548255788789e-05, "loss": 1.6422, "step": 71931 }, { "epoch": 2.39, "grad_norm": 0.6969555020332336, "learning_rate": 5.855927863256359e-05, "loss": 1.7156, "step": 71932 }, { "epoch": 2.39, "grad_norm": 0.6957051157951355, "learning_rate": 5.8553075000312825e-05, "loss": 1.7062, "step": 71933 }, { "epoch": 2.39, "grad_norm": 0.6918719410896301, "learning_rate": 5.854687166114304e-05, "loss": 1.6768, "step": 71934 }, { "epoch": 2.39, "grad_norm": 0.6936163306236267, "learning_rate": 5.8540668615061725e-05, "loss": 1.6398, "step": 71935 }, { "epoch": 2.39, "grad_norm": 0.6914923191070557, "learning_rate": 5.8534465862076555e-05, "loss": 1.685, "step": 71936 }, { "epoch": 2.39, "grad_norm": 0.6894678473472595, "learning_rate": 5.852826340219485e-05, "loss": 1.7506, "step": 71937 }, { "epoch": 2.39, "grad_norm": 0.6697961091995239, "learning_rate": 5.852206123542436e-05, "loss": 1.6638, "step": 71938 }, { "epoch": 2.39, "grad_norm": 0.6866709589958191, "learning_rate": 5.851585936177239e-05, "loss": 1.6353, "step": 71939 }, { "epoch": 2.39, "grad_norm": 0.6762477159500122, "learning_rate": 5.850965778124671e-05, "loss": 1.6897, "step": 71940 }, { "epoch": 2.39, "grad_norm": 0.7041168212890625, "learning_rate": 5.8503456493854685e-05, "loss": 1.6958, "step": 71941 }, { "epoch": 2.39, "grad_norm": 0.6556726694107056, "learning_rate": 5.8497255499603834e-05, "loss": 1.6582, "step": 71942 }, { "epoch": 2.39, "grad_norm": 0.675492525100708, "learning_rate": 5.849105479850179e-05, "loss": 1.7096, "step": 71943 }, { "epoch": 2.39, "grad_norm": 0.651168704032898, "learning_rate": 5.848485439055595e-05, "loss": 1.6111, "step": 71944 }, { "epoch": 2.39, "grad_norm": 0.709844172000885, "learning_rate": 5.847865427577396e-05, "loss": 1.7115, "step": 71945 }, { "epoch": 2.39, "grad_norm": 0.6904049515724182, "learning_rate": 5.847245445416333e-05, "loss": 1.6726, "step": 71946 }, { "epoch": 2.39, "grad_norm": 0.6783523559570312, "learning_rate": 5.8466254925731484e-05, "loss": 1.6914, "step": 71947 }, { "epoch": 2.39, "grad_norm": 0.6831316947937012, "learning_rate": 5.8460055690486076e-05, "loss": 1.6333, "step": 71948 }, { "epoch": 2.39, "grad_norm": 0.6743021011352539, "learning_rate": 5.84538567484345e-05, "loss": 1.6326, "step": 71949 }, { "epoch": 2.39, "grad_norm": 0.6833733320236206, "learning_rate": 5.844765809958443e-05, "loss": 1.7403, "step": 71950 }, { "epoch": 2.39, "grad_norm": 0.6842984557151794, "learning_rate": 5.844145974394321e-05, "loss": 1.6423, "step": 71951 }, { "epoch": 2.39, "grad_norm": 0.6756325960159302, "learning_rate": 5.8435261681518575e-05, "loss": 1.6616, "step": 71952 }, { "epoch": 2.39, "grad_norm": 0.6715850234031677, "learning_rate": 5.842906391231792e-05, "loss": 1.6241, "step": 71953 }, { "epoch": 2.39, "grad_norm": 0.684633195400238, "learning_rate": 5.842286643634874e-05, "loss": 1.716, "step": 71954 }, { "epoch": 2.39, "grad_norm": 0.6728362441062927, "learning_rate": 5.8416669253618654e-05, "loss": 1.6176, "step": 71955 }, { "epoch": 2.39, "grad_norm": 0.6889860033988953, "learning_rate": 5.841047236413513e-05, "loss": 1.6192, "step": 71956 }, { "epoch": 2.39, "grad_norm": 0.6779465079307556, "learning_rate": 5.840427576790563e-05, "loss": 1.7064, "step": 71957 }, { "epoch": 2.39, "grad_norm": 0.6735068559646606, "learning_rate": 5.839807946493774e-05, "loss": 1.715, "step": 71958 }, { "epoch": 2.39, "grad_norm": 0.6670244336128235, "learning_rate": 5.839188345523906e-05, "loss": 1.7247, "step": 71959 }, { "epoch": 2.39, "grad_norm": 0.702569842338562, "learning_rate": 5.838568773881706e-05, "loss": 1.6037, "step": 71960 }, { "epoch": 2.39, "grad_norm": 0.6874868869781494, "learning_rate": 5.8379492315679114e-05, "loss": 1.7124, "step": 71961 }, { "epoch": 2.39, "grad_norm": 0.6882337927818298, "learning_rate": 5.837329718583299e-05, "loss": 1.7153, "step": 71962 }, { "epoch": 2.39, "grad_norm": 0.6842588782310486, "learning_rate": 5.8367102349286055e-05, "loss": 1.7273, "step": 71963 }, { "epoch": 2.39, "grad_norm": 0.6869640946388245, "learning_rate": 5.8360907806045766e-05, "loss": 1.7335, "step": 71964 }, { "epoch": 2.39, "grad_norm": 0.6816438436508179, "learning_rate": 5.835471355611975e-05, "loss": 1.6759, "step": 71965 }, { "epoch": 2.39, "grad_norm": 0.6813225150108337, "learning_rate": 5.834851959951563e-05, "loss": 1.7626, "step": 71966 }, { "epoch": 2.39, "grad_norm": 0.6930413842201233, "learning_rate": 5.8342325936240676e-05, "loss": 1.6763, "step": 71967 }, { "epoch": 2.39, "grad_norm": 0.6964421272277832, "learning_rate": 5.8336132566302504e-05, "loss": 1.7043, "step": 71968 }, { "epoch": 2.39, "grad_norm": 0.6676537394523621, "learning_rate": 5.832993948970879e-05, "loss": 1.6272, "step": 71969 }, { "epoch": 2.39, "grad_norm": 0.6854320168495178, "learning_rate": 5.832374670646688e-05, "loss": 1.7012, "step": 71970 }, { "epoch": 2.39, "grad_norm": 0.6980947256088257, "learning_rate": 5.831755421658427e-05, "loss": 1.6871, "step": 71971 }, { "epoch": 2.39, "grad_norm": 0.6574217081069946, "learning_rate": 5.831136202006853e-05, "loss": 1.732, "step": 71972 }, { "epoch": 2.39, "grad_norm": 0.6730859279632568, "learning_rate": 5.8305170116927345e-05, "loss": 1.6109, "step": 71973 }, { "epoch": 2.39, "grad_norm": 0.7107048630714417, "learning_rate": 5.8298978507167915e-05, "loss": 1.7632, "step": 71974 }, { "epoch": 2.39, "grad_norm": 0.726163387298584, "learning_rate": 5.8292787190797895e-05, "loss": 1.7277, "step": 71975 }, { "epoch": 2.39, "grad_norm": 0.6578188538551331, "learning_rate": 5.8286596167824916e-05, "loss": 1.6502, "step": 71976 }, { "epoch": 2.39, "grad_norm": 0.6783257722854614, "learning_rate": 5.8280405438256404e-05, "loss": 1.6317, "step": 71977 }, { "epoch": 2.39, "grad_norm": 0.6801350712776184, "learning_rate": 5.8274215002099754e-05, "loss": 1.724, "step": 71978 }, { "epoch": 2.39, "grad_norm": 0.6925475597381592, "learning_rate": 5.826802485936263e-05, "loss": 1.6393, "step": 71979 }, { "epoch": 2.39, "grad_norm": 0.6589600443840027, "learning_rate": 5.8261835010052615e-05, "loss": 1.6494, "step": 71980 }, { "epoch": 2.39, "grad_norm": 0.7019578814506531, "learning_rate": 5.825564545417698e-05, "loss": 1.6691, "step": 71981 }, { "epoch": 2.39, "grad_norm": 0.7158300280570984, "learning_rate": 5.824945619174338e-05, "loss": 1.7777, "step": 71982 }, { "epoch": 2.39, "grad_norm": 0.6904857754707336, "learning_rate": 5.8243267222759414e-05, "loss": 1.6841, "step": 71983 }, { "epoch": 2.39, "grad_norm": 0.6659296154975891, "learning_rate": 5.823707854723247e-05, "loss": 1.6873, "step": 71984 }, { "epoch": 2.39, "grad_norm": 0.6954745054244995, "learning_rate": 5.823089016517002e-05, "loss": 1.7055, "step": 71985 }, { "epoch": 2.39, "grad_norm": 0.6697484254837036, "learning_rate": 5.822470207657964e-05, "loss": 1.6918, "step": 71986 }, { "epoch": 2.4, "grad_norm": 0.6902151107788086, "learning_rate": 5.8218514281469045e-05, "loss": 1.7394, "step": 71987 }, { "epoch": 2.4, "grad_norm": 0.6831859350204468, "learning_rate": 5.821232677984534e-05, "loss": 1.7026, "step": 71988 }, { "epoch": 2.4, "grad_norm": 0.6750054955482483, "learning_rate": 5.820613957171627e-05, "loss": 1.6349, "step": 71989 }, { "epoch": 2.4, "grad_norm": 0.7282270789146423, "learning_rate": 5.819995265708939e-05, "loss": 1.7137, "step": 71990 }, { "epoch": 2.4, "grad_norm": 0.7056369781494141, "learning_rate": 5.819376603597215e-05, "loss": 1.6248, "step": 71991 }, { "epoch": 2.4, "grad_norm": 0.6973163485527039, "learning_rate": 5.818757970837197e-05, "loss": 1.7212, "step": 71992 }, { "epoch": 2.4, "grad_norm": 0.6768065690994263, "learning_rate": 5.818139367429648e-05, "loss": 1.6273, "step": 71993 }, { "epoch": 2.4, "grad_norm": 0.679395854473114, "learning_rate": 5.817520793375313e-05, "loss": 1.6854, "step": 71994 }, { "epoch": 2.4, "grad_norm": 0.6809567213058472, "learning_rate": 5.8169022486749474e-05, "loss": 1.6409, "step": 71995 }, { "epoch": 2.4, "grad_norm": 0.6938566565513611, "learning_rate": 5.8162837333292936e-05, "loss": 1.6804, "step": 71996 }, { "epoch": 2.4, "grad_norm": 0.6730698943138123, "learning_rate": 5.815665247339118e-05, "loss": 1.6795, "step": 71997 }, { "epoch": 2.4, "grad_norm": 0.6628682017326355, "learning_rate": 5.815046790705159e-05, "loss": 1.6714, "step": 71998 }, { "epoch": 2.4, "grad_norm": 0.6916317343711853, "learning_rate": 5.814428363428161e-05, "loss": 1.6873, "step": 71999 }, { "epoch": 2.4, "grad_norm": 0.6827343702316284, "learning_rate": 5.813809965508892e-05, "loss": 1.7264, "step": 72000 }, { "epoch": 2.4, "grad_norm": 0.6772281527519226, "learning_rate": 5.8131915969480856e-05, "loss": 1.6936, "step": 72001 }, { "epoch": 2.4, "grad_norm": 0.6572601795196533, "learning_rate": 5.812573257746511e-05, "loss": 1.7007, "step": 72002 }, { "epoch": 2.4, "grad_norm": 0.6811229586601257, "learning_rate": 5.811954947904908e-05, "loss": 1.6504, "step": 72003 }, { "epoch": 2.4, "grad_norm": 0.7072787880897522, "learning_rate": 5.811336667424018e-05, "loss": 1.6448, "step": 72004 }, { "epoch": 2.4, "grad_norm": 0.7097740173339844, "learning_rate": 5.810718416304612e-05, "loss": 1.6892, "step": 72005 }, { "epoch": 2.4, "grad_norm": 0.6812931299209595, "learning_rate": 5.810100194547418e-05, "loss": 1.7293, "step": 72006 }, { "epoch": 2.4, "grad_norm": 0.6833972334861755, "learning_rate": 5.80948200215321e-05, "loss": 1.7251, "step": 72007 }, { "epoch": 2.4, "grad_norm": 0.6851720809936523, "learning_rate": 5.808863839122716e-05, "loss": 1.6653, "step": 72008 }, { "epoch": 2.4, "grad_norm": 0.6881024241447449, "learning_rate": 5.808245705456707e-05, "loss": 1.6811, "step": 72009 }, { "epoch": 2.4, "grad_norm": 0.6731058359146118, "learning_rate": 5.807627601155921e-05, "loss": 1.6854, "step": 72010 }, { "epoch": 2.4, "grad_norm": 0.722507894039154, "learning_rate": 5.8070095262211014e-05, "loss": 1.6759, "step": 72011 }, { "epoch": 2.4, "grad_norm": 0.6912715435028076, "learning_rate": 5.806391480653018e-05, "loss": 1.7627, "step": 72012 }, { "epoch": 2.4, "grad_norm": 0.6679458618164062, "learning_rate": 5.8057734644524125e-05, "loss": 1.7249, "step": 72013 }, { "epoch": 2.4, "grad_norm": 0.6869155168533325, "learning_rate": 5.8051554776200216e-05, "loss": 1.7249, "step": 72014 }, { "epoch": 2.4, "grad_norm": 0.6748811602592468, "learning_rate": 5.804537520156608e-05, "loss": 1.6908, "step": 72015 }, { "epoch": 2.4, "grad_norm": 0.6733572483062744, "learning_rate": 5.803919592062928e-05, "loss": 1.7006, "step": 72016 }, { "epoch": 2.4, "grad_norm": 0.6901474595069885, "learning_rate": 5.803301693339727e-05, "loss": 1.6475, "step": 72017 }, { "epoch": 2.4, "grad_norm": 0.694489598274231, "learning_rate": 5.8026838239877415e-05, "loss": 1.63, "step": 72018 }, { "epoch": 2.4, "grad_norm": 0.691137433052063, "learning_rate": 5.8020659840077444e-05, "loss": 1.7791, "step": 72019 }, { "epoch": 2.4, "grad_norm": 0.6642889976501465, "learning_rate": 5.8014481734004715e-05, "loss": 1.6168, "step": 72020 }, { "epoch": 2.4, "grad_norm": 0.6898804903030396, "learning_rate": 5.800830392166665e-05, "loss": 1.6612, "step": 72021 }, { "epoch": 2.4, "grad_norm": 0.6898082494735718, "learning_rate": 5.800212640307089e-05, "loss": 1.6553, "step": 72022 }, { "epoch": 2.4, "grad_norm": 0.7077963948249817, "learning_rate": 5.799594917822502e-05, "loss": 1.6767, "step": 72023 }, { "epoch": 2.4, "grad_norm": 0.6483702063560486, "learning_rate": 5.798977224713627e-05, "loss": 1.632, "step": 72024 }, { "epoch": 2.4, "grad_norm": 0.6897596716880798, "learning_rate": 5.798359560981226e-05, "loss": 1.6358, "step": 72025 }, { "epoch": 2.4, "grad_norm": 0.6901248693466187, "learning_rate": 5.7977419266260596e-05, "loss": 1.6439, "step": 72026 }, { "epoch": 2.4, "grad_norm": 0.6632145047187805, "learning_rate": 5.797124321648866e-05, "loss": 1.6266, "step": 72027 }, { "epoch": 2.4, "grad_norm": 0.669376790523529, "learning_rate": 5.7965067460503924e-05, "loss": 1.7086, "step": 72028 }, { "epoch": 2.4, "grad_norm": 0.6959885954856873, "learning_rate": 5.795889199831394e-05, "loss": 1.703, "step": 72029 }, { "epoch": 2.4, "grad_norm": 0.7077682614326477, "learning_rate": 5.795271682992634e-05, "loss": 1.7426, "step": 72030 }, { "epoch": 2.4, "grad_norm": 0.7510387897491455, "learning_rate": 5.794654195534831e-05, "loss": 1.7247, "step": 72031 }, { "epoch": 2.4, "grad_norm": 0.704335629940033, "learning_rate": 5.794036737458753e-05, "loss": 1.7218, "step": 72032 }, { "epoch": 2.4, "grad_norm": 0.695233941078186, "learning_rate": 5.793419308765157e-05, "loss": 1.67, "step": 72033 }, { "epoch": 2.4, "grad_norm": 0.7241320610046387, "learning_rate": 5.792801909454781e-05, "loss": 1.7677, "step": 72034 }, { "epoch": 2.4, "grad_norm": 0.6718040108680725, "learning_rate": 5.7921845395283695e-05, "loss": 1.7364, "step": 72035 }, { "epoch": 2.4, "grad_norm": 0.6847988963127136, "learning_rate": 5.7915671989866764e-05, "loss": 1.7385, "step": 72036 }, { "epoch": 2.4, "grad_norm": 0.6761588454246521, "learning_rate": 5.790949887830474e-05, "loss": 1.6507, "step": 72037 }, { "epoch": 2.4, "grad_norm": 0.7168108224868774, "learning_rate": 5.7903326060604716e-05, "loss": 1.7168, "step": 72038 }, { "epoch": 2.4, "grad_norm": 0.6706752777099609, "learning_rate": 5.789715353677442e-05, "loss": 1.7232, "step": 72039 }, { "epoch": 2.4, "grad_norm": 0.7039694786071777, "learning_rate": 5.789098130682138e-05, "loss": 1.7647, "step": 72040 }, { "epoch": 2.4, "grad_norm": 0.6967271566390991, "learning_rate": 5.788480937075299e-05, "loss": 1.7501, "step": 72041 }, { "epoch": 2.4, "grad_norm": 0.6938572525978088, "learning_rate": 5.787863772857672e-05, "loss": 1.761, "step": 72042 }, { "epoch": 2.4, "grad_norm": 0.6704877018928528, "learning_rate": 5.7872466380300174e-05, "loss": 1.6531, "step": 72043 }, { "epoch": 2.4, "grad_norm": 0.6900311708450317, "learning_rate": 5.786629532593077e-05, "loss": 1.6391, "step": 72044 }, { "epoch": 2.4, "grad_norm": 0.6848210692405701, "learning_rate": 5.786012456547592e-05, "loss": 1.7707, "step": 72045 }, { "epoch": 2.4, "grad_norm": 0.6783058047294617, "learning_rate": 5.7853954098943226e-05, "loss": 1.7288, "step": 72046 }, { "epoch": 2.4, "grad_norm": 0.7009382247924805, "learning_rate": 5.784778392634021e-05, "loss": 1.6788, "step": 72047 }, { "epoch": 2.4, "grad_norm": 0.6860247254371643, "learning_rate": 5.784161404767433e-05, "loss": 1.7311, "step": 72048 }, { "epoch": 2.4, "grad_norm": 0.7029330134391785, "learning_rate": 5.7835444462952965e-05, "loss": 1.6554, "step": 72049 }, { "epoch": 2.4, "grad_norm": 0.6901029348373413, "learning_rate": 5.782927517218375e-05, "loss": 1.7322, "step": 72050 }, { "epoch": 2.4, "grad_norm": 0.7068189978599548, "learning_rate": 5.7823106175374135e-05, "loss": 1.6803, "step": 72051 }, { "epoch": 2.4, "grad_norm": 0.6601377129554749, "learning_rate": 5.78169374725315e-05, "loss": 1.622, "step": 72052 }, { "epoch": 2.4, "grad_norm": 0.678855836391449, "learning_rate": 5.781076906366348e-05, "loss": 1.6541, "step": 72053 }, { "epoch": 2.4, "grad_norm": 0.681232750415802, "learning_rate": 5.7804600948777426e-05, "loss": 1.7114, "step": 72054 }, { "epoch": 2.4, "grad_norm": 0.6594128608703613, "learning_rate": 5.7798433127881005e-05, "loss": 1.6951, "step": 72055 }, { "epoch": 2.4, "grad_norm": 0.6896928548812866, "learning_rate": 5.7792265600981514e-05, "loss": 1.6251, "step": 72056 }, { "epoch": 2.4, "grad_norm": 0.6970503926277161, "learning_rate": 5.778609836808658e-05, "loss": 1.6455, "step": 72057 }, { "epoch": 2.4, "grad_norm": 0.7183243632316589, "learning_rate": 5.777993142920365e-05, "loss": 1.6713, "step": 72058 }, { "epoch": 2.4, "grad_norm": 0.6720147728919983, "learning_rate": 5.77737647843401e-05, "loss": 1.7009, "step": 72059 }, { "epoch": 2.4, "grad_norm": 0.6855932474136353, "learning_rate": 5.776759843350362e-05, "loss": 1.7874, "step": 72060 }, { "epoch": 2.4, "grad_norm": 0.6817512512207031, "learning_rate": 5.776143237670148e-05, "loss": 1.6605, "step": 72061 }, { "epoch": 2.4, "grad_norm": 0.6737942099571228, "learning_rate": 5.775526661394135e-05, "loss": 1.678, "step": 72062 }, { "epoch": 2.4, "grad_norm": 0.6748344898223877, "learning_rate": 5.7749101145230645e-05, "loss": 1.6696, "step": 72063 }, { "epoch": 2.4, "grad_norm": 0.6752135157585144, "learning_rate": 5.7742935970576756e-05, "loss": 1.6858, "step": 72064 }, { "epoch": 2.4, "grad_norm": 0.6940601468086243, "learning_rate": 5.773677108998733e-05, "loss": 1.7088, "step": 72065 }, { "epoch": 2.4, "grad_norm": 0.681981086730957, "learning_rate": 5.773060650346968e-05, "loss": 1.6711, "step": 72066 }, { "epoch": 2.4, "grad_norm": 0.6910902857780457, "learning_rate": 5.772444221103145e-05, "loss": 1.7152, "step": 72067 }, { "epoch": 2.4, "grad_norm": 0.6908854842185974, "learning_rate": 5.771827821267998e-05, "loss": 1.715, "step": 72068 }, { "epoch": 2.4, "grad_norm": 0.6798197031021118, "learning_rate": 5.771211450842289e-05, "loss": 1.6723, "step": 72069 }, { "epoch": 2.4, "grad_norm": 0.6684260368347168, "learning_rate": 5.770595109826761e-05, "loss": 1.7365, "step": 72070 }, { "epoch": 2.4, "grad_norm": 0.6707684397697449, "learning_rate": 5.7699787982221503e-05, "loss": 1.6877, "step": 72071 }, { "epoch": 2.4, "grad_norm": 0.7162259221076965, "learning_rate": 5.769362516029226e-05, "loss": 1.6065, "step": 72072 }, { "epoch": 2.4, "grad_norm": 0.6973675489425659, "learning_rate": 5.7687462632487236e-05, "loss": 1.6884, "step": 72073 }, { "epoch": 2.4, "grad_norm": 0.6477687954902649, "learning_rate": 5.768130039881387e-05, "loss": 1.6017, "step": 72074 }, { "epoch": 2.4, "grad_norm": 0.684781551361084, "learning_rate": 5.767513845927968e-05, "loss": 1.7001, "step": 72075 }, { "epoch": 2.4, "grad_norm": 0.6608567833900452, "learning_rate": 5.766897681389223e-05, "loss": 1.7312, "step": 72076 }, { "epoch": 2.4, "grad_norm": 0.6997820138931274, "learning_rate": 5.7662815462658985e-05, "loss": 1.7444, "step": 72077 }, { "epoch": 2.4, "grad_norm": 0.6859464049339294, "learning_rate": 5.765665440558727e-05, "loss": 1.7568, "step": 72078 }, { "epoch": 2.4, "grad_norm": 0.6773565411567688, "learning_rate": 5.765049364268474e-05, "loss": 1.668, "step": 72079 }, { "epoch": 2.4, "grad_norm": 0.6978781819343567, "learning_rate": 5.764433317395879e-05, "loss": 1.6285, "step": 72080 }, { "epoch": 2.4, "grad_norm": 0.6952264904975891, "learning_rate": 5.7638172999416854e-05, "loss": 1.6388, "step": 72081 }, { "epoch": 2.4, "grad_norm": 1.704642415046692, "learning_rate": 5.763201311906649e-05, "loss": 1.727, "step": 72082 }, { "epoch": 2.4, "grad_norm": 0.694442629814148, "learning_rate": 5.7625853532915256e-05, "loss": 1.6679, "step": 72083 }, { "epoch": 2.4, "grad_norm": 0.690518856048584, "learning_rate": 5.7619694240970415e-05, "loss": 1.6752, "step": 72084 }, { "epoch": 2.4, "grad_norm": 0.6791920065879822, "learning_rate": 5.7613535243239526e-05, "loss": 1.7112, "step": 72085 }, { "epoch": 2.4, "grad_norm": 0.719231903553009, "learning_rate": 5.7607376539730154e-05, "loss": 1.7394, "step": 72086 }, { "epoch": 2.4, "grad_norm": 0.6911055445671082, "learning_rate": 5.7601218130449724e-05, "loss": 1.6595, "step": 72087 }, { "epoch": 2.4, "grad_norm": 0.6817828416824341, "learning_rate": 5.759506001540562e-05, "loss": 1.6437, "step": 72088 }, { "epoch": 2.4, "grad_norm": 0.6802158951759338, "learning_rate": 5.7588902194605426e-05, "loss": 1.7318, "step": 72089 }, { "epoch": 2.4, "grad_norm": 0.6923753023147583, "learning_rate": 5.7582744668056716e-05, "loss": 1.7362, "step": 72090 }, { "epoch": 2.4, "grad_norm": 0.680141031742096, "learning_rate": 5.757658743576668e-05, "loss": 1.6337, "step": 72091 }, { "epoch": 2.4, "grad_norm": 0.7036451101303101, "learning_rate": 5.757043049774297e-05, "loss": 1.6181, "step": 72092 }, { "epoch": 2.4, "grad_norm": 0.6830713748931885, "learning_rate": 5.756427385399309e-05, "loss": 1.6741, "step": 72093 }, { "epoch": 2.4, "grad_norm": 0.6662731170654297, "learning_rate": 5.755811750452445e-05, "loss": 1.701, "step": 72094 }, { "epoch": 2.4, "grad_norm": 0.6907848715782166, "learning_rate": 5.7551961449344494e-05, "loss": 1.655, "step": 72095 }, { "epoch": 2.4, "grad_norm": 0.7173804044723511, "learning_rate": 5.75458056884607e-05, "loss": 1.6467, "step": 72096 }, { "epoch": 2.4, "grad_norm": 0.7022002339363098, "learning_rate": 5.753965022188075e-05, "loss": 1.6812, "step": 72097 }, { "epoch": 2.4, "grad_norm": 0.7117315530776978, "learning_rate": 5.753349504961178e-05, "loss": 1.7034, "step": 72098 }, { "epoch": 2.4, "grad_norm": 0.7121214270591736, "learning_rate": 5.752734017166141e-05, "loss": 1.7012, "step": 72099 }, { "epoch": 2.4, "grad_norm": 0.6829992532730103, "learning_rate": 5.75211855880372e-05, "loss": 1.6402, "step": 72100 }, { "epoch": 2.4, "grad_norm": 0.6821924448013306, "learning_rate": 5.751503129874655e-05, "loss": 1.7344, "step": 72101 }, { "epoch": 2.4, "grad_norm": 0.6620288491249084, "learning_rate": 5.7508877303796855e-05, "loss": 1.724, "step": 72102 }, { "epoch": 2.4, "grad_norm": 0.6992159485816956, "learning_rate": 5.750272360319563e-05, "loss": 1.714, "step": 72103 }, { "epoch": 2.4, "grad_norm": 0.695915937423706, "learning_rate": 5.7496570196950555e-05, "loss": 1.6891, "step": 72104 }, { "epoch": 2.4, "grad_norm": 0.691364586353302, "learning_rate": 5.749041708506874e-05, "loss": 1.6338, "step": 72105 }, { "epoch": 2.4, "grad_norm": 0.7095057368278503, "learning_rate": 5.748426426755782e-05, "loss": 1.7524, "step": 72106 }, { "epoch": 2.4, "grad_norm": 0.6989008188247681, "learning_rate": 5.747811174442536e-05, "loss": 1.6687, "step": 72107 }, { "epoch": 2.4, "grad_norm": 0.6634567379951477, "learning_rate": 5.747195951567874e-05, "loss": 1.6797, "step": 72108 }, { "epoch": 2.4, "grad_norm": 0.7062234878540039, "learning_rate": 5.746580758132534e-05, "loss": 1.6806, "step": 72109 }, { "epoch": 2.4, "grad_norm": 0.691955029964447, "learning_rate": 5.7459655941372805e-05, "loss": 1.7271, "step": 72110 }, { "epoch": 2.4, "grad_norm": 0.682107150554657, "learning_rate": 5.7453504595828405e-05, "loss": 1.7262, "step": 72111 }, { "epoch": 2.4, "grad_norm": 0.6761062145233154, "learning_rate": 5.7447353544699824e-05, "loss": 1.664, "step": 72112 }, { "epoch": 2.4, "grad_norm": 0.6899542808532715, "learning_rate": 5.74412027879943e-05, "loss": 1.6793, "step": 72113 }, { "epoch": 2.4, "grad_norm": 0.7619383931159973, "learning_rate": 5.7435052325719524e-05, "loss": 1.6307, "step": 72114 }, { "epoch": 2.4, "grad_norm": 0.7048050761222839, "learning_rate": 5.742890215788285e-05, "loss": 1.6845, "step": 72115 }, { "epoch": 2.4, "grad_norm": 0.6860917806625366, "learning_rate": 5.742275228449168e-05, "loss": 1.5831, "step": 72116 }, { "epoch": 2.4, "grad_norm": 0.7029858827590942, "learning_rate": 5.7416602705553604e-05, "loss": 1.7254, "step": 72117 }, { "epoch": 2.4, "grad_norm": 0.6631067395210266, "learning_rate": 5.7410453421075955e-05, "loss": 1.6521, "step": 72118 }, { "epoch": 2.4, "grad_norm": 0.6913167834281921, "learning_rate": 5.740430443106635e-05, "loss": 1.7019, "step": 72119 }, { "epoch": 2.4, "grad_norm": 0.6767695546150208, "learning_rate": 5.739815573553219e-05, "loss": 1.6369, "step": 72120 }, { "epoch": 2.4, "grad_norm": 0.6750279068946838, "learning_rate": 5.739200733448084e-05, "loss": 1.6348, "step": 72121 }, { "epoch": 2.4, "grad_norm": 0.6806934475898743, "learning_rate": 5.738585922791992e-05, "loss": 1.657, "step": 72122 }, { "epoch": 2.4, "grad_norm": 0.6640188097953796, "learning_rate": 5.737971141585672e-05, "loss": 1.6494, "step": 72123 }, { "epoch": 2.4, "grad_norm": 0.6603415012359619, "learning_rate": 5.737356389829891e-05, "loss": 1.656, "step": 72124 }, { "epoch": 2.4, "grad_norm": 0.6667159795761108, "learning_rate": 5.736741667525375e-05, "loss": 1.7271, "step": 72125 }, { "epoch": 2.4, "grad_norm": 0.6676540970802307, "learning_rate": 5.736126974672887e-05, "loss": 1.7328, "step": 72126 }, { "epoch": 2.4, "grad_norm": 0.7349407076835632, "learning_rate": 5.735512311273168e-05, "loss": 1.7319, "step": 72127 }, { "epoch": 2.4, "grad_norm": 0.6744561195373535, "learning_rate": 5.734897677326953e-05, "loss": 1.7276, "step": 72128 }, { "epoch": 2.4, "grad_norm": 0.7010579705238342, "learning_rate": 5.734283072835003e-05, "loss": 1.6924, "step": 72129 }, { "epoch": 2.4, "grad_norm": 0.6819558143615723, "learning_rate": 5.733668497798062e-05, "loss": 1.7673, "step": 72130 }, { "epoch": 2.4, "grad_norm": 0.6579185128211975, "learning_rate": 5.7330539522168615e-05, "loss": 1.6487, "step": 72131 }, { "epoch": 2.4, "grad_norm": 0.6622996926307678, "learning_rate": 5.732439436092155e-05, "loss": 1.732, "step": 72132 }, { "epoch": 2.4, "grad_norm": 0.6666649580001831, "learning_rate": 5.731824949424703e-05, "loss": 1.6967, "step": 72133 }, { "epoch": 2.4, "grad_norm": 0.6707948446273804, "learning_rate": 5.731210492215239e-05, "loss": 1.7206, "step": 72134 }, { "epoch": 2.4, "grad_norm": 0.6615868806838989, "learning_rate": 5.730596064464501e-05, "loss": 1.6539, "step": 72135 }, { "epoch": 2.4, "grad_norm": 0.6789169907569885, "learning_rate": 5.7299816661732544e-05, "loss": 1.6636, "step": 72136 }, { "epoch": 2.4, "grad_norm": 0.681911289691925, "learning_rate": 5.7293672973422325e-05, "loss": 1.6736, "step": 72137 }, { "epoch": 2.4, "grad_norm": 0.7196455001831055, "learning_rate": 5.728752957972174e-05, "loss": 1.7652, "step": 72138 }, { "epoch": 2.4, "grad_norm": 0.6656818389892578, "learning_rate": 5.7281386480638314e-05, "loss": 1.7227, "step": 72139 }, { "epoch": 2.4, "grad_norm": 0.6737582683563232, "learning_rate": 5.727524367617968e-05, "loss": 1.6201, "step": 72140 }, { "epoch": 2.4, "grad_norm": 0.6793507933616638, "learning_rate": 5.7269101166353e-05, "loss": 1.727, "step": 72141 }, { "epoch": 2.4, "grad_norm": 0.6578940749168396, "learning_rate": 5.726295895116587e-05, "loss": 1.6935, "step": 72142 }, { "epoch": 2.4, "grad_norm": 0.6598841547966003, "learning_rate": 5.7256817030625815e-05, "loss": 1.6946, "step": 72143 }, { "epoch": 2.4, "grad_norm": 0.667422354221344, "learning_rate": 5.725067540474022e-05, "loss": 1.7193, "step": 72144 }, { "epoch": 2.4, "grad_norm": 0.6750279664993286, "learning_rate": 5.724453407351646e-05, "loss": 1.6951, "step": 72145 }, { "epoch": 2.4, "grad_norm": 0.6806350946426392, "learning_rate": 5.723839303696205e-05, "loss": 1.7235, "step": 72146 }, { "epoch": 2.4, "grad_norm": 0.6861759424209595, "learning_rate": 5.723225229508462e-05, "loss": 1.7837, "step": 72147 }, { "epoch": 2.4, "grad_norm": 0.6981549263000488, "learning_rate": 5.7226111847891345e-05, "loss": 1.6672, "step": 72148 }, { "epoch": 2.4, "grad_norm": 0.6885201334953308, "learning_rate": 5.721997169538977e-05, "loss": 1.7111, "step": 72149 }, { "epoch": 2.4, "grad_norm": 0.6837934255599976, "learning_rate": 5.7213831837587455e-05, "loss": 1.7162, "step": 72150 }, { "epoch": 2.4, "grad_norm": 0.7016164660453796, "learning_rate": 5.720769227449177e-05, "loss": 1.6667, "step": 72151 }, { "epoch": 2.4, "grad_norm": 0.7029026746749878, "learning_rate": 5.72015530061101e-05, "loss": 1.7295, "step": 72152 }, { "epoch": 2.4, "grad_norm": 0.6675114035606384, "learning_rate": 5.7195414032449984e-05, "loss": 1.6957, "step": 72153 }, { "epoch": 2.4, "grad_norm": 0.6820423007011414, "learning_rate": 5.7189275353519e-05, "loss": 1.7003, "step": 72154 }, { "epoch": 2.4, "grad_norm": 0.6731661558151245, "learning_rate": 5.718313696932433e-05, "loss": 1.7315, "step": 72155 }, { "epoch": 2.4, "grad_norm": 0.6917396783828735, "learning_rate": 5.717699887987351e-05, "loss": 1.6965, "step": 72156 }, { "epoch": 2.4, "grad_norm": 0.6801741123199463, "learning_rate": 5.7170861085174155e-05, "loss": 1.7398, "step": 72157 }, { "epoch": 2.4, "grad_norm": 0.6793992519378662, "learning_rate": 5.7164723585233584e-05, "loss": 1.6823, "step": 72158 }, { "epoch": 2.4, "grad_norm": 0.6933651566505432, "learning_rate": 5.715858638005916e-05, "loss": 1.6665, "step": 72159 }, { "epoch": 2.4, "grad_norm": 0.7139580249786377, "learning_rate": 5.715244946965853e-05, "loss": 1.7212, "step": 72160 }, { "epoch": 2.4, "grad_norm": 0.689605176448822, "learning_rate": 5.7146312854039046e-05, "loss": 1.697, "step": 72161 }, { "epoch": 2.4, "grad_norm": 0.694019079208374, "learning_rate": 5.7140176533208047e-05, "loss": 1.762, "step": 72162 }, { "epoch": 2.4, "grad_norm": 0.6774322986602783, "learning_rate": 5.713404050717311e-05, "loss": 1.7311, "step": 72163 }, { "epoch": 2.4, "grad_norm": 0.683562695980072, "learning_rate": 5.7127904775941746e-05, "loss": 1.7342, "step": 72164 }, { "epoch": 2.4, "grad_norm": 0.6687150597572327, "learning_rate": 5.7121769339521327e-05, "loss": 1.6919, "step": 72165 }, { "epoch": 2.4, "grad_norm": 0.7018714547157288, "learning_rate": 5.7115634197919215e-05, "loss": 1.6953, "step": 72166 }, { "epoch": 2.4, "grad_norm": 0.6785989999771118, "learning_rate": 5.7109499351142994e-05, "loss": 1.7014, "step": 72167 }, { "epoch": 2.4, "grad_norm": 0.689235508441925, "learning_rate": 5.710336479920007e-05, "loss": 1.7148, "step": 72168 }, { "epoch": 2.4, "grad_norm": 0.7059769034385681, "learning_rate": 5.709723054209776e-05, "loss": 1.713, "step": 72169 }, { "epoch": 2.4, "grad_norm": 0.675585925579071, "learning_rate": 5.709109657984373e-05, "loss": 1.6044, "step": 72170 }, { "epoch": 2.4, "grad_norm": 0.6957045793533325, "learning_rate": 5.7084962912445245e-05, "loss": 1.6777, "step": 72171 }, { "epoch": 2.4, "grad_norm": 0.6937342286109924, "learning_rate": 5.7078829539909886e-05, "loss": 1.692, "step": 72172 }, { "epoch": 2.4, "grad_norm": 0.6931328773498535, "learning_rate": 5.707269646224496e-05, "loss": 1.6719, "step": 72173 }, { "epoch": 2.4, "grad_norm": 0.6759609580039978, "learning_rate": 5.706656367945809e-05, "loss": 1.6692, "step": 72174 }, { "epoch": 2.4, "grad_norm": 0.689583420753479, "learning_rate": 5.70604311915566e-05, "loss": 1.7705, "step": 72175 }, { "epoch": 2.4, "grad_norm": 0.7021180987358093, "learning_rate": 5.705429899854785e-05, "loss": 1.7063, "step": 72176 }, { "epoch": 2.4, "grad_norm": 0.6964396238327026, "learning_rate": 5.7048167100439476e-05, "loss": 1.6824, "step": 72177 }, { "epoch": 2.4, "grad_norm": 0.6617603302001953, "learning_rate": 5.704203549723877e-05, "loss": 1.617, "step": 72178 }, { "epoch": 2.4, "grad_norm": 0.6691431999206543, "learning_rate": 5.703590418895328e-05, "loss": 1.7162, "step": 72179 }, { "epoch": 2.4, "grad_norm": 0.6974905133247375, "learning_rate": 5.702977317559045e-05, "loss": 1.7473, "step": 72180 }, { "epoch": 2.4, "grad_norm": 0.6678210496902466, "learning_rate": 5.702364245715757e-05, "loss": 1.6613, "step": 72181 }, { "epoch": 2.4, "grad_norm": 0.6973758339881897, "learning_rate": 5.701751203366226e-05, "loss": 1.6106, "step": 72182 }, { "epoch": 2.4, "grad_norm": 0.7041584849357605, "learning_rate": 5.701138190511182e-05, "loss": 1.6935, "step": 72183 }, { "epoch": 2.4, "grad_norm": 0.6879497170448303, "learning_rate": 5.700525207151384e-05, "loss": 1.6489, "step": 72184 }, { "epoch": 2.4, "grad_norm": 0.6676927804946899, "learning_rate": 5.6999122532875584e-05, "loss": 1.6833, "step": 72185 }, { "epoch": 2.4, "grad_norm": 0.6986671686172485, "learning_rate": 5.699299328920468e-05, "loss": 1.7531, "step": 72186 }, { "epoch": 2.4, "grad_norm": 0.6854930520057678, "learning_rate": 5.698686434050849e-05, "loss": 1.6842, "step": 72187 }, { "epoch": 2.4, "grad_norm": 0.6687850952148438, "learning_rate": 5.6980735686794335e-05, "loss": 1.6306, "step": 72188 }, { "epoch": 2.4, "grad_norm": 1.6942369937896729, "learning_rate": 5.697460732806984e-05, "loss": 1.7437, "step": 72189 }, { "epoch": 2.4, "grad_norm": 0.694602370262146, "learning_rate": 5.696847926434238e-05, "loss": 1.6313, "step": 72190 }, { "epoch": 2.4, "grad_norm": 0.6906195282936096, "learning_rate": 5.696235149561931e-05, "loss": 1.6694, "step": 72191 }, { "epoch": 2.4, "grad_norm": 0.6952614784240723, "learning_rate": 5.6956224021908115e-05, "loss": 1.7107, "step": 72192 }, { "epoch": 2.4, "grad_norm": 0.6898298263549805, "learning_rate": 5.695009684321633e-05, "loss": 1.676, "step": 72193 }, { "epoch": 2.4, "grad_norm": 0.6760322451591492, "learning_rate": 5.694396995955132e-05, "loss": 1.6607, "step": 72194 }, { "epoch": 2.4, "grad_norm": 0.6838963031768799, "learning_rate": 5.693784337092043e-05, "loss": 1.7256, "step": 72195 }, { "epoch": 2.4, "grad_norm": 0.7002789378166199, "learning_rate": 5.6931717077331305e-05, "loss": 1.6391, "step": 72196 }, { "epoch": 2.4, "grad_norm": 0.6947169899940491, "learning_rate": 5.69255910787912e-05, "loss": 1.7462, "step": 72197 }, { "epoch": 2.4, "grad_norm": 0.6798252463340759, "learning_rate": 5.6919465375307573e-05, "loss": 1.7102, "step": 72198 }, { "epoch": 2.4, "grad_norm": 0.6921111345291138, "learning_rate": 5.691333996688788e-05, "loss": 1.7288, "step": 72199 }, { "epoch": 2.4, "grad_norm": 0.6713523864746094, "learning_rate": 5.690721485353976e-05, "loss": 1.6022, "step": 72200 }, { "epoch": 2.4, "grad_norm": 0.6759842038154602, "learning_rate": 5.690109003527027e-05, "loss": 1.6207, "step": 72201 }, { "epoch": 2.4, "grad_norm": 0.680178165435791, "learning_rate": 5.6894965512087064e-05, "loss": 1.7601, "step": 72202 }, { "epoch": 2.4, "grad_norm": 0.6935234069824219, "learning_rate": 5.688884128399761e-05, "loss": 1.6921, "step": 72203 }, { "epoch": 2.4, "grad_norm": 0.6699429750442505, "learning_rate": 5.68827173510093e-05, "loss": 1.6844, "step": 72204 }, { "epoch": 2.4, "grad_norm": 0.7208369374275208, "learning_rate": 5.687659371312946e-05, "loss": 1.65, "step": 72205 }, { "epoch": 2.4, "grad_norm": 0.7010330557823181, "learning_rate": 5.687047037036562e-05, "loss": 1.6801, "step": 72206 }, { "epoch": 2.4, "grad_norm": 0.7028514742851257, "learning_rate": 5.6864347322725366e-05, "loss": 1.6582, "step": 72207 }, { "epoch": 2.4, "grad_norm": 0.6937280893325806, "learning_rate": 5.685822457021581e-05, "loss": 1.7006, "step": 72208 }, { "epoch": 2.4, "grad_norm": 0.6814796924591064, "learning_rate": 5.6852102112844524e-05, "loss": 1.6454, "step": 72209 }, { "epoch": 2.4, "grad_norm": 0.6569253206253052, "learning_rate": 5.6845979950619046e-05, "loss": 1.6429, "step": 72210 }, { "epoch": 2.4, "grad_norm": 0.6676123738288879, "learning_rate": 5.683985808354674e-05, "loss": 1.6455, "step": 72211 }, { "epoch": 2.4, "grad_norm": 0.6595489978790283, "learning_rate": 5.6833736511634934e-05, "loss": 1.6496, "step": 72212 }, { "epoch": 2.4, "grad_norm": 1.086575984954834, "learning_rate": 5.6827615234891146e-05, "loss": 1.6927, "step": 72213 }, { "epoch": 2.4, "grad_norm": 0.7219287157058716, "learning_rate": 5.682149425332294e-05, "loss": 1.7065, "step": 72214 }, { "epoch": 2.4, "grad_norm": 0.6834903955459595, "learning_rate": 5.6815373566937486e-05, "loss": 1.6976, "step": 72215 }, { "epoch": 2.4, "grad_norm": 0.6994112133979797, "learning_rate": 5.68092531757423e-05, "loss": 1.6635, "step": 72216 }, { "epoch": 2.4, "grad_norm": 0.6836239099502563, "learning_rate": 5.680313307974494e-05, "loss": 1.6787, "step": 72217 }, { "epoch": 2.4, "grad_norm": 0.6920114755630493, "learning_rate": 5.679701327895275e-05, "loss": 1.6913, "step": 72218 }, { "epoch": 2.4, "grad_norm": 0.7024328708648682, "learning_rate": 5.6790893773373044e-05, "loss": 1.7254, "step": 72219 }, { "epoch": 2.4, "grad_norm": 0.7189422249794006, "learning_rate": 5.678477456301339e-05, "loss": 1.6925, "step": 72220 }, { "epoch": 2.4, "grad_norm": 0.6678344011306763, "learning_rate": 5.677865564788134e-05, "loss": 1.7434, "step": 72221 }, { "epoch": 2.4, "grad_norm": 0.6837308406829834, "learning_rate": 5.6772537027984e-05, "loss": 1.6368, "step": 72222 }, { "epoch": 2.4, "grad_norm": 0.6599847078323364, "learning_rate": 5.676641870332895e-05, "loss": 1.6645, "step": 72223 }, { "epoch": 2.4, "grad_norm": 0.6607064604759216, "learning_rate": 5.676030067392373e-05, "loss": 1.6603, "step": 72224 }, { "epoch": 2.4, "grad_norm": 0.6924649477005005, "learning_rate": 5.675418293977566e-05, "loss": 1.695, "step": 72225 }, { "epoch": 2.4, "grad_norm": 0.6760598421096802, "learning_rate": 5.6748065500892104e-05, "loss": 1.6475, "step": 72226 }, { "epoch": 2.4, "grad_norm": 0.6690753698348999, "learning_rate": 5.674194835728059e-05, "loss": 1.6507, "step": 72227 }, { "epoch": 2.4, "grad_norm": 0.6724377870559692, "learning_rate": 5.673583150894847e-05, "loss": 1.7017, "step": 72228 }, { "epoch": 2.4, "grad_norm": 0.6692935228347778, "learning_rate": 5.672971495590328e-05, "loss": 1.6881, "step": 72229 }, { "epoch": 2.4, "grad_norm": 0.6774892210960388, "learning_rate": 5.6723598698152285e-05, "loss": 1.7241, "step": 72230 }, { "epoch": 2.4, "grad_norm": 0.6993557214736938, "learning_rate": 5.671748273570307e-05, "loss": 1.744, "step": 72231 }, { "epoch": 2.4, "grad_norm": 0.6820552349090576, "learning_rate": 5.6711367068563e-05, "loss": 1.6651, "step": 72232 }, { "epoch": 2.4, "grad_norm": 0.6785229444503784, "learning_rate": 5.67052516967394e-05, "loss": 1.738, "step": 72233 }, { "epoch": 2.4, "grad_norm": 0.6777561902999878, "learning_rate": 5.669913662023983e-05, "loss": 1.6509, "step": 72234 }, { "epoch": 2.4, "grad_norm": 0.7003782391548157, "learning_rate": 5.6693021839071615e-05, "loss": 1.7055, "step": 72235 }, { "epoch": 2.4, "grad_norm": 0.6768301129341125, "learning_rate": 5.66869073532423e-05, "loss": 1.6719, "step": 72236 }, { "epoch": 2.4, "grad_norm": 0.6822236776351929, "learning_rate": 5.6680793162759185e-05, "loss": 1.7162, "step": 72237 }, { "epoch": 2.4, "grad_norm": 0.6793643832206726, "learning_rate": 5.667467926762972e-05, "loss": 1.7336, "step": 72238 }, { "epoch": 2.4, "grad_norm": 0.6962427496910095, "learning_rate": 5.666856566786139e-05, "loss": 1.7125, "step": 72239 }, { "epoch": 2.4, "grad_norm": 0.6749780178070068, "learning_rate": 5.666245236346149e-05, "loss": 1.6724, "step": 72240 }, { "epoch": 2.4, "grad_norm": 0.6771779656410217, "learning_rate": 5.665633935443758e-05, "loss": 1.6146, "step": 72241 }, { "epoch": 2.4, "grad_norm": 0.6992555856704712, "learning_rate": 5.6650226640796945e-05, "loss": 1.7284, "step": 72242 }, { "epoch": 2.4, "grad_norm": 0.6785376071929932, "learning_rate": 5.664411422254717e-05, "loss": 1.7418, "step": 72243 }, { "epoch": 2.4, "grad_norm": 0.6785157322883606, "learning_rate": 5.663800209969559e-05, "loss": 1.7447, "step": 72244 }, { "epoch": 2.4, "grad_norm": 0.7070877552032471, "learning_rate": 5.663189027224955e-05, "loss": 1.6868, "step": 72245 }, { "epoch": 2.4, "grad_norm": 0.6786918044090271, "learning_rate": 5.6625778740216566e-05, "loss": 1.6599, "step": 72246 }, { "epoch": 2.4, "grad_norm": 0.6834380626678467, "learning_rate": 5.6619667503604075e-05, "loss": 1.714, "step": 72247 }, { "epoch": 2.4, "grad_norm": 0.6853352785110474, "learning_rate": 5.661355656241936e-05, "loss": 1.747, "step": 72248 }, { "epoch": 2.4, "grad_norm": 0.6617675423622131, "learning_rate": 5.6607445916669924e-05, "loss": 1.7002, "step": 72249 }, { "epoch": 2.4, "grad_norm": 0.68502277135849, "learning_rate": 5.660133556636326e-05, "loss": 1.6589, "step": 72250 }, { "epoch": 2.4, "grad_norm": 0.6928079724311829, "learning_rate": 5.6595225511506695e-05, "loss": 1.6979, "step": 72251 }, { "epoch": 2.4, "grad_norm": 0.6703130006790161, "learning_rate": 5.658911575210758e-05, "loss": 1.7426, "step": 72252 }, { "epoch": 2.4, "grad_norm": 0.6646081805229187, "learning_rate": 5.6583006288173524e-05, "loss": 1.6729, "step": 72253 }, { "epoch": 2.4, "grad_norm": 0.7029687762260437, "learning_rate": 5.657689711971182e-05, "loss": 1.6859, "step": 72254 }, { "epoch": 2.4, "grad_norm": 0.6739529967308044, "learning_rate": 5.657078824672981e-05, "loss": 1.667, "step": 72255 }, { "epoch": 2.4, "grad_norm": 0.7098817825317383, "learning_rate": 5.656467966923501e-05, "loss": 1.7054, "step": 72256 }, { "epoch": 2.4, "grad_norm": 0.6847881078720093, "learning_rate": 5.6558571387234974e-05, "loss": 1.6455, "step": 72257 }, { "epoch": 2.4, "grad_norm": 0.6907098293304443, "learning_rate": 5.655246340073679e-05, "loss": 1.6861, "step": 72258 }, { "epoch": 2.4, "grad_norm": 0.6882929801940918, "learning_rate": 5.654635570974806e-05, "loss": 1.6864, "step": 72259 }, { "epoch": 2.4, "grad_norm": 0.7010610699653625, "learning_rate": 5.654024831427627e-05, "loss": 1.7459, "step": 72260 }, { "epoch": 2.4, "grad_norm": 0.703016996383667, "learning_rate": 5.653414121432872e-05, "loss": 1.7781, "step": 72261 }, { "epoch": 2.4, "grad_norm": 0.6947130560874939, "learning_rate": 5.6528034409912804e-05, "loss": 1.7478, "step": 72262 }, { "epoch": 2.4, "grad_norm": 0.6994715332984924, "learning_rate": 5.6521927901035945e-05, "loss": 1.6986, "step": 72263 }, { "epoch": 2.4, "grad_norm": 0.662788987159729, "learning_rate": 5.6515821687705775e-05, "loss": 1.6524, "step": 72264 }, { "epoch": 2.4, "grad_norm": 0.7046743035316467, "learning_rate": 5.6509715769929354e-05, "loss": 1.6711, "step": 72265 }, { "epoch": 2.4, "grad_norm": 0.7126421928405762, "learning_rate": 5.650361014771427e-05, "loss": 1.6667, "step": 72266 }, { "epoch": 2.4, "grad_norm": 0.7128639817237854, "learning_rate": 5.6497504821067996e-05, "loss": 1.6834, "step": 72267 }, { "epoch": 2.4, "grad_norm": 0.681535542011261, "learning_rate": 5.6491399789997885e-05, "loss": 1.6432, "step": 72268 }, { "epoch": 2.4, "grad_norm": 0.7215986251831055, "learning_rate": 5.6485295054511225e-05, "loss": 1.7193, "step": 72269 }, { "epoch": 2.4, "grad_norm": 0.7003939747810364, "learning_rate": 5.6479190614615555e-05, "loss": 1.6693, "step": 72270 }, { "epoch": 2.4, "grad_norm": 0.6986700892448425, "learning_rate": 5.647308647031843e-05, "loss": 1.6838, "step": 72271 }, { "epoch": 2.4, "grad_norm": 0.7002851366996765, "learning_rate": 5.646698262162695e-05, "loss": 1.6371, "step": 72272 }, { "epoch": 2.4, "grad_norm": 0.6953331828117371, "learning_rate": 5.646087906854866e-05, "loss": 1.7196, "step": 72273 }, { "epoch": 2.4, "grad_norm": 0.6966335773468018, "learning_rate": 5.6454775811091034e-05, "loss": 1.7589, "step": 72274 }, { "epoch": 2.4, "grad_norm": 0.6813916563987732, "learning_rate": 5.6448672849261434e-05, "loss": 1.7064, "step": 72275 }, { "epoch": 2.4, "grad_norm": 0.7125153541564941, "learning_rate": 5.644257018306722e-05, "loss": 1.7205, "step": 72276 }, { "epoch": 2.4, "grad_norm": 0.6694817543029785, "learning_rate": 5.643646781251587e-05, "loss": 1.6825, "step": 72277 }, { "epoch": 2.4, "grad_norm": 0.6929425001144409, "learning_rate": 5.643036573761477e-05, "loss": 1.6743, "step": 72278 }, { "epoch": 2.4, "grad_norm": 0.6899442076683044, "learning_rate": 5.642426395837126e-05, "loss": 1.6877, "step": 72279 }, { "epoch": 2.4, "grad_norm": 0.67093425989151, "learning_rate": 5.641816247479278e-05, "loss": 1.697, "step": 72280 }, { "epoch": 2.4, "grad_norm": 0.6645461916923523, "learning_rate": 5.6412061286886856e-05, "loss": 1.7715, "step": 72281 }, { "epoch": 2.4, "grad_norm": 0.676242470741272, "learning_rate": 5.640596039466081e-05, "loss": 1.6864, "step": 72282 }, { "epoch": 2.4, "grad_norm": 0.6875492930412292, "learning_rate": 5.639985979812194e-05, "loss": 1.6497, "step": 72283 }, { "epoch": 2.4, "grad_norm": 0.6950979232788086, "learning_rate": 5.6393759497277834e-05, "loss": 1.6893, "step": 72284 }, { "epoch": 2.4, "grad_norm": 0.7246404886245728, "learning_rate": 5.6387659492135795e-05, "loss": 1.7235, "step": 72285 }, { "epoch": 2.4, "grad_norm": 0.6717677712440491, "learning_rate": 5.638155978270318e-05, "loss": 1.7011, "step": 72286 }, { "epoch": 2.4, "grad_norm": 0.6828661561012268, "learning_rate": 5.6375460368987514e-05, "loss": 1.7613, "step": 72287 }, { "epoch": 2.41, "grad_norm": 0.6801484823226929, "learning_rate": 5.636936125099609e-05, "loss": 1.6893, "step": 72288 }, { "epoch": 2.41, "grad_norm": 0.7169438600540161, "learning_rate": 5.636326242873641e-05, "loss": 1.7115, "step": 72289 }, { "epoch": 2.41, "grad_norm": 0.6597757339477539, "learning_rate": 5.63571639022158e-05, "loss": 1.6891, "step": 72290 }, { "epoch": 2.41, "grad_norm": 0.6617574095726013, "learning_rate": 5.6351065671441744e-05, "loss": 1.6287, "step": 72291 }, { "epoch": 2.41, "grad_norm": 0.6722833514213562, "learning_rate": 5.634496773642158e-05, "loss": 1.7002, "step": 72292 }, { "epoch": 2.41, "grad_norm": 0.6893439888954163, "learning_rate": 5.6338870097162656e-05, "loss": 1.6267, "step": 72293 }, { "epoch": 2.41, "grad_norm": 0.6819272041320801, "learning_rate": 5.633277275367255e-05, "loss": 1.6304, "step": 72294 }, { "epoch": 2.41, "grad_norm": 0.6796876788139343, "learning_rate": 5.632667570595845e-05, "loss": 1.7174, "step": 72295 }, { "epoch": 2.41, "grad_norm": 2.1176202297210693, "learning_rate": 5.632057895402794e-05, "loss": 1.6861, "step": 72296 }, { "epoch": 2.41, "grad_norm": 0.6719583868980408, "learning_rate": 5.6314482497888326e-05, "loss": 1.6828, "step": 72297 }, { "epoch": 2.41, "grad_norm": 0.688749372959137, "learning_rate": 5.630838633754699e-05, "loss": 1.6418, "step": 72298 }, { "epoch": 2.41, "grad_norm": 0.6885437965393066, "learning_rate": 5.630229047301141e-05, "loss": 1.728, "step": 72299 }, { "epoch": 2.41, "grad_norm": 0.6957297921180725, "learning_rate": 5.629619490428887e-05, "loss": 1.7276, "step": 72300 }, { "epoch": 2.41, "grad_norm": 0.6862427592277527, "learning_rate": 5.629009963138693e-05, "loss": 1.7543, "step": 72301 }, { "epoch": 2.41, "grad_norm": 0.6884668469429016, "learning_rate": 5.628400465431282e-05, "loss": 1.7569, "step": 72302 }, { "epoch": 2.41, "grad_norm": 0.6952564120292664, "learning_rate": 5.6277909973074096e-05, "loss": 1.6688, "step": 72303 }, { "epoch": 2.41, "grad_norm": 0.696361243724823, "learning_rate": 5.627181558767806e-05, "loss": 1.7268, "step": 72304 }, { "epoch": 2.41, "grad_norm": 0.6853885054588318, "learning_rate": 5.626572149813206e-05, "loss": 1.7086, "step": 72305 }, { "epoch": 2.41, "grad_norm": 0.6809525489807129, "learning_rate": 5.625962770444368e-05, "loss": 1.7474, "step": 72306 }, { "epoch": 2.41, "grad_norm": 0.6662845015525818, "learning_rate": 5.6253534206620154e-05, "loss": 1.6747, "step": 72307 }, { "epoch": 2.41, "grad_norm": 0.6838653087615967, "learning_rate": 5.624744100466886e-05, "loss": 1.7093, "step": 72308 }, { "epoch": 2.41, "grad_norm": 0.6654123663902283, "learning_rate": 5.624134809859725e-05, "loss": 1.7504, "step": 72309 }, { "epoch": 2.41, "grad_norm": 0.6767346262931824, "learning_rate": 5.623525548841282e-05, "loss": 1.6297, "step": 72310 }, { "epoch": 2.41, "grad_norm": 0.6806697845458984, "learning_rate": 5.622916317412284e-05, "loss": 1.6933, "step": 72311 }, { "epoch": 2.41, "grad_norm": 0.6854139566421509, "learning_rate": 5.6223071155734686e-05, "loss": 1.831, "step": 72312 }, { "epoch": 2.41, "grad_norm": 0.7017234563827515, "learning_rate": 5.621697943325587e-05, "loss": 1.6279, "step": 72313 }, { "epoch": 2.41, "grad_norm": 0.6955834031105042, "learning_rate": 5.6210888006693744e-05, "loss": 1.728, "step": 72314 }, { "epoch": 2.41, "grad_norm": 0.7276513576507568, "learning_rate": 5.6204796876055566e-05, "loss": 1.6785, "step": 72315 }, { "epoch": 2.41, "grad_norm": 0.7173805236816406, "learning_rate": 5.619870604134887e-05, "loss": 1.7744, "step": 72316 }, { "epoch": 2.41, "grad_norm": 0.6879884004592896, "learning_rate": 5.619261550258118e-05, "loss": 1.648, "step": 72317 }, { "epoch": 2.41, "grad_norm": 0.6768641471862793, "learning_rate": 5.618652525975956e-05, "loss": 1.6108, "step": 72318 }, { "epoch": 2.41, "grad_norm": 0.670266330242157, "learning_rate": 5.61804353128916e-05, "loss": 1.6254, "step": 72319 }, { "epoch": 2.41, "grad_norm": 0.7022786736488342, "learning_rate": 5.6174345661984734e-05, "loss": 1.7092, "step": 72320 }, { "epoch": 2.41, "grad_norm": 0.6682647466659546, "learning_rate": 5.616825630704628e-05, "loss": 1.6839, "step": 72321 }, { "epoch": 2.41, "grad_norm": 0.6792323589324951, "learning_rate": 5.6162167248083535e-05, "loss": 1.6258, "step": 72322 }, { "epoch": 2.41, "grad_norm": 0.6944113373756409, "learning_rate": 5.6156078485104025e-05, "loss": 1.7044, "step": 72323 }, { "epoch": 2.41, "grad_norm": 0.6867967844009399, "learning_rate": 5.614999001811525e-05, "loss": 1.6811, "step": 72324 }, { "epoch": 2.41, "grad_norm": 0.7050989866256714, "learning_rate": 5.6143901847124304e-05, "loss": 1.6962, "step": 72325 }, { "epoch": 2.41, "grad_norm": 0.6852340698242188, "learning_rate": 5.613781397213871e-05, "loss": 1.7052, "step": 72326 }, { "epoch": 2.41, "grad_norm": 0.6761563420295715, "learning_rate": 5.6131726393166e-05, "loss": 1.6614, "step": 72327 }, { "epoch": 2.41, "grad_norm": 0.6958079934120178, "learning_rate": 5.6125639110213396e-05, "loss": 1.7368, "step": 72328 }, { "epoch": 2.41, "grad_norm": 0.6838602423667908, "learning_rate": 5.6119552123288294e-05, "loss": 1.6438, "step": 72329 }, { "epoch": 2.41, "grad_norm": 0.6703316569328308, "learning_rate": 5.611346543239808e-05, "loss": 1.6904, "step": 72330 }, { "epoch": 2.41, "grad_norm": 0.6815124750137329, "learning_rate": 5.610737903755037e-05, "loss": 1.6631, "step": 72331 }, { "epoch": 2.41, "grad_norm": 0.6836112141609192, "learning_rate": 5.6101292938752205e-05, "loss": 1.7139, "step": 72332 }, { "epoch": 2.41, "grad_norm": 0.6707732081413269, "learning_rate": 5.609520713601112e-05, "loss": 1.6039, "step": 72333 }, { "epoch": 2.41, "grad_norm": 0.6798044443130493, "learning_rate": 5.608912162933461e-05, "loss": 1.7237, "step": 72334 }, { "epoch": 2.41, "grad_norm": 0.6980668902397156, "learning_rate": 5.608303641872997e-05, "loss": 1.6868, "step": 72335 }, { "epoch": 2.41, "grad_norm": 0.7080084681510925, "learning_rate": 5.607695150420451e-05, "loss": 1.6266, "step": 72336 }, { "epoch": 2.41, "grad_norm": 0.6842200756072998, "learning_rate": 5.607086688576569e-05, "loss": 1.7173, "step": 72337 }, { "epoch": 2.41, "grad_norm": 0.7047238945960999, "learning_rate": 5.606478256342104e-05, "loss": 1.7558, "step": 72338 }, { "epoch": 2.41, "grad_norm": 0.6834957599639893, "learning_rate": 5.6058698537177636e-05, "loss": 1.6862, "step": 72339 }, { "epoch": 2.41, "grad_norm": 0.6870865225791931, "learning_rate": 5.605261480704304e-05, "loss": 1.6566, "step": 72340 }, { "epoch": 2.41, "grad_norm": 0.6660496592521667, "learning_rate": 5.604653137302471e-05, "loss": 1.6949, "step": 72341 }, { "epoch": 2.41, "grad_norm": 0.6990705728530884, "learning_rate": 5.6040448235129933e-05, "loss": 1.6315, "step": 72342 }, { "epoch": 2.41, "grad_norm": 0.6918721795082092, "learning_rate": 5.603436539336604e-05, "loss": 1.5995, "step": 72343 }, { "epoch": 2.41, "grad_norm": 0.684521496295929, "learning_rate": 5.602828284774057e-05, "loss": 1.6671, "step": 72344 }, { "epoch": 2.41, "grad_norm": 0.6831962466239929, "learning_rate": 5.60222005982607e-05, "loss": 1.6667, "step": 72345 }, { "epoch": 2.41, "grad_norm": 0.6893124580383301, "learning_rate": 5.601611864493404e-05, "loss": 1.6779, "step": 72346 }, { "epoch": 2.41, "grad_norm": 0.6886104941368103, "learning_rate": 5.601003698776777e-05, "loss": 1.672, "step": 72347 }, { "epoch": 2.41, "grad_norm": 0.6858876943588257, "learning_rate": 5.600395562676946e-05, "loss": 1.6961, "step": 72348 }, { "epoch": 2.41, "grad_norm": 0.6829090118408203, "learning_rate": 5.599787456194637e-05, "loss": 1.711, "step": 72349 }, { "epoch": 2.41, "grad_norm": 0.6847036480903625, "learning_rate": 5.599179379330585e-05, "loss": 1.704, "step": 72350 }, { "epoch": 2.41, "grad_norm": 0.7093002200126648, "learning_rate": 5.598571332085541e-05, "loss": 1.679, "step": 72351 }, { "epoch": 2.41, "grad_norm": 0.7095966339111328, "learning_rate": 5.59796331446023e-05, "loss": 1.6494, "step": 72352 }, { "epoch": 2.41, "grad_norm": 0.6783314943313599, "learning_rate": 5.597355326455402e-05, "loss": 1.6739, "step": 72353 }, { "epoch": 2.41, "grad_norm": 0.687253475189209, "learning_rate": 5.596747368071789e-05, "loss": 1.7217, "step": 72354 }, { "epoch": 2.41, "grad_norm": 0.6863936185836792, "learning_rate": 5.5961394393101214e-05, "loss": 1.671, "step": 72355 }, { "epoch": 2.41, "grad_norm": 0.6649988889694214, "learning_rate": 5.5955315401711545e-05, "loss": 1.5993, "step": 72356 }, { "epoch": 2.41, "grad_norm": 0.7112528085708618, "learning_rate": 5.594923670655608e-05, "loss": 1.6721, "step": 72357 }, { "epoch": 2.41, "grad_norm": 0.6615248918533325, "learning_rate": 5.594315830764234e-05, "loss": 1.6724, "step": 72358 }, { "epoch": 2.41, "grad_norm": 0.7045401930809021, "learning_rate": 5.593708020497757e-05, "loss": 1.7004, "step": 72359 }, { "epoch": 2.41, "grad_norm": 0.680392861366272, "learning_rate": 5.5931002398569316e-05, "loss": 1.6831, "step": 72360 }, { "epoch": 2.41, "grad_norm": 0.6988660097122192, "learning_rate": 5.592492488842487e-05, "loss": 1.7394, "step": 72361 }, { "epoch": 2.41, "grad_norm": 0.6940550208091736, "learning_rate": 5.5918847674551505e-05, "loss": 1.6921, "step": 72362 }, { "epoch": 2.41, "grad_norm": 0.6949428915977478, "learning_rate": 5.591277075695677e-05, "loss": 1.7045, "step": 72363 }, { "epoch": 2.41, "grad_norm": 0.6782853603363037, "learning_rate": 5.5906694135648e-05, "loss": 1.7039, "step": 72364 }, { "epoch": 2.41, "grad_norm": 0.6637125611305237, "learning_rate": 5.590061781063242e-05, "loss": 1.6575, "step": 72365 }, { "epoch": 2.41, "grad_norm": 0.6868473291397095, "learning_rate": 5.589454178191756e-05, "loss": 1.7069, "step": 72366 }, { "epoch": 2.41, "grad_norm": 0.6671727895736694, "learning_rate": 5.58884660495108e-05, "loss": 1.6924, "step": 72367 }, { "epoch": 2.41, "grad_norm": 0.6740843057632446, "learning_rate": 5.588239061341949e-05, "loss": 1.6587, "step": 72368 }, { "epoch": 2.41, "grad_norm": 0.6737424731254578, "learning_rate": 5.587631547365091e-05, "loss": 1.7263, "step": 72369 }, { "epoch": 2.41, "grad_norm": 0.6537034511566162, "learning_rate": 5.587024063021263e-05, "loss": 1.6216, "step": 72370 }, { "epoch": 2.41, "grad_norm": 0.6497994065284729, "learning_rate": 5.586416608311186e-05, "loss": 1.6174, "step": 72371 }, { "epoch": 2.41, "grad_norm": 0.6924343705177307, "learning_rate": 5.585809183235594e-05, "loss": 1.6497, "step": 72372 }, { "epoch": 2.41, "grad_norm": 0.6830832362174988, "learning_rate": 5.585201787795234e-05, "loss": 1.6956, "step": 72373 }, { "epoch": 2.41, "grad_norm": 0.6767522096633911, "learning_rate": 5.5845944219908564e-05, "loss": 1.6784, "step": 72374 }, { "epoch": 2.41, "grad_norm": 0.6886470913887024, "learning_rate": 5.58398708582317e-05, "loss": 1.6507, "step": 72375 }, { "epoch": 2.41, "grad_norm": 0.6917155385017395, "learning_rate": 5.5833797792929245e-05, "loss": 1.7688, "step": 72376 }, { "epoch": 2.41, "grad_norm": 0.6859671473503113, "learning_rate": 5.582772502400865e-05, "loss": 1.7121, "step": 72377 }, { "epoch": 2.41, "grad_norm": 0.8396101593971252, "learning_rate": 5.5821652551477236e-05, "loss": 1.665, "step": 72378 }, { "epoch": 2.41, "grad_norm": 0.703398585319519, "learning_rate": 5.5815580375342295e-05, "loss": 1.6968, "step": 72379 }, { "epoch": 2.41, "grad_norm": 0.6910848617553711, "learning_rate": 5.580950849561122e-05, "loss": 1.7129, "step": 72380 }, { "epoch": 2.41, "grad_norm": 0.6821973919868469, "learning_rate": 5.580343691229161e-05, "loss": 1.7256, "step": 72381 }, { "epoch": 2.41, "grad_norm": 0.7062733769416809, "learning_rate": 5.5797365625390486e-05, "loss": 1.7348, "step": 72382 }, { "epoch": 2.41, "grad_norm": 0.6938092112541199, "learning_rate": 5.579129463491535e-05, "loss": 1.6705, "step": 72383 }, { "epoch": 2.41, "grad_norm": 0.7054485082626343, "learning_rate": 5.578522394087373e-05, "loss": 1.6678, "step": 72384 }, { "epoch": 2.41, "grad_norm": 0.6732437014579773, "learning_rate": 5.5779153543272805e-05, "loss": 1.647, "step": 72385 }, { "epoch": 2.41, "grad_norm": 0.6852360367774963, "learning_rate": 5.5773083442119983e-05, "loss": 1.6567, "step": 72386 }, { "epoch": 2.41, "grad_norm": 0.6742695569992065, "learning_rate": 5.576701363742262e-05, "loss": 1.6918, "step": 72387 }, { "epoch": 2.41, "grad_norm": 0.6965688467025757, "learning_rate": 5.5760944129188276e-05, "loss": 1.6506, "step": 72388 }, { "epoch": 2.41, "grad_norm": 0.6709046363830566, "learning_rate": 5.5754874917423987e-05, "loss": 1.642, "step": 72389 }, { "epoch": 2.41, "grad_norm": 0.6940662264823914, "learning_rate": 5.57488060021373e-05, "loss": 1.7242, "step": 72390 }, { "epoch": 2.41, "grad_norm": 0.7015353441238403, "learning_rate": 5.574273738333569e-05, "loss": 1.6674, "step": 72391 }, { "epoch": 2.41, "grad_norm": 0.6856520771980286, "learning_rate": 5.573666906102637e-05, "loss": 1.747, "step": 72392 }, { "epoch": 2.41, "grad_norm": 0.6855438351631165, "learning_rate": 5.573060103521668e-05, "loss": 1.7259, "step": 72393 }, { "epoch": 2.41, "grad_norm": 0.7019393444061279, "learning_rate": 5.572453330591411e-05, "loss": 1.6801, "step": 72394 }, { "epoch": 2.41, "grad_norm": 0.6847943067550659, "learning_rate": 5.571846587312598e-05, "loss": 1.7613, "step": 72395 }, { "epoch": 2.41, "grad_norm": 0.6982058882713318, "learning_rate": 5.571239873685956e-05, "loss": 1.7827, "step": 72396 }, { "epoch": 2.41, "grad_norm": 0.668878972530365, "learning_rate": 5.570633189712228e-05, "loss": 1.6872, "step": 72397 }, { "epoch": 2.41, "grad_norm": 0.7009629607200623, "learning_rate": 5.5700265353921594e-05, "loss": 1.7398, "step": 72398 }, { "epoch": 2.41, "grad_norm": 0.700473964214325, "learning_rate": 5.569419910726483e-05, "loss": 1.6971, "step": 72399 }, { "epoch": 2.41, "grad_norm": 0.6747539639472961, "learning_rate": 5.5688133157159174e-05, "loss": 1.593, "step": 72400 }, { "epoch": 2.41, "grad_norm": 0.677227795124054, "learning_rate": 5.568206750361224e-05, "loss": 1.6683, "step": 72401 }, { "epoch": 2.41, "grad_norm": 0.6746390461921692, "learning_rate": 5.567600214663127e-05, "loss": 1.6658, "step": 72402 }, { "epoch": 2.41, "grad_norm": 0.6859810948371887, "learning_rate": 5.566993708622357e-05, "loss": 1.6761, "step": 72403 }, { "epoch": 2.41, "grad_norm": 0.6859167814254761, "learning_rate": 5.566387232239663e-05, "loss": 1.6689, "step": 72404 }, { "epoch": 2.41, "grad_norm": 0.6767265200614929, "learning_rate": 5.5657807855157674e-05, "loss": 1.7026, "step": 72405 }, { "epoch": 2.41, "grad_norm": 0.6982355117797852, "learning_rate": 5.565174368451424e-05, "loss": 1.6669, "step": 72406 }, { "epoch": 2.41, "grad_norm": 0.6694336533546448, "learning_rate": 5.564567981047348e-05, "loss": 1.705, "step": 72407 }, { "epoch": 2.41, "grad_norm": 0.6975564360618591, "learning_rate": 5.563961623304291e-05, "loss": 1.7402, "step": 72408 }, { "epoch": 2.41, "grad_norm": 0.6822370886802673, "learning_rate": 5.563355295222989e-05, "loss": 1.6804, "step": 72409 }, { "epoch": 2.41, "grad_norm": 0.6808338761329651, "learning_rate": 5.562748996804165e-05, "loss": 1.7138, "step": 72410 }, { "epoch": 2.41, "grad_norm": 0.6674553155899048, "learning_rate": 5.5621427280485666e-05, "loss": 1.7739, "step": 72411 }, { "epoch": 2.41, "grad_norm": 0.6976093649864197, "learning_rate": 5.5615364889569225e-05, "loss": 1.7911, "step": 72412 }, { "epoch": 2.41, "grad_norm": 0.6991155743598938, "learning_rate": 5.5609302795299795e-05, "loss": 1.7564, "step": 72413 }, { "epoch": 2.41, "grad_norm": 0.6929640173912048, "learning_rate": 5.560324099768466e-05, "loss": 1.6713, "step": 72414 }, { "epoch": 2.41, "grad_norm": 0.6597329378128052, "learning_rate": 5.5597179496731094e-05, "loss": 1.7339, "step": 72415 }, { "epoch": 2.41, "grad_norm": 0.671683669090271, "learning_rate": 5.5591118292446615e-05, "loss": 1.7143, "step": 72416 }, { "epoch": 2.41, "grad_norm": 0.6982114911079407, "learning_rate": 5.5585057384838415e-05, "loss": 1.7369, "step": 72417 }, { "epoch": 2.41, "grad_norm": 0.6946219801902771, "learning_rate": 5.557899677391406e-05, "loss": 1.6988, "step": 72418 }, { "epoch": 2.41, "grad_norm": 0.6908867359161377, "learning_rate": 5.5572936459680684e-05, "loss": 1.7142, "step": 72419 }, { "epoch": 2.41, "grad_norm": 0.6806597113609314, "learning_rate": 5.556687644214584e-05, "loss": 1.6843, "step": 72420 }, { "epoch": 2.41, "grad_norm": 0.7009418606758118, "learning_rate": 5.556081672131678e-05, "loss": 1.6688, "step": 72421 }, { "epoch": 2.41, "grad_norm": 0.6936008334159851, "learning_rate": 5.5554757297200816e-05, "loss": 1.6803, "step": 72422 }, { "epoch": 2.41, "grad_norm": 0.6777437925338745, "learning_rate": 5.554869816980543e-05, "loss": 1.7236, "step": 72423 }, { "epoch": 2.41, "grad_norm": 0.6968769431114197, "learning_rate": 5.554263933913788e-05, "loss": 1.6893, "step": 72424 }, { "epoch": 2.41, "grad_norm": 0.7093679904937744, "learning_rate": 5.5536580805205476e-05, "loss": 1.7333, "step": 72425 }, { "epoch": 2.41, "grad_norm": 0.661514163017273, "learning_rate": 5.5530522568015666e-05, "loss": 1.6286, "step": 72426 }, { "epoch": 2.41, "grad_norm": 0.6783117651939392, "learning_rate": 5.552446462757585e-05, "loss": 1.722, "step": 72427 }, { "epoch": 2.41, "grad_norm": 0.695482075214386, "learning_rate": 5.5518406983893316e-05, "loss": 1.7523, "step": 72428 }, { "epoch": 2.41, "grad_norm": 1.1933294534683228, "learning_rate": 5.551234963697533e-05, "loss": 1.6901, "step": 72429 }, { "epoch": 2.41, "grad_norm": 0.6786043047904968, "learning_rate": 5.5506292586829385e-05, "loss": 1.6728, "step": 72430 }, { "epoch": 2.41, "grad_norm": 0.6614488959312439, "learning_rate": 5.5500235833462805e-05, "loss": 1.6113, "step": 72431 }, { "epoch": 2.41, "grad_norm": 0.6816986799240112, "learning_rate": 5.549417937688282e-05, "loss": 1.7188, "step": 72432 }, { "epoch": 2.41, "grad_norm": 0.6905866861343384, "learning_rate": 5.5488123217096894e-05, "loss": 1.7013, "step": 72433 }, { "epoch": 2.41, "grad_norm": 0.680607795715332, "learning_rate": 5.548206735411248e-05, "loss": 1.7094, "step": 72434 }, { "epoch": 2.41, "grad_norm": 0.6794072389602661, "learning_rate": 5.5476011787936684e-05, "loss": 1.6188, "step": 72435 }, { "epoch": 2.41, "grad_norm": 0.656721830368042, "learning_rate": 5.5469956518576984e-05, "loss": 1.7046, "step": 72436 }, { "epoch": 2.41, "grad_norm": 0.6598158478736877, "learning_rate": 5.546390154604079e-05, "loss": 1.7322, "step": 72437 }, { "epoch": 2.41, "grad_norm": 0.684128999710083, "learning_rate": 5.545784687033539e-05, "loss": 1.613, "step": 72438 }, { "epoch": 2.41, "grad_norm": 0.7093794941902161, "learning_rate": 5.545179249146804e-05, "loss": 1.6839, "step": 72439 }, { "epoch": 2.41, "grad_norm": 0.6811943054199219, "learning_rate": 5.544573840944621e-05, "loss": 1.6995, "step": 72440 }, { "epoch": 2.41, "grad_norm": 0.6630772352218628, "learning_rate": 5.543968462427735e-05, "loss": 1.695, "step": 72441 }, { "epoch": 2.41, "grad_norm": 0.6675522327423096, "learning_rate": 5.5433631135968557e-05, "loss": 1.6468, "step": 72442 }, { "epoch": 2.41, "grad_norm": 0.6763963103294373, "learning_rate": 5.542757794452727e-05, "loss": 1.6819, "step": 72443 }, { "epoch": 2.41, "grad_norm": 0.7011901140213013, "learning_rate": 5.542152504996097e-05, "loss": 1.7248, "step": 72444 }, { "epoch": 2.41, "grad_norm": 0.6659170389175415, "learning_rate": 5.5415472452276856e-05, "loss": 1.7312, "step": 72445 }, { "epoch": 2.41, "grad_norm": 0.6721775531768799, "learning_rate": 5.540942015148229e-05, "loss": 1.6957, "step": 72446 }, { "epoch": 2.41, "grad_norm": 0.6756839156150818, "learning_rate": 5.540336814758463e-05, "loss": 1.6342, "step": 72447 }, { "epoch": 2.41, "grad_norm": 0.7065877914428711, "learning_rate": 5.53973164405914e-05, "loss": 1.6943, "step": 72448 }, { "epoch": 2.41, "grad_norm": 0.6783477663993835, "learning_rate": 5.5391265030509636e-05, "loss": 1.677, "step": 72449 }, { "epoch": 2.41, "grad_norm": 0.6945235729217529, "learning_rate": 5.538521391734683e-05, "loss": 1.7141, "step": 72450 }, { "epoch": 2.41, "grad_norm": 0.6983041763305664, "learning_rate": 5.53791631011104e-05, "loss": 1.7411, "step": 72451 }, { "epoch": 2.41, "grad_norm": 0.6958133578300476, "learning_rate": 5.537311258180762e-05, "loss": 1.7424, "step": 72452 }, { "epoch": 2.41, "grad_norm": 0.6922722458839417, "learning_rate": 5.536706235944578e-05, "loss": 1.6681, "step": 72453 }, { "epoch": 2.41, "grad_norm": 0.6718629002571106, "learning_rate": 5.536101243403227e-05, "loss": 1.7196, "step": 72454 }, { "epoch": 2.41, "grad_norm": 0.6717516779899597, "learning_rate": 5.5354962805574524e-05, "loss": 1.6428, "step": 72455 }, { "epoch": 2.41, "grad_norm": 0.6850168108940125, "learning_rate": 5.5348913474079824e-05, "loss": 1.6685, "step": 72456 }, { "epoch": 2.41, "grad_norm": 0.6805824041366577, "learning_rate": 5.534286443955541e-05, "loss": 1.7208, "step": 72457 }, { "epoch": 2.41, "grad_norm": 0.6598920226097107, "learning_rate": 5.533681570200877e-05, "loss": 1.6012, "step": 72458 }, { "epoch": 2.41, "grad_norm": 0.6593498587608337, "learning_rate": 5.533076726144721e-05, "loss": 1.6116, "step": 72459 }, { "epoch": 2.41, "grad_norm": 0.7047344446182251, "learning_rate": 5.532471911787794e-05, "loss": 1.7281, "step": 72460 }, { "epoch": 2.41, "grad_norm": 0.7043493390083313, "learning_rate": 5.531867127130849e-05, "loss": 1.7574, "step": 72461 }, { "epoch": 2.41, "grad_norm": 0.6980466842651367, "learning_rate": 5.531262372174606e-05, "loss": 1.7032, "step": 72462 }, { "epoch": 2.41, "grad_norm": 0.6801999807357788, "learning_rate": 5.530657646919815e-05, "loss": 1.6808, "step": 72463 }, { "epoch": 2.41, "grad_norm": 0.6797665357589722, "learning_rate": 5.530052951367188e-05, "loss": 1.6754, "step": 72464 }, { "epoch": 2.41, "grad_norm": 0.6773108839988708, "learning_rate": 5.5294482855174805e-05, "loss": 1.6446, "step": 72465 }, { "epoch": 2.41, "grad_norm": 0.6979498863220215, "learning_rate": 5.5288436493714196e-05, "loss": 1.7625, "step": 72466 }, { "epoch": 2.41, "grad_norm": 0.7020489573478699, "learning_rate": 5.5282390429297274e-05, "loss": 1.7428, "step": 72467 }, { "epoch": 2.41, "grad_norm": 0.6638926863670349, "learning_rate": 5.527634466193154e-05, "loss": 1.6829, "step": 72468 }, { "epoch": 2.41, "grad_norm": 0.6895641684532166, "learning_rate": 5.527029919162421e-05, "loss": 1.7395, "step": 72469 }, { "epoch": 2.41, "grad_norm": 0.7296619415283203, "learning_rate": 5.526425401838273e-05, "loss": 1.8764, "step": 72470 }, { "epoch": 2.41, "grad_norm": 0.7039088606834412, "learning_rate": 5.52582091422144e-05, "loss": 1.698, "step": 72471 }, { "epoch": 2.41, "grad_norm": 0.6722209453582764, "learning_rate": 5.525216456312648e-05, "loss": 1.5991, "step": 72472 }, { "epoch": 2.41, "grad_norm": 0.6782540082931519, "learning_rate": 5.524612028112641e-05, "loss": 1.6504, "step": 72473 }, { "epoch": 2.41, "grad_norm": 0.6791408061981201, "learning_rate": 5.524007629622145e-05, "loss": 1.6813, "step": 72474 }, { "epoch": 2.41, "grad_norm": 0.6787230372428894, "learning_rate": 5.523403260841903e-05, "loss": 1.6618, "step": 72475 }, { "epoch": 2.41, "grad_norm": 0.684736967086792, "learning_rate": 5.5227989217726353e-05, "loss": 1.6298, "step": 72476 }, { "epoch": 2.41, "grad_norm": 0.66358482837677, "learning_rate": 5.5221946124150906e-05, "loss": 1.6707, "step": 72477 }, { "epoch": 2.41, "grad_norm": 0.6880589127540588, "learning_rate": 5.521590332769995e-05, "loss": 1.7028, "step": 72478 }, { "epoch": 2.41, "grad_norm": 0.702288031578064, "learning_rate": 5.5209860828380745e-05, "loss": 1.6966, "step": 72479 }, { "epoch": 2.41, "grad_norm": 0.6661696434020996, "learning_rate": 5.520381862620079e-05, "loss": 1.688, "step": 72480 }, { "epoch": 2.41, "grad_norm": 0.6958453059196472, "learning_rate": 5.519777672116731e-05, "loss": 1.7332, "step": 72481 }, { "epoch": 2.41, "grad_norm": 0.6882383823394775, "learning_rate": 5.519173511328759e-05, "loss": 1.6452, "step": 72482 }, { "epoch": 2.41, "grad_norm": 0.6734011769294739, "learning_rate": 5.518569380256904e-05, "loss": 1.6783, "step": 72483 }, { "epoch": 2.41, "grad_norm": 0.6932258605957031, "learning_rate": 5.517965278901908e-05, "loss": 1.6428, "step": 72484 }, { "epoch": 2.41, "grad_norm": 0.6713931560516357, "learning_rate": 5.51736120726449e-05, "loss": 1.7172, "step": 72485 }, { "epoch": 2.41, "grad_norm": 0.6853387355804443, "learning_rate": 5.516757165345384e-05, "loss": 1.7043, "step": 72486 }, { "epoch": 2.41, "grad_norm": 0.6904433369636536, "learning_rate": 5.516153153145334e-05, "loss": 1.6589, "step": 72487 }, { "epoch": 2.41, "grad_norm": 0.6804614067077637, "learning_rate": 5.515549170665068e-05, "loss": 1.628, "step": 72488 }, { "epoch": 2.41, "grad_norm": 0.7022759318351746, "learning_rate": 5.5149452179053075e-05, "loss": 1.6679, "step": 72489 }, { "epoch": 2.41, "grad_norm": 0.7069151997566223, "learning_rate": 5.514341294866797e-05, "loss": 1.6594, "step": 72490 }, { "epoch": 2.41, "grad_norm": 0.6595535278320312, "learning_rate": 5.513737401550283e-05, "loss": 1.6877, "step": 72491 }, { "epoch": 2.41, "grad_norm": 0.6879178285598755, "learning_rate": 5.5131335379564714e-05, "loss": 1.7712, "step": 72492 }, { "epoch": 2.41, "grad_norm": 0.6832030415534973, "learning_rate": 5.512529704086106e-05, "loss": 1.6893, "step": 72493 }, { "epoch": 2.41, "grad_norm": 0.7018883228302002, "learning_rate": 5.511925899939927e-05, "loss": 1.7017, "step": 72494 }, { "epoch": 2.41, "grad_norm": 0.6712698340415955, "learning_rate": 5.511322125518664e-05, "loss": 1.7185, "step": 72495 }, { "epoch": 2.41, "grad_norm": 0.702704668045044, "learning_rate": 5.51071838082304e-05, "loss": 1.7201, "step": 72496 }, { "epoch": 2.41, "grad_norm": 0.6692657470703125, "learning_rate": 5.5101146658537975e-05, "loss": 1.7544, "step": 72497 }, { "epoch": 2.41, "grad_norm": 0.6829178929328918, "learning_rate": 5.509510980611679e-05, "loss": 1.6963, "step": 72498 }, { "epoch": 2.41, "grad_norm": 0.6796401143074036, "learning_rate": 5.5089073250973904e-05, "loss": 1.6281, "step": 72499 }, { "epoch": 2.41, "grad_norm": 0.6826077103614807, "learning_rate": 5.5083036993116816e-05, "loss": 1.6633, "step": 72500 }, { "epoch": 2.41, "grad_norm": 0.682308554649353, "learning_rate": 5.507700103255292e-05, "loss": 1.6555, "step": 72501 }, { "epoch": 2.41, "grad_norm": 0.6908623576164246, "learning_rate": 5.507096536928943e-05, "loss": 1.6557, "step": 72502 }, { "epoch": 2.41, "grad_norm": 0.6834200620651245, "learning_rate": 5.506493000333363e-05, "loss": 1.7705, "step": 72503 }, { "epoch": 2.41, "grad_norm": 0.7003931999206543, "learning_rate": 5.5058894934692935e-05, "loss": 1.6797, "step": 72504 }, { "epoch": 2.41, "grad_norm": 0.6711974143981934, "learning_rate": 5.50528601633748e-05, "loss": 1.7244, "step": 72505 }, { "epoch": 2.41, "grad_norm": 0.6910844445228577, "learning_rate": 5.504682568938623e-05, "loss": 1.7323, "step": 72506 }, { "epoch": 2.41, "grad_norm": 0.6692107319831848, "learning_rate": 5.504079151273475e-05, "loss": 1.7361, "step": 72507 }, { "epoch": 2.41, "grad_norm": 0.7059244513511658, "learning_rate": 5.503475763342768e-05, "loss": 1.7229, "step": 72508 }, { "epoch": 2.41, "grad_norm": 0.6854239702224731, "learning_rate": 5.502872405147236e-05, "loss": 1.6936, "step": 72509 }, { "epoch": 2.41, "grad_norm": 0.6784453392028809, "learning_rate": 5.502269076687601e-05, "loss": 1.73, "step": 72510 }, { "epoch": 2.41, "grad_norm": 0.6699697375297546, "learning_rate": 5.5016657779646056e-05, "loss": 1.7124, "step": 72511 }, { "epoch": 2.41, "grad_norm": 0.6956294775009155, "learning_rate": 5.501062508978983e-05, "loss": 1.705, "step": 72512 }, { "epoch": 2.41, "grad_norm": 0.6940113306045532, "learning_rate": 5.500459269731449e-05, "loss": 1.6714, "step": 72513 }, { "epoch": 2.41, "grad_norm": 0.6780745387077332, "learning_rate": 5.499856060222749e-05, "loss": 1.6427, "step": 72514 }, { "epoch": 2.41, "grad_norm": 0.6930036544799805, "learning_rate": 5.499252880453621e-05, "loss": 1.7505, "step": 72515 }, { "epoch": 2.41, "grad_norm": 0.6835470795631409, "learning_rate": 5.4986497304247924e-05, "loss": 1.6976, "step": 72516 }, { "epoch": 2.41, "grad_norm": 0.675049901008606, "learning_rate": 5.49804661013698e-05, "loss": 1.7696, "step": 72517 }, { "epoch": 2.41, "grad_norm": 0.6815616488456726, "learning_rate": 5.4974435195909406e-05, "loss": 1.7341, "step": 72518 }, { "epoch": 2.41, "grad_norm": 0.6597706079483032, "learning_rate": 5.496840458787396e-05, "loss": 1.638, "step": 72519 }, { "epoch": 2.41, "grad_norm": 0.6895472407341003, "learning_rate": 5.496237427727066e-05, "loss": 1.7097, "step": 72520 }, { "epoch": 2.41, "grad_norm": 0.681776762008667, "learning_rate": 5.4956344264107e-05, "loss": 1.6916, "step": 72521 }, { "epoch": 2.41, "grad_norm": 0.6769661903381348, "learning_rate": 5.495031454839017e-05, "loss": 1.727, "step": 72522 }, { "epoch": 2.41, "grad_norm": 0.685205340385437, "learning_rate": 5.494428513012764e-05, "loss": 1.6937, "step": 72523 }, { "epoch": 2.41, "grad_norm": 0.6750142574310303, "learning_rate": 5.4938256009326566e-05, "loss": 1.6045, "step": 72524 }, { "epoch": 2.41, "grad_norm": 0.7034357786178589, "learning_rate": 5.4932227185994404e-05, "loss": 1.6699, "step": 72525 }, { "epoch": 2.41, "grad_norm": 0.6873887777328491, "learning_rate": 5.492619866013842e-05, "loss": 1.7013, "step": 72526 }, { "epoch": 2.41, "grad_norm": 0.6693775653839111, "learning_rate": 5.492017043176581e-05, "loss": 1.7021, "step": 72527 }, { "epoch": 2.41, "grad_norm": 0.6821762323379517, "learning_rate": 5.491414250088413e-05, "loss": 1.7329, "step": 72528 }, { "epoch": 2.41, "grad_norm": 0.6698561310768127, "learning_rate": 5.490811486750044e-05, "loss": 1.6631, "step": 72529 }, { "epoch": 2.41, "grad_norm": 0.6927739381790161, "learning_rate": 5.490208753162231e-05, "loss": 1.6253, "step": 72530 }, { "epoch": 2.41, "grad_norm": 0.686657190322876, "learning_rate": 5.489606049325692e-05, "loss": 1.7091, "step": 72531 }, { "epoch": 2.41, "grad_norm": 0.717269778251648, "learning_rate": 5.489003375241151e-05, "loss": 1.7408, "step": 72532 }, { "epoch": 2.41, "grad_norm": 0.7047014236450195, "learning_rate": 5.488400730909357e-05, "loss": 1.7443, "step": 72533 }, { "epoch": 2.41, "grad_norm": 0.6814361214637756, "learning_rate": 5.487798116331022e-05, "loss": 1.6808, "step": 72534 }, { "epoch": 2.41, "grad_norm": 0.672173798084259, "learning_rate": 5.4871955315069005e-05, "loss": 1.7406, "step": 72535 }, { "epoch": 2.41, "grad_norm": 0.7128516435623169, "learning_rate": 5.4865929764377004e-05, "loss": 1.6373, "step": 72536 }, { "epoch": 2.41, "grad_norm": 0.6844267845153809, "learning_rate": 5.485990451124175e-05, "loss": 1.6534, "step": 72537 }, { "epoch": 2.41, "grad_norm": 0.6819816827774048, "learning_rate": 5.485387955567047e-05, "loss": 1.7206, "step": 72538 }, { "epoch": 2.41, "grad_norm": 0.6840013265609741, "learning_rate": 5.4847854897670354e-05, "loss": 1.7198, "step": 72539 }, { "epoch": 2.41, "grad_norm": 0.6818858981132507, "learning_rate": 5.48418305372489e-05, "loss": 1.7577, "step": 72540 }, { "epoch": 2.41, "grad_norm": 0.7281702160835266, "learning_rate": 5.483580647441333e-05, "loss": 1.6918, "step": 72541 }, { "epoch": 2.41, "grad_norm": 0.677605152130127, "learning_rate": 5.482978270917089e-05, "loss": 1.6858, "step": 72542 }, { "epoch": 2.41, "grad_norm": 0.6984784603118896, "learning_rate": 5.482375924152899e-05, "loss": 1.6644, "step": 72543 }, { "epoch": 2.41, "grad_norm": 0.6817421913146973, "learning_rate": 5.481773607149499e-05, "loss": 1.6845, "step": 72544 }, { "epoch": 2.41, "grad_norm": 0.6827049851417542, "learning_rate": 5.4811713199076125e-05, "loss": 1.6831, "step": 72545 }, { "epoch": 2.41, "grad_norm": 0.6917637586593628, "learning_rate": 5.480569062427965e-05, "loss": 1.6856, "step": 72546 }, { "epoch": 2.41, "grad_norm": 0.6699840426445007, "learning_rate": 5.4799668347113e-05, "loss": 1.6692, "step": 72547 }, { "epoch": 2.41, "grad_norm": 0.7146551012992859, "learning_rate": 5.479364636758347e-05, "loss": 1.7394, "step": 72548 }, { "epoch": 2.41, "grad_norm": 0.6865313649177551, "learning_rate": 5.4787624685698205e-05, "loss": 1.7219, "step": 72549 }, { "epoch": 2.41, "grad_norm": 0.6632140278816223, "learning_rate": 5.4781603301464615e-05, "loss": 1.6462, "step": 72550 }, { "epoch": 2.41, "grad_norm": 0.6978920102119446, "learning_rate": 5.477558221489022e-05, "loss": 1.6313, "step": 72551 }, { "epoch": 2.41, "grad_norm": 0.7096325755119324, "learning_rate": 5.4769561425981954e-05, "loss": 1.6447, "step": 72552 }, { "epoch": 2.41, "grad_norm": 0.6733074188232422, "learning_rate": 5.4763540934747344e-05, "loss": 1.6288, "step": 72553 }, { "epoch": 2.41, "grad_norm": 0.6945812106132507, "learning_rate": 5.475752074119371e-05, "loss": 1.7413, "step": 72554 }, { "epoch": 2.41, "grad_norm": 0.6732452511787415, "learning_rate": 5.475150084532831e-05, "loss": 1.6663, "step": 72555 }, { "epoch": 2.41, "grad_norm": 0.7126430869102478, "learning_rate": 5.474548124715839e-05, "loss": 1.7124, "step": 72556 }, { "epoch": 2.41, "grad_norm": 0.6889156103134155, "learning_rate": 5.473946194669132e-05, "loss": 1.7432, "step": 72557 }, { "epoch": 2.41, "grad_norm": 0.6794965863227844, "learning_rate": 5.473344294393455e-05, "loss": 1.6954, "step": 72558 }, { "epoch": 2.41, "grad_norm": 0.7063901424407959, "learning_rate": 5.472742423889509e-05, "loss": 1.6846, "step": 72559 }, { "epoch": 2.41, "grad_norm": 0.6984626650810242, "learning_rate": 5.472140583158038e-05, "loss": 1.707, "step": 72560 }, { "epoch": 2.41, "grad_norm": 0.7183021903038025, "learning_rate": 5.471538772199784e-05, "loss": 1.7211, "step": 72561 }, { "epoch": 2.41, "grad_norm": 0.674190104007721, "learning_rate": 5.4709369910154666e-05, "loss": 1.6531, "step": 72562 }, { "epoch": 2.41, "grad_norm": 0.6740609407424927, "learning_rate": 5.470335239605812e-05, "loss": 1.6643, "step": 72563 }, { "epoch": 2.41, "grad_norm": 0.6984802484512329, "learning_rate": 5.469733517971556e-05, "loss": 1.7081, "step": 72564 }, { "epoch": 2.41, "grad_norm": 0.7070163488388062, "learning_rate": 5.469131826113442e-05, "loss": 1.7767, "step": 72565 }, { "epoch": 2.41, "grad_norm": 0.6649956703186035, "learning_rate": 5.468530164032175e-05, "loss": 1.6935, "step": 72566 }, { "epoch": 2.41, "grad_norm": 0.7036755084991455, "learning_rate": 5.467928531728496e-05, "loss": 1.7599, "step": 72567 }, { "epoch": 2.41, "grad_norm": 0.7085984945297241, "learning_rate": 5.467326929203143e-05, "loss": 1.6158, "step": 72568 }, { "epoch": 2.41, "grad_norm": 0.6877173185348511, "learning_rate": 5.466725356456843e-05, "loss": 1.6819, "step": 72569 }, { "epoch": 2.41, "grad_norm": 0.6704684495925903, "learning_rate": 5.466123813490317e-05, "loss": 1.6466, "step": 72570 }, { "epoch": 2.41, "grad_norm": 0.7035514712333679, "learning_rate": 5.465522300304307e-05, "loss": 1.6113, "step": 72571 }, { "epoch": 2.41, "grad_norm": 0.6989798545837402, "learning_rate": 5.4649208168995305e-05, "loss": 1.7214, "step": 72572 }, { "epoch": 2.41, "grad_norm": 0.6758631467819214, "learning_rate": 5.4643193632767344e-05, "loss": 1.6691, "step": 72573 }, { "epoch": 2.41, "grad_norm": 0.6788697242736816, "learning_rate": 5.463717939436632e-05, "loss": 1.6669, "step": 72574 }, { "epoch": 2.41, "grad_norm": 0.6566945314407349, "learning_rate": 5.4631165453799674e-05, "loss": 1.7189, "step": 72575 }, { "epoch": 2.41, "grad_norm": 0.6513549089431763, "learning_rate": 5.4625151811074616e-05, "loss": 1.6754, "step": 72576 }, { "epoch": 2.41, "grad_norm": 0.6809432506561279, "learning_rate": 5.461913846619844e-05, "loss": 1.715, "step": 72577 }, { "epoch": 2.41, "grad_norm": 0.7176023721694946, "learning_rate": 5.46131254191785e-05, "loss": 1.7339, "step": 72578 }, { "epoch": 2.41, "grad_norm": 0.6963231563568115, "learning_rate": 5.4607112670022035e-05, "loss": 1.7575, "step": 72579 }, { "epoch": 2.41, "grad_norm": 0.6807532906532288, "learning_rate": 5.460110021873643e-05, "loss": 1.6965, "step": 72580 }, { "epoch": 2.41, "grad_norm": 0.6665775775909424, "learning_rate": 5.4595088065328875e-05, "loss": 1.6958, "step": 72581 }, { "epoch": 2.41, "grad_norm": 0.6814261674880981, "learning_rate": 5.4589076209806805e-05, "loss": 1.6948, "step": 72582 }, { "epoch": 2.41, "grad_norm": 0.6749165654182434, "learning_rate": 5.458306465217741e-05, "loss": 1.6202, "step": 72583 }, { "epoch": 2.41, "grad_norm": 0.7094016075134277, "learning_rate": 5.457705339244796e-05, "loss": 1.6873, "step": 72584 }, { "epoch": 2.41, "grad_norm": 0.6950401663780212, "learning_rate": 5.457104243062586e-05, "loss": 1.7507, "step": 72585 }, { "epoch": 2.41, "grad_norm": 0.6795519590377808, "learning_rate": 5.45650317667183e-05, "loss": 1.7711, "step": 72586 }, { "epoch": 2.41, "grad_norm": 0.692265510559082, "learning_rate": 5.455902140073269e-05, "loss": 1.7582, "step": 72587 }, { "epoch": 2.42, "grad_norm": 0.6846021413803101, "learning_rate": 5.4553011332676264e-05, "loss": 1.711, "step": 72588 }, { "epoch": 2.42, "grad_norm": 0.6821679472923279, "learning_rate": 5.454700156255625e-05, "loss": 1.6774, "step": 72589 }, { "epoch": 2.42, "grad_norm": 0.6894906163215637, "learning_rate": 5.4540992090380065e-05, "loss": 1.7446, "step": 72590 }, { "epoch": 2.42, "grad_norm": 0.6548019647598267, "learning_rate": 5.453498291615488e-05, "loss": 1.6384, "step": 72591 }, { "epoch": 2.42, "grad_norm": 0.6841364502906799, "learning_rate": 5.452897403988815e-05, "loss": 1.7643, "step": 72592 }, { "epoch": 2.42, "grad_norm": 0.6838322877883911, "learning_rate": 5.452296546158701e-05, "loss": 1.6241, "step": 72593 }, { "epoch": 2.42, "grad_norm": 0.6828954815864563, "learning_rate": 5.451695718125888e-05, "loss": 1.6336, "step": 72594 }, { "epoch": 2.42, "grad_norm": 0.6983720064163208, "learning_rate": 5.451094919891099e-05, "loss": 1.6793, "step": 72595 }, { "epoch": 2.42, "grad_norm": 0.6631721258163452, "learning_rate": 5.450494151455057e-05, "loss": 1.5752, "step": 72596 }, { "epoch": 2.42, "grad_norm": 0.6746723055839539, "learning_rate": 5.449893412818505e-05, "loss": 1.7082, "step": 72597 }, { "epoch": 2.42, "grad_norm": 0.6894810199737549, "learning_rate": 5.449292703982164e-05, "loss": 1.692, "step": 72598 }, { "epoch": 2.42, "grad_norm": 0.6897943019866943, "learning_rate": 5.448692024946759e-05, "loss": 1.8091, "step": 72599 }, { "epoch": 2.42, "grad_norm": 0.7463147044181824, "learning_rate": 5.4480913757130216e-05, "loss": 1.676, "step": 72600 }, { "epoch": 2.42, "grad_norm": 0.7008135318756104, "learning_rate": 5.447490756281692e-05, "loss": 1.7341, "step": 72601 }, { "epoch": 2.42, "grad_norm": 0.6788831353187561, "learning_rate": 5.4468901666534914e-05, "loss": 1.7241, "step": 72602 }, { "epoch": 2.42, "grad_norm": 0.6986055970191956, "learning_rate": 5.446289606829144e-05, "loss": 1.6494, "step": 72603 }, { "epoch": 2.42, "grad_norm": 0.6731089353561401, "learning_rate": 5.445689076809385e-05, "loss": 1.671, "step": 72604 }, { "epoch": 2.42, "grad_norm": 0.6733033061027527, "learning_rate": 5.445088576594945e-05, "loss": 1.635, "step": 72605 }, { "epoch": 2.42, "grad_norm": 0.701398491859436, "learning_rate": 5.444488106186542e-05, "loss": 1.6782, "step": 72606 }, { "epoch": 2.42, "grad_norm": 0.6948102712631226, "learning_rate": 5.4438876655849126e-05, "loss": 1.687, "step": 72607 }, { "epoch": 2.42, "grad_norm": 0.6745563745498657, "learning_rate": 5.4432872547908e-05, "loss": 1.7246, "step": 72608 }, { "epoch": 2.42, "grad_norm": 0.6983760595321655, "learning_rate": 5.442686873804903e-05, "loss": 1.7671, "step": 72609 }, { "epoch": 2.42, "grad_norm": 0.6876058578491211, "learning_rate": 5.442086522627965e-05, "loss": 1.7276, "step": 72610 }, { "epoch": 2.42, "grad_norm": 0.7100732326507568, "learning_rate": 5.4414862012607254e-05, "loss": 1.7197, "step": 72611 }, { "epoch": 2.42, "grad_norm": 0.6854443550109863, "learning_rate": 5.440885909703899e-05, "loss": 1.7001, "step": 72612 }, { "epoch": 2.42, "grad_norm": 0.6877104640007019, "learning_rate": 5.4402856479582133e-05, "loss": 1.6925, "step": 72613 }, { "epoch": 2.42, "grad_norm": 0.6898055672645569, "learning_rate": 5.439685416024404e-05, "loss": 1.6765, "step": 72614 }, { "epoch": 2.42, "grad_norm": 0.6799628734588623, "learning_rate": 5.4390852139032107e-05, "loss": 1.6973, "step": 72615 }, { "epoch": 2.42, "grad_norm": 0.6957457065582275, "learning_rate": 5.438485041595335e-05, "loss": 1.7657, "step": 72616 }, { "epoch": 2.42, "grad_norm": 0.6711918711662292, "learning_rate": 5.437884899101518e-05, "loss": 1.7088, "step": 72617 }, { "epoch": 2.42, "grad_norm": 0.6938956379890442, "learning_rate": 5.4372847864224976e-05, "loss": 1.565, "step": 72618 }, { "epoch": 2.42, "grad_norm": 0.688289225101471, "learning_rate": 5.436684703558998e-05, "loss": 1.6775, "step": 72619 }, { "epoch": 2.42, "grad_norm": 0.7026998996734619, "learning_rate": 5.4360846505117306e-05, "loss": 1.7712, "step": 72620 }, { "epoch": 2.42, "grad_norm": 0.6725500822067261, "learning_rate": 5.435484627281442e-05, "loss": 1.6099, "step": 72621 }, { "epoch": 2.42, "grad_norm": 0.6706913113594055, "learning_rate": 5.434884633868869e-05, "loss": 1.7144, "step": 72622 }, { "epoch": 2.42, "grad_norm": 0.6740565896034241, "learning_rate": 5.43428467027471e-05, "loss": 1.6088, "step": 72623 }, { "epoch": 2.42, "grad_norm": 0.6629510521888733, "learning_rate": 5.4336847364997115e-05, "loss": 1.6372, "step": 72624 }, { "epoch": 2.42, "grad_norm": 0.7080633044242859, "learning_rate": 5.433084832544607e-05, "loss": 1.7484, "step": 72625 }, { "epoch": 2.42, "grad_norm": 0.6890246868133545, "learning_rate": 5.4324849584101174e-05, "loss": 1.7268, "step": 72626 }, { "epoch": 2.42, "grad_norm": 0.6872690916061401, "learning_rate": 5.431885114096963e-05, "loss": 1.8088, "step": 72627 }, { "epoch": 2.42, "grad_norm": 0.6745887994766235, "learning_rate": 5.4312852996058875e-05, "loss": 1.7502, "step": 72628 }, { "epoch": 2.42, "grad_norm": 0.7087222933769226, "learning_rate": 5.430685514937613e-05, "loss": 1.731, "step": 72629 }, { "epoch": 2.42, "grad_norm": 0.6762621402740479, "learning_rate": 5.4300857600928584e-05, "loss": 1.6771, "step": 72630 }, { "epoch": 2.42, "grad_norm": 0.6820968985557556, "learning_rate": 5.42948603507236e-05, "loss": 1.7019, "step": 72631 }, { "epoch": 2.42, "grad_norm": 0.6988588571548462, "learning_rate": 5.428886339876854e-05, "loss": 1.7452, "step": 72632 }, { "epoch": 2.42, "grad_norm": 0.7094631791114807, "learning_rate": 5.428286674507057e-05, "loss": 1.6294, "step": 72633 }, { "epoch": 2.42, "grad_norm": 0.6763898730278015, "learning_rate": 5.427687038963692e-05, "loss": 1.7115, "step": 72634 }, { "epoch": 2.42, "grad_norm": 0.6837595105171204, "learning_rate": 5.427087433247503e-05, "loss": 1.6844, "step": 72635 }, { "epoch": 2.42, "grad_norm": 0.6667214632034302, "learning_rate": 5.4264878573592087e-05, "loss": 1.6787, "step": 72636 }, { "epoch": 2.42, "grad_norm": 0.6711785197257996, "learning_rate": 5.4258883112995296e-05, "loss": 1.6751, "step": 72637 }, { "epoch": 2.42, "grad_norm": 0.693985104560852, "learning_rate": 5.42528879506921e-05, "loss": 1.6913, "step": 72638 }, { "epoch": 2.42, "grad_norm": 0.6858597993850708, "learning_rate": 5.4246893086689626e-05, "loss": 1.6642, "step": 72639 }, { "epoch": 2.42, "grad_norm": 0.6799139976501465, "learning_rate": 5.424089852099526e-05, "loss": 1.6551, "step": 72640 }, { "epoch": 2.42, "grad_norm": 0.6808040738105774, "learning_rate": 5.423490425361616e-05, "loss": 1.7089, "step": 72641 }, { "epoch": 2.42, "grad_norm": 0.6816721558570862, "learning_rate": 5.422891028455977e-05, "loss": 1.7739, "step": 72642 }, { "epoch": 2.42, "grad_norm": 0.6756821274757385, "learning_rate": 5.422291661383323e-05, "loss": 1.72, "step": 72643 }, { "epoch": 2.42, "grad_norm": 0.6820219159126282, "learning_rate": 5.421692324144382e-05, "loss": 1.6617, "step": 72644 }, { "epoch": 2.42, "grad_norm": 0.6780267953872681, "learning_rate": 5.4210930167398927e-05, "loss": 1.6509, "step": 72645 }, { "epoch": 2.42, "grad_norm": 0.6949648261070251, "learning_rate": 5.420493739170564e-05, "loss": 1.671, "step": 72646 }, { "epoch": 2.42, "grad_norm": 0.6740076541900635, "learning_rate": 5.419894491437146e-05, "loss": 1.7119, "step": 72647 }, { "epoch": 2.42, "grad_norm": 0.7099176049232483, "learning_rate": 5.419295273540351e-05, "loss": 1.6914, "step": 72648 }, { "epoch": 2.42, "grad_norm": 0.6890183687210083, "learning_rate": 5.418696085480905e-05, "loss": 1.6598, "step": 72649 }, { "epoch": 2.42, "grad_norm": 0.6979798674583435, "learning_rate": 5.418096927259544e-05, "loss": 1.7081, "step": 72650 }, { "epoch": 2.42, "grad_norm": 0.6846796870231628, "learning_rate": 5.417497798876985e-05, "loss": 1.7198, "step": 72651 }, { "epoch": 2.42, "grad_norm": 0.6966655254364014, "learning_rate": 5.41689870033397e-05, "loss": 1.6981, "step": 72652 }, { "epoch": 2.42, "grad_norm": 0.6946055889129639, "learning_rate": 5.416299631631213e-05, "loss": 1.6874, "step": 72653 }, { "epoch": 2.42, "grad_norm": 0.6894235610961914, "learning_rate": 5.415700592769451e-05, "loss": 1.6886, "step": 72654 }, { "epoch": 2.42, "grad_norm": 1.6550239324569702, "learning_rate": 5.4151015837494047e-05, "loss": 1.704, "step": 72655 }, { "epoch": 2.42, "grad_norm": 0.7139296531677246, "learning_rate": 5.414502604571794e-05, "loss": 1.7171, "step": 72656 }, { "epoch": 2.42, "grad_norm": 0.670688271522522, "learning_rate": 5.413903655237365e-05, "loss": 1.6325, "step": 72657 }, { "epoch": 2.42, "grad_norm": 0.6851646900177002, "learning_rate": 5.413304735746836e-05, "loss": 1.6402, "step": 72658 }, { "epoch": 2.42, "grad_norm": 0.699546217918396, "learning_rate": 5.412705846100921e-05, "loss": 1.6159, "step": 72659 }, { "epoch": 2.42, "grad_norm": 0.6855601072311401, "learning_rate": 5.412106986300362e-05, "loss": 1.6872, "step": 72660 }, { "epoch": 2.42, "grad_norm": 0.7178494334220886, "learning_rate": 5.411508156345889e-05, "loss": 1.7393, "step": 72661 }, { "epoch": 2.42, "grad_norm": 0.6788467168807983, "learning_rate": 5.41090935623822e-05, "loss": 1.6244, "step": 72662 }, { "epoch": 2.42, "grad_norm": 0.6998280882835388, "learning_rate": 5.410310585978076e-05, "loss": 1.699, "step": 72663 }, { "epoch": 2.42, "grad_norm": 0.6792992353439331, "learning_rate": 5.409711845566203e-05, "loss": 1.6411, "step": 72664 }, { "epoch": 2.42, "grad_norm": 0.688291072845459, "learning_rate": 5.4091131350033126e-05, "loss": 1.6681, "step": 72665 }, { "epoch": 2.42, "grad_norm": 0.6753082871437073, "learning_rate": 5.408514454290132e-05, "loss": 1.6839, "step": 72666 }, { "epoch": 2.42, "grad_norm": 0.6849930286407471, "learning_rate": 5.4079158034273895e-05, "loss": 1.6527, "step": 72667 }, { "epoch": 2.42, "grad_norm": 0.6907550692558289, "learning_rate": 5.407317182415829e-05, "loss": 1.7253, "step": 72668 }, { "epoch": 2.42, "grad_norm": 0.6729193329811096, "learning_rate": 5.4067185912561464e-05, "loss": 1.698, "step": 72669 }, { "epoch": 2.42, "grad_norm": 0.7042528986930847, "learning_rate": 5.4061200299490836e-05, "loss": 1.7339, "step": 72670 }, { "epoch": 2.42, "grad_norm": 0.7021616697311401, "learning_rate": 5.4055214984953745e-05, "loss": 1.6735, "step": 72671 }, { "epoch": 2.42, "grad_norm": 0.6846259832382202, "learning_rate": 5.4049229968957374e-05, "loss": 1.6265, "step": 72672 }, { "epoch": 2.42, "grad_norm": 0.6882463693618774, "learning_rate": 5.404324525150896e-05, "loss": 1.6914, "step": 72673 }, { "epoch": 2.42, "grad_norm": 0.6997390985488892, "learning_rate": 5.403726083261579e-05, "loss": 1.7114, "step": 72674 }, { "epoch": 2.42, "grad_norm": 0.6979309320449829, "learning_rate": 5.40312767122853e-05, "loss": 1.6932, "step": 72675 }, { "epoch": 2.42, "grad_norm": 0.6721910834312439, "learning_rate": 5.402529289052441e-05, "loss": 1.6544, "step": 72676 }, { "epoch": 2.42, "grad_norm": 0.7065479159355164, "learning_rate": 5.4019309367340616e-05, "loss": 1.7237, "step": 72677 }, { "epoch": 2.42, "grad_norm": 0.7124481797218323, "learning_rate": 5.401332614274121e-05, "loss": 1.7321, "step": 72678 }, { "epoch": 2.42, "grad_norm": 0.6869311928749084, "learning_rate": 5.4007343216733354e-05, "loss": 1.6421, "step": 72679 }, { "epoch": 2.42, "grad_norm": 0.6948713660240173, "learning_rate": 5.400136058932428e-05, "loss": 1.703, "step": 72680 }, { "epoch": 2.42, "grad_norm": 0.6965286731719971, "learning_rate": 5.3995378260521315e-05, "loss": 1.6965, "step": 72681 }, { "epoch": 2.42, "grad_norm": 0.6682959198951721, "learning_rate": 5.398939623033185e-05, "loss": 1.7334, "step": 72682 }, { "epoch": 2.42, "grad_norm": 0.6888913512229919, "learning_rate": 5.3983414498762876e-05, "loss": 1.7231, "step": 72683 }, { "epoch": 2.42, "grad_norm": 0.7081171870231628, "learning_rate": 5.3977433065821764e-05, "loss": 1.7214, "step": 72684 }, { "epoch": 2.42, "grad_norm": 0.6848728656768799, "learning_rate": 5.3971451931515894e-05, "loss": 1.7591, "step": 72685 }, { "epoch": 2.42, "grad_norm": 0.6851164698600769, "learning_rate": 5.396547109585241e-05, "loss": 1.7389, "step": 72686 }, { "epoch": 2.42, "grad_norm": 0.6730920076370239, "learning_rate": 5.395949055883853e-05, "loss": 1.6469, "step": 72687 }, { "epoch": 2.42, "grad_norm": 0.6904070973396301, "learning_rate": 5.395351032048168e-05, "loss": 1.6821, "step": 72688 }, { "epoch": 2.42, "grad_norm": 0.6784826517105103, "learning_rate": 5.3947530380788886e-05, "loss": 1.6884, "step": 72689 }, { "epoch": 2.42, "grad_norm": 0.6888700723648071, "learning_rate": 5.3941550739767644e-05, "loss": 1.6746, "step": 72690 }, { "epoch": 2.42, "grad_norm": 0.6834469437599182, "learning_rate": 5.3935571397425026e-05, "loss": 1.6332, "step": 72691 }, { "epoch": 2.42, "grad_norm": 0.6946350336074829, "learning_rate": 5.3929592353768444e-05, "loss": 1.6667, "step": 72692 }, { "epoch": 2.42, "grad_norm": 0.6939417719841003, "learning_rate": 5.392361360880507e-05, "loss": 1.6749, "step": 72693 }, { "epoch": 2.42, "grad_norm": 0.6927217841148376, "learning_rate": 5.3917635162542095e-05, "loss": 1.6531, "step": 72694 }, { "epoch": 2.42, "grad_norm": 0.681660532951355, "learning_rate": 5.3911657014986943e-05, "loss": 1.7338, "step": 72695 }, { "epoch": 2.42, "grad_norm": 0.6737033128738403, "learning_rate": 5.390567916614671e-05, "loss": 1.6784, "step": 72696 }, { "epoch": 2.42, "grad_norm": 0.7095639705657959, "learning_rate": 5.389970161602876e-05, "loss": 1.6611, "step": 72697 }, { "epoch": 2.42, "grad_norm": 0.6926788091659546, "learning_rate": 5.389372436464028e-05, "loss": 1.6747, "step": 72698 }, { "epoch": 2.42, "grad_norm": 0.7178393006324768, "learning_rate": 5.3887747411988604e-05, "loss": 1.7139, "step": 72699 }, { "epoch": 2.42, "grad_norm": 0.7097800970077515, "learning_rate": 5.3881770758080957e-05, "loss": 1.7031, "step": 72700 }, { "epoch": 2.42, "grad_norm": 0.6823024749755859, "learning_rate": 5.3875794402924466e-05, "loss": 1.6645, "step": 72701 }, { "epoch": 2.42, "grad_norm": 0.705895185470581, "learning_rate": 5.3869818346526627e-05, "loss": 1.756, "step": 72702 }, { "epoch": 2.42, "grad_norm": 0.6950244307518005, "learning_rate": 5.386384258889444e-05, "loss": 1.6573, "step": 72703 }, { "epoch": 2.42, "grad_norm": 0.6625290513038635, "learning_rate": 5.385786713003538e-05, "loss": 1.6494, "step": 72704 }, { "epoch": 2.42, "grad_norm": 0.6897504329681396, "learning_rate": 5.38518919699566e-05, "loss": 1.6974, "step": 72705 }, { "epoch": 2.42, "grad_norm": 0.673966109752655, "learning_rate": 5.384591710866527e-05, "loss": 1.683, "step": 72706 }, { "epoch": 2.42, "grad_norm": 0.6713821887969971, "learning_rate": 5.3839942546168804e-05, "loss": 1.6929, "step": 72707 }, { "epoch": 2.42, "grad_norm": 0.6967171430587769, "learning_rate": 5.383396828247431e-05, "loss": 1.6983, "step": 72708 }, { "epoch": 2.42, "grad_norm": 0.7032541632652283, "learning_rate": 5.382799431758917e-05, "loss": 1.7231, "step": 72709 }, { "epoch": 2.42, "grad_norm": 0.7025274634361267, "learning_rate": 5.3822020651520533e-05, "loss": 1.6831, "step": 72710 }, { "epoch": 2.42, "grad_norm": 0.7000673413276672, "learning_rate": 5.381604728427573e-05, "loss": 1.7298, "step": 72711 }, { "epoch": 2.42, "grad_norm": 0.6716944575309753, "learning_rate": 5.381007421586199e-05, "loss": 1.6815, "step": 72712 }, { "epoch": 2.42, "grad_norm": 0.6887852549552917, "learning_rate": 5.3804101446286466e-05, "loss": 1.6592, "step": 72713 }, { "epoch": 2.42, "grad_norm": 0.6733338236808777, "learning_rate": 5.3798128975556555e-05, "loss": 1.67, "step": 72714 }, { "epoch": 2.42, "grad_norm": 0.6892593502998352, "learning_rate": 5.379215680367945e-05, "loss": 1.7242, "step": 72715 }, { "epoch": 2.42, "grad_norm": 0.6718682050704956, "learning_rate": 5.378618493066229e-05, "loss": 1.7014, "step": 72716 }, { "epoch": 2.42, "grad_norm": 0.687605619430542, "learning_rate": 5.378021335651246e-05, "loss": 1.6662, "step": 72717 }, { "epoch": 2.42, "grad_norm": 0.6786085963249207, "learning_rate": 5.3774242081237215e-05, "loss": 1.6379, "step": 72718 }, { "epoch": 2.42, "grad_norm": 0.678165078163147, "learning_rate": 5.37682711048438e-05, "loss": 1.8021, "step": 72719 }, { "epoch": 2.42, "grad_norm": 0.6977836489677429, "learning_rate": 5.376230042733929e-05, "loss": 1.6559, "step": 72720 }, { "epoch": 2.42, "grad_norm": 0.6918776631355286, "learning_rate": 5.375633004873119e-05, "loss": 1.6992, "step": 72721 }, { "epoch": 2.42, "grad_norm": 0.6959453821182251, "learning_rate": 5.3750359969026624e-05, "loss": 1.5998, "step": 72722 }, { "epoch": 2.42, "grad_norm": 0.6461164951324463, "learning_rate": 5.374439018823272e-05, "loss": 1.7054, "step": 72723 }, { "epoch": 2.42, "grad_norm": 0.6892635822296143, "learning_rate": 5.373842070635688e-05, "loss": 1.7546, "step": 72724 }, { "epoch": 2.42, "grad_norm": 0.6705149412155151, "learning_rate": 5.373245152340645e-05, "loss": 1.6656, "step": 72725 }, { "epoch": 2.42, "grad_norm": 0.6986415982246399, "learning_rate": 5.37264826393884e-05, "loss": 1.7468, "step": 72726 }, { "epoch": 2.42, "grad_norm": 0.7066393494606018, "learning_rate": 5.372051405431009e-05, "loss": 1.657, "step": 72727 }, { "epoch": 2.42, "grad_norm": 0.7028337121009827, "learning_rate": 5.371454576817889e-05, "loss": 1.6756, "step": 72728 }, { "epoch": 2.42, "grad_norm": 0.7194696068763733, "learning_rate": 5.3708577781001904e-05, "loss": 1.6676, "step": 72729 }, { "epoch": 2.42, "grad_norm": 0.715613603591919, "learning_rate": 5.370261009278638e-05, "loss": 1.7522, "step": 72730 }, { "epoch": 2.42, "grad_norm": 0.6833429336547852, "learning_rate": 5.3696642703539574e-05, "loss": 1.7098, "step": 72731 }, { "epoch": 2.42, "grad_norm": 0.7055482864379883, "learning_rate": 5.3690675613268905e-05, "loss": 1.7379, "step": 72732 }, { "epoch": 2.42, "grad_norm": 0.6556297540664673, "learning_rate": 5.368470882198132e-05, "loss": 1.645, "step": 72733 }, { "epoch": 2.42, "grad_norm": 0.6799577474594116, "learning_rate": 5.3678742329684223e-05, "loss": 1.6888, "step": 72734 }, { "epoch": 2.42, "grad_norm": 0.6742481589317322, "learning_rate": 5.367277613638489e-05, "loss": 1.662, "step": 72735 }, { "epoch": 2.42, "grad_norm": 0.709828794002533, "learning_rate": 5.366681024209051e-05, "loss": 1.7187, "step": 72736 }, { "epoch": 2.42, "grad_norm": 0.6644752025604248, "learning_rate": 5.366084464680828e-05, "loss": 1.6476, "step": 72737 }, { "epoch": 2.42, "grad_norm": 0.6877979040145874, "learning_rate": 5.3654879350545455e-05, "loss": 1.7012, "step": 72738 }, { "epoch": 2.42, "grad_norm": 0.6753195524215698, "learning_rate": 5.364891435330947e-05, "loss": 1.7146, "step": 72739 }, { "epoch": 2.42, "grad_norm": 0.687172532081604, "learning_rate": 5.364294965510725e-05, "loss": 1.719, "step": 72740 }, { "epoch": 2.42, "grad_norm": 0.6954052448272705, "learning_rate": 5.363698525594622e-05, "loss": 1.6639, "step": 72741 }, { "epoch": 2.42, "grad_norm": 0.6845024824142456, "learning_rate": 5.3631021155833645e-05, "loss": 1.7316, "step": 72742 }, { "epoch": 2.42, "grad_norm": 0.6808669567108154, "learning_rate": 5.362505735477668e-05, "loss": 1.7003, "step": 72743 }, { "epoch": 2.42, "grad_norm": 0.7257552742958069, "learning_rate": 5.3619093852782557e-05, "loss": 1.7645, "step": 72744 }, { "epoch": 2.42, "grad_norm": 0.6636528968811035, "learning_rate": 5.3613130649858604e-05, "loss": 1.71, "step": 72745 }, { "epoch": 2.42, "grad_norm": 0.678568959236145, "learning_rate": 5.360716774601202e-05, "loss": 1.7047, "step": 72746 }, { "epoch": 2.42, "grad_norm": 0.6836287975311279, "learning_rate": 5.3601205141249945e-05, "loss": 1.7386, "step": 72747 }, { "epoch": 2.42, "grad_norm": 0.7231913208961487, "learning_rate": 5.359524283557973e-05, "loss": 1.6911, "step": 72748 }, { "epoch": 2.42, "grad_norm": 0.6915164589881897, "learning_rate": 5.358928082900863e-05, "loss": 1.7327, "step": 72749 }, { "epoch": 2.42, "grad_norm": 0.6775969862937927, "learning_rate": 5.3583319121543866e-05, "loss": 1.6674, "step": 72750 }, { "epoch": 2.42, "grad_norm": 0.6909238696098328, "learning_rate": 5.3577357713192535e-05, "loss": 1.6688, "step": 72751 }, { "epoch": 2.42, "grad_norm": 0.6678040623664856, "learning_rate": 5.3571396603962067e-05, "loss": 1.6777, "step": 72752 }, { "epoch": 2.42, "grad_norm": 0.7149826288223267, "learning_rate": 5.3565435793859623e-05, "loss": 1.6792, "step": 72753 }, { "epoch": 2.42, "grad_norm": 0.6902127861976624, "learning_rate": 5.355947528289236e-05, "loss": 1.6249, "step": 72754 }, { "epoch": 2.42, "grad_norm": 0.6808703541755676, "learning_rate": 5.3553515071067646e-05, "loss": 1.6856, "step": 72755 }, { "epoch": 2.42, "grad_norm": 0.7078731656074524, "learning_rate": 5.3547555158392565e-05, "loss": 1.6866, "step": 72756 }, { "epoch": 2.42, "grad_norm": 0.7042794227600098, "learning_rate": 5.354159554487455e-05, "loss": 1.7498, "step": 72757 }, { "epoch": 2.42, "grad_norm": 0.6832148432731628, "learning_rate": 5.353563623052063e-05, "loss": 1.7206, "step": 72758 }, { "epoch": 2.42, "grad_norm": 0.7282196879386902, "learning_rate": 5.35296772153382e-05, "loss": 1.71, "step": 72759 }, { "epoch": 2.42, "grad_norm": 0.7024142742156982, "learning_rate": 5.352371849933445e-05, "loss": 1.6807, "step": 72760 }, { "epoch": 2.42, "grad_norm": 0.6828790307044983, "learning_rate": 5.351776008251647e-05, "loss": 1.6674, "step": 72761 }, { "epoch": 2.42, "grad_norm": 0.6813516616821289, "learning_rate": 5.3511801964891735e-05, "loss": 1.5975, "step": 72762 }, { "epoch": 2.42, "grad_norm": 0.694366991519928, "learning_rate": 5.350584414646722e-05, "loss": 1.682, "step": 72763 }, { "epoch": 2.42, "grad_norm": 0.6784120202064514, "learning_rate": 5.349988662725041e-05, "loss": 1.724, "step": 72764 }, { "epoch": 2.42, "grad_norm": 0.7115446329116821, "learning_rate": 5.349392940724841e-05, "loss": 1.7025, "step": 72765 }, { "epoch": 2.42, "grad_norm": 0.6724681854248047, "learning_rate": 5.34879724864684e-05, "loss": 1.7333, "step": 72766 }, { "epoch": 2.42, "grad_norm": 0.6773873567581177, "learning_rate": 5.3482015864917724e-05, "loss": 1.7312, "step": 72767 }, { "epoch": 2.42, "grad_norm": 0.6748913526535034, "learning_rate": 5.347605954260349e-05, "loss": 1.6549, "step": 72768 }, { "epoch": 2.42, "grad_norm": 0.6931180357933044, "learning_rate": 5.347010351953308e-05, "loss": 1.6944, "step": 72769 }, { "epoch": 2.42, "grad_norm": 0.6903507709503174, "learning_rate": 5.346414779571353e-05, "loss": 1.6646, "step": 72770 }, { "epoch": 2.42, "grad_norm": 0.7130423784255981, "learning_rate": 5.3458192371152294e-05, "loss": 1.7202, "step": 72771 }, { "epoch": 2.42, "grad_norm": 0.6847307682037354, "learning_rate": 5.345223724585648e-05, "loss": 1.7266, "step": 72772 }, { "epoch": 2.42, "grad_norm": 0.7157183289527893, "learning_rate": 5.344628241983323e-05, "loss": 1.6872, "step": 72773 }, { "epoch": 2.42, "grad_norm": 0.6930243372917175, "learning_rate": 5.344032789308995e-05, "loss": 1.6621, "step": 72774 }, { "epoch": 2.42, "grad_norm": 0.6929954886436462, "learning_rate": 5.3434373665633764e-05, "loss": 1.7118, "step": 72775 }, { "epoch": 2.42, "grad_norm": 0.6622456908226013, "learning_rate": 5.3428419737471875e-05, "loss": 1.6968, "step": 72776 }, { "epoch": 2.42, "grad_norm": 0.6980658173561096, "learning_rate": 5.342246610861153e-05, "loss": 1.6972, "step": 72777 }, { "epoch": 2.42, "grad_norm": 0.7000653743743896, "learning_rate": 5.341651277906006e-05, "loss": 1.7074, "step": 72778 }, { "epoch": 2.42, "grad_norm": 0.6891384720802307, "learning_rate": 5.3410559748824635e-05, "loss": 1.6563, "step": 72779 }, { "epoch": 2.42, "grad_norm": 0.7167290449142456, "learning_rate": 5.340460701791237e-05, "loss": 1.7881, "step": 72780 }, { "epoch": 2.42, "grad_norm": 0.6722193360328674, "learning_rate": 5.3398654586330634e-05, "loss": 1.6729, "step": 72781 }, { "epoch": 2.42, "grad_norm": 0.6726383566856384, "learning_rate": 5.3392702454086625e-05, "loss": 1.6666, "step": 72782 }, { "epoch": 2.42, "grad_norm": 0.6909670829772949, "learning_rate": 5.3386750621187426e-05, "loss": 1.6651, "step": 72783 }, { "epoch": 2.42, "grad_norm": 0.6973328590393066, "learning_rate": 5.3380799087640405e-05, "loss": 1.6322, "step": 72784 }, { "epoch": 2.42, "grad_norm": 0.670785129070282, "learning_rate": 5.3374847853452886e-05, "loss": 1.6769, "step": 72785 }, { "epoch": 2.42, "grad_norm": 0.6989939212799072, "learning_rate": 5.3368896918631835e-05, "loss": 1.6807, "step": 72786 }, { "epoch": 2.42, "grad_norm": 0.6861081123352051, "learning_rate": 5.3362946283184605e-05, "loss": 1.7229, "step": 72787 }, { "epoch": 2.42, "grad_norm": 0.6839632391929626, "learning_rate": 5.3356995947118466e-05, "loss": 1.7241, "step": 72788 }, { "epoch": 2.42, "grad_norm": 0.6962994337081909, "learning_rate": 5.33510459104406e-05, "loss": 1.6751, "step": 72789 }, { "epoch": 2.42, "grad_norm": 0.6915777921676636, "learning_rate": 5.334509617315819e-05, "loss": 1.668, "step": 72790 }, { "epoch": 2.42, "grad_norm": 0.6855674386024475, "learning_rate": 5.333914673527844e-05, "loss": 1.7141, "step": 72791 }, { "epoch": 2.42, "grad_norm": 0.6644195318222046, "learning_rate": 5.333319759680879e-05, "loss": 1.7043, "step": 72792 }, { "epoch": 2.42, "grad_norm": 0.6847980618476868, "learning_rate": 5.3327248757756127e-05, "loss": 1.7309, "step": 72793 }, { "epoch": 2.42, "grad_norm": 0.6693252921104431, "learning_rate": 5.332130021812785e-05, "loss": 1.6919, "step": 72794 }, { "epoch": 2.42, "grad_norm": 0.6729685664176941, "learning_rate": 5.331535197793125e-05, "loss": 1.7144, "step": 72795 }, { "epoch": 2.42, "grad_norm": 0.6530467867851257, "learning_rate": 5.330940403717346e-05, "loss": 1.5837, "step": 72796 }, { "epoch": 2.42, "grad_norm": 0.6818676590919495, "learning_rate": 5.330345639586164e-05, "loss": 1.6362, "step": 72797 }, { "epoch": 2.42, "grad_norm": 0.6840900778770447, "learning_rate": 5.329750905400309e-05, "loss": 1.761, "step": 72798 }, { "epoch": 2.42, "grad_norm": 0.6627181768417358, "learning_rate": 5.329156201160505e-05, "loss": 1.7308, "step": 72799 }, { "epoch": 2.42, "grad_norm": 0.6829207539558411, "learning_rate": 5.328561526867473e-05, "loss": 1.6598, "step": 72800 }, { "epoch": 2.42, "grad_norm": 0.6893466711044312, "learning_rate": 5.327966882521922e-05, "loss": 1.6377, "step": 72801 }, { "epoch": 2.42, "grad_norm": 0.7076364755630493, "learning_rate": 5.3273722681245924e-05, "loss": 1.6846, "step": 72802 }, { "epoch": 2.42, "grad_norm": 0.6954169273376465, "learning_rate": 5.326777683676199e-05, "loss": 1.7243, "step": 72803 }, { "epoch": 2.42, "grad_norm": 0.6953942179679871, "learning_rate": 5.326183129177455e-05, "loss": 1.7273, "step": 72804 }, { "epoch": 2.42, "grad_norm": 0.6812664270401001, "learning_rate": 5.325588604629093e-05, "loss": 1.6634, "step": 72805 }, { "epoch": 2.42, "grad_norm": 0.684982419013977, "learning_rate": 5.324994110031826e-05, "loss": 1.686, "step": 72806 }, { "epoch": 2.42, "grad_norm": 0.6883047223091125, "learning_rate": 5.324399645386387e-05, "loss": 1.6128, "step": 72807 }, { "epoch": 2.42, "grad_norm": 0.6930686831474304, "learning_rate": 5.323805210693485e-05, "loss": 1.7378, "step": 72808 }, { "epoch": 2.42, "grad_norm": 0.7066178321838379, "learning_rate": 5.323210805953856e-05, "loss": 1.6665, "step": 72809 }, { "epoch": 2.42, "grad_norm": 0.6918744444847107, "learning_rate": 5.3226164311682094e-05, "loss": 1.7142, "step": 72810 }, { "epoch": 2.42, "grad_norm": 0.7470893263816833, "learning_rate": 5.322022086337265e-05, "loss": 1.7243, "step": 72811 }, { "epoch": 2.42, "grad_norm": 0.7109397053718567, "learning_rate": 5.3214277714617595e-05, "loss": 1.6613, "step": 72812 }, { "epoch": 2.42, "grad_norm": 0.690850019454956, "learning_rate": 5.320833486542394e-05, "loss": 1.7205, "step": 72813 }, { "epoch": 2.42, "grad_norm": 0.7079710364341736, "learning_rate": 5.320239231579909e-05, "loss": 1.6956, "step": 72814 }, { "epoch": 2.42, "grad_norm": 0.6926897764205933, "learning_rate": 5.319645006575014e-05, "loss": 1.7266, "step": 72815 }, { "epoch": 2.42, "grad_norm": 0.677410364151001, "learning_rate": 5.3190508115284315e-05, "loss": 1.7242, "step": 72816 }, { "epoch": 2.42, "grad_norm": 0.6647154092788696, "learning_rate": 5.318456646440887e-05, "loss": 1.7312, "step": 72817 }, { "epoch": 2.42, "grad_norm": 0.6753436326980591, "learning_rate": 5.317862511313095e-05, "loss": 1.7118, "step": 72818 }, { "epoch": 2.42, "grad_norm": 0.6943280696868896, "learning_rate": 5.3172684061457895e-05, "loss": 1.6645, "step": 72819 }, { "epoch": 2.42, "grad_norm": 0.6796182990074158, "learning_rate": 5.316674330939674e-05, "loss": 1.6182, "step": 72820 }, { "epoch": 2.42, "grad_norm": 0.6972669363021851, "learning_rate": 5.3160802856954854e-05, "loss": 1.7386, "step": 72821 }, { "epoch": 2.42, "grad_norm": 0.6881484389305115, "learning_rate": 5.315486270413939e-05, "loss": 1.6757, "step": 72822 }, { "epoch": 2.42, "grad_norm": 0.7008281946182251, "learning_rate": 5.314892285095748e-05, "loss": 1.6583, "step": 72823 }, { "epoch": 2.42, "grad_norm": 0.6687347888946533, "learning_rate": 5.3142983297416506e-05, "loss": 1.667, "step": 72824 }, { "epoch": 2.42, "grad_norm": 0.7067926526069641, "learning_rate": 5.313704404352348e-05, "loss": 1.7286, "step": 72825 }, { "epoch": 2.42, "grad_norm": 0.6873914003372192, "learning_rate": 5.313110508928579e-05, "loss": 1.6468, "step": 72826 }, { "epoch": 2.42, "grad_norm": 0.70650315284729, "learning_rate": 5.312516643471045e-05, "loss": 1.7026, "step": 72827 }, { "epoch": 2.42, "grad_norm": 0.6846857070922852, "learning_rate": 5.311922807980491e-05, "loss": 1.7813, "step": 72828 }, { "epoch": 2.42, "grad_norm": 0.6928821206092834, "learning_rate": 5.311329002457625e-05, "loss": 1.6764, "step": 72829 }, { "epoch": 2.42, "grad_norm": 0.6925570964813232, "learning_rate": 5.310735226903156e-05, "loss": 1.6805, "step": 72830 }, { "epoch": 2.42, "grad_norm": 0.6816151738166809, "learning_rate": 5.310141481317829e-05, "loss": 1.6622, "step": 72831 }, { "epoch": 2.42, "grad_norm": 0.6959927678108215, "learning_rate": 5.309547765702348e-05, "loss": 1.6821, "step": 72832 }, { "epoch": 2.42, "grad_norm": 0.698539674282074, "learning_rate": 5.3089540800574325e-05, "loss": 1.7169, "step": 72833 }, { "epoch": 2.42, "grad_norm": 0.6882911324501038, "learning_rate": 5.30836042438381e-05, "loss": 1.676, "step": 72834 }, { "epoch": 2.42, "grad_norm": 0.7166098952293396, "learning_rate": 5.3077667986822045e-05, "loss": 1.6361, "step": 72835 }, { "epoch": 2.42, "grad_norm": 0.7130483984947205, "learning_rate": 5.3071732029533345e-05, "loss": 1.6908, "step": 72836 }, { "epoch": 2.42, "grad_norm": 0.6968268752098083, "learning_rate": 5.3065796371979105e-05, "loss": 1.7087, "step": 72837 }, { "epoch": 2.42, "grad_norm": 0.7184595465660095, "learning_rate": 5.305986101416667e-05, "loss": 1.7049, "step": 72838 }, { "epoch": 2.42, "grad_norm": 0.697752833366394, "learning_rate": 5.305392595610316e-05, "loss": 1.7388, "step": 72839 }, { "epoch": 2.42, "grad_norm": 0.6882988810539246, "learning_rate": 5.304799119779577e-05, "loss": 1.6856, "step": 72840 }, { "epoch": 2.42, "grad_norm": 0.6919761300086975, "learning_rate": 5.3042056739251696e-05, "loss": 1.6818, "step": 72841 }, { "epoch": 2.42, "grad_norm": 0.6853193044662476, "learning_rate": 5.3036122580478325e-05, "loss": 1.6407, "step": 72842 }, { "epoch": 2.42, "grad_norm": 0.6844832897186279, "learning_rate": 5.303018872148256e-05, "loss": 1.6328, "step": 72843 }, { "epoch": 2.42, "grad_norm": 0.6913328766822815, "learning_rate": 5.302425516227179e-05, "loss": 1.6891, "step": 72844 }, { "epoch": 2.42, "grad_norm": 0.6700825691223145, "learning_rate": 5.301832190285324e-05, "loss": 1.7013, "step": 72845 }, { "epoch": 2.42, "grad_norm": 0.6707890033721924, "learning_rate": 5.301238894323405e-05, "loss": 1.6561, "step": 72846 }, { "epoch": 2.42, "grad_norm": 0.6820588707923889, "learning_rate": 5.300645628342134e-05, "loss": 1.6928, "step": 72847 }, { "epoch": 2.42, "grad_norm": 0.7311347126960754, "learning_rate": 5.3000523923422435e-05, "loss": 1.7694, "step": 72848 }, { "epoch": 2.42, "grad_norm": 0.7176382541656494, "learning_rate": 5.299459186324463e-05, "loss": 1.6353, "step": 72849 }, { "epoch": 2.42, "grad_norm": 0.703184962272644, "learning_rate": 5.298866010289485e-05, "loss": 1.6821, "step": 72850 }, { "epoch": 2.42, "grad_norm": 0.68767911195755, "learning_rate": 5.2982728642380436e-05, "loss": 1.6291, "step": 72851 }, { "epoch": 2.42, "grad_norm": 0.7052034139633179, "learning_rate": 5.297679748170866e-05, "loss": 1.5834, "step": 72852 }, { "epoch": 2.42, "grad_norm": 0.7092708349227905, "learning_rate": 5.2970866620886675e-05, "loss": 1.718, "step": 72853 }, { "epoch": 2.42, "grad_norm": 0.690365731716156, "learning_rate": 5.296493605992156e-05, "loss": 1.7193, "step": 72854 }, { "epoch": 2.42, "grad_norm": 0.6838499903678894, "learning_rate": 5.2959005798820644e-05, "loss": 1.669, "step": 72855 }, { "epoch": 2.42, "grad_norm": 0.6743238568305969, "learning_rate": 5.295307583759122e-05, "loss": 1.6535, "step": 72856 }, { "epoch": 2.42, "grad_norm": 0.6851782202720642, "learning_rate": 5.294714617624022e-05, "loss": 1.6017, "step": 72857 }, { "epoch": 2.42, "grad_norm": 0.6851763725280762, "learning_rate": 5.2941216814775e-05, "loss": 1.717, "step": 72858 }, { "epoch": 2.42, "grad_norm": 0.6811671853065491, "learning_rate": 5.293528775320279e-05, "loss": 1.659, "step": 72859 }, { "epoch": 2.42, "grad_norm": 0.6693875193595886, "learning_rate": 5.292935899153076e-05, "loss": 1.681, "step": 72860 }, { "epoch": 2.42, "grad_norm": 0.7135462164878845, "learning_rate": 5.292343052976599e-05, "loss": 1.7214, "step": 72861 }, { "epoch": 2.42, "grad_norm": 0.69133061170578, "learning_rate": 5.2917502367915843e-05, "loss": 1.68, "step": 72862 }, { "epoch": 2.42, "grad_norm": 0.6905668377876282, "learning_rate": 5.2911574505987456e-05, "loss": 1.7412, "step": 72863 }, { "epoch": 2.42, "grad_norm": 0.7121127247810364, "learning_rate": 5.2905646943987946e-05, "loss": 1.7398, "step": 72864 }, { "epoch": 2.42, "grad_norm": 0.6952206492424011, "learning_rate": 5.2899719681924545e-05, "loss": 1.7602, "step": 72865 }, { "epoch": 2.42, "grad_norm": 2.0926125049591064, "learning_rate": 5.289379271980455e-05, "loss": 1.7371, "step": 72866 }, { "epoch": 2.42, "grad_norm": 0.6951467990875244, "learning_rate": 5.288786605763512e-05, "loss": 1.649, "step": 72867 }, { "epoch": 2.42, "grad_norm": 0.6830836534500122, "learning_rate": 5.288193969542328e-05, "loss": 1.7352, "step": 72868 }, { "epoch": 2.42, "grad_norm": 0.6925690770149231, "learning_rate": 5.287601363317646e-05, "loss": 1.7092, "step": 72869 }, { "epoch": 2.42, "grad_norm": 0.6677557229995728, "learning_rate": 5.287008787090176e-05, "loss": 1.7119, "step": 72870 }, { "epoch": 2.42, "grad_norm": 0.6741459965705872, "learning_rate": 5.286416240860626e-05, "loss": 1.7121, "step": 72871 }, { "epoch": 2.42, "grad_norm": 0.6923183798789978, "learning_rate": 5.285823724629736e-05, "loss": 1.7664, "step": 72872 }, { "epoch": 2.42, "grad_norm": 0.6918185353279114, "learning_rate": 5.285231238398203e-05, "loss": 1.7015, "step": 72873 }, { "epoch": 2.42, "grad_norm": 0.7043033838272095, "learning_rate": 5.284638782166769e-05, "loss": 1.7398, "step": 72874 }, { "epoch": 2.42, "grad_norm": 0.7000787258148193, "learning_rate": 5.2840463559361336e-05, "loss": 1.7744, "step": 72875 }, { "epoch": 2.42, "grad_norm": 0.6776869893074036, "learning_rate": 5.283453959707029e-05, "loss": 1.693, "step": 72876 }, { "epoch": 2.42, "grad_norm": 0.6834511160850525, "learning_rate": 5.282861593480172e-05, "loss": 1.7692, "step": 72877 }, { "epoch": 2.42, "grad_norm": 0.7180588245391846, "learning_rate": 5.282269257256272e-05, "loss": 1.6848, "step": 72878 }, { "epoch": 2.42, "grad_norm": 0.6629676222801208, "learning_rate": 5.2816769510360614e-05, "loss": 1.6806, "step": 72879 }, { "epoch": 2.42, "grad_norm": 0.6657983660697937, "learning_rate": 5.281084674820243e-05, "loss": 1.6723, "step": 72880 }, { "epoch": 2.42, "grad_norm": 0.6825478076934814, "learning_rate": 5.280492428609556e-05, "loss": 1.7231, "step": 72881 }, { "epoch": 2.42, "grad_norm": 0.685821533203125, "learning_rate": 5.279900212404711e-05, "loss": 1.7367, "step": 72882 }, { "epoch": 2.42, "grad_norm": 0.6972963213920593, "learning_rate": 5.2793080262064126e-05, "loss": 1.7204, "step": 72883 }, { "epoch": 2.42, "grad_norm": 0.7147220373153687, "learning_rate": 5.2787158700154006e-05, "loss": 1.6381, "step": 72884 }, { "epoch": 2.42, "grad_norm": 0.6899179220199585, "learning_rate": 5.2781237438323785e-05, "loss": 1.6832, "step": 72885 }, { "epoch": 2.42, "grad_norm": 0.6879131197929382, "learning_rate": 5.277531647658082e-05, "loss": 1.6618, "step": 72886 }, { "epoch": 2.42, "grad_norm": 0.7026082277297974, "learning_rate": 5.276939581493207e-05, "loss": 1.6312, "step": 72887 }, { "epoch": 2.42, "grad_norm": 0.7564396262168884, "learning_rate": 5.2763475453384965e-05, "loss": 1.7185, "step": 72888 }, { "epoch": 2.43, "grad_norm": 0.6794730424880981, "learning_rate": 5.275755539194653e-05, "loss": 1.7547, "step": 72889 }, { "epoch": 2.43, "grad_norm": 0.6728196144104004, "learning_rate": 5.2751635630623924e-05, "loss": 1.5304, "step": 72890 }, { "epoch": 2.43, "grad_norm": 0.6770370006561279, "learning_rate": 5.2745716169424515e-05, "loss": 1.7009, "step": 72891 }, { "epoch": 2.43, "grad_norm": 0.6647131443023682, "learning_rate": 5.273979700835532e-05, "loss": 1.6267, "step": 72892 }, { "epoch": 2.43, "grad_norm": 0.7066205143928528, "learning_rate": 5.273387814742355e-05, "loss": 1.7992, "step": 72893 }, { "epoch": 2.43, "grad_norm": 0.6872541904449463, "learning_rate": 5.2727959586636414e-05, "loss": 1.6609, "step": 72894 }, { "epoch": 2.43, "grad_norm": 0.6858871579170227, "learning_rate": 5.2722041326001155e-05, "loss": 1.6611, "step": 72895 }, { "epoch": 2.43, "grad_norm": 0.6713501811027527, "learning_rate": 5.271612336552492e-05, "loss": 1.6905, "step": 72896 }, { "epoch": 2.43, "grad_norm": 0.6934463381767273, "learning_rate": 5.271020570521478e-05, "loss": 1.7, "step": 72897 }, { "epoch": 2.43, "grad_norm": 0.7083551287651062, "learning_rate": 5.270428834507813e-05, "loss": 1.7138, "step": 72898 }, { "epoch": 2.43, "grad_norm": 0.6783861517906189, "learning_rate": 5.269837128512199e-05, "loss": 1.6733, "step": 72899 }, { "epoch": 2.43, "grad_norm": 0.7144894003868103, "learning_rate": 5.269245452535352e-05, "loss": 1.7018, "step": 72900 }, { "epoch": 2.43, "grad_norm": 0.6732831597328186, "learning_rate": 5.268653806577999e-05, "loss": 1.725, "step": 72901 }, { "epoch": 2.43, "grad_norm": 0.705056369304657, "learning_rate": 5.2680621906408726e-05, "loss": 1.6659, "step": 72902 }, { "epoch": 2.43, "grad_norm": 0.6923669576644897, "learning_rate": 5.2674706047246585e-05, "loss": 1.7792, "step": 72903 }, { "epoch": 2.43, "grad_norm": 0.6914911866188049, "learning_rate": 5.266879048830089e-05, "loss": 1.6887, "step": 72904 }, { "epoch": 2.43, "grad_norm": 0.6728652119636536, "learning_rate": 5.2662875229578914e-05, "loss": 1.6789, "step": 72905 }, { "epoch": 2.43, "grad_norm": 0.6809775233268738, "learning_rate": 5.265696027108778e-05, "loss": 1.6218, "step": 72906 }, { "epoch": 2.43, "grad_norm": 0.7140558362007141, "learning_rate": 5.2651045612834576e-05, "loss": 1.7438, "step": 72907 }, { "epoch": 2.43, "grad_norm": 0.6815295815467834, "learning_rate": 5.264513125482657e-05, "loss": 1.7206, "step": 72908 }, { "epoch": 2.43, "grad_norm": 0.681110143661499, "learning_rate": 5.263921719707105e-05, "loss": 1.7188, "step": 72909 }, { "epoch": 2.43, "grad_norm": 0.6893022060394287, "learning_rate": 5.263330343957495e-05, "loss": 1.6964, "step": 72910 }, { "epoch": 2.43, "grad_norm": 0.6758439540863037, "learning_rate": 5.262738998234556e-05, "loss": 1.6579, "step": 72911 }, { "epoch": 2.43, "grad_norm": 0.6900758743286133, "learning_rate": 5.262147682539014e-05, "loss": 1.6783, "step": 72912 }, { "epoch": 2.43, "grad_norm": 0.6868116855621338, "learning_rate": 5.261556396871582e-05, "loss": 1.6418, "step": 72913 }, { "epoch": 2.43, "grad_norm": 0.6812914609909058, "learning_rate": 5.260965141232967e-05, "loss": 1.6894, "step": 72914 }, { "epoch": 2.43, "grad_norm": 0.6985757946968079, "learning_rate": 5.260373915623893e-05, "loss": 1.7079, "step": 72915 }, { "epoch": 2.43, "grad_norm": 0.7312077283859253, "learning_rate": 5.259782720045088e-05, "loss": 1.6934, "step": 72916 }, { "epoch": 2.43, "grad_norm": 0.6879278421401978, "learning_rate": 5.259191554497264e-05, "loss": 1.6329, "step": 72917 }, { "epoch": 2.43, "grad_norm": 0.6890429854393005, "learning_rate": 5.258600418981127e-05, "loss": 1.734, "step": 72918 }, { "epoch": 2.43, "grad_norm": 0.6668583750724792, "learning_rate": 5.25800931349741e-05, "loss": 1.7071, "step": 72919 }, { "epoch": 2.43, "grad_norm": 0.7065051794052124, "learning_rate": 5.257418238046825e-05, "loss": 1.6943, "step": 72920 }, { "epoch": 2.43, "grad_norm": 0.6839497089385986, "learning_rate": 5.25682719263008e-05, "loss": 1.6958, "step": 72921 }, { "epoch": 2.43, "grad_norm": 0.7070481181144714, "learning_rate": 5.256236177247912e-05, "loss": 1.6617, "step": 72922 }, { "epoch": 2.43, "grad_norm": 0.6673421859741211, "learning_rate": 5.255645191901016e-05, "loss": 1.6763, "step": 72923 }, { "epoch": 2.43, "grad_norm": 0.6927294135093689, "learning_rate": 5.255054236590129e-05, "loss": 1.6806, "step": 72924 }, { "epoch": 2.43, "grad_norm": 0.6978952884674072, "learning_rate": 5.2544633113159565e-05, "loss": 1.687, "step": 72925 }, { "epoch": 2.43, "grad_norm": 0.6803932189941406, "learning_rate": 5.253872416079222e-05, "loss": 1.6555, "step": 72926 }, { "epoch": 2.43, "grad_norm": 0.6725450158119202, "learning_rate": 5.253281550880644e-05, "loss": 1.6669, "step": 72927 }, { "epoch": 2.43, "grad_norm": 0.694584310054779, "learning_rate": 5.252690715720927e-05, "loss": 1.757, "step": 72928 }, { "epoch": 2.43, "grad_norm": 0.6818670630455017, "learning_rate": 5.2520999106008035e-05, "loss": 1.5948, "step": 72929 }, { "epoch": 2.43, "grad_norm": 0.6803948283195496, "learning_rate": 5.251509135520977e-05, "loss": 1.6085, "step": 72930 }, { "epoch": 2.43, "grad_norm": 0.6918806433677673, "learning_rate": 5.2509183904821806e-05, "loss": 1.6716, "step": 72931 }, { "epoch": 2.43, "grad_norm": 0.6727715730667114, "learning_rate": 5.250327675485123e-05, "loss": 1.7109, "step": 72932 }, { "epoch": 2.43, "grad_norm": 0.6890208721160889, "learning_rate": 5.2497369905305106e-05, "loss": 1.6671, "step": 72933 }, { "epoch": 2.43, "grad_norm": 0.6886209845542908, "learning_rate": 5.2491463356190835e-05, "loss": 1.71, "step": 72934 }, { "epoch": 2.43, "grad_norm": 0.6753068566322327, "learning_rate": 5.248555710751533e-05, "loss": 1.7112, "step": 72935 }, { "epoch": 2.43, "grad_norm": 0.6825385689735413, "learning_rate": 5.2479651159285993e-05, "loss": 1.6771, "step": 72936 }, { "epoch": 2.43, "grad_norm": 0.697670042514801, "learning_rate": 5.247374551150979e-05, "loss": 1.7, "step": 72937 }, { "epoch": 2.43, "grad_norm": 0.6979504227638245, "learning_rate": 5.246784016419411e-05, "loss": 1.7943, "step": 72938 }, { "epoch": 2.43, "grad_norm": 0.7113635540008545, "learning_rate": 5.246193511734594e-05, "loss": 1.6225, "step": 72939 }, { "epoch": 2.43, "grad_norm": 0.6879031658172607, "learning_rate": 5.245603037097249e-05, "loss": 1.7143, "step": 72940 }, { "epoch": 2.43, "grad_norm": 0.6776812672615051, "learning_rate": 5.245012592508098e-05, "loss": 1.6978, "step": 72941 }, { "epoch": 2.43, "grad_norm": 0.6872809529304504, "learning_rate": 5.24442217796785e-05, "loss": 1.7409, "step": 72942 }, { "epoch": 2.43, "grad_norm": 0.688765823841095, "learning_rate": 5.243831793477231e-05, "loss": 1.7097, "step": 72943 }, { "epoch": 2.43, "grad_norm": 0.6723911166191101, "learning_rate": 5.2432414390369447e-05, "loss": 1.7697, "step": 72944 }, { "epoch": 2.43, "grad_norm": 0.6657048463821411, "learning_rate": 5.242651114647723e-05, "loss": 1.7159, "step": 72945 }, { "epoch": 2.43, "grad_norm": 0.6780710220336914, "learning_rate": 5.2420608203102766e-05, "loss": 1.7459, "step": 72946 }, { "epoch": 2.43, "grad_norm": 0.71620774269104, "learning_rate": 5.24147055602531e-05, "loss": 1.6986, "step": 72947 }, { "epoch": 2.43, "grad_norm": 0.692958652973175, "learning_rate": 5.24088032179356e-05, "loss": 1.7469, "step": 72948 }, { "epoch": 2.43, "grad_norm": 0.6865012049674988, "learning_rate": 5.240290117615733e-05, "loss": 1.6207, "step": 72949 }, { "epoch": 2.43, "grad_norm": 0.6728635430335999, "learning_rate": 5.2396999434925415e-05, "loss": 1.695, "step": 72950 }, { "epoch": 2.43, "grad_norm": 0.6785471439361572, "learning_rate": 5.239109799424701e-05, "loss": 1.6253, "step": 72951 }, { "epoch": 2.43, "grad_norm": 0.6796466112136841, "learning_rate": 5.238519685412942e-05, "loss": 1.6369, "step": 72952 }, { "epoch": 2.43, "grad_norm": 0.6877171993255615, "learning_rate": 5.237929601457973e-05, "loss": 1.6359, "step": 72953 }, { "epoch": 2.43, "grad_norm": 0.6872133612632751, "learning_rate": 5.237339547560497e-05, "loss": 1.7097, "step": 72954 }, { "epoch": 2.43, "grad_norm": 0.6913809776306152, "learning_rate": 5.236749523721254e-05, "loss": 1.6879, "step": 72955 }, { "epoch": 2.43, "grad_norm": 0.6741023659706116, "learning_rate": 5.236159529940947e-05, "loss": 1.6214, "step": 72956 }, { "epoch": 2.43, "grad_norm": 0.6941039562225342, "learning_rate": 5.235569566220287e-05, "loss": 1.7173, "step": 72957 }, { "epoch": 2.43, "grad_norm": 0.6707819700241089, "learning_rate": 5.234979632559998e-05, "loss": 1.636, "step": 72958 }, { "epoch": 2.43, "grad_norm": 0.7053519487380981, "learning_rate": 5.234389728960806e-05, "loss": 1.7118, "step": 72959 }, { "epoch": 2.43, "grad_norm": 0.6748362183570862, "learning_rate": 5.233799855423404e-05, "loss": 1.6919, "step": 72960 }, { "epoch": 2.43, "grad_norm": 0.6918641924858093, "learning_rate": 5.233210011948518e-05, "loss": 1.7254, "step": 72961 }, { "epoch": 2.43, "grad_norm": 0.6811426877975464, "learning_rate": 5.232620198536873e-05, "loss": 1.6396, "step": 72962 }, { "epoch": 2.43, "grad_norm": 0.673957347869873, "learning_rate": 5.232030415189177e-05, "loss": 1.6552, "step": 72963 }, { "epoch": 2.43, "grad_norm": 0.6832613945007324, "learning_rate": 5.2314406619061414e-05, "loss": 1.7035, "step": 72964 }, { "epoch": 2.43, "grad_norm": 0.7263044714927673, "learning_rate": 5.230850938688489e-05, "loss": 1.725, "step": 72965 }, { "epoch": 2.43, "grad_norm": 0.6825605630874634, "learning_rate": 5.230261245536946e-05, "loss": 1.7138, "step": 72966 }, { "epoch": 2.43, "grad_norm": 0.665684700012207, "learning_rate": 5.2296715824521994e-05, "loss": 1.7422, "step": 72967 }, { "epoch": 2.43, "grad_norm": 0.688630998134613, "learning_rate": 5.2290819494349844e-05, "loss": 1.7807, "step": 72968 }, { "epoch": 2.43, "grad_norm": 0.7033866047859192, "learning_rate": 5.228492346486024e-05, "loss": 1.6854, "step": 72969 }, { "epoch": 2.43, "grad_norm": 0.7016656994819641, "learning_rate": 5.2279027736060206e-05, "loss": 1.7121, "step": 72970 }, { "epoch": 2.43, "grad_norm": 0.6856069564819336, "learning_rate": 5.227313230795688e-05, "loss": 1.6895, "step": 72971 }, { "epoch": 2.43, "grad_norm": 0.6697564721107483, "learning_rate": 5.226723718055744e-05, "loss": 1.7016, "step": 72972 }, { "epoch": 2.43, "grad_norm": 0.6704697608947754, "learning_rate": 5.2261342353869226e-05, "loss": 1.6309, "step": 72973 }, { "epoch": 2.43, "grad_norm": 0.6914588212966919, "learning_rate": 5.22554478278991e-05, "loss": 1.6571, "step": 72974 }, { "epoch": 2.43, "grad_norm": 0.6945667266845703, "learning_rate": 5.224955360265438e-05, "loss": 1.6682, "step": 72975 }, { "epoch": 2.43, "grad_norm": 0.6634488105773926, "learning_rate": 5.224365967814224e-05, "loss": 1.6053, "step": 72976 }, { "epoch": 2.43, "grad_norm": 0.6965250968933105, "learning_rate": 5.223776605436979e-05, "loss": 1.7219, "step": 72977 }, { "epoch": 2.43, "grad_norm": 0.6760680675506592, "learning_rate": 5.223187273134414e-05, "loss": 1.6863, "step": 72978 }, { "epoch": 2.43, "grad_norm": 0.7179961204528809, "learning_rate": 5.222597970907254e-05, "loss": 1.7221, "step": 72979 }, { "epoch": 2.43, "grad_norm": 0.721805214881897, "learning_rate": 5.2220086987562124e-05, "loss": 1.6616, "step": 72980 }, { "epoch": 2.43, "grad_norm": 0.6786649227142334, "learning_rate": 5.221419456681989e-05, "loss": 1.6991, "step": 72981 }, { "epoch": 2.43, "grad_norm": 0.7153747081756592, "learning_rate": 5.220830244685316e-05, "loss": 1.7453, "step": 72982 }, { "epoch": 2.43, "grad_norm": 0.6960344314575195, "learning_rate": 5.220241062766909e-05, "loss": 1.7625, "step": 72983 }, { "epoch": 2.43, "grad_norm": 0.7075833678245544, "learning_rate": 5.2196519109274785e-05, "loss": 1.7408, "step": 72984 }, { "epoch": 2.43, "grad_norm": 0.6905134320259094, "learning_rate": 5.2190627891677325e-05, "loss": 1.6996, "step": 72985 }, { "epoch": 2.43, "grad_norm": 0.6870778203010559, "learning_rate": 5.218473697488399e-05, "loss": 1.6206, "step": 72986 }, { "epoch": 2.43, "grad_norm": 0.6842869520187378, "learning_rate": 5.2178846358901895e-05, "loss": 1.6028, "step": 72987 }, { "epoch": 2.43, "grad_norm": 0.7102568745613098, "learning_rate": 5.217295604373807e-05, "loss": 1.6664, "step": 72988 }, { "epoch": 2.43, "grad_norm": 0.7043604254722595, "learning_rate": 5.216706602939984e-05, "loss": 1.7595, "step": 72989 }, { "epoch": 2.43, "grad_norm": 0.6721497178077698, "learning_rate": 5.21611763158942e-05, "loss": 1.6942, "step": 72990 }, { "epoch": 2.43, "grad_norm": 0.6937899589538574, "learning_rate": 5.215528690322849e-05, "loss": 1.6764, "step": 72991 }, { "epoch": 2.43, "grad_norm": 0.6771386861801147, "learning_rate": 5.214939779140962e-05, "loss": 1.6942, "step": 72992 }, { "epoch": 2.43, "grad_norm": 0.6951701641082764, "learning_rate": 5.214350898044496e-05, "loss": 1.6484, "step": 72993 }, { "epoch": 2.43, "grad_norm": 0.6751788854598999, "learning_rate": 5.2137620470341534e-05, "loss": 1.7064, "step": 72994 }, { "epoch": 2.43, "grad_norm": 0.682914137840271, "learning_rate": 5.213173226110647e-05, "loss": 1.7048, "step": 72995 }, { "epoch": 2.43, "grad_norm": 0.6954169869422913, "learning_rate": 5.2125844352747e-05, "loss": 1.7054, "step": 72996 }, { "epoch": 2.43, "grad_norm": 0.6633629202842712, "learning_rate": 5.211995674527019e-05, "loss": 1.743, "step": 72997 }, { "epoch": 2.43, "grad_norm": 0.682001531124115, "learning_rate": 5.211406943868328e-05, "loss": 1.6667, "step": 72998 }, { "epoch": 2.43, "grad_norm": 0.6922423243522644, "learning_rate": 5.2108182432993384e-05, "loss": 1.7512, "step": 72999 }, { "epoch": 2.43, "grad_norm": 0.7040219902992249, "learning_rate": 5.210229572820756e-05, "loss": 1.6381, "step": 73000 }, { "epoch": 2.43, "grad_norm": 0.685843825340271, "learning_rate": 5.2096409324333086e-05, "loss": 1.6264, "step": 73001 }, { "epoch": 2.43, "grad_norm": 0.678297221660614, "learning_rate": 5.2090523221376975e-05, "loss": 1.6465, "step": 73002 }, { "epoch": 2.43, "grad_norm": 0.6847878694534302, "learning_rate": 5.208463741934652e-05, "loss": 1.6504, "step": 73003 }, { "epoch": 2.43, "grad_norm": 0.7016047835350037, "learning_rate": 5.2078751918248705e-05, "loss": 1.5948, "step": 73004 }, { "epoch": 2.43, "grad_norm": 0.6898634433746338, "learning_rate": 5.207286671809083e-05, "loss": 1.6381, "step": 73005 }, { "epoch": 2.43, "grad_norm": 0.6891496181488037, "learning_rate": 5.206698181887996e-05, "loss": 1.7344, "step": 73006 }, { "epoch": 2.43, "grad_norm": 0.6810516715049744, "learning_rate": 5.206109722062318e-05, "loss": 1.7092, "step": 73007 }, { "epoch": 2.43, "grad_norm": 0.6748983860015869, "learning_rate": 5.205521292332776e-05, "loss": 1.705, "step": 73008 }, { "epoch": 2.43, "grad_norm": 0.677358865737915, "learning_rate": 5.2049328927000785e-05, "loss": 1.6449, "step": 73009 }, { "epoch": 2.43, "grad_norm": 0.683741569519043, "learning_rate": 5.2043445231649295e-05, "loss": 1.6465, "step": 73010 }, { "epoch": 2.43, "grad_norm": 0.7114620804786682, "learning_rate": 5.203756183728054e-05, "loss": 1.7022, "step": 73011 }, { "epoch": 2.43, "grad_norm": 0.7020032405853271, "learning_rate": 5.203167874390175e-05, "loss": 1.6734, "step": 73012 }, { "epoch": 2.43, "grad_norm": 0.6843391060829163, "learning_rate": 5.202579595151992e-05, "loss": 1.6849, "step": 73013 }, { "epoch": 2.43, "grad_norm": 0.6992059946060181, "learning_rate": 5.2019913460142214e-05, "loss": 1.7004, "step": 73014 }, { "epoch": 2.43, "grad_norm": 0.6806438565254211, "learning_rate": 5.2014031269775825e-05, "loss": 1.6783, "step": 73015 }, { "epoch": 2.43, "grad_norm": 0.6683655381202698, "learning_rate": 5.2008149380427875e-05, "loss": 1.6589, "step": 73016 }, { "epoch": 2.43, "grad_norm": 0.6888492107391357, "learning_rate": 5.2002267792105434e-05, "loss": 1.6963, "step": 73017 }, { "epoch": 2.43, "grad_norm": 0.6959251165390015, "learning_rate": 5.199638650481569e-05, "loss": 1.6524, "step": 73018 }, { "epoch": 2.43, "grad_norm": 0.6716165542602539, "learning_rate": 5.199050551856594e-05, "loss": 1.6595, "step": 73019 }, { "epoch": 2.43, "grad_norm": 0.7036891579627991, "learning_rate": 5.198462483336301e-05, "loss": 1.6422, "step": 73020 }, { "epoch": 2.43, "grad_norm": 0.689784824848175, "learning_rate": 5.1978744449214225e-05, "loss": 1.6994, "step": 73021 }, { "epoch": 2.43, "grad_norm": 0.6870946288108826, "learning_rate": 5.197286436612675e-05, "loss": 1.7526, "step": 73022 }, { "epoch": 2.43, "grad_norm": 0.682325005531311, "learning_rate": 5.1966984584107715e-05, "loss": 1.7042, "step": 73023 }, { "epoch": 2.43, "grad_norm": 0.709061324596405, "learning_rate": 5.1961105103164113e-05, "loss": 1.7267, "step": 73024 }, { "epoch": 2.43, "grad_norm": 0.6946918368339539, "learning_rate": 5.195522592330317e-05, "loss": 1.6955, "step": 73025 }, { "epoch": 2.43, "grad_norm": 0.7265699505805969, "learning_rate": 5.194934704453221e-05, "loss": 1.6418, "step": 73026 }, { "epoch": 2.43, "grad_norm": 0.672556459903717, "learning_rate": 5.194346846685803e-05, "loss": 1.663, "step": 73027 }, { "epoch": 2.43, "grad_norm": 0.6776127219200134, "learning_rate": 5.1937590190287925e-05, "loss": 1.6621, "step": 73028 }, { "epoch": 2.43, "grad_norm": 0.6735106110572815, "learning_rate": 5.193171221482909e-05, "loss": 1.7156, "step": 73029 }, { "epoch": 2.43, "grad_norm": 0.7409499287605286, "learning_rate": 5.192583454048865e-05, "loss": 1.6811, "step": 73030 }, { "epoch": 2.43, "grad_norm": 0.7845454812049866, "learning_rate": 5.1919957167273605e-05, "loss": 1.6608, "step": 73031 }, { "epoch": 2.43, "grad_norm": 0.7036434412002563, "learning_rate": 5.191408009519118e-05, "loss": 1.6706, "step": 73032 }, { "epoch": 2.43, "grad_norm": 0.6783986687660217, "learning_rate": 5.190820332424856e-05, "loss": 1.6829, "step": 73033 }, { "epoch": 2.43, "grad_norm": 0.6952784657478333, "learning_rate": 5.190232685445286e-05, "loss": 1.7243, "step": 73034 }, { "epoch": 2.43, "grad_norm": 0.6874479651451111, "learning_rate": 5.1896450685811085e-05, "loss": 1.7309, "step": 73035 }, { "epoch": 2.43, "grad_norm": 0.7021500468254089, "learning_rate": 5.189057481833054e-05, "loss": 1.7783, "step": 73036 }, { "epoch": 2.43, "grad_norm": 0.682707667350769, "learning_rate": 5.1884699252018294e-05, "loss": 1.7045, "step": 73037 }, { "epoch": 2.43, "grad_norm": 0.6825302839279175, "learning_rate": 5.187882398688136e-05, "loss": 1.6856, "step": 73038 }, { "epoch": 2.43, "grad_norm": 0.6820144057273865, "learning_rate": 5.187294902292707e-05, "loss": 1.6038, "step": 73039 }, { "epoch": 2.43, "grad_norm": 0.7007057070732117, "learning_rate": 5.1867074360162386e-05, "loss": 1.6047, "step": 73040 }, { "epoch": 2.43, "grad_norm": 0.6876353025436401, "learning_rate": 5.186119999859457e-05, "loss": 1.6532, "step": 73041 }, { "epoch": 2.43, "grad_norm": 0.6743687987327576, "learning_rate": 5.1855325938230653e-05, "loss": 1.679, "step": 73042 }, { "epoch": 2.43, "grad_norm": 0.7013846039772034, "learning_rate": 5.184945217907786e-05, "loss": 1.7147, "step": 73043 }, { "epoch": 2.43, "grad_norm": 0.680522620677948, "learning_rate": 5.184357872114328e-05, "loss": 1.6395, "step": 73044 }, { "epoch": 2.43, "grad_norm": 0.691520631313324, "learning_rate": 5.183770556443394e-05, "loss": 1.6851, "step": 73045 }, { "epoch": 2.43, "grad_norm": 0.7092047929763794, "learning_rate": 5.1831832708957146e-05, "loss": 1.7651, "step": 73046 }, { "epoch": 2.43, "grad_norm": 0.7109431028366089, "learning_rate": 5.1825960154719884e-05, "loss": 1.753, "step": 73047 }, { "epoch": 2.43, "grad_norm": 0.7009276151657104, "learning_rate": 5.182008790172938e-05, "loss": 1.697, "step": 73048 }, { "epoch": 2.43, "grad_norm": 0.6895824074745178, "learning_rate": 5.181421594999273e-05, "loss": 1.6457, "step": 73049 }, { "epoch": 2.43, "grad_norm": 0.7111058235168457, "learning_rate": 5.1808344299517e-05, "loss": 1.7501, "step": 73050 }, { "epoch": 2.43, "grad_norm": 0.6834023594856262, "learning_rate": 5.1802472950309446e-05, "loss": 1.6775, "step": 73051 }, { "epoch": 2.43, "grad_norm": 0.6857119202613831, "learning_rate": 5.179660190237703e-05, "loss": 1.7361, "step": 73052 }, { "epoch": 2.43, "grad_norm": 0.7008468508720398, "learning_rate": 5.179073115572702e-05, "loss": 1.6628, "step": 73053 }, { "epoch": 2.43, "grad_norm": 0.6924566030502319, "learning_rate": 5.178486071036646e-05, "loss": 1.6606, "step": 73054 }, { "epoch": 2.43, "grad_norm": 0.692218005657196, "learning_rate": 5.177899056630256e-05, "loss": 1.6742, "step": 73055 }, { "epoch": 2.43, "grad_norm": 0.6841875314712524, "learning_rate": 5.177312072354237e-05, "loss": 1.7064, "step": 73056 }, { "epoch": 2.43, "grad_norm": 0.7036489248275757, "learning_rate": 5.1767251182092996e-05, "loss": 1.6815, "step": 73057 }, { "epoch": 2.43, "grad_norm": 0.6896819472312927, "learning_rate": 5.176138194196169e-05, "loss": 1.7269, "step": 73058 }, { "epoch": 2.43, "grad_norm": 0.6876707077026367, "learning_rate": 5.175551300315545e-05, "loss": 1.78, "step": 73059 }, { "epoch": 2.43, "grad_norm": 0.6771211624145508, "learning_rate": 5.17496443656814e-05, "loss": 1.6352, "step": 73060 }, { "epoch": 2.43, "grad_norm": 0.6615007519721985, "learning_rate": 5.174377602954667e-05, "loss": 1.669, "step": 73061 }, { "epoch": 2.43, "grad_norm": 0.6661427021026611, "learning_rate": 5.173790799475852e-05, "loss": 1.6459, "step": 73062 }, { "epoch": 2.43, "grad_norm": 0.6756466627120972, "learning_rate": 5.173204026132395e-05, "loss": 1.7173, "step": 73063 }, { "epoch": 2.43, "grad_norm": 0.6864740252494812, "learning_rate": 5.172617282925004e-05, "loss": 1.6903, "step": 73064 }, { "epoch": 2.43, "grad_norm": 0.6975541114807129, "learning_rate": 5.172030569854404e-05, "loss": 1.6728, "step": 73065 }, { "epoch": 2.43, "grad_norm": 0.6800289750099182, "learning_rate": 5.1714438869213025e-05, "loss": 1.7607, "step": 73066 }, { "epoch": 2.43, "grad_norm": 0.6773021221160889, "learning_rate": 5.1708572341263996e-05, "loss": 1.6707, "step": 73067 }, { "epoch": 2.43, "grad_norm": 0.6647176742553711, "learning_rate": 5.1702706114704184e-05, "loss": 1.677, "step": 73068 }, { "epoch": 2.43, "grad_norm": 0.6911803483963013, "learning_rate": 5.169684018954078e-05, "loss": 1.7071, "step": 73069 }, { "epoch": 2.43, "grad_norm": 0.7033047080039978, "learning_rate": 5.169097456578084e-05, "loss": 1.7259, "step": 73070 }, { "epoch": 2.43, "grad_norm": 0.6881948113441467, "learning_rate": 5.1685109243431396e-05, "loss": 1.7021, "step": 73071 }, { "epoch": 2.43, "grad_norm": 0.6929013729095459, "learning_rate": 5.1679244222499684e-05, "loss": 1.75, "step": 73072 }, { "epoch": 2.43, "grad_norm": 0.7273345589637756, "learning_rate": 5.167337950299282e-05, "loss": 1.6678, "step": 73073 }, { "epoch": 2.43, "grad_norm": 0.6719329953193665, "learning_rate": 5.16675150849178e-05, "loss": 1.6616, "step": 73074 }, { "epoch": 2.43, "grad_norm": 0.6925203800201416, "learning_rate": 5.166165096828182e-05, "loss": 1.6321, "step": 73075 }, { "epoch": 2.43, "grad_norm": 0.684846818447113, "learning_rate": 5.1655787153092144e-05, "loss": 1.7361, "step": 73076 }, { "epoch": 2.43, "grad_norm": 0.7126918435096741, "learning_rate": 5.16499236393556e-05, "loss": 1.7058, "step": 73077 }, { "epoch": 2.43, "grad_norm": 1.774327278137207, "learning_rate": 5.164406042707947e-05, "loss": 1.7664, "step": 73078 }, { "epoch": 2.43, "grad_norm": 0.712651789188385, "learning_rate": 5.1638197516270926e-05, "loss": 1.7283, "step": 73079 }, { "epoch": 2.43, "grad_norm": 0.6819124817848206, "learning_rate": 5.163233490693703e-05, "loss": 1.6249, "step": 73080 }, { "epoch": 2.43, "grad_norm": 0.6946941614151001, "learning_rate": 5.1626472599084775e-05, "loss": 1.6135, "step": 73081 }, { "epoch": 2.43, "grad_norm": 0.6807690262794495, "learning_rate": 5.162061059272142e-05, "loss": 1.6766, "step": 73082 }, { "epoch": 2.43, "grad_norm": 0.6840105056762695, "learning_rate": 5.161474888785419e-05, "loss": 1.6965, "step": 73083 }, { "epoch": 2.43, "grad_norm": 0.6781442165374756, "learning_rate": 5.1608887484489894e-05, "loss": 1.6877, "step": 73084 }, { "epoch": 2.43, "grad_norm": 0.6584949493408203, "learning_rate": 5.1603026382635836e-05, "loss": 1.6155, "step": 73085 }, { "epoch": 2.43, "grad_norm": 0.6762300729751587, "learning_rate": 5.159716558229916e-05, "loss": 1.5665, "step": 73086 }, { "epoch": 2.43, "grad_norm": 0.6795923113822937, "learning_rate": 5.1591305083486924e-05, "loss": 1.647, "step": 73087 }, { "epoch": 2.43, "grad_norm": 0.68593430519104, "learning_rate": 5.158544488620615e-05, "loss": 1.6723, "step": 73088 }, { "epoch": 2.43, "grad_norm": 0.7100488543510437, "learning_rate": 5.157958499046407e-05, "loss": 1.666, "step": 73089 }, { "epoch": 2.43, "grad_norm": 0.6718873977661133, "learning_rate": 5.157372539626792e-05, "loss": 1.6891, "step": 73090 }, { "epoch": 2.43, "grad_norm": 0.6892979145050049, "learning_rate": 5.156786610362451e-05, "loss": 1.6545, "step": 73091 }, { "epoch": 2.43, "grad_norm": 0.6824881434440613, "learning_rate": 5.156200711254111e-05, "loss": 1.6578, "step": 73092 }, { "epoch": 2.43, "grad_norm": 0.7371289730072021, "learning_rate": 5.155614842302489e-05, "loss": 1.6916, "step": 73093 }, { "epoch": 2.43, "grad_norm": 0.7134585976600647, "learning_rate": 5.15502900350829e-05, "loss": 1.7061, "step": 73094 }, { "epoch": 2.43, "grad_norm": 0.7010003924369812, "learning_rate": 5.15444319487222e-05, "loss": 1.7019, "step": 73095 }, { "epoch": 2.43, "grad_norm": 0.6987026929855347, "learning_rate": 5.153857416394999e-05, "loss": 1.6163, "step": 73096 }, { "epoch": 2.43, "grad_norm": 0.6926729083061218, "learning_rate": 5.1532716680773355e-05, "loss": 1.7316, "step": 73097 }, { "epoch": 2.43, "grad_norm": 0.6894696950912476, "learning_rate": 5.152685949919933e-05, "loss": 1.6604, "step": 73098 }, { "epoch": 2.43, "grad_norm": 0.7109692096710205, "learning_rate": 5.152100261923509e-05, "loss": 1.6623, "step": 73099 }, { "epoch": 2.43, "grad_norm": 0.7085791826248169, "learning_rate": 5.15151460408878e-05, "loss": 1.7688, "step": 73100 }, { "epoch": 2.43, "grad_norm": 0.6784034371376038, "learning_rate": 5.150928976416451e-05, "loss": 1.707, "step": 73101 }, { "epoch": 2.43, "grad_norm": 0.7050397992134094, "learning_rate": 5.150343378907227e-05, "loss": 1.7363, "step": 73102 }, { "epoch": 2.43, "grad_norm": 0.684100866317749, "learning_rate": 5.1497578115618276e-05, "loss": 1.6777, "step": 73103 }, { "epoch": 2.43, "grad_norm": 0.6722529530525208, "learning_rate": 5.149172274380966e-05, "loss": 1.6695, "step": 73104 }, { "epoch": 2.43, "grad_norm": 0.9238205552101135, "learning_rate": 5.148586767365338e-05, "loss": 1.8071, "step": 73105 }, { "epoch": 2.43, "grad_norm": 0.6822800636291504, "learning_rate": 5.148001290515673e-05, "loss": 1.7639, "step": 73106 }, { "epoch": 2.43, "grad_norm": 0.6958147883415222, "learning_rate": 5.147415843832664e-05, "loss": 1.7474, "step": 73107 }, { "epoch": 2.43, "grad_norm": 0.7022578716278076, "learning_rate": 5.146830427317037e-05, "loss": 1.6983, "step": 73108 }, { "epoch": 2.43, "grad_norm": 0.6901521682739258, "learning_rate": 5.1462450409694875e-05, "loss": 1.6742, "step": 73109 }, { "epoch": 2.43, "grad_norm": 0.6998498439788818, "learning_rate": 5.145659684790746e-05, "loss": 1.696, "step": 73110 }, { "epoch": 2.43, "grad_norm": 0.7065697908401489, "learning_rate": 5.145074358781508e-05, "loss": 1.643, "step": 73111 }, { "epoch": 2.43, "grad_norm": 0.68584144115448, "learning_rate": 5.144489062942483e-05, "loss": 1.6884, "step": 73112 }, { "epoch": 2.43, "grad_norm": 0.6797589659690857, "learning_rate": 5.14390379727439e-05, "loss": 1.673, "step": 73113 }, { "epoch": 2.43, "grad_norm": 0.701902449131012, "learning_rate": 5.143318561777929e-05, "loss": 1.6961, "step": 73114 }, { "epoch": 2.43, "grad_norm": 0.7230439782142639, "learning_rate": 5.142733356453826e-05, "loss": 1.7564, "step": 73115 }, { "epoch": 2.43, "grad_norm": 0.681847095489502, "learning_rate": 5.142148181302781e-05, "loss": 1.6561, "step": 73116 }, { "epoch": 2.43, "grad_norm": 0.6953100562095642, "learning_rate": 5.141563036325499e-05, "loss": 1.6633, "step": 73117 }, { "epoch": 2.43, "grad_norm": 0.7059060335159302, "learning_rate": 5.140977921522703e-05, "loss": 1.7158, "step": 73118 }, { "epoch": 2.43, "grad_norm": 0.7212308049201965, "learning_rate": 5.14039283689509e-05, "loss": 1.7365, "step": 73119 }, { "epoch": 2.43, "grad_norm": 0.7089345455169678, "learning_rate": 5.1398077824433846e-05, "loss": 1.6824, "step": 73120 }, { "epoch": 2.43, "grad_norm": 0.6777195334434509, "learning_rate": 5.139222758168281e-05, "loss": 1.7275, "step": 73121 }, { "epoch": 2.43, "grad_norm": 0.6999771595001221, "learning_rate": 5.138637764070504e-05, "loss": 1.6269, "step": 73122 }, { "epoch": 2.43, "grad_norm": 0.7102387547492981, "learning_rate": 5.1380528001507614e-05, "loss": 1.6881, "step": 73123 }, { "epoch": 2.43, "grad_norm": 0.683141827583313, "learning_rate": 5.1374678664097515e-05, "loss": 1.7164, "step": 73124 }, { "epoch": 2.43, "grad_norm": 0.6756493449211121, "learning_rate": 5.1368829628482e-05, "loss": 1.662, "step": 73125 }, { "epoch": 2.43, "grad_norm": 0.6841011643409729, "learning_rate": 5.136298089466807e-05, "loss": 1.7398, "step": 73126 }, { "epoch": 2.43, "grad_norm": 0.6784676313400269, "learning_rate": 5.135713246266279e-05, "loss": 1.7025, "step": 73127 }, { "epoch": 2.43, "grad_norm": 0.6641285419464111, "learning_rate": 5.135128433247331e-05, "loss": 1.6798, "step": 73128 }, { "epoch": 2.43, "grad_norm": 0.7020003199577332, "learning_rate": 5.1345436504106796e-05, "loss": 1.6624, "step": 73129 }, { "epoch": 2.43, "grad_norm": 0.688291609287262, "learning_rate": 5.133958897757028e-05, "loss": 1.6764, "step": 73130 }, { "epoch": 2.43, "grad_norm": 0.705883264541626, "learning_rate": 5.1333741752870814e-05, "loss": 1.6936, "step": 73131 }, { "epoch": 2.43, "grad_norm": 0.7080049514770508, "learning_rate": 5.1327894830015605e-05, "loss": 1.6766, "step": 73132 }, { "epoch": 2.43, "grad_norm": 0.7039139270782471, "learning_rate": 5.13220482090117e-05, "loss": 1.694, "step": 73133 }, { "epoch": 2.43, "grad_norm": 0.6828378438949585, "learning_rate": 5.131620188986611e-05, "loss": 1.7064, "step": 73134 }, { "epoch": 2.43, "grad_norm": 0.6949616074562073, "learning_rate": 5.1310355872586006e-05, "loss": 1.6419, "step": 73135 }, { "epoch": 2.43, "grad_norm": 0.6707980632781982, "learning_rate": 5.1304510157178636e-05, "loss": 1.6875, "step": 73136 }, { "epoch": 2.43, "grad_norm": 0.6995662450790405, "learning_rate": 5.129866474365082e-05, "loss": 1.7395, "step": 73137 }, { "epoch": 2.43, "grad_norm": 0.6850489377975464, "learning_rate": 5.1292819632009754e-05, "loss": 1.6806, "step": 73138 }, { "epoch": 2.43, "grad_norm": 0.6836333274841309, "learning_rate": 5.128697482226266e-05, "loss": 1.671, "step": 73139 }, { "epoch": 2.43, "grad_norm": 0.6731790900230408, "learning_rate": 5.128113031441651e-05, "loss": 1.6556, "step": 73140 }, { "epoch": 2.43, "grad_norm": 0.7325288653373718, "learning_rate": 5.127528610847835e-05, "loss": 1.6956, "step": 73141 }, { "epoch": 2.43, "grad_norm": 0.6912621259689331, "learning_rate": 5.126944220445536e-05, "loss": 1.6443, "step": 73142 }, { "epoch": 2.43, "grad_norm": 0.7127199172973633, "learning_rate": 5.126359860235475e-05, "loss": 1.6782, "step": 73143 }, { "epoch": 2.43, "grad_norm": 0.6804683804512024, "learning_rate": 5.1257755302183326e-05, "loss": 1.6599, "step": 73144 }, { "epoch": 2.43, "grad_norm": 0.7044811248779297, "learning_rate": 5.1251912303948336e-05, "loss": 1.7012, "step": 73145 }, { "epoch": 2.43, "grad_norm": 0.7044432759284973, "learning_rate": 5.1246069607656956e-05, "loss": 1.6686, "step": 73146 }, { "epoch": 2.43, "grad_norm": 0.6870283484458923, "learning_rate": 5.1240227213316207e-05, "loss": 1.7314, "step": 73147 }, { "epoch": 2.43, "grad_norm": 0.6884680390357971, "learning_rate": 5.123438512093309e-05, "loss": 1.6868, "step": 73148 }, { "epoch": 2.43, "grad_norm": 0.6931136846542358, "learning_rate": 5.122854333051478e-05, "loss": 1.6643, "step": 73149 }, { "epoch": 2.43, "grad_norm": 0.6805862784385681, "learning_rate": 5.122270184206842e-05, "loss": 1.7294, "step": 73150 }, { "epoch": 2.43, "grad_norm": 0.686944842338562, "learning_rate": 5.1216860655601046e-05, "loss": 1.6454, "step": 73151 }, { "epoch": 2.43, "grad_norm": 0.6896535754203796, "learning_rate": 5.12110197711197e-05, "loss": 1.5946, "step": 73152 }, { "epoch": 2.43, "grad_norm": 0.6919295787811279, "learning_rate": 5.120517918863155e-05, "loss": 1.6816, "step": 73153 }, { "epoch": 2.43, "grad_norm": 0.7168680429458618, "learning_rate": 5.119933890814369e-05, "loss": 1.6556, "step": 73154 }, { "epoch": 2.43, "grad_norm": 0.6704691052436829, "learning_rate": 5.119349892966309e-05, "loss": 1.7361, "step": 73155 }, { "epoch": 2.43, "grad_norm": 0.7109168767929077, "learning_rate": 5.1187659253197e-05, "loss": 1.6651, "step": 73156 }, { "epoch": 2.43, "grad_norm": 0.7310999035835266, "learning_rate": 5.118181987875235e-05, "loss": 1.6607, "step": 73157 }, { "epoch": 2.43, "grad_norm": 0.7043155431747437, "learning_rate": 5.117598080633637e-05, "loss": 1.6857, "step": 73158 }, { "epoch": 2.43, "grad_norm": 0.6997424960136414, "learning_rate": 5.117014203595602e-05, "loss": 1.7528, "step": 73159 }, { "epoch": 2.43, "grad_norm": 0.6846282482147217, "learning_rate": 5.116430356761856e-05, "loss": 1.7486, "step": 73160 }, { "epoch": 2.43, "grad_norm": 0.7192201614379883, "learning_rate": 5.115846540133092e-05, "loss": 1.7323, "step": 73161 }, { "epoch": 2.43, "grad_norm": 0.6922786831855774, "learning_rate": 5.1152627537100186e-05, "loss": 1.709, "step": 73162 }, { "epoch": 2.43, "grad_norm": 0.6693793535232544, "learning_rate": 5.11467899749336e-05, "loss": 1.7204, "step": 73163 }, { "epoch": 2.43, "grad_norm": 0.6958885788917542, "learning_rate": 5.1140952714838046e-05, "loss": 1.6589, "step": 73164 }, { "epoch": 2.43, "grad_norm": 0.6959773302078247, "learning_rate": 5.113511575682075e-05, "loss": 1.7041, "step": 73165 }, { "epoch": 2.43, "grad_norm": 0.7110029458999634, "learning_rate": 5.112927910088879e-05, "loss": 1.7758, "step": 73166 }, { "epoch": 2.43, "grad_norm": 0.6606324911117554, "learning_rate": 5.112344274704914e-05, "loss": 1.6665, "step": 73167 }, { "epoch": 2.43, "grad_norm": 0.695932149887085, "learning_rate": 5.1117606695309034e-05, "loss": 1.6595, "step": 73168 }, { "epoch": 2.43, "grad_norm": 0.6821403503417969, "learning_rate": 5.11117709456754e-05, "loss": 1.7324, "step": 73169 }, { "epoch": 2.43, "grad_norm": 0.7020477056503296, "learning_rate": 5.11059354981555e-05, "loss": 1.7122, "step": 73170 }, { "epoch": 2.43, "grad_norm": 0.6901096701622009, "learning_rate": 5.110010035275624e-05, "loss": 1.7223, "step": 73171 }, { "epoch": 2.43, "grad_norm": 0.7080155611038208, "learning_rate": 5.109426550948482e-05, "loss": 1.6726, "step": 73172 }, { "epoch": 2.43, "grad_norm": 0.6824295520782471, "learning_rate": 5.1088430968348325e-05, "loss": 1.6942, "step": 73173 }, { "epoch": 2.43, "grad_norm": 0.7050294876098633, "learning_rate": 5.108259672935371e-05, "loss": 1.6695, "step": 73174 }, { "epoch": 2.43, "grad_norm": 0.6838391423225403, "learning_rate": 5.107676279250823e-05, "loss": 1.708, "step": 73175 }, { "epoch": 2.43, "grad_norm": 0.7129815816879272, "learning_rate": 5.1070929157818886e-05, "loss": 1.6992, "step": 73176 }, { "epoch": 2.43, "grad_norm": 0.6812149286270142, "learning_rate": 5.106509582529267e-05, "loss": 1.6523, "step": 73177 }, { "epoch": 2.43, "grad_norm": 0.7017987370491028, "learning_rate": 5.105926279493677e-05, "loss": 1.7089, "step": 73178 }, { "epoch": 2.43, "grad_norm": 0.6693187355995178, "learning_rate": 5.1053430066758295e-05, "loss": 1.6502, "step": 73179 }, { "epoch": 2.43, "grad_norm": 0.6945956349372864, "learning_rate": 5.104759764076429e-05, "loss": 1.7257, "step": 73180 }, { "epoch": 2.43, "grad_norm": 0.7043402194976807, "learning_rate": 5.104176551696173e-05, "loss": 1.696, "step": 73181 }, { "epoch": 2.43, "grad_norm": 0.690030574798584, "learning_rate": 5.103593369535787e-05, "loss": 1.7019, "step": 73182 }, { "epoch": 2.43, "grad_norm": 0.6674339175224304, "learning_rate": 5.10301021759597e-05, "loss": 1.5959, "step": 73183 }, { "epoch": 2.43, "grad_norm": 0.6912776827812195, "learning_rate": 5.102427095877425e-05, "loss": 1.6073, "step": 73184 }, { "epoch": 2.43, "grad_norm": 0.6826736330986023, "learning_rate": 5.1018440043808614e-05, "loss": 1.6901, "step": 73185 }, { "epoch": 2.43, "grad_norm": 0.6951713562011719, "learning_rate": 5.101260943106999e-05, "loss": 1.7203, "step": 73186 }, { "epoch": 2.43, "grad_norm": 0.6972103714942932, "learning_rate": 5.100677912056538e-05, "loss": 1.6577, "step": 73187 }, { "epoch": 2.43, "grad_norm": 0.7171435356140137, "learning_rate": 5.100094911230179e-05, "loss": 1.6999, "step": 73188 }, { "epoch": 2.44, "grad_norm": 0.6855535507202148, "learning_rate": 5.09951194062864e-05, "loss": 1.6231, "step": 73189 }, { "epoch": 2.44, "grad_norm": 0.6915012001991272, "learning_rate": 5.098929000252626e-05, "loss": 1.7292, "step": 73190 }, { "epoch": 2.44, "grad_norm": 0.7002233862876892, "learning_rate": 5.098346090102837e-05, "loss": 1.6982, "step": 73191 }, { "epoch": 2.44, "grad_norm": 0.7003093361854553, "learning_rate": 5.097763210179986e-05, "loss": 1.6998, "step": 73192 }, { "epoch": 2.44, "grad_norm": 0.6966067552566528, "learning_rate": 5.097180360484798e-05, "loss": 1.7069, "step": 73193 }, { "epoch": 2.44, "grad_norm": 0.6938731074333191, "learning_rate": 5.096597541017947e-05, "loss": 1.7223, "step": 73194 }, { "epoch": 2.44, "grad_norm": 0.6901427507400513, "learning_rate": 5.096014751780158e-05, "loss": 1.7253, "step": 73195 }, { "epoch": 2.44, "grad_norm": 0.6739990711212158, "learning_rate": 5.0954319927721455e-05, "loss": 1.6644, "step": 73196 }, { "epoch": 2.44, "grad_norm": 0.6685588359832764, "learning_rate": 5.094849263994607e-05, "loss": 1.6685, "step": 73197 }, { "epoch": 2.44, "grad_norm": 0.6955553293228149, "learning_rate": 5.0942665654482463e-05, "loss": 1.7092, "step": 73198 }, { "epoch": 2.44, "grad_norm": 0.7041650414466858, "learning_rate": 5.093683897133776e-05, "loss": 1.7085, "step": 73199 }, { "epoch": 2.44, "grad_norm": 0.671680212020874, "learning_rate": 5.0931012590519147e-05, "loss": 1.6846, "step": 73200 }, { "epoch": 2.44, "grad_norm": 0.6619162559509277, "learning_rate": 5.09251865120335e-05, "loss": 1.7092, "step": 73201 }, { "epoch": 2.44, "grad_norm": 0.6961961984634399, "learning_rate": 5.0919360735887936e-05, "loss": 1.7013, "step": 73202 }, { "epoch": 2.44, "grad_norm": 0.6967931985855103, "learning_rate": 5.0913535262089655e-05, "loss": 1.732, "step": 73203 }, { "epoch": 2.44, "grad_norm": 0.6830366849899292, "learning_rate": 5.0907710090645645e-05, "loss": 1.6815, "step": 73204 }, { "epoch": 2.44, "grad_norm": 0.6713084578514099, "learning_rate": 5.0901885221562874e-05, "loss": 1.725, "step": 73205 }, { "epoch": 2.44, "grad_norm": 0.6964318752288818, "learning_rate": 5.089606065484854e-05, "loss": 1.7856, "step": 73206 }, { "epoch": 2.44, "grad_norm": 0.6897856593132019, "learning_rate": 5.089023639050982e-05, "loss": 1.6245, "step": 73207 }, { "epoch": 2.44, "grad_norm": 0.6681669354438782, "learning_rate": 5.0884412428553524e-05, "loss": 1.6825, "step": 73208 }, { "epoch": 2.44, "grad_norm": 0.7157998085021973, "learning_rate": 5.087858876898681e-05, "loss": 1.7188, "step": 73209 }, { "epoch": 2.44, "grad_norm": 0.6740090250968933, "learning_rate": 5.08727654118169e-05, "loss": 1.6219, "step": 73210 }, { "epoch": 2.44, "grad_norm": 0.7010376453399658, "learning_rate": 5.086694235705069e-05, "loss": 1.7057, "step": 73211 }, { "epoch": 2.44, "grad_norm": 0.6910302042961121, "learning_rate": 5.0861119604695275e-05, "loss": 1.5963, "step": 73212 }, { "epoch": 2.44, "grad_norm": 0.707146167755127, "learning_rate": 5.0855297154757825e-05, "loss": 1.7548, "step": 73213 }, { "epoch": 2.44, "grad_norm": 0.6873700022697449, "learning_rate": 5.084947500724529e-05, "loss": 1.7075, "step": 73214 }, { "epoch": 2.44, "grad_norm": 0.6877347230911255, "learning_rate": 5.084365316216476e-05, "loss": 1.7917, "step": 73215 }, { "epoch": 2.44, "grad_norm": 0.692747175693512, "learning_rate": 5.083783161952329e-05, "loss": 1.6958, "step": 73216 }, { "epoch": 2.44, "grad_norm": 0.6901633739471436, "learning_rate": 5.083201037932807e-05, "loss": 1.6555, "step": 73217 }, { "epoch": 2.44, "grad_norm": 0.7172887921333313, "learning_rate": 5.082618944158605e-05, "loss": 1.6948, "step": 73218 }, { "epoch": 2.44, "grad_norm": 0.691644012928009, "learning_rate": 5.082036880630429e-05, "loss": 1.728, "step": 73219 }, { "epoch": 2.44, "grad_norm": 0.6775521636009216, "learning_rate": 5.0814548473489944e-05, "loss": 1.7442, "step": 73220 }, { "epoch": 2.44, "grad_norm": 0.7011662125587463, "learning_rate": 5.0808728443150017e-05, "loss": 1.7004, "step": 73221 }, { "epoch": 2.44, "grad_norm": 0.6776384115219116, "learning_rate": 5.080290871529149e-05, "loss": 1.7181, "step": 73222 }, { "epoch": 2.44, "grad_norm": 0.6891046762466431, "learning_rate": 5.07970892899216e-05, "loss": 1.6774, "step": 73223 }, { "epoch": 2.44, "grad_norm": 0.675447404384613, "learning_rate": 5.079127016704727e-05, "loss": 1.684, "step": 73224 }, { "epoch": 2.44, "grad_norm": 0.6922623515129089, "learning_rate": 5.078545134667567e-05, "loss": 1.6742, "step": 73225 }, { "epoch": 2.44, "grad_norm": 0.6844106316566467, "learning_rate": 5.077963282881374e-05, "loss": 1.7493, "step": 73226 }, { "epoch": 2.44, "grad_norm": 0.6795786023139954, "learning_rate": 5.07738146134687e-05, "loss": 1.719, "step": 73227 }, { "epoch": 2.44, "grad_norm": 0.6947584748268127, "learning_rate": 5.076799670064753e-05, "loss": 1.6161, "step": 73228 }, { "epoch": 2.44, "grad_norm": 0.6738013029098511, "learning_rate": 5.076217909035719e-05, "loss": 1.6834, "step": 73229 }, { "epoch": 2.44, "grad_norm": 0.6639829277992249, "learning_rate": 5.075636178260492e-05, "loss": 1.6637, "step": 73230 }, { "epoch": 2.44, "grad_norm": 0.6905461549758911, "learning_rate": 5.075054477739766e-05, "loss": 1.6687, "step": 73231 }, { "epoch": 2.44, "grad_norm": 0.6976491212844849, "learning_rate": 5.074472807474256e-05, "loss": 1.686, "step": 73232 }, { "epoch": 2.44, "grad_norm": 0.6933711171150208, "learning_rate": 5.073891167464663e-05, "loss": 1.6591, "step": 73233 }, { "epoch": 2.44, "grad_norm": 0.6815624237060547, "learning_rate": 5.0733095577116876e-05, "loss": 1.6656, "step": 73234 }, { "epoch": 2.44, "grad_norm": 0.7074236273765564, "learning_rate": 5.072727978216049e-05, "loss": 1.7246, "step": 73235 }, { "epoch": 2.44, "grad_norm": 0.6868471503257751, "learning_rate": 5.0721464289784364e-05, "loss": 1.6468, "step": 73236 }, { "epoch": 2.44, "grad_norm": 0.7027183175086975, "learning_rate": 5.0715649099995756e-05, "loss": 1.6485, "step": 73237 }, { "epoch": 2.44, "grad_norm": 0.705864429473877, "learning_rate": 5.0709834212801503e-05, "loss": 1.6978, "step": 73238 }, { "epoch": 2.44, "grad_norm": 0.7480143904685974, "learning_rate": 5.070401962820889e-05, "loss": 1.6786, "step": 73239 }, { "epoch": 2.44, "grad_norm": 0.6852670311927795, "learning_rate": 5.0698205346224855e-05, "loss": 1.6775, "step": 73240 }, { "epoch": 2.44, "grad_norm": 0.677809476852417, "learning_rate": 5.069239136685641e-05, "loss": 1.6072, "step": 73241 }, { "epoch": 2.44, "grad_norm": 0.6669097542762756, "learning_rate": 5.068657769011073e-05, "loss": 1.6314, "step": 73242 }, { "epoch": 2.44, "grad_norm": 0.6952655911445618, "learning_rate": 5.06807643159948e-05, "loss": 1.6712, "step": 73243 }, { "epoch": 2.44, "grad_norm": 0.701077938079834, "learning_rate": 5.0674951244515594e-05, "loss": 1.6941, "step": 73244 }, { "epoch": 2.44, "grad_norm": 0.6965044140815735, "learning_rate": 5.0669138475680296e-05, "loss": 1.7381, "step": 73245 }, { "epoch": 2.44, "grad_norm": 0.6840152144432068, "learning_rate": 5.0663326009495996e-05, "loss": 1.6568, "step": 73246 }, { "epoch": 2.44, "grad_norm": 0.7034324407577515, "learning_rate": 5.0657513845969666e-05, "loss": 1.5862, "step": 73247 }, { "epoch": 2.44, "grad_norm": 0.6934021711349487, "learning_rate": 5.0651701985108294e-05, "loss": 1.7033, "step": 73248 }, { "epoch": 2.44, "grad_norm": 0.7158743739128113, "learning_rate": 5.064589042691911e-05, "loss": 1.7733, "step": 73249 }, { "epoch": 2.44, "grad_norm": 0.7119390964508057, "learning_rate": 5.0640079171409076e-05, "loss": 1.6121, "step": 73250 }, { "epoch": 2.44, "grad_norm": 0.6816079616546631, "learning_rate": 5.063426821858515e-05, "loss": 1.737, "step": 73251 }, { "epoch": 2.44, "grad_norm": 0.6845149993896484, "learning_rate": 5.06284575684545e-05, "loss": 1.681, "step": 73252 }, { "epoch": 2.44, "grad_norm": 0.6617811322212219, "learning_rate": 5.0622647221024304e-05, "loss": 1.7407, "step": 73253 }, { "epoch": 2.44, "grad_norm": 0.6923492550849915, "learning_rate": 5.06168371763013e-05, "loss": 1.7129, "step": 73254 }, { "epoch": 2.44, "grad_norm": 0.7156015634536743, "learning_rate": 5.061102743429276e-05, "loss": 1.6889, "step": 73255 }, { "epoch": 2.44, "grad_norm": 0.6979950666427612, "learning_rate": 5.060521799500573e-05, "loss": 1.7072, "step": 73256 }, { "epoch": 2.44, "grad_norm": 0.7020596265792847, "learning_rate": 5.059940885844721e-05, "loss": 1.6754, "step": 73257 }, { "epoch": 2.44, "grad_norm": 0.6820397973060608, "learning_rate": 5.059360002462419e-05, "loss": 1.6577, "step": 73258 }, { "epoch": 2.44, "grad_norm": 0.7069578170776367, "learning_rate": 5.0587791493543804e-05, "loss": 1.7318, "step": 73259 }, { "epoch": 2.44, "grad_norm": 0.7074630260467529, "learning_rate": 5.058198326521314e-05, "loss": 1.6877, "step": 73260 }, { "epoch": 2.44, "grad_norm": 0.674778401851654, "learning_rate": 5.0576175339639236e-05, "loss": 1.6697, "step": 73261 }, { "epoch": 2.44, "grad_norm": 0.6724551916122437, "learning_rate": 5.057036771682901e-05, "loss": 1.6847, "step": 73262 }, { "epoch": 2.44, "grad_norm": 0.689305305480957, "learning_rate": 5.0564560396789654e-05, "loss": 1.6624, "step": 73263 }, { "epoch": 2.44, "grad_norm": 0.7102029323577881, "learning_rate": 5.05587533795282e-05, "loss": 1.7116, "step": 73264 }, { "epoch": 2.44, "grad_norm": 0.6876596212387085, "learning_rate": 5.0552946665051585e-05, "loss": 1.6825, "step": 73265 }, { "epoch": 2.44, "grad_norm": 0.67855304479599, "learning_rate": 5.054714025336696e-05, "loss": 1.6692, "step": 73266 }, { "epoch": 2.44, "grad_norm": 0.7262876629829407, "learning_rate": 5.054133414448138e-05, "loss": 1.7172, "step": 73267 }, { "epoch": 2.44, "grad_norm": 0.6934457421302795, "learning_rate": 5.053552833840192e-05, "loss": 1.7291, "step": 73268 }, { "epoch": 2.44, "grad_norm": 0.6866839528083801, "learning_rate": 5.052972283513547e-05, "loss": 1.6484, "step": 73269 }, { "epoch": 2.44, "grad_norm": 0.6876323223114014, "learning_rate": 5.052391763468925e-05, "loss": 1.7025, "step": 73270 }, { "epoch": 2.44, "grad_norm": 0.6853994131088257, "learning_rate": 5.051811273707023e-05, "loss": 1.6708, "step": 73271 }, { "epoch": 2.44, "grad_norm": 0.6932074427604675, "learning_rate": 5.05123081422854e-05, "loss": 1.6586, "step": 73272 }, { "epoch": 2.44, "grad_norm": 0.6673623323440552, "learning_rate": 5.0506503850341915e-05, "loss": 1.6095, "step": 73273 }, { "epoch": 2.44, "grad_norm": 0.7070350050926208, "learning_rate": 5.0500699861246716e-05, "loss": 1.7098, "step": 73274 }, { "epoch": 2.44, "grad_norm": 0.6885842680931091, "learning_rate": 5.0494896175006994e-05, "loss": 1.6528, "step": 73275 }, { "epoch": 2.44, "grad_norm": 0.7100939154624939, "learning_rate": 5.04890927916296e-05, "loss": 1.6216, "step": 73276 }, { "epoch": 2.44, "grad_norm": 0.6811309456825256, "learning_rate": 5.0483289711121776e-05, "loss": 1.6868, "step": 73277 }, { "epoch": 2.44, "grad_norm": 0.6989665627479553, "learning_rate": 5.0477486933490475e-05, "loss": 1.7108, "step": 73278 }, { "epoch": 2.44, "grad_norm": 0.6765867471694946, "learning_rate": 5.0471684458742686e-05, "loss": 1.6311, "step": 73279 }, { "epoch": 2.44, "grad_norm": 0.6676040291786194, "learning_rate": 5.046588228688555e-05, "loss": 1.6722, "step": 73280 }, { "epoch": 2.44, "grad_norm": 0.7094101309776306, "learning_rate": 5.046008041792599e-05, "loss": 1.7987, "step": 73281 }, { "epoch": 2.44, "grad_norm": 0.7221723198890686, "learning_rate": 5.045427885187122e-05, "loss": 1.7453, "step": 73282 }, { "epoch": 2.44, "grad_norm": 0.6908959746360779, "learning_rate": 5.0448477588728184e-05, "loss": 1.692, "step": 73283 }, { "epoch": 2.44, "grad_norm": 0.6709319949150085, "learning_rate": 5.044267662850384e-05, "loss": 1.733, "step": 73284 }, { "epoch": 2.44, "grad_norm": 0.6648443937301636, "learning_rate": 5.043687597120538e-05, "loss": 1.6868, "step": 73285 }, { "epoch": 2.44, "grad_norm": 0.6868659853935242, "learning_rate": 5.0431075616839734e-05, "loss": 1.7032, "step": 73286 }, { "epoch": 2.44, "grad_norm": 0.662774384021759, "learning_rate": 5.042527556541406e-05, "loss": 1.6927, "step": 73287 }, { "epoch": 2.44, "grad_norm": 0.6736000180244446, "learning_rate": 5.041947581693525e-05, "loss": 1.7335, "step": 73288 }, { "epoch": 2.44, "grad_norm": 0.693508505821228, "learning_rate": 5.041367637141047e-05, "loss": 1.722, "step": 73289 }, { "epoch": 2.44, "grad_norm": 0.6713233590126038, "learning_rate": 5.0407877228846746e-05, "loss": 1.6455, "step": 73290 }, { "epoch": 2.44, "grad_norm": 0.700373649597168, "learning_rate": 5.040207838925101e-05, "loss": 1.7351, "step": 73291 }, { "epoch": 2.44, "grad_norm": 0.6799409985542297, "learning_rate": 5.039627985263042e-05, "loss": 1.6184, "step": 73292 }, { "epoch": 2.44, "grad_norm": 0.6801879405975342, "learning_rate": 5.0390481618992e-05, "loss": 1.7359, "step": 73293 }, { "epoch": 2.44, "grad_norm": 0.6799410581588745, "learning_rate": 5.0384683688342645e-05, "loss": 1.7608, "step": 73294 }, { "epoch": 2.44, "grad_norm": 0.7254124283790588, "learning_rate": 5.0378886060689546e-05, "loss": 1.7317, "step": 73295 }, { "epoch": 2.44, "grad_norm": 0.6792027950286865, "learning_rate": 5.037308873603974e-05, "loss": 1.7692, "step": 73296 }, { "epoch": 2.44, "grad_norm": 0.6830548048019409, "learning_rate": 5.036729171440026e-05, "loss": 1.7552, "step": 73297 }, { "epoch": 2.44, "grad_norm": 0.6922897696495056, "learning_rate": 5.036149499577798e-05, "loss": 1.7491, "step": 73298 }, { "epoch": 2.44, "grad_norm": 0.6757487058639526, "learning_rate": 5.035569858018018e-05, "loss": 1.7644, "step": 73299 }, { "epoch": 2.44, "grad_norm": 0.6873278021812439, "learning_rate": 5.034990246761378e-05, "loss": 1.6956, "step": 73300 }, { "epoch": 2.44, "grad_norm": 0.6768357157707214, "learning_rate": 5.03441066580857e-05, "loss": 1.7051, "step": 73301 }, { "epoch": 2.44, "grad_norm": 0.7163870334625244, "learning_rate": 5.033831115160312e-05, "loss": 1.6842, "step": 73302 }, { "epoch": 2.44, "grad_norm": 0.6732951998710632, "learning_rate": 5.0332515948173114e-05, "loss": 1.6444, "step": 73303 }, { "epoch": 2.44, "grad_norm": 0.699842631816864, "learning_rate": 5.0326721047802655e-05, "loss": 1.7232, "step": 73304 }, { "epoch": 2.44, "grad_norm": 0.6773042678833008, "learning_rate": 5.03209264504987e-05, "loss": 1.5883, "step": 73305 }, { "epoch": 2.44, "grad_norm": 0.6848878264427185, "learning_rate": 5.0315132156268414e-05, "loss": 1.7004, "step": 73306 }, { "epoch": 2.44, "grad_norm": 0.6576807498931885, "learning_rate": 5.030933816511875e-05, "loss": 1.7001, "step": 73307 }, { "epoch": 2.44, "grad_norm": 0.6744813919067383, "learning_rate": 5.030354447705671e-05, "loss": 1.6615, "step": 73308 }, { "epoch": 2.44, "grad_norm": 0.6748952865600586, "learning_rate": 5.029775109208936e-05, "loss": 1.6725, "step": 73309 }, { "epoch": 2.44, "grad_norm": 0.670859158039093, "learning_rate": 5.029195801022391e-05, "loss": 1.7088, "step": 73310 }, { "epoch": 2.44, "grad_norm": 0.7333173155784607, "learning_rate": 5.028616523146707e-05, "loss": 1.754, "step": 73311 }, { "epoch": 2.44, "grad_norm": 0.6856951713562012, "learning_rate": 5.0280372755826056e-05, "loss": 1.6587, "step": 73312 }, { "epoch": 2.44, "grad_norm": 0.6798608899116516, "learning_rate": 5.027458058330791e-05, "loss": 1.5875, "step": 73313 }, { "epoch": 2.44, "grad_norm": 0.6888847351074219, "learning_rate": 5.026878871391965e-05, "loss": 1.7423, "step": 73314 }, { "epoch": 2.44, "grad_norm": 0.7026357650756836, "learning_rate": 5.02629971476682e-05, "loss": 1.7376, "step": 73315 }, { "epoch": 2.44, "grad_norm": 0.6872622966766357, "learning_rate": 5.0257205884560706e-05, "loss": 1.6363, "step": 73316 }, { "epoch": 2.44, "grad_norm": 0.6789929866790771, "learning_rate": 5.02514149246043e-05, "loss": 1.7592, "step": 73317 }, { "epoch": 2.44, "grad_norm": 0.6819055676460266, "learning_rate": 5.024562426780571e-05, "loss": 1.6751, "step": 73318 }, { "epoch": 2.44, "grad_norm": 0.7007361054420471, "learning_rate": 5.0239833914172136e-05, "loss": 1.6309, "step": 73319 }, { "epoch": 2.44, "grad_norm": 0.7089422345161438, "learning_rate": 5.0234043863710685e-05, "loss": 1.7043, "step": 73320 }, { "epoch": 2.44, "grad_norm": 0.7069922089576721, "learning_rate": 5.022825411642831e-05, "loss": 1.7208, "step": 73321 }, { "epoch": 2.44, "grad_norm": 0.6931411027908325, "learning_rate": 5.022246467233195e-05, "loss": 1.737, "step": 73322 }, { "epoch": 2.44, "grad_norm": 0.6989437937736511, "learning_rate": 5.021667553142873e-05, "loss": 1.7415, "step": 73323 }, { "epoch": 2.44, "grad_norm": 0.7061290740966797, "learning_rate": 5.0210886693725804e-05, "loss": 1.6713, "step": 73324 }, { "epoch": 2.44, "grad_norm": 0.682086169719696, "learning_rate": 5.020509815922991e-05, "loss": 1.6162, "step": 73325 }, { "epoch": 2.44, "grad_norm": 0.6929695010185242, "learning_rate": 5.019930992794819e-05, "loss": 1.6786, "step": 73326 }, { "epoch": 2.44, "grad_norm": 0.679203987121582, "learning_rate": 5.01935219998878e-05, "loss": 1.5976, "step": 73327 }, { "epoch": 2.44, "grad_norm": 0.6912415027618408, "learning_rate": 5.018773437505568e-05, "loss": 1.7, "step": 73328 }, { "epoch": 2.44, "grad_norm": 0.6759152412414551, "learning_rate": 5.018194705345876e-05, "loss": 1.6845, "step": 73329 }, { "epoch": 2.44, "grad_norm": 0.714043140411377, "learning_rate": 5.017616003510421e-05, "loss": 1.6749, "step": 73330 }, { "epoch": 2.44, "grad_norm": 0.6795060634613037, "learning_rate": 5.017037331999898e-05, "loss": 1.6806, "step": 73331 }, { "epoch": 2.44, "grad_norm": 0.6777815818786621, "learning_rate": 5.016458690815007e-05, "loss": 1.6769, "step": 73332 }, { "epoch": 2.44, "grad_norm": 0.6912646889686584, "learning_rate": 5.0158800799564514e-05, "loss": 1.7424, "step": 73333 }, { "epoch": 2.44, "grad_norm": 0.7075477242469788, "learning_rate": 5.015301499424943e-05, "loss": 1.6103, "step": 73334 }, { "epoch": 2.44, "grad_norm": 0.7114583849906921, "learning_rate": 5.014722949221178e-05, "loss": 1.7349, "step": 73335 }, { "epoch": 2.44, "grad_norm": 0.679799497127533, "learning_rate": 5.014144429345853e-05, "loss": 1.6818, "step": 73336 }, { "epoch": 2.44, "grad_norm": 0.687376856803894, "learning_rate": 5.0135659397996777e-05, "loss": 1.6674, "step": 73337 }, { "epoch": 2.44, "grad_norm": 0.6905207633972168, "learning_rate": 5.012987480583357e-05, "loss": 1.6663, "step": 73338 }, { "epoch": 2.44, "grad_norm": 0.7064173817634583, "learning_rate": 5.012409051697578e-05, "loss": 1.7704, "step": 73339 }, { "epoch": 2.44, "grad_norm": 0.7007768154144287, "learning_rate": 5.01183065314306e-05, "loss": 1.7111, "step": 73340 }, { "epoch": 2.44, "grad_norm": 0.680994987487793, "learning_rate": 5.011252284920488e-05, "loss": 1.6958, "step": 73341 }, { "epoch": 2.44, "grad_norm": 0.7042906880378723, "learning_rate": 5.0106739470305834e-05, "loss": 1.6768, "step": 73342 }, { "epoch": 2.44, "grad_norm": 0.7020841240882874, "learning_rate": 5.0100956394740345e-05, "loss": 1.6315, "step": 73343 }, { "epoch": 2.44, "grad_norm": 0.6840879917144775, "learning_rate": 5.009517362251551e-05, "loss": 1.6347, "step": 73344 }, { "epoch": 2.44, "grad_norm": 0.6871973872184753, "learning_rate": 5.0089391153638316e-05, "loss": 1.7259, "step": 73345 }, { "epoch": 2.44, "grad_norm": 0.7024253606796265, "learning_rate": 5.0083608988115695e-05, "loss": 1.7533, "step": 73346 }, { "epoch": 2.44, "grad_norm": 0.7026190161705017, "learning_rate": 5.007782712595485e-05, "loss": 1.6992, "step": 73347 }, { "epoch": 2.44, "grad_norm": 0.7154495716094971, "learning_rate": 5.0072045567162596e-05, "loss": 1.7085, "step": 73348 }, { "epoch": 2.44, "grad_norm": 0.6890376210212708, "learning_rate": 5.006626431174614e-05, "loss": 1.7057, "step": 73349 }, { "epoch": 2.44, "grad_norm": 0.7119331955909729, "learning_rate": 5.0060483359712436e-05, "loss": 1.6915, "step": 73350 }, { "epoch": 2.44, "grad_norm": 0.6802571415901184, "learning_rate": 5.005470271106837e-05, "loss": 1.7071, "step": 73351 }, { "epoch": 2.44, "grad_norm": 0.6563369035720825, "learning_rate": 5.004892236582115e-05, "loss": 1.69, "step": 73352 }, { "epoch": 2.44, "grad_norm": 0.688072144985199, "learning_rate": 5.004314232397764e-05, "loss": 1.6604, "step": 73353 }, { "epoch": 2.44, "grad_norm": 0.6852834820747375, "learning_rate": 5.003736258554501e-05, "loss": 1.6542, "step": 73354 }, { "epoch": 2.44, "grad_norm": 0.6877303123474121, "learning_rate": 5.003158315053012e-05, "loss": 1.6777, "step": 73355 }, { "epoch": 2.44, "grad_norm": 0.6866706609725952, "learning_rate": 5.0025804018940106e-05, "loss": 1.6788, "step": 73356 }, { "epoch": 2.44, "grad_norm": 1.382692575454712, "learning_rate": 5.002002519078194e-05, "loss": 1.7388, "step": 73357 }, { "epoch": 2.44, "grad_norm": 0.698917806148529, "learning_rate": 5.001424666606259e-05, "loss": 1.7463, "step": 73358 }, { "epoch": 2.44, "grad_norm": 0.7089646458625793, "learning_rate": 5.000846844478913e-05, "loss": 1.6896, "step": 73359 }, { "epoch": 2.44, "grad_norm": 0.7102279663085938, "learning_rate": 5.00026905269686e-05, "loss": 1.6039, "step": 73360 }, { "epoch": 2.44, "grad_norm": 0.6800761222839355, "learning_rate": 4.999691291260787e-05, "loss": 1.7424, "step": 73361 }, { "epoch": 2.44, "grad_norm": 0.7298165559768677, "learning_rate": 4.999113560171405e-05, "loss": 1.727, "step": 73362 }, { "epoch": 2.44, "grad_norm": 0.6942014098167419, "learning_rate": 4.998535859429424e-05, "loss": 1.7459, "step": 73363 }, { "epoch": 2.44, "grad_norm": 0.696328341960907, "learning_rate": 4.997958189035538e-05, "loss": 1.6949, "step": 73364 }, { "epoch": 2.44, "grad_norm": 0.7118577361106873, "learning_rate": 4.997380548990438e-05, "loss": 1.6973, "step": 73365 }, { "epoch": 2.44, "grad_norm": 0.6998316645622253, "learning_rate": 4.9968029392948416e-05, "loss": 1.6215, "step": 73366 }, { "epoch": 2.44, "grad_norm": 0.7051795125007629, "learning_rate": 4.9962253599494405e-05, "loss": 1.6266, "step": 73367 }, { "epoch": 2.44, "grad_norm": 0.679562509059906, "learning_rate": 4.9956478109549314e-05, "loss": 1.7019, "step": 73368 }, { "epoch": 2.44, "grad_norm": 0.720504879951477, "learning_rate": 4.995070292312023e-05, "loss": 1.698, "step": 73369 }, { "epoch": 2.44, "grad_norm": 0.6694462299346924, "learning_rate": 4.994492804021429e-05, "loss": 1.6989, "step": 73370 }, { "epoch": 2.44, "grad_norm": 0.7029463052749634, "learning_rate": 4.993915346083822e-05, "loss": 1.6995, "step": 73371 }, { "epoch": 2.44, "grad_norm": 0.6852996349334717, "learning_rate": 4.993337918499918e-05, "loss": 1.7089, "step": 73372 }, { "epoch": 2.44, "grad_norm": 0.7036537528038025, "learning_rate": 4.992760521270422e-05, "loss": 1.7239, "step": 73373 }, { "epoch": 2.44, "grad_norm": 0.694262683391571, "learning_rate": 4.992183154396032e-05, "loss": 1.6523, "step": 73374 }, { "epoch": 2.44, "grad_norm": 0.676174521446228, "learning_rate": 4.99160581787744e-05, "loss": 1.6887, "step": 73375 }, { "epoch": 2.44, "grad_norm": 0.6831384301185608, "learning_rate": 4.9910285117153514e-05, "loss": 1.6826, "step": 73376 }, { "epoch": 2.44, "grad_norm": 0.6809321045875549, "learning_rate": 4.9904512359104764e-05, "loss": 1.7076, "step": 73377 }, { "epoch": 2.44, "grad_norm": 0.6891891956329346, "learning_rate": 4.989873990463511e-05, "loss": 1.6329, "step": 73378 }, { "epoch": 2.44, "grad_norm": 0.6957841515541077, "learning_rate": 4.9892967753751446e-05, "loss": 1.7653, "step": 73379 }, { "epoch": 2.44, "grad_norm": 0.6975158452987671, "learning_rate": 4.988719590646097e-05, "loss": 1.6849, "step": 73380 }, { "epoch": 2.44, "grad_norm": 0.6869127750396729, "learning_rate": 4.988142436277056e-05, "loss": 1.6647, "step": 73381 }, { "epoch": 2.44, "grad_norm": 0.7012449502944946, "learning_rate": 4.9875653122687165e-05, "loss": 1.6966, "step": 73382 }, { "epoch": 2.44, "grad_norm": 0.6880602240562439, "learning_rate": 4.98698821862179e-05, "loss": 1.6771, "step": 73383 }, { "epoch": 2.44, "grad_norm": 0.6966996788978577, "learning_rate": 4.98641115533698e-05, "loss": 1.6418, "step": 73384 }, { "epoch": 2.44, "grad_norm": 0.7080075740814209, "learning_rate": 4.985834122414982e-05, "loss": 1.7445, "step": 73385 }, { "epoch": 2.44, "grad_norm": 0.6807339191436768, "learning_rate": 4.9852571198564864e-05, "loss": 1.6876, "step": 73386 }, { "epoch": 2.44, "grad_norm": 0.688585102558136, "learning_rate": 4.984680147662215e-05, "loss": 1.6627, "step": 73387 }, { "epoch": 2.44, "grad_norm": 0.6868810653686523, "learning_rate": 4.98410320583285e-05, "loss": 1.7272, "step": 73388 }, { "epoch": 2.44, "grad_norm": 0.6667605042457581, "learning_rate": 4.9835262943690956e-05, "loss": 1.6947, "step": 73389 }, { "epoch": 2.44, "grad_norm": 0.704668402671814, "learning_rate": 4.9829494132716576e-05, "loss": 1.6885, "step": 73390 }, { "epoch": 2.44, "grad_norm": 0.7117520570755005, "learning_rate": 4.982372562541228e-05, "loss": 1.7555, "step": 73391 }, { "epoch": 2.44, "grad_norm": 0.68754643201828, "learning_rate": 4.981795742178517e-05, "loss": 1.6481, "step": 73392 }, { "epoch": 2.44, "grad_norm": 0.7060611844062805, "learning_rate": 4.981218952184214e-05, "loss": 1.6132, "step": 73393 }, { "epoch": 2.44, "grad_norm": 0.643901526927948, "learning_rate": 4.980642192559034e-05, "loss": 1.6778, "step": 73394 }, { "epoch": 2.44, "grad_norm": 0.6922232508659363, "learning_rate": 4.980065463303665e-05, "loss": 1.6917, "step": 73395 }, { "epoch": 2.44, "grad_norm": 0.6955579519271851, "learning_rate": 4.9794887644188056e-05, "loss": 1.687, "step": 73396 }, { "epoch": 2.44, "grad_norm": 0.6622439622879028, "learning_rate": 4.978912095905164e-05, "loss": 1.69, "step": 73397 }, { "epoch": 2.44, "grad_norm": 0.7026205658912659, "learning_rate": 4.978335457763432e-05, "loss": 1.6677, "step": 73398 }, { "epoch": 2.44, "grad_norm": 0.7033646702766418, "learning_rate": 4.97775884999432e-05, "loss": 1.7084, "step": 73399 }, { "epoch": 2.44, "grad_norm": 0.7010971903800964, "learning_rate": 4.977182272598522e-05, "loss": 1.6977, "step": 73400 }, { "epoch": 2.44, "grad_norm": 0.7063126564025879, "learning_rate": 4.976605725576731e-05, "loss": 1.6608, "step": 73401 }, { "epoch": 2.44, "grad_norm": 0.6988269090652466, "learning_rate": 4.9760292089296595e-05, "loss": 1.6785, "step": 73402 }, { "epoch": 2.44, "grad_norm": 0.6680318713188171, "learning_rate": 4.9754527226579965e-05, "loss": 1.7231, "step": 73403 }, { "epoch": 2.44, "grad_norm": 0.6844674348831177, "learning_rate": 4.974876266762452e-05, "loss": 1.7151, "step": 73404 }, { "epoch": 2.44, "grad_norm": 0.6854211688041687, "learning_rate": 4.9742998412437116e-05, "loss": 1.7106, "step": 73405 }, { "epoch": 2.44, "grad_norm": 0.692411482334137, "learning_rate": 4.973723446102496e-05, "loss": 1.6443, "step": 73406 }, { "epoch": 2.44, "grad_norm": 0.6784524321556091, "learning_rate": 4.973147081339489e-05, "loss": 1.6606, "step": 73407 }, { "epoch": 2.44, "grad_norm": 0.736433207988739, "learning_rate": 4.9725707469553856e-05, "loss": 1.7774, "step": 73408 }, { "epoch": 2.44, "grad_norm": 0.6691540479660034, "learning_rate": 4.971994442950904e-05, "loss": 1.6721, "step": 73409 }, { "epoch": 2.44, "grad_norm": 0.6842760443687439, "learning_rate": 4.9714181693267343e-05, "loss": 1.6841, "step": 73410 }, { "epoch": 2.44, "grad_norm": 0.6847749352455139, "learning_rate": 4.970841926083565e-05, "loss": 1.715, "step": 73411 }, { "epoch": 2.44, "grad_norm": 0.6916078329086304, "learning_rate": 4.970265713222107e-05, "loss": 1.6798, "step": 73412 }, { "epoch": 2.44, "grad_norm": 0.6944678425788879, "learning_rate": 4.969689530743065e-05, "loss": 1.6778, "step": 73413 }, { "epoch": 2.44, "grad_norm": 0.6894753575325012, "learning_rate": 4.9691133786471324e-05, "loss": 1.6827, "step": 73414 }, { "epoch": 2.44, "grad_norm": 0.7133691906929016, "learning_rate": 4.968537256934999e-05, "loss": 1.7577, "step": 73415 }, { "epoch": 2.44, "grad_norm": 0.7056384682655334, "learning_rate": 4.9679611656073836e-05, "loss": 1.7129, "step": 73416 }, { "epoch": 2.44, "grad_norm": 0.6681767106056213, "learning_rate": 4.967385104664973e-05, "loss": 1.6927, "step": 73417 }, { "epoch": 2.44, "grad_norm": 0.6931259036064148, "learning_rate": 4.966809074108462e-05, "loss": 1.6461, "step": 73418 }, { "epoch": 2.44, "grad_norm": 0.6665962338447571, "learning_rate": 4.9662330739385545e-05, "loss": 1.7348, "step": 73419 }, { "epoch": 2.44, "grad_norm": 0.6638720631599426, "learning_rate": 4.96565710415597e-05, "loss": 1.7093, "step": 73420 }, { "epoch": 2.44, "grad_norm": 0.7016064524650574, "learning_rate": 4.965081164761371e-05, "loss": 1.6506, "step": 73421 }, { "epoch": 2.44, "grad_norm": 0.6929439902305603, "learning_rate": 4.964505255755477e-05, "loss": 1.6715, "step": 73422 }, { "epoch": 2.44, "grad_norm": 0.696506679058075, "learning_rate": 4.96392937713899e-05, "loss": 1.7271, "step": 73423 }, { "epoch": 2.44, "grad_norm": 0.7088775038719177, "learning_rate": 4.963353528912604e-05, "loss": 1.6734, "step": 73424 }, { "epoch": 2.44, "grad_norm": 0.6949090361595154, "learning_rate": 4.962777711077011e-05, "loss": 1.6291, "step": 73425 }, { "epoch": 2.44, "grad_norm": 0.6892646551132202, "learning_rate": 4.962201923632917e-05, "loss": 1.6157, "step": 73426 }, { "epoch": 2.44, "grad_norm": 0.7186436057090759, "learning_rate": 4.9616261665810385e-05, "loss": 1.6878, "step": 73427 }, { "epoch": 2.44, "grad_norm": 0.7074036598205566, "learning_rate": 4.9610504399220384e-05, "loss": 1.7646, "step": 73428 }, { "epoch": 2.44, "grad_norm": 0.6628060936927795, "learning_rate": 4.960474743656633e-05, "loss": 1.6329, "step": 73429 }, { "epoch": 2.44, "grad_norm": 0.6894570589065552, "learning_rate": 4.959899077785531e-05, "loss": 1.694, "step": 73430 }, { "epoch": 2.44, "grad_norm": 0.7217908501625061, "learning_rate": 4.959323442309422e-05, "loss": 1.686, "step": 73431 }, { "epoch": 2.44, "grad_norm": 0.6952286958694458, "learning_rate": 4.958747837228996e-05, "loss": 1.6989, "step": 73432 }, { "epoch": 2.44, "grad_norm": 0.7216404676437378, "learning_rate": 4.9581722625449616e-05, "loss": 1.7425, "step": 73433 }, { "epoch": 2.44, "grad_norm": 0.7094366550445557, "learning_rate": 4.957596718258032e-05, "loss": 1.7105, "step": 73434 }, { "epoch": 2.44, "grad_norm": 0.7117392420768738, "learning_rate": 4.957021204368874e-05, "loss": 1.7518, "step": 73435 }, { "epoch": 2.44, "grad_norm": 0.6742904186248779, "learning_rate": 4.9564457208782026e-05, "loss": 1.6635, "step": 73436 }, { "epoch": 2.44, "grad_norm": 0.6978154182434082, "learning_rate": 4.955870267786724e-05, "loss": 1.6974, "step": 73437 }, { "epoch": 2.44, "grad_norm": 0.699178159236908, "learning_rate": 4.955294845095129e-05, "loss": 1.7295, "step": 73438 }, { "epoch": 2.44, "grad_norm": 0.6764411926269531, "learning_rate": 4.954719452804105e-05, "loss": 1.6149, "step": 73439 }, { "epoch": 2.44, "grad_norm": 0.7061502933502197, "learning_rate": 4.9541440909143673e-05, "loss": 1.6845, "step": 73440 }, { "epoch": 2.44, "grad_norm": 0.725189745426178, "learning_rate": 4.953568759426619e-05, "loss": 1.6322, "step": 73441 }, { "epoch": 2.44, "grad_norm": 0.6722719669342041, "learning_rate": 4.952993458341536e-05, "loss": 1.6818, "step": 73442 }, { "epoch": 2.44, "grad_norm": 0.7094718813896179, "learning_rate": 4.952418187659824e-05, "loss": 1.6707, "step": 73443 }, { "epoch": 2.44, "grad_norm": 0.7049251794815063, "learning_rate": 4.9518429473821966e-05, "loss": 1.6455, "step": 73444 }, { "epoch": 2.44, "grad_norm": 0.7316127419471741, "learning_rate": 4.9512677375093404e-05, "loss": 1.7483, "step": 73445 }, { "epoch": 2.44, "grad_norm": 0.728076696395874, "learning_rate": 4.9506925580419454e-05, "loss": 1.6833, "step": 73446 }, { "epoch": 2.44, "grad_norm": 0.6735599040985107, "learning_rate": 4.950117408980728e-05, "loss": 1.6498, "step": 73447 }, { "epoch": 2.44, "grad_norm": 0.7045480012893677, "learning_rate": 4.949542290326378e-05, "loss": 1.6935, "step": 73448 }, { "epoch": 2.44, "grad_norm": 0.6767830848693848, "learning_rate": 4.948967202079584e-05, "loss": 1.6889, "step": 73449 }, { "epoch": 2.44, "grad_norm": 0.6878503561019897, "learning_rate": 4.948392144241053e-05, "loss": 1.6605, "step": 73450 }, { "epoch": 2.44, "grad_norm": 0.6847764849662781, "learning_rate": 4.9478171168114875e-05, "loss": 1.6325, "step": 73451 }, { "epoch": 2.44, "grad_norm": 0.7037824392318726, "learning_rate": 4.9472421197915836e-05, "loss": 1.6839, "step": 73452 }, { "epoch": 2.44, "grad_norm": 0.6930233836174011, "learning_rate": 4.94666715318203e-05, "loss": 1.7474, "step": 73453 }, { "epoch": 2.44, "grad_norm": 0.6782791614532471, "learning_rate": 4.946092216983537e-05, "loss": 1.606, "step": 73454 }, { "epoch": 2.44, "grad_norm": 0.702548086643219, "learning_rate": 4.945517311196797e-05, "loss": 1.6902, "step": 73455 }, { "epoch": 2.44, "grad_norm": 0.7049299478530884, "learning_rate": 4.9449424358225e-05, "loss": 1.6614, "step": 73456 }, { "epoch": 2.44, "grad_norm": 0.6841565370559692, "learning_rate": 4.944367590861358e-05, "loss": 1.7213, "step": 73457 }, { "epoch": 2.44, "grad_norm": 0.7065320014953613, "learning_rate": 4.943792776314054e-05, "loss": 1.5962, "step": 73458 }, { "epoch": 2.44, "grad_norm": 0.6982358694076538, "learning_rate": 4.943217992181301e-05, "loss": 1.741, "step": 73459 }, { "epoch": 2.44, "grad_norm": 0.6604446768760681, "learning_rate": 4.942643238463785e-05, "loss": 1.6868, "step": 73460 }, { "epoch": 2.44, "grad_norm": 0.6768797636032104, "learning_rate": 4.942068515162212e-05, "loss": 1.6591, "step": 73461 }, { "epoch": 2.44, "grad_norm": 0.700386643409729, "learning_rate": 4.941493822277278e-05, "loss": 1.6965, "step": 73462 }, { "epoch": 2.44, "grad_norm": 0.6866461634635925, "learning_rate": 4.9409191598096696e-05, "loss": 1.7134, "step": 73463 }, { "epoch": 2.44, "grad_norm": 0.7012050747871399, "learning_rate": 4.940344527760099e-05, "loss": 1.6902, "step": 73464 }, { "epoch": 2.44, "grad_norm": 0.6730672717094421, "learning_rate": 4.9397699261292534e-05, "loss": 1.6988, "step": 73465 }, { "epoch": 2.44, "grad_norm": 0.6927890181541443, "learning_rate": 4.939195354917841e-05, "loss": 1.6775, "step": 73466 }, { "epoch": 2.44, "grad_norm": 0.6886725425720215, "learning_rate": 4.938620814126551e-05, "loss": 1.6186, "step": 73467 }, { "epoch": 2.44, "grad_norm": 0.6793418526649475, "learning_rate": 4.938046303756078e-05, "loss": 1.7345, "step": 73468 }, { "epoch": 2.44, "grad_norm": 0.660080075263977, "learning_rate": 4.93747182380713e-05, "loss": 1.6584, "step": 73469 }, { "epoch": 2.44, "grad_norm": 0.695807933807373, "learning_rate": 4.936897374280393e-05, "loss": 1.7389, "step": 73470 }, { "epoch": 2.44, "grad_norm": 0.7063855528831482, "learning_rate": 4.936322955176574e-05, "loss": 1.6633, "step": 73471 }, { "epoch": 2.44, "grad_norm": 0.6756291389465332, "learning_rate": 4.935748566496362e-05, "loss": 1.6886, "step": 73472 }, { "epoch": 2.44, "grad_norm": 0.6800594329833984, "learning_rate": 4.935174208240462e-05, "loss": 1.6967, "step": 73473 }, { "epoch": 2.44, "grad_norm": 0.6919628977775574, "learning_rate": 4.934599880409569e-05, "loss": 1.5684, "step": 73474 }, { "epoch": 2.44, "grad_norm": 0.6699585914611816, "learning_rate": 4.934025583004374e-05, "loss": 1.6908, "step": 73475 }, { "epoch": 2.44, "grad_norm": 0.677201509475708, "learning_rate": 4.9334513160255803e-05, "loss": 1.7151, "step": 73476 }, { "epoch": 2.44, "grad_norm": 0.6836492419242859, "learning_rate": 4.932877079473888e-05, "loss": 1.6649, "step": 73477 }, { "epoch": 2.44, "grad_norm": 0.7066587805747986, "learning_rate": 4.9323028733499794e-05, "loss": 1.7226, "step": 73478 }, { "epoch": 2.44, "grad_norm": 0.6891037821769714, "learning_rate": 4.931728697654563e-05, "loss": 1.7155, "step": 73479 }, { "epoch": 2.44, "grad_norm": 0.6972061395645142, "learning_rate": 4.9311545523883434e-05, "loss": 1.6707, "step": 73480 }, { "epoch": 2.44, "grad_norm": 0.7006077170372009, "learning_rate": 4.930580437552005e-05, "loss": 1.6998, "step": 73481 }, { "epoch": 2.44, "grad_norm": 0.6891232132911682, "learning_rate": 4.930006353146242e-05, "loss": 1.7067, "step": 73482 }, { "epoch": 2.44, "grad_norm": 0.7297952771186829, "learning_rate": 4.929432299171766e-05, "loss": 1.8139, "step": 73483 }, { "epoch": 2.44, "grad_norm": 0.7001866698265076, "learning_rate": 4.928858275629264e-05, "loss": 1.7215, "step": 73484 }, { "epoch": 2.44, "grad_norm": 0.7027458548545837, "learning_rate": 4.9282842825194246e-05, "loss": 1.739, "step": 73485 }, { "epoch": 2.44, "grad_norm": 0.686488926410675, "learning_rate": 4.927710319842958e-05, "loss": 1.7389, "step": 73486 }, { "epoch": 2.44, "grad_norm": 0.6782940626144409, "learning_rate": 4.92713638760057e-05, "loss": 1.6842, "step": 73487 }, { "epoch": 2.44, "grad_norm": 0.6565384864807129, "learning_rate": 4.92656248579293e-05, "loss": 1.6181, "step": 73488 }, { "epoch": 2.44, "grad_norm": 0.678080677986145, "learning_rate": 4.9259886144207475e-05, "loss": 1.6866, "step": 73489 }, { "epoch": 2.45, "grad_norm": 0.6750538349151611, "learning_rate": 4.9254147734847284e-05, "loss": 1.6136, "step": 73490 }, { "epoch": 2.45, "grad_norm": 0.6814192533493042, "learning_rate": 4.924840962985559e-05, "loss": 1.6024, "step": 73491 }, { "epoch": 2.45, "grad_norm": 0.6987127661705017, "learning_rate": 4.924267182923932e-05, "loss": 1.7486, "step": 73492 }, { "epoch": 2.45, "grad_norm": 0.6884859800338745, "learning_rate": 4.9236934333005503e-05, "loss": 1.7352, "step": 73493 }, { "epoch": 2.45, "grad_norm": 0.6637763977050781, "learning_rate": 4.9231197141161174e-05, "loss": 1.6683, "step": 73494 }, { "epoch": 2.45, "grad_norm": 0.6871649026870728, "learning_rate": 4.922546025371321e-05, "loss": 1.658, "step": 73495 }, { "epoch": 2.45, "grad_norm": 0.698493242263794, "learning_rate": 4.921972367066855e-05, "loss": 1.6211, "step": 73496 }, { "epoch": 2.45, "grad_norm": 0.6601073741912842, "learning_rate": 4.921398739203423e-05, "loss": 1.7028, "step": 73497 }, { "epoch": 2.45, "grad_norm": 0.6920794248580933, "learning_rate": 4.920825141781719e-05, "loss": 1.6925, "step": 73498 }, { "epoch": 2.45, "grad_norm": 0.686784565448761, "learning_rate": 4.9202515748024306e-05, "loss": 1.7855, "step": 73499 }, { "epoch": 2.45, "grad_norm": 0.6849403381347656, "learning_rate": 4.919678038266264e-05, "loss": 1.6641, "step": 73500 }, { "epoch": 2.45, "grad_norm": 0.7073407769203186, "learning_rate": 4.919104532173918e-05, "loss": 1.7144, "step": 73501 }, { "epoch": 2.45, "grad_norm": 0.6942046880722046, "learning_rate": 4.9185310565260816e-05, "loss": 1.6761, "step": 73502 }, { "epoch": 2.45, "grad_norm": 0.7231078743934631, "learning_rate": 4.917957611323449e-05, "loss": 1.7451, "step": 73503 }, { "epoch": 2.45, "grad_norm": 0.7038949131965637, "learning_rate": 4.917384196566725e-05, "loss": 1.6849, "step": 73504 }, { "epoch": 2.45, "grad_norm": 0.6915410757064819, "learning_rate": 4.916810812256603e-05, "loss": 1.7312, "step": 73505 }, { "epoch": 2.45, "grad_norm": 0.6854826211929321, "learning_rate": 4.9162374583937695e-05, "loss": 1.7226, "step": 73506 }, { "epoch": 2.45, "grad_norm": 0.7195367813110352, "learning_rate": 4.9156641349789325e-05, "loss": 1.738, "step": 73507 }, { "epoch": 2.45, "grad_norm": 0.6770864725112915, "learning_rate": 4.9150908420127765e-05, "loss": 1.6615, "step": 73508 }, { "epoch": 2.45, "grad_norm": 0.6654442548751831, "learning_rate": 4.9145175794960136e-05, "loss": 1.6331, "step": 73509 }, { "epoch": 2.45, "grad_norm": 0.6871151924133301, "learning_rate": 4.913944347429323e-05, "loss": 1.6817, "step": 73510 }, { "epoch": 2.45, "grad_norm": 0.7062146067619324, "learning_rate": 4.9133711458134104e-05, "loss": 1.665, "step": 73511 }, { "epoch": 2.45, "grad_norm": 0.6977320313453674, "learning_rate": 4.9127979746489734e-05, "loss": 1.6801, "step": 73512 }, { "epoch": 2.45, "grad_norm": 0.6999074220657349, "learning_rate": 4.912224833936696e-05, "loss": 1.6826, "step": 73513 }, { "epoch": 2.45, "grad_norm": 0.675774872303009, "learning_rate": 4.911651723677287e-05, "loss": 1.6398, "step": 73514 }, { "epoch": 2.45, "grad_norm": 0.6955262422561646, "learning_rate": 4.911078643871427e-05, "loss": 1.7228, "step": 73515 }, { "epoch": 2.45, "grad_norm": 0.7125107645988464, "learning_rate": 4.910505594519832e-05, "loss": 1.6602, "step": 73516 }, { "epoch": 2.45, "grad_norm": 0.6948114633560181, "learning_rate": 4.9099325756231836e-05, "loss": 1.6922, "step": 73517 }, { "epoch": 2.45, "grad_norm": 0.6603430509567261, "learning_rate": 4.909359587182175e-05, "loss": 1.6917, "step": 73518 }, { "epoch": 2.45, "grad_norm": 0.7780349254608154, "learning_rate": 4.908786629197512e-05, "loss": 1.6716, "step": 73519 }, { "epoch": 2.45, "grad_norm": 0.681403398513794, "learning_rate": 4.9082137016698784e-05, "loss": 1.7589, "step": 73520 }, { "epoch": 2.45, "grad_norm": 0.6860290765762329, "learning_rate": 4.907640804599985e-05, "loss": 1.7024, "step": 73521 }, { "epoch": 2.45, "grad_norm": 0.6952292323112488, "learning_rate": 4.90706793798851e-05, "loss": 1.6688, "step": 73522 }, { "epoch": 2.45, "grad_norm": 0.7015815377235413, "learning_rate": 4.906495101836164e-05, "loss": 1.7879, "step": 73523 }, { "epoch": 2.45, "grad_norm": 0.7047624588012695, "learning_rate": 4.905922296143636e-05, "loss": 1.6893, "step": 73524 }, { "epoch": 2.45, "grad_norm": 0.6963732242584229, "learning_rate": 4.905349520911616e-05, "loss": 1.7836, "step": 73525 }, { "epoch": 2.45, "grad_norm": 0.6905609965324402, "learning_rate": 4.9047767761408085e-05, "loss": 1.6277, "step": 73526 }, { "epoch": 2.45, "grad_norm": 0.6902668476104736, "learning_rate": 4.904204061831905e-05, "loss": 1.6642, "step": 73527 }, { "epoch": 2.45, "grad_norm": 0.6820290684700012, "learning_rate": 4.903631377985593e-05, "loss": 1.6487, "step": 73528 }, { "epoch": 2.45, "grad_norm": 0.6951870322227478, "learning_rate": 4.903058724602576e-05, "loss": 1.7317, "step": 73529 }, { "epoch": 2.45, "grad_norm": 0.6706302165985107, "learning_rate": 4.902486101683555e-05, "loss": 1.7343, "step": 73530 }, { "epoch": 2.45, "grad_norm": 0.7029722332954407, "learning_rate": 4.901913509229217e-05, "loss": 1.6069, "step": 73531 }, { "epoch": 2.45, "grad_norm": 0.6745292544364929, "learning_rate": 4.90134094724025e-05, "loss": 1.6406, "step": 73532 }, { "epoch": 2.45, "grad_norm": 0.6814993023872375, "learning_rate": 4.9007684157173656e-05, "loss": 1.6906, "step": 73533 }, { "epoch": 2.45, "grad_norm": 0.6930299401283264, "learning_rate": 4.900195914661247e-05, "loss": 1.698, "step": 73534 }, { "epoch": 2.45, "grad_norm": 0.6957974433898926, "learning_rate": 4.89962344407259e-05, "loss": 1.6544, "step": 73535 }, { "epoch": 2.45, "grad_norm": 0.6975070834159851, "learning_rate": 4.899051003952088e-05, "loss": 1.6834, "step": 73536 }, { "epoch": 2.45, "grad_norm": 0.6854247450828552, "learning_rate": 4.8984785943004536e-05, "loss": 1.6695, "step": 73537 }, { "epoch": 2.45, "grad_norm": 0.7174026966094971, "learning_rate": 4.897906215118356e-05, "loss": 1.6892, "step": 73538 }, { "epoch": 2.45, "grad_norm": 0.6915168166160583, "learning_rate": 4.8973338664065006e-05, "loss": 1.677, "step": 73539 }, { "epoch": 2.45, "grad_norm": 0.6925188899040222, "learning_rate": 4.896761548165592e-05, "loss": 1.6479, "step": 73540 }, { "epoch": 2.45, "grad_norm": 0.7014427185058594, "learning_rate": 4.8961892603963115e-05, "loss": 1.7152, "step": 73541 }, { "epoch": 2.45, "grad_norm": 0.8418470621109009, "learning_rate": 4.895617003099356e-05, "loss": 1.7121, "step": 73542 }, { "epoch": 2.45, "grad_norm": 0.7119905948638916, "learning_rate": 4.8950447762754206e-05, "loss": 1.688, "step": 73543 }, { "epoch": 2.45, "grad_norm": 0.69029700756073, "learning_rate": 4.894472579925216e-05, "loss": 1.6706, "step": 73544 }, { "epoch": 2.45, "grad_norm": 0.7069483995437622, "learning_rate": 4.893900414049407e-05, "loss": 1.6974, "step": 73545 }, { "epoch": 2.45, "grad_norm": 0.6870713233947754, "learning_rate": 4.893328278648705e-05, "loss": 1.6805, "step": 73546 }, { "epoch": 2.45, "grad_norm": 0.6966435313224792, "learning_rate": 4.8927561737238106e-05, "loss": 1.7096, "step": 73547 }, { "epoch": 2.45, "grad_norm": 0.7009989619255066, "learning_rate": 4.8921840992754116e-05, "loss": 1.7554, "step": 73548 }, { "epoch": 2.45, "grad_norm": 0.6795853972434998, "learning_rate": 4.8916120553041925e-05, "loss": 1.6799, "step": 73549 }, { "epoch": 2.45, "grad_norm": 0.69376540184021, "learning_rate": 4.891040041810857e-05, "loss": 1.7109, "step": 73550 }, { "epoch": 2.45, "grad_norm": 0.7228507995605469, "learning_rate": 4.8904680587961156e-05, "loss": 1.7104, "step": 73551 }, { "epoch": 2.45, "grad_norm": 0.6871132254600525, "learning_rate": 4.889896106260633e-05, "loss": 1.6605, "step": 73552 }, { "epoch": 2.45, "grad_norm": 0.6914154887199402, "learning_rate": 4.889324184205111e-05, "loss": 1.6295, "step": 73553 }, { "epoch": 2.45, "grad_norm": 0.7024184465408325, "learning_rate": 4.8887522926302616e-05, "loss": 1.6507, "step": 73554 }, { "epoch": 2.45, "grad_norm": 0.7052062749862671, "learning_rate": 4.8881804315367654e-05, "loss": 1.7371, "step": 73555 }, { "epoch": 2.45, "grad_norm": 0.6677072644233704, "learning_rate": 4.88760860092531e-05, "loss": 1.6625, "step": 73556 }, { "epoch": 2.45, "grad_norm": 0.685476541519165, "learning_rate": 4.887036800796601e-05, "loss": 1.6487, "step": 73557 }, { "epoch": 2.45, "grad_norm": 0.6995207071304321, "learning_rate": 4.886465031151341e-05, "loss": 1.7497, "step": 73558 }, { "epoch": 2.45, "grad_norm": 0.6899667382240295, "learning_rate": 4.885893291990196e-05, "loss": 1.6734, "step": 73559 }, { "epoch": 2.45, "grad_norm": 0.705471396446228, "learning_rate": 4.885321583313879e-05, "loss": 1.7053, "step": 73560 }, { "epoch": 2.45, "grad_norm": 0.6905640959739685, "learning_rate": 4.8847499051230863e-05, "loss": 1.655, "step": 73561 }, { "epoch": 2.45, "grad_norm": 0.7098886966705322, "learning_rate": 4.8841782574185076e-05, "loss": 1.7189, "step": 73562 }, { "epoch": 2.45, "grad_norm": 0.7044450640678406, "learning_rate": 4.883606640200831e-05, "loss": 1.7098, "step": 73563 }, { "epoch": 2.45, "grad_norm": 0.6776303052902222, "learning_rate": 4.883035053470761e-05, "loss": 1.5782, "step": 73564 }, { "epoch": 2.45, "grad_norm": 0.7154505848884583, "learning_rate": 4.882463497228982e-05, "loss": 1.7265, "step": 73565 }, { "epoch": 2.45, "grad_norm": 0.7067126631736755, "learning_rate": 4.881891971476188e-05, "loss": 1.5959, "step": 73566 }, { "epoch": 2.45, "grad_norm": 0.7020044326782227, "learning_rate": 4.8813204762130755e-05, "loss": 1.6387, "step": 73567 }, { "epoch": 2.45, "grad_norm": 0.7377167344093323, "learning_rate": 4.880749011440346e-05, "loss": 1.7485, "step": 73568 }, { "epoch": 2.45, "grad_norm": 0.687470555305481, "learning_rate": 4.880177577158686e-05, "loss": 1.7231, "step": 73569 }, { "epoch": 2.45, "grad_norm": 0.6932680606842041, "learning_rate": 4.879606173368782e-05, "loss": 1.6851, "step": 73570 }, { "epoch": 2.45, "grad_norm": 0.7050670385360718, "learning_rate": 4.8790348000713396e-05, "loss": 1.683, "step": 73571 }, { "epoch": 2.45, "grad_norm": 0.686212420463562, "learning_rate": 4.878463457267052e-05, "loss": 1.6705, "step": 73572 }, { "epoch": 2.45, "grad_norm": 0.7081548571586609, "learning_rate": 4.877892144956599e-05, "loss": 1.6924, "step": 73573 }, { "epoch": 2.45, "grad_norm": 0.6891900300979614, "learning_rate": 4.877320863140689e-05, "loss": 1.648, "step": 73574 }, { "epoch": 2.45, "grad_norm": 0.6731342673301697, "learning_rate": 4.876749611820006e-05, "loss": 1.6685, "step": 73575 }, { "epoch": 2.45, "grad_norm": 0.7207277417182922, "learning_rate": 4.876178390995251e-05, "loss": 1.6807, "step": 73576 }, { "epoch": 2.45, "grad_norm": 0.7925763726234436, "learning_rate": 4.8756072006671085e-05, "loss": 1.7336, "step": 73577 }, { "epoch": 2.45, "grad_norm": 0.6793620586395264, "learning_rate": 4.875036040836284e-05, "loss": 1.7119, "step": 73578 }, { "epoch": 2.45, "grad_norm": 0.6877306699752808, "learning_rate": 4.874464911503464e-05, "loss": 1.6921, "step": 73579 }, { "epoch": 2.45, "grad_norm": 0.6745310425758362, "learning_rate": 4.8738938126693306e-05, "loss": 1.6742, "step": 73580 }, { "epoch": 2.45, "grad_norm": 0.7219434976577759, "learning_rate": 4.8733227443346e-05, "loss": 1.6423, "step": 73581 }, { "epoch": 2.45, "grad_norm": 0.6778904795646667, "learning_rate": 4.8727517064999456e-05, "loss": 1.7532, "step": 73582 }, { "epoch": 2.45, "grad_norm": 0.7133952975273132, "learning_rate": 4.872180699166073e-05, "loss": 1.7168, "step": 73583 }, { "epoch": 2.45, "grad_norm": 0.6957372426986694, "learning_rate": 4.8716097223336714e-05, "loss": 1.7143, "step": 73584 }, { "epoch": 2.45, "grad_norm": 0.7049620151519775, "learning_rate": 4.871038776003427e-05, "loss": 1.6558, "step": 73585 }, { "epoch": 2.45, "grad_norm": 0.7010819315910339, "learning_rate": 4.870467860176044e-05, "loss": 1.7033, "step": 73586 }, { "epoch": 2.45, "grad_norm": 0.6994588971138, "learning_rate": 4.8698969748522064e-05, "loss": 1.703, "step": 73587 }, { "epoch": 2.45, "grad_norm": 0.6970183253288269, "learning_rate": 4.869326120032615e-05, "loss": 1.6104, "step": 73588 }, { "epoch": 2.45, "grad_norm": 0.7115988731384277, "learning_rate": 4.868755295717955e-05, "loss": 1.687, "step": 73589 }, { "epoch": 2.45, "grad_norm": 0.706135630607605, "learning_rate": 4.868184501908927e-05, "loss": 1.6509, "step": 73590 }, { "epoch": 2.45, "grad_norm": 0.6717197299003601, "learning_rate": 4.867613738606223e-05, "loss": 1.6355, "step": 73591 }, { "epoch": 2.45, "grad_norm": 0.701253354549408, "learning_rate": 4.867043005810522e-05, "loss": 1.7061, "step": 73592 }, { "epoch": 2.45, "grad_norm": 0.6911543607711792, "learning_rate": 4.866472303522538e-05, "loss": 1.7145, "step": 73593 }, { "epoch": 2.45, "grad_norm": 0.6927056908607483, "learning_rate": 4.8659016317429534e-05, "loss": 1.6392, "step": 73594 }, { "epoch": 2.45, "grad_norm": 0.704779863357544, "learning_rate": 4.865330990472453e-05, "loss": 1.7181, "step": 73595 }, { "epoch": 2.45, "grad_norm": 0.7049213647842407, "learning_rate": 4.864760379711735e-05, "loss": 1.7625, "step": 73596 }, { "epoch": 2.45, "grad_norm": 0.7012137770652771, "learning_rate": 4.864189799461504e-05, "loss": 1.7499, "step": 73597 }, { "epoch": 2.45, "grad_norm": 0.6986915469169617, "learning_rate": 4.863619249722444e-05, "loss": 1.6758, "step": 73598 }, { "epoch": 2.45, "grad_norm": 0.6709980964660645, "learning_rate": 4.863048730495239e-05, "loss": 1.6792, "step": 73599 }, { "epoch": 2.45, "grad_norm": 0.6916430592536926, "learning_rate": 4.862478241780594e-05, "loss": 1.7227, "step": 73600 }, { "epoch": 2.45, "grad_norm": 0.7073785066604614, "learning_rate": 4.8619077835791974e-05, "loss": 1.6712, "step": 73601 }, { "epoch": 2.45, "grad_norm": 0.7000318765640259, "learning_rate": 4.861337355891736e-05, "loss": 1.668, "step": 73602 }, { "epoch": 2.45, "grad_norm": 0.6744288206100464, "learning_rate": 4.8607669587189044e-05, "loss": 1.7785, "step": 73603 }, { "epoch": 2.45, "grad_norm": 0.7061956524848938, "learning_rate": 4.860196592061404e-05, "loss": 1.6714, "step": 73604 }, { "epoch": 2.45, "grad_norm": 0.6959742307662964, "learning_rate": 4.859626255919923e-05, "loss": 1.7273, "step": 73605 }, { "epoch": 2.45, "grad_norm": 0.6827501058578491, "learning_rate": 4.8590559502951443e-05, "loss": 1.733, "step": 73606 }, { "epoch": 2.45, "grad_norm": 0.699077308177948, "learning_rate": 4.858485675187774e-05, "loss": 1.7453, "step": 73607 }, { "epoch": 2.45, "grad_norm": 0.6893510818481445, "learning_rate": 4.857915430598496e-05, "loss": 1.7101, "step": 73608 }, { "epoch": 2.45, "grad_norm": 0.6946588754653931, "learning_rate": 4.857345216527998e-05, "loss": 1.668, "step": 73609 }, { "epoch": 2.45, "grad_norm": 0.7042407989501953, "learning_rate": 4.8567750329769804e-05, "loss": 1.6296, "step": 73610 }, { "epoch": 2.45, "grad_norm": 0.7103537917137146, "learning_rate": 4.856204879946139e-05, "loss": 1.7062, "step": 73611 }, { "epoch": 2.45, "grad_norm": 0.7129342555999756, "learning_rate": 4.8556347574361606e-05, "loss": 1.6476, "step": 73612 }, { "epoch": 2.45, "grad_norm": 0.6854706406593323, "learning_rate": 4.8550646654477274e-05, "loss": 1.7062, "step": 73613 }, { "epoch": 2.45, "grad_norm": 0.6760463714599609, "learning_rate": 4.854494603981549e-05, "loss": 1.6466, "step": 73614 }, { "epoch": 2.45, "grad_norm": 0.6987568736076355, "learning_rate": 4.8539245730383115e-05, "loss": 1.706, "step": 73615 }, { "epoch": 2.45, "grad_norm": 0.6604501605033875, "learning_rate": 4.853354572618694e-05, "loss": 1.6296, "step": 73616 }, { "epoch": 2.45, "grad_norm": 0.7067009210586548, "learning_rate": 4.8527846027234e-05, "loss": 1.605, "step": 73617 }, { "epoch": 2.45, "grad_norm": 0.6685146689414978, "learning_rate": 4.852214663353128e-05, "loss": 1.6628, "step": 73618 }, { "epoch": 2.45, "grad_norm": 0.6922819018363953, "learning_rate": 4.8516447545085614e-05, "loss": 1.6293, "step": 73619 }, { "epoch": 2.45, "grad_norm": 0.688412606716156, "learning_rate": 4.851074876190386e-05, "loss": 1.6792, "step": 73620 }, { "epoch": 2.45, "grad_norm": 0.6792908310890198, "learning_rate": 4.8505050283993085e-05, "loss": 1.657, "step": 73621 }, { "epoch": 2.45, "grad_norm": 0.6876530051231384, "learning_rate": 4.849935211136011e-05, "loss": 1.5897, "step": 73622 }, { "epoch": 2.45, "grad_norm": 0.6894543766975403, "learning_rate": 4.849365424401177e-05, "loss": 1.7225, "step": 73623 }, { "epoch": 2.45, "grad_norm": 0.679233968257904, "learning_rate": 4.8487956681955184e-05, "loss": 1.6816, "step": 73624 }, { "epoch": 2.45, "grad_norm": 0.704994261264801, "learning_rate": 4.848225942519706e-05, "loss": 1.6725, "step": 73625 }, { "epoch": 2.45, "grad_norm": 0.6886088252067566, "learning_rate": 4.847656247374451e-05, "loss": 1.6721, "step": 73626 }, { "epoch": 2.45, "grad_norm": 0.6784798502922058, "learning_rate": 4.847086582760427e-05, "loss": 1.6907, "step": 73627 }, { "epoch": 2.45, "grad_norm": 0.678493857383728, "learning_rate": 4.84651694867834e-05, "loss": 1.6685, "step": 73628 }, { "epoch": 2.45, "grad_norm": 0.687677264213562, "learning_rate": 4.845947345128876e-05, "loss": 1.6781, "step": 73629 }, { "epoch": 2.45, "grad_norm": 0.6868361830711365, "learning_rate": 4.845377772112721e-05, "loss": 1.571, "step": 73630 }, { "epoch": 2.45, "grad_norm": 0.7068790793418884, "learning_rate": 4.8448082296305744e-05, "loss": 1.729, "step": 73631 }, { "epoch": 2.45, "grad_norm": 0.6966424584388733, "learning_rate": 4.84423871768312e-05, "loss": 1.6836, "step": 73632 }, { "epoch": 2.45, "grad_norm": 0.6617071032524109, "learning_rate": 4.843669236271056e-05, "loss": 1.6885, "step": 73633 }, { "epoch": 2.45, "grad_norm": 0.6926112771034241, "learning_rate": 4.8430997853950756e-05, "loss": 1.6501, "step": 73634 }, { "epoch": 2.45, "grad_norm": 0.6786873936653137, "learning_rate": 4.842530365055855e-05, "loss": 1.6847, "step": 73635 }, { "epoch": 2.45, "grad_norm": 0.6987899541854858, "learning_rate": 4.8419609752541046e-05, "loss": 1.6234, "step": 73636 }, { "epoch": 2.45, "grad_norm": 0.6847700476646423, "learning_rate": 4.8413916159904985e-05, "loss": 1.7587, "step": 73637 }, { "epoch": 2.45, "grad_norm": 0.7245177030563354, "learning_rate": 4.8408222872657444e-05, "loss": 1.7219, "step": 73638 }, { "epoch": 2.45, "grad_norm": 0.7153321504592896, "learning_rate": 4.840252989080517e-05, "loss": 1.7334, "step": 73639 }, { "epoch": 2.45, "grad_norm": 0.6912122964859009, "learning_rate": 4.8396837214355245e-05, "loss": 1.6494, "step": 73640 }, { "epoch": 2.45, "grad_norm": 0.704073429107666, "learning_rate": 4.839114484331448e-05, "loss": 1.6511, "step": 73641 }, { "epoch": 2.45, "grad_norm": 0.7012645602226257, "learning_rate": 4.83854527776897e-05, "loss": 1.7428, "step": 73642 }, { "epoch": 2.45, "grad_norm": 0.7088732719421387, "learning_rate": 4.8379761017487985e-05, "loss": 1.6865, "step": 73643 }, { "epoch": 2.45, "grad_norm": 0.6957046389579773, "learning_rate": 4.837406956271618e-05, "loss": 1.726, "step": 73644 }, { "epoch": 2.45, "grad_norm": 0.7166855931282043, "learning_rate": 4.836837841338112e-05, "loss": 1.7009, "step": 73645 }, { "epoch": 2.45, "grad_norm": 0.7214317917823792, "learning_rate": 4.836268756948972e-05, "loss": 1.7343, "step": 73646 }, { "epoch": 2.45, "grad_norm": 0.6803491115570068, "learning_rate": 4.835699703104906e-05, "loss": 1.718, "step": 73647 }, { "epoch": 2.45, "grad_norm": 0.7187551856040955, "learning_rate": 4.835130679806593e-05, "loss": 1.7669, "step": 73648 }, { "epoch": 2.45, "grad_norm": 0.7043511867523193, "learning_rate": 4.834561687054714e-05, "loss": 1.7107, "step": 73649 }, { "epoch": 2.45, "grad_norm": 0.6915648579597473, "learning_rate": 4.833992724849974e-05, "loss": 1.6977, "step": 73650 }, { "epoch": 2.45, "grad_norm": 0.705691397190094, "learning_rate": 4.833423793193062e-05, "loss": 1.7082, "step": 73651 }, { "epoch": 2.45, "grad_norm": 0.6902189254760742, "learning_rate": 4.83285489208466e-05, "loss": 1.7036, "step": 73652 }, { "epoch": 2.45, "grad_norm": 0.7078168392181396, "learning_rate": 4.832286021525461e-05, "loss": 1.7054, "step": 73653 }, { "epoch": 2.45, "grad_norm": 0.7127097249031067, "learning_rate": 4.8317171815161746e-05, "loss": 1.6876, "step": 73654 }, { "epoch": 2.45, "grad_norm": 0.6971895098686218, "learning_rate": 4.8311483720574574e-05, "loss": 1.6923, "step": 73655 }, { "epoch": 2.45, "grad_norm": 0.6779253482818604, "learning_rate": 4.830579593150021e-05, "loss": 1.7474, "step": 73656 }, { "epoch": 2.45, "grad_norm": 0.7049469947814941, "learning_rate": 4.83001084479456e-05, "loss": 1.6691, "step": 73657 }, { "epoch": 2.45, "grad_norm": 0.703564465045929, "learning_rate": 4.829442126991755e-05, "loss": 1.742, "step": 73658 }, { "epoch": 2.45, "grad_norm": 0.7367536425590515, "learning_rate": 4.8288734397422936e-05, "loss": 1.6917, "step": 73659 }, { "epoch": 2.45, "grad_norm": 0.6667683720588684, "learning_rate": 4.828304783046868e-05, "loss": 1.7195, "step": 73660 }, { "epoch": 2.45, "grad_norm": 0.7055644989013672, "learning_rate": 4.827736156906188e-05, "loss": 1.6377, "step": 73661 }, { "epoch": 2.45, "grad_norm": 0.6843162178993225, "learning_rate": 4.827167561320913e-05, "loss": 1.7088, "step": 73662 }, { "epoch": 2.45, "grad_norm": 0.718271017074585, "learning_rate": 4.826598996291748e-05, "loss": 1.6713, "step": 73663 }, { "epoch": 2.45, "grad_norm": 0.6958603858947754, "learning_rate": 4.826030461819388e-05, "loss": 1.7072, "step": 73664 }, { "epoch": 2.45, "grad_norm": 0.681226909160614, "learning_rate": 4.8254619579045176e-05, "loss": 1.7348, "step": 73665 }, { "epoch": 2.45, "grad_norm": 0.7007592916488647, "learning_rate": 4.8248934845478226e-05, "loss": 1.6528, "step": 73666 }, { "epoch": 2.45, "grad_norm": 0.6970734596252441, "learning_rate": 4.824325041749997e-05, "loss": 1.6947, "step": 73667 }, { "epoch": 2.45, "grad_norm": 0.7027990221977234, "learning_rate": 4.8237566295117427e-05, "loss": 1.6391, "step": 73668 }, { "epoch": 2.45, "grad_norm": 0.6780022978782654, "learning_rate": 4.823188247833726e-05, "loss": 1.7576, "step": 73669 }, { "epoch": 2.45, "grad_norm": 0.701356053352356, "learning_rate": 4.8226198967166496e-05, "loss": 1.7748, "step": 73670 }, { "epoch": 2.45, "grad_norm": 0.7112151980400085, "learning_rate": 4.82205157616121e-05, "loss": 1.7789, "step": 73671 }, { "epoch": 2.45, "grad_norm": 0.710473895072937, "learning_rate": 4.82148328616809e-05, "loss": 1.6329, "step": 73672 }, { "epoch": 2.45, "grad_norm": 0.7058057188987732, "learning_rate": 4.8209150267379715e-05, "loss": 1.6907, "step": 73673 }, { "epoch": 2.45, "grad_norm": 0.7211750745773315, "learning_rate": 4.820346797871552e-05, "loss": 1.703, "step": 73674 }, { "epoch": 2.45, "grad_norm": 0.7254961729049683, "learning_rate": 4.819778599569536e-05, "loss": 1.7207, "step": 73675 }, { "epoch": 2.45, "grad_norm": 0.6951813697814941, "learning_rate": 4.8192104318325844e-05, "loss": 1.6252, "step": 73676 }, { "epoch": 2.45, "grad_norm": 0.6914846897125244, "learning_rate": 4.818642294661402e-05, "loss": 1.6533, "step": 73677 }, { "epoch": 2.45, "grad_norm": 0.6952111721038818, "learning_rate": 4.818074188056683e-05, "loss": 1.683, "step": 73678 }, { "epoch": 2.45, "grad_norm": 0.686453104019165, "learning_rate": 4.817506112019113e-05, "loss": 1.6617, "step": 73679 }, { "epoch": 2.45, "grad_norm": 0.6982585787773132, "learning_rate": 4.816938066549374e-05, "loss": 1.7213, "step": 73680 }, { "epoch": 2.45, "grad_norm": 0.6975334286689758, "learning_rate": 4.816370051648166e-05, "loss": 1.7485, "step": 73681 }, { "epoch": 2.45, "grad_norm": 0.7069011926651001, "learning_rate": 4.815802067316176e-05, "loss": 1.6617, "step": 73682 }, { "epoch": 2.45, "grad_norm": 0.693695068359375, "learning_rate": 4.815234113554085e-05, "loss": 1.6714, "step": 73683 }, { "epoch": 2.45, "grad_norm": 0.7089653611183167, "learning_rate": 4.814666190362587e-05, "loss": 1.8352, "step": 73684 }, { "epoch": 2.45, "grad_norm": 0.7024545073509216, "learning_rate": 4.814098297742381e-05, "loss": 1.6962, "step": 73685 }, { "epoch": 2.45, "grad_norm": 0.7110685706138611, "learning_rate": 4.8135304356941504e-05, "loss": 1.7169, "step": 73686 }, { "epoch": 2.45, "grad_norm": 0.6869364976882935, "learning_rate": 4.812962604218574e-05, "loss": 1.6984, "step": 73687 }, { "epoch": 2.45, "grad_norm": 0.6857856512069702, "learning_rate": 4.812394803316358e-05, "loss": 1.6757, "step": 73688 }, { "epoch": 2.45, "grad_norm": 0.7016732692718506, "learning_rate": 4.811827032988183e-05, "loss": 1.7002, "step": 73689 }, { "epoch": 2.45, "grad_norm": 0.7100860476493835, "learning_rate": 4.8112592932347294e-05, "loss": 1.6243, "step": 73690 }, { "epoch": 2.45, "grad_norm": 0.6922505497932434, "learning_rate": 4.810691584056705e-05, "loss": 1.6128, "step": 73691 }, { "epoch": 2.45, "grad_norm": 0.6932169795036316, "learning_rate": 4.8101239054547824e-05, "loss": 1.6347, "step": 73692 }, { "epoch": 2.45, "grad_norm": 0.6956777572631836, "learning_rate": 4.809556257429664e-05, "loss": 1.7247, "step": 73693 }, { "epoch": 2.45, "grad_norm": 0.7136718034744263, "learning_rate": 4.808988639982026e-05, "loss": 1.6977, "step": 73694 }, { "epoch": 2.45, "grad_norm": 0.6878570318222046, "learning_rate": 4.8084210531125725e-05, "loss": 1.6521, "step": 73695 }, { "epoch": 2.45, "grad_norm": 0.6767989993095398, "learning_rate": 4.807853496821984e-05, "loss": 1.7179, "step": 73696 }, { "epoch": 2.45, "grad_norm": 0.6905838251113892, "learning_rate": 4.8072859711109414e-05, "loss": 1.6589, "step": 73697 }, { "epoch": 2.45, "grad_norm": 0.6969008445739746, "learning_rate": 4.806718475980147e-05, "loss": 1.7173, "step": 73698 }, { "epoch": 2.45, "grad_norm": 0.6918665170669556, "learning_rate": 4.80615101143028e-05, "loss": 1.6573, "step": 73699 }, { "epoch": 2.45, "grad_norm": 0.6696467399597168, "learning_rate": 4.805583577462041e-05, "loss": 1.6438, "step": 73700 }, { "epoch": 2.45, "grad_norm": 0.7048078775405884, "learning_rate": 4.8050161740761086e-05, "loss": 1.6827, "step": 73701 }, { "epoch": 2.45, "grad_norm": 0.7008556723594666, "learning_rate": 4.804448801273169e-05, "loss": 1.6351, "step": 73702 }, { "epoch": 2.45, "grad_norm": 0.6876230835914612, "learning_rate": 4.803881459053921e-05, "loss": 1.6147, "step": 73703 }, { "epoch": 2.45, "grad_norm": 0.665683925151825, "learning_rate": 4.803314147419045e-05, "loss": 1.6718, "step": 73704 }, { "epoch": 2.45, "grad_norm": 0.6815166473388672, "learning_rate": 4.802746866369237e-05, "loss": 1.6317, "step": 73705 }, { "epoch": 2.45, "grad_norm": 0.7034945487976074, "learning_rate": 4.8021796159051765e-05, "loss": 1.7088, "step": 73706 }, { "epoch": 2.45, "grad_norm": 0.7003692388534546, "learning_rate": 4.801612396027562e-05, "loss": 1.7166, "step": 73707 }, { "epoch": 2.45, "grad_norm": 0.6844866275787354, "learning_rate": 4.801045206737081e-05, "loss": 1.6598, "step": 73708 }, { "epoch": 2.45, "grad_norm": 0.6840829253196716, "learning_rate": 4.800478048034409e-05, "loss": 1.7064, "step": 73709 }, { "epoch": 2.45, "grad_norm": 0.6957666873931885, "learning_rate": 4.7999109199202516e-05, "loss": 1.6376, "step": 73710 }, { "epoch": 2.45, "grad_norm": 0.6938511729240417, "learning_rate": 4.7993438223952886e-05, "loss": 1.7095, "step": 73711 }, { "epoch": 2.45, "grad_norm": 0.6877177953720093, "learning_rate": 4.7987767554601996e-05, "loss": 1.759, "step": 73712 }, { "epoch": 2.45, "grad_norm": 0.678720235824585, "learning_rate": 4.7982097191156845e-05, "loss": 1.6567, "step": 73713 }, { "epoch": 2.45, "grad_norm": 0.6856677532196045, "learning_rate": 4.7976427133624385e-05, "loss": 1.7168, "step": 73714 }, { "epoch": 2.45, "grad_norm": 0.740634560585022, "learning_rate": 4.7970757382011385e-05, "loss": 1.694, "step": 73715 }, { "epoch": 2.45, "grad_norm": 0.7121131420135498, "learning_rate": 4.7965087936324664e-05, "loss": 1.7392, "step": 73716 }, { "epoch": 2.45, "grad_norm": 0.6799122095108032, "learning_rate": 4.7959418796571294e-05, "loss": 1.6641, "step": 73717 }, { "epoch": 2.45, "grad_norm": 0.6886407732963562, "learning_rate": 4.795374996275806e-05, "loss": 1.7176, "step": 73718 }, { "epoch": 2.45, "grad_norm": 0.6993052959442139, "learning_rate": 4.794808143489173e-05, "loss": 1.6993, "step": 73719 }, { "epoch": 2.45, "grad_norm": 0.6883123517036438, "learning_rate": 4.7942413212979325e-05, "loss": 1.7165, "step": 73720 }, { "epoch": 2.45, "grad_norm": 0.7116348743438721, "learning_rate": 4.7936745297027744e-05, "loss": 1.6777, "step": 73721 }, { "epoch": 2.45, "grad_norm": 0.6673151254653931, "learning_rate": 4.7931077687043815e-05, "loss": 1.6899, "step": 73722 }, { "epoch": 2.45, "grad_norm": 0.6869889497756958, "learning_rate": 4.792541038303434e-05, "loss": 1.6781, "step": 73723 }, { "epoch": 2.45, "grad_norm": 0.7042356729507446, "learning_rate": 4.791974338500636e-05, "loss": 1.6928, "step": 73724 }, { "epoch": 2.45, "grad_norm": 0.7093945145606995, "learning_rate": 4.791407669296665e-05, "loss": 1.7261, "step": 73725 }, { "epoch": 2.45, "grad_norm": 0.6667313575744629, "learning_rate": 4.790841030692204e-05, "loss": 1.6638, "step": 73726 }, { "epoch": 2.45, "grad_norm": 0.6992506384849548, "learning_rate": 4.790274422687949e-05, "loss": 1.7526, "step": 73727 }, { "epoch": 2.45, "grad_norm": 0.6938393712043762, "learning_rate": 4.789707845284595e-05, "loss": 1.5997, "step": 73728 }, { "epoch": 2.45, "grad_norm": 0.6907135248184204, "learning_rate": 4.789141298482819e-05, "loss": 1.7838, "step": 73729 }, { "epoch": 2.45, "grad_norm": 0.6952332258224487, "learning_rate": 4.788574782283301e-05, "loss": 1.7384, "step": 73730 }, { "epoch": 2.45, "grad_norm": 0.6817892789840698, "learning_rate": 4.7880082966867496e-05, "loss": 1.7304, "step": 73731 }, { "epoch": 2.45, "grad_norm": 0.6867711544036865, "learning_rate": 4.787441841693841e-05, "loss": 1.7649, "step": 73732 }, { "epoch": 2.45, "grad_norm": 0.70149827003479, "learning_rate": 4.786875417305255e-05, "loss": 1.7439, "step": 73733 }, { "epoch": 2.45, "grad_norm": 0.6976629495620728, "learning_rate": 4.7863090235216874e-05, "loss": 1.6414, "step": 73734 }, { "epoch": 2.45, "grad_norm": 0.6862229704856873, "learning_rate": 4.785742660343831e-05, "loss": 1.6902, "step": 73735 }, { "epoch": 2.45, "grad_norm": 0.6853348016738892, "learning_rate": 4.785176327772372e-05, "loss": 1.6177, "step": 73736 }, { "epoch": 2.45, "grad_norm": 0.6831907033920288, "learning_rate": 4.7846100258079835e-05, "loss": 1.7407, "step": 73737 }, { "epoch": 2.45, "grad_norm": 0.7125368118286133, "learning_rate": 4.784043754451371e-05, "loss": 1.7333, "step": 73738 }, { "epoch": 2.45, "grad_norm": 0.6750921010971069, "learning_rate": 4.7834775137032146e-05, "loss": 1.6814, "step": 73739 }, { "epoch": 2.45, "grad_norm": 0.707685112953186, "learning_rate": 4.782911303564193e-05, "loss": 1.7296, "step": 73740 }, { "epoch": 2.45, "grad_norm": 0.6826300024986267, "learning_rate": 4.7823451240350064e-05, "loss": 1.6626, "step": 73741 }, { "epoch": 2.45, "grad_norm": 0.6672977209091187, "learning_rate": 4.781778975116334e-05, "loss": 1.6486, "step": 73742 }, { "epoch": 2.45, "grad_norm": 0.676766037940979, "learning_rate": 4.7812128568088713e-05, "loss": 1.6324, "step": 73743 }, { "epoch": 2.45, "grad_norm": 0.6963045001029968, "learning_rate": 4.7806467691132956e-05, "loss": 1.6852, "step": 73744 }, { "epoch": 2.45, "grad_norm": 0.660930335521698, "learning_rate": 4.780080712030302e-05, "loss": 1.6086, "step": 73745 }, { "epoch": 2.45, "grad_norm": 0.6960281729698181, "learning_rate": 4.779514685560577e-05, "loss": 1.714, "step": 73746 }, { "epoch": 2.45, "grad_norm": 0.7145082354545593, "learning_rate": 4.778948689704797e-05, "loss": 1.6921, "step": 73747 }, { "epoch": 2.45, "grad_norm": 0.704953670501709, "learning_rate": 4.778382724463664e-05, "loss": 1.7461, "step": 73748 }, { "epoch": 2.45, "grad_norm": 0.68623948097229, "learning_rate": 4.777816789837852e-05, "loss": 1.6883, "step": 73749 }, { "epoch": 2.45, "grad_norm": 0.6997774243354797, "learning_rate": 4.777250885828062e-05, "loss": 1.6114, "step": 73750 }, { "epoch": 2.45, "grad_norm": 0.7086811661720276, "learning_rate": 4.7766850124349724e-05, "loss": 1.6679, "step": 73751 }, { "epoch": 2.45, "grad_norm": 0.6876466274261475, "learning_rate": 4.776119169659264e-05, "loss": 1.6705, "step": 73752 }, { "epoch": 2.45, "grad_norm": 0.6756067872047424, "learning_rate": 4.775553357501637e-05, "loss": 1.647, "step": 73753 }, { "epoch": 2.45, "grad_norm": 0.6961551904678345, "learning_rate": 4.7749875759627644e-05, "loss": 1.7108, "step": 73754 }, { "epoch": 2.45, "grad_norm": 0.7015318274497986, "learning_rate": 4.7744218250433455e-05, "loss": 1.731, "step": 73755 }, { "epoch": 2.45, "grad_norm": 0.6780838966369629, "learning_rate": 4.773856104744056e-05, "loss": 1.6881, "step": 73756 }, { "epoch": 2.45, "grad_norm": 0.6979600191116333, "learning_rate": 4.773290415065596e-05, "loss": 1.5759, "step": 73757 }, { "epoch": 2.45, "grad_norm": 0.6960483193397522, "learning_rate": 4.772724756008644e-05, "loss": 1.7455, "step": 73758 }, { "epoch": 2.45, "grad_norm": 0.6885040402412415, "learning_rate": 4.772159127573878e-05, "loss": 1.6488, "step": 73759 }, { "epoch": 2.45, "grad_norm": 0.6610867977142334, "learning_rate": 4.771593529762005e-05, "loss": 1.6332, "step": 73760 }, { "epoch": 2.45, "grad_norm": 0.7045795321464539, "learning_rate": 4.771027962573697e-05, "loss": 1.612, "step": 73761 }, { "epoch": 2.45, "grad_norm": 0.7142959833145142, "learning_rate": 4.770462426009638e-05, "loss": 1.7001, "step": 73762 }, { "epoch": 2.45, "grad_norm": 0.728705644607544, "learning_rate": 4.769896920070518e-05, "loss": 1.7261, "step": 73763 }, { "epoch": 2.45, "grad_norm": 0.6691902279853821, "learning_rate": 4.769331444757038e-05, "loss": 1.6818, "step": 73764 }, { "epoch": 2.45, "grad_norm": 0.6641862988471985, "learning_rate": 4.768766000069868e-05, "loss": 1.6307, "step": 73765 }, { "epoch": 2.45, "grad_norm": 0.6770226359367371, "learning_rate": 4.7682005860096894e-05, "loss": 1.7541, "step": 73766 }, { "epoch": 2.45, "grad_norm": 0.7118677496910095, "learning_rate": 4.767635202577208e-05, "loss": 1.7586, "step": 73767 }, { "epoch": 2.45, "grad_norm": 0.7038689255714417, "learning_rate": 4.7670698497731e-05, "loss": 1.7114, "step": 73768 }, { "epoch": 2.45, "grad_norm": 0.6788489818572998, "learning_rate": 4.766504527598044e-05, "loss": 1.7097, "step": 73769 }, { "epoch": 2.45, "grad_norm": 0.6952660083770752, "learning_rate": 4.765939236052731e-05, "loss": 1.6184, "step": 73770 }, { "epoch": 2.45, "grad_norm": 0.694025456905365, "learning_rate": 4.765373975137865e-05, "loss": 1.6495, "step": 73771 }, { "epoch": 2.45, "grad_norm": 0.6956684589385986, "learning_rate": 4.764808744854104e-05, "loss": 1.6908, "step": 73772 }, { "epoch": 2.45, "grad_norm": 0.6847977638244629, "learning_rate": 4.764243545202147e-05, "loss": 1.7027, "step": 73773 }, { "epoch": 2.45, "grad_norm": 0.70167475938797, "learning_rate": 4.763678376182686e-05, "loss": 1.6727, "step": 73774 }, { "epoch": 2.45, "grad_norm": 0.7073279619216919, "learning_rate": 4.763113237796401e-05, "loss": 1.7192, "step": 73775 }, { "epoch": 2.45, "grad_norm": 0.6935093402862549, "learning_rate": 4.7625481300439715e-05, "loss": 1.7336, "step": 73776 }, { "epoch": 2.45, "grad_norm": 0.6885806918144226, "learning_rate": 4.761983052926087e-05, "loss": 1.7196, "step": 73777 }, { "epoch": 2.45, "grad_norm": 0.684106707572937, "learning_rate": 4.7614180064434536e-05, "loss": 1.719, "step": 73778 }, { "epoch": 2.45, "grad_norm": 0.693310558795929, "learning_rate": 4.760852990596727e-05, "loss": 1.6585, "step": 73779 }, { "epoch": 2.45, "grad_norm": 0.6982011795043945, "learning_rate": 4.760288005386605e-05, "loss": 1.7267, "step": 73780 }, { "epoch": 2.45, "grad_norm": 0.7092819809913635, "learning_rate": 4.7597230508137804e-05, "loss": 1.657, "step": 73781 }, { "epoch": 2.45, "grad_norm": 0.7057164311408997, "learning_rate": 4.759158126878936e-05, "loss": 1.6307, "step": 73782 }, { "epoch": 2.45, "grad_norm": 0.6866596341133118, "learning_rate": 4.7585932335827424e-05, "loss": 1.7153, "step": 73783 }, { "epoch": 2.45, "grad_norm": 0.7176167964935303, "learning_rate": 4.7580283709259e-05, "loss": 1.6887, "step": 73784 }, { "epoch": 2.45, "grad_norm": 0.6732364892959595, "learning_rate": 4.757463538909109e-05, "loss": 1.6744, "step": 73785 }, { "epoch": 2.45, "grad_norm": 0.8287716507911682, "learning_rate": 4.75689873753302e-05, "loss": 1.7536, "step": 73786 }, { "epoch": 2.45, "grad_norm": 0.6874369978904724, "learning_rate": 4.756333966798338e-05, "loss": 1.6793, "step": 73787 }, { "epoch": 2.45, "grad_norm": 0.6897211670875549, "learning_rate": 4.755769226705755e-05, "loss": 1.6411, "step": 73788 }, { "epoch": 2.45, "grad_norm": 0.6929768323898315, "learning_rate": 4.755204517255946e-05, "loss": 1.7375, "step": 73789 }, { "epoch": 2.46, "grad_norm": 0.7027138471603394, "learning_rate": 4.754639838449595e-05, "loss": 1.7202, "step": 73790 }, { "epoch": 2.46, "grad_norm": 0.7109571695327759, "learning_rate": 4.754075190287389e-05, "loss": 1.6621, "step": 73791 }, { "epoch": 2.46, "grad_norm": 0.703359842300415, "learning_rate": 4.7535105727700315e-05, "loss": 1.756, "step": 73792 }, { "epoch": 2.46, "grad_norm": 0.680283784866333, "learning_rate": 4.752945985898178e-05, "loss": 1.59, "step": 73793 }, { "epoch": 2.46, "grad_norm": 0.7164545059204102, "learning_rate": 4.752381429672526e-05, "loss": 1.716, "step": 73794 }, { "epoch": 2.46, "grad_norm": 0.6745098829269409, "learning_rate": 4.751816904093772e-05, "loss": 1.6924, "step": 73795 }, { "epoch": 2.46, "grad_norm": 0.7065292000770569, "learning_rate": 4.7512524091625934e-05, "loss": 1.7531, "step": 73796 }, { "epoch": 2.46, "grad_norm": 0.7018696665763855, "learning_rate": 4.750687944879665e-05, "loss": 1.7142, "step": 73797 }, { "epoch": 2.46, "grad_norm": 0.6934486627578735, "learning_rate": 4.7501235112456906e-05, "loss": 1.6295, "step": 73798 }, { "epoch": 2.46, "grad_norm": 0.6910143494606018, "learning_rate": 4.7495591082613416e-05, "loss": 1.6202, "step": 73799 }, { "epoch": 2.46, "grad_norm": 0.6912538409233093, "learning_rate": 4.748994735927302e-05, "loss": 1.6523, "step": 73800 }, { "epoch": 2.46, "grad_norm": 0.6964780688285828, "learning_rate": 4.7484303942442634e-05, "loss": 1.718, "step": 73801 }, { "epoch": 2.46, "grad_norm": 0.6733563542366028, "learning_rate": 4.7478660832129166e-05, "loss": 1.666, "step": 73802 }, { "epoch": 2.46, "grad_norm": 0.7404820322990417, "learning_rate": 4.7473018028339405e-05, "loss": 1.6442, "step": 73803 }, { "epoch": 2.46, "grad_norm": 0.7041736841201782, "learning_rate": 4.746737553108011e-05, "loss": 1.7566, "step": 73804 }, { "epoch": 2.46, "grad_norm": 0.678164005279541, "learning_rate": 4.746173334035828e-05, "loss": 1.7229, "step": 73805 }, { "epoch": 2.46, "grad_norm": 0.7135435938835144, "learning_rate": 4.74560914561807e-05, "loss": 1.6629, "step": 73806 }, { "epoch": 2.46, "grad_norm": 0.6772069931030273, "learning_rate": 4.745044987855414e-05, "loss": 1.6901, "step": 73807 }, { "epoch": 2.46, "grad_norm": 0.6800640225410461, "learning_rate": 4.74448086074856e-05, "loss": 1.7586, "step": 73808 }, { "epoch": 2.46, "grad_norm": 0.712486207485199, "learning_rate": 4.7439167642981756e-05, "loss": 1.6636, "step": 73809 }, { "epoch": 2.46, "grad_norm": 0.68302983045578, "learning_rate": 4.743352698504965e-05, "loss": 1.7123, "step": 73810 }, { "epoch": 2.46, "grad_norm": 0.6761053800582886, "learning_rate": 4.7427886633695945e-05, "loss": 1.69, "step": 73811 }, { "epoch": 2.46, "grad_norm": 0.7011176347732544, "learning_rate": 4.742224658892766e-05, "loss": 1.768, "step": 73812 }, { "epoch": 2.46, "grad_norm": 0.6885092854499817, "learning_rate": 4.741660685075153e-05, "loss": 1.7321, "step": 73813 }, { "epoch": 2.46, "grad_norm": 0.6889620423316956, "learning_rate": 4.741096741917434e-05, "loss": 1.6869, "step": 73814 }, { "epoch": 2.46, "grad_norm": 0.6856405735015869, "learning_rate": 4.7405328294203126e-05, "loss": 1.7042, "step": 73815 }, { "epoch": 2.46, "grad_norm": 0.6813850998878479, "learning_rate": 4.739968947584452e-05, "loss": 1.6438, "step": 73816 }, { "epoch": 2.46, "grad_norm": 0.6997267603874207, "learning_rate": 4.739405096410557e-05, "loss": 1.6317, "step": 73817 }, { "epoch": 2.46, "grad_norm": 0.6769648194313049, "learning_rate": 4.738841275899301e-05, "loss": 1.6903, "step": 73818 }, { "epoch": 2.46, "grad_norm": 0.6852553486824036, "learning_rate": 4.7382774860513604e-05, "loss": 1.7303, "step": 73819 }, { "epoch": 2.46, "grad_norm": 0.6755523681640625, "learning_rate": 4.7377137268674384e-05, "loss": 1.6897, "step": 73820 }, { "epoch": 2.46, "grad_norm": 0.6924254894256592, "learning_rate": 4.737149998348203e-05, "loss": 1.7116, "step": 73821 }, { "epoch": 2.46, "grad_norm": 0.7027685046195984, "learning_rate": 4.7365863004943513e-05, "loss": 1.7143, "step": 73822 }, { "epoch": 2.46, "grad_norm": 0.6918008327484131, "learning_rate": 4.736022633306553e-05, "loss": 1.7109, "step": 73823 }, { "epoch": 2.46, "grad_norm": 0.7083524465560913, "learning_rate": 4.735458996785511e-05, "loss": 1.6729, "step": 73824 }, { "epoch": 2.46, "grad_norm": 0.9315099120140076, "learning_rate": 4.734895390931897e-05, "loss": 1.754, "step": 73825 }, { "epoch": 2.46, "grad_norm": 0.6742486953735352, "learning_rate": 4.7343318157463906e-05, "loss": 1.6756, "step": 73826 }, { "epoch": 2.46, "grad_norm": 0.6632279753684998, "learning_rate": 4.733768271229692e-05, "loss": 1.6832, "step": 73827 }, { "epoch": 2.46, "grad_norm": 0.7074053287506104, "learning_rate": 4.7332047573824735e-05, "loss": 1.6467, "step": 73828 }, { "epoch": 2.46, "grad_norm": 0.702182412147522, "learning_rate": 4.732641274205415e-05, "loss": 1.6635, "step": 73829 }, { "epoch": 2.46, "grad_norm": 0.6992059350013733, "learning_rate": 4.732077821699206e-05, "loss": 1.7879, "step": 73830 }, { "epoch": 2.46, "grad_norm": 0.7201087474822998, "learning_rate": 4.731514399864539e-05, "loss": 1.7187, "step": 73831 }, { "epoch": 2.46, "grad_norm": 0.6844702959060669, "learning_rate": 4.730951008702093e-05, "loss": 1.7347, "step": 73832 }, { "epoch": 2.46, "grad_norm": 0.6773284673690796, "learning_rate": 4.730387648212538e-05, "loss": 1.6513, "step": 73833 }, { "epoch": 2.46, "grad_norm": 0.7047832608222961, "learning_rate": 4.72982431839658e-05, "loss": 1.7582, "step": 73834 }, { "epoch": 2.46, "grad_norm": 0.7243807315826416, "learning_rate": 4.729261019254889e-05, "loss": 1.7457, "step": 73835 }, { "epoch": 2.46, "grad_norm": 0.7079359292984009, "learning_rate": 4.7286977507881464e-05, "loss": 1.7494, "step": 73836 }, { "epoch": 2.46, "grad_norm": 0.7019636631011963, "learning_rate": 4.7281345129970396e-05, "loss": 1.6075, "step": 73837 }, { "epoch": 2.46, "grad_norm": 0.7199305295944214, "learning_rate": 4.727571305882264e-05, "loss": 1.7734, "step": 73838 }, { "epoch": 2.46, "grad_norm": 0.689842700958252, "learning_rate": 4.7270081294444897e-05, "loss": 1.6266, "step": 73839 }, { "epoch": 2.46, "grad_norm": 0.675266444683075, "learning_rate": 4.726444983684399e-05, "loss": 1.7333, "step": 73840 }, { "epoch": 2.46, "grad_norm": 0.6971350312232971, "learning_rate": 4.725881868602684e-05, "loss": 1.6838, "step": 73841 }, { "epoch": 2.46, "grad_norm": 0.708050012588501, "learning_rate": 4.7253187842000265e-05, "loss": 1.7635, "step": 73842 }, { "epoch": 2.46, "grad_norm": 0.6852883100509644, "learning_rate": 4.7247557304771034e-05, "loss": 1.6324, "step": 73843 }, { "epoch": 2.46, "grad_norm": 0.6954917907714844, "learning_rate": 4.7241927074345986e-05, "loss": 1.7358, "step": 73844 }, { "epoch": 2.46, "grad_norm": 0.6804338693618774, "learning_rate": 4.723629715073208e-05, "loss": 1.5971, "step": 73845 }, { "epoch": 2.46, "grad_norm": 0.6850050091743469, "learning_rate": 4.7230667533936086e-05, "loss": 1.7112, "step": 73846 }, { "epoch": 2.46, "grad_norm": 0.6993333101272583, "learning_rate": 4.722503822396472e-05, "loss": 1.677, "step": 73847 }, { "epoch": 2.46, "grad_norm": 0.6776583194732666, "learning_rate": 4.721940922082501e-05, "loss": 1.6398, "step": 73848 }, { "epoch": 2.46, "grad_norm": 0.6845253109931946, "learning_rate": 4.721378052452366e-05, "loss": 1.7486, "step": 73849 }, { "epoch": 2.46, "grad_norm": 0.6613503694534302, "learning_rate": 4.720815213506749e-05, "loss": 1.6283, "step": 73850 }, { "epoch": 2.46, "grad_norm": 0.6962212324142456, "learning_rate": 4.7202524052463364e-05, "loss": 1.6928, "step": 73851 }, { "epoch": 2.46, "grad_norm": 0.712043046951294, "learning_rate": 4.719689627671821e-05, "loss": 1.7165, "step": 73852 }, { "epoch": 2.46, "grad_norm": 0.6851374506950378, "learning_rate": 4.7191268807838757e-05, "loss": 1.6471, "step": 73853 }, { "epoch": 2.46, "grad_norm": 0.6870843768119812, "learning_rate": 4.7185641645831793e-05, "loss": 1.7055, "step": 73854 }, { "epoch": 2.46, "grad_norm": 0.6854625344276428, "learning_rate": 4.718001479070426e-05, "loss": 1.6507, "step": 73855 }, { "epoch": 2.46, "grad_norm": 0.7089813351631165, "learning_rate": 4.7174388242462965e-05, "loss": 1.6841, "step": 73856 }, { "epoch": 2.46, "grad_norm": 0.7143651843070984, "learning_rate": 4.7168762001114625e-05, "loss": 1.6966, "step": 73857 }, { "epoch": 2.46, "grad_norm": 0.7227122783660889, "learning_rate": 4.716313606666625e-05, "loss": 1.6358, "step": 73858 }, { "epoch": 2.46, "grad_norm": 0.6808997392654419, "learning_rate": 4.7157510439124476e-05, "loss": 1.6896, "step": 73859 }, { "epoch": 2.46, "grad_norm": 0.6890043020248413, "learning_rate": 4.71518851184963e-05, "loss": 1.638, "step": 73860 }, { "epoch": 2.46, "grad_norm": 0.6687011122703552, "learning_rate": 4.7146260104788415e-05, "loss": 1.6886, "step": 73861 }, { "epoch": 2.46, "grad_norm": 0.7075849175453186, "learning_rate": 4.714063539800781e-05, "loss": 1.6317, "step": 73862 }, { "epoch": 2.46, "grad_norm": 0.7121557593345642, "learning_rate": 4.7135010998161185e-05, "loss": 1.7132, "step": 73863 }, { "epoch": 2.46, "grad_norm": 0.6821853518486023, "learning_rate": 4.712938690525533e-05, "loss": 1.6632, "step": 73864 }, { "epoch": 2.46, "grad_norm": 0.682304859161377, "learning_rate": 4.712376311929721e-05, "loss": 1.7002, "step": 73865 }, { "epoch": 2.46, "grad_norm": 0.7024070620536804, "learning_rate": 4.7118139640293515e-05, "loss": 1.7592, "step": 73866 }, { "epoch": 2.46, "grad_norm": 0.7041832208633423, "learning_rate": 4.711251646825125e-05, "loss": 1.6716, "step": 73867 }, { "epoch": 2.46, "grad_norm": 0.6858832836151123, "learning_rate": 4.710689360317706e-05, "loss": 1.6349, "step": 73868 }, { "epoch": 2.46, "grad_norm": 0.686619222164154, "learning_rate": 4.710127104507783e-05, "loss": 1.6072, "step": 73869 }, { "epoch": 2.46, "grad_norm": 0.6625361442565918, "learning_rate": 4.7095648793960426e-05, "loss": 1.6609, "step": 73870 }, { "epoch": 2.46, "grad_norm": 0.6816449165344238, "learning_rate": 4.709002684983156e-05, "loss": 1.6997, "step": 73871 }, { "epoch": 2.46, "grad_norm": 0.6901901960372925, "learning_rate": 4.7084405212698226e-05, "loss": 1.7218, "step": 73872 }, { "epoch": 2.46, "grad_norm": 0.6922094821929932, "learning_rate": 4.707878388256712e-05, "loss": 1.7178, "step": 73873 }, { "epoch": 2.46, "grad_norm": 0.6785509586334229, "learning_rate": 4.707316285944512e-05, "loss": 1.6906, "step": 73874 }, { "epoch": 2.46, "grad_norm": 0.708271861076355, "learning_rate": 4.7067542143339074e-05, "loss": 1.6685, "step": 73875 }, { "epoch": 2.46, "grad_norm": 0.6858681440353394, "learning_rate": 4.706192173425567e-05, "loss": 1.6919, "step": 73876 }, { "epoch": 2.46, "grad_norm": 0.703423261642456, "learning_rate": 4.70563016322019e-05, "loss": 1.6422, "step": 73877 }, { "epoch": 2.46, "grad_norm": 0.6715782284736633, "learning_rate": 4.7050681837184524e-05, "loss": 1.7493, "step": 73878 }, { "epoch": 2.46, "grad_norm": 0.6903823614120483, "learning_rate": 4.7045062349210275e-05, "loss": 1.7087, "step": 73879 }, { "epoch": 2.46, "grad_norm": 0.684611439704895, "learning_rate": 4.703944316828604e-05, "loss": 1.6592, "step": 73880 }, { "epoch": 2.46, "grad_norm": 0.7273104786872864, "learning_rate": 4.703382429441872e-05, "loss": 1.6179, "step": 73881 }, { "epoch": 2.46, "grad_norm": 0.6706556081771851, "learning_rate": 4.702820572761504e-05, "loss": 1.7243, "step": 73882 }, { "epoch": 2.46, "grad_norm": 0.680098295211792, "learning_rate": 4.70225874678818e-05, "loss": 1.6824, "step": 73883 }, { "epoch": 2.46, "grad_norm": 0.7256080508232117, "learning_rate": 4.701696951522595e-05, "loss": 1.6896, "step": 73884 }, { "epoch": 2.46, "grad_norm": 0.6897770166397095, "learning_rate": 4.70113518696542e-05, "loss": 1.6443, "step": 73885 }, { "epoch": 2.46, "grad_norm": 0.7168208360671997, "learning_rate": 4.7005734531173334e-05, "loss": 1.6649, "step": 73886 }, { "epoch": 2.46, "grad_norm": 0.7068517804145813, "learning_rate": 4.7000117499790216e-05, "loss": 1.6898, "step": 73887 }, { "epoch": 2.46, "grad_norm": 0.6723271608352661, "learning_rate": 4.6994500775511835e-05, "loss": 1.6421, "step": 73888 }, { "epoch": 2.46, "grad_norm": 0.6950100064277649, "learning_rate": 4.6988884358344725e-05, "loss": 1.726, "step": 73889 }, { "epoch": 2.46, "grad_norm": 0.6835796236991882, "learning_rate": 4.698326824829578e-05, "loss": 1.6462, "step": 73890 }, { "epoch": 2.46, "grad_norm": 0.6969935894012451, "learning_rate": 4.6977652445371995e-05, "loss": 1.6258, "step": 73891 }, { "epoch": 2.46, "grad_norm": 0.7086727619171143, "learning_rate": 4.697203694958003e-05, "loss": 1.6373, "step": 73892 }, { "epoch": 2.46, "grad_norm": 0.6730910539627075, "learning_rate": 4.696642176092664e-05, "loss": 1.6153, "step": 73893 }, { "epoch": 2.46, "grad_norm": 0.688401460647583, "learning_rate": 4.6960806879418767e-05, "loss": 1.6457, "step": 73894 }, { "epoch": 2.46, "grad_norm": 0.6997725963592529, "learning_rate": 4.695519230506329e-05, "loss": 1.7994, "step": 73895 }, { "epoch": 2.46, "grad_norm": 0.6962411999702454, "learning_rate": 4.694957803786682e-05, "loss": 1.6639, "step": 73896 }, { "epoch": 2.46, "grad_norm": 0.6785824298858643, "learning_rate": 4.6943964077836275e-05, "loss": 1.6793, "step": 73897 }, { "epoch": 2.46, "grad_norm": 0.6931430101394653, "learning_rate": 4.693835042497854e-05, "loss": 1.747, "step": 73898 }, { "epoch": 2.46, "grad_norm": 0.6700001358985901, "learning_rate": 4.693273707930036e-05, "loss": 1.6792, "step": 73899 }, { "epoch": 2.46, "grad_norm": 0.6831034421920776, "learning_rate": 4.692712404080846e-05, "loss": 1.6561, "step": 73900 }, { "epoch": 2.46, "grad_norm": 0.7207071185112, "learning_rate": 4.692151130950976e-05, "loss": 1.7524, "step": 73901 }, { "epoch": 2.46, "grad_norm": 0.6999858617782593, "learning_rate": 4.6915898885411194e-05, "loss": 1.6826, "step": 73902 }, { "epoch": 2.46, "grad_norm": 0.7088873386383057, "learning_rate": 4.691028676851929e-05, "loss": 1.6878, "step": 73903 }, { "epoch": 2.46, "grad_norm": 0.7182974219322205, "learning_rate": 4.6904674958841005e-05, "loss": 1.7345, "step": 73904 }, { "epoch": 2.46, "grad_norm": 0.6933966875076294, "learning_rate": 4.6899063456383234e-05, "loss": 1.6763, "step": 73905 }, { "epoch": 2.46, "grad_norm": 0.6879293322563171, "learning_rate": 4.689345226115271e-05, "loss": 1.7604, "step": 73906 }, { "epoch": 2.46, "grad_norm": 0.6910656690597534, "learning_rate": 4.688784137315615e-05, "loss": 1.6299, "step": 73907 }, { "epoch": 2.46, "grad_norm": 0.6788097620010376, "learning_rate": 4.688223079240057e-05, "loss": 1.6832, "step": 73908 }, { "epoch": 2.46, "grad_norm": 0.7111579179763794, "learning_rate": 4.687662051889264e-05, "loss": 1.7712, "step": 73909 }, { "epoch": 2.46, "grad_norm": 0.6893555521965027, "learning_rate": 4.68710105526391e-05, "loss": 1.6749, "step": 73910 }, { "epoch": 2.46, "grad_norm": 0.9909327626228333, "learning_rate": 4.686540089364691e-05, "loss": 1.7217, "step": 73911 }, { "epoch": 2.46, "grad_norm": 0.6745824217796326, "learning_rate": 4.685979154192287e-05, "loss": 1.6485, "step": 73912 }, { "epoch": 2.46, "grad_norm": 0.6987525224685669, "learning_rate": 4.6854182497473726e-05, "loss": 1.7368, "step": 73913 }, { "epoch": 2.46, "grad_norm": 0.6755833029747009, "learning_rate": 4.684857376030625e-05, "loss": 1.627, "step": 73914 }, { "epoch": 2.46, "grad_norm": 0.7080826759338379, "learning_rate": 4.684296533042737e-05, "loss": 1.7004, "step": 73915 }, { "epoch": 2.46, "grad_norm": 0.683371901512146, "learning_rate": 4.683735720784385e-05, "loss": 1.6446, "step": 73916 }, { "epoch": 2.46, "grad_norm": 0.6948315501213074, "learning_rate": 4.683174939256238e-05, "loss": 1.6891, "step": 73917 }, { "epoch": 2.46, "grad_norm": 0.6903084516525269, "learning_rate": 4.682614188458989e-05, "loss": 1.6847, "step": 73918 }, { "epoch": 2.46, "grad_norm": 0.6996057033538818, "learning_rate": 4.68205346839332e-05, "loss": 1.6587, "step": 73919 }, { "epoch": 2.46, "grad_norm": 0.7183359265327454, "learning_rate": 4.681492779059911e-05, "loss": 1.7369, "step": 73920 }, { "epoch": 2.46, "grad_norm": 0.66753751039505, "learning_rate": 4.6809321204594286e-05, "loss": 1.6228, "step": 73921 }, { "epoch": 2.46, "grad_norm": 0.6878340840339661, "learning_rate": 4.680371492592572e-05, "loss": 1.7812, "step": 73922 }, { "epoch": 2.46, "grad_norm": 0.7092640399932861, "learning_rate": 4.6798108954600165e-05, "loss": 1.703, "step": 73923 }, { "epoch": 2.46, "grad_norm": 0.6904463171958923, "learning_rate": 4.679250329062429e-05, "loss": 1.7066, "step": 73924 }, { "epoch": 2.46, "grad_norm": 0.7037482261657715, "learning_rate": 4.6786897934005094e-05, "loss": 1.736, "step": 73925 }, { "epoch": 2.46, "grad_norm": 0.6853013038635254, "learning_rate": 4.6781292884749234e-05, "loss": 1.6829, "step": 73926 }, { "epoch": 2.46, "grad_norm": 0.6984800696372986, "learning_rate": 4.6775688142863634e-05, "loss": 1.7438, "step": 73927 }, { "epoch": 2.46, "grad_norm": 0.6828540563583374, "learning_rate": 4.677008370835499e-05, "loss": 1.7343, "step": 73928 }, { "epoch": 2.46, "grad_norm": 0.7050327062606812, "learning_rate": 4.67644795812302e-05, "loss": 1.7373, "step": 73929 }, { "epoch": 2.46, "grad_norm": 0.6876190900802612, "learning_rate": 4.675887576149602e-05, "loss": 1.6968, "step": 73930 }, { "epoch": 2.46, "grad_norm": 0.6726580858230591, "learning_rate": 4.675327224915918e-05, "loss": 1.6334, "step": 73931 }, { "epoch": 2.46, "grad_norm": 0.6823833584785461, "learning_rate": 4.674766904422664e-05, "loss": 1.7045, "step": 73932 }, { "epoch": 2.46, "grad_norm": 0.6890762448310852, "learning_rate": 4.674206614670504e-05, "loss": 1.6724, "step": 73933 }, { "epoch": 2.46, "grad_norm": 0.6938669681549072, "learning_rate": 4.673646355660132e-05, "loss": 1.7461, "step": 73934 }, { "epoch": 2.46, "grad_norm": 0.7121073007583618, "learning_rate": 4.673086127392219e-05, "loss": 1.7479, "step": 73935 }, { "epoch": 2.46, "grad_norm": 0.6824817061424255, "learning_rate": 4.672525929867444e-05, "loss": 1.7062, "step": 73936 }, { "epoch": 2.46, "grad_norm": 0.7132248282432556, "learning_rate": 4.671965763086497e-05, "loss": 1.6536, "step": 73937 }, { "epoch": 2.46, "grad_norm": 0.6890085339546204, "learning_rate": 4.6714056270500467e-05, "loss": 1.6498, "step": 73938 }, { "epoch": 2.46, "grad_norm": 0.6925903558731079, "learning_rate": 4.670845521758783e-05, "loss": 1.6378, "step": 73939 }, { "epoch": 2.46, "grad_norm": 0.6682572364807129, "learning_rate": 4.670285447213374e-05, "loss": 1.6788, "step": 73940 }, { "epoch": 2.46, "grad_norm": 0.6857499480247498, "learning_rate": 4.6697254034145116e-05, "loss": 1.6901, "step": 73941 }, { "epoch": 2.46, "grad_norm": 0.7006551027297974, "learning_rate": 4.669165390362873e-05, "loss": 1.6023, "step": 73942 }, { "epoch": 2.46, "grad_norm": 0.6917127966880798, "learning_rate": 4.668605408059126e-05, "loss": 1.7448, "step": 73943 }, { "epoch": 2.46, "grad_norm": 0.6805936694145203, "learning_rate": 4.6680454565039696e-05, "loss": 1.6545, "step": 73944 }, { "epoch": 2.46, "grad_norm": 0.6997218132019043, "learning_rate": 4.667485535698069e-05, "loss": 1.6589, "step": 73945 }, { "epoch": 2.46, "grad_norm": 0.7272070050239563, "learning_rate": 4.6669256456421044e-05, "loss": 1.5863, "step": 73946 }, { "epoch": 2.46, "grad_norm": 0.6931343078613281, "learning_rate": 4.6663657863367585e-05, "loss": 1.5987, "step": 73947 }, { "epoch": 2.46, "grad_norm": 0.7015507221221924, "learning_rate": 4.665805957782721e-05, "loss": 1.7255, "step": 73948 }, { "epoch": 2.46, "grad_norm": 0.6883589029312134, "learning_rate": 4.6652461599806576e-05, "loss": 1.649, "step": 73949 }, { "epoch": 2.46, "grad_norm": 0.709262490272522, "learning_rate": 4.6646863929312516e-05, "loss": 1.7023, "step": 73950 }, { "epoch": 2.46, "grad_norm": 0.6711040735244751, "learning_rate": 4.6641266566351853e-05, "loss": 1.7024, "step": 73951 }, { "epoch": 2.46, "grad_norm": 0.681984007358551, "learning_rate": 4.6635669510931385e-05, "loss": 1.6714, "step": 73952 }, { "epoch": 2.46, "grad_norm": 0.7173690795898438, "learning_rate": 4.66300727630578e-05, "loss": 1.7039, "step": 73953 }, { "epoch": 2.46, "grad_norm": 0.6936244964599609, "learning_rate": 4.6624476322738e-05, "loss": 1.679, "step": 73954 }, { "epoch": 2.46, "grad_norm": 0.7220675349235535, "learning_rate": 4.661888018997878e-05, "loss": 1.7878, "step": 73955 }, { "epoch": 2.46, "grad_norm": 0.6948564052581787, "learning_rate": 4.661328436478693e-05, "loss": 1.7042, "step": 73956 }, { "epoch": 2.46, "grad_norm": 0.7078149318695068, "learning_rate": 4.6607688847169175e-05, "loss": 1.6546, "step": 73957 }, { "epoch": 2.46, "grad_norm": 0.6881143450737, "learning_rate": 4.660209363713239e-05, "loss": 1.6787, "step": 73958 }, { "epoch": 2.46, "grad_norm": 0.6689239740371704, "learning_rate": 4.659649873468332e-05, "loss": 1.6139, "step": 73959 }, { "epoch": 2.46, "grad_norm": 0.6822866201400757, "learning_rate": 4.65909041398287e-05, "loss": 1.7185, "step": 73960 }, { "epoch": 2.46, "grad_norm": 0.6840204000473022, "learning_rate": 4.658530985257539e-05, "loss": 1.7012, "step": 73961 }, { "epoch": 2.46, "grad_norm": 0.71822589635849, "learning_rate": 4.657971587293026e-05, "loss": 1.7568, "step": 73962 }, { "epoch": 2.46, "grad_norm": 0.6910896897315979, "learning_rate": 4.657412220089999e-05, "loss": 1.6914, "step": 73963 }, { "epoch": 2.46, "grad_norm": 0.6955090165138245, "learning_rate": 4.6568528836491314e-05, "loss": 1.7505, "step": 73964 }, { "epoch": 2.46, "grad_norm": 0.7088846564292908, "learning_rate": 4.65629357797112e-05, "loss": 1.705, "step": 73965 }, { "epoch": 2.46, "grad_norm": 0.6838304996490479, "learning_rate": 4.655734303056633e-05, "loss": 1.6036, "step": 73966 }, { "epoch": 2.46, "grad_norm": 0.7186911106109619, "learning_rate": 4.655175058906344e-05, "loss": 1.6601, "step": 73967 }, { "epoch": 2.46, "grad_norm": 0.6751441359519958, "learning_rate": 4.654615845520936e-05, "loss": 1.6883, "step": 73968 }, { "epoch": 2.46, "grad_norm": 0.6994100213050842, "learning_rate": 4.654056662901101e-05, "loss": 1.7141, "step": 73969 }, { "epoch": 2.46, "grad_norm": 0.6891763210296631, "learning_rate": 4.6534975110475023e-05, "loss": 1.704, "step": 73970 }, { "epoch": 2.46, "grad_norm": 0.6938682794570923, "learning_rate": 4.6529383899608196e-05, "loss": 1.6366, "step": 73971 }, { "epoch": 2.46, "grad_norm": 0.7089057564735413, "learning_rate": 4.652379299641739e-05, "loss": 1.6853, "step": 73972 }, { "epoch": 2.46, "grad_norm": 0.6989418268203735, "learning_rate": 4.651820240090936e-05, "loss": 1.7174, "step": 73973 }, { "epoch": 2.46, "grad_norm": 0.9335606694221497, "learning_rate": 4.651261211309083e-05, "loss": 1.761, "step": 73974 }, { "epoch": 2.46, "grad_norm": 0.6789085865020752, "learning_rate": 4.6507022132968676e-05, "loss": 1.7763, "step": 73975 }, { "epoch": 2.46, "grad_norm": 0.7313528060913086, "learning_rate": 4.650143246054962e-05, "loss": 1.6587, "step": 73976 }, { "epoch": 2.46, "grad_norm": 0.6753024458885193, "learning_rate": 4.649584309584051e-05, "loss": 1.6713, "step": 73977 }, { "epoch": 2.46, "grad_norm": 0.6671286225318909, "learning_rate": 4.6490254038848026e-05, "loss": 1.7663, "step": 73978 }, { "epoch": 2.46, "grad_norm": 0.6777344942092896, "learning_rate": 4.6484665289579094e-05, "loss": 1.729, "step": 73979 }, { "epoch": 2.46, "grad_norm": 0.6672793626785278, "learning_rate": 4.647907684804043e-05, "loss": 1.5922, "step": 73980 }, { "epoch": 2.46, "grad_norm": 0.7253337502479553, "learning_rate": 4.6473488714238736e-05, "loss": 1.6035, "step": 73981 }, { "epoch": 2.46, "grad_norm": 0.6867792010307312, "learning_rate": 4.646790088818094e-05, "loss": 1.7749, "step": 73982 }, { "epoch": 2.46, "grad_norm": 0.6909348368644714, "learning_rate": 4.646231336987371e-05, "loss": 1.7374, "step": 73983 }, { "epoch": 2.46, "grad_norm": 0.7062498927116394, "learning_rate": 4.645672615932392e-05, "loss": 1.6427, "step": 73984 }, { "epoch": 2.46, "grad_norm": 0.6849578022956848, "learning_rate": 4.6451139256538314e-05, "loss": 1.6986, "step": 73985 }, { "epoch": 2.46, "grad_norm": 0.6866201758384705, "learning_rate": 4.644555266152359e-05, "loss": 1.6886, "step": 73986 }, { "epoch": 2.46, "grad_norm": 0.6702491044998169, "learning_rate": 4.64399663742867e-05, "loss": 1.6589, "step": 73987 }, { "epoch": 2.46, "grad_norm": 0.6986926794052124, "learning_rate": 4.643438039483424e-05, "loss": 1.6935, "step": 73988 }, { "epoch": 2.46, "grad_norm": 0.6917968988418579, "learning_rate": 4.642879472317317e-05, "loss": 1.6227, "step": 73989 }, { "epoch": 2.46, "grad_norm": 0.6995894312858582, "learning_rate": 4.6423209359310086e-05, "loss": 1.7375, "step": 73990 }, { "epoch": 2.46, "grad_norm": 0.7350822687149048, "learning_rate": 4.6417624303251924e-05, "loss": 1.7656, "step": 73991 }, { "epoch": 2.46, "grad_norm": 0.6853447556495667, "learning_rate": 4.641203955500543e-05, "loss": 1.707, "step": 73992 }, { "epoch": 2.46, "grad_norm": 0.7097249627113342, "learning_rate": 4.640645511457728e-05, "loss": 1.6822, "step": 73993 }, { "epoch": 2.46, "grad_norm": 0.6900911927223206, "learning_rate": 4.640087098197439e-05, "loss": 1.7036, "step": 73994 }, { "epoch": 2.46, "grad_norm": 0.6823566555976868, "learning_rate": 4.6395287157203464e-05, "loss": 1.6831, "step": 73995 }, { "epoch": 2.46, "grad_norm": 0.692808985710144, "learning_rate": 4.638970364027126e-05, "loss": 1.6187, "step": 73996 }, { "epoch": 2.46, "grad_norm": 0.6813879609107971, "learning_rate": 4.638412043118457e-05, "loss": 1.6852, "step": 73997 }, { "epoch": 2.46, "grad_norm": 0.7044887542724609, "learning_rate": 4.6378537529950254e-05, "loss": 1.6926, "step": 73998 }, { "epoch": 2.46, "grad_norm": 0.7098668813705444, "learning_rate": 4.6372954936575015e-05, "loss": 1.6418, "step": 73999 }, { "epoch": 2.46, "grad_norm": 0.6944589614868164, "learning_rate": 4.636737265106557e-05, "loss": 1.7257, "step": 74000 }, { "epoch": 2.46, "grad_norm": 0.684598445892334, "learning_rate": 4.6361790673428855e-05, "loss": 1.6526, "step": 74001 }, { "epoch": 2.46, "grad_norm": 0.6831504106521606, "learning_rate": 4.635620900367156e-05, "loss": 1.63, "step": 74002 }, { "epoch": 2.46, "grad_norm": 0.7010161280632019, "learning_rate": 4.635062764180035e-05, "loss": 1.7247, "step": 74003 }, { "epoch": 2.46, "grad_norm": 0.6861743330955505, "learning_rate": 4.634504658782212e-05, "loss": 1.6889, "step": 74004 }, { "epoch": 2.46, "grad_norm": 0.6811913847923279, "learning_rate": 4.633946584174379e-05, "loss": 1.6288, "step": 74005 }, { "epoch": 2.46, "grad_norm": 0.7134864926338196, "learning_rate": 4.63338854035718e-05, "loss": 1.6453, "step": 74006 }, { "epoch": 2.46, "grad_norm": 0.6949944496154785, "learning_rate": 4.6328305273313106e-05, "loss": 1.6841, "step": 74007 }, { "epoch": 2.46, "grad_norm": 0.7029097080230713, "learning_rate": 4.632272545097456e-05, "loss": 1.7069, "step": 74008 }, { "epoch": 2.46, "grad_norm": 0.6929476857185364, "learning_rate": 4.631714593656284e-05, "loss": 1.75, "step": 74009 }, { "epoch": 2.46, "grad_norm": 0.6961967945098877, "learning_rate": 4.631156673008466e-05, "loss": 1.6761, "step": 74010 }, { "epoch": 2.46, "grad_norm": 0.6667501926422119, "learning_rate": 4.630598783154685e-05, "loss": 1.6326, "step": 74011 }, { "epoch": 2.46, "grad_norm": 0.7026661038398743, "learning_rate": 4.6300409240956316e-05, "loss": 1.7271, "step": 74012 }, { "epoch": 2.46, "grad_norm": 0.7003902792930603, "learning_rate": 4.6294830958319576e-05, "loss": 1.6795, "step": 74013 }, { "epoch": 2.46, "grad_norm": 1.132503867149353, "learning_rate": 4.628925298364355e-05, "loss": 1.7877, "step": 74014 }, { "epoch": 2.46, "grad_norm": 0.7177129983901978, "learning_rate": 4.628367531693502e-05, "loss": 1.6379, "step": 74015 }, { "epoch": 2.46, "grad_norm": 0.7065322399139404, "learning_rate": 4.627809795820077e-05, "loss": 1.7011, "step": 74016 }, { "epoch": 2.46, "grad_norm": 0.6899012923240662, "learning_rate": 4.627252090744742e-05, "loss": 1.624, "step": 74017 }, { "epoch": 2.46, "grad_norm": 0.6949824094772339, "learning_rate": 4.6266944164681876e-05, "loss": 1.7079, "step": 74018 }, { "epoch": 2.46, "grad_norm": 0.6878702044487, "learning_rate": 4.626136772991099e-05, "loss": 1.7077, "step": 74019 }, { "epoch": 2.46, "grad_norm": 0.7256414890289307, "learning_rate": 4.625579160314127e-05, "loss": 1.657, "step": 74020 }, { "epoch": 2.46, "grad_norm": 0.7135345339775085, "learning_rate": 4.625021578437966e-05, "loss": 1.7036, "step": 74021 }, { "epoch": 2.46, "grad_norm": 0.6834986209869385, "learning_rate": 4.624464027363294e-05, "loss": 1.6206, "step": 74022 }, { "epoch": 2.46, "grad_norm": 0.6877068281173706, "learning_rate": 4.623906507090785e-05, "loss": 1.6786, "step": 74023 }, { "epoch": 2.46, "grad_norm": 0.6775524616241455, "learning_rate": 4.623349017621107e-05, "loss": 1.6619, "step": 74024 }, { "epoch": 2.46, "grad_norm": 0.6887027621269226, "learning_rate": 4.622791558954953e-05, "loss": 1.6703, "step": 74025 }, { "epoch": 2.46, "grad_norm": 0.7134641408920288, "learning_rate": 4.62223413109299e-05, "loss": 1.667, "step": 74026 }, { "epoch": 2.46, "grad_norm": 0.724162757396698, "learning_rate": 4.6216767340358896e-05, "loss": 1.7414, "step": 74027 }, { "epoch": 2.46, "grad_norm": 0.6788608431816101, "learning_rate": 4.62111936778433e-05, "loss": 1.5751, "step": 74028 }, { "epoch": 2.46, "grad_norm": 0.6784027218818665, "learning_rate": 4.6205620323390056e-05, "loss": 1.6548, "step": 74029 }, { "epoch": 2.46, "grad_norm": 0.703755795955658, "learning_rate": 4.620004727700575e-05, "loss": 1.7332, "step": 74030 }, { "epoch": 2.46, "grad_norm": 0.6891866326332092, "learning_rate": 4.6194474538697134e-05, "loss": 1.6581, "step": 74031 }, { "epoch": 2.46, "grad_norm": 0.6902145743370056, "learning_rate": 4.618890210847108e-05, "loss": 1.6351, "step": 74032 }, { "epoch": 2.46, "grad_norm": 0.7170259356498718, "learning_rate": 4.6183329986334314e-05, "loss": 1.7546, "step": 74033 }, { "epoch": 2.46, "grad_norm": 0.6851305961608887, "learning_rate": 4.617775817229353e-05, "loss": 1.6966, "step": 74034 }, { "epoch": 2.46, "grad_norm": 0.681891918182373, "learning_rate": 4.617218666635556e-05, "loss": 1.6877, "step": 74035 }, { "epoch": 2.46, "grad_norm": 0.6854676008224487, "learning_rate": 4.616661546852719e-05, "loss": 1.6709, "step": 74036 }, { "epoch": 2.46, "grad_norm": 0.6853453516960144, "learning_rate": 4.6161044578815154e-05, "loss": 1.7349, "step": 74037 }, { "epoch": 2.46, "grad_norm": 0.6728824973106384, "learning_rate": 4.615547399722615e-05, "loss": 1.6806, "step": 74038 }, { "epoch": 2.46, "grad_norm": 0.6968317627906799, "learning_rate": 4.614990372376709e-05, "loss": 1.6934, "step": 74039 }, { "epoch": 2.46, "grad_norm": 0.7078331708908081, "learning_rate": 4.614433375844462e-05, "loss": 1.6304, "step": 74040 }, { "epoch": 2.46, "grad_norm": 0.6815133690834045, "learning_rate": 4.6138764101265426e-05, "loss": 1.7132, "step": 74041 }, { "epoch": 2.46, "grad_norm": 0.6847171187400818, "learning_rate": 4.6133194752236467e-05, "loss": 1.7323, "step": 74042 }, { "epoch": 2.46, "grad_norm": 0.6907339692115784, "learning_rate": 4.612762571136435e-05, "loss": 1.6864, "step": 74043 }, { "epoch": 2.46, "grad_norm": 0.6867993474006653, "learning_rate": 4.6122056978655955e-05, "loss": 1.7323, "step": 74044 }, { "epoch": 2.46, "grad_norm": 0.6927320957183838, "learning_rate": 4.611648855411788e-05, "loss": 1.7411, "step": 74045 }, { "epoch": 2.46, "grad_norm": 0.6818090081214905, "learning_rate": 4.6110920437757056e-05, "loss": 1.7483, "step": 74046 }, { "epoch": 2.46, "grad_norm": 0.6883507370948792, "learning_rate": 4.6105352629580185e-05, "loss": 1.7243, "step": 74047 }, { "epoch": 2.46, "grad_norm": 0.7341359853744507, "learning_rate": 4.6099785129593914e-05, "loss": 1.6725, "step": 74048 }, { "epoch": 2.46, "grad_norm": 0.7051562666893005, "learning_rate": 4.6094217937805175e-05, "loss": 1.6877, "step": 74049 }, { "epoch": 2.46, "grad_norm": 0.6918883919715881, "learning_rate": 4.608865105422057e-05, "loss": 1.6287, "step": 74050 }, { "epoch": 2.46, "grad_norm": 0.6870434284210205, "learning_rate": 4.608308447884702e-05, "loss": 1.6795, "step": 74051 }, { "epoch": 2.46, "grad_norm": 0.7088590860366821, "learning_rate": 4.6077518211691155e-05, "loss": 1.7851, "step": 74052 }, { "epoch": 2.46, "grad_norm": 0.6915287971496582, "learning_rate": 4.607195225275974e-05, "loss": 1.6643, "step": 74053 }, { "epoch": 2.46, "grad_norm": 0.7018164396286011, "learning_rate": 4.606638660205959e-05, "loss": 1.7542, "step": 74054 }, { "epoch": 2.46, "grad_norm": 0.7069258689880371, "learning_rate": 4.606082125959741e-05, "loss": 1.6427, "step": 74055 }, { "epoch": 2.46, "grad_norm": 0.6919196844100952, "learning_rate": 4.605525622538e-05, "loss": 1.7138, "step": 74056 }, { "epoch": 2.46, "grad_norm": 0.6813525557518005, "learning_rate": 4.6049691499414044e-05, "loss": 1.6269, "step": 74057 }, { "epoch": 2.46, "grad_norm": 0.7121957540512085, "learning_rate": 4.6044127081706406e-05, "loss": 1.6168, "step": 74058 }, { "epoch": 2.46, "grad_norm": 0.6715201139450073, "learning_rate": 4.603856297226378e-05, "loss": 1.7083, "step": 74059 }, { "epoch": 2.46, "grad_norm": 0.7125376462936401, "learning_rate": 4.6032999171092864e-05, "loss": 1.7454, "step": 74060 }, { "epoch": 2.46, "grad_norm": 0.6793552041053772, "learning_rate": 4.602743567820052e-05, "loss": 1.7032, "step": 74061 }, { "epoch": 2.46, "grad_norm": 0.7068874835968018, "learning_rate": 4.602187249359347e-05, "loss": 1.5926, "step": 74062 }, { "epoch": 2.46, "grad_norm": 0.7094917297363281, "learning_rate": 4.601630961727834e-05, "loss": 1.7393, "step": 74063 }, { "epoch": 2.46, "grad_norm": 0.6945378184318542, "learning_rate": 4.601074704926201e-05, "loss": 1.6656, "step": 74064 }, { "epoch": 2.46, "grad_norm": 0.693088948726654, "learning_rate": 4.6005184789551287e-05, "loss": 1.7088, "step": 74065 }, { "epoch": 2.46, "grad_norm": 0.6818523406982422, "learning_rate": 4.5999622838152836e-05, "loss": 1.7963, "step": 74066 }, { "epoch": 2.46, "grad_norm": 0.6958276629447937, "learning_rate": 4.599406119507336e-05, "loss": 1.6688, "step": 74067 }, { "epoch": 2.46, "grad_norm": 0.6944443583488464, "learning_rate": 4.5988499860319715e-05, "loss": 1.7129, "step": 74068 }, { "epoch": 2.46, "grad_norm": 0.7110522389411926, "learning_rate": 4.59829388338986e-05, "loss": 1.7844, "step": 74069 }, { "epoch": 2.46, "grad_norm": 0.6888315677642822, "learning_rate": 4.597737811581673e-05, "loss": 1.7055, "step": 74070 }, { "epoch": 2.46, "grad_norm": 0.695608377456665, "learning_rate": 4.5971817706080884e-05, "loss": 1.6748, "step": 74071 }, { "epoch": 2.46, "grad_norm": 0.6666301488876343, "learning_rate": 4.5966257604697874e-05, "loss": 1.6704, "step": 74072 }, { "epoch": 2.46, "grad_norm": 0.6870354413986206, "learning_rate": 4.5960697811674404e-05, "loss": 1.7073, "step": 74073 }, { "epoch": 2.46, "grad_norm": 0.7101415991783142, "learning_rate": 4.595513832701714e-05, "loss": 1.6999, "step": 74074 }, { "epoch": 2.46, "grad_norm": 0.6783096790313721, "learning_rate": 4.5949579150732997e-05, "loss": 1.7185, "step": 74075 }, { "epoch": 2.46, "grad_norm": 0.6754035949707031, "learning_rate": 4.594402028282861e-05, "loss": 1.6917, "step": 74076 }, { "epoch": 2.46, "grad_norm": 0.6724748015403748, "learning_rate": 4.59384617233107e-05, "loss": 1.6244, "step": 74077 }, { "epoch": 2.46, "grad_norm": 0.6685115694999695, "learning_rate": 4.593290347218604e-05, "loss": 1.6743, "step": 74078 }, { "epoch": 2.46, "grad_norm": 0.7076835632324219, "learning_rate": 4.592734552946149e-05, "loss": 1.6362, "step": 74079 }, { "epoch": 2.46, "grad_norm": 0.6677869558334351, "learning_rate": 4.59217878951437e-05, "loss": 1.7384, "step": 74080 }, { "epoch": 2.46, "grad_norm": 0.6780495643615723, "learning_rate": 4.591623056923938e-05, "loss": 1.7557, "step": 74081 }, { "epoch": 2.46, "grad_norm": 0.7037107944488525, "learning_rate": 4.591067355175534e-05, "loss": 1.7155, "step": 74082 }, { "epoch": 2.46, "grad_norm": 0.679445207118988, "learning_rate": 4.5905116842698353e-05, "loss": 1.5663, "step": 74083 }, { "epoch": 2.46, "grad_norm": 0.7155050039291382, "learning_rate": 4.5899560442075014e-05, "loss": 1.6485, "step": 74084 }, { "epoch": 2.46, "grad_norm": 0.7115943431854248, "learning_rate": 4.5894004349892176e-05, "loss": 1.7312, "step": 74085 }, { "epoch": 2.46, "grad_norm": 0.6986489295959473, "learning_rate": 4.588844856615664e-05, "loss": 1.7025, "step": 74086 }, { "epoch": 2.46, "grad_norm": 0.7251591682434082, "learning_rate": 4.588289309087507e-05, "loss": 1.6743, "step": 74087 }, { "epoch": 2.46, "grad_norm": 0.6963658928871155, "learning_rate": 4.587733792405419e-05, "loss": 1.7456, "step": 74088 }, { "epoch": 2.46, "grad_norm": 0.6946617364883423, "learning_rate": 4.5871783065700826e-05, "loss": 1.6762, "step": 74089 }, { "epoch": 2.46, "grad_norm": 0.6864912509918213, "learning_rate": 4.586622851582168e-05, "loss": 1.7327, "step": 74090 }, { "epoch": 2.47, "grad_norm": 0.7063793540000916, "learning_rate": 4.586067427442344e-05, "loss": 1.6091, "step": 74091 }, { "epoch": 2.47, "grad_norm": 0.6873372197151184, "learning_rate": 4.585512034151293e-05, "loss": 1.6782, "step": 74092 }, { "epoch": 2.47, "grad_norm": 0.6977611780166626, "learning_rate": 4.584956671709679e-05, "loss": 1.6855, "step": 74093 }, { "epoch": 2.47, "grad_norm": 0.7098633646965027, "learning_rate": 4.58440134011819e-05, "loss": 1.7622, "step": 74094 }, { "epoch": 2.47, "grad_norm": 0.6853936910629272, "learning_rate": 4.58384603937749e-05, "loss": 1.7121, "step": 74095 }, { "epoch": 2.47, "grad_norm": 0.7018159627914429, "learning_rate": 4.583290769488258e-05, "loss": 1.7339, "step": 74096 }, { "epoch": 2.47, "grad_norm": 0.688040554523468, "learning_rate": 4.582735530451167e-05, "loss": 1.7332, "step": 74097 }, { "epoch": 2.47, "grad_norm": 0.6707010269165039, "learning_rate": 4.5821803222668865e-05, "loss": 1.6541, "step": 74098 }, { "epoch": 2.47, "grad_norm": 0.6906100511550903, "learning_rate": 4.581625144936098e-05, "loss": 1.607, "step": 74099 }, { "epoch": 2.47, "grad_norm": 0.678074300289154, "learning_rate": 4.581069998459464e-05, "loss": 1.6794, "step": 74100 }, { "epoch": 2.47, "grad_norm": 0.6827155351638794, "learning_rate": 4.580514882837674e-05, "loss": 1.7224, "step": 74101 }, { "epoch": 2.47, "grad_norm": 0.6805453896522522, "learning_rate": 4.579959798071392e-05, "loss": 1.7071, "step": 74102 }, { "epoch": 2.47, "grad_norm": 0.7065742015838623, "learning_rate": 4.579404744161288e-05, "loss": 1.7032, "step": 74103 }, { "epoch": 2.47, "grad_norm": 0.697361171245575, "learning_rate": 4.578849721108048e-05, "loss": 1.6974, "step": 74104 }, { "epoch": 2.47, "grad_norm": 0.7076478004455566, "learning_rate": 4.5782947289123306e-05, "loss": 1.7378, "step": 74105 }, { "epoch": 2.47, "grad_norm": 0.6836073398590088, "learning_rate": 4.577739767574826e-05, "loss": 1.6582, "step": 74106 }, { "epoch": 2.47, "grad_norm": 0.7006927132606506, "learning_rate": 4.577184837096191e-05, "loss": 1.7775, "step": 74107 }, { "epoch": 2.47, "grad_norm": 0.6769334673881531, "learning_rate": 4.5766299374771164e-05, "loss": 1.6668, "step": 74108 }, { "epoch": 2.47, "grad_norm": 0.6860223412513733, "learning_rate": 4.5760750687182665e-05, "loss": 1.6588, "step": 74109 }, { "epoch": 2.47, "grad_norm": 0.6994142532348633, "learning_rate": 4.575520230820308e-05, "loss": 1.6857, "step": 74110 }, { "epoch": 2.47, "grad_norm": 0.6897390484809875, "learning_rate": 4.5749654237839283e-05, "loss": 1.7018, "step": 74111 }, { "epoch": 2.47, "grad_norm": 0.6713688969612122, "learning_rate": 4.574410647609795e-05, "loss": 1.6858, "step": 74112 }, { "epoch": 2.47, "grad_norm": 0.6750994920730591, "learning_rate": 4.573855902298574e-05, "loss": 1.7092, "step": 74113 }, { "epoch": 2.47, "grad_norm": 0.6959871053695679, "learning_rate": 4.5733011878509485e-05, "loss": 1.7532, "step": 74114 }, { "epoch": 2.47, "grad_norm": 0.6944051384925842, "learning_rate": 4.57274650426759e-05, "loss": 1.7204, "step": 74115 }, { "epoch": 2.47, "grad_norm": 0.6907501220703125, "learning_rate": 4.572191851549176e-05, "loss": 1.6082, "step": 74116 }, { "epoch": 2.47, "grad_norm": 0.6889997720718384, "learning_rate": 4.571637229696366e-05, "loss": 1.7072, "step": 74117 }, { "epoch": 2.47, "grad_norm": 0.7111101746559143, "learning_rate": 4.5710826387098476e-05, "loss": 1.7751, "step": 74118 }, { "epoch": 2.47, "grad_norm": 0.7061352133750916, "learning_rate": 4.570528078590289e-05, "loss": 1.7059, "step": 74119 }, { "epoch": 2.47, "grad_norm": 0.7088250517845154, "learning_rate": 4.569973549338355e-05, "loss": 1.7429, "step": 74120 }, { "epoch": 2.47, "grad_norm": 0.6888070106506348, "learning_rate": 4.569419050954726e-05, "loss": 1.7006, "step": 74121 }, { "epoch": 2.47, "grad_norm": 0.9460393190383911, "learning_rate": 4.568864583440087e-05, "loss": 1.6329, "step": 74122 }, { "epoch": 2.47, "grad_norm": 0.7069433331489563, "learning_rate": 4.5683101467950886e-05, "loss": 1.6731, "step": 74123 }, { "epoch": 2.47, "grad_norm": 0.678561270236969, "learning_rate": 4.567755741020414e-05, "loss": 1.703, "step": 74124 }, { "epoch": 2.47, "grad_norm": 0.71376633644104, "learning_rate": 4.567201366116744e-05, "loss": 1.751, "step": 74125 }, { "epoch": 2.47, "grad_norm": 0.6772425174713135, "learning_rate": 4.566647022084743e-05, "loss": 1.6268, "step": 74126 }, { "epoch": 2.47, "grad_norm": 0.6988930702209473, "learning_rate": 4.5660927089250766e-05, "loss": 1.6674, "step": 74127 }, { "epoch": 2.47, "grad_norm": 0.7071138024330139, "learning_rate": 4.565538426638428e-05, "loss": 1.6709, "step": 74128 }, { "epoch": 2.47, "grad_norm": 0.6977728605270386, "learning_rate": 4.564984175225482e-05, "loss": 1.7012, "step": 74129 }, { "epoch": 2.47, "grad_norm": 0.6982218027114868, "learning_rate": 4.5644299546868835e-05, "loss": 1.7358, "step": 74130 }, { "epoch": 2.47, "grad_norm": 0.6994289755821228, "learning_rate": 4.563875765023321e-05, "loss": 1.7097, "step": 74131 }, { "epoch": 2.47, "grad_norm": 0.7087650299072266, "learning_rate": 4.5633216062354696e-05, "loss": 1.7427, "step": 74132 }, { "epoch": 2.47, "grad_norm": 0.7159410119056702, "learning_rate": 4.5627674783239995e-05, "loss": 1.7144, "step": 74133 }, { "epoch": 2.47, "grad_norm": 0.7206534147262573, "learning_rate": 4.5622133812895746e-05, "loss": 1.6827, "step": 74134 }, { "epoch": 2.47, "grad_norm": 0.7037478685379028, "learning_rate": 4.561659315132876e-05, "loss": 1.7233, "step": 74135 }, { "epoch": 2.47, "grad_norm": 0.7109593152999878, "learning_rate": 4.561105279854588e-05, "loss": 1.6899, "step": 74136 }, { "epoch": 2.47, "grad_norm": 0.6932504773139954, "learning_rate": 4.560551275455352e-05, "loss": 1.6966, "step": 74137 }, { "epoch": 2.47, "grad_norm": 0.7109112739562988, "learning_rate": 4.5599973019358616e-05, "loss": 1.7062, "step": 74138 }, { "epoch": 2.47, "grad_norm": 0.6841409802436829, "learning_rate": 4.559443359296796e-05, "loss": 1.6477, "step": 74139 }, { "epoch": 2.47, "grad_norm": 0.7129307985305786, "learning_rate": 4.558889447538815e-05, "loss": 1.6691, "step": 74140 }, { "epoch": 2.47, "grad_norm": 0.6862533092498779, "learning_rate": 4.5583355666625844e-05, "loss": 1.7537, "step": 74141 }, { "epoch": 2.47, "grad_norm": 0.6951995491981506, "learning_rate": 4.5577817166687935e-05, "loss": 1.6959, "step": 74142 }, { "epoch": 2.47, "grad_norm": 0.6961784362792969, "learning_rate": 4.557227897558109e-05, "loss": 1.7065, "step": 74143 }, { "epoch": 2.47, "grad_norm": 0.7052954435348511, "learning_rate": 4.556674109331193e-05, "loss": 1.6129, "step": 74144 }, { "epoch": 2.47, "grad_norm": 0.6905792951583862, "learning_rate": 4.5561203519887256e-05, "loss": 1.6596, "step": 74145 }, { "epoch": 2.47, "grad_norm": 0.7097762823104858, "learning_rate": 4.555566625531386e-05, "loss": 1.6569, "step": 74146 }, { "epoch": 2.47, "grad_norm": 0.7147406935691833, "learning_rate": 4.55501292995984e-05, "loss": 1.6681, "step": 74147 }, { "epoch": 2.47, "grad_norm": 0.7001785635948181, "learning_rate": 4.5544592652747546e-05, "loss": 1.6822, "step": 74148 }, { "epoch": 2.47, "grad_norm": 0.6630634665489197, "learning_rate": 4.553905631476809e-05, "loss": 1.6751, "step": 74149 }, { "epoch": 2.47, "grad_norm": 0.6957218647003174, "learning_rate": 4.553352028566675e-05, "loss": 1.6085, "step": 74150 }, { "epoch": 2.47, "grad_norm": 0.7121401429176331, "learning_rate": 4.552798456545016e-05, "loss": 1.6379, "step": 74151 }, { "epoch": 2.47, "grad_norm": 0.6808274388313293, "learning_rate": 4.552244915412508e-05, "loss": 1.6527, "step": 74152 }, { "epoch": 2.47, "grad_norm": 0.6930689811706543, "learning_rate": 4.551691405169832e-05, "loss": 1.6877, "step": 74153 }, { "epoch": 2.47, "grad_norm": 0.6974440217018127, "learning_rate": 4.551137925817655e-05, "loss": 1.6292, "step": 74154 }, { "epoch": 2.47, "grad_norm": 0.6918911933898926, "learning_rate": 4.550584477356638e-05, "loss": 1.656, "step": 74155 }, { "epoch": 2.47, "grad_norm": 0.6809041500091553, "learning_rate": 4.55003105978747e-05, "loss": 1.7261, "step": 74156 }, { "epoch": 2.47, "grad_norm": 0.6955522894859314, "learning_rate": 4.549477673110814e-05, "loss": 1.6916, "step": 74157 }, { "epoch": 2.47, "grad_norm": 0.6783889532089233, "learning_rate": 4.548924317327336e-05, "loss": 1.6872, "step": 74158 }, { "epoch": 2.47, "grad_norm": 0.7280367016792297, "learning_rate": 4.548370992437719e-05, "loss": 1.7541, "step": 74159 }, { "epoch": 2.47, "grad_norm": 0.6962296366691589, "learning_rate": 4.547817698442622e-05, "loss": 1.6481, "step": 74160 }, { "epoch": 2.47, "grad_norm": 0.6891696453094482, "learning_rate": 4.5472644353427346e-05, "loss": 1.6248, "step": 74161 }, { "epoch": 2.47, "grad_norm": 0.6828871965408325, "learning_rate": 4.546711203138707e-05, "loss": 1.6725, "step": 74162 }, { "epoch": 2.47, "grad_norm": 0.7098093628883362, "learning_rate": 4.546158001831231e-05, "loss": 1.7295, "step": 74163 }, { "epoch": 2.47, "grad_norm": 0.7118551135063171, "learning_rate": 4.5456048314209695e-05, "loss": 1.7468, "step": 74164 }, { "epoch": 2.47, "grad_norm": 0.6778645515441895, "learning_rate": 4.545051691908586e-05, "loss": 1.693, "step": 74165 }, { "epoch": 2.47, "grad_norm": 0.7172849774360657, "learning_rate": 4.5444985832947625e-05, "loss": 1.718, "step": 74166 }, { "epoch": 2.47, "grad_norm": 0.6847688555717468, "learning_rate": 4.5439455055801624e-05, "loss": 1.6741, "step": 74167 }, { "epoch": 2.47, "grad_norm": 0.7100146412849426, "learning_rate": 4.543392458765468e-05, "loss": 1.8036, "step": 74168 }, { "epoch": 2.47, "grad_norm": 0.6857588291168213, "learning_rate": 4.542839442851346e-05, "loss": 1.6081, "step": 74169 }, { "epoch": 2.47, "grad_norm": 0.7153748869895935, "learning_rate": 4.5422864578384586e-05, "loss": 1.5923, "step": 74170 }, { "epoch": 2.47, "grad_norm": 0.697977602481842, "learning_rate": 4.5417335037274924e-05, "loss": 1.563, "step": 74171 }, { "epoch": 2.47, "grad_norm": 0.6910483837127686, "learning_rate": 4.5411805805191e-05, "loss": 1.7336, "step": 74172 }, { "epoch": 2.47, "grad_norm": 0.6852937936782837, "learning_rate": 4.5406276882139746e-05, "loss": 1.6731, "step": 74173 }, { "epoch": 2.47, "grad_norm": 0.6885349750518799, "learning_rate": 4.5400748268127655e-05, "loss": 1.7713, "step": 74174 }, { "epoch": 2.47, "grad_norm": 0.6852863430976868, "learning_rate": 4.539521996316161e-05, "loss": 1.6597, "step": 74175 }, { "epoch": 2.47, "grad_norm": 0.6759449243545532, "learning_rate": 4.538969196724829e-05, "loss": 1.692, "step": 74176 }, { "epoch": 2.47, "grad_norm": 0.7001371383666992, "learning_rate": 4.538416428039427e-05, "loss": 1.7411, "step": 74177 }, { "epoch": 2.47, "grad_norm": 0.6941140294075012, "learning_rate": 4.537863690260644e-05, "loss": 1.6674, "step": 74178 }, { "epoch": 2.47, "grad_norm": 0.664950430393219, "learning_rate": 4.537310983389143e-05, "loss": 1.6511, "step": 74179 }, { "epoch": 2.47, "grad_norm": 0.7055608034133911, "learning_rate": 4.536758307425585e-05, "loss": 1.7193, "step": 74180 }, { "epoch": 2.47, "grad_norm": 0.6852279305458069, "learning_rate": 4.5362056623706534e-05, "loss": 1.6907, "step": 74181 }, { "epoch": 2.47, "grad_norm": 0.7087920904159546, "learning_rate": 4.5356530482250233e-05, "loss": 1.7163, "step": 74182 }, { "epoch": 2.47, "grad_norm": 0.6912602782249451, "learning_rate": 4.5351004649893575e-05, "loss": 1.68, "step": 74183 }, { "epoch": 2.47, "grad_norm": 0.6636660099029541, "learning_rate": 4.534547912664319e-05, "loss": 1.6876, "step": 74184 }, { "epoch": 2.47, "grad_norm": 0.6772735118865967, "learning_rate": 4.533995391250598e-05, "loss": 1.7064, "step": 74185 }, { "epoch": 2.47, "grad_norm": 0.6713241338729858, "learning_rate": 4.533442900748853e-05, "loss": 1.6943, "step": 74186 }, { "epoch": 2.47, "grad_norm": 0.7032988667488098, "learning_rate": 4.532890441159747e-05, "loss": 1.6757, "step": 74187 }, { "epoch": 2.47, "grad_norm": 0.6791104078292847, "learning_rate": 4.532338012483959e-05, "loss": 1.667, "step": 74188 }, { "epoch": 2.47, "grad_norm": 0.6692386269569397, "learning_rate": 4.53178561472217e-05, "loss": 1.7164, "step": 74189 }, { "epoch": 2.47, "grad_norm": 0.6886690855026245, "learning_rate": 4.531233247875041e-05, "loss": 1.7668, "step": 74190 }, { "epoch": 2.47, "grad_norm": 0.6904802322387695, "learning_rate": 4.530680911943233e-05, "loss": 1.6605, "step": 74191 }, { "epoch": 2.47, "grad_norm": 0.6938584446907043, "learning_rate": 4.530128606927435e-05, "loss": 1.6931, "step": 74192 }, { "epoch": 2.47, "grad_norm": 0.6978946924209595, "learning_rate": 4.5295763328283055e-05, "loss": 1.7039, "step": 74193 }, { "epoch": 2.47, "grad_norm": 0.6859256625175476, "learning_rate": 4.529024089646511e-05, "loss": 1.6049, "step": 74194 }, { "epoch": 2.47, "grad_norm": 0.6996909379959106, "learning_rate": 4.528471877382729e-05, "loss": 1.7741, "step": 74195 }, { "epoch": 2.47, "grad_norm": 0.671935498714447, "learning_rate": 4.527919696037637e-05, "loss": 1.6444, "step": 74196 }, { "epoch": 2.47, "grad_norm": 0.70201575756073, "learning_rate": 4.527367545611896e-05, "loss": 1.6846, "step": 74197 }, { "epoch": 2.47, "grad_norm": 0.6978994011878967, "learning_rate": 4.526815426106168e-05, "loss": 1.6768, "step": 74198 }, { "epoch": 2.47, "grad_norm": 0.6941884160041809, "learning_rate": 4.526263337521143e-05, "loss": 1.6891, "step": 74199 }, { "epoch": 2.47, "grad_norm": 0.7001631259918213, "learning_rate": 4.525711279857483e-05, "loss": 1.6552, "step": 74200 }, { "epoch": 2.47, "grad_norm": 0.6868559122085571, "learning_rate": 4.5251592531158454e-05, "loss": 1.6441, "step": 74201 }, { "epoch": 2.47, "grad_norm": 0.7016341686248779, "learning_rate": 4.524607257296912e-05, "loss": 1.6724, "step": 74202 }, { "epoch": 2.47, "grad_norm": 0.6755318641662598, "learning_rate": 4.524055292401358e-05, "loss": 1.6147, "step": 74203 }, { "epoch": 2.47, "grad_norm": 0.6676916480064392, "learning_rate": 4.5235033584298486e-05, "loss": 1.6402, "step": 74204 }, { "epoch": 2.47, "grad_norm": 0.6891281604766846, "learning_rate": 4.522951455383045e-05, "loss": 1.6418, "step": 74205 }, { "epoch": 2.47, "grad_norm": 0.6800426840782166, "learning_rate": 4.522399583261633e-05, "loss": 1.6924, "step": 74206 }, { "epoch": 2.47, "grad_norm": 0.6969209909439087, "learning_rate": 4.52184774206627e-05, "loss": 1.6322, "step": 74207 }, { "epoch": 2.47, "grad_norm": 0.7060707211494446, "learning_rate": 4.5212959317976246e-05, "loss": 1.6912, "step": 74208 }, { "epoch": 2.47, "grad_norm": 0.6679222583770752, "learning_rate": 4.52074415245638e-05, "loss": 1.7358, "step": 74209 }, { "epoch": 2.47, "grad_norm": 0.6853023171424866, "learning_rate": 4.520192404043192e-05, "loss": 1.5892, "step": 74210 }, { "epoch": 2.47, "grad_norm": 0.6831272840499878, "learning_rate": 4.519640686558741e-05, "loss": 1.6864, "step": 74211 }, { "epoch": 2.47, "grad_norm": 0.665887176990509, "learning_rate": 4.519089000003685e-05, "loss": 1.6295, "step": 74212 }, { "epoch": 2.47, "grad_norm": 0.6898173093795776, "learning_rate": 4.518537344378708e-05, "loss": 1.5892, "step": 74213 }, { "epoch": 2.47, "grad_norm": 0.6851975917816162, "learning_rate": 4.517985719684472e-05, "loss": 1.7154, "step": 74214 }, { "epoch": 2.47, "grad_norm": 0.7174561619758606, "learning_rate": 4.517434125921639e-05, "loss": 1.7699, "step": 74215 }, { "epoch": 2.47, "grad_norm": 0.6994526386260986, "learning_rate": 4.516882563090894e-05, "loss": 1.6464, "step": 74216 }, { "epoch": 2.47, "grad_norm": 0.70014488697052, "learning_rate": 4.5163310311928925e-05, "loss": 1.7594, "step": 74217 }, { "epoch": 2.47, "grad_norm": 0.6988039016723633, "learning_rate": 4.5157795302283165e-05, "loss": 1.6708, "step": 74218 }, { "epoch": 2.47, "grad_norm": 0.699762761592865, "learning_rate": 4.515228060197828e-05, "loss": 1.6889, "step": 74219 }, { "epoch": 2.47, "grad_norm": 0.6779593825340271, "learning_rate": 4.5146766211020925e-05, "loss": 1.636, "step": 74220 }, { "epoch": 2.47, "grad_norm": 0.6920593976974487, "learning_rate": 4.51412521294179e-05, "loss": 1.6843, "step": 74221 }, { "epoch": 2.47, "grad_norm": 0.6863940954208374, "learning_rate": 4.513573835717579e-05, "loss": 1.6657, "step": 74222 }, { "epoch": 2.47, "grad_norm": 0.7037935853004456, "learning_rate": 4.513022489430137e-05, "loss": 1.6813, "step": 74223 }, { "epoch": 2.47, "grad_norm": 0.6858915090560913, "learning_rate": 4.5124711740801275e-05, "loss": 1.652, "step": 74224 }, { "epoch": 2.47, "grad_norm": 0.7055222988128662, "learning_rate": 4.5119198896682306e-05, "loss": 1.6264, "step": 74225 }, { "epoch": 2.47, "grad_norm": 0.7018799185752869, "learning_rate": 4.511368636195105e-05, "loss": 1.6989, "step": 74226 }, { "epoch": 2.47, "grad_norm": 0.6795623898506165, "learning_rate": 4.510817413661414e-05, "loss": 1.6991, "step": 74227 }, { "epoch": 2.47, "grad_norm": 0.6794853806495667, "learning_rate": 4.510266222067843e-05, "loss": 1.7555, "step": 74228 }, { "epoch": 2.47, "grad_norm": 0.6809684038162231, "learning_rate": 4.509715061415056e-05, "loss": 1.7413, "step": 74229 }, { "epoch": 2.47, "grad_norm": 0.7067157626152039, "learning_rate": 4.509163931703711e-05, "loss": 1.6524, "step": 74230 }, { "epoch": 2.47, "grad_norm": 0.693699300289154, "learning_rate": 4.508612832934482e-05, "loss": 1.6612, "step": 74231 }, { "epoch": 2.47, "grad_norm": 0.6796597242355347, "learning_rate": 4.508061765108051e-05, "loss": 1.7067, "step": 74232 }, { "epoch": 2.47, "grad_norm": 0.6880835294723511, "learning_rate": 4.507510728225074e-05, "loss": 1.6525, "step": 74233 }, { "epoch": 2.47, "grad_norm": 0.6879028081893921, "learning_rate": 4.5069597222862186e-05, "loss": 1.6717, "step": 74234 }, { "epoch": 2.47, "grad_norm": 0.6759042739868164, "learning_rate": 4.506408747292166e-05, "loss": 1.6633, "step": 74235 }, { "epoch": 2.47, "grad_norm": 0.6975857019424438, "learning_rate": 4.505857803243573e-05, "loss": 1.684, "step": 74236 }, { "epoch": 2.47, "grad_norm": 0.662510335445404, "learning_rate": 4.50530689014111e-05, "loss": 1.6576, "step": 74237 }, { "epoch": 2.47, "grad_norm": 0.6681934595108032, "learning_rate": 4.504756007985444e-05, "loss": 1.6156, "step": 74238 }, { "epoch": 2.47, "grad_norm": 0.6830445528030396, "learning_rate": 4.504205156777263e-05, "loss": 1.6991, "step": 74239 }, { "epoch": 2.47, "grad_norm": 0.6819165945053101, "learning_rate": 4.5036543365172064e-05, "loss": 1.6864, "step": 74240 }, { "epoch": 2.47, "grad_norm": 0.6856081485748291, "learning_rate": 4.5031035472059565e-05, "loss": 1.6711, "step": 74241 }, { "epoch": 2.47, "grad_norm": 0.7012932896614075, "learning_rate": 4.502552788844189e-05, "loss": 1.7533, "step": 74242 }, { "epoch": 2.47, "grad_norm": 0.6781082153320312, "learning_rate": 4.502002061432565e-05, "loss": 1.6771, "step": 74243 }, { "epoch": 2.47, "grad_norm": 0.7076238393783569, "learning_rate": 4.5014513649717486e-05, "loss": 1.7037, "step": 74244 }, { "epoch": 2.47, "grad_norm": 0.6938164234161377, "learning_rate": 4.500900699462411e-05, "loss": 1.6184, "step": 74245 }, { "epoch": 2.47, "grad_norm": 0.6883771419525146, "learning_rate": 4.500350064905237e-05, "loss": 1.66, "step": 74246 }, { "epoch": 2.47, "grad_norm": 0.7229211926460266, "learning_rate": 4.499799461300867e-05, "loss": 1.7342, "step": 74247 }, { "epoch": 2.47, "grad_norm": 0.7122594118118286, "learning_rate": 4.499248888649983e-05, "loss": 1.7618, "step": 74248 }, { "epoch": 2.47, "grad_norm": 0.6783151030540466, "learning_rate": 4.4986983469532624e-05, "loss": 1.6418, "step": 74249 }, { "epoch": 2.47, "grad_norm": 0.6892669796943665, "learning_rate": 4.4981478362113635e-05, "loss": 1.7725, "step": 74250 }, { "epoch": 2.47, "grad_norm": 0.6966333985328674, "learning_rate": 4.497597356424949e-05, "loss": 1.6989, "step": 74251 }, { "epoch": 2.47, "grad_norm": 0.6937448382377625, "learning_rate": 4.497046907594693e-05, "loss": 1.6811, "step": 74252 }, { "epoch": 2.47, "grad_norm": 0.6975722908973694, "learning_rate": 4.4964964897212794e-05, "loss": 1.6964, "step": 74253 }, { "epoch": 2.47, "grad_norm": 0.6907240152359009, "learning_rate": 4.4959461028053435e-05, "loss": 1.655, "step": 74254 }, { "epoch": 2.47, "grad_norm": 0.6935235857963562, "learning_rate": 4.495395746847576e-05, "loss": 1.6988, "step": 74255 }, { "epoch": 2.47, "grad_norm": 0.7086547613143921, "learning_rate": 4.494845421848645e-05, "loss": 1.6695, "step": 74256 }, { "epoch": 2.47, "grad_norm": 0.6929861307144165, "learning_rate": 4.4942951278092124e-05, "loss": 1.7137, "step": 74257 }, { "epoch": 2.47, "grad_norm": 0.6682615280151367, "learning_rate": 4.493744864729943e-05, "loss": 1.6962, "step": 74258 }, { "epoch": 2.47, "grad_norm": 0.6995632648468018, "learning_rate": 4.4931946326115154e-05, "loss": 1.6626, "step": 74259 }, { "epoch": 2.47, "grad_norm": 0.7024694681167603, "learning_rate": 4.4926444314545906e-05, "loss": 1.7208, "step": 74260 }, { "epoch": 2.47, "grad_norm": 0.6895633935928345, "learning_rate": 4.492094261259832e-05, "loss": 1.6372, "step": 74261 }, { "epoch": 2.47, "grad_norm": 0.7107784152030945, "learning_rate": 4.491544122027909e-05, "loss": 1.7449, "step": 74262 }, { "epoch": 2.47, "grad_norm": 0.6940317749977112, "learning_rate": 4.490994013759501e-05, "loss": 1.6437, "step": 74263 }, { "epoch": 2.47, "grad_norm": 0.7023102045059204, "learning_rate": 4.490443936455271e-05, "loss": 1.6799, "step": 74264 }, { "epoch": 2.47, "grad_norm": 0.6897129416465759, "learning_rate": 4.489893890115872e-05, "loss": 1.5939, "step": 74265 }, { "epoch": 2.47, "grad_norm": 0.6835881471633911, "learning_rate": 4.489343874741993e-05, "loss": 1.6515, "step": 74266 }, { "epoch": 2.47, "grad_norm": 0.7001690864562988, "learning_rate": 4.48879389033429e-05, "loss": 1.616, "step": 74267 }, { "epoch": 2.47, "grad_norm": 0.6950677633285522, "learning_rate": 4.488243936893426e-05, "loss": 1.6949, "step": 74268 }, { "epoch": 2.47, "grad_norm": 0.6912658214569092, "learning_rate": 4.487694014420085e-05, "loss": 1.5997, "step": 74269 }, { "epoch": 2.47, "grad_norm": 0.6863226294517517, "learning_rate": 4.487144122914914e-05, "loss": 1.6167, "step": 74270 }, { "epoch": 2.47, "grad_norm": 0.7046008706092834, "learning_rate": 4.486594262378598e-05, "loss": 1.739, "step": 74271 }, { "epoch": 2.47, "grad_norm": 0.7025423049926758, "learning_rate": 4.4860444328117886e-05, "loss": 1.6521, "step": 74272 }, { "epoch": 2.47, "grad_norm": 0.7048801779747009, "learning_rate": 4.485494634215172e-05, "loss": 1.7165, "step": 74273 }, { "epoch": 2.47, "grad_norm": 0.7233113646507263, "learning_rate": 4.484944866589405e-05, "loss": 1.6128, "step": 74274 }, { "epoch": 2.47, "grad_norm": 0.7060580849647522, "learning_rate": 4.4843951299351465e-05, "loss": 1.6827, "step": 74275 }, { "epoch": 2.47, "grad_norm": 0.6840914487838745, "learning_rate": 4.483845424253083e-05, "loss": 1.6109, "step": 74276 }, { "epoch": 2.47, "grad_norm": 0.6978226900100708, "learning_rate": 4.4832957495438636e-05, "loss": 1.6257, "step": 74277 }, { "epoch": 2.47, "grad_norm": 0.6894806027412415, "learning_rate": 4.4827461058081683e-05, "loss": 1.6421, "step": 74278 }, { "epoch": 2.47, "grad_norm": 0.7014942169189453, "learning_rate": 4.4821964930466526e-05, "loss": 1.5921, "step": 74279 }, { "epoch": 2.47, "grad_norm": 0.7094899415969849, "learning_rate": 4.4816469112599995e-05, "loss": 1.6919, "step": 74280 }, { "epoch": 2.47, "grad_norm": 0.6849621534347534, "learning_rate": 4.481097360448869e-05, "loss": 1.6081, "step": 74281 }, { "epoch": 2.47, "grad_norm": 0.69864821434021, "learning_rate": 4.480547840613916e-05, "loss": 1.682, "step": 74282 }, { "epoch": 2.47, "grad_norm": 0.6969575881958008, "learning_rate": 4.4799983517558244e-05, "loss": 1.7016, "step": 74283 }, { "epoch": 2.47, "grad_norm": 0.7102214097976685, "learning_rate": 4.479448893875247e-05, "loss": 1.7579, "step": 74284 }, { "epoch": 2.47, "grad_norm": 0.68462735414505, "learning_rate": 4.4788994669728695e-05, "loss": 1.7182, "step": 74285 }, { "epoch": 2.47, "grad_norm": 0.6804274320602417, "learning_rate": 4.478350071049345e-05, "loss": 1.659, "step": 74286 }, { "epoch": 2.47, "grad_norm": 0.6848529577255249, "learning_rate": 4.477800706105336e-05, "loss": 1.6409, "step": 74287 }, { "epoch": 2.47, "grad_norm": 0.7142935395240784, "learning_rate": 4.477251372141522e-05, "loss": 1.7295, "step": 74288 }, { "epoch": 2.47, "grad_norm": 0.7045347690582275, "learning_rate": 4.4767020691585584e-05, "loss": 1.7097, "step": 74289 }, { "epoch": 2.47, "grad_norm": 0.6931570768356323, "learning_rate": 4.476152797157126e-05, "loss": 1.6754, "step": 74290 }, { "epoch": 2.47, "grad_norm": 0.7288146018981934, "learning_rate": 4.475603556137874e-05, "loss": 1.6804, "step": 74291 }, { "epoch": 2.47, "grad_norm": 0.6973071098327637, "learning_rate": 4.475054346101489e-05, "loss": 1.6933, "step": 74292 }, { "epoch": 2.47, "grad_norm": 0.6934653520584106, "learning_rate": 4.4745051670486255e-05, "loss": 1.6915, "step": 74293 }, { "epoch": 2.47, "grad_norm": 0.676592230796814, "learning_rate": 4.473956018979944e-05, "loss": 1.6841, "step": 74294 }, { "epoch": 2.47, "grad_norm": 0.6811923384666443, "learning_rate": 4.473406901896127e-05, "loss": 1.7752, "step": 74295 }, { "epoch": 2.47, "grad_norm": 0.7154631018638611, "learning_rate": 4.472857815797831e-05, "loss": 1.761, "step": 74296 }, { "epoch": 2.47, "grad_norm": 0.6994028687477112, "learning_rate": 4.4723087606857185e-05, "loss": 1.6857, "step": 74297 }, { "epoch": 2.47, "grad_norm": 0.7007637619972229, "learning_rate": 4.4717597365604626e-05, "loss": 1.742, "step": 74298 }, { "epoch": 2.47, "grad_norm": 0.6889514923095703, "learning_rate": 4.471210743422733e-05, "loss": 1.6761, "step": 74299 }, { "epoch": 2.47, "grad_norm": 0.6893786191940308, "learning_rate": 4.470661781273195e-05, "loss": 1.6488, "step": 74300 }, { "epoch": 2.47, "grad_norm": 0.6703218817710876, "learning_rate": 4.470112850112505e-05, "loss": 1.6719, "step": 74301 }, { "epoch": 2.47, "grad_norm": 0.6851358413696289, "learning_rate": 4.4695639499413424e-05, "loss": 1.71, "step": 74302 }, { "epoch": 2.47, "grad_norm": 0.7190349102020264, "learning_rate": 4.4690150807603676e-05, "loss": 1.7391, "step": 74303 }, { "epoch": 2.47, "grad_norm": 0.6868684887886047, "learning_rate": 4.4684662425702386e-05, "loss": 1.6806, "step": 74304 }, { "epoch": 2.47, "grad_norm": 0.710175633430481, "learning_rate": 4.467917435371633e-05, "loss": 1.6968, "step": 74305 }, { "epoch": 2.47, "grad_norm": 0.6946476697921753, "learning_rate": 4.4673686591652155e-05, "loss": 1.7408, "step": 74306 }, { "epoch": 2.47, "grad_norm": 0.701961100101471, "learning_rate": 4.4668199139516536e-05, "loss": 1.7174, "step": 74307 }, { "epoch": 2.47, "grad_norm": 0.6584820747375488, "learning_rate": 4.4662711997316025e-05, "loss": 1.7082, "step": 74308 }, { "epoch": 2.47, "grad_norm": 0.6965939998626709, "learning_rate": 4.465722516505745e-05, "loss": 1.669, "step": 74309 }, { "epoch": 2.47, "grad_norm": 0.6842664480209351, "learning_rate": 4.4651738642747346e-05, "loss": 1.6927, "step": 74310 }, { "epoch": 2.47, "grad_norm": 0.6673903465270996, "learning_rate": 4.464625243039237e-05, "loss": 1.7094, "step": 74311 }, { "epoch": 2.47, "grad_norm": 0.6890751123428345, "learning_rate": 4.4640766527999216e-05, "loss": 1.6303, "step": 74312 }, { "epoch": 2.47, "grad_norm": 0.6741812229156494, "learning_rate": 4.463528093557458e-05, "loss": 1.6585, "step": 74313 }, { "epoch": 2.47, "grad_norm": 0.6921855211257935, "learning_rate": 4.462979565312512e-05, "loss": 1.692, "step": 74314 }, { "epoch": 2.47, "grad_norm": 0.6955710053443909, "learning_rate": 4.462431068065737e-05, "loss": 1.6775, "step": 74315 }, { "epoch": 2.47, "grad_norm": 0.7239338755607605, "learning_rate": 4.4618826018178186e-05, "loss": 1.6634, "step": 74316 }, { "epoch": 2.47, "grad_norm": 0.708646833896637, "learning_rate": 4.461334166569407e-05, "loss": 1.704, "step": 74317 }, { "epoch": 2.47, "grad_norm": 0.6850845813751221, "learning_rate": 4.460785762321171e-05, "loss": 1.6691, "step": 74318 }, { "epoch": 2.47, "grad_norm": 0.7019765377044678, "learning_rate": 4.460237389073773e-05, "loss": 1.66, "step": 74319 }, { "epoch": 2.47, "grad_norm": 0.6671690344810486, "learning_rate": 4.459689046827893e-05, "loss": 1.6629, "step": 74320 }, { "epoch": 2.47, "grad_norm": 0.6855352520942688, "learning_rate": 4.459140735584188e-05, "loss": 1.644, "step": 74321 }, { "epoch": 2.47, "grad_norm": 0.7043916583061218, "learning_rate": 4.458592455343316e-05, "loss": 1.7185, "step": 74322 }, { "epoch": 2.47, "grad_norm": 0.6738414764404297, "learning_rate": 4.458044206105954e-05, "loss": 1.6326, "step": 74323 }, { "epoch": 2.47, "grad_norm": 0.6806114912033081, "learning_rate": 4.457495987872765e-05, "loss": 1.6475, "step": 74324 }, { "epoch": 2.47, "grad_norm": 0.6985213756561279, "learning_rate": 4.456947800644404e-05, "loss": 1.6834, "step": 74325 }, { "epoch": 2.47, "grad_norm": 0.6968833208084106, "learning_rate": 4.4563996444215544e-05, "loss": 1.7279, "step": 74326 }, { "epoch": 2.47, "grad_norm": 0.6862061619758606, "learning_rate": 4.4558515192048625e-05, "loss": 1.6926, "step": 74327 }, { "epoch": 2.47, "grad_norm": 0.6848819851875305, "learning_rate": 4.455303424995012e-05, "loss": 1.6362, "step": 74328 }, { "epoch": 2.47, "grad_norm": 0.6872492432594299, "learning_rate": 4.45475536179265e-05, "loss": 1.6504, "step": 74329 }, { "epoch": 2.47, "grad_norm": 0.679365873336792, "learning_rate": 4.454207329598458e-05, "loss": 1.6561, "step": 74330 }, { "epoch": 2.47, "grad_norm": 0.6958176493644714, "learning_rate": 4.453659328413095e-05, "loss": 1.7358, "step": 74331 }, { "epoch": 2.47, "grad_norm": 0.6817023754119873, "learning_rate": 4.4531113582372204e-05, "loss": 1.6992, "step": 74332 }, { "epoch": 2.47, "grad_norm": 0.6906964182853699, "learning_rate": 4.45256341907151e-05, "loss": 1.6918, "step": 74333 }, { "epoch": 2.47, "grad_norm": 0.6781488656997681, "learning_rate": 4.452015510916617e-05, "loss": 1.82, "step": 74334 }, { "epoch": 2.47, "grad_norm": 0.696347713470459, "learning_rate": 4.451467633773217e-05, "loss": 1.6485, "step": 74335 }, { "epoch": 2.47, "grad_norm": 0.666472315788269, "learning_rate": 4.4509197876419735e-05, "loss": 1.7179, "step": 74336 }, { "epoch": 2.47, "grad_norm": 0.7298701405525208, "learning_rate": 4.450371972523542e-05, "loss": 1.721, "step": 74337 }, { "epoch": 2.47, "grad_norm": 0.6790107488632202, "learning_rate": 4.449824188418599e-05, "loss": 1.7124, "step": 74338 }, { "epoch": 2.47, "grad_norm": 0.7020591497421265, "learning_rate": 4.449276435327801e-05, "loss": 1.7019, "step": 74339 }, { "epoch": 2.47, "grad_norm": 0.6804974675178528, "learning_rate": 4.448728713251823e-05, "loss": 1.6655, "step": 74340 }, { "epoch": 2.47, "grad_norm": 0.714304506778717, "learning_rate": 4.448181022191316e-05, "loss": 1.7863, "step": 74341 }, { "epoch": 2.47, "grad_norm": 0.7014720439910889, "learning_rate": 4.447633362146958e-05, "loss": 1.7167, "step": 74342 }, { "epoch": 2.47, "grad_norm": 0.6955552101135254, "learning_rate": 4.44708573311941e-05, "loss": 1.695, "step": 74343 }, { "epoch": 2.47, "grad_norm": 0.689976155757904, "learning_rate": 4.4465381351093264e-05, "loss": 1.6439, "step": 74344 }, { "epoch": 2.47, "grad_norm": 0.7275030016899109, "learning_rate": 4.445990568117388e-05, "loss": 1.7341, "step": 74345 }, { "epoch": 2.47, "grad_norm": 0.6927357316017151, "learning_rate": 4.445443032144253e-05, "loss": 1.7083, "step": 74346 }, { "epoch": 2.47, "grad_norm": 0.6915082335472107, "learning_rate": 4.444895527190576e-05, "loss": 1.6509, "step": 74347 }, { "epoch": 2.47, "grad_norm": 0.6793753504753113, "learning_rate": 4.444348053257032e-05, "loss": 1.714, "step": 74348 }, { "epoch": 2.47, "grad_norm": 0.7013591527938843, "learning_rate": 4.44380061034429e-05, "loss": 1.6672, "step": 74349 }, { "epoch": 2.47, "grad_norm": 0.7176346182823181, "learning_rate": 4.443253198453011e-05, "loss": 1.7177, "step": 74350 }, { "epoch": 2.47, "grad_norm": 0.6981654167175293, "learning_rate": 4.442705817583846e-05, "loss": 1.7349, "step": 74351 }, { "epoch": 2.47, "grad_norm": 0.6830736398696899, "learning_rate": 4.442158467737479e-05, "loss": 1.6795, "step": 74352 }, { "epoch": 2.47, "grad_norm": 0.6906835436820984, "learning_rate": 4.441611148914566e-05, "loss": 1.6803, "step": 74353 }, { "epoch": 2.47, "grad_norm": 0.6809419393539429, "learning_rate": 4.441063861115766e-05, "loss": 1.6511, "step": 74354 }, { "epoch": 2.47, "grad_norm": 0.70687335729599, "learning_rate": 4.440516604341745e-05, "loss": 1.6993, "step": 74355 }, { "epoch": 2.47, "grad_norm": 0.6941040754318237, "learning_rate": 4.439969378593187e-05, "loss": 1.7122, "step": 74356 }, { "epoch": 2.47, "grad_norm": 0.6728813648223877, "learning_rate": 4.4394221838707264e-05, "loss": 1.6675, "step": 74357 }, { "epoch": 2.47, "grad_norm": 0.7064695954322815, "learning_rate": 4.4388750201750415e-05, "loss": 1.658, "step": 74358 }, { "epoch": 2.47, "grad_norm": 0.6999709010124207, "learning_rate": 4.438327887506803e-05, "loss": 1.6354, "step": 74359 }, { "epoch": 2.47, "grad_norm": 0.6814534664154053, "learning_rate": 4.4377807858666646e-05, "loss": 1.6556, "step": 74360 }, { "epoch": 2.47, "grad_norm": 0.6739806532859802, "learning_rate": 4.4372337152552894e-05, "loss": 1.7162, "step": 74361 }, { "epoch": 2.47, "grad_norm": 0.7041618824005127, "learning_rate": 4.4366866756733466e-05, "loss": 1.7242, "step": 74362 }, { "epoch": 2.47, "grad_norm": 0.688989520072937, "learning_rate": 4.436139667121512e-05, "loss": 1.6943, "step": 74363 }, { "epoch": 2.47, "grad_norm": 0.682218611240387, "learning_rate": 4.4355926896004256e-05, "loss": 1.6945, "step": 74364 }, { "epoch": 2.47, "grad_norm": 0.6881435513496399, "learning_rate": 4.4350457431107634e-05, "loss": 1.6829, "step": 74365 }, { "epoch": 2.47, "grad_norm": 0.7029777765274048, "learning_rate": 4.4344988276531914e-05, "loss": 1.6965, "step": 74366 }, { "epoch": 2.47, "grad_norm": 0.6859365105628967, "learning_rate": 4.4339519432283756e-05, "loss": 1.717, "step": 74367 }, { "epoch": 2.47, "grad_norm": 0.6734259128570557, "learning_rate": 4.433405089836965e-05, "loss": 1.6803, "step": 74368 }, { "epoch": 2.47, "grad_norm": 0.6923150420188904, "learning_rate": 4.4328582674796345e-05, "loss": 1.6006, "step": 74369 }, { "epoch": 2.47, "grad_norm": 0.6819106936454773, "learning_rate": 4.432311476157061e-05, "loss": 1.6868, "step": 74370 }, { "epoch": 2.47, "grad_norm": 0.6949976682662964, "learning_rate": 4.431764715869882e-05, "loss": 1.6158, "step": 74371 }, { "epoch": 2.47, "grad_norm": 0.6999222636222839, "learning_rate": 4.4312179866187705e-05, "loss": 1.7429, "step": 74372 }, { "epoch": 2.47, "grad_norm": 0.6958011388778687, "learning_rate": 4.430671288404402e-05, "loss": 1.6893, "step": 74373 }, { "epoch": 2.47, "grad_norm": 0.6955877542495728, "learning_rate": 4.430124621227429e-05, "loss": 1.7245, "step": 74374 }, { "epoch": 2.47, "grad_norm": 0.7129119634628296, "learning_rate": 4.429577985088512e-05, "loss": 1.7307, "step": 74375 }, { "epoch": 2.47, "grad_norm": 0.6978457570075989, "learning_rate": 4.429031379988326e-05, "loss": 1.6921, "step": 74376 }, { "epoch": 2.47, "grad_norm": 0.7316572666168213, "learning_rate": 4.428484805927528e-05, "loss": 1.6287, "step": 74377 }, { "epoch": 2.47, "grad_norm": 0.7035354971885681, "learning_rate": 4.427938262906774e-05, "loss": 1.7438, "step": 74378 }, { "epoch": 2.47, "grad_norm": 0.6857966184616089, "learning_rate": 4.427391750926737e-05, "loss": 1.6561, "step": 74379 }, { "epoch": 2.47, "grad_norm": 0.6869070529937744, "learning_rate": 4.4268452699880854e-05, "loss": 1.669, "step": 74380 }, { "epoch": 2.47, "grad_norm": 0.6932207942008972, "learning_rate": 4.426298820091473e-05, "loss": 1.7519, "step": 74381 }, { "epoch": 2.47, "grad_norm": 0.6900877952575684, "learning_rate": 4.4257524012375626e-05, "loss": 1.6865, "step": 74382 }, { "epoch": 2.47, "grad_norm": 0.6811144351959229, "learning_rate": 4.4252060134270226e-05, "loss": 1.7198, "step": 74383 }, { "epoch": 2.47, "grad_norm": 0.684126615524292, "learning_rate": 4.424659656660517e-05, "loss": 1.6663, "step": 74384 }, { "epoch": 2.47, "grad_norm": 0.7078898549079895, "learning_rate": 4.424113330938702e-05, "loss": 1.6549, "step": 74385 }, { "epoch": 2.47, "grad_norm": 0.7036678194999695, "learning_rate": 4.42356703626225e-05, "loss": 1.6245, "step": 74386 }, { "epoch": 2.47, "grad_norm": 0.7073321342468262, "learning_rate": 4.42302077263181e-05, "loss": 1.7565, "step": 74387 }, { "epoch": 2.47, "grad_norm": 0.7203779220581055, "learning_rate": 4.422474540048062e-05, "loss": 1.6652, "step": 74388 }, { "epoch": 2.47, "grad_norm": 0.703735888004303, "learning_rate": 4.421928338511655e-05, "loss": 1.7479, "step": 74389 }, { "epoch": 2.47, "grad_norm": 0.6768950819969177, "learning_rate": 4.421382168023266e-05, "loss": 1.7408, "step": 74390 }, { "epoch": 2.47, "grad_norm": 0.6632136702537537, "learning_rate": 4.4208360285835475e-05, "loss": 1.6489, "step": 74391 }, { "epoch": 2.48, "grad_norm": 0.67185378074646, "learning_rate": 4.420289920193162e-05, "loss": 1.7218, "step": 74392 }, { "epoch": 2.48, "grad_norm": 0.7042063474655151, "learning_rate": 4.419743842852779e-05, "loss": 1.7006, "step": 74393 }, { "epoch": 2.48, "grad_norm": 0.6960514783859253, "learning_rate": 4.419197796563054e-05, "loss": 1.7409, "step": 74394 }, { "epoch": 2.48, "grad_norm": 0.7005904316902161, "learning_rate": 4.418651781324658e-05, "loss": 1.7009, "step": 74395 }, { "epoch": 2.48, "grad_norm": 0.6790373921394348, "learning_rate": 4.418105797138243e-05, "loss": 1.6895, "step": 74396 }, { "epoch": 2.48, "grad_norm": 0.6926205158233643, "learning_rate": 4.417559844004484e-05, "loss": 1.7153, "step": 74397 }, { "epoch": 2.48, "grad_norm": 0.6953817009925842, "learning_rate": 4.417013921924043e-05, "loss": 1.6671, "step": 74398 }, { "epoch": 2.48, "grad_norm": 0.7003867030143738, "learning_rate": 4.416468030897568e-05, "loss": 1.7033, "step": 74399 }, { "epoch": 2.48, "grad_norm": 0.689110279083252, "learning_rate": 4.415922170925737e-05, "loss": 1.7448, "step": 74400 }, { "epoch": 2.48, "grad_norm": 0.668315589427948, "learning_rate": 4.415376342009199e-05, "loss": 1.7249, "step": 74401 }, { "epoch": 2.48, "grad_norm": 0.7225974202156067, "learning_rate": 4.414830544148632e-05, "loss": 1.7209, "step": 74402 }, { "epoch": 2.48, "grad_norm": 0.6969377994537354, "learning_rate": 4.414284777344692e-05, "loss": 1.7301, "step": 74403 }, { "epoch": 2.48, "grad_norm": 0.6746922731399536, "learning_rate": 4.4137390415980355e-05, "loss": 1.668, "step": 74404 }, { "epoch": 2.48, "grad_norm": 0.6911203861236572, "learning_rate": 4.413193336909332e-05, "loss": 1.6262, "step": 74405 }, { "epoch": 2.48, "grad_norm": 0.6955905556678772, "learning_rate": 4.4126476632792375e-05, "loss": 1.7009, "step": 74406 }, { "epoch": 2.48, "grad_norm": 0.6761317849159241, "learning_rate": 4.4121020207084276e-05, "loss": 1.7037, "step": 74407 }, { "epoch": 2.48, "grad_norm": 0.6852918267250061, "learning_rate": 4.411556409197546e-05, "loss": 1.6729, "step": 74408 }, { "epoch": 2.48, "grad_norm": 0.7032850980758667, "learning_rate": 4.411010828747272e-05, "loss": 1.7348, "step": 74409 }, { "epoch": 2.48, "grad_norm": 0.6998066306114197, "learning_rate": 4.410465279358262e-05, "loss": 1.7741, "step": 74410 }, { "epoch": 2.48, "grad_norm": 0.7304936647415161, "learning_rate": 4.409919761031168e-05, "loss": 1.6527, "step": 74411 }, { "epoch": 2.48, "grad_norm": 0.6938236951828003, "learning_rate": 4.409374273766666e-05, "loss": 1.6311, "step": 74412 }, { "epoch": 2.48, "grad_norm": 0.6874735951423645, "learning_rate": 4.4088288175654165e-05, "loss": 1.7262, "step": 74413 }, { "epoch": 2.48, "grad_norm": 0.6769112348556519, "learning_rate": 4.408283392428068e-05, "loss": 1.6835, "step": 74414 }, { "epoch": 2.48, "grad_norm": 0.690988302230835, "learning_rate": 4.4077379983552976e-05, "loss": 1.6905, "step": 74415 }, { "epoch": 2.48, "grad_norm": 0.6755109429359436, "learning_rate": 4.4071926353477635e-05, "loss": 1.661, "step": 74416 }, { "epoch": 2.48, "grad_norm": 0.6680095195770264, "learning_rate": 4.40664730340613e-05, "loss": 1.7614, "step": 74417 }, { "epoch": 2.48, "grad_norm": 0.6935470700263977, "learning_rate": 4.406102002531049e-05, "loss": 1.6491, "step": 74418 }, { "epoch": 2.48, "grad_norm": 0.6850983500480652, "learning_rate": 4.4055567327231964e-05, "loss": 1.7511, "step": 74419 }, { "epoch": 2.48, "grad_norm": 0.7093292474746704, "learning_rate": 4.405011493983226e-05, "loss": 1.7495, "step": 74420 }, { "epoch": 2.48, "grad_norm": 0.6757108569145203, "learning_rate": 4.404466286311793e-05, "loss": 1.6721, "step": 74421 }, { "epoch": 2.48, "grad_norm": 0.6904865503311157, "learning_rate": 4.403921109709567e-05, "loss": 1.6571, "step": 74422 }, { "epoch": 2.48, "grad_norm": 0.7057884931564331, "learning_rate": 4.403375964177215e-05, "loss": 1.6444, "step": 74423 }, { "epoch": 2.48, "grad_norm": 0.6696875691413879, "learning_rate": 4.4028308497153956e-05, "loss": 1.6949, "step": 74424 }, { "epoch": 2.48, "grad_norm": 0.6800386309623718, "learning_rate": 4.402285766324758e-05, "loss": 1.6388, "step": 74425 }, { "epoch": 2.48, "grad_norm": 0.6989424228668213, "learning_rate": 4.401740714005982e-05, "loss": 1.7369, "step": 74426 }, { "epoch": 2.48, "grad_norm": 0.6720625162124634, "learning_rate": 4.4011956927597217e-05, "loss": 1.6455, "step": 74427 }, { "epoch": 2.48, "grad_norm": 0.6999202370643616, "learning_rate": 4.400650702586631e-05, "loss": 1.7176, "step": 74428 }, { "epoch": 2.48, "grad_norm": 0.66957026720047, "learning_rate": 4.400105743487378e-05, "loss": 1.6651, "step": 74429 }, { "epoch": 2.48, "grad_norm": 0.6743426322937012, "learning_rate": 4.39956081546263e-05, "loss": 1.6483, "step": 74430 }, { "epoch": 2.48, "grad_norm": 0.6979420185089111, "learning_rate": 4.399015918513046e-05, "loss": 1.7533, "step": 74431 }, { "epoch": 2.48, "grad_norm": 0.6872389316558838, "learning_rate": 4.3984710526392765e-05, "loss": 1.7514, "step": 74432 }, { "epoch": 2.48, "grad_norm": 0.6908866167068481, "learning_rate": 4.3979262178419986e-05, "loss": 1.7207, "step": 74433 }, { "epoch": 2.48, "grad_norm": 0.7004941701889038, "learning_rate": 4.397381414121864e-05, "loss": 1.6783, "step": 74434 }, { "epoch": 2.48, "grad_norm": 0.691257655620575, "learning_rate": 4.396836641479531e-05, "loss": 1.5727, "step": 74435 }, { "epoch": 2.48, "grad_norm": 0.6833360195159912, "learning_rate": 4.396291899915664e-05, "loss": 1.585, "step": 74436 }, { "epoch": 2.48, "grad_norm": 0.6758785247802734, "learning_rate": 4.395747189430936e-05, "loss": 1.6643, "step": 74437 }, { "epoch": 2.48, "grad_norm": 0.7069072723388672, "learning_rate": 4.3952025100259967e-05, "loss": 1.6074, "step": 74438 }, { "epoch": 2.48, "grad_norm": 0.7042657732963562, "learning_rate": 4.394657861701503e-05, "loss": 1.7135, "step": 74439 }, { "epoch": 2.48, "grad_norm": 0.7363393902778625, "learning_rate": 4.394113244458126e-05, "loss": 1.7022, "step": 74440 }, { "epoch": 2.48, "grad_norm": 0.7376894950866699, "learning_rate": 4.393568658296524e-05, "loss": 1.7154, "step": 74441 }, { "epoch": 2.48, "grad_norm": 0.687728226184845, "learning_rate": 4.393024103217351e-05, "loss": 1.6707, "step": 74442 }, { "epoch": 2.48, "grad_norm": 0.6702051758766174, "learning_rate": 4.3924795792212775e-05, "loss": 1.7012, "step": 74443 }, { "epoch": 2.48, "grad_norm": 0.7014870643615723, "learning_rate": 4.391935086308956e-05, "loss": 1.6486, "step": 74444 }, { "epoch": 2.48, "grad_norm": 0.6778780221939087, "learning_rate": 4.3913906244810604e-05, "loss": 1.6782, "step": 74445 }, { "epoch": 2.48, "grad_norm": 0.6777403354644775, "learning_rate": 4.3908461937382355e-05, "loss": 1.6638, "step": 74446 }, { "epoch": 2.48, "grad_norm": 0.6873705983161926, "learning_rate": 4.3903017940811546e-05, "loss": 1.6445, "step": 74447 }, { "epoch": 2.48, "grad_norm": 2.3308358192443848, "learning_rate": 4.3897574255104775e-05, "loss": 1.7892, "step": 74448 }, { "epoch": 2.48, "grad_norm": 0.6759690642356873, "learning_rate": 4.389213088026853e-05, "loss": 1.6633, "step": 74449 }, { "epoch": 2.48, "grad_norm": 0.7137657999992371, "learning_rate": 4.388668781630957e-05, "loss": 1.7161, "step": 74450 }, { "epoch": 2.48, "grad_norm": 0.729878306388855, "learning_rate": 4.388124506323437e-05, "loss": 1.7901, "step": 74451 }, { "epoch": 2.48, "grad_norm": 0.6940901875495911, "learning_rate": 4.387580262104965e-05, "loss": 1.6673, "step": 74452 }, { "epoch": 2.48, "grad_norm": 0.6887263655662537, "learning_rate": 4.387036048976197e-05, "loss": 1.67, "step": 74453 }, { "epoch": 2.48, "grad_norm": 0.6969006657600403, "learning_rate": 4.386491866937789e-05, "loss": 1.762, "step": 74454 }, { "epoch": 2.48, "grad_norm": 0.690214991569519, "learning_rate": 4.385947715990411e-05, "loss": 1.6742, "step": 74455 }, { "epoch": 2.48, "grad_norm": 0.7020156979560852, "learning_rate": 4.3854035961347124e-05, "loss": 1.7286, "step": 74456 }, { "epoch": 2.48, "grad_norm": 0.6905501484870911, "learning_rate": 4.384859507371366e-05, "loss": 1.7026, "step": 74457 }, { "epoch": 2.48, "grad_norm": 0.6978930234909058, "learning_rate": 4.3843154497010204e-05, "loss": 1.6825, "step": 74458 }, { "epoch": 2.48, "grad_norm": 0.6941283941268921, "learning_rate": 4.383771423124346e-05, "loss": 1.7211, "step": 74459 }, { "epoch": 2.48, "grad_norm": 0.6820459961891174, "learning_rate": 4.383227427642002e-05, "loss": 1.6987, "step": 74460 }, { "epoch": 2.48, "grad_norm": 0.683557391166687, "learning_rate": 4.382683463254635e-05, "loss": 1.6156, "step": 74461 }, { "epoch": 2.48, "grad_norm": 0.6844363212585449, "learning_rate": 4.382139529962924e-05, "loss": 1.6546, "step": 74462 }, { "epoch": 2.48, "grad_norm": 0.6930646300315857, "learning_rate": 4.381595627767521e-05, "loss": 1.6893, "step": 74463 }, { "epoch": 2.48, "grad_norm": 0.7049893140792847, "learning_rate": 4.381051756669081e-05, "loss": 1.6879, "step": 74464 }, { "epoch": 2.48, "grad_norm": 0.6933095455169678, "learning_rate": 4.3805079166682675e-05, "loss": 1.7257, "step": 74465 }, { "epoch": 2.48, "grad_norm": 0.7059147357940674, "learning_rate": 4.379964107765749e-05, "loss": 1.6554, "step": 74466 }, { "epoch": 2.48, "grad_norm": 0.700141966342926, "learning_rate": 4.37942032996218e-05, "loss": 1.7477, "step": 74467 }, { "epoch": 2.48, "grad_norm": 0.6892532110214233, "learning_rate": 4.378876583258214e-05, "loss": 1.7938, "step": 74468 }, { "epoch": 2.48, "grad_norm": 0.706402063369751, "learning_rate": 4.3783328676545206e-05, "loss": 1.6958, "step": 74469 }, { "epoch": 2.48, "grad_norm": 0.6874583959579468, "learning_rate": 4.377789183151759e-05, "loss": 1.7487, "step": 74470 }, { "epoch": 2.48, "grad_norm": 0.6868686079978943, "learning_rate": 4.377245529750578e-05, "loss": 1.6748, "step": 74471 }, { "epoch": 2.48, "grad_norm": 0.6752592921257019, "learning_rate": 4.376701907451645e-05, "loss": 1.6413, "step": 74472 }, { "epoch": 2.48, "grad_norm": 0.6868674159049988, "learning_rate": 4.3761583162556354e-05, "loss": 1.6923, "step": 74473 }, { "epoch": 2.48, "grad_norm": 0.6899046897888184, "learning_rate": 4.375614756163179e-05, "loss": 1.7031, "step": 74474 }, { "epoch": 2.48, "grad_norm": 0.712533712387085, "learning_rate": 4.375071227174952e-05, "loss": 1.6736, "step": 74475 }, { "epoch": 2.48, "grad_norm": 0.6902978420257568, "learning_rate": 4.374527729291617e-05, "loss": 1.7106, "step": 74476 }, { "epoch": 2.48, "grad_norm": 0.6851264834403992, "learning_rate": 4.3739842625138334e-05, "loss": 1.6331, "step": 74477 }, { "epoch": 2.48, "grad_norm": 0.7254527807235718, "learning_rate": 4.373440826842247e-05, "loss": 1.7252, "step": 74478 }, { "epoch": 2.48, "grad_norm": 0.6751284599304199, "learning_rate": 4.372897422277531e-05, "loss": 1.7011, "step": 74479 }, { "epoch": 2.48, "grad_norm": 0.721937358379364, "learning_rate": 4.3723540488203524e-05, "loss": 1.6877, "step": 74480 }, { "epoch": 2.48, "grad_norm": 0.7195984125137329, "learning_rate": 4.3718107064713456e-05, "loss": 1.6784, "step": 74481 }, { "epoch": 2.48, "grad_norm": 0.6751588582992554, "learning_rate": 4.3712673952311884e-05, "loss": 1.5449, "step": 74482 }, { "epoch": 2.48, "grad_norm": 0.6768810153007507, "learning_rate": 4.370724115100539e-05, "loss": 1.7317, "step": 74483 }, { "epoch": 2.48, "grad_norm": 0.6792653203010559, "learning_rate": 4.3701808660800574e-05, "loss": 1.6789, "step": 74484 }, { "epoch": 2.48, "grad_norm": 0.6966850757598877, "learning_rate": 4.36963764817039e-05, "loss": 1.7262, "step": 74485 }, { "epoch": 2.48, "grad_norm": 0.7171061635017395, "learning_rate": 4.369094461372209e-05, "loss": 1.6533, "step": 74486 }, { "epoch": 2.48, "grad_norm": 0.6966354846954346, "learning_rate": 4.368551305686184e-05, "loss": 1.6163, "step": 74487 }, { "epoch": 2.48, "grad_norm": 0.705304741859436, "learning_rate": 4.3680081811129454e-05, "loss": 1.6903, "step": 74488 }, { "epoch": 2.48, "grad_norm": 0.6876192688941956, "learning_rate": 4.367465087653172e-05, "loss": 1.7239, "step": 74489 }, { "epoch": 2.48, "grad_norm": 0.681367814540863, "learning_rate": 4.366922025307522e-05, "loss": 1.6326, "step": 74490 }, { "epoch": 2.48, "grad_norm": 0.6953748464584351, "learning_rate": 4.366378994076654e-05, "loss": 1.7019, "step": 74491 }, { "epoch": 2.48, "grad_norm": 0.6816215515136719, "learning_rate": 4.3658359939612154e-05, "loss": 1.7267, "step": 74492 }, { "epoch": 2.48, "grad_norm": 0.6780269742012024, "learning_rate": 4.365293024961883e-05, "loss": 1.66, "step": 74493 }, { "epoch": 2.48, "grad_norm": 0.7088049650192261, "learning_rate": 4.364750087079311e-05, "loss": 1.8129, "step": 74494 }, { "epoch": 2.48, "grad_norm": 0.702677845954895, "learning_rate": 4.364207180314143e-05, "loss": 1.6648, "step": 74495 }, { "epoch": 2.48, "grad_norm": 0.6892721056938171, "learning_rate": 4.363664304667054e-05, "loss": 1.6421, "step": 74496 }, { "epoch": 2.48, "grad_norm": 0.69365394115448, "learning_rate": 4.363121460138705e-05, "loss": 1.6747, "step": 74497 }, { "epoch": 2.48, "grad_norm": 0.6791548132896423, "learning_rate": 4.3625786467297506e-05, "loss": 1.6361, "step": 74498 }, { "epoch": 2.48, "grad_norm": 0.6967013478279114, "learning_rate": 4.3620358644408394e-05, "loss": 1.6886, "step": 74499 }, { "epoch": 2.48, "grad_norm": 0.6754615902900696, "learning_rate": 4.361493113272647e-05, "loss": 1.7087, "step": 74500 }, { "epoch": 2.48, "grad_norm": 0.6857653260231018, "learning_rate": 4.3609503932258236e-05, "loss": 1.6462, "step": 74501 }, { "epoch": 2.48, "grad_norm": 0.6708594560623169, "learning_rate": 4.360407704301022e-05, "loss": 1.665, "step": 74502 }, { "epoch": 2.48, "grad_norm": 0.7485147714614868, "learning_rate": 4.359865046498915e-05, "loss": 1.7368, "step": 74503 }, { "epoch": 2.48, "grad_norm": 0.6884629726409912, "learning_rate": 4.359322419820145e-05, "loss": 1.7019, "step": 74504 }, { "epoch": 2.48, "grad_norm": 0.7084944844245911, "learning_rate": 4.358779824265388e-05, "loss": 1.7154, "step": 74505 }, { "epoch": 2.48, "grad_norm": 0.6945330500602722, "learning_rate": 4.358237259835288e-05, "loss": 1.6574, "step": 74506 }, { "epoch": 2.48, "grad_norm": 0.6887192130088806, "learning_rate": 4.357694726530516e-05, "loss": 1.7351, "step": 74507 }, { "epoch": 2.48, "grad_norm": 0.7023609280586243, "learning_rate": 4.3571522243517264e-05, "loss": 1.6719, "step": 74508 }, { "epoch": 2.48, "grad_norm": 0.6715026497840881, "learning_rate": 4.3566097532995646e-05, "loss": 1.6921, "step": 74509 }, { "epoch": 2.48, "grad_norm": 0.6541599035263062, "learning_rate": 4.35606731337471e-05, "loss": 1.6352, "step": 74510 }, { "epoch": 2.48, "grad_norm": 0.7078445553779602, "learning_rate": 4.3555249045778065e-05, "loss": 1.7701, "step": 74511 }, { "epoch": 2.48, "grad_norm": 0.7017509341239929, "learning_rate": 4.354982526909519e-05, "loss": 1.6279, "step": 74512 }, { "epoch": 2.48, "grad_norm": 0.7081857323646545, "learning_rate": 4.354440180370506e-05, "loss": 1.728, "step": 74513 }, { "epoch": 2.48, "grad_norm": 0.7036943435668945, "learning_rate": 4.3538978649614196e-05, "loss": 1.661, "step": 74514 }, { "epoch": 2.48, "grad_norm": 0.6935558319091797, "learning_rate": 4.353355580682928e-05, "loss": 1.7629, "step": 74515 }, { "epoch": 2.48, "grad_norm": 0.6996294260025024, "learning_rate": 4.3528133275356734e-05, "loss": 1.6491, "step": 74516 }, { "epoch": 2.48, "grad_norm": 0.6806539297103882, "learning_rate": 4.3522711055203366e-05, "loss": 1.6462, "step": 74517 }, { "epoch": 2.48, "grad_norm": 0.6940056681632996, "learning_rate": 4.351728914637552e-05, "loss": 1.6435, "step": 74518 }, { "epoch": 2.48, "grad_norm": 0.6714398860931396, "learning_rate": 4.351186754888001e-05, "loss": 1.6952, "step": 74519 }, { "epoch": 2.48, "grad_norm": 0.6842097043991089, "learning_rate": 4.350644626272328e-05, "loss": 1.7058, "step": 74520 }, { "epoch": 2.48, "grad_norm": 0.6951645016670227, "learning_rate": 4.350102528791184e-05, "loss": 1.7744, "step": 74521 }, { "epoch": 2.48, "grad_norm": 0.7115108966827393, "learning_rate": 4.349560462445247e-05, "loss": 1.709, "step": 74522 }, { "epoch": 2.48, "grad_norm": 0.6904138922691345, "learning_rate": 4.349018427235155e-05, "loss": 1.6966, "step": 74523 }, { "epoch": 2.48, "grad_norm": 0.7060960531234741, "learning_rate": 4.348476423161582e-05, "loss": 1.7083, "step": 74524 }, { "epoch": 2.48, "grad_norm": 0.7063333988189697, "learning_rate": 4.3479344502251726e-05, "loss": 1.6657, "step": 74525 }, { "epoch": 2.48, "grad_norm": 0.6834406852722168, "learning_rate": 4.347392508426598e-05, "loss": 1.6959, "step": 74526 }, { "epoch": 2.48, "grad_norm": 0.7000603675842285, "learning_rate": 4.346850597766509e-05, "loss": 1.6835, "step": 74527 }, { "epoch": 2.48, "grad_norm": 0.6910738348960876, "learning_rate": 4.346308718245558e-05, "loss": 1.6642, "step": 74528 }, { "epoch": 2.48, "grad_norm": 0.6792517304420471, "learning_rate": 4.345766869864415e-05, "loss": 1.7235, "step": 74529 }, { "epoch": 2.48, "grad_norm": 0.7121409177780151, "learning_rate": 4.345225052623728e-05, "loss": 1.6501, "step": 74530 }, { "epoch": 2.48, "grad_norm": 0.6804837584495544, "learning_rate": 4.3446832665241536e-05, "loss": 1.6906, "step": 74531 }, { "epoch": 2.48, "grad_norm": 0.7140771746635437, "learning_rate": 4.344141511566352e-05, "loss": 1.6813, "step": 74532 }, { "epoch": 2.48, "grad_norm": 0.7043910622596741, "learning_rate": 4.34359978775099e-05, "loss": 1.6054, "step": 74533 }, { "epoch": 2.48, "grad_norm": 0.7242224216461182, "learning_rate": 4.343058095078719e-05, "loss": 1.6568, "step": 74534 }, { "epoch": 2.48, "grad_norm": 0.6617411971092224, "learning_rate": 4.3425164335501884e-05, "loss": 1.7185, "step": 74535 }, { "epoch": 2.48, "grad_norm": 0.6712237000465393, "learning_rate": 4.341974803166065e-05, "loss": 1.6063, "step": 74536 }, { "epoch": 2.48, "grad_norm": 0.6931830048561096, "learning_rate": 4.3414332039270083e-05, "loss": 1.756, "step": 74537 }, { "epoch": 2.48, "grad_norm": 0.7323886156082153, "learning_rate": 4.340891635833661e-05, "loss": 1.7643, "step": 74538 }, { "epoch": 2.48, "grad_norm": 0.7012863159179688, "learning_rate": 4.3403500988866926e-05, "loss": 1.6446, "step": 74539 }, { "epoch": 2.48, "grad_norm": 0.6817016005516052, "learning_rate": 4.339808593086765e-05, "loss": 1.7307, "step": 74540 }, { "epoch": 2.48, "grad_norm": 0.6991035342216492, "learning_rate": 4.33926711843453e-05, "loss": 1.737, "step": 74541 }, { "epoch": 2.48, "grad_norm": 0.6903989911079407, "learning_rate": 4.338725674930634e-05, "loss": 1.6979, "step": 74542 }, { "epoch": 2.48, "grad_norm": 0.6829936504364014, "learning_rate": 4.338184262575752e-05, "loss": 1.6681, "step": 74543 }, { "epoch": 2.48, "grad_norm": 0.6975448727607727, "learning_rate": 4.3376428813705335e-05, "loss": 1.6887, "step": 74544 }, { "epoch": 2.48, "grad_norm": 0.7012113332748413, "learning_rate": 4.3371015313156296e-05, "loss": 1.6749, "step": 74545 }, { "epoch": 2.48, "grad_norm": 0.6595067381858826, "learning_rate": 4.3365602124117014e-05, "loss": 1.648, "step": 74546 }, { "epoch": 2.48, "grad_norm": 0.6638880968093872, "learning_rate": 4.336018924659415e-05, "loss": 1.5838, "step": 74547 }, { "epoch": 2.48, "grad_norm": 0.7062906622886658, "learning_rate": 4.3354776680594214e-05, "loss": 1.705, "step": 74548 }, { "epoch": 2.48, "grad_norm": 0.7086175084114075, "learning_rate": 4.334936442612369e-05, "loss": 1.6247, "step": 74549 }, { "epoch": 2.48, "grad_norm": 0.7000487446784973, "learning_rate": 4.334395248318929e-05, "loss": 1.6909, "step": 74550 }, { "epoch": 2.48, "grad_norm": 0.6901229619979858, "learning_rate": 4.333854085179752e-05, "loss": 1.6469, "step": 74551 }, { "epoch": 2.48, "grad_norm": 0.7002785205841064, "learning_rate": 4.333312953195487e-05, "loss": 1.7162, "step": 74552 }, { "epoch": 2.48, "grad_norm": 0.7001365423202515, "learning_rate": 4.3327718523667967e-05, "loss": 1.776, "step": 74553 }, { "epoch": 2.48, "grad_norm": 0.7225643992424011, "learning_rate": 4.3322307826943504e-05, "loss": 1.7365, "step": 74554 }, { "epoch": 2.48, "grad_norm": 0.6729978322982788, "learning_rate": 4.331689744178791e-05, "loss": 1.6094, "step": 74555 }, { "epoch": 2.48, "grad_norm": 0.7316743731498718, "learning_rate": 4.331148736820772e-05, "loss": 1.7417, "step": 74556 }, { "epoch": 2.48, "grad_norm": 0.6843140125274658, "learning_rate": 4.330607760620962e-05, "loss": 1.624, "step": 74557 }, { "epoch": 2.48, "grad_norm": 0.6763534545898438, "learning_rate": 4.330066815580011e-05, "loss": 1.6111, "step": 74558 }, { "epoch": 2.48, "grad_norm": 0.7037004828453064, "learning_rate": 4.3295259016985714e-05, "loss": 1.7111, "step": 74559 }, { "epoch": 2.48, "grad_norm": 0.6967387199401855, "learning_rate": 4.3289850189773134e-05, "loss": 1.6811, "step": 74560 }, { "epoch": 2.48, "grad_norm": 0.6779856085777283, "learning_rate": 4.328444167416879e-05, "loss": 1.7001, "step": 74561 }, { "epoch": 2.48, "grad_norm": 0.7011392116546631, "learning_rate": 4.327903347017935e-05, "loss": 1.6973, "step": 74562 }, { "epoch": 2.48, "grad_norm": 0.689054548740387, "learning_rate": 4.3273625577811264e-05, "loss": 1.654, "step": 74563 }, { "epoch": 2.48, "grad_norm": 0.701120913028717, "learning_rate": 4.326821799707124e-05, "loss": 1.7715, "step": 74564 }, { "epoch": 2.48, "grad_norm": 0.6886553168296814, "learning_rate": 4.32628107279658e-05, "loss": 1.699, "step": 74565 }, { "epoch": 2.48, "grad_norm": 0.6985970139503479, "learning_rate": 4.325740377050138e-05, "loss": 1.6703, "step": 74566 }, { "epoch": 2.48, "grad_norm": 0.6928698420524597, "learning_rate": 4.3251997124684724e-05, "loss": 1.6652, "step": 74567 }, { "epoch": 2.48, "grad_norm": 0.6846022605895996, "learning_rate": 4.324659079052224e-05, "loss": 1.771, "step": 74568 }, { "epoch": 2.48, "grad_norm": 0.7048583626747131, "learning_rate": 4.3241184768020655e-05, "loss": 1.6972, "step": 74569 }, { "epoch": 2.48, "grad_norm": 0.6979902982711792, "learning_rate": 4.3235779057186426e-05, "loss": 1.7436, "step": 74570 }, { "epoch": 2.48, "grad_norm": 0.6727291941642761, "learning_rate": 4.3230373658026054e-05, "loss": 1.7244, "step": 74571 }, { "epoch": 2.48, "grad_norm": 0.6782350540161133, "learning_rate": 4.322496857054623e-05, "loss": 1.6418, "step": 74572 }, { "epoch": 2.48, "grad_norm": 0.6774172186851501, "learning_rate": 4.321956379475341e-05, "loss": 1.6876, "step": 74573 }, { "epoch": 2.48, "grad_norm": 0.6811506152153015, "learning_rate": 4.3214159330654264e-05, "loss": 1.5867, "step": 74574 }, { "epoch": 2.48, "grad_norm": 0.7076746821403503, "learning_rate": 4.320875517825525e-05, "loss": 1.7294, "step": 74575 }, { "epoch": 2.48, "grad_norm": 0.7098850607872009, "learning_rate": 4.320335133756303e-05, "loss": 1.7, "step": 74576 }, { "epoch": 2.48, "grad_norm": 0.7043958902359009, "learning_rate": 4.3197947808584066e-05, "loss": 1.6279, "step": 74577 }, { "epoch": 2.48, "grad_norm": 0.6794654726982117, "learning_rate": 4.319254459132492e-05, "loss": 1.6758, "step": 74578 }, { "epoch": 2.48, "grad_norm": 0.67529296875, "learning_rate": 4.318714168579225e-05, "loss": 1.7223, "step": 74579 }, { "epoch": 2.48, "grad_norm": 0.6737878918647766, "learning_rate": 4.318173909199255e-05, "loss": 1.6281, "step": 74580 }, { "epoch": 2.48, "grad_norm": 0.6934783458709717, "learning_rate": 4.317633680993232e-05, "loss": 1.6725, "step": 74581 }, { "epoch": 2.48, "grad_norm": 0.7053713798522949, "learning_rate": 4.317093483961815e-05, "loss": 1.6823, "step": 74582 }, { "epoch": 2.48, "grad_norm": 0.7091491222381592, "learning_rate": 4.316553318105671e-05, "loss": 1.7554, "step": 74583 }, { "epoch": 2.48, "grad_norm": 0.6999031901359558, "learning_rate": 4.3160131834254485e-05, "loss": 1.7456, "step": 74584 }, { "epoch": 2.48, "grad_norm": 0.6911062002182007, "learning_rate": 4.315473079921794e-05, "loss": 1.7361, "step": 74585 }, { "epoch": 2.48, "grad_norm": 0.6934034824371338, "learning_rate": 4.314933007595377e-05, "loss": 1.7139, "step": 74586 }, { "epoch": 2.48, "grad_norm": 0.6805646419525146, "learning_rate": 4.314392966446844e-05, "loss": 1.7439, "step": 74587 }, { "epoch": 2.48, "grad_norm": 0.6918418407440186, "learning_rate": 4.31385295647685e-05, "loss": 1.6359, "step": 74588 }, { "epoch": 2.48, "grad_norm": 0.6837663054466248, "learning_rate": 4.313312977686052e-05, "loss": 1.6779, "step": 74589 }, { "epoch": 2.48, "grad_norm": 0.7083439230918884, "learning_rate": 4.312773030075123e-05, "loss": 1.7478, "step": 74590 }, { "epoch": 2.48, "grad_norm": 0.6936283111572266, "learning_rate": 4.312233113644689e-05, "loss": 1.6773, "step": 74591 }, { "epoch": 2.48, "grad_norm": 0.7185112237930298, "learning_rate": 4.3116932283954156e-05, "loss": 1.7358, "step": 74592 }, { "epoch": 2.48, "grad_norm": 0.68694669008255, "learning_rate": 4.3111533743279725e-05, "loss": 1.686, "step": 74593 }, { "epoch": 2.48, "grad_norm": 0.6915079355239868, "learning_rate": 4.310613551443003e-05, "loss": 1.6956, "step": 74594 }, { "epoch": 2.48, "grad_norm": 0.6994206309318542, "learning_rate": 4.3100737597411525e-05, "loss": 1.7358, "step": 74595 }, { "epoch": 2.48, "grad_norm": 0.6987661719322205, "learning_rate": 4.3095339992230916e-05, "loss": 1.6552, "step": 74596 }, { "epoch": 2.48, "grad_norm": 0.6743192672729492, "learning_rate": 4.308994269889485e-05, "loss": 1.6954, "step": 74597 }, { "epoch": 2.48, "grad_norm": 0.711143434047699, "learning_rate": 4.3084545717409605e-05, "loss": 1.7296, "step": 74598 }, { "epoch": 2.48, "grad_norm": 0.6845039129257202, "learning_rate": 4.3079149047781826e-05, "loss": 1.6396, "step": 74599 }, { "epoch": 2.48, "grad_norm": 0.700143575668335, "learning_rate": 4.3073752690018216e-05, "loss": 1.6997, "step": 74600 }, { "epoch": 2.48, "grad_norm": 0.6955525279045105, "learning_rate": 4.3068356644125204e-05, "loss": 1.7383, "step": 74601 }, { "epoch": 2.48, "grad_norm": 0.7071225643157959, "learning_rate": 4.3062960910109276e-05, "loss": 1.7209, "step": 74602 }, { "epoch": 2.48, "grad_norm": 0.690364420413971, "learning_rate": 4.305756548797704e-05, "loss": 1.5762, "step": 74603 }, { "epoch": 2.48, "grad_norm": 0.6876898407936096, "learning_rate": 4.3052170377735216e-05, "loss": 1.702, "step": 74604 }, { "epoch": 2.48, "grad_norm": 0.6930354833602905, "learning_rate": 4.3046775579390066e-05, "loss": 1.6862, "step": 74605 }, { "epoch": 2.48, "grad_norm": 0.6764232516288757, "learning_rate": 4.304138109294826e-05, "loss": 1.6715, "step": 74606 }, { "epoch": 2.48, "grad_norm": 0.7045423984527588, "learning_rate": 4.303598691841644e-05, "loss": 1.6906, "step": 74607 }, { "epoch": 2.48, "grad_norm": 0.6870086193084717, "learning_rate": 4.3030593055801055e-05, "loss": 1.6937, "step": 74608 }, { "epoch": 2.48, "grad_norm": 0.6745772361755371, "learning_rate": 4.302519950510859e-05, "loss": 1.6761, "step": 74609 }, { "epoch": 2.48, "grad_norm": 0.7167269587516785, "learning_rate": 4.301980626634577e-05, "loss": 1.7483, "step": 74610 }, { "epoch": 2.48, "grad_norm": 0.7079011797904968, "learning_rate": 4.3014413339518996e-05, "loss": 1.6671, "step": 74611 }, { "epoch": 2.48, "grad_norm": 0.720862865447998, "learning_rate": 4.3009020724634825e-05, "loss": 1.5555, "step": 74612 }, { "epoch": 2.48, "grad_norm": 0.6959152221679688, "learning_rate": 4.300362842169982e-05, "loss": 1.7077, "step": 74613 }, { "epoch": 2.48, "grad_norm": 0.6790532469749451, "learning_rate": 4.2998236430720614e-05, "loss": 1.6711, "step": 74614 }, { "epoch": 2.48, "grad_norm": 0.6863207221031189, "learning_rate": 4.299284475170369e-05, "loss": 1.7502, "step": 74615 }, { "epoch": 2.48, "grad_norm": 0.6657809019088745, "learning_rate": 4.298745338465552e-05, "loss": 1.6086, "step": 74616 }, { "epoch": 2.48, "grad_norm": 0.7044779658317566, "learning_rate": 4.298206232958277e-05, "loss": 1.6843, "step": 74617 }, { "epoch": 2.48, "grad_norm": 0.6983675956726074, "learning_rate": 4.2976671586491954e-05, "loss": 1.6096, "step": 74618 }, { "epoch": 2.48, "grad_norm": 0.6881058216094971, "learning_rate": 4.297128115538947e-05, "loss": 1.695, "step": 74619 }, { "epoch": 2.48, "grad_norm": 0.7151364684104919, "learning_rate": 4.2965891036282084e-05, "loss": 1.7646, "step": 74620 }, { "epoch": 2.48, "grad_norm": 0.6808916926383972, "learning_rate": 4.296050122917616e-05, "loss": 1.6815, "step": 74621 }, { "epoch": 2.48, "grad_norm": 0.6893199682235718, "learning_rate": 4.2955111734078354e-05, "loss": 1.6771, "step": 74622 }, { "epoch": 2.48, "grad_norm": 0.7071474194526672, "learning_rate": 4.294972255099512e-05, "loss": 1.7374, "step": 74623 }, { "epoch": 2.48, "grad_norm": 0.7149102687835693, "learning_rate": 4.294433367993314e-05, "loss": 1.6732, "step": 74624 }, { "epoch": 2.48, "grad_norm": 0.6932750940322876, "learning_rate": 4.293894512089883e-05, "loss": 1.6697, "step": 74625 }, { "epoch": 2.48, "grad_norm": 0.6904401779174805, "learning_rate": 4.293355687389871e-05, "loss": 1.6456, "step": 74626 }, { "epoch": 2.48, "grad_norm": 0.6855463981628418, "learning_rate": 4.292816893893946e-05, "loss": 1.6886, "step": 74627 }, { "epoch": 2.48, "grad_norm": 0.724812388420105, "learning_rate": 4.2922781316027454e-05, "loss": 1.7198, "step": 74628 }, { "epoch": 2.48, "grad_norm": 0.6795299649238586, "learning_rate": 4.291739400516937e-05, "loss": 1.7123, "step": 74629 }, { "epoch": 2.48, "grad_norm": 0.7092992663383484, "learning_rate": 4.29120070063717e-05, "loss": 1.6703, "step": 74630 }, { "epoch": 2.48, "grad_norm": 0.6879984140396118, "learning_rate": 4.2906620319640895e-05, "loss": 1.7527, "step": 74631 }, { "epoch": 2.48, "grad_norm": 0.7043594717979431, "learning_rate": 4.290123394498366e-05, "loss": 1.6452, "step": 74632 }, { "epoch": 2.48, "grad_norm": 0.7185215950012207, "learning_rate": 4.2895847882406356e-05, "loss": 1.6516, "step": 74633 }, { "epoch": 2.48, "grad_norm": 0.6940340399742126, "learning_rate": 4.2890462131915707e-05, "loss": 1.6854, "step": 74634 }, { "epoch": 2.48, "grad_norm": 0.699722945690155, "learning_rate": 4.288507669351804e-05, "loss": 1.7395, "step": 74635 }, { "epoch": 2.48, "grad_norm": 0.6760064959526062, "learning_rate": 4.2879691567220124e-05, "loss": 1.6325, "step": 74636 }, { "epoch": 2.48, "grad_norm": 0.6895431280136108, "learning_rate": 4.287430675302834e-05, "loss": 1.6467, "step": 74637 }, { "epoch": 2.48, "grad_norm": 0.6700307726860046, "learning_rate": 4.2868922250949203e-05, "loss": 1.5952, "step": 74638 }, { "epoch": 2.48, "grad_norm": 0.6885691285133362, "learning_rate": 4.2863538060989385e-05, "loss": 1.6858, "step": 74639 }, { "epoch": 2.48, "grad_norm": 0.7104171514511108, "learning_rate": 4.2858154183155266e-05, "loss": 1.6815, "step": 74640 }, { "epoch": 2.48, "grad_norm": 0.6855547428131104, "learning_rate": 4.285277061745353e-05, "loss": 1.6681, "step": 74641 }, { "epoch": 2.48, "grad_norm": 0.7102768421173096, "learning_rate": 4.284738736389058e-05, "loss": 1.7168, "step": 74642 }, { "epoch": 2.48, "grad_norm": 0.6884139180183411, "learning_rate": 4.2842004422473076e-05, "loss": 1.6709, "step": 74643 }, { "epoch": 2.48, "grad_norm": 0.6915152668952942, "learning_rate": 4.283662179320751e-05, "loss": 1.7436, "step": 74644 }, { "epoch": 2.48, "grad_norm": 0.6726833581924438, "learning_rate": 4.28312394761003e-05, "loss": 1.7062, "step": 74645 }, { "epoch": 2.48, "grad_norm": 0.6746147871017456, "learning_rate": 4.2825857471158155e-05, "loss": 1.6852, "step": 74646 }, { "epoch": 2.48, "grad_norm": 0.6736924648284912, "learning_rate": 4.28204757783875e-05, "loss": 1.6853, "step": 74647 }, { "epoch": 2.48, "grad_norm": 0.7049019932746887, "learning_rate": 4.281509439779486e-05, "loss": 1.7113, "step": 74648 }, { "epoch": 2.48, "grad_norm": 0.6875937581062317, "learning_rate": 4.2809713329386766e-05, "loss": 1.6646, "step": 74649 }, { "epoch": 2.48, "grad_norm": 0.6947957873344421, "learning_rate": 4.2804332573169876e-05, "loss": 1.7529, "step": 74650 }, { "epoch": 2.48, "grad_norm": 0.69029700756073, "learning_rate": 4.279895212915062e-05, "loss": 1.7341, "step": 74651 }, { "epoch": 2.48, "grad_norm": 0.6699047684669495, "learning_rate": 4.27935719973355e-05, "loss": 1.6949, "step": 74652 }, { "epoch": 2.48, "grad_norm": 0.7030790448188782, "learning_rate": 4.27881921777311e-05, "loss": 1.7254, "step": 74653 }, { "epoch": 2.48, "grad_norm": 0.68536376953125, "learning_rate": 4.278281267034398e-05, "loss": 1.6982, "step": 74654 }, { "epoch": 2.48, "grad_norm": 0.6909101605415344, "learning_rate": 4.277743347518054e-05, "loss": 1.6214, "step": 74655 }, { "epoch": 2.48, "grad_norm": 0.6869783997535706, "learning_rate": 4.277205459224742e-05, "loss": 1.6665, "step": 74656 }, { "epoch": 2.48, "grad_norm": 0.6891441941261292, "learning_rate": 4.276667602155116e-05, "loss": 1.707, "step": 74657 }, { "epoch": 2.48, "grad_norm": 0.6910253167152405, "learning_rate": 4.276129776309827e-05, "loss": 1.7695, "step": 74658 }, { "epoch": 2.48, "grad_norm": 0.703848659992218, "learning_rate": 4.2755919816895204e-05, "loss": 1.7168, "step": 74659 }, { "epoch": 2.48, "grad_norm": 0.7207818031311035, "learning_rate": 4.275054218294859e-05, "loss": 1.6637, "step": 74660 }, { "epoch": 2.48, "grad_norm": 0.6696927547454834, "learning_rate": 4.2745164861264924e-05, "loss": 1.6526, "step": 74661 }, { "epoch": 2.48, "grad_norm": 0.688975989818573, "learning_rate": 4.273978785185067e-05, "loss": 1.6398, "step": 74662 }, { "epoch": 2.48, "grad_norm": 0.6940217018127441, "learning_rate": 4.273441115471239e-05, "loss": 1.6894, "step": 74663 }, { "epoch": 2.48, "grad_norm": 0.691436231136322, "learning_rate": 4.272903476985671e-05, "loss": 1.6593, "step": 74664 }, { "epoch": 2.48, "grad_norm": 0.694798469543457, "learning_rate": 4.2723658697290056e-05, "loss": 1.6247, "step": 74665 }, { "epoch": 2.48, "grad_norm": 0.7056841850280762, "learning_rate": 4.271828293701892e-05, "loss": 1.6671, "step": 74666 }, { "epoch": 2.48, "grad_norm": 0.7901853919029236, "learning_rate": 4.2712907489049944e-05, "loss": 1.7016, "step": 74667 }, { "epoch": 2.48, "grad_norm": 0.6792478561401367, "learning_rate": 4.2707532353389604e-05, "loss": 1.7035, "step": 74668 }, { "epoch": 2.48, "grad_norm": 0.6680031418800354, "learning_rate": 4.270215753004431e-05, "loss": 1.5793, "step": 74669 }, { "epoch": 2.48, "grad_norm": 0.7531539797782898, "learning_rate": 4.269678301902072e-05, "loss": 1.6343, "step": 74670 }, { "epoch": 2.48, "grad_norm": 0.698241651058197, "learning_rate": 4.269140882032536e-05, "loss": 1.7366, "step": 74671 }, { "epoch": 2.48, "grad_norm": 0.704534113407135, "learning_rate": 4.268603493396475e-05, "loss": 1.7664, "step": 74672 }, { "epoch": 2.48, "grad_norm": 0.6781858801841736, "learning_rate": 4.268066135994527e-05, "loss": 1.7007, "step": 74673 }, { "epoch": 2.48, "grad_norm": 0.6889708042144775, "learning_rate": 4.2675288098273665e-05, "loss": 1.7052, "step": 74674 }, { "epoch": 2.48, "grad_norm": 0.6808251738548279, "learning_rate": 4.2669915148956304e-05, "loss": 1.7132, "step": 74675 }, { "epoch": 2.48, "grad_norm": 0.7139672636985779, "learning_rate": 4.266454251199971e-05, "loss": 1.7058, "step": 74676 }, { "epoch": 2.48, "grad_norm": 0.6721048355102539, "learning_rate": 4.2659170187410516e-05, "loss": 1.6907, "step": 74677 }, { "epoch": 2.48, "grad_norm": 0.6938461661338806, "learning_rate": 4.2653798175195087e-05, "loss": 1.6966, "step": 74678 }, { "epoch": 2.48, "grad_norm": 0.7102137804031372, "learning_rate": 4.264842647536011e-05, "loss": 1.6537, "step": 74679 }, { "epoch": 2.48, "grad_norm": 0.6751235127449036, "learning_rate": 4.2643055087911946e-05, "loss": 1.6318, "step": 74680 }, { "epoch": 2.48, "grad_norm": 0.6984020471572876, "learning_rate": 4.2637684012857265e-05, "loss": 1.6543, "step": 74681 }, { "epoch": 2.48, "grad_norm": 0.6848958134651184, "learning_rate": 4.263231325020252e-05, "loss": 1.6112, "step": 74682 }, { "epoch": 2.48, "grad_norm": 0.6684855222702026, "learning_rate": 4.262694279995414e-05, "loss": 1.6015, "step": 74683 }, { "epoch": 2.48, "grad_norm": 0.6835616230964661, "learning_rate": 4.262157266211883e-05, "loss": 1.6998, "step": 74684 }, { "epoch": 2.48, "grad_norm": 0.6947877407073975, "learning_rate": 4.2616202836702905e-05, "loss": 1.7499, "step": 74685 }, { "epoch": 2.48, "grad_norm": 0.7026699781417847, "learning_rate": 4.261083332371307e-05, "loss": 1.7373, "step": 74686 }, { "epoch": 2.48, "grad_norm": 0.6757871508598328, "learning_rate": 4.260546412315578e-05, "loss": 1.6926, "step": 74687 }, { "epoch": 2.48, "grad_norm": 0.7096865773200989, "learning_rate": 4.2600095235037425e-05, "loss": 1.6837, "step": 74688 }, { "epoch": 2.48, "grad_norm": 0.6894927620887756, "learning_rate": 4.2594726659364705e-05, "loss": 1.6425, "step": 74689 }, { "epoch": 2.48, "grad_norm": 0.682115375995636, "learning_rate": 4.258935839614399e-05, "loss": 1.6591, "step": 74690 }, { "epoch": 2.48, "grad_norm": 0.6904592514038086, "learning_rate": 4.258399044538197e-05, "loss": 1.6009, "step": 74691 }, { "epoch": 2.49, "grad_norm": 0.679705798625946, "learning_rate": 4.257862280708496e-05, "loss": 1.6534, "step": 74692 }, { "epoch": 2.49, "grad_norm": 0.72785484790802, "learning_rate": 4.2573255481259636e-05, "loss": 1.7469, "step": 74693 }, { "epoch": 2.49, "grad_norm": 0.6970347166061401, "learning_rate": 4.256788846791246e-05, "loss": 1.6847, "step": 74694 }, { "epoch": 2.49, "grad_norm": 0.6885728240013123, "learning_rate": 4.256252176704985e-05, "loss": 1.7144, "step": 74695 }, { "epoch": 2.49, "grad_norm": 0.6732901334762573, "learning_rate": 4.255715537867851e-05, "loss": 1.6678, "step": 74696 }, { "epoch": 2.49, "grad_norm": 0.6921296715736389, "learning_rate": 4.255178930280483e-05, "loss": 1.637, "step": 74697 }, { "epoch": 2.49, "grad_norm": 0.6761736869812012, "learning_rate": 4.254642353943527e-05, "loss": 1.7069, "step": 74698 }, { "epoch": 2.49, "grad_norm": 0.6754629015922546, "learning_rate": 4.254105808857643e-05, "loss": 1.7097, "step": 74699 }, { "epoch": 2.49, "grad_norm": 0.6888878345489502, "learning_rate": 4.253569295023487e-05, "loss": 1.6602, "step": 74700 }, { "epoch": 2.49, "grad_norm": 0.7101443409919739, "learning_rate": 4.253032812441704e-05, "loss": 1.6998, "step": 74701 }, { "epoch": 2.49, "grad_norm": 0.6780232787132263, "learning_rate": 4.2524963611129395e-05, "loss": 1.6377, "step": 74702 }, { "epoch": 2.49, "grad_norm": 0.7154350876808167, "learning_rate": 4.251959941037857e-05, "loss": 1.6347, "step": 74703 }, { "epoch": 2.49, "grad_norm": 0.6736712455749512, "learning_rate": 4.2514235522171034e-05, "loss": 1.6882, "step": 74704 }, { "epoch": 2.49, "grad_norm": 0.6996257901191711, "learning_rate": 4.250887194651319e-05, "loss": 1.7457, "step": 74705 }, { "epoch": 2.49, "grad_norm": 0.6781949400901794, "learning_rate": 4.2503508683411626e-05, "loss": 1.6783, "step": 74706 }, { "epoch": 2.49, "grad_norm": 0.7089115381240845, "learning_rate": 4.2498145732873034e-05, "loss": 1.6849, "step": 74707 }, { "epoch": 2.49, "grad_norm": 0.6911394000053406, "learning_rate": 4.2492783094903585e-05, "loss": 1.6586, "step": 74708 }, { "epoch": 2.49, "grad_norm": 0.7273156046867371, "learning_rate": 4.248742076950996e-05, "loss": 1.7777, "step": 74709 }, { "epoch": 2.49, "grad_norm": 0.697315514087677, "learning_rate": 4.2482058756698736e-05, "loss": 1.6649, "step": 74710 }, { "epoch": 2.49, "grad_norm": 0.7076742053031921, "learning_rate": 4.247669705647636e-05, "loss": 1.7094, "step": 74711 }, { "epoch": 2.49, "grad_norm": 0.6768556237220764, "learning_rate": 4.2471335668849237e-05, "loss": 1.7222, "step": 74712 }, { "epoch": 2.49, "grad_norm": 0.6948131918907166, "learning_rate": 4.246597459382399e-05, "loss": 1.6699, "step": 74713 }, { "epoch": 2.49, "grad_norm": 0.6870689392089844, "learning_rate": 4.246061383140722e-05, "loss": 1.7048, "step": 74714 }, { "epoch": 2.49, "grad_norm": 0.72226482629776, "learning_rate": 4.2455253381605214e-05, "loss": 1.7224, "step": 74715 }, { "epoch": 2.49, "grad_norm": 0.6869898438453674, "learning_rate": 4.244989324442454e-05, "loss": 1.6932, "step": 74716 }, { "epoch": 2.49, "grad_norm": 0.6723069548606873, "learning_rate": 4.2444533419871854e-05, "loss": 1.5171, "step": 74717 }, { "epoch": 2.49, "grad_norm": 0.6877680420875549, "learning_rate": 4.243917390795351e-05, "loss": 1.6988, "step": 74718 }, { "epoch": 2.49, "grad_norm": 0.7047145962715149, "learning_rate": 4.243381470867603e-05, "loss": 1.7315, "step": 74719 }, { "epoch": 2.49, "grad_norm": 0.6966083645820618, "learning_rate": 4.242845582204595e-05, "loss": 1.7312, "step": 74720 }, { "epoch": 2.49, "grad_norm": 0.7025323510169983, "learning_rate": 4.2423097248069905e-05, "loss": 1.6461, "step": 74721 }, { "epoch": 2.49, "grad_norm": 0.6665171980857849, "learning_rate": 4.2417738986754125e-05, "loss": 1.6199, "step": 74722 }, { "epoch": 2.49, "grad_norm": 0.7261273860931396, "learning_rate": 4.2412381038105236e-05, "loss": 1.7229, "step": 74723 }, { "epoch": 2.49, "grad_norm": 0.7071693539619446, "learning_rate": 4.2407023402129866e-05, "loss": 1.6908, "step": 74724 }, { "epoch": 2.49, "grad_norm": 0.7038743495941162, "learning_rate": 4.2401666078834384e-05, "loss": 1.6935, "step": 74725 }, { "epoch": 2.49, "grad_norm": 0.6783781051635742, "learning_rate": 4.2396309068225276e-05, "loss": 1.6589, "step": 74726 }, { "epoch": 2.49, "grad_norm": 0.7000248432159424, "learning_rate": 4.2390952370309174e-05, "loss": 1.6917, "step": 74727 }, { "epoch": 2.49, "grad_norm": 0.697221577167511, "learning_rate": 4.238559598509247e-05, "loss": 1.6537, "step": 74728 }, { "epoch": 2.49, "grad_norm": 0.680077314376831, "learning_rate": 4.2380239912581627e-05, "loss": 1.7246, "step": 74729 }, { "epoch": 2.49, "grad_norm": 0.7167350649833679, "learning_rate": 4.237488415278324e-05, "loss": 1.7015, "step": 74730 }, { "epoch": 2.49, "grad_norm": 0.7095147371292114, "learning_rate": 4.236952870570384e-05, "loss": 1.6872, "step": 74731 }, { "epoch": 2.49, "grad_norm": 0.6968240737915039, "learning_rate": 4.236417357134988e-05, "loss": 1.6792, "step": 74732 }, { "epoch": 2.49, "grad_norm": 0.6976801753044128, "learning_rate": 4.235881874972776e-05, "loss": 1.6765, "step": 74733 }, { "epoch": 2.49, "grad_norm": 0.6919388175010681, "learning_rate": 4.235346424084415e-05, "loss": 1.6578, "step": 74734 }, { "epoch": 2.49, "grad_norm": 0.6931165456771851, "learning_rate": 4.2348110044705494e-05, "loss": 1.5994, "step": 74735 }, { "epoch": 2.49, "grad_norm": 0.6913716197013855, "learning_rate": 4.234275616131817e-05, "loss": 1.7068, "step": 74736 }, { "epoch": 2.49, "grad_norm": 0.6914777159690857, "learning_rate": 4.2337402590688866e-05, "loss": 1.6046, "step": 74737 }, { "epoch": 2.49, "grad_norm": 0.6873912811279297, "learning_rate": 4.2332049332823905e-05, "loss": 1.7054, "step": 74738 }, { "epoch": 2.49, "grad_norm": 0.7192933559417725, "learning_rate": 4.232669638772995e-05, "loss": 1.6557, "step": 74739 }, { "epoch": 2.49, "grad_norm": 0.7080162763595581, "learning_rate": 4.232134375541334e-05, "loss": 1.7092, "step": 74740 }, { "epoch": 2.49, "grad_norm": 0.6779107451438904, "learning_rate": 4.2315991435880723e-05, "loss": 1.6884, "step": 74741 }, { "epoch": 2.49, "grad_norm": 0.6749135851860046, "learning_rate": 4.231063942913854e-05, "loss": 1.6429, "step": 74742 }, { "epoch": 2.49, "grad_norm": 0.6958823800086975, "learning_rate": 4.2305287735193205e-05, "loss": 1.5676, "step": 74743 }, { "epoch": 2.49, "grad_norm": 0.6908131837844849, "learning_rate": 4.2299936354051325e-05, "loss": 1.6212, "step": 74744 }, { "epoch": 2.49, "grad_norm": 0.6957947015762329, "learning_rate": 4.2294585285719316e-05, "loss": 1.7193, "step": 74745 }, { "epoch": 2.49, "grad_norm": 0.6829285621643066, "learning_rate": 4.228923453020375e-05, "loss": 1.6498, "step": 74746 }, { "epoch": 2.49, "grad_norm": 0.7220696806907654, "learning_rate": 4.228388408751109e-05, "loss": 1.7708, "step": 74747 }, { "epoch": 2.49, "grad_norm": 0.6983873248100281, "learning_rate": 4.2278533957647754e-05, "loss": 1.6659, "step": 74748 }, { "epoch": 2.49, "grad_norm": 0.6962769031524658, "learning_rate": 4.2273184140620375e-05, "loss": 1.6355, "step": 74749 }, { "epoch": 2.49, "grad_norm": 0.6983770728111267, "learning_rate": 4.226783463643528e-05, "loss": 1.7445, "step": 74750 }, { "epoch": 2.49, "grad_norm": 0.6773274540901184, "learning_rate": 4.226248544509917e-05, "loss": 1.669, "step": 74751 }, { "epoch": 2.49, "grad_norm": 0.6937570571899414, "learning_rate": 4.225713656661833e-05, "loss": 1.7186, "step": 74752 }, { "epoch": 2.49, "grad_norm": 0.6870848536491394, "learning_rate": 4.225178800099943e-05, "loss": 1.6218, "step": 74753 }, { "epoch": 2.49, "grad_norm": 0.7013381123542786, "learning_rate": 4.22464397482489e-05, "loss": 1.6078, "step": 74754 }, { "epoch": 2.49, "grad_norm": 0.68361896276474, "learning_rate": 4.2241091808373117e-05, "loss": 1.6745, "step": 74755 }, { "epoch": 2.49, "grad_norm": 0.6817308664321899, "learning_rate": 4.223574418137876e-05, "loss": 1.6573, "step": 74756 }, { "epoch": 2.49, "grad_norm": 0.6961687207221985, "learning_rate": 4.223039686727219e-05, "loss": 1.68, "step": 74757 }, { "epoch": 2.49, "grad_norm": 0.6951009035110474, "learning_rate": 4.222504986605993e-05, "loss": 1.7061, "step": 74758 }, { "epoch": 2.49, "grad_norm": 0.6989654898643494, "learning_rate": 4.2219703177748434e-05, "loss": 1.6991, "step": 74759 }, { "epoch": 2.49, "grad_norm": 0.7079272866249084, "learning_rate": 4.221435680234431e-05, "loss": 1.6952, "step": 74760 }, { "epoch": 2.49, "grad_norm": 0.6944717764854431, "learning_rate": 4.2209010739853975e-05, "loss": 1.6457, "step": 74761 }, { "epoch": 2.49, "grad_norm": 0.6950914263725281, "learning_rate": 4.2203664990283836e-05, "loss": 1.7095, "step": 74762 }, { "epoch": 2.49, "grad_norm": 0.6916086673736572, "learning_rate": 4.2198319553640546e-05, "loss": 1.6595, "step": 74763 }, { "epoch": 2.49, "grad_norm": 0.7089749574661255, "learning_rate": 4.2192974429930535e-05, "loss": 1.6686, "step": 74764 }, { "epoch": 2.49, "grad_norm": 0.6884850263595581, "learning_rate": 4.218762961916017e-05, "loss": 1.669, "step": 74765 }, { "epoch": 2.49, "grad_norm": 0.7223160266876221, "learning_rate": 4.2182285121336036e-05, "loss": 1.764, "step": 74766 }, { "epoch": 2.49, "grad_norm": 0.7055812478065491, "learning_rate": 4.21769409364647e-05, "loss": 1.5984, "step": 74767 }, { "epoch": 2.49, "grad_norm": 0.673213541507721, "learning_rate": 4.217159706455256e-05, "loss": 1.6526, "step": 74768 }, { "epoch": 2.49, "grad_norm": 0.7056391835212708, "learning_rate": 4.216625350560606e-05, "loss": 1.6551, "step": 74769 }, { "epoch": 2.49, "grad_norm": 0.6685934662818909, "learning_rate": 4.2160910259631786e-05, "loss": 1.695, "step": 74770 }, { "epoch": 2.49, "grad_norm": 0.702738344669342, "learning_rate": 4.215556732663619e-05, "loss": 1.686, "step": 74771 }, { "epoch": 2.49, "grad_norm": 0.7038169503211975, "learning_rate": 4.2150224706625656e-05, "loss": 1.6728, "step": 74772 }, { "epoch": 2.49, "grad_norm": 0.6827843189239502, "learning_rate": 4.2144882399606795e-05, "loss": 1.6292, "step": 74773 }, { "epoch": 2.49, "grad_norm": 2.2784667015075684, "learning_rate": 4.213954040558609e-05, "loss": 1.6958, "step": 74774 }, { "epoch": 2.49, "grad_norm": 0.689274787902832, "learning_rate": 4.2134198724569976e-05, "loss": 1.7128, "step": 74775 }, { "epoch": 2.49, "grad_norm": 0.71736079454422, "learning_rate": 4.2128857356564914e-05, "loss": 1.7368, "step": 74776 }, { "epoch": 2.49, "grad_norm": 0.7006298899650574, "learning_rate": 4.212351630157746e-05, "loss": 1.7484, "step": 74777 }, { "epoch": 2.49, "grad_norm": 0.6992948651313782, "learning_rate": 4.2118175559614086e-05, "loss": 1.6504, "step": 74778 }, { "epoch": 2.49, "grad_norm": 0.6893061399459839, "learning_rate": 4.2112835130681175e-05, "loss": 1.7306, "step": 74779 }, { "epoch": 2.49, "grad_norm": 0.7050646543502808, "learning_rate": 4.2107495014785295e-05, "loss": 1.7485, "step": 74780 }, { "epoch": 2.49, "grad_norm": 0.6802500486373901, "learning_rate": 4.210215521193298e-05, "loss": 1.6828, "step": 74781 }, { "epoch": 2.49, "grad_norm": 0.694000244140625, "learning_rate": 4.209681572213064e-05, "loss": 1.7099, "step": 74782 }, { "epoch": 2.49, "grad_norm": 0.7046401500701904, "learning_rate": 4.209147654538472e-05, "loss": 1.8045, "step": 74783 }, { "epoch": 2.49, "grad_norm": 0.6967551112174988, "learning_rate": 4.2086137681701804e-05, "loss": 1.7756, "step": 74784 }, { "epoch": 2.49, "grad_norm": 0.6941556334495544, "learning_rate": 4.208079913108829e-05, "loss": 1.6717, "step": 74785 }, { "epoch": 2.49, "grad_norm": 0.6917631030082703, "learning_rate": 4.207546089355065e-05, "loss": 1.7284, "step": 74786 }, { "epoch": 2.49, "grad_norm": 0.6860117316246033, "learning_rate": 4.2070122969095364e-05, "loss": 1.7034, "step": 74787 }, { "epoch": 2.49, "grad_norm": 0.7077733874320984, "learning_rate": 4.2064785357729034e-05, "loss": 1.7878, "step": 74788 }, { "epoch": 2.49, "grad_norm": 0.6771179437637329, "learning_rate": 4.205944805945806e-05, "loss": 1.7126, "step": 74789 }, { "epoch": 2.49, "grad_norm": 0.6999731063842773, "learning_rate": 4.205411107428882e-05, "loss": 1.7835, "step": 74790 }, { "epoch": 2.49, "grad_norm": 0.7137601375579834, "learning_rate": 4.204877440222796e-05, "loss": 1.7133, "step": 74791 }, { "epoch": 2.49, "grad_norm": 0.6943147778511047, "learning_rate": 4.204343804328186e-05, "loss": 1.6774, "step": 74792 }, { "epoch": 2.49, "grad_norm": 0.7069740295410156, "learning_rate": 4.203810199745696e-05, "loss": 1.5915, "step": 74793 }, { "epoch": 2.49, "grad_norm": 0.6876963973045349, "learning_rate": 4.203276626475989e-05, "loss": 1.7104, "step": 74794 }, { "epoch": 2.49, "grad_norm": 0.7062616348266602, "learning_rate": 4.202743084519693e-05, "loss": 1.8003, "step": 74795 }, { "epoch": 2.49, "grad_norm": 0.6992464661598206, "learning_rate": 4.2022095738774756e-05, "loss": 1.7121, "step": 74796 }, { "epoch": 2.49, "grad_norm": 0.7003868818283081, "learning_rate": 4.201676094549965e-05, "loss": 1.7416, "step": 74797 }, { "epoch": 2.49, "grad_norm": 0.711098313331604, "learning_rate": 4.2011426465378285e-05, "loss": 1.7286, "step": 74798 }, { "epoch": 2.49, "grad_norm": 0.6995591521263123, "learning_rate": 4.2006092298417014e-05, "loss": 1.7369, "step": 74799 }, { "epoch": 2.49, "grad_norm": 0.6829240918159485, "learning_rate": 4.2000758444622276e-05, "loss": 1.6123, "step": 74800 }, { "epoch": 2.49, "grad_norm": 0.677581787109375, "learning_rate": 4.199542490400065e-05, "loss": 1.6378, "step": 74801 }, { "epoch": 2.49, "grad_norm": 0.7084109783172607, "learning_rate": 4.199009167655851e-05, "loss": 1.6199, "step": 74802 }, { "epoch": 2.49, "grad_norm": 0.6991279721260071, "learning_rate": 4.198475876230245e-05, "loss": 1.6096, "step": 74803 }, { "epoch": 2.49, "grad_norm": 0.699417769908905, "learning_rate": 4.197942616123889e-05, "loss": 1.6813, "step": 74804 }, { "epoch": 2.49, "grad_norm": 0.6803030967712402, "learning_rate": 4.1974093873374204e-05, "loss": 1.6711, "step": 74805 }, { "epoch": 2.49, "grad_norm": 0.7032561898231506, "learning_rate": 4.196876189871501e-05, "loss": 1.698, "step": 74806 }, { "epoch": 2.49, "grad_norm": 0.7033502459526062, "learning_rate": 4.196343023726767e-05, "loss": 1.6977, "step": 74807 }, { "epoch": 2.49, "grad_norm": 0.7071851491928101, "learning_rate": 4.195809888903875e-05, "loss": 1.6443, "step": 74808 }, { "epoch": 2.49, "grad_norm": 0.6692230105400085, "learning_rate": 4.195276785403464e-05, "loss": 1.6861, "step": 74809 }, { "epoch": 2.49, "grad_norm": 0.7179075479507446, "learning_rate": 4.194743713226191e-05, "loss": 1.7171, "step": 74810 }, { "epoch": 2.49, "grad_norm": 0.704919695854187, "learning_rate": 4.194210672372694e-05, "loss": 1.7145, "step": 74811 }, { "epoch": 2.49, "grad_norm": 0.6943321824073792, "learning_rate": 4.1936776628436175e-05, "loss": 1.6724, "step": 74812 }, { "epoch": 2.49, "grad_norm": 0.708757221698761, "learning_rate": 4.193144684639623e-05, "loss": 1.6726, "step": 74813 }, { "epoch": 2.49, "grad_norm": 0.7153657674789429, "learning_rate": 4.192611737761348e-05, "loss": 1.7487, "step": 74814 }, { "epoch": 2.49, "grad_norm": 0.701398491859436, "learning_rate": 4.19207882220943e-05, "loss": 1.625, "step": 74815 }, { "epoch": 2.49, "grad_norm": 0.6915767192840576, "learning_rate": 4.1915459379845274e-05, "loss": 1.6653, "step": 74816 }, { "epoch": 2.49, "grad_norm": 0.6953192949295044, "learning_rate": 4.191013085087291e-05, "loss": 1.7204, "step": 74817 }, { "epoch": 2.49, "grad_norm": 0.7019590735435486, "learning_rate": 4.1904802635183656e-05, "loss": 1.6708, "step": 74818 }, { "epoch": 2.49, "grad_norm": 0.6864073872566223, "learning_rate": 4.189947473278385e-05, "loss": 1.6222, "step": 74819 }, { "epoch": 2.49, "grad_norm": 0.692225456237793, "learning_rate": 4.189414714368011e-05, "loss": 1.6694, "step": 74820 }, { "epoch": 2.49, "grad_norm": 0.7121869921684265, "learning_rate": 4.188881986787885e-05, "loss": 1.6581, "step": 74821 }, { "epoch": 2.49, "grad_norm": 0.7221317887306213, "learning_rate": 4.1883492905386475e-05, "loss": 1.6728, "step": 74822 }, { "epoch": 2.49, "grad_norm": 0.7038707137107849, "learning_rate": 4.18781662562095e-05, "loss": 1.6121, "step": 74823 }, { "epoch": 2.49, "grad_norm": 0.6897622346878052, "learning_rate": 4.187283992035456e-05, "loss": 1.6761, "step": 74824 }, { "epoch": 2.49, "grad_norm": 0.7103566527366638, "learning_rate": 4.1867513897827786e-05, "loss": 1.711, "step": 74825 }, { "epoch": 2.49, "grad_norm": 0.7021484971046448, "learning_rate": 4.186218818863584e-05, "loss": 1.6541, "step": 74826 }, { "epoch": 2.49, "grad_norm": 0.7119796276092529, "learning_rate": 4.185686279278521e-05, "loss": 1.6901, "step": 74827 }, { "epoch": 2.49, "grad_norm": 0.6807399392127991, "learning_rate": 4.185153771028232e-05, "loss": 1.6284, "step": 74828 }, { "epoch": 2.49, "grad_norm": 0.7202203273773193, "learning_rate": 4.184621294113358e-05, "loss": 1.6773, "step": 74829 }, { "epoch": 2.49, "grad_norm": 0.7220132350921631, "learning_rate": 4.184088848534548e-05, "loss": 1.7127, "step": 74830 }, { "epoch": 2.49, "grad_norm": 0.7084851861000061, "learning_rate": 4.183556434292464e-05, "loss": 1.7115, "step": 74831 }, { "epoch": 2.49, "grad_norm": 0.7078285813331604, "learning_rate": 4.1830240513877265e-05, "loss": 1.6395, "step": 74832 }, { "epoch": 2.49, "grad_norm": 0.6960986256599426, "learning_rate": 4.1824916998209914e-05, "loss": 1.7213, "step": 74833 }, { "epoch": 2.49, "grad_norm": 0.7219701409339905, "learning_rate": 4.181959379592914e-05, "loss": 1.6286, "step": 74834 }, { "epoch": 2.49, "grad_norm": 0.6999400854110718, "learning_rate": 4.181427090704138e-05, "loss": 1.6735, "step": 74835 }, { "epoch": 2.49, "grad_norm": 0.6817804574966431, "learning_rate": 4.180894833155296e-05, "loss": 1.6705, "step": 74836 }, { "epoch": 2.49, "grad_norm": 0.7249133586883545, "learning_rate": 4.180362606947041e-05, "loss": 1.7528, "step": 74837 }, { "epoch": 2.49, "grad_norm": 0.706134557723999, "learning_rate": 4.179830412080038e-05, "loss": 1.7144, "step": 74838 }, { "epoch": 2.49, "grad_norm": 0.682753324508667, "learning_rate": 4.179298248554902e-05, "loss": 1.7008, "step": 74839 }, { "epoch": 2.49, "grad_norm": 0.7207621335983276, "learning_rate": 4.1787661163722944e-05, "loss": 1.6674, "step": 74840 }, { "epoch": 2.49, "grad_norm": 0.687333881855011, "learning_rate": 4.178234015532864e-05, "loss": 1.6806, "step": 74841 }, { "epoch": 2.49, "grad_norm": 0.7089336514472961, "learning_rate": 4.177701946037255e-05, "loss": 1.6969, "step": 74842 }, { "epoch": 2.49, "grad_norm": 0.7017059326171875, "learning_rate": 4.177169907886107e-05, "loss": 1.7686, "step": 74843 }, { "epoch": 2.49, "grad_norm": 0.6972223520278931, "learning_rate": 4.176637901080071e-05, "loss": 1.7066, "step": 74844 }, { "epoch": 2.49, "grad_norm": 0.7084961533546448, "learning_rate": 4.1761059256197946e-05, "loss": 1.6886, "step": 74845 }, { "epoch": 2.49, "grad_norm": 0.7065067291259766, "learning_rate": 4.1755739815059145e-05, "loss": 1.675, "step": 74846 }, { "epoch": 2.49, "grad_norm": 0.6964063048362732, "learning_rate": 4.175042068739082e-05, "loss": 1.7074, "step": 74847 }, { "epoch": 2.49, "grad_norm": 0.7014832496643066, "learning_rate": 4.1745101873199507e-05, "loss": 1.8292, "step": 74848 }, { "epoch": 2.49, "grad_norm": 0.6998072266578674, "learning_rate": 4.1739783372491565e-05, "loss": 1.7167, "step": 74849 }, { "epoch": 2.49, "grad_norm": 0.6808759570121765, "learning_rate": 4.173446518527342e-05, "loss": 1.6636, "step": 74850 }, { "epoch": 2.49, "grad_norm": 0.6798173785209656, "learning_rate": 4.172914731155168e-05, "loss": 1.6761, "step": 74851 }, { "epoch": 2.49, "grad_norm": 0.6832378506660461, "learning_rate": 4.172382975133266e-05, "loss": 1.6079, "step": 74852 }, { "epoch": 2.49, "grad_norm": 0.6946068406105042, "learning_rate": 4.171851250462279e-05, "loss": 1.6948, "step": 74853 }, { "epoch": 2.49, "grad_norm": 0.7204763293266296, "learning_rate": 4.171319557142867e-05, "loss": 1.6658, "step": 74854 }, { "epoch": 2.49, "grad_norm": 0.6902455687522888, "learning_rate": 4.170787895175659e-05, "loss": 1.6646, "step": 74855 }, { "epoch": 2.49, "grad_norm": 0.6938824653625488, "learning_rate": 4.170256264561318e-05, "loss": 1.6618, "step": 74856 }, { "epoch": 2.49, "grad_norm": 0.692536473274231, "learning_rate": 4.169724665300473e-05, "loss": 1.6217, "step": 74857 }, { "epoch": 2.49, "grad_norm": 0.7103161811828613, "learning_rate": 4.169193097393782e-05, "loss": 1.6544, "step": 74858 }, { "epoch": 2.49, "grad_norm": 0.728721559047699, "learning_rate": 4.168661560841885e-05, "loss": 1.7496, "step": 74859 }, { "epoch": 2.49, "grad_norm": 0.6897019147872925, "learning_rate": 4.16813005564542e-05, "loss": 1.6153, "step": 74860 }, { "epoch": 2.49, "grad_norm": 0.7152749300003052, "learning_rate": 4.1675985818050425e-05, "loss": 1.6532, "step": 74861 }, { "epoch": 2.49, "grad_norm": 0.6947925090789795, "learning_rate": 4.1670671393213926e-05, "loss": 1.6588, "step": 74862 }, { "epoch": 2.49, "grad_norm": 0.7033136487007141, "learning_rate": 4.16653572819512e-05, "loss": 1.7108, "step": 74863 }, { "epoch": 2.49, "grad_norm": 0.7105541229248047, "learning_rate": 4.166004348426869e-05, "loss": 1.6874, "step": 74864 }, { "epoch": 2.49, "grad_norm": 0.6876394152641296, "learning_rate": 4.165473000017274e-05, "loss": 1.741, "step": 74865 }, { "epoch": 2.49, "grad_norm": 0.7480178475379944, "learning_rate": 4.164941682966994e-05, "loss": 1.7294, "step": 74866 }, { "epoch": 2.49, "grad_norm": 0.7338472604751587, "learning_rate": 4.164410397276663e-05, "loss": 1.7893, "step": 74867 }, { "epoch": 2.49, "grad_norm": 0.6587204933166504, "learning_rate": 4.163879142946939e-05, "loss": 1.6214, "step": 74868 }, { "epoch": 2.49, "grad_norm": 0.6667792797088623, "learning_rate": 4.1633479199784514e-05, "loss": 1.5795, "step": 74869 }, { "epoch": 2.49, "grad_norm": 0.7170142531394958, "learning_rate": 4.162816728371857e-05, "loss": 1.6932, "step": 74870 }, { "epoch": 2.49, "grad_norm": 0.6777941584587097, "learning_rate": 4.1622855681277986e-05, "loss": 1.7458, "step": 74871 }, { "epoch": 2.49, "grad_norm": 0.7090051174163818, "learning_rate": 4.1617544392469124e-05, "loss": 1.6602, "step": 74872 }, { "epoch": 2.49, "grad_norm": 0.6771125793457031, "learning_rate": 4.1612233417298545e-05, "loss": 1.6978, "step": 74873 }, { "epoch": 2.49, "grad_norm": 0.6829938292503357, "learning_rate": 4.1606922755772645e-05, "loss": 1.6762, "step": 74874 }, { "epoch": 2.49, "grad_norm": 0.6930480599403381, "learning_rate": 4.1601612407897814e-05, "loss": 1.6728, "step": 74875 }, { "epoch": 2.49, "grad_norm": 0.6703370213508606, "learning_rate": 4.159630237368051e-05, "loss": 1.6367, "step": 74876 }, { "epoch": 2.49, "grad_norm": 0.7331887483596802, "learning_rate": 4.159099265312734e-05, "loss": 1.7597, "step": 74877 }, { "epoch": 2.49, "grad_norm": 0.7140327095985413, "learning_rate": 4.158568324624459e-05, "loss": 1.6592, "step": 74878 }, { "epoch": 2.49, "grad_norm": 0.684118390083313, "learning_rate": 4.1580374153038696e-05, "loss": 1.641, "step": 74879 }, { "epoch": 2.49, "grad_norm": 0.7102622985839844, "learning_rate": 4.1575065373516246e-05, "loss": 1.68, "step": 74880 }, { "epoch": 2.49, "grad_norm": 0.6972125768661499, "learning_rate": 4.156975690768354e-05, "loss": 1.7967, "step": 74881 }, { "epoch": 2.49, "grad_norm": 0.6831597685813904, "learning_rate": 4.156444875554703e-05, "loss": 1.6192, "step": 74882 }, { "epoch": 2.49, "grad_norm": 0.696022629737854, "learning_rate": 4.155914091711319e-05, "loss": 1.7282, "step": 74883 }, { "epoch": 2.49, "grad_norm": 0.6865578889846802, "learning_rate": 4.155383339238857e-05, "loss": 1.719, "step": 74884 }, { "epoch": 2.49, "grad_norm": 0.6792191863059998, "learning_rate": 4.154852618137947e-05, "loss": 1.7053, "step": 74885 }, { "epoch": 2.49, "grad_norm": 0.7156768441200256, "learning_rate": 4.1543219284092354e-05, "loss": 1.6709, "step": 74886 }, { "epoch": 2.49, "grad_norm": 0.7105035185813904, "learning_rate": 4.1537912700533714e-05, "loss": 1.7218, "step": 74887 }, { "epoch": 2.49, "grad_norm": 0.7052870988845825, "learning_rate": 4.153260643071001e-05, "loss": 1.6538, "step": 74888 }, { "epoch": 2.49, "grad_norm": 0.6916818022727966, "learning_rate": 4.1527300474627544e-05, "loss": 1.696, "step": 74889 }, { "epoch": 2.49, "grad_norm": 0.6772481203079224, "learning_rate": 4.1521994832292836e-05, "loss": 1.655, "step": 74890 }, { "epoch": 2.49, "grad_norm": 0.6729982495307922, "learning_rate": 4.1516689503712454e-05, "loss": 1.6642, "step": 74891 }, { "epoch": 2.49, "grad_norm": 0.7156454920768738, "learning_rate": 4.1511384488892685e-05, "loss": 1.6981, "step": 74892 }, { "epoch": 2.49, "grad_norm": 0.7103911638259888, "learning_rate": 4.150607978783996e-05, "loss": 1.7028, "step": 74893 }, { "epoch": 2.49, "grad_norm": 0.6921811699867249, "learning_rate": 4.1500775400560816e-05, "loss": 1.6301, "step": 74894 }, { "epoch": 2.49, "grad_norm": 0.6801824569702148, "learning_rate": 4.149547132706167e-05, "loss": 1.5768, "step": 74895 }, { "epoch": 2.49, "grad_norm": 0.7104743123054504, "learning_rate": 4.1490167567348856e-05, "loss": 1.6914, "step": 74896 }, { "epoch": 2.49, "grad_norm": 0.6738523244857788, "learning_rate": 4.148486412142886e-05, "loss": 1.6697, "step": 74897 }, { "epoch": 2.49, "grad_norm": 0.69338059425354, "learning_rate": 4.147956098930826e-05, "loss": 1.7467, "step": 74898 }, { "epoch": 2.49, "grad_norm": 0.6852423548698425, "learning_rate": 4.147425817099337e-05, "loss": 1.7238, "step": 74899 }, { "epoch": 2.49, "grad_norm": 0.6893600225448608, "learning_rate": 4.1468955666490554e-05, "loss": 1.7019, "step": 74900 }, { "epoch": 2.49, "grad_norm": 0.7073881030082703, "learning_rate": 4.146365347580641e-05, "loss": 1.7235, "step": 74901 }, { "epoch": 2.49, "grad_norm": 0.7005786299705505, "learning_rate": 4.1458351598947306e-05, "loss": 1.6775, "step": 74902 }, { "epoch": 2.49, "grad_norm": 0.6907539367675781, "learning_rate": 4.1453050035919585e-05, "loss": 1.7002, "step": 74903 }, { "epoch": 2.49, "grad_norm": 0.7057514786720276, "learning_rate": 4.144774878672976e-05, "loss": 1.7218, "step": 74904 }, { "epoch": 2.49, "grad_norm": 0.702254056930542, "learning_rate": 4.1442447851384355e-05, "loss": 1.6716, "step": 74905 }, { "epoch": 2.49, "grad_norm": 0.7153475284576416, "learning_rate": 4.143714722988972e-05, "loss": 1.6751, "step": 74906 }, { "epoch": 2.49, "grad_norm": 0.6870980858802795, "learning_rate": 4.143184692225223e-05, "loss": 1.5903, "step": 74907 }, { "epoch": 2.49, "grad_norm": 0.6797829270362854, "learning_rate": 4.1426546928478445e-05, "loss": 1.6161, "step": 74908 }, { "epoch": 2.49, "grad_norm": 0.675658643245697, "learning_rate": 4.142124724857472e-05, "loss": 1.6686, "step": 74909 }, { "epoch": 2.49, "grad_norm": 0.6860328316688538, "learning_rate": 4.141594788254745e-05, "loss": 1.691, "step": 74910 }, { "epoch": 2.49, "grad_norm": 0.7341374158859253, "learning_rate": 4.141064883040317e-05, "loss": 1.708, "step": 74911 }, { "epoch": 2.49, "grad_norm": 0.6973119974136353, "learning_rate": 4.1405350092148204e-05, "loss": 1.6779, "step": 74912 }, { "epoch": 2.49, "grad_norm": 0.734488308429718, "learning_rate": 4.1400051667789117e-05, "loss": 1.6981, "step": 74913 }, { "epoch": 2.49, "grad_norm": 0.6687242388725281, "learning_rate": 4.139475355733216e-05, "loss": 1.6045, "step": 74914 }, { "epoch": 2.49, "grad_norm": 0.689193844795227, "learning_rate": 4.138945576078397e-05, "loss": 1.6434, "step": 74915 }, { "epoch": 2.49, "grad_norm": 0.6961231231689453, "learning_rate": 4.138415827815088e-05, "loss": 1.6891, "step": 74916 }, { "epoch": 2.49, "grad_norm": 0.7029266953468323, "learning_rate": 4.137886110943924e-05, "loss": 1.7583, "step": 74917 }, { "epoch": 2.49, "grad_norm": 0.6939114928245544, "learning_rate": 4.1373564254655625e-05, "loss": 1.6691, "step": 74918 }, { "epoch": 2.49, "grad_norm": 0.7129420638084412, "learning_rate": 4.136826771380631e-05, "loss": 1.6672, "step": 74919 }, { "epoch": 2.49, "grad_norm": 0.7072369456291199, "learning_rate": 4.1362971486897914e-05, "loss": 1.6941, "step": 74920 }, { "epoch": 2.49, "grad_norm": 0.6735315322875977, "learning_rate": 4.135767557393671e-05, "loss": 1.6445, "step": 74921 }, { "epoch": 2.49, "grad_norm": 0.7037112712860107, "learning_rate": 4.135237997492914e-05, "loss": 1.639, "step": 74922 }, { "epoch": 2.49, "grad_norm": 0.6918314695358276, "learning_rate": 4.134708468988176e-05, "loss": 1.6206, "step": 74923 }, { "epoch": 2.49, "grad_norm": 0.6874752044677734, "learning_rate": 4.1341789718800834e-05, "loss": 1.7179, "step": 74924 }, { "epoch": 2.49, "grad_norm": 0.705615758895874, "learning_rate": 4.1336495061692885e-05, "loss": 1.7475, "step": 74925 }, { "epoch": 2.49, "grad_norm": 0.7101392149925232, "learning_rate": 4.1331200718564316e-05, "loss": 1.7536, "step": 74926 }, { "epoch": 2.49, "grad_norm": 0.7351306676864624, "learning_rate": 4.132590668942158e-05, "loss": 1.7173, "step": 74927 }, { "epoch": 2.49, "grad_norm": 0.6812446117401123, "learning_rate": 4.1320612974271084e-05, "loss": 1.7248, "step": 74928 }, { "epoch": 2.49, "grad_norm": 0.7073435187339783, "learning_rate": 4.131531957311921e-05, "loss": 1.6516, "step": 74929 }, { "epoch": 2.49, "grad_norm": 0.6977878212928772, "learning_rate": 4.1310026485972455e-05, "loss": 1.6798, "step": 74930 }, { "epoch": 2.49, "grad_norm": 0.6818252801895142, "learning_rate": 4.130473371283722e-05, "loss": 1.6818, "step": 74931 }, { "epoch": 2.49, "grad_norm": 0.6739617586135864, "learning_rate": 4.129944125371987e-05, "loss": 1.7315, "step": 74932 }, { "epoch": 2.49, "grad_norm": 0.6841248273849487, "learning_rate": 4.129414910862688e-05, "loss": 1.7387, "step": 74933 }, { "epoch": 2.49, "grad_norm": 0.6722596883773804, "learning_rate": 4.1288857277564734e-05, "loss": 1.7305, "step": 74934 }, { "epoch": 2.49, "grad_norm": 0.720698893070221, "learning_rate": 4.128356576053978e-05, "loss": 1.6898, "step": 74935 }, { "epoch": 2.49, "grad_norm": 0.7014132738113403, "learning_rate": 4.1278274557558424e-05, "loss": 1.6785, "step": 74936 }, { "epoch": 2.49, "grad_norm": 0.689818263053894, "learning_rate": 4.1272983668627144e-05, "loss": 1.6779, "step": 74937 }, { "epoch": 2.49, "grad_norm": 0.7234659790992737, "learning_rate": 4.1267693093752384e-05, "loss": 1.691, "step": 74938 }, { "epoch": 2.49, "grad_norm": 0.7071571350097656, "learning_rate": 4.126240283294043e-05, "loss": 1.722, "step": 74939 }, { "epoch": 2.49, "grad_norm": 0.6741617918014526, "learning_rate": 4.125711288619779e-05, "loss": 1.683, "step": 74940 }, { "epoch": 2.49, "grad_norm": 0.6696277856826782, "learning_rate": 4.1251823253531035e-05, "loss": 1.6344, "step": 74941 }, { "epoch": 2.49, "grad_norm": 0.689629316329956, "learning_rate": 4.1246533934946316e-05, "loss": 1.7239, "step": 74942 }, { "epoch": 2.49, "grad_norm": 0.763899028301239, "learning_rate": 4.1241244930450176e-05, "loss": 1.7743, "step": 74943 }, { "epoch": 2.49, "grad_norm": 0.7078737616539001, "learning_rate": 4.123595624004909e-05, "loss": 1.6688, "step": 74944 }, { "epoch": 2.49, "grad_norm": 0.6819618344306946, "learning_rate": 4.1230667863749455e-05, "loss": 1.6023, "step": 74945 }, { "epoch": 2.49, "grad_norm": 0.7049103379249573, "learning_rate": 4.122537980155759e-05, "loss": 1.7483, "step": 74946 }, { "epoch": 2.49, "grad_norm": 0.6996415853500366, "learning_rate": 4.122009205347996e-05, "loss": 1.6601, "step": 74947 }, { "epoch": 2.49, "grad_norm": 0.7075293660163879, "learning_rate": 4.121480461952316e-05, "loss": 1.7055, "step": 74948 }, { "epoch": 2.49, "grad_norm": 0.697051465511322, "learning_rate": 4.120951749969331e-05, "loss": 1.7409, "step": 74949 }, { "epoch": 2.49, "grad_norm": 0.6960010528564453, "learning_rate": 4.120423069399699e-05, "loss": 1.7448, "step": 74950 }, { "epoch": 2.49, "grad_norm": 0.6958342790603638, "learning_rate": 4.1198944202440674e-05, "loss": 1.7005, "step": 74951 }, { "epoch": 2.49, "grad_norm": 0.6750219464302063, "learning_rate": 4.119365802503071e-05, "loss": 1.6657, "step": 74952 }, { "epoch": 2.49, "grad_norm": 0.6891248822212219, "learning_rate": 4.118837216177341e-05, "loss": 1.7142, "step": 74953 }, { "epoch": 2.49, "grad_norm": 0.7028312683105469, "learning_rate": 4.1183086612675306e-05, "loss": 1.7507, "step": 74954 }, { "epoch": 2.49, "grad_norm": 0.68228679895401, "learning_rate": 4.117780137774297e-05, "loss": 1.7031, "step": 74955 }, { "epoch": 2.49, "grad_norm": 0.7143245935440063, "learning_rate": 4.1172516456982484e-05, "loss": 1.8129, "step": 74956 }, { "epoch": 2.49, "grad_norm": 0.7086380124092102, "learning_rate": 4.116723185040042e-05, "loss": 1.6472, "step": 74957 }, { "epoch": 2.49, "grad_norm": 0.6930265426635742, "learning_rate": 4.116194755800326e-05, "loss": 1.6545, "step": 74958 }, { "epoch": 2.49, "grad_norm": 0.6974963545799255, "learning_rate": 4.115666357979738e-05, "loss": 1.7089, "step": 74959 }, { "epoch": 2.49, "grad_norm": 0.7210948467254639, "learning_rate": 4.1151379915789095e-05, "loss": 1.6917, "step": 74960 }, { "epoch": 2.49, "grad_norm": 0.7113292813301086, "learning_rate": 4.1146096565984974e-05, "loss": 1.6233, "step": 74961 }, { "epoch": 2.49, "grad_norm": 0.7092453837394714, "learning_rate": 4.114081353039135e-05, "loss": 1.715, "step": 74962 }, { "epoch": 2.49, "grad_norm": 0.7020901441574097, "learning_rate": 4.113553080901454e-05, "loss": 1.7078, "step": 74963 }, { "epoch": 2.49, "grad_norm": 0.7107430696487427, "learning_rate": 4.113024840186108e-05, "loss": 1.6668, "step": 74964 }, { "epoch": 2.49, "grad_norm": 0.6765987873077393, "learning_rate": 4.112496630893742e-05, "loss": 1.6181, "step": 74965 }, { "epoch": 2.49, "grad_norm": 0.6832427978515625, "learning_rate": 4.111968453024991e-05, "loss": 1.6608, "step": 74966 }, { "epoch": 2.49, "grad_norm": 0.6923431158065796, "learning_rate": 4.111440306580489e-05, "loss": 1.69, "step": 74967 }, { "epoch": 2.49, "grad_norm": 0.6908544301986694, "learning_rate": 4.11091219156089e-05, "loss": 1.6634, "step": 74968 }, { "epoch": 2.49, "grad_norm": 0.6810410618782043, "learning_rate": 4.11038410796683e-05, "loss": 1.6678, "step": 74969 }, { "epoch": 2.49, "grad_norm": 0.714103639125824, "learning_rate": 4.1098560557989416e-05, "loss": 1.6904, "step": 74970 }, { "epoch": 2.49, "grad_norm": 0.6944313049316406, "learning_rate": 4.109328035057877e-05, "loss": 1.6758, "step": 74971 }, { "epoch": 2.49, "grad_norm": 0.689793586730957, "learning_rate": 4.10880004574427e-05, "loss": 1.7045, "step": 74972 }, { "epoch": 2.49, "grad_norm": 0.7234134674072266, "learning_rate": 4.108272087858774e-05, "loss": 1.7252, "step": 74973 }, { "epoch": 2.49, "grad_norm": 0.7405920624732971, "learning_rate": 4.10774416140201e-05, "loss": 1.6642, "step": 74974 }, { "epoch": 2.49, "grad_norm": 0.7109542489051819, "learning_rate": 4.107216266374639e-05, "loss": 1.636, "step": 74975 }, { "epoch": 2.49, "grad_norm": 0.7196478843688965, "learning_rate": 4.10668840277729e-05, "loss": 1.7299, "step": 74976 }, { "epoch": 2.49, "grad_norm": 0.690109372138977, "learning_rate": 4.1061605706105984e-05, "loss": 1.6332, "step": 74977 }, { "epoch": 2.49, "grad_norm": 0.6898592710494995, "learning_rate": 4.105632769875221e-05, "loss": 1.6777, "step": 74978 }, { "epoch": 2.49, "grad_norm": 0.703009843826294, "learning_rate": 4.1051050005717846e-05, "loss": 1.6761, "step": 74979 }, { "epoch": 2.49, "grad_norm": 0.6988064646720886, "learning_rate": 4.104577262700941e-05, "loss": 1.7006, "step": 74980 }, { "epoch": 2.49, "grad_norm": 0.6835189461708069, "learning_rate": 4.1040495562633233e-05, "loss": 1.752, "step": 74981 }, { "epoch": 2.49, "grad_norm": 0.6919409036636353, "learning_rate": 4.103521881259572e-05, "loss": 1.6702, "step": 74982 }, { "epoch": 2.49, "grad_norm": 0.6898928284645081, "learning_rate": 4.1029942376903315e-05, "loss": 1.6862, "step": 74983 }, { "epoch": 2.49, "grad_norm": 0.7223281860351562, "learning_rate": 4.102466625556236e-05, "loss": 1.723, "step": 74984 }, { "epoch": 2.49, "grad_norm": 0.6889282464981079, "learning_rate": 4.101939044857934e-05, "loss": 1.6888, "step": 74985 }, { "epoch": 2.49, "grad_norm": 0.6958891153335571, "learning_rate": 4.101411495596059e-05, "loss": 1.7484, "step": 74986 }, { "epoch": 2.49, "grad_norm": 0.691810131072998, "learning_rate": 4.10088397777126e-05, "loss": 1.6672, "step": 74987 }, { "epoch": 2.49, "grad_norm": 0.7035390138626099, "learning_rate": 4.1003564913841705e-05, "loss": 1.6384, "step": 74988 }, { "epoch": 2.49, "grad_norm": 0.6725901961326599, "learning_rate": 4.099829036435426e-05, "loss": 1.6302, "step": 74989 }, { "epoch": 2.49, "grad_norm": 0.684726893901825, "learning_rate": 4.0993016129256797e-05, "loss": 1.7304, "step": 74990 }, { "epoch": 2.49, "grad_norm": 0.6912038326263428, "learning_rate": 4.098774220855564e-05, "loss": 1.7717, "step": 74991 }, { "epoch": 2.49, "grad_norm": 0.7004982233047485, "learning_rate": 4.098246860225716e-05, "loss": 1.6643, "step": 74992 }, { "epoch": 2.5, "grad_norm": 0.7080150246620178, "learning_rate": 4.097719531036777e-05, "loss": 1.6684, "step": 74993 }, { "epoch": 2.5, "grad_norm": 0.7009826302528381, "learning_rate": 4.097192233289398e-05, "loss": 1.6446, "step": 74994 }, { "epoch": 2.5, "grad_norm": 0.7141406536102295, "learning_rate": 4.0966649669842114e-05, "loss": 1.7034, "step": 74995 }, { "epoch": 2.5, "grad_norm": 0.7038847208023071, "learning_rate": 4.096137732121849e-05, "loss": 1.648, "step": 74996 }, { "epoch": 2.5, "grad_norm": 0.7114301919937134, "learning_rate": 4.095610528702965e-05, "loss": 1.698, "step": 74997 }, { "epoch": 2.5, "grad_norm": 0.6901495456695557, "learning_rate": 4.095083356728195e-05, "loss": 1.7077, "step": 74998 }, { "epoch": 2.5, "grad_norm": 0.6682083010673523, "learning_rate": 4.094556216198168e-05, "loss": 1.7152, "step": 74999 }, { "epoch": 2.5, "grad_norm": 0.690625011920929, "learning_rate": 4.0940291071135345e-05, "loss": 1.7228, "step": 75000 }, { "epoch": 2.5, "grad_norm": 0.712177038192749, "learning_rate": 4.09350202947494e-05, "loss": 1.7211, "step": 75001 }, { "epoch": 2.5, "grad_norm": 0.6946200132369995, "learning_rate": 4.092974983283014e-05, "loss": 1.6771, "step": 75002 }, { "epoch": 2.5, "grad_norm": 0.6774932146072388, "learning_rate": 4.092447968538396e-05, "loss": 1.6832, "step": 75003 }, { "epoch": 2.5, "grad_norm": 0.6830730438232422, "learning_rate": 4.091920985241732e-05, "loss": 1.7041, "step": 75004 }, { "epoch": 2.5, "grad_norm": 0.6816851496696472, "learning_rate": 4.091394033393659e-05, "loss": 1.582, "step": 75005 }, { "epoch": 2.5, "grad_norm": 0.6972060203552246, "learning_rate": 4.0908671129948126e-05, "loss": 1.7135, "step": 75006 }, { "epoch": 2.5, "grad_norm": 0.7040035128593445, "learning_rate": 4.090340224045832e-05, "loss": 1.7066, "step": 75007 }, { "epoch": 2.5, "grad_norm": 0.6924905180931091, "learning_rate": 4.0898133665473696e-05, "loss": 1.63, "step": 75008 }, { "epoch": 2.5, "grad_norm": 0.7026885747909546, "learning_rate": 4.0892865405000564e-05, "loss": 1.7563, "step": 75009 }, { "epoch": 2.5, "grad_norm": 0.6920099258422852, "learning_rate": 4.088759745904524e-05, "loss": 1.6618, "step": 75010 }, { "epoch": 2.5, "grad_norm": 0.710519015789032, "learning_rate": 4.088232982761429e-05, "loss": 1.7402, "step": 75011 }, { "epoch": 2.5, "grad_norm": 0.6841840744018555, "learning_rate": 4.0877062510713973e-05, "loss": 1.5837, "step": 75012 }, { "epoch": 2.5, "grad_norm": 0.7046669125556946, "learning_rate": 4.0871795508350656e-05, "loss": 1.6909, "step": 75013 }, { "epoch": 2.5, "grad_norm": 0.6969108581542969, "learning_rate": 4.0866528820530834e-05, "loss": 1.7606, "step": 75014 }, { "epoch": 2.5, "grad_norm": 0.6842480301856995, "learning_rate": 4.086126244726089e-05, "loss": 1.7116, "step": 75015 }, { "epoch": 2.5, "grad_norm": 0.6889880895614624, "learning_rate": 4.085599638854723e-05, "loss": 1.7054, "step": 75016 }, { "epoch": 2.5, "grad_norm": 0.6979576349258423, "learning_rate": 4.085073064439611e-05, "loss": 1.6498, "step": 75017 }, { "epoch": 2.5, "grad_norm": 0.6825990080833435, "learning_rate": 4.08454652148141e-05, "loss": 1.6933, "step": 75018 }, { "epoch": 2.5, "grad_norm": 0.6915536522865295, "learning_rate": 4.084020009980752e-05, "loss": 1.6391, "step": 75019 }, { "epoch": 2.5, "grad_norm": 0.7031251788139343, "learning_rate": 4.0834935299382664e-05, "loss": 1.7112, "step": 75020 }, { "epoch": 2.5, "grad_norm": 0.6873180866241455, "learning_rate": 4.082967081354603e-05, "loss": 1.6973, "step": 75021 }, { "epoch": 2.5, "grad_norm": 0.7122735977172852, "learning_rate": 4.082440664230405e-05, "loss": 1.6594, "step": 75022 }, { "epoch": 2.5, "grad_norm": 0.7205291986465454, "learning_rate": 4.081914278566305e-05, "loss": 1.6473, "step": 75023 }, { "epoch": 2.5, "grad_norm": 0.6902263164520264, "learning_rate": 4.081387924362936e-05, "loss": 1.6677, "step": 75024 }, { "epoch": 2.5, "grad_norm": 0.6920174360275269, "learning_rate": 4.0808616016209464e-05, "loss": 1.6522, "step": 75025 }, { "epoch": 2.5, "grad_norm": 0.716453492641449, "learning_rate": 4.080335310340976e-05, "loss": 1.6876, "step": 75026 }, { "epoch": 2.5, "grad_norm": 0.6996679306030273, "learning_rate": 4.079809050523649e-05, "loss": 1.6891, "step": 75027 }, { "epoch": 2.5, "grad_norm": 0.6871568560600281, "learning_rate": 4.079282822169624e-05, "loss": 1.6636, "step": 75028 }, { "epoch": 2.5, "grad_norm": 0.6916512250900269, "learning_rate": 4.078756625279523e-05, "loss": 1.625, "step": 75029 }, { "epoch": 2.5, "grad_norm": 0.7110123634338379, "learning_rate": 4.0782304598540005e-05, "loss": 1.7004, "step": 75030 }, { "epoch": 2.5, "grad_norm": 0.6742803454399109, "learning_rate": 4.077704325893678e-05, "loss": 1.6609, "step": 75031 }, { "epoch": 2.5, "grad_norm": 0.6772382855415344, "learning_rate": 4.0771782233992126e-05, "loss": 1.6244, "step": 75032 }, { "epoch": 2.5, "grad_norm": 0.7088190913200378, "learning_rate": 4.076652152371229e-05, "loss": 1.7155, "step": 75033 }, { "epoch": 2.5, "grad_norm": 0.6875531077384949, "learning_rate": 4.076126112810365e-05, "loss": 1.6793, "step": 75034 }, { "epoch": 2.5, "grad_norm": 0.6676178574562073, "learning_rate": 4.0756001047172725e-05, "loss": 1.6397, "step": 75035 }, { "epoch": 2.5, "grad_norm": 0.6898921728134155, "learning_rate": 4.075074128092575e-05, "loss": 1.7233, "step": 75036 }, { "epoch": 2.5, "grad_norm": 0.6798557639122009, "learning_rate": 4.074548182936924e-05, "loss": 1.62, "step": 75037 }, { "epoch": 2.5, "grad_norm": 0.678234338760376, "learning_rate": 4.0740222692509506e-05, "loss": 1.674, "step": 75038 }, { "epoch": 2.5, "grad_norm": 0.6958555579185486, "learning_rate": 4.073496387035286e-05, "loss": 1.7182, "step": 75039 }, { "epoch": 2.5, "grad_norm": 0.7070873975753784, "learning_rate": 4.072970536290585e-05, "loss": 1.6453, "step": 75040 }, { "epoch": 2.5, "grad_norm": 0.6904933452606201, "learning_rate": 4.072444717017472e-05, "loss": 1.6541, "step": 75041 }, { "epoch": 2.5, "grad_norm": 0.6747643351554871, "learning_rate": 4.0719189292165975e-05, "loss": 1.6351, "step": 75042 }, { "epoch": 2.5, "grad_norm": 0.6976931691169739, "learning_rate": 4.071393172888584e-05, "loss": 1.695, "step": 75043 }, { "epoch": 2.5, "grad_norm": 0.7422837018966675, "learning_rate": 4.070867448034087e-05, "loss": 1.7453, "step": 75044 }, { "epoch": 2.5, "grad_norm": 0.7012979984283447, "learning_rate": 4.070341754653738e-05, "loss": 1.7261, "step": 75045 }, { "epoch": 2.5, "grad_norm": 0.7156349420547485, "learning_rate": 4.0698160927481636e-05, "loss": 1.7611, "step": 75046 }, { "epoch": 2.5, "grad_norm": 0.6903451681137085, "learning_rate": 4.069290462318019e-05, "loss": 1.6316, "step": 75047 }, { "epoch": 2.5, "grad_norm": 0.6803078055381775, "learning_rate": 4.068764863363939e-05, "loss": 1.7181, "step": 75048 }, { "epoch": 2.5, "grad_norm": 0.7097317576408386, "learning_rate": 4.0682392958865474e-05, "loss": 1.7063, "step": 75049 }, { "epoch": 2.5, "grad_norm": 0.6896674633026123, "learning_rate": 4.067713759886493e-05, "loss": 1.7155, "step": 75050 }, { "epoch": 2.5, "grad_norm": 0.7445733547210693, "learning_rate": 4.0671882553644184e-05, "loss": 1.7202, "step": 75051 }, { "epoch": 2.5, "grad_norm": 0.6853134036064148, "learning_rate": 4.0666627823209605e-05, "loss": 1.6901, "step": 75052 }, { "epoch": 2.5, "grad_norm": 0.7258436679840088, "learning_rate": 4.066137340756742e-05, "loss": 1.7175, "step": 75053 }, { "epoch": 2.5, "grad_norm": 0.6927102208137512, "learning_rate": 4.065611930672419e-05, "loss": 1.691, "step": 75054 }, { "epoch": 2.5, "grad_norm": 0.7198237180709839, "learning_rate": 4.0650865520686235e-05, "loss": 1.755, "step": 75055 }, { "epoch": 2.5, "grad_norm": 0.7007589936256409, "learning_rate": 4.064561204945986e-05, "loss": 1.6583, "step": 75056 }, { "epoch": 2.5, "grad_norm": 0.6870348453521729, "learning_rate": 4.0640358893051464e-05, "loss": 1.6729, "step": 75057 }, { "epoch": 2.5, "grad_norm": 0.7045305967330933, "learning_rate": 4.063510605146762e-05, "loss": 1.7189, "step": 75058 }, { "epoch": 2.5, "grad_norm": 0.6739960312843323, "learning_rate": 4.062985352471439e-05, "loss": 1.6772, "step": 75059 }, { "epoch": 2.5, "grad_norm": 0.688164234161377, "learning_rate": 4.06246013127983e-05, "loss": 1.783, "step": 75060 }, { "epoch": 2.5, "grad_norm": 0.7218442559242249, "learning_rate": 4.0619349415725797e-05, "loss": 1.6823, "step": 75061 }, { "epoch": 2.5, "grad_norm": 0.7075737118721008, "learning_rate": 4.061409783350318e-05, "loss": 1.6776, "step": 75062 }, { "epoch": 2.5, "grad_norm": 0.6984400749206543, "learning_rate": 4.06088465661368e-05, "loss": 1.7327, "step": 75063 }, { "epoch": 2.5, "grad_norm": 0.7093664407730103, "learning_rate": 4.0603595613633024e-05, "loss": 1.7003, "step": 75064 }, { "epoch": 2.5, "grad_norm": 0.6995875239372253, "learning_rate": 4.059834497599841e-05, "loss": 1.72, "step": 75065 }, { "epoch": 2.5, "grad_norm": 0.6756051182746887, "learning_rate": 4.0593094653239025e-05, "loss": 1.6559, "step": 75066 }, { "epoch": 2.5, "grad_norm": 0.6987341046333313, "learning_rate": 4.058784464536142e-05, "loss": 1.7034, "step": 75067 }, { "epoch": 2.5, "grad_norm": 0.6979047060012817, "learning_rate": 4.0582594952372035e-05, "loss": 1.704, "step": 75068 }, { "epoch": 2.5, "grad_norm": 0.6964256167411804, "learning_rate": 4.057734557427713e-05, "loss": 1.6767, "step": 75069 }, { "epoch": 2.5, "grad_norm": 0.7147844433784485, "learning_rate": 4.057209651108303e-05, "loss": 1.7177, "step": 75070 }, { "epoch": 2.5, "grad_norm": 0.6886649131774902, "learning_rate": 4.056684776279619e-05, "loss": 1.7026, "step": 75071 }, { "epoch": 2.5, "grad_norm": 0.7144500613212585, "learning_rate": 4.056159932942312e-05, "loss": 1.6717, "step": 75072 }, { "epoch": 2.5, "grad_norm": 0.6820408701896667, "learning_rate": 4.055635121096991e-05, "loss": 1.7157, "step": 75073 }, { "epoch": 2.5, "grad_norm": 0.6986985206604004, "learning_rate": 4.055110340744304e-05, "loss": 1.6916, "step": 75074 }, { "epoch": 2.5, "grad_norm": 0.7074941992759705, "learning_rate": 4.054585591884899e-05, "loss": 1.6278, "step": 75075 }, { "epoch": 2.5, "grad_norm": 0.7223365902900696, "learning_rate": 4.054060874519401e-05, "loss": 1.6882, "step": 75076 }, { "epoch": 2.5, "grad_norm": 0.713932991027832, "learning_rate": 4.0535361886484474e-05, "loss": 1.6796, "step": 75077 }, { "epoch": 2.5, "grad_norm": 0.672927737236023, "learning_rate": 4.053011534272683e-05, "loss": 1.7153, "step": 75078 }, { "epoch": 2.5, "grad_norm": 0.7233825325965881, "learning_rate": 4.0524869113927386e-05, "loss": 1.7822, "step": 75079 }, { "epoch": 2.5, "grad_norm": 0.6938902139663696, "learning_rate": 4.051962320009245e-05, "loss": 1.7007, "step": 75080 }, { "epoch": 2.5, "grad_norm": 0.6933749914169312, "learning_rate": 4.0514377601228475e-05, "loss": 1.7189, "step": 75081 }, { "epoch": 2.5, "grad_norm": 0.6901518702507019, "learning_rate": 4.050913231734187e-05, "loss": 1.6763, "step": 75082 }, { "epoch": 2.5, "grad_norm": 0.6810163259506226, "learning_rate": 4.0503887348438944e-05, "loss": 1.6795, "step": 75083 }, { "epoch": 2.5, "grad_norm": 0.7111141085624695, "learning_rate": 4.049864269452602e-05, "loss": 1.7019, "step": 75084 }, { "epoch": 2.5, "grad_norm": 0.6848337054252625, "learning_rate": 4.049339835560955e-05, "loss": 1.7406, "step": 75085 }, { "epoch": 2.5, "grad_norm": 0.6936899423599243, "learning_rate": 4.048815433169584e-05, "loss": 1.7119, "step": 75086 }, { "epoch": 2.5, "grad_norm": 0.6844868659973145, "learning_rate": 4.048291062279125e-05, "loss": 1.5947, "step": 75087 }, { "epoch": 2.5, "grad_norm": 0.6956778764724731, "learning_rate": 4.047766722890221e-05, "loss": 1.6236, "step": 75088 }, { "epoch": 2.5, "grad_norm": 0.7020460367202759, "learning_rate": 4.0472424150034974e-05, "loss": 1.7035, "step": 75089 }, { "epoch": 2.5, "grad_norm": 0.7422930002212524, "learning_rate": 4.0467181386196045e-05, "loss": 1.7476, "step": 75090 }, { "epoch": 2.5, "grad_norm": 0.7229082584381104, "learning_rate": 4.046193893739165e-05, "loss": 1.616, "step": 75091 }, { "epoch": 2.5, "grad_norm": 0.7354333996772766, "learning_rate": 4.045669680362831e-05, "loss": 1.7143, "step": 75092 }, { "epoch": 2.5, "grad_norm": 0.6952845454216003, "learning_rate": 4.045145498491226e-05, "loss": 1.6969, "step": 75093 }, { "epoch": 2.5, "grad_norm": 0.6861077547073364, "learning_rate": 4.0446213481249854e-05, "loss": 1.6967, "step": 75094 }, { "epoch": 2.5, "grad_norm": 0.7049508690834045, "learning_rate": 4.0440972292647564e-05, "loss": 1.7096, "step": 75095 }, { "epoch": 2.5, "grad_norm": 0.6949207186698914, "learning_rate": 4.043573141911165e-05, "loss": 1.637, "step": 75096 }, { "epoch": 2.5, "grad_norm": 0.6917122602462769, "learning_rate": 4.043049086064853e-05, "loss": 1.6929, "step": 75097 }, { "epoch": 2.5, "grad_norm": 0.7143199443817139, "learning_rate": 4.0425250617264585e-05, "loss": 1.688, "step": 75098 }, { "epoch": 2.5, "grad_norm": 0.686025083065033, "learning_rate": 4.042001068896605e-05, "loss": 1.718, "step": 75099 }, { "epoch": 2.5, "grad_norm": 0.6903499960899353, "learning_rate": 4.041477107575948e-05, "loss": 1.6652, "step": 75100 }, { "epoch": 2.5, "grad_norm": 2.1275718212127686, "learning_rate": 4.040953177765102e-05, "loss": 1.6638, "step": 75101 }, { "epoch": 2.5, "grad_norm": 0.7201409935951233, "learning_rate": 4.040429279464723e-05, "loss": 1.6401, "step": 75102 }, { "epoch": 2.5, "grad_norm": 0.686353325843811, "learning_rate": 4.0399054126754284e-05, "loss": 1.7162, "step": 75103 }, { "epoch": 2.5, "grad_norm": 0.6882473230361938, "learning_rate": 4.0393815773978734e-05, "loss": 1.6747, "step": 75104 }, { "epoch": 2.5, "grad_norm": 0.7032079696655273, "learning_rate": 4.0388577736326844e-05, "loss": 1.6485, "step": 75105 }, { "epoch": 2.5, "grad_norm": 0.6960697174072266, "learning_rate": 4.0383340013804875e-05, "loss": 1.6645, "step": 75106 }, { "epoch": 2.5, "grad_norm": 0.6829660534858704, "learning_rate": 4.037810260641936e-05, "loss": 1.6678, "step": 75107 }, { "epoch": 2.5, "grad_norm": 0.7255194783210754, "learning_rate": 4.0372865514176557e-05, "loss": 1.596, "step": 75108 }, { "epoch": 2.5, "grad_norm": 0.6838082671165466, "learning_rate": 4.036762873708279e-05, "loss": 1.5784, "step": 75109 }, { "epoch": 2.5, "grad_norm": 0.7006099820137024, "learning_rate": 4.036239227514446e-05, "loss": 1.6993, "step": 75110 }, { "epoch": 2.5, "grad_norm": 0.728873074054718, "learning_rate": 4.0357156128368026e-05, "loss": 1.6703, "step": 75111 }, { "epoch": 2.5, "grad_norm": 0.662308931350708, "learning_rate": 4.035192029675972e-05, "loss": 1.5909, "step": 75112 }, { "epoch": 2.5, "grad_norm": 0.710832417011261, "learning_rate": 4.0346684780325864e-05, "loss": 1.7034, "step": 75113 }, { "epoch": 2.5, "grad_norm": 0.6803308725357056, "learning_rate": 4.034144957907293e-05, "loss": 1.6152, "step": 75114 }, { "epoch": 2.5, "grad_norm": 0.6971777081489563, "learning_rate": 4.033621469300723e-05, "loss": 1.7174, "step": 75115 }, { "epoch": 2.5, "grad_norm": 0.6928995847702026, "learning_rate": 4.033098012213504e-05, "loss": 1.7097, "step": 75116 }, { "epoch": 2.5, "grad_norm": 0.6961305141448975, "learning_rate": 4.032574586646278e-05, "loss": 1.67, "step": 75117 }, { "epoch": 2.5, "grad_norm": 0.7029115557670593, "learning_rate": 4.032051192599688e-05, "loss": 1.6462, "step": 75118 }, { "epoch": 2.5, "grad_norm": 0.6892219185829163, "learning_rate": 4.0315278300743605e-05, "loss": 1.7533, "step": 75119 }, { "epoch": 2.5, "grad_norm": 0.6673218607902527, "learning_rate": 4.031004499070924e-05, "loss": 1.6592, "step": 75120 }, { "epoch": 2.5, "grad_norm": 0.6997369527816772, "learning_rate": 4.03048119959003e-05, "loss": 1.6891, "step": 75121 }, { "epoch": 2.5, "grad_norm": 0.6993728280067444, "learning_rate": 4.029957931632303e-05, "loss": 1.6624, "step": 75122 }, { "epoch": 2.5, "grad_norm": 0.6751477718353271, "learning_rate": 4.0294346951983755e-05, "loss": 1.646, "step": 75123 }, { "epoch": 2.5, "grad_norm": 0.6742387413978577, "learning_rate": 4.028911490288889e-05, "loss": 1.7009, "step": 75124 }, { "epoch": 2.5, "grad_norm": 0.7037842869758606, "learning_rate": 4.028388316904484e-05, "loss": 1.7278, "step": 75125 }, { "epoch": 2.5, "grad_norm": 0.699273407459259, "learning_rate": 4.027865175045788e-05, "loss": 1.6814, "step": 75126 }, { "epoch": 2.5, "grad_norm": 0.7062641382217407, "learning_rate": 4.0273420647134304e-05, "loss": 1.6596, "step": 75127 }, { "epoch": 2.5, "grad_norm": 0.7171003818511963, "learning_rate": 4.0268189859080566e-05, "loss": 1.647, "step": 75128 }, { "epoch": 2.5, "grad_norm": 0.7126311659812927, "learning_rate": 4.026295938630301e-05, "loss": 1.7247, "step": 75129 }, { "epoch": 2.5, "grad_norm": 0.6940762996673584, "learning_rate": 4.025772922880788e-05, "loss": 1.606, "step": 75130 }, { "epoch": 2.5, "grad_norm": 0.6830813884735107, "learning_rate": 4.025249938660158e-05, "loss": 1.6317, "step": 75131 }, { "epoch": 2.5, "grad_norm": 0.6919998526573181, "learning_rate": 4.024726985969053e-05, "loss": 1.6398, "step": 75132 }, { "epoch": 2.5, "grad_norm": 0.6800695657730103, "learning_rate": 4.0242040648081044e-05, "loss": 1.6818, "step": 75133 }, { "epoch": 2.5, "grad_norm": 0.7155491709709167, "learning_rate": 4.0236811751779394e-05, "loss": 1.7769, "step": 75134 }, { "epoch": 2.5, "grad_norm": 0.6945222616195679, "learning_rate": 4.023158317079203e-05, "loss": 1.7207, "step": 75135 }, { "epoch": 2.5, "grad_norm": 0.6880587935447693, "learning_rate": 4.022635490512526e-05, "loss": 1.6639, "step": 75136 }, { "epoch": 2.5, "grad_norm": 0.7071791291236877, "learning_rate": 4.022112695478532e-05, "loss": 1.6981, "step": 75137 }, { "epoch": 2.5, "grad_norm": 0.691495954990387, "learning_rate": 4.0215899319778674e-05, "loss": 1.7594, "step": 75138 }, { "epoch": 2.5, "grad_norm": 0.7460412383079529, "learning_rate": 4.021067200011172e-05, "loss": 1.6653, "step": 75139 }, { "epoch": 2.5, "grad_norm": 0.6723921298980713, "learning_rate": 4.020544499579075e-05, "loss": 1.6853, "step": 75140 }, { "epoch": 2.5, "grad_norm": 0.6972047090530396, "learning_rate": 4.020021830682198e-05, "loss": 1.6315, "step": 75141 }, { "epoch": 2.5, "grad_norm": 0.6914888024330139, "learning_rate": 4.019499193321196e-05, "loss": 1.6677, "step": 75142 }, { "epoch": 2.5, "grad_norm": 0.6922598481178284, "learning_rate": 4.018976587496694e-05, "loss": 1.7169, "step": 75143 }, { "epoch": 2.5, "grad_norm": 0.7006729245185852, "learning_rate": 4.018454013209318e-05, "loss": 1.7221, "step": 75144 }, { "epoch": 2.5, "grad_norm": 0.7161410450935364, "learning_rate": 4.017931470459721e-05, "loss": 1.6203, "step": 75145 }, { "epoch": 2.5, "grad_norm": 0.701800525188446, "learning_rate": 4.0174089592485156e-05, "loss": 1.6266, "step": 75146 }, { "epoch": 2.5, "grad_norm": 0.7142486572265625, "learning_rate": 4.0168864795763557e-05, "loss": 1.7271, "step": 75147 }, { "epoch": 2.5, "grad_norm": 0.7055895924568176, "learning_rate": 4.0163640314438626e-05, "loss": 1.6603, "step": 75148 }, { "epoch": 2.5, "grad_norm": 0.6852442622184753, "learning_rate": 4.01584161485168e-05, "loss": 1.734, "step": 75149 }, { "epoch": 2.5, "grad_norm": 0.7079875469207764, "learning_rate": 4.015319229800438e-05, "loss": 1.6714, "step": 75150 }, { "epoch": 2.5, "grad_norm": 0.7072381377220154, "learning_rate": 4.0147968762907615e-05, "loss": 1.6654, "step": 75151 }, { "epoch": 2.5, "grad_norm": 0.6884700655937195, "learning_rate": 4.0142745543233e-05, "loss": 1.6047, "step": 75152 }, { "epoch": 2.5, "grad_norm": 0.695310652256012, "learning_rate": 4.013752263898674e-05, "loss": 1.6532, "step": 75153 }, { "epoch": 2.5, "grad_norm": 0.6982530951499939, "learning_rate": 4.013230005017529e-05, "loss": 1.7109, "step": 75154 }, { "epoch": 2.5, "grad_norm": 0.6810089945793152, "learning_rate": 4.012707777680495e-05, "loss": 1.7371, "step": 75155 }, { "epoch": 2.5, "grad_norm": 0.6978894472122192, "learning_rate": 4.012185581888201e-05, "loss": 1.7149, "step": 75156 }, { "epoch": 2.5, "grad_norm": 0.6964501142501831, "learning_rate": 4.011663417641287e-05, "loss": 1.6995, "step": 75157 }, { "epoch": 2.5, "grad_norm": 0.691267192363739, "learning_rate": 4.011141284940381e-05, "loss": 1.6992, "step": 75158 }, { "epoch": 2.5, "grad_norm": 0.692505955696106, "learning_rate": 4.010619183786125e-05, "loss": 1.7014, "step": 75159 }, { "epoch": 2.5, "grad_norm": 0.6854476928710938, "learning_rate": 4.010097114179139e-05, "loss": 1.6531, "step": 75160 }, { "epoch": 2.5, "grad_norm": 0.7066175937652588, "learning_rate": 4.009575076120079e-05, "loss": 1.6624, "step": 75161 }, { "epoch": 2.5, "grad_norm": 0.7083434462547302, "learning_rate": 4.009053069609558e-05, "loss": 1.7448, "step": 75162 }, { "epoch": 2.5, "grad_norm": 0.7104693055152893, "learning_rate": 4.008531094648214e-05, "loss": 1.7014, "step": 75163 }, { "epoch": 2.5, "grad_norm": 0.7188976407051086, "learning_rate": 4.0080091512366894e-05, "loss": 1.7146, "step": 75164 }, { "epoch": 2.5, "grad_norm": 0.6947288513183594, "learning_rate": 4.007487239375614e-05, "loss": 1.7498, "step": 75165 }, { "epoch": 2.5, "grad_norm": 0.7382947206497192, "learning_rate": 4.00696535906561e-05, "loss": 1.6467, "step": 75166 }, { "epoch": 2.5, "grad_norm": 0.7047383189201355, "learning_rate": 4.0064435103073215e-05, "loss": 1.7293, "step": 75167 }, { "epoch": 2.5, "grad_norm": 0.6826348900794983, "learning_rate": 4.005921693101386e-05, "loss": 1.6683, "step": 75168 }, { "epoch": 2.5, "grad_norm": 0.6875467300415039, "learning_rate": 4.005399907448435e-05, "loss": 1.667, "step": 75169 }, { "epoch": 2.5, "grad_norm": 0.7063443660736084, "learning_rate": 4.004878153349086e-05, "loss": 1.6501, "step": 75170 }, { "epoch": 2.5, "grad_norm": 0.676625669002533, "learning_rate": 4.004356430803997e-05, "loss": 1.6595, "step": 75171 }, { "epoch": 2.5, "grad_norm": 0.68840491771698, "learning_rate": 4.003834739813786e-05, "loss": 1.7017, "step": 75172 }, { "epoch": 2.5, "grad_norm": 0.7225306034088135, "learning_rate": 4.0033130803790835e-05, "loss": 1.7354, "step": 75173 }, { "epoch": 2.5, "grad_norm": 0.668519139289856, "learning_rate": 4.0027914525005286e-05, "loss": 1.5992, "step": 75174 }, { "epoch": 2.5, "grad_norm": 0.7116844654083252, "learning_rate": 4.002269856178765e-05, "loss": 1.6906, "step": 75175 }, { "epoch": 2.5, "grad_norm": 0.6882494688034058, "learning_rate": 4.001748291414404e-05, "loss": 1.705, "step": 75176 }, { "epoch": 2.5, "grad_norm": 0.7206811308860779, "learning_rate": 4.0012267582080926e-05, "loss": 1.6794, "step": 75177 }, { "epoch": 2.5, "grad_norm": 0.6756114363670349, "learning_rate": 4.000705256560464e-05, "loss": 1.7555, "step": 75178 }, { "epoch": 2.5, "grad_norm": 0.7035004496574402, "learning_rate": 4.00018378647215e-05, "loss": 1.7348, "step": 75179 }, { "epoch": 2.5, "grad_norm": 0.6785534024238586, "learning_rate": 3.999662347943774e-05, "loss": 1.684, "step": 75180 }, { "epoch": 2.5, "grad_norm": 0.6775985956192017, "learning_rate": 3.999140940975979e-05, "loss": 1.6437, "step": 75181 }, { "epoch": 2.5, "grad_norm": 0.7066619992256165, "learning_rate": 3.998619565569407e-05, "loss": 1.6366, "step": 75182 }, { "epoch": 2.5, "grad_norm": 0.6969354152679443, "learning_rate": 3.998098221724669e-05, "loss": 1.7039, "step": 75183 }, { "epoch": 2.5, "grad_norm": 0.7125676870346069, "learning_rate": 3.997576909442407e-05, "loss": 1.6713, "step": 75184 }, { "epoch": 2.5, "grad_norm": 0.6980847716331482, "learning_rate": 3.9970556287232606e-05, "loss": 1.6923, "step": 75185 }, { "epoch": 2.5, "grad_norm": 0.6881928443908691, "learning_rate": 3.9965343795678586e-05, "loss": 1.671, "step": 75186 }, { "epoch": 2.5, "grad_norm": 0.692746639251709, "learning_rate": 3.996013161976828e-05, "loss": 1.6648, "step": 75187 }, { "epoch": 2.5, "grad_norm": 0.6923354864120483, "learning_rate": 3.995491975950804e-05, "loss": 1.6629, "step": 75188 }, { "epoch": 2.5, "grad_norm": 0.7070263624191284, "learning_rate": 3.9949708214904344e-05, "loss": 1.6993, "step": 75189 }, { "epoch": 2.5, "grad_norm": 0.7054629921913147, "learning_rate": 3.9944496985963236e-05, "loss": 1.6918, "step": 75190 }, { "epoch": 2.5, "grad_norm": 0.6796632409095764, "learning_rate": 3.993928607269122e-05, "loss": 1.618, "step": 75191 }, { "epoch": 2.5, "grad_norm": 0.6941118836402893, "learning_rate": 3.9934075475094627e-05, "loss": 1.6941, "step": 75192 }, { "epoch": 2.5, "grad_norm": 0.6709120869636536, "learning_rate": 3.992886519317978e-05, "loss": 1.6443, "step": 75193 }, { "epoch": 2.5, "grad_norm": 0.6844930052757263, "learning_rate": 3.992365522695288e-05, "loss": 1.7192, "step": 75194 }, { "epoch": 2.5, "grad_norm": 0.6798631548881531, "learning_rate": 3.991844557642041e-05, "loss": 1.6355, "step": 75195 }, { "epoch": 2.5, "grad_norm": 0.6802688837051392, "learning_rate": 3.9913236241588644e-05, "loss": 1.7006, "step": 75196 }, { "epoch": 2.5, "grad_norm": 0.7259729504585266, "learning_rate": 3.990802722246384e-05, "loss": 1.7269, "step": 75197 }, { "epoch": 2.5, "grad_norm": 0.6932999491691589, "learning_rate": 3.990281851905231e-05, "loss": 1.6717, "step": 75198 }, { "epoch": 2.5, "grad_norm": 0.696284294128418, "learning_rate": 3.989761013136055e-05, "loss": 1.7129, "step": 75199 }, { "epoch": 2.5, "grad_norm": 0.7127038240432739, "learning_rate": 3.989240205939476e-05, "loss": 1.7001, "step": 75200 }, { "epoch": 2.5, "grad_norm": 0.6993522047996521, "learning_rate": 3.988719430316117e-05, "loss": 1.6005, "step": 75201 }, { "epoch": 2.5, "grad_norm": 0.7045943140983582, "learning_rate": 3.988198686266629e-05, "loss": 1.7129, "step": 75202 }, { "epoch": 2.5, "grad_norm": 0.6824949383735657, "learning_rate": 3.987677973791634e-05, "loss": 1.6754, "step": 75203 }, { "epoch": 2.5, "grad_norm": 0.6919068694114685, "learning_rate": 3.987157292891762e-05, "loss": 1.6569, "step": 75204 }, { "epoch": 2.5, "grad_norm": 0.6833742260932922, "learning_rate": 3.986636643567651e-05, "loss": 1.7582, "step": 75205 }, { "epoch": 2.5, "grad_norm": 0.7105196118354797, "learning_rate": 3.986116025819925e-05, "loss": 1.728, "step": 75206 }, { "epoch": 2.5, "grad_norm": 0.7064926624298096, "learning_rate": 3.9855954396492274e-05, "loss": 1.6722, "step": 75207 }, { "epoch": 2.5, "grad_norm": 0.6882045269012451, "learning_rate": 3.985074885056177e-05, "loss": 1.6512, "step": 75208 }, { "epoch": 2.5, "grad_norm": 0.70235276222229, "learning_rate": 3.984554362041422e-05, "loss": 1.6699, "step": 75209 }, { "epoch": 2.5, "grad_norm": 0.7029519081115723, "learning_rate": 3.984033870605581e-05, "loss": 1.7261, "step": 75210 }, { "epoch": 2.5, "grad_norm": 0.6951889991760254, "learning_rate": 3.983513410749285e-05, "loss": 1.7051, "step": 75211 }, { "epoch": 2.5, "grad_norm": 0.9275281429290771, "learning_rate": 3.982992982473178e-05, "loss": 1.7267, "step": 75212 }, { "epoch": 2.5, "grad_norm": 0.7078590989112854, "learning_rate": 3.982472585777875e-05, "loss": 1.7313, "step": 75213 }, { "epoch": 2.5, "grad_norm": 0.6911096572875977, "learning_rate": 3.9819522206640234e-05, "loss": 1.6946, "step": 75214 }, { "epoch": 2.5, "grad_norm": 0.6886441111564636, "learning_rate": 3.981431887132252e-05, "loss": 1.6598, "step": 75215 }, { "epoch": 2.5, "grad_norm": 0.6822801828384399, "learning_rate": 3.980911585183176e-05, "loss": 1.6642, "step": 75216 }, { "epoch": 2.5, "grad_norm": 0.7154434323310852, "learning_rate": 3.9803913148174505e-05, "loss": 1.6759, "step": 75217 }, { "epoch": 2.5, "grad_norm": 0.7104465365409851, "learning_rate": 3.97987107603569e-05, "loss": 1.7102, "step": 75218 }, { "epoch": 2.5, "grad_norm": 0.6944282650947571, "learning_rate": 3.979350868838538e-05, "loss": 1.6908, "step": 75219 }, { "epoch": 2.5, "grad_norm": 0.6939007639884949, "learning_rate": 3.97883069322661e-05, "loss": 1.657, "step": 75220 }, { "epoch": 2.5, "grad_norm": 0.7043141722679138, "learning_rate": 3.9783105492005595e-05, "loss": 1.6815, "step": 75221 }, { "epoch": 2.5, "grad_norm": 0.7176049947738647, "learning_rate": 3.9777904367610024e-05, "loss": 1.6476, "step": 75222 }, { "epoch": 2.5, "grad_norm": 0.687710165977478, "learning_rate": 3.9772703559085684e-05, "loss": 1.6182, "step": 75223 }, { "epoch": 2.5, "grad_norm": 0.6846051216125488, "learning_rate": 3.9767503066439e-05, "loss": 1.6739, "step": 75224 }, { "epoch": 2.5, "grad_norm": 0.6969327330589294, "learning_rate": 3.97623028896762e-05, "loss": 1.7323, "step": 75225 }, { "epoch": 2.5, "grad_norm": 0.6805056929588318, "learning_rate": 3.975710302880358e-05, "loss": 1.6297, "step": 75226 }, { "epoch": 2.5, "grad_norm": 0.7110713124275208, "learning_rate": 3.9751903483827506e-05, "loss": 1.78, "step": 75227 }, { "epoch": 2.5, "grad_norm": 0.6862605810165405, "learning_rate": 3.97467042547543e-05, "loss": 1.6872, "step": 75228 }, { "epoch": 2.5, "grad_norm": 0.6915985345840454, "learning_rate": 3.9741505341590294e-05, "loss": 1.6844, "step": 75229 }, { "epoch": 2.5, "grad_norm": 0.6999698281288147, "learning_rate": 3.973630674434165e-05, "loss": 1.7172, "step": 75230 }, { "epoch": 2.5, "grad_norm": 0.7023928165435791, "learning_rate": 3.973110846301486e-05, "loss": 1.7156, "step": 75231 }, { "epoch": 2.5, "grad_norm": 0.702733039855957, "learning_rate": 3.972591049761619e-05, "loss": 1.7378, "step": 75232 }, { "epoch": 2.5, "grad_norm": 0.6893693208694458, "learning_rate": 3.972071284815179e-05, "loss": 1.6785, "step": 75233 }, { "epoch": 2.5, "grad_norm": 0.7041642665863037, "learning_rate": 3.971551551462814e-05, "loss": 1.6569, "step": 75234 }, { "epoch": 2.5, "grad_norm": 0.6912620663642883, "learning_rate": 3.971031849705155e-05, "loss": 1.669, "step": 75235 }, { "epoch": 2.5, "grad_norm": 0.7220977544784546, "learning_rate": 3.9705121795428296e-05, "loss": 1.7311, "step": 75236 }, { "epoch": 2.5, "grad_norm": 0.7115538120269775, "learning_rate": 3.9699925409764566e-05, "loss": 1.6543, "step": 75237 }, { "epoch": 2.5, "grad_norm": 0.6955384016036987, "learning_rate": 3.9694729340066887e-05, "loss": 1.6602, "step": 75238 }, { "epoch": 2.5, "grad_norm": 0.7161044478416443, "learning_rate": 3.9689533586341424e-05, "loss": 1.6737, "step": 75239 }, { "epoch": 2.5, "grad_norm": 0.7152335047721863, "learning_rate": 3.968433814859444e-05, "loss": 1.6555, "step": 75240 }, { "epoch": 2.5, "grad_norm": 0.6940332055091858, "learning_rate": 3.9679143026832325e-05, "loss": 1.6856, "step": 75241 }, { "epoch": 2.5, "grad_norm": 0.7234142422676086, "learning_rate": 3.967394822106145e-05, "loss": 1.6084, "step": 75242 }, { "epoch": 2.5, "grad_norm": 0.6941471695899963, "learning_rate": 3.966875373128803e-05, "loss": 1.64, "step": 75243 }, { "epoch": 2.5, "grad_norm": 0.7180266976356506, "learning_rate": 3.966355955751833e-05, "loss": 1.7451, "step": 75244 }, { "epoch": 2.5, "grad_norm": 0.6970284581184387, "learning_rate": 3.965836569975879e-05, "loss": 1.6937, "step": 75245 }, { "epoch": 2.5, "grad_norm": 0.705751895904541, "learning_rate": 3.965317215801559e-05, "loss": 1.6251, "step": 75246 }, { "epoch": 2.5, "grad_norm": 0.6832123398780823, "learning_rate": 3.9647978932295064e-05, "loss": 1.7771, "step": 75247 }, { "epoch": 2.5, "grad_norm": 0.7015998959541321, "learning_rate": 3.964278602260348e-05, "loss": 1.67, "step": 75248 }, { "epoch": 2.5, "grad_norm": 0.6946417093276978, "learning_rate": 3.963759342894729e-05, "loss": 1.6434, "step": 75249 }, { "epoch": 2.5, "grad_norm": 0.7122583985328674, "learning_rate": 3.9632401151332695e-05, "loss": 1.7383, "step": 75250 }, { "epoch": 2.5, "grad_norm": 0.6903795003890991, "learning_rate": 3.962720918976595e-05, "loss": 1.7584, "step": 75251 }, { "epoch": 2.5, "grad_norm": 0.691572904586792, "learning_rate": 3.962201754425346e-05, "loss": 1.6594, "step": 75252 }, { "epoch": 2.5, "grad_norm": 0.7239931225776672, "learning_rate": 3.9616826214801444e-05, "loss": 1.6897, "step": 75253 }, { "epoch": 2.5, "grad_norm": 0.714148223400116, "learning_rate": 3.96116352014162e-05, "loss": 1.6589, "step": 75254 }, { "epoch": 2.5, "grad_norm": 0.6882142424583435, "learning_rate": 3.9606444504104064e-05, "loss": 1.6294, "step": 75255 }, { "epoch": 2.5, "grad_norm": 0.7533410787582397, "learning_rate": 3.9601254122871417e-05, "loss": 1.7294, "step": 75256 }, { "epoch": 2.5, "grad_norm": 0.689634382724762, "learning_rate": 3.959606405772446e-05, "loss": 1.6964, "step": 75257 }, { "epoch": 2.5, "grad_norm": 0.7113046646118164, "learning_rate": 3.959087430866946e-05, "loss": 1.6586, "step": 75258 }, { "epoch": 2.5, "grad_norm": 0.7023557424545288, "learning_rate": 3.958568487571281e-05, "loss": 1.6433, "step": 75259 }, { "epoch": 2.5, "grad_norm": 0.7066574096679688, "learning_rate": 3.95804957588608e-05, "loss": 1.7011, "step": 75260 }, { "epoch": 2.5, "grad_norm": 0.7134406566619873, "learning_rate": 3.957530695811962e-05, "loss": 1.6329, "step": 75261 }, { "epoch": 2.5, "grad_norm": 0.7262797355651855, "learning_rate": 3.9570118473495685e-05, "loss": 1.6277, "step": 75262 }, { "epoch": 2.5, "grad_norm": 0.6827042698860168, "learning_rate": 3.956493030499523e-05, "loss": 1.7392, "step": 75263 }, { "epoch": 2.5, "grad_norm": 0.7025555372238159, "learning_rate": 3.955974245262461e-05, "loss": 1.7412, "step": 75264 }, { "epoch": 2.5, "grad_norm": 0.700396716594696, "learning_rate": 3.9554554916390034e-05, "loss": 1.6052, "step": 75265 }, { "epoch": 2.5, "grad_norm": 0.7118136882781982, "learning_rate": 3.954936769629794e-05, "loss": 1.7345, "step": 75266 }, { "epoch": 2.5, "grad_norm": 0.707973301410675, "learning_rate": 3.9544180792354516e-05, "loss": 1.6762, "step": 75267 }, { "epoch": 2.5, "grad_norm": 0.7078872919082642, "learning_rate": 3.9538994204566e-05, "loss": 1.701, "step": 75268 }, { "epoch": 2.5, "grad_norm": 0.7085487246513367, "learning_rate": 3.953380793293884e-05, "loss": 1.7139, "step": 75269 }, { "epoch": 2.5, "grad_norm": 0.7158481478691101, "learning_rate": 3.9528621977479217e-05, "loss": 1.6355, "step": 75270 }, { "epoch": 2.5, "grad_norm": 0.7253872156143188, "learning_rate": 3.952343633819354e-05, "loss": 1.8118, "step": 75271 }, { "epoch": 2.5, "grad_norm": 0.6970998048782349, "learning_rate": 3.951825101508801e-05, "loss": 1.6714, "step": 75272 }, { "epoch": 2.5, "grad_norm": 0.7061998248100281, "learning_rate": 3.951306600816886e-05, "loss": 1.6549, "step": 75273 }, { "epoch": 2.5, "grad_norm": 0.681056797504425, "learning_rate": 3.950788131744255e-05, "loss": 1.698, "step": 75274 }, { "epoch": 2.5, "grad_norm": 0.6965161561965942, "learning_rate": 3.950269694291523e-05, "loss": 1.6942, "step": 75275 }, { "epoch": 2.5, "grad_norm": 0.6890935301780701, "learning_rate": 3.949751288459332e-05, "loss": 1.6624, "step": 75276 }, { "epoch": 2.5, "grad_norm": 0.7018014788627625, "learning_rate": 3.949232914248299e-05, "loss": 1.6408, "step": 75277 }, { "epoch": 2.5, "grad_norm": 0.6983914971351624, "learning_rate": 3.948714571659065e-05, "loss": 1.7115, "step": 75278 }, { "epoch": 2.5, "grad_norm": 0.6882524490356445, "learning_rate": 3.948196260692252e-05, "loss": 1.7374, "step": 75279 }, { "epoch": 2.5, "grad_norm": 0.7101770639419556, "learning_rate": 3.9476779813484836e-05, "loss": 1.6604, "step": 75280 }, { "epoch": 2.5, "grad_norm": 0.6959735155105591, "learning_rate": 3.947159733628402e-05, "loss": 1.6888, "step": 75281 }, { "epoch": 2.5, "grad_norm": 0.6704482436180115, "learning_rate": 3.9466415175326306e-05, "loss": 1.7013, "step": 75282 }, { "epoch": 2.5, "grad_norm": 0.7443215250968933, "learning_rate": 3.946123333061788e-05, "loss": 1.7599, "step": 75283 }, { "epoch": 2.5, "grad_norm": 0.7112342119216919, "learning_rate": 3.945605180216518e-05, "loss": 1.6971, "step": 75284 }, { "epoch": 2.5, "grad_norm": 0.685325562953949, "learning_rate": 3.945087058997446e-05, "loss": 1.6506, "step": 75285 }, { "epoch": 2.5, "grad_norm": 0.6977319717407227, "learning_rate": 3.944568969405202e-05, "loss": 1.6709, "step": 75286 }, { "epoch": 2.5, "grad_norm": 0.7218309640884399, "learning_rate": 3.944050911440405e-05, "loss": 1.6371, "step": 75287 }, { "epoch": 2.5, "grad_norm": 0.7201048731803894, "learning_rate": 3.943532885103698e-05, "loss": 1.714, "step": 75288 }, { "epoch": 2.5, "grad_norm": 0.7069341540336609, "learning_rate": 3.9430148903957016e-05, "loss": 1.6598, "step": 75289 }, { "epoch": 2.5, "grad_norm": 0.6882152557373047, "learning_rate": 3.94249692731704e-05, "loss": 1.7062, "step": 75290 }, { "epoch": 2.5, "grad_norm": 0.6997451782226562, "learning_rate": 3.941978995868348e-05, "loss": 1.6905, "step": 75291 }, { "epoch": 2.5, "grad_norm": 0.7167516946792603, "learning_rate": 3.941461096050267e-05, "loss": 1.7543, "step": 75292 }, { "epoch": 2.51, "grad_norm": 0.702273964881897, "learning_rate": 3.9409432278634e-05, "loss": 1.6798, "step": 75293 }, { "epoch": 2.51, "grad_norm": 0.6923737525939941, "learning_rate": 3.940425391308391e-05, "loss": 1.6757, "step": 75294 }, { "epoch": 2.51, "grad_norm": 0.7018123269081116, "learning_rate": 3.939907586385867e-05, "loss": 1.6673, "step": 75295 }, { "epoch": 2.51, "grad_norm": 0.727415919303894, "learning_rate": 3.939389813096457e-05, "loss": 1.691, "step": 75296 }, { "epoch": 2.51, "grad_norm": 0.6828404068946838, "learning_rate": 3.938872071440783e-05, "loss": 1.6471, "step": 75297 }, { "epoch": 2.51, "grad_norm": 0.6950902342796326, "learning_rate": 3.938354361419477e-05, "loss": 1.6991, "step": 75298 }, { "epoch": 2.51, "grad_norm": 0.6977617144584656, "learning_rate": 3.937836683033182e-05, "loss": 1.6596, "step": 75299 }, { "epoch": 2.51, "grad_norm": 0.6912620067596436, "learning_rate": 3.9373190362825015e-05, "loss": 1.6492, "step": 75300 }, { "epoch": 2.51, "grad_norm": 0.7002716064453125, "learning_rate": 3.936801421168074e-05, "loss": 1.7525, "step": 75301 }, { "epoch": 2.51, "grad_norm": 0.7099761962890625, "learning_rate": 3.936283837690537e-05, "loss": 1.6503, "step": 75302 }, { "epoch": 2.51, "grad_norm": 0.6976266503334045, "learning_rate": 3.935766285850512e-05, "loss": 1.6133, "step": 75303 }, { "epoch": 2.51, "grad_norm": 0.6877014636993408, "learning_rate": 3.935248765648617e-05, "loss": 1.6358, "step": 75304 }, { "epoch": 2.51, "grad_norm": 0.6967832446098328, "learning_rate": 3.934731277085492e-05, "loss": 1.7185, "step": 75305 }, { "epoch": 2.51, "grad_norm": 0.6993784308433533, "learning_rate": 3.9342138201617756e-05, "loss": 1.649, "step": 75306 }, { "epoch": 2.51, "grad_norm": 0.7121435403823853, "learning_rate": 3.9336963948780664e-05, "loss": 1.6509, "step": 75307 }, { "epoch": 2.51, "grad_norm": 0.7557466626167297, "learning_rate": 3.933179001235014e-05, "loss": 1.608, "step": 75308 }, { "epoch": 2.51, "grad_norm": 0.6950571537017822, "learning_rate": 3.932661639233244e-05, "loss": 1.6721, "step": 75309 }, { "epoch": 2.51, "grad_norm": 0.6947890520095825, "learning_rate": 3.932144308873384e-05, "loss": 1.7112, "step": 75310 }, { "epoch": 2.51, "grad_norm": 0.7002246379852295, "learning_rate": 3.931627010156055e-05, "loss": 1.6515, "step": 75311 }, { "epoch": 2.51, "grad_norm": 0.6860944628715515, "learning_rate": 3.9311097430818916e-05, "loss": 1.7097, "step": 75312 }, { "epoch": 2.51, "grad_norm": 0.6866462826728821, "learning_rate": 3.930592507651525e-05, "loss": 1.6429, "step": 75313 }, { "epoch": 2.51, "grad_norm": 0.720338761806488, "learning_rate": 3.93007530386557e-05, "loss": 1.7335, "step": 75314 }, { "epoch": 2.51, "grad_norm": 0.7007021903991699, "learning_rate": 3.929558131724661e-05, "loss": 1.6218, "step": 75315 }, { "epoch": 2.51, "grad_norm": 0.6874209642410278, "learning_rate": 3.929040991229434e-05, "loss": 1.7168, "step": 75316 }, { "epoch": 2.51, "grad_norm": 0.6961745619773865, "learning_rate": 3.92852388238051e-05, "loss": 1.7001, "step": 75317 }, { "epoch": 2.51, "grad_norm": 0.7039206624031067, "learning_rate": 3.928006805178511e-05, "loss": 1.685, "step": 75318 }, { "epoch": 2.51, "grad_norm": 0.6923955082893372, "learning_rate": 3.9274897596240726e-05, "loss": 1.6501, "step": 75319 }, { "epoch": 2.51, "grad_norm": 0.6842803955078125, "learning_rate": 3.9269727457178256e-05, "loss": 1.6868, "step": 75320 }, { "epoch": 2.51, "grad_norm": 0.6826415657997131, "learning_rate": 3.926455763460382e-05, "loss": 1.6685, "step": 75321 }, { "epoch": 2.51, "grad_norm": 0.6846699714660645, "learning_rate": 3.9259388128523886e-05, "loss": 1.7164, "step": 75322 }, { "epoch": 2.51, "grad_norm": 0.6891390085220337, "learning_rate": 3.925421893894454e-05, "loss": 1.6796, "step": 75323 }, { "epoch": 2.51, "grad_norm": 0.6758242249488831, "learning_rate": 3.924905006587225e-05, "loss": 1.6701, "step": 75324 }, { "epoch": 2.51, "grad_norm": 0.6895301342010498, "learning_rate": 3.9243881509313105e-05, "loss": 1.6751, "step": 75325 }, { "epoch": 2.51, "grad_norm": 0.7012098431587219, "learning_rate": 3.923871326927354e-05, "loss": 1.6829, "step": 75326 }, { "epoch": 2.51, "grad_norm": 0.7001757621765137, "learning_rate": 3.923354534575974e-05, "loss": 1.7706, "step": 75327 }, { "epoch": 2.51, "grad_norm": 0.7142770886421204, "learning_rate": 3.922837773877795e-05, "loss": 1.721, "step": 75328 }, { "epoch": 2.51, "grad_norm": 0.7011915445327759, "learning_rate": 3.9223210448334554e-05, "loss": 1.7027, "step": 75329 }, { "epoch": 2.51, "grad_norm": 0.719449520111084, "learning_rate": 3.921804347443568e-05, "loss": 1.7341, "step": 75330 }, { "epoch": 2.51, "grad_norm": 0.7058470249176025, "learning_rate": 3.921287681708772e-05, "loss": 1.6015, "step": 75331 }, { "epoch": 2.51, "grad_norm": 0.7152250409126282, "learning_rate": 3.920771047629695e-05, "loss": 1.6903, "step": 75332 }, { "epoch": 2.51, "grad_norm": 0.6849231719970703, "learning_rate": 3.920254445206948e-05, "loss": 1.6749, "step": 75333 }, { "epoch": 2.51, "grad_norm": 0.699438214302063, "learning_rate": 3.919737874441179e-05, "loss": 1.7382, "step": 75334 }, { "epoch": 2.51, "grad_norm": 0.7144652605056763, "learning_rate": 3.919221335332999e-05, "loss": 1.6683, "step": 75335 }, { "epoch": 2.51, "grad_norm": 0.7102041840553284, "learning_rate": 3.9187048278830494e-05, "loss": 1.7024, "step": 75336 }, { "epoch": 2.51, "grad_norm": 0.731786847114563, "learning_rate": 3.918188352091941e-05, "loss": 1.652, "step": 75337 }, { "epoch": 2.51, "grad_norm": 0.7203972935676575, "learning_rate": 3.9176719079603146e-05, "loss": 1.6625, "step": 75338 }, { "epoch": 2.51, "grad_norm": 0.6976239681243896, "learning_rate": 3.91715549548879e-05, "loss": 1.7349, "step": 75339 }, { "epoch": 2.51, "grad_norm": 0.6728818416595459, "learning_rate": 3.916639114677992e-05, "loss": 1.6324, "step": 75340 }, { "epoch": 2.51, "grad_norm": 0.6894535422325134, "learning_rate": 3.916122765528557e-05, "loss": 1.6879, "step": 75341 }, { "epoch": 2.51, "grad_norm": 0.6854092478752136, "learning_rate": 3.9156064480411064e-05, "loss": 1.6381, "step": 75342 }, { "epoch": 2.51, "grad_norm": 0.7031137943267822, "learning_rate": 3.915090162216258e-05, "loss": 1.6552, "step": 75343 }, { "epoch": 2.51, "grad_norm": 0.7080852389335632, "learning_rate": 3.914573908054648e-05, "loss": 1.6496, "step": 75344 }, { "epoch": 2.51, "grad_norm": 0.7042160630226135, "learning_rate": 3.9140576855569064e-05, "loss": 1.7008, "step": 75345 }, { "epoch": 2.51, "grad_norm": 0.694307267665863, "learning_rate": 3.9135414947236555e-05, "loss": 1.6671, "step": 75346 }, { "epoch": 2.51, "grad_norm": 0.705964982509613, "learning_rate": 3.913025335555515e-05, "loss": 1.6746, "step": 75347 }, { "epoch": 2.51, "grad_norm": 0.7165175676345825, "learning_rate": 3.912509208053125e-05, "loss": 1.6771, "step": 75348 }, { "epoch": 2.51, "grad_norm": 0.697712242603302, "learning_rate": 3.9119931122171075e-05, "loss": 1.7237, "step": 75349 }, { "epoch": 2.51, "grad_norm": 0.6877427697181702, "learning_rate": 3.911477048048076e-05, "loss": 1.6102, "step": 75350 }, { "epoch": 2.51, "grad_norm": 0.6830339431762695, "learning_rate": 3.91096101554667e-05, "loss": 1.7247, "step": 75351 }, { "epoch": 2.51, "grad_norm": 0.7281627655029297, "learning_rate": 3.910445014713518e-05, "loss": 1.759, "step": 75352 }, { "epoch": 2.51, "grad_norm": 0.7011648416519165, "learning_rate": 3.909929045549244e-05, "loss": 1.7048, "step": 75353 }, { "epoch": 2.51, "grad_norm": 0.6855494379997253, "learning_rate": 3.9094131080544634e-05, "loss": 1.6628, "step": 75354 }, { "epoch": 2.51, "grad_norm": 0.7073068022727966, "learning_rate": 3.908897202229819e-05, "loss": 1.7261, "step": 75355 }, { "epoch": 2.51, "grad_norm": 0.6959103941917419, "learning_rate": 3.908381328075928e-05, "loss": 1.6851, "step": 75356 }, { "epoch": 2.51, "grad_norm": 0.7096847295761108, "learning_rate": 3.907865485593412e-05, "loss": 1.6989, "step": 75357 }, { "epoch": 2.51, "grad_norm": 0.6927411556243896, "learning_rate": 3.907349674782901e-05, "loss": 1.6796, "step": 75358 }, { "epoch": 2.51, "grad_norm": 0.689983069896698, "learning_rate": 3.9068338956450316e-05, "loss": 1.6533, "step": 75359 }, { "epoch": 2.51, "grad_norm": 0.729575514793396, "learning_rate": 3.9063181481804196e-05, "loss": 1.7221, "step": 75360 }, { "epoch": 2.51, "grad_norm": 0.7004924416542053, "learning_rate": 3.905802432389688e-05, "loss": 1.7285, "step": 75361 }, { "epoch": 2.51, "grad_norm": 0.6638079881668091, "learning_rate": 3.905286748273472e-05, "loss": 1.6829, "step": 75362 }, { "epoch": 2.51, "grad_norm": 0.6959487199783325, "learning_rate": 3.904771095832395e-05, "loss": 1.6423, "step": 75363 }, { "epoch": 2.51, "grad_norm": 0.6910009384155273, "learning_rate": 3.904255475067071e-05, "loss": 1.6741, "step": 75364 }, { "epoch": 2.51, "grad_norm": 0.6984364986419678, "learning_rate": 3.903739885978138e-05, "loss": 1.7202, "step": 75365 }, { "epoch": 2.51, "grad_norm": 0.6932411193847656, "learning_rate": 3.9032243285662254e-05, "loss": 1.6743, "step": 75366 }, { "epoch": 2.51, "grad_norm": 0.696521520614624, "learning_rate": 3.9027088028319576e-05, "loss": 1.6717, "step": 75367 }, { "epoch": 2.51, "grad_norm": 0.6885372996330261, "learning_rate": 3.902193308775946e-05, "loss": 1.5888, "step": 75368 }, { "epoch": 2.51, "grad_norm": 0.6952781081199646, "learning_rate": 3.9016778463988306e-05, "loss": 1.7233, "step": 75369 }, { "epoch": 2.51, "grad_norm": 0.6877257823944092, "learning_rate": 3.9011624157012346e-05, "loss": 1.6999, "step": 75370 }, { "epoch": 2.51, "grad_norm": 0.6947073340415955, "learning_rate": 3.900647016683777e-05, "loss": 1.5674, "step": 75371 }, { "epoch": 2.51, "grad_norm": 0.6948663592338562, "learning_rate": 3.900131649347087e-05, "loss": 1.6669, "step": 75372 }, { "epoch": 2.51, "grad_norm": 0.7012237906455994, "learning_rate": 3.899616313691798e-05, "loss": 1.7224, "step": 75373 }, { "epoch": 2.51, "grad_norm": 0.7324452996253967, "learning_rate": 3.8991010097185295e-05, "loss": 1.702, "step": 75374 }, { "epoch": 2.51, "grad_norm": 0.6846283674240112, "learning_rate": 3.898585737427901e-05, "loss": 1.6283, "step": 75375 }, { "epoch": 2.51, "grad_norm": 0.6637976169586182, "learning_rate": 3.898070496820551e-05, "loss": 1.6554, "step": 75376 }, { "epoch": 2.51, "grad_norm": 0.6845682859420776, "learning_rate": 3.8975552878970936e-05, "loss": 1.6244, "step": 75377 }, { "epoch": 2.51, "grad_norm": 0.7239975929260254, "learning_rate": 3.897040110658155e-05, "loss": 1.7569, "step": 75378 }, { "epoch": 2.51, "grad_norm": 0.697829008102417, "learning_rate": 3.896524965104367e-05, "loss": 1.7367, "step": 75379 }, { "epoch": 2.51, "grad_norm": 0.7038885354995728, "learning_rate": 3.896009851236347e-05, "loss": 1.6791, "step": 75380 }, { "epoch": 2.51, "grad_norm": 0.6704130172729492, "learning_rate": 3.8954947690547334e-05, "loss": 1.6631, "step": 75381 }, { "epoch": 2.51, "grad_norm": 0.7050160765647888, "learning_rate": 3.894979718560133e-05, "loss": 1.6622, "step": 75382 }, { "epoch": 2.51, "grad_norm": 0.7223051190376282, "learning_rate": 3.894464699753188e-05, "loss": 1.6429, "step": 75383 }, { "epoch": 2.51, "grad_norm": 0.6994825601577759, "learning_rate": 3.8939497126345156e-05, "loss": 1.6735, "step": 75384 }, { "epoch": 2.51, "grad_norm": 0.7090487480163574, "learning_rate": 3.893434757204737e-05, "loss": 1.6649, "step": 75385 }, { "epoch": 2.51, "grad_norm": 0.698613166809082, "learning_rate": 3.892919833464487e-05, "loss": 1.6607, "step": 75386 }, { "epoch": 2.51, "grad_norm": 0.7011561393737793, "learning_rate": 3.892404941414381e-05, "loss": 1.6815, "step": 75387 }, { "epoch": 2.51, "grad_norm": 0.6894673109054565, "learning_rate": 3.8918900810550545e-05, "loss": 1.6462, "step": 75388 }, { "epoch": 2.51, "grad_norm": 0.7037931680679321, "learning_rate": 3.8913752523871245e-05, "loss": 1.704, "step": 75389 }, { "epoch": 2.51, "grad_norm": 0.707252562046051, "learning_rate": 3.8908604554112135e-05, "loss": 1.7196, "step": 75390 }, { "epoch": 2.51, "grad_norm": 0.6657787561416626, "learning_rate": 3.890345690127957e-05, "loss": 1.6467, "step": 75391 }, { "epoch": 2.51, "grad_norm": 0.6955298781394958, "learning_rate": 3.889830956537969e-05, "loss": 1.6088, "step": 75392 }, { "epoch": 2.51, "grad_norm": 0.6843087673187256, "learning_rate": 3.889316254641882e-05, "loss": 1.6661, "step": 75393 }, { "epoch": 2.51, "grad_norm": 0.6875550746917725, "learning_rate": 3.888801584440315e-05, "loss": 1.6387, "step": 75394 }, { "epoch": 2.51, "grad_norm": 0.7111930847167969, "learning_rate": 3.888286945933898e-05, "loss": 1.7055, "step": 75395 }, { "epoch": 2.51, "grad_norm": 0.6993501782417297, "learning_rate": 3.887772339123258e-05, "loss": 1.7028, "step": 75396 }, { "epoch": 2.51, "grad_norm": 0.7288413643836975, "learning_rate": 3.8872577640090054e-05, "loss": 1.7262, "step": 75397 }, { "epoch": 2.51, "grad_norm": 0.708262026309967, "learning_rate": 3.886743220591782e-05, "loss": 1.6352, "step": 75398 }, { "epoch": 2.51, "grad_norm": 0.6973592638969421, "learning_rate": 3.886228708872202e-05, "loss": 1.7366, "step": 75399 }, { "epoch": 2.51, "grad_norm": 0.6993488669395447, "learning_rate": 3.88571422885089e-05, "loss": 1.6732, "step": 75400 }, { "epoch": 2.51, "grad_norm": 0.7173522710800171, "learning_rate": 3.885199780528472e-05, "loss": 1.666, "step": 75401 }, { "epoch": 2.51, "grad_norm": 0.710791826248169, "learning_rate": 3.88468536390558e-05, "loss": 1.6544, "step": 75402 }, { "epoch": 2.51, "grad_norm": 0.6778044700622559, "learning_rate": 3.88417097898283e-05, "loss": 1.6265, "step": 75403 }, { "epoch": 2.51, "grad_norm": 0.7068702578544617, "learning_rate": 3.8836566257608456e-05, "loss": 1.7183, "step": 75404 }, { "epoch": 2.51, "grad_norm": 0.7040059566497803, "learning_rate": 3.883142304240256e-05, "loss": 1.6576, "step": 75405 }, { "epoch": 2.51, "grad_norm": 0.6755346655845642, "learning_rate": 3.882628014421688e-05, "loss": 1.6955, "step": 75406 }, { "epoch": 2.51, "grad_norm": 0.6981601715087891, "learning_rate": 3.8821137563057536e-05, "loss": 1.6707, "step": 75407 }, { "epoch": 2.51, "grad_norm": 0.6997838616371155, "learning_rate": 3.8815995298930824e-05, "loss": 1.7632, "step": 75408 }, { "epoch": 2.51, "grad_norm": 0.6892290115356445, "learning_rate": 3.881085335184317e-05, "loss": 1.6703, "step": 75409 }, { "epoch": 2.51, "grad_norm": 0.7061508297920227, "learning_rate": 3.880571172180051e-05, "loss": 1.7309, "step": 75410 }, { "epoch": 2.51, "grad_norm": 0.7115628123283386, "learning_rate": 3.880057040880924e-05, "loss": 1.7102, "step": 75411 }, { "epoch": 2.51, "grad_norm": 0.7084859013557434, "learning_rate": 3.879542941287564e-05, "loss": 1.696, "step": 75412 }, { "epoch": 2.51, "grad_norm": 0.7102329134941101, "learning_rate": 3.879028873400593e-05, "loss": 1.672, "step": 75413 }, { "epoch": 2.51, "grad_norm": 0.6736522912979126, "learning_rate": 3.8785148372206206e-05, "loss": 1.6354, "step": 75414 }, { "epoch": 2.51, "grad_norm": 0.6948383450508118, "learning_rate": 3.878000832748286e-05, "loss": 1.696, "step": 75415 }, { "epoch": 2.51, "grad_norm": 0.6986812353134155, "learning_rate": 3.877486859984224e-05, "loss": 1.6195, "step": 75416 }, { "epoch": 2.51, "grad_norm": 0.7099392414093018, "learning_rate": 3.87697291892903e-05, "loss": 1.6226, "step": 75417 }, { "epoch": 2.51, "grad_norm": 0.7036960124969482, "learning_rate": 3.876459009583338e-05, "loss": 1.6397, "step": 75418 }, { "epoch": 2.51, "grad_norm": 0.674879789352417, "learning_rate": 3.8759451319477876e-05, "loss": 1.6466, "step": 75419 }, { "epoch": 2.51, "grad_norm": 0.7181090712547302, "learning_rate": 3.875431286022987e-05, "loss": 1.6701, "step": 75420 }, { "epoch": 2.51, "grad_norm": 0.6896077394485474, "learning_rate": 3.874917471809558e-05, "loss": 1.6935, "step": 75421 }, { "epoch": 2.51, "grad_norm": 0.6826216578483582, "learning_rate": 3.874403689308131e-05, "loss": 1.6798, "step": 75422 }, { "epoch": 2.51, "grad_norm": 0.6829212307929993, "learning_rate": 3.87388993851934e-05, "loss": 1.6746, "step": 75423 }, { "epoch": 2.51, "grad_norm": 0.7121659517288208, "learning_rate": 3.8733762194437844e-05, "loss": 1.6589, "step": 75424 }, { "epoch": 2.51, "grad_norm": 0.7084375619888306, "learning_rate": 3.8728625320821e-05, "loss": 1.6746, "step": 75425 }, { "epoch": 2.51, "grad_norm": 0.686803936958313, "learning_rate": 3.872348876434917e-05, "loss": 1.6994, "step": 75426 }, { "epoch": 2.51, "grad_norm": 0.6911959052085876, "learning_rate": 3.871835252502855e-05, "loss": 1.7379, "step": 75427 }, { "epoch": 2.51, "grad_norm": 0.7056716680526733, "learning_rate": 3.8713216602865296e-05, "loss": 1.734, "step": 75428 }, { "epoch": 2.51, "grad_norm": 0.6982677578926086, "learning_rate": 3.870808099786573e-05, "loss": 1.7312, "step": 75429 }, { "epoch": 2.51, "grad_norm": 0.7050164341926575, "learning_rate": 3.870294571003606e-05, "loss": 1.6315, "step": 75430 }, { "epoch": 2.51, "grad_norm": 0.6841878294944763, "learning_rate": 3.8697810739382474e-05, "loss": 1.5948, "step": 75431 }, { "epoch": 2.51, "grad_norm": 0.6969438195228577, "learning_rate": 3.86926760859112e-05, "loss": 1.659, "step": 75432 }, { "epoch": 2.51, "grad_norm": 0.6795842051506042, "learning_rate": 3.86875417496286e-05, "loss": 1.741, "step": 75433 }, { "epoch": 2.51, "grad_norm": 0.6989299058914185, "learning_rate": 3.8682407730540845e-05, "loss": 1.5671, "step": 75434 }, { "epoch": 2.51, "grad_norm": 0.6823109984397888, "learning_rate": 3.8677274028654046e-05, "loss": 1.683, "step": 75435 }, { "epoch": 2.51, "grad_norm": 0.6998672485351562, "learning_rate": 3.8672140643974614e-05, "loss": 1.6824, "step": 75436 }, { "epoch": 2.51, "grad_norm": 0.6745685338973999, "learning_rate": 3.86670075765087e-05, "loss": 1.6701, "step": 75437 }, { "epoch": 2.51, "grad_norm": 0.6920762658119202, "learning_rate": 3.866187482626247e-05, "loss": 1.7206, "step": 75438 }, { "epoch": 2.51, "grad_norm": 0.6853810548782349, "learning_rate": 3.865674239324229e-05, "loss": 1.6666, "step": 75439 }, { "epoch": 2.51, "grad_norm": 0.6900168061256409, "learning_rate": 3.865161027745425e-05, "loss": 1.6983, "step": 75440 }, { "epoch": 2.51, "grad_norm": 0.701421320438385, "learning_rate": 3.864647847890467e-05, "loss": 1.7586, "step": 75441 }, { "epoch": 2.51, "grad_norm": 0.7036278247833252, "learning_rate": 3.864134699759973e-05, "loss": 1.6737, "step": 75442 }, { "epoch": 2.51, "grad_norm": 0.7060310244560242, "learning_rate": 3.8636215833545746e-05, "loss": 1.6749, "step": 75443 }, { "epoch": 2.51, "grad_norm": 0.6845564842224121, "learning_rate": 3.8631084986748884e-05, "loss": 1.6675, "step": 75444 }, { "epoch": 2.51, "grad_norm": 0.7061535716056824, "learning_rate": 3.8625954457215334e-05, "loss": 1.7338, "step": 75445 }, { "epoch": 2.51, "grad_norm": 0.7007234692573547, "learning_rate": 3.86208242449514e-05, "loss": 1.6838, "step": 75446 }, { "epoch": 2.51, "grad_norm": 0.6813802123069763, "learning_rate": 3.86156943499632e-05, "loss": 1.7611, "step": 75447 }, { "epoch": 2.51, "grad_norm": 0.7016276717185974, "learning_rate": 3.86105647722571e-05, "loss": 1.7067, "step": 75448 }, { "epoch": 2.51, "grad_norm": 0.6930565237998962, "learning_rate": 3.86054355118393e-05, "loss": 1.6307, "step": 75449 }, { "epoch": 2.51, "grad_norm": 0.7100375890731812, "learning_rate": 3.860030656871589e-05, "loss": 1.6779, "step": 75450 }, { "epoch": 2.51, "grad_norm": 0.7085243463516235, "learning_rate": 3.859517794289326e-05, "loss": 1.6785, "step": 75451 }, { "epoch": 2.51, "grad_norm": 0.6775683760643005, "learning_rate": 3.859004963437748e-05, "loss": 1.6386, "step": 75452 }, { "epoch": 2.51, "grad_norm": 0.70176100730896, "learning_rate": 3.858492164317495e-05, "loss": 1.6059, "step": 75453 }, { "epoch": 2.51, "grad_norm": 0.731001079082489, "learning_rate": 3.857979396929175e-05, "loss": 1.7351, "step": 75454 }, { "epoch": 2.51, "grad_norm": 0.6855787634849548, "learning_rate": 3.857466661273422e-05, "loss": 1.6229, "step": 75455 }, { "epoch": 2.51, "grad_norm": 0.6978732347488403, "learning_rate": 3.8569539573508543e-05, "loss": 1.7506, "step": 75456 }, { "epoch": 2.51, "grad_norm": 0.7228979468345642, "learning_rate": 3.856441285162083e-05, "loss": 1.6278, "step": 75457 }, { "epoch": 2.51, "grad_norm": 0.6996526122093201, "learning_rate": 3.8559286447077464e-05, "loss": 1.6968, "step": 75458 }, { "epoch": 2.51, "grad_norm": 0.727806568145752, "learning_rate": 3.855416035988461e-05, "loss": 1.6448, "step": 75459 }, { "epoch": 2.51, "grad_norm": 0.7239653468132019, "learning_rate": 3.8549034590048436e-05, "loss": 1.7427, "step": 75460 }, { "epoch": 2.51, "grad_norm": 0.7088741660118103, "learning_rate": 3.854390913757519e-05, "loss": 1.7163, "step": 75461 }, { "epoch": 2.51, "grad_norm": 0.6836332678794861, "learning_rate": 3.853878400247121e-05, "loss": 1.6544, "step": 75462 }, { "epoch": 2.51, "grad_norm": 0.6924242377281189, "learning_rate": 3.8533659184742597e-05, "loss": 1.6927, "step": 75463 }, { "epoch": 2.51, "grad_norm": 0.6860045790672302, "learning_rate": 3.852853468439553e-05, "loss": 1.6748, "step": 75464 }, { "epoch": 2.51, "grad_norm": 0.7117984294891357, "learning_rate": 3.852341050143635e-05, "loss": 1.6999, "step": 75465 }, { "epoch": 2.51, "grad_norm": 0.689063549041748, "learning_rate": 3.851828663587124e-05, "loss": 1.6555, "step": 75466 }, { "epoch": 2.51, "grad_norm": 0.6770946383476257, "learning_rate": 3.8513163087706346e-05, "loss": 1.6818, "step": 75467 }, { "epoch": 2.51, "grad_norm": 0.689395546913147, "learning_rate": 3.850803985694795e-05, "loss": 1.7022, "step": 75468 }, { "epoch": 2.51, "grad_norm": 0.7033783793449402, "learning_rate": 3.850291694360231e-05, "loss": 1.6861, "step": 75469 }, { "epoch": 2.51, "grad_norm": 0.6891142129898071, "learning_rate": 3.84977943476756e-05, "loss": 1.7054, "step": 75470 }, { "epoch": 2.51, "grad_norm": 0.7014074921607971, "learning_rate": 3.8492672069173943e-05, "loss": 1.5996, "step": 75471 }, { "epoch": 2.51, "grad_norm": 0.6854208111763, "learning_rate": 3.848755010810377e-05, "loss": 1.6543, "step": 75472 }, { "epoch": 2.51, "grad_norm": 0.6812396049499512, "learning_rate": 3.848242846447114e-05, "loss": 1.6872, "step": 75473 }, { "epoch": 2.51, "grad_norm": 0.7095918655395508, "learning_rate": 3.847730713828227e-05, "loss": 1.7269, "step": 75474 }, { "epoch": 2.51, "grad_norm": 0.7075089812278748, "learning_rate": 3.847218612954341e-05, "loss": 1.7, "step": 75475 }, { "epoch": 2.51, "grad_norm": 0.6839314103126526, "learning_rate": 3.8467065438260845e-05, "loss": 1.6993, "step": 75476 }, { "epoch": 2.51, "grad_norm": 0.7112624645233154, "learning_rate": 3.846194506444074e-05, "loss": 1.7183, "step": 75477 }, { "epoch": 2.51, "grad_norm": 0.6849914789199829, "learning_rate": 3.845682500808921e-05, "loss": 1.7557, "step": 75478 }, { "epoch": 2.51, "grad_norm": 0.692165732383728, "learning_rate": 3.845170526921263e-05, "loss": 1.6699, "step": 75479 }, { "epoch": 2.51, "grad_norm": 0.6949305534362793, "learning_rate": 3.844658584781712e-05, "loss": 1.6715, "step": 75480 }, { "epoch": 2.51, "grad_norm": 0.7012898325920105, "learning_rate": 3.844146674390889e-05, "loss": 1.6603, "step": 75481 }, { "epoch": 2.51, "grad_norm": 0.6586029529571533, "learning_rate": 3.843634795749415e-05, "loss": 1.6345, "step": 75482 }, { "epoch": 2.51, "grad_norm": 0.6785376667976379, "learning_rate": 3.843122948857924e-05, "loss": 1.727, "step": 75483 }, { "epoch": 2.51, "grad_norm": 0.6916585564613342, "learning_rate": 3.842611133717025e-05, "loss": 1.7396, "step": 75484 }, { "epoch": 2.51, "grad_norm": 0.684515655040741, "learning_rate": 3.842099350327338e-05, "loss": 1.6579, "step": 75485 }, { "epoch": 2.51, "grad_norm": 0.6968711614608765, "learning_rate": 3.8415875986894917e-05, "loss": 1.6871, "step": 75486 }, { "epoch": 2.51, "grad_norm": 0.7030704617500305, "learning_rate": 3.841075878804106e-05, "loss": 1.7498, "step": 75487 }, { "epoch": 2.51, "grad_norm": 0.6936612725257874, "learning_rate": 3.840564190671794e-05, "loss": 1.6931, "step": 75488 }, { "epoch": 2.51, "grad_norm": 0.6897724866867065, "learning_rate": 3.8400525342931786e-05, "loss": 1.6797, "step": 75489 }, { "epoch": 2.51, "grad_norm": 0.6976286172866821, "learning_rate": 3.839540909668895e-05, "loss": 1.6736, "step": 75490 }, { "epoch": 2.51, "grad_norm": 0.6917600035667419, "learning_rate": 3.8390293167995546e-05, "loss": 1.7169, "step": 75491 }, { "epoch": 2.51, "grad_norm": 0.6900746822357178, "learning_rate": 3.8385177556857684e-05, "loss": 1.6973, "step": 75492 }, { "epoch": 2.51, "grad_norm": 0.6936254501342773, "learning_rate": 3.8380062263281765e-05, "loss": 1.6289, "step": 75493 }, { "epoch": 2.51, "grad_norm": 0.6876800656318665, "learning_rate": 3.837494728727388e-05, "loss": 1.6844, "step": 75494 }, { "epoch": 2.51, "grad_norm": 0.7013075351715088, "learning_rate": 3.8369832628840236e-05, "loss": 1.6768, "step": 75495 }, { "epoch": 2.51, "grad_norm": 0.7002381086349487, "learning_rate": 3.836471828798709e-05, "loss": 1.7492, "step": 75496 }, { "epoch": 2.51, "grad_norm": 0.6787582039833069, "learning_rate": 3.835960426472055e-05, "loss": 1.7115, "step": 75497 }, { "epoch": 2.51, "grad_norm": 0.6928258538246155, "learning_rate": 3.835449055904701e-05, "loss": 1.7565, "step": 75498 }, { "epoch": 2.51, "grad_norm": 0.6994420289993286, "learning_rate": 3.834937717097248e-05, "loss": 1.7828, "step": 75499 }, { "epoch": 2.51, "grad_norm": 0.7068465948104858, "learning_rate": 3.83442641005033e-05, "loss": 1.6556, "step": 75500 }, { "epoch": 2.51, "grad_norm": 0.7136925458908081, "learning_rate": 3.8339151347645645e-05, "loss": 1.7435, "step": 75501 }, { "epoch": 2.51, "grad_norm": 0.7128852009773254, "learning_rate": 3.833403891240566e-05, "loss": 1.7285, "step": 75502 }, { "epoch": 2.51, "grad_norm": 0.7104007005691528, "learning_rate": 3.832892679478964e-05, "loss": 1.6503, "step": 75503 }, { "epoch": 2.51, "grad_norm": 0.6848993301391602, "learning_rate": 3.832381499480367e-05, "loss": 1.7264, "step": 75504 }, { "epoch": 2.51, "grad_norm": 0.6756442189216614, "learning_rate": 3.831870351245412e-05, "loss": 1.6619, "step": 75505 }, { "epoch": 2.51, "grad_norm": 0.6973110437393188, "learning_rate": 3.831359234774711e-05, "loss": 1.6102, "step": 75506 }, { "epoch": 2.51, "grad_norm": 0.7076566815376282, "learning_rate": 3.830848150068878e-05, "loss": 1.7557, "step": 75507 }, { "epoch": 2.51, "grad_norm": 0.6888037919998169, "learning_rate": 3.830337097128545e-05, "loss": 1.7116, "step": 75508 }, { "epoch": 2.51, "grad_norm": 0.7189244031906128, "learning_rate": 3.829826075954319e-05, "loss": 1.6682, "step": 75509 }, { "epoch": 2.51, "grad_norm": 0.7007573843002319, "learning_rate": 3.8293150865468346e-05, "loss": 1.6354, "step": 75510 }, { "epoch": 2.51, "grad_norm": 0.6695889234542847, "learning_rate": 3.828804128906703e-05, "loss": 1.6895, "step": 75511 }, { "epoch": 2.51, "grad_norm": 0.7168329358100891, "learning_rate": 3.828293203034549e-05, "loss": 1.6405, "step": 75512 }, { "epoch": 2.51, "grad_norm": 0.701423704624176, "learning_rate": 3.827782308930993e-05, "loss": 1.6813, "step": 75513 }, { "epoch": 2.51, "grad_norm": 0.6962840557098389, "learning_rate": 3.8272714465966447e-05, "loss": 1.6646, "step": 75514 }, { "epoch": 2.51, "grad_norm": 0.694108247756958, "learning_rate": 3.826760616032143e-05, "loss": 1.7048, "step": 75515 }, { "epoch": 2.51, "grad_norm": 0.712943434715271, "learning_rate": 3.826249817238094e-05, "loss": 1.6588, "step": 75516 }, { "epoch": 2.51, "grad_norm": 0.6839011907577515, "learning_rate": 3.825739050215114e-05, "loss": 1.7531, "step": 75517 }, { "epoch": 2.51, "grad_norm": 0.7005566358566284, "learning_rate": 3.825228314963833e-05, "loss": 1.7019, "step": 75518 }, { "epoch": 2.51, "grad_norm": 0.6872648000717163, "learning_rate": 3.824717611484873e-05, "loss": 1.6865, "step": 75519 }, { "epoch": 2.51, "grad_norm": 0.7049088478088379, "learning_rate": 3.824206939778851e-05, "loss": 1.7098, "step": 75520 }, { "epoch": 2.51, "grad_norm": 0.6899086833000183, "learning_rate": 3.823696299846376e-05, "loss": 1.6489, "step": 75521 }, { "epoch": 2.51, "grad_norm": 0.6940872073173523, "learning_rate": 3.8231856916880845e-05, "loss": 1.6695, "step": 75522 }, { "epoch": 2.51, "grad_norm": 0.6903669834136963, "learning_rate": 3.822675115304589e-05, "loss": 1.6774, "step": 75523 }, { "epoch": 2.51, "grad_norm": 0.7251310348510742, "learning_rate": 3.822164570696502e-05, "loss": 1.6986, "step": 75524 }, { "epoch": 2.51, "grad_norm": 0.7111701965332031, "learning_rate": 3.8216540578644505e-05, "loss": 1.7209, "step": 75525 }, { "epoch": 2.51, "grad_norm": 0.6987084746360779, "learning_rate": 3.821143576809067e-05, "loss": 1.6532, "step": 75526 }, { "epoch": 2.51, "grad_norm": 0.7052684426307678, "learning_rate": 3.820633127530941e-05, "loss": 1.7025, "step": 75527 }, { "epoch": 2.51, "grad_norm": 0.7297914028167725, "learning_rate": 3.820122710030715e-05, "loss": 1.6809, "step": 75528 }, { "epoch": 2.51, "grad_norm": 0.6986759901046753, "learning_rate": 3.819612324309006e-05, "loss": 1.6232, "step": 75529 }, { "epoch": 2.51, "grad_norm": 0.6845232248306274, "learning_rate": 3.819101970366429e-05, "loss": 1.6271, "step": 75530 }, { "epoch": 2.51, "grad_norm": 0.7064852118492126, "learning_rate": 3.818591648203598e-05, "loss": 1.6845, "step": 75531 }, { "epoch": 2.51, "grad_norm": 0.6969848871231079, "learning_rate": 3.818081357821142e-05, "loss": 1.6484, "step": 75532 }, { "epoch": 2.51, "grad_norm": 0.719537079334259, "learning_rate": 3.817571099219691e-05, "loss": 1.7152, "step": 75533 }, { "epoch": 2.51, "grad_norm": 0.6889874935150146, "learning_rate": 3.8170608723998334e-05, "loss": 1.7001, "step": 75534 }, { "epoch": 2.51, "grad_norm": 0.6804971098899841, "learning_rate": 3.816550677362209e-05, "loss": 1.546, "step": 75535 }, { "epoch": 2.51, "grad_norm": 0.6949324011802673, "learning_rate": 3.816040514107441e-05, "loss": 1.7037, "step": 75536 }, { "epoch": 2.51, "grad_norm": 0.7030417919158936, "learning_rate": 3.815530382636139e-05, "loss": 1.7428, "step": 75537 }, { "epoch": 2.51, "grad_norm": 0.6973772048950195, "learning_rate": 3.8150202829489226e-05, "loss": 1.6682, "step": 75538 }, { "epoch": 2.51, "grad_norm": 0.6877998113632202, "learning_rate": 3.8145102150464114e-05, "loss": 1.6824, "step": 75539 }, { "epoch": 2.51, "grad_norm": 0.6929769515991211, "learning_rate": 3.8140001789292405e-05, "loss": 1.6286, "step": 75540 }, { "epoch": 2.51, "grad_norm": 0.6682702302932739, "learning_rate": 3.813490174598001e-05, "loss": 1.6585, "step": 75541 }, { "epoch": 2.51, "grad_norm": 0.7110849618911743, "learning_rate": 3.812980202053324e-05, "loss": 1.7406, "step": 75542 }, { "epoch": 2.51, "grad_norm": 0.6826019883155823, "learning_rate": 3.8124702612958404e-05, "loss": 1.7993, "step": 75543 }, { "epoch": 2.51, "grad_norm": 0.6938037276268005, "learning_rate": 3.811960352326159e-05, "loss": 1.7027, "step": 75544 }, { "epoch": 2.51, "grad_norm": 0.6760820150375366, "learning_rate": 3.811450475144889e-05, "loss": 1.6536, "step": 75545 }, { "epoch": 2.51, "grad_norm": 0.7135685682296753, "learning_rate": 3.8109406297526714e-05, "loss": 1.7433, "step": 75546 }, { "epoch": 2.51, "grad_norm": 0.6930710077285767, "learning_rate": 3.810430816150107e-05, "loss": 1.6656, "step": 75547 }, { "epoch": 2.51, "grad_norm": 0.7234529852867126, "learning_rate": 3.809921034337821e-05, "loss": 1.6297, "step": 75548 }, { "epoch": 2.51, "grad_norm": 0.6965611577033997, "learning_rate": 3.809411284316427e-05, "loss": 1.7513, "step": 75549 }, { "epoch": 2.51, "grad_norm": 0.6763597130775452, "learning_rate": 3.808901566086556e-05, "loss": 1.6757, "step": 75550 }, { "epoch": 2.51, "grad_norm": 0.6803227663040161, "learning_rate": 3.8083918796488213e-05, "loss": 1.7158, "step": 75551 }, { "epoch": 2.51, "grad_norm": 0.7094053626060486, "learning_rate": 3.807882225003831e-05, "loss": 1.6119, "step": 75552 }, { "epoch": 2.51, "grad_norm": 0.6944669485092163, "learning_rate": 3.807372602152219e-05, "loss": 1.718, "step": 75553 }, { "epoch": 2.51, "grad_norm": 0.6983227729797363, "learning_rate": 3.806863011094601e-05, "loss": 1.6374, "step": 75554 }, { "epoch": 2.51, "grad_norm": 0.7120370268821716, "learning_rate": 3.806353451831583e-05, "loss": 1.6058, "step": 75555 }, { "epoch": 2.51, "grad_norm": 0.7206734418869019, "learning_rate": 3.805843924363798e-05, "loss": 1.702, "step": 75556 }, { "epoch": 2.51, "grad_norm": 0.6856401562690735, "learning_rate": 3.805334428691853e-05, "loss": 1.6927, "step": 75557 }, { "epoch": 2.51, "grad_norm": 0.7063265442848206, "learning_rate": 3.80482496481638e-05, "loss": 1.7384, "step": 75558 }, { "epoch": 2.51, "grad_norm": 0.7005190253257751, "learning_rate": 3.804315532737981e-05, "loss": 1.6948, "step": 75559 }, { "epoch": 2.51, "grad_norm": 0.6764265894889832, "learning_rate": 3.803806132457291e-05, "loss": 1.6601, "step": 75560 }, { "epoch": 2.51, "grad_norm": 0.6740266680717468, "learning_rate": 3.803296763974921e-05, "loss": 1.6917, "step": 75561 }, { "epoch": 2.51, "grad_norm": 0.69379723072052, "learning_rate": 3.802787427291481e-05, "loss": 1.7188, "step": 75562 }, { "epoch": 2.51, "grad_norm": 0.6864155530929565, "learning_rate": 3.802278122407605e-05, "loss": 1.6885, "step": 75563 }, { "epoch": 2.51, "grad_norm": 0.7371591329574585, "learning_rate": 3.8017688493238975e-05, "loss": 1.7325, "step": 75564 }, { "epoch": 2.51, "grad_norm": 0.7014997601509094, "learning_rate": 3.801259608040985e-05, "loss": 1.7342, "step": 75565 }, { "epoch": 2.51, "grad_norm": 0.7223455905914307, "learning_rate": 3.800750398559488e-05, "loss": 1.7546, "step": 75566 }, { "epoch": 2.51, "grad_norm": 0.6954364776611328, "learning_rate": 3.800241220880011e-05, "loss": 1.6559, "step": 75567 }, { "epoch": 2.51, "grad_norm": 0.6887328028678894, "learning_rate": 3.7997320750031857e-05, "loss": 1.6998, "step": 75568 }, { "epoch": 2.51, "grad_norm": 0.7077409625053406, "learning_rate": 3.7992229609296235e-05, "loss": 1.6875, "step": 75569 }, { "epoch": 2.51, "grad_norm": 0.6977502107620239, "learning_rate": 3.798713878659947e-05, "loss": 1.7096, "step": 75570 }, { "epoch": 2.51, "grad_norm": 0.7013335227966309, "learning_rate": 3.798204828194763e-05, "loss": 1.7884, "step": 75571 }, { "epoch": 2.51, "grad_norm": 0.6864601373672485, "learning_rate": 3.7976958095347074e-05, "loss": 1.693, "step": 75572 }, { "epoch": 2.51, "grad_norm": 0.6828405261039734, "learning_rate": 3.797186822680387e-05, "loss": 1.7132, "step": 75573 }, { "epoch": 2.51, "grad_norm": 0.6898341178894043, "learning_rate": 3.7966778676324174e-05, "loss": 1.7012, "step": 75574 }, { "epoch": 2.51, "grad_norm": 0.680596113204956, "learning_rate": 3.796168944391421e-05, "loss": 1.7301, "step": 75575 }, { "epoch": 2.51, "grad_norm": 0.6968069672584534, "learning_rate": 3.7956600529580186e-05, "loss": 1.6726, "step": 75576 }, { "epoch": 2.51, "grad_norm": 0.6900983452796936, "learning_rate": 3.7951511933328184e-05, "loss": 1.6677, "step": 75577 }, { "epoch": 2.51, "grad_norm": 0.6810850501060486, "learning_rate": 3.79464236551644e-05, "loss": 1.6485, "step": 75578 }, { "epoch": 2.51, "grad_norm": 0.7181056141853333, "learning_rate": 3.794133569509514e-05, "loss": 1.7043, "step": 75579 }, { "epoch": 2.51, "grad_norm": 0.7221148610115051, "learning_rate": 3.7936248053126465e-05, "loss": 1.6176, "step": 75580 }, { "epoch": 2.51, "grad_norm": 0.6738437414169312, "learning_rate": 3.793116072926452e-05, "loss": 1.7189, "step": 75581 }, { "epoch": 2.51, "grad_norm": 0.6912522912025452, "learning_rate": 3.792607372351558e-05, "loss": 1.6594, "step": 75582 }, { "epoch": 2.51, "grad_norm": 0.7078427076339722, "learning_rate": 3.792098703588577e-05, "loss": 1.6793, "step": 75583 }, { "epoch": 2.51, "grad_norm": 0.6869453191757202, "learning_rate": 3.791590066638123e-05, "loss": 1.6576, "step": 75584 }, { "epoch": 2.51, "grad_norm": 0.7051357626914978, "learning_rate": 3.791081461500817e-05, "loss": 1.7304, "step": 75585 }, { "epoch": 2.51, "grad_norm": 0.69206303358078, "learning_rate": 3.790572888177279e-05, "loss": 1.6481, "step": 75586 }, { "epoch": 2.51, "grad_norm": 0.716891348361969, "learning_rate": 3.790064346668126e-05, "loss": 1.7019, "step": 75587 }, { "epoch": 2.51, "grad_norm": 0.684100329875946, "learning_rate": 3.789555836973966e-05, "loss": 1.6028, "step": 75588 }, { "epoch": 2.51, "grad_norm": 0.6971540451049805, "learning_rate": 3.7890473590954306e-05, "loss": 1.6639, "step": 75589 }, { "epoch": 2.51, "grad_norm": 0.7055285573005676, "learning_rate": 3.788538913033131e-05, "loss": 1.6686, "step": 75590 }, { "epoch": 2.51, "grad_norm": 0.6911541819572449, "learning_rate": 3.788030498787674e-05, "loss": 1.5801, "step": 75591 }, { "epoch": 2.51, "grad_norm": 0.6945292353630066, "learning_rate": 3.787522116359689e-05, "loss": 1.6891, "step": 75592 }, { "epoch": 2.51, "grad_norm": 0.6921592354774475, "learning_rate": 3.7870137657497924e-05, "loss": 1.7388, "step": 75593 }, { "epoch": 2.52, "grad_norm": 0.6860027313232422, "learning_rate": 3.7865054469586e-05, "loss": 1.648, "step": 75594 }, { "epoch": 2.52, "grad_norm": 0.6649113893508911, "learning_rate": 3.785997159986724e-05, "loss": 1.6266, "step": 75595 }, { "epoch": 2.52, "grad_norm": 0.6927716732025146, "learning_rate": 3.7854889048347894e-05, "loss": 1.6741, "step": 75596 }, { "epoch": 2.52, "grad_norm": 0.6834999918937683, "learning_rate": 3.784980681503407e-05, "loss": 1.6944, "step": 75597 }, { "epoch": 2.52, "grad_norm": 0.687635064125061, "learning_rate": 3.78447248999319e-05, "loss": 1.7303, "step": 75598 }, { "epoch": 2.52, "grad_norm": 0.7066050171852112, "learning_rate": 3.783964330304762e-05, "loss": 1.7729, "step": 75599 }, { "epoch": 2.52, "grad_norm": 0.6898536086082458, "learning_rate": 3.783456202438745e-05, "loss": 1.698, "step": 75600 }, { "epoch": 2.52, "grad_norm": 0.7043223977088928, "learning_rate": 3.782948106395749e-05, "loss": 1.7373, "step": 75601 }, { "epoch": 2.52, "grad_norm": 0.7052906155586243, "learning_rate": 3.782440042176384e-05, "loss": 1.6426, "step": 75602 }, { "epoch": 2.52, "grad_norm": 0.6921988129615784, "learning_rate": 3.781932009781281e-05, "loss": 1.654, "step": 75603 }, { "epoch": 2.52, "grad_norm": 0.6858178973197937, "learning_rate": 3.781424009211048e-05, "loss": 1.7255, "step": 75604 }, { "epoch": 2.52, "grad_norm": 0.7069413065910339, "learning_rate": 3.780916040466298e-05, "loss": 1.7371, "step": 75605 }, { "epoch": 2.52, "grad_norm": 0.7063527703285217, "learning_rate": 3.780408103547659e-05, "loss": 1.6983, "step": 75606 }, { "epoch": 2.52, "grad_norm": 0.6772304773330688, "learning_rate": 3.779900198455734e-05, "loss": 1.6728, "step": 75607 }, { "epoch": 2.52, "grad_norm": 0.7028265595436096, "learning_rate": 3.779392325191154e-05, "loss": 1.6872, "step": 75608 }, { "epoch": 2.52, "grad_norm": 0.7103765606880188, "learning_rate": 3.778884483754524e-05, "loss": 1.6852, "step": 75609 }, { "epoch": 2.52, "grad_norm": 0.6941684484481812, "learning_rate": 3.778376674146467e-05, "loss": 1.6868, "step": 75610 }, { "epoch": 2.52, "grad_norm": 0.7190626263618469, "learning_rate": 3.7778688963675996e-05, "loss": 1.6159, "step": 75611 }, { "epoch": 2.52, "grad_norm": 0.7010468244552612, "learning_rate": 3.777361150418531e-05, "loss": 1.8084, "step": 75612 }, { "epoch": 2.52, "grad_norm": 0.7352573871612549, "learning_rate": 3.776853436299887e-05, "loss": 1.6948, "step": 75613 }, { "epoch": 2.52, "grad_norm": 0.69475257396698, "learning_rate": 3.776345754012271e-05, "loss": 1.754, "step": 75614 }, { "epoch": 2.52, "grad_norm": 0.7150710821151733, "learning_rate": 3.7758381035563155e-05, "loss": 1.6982, "step": 75615 }, { "epoch": 2.52, "grad_norm": 0.6881381869316101, "learning_rate": 3.7753304849326205e-05, "loss": 1.675, "step": 75616 }, { "epoch": 2.52, "grad_norm": 0.7703060507774353, "learning_rate": 3.774822898141818e-05, "loss": 1.7, "step": 75617 }, { "epoch": 2.52, "grad_norm": 0.7016856074333191, "learning_rate": 3.7743153431845185e-05, "loss": 1.58, "step": 75618 }, { "epoch": 2.52, "grad_norm": 0.6940487027168274, "learning_rate": 3.773807820061327e-05, "loss": 1.6955, "step": 75619 }, { "epoch": 2.52, "grad_norm": 0.7091193199157715, "learning_rate": 3.7733003287728745e-05, "loss": 1.7512, "step": 75620 }, { "epoch": 2.52, "grad_norm": 0.7351874709129333, "learning_rate": 3.772792869319766e-05, "loss": 1.7208, "step": 75621 }, { "epoch": 2.52, "grad_norm": 0.6853229999542236, "learning_rate": 3.772285441702628e-05, "loss": 1.6667, "step": 75622 }, { "epoch": 2.52, "grad_norm": 0.7053453922271729, "learning_rate": 3.771778045922071e-05, "loss": 1.6393, "step": 75623 }, { "epoch": 2.52, "grad_norm": 0.6906407475471497, "learning_rate": 3.7712706819787054e-05, "loss": 1.6861, "step": 75624 }, { "epoch": 2.52, "grad_norm": 0.7035967111587524, "learning_rate": 3.770763349873159e-05, "loss": 1.7214, "step": 75625 }, { "epoch": 2.52, "grad_norm": 1.003989338874817, "learning_rate": 3.770256049606035e-05, "loss": 1.6646, "step": 75626 }, { "epoch": 2.52, "grad_norm": 0.6721696257591248, "learning_rate": 3.769748781177959e-05, "loss": 1.6935, "step": 75627 }, { "epoch": 2.52, "grad_norm": 0.6927931308746338, "learning_rate": 3.769241544589541e-05, "loss": 1.6688, "step": 75628 }, { "epoch": 2.52, "grad_norm": 0.6959342956542969, "learning_rate": 3.768734339841403e-05, "loss": 1.7181, "step": 75629 }, { "epoch": 2.52, "grad_norm": 0.710462212562561, "learning_rate": 3.7682271669341546e-05, "loss": 1.7774, "step": 75630 }, { "epoch": 2.52, "grad_norm": 0.7076455950737, "learning_rate": 3.767720025868409e-05, "loss": 1.6617, "step": 75631 }, { "epoch": 2.52, "grad_norm": 0.6871610879898071, "learning_rate": 3.767212916644793e-05, "loss": 1.6539, "step": 75632 }, { "epoch": 2.52, "grad_norm": 0.7253204584121704, "learning_rate": 3.766705839263915e-05, "loss": 1.673, "step": 75633 }, { "epoch": 2.52, "grad_norm": 0.6821706295013428, "learning_rate": 3.766198793726385e-05, "loss": 1.6388, "step": 75634 }, { "epoch": 2.52, "grad_norm": 0.723270833492279, "learning_rate": 3.765691780032822e-05, "loss": 1.6656, "step": 75635 }, { "epoch": 2.52, "grad_norm": 0.6932497024536133, "learning_rate": 3.765184798183853e-05, "loss": 1.6765, "step": 75636 }, { "epoch": 2.52, "grad_norm": 0.6935280561447144, "learning_rate": 3.7646778481800845e-05, "loss": 1.6477, "step": 75637 }, { "epoch": 2.52, "grad_norm": 0.7223660945892334, "learning_rate": 3.764170930022121e-05, "loss": 1.6648, "step": 75638 }, { "epoch": 2.52, "grad_norm": 0.71809983253479, "learning_rate": 3.7636640437105966e-05, "loss": 1.6408, "step": 75639 }, { "epoch": 2.52, "grad_norm": 0.7133569121360779, "learning_rate": 3.763157189246121e-05, "loss": 1.708, "step": 75640 }, { "epoch": 2.52, "grad_norm": 0.6870541572570801, "learning_rate": 3.7626503666292994e-05, "loss": 1.6899, "step": 75641 }, { "epoch": 2.52, "grad_norm": 0.690913200378418, "learning_rate": 3.762143575860752e-05, "loss": 1.6467, "step": 75642 }, { "epoch": 2.52, "grad_norm": 0.6992228627204895, "learning_rate": 3.7616368169411116e-05, "loss": 1.7247, "step": 75643 }, { "epoch": 2.52, "grad_norm": 0.7069253325462341, "learning_rate": 3.761130089870964e-05, "loss": 1.7495, "step": 75644 }, { "epoch": 2.52, "grad_norm": 0.693560779094696, "learning_rate": 3.760623394650935e-05, "loss": 1.6557, "step": 75645 }, { "epoch": 2.52, "grad_norm": 0.6925702691078186, "learning_rate": 3.7601167312816564e-05, "loss": 1.6922, "step": 75646 }, { "epoch": 2.52, "grad_norm": 0.6845434308052063, "learning_rate": 3.7596100997637246e-05, "loss": 1.6872, "step": 75647 }, { "epoch": 2.52, "grad_norm": 0.7182566523551941, "learning_rate": 3.7591035000977546e-05, "loss": 1.7101, "step": 75648 }, { "epoch": 2.52, "grad_norm": 0.7061867713928223, "learning_rate": 3.758596932284368e-05, "loss": 1.6927, "step": 75649 }, { "epoch": 2.52, "grad_norm": 0.6865558624267578, "learning_rate": 3.758090396324188e-05, "loss": 1.6179, "step": 75650 }, { "epoch": 2.52, "grad_norm": 0.684421718120575, "learning_rate": 3.7575838922178116e-05, "loss": 1.6111, "step": 75651 }, { "epoch": 2.52, "grad_norm": 0.7001004815101624, "learning_rate": 3.757077419965857e-05, "loss": 1.7255, "step": 75652 }, { "epoch": 2.52, "grad_norm": 0.6852038502693176, "learning_rate": 3.756570979568951e-05, "loss": 1.6561, "step": 75653 }, { "epoch": 2.52, "grad_norm": 0.732377290725708, "learning_rate": 3.7560645710277036e-05, "loss": 1.6556, "step": 75654 }, { "epoch": 2.52, "grad_norm": 0.7122461795806885, "learning_rate": 3.7555581943427206e-05, "loss": 1.7048, "step": 75655 }, { "epoch": 2.52, "grad_norm": 0.6849533915519714, "learning_rate": 3.755051849514621e-05, "loss": 1.76, "step": 75656 }, { "epoch": 2.52, "grad_norm": 0.7265161275863647, "learning_rate": 3.754545536544035e-05, "loss": 1.7289, "step": 75657 }, { "epoch": 2.52, "grad_norm": 0.6945703625679016, "learning_rate": 3.754039255431548e-05, "loss": 1.6644, "step": 75658 }, { "epoch": 2.52, "grad_norm": 0.6797255277633667, "learning_rate": 3.7535330061777936e-05, "loss": 1.7326, "step": 75659 }, { "epoch": 2.52, "grad_norm": 0.6824156045913696, "learning_rate": 3.753026788783388e-05, "loss": 1.6292, "step": 75660 }, { "epoch": 2.52, "grad_norm": 0.7099611163139343, "learning_rate": 3.752520603248943e-05, "loss": 1.7265, "step": 75661 }, { "epoch": 2.52, "grad_norm": 0.7316386103630066, "learning_rate": 3.752014449575059e-05, "loss": 1.7151, "step": 75662 }, { "epoch": 2.52, "grad_norm": 0.6928911805152893, "learning_rate": 3.7515083277623725e-05, "loss": 1.5816, "step": 75663 }, { "epoch": 2.52, "grad_norm": 0.6773368120193481, "learning_rate": 3.751002237811489e-05, "loss": 1.6871, "step": 75664 }, { "epoch": 2.52, "grad_norm": 0.720430314540863, "learning_rate": 3.7504961797230115e-05, "loss": 1.6337, "step": 75665 }, { "epoch": 2.52, "grad_norm": 0.6873148679733276, "learning_rate": 3.749990153497563e-05, "loss": 1.6469, "step": 75666 }, { "epoch": 2.52, "grad_norm": 0.6819877624511719, "learning_rate": 3.749484159135766e-05, "loss": 1.6776, "step": 75667 }, { "epoch": 2.52, "grad_norm": 0.6862212419509888, "learning_rate": 3.7489781966382305e-05, "loss": 1.6661, "step": 75668 }, { "epoch": 2.52, "grad_norm": 0.7024865746498108, "learning_rate": 3.748472266005559e-05, "loss": 1.763, "step": 75669 }, { "epoch": 2.52, "grad_norm": 0.7122712731361389, "learning_rate": 3.7479663672383805e-05, "loss": 1.5999, "step": 75670 }, { "epoch": 2.52, "grad_norm": 0.6985185742378235, "learning_rate": 3.747460500337302e-05, "loss": 1.622, "step": 75671 }, { "epoch": 2.52, "grad_norm": 0.7074121236801147, "learning_rate": 3.746954665302934e-05, "loss": 1.785, "step": 75672 }, { "epoch": 2.52, "grad_norm": 0.6754010915756226, "learning_rate": 3.746448862135897e-05, "loss": 1.6561, "step": 75673 }, { "epoch": 2.52, "grad_norm": 0.6894962191581726, "learning_rate": 3.745943090836802e-05, "loss": 1.6714, "step": 75674 }, { "epoch": 2.52, "grad_norm": 0.7004534006118774, "learning_rate": 3.7454373514062674e-05, "loss": 1.674, "step": 75675 }, { "epoch": 2.52, "grad_norm": 0.7001075744628906, "learning_rate": 3.744931643844894e-05, "loss": 1.6953, "step": 75676 }, { "epoch": 2.52, "grad_norm": 0.7209030985832214, "learning_rate": 3.744425968153317e-05, "loss": 1.7209, "step": 75677 }, { "epoch": 2.52, "grad_norm": 0.694053590297699, "learning_rate": 3.7439203243321334e-05, "loss": 1.6681, "step": 75678 }, { "epoch": 2.52, "grad_norm": 0.7059429883956909, "learning_rate": 3.743414712381959e-05, "loss": 1.6061, "step": 75679 }, { "epoch": 2.52, "grad_norm": 0.7033067345619202, "learning_rate": 3.742909132303417e-05, "loss": 1.6445, "step": 75680 }, { "epoch": 2.52, "grad_norm": 0.6907767057418823, "learning_rate": 3.742403584097105e-05, "loss": 1.6529, "step": 75681 }, { "epoch": 2.52, "grad_norm": 0.6887950301170349, "learning_rate": 3.741898067763656e-05, "loss": 1.7446, "step": 75682 }, { "epoch": 2.52, "grad_norm": 0.707731306552887, "learning_rate": 3.741392583303673e-05, "loss": 1.7282, "step": 75683 }, { "epoch": 2.52, "grad_norm": 0.712989866733551, "learning_rate": 3.7408871307177604e-05, "loss": 1.6738, "step": 75684 }, { "epoch": 2.52, "grad_norm": 0.6830459833145142, "learning_rate": 3.74038171000655e-05, "loss": 1.686, "step": 75685 }, { "epoch": 2.52, "grad_norm": 0.68381667137146, "learning_rate": 3.739876321170643e-05, "loss": 1.6806, "step": 75686 }, { "epoch": 2.52, "grad_norm": 0.7052385210990906, "learning_rate": 3.7393709642106606e-05, "loss": 1.7472, "step": 75687 }, { "epoch": 2.52, "grad_norm": 0.6931575536727905, "learning_rate": 3.73886563912721e-05, "loss": 1.6591, "step": 75688 }, { "epoch": 2.52, "grad_norm": 0.70408034324646, "learning_rate": 3.73836034592091e-05, "loss": 1.7174, "step": 75689 }, { "epoch": 2.52, "grad_norm": 0.7159295678138733, "learning_rate": 3.7378550845923705e-05, "loss": 1.7069, "step": 75690 }, { "epoch": 2.52, "grad_norm": 0.6970356106758118, "learning_rate": 3.737349855142201e-05, "loss": 1.6052, "step": 75691 }, { "epoch": 2.52, "grad_norm": 0.6961818933486938, "learning_rate": 3.736844657571024e-05, "loss": 1.6591, "step": 75692 }, { "epoch": 2.52, "grad_norm": 0.6826547384262085, "learning_rate": 3.73633949187945e-05, "loss": 1.7376, "step": 75693 }, { "epoch": 2.52, "grad_norm": 0.6887103319168091, "learning_rate": 3.735834358068084e-05, "loss": 1.6784, "step": 75694 }, { "epoch": 2.52, "grad_norm": 0.7218553423881531, "learning_rate": 3.735329256137543e-05, "loss": 1.75, "step": 75695 }, { "epoch": 2.52, "grad_norm": 0.7016998529434204, "learning_rate": 3.7348241860884485e-05, "loss": 1.6783, "step": 75696 }, { "epoch": 2.52, "grad_norm": 0.6839643120765686, "learning_rate": 3.734319147921409e-05, "loss": 1.6408, "step": 75697 }, { "epoch": 2.52, "grad_norm": 0.6928636431694031, "learning_rate": 3.733814141637028e-05, "loss": 1.6826, "step": 75698 }, { "epoch": 2.52, "grad_norm": 0.6736462712287903, "learning_rate": 3.7333091672359336e-05, "loss": 1.7371, "step": 75699 }, { "epoch": 2.52, "grad_norm": 0.7095824480056763, "learning_rate": 3.732804224718735e-05, "loss": 1.647, "step": 75700 }, { "epoch": 2.52, "grad_norm": 0.6907201409339905, "learning_rate": 3.7322993140860314e-05, "loss": 1.7227, "step": 75701 }, { "epoch": 2.52, "grad_norm": 0.7335001826286316, "learning_rate": 3.7317944353384454e-05, "loss": 1.6875, "step": 75702 }, { "epoch": 2.52, "grad_norm": 0.693533718585968, "learning_rate": 3.7312895884766e-05, "loss": 1.6206, "step": 75703 }, { "epoch": 2.52, "grad_norm": 0.7096823453903198, "learning_rate": 3.730784773501099e-05, "loss": 1.6918, "step": 75704 }, { "epoch": 2.52, "grad_norm": 0.6831513047218323, "learning_rate": 3.7302799904125436e-05, "loss": 1.6788, "step": 75705 }, { "epoch": 2.52, "grad_norm": 0.6805914044380188, "learning_rate": 3.729775239211568e-05, "loss": 1.6277, "step": 75706 }, { "epoch": 2.52, "grad_norm": 0.6754237413406372, "learning_rate": 3.729270519898774e-05, "loss": 1.6734, "step": 75707 }, { "epoch": 2.52, "grad_norm": 0.69694983959198, "learning_rate": 3.728765832474768e-05, "loss": 1.6495, "step": 75708 }, { "epoch": 2.52, "grad_norm": 0.7138180732727051, "learning_rate": 3.728261176940167e-05, "loss": 1.6865, "step": 75709 }, { "epoch": 2.52, "grad_norm": 0.6806217432022095, "learning_rate": 3.7277565532955964e-05, "loss": 1.6124, "step": 75710 }, { "epoch": 2.52, "grad_norm": 0.7047743201255798, "learning_rate": 3.727251961541656e-05, "loss": 1.7453, "step": 75711 }, { "epoch": 2.52, "grad_norm": 0.6829726099967957, "learning_rate": 3.726747401678955e-05, "loss": 1.669, "step": 75712 }, { "epoch": 2.52, "grad_norm": 0.7154985070228577, "learning_rate": 3.726242873708117e-05, "loss": 1.7572, "step": 75713 }, { "epoch": 2.52, "grad_norm": 1.4969457387924194, "learning_rate": 3.725738377629748e-05, "loss": 1.6993, "step": 75714 }, { "epoch": 2.52, "grad_norm": 0.6780418157577515, "learning_rate": 3.725233913444456e-05, "loss": 1.7034, "step": 75715 }, { "epoch": 2.52, "grad_norm": 0.696148157119751, "learning_rate": 3.7247294811528594e-05, "loss": 1.7396, "step": 75716 }, { "epoch": 2.52, "grad_norm": 0.7228696942329407, "learning_rate": 3.724225080755576e-05, "loss": 1.7565, "step": 75717 }, { "epoch": 2.52, "grad_norm": 0.6773438453674316, "learning_rate": 3.7237207122532134e-05, "loss": 1.7175, "step": 75718 }, { "epoch": 2.52, "grad_norm": 0.7072873115539551, "learning_rate": 3.723216375646373e-05, "loss": 1.6594, "step": 75719 }, { "epoch": 2.52, "grad_norm": 0.7123873829841614, "learning_rate": 3.722712070935682e-05, "loss": 1.6794, "step": 75720 }, { "epoch": 2.52, "grad_norm": 0.7062461972236633, "learning_rate": 3.722207798121749e-05, "loss": 1.6532, "step": 75721 }, { "epoch": 2.52, "grad_norm": 0.6954943537712097, "learning_rate": 3.721703557205178e-05, "loss": 1.7471, "step": 75722 }, { "epoch": 2.52, "grad_norm": 0.7067542672157288, "learning_rate": 3.7211993481865907e-05, "loss": 1.6118, "step": 75723 }, { "epoch": 2.52, "grad_norm": 0.6835225820541382, "learning_rate": 3.720695171066591e-05, "loss": 1.6954, "step": 75724 }, { "epoch": 2.52, "grad_norm": 0.6784288883209229, "learning_rate": 3.720191025845801e-05, "loss": 1.6245, "step": 75725 }, { "epoch": 2.52, "grad_norm": 0.7025405168533325, "learning_rate": 3.7196869125248205e-05, "loss": 1.7599, "step": 75726 }, { "epoch": 2.52, "grad_norm": 0.6963378190994263, "learning_rate": 3.719182831104276e-05, "loss": 1.672, "step": 75727 }, { "epoch": 2.52, "grad_norm": 0.6996079087257385, "learning_rate": 3.7186787815847693e-05, "loss": 1.7123, "step": 75728 }, { "epoch": 2.52, "grad_norm": 0.7074131369590759, "learning_rate": 3.718174763966908e-05, "loss": 1.6756, "step": 75729 }, { "epoch": 2.52, "grad_norm": 0.6939898729324341, "learning_rate": 3.717670778251317e-05, "loss": 1.7045, "step": 75730 }, { "epoch": 2.52, "grad_norm": 0.6925825476646423, "learning_rate": 3.7171668244385933e-05, "loss": 1.7217, "step": 75731 }, { "epoch": 2.52, "grad_norm": 0.6997209191322327, "learning_rate": 3.716662902529363e-05, "loss": 1.7314, "step": 75732 }, { "epoch": 2.52, "grad_norm": 0.6952806115150452, "learning_rate": 3.716159012524228e-05, "loss": 1.5991, "step": 75733 }, { "epoch": 2.52, "grad_norm": 0.6976227164268494, "learning_rate": 3.715655154423806e-05, "loss": 1.6957, "step": 75734 }, { "epoch": 2.52, "grad_norm": 0.7000367641448975, "learning_rate": 3.715151328228708e-05, "loss": 1.6648, "step": 75735 }, { "epoch": 2.52, "grad_norm": 0.6846550107002258, "learning_rate": 3.7146475339395354e-05, "loss": 1.6926, "step": 75736 }, { "epoch": 2.52, "grad_norm": 0.7007135152816772, "learning_rate": 3.7141437715569135e-05, "loss": 1.6626, "step": 75737 }, { "epoch": 2.52, "grad_norm": 0.6803473234176636, "learning_rate": 3.713640041081445e-05, "loss": 1.7257, "step": 75738 }, { "epoch": 2.52, "grad_norm": 0.6892269849777222, "learning_rate": 3.713136342513749e-05, "loss": 1.7101, "step": 75739 }, { "epoch": 2.52, "grad_norm": 0.719658613204956, "learning_rate": 3.71263267585443e-05, "loss": 1.7179, "step": 75740 }, { "epoch": 2.52, "grad_norm": 0.6896377205848694, "learning_rate": 3.712129041104099e-05, "loss": 1.7139, "step": 75741 }, { "epoch": 2.52, "grad_norm": 0.6884234547615051, "learning_rate": 3.7116254382633734e-05, "loss": 1.6778, "step": 75742 }, { "epoch": 2.52, "grad_norm": 0.7114642262458801, "learning_rate": 3.711121867332859e-05, "loss": 1.6966, "step": 75743 }, { "epoch": 2.52, "grad_norm": 0.7128036022186279, "learning_rate": 3.710618328313172e-05, "loss": 1.6121, "step": 75744 }, { "epoch": 2.52, "grad_norm": 0.6773223876953125, "learning_rate": 3.710114821204915e-05, "loss": 1.6889, "step": 75745 }, { "epoch": 2.52, "grad_norm": 0.6814534664154053, "learning_rate": 3.709611346008711e-05, "loss": 1.74, "step": 75746 }, { "epoch": 2.52, "grad_norm": 0.6925027966499329, "learning_rate": 3.7091079027251656e-05, "loss": 1.6915, "step": 75747 }, { "epoch": 2.52, "grad_norm": 0.6930620670318604, "learning_rate": 3.708604491354883e-05, "loss": 1.6725, "step": 75748 }, { "epoch": 2.52, "grad_norm": 0.6985030770301819, "learning_rate": 3.708101111898488e-05, "loss": 1.7674, "step": 75749 }, { "epoch": 2.52, "grad_norm": 0.6865864992141724, "learning_rate": 3.7075977643565814e-05, "loss": 1.6766, "step": 75750 }, { "epoch": 2.52, "grad_norm": 0.7058125138282776, "learning_rate": 3.707094448729775e-05, "loss": 1.6986, "step": 75751 }, { "epoch": 2.52, "grad_norm": 0.6945157647132874, "learning_rate": 3.7065911650186783e-05, "loss": 1.6694, "step": 75752 }, { "epoch": 2.52, "grad_norm": 0.6907421350479126, "learning_rate": 3.7060879132239155e-05, "loss": 1.6734, "step": 75753 }, { "epoch": 2.52, "grad_norm": 0.6998646855354309, "learning_rate": 3.705584693346084e-05, "loss": 1.6834, "step": 75754 }, { "epoch": 2.52, "grad_norm": 0.713320255279541, "learning_rate": 3.7050815053857955e-05, "loss": 1.6666, "step": 75755 }, { "epoch": 2.52, "grad_norm": 0.6955257058143616, "learning_rate": 3.7045783493436676e-05, "loss": 1.7275, "step": 75756 }, { "epoch": 2.52, "grad_norm": 0.7070349454879761, "learning_rate": 3.704075225220308e-05, "loss": 1.7154, "step": 75757 }, { "epoch": 2.52, "grad_norm": 0.6865856647491455, "learning_rate": 3.703572133016318e-05, "loss": 1.6593, "step": 75758 }, { "epoch": 2.52, "grad_norm": 0.6739704608917236, "learning_rate": 3.703069072732319e-05, "loss": 1.6809, "step": 75759 }, { "epoch": 2.52, "grad_norm": 0.7059392929077148, "learning_rate": 3.702566044368932e-05, "loss": 1.6653, "step": 75760 }, { "epoch": 2.52, "grad_norm": 0.6935262680053711, "learning_rate": 3.702063047926744e-05, "loss": 1.7407, "step": 75761 }, { "epoch": 2.52, "grad_norm": 0.7220249772071838, "learning_rate": 3.701560083406375e-05, "loss": 1.6918, "step": 75762 }, { "epoch": 2.52, "grad_norm": 0.7262952327728271, "learning_rate": 3.701057150808444e-05, "loss": 1.7502, "step": 75763 }, { "epoch": 2.52, "grad_norm": 0.6906570196151733, "learning_rate": 3.700554250133554e-05, "loss": 1.7047, "step": 75764 }, { "epoch": 2.52, "grad_norm": 0.7080163955688477, "learning_rate": 3.7000513813823104e-05, "loss": 1.6491, "step": 75765 }, { "epoch": 2.52, "grad_norm": 0.6832065582275391, "learning_rate": 3.6995485445553277e-05, "loss": 1.6865, "step": 75766 }, { "epoch": 2.52, "grad_norm": 0.6964511871337891, "learning_rate": 3.69904573965323e-05, "loss": 1.7012, "step": 75767 }, { "epoch": 2.52, "grad_norm": 0.7043295502662659, "learning_rate": 3.698542966676605e-05, "loss": 1.6819, "step": 75768 }, { "epoch": 2.52, "grad_norm": 0.689154326915741, "learning_rate": 3.698040225626075e-05, "loss": 1.7469, "step": 75769 }, { "epoch": 2.52, "grad_norm": 0.7221688628196716, "learning_rate": 3.697537516502253e-05, "loss": 1.7199, "step": 75770 }, { "epoch": 2.52, "grad_norm": 0.6862534284591675, "learning_rate": 3.697034839305746e-05, "loss": 1.6679, "step": 75771 }, { "epoch": 2.52, "grad_norm": 0.6860472559928894, "learning_rate": 3.6965321940371554e-05, "loss": 1.5904, "step": 75772 }, { "epoch": 2.52, "grad_norm": 0.7078999280929565, "learning_rate": 3.696029580697097e-05, "loss": 1.699, "step": 75773 }, { "epoch": 2.52, "grad_norm": 0.6908125877380371, "learning_rate": 3.695526999286199e-05, "loss": 1.6508, "step": 75774 }, { "epoch": 2.52, "grad_norm": 0.7037404179573059, "learning_rate": 3.695024449805043e-05, "loss": 1.7106, "step": 75775 }, { "epoch": 2.52, "grad_norm": 0.6897391080856323, "learning_rate": 3.694521932254252e-05, "loss": 1.6547, "step": 75776 }, { "epoch": 2.52, "grad_norm": 0.7004030346870422, "learning_rate": 3.694019446634438e-05, "loss": 1.7545, "step": 75777 }, { "epoch": 2.52, "grad_norm": 0.7181858420372009, "learning_rate": 3.693516992946213e-05, "loss": 1.6815, "step": 75778 }, { "epoch": 2.52, "grad_norm": 0.7163345217704773, "learning_rate": 3.693014571190174e-05, "loss": 1.6303, "step": 75779 }, { "epoch": 2.52, "grad_norm": 0.6960972547531128, "learning_rate": 3.6925121813669445e-05, "loss": 1.6323, "step": 75780 }, { "epoch": 2.52, "grad_norm": 0.7045058608055115, "learning_rate": 3.6920098234771275e-05, "loss": 1.7076, "step": 75781 }, { "epoch": 2.52, "grad_norm": 0.6824062466621399, "learning_rate": 3.69150749752133e-05, "loss": 1.6652, "step": 75782 }, { "epoch": 2.52, "grad_norm": 0.7079170346260071, "learning_rate": 3.691005203500167e-05, "loss": 1.7375, "step": 75783 }, { "epoch": 2.52, "grad_norm": 0.7125725746154785, "learning_rate": 3.6905029414142515e-05, "loss": 1.6899, "step": 75784 }, { "epoch": 2.52, "grad_norm": 0.6780199408531189, "learning_rate": 3.6900007112641906e-05, "loss": 1.589, "step": 75785 }, { "epoch": 2.52, "grad_norm": 0.7142947912216187, "learning_rate": 3.689498513050586e-05, "loss": 1.7329, "step": 75786 }, { "epoch": 2.52, "grad_norm": 0.7071113586425781, "learning_rate": 3.688996346774058e-05, "loss": 1.6404, "step": 75787 }, { "epoch": 2.52, "grad_norm": 0.6888306140899658, "learning_rate": 3.688494212435216e-05, "loss": 1.7127, "step": 75788 }, { "epoch": 2.52, "grad_norm": 0.7226986885070801, "learning_rate": 3.687992110034655e-05, "loss": 1.7179, "step": 75789 }, { "epoch": 2.52, "grad_norm": 0.7073707580566406, "learning_rate": 3.687490039573e-05, "loss": 1.6524, "step": 75790 }, { "epoch": 2.52, "grad_norm": 0.7034477591514587, "learning_rate": 3.6869880010508524e-05, "loss": 1.7099, "step": 75791 }, { "epoch": 2.52, "grad_norm": 0.6848081946372986, "learning_rate": 3.6864859944688285e-05, "loss": 1.6867, "step": 75792 }, { "epoch": 2.52, "grad_norm": 0.7087553143501282, "learning_rate": 3.685984019827528e-05, "loss": 1.7414, "step": 75793 }, { "epoch": 2.52, "grad_norm": 0.704005777835846, "learning_rate": 3.685482077127571e-05, "loss": 1.6638, "step": 75794 }, { "epoch": 2.52, "grad_norm": 0.7025102376937866, "learning_rate": 3.6849801663695624e-05, "loss": 1.7149, "step": 75795 }, { "epoch": 2.52, "grad_norm": 0.7073264122009277, "learning_rate": 3.684478287554102e-05, "loss": 1.6947, "step": 75796 }, { "epoch": 2.52, "grad_norm": 0.6669988036155701, "learning_rate": 3.6839764406818175e-05, "loss": 1.7357, "step": 75797 }, { "epoch": 2.52, "grad_norm": 0.6943989992141724, "learning_rate": 3.6834746257532996e-05, "loss": 1.7015, "step": 75798 }, { "epoch": 2.52, "grad_norm": 0.7280223369598389, "learning_rate": 3.682972842769173e-05, "loss": 1.7406, "step": 75799 }, { "epoch": 2.52, "grad_norm": 0.7329297661781311, "learning_rate": 3.6824710917300396e-05, "loss": 1.7342, "step": 75800 }, { "epoch": 2.52, "grad_norm": 0.6918076276779175, "learning_rate": 3.6819693726365016e-05, "loss": 1.6864, "step": 75801 }, { "epoch": 2.52, "grad_norm": 0.6938892602920532, "learning_rate": 3.68146768548918e-05, "loss": 1.7776, "step": 75802 }, { "epoch": 2.52, "grad_norm": 0.7003124952316284, "learning_rate": 3.680966030288677e-05, "loss": 1.7678, "step": 75803 }, { "epoch": 2.52, "grad_norm": 0.6673421263694763, "learning_rate": 3.680464407035605e-05, "loss": 1.647, "step": 75804 }, { "epoch": 2.52, "grad_norm": 0.67352294921875, "learning_rate": 3.679962815730567e-05, "loss": 1.6689, "step": 75805 }, { "epoch": 2.52, "grad_norm": 0.6887624859809875, "learning_rate": 3.679461256374183e-05, "loss": 1.6282, "step": 75806 }, { "epoch": 2.52, "grad_norm": 0.7223793268203735, "learning_rate": 3.6789597289670526e-05, "loss": 1.6251, "step": 75807 }, { "epoch": 2.52, "grad_norm": 0.7185686826705933, "learning_rate": 3.6784582335097846e-05, "loss": 1.6254, "step": 75808 }, { "epoch": 2.52, "grad_norm": 0.6957964301109314, "learning_rate": 3.6779567700029924e-05, "loss": 1.695, "step": 75809 }, { "epoch": 2.52, "grad_norm": 0.685103714466095, "learning_rate": 3.6774553384472846e-05, "loss": 1.6645, "step": 75810 }, { "epoch": 2.52, "grad_norm": 0.6879262328147888, "learning_rate": 3.676953938843259e-05, "loss": 1.7636, "step": 75811 }, { "epoch": 2.52, "grad_norm": 0.7017889618873596, "learning_rate": 3.676452571191534e-05, "loss": 1.6958, "step": 75812 }, { "epoch": 2.52, "grad_norm": 0.7122179865837097, "learning_rate": 3.6759512354927254e-05, "loss": 1.6389, "step": 75813 }, { "epoch": 2.52, "grad_norm": 0.7065570950508118, "learning_rate": 3.6754499317474304e-05, "loss": 1.674, "step": 75814 }, { "epoch": 2.52, "grad_norm": 0.7057582139968872, "learning_rate": 3.674948659956255e-05, "loss": 1.6937, "step": 75815 }, { "epoch": 2.52, "grad_norm": 0.6926106214523315, "learning_rate": 3.6744474201198185e-05, "loss": 1.7085, "step": 75816 }, { "epoch": 2.52, "grad_norm": 0.7056890726089478, "learning_rate": 3.673946212238727e-05, "loss": 1.7288, "step": 75817 }, { "epoch": 2.52, "grad_norm": 0.6889291405677795, "learning_rate": 3.673445036313576e-05, "loss": 1.69, "step": 75818 }, { "epoch": 2.52, "grad_norm": 0.6879914402961731, "learning_rate": 3.672943892344986e-05, "loss": 1.676, "step": 75819 }, { "epoch": 2.52, "grad_norm": 0.686019778251648, "learning_rate": 3.672442780333567e-05, "loss": 1.6686, "step": 75820 }, { "epoch": 2.52, "grad_norm": 0.6999586820602417, "learning_rate": 3.671941700279927e-05, "loss": 1.769, "step": 75821 }, { "epoch": 2.52, "grad_norm": 0.6855173110961914, "learning_rate": 3.6714406521846596e-05, "loss": 1.6879, "step": 75822 }, { "epoch": 2.52, "grad_norm": 0.7174686193466187, "learning_rate": 3.670939636048395e-05, "loss": 1.7504, "step": 75823 }, { "epoch": 2.52, "grad_norm": 0.6941555142402649, "learning_rate": 3.670438651871728e-05, "loss": 1.6609, "step": 75824 }, { "epoch": 2.52, "grad_norm": 0.6913642883300781, "learning_rate": 3.6699376996552623e-05, "loss": 1.6581, "step": 75825 }, { "epoch": 2.52, "grad_norm": 0.6830946207046509, "learning_rate": 3.66943677939961e-05, "loss": 1.7069, "step": 75826 }, { "epoch": 2.52, "grad_norm": 0.7131469249725342, "learning_rate": 3.668935891105392e-05, "loss": 1.6899, "step": 75827 }, { "epoch": 2.52, "grad_norm": 0.6787468194961548, "learning_rate": 3.668435034773207e-05, "loss": 1.6863, "step": 75828 }, { "epoch": 2.52, "grad_norm": 0.7034872174263, "learning_rate": 3.6679342104036545e-05, "loss": 1.67, "step": 75829 }, { "epoch": 2.52, "grad_norm": 0.7212405800819397, "learning_rate": 3.6674334179973544e-05, "loss": 1.6544, "step": 75830 }, { "epoch": 2.52, "grad_norm": 0.7080119252204895, "learning_rate": 3.666932657554912e-05, "loss": 1.6755, "step": 75831 }, { "epoch": 2.52, "grad_norm": 0.703763484954834, "learning_rate": 3.666431929076927e-05, "loss": 1.7194, "step": 75832 }, { "epoch": 2.52, "grad_norm": 0.6786357760429382, "learning_rate": 3.665931232564013e-05, "loss": 1.7585, "step": 75833 }, { "epoch": 2.52, "grad_norm": 0.703130304813385, "learning_rate": 3.665430568016783e-05, "loss": 1.7053, "step": 75834 }, { "epoch": 2.52, "grad_norm": 0.6754642724990845, "learning_rate": 3.664929935435842e-05, "loss": 1.6357, "step": 75835 }, { "epoch": 2.52, "grad_norm": 0.6894465088844299, "learning_rate": 3.66442933482179e-05, "loss": 1.6932, "step": 75836 }, { "epoch": 2.52, "grad_norm": 0.7060497403144836, "learning_rate": 3.663928766175247e-05, "loss": 1.7692, "step": 75837 }, { "epoch": 2.52, "grad_norm": 0.7046692371368408, "learning_rate": 3.663428229496812e-05, "loss": 1.6779, "step": 75838 }, { "epoch": 2.52, "grad_norm": 0.6993845105171204, "learning_rate": 3.662927724787091e-05, "loss": 1.614, "step": 75839 }, { "epoch": 2.52, "grad_norm": 0.7228606939315796, "learning_rate": 3.6624272520467015e-05, "loss": 1.6992, "step": 75840 }, { "epoch": 2.52, "grad_norm": 0.7172115445137024, "learning_rate": 3.661926811276235e-05, "loss": 1.69, "step": 75841 }, { "epoch": 2.52, "grad_norm": 0.6960127949714661, "learning_rate": 3.661426402476317e-05, "loss": 1.6709, "step": 75842 }, { "epoch": 2.52, "grad_norm": 0.6929267644882202, "learning_rate": 3.660926025647539e-05, "loss": 1.7227, "step": 75843 }, { "epoch": 2.52, "grad_norm": 0.6804835796356201, "learning_rate": 3.660425680790523e-05, "loss": 1.6695, "step": 75844 }, { "epoch": 2.52, "grad_norm": 0.7069106698036194, "learning_rate": 3.6599253679058716e-05, "loss": 1.6487, "step": 75845 }, { "epoch": 2.52, "grad_norm": 0.679619550704956, "learning_rate": 3.6594250869941813e-05, "loss": 1.6858, "step": 75846 }, { "epoch": 2.52, "grad_norm": 0.6909865736961365, "learning_rate": 3.6589248380560746e-05, "loss": 1.7842, "step": 75847 }, { "epoch": 2.52, "grad_norm": 0.6984206438064575, "learning_rate": 3.658424621092144e-05, "loss": 1.7764, "step": 75848 }, { "epoch": 2.52, "grad_norm": 0.6821832060813904, "learning_rate": 3.657924436103013e-05, "loss": 1.6991, "step": 75849 }, { "epoch": 2.52, "grad_norm": 0.7007164359092712, "learning_rate": 3.657424283089278e-05, "loss": 1.6804, "step": 75850 }, { "epoch": 2.52, "grad_norm": 0.704553484916687, "learning_rate": 3.6569241620515454e-05, "loss": 1.5934, "step": 75851 }, { "epoch": 2.52, "grad_norm": 0.7026523947715759, "learning_rate": 3.6564240729904304e-05, "loss": 1.7257, "step": 75852 }, { "epoch": 2.52, "grad_norm": 0.6944127678871155, "learning_rate": 3.655924015906526e-05, "loss": 1.5835, "step": 75853 }, { "epoch": 2.52, "grad_norm": 0.6855632066726685, "learning_rate": 3.655423990800459e-05, "loss": 1.6681, "step": 75854 }, { "epoch": 2.52, "grad_norm": 0.7192999124526978, "learning_rate": 3.6549239976728185e-05, "loss": 1.5641, "step": 75855 }, { "epoch": 2.52, "grad_norm": 0.6923505663871765, "learning_rate": 3.6544240365242204e-05, "loss": 1.6713, "step": 75856 }, { "epoch": 2.52, "grad_norm": 0.7120196223258972, "learning_rate": 3.653924107355275e-05, "loss": 1.7298, "step": 75857 }, { "epoch": 2.52, "grad_norm": 0.6876843571662903, "learning_rate": 3.653424210166575e-05, "loss": 1.7319, "step": 75858 }, { "epoch": 2.52, "grad_norm": 0.7143269181251526, "learning_rate": 3.652924344958742e-05, "loss": 1.66, "step": 75859 }, { "epoch": 2.52, "grad_norm": 0.698574423789978, "learning_rate": 3.65242451173237e-05, "loss": 1.7132, "step": 75860 }, { "epoch": 2.52, "grad_norm": 0.6877356767654419, "learning_rate": 3.651924710488082e-05, "loss": 1.665, "step": 75861 }, { "epoch": 2.52, "grad_norm": 0.6927797794342041, "learning_rate": 3.6514249412264675e-05, "loss": 1.6171, "step": 75862 }, { "epoch": 2.52, "grad_norm": 0.7206575274467468, "learning_rate": 3.6509252039481454e-05, "loss": 1.6741, "step": 75863 }, { "epoch": 2.52, "grad_norm": 0.6838120818138123, "learning_rate": 3.650425498653718e-05, "loss": 1.6899, "step": 75864 }, { "epoch": 2.52, "grad_norm": 0.7191697955131531, "learning_rate": 3.649925825343787e-05, "loss": 1.6394, "step": 75865 }, { "epoch": 2.52, "grad_norm": 0.6960203647613525, "learning_rate": 3.64942618401897e-05, "loss": 1.7134, "step": 75866 }, { "epoch": 2.52, "grad_norm": 0.7265403866767883, "learning_rate": 3.648926574679867e-05, "loss": 1.7142, "step": 75867 }, { "epoch": 2.52, "grad_norm": 0.7141527533531189, "learning_rate": 3.6484269973270774e-05, "loss": 1.6094, "step": 75868 }, { "epoch": 2.52, "grad_norm": 0.665827214717865, "learning_rate": 3.647927451961214e-05, "loss": 1.6491, "step": 75869 }, { "epoch": 2.52, "grad_norm": 0.6900333166122437, "learning_rate": 3.647427938582891e-05, "loss": 1.6988, "step": 75870 }, { "epoch": 2.52, "grad_norm": 0.6876517534255981, "learning_rate": 3.646928457192706e-05, "loss": 1.6691, "step": 75871 }, { "epoch": 2.52, "grad_norm": 0.7343263626098633, "learning_rate": 3.646429007791263e-05, "loss": 1.7274, "step": 75872 }, { "epoch": 2.52, "grad_norm": 0.7590881586074829, "learning_rate": 3.6459295903791776e-05, "loss": 1.698, "step": 75873 }, { "epoch": 2.52, "grad_norm": 0.7102718353271484, "learning_rate": 3.6454302049570496e-05, "loss": 1.6712, "step": 75874 }, { "epoch": 2.52, "grad_norm": 0.6756978631019592, "learning_rate": 3.6449308515254816e-05, "loss": 1.6952, "step": 75875 }, { "epoch": 2.52, "grad_norm": 0.7080327868461609, "learning_rate": 3.6444315300850836e-05, "loss": 1.6791, "step": 75876 }, { "epoch": 2.52, "grad_norm": 0.6897407174110413, "learning_rate": 3.643932240636474e-05, "loss": 1.6889, "step": 75877 }, { "epoch": 2.52, "grad_norm": 0.7324057221412659, "learning_rate": 3.643432983180234e-05, "loss": 1.7087, "step": 75878 }, { "epoch": 2.52, "grad_norm": 0.6945008039474487, "learning_rate": 3.642933757716986e-05, "loss": 1.6878, "step": 75879 }, { "epoch": 2.52, "grad_norm": 0.695995032787323, "learning_rate": 3.6424345642473385e-05, "loss": 1.6655, "step": 75880 }, { "epoch": 2.52, "grad_norm": 0.7044349908828735, "learning_rate": 3.6419354027718916e-05, "loss": 1.6525, "step": 75881 }, { "epoch": 2.52, "grad_norm": 0.6643403768539429, "learning_rate": 3.6414362732912447e-05, "loss": 1.7175, "step": 75882 }, { "epoch": 2.52, "grad_norm": 0.7135754823684692, "learning_rate": 3.640937175806008e-05, "loss": 1.7582, "step": 75883 }, { "epoch": 2.52, "grad_norm": 0.6772633790969849, "learning_rate": 3.6404381103168065e-05, "loss": 1.6789, "step": 75884 }, { "epoch": 2.52, "grad_norm": 0.7055589556694031, "learning_rate": 3.639939076824214e-05, "loss": 1.6566, "step": 75885 }, { "epoch": 2.52, "grad_norm": 0.7030569314956665, "learning_rate": 3.639440075328853e-05, "loss": 1.7188, "step": 75886 }, { "epoch": 2.52, "grad_norm": 0.7215498685836792, "learning_rate": 3.638941105831333e-05, "loss": 1.6567, "step": 75887 }, { "epoch": 2.52, "grad_norm": 0.6951844692230225, "learning_rate": 3.638442168332254e-05, "loss": 1.7067, "step": 75888 }, { "epoch": 2.52, "grad_norm": 0.7282649874687195, "learning_rate": 3.637943262832215e-05, "loss": 1.7553, "step": 75889 }, { "epoch": 2.52, "grad_norm": 0.6770971417427063, "learning_rate": 3.637444389331832e-05, "loss": 1.7047, "step": 75890 }, { "epoch": 2.52, "grad_norm": 0.7147371172904968, "learning_rate": 3.6369455478317176e-05, "loss": 1.7054, "step": 75891 }, { "epoch": 2.52, "grad_norm": 0.7054334878921509, "learning_rate": 3.636446738332453e-05, "loss": 1.6184, "step": 75892 }, { "epoch": 2.52, "grad_norm": 0.691871166229248, "learning_rate": 3.635947960834658e-05, "loss": 1.6436, "step": 75893 }, { "epoch": 2.53, "grad_norm": 0.695823073387146, "learning_rate": 3.635449215338946e-05, "loss": 1.741, "step": 75894 }, { "epoch": 2.53, "grad_norm": 0.7212746739387512, "learning_rate": 3.6349505018459133e-05, "loss": 1.7204, "step": 75895 }, { "epoch": 2.53, "grad_norm": 0.7048739194869995, "learning_rate": 3.634451820356158e-05, "loss": 1.6961, "step": 75896 }, { "epoch": 2.53, "grad_norm": 0.7100129127502441, "learning_rate": 3.633953170870302e-05, "loss": 1.7038, "step": 75897 }, { "epoch": 2.53, "grad_norm": 0.6863565444946289, "learning_rate": 3.633454553388941e-05, "loss": 1.7248, "step": 75898 }, { "epoch": 2.53, "grad_norm": 0.691217303276062, "learning_rate": 3.632955967912673e-05, "loss": 1.6494, "step": 75899 }, { "epoch": 2.53, "grad_norm": 0.6876216530799866, "learning_rate": 3.632457414442116e-05, "loss": 1.6629, "step": 75900 }, { "epoch": 2.53, "grad_norm": 0.6914016604423523, "learning_rate": 3.631958892977873e-05, "loss": 1.674, "step": 75901 }, { "epoch": 2.53, "grad_norm": 0.6773405075073242, "learning_rate": 3.631460403520547e-05, "loss": 1.6635, "step": 75902 }, { "epoch": 2.53, "grad_norm": 0.705874502658844, "learning_rate": 3.630961946070739e-05, "loss": 1.7113, "step": 75903 }, { "epoch": 2.53, "grad_norm": 0.6984202861785889, "learning_rate": 3.630463520629062e-05, "loss": 1.779, "step": 75904 }, { "epoch": 2.53, "grad_norm": 0.7135382294654846, "learning_rate": 3.6299651271961185e-05, "loss": 1.6709, "step": 75905 }, { "epoch": 2.53, "grad_norm": 0.7085705399513245, "learning_rate": 3.629466765772503e-05, "loss": 1.6359, "step": 75906 }, { "epoch": 2.53, "grad_norm": 0.6820340752601624, "learning_rate": 3.6289684363588387e-05, "loss": 1.6437, "step": 75907 }, { "epoch": 2.53, "grad_norm": 0.6807879209518433, "learning_rate": 3.6284701389557125e-05, "loss": 1.67, "step": 75908 }, { "epoch": 2.53, "grad_norm": 0.7078284025192261, "learning_rate": 3.627971873563746e-05, "loss": 1.6852, "step": 75909 }, { "epoch": 2.53, "grad_norm": 0.6844602227210999, "learning_rate": 3.62747364018353e-05, "loss": 1.6769, "step": 75910 }, { "epoch": 2.53, "grad_norm": 0.7057115435600281, "learning_rate": 3.62697543881568e-05, "loss": 1.7085, "step": 75911 }, { "epoch": 2.53, "grad_norm": 0.7007506489753723, "learning_rate": 3.6264772694607954e-05, "loss": 1.6658, "step": 75912 }, { "epoch": 2.53, "grad_norm": 0.7035574316978455, "learning_rate": 3.625979132119476e-05, "loss": 1.6277, "step": 75913 }, { "epoch": 2.53, "grad_norm": 0.6766639947891235, "learning_rate": 3.6254810267923383e-05, "loss": 1.6757, "step": 75914 }, { "epoch": 2.53, "grad_norm": 0.6832025647163391, "learning_rate": 3.624982953479974e-05, "loss": 1.6249, "step": 75915 }, { "epoch": 2.53, "grad_norm": 0.6883017420768738, "learning_rate": 3.624484912183001e-05, "loss": 1.7121, "step": 75916 }, { "epoch": 2.53, "grad_norm": 0.6824098825454712, "learning_rate": 3.6239869029020176e-05, "loss": 1.6384, "step": 75917 }, { "epoch": 2.53, "grad_norm": 0.6921759247779846, "learning_rate": 3.6234889256376196e-05, "loss": 1.688, "step": 75918 }, { "epoch": 2.53, "grad_norm": 0.7051098346710205, "learning_rate": 3.622990980390428e-05, "loss": 1.6989, "step": 75919 }, { "epoch": 2.53, "grad_norm": 0.7124938368797302, "learning_rate": 3.622493067161031e-05, "loss": 1.6645, "step": 75920 }, { "epoch": 2.53, "grad_norm": 0.7705063223838806, "learning_rate": 3.621995185950049e-05, "loss": 1.8101, "step": 75921 }, { "epoch": 2.53, "grad_norm": 0.7071747183799744, "learning_rate": 3.62149733675807e-05, "loss": 1.7548, "step": 75922 }, { "epoch": 2.53, "grad_norm": 0.6890405416488647, "learning_rate": 3.6209995195857125e-05, "loss": 1.6561, "step": 75923 }, { "epoch": 2.53, "grad_norm": 0.7063470482826233, "learning_rate": 3.6205017344335776e-05, "loss": 1.6944, "step": 75924 }, { "epoch": 2.53, "grad_norm": 0.7113876342773438, "learning_rate": 3.620003981302259e-05, "loss": 1.645, "step": 75925 }, { "epoch": 2.53, "grad_norm": 0.6948586702346802, "learning_rate": 3.619506260192372e-05, "loss": 1.6825, "step": 75926 }, { "epoch": 2.53, "grad_norm": 0.6886407732963562, "learning_rate": 3.6190085711045206e-05, "loss": 1.7062, "step": 75927 }, { "epoch": 2.53, "grad_norm": 0.685464084148407, "learning_rate": 3.6185109140393e-05, "loss": 1.6406, "step": 75928 }, { "epoch": 2.53, "grad_norm": 0.6941303014755249, "learning_rate": 3.618013288997317e-05, "loss": 1.6943, "step": 75929 }, { "epoch": 2.53, "grad_norm": 0.69971764087677, "learning_rate": 3.6175156959791876e-05, "loss": 1.7352, "step": 75930 }, { "epoch": 2.53, "grad_norm": 0.6842690706253052, "learning_rate": 3.6170181349855076e-05, "loss": 1.6764, "step": 75931 }, { "epoch": 2.53, "grad_norm": 0.6924426555633545, "learning_rate": 3.616520606016871e-05, "loss": 1.7611, "step": 75932 }, { "epoch": 2.53, "grad_norm": 0.7046130299568176, "learning_rate": 3.616023109073899e-05, "loss": 1.6768, "step": 75933 }, { "epoch": 2.53, "grad_norm": 0.7207459807395935, "learning_rate": 3.615525644157186e-05, "loss": 1.6165, "step": 75934 }, { "epoch": 2.53, "grad_norm": 0.6969383358955383, "learning_rate": 3.6150282112673335e-05, "loss": 1.7017, "step": 75935 }, { "epoch": 2.53, "grad_norm": 0.7224352359771729, "learning_rate": 3.614530810404949e-05, "loss": 1.7357, "step": 75936 }, { "epoch": 2.53, "grad_norm": 0.6970716714859009, "learning_rate": 3.6140334415706406e-05, "loss": 1.7295, "step": 75937 }, { "epoch": 2.53, "grad_norm": 0.7046182155609131, "learning_rate": 3.613536104765009e-05, "loss": 1.7194, "step": 75938 }, { "epoch": 2.53, "grad_norm": 0.6980395913124084, "learning_rate": 3.61303879998865e-05, "loss": 1.6495, "step": 75939 }, { "epoch": 2.53, "grad_norm": 0.7353423833847046, "learning_rate": 3.61254152724218e-05, "loss": 1.5898, "step": 75940 }, { "epoch": 2.53, "grad_norm": 0.7041850090026855, "learning_rate": 3.612044286526198e-05, "loss": 1.6814, "step": 75941 }, { "epoch": 2.53, "grad_norm": 0.6942052841186523, "learning_rate": 3.6115470778412966e-05, "loss": 1.691, "step": 75942 }, { "epoch": 2.53, "grad_norm": 0.7186262607574463, "learning_rate": 3.61104990118809e-05, "loss": 1.6005, "step": 75943 }, { "epoch": 2.53, "grad_norm": 0.707058310508728, "learning_rate": 3.61055275656719e-05, "loss": 1.6487, "step": 75944 }, { "epoch": 2.53, "grad_norm": 0.6899588704109192, "learning_rate": 3.61005564397919e-05, "loss": 1.6874, "step": 75945 }, { "epoch": 2.53, "grad_norm": 0.6956748962402344, "learning_rate": 3.6095585634246846e-05, "loss": 1.6221, "step": 75946 }, { "epoch": 2.53, "grad_norm": 0.7179173231124878, "learning_rate": 3.609061514904296e-05, "loss": 1.6414, "step": 75947 }, { "epoch": 2.53, "grad_norm": 0.6905436515808105, "learning_rate": 3.608564498418615e-05, "loss": 1.6483, "step": 75948 }, { "epoch": 2.53, "grad_norm": 0.7165127396583557, "learning_rate": 3.608067513968245e-05, "loss": 1.6829, "step": 75949 }, { "epoch": 2.53, "grad_norm": 0.6894707083702087, "learning_rate": 3.607570561553792e-05, "loss": 1.6987, "step": 75950 }, { "epoch": 2.53, "grad_norm": 0.7194536328315735, "learning_rate": 3.6070736411758625e-05, "loss": 1.6939, "step": 75951 }, { "epoch": 2.53, "grad_norm": 0.6938550472259521, "learning_rate": 3.6065767528350597e-05, "loss": 1.7425, "step": 75952 }, { "epoch": 2.53, "grad_norm": 0.7154875993728638, "learning_rate": 3.6060798965319757e-05, "loss": 1.7446, "step": 75953 }, { "epoch": 2.53, "grad_norm": 0.6961248517036438, "learning_rate": 3.6055830722672305e-05, "loss": 1.6951, "step": 75954 }, { "epoch": 2.53, "grad_norm": 0.7038750648498535, "learning_rate": 3.605086280041417e-05, "loss": 1.7227, "step": 75955 }, { "epoch": 2.53, "grad_norm": 0.7221435308456421, "learning_rate": 3.6045895198551346e-05, "loss": 1.6804, "step": 75956 }, { "epoch": 2.53, "grad_norm": 0.7068215608596802, "learning_rate": 3.6040927917089954e-05, "loss": 1.731, "step": 75957 }, { "epoch": 2.53, "grad_norm": 0.68072509765625, "learning_rate": 3.603596095603594e-05, "loss": 1.5867, "step": 75958 }, { "epoch": 2.53, "grad_norm": 0.7066032886505127, "learning_rate": 3.603099431539545e-05, "loss": 1.7287, "step": 75959 }, { "epoch": 2.53, "grad_norm": 0.7218267917633057, "learning_rate": 3.6026027995174345e-05, "loss": 1.6898, "step": 75960 }, { "epoch": 2.53, "grad_norm": 0.6980196833610535, "learning_rate": 3.602106199537883e-05, "loss": 1.6235, "step": 75961 }, { "epoch": 2.53, "grad_norm": 0.688258171081543, "learning_rate": 3.601609631601483e-05, "loss": 1.7161, "step": 75962 }, { "epoch": 2.53, "grad_norm": 0.710430920124054, "learning_rate": 3.601113095708837e-05, "loss": 1.6499, "step": 75963 }, { "epoch": 2.53, "grad_norm": 0.7018439769744873, "learning_rate": 3.600616591860552e-05, "loss": 1.6877, "step": 75964 }, { "epoch": 2.53, "grad_norm": 0.7300352454185486, "learning_rate": 3.600120120057224e-05, "loss": 1.7684, "step": 75965 }, { "epoch": 2.53, "grad_norm": 0.7099795937538147, "learning_rate": 3.5996236802994686e-05, "loss": 1.6708, "step": 75966 }, { "epoch": 2.53, "grad_norm": 0.6926047205924988, "learning_rate": 3.599127272587878e-05, "loss": 1.6796, "step": 75967 }, { "epoch": 2.53, "grad_norm": 0.7567773461341858, "learning_rate": 3.59863089692305e-05, "loss": 1.7967, "step": 75968 }, { "epoch": 2.53, "grad_norm": 0.7058920860290527, "learning_rate": 3.5981345533056005e-05, "loss": 1.6835, "step": 75969 }, { "epoch": 2.53, "grad_norm": 0.6879433393478394, "learning_rate": 3.5976382417361215e-05, "loss": 1.6954, "step": 75970 }, { "epoch": 2.53, "grad_norm": 0.7001644372940063, "learning_rate": 3.597141962215223e-05, "loss": 1.6772, "step": 75971 }, { "epoch": 2.53, "grad_norm": 0.700310230255127, "learning_rate": 3.596645714743498e-05, "loss": 1.6623, "step": 75972 }, { "epoch": 2.53, "grad_norm": 0.6862393617630005, "learning_rate": 3.596149499321562e-05, "loss": 1.6105, "step": 75973 }, { "epoch": 2.53, "grad_norm": 0.667792797088623, "learning_rate": 3.595653315950012e-05, "loss": 1.6901, "step": 75974 }, { "epoch": 2.53, "grad_norm": 0.7055606842041016, "learning_rate": 3.5951571646294376e-05, "loss": 1.6758, "step": 75975 }, { "epoch": 2.53, "grad_norm": 0.7311684489250183, "learning_rate": 3.594661045360461e-05, "loss": 1.7075, "step": 75976 }, { "epoch": 2.53, "grad_norm": 0.6980858445167542, "learning_rate": 3.594164958143668e-05, "loss": 1.7145, "step": 75977 }, { "epoch": 2.53, "grad_norm": 0.6828089952468872, "learning_rate": 3.593668902979676e-05, "loss": 1.6624, "step": 75978 }, { "epoch": 2.53, "grad_norm": 0.6822060346603394, "learning_rate": 3.593172879869071e-05, "loss": 1.692, "step": 75979 }, { "epoch": 2.53, "grad_norm": 0.7019482254981995, "learning_rate": 3.592676888812471e-05, "loss": 1.725, "step": 75980 }, { "epoch": 2.53, "grad_norm": 0.7038303017616272, "learning_rate": 3.592180929810471e-05, "loss": 1.6381, "step": 75981 }, { "epoch": 2.53, "grad_norm": 0.7358447313308716, "learning_rate": 3.5916850028636624e-05, "loss": 1.7638, "step": 75982 }, { "epoch": 2.53, "grad_norm": 0.6911265850067139, "learning_rate": 3.591189107972665e-05, "loss": 1.6822, "step": 75983 }, { "epoch": 2.53, "grad_norm": 0.6997659802436829, "learning_rate": 3.5906932451380755e-05, "loss": 1.6664, "step": 75984 }, { "epoch": 2.53, "grad_norm": 0.6974737644195557, "learning_rate": 3.5901974143604826e-05, "loss": 1.6863, "step": 75985 }, { "epoch": 2.53, "grad_norm": 0.7014061808586121, "learning_rate": 3.5897016156405036e-05, "loss": 1.6868, "step": 75986 }, { "epoch": 2.53, "grad_norm": 0.6892325282096863, "learning_rate": 3.589205848978737e-05, "loss": 1.6299, "step": 75987 }, { "epoch": 2.53, "grad_norm": 0.7103951573371887, "learning_rate": 3.5887101143757856e-05, "loss": 1.709, "step": 75988 }, { "epoch": 2.53, "grad_norm": 0.7108666896820068, "learning_rate": 3.588214411832243e-05, "loss": 1.6663, "step": 75989 }, { "epoch": 2.53, "grad_norm": 0.703644871711731, "learning_rate": 3.587718741348722e-05, "loss": 1.6497, "step": 75990 }, { "epoch": 2.53, "grad_norm": 0.7012503147125244, "learning_rate": 3.587223102925818e-05, "loss": 1.644, "step": 75991 }, { "epoch": 2.53, "grad_norm": 0.7341436743736267, "learning_rate": 3.586727496564128e-05, "loss": 1.6494, "step": 75992 }, { "epoch": 2.53, "grad_norm": 0.685745358467102, "learning_rate": 3.586231922264258e-05, "loss": 1.7344, "step": 75993 }, { "epoch": 2.53, "grad_norm": 0.6764741539955139, "learning_rate": 3.585736380026824e-05, "loss": 1.7146, "step": 75994 }, { "epoch": 2.53, "grad_norm": 0.6936329007148743, "learning_rate": 3.5852408698524014e-05, "loss": 1.6397, "step": 75995 }, { "epoch": 2.53, "grad_norm": 0.7061207294464111, "learning_rate": 3.584745391741605e-05, "loss": 1.6847, "step": 75996 }, { "epoch": 2.53, "grad_norm": 0.704136312007904, "learning_rate": 3.5842499456950394e-05, "loss": 1.6956, "step": 75997 }, { "epoch": 2.53, "grad_norm": 0.6850975751876831, "learning_rate": 3.5837545317133045e-05, "loss": 1.6722, "step": 75998 }, { "epoch": 2.53, "grad_norm": 0.7266391515731812, "learning_rate": 3.583259149796993e-05, "loss": 1.751, "step": 75999 }, { "epoch": 2.53, "grad_norm": 0.7006875872612, "learning_rate": 3.582763799946712e-05, "loss": 1.6514, "step": 76000 }, { "epoch": 2.53, "grad_norm": 0.7164031267166138, "learning_rate": 3.582268482163076e-05, "loss": 1.8333, "step": 76001 }, { "epoch": 2.53, "grad_norm": 0.7092620134353638, "learning_rate": 3.5817731964466604e-05, "loss": 1.7025, "step": 76002 }, { "epoch": 2.53, "grad_norm": 0.6949296593666077, "learning_rate": 3.5812779427980784e-05, "loss": 1.6278, "step": 76003 }, { "epoch": 2.53, "grad_norm": 0.708511471748352, "learning_rate": 3.5807827212179416e-05, "loss": 1.7278, "step": 76004 }, { "epoch": 2.53, "grad_norm": 0.7085456252098083, "learning_rate": 3.5802875317068394e-05, "loss": 1.7048, "step": 76005 }, { "epoch": 2.53, "grad_norm": 0.7148491144180298, "learning_rate": 3.57979237426537e-05, "loss": 1.7128, "step": 76006 }, { "epoch": 2.53, "grad_norm": 0.677879273891449, "learning_rate": 3.5792972488941405e-05, "loss": 1.6441, "step": 76007 }, { "epoch": 2.53, "grad_norm": 0.6985787749290466, "learning_rate": 3.578802155593764e-05, "loss": 1.6673, "step": 76008 }, { "epoch": 2.53, "grad_norm": 0.7063246965408325, "learning_rate": 3.5783070943648166e-05, "loss": 1.7033, "step": 76009 }, { "epoch": 2.53, "grad_norm": 0.6951674222946167, "learning_rate": 3.57781206520791e-05, "loss": 1.6793, "step": 76010 }, { "epoch": 2.53, "grad_norm": 0.6924739480018616, "learning_rate": 3.577317068123652e-05, "loss": 1.6724, "step": 76011 }, { "epoch": 2.53, "grad_norm": 0.7406541705131531, "learning_rate": 3.576822103112641e-05, "loss": 1.7403, "step": 76012 }, { "epoch": 2.53, "grad_norm": 0.7307463884353638, "learning_rate": 3.576327170175467e-05, "loss": 1.7914, "step": 76013 }, { "epoch": 2.53, "grad_norm": 0.707318902015686, "learning_rate": 3.5758322693127426e-05, "loss": 1.6819, "step": 76014 }, { "epoch": 2.53, "grad_norm": 0.740132749080658, "learning_rate": 3.5753374005250647e-05, "loss": 1.7638, "step": 76015 }, { "epoch": 2.53, "grad_norm": 0.7228318452835083, "learning_rate": 3.574842563813028e-05, "loss": 1.6384, "step": 76016 }, { "epoch": 2.53, "grad_norm": 0.6872861981391907, "learning_rate": 3.5743477591772404e-05, "loss": 1.6622, "step": 76017 }, { "epoch": 2.53, "grad_norm": 0.7259820699691772, "learning_rate": 3.573852986618308e-05, "loss": 1.7518, "step": 76018 }, { "epoch": 2.53, "grad_norm": 0.716052234172821, "learning_rate": 3.573358246136822e-05, "loss": 1.702, "step": 76019 }, { "epoch": 2.53, "grad_norm": 0.7174452543258667, "learning_rate": 3.572863537733379e-05, "loss": 1.7039, "step": 76020 }, { "epoch": 2.53, "grad_norm": 0.7094334959983826, "learning_rate": 3.572368861408593e-05, "loss": 1.6935, "step": 76021 }, { "epoch": 2.53, "grad_norm": 0.6995834112167358, "learning_rate": 3.571874217163057e-05, "loss": 1.6681, "step": 76022 }, { "epoch": 2.53, "grad_norm": 0.6978920698165894, "learning_rate": 3.5713796049973656e-05, "loss": 1.6886, "step": 76023 }, { "epoch": 2.53, "grad_norm": 0.6893592476844788, "learning_rate": 3.5708850249121324e-05, "loss": 1.6707, "step": 76024 }, { "epoch": 2.53, "grad_norm": 0.7074258327484131, "learning_rate": 3.570390476907944e-05, "loss": 1.7324, "step": 76025 }, { "epoch": 2.53, "grad_norm": 0.7175812721252441, "learning_rate": 3.5698959609854126e-05, "loss": 1.6534, "step": 76026 }, { "epoch": 2.53, "grad_norm": 0.7079910039901733, "learning_rate": 3.569401477145128e-05, "loss": 1.6564, "step": 76027 }, { "epoch": 2.53, "grad_norm": 0.6833094358444214, "learning_rate": 3.5689070253877006e-05, "loss": 1.6862, "step": 76028 }, { "epoch": 2.53, "grad_norm": 0.7089426517486572, "learning_rate": 3.5684126057137255e-05, "loss": 1.7303, "step": 76029 }, { "epoch": 2.53, "grad_norm": 0.6978931427001953, "learning_rate": 3.5679182181237986e-05, "loss": 1.643, "step": 76030 }, { "epoch": 2.53, "grad_norm": 0.7211523652076721, "learning_rate": 3.5674238626185304e-05, "loss": 1.6744, "step": 76031 }, { "epoch": 2.53, "grad_norm": 0.6927382946014404, "learning_rate": 3.566929539198506e-05, "loss": 1.7143, "step": 76032 }, { "epoch": 2.53, "grad_norm": 0.6778735518455505, "learning_rate": 3.566435247864343e-05, "loss": 1.6734, "step": 76033 }, { "epoch": 2.53, "grad_norm": 0.6866791248321533, "learning_rate": 3.5659409886166334e-05, "loss": 1.6445, "step": 76034 }, { "epoch": 2.53, "grad_norm": 0.7066919803619385, "learning_rate": 3.56544676145597e-05, "loss": 1.6892, "step": 76035 }, { "epoch": 2.53, "grad_norm": 0.6720861792564392, "learning_rate": 3.564952566382963e-05, "loss": 1.6613, "step": 76036 }, { "epoch": 2.53, "grad_norm": 0.6937066912651062, "learning_rate": 3.564458403398204e-05, "loss": 1.6396, "step": 76037 }, { "epoch": 2.53, "grad_norm": 0.7069404721260071, "learning_rate": 3.563964272502303e-05, "loss": 1.653, "step": 76038 }, { "epoch": 2.53, "grad_norm": 0.7386659979820251, "learning_rate": 3.5634701736958504e-05, "loss": 1.7167, "step": 76039 }, { "epoch": 2.53, "grad_norm": 0.7177641987800598, "learning_rate": 3.562976106979455e-05, "loss": 1.747, "step": 76040 }, { "epoch": 2.53, "grad_norm": 0.7157559394836426, "learning_rate": 3.5624820723537095e-05, "loss": 1.7225, "step": 76041 }, { "epoch": 2.53, "grad_norm": 0.697020947933197, "learning_rate": 3.561988069819211e-05, "loss": 1.737, "step": 76042 }, { "epoch": 2.53, "grad_norm": 0.703123927116394, "learning_rate": 3.5614940993765704e-05, "loss": 1.6716, "step": 76043 }, { "epoch": 2.53, "grad_norm": 0.6983638405799866, "learning_rate": 3.5610001610263795e-05, "loss": 1.7037, "step": 76044 }, { "epoch": 2.53, "grad_norm": 0.7100325226783752, "learning_rate": 3.5605062547692295e-05, "loss": 1.7457, "step": 76045 }, { "epoch": 2.53, "grad_norm": 0.7184907793998718, "learning_rate": 3.560012380605731e-05, "loss": 1.688, "step": 76046 }, { "epoch": 2.53, "grad_norm": 0.67752605676651, "learning_rate": 3.55951853853649e-05, "loss": 1.644, "step": 76047 }, { "epoch": 2.53, "grad_norm": 0.6932867765426636, "learning_rate": 3.5590247285620986e-05, "loss": 1.7237, "step": 76048 }, { "epoch": 2.53, "grad_norm": 0.6995185613632202, "learning_rate": 3.558530950683147e-05, "loss": 1.6451, "step": 76049 }, { "epoch": 2.53, "grad_norm": 0.7118621468544006, "learning_rate": 3.558037204900248e-05, "loss": 1.6848, "step": 76050 }, { "epoch": 2.53, "grad_norm": 0.6932445764541626, "learning_rate": 3.557543491213998e-05, "loss": 1.6706, "step": 76051 }, { "epoch": 2.53, "grad_norm": 0.6999865770339966, "learning_rate": 3.5570498096249854e-05, "loss": 1.6193, "step": 76052 }, { "epoch": 2.53, "grad_norm": 0.6742441058158875, "learning_rate": 3.556556160133821e-05, "loss": 1.6643, "step": 76053 }, { "epoch": 2.53, "grad_norm": 0.7202871441841125, "learning_rate": 3.556062542741104e-05, "loss": 1.7054, "step": 76054 }, { "epoch": 2.53, "grad_norm": 0.6741474866867065, "learning_rate": 3.555568957447433e-05, "loss": 1.6211, "step": 76055 }, { "epoch": 2.53, "grad_norm": 0.6995243430137634, "learning_rate": 3.555075404253399e-05, "loss": 1.6872, "step": 76056 }, { "epoch": 2.53, "grad_norm": 0.688260018825531, "learning_rate": 3.554581883159611e-05, "loss": 1.6402, "step": 76057 }, { "epoch": 2.53, "grad_norm": 0.6836007833480835, "learning_rate": 3.554088394166668e-05, "loss": 1.6124, "step": 76058 }, { "epoch": 2.53, "grad_norm": 0.6863284707069397, "learning_rate": 3.553594937275156e-05, "loss": 1.6856, "step": 76059 }, { "epoch": 2.53, "grad_norm": 0.6838855743408203, "learning_rate": 3.553101512485682e-05, "loss": 1.7131, "step": 76060 }, { "epoch": 2.53, "grad_norm": 0.7083916068077087, "learning_rate": 3.552608119798855e-05, "loss": 1.6216, "step": 76061 }, { "epoch": 2.53, "grad_norm": 0.7096362113952637, "learning_rate": 3.552114759215262e-05, "loss": 1.6667, "step": 76062 }, { "epoch": 2.53, "grad_norm": 0.7186000347137451, "learning_rate": 3.5516214307355014e-05, "loss": 1.6647, "step": 76063 }, { "epoch": 2.53, "grad_norm": 0.7077176570892334, "learning_rate": 3.55112813436018e-05, "loss": 1.6824, "step": 76064 }, { "epoch": 2.53, "grad_norm": 0.6917694211006165, "learning_rate": 3.5506348700898914e-05, "loss": 1.7085, "step": 76065 }, { "epoch": 2.53, "grad_norm": 0.7281551361083984, "learning_rate": 3.55014163792523e-05, "loss": 1.7264, "step": 76066 }, { "epoch": 2.53, "grad_norm": 0.7237188816070557, "learning_rate": 3.5496484378668e-05, "loss": 1.7722, "step": 76067 }, { "epoch": 2.53, "grad_norm": 0.7186540365219116, "learning_rate": 3.549155269915204e-05, "loss": 1.6212, "step": 76068 }, { "epoch": 2.53, "grad_norm": 0.7032173871994019, "learning_rate": 3.5486621340710374e-05, "loss": 1.699, "step": 76069 }, { "epoch": 2.53, "grad_norm": 0.6848977208137512, "learning_rate": 3.548169030334891e-05, "loss": 1.6599, "step": 76070 }, { "epoch": 2.53, "grad_norm": 0.7149533033370972, "learning_rate": 3.5476759587073765e-05, "loss": 1.6969, "step": 76071 }, { "epoch": 2.53, "grad_norm": 0.7010923624038696, "learning_rate": 3.547182919189084e-05, "loss": 1.6594, "step": 76072 }, { "epoch": 2.53, "grad_norm": 0.7176862359046936, "learning_rate": 3.54668991178061e-05, "loss": 1.6793, "step": 76073 }, { "epoch": 2.53, "grad_norm": 0.6888599991798401, "learning_rate": 3.5461969364825604e-05, "loss": 1.651, "step": 76074 }, { "epoch": 2.53, "grad_norm": 0.6877180933952332, "learning_rate": 3.545703993295525e-05, "loss": 1.7165, "step": 76075 }, { "epoch": 2.53, "grad_norm": 0.7016108632087708, "learning_rate": 3.545211082220115e-05, "loss": 1.6562, "step": 76076 }, { "epoch": 2.53, "grad_norm": 0.7088569402694702, "learning_rate": 3.544718203256912e-05, "loss": 1.6938, "step": 76077 }, { "epoch": 2.53, "grad_norm": 0.6775152683258057, "learning_rate": 3.544225356406532e-05, "loss": 1.6765, "step": 76078 }, { "epoch": 2.53, "grad_norm": 0.6824116706848145, "learning_rate": 3.543732541669563e-05, "loss": 1.7562, "step": 76079 }, { "epoch": 2.53, "grad_norm": 0.7185947299003601, "learning_rate": 3.543239759046596e-05, "loss": 1.6755, "step": 76080 }, { "epoch": 2.53, "grad_norm": 0.7020115852355957, "learning_rate": 3.5427470085382456e-05, "loss": 1.7247, "step": 76081 }, { "epoch": 2.53, "grad_norm": 0.7161325216293335, "learning_rate": 3.542254290145097e-05, "loss": 1.7576, "step": 76082 }, { "epoch": 2.53, "grad_norm": 0.7144200205802917, "learning_rate": 3.541761603867758e-05, "loss": 1.611, "step": 76083 }, { "epoch": 2.53, "grad_norm": 0.6864411234855652, "learning_rate": 3.5412689497068245e-05, "loss": 1.699, "step": 76084 }, { "epoch": 2.53, "grad_norm": 0.6904928684234619, "learning_rate": 3.540776327662884e-05, "loss": 1.6881, "step": 76085 }, { "epoch": 2.53, "grad_norm": 0.7440876960754395, "learning_rate": 3.540283737736549e-05, "loss": 1.7019, "step": 76086 }, { "epoch": 2.53, "grad_norm": 0.7049433588981628, "learning_rate": 3.539791179928402e-05, "loss": 1.7049, "step": 76087 }, { "epoch": 2.53, "grad_norm": 0.6963420510292053, "learning_rate": 3.539298654239059e-05, "loss": 1.7021, "step": 76088 }, { "epoch": 2.53, "grad_norm": 0.6997654438018799, "learning_rate": 3.538806160669103e-05, "loss": 1.7359, "step": 76089 }, { "epoch": 2.53, "grad_norm": 0.6898766756057739, "learning_rate": 3.5383136992191404e-05, "loss": 1.6841, "step": 76090 }, { "epoch": 2.53, "grad_norm": 0.716030478477478, "learning_rate": 3.537821269889768e-05, "loss": 1.7132, "step": 76091 }, { "epoch": 2.53, "grad_norm": 0.691803514957428, "learning_rate": 3.537328872681577e-05, "loss": 1.7124, "step": 76092 }, { "epoch": 2.53, "grad_norm": 0.7041992545127869, "learning_rate": 3.536836507595175e-05, "loss": 1.6191, "step": 76093 }, { "epoch": 2.53, "grad_norm": 0.7258151173591614, "learning_rate": 3.5363441746311446e-05, "loss": 1.75, "step": 76094 }, { "epoch": 2.53, "grad_norm": 0.6967079043388367, "learning_rate": 3.535851873790105e-05, "loss": 1.664, "step": 76095 }, { "epoch": 2.53, "grad_norm": 0.694731593132019, "learning_rate": 3.535359605072633e-05, "loss": 1.685, "step": 76096 }, { "epoch": 2.53, "grad_norm": 0.7062211036682129, "learning_rate": 3.5348673684793404e-05, "loss": 1.6394, "step": 76097 }, { "epoch": 2.53, "grad_norm": 0.715935230255127, "learning_rate": 3.534375164010821e-05, "loss": 1.6668, "step": 76098 }, { "epoch": 2.53, "grad_norm": 0.7207938432693481, "learning_rate": 3.53388299166766e-05, "loss": 1.6846, "step": 76099 }, { "epoch": 2.53, "grad_norm": 0.7047067880630493, "learning_rate": 3.5333908514504786e-05, "loss": 1.7024, "step": 76100 }, { "epoch": 2.53, "grad_norm": 0.688586950302124, "learning_rate": 3.532898743359854e-05, "loss": 1.7066, "step": 76101 }, { "epoch": 2.53, "grad_norm": 0.693253755569458, "learning_rate": 3.532406667396391e-05, "loss": 1.6537, "step": 76102 }, { "epoch": 2.53, "grad_norm": 0.6955944299697876, "learning_rate": 3.531914623560682e-05, "loss": 1.6836, "step": 76103 }, { "epoch": 2.53, "grad_norm": 0.7010025978088379, "learning_rate": 3.531422611853336e-05, "loss": 1.744, "step": 76104 }, { "epoch": 2.53, "grad_norm": 0.6815822720527649, "learning_rate": 3.530930632274943e-05, "loss": 1.7482, "step": 76105 }, { "epoch": 2.53, "grad_norm": 0.7106925845146179, "learning_rate": 3.530438684826096e-05, "loss": 1.7774, "step": 76106 }, { "epoch": 2.53, "grad_norm": 0.718722939491272, "learning_rate": 3.5299467695074e-05, "loss": 1.7096, "step": 76107 }, { "epoch": 2.53, "grad_norm": 0.6808359622955322, "learning_rate": 3.5294548863194495e-05, "loss": 1.6524, "step": 76108 }, { "epoch": 2.53, "grad_norm": 0.6911569833755493, "learning_rate": 3.528963035262833e-05, "loss": 1.6744, "step": 76109 }, { "epoch": 2.53, "grad_norm": 0.6907575726509094, "learning_rate": 3.528471216338157e-05, "loss": 1.6, "step": 76110 }, { "epoch": 2.53, "grad_norm": 0.7120458483695984, "learning_rate": 3.5279794295460275e-05, "loss": 1.6908, "step": 76111 }, { "epoch": 2.53, "grad_norm": 0.692589282989502, "learning_rate": 3.527487674887021e-05, "loss": 1.7441, "step": 76112 }, { "epoch": 2.53, "grad_norm": 0.7220836877822876, "learning_rate": 3.52699595236174e-05, "loss": 1.6096, "step": 76113 }, { "epoch": 2.53, "grad_norm": 0.6905156373977661, "learning_rate": 3.526504261970794e-05, "loss": 1.6162, "step": 76114 }, { "epoch": 2.53, "grad_norm": 0.6964210271835327, "learning_rate": 3.5260126037147696e-05, "loss": 1.686, "step": 76115 }, { "epoch": 2.53, "grad_norm": 0.6927803158760071, "learning_rate": 3.5255209775942596e-05, "loss": 1.7014, "step": 76116 }, { "epoch": 2.53, "grad_norm": 0.6906580328941345, "learning_rate": 3.5250293836098664e-05, "loss": 1.7182, "step": 76117 }, { "epoch": 2.53, "grad_norm": 0.6841949820518494, "learning_rate": 3.5245378217622e-05, "loss": 1.7098, "step": 76118 }, { "epoch": 2.53, "grad_norm": 0.6857709884643555, "learning_rate": 3.52404629205183e-05, "loss": 1.6412, "step": 76119 }, { "epoch": 2.53, "grad_norm": 0.6969501376152039, "learning_rate": 3.5235547944793655e-05, "loss": 1.663, "step": 76120 }, { "epoch": 2.53, "grad_norm": 0.6887477040290833, "learning_rate": 3.523063329045412e-05, "loss": 1.6944, "step": 76121 }, { "epoch": 2.53, "grad_norm": 0.7190439701080322, "learning_rate": 3.522571895750561e-05, "loss": 1.7273, "step": 76122 }, { "epoch": 2.53, "grad_norm": 0.6820632219314575, "learning_rate": 3.522080494595397e-05, "loss": 1.6286, "step": 76123 }, { "epoch": 2.53, "grad_norm": 0.6859171986579895, "learning_rate": 3.5215891255805276e-05, "loss": 1.6828, "step": 76124 }, { "epoch": 2.53, "grad_norm": 0.7138225436210632, "learning_rate": 3.521097788706557e-05, "loss": 1.6717, "step": 76125 }, { "epoch": 2.53, "grad_norm": 0.6822654008865356, "learning_rate": 3.5206064839740634e-05, "loss": 1.7219, "step": 76126 }, { "epoch": 2.53, "grad_norm": 0.709900975227356, "learning_rate": 3.5201152113836484e-05, "loss": 1.6701, "step": 76127 }, { "epoch": 2.53, "grad_norm": 0.713857889175415, "learning_rate": 3.519623970935922e-05, "loss": 1.7389, "step": 76128 }, { "epoch": 2.53, "grad_norm": 0.7191534042358398, "learning_rate": 3.519132762631467e-05, "loss": 1.6904, "step": 76129 }, { "epoch": 2.53, "grad_norm": 0.682380735874176, "learning_rate": 3.51864158647088e-05, "loss": 1.6395, "step": 76130 }, { "epoch": 2.53, "grad_norm": 0.7254629731178284, "learning_rate": 3.5181504424547624e-05, "loss": 1.7689, "step": 76131 }, { "epoch": 2.53, "grad_norm": 0.6997472643852234, "learning_rate": 3.517659330583712e-05, "loss": 1.687, "step": 76132 }, { "epoch": 2.53, "grad_norm": 0.7103858590126038, "learning_rate": 3.5171682508583144e-05, "loss": 1.6772, "step": 76133 }, { "epoch": 2.53, "grad_norm": 0.7200331091880798, "learning_rate": 3.516677203279172e-05, "loss": 1.7203, "step": 76134 }, { "epoch": 2.53, "grad_norm": 0.7154695987701416, "learning_rate": 3.5161861878468853e-05, "loss": 1.7186, "step": 76135 }, { "epoch": 2.53, "grad_norm": 0.6826067566871643, "learning_rate": 3.51569520456205e-05, "loss": 1.6892, "step": 76136 }, { "epoch": 2.53, "grad_norm": 0.7318200469017029, "learning_rate": 3.515204253425252e-05, "loss": 1.7166, "step": 76137 }, { "epoch": 2.53, "grad_norm": 0.6684479117393494, "learning_rate": 3.514713334437098e-05, "loss": 1.6664, "step": 76138 }, { "epoch": 2.53, "grad_norm": 0.6890384554862976, "learning_rate": 3.5142224475981806e-05, "loss": 1.6738, "step": 76139 }, { "epoch": 2.53, "grad_norm": 0.693598747253418, "learning_rate": 3.513731592909086e-05, "loss": 1.6687, "step": 76140 }, { "epoch": 2.53, "grad_norm": 0.6880316138267517, "learning_rate": 3.513240770370427e-05, "loss": 1.6982, "step": 76141 }, { "epoch": 2.53, "grad_norm": 0.693989098072052, "learning_rate": 3.5127499799827865e-05, "loss": 1.6572, "step": 76142 }, { "epoch": 2.53, "grad_norm": 0.6822962164878845, "learning_rate": 3.512259221746767e-05, "loss": 1.6152, "step": 76143 }, { "epoch": 2.53, "grad_norm": 0.7043100595474243, "learning_rate": 3.5117684956629585e-05, "loss": 1.7465, "step": 76144 }, { "epoch": 2.53, "grad_norm": 0.7047264575958252, "learning_rate": 3.5112778017319676e-05, "loss": 1.7499, "step": 76145 }, { "epoch": 2.53, "grad_norm": 0.7343502044677734, "learning_rate": 3.51078713995438e-05, "loss": 1.7432, "step": 76146 }, { "epoch": 2.53, "grad_norm": 0.6910828351974487, "learning_rate": 3.5102965103307875e-05, "loss": 1.7279, "step": 76147 }, { "epoch": 2.53, "grad_norm": 0.6824690699577332, "learning_rate": 3.5098059128617985e-05, "loss": 1.646, "step": 76148 }, { "epoch": 2.53, "grad_norm": 0.7077721357345581, "learning_rate": 3.5093153475479976e-05, "loss": 1.6717, "step": 76149 }, { "epoch": 2.53, "grad_norm": 0.6786544919013977, "learning_rate": 3.508824814389991e-05, "loss": 1.6844, "step": 76150 }, { "epoch": 2.53, "grad_norm": 0.6910030245780945, "learning_rate": 3.5083343133883655e-05, "loss": 1.6706, "step": 76151 }, { "epoch": 2.53, "grad_norm": 0.6985379457473755, "learning_rate": 3.507843844543714e-05, "loss": 1.656, "step": 76152 }, { "epoch": 2.53, "grad_norm": 0.6830655932426453, "learning_rate": 3.507353407856642e-05, "loss": 1.6949, "step": 76153 }, { "epoch": 2.53, "grad_norm": 0.6930894255638123, "learning_rate": 3.5068630033277336e-05, "loss": 1.6444, "step": 76154 }, { "epoch": 2.53, "grad_norm": 0.6857414245605469, "learning_rate": 3.5063726309575965e-05, "loss": 1.7605, "step": 76155 }, { "epoch": 2.53, "grad_norm": 0.6759445071220398, "learning_rate": 3.505882290746812e-05, "loss": 1.6934, "step": 76156 }, { "epoch": 2.53, "grad_norm": 0.7040100693702698, "learning_rate": 3.5053919826959884e-05, "loss": 1.7507, "step": 76157 }, { "epoch": 2.53, "grad_norm": 0.7025418281555176, "learning_rate": 3.504901706805716e-05, "loss": 1.63, "step": 76158 }, { "epoch": 2.53, "grad_norm": 0.712717592716217, "learning_rate": 3.504411463076584e-05, "loss": 1.6662, "step": 76159 }, { "epoch": 2.53, "grad_norm": 0.6774390935897827, "learning_rate": 3.5039212515091985e-05, "loss": 1.6478, "step": 76160 }, { "epoch": 2.53, "grad_norm": 0.6982165575027466, "learning_rate": 3.503431072104149e-05, "loss": 1.626, "step": 76161 }, { "epoch": 2.53, "grad_norm": 0.6993265151977539, "learning_rate": 3.502940924862022e-05, "loss": 1.6361, "step": 76162 }, { "epoch": 2.53, "grad_norm": 0.6926862001419067, "learning_rate": 3.5024508097834235e-05, "loss": 1.6488, "step": 76163 }, { "epoch": 2.53, "grad_norm": 0.6998563408851624, "learning_rate": 3.5019607268689495e-05, "loss": 1.7136, "step": 76164 }, { "epoch": 2.53, "grad_norm": 0.7189217209815979, "learning_rate": 3.50147067611919e-05, "loss": 1.7121, "step": 76165 }, { "epoch": 2.53, "grad_norm": 0.7104222178459167, "learning_rate": 3.500980657534734e-05, "loss": 1.6722, "step": 76166 }, { "epoch": 2.53, "grad_norm": 0.6983392834663391, "learning_rate": 3.5004906711161914e-05, "loss": 1.679, "step": 76167 }, { "epoch": 2.53, "grad_norm": 0.6839597225189209, "learning_rate": 3.5000007168641484e-05, "loss": 1.6939, "step": 76168 }, { "epoch": 2.53, "grad_norm": 0.6893610954284668, "learning_rate": 3.499510794779195e-05, "loss": 1.7727, "step": 76169 }, { "epoch": 2.53, "grad_norm": 0.6743922233581543, "learning_rate": 3.499020904861929e-05, "loss": 1.6692, "step": 76170 }, { "epoch": 2.53, "grad_norm": 0.7105268836021423, "learning_rate": 3.4985310471129516e-05, "loss": 1.6037, "step": 76171 }, { "epoch": 2.53, "grad_norm": 0.7050842642784119, "learning_rate": 3.498041221532855e-05, "loss": 1.6855, "step": 76172 }, { "epoch": 2.53, "grad_norm": 0.68918377161026, "learning_rate": 3.497551428122225e-05, "loss": 1.7039, "step": 76173 }, { "epoch": 2.53, "grad_norm": 0.6744189858436584, "learning_rate": 3.4970616668816684e-05, "loss": 1.7282, "step": 76174 }, { "epoch": 2.53, "grad_norm": 0.6789798140525818, "learning_rate": 3.496571937811775e-05, "loss": 1.6214, "step": 76175 }, { "epoch": 2.53, "grad_norm": 0.6962394714355469, "learning_rate": 3.4960822409131306e-05, "loss": 1.7261, "step": 76176 }, { "epoch": 2.53, "grad_norm": 0.6987881660461426, "learning_rate": 3.495592576186338e-05, "loss": 1.6864, "step": 76177 }, { "epoch": 2.53, "grad_norm": 0.7092286348342896, "learning_rate": 3.4951029436319963e-05, "loss": 1.6901, "step": 76178 }, { "epoch": 2.53, "grad_norm": 0.7024349570274353, "learning_rate": 3.4946133432506926e-05, "loss": 1.6754, "step": 76179 }, { "epoch": 2.53, "grad_norm": 0.6968406438827515, "learning_rate": 3.494123775043019e-05, "loss": 1.6657, "step": 76180 }, { "epoch": 2.53, "grad_norm": 0.6912246942520142, "learning_rate": 3.493634239009583e-05, "loss": 1.6651, "step": 76181 }, { "epoch": 2.53, "grad_norm": 0.7181226015090942, "learning_rate": 3.493144735150966e-05, "loss": 1.699, "step": 76182 }, { "epoch": 2.53, "grad_norm": 0.7038043141365051, "learning_rate": 3.492655263467757e-05, "loss": 1.6808, "step": 76183 }, { "epoch": 2.53, "grad_norm": 0.6948886513710022, "learning_rate": 3.492165823960561e-05, "loss": 1.7128, "step": 76184 }, { "epoch": 2.53, "grad_norm": 0.7023069262504578, "learning_rate": 3.4916764166299794e-05, "loss": 1.6956, "step": 76185 }, { "epoch": 2.53, "grad_norm": 0.6928991079330444, "learning_rate": 3.491187041476592e-05, "loss": 1.6853, "step": 76186 }, { "epoch": 2.53, "grad_norm": 0.6781445741653442, "learning_rate": 3.490697698500995e-05, "loss": 1.6867, "step": 76187 }, { "epoch": 2.53, "grad_norm": 0.7021181583404541, "learning_rate": 3.4902083877037876e-05, "loss": 1.6484, "step": 76188 }, { "epoch": 2.53, "grad_norm": 0.7070866227149963, "learning_rate": 3.489719109085564e-05, "loss": 1.5538, "step": 76189 }, { "epoch": 2.53, "grad_norm": 0.7013108134269714, "learning_rate": 3.4892298626469084e-05, "loss": 1.701, "step": 76190 }, { "epoch": 2.53, "grad_norm": 0.7223266959190369, "learning_rate": 3.4887406483884285e-05, "loss": 1.8306, "step": 76191 }, { "epoch": 2.53, "grad_norm": 0.7266687750816345, "learning_rate": 3.488251466310703e-05, "loss": 1.6436, "step": 76192 }, { "epoch": 2.53, "grad_norm": 0.7013911008834839, "learning_rate": 3.487762316414342e-05, "loss": 1.6925, "step": 76193 }, { "epoch": 2.53, "grad_norm": 0.7143025398254395, "learning_rate": 3.487273198699925e-05, "loss": 1.6164, "step": 76194 }, { "epoch": 2.54, "grad_norm": 0.6838660836219788, "learning_rate": 3.486784113168057e-05, "loss": 1.668, "step": 76195 }, { "epoch": 2.54, "grad_norm": 0.6892611980438232, "learning_rate": 3.486295059819326e-05, "loss": 1.6565, "step": 76196 }, { "epoch": 2.54, "grad_norm": 0.7040502429008484, "learning_rate": 3.485806038654321e-05, "loss": 1.6724, "step": 76197 }, { "epoch": 2.54, "grad_norm": 0.7007889747619629, "learning_rate": 3.485317049673647e-05, "loss": 1.632, "step": 76198 }, { "epoch": 2.54, "grad_norm": 0.7152493000030518, "learning_rate": 3.484828092877885e-05, "loss": 1.7753, "step": 76199 }, { "epoch": 2.54, "grad_norm": 0.706145703792572, "learning_rate": 3.48433916826764e-05, "loss": 1.6433, "step": 76200 }, { "epoch": 2.54, "grad_norm": 0.6948190331459045, "learning_rate": 3.483850275843503e-05, "loss": 1.6996, "step": 76201 }, { "epoch": 2.54, "grad_norm": 0.6885403394699097, "learning_rate": 3.483361415606055e-05, "loss": 1.5925, "step": 76202 }, { "epoch": 2.54, "grad_norm": 0.7011451125144958, "learning_rate": 3.4828725875559075e-05, "loss": 1.6527, "step": 76203 }, { "epoch": 2.54, "grad_norm": 0.7041548490524292, "learning_rate": 3.482383791693641e-05, "loss": 1.7131, "step": 76204 }, { "epoch": 2.54, "grad_norm": 0.7316206693649292, "learning_rate": 3.481895028019858e-05, "loss": 1.6587, "step": 76205 }, { "epoch": 2.54, "grad_norm": 0.7164016366004944, "learning_rate": 3.481406296535139e-05, "loss": 1.7278, "step": 76206 }, { "epoch": 2.54, "grad_norm": 0.7525011301040649, "learning_rate": 3.4809175972400935e-05, "loss": 1.7544, "step": 76207 }, { "epoch": 2.54, "grad_norm": 0.7055211663246155, "learning_rate": 3.4804289301353064e-05, "loss": 1.7868, "step": 76208 }, { "epoch": 2.54, "grad_norm": 0.684647798538208, "learning_rate": 3.479940295221365e-05, "loss": 1.6286, "step": 76209 }, { "epoch": 2.54, "grad_norm": 0.6907094120979309, "learning_rate": 3.479451692498877e-05, "loss": 1.7351, "step": 76210 }, { "epoch": 2.54, "grad_norm": 0.7035185098648071, "learning_rate": 3.4789631219684244e-05, "loss": 1.7468, "step": 76211 }, { "epoch": 2.54, "grad_norm": 0.7376497983932495, "learning_rate": 3.478474583630597e-05, "loss": 1.7451, "step": 76212 }, { "epoch": 2.54, "grad_norm": 0.6841458082199097, "learning_rate": 3.4779860774859946e-05, "loss": 1.6843, "step": 76213 }, { "epoch": 2.54, "grad_norm": 0.7122568488121033, "learning_rate": 3.477497603535213e-05, "loss": 1.6887, "step": 76214 }, { "epoch": 2.54, "grad_norm": 0.6985936760902405, "learning_rate": 3.477009161778842e-05, "loss": 1.6233, "step": 76215 }, { "epoch": 2.54, "grad_norm": 0.7177222371101379, "learning_rate": 3.476520752217471e-05, "loss": 1.6528, "step": 76216 }, { "epoch": 2.54, "grad_norm": 0.7275938987731934, "learning_rate": 3.4760323748516994e-05, "loss": 1.7403, "step": 76217 }, { "epoch": 2.54, "grad_norm": 0.754075825214386, "learning_rate": 3.475544029682117e-05, "loss": 1.6635, "step": 76218 }, { "epoch": 2.54, "grad_norm": 0.6955925226211548, "learning_rate": 3.475055716709309e-05, "loss": 1.6511, "step": 76219 }, { "epoch": 2.54, "grad_norm": 0.6838041543960571, "learning_rate": 3.474567435933877e-05, "loss": 1.7329, "step": 76220 }, { "epoch": 2.54, "grad_norm": 0.7018616199493408, "learning_rate": 3.4740791873564154e-05, "loss": 1.6161, "step": 76221 }, { "epoch": 2.54, "grad_norm": 0.7254153490066528, "learning_rate": 3.4735909709775146e-05, "loss": 1.812, "step": 76222 }, { "epoch": 2.54, "grad_norm": 0.7087520360946655, "learning_rate": 3.47310278679776e-05, "loss": 1.7033, "step": 76223 }, { "epoch": 2.54, "grad_norm": 0.7019669413566589, "learning_rate": 3.4726146348177554e-05, "loss": 1.7203, "step": 76224 }, { "epoch": 2.54, "grad_norm": 0.7254434823989868, "learning_rate": 3.472126515038086e-05, "loss": 1.6587, "step": 76225 }, { "epoch": 2.54, "grad_norm": 0.7175981402397156, "learning_rate": 3.471638427459345e-05, "loss": 1.714, "step": 76226 }, { "epoch": 2.54, "grad_norm": 0.6754696369171143, "learning_rate": 3.471150372082123e-05, "loss": 1.6413, "step": 76227 }, { "epoch": 2.54, "grad_norm": 0.6914401054382324, "learning_rate": 3.4706623489070285e-05, "loss": 1.6205, "step": 76228 }, { "epoch": 2.54, "grad_norm": 0.7171329259872437, "learning_rate": 3.4701743579346275e-05, "loss": 1.7453, "step": 76229 }, { "epoch": 2.54, "grad_norm": 0.7140874266624451, "learning_rate": 3.4696863991655267e-05, "loss": 1.7104, "step": 76230 }, { "epoch": 2.54, "grad_norm": 0.7039793133735657, "learning_rate": 3.469198472600325e-05, "loss": 1.745, "step": 76231 }, { "epoch": 2.54, "grad_norm": 0.6998834013938904, "learning_rate": 3.468710578239606e-05, "loss": 1.6782, "step": 76232 }, { "epoch": 2.54, "grad_norm": 0.712518036365509, "learning_rate": 3.4682227160839595e-05, "loss": 1.6018, "step": 76233 }, { "epoch": 2.54, "grad_norm": 0.6969226598739624, "learning_rate": 3.467734886133977e-05, "loss": 1.6159, "step": 76234 }, { "epoch": 2.54, "grad_norm": 0.6926059722900391, "learning_rate": 3.4672470883902694e-05, "loss": 1.7385, "step": 76235 }, { "epoch": 2.54, "grad_norm": 0.6863195896148682, "learning_rate": 3.4667593228534016e-05, "loss": 1.6242, "step": 76236 }, { "epoch": 2.54, "grad_norm": 0.6997918486595154, "learning_rate": 3.466271589523981e-05, "loss": 1.6818, "step": 76237 }, { "epoch": 2.54, "grad_norm": 0.7231686115264893, "learning_rate": 3.4657838884026e-05, "loss": 1.7613, "step": 76238 }, { "epoch": 2.54, "grad_norm": 0.7052441835403442, "learning_rate": 3.465296219489852e-05, "loss": 1.655, "step": 76239 }, { "epoch": 2.54, "grad_norm": 0.6714650392532349, "learning_rate": 3.4648085827863145e-05, "loss": 1.6638, "step": 76240 }, { "epoch": 2.54, "grad_norm": 0.7118442058563232, "learning_rate": 3.4643209782925894e-05, "loss": 1.7271, "step": 76241 }, { "epoch": 2.54, "grad_norm": 0.6766343712806702, "learning_rate": 3.463833406009282e-05, "loss": 1.7072, "step": 76242 }, { "epoch": 2.54, "grad_norm": 0.6910750865936279, "learning_rate": 3.463345865936962e-05, "loss": 1.6826, "step": 76243 }, { "epoch": 2.54, "grad_norm": 0.7067521214485168, "learning_rate": 3.462858358076225e-05, "loss": 1.6633, "step": 76244 }, { "epoch": 2.54, "grad_norm": 0.6976550817489624, "learning_rate": 3.462370882427678e-05, "loss": 1.7246, "step": 76245 }, { "epoch": 2.54, "grad_norm": 0.7186559438705444, "learning_rate": 3.4618834389918994e-05, "loss": 1.6854, "step": 76246 }, { "epoch": 2.54, "grad_norm": 0.72333163022995, "learning_rate": 3.46139602776948e-05, "loss": 1.6619, "step": 76247 }, { "epoch": 2.54, "grad_norm": 0.7112841606140137, "learning_rate": 3.460908648761022e-05, "loss": 1.6368, "step": 76248 }, { "epoch": 2.54, "grad_norm": 0.7362549304962158, "learning_rate": 3.460421301967108e-05, "loss": 1.5424, "step": 76249 }, { "epoch": 2.54, "grad_norm": 0.6986813545227051, "learning_rate": 3.459933987388328e-05, "loss": 1.6382, "step": 76250 }, { "epoch": 2.54, "grad_norm": 0.6970816850662231, "learning_rate": 3.459446705025278e-05, "loss": 1.6347, "step": 76251 }, { "epoch": 2.54, "grad_norm": 0.6934088468551636, "learning_rate": 3.458959454878555e-05, "loss": 1.6844, "step": 76252 }, { "epoch": 2.54, "grad_norm": 0.7028312683105469, "learning_rate": 3.458472236948744e-05, "loss": 1.6495, "step": 76253 }, { "epoch": 2.54, "grad_norm": 0.7019672989845276, "learning_rate": 3.457985051236428e-05, "loss": 1.6679, "step": 76254 }, { "epoch": 2.54, "grad_norm": 0.6944471001625061, "learning_rate": 3.457497897742218e-05, "loss": 1.7624, "step": 76255 }, { "epoch": 2.54, "grad_norm": 0.7054774165153503, "learning_rate": 3.4570107764666944e-05, "loss": 1.6028, "step": 76256 }, { "epoch": 2.54, "grad_norm": 0.6954929232597351, "learning_rate": 3.4565236874104416e-05, "loss": 1.6461, "step": 76257 }, { "epoch": 2.54, "grad_norm": 0.7172967195510864, "learning_rate": 3.456036630574063e-05, "loss": 1.7139, "step": 76258 }, { "epoch": 2.54, "grad_norm": 0.6772841811180115, "learning_rate": 3.45554960595814e-05, "loss": 1.646, "step": 76259 }, { "epoch": 2.54, "grad_norm": 0.7173455953598022, "learning_rate": 3.455062613563273e-05, "loss": 1.6772, "step": 76260 }, { "epoch": 2.54, "grad_norm": 0.6822503805160522, "learning_rate": 3.454575653390045e-05, "loss": 1.6844, "step": 76261 }, { "epoch": 2.54, "grad_norm": 0.6887007355690002, "learning_rate": 3.454088725439055e-05, "loss": 1.6435, "step": 76262 }, { "epoch": 2.54, "grad_norm": 0.7199513912200928, "learning_rate": 3.453601829710893e-05, "loss": 1.7379, "step": 76263 }, { "epoch": 2.54, "grad_norm": 0.694996178150177, "learning_rate": 3.4531149662061375e-05, "loss": 1.6595, "step": 76264 }, { "epoch": 2.54, "grad_norm": 0.6985242962837219, "learning_rate": 3.452628134925396e-05, "loss": 1.7172, "step": 76265 }, { "epoch": 2.54, "grad_norm": 0.690591037273407, "learning_rate": 3.452141335869247e-05, "loss": 1.6721, "step": 76266 }, { "epoch": 2.54, "grad_norm": 0.6858078837394714, "learning_rate": 3.451654569038295e-05, "loss": 1.6972, "step": 76267 }, { "epoch": 2.54, "grad_norm": 0.6906232833862305, "learning_rate": 3.4511678344331175e-05, "loss": 1.6332, "step": 76268 }, { "epoch": 2.54, "grad_norm": 0.6900790929794312, "learning_rate": 3.450681132054308e-05, "loss": 1.7592, "step": 76269 }, { "epoch": 2.54, "grad_norm": 0.6907355785369873, "learning_rate": 3.450194461902467e-05, "loss": 1.7317, "step": 76270 }, { "epoch": 2.54, "grad_norm": 0.6909307241439819, "learning_rate": 3.44970782397817e-05, "loss": 1.6601, "step": 76271 }, { "epoch": 2.54, "grad_norm": 0.7073160409927368, "learning_rate": 3.449221218282019e-05, "loss": 1.717, "step": 76272 }, { "epoch": 2.54, "grad_norm": 0.7073990702629089, "learning_rate": 3.448734644814598e-05, "loss": 1.741, "step": 76273 }, { "epoch": 2.54, "grad_norm": 0.698910117149353, "learning_rate": 3.448248103576507e-05, "loss": 1.6574, "step": 76274 }, { "epoch": 2.54, "grad_norm": 0.6910979747772217, "learning_rate": 3.44776159456833e-05, "loss": 1.6708, "step": 76275 }, { "epoch": 2.54, "grad_norm": 0.6743177175521851, "learning_rate": 3.447275117790656e-05, "loss": 1.6147, "step": 76276 }, { "epoch": 2.54, "grad_norm": 0.6862533092498779, "learning_rate": 3.4467886732440785e-05, "loss": 1.6915, "step": 76277 }, { "epoch": 2.54, "grad_norm": 0.6935288906097412, "learning_rate": 3.446302260929189e-05, "loss": 1.7209, "step": 76278 }, { "epoch": 2.54, "grad_norm": 0.7382699251174927, "learning_rate": 3.4458158808465694e-05, "loss": 1.7263, "step": 76279 }, { "epoch": 2.54, "grad_norm": 0.7120919823646545, "learning_rate": 3.445329532996817e-05, "loss": 1.6425, "step": 76280 }, { "epoch": 2.54, "grad_norm": 0.7211622595787048, "learning_rate": 3.4448432173805294e-05, "loss": 1.721, "step": 76281 }, { "epoch": 2.54, "grad_norm": 0.7106115221977234, "learning_rate": 3.444356933998288e-05, "loss": 1.707, "step": 76282 }, { "epoch": 2.54, "grad_norm": 0.7078376412391663, "learning_rate": 3.443870682850678e-05, "loss": 1.7503, "step": 76283 }, { "epoch": 2.54, "grad_norm": 0.6985166072845459, "learning_rate": 3.4433844639383026e-05, "loss": 1.6742, "step": 76284 }, { "epoch": 2.54, "grad_norm": 0.6891637444496155, "learning_rate": 3.4428982772617473e-05, "loss": 1.7283, "step": 76285 }, { "epoch": 2.54, "grad_norm": 0.7036283612251282, "learning_rate": 3.442412122821593e-05, "loss": 1.7059, "step": 76286 }, { "epoch": 2.54, "grad_norm": 0.7061610221862793, "learning_rate": 3.441926000618438e-05, "loss": 1.6474, "step": 76287 }, { "epoch": 2.54, "grad_norm": 0.698820173740387, "learning_rate": 3.4414399106528756e-05, "loss": 1.6959, "step": 76288 }, { "epoch": 2.54, "grad_norm": 0.6919306516647339, "learning_rate": 3.4409538529254924e-05, "loss": 1.677, "step": 76289 }, { "epoch": 2.54, "grad_norm": 1.0628975629806519, "learning_rate": 3.4404678274368736e-05, "loss": 1.6916, "step": 76290 }, { "epoch": 2.54, "grad_norm": 0.7072242498397827, "learning_rate": 3.43998183418762e-05, "loss": 1.7013, "step": 76291 }, { "epoch": 2.54, "grad_norm": 0.7167669534683228, "learning_rate": 3.439495873178313e-05, "loss": 1.7081, "step": 76292 }, { "epoch": 2.54, "grad_norm": 0.7316671013832092, "learning_rate": 3.4390099444095395e-05, "loss": 1.6446, "step": 76293 }, { "epoch": 2.54, "grad_norm": 0.7091054320335388, "learning_rate": 3.438524047881893e-05, "loss": 1.7612, "step": 76294 }, { "epoch": 2.54, "grad_norm": 0.7038140892982483, "learning_rate": 3.438038183595973e-05, "loss": 1.6309, "step": 76295 }, { "epoch": 2.54, "grad_norm": 0.7113093733787537, "learning_rate": 3.437552351552357e-05, "loss": 1.6736, "step": 76296 }, { "epoch": 2.54, "grad_norm": 0.7258045673370361, "learning_rate": 3.437066551751637e-05, "loss": 1.7204, "step": 76297 }, { "epoch": 2.54, "grad_norm": 0.6899315714836121, "learning_rate": 3.436580784194408e-05, "loss": 1.6635, "step": 76298 }, { "epoch": 2.54, "grad_norm": 0.6859785318374634, "learning_rate": 3.4360950488812565e-05, "loss": 1.6898, "step": 76299 }, { "epoch": 2.54, "grad_norm": 0.6642526984214783, "learning_rate": 3.435609345812765e-05, "loss": 1.6674, "step": 76300 }, { "epoch": 2.54, "grad_norm": 0.7116193771362305, "learning_rate": 3.4351236749895306e-05, "loss": 1.7043, "step": 76301 }, { "epoch": 2.54, "grad_norm": 0.7076060175895691, "learning_rate": 3.4346380364121486e-05, "loss": 1.6923, "step": 76302 }, { "epoch": 2.54, "grad_norm": 0.7014440298080444, "learning_rate": 3.434152430081202e-05, "loss": 1.7366, "step": 76303 }, { "epoch": 2.54, "grad_norm": 0.7103749513626099, "learning_rate": 3.4336668559972735e-05, "loss": 1.7394, "step": 76304 }, { "epoch": 2.54, "grad_norm": 0.6929076910018921, "learning_rate": 3.433181314160963e-05, "loss": 1.6507, "step": 76305 }, { "epoch": 2.54, "grad_norm": 0.6792038679122925, "learning_rate": 3.432695804572857e-05, "loss": 1.6638, "step": 76306 }, { "epoch": 2.54, "grad_norm": 0.6921809911727905, "learning_rate": 3.4322103272335414e-05, "loss": 1.7375, "step": 76307 }, { "epoch": 2.54, "grad_norm": 0.6989820599555969, "learning_rate": 3.431724882143608e-05, "loss": 1.7233, "step": 76308 }, { "epoch": 2.54, "grad_norm": 0.6989160776138306, "learning_rate": 3.4312394693036446e-05, "loss": 1.6998, "step": 76309 }, { "epoch": 2.54, "grad_norm": 0.694659411907196, "learning_rate": 3.4307540887142437e-05, "loss": 1.7377, "step": 76310 }, { "epoch": 2.54, "grad_norm": 0.6937942504882812, "learning_rate": 3.430268740375991e-05, "loss": 1.6555, "step": 76311 }, { "epoch": 2.54, "grad_norm": 0.7399632930755615, "learning_rate": 3.42978342428948e-05, "loss": 1.6463, "step": 76312 }, { "epoch": 2.54, "grad_norm": 0.7104029059410095, "learning_rate": 3.429298140455299e-05, "loss": 1.7416, "step": 76313 }, { "epoch": 2.54, "grad_norm": 0.7052923440933228, "learning_rate": 3.428812888874025e-05, "loss": 1.6305, "step": 76314 }, { "epoch": 2.54, "grad_norm": 0.6827996373176575, "learning_rate": 3.428327669546268e-05, "loss": 1.6733, "step": 76315 }, { "epoch": 2.54, "grad_norm": 0.7212479114532471, "learning_rate": 3.427842482472597e-05, "loss": 1.6827, "step": 76316 }, { "epoch": 2.54, "grad_norm": 0.6767338514328003, "learning_rate": 3.427357327653618e-05, "loss": 1.6372, "step": 76317 }, { "epoch": 2.54, "grad_norm": 0.7106066346168518, "learning_rate": 3.426872205089911e-05, "loss": 1.6034, "step": 76318 }, { "epoch": 2.54, "grad_norm": 0.7229052782058716, "learning_rate": 3.426387114782059e-05, "loss": 1.7137, "step": 76319 }, { "epoch": 2.54, "grad_norm": 0.6643189191818237, "learning_rate": 3.425902056730664e-05, "loss": 1.6511, "step": 76320 }, { "epoch": 2.54, "grad_norm": 0.6986901760101318, "learning_rate": 3.425417030936303e-05, "loss": 1.7059, "step": 76321 }, { "epoch": 2.54, "grad_norm": 0.701433002948761, "learning_rate": 3.424932037399578e-05, "loss": 1.7058, "step": 76322 }, { "epoch": 2.54, "grad_norm": 0.6937293410301208, "learning_rate": 3.42444707612106e-05, "loss": 1.6859, "step": 76323 }, { "epoch": 2.54, "grad_norm": 0.7178609371185303, "learning_rate": 3.423962147101357e-05, "loss": 1.7114, "step": 76324 }, { "epoch": 2.54, "grad_norm": 0.6826517581939697, "learning_rate": 3.4234772503410454e-05, "loss": 1.6657, "step": 76325 }, { "epoch": 2.54, "grad_norm": 0.7099258303642273, "learning_rate": 3.422992385840713e-05, "loss": 1.6866, "step": 76326 }, { "epoch": 2.54, "grad_norm": 0.7186116576194763, "learning_rate": 3.422507553600954e-05, "loss": 1.7486, "step": 76327 }, { "epoch": 2.54, "grad_norm": 0.7187700867652893, "learning_rate": 3.422022753622359e-05, "loss": 1.7823, "step": 76328 }, { "epoch": 2.54, "grad_norm": 0.686241626739502, "learning_rate": 3.4215379859055035e-05, "loss": 1.6866, "step": 76329 }, { "epoch": 2.54, "grad_norm": 0.7131367325782776, "learning_rate": 3.421053250450985e-05, "loss": 1.6882, "step": 76330 }, { "epoch": 2.54, "grad_norm": 0.6865392923355103, "learning_rate": 3.420568547259401e-05, "loss": 1.6839, "step": 76331 }, { "epoch": 2.54, "grad_norm": 0.7287508249282837, "learning_rate": 3.4200838763313265e-05, "loss": 1.718, "step": 76332 }, { "epoch": 2.54, "grad_norm": 0.6738696098327637, "learning_rate": 3.41959923766735e-05, "loss": 1.6923, "step": 76333 }, { "epoch": 2.54, "grad_norm": 0.7260662317276001, "learning_rate": 3.419114631268067e-05, "loss": 1.7592, "step": 76334 }, { "epoch": 2.54, "grad_norm": 0.6894587278366089, "learning_rate": 3.418630057134065e-05, "loss": 1.6467, "step": 76335 }, { "epoch": 2.54, "grad_norm": 0.7096413969993591, "learning_rate": 3.418145515265923e-05, "loss": 1.6499, "step": 76336 }, { "epoch": 2.54, "grad_norm": 0.6995877623558044, "learning_rate": 3.4176610056642375e-05, "loss": 1.6508, "step": 76337 }, { "epoch": 2.54, "grad_norm": 0.6874725818634033, "learning_rate": 3.417176528329597e-05, "loss": 1.736, "step": 76338 }, { "epoch": 2.54, "grad_norm": 0.6791722178459167, "learning_rate": 3.416692083262589e-05, "loss": 1.6793, "step": 76339 }, { "epoch": 2.54, "grad_norm": 0.7128915786743164, "learning_rate": 3.416207670463795e-05, "loss": 1.6476, "step": 76340 }, { "epoch": 2.54, "grad_norm": 0.6832386255264282, "learning_rate": 3.4157232899338126e-05, "loss": 1.7184, "step": 76341 }, { "epoch": 2.54, "grad_norm": 0.7042512893676758, "learning_rate": 3.415238941673224e-05, "loss": 1.7285, "step": 76342 }, { "epoch": 2.54, "grad_norm": 0.7054750919342041, "learning_rate": 3.414754625682615e-05, "loss": 1.6538, "step": 76343 }, { "epoch": 2.54, "grad_norm": 0.703187882900238, "learning_rate": 3.414270341962576e-05, "loss": 1.6546, "step": 76344 }, { "epoch": 2.54, "grad_norm": 0.6980087757110596, "learning_rate": 3.413786090513706e-05, "loss": 1.686, "step": 76345 }, { "epoch": 2.54, "grad_norm": 0.6879427433013916, "learning_rate": 3.413301871336571e-05, "loss": 1.7043, "step": 76346 }, { "epoch": 2.54, "grad_norm": 0.7166832685470581, "learning_rate": 3.412817684431771e-05, "loss": 1.7149, "step": 76347 }, { "epoch": 2.54, "grad_norm": 0.7074058651924133, "learning_rate": 3.4123335297998986e-05, "loss": 1.7148, "step": 76348 }, { "epoch": 2.54, "grad_norm": 0.6878814697265625, "learning_rate": 3.411849407441537e-05, "loss": 1.6853, "step": 76349 }, { "epoch": 2.54, "grad_norm": 0.6821698546409607, "learning_rate": 3.411365317357265e-05, "loss": 1.6524, "step": 76350 }, { "epoch": 2.54, "grad_norm": 0.6891623139381409, "learning_rate": 3.41088125954768e-05, "loss": 1.6215, "step": 76351 }, { "epoch": 2.54, "grad_norm": 0.7123576402664185, "learning_rate": 3.4103972340133765e-05, "loss": 1.6788, "step": 76352 }, { "epoch": 2.54, "grad_norm": 0.6995307207107544, "learning_rate": 3.4099132407549226e-05, "loss": 1.734, "step": 76353 }, { "epoch": 2.54, "grad_norm": 0.7053127288818359, "learning_rate": 3.409429279772916e-05, "loss": 1.6087, "step": 76354 }, { "epoch": 2.54, "grad_norm": 0.6821234226226807, "learning_rate": 3.4089453510679475e-05, "loss": 1.6782, "step": 76355 }, { "epoch": 2.54, "grad_norm": 0.7087078094482422, "learning_rate": 3.408461454640606e-05, "loss": 1.67, "step": 76356 }, { "epoch": 2.54, "grad_norm": 0.7118898630142212, "learning_rate": 3.407977590491465e-05, "loss": 1.6503, "step": 76357 }, { "epoch": 2.54, "grad_norm": 0.7108436226844788, "learning_rate": 3.4074937586211204e-05, "loss": 1.7206, "step": 76358 }, { "epoch": 2.54, "grad_norm": 0.6992396712303162, "learning_rate": 3.407009959030175e-05, "loss": 1.6716, "step": 76359 }, { "epoch": 2.54, "grad_norm": 0.7190810441970825, "learning_rate": 3.406526191719188e-05, "loss": 1.6985, "step": 76360 }, { "epoch": 2.54, "grad_norm": 0.7081790566444397, "learning_rate": 3.406042456688759e-05, "loss": 1.6927, "step": 76361 }, { "epoch": 2.54, "grad_norm": 0.7080488204956055, "learning_rate": 3.405558753939485e-05, "loss": 1.71, "step": 76362 }, { "epoch": 2.54, "grad_norm": 0.7169983983039856, "learning_rate": 3.405075083471941e-05, "loss": 1.7672, "step": 76363 }, { "epoch": 2.54, "grad_norm": 0.6784381866455078, "learning_rate": 3.404591445286714e-05, "loss": 1.7069, "step": 76364 }, { "epoch": 2.54, "grad_norm": 0.7100762128829956, "learning_rate": 3.404107839384397e-05, "loss": 1.6832, "step": 76365 }, { "epoch": 2.54, "grad_norm": 0.6960088610649109, "learning_rate": 3.403624265765575e-05, "loss": 1.7212, "step": 76366 }, { "epoch": 2.54, "grad_norm": 0.6836463809013367, "learning_rate": 3.403140724430833e-05, "loss": 1.6526, "step": 76367 }, { "epoch": 2.54, "grad_norm": 0.6804773807525635, "learning_rate": 3.402657215380755e-05, "loss": 1.6647, "step": 76368 }, { "epoch": 2.54, "grad_norm": 0.7429794073104858, "learning_rate": 3.402173738615939e-05, "loss": 1.6835, "step": 76369 }, { "epoch": 2.54, "grad_norm": 0.7278348207473755, "learning_rate": 3.401690294136963e-05, "loss": 1.6235, "step": 76370 }, { "epoch": 2.54, "grad_norm": 0.7152996063232422, "learning_rate": 3.401206881944414e-05, "loss": 1.7226, "step": 76371 }, { "epoch": 2.54, "grad_norm": 0.7034772038459778, "learning_rate": 3.4007235020388846e-05, "loss": 1.6731, "step": 76372 }, { "epoch": 2.54, "grad_norm": 0.6987401843070984, "learning_rate": 3.4002401544209576e-05, "loss": 1.7189, "step": 76373 }, { "epoch": 2.54, "grad_norm": 0.6897327303886414, "learning_rate": 3.3997568390912136e-05, "loss": 1.6352, "step": 76374 }, { "epoch": 2.54, "grad_norm": 0.6888056993484497, "learning_rate": 3.3992735560502504e-05, "loss": 1.7018, "step": 76375 }, { "epoch": 2.54, "grad_norm": 0.7006546854972839, "learning_rate": 3.398790305298645e-05, "loss": 1.6847, "step": 76376 }, { "epoch": 2.54, "grad_norm": 0.7189054489135742, "learning_rate": 3.3983070868369946e-05, "loss": 1.6452, "step": 76377 }, { "epoch": 2.54, "grad_norm": 0.7342386841773987, "learning_rate": 3.3978239006658735e-05, "loss": 1.7305, "step": 76378 }, { "epoch": 2.54, "grad_norm": 0.695691704750061, "learning_rate": 3.3973407467858826e-05, "loss": 1.6693, "step": 76379 }, { "epoch": 2.54, "grad_norm": 0.7158417105674744, "learning_rate": 3.396857625197598e-05, "loss": 1.7269, "step": 76380 }, { "epoch": 2.54, "grad_norm": 0.6972088813781738, "learning_rate": 3.396374535901602e-05, "loss": 1.6054, "step": 76381 }, { "epoch": 2.54, "grad_norm": 0.6954852938652039, "learning_rate": 3.3958914788984935e-05, "loss": 1.6767, "step": 76382 }, { "epoch": 2.54, "grad_norm": 0.7074894309043884, "learning_rate": 3.3954084541888505e-05, "loss": 1.7203, "step": 76383 }, { "epoch": 2.54, "grad_norm": 1.9816793203353882, "learning_rate": 3.394925461773265e-05, "loss": 1.7182, "step": 76384 }, { "epoch": 2.54, "grad_norm": 0.694537341594696, "learning_rate": 3.394442501652319e-05, "loss": 1.6259, "step": 76385 }, { "epoch": 2.54, "grad_norm": 0.7356677055358887, "learning_rate": 3.393959573826594e-05, "loss": 1.6368, "step": 76386 }, { "epoch": 2.54, "grad_norm": 0.6879916191101074, "learning_rate": 3.393476678296687e-05, "loss": 1.6311, "step": 76387 }, { "epoch": 2.54, "grad_norm": 0.7008088827133179, "learning_rate": 3.3929938150631764e-05, "loss": 1.7168, "step": 76388 }, { "epoch": 2.54, "grad_norm": 1.7333924770355225, "learning_rate": 3.392510984126654e-05, "loss": 1.7391, "step": 76389 }, { "epoch": 2.54, "grad_norm": 0.6888294219970703, "learning_rate": 3.3920281854877e-05, "loss": 1.6137, "step": 76390 }, { "epoch": 2.54, "grad_norm": 0.7026825547218323, "learning_rate": 3.391545419146906e-05, "loss": 1.7127, "step": 76391 }, { "epoch": 2.54, "grad_norm": 0.6930646896362305, "learning_rate": 3.391062685104856e-05, "loss": 1.7261, "step": 76392 }, { "epoch": 2.54, "grad_norm": 0.716445803642273, "learning_rate": 3.390579983362129e-05, "loss": 1.6779, "step": 76393 }, { "epoch": 2.54, "grad_norm": 0.6951799988746643, "learning_rate": 3.3900973139193255e-05, "loss": 1.713, "step": 76394 }, { "epoch": 2.54, "grad_norm": 0.7158883810043335, "learning_rate": 3.389614676777021e-05, "loss": 1.6499, "step": 76395 }, { "epoch": 2.54, "grad_norm": 0.6802411675453186, "learning_rate": 3.389132071935797e-05, "loss": 1.6754, "step": 76396 }, { "epoch": 2.54, "grad_norm": 0.6992858052253723, "learning_rate": 3.388649499396245e-05, "loss": 1.6445, "step": 76397 }, { "epoch": 2.54, "grad_norm": 0.6819305419921875, "learning_rate": 3.388166959158961e-05, "loss": 1.6295, "step": 76398 }, { "epoch": 2.54, "grad_norm": 0.6988674402236938, "learning_rate": 3.3876844512245175e-05, "loss": 1.6789, "step": 76399 }, { "epoch": 2.54, "grad_norm": 0.7121136784553528, "learning_rate": 3.3872019755934997e-05, "loss": 1.7268, "step": 76400 }, { "epoch": 2.54, "grad_norm": 0.6841036677360535, "learning_rate": 3.386719532266505e-05, "loss": 1.5963, "step": 76401 }, { "epoch": 2.54, "grad_norm": 0.6953135132789612, "learning_rate": 3.38623712124411e-05, "loss": 1.7447, "step": 76402 }, { "epoch": 2.54, "grad_norm": 0.6993733048439026, "learning_rate": 3.385754742526896e-05, "loss": 1.6819, "step": 76403 }, { "epoch": 2.54, "grad_norm": 0.7175346612930298, "learning_rate": 3.3852723961154535e-05, "loss": 1.7439, "step": 76404 }, { "epoch": 2.54, "grad_norm": 0.6976240873336792, "learning_rate": 3.3847900820103756e-05, "loss": 1.7042, "step": 76405 }, { "epoch": 2.54, "grad_norm": 0.6849830746650696, "learning_rate": 3.3843078002122415e-05, "loss": 1.6794, "step": 76406 }, { "epoch": 2.54, "grad_norm": 0.6847816705703735, "learning_rate": 3.383825550721627e-05, "loss": 1.7166, "step": 76407 }, { "epoch": 2.54, "grad_norm": 0.6859809160232544, "learning_rate": 3.383343333539136e-05, "loss": 1.7396, "step": 76408 }, { "epoch": 2.54, "grad_norm": 0.6846317648887634, "learning_rate": 3.3828611486653435e-05, "loss": 1.6604, "step": 76409 }, { "epoch": 2.54, "grad_norm": 0.6858453154563904, "learning_rate": 3.3823789961008294e-05, "loss": 1.7075, "step": 76410 }, { "epoch": 2.54, "grad_norm": 0.6969847083091736, "learning_rate": 3.381896875846184e-05, "loss": 1.6845, "step": 76411 }, { "epoch": 2.54, "grad_norm": 0.7081049084663391, "learning_rate": 3.381414787902003e-05, "loss": 1.7639, "step": 76412 }, { "epoch": 2.54, "grad_norm": 0.7013209462165833, "learning_rate": 3.3809327322688615e-05, "loss": 1.721, "step": 76413 }, { "epoch": 2.54, "grad_norm": 0.690719485282898, "learning_rate": 3.3804507089473375e-05, "loss": 1.6838, "step": 76414 }, { "epoch": 2.54, "grad_norm": 0.7273707389831543, "learning_rate": 3.379968717938033e-05, "loss": 1.7269, "step": 76415 }, { "epoch": 2.54, "grad_norm": 0.6880402565002441, "learning_rate": 3.379486759241521e-05, "loss": 1.6701, "step": 76416 }, { "epoch": 2.54, "grad_norm": 0.7208709716796875, "learning_rate": 3.379004832858384e-05, "loss": 1.6378, "step": 76417 }, { "epoch": 2.54, "grad_norm": 0.6986824870109558, "learning_rate": 3.3785229387892154e-05, "loss": 1.6146, "step": 76418 }, { "epoch": 2.54, "grad_norm": 0.678700864315033, "learning_rate": 3.378041077034601e-05, "loss": 1.7462, "step": 76419 }, { "epoch": 2.54, "grad_norm": 0.6951801776885986, "learning_rate": 3.377559247595124e-05, "loss": 1.7276, "step": 76420 }, { "epoch": 2.54, "grad_norm": 0.6923484206199646, "learning_rate": 3.37707745047136e-05, "loss": 1.6568, "step": 76421 }, { "epoch": 2.54, "grad_norm": 0.7332632541656494, "learning_rate": 3.3765956856639095e-05, "loss": 1.6675, "step": 76422 }, { "epoch": 2.54, "grad_norm": 0.7128987312316895, "learning_rate": 3.376113953173345e-05, "loss": 1.6709, "step": 76423 }, { "epoch": 2.54, "grad_norm": 0.7205474972724915, "learning_rate": 3.375632253000252e-05, "loss": 1.6762, "step": 76424 }, { "epoch": 2.54, "grad_norm": 0.7185654044151306, "learning_rate": 3.375150585145224e-05, "loss": 1.7658, "step": 76425 }, { "epoch": 2.54, "grad_norm": 0.7258539795875549, "learning_rate": 3.3746689496088376e-05, "loss": 1.722, "step": 76426 }, { "epoch": 2.54, "grad_norm": 0.7035166621208191, "learning_rate": 3.374187346391681e-05, "loss": 1.6424, "step": 76427 }, { "epoch": 2.54, "grad_norm": 0.7391448616981506, "learning_rate": 3.373705775494335e-05, "loss": 1.7416, "step": 76428 }, { "epoch": 2.54, "grad_norm": 0.6997959017753601, "learning_rate": 3.373224236917392e-05, "loss": 1.6595, "step": 76429 }, { "epoch": 2.54, "grad_norm": 0.7169667482376099, "learning_rate": 3.372742730661432e-05, "loss": 1.7133, "step": 76430 }, { "epoch": 2.54, "grad_norm": 0.6891965866088867, "learning_rate": 3.3722612567270305e-05, "loss": 1.6523, "step": 76431 }, { "epoch": 2.54, "grad_norm": 0.7205982208251953, "learning_rate": 3.3717798151147865e-05, "loss": 1.709, "step": 76432 }, { "epoch": 2.54, "grad_norm": 0.7013424634933472, "learning_rate": 3.371298405825274e-05, "loss": 1.6585, "step": 76433 }, { "epoch": 2.54, "grad_norm": 0.7016438245773315, "learning_rate": 3.3708170288590896e-05, "loss": 1.6753, "step": 76434 }, { "epoch": 2.54, "grad_norm": 0.6967188119888306, "learning_rate": 3.3703356842168084e-05, "loss": 1.6321, "step": 76435 }, { "epoch": 2.54, "grad_norm": 0.6761395931243896, "learning_rate": 3.369854371899007e-05, "loss": 1.6218, "step": 76436 }, { "epoch": 2.54, "grad_norm": 0.7203986048698425, "learning_rate": 3.3693730919062875e-05, "loss": 1.6, "step": 76437 }, { "epoch": 2.54, "grad_norm": 0.7005136013031006, "learning_rate": 3.368891844239221e-05, "loss": 1.6533, "step": 76438 }, { "epoch": 2.54, "grad_norm": 0.7080707550048828, "learning_rate": 3.3684106288983994e-05, "loss": 1.6334, "step": 76439 }, { "epoch": 2.54, "grad_norm": 0.7033427953720093, "learning_rate": 3.367929445884395e-05, "loss": 1.6829, "step": 76440 }, { "epoch": 2.54, "grad_norm": 0.7360718846321106, "learning_rate": 3.367448295197809e-05, "loss": 1.7467, "step": 76441 }, { "epoch": 2.54, "grad_norm": 0.7079052925109863, "learning_rate": 3.366967176839216e-05, "loss": 1.5853, "step": 76442 }, { "epoch": 2.54, "grad_norm": 0.6952391862869263, "learning_rate": 3.366486090809196e-05, "loss": 1.7237, "step": 76443 }, { "epoch": 2.54, "grad_norm": 0.7153740525245667, "learning_rate": 3.366005037108342e-05, "loss": 1.6418, "step": 76444 }, { "epoch": 2.54, "grad_norm": 0.7244200110435486, "learning_rate": 3.3655240157372344e-05, "loss": 1.6856, "step": 76445 }, { "epoch": 2.54, "grad_norm": 0.6936193704605103, "learning_rate": 3.365043026696451e-05, "loss": 1.6639, "step": 76446 }, { "epoch": 2.54, "grad_norm": 0.7102435827255249, "learning_rate": 3.3645620699865794e-05, "loss": 1.6673, "step": 76447 }, { "epoch": 2.54, "grad_norm": 0.7321479916572571, "learning_rate": 3.364081145608212e-05, "loss": 1.6581, "step": 76448 }, { "epoch": 2.54, "grad_norm": 0.7092018127441406, "learning_rate": 3.3636002535619245e-05, "loss": 1.6572, "step": 76449 }, { "epoch": 2.54, "grad_norm": 0.7220853567123413, "learning_rate": 3.363119393848297e-05, "loss": 1.6954, "step": 76450 }, { "epoch": 2.54, "grad_norm": 0.6914682984352112, "learning_rate": 3.362638566467922e-05, "loss": 1.6121, "step": 76451 }, { "epoch": 2.54, "grad_norm": 0.6921829581260681, "learning_rate": 3.36215777142138e-05, "loss": 1.6321, "step": 76452 }, { "epoch": 2.54, "grad_norm": 0.7193642258644104, "learning_rate": 3.361677008709246e-05, "loss": 1.7208, "step": 76453 }, { "epoch": 2.54, "grad_norm": 0.7246688604354858, "learning_rate": 3.3611962783321133e-05, "loss": 1.6791, "step": 76454 }, { "epoch": 2.54, "grad_norm": 0.7023773789405823, "learning_rate": 3.360715580290575e-05, "loss": 1.6756, "step": 76455 }, { "epoch": 2.54, "grad_norm": 0.686842679977417, "learning_rate": 3.360234914585191e-05, "loss": 1.6921, "step": 76456 }, { "epoch": 2.54, "grad_norm": 0.687669575214386, "learning_rate": 3.3597542812165554e-05, "loss": 1.6903, "step": 76457 }, { "epoch": 2.54, "grad_norm": 0.7032104730606079, "learning_rate": 3.359273680185257e-05, "loss": 1.6965, "step": 76458 }, { "epoch": 2.54, "grad_norm": 0.707983136177063, "learning_rate": 3.358793111491878e-05, "loss": 1.6491, "step": 76459 }, { "epoch": 2.54, "grad_norm": 0.703368067741394, "learning_rate": 3.358312575136993e-05, "loss": 1.6566, "step": 76460 }, { "epoch": 2.54, "grad_norm": 0.6903351545333862, "learning_rate": 3.35783207112119e-05, "loss": 1.6071, "step": 76461 }, { "epoch": 2.54, "grad_norm": 0.7046604752540588, "learning_rate": 3.357351599445065e-05, "loss": 1.6847, "step": 76462 }, { "epoch": 2.54, "grad_norm": 0.6890430450439453, "learning_rate": 3.35687116010918e-05, "loss": 1.6471, "step": 76463 }, { "epoch": 2.54, "grad_norm": 0.6964048147201538, "learning_rate": 3.356390753114126e-05, "loss": 1.6622, "step": 76464 }, { "epoch": 2.54, "grad_norm": 0.6779263615608215, "learning_rate": 3.3559103784604935e-05, "loss": 1.6993, "step": 76465 }, { "epoch": 2.54, "grad_norm": 0.7234338521957397, "learning_rate": 3.355430036148861e-05, "loss": 1.7298, "step": 76466 }, { "epoch": 2.54, "grad_norm": 0.6896287798881531, "learning_rate": 3.354949726179803e-05, "loss": 1.7176, "step": 76467 }, { "epoch": 2.54, "grad_norm": 0.7219560146331787, "learning_rate": 3.35446944855391e-05, "loss": 1.681, "step": 76468 }, { "epoch": 2.54, "grad_norm": 0.6745978593826294, "learning_rate": 3.3539892032717785e-05, "loss": 1.6748, "step": 76469 }, { "epoch": 2.54, "grad_norm": 0.700354814529419, "learning_rate": 3.353508990333964e-05, "loss": 1.6629, "step": 76470 }, { "epoch": 2.54, "grad_norm": 0.701302707195282, "learning_rate": 3.353028809741066e-05, "loss": 1.6956, "step": 76471 }, { "epoch": 2.54, "grad_norm": 0.7473891973495483, "learning_rate": 3.352548661493668e-05, "loss": 1.6183, "step": 76472 }, { "epoch": 2.54, "grad_norm": 0.6842142343521118, "learning_rate": 3.3520685455923525e-05, "loss": 1.7179, "step": 76473 }, { "epoch": 2.54, "grad_norm": 0.6952564716339111, "learning_rate": 3.3515884620376924e-05, "loss": 1.7209, "step": 76474 }, { "epoch": 2.54, "grad_norm": 0.6940931677818298, "learning_rate": 3.3511084108302766e-05, "loss": 1.704, "step": 76475 }, { "epoch": 2.54, "grad_norm": 0.7152460217475891, "learning_rate": 3.3506283919706987e-05, "loss": 1.7082, "step": 76476 }, { "epoch": 2.54, "grad_norm": 0.7380034923553467, "learning_rate": 3.350148405459525e-05, "loss": 1.6406, "step": 76477 }, { "epoch": 2.54, "grad_norm": 0.7222282290458679, "learning_rate": 3.349668451297338e-05, "loss": 1.6772, "step": 76478 }, { "epoch": 2.54, "grad_norm": 0.6997591853141785, "learning_rate": 3.3491885294847374e-05, "loss": 1.6794, "step": 76479 }, { "epoch": 2.54, "grad_norm": 0.6959417462348938, "learning_rate": 3.348708640022293e-05, "loss": 1.7763, "step": 76480 }, { "epoch": 2.54, "grad_norm": 0.7169649004936218, "learning_rate": 3.3482287829105834e-05, "loss": 1.6994, "step": 76481 }, { "epoch": 2.54, "grad_norm": 0.6873122453689575, "learning_rate": 3.347748958150206e-05, "loss": 1.7035, "step": 76482 }, { "epoch": 2.54, "grad_norm": 0.6870836615562439, "learning_rate": 3.347269165741733e-05, "loss": 1.7433, "step": 76483 }, { "epoch": 2.54, "grad_norm": 0.6943673491477966, "learning_rate": 3.34678940568574e-05, "loss": 1.6413, "step": 76484 }, { "epoch": 2.54, "grad_norm": 0.7167305946350098, "learning_rate": 3.346309677982818e-05, "loss": 1.7108, "step": 76485 }, { "epoch": 2.54, "grad_norm": 0.6972567439079285, "learning_rate": 3.345829982633556e-05, "loss": 1.6581, "step": 76486 }, { "epoch": 2.54, "grad_norm": 0.6747862696647644, "learning_rate": 3.345350319638529e-05, "loss": 1.6611, "step": 76487 }, { "epoch": 2.54, "grad_norm": 0.7103284001350403, "learning_rate": 3.344870688998315e-05, "loss": 1.7819, "step": 76488 }, { "epoch": 2.54, "grad_norm": 0.7096802592277527, "learning_rate": 3.344391090713502e-05, "loss": 1.7314, "step": 76489 }, { "epoch": 2.54, "grad_norm": 0.6827940940856934, "learning_rate": 3.343911524784674e-05, "loss": 1.6762, "step": 76490 }, { "epoch": 2.54, "grad_norm": 0.7159438133239746, "learning_rate": 3.343431991212403e-05, "loss": 1.6598, "step": 76491 }, { "epoch": 2.54, "grad_norm": 0.6801945567131042, "learning_rate": 3.3429524899972826e-05, "loss": 1.6841, "step": 76492 }, { "epoch": 2.54, "grad_norm": 0.7010022401809692, "learning_rate": 3.3424730211398886e-05, "loss": 1.6612, "step": 76493 }, { "epoch": 2.54, "grad_norm": 0.7182634472846985, "learning_rate": 3.341993584640807e-05, "loss": 1.6991, "step": 76494 }, { "epoch": 2.54, "grad_norm": 0.6833251118659973, "learning_rate": 3.341514180500612e-05, "loss": 1.7063, "step": 76495 }, { "epoch": 2.55, "grad_norm": 0.6960907578468323, "learning_rate": 3.3410348087198944e-05, "loss": 1.665, "step": 76496 }, { "epoch": 2.55, "grad_norm": 0.7103754878044128, "learning_rate": 3.3405554692992354e-05, "loss": 1.719, "step": 76497 }, { "epoch": 2.55, "grad_norm": 0.6871277093887329, "learning_rate": 3.340076162239207e-05, "loss": 1.6613, "step": 76498 }, { "epoch": 2.55, "grad_norm": 0.7045572400093079, "learning_rate": 3.3395968875404054e-05, "loss": 1.6212, "step": 76499 }, { "epoch": 2.55, "grad_norm": 0.6924141645431519, "learning_rate": 3.339117645203397e-05, "loss": 1.6501, "step": 76500 }, { "epoch": 2.55, "grad_norm": 0.7034330368041992, "learning_rate": 3.3386384352287785e-05, "loss": 1.6666, "step": 76501 }, { "epoch": 2.55, "grad_norm": 0.7062782645225525, "learning_rate": 3.3381592576171256e-05, "loss": 1.6585, "step": 76502 }, { "epoch": 2.55, "grad_norm": 0.7170192003250122, "learning_rate": 3.337680112369011e-05, "loss": 1.764, "step": 76503 }, { "epoch": 2.55, "grad_norm": 0.6925079822540283, "learning_rate": 3.337200999485031e-05, "loss": 1.7383, "step": 76504 }, { "epoch": 2.55, "grad_norm": 0.7071548104286194, "learning_rate": 3.3367219189657555e-05, "loss": 1.6627, "step": 76505 }, { "epoch": 2.55, "grad_norm": 0.6791574358940125, "learning_rate": 3.3362428708117736e-05, "loss": 1.7227, "step": 76506 }, { "epoch": 2.55, "grad_norm": 0.7377076148986816, "learning_rate": 3.335763855023662e-05, "loss": 1.6742, "step": 76507 }, { "epoch": 2.55, "grad_norm": 0.6991047859191895, "learning_rate": 3.33528487160201e-05, "loss": 1.6652, "step": 76508 }, { "epoch": 2.55, "grad_norm": 0.6865958571434021, "learning_rate": 3.33480592054739e-05, "loss": 1.6385, "step": 76509 }, { "epoch": 2.55, "grad_norm": 0.6681712865829468, "learning_rate": 3.334327001860382e-05, "loss": 1.7224, "step": 76510 }, { "epoch": 2.55, "grad_norm": 0.6969968676567078, "learning_rate": 3.333848115541579e-05, "loss": 1.6794, "step": 76511 }, { "epoch": 2.55, "grad_norm": 0.710261881351471, "learning_rate": 3.333369261591556e-05, "loss": 1.7056, "step": 76512 }, { "epoch": 2.55, "grad_norm": 0.6956780552864075, "learning_rate": 3.332890440010888e-05, "loss": 1.6346, "step": 76513 }, { "epoch": 2.55, "grad_norm": 0.7086371779441833, "learning_rate": 3.33241165080016e-05, "loss": 1.6687, "step": 76514 }, { "epoch": 2.55, "grad_norm": 0.6984153985977173, "learning_rate": 3.331932893959961e-05, "loss": 1.6797, "step": 76515 }, { "epoch": 2.55, "grad_norm": 0.7087447047233582, "learning_rate": 3.331454169490865e-05, "loss": 1.7363, "step": 76516 }, { "epoch": 2.55, "grad_norm": 0.6785717010498047, "learning_rate": 3.3309754773934504e-05, "loss": 1.6832, "step": 76517 }, { "epoch": 2.55, "grad_norm": 0.6907912492752075, "learning_rate": 3.3304968176683076e-05, "loss": 1.7386, "step": 76518 }, { "epoch": 2.55, "grad_norm": 0.6893631815910339, "learning_rate": 3.330018190316013e-05, "loss": 1.66, "step": 76519 }, { "epoch": 2.55, "grad_norm": 0.7215965986251831, "learning_rate": 3.329539595337139e-05, "loss": 1.6889, "step": 76520 }, { "epoch": 2.55, "grad_norm": 0.7217867374420166, "learning_rate": 3.329061032732274e-05, "loss": 1.6597, "step": 76521 }, { "epoch": 2.55, "grad_norm": 0.706159770488739, "learning_rate": 3.328582502502006e-05, "loss": 1.6746, "step": 76522 }, { "epoch": 2.55, "grad_norm": 0.6832789182662964, "learning_rate": 3.328104004646908e-05, "loss": 1.6622, "step": 76523 }, { "epoch": 2.55, "grad_norm": 0.6775563955307007, "learning_rate": 3.327625539167558e-05, "loss": 1.6338, "step": 76524 }, { "epoch": 2.55, "grad_norm": 0.6917871832847595, "learning_rate": 3.327147106064543e-05, "loss": 1.6896, "step": 76525 }, { "epoch": 2.55, "grad_norm": 0.7064269781112671, "learning_rate": 3.326668705338446e-05, "loss": 1.7252, "step": 76526 }, { "epoch": 2.55, "grad_norm": 0.6762517690658569, "learning_rate": 3.326190336989832e-05, "loss": 1.6109, "step": 76527 }, { "epoch": 2.55, "grad_norm": 0.6962600350379944, "learning_rate": 3.325712001019296e-05, "loss": 1.7314, "step": 76528 }, { "epoch": 2.55, "grad_norm": 0.7076072096824646, "learning_rate": 3.325233697427423e-05, "loss": 1.6279, "step": 76529 }, { "epoch": 2.55, "grad_norm": 0.6972707509994507, "learning_rate": 3.3247554262147824e-05, "loss": 1.6394, "step": 76530 }, { "epoch": 2.55, "grad_norm": 0.727069616317749, "learning_rate": 3.324277187381954e-05, "loss": 1.723, "step": 76531 }, { "epoch": 2.55, "grad_norm": 0.6998381018638611, "learning_rate": 3.323798980929527e-05, "loss": 1.7048, "step": 76532 }, { "epoch": 2.55, "grad_norm": 0.7131485342979431, "learning_rate": 3.323320806858078e-05, "loss": 1.6592, "step": 76533 }, { "epoch": 2.55, "grad_norm": 0.7313118577003479, "learning_rate": 3.322842665168184e-05, "loss": 1.6904, "step": 76534 }, { "epoch": 2.55, "grad_norm": 0.7066590189933777, "learning_rate": 3.3223645558604264e-05, "loss": 1.7202, "step": 76535 }, { "epoch": 2.55, "grad_norm": 0.7070010304450989, "learning_rate": 3.3218864789353916e-05, "loss": 1.704, "step": 76536 }, { "epoch": 2.55, "grad_norm": 0.7032864093780518, "learning_rate": 3.321408434393656e-05, "loss": 1.6579, "step": 76537 }, { "epoch": 2.55, "grad_norm": 0.7474804520606995, "learning_rate": 3.320930422235797e-05, "loss": 1.6576, "step": 76538 }, { "epoch": 2.55, "grad_norm": 0.73014235496521, "learning_rate": 3.320452442462399e-05, "loss": 1.6224, "step": 76539 }, { "epoch": 2.55, "grad_norm": 0.7073395848274231, "learning_rate": 3.3199744950740456e-05, "loss": 1.7166, "step": 76540 }, { "epoch": 2.55, "grad_norm": 0.6848093271255493, "learning_rate": 3.319496580071303e-05, "loss": 1.6864, "step": 76541 }, { "epoch": 2.55, "grad_norm": 0.7243630886077881, "learning_rate": 3.319018697454765e-05, "loss": 1.7432, "step": 76542 }, { "epoch": 2.55, "grad_norm": 0.7074434161186218, "learning_rate": 3.318540847225003e-05, "loss": 1.7123, "step": 76543 }, { "epoch": 2.55, "grad_norm": 0.717761218547821, "learning_rate": 3.3180630293826074e-05, "loss": 1.7877, "step": 76544 }, { "epoch": 2.55, "grad_norm": 0.6915435194969177, "learning_rate": 3.317585243928147e-05, "loss": 1.6571, "step": 76545 }, { "epoch": 2.55, "grad_norm": 0.7178359031677246, "learning_rate": 3.317107490862212e-05, "loss": 1.6997, "step": 76546 }, { "epoch": 2.55, "grad_norm": 0.6997546553611755, "learning_rate": 3.316629770185375e-05, "loss": 1.7223, "step": 76547 }, { "epoch": 2.55, "grad_norm": 0.7139437794685364, "learning_rate": 3.316152081898212e-05, "loss": 1.7237, "step": 76548 }, { "epoch": 2.55, "grad_norm": 0.6940538883209229, "learning_rate": 3.315674426001317e-05, "loss": 1.7059, "step": 76549 }, { "epoch": 2.55, "grad_norm": 0.6999984979629517, "learning_rate": 3.315196802495254e-05, "loss": 1.6554, "step": 76550 }, { "epoch": 2.55, "grad_norm": 0.6903106570243835, "learning_rate": 3.314719211380615e-05, "loss": 1.6693, "step": 76551 }, { "epoch": 2.55, "grad_norm": 0.7038585543632507, "learning_rate": 3.314241652657975e-05, "loss": 1.7708, "step": 76552 }, { "epoch": 2.55, "grad_norm": 0.7028780579566956, "learning_rate": 3.31376412632791e-05, "loss": 1.681, "step": 76553 }, { "epoch": 2.55, "grad_norm": 0.7063329815864563, "learning_rate": 3.31328663239101e-05, "loss": 1.74, "step": 76554 }, { "epoch": 2.55, "grad_norm": 0.707594096660614, "learning_rate": 3.3128091708478375e-05, "loss": 1.7336, "step": 76555 }, { "epoch": 2.55, "grad_norm": 0.7301568388938904, "learning_rate": 3.3123317416989924e-05, "loss": 1.6797, "step": 76556 }, { "epoch": 2.55, "grad_norm": 0.7601117491722107, "learning_rate": 3.3118543449450375e-05, "loss": 1.7295, "step": 76557 }, { "epoch": 2.55, "grad_norm": 0.7043855786323547, "learning_rate": 3.311376980586565e-05, "loss": 1.694, "step": 76558 }, { "epoch": 2.55, "grad_norm": 0.7029935717582703, "learning_rate": 3.310899648624149e-05, "loss": 1.6341, "step": 76559 }, { "epoch": 2.55, "grad_norm": 0.6911576390266418, "learning_rate": 3.310422349058362e-05, "loss": 1.6875, "step": 76560 }, { "epoch": 2.55, "grad_norm": 0.6902744770050049, "learning_rate": 3.3099450818897955e-05, "loss": 1.6503, "step": 76561 }, { "epoch": 2.55, "grad_norm": 0.7080625891685486, "learning_rate": 3.3094678471190205e-05, "loss": 1.7487, "step": 76562 }, { "epoch": 2.55, "grad_norm": 0.7043654918670654, "learning_rate": 3.308990644746616e-05, "loss": 1.696, "step": 76563 }, { "epoch": 2.55, "grad_norm": 0.7183694839477539, "learning_rate": 3.3085134747731655e-05, "loss": 1.6133, "step": 76564 }, { "epoch": 2.55, "grad_norm": 0.7096413373947144, "learning_rate": 3.308036337199248e-05, "loss": 1.6228, "step": 76565 }, { "epoch": 2.55, "grad_norm": 0.6954084634780884, "learning_rate": 3.307559232025446e-05, "loss": 1.6862, "step": 76566 }, { "epoch": 2.55, "grad_norm": 0.7227034568786621, "learning_rate": 3.307082159252327e-05, "loss": 1.6942, "step": 76567 }, { "epoch": 2.55, "grad_norm": 0.6937536001205444, "learning_rate": 3.306605118880482e-05, "loss": 1.6729, "step": 76568 }, { "epoch": 2.55, "grad_norm": 0.7022018432617188, "learning_rate": 3.306128110910485e-05, "loss": 1.6128, "step": 76569 }, { "epoch": 2.55, "grad_norm": 0.6931636929512024, "learning_rate": 3.305651135342909e-05, "loss": 1.7499, "step": 76570 }, { "epoch": 2.55, "grad_norm": 0.6767531633377075, "learning_rate": 3.3051741921783435e-05, "loss": 1.6689, "step": 76571 }, { "epoch": 2.55, "grad_norm": 0.7169455289840698, "learning_rate": 3.3046972814173714e-05, "loss": 1.7144, "step": 76572 }, { "epoch": 2.55, "grad_norm": 0.7170693874359131, "learning_rate": 3.3042204030605514e-05, "loss": 1.7004, "step": 76573 }, { "epoch": 2.55, "grad_norm": 0.7033863663673401, "learning_rate": 3.303743557108477e-05, "loss": 1.7103, "step": 76574 }, { "epoch": 2.55, "grad_norm": 0.7220211625099182, "learning_rate": 3.303266743561728e-05, "loss": 1.7525, "step": 76575 }, { "epoch": 2.55, "grad_norm": 0.7178981304168701, "learning_rate": 3.30278996242088e-05, "loss": 1.7097, "step": 76576 }, { "epoch": 2.55, "grad_norm": 0.7041574716567993, "learning_rate": 3.302313213686506e-05, "loss": 1.6678, "step": 76577 }, { "epoch": 2.55, "grad_norm": 0.7043493390083313, "learning_rate": 3.301836497359189e-05, "loss": 1.7179, "step": 76578 }, { "epoch": 2.55, "grad_norm": 0.7134799361228943, "learning_rate": 3.301359813439523e-05, "loss": 1.6216, "step": 76579 }, { "epoch": 2.55, "grad_norm": 0.693615734577179, "learning_rate": 3.300883161928059e-05, "loss": 1.7068, "step": 76580 }, { "epoch": 2.55, "grad_norm": 0.694429874420166, "learning_rate": 3.300406542825387e-05, "loss": 1.5525, "step": 76581 }, { "epoch": 2.55, "grad_norm": 0.7103008031845093, "learning_rate": 3.299929956132097e-05, "loss": 1.7482, "step": 76582 }, { "epoch": 2.55, "grad_norm": 0.6820396780967712, "learning_rate": 3.299453401848755e-05, "loss": 1.6242, "step": 76583 }, { "epoch": 2.55, "grad_norm": 0.6914730668067932, "learning_rate": 3.2989768799759374e-05, "loss": 1.634, "step": 76584 }, { "epoch": 2.55, "grad_norm": 0.693061888217926, "learning_rate": 3.298500390514227e-05, "loss": 1.6933, "step": 76585 }, { "epoch": 2.55, "grad_norm": 0.6917118430137634, "learning_rate": 3.2980239334642165e-05, "loss": 1.6716, "step": 76586 }, { "epoch": 2.55, "grad_norm": 0.6803773045539856, "learning_rate": 3.297547508826456e-05, "loss": 1.6925, "step": 76587 }, { "epoch": 2.55, "grad_norm": 0.7271561026573181, "learning_rate": 3.2970711166015404e-05, "loss": 1.6705, "step": 76588 }, { "epoch": 2.55, "grad_norm": 0.6788022518157959, "learning_rate": 3.296594756790054e-05, "loss": 1.6509, "step": 76589 }, { "epoch": 2.55, "grad_norm": 0.7118315100669861, "learning_rate": 3.2961184293925625e-05, "loss": 1.7055, "step": 76590 }, { "epoch": 2.55, "grad_norm": 0.6984169483184814, "learning_rate": 3.295642134409645e-05, "loss": 1.722, "step": 76591 }, { "epoch": 2.55, "grad_norm": 0.6984550952911377, "learning_rate": 3.295165871841885e-05, "loss": 1.6212, "step": 76592 }, { "epoch": 2.55, "grad_norm": 0.7206032276153564, "learning_rate": 3.2946896416898683e-05, "loss": 1.6772, "step": 76593 }, { "epoch": 2.55, "grad_norm": 0.7025713920593262, "learning_rate": 3.2942134439541514e-05, "loss": 1.6605, "step": 76594 }, { "epoch": 2.55, "grad_norm": 0.7023718953132629, "learning_rate": 3.2937372786353266e-05, "loss": 1.669, "step": 76595 }, { "epoch": 2.55, "grad_norm": 0.7361632585525513, "learning_rate": 3.293261145733974e-05, "loss": 1.7284, "step": 76596 }, { "epoch": 2.55, "grad_norm": 0.7026962637901306, "learning_rate": 3.292785045250667e-05, "loss": 1.7035, "step": 76597 }, { "epoch": 2.55, "grad_norm": 0.7006241083145142, "learning_rate": 3.292308977185977e-05, "loss": 1.6262, "step": 76598 }, { "epoch": 2.55, "grad_norm": 0.6887252926826477, "learning_rate": 3.291832941540497e-05, "loss": 1.7246, "step": 76599 }, { "epoch": 2.55, "grad_norm": 0.6882050037384033, "learning_rate": 3.291356938314795e-05, "loss": 1.7411, "step": 76600 }, { "epoch": 2.55, "grad_norm": 0.6904738545417786, "learning_rate": 3.290880967509446e-05, "loss": 1.6879, "step": 76601 }, { "epoch": 2.55, "grad_norm": 0.7095968127250671, "learning_rate": 3.290405029125029e-05, "loss": 1.679, "step": 76602 }, { "epoch": 2.55, "grad_norm": 0.6941787004470825, "learning_rate": 3.289929123162135e-05, "loss": 1.636, "step": 76603 }, { "epoch": 2.55, "grad_norm": 0.6874533295631409, "learning_rate": 3.289453249621329e-05, "loss": 1.6944, "step": 76604 }, { "epoch": 2.55, "grad_norm": 0.7087113857269287, "learning_rate": 3.288977408503187e-05, "loss": 1.6772, "step": 76605 }, { "epoch": 2.55, "grad_norm": 0.6913740038871765, "learning_rate": 3.288501599808293e-05, "loss": 1.6291, "step": 76606 }, { "epoch": 2.55, "grad_norm": 0.681647002696991, "learning_rate": 3.288025823537226e-05, "loss": 1.6758, "step": 76607 }, { "epoch": 2.55, "grad_norm": 0.6997430920600891, "learning_rate": 3.287550079690552e-05, "loss": 1.6217, "step": 76608 }, { "epoch": 2.55, "grad_norm": 0.6867309808731079, "learning_rate": 3.287074368268864e-05, "loss": 1.694, "step": 76609 }, { "epoch": 2.55, "grad_norm": 0.6977072954177856, "learning_rate": 3.2865986892727246e-05, "loss": 1.6354, "step": 76610 }, { "epoch": 2.55, "grad_norm": 0.717681884765625, "learning_rate": 3.286123042702724e-05, "loss": 1.5839, "step": 76611 }, { "epoch": 2.55, "grad_norm": 0.7058249711990356, "learning_rate": 3.2856474285594284e-05, "loss": 1.6382, "step": 76612 }, { "epoch": 2.55, "grad_norm": 0.6977168321609497, "learning_rate": 3.285171846843426e-05, "loss": 1.6295, "step": 76613 }, { "epoch": 2.55, "grad_norm": 0.7048500776290894, "learning_rate": 3.284696297555288e-05, "loss": 1.7164, "step": 76614 }, { "epoch": 2.55, "grad_norm": 0.7102704048156738, "learning_rate": 3.28422078069559e-05, "loss": 1.7763, "step": 76615 }, { "epoch": 2.55, "grad_norm": 0.7124853134155273, "learning_rate": 3.2837452962649144e-05, "loss": 1.6919, "step": 76616 }, { "epoch": 2.55, "grad_norm": 0.7056410312652588, "learning_rate": 3.283269844263832e-05, "loss": 1.7296, "step": 76617 }, { "epoch": 2.55, "grad_norm": 0.720770537853241, "learning_rate": 3.282794424692927e-05, "loss": 1.6247, "step": 76618 }, { "epoch": 2.55, "grad_norm": 0.6942452192306519, "learning_rate": 3.282319037552774e-05, "loss": 1.6363, "step": 76619 }, { "epoch": 2.55, "grad_norm": 0.6829620599746704, "learning_rate": 3.281843682843942e-05, "loss": 1.7192, "step": 76620 }, { "epoch": 2.55, "grad_norm": 0.6965193152427673, "learning_rate": 3.2813683605670205e-05, "loss": 1.6317, "step": 76621 }, { "epoch": 2.55, "grad_norm": 0.7375093698501587, "learning_rate": 3.280893070722579e-05, "loss": 1.6283, "step": 76622 }, { "epoch": 2.55, "grad_norm": 0.7075843214988708, "learning_rate": 3.2804178133111966e-05, "loss": 1.6693, "step": 76623 }, { "epoch": 2.55, "grad_norm": 0.7044051289558411, "learning_rate": 3.2799425883334474e-05, "loss": 1.666, "step": 76624 }, { "epoch": 2.55, "grad_norm": 0.6868316531181335, "learning_rate": 3.27946739578992e-05, "loss": 1.7453, "step": 76625 }, { "epoch": 2.55, "grad_norm": 0.7051461338996887, "learning_rate": 3.278992235681177e-05, "loss": 1.6824, "step": 76626 }, { "epoch": 2.55, "grad_norm": 0.7106118202209473, "learning_rate": 3.278517108007795e-05, "loss": 1.6066, "step": 76627 }, { "epoch": 2.55, "grad_norm": 0.7147907018661499, "learning_rate": 3.2780420127703644e-05, "loss": 1.6715, "step": 76628 }, { "epoch": 2.55, "grad_norm": 0.6998029351234436, "learning_rate": 3.277566949969453e-05, "loss": 1.7262, "step": 76629 }, { "epoch": 2.55, "grad_norm": 0.6982524991035461, "learning_rate": 3.2770919196056286e-05, "loss": 1.651, "step": 76630 }, { "epoch": 2.55, "grad_norm": 0.6623503565788269, "learning_rate": 3.27661692167948e-05, "loss": 1.6315, "step": 76631 }, { "epoch": 2.55, "grad_norm": 0.6926848888397217, "learning_rate": 3.276141956191587e-05, "loss": 1.6806, "step": 76632 }, { "epoch": 2.55, "grad_norm": 0.6963782906532288, "learning_rate": 3.275667023142522e-05, "loss": 1.649, "step": 76633 }, { "epoch": 2.55, "grad_norm": 0.7079330682754517, "learning_rate": 3.2751921225328516e-05, "loss": 1.6045, "step": 76634 }, { "epoch": 2.55, "grad_norm": 0.7027833461761475, "learning_rate": 3.274717254363165e-05, "loss": 1.6558, "step": 76635 }, { "epoch": 2.55, "grad_norm": 0.7300971150398254, "learning_rate": 3.2742424186340356e-05, "loss": 1.6676, "step": 76636 }, { "epoch": 2.55, "grad_norm": 0.713483989238739, "learning_rate": 3.273767615346029e-05, "loss": 1.6581, "step": 76637 }, { "epoch": 2.55, "grad_norm": 0.7052889466285706, "learning_rate": 3.2732928444997354e-05, "loss": 1.6489, "step": 76638 }, { "epoch": 2.55, "grad_norm": 0.6938468217849731, "learning_rate": 3.272818106095727e-05, "loss": 1.5948, "step": 76639 }, { "epoch": 2.55, "grad_norm": 0.7024007439613342, "learning_rate": 3.272343400134584e-05, "loss": 1.6526, "step": 76640 }, { "epoch": 2.55, "grad_norm": 0.7130590081214905, "learning_rate": 3.2718687266168686e-05, "loss": 1.769, "step": 76641 }, { "epoch": 2.55, "grad_norm": 0.697858989238739, "learning_rate": 3.271394085543174e-05, "loss": 1.7347, "step": 76642 }, { "epoch": 2.55, "grad_norm": 0.6781439781188965, "learning_rate": 3.2709194769140704e-05, "loss": 1.6652, "step": 76643 }, { "epoch": 2.55, "grad_norm": 0.705687940120697, "learning_rate": 3.2704449007301226e-05, "loss": 1.667, "step": 76644 }, { "epoch": 2.55, "grad_norm": 0.7366330623626709, "learning_rate": 3.2699703569919176e-05, "loss": 1.6486, "step": 76645 }, { "epoch": 2.55, "grad_norm": 0.6925458908081055, "learning_rate": 3.269495845700039e-05, "loss": 1.721, "step": 76646 }, { "epoch": 2.55, "grad_norm": 0.6834039688110352, "learning_rate": 3.269021366855051e-05, "loss": 1.656, "step": 76647 }, { "epoch": 2.55, "grad_norm": 0.6938654184341431, "learning_rate": 3.2685469204575254e-05, "loss": 1.6415, "step": 76648 }, { "epoch": 2.55, "grad_norm": 0.7003542184829712, "learning_rate": 3.268072506508054e-05, "loss": 1.6965, "step": 76649 }, { "epoch": 2.55, "grad_norm": 0.6679548025131226, "learning_rate": 3.267598125007202e-05, "loss": 1.6659, "step": 76650 }, { "epoch": 2.55, "grad_norm": 0.7012444138526917, "learning_rate": 3.267123775955541e-05, "loss": 1.7511, "step": 76651 }, { "epoch": 2.55, "grad_norm": 0.7035627365112305, "learning_rate": 3.266649459353653e-05, "loss": 1.6328, "step": 76652 }, { "epoch": 2.55, "grad_norm": 0.678843080997467, "learning_rate": 3.2661751752021206e-05, "loss": 1.7295, "step": 76653 }, { "epoch": 2.55, "grad_norm": 0.7153681516647339, "learning_rate": 3.265700923501513e-05, "loss": 1.7093, "step": 76654 }, { "epoch": 2.55, "grad_norm": 0.6970043182373047, "learning_rate": 3.265226704252397e-05, "loss": 1.7212, "step": 76655 }, { "epoch": 2.55, "grad_norm": 0.6845540404319763, "learning_rate": 3.2647525174553626e-05, "loss": 1.7298, "step": 76656 }, { "epoch": 2.55, "grad_norm": 0.7175313830375671, "learning_rate": 3.2642783631109814e-05, "loss": 1.6966, "step": 76657 }, { "epoch": 2.55, "grad_norm": 0.7082796692848206, "learning_rate": 3.2638042412198206e-05, "loss": 1.7148, "step": 76658 }, { "epoch": 2.55, "grad_norm": 0.7032382488250732, "learning_rate": 3.263330151782466e-05, "loss": 1.6598, "step": 76659 }, { "epoch": 2.55, "grad_norm": 0.7064346671104431, "learning_rate": 3.2628560947994875e-05, "loss": 1.6107, "step": 76660 }, { "epoch": 2.55, "grad_norm": 0.6857024431228638, "learning_rate": 3.262382070271464e-05, "loss": 1.6587, "step": 76661 }, { "epoch": 2.55, "grad_norm": 0.712102472782135, "learning_rate": 3.261908078198966e-05, "loss": 1.5948, "step": 76662 }, { "epoch": 2.55, "grad_norm": 0.7074213624000549, "learning_rate": 3.2614341185825757e-05, "loss": 1.7091, "step": 76663 }, { "epoch": 2.55, "grad_norm": 0.6746529340744019, "learning_rate": 3.260960191422866e-05, "loss": 1.6601, "step": 76664 }, { "epoch": 2.55, "grad_norm": 0.7008141875267029, "learning_rate": 3.2604862967204025e-05, "loss": 1.6946, "step": 76665 }, { "epoch": 2.55, "grad_norm": 0.699608564376831, "learning_rate": 3.260012434475776e-05, "loss": 1.6958, "step": 76666 }, { "epoch": 2.55, "grad_norm": 0.698305606842041, "learning_rate": 3.259538604689549e-05, "loss": 1.6462, "step": 76667 }, { "epoch": 2.55, "grad_norm": 0.6979452967643738, "learning_rate": 3.259064807362307e-05, "loss": 1.735, "step": 76668 }, { "epoch": 2.55, "grad_norm": 0.7066366672515869, "learning_rate": 3.2585910424946235e-05, "loss": 1.6836, "step": 76669 }, { "epoch": 2.55, "grad_norm": 0.7151865363121033, "learning_rate": 3.258117310087062e-05, "loss": 1.7404, "step": 76670 }, { "epoch": 2.55, "grad_norm": 0.684877336025238, "learning_rate": 3.25764361014021e-05, "loss": 1.6765, "step": 76671 }, { "epoch": 2.55, "grad_norm": 0.6973206996917725, "learning_rate": 3.257169942654636e-05, "loss": 1.6637, "step": 76672 }, { "epoch": 2.55, "grad_norm": 0.6837280988693237, "learning_rate": 3.256696307630922e-05, "loss": 1.7187, "step": 76673 }, { "epoch": 2.55, "grad_norm": 0.7132902145385742, "learning_rate": 3.2562227050696296e-05, "loss": 1.6626, "step": 76674 }, { "epoch": 2.55, "grad_norm": 0.7031249403953552, "learning_rate": 3.255749134971351e-05, "loss": 1.7644, "step": 76675 }, { "epoch": 2.55, "grad_norm": 0.7130885124206543, "learning_rate": 3.255275597336653e-05, "loss": 1.701, "step": 76676 }, { "epoch": 2.55, "grad_norm": 0.7177408933639526, "learning_rate": 3.2548020921661e-05, "loss": 1.6996, "step": 76677 }, { "epoch": 2.55, "grad_norm": 0.7029979825019836, "learning_rate": 3.254328619460288e-05, "loss": 1.725, "step": 76678 }, { "epoch": 2.55, "grad_norm": 0.729185163974762, "learning_rate": 3.253855179219776e-05, "loss": 1.6453, "step": 76679 }, { "epoch": 2.55, "grad_norm": 0.7064552307128906, "learning_rate": 3.253381771445138e-05, "loss": 1.6986, "step": 76680 }, { "epoch": 2.55, "grad_norm": 0.7005037665367126, "learning_rate": 3.252908396136953e-05, "loss": 1.7149, "step": 76681 }, { "epoch": 2.55, "grad_norm": 0.7005801200866699, "learning_rate": 3.252435053295803e-05, "loss": 1.7086, "step": 76682 }, { "epoch": 2.55, "grad_norm": 0.6717658042907715, "learning_rate": 3.251961742922259e-05, "loss": 1.6959, "step": 76683 }, { "epoch": 2.55, "grad_norm": 0.6959806084632874, "learning_rate": 3.251488465016883e-05, "loss": 1.641, "step": 76684 }, { "epoch": 2.55, "grad_norm": 0.6821348667144775, "learning_rate": 3.2510152195802645e-05, "loss": 1.614, "step": 76685 }, { "epoch": 2.55, "grad_norm": 0.7155463695526123, "learning_rate": 3.250542006612973e-05, "loss": 1.6949, "step": 76686 }, { "epoch": 2.55, "grad_norm": 0.6998069286346436, "learning_rate": 3.250068826115575e-05, "loss": 1.6693, "step": 76687 }, { "epoch": 2.55, "grad_norm": 0.7093717455863953, "learning_rate": 3.249595678088653e-05, "loss": 1.7005, "step": 76688 }, { "epoch": 2.55, "grad_norm": 0.7091019153594971, "learning_rate": 3.249122562532791e-05, "loss": 1.713, "step": 76689 }, { "epoch": 2.55, "grad_norm": 0.7131403088569641, "learning_rate": 3.248649479448544e-05, "loss": 1.7163, "step": 76690 }, { "epoch": 2.55, "grad_norm": 0.7104920148849487, "learning_rate": 3.248176428836492e-05, "loss": 1.7081, "step": 76691 }, { "epoch": 2.55, "grad_norm": 0.6973156332969666, "learning_rate": 3.247703410697218e-05, "loss": 1.6654, "step": 76692 }, { "epoch": 2.55, "grad_norm": 0.7032734155654907, "learning_rate": 3.247230425031291e-05, "loss": 1.6513, "step": 76693 }, { "epoch": 2.55, "grad_norm": 0.6801222562789917, "learning_rate": 3.246757471839281e-05, "loss": 1.643, "step": 76694 }, { "epoch": 2.55, "grad_norm": 0.6824302077293396, "learning_rate": 3.246284551121764e-05, "loss": 1.6011, "step": 76695 }, { "epoch": 2.55, "grad_norm": 0.687453031539917, "learning_rate": 3.2458116628793266e-05, "loss": 1.6989, "step": 76696 }, { "epoch": 2.55, "grad_norm": 0.7130586504936218, "learning_rate": 3.245338807112518e-05, "loss": 1.7335, "step": 76697 }, { "epoch": 2.55, "grad_norm": 0.6861822009086609, "learning_rate": 3.244865983821927e-05, "loss": 1.7181, "step": 76698 }, { "epoch": 2.55, "grad_norm": 0.7204962372779846, "learning_rate": 3.2443931930081344e-05, "loss": 1.6929, "step": 76699 }, { "epoch": 2.55, "grad_norm": 0.7274846434593201, "learning_rate": 3.2439204346717064e-05, "loss": 1.594, "step": 76700 }, { "epoch": 2.55, "grad_norm": 0.7108488082885742, "learning_rate": 3.2434477088132084e-05, "loss": 1.5883, "step": 76701 }, { "epoch": 2.55, "grad_norm": 0.7362909317016602, "learning_rate": 3.242975015433224e-05, "loss": 1.7415, "step": 76702 }, { "epoch": 2.55, "grad_norm": 0.7043060660362244, "learning_rate": 3.242502354532338e-05, "loss": 1.759, "step": 76703 }, { "epoch": 2.55, "grad_norm": 0.6925826072692871, "learning_rate": 3.242029726111098e-05, "loss": 1.6835, "step": 76704 }, { "epoch": 2.55, "grad_norm": 0.6936995387077332, "learning_rate": 3.241557130170094e-05, "loss": 1.6676, "step": 76705 }, { "epoch": 2.55, "grad_norm": 0.7273083925247192, "learning_rate": 3.241084566709901e-05, "loss": 1.713, "step": 76706 }, { "epoch": 2.55, "grad_norm": 0.7172331213951111, "learning_rate": 3.2406120357310884e-05, "loss": 1.6173, "step": 76707 }, { "epoch": 2.55, "grad_norm": 0.706425130367279, "learning_rate": 3.240139537234223e-05, "loss": 1.635, "step": 76708 }, { "epoch": 2.55, "grad_norm": 0.70875084400177, "learning_rate": 3.239667071219888e-05, "loss": 1.645, "step": 76709 }, { "epoch": 2.55, "grad_norm": 0.691254734992981, "learning_rate": 3.239194637688662e-05, "loss": 1.6032, "step": 76710 }, { "epoch": 2.55, "grad_norm": 0.6957784295082092, "learning_rate": 3.2387222366411024e-05, "loss": 1.6479, "step": 76711 }, { "epoch": 2.55, "grad_norm": 0.6962591409683228, "learning_rate": 3.2382498680777915e-05, "loss": 1.6871, "step": 76712 }, { "epoch": 2.55, "grad_norm": 0.726127028465271, "learning_rate": 3.237777531999305e-05, "loss": 1.7361, "step": 76713 }, { "epoch": 2.55, "grad_norm": 0.6807413697242737, "learning_rate": 3.237305228406216e-05, "loss": 1.6412, "step": 76714 }, { "epoch": 2.55, "grad_norm": 0.6900265216827393, "learning_rate": 3.236832957299088e-05, "loss": 1.6976, "step": 76715 }, { "epoch": 2.55, "grad_norm": 0.7076967358589172, "learning_rate": 3.2363607186785065e-05, "loss": 1.6821, "step": 76716 }, { "epoch": 2.55, "grad_norm": 0.6830500960350037, "learning_rate": 3.2358885125450416e-05, "loss": 1.7012, "step": 76717 }, { "epoch": 2.55, "grad_norm": 0.7197266817092896, "learning_rate": 3.235416338899256e-05, "loss": 1.7335, "step": 76718 }, { "epoch": 2.55, "grad_norm": 0.7273037433624268, "learning_rate": 3.234944197741736e-05, "loss": 1.6045, "step": 76719 }, { "epoch": 2.55, "grad_norm": 0.6923882961273193, "learning_rate": 3.23447208907305e-05, "loss": 1.6751, "step": 76720 }, { "epoch": 2.55, "grad_norm": 0.6930170059204102, "learning_rate": 3.234000012893776e-05, "loss": 1.6952, "step": 76721 }, { "epoch": 2.55, "grad_norm": 0.6934273838996887, "learning_rate": 3.2335279692044727e-05, "loss": 1.6581, "step": 76722 }, { "epoch": 2.55, "grad_norm": 0.7072911858558655, "learning_rate": 3.233055958005732e-05, "loss": 1.7253, "step": 76723 }, { "epoch": 2.55, "grad_norm": 0.6909856796264648, "learning_rate": 3.2325839792981154e-05, "loss": 1.7344, "step": 76724 }, { "epoch": 2.55, "grad_norm": 0.6918980479240417, "learning_rate": 3.232112033082192e-05, "loss": 1.6773, "step": 76725 }, { "epoch": 2.55, "grad_norm": 0.7050656080245972, "learning_rate": 3.231640119358546e-05, "loss": 1.6888, "step": 76726 }, { "epoch": 2.55, "grad_norm": 0.7016475796699524, "learning_rate": 3.2311682381277395e-05, "loss": 1.7411, "step": 76727 }, { "epoch": 2.55, "grad_norm": 0.7023873925209045, "learning_rate": 3.230696389390357e-05, "loss": 1.6616, "step": 76728 }, { "epoch": 2.55, "grad_norm": 0.6909922957420349, "learning_rate": 3.2302245731469586e-05, "loss": 1.6541, "step": 76729 }, { "epoch": 2.55, "grad_norm": 0.6913545727729797, "learning_rate": 3.229752789398128e-05, "loss": 1.6587, "step": 76730 }, { "epoch": 2.55, "grad_norm": 0.6905694603919983, "learning_rate": 3.2292810381444365e-05, "loss": 1.6015, "step": 76731 }, { "epoch": 2.55, "grad_norm": 0.6969485282897949, "learning_rate": 3.228809319386445e-05, "loss": 1.7045, "step": 76732 }, { "epoch": 2.55, "grad_norm": 0.6835605502128601, "learning_rate": 3.228337633124739e-05, "loss": 1.7174, "step": 76733 }, { "epoch": 2.55, "grad_norm": 0.6826178431510925, "learning_rate": 3.227865979359882e-05, "loss": 1.6284, "step": 76734 }, { "epoch": 2.55, "grad_norm": 0.7065618634223938, "learning_rate": 3.227394358092459e-05, "loss": 1.7049, "step": 76735 }, { "epoch": 2.55, "grad_norm": 0.7230693101882935, "learning_rate": 3.2269227693230304e-05, "loss": 1.6738, "step": 76736 }, { "epoch": 2.55, "grad_norm": 0.7046542763710022, "learning_rate": 3.2264512130521694e-05, "loss": 1.7354, "step": 76737 }, { "epoch": 2.55, "grad_norm": 0.7223771810531616, "learning_rate": 3.2259796892804576e-05, "loss": 1.731, "step": 76738 }, { "epoch": 2.55, "grad_norm": 0.7093362808227539, "learning_rate": 3.225508198008453e-05, "loss": 1.6527, "step": 76739 }, { "epoch": 2.55, "grad_norm": 0.7205063700675964, "learning_rate": 3.225036739236747e-05, "loss": 1.7486, "step": 76740 }, { "epoch": 2.55, "grad_norm": 0.7230136394500732, "learning_rate": 3.22456531296589e-05, "loss": 1.5915, "step": 76741 }, { "epoch": 2.55, "grad_norm": 0.6823933720588684, "learning_rate": 3.224093919196474e-05, "loss": 1.6879, "step": 76742 }, { "epoch": 2.55, "grad_norm": 0.6907026767730713, "learning_rate": 3.2236225579290633e-05, "loss": 1.6766, "step": 76743 }, { "epoch": 2.55, "grad_norm": 0.6830536723136902, "learning_rate": 3.223151229164222e-05, "loss": 1.6551, "step": 76744 }, { "epoch": 2.55, "grad_norm": 0.6923879981040955, "learning_rate": 3.2226799329025386e-05, "loss": 1.6563, "step": 76745 }, { "epoch": 2.55, "grad_norm": 0.6802946925163269, "learning_rate": 3.222208669144575e-05, "loss": 1.6902, "step": 76746 }, { "epoch": 2.55, "grad_norm": 0.707815945148468, "learning_rate": 3.2217374378908964e-05, "loss": 1.694, "step": 76747 }, { "epoch": 2.55, "grad_norm": 0.7001131176948547, "learning_rate": 3.221266239142087e-05, "loss": 1.7153, "step": 76748 }, { "epoch": 2.55, "grad_norm": 0.6882669925689697, "learning_rate": 3.220795072898717e-05, "loss": 1.6633, "step": 76749 }, { "epoch": 2.55, "grad_norm": 0.7068408727645874, "learning_rate": 3.2203239391613564e-05, "loss": 1.7163, "step": 76750 }, { "epoch": 2.55, "grad_norm": 0.6828204989433289, "learning_rate": 3.219852837930574e-05, "loss": 1.6204, "step": 76751 }, { "epoch": 2.55, "grad_norm": 0.7216894030570984, "learning_rate": 3.219381769206949e-05, "loss": 1.6829, "step": 76752 }, { "epoch": 2.55, "grad_norm": 0.7027077078819275, "learning_rate": 3.218910732991047e-05, "loss": 1.6772, "step": 76753 }, { "epoch": 2.55, "grad_norm": 0.6837497353553772, "learning_rate": 3.218439729283434e-05, "loss": 1.6591, "step": 76754 }, { "epoch": 2.55, "grad_norm": 0.6982113718986511, "learning_rate": 3.217968758084694e-05, "loss": 1.647, "step": 76755 }, { "epoch": 2.55, "grad_norm": 0.6997371912002563, "learning_rate": 3.2174978193953946e-05, "loss": 1.6806, "step": 76756 }, { "epoch": 2.55, "grad_norm": 0.7283768653869629, "learning_rate": 3.217026913216111e-05, "loss": 1.7306, "step": 76757 }, { "epoch": 2.55, "grad_norm": 0.6928653120994568, "learning_rate": 3.216556039547401e-05, "loss": 1.8216, "step": 76758 }, { "epoch": 2.55, "grad_norm": 0.7091997265815735, "learning_rate": 3.216085198389848e-05, "loss": 1.6654, "step": 76759 }, { "epoch": 2.55, "grad_norm": 0.7011334896087646, "learning_rate": 3.2156143897440325e-05, "loss": 1.7019, "step": 76760 }, { "epoch": 2.55, "grad_norm": 0.7215645909309387, "learning_rate": 3.2151436136105025e-05, "loss": 1.7801, "step": 76761 }, { "epoch": 2.55, "grad_norm": 0.6748011112213135, "learning_rate": 3.2146728699898417e-05, "loss": 1.7177, "step": 76762 }, { "epoch": 2.55, "grad_norm": 0.7081114649772644, "learning_rate": 3.214202158882626e-05, "loss": 1.7227, "step": 76763 }, { "epoch": 2.55, "grad_norm": 0.716212272644043, "learning_rate": 3.213731480289422e-05, "loss": 1.651, "step": 76764 }, { "epoch": 2.55, "grad_norm": 0.7103855609893799, "learning_rate": 3.213260834210799e-05, "loss": 1.7066, "step": 76765 }, { "epoch": 2.55, "grad_norm": 0.7012420296669006, "learning_rate": 3.2127902206473334e-05, "loss": 1.6419, "step": 76766 }, { "epoch": 2.55, "grad_norm": 0.721293032169342, "learning_rate": 3.212319639599594e-05, "loss": 1.7221, "step": 76767 }, { "epoch": 2.55, "grad_norm": 0.6863323450088501, "learning_rate": 3.211849091068145e-05, "loss": 1.6901, "step": 76768 }, { "epoch": 2.55, "grad_norm": 0.742611825466156, "learning_rate": 3.211378575053565e-05, "loss": 1.7973, "step": 76769 }, { "epoch": 2.55, "grad_norm": 0.7331673502922058, "learning_rate": 3.21090809155643e-05, "loss": 1.6697, "step": 76770 }, { "epoch": 2.55, "grad_norm": 0.7484248876571655, "learning_rate": 3.210437640577307e-05, "loss": 1.6888, "step": 76771 }, { "epoch": 2.55, "grad_norm": 0.6959711909294128, "learning_rate": 3.2099672221167584e-05, "loss": 1.7211, "step": 76772 }, { "epoch": 2.55, "grad_norm": 0.7328292727470398, "learning_rate": 3.209496836175367e-05, "loss": 1.7143, "step": 76773 }, { "epoch": 2.55, "grad_norm": 0.7092140316963196, "learning_rate": 3.2090264827536995e-05, "loss": 1.6098, "step": 76774 }, { "epoch": 2.55, "grad_norm": 0.6949377059936523, "learning_rate": 3.2085561618523214e-05, "loss": 1.7279, "step": 76775 }, { "epoch": 2.55, "grad_norm": 0.7146768569946289, "learning_rate": 3.208085873471813e-05, "loss": 1.6164, "step": 76776 }, { "epoch": 2.55, "grad_norm": 0.7021954655647278, "learning_rate": 3.2076156176127365e-05, "loss": 1.657, "step": 76777 }, { "epoch": 2.55, "grad_norm": 0.7168882489204407, "learning_rate": 3.207145394275672e-05, "loss": 1.6697, "step": 76778 }, { "epoch": 2.55, "grad_norm": 0.7228842973709106, "learning_rate": 3.206675203461182e-05, "loss": 1.6728, "step": 76779 }, { "epoch": 2.55, "grad_norm": 0.7157759070396423, "learning_rate": 3.206205045169843e-05, "loss": 1.7482, "step": 76780 }, { "epoch": 2.55, "grad_norm": 0.6899657845497131, "learning_rate": 3.2057349194022244e-05, "loss": 1.6753, "step": 76781 }, { "epoch": 2.55, "grad_norm": 0.681603729724884, "learning_rate": 3.2052648261588884e-05, "loss": 1.7332, "step": 76782 }, { "epoch": 2.55, "grad_norm": 0.7034363746643066, "learning_rate": 3.2047947654404194e-05, "loss": 1.6585, "step": 76783 }, { "epoch": 2.55, "grad_norm": 0.7085369825363159, "learning_rate": 3.204324737247376e-05, "loss": 1.7176, "step": 76784 }, { "epoch": 2.55, "grad_norm": 0.7220296859741211, "learning_rate": 3.203854741580341e-05, "loss": 1.7251, "step": 76785 }, { "epoch": 2.55, "grad_norm": 0.7070183753967285, "learning_rate": 3.203384778439877e-05, "loss": 1.6351, "step": 76786 }, { "epoch": 2.55, "grad_norm": 0.6996433734893799, "learning_rate": 3.202914847826551e-05, "loss": 1.6376, "step": 76787 }, { "epoch": 2.55, "grad_norm": 0.6999461054801941, "learning_rate": 3.2024449497409424e-05, "loss": 1.6256, "step": 76788 }, { "epoch": 2.55, "grad_norm": 0.7036805152893066, "learning_rate": 3.2019750841836134e-05, "loss": 1.6352, "step": 76789 }, { "epoch": 2.55, "grad_norm": 0.686052143573761, "learning_rate": 3.2015052511551406e-05, "loss": 1.7085, "step": 76790 }, { "epoch": 2.55, "grad_norm": 0.6969305276870728, "learning_rate": 3.201035450656087e-05, "loss": 1.6673, "step": 76791 }, { "epoch": 2.55, "grad_norm": 0.760486364364624, "learning_rate": 3.200565682687036e-05, "loss": 1.6584, "step": 76792 }, { "epoch": 2.55, "grad_norm": 0.6985951066017151, "learning_rate": 3.200095947248546e-05, "loss": 1.6637, "step": 76793 }, { "epoch": 2.55, "grad_norm": 0.712039053440094, "learning_rate": 3.199626244341188e-05, "loss": 1.6395, "step": 76794 }, { "epoch": 2.55, "grad_norm": 0.6848275661468506, "learning_rate": 3.19915657396554e-05, "loss": 1.6577, "step": 76795 }, { "epoch": 2.56, "grad_norm": 0.7322115302085876, "learning_rate": 3.198686936122166e-05, "loss": 1.6698, "step": 76796 }, { "epoch": 2.56, "grad_norm": 0.7018176317214966, "learning_rate": 3.198217330811631e-05, "loss": 1.6585, "step": 76797 }, { "epoch": 2.56, "grad_norm": 0.7064719200134277, "learning_rate": 3.1977477580345126e-05, "loss": 1.6798, "step": 76798 }, { "epoch": 2.56, "grad_norm": 0.7444115281105042, "learning_rate": 3.197278217791386e-05, "loss": 1.6921, "step": 76799 }, { "epoch": 2.56, "grad_norm": 0.7110456824302673, "learning_rate": 3.196808710082811e-05, "loss": 1.6116, "step": 76800 }, { "epoch": 2.56, "grad_norm": 0.7280962467193604, "learning_rate": 3.1963392349093575e-05, "loss": 1.7084, "step": 76801 }, { "epoch": 2.56, "grad_norm": 0.668988823890686, "learning_rate": 3.195869792271605e-05, "loss": 1.6492, "step": 76802 }, { "epoch": 2.56, "grad_norm": 0.7313008904457092, "learning_rate": 3.195400382170116e-05, "loss": 1.7222, "step": 76803 }, { "epoch": 2.56, "grad_norm": 0.6885697245597839, "learning_rate": 3.194931004605457e-05, "loss": 1.7067, "step": 76804 }, { "epoch": 2.56, "grad_norm": 0.7306437492370605, "learning_rate": 3.1944616595782005e-05, "loss": 1.6765, "step": 76805 }, { "epoch": 2.56, "grad_norm": 0.697063684463501, "learning_rate": 3.19399234708893e-05, "loss": 1.6794, "step": 76806 }, { "epoch": 2.56, "grad_norm": 0.7093300223350525, "learning_rate": 3.19352306713819e-05, "loss": 1.7346, "step": 76807 }, { "epoch": 2.56, "grad_norm": 2.5114917755126953, "learning_rate": 3.1930538197265665e-05, "loss": 1.702, "step": 76808 }, { "epoch": 2.56, "grad_norm": 0.7158169746398926, "learning_rate": 3.19258460485463e-05, "loss": 1.7212, "step": 76809 }, { "epoch": 2.56, "grad_norm": 0.7170513868331909, "learning_rate": 3.192115422522947e-05, "loss": 1.7458, "step": 76810 }, { "epoch": 2.56, "grad_norm": 0.7024266719818115, "learning_rate": 3.191646272732078e-05, "loss": 1.7104, "step": 76811 }, { "epoch": 2.56, "grad_norm": 0.7316475510597229, "learning_rate": 3.191177155482602e-05, "loss": 1.6348, "step": 76812 }, { "epoch": 2.56, "grad_norm": 0.751573920249939, "learning_rate": 3.190708070775097e-05, "loss": 1.7157, "step": 76813 }, { "epoch": 2.56, "grad_norm": 0.7026650905609131, "learning_rate": 3.190239018610112e-05, "loss": 1.6471, "step": 76814 }, { "epoch": 2.56, "grad_norm": 0.7182216644287109, "learning_rate": 3.1897699989882284e-05, "loss": 1.6099, "step": 76815 }, { "epoch": 2.56, "grad_norm": 0.7170194983482361, "learning_rate": 3.189301011910016e-05, "loss": 1.6738, "step": 76816 }, { "epoch": 2.56, "grad_norm": 0.7001428008079529, "learning_rate": 3.188832057376044e-05, "loss": 1.738, "step": 76817 }, { "epoch": 2.56, "grad_norm": 0.6683592200279236, "learning_rate": 3.188363135386872e-05, "loss": 1.6492, "step": 76818 }, { "epoch": 2.56, "grad_norm": 0.7200010418891907, "learning_rate": 3.1878942459430764e-05, "loss": 1.6625, "step": 76819 }, { "epoch": 2.56, "grad_norm": 0.7105434536933899, "learning_rate": 3.1874253890452404e-05, "loss": 1.7769, "step": 76820 }, { "epoch": 2.56, "grad_norm": 0.7062740325927734, "learning_rate": 3.186956564693907e-05, "loss": 1.6474, "step": 76821 }, { "epoch": 2.56, "grad_norm": 0.6944386959075928, "learning_rate": 3.1864877728896575e-05, "loss": 1.7293, "step": 76822 }, { "epoch": 2.56, "grad_norm": 0.6946623921394348, "learning_rate": 3.186019013633067e-05, "loss": 1.7161, "step": 76823 }, { "epoch": 2.56, "grad_norm": 0.7207026481628418, "learning_rate": 3.185550286924696e-05, "loss": 1.731, "step": 76824 }, { "epoch": 2.56, "grad_norm": 0.6921250224113464, "learning_rate": 3.185081592765113e-05, "loss": 1.662, "step": 76825 }, { "epoch": 2.56, "grad_norm": 0.6953170299530029, "learning_rate": 3.1846129311548895e-05, "loss": 1.6758, "step": 76826 }, { "epoch": 2.56, "grad_norm": 0.7309091687202454, "learning_rate": 3.1841443020946055e-05, "loss": 1.6868, "step": 76827 }, { "epoch": 2.56, "grad_norm": 0.7273789644241333, "learning_rate": 3.18367570558481e-05, "loss": 1.6569, "step": 76828 }, { "epoch": 2.56, "grad_norm": 0.7140655517578125, "learning_rate": 3.183207141626077e-05, "loss": 1.6648, "step": 76829 }, { "epoch": 2.56, "grad_norm": 0.69846510887146, "learning_rate": 3.182738610218989e-05, "loss": 1.6624, "step": 76830 }, { "epoch": 2.56, "grad_norm": 0.7050057053565979, "learning_rate": 3.182270111364105e-05, "loss": 1.6274, "step": 76831 }, { "epoch": 2.56, "grad_norm": 0.7116304039955139, "learning_rate": 3.1818016450619844e-05, "loss": 1.7053, "step": 76832 }, { "epoch": 2.56, "grad_norm": 0.7313368916511536, "learning_rate": 3.1813332113132136e-05, "loss": 1.7517, "step": 76833 }, { "epoch": 2.56, "grad_norm": 0.7026599645614624, "learning_rate": 3.180864810118353e-05, "loss": 1.6545, "step": 76834 }, { "epoch": 2.56, "grad_norm": 0.7013322710990906, "learning_rate": 3.180396441477962e-05, "loss": 1.7512, "step": 76835 }, { "epoch": 2.56, "grad_norm": 0.6925927400588989, "learning_rate": 3.179928105392622e-05, "loss": 1.7096, "step": 76836 }, { "epoch": 2.56, "grad_norm": 0.7062948942184448, "learning_rate": 3.1794598018629e-05, "loss": 1.6447, "step": 76837 }, { "epoch": 2.56, "grad_norm": 0.6973154544830322, "learning_rate": 3.178991530889363e-05, "loss": 1.6835, "step": 76838 }, { "epoch": 2.56, "grad_norm": 0.6957277059555054, "learning_rate": 3.178523292472573e-05, "loss": 1.5968, "step": 76839 }, { "epoch": 2.56, "grad_norm": 0.7062481641769409, "learning_rate": 3.17805508661311e-05, "loss": 1.7076, "step": 76840 }, { "epoch": 2.56, "grad_norm": 0.6910509467124939, "learning_rate": 3.177586913311534e-05, "loss": 1.6544, "step": 76841 }, { "epoch": 2.56, "grad_norm": 0.7006826996803284, "learning_rate": 3.177118772568413e-05, "loss": 1.6563, "step": 76842 }, { "epoch": 2.56, "grad_norm": 0.690153956413269, "learning_rate": 3.176650664384321e-05, "loss": 1.6426, "step": 76843 }, { "epoch": 2.56, "grad_norm": 0.6826822757720947, "learning_rate": 3.176182588759818e-05, "loss": 1.6354, "step": 76844 }, { "epoch": 2.56, "grad_norm": 0.6941127777099609, "learning_rate": 3.1757145456954825e-05, "loss": 1.6609, "step": 76845 }, { "epoch": 2.56, "grad_norm": 0.6856755614280701, "learning_rate": 3.175246535191871e-05, "loss": 1.6181, "step": 76846 }, { "epoch": 2.56, "grad_norm": 0.7052512168884277, "learning_rate": 3.174778557249563e-05, "loss": 1.6792, "step": 76847 }, { "epoch": 2.56, "grad_norm": 0.6947759985923767, "learning_rate": 3.174310611869125e-05, "loss": 1.6317, "step": 76848 }, { "epoch": 2.56, "grad_norm": 0.7023921608924866, "learning_rate": 3.17384269905111e-05, "loss": 1.6903, "step": 76849 }, { "epoch": 2.56, "grad_norm": 0.6872164607048035, "learning_rate": 3.173374818796107e-05, "loss": 1.6749, "step": 76850 }, { "epoch": 2.56, "grad_norm": 0.678535521030426, "learning_rate": 3.1729069711046696e-05, "loss": 1.6706, "step": 76851 }, { "epoch": 2.56, "grad_norm": 0.682769775390625, "learning_rate": 3.172439155977373e-05, "loss": 1.6768, "step": 76852 }, { "epoch": 2.56, "grad_norm": 0.7011903524398804, "learning_rate": 3.1719713734147846e-05, "loss": 1.7022, "step": 76853 }, { "epoch": 2.56, "grad_norm": 0.6795001029968262, "learning_rate": 3.171503623417459e-05, "loss": 1.7403, "step": 76854 }, { "epoch": 2.56, "grad_norm": 0.7043069005012512, "learning_rate": 3.171035905985988e-05, "loss": 1.6712, "step": 76855 }, { "epoch": 2.56, "grad_norm": 0.7055377960205078, "learning_rate": 3.170568221120916e-05, "loss": 1.6735, "step": 76856 }, { "epoch": 2.56, "grad_norm": 0.6898350715637207, "learning_rate": 3.17010056882283e-05, "loss": 1.6265, "step": 76857 }, { "epoch": 2.56, "grad_norm": 0.7007701992988586, "learning_rate": 3.169632949092279e-05, "loss": 1.6488, "step": 76858 }, { "epoch": 2.56, "grad_norm": 0.686384916305542, "learning_rate": 3.1691653619298495e-05, "loss": 1.7014, "step": 76859 }, { "epoch": 2.56, "grad_norm": 0.6975535750389099, "learning_rate": 3.168697807336098e-05, "loss": 1.7076, "step": 76860 }, { "epoch": 2.56, "grad_norm": 0.6981995701789856, "learning_rate": 3.16823028531159e-05, "loss": 1.6027, "step": 76861 }, { "epoch": 2.56, "grad_norm": 0.7006385326385498, "learning_rate": 3.1677627958568996e-05, "loss": 1.7022, "step": 76862 }, { "epoch": 2.56, "grad_norm": 0.702220618724823, "learning_rate": 3.1672953389725956e-05, "loss": 1.5914, "step": 76863 }, { "epoch": 2.56, "grad_norm": 0.7284119725227356, "learning_rate": 3.166827914659233e-05, "loss": 1.6779, "step": 76864 }, { "epoch": 2.56, "grad_norm": 0.6834603548049927, "learning_rate": 3.166360522917387e-05, "loss": 1.6688, "step": 76865 }, { "epoch": 2.56, "grad_norm": 0.6975290179252625, "learning_rate": 3.1658931637476326e-05, "loss": 1.6277, "step": 76866 }, { "epoch": 2.56, "grad_norm": 0.6792205572128296, "learning_rate": 3.165425837150533e-05, "loss": 1.6441, "step": 76867 }, { "epoch": 2.56, "grad_norm": 0.6861519813537598, "learning_rate": 3.1649585431266434e-05, "loss": 1.6104, "step": 76868 }, { "epoch": 2.56, "grad_norm": 0.6982797384262085, "learning_rate": 3.164491281676544e-05, "loss": 1.6961, "step": 76869 }, { "epoch": 2.56, "grad_norm": 0.6834116578102112, "learning_rate": 3.164024052800802e-05, "loss": 1.6902, "step": 76870 }, { "epoch": 2.56, "grad_norm": 0.6833656430244446, "learning_rate": 3.1635568564999726e-05, "loss": 1.5943, "step": 76871 }, { "epoch": 2.56, "grad_norm": 0.728094756603241, "learning_rate": 3.163089692774632e-05, "loss": 1.6787, "step": 76872 }, { "epoch": 2.56, "grad_norm": 0.7137684226036072, "learning_rate": 3.162622561625353e-05, "loss": 1.67, "step": 76873 }, { "epoch": 2.56, "grad_norm": 0.7093548774719238, "learning_rate": 3.162155463052696e-05, "loss": 1.7457, "step": 76874 }, { "epoch": 2.56, "grad_norm": 0.7056507468223572, "learning_rate": 3.161688397057219e-05, "loss": 1.6813, "step": 76875 }, { "epoch": 2.56, "grad_norm": 0.7121937870979309, "learning_rate": 3.1612213636395e-05, "loss": 1.7051, "step": 76876 }, { "epoch": 2.56, "grad_norm": 0.6926048994064331, "learning_rate": 3.160754362800114e-05, "loss": 1.6465, "step": 76877 }, { "epoch": 2.56, "grad_norm": 0.7039821743965149, "learning_rate": 3.160287394539608e-05, "loss": 1.6733, "step": 76878 }, { "epoch": 2.56, "grad_norm": 0.7229329347610474, "learning_rate": 3.159820458858557e-05, "loss": 1.6875, "step": 76879 }, { "epoch": 2.56, "grad_norm": 0.7219053506851196, "learning_rate": 3.159353555757535e-05, "loss": 1.6868, "step": 76880 }, { "epoch": 2.56, "grad_norm": 0.7017168998718262, "learning_rate": 3.1588866852371056e-05, "loss": 1.6713, "step": 76881 }, { "epoch": 2.56, "grad_norm": 0.695280909538269, "learning_rate": 3.158419847297826e-05, "loss": 1.6409, "step": 76882 }, { "epoch": 2.56, "grad_norm": 0.6897537112236023, "learning_rate": 3.1579530419402746e-05, "loss": 1.7314, "step": 76883 }, { "epoch": 2.56, "grad_norm": 0.7386922240257263, "learning_rate": 3.1574862691650124e-05, "loss": 1.7568, "step": 76884 }, { "epoch": 2.56, "grad_norm": 0.6973918676376343, "learning_rate": 3.157019528972604e-05, "loss": 1.6799, "step": 76885 }, { "epoch": 2.56, "grad_norm": 0.7066330313682556, "learning_rate": 3.156552821363615e-05, "loss": 1.731, "step": 76886 }, { "epoch": 2.56, "grad_norm": 0.7131381630897522, "learning_rate": 3.1560861463386245e-05, "loss": 1.675, "step": 76887 }, { "epoch": 2.56, "grad_norm": 0.7146182060241699, "learning_rate": 3.1556195038981894e-05, "loss": 1.6892, "step": 76888 }, { "epoch": 2.56, "grad_norm": 0.7152499556541443, "learning_rate": 3.1551528940428685e-05, "loss": 1.668, "step": 76889 }, { "epoch": 2.56, "grad_norm": 0.6998659372329712, "learning_rate": 3.154686316773245e-05, "loss": 1.6526, "step": 76890 }, { "epoch": 2.56, "grad_norm": 0.6944488286972046, "learning_rate": 3.154219772089875e-05, "loss": 1.6263, "step": 76891 }, { "epoch": 2.56, "grad_norm": 0.712102472782135, "learning_rate": 3.153753259993326e-05, "loss": 1.6803, "step": 76892 }, { "epoch": 2.56, "grad_norm": 0.6881595253944397, "learning_rate": 3.1532867804841654e-05, "loss": 1.7397, "step": 76893 }, { "epoch": 2.56, "grad_norm": 0.7110637426376343, "learning_rate": 3.1528203335629544e-05, "loss": 1.6347, "step": 76894 }, { "epoch": 2.56, "grad_norm": 0.716312825679779, "learning_rate": 3.152353919230268e-05, "loss": 1.7059, "step": 76895 }, { "epoch": 2.56, "grad_norm": 0.8303719162940979, "learning_rate": 3.151887537486667e-05, "loss": 1.6259, "step": 76896 }, { "epoch": 2.56, "grad_norm": 0.7153117656707764, "learning_rate": 3.15142118833272e-05, "loss": 1.6765, "step": 76897 }, { "epoch": 2.56, "grad_norm": 0.6938175559043884, "learning_rate": 3.150954871768994e-05, "loss": 1.7609, "step": 76898 }, { "epoch": 2.56, "grad_norm": 0.7046536803245544, "learning_rate": 3.1504885877960474e-05, "loss": 1.6671, "step": 76899 }, { "epoch": 2.56, "grad_norm": 0.7139639258384705, "learning_rate": 3.1500223364144536e-05, "loss": 1.7034, "step": 76900 }, { "epoch": 2.56, "grad_norm": 0.7104750871658325, "learning_rate": 3.149556117624777e-05, "loss": 1.6775, "step": 76901 }, { "epoch": 2.56, "grad_norm": 0.682981014251709, "learning_rate": 3.149089931427584e-05, "loss": 1.6155, "step": 76902 }, { "epoch": 2.56, "grad_norm": 0.698256254196167, "learning_rate": 3.14862377782344e-05, "loss": 1.6888, "step": 76903 }, { "epoch": 2.56, "grad_norm": 0.7280772924423218, "learning_rate": 3.1481576568129075e-05, "loss": 1.67, "step": 76904 }, { "epoch": 2.56, "grad_norm": 0.7395463585853577, "learning_rate": 3.1476915683965584e-05, "loss": 1.6782, "step": 76905 }, { "epoch": 2.56, "grad_norm": 0.678250789642334, "learning_rate": 3.14722551257495e-05, "loss": 1.6151, "step": 76906 }, { "epoch": 2.56, "grad_norm": 0.6888151168823242, "learning_rate": 3.146759489348658e-05, "loss": 1.6689, "step": 76907 }, { "epoch": 2.56, "grad_norm": 0.7064163684844971, "learning_rate": 3.146293498718239e-05, "loss": 1.6941, "step": 76908 }, { "epoch": 2.56, "grad_norm": 0.7085065245628357, "learning_rate": 3.145827540684268e-05, "loss": 1.7091, "step": 76909 }, { "epoch": 2.56, "grad_norm": 0.7121105194091797, "learning_rate": 3.145361615247306e-05, "loss": 1.6259, "step": 76910 }, { "epoch": 2.56, "grad_norm": 0.7052340507507324, "learning_rate": 3.1448957224079116e-05, "loss": 1.7134, "step": 76911 }, { "epoch": 2.56, "grad_norm": 0.7218639254570007, "learning_rate": 3.144429862166662e-05, "loss": 1.7434, "step": 76912 }, { "epoch": 2.56, "grad_norm": 0.6906221508979797, "learning_rate": 3.143964034524119e-05, "loss": 1.6608, "step": 76913 }, { "epoch": 2.56, "grad_norm": 0.7270923256874084, "learning_rate": 3.143498239480843e-05, "loss": 1.6917, "step": 76914 }, { "epoch": 2.56, "grad_norm": 0.6904137134552002, "learning_rate": 3.1430324770373994e-05, "loss": 1.6887, "step": 76915 }, { "epoch": 2.56, "grad_norm": 0.6860029697418213, "learning_rate": 3.142566747194365e-05, "loss": 1.6863, "step": 76916 }, { "epoch": 2.56, "grad_norm": 0.723970353603363, "learning_rate": 3.142101049952296e-05, "loss": 1.6877, "step": 76917 }, { "epoch": 2.56, "grad_norm": 0.7030214667320251, "learning_rate": 3.141635385311755e-05, "loss": 1.6719, "step": 76918 }, { "epoch": 2.56, "grad_norm": 0.6806685328483582, "learning_rate": 3.141169753273315e-05, "loss": 1.7421, "step": 76919 }, { "epoch": 2.56, "grad_norm": 0.7160877585411072, "learning_rate": 3.1407041538375386e-05, "loss": 1.6976, "step": 76920 }, { "epoch": 2.56, "grad_norm": 0.6735545992851257, "learning_rate": 3.140238587004983e-05, "loss": 1.6676, "step": 76921 }, { "epoch": 2.56, "grad_norm": 0.7475532293319702, "learning_rate": 3.13977305277622e-05, "loss": 1.7116, "step": 76922 }, { "epoch": 2.56, "grad_norm": 0.6750868558883667, "learning_rate": 3.13930755115183e-05, "loss": 1.699, "step": 76923 }, { "epoch": 2.56, "grad_norm": 0.7222195267677307, "learning_rate": 3.138842082132348e-05, "loss": 1.7021, "step": 76924 }, { "epoch": 2.56, "grad_norm": 0.6958310604095459, "learning_rate": 3.138376645718354e-05, "loss": 1.7057, "step": 76925 }, { "epoch": 2.56, "grad_norm": 0.7073346376419067, "learning_rate": 3.137911241910419e-05, "loss": 1.7494, "step": 76926 }, { "epoch": 2.56, "grad_norm": 0.7154093980789185, "learning_rate": 3.137445870709101e-05, "loss": 1.7314, "step": 76927 }, { "epoch": 2.56, "grad_norm": 0.704926609992981, "learning_rate": 3.13698053211496e-05, "loss": 1.6803, "step": 76928 }, { "epoch": 2.56, "grad_norm": 0.6929855346679688, "learning_rate": 3.136515226128569e-05, "loss": 1.6523, "step": 76929 }, { "epoch": 2.56, "grad_norm": 0.7241547703742981, "learning_rate": 3.1360499527505004e-05, "loss": 1.7803, "step": 76930 }, { "epoch": 2.56, "grad_norm": 0.7060564756393433, "learning_rate": 3.135584711981297e-05, "loss": 1.7301, "step": 76931 }, { "epoch": 2.56, "grad_norm": 0.691450834274292, "learning_rate": 3.135119503821536e-05, "loss": 1.7381, "step": 76932 }, { "epoch": 2.56, "grad_norm": 0.6852134466171265, "learning_rate": 3.134654328271786e-05, "loss": 1.6657, "step": 76933 }, { "epoch": 2.56, "grad_norm": 0.6974506378173828, "learning_rate": 3.1341891853326105e-05, "loss": 1.6669, "step": 76934 }, { "epoch": 2.56, "grad_norm": 0.7033374309539795, "learning_rate": 3.133724075004562e-05, "loss": 1.7332, "step": 76935 }, { "epoch": 2.56, "grad_norm": 0.7053874135017395, "learning_rate": 3.133258997288214e-05, "loss": 1.5946, "step": 76936 }, { "epoch": 2.56, "grad_norm": 0.7452484965324402, "learning_rate": 3.132793952184145e-05, "loss": 1.7272, "step": 76937 }, { "epoch": 2.56, "grad_norm": 0.7015738487243652, "learning_rate": 3.132328939692892e-05, "loss": 1.7351, "step": 76938 }, { "epoch": 2.56, "grad_norm": 0.6849596500396729, "learning_rate": 3.131863959815034e-05, "loss": 1.682, "step": 76939 }, { "epoch": 2.56, "grad_norm": 0.7271080017089844, "learning_rate": 3.131399012551138e-05, "loss": 1.5851, "step": 76940 }, { "epoch": 2.56, "grad_norm": 0.693941056728363, "learning_rate": 3.130934097901766e-05, "loss": 1.7623, "step": 76941 }, { "epoch": 2.56, "grad_norm": 0.717073917388916, "learning_rate": 3.1304692158674746e-05, "loss": 1.7403, "step": 76942 }, { "epoch": 2.56, "grad_norm": 0.6843459606170654, "learning_rate": 3.1300043664488375e-05, "loss": 1.6197, "step": 76943 }, { "epoch": 2.56, "grad_norm": 0.6980830430984497, "learning_rate": 3.129539549646422e-05, "loss": 1.6889, "step": 76944 }, { "epoch": 2.56, "grad_norm": 0.6969780921936035, "learning_rate": 3.129074765460781e-05, "loss": 1.5877, "step": 76945 }, { "epoch": 2.56, "grad_norm": 0.7221534252166748, "learning_rate": 3.1286100138924806e-05, "loss": 1.7699, "step": 76946 }, { "epoch": 2.56, "grad_norm": 0.724939227104187, "learning_rate": 3.1281452949420925e-05, "loss": 1.7079, "step": 76947 }, { "epoch": 2.56, "grad_norm": 0.71335369348526, "learning_rate": 3.127680608610179e-05, "loss": 1.6948, "step": 76948 }, { "epoch": 2.56, "grad_norm": 0.7014667987823486, "learning_rate": 3.1272159548972966e-05, "loss": 1.7053, "step": 76949 }, { "epoch": 2.56, "grad_norm": 0.6826551556587219, "learning_rate": 3.1267513338040204e-05, "loss": 1.7848, "step": 76950 }, { "epoch": 2.56, "grad_norm": 0.7008927464485168, "learning_rate": 3.1262867453309046e-05, "loss": 1.696, "step": 76951 }, { "epoch": 2.56, "grad_norm": 0.6936180591583252, "learning_rate": 3.125822189478515e-05, "loss": 1.6995, "step": 76952 }, { "epoch": 2.56, "grad_norm": 0.6885513663291931, "learning_rate": 3.125357666247418e-05, "loss": 1.6364, "step": 76953 }, { "epoch": 2.56, "grad_norm": 0.6882854700088501, "learning_rate": 3.1248931756381824e-05, "loss": 1.66, "step": 76954 }, { "epoch": 2.56, "grad_norm": 0.6981766819953918, "learning_rate": 3.124428717651365e-05, "loss": 1.724, "step": 76955 }, { "epoch": 2.56, "grad_norm": 0.7006532549858093, "learning_rate": 3.1239642922875254e-05, "loss": 1.7235, "step": 76956 }, { "epoch": 2.56, "grad_norm": 0.6972722411155701, "learning_rate": 3.123499899547239e-05, "loss": 1.5977, "step": 76957 }, { "epoch": 2.56, "grad_norm": 0.7345336675643921, "learning_rate": 3.123035539431064e-05, "loss": 1.6358, "step": 76958 }, { "epoch": 2.56, "grad_norm": 0.6771917939186096, "learning_rate": 3.1225712119395577e-05, "loss": 1.6459, "step": 76959 }, { "epoch": 2.56, "grad_norm": 0.6877230405807495, "learning_rate": 3.122106917073297e-05, "loss": 1.6575, "step": 76960 }, { "epoch": 2.56, "grad_norm": 0.7252100706100464, "learning_rate": 3.121642654832832e-05, "loss": 1.6366, "step": 76961 }, { "epoch": 2.56, "grad_norm": 0.6930394768714905, "learning_rate": 3.121178425218736e-05, "loss": 1.6368, "step": 76962 }, { "epoch": 2.56, "grad_norm": 0.6983530521392822, "learning_rate": 3.1207142282315664e-05, "loss": 1.7425, "step": 76963 }, { "epoch": 2.56, "grad_norm": 0.7044427990913391, "learning_rate": 3.120250063871892e-05, "loss": 1.682, "step": 76964 }, { "epoch": 2.56, "grad_norm": 0.7644147276878357, "learning_rate": 3.119785932140277e-05, "loss": 1.6032, "step": 76965 }, { "epoch": 2.56, "grad_norm": 0.7096846699714661, "learning_rate": 3.119321833037273e-05, "loss": 1.69, "step": 76966 }, { "epoch": 2.56, "grad_norm": 0.7074421048164368, "learning_rate": 3.118857766563457e-05, "loss": 1.655, "step": 76967 }, { "epoch": 2.56, "grad_norm": 0.688593864440918, "learning_rate": 3.118393732719381e-05, "loss": 1.7368, "step": 76968 }, { "epoch": 2.56, "grad_norm": 0.7230681777000427, "learning_rate": 3.117929731505623e-05, "loss": 1.6586, "step": 76969 }, { "epoch": 2.56, "grad_norm": 0.7096266150474548, "learning_rate": 3.117465762922736e-05, "loss": 1.6349, "step": 76970 }, { "epoch": 2.56, "grad_norm": 0.6845985054969788, "learning_rate": 3.117001826971277e-05, "loss": 1.6372, "step": 76971 }, { "epoch": 2.56, "grad_norm": 0.7260233759880066, "learning_rate": 3.116537923651825e-05, "loss": 1.7279, "step": 76972 }, { "epoch": 2.56, "grad_norm": 0.6824948787689209, "learning_rate": 3.1160740529649266e-05, "loss": 1.6447, "step": 76973 }, { "epoch": 2.56, "grad_norm": 0.6919456720352173, "learning_rate": 3.115610214911161e-05, "loss": 1.7014, "step": 76974 }, { "epoch": 2.56, "grad_norm": 0.7005528211593628, "learning_rate": 3.115146409491077e-05, "loss": 1.5518, "step": 76975 }, { "epoch": 2.56, "grad_norm": 0.7626314759254456, "learning_rate": 3.1146826367052494e-05, "loss": 1.5848, "step": 76976 }, { "epoch": 2.56, "grad_norm": 0.7290058135986328, "learning_rate": 3.114218896554236e-05, "loss": 1.7661, "step": 76977 }, { "epoch": 2.56, "grad_norm": 0.6992891430854797, "learning_rate": 3.1137551890385934e-05, "loss": 1.768, "step": 76978 }, { "epoch": 2.56, "grad_norm": 0.6819582581520081, "learning_rate": 3.113291514158895e-05, "loss": 1.6636, "step": 76979 }, { "epoch": 2.56, "grad_norm": 0.6867040395736694, "learning_rate": 3.1128278719157005e-05, "loss": 1.6718, "step": 76980 }, { "epoch": 2.56, "grad_norm": 0.7069045901298523, "learning_rate": 3.112364262309566e-05, "loss": 1.6875, "step": 76981 }, { "epoch": 2.56, "grad_norm": 0.6868495345115662, "learning_rate": 3.11190068534106e-05, "loss": 1.6427, "step": 76982 }, { "epoch": 2.56, "grad_norm": 0.6989524364471436, "learning_rate": 3.11143714101075e-05, "loss": 1.6726, "step": 76983 }, { "epoch": 2.56, "grad_norm": 0.6987593173980713, "learning_rate": 3.110973629319191e-05, "loss": 1.668, "step": 76984 }, { "epoch": 2.56, "grad_norm": 0.7090263962745667, "learning_rate": 3.110510150266944e-05, "loss": 1.7603, "step": 76985 }, { "epoch": 2.56, "grad_norm": 0.7133544087409973, "learning_rate": 3.110046703854584e-05, "loss": 1.6576, "step": 76986 }, { "epoch": 2.56, "grad_norm": 0.688701331615448, "learning_rate": 3.109583290082661e-05, "loss": 1.6112, "step": 76987 }, { "epoch": 2.56, "grad_norm": 0.6958053708076477, "learning_rate": 3.1091199089517385e-05, "loss": 1.7224, "step": 76988 }, { "epoch": 2.56, "grad_norm": 0.6708419322967529, "learning_rate": 3.1086565604623847e-05, "loss": 1.707, "step": 76989 }, { "epoch": 2.56, "grad_norm": 0.7030386328697205, "learning_rate": 3.1081932446151605e-05, "loss": 1.663, "step": 76990 }, { "epoch": 2.56, "grad_norm": 0.7108423113822937, "learning_rate": 3.107729961410631e-05, "loss": 1.6647, "step": 76991 }, { "epoch": 2.56, "grad_norm": 0.7019339799880981, "learning_rate": 3.107266710849346e-05, "loss": 1.6176, "step": 76992 }, { "epoch": 2.56, "grad_norm": 0.7021164894104004, "learning_rate": 3.10680349293188e-05, "loss": 1.6909, "step": 76993 }, { "epoch": 2.56, "grad_norm": 0.7038125395774841, "learning_rate": 3.1063403076588e-05, "loss": 1.7203, "step": 76994 }, { "epoch": 2.56, "grad_norm": 0.7118777632713318, "learning_rate": 3.105877155030651e-05, "loss": 1.6739, "step": 76995 }, { "epoch": 2.56, "grad_norm": 0.7414671182632446, "learning_rate": 3.105414035048004e-05, "loss": 1.7025, "step": 76996 }, { "epoch": 2.56, "grad_norm": 0.7032276391983032, "learning_rate": 3.10495094771143e-05, "loss": 1.7182, "step": 76997 }, { "epoch": 2.56, "grad_norm": 0.7126132249832153, "learning_rate": 3.104487893021478e-05, "loss": 1.7038, "step": 76998 }, { "epoch": 2.56, "grad_norm": 0.6931812763214111, "learning_rate": 3.104024870978711e-05, "loss": 1.7867, "step": 76999 }, { "epoch": 2.56, "grad_norm": 0.7286181449890137, "learning_rate": 3.1035618815837016e-05, "loss": 1.7106, "step": 77000 }, { "epoch": 2.56, "grad_norm": 0.7242705821990967, "learning_rate": 3.103098924837006e-05, "loss": 1.6895, "step": 77001 }, { "epoch": 2.56, "grad_norm": 0.6855898499488831, "learning_rate": 3.102636000739178e-05, "loss": 1.7128, "step": 77002 }, { "epoch": 2.56, "grad_norm": 0.7090556025505066, "learning_rate": 3.1021731092907866e-05, "loss": 1.6589, "step": 77003 }, { "epoch": 2.56, "grad_norm": 0.6753048300743103, "learning_rate": 3.1017102504923974e-05, "loss": 1.672, "step": 77004 }, { "epoch": 2.56, "grad_norm": 0.6982868909835815, "learning_rate": 3.1012474243445736e-05, "loss": 1.7362, "step": 77005 }, { "epoch": 2.56, "grad_norm": 0.7014238834381104, "learning_rate": 3.100784630847862e-05, "loss": 1.6662, "step": 77006 }, { "epoch": 2.56, "grad_norm": 0.6897954940795898, "learning_rate": 3.100321870002842e-05, "loss": 1.7335, "step": 77007 }, { "epoch": 2.56, "grad_norm": 0.7095971703529358, "learning_rate": 3.0998591418100694e-05, "loss": 1.6063, "step": 77008 }, { "epoch": 2.56, "grad_norm": 0.702231764793396, "learning_rate": 3.099396446270097e-05, "loss": 1.7127, "step": 77009 }, { "epoch": 2.56, "grad_norm": 0.7048791646957397, "learning_rate": 3.098933783383498e-05, "loss": 1.6684, "step": 77010 }, { "epoch": 2.56, "grad_norm": 0.7099552154541016, "learning_rate": 3.098471153150825e-05, "loss": 1.6304, "step": 77011 }, { "epoch": 2.56, "grad_norm": 0.7153515815734863, "learning_rate": 3.0980085555726517e-05, "loss": 1.6648, "step": 77012 }, { "epoch": 2.56, "grad_norm": 0.6980401873588562, "learning_rate": 3.097545990649524e-05, "loss": 1.6628, "step": 77013 }, { "epoch": 2.56, "grad_norm": 0.7198382019996643, "learning_rate": 3.097083458382017e-05, "loss": 1.7622, "step": 77014 }, { "epoch": 2.56, "grad_norm": 0.6851279139518738, "learning_rate": 3.096620958770688e-05, "loss": 1.736, "step": 77015 }, { "epoch": 2.56, "grad_norm": 0.7189629077911377, "learning_rate": 3.096158491816093e-05, "loss": 1.6358, "step": 77016 }, { "epoch": 2.56, "grad_norm": 0.7158539891242981, "learning_rate": 3.0956960575188015e-05, "loss": 1.6791, "step": 77017 }, { "epoch": 2.56, "grad_norm": 0.7085081934928894, "learning_rate": 3.0952336558793636e-05, "loss": 1.5908, "step": 77018 }, { "epoch": 2.56, "grad_norm": 0.7011229991912842, "learning_rate": 3.094771286898358e-05, "loss": 1.6752, "step": 77019 }, { "epoch": 2.56, "grad_norm": 0.7039126753807068, "learning_rate": 3.094308950576331e-05, "loss": 1.6968, "step": 77020 }, { "epoch": 2.56, "grad_norm": 0.6995799541473389, "learning_rate": 3.0938466469138457e-05, "loss": 1.7183, "step": 77021 }, { "epoch": 2.56, "grad_norm": 0.6908956170082092, "learning_rate": 3.0933843759114684e-05, "loss": 1.6458, "step": 77022 }, { "epoch": 2.56, "grad_norm": 0.6821398138999939, "learning_rate": 3.092922137569755e-05, "loss": 1.6768, "step": 77023 }, { "epoch": 2.56, "grad_norm": 0.742766797542572, "learning_rate": 3.092459931889276e-05, "loss": 1.6498, "step": 77024 }, { "epoch": 2.56, "grad_norm": 0.6968222856521606, "learning_rate": 3.09199775887058e-05, "loss": 1.6365, "step": 77025 }, { "epoch": 2.56, "grad_norm": 0.7126795649528503, "learning_rate": 3.091535618514237e-05, "loss": 1.6881, "step": 77026 }, { "epoch": 2.56, "grad_norm": 0.7464635968208313, "learning_rate": 3.09107351082081e-05, "loss": 1.7167, "step": 77027 }, { "epoch": 2.56, "grad_norm": 0.7287755012512207, "learning_rate": 3.090611435790844e-05, "loss": 1.7733, "step": 77028 }, { "epoch": 2.56, "grad_norm": 0.7100238800048828, "learning_rate": 3.09014939342492e-05, "loss": 1.727, "step": 77029 }, { "epoch": 2.56, "grad_norm": 0.6896377801895142, "learning_rate": 3.089687383723587e-05, "loss": 1.6872, "step": 77030 }, { "epoch": 2.56, "grad_norm": 0.7131986021995544, "learning_rate": 3.089225406687405e-05, "loss": 1.7394, "step": 77031 }, { "epoch": 2.56, "grad_norm": 0.7220844626426697, "learning_rate": 3.088763462316939e-05, "loss": 1.7302, "step": 77032 }, { "epoch": 2.56, "grad_norm": 0.702460765838623, "learning_rate": 3.0883015506127526e-05, "loss": 1.6462, "step": 77033 }, { "epoch": 2.56, "grad_norm": 0.6940010190010071, "learning_rate": 3.087839671575403e-05, "loss": 1.7404, "step": 77034 }, { "epoch": 2.56, "grad_norm": 0.69990074634552, "learning_rate": 3.087377825205445e-05, "loss": 1.6433, "step": 77035 }, { "epoch": 2.56, "grad_norm": 0.7041438817977905, "learning_rate": 3.086916011503452e-05, "loss": 1.6711, "step": 77036 }, { "epoch": 2.56, "grad_norm": 0.6862698197364807, "learning_rate": 3.086454230469974e-05, "loss": 1.7324, "step": 77037 }, { "epoch": 2.56, "grad_norm": 0.6783487200737, "learning_rate": 3.0859924821055726e-05, "loss": 1.7071, "step": 77038 }, { "epoch": 2.56, "grad_norm": 0.7080177664756775, "learning_rate": 3.0855307664108086e-05, "loss": 1.6788, "step": 77039 }, { "epoch": 2.56, "grad_norm": 0.6943018436431885, "learning_rate": 3.085069083386258e-05, "loss": 1.7307, "step": 77040 }, { "epoch": 2.56, "grad_norm": 0.686765730381012, "learning_rate": 3.0846074330324565e-05, "loss": 1.6935, "step": 77041 }, { "epoch": 2.56, "grad_norm": 0.688734233379364, "learning_rate": 3.084145815349974e-05, "loss": 1.6214, "step": 77042 }, { "epoch": 2.56, "grad_norm": 0.6938144564628601, "learning_rate": 3.08368423033938e-05, "loss": 1.6686, "step": 77043 }, { "epoch": 2.56, "grad_norm": 0.687343955039978, "learning_rate": 3.083222678001228e-05, "loss": 1.7082, "step": 77044 }, { "epoch": 2.56, "grad_norm": 0.6777008175849915, "learning_rate": 3.08276115833607e-05, "loss": 1.6591, "step": 77045 }, { "epoch": 2.56, "grad_norm": 0.7359321117401123, "learning_rate": 3.082299671344476e-05, "loss": 1.7723, "step": 77046 }, { "epoch": 2.56, "grad_norm": 0.6786274909973145, "learning_rate": 3.0818382170270116e-05, "loss": 1.6998, "step": 77047 }, { "epoch": 2.56, "grad_norm": 0.6891412138938904, "learning_rate": 3.0813767953842205e-05, "loss": 1.7125, "step": 77048 }, { "epoch": 2.56, "grad_norm": 0.693364679813385, "learning_rate": 3.080915406416672e-05, "loss": 1.7691, "step": 77049 }, { "epoch": 2.56, "grad_norm": 0.6926577091217041, "learning_rate": 3.080454050124932e-05, "loss": 1.6141, "step": 77050 }, { "epoch": 2.56, "grad_norm": 0.7017738223075867, "learning_rate": 3.079992726509554e-05, "loss": 1.69, "step": 77051 }, { "epoch": 2.56, "grad_norm": 0.6999145746231079, "learning_rate": 3.079531435571093e-05, "loss": 1.6361, "step": 77052 }, { "epoch": 2.56, "grad_norm": 0.6929921507835388, "learning_rate": 3.079070177310113e-05, "loss": 1.6909, "step": 77053 }, { "epoch": 2.56, "grad_norm": 0.7080183625221252, "learning_rate": 3.078608951727187e-05, "loss": 1.715, "step": 77054 }, { "epoch": 2.56, "grad_norm": 0.699110209941864, "learning_rate": 3.078147758822851e-05, "loss": 1.6723, "step": 77055 }, { "epoch": 2.56, "grad_norm": 0.7071789503097534, "learning_rate": 3.0776865985976774e-05, "loss": 1.6676, "step": 77056 }, { "epoch": 2.56, "grad_norm": 0.679593563079834, "learning_rate": 3.077225471052232e-05, "loss": 1.6515, "step": 77057 }, { "epoch": 2.56, "grad_norm": 0.6987976431846619, "learning_rate": 3.076764376187067e-05, "loss": 1.6477, "step": 77058 }, { "epoch": 2.56, "grad_norm": 0.7018337845802307, "learning_rate": 3.076303314002738e-05, "loss": 1.6873, "step": 77059 }, { "epoch": 2.56, "grad_norm": 0.7095475196838379, "learning_rate": 3.0758422844998144e-05, "loss": 1.6993, "step": 77060 }, { "epoch": 2.56, "grad_norm": 0.6815390586853027, "learning_rate": 3.075381287678854e-05, "loss": 1.6839, "step": 77061 }, { "epoch": 2.56, "grad_norm": 0.6935163140296936, "learning_rate": 3.074920323540404e-05, "loss": 1.8013, "step": 77062 }, { "epoch": 2.56, "grad_norm": 0.7144675850868225, "learning_rate": 3.074459392085037e-05, "loss": 1.6906, "step": 77063 }, { "epoch": 2.56, "grad_norm": 0.7290595769882202, "learning_rate": 3.0739984933133125e-05, "loss": 1.6834, "step": 77064 }, { "epoch": 2.56, "grad_norm": 0.694998025894165, "learning_rate": 3.073537627225786e-05, "loss": 1.6518, "step": 77065 }, { "epoch": 2.56, "grad_norm": 0.742702841758728, "learning_rate": 3.073076793823015e-05, "loss": 1.6847, "step": 77066 }, { "epoch": 2.56, "grad_norm": 0.6852500438690186, "learning_rate": 3.0726159931055646e-05, "loss": 1.7067, "step": 77067 }, { "epoch": 2.56, "grad_norm": 0.7024976015090942, "learning_rate": 3.072155225073989e-05, "loss": 1.6782, "step": 77068 }, { "epoch": 2.56, "grad_norm": 0.6973663568496704, "learning_rate": 3.071694489728847e-05, "loss": 1.6758, "step": 77069 }, { "epoch": 2.56, "grad_norm": 0.7056861519813538, "learning_rate": 3.071233787070697e-05, "loss": 1.6925, "step": 77070 }, { "epoch": 2.56, "grad_norm": 0.676541268825531, "learning_rate": 3.070773117100111e-05, "loss": 1.6654, "step": 77071 }, { "epoch": 2.56, "grad_norm": 0.7198522686958313, "learning_rate": 3.070312479817636e-05, "loss": 1.6231, "step": 77072 }, { "epoch": 2.56, "grad_norm": 0.8030285239219666, "learning_rate": 3.0698518752238265e-05, "loss": 1.7422, "step": 77073 }, { "epoch": 2.56, "grad_norm": 0.6987998485565186, "learning_rate": 3.069391303319259e-05, "loss": 1.717, "step": 77074 }, { "epoch": 2.56, "grad_norm": 0.7218825817108154, "learning_rate": 3.068930764104478e-05, "loss": 1.7191, "step": 77075 }, { "epoch": 2.56, "grad_norm": 0.7155303955078125, "learning_rate": 3.068470257580045e-05, "loss": 1.7486, "step": 77076 }, { "epoch": 2.56, "grad_norm": 0.6954141855239868, "learning_rate": 3.0680097837465246e-05, "loss": 1.7122, "step": 77077 }, { "epoch": 2.56, "grad_norm": 0.6897488236427307, "learning_rate": 3.067549342604467e-05, "loss": 1.7262, "step": 77078 }, { "epoch": 2.56, "grad_norm": 0.7048978209495544, "learning_rate": 3.067088934154443e-05, "loss": 1.7148, "step": 77079 }, { "epoch": 2.56, "grad_norm": 0.7215034365653992, "learning_rate": 3.0666285583969966e-05, "loss": 1.6874, "step": 77080 }, { "epoch": 2.56, "grad_norm": 0.699913501739502, "learning_rate": 3.066168215332701e-05, "loss": 1.6418, "step": 77081 }, { "epoch": 2.56, "grad_norm": 0.7169835567474365, "learning_rate": 3.065707904962108e-05, "loss": 1.6395, "step": 77082 }, { "epoch": 2.56, "grad_norm": 0.6844971776008606, "learning_rate": 3.065247627285772e-05, "loss": 1.6393, "step": 77083 }, { "epoch": 2.56, "grad_norm": 0.6978586316108704, "learning_rate": 3.0647873823042624e-05, "loss": 1.66, "step": 77084 }, { "epoch": 2.56, "grad_norm": 0.7081753611564636, "learning_rate": 3.064327170018126e-05, "loss": 1.698, "step": 77085 }, { "epoch": 2.56, "grad_norm": 0.6948254704475403, "learning_rate": 3.0638669904279354e-05, "loss": 1.6411, "step": 77086 }, { "epoch": 2.56, "grad_norm": 0.686941921710968, "learning_rate": 3.0634068435342407e-05, "loss": 1.6343, "step": 77087 }, { "epoch": 2.56, "grad_norm": 0.6967980265617371, "learning_rate": 3.0629467293375945e-05, "loss": 1.6804, "step": 77088 }, { "epoch": 2.56, "grad_norm": 0.7111620903015137, "learning_rate": 3.062486647838569e-05, "loss": 1.6695, "step": 77089 }, { "epoch": 2.56, "grad_norm": 0.7071695923805237, "learning_rate": 3.062026599037708e-05, "loss": 1.6595, "step": 77090 }, { "epoch": 2.56, "grad_norm": 0.7306190133094788, "learning_rate": 3.061566582935584e-05, "loss": 1.7099, "step": 77091 }, { "epoch": 2.56, "grad_norm": 0.7362539172172546, "learning_rate": 3.061106599532743e-05, "loss": 1.6974, "step": 77092 }, { "epoch": 2.56, "grad_norm": 0.6892216205596924, "learning_rate": 3.060646648829759e-05, "loss": 1.6572, "step": 77093 }, { "epoch": 2.56, "grad_norm": 0.6823193430900574, "learning_rate": 3.0601867308271765e-05, "loss": 1.7066, "step": 77094 }, { "epoch": 2.56, "grad_norm": 0.7072942852973938, "learning_rate": 3.059726845525553e-05, "loss": 1.6538, "step": 77095 }, { "epoch": 2.56, "grad_norm": 0.6795138120651245, "learning_rate": 3.0592669929254576e-05, "loss": 1.6407, "step": 77096 }, { "epoch": 2.57, "grad_norm": 0.6920665502548218, "learning_rate": 3.05880717302744e-05, "loss": 1.6801, "step": 77097 }, { "epoch": 2.57, "grad_norm": 0.7329950332641602, "learning_rate": 3.058347385832059e-05, "loss": 1.7239, "step": 77098 }, { "epoch": 2.57, "grad_norm": 0.7003999948501587, "learning_rate": 3.057887631339873e-05, "loss": 1.6416, "step": 77099 }, { "epoch": 2.57, "grad_norm": 0.6935651898384094, "learning_rate": 3.057427909551449e-05, "loss": 1.7146, "step": 77100 }, { "epoch": 2.57, "grad_norm": 0.7101874947547913, "learning_rate": 3.056968220467334e-05, "loss": 1.6349, "step": 77101 }, { "epoch": 2.57, "grad_norm": 0.7135102152824402, "learning_rate": 3.0565085640880885e-05, "loss": 1.6112, "step": 77102 }, { "epoch": 2.57, "grad_norm": 0.6925827264785767, "learning_rate": 3.056048940414274e-05, "loss": 1.6355, "step": 77103 }, { "epoch": 2.57, "grad_norm": 0.6819638013839722, "learning_rate": 3.055589349446448e-05, "loss": 1.695, "step": 77104 }, { "epoch": 2.57, "grad_norm": 0.6955868005752563, "learning_rate": 3.055129791185159e-05, "loss": 1.6593, "step": 77105 }, { "epoch": 2.57, "grad_norm": 0.7016794085502625, "learning_rate": 3.054670265630974e-05, "loss": 1.7129, "step": 77106 }, { "epoch": 2.57, "grad_norm": 0.7335764169692993, "learning_rate": 3.054210772784452e-05, "loss": 1.7705, "step": 77107 }, { "epoch": 2.57, "grad_norm": 0.6884848475456238, "learning_rate": 3.0537513126461496e-05, "loss": 1.6699, "step": 77108 }, { "epoch": 2.57, "grad_norm": 0.7079470157623291, "learning_rate": 3.0532918852166166e-05, "loss": 1.6701, "step": 77109 }, { "epoch": 2.57, "grad_norm": 0.7074273824691772, "learning_rate": 3.052832490496418e-05, "loss": 1.6273, "step": 77110 }, { "epoch": 2.57, "grad_norm": 0.6967689394950867, "learning_rate": 3.052373128486122e-05, "loss": 1.6647, "step": 77111 }, { "epoch": 2.57, "grad_norm": 0.7200865745544434, "learning_rate": 3.051913799186263e-05, "loss": 1.6698, "step": 77112 }, { "epoch": 2.57, "grad_norm": 0.7032793760299683, "learning_rate": 3.051454502597408e-05, "loss": 1.6138, "step": 77113 }, { "epoch": 2.57, "grad_norm": 0.7008686661720276, "learning_rate": 3.050995238720123e-05, "loss": 1.7025, "step": 77114 }, { "epoch": 2.57, "grad_norm": 0.7260372638702393, "learning_rate": 3.050536007554961e-05, "loss": 1.7844, "step": 77115 }, { "epoch": 2.57, "grad_norm": 0.7072750926017761, "learning_rate": 3.050076809102472e-05, "loss": 1.6841, "step": 77116 }, { "epoch": 2.57, "grad_norm": 0.7169598340988159, "learning_rate": 3.0496176433632214e-05, "loss": 1.6586, "step": 77117 }, { "epoch": 2.57, "grad_norm": 0.6954030394554138, "learning_rate": 3.0491585103377658e-05, "loss": 1.6427, "step": 77118 }, { "epoch": 2.57, "grad_norm": 0.6860897541046143, "learning_rate": 3.0486994100266548e-05, "loss": 1.6689, "step": 77119 }, { "epoch": 2.57, "grad_norm": 0.7077916860580444, "learning_rate": 3.0482403424304512e-05, "loss": 1.6244, "step": 77120 }, { "epoch": 2.57, "grad_norm": 0.7355422377586365, "learning_rate": 3.0477813075497182e-05, "loss": 1.7462, "step": 77121 }, { "epoch": 2.57, "grad_norm": 0.7084414958953857, "learning_rate": 3.0473223053850084e-05, "loss": 1.698, "step": 77122 }, { "epoch": 2.57, "grad_norm": 0.7167862057685852, "learning_rate": 3.0468633359368743e-05, "loss": 1.6997, "step": 77123 }, { "epoch": 2.57, "grad_norm": 0.6828568577766418, "learning_rate": 3.0464043992058794e-05, "loss": 1.6511, "step": 77124 }, { "epoch": 2.57, "grad_norm": 0.7042586803436279, "learning_rate": 3.04594549519258e-05, "loss": 1.7045, "step": 77125 }, { "epoch": 2.57, "grad_norm": 0.6880344152450562, "learning_rate": 3.0454866238975217e-05, "loss": 1.6671, "step": 77126 }, { "epoch": 2.57, "grad_norm": 0.7128933668136597, "learning_rate": 3.045027785321281e-05, "loss": 1.6843, "step": 77127 }, { "epoch": 2.57, "grad_norm": 0.7005029320716858, "learning_rate": 3.044568979464398e-05, "loss": 1.7331, "step": 77128 }, { "epoch": 2.57, "grad_norm": 0.7102156281471252, "learning_rate": 3.0441102063274413e-05, "loss": 1.6072, "step": 77129 }, { "epoch": 2.57, "grad_norm": 0.6891626715660095, "learning_rate": 3.0436514659109578e-05, "loss": 1.6104, "step": 77130 }, { "epoch": 2.57, "grad_norm": 0.6863685250282288, "learning_rate": 3.043192758215517e-05, "loss": 1.6802, "step": 77131 }, { "epoch": 2.57, "grad_norm": 0.7059362530708313, "learning_rate": 3.0427340832416647e-05, "loss": 1.6517, "step": 77132 }, { "epoch": 2.57, "grad_norm": 0.8849490284919739, "learning_rate": 3.042275440989961e-05, "loss": 1.7845, "step": 77133 }, { "epoch": 2.57, "grad_norm": 0.7022824287414551, "learning_rate": 3.0418168314609648e-05, "loss": 1.6817, "step": 77134 }, { "epoch": 2.57, "grad_norm": 0.7219133973121643, "learning_rate": 3.0413582546552262e-05, "loss": 1.73, "step": 77135 }, { "epoch": 2.57, "grad_norm": 0.7114177942276001, "learning_rate": 3.0408997105733112e-05, "loss": 1.7284, "step": 77136 }, { "epoch": 2.57, "grad_norm": 0.7023565173149109, "learning_rate": 3.0404411992157728e-05, "loss": 1.6843, "step": 77137 }, { "epoch": 2.57, "grad_norm": 0.7265009880065918, "learning_rate": 3.039982720583157e-05, "loss": 1.7292, "step": 77138 }, { "epoch": 2.57, "grad_norm": 0.6993937492370605, "learning_rate": 3.0395242746760406e-05, "loss": 1.6563, "step": 77139 }, { "epoch": 2.57, "grad_norm": 0.6965779662132263, "learning_rate": 3.0390658614949625e-05, "loss": 1.6707, "step": 77140 }, { "epoch": 2.57, "grad_norm": 0.7161381840705872, "learning_rate": 3.0386074810404894e-05, "loss": 1.702, "step": 77141 }, { "epoch": 2.57, "grad_norm": 0.6819043755531311, "learning_rate": 3.03814913331317e-05, "loss": 1.6825, "step": 77142 }, { "epoch": 2.57, "grad_norm": 0.7430104613304138, "learning_rate": 3.037690818313572e-05, "loss": 1.6114, "step": 77143 }, { "epoch": 2.57, "grad_norm": 0.7054510116577148, "learning_rate": 3.0372325360422435e-05, "loss": 1.6878, "step": 77144 }, { "epoch": 2.57, "grad_norm": 0.7070891857147217, "learning_rate": 3.0367742864997346e-05, "loss": 1.7661, "step": 77145 }, { "epoch": 2.57, "grad_norm": 0.6785801649093628, "learning_rate": 3.036316069686615e-05, "loss": 1.635, "step": 77146 }, { "epoch": 2.57, "grad_norm": 0.6977653503417969, "learning_rate": 3.0358578856034377e-05, "loss": 1.7868, "step": 77147 }, { "epoch": 2.57, "grad_norm": 0.6941023468971252, "learning_rate": 3.0353997342507486e-05, "loss": 1.718, "step": 77148 }, { "epoch": 2.57, "grad_norm": 0.7247598767280579, "learning_rate": 3.0349416156291106e-05, "loss": 1.693, "step": 77149 }, { "epoch": 2.57, "grad_norm": 0.6734232902526855, "learning_rate": 3.034483529739087e-05, "loss": 1.6713, "step": 77150 }, { "epoch": 2.57, "grad_norm": 0.7136348485946655, "learning_rate": 3.0340254765812234e-05, "loss": 1.7616, "step": 77151 }, { "epoch": 2.57, "grad_norm": 0.6985203623771667, "learning_rate": 3.0335674561560798e-05, "loss": 1.676, "step": 77152 }, { "epoch": 2.57, "grad_norm": 0.6690126657485962, "learning_rate": 3.033109468464212e-05, "loss": 1.7003, "step": 77153 }, { "epoch": 2.57, "grad_norm": 0.7141478657722473, "learning_rate": 3.03265151350618e-05, "loss": 1.7136, "step": 77154 }, { "epoch": 2.57, "grad_norm": 0.7436507940292358, "learning_rate": 3.03219359128253e-05, "loss": 1.7403, "step": 77155 }, { "epoch": 2.57, "grad_norm": 0.7028245329856873, "learning_rate": 3.0317357017938214e-05, "loss": 1.7643, "step": 77156 }, { "epoch": 2.57, "grad_norm": 0.6988735795021057, "learning_rate": 3.0312778450406238e-05, "loss": 1.7047, "step": 77157 }, { "epoch": 2.57, "grad_norm": 0.7261878848075867, "learning_rate": 3.03082002102347e-05, "loss": 1.6058, "step": 77158 }, { "epoch": 2.57, "grad_norm": 0.6977139115333557, "learning_rate": 3.0303622297429262e-05, "loss": 1.6675, "step": 77159 }, { "epoch": 2.57, "grad_norm": 0.6889440417289734, "learning_rate": 3.029904471199559e-05, "loss": 1.6583, "step": 77160 }, { "epoch": 2.57, "grad_norm": 0.7131806015968323, "learning_rate": 3.0294467453939074e-05, "loss": 1.6717, "step": 77161 }, { "epoch": 2.57, "grad_norm": 0.6923523545265198, "learning_rate": 3.0289890523265315e-05, "loss": 1.749, "step": 77162 }, { "epoch": 2.57, "grad_norm": 0.6904646754264832, "learning_rate": 3.0285313919979904e-05, "loss": 1.6596, "step": 77163 }, { "epoch": 2.57, "grad_norm": 0.6939038634300232, "learning_rate": 3.0280737644088472e-05, "loss": 1.6917, "step": 77164 }, { "epoch": 2.57, "grad_norm": 0.7149088382720947, "learning_rate": 3.027616169559638e-05, "loss": 1.716, "step": 77165 }, { "epoch": 2.57, "grad_norm": 0.6906710267066956, "learning_rate": 3.027158607450926e-05, "loss": 1.6852, "step": 77166 }, { "epoch": 2.57, "grad_norm": 0.675180971622467, "learning_rate": 3.0267010780832766e-05, "loss": 1.6503, "step": 77167 }, { "epoch": 2.57, "grad_norm": 0.6918450593948364, "learning_rate": 3.0262435814572374e-05, "loss": 1.6899, "step": 77168 }, { "epoch": 2.57, "grad_norm": 0.6896029114723206, "learning_rate": 3.02578611757336e-05, "loss": 1.6756, "step": 77169 }, { "epoch": 2.57, "grad_norm": 0.7097721695899963, "learning_rate": 3.0253286864322013e-05, "loss": 1.6793, "step": 77170 }, { "epoch": 2.57, "grad_norm": 0.7020726799964905, "learning_rate": 3.024871288034334e-05, "loss": 1.7024, "step": 77171 }, { "epoch": 2.57, "grad_norm": 0.7122472524642944, "learning_rate": 3.0244139223802843e-05, "loss": 1.7294, "step": 77172 }, { "epoch": 2.57, "grad_norm": 0.7034239768981934, "learning_rate": 3.023956589470622e-05, "loss": 1.6889, "step": 77173 }, { "epoch": 2.57, "grad_norm": 0.7155432105064392, "learning_rate": 3.0234992893059063e-05, "loss": 1.7001, "step": 77174 }, { "epoch": 2.57, "grad_norm": 0.708991289138794, "learning_rate": 3.0230420218866902e-05, "loss": 1.6261, "step": 77175 }, { "epoch": 2.57, "grad_norm": 0.7244932055473328, "learning_rate": 3.0225847872135168e-05, "loss": 1.6826, "step": 77176 }, { "epoch": 2.57, "grad_norm": 0.7147306203842163, "learning_rate": 3.0221275852869587e-05, "loss": 1.7248, "step": 77177 }, { "epoch": 2.57, "grad_norm": 0.6944683194160461, "learning_rate": 3.0216704161075623e-05, "loss": 1.603, "step": 77178 }, { "epoch": 2.57, "grad_norm": 0.7033421397209167, "learning_rate": 3.021213279675877e-05, "loss": 1.6664, "step": 77179 }, { "epoch": 2.57, "grad_norm": 0.7145437598228455, "learning_rate": 3.020756175992466e-05, "loss": 1.7265, "step": 77180 }, { "epoch": 2.57, "grad_norm": 0.7145946025848389, "learning_rate": 3.0202991050578852e-05, "loss": 1.7137, "step": 77181 }, { "epoch": 2.57, "grad_norm": 0.6799406409263611, "learning_rate": 3.0198420668726874e-05, "loss": 1.6188, "step": 77182 }, { "epoch": 2.57, "grad_norm": 0.7027826905250549, "learning_rate": 3.0193850614374192e-05, "loss": 1.6714, "step": 77183 }, { "epoch": 2.57, "grad_norm": 0.7045481204986572, "learning_rate": 3.018928088752647e-05, "loss": 1.6608, "step": 77184 }, { "epoch": 2.57, "grad_norm": 0.6929779648780823, "learning_rate": 3.0184711488189193e-05, "loss": 1.6599, "step": 77185 }, { "epoch": 2.57, "grad_norm": 0.7243958711624146, "learning_rate": 3.0180142416367903e-05, "loss": 1.7001, "step": 77186 }, { "epoch": 2.57, "grad_norm": 0.8071761131286621, "learning_rate": 3.0175573672068154e-05, "loss": 1.6922, "step": 77187 }, { "epoch": 2.57, "grad_norm": 0.7136275768280029, "learning_rate": 3.0171005255295544e-05, "loss": 1.6895, "step": 77188 }, { "epoch": 2.57, "grad_norm": 0.706009566783905, "learning_rate": 3.016643716605557e-05, "loss": 1.735, "step": 77189 }, { "epoch": 2.57, "grad_norm": 0.7351248264312744, "learning_rate": 3.0161869404353754e-05, "loss": 1.6944, "step": 77190 }, { "epoch": 2.57, "grad_norm": 0.7069589495658875, "learning_rate": 3.01573019701957e-05, "loss": 1.6186, "step": 77191 }, { "epoch": 2.57, "grad_norm": 0.692439615726471, "learning_rate": 3.0152734863586935e-05, "loss": 1.6634, "step": 77192 }, { "epoch": 2.57, "grad_norm": 0.6891708374023438, "learning_rate": 3.0148168084532953e-05, "loss": 1.7131, "step": 77193 }, { "epoch": 2.57, "grad_norm": 0.6779946684837341, "learning_rate": 3.014360163303935e-05, "loss": 1.6541, "step": 77194 }, { "epoch": 2.57, "grad_norm": 0.7284486293792725, "learning_rate": 3.013903550911162e-05, "loss": 1.7164, "step": 77195 }, { "epoch": 2.57, "grad_norm": 0.7152899503707886, "learning_rate": 3.0134469712755392e-05, "loss": 1.7822, "step": 77196 }, { "epoch": 2.57, "grad_norm": 0.6979084610939026, "learning_rate": 3.01299042439761e-05, "loss": 1.7012, "step": 77197 }, { "epoch": 2.57, "grad_norm": 0.6968187689781189, "learning_rate": 3.01253391027794e-05, "loss": 1.6837, "step": 77198 }, { "epoch": 2.57, "grad_norm": 0.7023380994796753, "learning_rate": 3.0120774289170757e-05, "loss": 1.6798, "step": 77199 }, { "epoch": 2.57, "grad_norm": 0.7130924463272095, "learning_rate": 3.01162098031557e-05, "loss": 1.7888, "step": 77200 }, { "epoch": 2.57, "grad_norm": 0.6902686357498169, "learning_rate": 3.0111645644739824e-05, "loss": 1.6502, "step": 77201 }, { "epoch": 2.57, "grad_norm": 0.7212258577346802, "learning_rate": 3.0107081813928623e-05, "loss": 1.8172, "step": 77202 }, { "epoch": 2.57, "grad_norm": 0.7044177651405334, "learning_rate": 3.0102518310727695e-05, "loss": 1.7463, "step": 77203 }, { "epoch": 2.57, "grad_norm": 0.7471047639846802, "learning_rate": 3.0097955135142537e-05, "loss": 1.6482, "step": 77204 }, { "epoch": 2.57, "grad_norm": 0.7107334136962891, "learning_rate": 3.009339228717864e-05, "loss": 1.6857, "step": 77205 }, { "epoch": 2.57, "grad_norm": 0.6912356019020081, "learning_rate": 3.008882976684164e-05, "loss": 1.6456, "step": 77206 }, { "epoch": 2.57, "grad_norm": 0.7133564949035645, "learning_rate": 3.008426757413699e-05, "loss": 1.6622, "step": 77207 }, { "epoch": 2.57, "grad_norm": 0.6978826522827148, "learning_rate": 3.0079705709070325e-05, "loss": 1.6326, "step": 77208 }, { "epoch": 2.57, "grad_norm": 0.6862509846687317, "learning_rate": 3.007514417164707e-05, "loss": 1.647, "step": 77209 }, { "epoch": 2.57, "grad_norm": 0.7316806316375732, "learning_rate": 3.0070582961872892e-05, "loss": 1.6406, "step": 77210 }, { "epoch": 2.57, "grad_norm": 0.7036203742027283, "learning_rate": 3.0066022079753248e-05, "loss": 1.678, "step": 77211 }, { "epoch": 2.57, "grad_norm": 0.6977177262306213, "learning_rate": 3.0061461525293608e-05, "loss": 1.6171, "step": 77212 }, { "epoch": 2.57, "grad_norm": 0.7031450867652893, "learning_rate": 3.0056901298499625e-05, "loss": 1.6824, "step": 77213 }, { "epoch": 2.57, "grad_norm": 0.7070667147636414, "learning_rate": 3.00523413993768e-05, "loss": 1.6775, "step": 77214 }, { "epoch": 2.57, "grad_norm": 0.6870676279067993, "learning_rate": 3.0047781827930628e-05, "loss": 1.6507, "step": 77215 }, { "epoch": 2.57, "grad_norm": 0.7128597497940063, "learning_rate": 3.0043222584166637e-05, "loss": 1.6739, "step": 77216 }, { "epoch": 2.57, "grad_norm": 0.7137553095817566, "learning_rate": 3.0038663668090457e-05, "loss": 1.6866, "step": 77217 }, { "epoch": 2.57, "grad_norm": 0.7123607397079468, "learning_rate": 3.0034105079707584e-05, "loss": 1.7137, "step": 77218 }, { "epoch": 2.57, "grad_norm": 0.7187942266464233, "learning_rate": 3.0029546819023475e-05, "loss": 1.7546, "step": 77219 }, { "epoch": 2.57, "grad_norm": 0.6953997015953064, "learning_rate": 3.00249888860437e-05, "loss": 1.6327, "step": 77220 }, { "epoch": 2.57, "grad_norm": 0.6963251233100891, "learning_rate": 3.0020431280773915e-05, "loss": 1.6998, "step": 77221 }, { "epoch": 2.57, "grad_norm": 0.7024155259132385, "learning_rate": 3.0015874003219454e-05, "loss": 1.7225, "step": 77222 }, { "epoch": 2.57, "grad_norm": 0.6879813075065613, "learning_rate": 3.001131705338594e-05, "loss": 1.6359, "step": 77223 }, { "epoch": 2.57, "grad_norm": 0.7045074701309204, "learning_rate": 3.0006760431278974e-05, "loss": 1.6953, "step": 77224 }, { "epoch": 2.57, "grad_norm": 0.7136332988739014, "learning_rate": 3.0002204136903983e-05, "loss": 1.701, "step": 77225 }, { "epoch": 2.57, "grad_norm": 0.703764796257019, "learning_rate": 2.9997648170266498e-05, "loss": 1.5804, "step": 77226 }, { "epoch": 2.57, "grad_norm": 0.7057946920394897, "learning_rate": 2.999309253137211e-05, "loss": 1.7568, "step": 77227 }, { "epoch": 2.57, "grad_norm": 0.6957331895828247, "learning_rate": 2.998853722022638e-05, "loss": 1.7263, "step": 77228 }, { "epoch": 2.57, "grad_norm": 0.710690975189209, "learning_rate": 2.9983982236834715e-05, "loss": 1.6922, "step": 77229 }, { "epoch": 2.57, "grad_norm": 0.7094306945800781, "learning_rate": 2.99794275812027e-05, "loss": 1.6936, "step": 77230 }, { "epoch": 2.57, "grad_norm": 0.7041152119636536, "learning_rate": 2.997487325333593e-05, "loss": 1.7345, "step": 77231 }, { "epoch": 2.57, "grad_norm": 0.7075189352035522, "learning_rate": 2.997031925323987e-05, "loss": 1.7051, "step": 77232 }, { "epoch": 2.57, "grad_norm": 0.696354866027832, "learning_rate": 2.9965765580919987e-05, "loss": 1.7179, "step": 77233 }, { "epoch": 2.57, "grad_norm": 0.7099012136459351, "learning_rate": 2.9961212236381972e-05, "loss": 1.6527, "step": 77234 }, { "epoch": 2.57, "grad_norm": 0.6985008716583252, "learning_rate": 2.9956659219631217e-05, "loss": 1.6448, "step": 77235 }, { "epoch": 2.57, "grad_norm": 0.7194121479988098, "learning_rate": 2.995210653067326e-05, "loss": 1.6525, "step": 77236 }, { "epoch": 2.57, "grad_norm": 0.6993499994277954, "learning_rate": 2.9947554169513653e-05, "loss": 1.6945, "step": 77237 }, { "epoch": 2.57, "grad_norm": 0.7191469669342041, "learning_rate": 2.9943002136157968e-05, "loss": 1.6863, "step": 77238 }, { "epoch": 2.57, "grad_norm": 0.6911270022392273, "learning_rate": 2.9938450430611694e-05, "loss": 1.6951, "step": 77239 }, { "epoch": 2.57, "grad_norm": 0.6931235194206238, "learning_rate": 2.9933899052880296e-05, "loss": 1.6616, "step": 77240 }, { "epoch": 2.57, "grad_norm": 0.6864897012710571, "learning_rate": 2.99293480029694e-05, "loss": 1.6856, "step": 77241 }, { "epoch": 2.57, "grad_norm": 0.6987214088439941, "learning_rate": 2.9924797280884504e-05, "loss": 1.6295, "step": 77242 }, { "epoch": 2.57, "grad_norm": 0.699345588684082, "learning_rate": 2.9920246886631037e-05, "loss": 1.662, "step": 77243 }, { "epoch": 2.57, "grad_norm": 0.70276939868927, "learning_rate": 2.991569682021463e-05, "loss": 1.7082, "step": 77244 }, { "epoch": 2.57, "grad_norm": 0.7104408144950867, "learning_rate": 2.991114708164074e-05, "loss": 1.6669, "step": 77245 }, { "epoch": 2.57, "grad_norm": 0.6905558705329895, "learning_rate": 2.9906597670915002e-05, "loss": 1.6628, "step": 77246 }, { "epoch": 2.57, "grad_norm": 0.7109146118164062, "learning_rate": 2.990204858804277e-05, "loss": 1.7403, "step": 77247 }, { "epoch": 2.57, "grad_norm": 0.723993182182312, "learning_rate": 2.9897499833029714e-05, "loss": 1.755, "step": 77248 }, { "epoch": 2.57, "grad_norm": 0.7025117874145508, "learning_rate": 2.9892951405881294e-05, "loss": 1.6122, "step": 77249 }, { "epoch": 2.57, "grad_norm": 0.7076249718666077, "learning_rate": 2.988840330660297e-05, "loss": 1.6181, "step": 77250 }, { "epoch": 2.57, "grad_norm": 0.6972761750221252, "learning_rate": 2.9883855535200408e-05, "loss": 1.6571, "step": 77251 }, { "epoch": 2.57, "grad_norm": 0.6975077390670776, "learning_rate": 2.9879308091678966e-05, "loss": 1.69, "step": 77252 }, { "epoch": 2.57, "grad_norm": 0.7267703413963318, "learning_rate": 2.987476097604431e-05, "loss": 1.6764, "step": 77253 }, { "epoch": 2.57, "grad_norm": 0.7124214172363281, "learning_rate": 2.98702141883019e-05, "loss": 1.6551, "step": 77254 }, { "epoch": 2.57, "grad_norm": 0.7049626111984253, "learning_rate": 2.9865667728457165e-05, "loss": 1.702, "step": 77255 }, { "epoch": 2.57, "grad_norm": 0.6971304416656494, "learning_rate": 2.986112159651577e-05, "loss": 1.6804, "step": 77256 }, { "epoch": 2.57, "grad_norm": 0.7173894643783569, "learning_rate": 2.985657579248314e-05, "loss": 1.6097, "step": 77257 }, { "epoch": 2.57, "grad_norm": 0.706578254699707, "learning_rate": 2.985203031636487e-05, "loss": 1.7268, "step": 77258 }, { "epoch": 2.57, "grad_norm": 0.7016753554344177, "learning_rate": 2.9847485168166365e-05, "loss": 1.6062, "step": 77259 }, { "epoch": 2.57, "grad_norm": 0.6755282878875732, "learning_rate": 2.9842940347893275e-05, "loss": 1.638, "step": 77260 }, { "epoch": 2.57, "grad_norm": 0.7061481475830078, "learning_rate": 2.9838395855551034e-05, "loss": 1.7672, "step": 77261 }, { "epoch": 2.57, "grad_norm": 0.6953060626983643, "learning_rate": 2.9833851691145138e-05, "loss": 1.6977, "step": 77262 }, { "epoch": 2.57, "grad_norm": 0.6962224245071411, "learning_rate": 2.9829307854681182e-05, "loss": 1.7293, "step": 77263 }, { "epoch": 2.57, "grad_norm": 0.6850799322128296, "learning_rate": 2.982476434616463e-05, "loss": 1.7113, "step": 77264 }, { "epoch": 2.57, "grad_norm": 0.6987273097038269, "learning_rate": 2.9820221165600945e-05, "loss": 1.7411, "step": 77265 }, { "epoch": 2.57, "grad_norm": 0.693090558052063, "learning_rate": 2.9815678312995716e-05, "loss": 1.6677, "step": 77266 }, { "epoch": 2.57, "grad_norm": 0.7014735341072083, "learning_rate": 2.9811135788354512e-05, "loss": 1.6806, "step": 77267 }, { "epoch": 2.57, "grad_norm": 0.6962792873382568, "learning_rate": 2.980659359168276e-05, "loss": 1.6739, "step": 77268 }, { "epoch": 2.57, "grad_norm": 0.7038283348083496, "learning_rate": 2.9802051722985954e-05, "loss": 1.6714, "step": 77269 }, { "epoch": 2.57, "grad_norm": 0.7040219306945801, "learning_rate": 2.979751018226969e-05, "loss": 1.6281, "step": 77270 }, { "epoch": 2.57, "grad_norm": 0.7134473323822021, "learning_rate": 2.979296896953943e-05, "loss": 1.6872, "step": 77271 }, { "epoch": 2.57, "grad_norm": 0.7072822451591492, "learning_rate": 2.9788428084800643e-05, "loss": 1.7011, "step": 77272 }, { "epoch": 2.57, "grad_norm": 0.7069017887115479, "learning_rate": 2.978388752805888e-05, "loss": 1.622, "step": 77273 }, { "epoch": 2.57, "grad_norm": 0.956559956073761, "learning_rate": 2.977934729931981e-05, "loss": 1.6654, "step": 77274 }, { "epoch": 2.57, "grad_norm": 0.6884815692901611, "learning_rate": 2.9774807398588664e-05, "loss": 1.6568, "step": 77275 }, { "epoch": 2.57, "grad_norm": 0.6979060769081116, "learning_rate": 2.9770267825871064e-05, "loss": 1.6792, "step": 77276 }, { "epoch": 2.57, "grad_norm": 0.7204750776290894, "learning_rate": 2.976572858117261e-05, "loss": 1.7229, "step": 77277 }, { "epoch": 2.57, "grad_norm": 0.6962868571281433, "learning_rate": 2.9761189664498762e-05, "loss": 1.6204, "step": 77278 }, { "epoch": 2.57, "grad_norm": 0.6953999996185303, "learning_rate": 2.9756651075854953e-05, "loss": 1.6618, "step": 77279 }, { "epoch": 2.57, "grad_norm": 0.7236595153808594, "learning_rate": 2.9752112815246744e-05, "loss": 1.6785, "step": 77280 }, { "epoch": 2.57, "grad_norm": 0.7214540839195251, "learning_rate": 2.9747574882679758e-05, "loss": 1.7131, "step": 77281 }, { "epoch": 2.57, "grad_norm": 0.7114008665084839, "learning_rate": 2.97430372781593e-05, "loss": 1.6954, "step": 77282 }, { "epoch": 2.57, "grad_norm": 0.6858007907867432, "learning_rate": 2.9738500001690956e-05, "loss": 1.6204, "step": 77283 }, { "epoch": 2.57, "grad_norm": 0.7283595204353333, "learning_rate": 2.973396305328033e-05, "loss": 1.6788, "step": 77284 }, { "epoch": 2.57, "grad_norm": 0.6957833766937256, "learning_rate": 2.972942643293281e-05, "loss": 1.6128, "step": 77285 }, { "epoch": 2.57, "grad_norm": 0.706368088722229, "learning_rate": 2.9724890140653934e-05, "loss": 1.7329, "step": 77286 }, { "epoch": 2.57, "grad_norm": 0.7147623896598816, "learning_rate": 2.972035417644919e-05, "loss": 1.7126, "step": 77287 }, { "epoch": 2.57, "grad_norm": 0.6765880584716797, "learning_rate": 2.9715818540324242e-05, "loss": 1.6529, "step": 77288 }, { "epoch": 2.57, "grad_norm": 0.7183372974395752, "learning_rate": 2.9711283232284323e-05, "loss": 1.6948, "step": 77289 }, { "epoch": 2.57, "grad_norm": 0.7413146495819092, "learning_rate": 2.9706748252335122e-05, "loss": 1.6696, "step": 77290 }, { "epoch": 2.57, "grad_norm": 0.6837222576141357, "learning_rate": 2.9702213600482138e-05, "loss": 1.6232, "step": 77291 }, { "epoch": 2.57, "grad_norm": 0.6720133423805237, "learning_rate": 2.9697679276730835e-05, "loss": 1.6225, "step": 77292 }, { "epoch": 2.57, "grad_norm": 0.6890244483947754, "learning_rate": 2.9693145281086672e-05, "loss": 1.6933, "step": 77293 }, { "epoch": 2.57, "grad_norm": 0.69637531042099, "learning_rate": 2.9688611613555247e-05, "loss": 1.7151, "step": 77294 }, { "epoch": 2.57, "grad_norm": 0.689972460269928, "learning_rate": 2.9684078274142053e-05, "loss": 1.6118, "step": 77295 }, { "epoch": 2.57, "grad_norm": 0.7005413770675659, "learning_rate": 2.9679545262852456e-05, "loss": 1.7001, "step": 77296 }, { "epoch": 2.57, "grad_norm": 0.7066504955291748, "learning_rate": 2.9675012579692116e-05, "loss": 1.662, "step": 77297 }, { "epoch": 2.57, "grad_norm": 0.6953316926956177, "learning_rate": 2.9670480224666492e-05, "loss": 1.6764, "step": 77298 }, { "epoch": 2.57, "grad_norm": 0.715387761592865, "learning_rate": 2.9665948197781087e-05, "loss": 1.7004, "step": 77299 }, { "epoch": 2.57, "grad_norm": 0.6840717792510986, "learning_rate": 2.9661416499041323e-05, "loss": 1.6704, "step": 77300 }, { "epoch": 2.57, "grad_norm": 0.6940297484397888, "learning_rate": 2.9656885128452834e-05, "loss": 1.6817, "step": 77301 }, { "epoch": 2.57, "grad_norm": 0.7041666507720947, "learning_rate": 2.965235408602108e-05, "loss": 1.6756, "step": 77302 }, { "epoch": 2.57, "grad_norm": 0.7180190682411194, "learning_rate": 2.964782337175142e-05, "loss": 1.6876, "step": 77303 }, { "epoch": 2.57, "grad_norm": 0.6836071610450745, "learning_rate": 2.964329298564956e-05, "loss": 1.6394, "step": 77304 }, { "epoch": 2.57, "grad_norm": 0.6936431527137756, "learning_rate": 2.963876292772085e-05, "loss": 1.7408, "step": 77305 }, { "epoch": 2.57, "grad_norm": 0.7059078216552734, "learning_rate": 2.9634233197970924e-05, "loss": 1.7285, "step": 77306 }, { "epoch": 2.57, "grad_norm": 0.6933428049087524, "learning_rate": 2.9629703796405114e-05, "loss": 1.6809, "step": 77307 }, { "epoch": 2.57, "grad_norm": 0.7108749151229858, "learning_rate": 2.9625174723029077e-05, "loss": 1.7058, "step": 77308 }, { "epoch": 2.57, "grad_norm": 0.6926701664924622, "learning_rate": 2.9620645977848213e-05, "loss": 1.7078, "step": 77309 }, { "epoch": 2.57, "grad_norm": 0.7398067712783813, "learning_rate": 2.9616117560868015e-05, "loss": 1.6278, "step": 77310 }, { "epoch": 2.57, "grad_norm": 0.6706815361976624, "learning_rate": 2.9611589472094043e-05, "loss": 1.728, "step": 77311 }, { "epoch": 2.57, "grad_norm": 0.7116959095001221, "learning_rate": 2.960706171153173e-05, "loss": 1.6597, "step": 77312 }, { "epoch": 2.57, "grad_norm": 0.7169463038444519, "learning_rate": 2.9602534279186674e-05, "loss": 1.7188, "step": 77313 }, { "epoch": 2.57, "grad_norm": 0.676896870136261, "learning_rate": 2.9598007175064196e-05, "loss": 1.6518, "step": 77314 }, { "epoch": 2.57, "grad_norm": 0.7121574282646179, "learning_rate": 2.959348039917e-05, "loss": 1.6387, "step": 77315 }, { "epoch": 2.57, "grad_norm": 0.7003649473190308, "learning_rate": 2.9588953951509442e-05, "loss": 1.635, "step": 77316 }, { "epoch": 2.57, "grad_norm": 0.7504419088363647, "learning_rate": 2.958442783208802e-05, "loss": 1.6472, "step": 77317 }, { "epoch": 2.57, "grad_norm": 0.6891024708747864, "learning_rate": 2.9579902040911295e-05, "loss": 1.726, "step": 77318 }, { "epoch": 2.57, "grad_norm": 0.6960726380348206, "learning_rate": 2.95753765779847e-05, "loss": 1.7517, "step": 77319 }, { "epoch": 2.57, "grad_norm": 0.6887634992599487, "learning_rate": 2.957085144331376e-05, "loss": 1.6495, "step": 77320 }, { "epoch": 2.57, "grad_norm": 0.6888519525527954, "learning_rate": 2.9566326636904e-05, "loss": 1.6592, "step": 77321 }, { "epoch": 2.57, "grad_norm": 0.6972762942314148, "learning_rate": 2.9561802158760796e-05, "loss": 1.6024, "step": 77322 }, { "epoch": 2.57, "grad_norm": 0.7017069458961487, "learning_rate": 2.9557278008889796e-05, "loss": 1.6341, "step": 77323 }, { "epoch": 2.57, "grad_norm": 0.6970870494842529, "learning_rate": 2.9552754187296335e-05, "loss": 1.7247, "step": 77324 }, { "epoch": 2.57, "grad_norm": 0.708042323589325, "learning_rate": 2.9548230693986042e-05, "loss": 1.7199, "step": 77325 }, { "epoch": 2.57, "grad_norm": 0.7008175849914551, "learning_rate": 2.954370752896428e-05, "loss": 1.7212, "step": 77326 }, { "epoch": 2.57, "grad_norm": 0.6795147657394409, "learning_rate": 2.9539184692236673e-05, "loss": 1.6969, "step": 77327 }, { "epoch": 2.57, "grad_norm": 0.6973013281822205, "learning_rate": 2.9534662183808656e-05, "loss": 1.6495, "step": 77328 }, { "epoch": 2.57, "grad_norm": 0.7057094573974609, "learning_rate": 2.9530140003685654e-05, "loss": 1.6679, "step": 77329 }, { "epoch": 2.57, "grad_norm": 0.7033542394638062, "learning_rate": 2.952561815187323e-05, "loss": 1.6439, "step": 77330 }, { "epoch": 2.57, "grad_norm": 0.6944144368171692, "learning_rate": 2.9521096628376884e-05, "loss": 1.6998, "step": 77331 }, { "epoch": 2.57, "grad_norm": 0.7037290930747986, "learning_rate": 2.9516575433202037e-05, "loss": 1.7709, "step": 77332 }, { "epoch": 2.57, "grad_norm": 0.7213009595870972, "learning_rate": 2.9512054566354193e-05, "loss": 1.7194, "step": 77333 }, { "epoch": 2.57, "grad_norm": 0.6895033717155457, "learning_rate": 2.9507534027838908e-05, "loss": 1.67, "step": 77334 }, { "epoch": 2.57, "grad_norm": 0.6806785464286804, "learning_rate": 2.9503013817661613e-05, "loss": 1.6541, "step": 77335 }, { "epoch": 2.57, "grad_norm": 0.6937304139137268, "learning_rate": 2.949849393582777e-05, "loss": 1.6684, "step": 77336 }, { "epoch": 2.57, "grad_norm": 0.7182100415229797, "learning_rate": 2.9493974382342878e-05, "loss": 1.6688, "step": 77337 }, { "epoch": 2.57, "grad_norm": 0.701805591583252, "learning_rate": 2.948945515721256e-05, "loss": 1.6422, "step": 77338 }, { "epoch": 2.57, "grad_norm": 0.7212891578674316, "learning_rate": 2.9484936260442084e-05, "loss": 1.7078, "step": 77339 }, { "epoch": 2.57, "grad_norm": 0.6976326107978821, "learning_rate": 2.948041769203704e-05, "loss": 1.6942, "step": 77340 }, { "epoch": 2.57, "grad_norm": 0.7161209583282471, "learning_rate": 2.947589945200296e-05, "loss": 1.6029, "step": 77341 }, { "epoch": 2.57, "grad_norm": 0.7013378739356995, "learning_rate": 2.9471381540345273e-05, "loss": 1.6449, "step": 77342 }, { "epoch": 2.57, "grad_norm": 0.6777710318565369, "learning_rate": 2.9466863957069408e-05, "loss": 1.5843, "step": 77343 }, { "epoch": 2.57, "grad_norm": 0.7003358602523804, "learning_rate": 2.9462346702180928e-05, "loss": 1.6918, "step": 77344 }, { "epoch": 2.57, "grad_norm": 0.7187590599060059, "learning_rate": 2.9457829775685394e-05, "loss": 1.7517, "step": 77345 }, { "epoch": 2.57, "grad_norm": 0.6829150915145874, "learning_rate": 2.94533131775881e-05, "loss": 1.6588, "step": 77346 }, { "epoch": 2.57, "grad_norm": 0.6948429346084595, "learning_rate": 2.9448796907894613e-05, "loss": 1.6827, "step": 77347 }, { "epoch": 2.57, "grad_norm": 0.6976635456085205, "learning_rate": 2.9444280966610456e-05, "loss": 1.6401, "step": 77348 }, { "epoch": 2.57, "grad_norm": 0.7029137015342712, "learning_rate": 2.9439765353741098e-05, "loss": 1.7198, "step": 77349 }, { "epoch": 2.57, "grad_norm": 0.7100216150283813, "learning_rate": 2.9435250069291926e-05, "loss": 1.6367, "step": 77350 }, { "epoch": 2.57, "grad_norm": 0.7379812002182007, "learning_rate": 2.9430735113268578e-05, "loss": 1.7024, "step": 77351 }, { "epoch": 2.57, "grad_norm": 0.6954555511474609, "learning_rate": 2.9426220485676445e-05, "loss": 1.7638, "step": 77352 }, { "epoch": 2.57, "grad_norm": 0.7203510999679565, "learning_rate": 2.9421706186520954e-05, "loss": 1.7143, "step": 77353 }, { "epoch": 2.57, "grad_norm": 0.6985553503036499, "learning_rate": 2.941719221580764e-05, "loss": 1.7716, "step": 77354 }, { "epoch": 2.57, "grad_norm": 0.7177302241325378, "learning_rate": 2.9412678573542025e-05, "loss": 1.7003, "step": 77355 }, { "epoch": 2.57, "grad_norm": 0.6930068135261536, "learning_rate": 2.9408165259729577e-05, "loss": 1.6631, "step": 77356 }, { "epoch": 2.57, "grad_norm": 0.7105216979980469, "learning_rate": 2.9403652274375655e-05, "loss": 1.6336, "step": 77357 }, { "epoch": 2.57, "grad_norm": 0.697785496711731, "learning_rate": 2.9399139617485922e-05, "loss": 1.5306, "step": 77358 }, { "epoch": 2.57, "grad_norm": 0.6818051338195801, "learning_rate": 2.9394627289065743e-05, "loss": 1.6605, "step": 77359 }, { "epoch": 2.57, "grad_norm": 0.6867666840553284, "learning_rate": 2.9390115289120574e-05, "loss": 1.7039, "step": 77360 }, { "epoch": 2.57, "grad_norm": 0.6910959482192993, "learning_rate": 2.9385603617655985e-05, "loss": 1.6335, "step": 77361 }, { "epoch": 2.57, "grad_norm": 0.7070926427841187, "learning_rate": 2.9381092274677332e-05, "loss": 1.7408, "step": 77362 }, { "epoch": 2.57, "grad_norm": 0.7038670182228088, "learning_rate": 2.9376581260190213e-05, "loss": 1.6415, "step": 77363 }, { "epoch": 2.57, "grad_norm": 0.6712632775306702, "learning_rate": 2.937207057419999e-05, "loss": 1.6689, "step": 77364 }, { "epoch": 2.57, "grad_norm": 0.7149266004562378, "learning_rate": 2.9367560216712262e-05, "loss": 1.7176, "step": 77365 }, { "epoch": 2.57, "grad_norm": 0.7241553068161011, "learning_rate": 2.9363050187732452e-05, "loss": 1.6354, "step": 77366 }, { "epoch": 2.57, "grad_norm": 0.6883324980735779, "learning_rate": 2.935854048726596e-05, "loss": 1.639, "step": 77367 }, { "epoch": 2.57, "grad_norm": 0.7137426137924194, "learning_rate": 2.9354031115318378e-05, "loss": 1.6802, "step": 77368 }, { "epoch": 2.57, "grad_norm": 0.6893112659454346, "learning_rate": 2.934952207189507e-05, "loss": 1.6741, "step": 77369 }, { "epoch": 2.57, "grad_norm": 0.6882767677307129, "learning_rate": 2.9345013357001602e-05, "loss": 1.6077, "step": 77370 }, { "epoch": 2.57, "grad_norm": 0.6989677548408508, "learning_rate": 2.934050497064343e-05, "loss": 1.6341, "step": 77371 }, { "epoch": 2.57, "grad_norm": 0.7126878499984741, "learning_rate": 2.9335996912825954e-05, "loss": 1.7078, "step": 77372 }, { "epoch": 2.57, "grad_norm": 0.7161697149276733, "learning_rate": 2.9331489183554735e-05, "loss": 1.7581, "step": 77373 }, { "epoch": 2.57, "grad_norm": 0.692533552646637, "learning_rate": 2.932698178283517e-05, "loss": 1.6591, "step": 77374 }, { "epoch": 2.57, "grad_norm": 0.6964067220687866, "learning_rate": 2.9322474710672818e-05, "loss": 1.7283, "step": 77375 }, { "epoch": 2.57, "grad_norm": 0.7063456773757935, "learning_rate": 2.931796796707301e-05, "loss": 1.657, "step": 77376 }, { "epoch": 2.57, "grad_norm": 0.7117959856987, "learning_rate": 2.9313461552041407e-05, "loss": 1.6923, "step": 77377 }, { "epoch": 2.57, "grad_norm": 0.6987256407737732, "learning_rate": 2.930895546558334e-05, "loss": 1.74, "step": 77378 }, { "epoch": 2.57, "grad_norm": 0.7031653523445129, "learning_rate": 2.9304449707704302e-05, "loss": 1.662, "step": 77379 }, { "epoch": 2.57, "grad_norm": 0.6847459077835083, "learning_rate": 2.9299944278409793e-05, "loss": 1.6674, "step": 77380 }, { "epoch": 2.57, "grad_norm": 0.7156473398208618, "learning_rate": 2.9295439177705304e-05, "loss": 1.6649, "step": 77381 }, { "epoch": 2.57, "grad_norm": 0.7002778053283691, "learning_rate": 2.9290934405596168e-05, "loss": 1.7244, "step": 77382 }, { "epoch": 2.57, "grad_norm": 0.7195913791656494, "learning_rate": 2.9286429962087978e-05, "loss": 1.7676, "step": 77383 }, { "epoch": 2.57, "grad_norm": 0.7197676301002502, "learning_rate": 2.9281925847186226e-05, "loss": 1.6627, "step": 77384 }, { "epoch": 2.57, "grad_norm": 0.692944347858429, "learning_rate": 2.9277422060896318e-05, "loss": 1.6841, "step": 77385 }, { "epoch": 2.57, "grad_norm": 0.7592723369598389, "learning_rate": 2.9272918603223703e-05, "loss": 1.6816, "step": 77386 }, { "epoch": 2.57, "grad_norm": 0.6993537545204163, "learning_rate": 2.926841547417389e-05, "loss": 1.5973, "step": 77387 }, { "epoch": 2.57, "grad_norm": 0.7182019948959351, "learning_rate": 2.9263912673752367e-05, "loss": 1.6795, "step": 77388 }, { "epoch": 2.57, "grad_norm": 0.7441239953041077, "learning_rate": 2.9259410201964493e-05, "loss": 1.6641, "step": 77389 }, { "epoch": 2.57, "grad_norm": 0.7092460989952087, "learning_rate": 2.9254908058815774e-05, "loss": 1.6909, "step": 77390 }, { "epoch": 2.57, "grad_norm": 0.7249981760978699, "learning_rate": 2.9250406244311865e-05, "loss": 1.7659, "step": 77391 }, { "epoch": 2.57, "grad_norm": 0.7320942282676697, "learning_rate": 2.9245904758457928e-05, "loss": 1.67, "step": 77392 }, { "epoch": 2.57, "grad_norm": 0.7257880568504333, "learning_rate": 2.924140360125956e-05, "loss": 1.6951, "step": 77393 }, { "epoch": 2.57, "grad_norm": 0.7008004784584045, "learning_rate": 2.9236902772722327e-05, "loss": 1.6232, "step": 77394 }, { "epoch": 2.57, "grad_norm": 0.72642982006073, "learning_rate": 2.9232402272851586e-05, "loss": 1.5903, "step": 77395 }, { "epoch": 2.57, "grad_norm": 0.6830436587333679, "learning_rate": 2.9227902101652733e-05, "loss": 1.7139, "step": 77396 }, { "epoch": 2.58, "grad_norm": 0.6877652406692505, "learning_rate": 2.9223402259131334e-05, "loss": 1.703, "step": 77397 }, { "epoch": 2.58, "grad_norm": 0.7065815329551697, "learning_rate": 2.9218902745292918e-05, "loss": 1.6927, "step": 77398 }, { "epoch": 2.58, "grad_norm": 0.6937199831008911, "learning_rate": 2.9214403560142774e-05, "loss": 1.7058, "step": 77399 }, { "epoch": 2.58, "grad_norm": 0.714438259601593, "learning_rate": 2.920990470368644e-05, "loss": 1.6636, "step": 77400 }, { "epoch": 2.58, "grad_norm": 0.7168456315994263, "learning_rate": 2.9205406175929404e-05, "loss": 1.6771, "step": 77401 }, { "epoch": 2.58, "grad_norm": 0.7440540194511414, "learning_rate": 2.920090797687713e-05, "loss": 1.6877, "step": 77402 }, { "epoch": 2.58, "grad_norm": 0.7154967188835144, "learning_rate": 2.9196410106535018e-05, "loss": 1.6854, "step": 77403 }, { "epoch": 2.58, "grad_norm": 0.7493261694908142, "learning_rate": 2.9191912564908526e-05, "loss": 1.7281, "step": 77404 }, { "epoch": 2.58, "grad_norm": 0.747444748878479, "learning_rate": 2.9187415352003287e-05, "loss": 1.6832, "step": 77405 }, { "epoch": 2.58, "grad_norm": 0.7115708589553833, "learning_rate": 2.918291846782449e-05, "loss": 1.6722, "step": 77406 }, { "epoch": 2.58, "grad_norm": 0.7108228206634521, "learning_rate": 2.9178421912377737e-05, "loss": 1.6714, "step": 77407 }, { "epoch": 2.58, "grad_norm": 0.7082741856575012, "learning_rate": 2.9173925685668553e-05, "loss": 1.6786, "step": 77408 }, { "epoch": 2.58, "grad_norm": 0.6820071339607239, "learning_rate": 2.9169429787702302e-05, "loss": 1.6877, "step": 77409 }, { "epoch": 2.58, "grad_norm": 0.7021842002868652, "learning_rate": 2.9164934218484382e-05, "loss": 1.6641, "step": 77410 }, { "epoch": 2.58, "grad_norm": 0.685252845287323, "learning_rate": 2.9160438978020416e-05, "loss": 1.7449, "step": 77411 }, { "epoch": 2.58, "grad_norm": 0.6904793977737427, "learning_rate": 2.915594406631574e-05, "loss": 1.6555, "step": 77412 }, { "epoch": 2.58, "grad_norm": 0.698773980140686, "learning_rate": 2.9151449483375812e-05, "loss": 1.6845, "step": 77413 }, { "epoch": 2.58, "grad_norm": 0.6856576204299927, "learning_rate": 2.9146955229206093e-05, "loss": 1.6922, "step": 77414 }, { "epoch": 2.58, "grad_norm": 0.7017337679862976, "learning_rate": 2.914246130381215e-05, "loss": 1.6096, "step": 77415 }, { "epoch": 2.58, "grad_norm": 0.6736397743225098, "learning_rate": 2.913796770719934e-05, "loss": 1.6263, "step": 77416 }, { "epoch": 2.58, "grad_norm": 0.7376518845558167, "learning_rate": 2.9133474439373063e-05, "loss": 1.7132, "step": 77417 }, { "epoch": 2.58, "grad_norm": 0.6990816593170166, "learning_rate": 2.912898150033888e-05, "loss": 1.6375, "step": 77418 }, { "epoch": 2.58, "grad_norm": 0.6948078870773315, "learning_rate": 2.9124488890102217e-05, "loss": 1.6514, "step": 77419 }, { "epoch": 2.58, "grad_norm": 0.7141356468200684, "learning_rate": 2.9119996608668438e-05, "loss": 1.6522, "step": 77420 }, { "epoch": 2.58, "grad_norm": 0.7062133550643921, "learning_rate": 2.9115504656043142e-05, "loss": 1.7038, "step": 77421 }, { "epoch": 2.58, "grad_norm": 0.7012162804603577, "learning_rate": 2.9111013032231655e-05, "loss": 1.6878, "step": 77422 }, { "epoch": 2.58, "grad_norm": 0.804212212562561, "learning_rate": 2.9106521737239575e-05, "loss": 1.7274, "step": 77423 }, { "epoch": 2.58, "grad_norm": 0.7264370322227478, "learning_rate": 2.910203077107216e-05, "loss": 1.6582, "step": 77424 }, { "epoch": 2.58, "grad_norm": 0.676448404788971, "learning_rate": 2.9097540133735044e-05, "loss": 1.648, "step": 77425 }, { "epoch": 2.58, "grad_norm": 0.7105622887611389, "learning_rate": 2.9093049825233583e-05, "loss": 1.5946, "step": 77426 }, { "epoch": 2.58, "grad_norm": 0.689761757850647, "learning_rate": 2.908855984557318e-05, "loss": 1.6697, "step": 77427 }, { "epoch": 2.58, "grad_norm": 0.6882981061935425, "learning_rate": 2.9084070194759423e-05, "loss": 1.7112, "step": 77428 }, { "epoch": 2.58, "grad_norm": 0.7488203048706055, "learning_rate": 2.9079580872797613e-05, "loss": 1.7486, "step": 77429 }, { "epoch": 2.58, "grad_norm": 0.6987763047218323, "learning_rate": 2.9075091879693346e-05, "loss": 1.6868, "step": 77430 }, { "epoch": 2.58, "grad_norm": 0.6827418804168701, "learning_rate": 2.9070603215451948e-05, "loss": 1.5988, "step": 77431 }, { "epoch": 2.58, "grad_norm": 0.6792310476303101, "learning_rate": 2.9066114880078983e-05, "loss": 1.653, "step": 77432 }, { "epoch": 2.58, "grad_norm": 0.6905086040496826, "learning_rate": 2.9061626873579814e-05, "loss": 1.7345, "step": 77433 }, { "epoch": 2.58, "grad_norm": 0.6881940364837646, "learning_rate": 2.9057139195959832e-05, "loss": 1.6112, "step": 77434 }, { "epoch": 2.58, "grad_norm": 0.6974302530288696, "learning_rate": 2.905265184722467e-05, "loss": 1.7397, "step": 77435 }, { "epoch": 2.58, "grad_norm": 0.6867591738700867, "learning_rate": 2.904816482737956e-05, "loss": 1.5878, "step": 77436 }, { "epoch": 2.58, "grad_norm": 0.7219230532646179, "learning_rate": 2.904367813643016e-05, "loss": 1.7277, "step": 77437 }, { "epoch": 2.58, "grad_norm": 0.7072268128395081, "learning_rate": 2.9039191774381766e-05, "loss": 1.6486, "step": 77438 }, { "epoch": 2.58, "grad_norm": 0.6943143606185913, "learning_rate": 2.903470574123984e-05, "loss": 1.6712, "step": 77439 }, { "epoch": 2.58, "grad_norm": 0.6826750040054321, "learning_rate": 2.903022003700991e-05, "loss": 1.6938, "step": 77440 }, { "epoch": 2.58, "grad_norm": 0.706333577632904, "learning_rate": 2.902573466169731e-05, "loss": 1.6546, "step": 77441 }, { "epoch": 2.58, "grad_norm": 0.705244243144989, "learning_rate": 2.9021249615307562e-05, "loss": 1.7406, "step": 77442 }, { "epoch": 2.58, "grad_norm": 0.7135084271430969, "learning_rate": 2.901676489784607e-05, "loss": 1.6861, "step": 77443 }, { "epoch": 2.58, "grad_norm": 0.7130482792854309, "learning_rate": 2.9012280509318352e-05, "loss": 1.7018, "step": 77444 }, { "epoch": 2.58, "grad_norm": 0.7029213309288025, "learning_rate": 2.9007796449729815e-05, "loss": 1.7096, "step": 77445 }, { "epoch": 2.58, "grad_norm": 0.7102037668228149, "learning_rate": 2.900331271908578e-05, "loss": 1.6724, "step": 77446 }, { "epoch": 2.58, "grad_norm": 0.7266563177108765, "learning_rate": 2.8998829317391882e-05, "loss": 1.5689, "step": 77447 }, { "epoch": 2.58, "grad_norm": 0.6841676235198975, "learning_rate": 2.8994346244653477e-05, "loss": 1.6954, "step": 77448 }, { "epoch": 2.58, "grad_norm": 0.6993544697761536, "learning_rate": 2.8989863500875932e-05, "loss": 1.677, "step": 77449 }, { "epoch": 2.58, "grad_norm": 0.7404068112373352, "learning_rate": 2.8985381086064775e-05, "loss": 1.673, "step": 77450 }, { "epoch": 2.58, "grad_norm": 0.6823872327804565, "learning_rate": 2.89808990002255e-05, "loss": 1.6686, "step": 77451 }, { "epoch": 2.58, "grad_norm": 0.7041551470756531, "learning_rate": 2.897641724336347e-05, "loss": 1.6556, "step": 77452 }, { "epoch": 2.58, "grad_norm": 0.6849467754364014, "learning_rate": 2.8971935815484082e-05, "loss": 1.5096, "step": 77453 }, { "epoch": 2.58, "grad_norm": 0.6772245764732361, "learning_rate": 2.8967454716592798e-05, "loss": 1.6796, "step": 77454 }, { "epoch": 2.58, "grad_norm": 0.7086070775985718, "learning_rate": 2.8962973946695243e-05, "loss": 1.5735, "step": 77455 }, { "epoch": 2.58, "grad_norm": 0.6759321689605713, "learning_rate": 2.8958493505796588e-05, "loss": 1.6222, "step": 77456 }, { "epoch": 2.58, "grad_norm": 0.7228373289108276, "learning_rate": 2.8954013393902352e-05, "loss": 1.7845, "step": 77457 }, { "epoch": 2.58, "grad_norm": 0.6955521106719971, "learning_rate": 2.8949533611018104e-05, "loss": 1.6155, "step": 77458 }, { "epoch": 2.58, "grad_norm": 0.6924079656600952, "learning_rate": 2.8945054157149138e-05, "loss": 1.6596, "step": 77459 }, { "epoch": 2.58, "grad_norm": 0.711661159992218, "learning_rate": 2.8940575032300917e-05, "loss": 1.6542, "step": 77460 }, { "epoch": 2.58, "grad_norm": 0.7291062474250793, "learning_rate": 2.89360962364789e-05, "loss": 1.6995, "step": 77461 }, { "epoch": 2.58, "grad_norm": 0.7232430577278137, "learning_rate": 2.893161776968862e-05, "loss": 1.7283, "step": 77462 }, { "epoch": 2.58, "grad_norm": 0.7363138794898987, "learning_rate": 2.8927139631935303e-05, "loss": 1.7286, "step": 77463 }, { "epoch": 2.58, "grad_norm": 0.7177110910415649, "learning_rate": 2.8922661823224515e-05, "loss": 1.684, "step": 77464 }, { "epoch": 2.58, "grad_norm": 0.7000201940536499, "learning_rate": 2.8918184343561713e-05, "loss": 1.6687, "step": 77465 }, { "epoch": 2.58, "grad_norm": 0.7017613649368286, "learning_rate": 2.8913707192952296e-05, "loss": 1.7092, "step": 77466 }, { "epoch": 2.58, "grad_norm": 0.7013768553733826, "learning_rate": 2.890923037140166e-05, "loss": 1.666, "step": 77467 }, { "epoch": 2.58, "grad_norm": 0.6961731314659119, "learning_rate": 2.8904753878915297e-05, "loss": 1.6553, "step": 77468 }, { "epoch": 2.58, "grad_norm": 0.7324728965759277, "learning_rate": 2.8900277715498643e-05, "loss": 1.6634, "step": 77469 }, { "epoch": 2.58, "grad_norm": 0.6982282996177673, "learning_rate": 2.8895801881157053e-05, "loss": 1.6538, "step": 77470 }, { "epoch": 2.58, "grad_norm": 0.7016689777374268, "learning_rate": 2.8891326375895997e-05, "loss": 1.7485, "step": 77471 }, { "epoch": 2.58, "grad_norm": 0.7163008451461792, "learning_rate": 2.8886851199720996e-05, "loss": 1.7166, "step": 77472 }, { "epoch": 2.58, "grad_norm": 0.6752393841743469, "learning_rate": 2.8882376352637415e-05, "loss": 1.6279, "step": 77473 }, { "epoch": 2.58, "grad_norm": 0.7190394401550293, "learning_rate": 2.8877901834650585e-05, "loss": 1.7119, "step": 77474 }, { "epoch": 2.58, "grad_norm": 0.7082582712173462, "learning_rate": 2.8873427645766133e-05, "loss": 1.7062, "step": 77475 }, { "epoch": 2.58, "grad_norm": 0.7207803130149841, "learning_rate": 2.886895378598939e-05, "loss": 1.706, "step": 77476 }, { "epoch": 2.58, "grad_norm": 0.7085464596748352, "learning_rate": 2.886448025532572e-05, "loss": 1.6866, "step": 77477 }, { "epoch": 2.58, "grad_norm": 0.6833968758583069, "learning_rate": 2.8860007053780677e-05, "loss": 1.6883, "step": 77478 }, { "epoch": 2.58, "grad_norm": 0.7049045562744141, "learning_rate": 2.8855534181359564e-05, "loss": 1.6188, "step": 77479 }, { "epoch": 2.58, "grad_norm": 0.69752037525177, "learning_rate": 2.8851061638067973e-05, "loss": 1.6769, "step": 77480 }, { "epoch": 2.58, "grad_norm": 0.7138500809669495, "learning_rate": 2.8846589423911136e-05, "loss": 1.7033, "step": 77481 }, { "epoch": 2.58, "grad_norm": 0.7092099785804749, "learning_rate": 2.8842117538894683e-05, "loss": 1.6169, "step": 77482 }, { "epoch": 2.58, "grad_norm": 0.7279524207115173, "learning_rate": 2.8837645983023937e-05, "loss": 1.7048, "step": 77483 }, { "epoch": 2.58, "grad_norm": 0.7055577635765076, "learning_rate": 2.8833174756304266e-05, "loss": 1.6542, "step": 77484 }, { "epoch": 2.58, "grad_norm": 0.6911092400550842, "learning_rate": 2.8828703858741266e-05, "loss": 1.638, "step": 77485 }, { "epoch": 2.58, "grad_norm": 0.698405385017395, "learning_rate": 2.882423329034016e-05, "loss": 1.7463, "step": 77486 }, { "epoch": 2.58, "grad_norm": 0.7019519805908203, "learning_rate": 2.881976305110655e-05, "loss": 1.5989, "step": 77487 }, { "epoch": 2.58, "grad_norm": 0.7017530202865601, "learning_rate": 2.881529314104579e-05, "loss": 1.6529, "step": 77488 }, { "epoch": 2.58, "grad_norm": 0.7132039666175842, "learning_rate": 2.8810823560163256e-05, "loss": 1.7775, "step": 77489 }, { "epoch": 2.58, "grad_norm": 0.697376549243927, "learning_rate": 2.8806354308464463e-05, "loss": 1.6307, "step": 77490 }, { "epoch": 2.58, "grad_norm": 0.6963856816291809, "learning_rate": 2.880188538595475e-05, "loss": 1.6765, "step": 77491 }, { "epoch": 2.58, "grad_norm": 0.7144665122032166, "learning_rate": 2.8797416792639673e-05, "loss": 1.7052, "step": 77492 }, { "epoch": 2.58, "grad_norm": 0.6996739506721497, "learning_rate": 2.8792948528524495e-05, "loss": 1.7135, "step": 77493 }, { "epoch": 2.58, "grad_norm": 0.7350240349769592, "learning_rate": 2.878848059361475e-05, "loss": 1.7041, "step": 77494 }, { "epoch": 2.58, "grad_norm": 0.7131191492080688, "learning_rate": 2.8784012987915862e-05, "loss": 1.6704, "step": 77495 }, { "epoch": 2.58, "grad_norm": 0.7000279426574707, "learning_rate": 2.877954571143316e-05, "loss": 1.6142, "step": 77496 }, { "epoch": 2.58, "grad_norm": 0.6912128329277039, "learning_rate": 2.8775078764172178e-05, "loss": 1.6302, "step": 77497 }, { "epoch": 2.58, "grad_norm": 0.6850336194038391, "learning_rate": 2.877061214613827e-05, "loss": 1.7502, "step": 77498 }, { "epoch": 2.58, "grad_norm": 0.7289209365844727, "learning_rate": 2.876614585733684e-05, "loss": 1.7078, "step": 77499 }, { "epoch": 2.58, "grad_norm": 0.6933620572090149, "learning_rate": 2.876167989777335e-05, "loss": 1.595, "step": 77500 }, { "epoch": 2.58, "grad_norm": 0.7113209366798401, "learning_rate": 2.8757214267453253e-05, "loss": 1.8144, "step": 77501 }, { "epoch": 2.58, "grad_norm": 0.7883685827255249, "learning_rate": 2.8752748966381924e-05, "loss": 1.6162, "step": 77502 }, { "epoch": 2.58, "grad_norm": 0.6814260482788086, "learning_rate": 2.8748283994564746e-05, "loss": 1.6295, "step": 77503 }, { "epoch": 2.58, "grad_norm": 0.6980195641517639, "learning_rate": 2.8743819352007258e-05, "loss": 1.6528, "step": 77504 }, { "epoch": 2.58, "grad_norm": 0.6888558864593506, "learning_rate": 2.8739355038714784e-05, "loss": 1.7254, "step": 77505 }, { "epoch": 2.58, "grad_norm": 0.6962555646896362, "learning_rate": 2.873489105469272e-05, "loss": 1.6668, "step": 77506 }, { "epoch": 2.58, "grad_norm": 0.6850516200065613, "learning_rate": 2.873042739994653e-05, "loss": 1.6621, "step": 77507 }, { "epoch": 2.58, "grad_norm": 0.7128236293792725, "learning_rate": 2.8725964074481746e-05, "loss": 1.6973, "step": 77508 }, { "epoch": 2.58, "grad_norm": 0.6773098111152649, "learning_rate": 2.8721501078303556e-05, "loss": 1.6551, "step": 77509 }, { "epoch": 2.58, "grad_norm": 0.755947470664978, "learning_rate": 2.871703841141746e-05, "loss": 1.6926, "step": 77510 }, { "epoch": 2.58, "grad_norm": 0.6990150213241577, "learning_rate": 2.871257607382902e-05, "loss": 1.6912, "step": 77511 }, { "epoch": 2.58, "grad_norm": 0.7213547229766846, "learning_rate": 2.8708114065543496e-05, "loss": 1.6615, "step": 77512 }, { "epoch": 2.58, "grad_norm": 0.7055397629737854, "learning_rate": 2.870365238656632e-05, "loss": 1.7186, "step": 77513 }, { "epoch": 2.58, "grad_norm": 0.7104325890541077, "learning_rate": 2.8699191036902925e-05, "loss": 1.6927, "step": 77514 }, { "epoch": 2.58, "grad_norm": 0.7193785905838013, "learning_rate": 2.869473001655883e-05, "loss": 1.61, "step": 77515 }, { "epoch": 2.58, "grad_norm": 0.7068959474563599, "learning_rate": 2.8690269325539272e-05, "loss": 1.6913, "step": 77516 }, { "epoch": 2.58, "grad_norm": 0.7047387361526489, "learning_rate": 2.8685808963849742e-05, "loss": 1.7154, "step": 77517 }, { "epoch": 2.58, "grad_norm": 0.7050342559814453, "learning_rate": 2.868134893149574e-05, "loss": 1.7319, "step": 77518 }, { "epoch": 2.58, "grad_norm": 0.7182585597038269, "learning_rate": 2.867688922848259e-05, "loss": 1.7544, "step": 77519 }, { "epoch": 2.58, "grad_norm": 0.6894852519035339, "learning_rate": 2.8672429854815658e-05, "loss": 1.6522, "step": 77520 }, { "epoch": 2.58, "grad_norm": 0.7098089456558228, "learning_rate": 2.8667970810500408e-05, "loss": 1.685, "step": 77521 }, { "epoch": 2.58, "grad_norm": 0.6908108592033386, "learning_rate": 2.8663512095542396e-05, "loss": 1.6457, "step": 77522 }, { "epoch": 2.58, "grad_norm": 0.6922523379325867, "learning_rate": 2.865905370994679e-05, "loss": 1.6261, "step": 77523 }, { "epoch": 2.58, "grad_norm": 0.705080509185791, "learning_rate": 2.865459565371908e-05, "loss": 1.7323, "step": 77524 }, { "epoch": 2.58, "grad_norm": 0.6924406886100769, "learning_rate": 2.8650137926864803e-05, "loss": 1.6798, "step": 77525 }, { "epoch": 2.58, "grad_norm": 0.727844774723053, "learning_rate": 2.8645680529389247e-05, "loss": 1.7058, "step": 77526 }, { "epoch": 2.58, "grad_norm": 0.6937047839164734, "learning_rate": 2.864122346129781e-05, "loss": 1.6477, "step": 77527 }, { "epoch": 2.58, "grad_norm": 0.7034451961517334, "learning_rate": 2.8636766722595995e-05, "loss": 1.7429, "step": 77528 }, { "epoch": 2.58, "grad_norm": 0.6954233050346375, "learning_rate": 2.8632310313289187e-05, "loss": 1.6386, "step": 77529 }, { "epoch": 2.58, "grad_norm": 0.7223183512687683, "learning_rate": 2.8627854233382685e-05, "loss": 1.6458, "step": 77530 }, { "epoch": 2.58, "grad_norm": 0.6842144131660461, "learning_rate": 2.8623398482881986e-05, "loss": 1.6005, "step": 77531 }, { "epoch": 2.58, "grad_norm": 0.7149565815925598, "learning_rate": 2.8618943061792587e-05, "loss": 1.6123, "step": 77532 }, { "epoch": 2.58, "grad_norm": 0.6829494833946228, "learning_rate": 2.861448797011978e-05, "loss": 1.6447, "step": 77533 }, { "epoch": 2.58, "grad_norm": 0.71283358335495, "learning_rate": 2.861003320786893e-05, "loss": 1.6323, "step": 77534 }, { "epoch": 2.58, "grad_norm": 0.6995375156402588, "learning_rate": 2.8605578775045568e-05, "loss": 1.6796, "step": 77535 }, { "epoch": 2.58, "grad_norm": 0.690208375453949, "learning_rate": 2.8601124671655052e-05, "loss": 1.6469, "step": 77536 }, { "epoch": 2.58, "grad_norm": 0.7054102420806885, "learning_rate": 2.8596670897702744e-05, "loss": 1.6699, "step": 77537 }, { "epoch": 2.58, "grad_norm": 0.7041913866996765, "learning_rate": 2.859221745319411e-05, "loss": 1.7574, "step": 77538 }, { "epoch": 2.58, "grad_norm": 0.7020172476768494, "learning_rate": 2.858776433813451e-05, "loss": 1.7156, "step": 77539 }, { "epoch": 2.58, "grad_norm": 0.7172629833221436, "learning_rate": 2.8583311552529408e-05, "loss": 1.6078, "step": 77540 }, { "epoch": 2.58, "grad_norm": 0.7144240736961365, "learning_rate": 2.8578859096384133e-05, "loss": 1.6811, "step": 77541 }, { "epoch": 2.58, "grad_norm": 0.6907673478126526, "learning_rate": 2.8574406969704177e-05, "loss": 1.6136, "step": 77542 }, { "epoch": 2.58, "grad_norm": 0.7030348777770996, "learning_rate": 2.8569955172494908e-05, "loss": 1.7159, "step": 77543 }, { "epoch": 2.58, "grad_norm": 0.70417320728302, "learning_rate": 2.856550370476165e-05, "loss": 1.6979, "step": 77544 }, { "epoch": 2.58, "grad_norm": 0.6979475617408752, "learning_rate": 2.8561052566509935e-05, "loss": 1.6908, "step": 77545 }, { "epoch": 2.58, "grad_norm": 0.7075628042221069, "learning_rate": 2.8556601757745056e-05, "loss": 1.698, "step": 77546 }, { "epoch": 2.58, "grad_norm": 0.6982693672180176, "learning_rate": 2.8552151278472547e-05, "loss": 1.678, "step": 77547 }, { "epoch": 2.58, "grad_norm": 0.7152529358863831, "learning_rate": 2.85477011286977e-05, "loss": 1.6879, "step": 77548 }, { "epoch": 2.58, "grad_norm": 0.6934240460395813, "learning_rate": 2.8543251308425907e-05, "loss": 1.6394, "step": 77549 }, { "epoch": 2.58, "grad_norm": 0.6952205300331116, "learning_rate": 2.8538801817662637e-05, "loss": 1.7448, "step": 77550 }, { "epoch": 2.58, "grad_norm": 0.7318191528320312, "learning_rate": 2.8534352656413252e-05, "loss": 1.7034, "step": 77551 }, { "epoch": 2.58, "grad_norm": 0.7114510536193848, "learning_rate": 2.8529903824683174e-05, "loss": 1.7544, "step": 77552 }, { "epoch": 2.58, "grad_norm": 0.7126999497413635, "learning_rate": 2.8525455322477776e-05, "loss": 1.6899, "step": 77553 }, { "epoch": 2.58, "grad_norm": 0.7002931237220764, "learning_rate": 2.852100714980251e-05, "loss": 1.6706, "step": 77554 }, { "epoch": 2.58, "grad_norm": 0.7233760356903076, "learning_rate": 2.8516559306662744e-05, "loss": 1.6343, "step": 77555 }, { "epoch": 2.58, "grad_norm": 0.731715202331543, "learning_rate": 2.851211179306384e-05, "loss": 1.6882, "step": 77556 }, { "epoch": 2.58, "grad_norm": 0.7082810401916504, "learning_rate": 2.850766460901126e-05, "loss": 1.705, "step": 77557 }, { "epoch": 2.58, "grad_norm": 0.7147675156593323, "learning_rate": 2.8503217754510334e-05, "loss": 1.5984, "step": 77558 }, { "epoch": 2.58, "grad_norm": 0.7044583559036255, "learning_rate": 2.8498771229566518e-05, "loss": 1.6284, "step": 77559 }, { "epoch": 2.58, "grad_norm": 0.6946861743927002, "learning_rate": 2.8494325034185183e-05, "loss": 1.635, "step": 77560 }, { "epoch": 2.58, "grad_norm": 0.7057123184204102, "learning_rate": 2.8489879168371755e-05, "loss": 1.6697, "step": 77561 }, { "epoch": 2.58, "grad_norm": 0.7066646814346313, "learning_rate": 2.8485433632131626e-05, "loss": 1.7464, "step": 77562 }, { "epoch": 2.58, "grad_norm": 0.6927000880241394, "learning_rate": 2.84809884254701e-05, "loss": 1.6445, "step": 77563 }, { "epoch": 2.58, "grad_norm": 0.705878496170044, "learning_rate": 2.847654354839266e-05, "loss": 1.7121, "step": 77564 }, { "epoch": 2.58, "grad_norm": 0.7043718099594116, "learning_rate": 2.8472099000904813e-05, "loss": 1.7041, "step": 77565 }, { "epoch": 2.58, "grad_norm": 0.7008850574493408, "learning_rate": 2.8467654783011682e-05, "loss": 1.6888, "step": 77566 }, { "epoch": 2.58, "grad_norm": 0.7168887853622437, "learning_rate": 2.8463210894718835e-05, "loss": 1.6661, "step": 77567 }, { "epoch": 2.58, "grad_norm": 0.6930091381072998, "learning_rate": 2.8458767336031695e-05, "loss": 1.6184, "step": 77568 }, { "epoch": 2.58, "grad_norm": 0.7091137170791626, "learning_rate": 2.8454324106955628e-05, "loss": 1.6646, "step": 77569 }, { "epoch": 2.58, "grad_norm": 0.7113420963287354, "learning_rate": 2.8449881207495894e-05, "loss": 1.6635, "step": 77570 }, { "epoch": 2.58, "grad_norm": 0.718463659286499, "learning_rate": 2.8445438637658025e-05, "loss": 1.6231, "step": 77571 }, { "epoch": 2.58, "grad_norm": 0.6947336792945862, "learning_rate": 2.844099639744748e-05, "loss": 1.6625, "step": 77572 }, { "epoch": 2.58, "grad_norm": 0.7200498580932617, "learning_rate": 2.8436554486869455e-05, "loss": 1.698, "step": 77573 }, { "epoch": 2.58, "grad_norm": 0.7432348132133484, "learning_rate": 2.843211290592945e-05, "loss": 1.693, "step": 77574 }, { "epoch": 2.58, "grad_norm": 0.7024526596069336, "learning_rate": 2.842767165463289e-05, "loss": 1.7401, "step": 77575 }, { "epoch": 2.58, "grad_norm": 0.7070351243019104, "learning_rate": 2.8423230732985102e-05, "loss": 1.5992, "step": 77576 }, { "epoch": 2.58, "grad_norm": 0.7040766477584839, "learning_rate": 2.8418790140991455e-05, "loss": 1.73, "step": 77577 }, { "epoch": 2.58, "grad_norm": 0.7056036591529846, "learning_rate": 2.8414349878657374e-05, "loss": 1.7055, "step": 77578 }, { "epoch": 2.58, "grad_norm": 0.7298559546470642, "learning_rate": 2.8409909945988386e-05, "loss": 1.6779, "step": 77579 }, { "epoch": 2.58, "grad_norm": 0.7118734121322632, "learning_rate": 2.8405470342989623e-05, "loss": 1.638, "step": 77580 }, { "epoch": 2.58, "grad_norm": 0.6885044574737549, "learning_rate": 2.8401031069666613e-05, "loss": 1.6691, "step": 77581 }, { "epoch": 2.58, "grad_norm": 0.6940503120422363, "learning_rate": 2.8396592126024787e-05, "loss": 1.6398, "step": 77582 }, { "epoch": 2.58, "grad_norm": 0.7071942090988159, "learning_rate": 2.8392153512069473e-05, "loss": 1.6775, "step": 77583 }, { "epoch": 2.58, "grad_norm": 0.7099315524101257, "learning_rate": 2.8387715227805995e-05, "loss": 1.7133, "step": 77584 }, { "epoch": 2.58, "grad_norm": 0.6911361813545227, "learning_rate": 2.838327727323989e-05, "loss": 1.6541, "step": 77585 }, { "epoch": 2.58, "grad_norm": 0.7424899339675903, "learning_rate": 2.8378839648376484e-05, "loss": 1.7703, "step": 77586 }, { "epoch": 2.58, "grad_norm": 0.7130637168884277, "learning_rate": 2.8374402353221036e-05, "loss": 1.705, "step": 77587 }, { "epoch": 2.58, "grad_norm": 0.707689106464386, "learning_rate": 2.836996538777908e-05, "loss": 1.5828, "step": 77588 }, { "epoch": 2.58, "grad_norm": 0.6993584036827087, "learning_rate": 2.836552875205601e-05, "loss": 1.6998, "step": 77589 }, { "epoch": 2.58, "grad_norm": 0.7169209122657776, "learning_rate": 2.8361092446057155e-05, "loss": 1.643, "step": 77590 }, { "epoch": 2.58, "grad_norm": 0.6985524296760559, "learning_rate": 2.8356656469787876e-05, "loss": 1.739, "step": 77591 }, { "epoch": 2.58, "grad_norm": 0.7194268703460693, "learning_rate": 2.8352220823253636e-05, "loss": 1.6378, "step": 77592 }, { "epoch": 2.58, "grad_norm": 0.707910418510437, "learning_rate": 2.8347785506459765e-05, "loss": 1.6888, "step": 77593 }, { "epoch": 2.58, "grad_norm": 0.6894654631614685, "learning_rate": 2.8343350519411622e-05, "loss": 1.7427, "step": 77594 }, { "epoch": 2.58, "grad_norm": 0.7197355628013611, "learning_rate": 2.833891586211464e-05, "loss": 1.6954, "step": 77595 }, { "epoch": 2.58, "grad_norm": 0.7302606701850891, "learning_rate": 2.833448153457418e-05, "loss": 1.6923, "step": 77596 }, { "epoch": 2.58, "grad_norm": 0.7099235653877258, "learning_rate": 2.833004753679564e-05, "loss": 1.6843, "step": 77597 }, { "epoch": 2.58, "grad_norm": 0.705011785030365, "learning_rate": 2.832561386878438e-05, "loss": 1.6821, "step": 77598 }, { "epoch": 2.58, "grad_norm": 0.6934317946434021, "learning_rate": 2.832118053054583e-05, "loss": 1.7239, "step": 77599 }, { "epoch": 2.58, "grad_norm": 0.709330141544342, "learning_rate": 2.8316747522085347e-05, "loss": 1.6765, "step": 77600 }, { "epoch": 2.58, "grad_norm": 0.6904622316360474, "learning_rate": 2.8312314843408234e-05, "loss": 1.7505, "step": 77601 }, { "epoch": 2.58, "grad_norm": 0.7054622769355774, "learning_rate": 2.8307882494519984e-05, "loss": 1.7348, "step": 77602 }, { "epoch": 2.58, "grad_norm": 0.7318981885910034, "learning_rate": 2.8303450475425893e-05, "loss": 1.7335, "step": 77603 }, { "epoch": 2.58, "grad_norm": 0.7279465198516846, "learning_rate": 2.8299018786131455e-05, "loss": 1.6538, "step": 77604 }, { "epoch": 2.58, "grad_norm": 0.6832911372184753, "learning_rate": 2.8294587426641933e-05, "loss": 1.6536, "step": 77605 }, { "epoch": 2.58, "grad_norm": 0.6875086426734924, "learning_rate": 2.8290156396962728e-05, "loss": 1.6249, "step": 77606 }, { "epoch": 2.58, "grad_norm": 0.7105425596237183, "learning_rate": 2.8285725697099294e-05, "loss": 1.6411, "step": 77607 }, { "epoch": 2.58, "grad_norm": 0.7005778551101685, "learning_rate": 2.8281295327056862e-05, "loss": 1.6754, "step": 77608 }, { "epoch": 2.58, "grad_norm": 0.6848320960998535, "learning_rate": 2.8276865286841e-05, "loss": 1.7167, "step": 77609 }, { "epoch": 2.58, "grad_norm": 0.7002573013305664, "learning_rate": 2.8272435576456898e-05, "loss": 1.691, "step": 77610 }, { "epoch": 2.58, "grad_norm": 0.7018222808837891, "learning_rate": 2.8268006195910085e-05, "loss": 1.6885, "step": 77611 }, { "epoch": 2.58, "grad_norm": 0.6913809180259705, "learning_rate": 2.8263577145205895e-05, "loss": 1.7582, "step": 77612 }, { "epoch": 2.58, "grad_norm": 0.7005600929260254, "learning_rate": 2.8259148424349622e-05, "loss": 1.7109, "step": 77613 }, { "epoch": 2.58, "grad_norm": 0.6931834816932678, "learning_rate": 2.8254720033346756e-05, "loss": 1.6911, "step": 77614 }, { "epoch": 2.58, "grad_norm": 0.6877180933952332, "learning_rate": 2.82502919722026e-05, "loss": 1.6598, "step": 77615 }, { "epoch": 2.58, "grad_norm": 0.6963606476783752, "learning_rate": 2.824586424092251e-05, "loss": 1.6824, "step": 77616 }, { "epoch": 2.58, "grad_norm": 0.7242583632469177, "learning_rate": 2.824143683951192e-05, "loss": 1.6876, "step": 77617 }, { "epoch": 2.58, "grad_norm": 0.7067293524742126, "learning_rate": 2.8237009767976227e-05, "loss": 1.6633, "step": 77618 }, { "epoch": 2.58, "grad_norm": 0.7087571024894714, "learning_rate": 2.823258302632072e-05, "loss": 1.6479, "step": 77619 }, { "epoch": 2.58, "grad_norm": 0.7208734154701233, "learning_rate": 2.82281566145508e-05, "loss": 1.6301, "step": 77620 }, { "epoch": 2.58, "grad_norm": 0.6990236043930054, "learning_rate": 2.822373053267193e-05, "loss": 1.6681, "step": 77621 }, { "epoch": 2.58, "grad_norm": 0.674456775188446, "learning_rate": 2.8219304780689366e-05, "loss": 1.6899, "step": 77622 }, { "epoch": 2.58, "grad_norm": 0.6897283792495728, "learning_rate": 2.8214879358608477e-05, "loss": 1.6876, "step": 77623 }, { "epoch": 2.58, "grad_norm": 0.7453823089599609, "learning_rate": 2.821045426643469e-05, "loss": 1.707, "step": 77624 }, { "epoch": 2.58, "grad_norm": 0.69810551404953, "learning_rate": 2.8206029504173465e-05, "loss": 1.6051, "step": 77625 }, { "epoch": 2.58, "grad_norm": 0.7189816832542419, "learning_rate": 2.8201605071829968e-05, "loss": 1.6899, "step": 77626 }, { "epoch": 2.58, "grad_norm": 0.7086742520332336, "learning_rate": 2.8197180969409662e-05, "loss": 1.7414, "step": 77627 }, { "epoch": 2.58, "grad_norm": 0.718590497970581, "learning_rate": 2.8192757196918004e-05, "loss": 1.6278, "step": 77628 }, { "epoch": 2.58, "grad_norm": 0.7396784424781799, "learning_rate": 2.8188333754360258e-05, "loss": 1.7609, "step": 77629 }, { "epoch": 2.58, "grad_norm": 0.692303478717804, "learning_rate": 2.818391064174179e-05, "loss": 1.745, "step": 77630 }, { "epoch": 2.58, "grad_norm": 0.6914607882499695, "learning_rate": 2.817948785906803e-05, "loss": 1.634, "step": 77631 }, { "epoch": 2.58, "grad_norm": 0.7102982997894287, "learning_rate": 2.81750654063444e-05, "loss": 1.6348, "step": 77632 }, { "epoch": 2.58, "grad_norm": 0.6960544586181641, "learning_rate": 2.8170643283576065e-05, "loss": 1.6934, "step": 77633 }, { "epoch": 2.58, "grad_norm": 0.7144678235054016, "learning_rate": 2.8166221490768557e-05, "loss": 1.6755, "step": 77634 }, { "epoch": 2.58, "grad_norm": 0.6866259574890137, "learning_rate": 2.816180002792724e-05, "loss": 1.6206, "step": 77635 }, { "epoch": 2.58, "grad_norm": 0.6929545998573303, "learning_rate": 2.8157378895057437e-05, "loss": 1.678, "step": 77636 }, { "epoch": 2.58, "grad_norm": 0.7206682562828064, "learning_rate": 2.815295809216448e-05, "loss": 1.7031, "step": 77637 }, { "epoch": 2.58, "grad_norm": 0.7059155106544495, "learning_rate": 2.8148537619253764e-05, "loss": 1.7207, "step": 77638 }, { "epoch": 2.58, "grad_norm": 0.7196354866027832, "learning_rate": 2.8144117476330784e-05, "loss": 1.6324, "step": 77639 }, { "epoch": 2.58, "grad_norm": 0.727033793926239, "learning_rate": 2.8139697663400673e-05, "loss": 1.6336, "step": 77640 }, { "epoch": 2.58, "grad_norm": 0.6771669387817383, "learning_rate": 2.8135278180468922e-05, "loss": 1.6031, "step": 77641 }, { "epoch": 2.58, "grad_norm": 0.692857027053833, "learning_rate": 2.813085902754093e-05, "loss": 1.6463, "step": 77642 }, { "epoch": 2.58, "grad_norm": 0.7148134708404541, "learning_rate": 2.8126440204622027e-05, "loss": 1.689, "step": 77643 }, { "epoch": 2.58, "grad_norm": 0.6822717785835266, "learning_rate": 2.8122021711717533e-05, "loss": 1.6552, "step": 77644 }, { "epoch": 2.58, "grad_norm": 0.6884315609931946, "learning_rate": 2.8117603548832855e-05, "loss": 1.6731, "step": 77645 }, { "epoch": 2.58, "grad_norm": 0.7086004018783569, "learning_rate": 2.8113185715973385e-05, "loss": 1.677, "step": 77646 }, { "epoch": 2.58, "grad_norm": 0.7169769406318665, "learning_rate": 2.8108768213144383e-05, "loss": 1.7265, "step": 77647 }, { "epoch": 2.58, "grad_norm": 0.7225671410560608, "learning_rate": 2.8104351040351248e-05, "loss": 1.7725, "step": 77648 }, { "epoch": 2.58, "grad_norm": 0.7053550481796265, "learning_rate": 2.809993419759944e-05, "loss": 1.6523, "step": 77649 }, { "epoch": 2.58, "grad_norm": 0.6919373869895935, "learning_rate": 2.809551768489425e-05, "loss": 1.7012, "step": 77650 }, { "epoch": 2.58, "grad_norm": 0.7007810473442078, "learning_rate": 2.809110150224099e-05, "loss": 1.7539, "step": 77651 }, { "epoch": 2.58, "grad_norm": 0.7155176401138306, "learning_rate": 2.8086685649645135e-05, "loss": 1.7161, "step": 77652 }, { "epoch": 2.58, "grad_norm": 0.7150667309761047, "learning_rate": 2.808227012711196e-05, "loss": 1.6421, "step": 77653 }, { "epoch": 2.58, "grad_norm": 0.6907304525375366, "learning_rate": 2.807785493464679e-05, "loss": 1.7486, "step": 77654 }, { "epoch": 2.58, "grad_norm": 0.7197428941726685, "learning_rate": 2.8073440072255093e-05, "loss": 1.6807, "step": 77655 }, { "epoch": 2.58, "grad_norm": 0.73212730884552, "learning_rate": 2.8069025539942125e-05, "loss": 1.621, "step": 77656 }, { "epoch": 2.58, "grad_norm": 0.7179858684539795, "learning_rate": 2.806461133771335e-05, "loss": 1.7407, "step": 77657 }, { "epoch": 2.58, "grad_norm": 0.6939734816551208, "learning_rate": 2.8060197465574e-05, "loss": 1.6707, "step": 77658 }, { "epoch": 2.58, "grad_norm": 0.6990522742271423, "learning_rate": 2.8055783923529563e-05, "loss": 1.6792, "step": 77659 }, { "epoch": 2.58, "grad_norm": 0.7132212519645691, "learning_rate": 2.805137071158534e-05, "loss": 1.6875, "step": 77660 }, { "epoch": 2.58, "grad_norm": 0.7063244581222534, "learning_rate": 2.804695782974663e-05, "loss": 1.7363, "step": 77661 }, { "epoch": 2.58, "grad_norm": 0.7175267338752747, "learning_rate": 2.804254527801889e-05, "loss": 1.699, "step": 77662 }, { "epoch": 2.58, "grad_norm": 0.7189871668815613, "learning_rate": 2.8038133056407385e-05, "loss": 1.7319, "step": 77663 }, { "epoch": 2.58, "grad_norm": 0.7075757384300232, "learning_rate": 2.8033721164917545e-05, "loss": 1.6362, "step": 77664 }, { "epoch": 2.58, "grad_norm": 0.690925121307373, "learning_rate": 2.8029309603554727e-05, "loss": 1.6572, "step": 77665 }, { "epoch": 2.58, "grad_norm": 0.716415524482727, "learning_rate": 2.802489837232417e-05, "loss": 1.7392, "step": 77666 }, { "epoch": 2.58, "grad_norm": 0.7104968428611755, "learning_rate": 2.802048747123139e-05, "loss": 1.7338, "step": 77667 }, { "epoch": 2.58, "grad_norm": 0.689315140247345, "learning_rate": 2.801607690028159e-05, "loss": 1.6217, "step": 77668 }, { "epoch": 2.58, "grad_norm": 0.7026354670524597, "learning_rate": 2.801166665948027e-05, "loss": 1.6878, "step": 77669 }, { "epoch": 2.58, "grad_norm": 0.7072154879570007, "learning_rate": 2.8007256748832652e-05, "loss": 1.7129, "step": 77670 }, { "epoch": 2.58, "grad_norm": 0.6957433819770813, "learning_rate": 2.80028471683442e-05, "loss": 1.6904, "step": 77671 }, { "epoch": 2.58, "grad_norm": 0.7060155272483826, "learning_rate": 2.799843791802021e-05, "loss": 1.6863, "step": 77672 }, { "epoch": 2.58, "grad_norm": 0.6742964386940002, "learning_rate": 2.7994028997866015e-05, "loss": 1.6572, "step": 77673 }, { "epoch": 2.58, "grad_norm": 0.7115417122840881, "learning_rate": 2.7989620407887003e-05, "loss": 1.6961, "step": 77674 }, { "epoch": 2.58, "grad_norm": 0.7126262187957764, "learning_rate": 2.7985212148088475e-05, "loss": 1.6104, "step": 77675 }, { "epoch": 2.58, "grad_norm": 0.7072317600250244, "learning_rate": 2.7980804218475895e-05, "loss": 1.7463, "step": 77676 }, { "epoch": 2.58, "grad_norm": 0.7259202599525452, "learning_rate": 2.7976396619054454e-05, "loss": 1.7631, "step": 77677 }, { "epoch": 2.58, "grad_norm": 0.710837185382843, "learning_rate": 2.7971989349829648e-05, "loss": 1.7268, "step": 77678 }, { "epoch": 2.58, "grad_norm": 0.6980215907096863, "learning_rate": 2.796758241080678e-05, "loss": 1.6472, "step": 77679 }, { "epoch": 2.58, "grad_norm": 0.6991117596626282, "learning_rate": 2.79631758019911e-05, "loss": 1.722, "step": 77680 }, { "epoch": 2.58, "grad_norm": 0.6966104507446289, "learning_rate": 2.795876952338808e-05, "loss": 1.6267, "step": 77681 }, { "epoch": 2.58, "grad_norm": 0.7100226283073425, "learning_rate": 2.7954363575003112e-05, "loss": 1.7471, "step": 77682 }, { "epoch": 2.58, "grad_norm": 0.7103869915008545, "learning_rate": 2.794995795684136e-05, "loss": 1.7303, "step": 77683 }, { "epoch": 2.58, "grad_norm": 0.6961727142333984, "learning_rate": 2.7945552668908255e-05, "loss": 1.6544, "step": 77684 }, { "epoch": 2.58, "grad_norm": 0.7135978937149048, "learning_rate": 2.7941147711209254e-05, "loss": 1.6596, "step": 77685 }, { "epoch": 2.58, "grad_norm": 0.7059498429298401, "learning_rate": 2.793674308374959e-05, "loss": 1.7269, "step": 77686 }, { "epoch": 2.58, "grad_norm": 0.6755597591400146, "learning_rate": 2.7932338786534592e-05, "loss": 1.6363, "step": 77687 }, { "epoch": 2.58, "grad_norm": 0.7197331786155701, "learning_rate": 2.7927934819569653e-05, "loss": 1.7573, "step": 77688 }, { "epoch": 2.58, "grad_norm": 0.7068281769752502, "learning_rate": 2.7923531182860203e-05, "loss": 1.6958, "step": 77689 }, { "epoch": 2.58, "grad_norm": 0.6908503770828247, "learning_rate": 2.7919127876411406e-05, "loss": 1.6577, "step": 77690 }, { "epoch": 2.58, "grad_norm": 0.698864758014679, "learning_rate": 2.791472490022869e-05, "loss": 1.7523, "step": 77691 }, { "epoch": 2.58, "grad_norm": 0.7173783183097839, "learning_rate": 2.791032225431745e-05, "loss": 1.6704, "step": 77692 }, { "epoch": 2.58, "grad_norm": 0.7089358568191528, "learning_rate": 2.790591993868302e-05, "loss": 1.6912, "step": 77693 }, { "epoch": 2.58, "grad_norm": 0.7067906260490417, "learning_rate": 2.7901517953330622e-05, "loss": 1.6822, "step": 77694 }, { "epoch": 2.58, "grad_norm": 0.7164100408554077, "learning_rate": 2.789711629826572e-05, "loss": 1.6597, "step": 77695 }, { "epoch": 2.58, "grad_norm": 0.6993299722671509, "learning_rate": 2.7892714973493712e-05, "loss": 1.7028, "step": 77696 }, { "epoch": 2.58, "grad_norm": 0.7104837894439697, "learning_rate": 2.788831397901976e-05, "loss": 1.6951, "step": 77697 }, { "epoch": 2.59, "grad_norm": 0.7013108134269714, "learning_rate": 2.7883913314849293e-05, "loss": 1.7051, "step": 77698 }, { "epoch": 2.59, "grad_norm": 0.6938184499740601, "learning_rate": 2.7879512980987707e-05, "loss": 1.6943, "step": 77699 }, { "epoch": 2.59, "grad_norm": 0.7187198996543884, "learning_rate": 2.7875112977440327e-05, "loss": 1.6732, "step": 77700 }, { "epoch": 2.59, "grad_norm": 0.6976343393325806, "learning_rate": 2.7870713304212387e-05, "loss": 1.6263, "step": 77701 }, { "epoch": 2.59, "grad_norm": 0.7307407259941101, "learning_rate": 2.786631396130935e-05, "loss": 1.6965, "step": 77702 }, { "epoch": 2.59, "grad_norm": 0.7035038471221924, "learning_rate": 2.7861914948736542e-05, "loss": 1.6786, "step": 77703 }, { "epoch": 2.59, "grad_norm": 0.7082908749580383, "learning_rate": 2.785751626649919e-05, "loss": 1.6368, "step": 77704 }, { "epoch": 2.59, "grad_norm": 0.6826170682907104, "learning_rate": 2.785311791460273e-05, "loss": 1.6464, "step": 77705 }, { "epoch": 2.59, "grad_norm": 0.73028564453125, "learning_rate": 2.784871989305255e-05, "loss": 1.6787, "step": 77706 }, { "epoch": 2.59, "grad_norm": 0.6962583065032959, "learning_rate": 2.7844322201853885e-05, "loss": 1.6384, "step": 77707 }, { "epoch": 2.59, "grad_norm": 0.694030225276947, "learning_rate": 2.7839924841012095e-05, "loss": 1.6472, "step": 77708 }, { "epoch": 2.59, "grad_norm": 0.7200107574462891, "learning_rate": 2.7835527810532575e-05, "loss": 1.679, "step": 77709 }, { "epoch": 2.59, "grad_norm": 0.7021334171295166, "learning_rate": 2.7831131110420624e-05, "loss": 1.5941, "step": 77710 }, { "epoch": 2.59, "grad_norm": 0.6985439658164978, "learning_rate": 2.7826734740681532e-05, "loss": 1.7424, "step": 77711 }, { "epoch": 2.59, "grad_norm": 0.7106818556785583, "learning_rate": 2.7822338701320736e-05, "loss": 1.7145, "step": 77712 }, { "epoch": 2.59, "grad_norm": 0.722511887550354, "learning_rate": 2.7817942992343457e-05, "loss": 1.6641, "step": 77713 }, { "epoch": 2.59, "grad_norm": 0.7407845854759216, "learning_rate": 2.7813547613755126e-05, "loss": 1.6722, "step": 77714 }, { "epoch": 2.59, "grad_norm": 0.6947478652000427, "learning_rate": 2.780915256556101e-05, "loss": 1.6576, "step": 77715 }, { "epoch": 2.59, "grad_norm": 0.6990262866020203, "learning_rate": 2.7804757847766533e-05, "loss": 1.719, "step": 77716 }, { "epoch": 2.59, "grad_norm": 0.7188841700553894, "learning_rate": 2.7800363460376963e-05, "loss": 1.6718, "step": 77717 }, { "epoch": 2.59, "grad_norm": 0.7353852987289429, "learning_rate": 2.779596940339759e-05, "loss": 1.6824, "step": 77718 }, { "epoch": 2.59, "grad_norm": 0.6934653520584106, "learning_rate": 2.7791575676833875e-05, "loss": 1.703, "step": 77719 }, { "epoch": 2.59, "grad_norm": 0.7471563816070557, "learning_rate": 2.778718228069099e-05, "loss": 1.7362, "step": 77720 }, { "epoch": 2.59, "grad_norm": 0.7060171365737915, "learning_rate": 2.7782789214974456e-05, "loss": 1.6529, "step": 77721 }, { "epoch": 2.59, "grad_norm": 0.7198737859725952, "learning_rate": 2.777839647968947e-05, "loss": 1.6802, "step": 77722 }, { "epoch": 2.59, "grad_norm": 0.7063881158828735, "learning_rate": 2.777400407484136e-05, "loss": 1.6816, "step": 77723 }, { "epoch": 2.59, "grad_norm": 0.7168068289756775, "learning_rate": 2.776961200043556e-05, "loss": 1.7291, "step": 77724 }, { "epoch": 2.59, "grad_norm": 0.7099639773368835, "learning_rate": 2.7765220256477295e-05, "loss": 1.6355, "step": 77725 }, { "epoch": 2.59, "grad_norm": 0.6874817609786987, "learning_rate": 2.776082884297196e-05, "loss": 1.6956, "step": 77726 }, { "epoch": 2.59, "grad_norm": 0.7045513391494751, "learning_rate": 2.7756437759924853e-05, "loss": 1.6797, "step": 77727 }, { "epoch": 2.59, "grad_norm": 0.6962059140205383, "learning_rate": 2.775204700734134e-05, "loss": 1.5986, "step": 77728 }, { "epoch": 2.59, "grad_norm": 0.696031928062439, "learning_rate": 2.774765658522674e-05, "loss": 1.6491, "step": 77729 }, { "epoch": 2.59, "grad_norm": 0.697449266910553, "learning_rate": 2.7743266493586325e-05, "loss": 1.679, "step": 77730 }, { "epoch": 2.59, "grad_norm": 0.7325575351715088, "learning_rate": 2.7738876732425487e-05, "loss": 1.7185, "step": 77731 }, { "epoch": 2.59, "grad_norm": 0.699479341506958, "learning_rate": 2.7734487301749587e-05, "loss": 1.667, "step": 77732 }, { "epoch": 2.59, "grad_norm": 0.7337969541549683, "learning_rate": 2.773009820156382e-05, "loss": 1.7698, "step": 77733 }, { "epoch": 2.59, "grad_norm": 0.6868413686752319, "learning_rate": 2.7725709431873588e-05, "loss": 1.6485, "step": 77734 }, { "epoch": 2.59, "grad_norm": 0.7466587424278259, "learning_rate": 2.7721320992684314e-05, "loss": 1.7885, "step": 77735 }, { "epoch": 2.59, "grad_norm": 0.7436564564704895, "learning_rate": 2.77169328840012e-05, "loss": 1.8361, "step": 77736 }, { "epoch": 2.59, "grad_norm": 0.6917731761932373, "learning_rate": 2.7712545105829597e-05, "loss": 1.6674, "step": 77737 }, { "epoch": 2.59, "grad_norm": 0.6857924461364746, "learning_rate": 2.770815765817488e-05, "loss": 1.6821, "step": 77738 }, { "epoch": 2.59, "grad_norm": 0.7035183310508728, "learning_rate": 2.7703770541042304e-05, "loss": 1.652, "step": 77739 }, { "epoch": 2.59, "grad_norm": 0.7170624136924744, "learning_rate": 2.769938375443723e-05, "loss": 1.7203, "step": 77740 }, { "epoch": 2.59, "grad_norm": 0.7150319814682007, "learning_rate": 2.7694997298364964e-05, "loss": 1.7221, "step": 77741 }, { "epoch": 2.59, "grad_norm": 0.6979352831840515, "learning_rate": 2.7690611172830956e-05, "loss": 1.709, "step": 77742 }, { "epoch": 2.59, "grad_norm": 0.7197306156158447, "learning_rate": 2.768622537784031e-05, "loss": 1.7517, "step": 77743 }, { "epoch": 2.59, "grad_norm": 0.6963871717453003, "learning_rate": 2.7681839913398453e-05, "loss": 1.6333, "step": 77744 }, { "epoch": 2.59, "grad_norm": 0.723814070224762, "learning_rate": 2.767745477951078e-05, "loss": 1.7047, "step": 77745 }, { "epoch": 2.59, "grad_norm": 0.709634006023407, "learning_rate": 2.767306997618255e-05, "loss": 1.7039, "step": 77746 }, { "epoch": 2.59, "grad_norm": 0.6940183639526367, "learning_rate": 2.7668685503419064e-05, "loss": 1.6896, "step": 77747 }, { "epoch": 2.59, "grad_norm": 0.7053600549697876, "learning_rate": 2.7664301361225615e-05, "loss": 1.6617, "step": 77748 }, { "epoch": 2.59, "grad_norm": 0.695614218711853, "learning_rate": 2.7659917549607735e-05, "loss": 1.7442, "step": 77749 }, { "epoch": 2.59, "grad_norm": 0.6944184303283691, "learning_rate": 2.765553406857045e-05, "loss": 1.616, "step": 77750 }, { "epoch": 2.59, "grad_norm": 0.6899998188018799, "learning_rate": 2.7651150918119225e-05, "loss": 1.7633, "step": 77751 }, { "epoch": 2.59, "grad_norm": 0.7237101197242737, "learning_rate": 2.764676809825942e-05, "loss": 1.6912, "step": 77752 }, { "epoch": 2.59, "grad_norm": 0.7281380891799927, "learning_rate": 2.764238560899633e-05, "loss": 1.6731, "step": 77753 }, { "epoch": 2.59, "grad_norm": 0.7212107181549072, "learning_rate": 2.763800345033519e-05, "loss": 1.7344, "step": 77754 }, { "epoch": 2.59, "grad_norm": 0.6849239468574524, "learning_rate": 2.7633621622281388e-05, "loss": 1.7244, "step": 77755 }, { "epoch": 2.59, "grad_norm": 0.6977274417877197, "learning_rate": 2.7629240124840323e-05, "loss": 1.6622, "step": 77756 }, { "epoch": 2.59, "grad_norm": 0.7254024744033813, "learning_rate": 2.762485895801716e-05, "loss": 1.7332, "step": 77757 }, { "epoch": 2.59, "grad_norm": 0.6817745566368103, "learning_rate": 2.7620478121817257e-05, "loss": 1.6721, "step": 77758 }, { "epoch": 2.59, "grad_norm": 0.7442829608917236, "learning_rate": 2.7616097616246014e-05, "loss": 1.7606, "step": 77759 }, { "epoch": 2.59, "grad_norm": 0.7283086180686951, "learning_rate": 2.7611717441308724e-05, "loss": 1.7171, "step": 77760 }, { "epoch": 2.59, "grad_norm": 0.7256529927253723, "learning_rate": 2.760733759701058e-05, "loss": 1.7439, "step": 77761 }, { "epoch": 2.59, "grad_norm": 0.7234620451927185, "learning_rate": 2.7602958083357053e-05, "loss": 1.6948, "step": 77762 }, { "epoch": 2.59, "grad_norm": 0.694894015789032, "learning_rate": 2.759857890035343e-05, "loss": 1.7078, "step": 77763 }, { "epoch": 2.59, "grad_norm": 0.6958737969398499, "learning_rate": 2.7594200048004912e-05, "loss": 1.6032, "step": 77764 }, { "epoch": 2.59, "grad_norm": 0.7076555490493774, "learning_rate": 2.7589821526316924e-05, "loss": 1.6967, "step": 77765 }, { "epoch": 2.59, "grad_norm": 0.7124761343002319, "learning_rate": 2.7585443335294766e-05, "loss": 1.7352, "step": 77766 }, { "epoch": 2.59, "grad_norm": 0.7245976328849792, "learning_rate": 2.7581065474943798e-05, "loss": 1.7622, "step": 77767 }, { "epoch": 2.59, "grad_norm": 0.7102621793746948, "learning_rate": 2.7576687945269183e-05, "loss": 1.7102, "step": 77768 }, { "epoch": 2.59, "grad_norm": 0.7071194648742676, "learning_rate": 2.7572310746276384e-05, "loss": 1.6277, "step": 77769 }, { "epoch": 2.59, "grad_norm": 0.701009213924408, "learning_rate": 2.7567933877970695e-05, "loss": 1.6644, "step": 77770 }, { "epoch": 2.59, "grad_norm": 0.7198111414909363, "learning_rate": 2.756355734035731e-05, "loss": 1.7238, "step": 77771 }, { "epoch": 2.59, "grad_norm": 0.6960278153419495, "learning_rate": 2.7559181133441665e-05, "loss": 1.7129, "step": 77772 }, { "epoch": 2.59, "grad_norm": 0.6889787316322327, "learning_rate": 2.7554805257229018e-05, "loss": 1.6464, "step": 77773 }, { "epoch": 2.59, "grad_norm": 0.700318455696106, "learning_rate": 2.7550429711724698e-05, "loss": 1.6332, "step": 77774 }, { "epoch": 2.59, "grad_norm": 0.694035530090332, "learning_rate": 2.7546054496934e-05, "loss": 1.6648, "step": 77775 }, { "epoch": 2.59, "grad_norm": 0.7037084102630615, "learning_rate": 2.754167961286229e-05, "loss": 1.7425, "step": 77776 }, { "epoch": 2.59, "grad_norm": 0.6940174698829651, "learning_rate": 2.7537305059514824e-05, "loss": 1.6945, "step": 77777 }, { "epoch": 2.59, "grad_norm": 0.6839330196380615, "learning_rate": 2.7532930836896904e-05, "loss": 1.625, "step": 77778 }, { "epoch": 2.59, "grad_norm": 0.6966848373413086, "learning_rate": 2.7528556945013892e-05, "loss": 1.6594, "step": 77779 }, { "epoch": 2.59, "grad_norm": 0.704623281955719, "learning_rate": 2.7524183383871012e-05, "loss": 1.6757, "step": 77780 }, { "epoch": 2.59, "grad_norm": 0.7151828408241272, "learning_rate": 2.7519810153473664e-05, "loss": 1.7116, "step": 77781 }, { "epoch": 2.59, "grad_norm": 0.7207643389701843, "learning_rate": 2.7515437253827143e-05, "loss": 1.7498, "step": 77782 }, { "epoch": 2.59, "grad_norm": 0.7036208510398865, "learning_rate": 2.7511064684936645e-05, "loss": 1.6994, "step": 77783 }, { "epoch": 2.59, "grad_norm": 0.7183333039283752, "learning_rate": 2.7506692446807664e-05, "loss": 1.7015, "step": 77784 }, { "epoch": 2.59, "grad_norm": 0.6996694803237915, "learning_rate": 2.7502320539445334e-05, "loss": 1.6562, "step": 77785 }, { "epoch": 2.59, "grad_norm": 0.7262125611305237, "learning_rate": 2.7497948962855075e-05, "loss": 1.7279, "step": 77786 }, { "epoch": 2.59, "grad_norm": 0.7209540605545044, "learning_rate": 2.7493577717042126e-05, "loss": 1.6957, "step": 77787 }, { "epoch": 2.59, "grad_norm": 0.6871042251586914, "learning_rate": 2.7489206802011842e-05, "loss": 1.6356, "step": 77788 }, { "epoch": 2.59, "grad_norm": 0.6986592411994934, "learning_rate": 2.748483621776956e-05, "loss": 1.7705, "step": 77789 }, { "epoch": 2.59, "grad_norm": 0.7228581309318542, "learning_rate": 2.748046596432043e-05, "loss": 1.638, "step": 77790 }, { "epoch": 2.59, "grad_norm": 0.7214594483375549, "learning_rate": 2.7476096041669927e-05, "loss": 1.6705, "step": 77791 }, { "epoch": 2.59, "grad_norm": 0.69746333360672, "learning_rate": 2.7471726449823305e-05, "loss": 1.6987, "step": 77792 }, { "epoch": 2.59, "grad_norm": 0.7171058654785156, "learning_rate": 2.74673571887858e-05, "loss": 1.6934, "step": 77793 }, { "epoch": 2.59, "grad_norm": 0.7178475260734558, "learning_rate": 2.7462988258562735e-05, "loss": 1.7315, "step": 77794 }, { "epoch": 2.59, "grad_norm": 0.7033710479736328, "learning_rate": 2.7458619659159542e-05, "loss": 1.6238, "step": 77795 }, { "epoch": 2.59, "grad_norm": 0.6993338465690613, "learning_rate": 2.7454251390581385e-05, "loss": 1.6612, "step": 77796 }, { "epoch": 2.59, "grad_norm": 0.7181625366210938, "learning_rate": 2.7449883452833588e-05, "loss": 1.6577, "step": 77797 }, { "epoch": 2.59, "grad_norm": 0.7005956768989563, "learning_rate": 2.744551584592145e-05, "loss": 1.6993, "step": 77798 }, { "epoch": 2.59, "grad_norm": 0.7226427793502808, "learning_rate": 2.7441148569850436e-05, "loss": 1.6905, "step": 77799 }, { "epoch": 2.59, "grad_norm": 0.688880980014801, "learning_rate": 2.743678162462557e-05, "loss": 1.7357, "step": 77800 }, { "epoch": 2.59, "grad_norm": 0.6977099180221558, "learning_rate": 2.7432415010252317e-05, "loss": 1.6411, "step": 77801 }, { "epoch": 2.59, "grad_norm": 0.691210925579071, "learning_rate": 2.7428048726735973e-05, "loss": 1.7085, "step": 77802 }, { "epoch": 2.59, "grad_norm": 0.7160442471504211, "learning_rate": 2.7423682774081835e-05, "loss": 1.6972, "step": 77803 }, { "epoch": 2.59, "grad_norm": 0.7192020416259766, "learning_rate": 2.7419317152295128e-05, "loss": 1.6904, "step": 77804 }, { "epoch": 2.59, "grad_norm": 0.7937792539596558, "learning_rate": 2.7414951861381218e-05, "loss": 1.6682, "step": 77805 }, { "epoch": 2.59, "grad_norm": 0.7010670304298401, "learning_rate": 2.74105869013455e-05, "loss": 1.6974, "step": 77806 }, { "epoch": 2.59, "grad_norm": 0.7190770506858826, "learning_rate": 2.7406222272193033e-05, "loss": 1.7503, "step": 77807 }, { "epoch": 2.59, "grad_norm": 0.7306535243988037, "learning_rate": 2.7401857973929286e-05, "loss": 1.6836, "step": 77808 }, { "epoch": 2.59, "grad_norm": 0.706976592540741, "learning_rate": 2.7397494006559552e-05, "loss": 1.6666, "step": 77809 }, { "epoch": 2.59, "grad_norm": 0.7289621233940125, "learning_rate": 2.739313037008909e-05, "loss": 1.7404, "step": 77810 }, { "epoch": 2.59, "grad_norm": 0.6843642592430115, "learning_rate": 2.7388767064523132e-05, "loss": 1.691, "step": 77811 }, { "epoch": 2.59, "grad_norm": 0.6998006701469421, "learning_rate": 2.7384404089867074e-05, "loss": 1.6359, "step": 77812 }, { "epoch": 2.59, "grad_norm": 0.7346625328063965, "learning_rate": 2.7380041446126278e-05, "loss": 1.6469, "step": 77813 }, { "epoch": 2.59, "grad_norm": 0.7029922008514404, "learning_rate": 2.7375679133305807e-05, "loss": 1.5742, "step": 77814 }, { "epoch": 2.59, "grad_norm": 0.6973958611488342, "learning_rate": 2.737131715141112e-05, "loss": 1.6665, "step": 77815 }, { "epoch": 2.59, "grad_norm": 0.7100299596786499, "learning_rate": 2.736695550044755e-05, "loss": 1.7508, "step": 77816 }, { "epoch": 2.59, "grad_norm": 0.7146527767181396, "learning_rate": 2.7362594180420293e-05, "loss": 1.7255, "step": 77817 }, { "epoch": 2.59, "grad_norm": 0.7057613134384155, "learning_rate": 2.7358233191334645e-05, "loss": 1.6769, "step": 77818 }, { "epoch": 2.59, "grad_norm": 0.7078728675842285, "learning_rate": 2.7353872533195998e-05, "loss": 1.7438, "step": 77819 }, { "epoch": 2.59, "grad_norm": 0.7120145559310913, "learning_rate": 2.734951220600955e-05, "loss": 1.7012, "step": 77820 }, { "epoch": 2.59, "grad_norm": 0.6963847875595093, "learning_rate": 2.7345152209780563e-05, "loss": 1.7132, "step": 77821 }, { "epoch": 2.59, "grad_norm": 0.6993940472602844, "learning_rate": 2.73407925445144e-05, "loss": 1.709, "step": 77822 }, { "epoch": 2.59, "grad_norm": 0.7306879162788391, "learning_rate": 2.733643321021639e-05, "loss": 1.7487, "step": 77823 }, { "epoch": 2.59, "grad_norm": 0.7162865996360779, "learning_rate": 2.7332074206891796e-05, "loss": 1.713, "step": 77824 }, { "epoch": 2.59, "grad_norm": 0.7160839438438416, "learning_rate": 2.7327715534545814e-05, "loss": 1.7099, "step": 77825 }, { "epoch": 2.59, "grad_norm": 0.6899051666259766, "learning_rate": 2.732335719318387e-05, "loss": 1.6406, "step": 77826 }, { "epoch": 2.59, "grad_norm": 0.6974474787712097, "learning_rate": 2.7318999182811165e-05, "loss": 1.6674, "step": 77827 }, { "epoch": 2.59, "grad_norm": 0.7078409194946289, "learning_rate": 2.731464150343299e-05, "loss": 1.6659, "step": 77828 }, { "epoch": 2.59, "grad_norm": 0.694837749004364, "learning_rate": 2.731028415505471e-05, "loss": 1.6567, "step": 77829 }, { "epoch": 2.59, "grad_norm": 0.6893300414085388, "learning_rate": 2.7305927137681515e-05, "loss": 1.6833, "step": 77830 }, { "epoch": 2.59, "grad_norm": 0.7358008027076721, "learning_rate": 2.730157045131881e-05, "loss": 1.7159, "step": 77831 }, { "epoch": 2.59, "grad_norm": 0.6955260038375854, "learning_rate": 2.7297214095971752e-05, "loss": 1.7139, "step": 77832 }, { "epoch": 2.59, "grad_norm": 0.7202503085136414, "learning_rate": 2.729285807164574e-05, "loss": 1.7073, "step": 77833 }, { "epoch": 2.59, "grad_norm": 0.7063190937042236, "learning_rate": 2.7288502378346033e-05, "loss": 1.6578, "step": 77834 }, { "epoch": 2.59, "grad_norm": 0.7242937684059143, "learning_rate": 2.728414701607786e-05, "loss": 1.6815, "step": 77835 }, { "epoch": 2.59, "grad_norm": 0.7076887488365173, "learning_rate": 2.7279791984846588e-05, "loss": 1.716, "step": 77836 }, { "epoch": 2.59, "grad_norm": 0.7020047307014465, "learning_rate": 2.727543728465741e-05, "loss": 1.5961, "step": 77837 }, { "epoch": 2.59, "grad_norm": 0.7123176455497742, "learning_rate": 2.7271082915515718e-05, "loss": 1.7693, "step": 77838 }, { "epoch": 2.59, "grad_norm": 0.7011837959289551, "learning_rate": 2.7266728877426747e-05, "loss": 1.6884, "step": 77839 }, { "epoch": 2.59, "grad_norm": 0.7028595209121704, "learning_rate": 2.7262375170395724e-05, "loss": 1.6402, "step": 77840 }, { "epoch": 2.59, "grad_norm": 0.7392354011535645, "learning_rate": 2.7258021794428044e-05, "loss": 1.6448, "step": 77841 }, { "epoch": 2.59, "grad_norm": 0.7205150127410889, "learning_rate": 2.72536687495289e-05, "loss": 1.7293, "step": 77842 }, { "epoch": 2.59, "grad_norm": 0.7011246681213379, "learning_rate": 2.7249316035703662e-05, "loss": 1.7696, "step": 77843 }, { "epoch": 2.59, "grad_norm": 0.7510989904403687, "learning_rate": 2.724496365295752e-05, "loss": 1.6724, "step": 77844 }, { "epoch": 2.59, "grad_norm": 0.7094578742980957, "learning_rate": 2.7240611601295836e-05, "loss": 1.6897, "step": 77845 }, { "epoch": 2.59, "grad_norm": 0.7081164121627808, "learning_rate": 2.7236259880723876e-05, "loss": 1.6874, "step": 77846 }, { "epoch": 2.59, "grad_norm": 0.7031485438346863, "learning_rate": 2.7231908491246835e-05, "loss": 1.6372, "step": 77847 }, { "epoch": 2.59, "grad_norm": 0.7261865735054016, "learning_rate": 2.722755743287014e-05, "loss": 1.781, "step": 77848 }, { "epoch": 2.59, "grad_norm": 0.6910181045532227, "learning_rate": 2.7223206705598988e-05, "loss": 1.669, "step": 77849 }, { "epoch": 2.59, "grad_norm": 0.7216128706932068, "learning_rate": 2.7218856309438643e-05, "loss": 1.7261, "step": 77850 }, { "epoch": 2.59, "grad_norm": 0.7272204756736755, "learning_rate": 2.7214506244394397e-05, "loss": 1.709, "step": 77851 }, { "epoch": 2.59, "grad_norm": 0.7075589299201965, "learning_rate": 2.721015651047158e-05, "loss": 1.675, "step": 77852 }, { "epoch": 2.59, "grad_norm": 0.6968238353729248, "learning_rate": 2.720580710767546e-05, "loss": 1.6481, "step": 77853 }, { "epoch": 2.59, "grad_norm": 0.7025927305221558, "learning_rate": 2.720145803601126e-05, "loss": 1.6246, "step": 77854 }, { "epoch": 2.59, "grad_norm": 0.699933648109436, "learning_rate": 2.719710929548431e-05, "loss": 1.6817, "step": 77855 }, { "epoch": 2.59, "grad_norm": 0.7107417583465576, "learning_rate": 2.7192760886099908e-05, "loss": 1.6886, "step": 77856 }, { "epoch": 2.59, "grad_norm": 0.7144106030464172, "learning_rate": 2.7188412807863215e-05, "loss": 1.6792, "step": 77857 }, { "epoch": 2.59, "grad_norm": 0.6898163557052612, "learning_rate": 2.7184065060779625e-05, "loss": 1.618, "step": 77858 }, { "epoch": 2.59, "grad_norm": 0.7162626385688782, "learning_rate": 2.717971764485447e-05, "loss": 1.6664, "step": 77859 }, { "epoch": 2.59, "grad_norm": 0.6998538970947266, "learning_rate": 2.7175370560092813e-05, "loss": 1.7227, "step": 77860 }, { "epoch": 2.59, "grad_norm": 0.7175126671791077, "learning_rate": 2.717102380650008e-05, "loss": 1.6588, "step": 77861 }, { "epoch": 2.59, "grad_norm": 0.7366452217102051, "learning_rate": 2.716667738408157e-05, "loss": 1.6832, "step": 77862 }, { "epoch": 2.59, "grad_norm": 0.7122026085853577, "learning_rate": 2.7162331292842543e-05, "loss": 1.6698, "step": 77863 }, { "epoch": 2.59, "grad_norm": 0.6884104609489441, "learning_rate": 2.715798553278816e-05, "loss": 1.6985, "step": 77864 }, { "epoch": 2.59, "grad_norm": 0.7340345978736877, "learning_rate": 2.715364010392379e-05, "loss": 1.742, "step": 77865 }, { "epoch": 2.59, "grad_norm": 0.6998962163925171, "learning_rate": 2.714929500625479e-05, "loss": 1.5776, "step": 77866 }, { "epoch": 2.59, "grad_norm": 0.6948778629302979, "learning_rate": 2.7144950239786257e-05, "loss": 1.6711, "step": 77867 }, { "epoch": 2.59, "grad_norm": 0.7338425517082214, "learning_rate": 2.714060580452355e-05, "loss": 1.6901, "step": 77868 }, { "epoch": 2.59, "grad_norm": 0.7100768089294434, "learning_rate": 2.7136261700471974e-05, "loss": 1.7032, "step": 77869 }, { "epoch": 2.59, "grad_norm": 0.7208513617515564, "learning_rate": 2.7131917927636816e-05, "loss": 1.6797, "step": 77870 }, { "epoch": 2.59, "grad_norm": 0.7227486371994019, "learning_rate": 2.7127574486023205e-05, "loss": 1.6359, "step": 77871 }, { "epoch": 2.59, "grad_norm": 0.7094966173171997, "learning_rate": 2.7123231375636543e-05, "loss": 1.7188, "step": 77872 }, { "epoch": 2.59, "grad_norm": 0.7207069993019104, "learning_rate": 2.711888859648216e-05, "loss": 1.6748, "step": 77873 }, { "epoch": 2.59, "grad_norm": 0.7148849368095398, "learning_rate": 2.7114546148565142e-05, "loss": 1.6796, "step": 77874 }, { "epoch": 2.59, "grad_norm": 0.7152642607688904, "learning_rate": 2.711020403189086e-05, "loss": 1.7422, "step": 77875 }, { "epoch": 2.59, "grad_norm": 0.6909666061401367, "learning_rate": 2.710586224646464e-05, "loss": 1.7136, "step": 77876 }, { "epoch": 2.59, "grad_norm": 0.7392064332962036, "learning_rate": 2.7101520792291677e-05, "loss": 1.7474, "step": 77877 }, { "epoch": 2.59, "grad_norm": 0.7000889182090759, "learning_rate": 2.70971796693772e-05, "loss": 1.6362, "step": 77878 }, { "epoch": 2.59, "grad_norm": 0.6926931738853455, "learning_rate": 2.709283887772661e-05, "loss": 1.6419, "step": 77879 }, { "epoch": 2.59, "grad_norm": 0.7156806588172913, "learning_rate": 2.7088498417345094e-05, "loss": 1.6782, "step": 77880 }, { "epoch": 2.59, "grad_norm": 0.6880911588668823, "learning_rate": 2.708415828823789e-05, "loss": 1.6309, "step": 77881 }, { "epoch": 2.59, "grad_norm": 0.6727737784385681, "learning_rate": 2.7079818490410288e-05, "loss": 1.626, "step": 77882 }, { "epoch": 2.59, "grad_norm": 0.7135345339775085, "learning_rate": 2.7075479023867618e-05, "loss": 1.7185, "step": 77883 }, { "epoch": 2.59, "grad_norm": 0.7103824615478516, "learning_rate": 2.707113988861511e-05, "loss": 1.6373, "step": 77884 }, { "epoch": 2.59, "grad_norm": 0.7413024306297302, "learning_rate": 2.7066801084657998e-05, "loss": 1.7854, "step": 77885 }, { "epoch": 2.59, "grad_norm": 0.7032120823860168, "learning_rate": 2.70624626120016e-05, "loss": 1.6771, "step": 77886 }, { "epoch": 2.59, "grad_norm": 0.7159533500671387, "learning_rate": 2.7058124470651153e-05, "loss": 1.7276, "step": 77887 }, { "epoch": 2.59, "grad_norm": 0.736393928527832, "learning_rate": 2.705378666061189e-05, "loss": 1.6731, "step": 77888 }, { "epoch": 2.59, "grad_norm": 0.7094182372093201, "learning_rate": 2.704944918188916e-05, "loss": 1.6479, "step": 77889 }, { "epoch": 2.59, "grad_norm": 0.7030927538871765, "learning_rate": 2.7045112034488138e-05, "loss": 1.6312, "step": 77890 }, { "epoch": 2.59, "grad_norm": 0.679951012134552, "learning_rate": 2.704077521841418e-05, "loss": 1.7158, "step": 77891 }, { "epoch": 2.59, "grad_norm": 0.7033828496932983, "learning_rate": 2.703643873367245e-05, "loss": 1.6629, "step": 77892 }, { "epoch": 2.59, "grad_norm": 0.6696493625640869, "learning_rate": 2.703210258026831e-05, "loss": 1.7228, "step": 77893 }, { "epoch": 2.59, "grad_norm": 0.7251752018928528, "learning_rate": 2.7027766758206993e-05, "loss": 1.5933, "step": 77894 }, { "epoch": 2.59, "grad_norm": 0.6976653337478638, "learning_rate": 2.7023431267493656e-05, "loss": 1.6047, "step": 77895 }, { "epoch": 2.59, "grad_norm": 0.7326650023460388, "learning_rate": 2.701909610813373e-05, "loss": 1.6815, "step": 77896 }, { "epoch": 2.59, "grad_norm": 0.6994096636772156, "learning_rate": 2.7014761280132347e-05, "loss": 1.5899, "step": 77897 }, { "epoch": 2.59, "grad_norm": 0.6850724816322327, "learning_rate": 2.7010426783494864e-05, "loss": 1.6609, "step": 77898 }, { "epoch": 2.59, "grad_norm": 0.7083724737167358, "learning_rate": 2.7006092618226516e-05, "loss": 1.7249, "step": 77899 }, { "epoch": 2.59, "grad_norm": 0.6962027549743652, "learning_rate": 2.7001758784332493e-05, "loss": 1.6554, "step": 77900 }, { "epoch": 2.59, "grad_norm": 0.6817307472229004, "learning_rate": 2.6997425281818163e-05, "loss": 1.676, "step": 77901 }, { "epoch": 2.59, "grad_norm": 0.6853294968605042, "learning_rate": 2.6993092110688686e-05, "loss": 1.684, "step": 77902 }, { "epoch": 2.59, "grad_norm": 0.682095468044281, "learning_rate": 2.698875927094939e-05, "loss": 1.7175, "step": 77903 }, { "epoch": 2.59, "grad_norm": 0.745480477809906, "learning_rate": 2.6984426762605504e-05, "loss": 1.6796, "step": 77904 }, { "epoch": 2.59, "grad_norm": 0.7159900069236755, "learning_rate": 2.6980094585662327e-05, "loss": 1.7695, "step": 77905 }, { "epoch": 2.59, "grad_norm": 0.6847115755081177, "learning_rate": 2.6975762740125083e-05, "loss": 1.71, "step": 77906 }, { "epoch": 2.59, "grad_norm": 0.7124213576316833, "learning_rate": 2.6971431225999007e-05, "loss": 1.6984, "step": 77907 }, { "epoch": 2.59, "grad_norm": 0.7276773452758789, "learning_rate": 2.6967100043289392e-05, "loss": 1.7009, "step": 77908 }, { "epoch": 2.59, "grad_norm": 0.698437511920929, "learning_rate": 2.6962769192001533e-05, "loss": 1.6608, "step": 77909 }, { "epoch": 2.59, "grad_norm": 0.6942994594573975, "learning_rate": 2.695843867214056e-05, "loss": 1.7117, "step": 77910 }, { "epoch": 2.59, "grad_norm": 0.7003241181373596, "learning_rate": 2.6954108483711834e-05, "loss": 1.7636, "step": 77911 }, { "epoch": 2.59, "grad_norm": 0.7180449962615967, "learning_rate": 2.694977862672062e-05, "loss": 1.6702, "step": 77912 }, { "epoch": 2.59, "grad_norm": 0.6950271725654602, "learning_rate": 2.694544910117218e-05, "loss": 1.6778, "step": 77913 }, { "epoch": 2.59, "grad_norm": 0.7077305912971497, "learning_rate": 2.694111990707164e-05, "loss": 1.697, "step": 77914 }, { "epoch": 2.59, "grad_norm": 0.6932176351547241, "learning_rate": 2.6936791044424365e-05, "loss": 1.7002, "step": 77915 }, { "epoch": 2.59, "grad_norm": 0.7247099876403809, "learning_rate": 2.6932462513235687e-05, "loss": 1.6943, "step": 77916 }, { "epoch": 2.59, "grad_norm": 0.6946017742156982, "learning_rate": 2.6928134313510662e-05, "loss": 1.6838, "step": 77917 }, { "epoch": 2.59, "grad_norm": 0.6855062246322632, "learning_rate": 2.6923806445254657e-05, "loss": 1.6761, "step": 77918 }, { "epoch": 2.59, "grad_norm": 0.7079310417175293, "learning_rate": 2.691947890847297e-05, "loss": 1.6829, "step": 77919 }, { "epoch": 2.59, "grad_norm": 0.6989025473594666, "learning_rate": 2.691515170317079e-05, "loss": 1.7132, "step": 77920 }, { "epoch": 2.59, "grad_norm": 0.713744580745697, "learning_rate": 2.6910824829353315e-05, "loss": 1.6774, "step": 77921 }, { "epoch": 2.59, "grad_norm": 0.7017478942871094, "learning_rate": 2.6906498287025878e-05, "loss": 1.6166, "step": 77922 }, { "epoch": 2.59, "grad_norm": 0.7158624529838562, "learning_rate": 2.6902172076193807e-05, "loss": 1.7203, "step": 77923 }, { "epoch": 2.59, "grad_norm": 0.7153075337409973, "learning_rate": 2.689784619686216e-05, "loss": 1.6704, "step": 77924 }, { "epoch": 2.59, "grad_norm": 0.7055267095565796, "learning_rate": 2.6893520649036272e-05, "loss": 1.7038, "step": 77925 }, { "epoch": 2.59, "grad_norm": 0.705885112285614, "learning_rate": 2.68891954327215e-05, "loss": 1.7048, "step": 77926 }, { "epoch": 2.59, "grad_norm": 0.7124280333518982, "learning_rate": 2.6884870547922976e-05, "loss": 1.7528, "step": 77927 }, { "epoch": 2.59, "grad_norm": 0.6770931482315063, "learning_rate": 2.688054599464593e-05, "loss": 1.6679, "step": 77928 }, { "epoch": 2.59, "grad_norm": 0.7041464447975159, "learning_rate": 2.6876221772895656e-05, "loss": 1.7218, "step": 77929 }, { "epoch": 2.59, "grad_norm": 0.7119253277778625, "learning_rate": 2.6871897882677518e-05, "loss": 1.6461, "step": 77930 }, { "epoch": 2.59, "grad_norm": 0.7063942551612854, "learning_rate": 2.6867574323996542e-05, "loss": 1.7629, "step": 77931 }, { "epoch": 2.59, "grad_norm": 0.7191346883773804, "learning_rate": 2.6863251096858096e-05, "loss": 1.6465, "step": 77932 }, { "epoch": 2.59, "grad_norm": 0.7041406035423279, "learning_rate": 2.685892820126747e-05, "loss": 1.7384, "step": 77933 }, { "epoch": 2.59, "grad_norm": 0.6810861825942993, "learning_rate": 2.685460563722983e-05, "loss": 1.7049, "step": 77934 }, { "epoch": 2.59, "grad_norm": 0.7144139409065247, "learning_rate": 2.6850283404750438e-05, "loss": 1.6798, "step": 77935 }, { "epoch": 2.59, "grad_norm": 0.7046564221382141, "learning_rate": 2.684596150383459e-05, "loss": 1.7414, "step": 77936 }, { "epoch": 2.59, "grad_norm": 0.6969988942146301, "learning_rate": 2.6841639934487514e-05, "loss": 1.7189, "step": 77937 }, { "epoch": 2.59, "grad_norm": 0.6973031163215637, "learning_rate": 2.683731869671437e-05, "loss": 1.6659, "step": 77938 }, { "epoch": 2.59, "grad_norm": 0.7115561366081238, "learning_rate": 2.683299779052046e-05, "loss": 1.6673, "step": 77939 }, { "epoch": 2.59, "grad_norm": 0.7000648975372314, "learning_rate": 2.682867721591111e-05, "loss": 1.6859, "step": 77940 }, { "epoch": 2.59, "grad_norm": 0.6940217018127441, "learning_rate": 2.6824356972891513e-05, "loss": 1.6917, "step": 77941 }, { "epoch": 2.59, "grad_norm": 0.7238501310348511, "learning_rate": 2.6820037061466804e-05, "loss": 1.7012, "step": 77942 }, { "epoch": 2.59, "grad_norm": 0.7435352206230164, "learning_rate": 2.6815717481642407e-05, "loss": 1.6548, "step": 77943 }, { "epoch": 2.59, "grad_norm": 0.7256921529769897, "learning_rate": 2.6811398233423452e-05, "loss": 1.7208, "step": 77944 }, { "epoch": 2.59, "grad_norm": 0.7257372140884399, "learning_rate": 2.6807079316815173e-05, "loss": 1.691, "step": 77945 }, { "epoch": 2.59, "grad_norm": 0.7177014350891113, "learning_rate": 2.680276073182289e-05, "loss": 1.7143, "step": 77946 }, { "epoch": 2.59, "grad_norm": 0.7147651314735413, "learning_rate": 2.6798442478451743e-05, "loss": 1.6486, "step": 77947 }, { "epoch": 2.59, "grad_norm": 0.700690746307373, "learning_rate": 2.679412455670712e-05, "loss": 1.7803, "step": 77948 }, { "epoch": 2.59, "grad_norm": 0.6884812712669373, "learning_rate": 2.6789806966594086e-05, "loss": 1.6804, "step": 77949 }, { "epoch": 2.59, "grad_norm": 0.6979801654815674, "learning_rate": 2.6785489708118036e-05, "loss": 1.6359, "step": 77950 }, { "epoch": 2.59, "grad_norm": 0.7155374884605408, "learning_rate": 2.6781172781284133e-05, "loss": 1.694, "step": 77951 }, { "epoch": 2.59, "grad_norm": 0.7037553787231445, "learning_rate": 2.6776856186097574e-05, "loss": 1.6659, "step": 77952 }, { "epoch": 2.59, "grad_norm": 0.7230432033538818, "learning_rate": 2.677253992256375e-05, "loss": 1.7226, "step": 77953 }, { "epoch": 2.59, "grad_norm": 0.7099810838699341, "learning_rate": 2.6768223990687697e-05, "loss": 1.7012, "step": 77954 }, { "epoch": 2.59, "grad_norm": 0.6921611428260803, "learning_rate": 2.676390839047484e-05, "loss": 1.7352, "step": 77955 }, { "epoch": 2.59, "grad_norm": 0.6799680590629578, "learning_rate": 2.6759593121930347e-05, "loss": 1.7124, "step": 77956 }, { "epoch": 2.59, "grad_norm": 0.6958932876586914, "learning_rate": 2.675527818505937e-05, "loss": 1.7588, "step": 77957 }, { "epoch": 2.59, "grad_norm": 0.6985706686973572, "learning_rate": 2.675096357986731e-05, "loss": 1.6183, "step": 77958 }, { "epoch": 2.59, "grad_norm": 0.6901520490646362, "learning_rate": 2.6746649306359237e-05, "loss": 1.6378, "step": 77959 }, { "epoch": 2.59, "grad_norm": 0.6931354999542236, "learning_rate": 2.6742335364540536e-05, "loss": 1.7201, "step": 77960 }, { "epoch": 2.59, "grad_norm": 0.6937392950057983, "learning_rate": 2.6738021754416306e-05, "loss": 1.6896, "step": 77961 }, { "epoch": 2.59, "grad_norm": 0.694976270198822, "learning_rate": 2.6733708475991943e-05, "loss": 1.6414, "step": 77962 }, { "epoch": 2.59, "grad_norm": 0.6788215637207031, "learning_rate": 2.672939552927258e-05, "loss": 1.6959, "step": 77963 }, { "epoch": 2.59, "grad_norm": 0.7029701471328735, "learning_rate": 2.6725082914263406e-05, "loss": 1.7051, "step": 77964 }, { "epoch": 2.59, "grad_norm": 0.7048700451850891, "learning_rate": 2.6720770630969758e-05, "loss": 1.6529, "step": 77965 }, { "epoch": 2.59, "grad_norm": 0.6836623549461365, "learning_rate": 2.6716458679396856e-05, "loss": 1.6777, "step": 77966 }, { "epoch": 2.59, "grad_norm": 0.7137895226478577, "learning_rate": 2.6712147059549838e-05, "loss": 1.7522, "step": 77967 }, { "epoch": 2.59, "grad_norm": 0.7178467512130737, "learning_rate": 2.6707835771433994e-05, "loss": 1.672, "step": 77968 }, { "epoch": 2.59, "grad_norm": 0.6970185041427612, "learning_rate": 2.6703524815054657e-05, "loss": 1.6992, "step": 77969 }, { "epoch": 2.59, "grad_norm": 0.6986090540885925, "learning_rate": 2.6699214190416952e-05, "loss": 1.6445, "step": 77970 }, { "epoch": 2.59, "grad_norm": 0.6914365887641907, "learning_rate": 2.6694903897526078e-05, "loss": 1.6839, "step": 77971 }, { "epoch": 2.59, "grad_norm": 0.6763271689414978, "learning_rate": 2.669059393638736e-05, "loss": 1.6871, "step": 77972 }, { "epoch": 2.59, "grad_norm": 0.7342895865440369, "learning_rate": 2.6686284307006e-05, "loss": 1.5944, "step": 77973 }, { "epoch": 2.59, "grad_norm": 0.7096055150032043, "learning_rate": 2.668197500938719e-05, "loss": 1.6911, "step": 77974 }, { "epoch": 2.59, "grad_norm": 0.7080366611480713, "learning_rate": 2.6677666043536163e-05, "loss": 1.7175, "step": 77975 }, { "epoch": 2.59, "grad_norm": 0.6797971129417419, "learning_rate": 2.6673357409458275e-05, "loss": 1.6733, "step": 77976 }, { "epoch": 2.59, "grad_norm": 0.7138463854789734, "learning_rate": 2.666904910715859e-05, "loss": 1.7678, "step": 77977 }, { "epoch": 2.59, "grad_norm": 0.7194951772689819, "learning_rate": 2.6664741136642377e-05, "loss": 1.7123, "step": 77978 }, { "epoch": 2.59, "grad_norm": 0.7029076814651489, "learning_rate": 2.666043349791496e-05, "loss": 1.6737, "step": 77979 }, { "epoch": 2.59, "grad_norm": 0.7085016965866089, "learning_rate": 2.6656126190981496e-05, "loss": 1.6454, "step": 77980 }, { "epoch": 2.59, "grad_norm": 0.7179502844810486, "learning_rate": 2.665181921584716e-05, "loss": 1.6846, "step": 77981 }, { "epoch": 2.59, "grad_norm": 0.714019238948822, "learning_rate": 2.6647512572517272e-05, "loss": 1.699, "step": 77982 }, { "epoch": 2.59, "grad_norm": 0.6883034110069275, "learning_rate": 2.6643206260997098e-05, "loss": 1.6523, "step": 77983 }, { "epoch": 2.59, "grad_norm": 0.6981374621391296, "learning_rate": 2.6638900281291697e-05, "loss": 1.6828, "step": 77984 }, { "epoch": 2.59, "grad_norm": 0.7200478315353394, "learning_rate": 2.6634594633406404e-05, "loss": 1.619, "step": 77985 }, { "epoch": 2.59, "grad_norm": 0.6908145546913147, "learning_rate": 2.6630289317346475e-05, "loss": 1.6227, "step": 77986 }, { "epoch": 2.59, "grad_norm": 0.6949858665466309, "learning_rate": 2.662598433311711e-05, "loss": 1.722, "step": 77987 }, { "epoch": 2.59, "grad_norm": 0.7149360179901123, "learning_rate": 2.662167968072344e-05, "loss": 1.6973, "step": 77988 }, { "epoch": 2.59, "grad_norm": 0.7138310074806213, "learning_rate": 2.6617375360170788e-05, "loss": 1.7176, "step": 77989 }, { "epoch": 2.59, "grad_norm": 0.7042222023010254, "learning_rate": 2.6613071371464488e-05, "loss": 1.7261, "step": 77990 }, { "epoch": 2.59, "grad_norm": 0.6876087784767151, "learning_rate": 2.66087677146095e-05, "loss": 1.6829, "step": 77991 }, { "epoch": 2.59, "grad_norm": 0.7272564768791199, "learning_rate": 2.6604464389611192e-05, "loss": 1.8261, "step": 77992 }, { "epoch": 2.59, "grad_norm": 0.6893371939659119, "learning_rate": 2.6600161396474852e-05, "loss": 1.6835, "step": 77993 }, { "epoch": 2.59, "grad_norm": 0.6983016133308411, "learning_rate": 2.659585873520561e-05, "loss": 1.6389, "step": 77994 }, { "epoch": 2.59, "grad_norm": 0.7089909911155701, "learning_rate": 2.6591556405808668e-05, "loss": 1.7055, "step": 77995 }, { "epoch": 2.59, "grad_norm": 0.7128878831863403, "learning_rate": 2.658725440828935e-05, "loss": 1.7174, "step": 77996 }, { "epoch": 2.59, "grad_norm": 0.6844158172607422, "learning_rate": 2.6582952742652818e-05, "loss": 1.6466, "step": 77997 }, { "epoch": 2.6, "grad_norm": 0.7213087677955627, "learning_rate": 2.657865140890424e-05, "loss": 1.7044, "step": 77998 }, { "epoch": 2.6, "grad_norm": 0.69929039478302, "learning_rate": 2.6574350407048873e-05, "loss": 1.6776, "step": 77999 }, { "epoch": 2.6, "grad_norm": 0.7193571925163269, "learning_rate": 2.6570049737092047e-05, "loss": 1.6707, "step": 78000 }, { "epoch": 2.6, "grad_norm": 0.7126910090446472, "learning_rate": 2.656574939903886e-05, "loss": 1.6952, "step": 78001 }, { "epoch": 2.6, "grad_norm": 0.7183462381362915, "learning_rate": 2.6561449392894542e-05, "loss": 1.5886, "step": 78002 }, { "epoch": 2.6, "grad_norm": 0.7055647373199463, "learning_rate": 2.655714971866435e-05, "loss": 1.714, "step": 78003 }, { "epoch": 2.6, "grad_norm": 0.6916059851646423, "learning_rate": 2.6552850376353518e-05, "loss": 1.7143, "step": 78004 }, { "epoch": 2.6, "grad_norm": 0.6990834474563599, "learning_rate": 2.6548551365967176e-05, "loss": 1.6507, "step": 78005 }, { "epoch": 2.6, "grad_norm": 0.7306216359138489, "learning_rate": 2.6544252687510647e-05, "loss": 1.6262, "step": 78006 }, { "epoch": 2.6, "grad_norm": 0.7159281969070435, "learning_rate": 2.653995434098907e-05, "loss": 1.692, "step": 78007 }, { "epoch": 2.6, "grad_norm": 0.7062024474143982, "learning_rate": 2.6535656326407728e-05, "loss": 1.6892, "step": 78008 }, { "epoch": 2.6, "grad_norm": 0.6918244361877441, "learning_rate": 2.6531358643771762e-05, "loss": 1.5929, "step": 78009 }, { "epoch": 2.6, "grad_norm": 0.7253308892250061, "learning_rate": 2.6527061293086494e-05, "loss": 1.6742, "step": 78010 }, { "epoch": 2.6, "grad_norm": 0.7034623026847839, "learning_rate": 2.6522764274357056e-05, "loss": 1.6418, "step": 78011 }, { "epoch": 2.6, "grad_norm": 0.71337890625, "learning_rate": 2.6518467587588643e-05, "loss": 1.7387, "step": 78012 }, { "epoch": 2.6, "grad_norm": 0.7096288800239563, "learning_rate": 2.6514171232786584e-05, "loss": 1.6923, "step": 78013 }, { "epoch": 2.6, "grad_norm": 0.7424109578132629, "learning_rate": 2.6509875209955944e-05, "loss": 1.6845, "step": 78014 }, { "epoch": 2.6, "grad_norm": 0.7180731296539307, "learning_rate": 2.6505579519102117e-05, "loss": 1.6867, "step": 78015 }, { "epoch": 2.6, "grad_norm": 0.6941865086555481, "learning_rate": 2.6501284160230163e-05, "loss": 1.7263, "step": 78016 }, { "epoch": 2.6, "grad_norm": 0.6883087754249573, "learning_rate": 2.6496989133345348e-05, "loss": 1.7075, "step": 78017 }, { "epoch": 2.6, "grad_norm": 0.6849977374076843, "learning_rate": 2.64926944384529e-05, "loss": 1.7217, "step": 78018 }, { "epoch": 2.6, "grad_norm": 0.7140686511993408, "learning_rate": 2.6488400075558014e-05, "loss": 1.7398, "step": 78019 }, { "epoch": 2.6, "grad_norm": 0.7053831815719604, "learning_rate": 2.648410604466592e-05, "loss": 1.6755, "step": 78020 }, { "epoch": 2.6, "grad_norm": 0.7066788673400879, "learning_rate": 2.647981234578178e-05, "loss": 1.6727, "step": 78021 }, { "epoch": 2.6, "grad_norm": 0.6928575038909912, "learning_rate": 2.6475518978910926e-05, "loss": 1.6901, "step": 78022 }, { "epoch": 2.6, "grad_norm": 0.715920627117157, "learning_rate": 2.647122594405845e-05, "loss": 1.6796, "step": 78023 }, { "epoch": 2.6, "grad_norm": 0.6974952220916748, "learning_rate": 2.6466933241229582e-05, "loss": 1.6617, "step": 78024 }, { "epoch": 2.6, "grad_norm": 0.7039930820465088, "learning_rate": 2.6462640870429518e-05, "loss": 1.6669, "step": 78025 }, { "epoch": 2.6, "grad_norm": 0.6944125294685364, "learning_rate": 2.6458348831663655e-05, "loss": 1.6334, "step": 78026 }, { "epoch": 2.6, "grad_norm": 0.6890774965286255, "learning_rate": 2.6454057124936923e-05, "loss": 1.6746, "step": 78027 }, { "epoch": 2.6, "grad_norm": 0.6988383531570435, "learning_rate": 2.644976575025465e-05, "loss": 1.7158, "step": 78028 }, { "epoch": 2.6, "grad_norm": 0.7013163566589355, "learning_rate": 2.6445474707622127e-05, "loss": 1.6576, "step": 78029 }, { "epoch": 2.6, "grad_norm": 0.7155194282531738, "learning_rate": 2.6441183997044492e-05, "loss": 1.6839, "step": 78030 }, { "epoch": 2.6, "grad_norm": 0.6874384880065918, "learning_rate": 2.64368936185269e-05, "loss": 1.6383, "step": 78031 }, { "epoch": 2.6, "grad_norm": 0.7110263705253601, "learning_rate": 2.6432603572074584e-05, "loss": 1.684, "step": 78032 }, { "epoch": 2.6, "grad_norm": 0.6993361711502075, "learning_rate": 2.6428313857692906e-05, "loss": 1.7623, "step": 78033 }, { "epoch": 2.6, "grad_norm": 0.700792670249939, "learning_rate": 2.642402447538683e-05, "loss": 1.6394, "step": 78034 }, { "epoch": 2.6, "grad_norm": 0.7164639830589294, "learning_rate": 2.6419735425161713e-05, "loss": 1.6042, "step": 78035 }, { "epoch": 2.6, "grad_norm": 0.7165381908416748, "learning_rate": 2.641544670702276e-05, "loss": 1.7304, "step": 78036 }, { "epoch": 2.6, "grad_norm": 0.7278040647506714, "learning_rate": 2.6411158320975123e-05, "loss": 1.7223, "step": 78037 }, { "epoch": 2.6, "grad_norm": 0.7075122594833374, "learning_rate": 2.6406870267024003e-05, "loss": 1.6789, "step": 78038 }, { "epoch": 2.6, "grad_norm": 0.7046438455581665, "learning_rate": 2.64025825451746e-05, "loss": 1.6964, "step": 78039 }, { "epoch": 2.6, "grad_norm": 0.7239478230476379, "learning_rate": 2.639829515543227e-05, "loss": 1.6472, "step": 78040 }, { "epoch": 2.6, "grad_norm": 0.6979550123214722, "learning_rate": 2.6394008097802012e-05, "loss": 1.7115, "step": 78041 }, { "epoch": 2.6, "grad_norm": 0.6987746357917786, "learning_rate": 2.6389721372289086e-05, "loss": 1.6817, "step": 78042 }, { "epoch": 2.6, "grad_norm": 0.7031224966049194, "learning_rate": 2.638543497889879e-05, "loss": 1.7497, "step": 78043 }, { "epoch": 2.6, "grad_norm": 0.7011885046958923, "learning_rate": 2.6381148917636252e-05, "loss": 1.6405, "step": 78044 }, { "epoch": 2.6, "grad_norm": 0.7012364864349365, "learning_rate": 2.6376863188506637e-05, "loss": 1.7033, "step": 78045 }, { "epoch": 2.6, "grad_norm": 0.6847054362297058, "learning_rate": 2.6372577791515202e-05, "loss": 1.6066, "step": 78046 }, { "epoch": 2.6, "grad_norm": 0.705859899520874, "learning_rate": 2.636829272666722e-05, "loss": 1.6645, "step": 78047 }, { "epoch": 2.6, "grad_norm": 0.7259745001792908, "learning_rate": 2.636400799396774e-05, "loss": 1.7713, "step": 78048 }, { "epoch": 2.6, "grad_norm": 0.7034580111503601, "learning_rate": 2.6359723593422e-05, "loss": 1.701, "step": 78049 }, { "epoch": 2.6, "grad_norm": 0.7323792576789856, "learning_rate": 2.6355439525035326e-05, "loss": 1.7629, "step": 78050 }, { "epoch": 2.6, "grad_norm": 0.6944435834884644, "learning_rate": 2.6351155788812818e-05, "loss": 1.7118, "step": 78051 }, { "epoch": 2.6, "grad_norm": 0.7050041556358337, "learning_rate": 2.634687238475963e-05, "loss": 1.6816, "step": 78052 }, { "epoch": 2.6, "grad_norm": 0.6997076869010925, "learning_rate": 2.634258931288107e-05, "loss": 1.6796, "step": 78053 }, { "epoch": 2.6, "grad_norm": 0.7221153974533081, "learning_rate": 2.6338306573182287e-05, "loss": 1.7167, "step": 78054 }, { "epoch": 2.6, "grad_norm": 0.678601086139679, "learning_rate": 2.633402416566842e-05, "loss": 1.6596, "step": 78055 }, { "epoch": 2.6, "grad_norm": 0.7123812437057495, "learning_rate": 2.6329742090344698e-05, "loss": 1.5894, "step": 78056 }, { "epoch": 2.6, "grad_norm": 0.7085257768630981, "learning_rate": 2.632546034721644e-05, "loss": 1.7143, "step": 78057 }, { "epoch": 2.6, "grad_norm": 0.6975641250610352, "learning_rate": 2.6321178936288757e-05, "loss": 1.7192, "step": 78058 }, { "epoch": 2.6, "grad_norm": 0.6992629766464233, "learning_rate": 2.631689785756673e-05, "loss": 1.7238, "step": 78059 }, { "epoch": 2.6, "grad_norm": 0.6872594356536865, "learning_rate": 2.6312617111055766e-05, "loss": 1.6188, "step": 78060 }, { "epoch": 2.6, "grad_norm": 0.7010379433631897, "learning_rate": 2.6308336696760956e-05, "loss": 1.7063, "step": 78061 }, { "epoch": 2.6, "grad_norm": 0.7303014993667603, "learning_rate": 2.630405661468743e-05, "loss": 1.6595, "step": 78062 }, { "epoch": 2.6, "grad_norm": 0.720109760761261, "learning_rate": 2.6299776864840517e-05, "loss": 1.6276, "step": 78063 }, { "epoch": 2.6, "grad_norm": 0.7220035195350647, "learning_rate": 2.629549744722528e-05, "loss": 1.7032, "step": 78064 }, { "epoch": 2.6, "grad_norm": 0.7043497562408447, "learning_rate": 2.6291218361847043e-05, "loss": 1.5993, "step": 78065 }, { "epoch": 2.6, "grad_norm": 0.7127290964126587, "learning_rate": 2.6286939608710877e-05, "loss": 1.6703, "step": 78066 }, { "epoch": 2.6, "grad_norm": 0.6757615804672241, "learning_rate": 2.6282661187822073e-05, "loss": 1.655, "step": 78067 }, { "epoch": 2.6, "grad_norm": 0.7246228456497192, "learning_rate": 2.627838309918583e-05, "loss": 1.7148, "step": 78068 }, { "epoch": 2.6, "grad_norm": 0.6978358030319214, "learning_rate": 2.6274105342807207e-05, "loss": 1.7021, "step": 78069 }, { "epoch": 2.6, "grad_norm": 0.7023134827613831, "learning_rate": 2.6269827918691566e-05, "loss": 1.6448, "step": 78070 }, { "epoch": 2.6, "grad_norm": 0.7015020251274109, "learning_rate": 2.6265550826843972e-05, "loss": 1.7067, "step": 78071 }, { "epoch": 2.6, "grad_norm": 0.7058106660842896, "learning_rate": 2.6261274067269687e-05, "loss": 1.636, "step": 78072 }, { "epoch": 2.6, "grad_norm": 0.7436051964759827, "learning_rate": 2.6256997639973908e-05, "loss": 1.7488, "step": 78073 }, { "epoch": 2.6, "grad_norm": 0.7198194861412048, "learning_rate": 2.6252721544961765e-05, "loss": 1.7185, "step": 78074 }, { "epoch": 2.6, "grad_norm": 0.701259434223175, "learning_rate": 2.6248445782238516e-05, "loss": 1.6313, "step": 78075 }, { "epoch": 2.6, "grad_norm": 0.685242235660553, "learning_rate": 2.6244170351809256e-05, "loss": 1.704, "step": 78076 }, { "epoch": 2.6, "grad_norm": 0.7108787894248962, "learning_rate": 2.623989525367929e-05, "loss": 1.5786, "step": 78077 }, { "epoch": 2.6, "grad_norm": 0.6976343989372253, "learning_rate": 2.6235620487853703e-05, "loss": 1.6838, "step": 78078 }, { "epoch": 2.6, "grad_norm": 0.7159008383750916, "learning_rate": 2.6231346054337797e-05, "loss": 1.7357, "step": 78079 }, { "epoch": 2.6, "grad_norm": 0.7344053387641907, "learning_rate": 2.62270719531367e-05, "loss": 1.6945, "step": 78080 }, { "epoch": 2.6, "grad_norm": 0.709494948387146, "learning_rate": 2.6222798184255545e-05, "loss": 1.7307, "step": 78081 }, { "epoch": 2.6, "grad_norm": 0.7143495082855225, "learning_rate": 2.621852474769962e-05, "loss": 1.6753, "step": 78082 }, { "epoch": 2.6, "grad_norm": 0.6957171559333801, "learning_rate": 2.621425164347406e-05, "loss": 1.6445, "step": 78083 }, { "epoch": 2.6, "grad_norm": 0.6878663301467896, "learning_rate": 2.6209978871584026e-05, "loss": 1.629, "step": 78084 }, { "epoch": 2.6, "grad_norm": 0.7151795029640198, "learning_rate": 2.6205706432034713e-05, "loss": 1.6481, "step": 78085 }, { "epoch": 2.6, "grad_norm": 0.7019146084785461, "learning_rate": 2.6201434324831415e-05, "loss": 1.6841, "step": 78086 }, { "epoch": 2.6, "grad_norm": 0.703677773475647, "learning_rate": 2.61971625499792e-05, "loss": 1.716, "step": 78087 }, { "epoch": 2.6, "grad_norm": 0.7128627300262451, "learning_rate": 2.619289110748326e-05, "loss": 1.6559, "step": 78088 }, { "epoch": 2.6, "grad_norm": 0.7173608541488647, "learning_rate": 2.6188619997348858e-05, "loss": 1.7521, "step": 78089 }, { "epoch": 2.6, "grad_norm": 0.7092582583427429, "learning_rate": 2.6184349219581125e-05, "loss": 1.6589, "step": 78090 }, { "epoch": 2.6, "grad_norm": 0.6846822500228882, "learning_rate": 2.6180078774185153e-05, "loss": 1.6866, "step": 78091 }, { "epoch": 2.6, "grad_norm": 0.7045391201972961, "learning_rate": 2.6175808661166274e-05, "loss": 1.5745, "step": 78092 }, { "epoch": 2.6, "grad_norm": 0.7280470132827759, "learning_rate": 2.6171538880529684e-05, "loss": 1.7453, "step": 78093 }, { "epoch": 2.6, "grad_norm": 0.7094277739524841, "learning_rate": 2.6167269432280413e-05, "loss": 1.7469, "step": 78094 }, { "epoch": 2.6, "grad_norm": 0.6947681307792664, "learning_rate": 2.6163000316423754e-05, "loss": 1.702, "step": 78095 }, { "epoch": 2.6, "grad_norm": 0.6951991319656372, "learning_rate": 2.6158731532964873e-05, "loss": 1.6794, "step": 78096 }, { "epoch": 2.6, "grad_norm": 0.7118697166442871, "learning_rate": 2.615446308190896e-05, "loss": 1.7294, "step": 78097 }, { "epoch": 2.6, "grad_norm": 0.7474417686462402, "learning_rate": 2.615019496326115e-05, "loss": 1.6781, "step": 78098 }, { "epoch": 2.6, "grad_norm": 0.7283883094787598, "learning_rate": 2.6145927177026603e-05, "loss": 1.76, "step": 78099 }, { "epoch": 2.6, "grad_norm": 0.7077975869178772, "learning_rate": 2.614165972321072e-05, "loss": 1.7051, "step": 78100 }, { "epoch": 2.6, "grad_norm": 0.720137357711792, "learning_rate": 2.6137392601818352e-05, "loss": 1.6832, "step": 78101 }, { "epoch": 2.6, "grad_norm": 0.7494946122169495, "learning_rate": 2.613312581285487e-05, "loss": 1.6997, "step": 78102 }, { "epoch": 2.6, "grad_norm": 0.7089672684669495, "learning_rate": 2.6128859356325437e-05, "loss": 1.6767, "step": 78103 }, { "epoch": 2.6, "grad_norm": 0.7005848288536072, "learning_rate": 2.6124593232235246e-05, "loss": 1.6538, "step": 78104 }, { "epoch": 2.6, "grad_norm": 0.7246671319007874, "learning_rate": 2.6120327440589394e-05, "loss": 1.741, "step": 78105 }, { "epoch": 2.6, "grad_norm": 0.7490294575691223, "learning_rate": 2.611606198139311e-05, "loss": 1.6749, "step": 78106 }, { "epoch": 2.6, "grad_norm": 0.7004021406173706, "learning_rate": 2.6111796854651655e-05, "loss": 1.6666, "step": 78107 }, { "epoch": 2.6, "grad_norm": 0.7303614020347595, "learning_rate": 2.6107532060370028e-05, "loss": 1.6674, "step": 78108 }, { "epoch": 2.6, "grad_norm": 0.6994298100471497, "learning_rate": 2.610326759855349e-05, "loss": 1.6501, "step": 78109 }, { "epoch": 2.6, "grad_norm": 0.6955717206001282, "learning_rate": 2.60990034692073e-05, "loss": 1.5891, "step": 78110 }, { "epoch": 2.6, "grad_norm": 0.7107341885566711, "learning_rate": 2.6094739672336562e-05, "loss": 1.6896, "step": 78111 }, { "epoch": 2.6, "grad_norm": 0.7190772891044617, "learning_rate": 2.6090476207946397e-05, "loss": 1.6836, "step": 78112 }, { "epoch": 2.6, "grad_norm": 0.7617958784103394, "learning_rate": 2.6086213076042105e-05, "loss": 1.6836, "step": 78113 }, { "epoch": 2.6, "grad_norm": 0.7265457510948181, "learning_rate": 2.6081950276628748e-05, "loss": 1.6963, "step": 78114 }, { "epoch": 2.6, "grad_norm": 0.6853898763656616, "learning_rate": 2.6077687809711524e-05, "loss": 1.6584, "step": 78115 }, { "epoch": 2.6, "grad_norm": 0.7036415934562683, "learning_rate": 2.6073425675295623e-05, "loss": 1.687, "step": 78116 }, { "epoch": 2.6, "grad_norm": 0.7124701142311096, "learning_rate": 2.606916387338628e-05, "loss": 1.6666, "step": 78117 }, { "epoch": 2.6, "grad_norm": 0.6926891803741455, "learning_rate": 2.6064902403988587e-05, "loss": 1.6432, "step": 78118 }, { "epoch": 2.6, "grad_norm": 0.7162813544273376, "learning_rate": 2.6060641267107708e-05, "loss": 1.7128, "step": 78119 }, { "epoch": 2.6, "grad_norm": 0.7249170541763306, "learning_rate": 2.605638046274887e-05, "loss": 1.693, "step": 78120 }, { "epoch": 2.6, "grad_norm": 1.950148582458496, "learning_rate": 2.605211999091724e-05, "loss": 1.7251, "step": 78121 }, { "epoch": 2.6, "grad_norm": 0.7218903303146362, "learning_rate": 2.6047859851617946e-05, "loss": 1.7391, "step": 78122 }, { "epoch": 2.6, "grad_norm": 0.7080935835838318, "learning_rate": 2.6043600044856218e-05, "loss": 1.6525, "step": 78123 }, { "epoch": 2.6, "grad_norm": 0.7072038054466248, "learning_rate": 2.6039340570637114e-05, "loss": 1.6886, "step": 78124 }, { "epoch": 2.6, "grad_norm": 0.7022954225540161, "learning_rate": 2.6035081428965964e-05, "loss": 1.6657, "step": 78125 }, { "epoch": 2.6, "grad_norm": 0.7195627093315125, "learning_rate": 2.6030822619847803e-05, "loss": 1.6644, "step": 78126 }, { "epoch": 2.6, "grad_norm": 0.6890773773193359, "learning_rate": 2.602656414328792e-05, "loss": 1.695, "step": 78127 }, { "epoch": 2.6, "grad_norm": 0.698165774345398, "learning_rate": 2.6022305999291416e-05, "loss": 1.7261, "step": 78128 }, { "epoch": 2.6, "grad_norm": 0.7111712694168091, "learning_rate": 2.6018048187863382e-05, "loss": 1.8259, "step": 78129 }, { "epoch": 2.6, "grad_norm": 0.717467725276947, "learning_rate": 2.6013790709009153e-05, "loss": 1.7071, "step": 78130 }, { "epoch": 2.6, "grad_norm": 0.727096676826477, "learning_rate": 2.6009533562733754e-05, "loss": 1.7216, "step": 78131 }, { "epoch": 2.6, "grad_norm": 0.7045936584472656, "learning_rate": 2.6005276749042447e-05, "loss": 1.7117, "step": 78132 }, { "epoch": 2.6, "grad_norm": 0.7136558294296265, "learning_rate": 2.6001020267940365e-05, "loss": 1.6592, "step": 78133 }, { "epoch": 2.6, "grad_norm": 0.710372805595398, "learning_rate": 2.5996764119432635e-05, "loss": 1.6568, "step": 78134 }, { "epoch": 2.6, "grad_norm": 0.6894217133522034, "learning_rate": 2.5992508303524517e-05, "loss": 1.6253, "step": 78135 }, { "epoch": 2.6, "grad_norm": 0.6793384552001953, "learning_rate": 2.5988252820221046e-05, "loss": 1.6467, "step": 78136 }, { "epoch": 2.6, "grad_norm": 0.7041087746620178, "learning_rate": 2.5983997669527546e-05, "loss": 1.6139, "step": 78137 }, { "epoch": 2.6, "grad_norm": 0.703808605670929, "learning_rate": 2.5979742851449014e-05, "loss": 1.6644, "step": 78138 }, { "epoch": 2.6, "grad_norm": 0.704128623008728, "learning_rate": 2.5975488365990783e-05, "loss": 1.6941, "step": 78139 }, { "epoch": 2.6, "grad_norm": 0.7106481790542603, "learning_rate": 2.597123421315791e-05, "loss": 1.7331, "step": 78140 }, { "epoch": 2.6, "grad_norm": 0.7092164754867554, "learning_rate": 2.5966980392955527e-05, "loss": 1.6412, "step": 78141 }, { "epoch": 2.6, "grad_norm": 0.6916813850402832, "learning_rate": 2.5962726905388864e-05, "loss": 1.6522, "step": 78142 }, { "epoch": 2.6, "grad_norm": 0.71431565284729, "learning_rate": 2.5958473750463183e-05, "loss": 1.7027, "step": 78143 }, { "epoch": 2.6, "grad_norm": 0.7068709135055542, "learning_rate": 2.595422092818341e-05, "loss": 1.7114, "step": 78144 }, { "epoch": 2.6, "grad_norm": 0.7008028030395508, "learning_rate": 2.5949968438554845e-05, "loss": 1.6898, "step": 78145 }, { "epoch": 2.6, "grad_norm": 0.7024561762809753, "learning_rate": 2.594571628158272e-05, "loss": 1.6383, "step": 78146 }, { "epoch": 2.6, "grad_norm": 0.6982426047325134, "learning_rate": 2.594146445727209e-05, "loss": 1.6811, "step": 78147 }, { "epoch": 2.6, "grad_norm": 0.7104504108428955, "learning_rate": 2.5937212965628086e-05, "loss": 1.7191, "step": 78148 }, { "epoch": 2.6, "grad_norm": 0.6962982416152954, "learning_rate": 2.5932961806655904e-05, "loss": 1.6822, "step": 78149 }, { "epoch": 2.6, "grad_norm": 0.7098676562309265, "learning_rate": 2.592871098036088e-05, "loss": 1.7135, "step": 78150 }, { "epoch": 2.6, "grad_norm": 0.6975586414337158, "learning_rate": 2.5924460486747865e-05, "loss": 1.6694, "step": 78151 }, { "epoch": 2.6, "grad_norm": 0.6908143758773804, "learning_rate": 2.59202103258222e-05, "loss": 1.7318, "step": 78152 }, { "epoch": 2.6, "grad_norm": 0.7151948809623718, "learning_rate": 2.5915960497589072e-05, "loss": 1.6413, "step": 78153 }, { "epoch": 2.6, "grad_norm": 0.7103965282440186, "learning_rate": 2.5911711002053548e-05, "loss": 1.6595, "step": 78154 }, { "epoch": 2.6, "grad_norm": 0.7085117697715759, "learning_rate": 2.590746183922079e-05, "loss": 1.6601, "step": 78155 }, { "epoch": 2.6, "grad_norm": 0.6988294720649719, "learning_rate": 2.590321300909599e-05, "loss": 1.6651, "step": 78156 }, { "epoch": 2.6, "grad_norm": 0.7105706334114075, "learning_rate": 2.5898964511684416e-05, "loss": 1.7753, "step": 78157 }, { "epoch": 2.6, "grad_norm": 0.7212777733802795, "learning_rate": 2.589471634699103e-05, "loss": 1.7018, "step": 78158 }, { "epoch": 2.6, "grad_norm": 0.7001289129257202, "learning_rate": 2.5890468515021024e-05, "loss": 1.7046, "step": 78159 }, { "epoch": 2.6, "grad_norm": 0.689980149269104, "learning_rate": 2.5886221015779662e-05, "loss": 1.6596, "step": 78160 }, { "epoch": 2.6, "grad_norm": 0.6981432437896729, "learning_rate": 2.5881973849272075e-05, "loss": 1.7169, "step": 78161 }, { "epoch": 2.6, "grad_norm": 0.7269777059555054, "learning_rate": 2.587772701550329e-05, "loss": 1.63, "step": 78162 }, { "epoch": 2.6, "grad_norm": 0.6956009864807129, "learning_rate": 2.5873480514478574e-05, "loss": 1.7052, "step": 78163 }, { "epoch": 2.6, "grad_norm": 0.6888287663459778, "learning_rate": 2.5869234346203148e-05, "loss": 1.6025, "step": 78164 }, { "epoch": 2.6, "grad_norm": 0.6898888945579529, "learning_rate": 2.5864988510681982e-05, "loss": 1.7286, "step": 78165 }, { "epoch": 2.6, "grad_norm": 0.7055688500404358, "learning_rate": 2.5860743007920336e-05, "loss": 1.6377, "step": 78166 }, { "epoch": 2.6, "grad_norm": 0.6889819502830505, "learning_rate": 2.5856497837923407e-05, "loss": 1.6743, "step": 78167 }, { "epoch": 2.6, "grad_norm": 0.7012604475021362, "learning_rate": 2.585225300069632e-05, "loss": 1.701, "step": 78168 }, { "epoch": 2.6, "grad_norm": 0.7269445657730103, "learning_rate": 2.584800849624411e-05, "loss": 1.6734, "step": 78169 }, { "epoch": 2.6, "grad_norm": 0.7214763760566711, "learning_rate": 2.5843764324572102e-05, "loss": 1.6736, "step": 78170 }, { "epoch": 2.6, "grad_norm": 0.7200482487678528, "learning_rate": 2.583952048568536e-05, "loss": 1.6461, "step": 78171 }, { "epoch": 2.6, "grad_norm": 0.6899092197418213, "learning_rate": 2.5835276979588983e-05, "loss": 1.6893, "step": 78172 }, { "epoch": 2.6, "grad_norm": 0.7076749205589294, "learning_rate": 2.5831033806288193e-05, "loss": 1.6806, "step": 78173 }, { "epoch": 2.6, "grad_norm": 0.7110112905502319, "learning_rate": 2.582679096578819e-05, "loss": 1.7373, "step": 78174 }, { "epoch": 2.6, "grad_norm": 0.6959742903709412, "learning_rate": 2.582254845809404e-05, "loss": 1.6333, "step": 78175 }, { "epoch": 2.6, "grad_norm": 0.7331593632698059, "learning_rate": 2.58183062832109e-05, "loss": 1.614, "step": 78176 }, { "epoch": 2.6, "grad_norm": 0.7028900980949402, "learning_rate": 2.5814064441143967e-05, "loss": 1.7039, "step": 78177 }, { "epoch": 2.6, "grad_norm": 0.6913541555404663, "learning_rate": 2.5809822931898373e-05, "loss": 1.6585, "step": 78178 }, { "epoch": 2.6, "grad_norm": 0.7069565653800964, "learning_rate": 2.580558175547921e-05, "loss": 1.7177, "step": 78179 }, { "epoch": 2.6, "grad_norm": 0.6864300966262817, "learning_rate": 2.580134091189171e-05, "loss": 1.6876, "step": 78180 }, { "epoch": 2.6, "grad_norm": 0.7066968083381653, "learning_rate": 2.5797100401140903e-05, "loss": 1.6744, "step": 78181 }, { "epoch": 2.6, "grad_norm": 0.699408769607544, "learning_rate": 2.5792860223232083e-05, "loss": 1.7224, "step": 78182 }, { "epoch": 2.6, "grad_norm": 0.6880477666854858, "learning_rate": 2.578862037817031e-05, "loss": 1.673, "step": 78183 }, { "epoch": 2.6, "grad_norm": 0.7038142085075378, "learning_rate": 2.578438086596075e-05, "loss": 1.6432, "step": 78184 }, { "epoch": 2.6, "grad_norm": 0.7095872163772583, "learning_rate": 2.578014168660857e-05, "loss": 1.6862, "step": 78185 }, { "epoch": 2.6, "grad_norm": 0.7052150368690491, "learning_rate": 2.5775902840118855e-05, "loss": 1.632, "step": 78186 }, { "epoch": 2.6, "grad_norm": 0.6938992142677307, "learning_rate": 2.5771664326496843e-05, "loss": 1.733, "step": 78187 }, { "epoch": 2.6, "grad_norm": 0.6879839301109314, "learning_rate": 2.576742614574756e-05, "loss": 1.6333, "step": 78188 }, { "epoch": 2.6, "grad_norm": 0.6902821063995361, "learning_rate": 2.5763188297876268e-05, "loss": 1.6997, "step": 78189 }, { "epoch": 2.6, "grad_norm": 0.7143462896347046, "learning_rate": 2.575895078288803e-05, "loss": 1.6318, "step": 78190 }, { "epoch": 2.6, "grad_norm": 0.6870666742324829, "learning_rate": 2.5754713600788013e-05, "loss": 1.5683, "step": 78191 }, { "epoch": 2.6, "grad_norm": 0.701522171497345, "learning_rate": 2.5750476751581405e-05, "loss": 1.6786, "step": 78192 }, { "epoch": 2.6, "grad_norm": 0.7075865864753723, "learning_rate": 2.574624023527324e-05, "loss": 1.6463, "step": 78193 }, { "epoch": 2.6, "grad_norm": 0.7107009291648865, "learning_rate": 2.574200405186878e-05, "loss": 1.6378, "step": 78194 }, { "epoch": 2.6, "grad_norm": 0.6918494701385498, "learning_rate": 2.5737768201373086e-05, "loss": 1.6666, "step": 78195 }, { "epoch": 2.6, "grad_norm": 0.7533669471740723, "learning_rate": 2.573353268379139e-05, "loss": 1.7089, "step": 78196 }, { "epoch": 2.6, "grad_norm": 0.7367842793464661, "learning_rate": 2.5729297499128753e-05, "loss": 1.7113, "step": 78197 }, { "epoch": 2.6, "grad_norm": 0.7172302603721619, "learning_rate": 2.572506264739027e-05, "loss": 1.6072, "step": 78198 }, { "epoch": 2.6, "grad_norm": 0.7236575484275818, "learning_rate": 2.572082812858124e-05, "loss": 1.7372, "step": 78199 }, { "epoch": 2.6, "grad_norm": 0.7046903371810913, "learning_rate": 2.5716593942706685e-05, "loss": 1.6068, "step": 78200 }, { "epoch": 2.6, "grad_norm": 0.7045167088508606, "learning_rate": 2.571236008977171e-05, "loss": 1.7056, "step": 78201 }, { "epoch": 2.6, "grad_norm": 0.7114004492759705, "learning_rate": 2.570812656978154e-05, "loss": 1.6377, "step": 78202 }, { "epoch": 2.6, "grad_norm": 0.7136489152908325, "learning_rate": 2.5703893382741335e-05, "loss": 1.6226, "step": 78203 }, { "epoch": 2.6, "grad_norm": 0.7353063225746155, "learning_rate": 2.5699660528656198e-05, "loss": 1.6325, "step": 78204 }, { "epoch": 2.6, "grad_norm": 0.7097363471984863, "learning_rate": 2.569542800753118e-05, "loss": 1.7, "step": 78205 }, { "epoch": 2.6, "grad_norm": 0.7280471324920654, "learning_rate": 2.5691195819371557e-05, "loss": 1.6639, "step": 78206 }, { "epoch": 2.6, "grad_norm": 0.7204335331916809, "learning_rate": 2.5686963964182417e-05, "loss": 1.7797, "step": 78207 }, { "epoch": 2.6, "grad_norm": 0.6931383013725281, "learning_rate": 2.5682732441968824e-05, "loss": 1.7195, "step": 78208 }, { "epoch": 2.6, "grad_norm": 0.7029616236686707, "learning_rate": 2.5678501252735974e-05, "loss": 1.6953, "step": 78209 }, { "epoch": 2.6, "grad_norm": 0.6963322162628174, "learning_rate": 2.5674270396489093e-05, "loss": 1.6676, "step": 78210 }, { "epoch": 2.6, "grad_norm": 0.6963144540786743, "learning_rate": 2.567003987323315e-05, "loss": 1.5912, "step": 78211 }, { "epoch": 2.6, "grad_norm": 0.7302778959274292, "learning_rate": 2.5665809682973337e-05, "loss": 1.7551, "step": 78212 }, { "epoch": 2.6, "grad_norm": 0.7227036952972412, "learning_rate": 2.5661579825714884e-05, "loss": 1.6849, "step": 78213 }, { "epoch": 2.6, "grad_norm": 0.7003774642944336, "learning_rate": 2.5657350301462854e-05, "loss": 1.6914, "step": 78214 }, { "epoch": 2.6, "grad_norm": 0.706428587436676, "learning_rate": 2.5653121110222308e-05, "loss": 1.675, "step": 78215 }, { "epoch": 2.6, "grad_norm": 0.6900824904441833, "learning_rate": 2.5648892251998444e-05, "loss": 1.6779, "step": 78216 }, { "epoch": 2.6, "grad_norm": 0.702705979347229, "learning_rate": 2.5644663726796523e-05, "loss": 1.6691, "step": 78217 }, { "epoch": 2.6, "grad_norm": 0.6994924545288086, "learning_rate": 2.5640435534621438e-05, "loss": 1.6786, "step": 78218 }, { "epoch": 2.6, "grad_norm": 0.7030168771743774, "learning_rate": 2.563620767547846e-05, "loss": 1.6809, "step": 78219 }, { "epoch": 2.6, "grad_norm": 0.6944366097450256, "learning_rate": 2.5631980149372712e-05, "loss": 1.651, "step": 78220 }, { "epoch": 2.6, "grad_norm": 0.7097395062446594, "learning_rate": 2.5627752956309356e-05, "loss": 1.6597, "step": 78221 }, { "epoch": 2.6, "grad_norm": 0.7114743590354919, "learning_rate": 2.562352609629339e-05, "loss": 1.713, "step": 78222 }, { "epoch": 2.6, "grad_norm": 0.7243416905403137, "learning_rate": 2.5619299569330076e-05, "loss": 1.663, "step": 78223 }, { "epoch": 2.6, "grad_norm": 0.7178037166595459, "learning_rate": 2.5615073375424576e-05, "loss": 1.6795, "step": 78224 }, { "epoch": 2.6, "grad_norm": 0.70002681016922, "learning_rate": 2.5610847514581857e-05, "loss": 1.6719, "step": 78225 }, { "epoch": 2.6, "grad_norm": 0.696693480014801, "learning_rate": 2.560662198680714e-05, "loss": 1.6763, "step": 78226 }, { "epoch": 2.6, "grad_norm": 0.6984084844589233, "learning_rate": 2.560239679210563e-05, "loss": 1.6105, "step": 78227 }, { "epoch": 2.6, "grad_norm": 0.684674859046936, "learning_rate": 2.5598171930482348e-05, "loss": 1.6453, "step": 78228 }, { "epoch": 2.6, "grad_norm": 0.7107875943183899, "learning_rate": 2.5593947401942393e-05, "loss": 1.643, "step": 78229 }, { "epoch": 2.6, "grad_norm": 0.7261105179786682, "learning_rate": 2.558972320649103e-05, "loss": 1.6638, "step": 78230 }, { "epoch": 2.6, "grad_norm": 0.710753858089447, "learning_rate": 2.558549934413332e-05, "loss": 1.6446, "step": 78231 }, { "epoch": 2.6, "grad_norm": 0.7041560411453247, "learning_rate": 2.5581275814874325e-05, "loss": 1.7106, "step": 78232 }, { "epoch": 2.6, "grad_norm": 0.7021582722663879, "learning_rate": 2.5577052618719206e-05, "loss": 1.7447, "step": 78233 }, { "epoch": 2.6, "grad_norm": 0.7122126221656799, "learning_rate": 2.5572829755673162e-05, "loss": 1.7448, "step": 78234 }, { "epoch": 2.6, "grad_norm": 0.7312827110290527, "learning_rate": 2.5568607225741287e-05, "loss": 1.7126, "step": 78235 }, { "epoch": 2.6, "grad_norm": 0.6908732652664185, "learning_rate": 2.5564385028928612e-05, "loss": 1.6134, "step": 78236 }, { "epoch": 2.6, "grad_norm": 0.6933910250663757, "learning_rate": 2.55601631652404e-05, "loss": 1.6248, "step": 78237 }, { "epoch": 2.6, "grad_norm": 0.7249844670295715, "learning_rate": 2.5555941634681743e-05, "loss": 1.6375, "step": 78238 }, { "epoch": 2.6, "grad_norm": 0.7037262320518494, "learning_rate": 2.5551720437257638e-05, "loss": 1.7197, "step": 78239 }, { "epoch": 2.6, "grad_norm": 0.7632429599761963, "learning_rate": 2.5547499572973383e-05, "loss": 1.7029, "step": 78240 }, { "epoch": 2.6, "grad_norm": 0.6740173101425171, "learning_rate": 2.5543279041833974e-05, "loss": 1.6278, "step": 78241 }, { "epoch": 2.6, "grad_norm": 0.6794442534446716, "learning_rate": 2.553905884384464e-05, "loss": 1.5587, "step": 78242 }, { "epoch": 2.6, "grad_norm": 0.6686770915985107, "learning_rate": 2.5534838979010376e-05, "loss": 1.5882, "step": 78243 }, { "epoch": 2.6, "grad_norm": 0.6998218894004822, "learning_rate": 2.5530619447336444e-05, "loss": 1.7091, "step": 78244 }, { "epoch": 2.6, "grad_norm": 0.7334827184677124, "learning_rate": 2.552640024882787e-05, "loss": 1.6038, "step": 78245 }, { "epoch": 2.6, "grad_norm": 0.7086998820304871, "learning_rate": 2.5522181383489792e-05, "loss": 1.5927, "step": 78246 }, { "epoch": 2.6, "grad_norm": 0.7263729572296143, "learning_rate": 2.551796285132737e-05, "loss": 1.7207, "step": 78247 }, { "epoch": 2.6, "grad_norm": 0.7105676531791687, "learning_rate": 2.5513744652345656e-05, "loss": 1.6408, "step": 78248 }, { "epoch": 2.6, "grad_norm": 0.7184591889381409, "learning_rate": 2.5509526786549894e-05, "loss": 1.6555, "step": 78249 }, { "epoch": 2.6, "grad_norm": 0.7142597436904907, "learning_rate": 2.550530925394507e-05, "loss": 1.6454, "step": 78250 }, { "epoch": 2.6, "grad_norm": 0.7467975616455078, "learning_rate": 2.550109205453632e-05, "loss": 1.704, "step": 78251 }, { "epoch": 2.6, "grad_norm": 0.6811556220054626, "learning_rate": 2.549687518832887e-05, "loss": 1.6455, "step": 78252 }, { "epoch": 2.6, "grad_norm": 0.7094244956970215, "learning_rate": 2.5492658655327713e-05, "loss": 1.6656, "step": 78253 }, { "epoch": 2.6, "grad_norm": 0.6966934204101562, "learning_rate": 2.548844245553805e-05, "loss": 1.7174, "step": 78254 }, { "epoch": 2.6, "grad_norm": 0.7017784118652344, "learning_rate": 2.548422658896494e-05, "loss": 1.6676, "step": 78255 }, { "epoch": 2.6, "grad_norm": 0.7088219523429871, "learning_rate": 2.548001105561358e-05, "loss": 1.7616, "step": 78256 }, { "epoch": 2.6, "grad_norm": 0.6896412968635559, "learning_rate": 2.5475795855489034e-05, "loss": 1.6721, "step": 78257 }, { "epoch": 2.6, "grad_norm": 0.6854097247123718, "learning_rate": 2.547158098859636e-05, "loss": 1.6413, "step": 78258 }, { "epoch": 2.6, "grad_norm": 0.7756959199905396, "learning_rate": 2.546736645494073e-05, "loss": 1.7273, "step": 78259 }, { "epoch": 2.6, "grad_norm": 0.7177656292915344, "learning_rate": 2.5463152254527396e-05, "loss": 1.6592, "step": 78260 }, { "epoch": 2.6, "grad_norm": 0.723095178604126, "learning_rate": 2.5458938387361226e-05, "loss": 1.686, "step": 78261 }, { "epoch": 2.6, "grad_norm": 0.722391664981842, "learning_rate": 2.5454724853447485e-05, "loss": 1.7584, "step": 78262 }, { "epoch": 2.6, "grad_norm": 0.7390108108520508, "learning_rate": 2.545051165279126e-05, "loss": 1.7537, "step": 78263 }, { "epoch": 2.6, "grad_norm": 0.700812578201294, "learning_rate": 2.544629878539769e-05, "loss": 1.654, "step": 78264 }, { "epoch": 2.6, "grad_norm": 0.7146962285041809, "learning_rate": 2.54420862512718e-05, "loss": 1.7433, "step": 78265 }, { "epoch": 2.6, "grad_norm": 0.7060940265655518, "learning_rate": 2.543787405041875e-05, "loss": 1.6609, "step": 78266 }, { "epoch": 2.6, "grad_norm": 0.6808494925498962, "learning_rate": 2.5433662182843804e-05, "loss": 1.7224, "step": 78267 }, { "epoch": 2.6, "grad_norm": 0.7023463249206543, "learning_rate": 2.5429450648551797e-05, "loss": 1.646, "step": 78268 }, { "epoch": 2.6, "grad_norm": 0.6924264430999756, "learning_rate": 2.5425239447547986e-05, "loss": 1.5956, "step": 78269 }, { "epoch": 2.6, "grad_norm": 0.7132657766342163, "learning_rate": 2.5421028579837565e-05, "loss": 1.6742, "step": 78270 }, { "epoch": 2.6, "grad_norm": 0.702871561050415, "learning_rate": 2.5416818045425502e-05, "loss": 1.648, "step": 78271 }, { "epoch": 2.6, "grad_norm": 0.7008146047592163, "learning_rate": 2.5412607844316957e-05, "loss": 1.695, "step": 78272 }, { "epoch": 2.6, "grad_norm": 0.6958749294281006, "learning_rate": 2.5408397976517025e-05, "loss": 1.707, "step": 78273 }, { "epoch": 2.6, "grad_norm": 0.7012051343917847, "learning_rate": 2.540418844203097e-05, "loss": 1.6886, "step": 78274 }, { "epoch": 2.6, "grad_norm": 0.6965726017951965, "learning_rate": 2.539997924086362e-05, "loss": 1.7136, "step": 78275 }, { "epoch": 2.6, "grad_norm": 0.6946155428886414, "learning_rate": 2.5395770373020275e-05, "loss": 1.681, "step": 78276 }, { "epoch": 2.6, "grad_norm": 0.71695876121521, "learning_rate": 2.5391561838506025e-05, "loss": 1.7152, "step": 78277 }, { "epoch": 2.6, "grad_norm": 0.69438636302948, "learning_rate": 2.5387353637326002e-05, "loss": 1.6758, "step": 78278 }, { "epoch": 2.6, "grad_norm": 0.6886082291603088, "learning_rate": 2.538314576948517e-05, "loss": 1.6488, "step": 78279 }, { "epoch": 2.6, "grad_norm": 0.6890476942062378, "learning_rate": 2.5378938234988755e-05, "loss": 1.686, "step": 78280 }, { "epoch": 2.6, "grad_norm": 0.742054283618927, "learning_rate": 2.5374731033841956e-05, "loss": 1.7323, "step": 78281 }, { "epoch": 2.6, "grad_norm": 0.7286904454231262, "learning_rate": 2.5370524166049665e-05, "loss": 1.6735, "step": 78282 }, { "epoch": 2.6, "grad_norm": 0.7255122661590576, "learning_rate": 2.5366317631617084e-05, "loss": 1.6263, "step": 78283 }, { "epoch": 2.6, "grad_norm": 0.6936361789703369, "learning_rate": 2.536211143054937e-05, "loss": 1.7023, "step": 78284 }, { "epoch": 2.6, "grad_norm": 0.7105375528335571, "learning_rate": 2.5357905562851622e-05, "loss": 1.6979, "step": 78285 }, { "epoch": 2.6, "grad_norm": 0.7176856994628906, "learning_rate": 2.5353700028528833e-05, "loss": 1.6416, "step": 78286 }, { "epoch": 2.6, "grad_norm": 0.710114061832428, "learning_rate": 2.5349494827586235e-05, "loss": 1.7382, "step": 78287 }, { "epoch": 2.6, "grad_norm": 0.7211492657661438, "learning_rate": 2.534528996002889e-05, "loss": 1.7231, "step": 78288 }, { "epoch": 2.6, "grad_norm": 0.707346498966217, "learning_rate": 2.534108542586183e-05, "loss": 1.6804, "step": 78289 }, { "epoch": 2.6, "grad_norm": 0.7110307812690735, "learning_rate": 2.5336881225090243e-05, "loss": 1.7206, "step": 78290 }, { "epoch": 2.6, "grad_norm": 0.7163405418395996, "learning_rate": 2.5332677357719265e-05, "loss": 1.6267, "step": 78291 }, { "epoch": 2.6, "grad_norm": 0.705194890499115, "learning_rate": 2.5328473823753927e-05, "loss": 1.6666, "step": 78292 }, { "epoch": 2.6, "grad_norm": 0.7078673839569092, "learning_rate": 2.5324270623199316e-05, "loss": 1.6871, "step": 78293 }, { "epoch": 2.6, "grad_norm": 0.7032263875007629, "learning_rate": 2.5320067756060604e-05, "loss": 1.7397, "step": 78294 }, { "epoch": 2.6, "grad_norm": 0.7154438495635986, "learning_rate": 2.531586522234288e-05, "loss": 1.6882, "step": 78295 }, { "epoch": 2.6, "grad_norm": 0.7443627715110779, "learning_rate": 2.5311663022051143e-05, "loss": 1.7936, "step": 78296 }, { "epoch": 2.6, "grad_norm": 0.7244014143943787, "learning_rate": 2.5307461155190623e-05, "loss": 1.6541, "step": 78297 }, { "epoch": 2.6, "grad_norm": 0.712039053440094, "learning_rate": 2.5303259621766348e-05, "loss": 1.6844, "step": 78298 }, { "epoch": 2.61, "grad_norm": 0.7246379256248474, "learning_rate": 2.529905842178348e-05, "loss": 1.6958, "step": 78299 }, { "epoch": 2.61, "grad_norm": 0.6984310746192932, "learning_rate": 2.5294857555247017e-05, "loss": 1.6558, "step": 78300 }, { "epoch": 2.61, "grad_norm": 0.7105796933174133, "learning_rate": 2.5290657022162187e-05, "loss": 1.6785, "step": 78301 }, { "epoch": 2.61, "grad_norm": 0.6964318156242371, "learning_rate": 2.5286456822534053e-05, "loss": 1.6672, "step": 78302 }, { "epoch": 2.61, "grad_norm": 0.7358637452125549, "learning_rate": 2.5282256956367574e-05, "loss": 1.7687, "step": 78303 }, { "epoch": 2.61, "grad_norm": 0.6788200736045837, "learning_rate": 2.5278057423668052e-05, "loss": 1.6348, "step": 78304 }, { "epoch": 2.61, "grad_norm": 0.6810339093208313, "learning_rate": 2.5273858224440414e-05, "loss": 1.6193, "step": 78305 }, { "epoch": 2.61, "grad_norm": 0.72573322057724, "learning_rate": 2.5269659358689885e-05, "loss": 1.6428, "step": 78306 }, { "epoch": 2.61, "grad_norm": 0.6980834007263184, "learning_rate": 2.5265460826421537e-05, "loss": 1.6803, "step": 78307 }, { "epoch": 2.61, "grad_norm": 0.7151615023612976, "learning_rate": 2.5261262627640356e-05, "loss": 1.7176, "step": 78308 }, { "epoch": 2.61, "grad_norm": 0.7092365026473999, "learning_rate": 2.5257064762351574e-05, "loss": 1.7047, "step": 78309 }, { "epoch": 2.61, "grad_norm": 0.7169791460037231, "learning_rate": 2.5252867230560193e-05, "loss": 1.6393, "step": 78310 }, { "epoch": 2.61, "grad_norm": 0.7556670308113098, "learning_rate": 2.5248670032271402e-05, "loss": 1.7627, "step": 78311 }, { "epoch": 2.61, "grad_norm": 0.7147427201271057, "learning_rate": 2.52444731674902e-05, "loss": 1.6486, "step": 78312 }, { "epoch": 2.61, "grad_norm": 0.7191014289855957, "learning_rate": 2.5240276636221745e-05, "loss": 1.726, "step": 78313 }, { "epoch": 2.61, "grad_norm": 0.7547799348831177, "learning_rate": 2.523608043847114e-05, "loss": 1.7282, "step": 78314 }, { "epoch": 2.61, "grad_norm": 0.7001529335975647, "learning_rate": 2.523188457424338e-05, "loss": 1.6994, "step": 78315 }, { "epoch": 2.61, "grad_norm": 0.7065944671630859, "learning_rate": 2.5227689043543686e-05, "loss": 1.6664, "step": 78316 }, { "epoch": 2.61, "grad_norm": 0.7209879159927368, "learning_rate": 2.5223493846377097e-05, "loss": 1.7204, "step": 78317 }, { "epoch": 2.61, "grad_norm": 0.7093213200569153, "learning_rate": 2.5219298982748637e-05, "loss": 1.6622, "step": 78318 }, { "epoch": 2.61, "grad_norm": 0.7031499743461609, "learning_rate": 2.521510445266347e-05, "loss": 1.6983, "step": 78319 }, { "epoch": 2.61, "grad_norm": 0.7314684391021729, "learning_rate": 2.5210910256126727e-05, "loss": 1.7702, "step": 78320 }, { "epoch": 2.61, "grad_norm": 0.7167218923568726, "learning_rate": 2.5206716393143465e-05, "loss": 1.7321, "step": 78321 }, { "epoch": 2.61, "grad_norm": 0.7161120772361755, "learning_rate": 2.5202522863718688e-05, "loss": 1.701, "step": 78322 }, { "epoch": 2.61, "grad_norm": 0.6841588020324707, "learning_rate": 2.519832966785762e-05, "loss": 1.7264, "step": 78323 }, { "epoch": 2.61, "grad_norm": 0.7050489187240601, "learning_rate": 2.519413680556529e-05, "loss": 1.6773, "step": 78324 }, { "epoch": 2.61, "grad_norm": 0.7149750590324402, "learning_rate": 2.518994427684673e-05, "loss": 1.6809, "step": 78325 }, { "epoch": 2.61, "grad_norm": 0.6971789002418518, "learning_rate": 2.5185752081707065e-05, "loss": 1.6834, "step": 78326 }, { "epoch": 2.61, "grad_norm": 0.7199051976203918, "learning_rate": 2.5181560220151564e-05, "loss": 1.7044, "step": 78327 }, { "epoch": 2.61, "grad_norm": 0.6844300627708435, "learning_rate": 2.5177368692185017e-05, "loss": 1.7158, "step": 78328 }, { "epoch": 2.61, "grad_norm": 0.713291347026825, "learning_rate": 2.5173177497812625e-05, "loss": 1.7265, "step": 78329 }, { "epoch": 2.61, "grad_norm": 0.7259193658828735, "learning_rate": 2.5168986637039613e-05, "loss": 1.68, "step": 78330 }, { "epoch": 2.61, "grad_norm": 0.7274160981178284, "learning_rate": 2.516479610987091e-05, "loss": 1.6361, "step": 78331 }, { "epoch": 2.61, "grad_norm": 0.7164701819419861, "learning_rate": 2.5160605916311617e-05, "loss": 1.6596, "step": 78332 }, { "epoch": 2.61, "grad_norm": 0.7228583693504333, "learning_rate": 2.5156416056366857e-05, "loss": 1.7229, "step": 78333 }, { "epoch": 2.61, "grad_norm": 0.7009680867195129, "learning_rate": 2.5152226530041798e-05, "loss": 1.6829, "step": 78334 }, { "epoch": 2.61, "grad_norm": 0.6794393658638, "learning_rate": 2.5148037337341366e-05, "loss": 1.6816, "step": 78335 }, { "epoch": 2.61, "grad_norm": 0.7055816054344177, "learning_rate": 2.514384847827069e-05, "loss": 1.7282, "step": 78336 }, { "epoch": 2.61, "grad_norm": 0.6969634294509888, "learning_rate": 2.5139659952834935e-05, "loss": 1.7109, "step": 78337 }, { "epoch": 2.61, "grad_norm": 0.6919336318969727, "learning_rate": 2.513547176103916e-05, "loss": 1.6586, "step": 78338 }, { "epoch": 2.61, "grad_norm": 0.6926888227462769, "learning_rate": 2.5131283902888334e-05, "loss": 1.6858, "step": 78339 }, { "epoch": 2.61, "grad_norm": 0.7094939947128296, "learning_rate": 2.5127096378387647e-05, "loss": 1.7602, "step": 78340 }, { "epoch": 2.61, "grad_norm": 0.7061730027198792, "learning_rate": 2.5122909187542294e-05, "loss": 1.6649, "step": 78341 }, { "epoch": 2.61, "grad_norm": 0.6965333819389343, "learning_rate": 2.5118722330357077e-05, "loss": 1.6609, "step": 78342 }, { "epoch": 2.61, "grad_norm": 0.7406018972396851, "learning_rate": 2.5114535806837253e-05, "loss": 1.7328, "step": 78343 }, { "epoch": 2.61, "grad_norm": 0.6966642737388611, "learning_rate": 2.5110349616987957e-05, "loss": 1.6566, "step": 78344 }, { "epoch": 2.61, "grad_norm": 0.7058904767036438, "learning_rate": 2.5106163760814147e-05, "loss": 1.6972, "step": 78345 }, { "epoch": 2.61, "grad_norm": 0.7027037739753723, "learning_rate": 2.510197823832095e-05, "loss": 1.7539, "step": 78346 }, { "epoch": 2.61, "grad_norm": 0.6852411031723022, "learning_rate": 2.509779304951347e-05, "loss": 1.6406, "step": 78347 }, { "epoch": 2.61, "grad_norm": 0.7015907168388367, "learning_rate": 2.5093608194396763e-05, "loss": 1.6922, "step": 78348 }, { "epoch": 2.61, "grad_norm": 0.7123120427131653, "learning_rate": 2.5089423672975863e-05, "loss": 1.6298, "step": 78349 }, { "epoch": 2.61, "grad_norm": 0.7136164903640747, "learning_rate": 2.5085239485255894e-05, "loss": 1.8011, "step": 78350 }, { "epoch": 2.61, "grad_norm": 0.7330239415168762, "learning_rate": 2.5081055631241986e-05, "loss": 1.6533, "step": 78351 }, { "epoch": 2.61, "grad_norm": 0.7074704170227051, "learning_rate": 2.5076872110939172e-05, "loss": 1.6977, "step": 78352 }, { "epoch": 2.61, "grad_norm": 0.7060848474502563, "learning_rate": 2.5072688924352448e-05, "loss": 1.6667, "step": 78353 }, { "epoch": 2.61, "grad_norm": 0.6983384490013123, "learning_rate": 2.5068506071487072e-05, "loss": 1.7161, "step": 78354 }, { "epoch": 2.61, "grad_norm": 0.7133577466011047, "learning_rate": 2.5064323552347975e-05, "loss": 1.6636, "step": 78355 }, { "epoch": 2.61, "grad_norm": 0.7128894329071045, "learning_rate": 2.5060141366940256e-05, "loss": 1.7224, "step": 78356 }, { "epoch": 2.61, "grad_norm": 0.6950306296348572, "learning_rate": 2.5055959515269075e-05, "loss": 1.6877, "step": 78357 }, { "epoch": 2.61, "grad_norm": 0.689289391040802, "learning_rate": 2.505177799733936e-05, "loss": 1.6961, "step": 78358 }, { "epoch": 2.61, "grad_norm": 0.6948990821838379, "learning_rate": 2.504759681315638e-05, "loss": 1.6395, "step": 78359 }, { "epoch": 2.61, "grad_norm": 0.7203273177146912, "learning_rate": 2.5043415962725023e-05, "loss": 1.6561, "step": 78360 }, { "epoch": 2.61, "grad_norm": 0.7188161015510559, "learning_rate": 2.503923544605049e-05, "loss": 1.6608, "step": 78361 }, { "epoch": 2.61, "grad_norm": 0.7123824954032898, "learning_rate": 2.5035055263137806e-05, "loss": 1.6494, "step": 78362 }, { "epoch": 2.61, "grad_norm": 0.704754114151001, "learning_rate": 2.5030875413992e-05, "loss": 1.6644, "step": 78363 }, { "epoch": 2.61, "grad_norm": 0.7075350880622864, "learning_rate": 2.5026695898618275e-05, "loss": 1.7243, "step": 78364 }, { "epoch": 2.61, "grad_norm": 0.722086489200592, "learning_rate": 2.5022516717021555e-05, "loss": 1.655, "step": 78365 }, { "epoch": 2.61, "grad_norm": 0.7123544812202454, "learning_rate": 2.501833786920704e-05, "loss": 1.743, "step": 78366 }, { "epoch": 2.61, "grad_norm": 0.7141510844230652, "learning_rate": 2.5014159355179752e-05, "loss": 1.7151, "step": 78367 }, { "epoch": 2.61, "grad_norm": 0.7129669785499573, "learning_rate": 2.500998117494469e-05, "loss": 1.6574, "step": 78368 }, { "epoch": 2.61, "grad_norm": 0.6719313859939575, "learning_rate": 2.5005803328507023e-05, "loss": 1.6739, "step": 78369 }, { "epoch": 2.61, "grad_norm": 0.7216050028800964, "learning_rate": 2.5001625815871808e-05, "loss": 1.7485, "step": 78370 }, { "epoch": 2.61, "grad_norm": 0.7085897922515869, "learning_rate": 2.4997448637044105e-05, "loss": 1.7006, "step": 78371 }, { "epoch": 2.61, "grad_norm": 0.7004339098930359, "learning_rate": 2.4993271792028946e-05, "loss": 1.7184, "step": 78372 }, { "epoch": 2.61, "grad_norm": 0.6818128824234009, "learning_rate": 2.4989095280831494e-05, "loss": 1.7575, "step": 78373 }, { "epoch": 2.61, "grad_norm": 0.7212085127830505, "learning_rate": 2.4984919103456747e-05, "loss": 1.7106, "step": 78374 }, { "epoch": 2.61, "grad_norm": 0.7154218554496765, "learning_rate": 2.498074325990973e-05, "loss": 1.7794, "step": 78375 }, { "epoch": 2.61, "grad_norm": 0.6672883033752441, "learning_rate": 2.4976567750195543e-05, "loss": 1.6416, "step": 78376 }, { "epoch": 2.61, "grad_norm": 0.7059845328330994, "learning_rate": 2.4972392574319444e-05, "loss": 1.7298, "step": 78377 }, { "epoch": 2.61, "grad_norm": 0.7120934724807739, "learning_rate": 2.49682177322862e-05, "loss": 1.7481, "step": 78378 }, { "epoch": 2.61, "grad_norm": 0.7023456692695618, "learning_rate": 2.4964043224101006e-05, "loss": 1.651, "step": 78379 }, { "epoch": 2.61, "grad_norm": 0.684097170829773, "learning_rate": 2.495986904976899e-05, "loss": 1.6596, "step": 78380 }, { "epoch": 2.61, "grad_norm": 0.687993049621582, "learning_rate": 2.495569520929518e-05, "loss": 1.6445, "step": 78381 }, { "epoch": 2.61, "grad_norm": 0.705820620059967, "learning_rate": 2.4951521702684575e-05, "loss": 1.6996, "step": 78382 }, { "epoch": 2.61, "grad_norm": 0.683933436870575, "learning_rate": 2.4947348529942303e-05, "loss": 1.6218, "step": 78383 }, { "epoch": 2.61, "grad_norm": 0.731532096862793, "learning_rate": 2.4943175691073524e-05, "loss": 1.6762, "step": 78384 }, { "epoch": 2.61, "grad_norm": 0.7066319584846497, "learning_rate": 2.4939003186083107e-05, "loss": 1.6377, "step": 78385 }, { "epoch": 2.61, "grad_norm": 0.712489128112793, "learning_rate": 2.4934831014976175e-05, "loss": 1.729, "step": 78386 }, { "epoch": 2.61, "grad_norm": 0.7119296789169312, "learning_rate": 2.4930659177757927e-05, "loss": 1.7169, "step": 78387 }, { "epoch": 2.61, "grad_norm": 0.712195873260498, "learning_rate": 2.4926487674433295e-05, "loss": 1.7709, "step": 78388 }, { "epoch": 2.61, "grad_norm": 0.69737708568573, "learning_rate": 2.4922316505007332e-05, "loss": 1.8057, "step": 78389 }, { "epoch": 2.61, "grad_norm": 0.7237764596939087, "learning_rate": 2.4918145669485146e-05, "loss": 1.6984, "step": 78390 }, { "epoch": 2.61, "grad_norm": 0.7010147571563721, "learning_rate": 2.4913975167871926e-05, "loss": 1.74, "step": 78391 }, { "epoch": 2.61, "grad_norm": 0.6987084746360779, "learning_rate": 2.49098050001725e-05, "loss": 1.71, "step": 78392 }, { "epoch": 2.61, "grad_norm": 0.7087714076042175, "learning_rate": 2.4905635166392003e-05, "loss": 1.662, "step": 78393 }, { "epoch": 2.61, "grad_norm": 0.6905955076217651, "learning_rate": 2.4901465666535626e-05, "loss": 1.7239, "step": 78394 }, { "epoch": 2.61, "grad_norm": 0.711862325668335, "learning_rate": 2.48972965006083e-05, "loss": 1.6352, "step": 78395 }, { "epoch": 2.61, "grad_norm": 0.7319366931915283, "learning_rate": 2.4893127668615087e-05, "loss": 1.6552, "step": 78396 }, { "epoch": 2.61, "grad_norm": 0.707705557346344, "learning_rate": 2.4888959170561153e-05, "loss": 1.7204, "step": 78397 }, { "epoch": 2.61, "grad_norm": 0.7034975290298462, "learning_rate": 2.4884791006451455e-05, "loss": 1.6397, "step": 78398 }, { "epoch": 2.61, "grad_norm": 0.6884794235229492, "learning_rate": 2.488062317629106e-05, "loss": 1.6351, "step": 78399 }, { "epoch": 2.61, "grad_norm": 0.7100164890289307, "learning_rate": 2.487645568008503e-05, "loss": 1.6471, "step": 78400 }, { "epoch": 2.61, "grad_norm": 0.6885266900062561, "learning_rate": 2.487228851783849e-05, "loss": 1.6858, "step": 78401 }, { "epoch": 2.61, "grad_norm": 0.7384486198425293, "learning_rate": 2.4868121689556475e-05, "loss": 1.6645, "step": 78402 }, { "epoch": 2.61, "grad_norm": 0.7152109742164612, "learning_rate": 2.4863955195243945e-05, "loss": 1.6971, "step": 78403 }, { "epoch": 2.61, "grad_norm": 0.713679313659668, "learning_rate": 2.485978903490613e-05, "loss": 1.6853, "step": 78404 }, { "epoch": 2.61, "grad_norm": 0.713964581489563, "learning_rate": 2.4855623208547957e-05, "loss": 1.7197, "step": 78405 }, { "epoch": 2.61, "grad_norm": 0.7181186676025391, "learning_rate": 2.485145771617446e-05, "loss": 1.7169, "step": 78406 }, { "epoch": 2.61, "grad_norm": 0.7241514325141907, "learning_rate": 2.4847292557790765e-05, "loss": 1.7059, "step": 78407 }, { "epoch": 2.61, "grad_norm": 0.6954062581062317, "learning_rate": 2.484312773340197e-05, "loss": 1.6652, "step": 78408 }, { "epoch": 2.61, "grad_norm": 0.7161210775375366, "learning_rate": 2.4838963243013067e-05, "loss": 1.7298, "step": 78409 }, { "epoch": 2.61, "grad_norm": 0.7192734479904175, "learning_rate": 2.4834799086629087e-05, "loss": 1.6583, "step": 78410 }, { "epoch": 2.61, "grad_norm": 0.7090906500816345, "learning_rate": 2.4830635264255128e-05, "loss": 1.7205, "step": 78411 }, { "epoch": 2.61, "grad_norm": 0.7035283446311951, "learning_rate": 2.4826471775896285e-05, "loss": 1.6539, "step": 78412 }, { "epoch": 2.61, "grad_norm": 0.7161857485771179, "learning_rate": 2.4822308621557486e-05, "loss": 1.6919, "step": 78413 }, { "epoch": 2.61, "grad_norm": 0.7047643661499023, "learning_rate": 2.4818145801243893e-05, "loss": 1.6687, "step": 78414 }, { "epoch": 2.61, "grad_norm": 0.7120165824890137, "learning_rate": 2.4813983314960474e-05, "loss": 1.7142, "step": 78415 }, { "epoch": 2.61, "grad_norm": 0.7180522084236145, "learning_rate": 2.480982116271242e-05, "loss": 1.7289, "step": 78416 }, { "epoch": 2.61, "grad_norm": 0.7165907621383667, "learning_rate": 2.4805659344504624e-05, "loss": 1.7304, "step": 78417 }, { "epoch": 2.61, "grad_norm": 0.7140516042709351, "learning_rate": 2.4801497860342255e-05, "loss": 1.6553, "step": 78418 }, { "epoch": 2.61, "grad_norm": 0.7008402347564697, "learning_rate": 2.479733671023031e-05, "loss": 1.6617, "step": 78419 }, { "epoch": 2.61, "grad_norm": 0.7183743119239807, "learning_rate": 2.479317589417381e-05, "loss": 1.6732, "step": 78420 }, { "epoch": 2.61, "grad_norm": 0.7319462299346924, "learning_rate": 2.4789015412177894e-05, "loss": 1.7075, "step": 78421 }, { "epoch": 2.61, "grad_norm": 0.7159340977668762, "learning_rate": 2.4784855264247517e-05, "loss": 1.64, "step": 78422 }, { "epoch": 2.61, "grad_norm": 0.7143170237541199, "learning_rate": 2.4780695450387843e-05, "loss": 1.7726, "step": 78423 }, { "epoch": 2.61, "grad_norm": 0.7161595225334167, "learning_rate": 2.4776535970603807e-05, "loss": 1.7034, "step": 78424 }, { "epoch": 2.61, "grad_norm": 0.6872673034667969, "learning_rate": 2.4772376824900496e-05, "loss": 1.6923, "step": 78425 }, { "epoch": 2.61, "grad_norm": 0.6805065870285034, "learning_rate": 2.476821801328298e-05, "loss": 1.6982, "step": 78426 }, { "epoch": 2.61, "grad_norm": 0.6819062232971191, "learning_rate": 2.4764059535756288e-05, "loss": 1.6928, "step": 78427 }, { "epoch": 2.61, "grad_norm": 0.6724100708961487, "learning_rate": 2.4759901392325477e-05, "loss": 1.6542, "step": 78428 }, { "epoch": 2.61, "grad_norm": 0.7247403264045715, "learning_rate": 2.475574358299558e-05, "loss": 1.6501, "step": 78429 }, { "epoch": 2.61, "grad_norm": 0.7031521201133728, "learning_rate": 2.4751586107771692e-05, "loss": 1.6848, "step": 78430 }, { "epoch": 2.61, "grad_norm": 0.7056010365486145, "learning_rate": 2.474742896665881e-05, "loss": 1.6812, "step": 78431 }, { "epoch": 2.61, "grad_norm": 0.7029397487640381, "learning_rate": 2.4743272159661964e-05, "loss": 1.713, "step": 78432 }, { "epoch": 2.61, "grad_norm": 0.7089202404022217, "learning_rate": 2.473911568678625e-05, "loss": 1.673, "step": 78433 }, { "epoch": 2.61, "grad_norm": 0.721447765827179, "learning_rate": 2.4734959548036725e-05, "loss": 1.6679, "step": 78434 }, { "epoch": 2.61, "grad_norm": 0.7034112811088562, "learning_rate": 2.4730803743418326e-05, "loss": 1.678, "step": 78435 }, { "epoch": 2.61, "grad_norm": 0.706847071647644, "learning_rate": 2.4726648272936177e-05, "loss": 1.7341, "step": 78436 }, { "epoch": 2.61, "grad_norm": 0.7109052538871765, "learning_rate": 2.472249313659538e-05, "loss": 1.6424, "step": 78437 }, { "epoch": 2.61, "grad_norm": 0.7083119750022888, "learning_rate": 2.471833833440089e-05, "loss": 1.6891, "step": 78438 }, { "epoch": 2.61, "grad_norm": 0.7234490513801575, "learning_rate": 2.471418386635774e-05, "loss": 1.7002, "step": 78439 }, { "epoch": 2.61, "grad_norm": 0.6905198693275452, "learning_rate": 2.471002973247106e-05, "loss": 1.629, "step": 78440 }, { "epoch": 2.61, "grad_norm": 0.678053617477417, "learning_rate": 2.4705875932745846e-05, "loss": 1.6642, "step": 78441 }, { "epoch": 2.61, "grad_norm": 0.712616503238678, "learning_rate": 2.470172246718709e-05, "loss": 1.5729, "step": 78442 }, { "epoch": 2.61, "grad_norm": 0.7413280010223389, "learning_rate": 2.469756933579986e-05, "loss": 1.7644, "step": 78443 }, { "epoch": 2.61, "grad_norm": 0.712115466594696, "learning_rate": 2.469341653858935e-05, "loss": 1.6935, "step": 78444 }, { "epoch": 2.61, "grad_norm": 0.7179739475250244, "learning_rate": 2.4689264075560356e-05, "loss": 1.6658, "step": 78445 }, { "epoch": 2.61, "grad_norm": 0.7115238904953003, "learning_rate": 2.4685111946718007e-05, "loss": 1.6672, "step": 78446 }, { "epoch": 2.61, "grad_norm": 0.707120418548584, "learning_rate": 2.468096015206743e-05, "loss": 1.6672, "step": 78447 }, { "epoch": 2.61, "grad_norm": 0.7253656387329102, "learning_rate": 2.4676808691613625e-05, "loss": 1.7031, "step": 78448 }, { "epoch": 2.61, "grad_norm": 0.7133568525314331, "learning_rate": 2.467265756536152e-05, "loss": 1.6876, "step": 78449 }, { "epoch": 2.61, "grad_norm": 0.7132343053817749, "learning_rate": 2.4668506773316245e-05, "loss": 1.6488, "step": 78450 }, { "epoch": 2.61, "grad_norm": 0.6861022114753723, "learning_rate": 2.4664356315482958e-05, "loss": 1.6528, "step": 78451 }, { "epoch": 2.61, "grad_norm": 0.6955177783966064, "learning_rate": 2.4660206191866462e-05, "loss": 1.6983, "step": 78452 }, { "epoch": 2.61, "grad_norm": 0.6924626231193542, "learning_rate": 2.4656056402471912e-05, "loss": 1.6863, "step": 78453 }, { "epoch": 2.61, "grad_norm": 0.6959856748580933, "learning_rate": 2.465190694730438e-05, "loss": 1.7263, "step": 78454 }, { "epoch": 2.61, "grad_norm": 0.7060542106628418, "learning_rate": 2.4647757826368854e-05, "loss": 1.7124, "step": 78455 }, { "epoch": 2.61, "grad_norm": 0.6892240047454834, "learning_rate": 2.4643609039670365e-05, "loss": 1.6525, "step": 78456 }, { "epoch": 2.61, "grad_norm": 0.6915520429611206, "learning_rate": 2.463946058721391e-05, "loss": 1.7004, "step": 78457 }, { "epoch": 2.61, "grad_norm": 0.7232539057731628, "learning_rate": 2.463531246900472e-05, "loss": 1.6372, "step": 78458 }, { "epoch": 2.61, "grad_norm": 0.7009930610656738, "learning_rate": 2.4631164685047556e-05, "loss": 1.6552, "step": 78459 }, { "epoch": 2.61, "grad_norm": 0.7004460692405701, "learning_rate": 2.4627017235347613e-05, "loss": 1.703, "step": 78460 }, { "epoch": 2.61, "grad_norm": 0.6812356114387512, "learning_rate": 2.4622870119909954e-05, "loss": 1.6221, "step": 78461 }, { "epoch": 2.61, "grad_norm": 0.7114972472190857, "learning_rate": 2.461872333873951e-05, "loss": 1.7552, "step": 78462 }, { "epoch": 2.61, "grad_norm": 0.7390834093093872, "learning_rate": 2.4614576891841343e-05, "loss": 1.6767, "step": 78463 }, { "epoch": 2.61, "grad_norm": 0.6869073510169983, "learning_rate": 2.4610430779220546e-05, "loss": 1.7198, "step": 78464 }, { "epoch": 2.61, "grad_norm": 0.7089383602142334, "learning_rate": 2.460628500088212e-05, "loss": 1.6249, "step": 78465 }, { "epoch": 2.61, "grad_norm": 0.706413984298706, "learning_rate": 2.4602139556831023e-05, "loss": 1.617, "step": 78466 }, { "epoch": 2.61, "grad_norm": 0.711287260055542, "learning_rate": 2.4597994447072356e-05, "loss": 1.6721, "step": 78467 }, { "epoch": 2.61, "grad_norm": 0.7031623125076294, "learning_rate": 2.459384967161121e-05, "loss": 1.6939, "step": 78468 }, { "epoch": 2.61, "grad_norm": 0.7153415083885193, "learning_rate": 2.4589705230452518e-05, "loss": 1.6362, "step": 78469 }, { "epoch": 2.61, "grad_norm": 0.7136951684951782, "learning_rate": 2.4585561123601305e-05, "loss": 1.6535, "step": 78470 }, { "epoch": 2.61, "grad_norm": 0.7183518409729004, "learning_rate": 2.4581417351062703e-05, "loss": 1.6937, "step": 78471 }, { "epoch": 2.61, "grad_norm": 0.7131063342094421, "learning_rate": 2.4577273912841677e-05, "loss": 1.7733, "step": 78472 }, { "epoch": 2.61, "grad_norm": 0.6831640005111694, "learning_rate": 2.457313080894322e-05, "loss": 1.6861, "step": 78473 }, { "epoch": 2.61, "grad_norm": 0.7290048003196716, "learning_rate": 2.4568988039372428e-05, "loss": 1.6173, "step": 78474 }, { "epoch": 2.61, "grad_norm": 0.7381318211555481, "learning_rate": 2.4564845604134265e-05, "loss": 1.7274, "step": 78475 }, { "epoch": 2.61, "grad_norm": 0.7058438062667847, "learning_rate": 2.4560703503233826e-05, "loss": 1.6501, "step": 78476 }, { "epoch": 2.61, "grad_norm": 0.6931959390640259, "learning_rate": 2.4556561736676072e-05, "loss": 1.666, "step": 78477 }, { "epoch": 2.61, "grad_norm": 0.7474148869514465, "learning_rate": 2.4552420304466136e-05, "loss": 1.7122, "step": 78478 }, { "epoch": 2.61, "grad_norm": 0.7193446755409241, "learning_rate": 2.4548279206608947e-05, "loss": 1.7442, "step": 78479 }, { "epoch": 2.61, "grad_norm": 0.7327072024345398, "learning_rate": 2.454413844310953e-05, "loss": 1.6777, "step": 78480 }, { "epoch": 2.61, "grad_norm": 0.7184029221534729, "learning_rate": 2.4539998013972984e-05, "loss": 1.7157, "step": 78481 }, { "epoch": 2.61, "grad_norm": 0.7160184979438782, "learning_rate": 2.453585791920424e-05, "loss": 1.8004, "step": 78482 }, { "epoch": 2.61, "grad_norm": 0.7254948616027832, "learning_rate": 2.4531718158808422e-05, "loss": 1.8048, "step": 78483 }, { "epoch": 2.61, "grad_norm": 0.7152681350708008, "learning_rate": 2.4527578732790532e-05, "loss": 1.6398, "step": 78484 }, { "epoch": 2.61, "grad_norm": 0.708053469657898, "learning_rate": 2.4523439641155497e-05, "loss": 1.7303, "step": 78485 }, { "epoch": 2.61, "grad_norm": 0.6952874660491943, "learning_rate": 2.4519300883908443e-05, "loss": 1.6536, "step": 78486 }, { "epoch": 2.61, "grad_norm": 0.7314671277999878, "learning_rate": 2.4515162461054404e-05, "loss": 1.7239, "step": 78487 }, { "epoch": 2.61, "grad_norm": 0.7112210988998413, "learning_rate": 2.4511024372598374e-05, "loss": 1.6878, "step": 78488 }, { "epoch": 2.61, "grad_norm": 0.7176018357276917, "learning_rate": 2.4506886618545284e-05, "loss": 1.634, "step": 78489 }, { "epoch": 2.61, "grad_norm": 0.7151811718940735, "learning_rate": 2.450274919890033e-05, "loss": 1.7384, "step": 78490 }, { "epoch": 2.61, "grad_norm": 0.6990835666656494, "learning_rate": 2.4498612113668436e-05, "loss": 1.7749, "step": 78491 }, { "epoch": 2.61, "grad_norm": 0.7245716452598572, "learning_rate": 2.449447536285457e-05, "loss": 1.6484, "step": 78492 }, { "epoch": 2.61, "grad_norm": 0.6990189552307129, "learning_rate": 2.449033894646386e-05, "loss": 1.6957, "step": 78493 }, { "epoch": 2.61, "grad_norm": 0.7098502516746521, "learning_rate": 2.4486202864501336e-05, "loss": 1.6455, "step": 78494 }, { "epoch": 2.61, "grad_norm": 0.7079151272773743, "learning_rate": 2.4482067116971893e-05, "loss": 1.6612, "step": 78495 }, { "epoch": 2.61, "grad_norm": 0.7011181116104126, "learning_rate": 2.4477931703880628e-05, "loss": 1.6318, "step": 78496 }, { "epoch": 2.61, "grad_norm": 0.7069057822227478, "learning_rate": 2.4473796625232567e-05, "loss": 1.642, "step": 78497 }, { "epoch": 2.61, "grad_norm": 0.7012587785720825, "learning_rate": 2.4469661881032776e-05, "loss": 1.6564, "step": 78498 }, { "epoch": 2.61, "grad_norm": 0.7169476747512817, "learning_rate": 2.446552747128612e-05, "loss": 1.7428, "step": 78499 }, { "epoch": 2.61, "grad_norm": 0.70210862159729, "learning_rate": 2.4461393395997752e-05, "loss": 1.6884, "step": 78500 }, { "epoch": 2.61, "grad_norm": 0.7129784822463989, "learning_rate": 2.4457259655172746e-05, "loss": 1.6996, "step": 78501 }, { "epoch": 2.61, "grad_norm": 0.7001636028289795, "learning_rate": 2.4453126248815924e-05, "loss": 1.6371, "step": 78502 }, { "epoch": 2.61, "grad_norm": 0.7169386148452759, "learning_rate": 2.4448993176932386e-05, "loss": 1.7075, "step": 78503 }, { "epoch": 2.61, "grad_norm": 0.7059141397476196, "learning_rate": 2.4444860439527223e-05, "loss": 1.694, "step": 78504 }, { "epoch": 2.61, "grad_norm": 0.7155537605285645, "learning_rate": 2.4440728036605438e-05, "loss": 1.688, "step": 78505 }, { "epoch": 2.61, "grad_norm": 0.7248392701148987, "learning_rate": 2.4436595968171922e-05, "loss": 1.7251, "step": 78506 }, { "epoch": 2.61, "grad_norm": 0.7151472568511963, "learning_rate": 2.4432464234231774e-05, "loss": 1.704, "step": 78507 }, { "epoch": 2.61, "grad_norm": 0.6820435523986816, "learning_rate": 2.442833283479012e-05, "loss": 1.691, "step": 78508 }, { "epoch": 2.61, "grad_norm": 0.7023346424102783, "learning_rate": 2.442420176985179e-05, "loss": 1.6508, "step": 78509 }, { "epoch": 2.61, "grad_norm": 0.7077875733375549, "learning_rate": 2.4420071039421853e-05, "loss": 1.6546, "step": 78510 }, { "epoch": 2.61, "grad_norm": 0.7061389684677124, "learning_rate": 2.4415940643505395e-05, "loss": 1.7717, "step": 78511 }, { "epoch": 2.61, "grad_norm": 0.716801643371582, "learning_rate": 2.4411810582107384e-05, "loss": 1.6734, "step": 78512 }, { "epoch": 2.61, "grad_norm": 0.7340607643127441, "learning_rate": 2.440768085523278e-05, "loss": 1.6665, "step": 78513 }, { "epoch": 2.61, "grad_norm": 0.6924286484718323, "learning_rate": 2.4403551462886684e-05, "loss": 1.6928, "step": 78514 }, { "epoch": 2.61, "grad_norm": 0.709113359451294, "learning_rate": 2.4399422405074055e-05, "loss": 1.7397, "step": 78515 }, { "epoch": 2.61, "grad_norm": 0.6905696392059326, "learning_rate": 2.4395293681799887e-05, "loss": 1.646, "step": 78516 }, { "epoch": 2.61, "grad_norm": 0.6977543830871582, "learning_rate": 2.4391165293069248e-05, "loss": 1.6454, "step": 78517 }, { "epoch": 2.61, "grad_norm": 0.6941931247711182, "learning_rate": 2.4387037238887164e-05, "loss": 1.6751, "step": 78518 }, { "epoch": 2.61, "grad_norm": 0.7137486934661865, "learning_rate": 2.4382909519258598e-05, "loss": 1.7284, "step": 78519 }, { "epoch": 2.61, "grad_norm": 0.6960169076919556, "learning_rate": 2.4378782134188513e-05, "loss": 1.7432, "step": 78520 }, { "epoch": 2.61, "grad_norm": 0.7226544618606567, "learning_rate": 2.4374655083682036e-05, "loss": 1.6296, "step": 78521 }, { "epoch": 2.61, "grad_norm": 0.7223489284515381, "learning_rate": 2.43705283677441e-05, "loss": 1.6421, "step": 78522 }, { "epoch": 2.61, "grad_norm": 0.7302901148796082, "learning_rate": 2.4366401986379703e-05, "loss": 1.7479, "step": 78523 }, { "epoch": 2.61, "grad_norm": 0.7035447955131531, "learning_rate": 2.4362275939593866e-05, "loss": 1.6316, "step": 78524 }, { "epoch": 2.61, "grad_norm": 0.7194053530693054, "learning_rate": 2.435815022739166e-05, "loss": 1.6757, "step": 78525 }, { "epoch": 2.61, "grad_norm": 0.687484860420227, "learning_rate": 2.4354024849778042e-05, "loss": 1.7076, "step": 78526 }, { "epoch": 2.61, "grad_norm": 0.7055174112319946, "learning_rate": 2.434989980675801e-05, "loss": 1.5835, "step": 78527 }, { "epoch": 2.61, "grad_norm": 0.6988770961761475, "learning_rate": 2.4345775098336596e-05, "loss": 1.625, "step": 78528 }, { "epoch": 2.61, "grad_norm": 0.7339895367622375, "learning_rate": 2.4341650724518824e-05, "loss": 1.6895, "step": 78529 }, { "epoch": 2.61, "grad_norm": 0.7157042026519775, "learning_rate": 2.4337526685309594e-05, "loss": 1.6685, "step": 78530 }, { "epoch": 2.61, "grad_norm": 0.6925970911979675, "learning_rate": 2.4333402980714034e-05, "loss": 1.7061, "step": 78531 }, { "epoch": 2.61, "grad_norm": 0.7200974822044373, "learning_rate": 2.4329279610737074e-05, "loss": 1.6385, "step": 78532 }, { "epoch": 2.61, "grad_norm": 0.7030113339424133, "learning_rate": 2.4325156575383808e-05, "loss": 1.6999, "step": 78533 }, { "epoch": 2.61, "grad_norm": 0.6898253560066223, "learning_rate": 2.4321033874659134e-05, "loss": 1.656, "step": 78534 }, { "epoch": 2.61, "grad_norm": 0.691048800945282, "learning_rate": 2.4316911508568147e-05, "loss": 1.6682, "step": 78535 }, { "epoch": 2.61, "grad_norm": 0.7215654253959656, "learning_rate": 2.4312789477115813e-05, "loss": 1.7568, "step": 78536 }, { "epoch": 2.61, "grad_norm": 0.7035550475120544, "learning_rate": 2.4308667780307056e-05, "loss": 1.6423, "step": 78537 }, { "epoch": 2.61, "grad_norm": 0.7075996994972229, "learning_rate": 2.430454641814704e-05, "loss": 1.6983, "step": 78538 }, { "epoch": 2.61, "grad_norm": 0.6997522711753845, "learning_rate": 2.430042539064063e-05, "loss": 1.6795, "step": 78539 }, { "epoch": 2.61, "grad_norm": 0.6925882697105408, "learning_rate": 2.4296304697792922e-05, "loss": 1.5991, "step": 78540 }, { "epoch": 2.61, "grad_norm": 0.6913596987724304, "learning_rate": 2.429218433960888e-05, "loss": 1.652, "step": 78541 }, { "epoch": 2.61, "grad_norm": 0.6898818612098694, "learning_rate": 2.4288064316093425e-05, "loss": 1.6709, "step": 78542 }, { "epoch": 2.61, "grad_norm": 0.6993274092674255, "learning_rate": 2.428394462725173e-05, "loss": 1.6359, "step": 78543 }, { "epoch": 2.61, "grad_norm": 0.7000238299369812, "learning_rate": 2.4279825273088615e-05, "loss": 1.6498, "step": 78544 }, { "epoch": 2.61, "grad_norm": 0.6816181540489197, "learning_rate": 2.427570625360922e-05, "loss": 1.67, "step": 78545 }, { "epoch": 2.61, "grad_norm": 0.7071933150291443, "learning_rate": 2.427158756881846e-05, "loss": 1.6689, "step": 78546 }, { "epoch": 2.61, "grad_norm": 0.7057952880859375, "learning_rate": 2.4267469218721413e-05, "loss": 1.6782, "step": 78547 }, { "epoch": 2.61, "grad_norm": 0.7237568497657776, "learning_rate": 2.4263351203323034e-05, "loss": 1.655, "step": 78548 }, { "epoch": 2.61, "grad_norm": 0.7120661735534668, "learning_rate": 2.4259233522628253e-05, "loss": 1.71, "step": 78549 }, { "epoch": 2.61, "grad_norm": 0.7024805545806885, "learning_rate": 2.4255116176642197e-05, "loss": 1.6962, "step": 78550 }, { "epoch": 2.61, "grad_norm": 0.6763945817947388, "learning_rate": 2.42509991653698e-05, "loss": 1.6327, "step": 78551 }, { "epoch": 2.61, "grad_norm": 0.7177989482879639, "learning_rate": 2.424688248881599e-05, "loss": 1.7414, "step": 78552 }, { "epoch": 2.61, "grad_norm": 0.7409417629241943, "learning_rate": 2.424276614698586e-05, "loss": 1.6915, "step": 78553 }, { "epoch": 2.61, "grad_norm": 0.6960726976394653, "learning_rate": 2.423865013988444e-05, "loss": 1.6267, "step": 78554 }, { "epoch": 2.61, "grad_norm": 0.7108700275421143, "learning_rate": 2.423453446751663e-05, "loss": 1.6281, "step": 78555 }, { "epoch": 2.61, "grad_norm": 0.7181121706962585, "learning_rate": 2.4230419129887423e-05, "loss": 1.7209, "step": 78556 }, { "epoch": 2.61, "grad_norm": 0.7025300860404968, "learning_rate": 2.4226304127001918e-05, "loss": 1.595, "step": 78557 }, { "epoch": 2.61, "grad_norm": 0.6933963894844055, "learning_rate": 2.4222189458865073e-05, "loss": 1.6961, "step": 78558 }, { "epoch": 2.61, "grad_norm": 0.7079437375068665, "learning_rate": 2.421807512548175e-05, "loss": 1.7248, "step": 78559 }, { "epoch": 2.61, "grad_norm": 0.7114370465278625, "learning_rate": 2.4213961126857084e-05, "loss": 1.6213, "step": 78560 }, { "epoch": 2.61, "grad_norm": 0.70054030418396, "learning_rate": 2.4209847462996136e-05, "loss": 1.6905, "step": 78561 }, { "epoch": 2.61, "grad_norm": 0.698742151260376, "learning_rate": 2.4205734133903665e-05, "loss": 1.6306, "step": 78562 }, { "epoch": 2.61, "grad_norm": 0.6803017854690552, "learning_rate": 2.4201621139584836e-05, "loss": 1.6695, "step": 78563 }, { "epoch": 2.61, "grad_norm": 0.7149619460105896, "learning_rate": 2.4197508480044615e-05, "loss": 1.6749, "step": 78564 }, { "epoch": 2.61, "grad_norm": 0.7013722658157349, "learning_rate": 2.4193396155288024e-05, "loss": 1.7183, "step": 78565 }, { "epoch": 2.61, "grad_norm": 0.7182185053825378, "learning_rate": 2.4189284165319932e-05, "loss": 1.7733, "step": 78566 }, { "epoch": 2.61, "grad_norm": 0.6980077028274536, "learning_rate": 2.41851725101454e-05, "loss": 1.6519, "step": 78567 }, { "epoch": 2.61, "grad_norm": 0.7148951292037964, "learning_rate": 2.4181061189769558e-05, "loss": 1.6424, "step": 78568 }, { "epoch": 2.61, "grad_norm": 0.7433132529258728, "learning_rate": 2.4176950204197132e-05, "loss": 1.7004, "step": 78569 }, { "epoch": 2.61, "grad_norm": 0.7164928913116455, "learning_rate": 2.4172839553433286e-05, "loss": 1.6946, "step": 78570 }, { "epoch": 2.61, "grad_norm": 0.7004011273384094, "learning_rate": 2.416872923748302e-05, "loss": 1.6665, "step": 78571 }, { "epoch": 2.61, "grad_norm": 0.7045180201530457, "learning_rate": 2.416461925635126e-05, "loss": 1.6324, "step": 78572 }, { "epoch": 2.61, "grad_norm": 0.7215479612350464, "learning_rate": 2.4160509610042932e-05, "loss": 1.6659, "step": 78573 }, { "epoch": 2.61, "grad_norm": 0.7275074124336243, "learning_rate": 2.415640029856314e-05, "loss": 1.6988, "step": 78574 }, { "epoch": 2.61, "grad_norm": 0.7042720913887024, "learning_rate": 2.415229132191694e-05, "loss": 1.6347, "step": 78575 }, { "epoch": 2.61, "grad_norm": 0.7284762263298035, "learning_rate": 2.4148182680109097e-05, "loss": 1.7634, "step": 78576 }, { "epoch": 2.61, "grad_norm": 0.7072953581809998, "learning_rate": 2.4144074373144705e-05, "loss": 1.6177, "step": 78577 }, { "epoch": 2.61, "grad_norm": 0.720818281173706, "learning_rate": 2.4139966401028833e-05, "loss": 1.6711, "step": 78578 }, { "epoch": 2.61, "grad_norm": 0.7151015400886536, "learning_rate": 2.4135858763766402e-05, "loss": 1.7385, "step": 78579 }, { "epoch": 2.61, "grad_norm": 0.7198655009269714, "learning_rate": 2.4131751461362313e-05, "loss": 1.743, "step": 78580 }, { "epoch": 2.61, "grad_norm": 0.6972224712371826, "learning_rate": 2.4127644493821695e-05, "loss": 1.6926, "step": 78581 }, { "epoch": 2.61, "grad_norm": 0.7042055726051331, "learning_rate": 2.4123537861149478e-05, "loss": 1.6525, "step": 78582 }, { "epoch": 2.61, "grad_norm": 0.7042094469070435, "learning_rate": 2.4119431563350554e-05, "loss": 1.7534, "step": 78583 }, { "epoch": 2.61, "grad_norm": 0.7257959246635437, "learning_rate": 2.4115325600430026e-05, "loss": 1.6346, "step": 78584 }, { "epoch": 2.61, "grad_norm": 0.7085183262825012, "learning_rate": 2.411121997239288e-05, "loss": 1.6172, "step": 78585 }, { "epoch": 2.61, "grad_norm": 0.7055118083953857, "learning_rate": 2.4107114679244055e-05, "loss": 1.6762, "step": 78586 }, { "epoch": 2.61, "grad_norm": 0.7121085524559021, "learning_rate": 2.410300972098851e-05, "loss": 1.7729, "step": 78587 }, { "epoch": 2.61, "grad_norm": 0.7012614607810974, "learning_rate": 2.4098905097631305e-05, "loss": 1.6611, "step": 78588 }, { "epoch": 2.61, "grad_norm": 0.7272377014160156, "learning_rate": 2.4094800809177373e-05, "loss": 1.6835, "step": 78589 }, { "epoch": 2.61, "grad_norm": 0.7130532264709473, "learning_rate": 2.409069685563164e-05, "loss": 1.7427, "step": 78590 }, { "epoch": 2.61, "grad_norm": 0.6851599812507629, "learning_rate": 2.4086593236999207e-05, "loss": 1.6663, "step": 78591 }, { "epoch": 2.61, "grad_norm": 0.7201152443885803, "learning_rate": 2.4082489953284965e-05, "loss": 1.7034, "step": 78592 }, { "epoch": 2.61, "grad_norm": 0.6977211236953735, "learning_rate": 2.4078387004493948e-05, "loss": 1.7126, "step": 78593 }, { "epoch": 2.61, "grad_norm": 0.7267506122589111, "learning_rate": 2.407428439063108e-05, "loss": 1.715, "step": 78594 }, { "epoch": 2.61, "grad_norm": 0.7301520109176636, "learning_rate": 2.40701821117014e-05, "loss": 1.69, "step": 78595 }, { "epoch": 2.61, "grad_norm": 0.7253714799880981, "learning_rate": 2.406608016770989e-05, "loss": 1.727, "step": 78596 }, { "epoch": 2.61, "grad_norm": 0.7062703967094421, "learning_rate": 2.4061978558661455e-05, "loss": 1.6875, "step": 78597 }, { "epoch": 2.61, "grad_norm": 0.7115126252174377, "learning_rate": 2.4057877284561155e-05, "loss": 1.6929, "step": 78598 }, { "epoch": 2.61, "grad_norm": 0.7015168070793152, "learning_rate": 2.4053776345413887e-05, "loss": 1.6687, "step": 78599 }, { "epoch": 2.62, "grad_norm": 0.7103825211524963, "learning_rate": 2.4049675741224718e-05, "loss": 1.6978, "step": 78600 }, { "epoch": 2.62, "grad_norm": 0.6862916350364685, "learning_rate": 2.40455754719986e-05, "loss": 1.6162, "step": 78601 }, { "epoch": 2.62, "grad_norm": 0.6887590289115906, "learning_rate": 2.404147553774044e-05, "loss": 1.6895, "step": 78602 }, { "epoch": 2.62, "grad_norm": 0.7059351801872253, "learning_rate": 2.403737593845523e-05, "loss": 1.7056, "step": 78603 }, { "epoch": 2.62, "grad_norm": 0.7058997750282288, "learning_rate": 2.4033276674148095e-05, "loss": 1.6839, "step": 78604 }, { "epoch": 2.62, "grad_norm": 0.6937260627746582, "learning_rate": 2.4029177744823867e-05, "loss": 1.5665, "step": 78605 }, { "epoch": 2.62, "grad_norm": 0.6847708225250244, "learning_rate": 2.4025079150487513e-05, "loss": 1.6191, "step": 78606 }, { "epoch": 2.62, "grad_norm": 0.7176370024681091, "learning_rate": 2.4020980891144093e-05, "loss": 1.6572, "step": 78607 }, { "epoch": 2.62, "grad_norm": 0.7121530175209045, "learning_rate": 2.4016882966798535e-05, "loss": 1.6853, "step": 78608 }, { "epoch": 2.62, "grad_norm": 0.7277268171310425, "learning_rate": 2.4012785377455768e-05, "loss": 1.7208, "step": 78609 }, { "epoch": 2.62, "grad_norm": 0.6797354221343994, "learning_rate": 2.4008688123120823e-05, "loss": 1.659, "step": 78610 }, { "epoch": 2.62, "grad_norm": 0.7029356956481934, "learning_rate": 2.4004591203798762e-05, "loss": 1.718, "step": 78611 }, { "epoch": 2.62, "grad_norm": 0.6962123513221741, "learning_rate": 2.400049461949438e-05, "loss": 1.7252, "step": 78612 }, { "epoch": 2.62, "grad_norm": 0.6854362487792969, "learning_rate": 2.399639837021271e-05, "loss": 1.6642, "step": 78613 }, { "epoch": 2.62, "grad_norm": 0.7232646346092224, "learning_rate": 2.3992302455958777e-05, "loss": 1.7106, "step": 78614 }, { "epoch": 2.62, "grad_norm": 0.70404052734375, "learning_rate": 2.3988206876737547e-05, "loss": 1.6673, "step": 78615 }, { "epoch": 2.62, "grad_norm": 0.7113590836524963, "learning_rate": 2.3984111632553915e-05, "loss": 1.652, "step": 78616 }, { "epoch": 2.62, "grad_norm": 0.7056323289871216, "learning_rate": 2.3980016723412908e-05, "loss": 1.6765, "step": 78617 }, { "epoch": 2.62, "grad_norm": 0.7079651951789856, "learning_rate": 2.397592214931959e-05, "loss": 1.6585, "step": 78618 }, { "epoch": 2.62, "grad_norm": 0.7092887163162231, "learning_rate": 2.3971827910278695e-05, "loss": 1.6594, "step": 78619 }, { "epoch": 2.62, "grad_norm": 0.7210485935211182, "learning_rate": 2.396773400629538e-05, "loss": 1.7353, "step": 78620 }, { "epoch": 2.62, "grad_norm": 0.689635157585144, "learning_rate": 2.3963640437374575e-05, "loss": 1.6468, "step": 78621 }, { "epoch": 2.62, "grad_norm": 0.7175216674804688, "learning_rate": 2.395954720352128e-05, "loss": 1.712, "step": 78622 }, { "epoch": 2.62, "grad_norm": 0.6910136342048645, "learning_rate": 2.3955454304740352e-05, "loss": 1.667, "step": 78623 }, { "epoch": 2.62, "grad_norm": 0.7265529632568359, "learning_rate": 2.3951361741036822e-05, "loss": 1.6504, "step": 78624 }, { "epoch": 2.62, "grad_norm": 0.7056138515472412, "learning_rate": 2.3947269512415755e-05, "loss": 1.6703, "step": 78625 }, { "epoch": 2.62, "grad_norm": 0.7212857604026794, "learning_rate": 2.3943177618881948e-05, "loss": 1.6447, "step": 78626 }, { "epoch": 2.62, "grad_norm": 0.6879041790962219, "learning_rate": 2.3939086060440427e-05, "loss": 1.6378, "step": 78627 }, { "epoch": 2.62, "grad_norm": 0.6980759501457214, "learning_rate": 2.3934994837096255e-05, "loss": 1.7411, "step": 78628 }, { "epoch": 2.62, "grad_norm": 0.7345206141471863, "learning_rate": 2.393090394885433e-05, "loss": 1.6439, "step": 78629 }, { "epoch": 2.62, "grad_norm": 0.7003102898597717, "learning_rate": 2.3926813395719547e-05, "loss": 1.6639, "step": 78630 }, { "epoch": 2.62, "grad_norm": 0.6865313649177551, "learning_rate": 2.392272317769697e-05, "loss": 1.631, "step": 78631 }, { "epoch": 2.62, "grad_norm": 0.7186824083328247, "learning_rate": 2.3918633294791557e-05, "loss": 1.6469, "step": 78632 }, { "epoch": 2.62, "grad_norm": 0.7188524007797241, "learning_rate": 2.3914543747008175e-05, "loss": 1.6529, "step": 78633 }, { "epoch": 2.62, "grad_norm": 0.7394902110099792, "learning_rate": 2.391045453435185e-05, "loss": 1.7337, "step": 78634 }, { "epoch": 2.62, "grad_norm": 0.6978504657745361, "learning_rate": 2.390636565682762e-05, "loss": 1.6461, "step": 78635 }, { "epoch": 2.62, "grad_norm": 0.7114355564117432, "learning_rate": 2.3902277114440372e-05, "loss": 1.6306, "step": 78636 }, { "epoch": 2.62, "grad_norm": 0.6944581270217896, "learning_rate": 2.3898188907195036e-05, "loss": 1.6641, "step": 78637 }, { "epoch": 2.62, "grad_norm": 0.7184024453163147, "learning_rate": 2.3894101035096646e-05, "loss": 1.6878, "step": 78638 }, { "epoch": 2.62, "grad_norm": 0.7026021480560303, "learning_rate": 2.389001349815016e-05, "loss": 1.6707, "step": 78639 }, { "epoch": 2.62, "grad_norm": 0.7052416801452637, "learning_rate": 2.3885926296360446e-05, "loss": 1.6544, "step": 78640 }, { "epoch": 2.62, "grad_norm": 0.6986560821533203, "learning_rate": 2.3881839429732597e-05, "loss": 1.6685, "step": 78641 }, { "epoch": 2.62, "grad_norm": 0.7072299718856812, "learning_rate": 2.3877752898271474e-05, "loss": 1.7536, "step": 78642 }, { "epoch": 2.62, "grad_norm": 0.723802387714386, "learning_rate": 2.3873666701982108e-05, "loss": 1.7325, "step": 78643 }, { "epoch": 2.62, "grad_norm": 0.7073889374732971, "learning_rate": 2.386958084086936e-05, "loss": 1.6827, "step": 78644 }, { "epoch": 2.62, "grad_norm": 0.7298128008842468, "learning_rate": 2.3865495314938332e-05, "loss": 1.7103, "step": 78645 }, { "epoch": 2.62, "grad_norm": 0.7136586904525757, "learning_rate": 2.3861410124193914e-05, "loss": 1.6408, "step": 78646 }, { "epoch": 2.62, "grad_norm": 0.7166790962219238, "learning_rate": 2.385732526864097e-05, "loss": 1.6976, "step": 78647 }, { "epoch": 2.62, "grad_norm": 0.6895714402198792, "learning_rate": 2.385324074828463e-05, "loss": 1.6919, "step": 78648 }, { "epoch": 2.62, "grad_norm": 0.7129525542259216, "learning_rate": 2.384915656312969e-05, "loss": 1.6569, "step": 78649 }, { "epoch": 2.62, "grad_norm": 0.7709003686904907, "learning_rate": 2.3845072713181247e-05, "loss": 1.7247, "step": 78650 }, { "epoch": 2.62, "grad_norm": 0.7051703929901123, "learning_rate": 2.384098919844416e-05, "loss": 1.6227, "step": 78651 }, { "epoch": 2.62, "grad_norm": 0.6988802552223206, "learning_rate": 2.3836906018923463e-05, "loss": 1.7169, "step": 78652 }, { "epoch": 2.62, "grad_norm": 0.715492844581604, "learning_rate": 2.383282317462405e-05, "loss": 1.6632, "step": 78653 }, { "epoch": 2.62, "grad_norm": 0.7099210023880005, "learning_rate": 2.3828740665550884e-05, "loss": 1.6486, "step": 78654 }, { "epoch": 2.62, "grad_norm": 0.7297532558441162, "learning_rate": 2.382465849170896e-05, "loss": 1.728, "step": 78655 }, { "epoch": 2.62, "grad_norm": 0.7092726230621338, "learning_rate": 2.382057665310314e-05, "loss": 1.712, "step": 78656 }, { "epoch": 2.62, "grad_norm": 0.7123603224754333, "learning_rate": 2.3816495149738523e-05, "loss": 1.656, "step": 78657 }, { "epoch": 2.62, "grad_norm": 0.6988359093666077, "learning_rate": 2.3812413981620006e-05, "loss": 1.6063, "step": 78658 }, { "epoch": 2.62, "grad_norm": 0.7161539793014526, "learning_rate": 2.3808333148752444e-05, "loss": 1.6563, "step": 78659 }, { "epoch": 2.62, "grad_norm": 0.7296686768531799, "learning_rate": 2.3804252651140942e-05, "loss": 1.6558, "step": 78660 }, { "epoch": 2.62, "grad_norm": 0.713024377822876, "learning_rate": 2.3800172488790292e-05, "loss": 1.7093, "step": 78661 }, { "epoch": 2.62, "grad_norm": 0.726065993309021, "learning_rate": 2.3796092661705624e-05, "loss": 1.6959, "step": 78662 }, { "epoch": 2.62, "grad_norm": 0.6990708112716675, "learning_rate": 2.3792013169891733e-05, "loss": 1.6707, "step": 78663 }, { "epoch": 2.62, "grad_norm": 0.7172942161560059, "learning_rate": 2.3787934013353714e-05, "loss": 1.6957, "step": 78664 }, { "epoch": 2.62, "grad_norm": 0.6873087882995605, "learning_rate": 2.3783855192096435e-05, "loss": 1.6935, "step": 78665 }, { "epoch": 2.62, "grad_norm": 0.6987914443016052, "learning_rate": 2.3779776706124788e-05, "loss": 1.6935, "step": 78666 }, { "epoch": 2.62, "grad_norm": 0.7042489647865295, "learning_rate": 2.3775698555443866e-05, "loss": 1.6875, "step": 78667 }, { "epoch": 2.62, "grad_norm": 0.7366911172866821, "learning_rate": 2.377162074005854e-05, "loss": 1.7077, "step": 78668 }, { "epoch": 2.62, "grad_norm": 0.684490442276001, "learning_rate": 2.376754325997373e-05, "loss": 1.6418, "step": 78669 }, { "epoch": 2.62, "grad_norm": 0.7064887881278992, "learning_rate": 2.3763466115194408e-05, "loss": 1.5965, "step": 78670 }, { "epoch": 2.62, "grad_norm": 0.7017213702201843, "learning_rate": 2.3759389305725563e-05, "loss": 1.6816, "step": 78671 }, { "epoch": 2.62, "grad_norm": 0.7305465340614319, "learning_rate": 2.375531283157216e-05, "loss": 1.7335, "step": 78672 }, { "epoch": 2.62, "grad_norm": 0.6941972374916077, "learning_rate": 2.3751236692739027e-05, "loss": 1.6382, "step": 78673 }, { "epoch": 2.62, "grad_norm": 0.7165734171867371, "learning_rate": 2.374716088923123e-05, "loss": 1.7467, "step": 78674 }, { "epoch": 2.62, "grad_norm": 0.7001103758811951, "learning_rate": 2.3743085421053697e-05, "loss": 1.736, "step": 78675 }, { "epoch": 2.62, "grad_norm": 0.7088257670402527, "learning_rate": 2.373901028821129e-05, "loss": 1.6916, "step": 78676 }, { "epoch": 2.62, "grad_norm": 0.7002092599868774, "learning_rate": 2.373493549070904e-05, "loss": 1.6874, "step": 78677 }, { "epoch": 2.62, "grad_norm": 0.7053943276405334, "learning_rate": 2.3730861028551973e-05, "loss": 1.6642, "step": 78678 }, { "epoch": 2.62, "grad_norm": 0.6929781436920166, "learning_rate": 2.372678690174482e-05, "loss": 1.6541, "step": 78679 }, { "epoch": 2.62, "grad_norm": 0.6868159770965576, "learning_rate": 2.3722713110292646e-05, "loss": 1.5885, "step": 78680 }, { "epoch": 2.62, "grad_norm": 0.7177113890647888, "learning_rate": 2.371863965420041e-05, "loss": 1.6528, "step": 78681 }, { "epoch": 2.62, "grad_norm": 0.757113516330719, "learning_rate": 2.371456653347308e-05, "loss": 1.7386, "step": 78682 }, { "epoch": 2.62, "grad_norm": 0.6920985579490662, "learning_rate": 2.3710493748115478e-05, "loss": 1.6742, "step": 78683 }, { "epoch": 2.62, "grad_norm": 0.7292940020561218, "learning_rate": 2.3706421298132637e-05, "loss": 1.7756, "step": 78684 }, { "epoch": 2.62, "grad_norm": 0.7092247605323792, "learning_rate": 2.3702349183529623e-05, "loss": 1.6193, "step": 78685 }, { "epoch": 2.62, "grad_norm": 0.6996607184410095, "learning_rate": 2.3698277404311096e-05, "loss": 1.7286, "step": 78686 }, { "epoch": 2.62, "grad_norm": 0.7167720198631287, "learning_rate": 2.3694205960482183e-05, "loss": 1.6165, "step": 78687 }, { "epoch": 2.62, "grad_norm": 0.7107467651367188, "learning_rate": 2.3690134852047848e-05, "loss": 1.6147, "step": 78688 }, { "epoch": 2.62, "grad_norm": 0.7001412510871887, "learning_rate": 2.3686064079012956e-05, "loss": 1.6325, "step": 78689 }, { "epoch": 2.62, "grad_norm": 0.719153881072998, "learning_rate": 2.3681993641382403e-05, "loss": 1.6723, "step": 78690 }, { "epoch": 2.62, "grad_norm": 0.6987869143486023, "learning_rate": 2.3677923539161213e-05, "loss": 1.5394, "step": 78691 }, { "epoch": 2.62, "grad_norm": 0.7094610929489136, "learning_rate": 2.367385377235442e-05, "loss": 1.7059, "step": 78692 }, { "epoch": 2.62, "grad_norm": 0.7243702411651611, "learning_rate": 2.3669784340966758e-05, "loss": 1.6241, "step": 78693 }, { "epoch": 2.62, "grad_norm": 0.6886463165283203, "learning_rate": 2.3665715245003247e-05, "loss": 1.62, "step": 78694 }, { "epoch": 2.62, "grad_norm": 0.7020998001098633, "learning_rate": 2.3661646484468886e-05, "loss": 1.69, "step": 78695 }, { "epoch": 2.62, "grad_norm": 0.7186959981918335, "learning_rate": 2.3657578059368575e-05, "loss": 1.5924, "step": 78696 }, { "epoch": 2.62, "grad_norm": 0.722100555896759, "learning_rate": 2.365350996970721e-05, "loss": 1.6647, "step": 78697 }, { "epoch": 2.62, "grad_norm": 0.6942766904830933, "learning_rate": 2.3649442215489813e-05, "loss": 1.6365, "step": 78698 }, { "epoch": 2.62, "grad_norm": 0.7043232917785645, "learning_rate": 2.3645374796721285e-05, "loss": 1.6627, "step": 78699 }, { "epoch": 2.62, "grad_norm": 0.7092114686965942, "learning_rate": 2.3641307713406455e-05, "loss": 1.6505, "step": 78700 }, { "epoch": 2.62, "grad_norm": 0.7137409448623657, "learning_rate": 2.3637240965550418e-05, "loss": 1.6383, "step": 78701 }, { "epoch": 2.62, "grad_norm": 0.7003848552703857, "learning_rate": 2.3633174553158073e-05, "loss": 1.6466, "step": 78702 }, { "epoch": 2.62, "grad_norm": 0.6893378496170044, "learning_rate": 2.3629108476234316e-05, "loss": 1.6471, "step": 78703 }, { "epoch": 2.62, "grad_norm": 0.7073000073432922, "learning_rate": 2.3625042734784073e-05, "loss": 1.6783, "step": 78704 }, { "epoch": 2.62, "grad_norm": 0.68669193983078, "learning_rate": 2.362097732881234e-05, "loss": 1.6889, "step": 78705 }, { "epoch": 2.62, "grad_norm": 0.6947113871574402, "learning_rate": 2.361691225832405e-05, "loss": 1.5884, "step": 78706 }, { "epoch": 2.62, "grad_norm": 0.717054009437561, "learning_rate": 2.361284752332403e-05, "loss": 1.6928, "step": 78707 }, { "epoch": 2.62, "grad_norm": 0.6964303851127625, "learning_rate": 2.3608783123817342e-05, "loss": 1.7094, "step": 78708 }, { "epoch": 2.62, "grad_norm": 0.7062251567840576, "learning_rate": 2.360471905980882e-05, "loss": 1.6622, "step": 78709 }, { "epoch": 2.62, "grad_norm": 0.7387418150901794, "learning_rate": 2.3600655331303486e-05, "loss": 1.6754, "step": 78710 }, { "epoch": 2.62, "grad_norm": 0.7132169008255005, "learning_rate": 2.3596591938306174e-05, "loss": 1.7325, "step": 78711 }, { "epoch": 2.62, "grad_norm": 0.7237572073936462, "learning_rate": 2.3592528880821947e-05, "loss": 1.701, "step": 78712 }, { "epoch": 2.62, "grad_norm": 0.7393357753753662, "learning_rate": 2.35884661588556e-05, "loss": 1.7046, "step": 78713 }, { "epoch": 2.62, "grad_norm": 0.7085406184196472, "learning_rate": 2.3584403772412196e-05, "loss": 1.6849, "step": 78714 }, { "epoch": 2.62, "grad_norm": 0.7077922821044922, "learning_rate": 2.3580341721496567e-05, "loss": 1.6729, "step": 78715 }, { "epoch": 2.62, "grad_norm": 0.6815465092658997, "learning_rate": 2.3576280006113635e-05, "loss": 1.6656, "step": 78716 }, { "epoch": 2.62, "grad_norm": 0.7179128527641296, "learning_rate": 2.3572218626268436e-05, "loss": 1.6746, "step": 78717 }, { "epoch": 2.62, "grad_norm": 0.6873779296875, "learning_rate": 2.356815758196583e-05, "loss": 1.69, "step": 78718 }, { "epoch": 2.62, "grad_norm": 0.7057875394821167, "learning_rate": 2.356409687321068e-05, "loss": 1.6768, "step": 78719 }, { "epoch": 2.62, "grad_norm": 0.715597927570343, "learning_rate": 2.3560036500007984e-05, "loss": 1.6782, "step": 78720 }, { "epoch": 2.62, "grad_norm": 0.6891994476318359, "learning_rate": 2.3555976462362737e-05, "loss": 1.6579, "step": 78721 }, { "epoch": 2.62, "grad_norm": 0.712033212184906, "learning_rate": 2.355191676027983e-05, "loss": 1.7931, "step": 78722 }, { "epoch": 2.62, "grad_norm": 0.704367458820343, "learning_rate": 2.354785739376407e-05, "loss": 1.6558, "step": 78723 }, { "epoch": 2.62, "grad_norm": 0.6913256645202637, "learning_rate": 2.3543798362820543e-05, "loss": 1.7348, "step": 78724 }, { "epoch": 2.62, "grad_norm": 0.7244914770126343, "learning_rate": 2.3539739667454117e-05, "loss": 1.6236, "step": 78725 }, { "epoch": 2.62, "grad_norm": 0.7244191765785217, "learning_rate": 2.353568130766965e-05, "loss": 1.6818, "step": 78726 }, { "epoch": 2.62, "grad_norm": 0.6941779255867004, "learning_rate": 2.3531623283472145e-05, "loss": 1.6579, "step": 78727 }, { "epoch": 2.62, "grad_norm": 0.6972918510437012, "learning_rate": 2.3527565594866593e-05, "loss": 1.6971, "step": 78728 }, { "epoch": 2.62, "grad_norm": 0.6911541819572449, "learning_rate": 2.3523508241857757e-05, "loss": 1.6427, "step": 78729 }, { "epoch": 2.62, "grad_norm": 0.7238611578941345, "learning_rate": 2.3519451224450636e-05, "loss": 1.6489, "step": 78730 }, { "epoch": 2.62, "grad_norm": 0.7065128087997437, "learning_rate": 2.3515394542650223e-05, "loss": 1.7085, "step": 78731 }, { "epoch": 2.62, "grad_norm": 0.6920315623283386, "learning_rate": 2.351133819646138e-05, "loss": 1.6207, "step": 78732 }, { "epoch": 2.62, "grad_norm": 0.7323412895202637, "learning_rate": 2.3507282185888975e-05, "loss": 1.7038, "step": 78733 }, { "epoch": 2.62, "grad_norm": 0.6754167675971985, "learning_rate": 2.3503226510938e-05, "loss": 1.5854, "step": 78734 }, { "epoch": 2.62, "grad_norm": 0.7158535122871399, "learning_rate": 2.3499171171613484e-05, "loss": 1.6954, "step": 78735 }, { "epoch": 2.62, "grad_norm": 0.69405597448349, "learning_rate": 2.3495116167920093e-05, "loss": 1.6663, "step": 78736 }, { "epoch": 2.62, "grad_norm": 0.6914685368537903, "learning_rate": 2.3491061499862917e-05, "loss": 1.7066, "step": 78737 }, { "epoch": 2.62, "grad_norm": 0.7041322588920593, "learning_rate": 2.348700716744689e-05, "loss": 1.7565, "step": 78738 }, { "epoch": 2.62, "grad_norm": 0.7063575387001038, "learning_rate": 2.3482953170676912e-05, "loss": 1.6211, "step": 78739 }, { "epoch": 2.62, "grad_norm": 0.6821944117546082, "learning_rate": 2.3478899509557837e-05, "loss": 1.6551, "step": 78740 }, { "epoch": 2.62, "grad_norm": 0.7002878189086914, "learning_rate": 2.3474846184094598e-05, "loss": 1.6297, "step": 78741 }, { "epoch": 2.62, "grad_norm": 0.6984428763389587, "learning_rate": 2.3470793194292292e-05, "loss": 1.6444, "step": 78742 }, { "epoch": 2.62, "grad_norm": 0.7067882418632507, "learning_rate": 2.3466740540155583e-05, "loss": 1.6101, "step": 78743 }, { "epoch": 2.62, "grad_norm": 0.6923040747642517, "learning_rate": 2.3462688221689497e-05, "loss": 1.6574, "step": 78744 }, { "epoch": 2.62, "grad_norm": 0.7187758088111877, "learning_rate": 2.345863623889903e-05, "loss": 1.6942, "step": 78745 }, { "epoch": 2.62, "grad_norm": 0.6949002742767334, "learning_rate": 2.3454584591789015e-05, "loss": 1.6697, "step": 78746 }, { "epoch": 2.62, "grad_norm": 0.6870826482772827, "learning_rate": 2.3450533280364347e-05, "loss": 1.6327, "step": 78747 }, { "epoch": 2.62, "grad_norm": 0.6980535387992859, "learning_rate": 2.344648230463002e-05, "loss": 1.7023, "step": 78748 }, { "epoch": 2.62, "grad_norm": 0.7088138461112976, "learning_rate": 2.3442431664590933e-05, "loss": 1.7057, "step": 78749 }, { "epoch": 2.62, "grad_norm": 0.7013012766838074, "learning_rate": 2.3438381360251912e-05, "loss": 1.7468, "step": 78750 }, { "epoch": 2.62, "grad_norm": 0.7021574974060059, "learning_rate": 2.3434331391617987e-05, "loss": 1.6709, "step": 78751 }, { "epoch": 2.62, "grad_norm": 0.7262645363807678, "learning_rate": 2.3430281758694058e-05, "loss": 1.6778, "step": 78752 }, { "epoch": 2.62, "grad_norm": 0.7320153117179871, "learning_rate": 2.3426232461485017e-05, "loss": 1.6813, "step": 78753 }, { "epoch": 2.62, "grad_norm": 0.7220368981361389, "learning_rate": 2.3422183499995727e-05, "loss": 1.7133, "step": 78754 }, { "epoch": 2.62, "grad_norm": 0.7160342931747437, "learning_rate": 2.3418134874231187e-05, "loss": 1.665, "step": 78755 }, { "epoch": 2.62, "grad_norm": 0.7188969254493713, "learning_rate": 2.3414086584196325e-05, "loss": 1.6664, "step": 78756 }, { "epoch": 2.62, "grad_norm": 0.7100394368171692, "learning_rate": 2.3410038629895934e-05, "loss": 1.6489, "step": 78757 }, { "epoch": 2.62, "grad_norm": 0.7062260508537292, "learning_rate": 2.3405991011335047e-05, "loss": 1.6809, "step": 78758 }, { "epoch": 2.62, "grad_norm": 0.710036039352417, "learning_rate": 2.3401943728518457e-05, "loss": 1.7063, "step": 78759 }, { "epoch": 2.62, "grad_norm": 0.7212464213371277, "learning_rate": 2.3397896781451263e-05, "loss": 1.7125, "step": 78760 }, { "epoch": 2.62, "grad_norm": 0.9866086840629578, "learning_rate": 2.339385017013816e-05, "loss": 1.7408, "step": 78761 }, { "epoch": 2.62, "grad_norm": 0.7153880000114441, "learning_rate": 2.3389803894584248e-05, "loss": 1.6368, "step": 78762 }, { "epoch": 2.62, "grad_norm": 0.7197003364562988, "learning_rate": 2.3385757954794347e-05, "loss": 1.7179, "step": 78763 }, { "epoch": 2.62, "grad_norm": 0.7268397212028503, "learning_rate": 2.338171235077333e-05, "loss": 1.6547, "step": 78764 }, { "epoch": 2.62, "grad_norm": 0.7328038811683655, "learning_rate": 2.3377667082526218e-05, "loss": 1.8059, "step": 78765 }, { "epoch": 2.62, "grad_norm": 0.7226338386535645, "learning_rate": 2.3373622150057812e-05, "loss": 1.6926, "step": 78766 }, { "epoch": 2.62, "grad_norm": 0.7117263078689575, "learning_rate": 2.336957755337311e-05, "loss": 1.6627, "step": 78767 }, { "epoch": 2.62, "grad_norm": 0.7349572777748108, "learning_rate": 2.3365533292476935e-05, "loss": 1.7446, "step": 78768 }, { "epoch": 2.62, "grad_norm": 0.6936968564987183, "learning_rate": 2.3361489367374286e-05, "loss": 1.6805, "step": 78769 }, { "epoch": 2.62, "grad_norm": 0.7188389897346497, "learning_rate": 2.3357445778070026e-05, "loss": 1.6911, "step": 78770 }, { "epoch": 2.62, "grad_norm": 0.7182450890541077, "learning_rate": 2.335340252456902e-05, "loss": 1.7479, "step": 78771 }, { "epoch": 2.62, "grad_norm": 0.7039082646369934, "learning_rate": 2.3349359606876262e-05, "loss": 1.6343, "step": 78772 }, { "epoch": 2.62, "grad_norm": 0.6957578659057617, "learning_rate": 2.334531702499658e-05, "loss": 1.6633, "step": 78773 }, { "epoch": 2.62, "grad_norm": 0.6861767768859863, "learning_rate": 2.334127477893497e-05, "loss": 1.6286, "step": 78774 }, { "epoch": 2.62, "grad_norm": 0.7032644152641296, "learning_rate": 2.3337232868696298e-05, "loss": 1.6693, "step": 78775 }, { "epoch": 2.62, "grad_norm": 0.716300368309021, "learning_rate": 2.3333191294285393e-05, "loss": 1.6708, "step": 78776 }, { "epoch": 2.62, "grad_norm": 0.6998116970062256, "learning_rate": 2.332915005570728e-05, "loss": 1.6551, "step": 78777 }, { "epoch": 2.62, "grad_norm": 0.7005846500396729, "learning_rate": 2.332510915296676e-05, "loss": 1.6607, "step": 78778 }, { "epoch": 2.62, "grad_norm": 0.6865953207015991, "learning_rate": 2.3321068586068857e-05, "loss": 1.6646, "step": 78779 }, { "epoch": 2.62, "grad_norm": 0.7533645629882812, "learning_rate": 2.3317028355018373e-05, "loss": 1.7306, "step": 78780 }, { "epoch": 2.62, "grad_norm": 0.7466394901275635, "learning_rate": 2.331298845982027e-05, "loss": 1.651, "step": 78781 }, { "epoch": 2.62, "grad_norm": 0.7257542610168457, "learning_rate": 2.330894890047944e-05, "loss": 1.6263, "step": 78782 }, { "epoch": 2.62, "grad_norm": 0.6900257468223572, "learning_rate": 2.3304909677000715e-05, "loss": 1.6934, "step": 78783 }, { "epoch": 2.62, "grad_norm": 0.6857254505157471, "learning_rate": 2.3300870789389127e-05, "loss": 1.6637, "step": 78784 }, { "epoch": 2.62, "grad_norm": 0.7107247114181519, "learning_rate": 2.329683223764953e-05, "loss": 1.6263, "step": 78785 }, { "epoch": 2.62, "grad_norm": 0.7058302760124207, "learning_rate": 2.3292794021786733e-05, "loss": 1.7329, "step": 78786 }, { "epoch": 2.62, "grad_norm": 0.7036320567131042, "learning_rate": 2.328875614180572e-05, "loss": 1.6705, "step": 78787 }, { "epoch": 2.62, "grad_norm": 0.7289350032806396, "learning_rate": 2.328471859771143e-05, "loss": 1.6636, "step": 78788 }, { "epoch": 2.62, "grad_norm": 0.7288339138031006, "learning_rate": 2.3280681389508714e-05, "loss": 1.7012, "step": 78789 }, { "epoch": 2.62, "grad_norm": 0.6933722496032715, "learning_rate": 2.3276644517202448e-05, "loss": 1.6448, "step": 78790 }, { "epoch": 2.62, "grad_norm": 0.7062929272651672, "learning_rate": 2.3272607980797587e-05, "loss": 1.7074, "step": 78791 }, { "epoch": 2.62, "grad_norm": 0.7019679546356201, "learning_rate": 2.3268571780299026e-05, "loss": 1.6248, "step": 78792 }, { "epoch": 2.62, "grad_norm": 0.6949699521064758, "learning_rate": 2.3264535915711568e-05, "loss": 1.6918, "step": 78793 }, { "epoch": 2.62, "grad_norm": 0.7226173281669617, "learning_rate": 2.326050038704024e-05, "loss": 1.7037, "step": 78794 }, { "epoch": 2.62, "grad_norm": 0.7226558923721313, "learning_rate": 2.325646519428993e-05, "loss": 1.6591, "step": 78795 }, { "epoch": 2.62, "grad_norm": 0.6975501775741577, "learning_rate": 2.3252430337465444e-05, "loss": 1.6794, "step": 78796 }, { "epoch": 2.62, "grad_norm": 0.6853657960891724, "learning_rate": 2.3248395816571707e-05, "loss": 1.5893, "step": 78797 }, { "epoch": 2.62, "grad_norm": 0.6835359334945679, "learning_rate": 2.3244361631613682e-05, "loss": 1.6498, "step": 78798 }, { "epoch": 2.62, "grad_norm": 0.7469119429588318, "learning_rate": 2.3240327782596236e-05, "loss": 1.7315, "step": 78799 }, { "epoch": 2.62, "grad_norm": 0.7248231172561646, "learning_rate": 2.3236294269524193e-05, "loss": 1.7822, "step": 78800 }, { "epoch": 2.62, "grad_norm": 0.7092821002006531, "learning_rate": 2.3232261092402517e-05, "loss": 1.6803, "step": 78801 }, { "epoch": 2.62, "grad_norm": 0.7156831622123718, "learning_rate": 2.3228228251236202e-05, "loss": 1.6871, "step": 78802 }, { "epoch": 2.62, "grad_norm": 0.7212826609611511, "learning_rate": 2.322419574602995e-05, "loss": 1.693, "step": 78803 }, { "epoch": 2.62, "grad_norm": 0.733855664730072, "learning_rate": 2.322016357678872e-05, "loss": 1.6944, "step": 78804 }, { "epoch": 2.62, "grad_norm": 0.7038936018943787, "learning_rate": 2.3216131743517475e-05, "loss": 1.6675, "step": 78805 }, { "epoch": 2.62, "grad_norm": 0.7125520706176758, "learning_rate": 2.3212100246221078e-05, "loss": 1.6212, "step": 78806 }, { "epoch": 2.62, "grad_norm": 0.7109842300415039, "learning_rate": 2.3208069084904357e-05, "loss": 1.7661, "step": 78807 }, { "epoch": 2.62, "grad_norm": 0.7286597490310669, "learning_rate": 2.3204038259572245e-05, "loss": 1.7155, "step": 78808 }, { "epoch": 2.62, "grad_norm": 0.7177203297615051, "learning_rate": 2.3200007770229767e-05, "loss": 1.6507, "step": 78809 }, { "epoch": 2.62, "grad_norm": 0.7123807668685913, "learning_rate": 2.319597761688159e-05, "loss": 1.7378, "step": 78810 }, { "epoch": 2.62, "grad_norm": 0.688378095626831, "learning_rate": 2.3191947799532708e-05, "loss": 1.7095, "step": 78811 }, { "epoch": 2.62, "grad_norm": 0.7086390852928162, "learning_rate": 2.3187918318188047e-05, "loss": 1.7114, "step": 78812 }, { "epoch": 2.62, "grad_norm": 0.695521891117096, "learning_rate": 2.3183889172852475e-05, "loss": 1.7648, "step": 78813 }, { "epoch": 2.62, "grad_norm": 0.7140052914619446, "learning_rate": 2.3179860363530856e-05, "loss": 1.7053, "step": 78814 }, { "epoch": 2.62, "grad_norm": 0.7021955847740173, "learning_rate": 2.3175831890228115e-05, "loss": 1.645, "step": 78815 }, { "epoch": 2.62, "grad_norm": 0.7088800072669983, "learning_rate": 2.3171803752949148e-05, "loss": 1.7248, "step": 78816 }, { "epoch": 2.62, "grad_norm": 0.700313150882721, "learning_rate": 2.3167775951698752e-05, "loss": 1.7636, "step": 78817 }, { "epoch": 2.62, "grad_norm": 0.7084886431694031, "learning_rate": 2.3163748486481892e-05, "loss": 1.685, "step": 78818 }, { "epoch": 2.62, "grad_norm": 0.6958144903182983, "learning_rate": 2.3159721357303527e-05, "loss": 1.6596, "step": 78819 }, { "epoch": 2.62, "grad_norm": 0.7050522565841675, "learning_rate": 2.3155694564168425e-05, "loss": 1.6773, "step": 78820 }, { "epoch": 2.62, "grad_norm": 0.7044867873191833, "learning_rate": 2.3151668107081512e-05, "loss": 1.6312, "step": 78821 }, { "epoch": 2.62, "grad_norm": 0.7195816040039062, "learning_rate": 2.3147641986047717e-05, "loss": 1.6352, "step": 78822 }, { "epoch": 2.62, "grad_norm": 0.717190682888031, "learning_rate": 2.314361620107187e-05, "loss": 1.7163, "step": 78823 }, { "epoch": 2.62, "grad_norm": 0.7012109756469727, "learning_rate": 2.3139590752158864e-05, "loss": 1.6746, "step": 78824 }, { "epoch": 2.62, "grad_norm": 0.7097101211547852, "learning_rate": 2.3135565639313635e-05, "loss": 1.6788, "step": 78825 }, { "epoch": 2.62, "grad_norm": 0.7030807733535767, "learning_rate": 2.3131540862540975e-05, "loss": 1.7335, "step": 78826 }, { "epoch": 2.62, "grad_norm": 0.7270830273628235, "learning_rate": 2.3127516421845883e-05, "loss": 1.6603, "step": 78827 }, { "epoch": 2.62, "grad_norm": 0.7164791822433472, "learning_rate": 2.312349231723315e-05, "loss": 1.7141, "step": 78828 }, { "epoch": 2.62, "grad_norm": 0.7205610275268555, "learning_rate": 2.3119468548707744e-05, "loss": 1.6469, "step": 78829 }, { "epoch": 2.62, "grad_norm": 0.7104995846748352, "learning_rate": 2.3115445116274454e-05, "loss": 1.6608, "step": 78830 }, { "epoch": 2.62, "grad_norm": 0.7167327404022217, "learning_rate": 2.3111422019938285e-05, "loss": 1.6485, "step": 78831 }, { "epoch": 2.62, "grad_norm": 0.6863881945610046, "learning_rate": 2.3107399259704028e-05, "loss": 1.612, "step": 78832 }, { "epoch": 2.62, "grad_norm": 0.7112023234367371, "learning_rate": 2.310337683557658e-05, "loss": 1.6792, "step": 78833 }, { "epoch": 2.62, "grad_norm": 0.7001024484634399, "learning_rate": 2.309935474756084e-05, "loss": 1.5824, "step": 78834 }, { "epoch": 2.62, "grad_norm": 0.7058193683624268, "learning_rate": 2.3095332995661696e-05, "loss": 1.6466, "step": 78835 }, { "epoch": 2.62, "grad_norm": 0.7327919006347656, "learning_rate": 2.3091311579883987e-05, "loss": 1.6539, "step": 78836 }, { "epoch": 2.62, "grad_norm": 0.7245978713035583, "learning_rate": 2.30872905002326e-05, "loss": 1.7102, "step": 78837 }, { "epoch": 2.62, "grad_norm": 0.6899964213371277, "learning_rate": 2.3083269756712508e-05, "loss": 1.5891, "step": 78838 }, { "epoch": 2.62, "grad_norm": 0.701267421245575, "learning_rate": 2.3079249349328532e-05, "loss": 1.6407, "step": 78839 }, { "epoch": 2.62, "grad_norm": 1.202019214630127, "learning_rate": 2.3075229278085472e-05, "loss": 1.7008, "step": 78840 }, { "epoch": 2.62, "grad_norm": 0.6790514588356018, "learning_rate": 2.3071209542988355e-05, "loss": 1.6447, "step": 78841 }, { "epoch": 2.62, "grad_norm": 0.6959930658340454, "learning_rate": 2.3067190144041948e-05, "loss": 1.6957, "step": 78842 }, { "epoch": 2.62, "grad_norm": 0.719508171081543, "learning_rate": 2.306317108125114e-05, "loss": 1.6261, "step": 78843 }, { "epoch": 2.62, "grad_norm": 0.7242447733879089, "learning_rate": 2.305915235462087e-05, "loss": 1.6725, "step": 78844 }, { "epoch": 2.62, "grad_norm": 0.7377966642379761, "learning_rate": 2.3055133964156024e-05, "loss": 1.7396, "step": 78845 }, { "epoch": 2.62, "grad_norm": 0.6947756409645081, "learning_rate": 2.3051115909861373e-05, "loss": 1.6401, "step": 78846 }, { "epoch": 2.62, "grad_norm": 0.7057902812957764, "learning_rate": 2.3047098191741875e-05, "loss": 1.6286, "step": 78847 }, { "epoch": 2.62, "grad_norm": 0.7318708896636963, "learning_rate": 2.304308080980243e-05, "loss": 1.6686, "step": 78848 }, { "epoch": 2.62, "grad_norm": 0.7052194476127625, "learning_rate": 2.303906376404786e-05, "loss": 1.6931, "step": 78849 }, { "epoch": 2.62, "grad_norm": 0.7067611813545227, "learning_rate": 2.3035047054483003e-05, "loss": 1.691, "step": 78850 }, { "epoch": 2.62, "grad_norm": 0.7063389420509338, "learning_rate": 2.3031030681112816e-05, "loss": 1.7597, "step": 78851 }, { "epoch": 2.62, "grad_norm": 0.7112835645675659, "learning_rate": 2.3027014643942232e-05, "loss": 1.6658, "step": 78852 }, { "epoch": 2.62, "grad_norm": 0.704351007938385, "learning_rate": 2.3022998942975947e-05, "loss": 1.6601, "step": 78853 }, { "epoch": 2.62, "grad_norm": 0.6948787569999695, "learning_rate": 2.3018983578218952e-05, "loss": 1.6025, "step": 78854 }, { "epoch": 2.62, "grad_norm": 0.7177066802978516, "learning_rate": 2.3014968549676117e-05, "loss": 1.6683, "step": 78855 }, { "epoch": 2.62, "grad_norm": 0.6956713795661926, "learning_rate": 2.301095385735233e-05, "loss": 1.7211, "step": 78856 }, { "epoch": 2.62, "grad_norm": 0.7276051044464111, "learning_rate": 2.3006939501252365e-05, "loss": 1.6199, "step": 78857 }, { "epoch": 2.62, "grad_norm": 0.7023156881332397, "learning_rate": 2.300292548138114e-05, "loss": 1.7477, "step": 78858 }, { "epoch": 2.62, "grad_norm": 0.7267508506774902, "learning_rate": 2.299891179774369e-05, "loss": 1.7282, "step": 78859 }, { "epoch": 2.62, "grad_norm": 0.7183918952941895, "learning_rate": 2.2994898450344613e-05, "loss": 1.6865, "step": 78860 }, { "epoch": 2.62, "grad_norm": 0.71817547082901, "learning_rate": 2.2990885439188934e-05, "loss": 1.7988, "step": 78861 }, { "epoch": 2.62, "grad_norm": 0.7215069532394409, "learning_rate": 2.298687276428155e-05, "loss": 1.6658, "step": 78862 }, { "epoch": 2.62, "grad_norm": 0.7315492033958435, "learning_rate": 2.298286042562729e-05, "loss": 1.7734, "step": 78863 }, { "epoch": 2.62, "grad_norm": 0.7325395941734314, "learning_rate": 2.2978848423230955e-05, "loss": 1.6783, "step": 78864 }, { "epoch": 2.62, "grad_norm": 0.7021291851997375, "learning_rate": 2.297483675709757e-05, "loss": 1.6268, "step": 78865 }, { "epoch": 2.62, "grad_norm": 0.6902428269386292, "learning_rate": 2.2970825427231864e-05, "loss": 1.6513, "step": 78866 }, { "epoch": 2.62, "grad_norm": 0.7155441641807556, "learning_rate": 2.2966814433638738e-05, "loss": 1.6925, "step": 78867 }, { "epoch": 2.62, "grad_norm": 0.7260235548019409, "learning_rate": 2.296280377632308e-05, "loss": 1.6664, "step": 78868 }, { "epoch": 2.62, "grad_norm": 0.6736770868301392, "learning_rate": 2.2958793455289827e-05, "loss": 1.6591, "step": 78869 }, { "epoch": 2.62, "grad_norm": 0.6956021785736084, "learning_rate": 2.2954783470543735e-05, "loss": 1.6417, "step": 78870 }, { "epoch": 2.62, "grad_norm": 0.7299568057060242, "learning_rate": 2.2950773822089674e-05, "loss": 1.6613, "step": 78871 }, { "epoch": 2.62, "grad_norm": 0.6893598437309265, "learning_rate": 2.2946764509932635e-05, "loss": 1.6254, "step": 78872 }, { "epoch": 2.62, "grad_norm": 0.7006946802139282, "learning_rate": 2.2942755534077384e-05, "loss": 1.6802, "step": 78873 }, { "epoch": 2.62, "grad_norm": 0.7333003878593445, "learning_rate": 2.2938746894528748e-05, "loss": 1.6945, "step": 78874 }, { "epoch": 2.62, "grad_norm": 0.7064536809921265, "learning_rate": 2.293473859129169e-05, "loss": 1.5988, "step": 78875 }, { "epoch": 2.62, "grad_norm": 0.6912420392036438, "learning_rate": 2.2930730624371007e-05, "loss": 1.6035, "step": 78876 }, { "epoch": 2.62, "grad_norm": 0.6990066766738892, "learning_rate": 2.292672299377163e-05, "loss": 1.7359, "step": 78877 }, { "epoch": 2.62, "grad_norm": 0.7260052561759949, "learning_rate": 2.2922715699498352e-05, "loss": 1.738, "step": 78878 }, { "epoch": 2.62, "grad_norm": 0.7158952355384827, "learning_rate": 2.29187087415561e-05, "loss": 1.6504, "step": 78879 }, { "epoch": 2.62, "grad_norm": 0.6891182065010071, "learning_rate": 2.2914702119949747e-05, "loss": 1.6897, "step": 78880 }, { "epoch": 2.62, "grad_norm": 0.6886728405952454, "learning_rate": 2.2910695834684044e-05, "loss": 1.6342, "step": 78881 }, { "epoch": 2.62, "grad_norm": 0.7059305906295776, "learning_rate": 2.290668988576396e-05, "loss": 1.7401, "step": 78882 }, { "epoch": 2.62, "grad_norm": 0.7223997116088867, "learning_rate": 2.2902684273194295e-05, "loss": 1.5729, "step": 78883 }, { "epoch": 2.62, "grad_norm": 0.6942862272262573, "learning_rate": 2.2898678996980003e-05, "loss": 1.6444, "step": 78884 }, { "epoch": 2.62, "grad_norm": 0.7186437845230103, "learning_rate": 2.2894674057125818e-05, "loss": 1.6976, "step": 78885 }, { "epoch": 2.62, "grad_norm": 0.7144260406494141, "learning_rate": 2.2890669453636734e-05, "loss": 1.7452, "step": 78886 }, { "epoch": 2.62, "grad_norm": 0.7195638418197632, "learning_rate": 2.288666518651755e-05, "loss": 1.6212, "step": 78887 }, { "epoch": 2.62, "grad_norm": 0.7113485336303711, "learning_rate": 2.288266125577306e-05, "loss": 1.6725, "step": 78888 }, { "epoch": 2.62, "grad_norm": 0.717841625213623, "learning_rate": 2.287865766140823e-05, "loss": 1.7768, "step": 78889 }, { "epoch": 2.62, "grad_norm": 0.7101327776908875, "learning_rate": 2.287465440342785e-05, "loss": 1.6877, "step": 78890 }, { "epoch": 2.62, "grad_norm": 0.6996835470199585, "learning_rate": 2.2870651481836854e-05, "loss": 1.6757, "step": 78891 }, { "epoch": 2.62, "grad_norm": 0.7205864191055298, "learning_rate": 2.286664889664004e-05, "loss": 1.664, "step": 78892 }, { "epoch": 2.62, "grad_norm": 0.7024679780006409, "learning_rate": 2.2862646647842233e-05, "loss": 1.6528, "step": 78893 }, { "epoch": 2.62, "grad_norm": 0.7173205018043518, "learning_rate": 2.2858644735448393e-05, "loss": 1.7321, "step": 78894 }, { "epoch": 2.62, "grad_norm": 0.704584002494812, "learning_rate": 2.2854643159463294e-05, "loss": 1.7085, "step": 78895 }, { "epoch": 2.62, "grad_norm": 0.7059791684150696, "learning_rate": 2.2850641919891855e-05, "loss": 1.6782, "step": 78896 }, { "epoch": 2.62, "grad_norm": 0.6968884468078613, "learning_rate": 2.2846641016738843e-05, "loss": 1.7164, "step": 78897 }, { "epoch": 2.62, "grad_norm": 0.7069227695465088, "learning_rate": 2.284264045000922e-05, "loss": 1.6455, "step": 78898 }, { "epoch": 2.62, "grad_norm": 0.7067323923110962, "learning_rate": 2.2838640219707815e-05, "loss": 1.693, "step": 78899 }, { "epoch": 2.63, "grad_norm": 0.7081724405288696, "learning_rate": 2.283464032583939e-05, "loss": 1.7549, "step": 78900 }, { "epoch": 2.63, "grad_norm": 0.7135059833526611, "learning_rate": 2.2830640768408914e-05, "loss": 1.7328, "step": 78901 }, { "epoch": 2.63, "grad_norm": 0.7313408255577087, "learning_rate": 2.2826641547421244e-05, "loss": 1.7328, "step": 78902 }, { "epoch": 2.63, "grad_norm": 0.705707848072052, "learning_rate": 2.282264266288111e-05, "loss": 1.6921, "step": 78903 }, { "epoch": 2.63, "grad_norm": 0.6977463364601135, "learning_rate": 2.2818644114793438e-05, "loss": 1.6511, "step": 78904 }, { "epoch": 2.63, "grad_norm": 0.724550724029541, "learning_rate": 2.2814645903163164e-05, "loss": 1.631, "step": 78905 }, { "epoch": 2.63, "grad_norm": 0.7226484417915344, "learning_rate": 2.2810648027995082e-05, "loss": 1.7465, "step": 78906 }, { "epoch": 2.63, "grad_norm": 0.7395158410072327, "learning_rate": 2.2806650489293955e-05, "loss": 1.7261, "step": 78907 }, { "epoch": 2.63, "grad_norm": 0.6966065764427185, "learning_rate": 2.2802653287064778e-05, "loss": 1.6529, "step": 78908 }, { "epoch": 2.63, "grad_norm": 0.7025400400161743, "learning_rate": 2.279865642131231e-05, "loss": 1.6708, "step": 78909 }, { "epoch": 2.63, "grad_norm": 0.7134425044059753, "learning_rate": 2.279465989204139e-05, "loss": 1.7588, "step": 78910 }, { "epoch": 2.63, "grad_norm": 0.7155897617340088, "learning_rate": 2.279066369925694e-05, "loss": 1.626, "step": 78911 }, { "epoch": 2.63, "grad_norm": 0.6759262681007385, "learning_rate": 2.2786667842963858e-05, "loss": 1.6766, "step": 78912 }, { "epoch": 2.63, "grad_norm": 0.7308523058891296, "learning_rate": 2.2782672323166805e-05, "loss": 1.6646, "step": 78913 }, { "epoch": 2.63, "grad_norm": 0.7184579968452454, "learning_rate": 2.277867713987075e-05, "loss": 1.6966, "step": 78914 }, { "epoch": 2.63, "grad_norm": 0.7017232179641724, "learning_rate": 2.277468229308058e-05, "loss": 1.6293, "step": 78915 }, { "epoch": 2.63, "grad_norm": 0.725395679473877, "learning_rate": 2.27706877828011e-05, "loss": 1.6231, "step": 78916 }, { "epoch": 2.63, "grad_norm": 0.7144104242324829, "learning_rate": 2.27666936090371e-05, "loss": 1.704, "step": 78917 }, { "epoch": 2.63, "grad_norm": 0.7137357592582703, "learning_rate": 2.2762699771793513e-05, "loss": 1.6856, "step": 78918 }, { "epoch": 2.63, "grad_norm": 0.7036300897598267, "learning_rate": 2.275870627107523e-05, "loss": 1.6624, "step": 78919 }, { "epoch": 2.63, "grad_norm": 0.707254946231842, "learning_rate": 2.275471310688696e-05, "loss": 1.6622, "step": 78920 }, { "epoch": 2.63, "grad_norm": 0.6984994411468506, "learning_rate": 2.275072027923358e-05, "loss": 1.6908, "step": 78921 }, { "epoch": 2.63, "grad_norm": 0.7208916544914246, "learning_rate": 2.2746727788120035e-05, "loss": 1.7393, "step": 78922 }, { "epoch": 2.63, "grad_norm": 0.706605076789856, "learning_rate": 2.2742735633551145e-05, "loss": 1.6651, "step": 78923 }, { "epoch": 2.63, "grad_norm": 0.7048531770706177, "learning_rate": 2.2738743815531647e-05, "loss": 1.6848, "step": 78924 }, { "epoch": 2.63, "grad_norm": 0.7134425044059753, "learning_rate": 2.273475233406643e-05, "loss": 1.6921, "step": 78925 }, { "epoch": 2.63, "grad_norm": 0.7083457112312317, "learning_rate": 2.273076118916053e-05, "loss": 1.7019, "step": 78926 }, { "epoch": 2.63, "grad_norm": 0.7120011448860168, "learning_rate": 2.2726770380818504e-05, "loss": 1.6995, "step": 78927 }, { "epoch": 2.63, "grad_norm": 0.7215861678123474, "learning_rate": 2.2722779909045318e-05, "loss": 1.6685, "step": 78928 }, { "epoch": 2.63, "grad_norm": 0.7068074941635132, "learning_rate": 2.271878977384587e-05, "loss": 1.7042, "step": 78929 }, { "epoch": 2.63, "grad_norm": 0.7199940085411072, "learning_rate": 2.2714799975224952e-05, "loss": 1.6471, "step": 78930 }, { "epoch": 2.63, "grad_norm": 0.7340061068534851, "learning_rate": 2.2710810513187393e-05, "loss": 1.7011, "step": 78931 }, { "epoch": 2.63, "grad_norm": 0.7256880402565002, "learning_rate": 2.270682138773806e-05, "loss": 1.722, "step": 78932 }, { "epoch": 2.63, "grad_norm": 0.6974218487739563, "learning_rate": 2.2702832598881814e-05, "loss": 1.6916, "step": 78933 }, { "epoch": 2.63, "grad_norm": 0.6908302307128906, "learning_rate": 2.269884414662342e-05, "loss": 1.6158, "step": 78934 }, { "epoch": 2.63, "grad_norm": 0.7236694097518921, "learning_rate": 2.2694856030967767e-05, "loss": 1.76, "step": 78935 }, { "epoch": 2.63, "grad_norm": 0.7133405804634094, "learning_rate": 2.2690868251919758e-05, "loss": 1.6912, "step": 78936 }, { "epoch": 2.63, "grad_norm": 0.7117382287979126, "learning_rate": 2.2686880809484187e-05, "loss": 1.6596, "step": 78937 }, { "epoch": 2.63, "grad_norm": 0.7210729718208313, "learning_rate": 2.2682893703665818e-05, "loss": 1.7048, "step": 78938 }, { "epoch": 2.63, "grad_norm": 0.710577130317688, "learning_rate": 2.267890693446961e-05, "loss": 1.6581, "step": 78939 }, { "epoch": 2.63, "grad_norm": 0.7074419856071472, "learning_rate": 2.267492050190033e-05, "loss": 1.6765, "step": 78940 }, { "epoch": 2.63, "grad_norm": 0.7150430083274841, "learning_rate": 2.2670934405962803e-05, "loss": 1.6551, "step": 78941 }, { "epoch": 2.63, "grad_norm": 0.7325825095176697, "learning_rate": 2.266694864666193e-05, "loss": 1.6438, "step": 78942 }, { "epoch": 2.63, "grad_norm": 0.7021491527557373, "learning_rate": 2.2662963224002473e-05, "loss": 1.685, "step": 78943 }, { "epoch": 2.63, "grad_norm": 0.6918379068374634, "learning_rate": 2.265897813798936e-05, "loss": 1.6447, "step": 78944 }, { "epoch": 2.63, "grad_norm": 0.6891142725944519, "learning_rate": 2.2654993388627352e-05, "loss": 1.6621, "step": 78945 }, { "epoch": 2.63, "grad_norm": 0.6928614974021912, "learning_rate": 2.265100897592138e-05, "loss": 1.6531, "step": 78946 }, { "epoch": 2.63, "grad_norm": 0.7011118531227112, "learning_rate": 2.2647024899876142e-05, "loss": 1.6862, "step": 78947 }, { "epoch": 2.63, "grad_norm": 0.7147833108901978, "learning_rate": 2.26430411604966e-05, "loss": 1.6205, "step": 78948 }, { "epoch": 2.63, "grad_norm": 0.6885058879852295, "learning_rate": 2.2639057757787547e-05, "loss": 1.6456, "step": 78949 }, { "epoch": 2.63, "grad_norm": 0.7148950099945068, "learning_rate": 2.263507469175375e-05, "loss": 1.7526, "step": 78950 }, { "epoch": 2.63, "grad_norm": 0.6871762871742249, "learning_rate": 2.2631091962400173e-05, "loss": 1.6213, "step": 78951 }, { "epoch": 2.63, "grad_norm": 0.7042432427406311, "learning_rate": 2.2627109569731572e-05, "loss": 1.7741, "step": 78952 }, { "epoch": 2.63, "grad_norm": 0.7309293746948242, "learning_rate": 2.2623127513752748e-05, "loss": 1.5991, "step": 78953 }, { "epoch": 2.63, "grad_norm": 1.3170735836029053, "learning_rate": 2.2619145794468564e-05, "loss": 1.7305, "step": 78954 }, { "epoch": 2.63, "grad_norm": 0.7094275951385498, "learning_rate": 2.2615164411883914e-05, "loss": 1.6732, "step": 78955 }, { "epoch": 2.63, "grad_norm": 0.7123085260391235, "learning_rate": 2.2611183366003592e-05, "loss": 1.7126, "step": 78956 }, { "epoch": 2.63, "grad_norm": 0.7158359885215759, "learning_rate": 2.2607202656832368e-05, "loss": 1.7518, "step": 78957 }, { "epoch": 2.63, "grad_norm": 0.7175272107124329, "learning_rate": 2.26032222843752e-05, "loss": 1.707, "step": 78958 }, { "epoch": 2.63, "grad_norm": 0.7024911642074585, "learning_rate": 2.259924224863682e-05, "loss": 1.6665, "step": 78959 }, { "epoch": 2.63, "grad_norm": 0.6926885843276978, "learning_rate": 2.259526254962205e-05, "loss": 1.6644, "step": 78960 }, { "epoch": 2.63, "grad_norm": 0.694881796836853, "learning_rate": 2.259128318733576e-05, "loss": 1.623, "step": 78961 }, { "epoch": 2.63, "grad_norm": 0.6744042038917542, "learning_rate": 2.2587304161782884e-05, "loss": 1.6984, "step": 78962 }, { "epoch": 2.63, "grad_norm": 0.7064793109893799, "learning_rate": 2.2583325472968005e-05, "loss": 1.6256, "step": 78963 }, { "epoch": 2.63, "grad_norm": 0.7014082670211792, "learning_rate": 2.257934712089613e-05, "loss": 1.6711, "step": 78964 }, { "epoch": 2.63, "grad_norm": 0.7021464109420776, "learning_rate": 2.2575369105572082e-05, "loss": 1.6238, "step": 78965 }, { "epoch": 2.63, "grad_norm": 0.7175878882408142, "learning_rate": 2.2571391427000697e-05, "loss": 1.6853, "step": 78966 }, { "epoch": 2.63, "grad_norm": 0.7312697172164917, "learning_rate": 2.2567414085186698e-05, "loss": 1.7337, "step": 78967 }, { "epoch": 2.63, "grad_norm": 0.7072484493255615, "learning_rate": 2.256343708013495e-05, "loss": 1.6174, "step": 78968 }, { "epoch": 2.63, "grad_norm": 0.7103313207626343, "learning_rate": 2.255946041185045e-05, "loss": 1.71, "step": 78969 }, { "epoch": 2.63, "grad_norm": 0.6997873783111572, "learning_rate": 2.255548408033776e-05, "loss": 1.6141, "step": 78970 }, { "epoch": 2.63, "grad_norm": 0.69577556848526, "learning_rate": 2.2551508085601845e-05, "loss": 1.6712, "step": 78971 }, { "epoch": 2.63, "grad_norm": 0.7184765338897705, "learning_rate": 2.2547532427647563e-05, "loss": 1.7391, "step": 78972 }, { "epoch": 2.63, "grad_norm": 0.6942214369773865, "learning_rate": 2.2543557106479713e-05, "loss": 1.6425, "step": 78973 }, { "epoch": 2.63, "grad_norm": 0.7106943130493164, "learning_rate": 2.253958212210303e-05, "loss": 1.7101, "step": 78974 }, { "epoch": 2.63, "grad_norm": 0.6937151551246643, "learning_rate": 2.2535607474522433e-05, "loss": 1.7181, "step": 78975 }, { "epoch": 2.63, "grad_norm": 0.6944449543952942, "learning_rate": 2.2531633163742822e-05, "loss": 1.7118, "step": 78976 }, { "epoch": 2.63, "grad_norm": 0.7096723914146423, "learning_rate": 2.25276591897688e-05, "loss": 1.7306, "step": 78977 }, { "epoch": 2.63, "grad_norm": 0.7369834184646606, "learning_rate": 2.2523685552605353e-05, "loss": 1.6559, "step": 78978 }, { "epoch": 2.63, "grad_norm": 0.6982733607292175, "learning_rate": 2.2519712252257284e-05, "loss": 1.7736, "step": 78979 }, { "epoch": 2.63, "grad_norm": 0.7127808332443237, "learning_rate": 2.2515739288729417e-05, "loss": 1.6979, "step": 78980 }, { "epoch": 2.63, "grad_norm": 0.7127320170402527, "learning_rate": 2.251176666202652e-05, "loss": 1.6223, "step": 78981 }, { "epoch": 2.63, "grad_norm": 0.7071551084518433, "learning_rate": 2.250779437215349e-05, "loss": 1.6986, "step": 78982 }, { "epoch": 2.63, "grad_norm": 0.7357059121131897, "learning_rate": 2.2503822419115114e-05, "loss": 1.7507, "step": 78983 }, { "epoch": 2.63, "grad_norm": 0.7048682570457458, "learning_rate": 2.2499850802916132e-05, "loss": 1.6277, "step": 78984 }, { "epoch": 2.63, "grad_norm": 0.6793912053108215, "learning_rate": 2.2495879523561468e-05, "loss": 1.7171, "step": 78985 }, { "epoch": 2.63, "grad_norm": 0.7400950789451599, "learning_rate": 2.2491908581055985e-05, "loss": 1.7091, "step": 78986 }, { "epoch": 2.63, "grad_norm": 0.6938204765319824, "learning_rate": 2.2487937975404412e-05, "loss": 1.688, "step": 78987 }, { "epoch": 2.63, "grad_norm": 0.6904908418655396, "learning_rate": 2.248396770661155e-05, "loss": 1.7006, "step": 78988 }, { "epoch": 2.63, "grad_norm": 0.7156710624694824, "learning_rate": 2.247999777468229e-05, "loss": 1.7128, "step": 78989 }, { "epoch": 2.63, "grad_norm": 0.7246941924095154, "learning_rate": 2.2476028179621464e-05, "loss": 1.6347, "step": 78990 }, { "epoch": 2.63, "grad_norm": 0.7044532895088196, "learning_rate": 2.2472058921433766e-05, "loss": 1.7346, "step": 78991 }, { "epoch": 2.63, "grad_norm": 0.7239593863487244, "learning_rate": 2.246809000012416e-05, "loss": 1.6617, "step": 78992 }, { "epoch": 2.63, "grad_norm": 0.7054013609886169, "learning_rate": 2.2464121415697345e-05, "loss": 1.6601, "step": 78993 }, { "epoch": 2.63, "grad_norm": 0.693656861782074, "learning_rate": 2.2460153168158246e-05, "loss": 1.7294, "step": 78994 }, { "epoch": 2.63, "grad_norm": 0.6957339644432068, "learning_rate": 2.2456185257511595e-05, "loss": 1.7049, "step": 78995 }, { "epoch": 2.63, "grad_norm": 0.7391618490219116, "learning_rate": 2.2452217683762252e-05, "loss": 1.7039, "step": 78996 }, { "epoch": 2.63, "grad_norm": 0.7434802651405334, "learning_rate": 2.244825044691505e-05, "loss": 1.6386, "step": 78997 }, { "epoch": 2.63, "grad_norm": 0.7013675570487976, "learning_rate": 2.244428354697472e-05, "loss": 1.6112, "step": 78998 }, { "epoch": 2.63, "grad_norm": 0.740553081035614, "learning_rate": 2.2440316983946184e-05, "loss": 1.8018, "step": 78999 }, { "epoch": 2.63, "grad_norm": 0.7046241164207458, "learning_rate": 2.2436350757834145e-05, "loss": 1.6265, "step": 79000 }, { "epoch": 2.63, "grad_norm": 0.6829661726951599, "learning_rate": 2.243238486864356e-05, "loss": 1.6134, "step": 79001 }, { "epoch": 2.63, "grad_norm": 0.7351341247558594, "learning_rate": 2.242841931637913e-05, "loss": 1.6552, "step": 79002 }, { "epoch": 2.63, "grad_norm": 0.7148488163948059, "learning_rate": 2.2424454101045687e-05, "loss": 1.6713, "step": 79003 }, { "epoch": 2.63, "grad_norm": 0.7015116810798645, "learning_rate": 2.242048922264805e-05, "loss": 1.6358, "step": 79004 }, { "epoch": 2.63, "grad_norm": 0.6968121528625488, "learning_rate": 2.241652468119103e-05, "loss": 1.6571, "step": 79005 }, { "epoch": 2.63, "grad_norm": 0.702935516834259, "learning_rate": 2.2412560476679507e-05, "loss": 1.727, "step": 79006 }, { "epoch": 2.63, "grad_norm": 0.7240926027297974, "learning_rate": 2.2408596609118157e-05, "loss": 1.6615, "step": 79007 }, { "epoch": 2.63, "grad_norm": 0.7243303656578064, "learning_rate": 2.2404633078511937e-05, "loss": 1.6486, "step": 79008 }, { "epoch": 2.63, "grad_norm": 0.7015345096588135, "learning_rate": 2.240066988486561e-05, "loss": 1.6998, "step": 79009 }, { "epoch": 2.63, "grad_norm": 0.7362373471260071, "learning_rate": 2.239670702818387e-05, "loss": 1.6729, "step": 79010 }, { "epoch": 2.63, "grad_norm": 0.6922997832298279, "learning_rate": 2.2392744508471717e-05, "loss": 1.6144, "step": 79011 }, { "epoch": 2.63, "grad_norm": 0.7276397943496704, "learning_rate": 2.238878232573378e-05, "loss": 1.6608, "step": 79012 }, { "epoch": 2.63, "grad_norm": 0.7060672044754028, "learning_rate": 2.2384820479975052e-05, "loss": 1.6235, "step": 79013 }, { "epoch": 2.63, "grad_norm": 0.6923472881317139, "learning_rate": 2.2380858971200167e-05, "loss": 1.67, "step": 79014 }, { "epoch": 2.63, "grad_norm": 0.7020918726921082, "learning_rate": 2.2376897799414084e-05, "loss": 1.5804, "step": 79015 }, { "epoch": 2.63, "grad_norm": 0.7041212916374207, "learning_rate": 2.2372936964621502e-05, "loss": 1.6327, "step": 79016 }, { "epoch": 2.63, "grad_norm": 0.7022578716278076, "learning_rate": 2.236897646682725e-05, "loss": 1.6244, "step": 79017 }, { "epoch": 2.63, "grad_norm": 0.702116072177887, "learning_rate": 2.2365016306036186e-05, "loss": 1.6668, "step": 79018 }, { "epoch": 2.63, "grad_norm": 0.7455970048904419, "learning_rate": 2.2361056482253114e-05, "loss": 1.6537, "step": 79019 }, { "epoch": 2.63, "grad_norm": 0.7326065897941589, "learning_rate": 2.2357096995482727e-05, "loss": 1.6998, "step": 79020 }, { "epoch": 2.63, "grad_norm": 0.6980347633361816, "learning_rate": 2.2353137845729952e-05, "loss": 1.6414, "step": 79021 }, { "epoch": 2.63, "grad_norm": 0.7042697072029114, "learning_rate": 2.2349179032999586e-05, "loss": 1.5839, "step": 79022 }, { "epoch": 2.63, "grad_norm": 0.7053683996200562, "learning_rate": 2.2345220557296395e-05, "loss": 1.7531, "step": 79023 }, { "epoch": 2.63, "grad_norm": 0.7258379459381104, "learning_rate": 2.2341262418625172e-05, "loss": 1.6156, "step": 79024 }, { "epoch": 2.63, "grad_norm": 0.707598865032196, "learning_rate": 2.233730461699078e-05, "loss": 1.6912, "step": 79025 }, { "epoch": 2.63, "grad_norm": 0.7200636863708496, "learning_rate": 2.2333347152397985e-05, "loss": 1.6806, "step": 79026 }, { "epoch": 2.63, "grad_norm": 0.7193307876586914, "learning_rate": 2.2329390024851546e-05, "loss": 1.6776, "step": 79027 }, { "epoch": 2.63, "grad_norm": 0.6963228583335876, "learning_rate": 2.2325433234356325e-05, "loss": 1.7149, "step": 79028 }, { "epoch": 2.63, "grad_norm": 0.7005972266197205, "learning_rate": 2.2321476780917225e-05, "loss": 1.7601, "step": 79029 }, { "epoch": 2.63, "grad_norm": 0.7364871501922607, "learning_rate": 2.23175206645388e-05, "loss": 1.7388, "step": 79030 }, { "epoch": 2.63, "grad_norm": 0.7084859609603882, "learning_rate": 2.231356488522602e-05, "loss": 1.6292, "step": 79031 }, { "epoch": 2.63, "grad_norm": 0.7013086676597595, "learning_rate": 2.2309609442983712e-05, "loss": 1.7329, "step": 79032 }, { "epoch": 2.63, "grad_norm": 0.7293899059295654, "learning_rate": 2.2305654337816604e-05, "loss": 1.6999, "step": 79033 }, { "epoch": 2.63, "grad_norm": 0.716522753238678, "learning_rate": 2.230169956972946e-05, "loss": 1.711, "step": 79034 }, { "epoch": 2.63, "grad_norm": 0.7348095178604126, "learning_rate": 2.2297745138727142e-05, "loss": 1.7121, "step": 79035 }, { "epoch": 2.63, "grad_norm": 0.7022486925125122, "learning_rate": 2.229379104481458e-05, "loss": 1.7157, "step": 79036 }, { "epoch": 2.63, "grad_norm": 0.7456014752388, "learning_rate": 2.2289837287996304e-05, "loss": 1.7202, "step": 79037 }, { "epoch": 2.63, "grad_norm": 0.7004421353340149, "learning_rate": 2.2285883868277245e-05, "loss": 1.6893, "step": 79038 }, { "epoch": 2.63, "grad_norm": 0.70166015625, "learning_rate": 2.2281930785662262e-05, "loss": 1.632, "step": 79039 }, { "epoch": 2.63, "grad_norm": 0.7032614350318909, "learning_rate": 2.2277978040156086e-05, "loss": 1.6589, "step": 79040 }, { "epoch": 2.63, "grad_norm": 0.6954192519187927, "learning_rate": 2.227402563176348e-05, "loss": 1.6252, "step": 79041 }, { "epoch": 2.63, "grad_norm": 0.694576621055603, "learning_rate": 2.2270073560489275e-05, "loss": 1.6514, "step": 79042 }, { "epoch": 2.63, "grad_norm": 0.7081131935119629, "learning_rate": 2.2266121826338402e-05, "loss": 1.6625, "step": 79043 }, { "epoch": 2.63, "grad_norm": 0.7492302060127258, "learning_rate": 2.2262170429315417e-05, "loss": 1.7061, "step": 79044 }, { "epoch": 2.63, "grad_norm": 0.6922487020492554, "learning_rate": 2.2258219369425222e-05, "loss": 1.6503, "step": 79045 }, { "epoch": 2.63, "grad_norm": 0.7158148884773254, "learning_rate": 2.225426864667271e-05, "loss": 1.6715, "step": 79046 }, { "epoch": 2.63, "grad_norm": 0.6843894720077515, "learning_rate": 2.2250318261062583e-05, "loss": 1.5869, "step": 79047 }, { "epoch": 2.63, "grad_norm": 0.7011853456497192, "learning_rate": 2.2246368212599564e-05, "loss": 1.7003, "step": 79048 }, { "epoch": 2.63, "grad_norm": 0.6905741095542908, "learning_rate": 2.2242418501288618e-05, "loss": 1.6743, "step": 79049 }, { "epoch": 2.63, "grad_norm": 0.6928272247314453, "learning_rate": 2.223846912713444e-05, "loss": 1.6201, "step": 79050 }, { "epoch": 2.63, "grad_norm": 0.7208255529403687, "learning_rate": 2.223452009014176e-05, "loss": 1.7287, "step": 79051 }, { "epoch": 2.63, "grad_norm": 0.683384895324707, "learning_rate": 2.2230571390315477e-05, "loss": 1.6401, "step": 79052 }, { "epoch": 2.63, "grad_norm": 0.7205036282539368, "learning_rate": 2.2226623027660384e-05, "loss": 1.6277, "step": 79053 }, { "epoch": 2.63, "grad_norm": 0.6880599856376648, "learning_rate": 2.2222675002181243e-05, "loss": 1.7202, "step": 79054 }, { "epoch": 2.63, "grad_norm": 0.6937928199768066, "learning_rate": 2.2218727313882822e-05, "loss": 1.5985, "step": 79055 }, { "epoch": 2.63, "grad_norm": 0.7310999035835266, "learning_rate": 2.2214779962769946e-05, "loss": 1.5797, "step": 79056 }, { "epoch": 2.63, "grad_norm": 0.7099958658218384, "learning_rate": 2.2210832948847444e-05, "loss": 1.6129, "step": 79057 }, { "epoch": 2.63, "grad_norm": 0.7089888453483582, "learning_rate": 2.2206886272119984e-05, "loss": 1.6799, "step": 79058 }, { "epoch": 2.63, "grad_norm": 0.6909119486808777, "learning_rate": 2.2202939932592457e-05, "loss": 1.6598, "step": 79059 }, { "epoch": 2.63, "grad_norm": 0.7096571326255798, "learning_rate": 2.2198993930269628e-05, "loss": 1.6694, "step": 79060 }, { "epoch": 2.63, "grad_norm": 0.7112791538238525, "learning_rate": 2.2195048265156323e-05, "loss": 1.7037, "step": 79061 }, { "epoch": 2.63, "grad_norm": 0.7229016423225403, "learning_rate": 2.219110293725721e-05, "loss": 1.6051, "step": 79062 }, { "epoch": 2.63, "grad_norm": 0.7160279750823975, "learning_rate": 2.218715794657725e-05, "loss": 1.6651, "step": 79063 }, { "epoch": 2.63, "grad_norm": 0.6983955502510071, "learning_rate": 2.2183213293121105e-05, "loss": 1.6467, "step": 79064 }, { "epoch": 2.63, "grad_norm": 0.7045159339904785, "learning_rate": 2.2179268976893637e-05, "loss": 1.7011, "step": 79065 }, { "epoch": 2.63, "grad_norm": 0.7069650888442993, "learning_rate": 2.2175324997899612e-05, "loss": 1.7024, "step": 79066 }, { "epoch": 2.63, "grad_norm": 0.6929616928100586, "learning_rate": 2.2171381356143758e-05, "loss": 1.7077, "step": 79067 }, { "epoch": 2.63, "grad_norm": 0.726484477519989, "learning_rate": 2.216743805163094e-05, "loss": 1.7369, "step": 79068 }, { "epoch": 2.63, "grad_norm": 0.7172524929046631, "learning_rate": 2.2163495084365944e-05, "loss": 1.7475, "step": 79069 }, { "epoch": 2.63, "grad_norm": 0.7460907697677612, "learning_rate": 2.2159552454353446e-05, "loss": 1.7793, "step": 79070 }, { "epoch": 2.63, "grad_norm": 0.6807305216789246, "learning_rate": 2.2155610161598337e-05, "loss": 1.6802, "step": 79071 }, { "epoch": 2.63, "grad_norm": 0.7148995995521545, "learning_rate": 2.215166820610541e-05, "loss": 1.6444, "step": 79072 }, { "epoch": 2.63, "grad_norm": 0.713009238243103, "learning_rate": 2.214772658787943e-05, "loss": 1.6998, "step": 79073 }, { "epoch": 2.63, "grad_norm": 0.7273699641227722, "learning_rate": 2.214378530692513e-05, "loss": 1.7259, "step": 79074 }, { "epoch": 2.63, "grad_norm": 0.6945869326591492, "learning_rate": 2.2139844363247374e-05, "loss": 1.6348, "step": 79075 }, { "epoch": 2.63, "grad_norm": 0.6767099499702454, "learning_rate": 2.2135903756850916e-05, "loss": 1.6294, "step": 79076 }, { "epoch": 2.63, "grad_norm": 0.7273815274238586, "learning_rate": 2.2131963487740456e-05, "loss": 1.6817, "step": 79077 }, { "epoch": 2.63, "grad_norm": 0.7004172801971436, "learning_rate": 2.212802355592089e-05, "loss": 1.7203, "step": 79078 }, { "epoch": 2.63, "grad_norm": 0.6999196410179138, "learning_rate": 2.2124083961397022e-05, "loss": 1.741, "step": 79079 }, { "epoch": 2.63, "grad_norm": 0.7165576219558716, "learning_rate": 2.2120144704173504e-05, "loss": 1.6674, "step": 79080 }, { "epoch": 2.63, "grad_norm": 0.7103376984596252, "learning_rate": 2.211620578425517e-05, "loss": 1.7352, "step": 79081 }, { "epoch": 2.63, "grad_norm": 0.7053493857383728, "learning_rate": 2.211226720164685e-05, "loss": 1.6706, "step": 79082 }, { "epoch": 2.63, "grad_norm": 0.6989637017250061, "learning_rate": 2.2108328956353306e-05, "loss": 1.6581, "step": 79083 }, { "epoch": 2.63, "grad_norm": 0.7127235531806946, "learning_rate": 2.2104391048379266e-05, "loss": 1.6605, "step": 79084 }, { "epoch": 2.63, "grad_norm": 0.6998273730278015, "learning_rate": 2.210045347772956e-05, "loss": 1.6865, "step": 79085 }, { "epoch": 2.63, "grad_norm": 0.7017351388931274, "learning_rate": 2.2096516244409024e-05, "loss": 1.6677, "step": 79086 }, { "epoch": 2.63, "grad_norm": 0.72539883852005, "learning_rate": 2.2092579348422278e-05, "loss": 1.6722, "step": 79087 }, { "epoch": 2.63, "grad_norm": 0.720737636089325, "learning_rate": 2.2088642789774224e-05, "loss": 1.7296, "step": 79088 }, { "epoch": 2.63, "grad_norm": 0.7209011316299438, "learning_rate": 2.2084706568469624e-05, "loss": 1.6448, "step": 79089 }, { "epoch": 2.63, "grad_norm": 0.7048086524009705, "learning_rate": 2.2080770684513273e-05, "loss": 1.7407, "step": 79090 }, { "epoch": 2.63, "grad_norm": 0.7067252397537231, "learning_rate": 2.2076835137909833e-05, "loss": 1.6743, "step": 79091 }, { "epoch": 2.63, "grad_norm": 0.7005746960639954, "learning_rate": 2.2072899928664166e-05, "loss": 1.6847, "step": 79092 }, { "epoch": 2.63, "grad_norm": 0.7173168063163757, "learning_rate": 2.2068965056781175e-05, "loss": 1.686, "step": 79093 }, { "epoch": 2.63, "grad_norm": 0.6947341561317444, "learning_rate": 2.2065030522265416e-05, "loss": 1.7007, "step": 79094 }, { "epoch": 2.63, "grad_norm": 0.6970395445823669, "learning_rate": 2.206109632512172e-05, "loss": 1.7145, "step": 79095 }, { "epoch": 2.63, "grad_norm": 0.7004928588867188, "learning_rate": 2.2057162465354984e-05, "loss": 1.6494, "step": 79096 }, { "epoch": 2.63, "grad_norm": 0.7363578081130981, "learning_rate": 2.2053228942969868e-05, "loss": 1.7155, "step": 79097 }, { "epoch": 2.63, "grad_norm": 0.7160245180130005, "learning_rate": 2.204929575797114e-05, "loss": 1.7288, "step": 79098 }, { "epoch": 2.63, "grad_norm": 0.6825883984565735, "learning_rate": 2.2045362910363697e-05, "loss": 1.631, "step": 79099 }, { "epoch": 2.63, "grad_norm": 0.691697359085083, "learning_rate": 2.2041430400152192e-05, "loss": 1.8009, "step": 79100 }, { "epoch": 2.63, "grad_norm": 0.7317186594009399, "learning_rate": 2.2037498227341398e-05, "loss": 1.674, "step": 79101 }, { "epoch": 2.63, "grad_norm": 0.8064430356025696, "learning_rate": 2.203356639193611e-05, "loss": 1.6715, "step": 79102 }, { "epoch": 2.63, "grad_norm": 0.7011322379112244, "learning_rate": 2.2029634893941184e-05, "loss": 1.7003, "step": 79103 }, { "epoch": 2.63, "grad_norm": 0.7101436257362366, "learning_rate": 2.2025703733361322e-05, "loss": 1.7041, "step": 79104 }, { "epoch": 2.63, "grad_norm": 0.6927158236503601, "learning_rate": 2.202177291020122e-05, "loss": 1.7198, "step": 79105 }, { "epoch": 2.63, "grad_norm": 0.6919114589691162, "learning_rate": 2.2017842424465804e-05, "loss": 1.6967, "step": 79106 }, { "epoch": 2.63, "grad_norm": 0.7036502361297607, "learning_rate": 2.2013912276159774e-05, "loss": 1.7067, "step": 79107 }, { "epoch": 2.63, "grad_norm": 0.739301323890686, "learning_rate": 2.2009982465287824e-05, "loss": 1.6537, "step": 79108 }, { "epoch": 2.63, "grad_norm": 0.6927520036697388, "learning_rate": 2.200605299185485e-05, "loss": 1.5999, "step": 79109 }, { "epoch": 2.63, "grad_norm": 0.7092453241348267, "learning_rate": 2.200212385586555e-05, "loss": 1.6645, "step": 79110 }, { "epoch": 2.63, "grad_norm": 0.70440274477005, "learning_rate": 2.199819505732472e-05, "loss": 1.7511, "step": 79111 }, { "epoch": 2.63, "grad_norm": 0.7031102180480957, "learning_rate": 2.1994266596237087e-05, "loss": 1.6657, "step": 79112 }, { "epoch": 2.63, "grad_norm": 0.704481303691864, "learning_rate": 2.1990338472607517e-05, "loss": 1.7041, "step": 79113 }, { "epoch": 2.63, "grad_norm": 0.7479228973388672, "learning_rate": 2.1986410686440668e-05, "loss": 1.758, "step": 79114 }, { "epoch": 2.63, "grad_norm": 0.6926924586296082, "learning_rate": 2.1982483237741344e-05, "loss": 1.617, "step": 79115 }, { "epoch": 2.63, "grad_norm": 0.7052071690559387, "learning_rate": 2.197855612651437e-05, "loss": 1.6868, "step": 79116 }, { "epoch": 2.63, "grad_norm": 0.696563184261322, "learning_rate": 2.1974629352764405e-05, "loss": 1.6513, "step": 79117 }, { "epoch": 2.63, "grad_norm": 0.7019625902175903, "learning_rate": 2.197070291649632e-05, "loss": 1.8035, "step": 79118 }, { "epoch": 2.63, "grad_norm": 0.6928296685218811, "learning_rate": 2.196677681771484e-05, "loss": 1.6479, "step": 79119 }, { "epoch": 2.63, "grad_norm": 0.7125601172447205, "learning_rate": 2.1962851056424658e-05, "loss": 1.6934, "step": 79120 }, { "epoch": 2.63, "grad_norm": 0.7154889702796936, "learning_rate": 2.1958925632630676e-05, "loss": 1.6729, "step": 79121 }, { "epoch": 2.63, "grad_norm": 0.7048255205154419, "learning_rate": 2.1955000546337554e-05, "loss": 1.619, "step": 79122 }, { "epoch": 2.63, "grad_norm": 0.7031285762786865, "learning_rate": 2.195107579755012e-05, "loss": 1.5819, "step": 79123 }, { "epoch": 2.63, "grad_norm": 0.7136786580085754, "learning_rate": 2.1947151386273077e-05, "loss": 1.7639, "step": 79124 }, { "epoch": 2.63, "grad_norm": 0.7085890769958496, "learning_rate": 2.1943227312511248e-05, "loss": 1.6993, "step": 79125 }, { "epoch": 2.63, "grad_norm": 0.7362212538719177, "learning_rate": 2.1939303576269396e-05, "loss": 1.7102, "step": 79126 }, { "epoch": 2.63, "grad_norm": 0.7277053594589233, "learning_rate": 2.193538017755222e-05, "loss": 1.7574, "step": 79127 }, { "epoch": 2.63, "grad_norm": 0.7222639322280884, "learning_rate": 2.1931457116364547e-05, "loss": 1.6225, "step": 79128 }, { "epoch": 2.63, "grad_norm": 0.6879972219467163, "learning_rate": 2.1927534392711077e-05, "loss": 1.714, "step": 79129 }, { "epoch": 2.63, "grad_norm": 0.7042703032493591, "learning_rate": 2.1923612006596636e-05, "loss": 1.7491, "step": 79130 }, { "epoch": 2.63, "grad_norm": 0.7121075391769409, "learning_rate": 2.191968995802592e-05, "loss": 1.7205, "step": 79131 }, { "epoch": 2.63, "grad_norm": 0.7036550045013428, "learning_rate": 2.1915768247003796e-05, "loss": 1.7274, "step": 79132 }, { "epoch": 2.63, "grad_norm": 0.7259431481361389, "learning_rate": 2.1911846873534956e-05, "loss": 1.6683, "step": 79133 }, { "epoch": 2.63, "grad_norm": 0.7090992331504822, "learning_rate": 2.1907925837624098e-05, "loss": 1.6876, "step": 79134 }, { "epoch": 2.63, "grad_norm": 0.726997971534729, "learning_rate": 2.1904005139276082e-05, "loss": 1.7104, "step": 79135 }, { "epoch": 2.63, "grad_norm": 0.7060258388519287, "learning_rate": 2.1900084778495642e-05, "loss": 1.5876, "step": 79136 }, { "epoch": 2.63, "grad_norm": 0.6980502009391785, "learning_rate": 2.1896164755287438e-05, "loss": 1.6142, "step": 79137 }, { "epoch": 2.63, "grad_norm": 0.733993649482727, "learning_rate": 2.1892245069656368e-05, "loss": 1.7379, "step": 79138 }, { "epoch": 2.63, "grad_norm": 0.6973732113838196, "learning_rate": 2.1888325721607158e-05, "loss": 1.6827, "step": 79139 }, { "epoch": 2.63, "grad_norm": 0.7043764591217041, "learning_rate": 2.1884406711144542e-05, "loss": 1.706, "step": 79140 }, { "epoch": 2.63, "grad_norm": 0.7334008812904358, "learning_rate": 2.1880488038273213e-05, "loss": 1.6901, "step": 79141 }, { "epoch": 2.63, "grad_norm": 0.7057526707649231, "learning_rate": 2.187656970299807e-05, "loss": 1.754, "step": 79142 }, { "epoch": 2.63, "grad_norm": 0.6822671890258789, "learning_rate": 2.1872651705323774e-05, "loss": 1.6773, "step": 79143 }, { "epoch": 2.63, "grad_norm": 0.7062817811965942, "learning_rate": 2.1868734045255055e-05, "loss": 1.7655, "step": 79144 }, { "epoch": 2.63, "grad_norm": 0.7097750306129456, "learning_rate": 2.186481672279671e-05, "loss": 1.6995, "step": 79145 }, { "epoch": 2.63, "grad_norm": 0.7028357982635498, "learning_rate": 2.1860899737953596e-05, "loss": 1.6695, "step": 79146 }, { "epoch": 2.63, "grad_norm": 0.7026415467262268, "learning_rate": 2.1856983090730284e-05, "loss": 1.6893, "step": 79147 }, { "epoch": 2.63, "grad_norm": 0.6913497447967529, "learning_rate": 2.18530667811316e-05, "loss": 1.681, "step": 79148 }, { "epoch": 2.63, "grad_norm": 0.7335169911384583, "learning_rate": 2.1849150809162342e-05, "loss": 1.6684, "step": 79149 }, { "epoch": 2.63, "grad_norm": 0.7177125215530396, "learning_rate": 2.184523517482727e-05, "loss": 1.697, "step": 79150 }, { "epoch": 2.63, "grad_norm": 0.7061675190925598, "learning_rate": 2.1841319878131016e-05, "loss": 1.6153, "step": 79151 }, { "epoch": 2.63, "grad_norm": 0.7177988290786743, "learning_rate": 2.183740491907844e-05, "loss": 1.6441, "step": 79152 }, { "epoch": 2.63, "grad_norm": 0.7018905282020569, "learning_rate": 2.183349029767434e-05, "loss": 1.58, "step": 79153 }, { "epoch": 2.63, "grad_norm": 0.7280373573303223, "learning_rate": 2.182957601392331e-05, "loss": 1.6987, "step": 79154 }, { "epoch": 2.63, "grad_norm": 0.6849190592765808, "learning_rate": 2.1825662067830184e-05, "loss": 1.7194, "step": 79155 }, { "epoch": 2.63, "grad_norm": 0.6984802484512329, "learning_rate": 2.182174845939979e-05, "loss": 1.719, "step": 79156 }, { "epoch": 2.63, "grad_norm": 0.7288265228271484, "learning_rate": 2.181783518863679e-05, "loss": 1.6369, "step": 79157 }, { "epoch": 2.63, "grad_norm": 0.6956774592399597, "learning_rate": 2.1813922255545878e-05, "loss": 1.5955, "step": 79158 }, { "epoch": 2.63, "grad_norm": 0.6914677619934082, "learning_rate": 2.1810009660131923e-05, "loss": 1.6266, "step": 79159 }, { "epoch": 2.63, "grad_norm": 0.7041899561882019, "learning_rate": 2.180609740239968e-05, "loss": 1.6604, "step": 79160 }, { "epoch": 2.63, "grad_norm": 0.6991369724273682, "learning_rate": 2.1802185482353786e-05, "loss": 1.6638, "step": 79161 }, { "epoch": 2.63, "grad_norm": 0.7033244967460632, "learning_rate": 2.179827389999903e-05, "loss": 1.6195, "step": 79162 }, { "epoch": 2.63, "grad_norm": 0.7012806534767151, "learning_rate": 2.1794362655340215e-05, "loss": 1.6945, "step": 79163 }, { "epoch": 2.63, "grad_norm": 0.6900011897087097, "learning_rate": 2.1790451748382066e-05, "loss": 1.6637, "step": 79164 }, { "epoch": 2.63, "grad_norm": 1.3128818273544312, "learning_rate": 2.1786541179129248e-05, "loss": 1.704, "step": 79165 }, { "epoch": 2.63, "grad_norm": 0.7300198078155518, "learning_rate": 2.1782630947586653e-05, "loss": 1.6339, "step": 79166 }, { "epoch": 2.63, "grad_norm": 0.7237817645072937, "learning_rate": 2.177872105375895e-05, "loss": 1.683, "step": 79167 }, { "epoch": 2.63, "grad_norm": 0.6974785327911377, "learning_rate": 2.17748114976508e-05, "loss": 1.6474, "step": 79168 }, { "epoch": 2.63, "grad_norm": 0.7540369629859924, "learning_rate": 2.1770902279267065e-05, "loss": 1.708, "step": 79169 }, { "epoch": 2.63, "grad_norm": 0.7169636487960815, "learning_rate": 2.176699339861251e-05, "loss": 1.667, "step": 79170 }, { "epoch": 2.63, "grad_norm": 0.7102590203285217, "learning_rate": 2.1763084855691824e-05, "loss": 1.7349, "step": 79171 }, { "epoch": 2.63, "grad_norm": 0.7354411482810974, "learning_rate": 2.1759176650509714e-05, "loss": 1.6745, "step": 79172 }, { "epoch": 2.63, "grad_norm": 0.7204651236534119, "learning_rate": 2.1755268783071e-05, "loss": 1.6582, "step": 79173 }, { "epoch": 2.63, "grad_norm": 0.7205672860145569, "learning_rate": 2.175136125338035e-05, "loss": 1.7019, "step": 79174 }, { "epoch": 2.63, "grad_norm": 0.7090582847595215, "learning_rate": 2.1747454061442593e-05, "loss": 1.6117, "step": 79175 }, { "epoch": 2.63, "grad_norm": 0.7039249539375305, "learning_rate": 2.1743547207262424e-05, "loss": 1.6505, "step": 79176 }, { "epoch": 2.63, "grad_norm": 0.7059487104415894, "learning_rate": 2.173964069084457e-05, "loss": 1.7067, "step": 79177 }, { "epoch": 2.63, "grad_norm": 0.7245348691940308, "learning_rate": 2.1735734512193802e-05, "loss": 1.7165, "step": 79178 }, { "epoch": 2.63, "grad_norm": 0.6860218644142151, "learning_rate": 2.1731828671314845e-05, "loss": 1.6676, "step": 79179 }, { "epoch": 2.63, "grad_norm": 0.7195138335227966, "learning_rate": 2.1727923168212458e-05, "loss": 1.6614, "step": 79180 }, { "epoch": 2.63, "grad_norm": 0.7154448628425598, "learning_rate": 2.172401800289134e-05, "loss": 1.6821, "step": 79181 }, { "epoch": 2.63, "grad_norm": 0.720504641532898, "learning_rate": 2.1720113175356323e-05, "loss": 1.6654, "step": 79182 }, { "epoch": 2.63, "grad_norm": 0.7466844320297241, "learning_rate": 2.171620868561207e-05, "loss": 1.6581, "step": 79183 }, { "epoch": 2.63, "grad_norm": 0.6874832510948181, "learning_rate": 2.1712304533663272e-05, "loss": 1.6642, "step": 79184 }, { "epoch": 2.63, "grad_norm": 0.6976184248924255, "learning_rate": 2.1708400719514828e-05, "loss": 1.6271, "step": 79185 }, { "epoch": 2.63, "grad_norm": 0.6962640285491943, "learning_rate": 2.170449724317134e-05, "loss": 1.7181, "step": 79186 }, { "epoch": 2.63, "grad_norm": 0.7113586068153381, "learning_rate": 2.170059410463756e-05, "loss": 1.6997, "step": 79187 }, { "epoch": 2.63, "grad_norm": 0.6844439506530762, "learning_rate": 2.1696691303918224e-05, "loss": 1.6628, "step": 79188 }, { "epoch": 2.63, "grad_norm": 0.7070878744125366, "learning_rate": 2.1692788841018194e-05, "loss": 1.6843, "step": 79189 }, { "epoch": 2.63, "grad_norm": 0.7202548384666443, "learning_rate": 2.1688886715942066e-05, "loss": 1.7184, "step": 79190 }, { "epoch": 2.63, "grad_norm": 0.6795023679733276, "learning_rate": 2.1684984928694604e-05, "loss": 1.6681, "step": 79191 }, { "epoch": 2.63, "grad_norm": 0.7170662879943848, "learning_rate": 2.1681083479280605e-05, "loss": 1.7137, "step": 79192 }, { "epoch": 2.63, "grad_norm": 0.6936640739440918, "learning_rate": 2.167718236770476e-05, "loss": 1.6522, "step": 79193 }, { "epoch": 2.63, "grad_norm": 0.713019847869873, "learning_rate": 2.1673281593971737e-05, "loss": 1.6911, "step": 79194 }, { "epoch": 2.63, "grad_norm": 0.7329114675521851, "learning_rate": 2.1669381158086364e-05, "loss": 1.642, "step": 79195 }, { "epoch": 2.63, "grad_norm": 0.6889581680297852, "learning_rate": 2.1665481060053436e-05, "loss": 1.622, "step": 79196 }, { "epoch": 2.63, "grad_norm": 0.7233296036720276, "learning_rate": 2.1661581299877518e-05, "loss": 1.703, "step": 79197 }, { "epoch": 2.63, "grad_norm": 0.6874685883522034, "learning_rate": 2.165768187756344e-05, "loss": 1.6728, "step": 79198 }, { "epoch": 2.63, "grad_norm": 0.6912566423416138, "learning_rate": 2.165378279311596e-05, "loss": 1.6788, "step": 79199 }, { "epoch": 2.63, "grad_norm": 0.7046595215797424, "learning_rate": 2.164988404653978e-05, "loss": 1.7255, "step": 79200 }, { "epoch": 2.64, "grad_norm": 0.7107841968536377, "learning_rate": 2.1645985637839558e-05, "loss": 1.6535, "step": 79201 }, { "epoch": 2.64, "grad_norm": 0.7243039608001709, "learning_rate": 2.1642087567020127e-05, "loss": 1.7143, "step": 79202 }, { "epoch": 2.64, "grad_norm": 0.7027750015258789, "learning_rate": 2.163818983408625e-05, "loss": 1.7147, "step": 79203 }, { "epoch": 2.64, "grad_norm": 0.6878147125244141, "learning_rate": 2.1634292439042522e-05, "loss": 1.6885, "step": 79204 }, { "epoch": 2.64, "grad_norm": 0.687904953956604, "learning_rate": 2.163039538189374e-05, "loss": 1.6783, "step": 79205 }, { "epoch": 2.64, "grad_norm": 0.7132155299186707, "learning_rate": 2.1626498662644698e-05, "loss": 1.6033, "step": 79206 }, { "epoch": 2.64, "grad_norm": 0.7004565000534058, "learning_rate": 2.1622602281300063e-05, "loss": 1.7057, "step": 79207 }, { "epoch": 2.64, "grad_norm": 0.7114771008491516, "learning_rate": 2.1618706237864524e-05, "loss": 1.7197, "step": 79208 }, { "epoch": 2.64, "grad_norm": 0.6927535533905029, "learning_rate": 2.1614810532342852e-05, "loss": 1.7013, "step": 79209 }, { "epoch": 2.64, "grad_norm": 0.6962171792984009, "learning_rate": 2.1610915164739907e-05, "loss": 1.6983, "step": 79210 }, { "epoch": 2.64, "grad_norm": 0.7144179940223694, "learning_rate": 2.1607020135060182e-05, "loss": 1.6493, "step": 79211 }, { "epoch": 2.64, "grad_norm": 0.7044366598129272, "learning_rate": 2.160312544330851e-05, "loss": 1.6756, "step": 79212 }, { "epoch": 2.64, "grad_norm": 0.748313307762146, "learning_rate": 2.1599231089489654e-05, "loss": 1.7445, "step": 79213 }, { "epoch": 2.64, "grad_norm": 0.6977949738502502, "learning_rate": 2.1595337073608343e-05, "loss": 1.6692, "step": 79214 }, { "epoch": 2.64, "grad_norm": 0.7128545641899109, "learning_rate": 2.1591443395669207e-05, "loss": 1.7346, "step": 79215 }, { "epoch": 2.64, "grad_norm": 0.7228742241859436, "learning_rate": 2.158755005567707e-05, "loss": 1.7074, "step": 79216 }, { "epoch": 2.64, "grad_norm": 0.7183970808982849, "learning_rate": 2.158365705363667e-05, "loss": 1.6823, "step": 79217 }, { "epoch": 2.64, "grad_norm": 0.7007593512535095, "learning_rate": 2.15797643895526e-05, "loss": 1.7056, "step": 79218 }, { "epoch": 2.64, "grad_norm": 0.7189680933952332, "learning_rate": 2.1575872063429687e-05, "loss": 1.6063, "step": 79219 }, { "epoch": 2.64, "grad_norm": 0.6856628656387329, "learning_rate": 2.1571980075272665e-05, "loss": 1.5967, "step": 79220 }, { "epoch": 2.64, "grad_norm": 0.714914083480835, "learning_rate": 2.156808842508626e-05, "loss": 1.6646, "step": 79221 }, { "epoch": 2.64, "grad_norm": 0.6967613101005554, "learning_rate": 2.1564197112875137e-05, "loss": 1.6689, "step": 79222 }, { "epoch": 2.64, "grad_norm": 0.680988609790802, "learning_rate": 2.156030613864409e-05, "loss": 1.6441, "step": 79223 }, { "epoch": 2.64, "grad_norm": 0.7278087735176086, "learning_rate": 2.1556415502397782e-05, "loss": 1.7113, "step": 79224 }, { "epoch": 2.64, "grad_norm": 0.6835595965385437, "learning_rate": 2.1552525204140947e-05, "loss": 1.6396, "step": 79225 }, { "epoch": 2.64, "grad_norm": 0.6948255300521851, "learning_rate": 2.1548635243878344e-05, "loss": 1.6453, "step": 79226 }, { "epoch": 2.64, "grad_norm": 0.7235782742500305, "learning_rate": 2.15447456216146e-05, "loss": 1.7304, "step": 79227 }, { "epoch": 2.64, "grad_norm": 0.7155859470367432, "learning_rate": 2.1540856337354583e-05, "loss": 1.6927, "step": 79228 }, { "epoch": 2.64, "grad_norm": 0.6927465796470642, "learning_rate": 2.1536967391102888e-05, "loss": 1.6721, "step": 79229 }, { "epoch": 2.64, "grad_norm": 0.7153303027153015, "learning_rate": 2.1533078782864342e-05, "loss": 1.7243, "step": 79230 }, { "epoch": 2.64, "grad_norm": 0.7052556872367859, "learning_rate": 2.152919051264361e-05, "loss": 1.7159, "step": 79231 }, { "epoch": 2.64, "grad_norm": 0.7139330506324768, "learning_rate": 2.1525302580445323e-05, "loss": 1.7119, "step": 79232 }, { "epoch": 2.64, "grad_norm": 0.7158292531967163, "learning_rate": 2.1521414986274378e-05, "loss": 1.7076, "step": 79233 }, { "epoch": 2.64, "grad_norm": 0.7000029683113098, "learning_rate": 2.1517527730135332e-05, "loss": 1.7292, "step": 79234 }, { "epoch": 2.64, "grad_norm": 0.7094786763191223, "learning_rate": 2.151364081203305e-05, "loss": 1.6274, "step": 79235 }, { "epoch": 2.64, "grad_norm": 0.698040246963501, "learning_rate": 2.1509754231972132e-05, "loss": 1.6313, "step": 79236 }, { "epoch": 2.64, "grad_norm": 0.7044166326522827, "learning_rate": 2.1505867989957336e-05, "loss": 1.7219, "step": 79237 }, { "epoch": 2.64, "grad_norm": 0.7346262335777283, "learning_rate": 2.1501982085993396e-05, "loss": 1.6658, "step": 79238 }, { "epoch": 2.64, "grad_norm": 0.7283273339271545, "learning_rate": 2.1498096520084974e-05, "loss": 1.6681, "step": 79239 }, { "epoch": 2.64, "grad_norm": 0.6939628720283508, "learning_rate": 2.1494211292236895e-05, "loss": 1.6182, "step": 79240 }, { "epoch": 2.64, "grad_norm": 0.7491776943206787, "learning_rate": 2.1490326402453726e-05, "loss": 1.6332, "step": 79241 }, { "epoch": 2.64, "grad_norm": 0.7357062697410583, "learning_rate": 2.1486441850740333e-05, "loss": 1.692, "step": 79242 }, { "epoch": 2.64, "grad_norm": 0.7145975232124329, "learning_rate": 2.1482557637101373e-05, "loss": 1.6587, "step": 79243 }, { "epoch": 2.64, "grad_norm": 0.7208654880523682, "learning_rate": 2.1478673761541476e-05, "loss": 1.672, "step": 79244 }, { "epoch": 2.64, "grad_norm": 0.7463023662567139, "learning_rate": 2.147479022406551e-05, "loss": 1.6725, "step": 79245 }, { "epoch": 2.64, "grad_norm": 0.7321244478225708, "learning_rate": 2.1470907024678095e-05, "loss": 1.7043, "step": 79246 }, { "epoch": 2.64, "grad_norm": 0.7284584045410156, "learning_rate": 2.1467024163383906e-05, "loss": 1.7249, "step": 79247 }, { "epoch": 2.64, "grad_norm": 0.6802793145179749, "learning_rate": 2.1463141640187698e-05, "loss": 1.6159, "step": 79248 }, { "epoch": 2.64, "grad_norm": 0.6853561401367188, "learning_rate": 2.145925945509427e-05, "loss": 1.6163, "step": 79249 }, { "epoch": 2.64, "grad_norm": 0.7006123661994934, "learning_rate": 2.1455377608108248e-05, "loss": 1.6569, "step": 79250 }, { "epoch": 2.64, "grad_norm": 0.6904434561729431, "learning_rate": 2.14514960992343e-05, "loss": 1.7261, "step": 79251 }, { "epoch": 2.64, "grad_norm": 0.7371313571929932, "learning_rate": 2.144761492847725e-05, "loss": 1.7115, "step": 79252 }, { "epoch": 2.64, "grad_norm": 0.7175975441932678, "learning_rate": 2.144373409584177e-05, "loss": 1.5752, "step": 79253 }, { "epoch": 2.64, "grad_norm": 0.7186074256896973, "learning_rate": 2.143985360133248e-05, "loss": 1.6374, "step": 79254 }, { "epoch": 2.64, "grad_norm": 0.7083320021629333, "learning_rate": 2.143597344495418e-05, "loss": 1.5475, "step": 79255 }, { "epoch": 2.64, "grad_norm": 0.7054367661476135, "learning_rate": 2.14320936267116e-05, "loss": 1.7191, "step": 79256 }, { "epoch": 2.64, "grad_norm": 0.7047090530395508, "learning_rate": 2.1428214146609434e-05, "loss": 1.7049, "step": 79257 }, { "epoch": 2.64, "grad_norm": 0.7211371660232544, "learning_rate": 2.142433500465228e-05, "loss": 1.6726, "step": 79258 }, { "epoch": 2.64, "grad_norm": 0.7060158252716064, "learning_rate": 2.1420456200845036e-05, "loss": 1.7422, "step": 79259 }, { "epoch": 2.64, "grad_norm": 0.6988440752029419, "learning_rate": 2.1416577735192263e-05, "loss": 1.6696, "step": 79260 }, { "epoch": 2.64, "grad_norm": 0.736777663230896, "learning_rate": 2.1412699607698692e-05, "loss": 1.7717, "step": 79261 }, { "epoch": 2.64, "grad_norm": 0.7306137681007385, "learning_rate": 2.1408821818369083e-05, "loss": 1.6935, "step": 79262 }, { "epoch": 2.64, "grad_norm": 0.678089439868927, "learning_rate": 2.1404944367208165e-05, "loss": 1.705, "step": 79263 }, { "epoch": 2.64, "grad_norm": 0.6975833177566528, "learning_rate": 2.1401067254220538e-05, "loss": 1.6156, "step": 79264 }, { "epoch": 2.64, "grad_norm": 0.7194300293922424, "learning_rate": 2.139719047941093e-05, "loss": 1.6924, "step": 79265 }, { "epoch": 2.64, "grad_norm": 0.7207329869270325, "learning_rate": 2.1393314042784138e-05, "loss": 1.6494, "step": 79266 }, { "epoch": 2.64, "grad_norm": 0.7275721430778503, "learning_rate": 2.1389437944344822e-05, "loss": 1.636, "step": 79267 }, { "epoch": 2.64, "grad_norm": 0.7272615432739258, "learning_rate": 2.1385562184097648e-05, "loss": 1.7022, "step": 79268 }, { "epoch": 2.64, "grad_norm": 0.6929265856742859, "learning_rate": 2.1381686762047313e-05, "loss": 1.6552, "step": 79269 }, { "epoch": 2.64, "grad_norm": 0.7183367609977722, "learning_rate": 2.1377811678198675e-05, "loss": 1.7104, "step": 79270 }, { "epoch": 2.64, "grad_norm": 0.727273166179657, "learning_rate": 2.1373936932556235e-05, "loss": 1.7113, "step": 79271 }, { "epoch": 2.64, "grad_norm": 0.7043347358703613, "learning_rate": 2.1370062525124754e-05, "loss": 1.6685, "step": 79272 }, { "epoch": 2.64, "grad_norm": 0.7293225526809692, "learning_rate": 2.136618845590903e-05, "loss": 1.7039, "step": 79273 }, { "epoch": 2.64, "grad_norm": 0.7141817808151245, "learning_rate": 2.136231472491369e-05, "loss": 1.6998, "step": 79274 }, { "epoch": 2.64, "grad_norm": 0.7233133912086487, "learning_rate": 2.1358441332143395e-05, "loss": 1.6463, "step": 79275 }, { "epoch": 2.64, "grad_norm": 0.7173207998275757, "learning_rate": 2.1354568277602912e-05, "loss": 1.638, "step": 79276 }, { "epoch": 2.64, "grad_norm": 0.6870747804641724, "learning_rate": 2.1350695561297003e-05, "loss": 1.674, "step": 79277 }, { "epoch": 2.64, "grad_norm": 0.7336708307266235, "learning_rate": 2.134682318323023e-05, "loss": 1.7001, "step": 79278 }, { "epoch": 2.64, "grad_norm": 0.7153046727180481, "learning_rate": 2.1342951143407328e-05, "loss": 1.6945, "step": 79279 }, { "epoch": 2.64, "grad_norm": 0.7165770530700684, "learning_rate": 2.1339079441833052e-05, "loss": 1.677, "step": 79280 }, { "epoch": 2.64, "grad_norm": 0.7172732949256897, "learning_rate": 2.13352080785121e-05, "loss": 1.6546, "step": 79281 }, { "epoch": 2.64, "grad_norm": 0.6984327435493469, "learning_rate": 2.1331337053449105e-05, "loss": 1.6986, "step": 79282 }, { "epoch": 2.64, "grad_norm": 0.7180614471435547, "learning_rate": 2.132746636664886e-05, "loss": 1.739, "step": 79283 }, { "epoch": 2.64, "grad_norm": 0.7027237415313721, "learning_rate": 2.1323596018115994e-05, "loss": 1.6553, "step": 79284 }, { "epoch": 2.64, "grad_norm": 0.7057167291641235, "learning_rate": 2.1319726007855177e-05, "loss": 1.6944, "step": 79285 }, { "epoch": 2.64, "grad_norm": 0.7226808071136475, "learning_rate": 2.1315856335871162e-05, "loss": 1.6938, "step": 79286 }, { "epoch": 2.64, "grad_norm": 0.7158337235450745, "learning_rate": 2.131198700216865e-05, "loss": 1.7103, "step": 79287 }, { "epoch": 2.64, "grad_norm": 0.7184257507324219, "learning_rate": 2.130811800675234e-05, "loss": 1.6293, "step": 79288 }, { "epoch": 2.64, "grad_norm": 0.7268785238265991, "learning_rate": 2.130424934962689e-05, "loss": 1.5943, "step": 79289 }, { "epoch": 2.64, "grad_norm": 0.7340691089630127, "learning_rate": 2.1300381030797032e-05, "loss": 1.6877, "step": 79290 }, { "epoch": 2.64, "grad_norm": 0.719219446182251, "learning_rate": 2.129651305026743e-05, "loss": 1.6731, "step": 79291 }, { "epoch": 2.64, "grad_norm": 0.7047850489616394, "learning_rate": 2.1292645408042808e-05, "loss": 1.6028, "step": 79292 }, { "epoch": 2.64, "grad_norm": 0.6947126984596252, "learning_rate": 2.128877810412787e-05, "loss": 1.6398, "step": 79293 }, { "epoch": 2.64, "grad_norm": 0.7059647440910339, "learning_rate": 2.1284911138527238e-05, "loss": 1.5911, "step": 79294 }, { "epoch": 2.64, "grad_norm": 0.7174398899078369, "learning_rate": 2.1281044511245714e-05, "loss": 1.6053, "step": 79295 }, { "epoch": 2.64, "grad_norm": 0.7075752019882202, "learning_rate": 2.127717822228786e-05, "loss": 1.614, "step": 79296 }, { "epoch": 2.64, "grad_norm": 0.6993440389633179, "learning_rate": 2.1273312271658504e-05, "loss": 1.6541, "step": 79297 }, { "epoch": 2.64, "grad_norm": 0.7205597162246704, "learning_rate": 2.1269446659362243e-05, "loss": 1.7157, "step": 79298 }, { "epoch": 2.64, "grad_norm": 0.6805425882339478, "learning_rate": 2.126558138540384e-05, "loss": 1.6439, "step": 79299 }, { "epoch": 2.64, "grad_norm": 0.7246856093406677, "learning_rate": 2.1261716449787957e-05, "loss": 1.7704, "step": 79300 }, { "epoch": 2.64, "grad_norm": 0.722633957862854, "learning_rate": 2.1257851852519225e-05, "loss": 1.6668, "step": 79301 }, { "epoch": 2.64, "grad_norm": 0.6744123101234436, "learning_rate": 2.1253987593602473e-05, "loss": 1.6946, "step": 79302 }, { "epoch": 2.64, "grad_norm": 0.7049115300178528, "learning_rate": 2.1250123673042264e-05, "loss": 1.6277, "step": 79303 }, { "epoch": 2.64, "grad_norm": 0.7104094624519348, "learning_rate": 2.124626009084329e-05, "loss": 1.6976, "step": 79304 }, { "epoch": 2.64, "grad_norm": 0.7148633599281311, "learning_rate": 2.1242396847010322e-05, "loss": 1.7166, "step": 79305 }, { "epoch": 2.64, "grad_norm": 0.6846765875816345, "learning_rate": 2.1238533941548018e-05, "loss": 1.6946, "step": 79306 }, { "epoch": 2.64, "grad_norm": 0.7006855010986328, "learning_rate": 2.1234671374461078e-05, "loss": 1.7944, "step": 79307 }, { "epoch": 2.64, "grad_norm": 0.717258095741272, "learning_rate": 2.1230809145754124e-05, "loss": 1.6807, "step": 79308 }, { "epoch": 2.64, "grad_norm": 0.7298189997673035, "learning_rate": 2.1226947255431927e-05, "loss": 1.7005, "step": 79309 }, { "epoch": 2.64, "grad_norm": 0.6938748359680176, "learning_rate": 2.1223085703499175e-05, "loss": 1.6631, "step": 79310 }, { "epoch": 2.64, "grad_norm": 0.7182602286338806, "learning_rate": 2.121922448996044e-05, "loss": 1.6747, "step": 79311 }, { "epoch": 2.64, "grad_norm": 0.7150106430053711, "learning_rate": 2.1215363614820514e-05, "loss": 1.7394, "step": 79312 }, { "epoch": 2.64, "grad_norm": 0.7146007418632507, "learning_rate": 2.1211503078084125e-05, "loss": 1.7518, "step": 79313 }, { "epoch": 2.64, "grad_norm": 0.7160328030586243, "learning_rate": 2.1207642879755804e-05, "loss": 1.6479, "step": 79314 }, { "epoch": 2.64, "grad_norm": 0.7190636396408081, "learning_rate": 2.120378301984035e-05, "loss": 1.7148, "step": 79315 }, { "epoch": 2.64, "grad_norm": 0.705231785774231, "learning_rate": 2.1199923498342452e-05, "loss": 1.6692, "step": 79316 }, { "epoch": 2.64, "grad_norm": 0.6941870450973511, "learning_rate": 2.119606431526678e-05, "loss": 1.6684, "step": 79317 }, { "epoch": 2.64, "grad_norm": 0.7226703763008118, "learning_rate": 2.1192205470617964e-05, "loss": 1.6156, "step": 79318 }, { "epoch": 2.64, "grad_norm": 0.7136737108230591, "learning_rate": 2.1188346964400727e-05, "loss": 1.6681, "step": 79319 }, { "epoch": 2.64, "grad_norm": 0.6915284991264343, "learning_rate": 2.1184488796619837e-05, "loss": 1.6343, "step": 79320 }, { "epoch": 2.64, "grad_norm": 0.7095453143119812, "learning_rate": 2.1180630967279788e-05, "loss": 1.5942, "step": 79321 }, { "epoch": 2.64, "grad_norm": 0.6970566511154175, "learning_rate": 2.117677347638541e-05, "loss": 1.6459, "step": 79322 }, { "epoch": 2.64, "grad_norm": 0.6927213072776794, "learning_rate": 2.117291632394137e-05, "loss": 1.7099, "step": 79323 }, { "epoch": 2.64, "grad_norm": 0.7104623317718506, "learning_rate": 2.1169059509952325e-05, "loss": 1.6901, "step": 79324 }, { "epoch": 2.64, "grad_norm": 0.7135089635848999, "learning_rate": 2.1165203034422938e-05, "loss": 1.6193, "step": 79325 }, { "epoch": 2.64, "grad_norm": 0.691771924495697, "learning_rate": 2.1161346897357878e-05, "loss": 1.6993, "step": 79326 }, { "epoch": 2.64, "grad_norm": 0.6951087713241577, "learning_rate": 2.115749109876197e-05, "loss": 1.6347, "step": 79327 }, { "epoch": 2.64, "grad_norm": 0.7128345370292664, "learning_rate": 2.1153635638639677e-05, "loss": 1.6937, "step": 79328 }, { "epoch": 2.64, "grad_norm": 0.6946837902069092, "learning_rate": 2.1149780516995796e-05, "loss": 1.6141, "step": 79329 }, { "epoch": 2.64, "grad_norm": 0.6996124982833862, "learning_rate": 2.1145925733835022e-05, "loss": 1.6831, "step": 79330 }, { "epoch": 2.64, "grad_norm": 0.6871052384376526, "learning_rate": 2.114207128916202e-05, "loss": 1.6858, "step": 79331 }, { "epoch": 2.64, "grad_norm": 0.7361276745796204, "learning_rate": 2.113821718298142e-05, "loss": 1.7467, "step": 79332 }, { "epoch": 2.64, "grad_norm": 0.7098603248596191, "learning_rate": 2.113436341529795e-05, "loss": 1.6725, "step": 79333 }, { "epoch": 2.64, "grad_norm": 0.722089946269989, "learning_rate": 2.1130509986116308e-05, "loss": 1.6814, "step": 79334 }, { "epoch": 2.64, "grad_norm": 0.691326916217804, "learning_rate": 2.1126656895441085e-05, "loss": 1.7041, "step": 79335 }, { "epoch": 2.64, "grad_norm": 0.6886999607086182, "learning_rate": 2.1122804143276983e-05, "loss": 1.7186, "step": 79336 }, { "epoch": 2.64, "grad_norm": 0.6982123255729675, "learning_rate": 2.1118951729628765e-05, "loss": 1.6908, "step": 79337 }, { "epoch": 2.64, "grad_norm": 0.7176430225372314, "learning_rate": 2.1115099654501055e-05, "loss": 1.683, "step": 79338 }, { "epoch": 2.64, "grad_norm": 0.695713460445404, "learning_rate": 2.111124791789849e-05, "loss": 1.7039, "step": 79339 }, { "epoch": 2.64, "grad_norm": 0.7091767191886902, "learning_rate": 2.1107396519825827e-05, "loss": 1.685, "step": 79340 }, { "epoch": 2.64, "grad_norm": 0.7185739874839783, "learning_rate": 2.1103545460287663e-05, "loss": 1.7053, "step": 79341 }, { "epoch": 2.64, "grad_norm": 0.7051763534545898, "learning_rate": 2.1099694739288663e-05, "loss": 1.7488, "step": 79342 }, { "epoch": 2.64, "grad_norm": 0.7065367698669434, "learning_rate": 2.109584435683359e-05, "loss": 1.6868, "step": 79343 }, { "epoch": 2.64, "grad_norm": 0.7306596040725708, "learning_rate": 2.109199431292704e-05, "loss": 1.6826, "step": 79344 }, { "epoch": 2.64, "grad_norm": 0.7278570532798767, "learning_rate": 2.1088144607573744e-05, "loss": 1.6963, "step": 79345 }, { "epoch": 2.64, "grad_norm": 0.7007184028625488, "learning_rate": 2.1084295240778294e-05, "loss": 1.6491, "step": 79346 }, { "epoch": 2.64, "grad_norm": 0.7015874981880188, "learning_rate": 2.1080446212545454e-05, "loss": 1.7243, "step": 79347 }, { "epoch": 2.64, "grad_norm": 0.7259999513626099, "learning_rate": 2.1076597522879858e-05, "loss": 1.7714, "step": 79348 }, { "epoch": 2.64, "grad_norm": 0.7143465876579285, "learning_rate": 2.107274917178613e-05, "loss": 1.7232, "step": 79349 }, { "epoch": 2.64, "grad_norm": 0.7251065969467163, "learning_rate": 2.1068901159269035e-05, "loss": 1.6782, "step": 79350 }, { "epoch": 2.64, "grad_norm": 0.7267035841941833, "learning_rate": 2.106505348533314e-05, "loss": 1.7784, "step": 79351 }, { "epoch": 2.64, "grad_norm": 0.7177829146385193, "learning_rate": 2.1061206149983234e-05, "loss": 1.6903, "step": 79352 }, { "epoch": 2.64, "grad_norm": 0.6945011019706726, "learning_rate": 2.1057359153223917e-05, "loss": 1.6564, "step": 79353 }, { "epoch": 2.64, "grad_norm": 0.7027006149291992, "learning_rate": 2.105351249505982e-05, "loss": 1.7101, "step": 79354 }, { "epoch": 2.64, "grad_norm": 0.7291499376296997, "learning_rate": 2.1049666175495706e-05, "loss": 1.6958, "step": 79355 }, { "epoch": 2.64, "grad_norm": 0.6949615478515625, "learning_rate": 2.1045820194536165e-05, "loss": 1.6133, "step": 79356 }, { "epoch": 2.64, "grad_norm": 0.7065855264663696, "learning_rate": 2.1041974552185937e-05, "loss": 1.7309, "step": 79357 }, { "epoch": 2.64, "grad_norm": 0.7072240710258484, "learning_rate": 2.1038129248449577e-05, "loss": 1.6991, "step": 79358 }, { "epoch": 2.64, "grad_norm": 0.7145618796348572, "learning_rate": 2.1034284283331883e-05, "loss": 1.6871, "step": 79359 }, { "epoch": 2.64, "grad_norm": 0.7327383160591125, "learning_rate": 2.1030439656837484e-05, "loss": 1.6851, "step": 79360 }, { "epoch": 2.64, "grad_norm": 0.7221378684043884, "learning_rate": 2.1026595368970946e-05, "loss": 1.7196, "step": 79361 }, { "epoch": 2.64, "grad_norm": 0.7088655829429626, "learning_rate": 2.1022751419737095e-05, "loss": 1.7164, "step": 79362 }, { "epoch": 2.64, "grad_norm": 0.7125658392906189, "learning_rate": 2.1018907809140493e-05, "loss": 1.7005, "step": 79363 }, { "epoch": 2.64, "grad_norm": 0.6904842257499695, "learning_rate": 2.101506453718581e-05, "loss": 1.6269, "step": 79364 }, { "epoch": 2.64, "grad_norm": 0.7093901634216309, "learning_rate": 2.1011221603877703e-05, "loss": 1.6867, "step": 79365 }, { "epoch": 2.64, "grad_norm": 0.6731642484664917, "learning_rate": 2.1007379009220937e-05, "loss": 1.6857, "step": 79366 }, { "epoch": 2.64, "grad_norm": 0.7044203281402588, "learning_rate": 2.1003536753220072e-05, "loss": 1.7922, "step": 79367 }, { "epoch": 2.64, "grad_norm": 0.7134957909584045, "learning_rate": 2.099969483587981e-05, "loss": 1.6088, "step": 79368 }, { "epoch": 2.64, "grad_norm": 0.6843382120132446, "learning_rate": 2.099585325720481e-05, "loss": 1.7635, "step": 79369 }, { "epoch": 2.64, "grad_norm": 0.6853883862495422, "learning_rate": 2.0992012017199767e-05, "loss": 1.7031, "step": 79370 }, { "epoch": 2.64, "grad_norm": 0.7274788618087769, "learning_rate": 2.0988171115869246e-05, "loss": 1.6817, "step": 79371 }, { "epoch": 2.64, "grad_norm": 0.71001136302948, "learning_rate": 2.0984330553217977e-05, "loss": 1.7992, "step": 79372 }, { "epoch": 2.64, "grad_norm": 0.7208215594291687, "learning_rate": 2.0980490329250688e-05, "loss": 1.7257, "step": 79373 }, { "epoch": 2.64, "grad_norm": 0.7331644296646118, "learning_rate": 2.097665044397194e-05, "loss": 1.6562, "step": 79374 }, { "epoch": 2.64, "grad_norm": 0.7227067351341248, "learning_rate": 2.0972810897386404e-05, "loss": 1.6807, "step": 79375 }, { "epoch": 2.64, "grad_norm": 0.6936118006706238, "learning_rate": 2.096897168949877e-05, "loss": 1.6837, "step": 79376 }, { "epoch": 2.64, "grad_norm": 0.7052914500236511, "learning_rate": 2.0965132820313735e-05, "loss": 1.8079, "step": 79377 }, { "epoch": 2.64, "grad_norm": 0.7198008894920349, "learning_rate": 2.0961294289835863e-05, "loss": 1.6673, "step": 79378 }, { "epoch": 2.64, "grad_norm": 0.7128058671951294, "learning_rate": 2.0957456098069847e-05, "loss": 1.589, "step": 79379 }, { "epoch": 2.64, "grad_norm": 0.7040427923202515, "learning_rate": 2.0953618245020453e-05, "loss": 1.6788, "step": 79380 }, { "epoch": 2.64, "grad_norm": 0.7088665962219238, "learning_rate": 2.0949780730692178e-05, "loss": 1.742, "step": 79381 }, { "epoch": 2.64, "grad_norm": 0.7083588242530823, "learning_rate": 2.094594355508975e-05, "loss": 1.6501, "step": 79382 }, { "epoch": 2.64, "grad_norm": 0.6807712912559509, "learning_rate": 2.0942106718217864e-05, "loss": 1.6275, "step": 79383 }, { "epoch": 2.64, "grad_norm": 0.7308239340782166, "learning_rate": 2.0938270220081156e-05, "loss": 1.7301, "step": 79384 }, { "epoch": 2.64, "grad_norm": 0.7352647185325623, "learning_rate": 2.0934434060684212e-05, "loss": 1.6154, "step": 79385 }, { "epoch": 2.64, "grad_norm": 0.6836358904838562, "learning_rate": 2.093059824003177e-05, "loss": 1.6664, "step": 79386 }, { "epoch": 2.64, "grad_norm": 0.7053694725036621, "learning_rate": 2.0926762758128522e-05, "loss": 1.6824, "step": 79387 }, { "epoch": 2.64, "grad_norm": 0.7190857529640198, "learning_rate": 2.0922927614979e-05, "loss": 1.7152, "step": 79388 }, { "epoch": 2.64, "grad_norm": 0.680881917476654, "learning_rate": 2.09190928105879e-05, "loss": 1.6102, "step": 79389 }, { "epoch": 2.64, "grad_norm": 0.7041089534759521, "learning_rate": 2.0915258344959983e-05, "loss": 1.6601, "step": 79390 }, { "epoch": 2.64, "grad_norm": 0.6878793835639954, "learning_rate": 2.0911424218099783e-05, "loss": 1.7017, "step": 79391 }, { "epoch": 2.64, "grad_norm": 0.7194617390632629, "learning_rate": 2.0907590430011955e-05, "loss": 1.6342, "step": 79392 }, { "epoch": 2.64, "grad_norm": 0.7053330540657043, "learning_rate": 2.0903756980701202e-05, "loss": 1.69, "step": 79393 }, { "epoch": 2.64, "grad_norm": 0.7093979716300964, "learning_rate": 2.0899923870172253e-05, "loss": 1.7341, "step": 79394 }, { "epoch": 2.64, "grad_norm": 0.7312462329864502, "learning_rate": 2.089609109842957e-05, "loss": 1.6358, "step": 79395 }, { "epoch": 2.64, "grad_norm": 0.7056878805160522, "learning_rate": 2.0892258665477945e-05, "loss": 1.6401, "step": 79396 }, { "epoch": 2.64, "grad_norm": 0.7175637483596802, "learning_rate": 2.088842657132198e-05, "loss": 1.5983, "step": 79397 }, { "epoch": 2.64, "grad_norm": 0.7185653448104858, "learning_rate": 2.0884594815966405e-05, "loss": 1.6072, "step": 79398 }, { "epoch": 2.64, "grad_norm": 0.7041810750961304, "learning_rate": 2.0880763399415714e-05, "loss": 1.6544, "step": 79399 }, { "epoch": 2.64, "grad_norm": 0.7104769945144653, "learning_rate": 2.08769323216747e-05, "loss": 1.6705, "step": 79400 }, { "epoch": 2.64, "grad_norm": 0.7053929567337036, "learning_rate": 2.0873101582747966e-05, "loss": 1.7602, "step": 79401 }, { "epoch": 2.64, "grad_norm": 0.7001231908798218, "learning_rate": 2.0869271182640135e-05, "loss": 1.7149, "step": 79402 }, { "epoch": 2.64, "grad_norm": 0.7103492617607117, "learning_rate": 2.0865441121355875e-05, "loss": 1.6835, "step": 79403 }, { "epoch": 2.64, "grad_norm": 0.7306047677993774, "learning_rate": 2.086161139889988e-05, "loss": 1.742, "step": 79404 }, { "epoch": 2.64, "grad_norm": 0.7228417992591858, "learning_rate": 2.0857782015276747e-05, "loss": 1.6663, "step": 79405 }, { "epoch": 2.64, "grad_norm": 0.7270951867103577, "learning_rate": 2.0853952970491105e-05, "loss": 1.6762, "step": 79406 }, { "epoch": 2.64, "grad_norm": 0.728193998336792, "learning_rate": 2.0850124264547652e-05, "loss": 1.6791, "step": 79407 }, { "epoch": 2.64, "grad_norm": 0.7268502116203308, "learning_rate": 2.0846295897451015e-05, "loss": 1.6356, "step": 79408 }, { "epoch": 2.64, "grad_norm": 0.7024751305580139, "learning_rate": 2.084246786920586e-05, "loss": 1.6675, "step": 79409 }, { "epoch": 2.64, "grad_norm": 0.7046233415603638, "learning_rate": 2.0838640179816845e-05, "loss": 1.6203, "step": 79410 }, { "epoch": 2.64, "grad_norm": 0.7090960144996643, "learning_rate": 2.0834812829288507e-05, "loss": 1.6669, "step": 79411 }, { "epoch": 2.64, "grad_norm": 0.7243894338607788, "learning_rate": 2.0830985817625634e-05, "loss": 1.7012, "step": 79412 }, { "epoch": 2.64, "grad_norm": 0.715607225894928, "learning_rate": 2.082715914483276e-05, "loss": 1.7969, "step": 79413 }, { "epoch": 2.64, "grad_norm": 0.7181900143623352, "learning_rate": 2.082333281091465e-05, "loss": 1.7237, "step": 79414 }, { "epoch": 2.64, "grad_norm": 0.7167553901672363, "learning_rate": 2.08195068158758e-05, "loss": 1.6418, "step": 79415 }, { "epoch": 2.64, "grad_norm": 0.7089090347290039, "learning_rate": 2.0815681159720997e-05, "loss": 1.6605, "step": 79416 }, { "epoch": 2.64, "grad_norm": 0.6993532180786133, "learning_rate": 2.0811855842454815e-05, "loss": 1.6602, "step": 79417 }, { "epoch": 2.64, "grad_norm": 0.6973192095756531, "learning_rate": 2.080803086408188e-05, "loss": 1.6772, "step": 79418 }, { "epoch": 2.64, "grad_norm": 0.6814520359039307, "learning_rate": 2.080420622460689e-05, "loss": 1.5845, "step": 79419 }, { "epoch": 2.64, "grad_norm": 0.7084118127822876, "learning_rate": 2.080038192403444e-05, "loss": 1.7112, "step": 79420 }, { "epoch": 2.64, "grad_norm": 0.6998184323310852, "learning_rate": 2.0796557962369154e-05, "loss": 1.7523, "step": 79421 }, { "epoch": 2.64, "grad_norm": 0.7052561044692993, "learning_rate": 2.0792734339615703e-05, "loss": 1.7379, "step": 79422 }, { "epoch": 2.64, "grad_norm": 0.7083977460861206, "learning_rate": 2.078891105577881e-05, "loss": 1.6688, "step": 79423 }, { "epoch": 2.64, "grad_norm": 0.7467052936553955, "learning_rate": 2.0785088110863012e-05, "loss": 1.7086, "step": 79424 }, { "epoch": 2.64, "grad_norm": 0.6994816064834595, "learning_rate": 2.0781265504872936e-05, "loss": 1.7161, "step": 79425 }, { "epoch": 2.64, "grad_norm": 0.7307395935058594, "learning_rate": 2.077744323781331e-05, "loss": 1.671, "step": 79426 }, { "epoch": 2.64, "grad_norm": 0.7182933688163757, "learning_rate": 2.0773621309688725e-05, "loss": 1.6072, "step": 79427 }, { "epoch": 2.64, "grad_norm": 0.6846057176589966, "learning_rate": 2.0769799720503755e-05, "loss": 1.6197, "step": 79428 }, { "epoch": 2.64, "grad_norm": 0.7154322862625122, "learning_rate": 2.076597847026312e-05, "loss": 1.808, "step": 79429 }, { "epoch": 2.64, "grad_norm": 0.8650463819503784, "learning_rate": 2.0762157558971558e-05, "loss": 1.6348, "step": 79430 }, { "epoch": 2.64, "grad_norm": 0.6946166157722473, "learning_rate": 2.0758336986633463e-05, "loss": 1.6777, "step": 79431 }, { "epoch": 2.64, "grad_norm": 0.7226421236991882, "learning_rate": 2.0754516753253626e-05, "loss": 1.7314, "step": 79432 }, { "epoch": 2.64, "grad_norm": 0.7085261940956116, "learning_rate": 2.0750696858836714e-05, "loss": 1.7261, "step": 79433 }, { "epoch": 2.64, "grad_norm": 0.7145664691925049, "learning_rate": 2.074687730338729e-05, "loss": 1.7328, "step": 79434 }, { "epoch": 2.64, "grad_norm": 0.6802225112915039, "learning_rate": 2.074305808690998e-05, "loss": 1.6775, "step": 79435 }, { "epoch": 2.64, "grad_norm": 0.7013495564460754, "learning_rate": 2.073923920940945e-05, "loss": 1.5985, "step": 79436 }, { "epoch": 2.64, "grad_norm": 0.721396267414093, "learning_rate": 2.0735420670890402e-05, "loss": 1.6553, "step": 79437 }, { "epoch": 2.64, "grad_norm": 0.7284830808639526, "learning_rate": 2.0731602471357357e-05, "loss": 1.7221, "step": 79438 }, { "epoch": 2.64, "grad_norm": 0.6972249746322632, "learning_rate": 2.072778461081498e-05, "loss": 1.7216, "step": 79439 }, { "epoch": 2.64, "grad_norm": 0.7077187299728394, "learning_rate": 2.072396708926797e-05, "loss": 1.6051, "step": 79440 }, { "epoch": 2.64, "grad_norm": 0.699959397315979, "learning_rate": 2.0720149906720885e-05, "loss": 1.6721, "step": 79441 }, { "epoch": 2.64, "grad_norm": 0.7126796245574951, "learning_rate": 2.0716333063178392e-05, "loss": 1.6989, "step": 79442 }, { "epoch": 2.64, "grad_norm": 0.6993362903594971, "learning_rate": 2.0712516558645086e-05, "loss": 1.6676, "step": 79443 }, { "epoch": 2.64, "grad_norm": 0.7221908569335938, "learning_rate": 2.070870039312573e-05, "loss": 1.6686, "step": 79444 }, { "epoch": 2.64, "grad_norm": 0.7141830921173096, "learning_rate": 2.0704884566624793e-05, "loss": 1.6888, "step": 79445 }, { "epoch": 2.64, "grad_norm": 0.6955326199531555, "learning_rate": 2.0701069079146927e-05, "loss": 1.6671, "step": 79446 }, { "epoch": 2.64, "grad_norm": 0.7044444680213928, "learning_rate": 2.06972539306969e-05, "loss": 1.6917, "step": 79447 }, { "epoch": 2.64, "grad_norm": 0.6956367492675781, "learning_rate": 2.0693439121279242e-05, "loss": 1.6995, "step": 79448 }, { "epoch": 2.64, "grad_norm": 0.7044446468353271, "learning_rate": 2.068962465089855e-05, "loss": 1.7162, "step": 79449 }, { "epoch": 2.64, "grad_norm": 0.7108651399612427, "learning_rate": 2.0685810519559522e-05, "loss": 1.5785, "step": 79450 }, { "epoch": 2.64, "grad_norm": 0.7083283066749573, "learning_rate": 2.068199672726678e-05, "loss": 1.6961, "step": 79451 }, { "epoch": 2.64, "grad_norm": 0.7047774791717529, "learning_rate": 2.0678183274024894e-05, "loss": 1.6304, "step": 79452 }, { "epoch": 2.64, "grad_norm": 0.685538113117218, "learning_rate": 2.067437015983856e-05, "loss": 1.7342, "step": 79453 }, { "epoch": 2.64, "grad_norm": 0.7004844546318054, "learning_rate": 2.0670557384712404e-05, "loss": 1.6428, "step": 79454 }, { "epoch": 2.64, "grad_norm": 0.7152886986732483, "learning_rate": 2.0666744948651026e-05, "loss": 1.6717, "step": 79455 }, { "epoch": 2.64, "grad_norm": 0.7920399904251099, "learning_rate": 2.066293285165902e-05, "loss": 1.7042, "step": 79456 }, { "epoch": 2.64, "grad_norm": 0.7260941863059998, "learning_rate": 2.0659121093741117e-05, "loss": 1.6884, "step": 79457 }, { "epoch": 2.64, "grad_norm": 0.7204888463020325, "learning_rate": 2.0655309674901876e-05, "loss": 1.7298, "step": 79458 }, { "epoch": 2.64, "grad_norm": 0.7028458714485168, "learning_rate": 2.06514985951459e-05, "loss": 1.581, "step": 79459 }, { "epoch": 2.64, "grad_norm": 0.7184008359909058, "learning_rate": 2.0647687854477845e-05, "loss": 1.7526, "step": 79460 }, { "epoch": 2.64, "grad_norm": 0.7142661809921265, "learning_rate": 2.0643877452902348e-05, "loss": 1.6334, "step": 79461 }, { "epoch": 2.64, "grad_norm": 0.6921330690383911, "learning_rate": 2.064006739042403e-05, "loss": 1.6124, "step": 79462 }, { "epoch": 2.64, "grad_norm": 0.7195290327072144, "learning_rate": 2.0636257667047462e-05, "loss": 1.746, "step": 79463 }, { "epoch": 2.64, "grad_norm": 0.7244539856910706, "learning_rate": 2.063244828277737e-05, "loss": 1.7139, "step": 79464 }, { "epoch": 2.64, "grad_norm": 0.7065033912658691, "learning_rate": 2.0628639237618316e-05, "loss": 1.6674, "step": 79465 }, { "epoch": 2.64, "grad_norm": 0.6999579071998596, "learning_rate": 2.0624830531574898e-05, "loss": 1.6251, "step": 79466 }, { "epoch": 2.64, "grad_norm": 0.7091237306594849, "learning_rate": 2.0621022164651812e-05, "loss": 1.6168, "step": 79467 }, { "epoch": 2.64, "grad_norm": 0.6930268406867981, "learning_rate": 2.0617214136853587e-05, "loss": 1.6616, "step": 79468 }, { "epoch": 2.64, "grad_norm": 0.701582670211792, "learning_rate": 2.0613406448184954e-05, "loss": 1.6433, "step": 79469 }, { "epoch": 2.64, "grad_norm": 0.7229500412940979, "learning_rate": 2.0609599098650475e-05, "loss": 1.6572, "step": 79470 }, { "epoch": 2.64, "grad_norm": 0.7098981738090515, "learning_rate": 2.0605792088254748e-05, "loss": 1.6715, "step": 79471 }, { "epoch": 2.64, "grad_norm": 0.7517626881599426, "learning_rate": 2.0601985417002464e-05, "loss": 1.6663, "step": 79472 }, { "epoch": 2.64, "grad_norm": 0.6845453381538391, "learning_rate": 2.0598179084898124e-05, "loss": 1.6379, "step": 79473 }, { "epoch": 2.64, "grad_norm": 0.7244340777397156, "learning_rate": 2.0594373091946494e-05, "loss": 1.6525, "step": 79474 }, { "epoch": 2.64, "grad_norm": 0.7069393992424011, "learning_rate": 2.0590567438152094e-05, "loss": 1.7487, "step": 79475 }, { "epoch": 2.64, "grad_norm": 0.7134789228439331, "learning_rate": 2.0586762123519596e-05, "loss": 1.6051, "step": 79476 }, { "epoch": 2.64, "grad_norm": 0.7136910557746887, "learning_rate": 2.0582957148053626e-05, "loss": 1.7438, "step": 79477 }, { "epoch": 2.64, "grad_norm": 0.6851003766059875, "learning_rate": 2.0579152511758712e-05, "loss": 1.7223, "step": 79478 }, { "epoch": 2.64, "grad_norm": 0.6859639883041382, "learning_rate": 2.0575348214639586e-05, "loss": 1.6965, "step": 79479 }, { "epoch": 2.64, "grad_norm": 0.7381981015205383, "learning_rate": 2.057154425670081e-05, "loss": 1.7128, "step": 79480 }, { "epoch": 2.64, "grad_norm": 0.6985653638839722, "learning_rate": 2.0567740637946983e-05, "loss": 1.6474, "step": 79481 }, { "epoch": 2.64, "grad_norm": 0.7020451426506042, "learning_rate": 2.0563937358382733e-05, "loss": 1.6424, "step": 79482 }, { "epoch": 2.64, "grad_norm": 0.7327719926834106, "learning_rate": 2.056013441801272e-05, "loss": 1.7022, "step": 79483 }, { "epoch": 2.64, "grad_norm": 0.6967792510986328, "learning_rate": 2.0556331816841542e-05, "loss": 1.7179, "step": 79484 }, { "epoch": 2.64, "grad_norm": 0.7236010432243347, "learning_rate": 2.0552529554873763e-05, "loss": 1.7111, "step": 79485 }, { "epoch": 2.64, "grad_norm": 0.7017475366592407, "learning_rate": 2.054872763211408e-05, "loss": 1.6798, "step": 79486 }, { "epoch": 2.64, "grad_norm": 0.7040030360221863, "learning_rate": 2.054492604856709e-05, "loss": 1.6936, "step": 79487 }, { "epoch": 2.64, "grad_norm": 0.697766900062561, "learning_rate": 2.0541124804237286e-05, "loss": 1.6266, "step": 79488 }, { "epoch": 2.64, "grad_norm": 0.7082555294036865, "learning_rate": 2.053732389912943e-05, "loss": 1.6941, "step": 79489 }, { "epoch": 2.64, "grad_norm": 0.7151628732681274, "learning_rate": 2.0533523333248158e-05, "loss": 1.6495, "step": 79490 }, { "epoch": 2.64, "grad_norm": 0.6878011226654053, "learning_rate": 2.0529723106597928e-05, "loss": 1.696, "step": 79491 }, { "epoch": 2.64, "grad_norm": 0.7318350672721863, "learning_rate": 2.0525923219183405e-05, "loss": 1.6612, "step": 79492 }, { "epoch": 2.64, "grad_norm": 0.7282309532165527, "learning_rate": 2.0522123671009317e-05, "loss": 1.7238, "step": 79493 }, { "epoch": 2.64, "grad_norm": 0.6976198554039001, "learning_rate": 2.051832446208016e-05, "loss": 1.6751, "step": 79494 }, { "epoch": 2.64, "grad_norm": 0.7366021871566772, "learning_rate": 2.0514525592400566e-05, "loss": 1.6133, "step": 79495 }, { "epoch": 2.64, "grad_norm": 0.7301013469696045, "learning_rate": 2.051072706197513e-05, "loss": 1.6881, "step": 79496 }, { "epoch": 2.64, "grad_norm": 0.7220974564552307, "learning_rate": 2.050692887080858e-05, "loss": 1.6399, "step": 79497 }, { "epoch": 2.64, "grad_norm": 0.688262403011322, "learning_rate": 2.050313101890535e-05, "loss": 1.6554, "step": 79498 }, { "epoch": 2.64, "grad_norm": 0.7025419473648071, "learning_rate": 2.0499333506270165e-05, "loss": 1.6437, "step": 79499 }, { "epoch": 2.64, "grad_norm": 0.6910017132759094, "learning_rate": 2.0495536332907624e-05, "loss": 1.6653, "step": 79500 }, { "epoch": 2.65, "grad_norm": 0.6996788382530212, "learning_rate": 2.0491739498822323e-05, "loss": 1.6737, "step": 79501 }, { "epoch": 2.65, "grad_norm": 0.7091577053070068, "learning_rate": 2.0487943004018824e-05, "loss": 1.6895, "step": 79502 }, { "epoch": 2.65, "grad_norm": 0.7268508076667786, "learning_rate": 2.0484146848501792e-05, "loss": 1.6508, "step": 79503 }, { "epoch": 2.65, "grad_norm": 0.7008882761001587, "learning_rate": 2.048035103227589e-05, "loss": 1.7075, "step": 79504 }, { "epoch": 2.65, "grad_norm": 0.7395592331886292, "learning_rate": 2.0476555555345575e-05, "loss": 1.694, "step": 79505 }, { "epoch": 2.65, "grad_norm": 0.7077257037162781, "learning_rate": 2.0472760417715516e-05, "loss": 1.7267, "step": 79506 }, { "epoch": 2.65, "grad_norm": 0.7068239450454712, "learning_rate": 2.046896561939041e-05, "loss": 1.7139, "step": 79507 }, { "epoch": 2.65, "grad_norm": 0.6966266632080078, "learning_rate": 2.046517116037478e-05, "loss": 1.7068, "step": 79508 }, { "epoch": 2.65, "grad_norm": 0.7364941835403442, "learning_rate": 2.0461377040673198e-05, "loss": 1.6867, "step": 79509 }, { "epoch": 2.65, "grad_norm": 0.7042464017868042, "learning_rate": 2.0457583260290323e-05, "loss": 1.6829, "step": 79510 }, { "epoch": 2.65, "grad_norm": 0.7164611220359802, "learning_rate": 2.045378981923085e-05, "loss": 1.7306, "step": 79511 }, { "epoch": 2.65, "grad_norm": 0.7199956178665161, "learning_rate": 2.0449996717499205e-05, "loss": 1.7227, "step": 79512 }, { "epoch": 2.65, "grad_norm": 0.701346218585968, "learning_rate": 2.044620395510006e-05, "loss": 1.7519, "step": 79513 }, { "epoch": 2.65, "grad_norm": 0.7050378918647766, "learning_rate": 2.0442411532038073e-05, "loss": 1.7249, "step": 79514 }, { "epoch": 2.65, "grad_norm": 0.7061054110527039, "learning_rate": 2.0438619448317805e-05, "loss": 1.5876, "step": 79515 }, { "epoch": 2.65, "grad_norm": 0.7201278805732727, "learning_rate": 2.0434827703943824e-05, "loss": 1.7725, "step": 79516 }, { "epoch": 2.65, "grad_norm": 0.7161198258399963, "learning_rate": 2.043103629892082e-05, "loss": 1.6443, "step": 79517 }, { "epoch": 2.65, "grad_norm": 0.7147623896598816, "learning_rate": 2.0427245233253297e-05, "loss": 1.6812, "step": 79518 }, { "epoch": 2.65, "grad_norm": 0.7022409439086914, "learning_rate": 2.0423454506945945e-05, "loss": 1.6969, "step": 79519 }, { "epoch": 2.65, "grad_norm": 0.7123557925224304, "learning_rate": 2.04196641200033e-05, "loss": 1.6444, "step": 79520 }, { "epoch": 2.65, "grad_norm": 0.7050310969352722, "learning_rate": 2.0415874072430015e-05, "loss": 1.7094, "step": 79521 }, { "epoch": 2.65, "grad_norm": 0.7001006603240967, "learning_rate": 2.0412084364230663e-05, "loss": 1.672, "step": 79522 }, { "epoch": 2.65, "grad_norm": 0.731380045413971, "learning_rate": 2.0408294995409802e-05, "loss": 1.6412, "step": 79523 }, { "epoch": 2.65, "grad_norm": 0.7150359749794006, "learning_rate": 2.040450596597213e-05, "loss": 1.6972, "step": 79524 }, { "epoch": 2.65, "grad_norm": 0.6955335140228271, "learning_rate": 2.040071727592214e-05, "loss": 1.6243, "step": 79525 }, { "epoch": 2.65, "grad_norm": 0.7059110403060913, "learning_rate": 2.0396928925264498e-05, "loss": 1.7172, "step": 79526 }, { "epoch": 2.65, "grad_norm": 0.6863964796066284, "learning_rate": 2.0393140914003837e-05, "loss": 1.6741, "step": 79527 }, { "epoch": 2.65, "grad_norm": 0.7481443881988525, "learning_rate": 2.0389353242144612e-05, "loss": 1.7113, "step": 79528 }, { "epoch": 2.65, "grad_norm": 0.7241711616516113, "learning_rate": 2.0385565909691594e-05, "loss": 1.6915, "step": 79529 }, { "epoch": 2.65, "grad_norm": 0.6865000128746033, "learning_rate": 2.038177891664924e-05, "loss": 1.7337, "step": 79530 }, { "epoch": 2.65, "grad_norm": 0.7076268792152405, "learning_rate": 2.037799226302228e-05, "loss": 1.6457, "step": 79531 }, { "epoch": 2.65, "grad_norm": 0.7078953981399536, "learning_rate": 2.0374205948815146e-05, "loss": 1.6589, "step": 79532 }, { "epoch": 2.65, "grad_norm": 0.7055948376655579, "learning_rate": 2.0370419974032604e-05, "loss": 1.6707, "step": 79533 }, { "epoch": 2.65, "grad_norm": 0.6886632442474365, "learning_rate": 2.0366634338679143e-05, "loss": 1.7007, "step": 79534 }, { "epoch": 2.65, "grad_norm": 0.6805276274681091, "learning_rate": 2.0362849042759367e-05, "loss": 1.728, "step": 79535 }, { "epoch": 2.65, "grad_norm": 0.7048936486244202, "learning_rate": 2.0359064086277934e-05, "loss": 1.6811, "step": 79536 }, { "epoch": 2.65, "grad_norm": 0.6994395852088928, "learning_rate": 2.035527946923937e-05, "loss": 1.6952, "step": 79537 }, { "epoch": 2.65, "grad_norm": 0.7082972526550293, "learning_rate": 2.035149519164828e-05, "loss": 1.6567, "step": 79538 }, { "epoch": 2.65, "grad_norm": 0.7114890813827515, "learning_rate": 2.0347711253509224e-05, "loss": 1.6878, "step": 79539 }, { "epoch": 2.65, "grad_norm": 0.746173083782196, "learning_rate": 2.0343927654826932e-05, "loss": 1.7213, "step": 79540 }, { "epoch": 2.65, "grad_norm": 0.8344822525978088, "learning_rate": 2.0340144395605897e-05, "loss": 1.6413, "step": 79541 }, { "epoch": 2.65, "grad_norm": 0.7051889300346375, "learning_rate": 2.033636147585065e-05, "loss": 1.6767, "step": 79542 }, { "epoch": 2.65, "grad_norm": 0.7051266431808472, "learning_rate": 2.033257889556592e-05, "loss": 1.7069, "step": 79543 }, { "epoch": 2.65, "grad_norm": 0.7163293957710266, "learning_rate": 2.032879665475624e-05, "loss": 1.7061, "step": 79544 }, { "epoch": 2.65, "grad_norm": 0.6945323348045349, "learning_rate": 2.0325014753426138e-05, "loss": 1.6558, "step": 79545 }, { "epoch": 2.65, "grad_norm": 0.7230575084686279, "learning_rate": 2.0321233191580243e-05, "loss": 1.6506, "step": 79546 }, { "epoch": 2.65, "grad_norm": 0.7302766442298889, "learning_rate": 2.0317451969223287e-05, "loss": 1.6805, "step": 79547 }, { "epoch": 2.65, "grad_norm": 0.7160740494728088, "learning_rate": 2.031367108635963e-05, "loss": 1.7172, "step": 79548 }, { "epoch": 2.65, "grad_norm": 0.6971052885055542, "learning_rate": 2.0309890542993967e-05, "loss": 1.6912, "step": 79549 }, { "epoch": 2.65, "grad_norm": 0.742194414138794, "learning_rate": 2.030611033913093e-05, "loss": 1.6258, "step": 79550 }, { "epoch": 2.65, "grad_norm": 0.724858820438385, "learning_rate": 2.030233047477505e-05, "loss": 1.6559, "step": 79551 }, { "epoch": 2.65, "grad_norm": 0.7288706302642822, "learning_rate": 2.029855094993089e-05, "loss": 1.6266, "step": 79552 }, { "epoch": 2.65, "grad_norm": 0.7048516869544983, "learning_rate": 2.029477176460308e-05, "loss": 1.7304, "step": 79553 }, { "epoch": 2.65, "grad_norm": 0.7375261783599854, "learning_rate": 2.029099291879631e-05, "loss": 1.6991, "step": 79554 }, { "epoch": 2.65, "grad_norm": 0.7144392132759094, "learning_rate": 2.028721441251495e-05, "loss": 1.7428, "step": 79555 }, { "epoch": 2.65, "grad_norm": 0.6888633370399475, "learning_rate": 2.0283436245763697e-05, "loss": 1.6969, "step": 79556 }, { "epoch": 2.65, "grad_norm": 0.7303069829940796, "learning_rate": 2.0279658418547206e-05, "loss": 1.628, "step": 79557 }, { "epoch": 2.65, "grad_norm": 0.7007825374603271, "learning_rate": 2.027588093086998e-05, "loss": 1.5966, "step": 79558 }, { "epoch": 2.65, "grad_norm": 0.7143206000328064, "learning_rate": 2.027210378273658e-05, "loss": 1.6745, "step": 79559 }, { "epoch": 2.65, "grad_norm": 0.7084255218505859, "learning_rate": 2.026832697415164e-05, "loss": 1.606, "step": 79560 }, { "epoch": 2.65, "grad_norm": 0.7192898392677307, "learning_rate": 2.0264550505119813e-05, "loss": 1.705, "step": 79561 }, { "epoch": 2.65, "grad_norm": 0.7214703559875488, "learning_rate": 2.0260774375645505e-05, "loss": 1.742, "step": 79562 }, { "epoch": 2.65, "grad_norm": 0.7159115076065063, "learning_rate": 2.0256998585733407e-05, "loss": 1.6919, "step": 79563 }, { "epoch": 2.65, "grad_norm": 0.7076650261878967, "learning_rate": 2.025322313538812e-05, "loss": 1.6929, "step": 79564 }, { "epoch": 2.65, "grad_norm": 0.7011350393295288, "learning_rate": 2.0249448024614233e-05, "loss": 1.6948, "step": 79565 }, { "epoch": 2.65, "grad_norm": 0.7106767296791077, "learning_rate": 2.024567325341625e-05, "loss": 1.7024, "step": 79566 }, { "epoch": 2.65, "grad_norm": 0.6963809132575989, "learning_rate": 2.024189882179883e-05, "loss": 1.6713, "step": 79567 }, { "epoch": 2.65, "grad_norm": 0.6985125541687012, "learning_rate": 2.0238124729766536e-05, "loss": 1.6076, "step": 79568 }, { "epoch": 2.65, "grad_norm": 0.6967411637306213, "learning_rate": 2.0234350977323864e-05, "loss": 1.6883, "step": 79569 }, { "epoch": 2.65, "grad_norm": 0.7067649960517883, "learning_rate": 2.023057756447548e-05, "loss": 1.6453, "step": 79570 }, { "epoch": 2.65, "grad_norm": 0.7123702764511108, "learning_rate": 2.0226804491226012e-05, "loss": 1.6923, "step": 79571 }, { "epoch": 2.65, "grad_norm": 0.7200769782066345, "learning_rate": 2.0223031757579987e-05, "loss": 1.716, "step": 79572 }, { "epoch": 2.65, "grad_norm": 0.7203795909881592, "learning_rate": 2.0219259363541905e-05, "loss": 1.6175, "step": 79573 }, { "epoch": 2.65, "grad_norm": 0.722631573677063, "learning_rate": 2.0215487309116496e-05, "loss": 1.636, "step": 79574 }, { "epoch": 2.65, "grad_norm": 0.687350869178772, "learning_rate": 2.0211715594308253e-05, "loss": 1.616, "step": 79575 }, { "epoch": 2.65, "grad_norm": 0.7006060481071472, "learning_rate": 2.0207944219121708e-05, "loss": 1.7149, "step": 79576 }, { "epoch": 2.65, "grad_norm": 0.722805380821228, "learning_rate": 2.020417318356152e-05, "loss": 1.6892, "step": 79577 }, { "epoch": 2.65, "grad_norm": 0.7255523204803467, "learning_rate": 2.020040248763223e-05, "loss": 1.7145, "step": 79578 }, { "epoch": 2.65, "grad_norm": 0.6820482015609741, "learning_rate": 2.0196632131338452e-05, "loss": 1.6316, "step": 79579 }, { "epoch": 2.65, "grad_norm": 0.694916844367981, "learning_rate": 2.0192862114684694e-05, "loss": 1.6661, "step": 79580 }, { "epoch": 2.65, "grad_norm": 0.7110531330108643, "learning_rate": 2.0189092437675613e-05, "loss": 1.7221, "step": 79581 }, { "epoch": 2.65, "grad_norm": 0.7116991281509399, "learning_rate": 2.0185323100315743e-05, "loss": 1.709, "step": 79582 }, { "epoch": 2.65, "grad_norm": 0.7004015445709229, "learning_rate": 2.0181554102609643e-05, "loss": 1.7271, "step": 79583 }, { "epoch": 2.65, "grad_norm": 0.7189586162567139, "learning_rate": 2.0177785444561943e-05, "loss": 1.6152, "step": 79584 }, { "epoch": 2.65, "grad_norm": 0.7080779671669006, "learning_rate": 2.017401712617711e-05, "loss": 1.7783, "step": 79585 }, { "epoch": 2.65, "grad_norm": 0.7177493572235107, "learning_rate": 2.017024914745984e-05, "loss": 1.7061, "step": 79586 }, { "epoch": 2.65, "grad_norm": 0.7229906320571899, "learning_rate": 2.016648150841469e-05, "loss": 1.6872, "step": 79587 }, { "epoch": 2.65, "grad_norm": 0.7041028738021851, "learning_rate": 2.0162714209046126e-05, "loss": 1.7564, "step": 79588 }, { "epoch": 2.65, "grad_norm": 0.686275064945221, "learning_rate": 2.015894724935885e-05, "loss": 1.6273, "step": 79589 }, { "epoch": 2.65, "grad_norm": 0.6970009803771973, "learning_rate": 2.0155180629357316e-05, "loss": 1.6593, "step": 79590 }, { "epoch": 2.65, "grad_norm": 0.7038437724113464, "learning_rate": 2.0151414349046224e-05, "loss": 1.6542, "step": 79591 }, { "epoch": 2.65, "grad_norm": 0.7260718941688538, "learning_rate": 2.014764840843004e-05, "loss": 1.6804, "step": 79592 }, { "epoch": 2.65, "grad_norm": 0.7129767537117004, "learning_rate": 2.0143882807513422e-05, "loss": 1.73, "step": 79593 }, { "epoch": 2.65, "grad_norm": 0.6869292259216309, "learning_rate": 2.0140117546300872e-05, "loss": 1.6716, "step": 79594 }, { "epoch": 2.65, "grad_norm": 0.6942572593688965, "learning_rate": 2.0136352624796948e-05, "loss": 1.6964, "step": 79595 }, { "epoch": 2.65, "grad_norm": 0.7068897485733032, "learning_rate": 2.0132588043006314e-05, "loss": 1.6963, "step": 79596 }, { "epoch": 2.65, "grad_norm": 0.713535726070404, "learning_rate": 2.0128823800933503e-05, "loss": 1.6399, "step": 79597 }, { "epoch": 2.65, "grad_norm": 0.7118479609489441, "learning_rate": 2.0125059898582972e-05, "loss": 1.6036, "step": 79598 }, { "epoch": 2.65, "grad_norm": 0.7081462740898132, "learning_rate": 2.0121296335959392e-05, "loss": 1.6919, "step": 79599 }, { "epoch": 2.65, "grad_norm": 0.7016264200210571, "learning_rate": 2.0117533113067384e-05, "loss": 1.6599, "step": 79600 }, { "epoch": 2.65, "grad_norm": 0.7314862012863159, "learning_rate": 2.011377022991142e-05, "loss": 1.7388, "step": 79601 }, { "epoch": 2.65, "grad_norm": 0.7041223645210266, "learning_rate": 2.011000768649609e-05, "loss": 1.6528, "step": 79602 }, { "epoch": 2.65, "grad_norm": 0.6932137608528137, "learning_rate": 2.010624548282599e-05, "loss": 1.6914, "step": 79603 }, { "epoch": 2.65, "grad_norm": 0.7199050784111023, "learning_rate": 2.0102483618905686e-05, "loss": 1.6426, "step": 79604 }, { "epoch": 2.65, "grad_norm": 0.7215605974197388, "learning_rate": 2.009872209473967e-05, "loss": 1.7045, "step": 79605 }, { "epoch": 2.65, "grad_norm": 0.7171763181686401, "learning_rate": 2.009496091033258e-05, "loss": 1.6387, "step": 79606 }, { "epoch": 2.65, "grad_norm": 0.7298054695129395, "learning_rate": 2.009120006568904e-05, "loss": 1.6353, "step": 79607 }, { "epoch": 2.65, "grad_norm": 0.6968598365783691, "learning_rate": 2.0087439560813447e-05, "loss": 1.6776, "step": 79608 }, { "epoch": 2.65, "grad_norm": 0.724256157875061, "learning_rate": 2.008367939571046e-05, "loss": 1.5708, "step": 79609 }, { "epoch": 2.65, "grad_norm": 0.7049508690834045, "learning_rate": 2.0079919570384685e-05, "loss": 1.7231, "step": 79610 }, { "epoch": 2.65, "grad_norm": 0.725163996219635, "learning_rate": 2.0076160084840643e-05, "loss": 1.6414, "step": 79611 }, { "epoch": 2.65, "grad_norm": 0.703809916973114, "learning_rate": 2.0072400939082868e-05, "loss": 1.753, "step": 79612 }, { "epoch": 2.65, "grad_norm": 0.7374195456504822, "learning_rate": 2.0068642133115952e-05, "loss": 1.7156, "step": 79613 }, { "epoch": 2.65, "grad_norm": 0.7028440833091736, "learning_rate": 2.0064883666944532e-05, "loss": 1.7407, "step": 79614 }, { "epoch": 2.65, "grad_norm": 0.7001097202301025, "learning_rate": 2.006112554057303e-05, "loss": 1.6302, "step": 79615 }, { "epoch": 2.65, "grad_norm": 0.7239632606506348, "learning_rate": 2.0057367754006048e-05, "loss": 1.649, "step": 79616 }, { "epoch": 2.65, "grad_norm": 0.696285605430603, "learning_rate": 2.0053610307248247e-05, "loss": 1.6505, "step": 79617 }, { "epoch": 2.65, "grad_norm": 0.7328048944473267, "learning_rate": 2.004985320030409e-05, "loss": 1.5795, "step": 79618 }, { "epoch": 2.65, "grad_norm": 0.710265040397644, "learning_rate": 2.0046096433178138e-05, "loss": 1.729, "step": 79619 }, { "epoch": 2.65, "grad_norm": 0.6925326585769653, "learning_rate": 2.004234000587496e-05, "loss": 1.6477, "step": 79620 }, { "epoch": 2.65, "grad_norm": 0.6987497806549072, "learning_rate": 2.003858391839921e-05, "loss": 1.6556, "step": 79621 }, { "epoch": 2.65, "grad_norm": 0.7244983315467834, "learning_rate": 2.003482817075529e-05, "loss": 1.6174, "step": 79622 }, { "epoch": 2.65, "grad_norm": 0.693039059638977, "learning_rate": 2.0031072762947863e-05, "loss": 1.6158, "step": 79623 }, { "epoch": 2.65, "grad_norm": 0.7351064085960388, "learning_rate": 2.0027317694981494e-05, "loss": 1.7001, "step": 79624 }, { "epoch": 2.65, "grad_norm": 0.7146771550178528, "learning_rate": 2.0023562966860674e-05, "loss": 1.7663, "step": 79625 }, { "epoch": 2.65, "grad_norm": 0.7133641242980957, "learning_rate": 2.001980857859e-05, "loss": 1.731, "step": 79626 }, { "epoch": 2.65, "grad_norm": 0.7206271290779114, "learning_rate": 2.001605453017401e-05, "loss": 1.7207, "step": 79627 }, { "epoch": 2.65, "grad_norm": 0.715856671333313, "learning_rate": 2.0012300821617356e-05, "loss": 1.6945, "step": 79628 }, { "epoch": 2.65, "grad_norm": 0.7259877324104309, "learning_rate": 2.0008547452924406e-05, "loss": 1.7066, "step": 79629 }, { "epoch": 2.65, "grad_norm": 0.7253480553627014, "learning_rate": 2.0004794424099858e-05, "loss": 1.6812, "step": 79630 }, { "epoch": 2.65, "grad_norm": 0.712247371673584, "learning_rate": 2.000104173514827e-05, "loss": 1.6538, "step": 79631 }, { "epoch": 2.65, "grad_norm": 0.7242079973220825, "learning_rate": 1.9997289386074177e-05, "loss": 1.7423, "step": 79632 }, { "epoch": 2.65, "grad_norm": 0.7064163088798523, "learning_rate": 1.999353737688204e-05, "loss": 1.7229, "step": 79633 }, { "epoch": 2.65, "grad_norm": 0.7324632406234741, "learning_rate": 1.9989785707576557e-05, "loss": 1.6078, "step": 79634 }, { "epoch": 2.65, "grad_norm": 0.6902711391448975, "learning_rate": 1.9986034378162153e-05, "loss": 1.5788, "step": 79635 }, { "epoch": 2.65, "grad_norm": 0.7250549793243408, "learning_rate": 1.9982283388643528e-05, "loss": 1.7051, "step": 79636 }, { "epoch": 2.65, "grad_norm": 0.7046507596969604, "learning_rate": 1.997853273902508e-05, "loss": 1.6308, "step": 79637 }, { "epoch": 2.65, "grad_norm": 0.7175918817520142, "learning_rate": 1.9974782429311464e-05, "loss": 1.7163, "step": 79638 }, { "epoch": 2.65, "grad_norm": 0.7041535973548889, "learning_rate": 1.997103245950722e-05, "loss": 1.7, "step": 79639 }, { "epoch": 2.65, "grad_norm": 0.6851274371147156, "learning_rate": 1.9967282829616837e-05, "loss": 1.6487, "step": 79640 }, { "epoch": 2.65, "grad_norm": 0.7089170217514038, "learning_rate": 1.9963533539644982e-05, "loss": 1.7452, "step": 79641 }, { "epoch": 2.65, "grad_norm": 0.7124544382095337, "learning_rate": 1.995978458959605e-05, "loss": 1.66, "step": 79642 }, { "epoch": 2.65, "grad_norm": 0.6893950700759888, "learning_rate": 1.9956035979474737e-05, "loss": 1.733, "step": 79643 }, { "epoch": 2.65, "grad_norm": 0.696694016456604, "learning_rate": 1.9952287709285542e-05, "loss": 1.6767, "step": 79644 }, { "epoch": 2.65, "grad_norm": 0.7139043211936951, "learning_rate": 1.9948539779032956e-05, "loss": 1.7122, "step": 79645 }, { "epoch": 2.65, "grad_norm": 0.7196868658065796, "learning_rate": 1.9944792188721615e-05, "loss": 1.7906, "step": 79646 }, { "epoch": 2.65, "grad_norm": 0.6965110301971436, "learning_rate": 1.994104493835601e-05, "loss": 1.6411, "step": 79647 }, { "epoch": 2.65, "grad_norm": 0.6759580969810486, "learning_rate": 1.993729802794074e-05, "loss": 1.5852, "step": 79648 }, { "epoch": 2.65, "grad_norm": 0.7627344131469727, "learning_rate": 1.99335514574803e-05, "loss": 1.6552, "step": 79649 }, { "epoch": 2.65, "grad_norm": 0.7022174596786499, "learning_rate": 1.992980522697929e-05, "loss": 1.5874, "step": 79650 }, { "epoch": 2.65, "grad_norm": 0.7381025552749634, "learning_rate": 1.99260593364422e-05, "loss": 1.6634, "step": 79651 }, { "epoch": 2.65, "grad_norm": 0.704682469367981, "learning_rate": 1.99223137858736e-05, "loss": 1.6977, "step": 79652 }, { "epoch": 2.65, "grad_norm": 0.7120301127433777, "learning_rate": 1.991856857527808e-05, "loss": 1.6476, "step": 79653 }, { "epoch": 2.65, "grad_norm": 0.703672468662262, "learning_rate": 1.9914823704660177e-05, "loss": 1.6832, "step": 79654 }, { "epoch": 2.65, "grad_norm": 0.7112884521484375, "learning_rate": 1.9911079174024314e-05, "loss": 1.6459, "step": 79655 }, { "epoch": 2.65, "grad_norm": 0.7012049555778503, "learning_rate": 1.9907334983375156e-05, "loss": 1.6703, "step": 79656 }, { "epoch": 2.65, "grad_norm": 0.7009915113449097, "learning_rate": 1.9903591132717268e-05, "loss": 1.6421, "step": 79657 }, { "epoch": 2.65, "grad_norm": 0.7234295606613159, "learning_rate": 1.9899847622055176e-05, "loss": 1.6739, "step": 79658 }, { "epoch": 2.65, "grad_norm": 0.7027411460876465, "learning_rate": 1.9896104451393312e-05, "loss": 1.6891, "step": 79659 }, { "epoch": 2.65, "grad_norm": 0.71989506483078, "learning_rate": 1.9892361620736375e-05, "loss": 1.6574, "step": 79660 }, { "epoch": 2.65, "grad_norm": 0.7005557417869568, "learning_rate": 1.9888619130088824e-05, "loss": 1.7297, "step": 79661 }, { "epoch": 2.65, "grad_norm": 0.7124223113059998, "learning_rate": 1.988487697945519e-05, "loss": 1.6609, "step": 79662 }, { "epoch": 2.65, "grad_norm": 0.7130821347236633, "learning_rate": 1.988113516884e-05, "loss": 1.7218, "step": 79663 }, { "epoch": 2.65, "grad_norm": 0.7147747278213501, "learning_rate": 1.987739369824799e-05, "loss": 1.7038, "step": 79664 }, { "epoch": 2.65, "grad_norm": 0.6963114142417908, "learning_rate": 1.9873652567683417e-05, "loss": 1.6329, "step": 79665 }, { "epoch": 2.65, "grad_norm": 0.715677797794342, "learning_rate": 1.9869911777150948e-05, "loss": 1.6915, "step": 79666 }, { "epoch": 2.65, "grad_norm": 0.7053250670433044, "learning_rate": 1.986617132665521e-05, "loss": 1.7235, "step": 79667 }, { "epoch": 2.65, "grad_norm": 0.7242922186851501, "learning_rate": 1.986243121620067e-05, "loss": 1.618, "step": 79668 }, { "epoch": 2.65, "grad_norm": 0.7357911467552185, "learning_rate": 1.9858691445791786e-05, "loss": 1.7247, "step": 79669 }, { "epoch": 2.65, "grad_norm": 0.7230406403541565, "learning_rate": 1.985495201543319e-05, "loss": 1.6325, "step": 79670 }, { "epoch": 2.65, "grad_norm": 0.718024492263794, "learning_rate": 1.9851212925129477e-05, "loss": 1.8023, "step": 79671 }, { "epoch": 2.65, "grad_norm": 0.7083230018615723, "learning_rate": 1.9847474174885048e-05, "loss": 1.6938, "step": 79672 }, { "epoch": 2.65, "grad_norm": 0.7136811017990112, "learning_rate": 1.9843735764704493e-05, "loss": 1.6685, "step": 79673 }, { "epoch": 2.65, "grad_norm": 0.7093033194541931, "learning_rate": 1.9839997694592412e-05, "loss": 1.6672, "step": 79674 }, { "epoch": 2.65, "grad_norm": 0.7264143228530884, "learning_rate": 1.983625996455327e-05, "loss": 1.6687, "step": 79675 }, { "epoch": 2.65, "grad_norm": 0.6955021619796753, "learning_rate": 1.9832522574591624e-05, "loss": 1.6448, "step": 79676 }, { "epoch": 2.65, "grad_norm": 0.7207549810409546, "learning_rate": 1.9828785524711975e-05, "loss": 1.6836, "step": 79677 }, { "epoch": 2.65, "grad_norm": 0.6898068189620972, "learning_rate": 1.982504881491902e-05, "loss": 1.632, "step": 79678 }, { "epoch": 2.65, "grad_norm": 2.6311585903167725, "learning_rate": 1.9821312445217087e-05, "loss": 1.7606, "step": 79679 }, { "epoch": 2.65, "grad_norm": 0.696883499622345, "learning_rate": 1.981757641561077e-05, "loss": 1.6834, "step": 79680 }, { "epoch": 2.65, "grad_norm": 0.7096144556999207, "learning_rate": 1.9813840726104702e-05, "loss": 1.6995, "step": 79681 }, { "epoch": 2.65, "grad_norm": 0.7261834144592285, "learning_rate": 1.9810105376703343e-05, "loss": 1.7242, "step": 79682 }, { "epoch": 2.65, "grad_norm": 0.7073196768760681, "learning_rate": 1.980637036741116e-05, "loss": 1.6849, "step": 79683 }, { "epoch": 2.65, "grad_norm": 0.7270205020904541, "learning_rate": 1.9802635698232848e-05, "loss": 1.7299, "step": 79684 }, { "epoch": 2.65, "grad_norm": 0.6987159848213196, "learning_rate": 1.9798901369172836e-05, "loss": 1.6717, "step": 79685 }, { "epoch": 2.65, "grad_norm": 0.7115910649299622, "learning_rate": 1.979516738023562e-05, "loss": 1.6212, "step": 79686 }, { "epoch": 2.65, "grad_norm": 0.6903865933418274, "learning_rate": 1.9791433731425766e-05, "loss": 1.647, "step": 79687 }, { "epoch": 2.65, "grad_norm": 0.7153010368347168, "learning_rate": 1.97877004227479e-05, "loss": 1.6973, "step": 79688 }, { "epoch": 2.65, "grad_norm": 0.7489354014396667, "learning_rate": 1.978396745420645e-05, "loss": 1.6814, "step": 79689 }, { "epoch": 2.65, "grad_norm": 0.7120513319969177, "learning_rate": 1.9780234825805953e-05, "loss": 1.6488, "step": 79690 }, { "epoch": 2.65, "grad_norm": 0.6895294189453125, "learning_rate": 1.9776502537550996e-05, "loss": 1.6665, "step": 79691 }, { "epoch": 2.65, "grad_norm": 0.6947097778320312, "learning_rate": 1.9772770589446084e-05, "loss": 1.6847, "step": 79692 }, { "epoch": 2.65, "grad_norm": 0.7194903492927551, "learning_rate": 1.976903898149571e-05, "loss": 1.7124, "step": 79693 }, { "epoch": 2.65, "grad_norm": 0.7227229475975037, "learning_rate": 1.976530771370447e-05, "loss": 1.7088, "step": 79694 }, { "epoch": 2.65, "grad_norm": 0.7172547578811646, "learning_rate": 1.9761576786076792e-05, "loss": 1.7096, "step": 79695 }, { "epoch": 2.65, "grad_norm": 0.6979373693466187, "learning_rate": 1.9757846198617312e-05, "loss": 1.713, "step": 79696 }, { "epoch": 2.65, "grad_norm": 0.7134151458740234, "learning_rate": 1.9754115951330485e-05, "loss": 1.7315, "step": 79697 }, { "epoch": 2.65, "grad_norm": 0.7283816337585449, "learning_rate": 1.9750386044220912e-05, "loss": 1.614, "step": 79698 }, { "epoch": 2.65, "grad_norm": 0.7010531425476074, "learning_rate": 1.974665647729309e-05, "loss": 1.7138, "step": 79699 }, { "epoch": 2.65, "grad_norm": 0.7159703373908997, "learning_rate": 1.9742927250551442e-05, "loss": 1.6208, "step": 79700 }, { "epoch": 2.65, "grad_norm": 0.6993576884269714, "learning_rate": 1.9739198364000676e-05, "loss": 1.7497, "step": 79701 }, { "epoch": 2.65, "grad_norm": 0.6993280649185181, "learning_rate": 1.9735469817645176e-05, "loss": 1.7331, "step": 79702 }, { "epoch": 2.65, "grad_norm": 0.711129367351532, "learning_rate": 1.973174161148955e-05, "loss": 1.6414, "step": 79703 }, { "epoch": 2.65, "grad_norm": 0.7159709334373474, "learning_rate": 1.972801374553832e-05, "loss": 1.7531, "step": 79704 }, { "epoch": 2.65, "grad_norm": 0.7001100182533264, "learning_rate": 1.9724286219795914e-05, "loss": 1.7108, "step": 79705 }, { "epoch": 2.65, "grad_norm": 0.6883690357208252, "learning_rate": 1.972055903426697e-05, "loss": 1.6487, "step": 79706 }, { "epoch": 2.65, "grad_norm": 0.7062058448791504, "learning_rate": 1.9716832188955945e-05, "loss": 1.6803, "step": 79707 }, { "epoch": 2.65, "grad_norm": 0.7092331647872925, "learning_rate": 1.9713105683867435e-05, "loss": 1.7435, "step": 79708 }, { "epoch": 2.65, "grad_norm": 0.7102784514427185, "learning_rate": 1.9709379519005842e-05, "loss": 1.6092, "step": 79709 }, { "epoch": 2.65, "grad_norm": 0.7023892998695374, "learning_rate": 1.9705653694375856e-05, "loss": 1.7304, "step": 79710 }, { "epoch": 2.65, "grad_norm": 0.697240948677063, "learning_rate": 1.9701928209981875e-05, "loss": 1.6971, "step": 79711 }, { "epoch": 2.65, "grad_norm": 0.7208585739135742, "learning_rate": 1.96982030658284e-05, "loss": 1.6465, "step": 79712 }, { "epoch": 2.65, "grad_norm": 0.6955796480178833, "learning_rate": 1.969447826192009e-05, "loss": 1.635, "step": 79713 }, { "epoch": 2.65, "grad_norm": 0.70673006772995, "learning_rate": 1.969075379826134e-05, "loss": 1.581, "step": 79714 }, { "epoch": 2.65, "grad_norm": 0.7289248108863831, "learning_rate": 1.968702967485668e-05, "loss": 1.6246, "step": 79715 }, { "epoch": 2.65, "grad_norm": 0.6982412934303284, "learning_rate": 1.9683305891710678e-05, "loss": 1.6911, "step": 79716 }, { "epoch": 2.65, "grad_norm": 0.7127021551132202, "learning_rate": 1.967958244882786e-05, "loss": 1.6802, "step": 79717 }, { "epoch": 2.65, "grad_norm": 0.7098259925842285, "learning_rate": 1.967585934621275e-05, "loss": 1.6978, "step": 79718 }, { "epoch": 2.65, "grad_norm": 0.706052303314209, "learning_rate": 1.967213658386979e-05, "loss": 1.6195, "step": 79719 }, { "epoch": 2.65, "grad_norm": 0.6977786421775818, "learning_rate": 1.9668414161803603e-05, "loss": 1.6295, "step": 79720 }, { "epoch": 2.65, "grad_norm": 0.7066685557365417, "learning_rate": 1.966469208001865e-05, "loss": 1.6985, "step": 79721 }, { "epoch": 2.65, "grad_norm": 0.7212085723876953, "learning_rate": 1.96609703385194e-05, "loss": 1.7025, "step": 79722 }, { "epoch": 2.65, "grad_norm": 0.7034964561462402, "learning_rate": 1.9657248937310444e-05, "loss": 1.6773, "step": 79723 }, { "epoch": 2.65, "grad_norm": 0.7016410231590271, "learning_rate": 1.9653527876396345e-05, "loss": 1.6442, "step": 79724 }, { "epoch": 2.65, "grad_norm": 0.7339112758636475, "learning_rate": 1.9649807155781504e-05, "loss": 1.7376, "step": 79725 }, { "epoch": 2.65, "grad_norm": 0.6927794218063354, "learning_rate": 1.964608677547045e-05, "loss": 1.6776, "step": 79726 }, { "epoch": 2.65, "grad_norm": 0.7562596201896667, "learning_rate": 1.9642366735467806e-05, "loss": 1.6291, "step": 79727 }, { "epoch": 2.65, "grad_norm": 0.7162869572639465, "learning_rate": 1.963864703577801e-05, "loss": 1.7396, "step": 79728 }, { "epoch": 2.65, "grad_norm": 0.70265793800354, "learning_rate": 1.9634927676405554e-05, "loss": 1.6637, "step": 79729 }, { "epoch": 2.65, "grad_norm": 0.7401810884475708, "learning_rate": 1.963120865735497e-05, "loss": 1.772, "step": 79730 }, { "epoch": 2.65, "grad_norm": 0.702184796333313, "learning_rate": 1.9627489978630918e-05, "loss": 1.6587, "step": 79731 }, { "epoch": 2.65, "grad_norm": 0.712942898273468, "learning_rate": 1.9623771640237664e-05, "loss": 1.7405, "step": 79732 }, { "epoch": 2.65, "grad_norm": 0.6779035329818726, "learning_rate": 1.9620053642179835e-05, "loss": 1.674, "step": 79733 }, { "epoch": 2.65, "grad_norm": 0.6947782635688782, "learning_rate": 1.9616335984461996e-05, "loss": 1.6952, "step": 79734 }, { "epoch": 2.65, "grad_norm": 0.7043401002883911, "learning_rate": 1.961261866708861e-05, "loss": 1.6973, "step": 79735 }, { "epoch": 2.65, "grad_norm": 0.7030029892921448, "learning_rate": 1.960890169006414e-05, "loss": 1.7454, "step": 79736 }, { "epoch": 2.65, "grad_norm": 0.7283556461334229, "learning_rate": 1.9605185053393178e-05, "loss": 1.6871, "step": 79737 }, { "epoch": 2.65, "grad_norm": 0.7441508769989014, "learning_rate": 1.9601468757080262e-05, "loss": 1.649, "step": 79738 }, { "epoch": 2.65, "grad_norm": 0.6831824779510498, "learning_rate": 1.9597752801129785e-05, "loss": 1.6425, "step": 79739 }, { "epoch": 2.65, "grad_norm": 0.6915040016174316, "learning_rate": 1.9594037185546308e-05, "loss": 1.6406, "step": 79740 }, { "epoch": 2.65, "grad_norm": 0.7075188755989075, "learning_rate": 1.9590321910334395e-05, "loss": 1.6703, "step": 79741 }, { "epoch": 2.65, "grad_norm": 0.7037492990493774, "learning_rate": 1.958660697549854e-05, "loss": 1.7428, "step": 79742 }, { "epoch": 2.65, "grad_norm": 0.7302979230880737, "learning_rate": 1.9582892381043148e-05, "loss": 1.7632, "step": 79743 }, { "epoch": 2.65, "grad_norm": 0.6872617602348328, "learning_rate": 1.9579178126972838e-05, "loss": 1.7527, "step": 79744 }, { "epoch": 2.65, "grad_norm": 0.7004296779632568, "learning_rate": 1.9575464213292146e-05, "loss": 1.6802, "step": 79745 }, { "epoch": 2.65, "grad_norm": 0.713904857635498, "learning_rate": 1.957175064000547e-05, "loss": 1.7258, "step": 79746 }, { "epoch": 2.65, "grad_norm": 0.7048768401145935, "learning_rate": 1.956803740711733e-05, "loss": 1.6775, "step": 79747 }, { "epoch": 2.65, "grad_norm": 0.6856207251548767, "learning_rate": 1.9564324514632334e-05, "loss": 1.7294, "step": 79748 }, { "epoch": 2.65, "grad_norm": 0.7183281779289246, "learning_rate": 1.956061196255494e-05, "loss": 1.6744, "step": 79749 }, { "epoch": 2.65, "grad_norm": 0.7588975429534912, "learning_rate": 1.9556899750889578e-05, "loss": 1.5965, "step": 79750 }, { "epoch": 2.65, "grad_norm": 0.7454478740692139, "learning_rate": 1.9553187879640875e-05, "loss": 1.6499, "step": 79751 }, { "epoch": 2.65, "grad_norm": 0.7475288510322571, "learning_rate": 1.954947634881323e-05, "loss": 1.5927, "step": 79752 }, { "epoch": 2.65, "grad_norm": 0.7065454125404358, "learning_rate": 1.954576515841124e-05, "loss": 1.7136, "step": 79753 }, { "epoch": 2.65, "grad_norm": 0.7044984698295593, "learning_rate": 1.9542054308439338e-05, "loss": 1.6667, "step": 79754 }, { "epoch": 2.65, "grad_norm": 0.7171909809112549, "learning_rate": 1.9538343798902113e-05, "loss": 1.7006, "step": 79755 }, { "epoch": 2.65, "grad_norm": 0.6625617742538452, "learning_rate": 1.9534633629803997e-05, "loss": 1.597, "step": 79756 }, { "epoch": 2.65, "grad_norm": 0.7019436359405518, "learning_rate": 1.9530923801149456e-05, "loss": 1.7461, "step": 79757 }, { "epoch": 2.65, "grad_norm": 0.6907500624656677, "learning_rate": 1.952721431294312e-05, "loss": 1.6278, "step": 79758 }, { "epoch": 2.65, "grad_norm": 0.6813341379165649, "learning_rate": 1.9523505165189345e-05, "loss": 1.581, "step": 79759 }, { "epoch": 2.65, "grad_norm": 0.7197123169898987, "learning_rate": 1.951979635789277e-05, "loss": 1.6789, "step": 79760 }, { "epoch": 2.65, "grad_norm": 0.7166605591773987, "learning_rate": 1.951608789105782e-05, "loss": 1.6582, "step": 79761 }, { "epoch": 2.65, "grad_norm": 0.7118175625801086, "learning_rate": 1.951237976468899e-05, "loss": 1.674, "step": 79762 }, { "epoch": 2.65, "grad_norm": 0.699461817741394, "learning_rate": 1.9508671978790846e-05, "loss": 1.6222, "step": 79763 }, { "epoch": 2.65, "grad_norm": 0.6950909495353699, "learning_rate": 1.9504964533367786e-05, "loss": 1.6371, "step": 79764 }, { "epoch": 2.65, "grad_norm": 0.6982569694519043, "learning_rate": 1.95012574284244e-05, "loss": 1.7104, "step": 79765 }, { "epoch": 2.65, "grad_norm": 0.6969629526138306, "learning_rate": 1.9497550663965157e-05, "loss": 1.6201, "step": 79766 }, { "epoch": 2.65, "grad_norm": 0.7562973499298096, "learning_rate": 1.949384423999458e-05, "loss": 1.7433, "step": 79767 }, { "epoch": 2.65, "grad_norm": 0.7372686862945557, "learning_rate": 1.9490138156517142e-05, "loss": 1.6351, "step": 79768 }, { "epoch": 2.65, "grad_norm": 0.699908435344696, "learning_rate": 1.9486432413537302e-05, "loss": 1.6972, "step": 79769 }, { "epoch": 2.65, "grad_norm": 0.725439190864563, "learning_rate": 1.948272701105965e-05, "loss": 1.68, "step": 79770 }, { "epoch": 2.65, "grad_norm": 0.7016667127609253, "learning_rate": 1.9479021949088625e-05, "loss": 1.611, "step": 79771 }, { "epoch": 2.65, "grad_norm": 0.7261701226234436, "learning_rate": 1.9475317227628683e-05, "loss": 1.6713, "step": 79772 }, { "epoch": 2.65, "grad_norm": 0.7116104364395142, "learning_rate": 1.947161284668439e-05, "loss": 1.7075, "step": 79773 }, { "epoch": 2.65, "grad_norm": 0.7489163875579834, "learning_rate": 1.946790880626028e-05, "loss": 1.733, "step": 79774 }, { "epoch": 2.65, "grad_norm": 0.7186273336410522, "learning_rate": 1.9464205106360775e-05, "loss": 1.6571, "step": 79775 }, { "epoch": 2.65, "grad_norm": 0.7071197628974915, "learning_rate": 1.9460501746990344e-05, "loss": 1.6453, "step": 79776 }, { "epoch": 2.65, "grad_norm": 0.725025475025177, "learning_rate": 1.945679872815358e-05, "loss": 1.7732, "step": 79777 }, { "epoch": 2.65, "grad_norm": 0.7386263608932495, "learning_rate": 1.9453096049854944e-05, "loss": 1.7243, "step": 79778 }, { "epoch": 2.65, "grad_norm": 0.7238671779632568, "learning_rate": 1.9449393712098838e-05, "loss": 1.5929, "step": 79779 }, { "epoch": 2.65, "grad_norm": 0.7061287760734558, "learning_rate": 1.9445691714889856e-05, "loss": 1.6679, "step": 79780 }, { "epoch": 2.65, "grad_norm": 0.7390947341918945, "learning_rate": 1.9441990058232526e-05, "loss": 1.7273, "step": 79781 }, { "epoch": 2.65, "grad_norm": 0.7247433066368103, "learning_rate": 1.9438288742131214e-05, "loss": 1.7353, "step": 79782 }, { "epoch": 2.65, "grad_norm": 0.7229727506637573, "learning_rate": 1.9434587766590482e-05, "loss": 1.7054, "step": 79783 }, { "epoch": 2.65, "grad_norm": 0.7215065360069275, "learning_rate": 1.9430887131614858e-05, "loss": 1.7319, "step": 79784 }, { "epoch": 2.65, "grad_norm": 0.7199171185493469, "learning_rate": 1.9427186837208776e-05, "loss": 1.6512, "step": 79785 }, { "epoch": 2.65, "grad_norm": 0.7016364336013794, "learning_rate": 1.9423486883376725e-05, "loss": 1.6677, "step": 79786 }, { "epoch": 2.65, "grad_norm": 0.7051845192909241, "learning_rate": 1.9419787270123243e-05, "loss": 1.6594, "step": 79787 }, { "epoch": 2.65, "grad_norm": 0.7125839591026306, "learning_rate": 1.9416087997452856e-05, "loss": 1.6691, "step": 79788 }, { "epoch": 2.65, "grad_norm": 0.9641792178153992, "learning_rate": 1.9412389065369928e-05, "loss": 1.7584, "step": 79789 }, { "epoch": 2.65, "grad_norm": 0.7250699400901794, "learning_rate": 1.9408690473878985e-05, "loss": 1.6429, "step": 79790 }, { "epoch": 2.65, "grad_norm": 0.7061795592308044, "learning_rate": 1.9404992222984628e-05, "loss": 1.6051, "step": 79791 }, { "epoch": 2.65, "grad_norm": 0.716575562953949, "learning_rate": 1.9401294312691252e-05, "loss": 1.6627, "step": 79792 }, { "epoch": 2.65, "grad_norm": 0.7100070714950562, "learning_rate": 1.939759674300332e-05, "loss": 1.6443, "step": 79793 }, { "epoch": 2.65, "grad_norm": 0.7080156207084656, "learning_rate": 1.939389951392536e-05, "loss": 1.7421, "step": 79794 }, { "epoch": 2.65, "grad_norm": 0.7073140740394592, "learning_rate": 1.9390202625461937e-05, "loss": 1.7326, "step": 79795 }, { "epoch": 2.65, "grad_norm": 0.7276362776756287, "learning_rate": 1.938650607761738e-05, "loss": 1.6674, "step": 79796 }, { "epoch": 2.65, "grad_norm": 0.6926169395446777, "learning_rate": 1.9382809870396255e-05, "loss": 1.6938, "step": 79797 }, { "epoch": 2.65, "grad_norm": 0.6883823275566101, "learning_rate": 1.937911400380312e-05, "loss": 1.6673, "step": 79798 }, { "epoch": 2.65, "grad_norm": 0.7140912413597107, "learning_rate": 1.9375418477842375e-05, "loss": 1.7168, "step": 79799 }, { "epoch": 2.65, "grad_norm": 0.6952221989631653, "learning_rate": 1.937172329251848e-05, "loss": 1.6679, "step": 79800 }, { "epoch": 2.65, "grad_norm": 0.7054148316383362, "learning_rate": 1.9368028447836004e-05, "loss": 1.6844, "step": 79801 }, { "epoch": 2.66, "grad_norm": 0.7216619849205017, "learning_rate": 1.9364333943799405e-05, "loss": 1.685, "step": 79802 }, { "epoch": 2.66, "grad_norm": 0.7087460160255432, "learning_rate": 1.936063978041308e-05, "loss": 1.6674, "step": 79803 }, { "epoch": 2.66, "grad_norm": 0.6979069709777832, "learning_rate": 1.9356945957681624e-05, "loss": 1.6565, "step": 79804 }, { "epoch": 2.66, "grad_norm": 0.698326587677002, "learning_rate": 1.9353252475609503e-05, "loss": 1.6793, "step": 79805 }, { "epoch": 2.66, "grad_norm": 0.695731520652771, "learning_rate": 1.9349559334201213e-05, "loss": 1.7489, "step": 79806 }, { "epoch": 2.66, "grad_norm": 0.7197791337966919, "learning_rate": 1.9345866533461117e-05, "loss": 1.6766, "step": 79807 }, { "epoch": 2.66, "grad_norm": 0.7211621403694153, "learning_rate": 1.9342174073393878e-05, "loss": 1.649, "step": 79808 }, { "epoch": 2.66, "grad_norm": 0.73642897605896, "learning_rate": 1.933848195400386e-05, "loss": 1.679, "step": 79809 }, { "epoch": 2.66, "grad_norm": 0.7147711515426636, "learning_rate": 1.933479017529552e-05, "loss": 1.7228, "step": 79810 }, { "epoch": 2.66, "grad_norm": 0.721291184425354, "learning_rate": 1.9331098737273466e-05, "loss": 1.715, "step": 79811 }, { "epoch": 2.66, "grad_norm": 0.7052497863769531, "learning_rate": 1.9327407639942048e-05, "loss": 1.6674, "step": 79812 }, { "epoch": 2.66, "grad_norm": 0.7234676480293274, "learning_rate": 1.932371688330584e-05, "loss": 1.6585, "step": 79813 }, { "epoch": 2.66, "grad_norm": 0.7172523736953735, "learning_rate": 1.9320026467369232e-05, "loss": 1.6898, "step": 79814 }, { "epoch": 2.66, "grad_norm": 0.7157707810401917, "learning_rate": 1.9316336392136788e-05, "loss": 1.7127, "step": 79815 }, { "epoch": 2.66, "grad_norm": 0.7150161862373352, "learning_rate": 1.9312646657612973e-05, "loss": 1.6333, "step": 79816 }, { "epoch": 2.66, "grad_norm": 0.7096759676933289, "learning_rate": 1.9308957263802215e-05, "loss": 1.6333, "step": 79817 }, { "epoch": 2.66, "grad_norm": 0.7019593119621277, "learning_rate": 1.9305268210709046e-05, "loss": 1.7388, "step": 79818 }, { "epoch": 2.66, "grad_norm": 0.715076208114624, "learning_rate": 1.9301579498337893e-05, "loss": 1.7417, "step": 79819 }, { "epoch": 2.66, "grad_norm": 0.6944438815116882, "learning_rate": 1.9297891126693322e-05, "loss": 1.6737, "step": 79820 }, { "epoch": 2.66, "grad_norm": 0.6886295080184937, "learning_rate": 1.9294203095779725e-05, "loss": 1.6377, "step": 79821 }, { "epoch": 2.66, "grad_norm": 0.7012508511543274, "learning_rate": 1.929051540560157e-05, "loss": 1.7674, "step": 79822 }, { "epoch": 2.66, "grad_norm": 0.6898316144943237, "learning_rate": 1.9286828056163417e-05, "loss": 1.673, "step": 79823 }, { "epoch": 2.66, "grad_norm": 0.6908612847328186, "learning_rate": 1.9283141047469665e-05, "loss": 1.593, "step": 79824 }, { "epoch": 2.66, "grad_norm": 0.7046957015991211, "learning_rate": 1.927945437952484e-05, "loss": 1.7629, "step": 79825 }, { "epoch": 2.66, "grad_norm": 0.695429801940918, "learning_rate": 1.927576805233334e-05, "loss": 1.6934, "step": 79826 }, { "epoch": 2.66, "grad_norm": 0.7649404406547546, "learning_rate": 1.9272082065899762e-05, "loss": 1.6354, "step": 79827 }, { "epoch": 2.66, "grad_norm": 0.718538761138916, "learning_rate": 1.9268396420228504e-05, "loss": 1.6895, "step": 79828 }, { "epoch": 2.66, "grad_norm": 0.722338855266571, "learning_rate": 1.9264711115323995e-05, "loss": 1.7018, "step": 79829 }, { "epoch": 2.66, "grad_norm": 0.7117403745651245, "learning_rate": 1.926102615119083e-05, "loss": 1.6684, "step": 79830 }, { "epoch": 2.66, "grad_norm": 0.6955069899559021, "learning_rate": 1.9257341527833404e-05, "loss": 1.6918, "step": 79831 }, { "epoch": 2.66, "grad_norm": 0.6923570036888123, "learning_rate": 1.9253657245256148e-05, "loss": 1.7013, "step": 79832 }, { "epoch": 2.66, "grad_norm": 0.7313478589057922, "learning_rate": 1.9249973303463594e-05, "loss": 1.6754, "step": 79833 }, { "epoch": 2.66, "grad_norm": 0.6851231455802917, "learning_rate": 1.9246289702460238e-05, "loss": 1.5721, "step": 79834 }, { "epoch": 2.66, "grad_norm": 0.7233544588088989, "learning_rate": 1.9242606442250543e-05, "loss": 1.7002, "step": 79835 }, { "epoch": 2.66, "grad_norm": 0.6885069012641907, "learning_rate": 1.92389235228389e-05, "loss": 1.5927, "step": 79836 }, { "epoch": 2.66, "grad_norm": 0.7184988260269165, "learning_rate": 1.923524094422988e-05, "loss": 1.6251, "step": 79837 }, { "epoch": 2.66, "grad_norm": 0.731093168258667, "learning_rate": 1.9231558706427906e-05, "loss": 1.708, "step": 79838 }, { "epoch": 2.66, "grad_norm": 0.7088910341262817, "learning_rate": 1.922787680943738e-05, "loss": 1.6876, "step": 79839 }, { "epoch": 2.66, "grad_norm": 0.7112472653388977, "learning_rate": 1.9224195253262896e-05, "loss": 1.6635, "step": 79840 }, { "epoch": 2.66, "grad_norm": 0.7280700206756592, "learning_rate": 1.9220514037908917e-05, "loss": 1.6552, "step": 79841 }, { "epoch": 2.66, "grad_norm": 0.6881490349769592, "learning_rate": 1.9216833163379776e-05, "loss": 1.6482, "step": 79842 }, { "epoch": 2.66, "grad_norm": 0.6972019672393799, "learning_rate": 1.921315262968006e-05, "loss": 1.6077, "step": 79843 }, { "epoch": 2.66, "grad_norm": 0.6962550282478333, "learning_rate": 1.920947243681421e-05, "loss": 1.677, "step": 79844 }, { "epoch": 2.66, "grad_norm": 0.7057397961616516, "learning_rate": 1.9205792584786715e-05, "loss": 1.6709, "step": 79845 }, { "epoch": 2.66, "grad_norm": 0.7105239033699036, "learning_rate": 1.920211307360198e-05, "loss": 1.6191, "step": 79846 }, { "epoch": 2.66, "grad_norm": 0.7597288489341736, "learning_rate": 1.9198433903264455e-05, "loss": 1.6202, "step": 79847 }, { "epoch": 2.66, "grad_norm": 0.6921062469482422, "learning_rate": 1.9194755073778777e-05, "loss": 1.6472, "step": 79848 }, { "epoch": 2.66, "grad_norm": 0.710322916507721, "learning_rate": 1.9191076585149213e-05, "loss": 1.6761, "step": 79849 }, { "epoch": 2.66, "grad_norm": 0.6922366619110107, "learning_rate": 1.918739843738032e-05, "loss": 1.6684, "step": 79850 }, { "epoch": 2.66, "grad_norm": 0.707269549369812, "learning_rate": 1.9183720630476562e-05, "loss": 1.7145, "step": 79851 }, { "epoch": 2.66, "grad_norm": 0.719885528087616, "learning_rate": 1.9180043164442404e-05, "loss": 1.6574, "step": 79852 }, { "epoch": 2.66, "grad_norm": 0.7063716053962708, "learning_rate": 1.9176366039282242e-05, "loss": 1.6872, "step": 79853 }, { "epoch": 2.66, "grad_norm": 0.7182907462120056, "learning_rate": 1.9172689255000607e-05, "loss": 1.6718, "step": 79854 }, { "epoch": 2.66, "grad_norm": 0.7091975212097168, "learning_rate": 1.9169012811602025e-05, "loss": 1.6852, "step": 79855 }, { "epoch": 2.66, "grad_norm": 0.6870717406272888, "learning_rate": 1.9165336709090795e-05, "loss": 1.6561, "step": 79856 }, { "epoch": 2.66, "grad_norm": 0.7025836706161499, "learning_rate": 1.916166094747148e-05, "loss": 1.6768, "step": 79857 }, { "epoch": 2.66, "grad_norm": 0.7300460338592529, "learning_rate": 1.9157985526748578e-05, "loss": 1.7631, "step": 79858 }, { "epoch": 2.66, "grad_norm": 0.7264559268951416, "learning_rate": 1.915431044692648e-05, "loss": 1.63, "step": 79859 }, { "epoch": 2.66, "grad_norm": 0.7102611660957336, "learning_rate": 1.915063570800962e-05, "loss": 1.6921, "step": 79860 }, { "epoch": 2.66, "grad_norm": 0.7118615508079529, "learning_rate": 1.914696131000253e-05, "loss": 1.6359, "step": 79861 }, { "epoch": 2.66, "grad_norm": 0.7168394327163696, "learning_rate": 1.9143287252909735e-05, "loss": 1.662, "step": 79862 }, { "epoch": 2.66, "grad_norm": 0.7226334810256958, "learning_rate": 1.91396135367355e-05, "loss": 1.7024, "step": 79863 }, { "epoch": 2.66, "grad_norm": 0.6971882581710815, "learning_rate": 1.913594016148442e-05, "loss": 1.6924, "step": 79864 }, { "epoch": 2.66, "grad_norm": 0.6870198845863342, "learning_rate": 1.9132267127160926e-05, "loss": 1.6542, "step": 79865 }, { "epoch": 2.66, "grad_norm": 0.689867377281189, "learning_rate": 1.9128594433769518e-05, "loss": 1.6476, "step": 79866 }, { "epoch": 2.66, "grad_norm": 0.697227418422699, "learning_rate": 1.9124922081314554e-05, "loss": 1.643, "step": 79867 }, { "epoch": 2.66, "grad_norm": 0.7150683999061584, "learning_rate": 1.9121250069800596e-05, "loss": 1.7248, "step": 79868 }, { "epoch": 2.66, "grad_norm": 0.7186185717582703, "learning_rate": 1.911757839923198e-05, "loss": 1.7022, "step": 79869 }, { "epoch": 2.66, "grad_norm": 0.6930358409881592, "learning_rate": 1.911390706961333e-05, "loss": 1.7665, "step": 79870 }, { "epoch": 2.66, "grad_norm": 0.7183072566986084, "learning_rate": 1.9110236080948917e-05, "loss": 1.6665, "step": 79871 }, { "epoch": 2.66, "grad_norm": 0.7357239723205566, "learning_rate": 1.9106565433243358e-05, "loss": 1.7084, "step": 79872 }, { "epoch": 2.66, "grad_norm": 0.7065205574035645, "learning_rate": 1.910289512650106e-05, "loss": 1.6592, "step": 79873 }, { "epoch": 2.66, "grad_norm": 0.7473339438438416, "learning_rate": 1.9099225160726382e-05, "loss": 1.6881, "step": 79874 }, { "epoch": 2.66, "grad_norm": 0.6961795091629028, "learning_rate": 1.9095555535923923e-05, "loss": 1.7305, "step": 79875 }, { "epoch": 2.66, "grad_norm": 0.7229730486869812, "learning_rate": 1.9091886252098043e-05, "loss": 1.6171, "step": 79876 }, { "epoch": 2.66, "grad_norm": 0.6969364285469055, "learning_rate": 1.908821730925324e-05, "loss": 1.6931, "step": 79877 }, { "epoch": 2.66, "grad_norm": 0.6912670731544495, "learning_rate": 1.9084548707393943e-05, "loss": 1.6449, "step": 79878 }, { "epoch": 2.66, "grad_norm": 0.7183989882469177, "learning_rate": 1.9080880446524583e-05, "loss": 1.7208, "step": 79879 }, { "epoch": 2.66, "grad_norm": 0.6882215142250061, "learning_rate": 1.9077212526649688e-05, "loss": 1.6632, "step": 79880 }, { "epoch": 2.66, "grad_norm": 0.6842982769012451, "learning_rate": 1.907354494777362e-05, "loss": 1.696, "step": 79881 }, { "epoch": 2.66, "grad_norm": 0.735309898853302, "learning_rate": 1.9069877709900917e-05, "loss": 1.6035, "step": 79882 }, { "epoch": 2.66, "grad_norm": 0.7437439560890198, "learning_rate": 1.9066210813035933e-05, "loss": 1.6607, "step": 79883 }, { "epoch": 2.66, "grad_norm": 0.7258899211883545, "learning_rate": 1.906254425718323e-05, "loss": 1.6781, "step": 79884 }, { "epoch": 2.66, "grad_norm": 0.7021406888961792, "learning_rate": 1.9058878042347214e-05, "loss": 1.6382, "step": 79885 }, { "epoch": 2.66, "grad_norm": 0.7148782014846802, "learning_rate": 1.9055212168532275e-05, "loss": 1.7461, "step": 79886 }, { "epoch": 2.66, "grad_norm": 0.7233626842498779, "learning_rate": 1.905154663574294e-05, "loss": 1.6274, "step": 79887 }, { "epoch": 2.66, "grad_norm": 0.6915359497070312, "learning_rate": 1.9047881443983648e-05, "loss": 1.6898, "step": 79888 }, { "epoch": 2.66, "grad_norm": 0.7150824069976807, "learning_rate": 1.9044216593258786e-05, "loss": 1.605, "step": 79889 }, { "epoch": 2.66, "grad_norm": 0.6946410536766052, "learning_rate": 1.904055208357286e-05, "loss": 1.7532, "step": 79890 }, { "epoch": 2.66, "grad_norm": 0.7118982672691345, "learning_rate": 1.9036887914930355e-05, "loss": 1.7206, "step": 79891 }, { "epoch": 2.66, "grad_norm": 0.6891211271286011, "learning_rate": 1.903322408733564e-05, "loss": 1.6381, "step": 79892 }, { "epoch": 2.66, "grad_norm": 0.7209086418151855, "learning_rate": 1.902956060079318e-05, "loss": 1.6253, "step": 79893 }, { "epoch": 2.66, "grad_norm": 0.7146274447441101, "learning_rate": 1.9025897455307438e-05, "loss": 1.5634, "step": 79894 }, { "epoch": 2.66, "grad_norm": 0.7116464972496033, "learning_rate": 1.9022234650882907e-05, "loss": 1.7077, "step": 79895 }, { "epoch": 2.66, "grad_norm": 0.7430169582366943, "learning_rate": 1.9018572187523883e-05, "loss": 1.7636, "step": 79896 }, { "epoch": 2.66, "grad_norm": 0.716242253780365, "learning_rate": 1.9014910065234934e-05, "loss": 1.649, "step": 79897 }, { "epoch": 2.66, "grad_norm": 0.7134430408477783, "learning_rate": 1.9011248284020586e-05, "loss": 1.7366, "step": 79898 }, { "epoch": 2.66, "grad_norm": 0.7083338499069214, "learning_rate": 1.9007586843885104e-05, "loss": 1.6406, "step": 79899 }, { "epoch": 2.66, "grad_norm": 0.7044683694839478, "learning_rate": 1.9003925744832982e-05, "loss": 1.7339, "step": 79900 }, { "epoch": 2.66, "grad_norm": 0.7269829511642456, "learning_rate": 1.9000264986868752e-05, "loss": 1.7066, "step": 79901 }, { "epoch": 2.66, "grad_norm": 0.7612826228141785, "learning_rate": 1.8996604569996776e-05, "loss": 1.7307, "step": 79902 }, { "epoch": 2.66, "grad_norm": 0.7029688358306885, "learning_rate": 1.8992944494221484e-05, "loss": 1.7141, "step": 79903 }, { "epoch": 2.66, "grad_norm": 0.7014933228492737, "learning_rate": 1.898928475954734e-05, "loss": 1.6737, "step": 79904 }, { "epoch": 2.66, "grad_norm": 0.6972469687461853, "learning_rate": 1.8985625365978906e-05, "loss": 1.6368, "step": 79905 }, { "epoch": 2.66, "grad_norm": 0.6925604939460754, "learning_rate": 1.8981966313520446e-05, "loss": 1.6991, "step": 79906 }, { "epoch": 2.66, "grad_norm": 0.7468312382698059, "learning_rate": 1.8978307602176458e-05, "loss": 1.7419, "step": 79907 }, { "epoch": 2.66, "grad_norm": 0.7281396389007568, "learning_rate": 1.8974649231951435e-05, "loss": 1.6782, "step": 79908 }, { "epoch": 2.66, "grad_norm": 0.7047940492630005, "learning_rate": 1.897099120284977e-05, "loss": 1.6368, "step": 79909 }, { "epoch": 2.66, "grad_norm": 0.6965925097465515, "learning_rate": 1.8967333514875873e-05, "loss": 1.6553, "step": 79910 }, { "epoch": 2.66, "grad_norm": 0.7336163520812988, "learning_rate": 1.8963676168034258e-05, "loss": 1.5865, "step": 79911 }, { "epoch": 2.66, "grad_norm": 0.6677279472351074, "learning_rate": 1.89600191623294e-05, "loss": 1.6818, "step": 79912 }, { "epoch": 2.66, "grad_norm": 0.7133028507232666, "learning_rate": 1.8956362497765555e-05, "loss": 1.7029, "step": 79913 }, { "epoch": 2.66, "grad_norm": 0.7400357723236084, "learning_rate": 1.8952706174347287e-05, "loss": 1.6949, "step": 79914 }, { "epoch": 2.66, "grad_norm": 0.7189112901687622, "learning_rate": 1.8949050192079094e-05, "loss": 1.6449, "step": 79915 }, { "epoch": 2.66, "grad_norm": 0.7022804617881775, "learning_rate": 1.8945394550965344e-05, "loss": 1.7119, "step": 79916 }, { "epoch": 2.66, "grad_norm": 0.7334188222885132, "learning_rate": 1.894173925101039e-05, "loss": 1.6568, "step": 79917 }, { "epoch": 2.66, "grad_norm": 0.7093387246131897, "learning_rate": 1.8938084292218835e-05, "loss": 1.6765, "step": 79918 }, { "epoch": 2.66, "grad_norm": 0.7475589513778687, "learning_rate": 1.893442967459501e-05, "loss": 1.709, "step": 79919 }, { "epoch": 2.66, "grad_norm": 0.6908111572265625, "learning_rate": 1.8930775398143337e-05, "loss": 1.629, "step": 79920 }, { "epoch": 2.66, "grad_norm": 0.7101497054100037, "learning_rate": 1.8927121462868287e-05, "loss": 1.6576, "step": 79921 }, { "epoch": 2.66, "grad_norm": 0.68602454662323, "learning_rate": 1.8923467868774354e-05, "loss": 1.5956, "step": 79922 }, { "epoch": 2.66, "grad_norm": 0.6879687905311584, "learning_rate": 1.8919814615865902e-05, "loss": 1.6611, "step": 79923 }, { "epoch": 2.66, "grad_norm": 0.7343973517417908, "learning_rate": 1.8916161704147326e-05, "loss": 1.6813, "step": 79924 }, { "epoch": 2.66, "grad_norm": 0.7025845646858215, "learning_rate": 1.8912509133623188e-05, "loss": 1.6351, "step": 79925 }, { "epoch": 2.66, "grad_norm": 0.6947703957557678, "learning_rate": 1.890885690429782e-05, "loss": 1.6482, "step": 79926 }, { "epoch": 2.66, "grad_norm": 0.7037648558616638, "learning_rate": 1.890520501617565e-05, "loss": 1.6391, "step": 79927 }, { "epoch": 2.66, "grad_norm": 0.7013654708862305, "learning_rate": 1.890155346926121e-05, "loss": 1.7845, "step": 79928 }, { "epoch": 2.66, "grad_norm": 0.7160888314247131, "learning_rate": 1.8897902263558794e-05, "loss": 1.6492, "step": 79929 }, { "epoch": 2.66, "grad_norm": 0.6885704398155212, "learning_rate": 1.889425139907297e-05, "loss": 1.6642, "step": 79930 }, { "epoch": 2.66, "grad_norm": 0.7068778872489929, "learning_rate": 1.8890600875808027e-05, "loss": 1.6994, "step": 79931 }, { "epoch": 2.66, "grad_norm": 0.7209128141403198, "learning_rate": 1.8886950693768533e-05, "loss": 1.746, "step": 79932 }, { "epoch": 2.66, "grad_norm": 0.7079177498817444, "learning_rate": 1.8883300852958884e-05, "loss": 1.6486, "step": 79933 }, { "epoch": 2.66, "grad_norm": 0.7157737612724304, "learning_rate": 1.887965135338344e-05, "loss": 1.5521, "step": 79934 }, { "epoch": 2.66, "grad_norm": 0.7276661396026611, "learning_rate": 1.8876002195046703e-05, "loss": 1.6059, "step": 79935 }, { "epoch": 2.66, "grad_norm": 0.7145506739616394, "learning_rate": 1.887235337795303e-05, "loss": 1.7168, "step": 79936 }, { "epoch": 2.66, "grad_norm": 0.7147529125213623, "learning_rate": 1.8868704902106957e-05, "loss": 1.6947, "step": 79937 }, { "epoch": 2.66, "grad_norm": 0.6942487359046936, "learning_rate": 1.8865056767512843e-05, "loss": 1.6204, "step": 79938 }, { "epoch": 2.66, "grad_norm": 0.6999838352203369, "learning_rate": 1.8861408974175084e-05, "loss": 1.658, "step": 79939 }, { "epoch": 2.66, "grad_norm": 0.707503616809845, "learning_rate": 1.8857761522098214e-05, "loss": 1.6685, "step": 79940 }, { "epoch": 2.66, "grad_norm": 0.7092067003250122, "learning_rate": 1.8854114411286525e-05, "loss": 1.7173, "step": 79941 }, { "epoch": 2.66, "grad_norm": 0.7010487914085388, "learning_rate": 1.8850467641744583e-05, "loss": 1.6992, "step": 79942 }, { "epoch": 2.66, "grad_norm": 0.719743013381958, "learning_rate": 1.884682121347668e-05, "loss": 1.7073, "step": 79943 }, { "epoch": 2.66, "grad_norm": 0.7377901673316956, "learning_rate": 1.8843175126487387e-05, "loss": 1.7559, "step": 79944 }, { "epoch": 2.66, "grad_norm": 0.6969282031059265, "learning_rate": 1.8839529380781026e-05, "loss": 1.6327, "step": 79945 }, { "epoch": 2.66, "grad_norm": 0.7126122713088989, "learning_rate": 1.883588397636203e-05, "loss": 1.6868, "step": 79946 }, { "epoch": 2.66, "grad_norm": 0.7071577906608582, "learning_rate": 1.8832238913234863e-05, "loss": 1.6866, "step": 79947 }, { "epoch": 2.66, "grad_norm": 0.7088301777839661, "learning_rate": 1.882859419140392e-05, "loss": 1.6975, "step": 79948 }, { "epoch": 2.66, "grad_norm": 0.7263880968093872, "learning_rate": 1.8824949810873637e-05, "loss": 1.6981, "step": 79949 }, { "epoch": 2.66, "grad_norm": 0.7013019919395447, "learning_rate": 1.88213057716484e-05, "loss": 1.6368, "step": 79950 }, { "epoch": 2.66, "grad_norm": 0.7074843049049377, "learning_rate": 1.8817662073732707e-05, "loss": 1.696, "step": 79951 }, { "epoch": 2.66, "grad_norm": 0.6956031918525696, "learning_rate": 1.8814018717130964e-05, "loss": 1.6683, "step": 79952 }, { "epoch": 2.66, "grad_norm": 0.7203537225723267, "learning_rate": 1.8810375701847523e-05, "loss": 1.7418, "step": 79953 }, { "epoch": 2.66, "grad_norm": 0.7465545535087585, "learning_rate": 1.880673302788689e-05, "loss": 1.6575, "step": 79954 }, { "epoch": 2.66, "grad_norm": 0.7217422127723694, "learning_rate": 1.8803090695253455e-05, "loss": 1.7242, "step": 79955 }, { "epoch": 2.66, "grad_norm": 0.6946706771850586, "learning_rate": 1.879944870395158e-05, "loss": 1.7042, "step": 79956 }, { "epoch": 2.66, "grad_norm": 0.7413015365600586, "learning_rate": 1.8795807053985767e-05, "loss": 1.6751, "step": 79957 }, { "epoch": 2.66, "grad_norm": 0.6874132752418518, "learning_rate": 1.8792165745360476e-05, "loss": 1.6754, "step": 79958 }, { "epoch": 2.66, "grad_norm": 0.6825020909309387, "learning_rate": 1.8788524778079973e-05, "loss": 1.5754, "step": 79959 }, { "epoch": 2.66, "grad_norm": 0.7020645141601562, "learning_rate": 1.878488415214878e-05, "loss": 1.6891, "step": 79960 }, { "epoch": 2.66, "grad_norm": 0.7063208818435669, "learning_rate": 1.8781243867571338e-05, "loss": 1.7126, "step": 79961 }, { "epoch": 2.66, "grad_norm": 0.6968262791633606, "learning_rate": 1.8777603924352035e-05, "loss": 1.6887, "step": 79962 }, { "epoch": 2.66, "grad_norm": 0.7379916906356812, "learning_rate": 1.877396432249524e-05, "loss": 1.7188, "step": 79963 }, { "epoch": 2.66, "grad_norm": 0.7166081070899963, "learning_rate": 1.877032506200544e-05, "loss": 1.6125, "step": 79964 }, { "epoch": 2.66, "grad_norm": 0.7132195830345154, "learning_rate": 1.8766686142887078e-05, "loss": 1.7331, "step": 79965 }, { "epoch": 2.66, "grad_norm": 0.7032287120819092, "learning_rate": 1.876304756514444e-05, "loss": 1.6875, "step": 79966 }, { "epoch": 2.66, "grad_norm": 0.7135195732116699, "learning_rate": 1.875940932878203e-05, "loss": 1.7303, "step": 79967 }, { "epoch": 2.66, "grad_norm": 0.7120264768600464, "learning_rate": 1.87557714338043e-05, "loss": 1.6979, "step": 79968 }, { "epoch": 2.66, "grad_norm": 0.6863250732421875, "learning_rate": 1.875213388021566e-05, "loss": 1.6054, "step": 79969 }, { "epoch": 2.66, "grad_norm": 0.7103774547576904, "learning_rate": 1.8748496668020395e-05, "loss": 1.7069, "step": 79970 }, { "epoch": 2.66, "grad_norm": 0.7233184576034546, "learning_rate": 1.8744859797223045e-05, "loss": 1.7239, "step": 79971 }, { "epoch": 2.66, "grad_norm": 0.6896337270736694, "learning_rate": 1.8741223267828066e-05, "loss": 1.664, "step": 79972 }, { "epoch": 2.66, "grad_norm": 0.6960576176643372, "learning_rate": 1.8737587079839722e-05, "loss": 1.6842, "step": 79973 }, { "epoch": 2.66, "grad_norm": 0.7087515592575073, "learning_rate": 1.873395123326248e-05, "loss": 1.6591, "step": 79974 }, { "epoch": 2.66, "grad_norm": 0.7179644107818604, "learning_rate": 1.8730315728100865e-05, "loss": 1.6987, "step": 79975 }, { "epoch": 2.66, "grad_norm": 0.6896630525588989, "learning_rate": 1.8726680564359174e-05, "loss": 1.6622, "step": 79976 }, { "epoch": 2.66, "grad_norm": 0.7189429402351379, "learning_rate": 1.8723045742041807e-05, "loss": 1.5801, "step": 79977 }, { "epoch": 2.66, "grad_norm": 0.719076931476593, "learning_rate": 1.8719411261153227e-05, "loss": 1.7434, "step": 79978 }, { "epoch": 2.66, "grad_norm": 0.692432701587677, "learning_rate": 1.871577712169786e-05, "loss": 1.6556, "step": 79979 }, { "epoch": 2.66, "grad_norm": 0.7247642278671265, "learning_rate": 1.8712143323680106e-05, "loss": 1.6168, "step": 79980 }, { "epoch": 2.66, "grad_norm": 0.737589418888092, "learning_rate": 1.8708509867104328e-05, "loss": 1.6617, "step": 79981 }, { "epoch": 2.66, "grad_norm": 0.7157292366027832, "learning_rate": 1.870487675197502e-05, "loss": 1.6816, "step": 79982 }, { "epoch": 2.66, "grad_norm": 0.7570354342460632, "learning_rate": 1.8701243978296543e-05, "loss": 1.6813, "step": 79983 }, { "epoch": 2.66, "grad_norm": 0.6986214518547058, "learning_rate": 1.8697611546073266e-05, "loss": 1.7234, "step": 79984 }, { "epoch": 2.66, "grad_norm": 0.6870231032371521, "learning_rate": 1.8693979455309683e-05, "loss": 1.5852, "step": 79985 }, { "epoch": 2.66, "grad_norm": 0.6917591094970703, "learning_rate": 1.8690347706010088e-05, "loss": 1.6663, "step": 79986 }, { "epoch": 2.66, "grad_norm": 0.7046794295310974, "learning_rate": 1.868671629817905e-05, "loss": 1.6341, "step": 79987 }, { "epoch": 2.66, "grad_norm": 1.6851414442062378, "learning_rate": 1.868308523182083e-05, "loss": 1.6982, "step": 79988 }, { "epoch": 2.66, "grad_norm": 0.7010001540184021, "learning_rate": 1.8679454506939916e-05, "loss": 1.7064, "step": 79989 }, { "epoch": 2.66, "grad_norm": 0.7161403894424438, "learning_rate": 1.8675824123540718e-05, "loss": 1.7665, "step": 79990 }, { "epoch": 2.66, "grad_norm": 0.7392652630805969, "learning_rate": 1.8672194081627556e-05, "loss": 1.7126, "step": 79991 }, { "epoch": 2.66, "grad_norm": 0.7136242389678955, "learning_rate": 1.8668564381204963e-05, "loss": 1.7186, "step": 79992 }, { "epoch": 2.66, "grad_norm": 0.7153354287147522, "learning_rate": 1.8664935022277238e-05, "loss": 1.6437, "step": 79993 }, { "epoch": 2.66, "grad_norm": 0.6842957735061646, "learning_rate": 1.866130600484884e-05, "loss": 1.6679, "step": 79994 }, { "epoch": 2.66, "grad_norm": 0.684517502784729, "learning_rate": 1.8657677328924204e-05, "loss": 1.6433, "step": 79995 }, { "epoch": 2.66, "grad_norm": 0.6892252564430237, "learning_rate": 1.865404899450762e-05, "loss": 1.6979, "step": 79996 }, { "epoch": 2.66, "grad_norm": 0.7109985947608948, "learning_rate": 1.865042100160362e-05, "loss": 1.6837, "step": 79997 }, { "epoch": 2.66, "grad_norm": 0.7128546237945557, "learning_rate": 1.8646793350216506e-05, "loss": 1.6796, "step": 79998 }, { "epoch": 2.66, "grad_norm": 0.6841771602630615, "learning_rate": 1.8643166040350798e-05, "loss": 1.6838, "step": 79999 }, { "epoch": 2.66, "grad_norm": 0.7074567079544067, "learning_rate": 1.8639539072010767e-05, "loss": 1.7086, "step": 80000 }, { "epoch": 2.66, "grad_norm": 0.697016716003418, "learning_rate": 1.8635912445200905e-05, "loss": 1.6524, "step": 80001 }, { "epoch": 2.66, "grad_norm": 0.6996104717254639, "learning_rate": 1.8632286159925614e-05, "loss": 1.7091, "step": 80002 }, { "epoch": 2.66, "grad_norm": 0.7153908610343933, "learning_rate": 1.8628660216189216e-05, "loss": 1.6375, "step": 80003 }, { "epoch": 2.66, "grad_norm": 0.7041833400726318, "learning_rate": 1.8625034613996215e-05, "loss": 1.6842, "step": 80004 }, { "epoch": 2.66, "grad_norm": 0.7250859141349792, "learning_rate": 1.862140935335097e-05, "loss": 1.7663, "step": 80005 }, { "epoch": 2.66, "grad_norm": 0.6952391862869263, "learning_rate": 1.861778443425781e-05, "loss": 1.6433, "step": 80006 }, { "epoch": 2.66, "grad_norm": 0.6926677227020264, "learning_rate": 1.8614159856721232e-05, "loss": 1.6521, "step": 80007 }, { "epoch": 2.66, "grad_norm": 0.7385072708129883, "learning_rate": 1.8610535620745635e-05, "loss": 1.721, "step": 80008 }, { "epoch": 2.66, "grad_norm": 0.7096574902534485, "learning_rate": 1.8606911726335384e-05, "loss": 1.68, "step": 80009 }, { "epoch": 2.66, "grad_norm": 0.7300832867622375, "learning_rate": 1.86032881734948e-05, "loss": 1.7065, "step": 80010 }, { "epoch": 2.66, "grad_norm": 0.7247997522354126, "learning_rate": 1.8599664962228455e-05, "loss": 1.5675, "step": 80011 }, { "epoch": 2.66, "grad_norm": 0.7018358707427979, "learning_rate": 1.859604209254064e-05, "loss": 1.6379, "step": 80012 }, { "epoch": 2.66, "grad_norm": 0.7029305100440979, "learning_rate": 1.8592419564435723e-05, "loss": 1.6777, "step": 80013 }, { "epoch": 2.66, "grad_norm": 0.7397692203521729, "learning_rate": 1.858879737791813e-05, "loss": 1.6387, "step": 80014 }, { "epoch": 2.66, "grad_norm": 0.7217448353767395, "learning_rate": 1.858517553299239e-05, "loss": 1.637, "step": 80015 }, { "epoch": 2.66, "grad_norm": 0.6982643008232117, "learning_rate": 1.858155402966267e-05, "loss": 1.7111, "step": 80016 }, { "epoch": 2.66, "grad_norm": 0.7032461762428284, "learning_rate": 1.8577932867933497e-05, "loss": 1.6435, "step": 80017 }, { "epoch": 2.66, "grad_norm": 0.7166610956192017, "learning_rate": 1.857431204780927e-05, "loss": 1.6859, "step": 80018 }, { "epoch": 2.66, "grad_norm": 0.6967801451683044, "learning_rate": 1.857069156929438e-05, "loss": 1.6846, "step": 80019 }, { "epoch": 2.66, "grad_norm": 0.7205193638801575, "learning_rate": 1.8567071432393167e-05, "loss": 1.7035, "step": 80020 }, { "epoch": 2.66, "grad_norm": 0.6900773048400879, "learning_rate": 1.856345163711005e-05, "loss": 1.6171, "step": 80021 }, { "epoch": 2.66, "grad_norm": 0.7085587382316589, "learning_rate": 1.855983218344953e-05, "loss": 1.6647, "step": 80022 }, { "epoch": 2.66, "grad_norm": 0.7290250062942505, "learning_rate": 1.8556213071415803e-05, "loss": 1.7535, "step": 80023 }, { "epoch": 2.66, "grad_norm": 0.7232149839401245, "learning_rate": 1.8552594301013367e-05, "loss": 1.702, "step": 80024 }, { "epoch": 2.66, "grad_norm": 0.7249518036842346, "learning_rate": 1.854897587224665e-05, "loss": 1.6742, "step": 80025 }, { "epoch": 2.66, "grad_norm": 0.7086496353149414, "learning_rate": 1.854535778512002e-05, "loss": 1.6952, "step": 80026 }, { "epoch": 2.66, "grad_norm": 0.6912173628807068, "learning_rate": 1.85417400396378e-05, "loss": 1.6634, "step": 80027 }, { "epoch": 2.66, "grad_norm": 0.705913245677948, "learning_rate": 1.8538122635804453e-05, "loss": 1.6216, "step": 80028 }, { "epoch": 2.66, "grad_norm": 0.7079183459281921, "learning_rate": 1.8534505573624448e-05, "loss": 1.6518, "step": 80029 }, { "epoch": 2.66, "grad_norm": 0.7268293499946594, "learning_rate": 1.853088885310198e-05, "loss": 1.8038, "step": 80030 }, { "epoch": 2.66, "grad_norm": 0.7256365418434143, "learning_rate": 1.8527272474241538e-05, "loss": 1.6521, "step": 80031 }, { "epoch": 2.66, "grad_norm": 0.725983738899231, "learning_rate": 1.8523656437047564e-05, "loss": 1.7077, "step": 80032 }, { "epoch": 2.66, "grad_norm": 0.703058123588562, "learning_rate": 1.8520040741524377e-05, "loss": 1.686, "step": 80033 }, { "epoch": 2.66, "grad_norm": 0.7179920673370361, "learning_rate": 1.8516425387676347e-05, "loss": 1.7052, "step": 80034 }, { "epoch": 2.66, "grad_norm": 0.7452796101570129, "learning_rate": 1.8512810375507968e-05, "loss": 1.679, "step": 80035 }, { "epoch": 2.66, "grad_norm": 0.7053642272949219, "learning_rate": 1.850919570502354e-05, "loss": 1.7234, "step": 80036 }, { "epoch": 2.66, "grad_norm": 0.7136377692222595, "learning_rate": 1.8505581376227453e-05, "loss": 1.7568, "step": 80037 }, { "epoch": 2.66, "grad_norm": 0.7431403398513794, "learning_rate": 1.8501967389124107e-05, "loss": 1.739, "step": 80038 }, { "epoch": 2.66, "grad_norm": 0.7100566625595093, "learning_rate": 1.849835374371793e-05, "loss": 1.6699, "step": 80039 }, { "epoch": 2.66, "grad_norm": 0.7219382524490356, "learning_rate": 1.8494740440013288e-05, "loss": 1.6963, "step": 80040 }, { "epoch": 2.66, "grad_norm": 0.7222062349319458, "learning_rate": 1.849112747801448e-05, "loss": 1.72, "step": 80041 }, { "epoch": 2.66, "grad_norm": 0.6900752782821655, "learning_rate": 1.8487514857726027e-05, "loss": 1.6893, "step": 80042 }, { "epoch": 2.66, "grad_norm": 0.7355907559394836, "learning_rate": 1.8483902579152266e-05, "loss": 1.6962, "step": 80043 }, { "epoch": 2.66, "grad_norm": 0.7025430202484131, "learning_rate": 1.848029064229749e-05, "loss": 1.7249, "step": 80044 }, { "epoch": 2.66, "grad_norm": 0.7068989872932434, "learning_rate": 1.847667904716623e-05, "loss": 1.711, "step": 80045 }, { "epoch": 2.66, "grad_norm": 0.7059952020645142, "learning_rate": 1.847306779376275e-05, "loss": 1.6712, "step": 80046 }, { "epoch": 2.66, "grad_norm": 0.7203258275985718, "learning_rate": 1.846945688209155e-05, "loss": 1.6729, "step": 80047 }, { "epoch": 2.66, "grad_norm": 0.7009905576705933, "learning_rate": 1.8465846312156884e-05, "loss": 1.6522, "step": 80048 }, { "epoch": 2.66, "grad_norm": 0.7009488344192505, "learning_rate": 1.8462236083963255e-05, "loss": 1.6952, "step": 80049 }, { "epoch": 2.66, "grad_norm": 0.7105515003204346, "learning_rate": 1.845862619751496e-05, "loss": 1.7438, "step": 80050 }, { "epoch": 2.66, "grad_norm": 0.716240406036377, "learning_rate": 1.845501665281639e-05, "loss": 1.5958, "step": 80051 }, { "epoch": 2.66, "grad_norm": 0.7295539379119873, "learning_rate": 1.845140744987198e-05, "loss": 1.7406, "step": 80052 }, { "epoch": 2.66, "grad_norm": 0.6969587206840515, "learning_rate": 1.8447798588686026e-05, "loss": 1.7098, "step": 80053 }, { "epoch": 2.66, "grad_norm": 0.7229236960411072, "learning_rate": 1.844419006926302e-05, "loss": 1.7473, "step": 80054 }, { "epoch": 2.66, "grad_norm": 0.6977488994598389, "learning_rate": 1.84405818916073e-05, "loss": 1.7174, "step": 80055 }, { "epoch": 2.66, "grad_norm": 0.7208186984062195, "learning_rate": 1.8436974055723152e-05, "loss": 1.6372, "step": 80056 }, { "epoch": 2.66, "grad_norm": 0.7069080471992493, "learning_rate": 1.843336656161508e-05, "loss": 1.6402, "step": 80057 }, { "epoch": 2.66, "grad_norm": 0.705328643321991, "learning_rate": 1.8429759409287383e-05, "loss": 1.717, "step": 80058 }, { "epoch": 2.66, "grad_norm": 0.7226839661598206, "learning_rate": 1.8426152598744482e-05, "loss": 1.7071, "step": 80059 }, { "epoch": 2.66, "grad_norm": 0.6980915069580078, "learning_rate": 1.8422546129990746e-05, "loss": 1.7535, "step": 80060 }, { "epoch": 2.66, "grad_norm": 0.6958308219909668, "learning_rate": 1.841894000303057e-05, "loss": 1.7209, "step": 80061 }, { "epoch": 2.66, "grad_norm": 0.698871910572052, "learning_rate": 1.8415334217868315e-05, "loss": 1.6987, "step": 80062 }, { "epoch": 2.66, "grad_norm": 0.6995183229446411, "learning_rate": 1.8411728774508284e-05, "loss": 1.644, "step": 80063 }, { "epoch": 2.66, "grad_norm": 0.7043213248252869, "learning_rate": 1.8408123672955e-05, "loss": 1.6928, "step": 80064 }, { "epoch": 2.66, "grad_norm": 0.7014012932777405, "learning_rate": 1.8404518913212762e-05, "loss": 1.6148, "step": 80065 }, { "epoch": 2.66, "grad_norm": 0.7392899990081787, "learning_rate": 1.84009144952859e-05, "loss": 1.6999, "step": 80066 }, { "epoch": 2.66, "grad_norm": 0.716899037361145, "learning_rate": 1.8397310419178813e-05, "loss": 1.7202, "step": 80067 }, { "epoch": 2.66, "grad_norm": 0.6946039795875549, "learning_rate": 1.8393706684895994e-05, "loss": 1.6251, "step": 80068 }, { "epoch": 2.66, "grad_norm": 0.6980819702148438, "learning_rate": 1.8390103292441673e-05, "loss": 1.6894, "step": 80069 }, { "epoch": 2.66, "grad_norm": 0.7201705574989319, "learning_rate": 1.8386500241820247e-05, "loss": 1.7292, "step": 80070 }, { "epoch": 2.66, "grad_norm": 0.7114152908325195, "learning_rate": 1.838289753303618e-05, "loss": 1.6383, "step": 80071 }, { "epoch": 2.66, "grad_norm": 0.7019633650779724, "learning_rate": 1.8379295166093733e-05, "loss": 1.6857, "step": 80072 }, { "epoch": 2.66, "grad_norm": 0.6908528804779053, "learning_rate": 1.837569314099734e-05, "loss": 1.6599, "step": 80073 }, { "epoch": 2.66, "grad_norm": 0.7007368803024292, "learning_rate": 1.8372091457751325e-05, "loss": 1.6555, "step": 80074 }, { "epoch": 2.66, "grad_norm": 0.7094987630844116, "learning_rate": 1.836849011636019e-05, "loss": 1.6404, "step": 80075 }, { "epoch": 2.66, "grad_norm": 0.7291756272315979, "learning_rate": 1.836488911682813e-05, "loss": 1.6611, "step": 80076 }, { "epoch": 2.66, "grad_norm": 0.6970661282539368, "learning_rate": 1.8361288459159573e-05, "loss": 1.7321, "step": 80077 }, { "epoch": 2.66, "grad_norm": 0.7358632683753967, "learning_rate": 1.8357688143358984e-05, "loss": 1.6745, "step": 80078 }, { "epoch": 2.66, "grad_norm": 0.7010077238082886, "learning_rate": 1.8354088169430657e-05, "loss": 1.5993, "step": 80079 }, { "epoch": 2.66, "grad_norm": 0.6906173229217529, "learning_rate": 1.8350488537378925e-05, "loss": 1.6568, "step": 80080 }, { "epoch": 2.66, "grad_norm": 0.6970812082290649, "learning_rate": 1.8346889247208184e-05, "loss": 1.6379, "step": 80081 }, { "epoch": 2.66, "grad_norm": 0.713211715221405, "learning_rate": 1.8343290298922932e-05, "loss": 1.6387, "step": 80082 }, { "epoch": 2.66, "grad_norm": 0.7111765146255493, "learning_rate": 1.8339691692527326e-05, "loss": 1.639, "step": 80083 }, { "epoch": 2.66, "grad_norm": 0.7135704159736633, "learning_rate": 1.8336093428025833e-05, "loss": 1.7868, "step": 80084 }, { "epoch": 2.66, "grad_norm": 0.723410964012146, "learning_rate": 1.8332495505422883e-05, "loss": 1.7029, "step": 80085 }, { "epoch": 2.66, "grad_norm": 0.696112334728241, "learning_rate": 1.832889792472274e-05, "loss": 1.6581, "step": 80086 }, { "epoch": 2.66, "grad_norm": 0.7057784795761108, "learning_rate": 1.83253006859298e-05, "loss": 1.6585, "step": 80087 }, { "epoch": 2.66, "grad_norm": 0.7189273834228516, "learning_rate": 1.832170378904846e-05, "loss": 1.699, "step": 80088 }, { "epoch": 2.66, "grad_norm": 0.7162585258483887, "learning_rate": 1.8318107234083114e-05, "loss": 1.7083, "step": 80089 }, { "epoch": 2.66, "grad_norm": 0.7230827808380127, "learning_rate": 1.8314511021038024e-05, "loss": 1.631, "step": 80090 }, { "epoch": 2.66, "grad_norm": 0.737360954284668, "learning_rate": 1.8310915149917593e-05, "loss": 1.6231, "step": 80091 }, { "epoch": 2.66, "grad_norm": 0.7161127328872681, "learning_rate": 1.8307319620726278e-05, "loss": 1.6932, "step": 80092 }, { "epoch": 2.66, "grad_norm": 0.7144455313682556, "learning_rate": 1.8303724433468346e-05, "loss": 1.6715, "step": 80093 }, { "epoch": 2.66, "grad_norm": 0.7091744542121887, "learning_rate": 1.830012958814816e-05, "loss": 1.6468, "step": 80094 }, { "epoch": 2.66, "grad_norm": 0.6845646500587463, "learning_rate": 1.829653508477015e-05, "loss": 1.6971, "step": 80095 }, { "epoch": 2.66, "grad_norm": 0.7009593844413757, "learning_rate": 1.8292940923338573e-05, "loss": 1.6155, "step": 80096 }, { "epoch": 2.66, "grad_norm": 0.6819149851799011, "learning_rate": 1.828934710385793e-05, "loss": 1.6421, "step": 80097 }, { "epoch": 2.66, "grad_norm": 0.7003397345542908, "learning_rate": 1.8285753626332457e-05, "loss": 1.6514, "step": 80098 }, { "epoch": 2.66, "grad_norm": 0.7106596827507019, "learning_rate": 1.8282160490766607e-05, "loss": 1.6653, "step": 80099 }, { "epoch": 2.66, "grad_norm": 0.6965999603271484, "learning_rate": 1.8278567697164716e-05, "loss": 1.6443, "step": 80100 }, { "epoch": 2.66, "grad_norm": 0.7305214405059814, "learning_rate": 1.8274975245531076e-05, "loss": 1.701, "step": 80101 }, { "epoch": 2.67, "grad_norm": 0.7246145009994507, "learning_rate": 1.8271383135870156e-05, "loss": 1.7133, "step": 80102 }, { "epoch": 2.67, "grad_norm": 0.7039104104042053, "learning_rate": 1.8267791368186245e-05, "loss": 1.6497, "step": 80103 }, { "epoch": 2.67, "grad_norm": 0.7060051560401917, "learning_rate": 1.826419994248375e-05, "loss": 1.6321, "step": 80104 }, { "epoch": 2.67, "grad_norm": 0.7110469937324524, "learning_rate": 1.8260608858766957e-05, "loss": 1.7352, "step": 80105 }, { "epoch": 2.67, "grad_norm": 0.7009638547897339, "learning_rate": 1.82570181170403e-05, "loss": 1.6267, "step": 80106 }, { "epoch": 2.67, "grad_norm": 0.7342575788497925, "learning_rate": 1.8253427717308145e-05, "loss": 1.6564, "step": 80107 }, { "epoch": 2.67, "grad_norm": 0.695045530796051, "learning_rate": 1.824983765957475e-05, "loss": 1.6631, "step": 80108 }, { "epoch": 2.67, "grad_norm": 0.7101850509643555, "learning_rate": 1.824624794384458e-05, "loss": 1.6106, "step": 80109 }, { "epoch": 2.67, "grad_norm": 0.7146305441856384, "learning_rate": 1.8242658570121904e-05, "loss": 1.665, "step": 80110 }, { "epoch": 2.67, "grad_norm": 0.6939318180084229, "learning_rate": 1.823906953841118e-05, "loss": 1.7403, "step": 80111 }, { "epoch": 2.67, "grad_norm": 0.7233728766441345, "learning_rate": 1.8235480848716734e-05, "loss": 1.6757, "step": 80112 }, { "epoch": 2.67, "grad_norm": 0.7082394957542419, "learning_rate": 1.8231892501042833e-05, "loss": 1.6623, "step": 80113 }, { "epoch": 2.67, "grad_norm": 0.7006333470344543, "learning_rate": 1.8228304495393908e-05, "loss": 1.6621, "step": 80114 }, { "epoch": 2.67, "grad_norm": 0.6911534070968628, "learning_rate": 1.8224716831774288e-05, "loss": 1.6986, "step": 80115 }, { "epoch": 2.67, "grad_norm": 0.7034728527069092, "learning_rate": 1.8221129510188405e-05, "loss": 1.6448, "step": 80116 }, { "epoch": 2.67, "grad_norm": 0.7197301387786865, "learning_rate": 1.8217542530640484e-05, "loss": 1.6302, "step": 80117 }, { "epoch": 2.67, "grad_norm": 0.7110146284103394, "learning_rate": 1.8213955893135025e-05, "loss": 1.628, "step": 80118 }, { "epoch": 2.67, "grad_norm": 0.6930983066558838, "learning_rate": 1.821036959767629e-05, "loss": 1.752, "step": 80119 }, { "epoch": 2.67, "grad_norm": 0.7708705067634583, "learning_rate": 1.8206783644268574e-05, "loss": 1.6874, "step": 80120 }, { "epoch": 2.67, "grad_norm": 0.697889506816864, "learning_rate": 1.8203198032916378e-05, "loss": 1.6907, "step": 80121 }, { "epoch": 2.67, "grad_norm": 0.7326269745826721, "learning_rate": 1.819961276362396e-05, "loss": 1.6672, "step": 80122 }, { "epoch": 2.67, "grad_norm": 0.7525399923324585, "learning_rate": 1.8196027836395654e-05, "loss": 1.6374, "step": 80123 }, { "epoch": 2.67, "grad_norm": 0.7104133367538452, "learning_rate": 1.8192443251235854e-05, "loss": 1.7402, "step": 80124 }, { "epoch": 2.67, "grad_norm": 0.728704571723938, "learning_rate": 1.8188859008148927e-05, "loss": 1.612, "step": 80125 }, { "epoch": 2.67, "grad_norm": 0.7360197305679321, "learning_rate": 1.818527510713923e-05, "loss": 1.6884, "step": 80126 }, { "epoch": 2.67, "grad_norm": 0.7154679298400879, "learning_rate": 1.818169154821103e-05, "loss": 1.7301, "step": 80127 }, { "epoch": 2.67, "grad_norm": 0.7000536918640137, "learning_rate": 1.817810833136879e-05, "loss": 1.6661, "step": 80128 }, { "epoch": 2.67, "grad_norm": 0.7209521532058716, "learning_rate": 1.817452545661677e-05, "loss": 1.6715, "step": 80129 }, { "epoch": 2.67, "grad_norm": 0.705374538898468, "learning_rate": 1.8170942923959342e-05, "loss": 1.6967, "step": 80130 }, { "epoch": 2.67, "grad_norm": 0.7264490723609924, "learning_rate": 1.8167360733400827e-05, "loss": 1.7354, "step": 80131 }, { "epoch": 2.67, "grad_norm": 0.702102541923523, "learning_rate": 1.8163778884945722e-05, "loss": 1.5832, "step": 80132 }, { "epoch": 2.67, "grad_norm": 0.718988299369812, "learning_rate": 1.8160197378598196e-05, "loss": 1.7315, "step": 80133 }, { "epoch": 2.67, "grad_norm": 0.7117184400558472, "learning_rate": 1.8156616214362608e-05, "loss": 1.6462, "step": 80134 }, { "epoch": 2.67, "grad_norm": 0.7244518399238586, "learning_rate": 1.8153035392243454e-05, "loss": 1.7017, "step": 80135 }, { "epoch": 2.67, "grad_norm": 0.7255874872207642, "learning_rate": 1.8149454912244964e-05, "loss": 1.6999, "step": 80136 }, { "epoch": 2.67, "grad_norm": 0.7237964868545532, "learning_rate": 1.814587477437147e-05, "loss": 1.6672, "step": 80137 }, { "epoch": 2.67, "grad_norm": 0.7236534953117371, "learning_rate": 1.8142294978627336e-05, "loss": 1.6547, "step": 80138 }, { "epoch": 2.67, "grad_norm": 0.7143996357917786, "learning_rate": 1.8138715525017056e-05, "loss": 1.6544, "step": 80139 }, { "epoch": 2.67, "grad_norm": 0.7270417809486389, "learning_rate": 1.8135136413544728e-05, "loss": 1.6304, "step": 80140 }, { "epoch": 2.67, "grad_norm": 0.6971853375434875, "learning_rate": 1.813155764421481e-05, "loss": 1.6356, "step": 80141 }, { "epoch": 2.67, "grad_norm": 0.7346441149711609, "learning_rate": 1.8127979217031707e-05, "loss": 1.6632, "step": 80142 }, { "epoch": 2.67, "grad_norm": 0.7346488833427429, "learning_rate": 1.8124401131999678e-05, "loss": 1.7818, "step": 80143 }, { "epoch": 2.67, "grad_norm": 0.7208580374717712, "learning_rate": 1.8120823389123084e-05, "loss": 1.6242, "step": 80144 }, { "epoch": 2.67, "grad_norm": 0.7301065921783447, "learning_rate": 1.811724598840626e-05, "loss": 1.6675, "step": 80145 }, { "epoch": 2.67, "grad_norm": 0.699260950088501, "learning_rate": 1.8113668929853665e-05, "loss": 1.6877, "step": 80146 }, { "epoch": 2.67, "grad_norm": 0.6963998675346375, "learning_rate": 1.811009221346943e-05, "loss": 1.6821, "step": 80147 }, { "epoch": 2.67, "grad_norm": 0.706659197807312, "learning_rate": 1.810651583925802e-05, "loss": 1.6902, "step": 80148 }, { "epoch": 2.67, "grad_norm": 0.7120113372802734, "learning_rate": 1.8102939807223828e-05, "loss": 1.7245, "step": 80149 }, { "epoch": 2.67, "grad_norm": 0.7095046043395996, "learning_rate": 1.809936411737112e-05, "loss": 1.6672, "step": 80150 }, { "epoch": 2.67, "grad_norm": 0.7142831087112427, "learning_rate": 1.809578876970419e-05, "loss": 1.65, "step": 80151 }, { "epoch": 2.67, "grad_norm": 0.7439848780632019, "learning_rate": 1.8092213764227503e-05, "loss": 1.681, "step": 80152 }, { "epoch": 2.67, "grad_norm": 0.7069353461265564, "learning_rate": 1.8088639100945323e-05, "loss": 1.7139, "step": 80153 }, { "epoch": 2.67, "grad_norm": 0.7013294100761414, "learning_rate": 1.8085064779861946e-05, "loss": 1.7758, "step": 80154 }, { "epoch": 2.67, "grad_norm": 0.7038429975509644, "learning_rate": 1.8081490800981767e-05, "loss": 1.6961, "step": 80155 }, { "epoch": 2.67, "grad_norm": 0.7411544322967529, "learning_rate": 1.8077917164309154e-05, "loss": 1.7638, "step": 80156 }, { "epoch": 2.67, "grad_norm": 0.7083513140678406, "learning_rate": 1.807434386984843e-05, "loss": 1.6201, "step": 80157 }, { "epoch": 2.67, "grad_norm": 1.030696153640747, "learning_rate": 1.807077091760386e-05, "loss": 1.6375, "step": 80158 }, { "epoch": 2.67, "grad_norm": 0.7164247632026672, "learning_rate": 1.806719830757988e-05, "loss": 1.7411, "step": 80159 }, { "epoch": 2.67, "grad_norm": 0.7341731786727905, "learning_rate": 1.8063626039780776e-05, "loss": 1.6287, "step": 80160 }, { "epoch": 2.67, "grad_norm": 0.7146281003952026, "learning_rate": 1.806005411421082e-05, "loss": 1.7052, "step": 80161 }, { "epoch": 2.67, "grad_norm": 0.7140101194381714, "learning_rate": 1.8056482530874505e-05, "loss": 1.6527, "step": 80162 }, { "epoch": 2.67, "grad_norm": 0.7016486525535583, "learning_rate": 1.8052911289775997e-05, "loss": 1.619, "step": 80163 }, { "epoch": 2.67, "grad_norm": 0.7158859372138977, "learning_rate": 1.8049340390919786e-05, "loss": 1.6888, "step": 80164 }, { "epoch": 2.67, "grad_norm": 0.7055081129074097, "learning_rate": 1.8045769834310076e-05, "loss": 1.6322, "step": 80165 }, { "epoch": 2.67, "grad_norm": 0.7046574950218201, "learning_rate": 1.8042199619951294e-05, "loss": 1.6692, "step": 80166 }, { "epoch": 2.67, "grad_norm": 0.719674825668335, "learning_rate": 1.8038629747847733e-05, "loss": 1.6893, "step": 80167 }, { "epoch": 2.67, "grad_norm": 0.7057195901870728, "learning_rate": 1.8035060218003695e-05, "loss": 1.6968, "step": 80168 }, { "epoch": 2.67, "grad_norm": 0.692842960357666, "learning_rate": 1.8031491030423604e-05, "loss": 1.6929, "step": 80169 }, { "epoch": 2.67, "grad_norm": 0.7015544176101685, "learning_rate": 1.8027922185111665e-05, "loss": 1.7076, "step": 80170 }, { "epoch": 2.67, "grad_norm": 0.7309483289718628, "learning_rate": 1.8024353682072367e-05, "loss": 1.6698, "step": 80171 }, { "epoch": 2.67, "grad_norm": 0.7002151608467102, "learning_rate": 1.802078552130991e-05, "loss": 1.696, "step": 80172 }, { "epoch": 2.67, "grad_norm": 0.736709475517273, "learning_rate": 1.801721770282866e-05, "loss": 1.6899, "step": 80173 }, { "epoch": 2.67, "grad_norm": 0.7117374539375305, "learning_rate": 1.8013650226633003e-05, "loss": 1.7572, "step": 80174 }, { "epoch": 2.67, "grad_norm": 0.7449296712875366, "learning_rate": 1.801008309272718e-05, "loss": 1.7829, "step": 80175 }, { "epoch": 2.67, "grad_norm": 0.6981378793716431, "learning_rate": 1.8006516301115583e-05, "loss": 1.6869, "step": 80176 }, { "epoch": 2.67, "grad_norm": 0.695503830909729, "learning_rate": 1.8002949851802506e-05, "loss": 1.6173, "step": 80177 }, { "epoch": 2.67, "grad_norm": 0.709664523601532, "learning_rate": 1.799938374479235e-05, "loss": 1.7123, "step": 80178 }, { "epoch": 2.67, "grad_norm": 0.7263035774230957, "learning_rate": 1.7995817980089377e-05, "loss": 1.742, "step": 80179 }, { "epoch": 2.67, "grad_norm": 0.7223533987998962, "learning_rate": 1.7992252557697883e-05, "loss": 1.6571, "step": 80180 }, { "epoch": 2.67, "grad_norm": 0.7365595102310181, "learning_rate": 1.79886874776223e-05, "loss": 1.6777, "step": 80181 }, { "epoch": 2.67, "grad_norm": 0.7078617215156555, "learning_rate": 1.7985122739866886e-05, "loss": 1.6585, "step": 80182 }, { "epoch": 2.67, "grad_norm": 0.7156168818473816, "learning_rate": 1.7981558344435942e-05, "loss": 1.6743, "step": 80183 }, { "epoch": 2.67, "grad_norm": 0.6951332688331604, "learning_rate": 1.7977994291333797e-05, "loss": 1.6882, "step": 80184 }, { "epoch": 2.67, "grad_norm": 0.7119110226631165, "learning_rate": 1.7974430580564914e-05, "loss": 1.6509, "step": 80185 }, { "epoch": 2.67, "grad_norm": 0.7123966813087463, "learning_rate": 1.797086721213349e-05, "loss": 1.6666, "step": 80186 }, { "epoch": 2.67, "grad_norm": 0.7223088145256042, "learning_rate": 1.796730418604382e-05, "loss": 1.6632, "step": 80187 }, { "epoch": 2.67, "grad_norm": 0.7330163717269897, "learning_rate": 1.7963741502300368e-05, "loss": 1.6903, "step": 80188 }, { "epoch": 2.67, "grad_norm": 0.7062559127807617, "learning_rate": 1.7960179160907363e-05, "loss": 1.698, "step": 80189 }, { "epoch": 2.67, "grad_norm": 0.7074151635169983, "learning_rate": 1.795661716186907e-05, "loss": 1.6836, "step": 80190 }, { "epoch": 2.67, "grad_norm": 0.6979343891143799, "learning_rate": 1.7953055505189917e-05, "loss": 1.6846, "step": 80191 }, { "epoch": 2.67, "grad_norm": 0.7197891473770142, "learning_rate": 1.7949494190874302e-05, "loss": 1.6707, "step": 80192 }, { "epoch": 2.67, "grad_norm": 0.7383180856704712, "learning_rate": 1.794593321892632e-05, "loss": 1.7, "step": 80193 }, { "epoch": 2.67, "grad_norm": 0.6905651688575745, "learning_rate": 1.794237258935044e-05, "loss": 1.6626, "step": 80194 }, { "epoch": 2.67, "grad_norm": 0.7114837765693665, "learning_rate": 1.7938812302150985e-05, "loss": 1.7152, "step": 80195 }, { "epoch": 2.67, "grad_norm": 0.6972317695617676, "learning_rate": 1.7935252357332253e-05, "loss": 1.6305, "step": 80196 }, { "epoch": 2.67, "grad_norm": 0.7249357104301453, "learning_rate": 1.7931692754898542e-05, "loss": 1.6333, "step": 80197 }, { "epoch": 2.67, "grad_norm": 0.6877482533454895, "learning_rate": 1.792813349485418e-05, "loss": 1.6967, "step": 80198 }, { "epoch": 2.67, "grad_norm": 0.6948264837265015, "learning_rate": 1.79245745772036e-05, "loss": 1.6423, "step": 80199 }, { "epoch": 2.67, "grad_norm": 0.7062317728996277, "learning_rate": 1.7921016001950927e-05, "loss": 1.5802, "step": 80200 }, { "epoch": 2.67, "grad_norm": 0.7035962343215942, "learning_rate": 1.7917457769100564e-05, "loss": 1.6477, "step": 80201 }, { "epoch": 2.67, "grad_norm": 0.7053076028823853, "learning_rate": 1.79138998786569e-05, "loss": 1.7413, "step": 80202 }, { "epoch": 2.67, "grad_norm": 0.709462583065033, "learning_rate": 1.7910342330624205e-05, "loss": 1.6757, "step": 80203 }, { "epoch": 2.67, "grad_norm": 0.706689178943634, "learning_rate": 1.790678512500674e-05, "loss": 1.6132, "step": 80204 }, { "epoch": 2.67, "grad_norm": 0.7389137148857117, "learning_rate": 1.7903228261808866e-05, "loss": 1.7129, "step": 80205 }, { "epoch": 2.67, "grad_norm": 0.7307288646697998, "learning_rate": 1.7899671741035015e-05, "loss": 1.6978, "step": 80206 }, { "epoch": 2.67, "grad_norm": 0.7185246348381042, "learning_rate": 1.7896115562689317e-05, "loss": 1.6759, "step": 80207 }, { "epoch": 2.67, "grad_norm": 0.7053744196891785, "learning_rate": 1.7892559726776136e-05, "loss": 1.6421, "step": 80208 }, { "epoch": 2.67, "grad_norm": 0.7244311571121216, "learning_rate": 1.7889004233299896e-05, "loss": 1.6912, "step": 80209 }, { "epoch": 2.67, "grad_norm": 0.7093397974967957, "learning_rate": 1.7885449082264803e-05, "loss": 1.674, "step": 80210 }, { "epoch": 2.67, "grad_norm": 0.711387574672699, "learning_rate": 1.7881894273675213e-05, "loss": 1.6512, "step": 80211 }, { "epoch": 2.67, "grad_norm": 0.7124900221824646, "learning_rate": 1.7878339807535426e-05, "loss": 1.7207, "step": 80212 }, { "epoch": 2.67, "grad_norm": 0.7172736525535583, "learning_rate": 1.787478568384977e-05, "loss": 1.7365, "step": 80213 }, { "epoch": 2.67, "grad_norm": 0.7261880040168762, "learning_rate": 1.787123190262254e-05, "loss": 1.6557, "step": 80214 }, { "epoch": 2.67, "grad_norm": 0.7309228777885437, "learning_rate": 1.786767846385807e-05, "loss": 1.691, "step": 80215 }, { "epoch": 2.67, "grad_norm": 0.7221019268035889, "learning_rate": 1.786412536756069e-05, "loss": 1.6727, "step": 80216 }, { "epoch": 2.67, "grad_norm": 0.693999171257019, "learning_rate": 1.7860572613734693e-05, "loss": 1.7309, "step": 80217 }, { "epoch": 2.67, "grad_norm": 0.7126409411430359, "learning_rate": 1.7857020202384374e-05, "loss": 1.6827, "step": 80218 }, { "epoch": 2.67, "grad_norm": 0.7025756239891052, "learning_rate": 1.785346813351407e-05, "loss": 1.6303, "step": 80219 }, { "epoch": 2.67, "grad_norm": 0.693617582321167, "learning_rate": 1.784991640712804e-05, "loss": 1.6384, "step": 80220 }, { "epoch": 2.67, "grad_norm": 0.7274131178855896, "learning_rate": 1.7846365023230712e-05, "loss": 1.6317, "step": 80221 }, { "epoch": 2.67, "grad_norm": 0.6972021460533142, "learning_rate": 1.7842813981826254e-05, "loss": 1.7072, "step": 80222 }, { "epoch": 2.67, "grad_norm": 0.7116000056266785, "learning_rate": 1.7839263282919093e-05, "loss": 1.654, "step": 80223 }, { "epoch": 2.67, "grad_norm": 0.6987219452857971, "learning_rate": 1.783571292651349e-05, "loss": 1.7315, "step": 80224 }, { "epoch": 2.67, "grad_norm": 0.7092841267585754, "learning_rate": 1.7832162912613712e-05, "loss": 1.6269, "step": 80225 }, { "epoch": 2.67, "grad_norm": 0.7071607708930969, "learning_rate": 1.782861324122419e-05, "loss": 1.6945, "step": 80226 }, { "epoch": 2.67, "grad_norm": 0.7201122045516968, "learning_rate": 1.782506391234908e-05, "loss": 1.6867, "step": 80227 }, { "epoch": 2.67, "grad_norm": 0.7236682176589966, "learning_rate": 1.782151492599282e-05, "loss": 1.6701, "step": 80228 }, { "epoch": 2.67, "grad_norm": 0.6928906440734863, "learning_rate": 1.7817966282159636e-05, "loss": 1.6519, "step": 80229 }, { "epoch": 2.67, "grad_norm": 0.7212448716163635, "learning_rate": 1.7814417980853856e-05, "loss": 1.5934, "step": 80230 }, { "epoch": 2.67, "grad_norm": 0.7224757075309753, "learning_rate": 1.7810870022079815e-05, "loss": 1.7051, "step": 80231 }, { "epoch": 2.67, "grad_norm": 0.7270938754081726, "learning_rate": 1.7807322405841774e-05, "loss": 1.6688, "step": 80232 }, { "epoch": 2.67, "grad_norm": 0.6944916844367981, "learning_rate": 1.7803775132144094e-05, "loss": 1.6324, "step": 80233 }, { "epoch": 2.67, "grad_norm": 0.712349534034729, "learning_rate": 1.7800228200991007e-05, "loss": 1.656, "step": 80234 }, { "epoch": 2.67, "grad_norm": 0.72907555103302, "learning_rate": 1.7796681612386908e-05, "loss": 1.6868, "step": 80235 }, { "epoch": 2.67, "grad_norm": 0.7217978835105896, "learning_rate": 1.7793135366336064e-05, "loss": 1.6701, "step": 80236 }, { "epoch": 2.67, "grad_norm": 0.7000287175178528, "learning_rate": 1.7789589462842736e-05, "loss": 1.6974, "step": 80237 }, { "epoch": 2.67, "grad_norm": 0.6972551941871643, "learning_rate": 1.7786043901911283e-05, "loss": 1.6022, "step": 80238 }, { "epoch": 2.67, "grad_norm": 0.6898464560508728, "learning_rate": 1.778249868354601e-05, "loss": 1.6841, "step": 80239 }, { "epoch": 2.67, "grad_norm": 0.692267894744873, "learning_rate": 1.777895380775114e-05, "loss": 1.7072, "step": 80240 }, { "epoch": 2.67, "grad_norm": 0.7244218587875366, "learning_rate": 1.7775409274531073e-05, "loss": 1.6616, "step": 80241 }, { "epoch": 2.67, "grad_norm": 0.7085521221160889, "learning_rate": 1.7771865083890068e-05, "loss": 1.6711, "step": 80242 }, { "epoch": 2.67, "grad_norm": 0.7081199288368225, "learning_rate": 1.7768321235832493e-05, "loss": 1.6806, "step": 80243 }, { "epoch": 2.67, "grad_norm": 0.7278807759284973, "learning_rate": 1.776477773036251e-05, "loss": 1.6749, "step": 80244 }, { "epoch": 2.67, "grad_norm": 0.7151614427566528, "learning_rate": 1.776123456748455e-05, "loss": 1.6884, "step": 80245 }, { "epoch": 2.67, "grad_norm": 0.7114530801773071, "learning_rate": 1.7757691747202874e-05, "loss": 1.706, "step": 80246 }, { "epoch": 2.67, "grad_norm": 0.7272720336914062, "learning_rate": 1.775414926952171e-05, "loss": 1.6717, "step": 80247 }, { "epoch": 2.67, "grad_norm": 0.716288685798645, "learning_rate": 1.7750607134445428e-05, "loss": 1.6822, "step": 80248 }, { "epoch": 2.67, "grad_norm": 0.7446630597114563, "learning_rate": 1.7747065341978418e-05, "loss": 1.7285, "step": 80249 }, { "epoch": 2.67, "grad_norm": 0.7117995023727417, "learning_rate": 1.7743523892124777e-05, "loss": 1.6635, "step": 80250 }, { "epoch": 2.67, "grad_norm": 0.7365631461143494, "learning_rate": 1.773998278488894e-05, "loss": 1.7934, "step": 80251 }, { "epoch": 2.67, "grad_norm": 0.7069038152694702, "learning_rate": 1.7736442020275197e-05, "loss": 1.6414, "step": 80252 }, { "epoch": 2.67, "grad_norm": 0.7378693222999573, "learning_rate": 1.7732901598287818e-05, "loss": 1.7412, "step": 80253 }, { "epoch": 2.67, "grad_norm": 0.7033772468566895, "learning_rate": 1.7729361518931096e-05, "loss": 1.7215, "step": 80254 }, { "epoch": 2.67, "grad_norm": 0.7188907265663147, "learning_rate": 1.7725821782209326e-05, "loss": 1.6786, "step": 80255 }, { "epoch": 2.67, "grad_norm": 0.6979963183403015, "learning_rate": 1.7722282388126873e-05, "loss": 1.6764, "step": 80256 }, { "epoch": 2.67, "grad_norm": 0.7270650863647461, "learning_rate": 1.7718743336687934e-05, "loss": 1.6392, "step": 80257 }, { "epoch": 2.67, "grad_norm": 0.7166037559509277, "learning_rate": 1.7715204627896806e-05, "loss": 1.6134, "step": 80258 }, { "epoch": 2.67, "grad_norm": 0.7019836902618408, "learning_rate": 1.7711666261757886e-05, "loss": 1.6895, "step": 80259 }, { "epoch": 2.67, "grad_norm": 0.6977172493934631, "learning_rate": 1.770812823827543e-05, "loss": 1.6139, "step": 80260 }, { "epoch": 2.67, "grad_norm": 0.6863207221031189, "learning_rate": 1.7704590557453646e-05, "loss": 1.6475, "step": 80261 }, { "epoch": 2.67, "grad_norm": 0.720423698425293, "learning_rate": 1.7701053219296922e-05, "loss": 1.6763, "step": 80262 }, { "epoch": 2.67, "grad_norm": 0.7049174904823303, "learning_rate": 1.7697516223809593e-05, "loss": 1.6322, "step": 80263 }, { "epoch": 2.67, "grad_norm": 0.7328202128410339, "learning_rate": 1.7693979570995785e-05, "loss": 1.7067, "step": 80264 }, { "epoch": 2.67, "grad_norm": 0.7166823148727417, "learning_rate": 1.76904432608599e-05, "loss": 1.6879, "step": 80265 }, { "epoch": 2.67, "grad_norm": 0.7225947976112366, "learning_rate": 1.768690729340626e-05, "loss": 1.7656, "step": 80266 }, { "epoch": 2.67, "grad_norm": 0.691265881061554, "learning_rate": 1.7683371668639133e-05, "loss": 1.6496, "step": 80267 }, { "epoch": 2.67, "grad_norm": 0.710018515586853, "learning_rate": 1.7679836386562752e-05, "loss": 1.6292, "step": 80268 }, { "epoch": 2.67, "grad_norm": 0.7079322338104248, "learning_rate": 1.7676301447181475e-05, "loss": 1.6205, "step": 80269 }, { "epoch": 2.67, "grad_norm": 0.7244492173194885, "learning_rate": 1.7672766850499597e-05, "loss": 1.6681, "step": 80270 }, { "epoch": 2.67, "grad_norm": 0.7052425742149353, "learning_rate": 1.766923259652132e-05, "loss": 1.7327, "step": 80271 }, { "epoch": 2.67, "grad_norm": 0.7043890357017517, "learning_rate": 1.7665698685250973e-05, "loss": 1.6468, "step": 80272 }, { "epoch": 2.67, "grad_norm": 0.7005017995834351, "learning_rate": 1.766216511669295e-05, "loss": 1.6119, "step": 80273 }, { "epoch": 2.67, "grad_norm": 0.715674638748169, "learning_rate": 1.7658631890851448e-05, "loss": 1.743, "step": 80274 }, { "epoch": 2.67, "grad_norm": 0.6896041631698608, "learning_rate": 1.76550990077307e-05, "loss": 1.6645, "step": 80275 }, { "epoch": 2.67, "grad_norm": 0.7263352274894714, "learning_rate": 1.765156646733513e-05, "loss": 1.673, "step": 80276 }, { "epoch": 2.67, "grad_norm": 0.7170844674110413, "learning_rate": 1.764803426966894e-05, "loss": 1.6372, "step": 80277 }, { "epoch": 2.67, "grad_norm": 0.7080816626548767, "learning_rate": 1.7644502414736393e-05, "loss": 1.6958, "step": 80278 }, { "epoch": 2.67, "grad_norm": 0.7412643432617188, "learning_rate": 1.7640970902541883e-05, "loss": 1.7161, "step": 80279 }, { "epoch": 2.67, "grad_norm": 0.7019979357719421, "learning_rate": 1.763743973308954e-05, "loss": 1.6601, "step": 80280 }, { "epoch": 2.67, "grad_norm": 0.7068911194801331, "learning_rate": 1.763390890638383e-05, "loss": 1.6925, "step": 80281 }, { "epoch": 2.67, "grad_norm": 2.3627045154571533, "learning_rate": 1.7630378422428883e-05, "loss": 1.6593, "step": 80282 }, { "epoch": 2.67, "grad_norm": 0.72162926197052, "learning_rate": 1.7626848281229087e-05, "loss": 1.6604, "step": 80283 }, { "epoch": 2.67, "grad_norm": 0.7199006080627441, "learning_rate": 1.7623318482788716e-05, "loss": 1.7335, "step": 80284 }, { "epoch": 2.67, "grad_norm": 0.695492148399353, "learning_rate": 1.7619789027111997e-05, "loss": 1.7271, "step": 80285 }, { "epoch": 2.67, "grad_norm": 0.7027827501296997, "learning_rate": 1.7616259914203257e-05, "loss": 1.7399, "step": 80286 }, { "epoch": 2.67, "grad_norm": 0.6894671320915222, "learning_rate": 1.7612731144066728e-05, "loss": 1.64, "step": 80287 }, { "epoch": 2.67, "grad_norm": 0.7100400924682617, "learning_rate": 1.7609202716706804e-05, "loss": 1.6682, "step": 80288 }, { "epoch": 2.67, "grad_norm": 0.6998057961463928, "learning_rate": 1.7605674632127687e-05, "loss": 1.7404, "step": 80289 }, { "epoch": 2.67, "grad_norm": 0.7051634788513184, "learning_rate": 1.7602146890333635e-05, "loss": 1.6272, "step": 80290 }, { "epoch": 2.67, "grad_norm": 0.7092999815940857, "learning_rate": 1.759861949132898e-05, "loss": 1.6665, "step": 80291 }, { "epoch": 2.67, "grad_norm": 0.7157906889915466, "learning_rate": 1.7595092435117984e-05, "loss": 1.6677, "step": 80292 }, { "epoch": 2.67, "grad_norm": 0.7063924670219421, "learning_rate": 1.759156572170498e-05, "loss": 1.6754, "step": 80293 }, { "epoch": 2.67, "grad_norm": 0.7049211263656616, "learning_rate": 1.758803935109413e-05, "loss": 1.6302, "step": 80294 }, { "epoch": 2.67, "grad_norm": 0.7127041220664978, "learning_rate": 1.7584513323289863e-05, "loss": 1.6881, "step": 80295 }, { "epoch": 2.67, "grad_norm": 0.7451785802841187, "learning_rate": 1.7580987638296373e-05, "loss": 1.6877, "step": 80296 }, { "epoch": 2.67, "grad_norm": 0.7115956544876099, "learning_rate": 1.7577462296117895e-05, "loss": 1.7233, "step": 80297 }, { "epoch": 2.67, "grad_norm": 0.6919659376144409, "learning_rate": 1.7573937296758822e-05, "loss": 1.6727, "step": 80298 }, { "epoch": 2.67, "grad_norm": 0.726581871509552, "learning_rate": 1.7570412640223385e-05, "loss": 1.7456, "step": 80299 }, { "epoch": 2.67, "grad_norm": 0.7290729880332947, "learning_rate": 1.7566888326515782e-05, "loss": 1.7537, "step": 80300 }, { "epoch": 2.67, "grad_norm": 0.7094448208808899, "learning_rate": 1.7563364355640376e-05, "loss": 1.6581, "step": 80301 }, { "epoch": 2.67, "grad_norm": 0.7365370988845825, "learning_rate": 1.755984072760149e-05, "loss": 1.6573, "step": 80302 }, { "epoch": 2.67, "grad_norm": 0.6992126107215881, "learning_rate": 1.7556317442403333e-05, "loss": 1.6458, "step": 80303 }, { "epoch": 2.67, "grad_norm": 0.7127910256385803, "learning_rate": 1.7552794500050128e-05, "loss": 1.7114, "step": 80304 }, { "epoch": 2.67, "grad_norm": 0.6905059218406677, "learning_rate": 1.754927190054627e-05, "loss": 1.678, "step": 80305 }, { "epoch": 2.67, "grad_norm": 0.7029478549957275, "learning_rate": 1.754574964389599e-05, "loss": 1.6506, "step": 80306 }, { "epoch": 2.67, "grad_norm": 0.7304188013076782, "learning_rate": 1.7542227730103488e-05, "loss": 1.6346, "step": 80307 }, { "epoch": 2.67, "grad_norm": 0.6990250945091248, "learning_rate": 1.7538706159173122e-05, "loss": 1.7433, "step": 80308 }, { "epoch": 2.67, "grad_norm": 0.733342707157135, "learning_rate": 1.7535184931109225e-05, "loss": 1.6927, "step": 80309 }, { "epoch": 2.67, "grad_norm": 0.7200469374656677, "learning_rate": 1.7531664045915893e-05, "loss": 1.7718, "step": 80310 }, { "epoch": 2.67, "grad_norm": 0.7052182555198669, "learning_rate": 1.7528143503597523e-05, "loss": 1.643, "step": 80311 }, { "epoch": 2.67, "grad_norm": 0.7032800316810608, "learning_rate": 1.7524623304158413e-05, "loss": 1.7144, "step": 80312 }, { "epoch": 2.67, "grad_norm": 0.7342535853385925, "learning_rate": 1.7521103447602793e-05, "loss": 1.6813, "step": 80313 }, { "epoch": 2.67, "grad_norm": 0.688280463218689, "learning_rate": 1.7517583933934855e-05, "loss": 1.6022, "step": 80314 }, { "epoch": 2.67, "grad_norm": 0.7138303518295288, "learning_rate": 1.7514064763158963e-05, "loss": 1.6968, "step": 80315 }, { "epoch": 2.67, "grad_norm": 0.7228941917419434, "learning_rate": 1.7510545935279487e-05, "loss": 1.7244, "step": 80316 }, { "epoch": 2.67, "grad_norm": 0.7019092440605164, "learning_rate": 1.7507027450300483e-05, "loss": 1.715, "step": 80317 }, { "epoch": 2.67, "grad_norm": 0.7300862669944763, "learning_rate": 1.7503509308226315e-05, "loss": 1.6292, "step": 80318 }, { "epoch": 2.67, "grad_norm": 0.7180272340774536, "learning_rate": 1.7499991509061316e-05, "loss": 1.6505, "step": 80319 }, { "epoch": 2.67, "grad_norm": 0.7146950364112854, "learning_rate": 1.7496474052809685e-05, "loss": 1.6737, "step": 80320 }, { "epoch": 2.67, "grad_norm": 0.7261388897895813, "learning_rate": 1.7492956939475677e-05, "loss": 1.7181, "step": 80321 }, { "epoch": 2.67, "grad_norm": 0.6959289312362671, "learning_rate": 1.7489440169063597e-05, "loss": 1.6738, "step": 80322 }, { "epoch": 2.67, "grad_norm": 0.7174027562141418, "learning_rate": 1.7485923741577766e-05, "loss": 1.7216, "step": 80323 }, { "epoch": 2.67, "grad_norm": 0.7107502222061157, "learning_rate": 1.748240765702239e-05, "loss": 1.7245, "step": 80324 }, { "epoch": 2.67, "grad_norm": 0.7063995003700256, "learning_rate": 1.7478891915401694e-05, "loss": 1.6913, "step": 80325 }, { "epoch": 2.67, "grad_norm": 0.7052074670791626, "learning_rate": 1.7475376516720042e-05, "loss": 1.6476, "step": 80326 }, { "epoch": 2.67, "grad_norm": 0.6998512148857117, "learning_rate": 1.747186146098163e-05, "loss": 1.7134, "step": 80327 }, { "epoch": 2.67, "grad_norm": 0.7051541209220886, "learning_rate": 1.7468346748190754e-05, "loss": 1.6484, "step": 80328 }, { "epoch": 2.67, "grad_norm": 0.6954556107521057, "learning_rate": 1.7464832378351677e-05, "loss": 1.6628, "step": 80329 }, { "epoch": 2.67, "grad_norm": 0.7288627028465271, "learning_rate": 1.7461318351468635e-05, "loss": 1.7012, "step": 80330 }, { "epoch": 2.67, "grad_norm": 0.7195608019828796, "learning_rate": 1.7457804667545983e-05, "loss": 1.673, "step": 80331 }, { "epoch": 2.67, "grad_norm": 0.7245758175849915, "learning_rate": 1.7454291326587856e-05, "loss": 1.6933, "step": 80332 }, { "epoch": 2.67, "grad_norm": 0.7098681330680847, "learning_rate": 1.7450778328598648e-05, "loss": 1.7047, "step": 80333 }, { "epoch": 2.67, "grad_norm": 0.7069565653800964, "learning_rate": 1.7447265673582557e-05, "loss": 1.6612, "step": 80334 }, { "epoch": 2.67, "grad_norm": 0.7207775712013245, "learning_rate": 1.744375336154381e-05, "loss": 1.7507, "step": 80335 }, { "epoch": 2.67, "grad_norm": 0.7195048928260803, "learning_rate": 1.7440241392486775e-05, "loss": 1.7171, "step": 80336 }, { "epoch": 2.67, "grad_norm": 0.7235982418060303, "learning_rate": 1.743672976641558e-05, "loss": 1.7459, "step": 80337 }, { "epoch": 2.67, "grad_norm": 0.7304635643959045, "learning_rate": 1.743321848333462e-05, "loss": 1.7093, "step": 80338 }, { "epoch": 2.67, "grad_norm": 0.7137112617492676, "learning_rate": 1.7429707543248094e-05, "loss": 1.6739, "step": 80339 }, { "epoch": 2.67, "grad_norm": 0.7175686955451965, "learning_rate": 1.7426196946160232e-05, "loss": 1.632, "step": 80340 }, { "epoch": 2.67, "grad_norm": 0.7128119468688965, "learning_rate": 1.742268669207536e-05, "loss": 1.6826, "step": 80341 }, { "epoch": 2.67, "grad_norm": 0.7236306071281433, "learning_rate": 1.7419176780997678e-05, "loss": 1.6926, "step": 80342 }, { "epoch": 2.67, "grad_norm": 0.7053664326667786, "learning_rate": 1.741566721293155e-05, "loss": 1.6343, "step": 80343 }, { "epoch": 2.67, "grad_norm": 0.7353330850601196, "learning_rate": 1.741215798788107e-05, "loss": 1.6703, "step": 80344 }, { "epoch": 2.67, "grad_norm": 0.7503330111503601, "learning_rate": 1.740864910585067e-05, "loss": 1.6876, "step": 80345 }, { "epoch": 2.67, "grad_norm": 0.7161834239959717, "learning_rate": 1.740514056684451e-05, "loss": 1.7007, "step": 80346 }, { "epoch": 2.67, "grad_norm": 0.7076051235198975, "learning_rate": 1.7401632370866858e-05, "loss": 1.6537, "step": 80347 }, { "epoch": 2.67, "grad_norm": 0.7231655716896057, "learning_rate": 1.7398124517922007e-05, "loss": 1.6108, "step": 80348 }, { "epoch": 2.67, "grad_norm": 0.7094350457191467, "learning_rate": 1.7394617008014156e-05, "loss": 1.7176, "step": 80349 }, { "epoch": 2.67, "grad_norm": 0.6979300379753113, "learning_rate": 1.7391109841147632e-05, "loss": 1.6672, "step": 80350 }, { "epoch": 2.67, "grad_norm": 0.6903558969497681, "learning_rate": 1.7387603017326636e-05, "loss": 1.6432, "step": 80351 }, { "epoch": 2.67, "grad_norm": 0.7095617055892944, "learning_rate": 1.738409653655549e-05, "loss": 1.6809, "step": 80352 }, { "epoch": 2.67, "grad_norm": 0.7017545104026794, "learning_rate": 1.73805903988384e-05, "loss": 1.6626, "step": 80353 }, { "epoch": 2.67, "grad_norm": 0.7047533988952637, "learning_rate": 1.7377084604179592e-05, "loss": 1.6951, "step": 80354 }, { "epoch": 2.67, "grad_norm": 0.7074447274208069, "learning_rate": 1.7373579152583395e-05, "loss": 1.6709, "step": 80355 }, { "epoch": 2.67, "grad_norm": 0.7118510007858276, "learning_rate": 1.737007404405404e-05, "loss": 1.6792, "step": 80356 }, { "epoch": 2.67, "grad_norm": 0.6974104046821594, "learning_rate": 1.7366569278595722e-05, "loss": 1.6876, "step": 80357 }, { "epoch": 2.67, "grad_norm": 0.7418967485427856, "learning_rate": 1.736306485621274e-05, "loss": 1.7154, "step": 80358 }, { "epoch": 2.67, "grad_norm": 0.7041414976119995, "learning_rate": 1.735956077690942e-05, "loss": 1.7399, "step": 80359 }, { "epoch": 2.67, "grad_norm": 0.7325404286384583, "learning_rate": 1.7356057040689932e-05, "loss": 1.7072, "step": 80360 }, { "epoch": 2.67, "grad_norm": 0.7354822754859924, "learning_rate": 1.73525536475585e-05, "loss": 1.6921, "step": 80361 }, { "epoch": 2.67, "grad_norm": 0.7431498765945435, "learning_rate": 1.7349050597519453e-05, "loss": 1.6546, "step": 80362 }, { "epoch": 2.67, "grad_norm": 0.7448566555976868, "learning_rate": 1.7345547890577028e-05, "loss": 1.6541, "step": 80363 }, { "epoch": 2.67, "grad_norm": 0.7227210998535156, "learning_rate": 1.7342045526735415e-05, "loss": 1.6368, "step": 80364 }, { "epoch": 2.67, "grad_norm": 0.7042645812034607, "learning_rate": 1.7338543505998913e-05, "loss": 1.7306, "step": 80365 }, { "epoch": 2.67, "grad_norm": 0.7237457036972046, "learning_rate": 1.7335041828371854e-05, "loss": 1.697, "step": 80366 }, { "epoch": 2.67, "grad_norm": 0.6976544260978699, "learning_rate": 1.7331540493858333e-05, "loss": 1.6509, "step": 80367 }, { "epoch": 2.67, "grad_norm": 0.7120028734207153, "learning_rate": 1.7328039502462676e-05, "loss": 1.647, "step": 80368 }, { "epoch": 2.67, "grad_norm": 0.6819430589675903, "learning_rate": 1.7324538854189153e-05, "loss": 1.6447, "step": 80369 }, { "epoch": 2.67, "grad_norm": 0.7152298092842102, "learning_rate": 1.732103854904199e-05, "loss": 1.7317, "step": 80370 }, { "epoch": 2.67, "grad_norm": 0.6906082630157471, "learning_rate": 1.731753858702538e-05, "loss": 1.6726, "step": 80371 }, { "epoch": 2.67, "grad_norm": 0.7358176708221436, "learning_rate": 1.731403896814366e-05, "loss": 1.7716, "step": 80372 }, { "epoch": 2.67, "grad_norm": 0.6993492841720581, "learning_rate": 1.7310539692401092e-05, "loss": 1.6972, "step": 80373 }, { "epoch": 2.67, "grad_norm": 0.7085466384887695, "learning_rate": 1.7307040759801837e-05, "loss": 1.7119, "step": 80374 }, { "epoch": 2.67, "grad_norm": 0.7152920365333557, "learning_rate": 1.730354217035016e-05, "loss": 1.6383, "step": 80375 }, { "epoch": 2.67, "grad_norm": 0.7150601148605347, "learning_rate": 1.7300043924050354e-05, "loss": 1.6783, "step": 80376 }, { "epoch": 2.67, "grad_norm": 0.6910565495491028, "learning_rate": 1.7296546020906656e-05, "loss": 1.6785, "step": 80377 }, { "epoch": 2.67, "grad_norm": 0.6795830726623535, "learning_rate": 1.7293048460923254e-05, "loss": 1.6364, "step": 80378 }, { "epoch": 2.67, "grad_norm": 0.6995411515235901, "learning_rate": 1.728955124410445e-05, "loss": 1.76, "step": 80379 }, { "epoch": 2.67, "grad_norm": 0.6946852803230286, "learning_rate": 1.728605437045454e-05, "loss": 1.6779, "step": 80380 }, { "epoch": 2.67, "grad_norm": 0.7127872109413147, "learning_rate": 1.728255783997762e-05, "loss": 1.7655, "step": 80381 }, { "epoch": 2.67, "grad_norm": 0.7093886137008667, "learning_rate": 1.7279061652678017e-05, "loss": 1.6618, "step": 80382 }, { "epoch": 2.67, "grad_norm": 0.7067014575004578, "learning_rate": 1.7275565808560032e-05, "loss": 1.7061, "step": 80383 }, { "epoch": 2.67, "grad_norm": 0.7362141013145447, "learning_rate": 1.727207030762786e-05, "loss": 1.5625, "step": 80384 }, { "epoch": 2.67, "grad_norm": 0.6944751143455505, "learning_rate": 1.7268575149885666e-05, "loss": 1.6579, "step": 80385 }, { "epoch": 2.67, "grad_norm": 0.6927372813224792, "learning_rate": 1.726508033533781e-05, "loss": 1.6124, "step": 80386 }, { "epoch": 2.67, "grad_norm": 0.7149730920791626, "learning_rate": 1.7261585863988524e-05, "loss": 1.6307, "step": 80387 }, { "epoch": 2.67, "grad_norm": 0.7070774435997009, "learning_rate": 1.7258091735841938e-05, "loss": 1.8251, "step": 80388 }, { "epoch": 2.67, "grad_norm": 0.7180251479148865, "learning_rate": 1.7254597950902383e-05, "loss": 1.7128, "step": 80389 }, { "epoch": 2.67, "grad_norm": 0.7165015935897827, "learning_rate": 1.725110450917412e-05, "loss": 1.6767, "step": 80390 }, { "epoch": 2.67, "grad_norm": 0.6833666563034058, "learning_rate": 1.7247611410661343e-05, "loss": 1.6751, "step": 80391 }, { "epoch": 2.67, "grad_norm": 0.7350643277168274, "learning_rate": 1.724411865536829e-05, "loss": 1.6654, "step": 80392 }, { "epoch": 2.67, "grad_norm": 0.728542149066925, "learning_rate": 1.7240626243299216e-05, "loss": 1.6491, "step": 80393 }, { "epoch": 2.67, "grad_norm": 0.700518012046814, "learning_rate": 1.723713417445839e-05, "loss": 1.6567, "step": 80394 }, { "epoch": 2.67, "grad_norm": 0.7078852653503418, "learning_rate": 1.723364244884997e-05, "loss": 1.6656, "step": 80395 }, { "epoch": 2.67, "grad_norm": 0.6973628997802734, "learning_rate": 1.7230151066478292e-05, "loss": 1.6727, "step": 80396 }, { "epoch": 2.67, "grad_norm": 0.6873235106468201, "learning_rate": 1.7226660027347515e-05, "loss": 1.6433, "step": 80397 }, { "epoch": 2.67, "grad_norm": 0.7445987462997437, "learning_rate": 1.722316933146194e-05, "loss": 1.7152, "step": 80398 }, { "epoch": 2.67, "grad_norm": 0.7041541934013367, "learning_rate": 1.7219678978825724e-05, "loss": 1.7331, "step": 80399 }, { "epoch": 2.67, "grad_norm": 0.745536744594574, "learning_rate": 1.7216188969443167e-05, "loss": 1.6881, "step": 80400 }, { "epoch": 2.67, "grad_norm": 0.7290353775024414, "learning_rate": 1.7212699303318534e-05, "loss": 1.6954, "step": 80401 }, { "epoch": 2.67, "grad_norm": 0.7280749678611755, "learning_rate": 1.720920998045595e-05, "loss": 1.7577, "step": 80402 }, { "epoch": 2.68, "grad_norm": 0.7012742757797241, "learning_rate": 1.7205721000859784e-05, "loss": 1.6797, "step": 80403 }, { "epoch": 2.68, "grad_norm": 0.6985912919044495, "learning_rate": 1.720223236453413e-05, "loss": 1.6184, "step": 80404 }, { "epoch": 2.68, "grad_norm": 0.71692955493927, "learning_rate": 1.7198744071483383e-05, "loss": 1.7261, "step": 80405 }, { "epoch": 2.68, "grad_norm": 0.7074623703956604, "learning_rate": 1.7195256121711676e-05, "loss": 1.6915, "step": 80406 }, { "epoch": 2.68, "grad_norm": 0.6907037496566772, "learning_rate": 1.7191768515223202e-05, "loss": 1.6716, "step": 80407 }, { "epoch": 2.68, "grad_norm": 0.7046335935592651, "learning_rate": 1.7188281252022294e-05, "loss": 1.6522, "step": 80408 }, { "epoch": 2.68, "grad_norm": 0.6945822834968567, "learning_rate": 1.7184794332113117e-05, "loss": 1.62, "step": 80409 }, { "epoch": 2.68, "grad_norm": 0.7159396409988403, "learning_rate": 1.7181307755499928e-05, "loss": 1.7357, "step": 80410 }, { "epoch": 2.68, "grad_norm": 0.7129032015800476, "learning_rate": 1.7177821522186963e-05, "loss": 1.6741, "step": 80411 }, { "epoch": 2.68, "grad_norm": 0.7022618055343628, "learning_rate": 1.717433563217848e-05, "loss": 1.7035, "step": 80412 }, { "epoch": 2.68, "grad_norm": 0.7073158025741577, "learning_rate": 1.7170850085478684e-05, "loss": 1.6949, "step": 80413 }, { "epoch": 2.68, "grad_norm": 0.7110005617141724, "learning_rate": 1.7167364882091762e-05, "loss": 1.7126, "step": 80414 }, { "epoch": 2.68, "grad_norm": 0.7169092893600464, "learning_rate": 1.7163880022022015e-05, "loss": 1.7341, "step": 80415 }, { "epoch": 2.68, "grad_norm": 0.7178389430046082, "learning_rate": 1.716039550527367e-05, "loss": 1.6725, "step": 80416 }, { "epoch": 2.68, "grad_norm": 0.6954235434532166, "learning_rate": 1.7156911331850865e-05, "loss": 1.6102, "step": 80417 }, { "epoch": 2.68, "grad_norm": 0.7247461676597595, "learning_rate": 1.7153427501757887e-05, "loss": 1.7413, "step": 80418 }, { "epoch": 2.68, "grad_norm": 0.7400600910186768, "learning_rate": 1.714994401499904e-05, "loss": 1.7106, "step": 80419 }, { "epoch": 2.68, "grad_norm": 0.7207688093185425, "learning_rate": 1.714646087157848e-05, "loss": 1.7093, "step": 80420 }, { "epoch": 2.68, "grad_norm": 0.7618910670280457, "learning_rate": 1.714297807150038e-05, "loss": 1.7253, "step": 80421 }, { "epoch": 2.68, "grad_norm": 0.7194664478302002, "learning_rate": 1.7139495614769094e-05, "loss": 1.7172, "step": 80422 }, { "epoch": 2.68, "grad_norm": 0.7135637998580933, "learning_rate": 1.7136013501388758e-05, "loss": 1.6602, "step": 80423 }, { "epoch": 2.68, "grad_norm": 0.695199728012085, "learning_rate": 1.7132531731363596e-05, "loss": 1.6672, "step": 80424 }, { "epoch": 2.68, "grad_norm": 0.7249673008918762, "learning_rate": 1.7129050304697844e-05, "loss": 1.7629, "step": 80425 }, { "epoch": 2.68, "grad_norm": 0.7267393469810486, "learning_rate": 1.7125569221395862e-05, "loss": 1.6515, "step": 80426 }, { "epoch": 2.68, "grad_norm": 0.7351000308990479, "learning_rate": 1.712208848146165e-05, "loss": 1.6865, "step": 80427 }, { "epoch": 2.68, "grad_norm": 0.7002197504043579, "learning_rate": 1.7118608084899533e-05, "loss": 1.6272, "step": 80428 }, { "epoch": 2.68, "grad_norm": 0.7414207458496094, "learning_rate": 1.711512803171381e-05, "loss": 1.6715, "step": 80429 }, { "epoch": 2.68, "grad_norm": 0.7124294638633728, "learning_rate": 1.7111648321908643e-05, "loss": 1.6999, "step": 80430 }, { "epoch": 2.68, "grad_norm": 0.7070115804672241, "learning_rate": 1.71081689554882e-05, "loss": 1.6864, "step": 80431 }, { "epoch": 2.68, "grad_norm": 0.7098491787910461, "learning_rate": 1.710468993245677e-05, "loss": 1.6964, "step": 80432 }, { "epoch": 2.68, "grad_norm": 0.6881948113441467, "learning_rate": 1.7101211252818625e-05, "loss": 1.6763, "step": 80433 }, { "epoch": 2.68, "grad_norm": 0.7139238119125366, "learning_rate": 1.7097732916577854e-05, "loss": 1.6561, "step": 80434 }, { "epoch": 2.68, "grad_norm": 0.7134812474250793, "learning_rate": 1.709425492373876e-05, "loss": 1.6965, "step": 80435 }, { "epoch": 2.68, "grad_norm": 0.699928879737854, "learning_rate": 1.70907772743056e-05, "loss": 1.6826, "step": 80436 }, { "epoch": 2.68, "grad_norm": 0.6916432976722717, "learning_rate": 1.708729996828254e-05, "loss": 1.6684, "step": 80437 }, { "epoch": 2.68, "grad_norm": 0.755750298500061, "learning_rate": 1.708382300567378e-05, "loss": 1.6472, "step": 80438 }, { "epoch": 2.68, "grad_norm": 0.716373860836029, "learning_rate": 1.7080346386483545e-05, "loss": 1.6571, "step": 80439 }, { "epoch": 2.68, "grad_norm": 0.7371478080749512, "learning_rate": 1.7076870110716168e-05, "loss": 1.7071, "step": 80440 }, { "epoch": 2.68, "grad_norm": 0.7433487772941589, "learning_rate": 1.7073394178375742e-05, "loss": 1.6271, "step": 80441 }, { "epoch": 2.68, "grad_norm": 0.6855799555778503, "learning_rate": 1.7069918589466535e-05, "loss": 1.6578, "step": 80442 }, { "epoch": 2.68, "grad_norm": 0.6828287839889526, "learning_rate": 1.7066443343992776e-05, "loss": 1.6094, "step": 80443 }, { "epoch": 2.68, "grad_norm": 0.7340145707130432, "learning_rate": 1.706296844195866e-05, "loss": 1.7124, "step": 80444 }, { "epoch": 2.68, "grad_norm": 0.70860755443573, "learning_rate": 1.705949388336838e-05, "loss": 1.6433, "step": 80445 }, { "epoch": 2.68, "grad_norm": 0.7365174293518066, "learning_rate": 1.7056019668226206e-05, "loss": 1.6259, "step": 80446 }, { "epoch": 2.68, "grad_norm": 0.7285210490226746, "learning_rate": 1.70525457965363e-05, "loss": 1.8065, "step": 80447 }, { "epoch": 2.68, "grad_norm": 0.7186701893806458, "learning_rate": 1.7049072268302954e-05, "loss": 1.7297, "step": 80448 }, { "epoch": 2.68, "grad_norm": 0.7340267896652222, "learning_rate": 1.7045599083530336e-05, "loss": 1.6395, "step": 80449 }, { "epoch": 2.68, "grad_norm": 0.6977381706237793, "learning_rate": 1.7042126242222676e-05, "loss": 1.6626, "step": 80450 }, { "epoch": 2.68, "grad_norm": 0.695855975151062, "learning_rate": 1.70386537443842e-05, "loss": 1.6314, "step": 80451 }, { "epoch": 2.68, "grad_norm": 0.722848653793335, "learning_rate": 1.703518159001904e-05, "loss": 1.6138, "step": 80452 }, { "epoch": 2.68, "grad_norm": 0.7098501920700073, "learning_rate": 1.703170977913153e-05, "loss": 1.6731, "step": 80453 }, { "epoch": 2.68, "grad_norm": 0.7094094157218933, "learning_rate": 1.7028238311725795e-05, "loss": 1.6379, "step": 80454 }, { "epoch": 2.68, "grad_norm": 0.7093229293823242, "learning_rate": 1.7024767187806134e-05, "loss": 1.6699, "step": 80455 }, { "epoch": 2.68, "grad_norm": 0.7116471529006958, "learning_rate": 1.7021296407376706e-05, "loss": 1.6539, "step": 80456 }, { "epoch": 2.68, "grad_norm": 0.7225084900856018, "learning_rate": 1.701782597044168e-05, "loss": 1.7254, "step": 80457 }, { "epoch": 2.68, "grad_norm": 0.7029770016670227, "learning_rate": 1.7014355877005348e-05, "loss": 1.7244, "step": 80458 }, { "epoch": 2.68, "grad_norm": 0.6979701519012451, "learning_rate": 1.7010886127071878e-05, "loss": 1.697, "step": 80459 }, { "epoch": 2.68, "grad_norm": 0.7174481749534607, "learning_rate": 1.7007416720645528e-05, "loss": 1.7556, "step": 80460 }, { "epoch": 2.68, "grad_norm": 0.7303155064582825, "learning_rate": 1.700394765773043e-05, "loss": 1.735, "step": 80461 }, { "epoch": 2.68, "grad_norm": 0.6838569045066833, "learning_rate": 1.7000478938330886e-05, "loss": 1.6632, "step": 80462 }, { "epoch": 2.68, "grad_norm": 0.7184274196624756, "learning_rate": 1.6997010562451054e-05, "loss": 1.7083, "step": 80463 }, { "epoch": 2.68, "grad_norm": 0.700131893157959, "learning_rate": 1.6993542530095127e-05, "loss": 1.6863, "step": 80464 }, { "epoch": 2.68, "grad_norm": 0.6938809156417847, "learning_rate": 1.6990074841267374e-05, "loss": 1.6967, "step": 80465 }, { "epoch": 2.68, "grad_norm": 0.7072001695632935, "learning_rate": 1.6986607495971926e-05, "loss": 1.7593, "step": 80466 }, { "epoch": 2.68, "grad_norm": 0.7200192809104919, "learning_rate": 1.6983140494213075e-05, "loss": 1.7468, "step": 80467 }, { "epoch": 2.68, "grad_norm": 0.7192364931106567, "learning_rate": 1.6979673835994955e-05, "loss": 1.6635, "step": 80468 }, { "epoch": 2.68, "grad_norm": 0.6980771422386169, "learning_rate": 1.697620752132186e-05, "loss": 1.678, "step": 80469 }, { "epoch": 2.68, "grad_norm": 0.6854059100151062, "learning_rate": 1.6972741550197955e-05, "loss": 1.6846, "step": 80470 }, { "epoch": 2.68, "grad_norm": 0.6980001926422119, "learning_rate": 1.6969275922627367e-05, "loss": 1.655, "step": 80471 }, { "epoch": 2.68, "grad_norm": 0.724544107913971, "learning_rate": 1.696581063861443e-05, "loss": 1.6854, "step": 80472 }, { "epoch": 2.68, "grad_norm": 0.6948053240776062, "learning_rate": 1.6962345698163303e-05, "loss": 1.6067, "step": 80473 }, { "epoch": 2.68, "grad_norm": 0.7219597697257996, "learning_rate": 1.6958881101278153e-05, "loss": 1.7414, "step": 80474 }, { "epoch": 2.68, "grad_norm": 0.6802453398704529, "learning_rate": 1.6955416847963177e-05, "loss": 1.6365, "step": 80475 }, { "epoch": 2.68, "grad_norm": 0.7298173904418945, "learning_rate": 1.69519529382227e-05, "loss": 1.6651, "step": 80476 }, { "epoch": 2.68, "grad_norm": 0.6896423697471619, "learning_rate": 1.6948489372060827e-05, "loss": 1.7133, "step": 80477 }, { "epoch": 2.68, "grad_norm": 0.6961411833763123, "learning_rate": 1.6945026149481744e-05, "loss": 1.6344, "step": 80478 }, { "epoch": 2.68, "grad_norm": 0.7104504704475403, "learning_rate": 1.6941563270489756e-05, "loss": 1.636, "step": 80479 }, { "epoch": 2.68, "grad_norm": 0.7002472281455994, "learning_rate": 1.6938100735088988e-05, "loss": 1.6939, "step": 80480 }, { "epoch": 2.68, "grad_norm": 0.7363334894180298, "learning_rate": 1.6934638543283608e-05, "loss": 1.6643, "step": 80481 }, { "epoch": 2.68, "grad_norm": 0.6979532837867737, "learning_rate": 1.6931176695077875e-05, "loss": 1.5934, "step": 80482 }, { "epoch": 2.68, "grad_norm": 0.7137094736099243, "learning_rate": 1.692771519047609e-05, "loss": 1.6874, "step": 80483 }, { "epoch": 2.68, "grad_norm": 0.6819421648979187, "learning_rate": 1.6924254029482243e-05, "loss": 1.6907, "step": 80484 }, { "epoch": 2.68, "grad_norm": 0.7411399483680725, "learning_rate": 1.6920793212100638e-05, "loss": 1.7975, "step": 80485 }, { "epoch": 2.68, "grad_norm": 0.7150076627731323, "learning_rate": 1.6917332738335532e-05, "loss": 1.7198, "step": 80486 }, { "epoch": 2.68, "grad_norm": 0.7326099276542664, "learning_rate": 1.691387260819106e-05, "loss": 1.6703, "step": 80487 }, { "epoch": 2.68, "grad_norm": 0.7105523943901062, "learning_rate": 1.6910412821671415e-05, "loss": 1.6213, "step": 80488 }, { "epoch": 2.68, "grad_norm": 0.7336357235908508, "learning_rate": 1.6906953378780795e-05, "loss": 1.6476, "step": 80489 }, { "epoch": 2.68, "grad_norm": 0.7125729918479919, "learning_rate": 1.6903494279523534e-05, "loss": 1.6209, "step": 80490 }, { "epoch": 2.68, "grad_norm": 0.7138834595680237, "learning_rate": 1.690003552390362e-05, "loss": 1.7074, "step": 80491 }, { "epoch": 2.68, "grad_norm": 0.6959722638130188, "learning_rate": 1.689657711192536e-05, "loss": 1.6749, "step": 80492 }, { "epoch": 2.68, "grad_norm": 0.7122381329536438, "learning_rate": 1.6893119043592972e-05, "loss": 1.6226, "step": 80493 }, { "epoch": 2.68, "grad_norm": 0.7176087498664856, "learning_rate": 1.6889661318910597e-05, "loss": 1.6341, "step": 80494 }, { "epoch": 2.68, "grad_norm": 0.7067739963531494, "learning_rate": 1.6886203937882458e-05, "loss": 1.6857, "step": 80495 }, { "epoch": 2.68, "grad_norm": 0.7132052183151245, "learning_rate": 1.6882746900512723e-05, "loss": 1.6615, "step": 80496 }, { "epoch": 2.68, "grad_norm": 0.6976602077484131, "learning_rate": 1.6879290206805753e-05, "loss": 1.7082, "step": 80497 }, { "epoch": 2.68, "grad_norm": 0.6913323998451233, "learning_rate": 1.6875833856765473e-05, "loss": 1.7101, "step": 80498 }, { "epoch": 2.68, "grad_norm": 0.7261435985565186, "learning_rate": 1.6872377850396222e-05, "loss": 1.6747, "step": 80499 }, { "epoch": 2.68, "grad_norm": 0.7259711623191833, "learning_rate": 1.686892218770226e-05, "loss": 1.6836, "step": 80500 }, { "epoch": 2.68, "grad_norm": 0.7117131948471069, "learning_rate": 1.6865466868687683e-05, "loss": 1.6723, "step": 80501 }, { "epoch": 2.68, "grad_norm": 0.7071863412857056, "learning_rate": 1.6862011893356653e-05, "loss": 1.6401, "step": 80502 }, { "epoch": 2.68, "grad_norm": 0.7015260457992554, "learning_rate": 1.68585572617135e-05, "loss": 1.69, "step": 80503 }, { "epoch": 2.68, "grad_norm": 0.709696352481842, "learning_rate": 1.6855102973762323e-05, "loss": 1.7289, "step": 80504 }, { "epoch": 2.68, "grad_norm": 0.7322447896003723, "learning_rate": 1.6851649029507284e-05, "loss": 1.7247, "step": 80505 }, { "epoch": 2.68, "grad_norm": 0.6965180039405823, "learning_rate": 1.684819542895265e-05, "loss": 1.6612, "step": 80506 }, { "epoch": 2.68, "grad_norm": 0.7221482992172241, "learning_rate": 1.684474217210261e-05, "loss": 1.6858, "step": 80507 }, { "epoch": 2.68, "grad_norm": 0.6851869821548462, "learning_rate": 1.6841289258961334e-05, "loss": 1.6041, "step": 80508 }, { "epoch": 2.68, "grad_norm": 0.7029690146446228, "learning_rate": 1.6837836689532946e-05, "loss": 1.6748, "step": 80509 }, { "epoch": 2.68, "grad_norm": 0.7161853313446045, "learning_rate": 1.6834384463821783e-05, "loss": 1.6043, "step": 80510 }, { "epoch": 2.68, "grad_norm": 0.7037765383720398, "learning_rate": 1.6830932581831935e-05, "loss": 1.7113, "step": 80511 }, { "epoch": 2.68, "grad_norm": 0.7303876280784607, "learning_rate": 1.682748104356757e-05, "loss": 1.7331, "step": 80512 }, { "epoch": 2.68, "grad_norm": 0.7030342221260071, "learning_rate": 1.682402984903295e-05, "loss": 1.6865, "step": 80513 }, { "epoch": 2.68, "grad_norm": 0.7308176159858704, "learning_rate": 1.6820578998232204e-05, "loss": 1.7584, "step": 80514 }, { "epoch": 2.68, "grad_norm": 0.6802253127098083, "learning_rate": 1.6817128491169597e-05, "loss": 1.7456, "step": 80515 }, { "epoch": 2.68, "grad_norm": 0.6740392446517944, "learning_rate": 1.6813678327849223e-05, "loss": 1.6451, "step": 80516 }, { "epoch": 2.68, "grad_norm": 0.7235413193702698, "learning_rate": 1.681022850827538e-05, "loss": 1.6991, "step": 80517 }, { "epoch": 2.68, "grad_norm": 0.7026440501213074, "learning_rate": 1.6806779032452166e-05, "loss": 1.6049, "step": 80518 }, { "epoch": 2.68, "grad_norm": 0.7076051235198975, "learning_rate": 1.6803329900383776e-05, "loss": 1.6356, "step": 80519 }, { "epoch": 2.68, "grad_norm": 0.7261836528778076, "learning_rate": 1.6799881112074442e-05, "loss": 1.5842, "step": 80520 }, { "epoch": 2.68, "grad_norm": 0.7135137319564819, "learning_rate": 1.6796432667528293e-05, "loss": 1.6322, "step": 80521 }, { "epoch": 2.68, "grad_norm": 0.7092809081077576, "learning_rate": 1.6792984566749558e-05, "loss": 1.6718, "step": 80522 }, { "epoch": 2.68, "grad_norm": 0.7081312537193298, "learning_rate": 1.678953680974243e-05, "loss": 1.6387, "step": 80523 }, { "epoch": 2.68, "grad_norm": 0.7177632451057434, "learning_rate": 1.678608939651105e-05, "loss": 1.6501, "step": 80524 }, { "epoch": 2.68, "grad_norm": 0.7088736295700073, "learning_rate": 1.678264232705967e-05, "loss": 1.6994, "step": 80525 }, { "epoch": 2.68, "grad_norm": 0.7139434814453125, "learning_rate": 1.677919560139236e-05, "loss": 1.6817, "step": 80526 }, { "epoch": 2.68, "grad_norm": 0.7169337272644043, "learning_rate": 1.6775749219513447e-05, "loss": 1.7093, "step": 80527 }, { "epoch": 2.68, "grad_norm": 0.7201082706451416, "learning_rate": 1.6772303181426993e-05, "loss": 1.6812, "step": 80528 }, { "epoch": 2.68, "grad_norm": 0.6910700798034668, "learning_rate": 1.6768857487137268e-05, "loss": 1.6384, "step": 80529 }, { "epoch": 2.68, "grad_norm": 0.7151802778244019, "learning_rate": 1.6765412136648427e-05, "loss": 1.6017, "step": 80530 }, { "epoch": 2.68, "grad_norm": 0.7072743773460388, "learning_rate": 1.6761967129964605e-05, "loss": 1.716, "step": 80531 }, { "epoch": 2.68, "grad_norm": 0.7079910635948181, "learning_rate": 1.675852246709003e-05, "loss": 1.7098, "step": 80532 }, { "epoch": 2.68, "grad_norm": 0.7120591402053833, "learning_rate": 1.67550781480289e-05, "loss": 1.6715, "step": 80533 }, { "epoch": 2.68, "grad_norm": 0.7361353039741516, "learning_rate": 1.6751634172785343e-05, "loss": 1.6742, "step": 80534 }, { "epoch": 2.68, "grad_norm": 0.6776645183563232, "learning_rate": 1.674819054136356e-05, "loss": 1.626, "step": 80535 }, { "epoch": 2.68, "grad_norm": 0.7229936718940735, "learning_rate": 1.674474725376774e-05, "loss": 1.6605, "step": 80536 }, { "epoch": 2.68, "grad_norm": 0.7287328839302063, "learning_rate": 1.6741304310002125e-05, "loss": 1.682, "step": 80537 }, { "epoch": 2.68, "grad_norm": 0.7089166641235352, "learning_rate": 1.6737861710070733e-05, "loss": 1.6182, "step": 80538 }, { "epoch": 2.68, "grad_norm": 0.6956892609596252, "learning_rate": 1.6734419453977932e-05, "loss": 1.6824, "step": 80539 }, { "epoch": 2.68, "grad_norm": 0.7294623851776123, "learning_rate": 1.6730977541727785e-05, "loss": 1.6883, "step": 80540 }, { "epoch": 2.68, "grad_norm": 0.7218846678733826, "learning_rate": 1.6727535973324422e-05, "loss": 1.7058, "step": 80541 }, { "epoch": 2.68, "grad_norm": 0.7070457935333252, "learning_rate": 1.6724094748772143e-05, "loss": 1.707, "step": 80542 }, { "epoch": 2.68, "grad_norm": 0.7140604257583618, "learning_rate": 1.672065386807514e-05, "loss": 1.704, "step": 80543 }, { "epoch": 2.68, "grad_norm": 0.7110946178436279, "learning_rate": 1.671721333123741e-05, "loss": 1.5964, "step": 80544 }, { "epoch": 2.68, "grad_norm": 0.6930649876594543, "learning_rate": 1.6713773138263287e-05, "loss": 1.7101, "step": 80545 }, { "epoch": 2.68, "grad_norm": 0.7248027324676514, "learning_rate": 1.671033328915693e-05, "loss": 1.6855, "step": 80546 }, { "epoch": 2.68, "grad_norm": 0.7188988327980042, "learning_rate": 1.670689378392247e-05, "loss": 1.6389, "step": 80547 }, { "epoch": 2.68, "grad_norm": 0.6935386657714844, "learning_rate": 1.670345462256407e-05, "loss": 1.6464, "step": 80548 }, { "epoch": 2.68, "grad_norm": 0.738507091999054, "learning_rate": 1.670001580508593e-05, "loss": 1.6647, "step": 80549 }, { "epoch": 2.68, "grad_norm": 0.7031333446502686, "learning_rate": 1.6696577331492313e-05, "loss": 1.7521, "step": 80550 }, { "epoch": 2.68, "grad_norm": 0.7005627751350403, "learning_rate": 1.6693139201787244e-05, "loss": 1.6525, "step": 80551 }, { "epoch": 2.68, "grad_norm": 0.7173758745193481, "learning_rate": 1.6689701415974953e-05, "loss": 1.6146, "step": 80552 }, { "epoch": 2.68, "grad_norm": 0.7082318663597107, "learning_rate": 1.6686263974059644e-05, "loss": 1.6916, "step": 80553 }, { "epoch": 2.68, "grad_norm": 0.7109153270721436, "learning_rate": 1.6682826876045476e-05, "loss": 1.7205, "step": 80554 }, { "epoch": 2.68, "grad_norm": 0.7194033265113831, "learning_rate": 1.6679390121936577e-05, "loss": 1.6806, "step": 80555 }, { "epoch": 2.68, "grad_norm": 0.6928122043609619, "learning_rate": 1.6675953711737146e-05, "loss": 1.683, "step": 80556 }, { "epoch": 2.68, "grad_norm": 0.7019199728965759, "learning_rate": 1.667251764545141e-05, "loss": 1.7585, "step": 80557 }, { "epoch": 2.68, "grad_norm": 0.7325894832611084, "learning_rate": 1.6669081923083473e-05, "loss": 1.6713, "step": 80558 }, { "epoch": 2.68, "grad_norm": 0.6873793005943298, "learning_rate": 1.666564654463749e-05, "loss": 1.6616, "step": 80559 }, { "epoch": 2.68, "grad_norm": 0.6901592016220093, "learning_rate": 1.666221151011773e-05, "loss": 1.7023, "step": 80560 }, { "epoch": 2.68, "grad_norm": 0.7276449203491211, "learning_rate": 1.6658776819528285e-05, "loss": 1.6467, "step": 80561 }, { "epoch": 2.68, "grad_norm": 0.7135812640190125, "learning_rate": 1.665534247287329e-05, "loss": 1.7371, "step": 80562 }, { "epoch": 2.68, "grad_norm": 0.6923050880432129, "learning_rate": 1.665190847015697e-05, "loss": 1.6526, "step": 80563 }, { "epoch": 2.68, "grad_norm": 0.7129345536231995, "learning_rate": 1.6648474811383493e-05, "loss": 1.6509, "step": 80564 }, { "epoch": 2.68, "grad_norm": 0.7075686454772949, "learning_rate": 1.6645041496557022e-05, "loss": 1.706, "step": 80565 }, { "epoch": 2.68, "grad_norm": 0.7188110947608948, "learning_rate": 1.6641608525681717e-05, "loss": 1.6581, "step": 80566 }, { "epoch": 2.68, "grad_norm": 0.719663143157959, "learning_rate": 1.6638175898761776e-05, "loss": 1.6888, "step": 80567 }, { "epoch": 2.68, "grad_norm": 0.719892680644989, "learning_rate": 1.6634743615801328e-05, "loss": 1.7079, "step": 80568 }, { "epoch": 2.68, "grad_norm": 0.7092539668083191, "learning_rate": 1.6631311676804503e-05, "loss": 1.7174, "step": 80569 }, { "epoch": 2.68, "grad_norm": 0.7089269757270813, "learning_rate": 1.662788008177557e-05, "loss": 1.7142, "step": 80570 }, { "epoch": 2.68, "grad_norm": 0.7066712975502014, "learning_rate": 1.6624448830718585e-05, "loss": 1.646, "step": 80571 }, { "epoch": 2.68, "grad_norm": 0.7111791968345642, "learning_rate": 1.6621017923637813e-05, "loss": 1.6402, "step": 80572 }, { "epoch": 2.68, "grad_norm": 0.7000864148139954, "learning_rate": 1.6617587360537354e-05, "loss": 1.6045, "step": 80573 }, { "epoch": 2.68, "grad_norm": 0.7124868631362915, "learning_rate": 1.661415714142137e-05, "loss": 1.6899, "step": 80574 }, { "epoch": 2.68, "grad_norm": 0.7406310439109802, "learning_rate": 1.6610727266294088e-05, "loss": 1.6758, "step": 80575 }, { "epoch": 2.68, "grad_norm": 0.7278439402580261, "learning_rate": 1.6607297735159574e-05, "loss": 1.6913, "step": 80576 }, { "epoch": 2.68, "grad_norm": 0.7250088453292847, "learning_rate": 1.660386854802209e-05, "loss": 1.6879, "step": 80577 }, { "epoch": 2.68, "grad_norm": 0.7110974788665771, "learning_rate": 1.6600439704885705e-05, "loss": 1.6601, "step": 80578 }, { "epoch": 2.68, "grad_norm": 0.7112791538238525, "learning_rate": 1.6597011205754675e-05, "loss": 1.6716, "step": 80579 }, { "epoch": 2.68, "grad_norm": 0.7068107724189758, "learning_rate": 1.659358305063313e-05, "loss": 1.643, "step": 80580 }, { "epoch": 2.68, "grad_norm": 0.715379536151886, "learning_rate": 1.659015523952514e-05, "loss": 1.7115, "step": 80581 }, { "epoch": 2.68, "grad_norm": 0.7062259912490845, "learning_rate": 1.6586727772435025e-05, "loss": 1.6649, "step": 80582 }, { "epoch": 2.68, "grad_norm": 0.7249897122383118, "learning_rate": 1.6583300649366826e-05, "loss": 1.6953, "step": 80583 }, { "epoch": 2.68, "grad_norm": 0.7058727145195007, "learning_rate": 1.6579873870324734e-05, "loss": 1.7139, "step": 80584 }, { "epoch": 2.68, "grad_norm": 0.7130129933357239, "learning_rate": 1.657644743531288e-05, "loss": 1.6015, "step": 80585 }, { "epoch": 2.68, "grad_norm": 0.71772301197052, "learning_rate": 1.6573021344335525e-05, "loss": 1.7135, "step": 80586 }, { "epoch": 2.68, "grad_norm": 0.7011709809303284, "learning_rate": 1.6569595597396767e-05, "loss": 1.677, "step": 80587 }, { "epoch": 2.68, "grad_norm": 0.7214176058769226, "learning_rate": 1.6566170194500705e-05, "loss": 1.6793, "step": 80588 }, { "epoch": 2.68, "grad_norm": 0.7129499316215515, "learning_rate": 1.65627451356516e-05, "loss": 1.6635, "step": 80589 }, { "epoch": 2.68, "grad_norm": 0.7262896299362183, "learning_rate": 1.655932042085355e-05, "loss": 1.7072, "step": 80590 }, { "epoch": 2.68, "grad_norm": 0.7134481072425842, "learning_rate": 1.6555896050110684e-05, "loss": 1.6997, "step": 80591 }, { "epoch": 2.68, "grad_norm": 0.686309278011322, "learning_rate": 1.6552472023427198e-05, "loss": 1.6829, "step": 80592 }, { "epoch": 2.68, "grad_norm": 0.6875008940696716, "learning_rate": 1.6549048340807293e-05, "loss": 1.6977, "step": 80593 }, { "epoch": 2.68, "grad_norm": 0.7058848142623901, "learning_rate": 1.6545625002255057e-05, "loss": 1.6521, "step": 80594 }, { "epoch": 2.68, "grad_norm": 0.6809831857681274, "learning_rate": 1.654220200777463e-05, "loss": 1.6247, "step": 80595 }, { "epoch": 2.68, "grad_norm": 0.6965375542640686, "learning_rate": 1.653877935737027e-05, "loss": 1.6398, "step": 80596 }, { "epoch": 2.68, "grad_norm": 0.674673855304718, "learning_rate": 1.6535357051046037e-05, "loss": 1.6389, "step": 80597 }, { "epoch": 2.68, "grad_norm": 0.7114592790603638, "learning_rate": 1.6531935088806105e-05, "loss": 1.6931, "step": 80598 }, { "epoch": 2.68, "grad_norm": 0.7021310925483704, "learning_rate": 1.6528513470654625e-05, "loss": 1.6767, "step": 80599 }, { "epoch": 2.68, "grad_norm": 0.7016870975494385, "learning_rate": 1.6525092196595836e-05, "loss": 1.6724, "step": 80600 }, { "epoch": 2.68, "grad_norm": 0.6933130025863647, "learning_rate": 1.652167126663373e-05, "loss": 1.6856, "step": 80601 }, { "epoch": 2.68, "grad_norm": 0.7043362259864807, "learning_rate": 1.651825068077257e-05, "loss": 1.6716, "step": 80602 }, { "epoch": 2.68, "grad_norm": 0.733538806438446, "learning_rate": 1.651483043901649e-05, "loss": 1.67, "step": 80603 }, { "epoch": 2.68, "grad_norm": 1.0233092308044434, "learning_rate": 1.6511410541369653e-05, "loss": 1.7566, "step": 80604 }, { "epoch": 2.68, "grad_norm": 0.6948665380477905, "learning_rate": 1.650799098783615e-05, "loss": 1.5885, "step": 80605 }, { "epoch": 2.68, "grad_norm": 0.7039951682090759, "learning_rate": 1.6504571778420184e-05, "loss": 1.6831, "step": 80606 }, { "epoch": 2.68, "grad_norm": 0.7345086932182312, "learning_rate": 1.6501152913125982e-05, "loss": 1.781, "step": 80607 }, { "epoch": 2.68, "grad_norm": 0.7191581130027771, "learning_rate": 1.649773439195754e-05, "loss": 1.6424, "step": 80608 }, { "epoch": 2.68, "grad_norm": 0.7057991027832031, "learning_rate": 1.6494316214919058e-05, "loss": 1.7148, "step": 80609 }, { "epoch": 2.68, "grad_norm": 0.6894992589950562, "learning_rate": 1.649089838201473e-05, "loss": 1.6502, "step": 80610 }, { "epoch": 2.68, "grad_norm": 0.7380422949790955, "learning_rate": 1.6487480893248684e-05, "loss": 1.7053, "step": 80611 }, { "epoch": 2.68, "grad_norm": 0.733035683631897, "learning_rate": 1.6484063748625055e-05, "loss": 1.671, "step": 80612 }, { "epoch": 2.68, "grad_norm": 0.7099186778068542, "learning_rate": 1.6480646948147968e-05, "loss": 1.6171, "step": 80613 }, { "epoch": 2.68, "grad_norm": 0.7162114381790161, "learning_rate": 1.6477230491821692e-05, "loss": 1.6097, "step": 80614 }, { "epoch": 2.68, "grad_norm": 0.6997669339179993, "learning_rate": 1.6473814379650185e-05, "loss": 1.731, "step": 80615 }, { "epoch": 2.68, "grad_norm": 0.7094236612319946, "learning_rate": 1.6470398611637713e-05, "loss": 1.6501, "step": 80616 }, { "epoch": 2.68, "grad_norm": 0.7046689391136169, "learning_rate": 1.6466983187788407e-05, "loss": 1.6389, "step": 80617 }, { "epoch": 2.68, "grad_norm": 0.7189781665802002, "learning_rate": 1.646356810810646e-05, "loss": 1.7048, "step": 80618 }, { "epoch": 2.68, "grad_norm": 0.7264516949653625, "learning_rate": 1.6460153372595875e-05, "loss": 1.6764, "step": 80619 }, { "epoch": 2.68, "grad_norm": 0.7241176962852478, "learning_rate": 1.6456738981260942e-05, "loss": 1.7468, "step": 80620 }, { "epoch": 2.68, "grad_norm": 0.7143980860710144, "learning_rate": 1.6453324934105762e-05, "loss": 1.7196, "step": 80621 }, { "epoch": 2.68, "grad_norm": 0.7088786959648132, "learning_rate": 1.6449911231134427e-05, "loss": 1.5963, "step": 80622 }, { "epoch": 2.68, "grad_norm": 0.7063894867897034, "learning_rate": 1.6446497872351105e-05, "loss": 1.6807, "step": 80623 }, { "epoch": 2.68, "grad_norm": 0.7158899903297424, "learning_rate": 1.6443084857760025e-05, "loss": 1.6751, "step": 80624 }, { "epoch": 2.68, "grad_norm": 0.7217977643013, "learning_rate": 1.6439672187365217e-05, "loss": 1.6832, "step": 80625 }, { "epoch": 2.68, "grad_norm": 0.6963501572608948, "learning_rate": 1.643625986117084e-05, "loss": 1.6272, "step": 80626 }, { "epoch": 2.68, "grad_norm": 0.714658260345459, "learning_rate": 1.6432847879181132e-05, "loss": 1.744, "step": 80627 }, { "epoch": 2.68, "grad_norm": 0.6954869627952576, "learning_rate": 1.6429436241400152e-05, "loss": 1.6244, "step": 80628 }, { "epoch": 2.68, "grad_norm": 0.6856485605239868, "learning_rate": 1.6426024947831996e-05, "loss": 1.6978, "step": 80629 }, { "epoch": 2.68, "grad_norm": 0.7406973242759705, "learning_rate": 1.6422613998480893e-05, "loss": 1.6893, "step": 80630 }, { "epoch": 2.68, "grad_norm": 0.7341511845588684, "learning_rate": 1.6419203393350942e-05, "loss": 1.6944, "step": 80631 }, { "epoch": 2.68, "grad_norm": 0.7139853239059448, "learning_rate": 1.641579313244634e-05, "loss": 1.6495, "step": 80632 }, { "epoch": 2.68, "grad_norm": 0.6924550533294678, "learning_rate": 1.641238321577112e-05, "loss": 1.6075, "step": 80633 }, { "epoch": 2.68, "grad_norm": 0.7396342754364014, "learning_rate": 1.6408973643329537e-05, "loss": 1.6725, "step": 80634 }, { "epoch": 2.68, "grad_norm": 0.7066805958747864, "learning_rate": 1.640556441512566e-05, "loss": 1.7143, "step": 80635 }, { "epoch": 2.68, "grad_norm": 0.69293212890625, "learning_rate": 1.6402155531163584e-05, "loss": 1.6533, "step": 80636 }, { "epoch": 2.68, "grad_norm": 0.7196953296661377, "learning_rate": 1.6398746991447574e-05, "loss": 1.6204, "step": 80637 }, { "epoch": 2.68, "grad_norm": 0.6898795366287231, "learning_rate": 1.639533879598166e-05, "loss": 1.69, "step": 80638 }, { "epoch": 2.68, "grad_norm": 0.718010425567627, "learning_rate": 1.639193094477007e-05, "loss": 1.6464, "step": 80639 }, { "epoch": 2.68, "grad_norm": 0.6908050775527954, "learning_rate": 1.638852343781687e-05, "loss": 1.7407, "step": 80640 }, { "epoch": 2.68, "grad_norm": 0.7071781158447266, "learning_rate": 1.6385116275126154e-05, "loss": 1.669, "step": 80641 }, { "epoch": 2.68, "grad_norm": 0.713302493095398, "learning_rate": 1.6381709456702185e-05, "loss": 1.693, "step": 80642 }, { "epoch": 2.68, "grad_norm": 0.7140514254570007, "learning_rate": 1.6378302982548997e-05, "loss": 1.594, "step": 80643 }, { "epoch": 2.68, "grad_norm": 0.7064309120178223, "learning_rate": 1.6374896852670782e-05, "loss": 1.6516, "step": 80644 }, { "epoch": 2.68, "grad_norm": 0.690728485584259, "learning_rate": 1.6371491067071605e-05, "loss": 1.6468, "step": 80645 }, { "epoch": 2.68, "grad_norm": 0.7135487198829651, "learning_rate": 1.63680856257557e-05, "loss": 1.7133, "step": 80646 }, { "epoch": 2.68, "grad_norm": 0.7232038974761963, "learning_rate": 1.636468052872716e-05, "loss": 1.6997, "step": 80647 }, { "epoch": 2.68, "grad_norm": 0.7118895053863525, "learning_rate": 1.6361275775990045e-05, "loss": 1.6322, "step": 80648 }, { "epoch": 2.68, "grad_norm": 0.7220062613487244, "learning_rate": 1.635787136754859e-05, "loss": 1.7223, "step": 80649 }, { "epoch": 2.68, "grad_norm": 0.6918423175811768, "learning_rate": 1.6354467303406894e-05, "loss": 1.674, "step": 80650 }, { "epoch": 2.68, "grad_norm": 0.7240672707557678, "learning_rate": 1.6351063583569047e-05, "loss": 1.6935, "step": 80651 }, { "epoch": 2.68, "grad_norm": 0.7290142178535461, "learning_rate": 1.6347660208039217e-05, "loss": 1.6406, "step": 80652 }, { "epoch": 2.68, "grad_norm": 0.7339129447937012, "learning_rate": 1.634425717682156e-05, "loss": 1.6888, "step": 80653 }, { "epoch": 2.68, "grad_norm": 0.714174747467041, "learning_rate": 1.6340854489920186e-05, "loss": 1.6929, "step": 80654 }, { "epoch": 2.68, "grad_norm": 0.719424307346344, "learning_rate": 1.633745214733918e-05, "loss": 1.6738, "step": 80655 }, { "epoch": 2.68, "grad_norm": 0.7186146974563599, "learning_rate": 1.6334050149082745e-05, "loss": 1.7425, "step": 80656 }, { "epoch": 2.68, "grad_norm": 0.6995435953140259, "learning_rate": 1.6330648495154973e-05, "loss": 1.7385, "step": 80657 }, { "epoch": 2.68, "grad_norm": 0.7089018821716309, "learning_rate": 1.6327247185559966e-05, "loss": 1.6728, "step": 80658 }, { "epoch": 2.68, "grad_norm": 0.6953521966934204, "learning_rate": 1.6323846220301882e-05, "loss": 1.654, "step": 80659 }, { "epoch": 2.68, "grad_norm": 0.7188688516616821, "learning_rate": 1.6320445599384956e-05, "loss": 1.5895, "step": 80660 }, { "epoch": 2.68, "grad_norm": 0.7012856006622314, "learning_rate": 1.6317045322813082e-05, "loss": 1.6055, "step": 80661 }, { "epoch": 2.68, "grad_norm": 0.6990859508514404, "learning_rate": 1.6313645390590557e-05, "loss": 1.6798, "step": 80662 }, { "epoch": 2.68, "grad_norm": 0.7123227119445801, "learning_rate": 1.6310245802721477e-05, "loss": 1.6551, "step": 80663 }, { "epoch": 2.68, "grad_norm": 0.6952711343765259, "learning_rate": 1.6306846559210007e-05, "loss": 1.6449, "step": 80664 }, { "epoch": 2.68, "grad_norm": 0.7067887783050537, "learning_rate": 1.6303447660060144e-05, "loss": 1.6996, "step": 80665 }, { "epoch": 2.68, "grad_norm": 0.7107378840446472, "learning_rate": 1.6300049105276082e-05, "loss": 1.6409, "step": 80666 }, { "epoch": 2.68, "grad_norm": 0.7321649789810181, "learning_rate": 1.6296650894862085e-05, "loss": 1.7199, "step": 80667 }, { "epoch": 2.68, "grad_norm": 0.7354594469070435, "learning_rate": 1.629325302882205e-05, "loss": 1.6811, "step": 80668 }, { "epoch": 2.68, "grad_norm": 0.7111325860023499, "learning_rate": 1.6289855507160176e-05, "loss": 1.6587, "step": 80669 }, { "epoch": 2.68, "grad_norm": 0.6937824487686157, "learning_rate": 1.628645832988069e-05, "loss": 1.6765, "step": 80670 }, { "epoch": 2.68, "grad_norm": 0.7127246260643005, "learning_rate": 1.6283061496987627e-05, "loss": 1.6519, "step": 80671 }, { "epoch": 2.68, "grad_norm": 0.7178087830543518, "learning_rate": 1.6279665008485078e-05, "loss": 1.6415, "step": 80672 }, { "epoch": 2.68, "grad_norm": 0.7102013826370239, "learning_rate": 1.627626886437724e-05, "loss": 1.7178, "step": 80673 }, { "epoch": 2.68, "grad_norm": 0.6958206295967102, "learning_rate": 1.6272873064668212e-05, "loss": 1.6838, "step": 80674 }, { "epoch": 2.68, "grad_norm": 0.6984013319015503, "learning_rate": 1.6269477609362124e-05, "loss": 1.6704, "step": 80675 }, { "epoch": 2.68, "grad_norm": 0.7400832772254944, "learning_rate": 1.626608249846304e-05, "loss": 1.7034, "step": 80676 }, { "epoch": 2.68, "grad_norm": 0.7113704681396484, "learning_rate": 1.6262687731975187e-05, "loss": 1.728, "step": 80677 }, { "epoch": 2.68, "grad_norm": 0.6820834279060364, "learning_rate": 1.62592933099026e-05, "loss": 1.6399, "step": 80678 }, { "epoch": 2.68, "grad_norm": 0.7164039611816406, "learning_rate": 1.6255899232249402e-05, "loss": 1.6963, "step": 80679 }, { "epoch": 2.68, "grad_norm": 0.6808931827545166, "learning_rate": 1.6252505499019765e-05, "loss": 1.6386, "step": 80680 }, { "epoch": 2.68, "grad_norm": 0.7337252497673035, "learning_rate": 1.6249112110217743e-05, "loss": 1.6372, "step": 80681 }, { "epoch": 2.68, "grad_norm": 0.7267099618911743, "learning_rate": 1.6245719065847506e-05, "loss": 1.7222, "step": 80682 }, { "epoch": 2.68, "grad_norm": 0.7349022030830383, "learning_rate": 1.624232636591315e-05, "loss": 1.7446, "step": 80683 }, { "epoch": 2.68, "grad_norm": 0.7283419370651245, "learning_rate": 1.6238934010418835e-05, "loss": 1.7024, "step": 80684 }, { "epoch": 2.68, "grad_norm": 0.7003358006477356, "learning_rate": 1.6235541999368627e-05, "loss": 1.6192, "step": 80685 }, { "epoch": 2.68, "grad_norm": 0.6997177600860596, "learning_rate": 1.6232150332766624e-05, "loss": 1.6891, "step": 80686 }, { "epoch": 2.68, "grad_norm": 0.7224581837654114, "learning_rate": 1.622875901061702e-05, "loss": 1.6893, "step": 80687 }, { "epoch": 2.68, "grad_norm": 0.7096685171127319, "learning_rate": 1.622536803292388e-05, "loss": 1.6644, "step": 80688 }, { "epoch": 2.68, "grad_norm": 0.7199285626411438, "learning_rate": 1.6221977399691333e-05, "loss": 1.5891, "step": 80689 }, { "epoch": 2.68, "grad_norm": 0.6934281587600708, "learning_rate": 1.621858711092351e-05, "loss": 1.6505, "step": 80690 }, { "epoch": 2.68, "grad_norm": 0.7106078863143921, "learning_rate": 1.621519716662447e-05, "loss": 1.6931, "step": 80691 }, { "epoch": 2.68, "grad_norm": 0.7284196615219116, "learning_rate": 1.6211807566798384e-05, "loss": 1.7245, "step": 80692 }, { "epoch": 2.68, "grad_norm": 0.7147319912910461, "learning_rate": 1.6208418311449344e-05, "loss": 1.7039, "step": 80693 }, { "epoch": 2.68, "grad_norm": 0.6972803473472595, "learning_rate": 1.6205029400581482e-05, "loss": 1.7154, "step": 80694 }, { "epoch": 2.68, "grad_norm": 0.7047078013420105, "learning_rate": 1.620164083419886e-05, "loss": 1.6552, "step": 80695 }, { "epoch": 2.68, "grad_norm": 0.711967408657074, "learning_rate": 1.6198252612305673e-05, "loss": 1.6647, "step": 80696 }, { "epoch": 2.68, "grad_norm": 0.7427056431770325, "learning_rate": 1.619486473490602e-05, "loss": 1.6493, "step": 80697 }, { "epoch": 2.68, "grad_norm": 0.7144106030464172, "learning_rate": 1.61914772020039e-05, "loss": 1.6332, "step": 80698 }, { "epoch": 2.68, "grad_norm": 0.7101998329162598, "learning_rate": 1.618809001360357e-05, "loss": 1.7797, "step": 80699 }, { "epoch": 2.68, "grad_norm": 0.6977922916412354, "learning_rate": 1.61847031697091e-05, "loss": 1.6793, "step": 80700 }, { "epoch": 2.68, "grad_norm": 0.7382385730743408, "learning_rate": 1.6181316670324518e-05, "loss": 1.6661, "step": 80701 }, { "epoch": 2.68, "grad_norm": 0.7046718001365662, "learning_rate": 1.6177930515454018e-05, "loss": 1.71, "step": 80702 }, { "epoch": 2.68, "grad_norm": 0.7117306590080261, "learning_rate": 1.61745447051017e-05, "loss": 1.7091, "step": 80703 }, { "epoch": 2.69, "grad_norm": 0.6896606683731079, "learning_rate": 1.6171159239271693e-05, "loss": 1.7147, "step": 80704 }, { "epoch": 2.69, "grad_norm": 0.6979173421859741, "learning_rate": 1.6167774117968024e-05, "loss": 1.6918, "step": 80705 }, { "epoch": 2.69, "grad_norm": 0.7099468111991882, "learning_rate": 1.6164389341194928e-05, "loss": 1.6722, "step": 80706 }, { "epoch": 2.69, "grad_norm": 0.7092439532279968, "learning_rate": 1.61610049089564e-05, "loss": 1.6272, "step": 80707 }, { "epoch": 2.69, "grad_norm": 0.7109402418136597, "learning_rate": 1.6157620821256566e-05, "loss": 1.6114, "step": 80708 }, { "epoch": 2.69, "grad_norm": 0.7137808203697205, "learning_rate": 1.6154237078099565e-05, "loss": 1.6679, "step": 80709 }, { "epoch": 2.69, "grad_norm": 0.7347881197929382, "learning_rate": 1.6150853679489518e-05, "loss": 1.7439, "step": 80710 }, { "epoch": 2.69, "grad_norm": 0.694699764251709, "learning_rate": 1.6147470625430526e-05, "loss": 1.6627, "step": 80711 }, { "epoch": 2.69, "grad_norm": 0.69242924451828, "learning_rate": 1.614408791592665e-05, "loss": 1.6859, "step": 80712 }, { "epoch": 2.69, "grad_norm": 0.7305083870887756, "learning_rate": 1.6140705550982057e-05, "loss": 1.6765, "step": 80713 }, { "epoch": 2.69, "grad_norm": 0.7108187079429626, "learning_rate": 1.613732353060081e-05, "loss": 1.736, "step": 80714 }, { "epoch": 2.69, "grad_norm": 0.6934597492218018, "learning_rate": 1.6133941854787002e-05, "loss": 1.7552, "step": 80715 }, { "epoch": 2.69, "grad_norm": 0.7094761729240417, "learning_rate": 1.6130560523544768e-05, "loss": 1.6913, "step": 80716 }, { "epoch": 2.69, "grad_norm": 0.711014986038208, "learning_rate": 1.6127179536878264e-05, "loss": 1.7442, "step": 80717 }, { "epoch": 2.69, "grad_norm": 0.708609402179718, "learning_rate": 1.6123798894791495e-05, "loss": 1.6652, "step": 80718 }, { "epoch": 2.69, "grad_norm": 0.7153549790382385, "learning_rate": 1.6120418597288588e-05, "loss": 1.7167, "step": 80719 }, { "epoch": 2.69, "grad_norm": 0.7140291333198547, "learning_rate": 1.611703864437367e-05, "loss": 1.6258, "step": 80720 }, { "epoch": 2.69, "grad_norm": 0.7168688178062439, "learning_rate": 1.6113659036050873e-05, "loss": 1.7349, "step": 80721 }, { "epoch": 2.69, "grad_norm": 0.7594912648200989, "learning_rate": 1.611027977232423e-05, "loss": 1.6414, "step": 80722 }, { "epoch": 2.69, "grad_norm": 0.7202780842781067, "learning_rate": 1.610690085319787e-05, "loss": 1.7102, "step": 80723 }, { "epoch": 2.69, "grad_norm": 0.7083120942115784, "learning_rate": 1.6103522278675984e-05, "loss": 1.6229, "step": 80724 }, { "epoch": 2.69, "grad_norm": 0.7238808870315552, "learning_rate": 1.610014404876251e-05, "loss": 1.7305, "step": 80725 }, { "epoch": 2.69, "grad_norm": 0.7187188267707825, "learning_rate": 1.609676616346164e-05, "loss": 1.7027, "step": 80726 }, { "epoch": 2.69, "grad_norm": 0.7136440873146057, "learning_rate": 1.6093388622777503e-05, "loss": 1.6775, "step": 80727 }, { "epoch": 2.69, "grad_norm": 0.7095234394073486, "learning_rate": 1.6090011426714166e-05, "loss": 1.7072, "step": 80728 }, { "epoch": 2.69, "grad_norm": 0.7175785899162292, "learning_rate": 1.608663457527566e-05, "loss": 1.6303, "step": 80729 }, { "epoch": 2.69, "grad_norm": 0.7146772742271423, "learning_rate": 1.608325806846614e-05, "loss": 1.6617, "step": 80730 }, { "epoch": 2.69, "grad_norm": 0.7128713130950928, "learning_rate": 1.6079881906289817e-05, "loss": 1.6566, "step": 80731 }, { "epoch": 2.69, "grad_norm": 0.7135927677154541, "learning_rate": 1.607650608875061e-05, "loss": 1.6488, "step": 80732 }, { "epoch": 2.69, "grad_norm": 0.7354127168655396, "learning_rate": 1.607313061585268e-05, "loss": 1.7012, "step": 80733 }, { "epoch": 2.69, "grad_norm": 0.7040549516677856, "learning_rate": 1.6069755487600168e-05, "loss": 1.6228, "step": 80734 }, { "epoch": 2.69, "grad_norm": 0.6993513107299805, "learning_rate": 1.6066380703997127e-05, "loss": 1.672, "step": 80735 }, { "epoch": 2.69, "grad_norm": 0.7123880982398987, "learning_rate": 1.606300626504763e-05, "loss": 1.7032, "step": 80736 }, { "epoch": 2.69, "grad_norm": 0.7137484550476074, "learning_rate": 1.6059632170755864e-05, "loss": 1.6773, "step": 80737 }, { "epoch": 2.69, "grad_norm": 0.7251258492469788, "learning_rate": 1.6056258421125868e-05, "loss": 1.6845, "step": 80738 }, { "epoch": 2.69, "grad_norm": 0.7151051759719849, "learning_rate": 1.60528850161617e-05, "loss": 1.6899, "step": 80739 }, { "epoch": 2.69, "grad_norm": 0.715392529964447, "learning_rate": 1.6049511955867455e-05, "loss": 1.611, "step": 80740 }, { "epoch": 2.69, "grad_norm": 0.7046728134155273, "learning_rate": 1.6046139240247335e-05, "loss": 1.6471, "step": 80741 }, { "epoch": 2.69, "grad_norm": 0.7117213606834412, "learning_rate": 1.604276686930537e-05, "loss": 1.6947, "step": 80742 }, { "epoch": 2.69, "grad_norm": 0.7219313383102417, "learning_rate": 1.603939484304558e-05, "loss": 1.6856, "step": 80743 }, { "epoch": 2.69, "grad_norm": 0.701438307762146, "learning_rate": 1.6036023161472178e-05, "loss": 1.6878, "step": 80744 }, { "epoch": 2.69, "grad_norm": 0.7388565540313721, "learning_rate": 1.6032651824589216e-05, "loss": 1.6993, "step": 80745 }, { "epoch": 2.69, "grad_norm": 0.7116656303405762, "learning_rate": 1.6029280832400725e-05, "loss": 1.7297, "step": 80746 }, { "epoch": 2.69, "grad_norm": 0.7048580050468445, "learning_rate": 1.6025910184910873e-05, "loss": 1.6845, "step": 80747 }, { "epoch": 2.69, "grad_norm": 0.6973552703857422, "learning_rate": 1.602253988212372e-05, "loss": 1.6342, "step": 80748 }, { "epoch": 2.69, "grad_norm": 0.7010719180107117, "learning_rate": 1.6019169924043363e-05, "loss": 1.6685, "step": 80749 }, { "epoch": 2.69, "grad_norm": 0.7144339680671692, "learning_rate": 1.6015800310673867e-05, "loss": 1.6286, "step": 80750 }, { "epoch": 2.69, "grad_norm": 0.7169241905212402, "learning_rate": 1.601243104201939e-05, "loss": 1.7637, "step": 80751 }, { "epoch": 2.69, "grad_norm": 0.7130274176597595, "learning_rate": 1.600906211808397e-05, "loss": 1.6824, "step": 80752 }, { "epoch": 2.69, "grad_norm": 0.7424217462539673, "learning_rate": 1.6005693538871667e-05, "loss": 1.6853, "step": 80753 }, { "epoch": 2.69, "grad_norm": 0.7066898345947266, "learning_rate": 1.6002325304386642e-05, "loss": 1.7359, "step": 80754 }, { "epoch": 2.69, "grad_norm": 0.6945355534553528, "learning_rate": 1.599895741463293e-05, "loss": 1.6424, "step": 80755 }, { "epoch": 2.69, "grad_norm": 0.7041923999786377, "learning_rate": 1.5995589869614656e-05, "loss": 1.7242, "step": 80756 }, { "epoch": 2.69, "grad_norm": 0.6925427913665771, "learning_rate": 1.599222266933592e-05, "loss": 1.7241, "step": 80757 }, { "epoch": 2.69, "grad_norm": 0.7016674876213074, "learning_rate": 1.5988855813800683e-05, "loss": 1.6932, "step": 80758 }, { "epoch": 2.69, "grad_norm": 0.7046986222267151, "learning_rate": 1.598548930301321e-05, "loss": 1.6436, "step": 80759 }, { "epoch": 2.69, "grad_norm": 0.7196606993675232, "learning_rate": 1.5982123136977466e-05, "loss": 1.7237, "step": 80760 }, { "epoch": 2.69, "grad_norm": 0.7207283973693848, "learning_rate": 1.5978757315697612e-05, "loss": 1.6311, "step": 80761 }, { "epoch": 2.69, "grad_norm": 0.7241969108581543, "learning_rate": 1.5975391839177643e-05, "loss": 1.682, "step": 80762 }, { "epoch": 2.69, "grad_norm": 0.6995342373847961, "learning_rate": 1.5972026707421758e-05, "loss": 1.6856, "step": 80763 }, { "epoch": 2.69, "grad_norm": 0.6814431548118591, "learning_rate": 1.5968661920433954e-05, "loss": 1.6691, "step": 80764 }, { "epoch": 2.69, "grad_norm": 0.6951012015342712, "learning_rate": 1.5965297478218332e-05, "loss": 1.6426, "step": 80765 }, { "epoch": 2.69, "grad_norm": 0.7211770415306091, "learning_rate": 1.5961933380779014e-05, "loss": 1.6486, "step": 80766 }, { "epoch": 2.69, "grad_norm": 0.70311439037323, "learning_rate": 1.5958569628120066e-05, "loss": 1.6579, "step": 80767 }, { "epoch": 2.69, "grad_norm": 0.7034991383552551, "learning_rate": 1.595520622024552e-05, "loss": 1.7088, "step": 80768 }, { "epoch": 2.69, "grad_norm": 0.7601284384727478, "learning_rate": 1.5951843157159506e-05, "loss": 1.6548, "step": 80769 }, { "epoch": 2.69, "grad_norm": 0.728128969669342, "learning_rate": 1.594848043886615e-05, "loss": 1.7862, "step": 80770 }, { "epoch": 2.69, "grad_norm": 0.7184866666793823, "learning_rate": 1.594511806536949e-05, "loss": 1.6184, "step": 80771 }, { "epoch": 2.69, "grad_norm": 0.7212636470794678, "learning_rate": 1.594175603667355e-05, "loss": 1.6947, "step": 80772 }, { "epoch": 2.69, "grad_norm": 0.7101477384567261, "learning_rate": 1.593839435278249e-05, "loss": 1.6209, "step": 80773 }, { "epoch": 2.69, "grad_norm": 0.7045868039131165, "learning_rate": 1.593503301370038e-05, "loss": 1.6227, "step": 80774 }, { "epoch": 2.69, "grad_norm": 0.7081899642944336, "learning_rate": 1.5931672019431253e-05, "loss": 1.7078, "step": 80775 }, { "epoch": 2.69, "grad_norm": 0.7009929418563843, "learning_rate": 1.5928311369979195e-05, "loss": 1.7648, "step": 80776 }, { "epoch": 2.69, "grad_norm": 0.7066330909729004, "learning_rate": 1.5924951065348412e-05, "loss": 1.7113, "step": 80777 }, { "epoch": 2.69, "grad_norm": 0.7064396739006042, "learning_rate": 1.5921591105542798e-05, "loss": 1.7069, "step": 80778 }, { "epoch": 2.69, "grad_norm": 0.7058549523353577, "learning_rate": 1.5918231490566512e-05, "loss": 1.7085, "step": 80779 }, { "epoch": 2.69, "grad_norm": 0.7149951457977295, "learning_rate": 1.5914872220423657e-05, "loss": 1.8047, "step": 80780 }, { "epoch": 2.69, "grad_norm": 0.7662690877914429, "learning_rate": 1.5911513295118328e-05, "loss": 1.6839, "step": 80781 }, { "epoch": 2.69, "grad_norm": 0.7058698534965515, "learning_rate": 1.5908154714654487e-05, "loss": 1.7603, "step": 80782 }, { "epoch": 2.69, "grad_norm": 0.7382380366325378, "learning_rate": 1.5904796479036298e-05, "loss": 1.746, "step": 80783 }, { "epoch": 2.69, "grad_norm": 0.7017316222190857, "learning_rate": 1.590143858826789e-05, "loss": 1.5862, "step": 80784 }, { "epoch": 2.69, "grad_norm": 0.7431312799453735, "learning_rate": 1.589808104235326e-05, "loss": 1.6851, "step": 80785 }, { "epoch": 2.69, "grad_norm": 0.7083495855331421, "learning_rate": 1.5894723841296442e-05, "loss": 1.5839, "step": 80786 }, { "epoch": 2.69, "grad_norm": 0.7080366611480713, "learning_rate": 1.5891366985101627e-05, "loss": 1.7394, "step": 80787 }, { "epoch": 2.69, "grad_norm": 0.709148108959198, "learning_rate": 1.588801047377285e-05, "loss": 1.7434, "step": 80788 }, { "epoch": 2.69, "grad_norm": 0.7156447768211365, "learning_rate": 1.5884654307314105e-05, "loss": 1.6507, "step": 80789 }, { "epoch": 2.69, "grad_norm": 0.7089004516601562, "learning_rate": 1.588129848572952e-05, "loss": 1.7227, "step": 80790 }, { "epoch": 2.69, "grad_norm": 0.7286283373832703, "learning_rate": 1.5877943009023232e-05, "loss": 1.6477, "step": 80791 }, { "epoch": 2.69, "grad_norm": 0.709870457649231, "learning_rate": 1.587458787719923e-05, "loss": 1.7021, "step": 80792 }, { "epoch": 2.69, "grad_norm": 0.7098501920700073, "learning_rate": 1.587123309026158e-05, "loss": 1.6188, "step": 80793 }, { "epoch": 2.69, "grad_norm": 0.7131433486938477, "learning_rate": 1.586787864821445e-05, "loss": 1.6155, "step": 80794 }, { "epoch": 2.69, "grad_norm": 0.7072377800941467, "learning_rate": 1.5864524551061864e-05, "loss": 1.6999, "step": 80795 }, { "epoch": 2.69, "grad_norm": 0.698509931564331, "learning_rate": 1.5861170798807788e-05, "loss": 1.6964, "step": 80796 }, { "epoch": 2.69, "grad_norm": 0.7210030555725098, "learning_rate": 1.5857817391456486e-05, "loss": 1.642, "step": 80797 }, { "epoch": 2.69, "grad_norm": 0.7020889520645142, "learning_rate": 1.5854464329011853e-05, "loss": 1.6743, "step": 80798 }, { "epoch": 2.69, "grad_norm": 0.7116446495056152, "learning_rate": 1.585111161147805e-05, "loss": 1.7103, "step": 80799 }, { "epoch": 2.69, "grad_norm": 0.7076016068458557, "learning_rate": 1.5847759238859116e-05, "loss": 1.6593, "step": 80800 }, { "epoch": 2.69, "grad_norm": 0.6882883906364441, "learning_rate": 1.584440721115917e-05, "loss": 1.7181, "step": 80801 }, { "epoch": 2.69, "grad_norm": 0.7046931385993958, "learning_rate": 1.5841055528382253e-05, "loss": 1.6086, "step": 80802 }, { "epoch": 2.69, "grad_norm": 0.7203982472419739, "learning_rate": 1.5837704190532384e-05, "loss": 1.6312, "step": 80803 }, { "epoch": 2.69, "grad_norm": 0.7351229786872864, "learning_rate": 1.5834353197613735e-05, "loss": 1.7826, "step": 80804 }, { "epoch": 2.69, "grad_norm": 0.7109352350234985, "learning_rate": 1.5831002549630234e-05, "loss": 1.6523, "step": 80805 }, { "epoch": 2.69, "grad_norm": 0.7324283719062805, "learning_rate": 1.5827652246586076e-05, "loss": 1.7075, "step": 80806 }, { "epoch": 2.69, "grad_norm": 0.6865615248680115, "learning_rate": 1.5824302288485292e-05, "loss": 1.6583, "step": 80807 }, { "epoch": 2.69, "grad_norm": 0.6973766088485718, "learning_rate": 1.5820952675331876e-05, "loss": 1.6957, "step": 80808 }, { "epoch": 2.69, "grad_norm": 0.7359074950218201, "learning_rate": 1.581760340713e-05, "loss": 1.665, "step": 80809 }, { "epoch": 2.69, "grad_norm": 0.7238724827766418, "learning_rate": 1.5814254483883647e-05, "loss": 1.661, "step": 80810 }, { "epoch": 2.69, "grad_norm": 0.7161675095558167, "learning_rate": 1.5810905905596926e-05, "loss": 1.6363, "step": 80811 }, { "epoch": 2.69, "grad_norm": 0.6930317282676697, "learning_rate": 1.5807557672273896e-05, "loss": 1.6858, "step": 80812 }, { "epoch": 2.69, "grad_norm": 0.7190331220626831, "learning_rate": 1.580420978391862e-05, "loss": 1.6781, "step": 80813 }, { "epoch": 2.69, "grad_norm": 0.7021909952163696, "learning_rate": 1.5800862240535194e-05, "loss": 1.6758, "step": 80814 }, { "epoch": 2.69, "grad_norm": 0.7290690541267395, "learning_rate": 1.5797515042127584e-05, "loss": 1.6961, "step": 80815 }, { "epoch": 2.69, "grad_norm": 0.7083912491798401, "learning_rate": 1.579416818869995e-05, "loss": 1.618, "step": 80816 }, { "epoch": 2.69, "grad_norm": 0.7128827571868896, "learning_rate": 1.5790821680256326e-05, "loss": 1.6624, "step": 80817 }, { "epoch": 2.69, "grad_norm": 0.7107737064361572, "learning_rate": 1.578747551680074e-05, "loss": 1.6671, "step": 80818 }, { "epoch": 2.69, "grad_norm": 0.6964297294616699, "learning_rate": 1.5784129698337255e-05, "loss": 1.6667, "step": 80819 }, { "epoch": 2.69, "grad_norm": 0.705608606338501, "learning_rate": 1.5780784224870035e-05, "loss": 1.6772, "step": 80820 }, { "epoch": 2.69, "grad_norm": 0.7130171656608582, "learning_rate": 1.5777439096403045e-05, "loss": 1.6861, "step": 80821 }, { "epoch": 2.69, "grad_norm": 0.7391307950019836, "learning_rate": 1.5774094312940312e-05, "loss": 1.6055, "step": 80822 }, { "epoch": 2.69, "grad_norm": 0.7312402725219727, "learning_rate": 1.5770749874486e-05, "loss": 1.6607, "step": 80823 }, { "epoch": 2.69, "grad_norm": 0.7219861149787903, "learning_rate": 1.576740578104414e-05, "loss": 1.7107, "step": 80824 }, { "epoch": 2.69, "grad_norm": 0.7123212814331055, "learning_rate": 1.5764062032618696e-05, "loss": 1.5988, "step": 80825 }, { "epoch": 2.69, "grad_norm": 0.70707106590271, "learning_rate": 1.576071862921383e-05, "loss": 1.7, "step": 80826 }, { "epoch": 2.69, "grad_norm": 0.6934929490089417, "learning_rate": 1.5757375570833607e-05, "loss": 1.6206, "step": 80827 }, { "epoch": 2.69, "grad_norm": 0.7257156372070312, "learning_rate": 1.5754032857482023e-05, "loss": 1.6689, "step": 80828 }, { "epoch": 2.69, "grad_norm": 0.7143055200576782, "learning_rate": 1.5750690489163144e-05, "loss": 1.6883, "step": 80829 }, { "epoch": 2.69, "grad_norm": 0.6984941363334656, "learning_rate": 1.5747348465881093e-05, "loss": 1.645, "step": 80830 }, { "epoch": 2.69, "grad_norm": 0.7446761131286621, "learning_rate": 1.5744006787639875e-05, "loss": 1.7412, "step": 80831 }, { "epoch": 2.69, "grad_norm": 0.7094776034355164, "learning_rate": 1.574066545444348e-05, "loss": 1.6969, "step": 80832 }, { "epoch": 2.69, "grad_norm": 0.7151970863342285, "learning_rate": 1.573732446629604e-05, "loss": 1.7276, "step": 80833 }, { "epoch": 2.69, "grad_norm": 0.7020364999771118, "learning_rate": 1.5733983823201725e-05, "loss": 1.6213, "step": 80834 }, { "epoch": 2.69, "grad_norm": 0.693774402141571, "learning_rate": 1.5730643525164353e-05, "loss": 1.6708, "step": 80835 }, { "epoch": 2.69, "grad_norm": 0.7024420499801636, "learning_rate": 1.57273035721881e-05, "loss": 1.6699, "step": 80836 }, { "epoch": 2.69, "grad_norm": 0.714784562587738, "learning_rate": 1.572396396427705e-05, "loss": 1.6703, "step": 80837 }, { "epoch": 2.69, "grad_norm": 0.7095635533332825, "learning_rate": 1.5720624701435247e-05, "loss": 1.6729, "step": 80838 }, { "epoch": 2.69, "grad_norm": 0.7008931040763855, "learning_rate": 1.5717285783666645e-05, "loss": 1.6692, "step": 80839 }, { "epoch": 2.69, "grad_norm": 0.7100523710250854, "learning_rate": 1.571394721097541e-05, "loss": 1.6718, "step": 80840 }, { "epoch": 2.69, "grad_norm": 0.7143736481666565, "learning_rate": 1.5710608983365604e-05, "loss": 1.7036, "step": 80841 }, { "epoch": 2.69, "grad_norm": 0.7007551193237305, "learning_rate": 1.5707271100841158e-05, "loss": 1.6238, "step": 80842 }, { "epoch": 2.69, "grad_norm": 0.6905679106712341, "learning_rate": 1.5703933563406202e-05, "loss": 1.6964, "step": 80843 }, { "epoch": 2.69, "grad_norm": 0.7296378016471863, "learning_rate": 1.57005963710648e-05, "loss": 1.6611, "step": 80844 }, { "epoch": 2.69, "grad_norm": 0.7024858593940735, "learning_rate": 1.5697259523821015e-05, "loss": 1.7019, "step": 80845 }, { "epoch": 2.69, "grad_norm": 0.7291563749313354, "learning_rate": 1.569392302167881e-05, "loss": 1.6592, "step": 80846 }, { "epoch": 2.69, "grad_norm": 0.7086862921714783, "learning_rate": 1.5690586864642285e-05, "loss": 1.6196, "step": 80847 }, { "epoch": 2.69, "grad_norm": 0.7301368117332458, "learning_rate": 1.5687251052715598e-05, "loss": 1.6675, "step": 80848 }, { "epoch": 2.69, "grad_norm": 0.6934669017791748, "learning_rate": 1.568391558590262e-05, "loss": 1.7203, "step": 80849 }, { "epoch": 2.69, "grad_norm": 0.6735235452651978, "learning_rate": 1.5680580464207437e-05, "loss": 1.7068, "step": 80850 }, { "epoch": 2.69, "grad_norm": 0.7049726247787476, "learning_rate": 1.567724568763422e-05, "loss": 1.6747, "step": 80851 }, { "epoch": 2.69, "grad_norm": 0.6986233592033386, "learning_rate": 1.56739112561869e-05, "loss": 1.6536, "step": 80852 }, { "epoch": 2.69, "grad_norm": 0.6994408965110779, "learning_rate": 1.5670577169869536e-05, "loss": 1.677, "step": 80853 }, { "epoch": 2.69, "grad_norm": 0.7153812050819397, "learning_rate": 1.566724342868626e-05, "loss": 1.6353, "step": 80854 }, { "epoch": 2.69, "grad_norm": 0.7039520740509033, "learning_rate": 1.5663910032641002e-05, "loss": 1.6694, "step": 80855 }, { "epoch": 2.69, "grad_norm": 0.7008442878723145, "learning_rate": 1.566057698173786e-05, "loss": 1.6863, "step": 80856 }, { "epoch": 2.69, "grad_norm": 0.7132714986801147, "learning_rate": 1.5657244275980895e-05, "loss": 1.686, "step": 80857 }, { "epoch": 2.69, "grad_norm": 0.7065974473953247, "learning_rate": 1.565391191537414e-05, "loss": 1.8055, "step": 80858 }, { "epoch": 2.69, "grad_norm": 0.6963393092155457, "learning_rate": 1.5650579899921655e-05, "loss": 1.61, "step": 80859 }, { "epoch": 2.69, "grad_norm": 0.7039309740066528, "learning_rate": 1.564724822962744e-05, "loss": 1.67, "step": 80860 }, { "epoch": 2.69, "grad_norm": 0.7162250876426697, "learning_rate": 1.564391690449559e-05, "loss": 1.6678, "step": 80861 }, { "epoch": 2.69, "grad_norm": 0.7143210768699646, "learning_rate": 1.5640585924530136e-05, "loss": 1.7269, "step": 80862 }, { "epoch": 2.69, "grad_norm": 0.7171593904495239, "learning_rate": 1.5637255289735074e-05, "loss": 1.7186, "step": 80863 }, { "epoch": 2.69, "grad_norm": 0.7049174904823303, "learning_rate": 1.5633925000114534e-05, "loss": 1.6182, "step": 80864 }, { "epoch": 2.69, "grad_norm": 0.6986526846885681, "learning_rate": 1.563059505567248e-05, "loss": 1.6309, "step": 80865 }, { "epoch": 2.69, "grad_norm": 1.1321614980697632, "learning_rate": 1.5627265456412973e-05, "loss": 1.7358, "step": 80866 }, { "epoch": 2.69, "grad_norm": 0.7288119196891785, "learning_rate": 1.562393620234008e-05, "loss": 1.7239, "step": 80867 }, { "epoch": 2.69, "grad_norm": 0.6997621655464172, "learning_rate": 1.562060729345783e-05, "loss": 1.7029, "step": 80868 }, { "epoch": 2.69, "grad_norm": 0.7001103162765503, "learning_rate": 1.561727872977029e-05, "loss": 1.6575, "step": 80869 }, { "epoch": 2.69, "grad_norm": 0.7117878198623657, "learning_rate": 1.5613950511281416e-05, "loss": 1.671, "step": 80870 }, { "epoch": 2.69, "grad_norm": 0.7167229652404785, "learning_rate": 1.561062263799534e-05, "loss": 1.6501, "step": 80871 }, { "epoch": 2.69, "grad_norm": 0.7233792543411255, "learning_rate": 1.560729510991603e-05, "loss": 1.702, "step": 80872 }, { "epoch": 2.69, "grad_norm": 0.6856301426887512, "learning_rate": 1.560396792704761e-05, "loss": 1.6018, "step": 80873 }, { "epoch": 2.69, "grad_norm": 0.7072557806968689, "learning_rate": 1.560064108939405e-05, "loss": 1.631, "step": 80874 }, { "epoch": 2.69, "grad_norm": 0.696806788444519, "learning_rate": 1.5597314596959377e-05, "loss": 1.6538, "step": 80875 }, { "epoch": 2.69, "grad_norm": 0.7070183753967285, "learning_rate": 1.5593988449747685e-05, "loss": 1.6159, "step": 80876 }, { "epoch": 2.69, "grad_norm": 0.7018240690231323, "learning_rate": 1.5590662647762974e-05, "loss": 1.6339, "step": 80877 }, { "epoch": 2.69, "grad_norm": 0.7111018300056458, "learning_rate": 1.5587337191009308e-05, "loss": 1.6495, "step": 80878 }, { "epoch": 2.69, "grad_norm": 0.6986900568008423, "learning_rate": 1.5584012079490682e-05, "loss": 1.716, "step": 80879 }, { "epoch": 2.69, "grad_norm": 0.7201019525527954, "learning_rate": 1.5580687313211194e-05, "loss": 1.7571, "step": 80880 }, { "epoch": 2.69, "grad_norm": 0.7303105592727661, "learning_rate": 1.557736289217487e-05, "loss": 1.6817, "step": 80881 }, { "epoch": 2.69, "grad_norm": 0.7337874174118042, "learning_rate": 1.5574038816385646e-05, "loss": 1.6935, "step": 80882 }, { "epoch": 2.69, "grad_norm": 0.744574785232544, "learning_rate": 1.5570715085847685e-05, "loss": 1.6892, "step": 80883 }, { "epoch": 2.69, "grad_norm": 0.6932259202003479, "learning_rate": 1.5567391700564947e-05, "loss": 1.6805, "step": 80884 }, { "epoch": 2.69, "grad_norm": 0.7165851593017578, "learning_rate": 1.556406866054146e-05, "loss": 1.6596, "step": 80885 }, { "epoch": 2.69, "grad_norm": 0.7010343074798584, "learning_rate": 1.5560745965781296e-05, "loss": 1.6367, "step": 80886 }, { "epoch": 2.69, "grad_norm": 0.7030115723609924, "learning_rate": 1.5557423616288512e-05, "loss": 1.6245, "step": 80887 }, { "epoch": 2.69, "grad_norm": 0.7148993611335754, "learning_rate": 1.5554101612067106e-05, "loss": 1.6974, "step": 80888 }, { "epoch": 2.69, "grad_norm": 0.7292277812957764, "learning_rate": 1.5550779953121072e-05, "loss": 1.6397, "step": 80889 }, { "epoch": 2.69, "grad_norm": 0.7027345299720764, "learning_rate": 1.5547458639454515e-05, "loss": 1.6964, "step": 80890 }, { "epoch": 2.69, "grad_norm": 0.7128145098686218, "learning_rate": 1.554413767107142e-05, "loss": 1.7985, "step": 80891 }, { "epoch": 2.69, "grad_norm": 0.6821496486663818, "learning_rate": 1.5540817047975796e-05, "loss": 1.6655, "step": 80892 }, { "epoch": 2.69, "grad_norm": 0.713962972164154, "learning_rate": 1.5537496770171698e-05, "loss": 1.7928, "step": 80893 }, { "epoch": 2.69, "grad_norm": 0.720980703830719, "learning_rate": 1.553417683766326e-05, "loss": 1.7416, "step": 80894 }, { "epoch": 2.69, "grad_norm": 0.723753035068512, "learning_rate": 1.5530857250454344e-05, "loss": 1.6859, "step": 80895 }, { "epoch": 2.69, "grad_norm": 0.7466011047363281, "learning_rate": 1.5527538008549013e-05, "loss": 1.7127, "step": 80896 }, { "epoch": 2.69, "grad_norm": 0.6910514831542969, "learning_rate": 1.552421911195143e-05, "loss": 1.6511, "step": 80897 }, { "epoch": 2.69, "grad_norm": 0.7154520750045776, "learning_rate": 1.5520900560665496e-05, "loss": 1.6626, "step": 80898 }, { "epoch": 2.69, "grad_norm": 0.7010453343391418, "learning_rate": 1.5517582354695236e-05, "loss": 1.7057, "step": 80899 }, { "epoch": 2.69, "grad_norm": 0.7152480483055115, "learning_rate": 1.5514264494044716e-05, "loss": 1.6784, "step": 80900 }, { "epoch": 2.69, "grad_norm": 0.6954849362373352, "learning_rate": 1.5510946978718e-05, "loss": 1.6507, "step": 80901 }, { "epoch": 2.69, "grad_norm": 0.6928139925003052, "learning_rate": 1.550762980871908e-05, "loss": 1.6321, "step": 80902 }, { "epoch": 2.69, "grad_norm": 0.696248471736908, "learning_rate": 1.5504312984051957e-05, "loss": 1.6476, "step": 80903 }, { "epoch": 2.69, "grad_norm": 0.7084527015686035, "learning_rate": 1.5500996504720697e-05, "loss": 1.6929, "step": 80904 }, { "epoch": 2.69, "grad_norm": 0.7268503904342651, "learning_rate": 1.5497680370729293e-05, "loss": 1.7711, "step": 80905 }, { "epoch": 2.69, "grad_norm": 0.712087869644165, "learning_rate": 1.5494364582081777e-05, "loss": 1.6501, "step": 80906 }, { "epoch": 2.69, "grad_norm": 0.7183279395103455, "learning_rate": 1.5491049138782173e-05, "loss": 1.7223, "step": 80907 }, { "epoch": 2.69, "grad_norm": 0.712023913860321, "learning_rate": 1.5487734040834554e-05, "loss": 1.629, "step": 80908 }, { "epoch": 2.69, "grad_norm": 0.7021114230155945, "learning_rate": 1.5484419288242912e-05, "loss": 1.6841, "step": 80909 }, { "epoch": 2.69, "grad_norm": 0.7500789761543274, "learning_rate": 1.5481104881011207e-05, "loss": 1.6457, "step": 80910 }, { "epoch": 2.69, "grad_norm": 0.7149542570114136, "learning_rate": 1.5477790819143576e-05, "loss": 1.7557, "step": 80911 }, { "epoch": 2.69, "grad_norm": 0.6976169347763062, "learning_rate": 1.5474477102643977e-05, "loss": 1.6593, "step": 80912 }, { "epoch": 2.69, "grad_norm": 0.7111605405807495, "learning_rate": 1.547116373151641e-05, "loss": 1.6611, "step": 80913 }, { "epoch": 2.69, "grad_norm": 0.6976842284202576, "learning_rate": 1.546785070576494e-05, "loss": 1.6608, "step": 80914 }, { "epoch": 2.69, "grad_norm": 0.6888546347618103, "learning_rate": 1.5464538025393558e-05, "loss": 1.7012, "step": 80915 }, { "epoch": 2.69, "grad_norm": 0.7121031880378723, "learning_rate": 1.5461225690406332e-05, "loss": 1.6028, "step": 80916 }, { "epoch": 2.69, "grad_norm": 0.7019129395484924, "learning_rate": 1.5457913700807222e-05, "loss": 1.6691, "step": 80917 }, { "epoch": 2.69, "grad_norm": 0.7084732055664062, "learning_rate": 1.545460205660033e-05, "loss": 1.5794, "step": 80918 }, { "epoch": 2.69, "grad_norm": 0.7014320492744446, "learning_rate": 1.5451290757789615e-05, "loss": 1.6845, "step": 80919 }, { "epoch": 2.69, "grad_norm": 0.7128479480743408, "learning_rate": 1.544797980437904e-05, "loss": 1.6998, "step": 80920 }, { "epoch": 2.69, "grad_norm": 0.7004658579826355, "learning_rate": 1.544466919637277e-05, "loss": 1.7008, "step": 80921 }, { "epoch": 2.69, "grad_norm": 0.7015975713729858, "learning_rate": 1.5441358933774674e-05, "loss": 1.7387, "step": 80922 }, { "epoch": 2.69, "grad_norm": 0.6887301206588745, "learning_rate": 1.5438049016588904e-05, "loss": 1.6813, "step": 80923 }, { "epoch": 2.69, "grad_norm": 0.6989439725875854, "learning_rate": 1.54347394448194e-05, "loss": 1.6813, "step": 80924 }, { "epoch": 2.69, "grad_norm": 0.7273488640785217, "learning_rate": 1.5431430218470154e-05, "loss": 1.7688, "step": 80925 }, { "epoch": 2.69, "grad_norm": 0.7116540670394897, "learning_rate": 1.5428121337545262e-05, "loss": 1.6469, "step": 80926 }, { "epoch": 2.69, "grad_norm": 0.6972550749778748, "learning_rate": 1.542481280204866e-05, "loss": 1.6767, "step": 80927 }, { "epoch": 2.69, "grad_norm": 0.7242538332939148, "learning_rate": 1.542150461198447e-05, "loss": 1.6527, "step": 80928 }, { "epoch": 2.69, "grad_norm": 0.7189441323280334, "learning_rate": 1.5418196767356563e-05, "loss": 1.6764, "step": 80929 }, { "epoch": 2.69, "grad_norm": 0.7241445779800415, "learning_rate": 1.5414889268169097e-05, "loss": 1.6966, "step": 80930 }, { "epoch": 2.69, "grad_norm": 0.7036684155464172, "learning_rate": 1.5411582114426034e-05, "loss": 1.7247, "step": 80931 }, { "epoch": 2.69, "grad_norm": 0.7064287662506104, "learning_rate": 1.540827530613131e-05, "loss": 1.706, "step": 80932 }, { "epoch": 2.69, "grad_norm": 0.6853047609329224, "learning_rate": 1.540496884328909e-05, "loss": 1.6495, "step": 80933 }, { "epoch": 2.69, "grad_norm": 0.7059531807899475, "learning_rate": 1.5401662725903263e-05, "loss": 1.6212, "step": 80934 }, { "epoch": 2.69, "grad_norm": 0.720718264579773, "learning_rate": 1.5398356953977865e-05, "loss": 1.7283, "step": 80935 }, { "epoch": 2.69, "grad_norm": 0.683868944644928, "learning_rate": 1.539505152751692e-05, "loss": 1.5983, "step": 80936 }, { "epoch": 2.69, "grad_norm": 0.7150327563285828, "learning_rate": 1.5391746446524466e-05, "loss": 1.6597, "step": 80937 }, { "epoch": 2.69, "grad_norm": 0.6989439725875854, "learning_rate": 1.538844171100453e-05, "loss": 1.6678, "step": 80938 }, { "epoch": 2.69, "grad_norm": 0.7004433870315552, "learning_rate": 1.538513732096104e-05, "loss": 1.6524, "step": 80939 }, { "epoch": 2.69, "grad_norm": 0.6966464519500732, "learning_rate": 1.5381833276398092e-05, "loss": 1.6672, "step": 80940 }, { "epoch": 2.69, "grad_norm": 0.7056292295455933, "learning_rate": 1.5378529577319687e-05, "loss": 1.7283, "step": 80941 }, { "epoch": 2.69, "grad_norm": 0.689812958240509, "learning_rate": 1.537522622372972e-05, "loss": 1.6496, "step": 80942 }, { "epoch": 2.69, "grad_norm": 0.9947054982185364, "learning_rate": 1.537192321563232e-05, "loss": 1.6654, "step": 80943 }, { "epoch": 2.69, "grad_norm": 0.7126920819282532, "learning_rate": 1.5368620553031554e-05, "loss": 1.6706, "step": 80944 }, { "epoch": 2.69, "grad_norm": 0.6968352794647217, "learning_rate": 1.536531823593128e-05, "loss": 1.7003, "step": 80945 }, { "epoch": 2.69, "grad_norm": 0.7156808376312256, "learning_rate": 1.536201626433553e-05, "loss": 1.7387, "step": 80946 }, { "epoch": 2.69, "grad_norm": 0.7009888887405396, "learning_rate": 1.5358714638248403e-05, "loss": 1.7303, "step": 80947 }, { "epoch": 2.69, "grad_norm": 0.7212523818016052, "learning_rate": 1.5355413357673896e-05, "loss": 1.6224, "step": 80948 }, { "epoch": 2.69, "grad_norm": 0.6821704506874084, "learning_rate": 1.5352112422615903e-05, "loss": 1.6803, "step": 80949 }, { "epoch": 2.69, "grad_norm": 0.7151501178741455, "learning_rate": 1.534881183307849e-05, "loss": 1.7288, "step": 80950 }, { "epoch": 2.69, "grad_norm": 0.7229727506637573, "learning_rate": 1.5345511589065784e-05, "loss": 1.6602, "step": 80951 }, { "epoch": 2.69, "grad_norm": 0.7210550308227539, "learning_rate": 1.5342211690581585e-05, "loss": 1.6122, "step": 80952 }, { "epoch": 2.69, "grad_norm": 0.7070509791374207, "learning_rate": 1.5338912137630022e-05, "loss": 1.6395, "step": 80953 }, { "epoch": 2.69, "grad_norm": 0.6939435601234436, "learning_rate": 1.533561293021509e-05, "loss": 1.7632, "step": 80954 }, { "epoch": 2.69, "grad_norm": 0.7042315006256104, "learning_rate": 1.533231406834079e-05, "loss": 1.6697, "step": 80955 }, { "epoch": 2.69, "grad_norm": 0.733782172203064, "learning_rate": 1.532901555201108e-05, "loss": 1.7587, "step": 80956 }, { "epoch": 2.69, "grad_norm": 0.7124883532524109, "learning_rate": 1.532571738123003e-05, "loss": 1.5903, "step": 80957 }, { "epoch": 2.69, "grad_norm": 0.7068334817886353, "learning_rate": 1.5322419556001662e-05, "loss": 1.7073, "step": 80958 }, { "epoch": 2.69, "grad_norm": 0.6971171498298645, "learning_rate": 1.531912207632985e-05, "loss": 1.7106, "step": 80959 }, { "epoch": 2.69, "grad_norm": 0.7064002752304077, "learning_rate": 1.531582494221868e-05, "loss": 1.6647, "step": 80960 }, { "epoch": 2.69, "grad_norm": 0.6976115107536316, "learning_rate": 1.531252815367222e-05, "loss": 1.6851, "step": 80961 }, { "epoch": 2.69, "grad_norm": 0.7121350765228271, "learning_rate": 1.530923171069437e-05, "loss": 1.6713, "step": 80962 }, { "epoch": 2.69, "grad_norm": 0.7322573065757751, "learning_rate": 1.530593561328912e-05, "loss": 1.7226, "step": 80963 }, { "epoch": 2.69, "grad_norm": 0.7170451879501343, "learning_rate": 1.5302639861460542e-05, "loss": 1.7802, "step": 80964 }, { "epoch": 2.69, "grad_norm": 0.7242901921272278, "learning_rate": 1.5299344455212692e-05, "loss": 1.681, "step": 80965 }, { "epoch": 2.69, "grad_norm": 0.7199256420135498, "learning_rate": 1.5296049394549405e-05, "loss": 1.6539, "step": 80966 }, { "epoch": 2.69, "grad_norm": 0.7318689227104187, "learning_rate": 1.529275467947474e-05, "loss": 1.6518, "step": 80967 }, { "epoch": 2.69, "grad_norm": 0.6848982572555542, "learning_rate": 1.5289460309992795e-05, "loss": 1.5964, "step": 80968 }, { "epoch": 2.69, "grad_norm": 0.7156098484992981, "learning_rate": 1.5286166286107472e-05, "loss": 1.6476, "step": 80969 }, { "epoch": 2.69, "grad_norm": 0.722791314125061, "learning_rate": 1.528287260782276e-05, "loss": 1.6073, "step": 80970 }, { "epoch": 2.69, "grad_norm": 0.711788535118103, "learning_rate": 1.527957927514273e-05, "loss": 1.6945, "step": 80971 }, { "epoch": 2.69, "grad_norm": 0.7055854201316833, "learning_rate": 1.5276286288071338e-05, "loss": 1.6331, "step": 80972 }, { "epoch": 2.69, "grad_norm": 0.6995896100997925, "learning_rate": 1.5272993646612553e-05, "loss": 1.6675, "step": 80973 }, { "epoch": 2.69, "grad_norm": 0.717005729675293, "learning_rate": 1.5269701350770404e-05, "loss": 1.666, "step": 80974 }, { "epoch": 2.69, "grad_norm": 0.713432252407074, "learning_rate": 1.5266409400548885e-05, "loss": 1.7261, "step": 80975 }, { "epoch": 2.69, "grad_norm": 0.7186344861984253, "learning_rate": 1.526311779595203e-05, "loss": 1.7073, "step": 80976 }, { "epoch": 2.69, "grad_norm": 0.7137842178344727, "learning_rate": 1.5259826536983734e-05, "loss": 1.6826, "step": 80977 }, { "epoch": 2.69, "grad_norm": 0.703061044216156, "learning_rate": 1.5256535623648126e-05, "loss": 1.6628, "step": 80978 }, { "epoch": 2.69, "grad_norm": 0.687258780002594, "learning_rate": 1.5253245055949104e-05, "loss": 1.6693, "step": 80979 }, { "epoch": 2.69, "grad_norm": 0.7137091755867004, "learning_rate": 1.5249954833890666e-05, "loss": 1.7195, "step": 80980 }, { "epoch": 2.69, "grad_norm": 0.7153194546699524, "learning_rate": 1.5246664957476839e-05, "loss": 1.6179, "step": 80981 }, { "epoch": 2.69, "grad_norm": 0.7005033493041992, "learning_rate": 1.524337542671159e-05, "loss": 1.6709, "step": 80982 }, { "epoch": 2.69, "grad_norm": 0.7210631370544434, "learning_rate": 1.5240086241598948e-05, "loss": 1.6754, "step": 80983 }, { "epoch": 2.69, "grad_norm": 0.7531582117080688, "learning_rate": 1.5236797402142842e-05, "loss": 1.6553, "step": 80984 }, { "epoch": 2.69, "grad_norm": 0.70924973487854, "learning_rate": 1.5233508908347336e-05, "loss": 1.6715, "step": 80985 }, { "epoch": 2.69, "grad_norm": 0.7095764875411987, "learning_rate": 1.5230220760216427e-05, "loss": 1.5853, "step": 80986 }, { "epoch": 2.69, "grad_norm": 0.7257439494132996, "learning_rate": 1.5226932957754012e-05, "loss": 1.7565, "step": 80987 }, { "epoch": 2.69, "grad_norm": 0.6976678967475891, "learning_rate": 1.5223645500964188e-05, "loss": 1.7026, "step": 80988 }, { "epoch": 2.69, "grad_norm": 0.7225242853164673, "learning_rate": 1.522035838985085e-05, "loss": 1.6475, "step": 80989 }, { "epoch": 2.69, "grad_norm": 0.7113558053970337, "learning_rate": 1.5217071624418065e-05, "loss": 1.6288, "step": 80990 }, { "epoch": 2.69, "grad_norm": 0.7378724217414856, "learning_rate": 1.5213785204669827e-05, "loss": 1.673, "step": 80991 }, { "epoch": 2.69, "grad_norm": 0.7207548022270203, "learning_rate": 1.5210499130610033e-05, "loss": 1.6698, "step": 80992 }, { "epoch": 2.69, "grad_norm": 0.7122382521629333, "learning_rate": 1.520721340224278e-05, "loss": 1.6563, "step": 80993 }, { "epoch": 2.69, "grad_norm": 0.723098635673523, "learning_rate": 1.5203928019571965e-05, "loss": 1.7297, "step": 80994 }, { "epoch": 2.69, "grad_norm": 0.7213084101676941, "learning_rate": 1.5200642982601653e-05, "loss": 1.7939, "step": 80995 }, { "epoch": 2.69, "grad_norm": 0.7133046388626099, "learning_rate": 1.5197358291335737e-05, "loss": 1.628, "step": 80996 }, { "epoch": 2.69, "grad_norm": 0.7079440951347351, "learning_rate": 1.5194073945778317e-05, "loss": 1.7153, "step": 80997 }, { "epoch": 2.69, "grad_norm": 0.7172551155090332, "learning_rate": 1.5190789945933356e-05, "loss": 1.7013, "step": 80998 }, { "epoch": 2.69, "grad_norm": 0.6970542073249817, "learning_rate": 1.5187506291804752e-05, "loss": 1.6887, "step": 80999 }, { "epoch": 2.69, "grad_norm": 0.7025851607322693, "learning_rate": 1.51842229833966e-05, "loss": 1.7116, "step": 81000 }, { "epoch": 2.69, "grad_norm": 0.7021827697753906, "learning_rate": 1.5180940020712795e-05, "loss": 1.7352, "step": 81001 }, { "epoch": 2.69, "grad_norm": 0.6886041760444641, "learning_rate": 1.5177657403757371e-05, "loss": 1.6413, "step": 81002 }, { "epoch": 2.69, "grad_norm": 0.7247247695922852, "learning_rate": 1.5174375132534288e-05, "loss": 1.7468, "step": 81003 }, { "epoch": 2.7, "grad_norm": 0.7141645550727844, "learning_rate": 1.517109320704758e-05, "loss": 1.6239, "step": 81004 }, { "epoch": 2.7, "grad_norm": 0.7355579733848572, "learning_rate": 1.5167811627301207e-05, "loss": 1.6643, "step": 81005 }, { "epoch": 2.7, "grad_norm": 0.7045891880989075, "learning_rate": 1.51645303932991e-05, "loss": 1.5761, "step": 81006 }, { "epoch": 2.7, "grad_norm": 0.7150741219520569, "learning_rate": 1.5161249505045325e-05, "loss": 1.715, "step": 81007 }, { "epoch": 2.7, "grad_norm": 0.703700840473175, "learning_rate": 1.515796896254381e-05, "loss": 1.7009, "step": 81008 }, { "epoch": 2.7, "grad_norm": 0.7193477153778076, "learning_rate": 1.5154688765798518e-05, "loss": 1.6983, "step": 81009 }, { "epoch": 2.7, "grad_norm": 0.7000250816345215, "learning_rate": 1.5151408914813478e-05, "loss": 1.7181, "step": 81010 }, { "epoch": 2.7, "grad_norm": 0.7123862504959106, "learning_rate": 1.5148129409592723e-05, "loss": 1.7037, "step": 81011 }, { "epoch": 2.7, "grad_norm": 0.7046513557434082, "learning_rate": 1.5144850250140084e-05, "loss": 1.6614, "step": 81012 }, { "epoch": 2.7, "grad_norm": 0.701023280620575, "learning_rate": 1.5141571436459654e-05, "loss": 1.7076, "step": 81013 }, { "epoch": 2.7, "grad_norm": 0.7175452709197998, "learning_rate": 1.5138292968555399e-05, "loss": 1.6503, "step": 81014 }, { "epoch": 2.7, "grad_norm": 0.7149170637130737, "learning_rate": 1.5135014846431281e-05, "loss": 1.7343, "step": 81015 }, { "epoch": 2.7, "grad_norm": 0.708884060382843, "learning_rate": 1.5131737070091232e-05, "loss": 1.7539, "step": 81016 }, { "epoch": 2.7, "grad_norm": 0.7079092860221863, "learning_rate": 1.5128459639539315e-05, "loss": 1.6474, "step": 81017 }, { "epoch": 2.7, "grad_norm": 0.6810054779052734, "learning_rate": 1.5125182554779492e-05, "loss": 1.7179, "step": 81018 }, { "epoch": 2.7, "grad_norm": 0.6856357455253601, "learning_rate": 1.5121905815815726e-05, "loss": 1.7361, "step": 81019 }, { "epoch": 2.7, "grad_norm": 0.709412157535553, "learning_rate": 1.5118629422651952e-05, "loss": 1.6778, "step": 81020 }, { "epoch": 2.7, "grad_norm": 0.7171469926834106, "learning_rate": 1.5115353375292227e-05, "loss": 1.7373, "step": 81021 }, { "epoch": 2.7, "grad_norm": 0.7015722393989563, "learning_rate": 1.5112077673740519e-05, "loss": 1.6284, "step": 81022 }, { "epoch": 2.7, "grad_norm": 0.726018488407135, "learning_rate": 1.510880231800069e-05, "loss": 1.6507, "step": 81023 }, { "epoch": 2.7, "grad_norm": 0.7313160300254822, "learning_rate": 1.5105527308076803e-05, "loss": 1.6901, "step": 81024 }, { "epoch": 2.7, "grad_norm": 0.725484311580658, "learning_rate": 1.510225264397289e-05, "loss": 1.6915, "step": 81025 }, { "epoch": 2.7, "grad_norm": 0.7171183824539185, "learning_rate": 1.5098978325692846e-05, "loss": 1.7349, "step": 81026 }, { "epoch": 2.7, "grad_norm": 0.7196617722511292, "learning_rate": 1.5095704353240635e-05, "loss": 1.6317, "step": 81027 }, { "epoch": 2.7, "grad_norm": 0.729157030582428, "learning_rate": 1.5092430726620285e-05, "loss": 1.5743, "step": 81028 }, { "epoch": 2.7, "grad_norm": 0.6998410224914551, "learning_rate": 1.5089157445835765e-05, "loss": 1.7104, "step": 81029 }, { "epoch": 2.7, "grad_norm": 0.7133762240409851, "learning_rate": 1.5085884510890967e-05, "loss": 1.7218, "step": 81030 }, { "epoch": 2.7, "grad_norm": 0.7055436968803406, "learning_rate": 1.5082611921789988e-05, "loss": 1.6434, "step": 81031 }, { "epoch": 2.7, "grad_norm": 0.7196387052536011, "learning_rate": 1.5079339678536662e-05, "loss": 1.656, "step": 81032 }, { "epoch": 2.7, "grad_norm": 0.7043314576148987, "learning_rate": 1.5076067781135115e-05, "loss": 1.749, "step": 81033 }, { "epoch": 2.7, "grad_norm": 0.7176674008369446, "learning_rate": 1.507279622958918e-05, "loss": 1.6642, "step": 81034 }, { "epoch": 2.7, "grad_norm": 0.7427578568458557, "learning_rate": 1.5069525023902918e-05, "loss": 1.6571, "step": 81035 }, { "epoch": 2.7, "grad_norm": 0.7011633515357971, "learning_rate": 1.5066254164080293e-05, "loss": 1.6338, "step": 81036 }, { "epoch": 2.7, "grad_norm": 0.6989148855209351, "learning_rate": 1.5062983650125204e-05, "loss": 1.633, "step": 81037 }, { "epoch": 2.7, "grad_norm": 0.724131166934967, "learning_rate": 1.5059713482041713e-05, "loss": 1.6281, "step": 81038 }, { "epoch": 2.7, "grad_norm": 0.7196176648139954, "learning_rate": 1.5056443659833717e-05, "loss": 1.7042, "step": 81039 }, { "epoch": 2.7, "grad_norm": 0.7008070945739746, "learning_rate": 1.5053174183505245e-05, "loss": 1.6983, "step": 81040 }, { "epoch": 2.7, "grad_norm": 0.7324633598327637, "learning_rate": 1.504990505306023e-05, "loss": 1.6954, "step": 81041 }, { "epoch": 2.7, "grad_norm": 0.7102420330047607, "learning_rate": 1.5046636268502598e-05, "loss": 1.6969, "step": 81042 }, { "epoch": 2.7, "grad_norm": 0.7094917297363281, "learning_rate": 1.5043367829836417e-05, "loss": 1.7178, "step": 81043 }, { "epoch": 2.7, "grad_norm": 0.7782778143882751, "learning_rate": 1.5040099737065547e-05, "loss": 1.6438, "step": 81044 }, { "epoch": 2.7, "grad_norm": 0.7021448016166687, "learning_rate": 1.5036831990194054e-05, "loss": 1.6726, "step": 81045 }, { "epoch": 2.7, "grad_norm": 0.7021282911300659, "learning_rate": 1.5033564589225833e-05, "loss": 1.703, "step": 81046 }, { "epoch": 2.7, "grad_norm": 0.7332770824432373, "learning_rate": 1.5030297534164881e-05, "loss": 1.7292, "step": 81047 }, { "epoch": 2.7, "grad_norm": 0.681810200214386, "learning_rate": 1.5027030825015196e-05, "loss": 1.6318, "step": 81048 }, { "epoch": 2.7, "grad_norm": 0.7206480503082275, "learning_rate": 1.5023764461780641e-05, "loss": 1.6667, "step": 81049 }, { "epoch": 2.7, "grad_norm": 0.729728102684021, "learning_rate": 1.5020498444465312e-05, "loss": 1.6132, "step": 81050 }, { "epoch": 2.7, "grad_norm": 0.7299338579177856, "learning_rate": 1.5017232773073107e-05, "loss": 1.7411, "step": 81051 }, { "epoch": 2.7, "grad_norm": 0.7216985821723938, "learning_rate": 1.5013967447607922e-05, "loss": 1.7126, "step": 81052 }, { "epoch": 2.7, "grad_norm": 0.7397962808609009, "learning_rate": 1.501070246807382e-05, "loss": 1.6461, "step": 81053 }, { "epoch": 2.7, "grad_norm": 0.7246187329292297, "learning_rate": 1.5007437834474767e-05, "loss": 1.6805, "step": 81054 }, { "epoch": 2.7, "grad_norm": 0.7011497616767883, "learning_rate": 1.5004173546814691e-05, "loss": 1.7045, "step": 81055 }, { "epoch": 2.7, "grad_norm": 0.7012789249420166, "learning_rate": 1.5000909605097488e-05, "loss": 1.6726, "step": 81056 }, { "epoch": 2.7, "grad_norm": 0.7227352857589722, "learning_rate": 1.4997646009327259e-05, "loss": 1.6556, "step": 81057 }, { "epoch": 2.7, "grad_norm": 0.7214561700820923, "learning_rate": 1.4994382759507895e-05, "loss": 1.6279, "step": 81058 }, { "epoch": 2.7, "grad_norm": 0.7123409509658813, "learning_rate": 1.499111985564333e-05, "loss": 1.6347, "step": 81059 }, { "epoch": 2.7, "grad_norm": 0.6997118592262268, "learning_rate": 1.4987857297737527e-05, "loss": 1.6166, "step": 81060 }, { "epoch": 2.7, "grad_norm": 0.6947221755981445, "learning_rate": 1.4984595085794548e-05, "loss": 1.6244, "step": 81061 }, { "epoch": 2.7, "grad_norm": 0.6976951956748962, "learning_rate": 1.4981333219818193e-05, "loss": 1.5817, "step": 81062 }, { "epoch": 2.7, "grad_norm": 0.722698986530304, "learning_rate": 1.4978071699812521e-05, "loss": 1.7565, "step": 81063 }, { "epoch": 2.7, "grad_norm": 0.7620984315872192, "learning_rate": 1.49748105257815e-05, "loss": 1.7647, "step": 81064 }, { "epoch": 2.7, "grad_norm": 0.745219886302948, "learning_rate": 1.497154969772909e-05, "loss": 1.7154, "step": 81065 }, { "epoch": 2.7, "grad_norm": 0.7082789540290833, "learning_rate": 1.4968289215659158e-05, "loss": 1.6571, "step": 81066 }, { "epoch": 2.7, "grad_norm": 0.7114000916481018, "learning_rate": 1.4965029079575729e-05, "loss": 1.6942, "step": 81067 }, { "epoch": 2.7, "grad_norm": 0.711239755153656, "learning_rate": 1.4961769289482839e-05, "loss": 1.6699, "step": 81068 }, { "epoch": 2.7, "grad_norm": 0.7532830238342285, "learning_rate": 1.4958509845384281e-05, "loss": 1.6574, "step": 81069 }, { "epoch": 2.7, "grad_norm": 0.6959916949272156, "learning_rate": 1.4955250747284086e-05, "loss": 1.7078, "step": 81070 }, { "epoch": 2.7, "grad_norm": 0.7008174061775208, "learning_rate": 1.4951991995186252e-05, "loss": 1.6493, "step": 81071 }, { "epoch": 2.7, "grad_norm": 0.696613073348999, "learning_rate": 1.4948733589094708e-05, "loss": 1.6527, "step": 81072 }, { "epoch": 2.7, "grad_norm": 0.6919651627540588, "learning_rate": 1.4945475529013385e-05, "loss": 1.6748, "step": 81073 }, { "epoch": 2.7, "grad_norm": 0.7134443521499634, "learning_rate": 1.4942217814946211e-05, "loss": 1.7135, "step": 81074 }, { "epoch": 2.7, "grad_norm": 0.7604769468307495, "learning_rate": 1.4938960446897286e-05, "loss": 1.6748, "step": 81075 }, { "epoch": 2.7, "grad_norm": 0.7160696983337402, "learning_rate": 1.4935703424870405e-05, "loss": 1.6986, "step": 81076 }, { "epoch": 2.7, "grad_norm": 0.727428674697876, "learning_rate": 1.493244674886953e-05, "loss": 1.6941, "step": 81077 }, { "epoch": 2.7, "grad_norm": 0.7332448363304138, "learning_rate": 1.4929190418898728e-05, "loss": 1.6235, "step": 81078 }, { "epoch": 2.7, "grad_norm": 0.7061719298362732, "learning_rate": 1.4925934434961895e-05, "loss": 1.6776, "step": 81079 }, { "epoch": 2.7, "grad_norm": 0.6847456693649292, "learning_rate": 1.4922678797062926e-05, "loss": 1.647, "step": 81080 }, { "epoch": 2.7, "grad_norm": 0.7059270739555359, "learning_rate": 1.4919423505205818e-05, "loss": 1.6091, "step": 81081 }, { "epoch": 2.7, "grad_norm": 0.7198114395141602, "learning_rate": 1.4916168559394604e-05, "loss": 1.7131, "step": 81082 }, { "epoch": 2.7, "grad_norm": 0.7272092700004578, "learning_rate": 1.4912913959633077e-05, "loss": 1.6953, "step": 81083 }, { "epoch": 2.7, "grad_norm": 0.7275128960609436, "learning_rate": 1.490965970592527e-05, "loss": 1.6703, "step": 81084 }, { "epoch": 2.7, "grad_norm": 0.6929541230201721, "learning_rate": 1.4906405798275144e-05, "loss": 1.7077, "step": 81085 }, { "epoch": 2.7, "grad_norm": 0.6949413418769836, "learning_rate": 1.4903152236686666e-05, "loss": 1.7194, "step": 81086 }, { "epoch": 2.7, "grad_norm": 0.7152770161628723, "learning_rate": 1.4899899021163698e-05, "loss": 1.7003, "step": 81087 }, { "epoch": 2.7, "grad_norm": 0.7121987342834473, "learning_rate": 1.48966461517103e-05, "loss": 1.7389, "step": 81088 }, { "epoch": 2.7, "grad_norm": 0.7134659290313721, "learning_rate": 1.4893393628330375e-05, "loss": 1.7078, "step": 81089 }, { "epoch": 2.7, "grad_norm": 0.688541054725647, "learning_rate": 1.4890141451027782e-05, "loss": 1.7065, "step": 81090 }, { "epoch": 2.7, "grad_norm": 0.7098253965377808, "learning_rate": 1.4886889619806586e-05, "loss": 1.7149, "step": 81091 }, { "epoch": 2.7, "grad_norm": 0.715066134929657, "learning_rate": 1.4883638134670717e-05, "loss": 1.6394, "step": 81092 }, { "epoch": 2.7, "grad_norm": 0.7258309125900269, "learning_rate": 1.4880386995624104e-05, "loss": 1.6074, "step": 81093 }, { "epoch": 2.7, "grad_norm": 0.7040963172912598, "learning_rate": 1.4877136202670648e-05, "loss": 1.6879, "step": 81094 }, { "epoch": 2.7, "grad_norm": 0.7164510488510132, "learning_rate": 1.4873885755814408e-05, "loss": 1.6266, "step": 81095 }, { "epoch": 2.7, "grad_norm": 0.7153594493865967, "learning_rate": 1.4870635655059215e-05, "loss": 1.6682, "step": 81096 }, { "epoch": 2.7, "grad_norm": 0.7694173455238342, "learning_rate": 1.4867385900409035e-05, "loss": 1.7555, "step": 81097 }, { "epoch": 2.7, "grad_norm": 0.7136318683624268, "learning_rate": 1.4864136491867895e-05, "loss": 1.6372, "step": 81098 }, { "epoch": 2.7, "grad_norm": 0.7057260274887085, "learning_rate": 1.4860887429439627e-05, "loss": 1.709, "step": 81099 }, { "epoch": 2.7, "grad_norm": 0.7071458697319031, "learning_rate": 1.4857638713128262e-05, "loss": 1.6792, "step": 81100 }, { "epoch": 2.7, "grad_norm": 0.726425290107727, "learning_rate": 1.485439034293766e-05, "loss": 1.6544, "step": 81101 }, { "epoch": 2.7, "grad_norm": 0.7122988104820251, "learning_rate": 1.4851142318871856e-05, "loss": 1.7739, "step": 81102 }, { "epoch": 2.7, "grad_norm": 0.7242822647094727, "learning_rate": 1.4847894640934777e-05, "loss": 1.5975, "step": 81103 }, { "epoch": 2.7, "grad_norm": 0.7082825303077698, "learning_rate": 1.4844647309130287e-05, "loss": 1.7194, "step": 81104 }, { "epoch": 2.7, "grad_norm": 0.718782365322113, "learning_rate": 1.4841400323462416e-05, "loss": 1.7485, "step": 81105 }, { "epoch": 2.7, "grad_norm": 0.7132203578948975, "learning_rate": 1.4838153683935028e-05, "loss": 1.6817, "step": 81106 }, { "epoch": 2.7, "grad_norm": 0.7034074068069458, "learning_rate": 1.4834907390552154e-05, "loss": 1.6642, "step": 81107 }, { "epoch": 2.7, "grad_norm": 0.7022770047187805, "learning_rate": 1.4831661443317655e-05, "loss": 1.7443, "step": 81108 }, { "epoch": 2.7, "grad_norm": 0.7063920497894287, "learning_rate": 1.4828415842235498e-05, "loss": 1.6338, "step": 81109 }, { "epoch": 2.7, "grad_norm": 0.7583556175231934, "learning_rate": 1.4825170587309643e-05, "loss": 1.5981, "step": 81110 }, { "epoch": 2.7, "grad_norm": 0.7488889694213867, "learning_rate": 1.4821925678543989e-05, "loss": 1.7298, "step": 81111 }, { "epoch": 2.7, "grad_norm": 0.723662793636322, "learning_rate": 1.4818681115942533e-05, "loss": 1.6306, "step": 81112 }, { "epoch": 2.7, "grad_norm": 0.7232868075370789, "learning_rate": 1.4815436899509136e-05, "loss": 1.6701, "step": 81113 }, { "epoch": 2.7, "grad_norm": 0.7049172520637512, "learning_rate": 1.4812193029247798e-05, "loss": 1.6279, "step": 81114 }, { "epoch": 2.7, "grad_norm": 0.7085290551185608, "learning_rate": 1.4808949505162481e-05, "loss": 1.6696, "step": 81115 }, { "epoch": 2.7, "grad_norm": 0.7192131876945496, "learning_rate": 1.4805706327257016e-05, "loss": 1.683, "step": 81116 }, { "epoch": 2.7, "grad_norm": 0.7075762152671814, "learning_rate": 1.4802463495535433e-05, "loss": 1.6448, "step": 81117 }, { "epoch": 2.7, "grad_norm": 0.723810613155365, "learning_rate": 1.4799221010001661e-05, "loss": 1.7074, "step": 81118 }, { "epoch": 2.7, "grad_norm": 0.7272828817367554, "learning_rate": 1.479597887065953e-05, "loss": 1.6636, "step": 81119 }, { "epoch": 2.7, "grad_norm": 1.2890574932098389, "learning_rate": 1.4792737077513106e-05, "loss": 1.7543, "step": 81120 }, { "epoch": 2.7, "grad_norm": 0.7064947485923767, "learning_rate": 1.4789495630566284e-05, "loss": 1.7014, "step": 81121 }, { "epoch": 2.7, "grad_norm": 0.7226271629333496, "learning_rate": 1.4786254529822993e-05, "loss": 1.5742, "step": 81122 }, { "epoch": 2.7, "grad_norm": 0.6836894154548645, "learning_rate": 1.4783013775287133e-05, "loss": 1.6486, "step": 81123 }, { "epoch": 2.7, "grad_norm": 0.706303060054779, "learning_rate": 1.4779773366962733e-05, "loss": 1.66, "step": 81124 }, { "epoch": 2.7, "grad_norm": 0.6898173093795776, "learning_rate": 1.4776533304853622e-05, "loss": 1.6319, "step": 81125 }, { "epoch": 2.7, "grad_norm": 0.7192522287368774, "learning_rate": 1.4773293588963764e-05, "loss": 1.6766, "step": 81126 }, { "epoch": 2.7, "grad_norm": 0.738318681716919, "learning_rate": 1.477005421929709e-05, "loss": 1.6436, "step": 81127 }, { "epoch": 2.7, "grad_norm": 0.7004072070121765, "learning_rate": 1.4766815195857562e-05, "loss": 1.6177, "step": 81128 }, { "epoch": 2.7, "grad_norm": 0.7095081210136414, "learning_rate": 1.4763576518649112e-05, "loss": 1.7145, "step": 81129 }, { "epoch": 2.7, "grad_norm": 0.7008249759674072, "learning_rate": 1.4760338187675635e-05, "loss": 1.6895, "step": 81130 }, { "epoch": 2.7, "grad_norm": 0.7248189449310303, "learning_rate": 1.4757100202941096e-05, "loss": 1.6366, "step": 81131 }, { "epoch": 2.7, "grad_norm": 0.7184799909591675, "learning_rate": 1.4753862564449392e-05, "loss": 1.7804, "step": 81132 }, { "epoch": 2.7, "grad_norm": 0.6849032044410706, "learning_rate": 1.4750625272204453e-05, "loss": 1.6667, "step": 81133 }, { "epoch": 2.7, "grad_norm": 0.7223488688468933, "learning_rate": 1.4747388326210207e-05, "loss": 1.7366, "step": 81134 }, { "epoch": 2.7, "grad_norm": 0.7227169275283813, "learning_rate": 1.4744151726470688e-05, "loss": 1.6734, "step": 81135 }, { "epoch": 2.7, "grad_norm": 0.7062850594520569, "learning_rate": 1.474091547298969e-05, "loss": 1.7099, "step": 81136 }, { "epoch": 2.7, "grad_norm": 0.7154268622398376, "learning_rate": 1.4737679565771177e-05, "loss": 1.6419, "step": 81137 }, { "epoch": 2.7, "grad_norm": 0.6916494369506836, "learning_rate": 1.4734444004819112e-05, "loss": 1.7057, "step": 81138 }, { "epoch": 2.7, "grad_norm": 0.7114706039428711, "learning_rate": 1.4731208790137394e-05, "loss": 1.669, "step": 81139 }, { "epoch": 2.7, "grad_norm": 0.7002440094947815, "learning_rate": 1.4727973921729918e-05, "loss": 1.7027, "step": 81140 }, { "epoch": 2.7, "grad_norm": 0.6945827007293701, "learning_rate": 1.4724739399600683e-05, "loss": 1.735, "step": 81141 }, { "epoch": 2.7, "grad_norm": 0.7173072695732117, "learning_rate": 1.4721505223753582e-05, "loss": 1.6785, "step": 81142 }, { "epoch": 2.7, "grad_norm": 0.7081970572471619, "learning_rate": 1.4718271394192549e-05, "loss": 1.6416, "step": 81143 }, { "epoch": 2.7, "grad_norm": 0.6934561729431152, "learning_rate": 1.4715037910921478e-05, "loss": 1.6825, "step": 81144 }, { "epoch": 2.7, "grad_norm": 0.7012513279914856, "learning_rate": 1.4711804773944336e-05, "loss": 1.7112, "step": 81145 }, { "epoch": 2.7, "grad_norm": 0.7136492133140564, "learning_rate": 1.470857198326505e-05, "loss": 1.6771, "step": 81146 }, { "epoch": 2.7, "grad_norm": 0.7191110849380493, "learning_rate": 1.4705339538887451e-05, "loss": 1.6893, "step": 81147 }, { "epoch": 2.7, "grad_norm": 0.7183884382247925, "learning_rate": 1.4702107440815602e-05, "loss": 1.6745, "step": 81148 }, { "epoch": 2.7, "grad_norm": 0.6884068250656128, "learning_rate": 1.4698875689053302e-05, "loss": 1.69, "step": 81149 }, { "epoch": 2.7, "grad_norm": 0.690125584602356, "learning_rate": 1.469564428360458e-05, "loss": 1.6624, "step": 81150 }, { "epoch": 2.7, "grad_norm": 0.7280815243721008, "learning_rate": 1.4692413224473264e-05, "loss": 1.7868, "step": 81151 }, { "epoch": 2.7, "grad_norm": 0.71663898229599, "learning_rate": 1.4689182511663356e-05, "loss": 1.7075, "step": 81152 }, { "epoch": 2.7, "grad_norm": 0.6983222365379333, "learning_rate": 1.4685952145178747e-05, "loss": 1.6786, "step": 81153 }, { "epoch": 2.7, "grad_norm": 0.7343341112136841, "learning_rate": 1.4682722125023306e-05, "loss": 1.6601, "step": 81154 }, { "epoch": 2.7, "grad_norm": 0.7287341952323914, "learning_rate": 1.4679492451201058e-05, "loss": 1.6945, "step": 81155 }, { "epoch": 2.7, "grad_norm": 0.7206602096557617, "learning_rate": 1.4676263123715804e-05, "loss": 1.7218, "step": 81156 }, { "epoch": 2.7, "grad_norm": 0.6733565330505371, "learning_rate": 1.467303414257157e-05, "loss": 1.6352, "step": 81157 }, { "epoch": 2.7, "grad_norm": 0.7049041390419006, "learning_rate": 1.4669805507772259e-05, "loss": 1.6771, "step": 81158 }, { "epoch": 2.7, "grad_norm": 0.7080002427101135, "learning_rate": 1.4666577219321695e-05, "loss": 1.687, "step": 81159 }, { "epoch": 2.7, "grad_norm": 0.7211641073226929, "learning_rate": 1.4663349277223912e-05, "loss": 1.6895, "step": 81160 }, { "epoch": 2.7, "grad_norm": 0.6838212013244629, "learning_rate": 1.4660121681482739e-05, "loss": 1.6657, "step": 81161 }, { "epoch": 2.7, "grad_norm": 0.7157797813415527, "learning_rate": 1.4656894432102173e-05, "loss": 1.6722, "step": 81162 }, { "epoch": 2.7, "grad_norm": 0.725928544998169, "learning_rate": 1.4653667529086044e-05, "loss": 1.7188, "step": 81163 }, { "epoch": 2.7, "grad_norm": 0.7077934145927429, "learning_rate": 1.4650440972438381e-05, "loss": 1.6667, "step": 81164 }, { "epoch": 2.7, "grad_norm": 0.7077779173851013, "learning_rate": 1.4647214762163018e-05, "loss": 1.6673, "step": 81165 }, { "epoch": 2.7, "grad_norm": 0.6946956515312195, "learning_rate": 1.4643988898263881e-05, "loss": 1.6436, "step": 81166 }, { "epoch": 2.7, "grad_norm": 0.7347322106361389, "learning_rate": 1.4640763380744902e-05, "loss": 1.6883, "step": 81167 }, { "epoch": 2.7, "grad_norm": 0.7099679112434387, "learning_rate": 1.4637538209610011e-05, "loss": 1.7519, "step": 81168 }, { "epoch": 2.7, "grad_norm": 0.7042071223258972, "learning_rate": 1.4634313384863072e-05, "loss": 1.6724, "step": 81169 }, { "epoch": 2.7, "grad_norm": 0.6944249868392944, "learning_rate": 1.4631088906508015e-05, "loss": 1.7082, "step": 81170 }, { "epoch": 2.7, "grad_norm": 0.7253108024597168, "learning_rate": 1.4627864774548803e-05, "loss": 1.7532, "step": 81171 }, { "epoch": 2.7, "grad_norm": 0.7313013672828674, "learning_rate": 1.46246409889893e-05, "loss": 1.676, "step": 81172 }, { "epoch": 2.7, "grad_norm": 0.7450195550918579, "learning_rate": 1.4621417549833437e-05, "loss": 1.7155, "step": 81173 }, { "epoch": 2.7, "grad_norm": 0.6941487789154053, "learning_rate": 1.4618194457085142e-05, "loss": 1.6848, "step": 81174 }, { "epoch": 2.7, "grad_norm": 0.7061982154846191, "learning_rate": 1.4614971710748313e-05, "loss": 1.6784, "step": 81175 }, { "epoch": 2.7, "grad_norm": 0.7022419571876526, "learning_rate": 1.4611749310826814e-05, "loss": 1.6377, "step": 81176 }, { "epoch": 2.7, "grad_norm": 0.7606019377708435, "learning_rate": 1.460852725732461e-05, "loss": 1.7424, "step": 81177 }, { "epoch": 2.7, "grad_norm": 0.7154111266136169, "learning_rate": 1.4605305550245661e-05, "loss": 1.7157, "step": 81178 }, { "epoch": 2.7, "grad_norm": 1.0719358921051025, "learning_rate": 1.4602084189593766e-05, "loss": 1.6914, "step": 81179 }, { "epoch": 2.7, "grad_norm": 0.7129803895950317, "learning_rate": 1.4598863175372889e-05, "loss": 1.695, "step": 81180 }, { "epoch": 2.7, "grad_norm": 0.6923213005065918, "learning_rate": 1.4595642507586958e-05, "loss": 1.6398, "step": 81181 }, { "epoch": 2.7, "grad_norm": 0.7059086561203003, "learning_rate": 1.4592422186239871e-05, "loss": 1.768, "step": 81182 }, { "epoch": 2.7, "grad_norm": 0.6746522784233093, "learning_rate": 1.4589202211335526e-05, "loss": 1.5639, "step": 81183 }, { "epoch": 2.7, "grad_norm": 0.716510534286499, "learning_rate": 1.4585982582877787e-05, "loss": 1.6323, "step": 81184 }, { "epoch": 2.7, "grad_norm": 0.711310863494873, "learning_rate": 1.4582763300870715e-05, "loss": 1.6153, "step": 81185 }, { "epoch": 2.7, "grad_norm": 0.7064961194992065, "learning_rate": 1.4579544365318041e-05, "loss": 1.6952, "step": 81186 }, { "epoch": 2.7, "grad_norm": 0.7102500200271606, "learning_rate": 1.457632577622373e-05, "loss": 1.6512, "step": 81187 }, { "epoch": 2.7, "grad_norm": 0.6885908246040344, "learning_rate": 1.4573107533591744e-05, "loss": 1.7042, "step": 81188 }, { "epoch": 2.7, "grad_norm": 0.702134370803833, "learning_rate": 1.456988963742598e-05, "loss": 1.6668, "step": 81189 }, { "epoch": 2.7, "grad_norm": 0.7071844935417175, "learning_rate": 1.4566672087730235e-05, "loss": 1.6776, "step": 81190 }, { "epoch": 2.7, "grad_norm": 0.7109533548355103, "learning_rate": 1.4563454884508507e-05, "loss": 1.6449, "step": 81191 }, { "epoch": 2.7, "grad_norm": 0.6991110444068909, "learning_rate": 1.456023802776476e-05, "loss": 1.6971, "step": 81192 }, { "epoch": 2.7, "grad_norm": 0.7038360238075256, "learning_rate": 1.4557021517502754e-05, "loss": 1.6482, "step": 81193 }, { "epoch": 2.7, "grad_norm": 0.7082157135009766, "learning_rate": 1.4553805353726488e-05, "loss": 1.6105, "step": 81194 }, { "epoch": 2.7, "grad_norm": 0.7185826897621155, "learning_rate": 1.4550589536439861e-05, "loss": 1.655, "step": 81195 }, { "epoch": 2.7, "grad_norm": 0.7211730480194092, "learning_rate": 1.4547374065646766e-05, "loss": 1.7068, "step": 81196 }, { "epoch": 2.7, "grad_norm": 0.7138023972511292, "learning_rate": 1.4544158941351035e-05, "loss": 1.63, "step": 81197 }, { "epoch": 2.7, "grad_norm": 0.7185838222503662, "learning_rate": 1.4540944163556667e-05, "loss": 1.7222, "step": 81198 }, { "epoch": 2.7, "grad_norm": 0.7185787558555603, "learning_rate": 1.4537729732267588e-05, "loss": 1.7643, "step": 81199 }, { "epoch": 2.7, "grad_norm": 0.7262141108512878, "learning_rate": 1.45345156474876e-05, "loss": 1.774, "step": 81200 }, { "epoch": 2.7, "grad_norm": 0.7030686140060425, "learning_rate": 1.4531301909220628e-05, "loss": 1.7103, "step": 81201 }, { "epoch": 2.7, "grad_norm": 0.7247074246406555, "learning_rate": 1.4528088517470638e-05, "loss": 1.6789, "step": 81202 }, { "epoch": 2.7, "grad_norm": 0.7181583046913147, "learning_rate": 1.452487547224146e-05, "loss": 1.6025, "step": 81203 }, { "epoch": 2.7, "grad_norm": 0.7019661068916321, "learning_rate": 1.452166277353699e-05, "loss": 1.5955, "step": 81204 }, { "epoch": 2.7, "grad_norm": 0.7165344953536987, "learning_rate": 1.4518450421361227e-05, "loss": 1.6147, "step": 81205 }, { "epoch": 2.7, "grad_norm": 0.6892911195755005, "learning_rate": 1.4515238415717967e-05, "loss": 1.6099, "step": 81206 }, { "epoch": 2.7, "grad_norm": 0.7017886638641357, "learning_rate": 1.4512026756611139e-05, "loss": 1.6229, "step": 81207 }, { "epoch": 2.7, "grad_norm": 0.7239660024642944, "learning_rate": 1.4508815444044609e-05, "loss": 1.6773, "step": 81208 }, { "epoch": 2.7, "grad_norm": 0.6890276670455933, "learning_rate": 1.450560447802237e-05, "loss": 1.7069, "step": 81209 }, { "epoch": 2.7, "grad_norm": 0.7454155683517456, "learning_rate": 1.4502393858548256e-05, "loss": 1.6621, "step": 81210 }, { "epoch": 2.7, "grad_norm": 0.7068604230880737, "learning_rate": 1.4499183585626129e-05, "loss": 1.7048, "step": 81211 }, { "epoch": 2.7, "grad_norm": 0.732894778251648, "learning_rate": 1.4495973659259986e-05, "loss": 1.6719, "step": 81212 }, { "epoch": 2.7, "grad_norm": 0.7062933444976807, "learning_rate": 1.4492764079453657e-05, "loss": 1.6727, "step": 81213 }, { "epoch": 2.7, "grad_norm": 0.7282694578170776, "learning_rate": 1.4489554846211005e-05, "loss": 1.6406, "step": 81214 }, { "epoch": 2.7, "grad_norm": 0.7125246524810791, "learning_rate": 1.4486345959535994e-05, "loss": 1.769, "step": 81215 }, { "epoch": 2.7, "grad_norm": 0.7020137310028076, "learning_rate": 1.4483137419432455e-05, "loss": 1.657, "step": 81216 }, { "epoch": 2.7, "grad_norm": 0.7323827147483826, "learning_rate": 1.4479929225904352e-05, "loss": 1.6541, "step": 81217 }, { "epoch": 2.7, "grad_norm": 0.7029895186424255, "learning_rate": 1.4476721378955514e-05, "loss": 1.6343, "step": 81218 }, { "epoch": 2.7, "grad_norm": 0.7148973345756531, "learning_rate": 1.4473513878589904e-05, "loss": 1.6291, "step": 81219 }, { "epoch": 2.7, "grad_norm": 0.7118785381317139, "learning_rate": 1.4470306724811354e-05, "loss": 1.6929, "step": 81220 }, { "epoch": 2.7, "grad_norm": 0.7263942360877991, "learning_rate": 1.4467099917623792e-05, "loss": 1.6705, "step": 81221 }, { "epoch": 2.7, "grad_norm": 0.7410415410995483, "learning_rate": 1.4463893457031084e-05, "loss": 1.6584, "step": 81222 }, { "epoch": 2.7, "grad_norm": 0.7230680584907532, "learning_rate": 1.4460687343037125e-05, "loss": 1.7298, "step": 81223 }, { "epoch": 2.7, "grad_norm": 0.6982359290122986, "learning_rate": 1.445748157564588e-05, "loss": 1.6977, "step": 81224 }, { "epoch": 2.7, "grad_norm": 0.7155372500419617, "learning_rate": 1.4454276154861144e-05, "loss": 1.6406, "step": 81225 }, { "epoch": 2.7, "grad_norm": 0.6887410879135132, "learning_rate": 1.4451071080686816e-05, "loss": 1.6634, "step": 81226 }, { "epoch": 2.7, "grad_norm": 0.708514928817749, "learning_rate": 1.4447866353126858e-05, "loss": 1.651, "step": 81227 }, { "epoch": 2.7, "grad_norm": 0.7094125747680664, "learning_rate": 1.4444661972185068e-05, "loss": 1.6991, "step": 81228 }, { "epoch": 2.7, "grad_norm": 1.0576759576797485, "learning_rate": 1.4441457937865441e-05, "loss": 1.6619, "step": 81229 }, { "epoch": 2.7, "grad_norm": 0.6981698870658875, "learning_rate": 1.4438254250171777e-05, "loss": 1.6562, "step": 81230 }, { "epoch": 2.7, "grad_norm": 0.7212364673614502, "learning_rate": 1.4435050909108004e-05, "loss": 1.7836, "step": 81231 }, { "epoch": 2.7, "grad_norm": 0.6953495144844055, "learning_rate": 1.4431847914678018e-05, "loss": 1.6416, "step": 81232 }, { "epoch": 2.7, "grad_norm": 0.7157819867134094, "learning_rate": 1.4428645266885652e-05, "loss": 1.6642, "step": 81233 }, { "epoch": 2.7, "grad_norm": 0.688014805316925, "learning_rate": 1.4425442965734868e-05, "loss": 1.6931, "step": 81234 }, { "epoch": 2.7, "grad_norm": 0.7082602977752686, "learning_rate": 1.442224101122953e-05, "loss": 1.6871, "step": 81235 }, { "epoch": 2.7, "grad_norm": 0.6981163620948792, "learning_rate": 1.4419039403373466e-05, "loss": 1.6708, "step": 81236 }, { "epoch": 2.7, "grad_norm": 0.6990862488746643, "learning_rate": 1.441583814217061e-05, "loss": 1.664, "step": 81237 }, { "epoch": 2.7, "grad_norm": 0.7306332588195801, "learning_rate": 1.441263722762489e-05, "loss": 1.7201, "step": 81238 }, { "epoch": 2.7, "grad_norm": 0.7084090113639832, "learning_rate": 1.4409436659740137e-05, "loss": 1.7009, "step": 81239 }, { "epoch": 2.7, "grad_norm": 0.7203380465507507, "learning_rate": 1.4406236438520247e-05, "loss": 1.6416, "step": 81240 }, { "epoch": 2.7, "grad_norm": 0.7179620265960693, "learning_rate": 1.4403036563969116e-05, "loss": 1.7386, "step": 81241 }, { "epoch": 2.7, "grad_norm": 0.7470522522926331, "learning_rate": 1.4399837036090612e-05, "loss": 1.8042, "step": 81242 }, { "epoch": 2.7, "grad_norm": 0.6956948041915894, "learning_rate": 1.4396637854888593e-05, "loss": 1.6352, "step": 81243 }, { "epoch": 2.7, "grad_norm": 0.7098008394241333, "learning_rate": 1.439343902036696e-05, "loss": 1.6786, "step": 81244 }, { "epoch": 2.7, "grad_norm": 0.7046605348587036, "learning_rate": 1.4390240532529673e-05, "loss": 1.6703, "step": 81245 }, { "epoch": 2.7, "grad_norm": 0.7029856443405151, "learning_rate": 1.4387042391380533e-05, "loss": 1.6592, "step": 81246 }, { "epoch": 2.7, "grad_norm": 0.7313653826713562, "learning_rate": 1.43838445969234e-05, "loss": 1.6725, "step": 81247 }, { "epoch": 2.7, "grad_norm": 0.7271316647529602, "learning_rate": 1.4380647149162273e-05, "loss": 1.6195, "step": 81248 }, { "epoch": 2.7, "grad_norm": 0.7402873039245605, "learning_rate": 1.4377450048100913e-05, "loss": 1.7196, "step": 81249 }, { "epoch": 2.7, "grad_norm": 0.7560508847236633, "learning_rate": 1.437425329374322e-05, "loss": 1.6237, "step": 81250 }, { "epoch": 2.7, "grad_norm": 0.7042728066444397, "learning_rate": 1.437105688609309e-05, "loss": 1.67, "step": 81251 }, { "epoch": 2.7, "grad_norm": 0.7446709871292114, "learning_rate": 1.4367860825154453e-05, "loss": 1.6717, "step": 81252 }, { "epoch": 2.7, "grad_norm": 0.7217177748680115, "learning_rate": 1.4364665110931173e-05, "loss": 1.6555, "step": 81253 }, { "epoch": 2.7, "grad_norm": 0.7091385722160339, "learning_rate": 1.4361469743427045e-05, "loss": 1.7263, "step": 81254 }, { "epoch": 2.7, "grad_norm": 0.7106693983078003, "learning_rate": 1.4358274722646035e-05, "loss": 1.7025, "step": 81255 }, { "epoch": 2.7, "grad_norm": 0.7101706266403198, "learning_rate": 1.4355080048592004e-05, "loss": 1.7506, "step": 81256 }, { "epoch": 2.7, "grad_norm": 0.7061091065406799, "learning_rate": 1.4351885721268785e-05, "loss": 1.744, "step": 81257 }, { "epoch": 2.7, "grad_norm": 0.7075826525688171, "learning_rate": 1.4348691740680274e-05, "loss": 1.6505, "step": 81258 }, { "epoch": 2.7, "grad_norm": 0.7083011269569397, "learning_rate": 1.43454981068304e-05, "loss": 1.694, "step": 81259 }, { "epoch": 2.7, "grad_norm": 0.7110466361045837, "learning_rate": 1.4342304819723027e-05, "loss": 1.7042, "step": 81260 }, { "epoch": 2.7, "grad_norm": 0.7200632691383362, "learning_rate": 1.4339111879361953e-05, "loss": 1.6807, "step": 81261 }, { "epoch": 2.7, "grad_norm": 0.7146035432815552, "learning_rate": 1.433591928575114e-05, "loss": 1.6452, "step": 81262 }, { "epoch": 2.7, "grad_norm": 0.7139140963554382, "learning_rate": 1.4332727038894421e-05, "loss": 1.6435, "step": 81263 }, { "epoch": 2.7, "grad_norm": 0.7076430916786194, "learning_rate": 1.4329535138795655e-05, "loss": 1.6546, "step": 81264 }, { "epoch": 2.7, "grad_norm": 0.6883653998374939, "learning_rate": 1.4326343585458776e-05, "loss": 1.6729, "step": 81265 }, { "epoch": 2.7, "grad_norm": 0.6926924586296082, "learning_rate": 1.432315237888758e-05, "loss": 1.6456, "step": 81266 }, { "epoch": 2.7, "grad_norm": 0.7065715789794922, "learning_rate": 1.4319961519086032e-05, "loss": 1.6937, "step": 81267 }, { "epoch": 2.7, "grad_norm": 0.701639711856842, "learning_rate": 1.4316771006057926e-05, "loss": 1.723, "step": 81268 }, { "epoch": 2.7, "grad_norm": 0.691146969795227, "learning_rate": 1.4313580839807192e-05, "loss": 1.6895, "step": 81269 }, { "epoch": 2.7, "grad_norm": 0.777111291885376, "learning_rate": 1.4310391020337696e-05, "loss": 1.6825, "step": 81270 }, { "epoch": 2.7, "grad_norm": 0.7252350449562073, "learning_rate": 1.4307201547653235e-05, "loss": 1.66, "step": 81271 }, { "epoch": 2.7, "grad_norm": 0.7195309400558472, "learning_rate": 1.430401242175777e-05, "loss": 1.7316, "step": 81272 }, { "epoch": 2.7, "grad_norm": 0.7163071036338806, "learning_rate": 1.4300823642655102e-05, "loss": 1.7568, "step": 81273 }, { "epoch": 2.7, "grad_norm": 0.735492467880249, "learning_rate": 1.429763521034919e-05, "loss": 1.7099, "step": 81274 }, { "epoch": 2.7, "grad_norm": 0.6951663494110107, "learning_rate": 1.4294447124843866e-05, "loss": 1.6222, "step": 81275 }, { "epoch": 2.7, "grad_norm": 0.7069092988967896, "learning_rate": 1.4291259386142928e-05, "loss": 1.701, "step": 81276 }, { "epoch": 2.7, "grad_norm": 0.6953208446502686, "learning_rate": 1.4288071994250338e-05, "loss": 1.7084, "step": 81277 }, { "epoch": 2.7, "grad_norm": 0.7062987685203552, "learning_rate": 1.4284884949169895e-05, "loss": 1.7152, "step": 81278 }, { "epoch": 2.7, "grad_norm": 0.7252001166343689, "learning_rate": 1.4281698250905527e-05, "loss": 1.7115, "step": 81279 }, { "epoch": 2.7, "grad_norm": 0.7178722620010376, "learning_rate": 1.4278511899461065e-05, "loss": 1.7079, "step": 81280 }, { "epoch": 2.7, "grad_norm": 0.7288236618041992, "learning_rate": 1.427532589484044e-05, "loss": 1.686, "step": 81281 }, { "epoch": 2.7, "grad_norm": 0.7210167050361633, "learning_rate": 1.4272140237047447e-05, "loss": 1.7299, "step": 81282 }, { "epoch": 2.7, "grad_norm": 0.7117933630943298, "learning_rate": 1.4268954926085918e-05, "loss": 1.6563, "step": 81283 }, { "epoch": 2.7, "grad_norm": 0.7168442010879517, "learning_rate": 1.426576996195985e-05, "loss": 1.7321, "step": 81284 }, { "epoch": 2.7, "grad_norm": 0.7079068422317505, "learning_rate": 1.426258534467304e-05, "loss": 1.6174, "step": 81285 }, { "epoch": 2.7, "grad_norm": 0.6935607194900513, "learning_rate": 1.4259401074229282e-05, "loss": 1.6191, "step": 81286 }, { "epoch": 2.7, "grad_norm": 0.72765052318573, "learning_rate": 1.4256217150632544e-05, "loss": 1.7004, "step": 81287 }, { "epoch": 2.7, "grad_norm": 0.7039661407470703, "learning_rate": 1.4253033573886652e-05, "loss": 1.7045, "step": 81288 }, { "epoch": 2.7, "grad_norm": 0.7581818699836731, "learning_rate": 1.4249850343995506e-05, "loss": 1.7065, "step": 81289 }, { "epoch": 2.7, "grad_norm": 0.7172257304191589, "learning_rate": 1.4246667460962902e-05, "loss": 1.6748, "step": 81290 }, { "epoch": 2.7, "grad_norm": 0.7321536540985107, "learning_rate": 1.424348492479277e-05, "loss": 1.6344, "step": 81291 }, { "epoch": 2.7, "grad_norm": 0.7455624341964722, "learning_rate": 1.424030273548894e-05, "loss": 1.7058, "step": 81292 }, { "epoch": 2.7, "grad_norm": 0.7244206666946411, "learning_rate": 1.423712089305521e-05, "loss": 1.645, "step": 81293 }, { "epoch": 2.7, "grad_norm": 0.7004486322402954, "learning_rate": 1.4233939397495541e-05, "loss": 1.6578, "step": 81294 }, { "epoch": 2.7, "grad_norm": 0.6988154053688049, "learning_rate": 1.4230758248813868e-05, "loss": 1.6121, "step": 81295 }, { "epoch": 2.7, "grad_norm": 0.7119749784469604, "learning_rate": 1.4227577447013816e-05, "loss": 1.6791, "step": 81296 }, { "epoch": 2.7, "grad_norm": 0.6872907876968384, "learning_rate": 1.4224396992099418e-05, "loss": 1.7026, "step": 81297 }, { "epoch": 2.7, "grad_norm": 0.7153482437133789, "learning_rate": 1.4221216884074537e-05, "loss": 1.5982, "step": 81298 }, { "epoch": 2.7, "grad_norm": 0.7172783613204956, "learning_rate": 1.4218037122942971e-05, "loss": 1.6243, "step": 81299 }, { "epoch": 2.7, "grad_norm": 0.7177175283432007, "learning_rate": 1.421485770870855e-05, "loss": 1.6699, "step": 81300 }, { "epoch": 2.7, "grad_norm": 0.7520722150802612, "learning_rate": 1.4211678641375202e-05, "loss": 1.724, "step": 81301 }, { "epoch": 2.7, "grad_norm": 0.6908864974975586, "learning_rate": 1.4208499920946858e-05, "loss": 1.6623, "step": 81302 }, { "epoch": 2.7, "grad_norm": 0.7443914413452148, "learning_rate": 1.4205321547427184e-05, "loss": 1.6715, "step": 81303 }, { "epoch": 2.7, "grad_norm": 0.6970113515853882, "learning_rate": 1.4202143520820142e-05, "loss": 1.7182, "step": 81304 }, { "epoch": 2.71, "grad_norm": 0.6943222880363464, "learning_rate": 1.4198965841129627e-05, "loss": 1.7124, "step": 81305 }, { "epoch": 2.71, "grad_norm": 0.6977152228355408, "learning_rate": 1.4195788508359474e-05, "loss": 1.7175, "step": 81306 }, { "epoch": 2.71, "grad_norm": 0.7246264219284058, "learning_rate": 1.4192611522513475e-05, "loss": 1.5905, "step": 81307 }, { "epoch": 2.71, "grad_norm": 0.6999402046203613, "learning_rate": 1.4189434883595497e-05, "loss": 1.6703, "step": 81308 }, { "epoch": 2.71, "grad_norm": 0.7080057263374329, "learning_rate": 1.4186258591609568e-05, "loss": 1.6497, "step": 81309 }, { "epoch": 2.71, "grad_norm": 0.7205566167831421, "learning_rate": 1.4183082646559285e-05, "loss": 1.6198, "step": 81310 }, { "epoch": 2.71, "grad_norm": 0.7106534242630005, "learning_rate": 1.4179907048448647e-05, "loss": 1.7144, "step": 81311 }, { "epoch": 2.71, "grad_norm": 0.7135636210441589, "learning_rate": 1.4176731797281548e-05, "loss": 1.7279, "step": 81312 }, { "epoch": 2.71, "grad_norm": 0.7172666192054749, "learning_rate": 1.4173556893061755e-05, "loss": 1.676, "step": 81313 }, { "epoch": 2.71, "grad_norm": 0.7162186503410339, "learning_rate": 1.4170382335793128e-05, "loss": 1.6495, "step": 81314 }, { "epoch": 2.71, "grad_norm": 0.7209678888320923, "learning_rate": 1.4167208125479534e-05, "loss": 1.6956, "step": 81315 }, { "epoch": 2.71, "grad_norm": 0.699209451675415, "learning_rate": 1.4164034262124901e-05, "loss": 1.6201, "step": 81316 }, { "epoch": 2.71, "grad_norm": 0.7045974731445312, "learning_rate": 1.4160860745732927e-05, "loss": 1.7239, "step": 81317 }, { "epoch": 2.71, "grad_norm": 0.7246396541595459, "learning_rate": 1.4157687576307574e-05, "loss": 1.6189, "step": 81318 }, { "epoch": 2.71, "grad_norm": 0.7213773131370544, "learning_rate": 1.4154514753852707e-05, "loss": 1.6326, "step": 81319 }, { "epoch": 2.71, "grad_norm": 1.5581395626068115, "learning_rate": 1.4151342278372158e-05, "loss": 1.7436, "step": 81320 }, { "epoch": 2.71, "grad_norm": 0.7187582850456238, "learning_rate": 1.4148170149869687e-05, "loss": 1.6533, "step": 81321 }, { "epoch": 2.71, "grad_norm": 0.7145600318908691, "learning_rate": 1.4144998368349291e-05, "loss": 1.6215, "step": 81322 }, { "epoch": 2.71, "grad_norm": 0.7138576507568359, "learning_rate": 1.4141826933814737e-05, "loss": 1.7175, "step": 81323 }, { "epoch": 2.71, "grad_norm": 0.7113712430000305, "learning_rate": 1.4138655846269819e-05, "loss": 1.6447, "step": 81324 }, { "epoch": 2.71, "grad_norm": 0.7113248109817505, "learning_rate": 1.4135485105718503e-05, "loss": 1.6203, "step": 81325 }, { "epoch": 2.71, "grad_norm": 0.7134818434715271, "learning_rate": 1.4132314712164584e-05, "loss": 1.7819, "step": 81326 }, { "epoch": 2.71, "grad_norm": 0.706992506980896, "learning_rate": 1.4129144665611924e-05, "loss": 1.6413, "step": 81327 }, { "epoch": 2.71, "grad_norm": 0.7137234210968018, "learning_rate": 1.4125974966064357e-05, "loss": 1.6974, "step": 81328 }, { "epoch": 2.71, "grad_norm": 0.7033835649490356, "learning_rate": 1.4122805613525745e-05, "loss": 1.6778, "step": 81329 }, { "epoch": 2.71, "grad_norm": 0.7304792404174805, "learning_rate": 1.4119636607999917e-05, "loss": 1.6662, "step": 81330 }, { "epoch": 2.71, "grad_norm": 0.7124801874160767, "learning_rate": 1.4116467949490706e-05, "loss": 1.7286, "step": 81331 }, { "epoch": 2.71, "grad_norm": 0.7066063284873962, "learning_rate": 1.4113299638002007e-05, "loss": 1.6705, "step": 81332 }, { "epoch": 2.71, "grad_norm": 0.7294716238975525, "learning_rate": 1.4110131673537583e-05, "loss": 1.6035, "step": 81333 }, { "epoch": 2.71, "grad_norm": 0.7034808397293091, "learning_rate": 1.41069640561014e-05, "loss": 1.6753, "step": 81334 }, { "epoch": 2.71, "grad_norm": 0.7049221992492676, "learning_rate": 1.4103796785697219e-05, "loss": 1.6505, "step": 81335 }, { "epoch": 2.71, "grad_norm": 0.7193987965583801, "learning_rate": 1.4100629862328905e-05, "loss": 1.719, "step": 81336 }, { "epoch": 2.71, "grad_norm": 0.7041290998458862, "learning_rate": 1.4097463286000321e-05, "loss": 1.6705, "step": 81337 }, { "epoch": 2.71, "grad_norm": 0.7442650198936462, "learning_rate": 1.409429705671523e-05, "loss": 1.7356, "step": 81338 }, { "epoch": 2.71, "grad_norm": 0.7319915294647217, "learning_rate": 1.4091131174477599e-05, "loss": 1.708, "step": 81339 }, { "epoch": 2.71, "grad_norm": 0.7393274307250977, "learning_rate": 1.4087965639291155e-05, "loss": 1.6761, "step": 81340 }, { "epoch": 2.71, "grad_norm": 0.7518651485443115, "learning_rate": 1.4084800451159829e-05, "loss": 1.7309, "step": 81341 }, { "epoch": 2.71, "grad_norm": 0.7277674078941345, "learning_rate": 1.4081635610087449e-05, "loss": 1.687, "step": 81342 }, { "epoch": 2.71, "grad_norm": 0.6933701038360596, "learning_rate": 1.4078471116077783e-05, "loss": 1.7176, "step": 81343 }, { "epoch": 2.71, "grad_norm": 0.731283962726593, "learning_rate": 1.407530696913476e-05, "loss": 1.673, "step": 81344 }, { "epoch": 2.71, "grad_norm": 0.7012903094291687, "learning_rate": 1.4072143169262173e-05, "loss": 1.7082, "step": 81345 }, { "epoch": 2.71, "grad_norm": 0.7085389494895935, "learning_rate": 1.406897971646389e-05, "loss": 1.6725, "step": 81346 }, { "epoch": 2.71, "grad_norm": 0.7483664155006409, "learning_rate": 1.4065816610743675e-05, "loss": 1.698, "step": 81347 }, { "epoch": 2.71, "grad_norm": 0.7101438641548157, "learning_rate": 1.4062653852105487e-05, "loss": 1.6912, "step": 81348 }, { "epoch": 2.71, "grad_norm": 0.9190566539764404, "learning_rate": 1.4059491440553127e-05, "loss": 1.6309, "step": 81349 }, { "epoch": 2.71, "grad_norm": 0.6930352449417114, "learning_rate": 1.405632937609036e-05, "loss": 1.719, "step": 81350 }, { "epoch": 2.71, "grad_norm": 0.7084152698516846, "learning_rate": 1.405316765872111e-05, "loss": 1.6882, "step": 81351 }, { "epoch": 2.71, "grad_norm": 0.7321620583534241, "learning_rate": 1.4050006288449178e-05, "loss": 1.6461, "step": 81352 }, { "epoch": 2.71, "grad_norm": 0.707169771194458, "learning_rate": 1.4046845265278361e-05, "loss": 1.7427, "step": 81353 }, { "epoch": 2.71, "grad_norm": 0.7217190265655518, "learning_rate": 1.4043684589212556e-05, "loss": 1.7285, "step": 81354 }, { "epoch": 2.71, "grad_norm": 0.7129549980163574, "learning_rate": 1.4040524260255626e-05, "loss": 1.6826, "step": 81355 }, { "epoch": 2.71, "grad_norm": 0.7110413312911987, "learning_rate": 1.4037364278411366e-05, "loss": 1.7126, "step": 81356 }, { "epoch": 2.71, "grad_norm": 0.717166543006897, "learning_rate": 1.4034204643683577e-05, "loss": 1.6422, "step": 81357 }, { "epoch": 2.71, "grad_norm": 0.7191071510314941, "learning_rate": 1.4031045356076154e-05, "loss": 1.6518, "step": 81358 }, { "epoch": 2.71, "grad_norm": 0.7099310755729675, "learning_rate": 1.4027886415592892e-05, "loss": 1.698, "step": 81359 }, { "epoch": 2.71, "grad_norm": 0.7129007577896118, "learning_rate": 1.4024727822237625e-05, "loss": 1.6362, "step": 81360 }, { "epoch": 2.71, "grad_norm": 0.7108255624771118, "learning_rate": 1.4021569576014214e-05, "loss": 1.6065, "step": 81361 }, { "epoch": 2.71, "grad_norm": 0.72126305103302, "learning_rate": 1.4018411676926488e-05, "loss": 1.7235, "step": 81362 }, { "epoch": 2.71, "grad_norm": 0.7010167241096497, "learning_rate": 1.4015254124978282e-05, "loss": 1.7286, "step": 81363 }, { "epoch": 2.71, "grad_norm": 0.7238107323646545, "learning_rate": 1.4012096920173388e-05, "loss": 1.7337, "step": 81364 }, { "epoch": 2.71, "grad_norm": 0.7226732969284058, "learning_rate": 1.4008940062515706e-05, "loss": 1.6964, "step": 81365 }, { "epoch": 2.71, "grad_norm": 0.7008798122406006, "learning_rate": 1.4005783552009031e-05, "loss": 1.703, "step": 81366 }, { "epoch": 2.71, "grad_norm": 0.7164681553840637, "learning_rate": 1.4002627388657162e-05, "loss": 1.6467, "step": 81367 }, { "epoch": 2.71, "grad_norm": 0.7234790921211243, "learning_rate": 1.399947157246396e-05, "loss": 1.7052, "step": 81368 }, { "epoch": 2.71, "grad_norm": 0.7107202410697937, "learning_rate": 1.3996316103433291e-05, "loss": 1.6339, "step": 81369 }, { "epoch": 2.71, "grad_norm": 0.7106973528862, "learning_rate": 1.3993160981568951e-05, "loss": 1.6875, "step": 81370 }, { "epoch": 2.71, "grad_norm": 0.7169269323348999, "learning_rate": 1.3990006206874737e-05, "loss": 1.6843, "step": 81371 }, { "epoch": 2.71, "grad_norm": 0.6912075281143188, "learning_rate": 1.398685177935458e-05, "loss": 1.6486, "step": 81372 }, { "epoch": 2.71, "grad_norm": 0.7273386716842651, "learning_rate": 1.3983697699012208e-05, "loss": 1.6906, "step": 81373 }, { "epoch": 2.71, "grad_norm": 0.7264731526374817, "learning_rate": 1.3980543965851453e-05, "loss": 1.6344, "step": 81374 }, { "epoch": 2.71, "grad_norm": 0.7121877670288086, "learning_rate": 1.397739057987618e-05, "loss": 1.6956, "step": 81375 }, { "epoch": 2.71, "grad_norm": 0.7044382095336914, "learning_rate": 1.397423754109025e-05, "loss": 1.6762, "step": 81376 }, { "epoch": 2.71, "grad_norm": 0.7079041600227356, "learning_rate": 1.3971084849497461e-05, "loss": 1.6739, "step": 81377 }, { "epoch": 2.71, "grad_norm": 0.7289713025093079, "learning_rate": 1.3967932505101576e-05, "loss": 1.6368, "step": 81378 }, { "epoch": 2.71, "grad_norm": 0.7214577198028564, "learning_rate": 1.3964780507906526e-05, "loss": 1.7246, "step": 81379 }, { "epoch": 2.71, "grad_norm": 0.7074449062347412, "learning_rate": 1.3961628857916074e-05, "loss": 1.8014, "step": 81380 }, { "epoch": 2.71, "grad_norm": 0.7009318470954895, "learning_rate": 1.3958477555134017e-05, "loss": 1.669, "step": 81381 }, { "epoch": 2.71, "grad_norm": 0.7187177538871765, "learning_rate": 1.3955326599564287e-05, "loss": 1.704, "step": 81382 }, { "epoch": 2.71, "grad_norm": 0.7041482925415039, "learning_rate": 1.3952175991210579e-05, "loss": 1.6983, "step": 81383 }, { "epoch": 2.71, "grad_norm": 0.7363033294677734, "learning_rate": 1.3949025730076857e-05, "loss": 1.6832, "step": 81384 }, { "epoch": 2.71, "grad_norm": 0.7156322598457336, "learning_rate": 1.3945875816166785e-05, "loss": 1.6014, "step": 81385 }, { "epoch": 2.71, "grad_norm": 0.719771146774292, "learning_rate": 1.3942726249484326e-05, "loss": 1.6975, "step": 81386 }, { "epoch": 2.71, "grad_norm": 0.7016781568527222, "learning_rate": 1.3939577030033278e-05, "loss": 1.6598, "step": 81387 }, { "epoch": 2.71, "grad_norm": 0.7007907032966614, "learning_rate": 1.393642815781737e-05, "loss": 1.7101, "step": 81388 }, { "epoch": 2.71, "grad_norm": 0.7286994457244873, "learning_rate": 1.3933279632840533e-05, "loss": 1.7661, "step": 81389 }, { "epoch": 2.71, "grad_norm": 0.7147454023361206, "learning_rate": 1.3930131455106497e-05, "loss": 1.6912, "step": 81390 }, { "epoch": 2.71, "grad_norm": 0.7114131450653076, "learning_rate": 1.3926983624619159e-05, "loss": 1.7015, "step": 81391 }, { "epoch": 2.71, "grad_norm": 0.7169634699821472, "learning_rate": 1.3923836141382349e-05, "loss": 1.6993, "step": 81392 }, { "epoch": 2.71, "grad_norm": 0.7141634225845337, "learning_rate": 1.3920689005399766e-05, "loss": 1.6764, "step": 81393 }, { "epoch": 2.71, "grad_norm": 0.7229799032211304, "learning_rate": 1.3917542216675404e-05, "loss": 1.6212, "step": 81394 }, { "epoch": 2.71, "grad_norm": 0.7080018520355225, "learning_rate": 1.3914395775212927e-05, "loss": 1.6732, "step": 81395 }, { "epoch": 2.71, "grad_norm": 0.7546483278274536, "learning_rate": 1.3911249681016267e-05, "loss": 1.7213, "step": 81396 }, { "epoch": 2.71, "grad_norm": 0.7262433171272278, "learning_rate": 1.3908103934089121e-05, "loss": 1.6598, "step": 81397 }, { "epoch": 2.71, "grad_norm": 0.6946000456809998, "learning_rate": 1.3904958534435484e-05, "loss": 1.6987, "step": 81398 }, { "epoch": 2.71, "grad_norm": 0.7061014771461487, "learning_rate": 1.390181348205902e-05, "loss": 1.7155, "step": 81399 }, { "epoch": 2.71, "grad_norm": 0.7268155217170715, "learning_rate": 1.3898668776963596e-05, "loss": 1.6519, "step": 81400 }, { "epoch": 2.71, "grad_norm": 0.7056377530097961, "learning_rate": 1.389552441915307e-05, "loss": 1.7055, "step": 81401 }, { "epoch": 2.71, "grad_norm": 0.722548246383667, "learning_rate": 1.389238040863121e-05, "loss": 1.639, "step": 81402 }, { "epoch": 2.71, "grad_norm": 0.7028951048851013, "learning_rate": 1.388923674540181e-05, "loss": 1.7165, "step": 81403 }, { "epoch": 2.71, "grad_norm": 0.7030156850814819, "learning_rate": 1.3886093429468703e-05, "loss": 1.5869, "step": 81404 }, { "epoch": 2.71, "grad_norm": 0.7014277577400208, "learning_rate": 1.3882950460835784e-05, "loss": 1.6129, "step": 81405 }, { "epoch": 2.71, "grad_norm": 0.7011860013008118, "learning_rate": 1.3879807839506785e-05, "loss": 1.6789, "step": 81406 }, { "epoch": 2.71, "grad_norm": 0.689091682434082, "learning_rate": 1.38766655654855e-05, "loss": 1.6174, "step": 81407 }, { "epoch": 2.71, "grad_norm": 0.7304717898368835, "learning_rate": 1.3873523638775863e-05, "loss": 1.7164, "step": 81408 }, { "epoch": 2.71, "grad_norm": 0.7034123539924622, "learning_rate": 1.3870382059381568e-05, "loss": 1.6664, "step": 81409 }, { "epoch": 2.71, "grad_norm": 0.7134653925895691, "learning_rate": 1.3867240827306447e-05, "loss": 1.6981, "step": 81410 }, { "epoch": 2.71, "grad_norm": 0.7117949724197388, "learning_rate": 1.386409994255433e-05, "loss": 1.6715, "step": 81411 }, { "epoch": 2.71, "grad_norm": 0.715615451335907, "learning_rate": 1.3860959405129146e-05, "loss": 1.6628, "step": 81412 }, { "epoch": 2.71, "grad_norm": 0.7074639201164246, "learning_rate": 1.3857819215034494e-05, "loss": 1.6666, "step": 81413 }, { "epoch": 2.71, "grad_norm": 0.7132270932197571, "learning_rate": 1.3854679372274269e-05, "loss": 1.7287, "step": 81414 }, { "epoch": 2.71, "grad_norm": 0.7066020369529724, "learning_rate": 1.3851539876852369e-05, "loss": 1.6553, "step": 81415 }, { "epoch": 2.71, "grad_norm": 0.7356497645378113, "learning_rate": 1.3848400728772524e-05, "loss": 1.6478, "step": 81416 }, { "epoch": 2.71, "grad_norm": 0.7125073671340942, "learning_rate": 1.3845261928038531e-05, "loss": 1.6888, "step": 81417 }, { "epoch": 2.71, "grad_norm": 0.7181668877601624, "learning_rate": 1.3842123474654221e-05, "loss": 1.739, "step": 81418 }, { "epoch": 2.71, "grad_norm": 0.7054468393325806, "learning_rate": 1.3838985368623456e-05, "loss": 1.7013, "step": 81419 }, { "epoch": 2.71, "grad_norm": 0.6972167491912842, "learning_rate": 1.3835847609949968e-05, "loss": 1.68, "step": 81420 }, { "epoch": 2.71, "grad_norm": 0.7188814878463745, "learning_rate": 1.3832710198637586e-05, "loss": 1.6988, "step": 81421 }, { "epoch": 2.71, "grad_norm": 0.694409191608429, "learning_rate": 1.3829573134690141e-05, "loss": 1.7011, "step": 81422 }, { "epoch": 2.71, "grad_norm": 0.7065384387969971, "learning_rate": 1.3826436418111464e-05, "loss": 1.6732, "step": 81423 }, { "epoch": 2.71, "grad_norm": 0.7176178693771362, "learning_rate": 1.382330004890525e-05, "loss": 1.6873, "step": 81424 }, { "epoch": 2.71, "grad_norm": 0.6988273859024048, "learning_rate": 1.382016402707543e-05, "loss": 1.7427, "step": 81425 }, { "epoch": 2.71, "grad_norm": 0.7152436971664429, "learning_rate": 1.3817028352625803e-05, "loss": 1.6936, "step": 81426 }, { "epoch": 2.71, "grad_norm": 0.7100958228111267, "learning_rate": 1.3813893025560096e-05, "loss": 1.5848, "step": 81427 }, { "epoch": 2.71, "grad_norm": 0.715789794921875, "learning_rate": 1.381075804588211e-05, "loss": 1.6634, "step": 81428 }, { "epoch": 2.71, "grad_norm": 0.7060933709144592, "learning_rate": 1.3807623413595769e-05, "loss": 1.7255, "step": 81429 }, { "epoch": 2.71, "grad_norm": 0.7022625803947449, "learning_rate": 1.380448912870481e-05, "loss": 1.6049, "step": 81430 }, { "epoch": 2.71, "grad_norm": 0.7372706532478333, "learning_rate": 1.380135519121296e-05, "loss": 1.6671, "step": 81431 }, { "epoch": 2.71, "grad_norm": 0.722072958946228, "learning_rate": 1.3798221601124148e-05, "loss": 1.6367, "step": 81432 }, { "epoch": 2.71, "grad_norm": 0.7058942317962646, "learning_rate": 1.379508835844214e-05, "loss": 1.6327, "step": 81433 }, { "epoch": 2.71, "grad_norm": 0.725552499294281, "learning_rate": 1.3791955463170668e-05, "loss": 1.6822, "step": 81434 }, { "epoch": 2.71, "grad_norm": 0.702536940574646, "learning_rate": 1.378882291531359e-05, "loss": 1.7181, "step": 81435 }, { "epoch": 2.71, "grad_norm": 0.6825816631317139, "learning_rate": 1.3785690714874776e-05, "loss": 1.7064, "step": 81436 }, { "epoch": 2.71, "grad_norm": 0.7186506390571594, "learning_rate": 1.3782558861857917e-05, "loss": 1.6186, "step": 81437 }, { "epoch": 2.71, "grad_norm": 0.733138918876648, "learning_rate": 1.3779427356266848e-05, "loss": 1.6842, "step": 81438 }, { "epoch": 2.71, "grad_norm": 0.7253341674804688, "learning_rate": 1.3776296198105429e-05, "loss": 1.6668, "step": 81439 }, { "epoch": 2.71, "grad_norm": 0.7668033242225647, "learning_rate": 1.3773165387377394e-05, "loss": 1.7187, "step": 81440 }, { "epoch": 2.71, "grad_norm": 0.7046334743499756, "learning_rate": 1.3770034924086537e-05, "loss": 1.6635, "step": 81441 }, { "epoch": 2.71, "grad_norm": 0.7209538221359253, "learning_rate": 1.376690480823669e-05, "loss": 1.7898, "step": 81442 }, { "epoch": 2.71, "grad_norm": 0.690382719039917, "learning_rate": 1.3763775039831682e-05, "loss": 1.6667, "step": 81443 }, { "epoch": 2.71, "grad_norm": 0.7043266892433167, "learning_rate": 1.3760645618875243e-05, "loss": 1.6704, "step": 81444 }, { "epoch": 2.71, "grad_norm": 0.7161619067192078, "learning_rate": 1.3757516545371205e-05, "loss": 1.6898, "step": 81445 }, { "epoch": 2.71, "grad_norm": 0.7143089175224304, "learning_rate": 1.3754387819323399e-05, "loss": 1.6578, "step": 81446 }, { "epoch": 2.71, "grad_norm": 0.7046633362770081, "learning_rate": 1.3751259440735584e-05, "loss": 1.6826, "step": 81447 }, { "epoch": 2.71, "grad_norm": 0.7306761145591736, "learning_rate": 1.3748131409611496e-05, "loss": 1.6894, "step": 81448 }, { "epoch": 2.71, "grad_norm": 0.7082390189170837, "learning_rate": 1.3745003725955094e-05, "loss": 1.6685, "step": 81449 }, { "epoch": 2.71, "grad_norm": 0.6960083842277527, "learning_rate": 1.3741876389769979e-05, "loss": 1.7077, "step": 81450 }, { "epoch": 2.71, "grad_norm": 0.7046681046485901, "learning_rate": 1.3738749401060145e-05, "loss": 1.6094, "step": 81451 }, { "epoch": 2.71, "grad_norm": 0.703035295009613, "learning_rate": 1.3735622759829223e-05, "loss": 1.6953, "step": 81452 }, { "epoch": 2.71, "grad_norm": 0.6986488699913025, "learning_rate": 1.3732496466081111e-05, "loss": 1.6675, "step": 81453 }, { "epoch": 2.71, "grad_norm": 0.7238690853118896, "learning_rate": 1.3729370519819572e-05, "loss": 1.5568, "step": 81454 }, { "epoch": 2.71, "grad_norm": 0.7071687579154968, "learning_rate": 1.3726244921048368e-05, "loss": 1.6789, "step": 81455 }, { "epoch": 2.71, "grad_norm": 0.70027756690979, "learning_rate": 1.3723119669771365e-05, "loss": 1.6288, "step": 81456 }, { "epoch": 2.71, "grad_norm": 0.7153216004371643, "learning_rate": 1.371999476599226e-05, "loss": 1.6991, "step": 81457 }, { "epoch": 2.71, "grad_norm": 0.7293659448623657, "learning_rate": 1.3716870209714948e-05, "loss": 1.6582, "step": 81458 }, { "epoch": 2.71, "grad_norm": 0.712420642375946, "learning_rate": 1.371374600094316e-05, "loss": 1.6584, "step": 81459 }, { "epoch": 2.71, "grad_norm": 0.7208114862442017, "learning_rate": 1.371062213968066e-05, "loss": 1.612, "step": 81460 }, { "epoch": 2.71, "grad_norm": 0.7145788073539734, "learning_rate": 1.3707498625931346e-05, "loss": 1.7262, "step": 81461 }, { "epoch": 2.71, "grad_norm": 0.7222006916999817, "learning_rate": 1.3704375459698879e-05, "loss": 1.6485, "step": 81462 }, { "epoch": 2.71, "grad_norm": 0.6809290647506714, "learning_rate": 1.3701252640987193e-05, "loss": 1.6663, "step": 81463 }, { "epoch": 2.71, "grad_norm": 0.6956174373626709, "learning_rate": 1.3698130169799915e-05, "loss": 1.6458, "step": 81464 }, { "epoch": 2.71, "grad_norm": 0.7287499904632568, "learning_rate": 1.3695008046141009e-05, "loss": 1.7201, "step": 81465 }, { "epoch": 2.71, "grad_norm": 0.7090904712677002, "learning_rate": 1.3691886270014141e-05, "loss": 1.7458, "step": 81466 }, { "epoch": 2.71, "grad_norm": 0.7108496427536011, "learning_rate": 1.3688764841423106e-05, "loss": 1.7592, "step": 81467 }, { "epoch": 2.71, "grad_norm": 0.6983774304389954, "learning_rate": 1.3685643760371767e-05, "loss": 1.6946, "step": 81468 }, { "epoch": 2.71, "grad_norm": 0.7529482841491699, "learning_rate": 1.368252302686389e-05, "loss": 1.7168, "step": 81469 }, { "epoch": 2.71, "grad_norm": 0.7191833257675171, "learning_rate": 1.3679402640903202e-05, "loss": 1.6846, "step": 81470 }, { "epoch": 2.71, "grad_norm": 0.6996327042579651, "learning_rate": 1.3676282602493504e-05, "loss": 1.7104, "step": 81471 }, { "epoch": 2.71, "grad_norm": 0.7149360775947571, "learning_rate": 1.3673162911638658e-05, "loss": 1.7159, "step": 81472 }, { "epoch": 2.71, "grad_norm": 0.6965517997741699, "learning_rate": 1.3670043568342427e-05, "loss": 1.6715, "step": 81473 }, { "epoch": 2.71, "grad_norm": 0.7032790780067444, "learning_rate": 1.3666924572608506e-05, "loss": 1.656, "step": 81474 }, { "epoch": 2.71, "grad_norm": 0.6963353157043457, "learning_rate": 1.3663805924440796e-05, "loss": 1.7091, "step": 81475 }, { "epoch": 2.71, "grad_norm": 0.6881983876228333, "learning_rate": 1.3660687623843058e-05, "loss": 1.7089, "step": 81476 }, { "epoch": 2.71, "grad_norm": 0.728172779083252, "learning_rate": 1.3657569670818991e-05, "loss": 1.7053, "step": 81477 }, { "epoch": 2.71, "grad_norm": 0.702551543712616, "learning_rate": 1.3654452065372456e-05, "loss": 1.7457, "step": 81478 }, { "epoch": 2.71, "grad_norm": 0.701410710811615, "learning_rate": 1.3651334807507253e-05, "loss": 1.704, "step": 81479 }, { "epoch": 2.71, "grad_norm": 0.7246520519256592, "learning_rate": 1.3648217897227143e-05, "loss": 1.6358, "step": 81480 }, { "epoch": 2.71, "grad_norm": 0.7014896273612976, "learning_rate": 1.3645101334535858e-05, "loss": 1.7354, "step": 81481 }, { "epoch": 2.71, "grad_norm": 0.7016515731811523, "learning_rate": 1.3641985119437293e-05, "loss": 1.6983, "step": 81482 }, { "epoch": 2.71, "grad_norm": 0.7226647734642029, "learning_rate": 1.3638869251935147e-05, "loss": 1.7029, "step": 81483 }, { "epoch": 2.71, "grad_norm": 0.6893897652626038, "learning_rate": 1.3635753732033184e-05, "loss": 1.6233, "step": 81484 }, { "epoch": 2.71, "grad_norm": 0.7243501543998718, "learning_rate": 1.3632638559735199e-05, "loss": 1.7153, "step": 81485 }, { "epoch": 2.71, "grad_norm": 0.7087159752845764, "learning_rate": 1.362952373504509e-05, "loss": 1.6306, "step": 81486 }, { "epoch": 2.71, "grad_norm": 0.7185356020927429, "learning_rate": 1.3626409257966486e-05, "loss": 1.6154, "step": 81487 }, { "epoch": 2.71, "grad_norm": 0.7219035625457764, "learning_rate": 1.362329512850322e-05, "loss": 1.7162, "step": 81488 }, { "epoch": 2.71, "grad_norm": 0.6912376880645752, "learning_rate": 1.362018134665912e-05, "loss": 1.7202, "step": 81489 }, { "epoch": 2.71, "grad_norm": 0.6964702606201172, "learning_rate": 1.3617067912437918e-05, "loss": 1.5642, "step": 81490 }, { "epoch": 2.71, "grad_norm": 0.7043834328651428, "learning_rate": 1.3613954825843344e-05, "loss": 1.6653, "step": 81491 }, { "epoch": 2.71, "grad_norm": 0.7123187780380249, "learning_rate": 1.3610842086879226e-05, "loss": 1.6432, "step": 81492 }, { "epoch": 2.71, "grad_norm": 0.7078301906585693, "learning_rate": 1.360772969554943e-05, "loss": 1.7541, "step": 81493 }, { "epoch": 2.71, "grad_norm": 0.7076624631881714, "learning_rate": 1.3604617651857618e-05, "loss": 1.7207, "step": 81494 }, { "epoch": 2.71, "grad_norm": 0.7215982675552368, "learning_rate": 1.3601505955807557e-05, "loss": 1.7367, "step": 81495 }, { "epoch": 2.71, "grad_norm": 0.6921466588973999, "learning_rate": 1.359839460740314e-05, "loss": 1.7561, "step": 81496 }, { "epoch": 2.71, "grad_norm": 0.6937853693962097, "learning_rate": 1.3595283606648067e-05, "loss": 1.7096, "step": 81497 }, { "epoch": 2.71, "grad_norm": 0.7124704718589783, "learning_rate": 1.3592172953546065e-05, "loss": 1.6935, "step": 81498 }, { "epoch": 2.71, "grad_norm": 0.7047179937362671, "learning_rate": 1.3589062648101001e-05, "loss": 1.6629, "step": 81499 }, { "epoch": 2.71, "grad_norm": 0.701389729976654, "learning_rate": 1.358595269031657e-05, "loss": 1.6317, "step": 81500 }, { "epoch": 2.71, "grad_norm": 0.7327793836593628, "learning_rate": 1.3582843080196636e-05, "loss": 1.7249, "step": 81501 }, { "epoch": 2.71, "grad_norm": 0.7233293652534485, "learning_rate": 1.3579733817744898e-05, "loss": 1.6858, "step": 81502 }, { "epoch": 2.71, "grad_norm": 0.7355872392654419, "learning_rate": 1.3576624902965217e-05, "loss": 1.6982, "step": 81503 }, { "epoch": 2.71, "grad_norm": 0.7027084231376648, "learning_rate": 1.357351633586129e-05, "loss": 1.5735, "step": 81504 }, { "epoch": 2.71, "grad_norm": 0.7158767580986023, "learning_rate": 1.3570408116436881e-05, "loss": 1.7188, "step": 81505 }, { "epoch": 2.71, "grad_norm": 0.705779492855072, "learning_rate": 1.3567300244695822e-05, "loss": 1.6399, "step": 81506 }, { "epoch": 2.71, "grad_norm": 0.7163169384002686, "learning_rate": 1.3564192720641809e-05, "loss": 1.735, "step": 81507 }, { "epoch": 2.71, "grad_norm": 0.7386717200279236, "learning_rate": 1.3561085544278739e-05, "loss": 1.7259, "step": 81508 }, { "epoch": 2.71, "grad_norm": 0.6903786063194275, "learning_rate": 1.3557978715610274e-05, "loss": 1.6597, "step": 81509 }, { "epoch": 2.71, "grad_norm": 0.7175500392913818, "learning_rate": 1.355487223464018e-05, "loss": 1.6623, "step": 81510 }, { "epoch": 2.71, "grad_norm": 0.7169948220252991, "learning_rate": 1.3551766101372319e-05, "loss": 1.6855, "step": 81511 }, { "epoch": 2.71, "grad_norm": 0.7334305644035339, "learning_rate": 1.3548660315810356e-05, "loss": 1.7043, "step": 81512 }, { "epoch": 2.71, "grad_norm": 0.692314088344574, "learning_rate": 1.3545554877958153e-05, "loss": 1.6801, "step": 81513 }, { "epoch": 2.71, "grad_norm": 0.7190448045730591, "learning_rate": 1.3542449787819376e-05, "loss": 1.6279, "step": 81514 }, { "epoch": 2.71, "grad_norm": 0.7100920081138611, "learning_rate": 1.3539345045397953e-05, "loss": 1.611, "step": 81515 }, { "epoch": 2.71, "grad_norm": 0.7241880893707275, "learning_rate": 1.3536240650697515e-05, "loss": 1.7219, "step": 81516 }, { "epoch": 2.71, "grad_norm": 0.702666163444519, "learning_rate": 1.3533136603721828e-05, "loss": 1.6557, "step": 81517 }, { "epoch": 2.71, "grad_norm": 0.70770663022995, "learning_rate": 1.3530032904474787e-05, "loss": 1.7397, "step": 81518 }, { "epoch": 2.71, "grad_norm": 0.70735102891922, "learning_rate": 1.3526929552960052e-05, "loss": 1.6213, "step": 81519 }, { "epoch": 2.71, "grad_norm": 0.72065269947052, "learning_rate": 1.352382654918136e-05, "loss": 1.652, "step": 81520 }, { "epoch": 2.71, "grad_norm": 0.731419563293457, "learning_rate": 1.3520723893142538e-05, "loss": 1.6777, "step": 81521 }, { "epoch": 2.71, "grad_norm": 0.6875944137573242, "learning_rate": 1.3517621584847415e-05, "loss": 1.6487, "step": 81522 }, { "epoch": 2.71, "grad_norm": 0.7251709699630737, "learning_rate": 1.3514519624299658e-05, "loss": 1.6636, "step": 81523 }, { "epoch": 2.71, "grad_norm": 0.684184193611145, "learning_rate": 1.3511418011503028e-05, "loss": 1.7267, "step": 81524 }, { "epoch": 2.71, "grad_norm": 0.7203611731529236, "learning_rate": 1.3508316746461355e-05, "loss": 1.6336, "step": 81525 }, { "epoch": 2.71, "grad_norm": 0.7346989512443542, "learning_rate": 1.3505215829178406e-05, "loss": 1.6817, "step": 81526 }, { "epoch": 2.71, "grad_norm": 0.7245933413505554, "learning_rate": 1.3502115259657842e-05, "loss": 1.6907, "step": 81527 }, { "epoch": 2.71, "grad_norm": 0.7121501564979553, "learning_rate": 1.3499015037903527e-05, "loss": 1.7643, "step": 81528 }, { "epoch": 2.71, "grad_norm": 0.7220306992530823, "learning_rate": 1.3495915163919224e-05, "loss": 1.7477, "step": 81529 }, { "epoch": 2.71, "grad_norm": 0.7120392918586731, "learning_rate": 1.349281563770863e-05, "loss": 1.6586, "step": 81530 }, { "epoch": 2.71, "grad_norm": 0.7043436169624329, "learning_rate": 1.3489716459275512e-05, "loss": 1.7565, "step": 81531 }, { "epoch": 2.71, "grad_norm": 0.6892511248588562, "learning_rate": 1.348661762862373e-05, "loss": 1.6792, "step": 81532 }, { "epoch": 2.71, "grad_norm": 0.7316886186599731, "learning_rate": 1.3483519145756983e-05, "loss": 1.7541, "step": 81533 }, { "epoch": 2.71, "grad_norm": 0.7179284691810608, "learning_rate": 1.3480421010678966e-05, "loss": 1.7256, "step": 81534 }, { "epoch": 2.71, "grad_norm": 0.7146696448326111, "learning_rate": 1.347732322339351e-05, "loss": 1.7016, "step": 81535 }, { "epoch": 2.71, "grad_norm": 0.7293580770492554, "learning_rate": 1.3474225783904446e-05, "loss": 1.6666, "step": 81536 }, { "epoch": 2.71, "grad_norm": 0.6996716856956482, "learning_rate": 1.347112869221537e-05, "loss": 1.6101, "step": 81537 }, { "epoch": 2.71, "grad_norm": 0.7045228481292725, "learning_rate": 1.3468031948330116e-05, "loss": 1.5753, "step": 81538 }, { "epoch": 2.71, "grad_norm": 0.7002140879631042, "learning_rate": 1.346493555225251e-05, "loss": 1.6944, "step": 81539 }, { "epoch": 2.71, "grad_norm": 0.7184742093086243, "learning_rate": 1.3461839503986249e-05, "loss": 1.7463, "step": 81540 }, { "epoch": 2.71, "grad_norm": 0.7293041944503784, "learning_rate": 1.3458743803535066e-05, "loss": 1.6118, "step": 81541 }, { "epoch": 2.71, "grad_norm": 0.70650714635849, "learning_rate": 1.3455648450902723e-05, "loss": 1.6436, "step": 81542 }, { "epoch": 2.71, "grad_norm": 0.6964377760887146, "learning_rate": 1.3452553446093083e-05, "loss": 1.65, "step": 81543 }, { "epoch": 2.71, "grad_norm": 0.728127658367157, "learning_rate": 1.3449458789109779e-05, "loss": 1.5981, "step": 81544 }, { "epoch": 2.71, "grad_norm": 0.713347852230072, "learning_rate": 1.3446364479956573e-05, "loss": 1.6527, "step": 81545 }, { "epoch": 2.71, "grad_norm": 0.7108182907104492, "learning_rate": 1.3443270518637328e-05, "loss": 1.6342, "step": 81546 }, { "epoch": 2.71, "grad_norm": 0.7264310121536255, "learning_rate": 1.344017690515571e-05, "loss": 1.7169, "step": 81547 }, { "epoch": 2.71, "grad_norm": 0.7553184628486633, "learning_rate": 1.3437083639515444e-05, "loss": 1.6445, "step": 81548 }, { "epoch": 2.71, "grad_norm": 0.7265034914016724, "learning_rate": 1.34339907217204e-05, "loss": 1.634, "step": 81549 }, { "epoch": 2.71, "grad_norm": 0.68736732006073, "learning_rate": 1.3430898151774238e-05, "loss": 1.6737, "step": 81550 }, { "epoch": 2.71, "grad_norm": 0.6868225336074829, "learning_rate": 1.3427805929680724e-05, "loss": 1.6531, "step": 81551 }, { "epoch": 2.71, "grad_norm": 0.7190563082695007, "learning_rate": 1.3424714055443653e-05, "loss": 1.7264, "step": 81552 }, { "epoch": 2.71, "grad_norm": 0.7167810797691345, "learning_rate": 1.3421622529066756e-05, "loss": 1.7141, "step": 81553 }, { "epoch": 2.71, "grad_norm": 0.7092092037200928, "learning_rate": 1.3418531350553796e-05, "loss": 1.686, "step": 81554 }, { "epoch": 2.71, "grad_norm": 0.7020455002784729, "learning_rate": 1.3415440519908472e-05, "loss": 1.6854, "step": 81555 }, { "epoch": 2.71, "grad_norm": 0.7102851867675781, "learning_rate": 1.3412350037134611e-05, "loss": 1.6492, "step": 81556 }, { "epoch": 2.71, "grad_norm": 0.7108712792396545, "learning_rate": 1.3409259902235946e-05, "loss": 1.6748, "step": 81557 }, { "epoch": 2.71, "grad_norm": 0.7378476858139038, "learning_rate": 1.3406170115216174e-05, "loss": 1.6282, "step": 81558 }, { "epoch": 2.71, "grad_norm": 0.7205086350440979, "learning_rate": 1.3403080676079058e-05, "loss": 1.6167, "step": 81559 }, { "epoch": 2.71, "grad_norm": 0.6991110444068909, "learning_rate": 1.3399991584828429e-05, "loss": 1.6427, "step": 81560 }, { "epoch": 2.71, "grad_norm": 0.6961726546287537, "learning_rate": 1.3396902841467982e-05, "loss": 1.669, "step": 81561 }, { "epoch": 2.71, "grad_norm": 0.727006196975708, "learning_rate": 1.339381444600145e-05, "loss": 1.7589, "step": 81562 }, { "epoch": 2.71, "grad_norm": 0.6923896670341492, "learning_rate": 1.3390726398432594e-05, "loss": 1.6424, "step": 81563 }, { "epoch": 2.71, "grad_norm": 0.7032843232154846, "learning_rate": 1.3387638698765213e-05, "loss": 1.6335, "step": 81564 }, { "epoch": 2.71, "grad_norm": 0.7384015917778015, "learning_rate": 1.3384551347002937e-05, "loss": 1.6891, "step": 81565 }, { "epoch": 2.71, "grad_norm": 0.6969982981681824, "learning_rate": 1.3381464343149629e-05, "loss": 1.6504, "step": 81566 }, { "epoch": 2.71, "grad_norm": 0.7205227017402649, "learning_rate": 1.3378377687208953e-05, "loss": 1.7115, "step": 81567 }, { "epoch": 2.71, "grad_norm": 0.7456040978431702, "learning_rate": 1.3375291379184739e-05, "loss": 1.6586, "step": 81568 }, { "epoch": 2.71, "grad_norm": 0.7174167037010193, "learning_rate": 1.3372205419080651e-05, "loss": 1.6791, "step": 81569 }, { "epoch": 2.71, "grad_norm": 0.7057031393051147, "learning_rate": 1.3369119806900486e-05, "loss": 1.6256, "step": 81570 }, { "epoch": 2.71, "grad_norm": 0.7163339853286743, "learning_rate": 1.3366034542648007e-05, "loss": 1.6443, "step": 81571 }, { "epoch": 2.71, "grad_norm": 0.7237471342086792, "learning_rate": 1.3362949626326879e-05, "loss": 1.7844, "step": 81572 }, { "epoch": 2.71, "grad_norm": 0.7219511866569519, "learning_rate": 1.3359865057940933e-05, "loss": 1.6836, "step": 81573 }, { "epoch": 2.71, "grad_norm": 0.7148035168647766, "learning_rate": 1.335678083749383e-05, "loss": 1.704, "step": 81574 }, { "epoch": 2.71, "grad_norm": 0.7200902700424194, "learning_rate": 1.3353696964989435e-05, "loss": 1.6447, "step": 81575 }, { "epoch": 2.71, "grad_norm": 0.7470044493675232, "learning_rate": 1.3350613440431379e-05, "loss": 1.6522, "step": 81576 }, { "epoch": 2.71, "grad_norm": 0.7055531740188599, "learning_rate": 1.3347530263823425e-05, "loss": 1.7024, "step": 81577 }, { "epoch": 2.71, "grad_norm": 0.7012236714363098, "learning_rate": 1.3344447435169337e-05, "loss": 1.6432, "step": 81578 }, { "epoch": 2.71, "grad_norm": 0.7156108617782593, "learning_rate": 1.3341364954472845e-05, "loss": 1.7063, "step": 81579 }, { "epoch": 2.71, "grad_norm": 0.7241072058677673, "learning_rate": 1.3338282821737745e-05, "loss": 1.7205, "step": 81580 }, { "epoch": 2.71, "grad_norm": 0.7130416035652161, "learning_rate": 1.3335201036967669e-05, "loss": 1.6371, "step": 81581 }, { "epoch": 2.71, "grad_norm": 0.7132979035377502, "learning_rate": 1.333211960016648e-05, "loss": 1.6236, "step": 81582 }, { "epoch": 2.71, "grad_norm": 0.7003064155578613, "learning_rate": 1.3329038511337842e-05, "loss": 1.6307, "step": 81583 }, { "epoch": 2.71, "grad_norm": 0.695417582988739, "learning_rate": 1.3325957770485485e-05, "loss": 1.6604, "step": 81584 }, { "epoch": 2.71, "grad_norm": 0.7113500833511353, "learning_rate": 1.3322877377613206e-05, "loss": 1.6848, "step": 81585 }, { "epoch": 2.71, "grad_norm": 0.7047128677368164, "learning_rate": 1.3319797332724736e-05, "loss": 1.658, "step": 81586 }, { "epoch": 2.71, "grad_norm": 0.7083470821380615, "learning_rate": 1.3316717635823737e-05, "loss": 1.7482, "step": 81587 }, { "epoch": 2.71, "grad_norm": 0.6834638118743896, "learning_rate": 1.3313638286914008e-05, "loss": 1.696, "step": 81588 }, { "epoch": 2.71, "grad_norm": 0.7010621428489685, "learning_rate": 1.3310559285999311e-05, "loss": 1.6325, "step": 81589 }, { "epoch": 2.71, "grad_norm": 0.7220356464385986, "learning_rate": 1.3307480633083344e-05, "loss": 1.6754, "step": 81590 }, { "epoch": 2.71, "grad_norm": 0.7059962153434753, "learning_rate": 1.3304402328169839e-05, "loss": 1.6544, "step": 81591 }, { "epoch": 2.71, "grad_norm": 0.7154803276062012, "learning_rate": 1.3301324371262557e-05, "loss": 1.6605, "step": 81592 }, { "epoch": 2.71, "grad_norm": 0.7478644847869873, "learning_rate": 1.3298246762365262e-05, "loss": 1.6849, "step": 81593 }, { "epoch": 2.71, "grad_norm": 0.6850088834762573, "learning_rate": 1.3295169501481584e-05, "loss": 1.6863, "step": 81594 }, { "epoch": 2.71, "grad_norm": 0.7079550623893738, "learning_rate": 1.3292092588615322e-05, "loss": 1.6448, "step": 81595 }, { "epoch": 2.71, "grad_norm": 0.7085579037666321, "learning_rate": 1.3289016023770272e-05, "loss": 1.6418, "step": 81596 }, { "epoch": 2.71, "grad_norm": 0.7207688093185425, "learning_rate": 1.3285939806950097e-05, "loss": 1.651, "step": 81597 }, { "epoch": 2.71, "grad_norm": 0.6965781450271606, "learning_rate": 1.3282863938158527e-05, "loss": 1.6931, "step": 81598 }, { "epoch": 2.71, "grad_norm": 0.7146475315093994, "learning_rate": 1.3279788417399329e-05, "loss": 1.7267, "step": 81599 }, { "epoch": 2.71, "grad_norm": 0.716202437877655, "learning_rate": 1.327671324467623e-05, "loss": 1.7298, "step": 81600 }, { "epoch": 2.71, "grad_norm": 0.7223894000053406, "learning_rate": 1.3273638419992927e-05, "loss": 1.6745, "step": 81601 }, { "epoch": 2.71, "grad_norm": 0.7072356343269348, "learning_rate": 1.3270563943353185e-05, "loss": 1.7081, "step": 81602 }, { "epoch": 2.71, "grad_norm": 0.708696186542511, "learning_rate": 1.3267489814760734e-05, "loss": 1.7204, "step": 81603 }, { "epoch": 2.71, "grad_norm": 0.7228257656097412, "learning_rate": 1.3264416034219338e-05, "loss": 1.7096, "step": 81604 }, { "epoch": 2.72, "grad_norm": 0.6978049874305725, "learning_rate": 1.3261342601732627e-05, "loss": 1.5987, "step": 81605 }, { "epoch": 2.72, "grad_norm": 0.6922451853752136, "learning_rate": 1.3258269517304464e-05, "loss": 1.6348, "step": 81606 }, { "epoch": 2.72, "grad_norm": 0.6965842247009277, "learning_rate": 1.3255196780938482e-05, "loss": 1.5771, "step": 81607 }, { "epoch": 2.72, "grad_norm": 0.7006210088729858, "learning_rate": 1.3252124392638442e-05, "loss": 1.6808, "step": 81608 }, { "epoch": 2.72, "grad_norm": 0.7165445685386658, "learning_rate": 1.3249052352408041e-05, "loss": 1.6509, "step": 81609 }, { "epoch": 2.72, "grad_norm": 0.6818506121635437, "learning_rate": 1.3245980660251076e-05, "loss": 1.6454, "step": 81610 }, { "epoch": 2.72, "grad_norm": 0.7109490633010864, "learning_rate": 1.3242909316171246e-05, "loss": 1.6904, "step": 81611 }, { "epoch": 2.72, "grad_norm": 0.7092642188072205, "learning_rate": 1.3239838320172247e-05, "loss": 1.6679, "step": 81612 }, { "epoch": 2.72, "grad_norm": 0.7191982865333557, "learning_rate": 1.3236767672257875e-05, "loss": 1.7357, "step": 81613 }, { "epoch": 2.72, "grad_norm": 0.7036185264587402, "learning_rate": 1.3233697372431795e-05, "loss": 1.68, "step": 81614 }, { "epoch": 2.72, "grad_norm": 0.721298098564148, "learning_rate": 1.3230627420697704e-05, "loss": 1.6961, "step": 81615 }, { "epoch": 2.72, "grad_norm": 0.7125446200370789, "learning_rate": 1.322755781705943e-05, "loss": 1.7581, "step": 81616 }, { "epoch": 2.72, "grad_norm": 0.7073017358779907, "learning_rate": 1.3224488561520608e-05, "loss": 1.6727, "step": 81617 }, { "epoch": 2.72, "grad_norm": 0.7106956839561462, "learning_rate": 1.3221419654085063e-05, "loss": 1.689, "step": 81618 }, { "epoch": 2.72, "grad_norm": 0.7050163149833679, "learning_rate": 1.3218351094756396e-05, "loss": 1.6699, "step": 81619 }, { "epoch": 2.72, "grad_norm": 0.7093420028686523, "learning_rate": 1.3215282883538435e-05, "loss": 1.6561, "step": 81620 }, { "epoch": 2.72, "grad_norm": 0.7229869365692139, "learning_rate": 1.321221502043488e-05, "loss": 1.6689, "step": 81621 }, { "epoch": 2.72, "grad_norm": 0.7047159075737, "learning_rate": 1.3209147505449392e-05, "loss": 1.7013, "step": 81622 }, { "epoch": 2.72, "grad_norm": 0.7098314762115479, "learning_rate": 1.3206080338585767e-05, "loss": 1.7379, "step": 81623 }, { "epoch": 2.72, "grad_norm": 0.708465576171875, "learning_rate": 1.3203013519847706e-05, "loss": 1.658, "step": 81624 }, { "epoch": 2.72, "grad_norm": 0.6895309090614319, "learning_rate": 1.3199947049238934e-05, "loss": 1.7218, "step": 81625 }, { "epoch": 2.72, "grad_norm": 0.7154074907302856, "learning_rate": 1.3196880926763153e-05, "loss": 1.6598, "step": 81626 }, { "epoch": 2.72, "grad_norm": 0.7013629674911499, "learning_rate": 1.3193815152424092e-05, "loss": 1.6195, "step": 81627 }, { "epoch": 2.72, "grad_norm": 0.7018829584121704, "learning_rate": 1.3190749726225513e-05, "loss": 1.6631, "step": 81628 }, { "epoch": 2.72, "grad_norm": 0.6992177963256836, "learning_rate": 1.3187684648171048e-05, "loss": 1.5813, "step": 81629 }, { "epoch": 2.72, "grad_norm": 0.6777039170265198, "learning_rate": 1.3184619918264528e-05, "loss": 1.6696, "step": 81630 }, { "epoch": 2.72, "grad_norm": 0.742743968963623, "learning_rate": 1.318155553650958e-05, "loss": 1.7275, "step": 81631 }, { "epoch": 2.72, "grad_norm": 0.7109870910644531, "learning_rate": 1.3178491502910005e-05, "loss": 1.6306, "step": 81632 }, { "epoch": 2.72, "grad_norm": 0.7128403186798096, "learning_rate": 1.3175427817469496e-05, "loss": 1.7185, "step": 81633 }, { "epoch": 2.72, "grad_norm": 0.7081460952758789, "learning_rate": 1.3172364480191688e-05, "loss": 1.7159, "step": 81634 }, { "epoch": 2.72, "grad_norm": 0.7479134798049927, "learning_rate": 1.3169301491080408e-05, "loss": 1.7184, "step": 81635 }, { "epoch": 2.72, "grad_norm": 0.703197181224823, "learning_rate": 1.3166238850139354e-05, "loss": 1.649, "step": 81636 }, { "epoch": 2.72, "grad_norm": 0.7115582227706909, "learning_rate": 1.3163176557372157e-05, "loss": 1.7238, "step": 81637 }, { "epoch": 2.72, "grad_norm": 0.7263195514678955, "learning_rate": 1.3160114612782612e-05, "loss": 1.7007, "step": 81638 }, { "epoch": 2.72, "grad_norm": 0.7128571271896362, "learning_rate": 1.3157053016374486e-05, "loss": 1.6688, "step": 81639 }, { "epoch": 2.72, "grad_norm": 0.7026739120483398, "learning_rate": 1.3153991768151407e-05, "loss": 1.6949, "step": 81640 }, { "epoch": 2.72, "grad_norm": 0.7213491201400757, "learning_rate": 1.3150930868117071e-05, "loss": 1.6612, "step": 81641 }, { "epoch": 2.72, "grad_norm": 0.7201713919639587, "learning_rate": 1.314787031627531e-05, "loss": 1.6939, "step": 81642 }, { "epoch": 2.72, "grad_norm": 0.7046393752098083, "learning_rate": 1.3144810112629755e-05, "loss": 1.6567, "step": 81643 }, { "epoch": 2.72, "grad_norm": 0.729656457901001, "learning_rate": 1.314175025718407e-05, "loss": 1.7457, "step": 81644 }, { "epoch": 2.72, "grad_norm": 0.7242569923400879, "learning_rate": 1.313869074994205e-05, "loss": 1.6606, "step": 81645 }, { "epoch": 2.72, "grad_norm": 0.7267025113105774, "learning_rate": 1.3135631590907492e-05, "loss": 1.6751, "step": 81646 }, { "epoch": 2.72, "grad_norm": 0.7151390910148621, "learning_rate": 1.3132572780083928e-05, "loss": 1.614, "step": 81647 }, { "epoch": 2.72, "grad_norm": 0.7183607816696167, "learning_rate": 1.312951431747512e-05, "loss": 1.6398, "step": 81648 }, { "epoch": 2.72, "grad_norm": 0.7354646921157837, "learning_rate": 1.3126456203084867e-05, "loss": 1.6131, "step": 81649 }, { "epoch": 2.72, "grad_norm": 0.7202122211456299, "learning_rate": 1.3123398436916833e-05, "loss": 1.6301, "step": 81650 }, { "epoch": 2.72, "grad_norm": 0.7212687134742737, "learning_rate": 1.3120341018974679e-05, "loss": 1.6387, "step": 81651 }, { "epoch": 2.72, "grad_norm": 0.7137501239776611, "learning_rate": 1.311728394926217e-05, "loss": 1.685, "step": 81652 }, { "epoch": 2.72, "grad_norm": 0.7109158039093018, "learning_rate": 1.3114227227783103e-05, "loss": 1.66, "step": 81653 }, { "epoch": 2.72, "grad_norm": 0.7110616564750671, "learning_rate": 1.3111170854540975e-05, "loss": 1.6497, "step": 81654 }, { "epoch": 2.72, "grad_norm": 0.7550826072692871, "learning_rate": 1.3108114829539651e-05, "loss": 1.8423, "step": 81655 }, { "epoch": 2.72, "grad_norm": 0.6955297589302063, "learning_rate": 1.3105059152782826e-05, "loss": 1.7082, "step": 81656 }, { "epoch": 2.72, "grad_norm": 0.7265706062316895, "learning_rate": 1.3102003824274165e-05, "loss": 1.6688, "step": 81657 }, { "epoch": 2.72, "grad_norm": 0.6833983659744263, "learning_rate": 1.3098948844017399e-05, "loss": 1.5902, "step": 81658 }, { "epoch": 2.72, "grad_norm": 0.7012184858322144, "learning_rate": 1.3095894212016222e-05, "loss": 1.6586, "step": 81659 }, { "epoch": 2.72, "grad_norm": 0.727977991104126, "learning_rate": 1.3092839928274435e-05, "loss": 1.6795, "step": 81660 }, { "epoch": 2.72, "grad_norm": 0.6883019208908081, "learning_rate": 1.3089785992795564e-05, "loss": 1.7456, "step": 81661 }, { "epoch": 2.72, "grad_norm": 0.72563236951828, "learning_rate": 1.3086732405583445e-05, "loss": 1.691, "step": 81662 }, { "epoch": 2.72, "grad_norm": 0.722560703754425, "learning_rate": 1.3083679166641803e-05, "loss": 1.6943, "step": 81663 }, { "epoch": 2.72, "grad_norm": 0.6909036040306091, "learning_rate": 1.3080626275974304e-05, "loss": 1.6541, "step": 81664 }, { "epoch": 2.72, "grad_norm": 0.71000736951828, "learning_rate": 1.307757373358458e-05, "loss": 1.5811, "step": 81665 }, { "epoch": 2.72, "grad_norm": 0.7392175793647766, "learning_rate": 1.3074521539476457e-05, "loss": 1.704, "step": 81666 }, { "epoch": 2.72, "grad_norm": 0.71043461561203, "learning_rate": 1.3071469693653602e-05, "loss": 1.7509, "step": 81667 }, { "epoch": 2.72, "grad_norm": 0.7408934831619263, "learning_rate": 1.3068418196119645e-05, "loss": 1.7096, "step": 81668 }, { "epoch": 2.72, "grad_norm": 0.710372805595398, "learning_rate": 1.306536704687835e-05, "loss": 1.6499, "step": 81669 }, { "epoch": 2.72, "grad_norm": 0.7186805009841919, "learning_rate": 1.306231624593348e-05, "loss": 1.7137, "step": 81670 }, { "epoch": 2.72, "grad_norm": 0.7069668173789978, "learning_rate": 1.3059265793288664e-05, "loss": 1.6722, "step": 81671 }, { "epoch": 2.72, "grad_norm": 0.6903178691864014, "learning_rate": 1.3056215688947602e-05, "loss": 1.6762, "step": 81672 }, { "epoch": 2.72, "grad_norm": 0.6913663148880005, "learning_rate": 1.3053165932914056e-05, "loss": 1.6353, "step": 81673 }, { "epoch": 2.72, "grad_norm": 0.685144305229187, "learning_rate": 1.3050116525191657e-05, "loss": 1.6917, "step": 81674 }, { "epoch": 2.72, "grad_norm": 0.7006009221076965, "learning_rate": 1.3047067465784101e-05, "loss": 1.6435, "step": 81675 }, { "epoch": 2.72, "grad_norm": 0.6922488212585449, "learning_rate": 1.3044018754695151e-05, "loss": 1.6188, "step": 81676 }, { "epoch": 2.72, "grad_norm": 0.7160976529121399, "learning_rate": 1.3040970391928507e-05, "loss": 1.6488, "step": 81677 }, { "epoch": 2.72, "grad_norm": 0.7260054349899292, "learning_rate": 1.303792237748783e-05, "loss": 1.6692, "step": 81678 }, { "epoch": 2.72, "grad_norm": 0.7045677900314331, "learning_rate": 1.303487471137682e-05, "loss": 1.6576, "step": 81679 }, { "epoch": 2.72, "grad_norm": 0.7213120460510254, "learning_rate": 1.3031827393599204e-05, "loss": 1.7338, "step": 81680 }, { "epoch": 2.72, "grad_norm": 0.7133855223655701, "learning_rate": 1.302878042415868e-05, "loss": 1.6898, "step": 81681 }, { "epoch": 2.72, "grad_norm": 0.7348515391349792, "learning_rate": 1.3025733803058913e-05, "loss": 1.7418, "step": 81682 }, { "epoch": 2.72, "grad_norm": 0.7144995331764221, "learning_rate": 1.3022687530303633e-05, "loss": 1.6417, "step": 81683 }, { "epoch": 2.72, "grad_norm": 0.7052372694015503, "learning_rate": 1.3019641605896503e-05, "loss": 1.7057, "step": 81684 }, { "epoch": 2.72, "grad_norm": 0.7098090648651123, "learning_rate": 1.3016596029841253e-05, "loss": 1.6096, "step": 81685 }, { "epoch": 2.72, "grad_norm": 0.7105249166488647, "learning_rate": 1.3013550802141548e-05, "loss": 1.7462, "step": 81686 }, { "epoch": 2.72, "grad_norm": 0.7184293270111084, "learning_rate": 1.3010505922801151e-05, "loss": 1.6795, "step": 81687 }, { "epoch": 2.72, "grad_norm": 0.7310313582420349, "learning_rate": 1.3007461391823727e-05, "loss": 1.7218, "step": 81688 }, { "epoch": 2.72, "grad_norm": 0.7031847238540649, "learning_rate": 1.3004417209212903e-05, "loss": 1.7515, "step": 81689 }, { "epoch": 2.72, "grad_norm": 0.7149484753608704, "learning_rate": 1.3001373374972478e-05, "loss": 1.6512, "step": 81690 }, { "epoch": 2.72, "grad_norm": 0.7084560990333557, "learning_rate": 1.2998329889106051e-05, "loss": 1.6442, "step": 81691 }, { "epoch": 2.72, "grad_norm": 0.7122918963432312, "learning_rate": 1.2995286751617383e-05, "loss": 1.6913, "step": 81692 }, { "epoch": 2.72, "grad_norm": 0.7326574325561523, "learning_rate": 1.2992243962510174e-05, "loss": 1.7034, "step": 81693 }, { "epoch": 2.72, "grad_norm": 0.7197527885437012, "learning_rate": 1.298920152178805e-05, "loss": 1.6483, "step": 81694 }, { "epoch": 2.72, "grad_norm": 0.7243478894233704, "learning_rate": 1.2986159429454778e-05, "loss": 1.6831, "step": 81695 }, { "epoch": 2.72, "grad_norm": 0.7199620008468628, "learning_rate": 1.2983117685513989e-05, "loss": 1.6572, "step": 81696 }, { "epoch": 2.72, "grad_norm": 0.7051414847373962, "learning_rate": 1.2980076289969443e-05, "loss": 1.6833, "step": 81697 }, { "epoch": 2.72, "grad_norm": 0.7175600528717041, "learning_rate": 1.2977035242824773e-05, "loss": 1.713, "step": 81698 }, { "epoch": 2.72, "grad_norm": 0.6953607797622681, "learning_rate": 1.2973994544083711e-05, "loss": 1.6839, "step": 81699 }, { "epoch": 2.72, "grad_norm": 0.7092468738555908, "learning_rate": 1.2970954193749916e-05, "loss": 1.6673, "step": 81700 }, { "epoch": 2.72, "grad_norm": 0.7197146415710449, "learning_rate": 1.2967914191827055e-05, "loss": 1.6266, "step": 81701 }, { "epoch": 2.72, "grad_norm": 0.7681216597557068, "learning_rate": 1.2964874538318926e-05, "loss": 1.7975, "step": 81702 }, { "epoch": 2.72, "grad_norm": 0.7418560981750488, "learning_rate": 1.2961835233229124e-05, "loss": 1.7302, "step": 81703 }, { "epoch": 2.72, "grad_norm": 0.7125617861747742, "learning_rate": 1.2958796276561312e-05, "loss": 1.7571, "step": 81704 }, { "epoch": 2.72, "grad_norm": 0.7175648212432861, "learning_rate": 1.2955757668319222e-05, "loss": 1.6156, "step": 81705 }, { "epoch": 2.72, "grad_norm": 0.6887238025665283, "learning_rate": 1.2952719408506617e-05, "loss": 1.7243, "step": 81706 }, { "epoch": 2.72, "grad_norm": 0.7038074731826782, "learning_rate": 1.2949681497127096e-05, "loss": 1.6762, "step": 81707 }, { "epoch": 2.72, "grad_norm": 0.7169222235679626, "learning_rate": 1.294664393418432e-05, "loss": 1.6701, "step": 81708 }, { "epoch": 2.72, "grad_norm": 0.6961908936500549, "learning_rate": 1.2943606719682086e-05, "loss": 1.6368, "step": 81709 }, { "epoch": 2.72, "grad_norm": 0.7022284865379333, "learning_rate": 1.2940569853623995e-05, "loss": 1.6745, "step": 81710 }, { "epoch": 2.72, "grad_norm": 0.714818000793457, "learning_rate": 1.293753333601374e-05, "loss": 1.6735, "step": 81711 }, { "epoch": 2.72, "grad_norm": 0.7276133894920349, "learning_rate": 1.2934497166855018e-05, "loss": 1.7723, "step": 81712 }, { "epoch": 2.72, "grad_norm": 0.7128211855888367, "learning_rate": 1.2931461346151562e-05, "loss": 1.7257, "step": 81713 }, { "epoch": 2.72, "grad_norm": 0.7323666214942932, "learning_rate": 1.2928425873907e-05, "loss": 1.7853, "step": 81714 }, { "epoch": 2.72, "grad_norm": 0.7189000248908997, "learning_rate": 1.2925390750124998e-05, "loss": 1.6554, "step": 81715 }, { "epoch": 2.72, "grad_norm": 0.7072159051895142, "learning_rate": 1.2922355974809284e-05, "loss": 1.6855, "step": 81716 }, { "epoch": 2.72, "grad_norm": 0.7192821502685547, "learning_rate": 1.2919321547963556e-05, "loss": 1.6697, "step": 81717 }, { "epoch": 2.72, "grad_norm": 0.6947869658470154, "learning_rate": 1.2916287469591447e-05, "loss": 1.6394, "step": 81718 }, { "epoch": 2.72, "grad_norm": 0.7332559823989868, "learning_rate": 1.291325373969665e-05, "loss": 1.6586, "step": 81719 }, { "epoch": 2.72, "grad_norm": 0.7178364396095276, "learning_rate": 1.2910220358282897e-05, "loss": 1.6386, "step": 81720 }, { "epoch": 2.72, "grad_norm": 0.7153770923614502, "learning_rate": 1.2907187325353851e-05, "loss": 1.6905, "step": 81721 }, { "epoch": 2.72, "grad_norm": 0.7421170473098755, "learning_rate": 1.2904154640913145e-05, "loss": 1.6588, "step": 81722 }, { "epoch": 2.72, "grad_norm": 0.7157322764396667, "learning_rate": 1.2901122304964507e-05, "loss": 1.6546, "step": 81723 }, { "epoch": 2.72, "grad_norm": 0.6972944736480713, "learning_rate": 1.28980903175116e-05, "loss": 1.716, "step": 81724 }, { "epoch": 2.72, "grad_norm": 0.7008250951766968, "learning_rate": 1.289505867855809e-05, "loss": 1.6871, "step": 81725 }, { "epoch": 2.72, "grad_norm": 0.728842556476593, "learning_rate": 1.2892027388107673e-05, "loss": 1.6263, "step": 81726 }, { "epoch": 2.72, "grad_norm": 0.6861407160758972, "learning_rate": 1.2888996446164079e-05, "loss": 1.6245, "step": 81727 }, { "epoch": 2.72, "grad_norm": 0.7126731872558594, "learning_rate": 1.2885965852730939e-05, "loss": 1.665, "step": 81728 }, { "epoch": 2.72, "grad_norm": 0.7560817003250122, "learning_rate": 1.2882935607811884e-05, "loss": 1.6864, "step": 81729 }, { "epoch": 2.72, "grad_norm": 0.6972267031669617, "learning_rate": 1.2879905711410676e-05, "loss": 1.6993, "step": 81730 }, { "epoch": 2.72, "grad_norm": 0.723493218421936, "learning_rate": 1.2876876163530947e-05, "loss": 1.7301, "step": 81731 }, { "epoch": 2.72, "grad_norm": 0.7311387062072754, "learning_rate": 1.287384696417636e-05, "loss": 1.6937, "step": 81732 }, { "epoch": 2.72, "grad_norm": 0.7540520429611206, "learning_rate": 1.2870818113350679e-05, "loss": 1.6408, "step": 81733 }, { "epoch": 2.72, "grad_norm": 0.6981896162033081, "learning_rate": 1.2867789611057433e-05, "loss": 1.6745, "step": 81734 }, { "epoch": 2.72, "grad_norm": 0.700426459312439, "learning_rate": 1.2864761457300455e-05, "loss": 1.6711, "step": 81735 }, { "epoch": 2.72, "grad_norm": 0.7222430109977722, "learning_rate": 1.2861733652083306e-05, "loss": 1.6218, "step": 81736 }, { "epoch": 2.72, "grad_norm": 0.7162034511566162, "learning_rate": 1.2858706195409717e-05, "loss": 1.6752, "step": 81737 }, { "epoch": 2.72, "grad_norm": 0.6951243877410889, "learning_rate": 1.2855679087283388e-05, "loss": 1.6395, "step": 81738 }, { "epoch": 2.72, "grad_norm": 0.6991997361183167, "learning_rate": 1.2852652327707913e-05, "loss": 1.735, "step": 81739 }, { "epoch": 2.72, "grad_norm": 0.7042018175125122, "learning_rate": 1.2849625916687023e-05, "loss": 1.6685, "step": 81740 }, { "epoch": 2.72, "grad_norm": 0.7083070278167725, "learning_rate": 1.2846599854224349e-05, "loss": 1.6053, "step": 81741 }, { "epoch": 2.72, "grad_norm": 0.7159357666969299, "learning_rate": 1.284357414032362e-05, "loss": 1.6941, "step": 81742 }, { "epoch": 2.72, "grad_norm": 0.7099214196205139, "learning_rate": 1.2840548774988502e-05, "loss": 1.7249, "step": 81743 }, { "epoch": 2.72, "grad_norm": 0.7555593848228455, "learning_rate": 1.2837523758222623e-05, "loss": 1.7346, "step": 81744 }, { "epoch": 2.72, "grad_norm": 0.7066713571548462, "learning_rate": 1.2834499090029681e-05, "loss": 1.7415, "step": 81745 }, { "epoch": 2.72, "grad_norm": 0.7199263572692871, "learning_rate": 1.2831474770413308e-05, "loss": 1.7153, "step": 81746 }, { "epoch": 2.72, "grad_norm": 0.715187132358551, "learning_rate": 1.2828450799377265e-05, "loss": 1.6486, "step": 81747 }, { "epoch": 2.72, "grad_norm": 0.7466530203819275, "learning_rate": 1.2825427176925152e-05, "loss": 1.7573, "step": 81748 }, { "epoch": 2.72, "grad_norm": 0.7259541153907776, "learning_rate": 1.2822403903060663e-05, "loss": 1.6693, "step": 81749 }, { "epoch": 2.72, "grad_norm": 0.7195677757263184, "learning_rate": 1.2819380977787463e-05, "loss": 1.6771, "step": 81750 }, { "epoch": 2.72, "grad_norm": 0.6890712976455688, "learning_rate": 1.2816358401109216e-05, "loss": 1.6844, "step": 81751 }, { "epoch": 2.72, "grad_norm": 0.684610903263092, "learning_rate": 1.2813336173029586e-05, "loss": 1.724, "step": 81752 }, { "epoch": 2.72, "grad_norm": 0.7091863751411438, "learning_rate": 1.2810314293552271e-05, "loss": 1.7301, "step": 81753 }, { "epoch": 2.72, "grad_norm": 0.7031713724136353, "learning_rate": 1.2807292762680898e-05, "loss": 1.6898, "step": 81754 }, { "epoch": 2.72, "grad_norm": 0.7287794947624207, "learning_rate": 1.2804271580419135e-05, "loss": 1.6947, "step": 81755 }, { "epoch": 2.72, "grad_norm": 0.7347198128700256, "learning_rate": 1.2801250746770742e-05, "loss": 1.6225, "step": 81756 }, { "epoch": 2.72, "grad_norm": 0.7140162587165833, "learning_rate": 1.2798230261739284e-05, "loss": 1.7095, "step": 81757 }, { "epoch": 2.72, "grad_norm": 0.6995850205421448, "learning_rate": 1.2795210125328392e-05, "loss": 1.6558, "step": 81758 }, { "epoch": 2.72, "grad_norm": 0.7243273854255676, "learning_rate": 1.2792190337541896e-05, "loss": 1.6675, "step": 81759 }, { "epoch": 2.72, "grad_norm": 0.6830061078071594, "learning_rate": 1.2789170898383328e-05, "loss": 1.6269, "step": 81760 }, { "epoch": 2.72, "grad_norm": 0.7320572733879089, "learning_rate": 1.278615180785635e-05, "loss": 1.6648, "step": 81761 }, { "epoch": 2.72, "grad_norm": 0.7571989893913269, "learning_rate": 1.2783133065964657e-05, "loss": 1.6663, "step": 81762 }, { "epoch": 2.72, "grad_norm": 0.7010868787765503, "learning_rate": 1.2780114672712016e-05, "loss": 1.6367, "step": 81763 }, { "epoch": 2.72, "grad_norm": 0.7252563834190369, "learning_rate": 1.2777096628101924e-05, "loss": 1.6767, "step": 81764 }, { "epoch": 2.72, "grad_norm": 0.7013227939605713, "learning_rate": 1.277407893213811e-05, "loss": 1.6656, "step": 81765 }, { "epoch": 2.72, "grad_norm": 0.7385033369064331, "learning_rate": 1.2771061584824271e-05, "loss": 1.8271, "step": 81766 }, { "epoch": 2.72, "grad_norm": 0.7074099779129028, "learning_rate": 1.2768044586164039e-05, "loss": 1.6661, "step": 81767 }, { "epoch": 2.72, "grad_norm": 0.7010093331336975, "learning_rate": 1.2765027936161043e-05, "loss": 1.6792, "step": 81768 }, { "epoch": 2.72, "grad_norm": 0.7120819687843323, "learning_rate": 1.2762011634819014e-05, "loss": 1.7079, "step": 81769 }, { "epoch": 2.72, "grad_norm": 0.7129858136177063, "learning_rate": 1.2758995682141615e-05, "loss": 1.6371, "step": 81770 }, { "epoch": 2.72, "grad_norm": 0.6945045590400696, "learning_rate": 1.2755980078132411e-05, "loss": 1.66, "step": 81771 }, { "epoch": 2.72, "grad_norm": 0.6954601407051086, "learning_rate": 1.2752964822795131e-05, "loss": 1.6705, "step": 81772 }, { "epoch": 2.72, "grad_norm": 0.7057104110717773, "learning_rate": 1.274994991613344e-05, "loss": 1.627, "step": 81773 }, { "epoch": 2.72, "grad_norm": 0.7105939388275146, "learning_rate": 1.2746935358151033e-05, "loss": 1.6143, "step": 81774 }, { "epoch": 2.72, "grad_norm": 0.7251588702201843, "learning_rate": 1.2743921148851444e-05, "loss": 1.6097, "step": 81775 }, { "epoch": 2.72, "grad_norm": 0.7336061596870422, "learning_rate": 1.2740907288238433e-05, "loss": 1.7377, "step": 81776 }, { "epoch": 2.72, "grad_norm": 0.7175119519233704, "learning_rate": 1.27378937763157e-05, "loss": 1.7003, "step": 81777 }, { "epoch": 2.72, "grad_norm": 0.7493394613265991, "learning_rate": 1.273488061308674e-05, "loss": 1.687, "step": 81778 }, { "epoch": 2.72, "grad_norm": 0.7026405334472656, "learning_rate": 1.2731867798555352e-05, "loss": 1.7086, "step": 81779 }, { "epoch": 2.72, "grad_norm": 0.7278660535812378, "learning_rate": 1.2728855332725163e-05, "loss": 1.6853, "step": 81780 }, { "epoch": 2.72, "grad_norm": 0.7258387804031372, "learning_rate": 1.2725843215599808e-05, "loss": 1.6187, "step": 81781 }, { "epoch": 2.72, "grad_norm": 0.7175408005714417, "learning_rate": 1.2722831447182912e-05, "loss": 1.6644, "step": 81782 }, { "epoch": 2.72, "grad_norm": 0.7115359902381897, "learning_rate": 1.2719820027478245e-05, "loss": 1.6886, "step": 81783 }, { "epoch": 2.72, "grad_norm": 0.7214557528495789, "learning_rate": 1.2716808956489333e-05, "loss": 1.6493, "step": 81784 }, { "epoch": 2.72, "grad_norm": 0.7034029364585876, "learning_rate": 1.2713798234219907e-05, "loss": 1.7236, "step": 81785 }, { "epoch": 2.72, "grad_norm": 0.7081375122070312, "learning_rate": 1.2710787860673566e-05, "loss": 1.6522, "step": 81786 }, { "epoch": 2.72, "grad_norm": 0.7005224823951721, "learning_rate": 1.2707777835854038e-05, "loss": 1.7005, "step": 81787 }, { "epoch": 2.72, "grad_norm": 0.7065791487693787, "learning_rate": 1.2704768159764922e-05, "loss": 1.6647, "step": 81788 }, { "epoch": 2.72, "grad_norm": 0.7190110087394714, "learning_rate": 1.270175883240988e-05, "loss": 1.6878, "step": 81789 }, { "epoch": 2.72, "grad_norm": 0.7127777338027954, "learning_rate": 1.2698749853792578e-05, "loss": 1.6599, "step": 81790 }, { "epoch": 2.72, "grad_norm": 0.7161231637001038, "learning_rate": 1.269574122391671e-05, "loss": 1.6412, "step": 81791 }, { "epoch": 2.72, "grad_norm": 0.6928084492683411, "learning_rate": 1.2692732942785811e-05, "loss": 1.58, "step": 81792 }, { "epoch": 2.72, "grad_norm": 0.6782749891281128, "learning_rate": 1.268972501040364e-05, "loss": 1.5953, "step": 81793 }, { "epoch": 2.72, "grad_norm": 0.705309271812439, "learning_rate": 1.2686717426773762e-05, "loss": 1.6455, "step": 81794 }, { "epoch": 2.72, "grad_norm": 0.7095881104469299, "learning_rate": 1.268371019189991e-05, "loss": 1.6773, "step": 81795 }, { "epoch": 2.72, "grad_norm": 0.7107282876968384, "learning_rate": 1.2680703305785677e-05, "loss": 1.6619, "step": 81796 }, { "epoch": 2.72, "grad_norm": 0.7148895263671875, "learning_rate": 1.2677696768434798e-05, "loss": 1.6565, "step": 81797 }, { "epoch": 2.72, "grad_norm": 0.7342962026596069, "learning_rate": 1.2674690579850833e-05, "loss": 1.7238, "step": 81798 }, { "epoch": 2.72, "grad_norm": 0.7099586129188538, "learning_rate": 1.2671684740037413e-05, "loss": 1.6931, "step": 81799 }, { "epoch": 2.72, "grad_norm": 0.7176194190979004, "learning_rate": 1.266867924899827e-05, "loss": 1.7382, "step": 81800 }, { "epoch": 2.72, "grad_norm": 0.6937597990036011, "learning_rate": 1.2665674106737e-05, "loss": 1.7074, "step": 81801 }, { "epoch": 2.72, "grad_norm": 0.6827062964439392, "learning_rate": 1.2662669313257268e-05, "loss": 1.6361, "step": 81802 }, { "epoch": 2.72, "grad_norm": 0.7018576264381409, "learning_rate": 1.2659664868562702e-05, "loss": 1.7257, "step": 81803 }, { "epoch": 2.72, "grad_norm": 0.7019554376602173, "learning_rate": 1.2656660772657001e-05, "loss": 1.6723, "step": 81804 }, { "epoch": 2.72, "grad_norm": 0.7095438241958618, "learning_rate": 1.2653657025543796e-05, "loss": 1.7412, "step": 81805 }, { "epoch": 2.72, "grad_norm": 0.702806830406189, "learning_rate": 1.2650653627226648e-05, "loss": 1.6769, "step": 81806 }, { "epoch": 2.72, "grad_norm": 0.7465811967849731, "learning_rate": 1.264765057770929e-05, "loss": 1.7482, "step": 81807 }, { "epoch": 2.72, "grad_norm": 0.6887005567550659, "learning_rate": 1.2644647876995351e-05, "loss": 1.5819, "step": 81808 }, { "epoch": 2.72, "grad_norm": 0.6885619163513184, "learning_rate": 1.2641645525088463e-05, "loss": 1.6548, "step": 81809 }, { "epoch": 2.72, "grad_norm": 0.7175865173339844, "learning_rate": 1.263864352199232e-05, "loss": 1.6846, "step": 81810 }, { "epoch": 2.72, "grad_norm": 0.6960795521736145, "learning_rate": 1.2635641867710455e-05, "loss": 1.7165, "step": 81811 }, { "epoch": 2.72, "grad_norm": 0.7147241830825806, "learning_rate": 1.2632640562246633e-05, "loss": 1.5936, "step": 81812 }, { "epoch": 2.72, "grad_norm": 0.7159518599510193, "learning_rate": 1.262963960560438e-05, "loss": 1.6556, "step": 81813 }, { "epoch": 2.72, "grad_norm": 0.7011225819587708, "learning_rate": 1.2626638997787464e-05, "loss": 1.6568, "step": 81814 }, { "epoch": 2.72, "grad_norm": 0.7228302359580994, "learning_rate": 1.2623638738799446e-05, "loss": 1.7183, "step": 81815 }, { "epoch": 2.72, "grad_norm": 0.6970664262771606, "learning_rate": 1.2620638828643992e-05, "loss": 1.6355, "step": 81816 }, { "epoch": 2.72, "grad_norm": 0.7409756183624268, "learning_rate": 1.2617639267324765e-05, "loss": 1.6906, "step": 81817 }, { "epoch": 2.72, "grad_norm": 0.708763599395752, "learning_rate": 1.2614640054845326e-05, "loss": 1.6356, "step": 81818 }, { "epoch": 2.72, "grad_norm": 0.7200813293457031, "learning_rate": 1.2611641191209409e-05, "loss": 1.6887, "step": 81819 }, { "epoch": 2.72, "grad_norm": 0.6996204853057861, "learning_rate": 1.2608642676420611e-05, "loss": 1.6728, "step": 81820 }, { "epoch": 2.72, "grad_norm": 0.7136008143424988, "learning_rate": 1.2605644510482526e-05, "loss": 1.6866, "step": 81821 }, { "epoch": 2.72, "grad_norm": 0.7104117274284363, "learning_rate": 1.2602646693398888e-05, "loss": 1.6716, "step": 81822 }, { "epoch": 2.72, "grad_norm": 0.7354167103767395, "learning_rate": 1.2599649225173292e-05, "loss": 1.7022, "step": 81823 }, { "epoch": 2.72, "grad_norm": 0.7367827296257019, "learning_rate": 1.2596652105809368e-05, "loss": 1.6487, "step": 81824 }, { "epoch": 2.72, "grad_norm": 0.7196339964866638, "learning_rate": 1.2593655335310748e-05, "loss": 1.6509, "step": 81825 }, { "epoch": 2.72, "grad_norm": 0.720594048500061, "learning_rate": 1.2590658913681128e-05, "loss": 1.7053, "step": 81826 }, { "epoch": 2.72, "grad_norm": 0.6989230513572693, "learning_rate": 1.2587662840924074e-05, "loss": 1.6958, "step": 81827 }, { "epoch": 2.72, "grad_norm": 0.6992643475532532, "learning_rate": 1.2584667117043212e-05, "loss": 1.6612, "step": 81828 }, { "epoch": 2.72, "grad_norm": 0.7052890062332153, "learning_rate": 1.2581671742042244e-05, "loss": 1.7071, "step": 81829 }, { "epoch": 2.72, "grad_norm": 0.7112985849380493, "learning_rate": 1.2578676715924796e-05, "loss": 1.6659, "step": 81830 }, { "epoch": 2.72, "grad_norm": 0.7368506193161011, "learning_rate": 1.2575682038694467e-05, "loss": 1.6697, "step": 81831 }, { "epoch": 2.72, "grad_norm": 0.7045310139656067, "learning_rate": 1.257268771035489e-05, "loss": 1.6497, "step": 81832 }, { "epoch": 2.72, "grad_norm": 0.7018845677375793, "learning_rate": 1.2569693730909758e-05, "loss": 1.6638, "step": 81833 }, { "epoch": 2.72, "grad_norm": 0.6991990804672241, "learning_rate": 1.256670010036267e-05, "loss": 1.6731, "step": 81834 }, { "epoch": 2.72, "grad_norm": 0.7203630805015564, "learning_rate": 1.2563706818717223e-05, "loss": 1.6739, "step": 81835 }, { "epoch": 2.72, "grad_norm": 0.7497185468673706, "learning_rate": 1.2560713885977081e-05, "loss": 1.6944, "step": 81836 }, { "epoch": 2.72, "grad_norm": 0.7316625118255615, "learning_rate": 1.2557721302145906e-05, "loss": 1.7383, "step": 81837 }, { "epoch": 2.72, "grad_norm": 0.7137629985809326, "learning_rate": 1.2554729067227331e-05, "loss": 1.6585, "step": 81838 }, { "epoch": 2.72, "grad_norm": 0.7010231614112854, "learning_rate": 1.2551737181224886e-05, "loss": 1.6477, "step": 81839 }, { "epoch": 2.72, "grad_norm": 0.7170184254646301, "learning_rate": 1.2548745644142333e-05, "loss": 1.6385, "step": 81840 }, { "epoch": 2.72, "grad_norm": 0.7079439759254456, "learning_rate": 1.254575445598327e-05, "loss": 1.6825, "step": 81841 }, { "epoch": 2.72, "grad_norm": 0.7297796607017517, "learning_rate": 1.2542763616751228e-05, "loss": 1.6738, "step": 81842 }, { "epoch": 2.72, "grad_norm": 0.7207063436508179, "learning_rate": 1.2539773126449936e-05, "loss": 1.6906, "step": 81843 }, { "epoch": 2.72, "grad_norm": 0.7075705528259277, "learning_rate": 1.253678298508306e-05, "loss": 1.599, "step": 81844 }, { "epoch": 2.72, "grad_norm": 0.6866161823272705, "learning_rate": 1.2533793192654162e-05, "loss": 1.6857, "step": 81845 }, { "epoch": 2.72, "grad_norm": 0.719936192035675, "learning_rate": 1.253080374916684e-05, "loss": 1.6064, "step": 81846 }, { "epoch": 2.72, "grad_norm": 0.7041593790054321, "learning_rate": 1.252781465462479e-05, "loss": 1.7023, "step": 81847 }, { "epoch": 2.72, "grad_norm": 0.75342857837677, "learning_rate": 1.2524825909031643e-05, "loss": 1.6536, "step": 81848 }, { "epoch": 2.72, "grad_norm": 0.6953057646751404, "learning_rate": 1.2521837512390964e-05, "loss": 1.7002, "step": 81849 }, { "epoch": 2.72, "grad_norm": 0.6827844977378845, "learning_rate": 1.2518849464706415e-05, "loss": 1.6143, "step": 81850 }, { "epoch": 2.72, "grad_norm": 0.7137852907180786, "learning_rate": 1.2515861765981628e-05, "loss": 1.7582, "step": 81851 }, { "epoch": 2.72, "grad_norm": 0.7115755081176758, "learning_rate": 1.2512874416220265e-05, "loss": 1.7524, "step": 81852 }, { "epoch": 2.72, "grad_norm": 0.7118335366249084, "learning_rate": 1.2509887415425857e-05, "loss": 1.7036, "step": 81853 }, { "epoch": 2.72, "grad_norm": 0.7156340479850769, "learning_rate": 1.2506900763602101e-05, "loss": 1.7209, "step": 81854 }, { "epoch": 2.72, "grad_norm": 0.719195544719696, "learning_rate": 1.2503914460752629e-05, "loss": 1.6384, "step": 81855 }, { "epoch": 2.72, "grad_norm": 0.7131487727165222, "learning_rate": 1.2500928506881035e-05, "loss": 1.6995, "step": 81856 }, { "epoch": 2.72, "grad_norm": 0.7201240062713623, "learning_rate": 1.2497942901990953e-05, "loss": 1.7191, "step": 81857 }, { "epoch": 2.72, "grad_norm": 0.7147544622421265, "learning_rate": 1.2494957646085979e-05, "loss": 1.6779, "step": 81858 }, { "epoch": 2.72, "grad_norm": 0.7331308722496033, "learning_rate": 1.2491972739169808e-05, "loss": 1.6815, "step": 81859 }, { "epoch": 2.72, "grad_norm": 0.6878417134284973, "learning_rate": 1.2488988181246007e-05, "loss": 1.7012, "step": 81860 }, { "epoch": 2.72, "grad_norm": 0.7251697182655334, "learning_rate": 1.248600397231817e-05, "loss": 1.7275, "step": 81861 }, { "epoch": 2.72, "grad_norm": 0.7083319425582886, "learning_rate": 1.248302011239003e-05, "loss": 1.6022, "step": 81862 }, { "epoch": 2.72, "grad_norm": 0.719474196434021, "learning_rate": 1.2480036601465081e-05, "loss": 1.6695, "step": 81863 }, { "epoch": 2.72, "grad_norm": 0.7121317386627197, "learning_rate": 1.2477053439547058e-05, "loss": 1.7306, "step": 81864 }, { "epoch": 2.72, "grad_norm": 0.7110018730163574, "learning_rate": 1.2474070626639454e-05, "loss": 1.7212, "step": 81865 }, { "epoch": 2.72, "grad_norm": 0.7279686331748962, "learning_rate": 1.2471088162746034e-05, "loss": 1.6917, "step": 81866 }, { "epoch": 2.72, "grad_norm": 0.6980712413787842, "learning_rate": 1.246810604787033e-05, "loss": 1.7013, "step": 81867 }, { "epoch": 2.72, "grad_norm": 0.6876541972160339, "learning_rate": 1.2465124282015936e-05, "loss": 1.688, "step": 81868 }, { "epoch": 2.72, "grad_norm": 0.693465530872345, "learning_rate": 1.2462142865186587e-05, "loss": 1.6893, "step": 81869 }, { "epoch": 2.72, "grad_norm": 0.7133254408836365, "learning_rate": 1.2459161797385808e-05, "loss": 1.7103, "step": 81870 }, { "epoch": 2.72, "grad_norm": 0.7058175802230835, "learning_rate": 1.24561810786172e-05, "loss": 1.6657, "step": 81871 }, { "epoch": 2.72, "grad_norm": 0.7125914692878723, "learning_rate": 1.2453200708884426e-05, "loss": 1.7406, "step": 81872 }, { "epoch": 2.72, "grad_norm": 0.6942898631095886, "learning_rate": 1.2450220688191115e-05, "loss": 1.6497, "step": 81873 }, { "epoch": 2.72, "grad_norm": 0.7233930230140686, "learning_rate": 1.2447241016540899e-05, "loss": 1.6762, "step": 81874 }, { "epoch": 2.72, "grad_norm": 0.7038915753364563, "learning_rate": 1.2444261693937307e-05, "loss": 1.6838, "step": 81875 }, { "epoch": 2.72, "grad_norm": 0.70931476354599, "learning_rate": 1.2441282720384039e-05, "loss": 1.6961, "step": 81876 }, { "epoch": 2.72, "grad_norm": 0.7128503322601318, "learning_rate": 1.2438304095884721e-05, "loss": 1.6349, "step": 81877 }, { "epoch": 2.72, "grad_norm": 0.7114449143409729, "learning_rate": 1.2435325820442854e-05, "loss": 1.689, "step": 81878 }, { "epoch": 2.72, "grad_norm": 0.7349588871002197, "learning_rate": 1.2432347894062166e-05, "loss": 1.6547, "step": 81879 }, { "epoch": 2.72, "grad_norm": 0.7118016481399536, "learning_rate": 1.242937031674629e-05, "loss": 1.6409, "step": 81880 }, { "epoch": 2.72, "grad_norm": 0.7095307111740112, "learning_rate": 1.242639308849872e-05, "loss": 1.6191, "step": 81881 }, { "epoch": 2.72, "grad_norm": 0.7055824995040894, "learning_rate": 1.2423416209323123e-05, "loss": 1.6865, "step": 81882 }, { "epoch": 2.72, "grad_norm": 0.7670521140098572, "learning_rate": 1.242043967922316e-05, "loss": 1.8152, "step": 81883 }, { "epoch": 2.72, "grad_norm": 0.7263044118881226, "learning_rate": 1.241746349820243e-05, "loss": 1.6184, "step": 81884 }, { "epoch": 2.72, "grad_norm": 0.7402501106262207, "learning_rate": 1.2414487666264494e-05, "loss": 1.7076, "step": 81885 }, { "epoch": 2.72, "grad_norm": 0.7136147618293762, "learning_rate": 1.2411512183412987e-05, "loss": 1.6746, "step": 81886 }, { "epoch": 2.72, "grad_norm": 0.6971220970153809, "learning_rate": 1.2408537049651602e-05, "loss": 1.6516, "step": 81887 }, { "epoch": 2.72, "grad_norm": 0.7222796082496643, "learning_rate": 1.2405562264983803e-05, "loss": 1.6618, "step": 81888 }, { "epoch": 2.72, "grad_norm": 0.6943075060844421, "learning_rate": 1.2402587829413257e-05, "loss": 1.6382, "step": 81889 }, { "epoch": 2.72, "grad_norm": 0.7076818346977234, "learning_rate": 1.239961374294366e-05, "loss": 1.5988, "step": 81890 }, { "epoch": 2.72, "grad_norm": 0.7127158641815186, "learning_rate": 1.2396640005578539e-05, "loss": 1.6267, "step": 81891 }, { "epoch": 2.72, "grad_norm": 0.724208652973175, "learning_rate": 1.2393666617321496e-05, "loss": 1.7425, "step": 81892 }, { "epoch": 2.72, "grad_norm": 0.7129928469657898, "learning_rate": 1.2390693578176159e-05, "loss": 1.7279, "step": 81893 }, { "epoch": 2.72, "grad_norm": 0.7041444778442383, "learning_rate": 1.2387720888146224e-05, "loss": 1.6994, "step": 81894 }, { "epoch": 2.72, "grad_norm": 0.7018443942070007, "learning_rate": 1.2384748547235158e-05, "loss": 1.6939, "step": 81895 }, { "epoch": 2.72, "grad_norm": 0.706519365310669, "learning_rate": 1.2381776555446588e-05, "loss": 1.6394, "step": 81896 }, { "epoch": 2.72, "grad_norm": 0.6931354999542236, "learning_rate": 1.2378804912784246e-05, "loss": 1.6196, "step": 81897 }, { "epoch": 2.72, "grad_norm": 0.6995078921318054, "learning_rate": 1.237583361925163e-05, "loss": 1.6317, "step": 81898 }, { "epoch": 2.72, "grad_norm": 0.7060505151748657, "learning_rate": 1.2372862674852336e-05, "loss": 1.5883, "step": 81899 }, { "epoch": 2.72, "grad_norm": 0.7061312198638916, "learning_rate": 1.2369892079590028e-05, "loss": 1.6287, "step": 81900 }, { "epoch": 2.72, "grad_norm": 0.7268617153167725, "learning_rate": 1.2366921833468302e-05, "loss": 1.6421, "step": 81901 }, { "epoch": 2.72, "grad_norm": 0.6991224884986877, "learning_rate": 1.2363951936490724e-05, "loss": 1.7905, "step": 81902 }, { "epoch": 2.72, "grad_norm": 0.6944264769554138, "learning_rate": 1.2360982388660922e-05, "loss": 1.6838, "step": 81903 }, { "epoch": 2.72, "grad_norm": 0.724195659160614, "learning_rate": 1.2358013189982563e-05, "loss": 1.7521, "step": 81904 }, { "epoch": 2.72, "grad_norm": 0.6937229633331299, "learning_rate": 1.2355044340459175e-05, "loss": 1.574, "step": 81905 }, { "epoch": 2.73, "grad_norm": 0.723720133304596, "learning_rate": 1.2352075840094355e-05, "loss": 1.725, "step": 81906 }, { "epoch": 2.73, "grad_norm": 0.7140806913375854, "learning_rate": 1.2349107688891734e-05, "loss": 1.6957, "step": 81907 }, { "epoch": 2.73, "grad_norm": 0.6913541555404663, "learning_rate": 1.2346139886854945e-05, "loss": 1.7288, "step": 81908 }, { "epoch": 2.73, "grad_norm": 0.7250869870185852, "learning_rate": 1.2343172433987514e-05, "loss": 1.681, "step": 81909 }, { "epoch": 2.73, "grad_norm": 0.720391035079956, "learning_rate": 1.234020533029314e-05, "loss": 1.6646, "step": 81910 }, { "epoch": 2.73, "grad_norm": 0.7142288684844971, "learning_rate": 1.2337238575775322e-05, "loss": 1.7465, "step": 81911 }, { "epoch": 2.73, "grad_norm": 0.6909528970718384, "learning_rate": 1.233427217043772e-05, "loss": 1.5888, "step": 81912 }, { "epoch": 2.73, "grad_norm": 0.7061525583267212, "learning_rate": 1.2331306114283935e-05, "loss": 1.624, "step": 81913 }, { "epoch": 2.73, "grad_norm": 0.7075058817863464, "learning_rate": 1.2328340407317561e-05, "loss": 1.6437, "step": 81914 }, { "epoch": 2.73, "grad_norm": 0.7881554961204529, "learning_rate": 1.232537504954223e-05, "loss": 1.6787, "step": 81915 }, { "epoch": 2.73, "grad_norm": 0.6958832144737244, "learning_rate": 1.2322410040961439e-05, "loss": 1.6629, "step": 81916 }, { "epoch": 2.73, "grad_norm": 0.704936683177948, "learning_rate": 1.2319445381578886e-05, "loss": 1.6645, "step": 81917 }, { "epoch": 2.73, "grad_norm": 0.7121163010597229, "learning_rate": 1.2316481071398133e-05, "loss": 1.7075, "step": 81918 }, { "epoch": 2.73, "grad_norm": 0.7508755326271057, "learning_rate": 1.231351711042281e-05, "loss": 1.7273, "step": 81919 }, { "epoch": 2.73, "grad_norm": 0.7012776732444763, "learning_rate": 1.231055349865645e-05, "loss": 1.8141, "step": 81920 }, { "epoch": 2.73, "grad_norm": 0.7111966609954834, "learning_rate": 1.2307590236102716e-05, "loss": 1.6797, "step": 81921 }, { "epoch": 2.73, "grad_norm": 0.7202221751213074, "learning_rate": 1.230462732276517e-05, "loss": 1.6367, "step": 81922 }, { "epoch": 2.73, "grad_norm": 0.7171947360038757, "learning_rate": 1.2301664758647412e-05, "loss": 1.7061, "step": 81923 }, { "epoch": 2.73, "grad_norm": 0.67403244972229, "learning_rate": 1.229870254375307e-05, "loss": 1.6814, "step": 81924 }, { "epoch": 2.73, "grad_norm": 0.7095504403114319, "learning_rate": 1.2295740678085642e-05, "loss": 1.6599, "step": 81925 }, { "epoch": 2.73, "grad_norm": 0.69858318567276, "learning_rate": 1.2292779161648858e-05, "loss": 1.5837, "step": 81926 }, { "epoch": 2.73, "grad_norm": 0.7157461047172546, "learning_rate": 1.2289817994446249e-05, "loss": 1.6606, "step": 81927 }, { "epoch": 2.73, "grad_norm": 0.7181286811828613, "learning_rate": 1.228685717648138e-05, "loss": 1.5833, "step": 81928 }, { "epoch": 2.73, "grad_norm": 0.7084728479385376, "learning_rate": 1.2283896707757912e-05, "loss": 1.5764, "step": 81929 }, { "epoch": 2.73, "grad_norm": 0.7050323486328125, "learning_rate": 1.2280936588279344e-05, "loss": 1.65, "step": 81930 }, { "epoch": 2.73, "grad_norm": 0.6954057216644287, "learning_rate": 1.2277976818049373e-05, "loss": 1.6827, "step": 81931 }, { "epoch": 2.73, "grad_norm": 0.7278310060501099, "learning_rate": 1.2275017397071495e-05, "loss": 1.6212, "step": 81932 }, { "epoch": 2.73, "grad_norm": 0.7361515760421753, "learning_rate": 1.227205832534941e-05, "loss": 1.6698, "step": 81933 }, { "epoch": 2.73, "grad_norm": 0.7032991647720337, "learning_rate": 1.2269099602886645e-05, "loss": 1.6277, "step": 81934 }, { "epoch": 2.73, "grad_norm": 0.7123354077339172, "learning_rate": 1.2266141229686733e-05, "loss": 1.6576, "step": 81935 }, { "epoch": 2.73, "grad_norm": 0.7096425294876099, "learning_rate": 1.2263183205753402e-05, "loss": 1.669, "step": 81936 }, { "epoch": 2.73, "grad_norm": 0.7090728878974915, "learning_rate": 1.2260225531090151e-05, "loss": 1.6698, "step": 81937 }, { "epoch": 2.73, "grad_norm": 0.7013704180717468, "learning_rate": 1.2257268205700577e-05, "loss": 1.6647, "step": 81938 }, { "epoch": 2.73, "grad_norm": 0.6843939423561096, "learning_rate": 1.2254311229588242e-05, "loss": 1.6951, "step": 81939 }, { "epoch": 2.73, "grad_norm": 0.7023709416389465, "learning_rate": 1.2251354602756846e-05, "loss": 1.7321, "step": 81940 }, { "epoch": 2.73, "grad_norm": 0.7298179864883423, "learning_rate": 1.2248398325209885e-05, "loss": 1.6839, "step": 81941 }, { "epoch": 2.73, "grad_norm": 0.7062684893608093, "learning_rate": 1.2245442396950955e-05, "loss": 1.6116, "step": 81942 }, { "epoch": 2.73, "grad_norm": 0.7279629707336426, "learning_rate": 1.2242486817983688e-05, "loss": 1.7567, "step": 81943 }, { "epoch": 2.73, "grad_norm": 0.7091472148895264, "learning_rate": 1.2239531588311646e-05, "loss": 1.6808, "step": 81944 }, { "epoch": 2.73, "grad_norm": 0.7004716396331787, "learning_rate": 1.2236576707938362e-05, "loss": 1.6763, "step": 81945 }, { "epoch": 2.73, "grad_norm": 0.7302420139312744, "learning_rate": 1.2233622176867497e-05, "loss": 1.6924, "step": 81946 }, { "epoch": 2.73, "grad_norm": 0.7231432795524597, "learning_rate": 1.2230667995102649e-05, "loss": 1.7713, "step": 81947 }, { "epoch": 2.73, "grad_norm": 0.6917702555656433, "learning_rate": 1.2227714162647351e-05, "loss": 1.6151, "step": 81948 }, { "epoch": 2.73, "grad_norm": 0.7052670121192932, "learning_rate": 1.2224760679505164e-05, "loss": 1.7048, "step": 81949 }, { "epoch": 2.73, "grad_norm": 0.7103185057640076, "learning_rate": 1.2221807545679785e-05, "loss": 1.7096, "step": 81950 }, { "epoch": 2.73, "grad_norm": 0.7188764810562134, "learning_rate": 1.2218854761174713e-05, "loss": 1.6581, "step": 81951 }, { "epoch": 2.73, "grad_norm": 0.6981440186500549, "learning_rate": 1.221590232599351e-05, "loss": 1.6581, "step": 81952 }, { "epoch": 2.73, "grad_norm": 0.7284741997718811, "learning_rate": 1.2212950240139775e-05, "loss": 1.742, "step": 81953 }, { "epoch": 2.73, "grad_norm": 0.7279936075210571, "learning_rate": 1.2209998503617203e-05, "loss": 1.5814, "step": 81954 }, { "epoch": 2.73, "grad_norm": 0.7109708786010742, "learning_rate": 1.2207047116429258e-05, "loss": 1.7051, "step": 81955 }, { "epoch": 2.73, "grad_norm": 0.7335292100906372, "learning_rate": 1.2204096078579505e-05, "loss": 1.6362, "step": 81956 }, { "epoch": 2.73, "grad_norm": 0.7054131031036377, "learning_rate": 1.2201145390071643e-05, "loss": 1.6804, "step": 81957 }, { "epoch": 2.73, "grad_norm": 0.7096855044364929, "learning_rate": 1.2198195050909164e-05, "loss": 1.673, "step": 81958 }, { "epoch": 2.73, "grad_norm": 0.7268815636634827, "learning_rate": 1.2195245061095637e-05, "loss": 1.6047, "step": 81959 }, { "epoch": 2.73, "grad_norm": 0.7117869853973389, "learning_rate": 1.2192295420634657e-05, "loss": 1.624, "step": 81960 }, { "epoch": 2.73, "grad_norm": 0.7032056450843811, "learning_rate": 1.2189346129529887e-05, "loss": 1.638, "step": 81961 }, { "epoch": 2.73, "grad_norm": 0.7092916965484619, "learning_rate": 1.2186397187784857e-05, "loss": 1.6672, "step": 81962 }, { "epoch": 2.73, "grad_norm": 0.7166005373001099, "learning_rate": 1.2183448595403068e-05, "loss": 1.68, "step": 81963 }, { "epoch": 2.73, "grad_norm": 0.7022194266319275, "learning_rate": 1.2180500352388211e-05, "loss": 1.7002, "step": 81964 }, { "epoch": 2.73, "grad_norm": 0.7103636860847473, "learning_rate": 1.2177552458743822e-05, "loss": 1.6265, "step": 81965 }, { "epoch": 2.73, "grad_norm": 0.709297776222229, "learning_rate": 1.2174604914473429e-05, "loss": 1.6885, "step": 81966 }, { "epoch": 2.73, "grad_norm": 0.7235194444656372, "learning_rate": 1.2171657719580696e-05, "loss": 1.6991, "step": 81967 }, { "epoch": 2.73, "grad_norm": 0.7227427959442139, "learning_rate": 1.216871087406912e-05, "loss": 1.6969, "step": 81968 }, { "epoch": 2.73, "grad_norm": 0.7164779305458069, "learning_rate": 1.2165764377942367e-05, "loss": 1.687, "step": 81969 }, { "epoch": 2.73, "grad_norm": 0.7173882126808167, "learning_rate": 1.216281823120393e-05, "loss": 1.7033, "step": 81970 }, { "epoch": 2.73, "grad_norm": 0.7107309103012085, "learning_rate": 1.2159872433857476e-05, "loss": 1.6975, "step": 81971 }, { "epoch": 2.73, "grad_norm": 0.7305331230163574, "learning_rate": 1.2156926985906501e-05, "loss": 1.6736, "step": 81972 }, { "epoch": 2.73, "grad_norm": 0.7157477736473083, "learning_rate": 1.2153981887354569e-05, "loss": 1.6804, "step": 81973 }, { "epoch": 2.73, "grad_norm": 0.7011017799377441, "learning_rate": 1.215103713820531e-05, "loss": 1.671, "step": 81974 }, { "epoch": 2.73, "grad_norm": 0.7220448851585388, "learning_rate": 1.2148092738462256e-05, "loss": 1.6809, "step": 81975 }, { "epoch": 2.73, "grad_norm": 0.7392998933792114, "learning_rate": 1.214514868812907e-05, "loss": 1.6768, "step": 81976 }, { "epoch": 2.73, "grad_norm": 0.7141863107681274, "learning_rate": 1.2142204987209214e-05, "loss": 1.6755, "step": 81977 }, { "epoch": 2.73, "grad_norm": 0.7193670272827148, "learning_rate": 1.213926163570632e-05, "loss": 1.6999, "step": 81978 }, { "epoch": 2.73, "grad_norm": 0.7200048565864563, "learning_rate": 1.2136318633623954e-05, "loss": 1.6678, "step": 81979 }, { "epoch": 2.73, "grad_norm": 0.733468234539032, "learning_rate": 1.2133375980965642e-05, "loss": 1.6472, "step": 81980 }, { "epoch": 2.73, "grad_norm": 0.7028181552886963, "learning_rate": 1.2130433677735053e-05, "loss": 1.6741, "step": 81981 }, { "epoch": 2.73, "grad_norm": 0.7179126739501953, "learning_rate": 1.2127491723935645e-05, "loss": 1.7079, "step": 81982 }, { "epoch": 2.73, "grad_norm": 0.7209044694900513, "learning_rate": 1.2124550119571086e-05, "loss": 1.6355, "step": 81983 }, { "epoch": 2.73, "grad_norm": 0.7532826662063599, "learning_rate": 1.2121608864644905e-05, "loss": 1.7041, "step": 81984 }, { "epoch": 2.73, "grad_norm": 0.6836878061294556, "learning_rate": 1.2118667959160666e-05, "loss": 1.6871, "step": 81985 }, { "epoch": 2.73, "grad_norm": 0.7256774306297302, "learning_rate": 1.2115727403121967e-05, "loss": 1.7523, "step": 81986 }, { "epoch": 2.73, "grad_norm": 0.7500278949737549, "learning_rate": 1.2112787196532337e-05, "loss": 1.7176, "step": 81987 }, { "epoch": 2.73, "grad_norm": 0.7531027793884277, "learning_rate": 1.210984733939534e-05, "loss": 1.7485, "step": 81988 }, { "epoch": 2.73, "grad_norm": 0.7163134813308716, "learning_rate": 1.2106907831714573e-05, "loss": 1.7134, "step": 81989 }, { "epoch": 2.73, "grad_norm": 0.700510561466217, "learning_rate": 1.2103968673493635e-05, "loss": 1.6373, "step": 81990 }, { "epoch": 2.73, "grad_norm": 0.7355533838272095, "learning_rate": 1.2101029864736055e-05, "loss": 1.6502, "step": 81991 }, { "epoch": 2.73, "grad_norm": 0.7042362689971924, "learning_rate": 1.2098091405445397e-05, "loss": 1.6697, "step": 81992 }, { "epoch": 2.73, "grad_norm": 0.7234311103820801, "learning_rate": 1.2095153295625226e-05, "loss": 1.7306, "step": 81993 }, { "epoch": 2.73, "grad_norm": 0.7387205362319946, "learning_rate": 1.2092215535279171e-05, "loss": 1.6845, "step": 81994 }, { "epoch": 2.73, "grad_norm": 0.6945239305496216, "learning_rate": 1.2089278124410662e-05, "loss": 1.6688, "step": 81995 }, { "epoch": 2.73, "grad_norm": 0.7088018655776978, "learning_rate": 1.2086341063023363e-05, "loss": 1.6502, "step": 81996 }, { "epoch": 2.73, "grad_norm": 0.7138792872428894, "learning_rate": 1.2083404351120906e-05, "loss": 1.681, "step": 81997 }, { "epoch": 2.73, "grad_norm": 0.7251269221305847, "learning_rate": 1.2080467988706687e-05, "loss": 1.6766, "step": 81998 }, { "epoch": 2.73, "grad_norm": 0.7395436763763428, "learning_rate": 1.2077531975784372e-05, "loss": 1.6623, "step": 81999 }, { "epoch": 2.73, "grad_norm": 0.6895344853401184, "learning_rate": 1.2074596312357554e-05, "loss": 1.6899, "step": 82000 }, { "epoch": 2.73, "grad_norm": 0.7134053707122803, "learning_rate": 1.2071660998429733e-05, "loss": 1.654, "step": 82001 }, { "epoch": 2.73, "grad_norm": 0.6971132159233093, "learning_rate": 1.2068726034004439e-05, "loss": 1.6737, "step": 82002 }, { "epoch": 2.73, "grad_norm": 0.6880781650543213, "learning_rate": 1.2065791419085303e-05, "loss": 1.6515, "step": 82003 }, { "epoch": 2.73, "grad_norm": 0.7112295031547546, "learning_rate": 1.2062857153675953e-05, "loss": 1.6837, "step": 82004 }, { "epoch": 2.73, "grad_norm": 0.7136564254760742, "learning_rate": 1.205992323777979e-05, "loss": 1.6724, "step": 82005 }, { "epoch": 2.73, "grad_norm": 0.7405492663383484, "learning_rate": 1.2056989671400474e-05, "loss": 1.6319, "step": 82006 }, { "epoch": 2.73, "grad_norm": 0.6944400072097778, "learning_rate": 1.2054056454541539e-05, "loss": 1.7119, "step": 82007 }, { "epoch": 2.73, "grad_norm": 0.7107188701629639, "learning_rate": 1.2051123587206578e-05, "loss": 1.7165, "step": 82008 }, { "epoch": 2.73, "grad_norm": 0.7104195356369019, "learning_rate": 1.2048191069399093e-05, "loss": 1.677, "step": 82009 }, { "epoch": 2.73, "grad_norm": 0.7076779007911682, "learning_rate": 1.2045258901122679e-05, "loss": 1.5972, "step": 82010 }, { "epoch": 2.73, "grad_norm": 0.7133904099464417, "learning_rate": 1.2042327082380965e-05, "loss": 1.6278, "step": 82011 }, { "epoch": 2.73, "grad_norm": 0.7002719044685364, "learning_rate": 1.2039395613177349e-05, "loss": 1.6318, "step": 82012 }, { "epoch": 2.73, "grad_norm": 0.7206438779830933, "learning_rate": 1.2036464493515497e-05, "loss": 1.7181, "step": 82013 }, { "epoch": 2.73, "grad_norm": 0.7392288446426392, "learning_rate": 1.2033533723398969e-05, "loss": 1.6165, "step": 82014 }, { "epoch": 2.73, "grad_norm": 0.686983048915863, "learning_rate": 1.2030603302831298e-05, "loss": 1.6362, "step": 82015 }, { "epoch": 2.73, "grad_norm": 0.6930215954780579, "learning_rate": 1.2027673231816015e-05, "loss": 1.6254, "step": 82016 }, { "epoch": 2.73, "grad_norm": 0.7116032242774963, "learning_rate": 1.2024743510356749e-05, "loss": 1.661, "step": 82017 }, { "epoch": 2.73, "grad_norm": 0.7392041683197021, "learning_rate": 1.2021814138456997e-05, "loss": 1.6809, "step": 82018 }, { "epoch": 2.73, "grad_norm": 0.7144317030906677, "learning_rate": 1.2018885116120324e-05, "loss": 1.6957, "step": 82019 }, { "epoch": 2.73, "grad_norm": 0.7096750736236572, "learning_rate": 1.2015956443350261e-05, "loss": 1.657, "step": 82020 }, { "epoch": 2.73, "grad_norm": 0.724664568901062, "learning_rate": 1.2013028120150436e-05, "loss": 1.7539, "step": 82021 }, { "epoch": 2.73, "grad_norm": 0.7440055012702942, "learning_rate": 1.2010100146524381e-05, "loss": 1.7749, "step": 82022 }, { "epoch": 2.73, "grad_norm": 0.727060854434967, "learning_rate": 1.200717252247556e-05, "loss": 1.6577, "step": 82023 }, { "epoch": 2.73, "grad_norm": 0.7164006233215332, "learning_rate": 1.2004245248007672e-05, "loss": 1.6911, "step": 82024 }, { "epoch": 2.73, "grad_norm": 0.7118579745292664, "learning_rate": 1.2001318323124176e-05, "loss": 1.6478, "step": 82025 }, { "epoch": 2.73, "grad_norm": 0.6858817934989929, "learning_rate": 1.1998391747828607e-05, "loss": 1.6289, "step": 82026 }, { "epoch": 2.73, "grad_norm": 0.70958012342453, "learning_rate": 1.1995465522124625e-05, "loss": 1.6132, "step": 82027 }, { "epoch": 2.73, "grad_norm": 0.7098762392997742, "learning_rate": 1.1992539646015631e-05, "loss": 1.6341, "step": 82028 }, { "epoch": 2.73, "grad_norm": 0.712840735912323, "learning_rate": 1.198961411950532e-05, "loss": 1.7085, "step": 82029 }, { "epoch": 2.73, "grad_norm": 0.7046733498573303, "learning_rate": 1.1986688942597123e-05, "loss": 1.5958, "step": 82030 }, { "epoch": 2.73, "grad_norm": 0.7054595947265625, "learning_rate": 1.1983764115294703e-05, "loss": 1.6669, "step": 82031 }, { "epoch": 2.73, "grad_norm": 0.6888191103935242, "learning_rate": 1.1980839637601558e-05, "loss": 1.6056, "step": 82032 }, { "epoch": 2.73, "grad_norm": 0.7281168103218079, "learning_rate": 1.1977915509521186e-05, "loss": 1.7356, "step": 82033 }, { "epoch": 2.73, "grad_norm": 0.6912223696708679, "learning_rate": 1.1974991731057249e-05, "loss": 1.6904, "step": 82034 }, { "epoch": 2.73, "grad_norm": 0.7411916255950928, "learning_rate": 1.1972068302213178e-05, "loss": 1.6664, "step": 82035 }, { "epoch": 2.73, "grad_norm": 0.705773115158081, "learning_rate": 1.1969145222992604e-05, "loss": 1.7577, "step": 82036 }, { "epoch": 2.73, "grad_norm": 0.7124653458595276, "learning_rate": 1.1966222493399058e-05, "loss": 1.691, "step": 82037 }, { "epoch": 2.73, "grad_norm": 0.7044853568077087, "learning_rate": 1.1963300113436036e-05, "loss": 1.6879, "step": 82038 }, { "epoch": 2.73, "grad_norm": 0.7036442160606384, "learning_rate": 1.196037808310717e-05, "loss": 1.7478, "step": 82039 }, { "epoch": 2.73, "grad_norm": 0.6980684995651245, "learning_rate": 1.1957456402415954e-05, "loss": 1.6463, "step": 82040 }, { "epoch": 2.73, "grad_norm": 0.7106429934501648, "learning_rate": 1.1954535071365956e-05, "loss": 1.6661, "step": 82041 }, { "epoch": 2.73, "grad_norm": 0.7164242267608643, "learning_rate": 1.195161408996067e-05, "loss": 1.582, "step": 82042 }, { "epoch": 2.73, "grad_norm": 0.7310867309570312, "learning_rate": 1.1948693458203729e-05, "loss": 1.6672, "step": 82043 }, { "epoch": 2.73, "grad_norm": 0.7174293398857117, "learning_rate": 1.1945773176098627e-05, "loss": 1.6812, "step": 82044 }, { "epoch": 2.73, "grad_norm": 0.7247087955474854, "learning_rate": 1.1942853243648865e-05, "loss": 1.7217, "step": 82045 }, { "epoch": 2.73, "grad_norm": 0.7221860885620117, "learning_rate": 1.193993366085807e-05, "loss": 1.6868, "step": 82046 }, { "epoch": 2.73, "grad_norm": 0.732515811920166, "learning_rate": 1.193701442772974e-05, "loss": 1.5667, "step": 82047 }, { "epoch": 2.73, "grad_norm": 0.7027862668037415, "learning_rate": 1.1934095544267442e-05, "loss": 1.7416, "step": 82048 }, { "epoch": 2.73, "grad_norm": 0.719573438167572, "learning_rate": 1.1931177010474701e-05, "loss": 1.7009, "step": 82049 }, { "epoch": 2.73, "grad_norm": 0.7241841554641724, "learning_rate": 1.1928258826355087e-05, "loss": 1.6829, "step": 82050 }, { "epoch": 2.73, "grad_norm": 0.698355495929718, "learning_rate": 1.1925340991912124e-05, "loss": 1.651, "step": 82051 }, { "epoch": 2.73, "grad_norm": 0.681524932384491, "learning_rate": 1.1922423507149314e-05, "loss": 1.7304, "step": 82052 }, { "epoch": 2.73, "grad_norm": 0.7260451316833496, "learning_rate": 1.1919506372070254e-05, "loss": 1.6977, "step": 82053 }, { "epoch": 2.73, "grad_norm": 0.7304725050926208, "learning_rate": 1.1916589586678504e-05, "loss": 1.6493, "step": 82054 }, { "epoch": 2.73, "grad_norm": 0.7391225695610046, "learning_rate": 1.1913673150977499e-05, "loss": 1.6392, "step": 82055 }, { "epoch": 2.73, "grad_norm": 0.7253739833831787, "learning_rate": 1.1910757064970866e-05, "loss": 1.6589, "step": 82056 }, { "epoch": 2.73, "grad_norm": 0.6744005084037781, "learning_rate": 1.1907841328662138e-05, "loss": 1.6605, "step": 82057 }, { "epoch": 2.73, "grad_norm": 0.7093930840492249, "learning_rate": 1.1904925942054877e-05, "loss": 1.7339, "step": 82058 }, { "epoch": 2.73, "grad_norm": 0.7221861481666565, "learning_rate": 1.1902010905152514e-05, "loss": 1.6675, "step": 82059 }, { "epoch": 2.73, "grad_norm": 0.7246690392494202, "learning_rate": 1.1899096217958715e-05, "loss": 1.5677, "step": 82060 }, { "epoch": 2.73, "grad_norm": 0.7306962013244629, "learning_rate": 1.1896181880476974e-05, "loss": 1.6197, "step": 82061 }, { "epoch": 2.73, "grad_norm": 0.712463915348053, "learning_rate": 1.1893267892710756e-05, "loss": 1.64, "step": 82062 }, { "epoch": 2.73, "grad_norm": 0.7217755913734436, "learning_rate": 1.1890354254663693e-05, "loss": 1.6784, "step": 82063 }, { "epoch": 2.73, "grad_norm": 0.7308401465415955, "learning_rate": 1.1887440966339312e-05, "loss": 1.6818, "step": 82064 }, { "epoch": 2.73, "grad_norm": 0.7081264853477478, "learning_rate": 1.1884528027741114e-05, "loss": 1.6199, "step": 82065 }, { "epoch": 2.73, "grad_norm": 0.732223629951477, "learning_rate": 1.1881615438872626e-05, "loss": 1.6573, "step": 82066 }, { "epoch": 2.73, "grad_norm": 0.7124749422073364, "learning_rate": 1.1878703199737416e-05, "loss": 1.645, "step": 82067 }, { "epoch": 2.73, "grad_norm": 0.6941561102867126, "learning_rate": 1.1875791310339044e-05, "loss": 1.7219, "step": 82068 }, { "epoch": 2.73, "grad_norm": 0.7385138273239136, "learning_rate": 1.1872879770680944e-05, "loss": 1.6993, "step": 82069 }, { "epoch": 2.73, "grad_norm": 0.7488790154457092, "learning_rate": 1.186996858076671e-05, "loss": 1.7181, "step": 82070 }, { "epoch": 2.73, "grad_norm": 0.7368285655975342, "learning_rate": 1.186705774059994e-05, "loss": 1.6858, "step": 82071 }, { "epoch": 2.73, "grad_norm": 0.7309742569923401, "learning_rate": 1.1864147250184097e-05, "loss": 1.7322, "step": 82072 }, { "epoch": 2.73, "grad_norm": 0.7320144176483154, "learning_rate": 1.186123710952268e-05, "loss": 1.732, "step": 82073 }, { "epoch": 2.73, "grad_norm": 0.7134476900100708, "learning_rate": 1.185832731861932e-05, "loss": 1.6927, "step": 82074 }, { "epoch": 2.73, "grad_norm": 0.7049770951271057, "learning_rate": 1.185541787747748e-05, "loss": 1.684, "step": 82075 }, { "epoch": 2.73, "grad_norm": 0.7026100158691406, "learning_rate": 1.185250878610069e-05, "loss": 1.6983, "step": 82076 }, { "epoch": 2.73, "grad_norm": 0.7137261033058167, "learning_rate": 1.184960004449248e-05, "loss": 1.7047, "step": 82077 }, { "epoch": 2.73, "grad_norm": 0.7137260437011719, "learning_rate": 1.1846691652656448e-05, "loss": 1.6336, "step": 82078 }, { "epoch": 2.73, "grad_norm": 0.7085143327713013, "learning_rate": 1.1843783610596058e-05, "loss": 1.729, "step": 82079 }, { "epoch": 2.73, "grad_norm": 0.7019668817520142, "learning_rate": 1.1840875918314807e-05, "loss": 1.6889, "step": 82080 }, { "epoch": 2.73, "grad_norm": 0.7226618528366089, "learning_rate": 1.1837968575816359e-05, "loss": 1.6465, "step": 82081 }, { "epoch": 2.73, "grad_norm": 0.719424307346344, "learning_rate": 1.1835061583104111e-05, "loss": 1.6211, "step": 82082 }, { "epoch": 2.73, "grad_norm": 0.7144792675971985, "learning_rate": 1.1832154940181625e-05, "loss": 1.6789, "step": 82083 }, { "epoch": 2.73, "grad_norm": 0.7260039448738098, "learning_rate": 1.182924864705247e-05, "loss": 1.648, "step": 82084 }, { "epoch": 2.73, "grad_norm": 0.7248185276985168, "learning_rate": 1.1826342703720138e-05, "loss": 1.674, "step": 82085 }, { "epoch": 2.73, "grad_norm": 0.7187897562980652, "learning_rate": 1.1823437110188194e-05, "loss": 1.7681, "step": 82086 }, { "epoch": 2.73, "grad_norm": 0.7199066877365112, "learning_rate": 1.1820531866460071e-05, "loss": 1.6072, "step": 82087 }, { "epoch": 2.73, "grad_norm": 0.6995817422866821, "learning_rate": 1.181762697253943e-05, "loss": 1.7257, "step": 82088 }, { "epoch": 2.73, "grad_norm": 0.7034227252006531, "learning_rate": 1.1814722428429701e-05, "loss": 1.6887, "step": 82089 }, { "epoch": 2.73, "grad_norm": 0.7156815528869629, "learning_rate": 1.1811818234134418e-05, "loss": 1.7247, "step": 82090 }, { "epoch": 2.73, "grad_norm": 0.7039971351623535, "learning_rate": 1.1808914389657143e-05, "loss": 1.7169, "step": 82091 }, { "epoch": 2.73, "grad_norm": 0.7334719896316528, "learning_rate": 1.1806010895001373e-05, "loss": 1.6883, "step": 82092 }, { "epoch": 2.73, "grad_norm": 0.739081621170044, "learning_rate": 1.180310775017067e-05, "loss": 1.7134, "step": 82093 }, { "epoch": 2.73, "grad_norm": 0.7220723032951355, "learning_rate": 1.1800204955168502e-05, "loss": 1.6265, "step": 82094 }, { "epoch": 2.73, "grad_norm": 0.7172329425811768, "learning_rate": 1.179730250999843e-05, "loss": 1.6772, "step": 82095 }, { "epoch": 2.73, "grad_norm": 0.704383134841919, "learning_rate": 1.1794400414663985e-05, "loss": 1.7295, "step": 82096 }, { "epoch": 2.73, "grad_norm": 0.6838303208351135, "learning_rate": 1.179149866916863e-05, "loss": 1.6411, "step": 82097 }, { "epoch": 2.73, "grad_norm": 0.7220924496650696, "learning_rate": 1.1788597273515966e-05, "loss": 1.6829, "step": 82098 }, { "epoch": 2.73, "grad_norm": 0.7132020592689514, "learning_rate": 1.1785696227709452e-05, "loss": 1.6789, "step": 82099 }, { "epoch": 2.73, "grad_norm": 0.7006452679634094, "learning_rate": 1.1782795531752654e-05, "loss": 1.6772, "step": 82100 }, { "epoch": 2.73, "grad_norm": 0.7046505212783813, "learning_rate": 1.1779895185649102e-05, "loss": 1.6493, "step": 82101 }, { "epoch": 2.73, "grad_norm": 0.7209896445274353, "learning_rate": 1.1776995189402227e-05, "loss": 1.7262, "step": 82102 }, { "epoch": 2.73, "grad_norm": 0.7176619172096252, "learning_rate": 1.1774095543015693e-05, "loss": 1.6365, "step": 82103 }, { "epoch": 2.73, "grad_norm": 0.7206060886383057, "learning_rate": 1.1771196246492897e-05, "loss": 1.7287, "step": 82104 }, { "epoch": 2.73, "grad_norm": 0.6899240612983704, "learning_rate": 1.176829729983737e-05, "loss": 1.6346, "step": 82105 }, { "epoch": 2.73, "grad_norm": 0.8085871338844299, "learning_rate": 1.1765398703052675e-05, "loss": 1.6204, "step": 82106 }, { "epoch": 2.73, "grad_norm": 0.722453236579895, "learning_rate": 1.1762500456142343e-05, "loss": 1.674, "step": 82107 }, { "epoch": 2.73, "grad_norm": 0.7156879901885986, "learning_rate": 1.1759602559109871e-05, "loss": 1.6781, "step": 82108 }, { "epoch": 2.73, "grad_norm": 0.7017031908035278, "learning_rate": 1.1756705011958722e-05, "loss": 1.6437, "step": 82109 }, { "epoch": 2.73, "grad_norm": 0.6891849637031555, "learning_rate": 1.1753807814692496e-05, "loss": 1.7136, "step": 82110 }, { "epoch": 2.73, "grad_norm": 0.7099735140800476, "learning_rate": 1.175091096731472e-05, "loss": 1.6839, "step": 82111 }, { "epoch": 2.73, "grad_norm": 0.704110860824585, "learning_rate": 1.1748014469828793e-05, "loss": 1.6959, "step": 82112 }, { "epoch": 2.73, "grad_norm": 0.7100145220756531, "learning_rate": 1.174511832223831e-05, "loss": 1.6874, "step": 82113 }, { "epoch": 2.73, "grad_norm": 0.7116429805755615, "learning_rate": 1.1742222524546841e-05, "loss": 1.659, "step": 82114 }, { "epoch": 2.73, "grad_norm": 0.7245537042617798, "learning_rate": 1.1739327076757777e-05, "loss": 1.7202, "step": 82115 }, { "epoch": 2.73, "grad_norm": 0.7134256958961487, "learning_rate": 1.1736431978874716e-05, "loss": 1.6868, "step": 82116 }, { "epoch": 2.73, "grad_norm": 0.7035961747169495, "learning_rate": 1.1733537230901158e-05, "loss": 1.6876, "step": 82117 }, { "epoch": 2.73, "grad_norm": 0.7132747173309326, "learning_rate": 1.1730642832840631e-05, "loss": 1.6775, "step": 82118 }, { "epoch": 2.73, "grad_norm": 0.7473023533821106, "learning_rate": 1.17277487846966e-05, "loss": 1.7065, "step": 82119 }, { "epoch": 2.73, "grad_norm": 0.7183083295822144, "learning_rate": 1.1724855086472596e-05, "loss": 1.609, "step": 82120 }, { "epoch": 2.73, "grad_norm": 0.7181563973426819, "learning_rate": 1.1721961738172214e-05, "loss": 1.6692, "step": 82121 }, { "epoch": 2.73, "grad_norm": 0.7178216576576233, "learning_rate": 1.171906873979882e-05, "loss": 1.6846, "step": 82122 }, { "epoch": 2.73, "grad_norm": 0.7072636485099792, "learning_rate": 1.171617609135601e-05, "loss": 1.6755, "step": 82123 }, { "epoch": 2.73, "grad_norm": 0.7077301144599915, "learning_rate": 1.1713283792847317e-05, "loss": 1.6777, "step": 82124 }, { "epoch": 2.73, "grad_norm": 0.7116419672966003, "learning_rate": 1.1710391844276202e-05, "loss": 1.6135, "step": 82125 }, { "epoch": 2.73, "grad_norm": 0.7013940215110779, "learning_rate": 1.1707500245646195e-05, "loss": 1.6963, "step": 82126 }, { "epoch": 2.73, "grad_norm": 0.7073549628257751, "learning_rate": 1.1704608996960796e-05, "loss": 1.6702, "step": 82127 }, { "epoch": 2.73, "grad_norm": 0.7026674747467041, "learning_rate": 1.1701718098223566e-05, "loss": 1.6298, "step": 82128 }, { "epoch": 2.73, "grad_norm": 0.7141727209091187, "learning_rate": 1.1698827549437905e-05, "loss": 1.6289, "step": 82129 }, { "epoch": 2.73, "grad_norm": 0.7434368133544922, "learning_rate": 1.169593735060741e-05, "loss": 1.724, "step": 82130 }, { "epoch": 2.73, "grad_norm": 0.7277019023895264, "learning_rate": 1.1693047501735609e-05, "loss": 1.6925, "step": 82131 }, { "epoch": 2.73, "grad_norm": 0.7179284691810608, "learning_rate": 1.1690158002825933e-05, "loss": 1.7399, "step": 82132 }, { "epoch": 2.73, "grad_norm": 0.7620972990989685, "learning_rate": 1.1687268853881915e-05, "loss": 1.6709, "step": 82133 }, { "epoch": 2.73, "grad_norm": 0.7153251767158508, "learning_rate": 1.1684380054907117e-05, "loss": 1.6258, "step": 82134 }, { "epoch": 2.73, "grad_norm": 0.7054387331008911, "learning_rate": 1.168149160590497e-05, "loss": 1.6551, "step": 82135 }, { "epoch": 2.73, "grad_norm": 0.7376587390899658, "learning_rate": 1.1678603506879003e-05, "loss": 1.6522, "step": 82136 }, { "epoch": 2.73, "grad_norm": 0.70676189661026, "learning_rate": 1.1675715757832716e-05, "loss": 1.7309, "step": 82137 }, { "epoch": 2.73, "grad_norm": 0.7143352627754211, "learning_rate": 1.1672828358769671e-05, "loss": 1.6919, "step": 82138 }, { "epoch": 2.73, "grad_norm": 0.7066428661346436, "learning_rate": 1.1669941309693298e-05, "loss": 1.6801, "step": 82139 }, { "epoch": 2.73, "grad_norm": 0.6970360279083252, "learning_rate": 1.1667054610607129e-05, "loss": 1.6761, "step": 82140 }, { "epoch": 2.73, "grad_norm": 0.7144829630851746, "learning_rate": 1.1664168261514695e-05, "loss": 1.6578, "step": 82141 }, { "epoch": 2.73, "grad_norm": 0.6876011490821838, "learning_rate": 1.1661282262419491e-05, "loss": 1.6566, "step": 82142 }, { "epoch": 2.73, "grad_norm": 0.7117294669151306, "learning_rate": 1.165839661332495e-05, "loss": 1.7048, "step": 82143 }, { "epoch": 2.73, "grad_norm": 0.7096543312072754, "learning_rate": 1.16555113142347e-05, "loss": 1.6988, "step": 82144 }, { "epoch": 2.73, "grad_norm": 0.7298645377159119, "learning_rate": 1.1652626365152107e-05, "loss": 1.7125, "step": 82145 }, { "epoch": 2.73, "grad_norm": 0.7271760106086731, "learning_rate": 1.1649741766080767e-05, "loss": 1.7489, "step": 82146 }, { "epoch": 2.73, "grad_norm": 0.7193060517311096, "learning_rate": 1.1646857517024145e-05, "loss": 1.6507, "step": 82147 }, { "epoch": 2.73, "grad_norm": 0.7501586079597473, "learning_rate": 1.1643973617985769e-05, "loss": 1.7677, "step": 82148 }, { "epoch": 2.73, "grad_norm": 0.7252967357635498, "learning_rate": 1.1641090068969138e-05, "loss": 1.7574, "step": 82149 }, { "epoch": 2.73, "grad_norm": 0.6917077302932739, "learning_rate": 1.1638206869977684e-05, "loss": 1.7141, "step": 82150 }, { "epoch": 2.73, "grad_norm": 0.7084376215934753, "learning_rate": 1.1635324021015002e-05, "loss": 1.6044, "step": 82151 }, { "epoch": 2.73, "grad_norm": 0.7000575065612793, "learning_rate": 1.1632441522084523e-05, "loss": 1.7208, "step": 82152 }, { "epoch": 2.73, "grad_norm": 0.7028736472129822, "learning_rate": 1.1629559373189778e-05, "loss": 1.7017, "step": 82153 }, { "epoch": 2.73, "grad_norm": 0.7210413217544556, "learning_rate": 1.1626677574334297e-05, "loss": 1.7346, "step": 82154 }, { "epoch": 2.73, "grad_norm": 0.6993879675865173, "learning_rate": 1.1623796125521478e-05, "loss": 1.6583, "step": 82155 }, { "epoch": 2.73, "grad_norm": 0.7129138708114624, "learning_rate": 1.1620915026754918e-05, "loss": 1.7463, "step": 82156 }, { "epoch": 2.73, "grad_norm": 0.7400820851325989, "learning_rate": 1.1618034278038046e-05, "loss": 1.6807, "step": 82157 }, { "epoch": 2.73, "grad_norm": 0.6879552602767944, "learning_rate": 1.1615153879374429e-05, "loss": 1.6367, "step": 82158 }, { "epoch": 2.73, "grad_norm": 0.7020901441574097, "learning_rate": 1.161227383076746e-05, "loss": 1.6511, "step": 82159 }, { "epoch": 2.73, "grad_norm": 0.7050716280937195, "learning_rate": 1.1609394132220774e-05, "loss": 1.6351, "step": 82160 }, { "epoch": 2.73, "grad_norm": 0.6844615340232849, "learning_rate": 1.1606514783737763e-05, "loss": 1.6994, "step": 82161 }, { "epoch": 2.73, "grad_norm": 0.726060152053833, "learning_rate": 1.1603635785321929e-05, "loss": 1.6613, "step": 82162 }, { "epoch": 2.73, "grad_norm": 0.6987067461013794, "learning_rate": 1.1600757136976801e-05, "loss": 1.6628, "step": 82163 }, { "epoch": 2.73, "grad_norm": 0.6989185810089111, "learning_rate": 1.1597878838705844e-05, "loss": 1.7282, "step": 82164 }, { "epoch": 2.73, "grad_norm": 0.7058964967727661, "learning_rate": 1.1595000890512617e-05, "loss": 1.7116, "step": 82165 }, { "epoch": 2.73, "grad_norm": 0.688812792301178, "learning_rate": 1.1592123292400523e-05, "loss": 1.6863, "step": 82166 }, { "epoch": 2.73, "grad_norm": 0.6934833526611328, "learning_rate": 1.1589246044373124e-05, "loss": 1.682, "step": 82167 }, { "epoch": 2.73, "grad_norm": 0.7118544578552246, "learning_rate": 1.1586369146433882e-05, "loss": 1.6843, "step": 82168 }, { "epoch": 2.73, "grad_norm": 0.773273229598999, "learning_rate": 1.1583492598586264e-05, "loss": 1.6501, "step": 82169 }, { "epoch": 2.73, "grad_norm": 0.7405320405960083, "learning_rate": 1.1580616400833798e-05, "loss": 1.6268, "step": 82170 }, { "epoch": 2.73, "grad_norm": 0.7182270884513855, "learning_rate": 1.1577740553180015e-05, "loss": 1.6843, "step": 82171 }, { "epoch": 2.73, "grad_norm": 0.7058188915252686, "learning_rate": 1.157486505562828e-05, "loss": 1.7477, "step": 82172 }, { "epoch": 2.73, "grad_norm": 0.7086682915687561, "learning_rate": 1.1571989908182189e-05, "loss": 1.7337, "step": 82173 }, { "epoch": 2.73, "grad_norm": 0.6918172240257263, "learning_rate": 1.1569115110845207e-05, "loss": 1.6452, "step": 82174 }, { "epoch": 2.73, "grad_norm": 0.7145711183547974, "learning_rate": 1.1566240663620863e-05, "loss": 1.6737, "step": 82175 }, { "epoch": 2.73, "grad_norm": 0.7254404425621033, "learning_rate": 1.1563366566512522e-05, "loss": 1.6819, "step": 82176 }, { "epoch": 2.73, "grad_norm": 0.6994901299476624, "learning_rate": 1.1560492819523848e-05, "loss": 1.6799, "step": 82177 }, { "epoch": 2.73, "grad_norm": 0.7391005158424377, "learning_rate": 1.1557619422658204e-05, "loss": 1.6718, "step": 82178 }, { "epoch": 2.73, "grad_norm": 0.6865063905715942, "learning_rate": 1.1554746375919055e-05, "loss": 1.6795, "step": 82179 }, { "epoch": 2.73, "grad_norm": 0.7073693871498108, "learning_rate": 1.1551873679309964e-05, "loss": 1.718, "step": 82180 }, { "epoch": 2.73, "grad_norm": 0.753145694732666, "learning_rate": 1.1549001332834429e-05, "loss": 1.5886, "step": 82181 }, { "epoch": 2.73, "grad_norm": 0.6996752023696899, "learning_rate": 1.1546129336495912e-05, "loss": 1.6606, "step": 82182 }, { "epoch": 2.73, "grad_norm": 0.7118844389915466, "learning_rate": 1.1543257690297846e-05, "loss": 1.6053, "step": 82183 }, { "epoch": 2.73, "grad_norm": 0.7129443287849426, "learning_rate": 1.1540386394243829e-05, "loss": 1.7235, "step": 82184 }, { "epoch": 2.73, "grad_norm": 0.6914939284324646, "learning_rate": 1.1537515448337253e-05, "loss": 1.6535, "step": 82185 }, { "epoch": 2.73, "grad_norm": 0.7223445177078247, "learning_rate": 1.1534644852581587e-05, "loss": 1.7501, "step": 82186 }, { "epoch": 2.73, "grad_norm": 0.714188814163208, "learning_rate": 1.1531774606980392e-05, "loss": 1.6724, "step": 82187 }, { "epoch": 2.73, "grad_norm": 0.7023149132728577, "learning_rate": 1.1528904711537135e-05, "loss": 1.627, "step": 82188 }, { "epoch": 2.73, "grad_norm": 0.7228162884712219, "learning_rate": 1.1526035166255276e-05, "loss": 1.6949, "step": 82189 }, { "epoch": 2.73, "grad_norm": 0.7393704056739807, "learning_rate": 1.1523165971138281e-05, "loss": 1.722, "step": 82190 }, { "epoch": 2.73, "grad_norm": 0.7084086537361145, "learning_rate": 1.1520297126189682e-05, "loss": 1.6842, "step": 82191 }, { "epoch": 2.73, "grad_norm": 0.6991215348243713, "learning_rate": 1.1517428631412939e-05, "loss": 1.6658, "step": 82192 }, { "epoch": 2.73, "grad_norm": 0.737441897392273, "learning_rate": 1.1514560486811485e-05, "loss": 1.7496, "step": 82193 }, { "epoch": 2.73, "grad_norm": 0.7341893911361694, "learning_rate": 1.1511692692388885e-05, "loss": 1.6741, "step": 82194 }, { "epoch": 2.73, "grad_norm": 0.719052255153656, "learning_rate": 1.1508825248148601e-05, "loss": 1.7778, "step": 82195 }, { "epoch": 2.73, "grad_norm": 0.7092620730400085, "learning_rate": 1.1505958154094097e-05, "loss": 1.6605, "step": 82196 }, { "epoch": 2.73, "grad_norm": 0.7475279569625854, "learning_rate": 1.1503091410228804e-05, "loss": 1.656, "step": 82197 }, { "epoch": 2.73, "grad_norm": 0.6944562792778015, "learning_rate": 1.1500225016556286e-05, "loss": 1.6557, "step": 82198 }, { "epoch": 2.73, "grad_norm": 0.7146786451339722, "learning_rate": 1.1497358973080006e-05, "loss": 1.6961, "step": 82199 }, { "epoch": 2.73, "grad_norm": 0.6984277963638306, "learning_rate": 1.1494493279803362e-05, "loss": 1.7092, "step": 82200 }, { "epoch": 2.73, "grad_norm": 0.748778760433197, "learning_rate": 1.1491627936729953e-05, "loss": 1.6399, "step": 82201 }, { "epoch": 2.73, "grad_norm": 0.719909131526947, "learning_rate": 1.1488762943863138e-05, "loss": 1.6482, "step": 82202 }, { "epoch": 2.73, "grad_norm": 0.7180408835411072, "learning_rate": 1.148589830120652e-05, "loss": 1.7007, "step": 82203 }, { "epoch": 2.73, "grad_norm": 0.709638237953186, "learning_rate": 1.1483034008763458e-05, "loss": 1.721, "step": 82204 }, { "epoch": 2.73, "grad_norm": 0.7429549694061279, "learning_rate": 1.148017006653752e-05, "loss": 1.7394, "step": 82205 }, { "epoch": 2.74, "grad_norm": 0.7237856984138489, "learning_rate": 1.1477306474532134e-05, "loss": 1.6855, "step": 82206 }, { "epoch": 2.74, "grad_norm": 0.7209152579307556, "learning_rate": 1.1474443232750763e-05, "loss": 1.6592, "step": 82207 }, { "epoch": 2.74, "grad_norm": 0.7329055666923523, "learning_rate": 1.147158034119694e-05, "loss": 1.6429, "step": 82208 }, { "epoch": 2.74, "grad_norm": 0.75640469789505, "learning_rate": 1.1468717799874062e-05, "loss": 1.7382, "step": 82209 }, { "epoch": 2.74, "grad_norm": 0.7252443432807922, "learning_rate": 1.146585560878569e-05, "loss": 1.7031, "step": 82210 }, { "epoch": 2.74, "grad_norm": 0.7320596575737, "learning_rate": 1.1462993767935258e-05, "loss": 1.7402, "step": 82211 }, { "epoch": 2.74, "grad_norm": 0.721127986907959, "learning_rate": 1.1460132277326195e-05, "loss": 1.6708, "step": 82212 }, { "epoch": 2.74, "grad_norm": 0.7484127879142761, "learning_rate": 1.1457271136962065e-05, "loss": 1.8144, "step": 82213 }, { "epoch": 2.74, "grad_norm": 0.7152003645896912, "learning_rate": 1.1454410346846266e-05, "loss": 1.699, "step": 82214 }, { "epoch": 2.74, "grad_norm": 0.7489587664604187, "learning_rate": 1.1451549906982293e-05, "loss": 1.7172, "step": 82215 }, { "epoch": 2.74, "grad_norm": 0.6919530034065247, "learning_rate": 1.1448689817373614e-05, "loss": 1.6896, "step": 82216 }, { "epoch": 2.74, "grad_norm": 0.7165239453315735, "learning_rate": 1.1445830078023754e-05, "loss": 1.6597, "step": 82217 }, { "epoch": 2.74, "grad_norm": 0.6991650462150574, "learning_rate": 1.1442970688936114e-05, "loss": 1.7403, "step": 82218 }, { "epoch": 2.74, "grad_norm": 0.7144403457641602, "learning_rate": 1.1440111650114192e-05, "loss": 1.7268, "step": 82219 }, { "epoch": 2.74, "grad_norm": 0.7157601714134216, "learning_rate": 1.143725296156145e-05, "loss": 1.6588, "step": 82220 }, { "epoch": 2.74, "grad_norm": 0.7059488892555237, "learning_rate": 1.1434394623281385e-05, "loss": 1.6669, "step": 82221 }, { "epoch": 2.74, "grad_norm": 0.6934062838554382, "learning_rate": 1.1431536635277427e-05, "loss": 1.6959, "step": 82222 }, { "epoch": 2.74, "grad_norm": 0.7211810946464539, "learning_rate": 1.1428678997553043e-05, "loss": 1.7316, "step": 82223 }, { "epoch": 2.74, "grad_norm": 0.701192319393158, "learning_rate": 1.142582171011176e-05, "loss": 1.6668, "step": 82224 }, { "epoch": 2.74, "grad_norm": 0.7263062000274658, "learning_rate": 1.1422964772957011e-05, "loss": 1.6854, "step": 82225 }, { "epoch": 2.74, "grad_norm": 0.7251162528991699, "learning_rate": 1.1420108186092225e-05, "loss": 1.7177, "step": 82226 }, { "epoch": 2.74, "grad_norm": 0.6969897747039795, "learning_rate": 1.1417251949520967e-05, "loss": 1.6694, "step": 82227 }, { "epoch": 2.74, "grad_norm": 0.7167349457740784, "learning_rate": 1.1414396063246633e-05, "loss": 1.6698, "step": 82228 }, { "epoch": 2.74, "grad_norm": 0.7206514477729797, "learning_rate": 1.1411540527272655e-05, "loss": 1.5782, "step": 82229 }, { "epoch": 2.74, "grad_norm": 0.7221387624740601, "learning_rate": 1.1408685341602564e-05, "loss": 1.6684, "step": 82230 }, { "epoch": 2.74, "grad_norm": 0.7430430054664612, "learning_rate": 1.1405830506239855e-05, "loss": 1.6208, "step": 82231 }, { "epoch": 2.74, "grad_norm": 0.7037765979766846, "learning_rate": 1.1402976021187892e-05, "loss": 1.651, "step": 82232 }, { "epoch": 2.74, "grad_norm": 0.7348407506942749, "learning_rate": 1.1400121886450175e-05, "loss": 1.7281, "step": 82233 }, { "epoch": 2.74, "grad_norm": 0.701692521572113, "learning_rate": 1.1397268102030232e-05, "loss": 1.7002, "step": 82234 }, { "epoch": 2.74, "grad_norm": 0.7352606654167175, "learning_rate": 1.1394414667931461e-05, "loss": 1.6571, "step": 82235 }, { "epoch": 2.74, "grad_norm": 0.7263497710227966, "learning_rate": 1.139156158415736e-05, "loss": 1.6746, "step": 82236 }, { "epoch": 2.74, "grad_norm": 0.6910141706466675, "learning_rate": 1.1388708850711325e-05, "loss": 1.6469, "step": 82237 }, { "epoch": 2.74, "grad_norm": 0.7064889073371887, "learning_rate": 1.1385856467596988e-05, "loss": 1.6376, "step": 82238 }, { "epoch": 2.74, "grad_norm": 0.7013117074966431, "learning_rate": 1.1383004434817611e-05, "loss": 1.6694, "step": 82239 }, { "epoch": 2.74, "grad_norm": 0.709098219871521, "learning_rate": 1.1380152752376726e-05, "loss": 1.6552, "step": 82240 }, { "epoch": 2.74, "grad_norm": 0.7049890160560608, "learning_rate": 1.1377301420277862e-05, "loss": 1.6338, "step": 82241 }, { "epoch": 2.74, "grad_norm": 0.7145071029663086, "learning_rate": 1.1374450438524418e-05, "loss": 1.6944, "step": 82242 }, { "epoch": 2.74, "grad_norm": 0.6932635307312012, "learning_rate": 1.1371599807119825e-05, "loss": 1.6498, "step": 82243 }, { "epoch": 2.74, "grad_norm": 0.7190847992897034, "learning_rate": 1.136874952606761e-05, "loss": 1.6786, "step": 82244 }, { "epoch": 2.74, "grad_norm": 0.6959930658340454, "learning_rate": 1.1365899595371242e-05, "loss": 1.6863, "step": 82245 }, { "epoch": 2.74, "grad_norm": 0.7105352878570557, "learning_rate": 1.136305001503408e-05, "loss": 1.678, "step": 82246 }, { "epoch": 2.74, "grad_norm": 0.7100654244422913, "learning_rate": 1.1360200785059658e-05, "loss": 1.6161, "step": 82247 }, { "epoch": 2.74, "grad_norm": 0.6944242715835571, "learning_rate": 1.1357351905451439e-05, "loss": 1.6862, "step": 82248 }, { "epoch": 2.74, "grad_norm": 0.7207146286964417, "learning_rate": 1.1354503376212886e-05, "loss": 1.7051, "step": 82249 }, { "epoch": 2.74, "grad_norm": 0.7073081731796265, "learning_rate": 1.1351655197347398e-05, "loss": 1.6667, "step": 82250 }, { "epoch": 2.74, "grad_norm": 0.7201866507530212, "learning_rate": 1.1348807368858503e-05, "loss": 1.6659, "step": 82251 }, { "epoch": 2.74, "grad_norm": 0.7081812024116516, "learning_rate": 1.1345959890749635e-05, "loss": 1.6735, "step": 82252 }, { "epoch": 2.74, "grad_norm": 0.6999332308769226, "learning_rate": 1.1343112763024187e-05, "loss": 1.7115, "step": 82253 }, { "epoch": 2.74, "grad_norm": 0.7391988635063171, "learning_rate": 1.1340265985685659e-05, "loss": 1.7162, "step": 82254 }, { "epoch": 2.74, "grad_norm": 0.7255681753158569, "learning_rate": 1.1337419558737582e-05, "loss": 1.7228, "step": 82255 }, { "epoch": 2.74, "grad_norm": 0.7399938702583313, "learning_rate": 1.1334573482183318e-05, "loss": 1.7317, "step": 82256 }, { "epoch": 2.74, "grad_norm": 0.7398452162742615, "learning_rate": 1.1331727756026332e-05, "loss": 1.7024, "step": 82257 }, { "epoch": 2.74, "grad_norm": 0.7199237942695618, "learning_rate": 1.132888238027012e-05, "loss": 1.6683, "step": 82258 }, { "epoch": 2.74, "grad_norm": 0.7102085947990417, "learning_rate": 1.1326037354918115e-05, "loss": 1.6544, "step": 82259 }, { "epoch": 2.74, "grad_norm": 0.7035306692123413, "learning_rate": 1.1323192679973747e-05, "loss": 1.6692, "step": 82260 }, { "epoch": 2.74, "grad_norm": 0.7460479736328125, "learning_rate": 1.1320348355440512e-05, "loss": 1.7145, "step": 82261 }, { "epoch": 2.74, "grad_norm": 0.7144778370857239, "learning_rate": 1.1317504381321807e-05, "loss": 1.6662, "step": 82262 }, { "epoch": 2.74, "grad_norm": 0.7450548410415649, "learning_rate": 1.131466075762113e-05, "loss": 1.6474, "step": 82263 }, { "epoch": 2.74, "grad_norm": 0.7286570072174072, "learning_rate": 1.1311817484341912e-05, "loss": 1.6606, "step": 82264 }, { "epoch": 2.74, "grad_norm": 0.7131518721580505, "learning_rate": 1.1308974561487616e-05, "loss": 1.6948, "step": 82265 }, { "epoch": 2.74, "grad_norm": 0.7086107730865479, "learning_rate": 1.130613198906174e-05, "loss": 1.6754, "step": 82266 }, { "epoch": 2.74, "grad_norm": 0.6884002089500427, "learning_rate": 1.1303289767067581e-05, "loss": 1.6872, "step": 82267 }, { "epoch": 2.74, "grad_norm": 0.7156806588172913, "learning_rate": 1.1300447895508768e-05, "loss": 1.6829, "step": 82268 }, { "epoch": 2.74, "grad_norm": 0.72737056016922, "learning_rate": 1.1297606374388635e-05, "loss": 1.6808, "step": 82269 }, { "epoch": 2.74, "grad_norm": 0.7167171835899353, "learning_rate": 1.1294765203710709e-05, "loss": 1.7362, "step": 82270 }, { "epoch": 2.74, "grad_norm": 0.7152106761932373, "learning_rate": 1.129192438347839e-05, "loss": 1.712, "step": 82271 }, { "epoch": 2.74, "grad_norm": 0.7237151265144348, "learning_rate": 1.1289083913695108e-05, "loss": 1.6811, "step": 82272 }, { "epoch": 2.74, "grad_norm": 0.7233254909515381, "learning_rate": 1.1286243794364359e-05, "loss": 1.7058, "step": 82273 }, { "epoch": 2.74, "grad_norm": 0.7379727959632874, "learning_rate": 1.128340402548954e-05, "loss": 1.5997, "step": 82274 }, { "epoch": 2.74, "grad_norm": 0.6962314248085022, "learning_rate": 1.128056460707415e-05, "loss": 1.6941, "step": 82275 }, { "epoch": 2.74, "grad_norm": 0.6899499893188477, "learning_rate": 1.1277725539121584e-05, "loss": 1.6704, "step": 82276 }, { "epoch": 2.74, "grad_norm": 0.7223153114318848, "learning_rate": 1.1274886821635375e-05, "loss": 1.6635, "step": 82277 }, { "epoch": 2.74, "grad_norm": 0.742056131362915, "learning_rate": 1.1272048454618888e-05, "loss": 1.6831, "step": 82278 }, { "epoch": 2.74, "grad_norm": 0.7116029858589172, "learning_rate": 1.1269210438075582e-05, "loss": 1.6808, "step": 82279 }, { "epoch": 2.74, "grad_norm": 0.720505952835083, "learning_rate": 1.1266372772008924e-05, "loss": 1.7391, "step": 82280 }, { "epoch": 2.74, "grad_norm": 0.7295902371406555, "learning_rate": 1.126353545642238e-05, "loss": 1.6081, "step": 82281 }, { "epoch": 2.74, "grad_norm": 0.7117864489555359, "learning_rate": 1.1260698491319276e-05, "loss": 1.7302, "step": 82282 }, { "epoch": 2.74, "grad_norm": 0.7091215252876282, "learning_rate": 1.1257861876703178e-05, "loss": 1.6914, "step": 82283 }, { "epoch": 2.74, "grad_norm": 0.718617856502533, "learning_rate": 1.1255025612577517e-05, "loss": 1.6784, "step": 82284 }, { "epoch": 2.74, "grad_norm": 0.7233757972717285, "learning_rate": 1.1252189698945691e-05, "loss": 1.6238, "step": 82285 }, { "epoch": 2.74, "grad_norm": 0.7235659956932068, "learning_rate": 1.1249354135811162e-05, "loss": 1.6554, "step": 82286 }, { "epoch": 2.74, "grad_norm": 0.7186170816421509, "learning_rate": 1.1246518923177395e-05, "loss": 1.6254, "step": 82287 }, { "epoch": 2.74, "grad_norm": 0.7213923931121826, "learning_rate": 1.1243684061047785e-05, "loss": 1.6065, "step": 82288 }, { "epoch": 2.74, "grad_norm": 0.7024203538894653, "learning_rate": 1.1240849549425768e-05, "loss": 1.7006, "step": 82289 }, { "epoch": 2.74, "grad_norm": 0.7268900275230408, "learning_rate": 1.1238015388314836e-05, "loss": 1.6801, "step": 82290 }, { "epoch": 2.74, "grad_norm": 0.7014166712760925, "learning_rate": 1.1235181577718422e-05, "loss": 1.7117, "step": 82291 }, { "epoch": 2.74, "grad_norm": 0.7014536261558533, "learning_rate": 1.1232348117639956e-05, "loss": 1.7408, "step": 82292 }, { "epoch": 2.74, "grad_norm": 0.7289854884147644, "learning_rate": 1.1229515008082835e-05, "loss": 1.655, "step": 82293 }, { "epoch": 2.74, "grad_norm": 0.7234644293785095, "learning_rate": 1.122668224905059e-05, "loss": 1.7301, "step": 82294 }, { "epoch": 2.74, "grad_norm": 0.6974050998687744, "learning_rate": 1.1223849840546584e-05, "loss": 1.6899, "step": 82295 }, { "epoch": 2.74, "grad_norm": 0.7504573464393616, "learning_rate": 1.1221017782574216e-05, "loss": 1.7073, "step": 82296 }, { "epoch": 2.74, "grad_norm": 0.7223731279373169, "learning_rate": 1.1218186075137015e-05, "loss": 1.648, "step": 82297 }, { "epoch": 2.74, "grad_norm": 0.7192420959472656, "learning_rate": 1.1215354718238411e-05, "loss": 1.7066, "step": 82298 }, { "epoch": 2.74, "grad_norm": 0.7079741954803467, "learning_rate": 1.1212523711881838e-05, "loss": 1.6788, "step": 82299 }, { "epoch": 2.74, "grad_norm": 0.6883735060691833, "learning_rate": 1.1209693056070657e-05, "loss": 1.6587, "step": 82300 }, { "epoch": 2.74, "grad_norm": 0.699835479259491, "learning_rate": 1.1206862750808365e-05, "loss": 1.6558, "step": 82301 }, { "epoch": 2.74, "grad_norm": 0.6957893967628479, "learning_rate": 1.1204032796098428e-05, "loss": 1.7067, "step": 82302 }, { "epoch": 2.74, "grad_norm": 0.7371204495429993, "learning_rate": 1.1201203191944209e-05, "loss": 1.6786, "step": 82303 }, { "epoch": 2.74, "grad_norm": 0.6989187598228455, "learning_rate": 1.119837393834917e-05, "loss": 1.7547, "step": 82304 }, { "epoch": 2.74, "grad_norm": 0.7008697986602783, "learning_rate": 1.1195545035316777e-05, "loss": 1.654, "step": 82305 }, { "epoch": 2.74, "grad_norm": 0.7045964598655701, "learning_rate": 1.1192716482850428e-05, "loss": 1.7406, "step": 82306 }, { "epoch": 2.74, "grad_norm": 0.7056741118431091, "learning_rate": 1.1189888280953552e-05, "loss": 1.7267, "step": 82307 }, { "epoch": 2.74, "grad_norm": 0.7113097310066223, "learning_rate": 1.1187060429629647e-05, "loss": 1.6832, "step": 82308 }, { "epoch": 2.74, "grad_norm": 0.7255023717880249, "learning_rate": 1.1184232928882076e-05, "loss": 1.75, "step": 82309 }, { "epoch": 2.74, "grad_norm": 0.6978044509887695, "learning_rate": 1.1181405778714236e-05, "loss": 1.6165, "step": 82310 }, { "epoch": 2.74, "grad_norm": 0.720728874206543, "learning_rate": 1.1178578979129626e-05, "loss": 1.6681, "step": 82311 }, { "epoch": 2.74, "grad_norm": 0.7240224480628967, "learning_rate": 1.1175752530131743e-05, "loss": 1.7548, "step": 82312 }, { "epoch": 2.74, "grad_norm": 0.7083302736282349, "learning_rate": 1.1172926431723883e-05, "loss": 1.7191, "step": 82313 }, { "epoch": 2.74, "grad_norm": 0.7024329900741577, "learning_rate": 1.1170100683909545e-05, "loss": 1.6293, "step": 82314 }, { "epoch": 2.74, "grad_norm": 0.7298576235771179, "learning_rate": 1.1167275286692157e-05, "loss": 1.6941, "step": 82315 }, { "epoch": 2.74, "grad_norm": 0.7261066436767578, "learning_rate": 1.1164450240075119e-05, "loss": 1.6208, "step": 82316 }, { "epoch": 2.74, "grad_norm": 0.7287434339523315, "learning_rate": 1.1161625544061891e-05, "loss": 1.6793, "step": 82317 }, { "epoch": 2.74, "grad_norm": 0.696158230304718, "learning_rate": 1.115880119865591e-05, "loss": 1.6433, "step": 82318 }, { "epoch": 2.74, "grad_norm": 0.711653470993042, "learning_rate": 1.1155977203860534e-05, "loss": 1.7587, "step": 82319 }, { "epoch": 2.74, "grad_norm": 0.7019923329353333, "learning_rate": 1.1153153559679295e-05, "loss": 1.6651, "step": 82320 }, { "epoch": 2.74, "grad_norm": 0.7091613411903381, "learning_rate": 1.1150330266115525e-05, "loss": 1.7183, "step": 82321 }, { "epoch": 2.74, "grad_norm": 0.7083298563957214, "learning_rate": 1.1147507323172723e-05, "loss": 1.6843, "step": 82322 }, { "epoch": 2.74, "grad_norm": 0.7081856727600098, "learning_rate": 1.114468473085428e-05, "loss": 1.6264, "step": 82323 }, { "epoch": 2.74, "grad_norm": 0.6905636787414551, "learning_rate": 1.1141862489163633e-05, "loss": 1.6327, "step": 82324 }, { "epoch": 2.74, "grad_norm": 0.7054995894432068, "learning_rate": 1.113904059810421e-05, "loss": 1.6734, "step": 82325 }, { "epoch": 2.74, "grad_norm": 0.725648045539856, "learning_rate": 1.1136219057679407e-05, "loss": 1.7338, "step": 82326 }, { "epoch": 2.74, "grad_norm": 0.7177512049674988, "learning_rate": 1.113339786789269e-05, "loss": 1.6711, "step": 82327 }, { "epoch": 2.74, "grad_norm": 0.703549325466156, "learning_rate": 1.1130577028747456e-05, "loss": 1.6829, "step": 82328 }, { "epoch": 2.74, "grad_norm": 0.7229103446006775, "learning_rate": 1.1127756540247134e-05, "loss": 1.67, "step": 82329 }, { "epoch": 2.74, "grad_norm": 0.6922315359115601, "learning_rate": 1.112493640239519e-05, "loss": 1.6966, "step": 82330 }, { "epoch": 2.74, "grad_norm": 0.7273510694503784, "learning_rate": 1.1122116615194954e-05, "loss": 1.7091, "step": 82331 }, { "epoch": 2.74, "grad_norm": 0.7163039445877075, "learning_rate": 1.1119297178649922e-05, "loss": 1.6219, "step": 82332 }, { "epoch": 2.74, "grad_norm": 0.7049153447151184, "learning_rate": 1.1116478092763492e-05, "loss": 1.5927, "step": 82333 }, { "epoch": 2.74, "grad_norm": 0.7100850939750671, "learning_rate": 1.1113659357539129e-05, "loss": 1.6597, "step": 82334 }, { "epoch": 2.74, "grad_norm": 0.728833019733429, "learning_rate": 1.1110840972980228e-05, "loss": 1.6688, "step": 82335 }, { "epoch": 2.74, "grad_norm": 0.7310912013053894, "learning_rate": 1.1108022939090122e-05, "loss": 1.7503, "step": 82336 }, { "epoch": 2.74, "grad_norm": 0.7734801769256592, "learning_rate": 1.1105205255872374e-05, "loss": 1.624, "step": 82337 }, { "epoch": 2.74, "grad_norm": 0.7256176471710205, "learning_rate": 1.1102387923330346e-05, "loss": 1.716, "step": 82338 }, { "epoch": 2.74, "grad_norm": 0.6955880522727966, "learning_rate": 1.1099570941467406e-05, "loss": 1.6772, "step": 82339 }, { "epoch": 2.74, "grad_norm": 0.7027193307876587, "learning_rate": 1.1096754310287014e-05, "loss": 1.6591, "step": 82340 }, { "epoch": 2.74, "grad_norm": 0.7219340801239014, "learning_rate": 1.1093938029792637e-05, "loss": 1.7223, "step": 82341 }, { "epoch": 2.74, "grad_norm": 0.6939809322357178, "learning_rate": 1.109112209998767e-05, "loss": 1.6786, "step": 82342 }, { "epoch": 2.74, "grad_norm": 0.7295587658882141, "learning_rate": 1.1088306520875445e-05, "loss": 1.6593, "step": 82343 }, { "epoch": 2.74, "grad_norm": 0.7288696765899658, "learning_rate": 1.1085491292459492e-05, "loss": 1.6494, "step": 82344 }, { "epoch": 2.74, "grad_norm": 0.7216634750366211, "learning_rate": 1.1082676414743209e-05, "loss": 1.674, "step": 82345 }, { "epoch": 2.74, "grad_norm": 0.7197889089584351, "learning_rate": 1.1079861887729925e-05, "loss": 1.6964, "step": 82346 }, { "epoch": 2.74, "grad_norm": 0.699247419834137, "learning_rate": 1.1077047711423104e-05, "loss": 1.6607, "step": 82347 }, { "epoch": 2.74, "grad_norm": 0.7282040119171143, "learning_rate": 1.1074233885826279e-05, "loss": 1.7194, "step": 82348 }, { "epoch": 2.74, "grad_norm": 0.7153849601745605, "learning_rate": 1.1071420410942678e-05, "loss": 1.6489, "step": 82349 }, { "epoch": 2.74, "grad_norm": 0.7244434356689453, "learning_rate": 1.10686072867758e-05, "loss": 1.715, "step": 82350 }, { "epoch": 2.74, "grad_norm": 0.7062875032424927, "learning_rate": 1.1065794513329107e-05, "loss": 1.7404, "step": 82351 }, { "epoch": 2.74, "grad_norm": 0.7052037119865417, "learning_rate": 1.1062982090605932e-05, "loss": 1.6259, "step": 82352 }, { "epoch": 2.74, "grad_norm": 0.712949812412262, "learning_rate": 1.1060170018609738e-05, "loss": 1.6818, "step": 82353 }, { "epoch": 2.74, "grad_norm": 0.7142207622528076, "learning_rate": 1.1057358297343888e-05, "loss": 1.6557, "step": 82354 }, { "epoch": 2.74, "grad_norm": 0.7114492654800415, "learning_rate": 1.1054546926811914e-05, "loss": 1.6677, "step": 82355 }, { "epoch": 2.74, "grad_norm": 0.7086694836616516, "learning_rate": 1.1051735907017078e-05, "loss": 1.6581, "step": 82356 }, { "epoch": 2.74, "grad_norm": 0.709244966506958, "learning_rate": 1.1048925237962846e-05, "loss": 1.6428, "step": 82357 }, { "epoch": 2.74, "grad_norm": 0.7025864720344543, "learning_rate": 1.1046114919652715e-05, "loss": 1.7007, "step": 82358 }, { "epoch": 2.74, "grad_norm": 0.7094665169715881, "learning_rate": 1.1043304952089982e-05, "loss": 1.6207, "step": 82359 }, { "epoch": 2.74, "grad_norm": 0.684469997882843, "learning_rate": 1.1040495335278077e-05, "loss": 1.6281, "step": 82360 }, { "epoch": 2.74, "grad_norm": 0.7160096168518066, "learning_rate": 1.1037686069220464e-05, "loss": 1.726, "step": 82361 }, { "epoch": 2.74, "grad_norm": 0.6987568140029907, "learning_rate": 1.1034877153920574e-05, "loss": 1.6574, "step": 82362 }, { "epoch": 2.74, "grad_norm": 0.8273972868919373, "learning_rate": 1.103206858938167e-05, "loss": 1.7391, "step": 82363 }, { "epoch": 2.74, "grad_norm": 0.7167109847068787, "learning_rate": 1.1029260375607286e-05, "loss": 1.6036, "step": 82364 }, { "epoch": 2.74, "grad_norm": 0.7462418675422668, "learning_rate": 1.1026452512600847e-05, "loss": 1.722, "step": 82365 }, { "epoch": 2.74, "grad_norm": 0.7122361063957214, "learning_rate": 1.1023645000365688e-05, "loss": 1.6314, "step": 82366 }, { "epoch": 2.74, "grad_norm": 0.7134170532226562, "learning_rate": 1.1020837838905238e-05, "loss": 1.636, "step": 82367 }, { "epoch": 2.74, "grad_norm": 0.709915280342102, "learning_rate": 1.101803102822293e-05, "loss": 1.7353, "step": 82368 }, { "epoch": 2.74, "grad_norm": 0.7138420343399048, "learning_rate": 1.1015224568322156e-05, "loss": 1.6545, "step": 82369 }, { "epoch": 2.74, "grad_norm": 0.7048940062522888, "learning_rate": 1.1012418459206286e-05, "loss": 1.6503, "step": 82370 }, { "epoch": 2.74, "grad_norm": 0.7204194664955139, "learning_rate": 1.1009612700878745e-05, "loss": 1.6766, "step": 82371 }, { "epoch": 2.74, "grad_norm": 0.7158744931221008, "learning_rate": 1.1006807293343e-05, "loss": 1.6385, "step": 82372 }, { "epoch": 2.74, "grad_norm": 0.7264469265937805, "learning_rate": 1.1004002236602417e-05, "loss": 1.6551, "step": 82373 }, { "epoch": 2.74, "grad_norm": 0.6956855654716492, "learning_rate": 1.1001197530660355e-05, "loss": 1.6821, "step": 82374 }, { "epoch": 2.74, "grad_norm": 0.7000188827514648, "learning_rate": 1.099839317552028e-05, "loss": 1.5628, "step": 82375 }, { "epoch": 2.74, "grad_norm": 0.7288768291473389, "learning_rate": 1.0995589171185593e-05, "loss": 1.6666, "step": 82376 }, { "epoch": 2.74, "grad_norm": 0.6866192817687988, "learning_rate": 1.0992785517659618e-05, "loss": 1.5859, "step": 82377 }, { "epoch": 2.74, "grad_norm": 0.7387052178382874, "learning_rate": 1.0989982214945892e-05, "loss": 1.7285, "step": 82378 }, { "epoch": 2.74, "grad_norm": 0.739737331867218, "learning_rate": 1.0987179263047674e-05, "loss": 1.6868, "step": 82379 }, { "epoch": 2.74, "grad_norm": 0.7092388272285461, "learning_rate": 1.0984376661968497e-05, "loss": 1.7042, "step": 82380 }, { "epoch": 2.74, "grad_norm": 0.7228677272796631, "learning_rate": 1.0981574411711625e-05, "loss": 1.6111, "step": 82381 }, { "epoch": 2.74, "grad_norm": 0.7023084163665771, "learning_rate": 1.097877251228062e-05, "loss": 1.6298, "step": 82382 }, { "epoch": 2.74, "grad_norm": 0.7209547758102417, "learning_rate": 1.0975970963678782e-05, "loss": 1.675, "step": 82383 }, { "epoch": 2.74, "grad_norm": 0.7163342833518982, "learning_rate": 1.0973169765909473e-05, "loss": 1.6363, "step": 82384 }, { "epoch": 2.74, "grad_norm": 0.7164939641952515, "learning_rate": 1.0970368918976224e-05, "loss": 1.6848, "step": 82385 }, { "epoch": 2.74, "grad_norm": 0.7055010199546814, "learning_rate": 1.09675684228823e-05, "loss": 1.6388, "step": 82386 }, { "epoch": 2.74, "grad_norm": 0.7012441754341125, "learning_rate": 1.0964768277631198e-05, "loss": 1.6327, "step": 82387 }, { "epoch": 2.74, "grad_norm": 0.7019056081771851, "learning_rate": 1.096196848322628e-05, "loss": 1.6055, "step": 82388 }, { "epoch": 2.74, "grad_norm": 0.6956074833869934, "learning_rate": 1.0959169039670912e-05, "loss": 1.7029, "step": 82389 }, { "epoch": 2.74, "grad_norm": 0.7079057693481445, "learning_rate": 1.0956369946968557e-05, "loss": 1.6418, "step": 82390 }, { "epoch": 2.74, "grad_norm": 0.718774139881134, "learning_rate": 1.0953571205122547e-05, "loss": 1.6903, "step": 82391 }, { "epoch": 2.74, "grad_norm": 0.7125587463378906, "learning_rate": 1.0950772814136344e-05, "loss": 1.7517, "step": 82392 }, { "epoch": 2.74, "grad_norm": 0.7002359628677368, "learning_rate": 1.0947974774013313e-05, "loss": 1.6243, "step": 82393 }, { "epoch": 2.74, "grad_norm": 0.7002948522567749, "learning_rate": 1.0945177084756851e-05, "loss": 1.646, "step": 82394 }, { "epoch": 2.74, "grad_norm": 0.7056003212928772, "learning_rate": 1.0942379746370355e-05, "loss": 1.6403, "step": 82395 }, { "epoch": 2.74, "grad_norm": 0.7156461477279663, "learning_rate": 1.093958275885719e-05, "loss": 1.7675, "step": 82396 }, { "epoch": 2.74, "grad_norm": 0.7032772302627563, "learning_rate": 1.093678612222082e-05, "loss": 1.6711, "step": 82397 }, { "epoch": 2.74, "grad_norm": 0.7266650795936584, "learning_rate": 1.0933989836464574e-05, "loss": 1.627, "step": 82398 }, { "epoch": 2.74, "grad_norm": 0.7140315771102905, "learning_rate": 1.0931193901591883e-05, "loss": 1.714, "step": 82399 }, { "epoch": 2.74, "grad_norm": 0.715331494808197, "learning_rate": 1.092839831760608e-05, "loss": 1.6942, "step": 82400 }, { "epoch": 2.74, "grad_norm": 0.7324265837669373, "learning_rate": 1.0925603084510692e-05, "loss": 1.6749, "step": 82401 }, { "epoch": 2.74, "grad_norm": 0.7192320823669434, "learning_rate": 1.0922808202308986e-05, "loss": 1.6604, "step": 82402 }, { "epoch": 2.74, "grad_norm": 0.7059305906295776, "learning_rate": 1.092001367100439e-05, "loss": 1.6499, "step": 82403 }, { "epoch": 2.74, "grad_norm": 0.7145910859107971, "learning_rate": 1.0917219490600338e-05, "loss": 1.6331, "step": 82404 }, { "epoch": 2.74, "grad_norm": 0.7024418711662292, "learning_rate": 1.0914425661100157e-05, "loss": 1.7161, "step": 82405 }, { "epoch": 2.74, "grad_norm": 0.7730188369750977, "learning_rate": 1.0911632182507279e-05, "loss": 1.7909, "step": 82406 }, { "epoch": 2.74, "grad_norm": 0.7346175312995911, "learning_rate": 1.0908839054825036e-05, "loss": 1.7631, "step": 82407 }, { "epoch": 2.74, "grad_norm": 0.7121491432189941, "learning_rate": 1.0906046278056957e-05, "loss": 1.6587, "step": 82408 }, { "epoch": 2.74, "grad_norm": 0.7182032465934753, "learning_rate": 1.0903253852206307e-05, "loss": 1.6493, "step": 82409 }, { "epoch": 2.74, "grad_norm": 0.7244793176651001, "learning_rate": 1.090046177727648e-05, "loss": 1.6877, "step": 82410 }, { "epoch": 2.74, "grad_norm": 0.7167252898216248, "learning_rate": 1.0897670053270912e-05, "loss": 1.7193, "step": 82411 }, { "epoch": 2.74, "grad_norm": 0.7354122996330261, "learning_rate": 1.0894878680192998e-05, "loss": 1.6885, "step": 82412 }, { "epoch": 2.74, "grad_norm": 0.6953954100608826, "learning_rate": 1.0892087658046068e-05, "loss": 1.7065, "step": 82413 }, { "epoch": 2.74, "grad_norm": 0.7206484079360962, "learning_rate": 1.0889296986833552e-05, "loss": 1.6573, "step": 82414 }, { "epoch": 2.74, "grad_norm": 0.7156959772109985, "learning_rate": 1.0886506666558847e-05, "loss": 1.6844, "step": 82415 }, { "epoch": 2.74, "grad_norm": 0.728554904460907, "learning_rate": 1.0883716697225353e-05, "loss": 1.5517, "step": 82416 }, { "epoch": 2.74, "grad_norm": 0.7159662842750549, "learning_rate": 1.0880927078836365e-05, "loss": 1.6982, "step": 82417 }, { "epoch": 2.74, "grad_norm": 0.6961718797683716, "learning_rate": 1.0878137811395382e-05, "loss": 1.6367, "step": 82418 }, { "epoch": 2.74, "grad_norm": 0.7184710502624512, "learning_rate": 1.0875348894905734e-05, "loss": 1.6758, "step": 82419 }, { "epoch": 2.74, "grad_norm": 0.7084915637969971, "learning_rate": 1.0872560329370749e-05, "loss": 1.7237, "step": 82420 }, { "epoch": 2.74, "grad_norm": 0.7106829285621643, "learning_rate": 1.0869772114793929e-05, "loss": 1.7184, "step": 82421 }, { "epoch": 2.74, "grad_norm": 0.7003435492515564, "learning_rate": 1.08669842511786e-05, "loss": 1.6961, "step": 82422 }, { "epoch": 2.74, "grad_norm": 0.6944321393966675, "learning_rate": 1.0864196738528163e-05, "loss": 1.6797, "step": 82423 }, { "epoch": 2.74, "grad_norm": 0.7159402370452881, "learning_rate": 1.0861409576845948e-05, "loss": 1.712, "step": 82424 }, { "epoch": 2.74, "grad_norm": 0.7130578756332397, "learning_rate": 1.0858622766135417e-05, "loss": 1.6649, "step": 82425 }, { "epoch": 2.74, "grad_norm": 0.7056471109390259, "learning_rate": 1.0855836306399934e-05, "loss": 1.768, "step": 82426 }, { "epoch": 2.74, "grad_norm": 0.7045484781265259, "learning_rate": 1.0853050197642798e-05, "loss": 1.6599, "step": 82427 }, { "epoch": 2.74, "grad_norm": 0.6992128491401672, "learning_rate": 1.0850264439867474e-05, "loss": 1.6479, "step": 82428 }, { "epoch": 2.74, "grad_norm": 0.6790398359298706, "learning_rate": 1.0847479033077355e-05, "loss": 1.6632, "step": 82429 }, { "epoch": 2.74, "grad_norm": 0.7234674096107483, "learning_rate": 1.0844693977275776e-05, "loss": 1.7594, "step": 82430 }, { "epoch": 2.74, "grad_norm": 0.7183206081390381, "learning_rate": 1.0841909272466132e-05, "loss": 1.6477, "step": 82431 }, { "epoch": 2.74, "grad_norm": 0.7164418697357178, "learning_rate": 1.083912491865182e-05, "loss": 1.6586, "step": 82432 }, { "epoch": 2.74, "grad_norm": 0.690773069858551, "learning_rate": 1.0836340915836206e-05, "loss": 1.7153, "step": 82433 }, { "epoch": 2.74, "grad_norm": 0.7029968500137329, "learning_rate": 1.0833557264022619e-05, "loss": 1.6984, "step": 82434 }, { "epoch": 2.74, "grad_norm": 0.7158634066581726, "learning_rate": 1.0830773963214523e-05, "loss": 1.6346, "step": 82435 }, { "epoch": 2.74, "grad_norm": 0.6946604251861572, "learning_rate": 1.082799101341525e-05, "loss": 1.6424, "step": 82436 }, { "epoch": 2.74, "grad_norm": 0.6968541741371155, "learning_rate": 1.0825208414628228e-05, "loss": 1.675, "step": 82437 }, { "epoch": 2.74, "grad_norm": 0.7151674628257751, "learning_rate": 1.0822426166856723e-05, "loss": 1.6912, "step": 82438 }, { "epoch": 2.74, "grad_norm": 0.7148047089576721, "learning_rate": 1.0819644270104234e-05, "loss": 1.7023, "step": 82439 }, { "epoch": 2.74, "grad_norm": 0.685359001159668, "learning_rate": 1.0816862724374087e-05, "loss": 1.7378, "step": 82440 }, { "epoch": 2.74, "grad_norm": 0.7268704771995544, "learning_rate": 1.081408152966965e-05, "loss": 1.7153, "step": 82441 }, { "epoch": 2.74, "grad_norm": 0.7618588805198669, "learning_rate": 1.0811300685994318e-05, "loss": 1.6416, "step": 82442 }, { "epoch": 2.74, "grad_norm": 0.7095742225646973, "learning_rate": 1.0808520193351423e-05, "loss": 1.5786, "step": 82443 }, { "epoch": 2.74, "grad_norm": 0.7033278942108154, "learning_rate": 1.0805740051744426e-05, "loss": 1.643, "step": 82444 }, { "epoch": 2.74, "grad_norm": 0.7041158080101013, "learning_rate": 1.080296026117663e-05, "loss": 1.6682, "step": 82445 }, { "epoch": 2.74, "grad_norm": 0.7048171758651733, "learning_rate": 1.0800180821651394e-05, "loss": 1.7217, "step": 82446 }, { "epoch": 2.74, "grad_norm": 0.7227984666824341, "learning_rate": 1.0797401733172184e-05, "loss": 1.6332, "step": 82447 }, { "epoch": 2.74, "grad_norm": 0.7028051614761353, "learning_rate": 1.0794622995742263e-05, "loss": 1.6385, "step": 82448 }, { "epoch": 2.74, "grad_norm": 0.695227861404419, "learning_rate": 1.0791844609365097e-05, "loss": 1.6553, "step": 82449 }, { "epoch": 2.74, "grad_norm": 0.7000361680984497, "learning_rate": 1.0789066574043981e-05, "loss": 1.6446, "step": 82450 }, { "epoch": 2.74, "grad_norm": 0.7097827792167664, "learning_rate": 1.0786288889782379e-05, "loss": 1.6836, "step": 82451 }, { "epoch": 2.74, "grad_norm": 0.723958432674408, "learning_rate": 1.078351155658359e-05, "loss": 1.7052, "step": 82452 }, { "epoch": 2.74, "grad_norm": 0.7294013500213623, "learning_rate": 1.0780734574450978e-05, "loss": 1.7174, "step": 82453 }, { "epoch": 2.74, "grad_norm": 0.6894564032554626, "learning_rate": 1.0777957943387939e-05, "loss": 1.6438, "step": 82454 }, { "epoch": 2.74, "grad_norm": 0.6981838345527649, "learning_rate": 1.0775181663397869e-05, "loss": 1.6724, "step": 82455 }, { "epoch": 2.74, "grad_norm": 0.7056147456169128, "learning_rate": 1.0772405734484102e-05, "loss": 1.7256, "step": 82456 }, { "epoch": 2.74, "grad_norm": 0.7104142904281616, "learning_rate": 1.076963015665e-05, "loss": 1.6694, "step": 82457 }, { "epoch": 2.74, "grad_norm": 0.7223535776138306, "learning_rate": 1.0766854929898993e-05, "loss": 1.7004, "step": 82458 }, { "epoch": 2.74, "grad_norm": 0.7000259160995483, "learning_rate": 1.076408005423438e-05, "loss": 1.6327, "step": 82459 }, { "epoch": 2.74, "grad_norm": 0.7188948392868042, "learning_rate": 1.0761305529659558e-05, "loss": 1.747, "step": 82460 }, { "epoch": 2.74, "grad_norm": 0.7115395665168762, "learning_rate": 1.0758531356177892e-05, "loss": 1.7215, "step": 82461 }, { "epoch": 2.74, "grad_norm": 0.7468237280845642, "learning_rate": 1.0755757533792774e-05, "loss": 1.7218, "step": 82462 }, { "epoch": 2.74, "grad_norm": 0.7295397520065308, "learning_rate": 1.0752984062507508e-05, "loss": 1.6702, "step": 82463 }, { "epoch": 2.74, "grad_norm": 0.7158505916595459, "learning_rate": 1.075021094232552e-05, "loss": 1.7228, "step": 82464 }, { "epoch": 2.74, "grad_norm": 0.7223606109619141, "learning_rate": 1.0747438173250212e-05, "loss": 1.7219, "step": 82465 }, { "epoch": 2.74, "grad_norm": 0.7386384010314941, "learning_rate": 1.074466575528481e-05, "loss": 1.6836, "step": 82466 }, { "epoch": 2.74, "grad_norm": 0.7178402543067932, "learning_rate": 1.074189368843278e-05, "loss": 1.7105, "step": 82467 }, { "epoch": 2.74, "grad_norm": 0.7267224788665771, "learning_rate": 1.0739121972697518e-05, "loss": 1.7098, "step": 82468 }, { "epoch": 2.74, "grad_norm": 0.7240411639213562, "learning_rate": 1.0736350608082323e-05, "loss": 1.6404, "step": 82469 }, { "epoch": 2.74, "grad_norm": 0.7123384475708008, "learning_rate": 1.0733579594590525e-05, "loss": 1.6971, "step": 82470 }, { "epoch": 2.74, "grad_norm": 0.724290132522583, "learning_rate": 1.0730808932225553e-05, "loss": 1.7115, "step": 82471 }, { "epoch": 2.74, "grad_norm": 0.7090052962303162, "learning_rate": 1.0728038620990842e-05, "loss": 1.5697, "step": 82472 }, { "epoch": 2.74, "grad_norm": 0.6878072619438171, "learning_rate": 1.0725268660889618e-05, "loss": 1.6958, "step": 82473 }, { "epoch": 2.74, "grad_norm": 0.713646411895752, "learning_rate": 1.0722499051925249e-05, "loss": 1.7491, "step": 82474 }, { "epoch": 2.74, "grad_norm": 0.7162981629371643, "learning_rate": 1.0719729794101195e-05, "loss": 1.6211, "step": 82475 }, { "epoch": 2.74, "grad_norm": 0.7201480269432068, "learning_rate": 1.071696088742079e-05, "loss": 1.6758, "step": 82476 }, { "epoch": 2.74, "grad_norm": 0.7175280451774597, "learning_rate": 1.0714192331887295e-05, "loss": 1.7236, "step": 82477 }, { "epoch": 2.74, "grad_norm": 0.715244472026825, "learning_rate": 1.0711424127504176e-05, "loss": 1.8046, "step": 82478 }, { "epoch": 2.74, "grad_norm": 0.7382011413574219, "learning_rate": 1.070865627427483e-05, "loss": 1.7249, "step": 82479 }, { "epoch": 2.74, "grad_norm": 0.7007744312286377, "learning_rate": 1.0705888772202453e-05, "loss": 1.6994, "step": 82480 }, { "epoch": 2.74, "grad_norm": 0.7151673436164856, "learning_rate": 1.0703121621290545e-05, "loss": 1.6913, "step": 82481 }, { "epoch": 2.74, "grad_norm": 0.7136589884757996, "learning_rate": 1.0700354821542434e-05, "loss": 1.6963, "step": 82482 }, { "epoch": 2.74, "grad_norm": 0.7158974409103394, "learning_rate": 1.0697588372961452e-05, "loss": 1.6925, "step": 82483 }, { "epoch": 2.74, "grad_norm": 0.7073830366134644, "learning_rate": 1.0694822275550963e-05, "loss": 1.6677, "step": 82484 }, { "epoch": 2.74, "grad_norm": 0.7155281901359558, "learning_rate": 1.0692056529314363e-05, "loss": 1.6423, "step": 82485 }, { "epoch": 2.74, "grad_norm": 0.7002173066139221, "learning_rate": 1.068929113425495e-05, "loss": 1.6563, "step": 82486 }, { "epoch": 2.74, "grad_norm": 0.7313467264175415, "learning_rate": 1.068652609037609e-05, "loss": 1.6238, "step": 82487 }, { "epoch": 2.74, "grad_norm": 0.7130817770957947, "learning_rate": 1.0683761397681179e-05, "loss": 1.6197, "step": 82488 }, { "epoch": 2.74, "grad_norm": 0.6981638669967651, "learning_rate": 1.0680997056173578e-05, "loss": 1.7048, "step": 82489 }, { "epoch": 2.74, "grad_norm": 0.7104401588439941, "learning_rate": 1.0678233065856589e-05, "loss": 1.67, "step": 82490 }, { "epoch": 2.74, "grad_norm": 0.7158374190330505, "learning_rate": 1.0675469426733608e-05, "loss": 1.7078, "step": 82491 }, { "epoch": 2.74, "grad_norm": 0.7095393538475037, "learning_rate": 1.0672706138807963e-05, "loss": 1.7193, "step": 82492 }, { "epoch": 2.74, "grad_norm": 0.7006515264511108, "learning_rate": 1.0669943202083054e-05, "loss": 1.694, "step": 82493 }, { "epoch": 2.74, "grad_norm": 0.7197591662406921, "learning_rate": 1.0667180616562176e-05, "loss": 1.6678, "step": 82494 }, { "epoch": 2.74, "grad_norm": 0.6959959864616394, "learning_rate": 1.066441838224873e-05, "loss": 1.6881, "step": 82495 }, { "epoch": 2.74, "grad_norm": 0.7336651086807251, "learning_rate": 1.0661656499146009e-05, "loss": 1.7414, "step": 82496 }, { "epoch": 2.74, "grad_norm": 0.719158411026001, "learning_rate": 1.0658894967257447e-05, "loss": 1.7144, "step": 82497 }, { "epoch": 2.74, "grad_norm": 0.7359415292739868, "learning_rate": 1.0656133786586308e-05, "loss": 1.6568, "step": 82498 }, { "epoch": 2.74, "grad_norm": 0.7092044353485107, "learning_rate": 1.0653372957136053e-05, "loss": 1.7436, "step": 82499 }, { "epoch": 2.74, "grad_norm": 0.7164930105209351, "learning_rate": 1.0650612478909947e-05, "loss": 1.7081, "step": 82500 }, { "epoch": 2.74, "grad_norm": 0.7101799845695496, "learning_rate": 1.0647852351911324e-05, "loss": 1.6599, "step": 82501 }, { "epoch": 2.74, "grad_norm": 0.7073391675949097, "learning_rate": 1.064509257614361e-05, "loss": 1.6499, "step": 82502 }, { "epoch": 2.74, "grad_norm": 0.7217703461647034, "learning_rate": 1.0642333151610105e-05, "loss": 1.7088, "step": 82503 }, { "epoch": 2.74, "grad_norm": 0.6972426772117615, "learning_rate": 1.0639574078314172e-05, "loss": 1.6219, "step": 82504 }, { "epoch": 2.74, "grad_norm": 0.7129487991333008, "learning_rate": 1.0636815356259209e-05, "loss": 1.6539, "step": 82505 }, { "epoch": 2.74, "grad_norm": 0.6910574436187744, "learning_rate": 1.0634056985448448e-05, "loss": 1.6429, "step": 82506 }, { "epoch": 2.75, "grad_norm": 0.7113093733787537, "learning_rate": 1.0631298965885348e-05, "loss": 1.6677, "step": 82507 }, { "epoch": 2.75, "grad_norm": 0.7277389168739319, "learning_rate": 1.0628541297573178e-05, "loss": 1.6672, "step": 82508 }, { "epoch": 2.75, "grad_norm": 0.7046881914138794, "learning_rate": 1.0625783980515368e-05, "loss": 1.6919, "step": 82509 }, { "epoch": 2.75, "grad_norm": 0.6967357993125916, "learning_rate": 1.062302701471518e-05, "loss": 1.6245, "step": 82510 }, { "epoch": 2.75, "grad_norm": 0.7332178950309753, "learning_rate": 1.0620270400176012e-05, "loss": 1.6624, "step": 82511 }, { "epoch": 2.75, "grad_norm": 0.7045549750328064, "learning_rate": 1.0617514136901229e-05, "loss": 1.7105, "step": 82512 }, { "epoch": 2.75, "grad_norm": 0.7163931131362915, "learning_rate": 1.0614758224894094e-05, "loss": 1.6571, "step": 82513 }, { "epoch": 2.75, "grad_norm": 0.7135288119316101, "learning_rate": 1.0612002664158037e-05, "loss": 1.641, "step": 82514 }, { "epoch": 2.75, "grad_norm": 0.7017718553543091, "learning_rate": 1.0609247454696356e-05, "loss": 1.6578, "step": 82515 }, { "epoch": 2.75, "grad_norm": 0.6936951279640198, "learning_rate": 1.0606492596512417e-05, "loss": 1.6756, "step": 82516 }, { "epoch": 2.75, "grad_norm": 0.7113326787948608, "learning_rate": 1.0603738089609515e-05, "loss": 1.742, "step": 82517 }, { "epoch": 2.75, "grad_norm": 0.7059506177902222, "learning_rate": 1.060098393399108e-05, "loss": 1.7512, "step": 82518 }, { "epoch": 2.75, "grad_norm": 0.6945285797119141, "learning_rate": 1.0598230129660412e-05, "loss": 1.7186, "step": 82519 }, { "epoch": 2.75, "grad_norm": 0.7267230153083801, "learning_rate": 1.059547667662084e-05, "loss": 1.7231, "step": 82520 }, { "epoch": 2.75, "grad_norm": 0.7137316465377808, "learning_rate": 1.0592723574875727e-05, "loss": 1.6416, "step": 82521 }, { "epoch": 2.75, "grad_norm": 0.7261459827423096, "learning_rate": 1.0589970824428406e-05, "loss": 1.6943, "step": 82522 }, { "epoch": 2.75, "grad_norm": 0.7422512769699097, "learning_rate": 1.0587218425282173e-05, "loss": 1.8239, "step": 82523 }, { "epoch": 2.75, "grad_norm": 0.717637836933136, "learning_rate": 1.0584466377440426e-05, "loss": 1.6538, "step": 82524 }, { "epoch": 2.75, "grad_norm": 0.6854925751686096, "learning_rate": 1.0581714680906527e-05, "loss": 1.682, "step": 82525 }, { "epoch": 2.75, "grad_norm": 0.7380059957504272, "learning_rate": 1.057896333568381e-05, "loss": 1.6908, "step": 82526 }, { "epoch": 2.75, "grad_norm": 0.7401089668273926, "learning_rate": 1.0576212341775502e-05, "loss": 1.6453, "step": 82527 }, { "epoch": 2.75, "grad_norm": 0.7009919285774231, "learning_rate": 1.0573461699185104e-05, "loss": 1.6732, "step": 82528 }, { "epoch": 2.75, "grad_norm": 0.7153157591819763, "learning_rate": 1.0570711407915843e-05, "loss": 1.6136, "step": 82529 }, { "epoch": 2.75, "grad_norm": 0.7082219123840332, "learning_rate": 1.0567961467971086e-05, "loss": 1.6468, "step": 82530 }, { "epoch": 2.75, "grad_norm": 0.7393734455108643, "learning_rate": 1.0565211879354163e-05, "loss": 1.6912, "step": 82531 }, { "epoch": 2.75, "grad_norm": 0.7203099131584167, "learning_rate": 1.056246264206847e-05, "loss": 1.736, "step": 82532 }, { "epoch": 2.75, "grad_norm": 0.7113202810287476, "learning_rate": 1.0559713756117305e-05, "loss": 1.679, "step": 82533 }, { "epoch": 2.75, "grad_norm": 0.7300552129745483, "learning_rate": 1.0556965221503966e-05, "loss": 1.6374, "step": 82534 }, { "epoch": 2.75, "grad_norm": 0.7294779419898987, "learning_rate": 1.055421703823185e-05, "loss": 1.7063, "step": 82535 }, { "epoch": 2.75, "grad_norm": 0.7263372540473938, "learning_rate": 1.0551469206304253e-05, "loss": 1.7746, "step": 82536 }, { "epoch": 2.75, "grad_norm": 0.7158119082450867, "learning_rate": 1.0548721725724507e-05, "loss": 1.643, "step": 82537 }, { "epoch": 2.75, "grad_norm": 0.7227140069007874, "learning_rate": 1.0545974596495976e-05, "loss": 1.8003, "step": 82538 }, { "epoch": 2.75, "grad_norm": 0.7112643718719482, "learning_rate": 1.0543227818621992e-05, "loss": 1.6579, "step": 82539 }, { "epoch": 2.75, "grad_norm": 0.7027466297149658, "learning_rate": 1.0540481392105915e-05, "loss": 1.5776, "step": 82540 }, { "epoch": 2.75, "grad_norm": 0.6978628635406494, "learning_rate": 1.0537735316950979e-05, "loss": 1.6545, "step": 82541 }, { "epoch": 2.75, "grad_norm": 0.7003602385520935, "learning_rate": 1.0534989593160615e-05, "loss": 1.7442, "step": 82542 }, { "epoch": 2.75, "grad_norm": 0.7223475575447083, "learning_rate": 1.053224422073815e-05, "loss": 1.6799, "step": 82543 }, { "epoch": 2.75, "grad_norm": 0.7074776887893677, "learning_rate": 1.052949919968682e-05, "loss": 1.6988, "step": 82544 }, { "epoch": 2.75, "grad_norm": 0.7170892953872681, "learning_rate": 1.0526754530010051e-05, "loss": 1.6273, "step": 82545 }, { "epoch": 2.75, "grad_norm": 0.6927971243858337, "learning_rate": 1.0524010211711176e-05, "loss": 1.6901, "step": 82546 }, { "epoch": 2.75, "grad_norm": 0.7302053570747375, "learning_rate": 1.0521266244793491e-05, "loss": 1.7211, "step": 82547 }, { "epoch": 2.75, "grad_norm": 0.7229928374290466, "learning_rate": 1.0518522629260329e-05, "loss": 1.6821, "step": 82548 }, { "epoch": 2.75, "grad_norm": 0.7296789884567261, "learning_rate": 1.0515779365115018e-05, "loss": 1.5815, "step": 82549 }, { "epoch": 2.75, "grad_norm": 0.7025177478790283, "learning_rate": 1.0513036452360923e-05, "loss": 1.6284, "step": 82550 }, { "epoch": 2.75, "grad_norm": 0.726971447467804, "learning_rate": 1.0510293891001342e-05, "loss": 1.6882, "step": 82551 }, { "epoch": 2.75, "grad_norm": 0.7103908061981201, "learning_rate": 1.0507551681039606e-05, "loss": 1.6475, "step": 82552 }, { "epoch": 2.75, "grad_norm": 0.7064833045005798, "learning_rate": 1.050480982247901e-05, "loss": 1.6664, "step": 82553 }, { "epoch": 2.75, "grad_norm": 0.6949813961982727, "learning_rate": 1.0502068315322987e-05, "loss": 1.6638, "step": 82554 }, { "epoch": 2.75, "grad_norm": 0.7354978322982788, "learning_rate": 1.0499327159574733e-05, "loss": 1.7422, "step": 82555 }, { "epoch": 2.75, "grad_norm": 0.6946634650230408, "learning_rate": 1.049658635523768e-05, "loss": 1.7092, "step": 82556 }, { "epoch": 2.75, "grad_norm": 0.7002540230751038, "learning_rate": 1.0493845902315123e-05, "loss": 1.6553, "step": 82557 }, { "epoch": 2.75, "grad_norm": 0.7241306304931641, "learning_rate": 1.0491105800810362e-05, "loss": 1.6692, "step": 82558 }, { "epoch": 2.75, "grad_norm": 0.712745726108551, "learning_rate": 1.0488366050726726e-05, "loss": 1.63, "step": 82559 }, { "epoch": 2.75, "grad_norm": 0.7105137705802917, "learning_rate": 1.048562665206758e-05, "loss": 1.6395, "step": 82560 }, { "epoch": 2.75, "grad_norm": 0.713952362537384, "learning_rate": 1.0482887604836221e-05, "loss": 1.6783, "step": 82561 }, { "epoch": 2.75, "grad_norm": 0.6998468041419983, "learning_rate": 1.0480148909035979e-05, "loss": 1.6498, "step": 82562 }, { "epoch": 2.75, "grad_norm": 0.7212775945663452, "learning_rate": 1.0477410564670152e-05, "loss": 1.6427, "step": 82563 }, { "epoch": 2.75, "grad_norm": 0.7358302474021912, "learning_rate": 1.0474672571742137e-05, "loss": 1.7328, "step": 82564 }, { "epoch": 2.75, "grad_norm": 0.691007137298584, "learning_rate": 1.0471934930255166e-05, "loss": 1.6613, "step": 82565 }, { "epoch": 2.75, "grad_norm": 0.7074019312858582, "learning_rate": 1.0469197640212634e-05, "loss": 1.6917, "step": 82566 }, { "epoch": 2.75, "grad_norm": 0.7104200124740601, "learning_rate": 1.0466460701617806e-05, "loss": 1.6563, "step": 82567 }, { "epoch": 2.75, "grad_norm": 0.7301211953163147, "learning_rate": 1.0463724114474082e-05, "loss": 1.6801, "step": 82568 }, { "epoch": 2.75, "grad_norm": 0.7151472568511963, "learning_rate": 1.046098787878472e-05, "loss": 1.6704, "step": 82569 }, { "epoch": 2.75, "grad_norm": 0.728797972202301, "learning_rate": 1.0458251994553024e-05, "loss": 1.7127, "step": 82570 }, { "epoch": 2.75, "grad_norm": 0.7135959267616272, "learning_rate": 1.0455516461782387e-05, "loss": 1.7312, "step": 82571 }, { "epoch": 2.75, "grad_norm": 0.740784227848053, "learning_rate": 1.0452781280476074e-05, "loss": 1.6674, "step": 82572 }, { "epoch": 2.75, "grad_norm": 0.7036404609680176, "learning_rate": 1.0450046450637383e-05, "loss": 1.7267, "step": 82573 }, { "epoch": 2.75, "grad_norm": 0.711728572845459, "learning_rate": 1.0447311972269712e-05, "loss": 1.6788, "step": 82574 }, { "epoch": 2.75, "grad_norm": 0.7043980360031128, "learning_rate": 1.0444577845376356e-05, "loss": 1.6663, "step": 82575 }, { "epoch": 2.75, "grad_norm": 0.7333911061286926, "learning_rate": 1.0441844069960614e-05, "loss": 1.6405, "step": 82576 }, { "epoch": 2.75, "grad_norm": 0.6920787692070007, "learning_rate": 1.043911064602575e-05, "loss": 1.7398, "step": 82577 }, { "epoch": 2.75, "grad_norm": 0.6879993081092834, "learning_rate": 1.0436377573575227e-05, "loss": 1.6556, "step": 82578 }, { "epoch": 2.75, "grad_norm": 0.7460517287254333, "learning_rate": 1.0433644852612245e-05, "loss": 1.6968, "step": 82579 }, { "epoch": 2.75, "grad_norm": 0.7238523364067078, "learning_rate": 1.043091248314013e-05, "loss": 1.6582, "step": 82580 }, { "epoch": 2.75, "grad_norm": 0.7204858660697937, "learning_rate": 1.0428180465162183e-05, "loss": 1.6912, "step": 82581 }, { "epoch": 2.75, "grad_norm": 0.7233594059944153, "learning_rate": 1.0425448798681869e-05, "loss": 1.7312, "step": 82582 }, { "epoch": 2.75, "grad_norm": 0.6941047310829163, "learning_rate": 1.0422717483702314e-05, "loss": 1.6505, "step": 82583 }, { "epoch": 2.75, "grad_norm": 0.7293779850006104, "learning_rate": 1.041998652022692e-05, "loss": 1.6359, "step": 82584 }, { "epoch": 2.75, "grad_norm": 0.7035936117172241, "learning_rate": 1.0417255908259014e-05, "loss": 1.6339, "step": 82585 }, { "epoch": 2.75, "grad_norm": 0.7017301321029663, "learning_rate": 1.0414525647801897e-05, "loss": 1.7724, "step": 82586 }, { "epoch": 2.75, "grad_norm": 0.7174634337425232, "learning_rate": 1.0411795738858863e-05, "loss": 1.7338, "step": 82587 }, { "epoch": 2.75, "grad_norm": 0.7057981491088867, "learning_rate": 1.0409066181433212e-05, "loss": 1.6792, "step": 82588 }, { "epoch": 2.75, "grad_norm": 0.7238770127296448, "learning_rate": 1.0406336975528373e-05, "loss": 1.708, "step": 82589 }, { "epoch": 2.75, "grad_norm": 0.6955723166465759, "learning_rate": 1.0403608121147477e-05, "loss": 1.6406, "step": 82590 }, { "epoch": 2.75, "grad_norm": 0.7031832337379456, "learning_rate": 1.0400879618293956e-05, "loss": 1.6035, "step": 82591 }, { "epoch": 2.75, "grad_norm": 0.6947107911109924, "learning_rate": 1.0398151466971138e-05, "loss": 1.5997, "step": 82592 }, { "epoch": 2.75, "grad_norm": 0.7473444938659668, "learning_rate": 1.0395423667182257e-05, "loss": 1.6701, "step": 82593 }, { "epoch": 2.75, "grad_norm": 0.7052159309387207, "learning_rate": 1.0392696218930673e-05, "loss": 1.6892, "step": 82594 }, { "epoch": 2.75, "grad_norm": 0.7082483172416687, "learning_rate": 1.0389969122219655e-05, "loss": 1.6839, "step": 82595 }, { "epoch": 2.75, "grad_norm": 0.7180320024490356, "learning_rate": 1.0387242377052596e-05, "loss": 1.6465, "step": 82596 }, { "epoch": 2.75, "grad_norm": 0.6766749024391174, "learning_rate": 1.0384515983432729e-05, "loss": 1.6255, "step": 82597 }, { "epoch": 2.75, "grad_norm": 0.7070139646530151, "learning_rate": 1.038178994136335e-05, "loss": 1.6762, "step": 82598 }, { "epoch": 2.75, "grad_norm": 0.70408034324646, "learning_rate": 1.0379064250847857e-05, "loss": 1.6285, "step": 82599 }, { "epoch": 2.75, "grad_norm": 0.7043265700340271, "learning_rate": 1.037633891188948e-05, "loss": 1.8122, "step": 82600 }, { "epoch": 2.75, "grad_norm": 0.7257527709007263, "learning_rate": 1.0373613924491553e-05, "loss": 1.6875, "step": 82601 }, { "epoch": 2.75, "grad_norm": 0.7421208024024963, "learning_rate": 1.0370889288657403e-05, "loss": 1.6788, "step": 82602 }, { "epoch": 2.75, "grad_norm": 0.6893783807754517, "learning_rate": 1.0368165004390295e-05, "loss": 1.6674, "step": 82603 }, { "epoch": 2.75, "grad_norm": 0.7068402171134949, "learning_rate": 1.036544107169356e-05, "loss": 1.6516, "step": 82604 }, { "epoch": 2.75, "grad_norm": 0.7112137675285339, "learning_rate": 1.0362717490570493e-05, "loss": 1.6768, "step": 82605 }, { "epoch": 2.75, "grad_norm": 0.7220905423164368, "learning_rate": 1.0359994261024462e-05, "loss": 1.7161, "step": 82606 }, { "epoch": 2.75, "grad_norm": 2.6156466007232666, "learning_rate": 1.0357271383058696e-05, "loss": 1.7608, "step": 82607 }, { "epoch": 2.75, "grad_norm": 0.7270689606666565, "learning_rate": 1.0354548856676492e-05, "loss": 1.6807, "step": 82608 }, { "epoch": 2.75, "grad_norm": 0.7050601840019226, "learning_rate": 1.0351826681881247e-05, "loss": 1.6385, "step": 82609 }, { "epoch": 2.75, "grad_norm": 0.6966314315795898, "learning_rate": 1.0349104858676194e-05, "loss": 1.681, "step": 82610 }, { "epoch": 2.75, "grad_norm": 0.7044512033462524, "learning_rate": 1.0346383387064594e-05, "loss": 1.6754, "step": 82611 }, { "epoch": 2.75, "grad_norm": 0.731096088886261, "learning_rate": 1.034366226704988e-05, "loss": 1.75, "step": 82612 }, { "epoch": 2.75, "grad_norm": 0.7104206681251526, "learning_rate": 1.0340941498635213e-05, "loss": 1.6518, "step": 82613 }, { "epoch": 2.75, "grad_norm": 0.6982969641685486, "learning_rate": 1.0338221081823994e-05, "loss": 1.7186, "step": 82614 }, { "epoch": 2.75, "grad_norm": 0.6899144053459167, "learning_rate": 1.0335501016619485e-05, "loss": 1.6527, "step": 82615 }, { "epoch": 2.75, "grad_norm": 0.7082077860832214, "learning_rate": 1.0332781303025017e-05, "loss": 1.6326, "step": 82616 }, { "epoch": 2.75, "grad_norm": 0.7139440178871155, "learning_rate": 1.0330061941043888e-05, "loss": 1.6772, "step": 82617 }, { "epoch": 2.75, "grad_norm": 0.7142683267593384, "learning_rate": 1.0327342930679328e-05, "loss": 1.6726, "step": 82618 }, { "epoch": 2.75, "grad_norm": 0.6989699602127075, "learning_rate": 1.0324624271934768e-05, "loss": 1.6125, "step": 82619 }, { "epoch": 2.75, "grad_norm": 0.7212826609611511, "learning_rate": 1.0321905964813337e-05, "loss": 1.7376, "step": 82620 }, { "epoch": 2.75, "grad_norm": 0.7022301554679871, "learning_rate": 1.0319188009318502e-05, "loss": 1.6265, "step": 82621 }, { "epoch": 2.75, "grad_norm": 0.7013114094734192, "learning_rate": 1.0316470405453492e-05, "loss": 1.6708, "step": 82622 }, { "epoch": 2.75, "grad_norm": 0.7082667946815491, "learning_rate": 1.0313753153221571e-05, "loss": 1.7499, "step": 82623 }, { "epoch": 2.75, "grad_norm": 0.7317519187927246, "learning_rate": 1.0311036252626103e-05, "loss": 1.6296, "step": 82624 }, { "epoch": 2.75, "grad_norm": 0.7021045088768005, "learning_rate": 1.0308319703670288e-05, "loss": 1.6772, "step": 82625 }, { "epoch": 2.75, "grad_norm": 0.702609121799469, "learning_rate": 1.0305603506357551e-05, "loss": 1.6697, "step": 82626 }, { "epoch": 2.75, "grad_norm": 0.7161691784858704, "learning_rate": 1.0302887660691095e-05, "loss": 1.6137, "step": 82627 }, { "epoch": 2.75, "grad_norm": 0.7180042266845703, "learning_rate": 1.0300172166674282e-05, "loss": 1.6428, "step": 82628 }, { "epoch": 2.75, "grad_norm": 0.7081809639930725, "learning_rate": 1.0297457024310374e-05, "loss": 1.6399, "step": 82629 }, { "epoch": 2.75, "grad_norm": 0.7484484910964966, "learning_rate": 1.0294742233602605e-05, "loss": 1.661, "step": 82630 }, { "epoch": 2.75, "grad_norm": 0.7130672931671143, "learning_rate": 1.0292027794554402e-05, "loss": 1.6623, "step": 82631 }, { "epoch": 2.75, "grad_norm": 0.7253548502922058, "learning_rate": 1.0289313707168967e-05, "loss": 1.655, "step": 82632 }, { "epoch": 2.75, "grad_norm": 0.7285252213478088, "learning_rate": 1.0286599971449593e-05, "loss": 1.6499, "step": 82633 }, { "epoch": 2.75, "grad_norm": 0.7375787496566772, "learning_rate": 1.0283886587399615e-05, "loss": 1.6558, "step": 82634 }, { "epoch": 2.75, "grad_norm": 0.7019426822662354, "learning_rate": 1.0281173555022327e-05, "loss": 1.6451, "step": 82635 }, { "epoch": 2.75, "grad_norm": 0.7279022932052612, "learning_rate": 1.0278460874321026e-05, "loss": 1.7291, "step": 82636 }, { "epoch": 2.75, "grad_norm": 0.7015625834465027, "learning_rate": 1.0275748545298946e-05, "loss": 1.7233, "step": 82637 }, { "epoch": 2.75, "grad_norm": 0.6977068185806274, "learning_rate": 1.0273036567959447e-05, "loss": 1.5807, "step": 82638 }, { "epoch": 2.75, "grad_norm": 0.7144705057144165, "learning_rate": 1.0270324942305763e-05, "loss": 1.7519, "step": 82639 }, { "epoch": 2.75, "grad_norm": 0.7375679016113281, "learning_rate": 1.0267613668341224e-05, "loss": 1.6543, "step": 82640 }, { "epoch": 2.75, "grad_norm": 0.7122036814689636, "learning_rate": 1.0264902746069093e-05, "loss": 1.7001, "step": 82641 }, { "epoch": 2.75, "grad_norm": 0.7258034348487854, "learning_rate": 1.02621921754927e-05, "loss": 1.6479, "step": 82642 }, { "epoch": 2.75, "grad_norm": 0.7083649039268494, "learning_rate": 1.0259481956615345e-05, "loss": 1.6567, "step": 82643 }, { "epoch": 2.75, "grad_norm": 0.7111018300056458, "learning_rate": 1.0256772089440225e-05, "loss": 1.6598, "step": 82644 }, { "epoch": 2.75, "grad_norm": 0.7273358702659607, "learning_rate": 1.0254062573970733e-05, "loss": 1.6075, "step": 82645 }, { "epoch": 2.75, "grad_norm": 0.7297348380088806, "learning_rate": 1.025135341021014e-05, "loss": 1.679, "step": 82646 }, { "epoch": 2.75, "grad_norm": 0.7037925124168396, "learning_rate": 1.0248644598161671e-05, "loss": 1.6925, "step": 82647 }, { "epoch": 2.75, "grad_norm": 0.7050443291664124, "learning_rate": 1.0245936137828625e-05, "loss": 1.6417, "step": 82648 }, { "epoch": 2.75, "grad_norm": 0.7283518314361572, "learning_rate": 1.0243228029214368e-05, "loss": 1.6899, "step": 82649 }, { "epoch": 2.75, "grad_norm": 0.7337150573730469, "learning_rate": 1.024052027232216e-05, "loss": 1.7071, "step": 82650 }, { "epoch": 2.75, "grad_norm": 0.7192105650901794, "learning_rate": 1.0237812867155204e-05, "loss": 1.6885, "step": 82651 }, { "epoch": 2.75, "grad_norm": 0.7088245749473572, "learning_rate": 1.0235105813716893e-05, "loss": 1.6428, "step": 82652 }, { "epoch": 2.75, "grad_norm": 0.7402288913726807, "learning_rate": 1.0232399112010491e-05, "loss": 1.6816, "step": 82653 }, { "epoch": 2.75, "grad_norm": 0.7354049682617188, "learning_rate": 1.02296927620392e-05, "loss": 1.6473, "step": 82654 }, { "epoch": 2.75, "grad_norm": 0.7392346262931824, "learning_rate": 1.0226986763806377e-05, "loss": 1.677, "step": 82655 }, { "epoch": 2.75, "grad_norm": 0.7018492221832275, "learning_rate": 1.0224281117315325e-05, "loss": 1.6843, "step": 82656 }, { "epoch": 2.75, "grad_norm": 0.6997992992401123, "learning_rate": 1.0221575822569306e-05, "loss": 1.6704, "step": 82657 }, { "epoch": 2.75, "grad_norm": 0.7048197984695435, "learning_rate": 1.021887087957155e-05, "loss": 1.6913, "step": 82658 }, { "epoch": 2.75, "grad_norm": 0.7042351365089417, "learning_rate": 1.0216166288325422e-05, "loss": 1.6812, "step": 82659 }, { "epoch": 2.75, "grad_norm": 0.7143149375915527, "learning_rate": 1.0213462048834186e-05, "loss": 1.7034, "step": 82660 }, { "epoch": 2.75, "grad_norm": 0.7198516726493835, "learning_rate": 1.0210758161101073e-05, "loss": 1.6895, "step": 82661 }, { "epoch": 2.75, "grad_norm": 0.7285197377204895, "learning_rate": 1.020805462512938e-05, "loss": 1.7146, "step": 82662 }, { "epoch": 2.75, "grad_norm": 0.7381751537322998, "learning_rate": 1.0205351440922472e-05, "loss": 1.7408, "step": 82663 }, { "epoch": 2.75, "grad_norm": 0.6969866156578064, "learning_rate": 1.0202648608483544e-05, "loss": 1.6412, "step": 82664 }, { "epoch": 2.75, "grad_norm": 0.7281461954116821, "learning_rate": 1.0199946127815862e-05, "loss": 1.6832, "step": 82665 }, { "epoch": 2.75, "grad_norm": 0.7156299948692322, "learning_rate": 1.0197243998922821e-05, "loss": 1.67, "step": 82666 }, { "epoch": 2.75, "grad_norm": 0.7170798778533936, "learning_rate": 1.0194542221807588e-05, "loss": 1.6898, "step": 82667 }, { "epoch": 2.75, "grad_norm": 0.713649332523346, "learning_rate": 1.019184079647346e-05, "loss": 1.6806, "step": 82668 }, { "epoch": 2.75, "grad_norm": 0.7066452503204346, "learning_rate": 1.0189139722923768e-05, "loss": 1.703, "step": 82669 }, { "epoch": 2.75, "grad_norm": 0.7155050039291382, "learning_rate": 1.0186439001161705e-05, "loss": 1.6946, "step": 82670 }, { "epoch": 2.75, "grad_norm": 0.6975163221359253, "learning_rate": 1.0183738631190674e-05, "loss": 1.6941, "step": 82671 }, { "epoch": 2.75, "grad_norm": 0.7101845145225525, "learning_rate": 1.0181038613013836e-05, "loss": 1.6321, "step": 82672 }, { "epoch": 2.75, "grad_norm": 0.704105794429779, "learning_rate": 1.0178338946634523e-05, "loss": 1.6554, "step": 82673 }, { "epoch": 2.75, "grad_norm": 0.725800633430481, "learning_rate": 1.0175639632056032e-05, "loss": 1.6899, "step": 82674 }, { "epoch": 2.75, "grad_norm": 0.7137482166290283, "learning_rate": 1.017294066928156e-05, "loss": 1.6927, "step": 82675 }, { "epoch": 2.75, "grad_norm": 0.7109039425849915, "learning_rate": 1.017024205831447e-05, "loss": 1.7182, "step": 82676 }, { "epoch": 2.75, "grad_norm": 0.7041693925857544, "learning_rate": 1.0167543799157996e-05, "loss": 1.6486, "step": 82677 }, { "epoch": 2.75, "grad_norm": 0.7382981777191162, "learning_rate": 1.0164845891815431e-05, "loss": 1.6032, "step": 82678 }, { "epoch": 2.75, "grad_norm": 0.7184844613075256, "learning_rate": 1.016214833629001e-05, "loss": 1.7159, "step": 82679 }, { "epoch": 2.75, "grad_norm": 0.711570143699646, "learning_rate": 1.0159451132585028e-05, "loss": 1.6892, "step": 82680 }, { "epoch": 2.75, "grad_norm": 0.7061273455619812, "learning_rate": 1.0156754280703817e-05, "loss": 1.6768, "step": 82681 }, { "epoch": 2.75, "grad_norm": 0.713996410369873, "learning_rate": 1.0154057780649538e-05, "loss": 1.6974, "step": 82682 }, { "epoch": 2.75, "grad_norm": 0.7088800072669983, "learning_rate": 1.0151361632425592e-05, "loss": 1.6951, "step": 82683 }, { "epoch": 2.75, "grad_norm": 0.7009132504463196, "learning_rate": 1.0148665836035108e-05, "loss": 1.5724, "step": 82684 }, { "epoch": 2.75, "grad_norm": 0.6959731578826904, "learning_rate": 1.0145970391481517e-05, "loss": 1.6709, "step": 82685 }, { "epoch": 2.75, "grad_norm": 0.6952055096626282, "learning_rate": 1.0143275298767984e-05, "loss": 1.5949, "step": 82686 }, { "epoch": 2.75, "grad_norm": 0.7123556733131409, "learning_rate": 1.014058055789777e-05, "loss": 1.6917, "step": 82687 }, { "epoch": 2.75, "grad_norm": 0.7153195142745972, "learning_rate": 1.0137886168874209e-05, "loss": 1.7045, "step": 82688 }, { "epoch": 2.75, "grad_norm": 0.6918594837188721, "learning_rate": 1.0135192131700564e-05, "loss": 1.6077, "step": 82689 }, { "epoch": 2.75, "grad_norm": 0.6957434415817261, "learning_rate": 1.0132498446380034e-05, "loss": 1.6309, "step": 82690 }, { "epoch": 2.75, "grad_norm": 0.7460008859634399, "learning_rate": 1.0129805112915978e-05, "loss": 1.7927, "step": 82691 }, { "epoch": 2.75, "grad_norm": 0.7174267768859863, "learning_rate": 1.0127112131311633e-05, "loss": 1.695, "step": 82692 }, { "epoch": 2.75, "grad_norm": 0.7327610850334167, "learning_rate": 1.0124419501570258e-05, "loss": 1.6589, "step": 82693 }, { "epoch": 2.75, "grad_norm": 0.7151146531105042, "learning_rate": 1.0121727223695086e-05, "loss": 1.7697, "step": 82694 }, { "epoch": 2.75, "grad_norm": 0.721078634262085, "learning_rate": 1.0119035297689482e-05, "loss": 1.7042, "step": 82695 }, { "epoch": 2.75, "grad_norm": 0.7250798940658569, "learning_rate": 1.011634372355664e-05, "loss": 1.6616, "step": 82696 }, { "epoch": 2.75, "grad_norm": 0.7285600900650024, "learning_rate": 1.0113652501299829e-05, "loss": 1.6968, "step": 82697 }, { "epoch": 2.75, "grad_norm": 0.7247856855392456, "learning_rate": 1.0110961630922344e-05, "loss": 1.6671, "step": 82698 }, { "epoch": 2.75, "grad_norm": 0.7194229960441589, "learning_rate": 1.010827111242748e-05, "loss": 1.6518, "step": 82699 }, { "epoch": 2.75, "grad_norm": 0.690377950668335, "learning_rate": 1.0105580945818404e-05, "loss": 1.671, "step": 82700 }, { "epoch": 2.75, "grad_norm": 0.7374551296234131, "learning_rate": 1.0102891131098411e-05, "loss": 1.6631, "step": 82701 }, { "epoch": 2.75, "grad_norm": 0.7072510719299316, "learning_rate": 1.0100201668270868e-05, "loss": 1.6074, "step": 82702 }, { "epoch": 2.75, "grad_norm": 0.7101725339889526, "learning_rate": 1.0097512557338939e-05, "loss": 1.6955, "step": 82703 }, { "epoch": 2.75, "grad_norm": 0.7256311774253845, "learning_rate": 1.0094823798305884e-05, "loss": 1.6805, "step": 82704 }, { "epoch": 2.75, "grad_norm": 0.7058000564575195, "learning_rate": 1.0092135391175004e-05, "loss": 1.7192, "step": 82705 }, { "epoch": 2.75, "grad_norm": 0.7191951870918274, "learning_rate": 1.008944733594963e-05, "loss": 1.6527, "step": 82706 }, { "epoch": 2.75, "grad_norm": 0.7010008692741394, "learning_rate": 1.0086759632632856e-05, "loss": 1.721, "step": 82707 }, { "epoch": 2.75, "grad_norm": 0.729972779750824, "learning_rate": 1.0084072281228083e-05, "loss": 1.7061, "step": 82708 }, { "epoch": 2.75, "grad_norm": 0.7041718363761902, "learning_rate": 1.008138528173854e-05, "loss": 1.6739, "step": 82709 }, { "epoch": 2.75, "grad_norm": 0.6990609169006348, "learning_rate": 1.0078698634167459e-05, "loss": 1.6763, "step": 82710 }, { "epoch": 2.75, "grad_norm": 0.7124332785606384, "learning_rate": 1.0076012338518102e-05, "loss": 1.6284, "step": 82711 }, { "epoch": 2.75, "grad_norm": 0.7261975407600403, "learning_rate": 1.0073326394793736e-05, "loss": 1.7248, "step": 82712 }, { "epoch": 2.75, "grad_norm": 0.7065710425376892, "learning_rate": 1.0070640802997721e-05, "loss": 1.6956, "step": 82713 }, { "epoch": 2.75, "grad_norm": 0.6927100419998169, "learning_rate": 1.0067955563133156e-05, "loss": 1.631, "step": 82714 }, { "epoch": 2.75, "grad_norm": 0.7071760296821594, "learning_rate": 1.0065270675203374e-05, "loss": 1.6507, "step": 82715 }, { "epoch": 2.75, "grad_norm": 0.7367271184921265, "learning_rate": 1.006258613921167e-05, "loss": 1.6722, "step": 82716 }, { "epoch": 2.75, "grad_norm": 0.7322612404823303, "learning_rate": 1.0059901955161242e-05, "loss": 1.7135, "step": 82717 }, { "epoch": 2.75, "grad_norm": 0.7054499387741089, "learning_rate": 1.0057218123055354e-05, "loss": 1.6821, "step": 82718 }, { "epoch": 2.75, "grad_norm": 0.7069684863090515, "learning_rate": 1.0054534642897338e-05, "loss": 1.6431, "step": 82719 }, { "epoch": 2.75, "grad_norm": 0.7217952013015747, "learning_rate": 1.005185151469039e-05, "loss": 1.7459, "step": 82720 }, { "epoch": 2.75, "grad_norm": 0.6932990550994873, "learning_rate": 1.0049168738437707e-05, "loss": 1.6216, "step": 82721 }, { "epoch": 2.75, "grad_norm": 0.7160205245018005, "learning_rate": 1.004648631414262e-05, "loss": 1.7154, "step": 82722 }, { "epoch": 2.75, "grad_norm": 0.6976603865623474, "learning_rate": 1.004380424180843e-05, "loss": 1.6658, "step": 82723 }, { "epoch": 2.75, "grad_norm": 0.7113099098205566, "learning_rate": 1.0041122521438327e-05, "loss": 1.702, "step": 82724 }, { "epoch": 2.75, "grad_norm": 0.7026530504226685, "learning_rate": 1.0038441153035547e-05, "loss": 1.6656, "step": 82725 }, { "epoch": 2.75, "grad_norm": 0.7406620979309082, "learning_rate": 1.0035760136603387e-05, "loss": 1.6439, "step": 82726 }, { "epoch": 2.75, "grad_norm": 0.7087958455085754, "learning_rate": 1.0033079472145111e-05, "loss": 1.6319, "step": 82727 }, { "epoch": 2.75, "grad_norm": 1.2290090322494507, "learning_rate": 1.0030399159663949e-05, "loss": 1.7749, "step": 82728 }, { "epoch": 2.75, "grad_norm": 0.7074636816978455, "learning_rate": 1.002771919916313e-05, "loss": 1.682, "step": 82729 }, { "epoch": 2.75, "grad_norm": 0.7107484340667725, "learning_rate": 1.0025039590645955e-05, "loss": 1.6645, "step": 82730 }, { "epoch": 2.75, "grad_norm": 0.7191466093063354, "learning_rate": 1.0022360334115654e-05, "loss": 1.6477, "step": 82731 }, { "epoch": 2.75, "grad_norm": 0.7293866276741028, "learning_rate": 1.0019681429575454e-05, "loss": 1.6193, "step": 82732 }, { "epoch": 2.75, "grad_norm": 0.7137889266014099, "learning_rate": 1.001700287702869e-05, "loss": 1.6466, "step": 82733 }, { "epoch": 2.75, "grad_norm": 0.6795463562011719, "learning_rate": 1.0014324676478524e-05, "loss": 1.6247, "step": 82734 }, { "epoch": 2.75, "grad_norm": 0.716950535774231, "learning_rate": 1.001164682792822e-05, "loss": 1.7265, "step": 82735 }, { "epoch": 2.75, "grad_norm": 0.710823118686676, "learning_rate": 1.0008969331381078e-05, "loss": 1.6282, "step": 82736 }, { "epoch": 2.75, "grad_norm": 0.7016019821166992, "learning_rate": 1.0006292186840292e-05, "loss": 1.6359, "step": 82737 }, { "epoch": 2.75, "grad_norm": 0.7106446623802185, "learning_rate": 1.0003615394309195e-05, "loss": 1.692, "step": 82738 }, { "epoch": 2.75, "grad_norm": 0.7043704390525818, "learning_rate": 1.0000938953790949e-05, "loss": 1.6866, "step": 82739 }, { "epoch": 2.75, "grad_norm": 0.6955175995826721, "learning_rate": 9.99826286528882e-06, "loss": 1.6761, "step": 82740 }, { "epoch": 2.75, "grad_norm": 0.7126729488372803, "learning_rate": 9.99558712880607e-06, "loss": 1.6634, "step": 82741 }, { "epoch": 2.75, "grad_norm": 0.6952458620071411, "learning_rate": 9.992911744345932e-06, "loss": 1.6651, "step": 82742 }, { "epoch": 2.75, "grad_norm": 0.7163169384002686, "learning_rate": 9.990236711911703e-06, "loss": 1.7065, "step": 82743 }, { "epoch": 2.75, "grad_norm": 0.732544481754303, "learning_rate": 9.98756203150658e-06, "loss": 1.7031, "step": 82744 }, { "epoch": 2.75, "grad_norm": 0.708742618560791, "learning_rate": 9.98488770313386e-06, "loss": 1.6164, "step": 82745 }, { "epoch": 2.75, "grad_norm": 0.7383813261985779, "learning_rate": 9.98221372679674e-06, "loss": 1.7003, "step": 82746 }, { "epoch": 2.75, "grad_norm": 0.6975585222244263, "learning_rate": 9.979540102498418e-06, "loss": 1.6391, "step": 82747 }, { "epoch": 2.75, "grad_norm": 0.7019892930984497, "learning_rate": 9.976866830242292e-06, "loss": 1.6384, "step": 82748 }, { "epoch": 2.75, "grad_norm": 0.7003552317619324, "learning_rate": 9.974193910031459e-06, "loss": 1.7529, "step": 82749 }, { "epoch": 2.75, "grad_norm": 0.7007084488868713, "learning_rate": 9.971521341869215e-06, "loss": 1.6611, "step": 82750 }, { "epoch": 2.75, "grad_norm": 0.726460874080658, "learning_rate": 9.968849125758827e-06, "loss": 1.6956, "step": 82751 }, { "epoch": 2.75, "grad_norm": 0.7029906511306763, "learning_rate": 9.966177261703557e-06, "loss": 1.6458, "step": 82752 }, { "epoch": 2.75, "grad_norm": 0.7038870453834534, "learning_rate": 9.963505749706569e-06, "loss": 1.672, "step": 82753 }, { "epoch": 2.75, "grad_norm": 0.706658124923706, "learning_rate": 9.960834589771161e-06, "loss": 1.678, "step": 82754 }, { "epoch": 2.75, "grad_norm": 0.7007951736450195, "learning_rate": 9.958163781900563e-06, "loss": 1.6216, "step": 82755 }, { "epoch": 2.75, "grad_norm": 0.6856396198272705, "learning_rate": 9.955493326098041e-06, "loss": 1.56, "step": 82756 }, { "epoch": 2.75, "grad_norm": 0.7100553512573242, "learning_rate": 9.952823222366791e-06, "loss": 1.7153, "step": 82757 }, { "epoch": 2.75, "grad_norm": 0.698377251625061, "learning_rate": 9.950153470710042e-06, "loss": 1.7068, "step": 82758 }, { "epoch": 2.75, "grad_norm": 0.7179412245750427, "learning_rate": 9.947484071131129e-06, "loss": 1.6795, "step": 82759 }, { "epoch": 2.75, "grad_norm": 0.7079882025718689, "learning_rate": 9.944815023633245e-06, "loss": 1.687, "step": 82760 }, { "epoch": 2.75, "grad_norm": 0.752522885799408, "learning_rate": 9.942146328219558e-06, "loss": 1.6818, "step": 82761 }, { "epoch": 2.75, "grad_norm": 0.6971693634986877, "learning_rate": 9.939477984893428e-06, "loss": 1.6535, "step": 82762 }, { "epoch": 2.75, "grad_norm": 0.7154988050460815, "learning_rate": 9.93680999365799e-06, "loss": 1.7363, "step": 82763 }, { "epoch": 2.75, "grad_norm": 0.7188376784324646, "learning_rate": 9.934142354516538e-06, "loss": 1.735, "step": 82764 }, { "epoch": 2.75, "grad_norm": 1.5371031761169434, "learning_rate": 9.931475067472273e-06, "loss": 1.6853, "step": 82765 }, { "epoch": 2.75, "grad_norm": 0.7230419516563416, "learning_rate": 9.928808132528487e-06, "loss": 1.6305, "step": 82766 }, { "epoch": 2.75, "grad_norm": 0.6949494481086731, "learning_rate": 9.926141549688416e-06, "loss": 1.6664, "step": 82767 }, { "epoch": 2.75, "grad_norm": 0.695976972579956, "learning_rate": 9.92347531895522e-06, "loss": 1.6874, "step": 82768 }, { "epoch": 2.75, "grad_norm": 0.7311387062072754, "learning_rate": 9.9208094403322e-06, "loss": 1.6172, "step": 82769 }, { "epoch": 2.75, "grad_norm": 0.7115598917007446, "learning_rate": 9.918143913822585e-06, "loss": 1.6647, "step": 82770 }, { "epoch": 2.75, "grad_norm": 0.7090798616409302, "learning_rate": 9.915478739429571e-06, "loss": 1.6212, "step": 82771 }, { "epoch": 2.75, "grad_norm": 0.7259538769721985, "learning_rate": 9.912813917156426e-06, "loss": 1.6208, "step": 82772 }, { "epoch": 2.75, "grad_norm": 0.7175874710083008, "learning_rate": 9.910149447006443e-06, "loss": 1.6431, "step": 82773 }, { "epoch": 2.75, "grad_norm": 0.7100010514259338, "learning_rate": 9.907485328982755e-06, "loss": 1.7232, "step": 82774 }, { "epoch": 2.75, "grad_norm": 0.7184033393859863, "learning_rate": 9.904821563088628e-06, "loss": 1.6779, "step": 82775 }, { "epoch": 2.75, "grad_norm": 0.7051841020584106, "learning_rate": 9.902158149327321e-06, "loss": 1.6892, "step": 82776 }, { "epoch": 2.75, "grad_norm": 0.6993494033813477, "learning_rate": 9.899495087702036e-06, "loss": 1.6553, "step": 82777 }, { "epoch": 2.75, "grad_norm": 0.7405765056610107, "learning_rate": 9.896832378216036e-06, "loss": 1.7509, "step": 82778 }, { "epoch": 2.75, "grad_norm": 0.7202653288841248, "learning_rate": 9.894170020872483e-06, "loss": 1.6974, "step": 82779 }, { "epoch": 2.75, "grad_norm": 0.710811197757721, "learning_rate": 9.891508015674743e-06, "loss": 1.6459, "step": 82780 }, { "epoch": 2.75, "grad_norm": 0.7247466444969177, "learning_rate": 9.888846362625947e-06, "loss": 1.658, "step": 82781 }, { "epoch": 2.75, "grad_norm": 0.7246673703193665, "learning_rate": 9.88618506172929e-06, "loss": 1.7136, "step": 82782 }, { "epoch": 2.75, "grad_norm": 0.7152902483940125, "learning_rate": 9.883524112988105e-06, "loss": 1.6932, "step": 82783 }, { "epoch": 2.75, "grad_norm": 0.719605028629303, "learning_rate": 9.88086351640559e-06, "loss": 1.6913, "step": 82784 }, { "epoch": 2.75, "grad_norm": 0.6983352899551392, "learning_rate": 9.878203271984908e-06, "loss": 1.6262, "step": 82785 }, { "epoch": 2.75, "grad_norm": 0.7304283380508423, "learning_rate": 9.87554337972939e-06, "loss": 1.6535, "step": 82786 }, { "epoch": 2.75, "grad_norm": 0.7135853171348572, "learning_rate": 9.872883839642165e-06, "loss": 1.5926, "step": 82787 }, { "epoch": 2.75, "grad_norm": 0.7001656293869019, "learning_rate": 9.870224651726567e-06, "loss": 1.6583, "step": 82788 }, { "epoch": 2.75, "grad_norm": 0.7037318348884583, "learning_rate": 9.867565815985723e-06, "loss": 1.7004, "step": 82789 }, { "epoch": 2.75, "grad_norm": 0.6972894072532654, "learning_rate": 9.864907332422934e-06, "loss": 1.6598, "step": 82790 }, { "epoch": 2.75, "grad_norm": 0.7111703753471375, "learning_rate": 9.862249201041394e-06, "loss": 1.6928, "step": 82791 }, { "epoch": 2.75, "grad_norm": 0.6970534920692444, "learning_rate": 9.859591421844304e-06, "loss": 1.7333, "step": 82792 }, { "epoch": 2.75, "grad_norm": 0.7201151847839355, "learning_rate": 9.85693399483496e-06, "loss": 1.5895, "step": 82793 }, { "epoch": 2.75, "grad_norm": 0.693132221698761, "learning_rate": 9.854276920016524e-06, "loss": 1.6366, "step": 82794 }, { "epoch": 2.75, "grad_norm": 0.719904363155365, "learning_rate": 9.851620197392262e-06, "loss": 1.6565, "step": 82795 }, { "epoch": 2.75, "grad_norm": 0.7153016924858093, "learning_rate": 9.848963826965372e-06, "loss": 1.6733, "step": 82796 }, { "epoch": 2.75, "grad_norm": 0.6878761649131775, "learning_rate": 9.846307808739052e-06, "loss": 1.6608, "step": 82797 }, { "epoch": 2.75, "grad_norm": 0.7226459980010986, "learning_rate": 9.84365214271663e-06, "loss": 1.7258, "step": 82798 }, { "epoch": 2.75, "grad_norm": 0.7138027548789978, "learning_rate": 9.840996828901204e-06, "loss": 1.6718, "step": 82799 }, { "epoch": 2.75, "grad_norm": 0.7060107588768005, "learning_rate": 9.838341867296073e-06, "loss": 1.708, "step": 82800 }, { "epoch": 2.75, "grad_norm": 0.7075662612915039, "learning_rate": 9.835687257904435e-06, "loss": 1.69, "step": 82801 }, { "epoch": 2.75, "grad_norm": 0.6889833807945251, "learning_rate": 9.83303300072955e-06, "loss": 1.6771, "step": 82802 }, { "epoch": 2.75, "grad_norm": 0.7223106026649475, "learning_rate": 9.830379095774621e-06, "loss": 1.6736, "step": 82803 }, { "epoch": 2.75, "grad_norm": 0.7209128141403198, "learning_rate": 9.827725543042808e-06, "loss": 1.6493, "step": 82804 }, { "epoch": 2.75, "grad_norm": 0.6913490295410156, "learning_rate": 9.82507234253741e-06, "loss": 1.6688, "step": 82805 }, { "epoch": 2.75, "grad_norm": 0.6911235451698303, "learning_rate": 9.822419494261624e-06, "loss": 1.6831, "step": 82806 }, { "epoch": 2.75, "grad_norm": 0.7122258543968201, "learning_rate": 9.819766998218614e-06, "loss": 1.6233, "step": 82807 }, { "epoch": 2.76, "grad_norm": 0.6808055639266968, "learning_rate": 9.817114854411678e-06, "loss": 1.7052, "step": 82808 }, { "epoch": 2.76, "grad_norm": 0.7183472514152527, "learning_rate": 9.814463062844013e-06, "loss": 1.6754, "step": 82809 }, { "epoch": 2.76, "grad_norm": 0.6754529476165771, "learning_rate": 9.81181162351885e-06, "loss": 1.6084, "step": 82810 }, { "epoch": 2.76, "grad_norm": 0.7449535727500916, "learning_rate": 9.809160536439353e-06, "loss": 1.7088, "step": 82811 }, { "epoch": 2.76, "grad_norm": 0.7178769707679749, "learning_rate": 9.80650980160882e-06, "loss": 1.6188, "step": 82812 }, { "epoch": 2.76, "grad_norm": 0.706240177154541, "learning_rate": 9.803859419030447e-06, "loss": 1.6874, "step": 82813 }, { "epoch": 2.76, "grad_norm": 0.7227376699447632, "learning_rate": 9.801209388707365e-06, "loss": 1.693, "step": 82814 }, { "epoch": 2.76, "grad_norm": 0.7161068916320801, "learning_rate": 9.798559710642839e-06, "loss": 1.7033, "step": 82815 }, { "epoch": 2.76, "grad_norm": 0.7069618105888367, "learning_rate": 9.7959103848402e-06, "loss": 1.7095, "step": 82816 }, { "epoch": 2.76, "grad_norm": 0.724905788898468, "learning_rate": 9.79326141130251e-06, "loss": 1.7891, "step": 82817 }, { "epoch": 2.76, "grad_norm": 0.7388746738433838, "learning_rate": 9.790612790033004e-06, "loss": 1.6894, "step": 82818 }, { "epoch": 2.76, "grad_norm": 0.7218279242515564, "learning_rate": 9.787964521034974e-06, "loss": 1.6984, "step": 82819 }, { "epoch": 2.76, "grad_norm": 0.7046313285827637, "learning_rate": 9.785316604311622e-06, "loss": 1.6481, "step": 82820 }, { "epoch": 2.76, "grad_norm": 1.2926585674285889, "learning_rate": 9.782669039866075e-06, "loss": 1.685, "step": 82821 }, { "epoch": 2.76, "grad_norm": 0.7097110152244568, "learning_rate": 9.780021827701635e-06, "loss": 1.606, "step": 82822 }, { "epoch": 2.76, "grad_norm": 0.7243127822875977, "learning_rate": 9.777374967821527e-06, "loss": 1.7215, "step": 82823 }, { "epoch": 2.76, "grad_norm": 0.7129797339439392, "learning_rate": 9.774728460228853e-06, "loss": 1.7021, "step": 82824 }, { "epoch": 2.76, "grad_norm": 0.702497124671936, "learning_rate": 9.77208230492691e-06, "loss": 1.7285, "step": 82825 }, { "epoch": 2.76, "grad_norm": 0.722210168838501, "learning_rate": 9.769436501918926e-06, "loss": 1.7466, "step": 82826 }, { "epoch": 2.76, "grad_norm": 0.7022456526756287, "learning_rate": 9.7667910512081e-06, "loss": 1.7441, "step": 82827 }, { "epoch": 2.76, "grad_norm": 0.6980343461036682, "learning_rate": 9.764145952797564e-06, "loss": 1.6777, "step": 82828 }, { "epoch": 2.76, "grad_norm": 0.7086322903633118, "learning_rate": 9.761501206690614e-06, "loss": 1.6519, "step": 82829 }, { "epoch": 2.76, "grad_norm": 0.7180923819541931, "learning_rate": 9.758856812890481e-06, "loss": 1.6777, "step": 82830 }, { "epoch": 2.76, "grad_norm": 0.7042718529701233, "learning_rate": 9.756212771400296e-06, "loss": 1.6838, "step": 82831 }, { "epoch": 2.76, "grad_norm": 0.6991161704063416, "learning_rate": 9.753569082223256e-06, "loss": 1.6827, "step": 82832 }, { "epoch": 2.76, "grad_norm": 0.7309134602546692, "learning_rate": 9.750925745362692e-06, "loss": 1.6506, "step": 82833 }, { "epoch": 2.76, "grad_norm": 0.714747965335846, "learning_rate": 9.748282760821736e-06, "loss": 1.676, "step": 82834 }, { "epoch": 2.76, "grad_norm": 0.70648592710495, "learning_rate": 9.745640128603548e-06, "loss": 1.6484, "step": 82835 }, { "epoch": 2.76, "grad_norm": 0.7027732133865356, "learning_rate": 9.742997848711399e-06, "loss": 1.6663, "step": 82836 }, { "epoch": 2.76, "grad_norm": 0.6858403086662292, "learning_rate": 9.740355921148512e-06, "loss": 1.6686, "step": 82837 }, { "epoch": 2.76, "grad_norm": 0.7299485802650452, "learning_rate": 9.737714345918023e-06, "loss": 1.658, "step": 82838 }, { "epoch": 2.76, "grad_norm": 0.699168860912323, "learning_rate": 9.735073123023196e-06, "loss": 1.732, "step": 82839 }, { "epoch": 2.76, "grad_norm": 1.2408729791641235, "learning_rate": 9.73243225246726e-06, "loss": 1.6362, "step": 82840 }, { "epoch": 2.76, "grad_norm": 0.7008796334266663, "learning_rate": 9.729791734253344e-06, "loss": 1.6555, "step": 82841 }, { "epoch": 2.76, "grad_norm": 0.6980916857719421, "learning_rate": 9.727151568384683e-06, "loss": 1.6818, "step": 82842 }, { "epoch": 2.76, "grad_norm": 0.7176350951194763, "learning_rate": 9.72451175486454e-06, "loss": 1.6874, "step": 82843 }, { "epoch": 2.76, "grad_norm": 0.7056499719619751, "learning_rate": 9.721872293696043e-06, "loss": 1.6797, "step": 82844 }, { "epoch": 2.76, "grad_norm": 0.724146842956543, "learning_rate": 9.719233184882392e-06, "loss": 1.7909, "step": 82845 }, { "epoch": 2.76, "grad_norm": 0.760342538356781, "learning_rate": 9.716594428426849e-06, "loss": 1.7318, "step": 82846 }, { "epoch": 2.76, "grad_norm": 0.7089787125587463, "learning_rate": 9.71395602433258e-06, "loss": 1.7072, "step": 82847 }, { "epoch": 2.76, "grad_norm": 0.7355565428733826, "learning_rate": 9.711317972602816e-06, "loss": 1.7004, "step": 82848 }, { "epoch": 2.76, "grad_norm": 0.7025864720344543, "learning_rate": 9.708680273240688e-06, "loss": 1.6616, "step": 82849 }, { "epoch": 2.76, "grad_norm": 0.731194019317627, "learning_rate": 9.706042926249525e-06, "loss": 1.6769, "step": 82850 }, { "epoch": 2.76, "grad_norm": 0.6979905962944031, "learning_rate": 9.703405931632424e-06, "loss": 1.6887, "step": 82851 }, { "epoch": 2.76, "grad_norm": 0.7219939827919006, "learning_rate": 9.700769289392585e-06, "loss": 1.6953, "step": 82852 }, { "epoch": 2.76, "grad_norm": 0.7092081308364868, "learning_rate": 9.69813299953327e-06, "loss": 1.6237, "step": 82853 }, { "epoch": 2.76, "grad_norm": 0.7121174931526184, "learning_rate": 9.69549706205761e-06, "loss": 1.6494, "step": 82854 }, { "epoch": 2.76, "grad_norm": 0.7053989768028259, "learning_rate": 9.692861476968904e-06, "loss": 1.6182, "step": 82855 }, { "epoch": 2.76, "grad_norm": 0.7186353802680969, "learning_rate": 9.69022624427025e-06, "loss": 1.6135, "step": 82856 }, { "epoch": 2.76, "grad_norm": 0.703713595867157, "learning_rate": 9.687591363964874e-06, "loss": 1.6646, "step": 82857 }, { "epoch": 2.76, "grad_norm": 0.7232103943824768, "learning_rate": 9.684956836056013e-06, "loss": 1.6486, "step": 82858 }, { "epoch": 2.76, "grad_norm": 0.69887775182724, "learning_rate": 9.682322660546827e-06, "loss": 1.6836, "step": 82859 }, { "epoch": 2.76, "grad_norm": 0.7174814343452454, "learning_rate": 9.679688837440547e-06, "loss": 1.6468, "step": 82860 }, { "epoch": 2.76, "grad_norm": 0.7078157663345337, "learning_rate": 9.677055366740305e-06, "loss": 1.7066, "step": 82861 }, { "epoch": 2.76, "grad_norm": 0.7118526697158813, "learning_rate": 9.674422248449365e-06, "loss": 1.6521, "step": 82862 }, { "epoch": 2.76, "grad_norm": 0.7043562531471252, "learning_rate": 9.671789482570924e-06, "loss": 1.6158, "step": 82863 }, { "epoch": 2.76, "grad_norm": 0.7071247696876526, "learning_rate": 9.669157069108113e-06, "loss": 1.6942, "step": 82864 }, { "epoch": 2.76, "grad_norm": 0.7361971139907837, "learning_rate": 9.666525008064197e-06, "loss": 1.6645, "step": 82865 }, { "epoch": 2.76, "grad_norm": 0.7141753435134888, "learning_rate": 9.663893299442339e-06, "loss": 1.6751, "step": 82866 }, { "epoch": 2.76, "grad_norm": 0.726444661617279, "learning_rate": 9.661261943245702e-06, "loss": 1.6212, "step": 82867 }, { "epoch": 2.76, "grad_norm": 0.7094851732254028, "learning_rate": 9.658630939477518e-06, "loss": 1.6558, "step": 82868 }, { "epoch": 2.76, "grad_norm": 0.7095547914505005, "learning_rate": 9.656000288140987e-06, "loss": 1.6982, "step": 82869 }, { "epoch": 2.76, "grad_norm": 0.7270528078079224, "learning_rate": 9.653369989239335e-06, "loss": 1.7283, "step": 82870 }, { "epoch": 2.76, "grad_norm": 0.6942812204360962, "learning_rate": 9.650740042775628e-06, "loss": 1.6346, "step": 82871 }, { "epoch": 2.76, "grad_norm": 0.725590705871582, "learning_rate": 9.6481104487532e-06, "loss": 1.6865, "step": 82872 }, { "epoch": 2.76, "grad_norm": 0.7096807956695557, "learning_rate": 9.64548120717521e-06, "loss": 1.6507, "step": 82873 }, { "epoch": 2.76, "grad_norm": 0.7278612852096558, "learning_rate": 9.642852318044759e-06, "loss": 1.6879, "step": 82874 }, { "epoch": 2.76, "grad_norm": 0.7413380146026611, "learning_rate": 9.640223781365108e-06, "loss": 1.6698, "step": 82875 }, { "epoch": 2.76, "grad_norm": 0.709132730960846, "learning_rate": 9.637595597139458e-06, "loss": 1.6497, "step": 82876 }, { "epoch": 2.76, "grad_norm": 0.7387180328369141, "learning_rate": 9.634967765371004e-06, "loss": 1.6192, "step": 82877 }, { "epoch": 2.76, "grad_norm": 0.7372902035713196, "learning_rate": 9.632340286062878e-06, "loss": 1.6323, "step": 82878 }, { "epoch": 2.76, "grad_norm": 0.69376140832901, "learning_rate": 9.629713159218344e-06, "loss": 1.6373, "step": 82879 }, { "epoch": 2.76, "grad_norm": 0.7194430232048035, "learning_rate": 9.627086384840566e-06, "loss": 1.7275, "step": 82880 }, { "epoch": 2.76, "grad_norm": 0.7346826195716858, "learning_rate": 9.624459962932674e-06, "loss": 1.6441, "step": 82881 }, { "epoch": 2.76, "grad_norm": 0.7136324644088745, "learning_rate": 9.621833893497899e-06, "loss": 1.7081, "step": 82882 }, { "epoch": 2.76, "grad_norm": 0.7090991735458374, "learning_rate": 9.619208176539472e-06, "loss": 1.6258, "step": 82883 }, { "epoch": 2.76, "grad_norm": 0.7108893990516663, "learning_rate": 9.616582812060559e-06, "loss": 1.6449, "step": 82884 }, { "epoch": 2.76, "grad_norm": 0.7244471311569214, "learning_rate": 9.613957800064286e-06, "loss": 1.7289, "step": 82885 }, { "epoch": 2.76, "grad_norm": 0.6978837847709656, "learning_rate": 9.61133314055389e-06, "loss": 1.6112, "step": 82886 }, { "epoch": 2.76, "grad_norm": 0.717766284942627, "learning_rate": 9.608708833532564e-06, "loss": 1.6647, "step": 82887 }, { "epoch": 2.76, "grad_norm": 0.7129184007644653, "learning_rate": 9.606084879003439e-06, "loss": 1.7199, "step": 82888 }, { "epoch": 2.76, "grad_norm": 0.7317243814468384, "learning_rate": 9.603461276969749e-06, "loss": 1.6307, "step": 82889 }, { "epoch": 2.76, "grad_norm": 0.7053858637809753, "learning_rate": 9.600838027434721e-06, "loss": 1.6653, "step": 82890 }, { "epoch": 2.76, "grad_norm": 0.7254485487937927, "learning_rate": 9.598215130401454e-06, "loss": 1.7005, "step": 82891 }, { "epoch": 2.76, "grad_norm": 0.7288581132888794, "learning_rate": 9.595592585873147e-06, "loss": 1.767, "step": 82892 }, { "epoch": 2.76, "grad_norm": 0.7215186953544617, "learning_rate": 9.592970393853028e-06, "loss": 1.7377, "step": 82893 }, { "epoch": 2.76, "grad_norm": 0.6881263852119446, "learning_rate": 9.590348554344263e-06, "loss": 1.6016, "step": 82894 }, { "epoch": 2.76, "grad_norm": 0.7062792181968689, "learning_rate": 9.587727067349982e-06, "loss": 1.7144, "step": 82895 }, { "epoch": 2.76, "grad_norm": 0.6862061023712158, "learning_rate": 9.585105932873415e-06, "loss": 1.64, "step": 82896 }, { "epoch": 2.76, "grad_norm": 0.7192707061767578, "learning_rate": 9.582485150917762e-06, "loss": 1.6972, "step": 82897 }, { "epoch": 2.76, "grad_norm": 0.7221808433532715, "learning_rate": 9.579864721486186e-06, "loss": 1.679, "step": 82898 }, { "epoch": 2.76, "grad_norm": 0.7041610479354858, "learning_rate": 9.57724464458185e-06, "loss": 1.667, "step": 82899 }, { "epoch": 2.76, "grad_norm": 0.7008776068687439, "learning_rate": 9.574624920207951e-06, "loss": 1.6631, "step": 82900 }, { "epoch": 2.76, "grad_norm": 0.7332401275634766, "learning_rate": 9.57200554836769e-06, "loss": 1.7653, "step": 82901 }, { "epoch": 2.76, "grad_norm": 0.6976206302642822, "learning_rate": 9.56938652906416e-06, "loss": 1.6566, "step": 82902 }, { "epoch": 2.76, "grad_norm": 0.7515530586242676, "learning_rate": 9.566767862300661e-06, "loss": 1.7592, "step": 82903 }, { "epoch": 2.76, "grad_norm": 0.729732096195221, "learning_rate": 9.564149548080257e-06, "loss": 1.6204, "step": 82904 }, { "epoch": 2.76, "grad_norm": 0.7082822322845459, "learning_rate": 9.561531586406245e-06, "loss": 1.6515, "step": 82905 }, { "epoch": 2.76, "grad_norm": 0.6902265548706055, "learning_rate": 9.55891397728169e-06, "loss": 1.6907, "step": 82906 }, { "epoch": 2.76, "grad_norm": 0.736004114151001, "learning_rate": 9.556296720709855e-06, "loss": 1.6289, "step": 82907 }, { "epoch": 2.76, "grad_norm": 0.726329505443573, "learning_rate": 9.553679816693871e-06, "loss": 1.6361, "step": 82908 }, { "epoch": 2.76, "grad_norm": 0.6928830146789551, "learning_rate": 9.551063265236902e-06, "loss": 1.6351, "step": 82909 }, { "epoch": 2.76, "grad_norm": 0.7039874196052551, "learning_rate": 9.54844706634218e-06, "loss": 1.6341, "step": 82910 }, { "epoch": 2.76, "grad_norm": 0.7287660837173462, "learning_rate": 9.5458312200128e-06, "loss": 1.6873, "step": 82911 }, { "epoch": 2.76, "grad_norm": 0.7121075987815857, "learning_rate": 9.543215726252029e-06, "loss": 1.6169, "step": 82912 }, { "epoch": 2.76, "grad_norm": 0.7060375213623047, "learning_rate": 9.540600585062997e-06, "loss": 1.679, "step": 82913 }, { "epoch": 2.76, "grad_norm": 0.7313423752784729, "learning_rate": 9.537985796448832e-06, "loss": 1.6873, "step": 82914 }, { "epoch": 2.76, "grad_norm": 0.7439548969268799, "learning_rate": 9.535371360412803e-06, "loss": 1.6445, "step": 82915 }, { "epoch": 2.76, "grad_norm": 0.7113519906997681, "learning_rate": 9.532757276958004e-06, "loss": 1.6406, "step": 82916 }, { "epoch": 2.76, "grad_norm": 0.6998938918113708, "learning_rate": 9.530143546087665e-06, "loss": 1.6858, "step": 82917 }, { "epoch": 2.76, "grad_norm": 0.7221847176551819, "learning_rate": 9.527530167804886e-06, "loss": 1.7495, "step": 82918 }, { "epoch": 2.76, "grad_norm": 0.7258014678955078, "learning_rate": 9.524917142112964e-06, "loss": 1.6682, "step": 82919 }, { "epoch": 2.76, "grad_norm": 0.7129398584365845, "learning_rate": 9.522304469014963e-06, "loss": 1.6615, "step": 82920 }, { "epoch": 2.76, "grad_norm": 0.7089285254478455, "learning_rate": 9.519692148514047e-06, "loss": 1.6793, "step": 82921 }, { "epoch": 2.76, "grad_norm": 0.7063125967979431, "learning_rate": 9.517080180613446e-06, "loss": 1.5481, "step": 82922 }, { "epoch": 2.76, "grad_norm": 0.7255043387413025, "learning_rate": 9.514468565316324e-06, "loss": 1.7042, "step": 82923 }, { "epoch": 2.76, "grad_norm": 0.7056679129600525, "learning_rate": 9.511857302625814e-06, "loss": 1.6268, "step": 82924 }, { "epoch": 2.76, "grad_norm": 0.7042158246040344, "learning_rate": 9.509246392545112e-06, "loss": 1.6459, "step": 82925 }, { "epoch": 2.76, "grad_norm": 0.6995558738708496, "learning_rate": 9.506635835077415e-06, "loss": 1.656, "step": 82926 }, { "epoch": 2.76, "grad_norm": 0.7218362092971802, "learning_rate": 9.50402563022582e-06, "loss": 1.5929, "step": 82927 }, { "epoch": 2.76, "grad_norm": 0.7094429135322571, "learning_rate": 9.501415777993526e-06, "loss": 1.7003, "step": 82928 }, { "epoch": 2.76, "grad_norm": 0.7246476411819458, "learning_rate": 9.498806278383764e-06, "loss": 1.6846, "step": 82929 }, { "epoch": 2.76, "grad_norm": 0.7144389152526855, "learning_rate": 9.49619713139963e-06, "loss": 1.5989, "step": 82930 }, { "epoch": 2.76, "grad_norm": 0.701179563999176, "learning_rate": 9.493588337044288e-06, "loss": 1.6973, "step": 82931 }, { "epoch": 2.76, "grad_norm": 0.7065796256065369, "learning_rate": 9.490979895320905e-06, "loss": 1.6946, "step": 82932 }, { "epoch": 2.76, "grad_norm": 0.725121796131134, "learning_rate": 9.488371806232742e-06, "loss": 1.6474, "step": 82933 }, { "epoch": 2.76, "grad_norm": 0.7037016153335571, "learning_rate": 9.485764069782831e-06, "loss": 1.6855, "step": 82934 }, { "epoch": 2.76, "grad_norm": 0.7020528316497803, "learning_rate": 9.483156685974402e-06, "loss": 1.6668, "step": 82935 }, { "epoch": 2.76, "grad_norm": 0.7204936742782593, "learning_rate": 9.480549654810622e-06, "loss": 1.7039, "step": 82936 }, { "epoch": 2.76, "grad_norm": 0.7246783375740051, "learning_rate": 9.477942976294684e-06, "loss": 1.6673, "step": 82937 }, { "epoch": 2.76, "grad_norm": 0.7203252911567688, "learning_rate": 9.475336650429655e-06, "loss": 1.6887, "step": 82938 }, { "epoch": 2.76, "grad_norm": 0.729670524597168, "learning_rate": 9.472730677218764e-06, "loss": 1.7129, "step": 82939 }, { "epoch": 2.76, "grad_norm": 0.7077839374542236, "learning_rate": 9.470125056665245e-06, "loss": 1.668, "step": 82940 }, { "epoch": 2.76, "grad_norm": 0.6934519410133362, "learning_rate": 9.467519788772126e-06, "loss": 1.7007, "step": 82941 }, { "epoch": 2.76, "grad_norm": 0.6993163824081421, "learning_rate": 9.464914873542639e-06, "loss": 1.6867, "step": 82942 }, { "epoch": 2.76, "grad_norm": 0.708788275718689, "learning_rate": 9.462310310979949e-06, "loss": 1.6551, "step": 82943 }, { "epoch": 2.76, "grad_norm": 0.741722047328949, "learning_rate": 9.459706101087216e-06, "loss": 1.6896, "step": 82944 }, { "epoch": 2.76, "grad_norm": 0.7149727940559387, "learning_rate": 9.457102243867543e-06, "loss": 1.67, "step": 82945 }, { "epoch": 2.76, "grad_norm": 0.7283080816268921, "learning_rate": 9.454498739324157e-06, "loss": 1.6915, "step": 82946 }, { "epoch": 2.76, "grad_norm": 0.6889210343360901, "learning_rate": 9.451895587460256e-06, "loss": 1.5726, "step": 82947 }, { "epoch": 2.76, "grad_norm": 0.7201153039932251, "learning_rate": 9.449292788278872e-06, "loss": 1.7315, "step": 82948 }, { "epoch": 2.76, "grad_norm": 0.7126711010932922, "learning_rate": 9.446690341783237e-06, "loss": 1.6667, "step": 82949 }, { "epoch": 2.76, "grad_norm": 0.6958860754966736, "learning_rate": 9.444088247976544e-06, "loss": 1.6881, "step": 82950 }, { "epoch": 2.76, "grad_norm": 0.7213292121887207, "learning_rate": 9.441486506861928e-06, "loss": 1.6996, "step": 82951 }, { "epoch": 2.76, "grad_norm": 0.7074424624443054, "learning_rate": 9.438885118442485e-06, "loss": 1.6342, "step": 82952 }, { "epoch": 2.76, "grad_norm": 0.7162238955497742, "learning_rate": 9.43628408272148e-06, "loss": 1.7448, "step": 82953 }, { "epoch": 2.76, "grad_norm": 0.7298884987831116, "learning_rate": 9.433683399702008e-06, "loss": 1.7746, "step": 82954 }, { "epoch": 2.76, "grad_norm": 0.6952947378158569, "learning_rate": 9.431083069387169e-06, "loss": 1.666, "step": 82955 }, { "epoch": 2.76, "grad_norm": 0.7202285528182983, "learning_rate": 9.428483091780226e-06, "loss": 1.6794, "step": 82956 }, { "epoch": 2.76, "grad_norm": 0.7161224484443665, "learning_rate": 9.425883466884277e-06, "loss": 1.6673, "step": 82957 }, { "epoch": 2.76, "grad_norm": 0.7061373591423035, "learning_rate": 9.423284194702518e-06, "loss": 1.7077, "step": 82958 }, { "epoch": 2.76, "grad_norm": 0.7221405506134033, "learning_rate": 9.420685275238049e-06, "loss": 1.7473, "step": 82959 }, { "epoch": 2.76, "grad_norm": 0.7090722918510437, "learning_rate": 9.418086708494065e-06, "loss": 1.6887, "step": 82960 }, { "epoch": 2.76, "grad_norm": 0.6978168487548828, "learning_rate": 9.415488494473734e-06, "loss": 1.6687, "step": 82961 }, { "epoch": 2.76, "grad_norm": 0.7362101078033447, "learning_rate": 9.412890633180147e-06, "loss": 1.6313, "step": 82962 }, { "epoch": 2.76, "grad_norm": 0.7084316611289978, "learning_rate": 9.410293124616508e-06, "loss": 1.6785, "step": 82963 }, { "epoch": 2.76, "grad_norm": 0.7041078805923462, "learning_rate": 9.407695968785944e-06, "loss": 1.6938, "step": 82964 }, { "epoch": 2.76, "grad_norm": 0.7657623291015625, "learning_rate": 9.405099165691653e-06, "loss": 1.7386, "step": 82965 }, { "epoch": 2.76, "grad_norm": 0.7349745631217957, "learning_rate": 9.402502715336702e-06, "loss": 1.6608, "step": 82966 }, { "epoch": 2.76, "grad_norm": 0.7196682691574097, "learning_rate": 9.399906617724351e-06, "loss": 1.5939, "step": 82967 }, { "epoch": 2.76, "grad_norm": 0.7042746543884277, "learning_rate": 9.397310872857667e-06, "loss": 1.7318, "step": 82968 }, { "epoch": 2.76, "grad_norm": 0.6970263123512268, "learning_rate": 9.394715480739812e-06, "loss": 1.6737, "step": 82969 }, { "epoch": 2.76, "grad_norm": 0.7151143550872803, "learning_rate": 9.392120441373986e-06, "loss": 1.6363, "step": 82970 }, { "epoch": 2.76, "grad_norm": 0.725534200668335, "learning_rate": 9.389525754763282e-06, "loss": 1.7268, "step": 82971 }, { "epoch": 2.76, "grad_norm": 0.7436407804489136, "learning_rate": 9.386931420910903e-06, "loss": 1.6869, "step": 82972 }, { "epoch": 2.76, "grad_norm": 0.7134243845939636, "learning_rate": 9.384337439819978e-06, "loss": 1.6614, "step": 82973 }, { "epoch": 2.76, "grad_norm": 0.7055131793022156, "learning_rate": 9.381743811493603e-06, "loss": 1.6894, "step": 82974 }, { "epoch": 2.76, "grad_norm": 0.7148416042327881, "learning_rate": 9.37915053593501e-06, "loss": 1.665, "step": 82975 }, { "epoch": 2.76, "grad_norm": 0.7199628353118896, "learning_rate": 9.376557613147262e-06, "loss": 1.6971, "step": 82976 }, { "epoch": 2.76, "grad_norm": 0.7142453789710999, "learning_rate": 9.37396504313359e-06, "loss": 1.653, "step": 82977 }, { "epoch": 2.76, "grad_norm": 0.7153474688529968, "learning_rate": 9.371372825897061e-06, "loss": 1.6167, "step": 82978 }, { "epoch": 2.76, "grad_norm": 0.7275795936584473, "learning_rate": 9.368780961440903e-06, "loss": 1.69, "step": 82979 }, { "epoch": 2.76, "grad_norm": 0.7123138904571533, "learning_rate": 9.366189449768247e-06, "loss": 1.672, "step": 82980 }, { "epoch": 2.76, "grad_norm": 0.7251976728439331, "learning_rate": 9.363598290882157e-06, "loss": 1.7455, "step": 82981 }, { "epoch": 2.76, "grad_norm": 0.735041618347168, "learning_rate": 9.361007484785865e-06, "loss": 1.7038, "step": 82982 }, { "epoch": 2.76, "grad_norm": 0.7163933515548706, "learning_rate": 9.358417031482502e-06, "loss": 1.7259, "step": 82983 }, { "epoch": 2.76, "grad_norm": 0.7082991003990173, "learning_rate": 9.35582693097513e-06, "loss": 1.6343, "step": 82984 }, { "epoch": 2.76, "grad_norm": 0.713996171951294, "learning_rate": 9.353237183267015e-06, "loss": 1.6479, "step": 82985 }, { "epoch": 2.76, "grad_norm": 0.7114118933677673, "learning_rate": 9.350647788361221e-06, "loss": 1.6256, "step": 82986 }, { "epoch": 2.76, "grad_norm": 0.6978142261505127, "learning_rate": 9.348058746260944e-06, "loss": 1.6283, "step": 82987 }, { "epoch": 2.76, "grad_norm": 0.7309523224830627, "learning_rate": 9.345470056969285e-06, "loss": 1.6726, "step": 82988 }, { "epoch": 2.76, "grad_norm": 0.7001777291297913, "learning_rate": 9.342881720489404e-06, "loss": 1.667, "step": 82989 }, { "epoch": 2.76, "grad_norm": 0.7185677289962769, "learning_rate": 9.340293736824434e-06, "loss": 1.7387, "step": 82990 }, { "epoch": 2.76, "grad_norm": 0.7037650346755981, "learning_rate": 9.337706105977505e-06, "loss": 1.6465, "step": 82991 }, { "epoch": 2.76, "grad_norm": 0.7069664001464844, "learning_rate": 9.335118827951749e-06, "loss": 1.699, "step": 82992 }, { "epoch": 2.76, "grad_norm": 0.6876044869422913, "learning_rate": 9.332531902750396e-06, "loss": 1.6763, "step": 82993 }, { "epoch": 2.76, "grad_norm": 0.7176878452301025, "learning_rate": 9.329945330376509e-06, "loss": 1.5994, "step": 82994 }, { "epoch": 2.76, "grad_norm": 0.7033838629722595, "learning_rate": 9.327359110833222e-06, "loss": 1.6857, "step": 82995 }, { "epoch": 2.76, "grad_norm": 0.7171454429626465, "learning_rate": 9.324773244123695e-06, "loss": 1.6882, "step": 82996 }, { "epoch": 2.76, "grad_norm": 0.7163400650024414, "learning_rate": 9.322187730251063e-06, "loss": 1.6563, "step": 82997 }, { "epoch": 2.76, "grad_norm": 0.7112480401992798, "learning_rate": 9.319602569218453e-06, "loss": 1.6381, "step": 82998 }, { "epoch": 2.76, "grad_norm": 0.7160024046897888, "learning_rate": 9.317017761029032e-06, "loss": 1.6267, "step": 82999 }, { "epoch": 2.76, "grad_norm": 0.7219257950782776, "learning_rate": 9.31443330568593e-06, "loss": 1.7007, "step": 83000 }, { "epoch": 2.76, "grad_norm": 0.7404322028160095, "learning_rate": 9.31184920319228e-06, "loss": 1.6707, "step": 83001 }, { "epoch": 2.76, "grad_norm": 0.7065301537513733, "learning_rate": 9.309265453551173e-06, "loss": 1.7285, "step": 83002 }, { "epoch": 2.76, "grad_norm": 0.7013888359069824, "learning_rate": 9.306682056765813e-06, "loss": 1.6414, "step": 83003 }, { "epoch": 2.76, "grad_norm": 0.720853865146637, "learning_rate": 9.304099012839327e-06, "loss": 1.6271, "step": 83004 }, { "epoch": 2.76, "grad_norm": 0.6939980983734131, "learning_rate": 9.301516321774816e-06, "loss": 1.685, "step": 83005 }, { "epoch": 2.76, "grad_norm": 0.7174373865127563, "learning_rate": 9.298933983575407e-06, "loss": 1.7992, "step": 83006 }, { "epoch": 2.76, "grad_norm": 0.7225676774978638, "learning_rate": 9.2963519982443e-06, "loss": 1.6759, "step": 83007 }, { "epoch": 2.76, "grad_norm": 0.7147395610809326, "learning_rate": 9.293770365784558e-06, "loss": 1.7239, "step": 83008 }, { "epoch": 2.76, "grad_norm": 0.6970053315162659, "learning_rate": 9.291189086199346e-06, "loss": 1.6972, "step": 83009 }, { "epoch": 2.76, "grad_norm": 0.7061847448348999, "learning_rate": 9.288608159491828e-06, "loss": 1.6851, "step": 83010 }, { "epoch": 2.76, "grad_norm": 0.7051730155944824, "learning_rate": 9.286027585665101e-06, "loss": 1.7524, "step": 83011 }, { "epoch": 2.76, "grad_norm": 0.7267090678215027, "learning_rate": 9.283447364722264e-06, "loss": 1.7195, "step": 83012 }, { "epoch": 2.76, "grad_norm": 0.7275515794754028, "learning_rate": 9.280867496666511e-06, "loss": 1.7096, "step": 83013 }, { "epoch": 2.76, "grad_norm": 0.7185541987419128, "learning_rate": 9.278287981500942e-06, "loss": 1.6641, "step": 83014 }, { "epoch": 2.76, "grad_norm": 0.7066031694412231, "learning_rate": 9.275708819228722e-06, "loss": 1.6979, "step": 83015 }, { "epoch": 2.76, "grad_norm": 0.748089075088501, "learning_rate": 9.273130009852914e-06, "loss": 1.6371, "step": 83016 }, { "epoch": 2.76, "grad_norm": 0.7233396172523499, "learning_rate": 9.270551553376714e-06, "loss": 1.6774, "step": 83017 }, { "epoch": 2.76, "grad_norm": 0.6998267769813538, "learning_rate": 9.267973449803256e-06, "loss": 1.5901, "step": 83018 }, { "epoch": 2.76, "grad_norm": 0.7114754319190979, "learning_rate": 9.265395699135603e-06, "loss": 1.6588, "step": 83019 }, { "epoch": 2.76, "grad_norm": 0.6966445446014404, "learning_rate": 9.262818301376918e-06, "loss": 1.606, "step": 83020 }, { "epoch": 2.76, "grad_norm": 0.7493875622749329, "learning_rate": 9.260241256530332e-06, "loss": 1.6516, "step": 83021 }, { "epoch": 2.76, "grad_norm": 0.696977436542511, "learning_rate": 9.25766456459901e-06, "loss": 1.6149, "step": 83022 }, { "epoch": 2.76, "grad_norm": 0.7485288381576538, "learning_rate": 9.255088225585983e-06, "loss": 1.709, "step": 83023 }, { "epoch": 2.76, "grad_norm": 0.7190408706665039, "learning_rate": 9.252512239494515e-06, "loss": 1.7219, "step": 83024 }, { "epoch": 2.76, "grad_norm": 0.8343926072120667, "learning_rate": 9.249936606327635e-06, "loss": 1.6706, "step": 83025 }, { "epoch": 2.76, "grad_norm": 0.7234992980957031, "learning_rate": 9.247361326088442e-06, "loss": 1.6591, "step": 83026 }, { "epoch": 2.76, "grad_norm": 0.7201161980628967, "learning_rate": 9.244786398780169e-06, "loss": 1.6607, "step": 83027 }, { "epoch": 2.76, "grad_norm": 0.7303521633148193, "learning_rate": 9.242211824405842e-06, "loss": 1.6861, "step": 83028 }, { "epoch": 2.76, "grad_norm": 0.716222882270813, "learning_rate": 9.239637602968664e-06, "loss": 1.7292, "step": 83029 }, { "epoch": 2.76, "grad_norm": 0.7122376561164856, "learning_rate": 9.237063734471729e-06, "loss": 1.6984, "step": 83030 }, { "epoch": 2.76, "grad_norm": 0.7242261171340942, "learning_rate": 9.2344902189181e-06, "loss": 1.6841, "step": 83031 }, { "epoch": 2.76, "grad_norm": 0.7378299832344055, "learning_rate": 9.231917056311012e-06, "loss": 1.6708, "step": 83032 }, { "epoch": 2.76, "grad_norm": 0.7192333936691284, "learning_rate": 9.229344246653492e-06, "loss": 1.6456, "step": 83033 }, { "epoch": 2.76, "grad_norm": 0.7208638787269592, "learning_rate": 9.226771789948739e-06, "loss": 1.6584, "step": 83034 }, { "epoch": 2.76, "grad_norm": 0.7069573998451233, "learning_rate": 9.224199686199818e-06, "loss": 1.7189, "step": 83035 }, { "epoch": 2.76, "grad_norm": 0.7206384539604187, "learning_rate": 9.221627935409892e-06, "loss": 1.6167, "step": 83036 }, { "epoch": 2.76, "grad_norm": 0.7393149733543396, "learning_rate": 9.219056537582092e-06, "loss": 1.678, "step": 83037 }, { "epoch": 2.76, "grad_norm": 0.7074559926986694, "learning_rate": 9.21648549271945e-06, "loss": 1.6271, "step": 83038 }, { "epoch": 2.76, "grad_norm": 0.7112521529197693, "learning_rate": 9.213914800825228e-06, "loss": 1.6373, "step": 83039 }, { "epoch": 2.76, "grad_norm": 0.7086647748947144, "learning_rate": 9.211344461902426e-06, "loss": 1.6326, "step": 83040 }, { "epoch": 2.76, "grad_norm": 0.7298330664634705, "learning_rate": 9.208774475954172e-06, "loss": 1.7665, "step": 83041 }, { "epoch": 2.76, "grad_norm": 0.7229004502296448, "learning_rate": 9.206204842983633e-06, "loss": 1.6898, "step": 83042 }, { "epoch": 2.76, "grad_norm": 0.7135365009307861, "learning_rate": 9.203635562993938e-06, "loss": 1.7094, "step": 83043 }, { "epoch": 2.76, "grad_norm": 0.7316967844963074, "learning_rate": 9.20106663598822e-06, "loss": 1.7259, "step": 83044 }, { "epoch": 2.76, "grad_norm": 0.7046305537223816, "learning_rate": 9.198498061969472e-06, "loss": 1.7168, "step": 83045 }, { "epoch": 2.76, "grad_norm": 0.7147979736328125, "learning_rate": 9.195929840940963e-06, "loss": 1.6717, "step": 83046 }, { "epoch": 2.76, "grad_norm": 0.7428495287895203, "learning_rate": 9.193361972905756e-06, "loss": 1.7672, "step": 83047 }, { "epoch": 2.76, "grad_norm": 0.7440834641456604, "learning_rate": 9.190794457866913e-06, "loss": 1.7398, "step": 83048 }, { "epoch": 2.76, "grad_norm": 0.6960038542747498, "learning_rate": 9.188227295827599e-06, "loss": 1.6522, "step": 83049 }, { "epoch": 2.76, "grad_norm": 0.7539088726043701, "learning_rate": 9.18566048679098e-06, "loss": 1.7144, "step": 83050 }, { "epoch": 2.76, "grad_norm": 0.7316123247146606, "learning_rate": 9.183094030760085e-06, "loss": 1.665, "step": 83051 }, { "epoch": 2.76, "grad_norm": 0.7291595935821533, "learning_rate": 9.180527927738047e-06, "loss": 1.669, "step": 83052 }, { "epoch": 2.76, "grad_norm": 0.7381641864776611, "learning_rate": 9.177962177728026e-06, "loss": 1.7273, "step": 83053 }, { "epoch": 2.76, "grad_norm": 0.7098169326782227, "learning_rate": 9.175396780733091e-06, "loss": 1.5988, "step": 83054 }, { "epoch": 2.76, "grad_norm": 0.7001732587814331, "learning_rate": 9.17283173675637e-06, "loss": 1.6718, "step": 83055 }, { "epoch": 2.76, "grad_norm": 0.7188848853111267, "learning_rate": 9.170267045800961e-06, "loss": 1.624, "step": 83056 }, { "epoch": 2.76, "grad_norm": 0.7458062767982483, "learning_rate": 9.167702707870095e-06, "loss": 1.6976, "step": 83057 }, { "epoch": 2.76, "grad_norm": 0.7199493646621704, "learning_rate": 9.16513872296667e-06, "loss": 1.6591, "step": 83058 }, { "epoch": 2.76, "grad_norm": 0.6929998397827148, "learning_rate": 9.16257509109395e-06, "loss": 1.7012, "step": 83059 }, { "epoch": 2.76, "grad_norm": 0.7500736117362976, "learning_rate": 9.16001181225503e-06, "loss": 1.7913, "step": 83060 }, { "epoch": 2.76, "grad_norm": 0.7219148874282837, "learning_rate": 9.157448886453011e-06, "loss": 1.6498, "step": 83061 }, { "epoch": 2.76, "grad_norm": 0.7304379343986511, "learning_rate": 9.154886313690957e-06, "loss": 1.7109, "step": 83062 }, { "epoch": 2.76, "grad_norm": 0.7137221097946167, "learning_rate": 9.15232409397203e-06, "loss": 1.7139, "step": 83063 }, { "epoch": 2.76, "grad_norm": 0.7211705446243286, "learning_rate": 9.149762227299361e-06, "loss": 1.6945, "step": 83064 }, { "epoch": 2.76, "grad_norm": 0.6942843198776245, "learning_rate": 9.147200713675985e-06, "loss": 1.6719, "step": 83065 }, { "epoch": 2.76, "grad_norm": 0.6858363747596741, "learning_rate": 9.14463955310506e-06, "loss": 1.6901, "step": 83066 }, { "epoch": 2.76, "grad_norm": 0.7183582782745361, "learning_rate": 9.142078745589688e-06, "loss": 1.6082, "step": 83067 }, { "epoch": 2.76, "grad_norm": 0.7209336757659912, "learning_rate": 9.139518291132997e-06, "loss": 1.6903, "step": 83068 }, { "epoch": 2.76, "grad_norm": 0.6910830140113831, "learning_rate": 9.136958189738053e-06, "loss": 1.6613, "step": 83069 }, { "epoch": 2.76, "grad_norm": 0.7145401835441589, "learning_rate": 9.134398441407986e-06, "loss": 1.6613, "step": 83070 }, { "epoch": 2.76, "grad_norm": 0.7067335247993469, "learning_rate": 9.131839046145928e-06, "loss": 1.7056, "step": 83071 }, { "epoch": 2.76, "grad_norm": 0.718444287776947, "learning_rate": 9.129280003954942e-06, "loss": 1.7009, "step": 83072 }, { "epoch": 2.76, "grad_norm": 0.720137357711792, "learning_rate": 9.126721314838126e-06, "loss": 1.6968, "step": 83073 }, { "epoch": 2.76, "grad_norm": 0.7252439856529236, "learning_rate": 9.124162978798644e-06, "loss": 1.6969, "step": 83074 }, { "epoch": 2.76, "grad_norm": 0.7313620448112488, "learning_rate": 9.12160499583956e-06, "loss": 1.6314, "step": 83075 }, { "epoch": 2.76, "grad_norm": 0.7131187319755554, "learning_rate": 9.119047365963972e-06, "loss": 1.6629, "step": 83076 }, { "epoch": 2.76, "grad_norm": 0.711037278175354, "learning_rate": 9.116490089175043e-06, "loss": 1.6733, "step": 83077 }, { "epoch": 2.76, "grad_norm": 0.7085496187210083, "learning_rate": 9.11393316547584e-06, "loss": 1.6606, "step": 83078 }, { "epoch": 2.76, "grad_norm": 0.7139533162117004, "learning_rate": 9.11137659486939e-06, "loss": 1.6956, "step": 83079 }, { "epoch": 2.76, "grad_norm": 0.6984603404998779, "learning_rate": 9.108820377358927e-06, "loss": 1.6684, "step": 83080 }, { "epoch": 2.76, "grad_norm": 0.6977020502090454, "learning_rate": 9.10626451294748e-06, "loss": 1.7117, "step": 83081 }, { "epoch": 2.76, "grad_norm": 0.6923685073852539, "learning_rate": 9.103709001638182e-06, "loss": 1.6673, "step": 83082 }, { "epoch": 2.76, "grad_norm": 0.72508305311203, "learning_rate": 9.101153843434094e-06, "loss": 1.6543, "step": 83083 }, { "epoch": 2.76, "grad_norm": 0.6963637471199036, "learning_rate": 9.09859903833835e-06, "loss": 1.6622, "step": 83084 }, { "epoch": 2.76, "grad_norm": 0.7309736013412476, "learning_rate": 9.096044586354078e-06, "loss": 1.6654, "step": 83085 }, { "epoch": 2.76, "grad_norm": 0.6999434232711792, "learning_rate": 9.093490487484278e-06, "loss": 1.6387, "step": 83086 }, { "epoch": 2.76, "grad_norm": 0.7127120494842529, "learning_rate": 9.09093674173218e-06, "loss": 1.6989, "step": 83087 }, { "epoch": 2.76, "grad_norm": 0.7364698648452759, "learning_rate": 9.088383349100781e-06, "loss": 1.636, "step": 83088 }, { "epoch": 2.76, "grad_norm": 0.7366445064544678, "learning_rate": 9.085830309593245e-06, "loss": 1.7051, "step": 83089 }, { "epoch": 2.76, "grad_norm": 0.7041568160057068, "learning_rate": 9.08327762321267e-06, "loss": 1.6877, "step": 83090 }, { "epoch": 2.76, "grad_norm": 0.7081766128540039, "learning_rate": 9.080725289962054e-06, "loss": 1.6917, "step": 83091 }, { "epoch": 2.76, "grad_norm": 0.7115280628204346, "learning_rate": 9.078173309844627e-06, "loss": 1.7947, "step": 83092 }, { "epoch": 2.76, "grad_norm": 0.7192825078964233, "learning_rate": 9.075621682863421e-06, "loss": 1.7016, "step": 83093 }, { "epoch": 2.76, "grad_norm": 0.7063245177268982, "learning_rate": 9.073070409021566e-06, "loss": 1.7273, "step": 83094 }, { "epoch": 2.76, "grad_norm": 0.7130308151245117, "learning_rate": 9.070519488322093e-06, "loss": 1.7344, "step": 83095 }, { "epoch": 2.76, "grad_norm": 0.706150233745575, "learning_rate": 9.067968920768198e-06, "loss": 1.6814, "step": 83096 }, { "epoch": 2.76, "grad_norm": 0.7104725241661072, "learning_rate": 9.065418706362881e-06, "loss": 1.6297, "step": 83097 }, { "epoch": 2.76, "grad_norm": 0.735698938369751, "learning_rate": 9.062868845109273e-06, "loss": 1.6668, "step": 83098 }, { "epoch": 2.76, "grad_norm": 0.7248831391334534, "learning_rate": 9.060319337010502e-06, "loss": 1.6612, "step": 83099 }, { "epoch": 2.76, "grad_norm": 0.7047684192657471, "learning_rate": 9.057770182069667e-06, "loss": 1.668, "step": 83100 }, { "epoch": 2.76, "grad_norm": 0.7184861302375793, "learning_rate": 9.055221380289768e-06, "loss": 1.6845, "step": 83101 }, { "epoch": 2.76, "grad_norm": 0.7146840691566467, "learning_rate": 9.052672931673965e-06, "loss": 1.6961, "step": 83102 }, { "epoch": 2.76, "grad_norm": 0.7397112250328064, "learning_rate": 9.050124836225358e-06, "loss": 1.6691, "step": 83103 }, { "epoch": 2.76, "grad_norm": 0.7167718410491943, "learning_rate": 9.047577093947045e-06, "loss": 1.7194, "step": 83104 }, { "epoch": 2.76, "grad_norm": 0.688812792301178, "learning_rate": 9.045029704842089e-06, "loss": 1.6299, "step": 83105 }, { "epoch": 2.76, "grad_norm": 0.7023512125015259, "learning_rate": 9.04248266891362e-06, "loss": 1.6568, "step": 83106 }, { "epoch": 2.76, "grad_norm": 0.7469486594200134, "learning_rate": 9.039935986164704e-06, "loss": 1.638, "step": 83107 }, { "epoch": 2.77, "grad_norm": 0.7123900055885315, "learning_rate": 9.037389656598403e-06, "loss": 1.7459, "step": 83108 }, { "epoch": 2.77, "grad_norm": 0.7146784067153931, "learning_rate": 9.034843680217851e-06, "loss": 1.6861, "step": 83109 }, { "epoch": 2.77, "grad_norm": 0.7120358943939209, "learning_rate": 9.032298057026144e-06, "loss": 1.6857, "step": 83110 }, { "epoch": 2.77, "grad_norm": 0.6990190148353577, "learning_rate": 9.029752787026345e-06, "loss": 1.5887, "step": 83111 }, { "epoch": 2.77, "grad_norm": 0.7534798979759216, "learning_rate": 9.02720787022152e-06, "loss": 1.6845, "step": 83112 }, { "epoch": 2.77, "grad_norm": 0.7325466871261597, "learning_rate": 9.024663306614865e-06, "loss": 1.6601, "step": 83113 }, { "epoch": 2.77, "grad_norm": 0.7188360095024109, "learning_rate": 9.022119096209346e-06, "loss": 1.7259, "step": 83114 }, { "epoch": 2.77, "grad_norm": 0.7200323343276978, "learning_rate": 9.019575239008092e-06, "loss": 1.7302, "step": 83115 }, { "epoch": 2.77, "grad_norm": 0.7141616344451904, "learning_rate": 9.017031735014202e-06, "loss": 1.6839, "step": 83116 }, { "epoch": 2.77, "grad_norm": 0.746334969997406, "learning_rate": 9.014488584230806e-06, "loss": 1.7078, "step": 83117 }, { "epoch": 2.77, "grad_norm": 0.7205361127853394, "learning_rate": 9.011945786660934e-06, "loss": 1.6678, "step": 83118 }, { "epoch": 2.77, "grad_norm": 0.6939194798469543, "learning_rate": 9.009403342307653e-06, "loss": 1.6983, "step": 83119 }, { "epoch": 2.77, "grad_norm": 0.742615282535553, "learning_rate": 9.006861251174092e-06, "loss": 1.6604, "step": 83120 }, { "epoch": 2.77, "grad_norm": 0.7514804601669312, "learning_rate": 9.004319513263348e-06, "loss": 1.6814, "step": 83121 }, { "epoch": 2.77, "grad_norm": 0.7505360245704651, "learning_rate": 9.001778128578452e-06, "loss": 1.6666, "step": 83122 }, { "epoch": 2.77, "grad_norm": 0.7232410311698914, "learning_rate": 8.999237097122502e-06, "loss": 1.6571, "step": 83123 }, { "epoch": 2.77, "grad_norm": 0.7131702899932861, "learning_rate": 8.996696418898664e-06, "loss": 1.6891, "step": 83124 }, { "epoch": 2.77, "grad_norm": 0.6863385438919067, "learning_rate": 8.994156093909932e-06, "loss": 1.615, "step": 83125 }, { "epoch": 2.77, "grad_norm": 0.7061020135879517, "learning_rate": 8.991616122159406e-06, "loss": 1.6513, "step": 83126 }, { "epoch": 2.77, "grad_norm": 0.7105209231376648, "learning_rate": 8.989076503650183e-06, "loss": 1.7198, "step": 83127 }, { "epoch": 2.77, "grad_norm": 0.7379095554351807, "learning_rate": 8.98653723838536e-06, "loss": 1.6158, "step": 83128 }, { "epoch": 2.77, "grad_norm": 0.7350775599479675, "learning_rate": 8.98399832636797e-06, "loss": 1.6947, "step": 83129 }, { "epoch": 2.77, "grad_norm": 0.693109929561615, "learning_rate": 8.981459767601174e-06, "loss": 1.6383, "step": 83130 }, { "epoch": 2.77, "grad_norm": 0.7295911312103271, "learning_rate": 8.978921562087971e-06, "loss": 1.6797, "step": 83131 }, { "epoch": 2.77, "grad_norm": 0.7269630432128906, "learning_rate": 8.976383709831492e-06, "loss": 1.6541, "step": 83132 }, { "epoch": 2.77, "grad_norm": 0.7011561393737793, "learning_rate": 8.973846210834767e-06, "loss": 1.6158, "step": 83133 }, { "epoch": 2.77, "grad_norm": 0.7161046862602234, "learning_rate": 8.971309065100963e-06, "loss": 1.699, "step": 83134 }, { "epoch": 2.77, "grad_norm": 0.6994796991348267, "learning_rate": 8.968772272633107e-06, "loss": 1.6835, "step": 83135 }, { "epoch": 2.77, "grad_norm": 0.6978858709335327, "learning_rate": 8.966235833434233e-06, "loss": 1.6931, "step": 83136 }, { "epoch": 2.77, "grad_norm": 0.7230271697044373, "learning_rate": 8.963699747507502e-06, "loss": 1.8058, "step": 83137 }, { "epoch": 2.77, "grad_norm": 0.7476041913032532, "learning_rate": 8.961164014855916e-06, "loss": 1.7405, "step": 83138 }, { "epoch": 2.77, "grad_norm": 0.7527039051055908, "learning_rate": 8.958628635482668e-06, "loss": 1.6921, "step": 83139 }, { "epoch": 2.77, "grad_norm": 0.7025770545005798, "learning_rate": 8.956093609390691e-06, "loss": 1.6406, "step": 83140 }, { "epoch": 2.77, "grad_norm": 0.7052412033081055, "learning_rate": 8.953558936583183e-06, "loss": 1.7219, "step": 83141 }, { "epoch": 2.77, "grad_norm": 0.7191510796546936, "learning_rate": 8.95102461706314e-06, "loss": 1.6638, "step": 83142 }, { "epoch": 2.77, "grad_norm": 0.7251493334770203, "learning_rate": 8.948490650833662e-06, "loss": 1.6879, "step": 83143 }, { "epoch": 2.77, "grad_norm": 0.7601296901702881, "learning_rate": 8.945957037897878e-06, "loss": 1.6693, "step": 83144 }, { "epoch": 2.77, "grad_norm": 0.7259070873260498, "learning_rate": 8.943423778258784e-06, "loss": 1.7905, "step": 83145 }, { "epoch": 2.77, "grad_norm": 0.7122774124145508, "learning_rate": 8.940890871919482e-06, "loss": 1.6488, "step": 83146 }, { "epoch": 2.77, "grad_norm": 0.7117461562156677, "learning_rate": 8.9383583188831e-06, "loss": 1.6424, "step": 83147 }, { "epoch": 2.77, "grad_norm": 0.7224170565605164, "learning_rate": 8.9358261191526e-06, "loss": 1.64, "step": 83148 }, { "epoch": 2.77, "grad_norm": 0.7210542559623718, "learning_rate": 8.933294272731151e-06, "loss": 1.6169, "step": 83149 }, { "epoch": 2.77, "grad_norm": 0.6929188370704651, "learning_rate": 8.930762779621781e-06, "loss": 1.6265, "step": 83150 }, { "epoch": 2.77, "grad_norm": 0.6973644495010376, "learning_rate": 8.928231639827621e-06, "loss": 1.6202, "step": 83151 }, { "epoch": 2.77, "grad_norm": 0.7028205990791321, "learning_rate": 8.925700853351637e-06, "loss": 1.6571, "step": 83152 }, { "epoch": 2.77, "grad_norm": 0.718655526638031, "learning_rate": 8.923170420197023e-06, "loss": 1.653, "step": 83153 }, { "epoch": 2.77, "grad_norm": 0.7279055714607239, "learning_rate": 8.92064034036678e-06, "loss": 1.6593, "step": 83154 }, { "epoch": 2.77, "grad_norm": 0.7109500169754028, "learning_rate": 8.918110613863972e-06, "loss": 1.7312, "step": 83155 }, { "epoch": 2.77, "grad_norm": 0.7318155169487, "learning_rate": 8.915581240691694e-06, "loss": 1.6884, "step": 83156 }, { "epoch": 2.77, "grad_norm": 0.7426075339317322, "learning_rate": 8.913052220853012e-06, "loss": 1.6928, "step": 83157 }, { "epoch": 2.77, "grad_norm": 0.7045717835426331, "learning_rate": 8.91052355435099e-06, "loss": 1.6879, "step": 83158 }, { "epoch": 2.77, "grad_norm": 0.7259672284126282, "learning_rate": 8.907995241188693e-06, "loss": 1.6639, "step": 83159 }, { "epoch": 2.77, "grad_norm": 0.7067552208900452, "learning_rate": 8.905467281369216e-06, "loss": 1.6145, "step": 83160 }, { "epoch": 2.77, "grad_norm": 1.4770588874816895, "learning_rate": 8.902939674895626e-06, "loss": 1.6226, "step": 83161 }, { "epoch": 2.77, "grad_norm": 0.6974164247512817, "learning_rate": 8.900412421770952e-06, "loss": 1.676, "step": 83162 }, { "epoch": 2.77, "grad_norm": 0.7188319563865662, "learning_rate": 8.897885521998293e-06, "loss": 1.713, "step": 83163 }, { "epoch": 2.77, "grad_norm": 0.7116071581840515, "learning_rate": 8.895358975580746e-06, "loss": 1.6567, "step": 83164 }, { "epoch": 2.77, "grad_norm": 0.7242291569709778, "learning_rate": 8.892832782521275e-06, "loss": 1.7282, "step": 83165 }, { "epoch": 2.77, "grad_norm": 0.7261223793029785, "learning_rate": 8.89030694282301e-06, "loss": 1.756, "step": 83166 }, { "epoch": 2.77, "grad_norm": 0.7175697684288025, "learning_rate": 8.887781456489118e-06, "loss": 1.5681, "step": 83167 }, { "epoch": 2.77, "grad_norm": 0.7209180593490601, "learning_rate": 8.885256323522461e-06, "loss": 1.7288, "step": 83168 }, { "epoch": 2.77, "grad_norm": 0.7232216596603394, "learning_rate": 8.882731543926236e-06, "loss": 1.7106, "step": 83169 }, { "epoch": 2.77, "grad_norm": 0.7118465900421143, "learning_rate": 8.880207117703508e-06, "loss": 1.7059, "step": 83170 }, { "epoch": 2.77, "grad_norm": 0.7215470671653748, "learning_rate": 8.87768304485731e-06, "loss": 1.713, "step": 83171 }, { "epoch": 2.77, "grad_norm": 0.7134445905685425, "learning_rate": 8.875159325390668e-06, "loss": 1.6818, "step": 83172 }, { "epoch": 2.77, "grad_norm": 0.7313535213470459, "learning_rate": 8.872635959306718e-06, "loss": 1.6674, "step": 83173 }, { "epoch": 2.77, "grad_norm": 0.7077286839485168, "learning_rate": 8.87011294660852e-06, "loss": 1.7236, "step": 83174 }, { "epoch": 2.77, "grad_norm": 0.7210028767585754, "learning_rate": 8.867590287299075e-06, "loss": 1.6961, "step": 83175 }, { "epoch": 2.77, "grad_norm": 0.7417636513710022, "learning_rate": 8.865067981381479e-06, "loss": 1.7214, "step": 83176 }, { "epoch": 2.77, "grad_norm": 0.7073229551315308, "learning_rate": 8.862546028858796e-06, "loss": 1.6979, "step": 83177 }, { "epoch": 2.77, "grad_norm": 0.7109645009040833, "learning_rate": 8.860024429734092e-06, "loss": 1.6214, "step": 83178 }, { "epoch": 2.77, "grad_norm": 0.6965793371200562, "learning_rate": 8.857503184010428e-06, "loss": 1.6498, "step": 83179 }, { "epoch": 2.77, "grad_norm": 0.7265356183052063, "learning_rate": 8.854982291690805e-06, "loss": 1.6587, "step": 83180 }, { "epoch": 2.77, "grad_norm": 0.6971760988235474, "learning_rate": 8.852461752778416e-06, "loss": 1.6728, "step": 83181 }, { "epoch": 2.77, "grad_norm": 0.7296112775802612, "learning_rate": 8.849941567276198e-06, "loss": 1.6782, "step": 83182 }, { "epoch": 2.77, "grad_norm": 0.7355435490608215, "learning_rate": 8.847421735187244e-06, "loss": 1.6354, "step": 83183 }, { "epoch": 2.77, "grad_norm": 0.6824144721031189, "learning_rate": 8.844902256514652e-06, "loss": 1.7099, "step": 83184 }, { "epoch": 2.77, "grad_norm": 0.7033150792121887, "learning_rate": 8.842383131261422e-06, "loss": 1.6753, "step": 83185 }, { "epoch": 2.77, "grad_norm": 0.7084798812866211, "learning_rate": 8.839864359430614e-06, "loss": 1.6756, "step": 83186 }, { "epoch": 2.77, "grad_norm": 0.7037303447723389, "learning_rate": 8.837345941025365e-06, "loss": 1.7069, "step": 83187 }, { "epoch": 2.77, "grad_norm": 0.6949071884155273, "learning_rate": 8.834827876048667e-06, "loss": 1.6941, "step": 83188 }, { "epoch": 2.77, "grad_norm": 0.6907302141189575, "learning_rate": 8.832310164503554e-06, "loss": 1.5833, "step": 83189 }, { "epoch": 2.77, "grad_norm": 0.707166850566864, "learning_rate": 8.82979280639312e-06, "loss": 1.7395, "step": 83190 }, { "epoch": 2.77, "grad_norm": 0.7227161526679993, "learning_rate": 8.827275801720435e-06, "loss": 1.6808, "step": 83191 }, { "epoch": 2.77, "grad_norm": 0.7153960466384888, "learning_rate": 8.824759150488558e-06, "loss": 1.6996, "step": 83192 }, { "epoch": 2.77, "grad_norm": 0.7093067765235901, "learning_rate": 8.822242852700456e-06, "loss": 1.6298, "step": 83193 }, { "epoch": 2.77, "grad_norm": 0.6966698169708252, "learning_rate": 8.819726908359326e-06, "loss": 1.6512, "step": 83194 }, { "epoch": 2.77, "grad_norm": 0.7355120182037354, "learning_rate": 8.817211317468099e-06, "loss": 1.5832, "step": 83195 }, { "epoch": 2.77, "grad_norm": 0.7130110263824463, "learning_rate": 8.814696080029871e-06, "loss": 1.6846, "step": 83196 }, { "epoch": 2.77, "grad_norm": 0.7202185988426208, "learning_rate": 8.812181196047707e-06, "loss": 1.633, "step": 83197 }, { "epoch": 2.77, "grad_norm": 0.7451802492141724, "learning_rate": 8.809666665524639e-06, "loss": 1.7063, "step": 83198 }, { "epoch": 2.77, "grad_norm": 0.7175570726394653, "learning_rate": 8.807152488463765e-06, "loss": 1.643, "step": 83199 }, { "epoch": 2.77, "grad_norm": 0.709153413772583, "learning_rate": 8.804638664868047e-06, "loss": 1.621, "step": 83200 }, { "epoch": 2.77, "grad_norm": 0.7311136722564697, "learning_rate": 8.802125194740616e-06, "loss": 1.7095, "step": 83201 }, { "epoch": 2.77, "grad_norm": 0.7127227187156677, "learning_rate": 8.79961207808454e-06, "loss": 1.6767, "step": 83202 }, { "epoch": 2.77, "grad_norm": 0.7267947196960449, "learning_rate": 8.797099314902779e-06, "loss": 1.6826, "step": 83203 }, { "epoch": 2.77, "grad_norm": 0.7141917943954468, "learning_rate": 8.794586905198465e-06, "loss": 1.7134, "step": 83204 }, { "epoch": 2.77, "grad_norm": 0.7188350558280945, "learning_rate": 8.792074848974563e-06, "loss": 1.6817, "step": 83205 }, { "epoch": 2.77, "grad_norm": 0.7133264541625977, "learning_rate": 8.789563146234235e-06, "loss": 1.7029, "step": 83206 }, { "epoch": 2.77, "grad_norm": 0.7071139812469482, "learning_rate": 8.78705179698045e-06, "loss": 1.6568, "step": 83207 }, { "epoch": 2.77, "grad_norm": 0.7048307657241821, "learning_rate": 8.784540801216266e-06, "loss": 1.6147, "step": 83208 }, { "epoch": 2.77, "grad_norm": 0.6950035691261292, "learning_rate": 8.782030158944752e-06, "loss": 1.6058, "step": 83209 }, { "epoch": 2.77, "grad_norm": 0.7186881303787231, "learning_rate": 8.779519870168905e-06, "loss": 1.6441, "step": 83210 }, { "epoch": 2.77, "grad_norm": 0.6918978691101074, "learning_rate": 8.777009934891854e-06, "loss": 1.5939, "step": 83211 }, { "epoch": 2.77, "grad_norm": 0.7114892601966858, "learning_rate": 8.774500353116598e-06, "loss": 1.6924, "step": 83212 }, { "epoch": 2.77, "grad_norm": 0.7216336131095886, "learning_rate": 8.771991124846167e-06, "loss": 1.6949, "step": 83213 }, { "epoch": 2.77, "grad_norm": 0.7135102152824402, "learning_rate": 8.76948225008366e-06, "loss": 1.6369, "step": 83214 }, { "epoch": 2.77, "grad_norm": 0.7095181345939636, "learning_rate": 8.766973728832071e-06, "loss": 1.7004, "step": 83215 }, { "epoch": 2.77, "grad_norm": 0.700596272945404, "learning_rate": 8.76446556109447e-06, "loss": 1.7536, "step": 83216 }, { "epoch": 2.77, "grad_norm": 0.7254689335823059, "learning_rate": 8.761957746873916e-06, "loss": 1.7799, "step": 83217 }, { "epoch": 2.77, "grad_norm": 0.6980928182601929, "learning_rate": 8.759450286173408e-06, "loss": 1.6694, "step": 83218 }, { "epoch": 2.77, "grad_norm": 0.7336909174919128, "learning_rate": 8.756943178996012e-06, "loss": 1.7557, "step": 83219 }, { "epoch": 2.77, "grad_norm": 0.7132227420806885, "learning_rate": 8.754436425344791e-06, "loss": 1.6354, "step": 83220 }, { "epoch": 2.77, "grad_norm": 0.7132822871208191, "learning_rate": 8.751930025222775e-06, "loss": 1.6432, "step": 83221 }, { "epoch": 2.77, "grad_norm": 0.7087342739105225, "learning_rate": 8.749423978632964e-06, "loss": 1.7287, "step": 83222 }, { "epoch": 2.77, "grad_norm": 0.6906298398971558, "learning_rate": 8.746918285578486e-06, "loss": 1.594, "step": 83223 }, { "epoch": 2.77, "grad_norm": 0.7208209037780762, "learning_rate": 8.744412946062307e-06, "loss": 1.717, "step": 83224 }, { "epoch": 2.77, "grad_norm": 0.7360394597053528, "learning_rate": 8.741907960087491e-06, "loss": 1.6317, "step": 83225 }, { "epoch": 2.77, "grad_norm": 0.7367966175079346, "learning_rate": 8.739403327657068e-06, "loss": 1.6778, "step": 83226 }, { "epoch": 2.77, "grad_norm": 0.7132290601730347, "learning_rate": 8.736899048774137e-06, "loss": 1.5922, "step": 83227 }, { "epoch": 2.77, "grad_norm": 0.7016141414642334, "learning_rate": 8.734395123441695e-06, "loss": 1.7066, "step": 83228 }, { "epoch": 2.77, "grad_norm": 0.7283588647842407, "learning_rate": 8.731891551662773e-06, "loss": 1.7191, "step": 83229 }, { "epoch": 2.77, "grad_norm": 0.7357426285743713, "learning_rate": 8.729388333440402e-06, "loss": 1.5462, "step": 83230 }, { "epoch": 2.77, "grad_norm": 0.6933260560035706, "learning_rate": 8.726885468777678e-06, "loss": 1.6248, "step": 83231 }, { "epoch": 2.77, "grad_norm": 0.7124179005622864, "learning_rate": 8.724382957677534e-06, "loss": 1.7359, "step": 83232 }, { "epoch": 2.77, "grad_norm": 0.6998195648193359, "learning_rate": 8.7218808001431e-06, "loss": 1.6343, "step": 83233 }, { "epoch": 2.77, "grad_norm": 0.7242333889007568, "learning_rate": 8.719378996177405e-06, "loss": 1.688, "step": 83234 }, { "epoch": 2.77, "grad_norm": 0.710580587387085, "learning_rate": 8.716877545783452e-06, "loss": 1.7221, "step": 83235 }, { "epoch": 2.77, "grad_norm": 0.7134355306625366, "learning_rate": 8.714376448964266e-06, "loss": 1.6346, "step": 83236 }, { "epoch": 2.77, "grad_norm": 0.7346644401550293, "learning_rate": 8.711875705722948e-06, "loss": 1.695, "step": 83237 }, { "epoch": 2.77, "grad_norm": 0.696885883808136, "learning_rate": 8.709375316062494e-06, "loss": 1.6567, "step": 83238 }, { "epoch": 2.77, "grad_norm": 0.7259223461151123, "learning_rate": 8.706875279985903e-06, "loss": 1.724, "step": 83239 }, { "epoch": 2.77, "grad_norm": 0.7015442252159119, "learning_rate": 8.704375597496239e-06, "loss": 1.6772, "step": 83240 }, { "epoch": 2.77, "grad_norm": 0.7128569483757019, "learning_rate": 8.701876268596597e-06, "loss": 1.6968, "step": 83241 }, { "epoch": 2.77, "grad_norm": 0.7162612080574036, "learning_rate": 8.699377293289944e-06, "loss": 1.6583, "step": 83242 }, { "epoch": 2.77, "grad_norm": 0.7045608758926392, "learning_rate": 8.696878671579277e-06, "loss": 1.7217, "step": 83243 }, { "epoch": 2.77, "grad_norm": 0.6984396576881409, "learning_rate": 8.694380403467727e-06, "loss": 1.5923, "step": 83244 }, { "epoch": 2.77, "grad_norm": 0.7133253216743469, "learning_rate": 8.691882488958257e-06, "loss": 1.6705, "step": 83245 }, { "epoch": 2.77, "grad_norm": 0.6954018473625183, "learning_rate": 8.6893849280539e-06, "loss": 1.6034, "step": 83246 }, { "epoch": 2.77, "grad_norm": 0.7169537544250488, "learning_rate": 8.68688772075775e-06, "loss": 1.7353, "step": 83247 }, { "epoch": 2.77, "grad_norm": 0.7005930542945862, "learning_rate": 8.684390867072776e-06, "loss": 1.6638, "step": 83248 }, { "epoch": 2.77, "grad_norm": 0.7026023268699646, "learning_rate": 8.68189436700204e-06, "loss": 1.6674, "step": 83249 }, { "epoch": 2.77, "grad_norm": 0.7166688442230225, "learning_rate": 8.679398220548506e-06, "loss": 1.6487, "step": 83250 }, { "epoch": 2.77, "grad_norm": 0.7108433246612549, "learning_rate": 8.676902427715338e-06, "loss": 1.6939, "step": 83251 }, { "epoch": 2.77, "grad_norm": 0.6878082752227783, "learning_rate": 8.674406988505433e-06, "loss": 1.6884, "step": 83252 }, { "epoch": 2.77, "grad_norm": 0.7004671096801758, "learning_rate": 8.671911902921891e-06, "loss": 1.6161, "step": 83253 }, { "epoch": 2.77, "grad_norm": 0.6999266743659973, "learning_rate": 8.66941717096774e-06, "loss": 1.73, "step": 83254 }, { "epoch": 2.77, "grad_norm": 0.7177104949951172, "learning_rate": 8.666922792645948e-06, "loss": 1.6895, "step": 83255 }, { "epoch": 2.77, "grad_norm": 0.7217665910720825, "learning_rate": 8.664428767959607e-06, "loss": 1.6772, "step": 83256 }, { "epoch": 2.77, "grad_norm": 0.6917738914489746, "learning_rate": 8.661935096911721e-06, "loss": 1.74, "step": 83257 }, { "epoch": 2.77, "grad_norm": 0.708153247833252, "learning_rate": 8.659441779505317e-06, "loss": 1.6853, "step": 83258 }, { "epoch": 2.77, "grad_norm": 0.7044751644134521, "learning_rate": 8.656948815743458e-06, "loss": 1.6498, "step": 83259 }, { "epoch": 2.77, "grad_norm": 0.7407015562057495, "learning_rate": 8.654456205629079e-06, "loss": 1.682, "step": 83260 }, { "epoch": 2.77, "grad_norm": 0.706764280796051, "learning_rate": 8.651963949165309e-06, "loss": 1.6826, "step": 83261 }, { "epoch": 2.77, "grad_norm": 0.7056185007095337, "learning_rate": 8.649472046355077e-06, "loss": 1.6321, "step": 83262 }, { "epoch": 2.77, "grad_norm": 0.7357836961746216, "learning_rate": 8.646980497201517e-06, "loss": 1.6664, "step": 83263 }, { "epoch": 2.77, "grad_norm": 0.69821697473526, "learning_rate": 8.644489301707591e-06, "loss": 1.6276, "step": 83264 }, { "epoch": 2.77, "grad_norm": 0.7838264107704163, "learning_rate": 8.641998459876265e-06, "loss": 1.7774, "step": 83265 }, { "epoch": 2.77, "grad_norm": 0.7023967504501343, "learning_rate": 8.63950797171067e-06, "loss": 1.7232, "step": 83266 }, { "epoch": 2.77, "grad_norm": 0.7097041010856628, "learning_rate": 8.637017837213767e-06, "loss": 1.6381, "step": 83267 }, { "epoch": 2.77, "grad_norm": 0.713336169719696, "learning_rate": 8.63452805638859e-06, "loss": 1.713, "step": 83268 }, { "epoch": 2.77, "grad_norm": 0.7268514633178711, "learning_rate": 8.63203862923817e-06, "loss": 1.6585, "step": 83269 }, { "epoch": 2.77, "grad_norm": 0.7006853222846985, "learning_rate": 8.629549555765535e-06, "loss": 1.6219, "step": 83270 }, { "epoch": 2.77, "grad_norm": 0.7028687000274658, "learning_rate": 8.62706083597372e-06, "loss": 1.6136, "step": 83271 }, { "epoch": 2.77, "grad_norm": 0.7305161952972412, "learning_rate": 8.624572469865654e-06, "loss": 1.7173, "step": 83272 }, { "epoch": 2.77, "grad_norm": 0.6936855912208557, "learning_rate": 8.622084457444468e-06, "loss": 1.7159, "step": 83273 }, { "epoch": 2.77, "grad_norm": 0.7025606036186218, "learning_rate": 8.619596798713157e-06, "loss": 1.65, "step": 83274 }, { "epoch": 2.77, "grad_norm": 0.7298140525817871, "learning_rate": 8.617109493674656e-06, "loss": 1.7642, "step": 83275 }, { "epoch": 2.77, "grad_norm": 0.7048852443695068, "learning_rate": 8.614622542332095e-06, "loss": 1.7142, "step": 83276 }, { "epoch": 2.77, "grad_norm": 0.7021560072898865, "learning_rate": 8.612135944688437e-06, "loss": 1.6897, "step": 83277 }, { "epoch": 2.77, "grad_norm": 0.7247329950332642, "learning_rate": 8.609649700746746e-06, "loss": 1.6814, "step": 83278 }, { "epoch": 2.77, "grad_norm": 0.6943159103393555, "learning_rate": 8.607163810509954e-06, "loss": 1.6625, "step": 83279 }, { "epoch": 2.77, "grad_norm": 0.7261478900909424, "learning_rate": 8.604678273981191e-06, "loss": 1.6754, "step": 83280 }, { "epoch": 2.77, "grad_norm": 0.6925122737884521, "learning_rate": 8.60219309116339e-06, "loss": 1.6781, "step": 83281 }, { "epoch": 2.77, "grad_norm": 0.7156303524971008, "learning_rate": 8.599708262059546e-06, "loss": 1.7042, "step": 83282 }, { "epoch": 2.77, "grad_norm": 0.6839919090270996, "learning_rate": 8.597223786672759e-06, "loss": 1.6607, "step": 83283 }, { "epoch": 2.77, "grad_norm": 0.7005460858345032, "learning_rate": 8.594739665006024e-06, "loss": 1.6744, "step": 83284 }, { "epoch": 2.77, "grad_norm": 0.7107188105583191, "learning_rate": 8.592255897062306e-06, "loss": 1.7296, "step": 83285 }, { "epoch": 2.77, "grad_norm": 0.7012481093406677, "learning_rate": 8.589772482844637e-06, "loss": 1.6701, "step": 83286 }, { "epoch": 2.77, "grad_norm": 0.6968072652816772, "learning_rate": 8.58728942235608e-06, "loss": 1.6528, "step": 83287 }, { "epoch": 2.77, "grad_norm": 0.704069972038269, "learning_rate": 8.584806715599635e-06, "loss": 1.6845, "step": 83288 }, { "epoch": 2.77, "grad_norm": 0.7367013096809387, "learning_rate": 8.582324362578264e-06, "loss": 1.6845, "step": 83289 }, { "epoch": 2.77, "grad_norm": 0.7112302780151367, "learning_rate": 8.579842363294998e-06, "loss": 1.6879, "step": 83290 }, { "epoch": 2.77, "grad_norm": 0.6869561672210693, "learning_rate": 8.577360717752901e-06, "loss": 1.6422, "step": 83291 }, { "epoch": 2.77, "grad_norm": 0.7029450535774231, "learning_rate": 8.57487942595494e-06, "loss": 1.6312, "step": 83292 }, { "epoch": 2.77, "grad_norm": 0.7104790210723877, "learning_rate": 8.572398487904109e-06, "loss": 1.6893, "step": 83293 }, { "epoch": 2.77, "grad_norm": 0.7360495924949646, "learning_rate": 8.569917903603473e-06, "loss": 1.7052, "step": 83294 }, { "epoch": 2.77, "grad_norm": 0.7102518081665039, "learning_rate": 8.567437673056032e-06, "loss": 1.7508, "step": 83295 }, { "epoch": 2.77, "grad_norm": 0.7066997289657593, "learning_rate": 8.56495779626475e-06, "loss": 1.6897, "step": 83296 }, { "epoch": 2.77, "grad_norm": 0.7003724575042725, "learning_rate": 8.562478273232654e-06, "loss": 1.6842, "step": 83297 }, { "epoch": 2.77, "grad_norm": 0.686930239200592, "learning_rate": 8.559999103962811e-06, "loss": 1.6245, "step": 83298 }, { "epoch": 2.77, "grad_norm": 0.715343177318573, "learning_rate": 8.557520288458153e-06, "loss": 1.7014, "step": 83299 }, { "epoch": 2.77, "grad_norm": 0.6890689134597778, "learning_rate": 8.555041826721742e-06, "loss": 1.6623, "step": 83300 }, { "epoch": 2.77, "grad_norm": 0.7264367938041687, "learning_rate": 8.552563718756577e-06, "loss": 1.7033, "step": 83301 }, { "epoch": 2.77, "grad_norm": 0.72647625207901, "learning_rate": 8.550085964565655e-06, "loss": 1.6812, "step": 83302 }, { "epoch": 2.77, "grad_norm": 0.7089095711708069, "learning_rate": 8.547608564151942e-06, "loss": 1.6386, "step": 83303 }, { "epoch": 2.77, "grad_norm": 0.7045220136642456, "learning_rate": 8.545131517518534e-06, "loss": 1.712, "step": 83304 }, { "epoch": 2.77, "grad_norm": 0.7053245902061462, "learning_rate": 8.542654824668394e-06, "loss": 1.6575, "step": 83305 }, { "epoch": 2.77, "grad_norm": 0.7321754097938538, "learning_rate": 8.540178485604488e-06, "loss": 1.7293, "step": 83306 }, { "epoch": 2.77, "grad_norm": 0.7396799921989441, "learning_rate": 8.53770250032988e-06, "loss": 1.6614, "step": 83307 }, { "epoch": 2.77, "grad_norm": 0.7272584438323975, "learning_rate": 8.535226868847566e-06, "loss": 1.5953, "step": 83308 }, { "epoch": 2.77, "grad_norm": 0.7424659729003906, "learning_rate": 8.532751591160547e-06, "loss": 1.6963, "step": 83309 }, { "epoch": 2.77, "grad_norm": 0.7182360887527466, "learning_rate": 8.530276667271818e-06, "loss": 1.746, "step": 83310 }, { "epoch": 2.77, "grad_norm": 0.6953132748603821, "learning_rate": 8.527802097184378e-06, "loss": 1.6263, "step": 83311 }, { "epoch": 2.77, "grad_norm": 0.7284557223320007, "learning_rate": 8.525327880901256e-06, "loss": 1.7046, "step": 83312 }, { "epoch": 2.77, "grad_norm": 0.7112337946891785, "learning_rate": 8.522854018425418e-06, "loss": 1.6595, "step": 83313 }, { "epoch": 2.77, "grad_norm": 0.7182320356369019, "learning_rate": 8.520380509759894e-06, "loss": 1.7577, "step": 83314 }, { "epoch": 2.77, "grad_norm": 0.703501284122467, "learning_rate": 8.517907354907682e-06, "loss": 1.704, "step": 83315 }, { "epoch": 2.77, "grad_norm": 0.7068827152252197, "learning_rate": 8.51543455387178e-06, "loss": 1.6962, "step": 83316 }, { "epoch": 2.77, "grad_norm": 0.705694317817688, "learning_rate": 8.512962106655186e-06, "loss": 1.6192, "step": 83317 }, { "epoch": 2.77, "grad_norm": 0.7284463047981262, "learning_rate": 8.51049001326093e-06, "loss": 1.6874, "step": 83318 }, { "epoch": 2.77, "grad_norm": 0.7216178774833679, "learning_rate": 8.508018273691974e-06, "loss": 1.7199, "step": 83319 }, { "epoch": 2.77, "grad_norm": 0.7229114174842834, "learning_rate": 8.50554688795132e-06, "loss": 1.7144, "step": 83320 }, { "epoch": 2.77, "grad_norm": 0.6902257204055786, "learning_rate": 8.50307585604203e-06, "loss": 1.5859, "step": 83321 }, { "epoch": 2.77, "grad_norm": 0.7051743268966675, "learning_rate": 8.500605177967e-06, "loss": 1.7087, "step": 83322 }, { "epoch": 2.77, "grad_norm": 0.7323442101478577, "learning_rate": 8.498134853729333e-06, "loss": 1.7113, "step": 83323 }, { "epoch": 2.77, "grad_norm": 0.7375275492668152, "learning_rate": 8.495664883331954e-06, "loss": 1.7157, "step": 83324 }, { "epoch": 2.77, "grad_norm": 0.7140259742736816, "learning_rate": 8.493195266777863e-06, "loss": 1.6955, "step": 83325 }, { "epoch": 2.77, "grad_norm": 0.6916570067405701, "learning_rate": 8.490726004070125e-06, "loss": 1.6116, "step": 83326 }, { "epoch": 2.77, "grad_norm": 0.7112048268318176, "learning_rate": 8.48825709521167e-06, "loss": 1.7183, "step": 83327 }, { "epoch": 2.77, "grad_norm": 0.7157801985740662, "learning_rate": 8.48578854020553e-06, "loss": 1.6237, "step": 83328 }, { "epoch": 2.77, "grad_norm": 0.7305339574813843, "learning_rate": 8.483320339054667e-06, "loss": 1.712, "step": 83329 }, { "epoch": 2.77, "grad_norm": 0.6978275775909424, "learning_rate": 8.480852491762146e-06, "loss": 1.6774, "step": 83330 }, { "epoch": 2.77, "grad_norm": 0.7291979789733887, "learning_rate": 8.4783849983309e-06, "loss": 1.6292, "step": 83331 }, { "epoch": 2.77, "grad_norm": 0.7401237487792969, "learning_rate": 8.475917858763926e-06, "loss": 1.6885, "step": 83332 }, { "epoch": 2.77, "grad_norm": 0.7114077806472778, "learning_rate": 8.473451073064219e-06, "loss": 1.6838, "step": 83333 }, { "epoch": 2.77, "grad_norm": 0.7338407039642334, "learning_rate": 8.470984641234846e-06, "loss": 1.6838, "step": 83334 }, { "epoch": 2.77, "grad_norm": 0.7048580646514893, "learning_rate": 8.46851856327867e-06, "loss": 1.611, "step": 83335 }, { "epoch": 2.77, "grad_norm": 0.7114208936691284, "learning_rate": 8.46605283919879e-06, "loss": 1.6621, "step": 83336 }, { "epoch": 2.77, "grad_norm": 0.703262448310852, "learning_rate": 8.463587468998168e-06, "loss": 1.633, "step": 83337 }, { "epoch": 2.77, "grad_norm": 0.6954383254051208, "learning_rate": 8.461122452679802e-06, "loss": 1.6878, "step": 83338 }, { "epoch": 2.77, "grad_norm": 0.7271877527236938, "learning_rate": 8.458657790246659e-06, "loss": 1.6925, "step": 83339 }, { "epoch": 2.77, "grad_norm": 0.7512633204460144, "learning_rate": 8.456193481701767e-06, "loss": 1.769, "step": 83340 }, { "epoch": 2.77, "grad_norm": 0.7083245515823364, "learning_rate": 8.453729527048126e-06, "loss": 1.6783, "step": 83341 }, { "epoch": 2.77, "grad_norm": 0.7259215712547302, "learning_rate": 8.45126592628863e-06, "loss": 1.7452, "step": 83342 }, { "epoch": 2.77, "grad_norm": 0.7100782990455627, "learning_rate": 8.448802679426347e-06, "loss": 1.6435, "step": 83343 }, { "epoch": 2.77, "grad_norm": 0.7133344411849976, "learning_rate": 8.446339786464307e-06, "loss": 1.6468, "step": 83344 }, { "epoch": 2.77, "grad_norm": 0.7054679989814758, "learning_rate": 8.44387724740544e-06, "loss": 1.6618, "step": 83345 }, { "epoch": 2.77, "grad_norm": 0.7280147075653076, "learning_rate": 8.441415062252744e-06, "loss": 1.6592, "step": 83346 }, { "epoch": 2.77, "grad_norm": 0.7129639387130737, "learning_rate": 8.438953231009215e-06, "loss": 1.6473, "step": 83347 }, { "epoch": 2.77, "grad_norm": 0.7126328945159912, "learning_rate": 8.43649175367782e-06, "loss": 1.6756, "step": 83348 }, { "epoch": 2.77, "grad_norm": 0.7016550898551941, "learning_rate": 8.434030630261557e-06, "loss": 1.6886, "step": 83349 }, { "epoch": 2.77, "grad_norm": 0.7019699215888977, "learning_rate": 8.43156986076342e-06, "loss": 1.6266, "step": 83350 }, { "epoch": 2.77, "grad_norm": 0.7270008325576782, "learning_rate": 8.429109445186444e-06, "loss": 1.7026, "step": 83351 }, { "epoch": 2.77, "grad_norm": 0.7125810384750366, "learning_rate": 8.426649383533556e-06, "loss": 1.8, "step": 83352 }, { "epoch": 2.77, "grad_norm": 0.6988600492477417, "learning_rate": 8.424189675807724e-06, "loss": 1.7294, "step": 83353 }, { "epoch": 2.77, "grad_norm": 0.7085146307945251, "learning_rate": 8.421730322011977e-06, "loss": 1.6128, "step": 83354 }, { "epoch": 2.77, "grad_norm": 1.1025491952896118, "learning_rate": 8.419271322149313e-06, "loss": 1.7521, "step": 83355 }, { "epoch": 2.77, "grad_norm": 0.7136374711990356, "learning_rate": 8.416812676222628e-06, "loss": 1.6342, "step": 83356 }, { "epoch": 2.77, "grad_norm": 0.7301162481307983, "learning_rate": 8.41435438423499e-06, "loss": 1.6316, "step": 83357 }, { "epoch": 2.77, "grad_norm": 0.6947286128997803, "learning_rate": 8.411896446189393e-06, "loss": 1.6683, "step": 83358 }, { "epoch": 2.77, "grad_norm": 0.7180113196372986, "learning_rate": 8.409438862088802e-06, "loss": 1.6142, "step": 83359 }, { "epoch": 2.77, "grad_norm": 0.7158549427986145, "learning_rate": 8.406981631936116e-06, "loss": 1.6582, "step": 83360 }, { "epoch": 2.77, "grad_norm": 0.7157899737358093, "learning_rate": 8.404524755734466e-06, "loss": 1.6797, "step": 83361 }, { "epoch": 2.77, "grad_norm": 0.7419688105583191, "learning_rate": 8.402068233486747e-06, "loss": 1.778, "step": 83362 }, { "epoch": 2.77, "grad_norm": 0.7232678532600403, "learning_rate": 8.399612065195927e-06, "loss": 1.6324, "step": 83363 }, { "epoch": 2.77, "grad_norm": 0.7458919882774353, "learning_rate": 8.397156250865e-06, "loss": 1.6849, "step": 83364 }, { "epoch": 2.77, "grad_norm": 0.6810094118118286, "learning_rate": 8.394700790496967e-06, "loss": 1.6795, "step": 83365 }, { "epoch": 2.77, "grad_norm": 0.7099419236183167, "learning_rate": 8.392245684094822e-06, "loss": 1.6729, "step": 83366 }, { "epoch": 2.77, "grad_norm": 0.7154815196990967, "learning_rate": 8.3897909316615e-06, "loss": 1.6911, "step": 83367 }, { "epoch": 2.77, "grad_norm": 0.7138833999633789, "learning_rate": 8.387336533200061e-06, "loss": 1.669, "step": 83368 }, { "epoch": 2.77, "grad_norm": 0.7331910729408264, "learning_rate": 8.384882488713373e-06, "loss": 1.7242, "step": 83369 }, { "epoch": 2.77, "grad_norm": 0.7104918360710144, "learning_rate": 8.3824287982045e-06, "loss": 1.6732, "step": 83370 }, { "epoch": 2.77, "grad_norm": 0.713341474533081, "learning_rate": 8.37997546167637e-06, "loss": 1.7526, "step": 83371 }, { "epoch": 2.77, "grad_norm": 0.7121536135673523, "learning_rate": 8.377522479131982e-06, "loss": 1.6565, "step": 83372 }, { "epoch": 2.77, "grad_norm": 0.7106924653053284, "learning_rate": 8.375069850574333e-06, "loss": 1.6711, "step": 83373 }, { "epoch": 2.77, "grad_norm": 0.7183535695075989, "learning_rate": 8.372617576006358e-06, "loss": 1.7863, "step": 83374 }, { "epoch": 2.77, "grad_norm": 0.731113851070404, "learning_rate": 8.37016565543105e-06, "loss": 1.6642, "step": 83375 }, { "epoch": 2.77, "grad_norm": 0.7135527729988098, "learning_rate": 8.36771408885144e-06, "loss": 1.6869, "step": 83376 }, { "epoch": 2.77, "grad_norm": 0.7180377244949341, "learning_rate": 8.365262876270396e-06, "loss": 1.6279, "step": 83377 }, { "epoch": 2.77, "grad_norm": 0.7280084490776062, "learning_rate": 8.362812017690978e-06, "loss": 1.7189, "step": 83378 }, { "epoch": 2.77, "grad_norm": 0.7477911114692688, "learning_rate": 8.36036151311612e-06, "loss": 1.6807, "step": 83379 }, { "epoch": 2.77, "grad_norm": 0.6942117214202881, "learning_rate": 8.357911362548853e-06, "loss": 1.6532, "step": 83380 }, { "epoch": 2.77, "grad_norm": 0.728628933429718, "learning_rate": 8.355461565992072e-06, "loss": 1.6933, "step": 83381 }, { "epoch": 2.77, "grad_norm": 0.7241224050521851, "learning_rate": 8.35301212344881e-06, "loss": 1.7162, "step": 83382 }, { "epoch": 2.77, "grad_norm": 0.7412704825401306, "learning_rate": 8.350563034921998e-06, "loss": 1.6502, "step": 83383 }, { "epoch": 2.77, "grad_norm": 0.7586334943771362, "learning_rate": 8.348114300414631e-06, "loss": 1.6372, "step": 83384 }, { "epoch": 2.77, "grad_norm": 0.7320854067802429, "learning_rate": 8.345665919929712e-06, "loss": 1.6569, "step": 83385 }, { "epoch": 2.77, "grad_norm": 0.7232362031936646, "learning_rate": 8.343217893470133e-06, "loss": 1.6508, "step": 83386 }, { "epoch": 2.77, "grad_norm": 0.7143553495407104, "learning_rate": 8.340770221038961e-06, "loss": 1.6414, "step": 83387 }, { "epoch": 2.77, "grad_norm": 0.7071430087089539, "learning_rate": 8.338322902639127e-06, "loss": 1.6747, "step": 83388 }, { "epoch": 2.77, "grad_norm": 0.7269201874732971, "learning_rate": 8.335875938273528e-06, "loss": 1.6856, "step": 83389 }, { "epoch": 2.77, "grad_norm": 0.7130901217460632, "learning_rate": 8.333429327945263e-06, "loss": 1.6931, "step": 83390 }, { "epoch": 2.77, "grad_norm": 0.7162445187568665, "learning_rate": 8.330983071657226e-06, "loss": 1.7386, "step": 83391 }, { "epoch": 2.77, "grad_norm": 0.7105787992477417, "learning_rate": 8.328537169412386e-06, "loss": 1.7563, "step": 83392 }, { "epoch": 2.77, "grad_norm": 0.6955664157867432, "learning_rate": 8.326091621213704e-06, "loss": 1.6386, "step": 83393 }, { "epoch": 2.77, "grad_norm": 0.716810941696167, "learning_rate": 8.323646427064212e-06, "loss": 1.7004, "step": 83394 }, { "epoch": 2.77, "grad_norm": 0.7224476933479309, "learning_rate": 8.321201586966841e-06, "loss": 1.6306, "step": 83395 }, { "epoch": 2.77, "grad_norm": 0.7064129114151001, "learning_rate": 8.318757100924523e-06, "loss": 1.5849, "step": 83396 }, { "epoch": 2.77, "grad_norm": 0.7063609957695007, "learning_rate": 8.316312968940287e-06, "loss": 1.6826, "step": 83397 }, { "epoch": 2.77, "grad_norm": 0.7340767979621887, "learning_rate": 8.313869191017063e-06, "loss": 1.6783, "step": 83398 }, { "epoch": 2.77, "grad_norm": 0.7045392990112305, "learning_rate": 8.311425767157786e-06, "loss": 1.6071, "step": 83399 }, { "epoch": 2.77, "grad_norm": 0.7070357203483582, "learning_rate": 8.308982697365485e-06, "loss": 1.7259, "step": 83400 }, { "epoch": 2.77, "grad_norm": 0.7143324613571167, "learning_rate": 8.306539981643123e-06, "loss": 1.7375, "step": 83401 }, { "epoch": 2.77, "grad_norm": 0.7178835272789001, "learning_rate": 8.3040976199936e-06, "loss": 1.6757, "step": 83402 }, { "epoch": 2.77, "grad_norm": 0.7456814646720886, "learning_rate": 8.301655612419945e-06, "loss": 1.6473, "step": 83403 }, { "epoch": 2.77, "grad_norm": 0.7072243690490723, "learning_rate": 8.299213958925121e-06, "loss": 1.664, "step": 83404 }, { "epoch": 2.77, "grad_norm": 0.7015669941902161, "learning_rate": 8.296772659512064e-06, "loss": 1.6754, "step": 83405 }, { "epoch": 2.77, "grad_norm": 0.7247703075408936, "learning_rate": 8.2943317141837e-06, "loss": 1.695, "step": 83406 }, { "epoch": 2.77, "grad_norm": 0.6999902725219727, "learning_rate": 8.291891122943061e-06, "loss": 1.6672, "step": 83407 }, { "epoch": 2.77, "grad_norm": 0.7026404142379761, "learning_rate": 8.289450885793115e-06, "loss": 1.6221, "step": 83408 }, { "epoch": 2.78, "grad_norm": 0.7253177762031555, "learning_rate": 8.287011002736754e-06, "loss": 1.6381, "step": 83409 }, { "epoch": 2.78, "grad_norm": 0.7112434506416321, "learning_rate": 8.28457147377698e-06, "loss": 1.7112, "step": 83410 }, { "epoch": 2.78, "grad_norm": 0.7307753562927246, "learning_rate": 8.282132298916788e-06, "loss": 1.6437, "step": 83411 }, { "epoch": 2.78, "grad_norm": 0.7143761515617371, "learning_rate": 8.27969347815911e-06, "loss": 1.6955, "step": 83412 }, { "epoch": 2.78, "grad_norm": 0.7338938117027283, "learning_rate": 8.277255011506844e-06, "loss": 1.6494, "step": 83413 }, { "epoch": 2.78, "grad_norm": 0.7218620777130127, "learning_rate": 8.274816898963022e-06, "loss": 1.7142, "step": 83414 }, { "epoch": 2.78, "grad_norm": 0.7235881686210632, "learning_rate": 8.272379140530672e-06, "loss": 1.6429, "step": 83415 }, { "epoch": 2.78, "grad_norm": 0.751560628414154, "learning_rate": 8.269941736212593e-06, "loss": 1.7303, "step": 83416 }, { "epoch": 2.78, "grad_norm": 0.720012903213501, "learning_rate": 8.267504686011817e-06, "loss": 1.7165, "step": 83417 }, { "epoch": 2.78, "grad_norm": 0.725541353225708, "learning_rate": 8.265067989931306e-06, "loss": 1.6586, "step": 83418 }, { "epoch": 2.78, "grad_norm": 0.7068317532539368, "learning_rate": 8.262631647974061e-06, "loss": 1.7076, "step": 83419 }, { "epoch": 2.78, "grad_norm": 0.7087569236755371, "learning_rate": 8.260195660142943e-06, "loss": 1.6043, "step": 83420 }, { "epoch": 2.78, "grad_norm": 0.7048066854476929, "learning_rate": 8.257760026441017e-06, "loss": 1.6457, "step": 83421 }, { "epoch": 2.78, "grad_norm": 0.7433773279190063, "learning_rate": 8.25532474687115e-06, "loss": 1.6488, "step": 83422 }, { "epoch": 2.78, "grad_norm": 0.7129137516021729, "learning_rate": 8.252889821436337e-06, "loss": 1.589, "step": 83423 }, { "epoch": 2.78, "grad_norm": 0.7132253050804138, "learning_rate": 8.250455250139509e-06, "loss": 1.6384, "step": 83424 }, { "epoch": 2.78, "grad_norm": 0.7136850357055664, "learning_rate": 8.248021032983665e-06, "loss": 1.6953, "step": 83425 }, { "epoch": 2.78, "grad_norm": 0.7311151027679443, "learning_rate": 8.245587169971734e-06, "loss": 1.6919, "step": 83426 }, { "epoch": 2.78, "grad_norm": 0.7099847793579102, "learning_rate": 8.24315366110665e-06, "loss": 1.7331, "step": 83427 }, { "epoch": 2.78, "grad_norm": 0.6833807826042175, "learning_rate": 8.240720506391441e-06, "loss": 1.6339, "step": 83428 }, { "epoch": 2.78, "grad_norm": 0.7437852025032043, "learning_rate": 8.238287705828972e-06, "loss": 1.7032, "step": 83429 }, { "epoch": 2.78, "grad_norm": 0.7201136946678162, "learning_rate": 8.235855259422208e-06, "loss": 1.629, "step": 83430 }, { "epoch": 2.78, "grad_norm": 0.6936648488044739, "learning_rate": 8.233423167174181e-06, "loss": 1.7121, "step": 83431 }, { "epoch": 2.78, "grad_norm": 0.7076002359390259, "learning_rate": 8.230991429087752e-06, "loss": 1.6117, "step": 83432 }, { "epoch": 2.78, "grad_norm": 0.7032583951950073, "learning_rate": 8.228560045165921e-06, "loss": 1.6597, "step": 83433 }, { "epoch": 2.78, "grad_norm": 0.7000638246536255, "learning_rate": 8.22612901541162e-06, "loss": 1.6981, "step": 83434 }, { "epoch": 2.78, "grad_norm": 0.7129852771759033, "learning_rate": 8.223698339827811e-06, "loss": 1.6769, "step": 83435 }, { "epoch": 2.78, "grad_norm": 0.7125434279441833, "learning_rate": 8.221268018417459e-06, "loss": 1.6961, "step": 83436 }, { "epoch": 2.78, "grad_norm": 0.7309702634811401, "learning_rate": 8.218838051183462e-06, "loss": 1.7535, "step": 83437 }, { "epoch": 2.78, "grad_norm": 0.7358034253120422, "learning_rate": 8.216408438128852e-06, "loss": 1.7213, "step": 83438 }, { "epoch": 2.78, "grad_norm": 0.7103329300880432, "learning_rate": 8.213979179256458e-06, "loss": 1.7363, "step": 83439 }, { "epoch": 2.78, "grad_norm": 0.7318294048309326, "learning_rate": 8.211550274569378e-06, "loss": 1.7371, "step": 83440 }, { "epoch": 2.78, "grad_norm": 0.707228422164917, "learning_rate": 8.209121724070445e-06, "loss": 1.6544, "step": 83441 }, { "epoch": 2.78, "grad_norm": 0.7053312063217163, "learning_rate": 8.206693527762653e-06, "loss": 1.6766, "step": 83442 }, { "epoch": 2.78, "grad_norm": 0.707804262638092, "learning_rate": 8.204265685648937e-06, "loss": 1.599, "step": 83443 }, { "epoch": 2.78, "grad_norm": 0.7250148057937622, "learning_rate": 8.201838197732224e-06, "loss": 1.6925, "step": 83444 }, { "epoch": 2.78, "grad_norm": 0.7004587650299072, "learning_rate": 8.199411064015516e-06, "loss": 1.6565, "step": 83445 }, { "epoch": 2.78, "grad_norm": 0.7228103876113892, "learning_rate": 8.196984284501707e-06, "loss": 1.7461, "step": 83446 }, { "epoch": 2.78, "grad_norm": 0.7181907892227173, "learning_rate": 8.194557859193796e-06, "loss": 1.6748, "step": 83447 }, { "epoch": 2.78, "grad_norm": 0.7073416709899902, "learning_rate": 8.192131788094681e-06, "loss": 1.7015, "step": 83448 }, { "epoch": 2.78, "grad_norm": 0.7223055958747864, "learning_rate": 8.189706071207292e-06, "loss": 1.7061, "step": 83449 }, { "epoch": 2.78, "grad_norm": 0.723738431930542, "learning_rate": 8.18728070853466e-06, "loss": 1.6918, "step": 83450 }, { "epoch": 2.78, "grad_norm": 0.7311491966247559, "learning_rate": 8.184855700079652e-06, "loss": 1.7012, "step": 83451 }, { "epoch": 2.78, "grad_norm": 0.7072018384933472, "learning_rate": 8.182431045845195e-06, "loss": 1.6154, "step": 83452 }, { "epoch": 2.78, "grad_norm": 0.7216435074806213, "learning_rate": 8.180006745834288e-06, "loss": 1.6679, "step": 83453 }, { "epoch": 2.78, "grad_norm": 0.6890486478805542, "learning_rate": 8.177582800049865e-06, "loss": 1.6883, "step": 83454 }, { "epoch": 2.78, "grad_norm": 0.7144666910171509, "learning_rate": 8.175159208494885e-06, "loss": 1.6528, "step": 83455 }, { "epoch": 2.78, "grad_norm": 0.7130506634712219, "learning_rate": 8.172735971172217e-06, "loss": 1.7536, "step": 83456 }, { "epoch": 2.78, "grad_norm": 0.7302619218826294, "learning_rate": 8.17031308808489e-06, "loss": 1.6663, "step": 83457 }, { "epoch": 2.78, "grad_norm": 0.721819281578064, "learning_rate": 8.167890559235802e-06, "loss": 1.7391, "step": 83458 }, { "epoch": 2.78, "grad_norm": 0.7132688164710999, "learning_rate": 8.165468384627848e-06, "loss": 1.7031, "step": 83459 }, { "epoch": 2.78, "grad_norm": 0.7082650065422058, "learning_rate": 8.16304656426403e-06, "loss": 1.6385, "step": 83460 }, { "epoch": 2.78, "grad_norm": 0.6892699599266052, "learning_rate": 8.160625098147277e-06, "loss": 1.6753, "step": 83461 }, { "epoch": 2.78, "grad_norm": 0.7254743576049805, "learning_rate": 8.158203986280553e-06, "loss": 1.6893, "step": 83462 }, { "epoch": 2.78, "grad_norm": 0.7229549884796143, "learning_rate": 8.155783228666723e-06, "loss": 1.6555, "step": 83463 }, { "epoch": 2.78, "grad_norm": 0.7132562398910522, "learning_rate": 8.153362825308817e-06, "loss": 1.6683, "step": 83464 }, { "epoch": 2.78, "grad_norm": 0.7206764817237854, "learning_rate": 8.150942776209734e-06, "loss": 1.6477, "step": 83465 }, { "epoch": 2.78, "grad_norm": 0.7172524929046631, "learning_rate": 8.148523081372371e-06, "loss": 1.7426, "step": 83466 }, { "epoch": 2.78, "grad_norm": 0.7189458608627319, "learning_rate": 8.146103740799659e-06, "loss": 1.6956, "step": 83467 }, { "epoch": 2.78, "grad_norm": 0.7208573222160339, "learning_rate": 8.143684754494661e-06, "loss": 1.7143, "step": 83468 }, { "epoch": 2.78, "grad_norm": 0.7134163975715637, "learning_rate": 8.141266122460178e-06, "loss": 1.6503, "step": 83469 }, { "epoch": 2.78, "grad_norm": 0.760769784450531, "learning_rate": 8.138847844699203e-06, "loss": 1.6723, "step": 83470 }, { "epoch": 2.78, "grad_norm": 0.7300270199775696, "learning_rate": 8.136429921214671e-06, "loss": 1.667, "step": 83471 }, { "epoch": 2.78, "grad_norm": 0.6902137398719788, "learning_rate": 8.134012352009512e-06, "loss": 1.6975, "step": 83472 }, { "epoch": 2.78, "grad_norm": 0.7349791526794434, "learning_rate": 8.131595137086622e-06, "loss": 1.697, "step": 83473 }, { "epoch": 2.78, "grad_norm": 0.6956844329833984, "learning_rate": 8.129178276448967e-06, "loss": 1.5947, "step": 83474 }, { "epoch": 2.78, "grad_norm": 0.7162050008773804, "learning_rate": 8.126761770099543e-06, "loss": 1.7354, "step": 83475 }, { "epoch": 2.78, "grad_norm": 1.29925537109375, "learning_rate": 8.124345618041216e-06, "loss": 1.6309, "step": 83476 }, { "epoch": 2.78, "grad_norm": 0.7070114612579346, "learning_rate": 8.121929820276884e-06, "loss": 1.67, "step": 83477 }, { "epoch": 2.78, "grad_norm": 0.7420076727867126, "learning_rate": 8.119514376809545e-06, "loss": 1.733, "step": 83478 }, { "epoch": 2.78, "grad_norm": 0.7020390033721924, "learning_rate": 8.117099287642126e-06, "loss": 1.7119, "step": 83479 }, { "epoch": 2.78, "grad_norm": 0.6924917697906494, "learning_rate": 8.114684552777494e-06, "loss": 1.6545, "step": 83480 }, { "epoch": 2.78, "grad_norm": 0.7111409306526184, "learning_rate": 8.112270172218683e-06, "loss": 1.6337, "step": 83481 }, { "epoch": 2.78, "grad_norm": 0.7252435684204102, "learning_rate": 8.109856145968518e-06, "loss": 1.6544, "step": 83482 }, { "epoch": 2.78, "grad_norm": 0.7258562445640564, "learning_rate": 8.107442474030035e-06, "loss": 1.7086, "step": 83483 }, { "epoch": 2.78, "grad_norm": 0.7123836278915405, "learning_rate": 8.105029156406062e-06, "loss": 1.7053, "step": 83484 }, { "epoch": 2.78, "grad_norm": 0.7268792986869812, "learning_rate": 8.102616193099598e-06, "loss": 1.6559, "step": 83485 }, { "epoch": 2.78, "grad_norm": 0.727497935295105, "learning_rate": 8.100203584113574e-06, "loss": 1.7257, "step": 83486 }, { "epoch": 2.78, "grad_norm": 0.7259572148323059, "learning_rate": 8.09779132945082e-06, "loss": 1.6286, "step": 83487 }, { "epoch": 2.78, "grad_norm": 0.720738410949707, "learning_rate": 8.095379429114402e-06, "loss": 1.6686, "step": 83488 }, { "epoch": 2.78, "grad_norm": 0.7215695977210999, "learning_rate": 8.092967883107149e-06, "loss": 1.6563, "step": 83489 }, { "epoch": 2.78, "grad_norm": 0.709396481513977, "learning_rate": 8.090556691432026e-06, "loss": 1.6606, "step": 83490 }, { "epoch": 2.78, "grad_norm": 0.7051938772201538, "learning_rate": 8.088145854091999e-06, "loss": 1.7219, "step": 83491 }, { "epoch": 2.78, "grad_norm": 0.7094206213951111, "learning_rate": 8.085735371089896e-06, "loss": 1.6799, "step": 83492 }, { "epoch": 2.78, "grad_norm": 0.7338936924934387, "learning_rate": 8.083325242428718e-06, "loss": 1.6312, "step": 83493 }, { "epoch": 2.78, "grad_norm": 0.7365427613258362, "learning_rate": 8.08091546811136e-06, "loss": 1.6316, "step": 83494 }, { "epoch": 2.78, "grad_norm": 0.6915239691734314, "learning_rate": 8.078506048140788e-06, "loss": 1.6418, "step": 83495 }, { "epoch": 2.78, "grad_norm": 0.7146298289299011, "learning_rate": 8.076096982519865e-06, "loss": 1.6441, "step": 83496 }, { "epoch": 2.78, "grad_norm": 0.716378390789032, "learning_rate": 8.073688271251589e-06, "loss": 1.7269, "step": 83497 }, { "epoch": 2.78, "grad_norm": 0.7120600342750549, "learning_rate": 8.071279914338823e-06, "loss": 1.7016, "step": 83498 }, { "epoch": 2.78, "grad_norm": 0.7229846715927124, "learning_rate": 8.06887191178447e-06, "loss": 1.6842, "step": 83499 }, { "epoch": 2.78, "grad_norm": 0.69818514585495, "learning_rate": 8.066464263591554e-06, "loss": 1.6308, "step": 83500 }, { "epoch": 2.78, "grad_norm": 0.7257441282272339, "learning_rate": 8.064056969762877e-06, "loss": 1.7205, "step": 83501 }, { "epoch": 2.78, "grad_norm": 0.7248010635375977, "learning_rate": 8.06165003030147e-06, "loss": 1.7211, "step": 83502 }, { "epoch": 2.78, "grad_norm": 0.6997637152671814, "learning_rate": 8.059243445210161e-06, "loss": 1.6382, "step": 83503 }, { "epoch": 2.78, "grad_norm": 0.7195026278495789, "learning_rate": 8.056837214491951e-06, "loss": 1.6155, "step": 83504 }, { "epoch": 2.78, "grad_norm": 0.7045246958732605, "learning_rate": 8.054431338149735e-06, "loss": 1.6933, "step": 83505 }, { "epoch": 2.78, "grad_norm": 0.7414923310279846, "learning_rate": 8.052025816186381e-06, "loss": 1.6032, "step": 83506 }, { "epoch": 2.78, "grad_norm": 0.7358558177947998, "learning_rate": 8.049620648604883e-06, "loss": 1.7274, "step": 83507 }, { "epoch": 2.78, "grad_norm": 0.73000568151474, "learning_rate": 8.04721583540814e-06, "loss": 1.6871, "step": 83508 }, { "epoch": 2.78, "grad_norm": 0.7016780972480774, "learning_rate": 8.044811376599013e-06, "loss": 1.6005, "step": 83509 }, { "epoch": 2.78, "grad_norm": 0.7237735390663147, "learning_rate": 8.042407272180473e-06, "loss": 1.6297, "step": 83510 }, { "epoch": 2.78, "grad_norm": 0.6942596435546875, "learning_rate": 8.040003522155447e-06, "loss": 1.6405, "step": 83511 }, { "epoch": 2.78, "grad_norm": 0.7194792032241821, "learning_rate": 8.037600126526867e-06, "loss": 1.6825, "step": 83512 }, { "epoch": 2.78, "grad_norm": 0.7001345157623291, "learning_rate": 8.035197085297596e-06, "loss": 1.6521, "step": 83513 }, { "epoch": 2.78, "grad_norm": 0.6882844567298889, "learning_rate": 8.032794398470566e-06, "loss": 1.6399, "step": 83514 }, { "epoch": 2.78, "grad_norm": 0.7162541151046753, "learning_rate": 8.030392066048741e-06, "loss": 1.68, "step": 83515 }, { "epoch": 2.78, "grad_norm": 0.7003971934318542, "learning_rate": 8.027990088034952e-06, "loss": 1.6766, "step": 83516 }, { "epoch": 2.78, "grad_norm": 0.7139260172843933, "learning_rate": 8.025588464432165e-06, "loss": 1.7422, "step": 83517 }, { "epoch": 2.78, "grad_norm": 0.7152899503707886, "learning_rate": 8.023187195243375e-06, "loss": 1.6511, "step": 83518 }, { "epoch": 2.78, "grad_norm": 0.6989736557006836, "learning_rate": 8.020786280471314e-06, "loss": 1.7109, "step": 83519 }, { "epoch": 2.78, "grad_norm": 0.7189772725105286, "learning_rate": 8.018385720119046e-06, "loss": 1.5954, "step": 83520 }, { "epoch": 2.78, "grad_norm": 0.7075146436691284, "learning_rate": 8.015985514189438e-06, "loss": 1.7228, "step": 83521 }, { "epoch": 2.78, "grad_norm": 0.7150679230690002, "learning_rate": 8.013585662685384e-06, "loss": 1.6641, "step": 83522 }, { "epoch": 2.78, "grad_norm": 0.7408022284507751, "learning_rate": 8.011186165609817e-06, "loss": 1.6986, "step": 83523 }, { "epoch": 2.78, "grad_norm": 0.6860170364379883, "learning_rate": 8.008787022965635e-06, "loss": 1.6661, "step": 83524 }, { "epoch": 2.78, "grad_norm": 0.7215093374252319, "learning_rate": 8.006388234755834e-06, "loss": 1.7568, "step": 83525 }, { "epoch": 2.78, "grad_norm": 0.7255907654762268, "learning_rate": 8.00398980098318e-06, "loss": 1.6957, "step": 83526 }, { "epoch": 2.78, "grad_norm": 0.722266435623169, "learning_rate": 8.001591721650668e-06, "loss": 1.6411, "step": 83527 }, { "epoch": 2.78, "grad_norm": 0.7071149349212646, "learning_rate": 7.9991939967612e-06, "loss": 1.6362, "step": 83528 }, { "epoch": 2.78, "grad_norm": 0.7225819230079651, "learning_rate": 7.996796626317737e-06, "loss": 1.7276, "step": 83529 }, { "epoch": 2.78, "grad_norm": 0.7169151902198792, "learning_rate": 7.994399610323076e-06, "loss": 1.6319, "step": 83530 }, { "epoch": 2.78, "grad_norm": 0.7279840111732483, "learning_rate": 7.992002948780186e-06, "loss": 1.6521, "step": 83531 }, { "epoch": 2.78, "grad_norm": 0.7187835574150085, "learning_rate": 7.98960664169206e-06, "loss": 1.607, "step": 83532 }, { "epoch": 2.78, "grad_norm": 0.7178773283958435, "learning_rate": 7.987210689061429e-06, "loss": 1.628, "step": 83533 }, { "epoch": 2.78, "grad_norm": 0.7124356627464294, "learning_rate": 7.984815090891327e-06, "loss": 1.6805, "step": 83534 }, { "epoch": 2.78, "grad_norm": 0.7254222631454468, "learning_rate": 7.982419847184651e-06, "loss": 1.6672, "step": 83535 }, { "epoch": 2.78, "grad_norm": 0.6986123919487, "learning_rate": 7.980024957944297e-06, "loss": 1.6789, "step": 83536 }, { "epoch": 2.78, "grad_norm": 0.6933264136314392, "learning_rate": 7.977630423173164e-06, "loss": 1.6675, "step": 83537 }, { "epoch": 2.78, "grad_norm": 0.6935884356498718, "learning_rate": 7.97523624287415e-06, "loss": 1.667, "step": 83538 }, { "epoch": 2.78, "grad_norm": 0.7192304134368896, "learning_rate": 7.972842417050184e-06, "loss": 1.7162, "step": 83539 }, { "epoch": 2.78, "grad_norm": 0.7425639033317566, "learning_rate": 7.970448945704133e-06, "loss": 1.7389, "step": 83540 }, { "epoch": 2.78, "grad_norm": 0.7280392050743103, "learning_rate": 7.968055828838926e-06, "loss": 1.7237, "step": 83541 }, { "epoch": 2.78, "grad_norm": 0.7215715050697327, "learning_rate": 7.965663066457495e-06, "loss": 1.734, "step": 83542 }, { "epoch": 2.78, "grad_norm": 0.7177273631095886, "learning_rate": 7.963270658562736e-06, "loss": 1.7668, "step": 83543 }, { "epoch": 2.78, "grad_norm": 0.7081182599067688, "learning_rate": 7.960878605157483e-06, "loss": 1.6858, "step": 83544 }, { "epoch": 2.78, "grad_norm": 0.7143100500106812, "learning_rate": 7.958486906244731e-06, "loss": 1.6116, "step": 83545 }, { "epoch": 2.78, "grad_norm": 0.7039921283721924, "learning_rate": 7.956095561827346e-06, "loss": 1.7003, "step": 83546 }, { "epoch": 2.78, "grad_norm": 0.6855571866035461, "learning_rate": 7.953704571908226e-06, "loss": 1.6657, "step": 83547 }, { "epoch": 2.78, "grad_norm": 0.7084549069404602, "learning_rate": 7.951313936490267e-06, "loss": 1.6942, "step": 83548 }, { "epoch": 2.78, "grad_norm": 0.6832740902900696, "learning_rate": 7.948923655576366e-06, "loss": 1.6793, "step": 83549 }, { "epoch": 2.78, "grad_norm": 0.7172656059265137, "learning_rate": 7.946533729169491e-06, "loss": 1.7104, "step": 83550 }, { "epoch": 2.78, "grad_norm": 0.7289538383483887, "learning_rate": 7.944144157272437e-06, "loss": 1.6602, "step": 83551 }, { "epoch": 2.78, "grad_norm": 0.7045754194259644, "learning_rate": 7.9417549398882e-06, "loss": 1.6964, "step": 83552 }, { "epoch": 2.78, "grad_norm": 0.7288020253181458, "learning_rate": 7.939366077019648e-06, "loss": 1.6627, "step": 83553 }, { "epoch": 2.78, "grad_norm": 0.705719530582428, "learning_rate": 7.936977568669612e-06, "loss": 1.6803, "step": 83554 }, { "epoch": 2.78, "grad_norm": 0.7369526624679565, "learning_rate": 7.93458941484112e-06, "loss": 1.8149, "step": 83555 }, { "epoch": 2.78, "grad_norm": 0.7179380059242249, "learning_rate": 7.932201615536936e-06, "loss": 1.6901, "step": 83556 }, { "epoch": 2.78, "grad_norm": 0.7417199611663818, "learning_rate": 7.929814170760095e-06, "loss": 1.6711, "step": 83557 }, { "epoch": 2.78, "grad_norm": 0.7296286821365356, "learning_rate": 7.927427080513394e-06, "loss": 1.7144, "step": 83558 }, { "epoch": 2.78, "grad_norm": 0.7252848148345947, "learning_rate": 7.92504034479976e-06, "loss": 1.6404, "step": 83559 }, { "epoch": 2.78, "grad_norm": 0.7036722898483276, "learning_rate": 7.922653963622094e-06, "loss": 1.7478, "step": 83560 }, { "epoch": 2.78, "grad_norm": 0.7065674066543579, "learning_rate": 7.92026793698326e-06, "loss": 1.66, "step": 83561 }, { "epoch": 2.78, "grad_norm": 0.7032026648521423, "learning_rate": 7.917882264886222e-06, "loss": 1.5835, "step": 83562 }, { "epoch": 2.78, "grad_norm": 0.708091139793396, "learning_rate": 7.91549694733381e-06, "loss": 1.6894, "step": 83563 }, { "epoch": 2.78, "grad_norm": 0.7321427464485168, "learning_rate": 7.913111984328991e-06, "loss": 1.6589, "step": 83564 }, { "epoch": 2.78, "grad_norm": 0.6918390989303589, "learning_rate": 7.910727375874593e-06, "loss": 1.6514, "step": 83565 }, { "epoch": 2.78, "grad_norm": 0.7444451451301575, "learning_rate": 7.908343121973515e-06, "loss": 1.714, "step": 83566 }, { "epoch": 2.78, "grad_norm": 0.6954591870307922, "learning_rate": 7.905959222628722e-06, "loss": 1.6223, "step": 83567 }, { "epoch": 2.78, "grad_norm": 0.7150247097015381, "learning_rate": 7.903575677843044e-06, "loss": 1.7036, "step": 83568 }, { "epoch": 2.78, "grad_norm": 0.7351365089416504, "learning_rate": 7.901192487619346e-06, "loss": 1.6275, "step": 83569 }, { "epoch": 2.78, "grad_norm": 0.7007999420166016, "learning_rate": 7.898809651960557e-06, "loss": 1.5958, "step": 83570 }, { "epoch": 2.78, "grad_norm": 0.6986883878707886, "learning_rate": 7.896427170869612e-06, "loss": 1.646, "step": 83571 }, { "epoch": 2.78, "grad_norm": 0.728420078754425, "learning_rate": 7.894045044349374e-06, "loss": 1.7185, "step": 83572 }, { "epoch": 2.78, "grad_norm": 0.6958099603652954, "learning_rate": 7.89166327240267e-06, "loss": 1.6425, "step": 83573 }, { "epoch": 2.78, "grad_norm": 0.6983435750007629, "learning_rate": 7.889281855032503e-06, "loss": 1.6836, "step": 83574 }, { "epoch": 2.78, "grad_norm": 0.7350316643714905, "learning_rate": 7.886900792241702e-06, "loss": 1.7053, "step": 83575 }, { "epoch": 2.78, "grad_norm": 0.7223778963088989, "learning_rate": 7.884520084033131e-06, "loss": 1.6537, "step": 83576 }, { "epoch": 2.78, "grad_norm": 0.7014294266700745, "learning_rate": 7.882139730409687e-06, "loss": 1.7232, "step": 83577 }, { "epoch": 2.78, "grad_norm": 0.7107290625572205, "learning_rate": 7.879759731374335e-06, "loss": 1.7109, "step": 83578 }, { "epoch": 2.78, "grad_norm": 0.7143938541412354, "learning_rate": 7.877380086929942e-06, "loss": 1.6709, "step": 83579 }, { "epoch": 2.78, "grad_norm": 0.7250770926475525, "learning_rate": 7.875000797079301e-06, "loss": 1.6541, "step": 83580 }, { "epoch": 2.78, "grad_norm": 0.7128955721855164, "learning_rate": 7.872621861825413e-06, "loss": 1.64, "step": 83581 }, { "epoch": 2.78, "grad_norm": 0.7151966094970703, "learning_rate": 7.870243281171107e-06, "loss": 1.6645, "step": 83582 }, { "epoch": 2.78, "grad_norm": 0.7284225225448608, "learning_rate": 7.867865055119248e-06, "loss": 1.6962, "step": 83583 }, { "epoch": 2.78, "grad_norm": 0.6953665614128113, "learning_rate": 7.86548718367277e-06, "loss": 1.6894, "step": 83584 }, { "epoch": 2.78, "grad_norm": 0.7103345394134521, "learning_rate": 7.863109666834566e-06, "loss": 1.6715, "step": 83585 }, { "epoch": 2.78, "grad_norm": 0.733092188835144, "learning_rate": 7.860732504607503e-06, "loss": 1.7168, "step": 83586 }, { "epoch": 2.78, "grad_norm": 0.716983437538147, "learning_rate": 7.858355696994445e-06, "loss": 1.6111, "step": 83587 }, { "epoch": 2.78, "grad_norm": 0.7148285508155823, "learning_rate": 7.855979243998323e-06, "loss": 1.6764, "step": 83588 }, { "epoch": 2.78, "grad_norm": 0.7086682915687561, "learning_rate": 7.853603145622001e-06, "loss": 1.6326, "step": 83589 }, { "epoch": 2.78, "grad_norm": 0.7239668965339661, "learning_rate": 7.85122740186831e-06, "loss": 1.66, "step": 83590 }, { "epoch": 2.78, "grad_norm": 0.7122328281402588, "learning_rate": 7.848852012740215e-06, "loss": 1.7549, "step": 83591 }, { "epoch": 2.78, "grad_norm": 0.7215397953987122, "learning_rate": 7.84647697824058e-06, "loss": 1.7084, "step": 83592 }, { "epoch": 2.78, "grad_norm": 0.7310516238212585, "learning_rate": 7.844102298372267e-06, "loss": 1.5631, "step": 83593 }, { "epoch": 2.78, "grad_norm": 0.7032344341278076, "learning_rate": 7.841727973138146e-06, "loss": 1.6574, "step": 83594 }, { "epoch": 2.78, "grad_norm": 0.7001338005065918, "learning_rate": 7.839354002541144e-06, "loss": 1.6075, "step": 83595 }, { "epoch": 2.78, "grad_norm": 0.7050652503967285, "learning_rate": 7.836980386584124e-06, "loss": 1.7246, "step": 83596 }, { "epoch": 2.78, "grad_norm": 0.6898095607757568, "learning_rate": 7.834607125269954e-06, "loss": 1.6575, "step": 83597 }, { "epoch": 2.78, "grad_norm": 0.7110268473625183, "learning_rate": 7.83223421860153e-06, "loss": 1.7477, "step": 83598 }, { "epoch": 2.78, "grad_norm": 0.7147394418716431, "learning_rate": 7.829861666581683e-06, "loss": 1.6804, "step": 83599 }, { "epoch": 2.78, "grad_norm": 0.7173179984092712, "learning_rate": 7.827489469213377e-06, "loss": 1.716, "step": 83600 }, { "epoch": 2.78, "grad_norm": 0.7300414443016052, "learning_rate": 7.82511762649941e-06, "loss": 1.6846, "step": 83601 }, { "epoch": 2.78, "grad_norm": 0.7192258238792419, "learning_rate": 7.822746138442747e-06, "loss": 1.7276, "step": 83602 }, { "epoch": 2.78, "grad_norm": 0.7436630725860596, "learning_rate": 7.82037500504622e-06, "loss": 1.7008, "step": 83603 }, { "epoch": 2.78, "grad_norm": 0.7247775197029114, "learning_rate": 7.818004226312658e-06, "loss": 1.7036, "step": 83604 }, { "epoch": 2.78, "grad_norm": 0.728138267993927, "learning_rate": 7.815633802245025e-06, "loss": 1.6244, "step": 83605 }, { "epoch": 2.78, "grad_norm": 0.7214806079864502, "learning_rate": 7.813263732846154e-06, "loss": 1.69, "step": 83606 }, { "epoch": 2.78, "grad_norm": 0.7016262412071228, "learning_rate": 7.810894018118941e-06, "loss": 1.5761, "step": 83607 }, { "epoch": 2.78, "grad_norm": 0.7000669836997986, "learning_rate": 7.80852465806625e-06, "loss": 1.6586, "step": 83608 }, { "epoch": 2.78, "grad_norm": 0.7022581100463867, "learning_rate": 7.806155652690916e-06, "loss": 1.6827, "step": 83609 }, { "epoch": 2.78, "grad_norm": 0.6886433959007263, "learning_rate": 7.803787001995898e-06, "loss": 1.6982, "step": 83610 }, { "epoch": 2.78, "grad_norm": 0.7088707685470581, "learning_rate": 7.801418705983996e-06, "loss": 1.6469, "step": 83611 }, { "epoch": 2.78, "grad_norm": 0.7552787661552429, "learning_rate": 7.799050764658144e-06, "loss": 1.6942, "step": 83612 }, { "epoch": 2.78, "grad_norm": 0.7171205878257751, "learning_rate": 7.796683178021168e-06, "loss": 1.6808, "step": 83613 }, { "epoch": 2.78, "grad_norm": 0.7239044904708862, "learning_rate": 7.794315946076002e-06, "loss": 1.6559, "step": 83614 }, { "epoch": 2.78, "grad_norm": 0.7390425205230713, "learning_rate": 7.791949068825476e-06, "loss": 1.7186, "step": 83615 }, { "epoch": 2.78, "grad_norm": 0.7018934488296509, "learning_rate": 7.789582546272454e-06, "loss": 1.7244, "step": 83616 }, { "epoch": 2.78, "grad_norm": 0.708590567111969, "learning_rate": 7.787216378419836e-06, "loss": 1.6863, "step": 83617 }, { "epoch": 2.78, "grad_norm": 0.7139844298362732, "learning_rate": 7.78485056527045e-06, "loss": 1.7315, "step": 83618 }, { "epoch": 2.78, "grad_norm": 0.7238839864730835, "learning_rate": 7.78248510682723e-06, "loss": 1.6472, "step": 83619 }, { "epoch": 2.78, "grad_norm": 0.7048155665397644, "learning_rate": 7.780120003092971e-06, "loss": 1.685, "step": 83620 }, { "epoch": 2.78, "grad_norm": 0.7308929562568665, "learning_rate": 7.777755254070638e-06, "loss": 1.7086, "step": 83621 }, { "epoch": 2.78, "grad_norm": 0.7271038293838501, "learning_rate": 7.775390859763064e-06, "loss": 1.665, "step": 83622 }, { "epoch": 2.78, "grad_norm": 0.7005853652954102, "learning_rate": 7.773026820173078e-06, "loss": 1.6977, "step": 83623 }, { "epoch": 2.78, "grad_norm": 0.7322377562522888, "learning_rate": 7.770663135303578e-06, "loss": 1.708, "step": 83624 }, { "epoch": 2.78, "grad_norm": 0.7191421985626221, "learning_rate": 7.768299805157463e-06, "loss": 1.6112, "step": 83625 }, { "epoch": 2.78, "grad_norm": 0.7273930907249451, "learning_rate": 7.765936829737529e-06, "loss": 1.6563, "step": 83626 }, { "epoch": 2.78, "grad_norm": 0.7128918766975403, "learning_rate": 7.763574209046708e-06, "loss": 1.6321, "step": 83627 }, { "epoch": 2.78, "grad_norm": 0.7050510048866272, "learning_rate": 7.761211943087864e-06, "loss": 1.6707, "step": 83628 }, { "epoch": 2.78, "grad_norm": 0.7076959013938904, "learning_rate": 7.758850031863861e-06, "loss": 1.7321, "step": 83629 }, { "epoch": 2.78, "grad_norm": 0.7179431319236755, "learning_rate": 7.756488475377531e-06, "loss": 1.6506, "step": 83630 }, { "epoch": 2.78, "grad_norm": 0.706078052520752, "learning_rate": 7.754127273631772e-06, "loss": 1.6766, "step": 83631 }, { "epoch": 2.78, "grad_norm": 0.7161059379577637, "learning_rate": 7.751766426629446e-06, "loss": 1.6719, "step": 83632 }, { "epoch": 2.78, "grad_norm": 0.738274872303009, "learning_rate": 7.749405934373387e-06, "loss": 1.6888, "step": 83633 }, { "epoch": 2.78, "grad_norm": 0.7139807939529419, "learning_rate": 7.747045796866491e-06, "loss": 1.6304, "step": 83634 }, { "epoch": 2.78, "grad_norm": 0.7064193487167358, "learning_rate": 7.74468601411169e-06, "loss": 1.6625, "step": 83635 }, { "epoch": 2.78, "grad_norm": 0.7158342599868774, "learning_rate": 7.74232658611168e-06, "loss": 1.6569, "step": 83636 }, { "epoch": 2.78, "grad_norm": 0.738267183303833, "learning_rate": 7.739967512869461e-06, "loss": 1.6565, "step": 83637 }, { "epoch": 2.78, "grad_norm": 0.7229607105255127, "learning_rate": 7.737608794387895e-06, "loss": 1.7593, "step": 83638 }, { "epoch": 2.78, "grad_norm": 0.7145852446556091, "learning_rate": 7.735250430669782e-06, "loss": 1.6891, "step": 83639 }, { "epoch": 2.78, "grad_norm": 0.7057678699493408, "learning_rate": 7.732892421717984e-06, "loss": 1.7493, "step": 83640 }, { "epoch": 2.78, "grad_norm": 0.7470840215682983, "learning_rate": 7.730534767535401e-06, "loss": 1.6219, "step": 83641 }, { "epoch": 2.78, "grad_norm": 0.7261735200881958, "learning_rate": 7.72817746812493e-06, "loss": 1.7019, "step": 83642 }, { "epoch": 2.78, "grad_norm": 0.7164146304130554, "learning_rate": 7.725820523489368e-06, "loss": 1.6945, "step": 83643 }, { "epoch": 2.78, "grad_norm": 0.7200700640678406, "learning_rate": 7.723463933631546e-06, "loss": 1.671, "step": 83644 }, { "epoch": 2.78, "grad_norm": 0.7241978049278259, "learning_rate": 7.72110769855443e-06, "loss": 1.6988, "step": 83645 }, { "epoch": 2.78, "grad_norm": 0.717595636844635, "learning_rate": 7.718751818260816e-06, "loss": 1.6476, "step": 83646 }, { "epoch": 2.78, "grad_norm": 0.7013685703277588, "learning_rate": 7.716396292753569e-06, "loss": 1.5911, "step": 83647 }, { "epoch": 2.78, "grad_norm": 0.7261188626289368, "learning_rate": 7.71404112203552e-06, "loss": 1.7712, "step": 83648 }, { "epoch": 2.78, "grad_norm": 1.8221017122268677, "learning_rate": 7.711686306109633e-06, "loss": 1.7099, "step": 83649 }, { "epoch": 2.78, "grad_norm": 0.7505436539649963, "learning_rate": 7.709331844978605e-06, "loss": 1.7217, "step": 83650 }, { "epoch": 2.78, "grad_norm": 0.7025059461593628, "learning_rate": 7.706977738645403e-06, "loss": 1.6484, "step": 83651 }, { "epoch": 2.78, "grad_norm": 0.7210688591003418, "learning_rate": 7.70462398711289e-06, "loss": 1.6595, "step": 83652 }, { "epoch": 2.78, "grad_norm": 0.7012070417404175, "learning_rate": 7.702270590383896e-06, "loss": 1.6301, "step": 83653 }, { "epoch": 2.78, "grad_norm": 0.7297545671463013, "learning_rate": 7.699917548461254e-06, "loss": 1.7213, "step": 83654 }, { "epoch": 2.78, "grad_norm": 0.7297722101211548, "learning_rate": 7.69756486134786e-06, "loss": 1.7856, "step": 83655 }, { "epoch": 2.78, "grad_norm": 0.7186161279678345, "learning_rate": 7.695212529046579e-06, "loss": 1.6424, "step": 83656 }, { "epoch": 2.78, "grad_norm": 0.7298153638839722, "learning_rate": 7.692860551560175e-06, "loss": 1.6585, "step": 83657 }, { "epoch": 2.78, "grad_norm": 0.7063892483711243, "learning_rate": 7.69050892889158e-06, "loss": 1.6764, "step": 83658 }, { "epoch": 2.78, "grad_norm": 0.7116944789886475, "learning_rate": 7.688157661043692e-06, "loss": 1.7188, "step": 83659 }, { "epoch": 2.78, "grad_norm": 0.7290692329406738, "learning_rate": 7.685806748019275e-06, "loss": 1.7026, "step": 83660 }, { "epoch": 2.78, "grad_norm": 0.7193971276283264, "learning_rate": 7.683456189821225e-06, "loss": 1.6529, "step": 83661 }, { "epoch": 2.78, "grad_norm": 2.723726272583008, "learning_rate": 7.681105986452407e-06, "loss": 1.6751, "step": 83662 }, { "epoch": 2.78, "grad_norm": 0.7267054319381714, "learning_rate": 7.67875613791562e-06, "loss": 1.6571, "step": 83663 }, { "epoch": 2.78, "grad_norm": 0.702491283416748, "learning_rate": 7.676406644213762e-06, "loss": 1.6873, "step": 83664 }, { "epoch": 2.78, "grad_norm": 0.6998855471611023, "learning_rate": 7.674057505349696e-06, "loss": 1.6859, "step": 83665 }, { "epoch": 2.78, "grad_norm": 0.7211316227912903, "learning_rate": 7.671708721326219e-06, "loss": 1.6621, "step": 83666 }, { "epoch": 2.78, "grad_norm": 0.7215984463691711, "learning_rate": 7.669360292146231e-06, "loss": 1.7043, "step": 83667 }, { "epoch": 2.78, "grad_norm": 0.7334126234054565, "learning_rate": 7.667012217812563e-06, "loss": 1.6709, "step": 83668 }, { "epoch": 2.78, "grad_norm": 0.7525805234909058, "learning_rate": 7.664664498328076e-06, "loss": 1.693, "step": 83669 }, { "epoch": 2.78, "grad_norm": 0.7246014475822449, "learning_rate": 7.662317133695639e-06, "loss": 1.6893, "step": 83670 }, { "epoch": 2.78, "grad_norm": 0.713338315486908, "learning_rate": 7.659970123918014e-06, "loss": 1.6231, "step": 83671 }, { "epoch": 2.78, "grad_norm": 0.7218484878540039, "learning_rate": 7.657623468998164e-06, "loss": 1.6687, "step": 83672 }, { "epoch": 2.78, "grad_norm": 0.7240113615989685, "learning_rate": 7.655277168938856e-06, "loss": 1.72, "step": 83673 }, { "epoch": 2.78, "grad_norm": 0.7366315126419067, "learning_rate": 7.652931223742986e-06, "loss": 1.6583, "step": 83674 }, { "epoch": 2.78, "grad_norm": 0.6996440291404724, "learning_rate": 7.650585633413354e-06, "loss": 1.631, "step": 83675 }, { "epoch": 2.78, "grad_norm": 0.698536217212677, "learning_rate": 7.648240397952853e-06, "loss": 1.7096, "step": 83676 }, { "epoch": 2.78, "grad_norm": 0.7171698212623596, "learning_rate": 7.64589551736432e-06, "loss": 1.6826, "step": 83677 }, { "epoch": 2.78, "grad_norm": 0.758624792098999, "learning_rate": 7.643550991650549e-06, "loss": 1.7106, "step": 83678 }, { "epoch": 2.78, "grad_norm": 0.738470196723938, "learning_rate": 7.641206820814505e-06, "loss": 1.6651, "step": 83679 }, { "epoch": 2.78, "grad_norm": 0.7054687142372131, "learning_rate": 7.638863004858885e-06, "loss": 1.6314, "step": 83680 }, { "epoch": 2.78, "grad_norm": 0.7159324884414673, "learning_rate": 7.636519543786657e-06, "loss": 1.7235, "step": 83681 }, { "epoch": 2.78, "grad_norm": 0.6990289092063904, "learning_rate": 7.634176437600614e-06, "loss": 1.6117, "step": 83682 }, { "epoch": 2.78, "grad_norm": 0.7201539874076843, "learning_rate": 7.631833686303557e-06, "loss": 1.6681, "step": 83683 }, { "epoch": 2.78, "grad_norm": 0.7136116623878479, "learning_rate": 7.629491289898448e-06, "loss": 1.6663, "step": 83684 }, { "epoch": 2.78, "grad_norm": 0.7064825892448425, "learning_rate": 7.62714924838802e-06, "loss": 1.6134, "step": 83685 }, { "epoch": 2.78, "grad_norm": 0.7014992237091064, "learning_rate": 7.624807561775137e-06, "loss": 1.6426, "step": 83686 }, { "epoch": 2.78, "grad_norm": 0.7212119698524475, "learning_rate": 7.622466230062663e-06, "loss": 1.7019, "step": 83687 }, { "epoch": 2.78, "grad_norm": 0.7061491012573242, "learning_rate": 7.620125253253462e-06, "loss": 1.6889, "step": 83688 }, { "epoch": 2.78, "grad_norm": 0.7152442932128906, "learning_rate": 7.617784631350332e-06, "loss": 1.654, "step": 83689 }, { "epoch": 2.78, "grad_norm": 0.6974253058433533, "learning_rate": 7.615444364356105e-06, "loss": 1.6917, "step": 83690 }, { "epoch": 2.78, "grad_norm": 0.6922144293785095, "learning_rate": 7.613104452273711e-06, "loss": 1.7003, "step": 83691 }, { "epoch": 2.78, "grad_norm": 0.730769693851471, "learning_rate": 7.610764895105881e-06, "loss": 1.7272, "step": 83692 }, { "epoch": 2.78, "grad_norm": 0.719955563545227, "learning_rate": 7.608425692855513e-06, "loss": 1.6361, "step": 83693 }, { "epoch": 2.78, "grad_norm": 0.719919741153717, "learning_rate": 7.606086845525406e-06, "loss": 1.6525, "step": 83694 }, { "epoch": 2.78, "grad_norm": 0.7035701870918274, "learning_rate": 7.603748353118455e-06, "loss": 1.7339, "step": 83695 }, { "epoch": 2.78, "grad_norm": 0.7351774573326111, "learning_rate": 7.601410215637493e-06, "loss": 1.6544, "step": 83696 }, { "epoch": 2.78, "grad_norm": 0.7149436473846436, "learning_rate": 7.5990724330852844e-06, "loss": 1.6715, "step": 83697 }, { "epoch": 2.78, "grad_norm": 0.7279724478721619, "learning_rate": 7.5967350054647605e-06, "loss": 1.644, "step": 83698 }, { "epoch": 2.78, "grad_norm": 0.714249849319458, "learning_rate": 7.594397932778717e-06, "loss": 1.6403, "step": 83699 }, { "epoch": 2.78, "grad_norm": 0.7178015112876892, "learning_rate": 7.592061215029955e-06, "loss": 1.6184, "step": 83700 }, { "epoch": 2.78, "grad_norm": 0.7090203762054443, "learning_rate": 7.589724852221335e-06, "loss": 1.6111, "step": 83701 }, { "epoch": 2.78, "grad_norm": 0.7428240180015564, "learning_rate": 7.587388844355757e-06, "loss": 1.6289, "step": 83702 }, { "epoch": 2.78, "grad_norm": 0.7252458333969116, "learning_rate": 7.585053191435985e-06, "loss": 1.6907, "step": 83703 }, { "epoch": 2.78, "grad_norm": 0.7195001244544983, "learning_rate": 7.5827178934648846e-06, "loss": 1.6626, "step": 83704 }, { "epoch": 2.78, "grad_norm": 0.6802602410316467, "learning_rate": 7.580382950445252e-06, "loss": 1.6571, "step": 83705 }, { "epoch": 2.78, "grad_norm": 0.7244338393211365, "learning_rate": 7.578048362379985e-06, "loss": 1.7487, "step": 83706 }, { "epoch": 2.78, "grad_norm": 0.7090626358985901, "learning_rate": 7.5757141292718485e-06, "loss": 1.723, "step": 83707 }, { "epoch": 2.78, "grad_norm": 0.7095089554786682, "learning_rate": 7.5733802511237064e-06, "loss": 1.6732, "step": 83708 }, { "epoch": 2.79, "grad_norm": 0.6982378959655762, "learning_rate": 7.57104672793839e-06, "loss": 1.6098, "step": 83709 }, { "epoch": 2.79, "grad_norm": 0.734496533870697, "learning_rate": 7.568713559718764e-06, "loss": 1.7228, "step": 83710 }, { "epoch": 2.79, "grad_norm": 0.7059214115142822, "learning_rate": 7.566380746467626e-06, "loss": 1.6623, "step": 83711 }, { "epoch": 2.79, "grad_norm": 0.7141403555870056, "learning_rate": 7.564048288187807e-06, "loss": 1.7128, "step": 83712 }, { "epoch": 2.79, "grad_norm": 0.7083315253257751, "learning_rate": 7.5617161848821715e-06, "loss": 1.6516, "step": 83713 }, { "epoch": 2.79, "grad_norm": 0.676888108253479, "learning_rate": 7.55938443655345e-06, "loss": 1.714, "step": 83714 }, { "epoch": 2.79, "grad_norm": 0.7051534652709961, "learning_rate": 7.5570530432046085e-06, "loss": 1.6936, "step": 83715 }, { "epoch": 2.79, "grad_norm": 0.7033340334892273, "learning_rate": 7.55472200483841e-06, "loss": 1.6309, "step": 83716 }, { "epoch": 2.79, "grad_norm": 0.7089093327522278, "learning_rate": 7.552391321457685e-06, "loss": 1.7102, "step": 83717 }, { "epoch": 2.79, "grad_norm": 0.7354527115821838, "learning_rate": 7.550060993065232e-06, "loss": 1.6654, "step": 83718 }, { "epoch": 2.79, "grad_norm": 0.7019301056861877, "learning_rate": 7.54773101966395e-06, "loss": 1.6457, "step": 83719 }, { "epoch": 2.79, "grad_norm": 0.710910975933075, "learning_rate": 7.545401401256634e-06, "loss": 1.6794, "step": 83720 }, { "epoch": 2.79, "grad_norm": 0.7032285928726196, "learning_rate": 7.543072137846084e-06, "loss": 1.6895, "step": 83721 }, { "epoch": 2.79, "grad_norm": 0.7038162350654602, "learning_rate": 7.5407432294351976e-06, "loss": 1.7047, "step": 83722 }, { "epoch": 2.79, "grad_norm": 0.718309760093689, "learning_rate": 7.538414676026705e-06, "loss": 1.662, "step": 83723 }, { "epoch": 2.79, "grad_norm": 0.6943210959434509, "learning_rate": 7.536086477623537e-06, "loss": 1.6822, "step": 83724 }, { "epoch": 2.79, "grad_norm": 0.7136939167976379, "learning_rate": 7.533758634228426e-06, "loss": 1.5674, "step": 83725 }, { "epoch": 2.79, "grad_norm": 0.7159632444381714, "learning_rate": 7.531431145844235e-06, "loss": 1.6768, "step": 83726 }, { "epoch": 2.79, "grad_norm": 0.7178122401237488, "learning_rate": 7.52910401247383e-06, "loss": 1.7102, "step": 83727 }, { "epoch": 2.79, "grad_norm": 0.7006012201309204, "learning_rate": 7.526777234119974e-06, "loss": 1.7108, "step": 83728 }, { "epoch": 2.79, "grad_norm": 0.7064560055732727, "learning_rate": 7.524450810785531e-06, "loss": 1.6131, "step": 83729 }, { "epoch": 2.79, "grad_norm": 0.7218238711357117, "learning_rate": 7.522124742473301e-06, "loss": 1.7404, "step": 83730 }, { "epoch": 2.79, "grad_norm": 0.7192060947418213, "learning_rate": 7.519799029186114e-06, "loss": 1.6993, "step": 83731 }, { "epoch": 2.79, "grad_norm": 0.7247337102890015, "learning_rate": 7.517473670926833e-06, "loss": 1.7687, "step": 83732 }, { "epoch": 2.79, "grad_norm": 0.7049719095230103, "learning_rate": 7.51514866769819e-06, "loss": 1.6143, "step": 83733 }, { "epoch": 2.79, "grad_norm": 0.7019107937812805, "learning_rate": 7.512824019503083e-06, "loss": 1.7661, "step": 83734 }, { "epoch": 2.79, "grad_norm": 0.7064330577850342, "learning_rate": 7.51049972634431e-06, "loss": 1.71, "step": 83735 }, { "epoch": 2.79, "grad_norm": 0.718585193157196, "learning_rate": 7.508175788224702e-06, "loss": 1.6407, "step": 83736 }, { "epoch": 2.79, "grad_norm": 0.7133399844169617, "learning_rate": 7.505852205147023e-06, "loss": 1.674, "step": 83737 }, { "epoch": 2.79, "grad_norm": 0.7166916728019714, "learning_rate": 7.5035289771142036e-06, "loss": 1.6385, "step": 83738 }, { "epoch": 2.79, "grad_norm": 0.711749255657196, "learning_rate": 7.5012061041290094e-06, "loss": 1.6738, "step": 83739 }, { "epoch": 2.79, "grad_norm": 0.7114709615707397, "learning_rate": 7.498883586194204e-06, "loss": 1.6716, "step": 83740 }, { "epoch": 2.79, "grad_norm": 0.6910188794136047, "learning_rate": 7.4965614233126855e-06, "loss": 1.6538, "step": 83741 }, { "epoch": 2.79, "grad_norm": 0.7061060070991516, "learning_rate": 7.494239615487252e-06, "loss": 1.7269, "step": 83742 }, { "epoch": 2.79, "grad_norm": 0.6966599822044373, "learning_rate": 7.491918162720667e-06, "loss": 1.7439, "step": 83743 }, { "epoch": 2.79, "grad_norm": 0.7236763834953308, "learning_rate": 7.489597065015829e-06, "loss": 1.7289, "step": 83744 }, { "epoch": 2.79, "grad_norm": 0.6909862756729126, "learning_rate": 7.487276322375501e-06, "loss": 1.6841, "step": 83745 }, { "epoch": 2.79, "grad_norm": 0.7027325630187988, "learning_rate": 7.48495593480255e-06, "loss": 1.6295, "step": 83746 }, { "epoch": 2.79, "grad_norm": 0.6928333044052124, "learning_rate": 7.482635902299738e-06, "loss": 1.7078, "step": 83747 }, { "epoch": 2.79, "grad_norm": 0.6930062174797058, "learning_rate": 7.480316224869931e-06, "loss": 1.6349, "step": 83748 }, { "epoch": 2.79, "grad_norm": 0.7182645201683044, "learning_rate": 7.477996902515926e-06, "loss": 1.5714, "step": 83749 }, { "epoch": 2.79, "grad_norm": 0.7242493033409119, "learning_rate": 7.4756779352404874e-06, "loss": 1.7151, "step": 83750 }, { "epoch": 2.79, "grad_norm": 0.7025116086006165, "learning_rate": 7.4733593230464795e-06, "loss": 1.7013, "step": 83751 }, { "epoch": 2.79, "grad_norm": 0.7274022102355957, "learning_rate": 7.471041065936767e-06, "loss": 1.5409, "step": 83752 }, { "epoch": 2.79, "grad_norm": 0.7291082739830017, "learning_rate": 7.468723163914048e-06, "loss": 1.6665, "step": 83753 }, { "epoch": 2.79, "grad_norm": 0.6936054229736328, "learning_rate": 7.466405616981219e-06, "loss": 1.6063, "step": 83754 }, { "epoch": 2.79, "grad_norm": 0.725538969039917, "learning_rate": 7.464088425141079e-06, "loss": 1.7216, "step": 83755 }, { "epoch": 2.79, "grad_norm": 0.7088788747787476, "learning_rate": 7.461771588396426e-06, "loss": 1.644, "step": 83756 }, { "epoch": 2.79, "grad_norm": 0.7056750655174255, "learning_rate": 7.459455106750056e-06, "loss": 1.713, "step": 83757 }, { "epoch": 2.79, "grad_norm": 0.7230435013771057, "learning_rate": 7.457138980204802e-06, "loss": 1.6478, "step": 83758 }, { "epoch": 2.79, "grad_norm": 0.7113289833068848, "learning_rate": 7.454823208763561e-06, "loss": 1.6154, "step": 83759 }, { "epoch": 2.79, "grad_norm": 0.7110161185264587, "learning_rate": 7.4525077924289634e-06, "loss": 1.6162, "step": 83760 }, { "epoch": 2.79, "grad_norm": 0.7356081008911133, "learning_rate": 7.450192731203941e-06, "loss": 1.6417, "step": 83761 }, { "epoch": 2.79, "grad_norm": 0.7269458770751953, "learning_rate": 7.447878025091292e-06, "loss": 1.7187, "step": 83762 }, { "epoch": 2.79, "grad_norm": 0.7520005106925964, "learning_rate": 7.445563674093847e-06, "loss": 1.6492, "step": 83763 }, { "epoch": 2.79, "grad_norm": 0.7122780680656433, "learning_rate": 7.443249678214303e-06, "loss": 1.7311, "step": 83764 }, { "epoch": 2.79, "grad_norm": 0.7234038710594177, "learning_rate": 7.4409360374555914e-06, "loss": 1.7048, "step": 83765 }, { "epoch": 2.79, "grad_norm": 0.7048423290252686, "learning_rate": 7.438622751820511e-06, "loss": 1.6313, "step": 83766 }, { "epoch": 2.79, "grad_norm": 0.7069452404975891, "learning_rate": 7.436309821311792e-06, "loss": 1.7175, "step": 83767 }, { "epoch": 2.79, "grad_norm": 0.708757758140564, "learning_rate": 7.433997245932266e-06, "loss": 1.6937, "step": 83768 }, { "epoch": 2.79, "grad_norm": 0.7200624346733093, "learning_rate": 7.431685025684797e-06, "loss": 1.6928, "step": 83769 }, { "epoch": 2.79, "grad_norm": 0.7116143703460693, "learning_rate": 7.429373160572183e-06, "loss": 1.7068, "step": 83770 }, { "epoch": 2.79, "grad_norm": 0.7168946862220764, "learning_rate": 7.427061650597122e-06, "loss": 1.6683, "step": 83771 }, { "epoch": 2.79, "grad_norm": 0.7066358327865601, "learning_rate": 7.424750495762577e-06, "loss": 1.6422, "step": 83772 }, { "epoch": 2.79, "grad_norm": 0.7030975818634033, "learning_rate": 7.422439696071248e-06, "loss": 1.6659, "step": 83773 }, { "epoch": 2.79, "grad_norm": 0.7068234086036682, "learning_rate": 7.42012925152593e-06, "loss": 1.718, "step": 83774 }, { "epoch": 2.79, "grad_norm": 0.7029988765716553, "learning_rate": 7.41781916212949e-06, "loss": 1.6745, "step": 83775 }, { "epoch": 2.79, "grad_norm": 0.7181784510612488, "learning_rate": 7.415509427884725e-06, "loss": 1.6953, "step": 83776 }, { "epoch": 2.79, "grad_norm": 0.7052350044250488, "learning_rate": 7.413200048794432e-06, "loss": 1.6534, "step": 83777 }, { "epoch": 2.79, "grad_norm": 0.7280786633491516, "learning_rate": 7.410891024861343e-06, "loss": 1.6708, "step": 83778 }, { "epoch": 2.79, "grad_norm": 0.7205532789230347, "learning_rate": 7.408582356088388e-06, "loss": 1.6483, "step": 83779 }, { "epoch": 2.79, "grad_norm": 0.7262052893638611, "learning_rate": 7.406274042478299e-06, "loss": 1.6544, "step": 83780 }, { "epoch": 2.79, "grad_norm": 0.7349912524223328, "learning_rate": 7.4039660840338415e-06, "loss": 1.7353, "step": 83781 }, { "epoch": 2.79, "grad_norm": 0.7082552313804626, "learning_rate": 7.401658480757877e-06, "loss": 1.739, "step": 83782 }, { "epoch": 2.79, "grad_norm": 0.7106207013130188, "learning_rate": 7.3993512326531725e-06, "loss": 1.6551, "step": 83783 }, { "epoch": 2.79, "grad_norm": 0.7121031880378723, "learning_rate": 7.397044339722558e-06, "loss": 1.6981, "step": 83784 }, { "epoch": 2.79, "grad_norm": 0.7095551490783691, "learning_rate": 7.394737801968797e-06, "loss": 1.6615, "step": 83785 }, { "epoch": 2.79, "grad_norm": 0.7167827486991882, "learning_rate": 7.3924316193947235e-06, "loss": 1.6407, "step": 83786 }, { "epoch": 2.79, "grad_norm": 0.6988822817802429, "learning_rate": 7.3901257920031655e-06, "loss": 1.6285, "step": 83787 }, { "epoch": 2.79, "grad_norm": 0.702732503414154, "learning_rate": 7.387820319796822e-06, "loss": 1.6664, "step": 83788 }, { "epoch": 2.79, "grad_norm": 0.7317438721656799, "learning_rate": 7.3855152027785916e-06, "loss": 1.6978, "step": 83789 }, { "epoch": 2.79, "grad_norm": 0.7009186744689941, "learning_rate": 7.38321044095117e-06, "loss": 1.6517, "step": 83790 }, { "epoch": 2.79, "grad_norm": 0.7063272595405579, "learning_rate": 7.380906034317491e-06, "loss": 1.7366, "step": 83791 }, { "epoch": 2.79, "grad_norm": 0.7232156991958618, "learning_rate": 7.37860198288025e-06, "loss": 1.768, "step": 83792 }, { "epoch": 2.79, "grad_norm": 0.7053844928741455, "learning_rate": 7.376298286642246e-06, "loss": 1.6867, "step": 83793 }, { "epoch": 2.79, "grad_norm": 0.7347149848937988, "learning_rate": 7.373994945606309e-06, "loss": 1.7265, "step": 83794 }, { "epoch": 2.79, "grad_norm": 0.709052324295044, "learning_rate": 7.371691959775239e-06, "loss": 1.6461, "step": 83795 }, { "epoch": 2.79, "grad_norm": 0.7157572507858276, "learning_rate": 7.369389329151831e-06, "loss": 1.6854, "step": 83796 }, { "epoch": 2.79, "grad_norm": 0.6978356242179871, "learning_rate": 7.367087053738818e-06, "loss": 1.6735, "step": 83797 }, { "epoch": 2.79, "grad_norm": 0.7366395592689514, "learning_rate": 7.364785133539097e-06, "loss": 1.6195, "step": 83798 }, { "epoch": 2.79, "grad_norm": 0.7036306262016296, "learning_rate": 7.3624835685553994e-06, "loss": 1.7171, "step": 83799 }, { "epoch": 2.79, "grad_norm": 0.7113850712776184, "learning_rate": 7.360182358790489e-06, "loss": 1.6831, "step": 83800 }, { "epoch": 2.79, "grad_norm": 0.6921572089195251, "learning_rate": 7.357881504247231e-06, "loss": 1.6711, "step": 83801 }, { "epoch": 2.79, "grad_norm": 0.693061888217926, "learning_rate": 7.355581004928424e-06, "loss": 1.637, "step": 83802 }, { "epoch": 2.79, "grad_norm": 0.7008590698242188, "learning_rate": 7.3532808608367635e-06, "loss": 1.6365, "step": 83803 }, { "epoch": 2.79, "grad_norm": 0.7312133312225342, "learning_rate": 7.3509810719750815e-06, "loss": 1.6981, "step": 83804 }, { "epoch": 2.79, "grad_norm": 0.7225793600082397, "learning_rate": 7.3486816383462435e-06, "loss": 1.6297, "step": 83805 }, { "epoch": 2.79, "grad_norm": 0.7121655941009521, "learning_rate": 7.3463825599529794e-06, "loss": 1.6573, "step": 83806 }, { "epoch": 2.79, "grad_norm": 0.7089256048202515, "learning_rate": 7.344083836798054e-06, "loss": 1.7076, "step": 83807 }, { "epoch": 2.79, "grad_norm": 0.7115267515182495, "learning_rate": 7.341785468884331e-06, "loss": 1.6685, "step": 83808 }, { "epoch": 2.79, "grad_norm": 0.7164174318313599, "learning_rate": 7.339487456214543e-06, "loss": 1.6978, "step": 83809 }, { "epoch": 2.79, "grad_norm": 0.7266749739646912, "learning_rate": 7.337189798791487e-06, "loss": 1.7427, "step": 83810 }, { "epoch": 2.79, "grad_norm": 0.7026663422584534, "learning_rate": 7.334892496617928e-06, "loss": 1.6583, "step": 83811 }, { "epoch": 2.79, "grad_norm": 0.7115143537521362, "learning_rate": 7.332595549696729e-06, "loss": 1.6887, "step": 83812 }, { "epoch": 2.79, "grad_norm": 0.7232861518859863, "learning_rate": 7.3302989580306556e-06, "loss": 1.7013, "step": 83813 }, { "epoch": 2.79, "grad_norm": 0.7065425515174866, "learning_rate": 7.328002721622439e-06, "loss": 1.6541, "step": 83814 }, { "epoch": 2.79, "grad_norm": 0.708430826663971, "learning_rate": 7.325706840474943e-06, "loss": 1.7265, "step": 83815 }, { "epoch": 2.79, "grad_norm": 0.7234291434288025, "learning_rate": 7.323411314590899e-06, "loss": 1.6942, "step": 83816 }, { "epoch": 2.79, "grad_norm": 0.7069896459579468, "learning_rate": 7.321116143973071e-06, "loss": 1.7144, "step": 83817 }, { "epoch": 2.79, "grad_norm": 0.6999260187149048, "learning_rate": 7.318821328624292e-06, "loss": 1.6036, "step": 83818 }, { "epoch": 2.79, "grad_norm": 0.7068331837654114, "learning_rate": 7.31652686854739e-06, "loss": 1.7212, "step": 83819 }, { "epoch": 2.79, "grad_norm": 0.6976315975189209, "learning_rate": 7.314232763745065e-06, "loss": 1.6428, "step": 83820 }, { "epoch": 2.79, "grad_norm": 0.7411082983016968, "learning_rate": 7.311939014220114e-06, "loss": 1.6633, "step": 83821 }, { "epoch": 2.79, "grad_norm": 0.7303318381309509, "learning_rate": 7.3096456199754015e-06, "loss": 1.6312, "step": 83822 }, { "epoch": 2.79, "grad_norm": 0.7182492017745972, "learning_rate": 7.307352581013626e-06, "loss": 1.5921, "step": 83823 }, { "epoch": 2.79, "grad_norm": 0.7084738612174988, "learning_rate": 7.305059897337584e-06, "loss": 1.6374, "step": 83824 }, { "epoch": 2.79, "grad_norm": 0.7170712351799011, "learning_rate": 7.302767568950074e-06, "loss": 1.6563, "step": 83825 }, { "epoch": 2.79, "grad_norm": 0.6914353966712952, "learning_rate": 7.300475595853894e-06, "loss": 1.6432, "step": 83826 }, { "epoch": 2.79, "grad_norm": 0.7212285399436951, "learning_rate": 7.298183978051808e-06, "loss": 1.6786, "step": 83827 }, { "epoch": 2.79, "grad_norm": 0.7262486219406128, "learning_rate": 7.295892715546581e-06, "loss": 1.5961, "step": 83828 }, { "epoch": 2.79, "grad_norm": 0.687876284122467, "learning_rate": 7.293601808341043e-06, "loss": 1.5769, "step": 83829 }, { "epoch": 2.79, "grad_norm": 0.7015789151191711, "learning_rate": 7.291311256437926e-06, "loss": 1.682, "step": 83830 }, { "epoch": 2.79, "grad_norm": 0.7099599242210388, "learning_rate": 7.289021059840027e-06, "loss": 1.6469, "step": 83831 }, { "epoch": 2.79, "grad_norm": 0.7039474248886108, "learning_rate": 7.286731218550113e-06, "loss": 1.7117, "step": 83832 }, { "epoch": 2.79, "grad_norm": 0.7106509804725647, "learning_rate": 7.284441732570978e-06, "loss": 1.598, "step": 83833 }, { "epoch": 2.79, "grad_norm": 0.7097102999687195, "learning_rate": 7.282152601905455e-06, "loss": 1.6784, "step": 83834 }, { "epoch": 2.79, "grad_norm": 0.7115954756736755, "learning_rate": 7.2798638265562095e-06, "loss": 1.6732, "step": 83835 }, { "epoch": 2.79, "grad_norm": 0.7365072965621948, "learning_rate": 7.2775754065261035e-06, "loss": 1.747, "step": 83836 }, { "epoch": 2.79, "grad_norm": 0.7061246633529663, "learning_rate": 7.275287341817904e-06, "loss": 1.6725, "step": 83837 }, { "epoch": 2.79, "grad_norm": 0.7063519954681396, "learning_rate": 7.27299963243434e-06, "loss": 1.7534, "step": 83838 }, { "epoch": 2.79, "grad_norm": 0.711674690246582, "learning_rate": 7.270712278378244e-06, "loss": 1.6204, "step": 83839 }, { "epoch": 2.79, "grad_norm": 0.6941693425178528, "learning_rate": 7.2684252796523454e-06, "loss": 1.7133, "step": 83840 }, { "epoch": 2.79, "grad_norm": 0.7323715090751648, "learning_rate": 7.266138636259477e-06, "loss": 1.6822, "step": 83841 }, { "epoch": 2.79, "grad_norm": 0.7152183651924133, "learning_rate": 7.263852348202404e-06, "loss": 1.7559, "step": 83842 }, { "epoch": 2.79, "grad_norm": 0.7275645136833191, "learning_rate": 7.261566415483855e-06, "loss": 1.7053, "step": 83843 }, { "epoch": 2.79, "grad_norm": 0.7094092965126038, "learning_rate": 7.259280838106629e-06, "loss": 1.7061, "step": 83844 }, { "epoch": 2.79, "grad_norm": 0.7006778717041016, "learning_rate": 7.256995616073491e-06, "loss": 1.6954, "step": 83845 }, { "epoch": 2.79, "grad_norm": 0.6935836672782898, "learning_rate": 7.254710749387238e-06, "loss": 1.6769, "step": 83846 }, { "epoch": 2.79, "grad_norm": 0.7056245803833008, "learning_rate": 7.252426238050635e-06, "loss": 1.6289, "step": 83847 }, { "epoch": 2.79, "grad_norm": 0.7439993023872375, "learning_rate": 7.250142082066446e-06, "loss": 1.733, "step": 83848 }, { "epoch": 2.79, "grad_norm": 0.7343534231185913, "learning_rate": 7.247858281437468e-06, "loss": 1.7358, "step": 83849 }, { "epoch": 2.79, "grad_norm": 0.7091416716575623, "learning_rate": 7.245574836166435e-06, "loss": 1.7341, "step": 83850 }, { "epoch": 2.79, "grad_norm": 0.7164272665977478, "learning_rate": 7.243291746256141e-06, "loss": 1.7121, "step": 83851 }, { "epoch": 2.79, "grad_norm": 0.7072567939758301, "learning_rate": 7.241009011709386e-06, "loss": 1.5963, "step": 83852 }, { "epoch": 2.79, "grad_norm": 0.7344979047775269, "learning_rate": 7.238726632528869e-06, "loss": 1.6838, "step": 83853 }, { "epoch": 2.79, "grad_norm": 0.7130902409553528, "learning_rate": 7.236444608717384e-06, "loss": 1.7259, "step": 83854 }, { "epoch": 2.79, "grad_norm": 0.715842068195343, "learning_rate": 7.234162940277766e-06, "loss": 1.7366, "step": 83855 }, { "epoch": 2.79, "grad_norm": 0.7243633270263672, "learning_rate": 7.231881627212743e-06, "loss": 1.704, "step": 83856 }, { "epoch": 2.79, "grad_norm": 0.7091625332832336, "learning_rate": 7.229600669525049e-06, "loss": 1.7119, "step": 83857 }, { "epoch": 2.79, "grad_norm": 0.7357746958732605, "learning_rate": 7.2273200672175124e-06, "loss": 1.6734, "step": 83858 }, { "epoch": 2.79, "grad_norm": 0.7045011520385742, "learning_rate": 7.225039820292833e-06, "loss": 1.6987, "step": 83859 }, { "epoch": 2.79, "grad_norm": 0.7181456685066223, "learning_rate": 7.222759928753841e-06, "loss": 1.6546, "step": 83860 }, { "epoch": 2.79, "grad_norm": 0.7265160083770752, "learning_rate": 7.220480392603234e-06, "loss": 1.7359, "step": 83861 }, { "epoch": 2.79, "grad_norm": 0.7288053631782532, "learning_rate": 7.218201211843877e-06, "loss": 1.6505, "step": 83862 }, { "epoch": 2.79, "grad_norm": 0.7265143394470215, "learning_rate": 7.215922386478501e-06, "loss": 1.6697, "step": 83863 }, { "epoch": 2.79, "grad_norm": 0.7469183206558228, "learning_rate": 7.213643916509804e-06, "loss": 1.7505, "step": 83864 }, { "epoch": 2.79, "grad_norm": 0.706276535987854, "learning_rate": 7.21136580194065e-06, "loss": 1.7242, "step": 83865 }, { "epoch": 2.79, "grad_norm": 0.7511727213859558, "learning_rate": 7.209088042773736e-06, "loss": 1.6594, "step": 83866 }, { "epoch": 2.79, "grad_norm": 0.6971297264099121, "learning_rate": 7.206810639011828e-06, "loss": 1.6747, "step": 83867 }, { "epoch": 2.79, "grad_norm": 0.8344377279281616, "learning_rate": 7.204533590657691e-06, "loss": 1.7026, "step": 83868 }, { "epoch": 2.79, "grad_norm": 0.7220590114593506, "learning_rate": 7.202256897714187e-06, "loss": 1.6695, "step": 83869 }, { "epoch": 2.79, "grad_norm": 0.6936852931976318, "learning_rate": 7.199980560183949e-06, "loss": 1.6302, "step": 83870 }, { "epoch": 2.79, "grad_norm": 0.7007579207420349, "learning_rate": 7.197704578069774e-06, "loss": 1.5794, "step": 83871 }, { "epoch": 2.79, "grad_norm": 0.7544421553611755, "learning_rate": 7.195428951374493e-06, "loss": 1.6741, "step": 83872 }, { "epoch": 2.79, "grad_norm": 0.7052919268608093, "learning_rate": 7.193153680100805e-06, "loss": 1.6952, "step": 83873 }, { "epoch": 2.79, "grad_norm": 0.7154276967048645, "learning_rate": 7.190878764251473e-06, "loss": 1.6348, "step": 83874 }, { "epoch": 2.79, "grad_norm": 0.708540141582489, "learning_rate": 7.188604203829262e-06, "loss": 1.7146, "step": 83875 }, { "epoch": 2.79, "grad_norm": 0.7130377888679504, "learning_rate": 7.186329998837004e-06, "loss": 1.6414, "step": 83876 }, { "epoch": 2.79, "grad_norm": 0.717004120349884, "learning_rate": 7.184056149277328e-06, "loss": 1.7148, "step": 83877 }, { "epoch": 2.79, "grad_norm": 0.7269791960716248, "learning_rate": 7.181782655153068e-06, "loss": 1.7197, "step": 83878 }, { "epoch": 2.79, "grad_norm": 0.7598093152046204, "learning_rate": 7.179509516466986e-06, "loss": 1.7513, "step": 83879 }, { "epoch": 2.79, "grad_norm": 0.7049672603607178, "learning_rate": 7.177236733221881e-06, "loss": 1.6594, "step": 83880 }, { "epoch": 2.79, "grad_norm": 0.7156319618225098, "learning_rate": 7.174964305420417e-06, "loss": 1.7194, "step": 83881 }, { "epoch": 2.79, "grad_norm": 0.73798006772995, "learning_rate": 7.172692233065391e-06, "loss": 1.6332, "step": 83882 }, { "epoch": 2.79, "grad_norm": 0.7024582624435425, "learning_rate": 7.1704205161596025e-06, "loss": 1.6555, "step": 83883 }, { "epoch": 2.79, "grad_norm": 0.7077318429946899, "learning_rate": 7.168149154705749e-06, "loss": 1.7165, "step": 83884 }, { "epoch": 2.79, "grad_norm": 0.7152202725410461, "learning_rate": 7.165878148706627e-06, "loss": 1.654, "step": 83885 }, { "epoch": 2.79, "grad_norm": 0.69913649559021, "learning_rate": 7.163607498165003e-06, "loss": 1.7384, "step": 83886 }, { "epoch": 2.79, "grad_norm": 0.7029693722724915, "learning_rate": 7.1613372030836054e-06, "loss": 1.7148, "step": 83887 }, { "epoch": 2.79, "grad_norm": 0.7219769358634949, "learning_rate": 7.159067263465168e-06, "loss": 1.6407, "step": 83888 }, { "epoch": 2.79, "grad_norm": 0.7366645932197571, "learning_rate": 7.15679767931252e-06, "loss": 1.7295, "step": 83889 }, { "epoch": 2.79, "grad_norm": 0.6907496452331543, "learning_rate": 7.154528450628361e-06, "loss": 1.6513, "step": 83890 }, { "epoch": 2.79, "grad_norm": 0.737872838973999, "learning_rate": 7.15225957741542e-06, "loss": 1.7394, "step": 83891 }, { "epoch": 2.79, "grad_norm": 0.7050679922103882, "learning_rate": 7.1499910596764964e-06, "loss": 1.7245, "step": 83892 }, { "epoch": 2.79, "grad_norm": 0.7157579660415649, "learning_rate": 7.147722897414354e-06, "loss": 1.6704, "step": 83893 }, { "epoch": 2.79, "grad_norm": 0.7161031365394592, "learning_rate": 7.145455090631724e-06, "loss": 1.6261, "step": 83894 }, { "epoch": 2.79, "grad_norm": 0.7151760458946228, "learning_rate": 7.143187639331338e-06, "loss": 1.6585, "step": 83895 }, { "epoch": 2.79, "grad_norm": 0.7209525108337402, "learning_rate": 7.140920543515993e-06, "loss": 1.7254, "step": 83896 }, { "epoch": 2.79, "grad_norm": 0.6991715431213379, "learning_rate": 7.138653803188421e-06, "loss": 1.6914, "step": 83897 }, { "epoch": 2.79, "grad_norm": 0.7047924399375916, "learning_rate": 7.1363874183513525e-06, "loss": 1.6844, "step": 83898 }, { "epoch": 2.79, "grad_norm": 0.7386178970336914, "learning_rate": 7.134121389007586e-06, "loss": 1.6704, "step": 83899 }, { "epoch": 2.79, "grad_norm": 0.6814692616462708, "learning_rate": 7.131855715159818e-06, "loss": 1.6378, "step": 83900 }, { "epoch": 2.79, "grad_norm": 0.7069257497787476, "learning_rate": 7.129590396810847e-06, "loss": 1.7026, "step": 83901 }, { "epoch": 2.79, "grad_norm": 0.7023512721061707, "learning_rate": 7.127325433963371e-06, "loss": 1.7251, "step": 83902 }, { "epoch": 2.79, "grad_norm": 0.7026653289794922, "learning_rate": 7.125060826620221e-06, "loss": 1.7311, "step": 83903 }, { "epoch": 2.79, "grad_norm": 0.7018858194351196, "learning_rate": 7.122796574784062e-06, "loss": 1.6652, "step": 83904 }, { "epoch": 2.79, "grad_norm": 0.6848669052124023, "learning_rate": 7.120532678457658e-06, "loss": 1.6954, "step": 83905 }, { "epoch": 2.79, "grad_norm": 0.7074520587921143, "learning_rate": 7.118269137643806e-06, "loss": 1.6678, "step": 83906 }, { "epoch": 2.79, "grad_norm": 0.6925104260444641, "learning_rate": 7.1160059523451716e-06, "loss": 1.7252, "step": 83907 }, { "epoch": 2.79, "grad_norm": 0.6898292899131775, "learning_rate": 7.113743122564619e-06, "loss": 1.6595, "step": 83908 }, { "epoch": 2.79, "grad_norm": 0.7243990302085876, "learning_rate": 7.111480648304813e-06, "loss": 1.6878, "step": 83909 }, { "epoch": 2.79, "grad_norm": 0.7106198072433472, "learning_rate": 7.10921852956845e-06, "loss": 1.653, "step": 83910 }, { "epoch": 2.79, "grad_norm": 0.7245972156524658, "learning_rate": 7.10695676635843e-06, "loss": 1.6212, "step": 83911 }, { "epoch": 2.79, "grad_norm": 0.7138963937759399, "learning_rate": 7.104695358677348e-06, "loss": 1.6701, "step": 83912 }, { "epoch": 2.79, "grad_norm": 0.7216383218765259, "learning_rate": 7.1024343065280375e-06, "loss": 1.6815, "step": 83913 }, { "epoch": 2.79, "grad_norm": 0.7192386984825134, "learning_rate": 7.100173609913162e-06, "loss": 1.699, "step": 83914 }, { "epoch": 2.79, "grad_norm": 0.6902903318405151, "learning_rate": 7.097913268835587e-06, "loss": 1.6187, "step": 83915 }, { "epoch": 2.79, "grad_norm": 0.7450715899467468, "learning_rate": 7.095653283297975e-06, "loss": 1.6648, "step": 83916 }, { "epoch": 2.79, "grad_norm": 0.697576642036438, "learning_rate": 7.0933936533030585e-06, "loss": 1.6535, "step": 83917 }, { "epoch": 2.79, "grad_norm": 0.7193121910095215, "learning_rate": 7.091134378853602e-06, "loss": 1.7634, "step": 83918 }, { "epoch": 2.79, "grad_norm": 0.7210313677787781, "learning_rate": 7.088875459952403e-06, "loss": 1.658, "step": 83919 }, { "epoch": 2.79, "grad_norm": 0.7085248827934265, "learning_rate": 7.086616896602093e-06, "loss": 1.7301, "step": 83920 }, { "epoch": 2.79, "grad_norm": 0.6932064890861511, "learning_rate": 7.084358688805469e-06, "loss": 1.6105, "step": 83921 }, { "epoch": 2.79, "grad_norm": 0.7076150178909302, "learning_rate": 7.082100836565297e-06, "loss": 1.669, "step": 83922 }, { "epoch": 2.79, "grad_norm": 0.7011169195175171, "learning_rate": 7.079843339884306e-06, "loss": 1.6205, "step": 83923 }, { "epoch": 2.79, "grad_norm": 0.7111342549324036, "learning_rate": 7.077586198765195e-06, "loss": 1.6597, "step": 83924 }, { "epoch": 2.79, "grad_norm": 0.6937637329101562, "learning_rate": 7.075329413210762e-06, "loss": 1.5933, "step": 83925 }, { "epoch": 2.79, "grad_norm": 0.718621551990509, "learning_rate": 7.073072983223738e-06, "loss": 1.7098, "step": 83926 }, { "epoch": 2.79, "grad_norm": 0.7054007053375244, "learning_rate": 7.070816908806787e-06, "loss": 1.6398, "step": 83927 }, { "epoch": 2.79, "grad_norm": 0.7209430932998657, "learning_rate": 7.068561189962707e-06, "loss": 1.6917, "step": 83928 }, { "epoch": 2.79, "grad_norm": 0.7369803190231323, "learning_rate": 7.066305826694263e-06, "loss": 1.7524, "step": 83929 }, { "epoch": 2.79, "grad_norm": 0.7121400237083435, "learning_rate": 7.0640508190041855e-06, "loss": 1.7044, "step": 83930 }, { "epoch": 2.79, "grad_norm": 0.7159358859062195, "learning_rate": 7.06179616689514e-06, "loss": 1.6705, "step": 83931 }, { "epoch": 2.79, "grad_norm": 0.7152527570724487, "learning_rate": 7.0595418703699226e-06, "loss": 1.6135, "step": 83932 }, { "epoch": 2.79, "grad_norm": 0.72271728515625, "learning_rate": 7.057287929431299e-06, "loss": 1.693, "step": 83933 }, { "epoch": 2.79, "grad_norm": 0.7212410569190979, "learning_rate": 7.0550343440819e-06, "loss": 1.6656, "step": 83934 }, { "epoch": 2.79, "grad_norm": 0.7268708348274231, "learning_rate": 7.052781114324557e-06, "loss": 1.5867, "step": 83935 }, { "epoch": 2.79, "grad_norm": 0.7028666734695435, "learning_rate": 7.0505282401619676e-06, "loss": 1.6567, "step": 83936 }, { "epoch": 2.79, "grad_norm": 0.6944786906242371, "learning_rate": 7.048275721596897e-06, "loss": 1.6238, "step": 83937 }, { "epoch": 2.79, "grad_norm": 0.7313701510429382, "learning_rate": 7.046023558632008e-06, "loss": 1.6176, "step": 83938 }, { "epoch": 2.79, "grad_norm": 0.7205347418785095, "learning_rate": 7.043771751270133e-06, "loss": 1.7046, "step": 83939 }, { "epoch": 2.79, "grad_norm": 0.7103666067123413, "learning_rate": 7.041520299513937e-06, "loss": 1.7578, "step": 83940 }, { "epoch": 2.79, "grad_norm": 0.6956374645233154, "learning_rate": 7.039269203366149e-06, "loss": 1.6452, "step": 83941 }, { "epoch": 2.79, "grad_norm": 0.7108067870140076, "learning_rate": 7.037018462829536e-06, "loss": 1.7575, "step": 83942 }, { "epoch": 2.79, "grad_norm": 0.7357814311981201, "learning_rate": 7.034768077906827e-06, "loss": 1.7399, "step": 83943 }, { "epoch": 2.79, "grad_norm": 0.7180058360099792, "learning_rate": 7.032518048600722e-06, "loss": 1.6416, "step": 83944 }, { "epoch": 2.79, "grad_norm": 0.7343547344207764, "learning_rate": 7.030268374913983e-06, "loss": 1.6677, "step": 83945 }, { "epoch": 2.79, "grad_norm": 0.7377073764801025, "learning_rate": 7.028019056849344e-06, "loss": 1.6806, "step": 83946 }, { "epoch": 2.79, "grad_norm": 0.7146149277687073, "learning_rate": 7.025770094409533e-06, "loss": 1.64, "step": 83947 }, { "epoch": 2.79, "grad_norm": 0.701038122177124, "learning_rate": 7.023521487597217e-06, "loss": 1.6715, "step": 83948 }, { "epoch": 2.79, "grad_norm": 0.6991606950759888, "learning_rate": 7.021273236415226e-06, "loss": 1.6503, "step": 83949 }, { "epoch": 2.79, "grad_norm": 0.7225185632705688, "learning_rate": 7.019025340866224e-06, "loss": 1.6314, "step": 83950 }, { "epoch": 2.79, "grad_norm": 0.7213070392608643, "learning_rate": 7.016777800952944e-06, "loss": 1.6514, "step": 83951 }, { "epoch": 2.79, "grad_norm": 0.717033326625824, "learning_rate": 7.014530616678149e-06, "loss": 1.7109, "step": 83952 }, { "epoch": 2.79, "grad_norm": 0.749901294708252, "learning_rate": 7.012283788044537e-06, "loss": 1.7467, "step": 83953 }, { "epoch": 2.79, "grad_norm": 0.7228548526763916, "learning_rate": 7.010037315054839e-06, "loss": 1.6078, "step": 83954 }, { "epoch": 2.79, "grad_norm": 0.7399662137031555, "learning_rate": 7.007791197711787e-06, "loss": 1.6844, "step": 83955 }, { "epoch": 2.79, "grad_norm": 0.7614545822143555, "learning_rate": 7.0055454360181114e-06, "loss": 1.738, "step": 83956 }, { "epoch": 2.79, "grad_norm": 0.7256659269332886, "learning_rate": 7.003300029976511e-06, "loss": 1.6892, "step": 83957 }, { "epoch": 2.79, "grad_norm": 0.7108504772186279, "learning_rate": 7.001054979589748e-06, "loss": 1.6458, "step": 83958 }, { "epoch": 2.79, "grad_norm": 0.7300956845283508, "learning_rate": 6.998810284860556e-06, "loss": 1.7503, "step": 83959 }, { "epoch": 2.79, "grad_norm": 0.7123987674713135, "learning_rate": 6.996565945791599e-06, "loss": 1.6262, "step": 83960 }, { "epoch": 2.79, "grad_norm": 0.7087442278862, "learning_rate": 6.9943219623856735e-06, "loss": 1.6395, "step": 83961 }, { "epoch": 2.79, "grad_norm": 0.6966413259506226, "learning_rate": 6.992078334645446e-06, "loss": 1.6735, "step": 83962 }, { "epoch": 2.79, "grad_norm": 0.746229350566864, "learning_rate": 6.989835062573678e-06, "loss": 1.6704, "step": 83963 }, { "epoch": 2.79, "grad_norm": 0.7322859764099121, "learning_rate": 6.987592146173038e-06, "loss": 1.7222, "step": 83964 }, { "epoch": 2.79, "grad_norm": 0.7008236050605774, "learning_rate": 6.985349585446321e-06, "loss": 1.6367, "step": 83965 }, { "epoch": 2.79, "grad_norm": 0.7090160846710205, "learning_rate": 6.983107380396224e-06, "loss": 1.703, "step": 83966 }, { "epoch": 2.79, "grad_norm": 0.7243564128875732, "learning_rate": 6.980865531025448e-06, "loss": 1.6552, "step": 83967 }, { "epoch": 2.79, "grad_norm": 0.7263127565383911, "learning_rate": 6.978624037336722e-06, "loss": 1.6657, "step": 83968 }, { "epoch": 2.79, "grad_norm": 0.7117809057235718, "learning_rate": 6.976382899332811e-06, "loss": 1.7055, "step": 83969 }, { "epoch": 2.79, "grad_norm": 0.7495532035827637, "learning_rate": 6.974142117016346e-06, "loss": 1.731, "step": 83970 }, { "epoch": 2.79, "grad_norm": 0.708450436592102, "learning_rate": 6.971901690390092e-06, "loss": 1.6738, "step": 83971 }, { "epoch": 2.79, "grad_norm": 0.717590868473053, "learning_rate": 6.969661619456812e-06, "loss": 1.631, "step": 83972 }, { "epoch": 2.79, "grad_norm": 0.7215613126754761, "learning_rate": 6.9674219042191726e-06, "loss": 1.7126, "step": 83973 }, { "epoch": 2.79, "grad_norm": 0.7211961150169373, "learning_rate": 6.965182544679904e-06, "loss": 1.6939, "step": 83974 }, { "epoch": 2.79, "grad_norm": 0.7015167474746704, "learning_rate": 6.962943540841736e-06, "loss": 1.6017, "step": 83975 }, { "epoch": 2.79, "grad_norm": 0.7222683429718018, "learning_rate": 6.9607048927073696e-06, "loss": 1.7178, "step": 83976 }, { "epoch": 2.79, "grad_norm": 0.7307272553443909, "learning_rate": 6.9584666002795e-06, "loss": 1.6557, "step": 83977 }, { "epoch": 2.79, "grad_norm": 0.6991894841194153, "learning_rate": 6.956228663560892e-06, "loss": 1.6689, "step": 83978 }, { "epoch": 2.79, "grad_norm": 0.7099570631980896, "learning_rate": 6.953991082554311e-06, "loss": 1.7249, "step": 83979 }, { "epoch": 2.79, "grad_norm": 0.7042881846427917, "learning_rate": 6.951753857262321e-06, "loss": 1.6475, "step": 83980 }, { "epoch": 2.79, "grad_norm": 0.7065209746360779, "learning_rate": 6.9495169876877535e-06, "loss": 1.6838, "step": 83981 }, { "epoch": 2.79, "grad_norm": 0.7208685278892517, "learning_rate": 6.947280473833305e-06, "loss": 1.7486, "step": 83982 }, { "epoch": 2.79, "grad_norm": 0.7173852324485779, "learning_rate": 6.945044315701676e-06, "loss": 1.6826, "step": 83983 }, { "epoch": 2.79, "grad_norm": 0.7235482335090637, "learning_rate": 6.942808513295562e-06, "loss": 1.6348, "step": 83984 }, { "epoch": 2.79, "grad_norm": 0.7211843132972717, "learning_rate": 6.940573066617694e-06, "loss": 1.6391, "step": 83985 }, { "epoch": 2.79, "grad_norm": 0.7073839902877808, "learning_rate": 6.938337975670838e-06, "loss": 1.748, "step": 83986 }, { "epoch": 2.79, "grad_norm": 0.7328844666481018, "learning_rate": 6.936103240457624e-06, "loss": 1.6392, "step": 83987 }, { "epoch": 2.79, "grad_norm": 0.7108283042907715, "learning_rate": 6.9338688609807844e-06, "loss": 1.6557, "step": 83988 }, { "epoch": 2.79, "grad_norm": 0.7007718682289124, "learning_rate": 6.931634837243083e-06, "loss": 1.6768, "step": 83989 }, { "epoch": 2.79, "grad_norm": 0.715492308139801, "learning_rate": 6.929401169247184e-06, "loss": 1.6752, "step": 83990 }, { "epoch": 2.79, "grad_norm": 0.704982340335846, "learning_rate": 6.927167856995785e-06, "loss": 1.6312, "step": 83991 }, { "epoch": 2.79, "grad_norm": 0.7212501764297485, "learning_rate": 6.924934900491619e-06, "loss": 1.7098, "step": 83992 }, { "epoch": 2.79, "grad_norm": 0.7243646383285522, "learning_rate": 6.922702299737448e-06, "loss": 1.6635, "step": 83993 }, { "epoch": 2.79, "grad_norm": 0.7404709458351135, "learning_rate": 6.920470054735905e-06, "loss": 1.7188, "step": 83994 }, { "epoch": 2.79, "grad_norm": 0.7522513270378113, "learning_rate": 6.91823816548972e-06, "loss": 1.685, "step": 83995 }, { "epoch": 2.79, "grad_norm": 0.700023353099823, "learning_rate": 6.916006632001625e-06, "loss": 1.6482, "step": 83996 }, { "epoch": 2.79, "grad_norm": 0.7307948470115662, "learning_rate": 6.9137754542743174e-06, "loss": 1.7584, "step": 83997 }, { "epoch": 2.79, "grad_norm": 0.7042461633682251, "learning_rate": 6.911544632310495e-06, "loss": 1.6911, "step": 83998 }, { "epoch": 2.79, "grad_norm": 0.6913728713989258, "learning_rate": 6.909314166112856e-06, "loss": 1.6879, "step": 83999 }, { "epoch": 2.79, "grad_norm": 0.7057044506072998, "learning_rate": 6.907084055684164e-06, "loss": 1.6415, "step": 84000 }, { "epoch": 2.79, "grad_norm": 0.7151147127151489, "learning_rate": 6.9048543010270516e-06, "loss": 1.6663, "step": 84001 }, { "epoch": 2.79, "grad_norm": 0.7022156715393066, "learning_rate": 6.902624902144249e-06, "loss": 1.6348, "step": 84002 }, { "epoch": 2.79, "grad_norm": 0.6986117959022522, "learning_rate": 6.900395859038521e-06, "loss": 1.673, "step": 84003 }, { "epoch": 2.79, "grad_norm": 0.7077417373657227, "learning_rate": 6.898167171712532e-06, "loss": 1.6131, "step": 84004 }, { "epoch": 2.79, "grad_norm": 0.7013264298439026, "learning_rate": 6.895938840168913e-06, "loss": 1.6779, "step": 84005 }, { "epoch": 2.79, "grad_norm": 0.7091312408447266, "learning_rate": 6.893710864410495e-06, "loss": 1.7013, "step": 84006 }, { "epoch": 2.79, "grad_norm": 0.7194810509681702, "learning_rate": 6.891483244439911e-06, "loss": 1.6363, "step": 84007 }, { "epoch": 2.79, "grad_norm": 0.7057751417160034, "learning_rate": 6.889255980259889e-06, "loss": 1.6092, "step": 84008 }, { "epoch": 2.79, "grad_norm": 0.6977422833442688, "learning_rate": 6.887029071873063e-06, "loss": 1.5898, "step": 84009 }, { "epoch": 2.8, "grad_norm": 0.7113309502601624, "learning_rate": 6.884802519282262e-06, "loss": 1.6075, "step": 84010 }, { "epoch": 2.8, "grad_norm": 0.7151628732681274, "learning_rate": 6.882576322490119e-06, "loss": 1.743, "step": 84011 }, { "epoch": 2.8, "grad_norm": 0.6924117803573608, "learning_rate": 6.880350481499297e-06, "loss": 1.67, "step": 84012 }, { "epoch": 2.8, "grad_norm": 0.7061771750450134, "learning_rate": 6.878124996312595e-06, "loss": 1.6188, "step": 84013 }, { "epoch": 2.8, "grad_norm": 0.699146568775177, "learning_rate": 6.875899866932644e-06, "loss": 1.6656, "step": 84014 }, { "epoch": 2.8, "grad_norm": 0.7195408940315247, "learning_rate": 6.8736750933621075e-06, "loss": 1.6426, "step": 84015 }, { "epoch": 2.8, "grad_norm": 0.7214953899383545, "learning_rate": 6.871450675603785e-06, "loss": 1.6765, "step": 84016 }, { "epoch": 2.8, "grad_norm": 0.7225469350814819, "learning_rate": 6.8692266136603055e-06, "loss": 1.6826, "step": 84017 }, { "epoch": 2.8, "grad_norm": 0.7084163427352905, "learning_rate": 6.8670029075344356e-06, "loss": 1.637, "step": 84018 }, { "epoch": 2.8, "grad_norm": 0.7236341238021851, "learning_rate": 6.864779557228806e-06, "loss": 1.6383, "step": 84019 }, { "epoch": 2.8, "grad_norm": 0.7274839878082275, "learning_rate": 6.862556562746146e-06, "loss": 1.7419, "step": 84020 }, { "epoch": 2.8, "grad_norm": 0.7122164964675903, "learning_rate": 6.860333924089156e-06, "loss": 1.7285, "step": 84021 }, { "epoch": 2.8, "grad_norm": 0.6867935061454773, "learning_rate": 6.858111641260533e-06, "loss": 1.6558, "step": 84022 }, { "epoch": 2.8, "grad_norm": 0.6953681707382202, "learning_rate": 6.8558897142629745e-06, "loss": 1.6247, "step": 84023 }, { "epoch": 2.8, "grad_norm": 0.7255460023880005, "learning_rate": 6.853668143099145e-06, "loss": 1.6958, "step": 84024 }, { "epoch": 2.8, "grad_norm": 0.6858612298965454, "learning_rate": 6.851446927771776e-06, "loss": 1.6263, "step": 84025 }, { "epoch": 2.8, "grad_norm": 0.7424203753471375, "learning_rate": 6.849226068283564e-06, "loss": 1.7227, "step": 84026 }, { "epoch": 2.8, "grad_norm": 0.7021533250808716, "learning_rate": 6.8470055646372094e-06, "loss": 1.6523, "step": 84027 }, { "epoch": 2.8, "grad_norm": 0.6957367658615112, "learning_rate": 6.844785416835374e-06, "loss": 1.729, "step": 84028 }, { "epoch": 2.8, "grad_norm": 0.6929486393928528, "learning_rate": 6.842565624880792e-06, "loss": 1.7416, "step": 84029 }, { "epoch": 2.8, "grad_norm": 0.7193160653114319, "learning_rate": 6.840346188776124e-06, "loss": 1.6721, "step": 84030 }, { "epoch": 2.8, "grad_norm": 0.7014308571815491, "learning_rate": 6.8381271085240714e-06, "loss": 1.684, "step": 84031 }, { "epoch": 2.8, "grad_norm": 0.7171697020530701, "learning_rate": 6.835908384127398e-06, "loss": 1.642, "step": 84032 }, { "epoch": 2.8, "grad_norm": 0.7157734036445618, "learning_rate": 6.8336900155887e-06, "loss": 1.6844, "step": 84033 }, { "epoch": 2.8, "grad_norm": 0.7006001472473145, "learning_rate": 6.831472002910676e-06, "loss": 1.6662, "step": 84034 }, { "epoch": 2.8, "grad_norm": 0.7044548988342285, "learning_rate": 6.829254346096091e-06, "loss": 1.5895, "step": 84035 }, { "epoch": 2.8, "grad_norm": 0.7035685777664185, "learning_rate": 6.8270370451475765e-06, "loss": 1.6653, "step": 84036 }, { "epoch": 2.8, "grad_norm": 0.7227791547775269, "learning_rate": 6.8248201000678295e-06, "loss": 1.6998, "step": 84037 }, { "epoch": 2.8, "grad_norm": 0.7188722491264343, "learning_rate": 6.822603510859548e-06, "loss": 1.6467, "step": 84038 }, { "epoch": 2.8, "grad_norm": 0.7032526135444641, "learning_rate": 6.820387277525463e-06, "loss": 1.55, "step": 84039 }, { "epoch": 2.8, "grad_norm": 0.6941050291061401, "learning_rate": 6.818171400068239e-06, "loss": 1.7345, "step": 84040 }, { "epoch": 2.8, "grad_norm": 0.7312083840370178, "learning_rate": 6.815955878490509e-06, "loss": 1.7342, "step": 84041 }, { "epoch": 2.8, "grad_norm": 0.7054280042648315, "learning_rate": 6.813740712795035e-06, "loss": 1.7062, "step": 84042 }, { "epoch": 2.8, "grad_norm": 0.7440916895866394, "learning_rate": 6.811525902984516e-06, "loss": 1.672, "step": 84043 }, { "epoch": 2.8, "grad_norm": 0.7632800936698914, "learning_rate": 6.809311449061549e-06, "loss": 1.6761, "step": 84044 }, { "epoch": 2.8, "grad_norm": 0.6879299879074097, "learning_rate": 6.8070973510288676e-06, "loss": 1.6496, "step": 84045 }, { "epoch": 2.8, "grad_norm": 0.7105281352996826, "learning_rate": 6.8048836088892e-06, "loss": 1.7549, "step": 84046 }, { "epoch": 2.8, "grad_norm": 0.7120906710624695, "learning_rate": 6.802670222645212e-06, "loss": 1.6536, "step": 84047 }, { "epoch": 2.8, "grad_norm": 0.7581994533538818, "learning_rate": 6.8004571922995685e-06, "loss": 1.624, "step": 84048 }, { "epoch": 2.8, "grad_norm": 0.6993650197982788, "learning_rate": 6.798244517855e-06, "loss": 1.6291, "step": 84049 }, { "epoch": 2.8, "grad_norm": 0.732759952545166, "learning_rate": 6.7960321993141385e-06, "loss": 1.6679, "step": 84050 }, { "epoch": 2.8, "grad_norm": 0.7272989153862, "learning_rate": 6.7938202366796815e-06, "loss": 1.6382, "step": 84051 }, { "epoch": 2.8, "grad_norm": 0.7209318280220032, "learning_rate": 6.791608629954293e-06, "loss": 1.7801, "step": 84052 }, { "epoch": 2.8, "grad_norm": 0.7187093496322632, "learning_rate": 6.7893973791407376e-06, "loss": 1.7171, "step": 84053 }, { "epoch": 2.8, "grad_norm": 0.7229222655296326, "learning_rate": 6.78718648424168e-06, "loss": 1.6905, "step": 84054 }, { "epoch": 2.8, "grad_norm": 0.7098938226699829, "learning_rate": 6.784975945259719e-06, "loss": 1.6718, "step": 84055 }, { "epoch": 2.8, "grad_norm": 0.7092427015304565, "learning_rate": 6.782765762197618e-06, "loss": 1.6123, "step": 84056 }, { "epoch": 2.8, "grad_norm": 0.7204089164733887, "learning_rate": 6.780555935058041e-06, "loss": 1.6536, "step": 84057 }, { "epoch": 2.8, "grad_norm": 0.7314416766166687, "learning_rate": 6.77834646384362e-06, "loss": 1.6639, "step": 84058 }, { "epoch": 2.8, "grad_norm": 0.7516511678695679, "learning_rate": 6.7761373485571204e-06, "loss": 1.7485, "step": 84059 }, { "epoch": 2.8, "grad_norm": 0.7097936272621155, "learning_rate": 6.773928589201172e-06, "loss": 1.6882, "step": 84060 }, { "epoch": 2.8, "grad_norm": 0.7211478352546692, "learning_rate": 6.771720185778506e-06, "loss": 1.7072, "step": 84061 }, { "epoch": 2.8, "grad_norm": 0.7225233912467957, "learning_rate": 6.7695121382917215e-06, "loss": 1.7048, "step": 84062 }, { "epoch": 2.8, "grad_norm": 0.7235751748085022, "learning_rate": 6.767304446743582e-06, "loss": 1.6285, "step": 84063 }, { "epoch": 2.8, "grad_norm": 0.7063691020011902, "learning_rate": 6.765097111136719e-06, "loss": 1.6885, "step": 84064 }, { "epoch": 2.8, "grad_norm": 0.7040702700614929, "learning_rate": 6.762890131473797e-06, "loss": 1.6677, "step": 84065 }, { "epoch": 2.8, "grad_norm": 0.7417022585868835, "learning_rate": 6.760683507757547e-06, "loss": 1.7237, "step": 84066 }, { "epoch": 2.8, "grad_norm": 0.6893988251686096, "learning_rate": 6.758477239990568e-06, "loss": 1.7101, "step": 84067 }, { "epoch": 2.8, "grad_norm": 0.7054991126060486, "learning_rate": 6.756271328175655e-06, "loss": 1.7255, "step": 84068 }, { "epoch": 2.8, "grad_norm": 0.7083981037139893, "learning_rate": 6.754065772315376e-06, "loss": 1.6677, "step": 84069 }, { "epoch": 2.8, "grad_norm": 0.7373796105384827, "learning_rate": 6.751860572412493e-06, "loss": 1.6199, "step": 84070 }, { "epoch": 2.8, "grad_norm": 0.7133144736289978, "learning_rate": 6.7496557284696385e-06, "loss": 1.6554, "step": 84071 }, { "epoch": 2.8, "grad_norm": 0.7156693935394287, "learning_rate": 6.747451240489476e-06, "loss": 1.6779, "step": 84072 }, { "epoch": 2.8, "grad_norm": 0.7173924446105957, "learning_rate": 6.745247108474705e-06, "loss": 1.6485, "step": 84073 }, { "epoch": 2.8, "grad_norm": 0.7026457190513611, "learning_rate": 6.7430433324279555e-06, "loss": 1.6868, "step": 84074 }, { "epoch": 2.8, "grad_norm": 0.7089269757270813, "learning_rate": 6.740839912351992e-06, "loss": 1.6929, "step": 84075 }, { "epoch": 2.8, "grad_norm": 0.6797292232513428, "learning_rate": 6.738636848249446e-06, "loss": 1.6147, "step": 84076 }, { "epoch": 2.8, "grad_norm": 0.7053066492080688, "learning_rate": 6.736434140122948e-06, "loss": 1.6307, "step": 84077 }, { "epoch": 2.8, "grad_norm": 0.7496510148048401, "learning_rate": 6.734231787975231e-06, "loss": 1.673, "step": 84078 }, { "epoch": 2.8, "grad_norm": 0.7368258833885193, "learning_rate": 6.7320297918089235e-06, "loss": 1.6233, "step": 84079 }, { "epoch": 2.8, "grad_norm": 0.7418124675750732, "learning_rate": 6.729828151626759e-06, "loss": 1.6259, "step": 84080 }, { "epoch": 2.8, "grad_norm": 0.6961963772773743, "learning_rate": 6.727626867431335e-06, "loss": 1.6528, "step": 84081 }, { "epoch": 2.8, "grad_norm": 0.730415403842926, "learning_rate": 6.725425939225382e-06, "loss": 1.6755, "step": 84082 }, { "epoch": 2.8, "grad_norm": 0.6875477433204651, "learning_rate": 6.723225367011564e-06, "loss": 1.6076, "step": 84083 }, { "epoch": 2.8, "grad_norm": 0.7238454818725586, "learning_rate": 6.721025150792514e-06, "loss": 1.666, "step": 84084 }, { "epoch": 2.8, "grad_norm": 0.7059662938117981, "learning_rate": 6.718825290570928e-06, "loss": 1.6605, "step": 84085 }, { "epoch": 2.8, "grad_norm": 0.6856117844581604, "learning_rate": 6.716625786349506e-06, "loss": 1.6352, "step": 84086 }, { "epoch": 2.8, "grad_norm": 0.70633864402771, "learning_rate": 6.714426638130843e-06, "loss": 1.6575, "step": 84087 }, { "epoch": 2.8, "grad_norm": 0.7157624959945679, "learning_rate": 6.712227845917639e-06, "loss": 1.7836, "step": 84088 }, { "epoch": 2.8, "grad_norm": 0.7239957451820374, "learning_rate": 6.710029409712625e-06, "loss": 1.6129, "step": 84089 }, { "epoch": 2.8, "grad_norm": 0.719279408454895, "learning_rate": 6.7078313295183986e-06, "loss": 1.6485, "step": 84090 }, { "epoch": 2.8, "grad_norm": 0.7298633456230164, "learning_rate": 6.705633605337657e-06, "loss": 1.6548, "step": 84091 }, { "epoch": 2.8, "grad_norm": 0.7088305354118347, "learning_rate": 6.703436237173065e-06, "loss": 1.7228, "step": 84092 }, { "epoch": 2.8, "grad_norm": 0.7361378073692322, "learning_rate": 6.701239225027289e-06, "loss": 1.6597, "step": 84093 }, { "epoch": 2.8, "grad_norm": 0.7240649461746216, "learning_rate": 6.699042568902957e-06, "loss": 1.6325, "step": 84094 }, { "epoch": 2.8, "grad_norm": 0.692908525466919, "learning_rate": 6.696846268802802e-06, "loss": 1.6704, "step": 84095 }, { "epoch": 2.8, "grad_norm": 0.7254990339279175, "learning_rate": 6.694650324729489e-06, "loss": 1.6998, "step": 84096 }, { "epoch": 2.8, "grad_norm": 0.7118319272994995, "learning_rate": 6.692454736685582e-06, "loss": 1.7552, "step": 84097 }, { "epoch": 2.8, "grad_norm": 0.7261533737182617, "learning_rate": 6.690259504673845e-06, "loss": 1.6896, "step": 84098 }, { "epoch": 2.8, "grad_norm": 0.7009228467941284, "learning_rate": 6.688064628696943e-06, "loss": 1.7474, "step": 84099 }, { "epoch": 2.8, "grad_norm": 0.7043027281761169, "learning_rate": 6.685870108757507e-06, "loss": 1.6962, "step": 84100 }, { "epoch": 2.8, "grad_norm": 0.7165153622627258, "learning_rate": 6.683675944858169e-06, "loss": 1.7812, "step": 84101 }, { "epoch": 2.8, "grad_norm": 0.7177111506462097, "learning_rate": 6.6814821370016596e-06, "loss": 1.6921, "step": 84102 }, { "epoch": 2.8, "grad_norm": 0.7208539843559265, "learning_rate": 6.679288685190643e-06, "loss": 1.7363, "step": 84103 }, { "epoch": 2.8, "grad_norm": 0.7398548722267151, "learning_rate": 6.677095589427717e-06, "loss": 1.6412, "step": 84104 }, { "epoch": 2.8, "grad_norm": 0.7244791388511658, "learning_rate": 6.674902849715546e-06, "loss": 1.6704, "step": 84105 }, { "epoch": 2.8, "grad_norm": 0.708288311958313, "learning_rate": 6.672710466056896e-06, "loss": 1.6825, "step": 84106 }, { "epoch": 2.8, "grad_norm": 0.7350603342056274, "learning_rate": 6.670518438454298e-06, "loss": 1.7068, "step": 84107 }, { "epoch": 2.8, "grad_norm": 0.7248444557189941, "learning_rate": 6.668326766910481e-06, "loss": 1.6578, "step": 84108 }, { "epoch": 2.8, "grad_norm": 0.7066259980201721, "learning_rate": 6.666135451428112e-06, "loss": 1.6883, "step": 84109 }, { "epoch": 2.8, "grad_norm": 0.7330865859985352, "learning_rate": 6.663944492009854e-06, "loss": 1.6433, "step": 84110 }, { "epoch": 2.8, "grad_norm": 0.7280847430229187, "learning_rate": 6.6617538886583055e-06, "loss": 1.7277, "step": 84111 }, { "epoch": 2.8, "grad_norm": 0.7297543287277222, "learning_rate": 6.659563641376164e-06, "loss": 1.7133, "step": 84112 }, { "epoch": 2.8, "grad_norm": 0.7186405062675476, "learning_rate": 6.657373750166095e-06, "loss": 1.6558, "step": 84113 }, { "epoch": 2.8, "grad_norm": 0.7013816237449646, "learning_rate": 6.655184215030796e-06, "loss": 1.7338, "step": 84114 }, { "epoch": 2.8, "grad_norm": 0.6956915259361267, "learning_rate": 6.6529950359728304e-06, "loss": 1.6673, "step": 84115 }, { "epoch": 2.8, "grad_norm": 0.6993624567985535, "learning_rate": 6.650806212994897e-06, "loss": 1.6762, "step": 84116 }, { "epoch": 2.8, "grad_norm": 0.7249695062637329, "learning_rate": 6.648617746099727e-06, "loss": 1.7257, "step": 84117 }, { "epoch": 2.8, "grad_norm": 0.707887589931488, "learning_rate": 6.646429635289852e-06, "loss": 1.6465, "step": 84118 }, { "epoch": 2.8, "grad_norm": 0.7050866484642029, "learning_rate": 6.644241880568002e-06, "loss": 1.7868, "step": 84119 }, { "epoch": 2.8, "grad_norm": 0.7267215251922607, "learning_rate": 6.642054481936843e-06, "loss": 1.6904, "step": 84120 }, { "epoch": 2.8, "grad_norm": 0.7181206345558167, "learning_rate": 6.639867439399005e-06, "loss": 1.6838, "step": 84121 }, { "epoch": 2.8, "grad_norm": 0.7074882984161377, "learning_rate": 6.637680752957087e-06, "loss": 1.6697, "step": 84122 }, { "epoch": 2.8, "grad_norm": 0.7014905214309692, "learning_rate": 6.635494422613852e-06, "loss": 1.6705, "step": 84123 }, { "epoch": 2.8, "grad_norm": 0.7502007484436035, "learning_rate": 6.633308448371899e-06, "loss": 1.6781, "step": 84124 }, { "epoch": 2.8, "grad_norm": 0.7396836876869202, "learning_rate": 6.631122830233859e-06, "loss": 1.6934, "step": 84125 }, { "epoch": 2.8, "grad_norm": 0.7064551711082458, "learning_rate": 6.628937568202431e-06, "loss": 1.706, "step": 84126 }, { "epoch": 2.8, "grad_norm": 0.7240371108055115, "learning_rate": 6.626752662280244e-06, "loss": 1.7051, "step": 84127 }, { "epoch": 2.8, "grad_norm": 0.7330437302589417, "learning_rate": 6.624568112469963e-06, "loss": 1.7157, "step": 84128 }, { "epoch": 2.8, "grad_norm": 0.7021369934082031, "learning_rate": 6.622383918774188e-06, "loss": 1.6855, "step": 84129 }, { "epoch": 2.8, "grad_norm": 0.712856113910675, "learning_rate": 6.620200081195648e-06, "loss": 1.6845, "step": 84130 }, { "epoch": 2.8, "grad_norm": 0.703045666217804, "learning_rate": 6.618016599736975e-06, "loss": 1.6414, "step": 84131 }, { "epoch": 2.8, "grad_norm": 0.719394326210022, "learning_rate": 6.615833474400767e-06, "loss": 1.6692, "step": 84132 }, { "epoch": 2.8, "grad_norm": 0.7117406725883484, "learning_rate": 6.613650705189721e-06, "loss": 1.6846, "step": 84133 }, { "epoch": 2.8, "grad_norm": 0.7168381810188293, "learning_rate": 6.611468292106436e-06, "loss": 1.6832, "step": 84134 }, { "epoch": 2.8, "grad_norm": 0.7210748195648193, "learning_rate": 6.609286235153644e-06, "loss": 1.6285, "step": 84135 }, { "epoch": 2.8, "grad_norm": 0.7191200256347656, "learning_rate": 6.6071045343339406e-06, "loss": 1.7261, "step": 84136 }, { "epoch": 2.8, "grad_norm": 0.7378283739089966, "learning_rate": 6.604923189649991e-06, "loss": 1.7443, "step": 84137 }, { "epoch": 2.8, "grad_norm": 0.7262792587280273, "learning_rate": 6.602742201104428e-06, "loss": 1.7111, "step": 84138 }, { "epoch": 2.8, "grad_norm": 0.6995657682418823, "learning_rate": 6.600561568699914e-06, "loss": 1.7108, "step": 84139 }, { "epoch": 2.8, "grad_norm": 0.7101935148239136, "learning_rate": 6.598381292439081e-06, "loss": 1.647, "step": 84140 }, { "epoch": 2.8, "grad_norm": 0.7180655002593994, "learning_rate": 6.596201372324561e-06, "loss": 1.6281, "step": 84141 }, { "epoch": 2.8, "grad_norm": 0.7143880724906921, "learning_rate": 6.5940218083590516e-06, "loss": 1.6054, "step": 84142 }, { "epoch": 2.8, "grad_norm": 0.701528787612915, "learning_rate": 6.591842600545183e-06, "loss": 1.6697, "step": 84143 }, { "epoch": 2.8, "grad_norm": 0.6951443552970886, "learning_rate": 6.589663748885521e-06, "loss": 1.6308, "step": 84144 }, { "epoch": 2.8, "grad_norm": 0.7310134768486023, "learning_rate": 6.587485253382829e-06, "loss": 1.6855, "step": 84145 }, { "epoch": 2.8, "grad_norm": 0.7175366282463074, "learning_rate": 6.585307114039673e-06, "loss": 1.6531, "step": 84146 }, { "epoch": 2.8, "grad_norm": 0.7191720008850098, "learning_rate": 6.58312933085875e-06, "loss": 1.6869, "step": 84147 }, { "epoch": 2.8, "grad_norm": 0.7106380462646484, "learning_rate": 6.580951903842624e-06, "loss": 1.6083, "step": 84148 }, { "epoch": 2.8, "grad_norm": 0.7252609133720398, "learning_rate": 6.578774832994027e-06, "loss": 1.7092, "step": 84149 }, { "epoch": 2.8, "grad_norm": 0.7510448098182678, "learning_rate": 6.57659811831559e-06, "loss": 1.7505, "step": 84150 }, { "epoch": 2.8, "grad_norm": 0.7363632917404175, "learning_rate": 6.574421759809878e-06, "loss": 1.7295, "step": 84151 }, { "epoch": 2.8, "grad_norm": 0.7099820375442505, "learning_rate": 6.572245757479622e-06, "loss": 1.7244, "step": 84152 }, { "epoch": 2.8, "grad_norm": 0.6891564726829529, "learning_rate": 6.570070111327419e-06, "loss": 1.6387, "step": 84153 }, { "epoch": 2.8, "grad_norm": 0.7003090381622314, "learning_rate": 6.567894821355868e-06, "loss": 1.6826, "step": 84154 }, { "epoch": 2.8, "grad_norm": 0.6934272646903992, "learning_rate": 6.5657198875677e-06, "loss": 1.6852, "step": 84155 }, { "epoch": 2.8, "grad_norm": 0.7253984808921814, "learning_rate": 6.563545309965512e-06, "loss": 1.7328, "step": 84156 }, { "epoch": 2.8, "grad_norm": 0.7162463665008545, "learning_rate": 6.5613710885519365e-06, "loss": 1.6082, "step": 84157 }, { "epoch": 2.8, "grad_norm": 0.721977949142456, "learning_rate": 6.559197223329604e-06, "loss": 1.6856, "step": 84158 }, { "epoch": 2.8, "grad_norm": 0.7248856425285339, "learning_rate": 6.557023714301213e-06, "loss": 1.6603, "step": 84159 }, { "epoch": 2.8, "grad_norm": 0.7327374219894409, "learning_rate": 6.554850561469327e-06, "loss": 1.7913, "step": 84160 }, { "epoch": 2.8, "grad_norm": 0.7169473767280579, "learning_rate": 6.552677764836611e-06, "loss": 1.6674, "step": 84161 }, { "epoch": 2.8, "grad_norm": 0.7419406175613403, "learning_rate": 6.550505324405697e-06, "loss": 1.6968, "step": 84162 }, { "epoch": 2.8, "grad_norm": 0.7501276135444641, "learning_rate": 6.548333240179282e-06, "loss": 1.6867, "step": 84163 }, { "epoch": 2.8, "grad_norm": 0.6942131519317627, "learning_rate": 6.546161512159931e-06, "loss": 1.7421, "step": 84164 }, { "epoch": 2.8, "grad_norm": 0.6944060325622559, "learning_rate": 6.543990140350275e-06, "loss": 1.6099, "step": 84165 }, { "epoch": 2.8, "grad_norm": 0.7391922473907471, "learning_rate": 6.5418191247529785e-06, "loss": 1.7117, "step": 84166 }, { "epoch": 2.8, "grad_norm": 0.7141422629356384, "learning_rate": 6.539648465370706e-06, "loss": 1.7428, "step": 84167 }, { "epoch": 2.8, "grad_norm": 0.7271975874900818, "learning_rate": 6.537478162206022e-06, "loss": 1.6749, "step": 84168 }, { "epoch": 2.8, "grad_norm": 0.7002717852592468, "learning_rate": 6.535308215261625e-06, "loss": 1.6102, "step": 84169 }, { "epoch": 2.8, "grad_norm": 0.7058901786804199, "learning_rate": 6.533138624540113e-06, "loss": 1.6398, "step": 84170 }, { "epoch": 2.8, "grad_norm": 0.6920777559280396, "learning_rate": 6.53096939004415e-06, "loss": 1.6699, "step": 84171 }, { "epoch": 2.8, "grad_norm": 0.7163946628570557, "learning_rate": 6.528800511776333e-06, "loss": 1.6856, "step": 84172 }, { "epoch": 2.8, "grad_norm": 0.7271286845207214, "learning_rate": 6.526631989739328e-06, "loss": 1.7393, "step": 84173 }, { "epoch": 2.8, "grad_norm": 0.7224190831184387, "learning_rate": 6.524463823935766e-06, "loss": 1.687, "step": 84174 }, { "epoch": 2.8, "grad_norm": 0.7192270755767822, "learning_rate": 6.5222960143682115e-06, "loss": 1.6745, "step": 84175 }, { "epoch": 2.8, "grad_norm": 0.7205262184143066, "learning_rate": 6.520128561039362e-06, "loss": 1.6988, "step": 84176 }, { "epoch": 2.8, "grad_norm": 0.7101989984512329, "learning_rate": 6.517961463951849e-06, "loss": 1.6991, "step": 84177 }, { "epoch": 2.8, "grad_norm": 0.7088127732276917, "learning_rate": 6.515794723108303e-06, "loss": 1.6971, "step": 84178 }, { "epoch": 2.8, "grad_norm": 0.7300612330436707, "learning_rate": 6.51362833851129e-06, "loss": 1.707, "step": 84179 }, { "epoch": 2.8, "grad_norm": 0.7082290053367615, "learning_rate": 6.51146231016354e-06, "loss": 1.6294, "step": 84180 }, { "epoch": 2.8, "grad_norm": 0.7045227289199829, "learning_rate": 6.509296638067618e-06, "loss": 1.6536, "step": 84181 }, { "epoch": 2.8, "grad_norm": 0.7116714119911194, "learning_rate": 6.507131322226156e-06, "loss": 1.7412, "step": 84182 }, { "epoch": 2.8, "grad_norm": 0.711985170841217, "learning_rate": 6.5049663626417835e-06, "loss": 1.7345, "step": 84183 }, { "epoch": 2.8, "grad_norm": 0.6958305835723877, "learning_rate": 6.502801759317133e-06, "loss": 1.6455, "step": 84184 }, { "epoch": 2.8, "grad_norm": 0.7128833532333374, "learning_rate": 6.5006375122548694e-06, "loss": 1.709, "step": 84185 }, { "epoch": 2.8, "grad_norm": 0.6888632774353027, "learning_rate": 6.498473621457556e-06, "loss": 1.6023, "step": 84186 }, { "epoch": 2.8, "grad_norm": 0.708249568939209, "learning_rate": 6.496310086927858e-06, "loss": 1.7049, "step": 84187 }, { "epoch": 2.8, "grad_norm": 0.7171865701675415, "learning_rate": 6.494146908668441e-06, "loss": 1.6504, "step": 84188 }, { "epoch": 2.8, "grad_norm": 0.7045163512229919, "learning_rate": 6.4919840866818005e-06, "loss": 1.7229, "step": 84189 }, { "epoch": 2.8, "grad_norm": 0.717653751373291, "learning_rate": 6.489821620970703e-06, "loss": 1.7511, "step": 84190 }, { "epoch": 2.8, "grad_norm": 0.7180045247077942, "learning_rate": 6.4876595115376795e-06, "loss": 1.6998, "step": 84191 }, { "epoch": 2.8, "grad_norm": 0.709773600101471, "learning_rate": 6.485497758385428e-06, "loss": 1.745, "step": 84192 }, { "epoch": 2.8, "grad_norm": 0.718001663684845, "learning_rate": 6.483336361516545e-06, "loss": 1.6316, "step": 84193 }, { "epoch": 2.8, "grad_norm": 0.7803817391395569, "learning_rate": 6.481175320933596e-06, "loss": 1.7325, "step": 84194 }, { "epoch": 2.8, "grad_norm": 0.6975077986717224, "learning_rate": 6.479014636639279e-06, "loss": 1.6683, "step": 84195 }, { "epoch": 2.8, "grad_norm": 0.6869865655899048, "learning_rate": 6.4768543086361595e-06, "loss": 1.6134, "step": 84196 }, { "epoch": 2.8, "grad_norm": 0.7275413274765015, "learning_rate": 6.474694336926933e-06, "loss": 1.6456, "step": 84197 }, { "epoch": 2.8, "grad_norm": 0.7020005583763123, "learning_rate": 6.472534721514133e-06, "loss": 1.6626, "step": 84198 }, { "epoch": 2.8, "grad_norm": 0.6912703514099121, "learning_rate": 6.470375462400457e-06, "loss": 1.6491, "step": 84199 }, { "epoch": 2.8, "grad_norm": 0.7220695614814758, "learning_rate": 6.468216559588501e-06, "loss": 1.6594, "step": 84200 }, { "epoch": 2.8, "grad_norm": 0.7350554466247559, "learning_rate": 6.466058013080866e-06, "loss": 1.6759, "step": 84201 }, { "epoch": 2.8, "grad_norm": 0.7106850743293762, "learning_rate": 6.463899822880214e-06, "loss": 1.6691, "step": 84202 }, { "epoch": 2.8, "grad_norm": 1.0935763120651245, "learning_rate": 6.461741988989111e-06, "loss": 1.6469, "step": 84203 }, { "epoch": 2.8, "grad_norm": 0.7181428074836731, "learning_rate": 6.459584511410188e-06, "loss": 1.6177, "step": 84204 }, { "epoch": 2.8, "grad_norm": 0.7100216150283813, "learning_rate": 6.457427390146075e-06, "loss": 1.5994, "step": 84205 }, { "epoch": 2.8, "grad_norm": 0.7058716416358948, "learning_rate": 6.455270625199438e-06, "loss": 1.6472, "step": 84206 }, { "epoch": 2.8, "grad_norm": 0.7134382724761963, "learning_rate": 6.453114216572841e-06, "loss": 1.6383, "step": 84207 }, { "epoch": 2.8, "grad_norm": 0.7005447149276733, "learning_rate": 6.450958164268882e-06, "loss": 1.6979, "step": 84208 }, { "epoch": 2.8, "grad_norm": 0.6968432664871216, "learning_rate": 6.4488024682902266e-06, "loss": 1.6324, "step": 84209 }, { "epoch": 2.8, "grad_norm": 0.742847740650177, "learning_rate": 6.44664712863947e-06, "loss": 1.6514, "step": 84210 }, { "epoch": 2.8, "grad_norm": 0.6922587752342224, "learning_rate": 6.444492145319213e-06, "loss": 1.7139, "step": 84211 }, { "epoch": 2.8, "grad_norm": 0.7252277731895447, "learning_rate": 6.442337518332086e-06, "loss": 1.7232, "step": 84212 }, { "epoch": 2.8, "grad_norm": 0.714842677116394, "learning_rate": 6.4401832476807855e-06, "loss": 1.7014, "step": 84213 }, { "epoch": 2.8, "grad_norm": 0.7268405556678772, "learning_rate": 6.438029333367778e-06, "loss": 1.7146, "step": 84214 }, { "epoch": 2.8, "grad_norm": 0.7185789942741394, "learning_rate": 6.4358757753957266e-06, "loss": 1.6747, "step": 84215 }, { "epoch": 2.8, "grad_norm": 0.720719575881958, "learning_rate": 6.43372257376733e-06, "loss": 1.6239, "step": 84216 }, { "epoch": 2.8, "grad_norm": 0.716376006603241, "learning_rate": 6.431569728485119e-06, "loss": 1.6917, "step": 84217 }, { "epoch": 2.8, "grad_norm": 0.706098735332489, "learning_rate": 6.429417239551693e-06, "loss": 1.6437, "step": 84218 }, { "epoch": 2.8, "grad_norm": 0.7390632033348083, "learning_rate": 6.427265106969714e-06, "loss": 1.7165, "step": 84219 }, { "epoch": 2.8, "grad_norm": 0.7175208330154419, "learning_rate": 6.425113330741816e-06, "loss": 1.7016, "step": 84220 }, { "epoch": 2.8, "grad_norm": 0.7120469808578491, "learning_rate": 6.422961910870527e-06, "loss": 1.7231, "step": 84221 }, { "epoch": 2.8, "grad_norm": 0.7079889178276062, "learning_rate": 6.420810847358515e-06, "loss": 1.6705, "step": 84222 }, { "epoch": 2.8, "grad_norm": 0.7054933309555054, "learning_rate": 6.418660140208409e-06, "loss": 1.7459, "step": 84223 }, { "epoch": 2.8, "grad_norm": 0.7205280065536499, "learning_rate": 6.416509789422808e-06, "loss": 1.7023, "step": 84224 }, { "epoch": 2.8, "grad_norm": 0.7089754343032837, "learning_rate": 6.414359795004243e-06, "loss": 1.7279, "step": 84225 }, { "epoch": 2.8, "grad_norm": 0.7286557555198669, "learning_rate": 6.412210156955411e-06, "loss": 1.7565, "step": 84226 }, { "epoch": 2.8, "grad_norm": 0.709003210067749, "learning_rate": 6.410060875278944e-06, "loss": 1.6209, "step": 84227 }, { "epoch": 2.8, "grad_norm": 0.7190229296684265, "learning_rate": 6.40791194997734e-06, "loss": 1.6973, "step": 84228 }, { "epoch": 2.8, "grad_norm": 0.7105283141136169, "learning_rate": 6.405763381053297e-06, "loss": 1.7223, "step": 84229 }, { "epoch": 2.8, "grad_norm": 0.7071155905723572, "learning_rate": 6.403615168509413e-06, "loss": 1.6613, "step": 84230 }, { "epoch": 2.8, "grad_norm": 0.7132235169410706, "learning_rate": 6.401467312348285e-06, "loss": 1.7489, "step": 84231 }, { "epoch": 2.8, "grad_norm": 0.7294243574142456, "learning_rate": 6.399319812572478e-06, "loss": 1.6796, "step": 84232 }, { "epoch": 2.8, "grad_norm": 0.7419626116752625, "learning_rate": 6.397172669184625e-06, "loss": 1.8261, "step": 84233 }, { "epoch": 2.8, "grad_norm": 0.7140872478485107, "learning_rate": 6.3950258821874215e-06, "loss": 1.6714, "step": 84234 }, { "epoch": 2.8, "grad_norm": 0.7081364989280701, "learning_rate": 6.3928794515833324e-06, "loss": 1.7035, "step": 84235 }, { "epoch": 2.8, "grad_norm": 0.7090849280357361, "learning_rate": 6.390733377375024e-06, "loss": 1.7373, "step": 84236 }, { "epoch": 2.8, "grad_norm": 0.7327039837837219, "learning_rate": 6.3885876595651585e-06, "loss": 1.5728, "step": 84237 }, { "epoch": 2.8, "grad_norm": 0.7126243710517883, "learning_rate": 6.3864422981562355e-06, "loss": 1.6793, "step": 84238 }, { "epoch": 2.8, "grad_norm": 0.7382081747055054, "learning_rate": 6.3842972931509195e-06, "loss": 1.6619, "step": 84239 }, { "epoch": 2.8, "grad_norm": 0.7225606441497803, "learning_rate": 6.382152644551841e-06, "loss": 1.7312, "step": 84240 }, { "epoch": 2.8, "grad_norm": 0.7137136459350586, "learning_rate": 6.380008352361532e-06, "loss": 1.6151, "step": 84241 }, { "epoch": 2.8, "grad_norm": 0.7157720327377319, "learning_rate": 6.377864416582623e-06, "loss": 1.6184, "step": 84242 }, { "epoch": 2.8, "grad_norm": 0.750529944896698, "learning_rate": 6.375720837217713e-06, "loss": 1.6663, "step": 84243 }, { "epoch": 2.8, "grad_norm": 0.7098896503448486, "learning_rate": 6.373577614269465e-06, "loss": 1.6893, "step": 84244 }, { "epoch": 2.8, "grad_norm": 0.7042685151100159, "learning_rate": 6.3714347477404114e-06, "loss": 1.6612, "step": 84245 }, { "epoch": 2.8, "grad_norm": 0.7092536687850952, "learning_rate": 6.36929223763315e-06, "loss": 1.7538, "step": 84246 }, { "epoch": 2.8, "grad_norm": 0.7425190210342407, "learning_rate": 6.367150083950312e-06, "loss": 1.6209, "step": 84247 }, { "epoch": 2.8, "grad_norm": 0.6943132281303406, "learning_rate": 6.365008286694529e-06, "loss": 1.656, "step": 84248 }, { "epoch": 2.8, "grad_norm": 0.7172085642814636, "learning_rate": 6.362866845868297e-06, "loss": 1.6879, "step": 84249 }, { "epoch": 2.8, "grad_norm": 0.7017172574996948, "learning_rate": 6.360725761474317e-06, "loss": 1.5575, "step": 84250 }, { "epoch": 2.8, "grad_norm": 0.7130445241928101, "learning_rate": 6.358585033515118e-06, "loss": 1.633, "step": 84251 }, { "epoch": 2.8, "grad_norm": 0.7231419086456299, "learning_rate": 6.3564446619933655e-06, "loss": 1.7601, "step": 84252 }, { "epoch": 2.8, "grad_norm": 0.7308679223060608, "learning_rate": 6.35430464691159e-06, "loss": 1.6955, "step": 84253 }, { "epoch": 2.8, "grad_norm": 0.7017971277236938, "learning_rate": 6.352164988272457e-06, "loss": 1.6289, "step": 84254 }, { "epoch": 2.8, "grad_norm": 0.7623205780982971, "learning_rate": 6.3500256860785305e-06, "loss": 1.6574, "step": 84255 }, { "epoch": 2.8, "grad_norm": 0.734563946723938, "learning_rate": 6.347886740332375e-06, "loss": 1.6381, "step": 84256 }, { "epoch": 2.8, "grad_norm": 0.7042287588119507, "learning_rate": 6.345748151036656e-06, "loss": 1.7481, "step": 84257 }, { "epoch": 2.8, "grad_norm": 0.7381827235221863, "learning_rate": 6.34360991819387e-06, "loss": 1.5847, "step": 84258 }, { "epoch": 2.8, "grad_norm": 0.7190868258476257, "learning_rate": 6.341472041806717e-06, "loss": 1.743, "step": 84259 }, { "epoch": 2.8, "grad_norm": 0.7152215242385864, "learning_rate": 6.339334521877759e-06, "loss": 1.6101, "step": 84260 }, { "epoch": 2.8, "grad_norm": 0.7015038132667542, "learning_rate": 6.3371973584095625e-06, "loss": 1.5905, "step": 84261 }, { "epoch": 2.8, "grad_norm": 0.7389546036720276, "learning_rate": 6.3350605514047245e-06, "loss": 1.6946, "step": 84262 }, { "epoch": 2.8, "grad_norm": 0.7090017199516296, "learning_rate": 6.3329241008658775e-06, "loss": 1.6649, "step": 84263 }, { "epoch": 2.8, "grad_norm": 0.726014256477356, "learning_rate": 6.330788006795584e-06, "loss": 1.6413, "step": 84264 }, { "epoch": 2.8, "grad_norm": 0.7125347852706909, "learning_rate": 6.3286522691964105e-06, "loss": 1.6168, "step": 84265 }, { "epoch": 2.8, "grad_norm": 0.7254119515419006, "learning_rate": 6.326516888071054e-06, "loss": 1.6417, "step": 84266 }, { "epoch": 2.8, "grad_norm": 0.6962116956710815, "learning_rate": 6.324381863421979e-06, "loss": 1.6888, "step": 84267 }, { "epoch": 2.8, "grad_norm": 0.7101835012435913, "learning_rate": 6.32224719525185e-06, "loss": 1.6974, "step": 84268 }, { "epoch": 2.8, "grad_norm": 0.7189471125602722, "learning_rate": 6.320112883563233e-06, "loss": 1.5991, "step": 84269 }, { "epoch": 2.8, "grad_norm": 0.712691068649292, "learning_rate": 6.3179789283587575e-06, "loss": 1.6905, "step": 84270 }, { "epoch": 2.8, "grad_norm": 0.7125017046928406, "learning_rate": 6.315845329640956e-06, "loss": 1.6612, "step": 84271 }, { "epoch": 2.8, "grad_norm": 0.7152644991874695, "learning_rate": 6.313712087412426e-06, "loss": 1.6759, "step": 84272 }, { "epoch": 2.8, "grad_norm": 0.7291169762611389, "learning_rate": 6.3115792016757986e-06, "loss": 1.6866, "step": 84273 }, { "epoch": 2.8, "grad_norm": 0.7228342294692993, "learning_rate": 6.3094466724336716e-06, "loss": 1.6724, "step": 84274 }, { "epoch": 2.8, "grad_norm": 0.726183295249939, "learning_rate": 6.307314499688543e-06, "loss": 1.7589, "step": 84275 }, { "epoch": 2.8, "grad_norm": 0.6942128539085388, "learning_rate": 6.305182683443111e-06, "loss": 1.6301, "step": 84276 }, { "epoch": 2.8, "grad_norm": 0.7043679356575012, "learning_rate": 6.303051223699906e-06, "loss": 1.5912, "step": 84277 }, { "epoch": 2.8, "grad_norm": 0.686499834060669, "learning_rate": 6.300920120461494e-06, "loss": 1.6524, "step": 84278 }, { "epoch": 2.8, "grad_norm": 0.7049346566200256, "learning_rate": 6.298789373730506e-06, "loss": 1.6982, "step": 84279 }, { "epoch": 2.8, "grad_norm": 0.7091951966285706, "learning_rate": 6.29665898350954e-06, "loss": 1.6999, "step": 84280 }, { "epoch": 2.8, "grad_norm": 0.7134964466094971, "learning_rate": 6.294528949801125e-06, "loss": 1.69, "step": 84281 }, { "epoch": 2.8, "grad_norm": 0.7223459482192993, "learning_rate": 6.292399272607862e-06, "loss": 1.7013, "step": 84282 }, { "epoch": 2.8, "grad_norm": 0.7036657929420471, "learning_rate": 6.2902699519323475e-06, "loss": 1.696, "step": 84283 }, { "epoch": 2.8, "grad_norm": 0.7261054515838623, "learning_rate": 6.288140987777246e-06, "loss": 1.6551, "step": 84284 }, { "epoch": 2.8, "grad_norm": 0.7553508281707764, "learning_rate": 6.286012380144989e-06, "loss": 1.7298, "step": 84285 }, { "epoch": 2.8, "grad_norm": 0.7050476670265198, "learning_rate": 6.2838841290382414e-06, "loss": 1.653, "step": 84286 }, { "epoch": 2.8, "grad_norm": 0.7055578231811523, "learning_rate": 6.281756234459601e-06, "loss": 1.7327, "step": 84287 }, { "epoch": 2.8, "grad_norm": 0.713065505027771, "learning_rate": 6.279628696411665e-06, "loss": 1.6348, "step": 84288 }, { "epoch": 2.8, "grad_norm": 0.7336277365684509, "learning_rate": 6.277501514896932e-06, "loss": 1.6192, "step": 84289 }, { "epoch": 2.8, "grad_norm": 0.7513746619224548, "learning_rate": 6.275374689918034e-06, "loss": 1.6587, "step": 84290 }, { "epoch": 2.8, "grad_norm": 0.7166826725006104, "learning_rate": 6.2732482214776e-06, "loss": 1.6797, "step": 84291 }, { "epoch": 2.8, "grad_norm": 0.7209280729293823, "learning_rate": 6.271122109578097e-06, "loss": 1.659, "step": 84292 }, { "epoch": 2.8, "grad_norm": 0.7086061239242554, "learning_rate": 6.268996354222189e-06, "loss": 1.771, "step": 84293 }, { "epoch": 2.8, "grad_norm": 0.7100328803062439, "learning_rate": 6.266870955412473e-06, "loss": 1.7694, "step": 84294 }, { "epoch": 2.8, "grad_norm": 0.7272043228149414, "learning_rate": 6.264745913151481e-06, "loss": 1.593, "step": 84295 }, { "epoch": 2.8, "grad_norm": 0.7067487239837646, "learning_rate": 6.262621227441811e-06, "loss": 1.6716, "step": 84296 }, { "epoch": 2.8, "grad_norm": 0.6931840777397156, "learning_rate": 6.260496898286027e-06, "loss": 1.6281, "step": 84297 }, { "epoch": 2.8, "grad_norm": 0.7000528573989868, "learning_rate": 6.258372925686761e-06, "loss": 1.623, "step": 84298 }, { "epoch": 2.8, "grad_norm": 0.7222316861152649, "learning_rate": 6.256249309646477e-06, "loss": 1.7137, "step": 84299 }, { "epoch": 2.8, "grad_norm": 0.6928125023841858, "learning_rate": 6.254126050167907e-06, "loss": 1.6539, "step": 84300 }, { "epoch": 2.8, "grad_norm": 0.6997801661491394, "learning_rate": 6.2520031472534815e-06, "loss": 1.6507, "step": 84301 }, { "epoch": 2.8, "grad_norm": 0.707169771194458, "learning_rate": 6.249880600905865e-06, "loss": 1.706, "step": 84302 }, { "epoch": 2.8, "grad_norm": 0.7013378143310547, "learning_rate": 6.24775841112759e-06, "loss": 1.6376, "step": 84303 }, { "epoch": 2.8, "grad_norm": 0.6934716701507568, "learning_rate": 6.245636577921287e-06, "loss": 1.6352, "step": 84304 }, { "epoch": 2.8, "grad_norm": 0.7066938877105713, "learning_rate": 6.24351510128952e-06, "loss": 1.6737, "step": 84305 }, { "epoch": 2.8, "grad_norm": 0.7032294869422913, "learning_rate": 6.241393981234788e-06, "loss": 1.6901, "step": 84306 }, { "epoch": 2.8, "grad_norm": 0.7096810340881348, "learning_rate": 6.239273217759755e-06, "loss": 1.694, "step": 84307 }, { "epoch": 2.8, "grad_norm": 0.7373517751693726, "learning_rate": 6.237152810866919e-06, "loss": 1.7101, "step": 84308 }, { "epoch": 2.8, "grad_norm": 0.7156742215156555, "learning_rate": 6.235032760558944e-06, "loss": 1.7116, "step": 84309 }, { "epoch": 2.81, "grad_norm": 0.7176583409309387, "learning_rate": 6.2329130668383295e-06, "loss": 1.7107, "step": 84310 }, { "epoch": 2.81, "grad_norm": 0.7227153778076172, "learning_rate": 6.2307937297076725e-06, "loss": 1.6969, "step": 84311 }, { "epoch": 2.81, "grad_norm": 0.7123152613639832, "learning_rate": 6.228674749169571e-06, "loss": 1.6385, "step": 84312 }, { "epoch": 2.81, "grad_norm": 0.7217639088630676, "learning_rate": 6.226556125226523e-06, "loss": 1.7656, "step": 84313 }, { "epoch": 2.81, "grad_norm": 0.7097112536430359, "learning_rate": 6.224437857881193e-06, "loss": 1.5985, "step": 84314 }, { "epoch": 2.81, "grad_norm": 0.6980617642402649, "learning_rate": 6.222319947136112e-06, "loss": 1.6412, "step": 84315 }, { "epoch": 2.81, "grad_norm": 0.7050836086273193, "learning_rate": 6.220202392993845e-06, "loss": 1.6391, "step": 84316 }, { "epoch": 2.81, "grad_norm": 0.7445721626281738, "learning_rate": 6.218085195456957e-06, "loss": 1.6517, "step": 84317 }, { "epoch": 2.81, "grad_norm": 0.7208139300346375, "learning_rate": 6.215968354528011e-06, "loss": 1.6549, "step": 84318 }, { "epoch": 2.81, "grad_norm": 0.7041596174240112, "learning_rate": 6.213851870209608e-06, "loss": 1.6121, "step": 84319 }, { "epoch": 2.81, "grad_norm": 0.7243576049804688, "learning_rate": 6.2117357425043095e-06, "loss": 1.7454, "step": 84320 }, { "epoch": 2.81, "grad_norm": 0.7224343419075012, "learning_rate": 6.2096199714146814e-06, "loss": 1.6329, "step": 84321 }, { "epoch": 2.81, "grad_norm": 0.7411080598831177, "learning_rate": 6.2075045569432545e-06, "loss": 1.6243, "step": 84322 }, { "epoch": 2.81, "grad_norm": 0.7103704214096069, "learning_rate": 6.205389499092661e-06, "loss": 1.6541, "step": 84323 }, { "epoch": 2.81, "grad_norm": 0.7219561338424683, "learning_rate": 6.203274797865432e-06, "loss": 1.7354, "step": 84324 }, { "epoch": 2.81, "grad_norm": 0.7011585235595703, "learning_rate": 6.201160453264098e-06, "loss": 1.6983, "step": 84325 }, { "epoch": 2.81, "grad_norm": 0.691133975982666, "learning_rate": 6.199046465291324e-06, "loss": 1.6779, "step": 84326 }, { "epoch": 2.81, "grad_norm": 0.713612973690033, "learning_rate": 6.1969328339496085e-06, "loss": 1.6408, "step": 84327 }, { "epoch": 2.81, "grad_norm": 0.7201051115989685, "learning_rate": 6.194819559241482e-06, "loss": 1.6265, "step": 84328 }, { "epoch": 2.81, "grad_norm": 0.7142252326011658, "learning_rate": 6.192706641169576e-06, "loss": 1.6401, "step": 84329 }, { "epoch": 2.81, "grad_norm": 0.7222108244895935, "learning_rate": 6.190594079736455e-06, "loss": 1.6786, "step": 84330 }, { "epoch": 2.81, "grad_norm": 0.7105122208595276, "learning_rate": 6.18848187494465e-06, "loss": 1.6313, "step": 84331 }, { "epoch": 2.81, "grad_norm": 0.709461510181427, "learning_rate": 6.186370026796728e-06, "loss": 1.7033, "step": 84332 }, { "epoch": 2.81, "grad_norm": 0.7163190245628357, "learning_rate": 6.18425853529525e-06, "loss": 1.7286, "step": 84333 }, { "epoch": 2.81, "grad_norm": 0.743483304977417, "learning_rate": 6.182147400442817e-06, "loss": 1.7348, "step": 84334 }, { "epoch": 2.81, "grad_norm": 0.7486987113952637, "learning_rate": 6.180036622241958e-06, "loss": 1.6732, "step": 84335 }, { "epoch": 2.81, "grad_norm": 0.7038536667823792, "learning_rate": 6.177926200695238e-06, "loss": 1.7195, "step": 84336 }, { "epoch": 2.81, "grad_norm": 0.7135093808174133, "learning_rate": 6.1758161358052564e-06, "loss": 1.6963, "step": 84337 }, { "epoch": 2.81, "grad_norm": 0.687253475189209, "learning_rate": 6.173706427574476e-06, "loss": 1.6902, "step": 84338 }, { "epoch": 2.81, "grad_norm": 0.6984221935272217, "learning_rate": 6.171597076005564e-06, "loss": 1.7157, "step": 84339 }, { "epoch": 2.81, "grad_norm": 0.7125644683837891, "learning_rate": 6.169488081101015e-06, "loss": 1.6231, "step": 84340 }, { "epoch": 2.81, "grad_norm": 0.7359887957572937, "learning_rate": 6.167379442863463e-06, "loss": 1.6478, "step": 84341 }, { "epoch": 2.81, "grad_norm": 0.6989976763725281, "learning_rate": 6.1652711612953705e-06, "loss": 1.6757, "step": 84342 }, { "epoch": 2.81, "grad_norm": 0.7021993398666382, "learning_rate": 6.163163236399338e-06, "loss": 1.6958, "step": 84343 }, { "epoch": 2.81, "grad_norm": 0.7227336764335632, "learning_rate": 6.1610556681779945e-06, "loss": 1.7023, "step": 84344 }, { "epoch": 2.81, "grad_norm": 0.6980602741241455, "learning_rate": 6.158948456633772e-06, "loss": 1.641, "step": 84345 }, { "epoch": 2.81, "grad_norm": 0.6963489651679993, "learning_rate": 6.156841601769302e-06, "loss": 1.5872, "step": 84346 }, { "epoch": 2.81, "grad_norm": 0.704905092716217, "learning_rate": 6.154735103587149e-06, "loss": 1.6455, "step": 84347 }, { "epoch": 2.81, "grad_norm": 1.486573576927185, "learning_rate": 6.152628962089845e-06, "loss": 1.626, "step": 84348 }, { "epoch": 2.81, "grad_norm": 0.7220738530158997, "learning_rate": 6.15052317727992e-06, "loss": 1.6559, "step": 84349 }, { "epoch": 2.81, "grad_norm": 0.7248125076293945, "learning_rate": 6.1484177491599725e-06, "loss": 1.68, "step": 84350 }, { "epoch": 2.81, "grad_norm": 0.7130177617073059, "learning_rate": 6.1463126777326344e-06, "loss": 1.7249, "step": 84351 }, { "epoch": 2.81, "grad_norm": 0.7275388836860657, "learning_rate": 6.144207963000269e-06, "loss": 1.7418, "step": 84352 }, { "epoch": 2.81, "grad_norm": 0.7125530242919922, "learning_rate": 6.142103604965576e-06, "loss": 1.67, "step": 84353 }, { "epoch": 2.81, "grad_norm": 0.7170067429542542, "learning_rate": 6.139999603631085e-06, "loss": 1.6094, "step": 84354 }, { "epoch": 2.81, "grad_norm": 0.7131114602088928, "learning_rate": 6.137895958999328e-06, "loss": 1.6871, "step": 84355 }, { "epoch": 2.81, "grad_norm": 0.7144218683242798, "learning_rate": 6.13579267107287e-06, "loss": 1.6901, "step": 84356 }, { "epoch": 2.81, "grad_norm": 0.7061436772346497, "learning_rate": 6.133689739854242e-06, "loss": 1.5726, "step": 84357 }, { "epoch": 2.81, "grad_norm": 0.7252455949783325, "learning_rate": 6.1315871653460416e-06, "loss": 1.6761, "step": 84358 }, { "epoch": 2.81, "grad_norm": 0.7028780579566956, "learning_rate": 6.129484947550767e-06, "loss": 1.688, "step": 84359 }, { "epoch": 2.81, "grad_norm": 0.7345070242881775, "learning_rate": 6.127383086471016e-06, "loss": 1.7368, "step": 84360 }, { "epoch": 2.81, "grad_norm": 0.7026705741882324, "learning_rate": 6.1252815821093205e-06, "loss": 1.6394, "step": 84361 }, { "epoch": 2.81, "grad_norm": 0.730876088142395, "learning_rate": 6.123180434468244e-06, "loss": 1.7278, "step": 84362 }, { "epoch": 2.81, "grad_norm": 0.7182737588882446, "learning_rate": 6.1210796435502865e-06, "loss": 1.6591, "step": 84363 }, { "epoch": 2.81, "grad_norm": 0.7129508852958679, "learning_rate": 6.118979209358077e-06, "loss": 1.6725, "step": 84364 }, { "epoch": 2.81, "grad_norm": 0.706732451915741, "learning_rate": 6.116879131894115e-06, "loss": 1.6257, "step": 84365 }, { "epoch": 2.81, "grad_norm": 0.7143653631210327, "learning_rate": 6.114779411160964e-06, "loss": 1.5774, "step": 84366 }, { "epoch": 2.81, "grad_norm": 0.7283229827880859, "learning_rate": 6.112680047161156e-06, "loss": 1.6854, "step": 84367 }, { "epoch": 2.81, "grad_norm": 0.6881647109985352, "learning_rate": 6.110581039897255e-06, "loss": 1.667, "step": 84368 }, { "epoch": 2.81, "grad_norm": 0.7062864303588867, "learning_rate": 6.108482389371794e-06, "loss": 1.6421, "step": 84369 }, { "epoch": 2.81, "grad_norm": 0.6963253021240234, "learning_rate": 6.106384095587335e-06, "loss": 1.6912, "step": 84370 }, { "epoch": 2.81, "grad_norm": 0.7195369601249695, "learning_rate": 6.104286158546446e-06, "loss": 1.6998, "step": 84371 }, { "epoch": 2.81, "grad_norm": 0.6846696138381958, "learning_rate": 6.102188578251655e-06, "loss": 1.6363, "step": 84372 }, { "epoch": 2.81, "grad_norm": 0.7303613424301147, "learning_rate": 6.100091354705494e-06, "loss": 1.6814, "step": 84373 }, { "epoch": 2.81, "grad_norm": 0.7132265567779541, "learning_rate": 6.097994487910496e-06, "loss": 1.7236, "step": 84374 }, { "epoch": 2.81, "grad_norm": 0.7141299247741699, "learning_rate": 6.0958979778692245e-06, "loss": 1.6881, "step": 84375 }, { "epoch": 2.81, "grad_norm": 0.6812860369682312, "learning_rate": 6.093801824584276e-06, "loss": 1.5947, "step": 84376 }, { "epoch": 2.81, "grad_norm": 0.7175076007843018, "learning_rate": 6.091706028058119e-06, "loss": 1.6583, "step": 84377 }, { "epoch": 2.81, "grad_norm": 0.7178679704666138, "learning_rate": 6.089610588293315e-06, "loss": 1.7592, "step": 84378 }, { "epoch": 2.81, "grad_norm": 0.7171982526779175, "learning_rate": 6.087515505292428e-06, "loss": 1.6449, "step": 84379 }, { "epoch": 2.81, "grad_norm": 0.7033575177192688, "learning_rate": 6.085420779057992e-06, "loss": 1.7338, "step": 84380 }, { "epoch": 2.81, "grad_norm": 0.7296207547187805, "learning_rate": 6.083326409592537e-06, "loss": 1.6359, "step": 84381 }, { "epoch": 2.81, "grad_norm": 0.7441744804382324, "learning_rate": 6.081232396898628e-06, "loss": 1.6588, "step": 84382 }, { "epoch": 2.81, "grad_norm": 0.716012179851532, "learning_rate": 6.079138740978795e-06, "loss": 1.7183, "step": 84383 }, { "epoch": 2.81, "grad_norm": 0.7154150009155273, "learning_rate": 6.077045441835604e-06, "loss": 1.6913, "step": 84384 }, { "epoch": 2.81, "grad_norm": 0.7136504650115967, "learning_rate": 6.0749524994715195e-06, "loss": 1.7364, "step": 84385 }, { "epoch": 2.81, "grad_norm": 0.7376974821090698, "learning_rate": 6.072859913889172e-06, "loss": 1.7243, "step": 84386 }, { "epoch": 2.81, "grad_norm": 0.6977176666259766, "learning_rate": 6.070767685091093e-06, "loss": 1.629, "step": 84387 }, { "epoch": 2.81, "grad_norm": 0.7169986963272095, "learning_rate": 6.068675813079748e-06, "loss": 1.7227, "step": 84388 }, { "epoch": 2.81, "grad_norm": 0.7019417881965637, "learning_rate": 6.066584297857702e-06, "loss": 1.6428, "step": 84389 }, { "epoch": 2.81, "grad_norm": 0.7074746489524841, "learning_rate": 6.064493139427551e-06, "loss": 1.6213, "step": 84390 }, { "epoch": 2.81, "grad_norm": 1.177544116973877, "learning_rate": 6.062402337791794e-06, "loss": 1.7262, "step": 84391 }, { "epoch": 2.81, "grad_norm": 0.7138791680335999, "learning_rate": 6.060311892952963e-06, "loss": 1.6156, "step": 84392 }, { "epoch": 2.81, "grad_norm": 0.7011716365814209, "learning_rate": 6.058221804913621e-06, "loss": 1.6971, "step": 84393 }, { "epoch": 2.81, "grad_norm": 0.7252006530761719, "learning_rate": 6.056132073676268e-06, "loss": 1.6779, "step": 84394 }, { "epoch": 2.81, "grad_norm": 0.7362431883811951, "learning_rate": 6.054042699243467e-06, "loss": 1.8131, "step": 84395 }, { "epoch": 2.81, "grad_norm": 0.7076040506362915, "learning_rate": 6.051953681617716e-06, "loss": 1.6853, "step": 84396 }, { "epoch": 2.81, "grad_norm": 0.6960886716842651, "learning_rate": 6.049865020801614e-06, "loss": 1.631, "step": 84397 }, { "epoch": 2.81, "grad_norm": 0.6891534328460693, "learning_rate": 6.047776716797692e-06, "loss": 1.6699, "step": 84398 }, { "epoch": 2.81, "grad_norm": 0.7083290815353394, "learning_rate": 6.045688769608381e-06, "loss": 1.6488, "step": 84399 }, { "epoch": 2.81, "grad_norm": 0.6974467635154724, "learning_rate": 6.043601179236346e-06, "loss": 1.7251, "step": 84400 }, { "epoch": 2.81, "grad_norm": 0.7273889183998108, "learning_rate": 6.041513945684085e-06, "loss": 1.6186, "step": 84401 }, { "epoch": 2.81, "grad_norm": 0.7052165269851685, "learning_rate": 6.039427068954061e-06, "loss": 1.6463, "step": 84402 }, { "epoch": 2.81, "grad_norm": 0.711387574672699, "learning_rate": 6.037340549048875e-06, "loss": 1.6914, "step": 84403 }, { "epoch": 2.81, "grad_norm": 0.7213824391365051, "learning_rate": 6.035254385971056e-06, "loss": 1.716, "step": 84404 }, { "epoch": 2.81, "grad_norm": 0.70948326587677, "learning_rate": 6.0331685797231355e-06, "loss": 1.7023, "step": 84405 }, { "epoch": 2.81, "grad_norm": 0.6970950961112976, "learning_rate": 6.031083130307612e-06, "loss": 1.7117, "step": 84406 }, { "epoch": 2.81, "grad_norm": 0.7138093709945679, "learning_rate": 6.0289980377270505e-06, "loss": 1.7045, "step": 84407 }, { "epoch": 2.81, "grad_norm": 0.7043910026550293, "learning_rate": 6.026913301983982e-06, "loss": 1.5523, "step": 84408 }, { "epoch": 2.81, "grad_norm": 0.7000138759613037, "learning_rate": 6.024828923080905e-06, "loss": 1.6603, "step": 84409 }, { "epoch": 2.81, "grad_norm": 0.7116813659667969, "learning_rate": 6.022744901020349e-06, "loss": 1.6297, "step": 84410 }, { "epoch": 2.81, "grad_norm": 0.7362598180770874, "learning_rate": 6.020661235804913e-06, "loss": 1.6742, "step": 84411 }, { "epoch": 2.81, "grad_norm": 0.6984286308288574, "learning_rate": 6.018577927437063e-06, "loss": 1.7006, "step": 84412 }, { "epoch": 2.81, "grad_norm": 0.6960682272911072, "learning_rate": 6.016494975919328e-06, "loss": 1.7068, "step": 84413 }, { "epoch": 2.81, "grad_norm": 0.6980260610580444, "learning_rate": 6.0144123812542745e-06, "loss": 1.6863, "step": 84414 }, { "epoch": 2.81, "grad_norm": 0.7196274399757385, "learning_rate": 6.012330143444399e-06, "loss": 1.705, "step": 84415 }, { "epoch": 2.81, "grad_norm": 0.7236558794975281, "learning_rate": 6.010248262492234e-06, "loss": 1.6276, "step": 84416 }, { "epoch": 2.81, "grad_norm": 0.7310293316841125, "learning_rate": 6.008166738400311e-06, "loss": 1.6672, "step": 84417 }, { "epoch": 2.81, "grad_norm": 0.713893473148346, "learning_rate": 6.006085571171159e-06, "loss": 1.6637, "step": 84418 }, { "epoch": 2.81, "grad_norm": 0.7438482642173767, "learning_rate": 6.004004760807313e-06, "loss": 1.7052, "step": 84419 }, { "epoch": 2.81, "grad_norm": 0.7132511138916016, "learning_rate": 6.001924307311234e-06, "loss": 1.6356, "step": 84420 }, { "epoch": 2.81, "grad_norm": 0.7194212079048157, "learning_rate": 5.999844210685556e-06, "loss": 1.7065, "step": 84421 }, { "epoch": 2.81, "grad_norm": 0.7190099954605103, "learning_rate": 5.997764470932742e-06, "loss": 1.6789, "step": 84422 }, { "epoch": 2.81, "grad_norm": 0.7133635878562927, "learning_rate": 5.995685088055291e-06, "loss": 1.7305, "step": 84423 }, { "epoch": 2.81, "grad_norm": 1.0789239406585693, "learning_rate": 5.9936060620558e-06, "loss": 1.7676, "step": 84424 }, { "epoch": 2.81, "grad_norm": 0.7645822763442993, "learning_rate": 5.991527392936701e-06, "loss": 1.7022, "step": 84425 }, { "epoch": 2.81, "grad_norm": 0.7042832970619202, "learning_rate": 5.989449080700626e-06, "loss": 1.6522, "step": 84426 }, { "epoch": 2.81, "grad_norm": 0.763393759727478, "learning_rate": 5.987371125350005e-06, "loss": 1.606, "step": 84427 }, { "epoch": 2.81, "grad_norm": 0.7279633283615112, "learning_rate": 5.985293526887402e-06, "loss": 1.7049, "step": 84428 }, { "epoch": 2.81, "grad_norm": 0.7230421900749207, "learning_rate": 5.983216285315318e-06, "loss": 1.6246, "step": 84429 }, { "epoch": 2.81, "grad_norm": 0.7019717693328857, "learning_rate": 5.981139400636315e-06, "loss": 1.6803, "step": 84430 }, { "epoch": 2.81, "grad_norm": 0.7187269330024719, "learning_rate": 5.979062872852858e-06, "loss": 1.6719, "step": 84431 }, { "epoch": 2.81, "grad_norm": 0.7171294689178467, "learning_rate": 5.976986701967512e-06, "loss": 1.7397, "step": 84432 }, { "epoch": 2.81, "grad_norm": 0.7152329683303833, "learning_rate": 5.9749108879827754e-06, "loss": 1.6578, "step": 84433 }, { "epoch": 2.81, "grad_norm": 0.717522144317627, "learning_rate": 5.972835430901212e-06, "loss": 1.6351, "step": 84434 }, { "epoch": 2.81, "grad_norm": 0.710966944694519, "learning_rate": 5.9707603307252546e-06, "loss": 1.6928, "step": 84435 }, { "epoch": 2.81, "grad_norm": 0.701934814453125, "learning_rate": 5.9686855874575e-06, "loss": 1.6692, "step": 84436 }, { "epoch": 2.81, "grad_norm": 0.7323862314224243, "learning_rate": 5.966611201100413e-06, "loss": 1.6562, "step": 84437 }, { "epoch": 2.81, "grad_norm": 0.7127060294151306, "learning_rate": 5.964537171656559e-06, "loss": 1.7213, "step": 84438 }, { "epoch": 2.81, "grad_norm": 0.7146335244178772, "learning_rate": 5.962463499128401e-06, "loss": 1.7031, "step": 84439 }, { "epoch": 2.81, "grad_norm": 0.7145231366157532, "learning_rate": 5.960390183518504e-06, "loss": 1.6351, "step": 84440 }, { "epoch": 2.81, "grad_norm": 0.7170900702476501, "learning_rate": 5.958317224829401e-06, "loss": 1.6723, "step": 84441 }, { "epoch": 2.81, "grad_norm": 0.7151436805725098, "learning_rate": 5.956244623063522e-06, "loss": 1.6848, "step": 84442 }, { "epoch": 2.81, "grad_norm": 0.7231768369674683, "learning_rate": 5.954172378223465e-06, "loss": 1.6642, "step": 84443 }, { "epoch": 2.81, "grad_norm": 0.703670084476471, "learning_rate": 5.9521004903117285e-06, "loss": 1.6921, "step": 84444 }, { "epoch": 2.81, "grad_norm": 0.7211216688156128, "learning_rate": 5.950028959330811e-06, "loss": 1.7355, "step": 84445 }, { "epoch": 2.81, "grad_norm": 0.7271802425384521, "learning_rate": 5.947957785283208e-06, "loss": 1.693, "step": 84446 }, { "epoch": 2.81, "grad_norm": 0.6992378830909729, "learning_rate": 5.945886968171487e-06, "loss": 1.6955, "step": 84447 }, { "epoch": 2.81, "grad_norm": 0.7005515098571777, "learning_rate": 5.943816507998112e-06, "loss": 1.7569, "step": 84448 }, { "epoch": 2.81, "grad_norm": 0.6971220970153809, "learning_rate": 5.941746404765612e-06, "loss": 1.7351, "step": 84449 }, { "epoch": 2.81, "grad_norm": 0.7380896806716919, "learning_rate": 5.939676658476522e-06, "loss": 1.7413, "step": 84450 }, { "epoch": 2.81, "grad_norm": 0.715023934841156, "learning_rate": 5.937607269133338e-06, "loss": 1.7565, "step": 84451 }, { "epoch": 2.81, "grad_norm": 0.7102526426315308, "learning_rate": 5.935538236738557e-06, "loss": 1.6727, "step": 84452 }, { "epoch": 2.81, "grad_norm": 0.686003565788269, "learning_rate": 5.9334695612947124e-06, "loss": 1.6534, "step": 84453 }, { "epoch": 2.81, "grad_norm": 0.7148756980895996, "learning_rate": 5.931401242804335e-06, "loss": 1.6776, "step": 84454 }, { "epoch": 2.81, "grad_norm": 0.7316744327545166, "learning_rate": 5.929333281269855e-06, "loss": 1.7767, "step": 84455 }, { "epoch": 2.81, "grad_norm": 0.713045060634613, "learning_rate": 5.927265676693837e-06, "loss": 1.5967, "step": 84456 }, { "epoch": 2.81, "grad_norm": 0.7287148833274841, "learning_rate": 5.9251984290788145e-06, "loss": 1.6606, "step": 84457 }, { "epoch": 2.81, "grad_norm": 0.70854252576828, "learning_rate": 5.923131538427284e-06, "loss": 1.6042, "step": 84458 }, { "epoch": 2.81, "grad_norm": 0.7100521922111511, "learning_rate": 5.92106500474171e-06, "loss": 1.6971, "step": 84459 }, { "epoch": 2.81, "grad_norm": 0.7037826776504517, "learning_rate": 5.918998828024624e-06, "loss": 1.6605, "step": 84460 }, { "epoch": 2.81, "grad_norm": 0.7249177694320679, "learning_rate": 5.916933008278623e-06, "loss": 1.7395, "step": 84461 }, { "epoch": 2.81, "grad_norm": 0.7241342663764954, "learning_rate": 5.9148675455060414e-06, "loss": 1.6828, "step": 84462 }, { "epoch": 2.81, "grad_norm": 0.688256025314331, "learning_rate": 5.912802439709507e-06, "loss": 1.6544, "step": 84463 }, { "epoch": 2.81, "grad_norm": 0.70402991771698, "learning_rate": 5.91073769089152e-06, "loss": 1.6722, "step": 84464 }, { "epoch": 2.81, "grad_norm": 0.714198648929596, "learning_rate": 5.908673299054578e-06, "loss": 1.6648, "step": 84465 }, { "epoch": 2.81, "grad_norm": 0.7110820412635803, "learning_rate": 5.906609264201112e-06, "loss": 1.6443, "step": 84466 }, { "epoch": 2.81, "grad_norm": 0.7381662726402283, "learning_rate": 5.904545586333753e-06, "loss": 1.7242, "step": 84467 }, { "epoch": 2.81, "grad_norm": 0.7252656817436218, "learning_rate": 5.902482265454933e-06, "loss": 1.6269, "step": 84468 }, { "epoch": 2.81, "grad_norm": 0.6989637613296509, "learning_rate": 5.900419301567116e-06, "loss": 1.6808, "step": 84469 }, { "epoch": 2.81, "grad_norm": 0.7135088443756104, "learning_rate": 5.898356694672868e-06, "loss": 1.6519, "step": 84470 }, { "epoch": 2.81, "grad_norm": 0.7400797009468079, "learning_rate": 5.896294444774719e-06, "loss": 1.6603, "step": 84471 }, { "epoch": 2.81, "grad_norm": 0.6977783441543579, "learning_rate": 5.8942325518751335e-06, "loss": 1.7079, "step": 84472 }, { "epoch": 2.81, "grad_norm": 0.7246866226196289, "learning_rate": 5.892171015976577e-06, "loss": 1.7238, "step": 84473 }, { "epoch": 2.81, "grad_norm": 0.7374703288078308, "learning_rate": 5.890109837081647e-06, "loss": 1.744, "step": 84474 }, { "epoch": 2.81, "grad_norm": 0.7185223698616028, "learning_rate": 5.888049015192742e-06, "loss": 1.6476, "step": 84475 }, { "epoch": 2.81, "grad_norm": 0.7228026986122131, "learning_rate": 5.885988550312426e-06, "loss": 1.696, "step": 84476 }, { "epoch": 2.81, "grad_norm": 0.728988766670227, "learning_rate": 5.883928442443164e-06, "loss": 1.6336, "step": 84477 }, { "epoch": 2.81, "grad_norm": 0.7111210227012634, "learning_rate": 5.881868691587488e-06, "loss": 1.7409, "step": 84478 }, { "epoch": 2.81, "grad_norm": 0.7041410803794861, "learning_rate": 5.8798092977479286e-06, "loss": 1.65, "step": 84479 }, { "epoch": 2.81, "grad_norm": 0.7255651354789734, "learning_rate": 5.877750260926883e-06, "loss": 1.6819, "step": 84480 }, { "epoch": 2.81, "grad_norm": 0.701177716255188, "learning_rate": 5.875691581126951e-06, "loss": 1.664, "step": 84481 }, { "epoch": 2.81, "grad_norm": 0.6906573176383972, "learning_rate": 5.873633258350596e-06, "loss": 1.6228, "step": 84482 }, { "epoch": 2.81, "grad_norm": 0.6938185691833496, "learning_rate": 5.87157529260025e-06, "loss": 1.6261, "step": 84483 }, { "epoch": 2.81, "grad_norm": 0.7164346575737, "learning_rate": 5.8695176838785444e-06, "loss": 1.6631, "step": 84484 }, { "epoch": 2.81, "grad_norm": 0.6992683410644531, "learning_rate": 5.867460432187876e-06, "loss": 1.6251, "step": 84485 }, { "epoch": 2.81, "grad_norm": 0.7364968657493591, "learning_rate": 5.865403537530777e-06, "loss": 1.6985, "step": 84486 }, { "epoch": 2.81, "grad_norm": 0.6960626244544983, "learning_rate": 5.863346999909713e-06, "loss": 1.6629, "step": 84487 }, { "epoch": 2.81, "grad_norm": 0.7321748733520508, "learning_rate": 5.861290819327247e-06, "loss": 1.7057, "step": 84488 }, { "epoch": 2.81, "grad_norm": 0.6969736218452454, "learning_rate": 5.859234995785844e-06, "loss": 1.6916, "step": 84489 }, { "epoch": 2.81, "grad_norm": 0.7228868007659912, "learning_rate": 5.857179529287936e-06, "loss": 1.6718, "step": 84490 }, { "epoch": 2.81, "grad_norm": 0.7166774272918701, "learning_rate": 5.855124419836121e-06, "loss": 1.6658, "step": 84491 }, { "epoch": 2.81, "grad_norm": 0.6942620873451233, "learning_rate": 5.8530696674328306e-06, "loss": 1.7267, "step": 84492 }, { "epoch": 2.81, "grad_norm": 0.7003746628761292, "learning_rate": 5.851015272080561e-06, "loss": 1.6624, "step": 84493 }, { "epoch": 2.81, "grad_norm": 0.7120843529701233, "learning_rate": 5.848961233781846e-06, "loss": 1.6161, "step": 84494 }, { "epoch": 2.81, "grad_norm": 0.7297305464744568, "learning_rate": 5.8469075525391155e-06, "loss": 1.742, "step": 84495 }, { "epoch": 2.81, "grad_norm": 0.7163364887237549, "learning_rate": 5.844854228354934e-06, "loss": 1.7561, "step": 84496 }, { "epoch": 2.81, "grad_norm": 0.7053717374801636, "learning_rate": 5.8428012612316995e-06, "loss": 1.6159, "step": 84497 }, { "epoch": 2.81, "grad_norm": 0.7020648121833801, "learning_rate": 5.8407486511720115e-06, "loss": 1.6737, "step": 84498 }, { "epoch": 2.81, "grad_norm": 0.6997067928314209, "learning_rate": 5.8386963981783e-06, "loss": 1.6488, "step": 84499 }, { "epoch": 2.81, "grad_norm": 0.7374035716056824, "learning_rate": 5.836644502253063e-06, "loss": 1.6873, "step": 84500 }, { "epoch": 2.81, "grad_norm": 0.7253577709197998, "learning_rate": 5.834592963398832e-06, "loss": 1.6402, "step": 84501 }, { "epoch": 2.81, "grad_norm": 1.3530207872390747, "learning_rate": 5.832541781618006e-06, "loss": 1.6893, "step": 84502 }, { "epoch": 2.81, "grad_norm": 0.7136094570159912, "learning_rate": 5.8304909569131804e-06, "loss": 1.6897, "step": 84503 }, { "epoch": 2.81, "grad_norm": 0.7204465270042419, "learning_rate": 5.82844048928679e-06, "loss": 1.6999, "step": 84504 }, { "epoch": 2.81, "grad_norm": 0.7399922609329224, "learning_rate": 5.826390378741297e-06, "loss": 1.6022, "step": 84505 }, { "epoch": 2.81, "grad_norm": 0.7188627123832703, "learning_rate": 5.8243406252792004e-06, "loss": 1.6431, "step": 84506 }, { "epoch": 2.81, "grad_norm": 0.7225369811058044, "learning_rate": 5.822291228903064e-06, "loss": 1.7047, "step": 84507 }, { "epoch": 2.81, "grad_norm": 0.7235592007637024, "learning_rate": 5.8202421896153205e-06, "loss": 1.7273, "step": 84508 }, { "epoch": 2.81, "grad_norm": 0.7063894867897034, "learning_rate": 5.8181935074184e-06, "loss": 1.7105, "step": 84509 }, { "epoch": 2.81, "grad_norm": 0.708717405796051, "learning_rate": 5.816145182314902e-06, "loss": 1.7312, "step": 84510 }, { "epoch": 2.81, "grad_norm": 0.7147403955459595, "learning_rate": 5.814097214307256e-06, "loss": 1.6946, "step": 84511 }, { "epoch": 2.81, "grad_norm": 0.7264346480369568, "learning_rate": 5.812049603397928e-06, "loss": 1.6665, "step": 84512 }, { "epoch": 2.81, "grad_norm": 0.7273508310317993, "learning_rate": 5.810002349589416e-06, "loss": 1.6129, "step": 84513 }, { "epoch": 2.81, "grad_norm": 0.7222713828086853, "learning_rate": 5.80795545288425e-06, "loss": 1.6736, "step": 84514 }, { "epoch": 2.81, "grad_norm": 0.6848253607749939, "learning_rate": 5.8059089132848625e-06, "loss": 1.6344, "step": 84515 }, { "epoch": 2.81, "grad_norm": 0.70729660987854, "learning_rate": 5.803862730793718e-06, "loss": 1.6344, "step": 84516 }, { "epoch": 2.81, "grad_norm": 0.7036588191986084, "learning_rate": 5.801816905413381e-06, "loss": 1.6859, "step": 84517 }, { "epoch": 2.81, "grad_norm": 0.7147745490074158, "learning_rate": 5.799771437146317e-06, "loss": 1.6497, "step": 84518 }, { "epoch": 2.81, "grad_norm": 0.7145417332649231, "learning_rate": 5.7977263259949224e-06, "loss": 1.7212, "step": 84519 }, { "epoch": 2.81, "grad_norm": 0.7159295678138733, "learning_rate": 5.79568157196173e-06, "loss": 1.6715, "step": 84520 }, { "epoch": 2.81, "grad_norm": 0.7384476661682129, "learning_rate": 5.793637175049271e-06, "loss": 1.6619, "step": 84521 }, { "epoch": 2.81, "grad_norm": 0.6959695219993591, "learning_rate": 5.7915931352600086e-06, "loss": 1.6195, "step": 84522 }, { "epoch": 2.81, "grad_norm": 0.7350533604621887, "learning_rate": 5.789549452596343e-06, "loss": 1.6881, "step": 84523 }, { "epoch": 2.81, "grad_norm": 0.7084227800369263, "learning_rate": 5.78750612706087e-06, "loss": 1.755, "step": 84524 }, { "epoch": 2.81, "grad_norm": 0.7258176207542419, "learning_rate": 5.785463158655989e-06, "loss": 1.6869, "step": 84525 }, { "epoch": 2.81, "grad_norm": 0.7050439119338989, "learning_rate": 5.7834205473841985e-06, "loss": 1.6341, "step": 84526 }, { "epoch": 2.81, "grad_norm": 0.7254827618598938, "learning_rate": 5.781378293247962e-06, "loss": 1.7269, "step": 84527 }, { "epoch": 2.81, "grad_norm": 0.7091753482818604, "learning_rate": 5.779336396249812e-06, "loss": 1.6865, "step": 84528 }, { "epoch": 2.81, "grad_norm": 0.7316339612007141, "learning_rate": 5.7772948563922115e-06, "loss": 1.6471, "step": 84529 }, { "epoch": 2.81, "grad_norm": 0.683582603931427, "learning_rate": 5.775253673677594e-06, "loss": 1.7081, "step": 84530 }, { "epoch": 2.81, "grad_norm": 0.7202828526496887, "learning_rate": 5.773212848108488e-06, "loss": 1.6383, "step": 84531 }, { "epoch": 2.81, "grad_norm": 0.7060709595680237, "learning_rate": 5.771172379687361e-06, "loss": 1.7108, "step": 84532 }, { "epoch": 2.81, "grad_norm": 0.691389799118042, "learning_rate": 5.769132268416643e-06, "loss": 1.6597, "step": 84533 }, { "epoch": 2.81, "grad_norm": 0.7228136658668518, "learning_rate": 5.767092514298865e-06, "loss": 1.7417, "step": 84534 }, { "epoch": 2.81, "grad_norm": 0.7146620154380798, "learning_rate": 5.76505311733646e-06, "loss": 1.6567, "step": 84535 }, { "epoch": 2.81, "grad_norm": 0.7304323315620422, "learning_rate": 5.763014077531958e-06, "loss": 1.6581, "step": 84536 }, { "epoch": 2.81, "grad_norm": 0.6970721483230591, "learning_rate": 5.7609753948877565e-06, "loss": 1.639, "step": 84537 }, { "epoch": 2.81, "grad_norm": 0.7233812808990479, "learning_rate": 5.758937069406422e-06, "loss": 1.7406, "step": 84538 }, { "epoch": 2.81, "grad_norm": 0.7206310033798218, "learning_rate": 5.756899101090384e-06, "loss": 1.657, "step": 84539 }, { "epoch": 2.81, "grad_norm": 0.7308751344680786, "learning_rate": 5.754861489942108e-06, "loss": 1.6321, "step": 84540 }, { "epoch": 2.81, "grad_norm": 0.730388343334198, "learning_rate": 5.75282423596406e-06, "loss": 1.7151, "step": 84541 }, { "epoch": 2.81, "grad_norm": 0.7209396362304688, "learning_rate": 5.750787339158736e-06, "loss": 1.6379, "step": 84542 }, { "epoch": 2.81, "grad_norm": 0.7233030796051025, "learning_rate": 5.748750799528601e-06, "loss": 1.7274, "step": 84543 }, { "epoch": 2.81, "grad_norm": 0.7329145669937134, "learning_rate": 5.746714617076154e-06, "loss": 1.6282, "step": 84544 }, { "epoch": 2.81, "grad_norm": 0.6760588884353638, "learning_rate": 5.744678791803792e-06, "loss": 1.6728, "step": 84545 }, { "epoch": 2.81, "grad_norm": 0.7218573689460754, "learning_rate": 5.74264332371408e-06, "loss": 1.7458, "step": 84546 }, { "epoch": 2.81, "grad_norm": 0.7349892854690552, "learning_rate": 5.7406082128093835e-06, "loss": 1.7657, "step": 84547 }, { "epoch": 2.81, "grad_norm": 0.7366811633110046, "learning_rate": 5.7385734590922994e-06, "loss": 1.7257, "step": 84548 }, { "epoch": 2.81, "grad_norm": 0.6908750534057617, "learning_rate": 5.7365390625651595e-06, "loss": 1.642, "step": 84549 }, { "epoch": 2.81, "grad_norm": 0.7426672577857971, "learning_rate": 5.734505023230562e-06, "loss": 1.6551, "step": 84550 }, { "epoch": 2.81, "grad_norm": 0.7184062004089355, "learning_rate": 5.732471341090905e-06, "loss": 1.6889, "step": 84551 }, { "epoch": 2.81, "grad_norm": 0.7352488040924072, "learning_rate": 5.730438016148653e-06, "loss": 1.6894, "step": 84552 }, { "epoch": 2.81, "grad_norm": 0.7101109027862549, "learning_rate": 5.728405048406304e-06, "loss": 1.6127, "step": 84553 }, { "epoch": 2.81, "grad_norm": 0.731007993221283, "learning_rate": 5.726372437866322e-06, "loss": 1.7603, "step": 84554 }, { "epoch": 2.81, "grad_norm": 0.7144621014595032, "learning_rate": 5.72434018453114e-06, "loss": 1.6313, "step": 84555 }, { "epoch": 2.81, "grad_norm": 0.7182532548904419, "learning_rate": 5.722308288403221e-06, "loss": 1.6807, "step": 84556 }, { "epoch": 2.81, "grad_norm": 0.7129697799682617, "learning_rate": 5.720276749485131e-06, "loss": 1.7272, "step": 84557 }, { "epoch": 2.81, "grad_norm": 0.6991481184959412, "learning_rate": 5.7182455677792334e-06, "loss": 1.7126, "step": 84558 }, { "epoch": 2.81, "grad_norm": 0.7181753516197205, "learning_rate": 5.716214743287995e-06, "loss": 1.7113, "step": 84559 }, { "epoch": 2.81, "grad_norm": 0.7021126747131348, "learning_rate": 5.714184276013978e-06, "loss": 1.6504, "step": 84560 }, { "epoch": 2.81, "grad_norm": 0.6921331882476807, "learning_rate": 5.712154165959548e-06, "loss": 1.6552, "step": 84561 }, { "epoch": 2.81, "grad_norm": 0.7120548486709595, "learning_rate": 5.71012441312717e-06, "loss": 1.7656, "step": 84562 }, { "epoch": 2.81, "grad_norm": 0.7119525671005249, "learning_rate": 5.708095017519343e-06, "loss": 1.6467, "step": 84563 }, { "epoch": 2.81, "grad_norm": 0.7206401824951172, "learning_rate": 5.7060659791385964e-06, "loss": 1.7187, "step": 84564 }, { "epoch": 2.81, "grad_norm": 0.7292618751525879, "learning_rate": 5.704037297987263e-06, "loss": 1.6712, "step": 84565 }, { "epoch": 2.81, "grad_norm": 0.6978608965873718, "learning_rate": 5.702008974067873e-06, "loss": 1.6863, "step": 84566 }, { "epoch": 2.81, "grad_norm": 0.7263687252998352, "learning_rate": 5.699981007382892e-06, "loss": 1.6401, "step": 84567 }, { "epoch": 2.81, "grad_norm": 0.7086201906204224, "learning_rate": 5.6979533979347844e-06, "loss": 1.7297, "step": 84568 }, { "epoch": 2.81, "grad_norm": 0.7224850654602051, "learning_rate": 5.695926145725949e-06, "loss": 1.6647, "step": 84569 }, { "epoch": 2.81, "grad_norm": 0.7402151823043823, "learning_rate": 5.6938992507589155e-06, "loss": 1.6395, "step": 84570 }, { "epoch": 2.81, "grad_norm": 0.712993860244751, "learning_rate": 5.691872713036183e-06, "loss": 1.6485, "step": 84571 }, { "epoch": 2.81, "grad_norm": 0.7105027437210083, "learning_rate": 5.689846532560083e-06, "loss": 1.7318, "step": 84572 }, { "epoch": 2.81, "grad_norm": 0.7086361050605774, "learning_rate": 5.687820709333146e-06, "loss": 1.7345, "step": 84573 }, { "epoch": 2.81, "grad_norm": 0.7292526960372925, "learning_rate": 5.685795243357871e-06, "loss": 1.7099, "step": 84574 }, { "epoch": 2.81, "grad_norm": 0.7128044962882996, "learning_rate": 5.683770134636656e-06, "loss": 1.6216, "step": 84575 }, { "epoch": 2.81, "grad_norm": 0.7409716844558716, "learning_rate": 5.681745383171965e-06, "loss": 1.7335, "step": 84576 }, { "epoch": 2.81, "grad_norm": 0.7123154997825623, "learning_rate": 5.679720988966263e-06, "loss": 1.74, "step": 84577 }, { "epoch": 2.81, "grad_norm": 0.7142699956893921, "learning_rate": 5.677696952022082e-06, "loss": 1.6846, "step": 84578 }, { "epoch": 2.81, "grad_norm": 0.7234339714050293, "learning_rate": 5.675673272341719e-06, "loss": 1.6971, "step": 84579 }, { "epoch": 2.81, "grad_norm": 0.7402081489562988, "learning_rate": 5.673649949927772e-06, "loss": 1.7111, "step": 84580 }, { "epoch": 2.81, "grad_norm": 0.7111368179321289, "learning_rate": 5.67162698478264e-06, "loss": 1.6888, "step": 84581 }, { "epoch": 2.81, "grad_norm": 0.7188436985015869, "learning_rate": 5.669604376908787e-06, "loss": 1.682, "step": 84582 }, { "epoch": 2.81, "grad_norm": 0.7103464603424072, "learning_rate": 5.667582126308645e-06, "loss": 1.696, "step": 84583 }, { "epoch": 2.81, "grad_norm": 0.7036585807800293, "learning_rate": 5.665560232984745e-06, "loss": 1.6611, "step": 84584 }, { "epoch": 2.81, "grad_norm": 0.6831361651420593, "learning_rate": 5.663538696939452e-06, "loss": 1.5891, "step": 84585 }, { "epoch": 2.81, "grad_norm": 0.7283775210380554, "learning_rate": 5.661517518175229e-06, "loss": 1.7574, "step": 84586 }, { "epoch": 2.81, "grad_norm": 0.7094552516937256, "learning_rate": 5.659496696694577e-06, "loss": 1.7076, "step": 84587 }, { "epoch": 2.81, "grad_norm": 0.7196279168128967, "learning_rate": 5.6574762324999246e-06, "loss": 1.7263, "step": 84588 }, { "epoch": 2.81, "grad_norm": 0.7158977389335632, "learning_rate": 5.655456125593738e-06, "loss": 1.5857, "step": 84589 }, { "epoch": 2.81, "grad_norm": 0.720426619052887, "learning_rate": 5.653436375978449e-06, "loss": 1.6608, "step": 84590 }, { "epoch": 2.81, "grad_norm": 0.7147615551948547, "learning_rate": 5.65141698365652e-06, "loss": 1.6594, "step": 84591 }, { "epoch": 2.81, "grad_norm": 0.739757239818573, "learning_rate": 5.649397948630419e-06, "loss": 1.7361, "step": 84592 }, { "epoch": 2.81, "grad_norm": 0.7410839200019836, "learning_rate": 5.6473792709025415e-06, "loss": 1.6783, "step": 84593 }, { "epoch": 2.81, "grad_norm": 0.7167699337005615, "learning_rate": 5.6453609504753864e-06, "loss": 1.6437, "step": 84594 }, { "epoch": 2.81, "grad_norm": 0.6964977383613586, "learning_rate": 5.643342987351418e-06, "loss": 1.7004, "step": 84595 }, { "epoch": 2.81, "grad_norm": 0.7150110602378845, "learning_rate": 5.6413253815330354e-06, "loss": 1.7737, "step": 84596 }, { "epoch": 2.81, "grad_norm": 0.6949874758720398, "learning_rate": 5.639308133022735e-06, "loss": 1.6166, "step": 84597 }, { "epoch": 2.81, "grad_norm": 0.7173519730567932, "learning_rate": 5.637291241822916e-06, "loss": 1.6831, "step": 84598 }, { "epoch": 2.81, "grad_norm": 0.7210108637809753, "learning_rate": 5.635274707936077e-06, "loss": 1.7654, "step": 84599 }, { "epoch": 2.81, "grad_norm": 0.7396689653396606, "learning_rate": 5.633258531364615e-06, "loss": 1.6806, "step": 84600 }, { "epoch": 2.81, "grad_norm": 0.7028835415840149, "learning_rate": 5.6312427121110286e-06, "loss": 1.7125, "step": 84601 }, { "epoch": 2.81, "grad_norm": 0.7050998210906982, "learning_rate": 5.629227250177715e-06, "loss": 1.6193, "step": 84602 }, { "epoch": 2.81, "grad_norm": 0.7325022220611572, "learning_rate": 5.627212145567173e-06, "loss": 1.6543, "step": 84603 }, { "epoch": 2.81, "grad_norm": 0.7199437022209167, "learning_rate": 5.6251973982818e-06, "loss": 1.6167, "step": 84604 }, { "epoch": 2.81, "grad_norm": 0.7518654465675354, "learning_rate": 5.623183008324061e-06, "loss": 1.6806, "step": 84605 }, { "epoch": 2.81, "grad_norm": 0.7050173282623291, "learning_rate": 5.6211689756964215e-06, "loss": 1.7261, "step": 84606 }, { "epoch": 2.81, "grad_norm": 0.7210595011711121, "learning_rate": 5.619155300401313e-06, "loss": 1.7318, "step": 84607 }, { "epoch": 2.81, "grad_norm": 0.7115272283554077, "learning_rate": 5.617141982441164e-06, "loss": 1.6727, "step": 84608 }, { "epoch": 2.81, "grad_norm": 0.7022706866264343, "learning_rate": 5.61512902181841e-06, "loss": 1.6588, "step": 84609 }, { "epoch": 2.81, "grad_norm": 0.7000835537910461, "learning_rate": 5.613116418535546e-06, "loss": 1.7223, "step": 84610 }, { "epoch": 2.82, "grad_norm": 0.7080065608024597, "learning_rate": 5.611104172594971e-06, "loss": 1.6454, "step": 84611 }, { "epoch": 2.82, "grad_norm": 0.7133525609970093, "learning_rate": 5.609092283999117e-06, "loss": 1.7491, "step": 84612 }, { "epoch": 2.82, "grad_norm": 0.7152618169784546, "learning_rate": 5.607080752750481e-06, "loss": 1.6889, "step": 84613 }, { "epoch": 2.82, "grad_norm": 0.7062861323356628, "learning_rate": 5.605069578851462e-06, "loss": 1.7389, "step": 84614 }, { "epoch": 2.82, "grad_norm": 0.72725510597229, "learning_rate": 5.603058762304524e-06, "loss": 1.7216, "step": 84615 }, { "epoch": 2.82, "grad_norm": 0.7274071574211121, "learning_rate": 5.601048303112066e-06, "loss": 1.6709, "step": 84616 }, { "epoch": 2.82, "grad_norm": 0.7032049298286438, "learning_rate": 5.599038201276584e-06, "loss": 1.6085, "step": 84617 }, { "epoch": 2.82, "grad_norm": 0.711513102054596, "learning_rate": 5.597028456800479e-06, "loss": 1.7171, "step": 84618 }, { "epoch": 2.82, "grad_norm": 0.7396396398544312, "learning_rate": 5.59501906968618e-06, "loss": 1.6683, "step": 84619 }, { "epoch": 2.82, "grad_norm": 0.7146666049957275, "learning_rate": 5.593010039936186e-06, "loss": 1.6651, "step": 84620 }, { "epoch": 2.82, "grad_norm": 0.7056381702423096, "learning_rate": 5.591001367552894e-06, "loss": 1.6814, "step": 84621 }, { "epoch": 2.82, "grad_norm": 0.7252693772315979, "learning_rate": 5.5889930525387375e-06, "loss": 1.7028, "step": 84622 }, { "epoch": 2.82, "grad_norm": 0.7140040397644043, "learning_rate": 5.586985094896146e-06, "loss": 1.6489, "step": 84623 }, { "epoch": 2.82, "grad_norm": 0.7278359532356262, "learning_rate": 5.584977494627618e-06, "loss": 1.6972, "step": 84624 }, { "epoch": 2.82, "grad_norm": 0.6923918724060059, "learning_rate": 5.582970251735519e-06, "loss": 1.6615, "step": 84625 }, { "epoch": 2.82, "grad_norm": 0.7179192304611206, "learning_rate": 5.580963366222313e-06, "loss": 1.6355, "step": 84626 }, { "epoch": 2.82, "grad_norm": 0.742712140083313, "learning_rate": 5.578956838090465e-06, "loss": 1.6901, "step": 84627 }, { "epoch": 2.82, "grad_norm": 0.7026888132095337, "learning_rate": 5.576950667342372e-06, "loss": 1.6655, "step": 84628 }, { "epoch": 2.82, "grad_norm": 0.6981796622276306, "learning_rate": 5.5749448539804344e-06, "loss": 1.6692, "step": 84629 }, { "epoch": 2.82, "grad_norm": 0.722037672996521, "learning_rate": 5.572939398007182e-06, "loss": 1.6857, "step": 84630 }, { "epoch": 2.82, "grad_norm": 0.7194443345069885, "learning_rate": 5.570934299424979e-06, "loss": 1.6839, "step": 84631 }, { "epoch": 2.82, "grad_norm": 0.7412365078926086, "learning_rate": 5.56892955823629e-06, "loss": 1.6696, "step": 84632 }, { "epoch": 2.82, "grad_norm": 0.7236647009849548, "learning_rate": 5.566925174443515e-06, "loss": 1.7182, "step": 84633 }, { "epoch": 2.82, "grad_norm": 0.7067754864692688, "learning_rate": 5.564921148049118e-06, "loss": 1.6214, "step": 84634 }, { "epoch": 2.82, "grad_norm": 0.7132712006568909, "learning_rate": 5.562917479055562e-06, "loss": 1.5767, "step": 84635 }, { "epoch": 2.82, "grad_norm": 0.7094890475273132, "learning_rate": 5.560914167465213e-06, "loss": 1.6791, "step": 84636 }, { "epoch": 2.82, "grad_norm": 0.7087006568908691, "learning_rate": 5.558911213280504e-06, "loss": 1.6635, "step": 84637 }, { "epoch": 2.82, "grad_norm": 0.7180798053741455, "learning_rate": 5.55690861650393e-06, "loss": 1.7404, "step": 84638 }, { "epoch": 2.82, "grad_norm": 0.7245296835899353, "learning_rate": 5.554906377137891e-06, "loss": 1.6344, "step": 84639 }, { "epoch": 2.82, "grad_norm": 0.7198565006256104, "learning_rate": 5.552904495184784e-06, "loss": 1.6921, "step": 84640 }, { "epoch": 2.82, "grad_norm": 0.7060918211936951, "learning_rate": 5.550902970647076e-06, "loss": 1.7273, "step": 84641 }, { "epoch": 2.82, "grad_norm": 0.7180790901184082, "learning_rate": 5.548901803527195e-06, "loss": 1.6731, "step": 84642 }, { "epoch": 2.82, "grad_norm": 0.6938266158103943, "learning_rate": 5.546900993827541e-06, "loss": 1.6909, "step": 84643 }, { "epoch": 2.82, "grad_norm": 0.7245386838912964, "learning_rate": 5.544900541550578e-06, "loss": 1.6169, "step": 84644 }, { "epoch": 2.82, "grad_norm": 0.7239657640457153, "learning_rate": 5.542900446698706e-06, "loss": 1.7153, "step": 84645 }, { "epoch": 2.82, "grad_norm": 0.707645833492279, "learning_rate": 5.5409007092743875e-06, "loss": 1.7127, "step": 84646 }, { "epoch": 2.82, "grad_norm": 0.7225365042686462, "learning_rate": 5.538901329280021e-06, "loss": 1.7466, "step": 84647 }, { "epoch": 2.82, "grad_norm": 0.6991146802902222, "learning_rate": 5.536902306718038e-06, "loss": 1.6703, "step": 84648 }, { "epoch": 2.82, "grad_norm": 0.7150567770004272, "learning_rate": 5.534903641590904e-06, "loss": 1.6133, "step": 84649 }, { "epoch": 2.82, "grad_norm": 0.7059431672096252, "learning_rate": 5.53290533390095e-06, "loss": 1.6433, "step": 84650 }, { "epoch": 2.82, "grad_norm": 0.7153822779655457, "learning_rate": 5.530907383650707e-06, "loss": 1.6885, "step": 84651 }, { "epoch": 2.82, "grad_norm": 0.7174370884895325, "learning_rate": 5.528909790842506e-06, "loss": 1.6474, "step": 84652 }, { "epoch": 2.82, "grad_norm": 0.7066234946250916, "learning_rate": 5.52691255547888e-06, "loss": 1.7473, "step": 84653 }, { "epoch": 2.82, "grad_norm": 0.7258890271186829, "learning_rate": 5.524915677562125e-06, "loss": 1.6625, "step": 84654 }, { "epoch": 2.82, "grad_norm": 0.6892926692962646, "learning_rate": 5.5229191570948076e-06, "loss": 1.6579, "step": 84655 }, { "epoch": 2.82, "grad_norm": 0.7162964344024658, "learning_rate": 5.520922994079258e-06, "loss": 1.6592, "step": 84656 }, { "epoch": 2.82, "grad_norm": 0.7085299491882324, "learning_rate": 5.518927188517874e-06, "loss": 1.6473, "step": 84657 }, { "epoch": 2.82, "grad_norm": 0.7105403542518616, "learning_rate": 5.5169317404131556e-06, "loss": 1.7289, "step": 84658 }, { "epoch": 2.82, "grad_norm": 0.7575915455818176, "learning_rate": 5.514936649767465e-06, "loss": 1.609, "step": 84659 }, { "epoch": 2.82, "grad_norm": 0.7078133821487427, "learning_rate": 5.512941916583302e-06, "loss": 1.6439, "step": 84660 }, { "epoch": 2.82, "grad_norm": 0.7323570847511292, "learning_rate": 5.510947540862998e-06, "loss": 1.71, "step": 84661 }, { "epoch": 2.82, "grad_norm": 0.7026379704475403, "learning_rate": 5.5089535226090165e-06, "loss": 1.6951, "step": 84662 }, { "epoch": 2.82, "grad_norm": 0.728670060634613, "learning_rate": 5.5069598618237895e-06, "loss": 1.7311, "step": 84663 }, { "epoch": 2.82, "grad_norm": 0.7182386517524719, "learning_rate": 5.5049665585097155e-06, "loss": 1.6782, "step": 84664 }, { "epoch": 2.82, "grad_norm": 0.7102205753326416, "learning_rate": 5.5029736126692256e-06, "loss": 1.7804, "step": 84665 }, { "epoch": 2.82, "grad_norm": 0.7089928388595581, "learning_rate": 5.500981024304718e-06, "loss": 1.6261, "step": 84666 }, { "epoch": 2.82, "grad_norm": 0.72845458984375, "learning_rate": 5.498988793418624e-06, "loss": 1.6787, "step": 84667 }, { "epoch": 2.82, "grad_norm": 0.7303411960601807, "learning_rate": 5.4969969200133745e-06, "loss": 1.6835, "step": 84668 }, { "epoch": 2.82, "grad_norm": 0.7116398215293884, "learning_rate": 5.495005404091368e-06, "loss": 1.6196, "step": 84669 }, { "epoch": 2.82, "grad_norm": 0.7145037055015564, "learning_rate": 5.4930142456550695e-06, "loss": 1.7115, "step": 84670 }, { "epoch": 2.82, "grad_norm": 0.7186251878738403, "learning_rate": 5.4910234447068434e-06, "loss": 1.6527, "step": 84671 }, { "epoch": 2.82, "grad_norm": 0.7009128928184509, "learning_rate": 5.489033001249088e-06, "loss": 1.6834, "step": 84672 }, { "epoch": 2.82, "grad_norm": 0.7214904427528381, "learning_rate": 5.487042915284268e-06, "loss": 1.673, "step": 84673 }, { "epoch": 2.82, "grad_norm": 0.7014763951301575, "learning_rate": 5.485053186814781e-06, "loss": 1.7518, "step": 84674 }, { "epoch": 2.82, "grad_norm": 0.7065169215202332, "learning_rate": 5.4830638158430584e-06, "loss": 1.6332, "step": 84675 }, { "epoch": 2.82, "grad_norm": 0.7152188420295715, "learning_rate": 5.481074802371499e-06, "loss": 1.6549, "step": 84676 }, { "epoch": 2.82, "grad_norm": 0.7221577167510986, "learning_rate": 5.479086146402534e-06, "loss": 1.682, "step": 84677 }, { "epoch": 2.82, "grad_norm": 0.7052589058876038, "learning_rate": 5.477097847938528e-06, "loss": 1.5987, "step": 84678 }, { "epoch": 2.82, "grad_norm": 0.7347151041030884, "learning_rate": 5.475109906981945e-06, "loss": 1.7557, "step": 84679 }, { "epoch": 2.82, "grad_norm": 0.7303403615951538, "learning_rate": 5.47312232353515e-06, "loss": 1.6443, "step": 84680 }, { "epoch": 2.82, "grad_norm": 0.7273374795913696, "learning_rate": 5.4711350976006765e-06, "loss": 1.7625, "step": 84681 }, { "epoch": 2.82, "grad_norm": 0.6961973309516907, "learning_rate": 5.469148229180787e-06, "loss": 1.6316, "step": 84682 }, { "epoch": 2.82, "grad_norm": 0.6978239417076111, "learning_rate": 5.467161718277946e-06, "loss": 1.6686, "step": 84683 }, { "epoch": 2.82, "grad_norm": 0.6955022811889648, "learning_rate": 5.465175564894586e-06, "loss": 1.686, "step": 84684 }, { "epoch": 2.82, "grad_norm": 0.7167719006538391, "learning_rate": 5.463189769033105e-06, "loss": 1.6669, "step": 84685 }, { "epoch": 2.82, "grad_norm": 0.728649914264679, "learning_rate": 5.461204330695901e-06, "loss": 1.6478, "step": 84686 }, { "epoch": 2.82, "grad_norm": 0.7198573350906372, "learning_rate": 5.459219249885404e-06, "loss": 1.585, "step": 84687 }, { "epoch": 2.82, "grad_norm": 0.7084653973579407, "learning_rate": 5.457234526604048e-06, "loss": 1.734, "step": 84688 }, { "epoch": 2.82, "grad_norm": 0.7132135629653931, "learning_rate": 5.455250160854197e-06, "loss": 1.6588, "step": 84689 }, { "epoch": 2.82, "grad_norm": 0.6949799656867981, "learning_rate": 5.453266152638247e-06, "loss": 1.6629, "step": 84690 }, { "epoch": 2.82, "grad_norm": 0.708641767501831, "learning_rate": 5.451282501958632e-06, "loss": 1.7456, "step": 84691 }, { "epoch": 2.82, "grad_norm": 0.7332030534744263, "learning_rate": 5.449299208817815e-06, "loss": 1.796, "step": 84692 }, { "epoch": 2.82, "grad_norm": 0.7239928245544434, "learning_rate": 5.4473162732180944e-06, "loss": 1.6602, "step": 84693 }, { "epoch": 2.82, "grad_norm": 0.694261908531189, "learning_rate": 5.445333695161902e-06, "loss": 1.687, "step": 84694 }, { "epoch": 2.82, "grad_norm": 0.7129697799682617, "learning_rate": 5.443351474651769e-06, "loss": 1.6944, "step": 84695 }, { "epoch": 2.82, "grad_norm": 1.0270346403121948, "learning_rate": 5.441369611689927e-06, "loss": 1.7135, "step": 84696 }, { "epoch": 2.82, "grad_norm": 0.7258687019348145, "learning_rate": 5.439388106278908e-06, "loss": 1.6674, "step": 84697 }, { "epoch": 2.82, "grad_norm": 0.7209175229072571, "learning_rate": 5.437406958421042e-06, "loss": 1.7367, "step": 84698 }, { "epoch": 2.82, "grad_norm": 0.7124007940292358, "learning_rate": 5.435426168118795e-06, "loss": 1.6581, "step": 84699 }, { "epoch": 2.82, "grad_norm": 0.7326035499572754, "learning_rate": 5.433445735374498e-06, "loss": 1.6418, "step": 84700 }, { "epoch": 2.82, "grad_norm": 0.7082297205924988, "learning_rate": 5.4314656601906146e-06, "loss": 1.6735, "step": 84701 }, { "epoch": 2.82, "grad_norm": 0.733809232711792, "learning_rate": 5.429485942569544e-06, "loss": 1.6876, "step": 84702 }, { "epoch": 2.82, "grad_norm": 0.6927711367607117, "learning_rate": 5.427506582513652e-06, "loss": 1.6113, "step": 84703 }, { "epoch": 2.82, "grad_norm": 0.7116513848304749, "learning_rate": 5.425527580025335e-06, "loss": 1.6915, "step": 84704 }, { "epoch": 2.82, "grad_norm": 0.7527936100959778, "learning_rate": 5.423548935107091e-06, "loss": 1.6931, "step": 84705 }, { "epoch": 2.82, "grad_norm": 0.699100911617279, "learning_rate": 5.42157064776122e-06, "loss": 1.6904, "step": 84706 }, { "epoch": 2.82, "grad_norm": 0.7162262797355652, "learning_rate": 5.419592717990151e-06, "loss": 1.6851, "step": 84707 }, { "epoch": 2.82, "grad_norm": 0.7099101543426514, "learning_rate": 5.417615145796317e-06, "loss": 1.6229, "step": 84708 }, { "epoch": 2.82, "grad_norm": 0.7356997132301331, "learning_rate": 5.4156379311820815e-06, "loss": 1.6379, "step": 84709 }, { "epoch": 2.82, "grad_norm": 0.7416170239448547, "learning_rate": 5.413661074149844e-06, "loss": 1.6959, "step": 84710 }, { "epoch": 2.82, "grad_norm": 0.7121900320053101, "learning_rate": 5.411684574702002e-06, "loss": 1.6327, "step": 84711 }, { "epoch": 2.82, "grad_norm": 0.7099292874336243, "learning_rate": 5.40970843284102e-06, "loss": 1.6685, "step": 84712 }, { "epoch": 2.82, "grad_norm": 0.7037827968597412, "learning_rate": 5.40773264856923e-06, "loss": 1.6281, "step": 84713 }, { "epoch": 2.82, "grad_norm": 0.7164273858070374, "learning_rate": 5.4057572218890285e-06, "loss": 1.5821, "step": 84714 }, { "epoch": 2.82, "grad_norm": 0.7162907719612122, "learning_rate": 5.4037821528028495e-06, "loss": 1.6159, "step": 84715 }, { "epoch": 2.82, "grad_norm": 0.7109611630439758, "learning_rate": 5.401807441313055e-06, "loss": 1.6768, "step": 84716 }, { "epoch": 2.82, "grad_norm": 0.7134533524513245, "learning_rate": 5.399833087422079e-06, "loss": 1.6882, "step": 84717 }, { "epoch": 2.82, "grad_norm": 0.694271981716156, "learning_rate": 5.397859091132284e-06, "loss": 1.7209, "step": 84718 }, { "epoch": 2.82, "grad_norm": 0.7251296043395996, "learning_rate": 5.395885452446069e-06, "loss": 1.6632, "step": 84719 }, { "epoch": 2.82, "grad_norm": 0.735687792301178, "learning_rate": 5.393912171365866e-06, "loss": 1.6458, "step": 84720 }, { "epoch": 2.82, "grad_norm": 0.7539585828781128, "learning_rate": 5.391939247894006e-06, "loss": 1.7416, "step": 84721 }, { "epoch": 2.82, "grad_norm": 0.718461811542511, "learning_rate": 5.389966682032987e-06, "loss": 1.6862, "step": 84722 }, { "epoch": 2.82, "grad_norm": 0.7157906889915466, "learning_rate": 5.387994473785107e-06, "loss": 1.684, "step": 84723 }, { "epoch": 2.82, "grad_norm": 0.711220383644104, "learning_rate": 5.3860226231527634e-06, "loss": 1.7006, "step": 84724 }, { "epoch": 2.82, "grad_norm": 0.7315189838409424, "learning_rate": 5.384051130138422e-06, "loss": 1.6718, "step": 84725 }, { "epoch": 2.82, "grad_norm": 0.7155025005340576, "learning_rate": 5.382079994744415e-06, "loss": 1.7201, "step": 84726 }, { "epoch": 2.82, "grad_norm": 0.699186384677887, "learning_rate": 5.380109216973138e-06, "loss": 1.6664, "step": 84727 }, { "epoch": 2.82, "grad_norm": 0.7204903364181519, "learning_rate": 5.378138796827025e-06, "loss": 1.741, "step": 84728 }, { "epoch": 2.82, "grad_norm": 0.7142680287361145, "learning_rate": 5.376168734308439e-06, "loss": 1.7028, "step": 84729 }, { "epoch": 2.82, "grad_norm": 0.751409649848938, "learning_rate": 5.374199029419779e-06, "loss": 1.7103, "step": 84730 }, { "epoch": 2.82, "grad_norm": 0.7108844518661499, "learning_rate": 5.372229682163376e-06, "loss": 1.6475, "step": 84731 }, { "epoch": 2.82, "grad_norm": 0.7303513884544373, "learning_rate": 5.370260692541728e-06, "loss": 1.6712, "step": 84732 }, { "epoch": 2.82, "grad_norm": 0.7219781279563904, "learning_rate": 5.368292060557167e-06, "loss": 1.6553, "step": 84733 }, { "epoch": 2.82, "grad_norm": 0.7446967959403992, "learning_rate": 5.36632378621209e-06, "loss": 1.6858, "step": 84734 }, { "epoch": 2.82, "grad_norm": 0.7050424218177795, "learning_rate": 5.364355869508863e-06, "loss": 1.6902, "step": 84735 }, { "epoch": 2.82, "grad_norm": 0.7178629636764526, "learning_rate": 5.362388310449884e-06, "loss": 1.6568, "step": 84736 }, { "epoch": 2.82, "grad_norm": 0.7142528891563416, "learning_rate": 5.360421109037583e-06, "loss": 1.6452, "step": 84737 }, { "epoch": 2.82, "grad_norm": 0.7063028216362, "learning_rate": 5.358454265274326e-06, "loss": 1.6322, "step": 84738 }, { "epoch": 2.82, "grad_norm": 0.7375496625900269, "learning_rate": 5.356487779162477e-06, "loss": 1.6348, "step": 84739 }, { "epoch": 2.82, "grad_norm": 0.6970670223236084, "learning_rate": 5.354521650704402e-06, "loss": 1.773, "step": 84740 }, { "epoch": 2.82, "grad_norm": 0.7096136212348938, "learning_rate": 5.352555879902564e-06, "loss": 1.6667, "step": 84741 }, { "epoch": 2.82, "grad_norm": 0.7147908210754395, "learning_rate": 5.35059046675933e-06, "loss": 1.6848, "step": 84742 }, { "epoch": 2.82, "grad_norm": 0.7296630144119263, "learning_rate": 5.34862541127703e-06, "loss": 1.6525, "step": 84743 }, { "epoch": 2.82, "grad_norm": 0.7261667847633362, "learning_rate": 5.346660713458062e-06, "loss": 1.6561, "step": 84744 }, { "epoch": 2.82, "grad_norm": 0.7221246957778931, "learning_rate": 5.3446963733048905e-06, "loss": 1.7015, "step": 84745 }, { "epoch": 2.82, "grad_norm": 0.7026962041854858, "learning_rate": 5.342732390819815e-06, "loss": 1.6751, "step": 84746 }, { "epoch": 2.82, "grad_norm": 0.730157732963562, "learning_rate": 5.340768766005232e-06, "loss": 1.7301, "step": 84747 }, { "epoch": 2.82, "grad_norm": 0.7401634454727173, "learning_rate": 5.3388054988635745e-06, "loss": 1.6779, "step": 84748 }, { "epoch": 2.82, "grad_norm": 0.755073606967926, "learning_rate": 5.336842589397173e-06, "loss": 1.627, "step": 84749 }, { "epoch": 2.82, "grad_norm": 0.7305670976638794, "learning_rate": 5.3348800376083914e-06, "loss": 1.7166, "step": 84750 }, { "epoch": 2.82, "grad_norm": 0.7207271456718445, "learning_rate": 5.332917843499696e-06, "loss": 1.6214, "step": 84751 }, { "epoch": 2.82, "grad_norm": 0.7370839715003967, "learning_rate": 5.330956007073417e-06, "loss": 1.7578, "step": 84752 }, { "epoch": 2.82, "grad_norm": 0.7274673581123352, "learning_rate": 5.32899452833192e-06, "loss": 1.7035, "step": 84753 }, { "epoch": 2.82, "grad_norm": 0.7126139998435974, "learning_rate": 5.327033407277603e-06, "loss": 1.7219, "step": 84754 }, { "epoch": 2.82, "grad_norm": 0.714167058467865, "learning_rate": 5.325072643912864e-06, "loss": 1.6734, "step": 84755 }, { "epoch": 2.82, "grad_norm": 0.7174511551856995, "learning_rate": 5.323112238240101e-06, "loss": 1.6682, "step": 84756 }, { "epoch": 2.82, "grad_norm": 0.7098531126976013, "learning_rate": 5.3211521902616125e-06, "loss": 1.6447, "step": 84757 }, { "epoch": 2.82, "grad_norm": 0.6987460255622864, "learning_rate": 5.31919249997983e-06, "loss": 1.6491, "step": 84758 }, { "epoch": 2.82, "grad_norm": 0.7061235904693604, "learning_rate": 5.31723316739715e-06, "loss": 1.7035, "step": 84759 }, { "epoch": 2.82, "grad_norm": 0.7261021733283997, "learning_rate": 5.315274192515906e-06, "loss": 1.6034, "step": 84760 }, { "epoch": 2.82, "grad_norm": 0.7031315565109253, "learning_rate": 5.3133155753384946e-06, "loss": 1.7137, "step": 84761 }, { "epoch": 2.82, "grad_norm": 0.7360368371009827, "learning_rate": 5.311357315867348e-06, "loss": 1.6678, "step": 84762 }, { "epoch": 2.82, "grad_norm": 0.6935992240905762, "learning_rate": 5.3093994141047646e-06, "loss": 1.6522, "step": 84763 }, { "epoch": 2.82, "grad_norm": 0.7069858312606812, "learning_rate": 5.307441870053142e-06, "loss": 1.6484, "step": 84764 }, { "epoch": 2.82, "grad_norm": 0.7134643197059631, "learning_rate": 5.305484683714878e-06, "loss": 1.7154, "step": 84765 }, { "epoch": 2.82, "grad_norm": 0.690399706363678, "learning_rate": 5.303527855092305e-06, "loss": 1.6311, "step": 84766 }, { "epoch": 2.82, "grad_norm": 0.7254294157028198, "learning_rate": 5.301571384187853e-06, "loss": 1.6947, "step": 84767 }, { "epoch": 2.82, "grad_norm": 0.7443127036094666, "learning_rate": 5.299615271003854e-06, "loss": 1.6994, "step": 84768 }, { "epoch": 2.82, "grad_norm": 0.7236685752868652, "learning_rate": 5.2976595155427075e-06, "loss": 1.6814, "step": 84769 }, { "epoch": 2.82, "grad_norm": 0.7421358227729797, "learning_rate": 5.2957041178068094e-06, "loss": 1.5905, "step": 84770 }, { "epoch": 2.82, "grad_norm": 0.7259116768836975, "learning_rate": 5.293749077798459e-06, "loss": 1.5926, "step": 84771 }, { "epoch": 2.82, "grad_norm": 0.7166339159011841, "learning_rate": 5.291794395520088e-06, "loss": 1.6821, "step": 84772 }, { "epoch": 2.82, "grad_norm": 0.7275989651679993, "learning_rate": 5.289840070974094e-06, "loss": 1.6593, "step": 84773 }, { "epoch": 2.82, "grad_norm": 0.7538553476333618, "learning_rate": 5.2878861041627415e-06, "loss": 1.6307, "step": 84774 }, { "epoch": 2.82, "grad_norm": 0.7384706139564514, "learning_rate": 5.28593249508853e-06, "loss": 1.7532, "step": 84775 }, { "epoch": 2.82, "grad_norm": 0.7089442610740662, "learning_rate": 5.283979243753722e-06, "loss": 1.6384, "step": 84776 }, { "epoch": 2.82, "grad_norm": 0.7110801935195923, "learning_rate": 5.282026350160784e-06, "loss": 1.6056, "step": 84777 }, { "epoch": 2.82, "grad_norm": 0.7411686778068542, "learning_rate": 5.280073814312047e-06, "loss": 1.7132, "step": 84778 }, { "epoch": 2.82, "grad_norm": 0.714415431022644, "learning_rate": 5.278121636209842e-06, "loss": 1.6491, "step": 84779 }, { "epoch": 2.82, "grad_norm": 0.705081582069397, "learning_rate": 5.276169815856601e-06, "loss": 1.6636, "step": 84780 }, { "epoch": 2.82, "grad_norm": 0.7264607548713684, "learning_rate": 5.2742183532546226e-06, "loss": 1.6723, "step": 84781 }, { "epoch": 2.82, "grad_norm": 0.6850084662437439, "learning_rate": 5.272267248406336e-06, "loss": 1.6288, "step": 84782 }, { "epoch": 2.82, "grad_norm": 0.7124112844467163, "learning_rate": 5.270316501314109e-06, "loss": 1.6426, "step": 84783 }, { "epoch": 2.82, "grad_norm": 0.751599133014679, "learning_rate": 5.268366111980271e-06, "loss": 1.7313, "step": 84784 }, { "epoch": 2.82, "grad_norm": 0.7107487916946411, "learning_rate": 5.266416080407221e-06, "loss": 1.6548, "step": 84785 }, { "epoch": 2.82, "grad_norm": 0.7246785759925842, "learning_rate": 5.26446640659729e-06, "loss": 1.72, "step": 84786 }, { "epoch": 2.82, "grad_norm": 0.7144066095352173, "learning_rate": 5.262517090552909e-06, "loss": 1.672, "step": 84787 }, { "epoch": 2.82, "grad_norm": 0.7068929076194763, "learning_rate": 5.260568132276411e-06, "loss": 1.6337, "step": 84788 }, { "epoch": 2.82, "grad_norm": 0.7081635594367981, "learning_rate": 5.2586195317701255e-06, "loss": 1.7167, "step": 84789 }, { "epoch": 2.82, "grad_norm": 0.7125139832496643, "learning_rate": 5.2566712890364185e-06, "loss": 1.6059, "step": 84790 }, { "epoch": 2.82, "grad_norm": 0.7648492455482483, "learning_rate": 5.254723404077721e-06, "loss": 1.6908, "step": 84791 }, { "epoch": 2.82, "grad_norm": 0.7178220152854919, "learning_rate": 5.252775876896398e-06, "loss": 1.7463, "step": 84792 }, { "epoch": 2.82, "grad_norm": 0.6974956393241882, "learning_rate": 5.2508287074947145e-06, "loss": 1.7307, "step": 84793 }, { "epoch": 2.82, "grad_norm": 0.7066496014595032, "learning_rate": 5.248881895875134e-06, "loss": 1.6644, "step": 84794 }, { "epoch": 2.82, "grad_norm": 0.7235285639762878, "learning_rate": 5.246935442039956e-06, "loss": 1.5863, "step": 84795 }, { "epoch": 2.82, "grad_norm": 0.7157194018363953, "learning_rate": 5.2449893459915795e-06, "loss": 1.7323, "step": 84796 }, { "epoch": 2.82, "grad_norm": 0.696563720703125, "learning_rate": 5.243043607732334e-06, "loss": 1.6161, "step": 84797 }, { "epoch": 2.82, "grad_norm": 0.7036587595939636, "learning_rate": 5.2410982272646505e-06, "loss": 1.6729, "step": 84798 }, { "epoch": 2.82, "grad_norm": 0.7037519216537476, "learning_rate": 5.239153204590796e-06, "loss": 1.7017, "step": 84799 }, { "epoch": 2.82, "grad_norm": 0.7623644471168518, "learning_rate": 5.2372085397132e-06, "loss": 1.738, "step": 84800 }, { "epoch": 2.82, "grad_norm": 0.688145637512207, "learning_rate": 5.235264232634195e-06, "loss": 1.6898, "step": 84801 }, { "epoch": 2.82, "grad_norm": 0.7470177412033081, "learning_rate": 5.233320283356179e-06, "loss": 1.7817, "step": 84802 }, { "epoch": 2.82, "grad_norm": 0.7092921137809753, "learning_rate": 5.231376691881417e-06, "loss": 1.6434, "step": 84803 }, { "epoch": 2.82, "grad_norm": 0.7353253960609436, "learning_rate": 5.2294334582123735e-06, "loss": 1.6889, "step": 84804 }, { "epoch": 2.82, "grad_norm": 0.7177830934524536, "learning_rate": 5.227490582351379e-06, "loss": 1.5733, "step": 84805 }, { "epoch": 2.82, "grad_norm": 0.7094946503639221, "learning_rate": 5.225548064300766e-06, "loss": 1.7203, "step": 84806 }, { "epoch": 2.82, "grad_norm": 0.6946574449539185, "learning_rate": 5.2236059040628664e-06, "loss": 1.6277, "step": 84807 }, { "epoch": 2.82, "grad_norm": 0.7112878561019897, "learning_rate": 5.221664101640111e-06, "loss": 1.7251, "step": 84808 }, { "epoch": 2.82, "grad_norm": 0.7068688869476318, "learning_rate": 5.2197226570348305e-06, "loss": 1.607, "step": 84809 }, { "epoch": 2.82, "grad_norm": 0.7192091345787048, "learning_rate": 5.217781570249357e-06, "loss": 1.6562, "step": 84810 }, { "epoch": 2.82, "grad_norm": 0.7205297350883484, "learning_rate": 5.215840841286056e-06, "loss": 1.6562, "step": 84811 }, { "epoch": 2.82, "grad_norm": 0.7174113988876343, "learning_rate": 5.213900470147325e-06, "loss": 1.6929, "step": 84812 }, { "epoch": 2.82, "grad_norm": 0.7064372301101685, "learning_rate": 5.211960456835462e-06, "loss": 1.7077, "step": 84813 }, { "epoch": 2.82, "grad_norm": 0.72488933801651, "learning_rate": 5.210020801352832e-06, "loss": 1.6626, "step": 84814 }, { "epoch": 2.82, "grad_norm": 0.6974496245384216, "learning_rate": 5.2080815037017995e-06, "loss": 1.6984, "step": 84815 }, { "epoch": 2.82, "grad_norm": 0.7376123070716858, "learning_rate": 5.2061425638847635e-06, "loss": 1.6938, "step": 84816 }, { "epoch": 2.82, "grad_norm": 0.7155008316040039, "learning_rate": 5.2042039819039875e-06, "loss": 1.7125, "step": 84817 }, { "epoch": 2.82, "grad_norm": 0.7324795722961426, "learning_rate": 5.202265757761903e-06, "loss": 1.7087, "step": 84818 }, { "epoch": 2.82, "grad_norm": 0.707088053226471, "learning_rate": 5.200327891460809e-06, "loss": 1.7184, "step": 84819 }, { "epoch": 2.82, "grad_norm": 0.7120850682258606, "learning_rate": 5.19839038300307e-06, "loss": 1.7323, "step": 84820 }, { "epoch": 2.82, "grad_norm": 0.6944456100463867, "learning_rate": 5.196453232391051e-06, "loss": 1.6336, "step": 84821 }, { "epoch": 2.82, "grad_norm": 0.7369551658630371, "learning_rate": 5.194516439627117e-06, "loss": 1.7692, "step": 84822 }, { "epoch": 2.82, "grad_norm": 0.7194834351539612, "learning_rate": 5.192580004713631e-06, "loss": 1.6906, "step": 84823 }, { "epoch": 2.82, "grad_norm": 0.7036744356155396, "learning_rate": 5.19064392765286e-06, "loss": 1.635, "step": 84824 }, { "epoch": 2.82, "grad_norm": 0.6958379745483398, "learning_rate": 5.188708208447234e-06, "loss": 1.6865, "step": 84825 }, { "epoch": 2.82, "grad_norm": 0.7655808329582214, "learning_rate": 5.186772847099085e-06, "loss": 1.6862, "step": 84826 }, { "epoch": 2.82, "grad_norm": 0.7476609349250793, "learning_rate": 5.1848378436107455e-06, "loss": 1.6748, "step": 84827 }, { "epoch": 2.82, "grad_norm": 0.7236590385437012, "learning_rate": 5.182903197984578e-06, "loss": 1.6433, "step": 84828 }, { "epoch": 2.82, "grad_norm": 0.7148109674453735, "learning_rate": 5.1809689102229155e-06, "loss": 1.7613, "step": 84829 }, { "epoch": 2.82, "grad_norm": 0.7147946357727051, "learning_rate": 5.179034980328156e-06, "loss": 1.6622, "step": 84830 }, { "epoch": 2.82, "grad_norm": 0.7232279777526855, "learning_rate": 5.177101408302564e-06, "loss": 1.7434, "step": 84831 }, { "epoch": 2.82, "grad_norm": 0.7109604477882385, "learning_rate": 5.175168194148538e-06, "loss": 1.6362, "step": 84832 }, { "epoch": 2.82, "grad_norm": 0.723049521446228, "learning_rate": 5.173235337868442e-06, "loss": 1.6909, "step": 84833 }, { "epoch": 2.82, "grad_norm": 0.6932013034820557, "learning_rate": 5.171302839464575e-06, "loss": 1.6234, "step": 84834 }, { "epoch": 2.82, "grad_norm": 0.6991833448410034, "learning_rate": 5.169370698939301e-06, "loss": 1.7363, "step": 84835 }, { "epoch": 2.82, "grad_norm": 0.715298593044281, "learning_rate": 5.167438916294986e-06, "loss": 1.6853, "step": 84836 }, { "epoch": 2.82, "grad_norm": 0.6936913728713989, "learning_rate": 5.16550749153396e-06, "loss": 1.633, "step": 84837 }, { "epoch": 2.82, "grad_norm": 0.7093436121940613, "learning_rate": 5.163576424658522e-06, "loss": 1.5924, "step": 84838 }, { "epoch": 2.82, "grad_norm": 0.699628472328186, "learning_rate": 5.161645715671103e-06, "loss": 1.6905, "step": 84839 }, { "epoch": 2.82, "grad_norm": 0.708788275718689, "learning_rate": 5.159715364574035e-06, "loss": 1.7154, "step": 84840 }, { "epoch": 2.82, "grad_norm": 0.7023673057556152, "learning_rate": 5.157785371369549e-06, "loss": 1.7058, "step": 84841 }, { "epoch": 2.82, "grad_norm": 0.6983683705329895, "learning_rate": 5.155855736060144e-06, "loss": 1.6911, "step": 84842 }, { "epoch": 2.82, "grad_norm": 0.7068371176719666, "learning_rate": 5.153926458648017e-06, "loss": 1.6479, "step": 84843 }, { "epoch": 2.82, "grad_norm": 0.7234398126602173, "learning_rate": 5.1519975391356325e-06, "loss": 1.6483, "step": 84844 }, { "epoch": 2.82, "grad_norm": 0.7090158462524414, "learning_rate": 5.1500689775252905e-06, "loss": 1.7153, "step": 84845 }, { "epoch": 2.82, "grad_norm": 0.717334508895874, "learning_rate": 5.148140773819287e-06, "loss": 1.6599, "step": 84846 }, { "epoch": 2.82, "grad_norm": 0.7061054706573486, "learning_rate": 5.1462129280199886e-06, "loss": 1.6826, "step": 84847 }, { "epoch": 2.82, "grad_norm": 0.7159286141395569, "learning_rate": 5.144285440129758e-06, "loss": 1.6552, "step": 84848 }, { "epoch": 2.82, "grad_norm": 0.7111492156982422, "learning_rate": 5.142358310150929e-06, "loss": 1.6374, "step": 84849 }, { "epoch": 2.82, "grad_norm": 0.6987879872322083, "learning_rate": 5.140431538085798e-06, "loss": 1.6521, "step": 84850 }, { "epoch": 2.82, "grad_norm": 0.7138738036155701, "learning_rate": 5.138505123936765e-06, "loss": 1.7374, "step": 84851 }, { "epoch": 2.82, "grad_norm": 0.6858060359954834, "learning_rate": 5.136579067706159e-06, "loss": 1.6631, "step": 84852 }, { "epoch": 2.82, "grad_norm": 0.7130612730979919, "learning_rate": 5.13465336939628e-06, "loss": 1.6586, "step": 84853 }, { "epoch": 2.82, "grad_norm": 0.7283089756965637, "learning_rate": 5.132728029009492e-06, "loss": 1.7041, "step": 84854 }, { "epoch": 2.82, "grad_norm": 0.7062923908233643, "learning_rate": 5.130803046548126e-06, "loss": 1.6811, "step": 84855 }, { "epoch": 2.82, "grad_norm": 0.7058293223381042, "learning_rate": 5.128878422014482e-06, "loss": 1.663, "step": 84856 }, { "epoch": 2.82, "grad_norm": 0.7101329565048218, "learning_rate": 5.126954155410956e-06, "loss": 1.6289, "step": 84857 }, { "epoch": 2.82, "grad_norm": 0.7175499200820923, "learning_rate": 5.125030246739881e-06, "loss": 1.6819, "step": 84858 }, { "epoch": 2.82, "grad_norm": 0.7070112228393555, "learning_rate": 5.123106696003554e-06, "loss": 1.7033, "step": 84859 }, { "epoch": 2.82, "grad_norm": 0.7245713472366333, "learning_rate": 5.121183503204307e-06, "loss": 1.761, "step": 84860 }, { "epoch": 2.82, "grad_norm": 0.7081893682479858, "learning_rate": 5.119260668344505e-06, "loss": 1.6119, "step": 84861 }, { "epoch": 2.82, "grad_norm": 0.710621178150177, "learning_rate": 5.117338191426512e-06, "loss": 1.7674, "step": 84862 }, { "epoch": 2.82, "grad_norm": 0.7273666262626648, "learning_rate": 5.1154160724525935e-06, "loss": 1.6073, "step": 84863 }, { "epoch": 2.82, "grad_norm": 0.7410369515419006, "learning_rate": 5.113494311425081e-06, "loss": 1.649, "step": 84864 }, { "epoch": 2.82, "grad_norm": 0.6936812400817871, "learning_rate": 5.111572908346373e-06, "loss": 1.6861, "step": 84865 }, { "epoch": 2.82, "grad_norm": 0.7208617925643921, "learning_rate": 5.1096518632187665e-06, "loss": 1.7059, "step": 84866 }, { "epoch": 2.82, "grad_norm": 0.7129029631614685, "learning_rate": 5.1077311760445936e-06, "loss": 1.6353, "step": 84867 }, { "epoch": 2.82, "grad_norm": 0.7034139037132263, "learning_rate": 5.105810846826153e-06, "loss": 1.7085, "step": 84868 }, { "epoch": 2.82, "grad_norm": 0.7435577511787415, "learning_rate": 5.103890875565874e-06, "loss": 1.7515, "step": 84869 }, { "epoch": 2.82, "grad_norm": 0.7111769318580627, "learning_rate": 5.101971262265958e-06, "loss": 1.707, "step": 84870 }, { "epoch": 2.82, "grad_norm": 0.6906779408454895, "learning_rate": 5.100052006928801e-06, "loss": 1.6925, "step": 84871 }, { "epoch": 2.82, "grad_norm": 0.7239329218864441, "learning_rate": 5.0981331095567345e-06, "loss": 1.6331, "step": 84872 }, { "epoch": 2.82, "grad_norm": 0.6999911665916443, "learning_rate": 5.096214570152124e-06, "loss": 1.6499, "step": 84873 }, { "epoch": 2.82, "grad_norm": 0.710813045501709, "learning_rate": 5.0942963887172e-06, "loss": 1.714, "step": 84874 }, { "epoch": 2.82, "grad_norm": 0.7219248414039612, "learning_rate": 5.092378565254396e-06, "loss": 1.7561, "step": 84875 }, { "epoch": 2.82, "grad_norm": 0.7252510190010071, "learning_rate": 5.090461099765975e-06, "loss": 1.7149, "step": 84876 }, { "epoch": 2.82, "grad_norm": 0.7360213398933411, "learning_rate": 5.088543992254268e-06, "loss": 1.6524, "step": 84877 }, { "epoch": 2.82, "grad_norm": 0.7223905920982361, "learning_rate": 5.086627242721608e-06, "loss": 1.6473, "step": 84878 }, { "epoch": 2.82, "grad_norm": 0.7339324355125427, "learning_rate": 5.08471085117036e-06, "loss": 1.7405, "step": 84879 }, { "epoch": 2.82, "grad_norm": 0.7222998738288879, "learning_rate": 5.082794817602787e-06, "loss": 1.6648, "step": 84880 }, { "epoch": 2.82, "grad_norm": 0.7149393558502197, "learning_rate": 5.0808791420212545e-06, "loss": 1.6936, "step": 84881 }, { "epoch": 2.82, "grad_norm": 0.6980138421058655, "learning_rate": 5.078963824428095e-06, "loss": 1.6672, "step": 84882 }, { "epoch": 2.82, "grad_norm": 0.7151352167129517, "learning_rate": 5.0770488648256395e-06, "loss": 1.7248, "step": 84883 }, { "epoch": 2.82, "grad_norm": 0.7278764247894287, "learning_rate": 5.075134263216152e-06, "loss": 1.7111, "step": 84884 }, { "epoch": 2.82, "grad_norm": 0.7043798565864563, "learning_rate": 5.073220019602031e-06, "loss": 1.6802, "step": 84885 }, { "epoch": 2.82, "grad_norm": 0.707690954208374, "learning_rate": 5.071306133985509e-06, "loss": 1.6583, "step": 84886 }, { "epoch": 2.82, "grad_norm": 0.6971388459205627, "learning_rate": 5.069392606369016e-06, "loss": 1.6735, "step": 84887 }, { "epoch": 2.82, "grad_norm": 0.720538854598999, "learning_rate": 5.067479436754784e-06, "loss": 1.6407, "step": 84888 }, { "epoch": 2.82, "grad_norm": 0.7129759788513184, "learning_rate": 5.065566625145212e-06, "loss": 1.653, "step": 84889 }, { "epoch": 2.82, "grad_norm": 0.6999219059944153, "learning_rate": 5.063654171542597e-06, "loss": 1.6953, "step": 84890 }, { "epoch": 2.82, "grad_norm": 0.7297405004501343, "learning_rate": 5.061742075949204e-06, "loss": 1.6765, "step": 84891 }, { "epoch": 2.82, "grad_norm": 0.7154021263122559, "learning_rate": 5.059830338367432e-06, "loss": 1.5952, "step": 84892 }, { "epoch": 2.82, "grad_norm": 0.7074235081672668, "learning_rate": 5.057918958799545e-06, "loss": 1.6108, "step": 84893 }, { "epoch": 2.82, "grad_norm": 0.7277186512947083, "learning_rate": 5.056007937247908e-06, "loss": 1.6315, "step": 84894 }, { "epoch": 2.82, "grad_norm": 0.7181740999221802, "learning_rate": 5.054097273714819e-06, "loss": 1.6947, "step": 84895 }, { "epoch": 2.82, "grad_norm": 0.7257813215255737, "learning_rate": 5.052186968202576e-06, "loss": 1.7337, "step": 84896 }, { "epoch": 2.82, "grad_norm": 0.706092119216919, "learning_rate": 5.050277020713545e-06, "loss": 1.6658, "step": 84897 }, { "epoch": 2.82, "grad_norm": 0.7132312059402466, "learning_rate": 5.048367431250022e-06, "loss": 1.737, "step": 84898 }, { "epoch": 2.82, "grad_norm": 0.7245402336120605, "learning_rate": 5.046458199814307e-06, "loss": 1.6341, "step": 84899 }, { "epoch": 2.82, "grad_norm": 0.7192668914794922, "learning_rate": 5.044549326408698e-06, "loss": 1.6471, "step": 84900 }, { "epoch": 2.82, "grad_norm": 0.7294384837150574, "learning_rate": 5.042640811035592e-06, "loss": 1.7503, "step": 84901 }, { "epoch": 2.82, "grad_norm": 0.6986597776412964, "learning_rate": 5.040732653697288e-06, "loss": 1.6875, "step": 84902 }, { "epoch": 2.82, "grad_norm": 0.718795120716095, "learning_rate": 5.038824854396017e-06, "loss": 1.6578, "step": 84903 }, { "epoch": 2.82, "grad_norm": 0.7411977052688599, "learning_rate": 5.036917413134178e-06, "loss": 1.7511, "step": 84904 }, { "epoch": 2.82, "grad_norm": 0.7208781242370605, "learning_rate": 5.0350103299140686e-06, "loss": 1.645, "step": 84905 }, { "epoch": 2.82, "grad_norm": 0.7298987507820129, "learning_rate": 5.033103604737954e-06, "loss": 1.6676, "step": 84906 }, { "epoch": 2.82, "grad_norm": 0.7266309261322021, "learning_rate": 5.031197237608198e-06, "loss": 1.6878, "step": 84907 }, { "epoch": 2.82, "grad_norm": 0.7460664510726929, "learning_rate": 5.0292912285271325e-06, "loss": 1.6754, "step": 84908 }, { "epoch": 2.82, "grad_norm": 0.7223484516143799, "learning_rate": 5.027385577497056e-06, "loss": 1.6786, "step": 84909 }, { "epoch": 2.82, "grad_norm": 0.7092345952987671, "learning_rate": 5.025480284520234e-06, "loss": 1.6352, "step": 84910 }, { "epoch": 2.82, "grad_norm": 0.8016958236694336, "learning_rate": 5.023575349599063e-06, "loss": 1.6775, "step": 84911 }, { "epoch": 2.83, "grad_norm": 0.7298030853271484, "learning_rate": 5.021670772735775e-06, "loss": 1.7095, "step": 84912 }, { "epoch": 2.83, "grad_norm": 0.7233594059944153, "learning_rate": 5.019766553932736e-06, "loss": 1.7686, "step": 84913 }, { "epoch": 2.83, "grad_norm": 0.718195915222168, "learning_rate": 5.0178626931922095e-06, "loss": 1.6878, "step": 84914 }, { "epoch": 2.83, "grad_norm": 0.700295627117157, "learning_rate": 5.015959190516561e-06, "loss": 1.6654, "step": 84915 }, { "epoch": 2.83, "grad_norm": 0.7332824468612671, "learning_rate": 5.0140560459080546e-06, "loss": 1.5996, "step": 84916 }, { "epoch": 2.83, "grad_norm": 0.6894644498825073, "learning_rate": 5.012153259369023e-06, "loss": 1.6163, "step": 84917 }, { "epoch": 2.83, "grad_norm": 0.7101451754570007, "learning_rate": 5.010250830901763e-06, "loss": 1.6716, "step": 84918 }, { "epoch": 2.83, "grad_norm": 0.7269331216812134, "learning_rate": 5.0083487605086406e-06, "loss": 1.7169, "step": 84919 }, { "epoch": 2.83, "grad_norm": 0.7020220756530762, "learning_rate": 5.0064470481918526e-06, "loss": 1.6764, "step": 84920 }, { "epoch": 2.83, "grad_norm": 0.7280166745185852, "learning_rate": 5.004545693953799e-06, "loss": 1.6333, "step": 84921 }, { "epoch": 2.83, "grad_norm": 0.7008727192878723, "learning_rate": 5.002644697796809e-06, "loss": 1.6466, "step": 84922 }, { "epoch": 2.83, "grad_norm": 0.7160087823867798, "learning_rate": 5.000744059723083e-06, "loss": 1.6466, "step": 84923 }, { "epoch": 2.83, "grad_norm": 0.7226203083992004, "learning_rate": 4.998843779734985e-06, "loss": 1.7214, "step": 84924 }, { "epoch": 2.83, "grad_norm": 0.7156966328620911, "learning_rate": 4.9969438578348455e-06, "loss": 1.7404, "step": 84925 }, { "epoch": 2.83, "grad_norm": 0.6927924752235413, "learning_rate": 4.995044294024963e-06, "loss": 1.6503, "step": 84926 }, { "epoch": 2.83, "grad_norm": 0.7122078537940979, "learning_rate": 4.993145088307604e-06, "loss": 1.6801, "step": 84927 }, { "epoch": 2.83, "grad_norm": 0.7031843662261963, "learning_rate": 4.991246240685099e-06, "loss": 1.6594, "step": 84928 }, { "epoch": 2.83, "grad_norm": 0.710185170173645, "learning_rate": 4.989347751159811e-06, "loss": 1.6955, "step": 84929 }, { "epoch": 2.83, "grad_norm": 0.7299142479896545, "learning_rate": 4.987449619733941e-06, "loss": 1.6955, "step": 84930 }, { "epoch": 2.83, "grad_norm": 0.7004104852676392, "learning_rate": 4.9855518464098185e-06, "loss": 1.6717, "step": 84931 }, { "epoch": 2.83, "grad_norm": 0.7296895384788513, "learning_rate": 4.983654431189777e-06, "loss": 1.669, "step": 84932 }, { "epoch": 2.83, "grad_norm": 0.7316220998764038, "learning_rate": 4.981757374076145e-06, "loss": 1.682, "step": 84933 }, { "epoch": 2.83, "grad_norm": 0.7172284126281738, "learning_rate": 4.979860675071157e-06, "loss": 1.7444, "step": 84934 }, { "epoch": 2.83, "grad_norm": 0.7210600972175598, "learning_rate": 4.977964334177176e-06, "loss": 1.6886, "step": 84935 }, { "epoch": 2.83, "grad_norm": 0.6803613305091858, "learning_rate": 4.976068351396467e-06, "loss": 1.6621, "step": 84936 }, { "epoch": 2.83, "grad_norm": 0.7291024327278137, "learning_rate": 4.974172726731329e-06, "loss": 1.6478, "step": 84937 }, { "epoch": 2.83, "grad_norm": 0.7359159588813782, "learning_rate": 4.97227746018406e-06, "loss": 1.6746, "step": 84938 }, { "epoch": 2.83, "grad_norm": 0.7295602560043335, "learning_rate": 4.970382551757024e-06, "loss": 1.6113, "step": 84939 }, { "epoch": 2.83, "grad_norm": 0.7105718851089478, "learning_rate": 4.968488001452453e-06, "loss": 1.6871, "step": 84940 }, { "epoch": 2.83, "grad_norm": 0.7169318199157715, "learning_rate": 4.966593809272645e-06, "loss": 1.6658, "step": 84941 }, { "epoch": 2.83, "grad_norm": 0.7294068336486816, "learning_rate": 4.964699975219933e-06, "loss": 1.6165, "step": 84942 }, { "epoch": 2.83, "grad_norm": 0.6896467208862305, "learning_rate": 4.962806499296612e-06, "loss": 1.6489, "step": 84943 }, { "epoch": 2.83, "grad_norm": 0.7065828442573547, "learning_rate": 4.96091338150495e-06, "loss": 1.6319, "step": 84944 }, { "epoch": 2.83, "grad_norm": 0.7041622400283813, "learning_rate": 4.9590206218473095e-06, "loss": 1.6743, "step": 84945 }, { "epoch": 2.83, "grad_norm": 0.6939728856086731, "learning_rate": 4.95712822032589e-06, "loss": 1.6927, "step": 84946 }, { "epoch": 2.83, "grad_norm": 0.7383939623832703, "learning_rate": 4.955236176943089e-06, "loss": 1.7264, "step": 84947 }, { "epoch": 2.83, "grad_norm": 0.70822674036026, "learning_rate": 4.9533444917011385e-06, "loss": 1.6979, "step": 84948 }, { "epoch": 2.83, "grad_norm": 0.7334461808204651, "learning_rate": 4.95145316460237e-06, "loss": 1.7099, "step": 84949 }, { "epoch": 2.83, "grad_norm": 0.7403647303581238, "learning_rate": 4.9495621956490815e-06, "loss": 1.676, "step": 84950 }, { "epoch": 2.83, "grad_norm": 0.7183471322059631, "learning_rate": 4.9476715848435045e-06, "loss": 1.6602, "step": 84951 }, { "epoch": 2.83, "grad_norm": 0.6969502568244934, "learning_rate": 4.945781332188004e-06, "loss": 1.6293, "step": 84952 }, { "epoch": 2.83, "grad_norm": 0.7188791036605835, "learning_rate": 4.9438914376848435e-06, "loss": 1.746, "step": 84953 }, { "epoch": 2.83, "grad_norm": 0.7121206521987915, "learning_rate": 4.942001901336323e-06, "loss": 1.682, "step": 84954 }, { "epoch": 2.83, "grad_norm": 0.7007235288619995, "learning_rate": 4.94011272314474e-06, "loss": 1.7194, "step": 84955 }, { "epoch": 2.83, "grad_norm": 0.7325807809829712, "learning_rate": 4.938223903112392e-06, "loss": 1.6629, "step": 84956 }, { "epoch": 2.83, "grad_norm": 0.7011801600456238, "learning_rate": 4.936335441241579e-06, "loss": 1.697, "step": 84957 }, { "epoch": 2.83, "grad_norm": 0.7475784420967102, "learning_rate": 4.934447337534531e-06, "loss": 1.6227, "step": 84958 }, { "epoch": 2.83, "grad_norm": 0.7348014712333679, "learning_rate": 4.9325595919936455e-06, "loss": 1.6793, "step": 84959 }, { "epoch": 2.83, "grad_norm": 0.7127964496612549, "learning_rate": 4.930672204621089e-06, "loss": 1.6889, "step": 84960 }, { "epoch": 2.83, "grad_norm": 0.7336981892585754, "learning_rate": 4.928785175419292e-06, "loss": 1.6344, "step": 84961 }, { "epoch": 2.83, "grad_norm": 0.7247886657714844, "learning_rate": 4.926898504390453e-06, "loss": 1.6933, "step": 84962 }, { "epoch": 2.83, "grad_norm": 0.7263188362121582, "learning_rate": 4.925012191536837e-06, "loss": 1.7205, "step": 84963 }, { "epoch": 2.83, "grad_norm": 0.7276647686958313, "learning_rate": 4.9231262368608085e-06, "loss": 1.7187, "step": 84964 }, { "epoch": 2.83, "grad_norm": 0.7158090472221375, "learning_rate": 4.921240640364632e-06, "loss": 1.6915, "step": 84965 }, { "epoch": 2.83, "grad_norm": 0.7203104496002197, "learning_rate": 4.9193554020506065e-06, "loss": 1.6878, "step": 84966 }, { "epoch": 2.83, "grad_norm": 0.7111321687698364, "learning_rate": 4.917470521920963e-06, "loss": 1.6754, "step": 84967 }, { "epoch": 2.83, "grad_norm": 0.7194738388061523, "learning_rate": 4.915585999978067e-06, "loss": 1.6463, "step": 84968 }, { "epoch": 2.83, "grad_norm": 0.7287900447845459, "learning_rate": 4.913701836224182e-06, "loss": 1.7167, "step": 84969 }, { "epoch": 2.83, "grad_norm": 0.7135137319564819, "learning_rate": 4.911818030661541e-06, "loss": 1.707, "step": 84970 }, { "epoch": 2.83, "grad_norm": 0.7165096402168274, "learning_rate": 4.9099345832925076e-06, "loss": 1.6609, "step": 84971 }, { "epoch": 2.83, "grad_norm": 0.6983310580253601, "learning_rate": 4.908051494119347e-06, "loss": 1.7027, "step": 84972 }, { "epoch": 2.83, "grad_norm": 0.7035436630249023, "learning_rate": 4.906168763144291e-06, "loss": 1.6947, "step": 84973 }, { "epoch": 2.83, "grad_norm": 0.6992400884628296, "learning_rate": 4.904286390369672e-06, "loss": 1.6475, "step": 84974 }, { "epoch": 2.83, "grad_norm": 0.7297051548957825, "learning_rate": 4.902404375797786e-06, "loss": 1.6605, "step": 84975 }, { "epoch": 2.83, "grad_norm": 0.7434754967689514, "learning_rate": 4.900522719430899e-06, "loss": 1.7416, "step": 84976 }, { "epoch": 2.83, "grad_norm": 0.7070671319961548, "learning_rate": 4.898641421271277e-06, "loss": 1.5931, "step": 84977 }, { "epoch": 2.83, "grad_norm": 0.7057556509971619, "learning_rate": 4.896760481321216e-06, "loss": 1.5888, "step": 84978 }, { "epoch": 2.83, "grad_norm": 0.7189981341362, "learning_rate": 4.894879899583082e-06, "loss": 1.7073, "step": 84979 }, { "epoch": 2.83, "grad_norm": 0.7224413752555847, "learning_rate": 4.8929996760590065e-06, "loss": 1.6899, "step": 84980 }, { "epoch": 2.83, "grad_norm": 0.7121046185493469, "learning_rate": 4.891119810751354e-06, "loss": 1.6686, "step": 84981 }, { "epoch": 2.83, "grad_norm": 0.7107365727424622, "learning_rate": 4.889240303662423e-06, "loss": 1.6292, "step": 84982 }, { "epoch": 2.83, "grad_norm": 0.7199280261993408, "learning_rate": 4.887361154794478e-06, "loss": 1.6865, "step": 84983 }, { "epoch": 2.83, "grad_norm": 0.6853580474853516, "learning_rate": 4.885482364149751e-06, "loss": 1.6469, "step": 84984 }, { "epoch": 2.83, "grad_norm": 0.701112687587738, "learning_rate": 4.883603931730607e-06, "loss": 1.6809, "step": 84985 }, { "epoch": 2.83, "grad_norm": 0.705929160118103, "learning_rate": 4.8817258575393096e-06, "loss": 1.6595, "step": 84986 }, { "epoch": 2.83, "grad_norm": 0.7336041927337646, "learning_rate": 4.879848141578058e-06, "loss": 1.6761, "step": 84987 }, { "epoch": 2.83, "grad_norm": 0.734032154083252, "learning_rate": 4.877970783849183e-06, "loss": 1.7316, "step": 84988 }, { "epoch": 2.83, "grad_norm": 0.7089349627494812, "learning_rate": 4.876093784354984e-06, "loss": 1.7173, "step": 84989 }, { "epoch": 2.83, "grad_norm": 0.7253734469413757, "learning_rate": 4.8742171430977585e-06, "loss": 1.6431, "step": 84990 }, { "epoch": 2.83, "grad_norm": 0.7087300419807434, "learning_rate": 4.8723408600797044e-06, "loss": 1.6899, "step": 84991 }, { "epoch": 2.83, "grad_norm": 0.7371627688407898, "learning_rate": 4.870464935303154e-06, "loss": 1.6709, "step": 84992 }, { "epoch": 2.83, "grad_norm": 0.7263990640640259, "learning_rate": 4.868589368770403e-06, "loss": 1.6391, "step": 84993 }, { "epoch": 2.83, "grad_norm": 0.7164966464042664, "learning_rate": 4.866714160483653e-06, "loss": 1.6107, "step": 84994 }, { "epoch": 2.83, "grad_norm": 0.7129846811294556, "learning_rate": 4.8648393104452345e-06, "loss": 1.7099, "step": 84995 }, { "epoch": 2.83, "grad_norm": 0.7384836077690125, "learning_rate": 4.862964818657411e-06, "loss": 1.6657, "step": 84996 }, { "epoch": 2.83, "grad_norm": 0.7157517671585083, "learning_rate": 4.861090685122515e-06, "loss": 1.6113, "step": 84997 }, { "epoch": 2.83, "grad_norm": 0.7338219285011292, "learning_rate": 4.859216909842712e-06, "loss": 1.6396, "step": 84998 }, { "epoch": 2.83, "grad_norm": 0.7388607859611511, "learning_rate": 4.857343492820365e-06, "loss": 1.5933, "step": 84999 }, { "epoch": 2.83, "grad_norm": 0.723376989364624, "learning_rate": 4.855470434057706e-06, "loss": 1.6459, "step": 85000 }, { "epoch": 2.83, "grad_norm": 0.7195221185684204, "learning_rate": 4.853597733557002e-06, "loss": 1.709, "step": 85001 }, { "epoch": 2.83, "grad_norm": 0.7219961285591125, "learning_rate": 4.8517253913205486e-06, "loss": 1.7496, "step": 85002 }, { "epoch": 2.83, "grad_norm": 0.6910726428031921, "learning_rate": 4.849853407350612e-06, "loss": 1.6237, "step": 85003 }, { "epoch": 2.83, "grad_norm": 0.7285144925117493, "learning_rate": 4.84798178164949e-06, "loss": 1.6744, "step": 85004 }, { "epoch": 2.83, "grad_norm": 0.7283731698989868, "learning_rate": 4.846110514219381e-06, "loss": 1.6759, "step": 85005 }, { "epoch": 2.83, "grad_norm": 0.6910764575004578, "learning_rate": 4.84423960506265e-06, "loss": 1.661, "step": 85006 }, { "epoch": 2.83, "grad_norm": 0.7541298270225525, "learning_rate": 4.842369054181528e-06, "loss": 1.6522, "step": 85007 }, { "epoch": 2.83, "grad_norm": 0.7221614718437195, "learning_rate": 4.840498861578246e-06, "loss": 1.5657, "step": 85008 }, { "epoch": 2.83, "grad_norm": 0.7158764004707336, "learning_rate": 4.8386290272551366e-06, "loss": 1.7208, "step": 85009 }, { "epoch": 2.83, "grad_norm": 0.6951395869255066, "learning_rate": 4.836759551214397e-06, "loss": 1.6489, "step": 85010 }, { "epoch": 2.83, "grad_norm": 0.7124832272529602, "learning_rate": 4.8348904334583935e-06, "loss": 1.7179, "step": 85011 }, { "epoch": 2.83, "grad_norm": 0.710480272769928, "learning_rate": 4.833021673989324e-06, "loss": 1.6693, "step": 85012 }, { "epoch": 2.83, "grad_norm": 0.74128657579422, "learning_rate": 4.831153272809451e-06, "loss": 1.6545, "step": 85013 }, { "epoch": 2.83, "grad_norm": 0.6876128315925598, "learning_rate": 4.8292852299211095e-06, "loss": 1.7507, "step": 85014 }, { "epoch": 2.83, "grad_norm": 0.7234985828399658, "learning_rate": 4.827417545326495e-06, "loss": 1.7584, "step": 85015 }, { "epoch": 2.83, "grad_norm": 0.7167624235153198, "learning_rate": 4.825550219027907e-06, "loss": 1.6753, "step": 85016 }, { "epoch": 2.83, "grad_norm": 0.7254146337509155, "learning_rate": 4.82368325102761e-06, "loss": 1.5963, "step": 85017 }, { "epoch": 2.83, "grad_norm": 0.7275232672691345, "learning_rate": 4.821816641327869e-06, "loss": 1.7057, "step": 85018 }, { "epoch": 2.83, "grad_norm": 0.7415966987609863, "learning_rate": 4.819950389930982e-06, "loss": 1.6626, "step": 85019 }, { "epoch": 2.83, "grad_norm": 0.7277699708938599, "learning_rate": 4.818084496839147e-06, "loss": 1.7198, "step": 85020 }, { "epoch": 2.83, "grad_norm": 0.7272372841835022, "learning_rate": 4.816218962054663e-06, "loss": 1.7155, "step": 85021 }, { "epoch": 2.83, "grad_norm": 0.696101725101471, "learning_rate": 4.8143537855798275e-06, "loss": 1.7418, "step": 85022 }, { "epoch": 2.83, "grad_norm": 0.6869904398918152, "learning_rate": 4.812488967416839e-06, "loss": 1.6303, "step": 85023 }, { "epoch": 2.83, "grad_norm": 0.708791971206665, "learning_rate": 4.810624507567995e-06, "loss": 1.6525, "step": 85024 }, { "epoch": 2.83, "grad_norm": 0.706240713596344, "learning_rate": 4.808760406035561e-06, "loss": 1.6053, "step": 85025 }, { "epoch": 2.83, "grad_norm": 0.713992714881897, "learning_rate": 4.806896662821835e-06, "loss": 1.6846, "step": 85026 }, { "epoch": 2.83, "grad_norm": 0.7387030720710754, "learning_rate": 4.805033277928983e-06, "loss": 1.7002, "step": 85027 }, { "epoch": 2.83, "grad_norm": 0.7240799069404602, "learning_rate": 4.803170251359367e-06, "loss": 1.6835, "step": 85028 }, { "epoch": 2.83, "grad_norm": 0.7036980390548706, "learning_rate": 4.801307583115221e-06, "loss": 1.6847, "step": 85029 }, { "epoch": 2.83, "grad_norm": 0.7235662937164307, "learning_rate": 4.799445273198743e-06, "loss": 1.6223, "step": 85030 }, { "epoch": 2.83, "grad_norm": 0.7003276944160461, "learning_rate": 4.797583321612264e-06, "loss": 1.6644, "step": 85031 }, { "epoch": 2.83, "grad_norm": 0.7040411233901978, "learning_rate": 4.795721728358048e-06, "loss": 1.6434, "step": 85032 }, { "epoch": 2.83, "grad_norm": 0.7021441459655762, "learning_rate": 4.793860493438295e-06, "loss": 1.6239, "step": 85033 }, { "epoch": 2.83, "grad_norm": 0.7180413007736206, "learning_rate": 4.791999616855302e-06, "loss": 1.7131, "step": 85034 }, { "epoch": 2.83, "grad_norm": 0.7017638087272644, "learning_rate": 4.7901390986113675e-06, "loss": 1.6009, "step": 85035 }, { "epoch": 2.83, "grad_norm": 0.701637327671051, "learning_rate": 4.7882789387086566e-06, "loss": 1.6444, "step": 85036 }, { "epoch": 2.83, "grad_norm": 0.7157856225967407, "learning_rate": 4.7864191371494995e-06, "loss": 1.6235, "step": 85037 }, { "epoch": 2.83, "grad_norm": 0.7251574993133545, "learning_rate": 4.784559693936096e-06, "loss": 1.6929, "step": 85038 }, { "epoch": 2.83, "grad_norm": 0.6994104981422424, "learning_rate": 4.782700609070811e-06, "loss": 1.5629, "step": 85039 }, { "epoch": 2.83, "grad_norm": 0.726164698600769, "learning_rate": 4.780841882555775e-06, "loss": 1.6514, "step": 85040 }, { "epoch": 2.83, "grad_norm": 0.7056708931922913, "learning_rate": 4.778983514393286e-06, "loss": 1.6692, "step": 85041 }, { "epoch": 2.83, "grad_norm": 0.6883857846260071, "learning_rate": 4.777125504585677e-06, "loss": 1.6634, "step": 85042 }, { "epoch": 2.83, "grad_norm": 0.7342357635498047, "learning_rate": 4.775267853135112e-06, "loss": 1.7303, "step": 85043 }, { "epoch": 2.83, "grad_norm": 0.7268386483192444, "learning_rate": 4.773410560043822e-06, "loss": 1.6882, "step": 85044 }, { "epoch": 2.83, "grad_norm": 0.6901726722717285, "learning_rate": 4.77155362531414e-06, "loss": 1.6125, "step": 85045 }, { "epoch": 2.83, "grad_norm": 0.7037569880485535, "learning_rate": 4.7696970489483285e-06, "loss": 1.7034, "step": 85046 }, { "epoch": 2.83, "grad_norm": 0.753811776638031, "learning_rate": 4.767840830948588e-06, "loss": 1.6729, "step": 85047 }, { "epoch": 2.83, "grad_norm": 0.7227481007575989, "learning_rate": 4.765984971317149e-06, "loss": 1.6637, "step": 85048 }, { "epoch": 2.83, "grad_norm": 0.700718879699707, "learning_rate": 4.764129470056344e-06, "loss": 1.6711, "step": 85049 }, { "epoch": 2.83, "grad_norm": 0.7459710836410522, "learning_rate": 4.762274327168403e-06, "loss": 1.7407, "step": 85050 }, { "epoch": 2.83, "grad_norm": 0.7193220853805542, "learning_rate": 4.760419542655491e-06, "loss": 1.6617, "step": 85051 }, { "epoch": 2.83, "grad_norm": 0.7029712200164795, "learning_rate": 4.758565116520008e-06, "loss": 1.6167, "step": 85052 }, { "epoch": 2.83, "grad_norm": 0.7335234880447388, "learning_rate": 4.756711048764084e-06, "loss": 1.6461, "step": 85053 }, { "epoch": 2.83, "grad_norm": 0.7108204364776611, "learning_rate": 4.754857339389984e-06, "loss": 1.6937, "step": 85054 }, { "epoch": 2.83, "grad_norm": 0.7154815793037415, "learning_rate": 4.753003988400006e-06, "loss": 1.6701, "step": 85055 }, { "epoch": 2.83, "grad_norm": 0.7606089115142822, "learning_rate": 4.751150995796383e-06, "loss": 1.6841, "step": 85056 }, { "epoch": 2.83, "grad_norm": 0.7169823050498962, "learning_rate": 4.749298361581377e-06, "loss": 1.664, "step": 85057 }, { "epoch": 2.83, "grad_norm": 0.7106077075004578, "learning_rate": 4.74744608575719e-06, "loss": 1.6579, "step": 85058 }, { "epoch": 2.83, "grad_norm": 0.6924021244049072, "learning_rate": 4.74559416832615e-06, "loss": 1.6822, "step": 85059 }, { "epoch": 2.83, "grad_norm": 0.7175449728965759, "learning_rate": 4.743742609290424e-06, "loss": 1.7532, "step": 85060 }, { "epoch": 2.83, "grad_norm": 0.715816080570221, "learning_rate": 4.741891408652276e-06, "loss": 1.6417, "step": 85061 }, { "epoch": 2.83, "grad_norm": 0.7120370268821716, "learning_rate": 4.740040566413972e-06, "loss": 1.7224, "step": 85062 }, { "epoch": 2.83, "grad_norm": 0.7145702838897705, "learning_rate": 4.738190082577742e-06, "loss": 1.7402, "step": 85063 }, { "epoch": 2.83, "grad_norm": 0.7185682058334351, "learning_rate": 4.736339957145885e-06, "loss": 1.6679, "step": 85064 }, { "epoch": 2.83, "grad_norm": 0.6957857608795166, "learning_rate": 4.734490190120565e-06, "loss": 1.6544, "step": 85065 }, { "epoch": 2.83, "grad_norm": 0.7259982824325562, "learning_rate": 4.732640781504116e-06, "loss": 1.6928, "step": 85066 }, { "epoch": 2.83, "grad_norm": 0.7180848717689514, "learning_rate": 4.7307917312987e-06, "loss": 1.6487, "step": 85067 }, { "epoch": 2.83, "grad_norm": 0.719616711139679, "learning_rate": 4.728943039506583e-06, "loss": 1.7028, "step": 85068 }, { "epoch": 2.83, "grad_norm": 0.7544274926185608, "learning_rate": 4.727094706130064e-06, "loss": 1.6734, "step": 85069 }, { "epoch": 2.83, "grad_norm": 0.7057140469551086, "learning_rate": 4.725246731171306e-06, "loss": 1.7576, "step": 85070 }, { "epoch": 2.83, "grad_norm": 0.7277612686157227, "learning_rate": 4.7233991146326425e-06, "loss": 1.6422, "step": 85071 }, { "epoch": 2.83, "grad_norm": 0.6968892216682434, "learning_rate": 4.721551856516237e-06, "loss": 1.6445, "step": 85072 }, { "epoch": 2.83, "grad_norm": 0.7126756906509399, "learning_rate": 4.719704956824322e-06, "loss": 1.6577, "step": 85073 }, { "epoch": 2.83, "grad_norm": 0.7052072286605835, "learning_rate": 4.717858415559228e-06, "loss": 1.6418, "step": 85074 }, { "epoch": 2.83, "grad_norm": 0.7245686054229736, "learning_rate": 4.716012232723121e-06, "loss": 1.7085, "step": 85075 }, { "epoch": 2.83, "grad_norm": 0.7262837290763855, "learning_rate": 4.714166408318299e-06, "loss": 1.6877, "step": 85076 }, { "epoch": 2.83, "grad_norm": 0.723313570022583, "learning_rate": 4.712320942346926e-06, "loss": 1.6891, "step": 85077 }, { "epoch": 2.83, "grad_norm": 0.7531492710113525, "learning_rate": 4.710475834811333e-06, "loss": 1.6947, "step": 85078 }, { "epoch": 2.83, "grad_norm": 0.7146569490432739, "learning_rate": 4.708631085713688e-06, "loss": 1.6746, "step": 85079 }, { "epoch": 2.83, "grad_norm": 0.7130572199821472, "learning_rate": 4.706786695056252e-06, "loss": 1.6285, "step": 85080 }, { "epoch": 2.83, "grad_norm": 0.7331850528717041, "learning_rate": 4.704942662841293e-06, "loss": 1.6382, "step": 85081 }, { "epoch": 2.83, "grad_norm": 0.7400864958763123, "learning_rate": 4.703098989070975e-06, "loss": 1.7796, "step": 85082 }, { "epoch": 2.83, "grad_norm": 0.6989880204200745, "learning_rate": 4.701255673747628e-06, "loss": 1.6875, "step": 85083 }, { "epoch": 2.83, "grad_norm": 0.7369735836982727, "learning_rate": 4.699412716873418e-06, "loss": 1.6483, "step": 85084 }, { "epoch": 2.83, "grad_norm": 0.7498431205749512, "learning_rate": 4.697570118450644e-06, "loss": 1.803, "step": 85085 }, { "epoch": 2.83, "grad_norm": 0.7245792746543884, "learning_rate": 4.695727878481503e-06, "loss": 1.6389, "step": 85086 }, { "epoch": 2.83, "grad_norm": 0.7008271813392639, "learning_rate": 4.693885996968194e-06, "loss": 1.6705, "step": 85087 }, { "epoch": 2.83, "grad_norm": 0.7429440021514893, "learning_rate": 4.692044473913015e-06, "loss": 1.6801, "step": 85088 }, { "epoch": 2.83, "grad_norm": 0.6913250088691711, "learning_rate": 4.69020330931823e-06, "loss": 1.6619, "step": 85089 }, { "epoch": 2.83, "grad_norm": 0.716224193572998, "learning_rate": 4.688362503185972e-06, "loss": 1.6834, "step": 85090 }, { "epoch": 2.83, "grad_norm": 0.7373382449150085, "learning_rate": 4.6865220555185375e-06, "loss": 1.6698, "step": 85091 }, { "epoch": 2.83, "grad_norm": 0.7215979695320129, "learning_rate": 4.684681966318193e-06, "loss": 1.6836, "step": 85092 }, { "epoch": 2.83, "grad_norm": 0.7315484881401062, "learning_rate": 4.682842235587103e-06, "loss": 1.6822, "step": 85093 }, { "epoch": 2.83, "grad_norm": 0.721930205821991, "learning_rate": 4.681002863327499e-06, "loss": 1.7116, "step": 85094 }, { "epoch": 2.83, "grad_norm": 0.7325246334075928, "learning_rate": 4.679163849541678e-06, "loss": 1.6901, "step": 85095 }, { "epoch": 2.83, "grad_norm": 0.7252049446105957, "learning_rate": 4.677325194231873e-06, "loss": 1.6405, "step": 85096 }, { "epoch": 2.83, "grad_norm": 0.7329496741294861, "learning_rate": 4.675486897400216e-06, "loss": 1.6763, "step": 85097 }, { "epoch": 2.83, "grad_norm": 0.7274261116981506, "learning_rate": 4.673648959049036e-06, "loss": 1.6847, "step": 85098 }, { "epoch": 2.83, "grad_norm": 0.7108176946640015, "learning_rate": 4.671811379180535e-06, "loss": 1.6569, "step": 85099 }, { "epoch": 2.83, "grad_norm": 0.7114920616149902, "learning_rate": 4.669974157796941e-06, "loss": 1.6597, "step": 85100 }, { "epoch": 2.83, "grad_norm": 0.7270299792289734, "learning_rate": 4.668137294900454e-06, "loss": 1.6934, "step": 85101 }, { "epoch": 2.83, "grad_norm": 0.7292996644973755, "learning_rate": 4.666300790493338e-06, "loss": 1.7581, "step": 85102 }, { "epoch": 2.83, "grad_norm": 0.718455970287323, "learning_rate": 4.664464644577859e-06, "loss": 1.7358, "step": 85103 }, { "epoch": 2.83, "grad_norm": 0.6924705505371094, "learning_rate": 4.662628857156181e-06, "loss": 1.6571, "step": 85104 }, { "epoch": 2.83, "grad_norm": 0.7049375176429749, "learning_rate": 4.660793428230536e-06, "loss": 1.642, "step": 85105 }, { "epoch": 2.83, "grad_norm": 0.7203252911567688, "learning_rate": 4.658958357803188e-06, "loss": 1.6156, "step": 85106 }, { "epoch": 2.83, "grad_norm": 0.7261182069778442, "learning_rate": 4.657123645876371e-06, "loss": 1.6539, "step": 85107 }, { "epoch": 2.83, "grad_norm": 0.7133212089538574, "learning_rate": 4.655289292452247e-06, "loss": 1.7123, "step": 85108 }, { "epoch": 2.83, "grad_norm": 0.696530282497406, "learning_rate": 4.653455297533115e-06, "loss": 1.7227, "step": 85109 }, { "epoch": 2.83, "grad_norm": 0.7078615427017212, "learning_rate": 4.651621661121174e-06, "loss": 1.7356, "step": 85110 }, { "epoch": 2.83, "grad_norm": 0.6958601474761963, "learning_rate": 4.649788383218622e-06, "loss": 1.6421, "step": 85111 }, { "epoch": 2.83, "grad_norm": 0.7324366569519043, "learning_rate": 4.647955463827724e-06, "loss": 1.6673, "step": 85112 }, { "epoch": 2.83, "grad_norm": 0.7099127173423767, "learning_rate": 4.646122902950677e-06, "loss": 1.6391, "step": 85113 }, { "epoch": 2.83, "grad_norm": 0.699560821056366, "learning_rate": 4.644290700589748e-06, "loss": 1.5922, "step": 85114 }, { "epoch": 2.83, "grad_norm": 0.7384169697761536, "learning_rate": 4.6424588567471e-06, "loss": 1.6817, "step": 85115 }, { "epoch": 2.83, "grad_norm": 0.7148697376251221, "learning_rate": 4.640627371424998e-06, "loss": 1.6402, "step": 85116 }, { "epoch": 2.83, "grad_norm": 0.7236222624778748, "learning_rate": 4.638796244625675e-06, "loss": 1.7413, "step": 85117 }, { "epoch": 2.83, "grad_norm": 0.7253979444503784, "learning_rate": 4.6369654763512954e-06, "loss": 1.7196, "step": 85118 }, { "epoch": 2.83, "grad_norm": 0.7040033936500549, "learning_rate": 4.635135066604156e-06, "loss": 1.6257, "step": 85119 }, { "epoch": 2.83, "grad_norm": 0.6815304160118103, "learning_rate": 4.6333050153864235e-06, "loss": 1.6616, "step": 85120 }, { "epoch": 2.83, "grad_norm": 0.7172221541404724, "learning_rate": 4.631475322700362e-06, "loss": 1.6682, "step": 85121 }, { "epoch": 2.83, "grad_norm": 0.707231879234314, "learning_rate": 4.629645988548136e-06, "loss": 1.6297, "step": 85122 }, { "epoch": 2.83, "grad_norm": 0.7066168785095215, "learning_rate": 4.627817012932011e-06, "loss": 1.6786, "step": 85123 }, { "epoch": 2.83, "grad_norm": 0.728742241859436, "learning_rate": 4.625988395854219e-06, "loss": 1.6653, "step": 85124 }, { "epoch": 2.83, "grad_norm": 0.7257081866264343, "learning_rate": 4.6241601373168905e-06, "loss": 1.7365, "step": 85125 }, { "epoch": 2.83, "grad_norm": 0.6856765151023865, "learning_rate": 4.622332237322391e-06, "loss": 1.6964, "step": 85126 }, { "epoch": 2.83, "grad_norm": 0.7482712864875793, "learning_rate": 4.620504695872784e-06, "loss": 1.6857, "step": 85127 }, { "epoch": 2.83, "grad_norm": 0.7174128890037537, "learning_rate": 4.618677512970403e-06, "loss": 1.6141, "step": 85128 }, { "epoch": 2.83, "grad_norm": 0.7107883095741272, "learning_rate": 4.616850688617446e-06, "loss": 1.6889, "step": 85129 }, { "epoch": 2.83, "grad_norm": 0.7265114784240723, "learning_rate": 4.615024222816077e-06, "loss": 1.6892, "step": 85130 }, { "epoch": 2.83, "grad_norm": 0.6983305811882019, "learning_rate": 4.613198115568562e-06, "loss": 1.6788, "step": 85131 }, { "epoch": 2.83, "grad_norm": 0.6986527442932129, "learning_rate": 4.611372366877064e-06, "loss": 1.7064, "step": 85132 }, { "epoch": 2.83, "grad_norm": 0.7050473690032959, "learning_rate": 4.609546976743883e-06, "loss": 1.6538, "step": 85133 }, { "epoch": 2.83, "grad_norm": 0.7302001118659973, "learning_rate": 4.60772194517115e-06, "loss": 1.6844, "step": 85134 }, { "epoch": 2.83, "grad_norm": 0.7406210899353027, "learning_rate": 4.605897272161163e-06, "loss": 1.6587, "step": 85135 }, { "epoch": 2.83, "grad_norm": 0.7203188538551331, "learning_rate": 4.604072957716087e-06, "loss": 1.6906, "step": 85136 }, { "epoch": 2.83, "grad_norm": 0.7005533576011658, "learning_rate": 4.602249001838087e-06, "loss": 1.6536, "step": 85137 }, { "epoch": 2.83, "grad_norm": 0.720623791217804, "learning_rate": 4.600425404529495e-06, "loss": 1.7174, "step": 85138 }, { "epoch": 2.83, "grad_norm": 0.7138240337371826, "learning_rate": 4.598602165792442e-06, "loss": 1.6825, "step": 85139 }, { "epoch": 2.83, "grad_norm": 0.7332348823547363, "learning_rate": 4.596779285629126e-06, "loss": 1.7767, "step": 85140 }, { "epoch": 2.83, "grad_norm": 0.7234665751457214, "learning_rate": 4.5949567640418124e-06, "loss": 1.6799, "step": 85141 }, { "epoch": 2.83, "grad_norm": 0.7285174131393433, "learning_rate": 4.5931346010326995e-06, "loss": 1.6397, "step": 85142 }, { "epoch": 2.83, "grad_norm": 0.6906268000602722, "learning_rate": 4.591312796604019e-06, "loss": 1.6043, "step": 85143 }, { "epoch": 2.83, "grad_norm": 0.7318015098571777, "learning_rate": 4.589491350757934e-06, "loss": 1.705, "step": 85144 }, { "epoch": 2.83, "grad_norm": 0.6848835945129395, "learning_rate": 4.5876702634966785e-06, "loss": 1.7389, "step": 85145 }, { "epoch": 2.83, "grad_norm": 0.7205559611320496, "learning_rate": 4.585849534822483e-06, "loss": 1.6162, "step": 85146 }, { "epoch": 2.83, "grad_norm": 0.7159959673881531, "learning_rate": 4.584029164737512e-06, "loss": 1.704, "step": 85147 }, { "epoch": 2.83, "grad_norm": 0.7007376551628113, "learning_rate": 4.5822091532439984e-06, "loss": 1.6717, "step": 85148 }, { "epoch": 2.83, "grad_norm": 0.7053379416465759, "learning_rate": 4.580389500344239e-06, "loss": 1.6848, "step": 85149 }, { "epoch": 2.83, "grad_norm": 0.7101966142654419, "learning_rate": 4.5785702060402665e-06, "loss": 1.8057, "step": 85150 }, { "epoch": 2.83, "grad_norm": 0.7039379477500916, "learning_rate": 4.576751270334378e-06, "loss": 1.5866, "step": 85151 }, { "epoch": 2.83, "grad_norm": 0.6929148435592651, "learning_rate": 4.574932693228839e-06, "loss": 1.6544, "step": 85152 }, { "epoch": 2.83, "grad_norm": 0.7194517850875854, "learning_rate": 4.573114474725781e-06, "loss": 1.6224, "step": 85153 }, { "epoch": 2.83, "grad_norm": 0.7076815962791443, "learning_rate": 4.5712966148274355e-06, "loss": 1.6682, "step": 85154 }, { "epoch": 2.83, "grad_norm": 0.6971375346183777, "learning_rate": 4.569479113535968e-06, "loss": 1.678, "step": 85155 }, { "epoch": 2.83, "grad_norm": 0.6908131241798401, "learning_rate": 4.567661970853709e-06, "loss": 1.6653, "step": 85156 }, { "epoch": 2.83, "grad_norm": 0.7252277731895447, "learning_rate": 4.56584518678269e-06, "loss": 1.6973, "step": 85157 }, { "epoch": 2.83, "grad_norm": 0.7071323990821838, "learning_rate": 4.564028761325245e-06, "loss": 1.6356, "step": 85158 }, { "epoch": 2.83, "grad_norm": 0.7158690094947815, "learning_rate": 4.562212694483536e-06, "loss": 1.7223, "step": 85159 }, { "epoch": 2.83, "grad_norm": 0.7185355424880981, "learning_rate": 4.5603969862597955e-06, "loss": 1.6415, "step": 85160 }, { "epoch": 2.83, "grad_norm": 0.7122470140457153, "learning_rate": 4.5585816366561555e-06, "loss": 1.666, "step": 85161 }, { "epoch": 2.83, "grad_norm": 0.7272886633872986, "learning_rate": 4.5567666456748804e-06, "loss": 1.7102, "step": 85162 }, { "epoch": 2.83, "grad_norm": 0.7180963754653931, "learning_rate": 4.554952013318169e-06, "loss": 1.6466, "step": 85163 }, { "epoch": 2.83, "grad_norm": 0.7281172871589661, "learning_rate": 4.55313773958822e-06, "loss": 1.6008, "step": 85164 }, { "epoch": 2.83, "grad_norm": 0.7210816740989685, "learning_rate": 4.551323824487196e-06, "loss": 1.7146, "step": 85165 }, { "epoch": 2.83, "grad_norm": 0.7078545689582825, "learning_rate": 4.549510268017365e-06, "loss": 1.7818, "step": 85166 }, { "epoch": 2.83, "grad_norm": 0.7336082458496094, "learning_rate": 4.547697070180889e-06, "loss": 1.7299, "step": 85167 }, { "epoch": 2.83, "grad_norm": 0.7271122932434082, "learning_rate": 4.545884230979968e-06, "loss": 1.62, "step": 85168 }, { "epoch": 2.83, "grad_norm": 0.710774302482605, "learning_rate": 4.544071750416833e-06, "loss": 1.6426, "step": 85169 }, { "epoch": 2.83, "grad_norm": 0.730725109577179, "learning_rate": 4.542259628493649e-06, "loss": 1.5844, "step": 85170 }, { "epoch": 2.83, "grad_norm": 0.7124871015548706, "learning_rate": 4.5404478652126155e-06, "loss": 1.6066, "step": 85171 }, { "epoch": 2.83, "grad_norm": 0.7188882827758789, "learning_rate": 4.538636460575929e-06, "loss": 1.7005, "step": 85172 }, { "epoch": 2.83, "grad_norm": 0.7042807340621948, "learning_rate": 4.536825414585854e-06, "loss": 1.676, "step": 85173 }, { "epoch": 2.83, "grad_norm": 0.7090288400650024, "learning_rate": 4.5350147272445235e-06, "loss": 1.67, "step": 85174 }, { "epoch": 2.83, "grad_norm": 0.718364953994751, "learning_rate": 4.533204398554102e-06, "loss": 1.6927, "step": 85175 }, { "epoch": 2.83, "grad_norm": 0.7235681414604187, "learning_rate": 4.531394428516888e-06, "loss": 1.6991, "step": 85176 }, { "epoch": 2.83, "grad_norm": 0.7201272249221802, "learning_rate": 4.529584817135046e-06, "loss": 1.6774, "step": 85177 }, { "epoch": 2.83, "grad_norm": 0.721235454082489, "learning_rate": 4.527775564410674e-06, "loss": 1.6612, "step": 85178 }, { "epoch": 2.83, "grad_norm": 0.7251482605934143, "learning_rate": 4.5259666703461036e-06, "loss": 1.6536, "step": 85179 }, { "epoch": 2.83, "grad_norm": 0.7453942894935608, "learning_rate": 4.5241581349434674e-06, "loss": 1.6598, "step": 85180 }, { "epoch": 2.83, "grad_norm": 0.7010053396224976, "learning_rate": 4.5223499582049625e-06, "loss": 1.6868, "step": 85181 }, { "epoch": 2.83, "grad_norm": 0.7156862616539001, "learning_rate": 4.520542140132755e-06, "loss": 1.6847, "step": 85182 }, { "epoch": 2.83, "grad_norm": 0.7126536965370178, "learning_rate": 4.518734680729108e-06, "loss": 1.6193, "step": 85183 }, { "epoch": 2.83, "grad_norm": 0.723537266254425, "learning_rate": 4.516927579996188e-06, "loss": 1.7052, "step": 85184 }, { "epoch": 2.83, "grad_norm": 0.7144880294799805, "learning_rate": 4.515120837936159e-06, "loss": 1.6857, "step": 85185 }, { "epoch": 2.83, "grad_norm": 0.729121744632721, "learning_rate": 4.513314454551253e-06, "loss": 1.7417, "step": 85186 }, { "epoch": 2.83, "grad_norm": 0.7110587954521179, "learning_rate": 4.511508429843635e-06, "loss": 1.6427, "step": 85187 }, { "epoch": 2.83, "grad_norm": 0.6962743997573853, "learning_rate": 4.509702763815504e-06, "loss": 1.6631, "step": 85188 }, { "epoch": 2.83, "grad_norm": 0.7002445459365845, "learning_rate": 4.507897456469089e-06, "loss": 1.6696, "step": 85189 }, { "epoch": 2.83, "grad_norm": 0.7200983166694641, "learning_rate": 4.506092507806491e-06, "loss": 1.6982, "step": 85190 }, { "epoch": 2.83, "grad_norm": 0.6993901133537292, "learning_rate": 4.504287917830007e-06, "loss": 1.7172, "step": 85191 }, { "epoch": 2.83, "grad_norm": 0.7220920920372009, "learning_rate": 4.502483686541736e-06, "loss": 1.6279, "step": 85192 }, { "epoch": 2.83, "grad_norm": 0.6933149099349976, "learning_rate": 4.5006798139439415e-06, "loss": 1.6286, "step": 85193 }, { "epoch": 2.83, "grad_norm": 0.7393463253974915, "learning_rate": 4.498876300038756e-06, "loss": 1.641, "step": 85194 }, { "epoch": 2.83, "grad_norm": 0.7342756390571594, "learning_rate": 4.4970731448284114e-06, "loss": 1.7628, "step": 85195 }, { "epoch": 2.83, "grad_norm": 0.6972056031227112, "learning_rate": 4.495270348315105e-06, "loss": 1.6395, "step": 85196 }, { "epoch": 2.83, "grad_norm": 0.7295450568199158, "learning_rate": 4.493467910500936e-06, "loss": 1.7376, "step": 85197 }, { "epoch": 2.83, "grad_norm": 0.7288779020309448, "learning_rate": 4.491665831388203e-06, "loss": 1.7088, "step": 85198 }, { "epoch": 2.83, "grad_norm": 0.7125480771064758, "learning_rate": 4.489864110979036e-06, "loss": 1.7198, "step": 85199 }, { "epoch": 2.83, "grad_norm": 0.7102552652359009, "learning_rate": 4.4880627492756336e-06, "loss": 1.7363, "step": 85200 }, { "epoch": 2.83, "grad_norm": 0.7169830799102783, "learning_rate": 4.486261746280162e-06, "loss": 1.7023, "step": 85201 }, { "epoch": 2.83, "grad_norm": 0.7663822770118713, "learning_rate": 4.4844611019948515e-06, "loss": 1.7001, "step": 85202 }, { "epoch": 2.83, "grad_norm": 0.7156370282173157, "learning_rate": 4.482660816421868e-06, "loss": 1.6279, "step": 85203 }, { "epoch": 2.83, "grad_norm": 0.7206651568412781, "learning_rate": 4.480860889563376e-06, "loss": 1.6579, "step": 85204 }, { "epoch": 2.83, "grad_norm": 0.7136954069137573, "learning_rate": 4.479061321421573e-06, "loss": 1.6856, "step": 85205 }, { "epoch": 2.83, "grad_norm": 0.6933123469352722, "learning_rate": 4.4772621119986914e-06, "loss": 1.6099, "step": 85206 }, { "epoch": 2.83, "grad_norm": 0.721257746219635, "learning_rate": 4.47546326129683e-06, "loss": 1.7049, "step": 85207 }, { "epoch": 2.83, "grad_norm": 0.7105205059051514, "learning_rate": 4.473664769318186e-06, "loss": 1.7525, "step": 85208 }, { "epoch": 2.83, "grad_norm": 0.7316020131111145, "learning_rate": 4.471866636065024e-06, "loss": 1.6709, "step": 85209 }, { "epoch": 2.83, "grad_norm": 0.7102006077766418, "learning_rate": 4.470068861539444e-06, "loss": 1.669, "step": 85210 }, { "epoch": 2.83, "grad_norm": 0.7312036752700806, "learning_rate": 4.468271445743643e-06, "loss": 1.7496, "step": 85211 }, { "epoch": 2.84, "grad_norm": 0.712530255317688, "learning_rate": 4.4664743886798195e-06, "loss": 1.7227, "step": 85212 }, { "epoch": 2.84, "grad_norm": 0.7224620580673218, "learning_rate": 4.464677690350205e-06, "loss": 1.7472, "step": 85213 }, { "epoch": 2.84, "grad_norm": 0.7160933613777161, "learning_rate": 4.462881350756864e-06, "loss": 1.6838, "step": 85214 }, { "epoch": 2.84, "grad_norm": 0.7207462787628174, "learning_rate": 4.461085369902062e-06, "loss": 1.6668, "step": 85215 }, { "epoch": 2.84, "grad_norm": 0.718773603439331, "learning_rate": 4.459289747787964e-06, "loss": 1.7018, "step": 85216 }, { "epoch": 2.84, "grad_norm": 0.69638991355896, "learning_rate": 4.457494484416735e-06, "loss": 1.7035, "step": 85217 }, { "epoch": 2.84, "grad_norm": 0.7210717797279358, "learning_rate": 4.455699579790539e-06, "loss": 1.7044, "step": 85218 }, { "epoch": 2.84, "grad_norm": 0.7097727060317993, "learning_rate": 4.453905033911609e-06, "loss": 1.6689, "step": 85219 }, { "epoch": 2.84, "grad_norm": 0.7386488318443298, "learning_rate": 4.452110846782109e-06, "loss": 1.6533, "step": 85220 }, { "epoch": 2.84, "grad_norm": 0.705721378326416, "learning_rate": 4.45031701840417e-06, "loss": 1.682, "step": 85221 }, { "epoch": 2.84, "grad_norm": 0.7217435836791992, "learning_rate": 4.448523548779992e-06, "loss": 1.6576, "step": 85222 }, { "epoch": 2.84, "grad_norm": 0.7233836650848389, "learning_rate": 4.446730437911772e-06, "loss": 1.6564, "step": 85223 }, { "epoch": 2.84, "grad_norm": 0.7196474075317383, "learning_rate": 4.444937685801675e-06, "loss": 1.6895, "step": 85224 }, { "epoch": 2.84, "grad_norm": 0.7715643644332886, "learning_rate": 4.443145292451866e-06, "loss": 1.7279, "step": 85225 }, { "epoch": 2.84, "grad_norm": 0.701675295829773, "learning_rate": 4.441353257864544e-06, "loss": 1.6874, "step": 85226 }, { "epoch": 2.84, "grad_norm": 0.7046975493431091, "learning_rate": 4.439561582041873e-06, "loss": 1.6758, "step": 85227 }, { "epoch": 2.84, "grad_norm": 0.7126518487930298, "learning_rate": 4.437770264985985e-06, "loss": 1.6348, "step": 85228 }, { "epoch": 2.84, "grad_norm": 0.7008804678916931, "learning_rate": 4.435979306699111e-06, "loss": 1.6822, "step": 85229 }, { "epoch": 2.84, "grad_norm": 0.7534410953521729, "learning_rate": 4.43418870718345e-06, "loss": 1.6961, "step": 85230 }, { "epoch": 2.84, "grad_norm": 0.722501277923584, "learning_rate": 4.4323984664411e-06, "loss": 1.6742, "step": 85231 }, { "epoch": 2.84, "grad_norm": 0.711006760597229, "learning_rate": 4.43060858447426e-06, "loss": 1.6029, "step": 85232 }, { "epoch": 2.84, "grad_norm": 0.7053952217102051, "learning_rate": 4.428819061285127e-06, "loss": 1.6122, "step": 85233 }, { "epoch": 2.84, "grad_norm": 0.7025180459022522, "learning_rate": 4.427029896875867e-06, "loss": 1.673, "step": 85234 }, { "epoch": 2.84, "grad_norm": 0.7368703484535217, "learning_rate": 4.425241091248644e-06, "loss": 1.6279, "step": 85235 }, { "epoch": 2.84, "grad_norm": 0.6996113061904907, "learning_rate": 4.423452644405623e-06, "loss": 1.7016, "step": 85236 }, { "epoch": 2.84, "grad_norm": 0.7093006372451782, "learning_rate": 4.421664556348936e-06, "loss": 1.6309, "step": 85237 }, { "epoch": 2.84, "grad_norm": 0.7133088707923889, "learning_rate": 4.419876827080848e-06, "loss": 1.5762, "step": 85238 }, { "epoch": 2.84, "grad_norm": 0.7138113975524902, "learning_rate": 4.418089456603458e-06, "loss": 1.6805, "step": 85239 }, { "epoch": 2.84, "grad_norm": 0.7128497958183289, "learning_rate": 4.4163024449189625e-06, "loss": 1.6903, "step": 85240 }, { "epoch": 2.84, "grad_norm": 0.714003324508667, "learning_rate": 4.414515792029527e-06, "loss": 1.5772, "step": 85241 }, { "epoch": 2.84, "grad_norm": 0.7142007350921631, "learning_rate": 4.412729497937284e-06, "loss": 1.6537, "step": 85242 }, { "epoch": 2.84, "grad_norm": 0.7241101264953613, "learning_rate": 4.410943562644498e-06, "loss": 1.6698, "step": 85243 }, { "epoch": 2.84, "grad_norm": 0.7092976570129395, "learning_rate": 4.4091579861532e-06, "loss": 1.691, "step": 85244 }, { "epoch": 2.84, "grad_norm": 0.7239671349525452, "learning_rate": 4.407372768465689e-06, "loss": 1.7388, "step": 85245 }, { "epoch": 2.84, "grad_norm": 0.7075321078300476, "learning_rate": 4.405587909584063e-06, "loss": 1.6831, "step": 85246 }, { "epoch": 2.84, "grad_norm": 0.7216363549232483, "learning_rate": 4.4038034095104535e-06, "loss": 1.6546, "step": 85247 }, { "epoch": 2.84, "grad_norm": 0.7278081178665161, "learning_rate": 4.402019268247126e-06, "loss": 1.71, "step": 85248 }, { "epoch": 2.84, "grad_norm": 0.7196482419967651, "learning_rate": 4.400235485796178e-06, "loss": 1.6979, "step": 85249 }, { "epoch": 2.84, "grad_norm": 0.7164190411567688, "learning_rate": 4.398452062159774e-06, "loss": 1.7299, "step": 85250 }, { "epoch": 2.84, "grad_norm": 0.710712194442749, "learning_rate": 4.396668997340114e-06, "loss": 1.6128, "step": 85251 }, { "epoch": 2.84, "grad_norm": 0.718353271484375, "learning_rate": 4.394886291339328e-06, "loss": 1.6667, "step": 85252 }, { "epoch": 2.84, "grad_norm": 0.7188165783882141, "learning_rate": 4.393103944159615e-06, "loss": 1.65, "step": 85253 }, { "epoch": 2.84, "grad_norm": 0.7195471525192261, "learning_rate": 4.3913219558031066e-06, "loss": 1.6524, "step": 85254 }, { "epoch": 2.84, "grad_norm": 0.7170213460922241, "learning_rate": 4.389540326271968e-06, "loss": 1.7086, "step": 85255 }, { "epoch": 2.84, "grad_norm": 0.7142798900604248, "learning_rate": 4.387759055568396e-06, "loss": 1.6879, "step": 85256 }, { "epoch": 2.84, "grad_norm": 0.710888683795929, "learning_rate": 4.385978143694491e-06, "loss": 1.7465, "step": 85257 }, { "epoch": 2.84, "grad_norm": 0.7096754312515259, "learning_rate": 4.384197590652449e-06, "loss": 1.7257, "step": 85258 }, { "epoch": 2.84, "grad_norm": 0.7171422243118286, "learning_rate": 4.382417396444471e-06, "loss": 1.6428, "step": 85259 }, { "epoch": 2.84, "grad_norm": 0.7177843451499939, "learning_rate": 4.380637561072687e-06, "loss": 1.5761, "step": 85260 }, { "epoch": 2.84, "grad_norm": 0.7152390480041504, "learning_rate": 4.378858084539228e-06, "loss": 1.7167, "step": 85261 }, { "epoch": 2.84, "grad_norm": 0.7016693949699402, "learning_rate": 4.377078966846293e-06, "loss": 1.6829, "step": 85262 }, { "epoch": 2.84, "grad_norm": 0.7086196541786194, "learning_rate": 4.375300207996013e-06, "loss": 1.6542, "step": 85263 }, { "epoch": 2.84, "grad_norm": 0.7201067209243774, "learning_rate": 4.373521807990554e-06, "loss": 1.6325, "step": 85264 }, { "epoch": 2.84, "grad_norm": 0.7340890169143677, "learning_rate": 4.371743766832081e-06, "loss": 1.6536, "step": 85265 }, { "epoch": 2.84, "grad_norm": 0.746159553527832, "learning_rate": 4.369966084522791e-06, "loss": 1.7488, "step": 85266 }, { "epoch": 2.84, "grad_norm": 0.7281225323677063, "learning_rate": 4.368188761064751e-06, "loss": 1.6697, "step": 85267 }, { "epoch": 2.84, "grad_norm": 0.6875993609428406, "learning_rate": 4.36641179646019e-06, "loss": 1.7222, "step": 85268 }, { "epoch": 2.84, "grad_norm": 0.6930356621742249, "learning_rate": 4.3646351907112405e-06, "loss": 1.714, "step": 85269 }, { "epoch": 2.84, "grad_norm": 0.7291475534439087, "learning_rate": 4.362858943820069e-06, "loss": 1.6822, "step": 85270 }, { "epoch": 2.84, "grad_norm": 0.7301099300384521, "learning_rate": 4.361083055788839e-06, "loss": 1.6433, "step": 85271 }, { "epoch": 2.84, "grad_norm": 0.7360248565673828, "learning_rate": 4.359307526619649e-06, "loss": 1.7104, "step": 85272 }, { "epoch": 2.84, "grad_norm": 0.7279203534126282, "learning_rate": 4.357532356314764e-06, "loss": 1.6035, "step": 85273 }, { "epoch": 2.84, "grad_norm": 0.7041947245597839, "learning_rate": 4.355757544876248e-06, "loss": 1.7197, "step": 85274 }, { "epoch": 2.84, "grad_norm": 0.6982681751251221, "learning_rate": 4.353983092306268e-06, "loss": 1.6612, "step": 85275 }, { "epoch": 2.84, "grad_norm": 0.7190449237823486, "learning_rate": 4.3522089986069875e-06, "loss": 1.6945, "step": 85276 }, { "epoch": 2.84, "grad_norm": 0.7099564671516418, "learning_rate": 4.350435263780605e-06, "loss": 1.5773, "step": 85277 }, { "epoch": 2.84, "grad_norm": 0.7123045921325684, "learning_rate": 4.348661887829186e-06, "loss": 1.641, "step": 85278 }, { "epoch": 2.84, "grad_norm": 0.7062070369720459, "learning_rate": 4.346888870754928e-06, "loss": 1.5797, "step": 85279 }, { "epoch": 2.84, "grad_norm": 0.699790358543396, "learning_rate": 4.3451162125600294e-06, "loss": 1.5795, "step": 85280 }, { "epoch": 2.84, "grad_norm": 0.7286095023155212, "learning_rate": 4.343343913246555e-06, "loss": 1.6919, "step": 85281 }, { "epoch": 2.84, "grad_norm": 0.6827612519264221, "learning_rate": 4.3415719728167046e-06, "loss": 1.6296, "step": 85282 }, { "epoch": 2.84, "grad_norm": 0.6972943544387817, "learning_rate": 4.339800391272641e-06, "loss": 1.683, "step": 85283 }, { "epoch": 2.84, "grad_norm": 0.7301568388938904, "learning_rate": 4.3380291686164966e-06, "loss": 1.6897, "step": 85284 }, { "epoch": 2.84, "grad_norm": 0.7238737940788269, "learning_rate": 4.336258304850404e-06, "loss": 1.7138, "step": 85285 }, { "epoch": 2.84, "grad_norm": 0.7104557156562805, "learning_rate": 4.3344877999765604e-06, "loss": 1.6921, "step": 85286 }, { "epoch": 2.84, "grad_norm": 0.7057027816772461, "learning_rate": 4.332717653997064e-06, "loss": 1.6608, "step": 85287 }, { "epoch": 2.84, "grad_norm": 0.7397149801254272, "learning_rate": 4.330947866914048e-06, "loss": 1.7856, "step": 85288 }, { "epoch": 2.84, "grad_norm": 0.7023425102233887, "learning_rate": 4.329178438729741e-06, "loss": 1.7119, "step": 85289 }, { "epoch": 2.84, "grad_norm": 0.7435614466667175, "learning_rate": 4.327409369446244e-06, "loss": 1.6958, "step": 85290 }, { "epoch": 2.84, "grad_norm": 0.7141907215118408, "learning_rate": 4.325640659065688e-06, "loss": 1.7256, "step": 85291 }, { "epoch": 2.84, "grad_norm": 0.7377022504806519, "learning_rate": 4.323872307590237e-06, "loss": 1.6448, "step": 85292 }, { "epoch": 2.84, "grad_norm": 0.7178150415420532, "learning_rate": 4.322104315022057e-06, "loss": 1.6615, "step": 85293 }, { "epoch": 2.84, "grad_norm": 0.7245741486549377, "learning_rate": 4.320336681363279e-06, "loss": 1.7012, "step": 85294 }, { "epoch": 2.84, "grad_norm": 0.7138855457305908, "learning_rate": 4.318569406616035e-06, "loss": 1.6326, "step": 85295 }, { "epoch": 2.84, "grad_norm": 0.6999387145042419, "learning_rate": 4.316802490782489e-06, "loss": 1.6945, "step": 85296 }, { "epoch": 2.84, "grad_norm": 0.6905850172042847, "learning_rate": 4.315035933864741e-06, "loss": 1.6909, "step": 85297 }, { "epoch": 2.84, "grad_norm": 0.7229224443435669, "learning_rate": 4.313269735865021e-06, "loss": 1.6759, "step": 85298 }, { "epoch": 2.84, "grad_norm": 0.7220214009284973, "learning_rate": 4.311503896785395e-06, "loss": 1.7637, "step": 85299 }, { "epoch": 2.84, "grad_norm": 0.7049511075019836, "learning_rate": 4.309738416628061e-06, "loss": 1.6516, "step": 85300 }, { "epoch": 2.84, "grad_norm": 0.7171815037727356, "learning_rate": 4.3079732953951175e-06, "loss": 1.6733, "step": 85301 }, { "epoch": 2.84, "grad_norm": 0.714316189289093, "learning_rate": 4.306208533088728e-06, "loss": 1.6385, "step": 85302 }, { "epoch": 2.84, "grad_norm": 0.7057321071624756, "learning_rate": 4.30444412971106e-06, "loss": 1.6498, "step": 85303 }, { "epoch": 2.84, "grad_norm": 0.7165088653564453, "learning_rate": 4.302680085264176e-06, "loss": 1.6244, "step": 85304 }, { "epoch": 2.84, "grad_norm": 0.7180933356285095, "learning_rate": 4.300916399750309e-06, "loss": 1.5831, "step": 85305 }, { "epoch": 2.84, "grad_norm": 0.7165421843528748, "learning_rate": 4.299153073171557e-06, "loss": 1.6303, "step": 85306 }, { "epoch": 2.84, "grad_norm": 0.7170317769050598, "learning_rate": 4.297390105530052e-06, "loss": 1.6825, "step": 85307 }, { "epoch": 2.84, "grad_norm": 0.7224535346031189, "learning_rate": 4.295627496827958e-06, "loss": 1.708, "step": 85308 }, { "epoch": 2.84, "grad_norm": 0.7183341383934021, "learning_rate": 4.293865247067374e-06, "loss": 1.6769, "step": 85309 }, { "epoch": 2.84, "grad_norm": 0.700553297996521, "learning_rate": 4.292103356250498e-06, "loss": 1.7305, "step": 85310 }, { "epoch": 2.84, "grad_norm": 0.7258979678153992, "learning_rate": 4.290341824379395e-06, "loss": 1.6544, "step": 85311 }, { "epoch": 2.84, "grad_norm": 0.7141077518463135, "learning_rate": 4.288580651456297e-06, "loss": 1.6618, "step": 85312 }, { "epoch": 2.84, "grad_norm": 0.7040296196937561, "learning_rate": 4.286819837483269e-06, "loss": 1.7007, "step": 85313 }, { "epoch": 2.84, "grad_norm": 0.7055218815803528, "learning_rate": 4.285059382462475e-06, "loss": 1.7337, "step": 85314 }, { "epoch": 2.84, "grad_norm": 0.7165026068687439, "learning_rate": 4.283299286396047e-06, "loss": 1.6589, "step": 85315 }, { "epoch": 2.84, "grad_norm": 0.7105382084846497, "learning_rate": 4.28153954928615e-06, "loss": 1.6899, "step": 85316 }, { "epoch": 2.84, "grad_norm": 0.7533925175666809, "learning_rate": 4.2797801711348504e-06, "loss": 1.6707, "step": 85317 }, { "epoch": 2.84, "grad_norm": 0.7031950354576111, "learning_rate": 4.278021151944311e-06, "loss": 1.6975, "step": 85318 }, { "epoch": 2.84, "grad_norm": 0.7316505312919617, "learning_rate": 4.276262491716698e-06, "loss": 1.7078, "step": 85319 }, { "epoch": 2.84, "grad_norm": 0.690851628780365, "learning_rate": 4.274504190454175e-06, "loss": 1.6996, "step": 85320 }, { "epoch": 2.84, "grad_norm": 0.7180716395378113, "learning_rate": 4.272746248158776e-06, "loss": 1.7379, "step": 85321 }, { "epoch": 2.84, "grad_norm": 0.6898835301399231, "learning_rate": 4.2709886648326976e-06, "loss": 1.6811, "step": 85322 }, { "epoch": 2.84, "grad_norm": 0.7104926109313965, "learning_rate": 4.269231440478105e-06, "loss": 1.6326, "step": 85323 }, { "epoch": 2.84, "grad_norm": 0.7239654660224915, "learning_rate": 4.267474575097029e-06, "loss": 1.5837, "step": 85324 }, { "epoch": 2.84, "grad_norm": 0.7171204686164856, "learning_rate": 4.265718068691704e-06, "loss": 1.6418, "step": 85325 }, { "epoch": 2.84, "grad_norm": 0.7063620090484619, "learning_rate": 4.263961921264225e-06, "loss": 1.697, "step": 85326 }, { "epoch": 2.84, "grad_norm": 0.7271502017974854, "learning_rate": 4.2622061328167255e-06, "loss": 1.7251, "step": 85327 }, { "epoch": 2.84, "grad_norm": 0.7313531041145325, "learning_rate": 4.260450703351303e-06, "loss": 1.6383, "step": 85328 }, { "epoch": 2.84, "grad_norm": 0.7047405242919922, "learning_rate": 4.258695632870124e-06, "loss": 1.6336, "step": 85329 }, { "epoch": 2.84, "grad_norm": 0.7442591786384583, "learning_rate": 4.256940921375351e-06, "loss": 1.6561, "step": 85330 }, { "epoch": 2.84, "grad_norm": 0.7245948314666748, "learning_rate": 4.2551865688690515e-06, "loss": 1.6828, "step": 85331 }, { "epoch": 2.84, "grad_norm": 0.7141144275665283, "learning_rate": 4.253432575353355e-06, "loss": 1.606, "step": 85332 }, { "epoch": 2.84, "grad_norm": 0.7242233157157898, "learning_rate": 4.2516789408304605e-06, "loss": 1.6524, "step": 85333 }, { "epoch": 2.84, "grad_norm": 0.7184818983078003, "learning_rate": 4.249925665302434e-06, "loss": 1.6835, "step": 85334 }, { "epoch": 2.84, "grad_norm": 0.7200132012367249, "learning_rate": 4.248172748771406e-06, "loss": 1.7303, "step": 85335 }, { "epoch": 2.84, "grad_norm": 0.736296534538269, "learning_rate": 4.246420191239508e-06, "loss": 1.6946, "step": 85336 }, { "epoch": 2.84, "grad_norm": 0.6961928606033325, "learning_rate": 4.244667992708939e-06, "loss": 1.6863, "step": 85337 }, { "epoch": 2.84, "grad_norm": 0.7159520983695984, "learning_rate": 4.242916153181697e-06, "loss": 1.7011, "step": 85338 }, { "epoch": 2.84, "grad_norm": 0.7333495616912842, "learning_rate": 4.241164672660013e-06, "loss": 1.6281, "step": 85339 }, { "epoch": 2.84, "grad_norm": 0.6933469176292419, "learning_rate": 4.239413551145987e-06, "loss": 1.6632, "step": 85340 }, { "epoch": 2.84, "grad_norm": 0.7241949439048767, "learning_rate": 4.237662788641716e-06, "loss": 1.6637, "step": 85341 }, { "epoch": 2.84, "grad_norm": 0.7064530849456787, "learning_rate": 4.235912385149332e-06, "loss": 1.763, "step": 85342 }, { "epoch": 2.84, "grad_norm": 0.7037320137023926, "learning_rate": 4.234162340670999e-06, "loss": 1.6167, "step": 85343 }, { "epoch": 2.84, "grad_norm": 0.6978058218955994, "learning_rate": 4.232412655208817e-06, "loss": 1.623, "step": 85344 }, { "epoch": 2.84, "grad_norm": 0.7251667976379395, "learning_rate": 4.2306633287648826e-06, "loss": 1.7186, "step": 85345 }, { "epoch": 2.84, "grad_norm": 0.7136713862419128, "learning_rate": 4.228914361341329e-06, "loss": 1.6411, "step": 85346 }, { "epoch": 2.84, "grad_norm": 0.6980944275856018, "learning_rate": 4.22716575294032e-06, "loss": 1.6384, "step": 85347 }, { "epoch": 2.84, "grad_norm": 0.7137028574943542, "learning_rate": 4.225417503563954e-06, "loss": 1.7329, "step": 85348 }, { "epoch": 2.84, "grad_norm": 0.7209651470184326, "learning_rate": 4.223669613214331e-06, "loss": 1.687, "step": 85349 }, { "epoch": 2.84, "grad_norm": 0.7457283735275269, "learning_rate": 4.221922081893614e-06, "loss": 1.687, "step": 85350 }, { "epoch": 2.84, "grad_norm": 0.7238229513168335, "learning_rate": 4.220174909603902e-06, "loss": 1.6577, "step": 85351 }, { "epoch": 2.84, "grad_norm": 0.7080745697021484, "learning_rate": 4.218428096347292e-06, "loss": 1.6652, "step": 85352 }, { "epoch": 2.84, "grad_norm": 0.7198505997657776, "learning_rate": 4.216681642125985e-06, "loss": 1.6476, "step": 85353 }, { "epoch": 2.84, "grad_norm": 0.7015817761421204, "learning_rate": 4.214935546941978e-06, "loss": 1.6068, "step": 85354 }, { "epoch": 2.84, "grad_norm": 0.7326024174690247, "learning_rate": 4.213189810797502e-06, "loss": 1.6944, "step": 85355 }, { "epoch": 2.84, "grad_norm": 0.722436785697937, "learning_rate": 4.211444433694589e-06, "loss": 1.6818, "step": 85356 }, { "epoch": 2.84, "grad_norm": 0.7148340344429016, "learning_rate": 4.209699415635437e-06, "loss": 1.6616, "step": 85357 }, { "epoch": 2.84, "grad_norm": 0.7041400074958801, "learning_rate": 4.207954756622145e-06, "loss": 1.5794, "step": 85358 }, { "epoch": 2.84, "grad_norm": 0.7047148942947388, "learning_rate": 4.206210456656745e-06, "loss": 1.6714, "step": 85359 }, { "epoch": 2.84, "grad_norm": 0.6935598850250244, "learning_rate": 4.204466515741467e-06, "loss": 1.6637, "step": 85360 }, { "epoch": 2.84, "grad_norm": 0.709804356098175, "learning_rate": 4.202722933878377e-06, "loss": 1.6798, "step": 85361 }, { "epoch": 2.84, "grad_norm": 0.7075809240341187, "learning_rate": 4.200979711069607e-06, "loss": 1.6387, "step": 85362 }, { "epoch": 2.84, "grad_norm": 0.7148510813713074, "learning_rate": 4.199236847317255e-06, "loss": 1.6618, "step": 85363 }, { "epoch": 2.84, "grad_norm": 0.7274636626243591, "learning_rate": 4.197494342623453e-06, "loss": 1.7313, "step": 85364 }, { "epoch": 2.84, "grad_norm": 0.712925136089325, "learning_rate": 4.195752196990299e-06, "loss": 1.6076, "step": 85365 }, { "epoch": 2.84, "grad_norm": 0.7164525985717773, "learning_rate": 4.194010410419924e-06, "loss": 1.6924, "step": 85366 }, { "epoch": 2.84, "grad_norm": 0.7090144157409668, "learning_rate": 4.192268982914426e-06, "loss": 1.6341, "step": 85367 }, { "epoch": 2.84, "grad_norm": 2.401805877685547, "learning_rate": 4.190527914475905e-06, "loss": 1.7155, "step": 85368 }, { "epoch": 2.84, "grad_norm": 0.7092202305793762, "learning_rate": 4.188787205106558e-06, "loss": 1.7248, "step": 85369 }, { "epoch": 2.84, "grad_norm": 0.689290463924408, "learning_rate": 4.187046854808418e-06, "loss": 1.6708, "step": 85370 }, { "epoch": 2.84, "grad_norm": 0.7168005108833313, "learning_rate": 4.1853068635835816e-06, "loss": 1.724, "step": 85371 }, { "epoch": 2.84, "grad_norm": 0.7237147688865662, "learning_rate": 4.183567231434215e-06, "loss": 1.6354, "step": 85372 }, { "epoch": 2.84, "grad_norm": 0.7130434513092041, "learning_rate": 4.181827958362449e-06, "loss": 1.7042, "step": 85373 }, { "epoch": 2.84, "grad_norm": 0.7167375087738037, "learning_rate": 4.180089044370283e-06, "loss": 1.6992, "step": 85374 }, { "epoch": 2.84, "grad_norm": 0.7154916524887085, "learning_rate": 4.178350489459947e-06, "loss": 1.5995, "step": 85375 }, { "epoch": 2.84, "grad_norm": 0.7119356989860535, "learning_rate": 4.176612293633508e-06, "loss": 1.6835, "step": 85376 }, { "epoch": 2.84, "grad_norm": 0.728720486164093, "learning_rate": 4.1748744568930625e-06, "loss": 1.7239, "step": 85377 }, { "epoch": 2.84, "grad_norm": 0.7035266160964966, "learning_rate": 4.173136979240743e-06, "loss": 1.7117, "step": 85378 }, { "epoch": 2.84, "grad_norm": 0.7454506754875183, "learning_rate": 4.171399860678648e-06, "loss": 1.71, "step": 85379 }, { "epoch": 2.84, "grad_norm": 0.70481938123703, "learning_rate": 4.169663101208876e-06, "loss": 1.7104, "step": 85380 }, { "epoch": 2.84, "grad_norm": 0.7212980389595032, "learning_rate": 4.167926700833557e-06, "loss": 1.6384, "step": 85381 }, { "epoch": 2.84, "grad_norm": 0.7198649048805237, "learning_rate": 4.166190659554758e-06, "loss": 1.6923, "step": 85382 }, { "epoch": 2.84, "grad_norm": 0.7342529296875, "learning_rate": 4.164454977374643e-06, "loss": 1.6637, "step": 85383 }, { "epoch": 2.84, "grad_norm": 0.6857544183731079, "learning_rate": 4.162719654295277e-06, "loss": 1.6243, "step": 85384 }, { "epoch": 2.84, "grad_norm": 0.7011622190475464, "learning_rate": 4.160984690318759e-06, "loss": 1.6758, "step": 85385 }, { "epoch": 2.84, "grad_norm": 0.7101407051086426, "learning_rate": 4.159250085447252e-06, "loss": 1.7063, "step": 85386 }, { "epoch": 2.84, "grad_norm": 0.7196353077888489, "learning_rate": 4.157515839682824e-06, "loss": 1.6699, "step": 85387 }, { "epoch": 2.84, "grad_norm": 0.6850074529647827, "learning_rate": 4.15578195302757e-06, "loss": 1.7342, "step": 85388 }, { "epoch": 2.84, "grad_norm": 0.7497273683547974, "learning_rate": 4.154048425483592e-06, "loss": 1.6952, "step": 85389 }, { "epoch": 2.84, "grad_norm": 0.7290159463882446, "learning_rate": 4.152315257053051e-06, "loss": 1.7599, "step": 85390 }, { "epoch": 2.84, "grad_norm": 0.7186112403869629, "learning_rate": 4.150582447737949e-06, "loss": 1.705, "step": 85391 }, { "epoch": 2.84, "grad_norm": 0.7178022861480713, "learning_rate": 4.148849997540482e-06, "loss": 1.632, "step": 85392 }, { "epoch": 2.84, "grad_norm": 0.7112796306610107, "learning_rate": 4.1471179064627155e-06, "loss": 1.6658, "step": 85393 }, { "epoch": 2.84, "grad_norm": 0.7545676231384277, "learning_rate": 4.145386174506782e-06, "loss": 1.6887, "step": 85394 }, { "epoch": 2.84, "grad_norm": 0.7310532331466675, "learning_rate": 4.143654801674711e-06, "loss": 1.7121, "step": 85395 }, { "epoch": 2.84, "grad_norm": 0.7206052541732788, "learning_rate": 4.14192378796867e-06, "loss": 1.7277, "step": 85396 }, { "epoch": 2.84, "grad_norm": 0.7030428647994995, "learning_rate": 4.140193133390757e-06, "loss": 1.6497, "step": 85397 }, { "epoch": 2.84, "grad_norm": 0.7445310354232788, "learning_rate": 4.138462837943035e-06, "loss": 1.6472, "step": 85398 }, { "epoch": 2.84, "grad_norm": 0.6960429549217224, "learning_rate": 4.136732901627637e-06, "loss": 1.6199, "step": 85399 }, { "epoch": 2.84, "grad_norm": 0.7213122248649597, "learning_rate": 4.135003324446629e-06, "loss": 1.709, "step": 85400 }, { "epoch": 2.84, "grad_norm": 0.7103843688964844, "learning_rate": 4.1332741064021735e-06, "loss": 1.6954, "step": 85401 }, { "epoch": 2.84, "grad_norm": 0.7591196298599243, "learning_rate": 4.131545247496304e-06, "loss": 1.6888, "step": 85402 }, { "epoch": 2.84, "grad_norm": 0.7310637831687927, "learning_rate": 4.129816747731152e-06, "loss": 1.701, "step": 85403 }, { "epoch": 2.84, "grad_norm": 0.7006551027297974, "learning_rate": 4.128088607108815e-06, "loss": 1.6944, "step": 85404 }, { "epoch": 2.84, "grad_norm": 0.7041528224945068, "learning_rate": 4.1263608256313585e-06, "loss": 1.6303, "step": 85405 }, { "epoch": 2.84, "grad_norm": 0.71909499168396, "learning_rate": 4.124633403300914e-06, "loss": 1.6911, "step": 85406 }, { "epoch": 2.84, "grad_norm": 0.7026910781860352, "learning_rate": 4.12290634011958e-06, "loss": 1.5784, "step": 85407 }, { "epoch": 2.84, "grad_norm": 0.7088595032691956, "learning_rate": 4.121179636089456e-06, "loss": 1.6464, "step": 85408 }, { "epoch": 2.84, "grad_norm": 0.702298641204834, "learning_rate": 4.119453291212571e-06, "loss": 1.7027, "step": 85409 }, { "epoch": 2.84, "grad_norm": 0.716122031211853, "learning_rate": 4.117727305491126e-06, "loss": 1.7425, "step": 85410 }, { "epoch": 2.84, "grad_norm": 0.732516348361969, "learning_rate": 4.11600167892715e-06, "loss": 1.7376, "step": 85411 }, { "epoch": 2.84, "grad_norm": 0.7124748229980469, "learning_rate": 4.114276411522744e-06, "loss": 1.6131, "step": 85412 }, { "epoch": 2.84, "grad_norm": 0.6983824968338013, "learning_rate": 4.112551503280037e-06, "loss": 1.6524, "step": 85413 }, { "epoch": 2.84, "grad_norm": 0.7023789286613464, "learning_rate": 4.1108269542010634e-06, "loss": 1.6459, "step": 85414 }, { "epoch": 2.84, "grad_norm": 0.7114470601081848, "learning_rate": 4.109102764287953e-06, "loss": 1.6496, "step": 85415 }, { "epoch": 2.84, "grad_norm": 0.7185537815093994, "learning_rate": 4.107378933542771e-06, "loss": 1.6437, "step": 85416 }, { "epoch": 2.84, "grad_norm": 0.7083699107170105, "learning_rate": 4.105655461967683e-06, "loss": 1.7149, "step": 85417 }, { "epoch": 2.84, "grad_norm": 0.6923017501831055, "learning_rate": 4.10393234956472e-06, "loss": 1.7262, "step": 85418 }, { "epoch": 2.84, "grad_norm": 0.7092944383621216, "learning_rate": 4.102209596335948e-06, "loss": 1.6253, "step": 85419 }, { "epoch": 2.84, "grad_norm": 0.7151747941970825, "learning_rate": 4.1004872022835315e-06, "loss": 1.6775, "step": 85420 }, { "epoch": 2.84, "grad_norm": 0.7445584535598755, "learning_rate": 4.0987651674095015e-06, "loss": 1.7124, "step": 85421 }, { "epoch": 2.84, "grad_norm": 0.7163399457931519, "learning_rate": 4.0970434917160234e-06, "loss": 1.6702, "step": 85422 }, { "epoch": 2.84, "grad_norm": 0.7089292407035828, "learning_rate": 4.0953221752050955e-06, "loss": 1.6916, "step": 85423 }, { "epoch": 2.84, "grad_norm": 0.7124245762825012, "learning_rate": 4.093601217878817e-06, "loss": 1.6713, "step": 85424 }, { "epoch": 2.84, "grad_norm": 0.699720025062561, "learning_rate": 4.091880619739352e-06, "loss": 1.6519, "step": 85425 }, { "epoch": 2.84, "grad_norm": 0.6927521824836731, "learning_rate": 4.090160380788732e-06, "loss": 1.6523, "step": 85426 }, { "epoch": 2.84, "grad_norm": 0.7137513756752014, "learning_rate": 4.088440501029056e-06, "loss": 1.6508, "step": 85427 }, { "epoch": 2.84, "grad_norm": 0.7207457423210144, "learning_rate": 4.086720980462388e-06, "loss": 1.6605, "step": 85428 }, { "epoch": 2.84, "grad_norm": 0.7064084410667419, "learning_rate": 4.085001819090894e-06, "loss": 1.6855, "step": 85429 }, { "epoch": 2.84, "grad_norm": 0.7091094851493835, "learning_rate": 4.083283016916572e-06, "loss": 1.7013, "step": 85430 }, { "epoch": 2.84, "grad_norm": 0.7150405049324036, "learning_rate": 4.0815645739415535e-06, "loss": 1.6282, "step": 85431 }, { "epoch": 2.84, "grad_norm": 0.7029138207435608, "learning_rate": 4.079846490167871e-06, "loss": 1.6164, "step": 85432 }, { "epoch": 2.84, "grad_norm": 0.7268930077552795, "learning_rate": 4.0781287655977215e-06, "loss": 1.7488, "step": 85433 }, { "epoch": 2.84, "grad_norm": 0.7336599230766296, "learning_rate": 4.076411400233104e-06, "loss": 1.6958, "step": 85434 }, { "epoch": 2.84, "grad_norm": 0.7235193848609924, "learning_rate": 4.074694394076084e-06, "loss": 1.622, "step": 85435 }, { "epoch": 2.84, "grad_norm": 0.6982839107513428, "learning_rate": 4.072977747128825e-06, "loss": 1.6903, "step": 85436 }, { "epoch": 2.84, "grad_norm": 0.7176514267921448, "learning_rate": 4.07126145939336e-06, "loss": 1.7579, "step": 85437 }, { "epoch": 2.84, "grad_norm": 0.7830415368080139, "learning_rate": 4.069545530871754e-06, "loss": 1.6995, "step": 85438 }, { "epoch": 2.84, "grad_norm": 0.6959712505340576, "learning_rate": 4.067829961566105e-06, "loss": 1.7565, "step": 85439 }, { "epoch": 2.84, "grad_norm": 0.7154224514961243, "learning_rate": 4.066114751478578e-06, "loss": 1.7032, "step": 85440 }, { "epoch": 2.84, "grad_norm": 0.6885727643966675, "learning_rate": 4.064399900611104e-06, "loss": 1.6616, "step": 85441 }, { "epoch": 2.84, "grad_norm": 0.7085163593292236, "learning_rate": 4.062685408965882e-06, "loss": 1.5963, "step": 85442 }, { "epoch": 2.84, "grad_norm": 0.6985262036323547, "learning_rate": 4.060971276544944e-06, "loss": 1.6242, "step": 85443 }, { "epoch": 2.84, "grad_norm": 0.7004123330116272, "learning_rate": 4.059257503350422e-06, "loss": 1.6562, "step": 85444 }, { "epoch": 2.84, "grad_norm": 0.6869428157806396, "learning_rate": 4.057544089384312e-06, "loss": 1.6595, "step": 85445 }, { "epoch": 2.84, "grad_norm": 1.370970368385315, "learning_rate": 4.055831034648749e-06, "loss": 1.7214, "step": 85446 }, { "epoch": 2.84, "grad_norm": 0.7155375480651855, "learning_rate": 4.054118339145829e-06, "loss": 1.6365, "step": 85447 }, { "epoch": 2.84, "grad_norm": 0.7386050820350647, "learning_rate": 4.052406002877551e-06, "loss": 1.7634, "step": 85448 }, { "epoch": 2.84, "grad_norm": 0.7122869491577148, "learning_rate": 4.05069402584608e-06, "loss": 1.6923, "step": 85449 }, { "epoch": 2.84, "grad_norm": 0.7121013402938843, "learning_rate": 4.048982408053447e-06, "loss": 1.6701, "step": 85450 }, { "epoch": 2.84, "grad_norm": 0.725667417049408, "learning_rate": 4.047271149501785e-06, "loss": 1.6691, "step": 85451 }, { "epoch": 2.84, "grad_norm": 0.720439612865448, "learning_rate": 4.045560250193059e-06, "loss": 1.6899, "step": 85452 }, { "epoch": 2.84, "grad_norm": 0.7125551104545593, "learning_rate": 4.043849710129465e-06, "loss": 1.6905, "step": 85453 }, { "epoch": 2.84, "grad_norm": 0.707452118396759, "learning_rate": 4.0421395293130375e-06, "loss": 1.6594, "step": 85454 }, { "epoch": 2.84, "grad_norm": 0.7699207663536072, "learning_rate": 4.040429707745807e-06, "loss": 1.6758, "step": 85455 }, { "epoch": 2.84, "grad_norm": 0.6859758496284485, "learning_rate": 4.038720245429905e-06, "loss": 1.7089, "step": 85456 }, { "epoch": 2.84, "grad_norm": 0.7236328721046448, "learning_rate": 4.037011142367397e-06, "loss": 1.6476, "step": 85457 }, { "epoch": 2.84, "grad_norm": 0.7073836922645569, "learning_rate": 4.035302398560348e-06, "loss": 1.6149, "step": 85458 }, { "epoch": 2.84, "grad_norm": 0.7115163803100586, "learning_rate": 4.033594014010821e-06, "loss": 1.736, "step": 85459 }, { "epoch": 2.84, "grad_norm": 0.706188976764679, "learning_rate": 4.031885988720917e-06, "loss": 1.6774, "step": 85460 }, { "epoch": 2.84, "grad_norm": 0.7142396569252014, "learning_rate": 4.030178322692701e-06, "loss": 1.6915, "step": 85461 }, { "epoch": 2.84, "grad_norm": 0.7098792195320129, "learning_rate": 4.028471015928203e-06, "loss": 1.6889, "step": 85462 }, { "epoch": 2.84, "grad_norm": 0.7028581500053406, "learning_rate": 4.026764068429556e-06, "loss": 1.6559, "step": 85463 }, { "epoch": 2.84, "grad_norm": 0.6938060522079468, "learning_rate": 4.025057480198824e-06, "loss": 1.6088, "step": 85464 }, { "epoch": 2.84, "grad_norm": 0.7179743051528931, "learning_rate": 4.023351251238038e-06, "loss": 1.6148, "step": 85465 }, { "epoch": 2.84, "grad_norm": 0.7241809964179993, "learning_rate": 4.021645381549299e-06, "loss": 1.7297, "step": 85466 }, { "epoch": 2.84, "grad_norm": 0.7308387160301208, "learning_rate": 4.0199398711346695e-06, "loss": 1.6833, "step": 85467 }, { "epoch": 2.84, "grad_norm": 0.7194288969039917, "learning_rate": 4.01823471999625e-06, "loss": 1.7294, "step": 85468 }, { "epoch": 2.84, "grad_norm": 0.7178738713264465, "learning_rate": 4.016529928136037e-06, "loss": 1.5877, "step": 85469 }, { "epoch": 2.84, "grad_norm": 0.7112857699394226, "learning_rate": 4.014825495556162e-06, "loss": 1.7076, "step": 85470 }, { "epoch": 2.84, "grad_norm": 0.7039231061935425, "learning_rate": 4.013121422258691e-06, "loss": 1.6859, "step": 85471 }, { "epoch": 2.84, "grad_norm": 0.725423276424408, "learning_rate": 4.011417708245657e-06, "loss": 1.728, "step": 85472 }, { "epoch": 2.84, "grad_norm": 0.7092841267585754, "learning_rate": 4.009714353519156e-06, "loss": 1.6206, "step": 85473 }, { "epoch": 2.84, "grad_norm": 0.6931368112564087, "learning_rate": 4.008011358081287e-06, "loss": 1.6349, "step": 85474 }, { "epoch": 2.84, "grad_norm": 0.6985228657722473, "learning_rate": 4.0063087219340485e-06, "loss": 1.71, "step": 85475 }, { "epoch": 2.84, "grad_norm": 0.7225198149681091, "learning_rate": 4.004606445079539e-06, "loss": 1.7368, "step": 85476 }, { "epoch": 2.84, "grad_norm": 0.7219876646995544, "learning_rate": 4.0029045275198235e-06, "loss": 1.7887, "step": 85477 }, { "epoch": 2.84, "grad_norm": 0.6943843364715576, "learning_rate": 4.001202969256967e-06, "loss": 1.5818, "step": 85478 }, { "epoch": 2.84, "grad_norm": 0.7181384563446045, "learning_rate": 3.999501770293034e-06, "loss": 1.7324, "step": 85479 }, { "epoch": 2.84, "grad_norm": 0.6987687349319458, "learning_rate": 3.997800930630124e-06, "loss": 1.674, "step": 85480 }, { "epoch": 2.84, "grad_norm": 0.7258250713348389, "learning_rate": 3.996100450270234e-06, "loss": 1.6909, "step": 85481 }, { "epoch": 2.84, "grad_norm": 0.7066239714622498, "learning_rate": 3.994400329215463e-06, "loss": 1.6684, "step": 85482 }, { "epoch": 2.84, "grad_norm": 0.6969390511512756, "learning_rate": 3.992700567467877e-06, "loss": 1.646, "step": 85483 }, { "epoch": 2.84, "grad_norm": 0.7425370812416077, "learning_rate": 3.991001165029573e-06, "loss": 1.6796, "step": 85484 }, { "epoch": 2.84, "grad_norm": 0.7417460083961487, "learning_rate": 3.989302121902549e-06, "loss": 1.6271, "step": 85485 }, { "epoch": 2.84, "grad_norm": 0.7245123982429504, "learning_rate": 3.987603438088904e-06, "loss": 1.6935, "step": 85486 }, { "epoch": 2.84, "grad_norm": 0.7276366353034973, "learning_rate": 3.9859051135907035e-06, "loss": 1.7028, "step": 85487 }, { "epoch": 2.84, "grad_norm": 0.7159578800201416, "learning_rate": 3.984207148409979e-06, "loss": 1.6798, "step": 85488 }, { "epoch": 2.84, "grad_norm": 0.7272099852561951, "learning_rate": 3.982509542548795e-06, "loss": 1.6403, "step": 85489 }, { "epoch": 2.84, "grad_norm": 0.7251699566841125, "learning_rate": 3.980812296009283e-06, "loss": 1.6773, "step": 85490 }, { "epoch": 2.84, "grad_norm": 0.7213993072509766, "learning_rate": 3.9791154087934096e-06, "loss": 1.6503, "step": 85491 }, { "epoch": 2.84, "grad_norm": 0.7166209816932678, "learning_rate": 3.977418880903272e-06, "loss": 1.7567, "step": 85492 }, { "epoch": 2.84, "grad_norm": 0.7160850763320923, "learning_rate": 3.975722712340934e-06, "loss": 1.6604, "step": 85493 }, { "epoch": 2.84, "grad_norm": 0.7090333700180054, "learning_rate": 3.974026903108463e-06, "loss": 1.6643, "step": 85494 }, { "epoch": 2.84, "grad_norm": 0.7192767858505249, "learning_rate": 3.972331453207889e-06, "loss": 1.6305, "step": 85495 }, { "epoch": 2.84, "grad_norm": 0.6895714998245239, "learning_rate": 3.970636362641311e-06, "loss": 1.6735, "step": 85496 }, { "epoch": 2.84, "grad_norm": 0.7136694192886353, "learning_rate": 3.96894163141076e-06, "loss": 1.6309, "step": 85497 }, { "epoch": 2.84, "grad_norm": 0.7206308245658875, "learning_rate": 3.967247259518269e-06, "loss": 1.6588, "step": 85498 }, { "epoch": 2.84, "grad_norm": 0.7320873141288757, "learning_rate": 3.965553246965936e-06, "loss": 1.6789, "step": 85499 }, { "epoch": 2.84, "grad_norm": 0.7034644484519958, "learning_rate": 3.963859593755825e-06, "loss": 1.7012, "step": 85500 }, { "epoch": 2.84, "grad_norm": 0.7396697998046875, "learning_rate": 3.962166299889935e-06, "loss": 1.6292, "step": 85501 }, { "epoch": 2.84, "grad_norm": 0.7290945053100586, "learning_rate": 3.960473365370365e-06, "loss": 1.6629, "step": 85502 }, { "epoch": 2.84, "grad_norm": 0.7267711758613586, "learning_rate": 3.958780790199178e-06, "loss": 1.6303, "step": 85503 }, { "epoch": 2.84, "grad_norm": 0.7097706198692322, "learning_rate": 3.957088574378409e-06, "loss": 1.6777, "step": 85504 }, { "epoch": 2.84, "grad_norm": 0.705563485622406, "learning_rate": 3.955396717910086e-06, "loss": 1.6762, "step": 85505 }, { "epoch": 2.84, "grad_norm": 0.7185316681861877, "learning_rate": 3.953705220796311e-06, "loss": 1.6923, "step": 85506 }, { "epoch": 2.84, "grad_norm": 0.7322611808776855, "learning_rate": 3.952014083039146e-06, "loss": 1.7036, "step": 85507 }, { "epoch": 2.84, "grad_norm": 0.7272025346755981, "learning_rate": 3.950323304640557e-06, "loss": 1.7164, "step": 85508 }, { "epoch": 2.84, "grad_norm": 0.7060540914535522, "learning_rate": 3.948632885602676e-06, "loss": 1.7102, "step": 85509 }, { "epoch": 2.84, "grad_norm": 0.7025871872901917, "learning_rate": 3.946942825927568e-06, "loss": 1.6436, "step": 85510 }, { "epoch": 2.84, "grad_norm": 0.7318547368049622, "learning_rate": 3.9452531256172315e-06, "loss": 1.6635, "step": 85511 }, { "epoch": 2.84, "grad_norm": 0.732193648815155, "learning_rate": 3.9435637846736976e-06, "loss": 1.6786, "step": 85512 }, { "epoch": 2.85, "grad_norm": 0.7094175219535828, "learning_rate": 3.941874803099099e-06, "loss": 1.6921, "step": 85513 }, { "epoch": 2.85, "grad_norm": 0.7045371532440186, "learning_rate": 3.940186180895433e-06, "loss": 1.6658, "step": 85514 }, { "epoch": 2.85, "grad_norm": 0.7079792618751526, "learning_rate": 3.938497918064765e-06, "loss": 1.6978, "step": 85515 }, { "epoch": 2.85, "grad_norm": 0.7140040397644043, "learning_rate": 3.936810014609093e-06, "loss": 1.6991, "step": 85516 }, { "epoch": 2.85, "grad_norm": 0.695078432559967, "learning_rate": 3.9351224705305495e-06, "loss": 1.6458, "step": 85517 }, { "epoch": 2.85, "grad_norm": 0.7068710923194885, "learning_rate": 3.933435285831166e-06, "loss": 1.6991, "step": 85518 }, { "epoch": 2.85, "grad_norm": 0.7206384539604187, "learning_rate": 3.93174846051294e-06, "loss": 1.7234, "step": 85519 }, { "epoch": 2.85, "grad_norm": 0.7007952332496643, "learning_rate": 3.930061994577938e-06, "loss": 1.694, "step": 85520 }, { "epoch": 2.85, "grad_norm": 0.7089071869850159, "learning_rate": 3.928375888028257e-06, "loss": 1.616, "step": 85521 }, { "epoch": 2.85, "grad_norm": 0.7088674306869507, "learning_rate": 3.926690140865863e-06, "loss": 1.8142, "step": 85522 }, { "epoch": 2.85, "grad_norm": 0.7249894142150879, "learning_rate": 3.925004753092853e-06, "loss": 1.6903, "step": 85523 }, { "epoch": 2.85, "grad_norm": 0.7217085957527161, "learning_rate": 3.9233197247112605e-06, "loss": 1.7113, "step": 85524 }, { "epoch": 2.85, "grad_norm": 0.7403610944747925, "learning_rate": 3.921635055723149e-06, "loss": 1.7111, "step": 85525 }, { "epoch": 2.85, "grad_norm": 0.7149261832237244, "learning_rate": 3.919950746130518e-06, "loss": 1.5726, "step": 85526 }, { "epoch": 2.85, "grad_norm": 0.7397162914276123, "learning_rate": 3.918266795935465e-06, "loss": 1.7653, "step": 85527 }, { "epoch": 2.85, "grad_norm": 0.725570023059845, "learning_rate": 3.916583205140023e-06, "loss": 1.6893, "step": 85528 }, { "epoch": 2.85, "grad_norm": 0.7140948176383972, "learning_rate": 3.914899973746188e-06, "loss": 1.7171, "step": 85529 }, { "epoch": 2.85, "grad_norm": 0.7283084988594055, "learning_rate": 3.913217101756061e-06, "loss": 1.7073, "step": 85530 }, { "epoch": 2.85, "grad_norm": 0.7046284675598145, "learning_rate": 3.911534589171672e-06, "loss": 1.7359, "step": 85531 }, { "epoch": 2.85, "grad_norm": 0.7368215322494507, "learning_rate": 3.90985243599502e-06, "loss": 1.7243, "step": 85532 }, { "epoch": 2.85, "grad_norm": 0.7218031287193298, "learning_rate": 3.9081706422282035e-06, "loss": 1.7153, "step": 85533 }, { "epoch": 2.85, "grad_norm": 0.6994079351425171, "learning_rate": 3.906489207873254e-06, "loss": 1.6615, "step": 85534 }, { "epoch": 2.85, "grad_norm": 0.7042787671089172, "learning_rate": 3.904808132932169e-06, "loss": 1.7174, "step": 85535 }, { "epoch": 2.85, "grad_norm": 0.7074452042579651, "learning_rate": 3.903127417407015e-06, "loss": 1.7515, "step": 85536 }, { "epoch": 2.85, "grad_norm": 0.724873960018158, "learning_rate": 3.9014470612998894e-06, "loss": 1.7287, "step": 85537 }, { "epoch": 2.85, "grad_norm": 0.7122411131858826, "learning_rate": 3.8997670646127244e-06, "loss": 1.6915, "step": 85538 }, { "epoch": 2.85, "grad_norm": 1.0429142713546753, "learning_rate": 3.898087427347618e-06, "loss": 1.7846, "step": 85539 }, { "epoch": 2.85, "grad_norm": 0.7175348997116089, "learning_rate": 3.896408149506636e-06, "loss": 1.6806, "step": 85540 }, { "epoch": 2.85, "grad_norm": 0.7465538382530212, "learning_rate": 3.894729231091775e-06, "loss": 1.7178, "step": 85541 }, { "epoch": 2.85, "grad_norm": 0.7305488586425781, "learning_rate": 3.893050672105069e-06, "loss": 1.6207, "step": 85542 }, { "epoch": 2.85, "grad_norm": 0.7120901346206665, "learning_rate": 3.8913724725485815e-06, "loss": 1.6645, "step": 85543 }, { "epoch": 2.85, "grad_norm": 0.7391465306282043, "learning_rate": 3.889694632424345e-06, "loss": 1.6855, "step": 85544 }, { "epoch": 2.85, "grad_norm": 0.7151649594306946, "learning_rate": 3.888017151734357e-06, "loss": 1.6271, "step": 85545 }, { "epoch": 2.85, "grad_norm": 0.7213641405105591, "learning_rate": 3.886340030480717e-06, "loss": 1.6895, "step": 85546 }, { "epoch": 2.85, "grad_norm": 0.7132819890975952, "learning_rate": 3.884663268665422e-06, "loss": 1.7306, "step": 85547 }, { "epoch": 2.85, "grad_norm": 0.7303305864334106, "learning_rate": 3.882986866290505e-06, "loss": 1.6702, "step": 85548 }, { "epoch": 2.85, "grad_norm": 0.7167704105377197, "learning_rate": 3.88131082335803e-06, "loss": 1.642, "step": 85549 }, { "epoch": 2.85, "grad_norm": 0.689928412437439, "learning_rate": 3.87963513987003e-06, "loss": 1.6957, "step": 85550 }, { "epoch": 2.85, "grad_norm": 0.7253089547157288, "learning_rate": 3.877959815828469e-06, "loss": 1.6856, "step": 85551 }, { "epoch": 2.85, "grad_norm": 0.7152189016342163, "learning_rate": 3.876284851235445e-06, "loss": 1.6711, "step": 85552 }, { "epoch": 2.85, "grad_norm": 0.7031149864196777, "learning_rate": 3.874610246093024e-06, "loss": 1.6296, "step": 85553 }, { "epoch": 2.85, "grad_norm": 0.7053837776184082, "learning_rate": 3.872936000403171e-06, "loss": 1.6537, "step": 85554 }, { "epoch": 2.85, "grad_norm": 0.7275559306144714, "learning_rate": 3.871262114167917e-06, "loss": 1.7048, "step": 85555 }, { "epoch": 2.85, "grad_norm": 0.7207129001617432, "learning_rate": 3.869588587389327e-06, "loss": 1.7818, "step": 85556 }, { "epoch": 2.85, "grad_norm": 0.7057152986526489, "learning_rate": 3.867915420069467e-06, "loss": 1.6693, "step": 85557 }, { "epoch": 2.85, "grad_norm": 0.7184110879898071, "learning_rate": 3.866242612210269e-06, "loss": 1.6952, "step": 85558 }, { "epoch": 2.85, "grad_norm": 0.7137261629104614, "learning_rate": 3.864570163813829e-06, "loss": 1.6636, "step": 85559 }, { "epoch": 2.85, "grad_norm": 0.6994958519935608, "learning_rate": 3.862898074882182e-06, "loss": 1.7222, "step": 85560 }, { "epoch": 2.85, "grad_norm": 0.7152261137962341, "learning_rate": 3.8612263454173566e-06, "loss": 1.7049, "step": 85561 }, { "epoch": 2.85, "grad_norm": 0.7030189633369446, "learning_rate": 3.859554975421352e-06, "loss": 1.6704, "step": 85562 }, { "epoch": 2.85, "grad_norm": 0.7351958155632019, "learning_rate": 3.857883964896202e-06, "loss": 1.6859, "step": 85563 }, { "epoch": 2.85, "grad_norm": 0.7063068747520447, "learning_rate": 3.85621331384397e-06, "loss": 1.6367, "step": 85564 }, { "epoch": 2.85, "grad_norm": 0.7174084782600403, "learning_rate": 3.854543022266654e-06, "loss": 1.7127, "step": 85565 }, { "epoch": 2.85, "grad_norm": 0.7088407874107361, "learning_rate": 3.852873090166253e-06, "loss": 1.6931, "step": 85566 }, { "epoch": 2.85, "grad_norm": 0.7274243831634521, "learning_rate": 3.851203517544865e-06, "loss": 1.6935, "step": 85567 }, { "epoch": 2.85, "grad_norm": 0.7036515474319458, "learning_rate": 3.849534304404489e-06, "loss": 1.6563, "step": 85568 }, { "epoch": 2.85, "grad_norm": 0.725531816482544, "learning_rate": 3.847865450747123e-06, "loss": 1.7458, "step": 85569 }, { "epoch": 2.85, "grad_norm": 0.713769257068634, "learning_rate": 3.846196956574798e-06, "loss": 1.6851, "step": 85570 }, { "epoch": 2.85, "grad_norm": 0.7152313590049744, "learning_rate": 3.844528821889581e-06, "loss": 1.733, "step": 85571 }, { "epoch": 2.85, "grad_norm": 0.7315589189529419, "learning_rate": 3.842861046693468e-06, "loss": 1.716, "step": 85572 }, { "epoch": 2.85, "grad_norm": 0.6960344314575195, "learning_rate": 3.841193630988459e-06, "loss": 1.6944, "step": 85573 }, { "epoch": 2.85, "grad_norm": 0.6928904056549072, "learning_rate": 3.839526574776619e-06, "loss": 1.6207, "step": 85574 }, { "epoch": 2.85, "grad_norm": 0.7541763782501221, "learning_rate": 3.837859878059979e-06, "loss": 1.6605, "step": 85575 }, { "epoch": 2.85, "grad_norm": 0.6975626349449158, "learning_rate": 3.8361935408405045e-06, "loss": 1.6254, "step": 85576 }, { "epoch": 2.85, "grad_norm": 0.7486221790313721, "learning_rate": 3.8345275631202935e-06, "loss": 1.7238, "step": 85577 }, { "epoch": 2.85, "grad_norm": 0.7183699011802673, "learning_rate": 3.832861944901311e-06, "loss": 1.6711, "step": 85578 }, { "epoch": 2.85, "grad_norm": 0.7093244791030884, "learning_rate": 3.831196686185556e-06, "loss": 1.7107, "step": 85579 }, { "epoch": 2.85, "grad_norm": 0.7270461916923523, "learning_rate": 3.829531786975127e-06, "loss": 1.6667, "step": 85580 }, { "epoch": 2.85, "grad_norm": 0.7221649885177612, "learning_rate": 3.827867247272021e-06, "loss": 1.7094, "step": 85581 }, { "epoch": 2.85, "grad_norm": 0.7031689882278442, "learning_rate": 3.826203067078237e-06, "loss": 1.6752, "step": 85582 }, { "epoch": 2.85, "grad_norm": 0.733588695526123, "learning_rate": 3.824539246395774e-06, "loss": 1.6839, "step": 85583 }, { "epoch": 2.85, "grad_norm": 0.6960190534591675, "learning_rate": 3.822875785226698e-06, "loss": 1.6438, "step": 85584 }, { "epoch": 2.85, "grad_norm": 0.7065496444702148, "learning_rate": 3.8212126835730384e-06, "loss": 1.7214, "step": 85585 }, { "epoch": 2.85, "grad_norm": 0.738057017326355, "learning_rate": 3.819549941436728e-06, "loss": 1.6168, "step": 85586 }, { "epoch": 2.85, "grad_norm": 0.7095769643783569, "learning_rate": 3.8178875588198985e-06, "loss": 1.688, "step": 85587 }, { "epoch": 2.85, "grad_norm": 0.712039053440094, "learning_rate": 3.816225535724482e-06, "loss": 1.5908, "step": 85588 }, { "epoch": 2.85, "grad_norm": 0.7158843278884888, "learning_rate": 3.8145638721525427e-06, "loss": 1.6725, "step": 85589 }, { "epoch": 2.85, "grad_norm": 0.7040872573852539, "learning_rate": 3.81290256810608e-06, "loss": 1.6071, "step": 85590 }, { "epoch": 2.85, "grad_norm": 0.7115730047225952, "learning_rate": 3.8112416235871246e-06, "loss": 1.6885, "step": 85591 }, { "epoch": 2.85, "grad_norm": 0.7134527564048767, "learning_rate": 3.8095810385976753e-06, "loss": 1.6921, "step": 85592 }, { "epoch": 2.85, "grad_norm": 0.6896224021911621, "learning_rate": 3.807920813139731e-06, "loss": 1.6401, "step": 85593 }, { "epoch": 2.85, "grad_norm": 0.7089859843254089, "learning_rate": 3.8062609472153227e-06, "loss": 1.7219, "step": 85594 }, { "epoch": 2.85, "grad_norm": 0.7052633762359619, "learning_rate": 3.8046014408264823e-06, "loss": 1.6571, "step": 85595 }, { "epoch": 2.85, "grad_norm": 0.7202720046043396, "learning_rate": 3.802942293975242e-06, "loss": 1.6704, "step": 85596 }, { "epoch": 2.85, "grad_norm": 0.7059216499328613, "learning_rate": 3.801283506663566e-06, "loss": 1.709, "step": 85597 }, { "epoch": 2.85, "grad_norm": 0.7367543578147888, "learning_rate": 3.799625078893487e-06, "loss": 1.6089, "step": 85598 }, { "epoch": 2.85, "grad_norm": 0.709156334400177, "learning_rate": 3.7979670106670358e-06, "loss": 1.593, "step": 85599 }, { "epoch": 2.85, "grad_norm": 0.7322020530700684, "learning_rate": 3.7963093019861777e-06, "loss": 1.6055, "step": 85600 }, { "epoch": 2.85, "grad_norm": 0.7246361970901489, "learning_rate": 3.7946519528529783e-06, "loss": 1.7404, "step": 85601 }, { "epoch": 2.85, "grad_norm": 0.694662868976593, "learning_rate": 3.792994963269402e-06, "loss": 1.6094, "step": 85602 }, { "epoch": 2.85, "grad_norm": 0.7145894765853882, "learning_rate": 3.791338333237515e-06, "loss": 1.6627, "step": 85603 }, { "epoch": 2.85, "grad_norm": 0.7431232333183289, "learning_rate": 3.789682062759314e-06, "loss": 1.7304, "step": 85604 }, { "epoch": 2.85, "grad_norm": 0.7685593962669373, "learning_rate": 3.788026151836765e-06, "loss": 1.6881, "step": 85605 }, { "epoch": 2.85, "grad_norm": 0.6948065757751465, "learning_rate": 3.786370600471933e-06, "loss": 1.6534, "step": 85606 }, { "epoch": 2.85, "grad_norm": 0.7085559964179993, "learning_rate": 3.784715408666783e-06, "loss": 1.5889, "step": 85607 }, { "epoch": 2.85, "grad_norm": 0.7138121724128723, "learning_rate": 3.7830605764233136e-06, "loss": 1.6812, "step": 85608 }, { "epoch": 2.85, "grad_norm": 0.7190598249435425, "learning_rate": 3.7814061037435896e-06, "loss": 1.5911, "step": 85609 }, { "epoch": 2.85, "grad_norm": 0.726906418800354, "learning_rate": 3.7797519906296092e-06, "loss": 1.7108, "step": 85610 }, { "epoch": 2.85, "grad_norm": 1.6751015186309814, "learning_rate": 3.778098237083371e-06, "loss": 1.7439, "step": 85611 }, { "epoch": 2.85, "grad_norm": 0.7178936004638672, "learning_rate": 3.7764448431068406e-06, "loss": 1.6746, "step": 85612 }, { "epoch": 2.85, "grad_norm": 0.7015847563743591, "learning_rate": 3.7747918087020825e-06, "loss": 1.6631, "step": 85613 }, { "epoch": 2.85, "grad_norm": 0.7060803771018982, "learning_rate": 3.773139133871095e-06, "loss": 1.6167, "step": 85614 }, { "epoch": 2.85, "grad_norm": 0.7099664807319641, "learning_rate": 3.77148681861581e-06, "loss": 1.6879, "step": 85615 }, { "epoch": 2.85, "grad_norm": 0.7079488635063171, "learning_rate": 3.7698348629383257e-06, "loss": 1.7477, "step": 85616 }, { "epoch": 2.85, "grad_norm": 0.6931410431861877, "learning_rate": 3.768183266840641e-06, "loss": 1.677, "step": 85617 }, { "epoch": 2.85, "grad_norm": 0.705386757850647, "learning_rate": 3.7665320303247205e-06, "loss": 1.6498, "step": 85618 }, { "epoch": 2.85, "grad_norm": 0.753813624382019, "learning_rate": 3.764881153392563e-06, "loss": 1.6583, "step": 85619 }, { "epoch": 2.85, "grad_norm": 0.7038636803627014, "learning_rate": 3.7632306360461993e-06, "loss": 1.7323, "step": 85620 }, { "epoch": 2.85, "grad_norm": 0.6987013816833496, "learning_rate": 3.761580478287629e-06, "loss": 1.627, "step": 85621 }, { "epoch": 2.85, "grad_norm": 0.7099648118019104, "learning_rate": 3.75993068011885e-06, "loss": 1.673, "step": 85622 }, { "epoch": 2.85, "grad_norm": 0.7153835892677307, "learning_rate": 3.7582812415418606e-06, "loss": 1.6727, "step": 85623 }, { "epoch": 2.85, "grad_norm": 0.7405449748039246, "learning_rate": 3.7566321625586925e-06, "loss": 1.6649, "step": 85624 }, { "epoch": 2.85, "grad_norm": 0.7053982615470886, "learning_rate": 3.754983443171311e-06, "loss": 1.7112, "step": 85625 }, { "epoch": 2.85, "grad_norm": 0.7159618735313416, "learning_rate": 3.753335083381681e-06, "loss": 1.6799, "step": 85626 }, { "epoch": 2.85, "grad_norm": 0.720893383026123, "learning_rate": 3.7516870831919345e-06, "loss": 1.6492, "step": 85627 }, { "epoch": 2.85, "grad_norm": 0.7047932744026184, "learning_rate": 3.750039442603936e-06, "loss": 1.684, "step": 85628 }, { "epoch": 2.85, "grad_norm": 0.7538225054740906, "learning_rate": 3.748392161619751e-06, "loss": 1.6276, "step": 85629 }, { "epoch": 2.85, "grad_norm": 0.7081869840621948, "learning_rate": 3.7467452402413445e-06, "loss": 1.717, "step": 85630 }, { "epoch": 2.85, "grad_norm": 0.7218213081359863, "learning_rate": 3.745098678470815e-06, "loss": 1.7201, "step": 85631 }, { "epoch": 2.85, "grad_norm": 0.7046061158180237, "learning_rate": 3.743452476310027e-06, "loss": 1.6763, "step": 85632 }, { "epoch": 2.85, "grad_norm": 0.7148858308792114, "learning_rate": 3.741806633761013e-06, "loss": 1.6909, "step": 85633 }, { "epoch": 2.85, "grad_norm": 0.7079116106033325, "learning_rate": 3.7401611508258377e-06, "loss": 1.666, "step": 85634 }, { "epoch": 2.85, "grad_norm": 0.7034997940063477, "learning_rate": 3.738516027506466e-06, "loss": 1.6212, "step": 85635 }, { "epoch": 2.85, "grad_norm": 0.7398078441619873, "learning_rate": 3.73687126380483e-06, "loss": 1.7086, "step": 85636 }, { "epoch": 2.85, "grad_norm": 0.7129539847373962, "learning_rate": 3.7352268597230286e-06, "loss": 1.6005, "step": 85637 }, { "epoch": 2.85, "grad_norm": 0.7162463068962097, "learning_rate": 3.733582815262992e-06, "loss": 1.6587, "step": 85638 }, { "epoch": 2.85, "grad_norm": 0.6944640278816223, "learning_rate": 3.73193913042672e-06, "loss": 1.6686, "step": 85639 }, { "epoch": 2.85, "grad_norm": 0.7019626498222351, "learning_rate": 3.7302958052162103e-06, "loss": 1.6038, "step": 85640 }, { "epoch": 2.85, "grad_norm": 0.7350980043411255, "learning_rate": 3.7286528396334948e-06, "loss": 1.7083, "step": 85641 }, { "epoch": 2.85, "grad_norm": 0.7094780206680298, "learning_rate": 3.7270102336805717e-06, "loss": 1.6619, "step": 85642 }, { "epoch": 2.85, "grad_norm": 0.7395114898681641, "learning_rate": 3.72536798735934e-06, "loss": 1.7323, "step": 85643 }, { "epoch": 2.85, "grad_norm": 0.711935818195343, "learning_rate": 3.7237261006718977e-06, "loss": 1.6479, "step": 85644 }, { "epoch": 2.85, "grad_norm": 0.7258585095405579, "learning_rate": 3.72208457362021e-06, "loss": 1.7204, "step": 85645 }, { "epoch": 2.85, "grad_norm": 0.7496588230133057, "learning_rate": 3.720443406206208e-06, "loss": 1.7212, "step": 85646 }, { "epoch": 2.85, "grad_norm": 0.7038087248802185, "learning_rate": 3.718802598431958e-06, "loss": 1.6093, "step": 85647 }, { "epoch": 2.85, "grad_norm": 0.700925886631012, "learning_rate": 3.7171621502994243e-06, "loss": 1.6509, "step": 85648 }, { "epoch": 2.85, "grad_norm": 0.7041703462600708, "learning_rate": 3.715522061810605e-06, "loss": 1.6355, "step": 85649 }, { "epoch": 2.85, "grad_norm": 0.718309760093689, "learning_rate": 3.7138823329674992e-06, "loss": 1.6307, "step": 85650 }, { "epoch": 2.85, "grad_norm": 0.718824565410614, "learning_rate": 3.7122429637720717e-06, "loss": 1.6554, "step": 85651 }, { "epoch": 2.85, "grad_norm": 0.7547932863235474, "learning_rate": 3.710603954226321e-06, "loss": 1.6979, "step": 85652 }, { "epoch": 2.85, "grad_norm": 0.7273061275482178, "learning_rate": 3.708965304332245e-06, "loss": 1.6438, "step": 85653 }, { "epoch": 2.85, "grad_norm": 0.6999568343162537, "learning_rate": 3.7073270140918434e-06, "loss": 1.6991, "step": 85654 }, { "epoch": 2.85, "grad_norm": 0.7522774338722229, "learning_rate": 3.7056890835070465e-06, "loss": 1.6171, "step": 85655 }, { "epoch": 2.85, "grad_norm": 0.7047615051269531, "learning_rate": 3.7040515125799195e-06, "loss": 1.6786, "step": 85656 }, { "epoch": 2.85, "grad_norm": 0.7002080082893372, "learning_rate": 3.7024143013124285e-06, "loss": 1.6451, "step": 85657 }, { "epoch": 2.85, "grad_norm": 0.6916027665138245, "learning_rate": 3.7007774497065047e-06, "loss": 1.5778, "step": 85658 }, { "epoch": 2.85, "grad_norm": 0.7289189100265503, "learning_rate": 3.6991409577642127e-06, "loss": 1.7189, "step": 85659 }, { "epoch": 2.85, "grad_norm": 0.7065927982330322, "learning_rate": 3.6975048254875185e-06, "loss": 1.6933, "step": 85660 }, { "epoch": 2.85, "grad_norm": 0.7238661050796509, "learning_rate": 3.695869052878353e-06, "loss": 1.6387, "step": 85661 }, { "epoch": 2.85, "grad_norm": 0.6977479457855225, "learning_rate": 3.6942336399387817e-06, "loss": 1.6637, "step": 85662 }, { "epoch": 2.85, "grad_norm": 0.7180624604225159, "learning_rate": 3.6925985866707364e-06, "loss": 1.6406, "step": 85663 }, { "epoch": 2.85, "grad_norm": 0.7160996794700623, "learning_rate": 3.6909638930762153e-06, "loss": 1.7437, "step": 85664 }, { "epoch": 2.85, "grad_norm": 0.7296161651611328, "learning_rate": 3.689329559157217e-06, "loss": 1.7513, "step": 85665 }, { "epoch": 2.85, "grad_norm": 0.7280240058898926, "learning_rate": 3.687695584915673e-06, "loss": 1.7009, "step": 85666 }, { "epoch": 2.85, "grad_norm": 0.7083579301834106, "learning_rate": 3.686061970353682e-06, "loss": 1.6506, "step": 85667 }, { "epoch": 2.85, "grad_norm": 0.6952793002128601, "learning_rate": 3.684428715473109e-06, "loss": 1.6237, "step": 85668 }, { "epoch": 2.85, "grad_norm": 0.7306268215179443, "learning_rate": 3.682795820275952e-06, "loss": 1.7949, "step": 85669 }, { "epoch": 2.85, "grad_norm": 0.7350814938545227, "learning_rate": 3.6811632847642768e-06, "loss": 1.6526, "step": 85670 }, { "epoch": 2.85, "grad_norm": 0.729456901550293, "learning_rate": 3.6795311089399815e-06, "loss": 1.7082, "step": 85671 }, { "epoch": 2.85, "grad_norm": 0.7277998328208923, "learning_rate": 3.6778992928050645e-06, "loss": 1.6176, "step": 85672 }, { "epoch": 2.85, "grad_norm": 0.7504552602767944, "learning_rate": 3.6762678363614904e-06, "loss": 1.6904, "step": 85673 }, { "epoch": 2.85, "grad_norm": 0.7055904865264893, "learning_rate": 3.6746367396113586e-06, "loss": 1.7002, "step": 85674 }, { "epoch": 2.85, "grad_norm": 0.723422646522522, "learning_rate": 3.6730060025564665e-06, "loss": 1.7123, "step": 85675 }, { "epoch": 2.85, "grad_norm": 0.7040573358535767, "learning_rate": 3.6713756251989135e-06, "loss": 1.6653, "step": 85676 }, { "epoch": 2.85, "grad_norm": 0.7246603965759277, "learning_rate": 3.669745607540664e-06, "loss": 1.7129, "step": 85677 }, { "epoch": 2.85, "grad_norm": 0.7261252403259277, "learning_rate": 3.668115949583683e-06, "loss": 1.6893, "step": 85678 }, { "epoch": 2.85, "grad_norm": 0.7174392342567444, "learning_rate": 3.6664866513299027e-06, "loss": 1.629, "step": 85679 }, { "epoch": 2.85, "grad_norm": 0.7276219725608826, "learning_rate": 3.664857712781355e-06, "loss": 1.7427, "step": 85680 }, { "epoch": 2.85, "grad_norm": 0.7163518071174622, "learning_rate": 3.663229133940071e-06, "loss": 1.6983, "step": 85681 }, { "epoch": 2.85, "grad_norm": 0.7464030981063843, "learning_rate": 3.661600914807883e-06, "loss": 1.6299, "step": 85682 }, { "epoch": 2.85, "grad_norm": 0.7160347104072571, "learning_rate": 3.659973055386889e-06, "loss": 1.7191, "step": 85683 }, { "epoch": 2.85, "grad_norm": 0.7170818448066711, "learning_rate": 3.658345555679021e-06, "loss": 1.6883, "step": 85684 }, { "epoch": 2.85, "grad_norm": 0.7393361926078796, "learning_rate": 3.656718415686277e-06, "loss": 1.7164, "step": 85685 }, { "epoch": 2.85, "grad_norm": 0.7287125587463379, "learning_rate": 3.6550916354105562e-06, "loss": 1.6491, "step": 85686 }, { "epoch": 2.85, "grad_norm": 0.7419789433479309, "learning_rate": 3.653465214853923e-06, "loss": 1.6916, "step": 85687 }, { "epoch": 2.85, "grad_norm": 0.6904694437980652, "learning_rate": 3.651839154018343e-06, "loss": 1.6673, "step": 85688 }, { "epoch": 2.85, "grad_norm": 0.6971414089202881, "learning_rate": 3.650213452905748e-06, "loss": 1.6252, "step": 85689 }, { "epoch": 2.85, "grad_norm": 0.7201665043830872, "learning_rate": 3.648588111518103e-06, "loss": 1.6625, "step": 85690 }, { "epoch": 2.85, "grad_norm": 0.7376270890235901, "learning_rate": 3.6469631298574387e-06, "loss": 1.7254, "step": 85691 }, { "epoch": 2.85, "grad_norm": 0.7038443684577942, "learning_rate": 3.645338507925688e-06, "loss": 1.668, "step": 85692 }, { "epoch": 2.85, "grad_norm": 0.6986677646636963, "learning_rate": 3.6437142457248157e-06, "loss": 1.6504, "step": 85693 }, { "epoch": 2.85, "grad_norm": 0.7243886590003967, "learning_rate": 3.6420903432568204e-06, "loss": 1.6603, "step": 85694 }, { "epoch": 2.85, "grad_norm": 0.7344019412994385, "learning_rate": 3.640466800523667e-06, "loss": 1.6829, "step": 85695 }, { "epoch": 2.85, "grad_norm": 0.7159256935119629, "learning_rate": 3.6388436175273207e-06, "loss": 1.6478, "step": 85696 }, { "epoch": 2.85, "grad_norm": 0.6928927898406982, "learning_rate": 3.637220794269713e-06, "loss": 1.7168, "step": 85697 }, { "epoch": 2.85, "grad_norm": 0.7297525405883789, "learning_rate": 3.635598330752909e-06, "loss": 1.7453, "step": 85698 }, { "epoch": 2.85, "grad_norm": 0.722386360168457, "learning_rate": 3.6339762269788074e-06, "loss": 1.7257, "step": 85699 }, { "epoch": 2.85, "grad_norm": 0.7316710948944092, "learning_rate": 3.632354482949373e-06, "loss": 1.6466, "step": 85700 }, { "epoch": 2.85, "grad_norm": 0.7292280197143555, "learning_rate": 3.630733098666605e-06, "loss": 1.654, "step": 85701 }, { "epoch": 2.85, "grad_norm": 0.7310788631439209, "learning_rate": 3.629112074132434e-06, "loss": 1.6993, "step": 85702 }, { "epoch": 2.85, "grad_norm": 0.7155560851097107, "learning_rate": 3.6274914093488595e-06, "loss": 1.6571, "step": 85703 }, { "epoch": 2.85, "grad_norm": 0.7208957672119141, "learning_rate": 3.625871104317879e-06, "loss": 1.7297, "step": 85704 }, { "epoch": 2.85, "grad_norm": 0.6805044412612915, "learning_rate": 3.6242511590413913e-06, "loss": 1.7268, "step": 85705 }, { "epoch": 2.85, "grad_norm": 0.6990224123001099, "learning_rate": 3.6226315735213953e-06, "loss": 1.5982, "step": 85706 }, { "epoch": 2.85, "grad_norm": 0.7138506770133972, "learning_rate": 3.621012347759855e-06, "loss": 1.684, "step": 85707 }, { "epoch": 2.85, "grad_norm": 0.7067584991455078, "learning_rate": 3.6193934817587365e-06, "loss": 1.6881, "step": 85708 }, { "epoch": 2.85, "grad_norm": 0.7217222452163696, "learning_rate": 3.617774975520038e-06, "loss": 1.6281, "step": 85709 }, { "epoch": 2.85, "grad_norm": 0.7535019516944885, "learning_rate": 3.616156829045658e-06, "loss": 1.7085, "step": 85710 }, { "epoch": 2.85, "grad_norm": 0.6983001828193665, "learning_rate": 3.614539042337594e-06, "loss": 1.6793, "step": 85711 }, { "epoch": 2.85, "grad_norm": 0.7170397639274597, "learning_rate": 3.6129216153978123e-06, "loss": 1.6446, "step": 85712 }, { "epoch": 2.85, "grad_norm": 0.7057632803916931, "learning_rate": 3.6113045482282775e-06, "loss": 1.6909, "step": 85713 }, { "epoch": 2.85, "grad_norm": 0.728669285774231, "learning_rate": 3.609687840830955e-06, "loss": 1.6794, "step": 85714 }, { "epoch": 2.85, "grad_norm": 0.6970016360282898, "learning_rate": 3.608071493207776e-06, "loss": 1.7053, "step": 85715 }, { "epoch": 2.85, "grad_norm": 0.7281619906425476, "learning_rate": 3.606455505360739e-06, "loss": 1.7484, "step": 85716 }, { "epoch": 2.85, "grad_norm": 0.6977095007896423, "learning_rate": 3.6048398772917765e-06, "loss": 1.6866, "step": 85717 }, { "epoch": 2.85, "grad_norm": 0.7053695917129517, "learning_rate": 3.603224609002919e-06, "loss": 1.6294, "step": 85718 }, { "epoch": 2.85, "grad_norm": 0.7143104672431946, "learning_rate": 3.601609700496e-06, "loss": 1.5865, "step": 85719 }, { "epoch": 2.85, "grad_norm": 0.728882908821106, "learning_rate": 3.5999951517731163e-06, "loss": 1.6169, "step": 85720 }, { "epoch": 2.85, "grad_norm": 0.7176527976989746, "learning_rate": 3.598380962836167e-06, "loss": 1.6537, "step": 85721 }, { "epoch": 2.85, "grad_norm": 0.7307997345924377, "learning_rate": 3.596767133687084e-06, "loss": 1.7365, "step": 85722 }, { "epoch": 2.85, "grad_norm": 0.7104251980781555, "learning_rate": 3.5951536643278657e-06, "loss": 1.6727, "step": 85723 }, { "epoch": 2.85, "grad_norm": 0.7365193367004395, "learning_rate": 3.5935405547604437e-06, "loss": 1.6336, "step": 85724 }, { "epoch": 2.85, "grad_norm": 0.7222488522529602, "learning_rate": 3.5919278049867827e-06, "loss": 1.67, "step": 85725 }, { "epoch": 2.85, "grad_norm": 0.7048193216323853, "learning_rate": 3.590315415008849e-06, "loss": 1.6793, "step": 85726 }, { "epoch": 2.85, "grad_norm": 0.7235596776008606, "learning_rate": 3.588703384828606e-06, "loss": 1.7164, "step": 85727 }, { "epoch": 2.85, "grad_norm": 0.7068056464195251, "learning_rate": 3.5870917144480204e-06, "loss": 1.6998, "step": 85728 }, { "epoch": 2.85, "grad_norm": 0.733039140701294, "learning_rate": 3.585480403869023e-06, "loss": 1.5586, "step": 85729 }, { "epoch": 2.85, "grad_norm": 0.7104909420013428, "learning_rate": 3.5838694530935795e-06, "loss": 1.6691, "step": 85730 }, { "epoch": 2.85, "grad_norm": 0.7096444368362427, "learning_rate": 3.5822588621236546e-06, "loss": 1.6268, "step": 85731 }, { "epoch": 2.85, "grad_norm": 0.7095276117324829, "learning_rate": 3.580648630961147e-06, "loss": 1.5748, "step": 85732 }, { "epoch": 2.85, "grad_norm": 0.7186088562011719, "learning_rate": 3.5790387596080884e-06, "loss": 1.6525, "step": 85733 }, { "epoch": 2.85, "grad_norm": 0.71369469165802, "learning_rate": 3.57742924806641e-06, "loss": 1.6841, "step": 85734 }, { "epoch": 2.85, "grad_norm": 0.7164143323898315, "learning_rate": 3.5758200963380447e-06, "loss": 1.7075, "step": 85735 }, { "epoch": 2.85, "grad_norm": 0.7189555764198303, "learning_rate": 3.5742113044249233e-06, "loss": 1.6097, "step": 85736 }, { "epoch": 2.85, "grad_norm": 0.7220624685287476, "learning_rate": 3.572602872329078e-06, "loss": 1.7263, "step": 85737 }, { "epoch": 2.85, "grad_norm": 0.7021587491035461, "learning_rate": 3.5709948000524067e-06, "loss": 1.7207, "step": 85738 }, { "epoch": 2.85, "grad_norm": 0.7130181789398193, "learning_rate": 3.5693870875968757e-06, "loss": 1.6888, "step": 85739 }, { "epoch": 2.85, "grad_norm": 0.7008494138717651, "learning_rate": 3.5677797349644155e-06, "loss": 1.7029, "step": 85740 }, { "epoch": 2.85, "grad_norm": 0.741253137588501, "learning_rate": 3.5661727421570254e-06, "loss": 1.6877, "step": 85741 }, { "epoch": 2.85, "grad_norm": 0.7018686532974243, "learning_rate": 3.5645661091766033e-06, "loss": 1.6, "step": 85742 }, { "epoch": 2.85, "grad_norm": 0.7238032817840576, "learning_rate": 3.562959836025081e-06, "loss": 1.6738, "step": 85743 }, { "epoch": 2.85, "grad_norm": 0.733034074306488, "learning_rate": 3.561353922704524e-06, "loss": 1.7551, "step": 85744 }, { "epoch": 2.85, "grad_norm": 0.724216103553772, "learning_rate": 3.5597483692167638e-06, "loss": 1.6158, "step": 85745 }, { "epoch": 2.85, "grad_norm": 0.7304940819740295, "learning_rate": 3.5581431755637655e-06, "loss": 1.6951, "step": 85746 }, { "epoch": 2.85, "grad_norm": 0.7532188296318054, "learning_rate": 3.556538341747528e-06, "loss": 1.7181, "step": 85747 }, { "epoch": 2.85, "grad_norm": 0.7120028138160706, "learning_rate": 3.554933867770016e-06, "loss": 1.667, "step": 85748 }, { "epoch": 2.85, "grad_norm": 0.7050902843475342, "learning_rate": 3.5533297536330607e-06, "loss": 1.6689, "step": 85749 }, { "epoch": 2.85, "grad_norm": 0.7235134840011597, "learning_rate": 3.5517259993387283e-06, "loss": 1.7251, "step": 85750 }, { "epoch": 2.85, "grad_norm": 0.7105157375335693, "learning_rate": 3.5501226048889166e-06, "loss": 1.6503, "step": 85751 }, { "epoch": 2.85, "grad_norm": 0.7261075377464294, "learning_rate": 3.5485195702855908e-06, "loss": 1.6562, "step": 85752 }, { "epoch": 2.85, "grad_norm": 0.7067257761955261, "learning_rate": 3.5469168955306495e-06, "loss": 1.6744, "step": 85753 }, { "epoch": 2.85, "grad_norm": 0.7322530150413513, "learning_rate": 3.545314580626091e-06, "loss": 1.6949, "step": 85754 }, { "epoch": 2.85, "grad_norm": 0.7191324234008789, "learning_rate": 3.543712625573847e-06, "loss": 1.6577, "step": 85755 }, { "epoch": 2.85, "grad_norm": 0.7108035087585449, "learning_rate": 3.542111030375816e-06, "loss": 1.6081, "step": 85756 }, { "epoch": 2.85, "grad_norm": 0.7014500498771667, "learning_rate": 3.540509795033997e-06, "loss": 1.7218, "step": 85757 }, { "epoch": 2.85, "grad_norm": 0.7423103451728821, "learning_rate": 3.5389089195503207e-06, "loss": 1.6275, "step": 85758 }, { "epoch": 2.85, "grad_norm": 0.7217530012130737, "learning_rate": 3.537308403926753e-06, "loss": 1.6462, "step": 85759 }, { "epoch": 2.85, "grad_norm": 0.7071548104286194, "learning_rate": 3.5357082481651923e-06, "loss": 1.6847, "step": 85760 }, { "epoch": 2.85, "grad_norm": 0.682712733745575, "learning_rate": 3.5341084522676035e-06, "loss": 1.6638, "step": 85761 }, { "epoch": 2.85, "grad_norm": 0.710155189037323, "learning_rate": 3.5325090162359183e-06, "loss": 1.6302, "step": 85762 }, { "epoch": 2.85, "grad_norm": 0.7224849462509155, "learning_rate": 3.5309099400720685e-06, "loss": 1.6485, "step": 85763 }, { "epoch": 2.85, "grad_norm": 0.6891863346099854, "learning_rate": 3.529311223778053e-06, "loss": 1.6754, "step": 85764 }, { "epoch": 2.85, "grad_norm": 0.691624641418457, "learning_rate": 3.5277128673557364e-06, "loss": 1.6348, "step": 85765 }, { "epoch": 2.85, "grad_norm": 0.7108355164527893, "learning_rate": 3.5261148708071173e-06, "loss": 1.5736, "step": 85766 }, { "epoch": 2.85, "grad_norm": 0.7232012152671814, "learning_rate": 3.524517234134061e-06, "loss": 1.6842, "step": 85767 }, { "epoch": 2.85, "grad_norm": 0.711591899394989, "learning_rate": 3.5229199573386324e-06, "loss": 1.652, "step": 85768 }, { "epoch": 2.85, "grad_norm": 0.7208305597305298, "learning_rate": 3.5213230404226633e-06, "loss": 1.6623, "step": 85769 }, { "epoch": 2.85, "grad_norm": 0.7239527702331543, "learning_rate": 3.5197264833880855e-06, "loss": 1.6369, "step": 85770 }, { "epoch": 2.85, "grad_norm": 0.7040490508079529, "learning_rate": 3.518130286236931e-06, "loss": 1.6766, "step": 85771 }, { "epoch": 2.85, "grad_norm": 0.7200002670288086, "learning_rate": 3.516534448971031e-06, "loss": 1.6677, "step": 85772 }, { "epoch": 2.85, "grad_norm": 0.7167665362358093, "learning_rate": 3.5149389715924183e-06, "loss": 1.655, "step": 85773 }, { "epoch": 2.85, "grad_norm": 0.73697429895401, "learning_rate": 3.5133438541029903e-06, "loss": 1.7607, "step": 85774 }, { "epoch": 2.85, "grad_norm": 0.7335237264633179, "learning_rate": 3.5117490965046457e-06, "loss": 1.6425, "step": 85775 }, { "epoch": 2.85, "grad_norm": 0.7201403379440308, "learning_rate": 3.51015469879935e-06, "loss": 1.6724, "step": 85776 }, { "epoch": 2.85, "grad_norm": 0.7232730388641357, "learning_rate": 3.508560660989035e-06, "loss": 1.705, "step": 85777 }, { "epoch": 2.85, "grad_norm": 0.6927248239517212, "learning_rate": 3.5069669830756653e-06, "loss": 1.6359, "step": 85778 }, { "epoch": 2.85, "grad_norm": 0.7064985036849976, "learning_rate": 3.5053736650611397e-06, "loss": 1.6406, "step": 85779 }, { "epoch": 2.85, "grad_norm": 0.7235140204429626, "learning_rate": 3.503780706947423e-06, "loss": 1.7127, "step": 85780 }, { "epoch": 2.85, "grad_norm": 0.7313904762268066, "learning_rate": 3.5021881087364144e-06, "loss": 1.64, "step": 85781 }, { "epoch": 2.85, "grad_norm": 0.725723147392273, "learning_rate": 3.5005958704300785e-06, "loss": 1.6301, "step": 85782 }, { "epoch": 2.85, "grad_norm": 0.7233861088752747, "learning_rate": 3.4990039920302805e-06, "loss": 1.6561, "step": 85783 }, { "epoch": 2.85, "grad_norm": 0.7077915668487549, "learning_rate": 3.4974124735390852e-06, "loss": 1.6456, "step": 85784 }, { "epoch": 2.85, "grad_norm": 0.6988189220428467, "learning_rate": 3.4958213149582912e-06, "loss": 1.6517, "step": 85785 }, { "epoch": 2.85, "grad_norm": 0.7131619453430176, "learning_rate": 3.494230516289864e-06, "loss": 1.7435, "step": 85786 }, { "epoch": 2.85, "grad_norm": 0.7110528349876404, "learning_rate": 3.492640077535802e-06, "loss": 1.6761, "step": 85787 }, { "epoch": 2.85, "grad_norm": 0.731143593788147, "learning_rate": 3.49104999869797e-06, "loss": 1.6904, "step": 85788 }, { "epoch": 2.85, "grad_norm": 0.7123526334762573, "learning_rate": 3.4894602797783e-06, "loss": 1.663, "step": 85789 }, { "epoch": 2.85, "grad_norm": 0.7205601930618286, "learning_rate": 3.4878709207787236e-06, "loss": 1.7098, "step": 85790 }, { "epoch": 2.85, "grad_norm": 0.7250813841819763, "learning_rate": 3.4862819217012395e-06, "loss": 1.6662, "step": 85791 }, { "epoch": 2.85, "grad_norm": 0.7257047295570374, "learning_rate": 3.484693282547679e-06, "loss": 1.696, "step": 85792 }, { "epoch": 2.85, "grad_norm": 0.713631272315979, "learning_rate": 3.483105003320008e-06, "loss": 1.7224, "step": 85793 }, { "epoch": 2.85, "grad_norm": 0.7244154810905457, "learning_rate": 3.481517084020158e-06, "loss": 1.6661, "step": 85794 }, { "epoch": 2.85, "grad_norm": 0.7065724730491638, "learning_rate": 3.4799295246500934e-06, "loss": 1.6842, "step": 85795 }, { "epoch": 2.85, "grad_norm": 0.708873450756073, "learning_rate": 3.478342325211647e-06, "loss": 1.6236, "step": 85796 }, { "epoch": 2.85, "grad_norm": 0.6928454637527466, "learning_rate": 3.4767554857068504e-06, "loss": 1.7215, "step": 85797 }, { "epoch": 2.85, "grad_norm": 0.727109968662262, "learning_rate": 3.475169006137568e-06, "loss": 1.5855, "step": 85798 }, { "epoch": 2.85, "grad_norm": 0.7050884962081909, "learning_rate": 3.4735828865057325e-06, "loss": 1.7259, "step": 85799 }, { "epoch": 2.85, "grad_norm": 0.7150662541389465, "learning_rate": 3.471997126813275e-06, "loss": 1.6867, "step": 85800 }, { "epoch": 2.85, "grad_norm": 0.7124833464622498, "learning_rate": 3.470411727062128e-06, "loss": 1.6981, "step": 85801 }, { "epoch": 2.85, "grad_norm": 0.6958113312721252, "learning_rate": 3.4688266872542226e-06, "loss": 1.6664, "step": 85802 }, { "epoch": 2.85, "grad_norm": 0.6995101571083069, "learning_rate": 3.467242007391424e-06, "loss": 1.6893, "step": 85803 }, { "epoch": 2.85, "grad_norm": 0.71136873960495, "learning_rate": 3.4656576874757314e-06, "loss": 1.6697, "step": 85804 }, { "epoch": 2.85, "grad_norm": 0.7088474035263062, "learning_rate": 3.464073727509076e-06, "loss": 1.5869, "step": 85805 }, { "epoch": 2.85, "grad_norm": 0.7613969445228577, "learning_rate": 3.4624901274932893e-06, "loss": 1.705, "step": 85806 }, { "epoch": 2.85, "grad_norm": 0.6983827948570251, "learning_rate": 3.460906887430337e-06, "loss": 1.656, "step": 85807 }, { "epoch": 2.85, "grad_norm": 0.6946719288825989, "learning_rate": 3.459324007322184e-06, "loss": 1.6428, "step": 85808 }, { "epoch": 2.85, "grad_norm": 0.7095943093299866, "learning_rate": 3.4577414871707286e-06, "loss": 1.678, "step": 85809 }, { "epoch": 2.85, "grad_norm": 0.6841698288917542, "learning_rate": 3.45615932697787e-06, "loss": 1.6568, "step": 85810 }, { "epoch": 2.85, "grad_norm": 0.7204495668411255, "learning_rate": 3.454577526745539e-06, "loss": 1.7326, "step": 85811 }, { "epoch": 2.85, "grad_norm": 0.7218604683876038, "learning_rate": 3.4529960864756346e-06, "loss": 1.6838, "step": 85812 }, { "epoch": 2.86, "grad_norm": 0.7225685119628906, "learning_rate": 3.4514150061701216e-06, "loss": 1.6452, "step": 85813 }, { "epoch": 2.86, "grad_norm": 0.6950451731681824, "learning_rate": 3.449834285830866e-06, "loss": 1.6857, "step": 85814 }, { "epoch": 2.86, "grad_norm": 0.7061862349510193, "learning_rate": 3.448253925459865e-06, "loss": 1.6374, "step": 85815 }, { "epoch": 2.86, "grad_norm": 0.7210797667503357, "learning_rate": 3.446673925058985e-06, "loss": 1.7268, "step": 85816 }, { "epoch": 2.86, "grad_norm": 0.7622020840644836, "learning_rate": 3.44509428463009e-06, "loss": 1.6997, "step": 85817 }, { "epoch": 2.86, "grad_norm": 0.6955482363700867, "learning_rate": 3.4435150041752125e-06, "loss": 1.627, "step": 85818 }, { "epoch": 2.86, "grad_norm": 0.6986416578292847, "learning_rate": 3.441936083696217e-06, "loss": 1.6338, "step": 85819 }, { "epoch": 2.86, "grad_norm": 0.7164591550827026, "learning_rate": 3.44035752319497e-06, "loss": 1.693, "step": 85820 }, { "epoch": 2.86, "grad_norm": 0.7473805546760559, "learning_rate": 3.4387793226734684e-06, "loss": 1.6367, "step": 85821 }, { "epoch": 2.86, "grad_norm": 0.7092168927192688, "learning_rate": 3.437201482133578e-06, "loss": 1.6461, "step": 85822 }, { "epoch": 2.86, "grad_norm": 0.7136355638504028, "learning_rate": 3.435624001577231e-06, "loss": 1.6263, "step": 85823 }, { "epoch": 2.86, "grad_norm": 0.7204087376594543, "learning_rate": 3.434046881006325e-06, "loss": 1.7577, "step": 85824 }, { "epoch": 2.86, "grad_norm": 0.7106577754020691, "learning_rate": 3.432470120422792e-06, "loss": 1.6415, "step": 85825 }, { "epoch": 2.86, "grad_norm": 0.7069382071495056, "learning_rate": 3.4308937198285645e-06, "loss": 1.6734, "step": 85826 }, { "epoch": 2.86, "grad_norm": 0.7315168380737305, "learning_rate": 3.4293176792255405e-06, "loss": 1.7748, "step": 85827 }, { "epoch": 2.86, "grad_norm": 0.6970791220664978, "learning_rate": 3.4277419986156184e-06, "loss": 1.6814, "step": 85828 }, { "epoch": 2.86, "grad_norm": 0.7163436412811279, "learning_rate": 3.426166678000697e-06, "loss": 1.7556, "step": 85829 }, { "epoch": 2.86, "grad_norm": 0.72779780626297, "learning_rate": 3.424591717382741e-06, "loss": 1.7212, "step": 85830 }, { "epoch": 2.86, "grad_norm": 0.6846323609352112, "learning_rate": 3.4230171167636154e-06, "loss": 1.6166, "step": 85831 }, { "epoch": 2.86, "grad_norm": 0.7263363003730774, "learning_rate": 3.4214428761452196e-06, "loss": 1.7477, "step": 85832 }, { "epoch": 2.86, "grad_norm": 0.7023088335990906, "learning_rate": 3.419868995529551e-06, "loss": 1.704, "step": 85833 }, { "epoch": 2.86, "grad_norm": 0.6939601302146912, "learning_rate": 3.418295474918409e-06, "loss": 1.6389, "step": 85834 }, { "epoch": 2.86, "grad_norm": 0.7243515253067017, "learning_rate": 3.416722314313791e-06, "loss": 1.7074, "step": 85835 }, { "epoch": 2.86, "grad_norm": 0.7113097906112671, "learning_rate": 3.415149513717563e-06, "loss": 1.6587, "step": 85836 }, { "epoch": 2.86, "grad_norm": 0.7020828127861023, "learning_rate": 3.413577073131657e-06, "loss": 1.6714, "step": 85837 }, { "epoch": 2.86, "grad_norm": 0.7385215163230896, "learning_rate": 3.4120049925579707e-06, "loss": 1.7277, "step": 85838 }, { "epoch": 2.86, "grad_norm": 0.7049936652183533, "learning_rate": 3.4104332719983694e-06, "loss": 1.6629, "step": 85839 }, { "epoch": 2.86, "grad_norm": 0.7289213538169861, "learning_rate": 3.408861911454852e-06, "loss": 1.7218, "step": 85840 }, { "epoch": 2.86, "grad_norm": 0.7199788093566895, "learning_rate": 3.40729091092925e-06, "loss": 1.7087, "step": 85841 }, { "epoch": 2.86, "grad_norm": 0.7252668738365173, "learning_rate": 3.405720270423495e-06, "loss": 1.7596, "step": 85842 }, { "epoch": 2.86, "grad_norm": 0.7165847420692444, "learning_rate": 3.4041499899394863e-06, "loss": 1.6819, "step": 85843 }, { "epoch": 2.86, "grad_norm": 0.7152347564697266, "learning_rate": 3.402580069479155e-06, "loss": 1.6275, "step": 85844 }, { "epoch": 2.86, "grad_norm": 0.6827407479286194, "learning_rate": 3.4010105090443994e-06, "loss": 1.6562, "step": 85845 }, { "epoch": 2.86, "grad_norm": 0.6980375647544861, "learning_rate": 3.399441308637085e-06, "loss": 1.6759, "step": 85846 }, { "epoch": 2.86, "grad_norm": 0.717539370059967, "learning_rate": 3.3978724682591774e-06, "loss": 1.7373, "step": 85847 }, { "epoch": 2.86, "grad_norm": 0.7170429825782776, "learning_rate": 3.396303987912541e-06, "loss": 1.5994, "step": 85848 }, { "epoch": 2.86, "grad_norm": 0.7095314264297485, "learning_rate": 3.3947358675990743e-06, "loss": 1.5954, "step": 85849 }, { "epoch": 2.86, "grad_norm": 0.7373716235160828, "learning_rate": 3.393168107320676e-06, "loss": 1.6411, "step": 85850 }, { "epoch": 2.86, "grad_norm": 0.7161532640457153, "learning_rate": 3.391600707079345e-06, "loss": 1.6036, "step": 85851 }, { "epoch": 2.86, "grad_norm": 0.7297824025154114, "learning_rate": 3.3900336668768456e-06, "loss": 1.5888, "step": 85852 }, { "epoch": 2.86, "grad_norm": 0.7198025584220886, "learning_rate": 3.3884669867151437e-06, "loss": 1.6617, "step": 85853 }, { "epoch": 2.86, "grad_norm": 0.7348089218139648, "learning_rate": 3.386900666596171e-06, "loss": 1.6641, "step": 85854 }, { "epoch": 2.86, "grad_norm": 0.7067909240722656, "learning_rate": 3.3853347065217582e-06, "loss": 1.6102, "step": 85855 }, { "epoch": 2.86, "grad_norm": 0.7210779190063477, "learning_rate": 3.383769106493872e-06, "loss": 1.6973, "step": 85856 }, { "epoch": 2.86, "grad_norm": 0.7070773839950562, "learning_rate": 3.382203866514377e-06, "loss": 1.6137, "step": 85857 }, { "epoch": 2.86, "grad_norm": 0.7301733493804932, "learning_rate": 3.3806389865852045e-06, "loss": 1.7287, "step": 85858 }, { "epoch": 2.86, "grad_norm": 0.7498623132705688, "learning_rate": 3.379074466708187e-06, "loss": 1.773, "step": 85859 }, { "epoch": 2.86, "grad_norm": 0.7133178114891052, "learning_rate": 3.3775103068852894e-06, "loss": 1.7052, "step": 85860 }, { "epoch": 2.86, "grad_norm": 0.70012366771698, "learning_rate": 3.37594650711841e-06, "loss": 1.6667, "step": 85861 }, { "epoch": 2.86, "grad_norm": 0.7145631313323975, "learning_rate": 3.374383067409414e-06, "loss": 1.6859, "step": 85862 }, { "epoch": 2.86, "grad_norm": 0.7076095938682556, "learning_rate": 3.372819987760167e-06, "loss": 1.69, "step": 85863 }, { "epoch": 2.86, "grad_norm": 0.7447693943977356, "learning_rate": 3.3712572681726336e-06, "loss": 1.6622, "step": 85864 }, { "epoch": 2.86, "grad_norm": 0.7200042605400085, "learning_rate": 3.3696949086487458e-06, "loss": 1.5888, "step": 85865 }, { "epoch": 2.86, "grad_norm": 0.7307028770446777, "learning_rate": 3.368132909190302e-06, "loss": 1.7845, "step": 85866 }, { "epoch": 2.86, "grad_norm": 0.7053325772285461, "learning_rate": 3.366571269799201e-06, "loss": 1.6998, "step": 85867 }, { "epoch": 2.86, "grad_norm": 0.7071318030357361, "learning_rate": 3.3650099904774077e-06, "loss": 1.6726, "step": 85868 }, { "epoch": 2.86, "grad_norm": 0.7243421673774719, "learning_rate": 3.3634490712268203e-06, "loss": 1.6396, "step": 85869 }, { "epoch": 2.86, "grad_norm": 0.7102017402648926, "learning_rate": 3.361888512049238e-06, "loss": 1.6812, "step": 85870 }, { "epoch": 2.86, "grad_norm": 0.7059640884399414, "learning_rate": 3.3603283129466585e-06, "loss": 1.6966, "step": 85871 }, { "epoch": 2.86, "grad_norm": 0.7408326268196106, "learning_rate": 3.3587684739209144e-06, "loss": 1.7533, "step": 85872 }, { "epoch": 2.86, "grad_norm": 0.714441180229187, "learning_rate": 3.3572089949739033e-06, "loss": 1.6159, "step": 85873 }, { "epoch": 2.86, "grad_norm": 0.7050857543945312, "learning_rate": 3.3556498761075577e-06, "loss": 1.6659, "step": 85874 }, { "epoch": 2.86, "grad_norm": 0.7166159749031067, "learning_rate": 3.354091117323743e-06, "loss": 1.6246, "step": 85875 }, { "epoch": 2.86, "grad_norm": 0.6840755343437195, "learning_rate": 3.352532718624357e-06, "loss": 1.6404, "step": 85876 }, { "epoch": 2.86, "grad_norm": 0.6951856017112732, "learning_rate": 3.3509746800112645e-06, "loss": 1.6424, "step": 85877 }, { "epoch": 2.86, "grad_norm": 0.7123969197273254, "learning_rate": 3.3494170014863987e-06, "loss": 1.7384, "step": 85878 }, { "epoch": 2.86, "grad_norm": 0.7296673655509949, "learning_rate": 3.3478596830516234e-06, "loss": 1.7499, "step": 85879 }, { "epoch": 2.86, "grad_norm": 0.7205913662910461, "learning_rate": 3.346302724708805e-06, "loss": 1.7222, "step": 85880 }, { "epoch": 2.86, "grad_norm": 0.7182649374008179, "learning_rate": 3.3447461264599073e-06, "loss": 1.6244, "step": 85881 }, { "epoch": 2.86, "grad_norm": 0.7168107628822327, "learning_rate": 3.3431898883067633e-06, "loss": 1.7154, "step": 85882 }, { "epoch": 2.86, "grad_norm": 0.7092968225479126, "learning_rate": 3.341634010251304e-06, "loss": 1.7175, "step": 85883 }, { "epoch": 2.86, "grad_norm": 0.7039974927902222, "learning_rate": 3.340078492295328e-06, "loss": 1.6153, "step": 85884 }, { "epoch": 2.86, "grad_norm": 0.7301732897758484, "learning_rate": 3.3385233344408345e-06, "loss": 1.6321, "step": 85885 }, { "epoch": 2.86, "grad_norm": 0.693382978439331, "learning_rate": 3.3369685366896883e-06, "loss": 1.6342, "step": 85886 }, { "epoch": 2.86, "grad_norm": 0.71123868227005, "learning_rate": 3.3354140990436873e-06, "loss": 1.6793, "step": 85887 }, { "epoch": 2.86, "grad_norm": 0.7107028365135193, "learning_rate": 3.3338600215048305e-06, "loss": 1.7443, "step": 85888 }, { "epoch": 2.86, "grad_norm": 0.7127625942230225, "learning_rate": 3.3323063040749165e-06, "loss": 1.692, "step": 85889 }, { "epoch": 2.86, "grad_norm": 0.7093437910079956, "learning_rate": 3.330752946755877e-06, "loss": 1.6897, "step": 85890 }, { "epoch": 2.86, "grad_norm": 0.7278985977172852, "learning_rate": 3.3291999495496434e-06, "loss": 1.7059, "step": 85891 }, { "epoch": 2.86, "grad_norm": 0.6948025226593018, "learning_rate": 3.3276473124579816e-06, "loss": 1.6843, "step": 85892 }, { "epoch": 2.86, "grad_norm": 0.7150570154190063, "learning_rate": 3.3260950354828563e-06, "loss": 1.6955, "step": 85893 }, { "epoch": 2.86, "grad_norm": 0.700950026512146, "learning_rate": 3.324543118626166e-06, "loss": 1.7022, "step": 85894 }, { "epoch": 2.86, "grad_norm": 0.6927697062492371, "learning_rate": 3.3229915618897763e-06, "loss": 1.6858, "step": 85895 }, { "epoch": 2.86, "grad_norm": 0.7109455466270447, "learning_rate": 3.3214403652755185e-06, "loss": 1.7232, "step": 85896 }, { "epoch": 2.86, "grad_norm": 0.7447023391723633, "learning_rate": 3.319889528785358e-06, "loss": 1.6485, "step": 85897 }, { "epoch": 2.86, "grad_norm": 0.7147560715675354, "learning_rate": 3.318339052421126e-06, "loss": 1.6604, "step": 85898 }, { "epoch": 2.86, "grad_norm": 0.7054796814918518, "learning_rate": 3.316788936184689e-06, "loss": 1.7112, "step": 85899 }, { "epoch": 2.86, "grad_norm": 0.7185775637626648, "learning_rate": 3.3152391800779777e-06, "loss": 1.7532, "step": 85900 }, { "epoch": 2.86, "grad_norm": 0.6879701614379883, "learning_rate": 3.3136897841028577e-06, "loss": 1.7386, "step": 85901 }, { "epoch": 2.86, "grad_norm": 0.7151492834091187, "learning_rate": 3.312140748261194e-06, "loss": 1.6705, "step": 85902 }, { "epoch": 2.86, "grad_norm": 0.750497579574585, "learning_rate": 3.3105920725548516e-06, "loss": 1.7592, "step": 85903 }, { "epoch": 2.86, "grad_norm": 0.6997981071472168, "learning_rate": 3.3090437569857963e-06, "loss": 1.6419, "step": 85904 }, { "epoch": 2.86, "grad_norm": 0.6997954249382019, "learning_rate": 3.307495801555826e-06, "loss": 1.7147, "step": 85905 }, { "epoch": 2.86, "grad_norm": 0.7151723504066467, "learning_rate": 3.305948206266806e-06, "loss": 1.6891, "step": 85906 }, { "epoch": 2.86, "grad_norm": 0.7156449556350708, "learning_rate": 3.3044009711206686e-06, "loss": 1.621, "step": 85907 }, { "epoch": 2.86, "grad_norm": 0.7269806265830994, "learning_rate": 3.3028540961192784e-06, "loss": 1.6923, "step": 85908 }, { "epoch": 2.86, "grad_norm": 0.7136183381080627, "learning_rate": 3.3013075812645008e-06, "loss": 1.6588, "step": 85909 }, { "epoch": 2.86, "grad_norm": 0.7367125153541565, "learning_rate": 3.299761426558201e-06, "loss": 1.6816, "step": 85910 }, { "epoch": 2.86, "grad_norm": 0.7257960438728333, "learning_rate": 3.2982156320023103e-06, "loss": 1.6355, "step": 85911 }, { "epoch": 2.86, "grad_norm": 0.7167015671730042, "learning_rate": 3.2966701975986608e-06, "loss": 1.6576, "step": 85912 }, { "epoch": 2.86, "grad_norm": 0.7082834839820862, "learning_rate": 3.2951251233491184e-06, "loss": 1.6555, "step": 85913 }, { "epoch": 2.86, "grad_norm": 0.7237726449966431, "learning_rate": 3.293580409255581e-06, "loss": 1.7337, "step": 85914 }, { "epoch": 2.86, "grad_norm": 0.7355261445045471, "learning_rate": 3.2920360553199464e-06, "loss": 1.7631, "step": 85915 }, { "epoch": 2.86, "grad_norm": 0.7061637043952942, "learning_rate": 3.290492061544048e-06, "loss": 1.6525, "step": 85916 }, { "epoch": 2.86, "grad_norm": 0.7127474546432495, "learning_rate": 3.2889484279297495e-06, "loss": 1.5933, "step": 85917 }, { "epoch": 2.86, "grad_norm": 0.7134885787963867, "learning_rate": 3.2874051544789835e-06, "loss": 1.7506, "step": 85918 }, { "epoch": 2.86, "grad_norm": 0.7233611345291138, "learning_rate": 3.285862241193582e-06, "loss": 1.7033, "step": 85919 }, { "epoch": 2.86, "grad_norm": 0.7150059938430786, "learning_rate": 3.2843196880754095e-06, "loss": 1.6311, "step": 85920 }, { "epoch": 2.86, "grad_norm": 0.7172717452049255, "learning_rate": 3.2827774951263654e-06, "loss": 1.6693, "step": 85921 }, { "epoch": 2.86, "grad_norm": 0.7112948894500732, "learning_rate": 3.281235662348314e-06, "loss": 1.7013, "step": 85922 }, { "epoch": 2.86, "grad_norm": 0.7015835046768188, "learning_rate": 3.2796941897430873e-06, "loss": 1.684, "step": 85923 }, { "epoch": 2.86, "grad_norm": 0.7226515412330627, "learning_rate": 3.2781530773126176e-06, "loss": 1.6258, "step": 85924 }, { "epoch": 2.86, "grad_norm": 0.7025743126869202, "learning_rate": 3.276612325058736e-06, "loss": 1.6975, "step": 85925 }, { "epoch": 2.86, "grad_norm": 0.7151070237159729, "learning_rate": 3.2750719329833752e-06, "loss": 1.6775, "step": 85926 }, { "epoch": 2.86, "grad_norm": 0.6885090470314026, "learning_rate": 3.2735319010883e-06, "loss": 1.598, "step": 85927 }, { "epoch": 2.86, "grad_norm": 0.7297663688659668, "learning_rate": 3.271992229375475e-06, "loss": 1.6277, "step": 85928 }, { "epoch": 2.86, "grad_norm": 0.7644185423851013, "learning_rate": 3.270452917846733e-06, "loss": 1.7855, "step": 85929 }, { "epoch": 2.86, "grad_norm": 0.7247161269187927, "learning_rate": 3.2689139665039056e-06, "loss": 1.7024, "step": 85930 }, { "epoch": 2.86, "grad_norm": 0.7131440043449402, "learning_rate": 3.2673753753488907e-06, "loss": 1.7038, "step": 85931 }, { "epoch": 2.86, "grad_norm": 0.7333903908729553, "learning_rate": 3.2658371443835875e-06, "loss": 1.7481, "step": 85932 }, { "epoch": 2.86, "grad_norm": 0.7193090915679932, "learning_rate": 3.2642992736098604e-06, "loss": 1.7355, "step": 85933 }, { "epoch": 2.86, "grad_norm": 0.7211631536483765, "learning_rate": 3.2627617630295087e-06, "loss": 1.6928, "step": 85934 }, { "epoch": 2.86, "grad_norm": 0.7450770139694214, "learning_rate": 3.2612246126444643e-06, "loss": 1.7393, "step": 85935 }, { "epoch": 2.86, "grad_norm": 0.744667649269104, "learning_rate": 3.259687822456558e-06, "loss": 1.7111, "step": 85936 }, { "epoch": 2.86, "grad_norm": 0.7458032965660095, "learning_rate": 3.258151392467656e-06, "loss": 1.6631, "step": 85937 }, { "epoch": 2.86, "grad_norm": 0.7262945771217346, "learning_rate": 3.25661532267969e-06, "loss": 1.6998, "step": 85938 }, { "epoch": 2.86, "grad_norm": 0.7176047563552856, "learning_rate": 3.2550796130943913e-06, "loss": 1.7254, "step": 85939 }, { "epoch": 2.86, "grad_norm": 0.7250109910964966, "learning_rate": 3.2535442637137587e-06, "loss": 1.6666, "step": 85940 }, { "epoch": 2.86, "grad_norm": 0.6989259123802185, "learning_rate": 3.252009274539591e-06, "loss": 1.7172, "step": 85941 }, { "epoch": 2.86, "grad_norm": 0.7036862373352051, "learning_rate": 3.250474645573753e-06, "loss": 1.6539, "step": 85942 }, { "epoch": 2.86, "grad_norm": 0.7186323404312134, "learning_rate": 3.24894037681811e-06, "loss": 1.7179, "step": 85943 }, { "epoch": 2.86, "grad_norm": 0.7239373326301575, "learning_rate": 3.247406468274527e-06, "loss": 1.6459, "step": 85944 }, { "epoch": 2.86, "grad_norm": 0.7185497283935547, "learning_rate": 3.2458729199449028e-06, "loss": 1.7502, "step": 85945 }, { "epoch": 2.86, "grad_norm": 0.7205610275268555, "learning_rate": 3.2443397318310027e-06, "loss": 1.7054, "step": 85946 }, { "epoch": 2.86, "grad_norm": 0.6927973031997681, "learning_rate": 3.2428069039347913e-06, "loss": 1.6268, "step": 85947 }, { "epoch": 2.86, "grad_norm": 0.7170466780662537, "learning_rate": 3.2412744362581013e-06, "loss": 1.6847, "step": 85948 }, { "epoch": 2.86, "grad_norm": 0.7239366769790649, "learning_rate": 3.2397423288027635e-06, "loss": 1.6633, "step": 85949 }, { "epoch": 2.86, "grad_norm": 0.7009006142616272, "learning_rate": 3.238210581570644e-06, "loss": 1.6633, "step": 85950 }, { "epoch": 2.86, "grad_norm": 0.7083168625831604, "learning_rate": 3.2366791945636072e-06, "loss": 1.6532, "step": 85951 }, { "epoch": 2.86, "grad_norm": 0.7270852327346802, "learning_rate": 3.235148167783519e-06, "loss": 1.6928, "step": 85952 }, { "epoch": 2.86, "grad_norm": 0.7286361455917358, "learning_rate": 3.2336175012322443e-06, "loss": 1.6919, "step": 85953 }, { "epoch": 2.86, "grad_norm": 0.7128716707229614, "learning_rate": 3.2320871949116144e-06, "loss": 1.6896, "step": 85954 }, { "epoch": 2.86, "grad_norm": 0.7172110676765442, "learning_rate": 3.2305572488235286e-06, "loss": 1.7352, "step": 85955 }, { "epoch": 2.86, "grad_norm": 0.7070506811141968, "learning_rate": 3.2290276629697855e-06, "loss": 1.7145, "step": 85956 }, { "epoch": 2.86, "grad_norm": 0.7415258288383484, "learning_rate": 3.227498437352316e-06, "loss": 1.5934, "step": 85957 }, { "epoch": 2.86, "grad_norm": 0.7017788887023926, "learning_rate": 3.2259695719729194e-06, "loss": 1.6625, "step": 85958 }, { "epoch": 2.86, "grad_norm": 0.7033968567848206, "learning_rate": 3.2244410668334606e-06, "loss": 1.6737, "step": 85959 }, { "epoch": 2.86, "grad_norm": 0.7305701971054077, "learning_rate": 3.2229129219358052e-06, "loss": 1.6337, "step": 85960 }, { "epoch": 2.86, "grad_norm": 0.7258926630020142, "learning_rate": 3.221385137281818e-06, "loss": 1.692, "step": 85961 }, { "epoch": 2.86, "grad_norm": 0.7172313332557678, "learning_rate": 3.2198577128733306e-06, "loss": 1.6339, "step": 85962 }, { "epoch": 2.86, "grad_norm": 0.725104570388794, "learning_rate": 3.218330648712175e-06, "loss": 1.6187, "step": 85963 }, { "epoch": 2.86, "grad_norm": 0.7234256863594055, "learning_rate": 3.2168039448002837e-06, "loss": 1.7283, "step": 85964 }, { "epoch": 2.86, "grad_norm": 0.724984884262085, "learning_rate": 3.215277601139454e-06, "loss": 1.6832, "step": 85965 }, { "epoch": 2.86, "grad_norm": 0.7366685271263123, "learning_rate": 3.2137516177315525e-06, "loss": 1.6263, "step": 85966 }, { "epoch": 2.86, "grad_norm": 0.699653685092926, "learning_rate": 3.21222599457841e-06, "loss": 1.7396, "step": 85967 }, { "epoch": 2.86, "grad_norm": 0.7111740708351135, "learning_rate": 3.2107007316819257e-06, "loss": 1.6456, "step": 85968 }, { "epoch": 2.86, "grad_norm": 0.7368929386138916, "learning_rate": 3.209175829043864e-06, "loss": 1.5943, "step": 85969 }, { "epoch": 2.86, "grad_norm": 0.7314693331718445, "learning_rate": 3.2076512866661574e-06, "loss": 1.6762, "step": 85970 }, { "epoch": 2.86, "grad_norm": 0.7188987731933594, "learning_rate": 3.206127104550671e-06, "loss": 1.7245, "step": 85971 }, { "epoch": 2.86, "grad_norm": 0.704334020614624, "learning_rate": 3.204603282699203e-06, "loss": 1.6579, "step": 85972 }, { "epoch": 2.86, "grad_norm": 0.7061088681221008, "learning_rate": 3.2030798211135857e-06, "loss": 1.7137, "step": 85973 }, { "epoch": 2.86, "grad_norm": 0.7191943526268005, "learning_rate": 3.201556719795684e-06, "loss": 1.5707, "step": 85974 }, { "epoch": 2.86, "grad_norm": 0.7357586622238159, "learning_rate": 3.2000339787474296e-06, "loss": 1.6112, "step": 85975 }, { "epoch": 2.86, "grad_norm": 0.7035436034202576, "learning_rate": 3.1985115979705545e-06, "loss": 1.6404, "step": 85976 }, { "epoch": 2.86, "grad_norm": 0.7196898460388184, "learning_rate": 3.196989577466924e-06, "loss": 1.6739, "step": 85977 }, { "epoch": 2.86, "grad_norm": 0.7057459354400635, "learning_rate": 3.19546791723847e-06, "loss": 1.7861, "step": 85978 }, { "epoch": 2.86, "grad_norm": 0.7008549571037292, "learning_rate": 3.193946617286991e-06, "loss": 1.6985, "step": 85979 }, { "epoch": 2.86, "grad_norm": 1.0341770648956299, "learning_rate": 3.1924256776142852e-06, "loss": 1.647, "step": 85980 }, { "epoch": 2.86, "grad_norm": 0.7143669724464417, "learning_rate": 3.1909050982222516e-06, "loss": 1.7218, "step": 85981 }, { "epoch": 2.86, "grad_norm": 0.7108094096183777, "learning_rate": 3.1893848791127886e-06, "loss": 1.64, "step": 85982 }, { "epoch": 2.86, "grad_norm": 0.6999958753585815, "learning_rate": 3.1878650202875945e-06, "loss": 1.6396, "step": 85983 }, { "epoch": 2.86, "grad_norm": 0.7040035724639893, "learning_rate": 3.1863455217486346e-06, "loss": 1.7442, "step": 85984 }, { "epoch": 2.86, "grad_norm": 0.7411465644836426, "learning_rate": 3.1848263834977404e-06, "loss": 1.631, "step": 85985 }, { "epoch": 2.86, "grad_norm": 0.7057061791419983, "learning_rate": 3.1833076055367115e-06, "loss": 1.615, "step": 85986 }, { "epoch": 2.86, "grad_norm": 0.7089652419090271, "learning_rate": 3.1817891878674117e-06, "loss": 1.699, "step": 85987 }, { "epoch": 2.86, "grad_norm": 0.7120859622955322, "learning_rate": 3.1802711304916738e-06, "loss": 1.6891, "step": 85988 }, { "epoch": 2.86, "grad_norm": 0.7392752170562744, "learning_rate": 3.1787534334113627e-06, "loss": 1.6902, "step": 85989 }, { "epoch": 2.86, "grad_norm": 0.7135013937950134, "learning_rate": 3.1772360966283107e-06, "loss": 1.6627, "step": 85990 }, { "epoch": 2.86, "grad_norm": 0.7113804817199707, "learning_rate": 3.1757191201443487e-06, "loss": 1.6417, "step": 85991 }, { "epoch": 2.86, "grad_norm": 0.712553083896637, "learning_rate": 3.174202503961343e-06, "loss": 1.6776, "step": 85992 }, { "epoch": 2.86, "grad_norm": 0.727313756942749, "learning_rate": 3.172686248081158e-06, "loss": 1.6631, "step": 85993 }, { "epoch": 2.86, "grad_norm": 0.7079598307609558, "learning_rate": 3.1711703525055256e-06, "loss": 1.7201, "step": 85994 }, { "epoch": 2.86, "grad_norm": 0.7103861570358276, "learning_rate": 3.169654817236411e-06, "loss": 1.6218, "step": 85995 }, { "epoch": 2.86, "grad_norm": 0.7098195552825928, "learning_rate": 3.16813964227558e-06, "loss": 1.6729, "step": 85996 }, { "epoch": 2.86, "grad_norm": 0.7107264995574951, "learning_rate": 3.166624827624864e-06, "loss": 1.7086, "step": 85997 }, { "epoch": 2.86, "grad_norm": 0.7145516872406006, "learning_rate": 3.1651103732861614e-06, "loss": 1.6434, "step": 85998 }, { "epoch": 2.86, "grad_norm": 0.7263019680976868, "learning_rate": 3.1635962792612714e-06, "loss": 1.6626, "step": 85999 }, { "epoch": 2.86, "grad_norm": 0.7040740847587585, "learning_rate": 3.1620825455520582e-06, "loss": 1.6399, "step": 86000 }, { "epoch": 2.86, "grad_norm": 0.6912954449653625, "learning_rate": 3.1605691721602877e-06, "loss": 1.6287, "step": 86001 }, { "epoch": 2.86, "grad_norm": 0.7151660323143005, "learning_rate": 3.1590561590878914e-06, "loss": 1.6618, "step": 86002 }, { "epoch": 2.86, "grad_norm": 0.7390580773353577, "learning_rate": 3.1575435063366684e-06, "loss": 1.6824, "step": 86003 }, { "epoch": 2.86, "grad_norm": 0.7074776887893677, "learning_rate": 3.1560312139084166e-06, "loss": 1.7, "step": 86004 }, { "epoch": 2.86, "grad_norm": 0.7014397978782654, "learning_rate": 3.1545192818050345e-06, "loss": 1.6778, "step": 86005 }, { "epoch": 2.86, "grad_norm": 0.7025156021118164, "learning_rate": 3.153007710028288e-06, "loss": 1.6969, "step": 86006 }, { "epoch": 2.86, "grad_norm": 0.7077581286430359, "learning_rate": 3.1514964985801083e-06, "loss": 1.7048, "step": 86007 }, { "epoch": 2.86, "grad_norm": 0.7271154522895813, "learning_rate": 3.149985647462261e-06, "loss": 1.6422, "step": 86008 }, { "epoch": 2.86, "grad_norm": 0.707903265953064, "learning_rate": 3.1484751566765444e-06, "loss": 1.6111, "step": 86009 }, { "epoch": 2.86, "grad_norm": 0.7120108604431152, "learning_rate": 3.146965026224857e-06, "loss": 1.6092, "step": 86010 }, { "epoch": 2.86, "grad_norm": 0.7075866460800171, "learning_rate": 3.145455256109064e-06, "loss": 1.6656, "step": 86011 }, { "epoch": 2.86, "grad_norm": 0.7119481563568115, "learning_rate": 3.1439458463308974e-06, "loss": 1.6679, "step": 86012 }, { "epoch": 2.86, "grad_norm": 0.724906861782074, "learning_rate": 3.142436796892256e-06, "loss": 1.7912, "step": 86013 }, { "epoch": 2.86, "grad_norm": 0.7221556901931763, "learning_rate": 3.140928107794971e-06, "loss": 1.7177, "step": 86014 }, { "epoch": 2.86, "grad_norm": 0.727605938911438, "learning_rate": 3.1394197790408416e-06, "loss": 1.7104, "step": 86015 }, { "epoch": 2.86, "grad_norm": 0.7026481628417969, "learning_rate": 3.1379118106317323e-06, "loss": 1.7083, "step": 86016 }, { "epoch": 2.86, "grad_norm": 0.6854761242866516, "learning_rate": 3.1364042025694094e-06, "loss": 1.5963, "step": 86017 }, { "epoch": 2.86, "grad_norm": 0.7213518023490906, "learning_rate": 3.1348969548558366e-06, "loss": 1.5659, "step": 86018 }, { "epoch": 2.86, "grad_norm": 0.7243087291717529, "learning_rate": 3.1333900674926805e-06, "loss": 1.7233, "step": 86019 }, { "epoch": 2.86, "grad_norm": 0.7437992691993713, "learning_rate": 3.131883540481872e-06, "loss": 1.6471, "step": 86020 }, { "epoch": 2.86, "grad_norm": 0.6923764944076538, "learning_rate": 3.1303773738252436e-06, "loss": 1.6609, "step": 86021 }, { "epoch": 2.86, "grad_norm": 0.7064529657363892, "learning_rate": 3.12887156752456e-06, "loss": 1.6616, "step": 86022 }, { "epoch": 2.86, "grad_norm": 0.735008955001831, "learning_rate": 3.127366121581687e-06, "loss": 1.6741, "step": 86023 }, { "epoch": 2.86, "grad_norm": 0.7507296204566956, "learning_rate": 3.125861035998456e-06, "loss": 1.6452, "step": 86024 }, { "epoch": 2.86, "grad_norm": 0.7334257960319519, "learning_rate": 3.124356310776699e-06, "loss": 1.7096, "step": 86025 }, { "epoch": 2.86, "grad_norm": 0.7402667999267578, "learning_rate": 3.1228519459182145e-06, "loss": 1.642, "step": 86026 }, { "epoch": 2.86, "grad_norm": 0.7282229065895081, "learning_rate": 3.1213479414248343e-06, "loss": 1.6866, "step": 86027 }, { "epoch": 2.86, "grad_norm": 0.7227491736412048, "learning_rate": 3.119844297298424e-06, "loss": 1.7792, "step": 86028 }, { "epoch": 2.86, "grad_norm": 0.7175904512405396, "learning_rate": 3.1183410135407817e-06, "loss": 1.665, "step": 86029 }, { "epoch": 2.86, "grad_norm": 0.7161931991577148, "learning_rate": 3.116838090153706e-06, "loss": 1.7129, "step": 86030 }, { "epoch": 2.86, "grad_norm": 0.6998881697654724, "learning_rate": 3.1153355271390623e-06, "loss": 1.614, "step": 86031 }, { "epoch": 2.86, "grad_norm": 0.7166510224342346, "learning_rate": 3.113833324498649e-06, "loss": 1.626, "step": 86032 }, { "epoch": 2.86, "grad_norm": 0.7010955810546875, "learning_rate": 3.112331482234298e-06, "loss": 1.6796, "step": 86033 }, { "epoch": 2.86, "grad_norm": 0.7153357863426208, "learning_rate": 3.1108300003478416e-06, "loss": 1.5633, "step": 86034 }, { "epoch": 2.86, "grad_norm": 0.7322056293487549, "learning_rate": 3.1093288788410774e-06, "loss": 1.6768, "step": 86035 }, { "epoch": 2.86, "grad_norm": 0.7013855576515198, "learning_rate": 3.1078281177158715e-06, "loss": 1.6751, "step": 86036 }, { "epoch": 2.86, "grad_norm": 0.7345395088195801, "learning_rate": 3.1063277169739553e-06, "loss": 1.7498, "step": 86037 }, { "epoch": 2.86, "grad_norm": 0.7342270016670227, "learning_rate": 3.1048276766172608e-06, "loss": 1.6466, "step": 86038 }, { "epoch": 2.86, "grad_norm": 0.7181203961372375, "learning_rate": 3.1033279966475867e-06, "loss": 1.7526, "step": 86039 }, { "epoch": 2.86, "grad_norm": 0.7079238891601562, "learning_rate": 3.1018286770666643e-06, "loss": 1.6848, "step": 86040 }, { "epoch": 2.86, "grad_norm": 0.7108802199363708, "learning_rate": 3.100329717876393e-06, "loss": 1.6646, "step": 86041 }, { "epoch": 2.86, "grad_norm": 0.7547106742858887, "learning_rate": 3.098831119078604e-06, "loss": 1.6732, "step": 86042 }, { "epoch": 2.86, "grad_norm": 0.7209820747375488, "learning_rate": 3.097332880675063e-06, "loss": 1.5926, "step": 86043 }, { "epoch": 2.86, "grad_norm": 0.7075986266136169, "learning_rate": 3.095835002667635e-06, "loss": 1.6573, "step": 86044 }, { "epoch": 2.86, "grad_norm": 0.7132576704025269, "learning_rate": 3.094337485058085e-06, "loss": 1.6643, "step": 86045 }, { "epoch": 2.86, "grad_norm": 0.7277747988700867, "learning_rate": 3.0928403278483117e-06, "loss": 1.607, "step": 86046 }, { "epoch": 2.86, "grad_norm": 0.7365464568138123, "learning_rate": 3.091343531040014e-06, "loss": 1.7159, "step": 86047 }, { "epoch": 2.86, "grad_norm": 0.7101015448570251, "learning_rate": 3.0898470946351227e-06, "loss": 1.6318, "step": 86048 }, { "epoch": 2.86, "grad_norm": 0.7413166165351868, "learning_rate": 3.0883510186354042e-06, "loss": 1.6346, "step": 86049 }, { "epoch": 2.86, "grad_norm": 0.7053492665290833, "learning_rate": 3.0868553030426903e-06, "loss": 1.6585, "step": 86050 }, { "epoch": 2.86, "grad_norm": 0.7462313771247864, "learning_rate": 3.0853599478587455e-06, "loss": 1.6106, "step": 86051 }, { "epoch": 2.86, "grad_norm": 0.7528289556503296, "learning_rate": 3.0838649530854686e-06, "loss": 1.6677, "step": 86052 }, { "epoch": 2.86, "grad_norm": 0.7036153078079224, "learning_rate": 3.0823703187246253e-06, "loss": 1.6927, "step": 86053 }, { "epoch": 2.86, "grad_norm": 0.7300113439559937, "learning_rate": 3.0808760447780132e-06, "loss": 1.6938, "step": 86054 }, { "epoch": 2.86, "grad_norm": 0.717729926109314, "learning_rate": 3.0793821312474986e-06, "loss": 1.7553, "step": 86055 }, { "epoch": 2.86, "grad_norm": 0.7200865149497986, "learning_rate": 3.077888578134813e-06, "loss": 1.6693, "step": 86056 }, { "epoch": 2.86, "grad_norm": 0.7423977255821228, "learning_rate": 3.0763953854418878e-06, "loss": 1.7511, "step": 86057 }, { "epoch": 2.86, "grad_norm": 0.7268580198287964, "learning_rate": 3.074902553170422e-06, "loss": 1.6743, "step": 86058 }, { "epoch": 2.86, "grad_norm": 0.714721143245697, "learning_rate": 3.0734100813222805e-06, "loss": 1.6708, "step": 86059 }, { "epoch": 2.86, "grad_norm": 0.7482766509056091, "learning_rate": 3.0719179698992958e-06, "loss": 1.6881, "step": 86060 }, { "epoch": 2.86, "grad_norm": 0.7066271901130676, "learning_rate": 3.070426218903199e-06, "loss": 1.6974, "step": 86061 }, { "epoch": 2.86, "grad_norm": 0.7315541505813599, "learning_rate": 3.0689348283359228e-06, "loss": 1.7258, "step": 86062 }, { "epoch": 2.86, "grad_norm": 0.714113712310791, "learning_rate": 3.067443798199165e-06, "loss": 1.6199, "step": 86063 }, { "epoch": 2.86, "grad_norm": 0.6996475458145142, "learning_rate": 3.0659531284947915e-06, "loss": 1.7057, "step": 86064 }, { "epoch": 2.86, "grad_norm": 0.7180629372596741, "learning_rate": 3.0644628192246e-06, "loss": 1.6043, "step": 86065 }, { "epoch": 2.86, "grad_norm": 0.7342509627342224, "learning_rate": 3.06297287039039e-06, "loss": 1.6357, "step": 86066 }, { "epoch": 2.86, "grad_norm": 0.7203882932662964, "learning_rate": 3.061483281993993e-06, "loss": 1.6786, "step": 86067 }, { "epoch": 2.86, "grad_norm": 0.7068182826042175, "learning_rate": 3.0599940540371735e-06, "loss": 1.6534, "step": 86068 }, { "epoch": 2.86, "grad_norm": 0.7029397487640381, "learning_rate": 3.058505186521798e-06, "loss": 1.5808, "step": 86069 }, { "epoch": 2.86, "grad_norm": 0.71368008852005, "learning_rate": 3.057016679449631e-06, "loss": 1.6821, "step": 86070 }, { "epoch": 2.86, "grad_norm": 0.7188969850540161, "learning_rate": 3.0555285328225376e-06, "loss": 1.6988, "step": 86071 }, { "epoch": 2.86, "grad_norm": 0.6955441236495972, "learning_rate": 3.05404074664225e-06, "loss": 1.6722, "step": 86072 }, { "epoch": 2.86, "grad_norm": 0.703334629535675, "learning_rate": 3.0525533209105666e-06, "loss": 1.5991, "step": 86073 }, { "epoch": 2.86, "grad_norm": 0.7211922407150269, "learning_rate": 3.0510662556293865e-06, "loss": 1.6976, "step": 86074 }, { "epoch": 2.86, "grad_norm": 0.7052315473556519, "learning_rate": 3.0495795508004408e-06, "loss": 1.6604, "step": 86075 }, { "epoch": 2.86, "grad_norm": 0.7100421190261841, "learning_rate": 3.048093206425528e-06, "loss": 1.668, "step": 86076 }, { "epoch": 2.86, "grad_norm": 0.7244681715965271, "learning_rate": 3.0466072225064807e-06, "loss": 1.6473, "step": 86077 }, { "epoch": 2.86, "grad_norm": 0.7014181613922119, "learning_rate": 3.0451215990451307e-06, "loss": 1.6437, "step": 86078 }, { "epoch": 2.86, "grad_norm": 0.7324681878089905, "learning_rate": 3.0436363360432093e-06, "loss": 1.6526, "step": 86079 }, { "epoch": 2.86, "grad_norm": 0.7240416407585144, "learning_rate": 3.042151433502582e-06, "loss": 1.7422, "step": 86080 }, { "epoch": 2.86, "grad_norm": 0.707704484462738, "learning_rate": 3.0406668914250477e-06, "loss": 1.6215, "step": 86081 }, { "epoch": 2.86, "grad_norm": 0.7233095169067383, "learning_rate": 3.039182709812371e-06, "loss": 1.7022, "step": 86082 }, { "epoch": 2.86, "grad_norm": 0.7203469276428223, "learning_rate": 3.037698888666351e-06, "loss": 1.5904, "step": 86083 }, { "epoch": 2.86, "grad_norm": 0.6937691569328308, "learning_rate": 3.0362154279888195e-06, "loss": 1.6815, "step": 86084 }, { "epoch": 2.86, "grad_norm": 0.7745183706283569, "learning_rate": 3.034732327781575e-06, "loss": 1.7176, "step": 86085 }, { "epoch": 2.86, "grad_norm": 0.7442912459373474, "learning_rate": 3.0332495880464156e-06, "loss": 1.6968, "step": 86086 }, { "epoch": 2.86, "grad_norm": 0.6945040822029114, "learning_rate": 3.0317672087851076e-06, "loss": 1.7133, "step": 86087 }, { "epoch": 2.86, "grad_norm": 0.7032086849212646, "learning_rate": 3.030285189999515e-06, "loss": 1.6391, "step": 86088 }, { "epoch": 2.86, "grad_norm": 0.7090505361557007, "learning_rate": 3.0288035316913706e-06, "loss": 1.6707, "step": 86089 }, { "epoch": 2.86, "grad_norm": 0.6955490708351135, "learning_rate": 3.027322233862506e-06, "loss": 1.7374, "step": 86090 }, { "epoch": 2.86, "grad_norm": 0.7396837472915649, "learning_rate": 3.0258412965147192e-06, "loss": 1.7155, "step": 86091 }, { "epoch": 2.86, "grad_norm": 0.7382696270942688, "learning_rate": 3.024360719649843e-06, "loss": 1.6789, "step": 86092 }, { "epoch": 2.86, "grad_norm": 0.724981963634491, "learning_rate": 3.022880503269576e-06, "loss": 1.7202, "step": 86093 }, { "epoch": 2.86, "grad_norm": 0.7114169597625732, "learning_rate": 3.0214006473757823e-06, "loss": 1.669, "step": 86094 }, { "epoch": 2.86, "grad_norm": 0.7400606870651245, "learning_rate": 3.0199211519702617e-06, "loss": 1.6687, "step": 86095 }, { "epoch": 2.86, "grad_norm": 0.7121870517730713, "learning_rate": 3.018442017054812e-06, "loss": 1.6172, "step": 86096 }, { "epoch": 2.86, "grad_norm": 0.723680317401886, "learning_rate": 3.0169632426311983e-06, "loss": 1.706, "step": 86097 }, { "epoch": 2.86, "grad_norm": 0.7202287316322327, "learning_rate": 3.0154848287012533e-06, "loss": 1.6778, "step": 86098 }, { "epoch": 2.86, "grad_norm": 0.7235866189002991, "learning_rate": 3.0140067752667417e-06, "loss": 1.7598, "step": 86099 }, { "epoch": 2.86, "grad_norm": 0.7336843609809875, "learning_rate": 3.012529082329462e-06, "loss": 1.7529, "step": 86100 }, { "epoch": 2.86, "grad_norm": 0.7336046695709229, "learning_rate": 3.0110517498912134e-06, "loss": 1.6949, "step": 86101 }, { "epoch": 2.86, "grad_norm": 0.7221285700798035, "learning_rate": 3.0095747779537937e-06, "loss": 1.7432, "step": 86102 }, { "epoch": 2.86, "grad_norm": 0.7133669853210449, "learning_rate": 3.0080981665190016e-06, "loss": 1.7243, "step": 86103 }, { "epoch": 2.86, "grad_norm": 0.7208104729652405, "learning_rate": 3.006621915588603e-06, "loss": 1.6784, "step": 86104 }, { "epoch": 2.86, "grad_norm": 0.714184045791626, "learning_rate": 3.0051460251643954e-06, "loss": 1.6888, "step": 86105 }, { "epoch": 2.86, "grad_norm": 0.7158931493759155, "learning_rate": 3.0036704952482115e-06, "loss": 1.6908, "step": 86106 }, { "epoch": 2.86, "grad_norm": 0.7017032504081726, "learning_rate": 3.002195325841783e-06, "loss": 1.7367, "step": 86107 }, { "epoch": 2.86, "grad_norm": 0.7164244055747986, "learning_rate": 3.0007205169469085e-06, "loss": 1.6809, "step": 86108 }, { "epoch": 2.86, "grad_norm": 0.714561402797699, "learning_rate": 2.9992460685654528e-06, "loss": 1.6738, "step": 86109 }, { "epoch": 2.86, "grad_norm": 0.7286989688873291, "learning_rate": 2.997771980699115e-06, "loss": 1.7499, "step": 86110 }, { "epoch": 2.86, "grad_norm": 0.7041742205619812, "learning_rate": 2.996298253349727e-06, "loss": 1.6934, "step": 86111 }, { "epoch": 2.86, "grad_norm": 0.7209047079086304, "learning_rate": 2.994824886519087e-06, "loss": 1.6683, "step": 86112 }, { "epoch": 2.86, "grad_norm": 0.7297049760818481, "learning_rate": 2.993351880208961e-06, "loss": 1.7303, "step": 86113 }, { "epoch": 2.87, "grad_norm": 0.69354647397995, "learning_rate": 2.991879234421146e-06, "loss": 1.6655, "step": 86114 }, { "epoch": 2.87, "grad_norm": 0.710176944732666, "learning_rate": 2.990406949157409e-06, "loss": 1.7072, "step": 86115 }, { "epoch": 2.87, "grad_norm": 0.7309039235115051, "learning_rate": 2.988935024419581e-06, "loss": 1.7366, "step": 86116 }, { "epoch": 2.87, "grad_norm": 0.7001127600669861, "learning_rate": 2.987463460209394e-06, "loss": 1.6577, "step": 86117 }, { "epoch": 2.87, "grad_norm": 0.7170467972755432, "learning_rate": 2.98599225652868e-06, "loss": 1.6051, "step": 86118 }, { "epoch": 2.87, "grad_norm": 0.6848675608634949, "learning_rate": 2.984521413379237e-06, "loss": 1.6215, "step": 86119 }, { "epoch": 2.87, "grad_norm": 0.7064405679702759, "learning_rate": 2.983050930762798e-06, "loss": 1.7003, "step": 86120 }, { "epoch": 2.87, "grad_norm": 0.7230512499809265, "learning_rate": 2.981580808681161e-06, "loss": 1.6362, "step": 86121 }, { "epoch": 2.87, "grad_norm": 0.6976283192634583, "learning_rate": 2.9801110471361246e-06, "loss": 1.6742, "step": 86122 }, { "epoch": 2.87, "grad_norm": 0.7191935181617737, "learning_rate": 2.9786416461294536e-06, "loss": 1.6581, "step": 86123 }, { "epoch": 2.87, "grad_norm": 0.7043571472167969, "learning_rate": 2.977172605662981e-06, "loss": 1.7681, "step": 86124 }, { "epoch": 2.87, "grad_norm": 0.7053083777427673, "learning_rate": 2.9757039257384373e-06, "loss": 1.6118, "step": 86125 }, { "epoch": 2.87, "grad_norm": 0.7180808782577515, "learning_rate": 2.9742356063576224e-06, "loss": 1.709, "step": 86126 }, { "epoch": 2.87, "grad_norm": 0.7170239090919495, "learning_rate": 2.9727676475223337e-06, "loss": 1.6675, "step": 86127 }, { "epoch": 2.87, "grad_norm": 0.7190744280815125, "learning_rate": 2.9713000492343375e-06, "loss": 1.6781, "step": 86128 }, { "epoch": 2.87, "grad_norm": 0.7019932866096497, "learning_rate": 2.9698328114954317e-06, "loss": 1.591, "step": 86129 }, { "epoch": 2.87, "grad_norm": 0.7505635023117065, "learning_rate": 2.9683659343073486e-06, "loss": 1.7319, "step": 86130 }, { "epoch": 2.87, "grad_norm": 0.7068366408348083, "learning_rate": 2.9668994176719197e-06, "loss": 1.6904, "step": 86131 }, { "epoch": 2.87, "grad_norm": 0.6972448229789734, "learning_rate": 2.9654332615909104e-06, "loss": 1.6446, "step": 86132 }, { "epoch": 2.87, "grad_norm": 0.7043095231056213, "learning_rate": 2.9639674660661197e-06, "loss": 1.6966, "step": 86133 }, { "epoch": 2.87, "grad_norm": 0.7236080765724182, "learning_rate": 2.9625020310992785e-06, "loss": 1.6681, "step": 86134 }, { "epoch": 2.87, "grad_norm": 0.7106295228004456, "learning_rate": 2.96103695669222e-06, "loss": 1.6482, "step": 86135 }, { "epoch": 2.87, "grad_norm": 0.6973078846931458, "learning_rate": 2.959572242846675e-06, "loss": 1.6502, "step": 86136 }, { "epoch": 2.87, "grad_norm": 0.704879105091095, "learning_rate": 2.9581078895644096e-06, "loss": 1.6655, "step": 86137 }, { "epoch": 2.87, "grad_norm": 0.7242377400398254, "learning_rate": 2.956643896847288e-06, "loss": 1.6264, "step": 86138 }, { "epoch": 2.87, "grad_norm": 0.7305393815040588, "learning_rate": 2.9551802646970104e-06, "loss": 1.6697, "step": 86139 }, { "epoch": 2.87, "grad_norm": 0.7258008122444153, "learning_rate": 2.953716993115374e-06, "loss": 1.7772, "step": 86140 }, { "epoch": 2.87, "grad_norm": 0.7080219388008118, "learning_rate": 2.952254082104177e-06, "loss": 1.6701, "step": 86141 }, { "epoch": 2.87, "grad_norm": 0.718139111995697, "learning_rate": 2.9507915316651865e-06, "loss": 1.7019, "step": 86142 }, { "epoch": 2.87, "grad_norm": 0.6987659335136414, "learning_rate": 2.949329341800133e-06, "loss": 1.6699, "step": 86143 }, { "epoch": 2.87, "grad_norm": 0.7126451134681702, "learning_rate": 2.947867512510815e-06, "loss": 1.7117, "step": 86144 }, { "epoch": 2.87, "grad_norm": 0.740834653377533, "learning_rate": 2.9464060437990323e-06, "loss": 1.7084, "step": 86145 }, { "epoch": 2.87, "grad_norm": 0.7074415683746338, "learning_rate": 2.9449449356665823e-06, "loss": 1.6073, "step": 86146 }, { "epoch": 2.87, "grad_norm": 0.6985158324241638, "learning_rate": 2.9434841881151305e-06, "loss": 1.6444, "step": 86147 }, { "epoch": 2.87, "grad_norm": 0.7129272818565369, "learning_rate": 2.942023801146576e-06, "loss": 1.6235, "step": 86148 }, { "epoch": 2.87, "grad_norm": 0.7046099901199341, "learning_rate": 2.9405637747626163e-06, "loss": 1.6162, "step": 86149 }, { "epoch": 2.87, "grad_norm": 0.7175098657608032, "learning_rate": 2.9391041089650513e-06, "loss": 1.7146, "step": 86150 }, { "epoch": 2.87, "grad_norm": 0.7360674738883972, "learning_rate": 2.9376448037556123e-06, "loss": 1.711, "step": 86151 }, { "epoch": 2.87, "grad_norm": 0.7082218527793884, "learning_rate": 2.936185859136131e-06, "loss": 1.6553, "step": 86152 }, { "epoch": 2.87, "grad_norm": 0.7152047753334045, "learning_rate": 2.9347272751083393e-06, "loss": 1.7174, "step": 86153 }, { "epoch": 2.87, "grad_norm": 0.7168424129486084, "learning_rate": 2.9332690516740366e-06, "loss": 1.6208, "step": 86154 }, { "epoch": 2.87, "grad_norm": 0.6921853423118591, "learning_rate": 2.931811188834954e-06, "loss": 1.65, "step": 86155 }, { "epoch": 2.87, "grad_norm": 0.7064806222915649, "learning_rate": 2.93035368659289e-06, "loss": 1.6621, "step": 86156 }, { "epoch": 2.87, "grad_norm": 0.7182333469390869, "learning_rate": 2.9288965449496105e-06, "loss": 1.6751, "step": 86157 }, { "epoch": 2.87, "grad_norm": 0.7095283269882202, "learning_rate": 2.927439763906847e-06, "loss": 1.7318, "step": 86158 }, { "epoch": 2.87, "grad_norm": 0.7080217003822327, "learning_rate": 2.9259833434664317e-06, "loss": 1.6161, "step": 86159 }, { "epoch": 2.87, "grad_norm": 0.7022665739059448, "learning_rate": 2.924527283630129e-06, "loss": 1.6573, "step": 86160 }, { "epoch": 2.87, "grad_norm": 0.7112647891044617, "learning_rate": 2.9230715843996054e-06, "loss": 1.6551, "step": 86161 }, { "epoch": 2.87, "grad_norm": 0.721347451210022, "learning_rate": 2.921616245776759e-06, "loss": 1.72, "step": 86162 }, { "epoch": 2.87, "grad_norm": 0.7155653834342957, "learning_rate": 2.920161267763288e-06, "loss": 1.7546, "step": 86163 }, { "epoch": 2.87, "grad_norm": 0.7278963327407837, "learning_rate": 2.918706650360958e-06, "loss": 1.6784, "step": 86164 }, { "epoch": 2.87, "grad_norm": 0.7108485698699951, "learning_rate": 2.917252393571534e-06, "loss": 1.6662, "step": 86165 }, { "epoch": 2.87, "grad_norm": 0.6852520704269409, "learning_rate": 2.915798497396815e-06, "loss": 1.7344, "step": 86166 }, { "epoch": 2.87, "grad_norm": 0.7377050518989563, "learning_rate": 2.9143449618385327e-06, "loss": 1.6799, "step": 86167 }, { "epoch": 2.87, "grad_norm": 0.7047385573387146, "learning_rate": 2.9128917868984527e-06, "loss": 1.6019, "step": 86168 }, { "epoch": 2.87, "grad_norm": 0.7103474736213684, "learning_rate": 2.911438972578373e-06, "loss": 1.6516, "step": 86169 }, { "epoch": 2.87, "grad_norm": 0.727776050567627, "learning_rate": 2.9099865188800254e-06, "loss": 1.6931, "step": 86170 }, { "epoch": 2.87, "grad_norm": 0.6909055709838867, "learning_rate": 2.9085344258051425e-06, "loss": 1.7029, "step": 86171 }, { "epoch": 2.87, "grad_norm": 0.7185567617416382, "learning_rate": 2.907082693355556e-06, "loss": 1.7083, "step": 86172 }, { "epoch": 2.87, "grad_norm": 0.7301134467124939, "learning_rate": 2.9056313215329974e-06, "loss": 1.6626, "step": 86173 }, { "epoch": 2.87, "grad_norm": 0.7080726623535156, "learning_rate": 2.9041803103392325e-06, "loss": 1.7501, "step": 86174 }, { "epoch": 2.87, "grad_norm": 0.7394862174987793, "learning_rate": 2.902729659775993e-06, "loss": 1.6529, "step": 86175 }, { "epoch": 2.87, "grad_norm": 0.7203226089477539, "learning_rate": 2.9012793698450777e-06, "loss": 1.7431, "step": 86176 }, { "epoch": 2.87, "grad_norm": 0.7113202214241028, "learning_rate": 2.899829440548251e-06, "loss": 1.6422, "step": 86177 }, { "epoch": 2.87, "grad_norm": 0.7070216536521912, "learning_rate": 2.8983798718872463e-06, "loss": 1.6522, "step": 86178 }, { "epoch": 2.87, "grad_norm": 0.6972479224205017, "learning_rate": 2.896930663863828e-06, "loss": 1.6362, "step": 86179 }, { "epoch": 2.87, "grad_norm": 0.7153266668319702, "learning_rate": 2.895481816479761e-06, "loss": 1.6778, "step": 86180 }, { "epoch": 2.87, "grad_norm": 0.7364394068717957, "learning_rate": 2.8940333297368113e-06, "loss": 1.7636, "step": 86181 }, { "epoch": 2.87, "grad_norm": 0.7060179114341736, "learning_rate": 2.8925852036367103e-06, "loss": 1.7094, "step": 86182 }, { "epoch": 2.87, "grad_norm": 0.7122740149497986, "learning_rate": 2.8911374381812238e-06, "loss": 1.7139, "step": 86183 }, { "epoch": 2.87, "grad_norm": 0.7097738981246948, "learning_rate": 2.8896900333721496e-06, "loss": 1.6607, "step": 86184 }, { "epoch": 2.87, "grad_norm": 0.7105559706687927, "learning_rate": 2.88824298921122e-06, "loss": 1.5424, "step": 86185 }, { "epoch": 2.87, "grad_norm": 0.7142146825790405, "learning_rate": 2.8867963057001677e-06, "loss": 1.7208, "step": 86186 }, { "epoch": 2.87, "grad_norm": 0.7130978107452393, "learning_rate": 2.885349982840757e-06, "loss": 1.7103, "step": 86187 }, { "epoch": 2.87, "grad_norm": 0.7176600694656372, "learning_rate": 2.883904020634786e-06, "loss": 1.6495, "step": 86188 }, { "epoch": 2.87, "grad_norm": 0.7317922115325928, "learning_rate": 2.882458419083955e-06, "loss": 1.7377, "step": 86189 }, { "epoch": 2.87, "grad_norm": 0.736221432685852, "learning_rate": 2.8810131781900282e-06, "loss": 1.6717, "step": 86190 }, { "epoch": 2.87, "grad_norm": 0.6968823671340942, "learning_rate": 2.879568297954804e-06, "loss": 1.5905, "step": 86191 }, { "epoch": 2.87, "grad_norm": 0.7121399641036987, "learning_rate": 2.8781237783799816e-06, "loss": 1.6763, "step": 86192 }, { "epoch": 2.87, "grad_norm": 0.7072712779045105, "learning_rate": 2.876679619467359e-06, "loss": 1.6983, "step": 86193 }, { "epoch": 2.87, "grad_norm": 0.7209585309028625, "learning_rate": 2.8752358212186356e-06, "loss": 1.6987, "step": 86194 }, { "epoch": 2.87, "grad_norm": 0.7133920192718506, "learning_rate": 2.873792383635609e-06, "loss": 1.6827, "step": 86195 }, { "epoch": 2.87, "grad_norm": 0.7021650671958923, "learning_rate": 2.8723493067200453e-06, "loss": 1.6828, "step": 86196 }, { "epoch": 2.87, "grad_norm": 0.7020955085754395, "learning_rate": 2.870906590473643e-06, "loss": 1.6136, "step": 86197 }, { "epoch": 2.87, "grad_norm": 0.7070885300636292, "learning_rate": 2.8694642348982e-06, "loss": 1.6813, "step": 86198 }, { "epoch": 2.87, "grad_norm": 0.708401083946228, "learning_rate": 2.868022239995449e-06, "loss": 1.6693, "step": 86199 }, { "epoch": 2.87, "grad_norm": 0.7057903409004211, "learning_rate": 2.8665806057670883e-06, "loss": 1.6633, "step": 86200 }, { "epoch": 2.87, "grad_norm": 0.7155871391296387, "learning_rate": 2.8651393322149497e-06, "loss": 1.7523, "step": 86201 }, { "epoch": 2.87, "grad_norm": 0.6969539523124695, "learning_rate": 2.8636984193407987e-06, "loss": 1.6706, "step": 86202 }, { "epoch": 2.87, "grad_norm": 0.6923090219497681, "learning_rate": 2.862257867146267e-06, "loss": 1.6617, "step": 86203 }, { "epoch": 2.87, "grad_norm": 0.7292482256889343, "learning_rate": 2.860817675633187e-06, "loss": 1.667, "step": 86204 }, { "epoch": 2.87, "grad_norm": 0.755643367767334, "learning_rate": 2.8593778448033233e-06, "loss": 1.5866, "step": 86205 }, { "epoch": 2.87, "grad_norm": 0.7373555898666382, "learning_rate": 2.857938374658375e-06, "loss": 1.715, "step": 86206 }, { "epoch": 2.87, "grad_norm": 0.7123203277587891, "learning_rate": 2.856499265200107e-06, "loss": 1.6653, "step": 86207 }, { "epoch": 2.87, "grad_norm": 0.7111400961875916, "learning_rate": 2.855060516430252e-06, "loss": 1.6614, "step": 86208 }, { "epoch": 2.87, "grad_norm": 0.7088444232940674, "learning_rate": 2.8536221283506076e-06, "loss": 1.6417, "step": 86209 }, { "epoch": 2.87, "grad_norm": 0.7439244985580444, "learning_rate": 2.852184100962873e-06, "loss": 1.689, "step": 86210 }, { "epoch": 2.87, "grad_norm": 0.7109181880950928, "learning_rate": 2.8507464342687802e-06, "loss": 1.6748, "step": 86211 }, { "epoch": 2.87, "grad_norm": 0.7235444188117981, "learning_rate": 2.8493091282701273e-06, "loss": 1.7664, "step": 86212 }, { "epoch": 2.87, "grad_norm": 0.7132527828216553, "learning_rate": 2.8478721829686135e-06, "loss": 1.6193, "step": 86213 }, { "epoch": 2.87, "grad_norm": 0.7006654143333435, "learning_rate": 2.8464355983660035e-06, "loss": 1.7349, "step": 86214 }, { "epoch": 2.87, "grad_norm": 0.7164623737335205, "learning_rate": 2.84499937446403e-06, "loss": 1.7598, "step": 86215 }, { "epoch": 2.87, "grad_norm": 0.7248967885971069, "learning_rate": 2.8435635112644573e-06, "loss": 1.7434, "step": 86216 }, { "epoch": 2.87, "grad_norm": 0.6983792781829834, "learning_rate": 2.842128008769018e-06, "loss": 1.6656, "step": 86217 }, { "epoch": 2.87, "grad_norm": 0.7214042544364929, "learning_rate": 2.8406928669794106e-06, "loss": 1.6786, "step": 86218 }, { "epoch": 2.87, "grad_norm": 0.7151840925216675, "learning_rate": 2.8392580858974665e-06, "loss": 1.6777, "step": 86219 }, { "epoch": 2.87, "grad_norm": 0.7267051339149475, "learning_rate": 2.837823665524852e-06, "loss": 1.6785, "step": 86220 }, { "epoch": 2.87, "grad_norm": 0.7128389477729797, "learning_rate": 2.836389605863332e-06, "loss": 1.7097, "step": 86221 }, { "epoch": 2.87, "grad_norm": 0.7141631841659546, "learning_rate": 2.834955906914671e-06, "loss": 1.6336, "step": 86222 }, { "epoch": 2.87, "grad_norm": 0.7169339656829834, "learning_rate": 2.8335225686806017e-06, "loss": 1.7288, "step": 86223 }, { "epoch": 2.87, "grad_norm": 0.7143639922142029, "learning_rate": 2.8320895911627896e-06, "loss": 1.6462, "step": 86224 }, { "epoch": 2.87, "grad_norm": 0.7430977821350098, "learning_rate": 2.8306569743630657e-06, "loss": 1.643, "step": 86225 }, { "epoch": 2.87, "grad_norm": 0.7106483578681946, "learning_rate": 2.8292247182831627e-06, "loss": 1.6537, "step": 86226 }, { "epoch": 2.87, "grad_norm": 0.7210830450057983, "learning_rate": 2.8277928229247794e-06, "loss": 1.6459, "step": 86227 }, { "epoch": 2.87, "grad_norm": 0.7304577827453613, "learning_rate": 2.826361288289647e-06, "loss": 1.6782, "step": 86228 }, { "epoch": 2.87, "grad_norm": 0.7004780769348145, "learning_rate": 2.8249301143795644e-06, "loss": 1.6978, "step": 86229 }, { "epoch": 2.87, "grad_norm": 0.7099141478538513, "learning_rate": 2.8234993011962305e-06, "loss": 1.6567, "step": 86230 }, { "epoch": 2.87, "grad_norm": 0.7392059564590454, "learning_rate": 2.8220688487413433e-06, "loss": 1.6846, "step": 86231 }, { "epoch": 2.87, "grad_norm": 0.7068904042243958, "learning_rate": 2.8206387570167022e-06, "loss": 1.6271, "step": 86232 }, { "epoch": 2.87, "grad_norm": 0.7122978568077087, "learning_rate": 2.8192090260240053e-06, "loss": 1.6337, "step": 86233 }, { "epoch": 2.87, "grad_norm": 0.7337987422943115, "learning_rate": 2.8177796557650176e-06, "loss": 1.6849, "step": 86234 }, { "epoch": 2.87, "grad_norm": 0.7092958092689514, "learning_rate": 2.816350646241439e-06, "loss": 1.6462, "step": 86235 }, { "epoch": 2.87, "grad_norm": 0.7045466899871826, "learning_rate": 2.814921997455033e-06, "loss": 1.6795, "step": 86236 }, { "epoch": 2.87, "grad_norm": 0.7170265316963196, "learning_rate": 2.8134937094074993e-06, "loss": 1.6776, "step": 86237 }, { "epoch": 2.87, "grad_norm": 0.7116108536720276, "learning_rate": 2.8120657821006363e-06, "loss": 1.6436, "step": 86238 }, { "epoch": 2.87, "grad_norm": 0.7095945477485657, "learning_rate": 2.8106382155361097e-06, "loss": 1.6868, "step": 86239 }, { "epoch": 2.87, "grad_norm": 0.7105479836463928, "learning_rate": 2.8092110097156837e-06, "loss": 1.6786, "step": 86240 }, { "epoch": 2.87, "grad_norm": 0.7174169421195984, "learning_rate": 2.8077841646410915e-06, "loss": 1.6465, "step": 86241 }, { "epoch": 2.87, "grad_norm": 0.7089610695838928, "learning_rate": 2.8063576803140643e-06, "loss": 1.6739, "step": 86242 }, { "epoch": 2.87, "grad_norm": 0.7267738580703735, "learning_rate": 2.804931556736334e-06, "loss": 1.6553, "step": 86243 }, { "epoch": 2.87, "grad_norm": 0.7069406509399414, "learning_rate": 2.8035057939095995e-06, "loss": 1.706, "step": 86244 }, { "epoch": 2.87, "grad_norm": 0.6799718141555786, "learning_rate": 2.8020803918356594e-06, "loss": 1.6735, "step": 86245 }, { "epoch": 2.87, "grad_norm": 0.7080395221710205, "learning_rate": 2.8006553505161787e-06, "loss": 1.7195, "step": 86246 }, { "epoch": 2.87, "grad_norm": 0.6733883619308472, "learning_rate": 2.799230669952923e-06, "loss": 1.675, "step": 86247 }, { "epoch": 2.87, "grad_norm": 0.7027841210365295, "learning_rate": 2.7978063501475913e-06, "loss": 1.6495, "step": 86248 }, { "epoch": 2.87, "grad_norm": 0.710845410823822, "learning_rate": 2.796382391101948e-06, "loss": 1.7164, "step": 86249 }, { "epoch": 2.87, "grad_norm": 0.7244032621383667, "learning_rate": 2.794958792817692e-06, "loss": 1.6247, "step": 86250 }, { "epoch": 2.87, "grad_norm": 0.7017459273338318, "learning_rate": 2.793535555296589e-06, "loss": 1.6529, "step": 86251 }, { "epoch": 2.87, "grad_norm": 0.7073562741279602, "learning_rate": 2.792112678540337e-06, "loss": 1.7159, "step": 86252 }, { "epoch": 2.87, "grad_norm": 0.7261552810668945, "learning_rate": 2.790690162550635e-06, "loss": 1.6886, "step": 86253 }, { "epoch": 2.87, "grad_norm": 0.7296094298362732, "learning_rate": 2.7892680073292817e-06, "loss": 1.6211, "step": 86254 }, { "epoch": 2.87, "grad_norm": 0.7179263234138489, "learning_rate": 2.787846212877942e-06, "loss": 1.6752, "step": 86255 }, { "epoch": 2.87, "grad_norm": 0.7251678109169006, "learning_rate": 2.786424779198382e-06, "loss": 1.6986, "step": 86256 }, { "epoch": 2.87, "grad_norm": 0.7096547484397888, "learning_rate": 2.7850037062922993e-06, "loss": 1.696, "step": 86257 }, { "epoch": 2.87, "grad_norm": 0.7030725479125977, "learning_rate": 2.783582994161426e-06, "loss": 1.7531, "step": 86258 }, { "epoch": 2.87, "grad_norm": 0.6980786323547363, "learning_rate": 2.782162642807528e-06, "loss": 1.6769, "step": 86259 }, { "epoch": 2.87, "grad_norm": 0.7261795997619629, "learning_rate": 2.7807426522322374e-06, "loss": 1.6499, "step": 86260 }, { "epoch": 2.87, "grad_norm": 0.6907135844230652, "learning_rate": 2.779323022437352e-06, "loss": 1.6484, "step": 86261 }, { "epoch": 2.87, "grad_norm": 0.7385106086730957, "learning_rate": 2.7779037534245707e-06, "loss": 1.6573, "step": 86262 }, { "epoch": 2.87, "grad_norm": 0.718308687210083, "learning_rate": 2.776484845195659e-06, "loss": 1.6719, "step": 86263 }, { "epoch": 2.87, "grad_norm": 0.7230985164642334, "learning_rate": 2.7750662977522486e-06, "loss": 1.605, "step": 86264 }, { "epoch": 2.87, "grad_norm": 0.7164308428764343, "learning_rate": 2.7736481110961384e-06, "loss": 1.6529, "step": 86265 }, { "epoch": 2.87, "grad_norm": 0.6934950351715088, "learning_rate": 2.7722302852290267e-06, "loss": 1.5787, "step": 86266 }, { "epoch": 2.87, "grad_norm": 0.702354371547699, "learning_rate": 2.7708128201526126e-06, "loss": 1.6633, "step": 86267 }, { "epoch": 2.87, "grad_norm": 0.7046837210655212, "learning_rate": 2.769395715868661e-06, "loss": 1.6155, "step": 86268 }, { "epoch": 2.87, "grad_norm": 0.7071489095687866, "learning_rate": 2.76797897237887e-06, "loss": 1.6111, "step": 86269 }, { "epoch": 2.87, "grad_norm": 0.7191953659057617, "learning_rate": 2.7665625896849397e-06, "loss": 1.7223, "step": 86270 }, { "epoch": 2.87, "grad_norm": 0.7116073966026306, "learning_rate": 2.765146567788601e-06, "loss": 1.6704, "step": 86271 }, { "epoch": 2.87, "grad_norm": 0.7105345129966736, "learning_rate": 2.763730906691619e-06, "loss": 1.6429, "step": 86272 }, { "epoch": 2.87, "grad_norm": 0.7322598099708557, "learning_rate": 2.7623156063956595e-06, "loss": 1.6742, "step": 86273 }, { "epoch": 2.87, "grad_norm": 0.6958869695663452, "learning_rate": 2.7609006669024215e-06, "loss": 1.6312, "step": 86274 }, { "epoch": 2.87, "grad_norm": 0.7172794938087463, "learning_rate": 2.7594860882136695e-06, "loss": 1.6407, "step": 86275 }, { "epoch": 2.87, "grad_norm": 0.7032223343849182, "learning_rate": 2.7580718703311024e-06, "loss": 1.7287, "step": 86276 }, { "epoch": 2.87, "grad_norm": 0.7189263701438904, "learning_rate": 2.7566580132564518e-06, "loss": 1.656, "step": 86277 }, { "epoch": 2.87, "grad_norm": 0.7204654216766357, "learning_rate": 2.755244516991384e-06, "loss": 1.6643, "step": 86278 }, { "epoch": 2.87, "grad_norm": 0.708685576915741, "learning_rate": 2.7538313815376965e-06, "loss": 1.7229, "step": 86279 }, { "epoch": 2.87, "grad_norm": 0.7131606936454773, "learning_rate": 2.7524186068970554e-06, "loss": 1.6684, "step": 86280 }, { "epoch": 2.87, "grad_norm": 0.7286781072616577, "learning_rate": 2.751006193071159e-06, "loss": 1.6816, "step": 86281 }, { "epoch": 2.87, "grad_norm": 0.6933572292327881, "learning_rate": 2.7495941400617727e-06, "loss": 1.6473, "step": 86282 }, { "epoch": 2.87, "grad_norm": 0.7178099751472473, "learning_rate": 2.7481824478705616e-06, "loss": 1.6546, "step": 86283 }, { "epoch": 2.87, "grad_norm": 0.7206009030342102, "learning_rate": 2.7467711164992577e-06, "loss": 1.6901, "step": 86284 }, { "epoch": 2.87, "grad_norm": 0.7276264429092407, "learning_rate": 2.74536014594956e-06, "loss": 1.6891, "step": 86285 }, { "epoch": 2.87, "grad_norm": 0.70633864402771, "learning_rate": 2.7439495362232332e-06, "loss": 1.66, "step": 86286 }, { "epoch": 2.87, "grad_norm": 0.7233249545097351, "learning_rate": 2.742539287321943e-06, "loss": 1.6965, "step": 86287 }, { "epoch": 2.87, "grad_norm": 0.7111359238624573, "learning_rate": 2.741129399247388e-06, "loss": 1.6441, "step": 86288 }, { "epoch": 2.87, "grad_norm": 0.700626790523529, "learning_rate": 2.7397198720013335e-06, "loss": 1.6808, "step": 86289 }, { "epoch": 2.87, "grad_norm": 0.721724808216095, "learning_rate": 2.738310705585445e-06, "loss": 1.6979, "step": 86290 }, { "epoch": 2.87, "grad_norm": 0.7069079875946045, "learning_rate": 2.7369019000014536e-06, "loss": 1.6848, "step": 86291 }, { "epoch": 2.87, "grad_norm": 0.7384541034698486, "learning_rate": 2.735493455251059e-06, "loss": 1.7145, "step": 86292 }, { "epoch": 2.87, "grad_norm": 0.7053834199905396, "learning_rate": 2.734085371335959e-06, "loss": 1.6649, "step": 86293 }, { "epoch": 2.87, "grad_norm": 0.706807017326355, "learning_rate": 2.7326776482579193e-06, "loss": 1.6214, "step": 86294 }, { "epoch": 2.87, "grad_norm": 0.7212318181991577, "learning_rate": 2.731270286018572e-06, "loss": 1.712, "step": 86295 }, { "epoch": 2.87, "grad_norm": 0.7395330667495728, "learning_rate": 2.7298632846196822e-06, "loss": 1.6415, "step": 86296 }, { "epoch": 2.87, "grad_norm": 0.7225078344345093, "learning_rate": 2.728456644062915e-06, "loss": 1.6685, "step": 86297 }, { "epoch": 2.87, "grad_norm": 0.7383258938789368, "learning_rate": 2.7270503643500364e-06, "loss": 1.754, "step": 86298 }, { "epoch": 2.87, "grad_norm": 0.7463290095329285, "learning_rate": 2.725644445482711e-06, "loss": 1.6027, "step": 86299 }, { "epoch": 2.87, "grad_norm": 0.7388125658035278, "learning_rate": 2.7242388874626043e-06, "loss": 1.6678, "step": 86300 }, { "epoch": 2.87, "grad_norm": 0.7164449095726013, "learning_rate": 2.722833690291515e-06, "loss": 1.6794, "step": 86301 }, { "epoch": 2.87, "grad_norm": 0.7362709641456604, "learning_rate": 2.721428853971075e-06, "loss": 1.7009, "step": 86302 }, { "epoch": 2.87, "grad_norm": 0.7092019319534302, "learning_rate": 2.7200243785030494e-06, "loss": 1.6558, "step": 86303 }, { "epoch": 2.87, "grad_norm": 0.7053685784339905, "learning_rate": 2.7186202638891043e-06, "loss": 1.6352, "step": 86304 }, { "epoch": 2.87, "grad_norm": 0.7220644354820251, "learning_rate": 2.7172165101309373e-06, "loss": 1.6066, "step": 86305 }, { "epoch": 2.87, "grad_norm": 0.7326117753982544, "learning_rate": 2.715813117230281e-06, "loss": 1.6832, "step": 86306 }, { "epoch": 2.87, "grad_norm": 0.6977185010910034, "learning_rate": 2.714410085188834e-06, "loss": 1.6499, "step": 86307 }, { "epoch": 2.87, "grad_norm": 0.7222151160240173, "learning_rate": 2.7130074140082612e-06, "loss": 1.6264, "step": 86308 }, { "epoch": 2.87, "grad_norm": 0.7081060409545898, "learning_rate": 2.7116051036903285e-06, "loss": 1.6458, "step": 86309 }, { "epoch": 2.87, "grad_norm": 0.728312611579895, "learning_rate": 2.7102031542366675e-06, "loss": 1.625, "step": 86310 }, { "epoch": 2.87, "grad_norm": 0.6946282982826233, "learning_rate": 2.7088015656490105e-06, "loss": 1.6662, "step": 86311 }, { "epoch": 2.87, "grad_norm": 0.7085241675376892, "learning_rate": 2.707400337929122e-06, "loss": 1.6957, "step": 86312 }, { "epoch": 2.87, "grad_norm": 0.7211646437644958, "learning_rate": 2.7059994710786016e-06, "loss": 1.65, "step": 86313 }, { "epoch": 2.87, "grad_norm": 0.7254989147186279, "learning_rate": 2.7045989650991806e-06, "loss": 1.5784, "step": 86314 }, { "epoch": 2.87, "grad_norm": 0.7066301107406616, "learning_rate": 2.7031988199926246e-06, "loss": 1.7115, "step": 86315 }, { "epoch": 2.87, "grad_norm": 0.6840009689331055, "learning_rate": 2.7017990357605324e-06, "loss": 1.6054, "step": 86316 }, { "epoch": 2.87, "grad_norm": 0.7094839811325073, "learning_rate": 2.700399612404669e-06, "loss": 1.6072, "step": 86317 }, { "epoch": 2.87, "grad_norm": 0.7181428670883179, "learning_rate": 2.6990005499267e-06, "loss": 1.7148, "step": 86318 }, { "epoch": 2.87, "grad_norm": 0.6978926062583923, "learning_rate": 2.69760184832839e-06, "loss": 1.7147, "step": 86319 }, { "epoch": 2.87, "grad_norm": 0.704943060874939, "learning_rate": 2.6962035076113387e-06, "loss": 1.6181, "step": 86320 }, { "epoch": 2.87, "grad_norm": 0.7184610366821289, "learning_rate": 2.6948055277772775e-06, "loss": 1.6961, "step": 86321 }, { "epoch": 2.87, "grad_norm": 0.7119206786155701, "learning_rate": 2.6934079088279715e-06, "loss": 1.6729, "step": 86322 }, { "epoch": 2.87, "grad_norm": 0.7388748526573181, "learning_rate": 2.6920106507650195e-06, "loss": 1.645, "step": 86323 }, { "epoch": 2.87, "grad_norm": 0.715158224105835, "learning_rate": 2.690613753590187e-06, "loss": 1.6652, "step": 86324 }, { "epoch": 2.87, "grad_norm": 0.7113208174705505, "learning_rate": 2.6892172173051065e-06, "loss": 1.6522, "step": 86325 }, { "epoch": 2.87, "grad_norm": 0.7059022188186646, "learning_rate": 2.687821041911542e-06, "loss": 1.695, "step": 86326 }, { "epoch": 2.87, "grad_norm": 0.7045459151268005, "learning_rate": 2.68642522741116e-06, "loss": 1.6913, "step": 86327 }, { "epoch": 2.87, "grad_norm": 0.7212966084480286, "learning_rate": 2.685029773805625e-06, "loss": 1.6564, "step": 86328 }, { "epoch": 2.87, "grad_norm": 0.6844040155410767, "learning_rate": 2.6836346810966693e-06, "loss": 1.6784, "step": 86329 }, { "epoch": 2.87, "grad_norm": 0.7086369395256042, "learning_rate": 2.6822399492859912e-06, "loss": 1.6672, "step": 86330 }, { "epoch": 2.87, "grad_norm": 0.7139421105384827, "learning_rate": 2.6808455783752235e-06, "loss": 1.6763, "step": 86331 }, { "epoch": 2.87, "grad_norm": 0.7135997414588928, "learning_rate": 2.6794515683661313e-06, "loss": 1.665, "step": 86332 }, { "epoch": 2.87, "grad_norm": 0.7104310393333435, "learning_rate": 2.678057919260379e-06, "loss": 1.7067, "step": 86333 }, { "epoch": 2.87, "grad_norm": 0.6996405124664307, "learning_rate": 2.676664631059633e-06, "loss": 1.6644, "step": 86334 }, { "epoch": 2.87, "grad_norm": 0.7331873178482056, "learning_rate": 2.6752717037656246e-06, "loss": 1.6946, "step": 86335 }, { "epoch": 2.87, "grad_norm": 0.7432098388671875, "learning_rate": 2.6738791373800196e-06, "loss": 1.6641, "step": 86336 }, { "epoch": 2.87, "grad_norm": 0.719414472579956, "learning_rate": 2.6724869319045494e-06, "loss": 1.6447, "step": 86337 }, { "epoch": 2.87, "grad_norm": 0.6933688521385193, "learning_rate": 2.6710950873408132e-06, "loss": 1.6436, "step": 86338 }, { "epoch": 2.87, "grad_norm": 0.7473446130752563, "learning_rate": 2.6697036036906093e-06, "loss": 1.7786, "step": 86339 }, { "epoch": 2.87, "grad_norm": 0.7064957022666931, "learning_rate": 2.66831248095557e-06, "loss": 1.6225, "step": 86340 }, { "epoch": 2.87, "grad_norm": 0.713364839553833, "learning_rate": 2.6669217191373603e-06, "loss": 1.608, "step": 86341 }, { "epoch": 2.87, "grad_norm": 0.7018292546272278, "learning_rate": 2.665531318237679e-06, "loss": 1.6924, "step": 86342 }, { "epoch": 2.87, "grad_norm": 0.7201235294342041, "learning_rate": 2.6641412782582917e-06, "loss": 1.6514, "step": 86343 }, { "epoch": 2.87, "grad_norm": 0.7455183267593384, "learning_rate": 2.6627515992007964e-06, "loss": 1.7206, "step": 86344 }, { "epoch": 2.87, "grad_norm": 0.7033712267875671, "learning_rate": 2.6613622810668922e-06, "loss": 1.6251, "step": 86345 }, { "epoch": 2.87, "grad_norm": 0.7143012881278992, "learning_rate": 2.659973323858344e-06, "loss": 1.6895, "step": 86346 }, { "epoch": 2.87, "grad_norm": 0.7463070154190063, "learning_rate": 2.658584727576718e-06, "loss": 1.6993, "step": 86347 }, { "epoch": 2.87, "grad_norm": 0.7341316938400269, "learning_rate": 2.6571964922237786e-06, "loss": 1.6906, "step": 86348 }, { "epoch": 2.87, "grad_norm": 0.7129380702972412, "learning_rate": 2.655808617801192e-06, "loss": 1.6613, "step": 86349 }, { "epoch": 2.87, "grad_norm": 0.714942991733551, "learning_rate": 2.6544211043106555e-06, "loss": 1.6789, "step": 86350 }, { "epoch": 2.87, "grad_norm": 0.7107158899307251, "learning_rate": 2.653033951753836e-06, "loss": 1.631, "step": 86351 }, { "epoch": 2.87, "grad_norm": 0.7334840297698975, "learning_rate": 2.6516471601323973e-06, "loss": 1.6709, "step": 86352 }, { "epoch": 2.87, "grad_norm": 0.7248815298080444, "learning_rate": 2.6502607294480394e-06, "loss": 1.6195, "step": 86353 }, { "epoch": 2.87, "grad_norm": 0.7150375247001648, "learning_rate": 2.6488746597024934e-06, "loss": 1.7123, "step": 86354 }, { "epoch": 2.87, "grad_norm": 0.7598444819450378, "learning_rate": 2.6474889508973916e-06, "loss": 1.6335, "step": 86355 }, { "epoch": 2.87, "grad_norm": 0.7149099707603455, "learning_rate": 2.6461036030343997e-06, "loss": 1.629, "step": 86356 }, { "epoch": 2.87, "grad_norm": 0.7432836294174194, "learning_rate": 2.644718616115249e-06, "loss": 1.6483, "step": 86357 }, { "epoch": 2.87, "grad_norm": 0.7376551032066345, "learning_rate": 2.643333990141572e-06, "loss": 1.7172, "step": 86358 }, { "epoch": 2.87, "grad_norm": 0.7196870446205139, "learning_rate": 2.6419497251151003e-06, "loss": 1.627, "step": 86359 }, { "epoch": 2.87, "grad_norm": 0.7008451223373413, "learning_rate": 2.6405658210374657e-06, "loss": 1.6439, "step": 86360 }, { "epoch": 2.87, "grad_norm": 0.7289102077484131, "learning_rate": 2.6391822779104012e-06, "loss": 1.6774, "step": 86361 }, { "epoch": 2.87, "grad_norm": 0.7040697336196899, "learning_rate": 2.6377990957355378e-06, "loss": 1.6566, "step": 86362 }, { "epoch": 2.87, "grad_norm": 0.7140939831733704, "learning_rate": 2.6364162745145745e-06, "loss": 1.6707, "step": 86363 }, { "epoch": 2.87, "grad_norm": 0.7111567854881287, "learning_rate": 2.6350338142491766e-06, "loss": 1.6256, "step": 86364 }, { "epoch": 2.87, "grad_norm": 0.7135821580886841, "learning_rate": 2.633651714941043e-06, "loss": 1.5492, "step": 86365 }, { "epoch": 2.87, "grad_norm": 0.7368764281272888, "learning_rate": 2.6322699765918386e-06, "loss": 1.6777, "step": 86366 }, { "epoch": 2.87, "grad_norm": 0.7211518287658691, "learning_rate": 2.630888599203229e-06, "loss": 1.7295, "step": 86367 }, { "epoch": 2.87, "grad_norm": 0.7261766791343689, "learning_rate": 2.629507582776913e-06, "loss": 1.6933, "step": 86368 }, { "epoch": 2.87, "grad_norm": 0.710227370262146, "learning_rate": 2.6281269273145887e-06, "loss": 1.7623, "step": 86369 }, { "epoch": 2.87, "grad_norm": 0.7220383286476135, "learning_rate": 2.6267466328178886e-06, "loss": 1.6733, "step": 86370 }, { "epoch": 2.87, "grad_norm": 0.7440745234489441, "learning_rate": 2.625366699288478e-06, "loss": 1.732, "step": 86371 }, { "epoch": 2.87, "grad_norm": 0.7010273933410645, "learning_rate": 2.6239871267280886e-06, "loss": 1.6267, "step": 86372 }, { "epoch": 2.87, "grad_norm": 0.7151015400886536, "learning_rate": 2.6226079151383528e-06, "loss": 1.6988, "step": 86373 }, { "epoch": 2.87, "grad_norm": 0.6827728748321533, "learning_rate": 2.6212290645209354e-06, "loss": 1.6448, "step": 86374 }, { "epoch": 2.87, "grad_norm": 0.7201138138771057, "learning_rate": 2.6198505748775356e-06, "loss": 1.715, "step": 86375 }, { "epoch": 2.87, "grad_norm": 0.6938297152519226, "learning_rate": 2.6184724462098518e-06, "loss": 1.6595, "step": 86376 }, { "epoch": 2.87, "grad_norm": 0.7274729609489441, "learning_rate": 2.617094678519482e-06, "loss": 1.587, "step": 86377 }, { "epoch": 2.87, "grad_norm": 0.7174072861671448, "learning_rate": 2.6157172718081265e-06, "loss": 1.7886, "step": 86378 }, { "epoch": 2.87, "grad_norm": 0.7326534986495972, "learning_rate": 2.614340226077516e-06, "loss": 1.723, "step": 86379 }, { "epoch": 2.87, "grad_norm": 0.7524913549423218, "learning_rate": 2.6129635413292828e-06, "loss": 1.7613, "step": 86380 }, { "epoch": 2.87, "grad_norm": 0.698396623134613, "learning_rate": 2.611587217565059e-06, "loss": 1.6399, "step": 86381 }, { "epoch": 2.87, "grad_norm": 0.7302539348602295, "learning_rate": 2.610211254786576e-06, "loss": 1.5552, "step": 86382 }, { "epoch": 2.87, "grad_norm": 0.7019535303115845, "learning_rate": 2.6088356529955e-06, "loss": 1.6213, "step": 86383 }, { "epoch": 2.87, "grad_norm": 0.7120167016983032, "learning_rate": 2.6074604121934297e-06, "loss": 1.6331, "step": 86384 }, { "epoch": 2.87, "grad_norm": 0.7310933470726013, "learning_rate": 2.606085532382096e-06, "loss": 1.6565, "step": 86385 }, { "epoch": 2.87, "grad_norm": 0.7333632707595825, "learning_rate": 2.604711013563199e-06, "loss": 1.642, "step": 86386 }, { "epoch": 2.87, "grad_norm": 0.7099767923355103, "learning_rate": 2.603336855738336e-06, "loss": 1.66, "step": 86387 }, { "epoch": 2.87, "grad_norm": 0.7359635829925537, "learning_rate": 2.6019630589091733e-06, "loss": 1.6728, "step": 86388 }, { "epoch": 2.87, "grad_norm": 0.7034708857536316, "learning_rate": 2.6005896230774424e-06, "loss": 1.7203, "step": 86389 }, { "epoch": 2.87, "grad_norm": 0.719007134437561, "learning_rate": 2.599216548244776e-06, "loss": 1.6503, "step": 86390 }, { "epoch": 2.87, "grad_norm": 0.7217336297035217, "learning_rate": 2.597843834412805e-06, "loss": 1.6546, "step": 86391 }, { "epoch": 2.87, "grad_norm": 0.6999595761299133, "learning_rate": 2.596471481583262e-06, "loss": 1.6735, "step": 86392 }, { "epoch": 2.87, "grad_norm": 0.7294695377349854, "learning_rate": 2.595099489757779e-06, "loss": 1.656, "step": 86393 }, { "epoch": 2.87, "grad_norm": 0.6956878900527954, "learning_rate": 2.5937278589380215e-06, "loss": 1.6919, "step": 86394 }, { "epoch": 2.87, "grad_norm": 0.7163317203521729, "learning_rate": 2.5923565891256547e-06, "loss": 1.7211, "step": 86395 }, { "epoch": 2.87, "grad_norm": 0.7195461392402649, "learning_rate": 2.590985680322344e-06, "loss": 1.6543, "step": 86396 }, { "epoch": 2.87, "grad_norm": 0.7025277018547058, "learning_rate": 2.5896151325297878e-06, "loss": 1.6781, "step": 86397 }, { "epoch": 2.87, "grad_norm": 0.7090453505516052, "learning_rate": 2.5882449457495847e-06, "loss": 1.6528, "step": 86398 }, { "epoch": 2.87, "grad_norm": 0.7013808488845825, "learning_rate": 2.586875119983434e-06, "loss": 1.6141, "step": 86399 }, { "epoch": 2.87, "grad_norm": 0.7227686047554016, "learning_rate": 2.585505655233e-06, "loss": 1.6905, "step": 86400 }, { "epoch": 2.87, "grad_norm": 0.6987131834030151, "learning_rate": 2.584136551499949e-06, "loss": 1.713, "step": 86401 }, { "epoch": 2.87, "grad_norm": 0.7218641638755798, "learning_rate": 2.5827678087859125e-06, "loss": 1.6813, "step": 86402 }, { "epoch": 2.87, "grad_norm": 0.6905079483985901, "learning_rate": 2.5813994270925897e-06, "loss": 1.6775, "step": 86403 }, { "epoch": 2.87, "grad_norm": 0.7075169086456299, "learning_rate": 2.580031406421612e-06, "loss": 1.6821, "step": 86404 }, { "epoch": 2.87, "grad_norm": 0.7072692513465881, "learning_rate": 2.578663746774645e-06, "loss": 1.6463, "step": 86405 }, { "epoch": 2.87, "grad_norm": 0.7130236625671387, "learning_rate": 2.5772964481533876e-06, "loss": 1.6781, "step": 86406 }, { "epoch": 2.87, "grad_norm": 0.7123041749000549, "learning_rate": 2.575929510559438e-06, "loss": 1.5861, "step": 86407 }, { "epoch": 2.87, "grad_norm": 0.7199356555938721, "learning_rate": 2.5745629339945285e-06, "loss": 1.6581, "step": 86408 }, { "epoch": 2.87, "grad_norm": 0.7008245587348938, "learning_rate": 2.5731967184602243e-06, "loss": 1.6645, "step": 86409 }, { "epoch": 2.87, "grad_norm": 0.7090876698493958, "learning_rate": 2.5718308639582906e-06, "loss": 1.7295, "step": 86410 }, { "epoch": 2.87, "grad_norm": 0.7138057351112366, "learning_rate": 2.5704653704902933e-06, "loss": 1.6656, "step": 86411 }, { "epoch": 2.87, "grad_norm": 0.719203770160675, "learning_rate": 2.5691002380579306e-06, "loss": 1.708, "step": 86412 }, { "epoch": 2.87, "grad_norm": 0.7032755017280579, "learning_rate": 2.567735466662868e-06, "loss": 1.6401, "step": 86413 }, { "epoch": 2.88, "grad_norm": 0.699733316898346, "learning_rate": 2.5663710563067375e-06, "loss": 1.7013, "step": 86414 }, { "epoch": 2.88, "grad_norm": 0.7224995493888855, "learning_rate": 2.565007006991204e-06, "loss": 1.7356, "step": 86415 }, { "epoch": 2.88, "grad_norm": 0.7321990132331848, "learning_rate": 2.5636433187179673e-06, "loss": 1.7074, "step": 86416 }, { "epoch": 2.88, "grad_norm": 0.6988779306411743, "learning_rate": 2.5622799914885914e-06, "loss": 1.709, "step": 86417 }, { "epoch": 2.88, "grad_norm": 0.7264604568481445, "learning_rate": 2.5609170253048096e-06, "loss": 1.7107, "step": 86418 }, { "epoch": 2.88, "grad_norm": 0.6999284029006958, "learning_rate": 2.5595544201682193e-06, "loss": 1.6495, "step": 86419 }, { "epoch": 2.88, "grad_norm": 0.710770308971405, "learning_rate": 2.5581921760805536e-06, "loss": 1.6207, "step": 86420 }, { "epoch": 2.88, "grad_norm": 0.7075141668319702, "learning_rate": 2.556830293043377e-06, "loss": 1.6601, "step": 86421 }, { "epoch": 2.88, "grad_norm": 0.7484373450279236, "learning_rate": 2.5554687710583888e-06, "loss": 1.6764, "step": 86422 }, { "epoch": 2.88, "grad_norm": 0.6948704719543457, "learning_rate": 2.554107610127254e-06, "loss": 1.6575, "step": 86423 }, { "epoch": 2.88, "grad_norm": 0.7295592427253723, "learning_rate": 2.5527468102515712e-06, "loss": 1.6155, "step": 86424 }, { "epoch": 2.88, "grad_norm": 0.7198755145072937, "learning_rate": 2.5513863714330727e-06, "loss": 1.5965, "step": 86425 }, { "epoch": 2.88, "grad_norm": 0.7249225378036499, "learning_rate": 2.550026293673324e-06, "loss": 1.7417, "step": 86426 }, { "epoch": 2.88, "grad_norm": 0.726514995098114, "learning_rate": 2.5486665769740232e-06, "loss": 1.6411, "step": 86427 }, { "epoch": 2.88, "grad_norm": 0.7044986486434937, "learning_rate": 2.547307221336836e-06, "loss": 1.7336, "step": 86428 }, { "epoch": 2.88, "grad_norm": 0.709126889705658, "learning_rate": 2.545948226763361e-06, "loss": 1.6692, "step": 86429 }, { "epoch": 2.88, "grad_norm": 0.7019938230514526, "learning_rate": 2.5445895932553305e-06, "loss": 1.6576, "step": 86430 }, { "epoch": 2.88, "grad_norm": 0.7312509417533875, "learning_rate": 2.5432313208142762e-06, "loss": 1.6929, "step": 86431 }, { "epoch": 2.88, "grad_norm": 0.7215519547462463, "learning_rate": 2.5418734094419303e-06, "loss": 1.7503, "step": 86432 }, { "epoch": 2.88, "grad_norm": 0.7215045690536499, "learning_rate": 2.540515859139958e-06, "loss": 1.6881, "step": 86433 }, { "epoch": 2.88, "grad_norm": 0.7090092301368713, "learning_rate": 2.5391586699099244e-06, "loss": 1.6899, "step": 86434 }, { "epoch": 2.88, "grad_norm": 0.7200117707252502, "learning_rate": 2.5378018417535284e-06, "loss": 1.7131, "step": 86435 }, { "epoch": 2.88, "grad_norm": 0.7237758636474609, "learning_rate": 2.536445374672469e-06, "loss": 1.6235, "step": 86436 }, { "epoch": 2.88, "grad_norm": 0.7196474671363831, "learning_rate": 2.5350892686682778e-06, "loss": 1.6867, "step": 86437 }, { "epoch": 2.88, "grad_norm": 0.733917772769928, "learning_rate": 2.5337335237426536e-06, "loss": 1.679, "step": 86438 }, { "epoch": 2.88, "grad_norm": 0.696635365486145, "learning_rate": 2.532378139897262e-06, "loss": 1.7699, "step": 86439 }, { "epoch": 2.88, "grad_norm": 0.7195039987564087, "learning_rate": 2.5310231171337345e-06, "loss": 1.6619, "step": 86440 }, { "epoch": 2.88, "grad_norm": 0.7289290428161621, "learning_rate": 2.529668455453737e-06, "loss": 1.7532, "step": 86441 }, { "epoch": 2.88, "grad_norm": 0.7308027744293213, "learning_rate": 2.528314154858835e-06, "loss": 1.6821, "step": 86442 }, { "epoch": 2.88, "grad_norm": 0.7163458466529846, "learning_rate": 2.5269602153507927e-06, "loss": 1.7366, "step": 86443 }, { "epoch": 2.88, "grad_norm": 0.7224231362342834, "learning_rate": 2.5256066369311767e-06, "loss": 1.6837, "step": 86444 }, { "epoch": 2.88, "grad_norm": 0.7145205736160278, "learning_rate": 2.5242534196016185e-06, "loss": 1.71, "step": 86445 }, { "epoch": 2.88, "grad_norm": 0.7089710831642151, "learning_rate": 2.5229005633637833e-06, "loss": 1.6741, "step": 86446 }, { "epoch": 2.88, "grad_norm": 0.7128982543945312, "learning_rate": 2.521548068219337e-06, "loss": 1.743, "step": 86447 }, { "epoch": 2.88, "grad_norm": 0.709201991558075, "learning_rate": 2.520195934169911e-06, "loss": 1.6845, "step": 86448 }, { "epoch": 2.88, "grad_norm": 0.7148386240005493, "learning_rate": 2.5188441612171038e-06, "loss": 1.641, "step": 86449 }, { "epoch": 2.88, "grad_norm": 0.7104814052581787, "learning_rate": 2.517492749362615e-06, "loss": 1.6962, "step": 86450 }, { "epoch": 2.88, "grad_norm": 0.7048357129096985, "learning_rate": 2.516141698608043e-06, "loss": 1.6959, "step": 86451 }, { "epoch": 2.88, "grad_norm": 0.7583226561546326, "learning_rate": 2.5147910089550195e-06, "loss": 1.69, "step": 86452 }, { "epoch": 2.88, "grad_norm": 0.7005681991577148, "learning_rate": 2.513440680405243e-06, "loss": 1.6183, "step": 86453 }, { "epoch": 2.88, "grad_norm": 0.7058177590370178, "learning_rate": 2.512090712960313e-06, "loss": 1.6393, "step": 86454 }, { "epoch": 2.88, "grad_norm": 0.7131307125091553, "learning_rate": 2.510741106621861e-06, "loss": 1.6949, "step": 86455 }, { "epoch": 2.88, "grad_norm": 0.699763298034668, "learning_rate": 2.509391861391552e-06, "loss": 1.694, "step": 86456 }, { "epoch": 2.88, "grad_norm": 0.6900521516799927, "learning_rate": 2.5080429772710187e-06, "loss": 1.6578, "step": 86457 }, { "epoch": 2.88, "grad_norm": 0.7287224531173706, "learning_rate": 2.506694454261826e-06, "loss": 1.6744, "step": 86458 }, { "epoch": 2.88, "grad_norm": 0.7322261929512024, "learning_rate": 2.5053462923657062e-06, "loss": 1.6316, "step": 86459 }, { "epoch": 2.88, "grad_norm": 0.7282660603523254, "learning_rate": 2.5039984915842916e-06, "loss": 1.7105, "step": 86460 }, { "epoch": 2.88, "grad_norm": 0.7112753391265869, "learning_rate": 2.50265105191918e-06, "loss": 1.6428, "step": 86461 }, { "epoch": 2.88, "grad_norm": 0.7296653985977173, "learning_rate": 2.5013039733719707e-06, "loss": 1.616, "step": 86462 }, { "epoch": 2.88, "grad_norm": 0.6954441070556641, "learning_rate": 2.499957255944396e-06, "loss": 1.7267, "step": 86463 }, { "epoch": 2.88, "grad_norm": 0.7030386924743652, "learning_rate": 2.4986108996380207e-06, "loss": 1.6443, "step": 86464 }, { "epoch": 2.88, "grad_norm": 0.7089195251464844, "learning_rate": 2.4972649044544768e-06, "loss": 1.6718, "step": 86465 }, { "epoch": 2.88, "grad_norm": 0.7123879194259644, "learning_rate": 2.4959192703954635e-06, "loss": 1.718, "step": 86466 }, { "epoch": 2.88, "grad_norm": 0.6944735050201416, "learning_rate": 2.4945739974625124e-06, "loss": 1.695, "step": 86467 }, { "epoch": 2.88, "grad_norm": 0.7022601366043091, "learning_rate": 2.4932290856573557e-06, "loss": 1.6086, "step": 86468 }, { "epoch": 2.88, "grad_norm": 0.7135390639305115, "learning_rate": 2.4918845349815585e-06, "loss": 1.7181, "step": 86469 }, { "epoch": 2.88, "grad_norm": 0.7057861685752869, "learning_rate": 2.4905403454367868e-06, "loss": 1.6153, "step": 86470 }, { "epoch": 2.88, "grad_norm": 0.7058477997779846, "learning_rate": 2.4891965170246717e-06, "loss": 1.6289, "step": 86471 }, { "epoch": 2.88, "grad_norm": 0.8070417642593384, "learning_rate": 2.4878530497468465e-06, "loss": 1.664, "step": 86472 }, { "epoch": 2.88, "grad_norm": 0.7193227410316467, "learning_rate": 2.4865099436049086e-06, "loss": 1.6499, "step": 86473 }, { "epoch": 2.88, "grad_norm": 0.7113029360771179, "learning_rate": 2.4851671986004906e-06, "loss": 1.7107, "step": 86474 }, { "epoch": 2.88, "grad_norm": 0.72870934009552, "learning_rate": 2.4838248147352914e-06, "loss": 1.7071, "step": 86475 }, { "epoch": 2.88, "grad_norm": 0.6803756356239319, "learning_rate": 2.4824827920108757e-06, "loss": 1.6687, "step": 86476 }, { "epoch": 2.88, "grad_norm": 0.6819782853126526, "learning_rate": 2.4811411304288763e-06, "loss": 1.6727, "step": 86477 }, { "epoch": 2.88, "grad_norm": 0.6988289952278137, "learning_rate": 2.479799829990925e-06, "loss": 1.6409, "step": 86478 }, { "epoch": 2.88, "grad_norm": 0.7206593155860901, "learning_rate": 2.478458890698687e-06, "loss": 1.6172, "step": 86479 }, { "epoch": 2.88, "grad_norm": 0.695311963558197, "learning_rate": 2.477118312553761e-06, "loss": 1.7613, "step": 86480 }, { "epoch": 2.88, "grad_norm": 0.7205370664596558, "learning_rate": 2.4757780955577457e-06, "loss": 1.7118, "step": 86481 }, { "epoch": 2.88, "grad_norm": 0.7065708637237549, "learning_rate": 2.4744382397123398e-06, "loss": 1.6565, "step": 86482 }, { "epoch": 2.88, "grad_norm": 0.7110329866409302, "learning_rate": 2.4730987450191087e-06, "loss": 1.6684, "step": 86483 }, { "epoch": 2.88, "grad_norm": 0.7435050010681152, "learning_rate": 2.4717596114796844e-06, "loss": 1.6384, "step": 86484 }, { "epoch": 2.88, "grad_norm": 0.7083305716514587, "learning_rate": 2.470420839095699e-06, "loss": 1.6722, "step": 86485 }, { "epoch": 2.88, "grad_norm": 0.6982372403144836, "learning_rate": 2.4690824278688183e-06, "loss": 1.6619, "step": 86486 }, { "epoch": 2.88, "grad_norm": 0.7077123522758484, "learning_rate": 2.4677443778006067e-06, "loss": 1.6968, "step": 86487 }, { "epoch": 2.88, "grad_norm": 0.70501309633255, "learning_rate": 2.466406688892697e-06, "loss": 1.57, "step": 86488 }, { "epoch": 2.88, "grad_norm": 0.7222523093223572, "learning_rate": 2.4650693611467544e-06, "loss": 1.6394, "step": 86489 }, { "epoch": 2.88, "grad_norm": 0.7137986421585083, "learning_rate": 2.4637323945643774e-06, "loss": 1.6992, "step": 86490 }, { "epoch": 2.88, "grad_norm": 0.7005040645599365, "learning_rate": 2.462395789147198e-06, "loss": 1.7373, "step": 86491 }, { "epoch": 2.88, "grad_norm": 0.6981419920921326, "learning_rate": 2.461059544896815e-06, "loss": 1.7065, "step": 86492 }, { "epoch": 2.88, "grad_norm": 0.735406756401062, "learning_rate": 2.459723661814894e-06, "loss": 1.6368, "step": 86493 }, { "epoch": 2.88, "grad_norm": 0.7003939747810364, "learning_rate": 2.458388139903e-06, "loss": 1.6549, "step": 86494 }, { "epoch": 2.88, "grad_norm": 0.7126381397247314, "learning_rate": 2.457052979162766e-06, "loss": 1.6902, "step": 86495 }, { "epoch": 2.88, "grad_norm": 0.7008252739906311, "learning_rate": 2.455718179595856e-06, "loss": 1.6788, "step": 86496 }, { "epoch": 2.88, "grad_norm": 0.7092629671096802, "learning_rate": 2.4543837412038693e-06, "loss": 1.7007, "step": 86497 }, { "epoch": 2.88, "grad_norm": 0.7229631543159485, "learning_rate": 2.4530496639884043e-06, "loss": 1.6619, "step": 86498 }, { "epoch": 2.88, "grad_norm": 0.7180470824241638, "learning_rate": 2.4517159479510606e-06, "loss": 1.7, "step": 86499 }, { "epoch": 2.88, "grad_norm": 0.713695764541626, "learning_rate": 2.4503825930935693e-06, "loss": 1.6489, "step": 86500 }, { "epoch": 2.88, "grad_norm": 0.7174515128135681, "learning_rate": 2.44904959941743e-06, "loss": 1.6722, "step": 86501 }, { "epoch": 2.88, "grad_norm": 0.7152426838874817, "learning_rate": 2.4477169669242737e-06, "loss": 1.6724, "step": 86502 }, { "epoch": 2.88, "grad_norm": 0.7413308024406433, "learning_rate": 2.4463846956157994e-06, "loss": 1.7264, "step": 86503 }, { "epoch": 2.88, "grad_norm": 0.7120952010154724, "learning_rate": 2.4450527854935397e-06, "loss": 1.673, "step": 86504 }, { "epoch": 2.88, "grad_norm": 0.7172218561172485, "learning_rate": 2.4437212365591594e-06, "loss": 1.7007, "step": 86505 }, { "epoch": 2.88, "grad_norm": 0.7225146293640137, "learning_rate": 2.442390048814258e-06, "loss": 1.6914, "step": 86506 }, { "epoch": 2.88, "grad_norm": 0.7019079923629761, "learning_rate": 2.441059222260433e-06, "loss": 1.6631, "step": 86507 }, { "epoch": 2.88, "grad_norm": 0.7318419814109802, "learning_rate": 2.4397287568993506e-06, "loss": 1.6977, "step": 86508 }, { "epoch": 2.88, "grad_norm": 0.7119751572608948, "learning_rate": 2.438398652732543e-06, "loss": 1.6649, "step": 86509 }, { "epoch": 2.88, "grad_norm": 0.7351966500282288, "learning_rate": 2.437068909761708e-06, "loss": 1.6541, "step": 86510 }, { "epoch": 2.88, "grad_norm": 0.7075884342193604, "learning_rate": 2.4357395279884453e-06, "loss": 1.6752, "step": 86511 }, { "epoch": 2.88, "grad_norm": 0.7273311018943787, "learning_rate": 2.4344105074143194e-06, "loss": 1.6683, "step": 86512 }, { "epoch": 2.88, "grad_norm": 0.7205510139465332, "learning_rate": 2.4330818480409962e-06, "loss": 1.6342, "step": 86513 }, { "epoch": 2.88, "grad_norm": 0.7038922905921936, "learning_rate": 2.4317535498700745e-06, "loss": 1.6545, "step": 86514 }, { "epoch": 2.88, "grad_norm": 0.7275090217590332, "learning_rate": 2.4304256129031194e-06, "loss": 1.6693, "step": 86515 }, { "epoch": 2.88, "grad_norm": 0.7384580969810486, "learning_rate": 2.42909803714183e-06, "loss": 1.6968, "step": 86516 }, { "epoch": 2.88, "grad_norm": 0.7274699211120605, "learning_rate": 2.427770822587738e-06, "loss": 1.6325, "step": 86517 }, { "epoch": 2.88, "grad_norm": 0.7188307642936707, "learning_rate": 2.4264439692425084e-06, "loss": 1.6845, "step": 86518 }, { "epoch": 2.88, "grad_norm": 0.7340680360794067, "learning_rate": 2.4251174771077075e-06, "loss": 1.7423, "step": 86519 }, { "epoch": 2.88, "grad_norm": 0.7333883047103882, "learning_rate": 2.423791346185e-06, "loss": 1.6962, "step": 86520 }, { "epoch": 2.88, "grad_norm": 0.7095609903335571, "learning_rate": 2.4224655764759514e-06, "loss": 1.6908, "step": 86521 }, { "epoch": 2.88, "grad_norm": 0.7197745442390442, "learning_rate": 2.4211401679821607e-06, "loss": 1.6327, "step": 86522 }, { "epoch": 2.88, "grad_norm": 0.7283521294593811, "learning_rate": 2.4198151207052594e-06, "loss": 1.7526, "step": 86523 }, { "epoch": 2.88, "grad_norm": 0.7099887132644653, "learning_rate": 2.4184904346468804e-06, "loss": 1.7177, "step": 86524 }, { "epoch": 2.88, "grad_norm": 0.7042769193649292, "learning_rate": 2.4171661098085884e-06, "loss": 1.6161, "step": 86525 }, { "epoch": 2.88, "grad_norm": 0.732586145401001, "learning_rate": 2.4158421461920486e-06, "loss": 1.6204, "step": 86526 }, { "epoch": 2.88, "grad_norm": 0.6848323941230774, "learning_rate": 2.4145185437987935e-06, "loss": 1.7066, "step": 86527 }, { "epoch": 2.88, "grad_norm": 0.7236788272857666, "learning_rate": 2.4131953026304883e-06, "loss": 1.6352, "step": 86528 }, { "epoch": 2.88, "grad_norm": 0.715756356716156, "learning_rate": 2.4118724226886656e-06, "loss": 1.6999, "step": 86529 }, { "epoch": 2.88, "grad_norm": 0.6905934810638428, "learning_rate": 2.4105499039750564e-06, "loss": 1.7321, "step": 86530 }, { "epoch": 2.88, "grad_norm": 0.7187817692756653, "learning_rate": 2.409227746491127e-06, "loss": 1.6959, "step": 86531 }, { "epoch": 2.88, "grad_norm": 0.7103245258331299, "learning_rate": 2.4079059502385758e-06, "loss": 1.702, "step": 86532 }, { "epoch": 2.88, "grad_norm": 0.7248085141181946, "learning_rate": 2.406584515219001e-06, "loss": 1.6851, "step": 86533 }, { "epoch": 2.88, "grad_norm": 0.7476834058761597, "learning_rate": 2.4052634414339357e-06, "loss": 1.7783, "step": 86534 }, { "epoch": 2.88, "grad_norm": 0.7257972955703735, "learning_rate": 2.403942728885078e-06, "loss": 1.5516, "step": 86535 }, { "epoch": 2.88, "grad_norm": 0.7326291799545288, "learning_rate": 2.4026223775739595e-06, "loss": 1.7941, "step": 86536 }, { "epoch": 2.88, "grad_norm": 0.7061753869056702, "learning_rate": 2.401302387502213e-06, "loss": 1.6649, "step": 86537 }, { "epoch": 2.88, "grad_norm": 0.7123842239379883, "learning_rate": 2.399982758671437e-06, "loss": 1.6841, "step": 86538 }, { "epoch": 2.88, "grad_norm": 0.7237001657485962, "learning_rate": 2.39866349108323e-06, "loss": 1.7102, "step": 86539 }, { "epoch": 2.88, "grad_norm": 0.7171374559402466, "learning_rate": 2.3973445847391915e-06, "loss": 1.6717, "step": 86540 }, { "epoch": 2.88, "grad_norm": 0.7110683917999268, "learning_rate": 2.396026039640919e-06, "loss": 1.6871, "step": 86541 }, { "epoch": 2.88, "grad_norm": 0.7184274792671204, "learning_rate": 2.3947078557900458e-06, "loss": 1.6904, "step": 86542 }, { "epoch": 2.88, "grad_norm": 0.7154545187950134, "learning_rate": 2.3933900331881696e-06, "loss": 1.6824, "step": 86543 }, { "epoch": 2.88, "grad_norm": 0.6945563554763794, "learning_rate": 2.392072571836823e-06, "loss": 1.6563, "step": 86544 }, { "epoch": 2.88, "grad_norm": 0.7366393804550171, "learning_rate": 2.3907554717376378e-06, "loss": 1.6434, "step": 86545 }, { "epoch": 2.88, "grad_norm": 0.7212002277374268, "learning_rate": 2.389438732892246e-06, "loss": 1.6757, "step": 86546 }, { "epoch": 2.88, "grad_norm": 0.6958487629890442, "learning_rate": 2.3881223553022467e-06, "loss": 1.6532, "step": 86547 }, { "epoch": 2.88, "grad_norm": 0.707460880279541, "learning_rate": 2.386806338969172e-06, "loss": 1.7072, "step": 86548 }, { "epoch": 2.88, "grad_norm": 0.6852501630783081, "learning_rate": 2.3854906838947197e-06, "loss": 1.7067, "step": 86549 }, { "epoch": 2.88, "grad_norm": 0.7418104410171509, "learning_rate": 2.3841753900803896e-06, "loss": 1.6934, "step": 86550 }, { "epoch": 2.88, "grad_norm": 0.7126257419586182, "learning_rate": 2.382860457527813e-06, "loss": 1.6758, "step": 86551 }, { "epoch": 2.88, "grad_norm": 0.6987924575805664, "learning_rate": 2.3815458862385895e-06, "loss": 1.5904, "step": 86552 }, { "epoch": 2.88, "grad_norm": 0.7082959413528442, "learning_rate": 2.3802316762143834e-06, "loss": 1.7538, "step": 86553 }, { "epoch": 2.88, "grad_norm": 0.704649806022644, "learning_rate": 2.378917827456661e-06, "loss": 1.6682, "step": 86554 }, { "epoch": 2.88, "grad_norm": 0.7187624573707581, "learning_rate": 2.3776043399670872e-06, "loss": 1.6882, "step": 86555 }, { "epoch": 2.88, "grad_norm": 0.7046942114830017, "learning_rate": 2.376291213747261e-06, "loss": 1.7105, "step": 86556 }, { "epoch": 2.88, "grad_norm": 0.7029476165771484, "learning_rate": 2.3749784487987477e-06, "loss": 1.6157, "step": 86557 }, { "epoch": 2.88, "grad_norm": 0.7295773029327393, "learning_rate": 2.373666045123179e-06, "loss": 1.6432, "step": 86558 }, { "epoch": 2.88, "grad_norm": 0.7238325476646423, "learning_rate": 2.3723540027220877e-06, "loss": 1.7186, "step": 86559 }, { "epoch": 2.88, "grad_norm": 0.7084000706672668, "learning_rate": 2.3710423215971717e-06, "loss": 1.577, "step": 86560 }, { "epoch": 2.88, "grad_norm": 0.7134264707565308, "learning_rate": 2.369731001749897e-06, "loss": 1.6472, "step": 86561 }, { "epoch": 2.88, "grad_norm": 0.722255527973175, "learning_rate": 2.3684200431819287e-06, "loss": 1.7889, "step": 86562 }, { "epoch": 2.88, "grad_norm": 0.7361605763435364, "learning_rate": 2.3671094458948658e-06, "loss": 1.6591, "step": 86563 }, { "epoch": 2.88, "grad_norm": 0.7154978513717651, "learning_rate": 2.3657992098902734e-06, "loss": 1.6176, "step": 86564 }, { "epoch": 2.88, "grad_norm": 0.7164750695228577, "learning_rate": 2.364489335169717e-06, "loss": 1.664, "step": 86565 }, { "epoch": 2.88, "grad_norm": 0.7077615261077881, "learning_rate": 2.3631798217348287e-06, "loss": 1.6447, "step": 86566 }, { "epoch": 2.88, "grad_norm": 0.7274336814880371, "learning_rate": 2.3618706695872403e-06, "loss": 1.6689, "step": 86567 }, { "epoch": 2.88, "grad_norm": 0.7058611512184143, "learning_rate": 2.3605618787284173e-06, "loss": 1.6665, "step": 86568 }, { "epoch": 2.88, "grad_norm": 0.7145814895629883, "learning_rate": 2.3592534491600257e-06, "loss": 1.6824, "step": 86569 }, { "epoch": 2.88, "grad_norm": 0.743366539478302, "learning_rate": 2.3579453808836635e-06, "loss": 1.7555, "step": 86570 }, { "epoch": 2.88, "grad_norm": 0.7093009948730469, "learning_rate": 2.3566376739009294e-06, "loss": 1.5932, "step": 86571 }, { "epoch": 2.88, "grad_norm": 0.705413818359375, "learning_rate": 2.3553303282133227e-06, "loss": 1.6751, "step": 86572 }, { "epoch": 2.88, "grad_norm": 0.7098374962806702, "learning_rate": 2.3540233438225087e-06, "loss": 1.6876, "step": 86573 }, { "epoch": 2.88, "grad_norm": 0.7105583548545837, "learning_rate": 2.3527167207300857e-06, "loss": 1.7108, "step": 86574 }, { "epoch": 2.88, "grad_norm": 0.7079576253890991, "learning_rate": 2.3514104589375525e-06, "loss": 1.6564, "step": 86575 }, { "epoch": 2.88, "grad_norm": 0.7190975546836853, "learning_rate": 2.350104558446575e-06, "loss": 1.6502, "step": 86576 }, { "epoch": 2.88, "grad_norm": 0.738572895526886, "learning_rate": 2.3487990192587514e-06, "loss": 1.6911, "step": 86577 }, { "epoch": 2.88, "grad_norm": 0.7040686011314392, "learning_rate": 2.3474938413755807e-06, "loss": 1.7341, "step": 86578 }, { "epoch": 2.88, "grad_norm": 0.7109041810035706, "learning_rate": 2.3461890247986946e-06, "loss": 1.6606, "step": 86579 }, { "epoch": 2.88, "grad_norm": 0.7375638484954834, "learning_rate": 2.344884569529726e-06, "loss": 1.7586, "step": 86580 }, { "epoch": 2.88, "grad_norm": 0.7242216467857361, "learning_rate": 2.3435804755701725e-06, "loss": 1.7208, "step": 86581 }, { "epoch": 2.88, "grad_norm": 0.7122944593429565, "learning_rate": 2.3422767429216673e-06, "loss": 1.6194, "step": 86582 }, { "epoch": 2.88, "grad_norm": 0.7508760690689087, "learning_rate": 2.3409733715857747e-06, "loss": 1.6556, "step": 86583 }, { "epoch": 2.88, "grad_norm": 0.7234535217285156, "learning_rate": 2.339670361564061e-06, "loss": 1.6661, "step": 86584 }, { "epoch": 2.88, "grad_norm": 0.7146101593971252, "learning_rate": 2.338367712858158e-06, "loss": 1.6649, "step": 86585 }, { "epoch": 2.88, "grad_norm": 0.7312964200973511, "learning_rate": 2.3370654254695975e-06, "loss": 1.6982, "step": 86586 }, { "epoch": 2.88, "grad_norm": 0.6979174613952637, "learning_rate": 2.3357634994000117e-06, "loss": 1.6494, "step": 86587 }, { "epoch": 2.88, "grad_norm": 0.7198779582977295, "learning_rate": 2.334461934650933e-06, "loss": 1.6718, "step": 86588 }, { "epoch": 2.88, "grad_norm": 0.6899697184562683, "learning_rate": 2.3331607312239597e-06, "loss": 1.7057, "step": 86589 }, { "epoch": 2.88, "grad_norm": 0.6947011947631836, "learning_rate": 2.3318598891206575e-06, "loss": 1.6853, "step": 86590 }, { "epoch": 2.88, "grad_norm": 0.7174506187438965, "learning_rate": 2.3305594083426247e-06, "loss": 1.6767, "step": 86591 }, { "epoch": 2.88, "grad_norm": 0.7273083925247192, "learning_rate": 2.3292592888914607e-06, "loss": 1.6925, "step": 86592 }, { "epoch": 2.88, "grad_norm": 0.7266303300857544, "learning_rate": 2.327959530768697e-06, "loss": 1.7531, "step": 86593 }, { "epoch": 2.88, "grad_norm": 0.713555634021759, "learning_rate": 2.3266601339758995e-06, "loss": 1.628, "step": 86594 }, { "epoch": 2.88, "grad_norm": 0.7112220525741577, "learning_rate": 2.3253610985147e-06, "loss": 1.7115, "step": 86595 }, { "epoch": 2.88, "grad_norm": 0.7354918718338013, "learning_rate": 2.3240624243866633e-06, "loss": 1.6279, "step": 86596 }, { "epoch": 2.88, "grad_norm": 0.7278653979301453, "learning_rate": 2.322764111593356e-06, "loss": 1.6879, "step": 86597 }, { "epoch": 2.88, "grad_norm": 0.7439611554145813, "learning_rate": 2.321466160136343e-06, "loss": 1.753, "step": 86598 }, { "epoch": 2.88, "grad_norm": 0.7124906778335571, "learning_rate": 2.3201685700171892e-06, "loss": 1.6197, "step": 86599 }, { "epoch": 2.88, "grad_norm": 0.7246280908584595, "learning_rate": 2.318871341237527e-06, "loss": 1.6896, "step": 86600 }, { "epoch": 2.88, "grad_norm": 0.7268248796463013, "learning_rate": 2.3175744737988555e-06, "loss": 1.7428, "step": 86601 }, { "epoch": 2.88, "grad_norm": 0.704282283782959, "learning_rate": 2.3162779677028065e-06, "loss": 1.7218, "step": 86602 }, { "epoch": 2.88, "grad_norm": 0.7164686322212219, "learning_rate": 2.3149818229509785e-06, "loss": 1.6568, "step": 86603 }, { "epoch": 2.88, "grad_norm": 0.7387976050376892, "learning_rate": 2.3136860395448377e-06, "loss": 1.6533, "step": 86604 }, { "epoch": 2.88, "grad_norm": 0.6965373754501343, "learning_rate": 2.312390617486015e-06, "loss": 1.6956, "step": 86605 }, { "epoch": 2.88, "grad_norm": 0.7064059972763062, "learning_rate": 2.3110955567761437e-06, "loss": 1.6706, "step": 86606 }, { "epoch": 2.88, "grad_norm": 0.729480504989624, "learning_rate": 2.309800857416688e-06, "loss": 1.7025, "step": 86607 }, { "epoch": 2.88, "grad_norm": 0.7054423689842224, "learning_rate": 2.3085065194093143e-06, "loss": 1.6288, "step": 86608 }, { "epoch": 2.88, "grad_norm": 0.6989044547080994, "learning_rate": 2.307212542755488e-06, "loss": 1.7026, "step": 86609 }, { "epoch": 2.88, "grad_norm": 0.7088131904602051, "learning_rate": 2.30591892745694e-06, "loss": 1.6847, "step": 86610 }, { "epoch": 2.88, "grad_norm": 0.6989866495132446, "learning_rate": 2.3046256735150703e-06, "loss": 1.6371, "step": 86611 }, { "epoch": 2.88, "grad_norm": 0.73459392786026, "learning_rate": 2.3033327809315104e-06, "loss": 1.6702, "step": 86612 }, { "epoch": 2.88, "grad_norm": 0.6977370381355286, "learning_rate": 2.302040249707893e-06, "loss": 1.5984, "step": 86613 }, { "epoch": 2.88, "grad_norm": 0.7356947660446167, "learning_rate": 2.3007480798457157e-06, "loss": 1.6729, "step": 86614 }, { "epoch": 2.88, "grad_norm": 0.7116339206695557, "learning_rate": 2.299456271346545e-06, "loss": 1.6764, "step": 86615 }, { "epoch": 2.88, "grad_norm": 0.7102339267730713, "learning_rate": 2.2981648242119786e-06, "loss": 1.681, "step": 86616 }, { "epoch": 2.88, "grad_norm": 0.7123045325279236, "learning_rate": 2.2968737384436164e-06, "loss": 1.6102, "step": 86617 }, { "epoch": 2.88, "grad_norm": 0.7027876377105713, "learning_rate": 2.295583014042923e-06, "loss": 1.6815, "step": 86618 }, { "epoch": 2.88, "grad_norm": 0.7278251647949219, "learning_rate": 2.2942926510115645e-06, "loss": 1.7053, "step": 86619 }, { "epoch": 2.88, "grad_norm": 0.6953791975975037, "learning_rate": 2.2930026493510724e-06, "loss": 1.7136, "step": 86620 }, { "epoch": 2.88, "grad_norm": 0.7037448883056641, "learning_rate": 2.291713009063012e-06, "loss": 1.6896, "step": 86621 }, { "epoch": 2.88, "grad_norm": 0.7166125178337097, "learning_rate": 2.290423730148916e-06, "loss": 1.6337, "step": 86622 }, { "epoch": 2.88, "grad_norm": 0.7135825157165527, "learning_rate": 2.289134812610416e-06, "loss": 1.715, "step": 86623 }, { "epoch": 2.88, "grad_norm": 0.6988332867622375, "learning_rate": 2.287846256449011e-06, "loss": 1.6016, "step": 86624 }, { "epoch": 2.88, "grad_norm": 0.7260006070137024, "learning_rate": 2.2865580616662995e-06, "loss": 1.7085, "step": 86625 }, { "epoch": 2.88, "grad_norm": 0.7481397390365601, "learning_rate": 2.285270228263847e-06, "loss": 1.7011, "step": 86626 }, { "epoch": 2.88, "grad_norm": 0.718938946723938, "learning_rate": 2.283982756243219e-06, "loss": 1.6936, "step": 86627 }, { "epoch": 2.88, "grad_norm": 0.7238461971282959, "learning_rate": 2.282695645605981e-06, "loss": 1.6439, "step": 86628 }, { "epoch": 2.88, "grad_norm": 0.7423531413078308, "learning_rate": 2.2814088963536316e-06, "loss": 1.7174, "step": 86629 }, { "epoch": 2.88, "grad_norm": 0.7120040059089661, "learning_rate": 2.2801225084878362e-06, "loss": 1.6482, "step": 86630 }, { "epoch": 2.88, "grad_norm": 0.7057477831840515, "learning_rate": 2.2788364820100933e-06, "loss": 1.6784, "step": 86631 }, { "epoch": 2.88, "grad_norm": 0.7195237874984741, "learning_rate": 2.277550816921969e-06, "loss": 1.5876, "step": 86632 }, { "epoch": 2.88, "grad_norm": 0.7508690357208252, "learning_rate": 2.2762655132250286e-06, "loss": 1.7118, "step": 86633 }, { "epoch": 2.88, "grad_norm": 0.7081615328788757, "learning_rate": 2.274980570920837e-06, "loss": 1.6376, "step": 86634 }, { "epoch": 2.88, "grad_norm": 0.7172956466674805, "learning_rate": 2.2736959900109598e-06, "loss": 1.643, "step": 86635 }, { "epoch": 2.88, "grad_norm": 0.7356090545654297, "learning_rate": 2.272411770496929e-06, "loss": 1.6965, "step": 86636 }, { "epoch": 2.88, "grad_norm": 0.7149043083190918, "learning_rate": 2.271127912380344e-06, "loss": 1.6322, "step": 86637 }, { "epoch": 2.88, "grad_norm": 2.4791653156280518, "learning_rate": 2.269844415662736e-06, "loss": 1.5983, "step": 86638 }, { "epoch": 2.88, "grad_norm": 0.7180779576301575, "learning_rate": 2.2685612803456376e-06, "loss": 1.6944, "step": 86639 }, { "epoch": 2.88, "grad_norm": 0.741147518157959, "learning_rate": 2.2672785064306807e-06, "loss": 1.7162, "step": 86640 }, { "epoch": 2.88, "grad_norm": 0.7439766526222229, "learning_rate": 2.265996093919331e-06, "loss": 1.7201, "step": 86641 }, { "epoch": 2.88, "grad_norm": 0.7158960700035095, "learning_rate": 2.2647140428132537e-06, "loss": 1.6739, "step": 86642 }, { "epoch": 2.88, "grad_norm": 0.6991497874259949, "learning_rate": 2.2634323531139142e-06, "loss": 1.6199, "step": 86643 }, { "epoch": 2.88, "grad_norm": 0.758918046951294, "learning_rate": 2.262151024822878e-06, "loss": 1.5872, "step": 86644 }, { "epoch": 2.88, "grad_norm": 0.7273806929588318, "learning_rate": 2.260870057941744e-06, "loss": 1.758, "step": 86645 }, { "epoch": 2.88, "grad_norm": 0.7175052762031555, "learning_rate": 2.2595894524720436e-06, "loss": 1.6268, "step": 86646 }, { "epoch": 2.88, "grad_norm": 0.7184449434280396, "learning_rate": 2.258309208415343e-06, "loss": 1.7437, "step": 86647 }, { "epoch": 2.88, "grad_norm": 0.7144462466239929, "learning_rate": 2.257029325773141e-06, "loss": 1.6939, "step": 86648 }, { "epoch": 2.88, "grad_norm": 0.7207565903663635, "learning_rate": 2.255749804547069e-06, "loss": 1.7799, "step": 86649 }, { "epoch": 2.88, "grad_norm": 0.7382733225822449, "learning_rate": 2.2544706447386597e-06, "loss": 1.7275, "step": 86650 }, { "epoch": 2.88, "grad_norm": 0.7185640335083008, "learning_rate": 2.2531918463494114e-06, "loss": 1.6379, "step": 86651 }, { "epoch": 2.88, "grad_norm": 0.7128387689590454, "learning_rate": 2.2519134093809567e-06, "loss": 1.6489, "step": 86652 }, { "epoch": 2.88, "grad_norm": 0.7191601395606995, "learning_rate": 2.2506353338347606e-06, "loss": 1.6586, "step": 86653 }, { "epoch": 2.88, "grad_norm": 0.7284631729125977, "learning_rate": 2.2493576197124887e-06, "loss": 1.7165, "step": 86654 }, { "epoch": 2.88, "grad_norm": 0.7266050577163696, "learning_rate": 2.248080267015573e-06, "loss": 1.7766, "step": 86655 }, { "epoch": 2.88, "grad_norm": 0.724523663520813, "learning_rate": 2.2468032757456456e-06, "loss": 1.6822, "step": 86656 }, { "epoch": 2.88, "grad_norm": 0.7038936018943787, "learning_rate": 2.2455266459042055e-06, "loss": 1.698, "step": 86657 }, { "epoch": 2.88, "grad_norm": 0.712853729724884, "learning_rate": 2.244250377492818e-06, "loss": 1.6818, "step": 86658 }, { "epoch": 2.88, "grad_norm": 0.725787878036499, "learning_rate": 2.242974470513048e-06, "loss": 1.6555, "step": 86659 }, { "epoch": 2.88, "grad_norm": 0.7193959951400757, "learning_rate": 2.241698924966462e-06, "loss": 1.6765, "step": 86660 }, { "epoch": 2.88, "grad_norm": 0.7090781331062317, "learning_rate": 2.240423740854558e-06, "loss": 1.6305, "step": 86661 }, { "epoch": 2.88, "grad_norm": 0.7167237997055054, "learning_rate": 2.2391489181789015e-06, "loss": 1.7271, "step": 86662 }, { "epoch": 2.88, "grad_norm": 0.726445734500885, "learning_rate": 2.2378744569410247e-06, "loss": 1.6779, "step": 86663 }, { "epoch": 2.88, "grad_norm": 0.709396481513977, "learning_rate": 2.2366003571425593e-06, "loss": 1.6987, "step": 86664 }, { "epoch": 2.88, "grad_norm": 0.7380255460739136, "learning_rate": 2.2353266187849385e-06, "loss": 1.6974, "step": 86665 }, { "epoch": 2.88, "grad_norm": 0.7324984669685364, "learning_rate": 2.2340532418697597e-06, "loss": 1.8008, "step": 86666 }, { "epoch": 2.88, "grad_norm": 0.7140693664550781, "learning_rate": 2.2327802263985897e-06, "loss": 1.6779, "step": 86667 }, { "epoch": 2.88, "grad_norm": 0.6914137005805969, "learning_rate": 2.231507572372926e-06, "loss": 1.6758, "step": 86668 }, { "epoch": 2.88, "grad_norm": 0.6895257830619812, "learning_rate": 2.2302352797943344e-06, "loss": 1.7212, "step": 86669 }, { "epoch": 2.88, "grad_norm": 0.7101930975914001, "learning_rate": 2.228963348664381e-06, "loss": 1.627, "step": 86670 }, { "epoch": 2.88, "grad_norm": 0.6984235048294067, "learning_rate": 2.2276917789845973e-06, "loss": 1.6707, "step": 86671 }, { "epoch": 2.88, "grad_norm": 0.7173252701759338, "learning_rate": 2.226420570756482e-06, "loss": 1.6639, "step": 86672 }, { "epoch": 2.88, "grad_norm": 0.7263761162757874, "learning_rate": 2.2251497239816673e-06, "loss": 1.6983, "step": 86673 }, { "epoch": 2.88, "grad_norm": 0.7142544984817505, "learning_rate": 2.223879238661619e-06, "loss": 1.5938, "step": 86674 }, { "epoch": 2.88, "grad_norm": 0.7063428163528442, "learning_rate": 2.222609114797902e-06, "loss": 1.6824, "step": 86675 }, { "epoch": 2.88, "grad_norm": 0.7050638198852539, "learning_rate": 2.221339352392082e-06, "loss": 1.6657, "step": 86676 }, { "epoch": 2.88, "grad_norm": 0.7555017471313477, "learning_rate": 2.220069951445691e-06, "loss": 1.6468, "step": 86677 }, { "epoch": 2.88, "grad_norm": 0.7154121994972229, "learning_rate": 2.2188009119602277e-06, "loss": 1.6699, "step": 86678 }, { "epoch": 2.88, "grad_norm": 0.7286520600318909, "learning_rate": 2.217532233937258e-06, "loss": 1.7099, "step": 86679 }, { "epoch": 2.88, "grad_norm": 0.701684832572937, "learning_rate": 2.216263917378347e-06, "loss": 1.6263, "step": 86680 }, { "epoch": 2.88, "grad_norm": 0.7243286371231079, "learning_rate": 2.2149959622850265e-06, "loss": 1.6602, "step": 86681 }, { "epoch": 2.88, "grad_norm": 0.7190982699394226, "learning_rate": 2.213728368658796e-06, "loss": 1.681, "step": 86682 }, { "epoch": 2.88, "grad_norm": 0.707417368888855, "learning_rate": 2.21246113650122e-06, "loss": 1.7244, "step": 86683 }, { "epoch": 2.88, "grad_norm": 0.7154077887535095, "learning_rate": 2.2111942658138982e-06, "loss": 1.6785, "step": 86684 }, { "epoch": 2.88, "grad_norm": 0.7209326028823853, "learning_rate": 2.2099277565982286e-06, "loss": 1.7614, "step": 86685 }, { "epoch": 2.88, "grad_norm": 0.6956631541252136, "learning_rate": 2.2086616088558774e-06, "loss": 1.6654, "step": 86686 }, { "epoch": 2.88, "grad_norm": 0.7174220681190491, "learning_rate": 2.2073958225883424e-06, "loss": 1.6807, "step": 86687 }, { "epoch": 2.88, "grad_norm": 0.7243437170982361, "learning_rate": 2.2061303977971236e-06, "loss": 1.7589, "step": 86688 }, { "epoch": 2.88, "grad_norm": 0.7136988043785095, "learning_rate": 2.204865334483785e-06, "loss": 1.6867, "step": 86689 }, { "epoch": 2.88, "grad_norm": 0.7312511801719666, "learning_rate": 2.2036006326498934e-06, "loss": 1.6772, "step": 86690 }, { "epoch": 2.88, "grad_norm": 0.7011743783950806, "learning_rate": 2.2023362922969134e-06, "loss": 1.6402, "step": 86691 }, { "epoch": 2.88, "grad_norm": 0.7317359447479248, "learning_rate": 2.201072313426444e-06, "loss": 1.7224, "step": 86692 }, { "epoch": 2.88, "grad_norm": 0.710922122001648, "learning_rate": 2.199808696039984e-06, "loss": 1.6567, "step": 86693 }, { "epoch": 2.88, "grad_norm": 0.7054300904273987, "learning_rate": 2.1985454401390657e-06, "loss": 1.6723, "step": 86694 }, { "epoch": 2.88, "grad_norm": 0.6880603432655334, "learning_rate": 2.197282545725254e-06, "loss": 1.6529, "step": 86695 }, { "epoch": 2.88, "grad_norm": 0.7387524247169495, "learning_rate": 2.1960200128000484e-06, "loss": 1.6959, "step": 86696 }, { "epoch": 2.88, "grad_norm": 0.701663613319397, "learning_rate": 2.1947578413650136e-06, "loss": 1.65, "step": 86697 }, { "epoch": 2.88, "grad_norm": 0.7014908194541931, "learning_rate": 2.1934960314216487e-06, "loss": 1.7303, "step": 86698 }, { "epoch": 2.88, "grad_norm": 0.7350507974624634, "learning_rate": 2.192234582971486e-06, "loss": 1.7143, "step": 86699 }, { "epoch": 2.88, "grad_norm": 0.7175554037094116, "learning_rate": 2.190973496016124e-06, "loss": 1.6081, "step": 86700 }, { "epoch": 2.88, "grad_norm": 0.7196253538131714, "learning_rate": 2.189712770556995e-06, "loss": 1.6855, "step": 86701 }, { "epoch": 2.88, "grad_norm": 0.7124195098876953, "learning_rate": 2.1884524065956643e-06, "loss": 1.6589, "step": 86702 }, { "epoch": 2.88, "grad_norm": 0.7194953560829163, "learning_rate": 2.1871924041336973e-06, "loss": 1.7064, "step": 86703 }, { "epoch": 2.88, "grad_norm": 0.7204241156578064, "learning_rate": 2.185932763172593e-06, "loss": 1.6946, "step": 86704 }, { "epoch": 2.88, "grad_norm": 0.7179980278015137, "learning_rate": 2.1846734837138837e-06, "loss": 1.7463, "step": 86705 }, { "epoch": 2.88, "grad_norm": 0.6944922208786011, "learning_rate": 2.1834145657591007e-06, "loss": 1.6831, "step": 86706 }, { "epoch": 2.88, "grad_norm": 0.7115748524665833, "learning_rate": 2.182156009309777e-06, "loss": 1.6993, "step": 86707 }, { "epoch": 2.88, "grad_norm": 0.7271412014961243, "learning_rate": 2.1808978143674103e-06, "loss": 1.6417, "step": 86708 }, { "epoch": 2.88, "grad_norm": 0.6923679113388062, "learning_rate": 2.1796399809335676e-06, "loss": 1.6268, "step": 86709 }, { "epoch": 2.88, "grad_norm": 0.7149317860603333, "learning_rate": 2.1783825090097796e-06, "loss": 1.6784, "step": 86710 }, { "epoch": 2.88, "grad_norm": 0.747590184211731, "learning_rate": 2.1771253985975456e-06, "loss": 1.6552, "step": 86711 }, { "epoch": 2.88, "grad_norm": 0.7096456289291382, "learning_rate": 2.1758686496983646e-06, "loss": 1.6598, "step": 86712 }, { "epoch": 2.88, "grad_norm": 0.7183581590652466, "learning_rate": 2.174612262313835e-06, "loss": 1.7326, "step": 86713 }, { "epoch": 2.88, "grad_norm": 0.6966190934181213, "learning_rate": 2.1733562364454227e-06, "loss": 1.7021, "step": 86714 }, { "epoch": 2.89, "grad_norm": 0.7187510132789612, "learning_rate": 2.1721005720946927e-06, "loss": 1.5967, "step": 86715 }, { "epoch": 2.89, "grad_norm": 0.7121354937553406, "learning_rate": 2.170845269263144e-06, "loss": 1.7357, "step": 86716 }, { "epoch": 2.89, "grad_norm": 0.7336416840553284, "learning_rate": 2.1695903279523087e-06, "loss": 1.7267, "step": 86717 }, { "epoch": 2.89, "grad_norm": 0.698001503944397, "learning_rate": 2.1683357481636854e-06, "loss": 1.6463, "step": 86718 }, { "epoch": 2.89, "grad_norm": 0.7232924103736877, "learning_rate": 2.1670815298988063e-06, "loss": 1.6931, "step": 86719 }, { "epoch": 2.89, "grad_norm": 0.7114301919937134, "learning_rate": 2.1658276731592706e-06, "loss": 1.6528, "step": 86720 }, { "epoch": 2.89, "grad_norm": 0.7203410267829895, "learning_rate": 2.1645741779464765e-06, "loss": 1.6455, "step": 86721 }, { "epoch": 2.89, "grad_norm": 0.7048210501670837, "learning_rate": 2.163321044262023e-06, "loss": 1.6279, "step": 86722 }, { "epoch": 2.89, "grad_norm": 3.3476669788360596, "learning_rate": 2.1620682721074424e-06, "loss": 1.7541, "step": 86723 }, { "epoch": 2.89, "grad_norm": 0.7081549763679504, "learning_rate": 2.1608158614841996e-06, "loss": 1.6661, "step": 86724 }, { "epoch": 2.89, "grad_norm": 0.720976710319519, "learning_rate": 2.1595638123938276e-06, "loss": 1.7448, "step": 86725 }, { "epoch": 2.89, "grad_norm": 0.6989789605140686, "learning_rate": 2.1583121248378576e-06, "loss": 1.6714, "step": 86726 }, { "epoch": 2.89, "grad_norm": 0.7284356355667114, "learning_rate": 2.1570607988178556e-06, "loss": 1.7597, "step": 86727 }, { "epoch": 2.89, "grad_norm": 0.7045952677726746, "learning_rate": 2.155809834335287e-06, "loss": 1.6107, "step": 86728 }, { "epoch": 2.89, "grad_norm": 0.713657557964325, "learning_rate": 2.1545592313916504e-06, "loss": 1.6575, "step": 86729 }, { "epoch": 2.89, "grad_norm": 0.7103090286254883, "learning_rate": 2.1533089899885113e-06, "loss": 1.653, "step": 86730 }, { "epoch": 2.89, "grad_norm": 0.6804417967796326, "learning_rate": 2.1520591101273686e-06, "loss": 1.6646, "step": 86731 }, { "epoch": 2.89, "grad_norm": 0.7041581273078918, "learning_rate": 2.150809591809721e-06, "loss": 1.7185, "step": 86732 }, { "epoch": 2.89, "grad_norm": 0.7281069159507751, "learning_rate": 2.1495604350371342e-06, "loss": 1.7561, "step": 86733 }, { "epoch": 2.89, "grad_norm": 0.7040800452232361, "learning_rate": 2.1483116398111067e-06, "loss": 1.6165, "step": 86734 }, { "epoch": 2.89, "grad_norm": 0.720975935459137, "learning_rate": 2.147063206133104e-06, "loss": 1.6201, "step": 86735 }, { "epoch": 2.89, "grad_norm": 0.7277584671974182, "learning_rate": 2.145815134004691e-06, "loss": 1.7141, "step": 86736 }, { "epoch": 2.89, "grad_norm": 0.7079058289527893, "learning_rate": 2.144567423427368e-06, "loss": 1.7058, "step": 86737 }, { "epoch": 2.89, "grad_norm": 0.6977480053901672, "learning_rate": 2.1433200744026655e-06, "loss": 1.6705, "step": 86738 }, { "epoch": 2.89, "grad_norm": 0.7308145761489868, "learning_rate": 2.1420730869320833e-06, "loss": 1.6982, "step": 86739 }, { "epoch": 2.89, "grad_norm": 0.7068014144897461, "learning_rate": 2.14082646101712e-06, "loss": 1.6106, "step": 86740 }, { "epoch": 2.89, "grad_norm": 0.7324582934379578, "learning_rate": 2.1395801966593406e-06, "loss": 1.686, "step": 86741 }, { "epoch": 2.89, "grad_norm": 0.7235798835754395, "learning_rate": 2.1383342938601778e-06, "loss": 1.7045, "step": 86742 }, { "epoch": 2.89, "grad_norm": 0.7096745371818542, "learning_rate": 2.1370887526211967e-06, "loss": 1.6322, "step": 86743 }, { "epoch": 2.89, "grad_norm": 0.7375652194023132, "learning_rate": 2.1358435729439293e-06, "loss": 1.6521, "step": 86744 }, { "epoch": 2.89, "grad_norm": 0.712457001209259, "learning_rate": 2.134598754829875e-06, "loss": 1.7, "step": 86745 }, { "epoch": 2.89, "grad_norm": 0.7169688940048218, "learning_rate": 2.1333542982804986e-06, "loss": 1.7489, "step": 86746 }, { "epoch": 2.89, "grad_norm": 0.7424932718276978, "learning_rate": 2.1321102032973324e-06, "loss": 1.7613, "step": 86747 }, { "epoch": 2.89, "grad_norm": 0.7266610264778137, "learning_rate": 2.1308664698819087e-06, "loss": 1.7304, "step": 86748 }, { "epoch": 2.89, "grad_norm": 0.72511225938797, "learning_rate": 2.1296230980357266e-06, "loss": 1.6629, "step": 86749 }, { "epoch": 2.89, "grad_norm": 0.7153609395027161, "learning_rate": 2.1283800877603176e-06, "loss": 1.6901, "step": 86750 }, { "epoch": 2.89, "grad_norm": 0.7095752954483032, "learning_rate": 2.127137439057114e-06, "loss": 1.6322, "step": 86751 }, { "epoch": 2.89, "grad_norm": 0.7431415915489197, "learning_rate": 2.1258951519277147e-06, "loss": 1.7217, "step": 86752 }, { "epoch": 2.89, "grad_norm": 0.7013086080551147, "learning_rate": 2.124653226373552e-06, "loss": 1.633, "step": 86753 }, { "epoch": 2.89, "grad_norm": 0.728678822517395, "learning_rate": 2.123411662396224e-06, "loss": 1.7293, "step": 86754 }, { "epoch": 2.89, "grad_norm": 0.7096284627914429, "learning_rate": 2.1221704599971635e-06, "loss": 1.7177, "step": 86755 }, { "epoch": 2.89, "grad_norm": 0.7036064863204956, "learning_rate": 2.120929619177869e-06, "loss": 1.7332, "step": 86756 }, { "epoch": 2.89, "grad_norm": 0.7339829802513123, "learning_rate": 2.119689139939906e-06, "loss": 1.7386, "step": 86757 }, { "epoch": 2.89, "grad_norm": 0.7510045170783997, "learning_rate": 2.1184490222847073e-06, "loss": 1.7055, "step": 86758 }, { "epoch": 2.89, "grad_norm": 0.7163822054862976, "learning_rate": 2.1172092662138705e-06, "loss": 1.6745, "step": 86759 }, { "epoch": 2.89, "grad_norm": 0.7222087979316711, "learning_rate": 2.115969871728829e-06, "loss": 1.6449, "step": 86760 }, { "epoch": 2.89, "grad_norm": 0.7462313771247864, "learning_rate": 2.1147308388311135e-06, "loss": 1.6651, "step": 86761 }, { "epoch": 2.89, "grad_norm": 0.7283856272697449, "learning_rate": 2.113492167522224e-06, "loss": 1.6464, "step": 86762 }, { "epoch": 2.89, "grad_norm": 0.736365020275116, "learning_rate": 2.1122538578036584e-06, "loss": 1.7001, "step": 86763 }, { "epoch": 2.89, "grad_norm": 0.7685292363166809, "learning_rate": 2.1110159096769498e-06, "loss": 1.6545, "step": 86764 }, { "epoch": 2.89, "grad_norm": 0.7155110239982605, "learning_rate": 2.1097783231435297e-06, "loss": 1.7068, "step": 86765 }, { "epoch": 2.89, "grad_norm": 0.736016571521759, "learning_rate": 2.108541098204997e-06, "loss": 1.626, "step": 86766 }, { "epoch": 2.89, "grad_norm": 0.7332614660263062, "learning_rate": 2.107304234862783e-06, "loss": 1.717, "step": 86767 }, { "epoch": 2.89, "grad_norm": 0.7078966498374939, "learning_rate": 2.1060677331183883e-06, "loss": 1.5781, "step": 86768 }, { "epoch": 2.89, "grad_norm": 0.7205331325531006, "learning_rate": 2.1048315929733438e-06, "loss": 1.6495, "step": 86769 }, { "epoch": 2.89, "grad_norm": 0.7284080982208252, "learning_rate": 2.1035958144291487e-06, "loss": 1.6672, "step": 86770 }, { "epoch": 2.89, "grad_norm": 0.6890690922737122, "learning_rate": 2.1023603974873017e-06, "loss": 1.6391, "step": 86771 }, { "epoch": 2.89, "grad_norm": 0.7029536962509155, "learning_rate": 2.101125342149268e-06, "loss": 1.739, "step": 86772 }, { "epoch": 2.89, "grad_norm": 0.7744051814079285, "learning_rate": 2.0998906484166135e-06, "loss": 1.7208, "step": 86773 }, { "epoch": 2.89, "grad_norm": 0.7101483345031738, "learning_rate": 2.0986563162907697e-06, "loss": 1.663, "step": 86774 }, { "epoch": 2.89, "grad_norm": 0.6964287161827087, "learning_rate": 2.0974223457732696e-06, "loss": 1.6957, "step": 86775 }, { "epoch": 2.89, "grad_norm": 0.7256146669387817, "learning_rate": 2.0961887368656115e-06, "loss": 1.6519, "step": 86776 }, { "epoch": 2.89, "grad_norm": 0.7152480483055115, "learning_rate": 2.0949554895692943e-06, "loss": 1.6739, "step": 86777 }, { "epoch": 2.89, "grad_norm": 0.7291382551193237, "learning_rate": 2.0937226038857833e-06, "loss": 1.7203, "step": 86778 }, { "epoch": 2.89, "grad_norm": 0.7144036293029785, "learning_rate": 2.092490079816611e-06, "loss": 1.6988, "step": 86779 }, { "epoch": 2.89, "grad_norm": 0.7171955704689026, "learning_rate": 2.091257917363276e-06, "loss": 1.6714, "step": 86780 }, { "epoch": 2.89, "grad_norm": 0.7255555987358093, "learning_rate": 2.0900261165272435e-06, "loss": 1.6535, "step": 86781 }, { "epoch": 2.89, "grad_norm": 0.7115652561187744, "learning_rate": 2.0887946773100127e-06, "loss": 1.6087, "step": 86782 }, { "epoch": 2.89, "grad_norm": 0.7349223494529724, "learning_rate": 2.0875635997131157e-06, "loss": 1.7327, "step": 86783 }, { "epoch": 2.89, "grad_norm": 0.7478563189506531, "learning_rate": 2.086332883738018e-06, "loss": 1.7201, "step": 86784 }, { "epoch": 2.89, "grad_norm": 0.7269076108932495, "learning_rate": 2.0851025293862177e-06, "loss": 1.721, "step": 86785 }, { "epoch": 2.89, "grad_norm": 0.7076333165168762, "learning_rate": 2.0838725366591813e-06, "loss": 1.692, "step": 86786 }, { "epoch": 2.89, "grad_norm": 0.7509286403656006, "learning_rate": 2.0826429055584737e-06, "loss": 1.7074, "step": 86787 }, { "epoch": 2.89, "grad_norm": 0.7230279445648193, "learning_rate": 2.081413636085494e-06, "loss": 1.687, "step": 86788 }, { "epoch": 2.89, "grad_norm": 0.731221079826355, "learning_rate": 2.0801847282417738e-06, "loss": 1.6984, "step": 86789 }, { "epoch": 2.89, "grad_norm": 0.713645875453949, "learning_rate": 2.0789561820288457e-06, "loss": 1.7067, "step": 86790 }, { "epoch": 2.89, "grad_norm": 0.7146527171134949, "learning_rate": 2.077727997448175e-06, "loss": 1.6401, "step": 86791 }, { "epoch": 2.89, "grad_norm": 0.7349591255187988, "learning_rate": 2.076500174501228e-06, "loss": 1.6775, "step": 86792 }, { "epoch": 2.89, "grad_norm": 0.695870041847229, "learning_rate": 2.075272713189502e-06, "loss": 1.6075, "step": 86793 }, { "epoch": 2.89, "grad_norm": 0.6935808062553406, "learning_rate": 2.07404561351453e-06, "loss": 1.634, "step": 86794 }, { "epoch": 2.89, "grad_norm": 0.7189136743545532, "learning_rate": 2.072818875477744e-06, "loss": 1.6299, "step": 86795 }, { "epoch": 2.89, "grad_norm": 0.7012672424316406, "learning_rate": 2.0715924990806432e-06, "loss": 1.6953, "step": 86796 }, { "epoch": 2.89, "grad_norm": 0.6956136226654053, "learning_rate": 2.0703664843247923e-06, "loss": 1.6939, "step": 86797 }, { "epoch": 2.89, "grad_norm": 0.7172461748123169, "learning_rate": 2.0691408312115577e-06, "loss": 1.6626, "step": 86798 }, { "epoch": 2.89, "grad_norm": 0.7215287685394287, "learning_rate": 2.067915539742504e-06, "loss": 1.7099, "step": 86799 }, { "epoch": 2.89, "grad_norm": 0.7121401429176331, "learning_rate": 2.066690609919097e-06, "loss": 1.6704, "step": 86800 }, { "epoch": 2.89, "grad_norm": 0.7292162179946899, "learning_rate": 2.0654660417428694e-06, "loss": 1.6631, "step": 86801 }, { "epoch": 2.89, "grad_norm": 0.6981422305107117, "learning_rate": 2.064241835215252e-06, "loss": 1.6318, "step": 86802 }, { "epoch": 2.89, "grad_norm": 0.7167717814445496, "learning_rate": 2.063017990337712e-06, "loss": 1.6111, "step": 86803 }, { "epoch": 2.89, "grad_norm": 0.7409773468971252, "learning_rate": 2.0617945071117804e-06, "loss": 1.6797, "step": 86804 }, { "epoch": 2.89, "grad_norm": 0.7160041928291321, "learning_rate": 2.060571385538956e-06, "loss": 1.6656, "step": 86805 }, { "epoch": 2.89, "grad_norm": 0.710201621055603, "learning_rate": 2.0593486256206715e-06, "loss": 1.5964, "step": 86806 }, { "epoch": 2.89, "grad_norm": 0.7132889628410339, "learning_rate": 2.0581262273584586e-06, "loss": 1.6517, "step": 86807 }, { "epoch": 2.89, "grad_norm": 0.7589665055274963, "learning_rate": 2.056904190753783e-06, "loss": 1.6718, "step": 86808 }, { "epoch": 2.89, "grad_norm": 0.703027606010437, "learning_rate": 2.0556825158081104e-06, "loss": 1.692, "step": 86809 }, { "epoch": 2.89, "grad_norm": 0.7008530497550964, "learning_rate": 2.054461202522939e-06, "loss": 1.7004, "step": 86810 }, { "epoch": 2.89, "grad_norm": 0.71156245470047, "learning_rate": 2.053240250899768e-06, "loss": 1.6256, "step": 86811 }, { "epoch": 2.89, "grad_norm": 0.7003494501113892, "learning_rate": 2.052019660940063e-06, "loss": 1.6274, "step": 86812 }, { "epoch": 2.89, "grad_norm": 0.7316851019859314, "learning_rate": 2.050799432645289e-06, "loss": 1.6226, "step": 86813 }, { "epoch": 2.89, "grad_norm": 0.7263155579566956, "learning_rate": 2.0495795660169458e-06, "loss": 1.669, "step": 86814 }, { "epoch": 2.89, "grad_norm": 0.6943524479866028, "learning_rate": 2.0483600610564975e-06, "loss": 1.6542, "step": 86815 }, { "epoch": 2.89, "grad_norm": 0.689626932144165, "learning_rate": 2.0471409177654776e-06, "loss": 1.6901, "step": 86816 }, { "epoch": 2.89, "grad_norm": 0.7016677856445312, "learning_rate": 2.045922136145317e-06, "loss": 1.6557, "step": 86817 }, { "epoch": 2.89, "grad_norm": 0.7179994583129883, "learning_rate": 2.0447037161974824e-06, "loss": 1.7409, "step": 86818 }, { "epoch": 2.89, "grad_norm": 0.7031381130218506, "learning_rate": 2.0434856579235047e-06, "loss": 1.7442, "step": 86819 }, { "epoch": 2.89, "grad_norm": 0.7092125415802002, "learning_rate": 2.042267961324784e-06, "loss": 1.7331, "step": 86820 }, { "epoch": 2.89, "grad_norm": 0.7158517241477966, "learning_rate": 2.041050626402918e-06, "loss": 1.7007, "step": 86821 }, { "epoch": 2.89, "grad_norm": 0.7432621717453003, "learning_rate": 2.0398336531592396e-06, "loss": 1.7822, "step": 86822 }, { "epoch": 2.89, "grad_norm": 0.7114534974098206, "learning_rate": 2.0386170415953474e-06, "loss": 1.6592, "step": 86823 }, { "epoch": 2.89, "grad_norm": 0.7124859094619751, "learning_rate": 2.0374007917126734e-06, "loss": 1.6874, "step": 86824 }, { "epoch": 2.89, "grad_norm": 0.6958396434783936, "learning_rate": 2.0361849035126833e-06, "loss": 1.6683, "step": 86825 }, { "epoch": 2.89, "grad_norm": 0.7293384671211243, "learning_rate": 2.0349693769968755e-06, "loss": 1.698, "step": 86826 }, { "epoch": 2.89, "grad_norm": 0.7142768502235413, "learning_rate": 2.0337542121666828e-06, "loss": 1.6921, "step": 86827 }, { "epoch": 2.89, "grad_norm": 0.7143285274505615, "learning_rate": 2.032539409023637e-06, "loss": 1.6647, "step": 86828 }, { "epoch": 2.89, "grad_norm": 0.7108948230743408, "learning_rate": 2.0313249675691367e-06, "loss": 1.6129, "step": 86829 }, { "epoch": 2.89, "grad_norm": 0.7315136790275574, "learning_rate": 2.030110887804748e-06, "loss": 1.6473, "step": 86830 }, { "epoch": 2.89, "grad_norm": 0.694579541683197, "learning_rate": 2.0288971697319025e-06, "loss": 1.6149, "step": 86831 }, { "epoch": 2.89, "grad_norm": 0.7075429558753967, "learning_rate": 2.0276838133520656e-06, "loss": 1.5525, "step": 86832 }, { "epoch": 2.89, "grad_norm": 0.7314189672470093, "learning_rate": 2.0264708186667032e-06, "loss": 1.7063, "step": 86833 }, { "epoch": 2.89, "grad_norm": 0.7033988833427429, "learning_rate": 2.0252581856773144e-06, "loss": 1.6365, "step": 86834 }, { "epoch": 2.89, "grad_norm": 0.7123337984085083, "learning_rate": 2.024045914385364e-06, "loss": 1.6903, "step": 86835 }, { "epoch": 2.89, "grad_norm": 0.7309693694114685, "learning_rate": 2.022834004792284e-06, "loss": 1.6411, "step": 86836 }, { "epoch": 2.89, "grad_norm": 0.7206984162330627, "learning_rate": 2.021622456899641e-06, "loss": 1.697, "step": 86837 }, { "epoch": 2.89, "grad_norm": 0.7078829407691956, "learning_rate": 2.020411270708766e-06, "loss": 1.722, "step": 86838 }, { "epoch": 2.89, "grad_norm": 0.7233463525772095, "learning_rate": 2.0192004462212584e-06, "loss": 1.6939, "step": 86839 }, { "epoch": 2.89, "grad_norm": 0.7194907665252686, "learning_rate": 2.017989983438517e-06, "loss": 1.7289, "step": 86840 }, { "epoch": 2.89, "grad_norm": 0.687492311000824, "learning_rate": 2.0167798823620405e-06, "loss": 1.6581, "step": 86841 }, { "epoch": 2.89, "grad_norm": 0.7183266878128052, "learning_rate": 2.0155701429932615e-06, "loss": 1.6615, "step": 86842 }, { "epoch": 2.89, "grad_norm": 0.7204580903053284, "learning_rate": 2.014360765333711e-06, "loss": 1.6405, "step": 86843 }, { "epoch": 2.89, "grad_norm": 0.7023001313209534, "learning_rate": 2.0131517493848226e-06, "loss": 1.6772, "step": 86844 }, { "epoch": 2.89, "grad_norm": 0.721407413482666, "learning_rate": 2.0119430951480274e-06, "loss": 1.6973, "step": 86845 }, { "epoch": 2.89, "grad_norm": 0.7373275756835938, "learning_rate": 2.0107348026248248e-06, "loss": 1.6523, "step": 86846 }, { "epoch": 2.89, "grad_norm": 0.6992021203041077, "learning_rate": 2.0095268718167136e-06, "loss": 1.6496, "step": 86847 }, { "epoch": 2.89, "grad_norm": 0.7090148329734802, "learning_rate": 2.0083193027251252e-06, "loss": 1.626, "step": 86848 }, { "epoch": 2.89, "grad_norm": 0.7202072143554688, "learning_rate": 2.0071120953515265e-06, "loss": 1.6836, "step": 86849 }, { "epoch": 2.89, "grad_norm": 0.7254557013511658, "learning_rate": 2.0059052496973483e-06, "loss": 1.6356, "step": 86850 }, { "epoch": 2.89, "grad_norm": 0.7003669142723083, "learning_rate": 2.004698765764157e-06, "loss": 1.6668, "step": 86851 }, { "epoch": 2.89, "grad_norm": 0.6873292326927185, "learning_rate": 2.003492643553317e-06, "loss": 1.6373, "step": 86852 }, { "epoch": 2.89, "grad_norm": 0.6975076198577881, "learning_rate": 2.0022868830663284e-06, "loss": 1.707, "step": 86853 }, { "epoch": 2.89, "grad_norm": 0.7322094440460205, "learning_rate": 2.001081484304656e-06, "loss": 1.6331, "step": 86854 }, { "epoch": 2.89, "grad_norm": 0.7047854661941528, "learning_rate": 1.9998764472697994e-06, "loss": 1.7057, "step": 86855 }, { "epoch": 2.89, "grad_norm": 0.7249500751495361, "learning_rate": 1.998671771963123e-06, "loss": 1.6782, "step": 86856 }, { "epoch": 2.89, "grad_norm": 0.7379369735717773, "learning_rate": 1.9974674583861927e-06, "loss": 1.7289, "step": 86857 }, { "epoch": 2.89, "grad_norm": 0.7313603162765503, "learning_rate": 1.9962635065404407e-06, "loss": 1.6699, "step": 86858 }, { "epoch": 2.89, "grad_norm": 0.702923595905304, "learning_rate": 1.9950599164272663e-06, "loss": 1.7188, "step": 86859 }, { "epoch": 2.89, "grad_norm": 0.6947427988052368, "learning_rate": 1.993856688048201e-06, "loss": 1.7262, "step": 86860 }, { "epoch": 2.89, "grad_norm": 0.699198305606842, "learning_rate": 1.9926538214047107e-06, "loss": 1.6345, "step": 86861 }, { "epoch": 2.89, "grad_norm": 0.7328593134880066, "learning_rate": 1.9914513164982272e-06, "loss": 1.7359, "step": 86862 }, { "epoch": 2.89, "grad_norm": 0.699825644493103, "learning_rate": 1.9902491733301827e-06, "loss": 1.664, "step": 86863 }, { "epoch": 2.89, "grad_norm": 0.7410964965820312, "learning_rate": 1.9890473919020765e-06, "loss": 1.6986, "step": 86864 }, { "epoch": 2.89, "grad_norm": 0.7084566354751587, "learning_rate": 1.987845972215374e-06, "loss": 1.6388, "step": 86865 }, { "epoch": 2.89, "grad_norm": 0.7407596707344055, "learning_rate": 1.9866449142714735e-06, "loss": 1.745, "step": 86866 }, { "epoch": 2.89, "grad_norm": 0.6994770169258118, "learning_rate": 1.9854442180719078e-06, "loss": 1.6405, "step": 86867 }, { "epoch": 2.89, "grad_norm": 0.7222241163253784, "learning_rate": 1.9842438836181084e-06, "loss": 1.7347, "step": 86868 }, { "epoch": 2.89, "grad_norm": 0.7141907215118408, "learning_rate": 1.983043910911508e-06, "loss": 1.6661, "step": 86869 }, { "epoch": 2.89, "grad_norm": 0.7095251679420471, "learning_rate": 1.9818442999536055e-06, "loss": 1.654, "step": 86870 }, { "epoch": 2.89, "grad_norm": 0.7173052430152893, "learning_rate": 1.9806450507457993e-06, "loss": 1.6288, "step": 86871 }, { "epoch": 2.89, "grad_norm": 0.6995502710342407, "learning_rate": 1.979446163289622e-06, "loss": 1.6259, "step": 86872 }, { "epoch": 2.89, "grad_norm": 0.6910722255706787, "learning_rate": 1.9782476375864385e-06, "loss": 1.7571, "step": 86873 }, { "epoch": 2.89, "grad_norm": 0.7170472741127014, "learning_rate": 1.9770494736377816e-06, "loss": 1.7011, "step": 86874 }, { "epoch": 2.89, "grad_norm": 0.7224447131156921, "learning_rate": 1.97585167144505e-06, "loss": 1.6467, "step": 86875 }, { "epoch": 2.89, "grad_norm": 0.7241698503494263, "learning_rate": 1.9746542310097756e-06, "loss": 1.6611, "step": 86876 }, { "epoch": 2.89, "grad_norm": 0.707691490650177, "learning_rate": 1.9734571523333242e-06, "loss": 1.6199, "step": 86877 }, { "epoch": 2.89, "grad_norm": 0.7416242361068726, "learning_rate": 1.972260435417161e-06, "loss": 1.6405, "step": 86878 }, { "epoch": 2.89, "grad_norm": 0.7105342149734497, "learning_rate": 1.9710640802627854e-06, "loss": 1.6761, "step": 86879 }, { "epoch": 2.89, "grad_norm": 0.710500180721283, "learning_rate": 1.9698680868716287e-06, "loss": 1.6254, "step": 86880 }, { "epoch": 2.89, "grad_norm": 0.738923966884613, "learning_rate": 1.9686724552451237e-06, "loss": 1.6386, "step": 86881 }, { "epoch": 2.89, "grad_norm": 0.6932250261306763, "learning_rate": 1.967477185384736e-06, "loss": 1.7463, "step": 86882 }, { "epoch": 2.89, "grad_norm": 0.7052393555641174, "learning_rate": 1.966282277291931e-06, "loss": 1.7049, "step": 86883 }, { "epoch": 2.89, "grad_norm": 0.7255229353904724, "learning_rate": 1.9650877309681733e-06, "loss": 1.7221, "step": 86884 }, { "epoch": 2.89, "grad_norm": 0.716594934463501, "learning_rate": 1.9638935464148298e-06, "loss": 1.6424, "step": 86885 }, { "epoch": 2.89, "grad_norm": 0.7266459465026855, "learning_rate": 1.9626997236334653e-06, "loss": 1.7246, "step": 86886 }, { "epoch": 2.89, "grad_norm": 0.7164273262023926, "learning_rate": 1.9615062626254117e-06, "loss": 1.7038, "step": 86887 }, { "epoch": 2.89, "grad_norm": 0.692433774471283, "learning_rate": 1.960313163392202e-06, "loss": 1.6746, "step": 86888 }, { "epoch": 2.89, "grad_norm": 0.7157347798347473, "learning_rate": 1.959120425935268e-06, "loss": 1.685, "step": 86889 }, { "epoch": 2.89, "grad_norm": 0.7102046012878418, "learning_rate": 1.9579280502560414e-06, "loss": 1.6183, "step": 86890 }, { "epoch": 2.89, "grad_norm": 0.7306455969810486, "learning_rate": 1.9567360363559547e-06, "loss": 1.7058, "step": 86891 }, { "epoch": 2.89, "grad_norm": 0.743619441986084, "learning_rate": 1.955544384236507e-06, "loss": 1.6386, "step": 86892 }, { "epoch": 2.89, "grad_norm": 0.6822600364685059, "learning_rate": 1.954353093899097e-06, "loss": 1.6223, "step": 86893 }, { "epoch": 2.89, "grad_norm": 0.7117613554000854, "learning_rate": 1.95316216534519e-06, "loss": 1.6515, "step": 86894 }, { "epoch": 2.89, "grad_norm": 0.7002654671669006, "learning_rate": 1.9519715985762185e-06, "loss": 1.6098, "step": 86895 }, { "epoch": 2.89, "grad_norm": 0.7149968147277832, "learning_rate": 1.950781393593648e-06, "loss": 1.6408, "step": 86896 }, { "epoch": 2.89, "grad_norm": 0.7207584381103516, "learning_rate": 1.94959155039891e-06, "loss": 1.6867, "step": 86897 }, { "epoch": 2.89, "grad_norm": 0.7532623410224915, "learning_rate": 1.948402068993471e-06, "loss": 1.6406, "step": 86898 }, { "epoch": 2.89, "grad_norm": 0.7103479504585266, "learning_rate": 1.9472129493787292e-06, "loss": 1.5955, "step": 86899 }, { "epoch": 2.89, "grad_norm": 0.7235873341560364, "learning_rate": 1.9460241915561837e-06, "loss": 1.6384, "step": 86900 }, { "epoch": 2.89, "grad_norm": 0.7429914474487305, "learning_rate": 1.944835795527233e-06, "loss": 1.7289, "step": 86901 }, { "epoch": 2.89, "grad_norm": 0.6835757493972778, "learning_rate": 1.94364776129331e-06, "loss": 1.6909, "step": 86902 }, { "epoch": 2.89, "grad_norm": 0.7386698126792908, "learning_rate": 1.942460088855913e-06, "loss": 1.674, "step": 86903 }, { "epoch": 2.89, "grad_norm": 0.7050136923789978, "learning_rate": 1.9412727782164407e-06, "loss": 1.752, "step": 86904 }, { "epoch": 2.89, "grad_norm": 0.740638017654419, "learning_rate": 1.9400858293763588e-06, "loss": 1.7697, "step": 86905 }, { "epoch": 2.89, "grad_norm": 0.7362223863601685, "learning_rate": 1.9388992423370664e-06, "loss": 1.6324, "step": 86906 }, { "epoch": 2.89, "grad_norm": 0.7051526308059692, "learning_rate": 1.937713017100062e-06, "loss": 1.703, "step": 86907 }, { "epoch": 2.89, "grad_norm": 0.6936110854148865, "learning_rate": 1.936527153666778e-06, "loss": 1.6432, "step": 86908 }, { "epoch": 2.89, "grad_norm": 0.7100581526756287, "learning_rate": 1.93534165203858e-06, "loss": 1.6553, "step": 86909 }, { "epoch": 2.89, "grad_norm": 0.6953029036521912, "learning_rate": 1.9341565122169665e-06, "loss": 1.6452, "step": 86910 }, { "epoch": 2.89, "grad_norm": 0.7203551530838013, "learning_rate": 1.9329717342034037e-06, "loss": 1.715, "step": 86911 }, { "epoch": 2.89, "grad_norm": 0.7187741994857788, "learning_rate": 1.931787317999289e-06, "loss": 1.6687, "step": 86912 }, { "epoch": 2.89, "grad_norm": 0.694844663143158, "learning_rate": 1.930603263606023e-06, "loss": 1.6854, "step": 86913 }, { "epoch": 2.89, "grad_norm": 0.7023638486862183, "learning_rate": 1.929419571025137e-06, "loss": 1.6679, "step": 86914 }, { "epoch": 2.89, "grad_norm": 0.6911448836326599, "learning_rate": 1.9282362402579967e-06, "loss": 1.6735, "step": 86915 }, { "epoch": 2.89, "grad_norm": 0.7162976264953613, "learning_rate": 1.927053271306067e-06, "loss": 1.6398, "step": 86916 }, { "epoch": 2.89, "grad_norm": 0.7178483605384827, "learning_rate": 1.925870664170748e-06, "loss": 1.6647, "step": 86917 }, { "epoch": 2.89, "grad_norm": 0.7282514572143555, "learning_rate": 1.924688418853537e-06, "loss": 1.694, "step": 86918 }, { "epoch": 2.89, "grad_norm": 0.7060625553131104, "learning_rate": 1.9235065353558343e-06, "loss": 1.5165, "step": 86919 }, { "epoch": 2.89, "grad_norm": 0.726751983165741, "learning_rate": 1.922325013679038e-06, "loss": 1.7245, "step": 86920 }, { "epoch": 2.89, "grad_norm": 0.7136314511299133, "learning_rate": 1.921143853824647e-06, "loss": 1.6712, "step": 86921 }, { "epoch": 2.89, "grad_norm": 0.7448207139968872, "learning_rate": 1.9199630557940605e-06, "loss": 1.748, "step": 86922 }, { "epoch": 2.89, "grad_norm": 0.6907296180725098, "learning_rate": 1.9187826195887433e-06, "loss": 1.6931, "step": 86923 }, { "epoch": 2.89, "grad_norm": 0.7003848552703857, "learning_rate": 1.917602545210062e-06, "loss": 1.6677, "step": 86924 }, { "epoch": 2.89, "grad_norm": 0.7040340304374695, "learning_rate": 1.9164228326595145e-06, "loss": 1.625, "step": 86925 }, { "epoch": 2.89, "grad_norm": 0.7175061702728271, "learning_rate": 1.9152434819384998e-06, "loss": 1.6862, "step": 86926 }, { "epoch": 2.89, "grad_norm": 0.7197614312171936, "learning_rate": 1.914064493048484e-06, "loss": 1.6261, "step": 86927 }, { "epoch": 2.89, "grad_norm": 0.7102797031402588, "learning_rate": 1.912885865990832e-06, "loss": 1.6244, "step": 86928 }, { "epoch": 2.89, "grad_norm": 0.7028360366821289, "learning_rate": 1.911707600767043e-06, "loss": 1.7, "step": 86929 }, { "epoch": 2.89, "grad_norm": 0.7485870122909546, "learning_rate": 1.9105296973785155e-06, "loss": 1.6768, "step": 86930 }, { "epoch": 2.89, "grad_norm": 0.7377358078956604, "learning_rate": 1.909352155826682e-06, "loss": 1.7247, "step": 86931 }, { "epoch": 2.89, "grad_norm": 0.7175230383872986, "learning_rate": 1.9081749761129748e-06, "loss": 1.6809, "step": 86932 }, { "epoch": 2.89, "grad_norm": 0.7135884165763855, "learning_rate": 1.906998158238826e-06, "loss": 1.6548, "step": 86933 }, { "epoch": 2.89, "grad_norm": 0.7125316262245178, "learning_rate": 1.9058217022056676e-06, "loss": 1.5776, "step": 86934 }, { "epoch": 2.89, "grad_norm": 0.7016177773475647, "learning_rate": 1.9046456080148987e-06, "loss": 1.7301, "step": 86935 }, { "epoch": 2.89, "grad_norm": 0.7199211716651917, "learning_rate": 1.9034698756679845e-06, "loss": 1.6791, "step": 86936 }, { "epoch": 2.89, "grad_norm": 0.7146279811859131, "learning_rate": 1.9022945051662908e-06, "loss": 1.7306, "step": 86937 }, { "epoch": 2.89, "grad_norm": 0.7125699520111084, "learning_rate": 1.9011194965113497e-06, "loss": 1.7071, "step": 86938 }, { "epoch": 2.89, "grad_norm": 0.7105122208595276, "learning_rate": 1.8999448497044934e-06, "loss": 1.6625, "step": 86939 }, { "epoch": 2.89, "grad_norm": 0.7296292185783386, "learning_rate": 1.8987705647471873e-06, "loss": 1.5812, "step": 86940 }, { "epoch": 2.89, "grad_norm": 0.9713767766952515, "learning_rate": 1.897596641640864e-06, "loss": 1.6162, "step": 86941 }, { "epoch": 2.89, "grad_norm": 0.7323529720306396, "learning_rate": 1.8964230803869217e-06, "loss": 1.7324, "step": 86942 }, { "epoch": 2.89, "grad_norm": 0.7221137881278992, "learning_rate": 1.8952498809867934e-06, "loss": 1.7063, "step": 86943 }, { "epoch": 2.89, "grad_norm": 0.7028900980949402, "learning_rate": 1.8940770434419105e-06, "loss": 1.717, "step": 86944 }, { "epoch": 2.89, "grad_norm": 0.7238753437995911, "learning_rate": 1.8929045677537059e-06, "loss": 1.6758, "step": 86945 }, { "epoch": 2.89, "grad_norm": 0.7142861485481262, "learning_rate": 1.891732453923578e-06, "loss": 1.6984, "step": 86946 }, { "epoch": 2.89, "grad_norm": 0.7201676368713379, "learning_rate": 1.8905607019529589e-06, "loss": 1.6003, "step": 86947 }, { "epoch": 2.89, "grad_norm": 0.7016129493713379, "learning_rate": 1.8893893118432812e-06, "loss": 1.66, "step": 86948 }, { "epoch": 2.89, "grad_norm": 0.7159241437911987, "learning_rate": 1.888218283595977e-06, "loss": 1.658, "step": 86949 }, { "epoch": 2.89, "grad_norm": 0.7047913074493408, "learning_rate": 1.887047617212445e-06, "loss": 1.6797, "step": 86950 }, { "epoch": 2.89, "grad_norm": 0.715522825717926, "learning_rate": 1.885877312694084e-06, "loss": 1.6444, "step": 86951 }, { "epoch": 2.89, "grad_norm": 0.7130834460258484, "learning_rate": 1.8847073700423599e-06, "loss": 1.6758, "step": 86952 }, { "epoch": 2.89, "grad_norm": 0.7300429940223694, "learning_rate": 1.8835377892586712e-06, "loss": 1.6398, "step": 86953 }, { "epoch": 2.89, "grad_norm": 0.7088781595230103, "learning_rate": 1.8823685703444835e-06, "loss": 1.6165, "step": 86954 }, { "epoch": 2.89, "grad_norm": 0.7107840180397034, "learning_rate": 1.8811997133011291e-06, "loss": 1.6936, "step": 86955 }, { "epoch": 2.89, "grad_norm": 0.7391632795333862, "learning_rate": 1.8800312181300737e-06, "loss": 1.6617, "step": 86956 }, { "epoch": 2.89, "grad_norm": 0.7122860550880432, "learning_rate": 1.878863084832749e-06, "loss": 1.7061, "step": 86957 }, { "epoch": 2.89, "grad_norm": 0.7518372535705566, "learning_rate": 1.8776953134105544e-06, "loss": 1.7621, "step": 86958 }, { "epoch": 2.89, "grad_norm": 0.7176523804664612, "learning_rate": 1.8765279038649217e-06, "loss": 1.6957, "step": 86959 }, { "epoch": 2.89, "grad_norm": 0.7098841071128845, "learning_rate": 1.8753608561972168e-06, "loss": 1.6954, "step": 86960 }, { "epoch": 2.89, "grad_norm": 0.7013289332389832, "learning_rate": 1.8741941704089715e-06, "loss": 1.6427, "step": 86961 }, { "epoch": 2.89, "grad_norm": 0.7070513367652893, "learning_rate": 1.8730278465014515e-06, "loss": 1.6541, "step": 86962 }, { "epoch": 2.89, "grad_norm": 0.7097174525260925, "learning_rate": 1.871861884476189e-06, "loss": 1.6729, "step": 86963 }, { "epoch": 2.89, "grad_norm": 0.6999095678329468, "learning_rate": 1.8706962843345496e-06, "loss": 1.6195, "step": 86964 }, { "epoch": 2.89, "grad_norm": 0.7118659615516663, "learning_rate": 1.8695310460779656e-06, "loss": 1.6732, "step": 86965 }, { "epoch": 2.89, "grad_norm": 0.6883590817451477, "learning_rate": 1.8683661697078355e-06, "loss": 1.6904, "step": 86966 }, { "epoch": 2.89, "grad_norm": 0.7018361687660217, "learning_rate": 1.8672016552255586e-06, "loss": 1.6584, "step": 86967 }, { "epoch": 2.89, "grad_norm": 0.6859732866287231, "learning_rate": 1.8660375026326335e-06, "loss": 1.5934, "step": 86968 }, { "epoch": 2.89, "grad_norm": 0.6948935985565186, "learning_rate": 1.8648737119303591e-06, "loss": 1.7144, "step": 86969 }, { "epoch": 2.89, "grad_norm": 0.7145975828170776, "learning_rate": 1.8637102831202011e-06, "loss": 1.5745, "step": 86970 }, { "epoch": 2.89, "grad_norm": 0.7059310674667358, "learning_rate": 1.8625472162035915e-06, "loss": 1.7611, "step": 86971 }, { "epoch": 2.89, "grad_norm": 0.7463650703430176, "learning_rate": 1.8613845111819292e-06, "loss": 1.7378, "step": 86972 }, { "epoch": 2.89, "grad_norm": 0.742457389831543, "learning_rate": 1.8602221680565798e-06, "loss": 1.6643, "step": 86973 }, { "epoch": 2.89, "grad_norm": 0.6936466693878174, "learning_rate": 1.8590601868290423e-06, "loss": 1.6171, "step": 86974 }, { "epoch": 2.89, "grad_norm": 0.7078041434288025, "learning_rate": 1.8578985675006486e-06, "loss": 1.7247, "step": 86975 }, { "epoch": 2.89, "grad_norm": 0.746373176574707, "learning_rate": 1.856737310072831e-06, "loss": 1.7026, "step": 86976 }, { "epoch": 2.89, "grad_norm": 0.6982853412628174, "learning_rate": 1.8555764145469887e-06, "loss": 1.7096, "step": 86977 }, { "epoch": 2.89, "grad_norm": 0.7020034790039062, "learning_rate": 1.8544158809245868e-06, "loss": 1.7092, "step": 86978 }, { "epoch": 2.89, "grad_norm": 0.7165846228599548, "learning_rate": 1.8532557092069577e-06, "loss": 1.6418, "step": 86979 }, { "epoch": 2.89, "grad_norm": 0.705174446105957, "learning_rate": 1.852095899395567e-06, "loss": 1.659, "step": 86980 }, { "epoch": 2.89, "grad_norm": 0.7054978013038635, "learning_rate": 1.8509364514918135e-06, "loss": 1.7461, "step": 86981 }, { "epoch": 2.89, "grad_norm": 0.7207755446434021, "learning_rate": 1.8497773654970627e-06, "loss": 1.7004, "step": 86982 }, { "epoch": 2.89, "grad_norm": 0.7285929322242737, "learning_rate": 1.84861864141278e-06, "loss": 1.7929, "step": 86983 }, { "epoch": 2.89, "grad_norm": 0.7288230657577515, "learning_rate": 1.8474602792403315e-06, "loss": 1.7183, "step": 86984 }, { "epoch": 2.89, "grad_norm": 0.7194995284080505, "learning_rate": 1.8463022789811155e-06, "loss": 1.6891, "step": 86985 }, { "epoch": 2.89, "grad_norm": 0.7130406498908997, "learning_rate": 1.8451446406365976e-06, "loss": 1.6163, "step": 86986 }, { "epoch": 2.89, "grad_norm": 0.7311615347862244, "learning_rate": 1.8439873642081104e-06, "loss": 1.6561, "step": 86987 }, { "epoch": 2.89, "grad_norm": 0.7216118574142456, "learning_rate": 1.8428304496971191e-06, "loss": 1.6468, "step": 86988 }, { "epoch": 2.89, "grad_norm": 0.6923026442527771, "learning_rate": 1.8416738971049893e-06, "loss": 1.6215, "step": 86989 }, { "epoch": 2.89, "grad_norm": 0.7312139868736267, "learning_rate": 1.84051770643312e-06, "loss": 1.6765, "step": 86990 }, { "epoch": 2.89, "grad_norm": 0.7272156476974487, "learning_rate": 1.8393618776829434e-06, "loss": 1.701, "step": 86991 }, { "epoch": 2.89, "grad_norm": 0.7504847049713135, "learning_rate": 1.838206410855858e-06, "loss": 1.6902, "step": 86992 }, { "epoch": 2.89, "grad_norm": 0.7129701375961304, "learning_rate": 1.83705130595323e-06, "loss": 1.7482, "step": 86993 }, { "epoch": 2.89, "grad_norm": 0.7191038727760315, "learning_rate": 1.8358965629765243e-06, "loss": 1.7058, "step": 86994 }, { "epoch": 2.89, "grad_norm": 0.7288554310798645, "learning_rate": 1.8347421819270735e-06, "loss": 1.6159, "step": 86995 }, { "epoch": 2.89, "grad_norm": 0.7161875367164612, "learning_rate": 1.8335881628063431e-06, "loss": 1.7191, "step": 86996 }, { "epoch": 2.89, "grad_norm": 0.7438691258430481, "learning_rate": 1.8324345056156987e-06, "loss": 1.6871, "step": 86997 }, { "epoch": 2.89, "grad_norm": 0.7068687081336975, "learning_rate": 1.8312812103565388e-06, "loss": 1.6693, "step": 86998 }, { "epoch": 2.89, "grad_norm": 0.7186086773872375, "learning_rate": 1.8301282770302628e-06, "loss": 1.7196, "step": 86999 }, { "epoch": 2.89, "grad_norm": 0.711487352848053, "learning_rate": 1.8289757056382692e-06, "loss": 1.6578, "step": 87000 }, { "epoch": 2.89, "grad_norm": 0.6793249249458313, "learning_rate": 1.8278234961819903e-06, "loss": 1.7171, "step": 87001 }, { "epoch": 2.89, "grad_norm": 0.7358523607254028, "learning_rate": 1.826671648662792e-06, "loss": 1.6806, "step": 87002 }, { "epoch": 2.89, "grad_norm": 0.7571550607681274, "learning_rate": 1.825520163082106e-06, "loss": 1.6637, "step": 87003 }, { "epoch": 2.89, "grad_norm": 0.7068659067153931, "learning_rate": 1.8243690394412647e-06, "loss": 1.7169, "step": 87004 }, { "epoch": 2.89, "grad_norm": 0.7534132599830627, "learning_rate": 1.8232182777417005e-06, "loss": 1.6837, "step": 87005 }, { "epoch": 2.89, "grad_norm": 0.7338154315948486, "learning_rate": 1.8220678779848451e-06, "loss": 1.6282, "step": 87006 }, { "epoch": 2.89, "grad_norm": 0.7277446389198303, "learning_rate": 1.8209178401720647e-06, "loss": 1.6622, "step": 87007 }, { "epoch": 2.89, "grad_norm": 0.709118664264679, "learning_rate": 1.8197681643047579e-06, "loss": 1.6509, "step": 87008 }, { "epoch": 2.89, "grad_norm": 0.6980403661727905, "learning_rate": 1.8186188503842902e-06, "loss": 1.6315, "step": 87009 }, { "epoch": 2.89, "grad_norm": 0.7365996837615967, "learning_rate": 1.8174698984120938e-06, "loss": 1.7027, "step": 87010 }, { "epoch": 2.89, "grad_norm": 0.701797604560852, "learning_rate": 1.8163213083895677e-06, "loss": 1.668, "step": 87011 }, { "epoch": 2.89, "grad_norm": 0.7169272303581238, "learning_rate": 1.8151730803180776e-06, "loss": 1.692, "step": 87012 }, { "epoch": 2.89, "grad_norm": 0.7190538644790649, "learning_rate": 1.814025214199022e-06, "loss": 1.6849, "step": 87013 }, { "epoch": 2.89, "grad_norm": 0.6962148547172546, "learning_rate": 1.8128777100338331e-06, "loss": 1.6207, "step": 87014 }, { "epoch": 2.89, "grad_norm": 0.7019088864326477, "learning_rate": 1.8117305678238437e-06, "loss": 1.8178, "step": 87015 }, { "epoch": 2.9, "grad_norm": 0.6959017515182495, "learning_rate": 1.8105837875705186e-06, "loss": 1.6626, "step": 87016 }, { "epoch": 2.9, "grad_norm": 0.7432883381843567, "learning_rate": 1.8094373692751908e-06, "loss": 1.6919, "step": 87017 }, { "epoch": 2.9, "grad_norm": 0.7157779932022095, "learning_rate": 1.8082913129392585e-06, "loss": 1.6549, "step": 87018 }, { "epoch": 2.9, "grad_norm": 0.7007668614387512, "learning_rate": 1.8071456185641209e-06, "loss": 1.7029, "step": 87019 }, { "epoch": 2.9, "grad_norm": 0.7200929522514343, "learning_rate": 1.8060002861511769e-06, "loss": 1.6763, "step": 87020 }, { "epoch": 2.9, "grad_norm": 0.7378020286560059, "learning_rate": 1.8048553157018253e-06, "loss": 1.6807, "step": 87021 }, { "epoch": 2.9, "grad_norm": 0.7124655246734619, "learning_rate": 1.8037107072174316e-06, "loss": 1.7511, "step": 87022 }, { "epoch": 2.9, "grad_norm": 0.7142220139503479, "learning_rate": 1.802566460699395e-06, "loss": 1.6689, "step": 87023 }, { "epoch": 2.9, "grad_norm": 0.7483083605766296, "learning_rate": 1.8014225761491142e-06, "loss": 1.7068, "step": 87024 }, { "epoch": 2.9, "grad_norm": 0.7105426788330078, "learning_rate": 1.8002790535679545e-06, "loss": 1.6775, "step": 87025 }, { "epoch": 2.9, "grad_norm": 0.7212274670600891, "learning_rate": 1.7991358929573152e-06, "loss": 1.6879, "step": 87026 }, { "epoch": 2.9, "grad_norm": 0.702706515789032, "learning_rate": 1.797993094318595e-06, "loss": 1.6715, "step": 87027 }, { "epoch": 2.9, "grad_norm": 0.7174346446990967, "learning_rate": 1.7968506576531928e-06, "loss": 1.7419, "step": 87028 }, { "epoch": 2.9, "grad_norm": 0.7169245481491089, "learning_rate": 1.7957085829624408e-06, "loss": 1.7212, "step": 87029 }, { "epoch": 2.9, "grad_norm": 0.7491458058357239, "learning_rate": 1.7945668702477711e-06, "loss": 1.7512, "step": 87030 }, { "epoch": 2.9, "grad_norm": 0.7211005091667175, "learning_rate": 1.793425519510583e-06, "loss": 1.702, "step": 87031 }, { "epoch": 2.9, "grad_norm": 0.697439968585968, "learning_rate": 1.7922845307522082e-06, "loss": 1.5953, "step": 87032 }, { "epoch": 2.9, "grad_norm": 0.7220268845558167, "learning_rate": 1.7911439039740794e-06, "loss": 1.7245, "step": 87033 }, { "epoch": 2.9, "grad_norm": 0.699127733707428, "learning_rate": 1.7900036391775286e-06, "loss": 1.7107, "step": 87034 }, { "epoch": 2.9, "grad_norm": 0.7108253836631775, "learning_rate": 1.7888637363640212e-06, "loss": 1.6521, "step": 87035 }, { "epoch": 2.9, "grad_norm": 0.7218320369720459, "learning_rate": 1.7877241955348897e-06, "loss": 1.6988, "step": 87036 }, { "epoch": 2.9, "grad_norm": 0.7322519421577454, "learning_rate": 1.7865850166914997e-06, "loss": 1.7136, "step": 87037 }, { "epoch": 2.9, "grad_norm": 0.7282550930976868, "learning_rate": 1.7854461998352497e-06, "loss": 1.7049, "step": 87038 }, { "epoch": 2.9, "grad_norm": 0.7178974151611328, "learning_rate": 1.7843077449675724e-06, "loss": 1.6761, "step": 87039 }, { "epoch": 2.9, "grad_norm": 0.7207310795783997, "learning_rate": 1.7831696520897664e-06, "loss": 1.6357, "step": 87040 }, { "epoch": 2.9, "grad_norm": 0.7418516874313354, "learning_rate": 1.782031921203264e-06, "loss": 1.6823, "step": 87041 }, { "epoch": 2.9, "grad_norm": 0.7147839665412903, "learning_rate": 1.7808945523093975e-06, "loss": 1.7025, "step": 87042 }, { "epoch": 2.9, "grad_norm": 0.7121623158454895, "learning_rate": 1.7797575454096325e-06, "loss": 1.6116, "step": 87043 }, { "epoch": 2.9, "grad_norm": 0.715747058391571, "learning_rate": 1.7786209005052677e-06, "loss": 1.7051, "step": 87044 }, { "epoch": 2.9, "grad_norm": 0.7305224537849426, "learning_rate": 1.7774846175977686e-06, "loss": 1.6832, "step": 87045 }, { "epoch": 2.9, "grad_norm": 0.7306429147720337, "learning_rate": 1.7763486966884344e-06, "loss": 1.6076, "step": 87046 }, { "epoch": 2.9, "grad_norm": 0.6896217465400696, "learning_rate": 1.7752131377786305e-06, "loss": 1.5894, "step": 87047 }, { "epoch": 2.9, "grad_norm": 0.6991376280784607, "learning_rate": 1.7740779408698225e-06, "loss": 1.7862, "step": 87048 }, { "epoch": 2.9, "grad_norm": 0.7156060934066772, "learning_rate": 1.7729431059633426e-06, "loss": 1.5833, "step": 87049 }, { "epoch": 2.9, "grad_norm": 0.7438567876815796, "learning_rate": 1.7718086330605563e-06, "loss": 1.6408, "step": 87050 }, { "epoch": 2.9, "grad_norm": 0.7053043246269226, "learning_rate": 1.7706745221628626e-06, "loss": 1.611, "step": 87051 }, { "epoch": 2.9, "grad_norm": 0.7397882342338562, "learning_rate": 1.7695407732715939e-06, "loss": 1.718, "step": 87052 }, { "epoch": 2.9, "grad_norm": 0.7568322420120239, "learning_rate": 1.768407386388182e-06, "loss": 1.6459, "step": 87053 }, { "epoch": 2.9, "grad_norm": 0.6930181980133057, "learning_rate": 1.7672743615139928e-06, "loss": 1.692, "step": 87054 }, { "epoch": 2.9, "grad_norm": 0.7447502613067627, "learning_rate": 1.7661416986503917e-06, "loss": 1.6959, "step": 87055 }, { "epoch": 2.9, "grad_norm": 0.7240067720413208, "learning_rate": 1.7650093977987446e-06, "loss": 1.6717, "step": 87056 }, { "epoch": 2.9, "grad_norm": 0.7347863912582397, "learning_rate": 1.7638774589604165e-06, "loss": 1.6263, "step": 87057 }, { "epoch": 2.9, "grad_norm": 0.7200778126716614, "learning_rate": 1.7627458821368067e-06, "loss": 1.7101, "step": 87058 }, { "epoch": 2.9, "grad_norm": 0.7002933621406555, "learning_rate": 1.7616146673292808e-06, "loss": 1.7034, "step": 87059 }, { "epoch": 2.9, "grad_norm": 0.7433010339736938, "learning_rate": 1.7604838145392375e-06, "loss": 1.7121, "step": 87060 }, { "epoch": 2.9, "grad_norm": 0.7239660620689392, "learning_rate": 1.759353323768009e-06, "loss": 1.7104, "step": 87061 }, { "epoch": 2.9, "grad_norm": 0.7087541818618774, "learning_rate": 1.7582231950169611e-06, "loss": 1.7182, "step": 87062 }, { "epoch": 2.9, "grad_norm": 0.719411313533783, "learning_rate": 1.7570934282874927e-06, "loss": 1.7228, "step": 87063 }, { "epoch": 2.9, "grad_norm": 0.7363374829292297, "learning_rate": 1.7559640235809691e-06, "loss": 1.6649, "step": 87064 }, { "epoch": 2.9, "grad_norm": 0.7061141729354858, "learning_rate": 1.7548349808987894e-06, "loss": 1.7266, "step": 87065 }, { "epoch": 2.9, "grad_norm": 0.7064626216888428, "learning_rate": 1.7537063002422525e-06, "loss": 1.6862, "step": 87066 }, { "epoch": 2.9, "grad_norm": 0.7304026484489441, "learning_rate": 1.752577981612824e-06, "loss": 1.6659, "step": 87067 }, { "epoch": 2.9, "grad_norm": 0.7176229953765869, "learning_rate": 1.751450025011769e-06, "loss": 1.7181, "step": 87068 }, { "epoch": 2.9, "grad_norm": 0.695002555847168, "learning_rate": 1.7503224304405205e-06, "loss": 1.6186, "step": 87069 }, { "epoch": 2.9, "grad_norm": 0.7121684551239014, "learning_rate": 1.7491951979004436e-06, "loss": 1.6476, "step": 87070 }, { "epoch": 2.9, "grad_norm": 0.6954081654548645, "learning_rate": 1.7480683273929376e-06, "loss": 1.6684, "step": 87071 }, { "epoch": 2.9, "grad_norm": 0.7327346205711365, "learning_rate": 1.7469418189192675e-06, "loss": 1.6198, "step": 87072 }, { "epoch": 2.9, "grad_norm": 0.7311503887176514, "learning_rate": 1.7458156724808991e-06, "loss": 1.7217, "step": 87073 }, { "epoch": 2.9, "grad_norm": 0.7287306189537048, "learning_rate": 1.744689888079165e-06, "loss": 1.6357, "step": 87074 }, { "epoch": 2.9, "grad_norm": 0.7197616100311279, "learning_rate": 1.7435644657154302e-06, "loss": 1.7296, "step": 87075 }, { "epoch": 2.9, "grad_norm": 0.7068798542022705, "learning_rate": 1.7424394053910274e-06, "loss": 1.6932, "step": 87076 }, { "epoch": 2.9, "grad_norm": 0.7003357410430908, "learning_rate": 1.7413147071073886e-06, "loss": 1.6325, "step": 87077 }, { "epoch": 2.9, "grad_norm": 0.7315394282341003, "learning_rate": 1.740190370865846e-06, "loss": 1.7254, "step": 87078 }, { "epoch": 2.9, "grad_norm": 0.703731119632721, "learning_rate": 1.7390663966677653e-06, "loss": 1.6744, "step": 87079 }, { "epoch": 2.9, "grad_norm": 0.7125867009162903, "learning_rate": 1.7379427845145123e-06, "loss": 1.5781, "step": 87080 }, { "epoch": 2.9, "grad_norm": 0.7358243465423584, "learning_rate": 1.736819534407452e-06, "loss": 1.6846, "step": 87081 }, { "epoch": 2.9, "grad_norm": 0.7101327776908875, "learning_rate": 1.7356966463479504e-06, "loss": 1.7033, "step": 87082 }, { "epoch": 2.9, "grad_norm": 0.7167671322822571, "learning_rate": 1.7345741203373398e-06, "loss": 1.6286, "step": 87083 }, { "epoch": 2.9, "grad_norm": 0.7125840187072754, "learning_rate": 1.7334519563770188e-06, "loss": 1.7154, "step": 87084 }, { "epoch": 2.9, "grad_norm": 0.735206663608551, "learning_rate": 1.7323301544683531e-06, "loss": 1.7174, "step": 87085 }, { "epoch": 2.9, "grad_norm": 0.7103078365325928, "learning_rate": 1.7312087146126751e-06, "loss": 1.578, "step": 87086 }, { "epoch": 2.9, "grad_norm": 0.7068130373954773, "learning_rate": 1.7300876368113837e-06, "loss": 1.6505, "step": 87087 }, { "epoch": 2.9, "grad_norm": 0.6985688805580139, "learning_rate": 1.7289669210658107e-06, "loss": 1.6997, "step": 87088 }, { "epoch": 2.9, "grad_norm": 0.7117661237716675, "learning_rate": 1.7278465673773223e-06, "loss": 1.738, "step": 87089 }, { "epoch": 2.9, "grad_norm": 0.6936747431755066, "learning_rate": 1.7267265757472504e-06, "loss": 1.6304, "step": 87090 }, { "epoch": 2.9, "grad_norm": 0.7039965987205505, "learning_rate": 1.7256069461770272e-06, "loss": 1.6029, "step": 87091 }, { "epoch": 2.9, "grad_norm": 0.7102837562561035, "learning_rate": 1.7244876786679517e-06, "loss": 1.7366, "step": 87092 }, { "epoch": 2.9, "grad_norm": 0.704203724861145, "learning_rate": 1.7233687732213896e-06, "loss": 1.725, "step": 87093 }, { "epoch": 2.9, "grad_norm": 0.7133066058158875, "learning_rate": 1.7222502298387064e-06, "loss": 1.6722, "step": 87094 }, { "epoch": 2.9, "grad_norm": 0.7137861847877502, "learning_rate": 1.7211320485212676e-06, "loss": 1.6599, "step": 87095 }, { "epoch": 2.9, "grad_norm": 0.7362331748008728, "learning_rate": 1.7200142292704389e-06, "loss": 1.708, "step": 87096 }, { "epoch": 2.9, "grad_norm": 0.7143536806106567, "learning_rate": 1.7188967720875524e-06, "loss": 1.6458, "step": 87097 }, { "epoch": 2.9, "grad_norm": 0.7192238569259644, "learning_rate": 1.7177796769739738e-06, "loss": 1.7237, "step": 87098 }, { "epoch": 2.9, "grad_norm": 0.7148776650428772, "learning_rate": 1.716662943931102e-06, "loss": 1.6306, "step": 87099 }, { "epoch": 2.9, "grad_norm": 0.7115827798843384, "learning_rate": 1.7155465729602026e-06, "loss": 1.6039, "step": 87100 }, { "epoch": 2.9, "grad_norm": 0.7159334421157837, "learning_rate": 1.7144305640626743e-06, "loss": 1.727, "step": 87101 }, { "epoch": 2.9, "grad_norm": 0.7279819250106812, "learning_rate": 1.713314917239883e-06, "loss": 1.6821, "step": 87102 }, { "epoch": 2.9, "grad_norm": 0.7017807364463806, "learning_rate": 1.712199632493194e-06, "loss": 1.6486, "step": 87103 }, { "epoch": 2.9, "grad_norm": 0.7147924900054932, "learning_rate": 1.71108470982394e-06, "loss": 1.605, "step": 87104 }, { "epoch": 2.9, "grad_norm": 0.6968778967857361, "learning_rate": 1.709970149233486e-06, "loss": 1.6762, "step": 87105 }, { "epoch": 2.9, "grad_norm": 0.7184897065162659, "learning_rate": 1.7088559507231647e-06, "loss": 1.6855, "step": 87106 }, { "epoch": 2.9, "grad_norm": 0.7136459350585938, "learning_rate": 1.7077421142943415e-06, "loss": 1.6802, "step": 87107 }, { "epoch": 2.9, "grad_norm": 0.7154205441474915, "learning_rate": 1.706628639948382e-06, "loss": 1.7477, "step": 87108 }, { "epoch": 2.9, "grad_norm": 0.7102634906768799, "learning_rate": 1.7055155276866184e-06, "loss": 1.6829, "step": 87109 }, { "epoch": 2.9, "grad_norm": 0.7315943837165833, "learning_rate": 1.704402777510383e-06, "loss": 1.6543, "step": 87110 }, { "epoch": 2.9, "grad_norm": 0.7099265456199646, "learning_rate": 1.7032903894210747e-06, "loss": 1.668, "step": 87111 }, { "epoch": 2.9, "grad_norm": 0.7074689269065857, "learning_rate": 1.7021783634200258e-06, "loss": 1.7386, "step": 87112 }, { "epoch": 2.9, "grad_norm": 0.7275381684303284, "learning_rate": 1.7010666995085687e-06, "loss": 1.6943, "step": 87113 }, { "epoch": 2.9, "grad_norm": 0.7284765243530273, "learning_rate": 1.6999553976880352e-06, "loss": 1.686, "step": 87114 }, { "epoch": 2.9, "grad_norm": 0.7324272990226746, "learning_rate": 1.6988444579598581e-06, "loss": 1.7012, "step": 87115 }, { "epoch": 2.9, "grad_norm": 0.7096917033195496, "learning_rate": 1.6977338803253027e-06, "loss": 1.7233, "step": 87116 }, { "epoch": 2.9, "grad_norm": 0.7047420740127563, "learning_rate": 1.6966236647857345e-06, "loss": 1.7051, "step": 87117 }, { "epoch": 2.9, "grad_norm": 0.7183281183242798, "learning_rate": 1.6955138113425525e-06, "loss": 1.5969, "step": 87118 }, { "epoch": 2.9, "grad_norm": 0.7092010974884033, "learning_rate": 1.6944043199970225e-06, "loss": 1.6307, "step": 87119 }, { "epoch": 2.9, "grad_norm": 0.695754885673523, "learning_rate": 1.6932951907505433e-06, "loss": 1.6459, "step": 87120 }, { "epoch": 2.9, "grad_norm": 0.7062552571296692, "learning_rate": 1.6921864236044803e-06, "loss": 1.6888, "step": 87121 }, { "epoch": 2.9, "grad_norm": 0.7224965691566467, "learning_rate": 1.6910780185600991e-06, "loss": 1.7656, "step": 87122 }, { "epoch": 2.9, "grad_norm": 0.7209351658821106, "learning_rate": 1.6899699756188323e-06, "loss": 1.7234, "step": 87123 }, { "epoch": 2.9, "grad_norm": 0.687858521938324, "learning_rate": 1.6888622947819785e-06, "loss": 1.639, "step": 87124 }, { "epoch": 2.9, "grad_norm": 0.7142052054405212, "learning_rate": 1.6877549760508702e-06, "loss": 1.6159, "step": 87125 }, { "epoch": 2.9, "grad_norm": 0.7358061671257019, "learning_rate": 1.6866480194269062e-06, "loss": 1.7134, "step": 87126 }, { "epoch": 2.9, "grad_norm": 0.7561478614807129, "learning_rate": 1.6855414249113851e-06, "loss": 1.7226, "step": 87127 }, { "epoch": 2.9, "grad_norm": 0.7091332077980042, "learning_rate": 1.6844351925056733e-06, "loss": 1.6323, "step": 87128 }, { "epoch": 2.9, "grad_norm": 0.7042245268821716, "learning_rate": 1.6833293222110689e-06, "loss": 1.6413, "step": 87129 }, { "epoch": 2.9, "grad_norm": 0.6945261359214783, "learning_rate": 1.6822238140289712e-06, "loss": 1.6439, "step": 87130 }, { "epoch": 2.9, "grad_norm": 0.688338041305542, "learning_rate": 1.6811186679607125e-06, "loss": 1.6812, "step": 87131 }, { "epoch": 2.9, "grad_norm": 0.7147067189216614, "learning_rate": 1.6800138840075917e-06, "loss": 1.592, "step": 87132 }, { "epoch": 2.9, "grad_norm": 0.6889280080795288, "learning_rate": 1.6789094621709742e-06, "loss": 1.6585, "step": 87133 }, { "epoch": 2.9, "grad_norm": 0.7116443514823914, "learning_rate": 1.6778054024522258e-06, "loss": 1.7369, "step": 87134 }, { "epoch": 2.9, "grad_norm": 0.7124090194702148, "learning_rate": 1.6767017048526786e-06, "loss": 1.6758, "step": 87135 }, { "epoch": 2.9, "grad_norm": 0.6834173798561096, "learning_rate": 1.675598369373632e-06, "loss": 1.5737, "step": 87136 }, { "epoch": 2.9, "grad_norm": 0.7036054730415344, "learning_rate": 1.674495396016451e-06, "loss": 1.6533, "step": 87137 }, { "epoch": 2.9, "grad_norm": 0.7461703419685364, "learning_rate": 1.6733927847825013e-06, "loss": 1.6579, "step": 87138 }, { "epoch": 2.9, "grad_norm": 0.7200632095336914, "learning_rate": 1.672290535673082e-06, "loss": 1.7059, "step": 87139 }, { "epoch": 2.9, "grad_norm": 0.7158608436584473, "learning_rate": 1.6711886486895254e-06, "loss": 1.758, "step": 87140 }, { "epoch": 2.9, "grad_norm": 0.7213146090507507, "learning_rate": 1.6700871238332302e-06, "loss": 1.6417, "step": 87141 }, { "epoch": 2.9, "grad_norm": 0.7253803610801697, "learning_rate": 1.6689859611054957e-06, "loss": 1.6743, "step": 87142 }, { "epoch": 2.9, "grad_norm": 0.7357842326164246, "learning_rate": 1.6678851605076204e-06, "loss": 1.7278, "step": 87143 }, { "epoch": 2.9, "grad_norm": 0.7392193675041199, "learning_rate": 1.6667847220409703e-06, "loss": 1.6361, "step": 87144 }, { "epoch": 2.9, "grad_norm": 0.7186273336410522, "learning_rate": 1.6656846457069107e-06, "loss": 1.7399, "step": 87145 }, { "epoch": 2.9, "grad_norm": 0.708023726940155, "learning_rate": 1.6645849315067406e-06, "loss": 1.6204, "step": 87146 }, { "epoch": 2.9, "grad_norm": 0.7128475308418274, "learning_rate": 1.6634855794418257e-06, "loss": 1.6295, "step": 87147 }, { "epoch": 2.9, "grad_norm": 0.7255902886390686, "learning_rate": 1.6623865895134647e-06, "loss": 1.6777, "step": 87148 }, { "epoch": 2.9, "grad_norm": 0.7269093990325928, "learning_rate": 1.6612879617230234e-06, "loss": 1.7375, "step": 87149 }, { "epoch": 2.9, "grad_norm": 0.7178415060043335, "learning_rate": 1.6601896960718008e-06, "loss": 1.6145, "step": 87150 }, { "epoch": 2.9, "grad_norm": 0.7067270278930664, "learning_rate": 1.659091792561129e-06, "loss": 1.6831, "step": 87151 }, { "epoch": 2.9, "grad_norm": 0.7046260237693787, "learning_rate": 1.6579942511924072e-06, "loss": 1.6447, "step": 87152 }, { "epoch": 2.9, "grad_norm": 0.7131506204605103, "learning_rate": 1.6568970719669006e-06, "loss": 1.6416, "step": 87153 }, { "epoch": 2.9, "grad_norm": 0.7058994174003601, "learning_rate": 1.655800254885975e-06, "loss": 1.6691, "step": 87154 }, { "epoch": 2.9, "grad_norm": 0.7082066535949707, "learning_rate": 1.6547037999509293e-06, "loss": 1.6577, "step": 87155 }, { "epoch": 2.9, "grad_norm": 0.7180933952331543, "learning_rate": 1.6536077071631293e-06, "loss": 1.707, "step": 87156 }, { "epoch": 2.9, "grad_norm": 0.7110099792480469, "learning_rate": 1.6525119765238737e-06, "loss": 1.6491, "step": 87157 }, { "epoch": 2.9, "grad_norm": 0.7025518417358398, "learning_rate": 1.6514166080345281e-06, "loss": 1.6751, "step": 87158 }, { "epoch": 2.9, "grad_norm": 0.7298074960708618, "learning_rate": 1.6503216016963915e-06, "loss": 1.7197, "step": 87159 }, { "epoch": 2.9, "grad_norm": 0.7041650414466858, "learning_rate": 1.6492269575107963e-06, "loss": 1.6829, "step": 87160 }, { "epoch": 2.9, "grad_norm": 0.6981751322746277, "learning_rate": 1.648132675479108e-06, "loss": 1.6064, "step": 87161 }, { "epoch": 2.9, "grad_norm": 0.7190916538238525, "learning_rate": 1.6470387556026253e-06, "loss": 1.6334, "step": 87162 }, { "epoch": 2.9, "grad_norm": 0.7268191576004028, "learning_rate": 1.6459451978826477e-06, "loss": 1.6815, "step": 87163 }, { "epoch": 2.9, "grad_norm": 0.7253336310386658, "learning_rate": 1.6448520023205402e-06, "loss": 1.7507, "step": 87164 }, { "epoch": 2.9, "grad_norm": 0.7231842875480652, "learning_rate": 1.6437591689176354e-06, "loss": 1.7461, "step": 87165 }, { "epoch": 2.9, "grad_norm": 0.7064908742904663, "learning_rate": 1.6426666976752324e-06, "loss": 1.6923, "step": 87166 }, { "epoch": 2.9, "grad_norm": 0.7112972736358643, "learning_rate": 1.6415745885946963e-06, "loss": 1.5956, "step": 87167 }, { "epoch": 2.9, "grad_norm": 0.7052752375602722, "learning_rate": 1.640482841677293e-06, "loss": 1.7116, "step": 87168 }, { "epoch": 2.9, "grad_norm": 0.6987553238868713, "learning_rate": 1.6393914569243883e-06, "loss": 1.7391, "step": 87169 }, { "epoch": 2.9, "grad_norm": 0.7343831658363342, "learning_rate": 1.638300434337314e-06, "loss": 1.6312, "step": 87170 }, { "epoch": 2.9, "grad_norm": 0.7091054320335388, "learning_rate": 1.637209773917403e-06, "loss": 1.7418, "step": 87171 }, { "epoch": 2.9, "grad_norm": 0.7204294800758362, "learning_rate": 1.63611947566592e-06, "loss": 1.6477, "step": 87172 }, { "epoch": 2.9, "grad_norm": 0.7012979388237, "learning_rate": 1.6350295395842316e-06, "loss": 1.6601, "step": 87173 }, { "epoch": 2.9, "grad_norm": 0.7446849346160889, "learning_rate": 1.6339399656736696e-06, "loss": 1.6351, "step": 87174 }, { "epoch": 2.9, "grad_norm": 0.7344626784324646, "learning_rate": 1.632850753935533e-06, "loss": 1.6132, "step": 87175 }, { "epoch": 2.9, "grad_norm": 0.7066991925239563, "learning_rate": 1.6317619043711538e-06, "loss": 1.7295, "step": 87176 }, { "epoch": 2.9, "grad_norm": 0.7117072343826294, "learning_rate": 1.6306734169818647e-06, "loss": 1.6064, "step": 87177 }, { "epoch": 2.9, "grad_norm": 0.7195749282836914, "learning_rate": 1.6295852917689644e-06, "loss": 1.6906, "step": 87178 }, { "epoch": 2.9, "grad_norm": 0.7284306883811951, "learning_rate": 1.6284975287337854e-06, "loss": 1.6381, "step": 87179 }, { "epoch": 2.9, "grad_norm": 0.6973680257797241, "learning_rate": 1.6274101278776596e-06, "loss": 1.6505, "step": 87180 }, { "epoch": 2.9, "grad_norm": 0.7164179682731628, "learning_rate": 1.6263230892018863e-06, "loss": 1.6591, "step": 87181 }, { "epoch": 2.9, "grad_norm": 0.7156745195388794, "learning_rate": 1.6252364127077977e-06, "loss": 1.6588, "step": 87182 }, { "epoch": 2.9, "grad_norm": 0.7308073043823242, "learning_rate": 1.6241500983966927e-06, "loss": 1.6578, "step": 87183 }, { "epoch": 2.9, "grad_norm": 0.734174907207489, "learning_rate": 1.6230641462699034e-06, "loss": 1.7429, "step": 87184 }, { "epoch": 2.9, "grad_norm": 0.7120406627655029, "learning_rate": 1.6219785563287623e-06, "loss": 1.6741, "step": 87185 }, { "epoch": 2.9, "grad_norm": 0.710195779800415, "learning_rate": 1.6208933285745684e-06, "loss": 1.6707, "step": 87186 }, { "epoch": 2.9, "grad_norm": 0.7285313606262207, "learning_rate": 1.619808463008654e-06, "loss": 1.6125, "step": 87187 }, { "epoch": 2.9, "grad_norm": 0.7021244168281555, "learning_rate": 1.618723959632351e-06, "loss": 1.6675, "step": 87188 }, { "epoch": 2.9, "grad_norm": 0.703430712223053, "learning_rate": 1.6176398184469253e-06, "loss": 1.6534, "step": 87189 }, { "epoch": 2.9, "grad_norm": 0.7262624502182007, "learning_rate": 1.6165560394537092e-06, "loss": 1.6548, "step": 87190 }, { "epoch": 2.9, "grad_norm": 0.7262382507324219, "learning_rate": 1.6154726226540348e-06, "loss": 1.7256, "step": 87191 }, { "epoch": 2.9, "grad_norm": 0.7100058197975159, "learning_rate": 1.6143895680492347e-06, "loss": 1.6747, "step": 87192 }, { "epoch": 2.9, "grad_norm": 0.7270747423171997, "learning_rate": 1.6133068756405743e-06, "loss": 1.7591, "step": 87193 }, { "epoch": 2.9, "grad_norm": 0.7175282835960388, "learning_rate": 1.6122245454294192e-06, "loss": 1.6858, "step": 87194 }, { "epoch": 2.9, "grad_norm": 0.7528483867645264, "learning_rate": 1.6111425774170683e-06, "loss": 1.7046, "step": 87195 }, { "epoch": 2.9, "grad_norm": 0.7071280479431152, "learning_rate": 1.6100609716047875e-06, "loss": 1.753, "step": 87196 }, { "epoch": 2.9, "grad_norm": 0.7085615992546082, "learning_rate": 1.6089797279939087e-06, "loss": 1.6461, "step": 87197 }, { "epoch": 2.9, "grad_norm": 0.6897340416908264, "learning_rate": 1.6078988465858312e-06, "loss": 1.6726, "step": 87198 }, { "epoch": 2.9, "grad_norm": 0.7117021679878235, "learning_rate": 1.6068183273817536e-06, "loss": 1.6496, "step": 87199 }, { "epoch": 2.9, "grad_norm": 0.7115265727043152, "learning_rate": 1.605738170383042e-06, "loss": 1.6751, "step": 87200 }, { "epoch": 2.9, "grad_norm": 0.7190409898757935, "learning_rate": 1.6046583755909947e-06, "loss": 1.7066, "step": 87201 }, { "epoch": 2.9, "grad_norm": 0.7083532214164734, "learning_rate": 1.6035789430069446e-06, "loss": 1.6933, "step": 87202 }, { "epoch": 2.9, "grad_norm": 0.7144142985343933, "learning_rate": 1.6024998726321569e-06, "loss": 1.6694, "step": 87203 }, { "epoch": 2.9, "grad_norm": 0.7096062898635864, "learning_rate": 1.6014211644679643e-06, "loss": 1.6511, "step": 87204 }, { "epoch": 2.9, "grad_norm": 0.7189050316810608, "learning_rate": 1.6003428185156985e-06, "loss": 1.6381, "step": 87205 }, { "epoch": 2.9, "grad_norm": 0.7006919980049133, "learning_rate": 1.5992648347766257e-06, "loss": 1.6609, "step": 87206 }, { "epoch": 2.9, "grad_norm": 0.7272128462791443, "learning_rate": 1.5981872132521112e-06, "loss": 1.6535, "step": 87207 }, { "epoch": 2.9, "grad_norm": 0.7100549340248108, "learning_rate": 1.5971099539434208e-06, "loss": 1.5781, "step": 87208 }, { "epoch": 2.9, "grad_norm": 0.7348807454109192, "learning_rate": 1.596033056851853e-06, "loss": 1.6495, "step": 87209 }, { "epoch": 2.9, "grad_norm": 0.7081620097160339, "learning_rate": 1.5949565219787408e-06, "loss": 1.6432, "step": 87210 }, { "epoch": 2.9, "grad_norm": 0.7249874472618103, "learning_rate": 1.5938803493253827e-06, "loss": 1.7184, "step": 87211 }, { "epoch": 2.9, "grad_norm": 0.711149275302887, "learning_rate": 1.592804538893111e-06, "loss": 1.7374, "step": 87212 }, { "epoch": 2.9, "grad_norm": 0.7126777768135071, "learning_rate": 1.5917290906831914e-06, "loss": 1.6884, "step": 87213 }, { "epoch": 2.9, "grad_norm": 0.7255933284759521, "learning_rate": 1.590654004696923e-06, "loss": 1.6882, "step": 87214 }, { "epoch": 2.9, "grad_norm": 0.7108809351921082, "learning_rate": 1.5895792809356379e-06, "loss": 1.7211, "step": 87215 }, { "epoch": 2.9, "grad_norm": 0.7163113951683044, "learning_rate": 1.5885049194006683e-06, "loss": 1.6689, "step": 87216 }, { "epoch": 2.9, "grad_norm": 0.7452579736709595, "learning_rate": 1.5874309200932467e-06, "loss": 1.6671, "step": 87217 }, { "epoch": 2.9, "grad_norm": 0.7095690369606018, "learning_rate": 1.5863572830147387e-06, "loss": 1.69, "step": 87218 }, { "epoch": 2.9, "grad_norm": 0.7159183025360107, "learning_rate": 1.5852840081664097e-06, "loss": 1.6536, "step": 87219 }, { "epoch": 2.9, "grad_norm": 0.7190700173377991, "learning_rate": 1.5842110955495924e-06, "loss": 1.6705, "step": 87220 }, { "epoch": 2.9, "grad_norm": 0.7166720032691956, "learning_rate": 1.583138545165552e-06, "loss": 1.6561, "step": 87221 }, { "epoch": 2.9, "grad_norm": 0.7301390767097473, "learning_rate": 1.582066357015621e-06, "loss": 1.6265, "step": 87222 }, { "epoch": 2.9, "grad_norm": 0.7056423425674438, "learning_rate": 1.5809945311010986e-06, "loss": 1.6702, "step": 87223 }, { "epoch": 2.9, "grad_norm": 0.7179102897644043, "learning_rate": 1.5799230674232832e-06, "loss": 1.7082, "step": 87224 }, { "epoch": 2.9, "grad_norm": 0.706823468208313, "learning_rate": 1.5788519659834408e-06, "loss": 1.6874, "step": 87225 }, { "epoch": 2.9, "grad_norm": 0.7294257879257202, "learning_rate": 1.5777812267829372e-06, "loss": 1.691, "step": 87226 }, { "epoch": 2.9, "grad_norm": 0.7491395473480225, "learning_rate": 1.576710849823004e-06, "loss": 1.6784, "step": 87227 }, { "epoch": 2.9, "grad_norm": 0.7235738635063171, "learning_rate": 1.5756408351050075e-06, "loss": 1.6257, "step": 87228 }, { "epoch": 2.9, "grad_norm": 0.7222704291343689, "learning_rate": 1.5745711826301798e-06, "loss": 1.7366, "step": 87229 }, { "epoch": 2.9, "grad_norm": 0.7016928791999817, "learning_rate": 1.5735018923998865e-06, "loss": 1.6495, "step": 87230 }, { "epoch": 2.9, "grad_norm": 0.7096341252326965, "learning_rate": 1.5724329644153599e-06, "loss": 1.6748, "step": 87231 }, { "epoch": 2.9, "grad_norm": 0.7208343148231506, "learning_rate": 1.5713643986779323e-06, "loss": 1.6845, "step": 87232 }, { "epoch": 2.9, "grad_norm": 0.6884499192237854, "learning_rate": 1.5702961951889026e-06, "loss": 1.6296, "step": 87233 }, { "epoch": 2.9, "grad_norm": 0.7252598404884338, "learning_rate": 1.5692283539495699e-06, "loss": 1.6664, "step": 87234 }, { "epoch": 2.9, "grad_norm": 0.6980568170547485, "learning_rate": 1.568160874961233e-06, "loss": 1.6795, "step": 87235 }, { "epoch": 2.9, "grad_norm": 0.7170736193656921, "learning_rate": 1.5670937582251575e-06, "loss": 1.67, "step": 87236 }, { "epoch": 2.9, "grad_norm": 0.6978077292442322, "learning_rate": 1.5660270037426759e-06, "loss": 1.6277, "step": 87237 }, { "epoch": 2.9, "grad_norm": 0.7291116714477539, "learning_rate": 1.5649606115150536e-06, "loss": 1.6615, "step": 87238 }, { "epoch": 2.9, "grad_norm": 0.711865246295929, "learning_rate": 1.5638945815435566e-06, "loss": 1.7376, "step": 87239 }, { "epoch": 2.9, "grad_norm": 0.7051241397857666, "learning_rate": 1.5628289138295502e-06, "loss": 1.6806, "step": 87240 }, { "epoch": 2.9, "grad_norm": 0.735105574131012, "learning_rate": 1.5617636083743334e-06, "loss": 1.6574, "step": 87241 }, { "epoch": 2.9, "grad_norm": 0.7075233459472656, "learning_rate": 1.5606986651791386e-06, "loss": 1.6469, "step": 87242 }, { "epoch": 2.9, "grad_norm": 0.7152836322784424, "learning_rate": 1.5596340842452648e-06, "loss": 1.6916, "step": 87243 }, { "epoch": 2.9, "grad_norm": 0.7060520648956299, "learning_rate": 1.5585698655740442e-06, "loss": 1.6905, "step": 87244 }, { "epoch": 2.9, "grad_norm": 0.6890841126441956, "learning_rate": 1.5575060091667425e-06, "loss": 1.6559, "step": 87245 }, { "epoch": 2.9, "grad_norm": 0.7114985585212708, "learning_rate": 1.5564425150246252e-06, "loss": 1.6754, "step": 87246 }, { "epoch": 2.9, "grad_norm": 0.6959336400032043, "learning_rate": 1.5553793831490247e-06, "loss": 1.6331, "step": 87247 }, { "epoch": 2.9, "grad_norm": 0.7163196802139282, "learning_rate": 1.5543166135412732e-06, "loss": 1.7519, "step": 87248 }, { "epoch": 2.9, "grad_norm": 0.7187525033950806, "learning_rate": 1.5532542062025365e-06, "loss": 1.6395, "step": 87249 }, { "epoch": 2.9, "grad_norm": 0.6984168887138367, "learning_rate": 1.5521921611342136e-06, "loss": 1.6475, "step": 87250 }, { "epoch": 2.9, "grad_norm": 0.7312516570091248, "learning_rate": 1.5511304783375366e-06, "loss": 1.6528, "step": 87251 }, { "epoch": 2.9, "grad_norm": 0.7264183759689331, "learning_rate": 1.550069157813838e-06, "loss": 1.6569, "step": 87252 }, { "epoch": 2.9, "grad_norm": 0.7118927240371704, "learning_rate": 1.54900819956435e-06, "loss": 1.6454, "step": 87253 }, { "epoch": 2.9, "grad_norm": 0.7178137898445129, "learning_rate": 1.5479476035904049e-06, "loss": 1.7846, "step": 87254 }, { "epoch": 2.9, "grad_norm": 0.6966906785964966, "learning_rate": 1.5468873698933015e-06, "loss": 1.6764, "step": 87255 }, { "epoch": 2.9, "grad_norm": 0.7297613620758057, "learning_rate": 1.5458274984742725e-06, "loss": 1.6201, "step": 87256 }, { "epoch": 2.9, "grad_norm": 0.724092960357666, "learning_rate": 1.5447679893346166e-06, "loss": 1.6445, "step": 87257 }, { "epoch": 2.9, "grad_norm": 0.6885631084442139, "learning_rate": 1.543708842475666e-06, "loss": 1.6651, "step": 87258 }, { "epoch": 2.9, "grad_norm": 0.7193512916564941, "learning_rate": 1.5426500578986868e-06, "loss": 1.6923, "step": 87259 }, { "epoch": 2.9, "grad_norm": 0.6884008049964905, "learning_rate": 1.541591635604944e-06, "loss": 1.6964, "step": 87260 }, { "epoch": 2.9, "grad_norm": 0.7172251343727112, "learning_rate": 1.5405335755957037e-06, "loss": 1.6322, "step": 87261 }, { "epoch": 2.9, "grad_norm": 0.7169118523597717, "learning_rate": 1.5394758778723316e-06, "loss": 1.6168, "step": 87262 }, { "epoch": 2.9, "grad_norm": 0.719725489616394, "learning_rate": 1.5384185424360262e-06, "loss": 1.7335, "step": 87263 }, { "epoch": 2.9, "grad_norm": 0.7273129224777222, "learning_rate": 1.537361569288087e-06, "loss": 1.7019, "step": 87264 }, { "epoch": 2.9, "grad_norm": 0.6859561204910278, "learning_rate": 1.536304958429846e-06, "loss": 1.628, "step": 87265 }, { "epoch": 2.9, "grad_norm": 0.7210403084754944, "learning_rate": 1.5352487098625687e-06, "loss": 1.6854, "step": 87266 }, { "epoch": 2.9, "grad_norm": 0.702391505241394, "learning_rate": 1.5341928235874878e-06, "loss": 1.6589, "step": 87267 }, { "epoch": 2.9, "grad_norm": 0.7202837467193604, "learning_rate": 1.5331372996059355e-06, "loss": 1.7572, "step": 87268 }, { "epoch": 2.9, "grad_norm": 0.721032440662384, "learning_rate": 1.5320821379191773e-06, "loss": 1.6398, "step": 87269 }, { "epoch": 2.9, "grad_norm": 0.7205551266670227, "learning_rate": 1.5310273385284788e-06, "loss": 1.6098, "step": 87270 }, { "epoch": 2.9, "grad_norm": 0.7125186920166016, "learning_rate": 1.5299729014351393e-06, "loss": 1.5776, "step": 87271 }, { "epoch": 2.9, "grad_norm": 0.7048821449279785, "learning_rate": 1.5289188266404573e-06, "loss": 1.6726, "step": 87272 }, { "epoch": 2.9, "grad_norm": 0.6769325733184814, "learning_rate": 1.5278651141456989e-06, "loss": 1.646, "step": 87273 }, { "epoch": 2.9, "grad_norm": 0.7146686911582947, "learning_rate": 1.526811763952096e-06, "loss": 1.6348, "step": 87274 }, { "epoch": 2.9, "grad_norm": 0.7449716925621033, "learning_rate": 1.5257587760609813e-06, "loss": 1.6337, "step": 87275 }, { "epoch": 2.9, "grad_norm": 0.6981210708618164, "learning_rate": 1.5247061504736202e-06, "loss": 1.6409, "step": 87276 }, { "epoch": 2.9, "grad_norm": 0.7211596369743347, "learning_rate": 1.5236538871912785e-06, "loss": 1.7334, "step": 87277 }, { "epoch": 2.9, "grad_norm": 0.7239404916763306, "learning_rate": 1.522601986215255e-06, "loss": 1.7243, "step": 87278 }, { "epoch": 2.9, "grad_norm": 0.719170093536377, "learning_rate": 1.5215504475468154e-06, "loss": 1.6567, "step": 87279 }, { "epoch": 2.9, "grad_norm": 0.7227811813354492, "learning_rate": 1.5204992711872255e-06, "loss": 1.6367, "step": 87280 }, { "epoch": 2.9, "grad_norm": 0.7190456390380859, "learning_rate": 1.519448457137784e-06, "loss": 1.6504, "step": 87281 }, { "epoch": 2.9, "grad_norm": 0.7085090279579163, "learning_rate": 1.5183980053997568e-06, "loss": 1.6895, "step": 87282 }, { "epoch": 2.9, "grad_norm": 0.7129526138305664, "learning_rate": 1.5173479159743762e-06, "loss": 1.6412, "step": 87283 }, { "epoch": 2.9, "grad_norm": 0.7007506489753723, "learning_rate": 1.5162981888629743e-06, "loss": 1.6151, "step": 87284 }, { "epoch": 2.9, "grad_norm": 0.707528293132782, "learning_rate": 1.5152488240668504e-06, "loss": 1.6638, "step": 87285 }, { "epoch": 2.9, "grad_norm": 0.7245059013366699, "learning_rate": 1.5141998215871697e-06, "loss": 1.59, "step": 87286 }, { "epoch": 2.9, "grad_norm": 0.7261926531791687, "learning_rate": 1.5131511814252984e-06, "loss": 1.6794, "step": 87287 }, { "epoch": 2.9, "grad_norm": 0.7253779768943787, "learning_rate": 1.5121029035825015e-06, "loss": 1.669, "step": 87288 }, { "epoch": 2.9, "grad_norm": 0.6979191303253174, "learning_rate": 1.511054988060012e-06, "loss": 1.6575, "step": 87289 }, { "epoch": 2.9, "grad_norm": 0.7362483739852905, "learning_rate": 1.5100074348590951e-06, "loss": 1.7158, "step": 87290 }, { "epoch": 2.9, "grad_norm": 0.6905445456504822, "learning_rate": 1.5089602439810833e-06, "loss": 1.5716, "step": 87291 }, { "epoch": 2.9, "grad_norm": 0.6951873302459717, "learning_rate": 1.5079134154272088e-06, "loss": 1.6101, "step": 87292 }, { "epoch": 2.9, "grad_norm": 0.7122370600700378, "learning_rate": 1.5068669491987372e-06, "loss": 1.6648, "step": 87293 }, { "epoch": 2.9, "grad_norm": 0.7223191261291504, "learning_rate": 1.5058208452969678e-06, "loss": 1.7544, "step": 87294 }, { "epoch": 2.9, "grad_norm": 0.69788658618927, "learning_rate": 1.5047751037231325e-06, "loss": 1.649, "step": 87295 }, { "epoch": 2.9, "grad_norm": 0.7110037803649902, "learning_rate": 1.5037297244785308e-06, "loss": 1.6876, "step": 87296 }, { "epoch": 2.9, "grad_norm": 0.7179409265518188, "learning_rate": 1.5026847075643945e-06, "loss": 1.6868, "step": 87297 }, { "epoch": 2.9, "grad_norm": 0.7139394283294678, "learning_rate": 1.5016400529820561e-06, "loss": 1.7063, "step": 87298 }, { "epoch": 2.9, "grad_norm": 0.7151699066162109, "learning_rate": 1.5005957607327146e-06, "loss": 1.5945, "step": 87299 }, { "epoch": 2.9, "grad_norm": 0.7143771648406982, "learning_rate": 1.499551830817669e-06, "loss": 1.6905, "step": 87300 }, { "epoch": 2.9, "grad_norm": 0.7208724617958069, "learning_rate": 1.4985082632382184e-06, "loss": 1.8136, "step": 87301 }, { "epoch": 2.9, "grad_norm": 0.7101803421974182, "learning_rate": 1.4974650579955617e-06, "loss": 1.6275, "step": 87302 }, { "epoch": 2.9, "grad_norm": 0.7008322477340698, "learning_rate": 1.4964222150909976e-06, "loss": 1.6651, "step": 87303 }, { "epoch": 2.9, "grad_norm": 0.692263662815094, "learning_rate": 1.4953797345258256e-06, "loss": 1.6498, "step": 87304 }, { "epoch": 2.9, "grad_norm": 0.7222788333892822, "learning_rate": 1.4943376163012777e-06, "loss": 1.6098, "step": 87305 }, { "epoch": 2.9, "grad_norm": 0.7326556444168091, "learning_rate": 1.4932958604185863e-06, "loss": 1.6974, "step": 87306 }, { "epoch": 2.9, "grad_norm": 0.6904497742652893, "learning_rate": 1.4922544668790505e-06, "loss": 1.5878, "step": 87307 }, { "epoch": 2.9, "grad_norm": 0.7061376571655273, "learning_rate": 1.4912134356839689e-06, "loss": 1.715, "step": 87308 }, { "epoch": 2.9, "grad_norm": 0.7312967777252197, "learning_rate": 1.4901727668345742e-06, "loss": 1.7262, "step": 87309 }, { "epoch": 2.9, "grad_norm": 0.7256363034248352, "learning_rate": 1.4891324603320986e-06, "loss": 1.7176, "step": 87310 }, { "epoch": 2.9, "grad_norm": 0.7192728519439697, "learning_rate": 1.4880925161778413e-06, "loss": 1.6752, "step": 87311 }, { "epoch": 2.9, "grad_norm": 0.7291514873504639, "learning_rate": 1.4870529343730675e-06, "loss": 1.7095, "step": 87312 }, { "epoch": 2.9, "grad_norm": 0.7581080794334412, "learning_rate": 1.4860137149190098e-06, "loss": 1.7184, "step": 87313 }, { "epoch": 2.9, "grad_norm": 0.7081425786018372, "learning_rate": 1.484974857816934e-06, "loss": 1.7042, "step": 87314 }, { "epoch": 2.9, "grad_norm": 0.7262223958969116, "learning_rate": 1.4839363630681722e-06, "loss": 1.6697, "step": 87315 }, { "epoch": 2.91, "grad_norm": 0.7074277997016907, "learning_rate": 1.48289823067389e-06, "loss": 1.6886, "step": 87316 }, { "epoch": 2.91, "grad_norm": 0.7385504245758057, "learning_rate": 1.4818604606353868e-06, "loss": 1.6537, "step": 87317 }, { "epoch": 2.91, "grad_norm": 0.7118422389030457, "learning_rate": 1.4808230529538945e-06, "loss": 1.729, "step": 87318 }, { "epoch": 2.91, "grad_norm": 0.7297842502593994, "learning_rate": 1.4797860076307456e-06, "loss": 1.7472, "step": 87319 }, { "epoch": 2.91, "grad_norm": 0.6909191608428955, "learning_rate": 1.4787493246671057e-06, "loss": 1.7028, "step": 87320 }, { "epoch": 2.91, "grad_norm": 0.7198286652565002, "learning_rate": 1.477713004064307e-06, "loss": 1.6747, "step": 87321 }, { "epoch": 2.91, "grad_norm": 0.7120246291160583, "learning_rate": 1.4766770458235822e-06, "loss": 1.6809, "step": 87322 }, { "epoch": 2.91, "grad_norm": 0.7223899960517883, "learning_rate": 1.4756414499461966e-06, "loss": 1.6291, "step": 87323 }, { "epoch": 2.91, "grad_norm": 0.7082858085632324, "learning_rate": 1.4746062164333494e-06, "loss": 1.6009, "step": 87324 }, { "epoch": 2.91, "grad_norm": 0.6937037706375122, "learning_rate": 1.4735713452864063e-06, "loss": 1.6691, "step": 87325 }, { "epoch": 2.91, "grad_norm": 0.7073825597763062, "learning_rate": 1.4725368365065326e-06, "loss": 1.5732, "step": 87326 }, { "epoch": 2.91, "grad_norm": 0.707317590713501, "learning_rate": 1.4715026900949944e-06, "loss": 1.6597, "step": 87327 }, { "epoch": 2.91, "grad_norm": 0.734091579914093, "learning_rate": 1.4704689060530906e-06, "loss": 1.7131, "step": 87328 }, { "epoch": 2.91, "grad_norm": 0.7173585295677185, "learning_rate": 1.46943548438202e-06, "loss": 1.6542, "step": 87329 }, { "epoch": 2.91, "grad_norm": 0.7123258113861084, "learning_rate": 1.4684024250831151e-06, "loss": 1.6344, "step": 87330 }, { "epoch": 2.91, "grad_norm": 0.7056831121444702, "learning_rate": 1.4673697281575414e-06, "loss": 1.6729, "step": 87331 }, { "epoch": 2.91, "grad_norm": 0.7166821956634521, "learning_rate": 1.4663373936066313e-06, "loss": 1.6778, "step": 87332 }, { "epoch": 2.91, "grad_norm": 0.7218307256698608, "learning_rate": 1.465305421431584e-06, "loss": 1.7019, "step": 87333 }, { "epoch": 2.91, "grad_norm": 0.7024481296539307, "learning_rate": 1.464273811633665e-06, "loss": 1.6345, "step": 87334 }, { "epoch": 2.91, "grad_norm": 0.7078641653060913, "learning_rate": 1.46324256421414e-06, "loss": 1.6473, "step": 87335 }, { "epoch": 2.91, "grad_norm": 0.7113996148109436, "learning_rate": 1.462211679174241e-06, "loss": 1.651, "step": 87336 }, { "epoch": 2.91, "grad_norm": 0.7146902084350586, "learning_rate": 1.4611811565152342e-06, "loss": 1.6534, "step": 87337 }, { "epoch": 2.91, "grad_norm": 0.728080153465271, "learning_rate": 1.4601509962383518e-06, "loss": 1.6764, "step": 87338 }, { "epoch": 2.91, "grad_norm": 0.7237749099731445, "learning_rate": 1.4591211983448593e-06, "loss": 1.6369, "step": 87339 }, { "epoch": 2.91, "grad_norm": 0.7067568898200989, "learning_rate": 1.4580917628360222e-06, "loss": 1.7338, "step": 87340 }, { "epoch": 2.91, "grad_norm": 0.7129952311515808, "learning_rate": 1.4570626897130733e-06, "loss": 1.6238, "step": 87341 }, { "epoch": 2.91, "grad_norm": 0.7087551355361938, "learning_rate": 1.4560339789772447e-06, "loss": 1.6181, "step": 87342 }, { "epoch": 2.91, "grad_norm": 0.7093091607093811, "learning_rate": 1.455005630629802e-06, "loss": 1.6698, "step": 87343 }, { "epoch": 2.91, "grad_norm": 0.7758076786994934, "learning_rate": 1.453977644672011e-06, "loss": 1.6877, "step": 87344 }, { "epoch": 2.91, "grad_norm": 0.6912688612937927, "learning_rate": 1.4529500211051036e-06, "loss": 1.6561, "step": 87345 }, { "epoch": 2.91, "grad_norm": 0.7070893049240112, "learning_rate": 1.451922759930313e-06, "loss": 1.5686, "step": 87346 }, { "epoch": 2.91, "grad_norm": 0.7212236523628235, "learning_rate": 1.4508958611489375e-06, "loss": 1.6413, "step": 87347 }, { "epoch": 2.91, "grad_norm": 0.7298855185508728, "learning_rate": 1.4498693247621429e-06, "loss": 1.6791, "step": 87348 }, { "epoch": 2.91, "grad_norm": 0.7146205902099609, "learning_rate": 1.4488431507712284e-06, "loss": 1.7112, "step": 87349 }, { "epoch": 2.91, "grad_norm": 0.7188093662261963, "learning_rate": 1.4478173391774261e-06, "loss": 1.6266, "step": 87350 }, { "epoch": 2.91, "grad_norm": 0.7165138721466064, "learning_rate": 1.446791889982002e-06, "loss": 1.6577, "step": 87351 }, { "epoch": 2.91, "grad_norm": 0.7218223214149475, "learning_rate": 1.4457668031861547e-06, "loss": 1.7218, "step": 87352 }, { "epoch": 2.91, "grad_norm": 0.7068058848381042, "learning_rate": 1.44474207879115e-06, "loss": 1.6946, "step": 87353 }, { "epoch": 2.91, "grad_norm": 0.7243188619613647, "learning_rate": 1.4437177167982872e-06, "loss": 1.6887, "step": 87354 }, { "epoch": 2.91, "grad_norm": 0.7331728935241699, "learning_rate": 1.4426937172087315e-06, "loss": 1.6044, "step": 87355 }, { "epoch": 2.91, "grad_norm": 0.7079300880432129, "learning_rate": 1.441670080023749e-06, "loss": 1.6569, "step": 87356 }, { "epoch": 2.91, "grad_norm": 0.689611554145813, "learning_rate": 1.440646805244572e-06, "loss": 1.6402, "step": 87357 }, { "epoch": 2.91, "grad_norm": 0.7264997959136963, "learning_rate": 1.439623892872499e-06, "loss": 1.6713, "step": 87358 }, { "epoch": 2.91, "grad_norm": 0.735001802444458, "learning_rate": 1.4386013429087294e-06, "loss": 1.8049, "step": 87359 }, { "epoch": 2.91, "grad_norm": 0.7268579602241516, "learning_rate": 1.4375791553544624e-06, "loss": 1.5841, "step": 87360 }, { "epoch": 2.91, "grad_norm": 0.715553879737854, "learning_rate": 1.43655733021103e-06, "loss": 1.642, "step": 87361 }, { "epoch": 2.91, "grad_norm": 0.7132915258407593, "learning_rate": 1.4355358674796314e-06, "loss": 1.6667, "step": 87362 }, { "epoch": 2.91, "grad_norm": 0.7458128333091736, "learning_rate": 1.4345147671614653e-06, "loss": 1.7891, "step": 87363 }, { "epoch": 2.91, "grad_norm": 0.7260573506355286, "learning_rate": 1.4334940292577978e-06, "loss": 1.6657, "step": 87364 }, { "epoch": 2.91, "grad_norm": 0.7210357785224915, "learning_rate": 1.4324736537699278e-06, "loss": 1.6543, "step": 87365 }, { "epoch": 2.91, "grad_norm": 0.7406495809555054, "learning_rate": 1.4314536406989874e-06, "loss": 1.697, "step": 87366 }, { "epoch": 2.91, "grad_norm": 0.7199910879135132, "learning_rate": 1.4304339900463091e-06, "loss": 1.6502, "step": 87367 }, { "epoch": 2.91, "grad_norm": 0.7286159992218018, "learning_rate": 1.429414701813092e-06, "loss": 1.7304, "step": 87368 }, { "epoch": 2.91, "grad_norm": 0.7015101909637451, "learning_rate": 1.4283957760005682e-06, "loss": 1.6443, "step": 87369 }, { "epoch": 2.91, "grad_norm": 0.7403640151023865, "learning_rate": 1.427377212609937e-06, "loss": 1.6987, "step": 87370 }, { "epoch": 2.91, "grad_norm": 0.7172501683235168, "learning_rate": 1.4263590116425305e-06, "loss": 1.6226, "step": 87371 }, { "epoch": 2.91, "grad_norm": 0.7131529450416565, "learning_rate": 1.4253411730995146e-06, "loss": 1.5999, "step": 87372 }, { "epoch": 2.91, "grad_norm": 0.717715859413147, "learning_rate": 1.4243236969821547e-06, "loss": 1.7187, "step": 87373 }, { "epoch": 2.91, "grad_norm": 0.7042504549026489, "learning_rate": 1.4233065832916502e-06, "loss": 1.7169, "step": 87374 }, { "epoch": 2.91, "grad_norm": 0.6922062635421753, "learning_rate": 1.4222898320292664e-06, "loss": 1.6454, "step": 87375 }, { "epoch": 2.91, "grad_norm": 0.7184600234031677, "learning_rate": 1.421273443196236e-06, "loss": 1.7155, "step": 87376 }, { "epoch": 2.91, "grad_norm": 0.701277494430542, "learning_rate": 1.4202574167937574e-06, "loss": 1.7093, "step": 87377 }, { "epoch": 2.91, "grad_norm": 0.7244959473609924, "learning_rate": 1.419241752823097e-06, "loss": 1.5774, "step": 87378 }, { "epoch": 2.91, "grad_norm": 0.7005719542503357, "learning_rate": 1.4182264512855201e-06, "loss": 1.6929, "step": 87379 }, { "epoch": 2.91, "grad_norm": 0.7144527435302734, "learning_rate": 1.4172115121822258e-06, "loss": 1.6928, "step": 87380 }, { "epoch": 2.91, "grad_norm": 0.7293574213981628, "learning_rate": 1.41619693551438e-06, "loss": 1.6182, "step": 87381 }, { "epoch": 2.91, "grad_norm": 0.7154162526130676, "learning_rate": 1.4151827212833477e-06, "loss": 1.6431, "step": 87382 }, { "epoch": 2.91, "grad_norm": 0.694669246673584, "learning_rate": 1.414168869490262e-06, "loss": 1.633, "step": 87383 }, { "epoch": 2.91, "grad_norm": 0.7302179336547852, "learning_rate": 1.413155380136355e-06, "loss": 1.7045, "step": 87384 }, { "epoch": 2.91, "grad_norm": 0.7054436206817627, "learning_rate": 1.4121422532228921e-06, "loss": 1.6667, "step": 87385 }, { "epoch": 2.91, "grad_norm": 0.7154290080070496, "learning_rate": 1.4111294887511392e-06, "loss": 1.6789, "step": 87386 }, { "epoch": 2.91, "grad_norm": 0.7424696683883667, "learning_rate": 1.410117086722229e-06, "loss": 1.6601, "step": 87387 }, { "epoch": 2.91, "grad_norm": 0.7095430493354797, "learning_rate": 1.4091050471374599e-06, "loss": 1.6482, "step": 87388 }, { "epoch": 2.91, "grad_norm": 1.2223750352859497, "learning_rate": 1.4080933699980645e-06, "loss": 1.6775, "step": 87389 }, { "epoch": 2.91, "grad_norm": 0.7180934548377991, "learning_rate": 1.4070820553052087e-06, "loss": 1.633, "step": 87390 }, { "epoch": 2.91, "grad_norm": 0.7157856822013855, "learning_rate": 1.4060711030601913e-06, "loss": 1.6402, "step": 87391 }, { "epoch": 2.91, "grad_norm": 0.7131544947624207, "learning_rate": 1.4050605132641778e-06, "loss": 1.647, "step": 87392 }, { "epoch": 2.91, "grad_norm": 0.713556706905365, "learning_rate": 1.4040502859184344e-06, "loss": 1.6615, "step": 87393 }, { "epoch": 2.91, "grad_norm": 0.6975935101509094, "learning_rate": 1.403040421024193e-06, "loss": 1.6658, "step": 87394 }, { "epoch": 2.91, "grad_norm": 0.7278270125389099, "learning_rate": 1.4020309185826528e-06, "loss": 1.7503, "step": 87395 }, { "epoch": 2.91, "grad_norm": 0.7025076150894165, "learning_rate": 1.4010217785950462e-06, "loss": 1.6974, "step": 87396 }, { "epoch": 2.91, "grad_norm": 0.7277597784996033, "learning_rate": 1.4000130010626055e-06, "loss": 1.6671, "step": 87397 }, { "epoch": 2.91, "grad_norm": 0.7282235026359558, "learning_rate": 1.3990045859865629e-06, "loss": 1.7006, "step": 87398 }, { "epoch": 2.91, "grad_norm": 0.7324784994125366, "learning_rate": 1.3979965333681176e-06, "loss": 1.6837, "step": 87399 }, { "epoch": 2.91, "grad_norm": 0.692915678024292, "learning_rate": 1.3969888432085353e-06, "loss": 1.6624, "step": 87400 }, { "epoch": 2.91, "grad_norm": 0.7296526432037354, "learning_rate": 1.3959815155089815e-06, "loss": 1.6393, "step": 87401 }, { "epoch": 2.91, "grad_norm": 0.7090296745300293, "learning_rate": 1.394974550270722e-06, "loss": 1.6972, "step": 87402 }, { "epoch": 2.91, "grad_norm": 0.7065009474754333, "learning_rate": 1.393967947494956e-06, "loss": 1.6882, "step": 87403 }, { "epoch": 2.91, "grad_norm": 0.7206660509109497, "learning_rate": 1.3929617071829159e-06, "loss": 1.638, "step": 87404 }, { "epoch": 2.91, "grad_norm": 0.7169309258460999, "learning_rate": 1.3919558293358335e-06, "loss": 1.6614, "step": 87405 }, { "epoch": 2.91, "grad_norm": 0.7037211060523987, "learning_rate": 1.3909503139549083e-06, "loss": 1.6595, "step": 87406 }, { "epoch": 2.91, "grad_norm": 0.7126043438911438, "learning_rate": 1.3899451610413725e-06, "loss": 1.6242, "step": 87407 }, { "epoch": 2.91, "grad_norm": 0.7324702739715576, "learning_rate": 1.3889403705964586e-06, "loss": 1.6657, "step": 87408 }, { "epoch": 2.91, "grad_norm": 0.7270193099975586, "learning_rate": 1.3879359426213654e-06, "loss": 1.7173, "step": 87409 }, { "epoch": 2.91, "grad_norm": 0.7304059267044067, "learning_rate": 1.3869318771173254e-06, "loss": 1.7537, "step": 87410 }, { "epoch": 2.91, "grad_norm": 0.7261778712272644, "learning_rate": 1.3859281740855377e-06, "loss": 1.6737, "step": 87411 }, { "epoch": 2.91, "grad_norm": 0.7048375606536865, "learning_rate": 1.384924833527268e-06, "loss": 1.6383, "step": 87412 }, { "epoch": 2.91, "grad_norm": 0.7017114758491516, "learning_rate": 1.3839218554436483e-06, "loss": 1.6307, "step": 87413 }, { "epoch": 2.91, "grad_norm": 0.7146753668785095, "learning_rate": 1.3829192398359778e-06, "loss": 1.6046, "step": 87414 }, { "epoch": 2.91, "grad_norm": 0.7253085970878601, "learning_rate": 1.3819169867054559e-06, "loss": 1.6041, "step": 87415 }, { "epoch": 2.91, "grad_norm": 0.7283204793930054, "learning_rate": 1.380915096053281e-06, "loss": 1.6967, "step": 87416 }, { "epoch": 2.91, "grad_norm": 0.728898823261261, "learning_rate": 1.3799135678806528e-06, "loss": 1.6663, "step": 87417 }, { "epoch": 2.91, "grad_norm": 0.7299256920814514, "learning_rate": 1.3789124021888364e-06, "loss": 1.6372, "step": 87418 }, { "epoch": 2.91, "grad_norm": 0.7153737545013428, "learning_rate": 1.377911598978998e-06, "loss": 1.7028, "step": 87419 }, { "epoch": 2.91, "grad_norm": 0.7443116903305054, "learning_rate": 1.3769111582523696e-06, "loss": 1.7717, "step": 87420 }, { "epoch": 2.91, "grad_norm": 0.7003663182258606, "learning_rate": 1.3759110800101837e-06, "loss": 1.6852, "step": 87421 }, { "epoch": 2.91, "grad_norm": 0.7075363397598267, "learning_rate": 1.3749113642536725e-06, "loss": 1.7157, "step": 87422 }, { "epoch": 2.91, "grad_norm": 0.7001937627792358, "learning_rate": 1.3739120109839686e-06, "loss": 1.6651, "step": 87423 }, { "epoch": 2.91, "grad_norm": 0.7236202955245972, "learning_rate": 1.3729130202023708e-06, "loss": 1.6979, "step": 87424 }, { "epoch": 2.91, "grad_norm": 0.7016127109527588, "learning_rate": 1.371914391910045e-06, "loss": 1.7194, "step": 87425 }, { "epoch": 2.91, "grad_norm": 0.6963725090026855, "learning_rate": 1.3709161261082236e-06, "loss": 1.6607, "step": 87426 }, { "epoch": 2.91, "grad_norm": 0.7369576096534729, "learning_rate": 1.3699182227980721e-06, "loss": 1.6722, "step": 87427 }, { "epoch": 2.91, "grad_norm": 0.7254681587219238, "learning_rate": 1.3689206819808562e-06, "loss": 1.6598, "step": 87428 }, { "epoch": 2.91, "grad_norm": 0.7379758954048157, "learning_rate": 1.3679235036578084e-06, "loss": 1.6855, "step": 87429 }, { "epoch": 2.91, "grad_norm": 0.6957765221595764, "learning_rate": 1.366926687830061e-06, "loss": 1.7384, "step": 87430 }, { "epoch": 2.91, "grad_norm": 0.6934278607368469, "learning_rate": 1.3659302344988466e-06, "loss": 1.6975, "step": 87431 }, { "epoch": 2.91, "grad_norm": 0.7124670147895813, "learning_rate": 1.3649341436654305e-06, "loss": 1.7337, "step": 87432 }, { "epoch": 2.91, "grad_norm": 0.7144582867622375, "learning_rate": 1.3639384153309784e-06, "loss": 1.6772, "step": 87433 }, { "epoch": 2.91, "grad_norm": 0.7003138661384583, "learning_rate": 1.3629430494966898e-06, "loss": 1.7119, "step": 87434 }, { "epoch": 2.91, "grad_norm": 0.705248236656189, "learning_rate": 1.3619480461637633e-06, "loss": 1.7111, "step": 87435 }, { "epoch": 2.91, "grad_norm": 0.7175677418708801, "learning_rate": 1.3609534053334648e-06, "loss": 1.6893, "step": 87436 }, { "epoch": 2.91, "grad_norm": 0.7124783992767334, "learning_rate": 1.3599591270069598e-06, "loss": 1.7529, "step": 87437 }, { "epoch": 2.91, "grad_norm": 0.7079154253005981, "learning_rate": 1.3589652111854476e-06, "loss": 1.6567, "step": 87438 }, { "epoch": 2.91, "grad_norm": 0.7609965205192566, "learning_rate": 1.3579716578701605e-06, "loss": 1.6669, "step": 87439 }, { "epoch": 2.91, "grad_norm": 0.6949870586395264, "learning_rate": 1.3569784670622974e-06, "loss": 1.5795, "step": 87440 }, { "epoch": 2.91, "grad_norm": 0.7323452234268188, "learning_rate": 1.3559856387630575e-06, "loss": 1.654, "step": 87441 }, { "epoch": 2.91, "grad_norm": 0.7323567867279053, "learning_rate": 1.3549931729736395e-06, "loss": 1.7236, "step": 87442 }, { "epoch": 2.91, "grad_norm": 0.7171201705932617, "learning_rate": 1.3540010696952762e-06, "loss": 1.7065, "step": 87443 }, { "epoch": 2.91, "grad_norm": 0.7241992354393005, "learning_rate": 1.3530093289291333e-06, "loss": 1.6377, "step": 87444 }, { "epoch": 2.91, "grad_norm": 0.7085179686546326, "learning_rate": 1.3520179506764429e-06, "loss": 1.7664, "step": 87445 }, { "epoch": 2.91, "grad_norm": 0.7090603113174438, "learning_rate": 1.3510269349383706e-06, "loss": 1.7177, "step": 87446 }, { "epoch": 2.91, "grad_norm": 0.7299798130989075, "learning_rate": 1.3500362817161824e-06, "loss": 1.6576, "step": 87447 }, { "epoch": 2.91, "grad_norm": 0.6984334588050842, "learning_rate": 1.349045991011044e-06, "loss": 1.6717, "step": 87448 }, { "epoch": 2.91, "grad_norm": 0.7109236717224121, "learning_rate": 1.3480560628241543e-06, "loss": 1.6748, "step": 87449 }, { "epoch": 2.91, "grad_norm": 0.7358440160751343, "learning_rate": 1.3470664971567124e-06, "loss": 1.6793, "step": 87450 }, { "epoch": 2.91, "grad_norm": 0.7284649610519409, "learning_rate": 1.3460772940099174e-06, "loss": 1.7167, "step": 87451 }, { "epoch": 2.91, "grad_norm": 0.7350218296051025, "learning_rate": 1.3450884533850014e-06, "loss": 1.698, "step": 87452 }, { "epoch": 2.91, "grad_norm": 0.7153809666633606, "learning_rate": 1.3440999752831305e-06, "loss": 1.6705, "step": 87453 }, { "epoch": 2.91, "grad_norm": 0.7643969655036926, "learning_rate": 1.3431118597055368e-06, "loss": 1.7111, "step": 87454 }, { "epoch": 2.91, "grad_norm": 0.7018419504165649, "learning_rate": 1.3421241066533862e-06, "loss": 1.7395, "step": 87455 }, { "epoch": 2.91, "grad_norm": 0.7252427935600281, "learning_rate": 1.3411367161278774e-06, "loss": 1.7675, "step": 87456 }, { "epoch": 2.91, "grad_norm": 0.7131103277206421, "learning_rate": 1.340149688130243e-06, "loss": 1.6878, "step": 87457 }, { "epoch": 2.91, "grad_norm": 0.7116093635559082, "learning_rate": 1.339163022661649e-06, "loss": 1.6974, "step": 87458 }, { "epoch": 2.91, "grad_norm": 0.716898500919342, "learning_rate": 1.3381767197233273e-06, "loss": 1.7315, "step": 87459 }, { "epoch": 2.91, "grad_norm": 0.722127377986908, "learning_rate": 1.3371907793164439e-06, "loss": 1.7263, "step": 87460 }, { "epoch": 2.91, "grad_norm": 0.7321711182594299, "learning_rate": 1.3362052014421975e-06, "loss": 1.6522, "step": 87461 }, { "epoch": 2.91, "grad_norm": 0.7199527621269226, "learning_rate": 1.3352199861017875e-06, "loss": 1.7073, "step": 87462 }, { "epoch": 2.91, "grad_norm": 0.7310931086540222, "learning_rate": 1.334235133296413e-06, "loss": 1.6699, "step": 87463 }, { "epoch": 2.91, "grad_norm": 0.6959978938102722, "learning_rate": 1.333250643027306e-06, "loss": 1.6768, "step": 87464 }, { "epoch": 2.91, "grad_norm": 0.7334083318710327, "learning_rate": 1.332266515295566e-06, "loss": 1.7027, "step": 87465 }, { "epoch": 2.91, "grad_norm": 0.7039456367492676, "learning_rate": 1.3312827501024914e-06, "loss": 1.6705, "step": 87466 }, { "epoch": 2.91, "grad_norm": 0.6957787275314331, "learning_rate": 1.330299347449215e-06, "loss": 1.6654, "step": 87467 }, { "epoch": 2.91, "grad_norm": 0.738332211971283, "learning_rate": 1.3293163073369695e-06, "loss": 1.7385, "step": 87468 }, { "epoch": 2.91, "grad_norm": 0.7431467175483704, "learning_rate": 1.3283336297669201e-06, "loss": 1.6777, "step": 87469 }, { "epoch": 2.91, "grad_norm": 0.727479875087738, "learning_rate": 1.327351314740266e-06, "loss": 1.7075, "step": 87470 }, { "epoch": 2.91, "grad_norm": 0.7466192245483398, "learning_rate": 1.3263693622581728e-06, "loss": 1.6869, "step": 87471 }, { "epoch": 2.91, "grad_norm": 0.7727543115615845, "learning_rate": 1.3253877723219064e-06, "loss": 1.72, "step": 87472 }, { "epoch": 2.91, "grad_norm": 0.7004233598709106, "learning_rate": 1.324406544932566e-06, "loss": 1.6212, "step": 87473 }, { "epoch": 2.91, "grad_norm": 0.685073971748352, "learning_rate": 1.3234256800914168e-06, "loss": 1.598, "step": 87474 }, { "epoch": 2.91, "grad_norm": 0.7161511182785034, "learning_rate": 1.3224451777995914e-06, "loss": 1.7265, "step": 87475 }, { "epoch": 2.91, "grad_norm": 0.7090803980827332, "learning_rate": 1.321465038058356e-06, "loss": 1.6559, "step": 87476 }, { "epoch": 2.91, "grad_norm": 0.7058828473091125, "learning_rate": 1.3204852608688089e-06, "loss": 1.6934, "step": 87477 }, { "epoch": 2.91, "grad_norm": 0.7139720320701599, "learning_rate": 1.319505846232216e-06, "loss": 1.632, "step": 87478 }, { "epoch": 2.91, "grad_norm": 0.7414632439613342, "learning_rate": 1.3185267941497103e-06, "loss": 1.6726, "step": 87479 }, { "epoch": 2.91, "grad_norm": 0.7189115881919861, "learning_rate": 1.3175481046225234e-06, "loss": 1.6624, "step": 87480 }, { "epoch": 2.91, "grad_norm": 0.7191892266273499, "learning_rate": 1.3165697776518213e-06, "loss": 1.6509, "step": 87481 }, { "epoch": 2.91, "grad_norm": 0.7086495757102966, "learning_rate": 1.3155918132388032e-06, "loss": 1.6886, "step": 87482 }, { "epoch": 2.91, "grad_norm": 0.7198225259780884, "learning_rate": 1.3146142113846015e-06, "loss": 1.7264, "step": 87483 }, { "epoch": 2.91, "grad_norm": 0.7005159854888916, "learning_rate": 1.3136369720904816e-06, "loss": 1.6639, "step": 87484 }, { "epoch": 2.91, "grad_norm": 0.7072810530662537, "learning_rate": 1.3126600953576094e-06, "loss": 1.6385, "step": 87485 }, { "epoch": 2.91, "grad_norm": 0.721617579460144, "learning_rate": 1.3116835811871506e-06, "loss": 1.6601, "step": 87486 }, { "epoch": 2.91, "grad_norm": 0.7196215391159058, "learning_rate": 1.3107074295803043e-06, "loss": 1.639, "step": 87487 }, { "epoch": 2.91, "grad_norm": 0.7390624284744263, "learning_rate": 1.309731640538203e-06, "loss": 1.6756, "step": 87488 }, { "epoch": 2.91, "grad_norm": 0.7083775401115417, "learning_rate": 1.3087562140621454e-06, "loss": 1.6929, "step": 87489 }, { "epoch": 2.91, "grad_norm": 0.6957364082336426, "learning_rate": 1.3077811501531976e-06, "loss": 1.6504, "step": 87490 }, { "epoch": 2.91, "grad_norm": 0.7136264443397522, "learning_rate": 1.306806448812625e-06, "loss": 1.599, "step": 87491 }, { "epoch": 2.91, "grad_norm": 0.7178378701210022, "learning_rate": 1.3058321100415604e-06, "loss": 1.6836, "step": 87492 }, { "epoch": 2.91, "grad_norm": 0.6999669075012207, "learning_rate": 1.3048581338412022e-06, "loss": 1.6992, "step": 87493 }, { "epoch": 2.91, "grad_norm": 0.7077808380126953, "learning_rate": 1.3038845202127502e-06, "loss": 1.6576, "step": 87494 }, { "epoch": 2.91, "grad_norm": 0.6963715553283691, "learning_rate": 1.3029112691573696e-06, "loss": 1.6373, "step": 87495 }, { "epoch": 2.91, "grad_norm": 0.7196106314659119, "learning_rate": 1.3019383806762263e-06, "loss": 1.6665, "step": 87496 }, { "epoch": 2.91, "grad_norm": 0.7102732062339783, "learning_rate": 1.3009658547705526e-06, "loss": 1.6645, "step": 87497 }, { "epoch": 2.91, "grad_norm": 0.7360972762107849, "learning_rate": 1.2999936914414477e-06, "loss": 1.6676, "step": 87498 }, { "epoch": 2.91, "grad_norm": 0.7029061317443848, "learning_rate": 1.2990218906901772e-06, "loss": 1.6775, "step": 87499 }, { "epoch": 2.91, "grad_norm": 0.7215591073036194, "learning_rate": 1.2980504525178736e-06, "loss": 1.6762, "step": 87500 }, { "epoch": 2.91, "grad_norm": 0.7119336724281311, "learning_rate": 1.2970793769257026e-06, "loss": 1.6436, "step": 87501 }, { "epoch": 2.91, "grad_norm": 0.7161189317703247, "learning_rate": 1.2961086639148965e-06, "loss": 1.7008, "step": 87502 }, { "epoch": 2.91, "grad_norm": 0.6845943927764893, "learning_rate": 1.2951383134865879e-06, "loss": 1.6609, "step": 87503 }, { "epoch": 2.91, "grad_norm": 0.7178553342819214, "learning_rate": 1.2941683256419754e-06, "loss": 1.6446, "step": 87504 }, { "epoch": 2.91, "grad_norm": 0.723655641078949, "learning_rate": 1.2931987003822252e-06, "loss": 1.6703, "step": 87505 }, { "epoch": 2.91, "grad_norm": 0.7509146332740784, "learning_rate": 1.2922294377085363e-06, "loss": 1.7346, "step": 87506 }, { "epoch": 2.91, "grad_norm": 0.7239576578140259, "learning_rate": 1.2912605376220408e-06, "loss": 1.6628, "step": 87507 }, { "epoch": 2.91, "grad_norm": 0.7049323320388794, "learning_rate": 1.2902920001239715e-06, "loss": 1.6549, "step": 87508 }, { "epoch": 2.91, "grad_norm": 0.7129767537117004, "learning_rate": 1.2893238252154602e-06, "loss": 1.7318, "step": 87509 }, { "epoch": 2.91, "grad_norm": 0.7139130234718323, "learning_rate": 1.2883560128976734e-06, "loss": 1.7144, "step": 87510 }, { "epoch": 2.91, "grad_norm": 1.398058295249939, "learning_rate": 1.2873885631718427e-06, "loss": 1.6863, "step": 87511 }, { "epoch": 2.91, "grad_norm": 0.6998907923698425, "learning_rate": 1.2864214760391012e-06, "loss": 1.7111, "step": 87512 }, { "epoch": 2.91, "grad_norm": 0.7305141091346741, "learning_rate": 1.2854547515006141e-06, "loss": 1.661, "step": 87513 }, { "epoch": 2.91, "grad_norm": 0.7243033051490784, "learning_rate": 1.2844883895575809e-06, "loss": 1.6757, "step": 87514 }, { "epoch": 2.91, "grad_norm": 0.7307886481285095, "learning_rate": 1.2835223902111668e-06, "loss": 1.6889, "step": 87515 }, { "epoch": 2.91, "grad_norm": 0.7285243272781372, "learning_rate": 1.2825567534625714e-06, "loss": 1.7381, "step": 87516 }, { "epoch": 2.91, "grad_norm": 0.746335506439209, "learning_rate": 1.2815914793128934e-06, "loss": 1.6641, "step": 87517 }, { "epoch": 2.91, "grad_norm": 0.7361961007118225, "learning_rate": 1.2806265677633653e-06, "loss": 1.701, "step": 87518 }, { "epoch": 2.91, "grad_norm": 0.72506183385849, "learning_rate": 1.279662018815153e-06, "loss": 1.6835, "step": 87519 }, { "epoch": 2.91, "grad_norm": 0.7030667662620544, "learning_rate": 1.2786978324693887e-06, "loss": 1.698, "step": 87520 }, { "epoch": 2.91, "grad_norm": 0.7124332785606384, "learning_rate": 1.2777340087273046e-06, "loss": 1.7718, "step": 87521 }, { "epoch": 2.91, "grad_norm": 0.7515363097190857, "learning_rate": 1.2767705475900004e-06, "loss": 1.6662, "step": 87522 }, { "epoch": 2.91, "grad_norm": 0.7165995240211487, "learning_rate": 1.2758074490587078e-06, "loss": 1.7063, "step": 87523 }, { "epoch": 2.91, "grad_norm": 0.7223864197731018, "learning_rate": 1.2748447131345596e-06, "loss": 1.6546, "step": 87524 }, { "epoch": 2.91, "grad_norm": 0.6811335682868958, "learning_rate": 1.2738823398187547e-06, "loss": 1.656, "step": 87525 }, { "epoch": 2.91, "grad_norm": 0.7036352753639221, "learning_rate": 1.2729203291124257e-06, "loss": 1.7433, "step": 87526 }, { "epoch": 2.91, "grad_norm": 0.710835874080658, "learning_rate": 1.2719586810167715e-06, "loss": 1.6987, "step": 87527 }, { "epoch": 2.91, "grad_norm": 0.7034050822257996, "learning_rate": 1.2709973955329245e-06, "loss": 1.664, "step": 87528 }, { "epoch": 2.91, "grad_norm": 0.7268102765083313, "learning_rate": 1.2700364726620838e-06, "loss": 1.7809, "step": 87529 }, { "epoch": 2.91, "grad_norm": 0.6930689811706543, "learning_rate": 1.2690759124053818e-06, "loss": 1.6965, "step": 87530 }, { "epoch": 2.91, "grad_norm": 0.6847373843193054, "learning_rate": 1.2681157147640176e-06, "loss": 1.5968, "step": 87531 }, { "epoch": 2.91, "grad_norm": 0.7153847813606262, "learning_rate": 1.2671558797391568e-06, "loss": 1.6467, "step": 87532 }, { "epoch": 2.91, "grad_norm": 0.7208176851272583, "learning_rate": 1.266196407331932e-06, "loss": 1.8167, "step": 87533 }, { "epoch": 2.91, "grad_norm": 0.6964519023895264, "learning_rate": 1.2652372975435422e-06, "loss": 1.7022, "step": 87534 }, { "epoch": 2.91, "grad_norm": 0.6920991539955139, "learning_rate": 1.2642785503751528e-06, "loss": 1.6697, "step": 87535 }, { "epoch": 2.91, "grad_norm": 0.7068705558776855, "learning_rate": 1.2633201658278968e-06, "loss": 1.6438, "step": 87536 }, { "epoch": 2.91, "grad_norm": 0.6918234825134277, "learning_rate": 1.2623621439029396e-06, "loss": 1.6512, "step": 87537 }, { "epoch": 2.91, "grad_norm": 0.7253605127334595, "learning_rate": 1.261404484601447e-06, "loss": 1.6052, "step": 87538 }, { "epoch": 2.91, "grad_norm": 0.7019437551498413, "learning_rate": 1.2604471879246514e-06, "loss": 1.6487, "step": 87539 }, { "epoch": 2.91, "grad_norm": 0.7098146080970764, "learning_rate": 1.2594902538736184e-06, "loss": 1.674, "step": 87540 }, { "epoch": 2.91, "grad_norm": 0.7052416801452637, "learning_rate": 1.258533682449514e-06, "loss": 1.6251, "step": 87541 }, { "epoch": 2.91, "grad_norm": 0.6968437433242798, "learning_rate": 1.2575774736535704e-06, "loss": 1.695, "step": 87542 }, { "epoch": 2.91, "grad_norm": 0.7205172181129456, "learning_rate": 1.25662162748692e-06, "loss": 1.6714, "step": 87543 }, { "epoch": 2.91, "grad_norm": 0.7340284585952759, "learning_rate": 1.2556661439506954e-06, "loss": 1.6589, "step": 87544 }, { "epoch": 2.91, "grad_norm": 0.7196042537689209, "learning_rate": 1.2547110230460622e-06, "loss": 1.6293, "step": 87545 }, { "epoch": 2.91, "grad_norm": 0.7391659021377563, "learning_rate": 1.2537562647742194e-06, "loss": 1.6652, "step": 87546 }, { "epoch": 2.91, "grad_norm": 0.7380684614181519, "learning_rate": 1.2528018691362995e-06, "loss": 1.7072, "step": 87547 }, { "epoch": 2.91, "grad_norm": 0.7200605869293213, "learning_rate": 1.251847836133435e-06, "loss": 1.7377, "step": 87548 }, { "epoch": 2.91, "grad_norm": 0.707874596118927, "learning_rate": 1.2508941657668247e-06, "loss": 1.6153, "step": 87549 }, { "epoch": 2.91, "grad_norm": 0.7117130160331726, "learning_rate": 1.2499408580376014e-06, "loss": 1.685, "step": 87550 }, { "epoch": 2.91, "grad_norm": 0.7271466255187988, "learning_rate": 1.2489879129469305e-06, "loss": 1.6176, "step": 87551 }, { "epoch": 2.91, "grad_norm": 0.7282879948616028, "learning_rate": 1.2480353304959778e-06, "loss": 1.646, "step": 87552 }, { "epoch": 2.91, "grad_norm": 0.6942479014396667, "learning_rate": 1.247083110685909e-06, "loss": 1.6124, "step": 87553 }, { "epoch": 2.91, "grad_norm": 0.7320467233657837, "learning_rate": 1.2461312535178236e-06, "loss": 1.7242, "step": 87554 }, { "epoch": 2.91, "grad_norm": 0.728463351726532, "learning_rate": 1.2451797589929202e-06, "loss": 1.7415, "step": 87555 }, { "epoch": 2.91, "grad_norm": 0.7151385545730591, "learning_rate": 1.2442286271123647e-06, "loss": 1.633, "step": 87556 }, { "epoch": 2.91, "grad_norm": 0.733113706111908, "learning_rate": 1.2432778578772894e-06, "loss": 1.6876, "step": 87557 }, { "epoch": 2.91, "grad_norm": 0.7308529019355774, "learning_rate": 1.2423274512888604e-06, "loss": 1.7699, "step": 87558 }, { "epoch": 2.91, "grad_norm": 0.725845217704773, "learning_rate": 1.241377407348243e-06, "loss": 1.6634, "step": 87559 }, { "epoch": 2.91, "grad_norm": 0.713609516620636, "learning_rate": 1.2404277260565365e-06, "loss": 1.6812, "step": 87560 }, { "epoch": 2.91, "grad_norm": 0.7033336758613586, "learning_rate": 1.23947840741494e-06, "loss": 1.6388, "step": 87561 }, { "epoch": 2.91, "grad_norm": 0.7250018119812012, "learning_rate": 1.2385294514246192e-06, "loss": 1.6509, "step": 87562 }, { "epoch": 2.91, "grad_norm": 0.7070820331573486, "learning_rate": 1.2375808580866731e-06, "loss": 1.6475, "step": 87563 }, { "epoch": 2.91, "grad_norm": 0.7082956433296204, "learning_rate": 1.2366326274023341e-06, "loss": 1.6429, "step": 87564 }, { "epoch": 2.91, "grad_norm": 0.6985484957695007, "learning_rate": 1.235684759372635e-06, "loss": 1.6872, "step": 87565 }, { "epoch": 2.91, "grad_norm": 0.7072573304176331, "learning_rate": 1.2347372539988409e-06, "loss": 1.618, "step": 87566 }, { "epoch": 2.91, "grad_norm": 0.7135812640190125, "learning_rate": 1.2337901112820513e-06, "loss": 1.731, "step": 87567 }, { "epoch": 2.91, "grad_norm": 0.7271652817726135, "learning_rate": 1.2328433312233988e-06, "loss": 1.7131, "step": 87568 }, { "epoch": 2.91, "grad_norm": 0.7046600580215454, "learning_rate": 1.2318969138240819e-06, "loss": 1.6653, "step": 87569 }, { "epoch": 2.91, "grad_norm": 0.7311784029006958, "learning_rate": 1.230950859085167e-06, "loss": 1.6591, "step": 87570 }, { "epoch": 2.91, "grad_norm": 0.7202016711235046, "learning_rate": 1.2300051670079192e-06, "loss": 1.7018, "step": 87571 }, { "epoch": 2.91, "grad_norm": 0.7219116687774658, "learning_rate": 1.2290598375933713e-06, "loss": 1.7211, "step": 87572 }, { "epoch": 2.91, "grad_norm": 0.7162120342254639, "learning_rate": 1.2281148708427557e-06, "loss": 1.6549, "step": 87573 }, { "epoch": 2.91, "grad_norm": 0.716957151889801, "learning_rate": 1.2271702667571714e-06, "loss": 1.6371, "step": 87574 }, { "epoch": 2.91, "grad_norm": 0.7210601568222046, "learning_rate": 1.2262260253377843e-06, "loss": 1.686, "step": 87575 }, { "epoch": 2.91, "grad_norm": 0.7189896702766418, "learning_rate": 1.2252821465857599e-06, "loss": 1.5891, "step": 87576 }, { "epoch": 2.91, "grad_norm": 0.6998282670974731, "learning_rate": 1.2243386305021974e-06, "loss": 1.6861, "step": 87577 }, { "epoch": 2.91, "grad_norm": 0.7003896832466125, "learning_rate": 1.2233954770882625e-06, "loss": 1.7135, "step": 87578 }, { "epoch": 2.91, "grad_norm": 0.7295739054679871, "learning_rate": 1.222452686345121e-06, "loss": 1.7682, "step": 87579 }, { "epoch": 2.91, "grad_norm": 0.7196715474128723, "learning_rate": 1.2215102582738722e-06, "loss": 1.7148, "step": 87580 }, { "epoch": 2.91, "grad_norm": 0.7109767198562622, "learning_rate": 1.2205681928756815e-06, "loss": 1.622, "step": 87581 }, { "epoch": 2.91, "grad_norm": 0.7283567786216736, "learning_rate": 1.219626490151715e-06, "loss": 1.7185, "step": 87582 }, { "epoch": 2.91, "grad_norm": 0.7179147005081177, "learning_rate": 1.2186851501031047e-06, "loss": 1.6809, "step": 87583 }, { "epoch": 2.91, "grad_norm": 0.7132318615913391, "learning_rate": 1.2177441727309834e-06, "loss": 1.6276, "step": 87584 }, { "epoch": 2.91, "grad_norm": 0.7167736291885376, "learning_rate": 1.2168035580365166e-06, "loss": 1.7249, "step": 87585 }, { "epoch": 2.91, "grad_norm": 0.7125964164733887, "learning_rate": 1.2158633060208033e-06, "loss": 1.7033, "step": 87586 }, { "epoch": 2.91, "grad_norm": 0.6921564340591431, "learning_rate": 1.2149234166850098e-06, "loss": 1.6974, "step": 87587 }, { "epoch": 2.91, "grad_norm": 0.7291275262832642, "learning_rate": 1.2139838900302678e-06, "loss": 1.7096, "step": 87588 }, { "epoch": 2.91, "grad_norm": 0.7141581773757935, "learning_rate": 1.2130447260577436e-06, "loss": 1.6499, "step": 87589 }, { "epoch": 2.91, "grad_norm": 0.7168323993682861, "learning_rate": 1.212105924768536e-06, "loss": 1.7367, "step": 87590 }, { "epoch": 2.91, "grad_norm": 0.7450543642044067, "learning_rate": 1.211167486163811e-06, "loss": 1.6529, "step": 87591 }, { "epoch": 2.91, "grad_norm": 0.7128297686576843, "learning_rate": 1.2102294102447341e-06, "loss": 1.6498, "step": 87592 }, { "epoch": 2.91, "grad_norm": 0.7207175493240356, "learning_rate": 1.2092916970124044e-06, "loss": 1.7009, "step": 87593 }, { "epoch": 2.91, "grad_norm": 0.7180522084236145, "learning_rate": 1.2083543464679545e-06, "loss": 1.6422, "step": 87594 }, { "epoch": 2.91, "grad_norm": 0.7377081513404846, "learning_rate": 1.2074173586125502e-06, "loss": 1.7189, "step": 87595 }, { "epoch": 2.91, "grad_norm": 0.7200934290885925, "learning_rate": 1.2064807334473237e-06, "loss": 1.654, "step": 87596 }, { "epoch": 2.91, "grad_norm": 0.7122731804847717, "learning_rate": 1.2055444709733742e-06, "loss": 1.6259, "step": 87597 }, { "epoch": 2.91, "grad_norm": 0.7107717990875244, "learning_rate": 1.2046085711918673e-06, "loss": 1.665, "step": 87598 }, { "epoch": 2.91, "grad_norm": 0.7343170642852783, "learning_rate": 1.203673034103969e-06, "loss": 1.7482, "step": 87599 }, { "epoch": 2.91, "grad_norm": 0.7221397757530212, "learning_rate": 1.2027378597107783e-06, "loss": 1.6568, "step": 87600 }, { "epoch": 2.91, "grad_norm": 0.7186150550842285, "learning_rate": 1.2018030480134277e-06, "loss": 1.666, "step": 87601 }, { "epoch": 2.91, "grad_norm": 0.7214311957359314, "learning_rate": 1.2008685990130828e-06, "loss": 1.6968, "step": 87602 }, { "epoch": 2.91, "grad_norm": 0.7136741876602173, "learning_rate": 1.199934512710843e-06, "loss": 1.6733, "step": 87603 }, { "epoch": 2.91, "grad_norm": 0.7079361081123352, "learning_rate": 1.19900078910784e-06, "loss": 1.653, "step": 87604 }, { "epoch": 2.91, "grad_norm": 0.7272855639457703, "learning_rate": 1.1980674282052406e-06, "loss": 1.7676, "step": 87605 }, { "epoch": 2.91, "grad_norm": 0.7150033712387085, "learning_rate": 1.1971344300041764e-06, "loss": 1.6597, "step": 87606 }, { "epoch": 2.91, "grad_norm": 0.7178701162338257, "learning_rate": 1.1962017945057467e-06, "loss": 1.6146, "step": 87607 }, { "epoch": 2.91, "grad_norm": 0.7040074467658997, "learning_rate": 1.195269521711084e-06, "loss": 1.6876, "step": 87608 }, { "epoch": 2.91, "grad_norm": 0.7183012366294861, "learning_rate": 1.1943376116213543e-06, "loss": 1.7196, "step": 87609 }, { "epoch": 2.91, "grad_norm": 0.7193953990936279, "learning_rate": 1.1934060642376898e-06, "loss": 1.6369, "step": 87610 }, { "epoch": 2.91, "grad_norm": 0.7111978530883789, "learning_rate": 1.1924748795611895e-06, "loss": 1.6959, "step": 87611 }, { "epoch": 2.91, "grad_norm": 0.7002620697021484, "learning_rate": 1.1915440575929857e-06, "loss": 1.7041, "step": 87612 }, { "epoch": 2.91, "grad_norm": 0.7202231884002686, "learning_rate": 1.1906135983342446e-06, "loss": 1.7391, "step": 87613 }, { "epoch": 2.91, "grad_norm": 0.7116766571998596, "learning_rate": 1.189683501786065e-06, "loss": 1.6923, "step": 87614 }, { "epoch": 2.91, "grad_norm": 0.7313278913497925, "learning_rate": 1.1887537679495462e-06, "loss": 1.6852, "step": 87615 }, { "epoch": 2.91, "grad_norm": 0.6949100494384766, "learning_rate": 1.187824396825887e-06, "loss": 1.6358, "step": 87616 }, { "epoch": 2.92, "grad_norm": 0.6953855156898499, "learning_rate": 1.186895388416187e-06, "loss": 1.6195, "step": 87617 }, { "epoch": 2.92, "grad_norm": 0.6968443393707275, "learning_rate": 1.1859667427215446e-06, "loss": 1.679, "step": 87618 }, { "epoch": 2.92, "grad_norm": 0.7056388854980469, "learning_rate": 1.185038459743126e-06, "loss": 1.7195, "step": 87619 }, { "epoch": 2.92, "grad_norm": 0.698308527469635, "learning_rate": 1.1841105394820306e-06, "loss": 1.5966, "step": 87620 }, { "epoch": 2.92, "grad_norm": 0.7012568712234497, "learning_rate": 1.1831829819394234e-06, "loss": 1.6474, "step": 87621 }, { "epoch": 2.92, "grad_norm": 0.7148640155792236, "learning_rate": 1.1822557871163708e-06, "loss": 1.6537, "step": 87622 }, { "epoch": 2.92, "grad_norm": 0.7429904937744141, "learning_rate": 1.1813289550140382e-06, "loss": 1.7329, "step": 87623 }, { "epoch": 2.92, "grad_norm": 0.7348300814628601, "learning_rate": 1.1804024856335582e-06, "loss": 1.6463, "step": 87624 }, { "epoch": 2.92, "grad_norm": 0.7365766763687134, "learning_rate": 1.1794763789760297e-06, "loss": 1.6784, "step": 87625 }, { "epoch": 2.92, "grad_norm": 0.7210452556610107, "learning_rate": 1.1785506350425854e-06, "loss": 1.6089, "step": 87626 }, { "epoch": 2.92, "grad_norm": 0.7074697613716125, "learning_rate": 1.1776252538343577e-06, "loss": 1.6598, "step": 87627 }, { "epoch": 2.92, "grad_norm": 0.7185583114624023, "learning_rate": 1.1767002353524458e-06, "loss": 1.6549, "step": 87628 }, { "epoch": 2.92, "grad_norm": 0.711484432220459, "learning_rate": 1.175775579598015e-06, "loss": 1.6902, "step": 87629 }, { "epoch": 2.92, "grad_norm": 0.7384675741195679, "learning_rate": 1.174851286572165e-06, "loss": 1.6307, "step": 87630 }, { "epoch": 2.92, "grad_norm": 0.6981161832809448, "learning_rate": 1.1739273562759943e-06, "loss": 1.6673, "step": 87631 }, { "epoch": 2.92, "grad_norm": 0.7153753042221069, "learning_rate": 1.1730037887106691e-06, "loss": 1.6252, "step": 87632 }, { "epoch": 2.92, "grad_norm": 0.7406242489814758, "learning_rate": 1.172080583877255e-06, "loss": 1.7335, "step": 87633 }, { "epoch": 2.92, "grad_norm": 0.7203413844108582, "learning_rate": 1.171157741776918e-06, "loss": 1.5753, "step": 87634 }, { "epoch": 2.92, "grad_norm": 0.7055490016937256, "learning_rate": 1.1702352624107903e-06, "loss": 1.7239, "step": 87635 }, { "epoch": 2.92, "grad_norm": 0.7352749705314636, "learning_rate": 1.1693131457799377e-06, "loss": 1.7492, "step": 87636 }, { "epoch": 2.92, "grad_norm": 0.7078011631965637, "learning_rate": 1.1683913918855258e-06, "loss": 1.6278, "step": 87637 }, { "epoch": 2.92, "grad_norm": 0.7071688771247864, "learning_rate": 1.1674700007286542e-06, "loss": 1.6591, "step": 87638 }, { "epoch": 2.92, "grad_norm": 0.7017045617103577, "learning_rate": 1.1665489723104216e-06, "loss": 1.6395, "step": 87639 }, { "epoch": 2.92, "grad_norm": 0.7017437815666199, "learning_rate": 1.1656283066319938e-06, "loss": 1.6402, "step": 87640 }, { "epoch": 2.92, "grad_norm": 0.7482925653457642, "learning_rate": 1.1647080036944367e-06, "loss": 1.5276, "step": 87641 }, { "epoch": 2.92, "grad_norm": 0.6991694569587708, "learning_rate": 1.1637880634989162e-06, "loss": 1.7005, "step": 87642 }, { "epoch": 2.92, "grad_norm": 0.7108073830604553, "learning_rate": 1.1628684860465309e-06, "loss": 1.7585, "step": 87643 }, { "epoch": 2.92, "grad_norm": 0.7332447171211243, "learning_rate": 1.161949271338347e-06, "loss": 1.6412, "step": 87644 }, { "epoch": 2.92, "grad_norm": 0.7445539236068726, "learning_rate": 1.1610304193755637e-06, "loss": 1.6869, "step": 87645 }, { "epoch": 2.92, "grad_norm": 0.6959365010261536, "learning_rate": 1.1601119301592466e-06, "loss": 1.6524, "step": 87646 }, { "epoch": 2.92, "grad_norm": 0.7099642753601074, "learning_rate": 1.1591938036905279e-06, "loss": 1.6925, "step": 87647 }, { "epoch": 2.92, "grad_norm": 0.7015548348426819, "learning_rate": 1.158276039970507e-06, "loss": 1.6628, "step": 87648 }, { "epoch": 2.92, "grad_norm": 0.7139059901237488, "learning_rate": 1.1573586390003165e-06, "loss": 1.7353, "step": 87649 }, { "epoch": 2.92, "grad_norm": 0.7091451287269592, "learning_rate": 1.156441600781055e-06, "loss": 1.7811, "step": 87650 }, { "epoch": 2.92, "grad_norm": 2.1442747116088867, "learning_rate": 1.1555249253138222e-06, "loss": 1.6874, "step": 87651 }, { "epoch": 2.92, "grad_norm": 0.717266857624054, "learning_rate": 1.1546086125997832e-06, "loss": 1.6999, "step": 87652 }, { "epoch": 2.92, "grad_norm": 0.7120277881622314, "learning_rate": 1.1536926626400045e-06, "loss": 1.672, "step": 87653 }, { "epoch": 2.92, "grad_norm": 0.7172760963439941, "learning_rate": 1.152777075435618e-06, "loss": 1.6796, "step": 87654 }, { "epoch": 2.92, "grad_norm": 0.6930083632469177, "learning_rate": 1.1518618509876898e-06, "loss": 1.6588, "step": 87655 }, { "epoch": 2.92, "grad_norm": 0.7237930297851562, "learning_rate": 1.1509469892974188e-06, "loss": 1.6847, "step": 87656 }, { "epoch": 2.92, "grad_norm": 0.694406270980835, "learning_rate": 1.1500324903658375e-06, "loss": 1.6197, "step": 87657 }, { "epoch": 2.92, "grad_norm": 0.7219744920730591, "learning_rate": 1.1491183541940786e-06, "loss": 1.7181, "step": 87658 }, { "epoch": 2.92, "grad_norm": 0.6963517665863037, "learning_rate": 1.148204580783274e-06, "loss": 1.6957, "step": 87659 }, { "epoch": 2.92, "grad_norm": 0.7349977493286133, "learning_rate": 1.14729117013449e-06, "loss": 1.6345, "step": 87660 }, { "epoch": 2.92, "grad_norm": 0.7137062549591064, "learning_rate": 1.1463781222488922e-06, "loss": 1.7124, "step": 87661 }, { "epoch": 2.92, "grad_norm": 0.712183952331543, "learning_rate": 1.145465437127513e-06, "loss": 1.6583, "step": 87662 }, { "epoch": 2.92, "grad_norm": 0.7158782482147217, "learning_rate": 1.1445531147715515e-06, "loss": 1.7269, "step": 87663 }, { "epoch": 2.92, "grad_norm": 0.691252589225769, "learning_rate": 1.1436411551820401e-06, "loss": 1.6992, "step": 87664 }, { "epoch": 2.92, "grad_norm": 0.7203620076179504, "learning_rate": 1.1427295583601448e-06, "loss": 1.7198, "step": 87665 }, { "epoch": 2.92, "grad_norm": 0.7137953639030457, "learning_rate": 1.1418183243068978e-06, "loss": 1.6695, "step": 87666 }, { "epoch": 2.92, "grad_norm": 0.707961916923523, "learning_rate": 1.1409074530234986e-06, "loss": 1.6626, "step": 87667 }, { "epoch": 2.92, "grad_norm": 0.7504483461380005, "learning_rate": 1.1399969445109458e-06, "loss": 1.6339, "step": 87668 }, { "epoch": 2.92, "grad_norm": 0.7049588561058044, "learning_rate": 1.139086798770439e-06, "loss": 1.6618, "step": 87669 }, { "epoch": 2.92, "grad_norm": 0.6878817677497864, "learning_rate": 1.1381770158030435e-06, "loss": 1.7029, "step": 87670 }, { "epoch": 2.92, "grad_norm": 0.7078022360801697, "learning_rate": 1.1372675956098586e-06, "loss": 1.6787, "step": 87671 }, { "epoch": 2.92, "grad_norm": 0.6999809145927429, "learning_rate": 1.1363585381919838e-06, "loss": 1.632, "step": 87672 }, { "epoch": 2.92, "grad_norm": 0.7028233408927917, "learning_rate": 1.1354498435505511e-06, "loss": 1.5915, "step": 87673 }, { "epoch": 2.92, "grad_norm": 0.7375526428222656, "learning_rate": 1.1345415116866596e-06, "loss": 1.6084, "step": 87674 }, { "epoch": 2.92, "grad_norm": 0.7064631581306458, "learning_rate": 1.1336335426013753e-06, "loss": 1.6938, "step": 87675 }, { "epoch": 2.92, "grad_norm": 0.7297243475914001, "learning_rate": 1.1327259362958307e-06, "loss": 1.635, "step": 87676 }, { "epoch": 2.92, "grad_norm": 0.7438315153121948, "learning_rate": 1.1318186927711248e-06, "loss": 1.6534, "step": 87677 }, { "epoch": 2.92, "grad_norm": 0.7540494799613953, "learning_rate": 1.1309118120283567e-06, "loss": 1.6789, "step": 87678 }, { "epoch": 2.92, "grad_norm": 0.7092021107673645, "learning_rate": 1.1300052940685922e-06, "loss": 1.5815, "step": 87679 }, { "epoch": 2.92, "grad_norm": 0.7226136326789856, "learning_rate": 1.1290991388929972e-06, "loss": 1.6515, "step": 87680 }, { "epoch": 2.92, "grad_norm": 0.7397915124893188, "learning_rate": 1.1281933465026705e-06, "loss": 1.7187, "step": 87681 }, { "epoch": 2.92, "grad_norm": 0.7353094816207886, "learning_rate": 1.1272879168986115e-06, "loss": 1.7239, "step": 87682 }, { "epoch": 2.92, "grad_norm": 0.7304105162620544, "learning_rate": 1.1263828500820527e-06, "loss": 1.6719, "step": 87683 }, { "epoch": 2.92, "grad_norm": 0.740595281124115, "learning_rate": 1.125478146053993e-06, "loss": 1.686, "step": 87684 }, { "epoch": 2.92, "grad_norm": 0.7441162467002869, "learning_rate": 1.124573804815565e-06, "loss": 1.722, "step": 87685 }, { "epoch": 2.92, "grad_norm": 0.6987056732177734, "learning_rate": 1.1236698263678678e-06, "loss": 1.6516, "step": 87686 }, { "epoch": 2.92, "grad_norm": 0.7103855609893799, "learning_rate": 1.1227662107120005e-06, "loss": 1.6511, "step": 87687 }, { "epoch": 2.92, "grad_norm": 0.690814733505249, "learning_rate": 1.1218629578490624e-06, "loss": 1.699, "step": 87688 }, { "epoch": 2.92, "grad_norm": 0.7170409560203552, "learning_rate": 1.1209600677801189e-06, "loss": 1.6142, "step": 87689 }, { "epoch": 2.92, "grad_norm": 0.7041873931884766, "learning_rate": 1.1200575405063362e-06, "loss": 1.6445, "step": 87690 }, { "epoch": 2.92, "grad_norm": 0.7173724174499512, "learning_rate": 1.1191553760287463e-06, "loss": 1.692, "step": 87691 }, { "epoch": 2.92, "grad_norm": 0.6926884651184082, "learning_rate": 1.118253574348449e-06, "loss": 1.637, "step": 87692 }, { "epoch": 2.92, "grad_norm": 0.7161290049552917, "learning_rate": 1.117352135466576e-06, "loss": 1.6544, "step": 87693 }, { "epoch": 2.92, "grad_norm": 0.7165527939796448, "learning_rate": 1.1164510593841934e-06, "loss": 1.6568, "step": 87694 }, { "epoch": 2.92, "grad_norm": 0.7162632942199707, "learning_rate": 1.1155503461024006e-06, "loss": 1.7006, "step": 87695 }, { "epoch": 2.92, "grad_norm": 0.7249800562858582, "learning_rate": 1.1146499956222966e-06, "loss": 1.749, "step": 87696 }, { "epoch": 2.92, "grad_norm": 0.7297449111938477, "learning_rate": 1.1137500079449468e-06, "loss": 1.6752, "step": 87697 }, { "epoch": 2.92, "grad_norm": 0.7235588431358337, "learning_rate": 1.1128503830714842e-06, "loss": 1.7207, "step": 87698 }, { "epoch": 2.92, "grad_norm": 0.7108457088470459, "learning_rate": 1.111951121002974e-06, "loss": 1.6748, "step": 87699 }, { "epoch": 2.92, "grad_norm": 0.70747309923172, "learning_rate": 1.111052221740516e-06, "loss": 1.7523, "step": 87700 }, { "epoch": 2.92, "grad_norm": 0.708460807800293, "learning_rate": 1.1101536852851757e-06, "loss": 1.7189, "step": 87701 }, { "epoch": 2.92, "grad_norm": 0.7316096425056458, "learning_rate": 1.1092555116381186e-06, "loss": 1.7204, "step": 87702 }, { "epoch": 2.92, "grad_norm": 0.7081661224365234, "learning_rate": 1.1083577008003774e-06, "loss": 1.7484, "step": 87703 }, { "epoch": 2.92, "grad_norm": 0.7498350143432617, "learning_rate": 1.1074602527730182e-06, "loss": 1.5774, "step": 87704 }, { "epoch": 2.92, "grad_norm": 0.6961832642555237, "learning_rate": 1.1065631675572063e-06, "loss": 1.6649, "step": 87705 }, { "epoch": 2.92, "grad_norm": 0.7134292721748352, "learning_rate": 1.105666445153941e-06, "loss": 1.698, "step": 87706 }, { "epoch": 2.92, "grad_norm": 0.7066043019294739, "learning_rate": 1.1047700855643882e-06, "loss": 1.6205, "step": 87707 }, { "epoch": 2.92, "grad_norm": 0.7337446808815002, "learning_rate": 1.1038740887895803e-06, "loss": 1.6018, "step": 87708 }, { "epoch": 2.92, "grad_norm": 0.7177882790565491, "learning_rate": 1.1029784548306497e-06, "loss": 1.7223, "step": 87709 }, { "epoch": 2.92, "grad_norm": 0.712699294090271, "learning_rate": 1.1020831836886622e-06, "loss": 1.6696, "step": 87710 }, { "epoch": 2.92, "grad_norm": 0.7352083921432495, "learning_rate": 1.101188275364684e-06, "loss": 1.6653, "step": 87711 }, { "epoch": 2.92, "grad_norm": 0.6844063401222229, "learning_rate": 1.100293729859847e-06, "loss": 1.6463, "step": 87712 }, { "epoch": 2.92, "grad_norm": 0.7182022333145142, "learning_rate": 1.099399547175217e-06, "loss": 1.7237, "step": 87713 }, { "epoch": 2.92, "grad_norm": 0.7194019556045532, "learning_rate": 1.0985057273118602e-06, "loss": 1.6985, "step": 87714 }, { "epoch": 2.92, "grad_norm": 0.7037332057952881, "learning_rate": 1.0976122702708755e-06, "loss": 1.738, "step": 87715 }, { "epoch": 2.92, "grad_norm": 0.7046526670455933, "learning_rate": 1.0967191760533621e-06, "loss": 1.7392, "step": 87716 }, { "epoch": 2.92, "grad_norm": 0.727409303188324, "learning_rate": 1.0958264446603526e-06, "loss": 1.6734, "step": 87717 }, { "epoch": 2.92, "grad_norm": 0.7068496942520142, "learning_rate": 1.0949340760930125e-06, "loss": 1.728, "step": 87718 }, { "epoch": 2.92, "grad_norm": 0.7038159370422363, "learning_rate": 1.094042070352341e-06, "loss": 1.5529, "step": 87719 }, { "epoch": 2.92, "grad_norm": 0.7469079494476318, "learning_rate": 1.0931504274395043e-06, "loss": 1.6958, "step": 87720 }, { "epoch": 2.92, "grad_norm": 0.6888067722320557, "learning_rate": 1.0922591473555008e-06, "loss": 1.7034, "step": 87721 }, { "epoch": 2.92, "grad_norm": 0.7016533613204956, "learning_rate": 1.0913682301014637e-06, "loss": 1.659, "step": 87722 }, { "epoch": 2.92, "grad_norm": 0.70451819896698, "learning_rate": 1.0904776756784916e-06, "loss": 1.6332, "step": 87723 }, { "epoch": 2.92, "grad_norm": 0.7023131251335144, "learning_rate": 1.0895874840876173e-06, "loss": 1.6004, "step": 87724 }, { "epoch": 2.92, "grad_norm": 0.7154170870780945, "learning_rate": 1.0886976553299398e-06, "loss": 1.7523, "step": 87725 }, { "epoch": 2.92, "grad_norm": 0.7442548274993896, "learning_rate": 1.0878081894065582e-06, "loss": 1.6862, "step": 87726 }, { "epoch": 2.92, "grad_norm": 0.7080310583114624, "learning_rate": 1.086919086318505e-06, "loss": 1.7023, "step": 87727 }, { "epoch": 2.92, "grad_norm": 0.7249414324760437, "learning_rate": 1.0860303460669129e-06, "loss": 1.6795, "step": 87728 }, { "epoch": 2.92, "grad_norm": 0.687739908695221, "learning_rate": 1.085141968652814e-06, "loss": 1.6212, "step": 87729 }, { "epoch": 2.92, "grad_norm": 0.6940589547157288, "learning_rate": 1.0842539540773409e-06, "loss": 1.7149, "step": 87730 }, { "epoch": 2.92, "grad_norm": 0.7235862612724304, "learning_rate": 1.083366302341493e-06, "loss": 1.6664, "step": 87731 }, { "epoch": 2.92, "grad_norm": 0.7085829973220825, "learning_rate": 1.0824790134464357e-06, "loss": 1.6181, "step": 87732 }, { "epoch": 2.92, "grad_norm": 0.7287658452987671, "learning_rate": 1.0815920873932015e-06, "loss": 1.5911, "step": 87733 }, { "epoch": 2.92, "grad_norm": 0.7189622521400452, "learning_rate": 1.0807055241828566e-06, "loss": 1.6391, "step": 87734 }, { "epoch": 2.92, "grad_norm": 0.6957589387893677, "learning_rate": 1.0798193238164999e-06, "loss": 1.5915, "step": 87735 }, { "epoch": 2.92, "grad_norm": 0.7119829058647156, "learning_rate": 1.078933486295197e-06, "loss": 1.6358, "step": 87736 }, { "epoch": 2.92, "grad_norm": 0.7214164733886719, "learning_rate": 1.078048011620014e-06, "loss": 1.671, "step": 87737 }, { "epoch": 2.92, "grad_norm": 0.7204959392547607, "learning_rate": 1.0771628997920502e-06, "loss": 1.5873, "step": 87738 }, { "epoch": 2.92, "grad_norm": 0.7143762707710266, "learning_rate": 1.0762781508123708e-06, "loss": 1.7133, "step": 87739 }, { "epoch": 2.92, "grad_norm": 0.7106159925460815, "learning_rate": 1.075393764682042e-06, "loss": 1.6229, "step": 87740 }, { "epoch": 2.92, "grad_norm": 0.7089928388595581, "learning_rate": 1.0745097414021297e-06, "loss": 1.697, "step": 87741 }, { "epoch": 2.92, "grad_norm": 0.7717981934547424, "learning_rate": 1.073626080973733e-06, "loss": 1.7152, "step": 87742 }, { "epoch": 2.92, "grad_norm": 0.7122656106948853, "learning_rate": 1.0727427833979174e-06, "loss": 1.7024, "step": 87743 }, { "epoch": 2.92, "grad_norm": 0.7049041986465454, "learning_rate": 1.0718598486757158e-06, "loss": 1.6825, "step": 87744 }, { "epoch": 2.92, "grad_norm": 0.7282825112342834, "learning_rate": 1.0709772768082604e-06, "loss": 1.6314, "step": 87745 }, { "epoch": 2.92, "grad_norm": 0.690805971622467, "learning_rate": 1.0700950677965503e-06, "loss": 1.5873, "step": 87746 }, { "epoch": 2.92, "grad_norm": 0.687137246131897, "learning_rate": 1.0692132216417514e-06, "loss": 1.6393, "step": 87747 }, { "epoch": 2.92, "grad_norm": 0.7130340933799744, "learning_rate": 1.068331738344863e-06, "loss": 1.7263, "step": 87748 }, { "epoch": 2.92, "grad_norm": 0.7210566997528076, "learning_rate": 1.0674506179069508e-06, "loss": 1.7045, "step": 87749 }, { "epoch": 2.92, "grad_norm": 0.7173851728439331, "learning_rate": 1.0665698603291473e-06, "loss": 1.6495, "step": 87750 }, { "epoch": 2.92, "grad_norm": 0.7289031147956848, "learning_rate": 1.0656894656124514e-06, "loss": 1.641, "step": 87751 }, { "epoch": 2.92, "grad_norm": 0.7199503779411316, "learning_rate": 1.064809433757996e-06, "loss": 1.677, "step": 87752 }, { "epoch": 2.92, "grad_norm": 0.7181756496429443, "learning_rate": 1.06392976476678e-06, "loss": 1.6496, "step": 87753 }, { "epoch": 2.92, "grad_norm": 0.7436856031417847, "learning_rate": 1.0630504586399358e-06, "loss": 1.6569, "step": 87754 }, { "epoch": 2.92, "grad_norm": 0.7089174389839172, "learning_rate": 1.0621715153784627e-06, "loss": 1.6557, "step": 87755 }, { "epoch": 2.92, "grad_norm": 0.7276403307914734, "learning_rate": 1.061292934983493e-06, "loss": 1.7158, "step": 87756 }, { "epoch": 2.92, "grad_norm": 0.73245769739151, "learning_rate": 1.060414717456093e-06, "loss": 1.6912, "step": 87757 }, { "epoch": 2.92, "grad_norm": 0.7284479737281799, "learning_rate": 1.0595368627972612e-06, "loss": 1.7161, "step": 87758 }, { "epoch": 2.92, "grad_norm": 0.705696165561676, "learning_rate": 1.0586593710081303e-06, "loss": 1.6635, "step": 87759 }, { "epoch": 2.92, "grad_norm": 0.7021349668502808, "learning_rate": 1.057782242089733e-06, "loss": 1.5995, "step": 87760 }, { "epoch": 2.92, "grad_norm": 0.7034788131713867, "learning_rate": 1.0569054760431351e-06, "loss": 1.6828, "step": 87761 }, { "epoch": 2.92, "grad_norm": 0.7199275493621826, "learning_rate": 1.0560290728694022e-06, "loss": 1.6947, "step": 87762 }, { "epoch": 2.92, "grad_norm": 0.6951501369476318, "learning_rate": 1.0551530325696e-06, "loss": 1.6756, "step": 87763 }, { "epoch": 2.92, "grad_norm": 0.7069395780563354, "learning_rate": 1.054277355144828e-06, "loss": 1.7045, "step": 87764 }, { "epoch": 2.92, "grad_norm": 0.6908290386199951, "learning_rate": 1.0534020405960852e-06, "loss": 1.6184, "step": 87765 }, { "epoch": 2.92, "grad_norm": 0.7056846022605896, "learning_rate": 1.0525270889244707e-06, "loss": 1.657, "step": 87766 }, { "epoch": 2.92, "grad_norm": 0.6987403631210327, "learning_rate": 1.0516525001310505e-06, "loss": 1.6989, "step": 87767 }, { "epoch": 2.92, "grad_norm": 0.7221487164497375, "learning_rate": 1.0507782742168569e-06, "loss": 1.7247, "step": 87768 }, { "epoch": 2.92, "grad_norm": 0.7137084603309631, "learning_rate": 1.049904411182989e-06, "loss": 1.6652, "step": 87769 }, { "epoch": 2.92, "grad_norm": 0.7460306286811829, "learning_rate": 1.0490309110304795e-06, "loss": 1.6332, "step": 87770 }, { "epoch": 2.92, "grad_norm": 0.7265251874923706, "learning_rate": 1.0481577737603942e-06, "loss": 1.6666, "step": 87771 }, { "epoch": 2.92, "grad_norm": 0.7260429263114929, "learning_rate": 1.0472849993737986e-06, "loss": 1.6658, "step": 87772 }, { "epoch": 2.92, "grad_norm": 0.6883369088172913, "learning_rate": 1.0464125878717588e-06, "loss": 1.6457, "step": 87773 }, { "epoch": 2.92, "grad_norm": 0.7382622957229614, "learning_rate": 1.0455405392553074e-06, "loss": 1.6787, "step": 87774 }, { "epoch": 2.92, "grad_norm": 0.7528507709503174, "learning_rate": 1.0446688535255432e-06, "loss": 1.614, "step": 87775 }, { "epoch": 2.92, "grad_norm": 0.7318259477615356, "learning_rate": 1.0437975306834988e-06, "loss": 1.6857, "step": 87776 }, { "epoch": 2.92, "grad_norm": 0.7405751347541809, "learning_rate": 1.042926570730207e-06, "loss": 1.6758, "step": 87777 }, { "epoch": 2.92, "grad_norm": 0.7107816338539124, "learning_rate": 1.0420559736668e-06, "loss": 1.6698, "step": 87778 }, { "epoch": 2.92, "grad_norm": 0.7229297161102295, "learning_rate": 1.0411857394942436e-06, "loss": 1.6358, "step": 87779 }, { "epoch": 2.92, "grad_norm": 0.7148313522338867, "learning_rate": 1.0403158682136704e-06, "loss": 1.675, "step": 87780 }, { "epoch": 2.92, "grad_norm": 0.6895236372947693, "learning_rate": 1.0394463598260793e-06, "loss": 1.662, "step": 87781 }, { "epoch": 2.92, "grad_norm": 0.6994098424911499, "learning_rate": 1.03857721433257e-06, "loss": 1.6757, "step": 87782 }, { "epoch": 2.92, "grad_norm": 0.7219440340995789, "learning_rate": 1.0377084317341743e-06, "loss": 1.714, "step": 87783 }, { "epoch": 2.92, "grad_norm": 0.7133350968360901, "learning_rate": 1.0368400120319586e-06, "loss": 1.6969, "step": 87784 }, { "epoch": 2.92, "grad_norm": 0.7132937908172607, "learning_rate": 1.035971955226922e-06, "loss": 1.6075, "step": 87785 }, { "epoch": 2.92, "grad_norm": 0.7076192498207092, "learning_rate": 1.0351042613201966e-06, "loss": 1.6735, "step": 87786 }, { "epoch": 2.92, "grad_norm": 0.6980190873146057, "learning_rate": 1.0342369303128483e-06, "loss": 1.645, "step": 87787 }, { "epoch": 2.92, "grad_norm": 0.7207100987434387, "learning_rate": 1.03336996220581e-06, "loss": 1.7074, "step": 87788 }, { "epoch": 2.92, "grad_norm": 0.7221444249153137, "learning_rate": 1.0325033570002472e-06, "loss": 1.6202, "step": 87789 }, { "epoch": 2.92, "grad_norm": 0.7211833596229553, "learning_rate": 1.0316371146971926e-06, "loss": 1.653, "step": 87790 }, { "epoch": 2.92, "grad_norm": 0.7127280831336975, "learning_rate": 1.030771235297645e-06, "loss": 1.6918, "step": 87791 }, { "epoch": 2.92, "grad_norm": 0.7218490839004517, "learning_rate": 1.0299057188027038e-06, "loss": 1.7041, "step": 87792 }, { "epoch": 2.92, "grad_norm": 0.7098951935768127, "learning_rate": 1.0290405652134016e-06, "loss": 1.7232, "step": 87793 }, { "epoch": 2.92, "grad_norm": 0.7303228974342346, "learning_rate": 1.028175774530804e-06, "loss": 1.6627, "step": 87794 }, { "epoch": 2.92, "grad_norm": 0.7117412090301514, "learning_rate": 1.0273113467559434e-06, "loss": 1.6572, "step": 87795 }, { "epoch": 2.92, "grad_norm": 0.7059006690979004, "learning_rate": 1.0264472818898862e-06, "loss": 1.7019, "step": 87796 }, { "epoch": 2.92, "grad_norm": 0.7169507145881653, "learning_rate": 1.0255835799336643e-06, "loss": 1.6863, "step": 87797 }, { "epoch": 2.92, "grad_norm": 0.7121005058288574, "learning_rate": 1.0247202408883104e-06, "loss": 1.7839, "step": 87798 }, { "epoch": 2.92, "grad_norm": 0.7317458391189575, "learning_rate": 1.0238572647549237e-06, "loss": 1.6702, "step": 87799 }, { "epoch": 2.92, "grad_norm": 0.6877133250236511, "learning_rate": 1.0229946515345034e-06, "loss": 1.6561, "step": 87800 }, { "epoch": 2.92, "grad_norm": 0.7006348371505737, "learning_rate": 1.0221324012281151e-06, "loss": 1.6719, "step": 87801 }, { "epoch": 2.92, "grad_norm": 0.7126889228820801, "learning_rate": 1.0212705138368249e-06, "loss": 1.6519, "step": 87802 }, { "epoch": 2.92, "grad_norm": 0.7223337292671204, "learning_rate": 1.020408989361632e-06, "loss": 1.7624, "step": 87803 }, { "epoch": 2.92, "grad_norm": 0.725985050201416, "learning_rate": 1.0195478278036351e-06, "loss": 1.6494, "step": 87804 }, { "epoch": 2.92, "grad_norm": 0.7343516945838928, "learning_rate": 1.018687029163834e-06, "loss": 1.7438, "step": 87805 }, { "epoch": 2.92, "grad_norm": 0.7143105864524841, "learning_rate": 1.0178265934432939e-06, "loss": 1.6941, "step": 87806 }, { "epoch": 2.92, "grad_norm": 0.7119623422622681, "learning_rate": 1.016966520643081e-06, "loss": 1.7399, "step": 87807 }, { "epoch": 2.92, "grad_norm": 0.7226859331130981, "learning_rate": 1.0161068107642279e-06, "loss": 1.6431, "step": 87808 }, { "epoch": 2.92, "grad_norm": 0.6831344962120056, "learning_rate": 1.0152474638077335e-06, "loss": 1.6515, "step": 87809 }, { "epoch": 2.92, "grad_norm": 0.7053737044334412, "learning_rate": 1.014388479774697e-06, "loss": 1.6259, "step": 87810 }, { "epoch": 2.92, "grad_norm": 0.7246862649917603, "learning_rate": 1.0135298586661177e-06, "loss": 1.7028, "step": 87811 }, { "epoch": 2.92, "grad_norm": 0.6939854621887207, "learning_rate": 1.0126716004830948e-06, "loss": 1.6466, "step": 87812 }, { "epoch": 2.92, "grad_norm": 0.6898483037948608, "learning_rate": 1.011813705226594e-06, "loss": 1.6642, "step": 87813 }, { "epoch": 2.92, "grad_norm": 0.7044515013694763, "learning_rate": 1.0109561728977145e-06, "loss": 1.638, "step": 87814 }, { "epoch": 2.92, "grad_norm": 0.716227114200592, "learning_rate": 1.0100990034974887e-06, "loss": 1.6588, "step": 87815 }, { "epoch": 2.92, "grad_norm": 0.7190520167350769, "learning_rate": 1.0092421970269493e-06, "loss": 1.6626, "step": 87816 }, { "epoch": 2.92, "grad_norm": 0.7112449407577515, "learning_rate": 1.0083857534871287e-06, "loss": 1.6714, "step": 87817 }, { "epoch": 2.92, "grad_norm": 2.1741788387298584, "learning_rate": 1.0075296728790594e-06, "loss": 1.7001, "step": 87818 }, { "epoch": 2.92, "grad_norm": 0.6967266798019409, "learning_rate": 1.0066739552038072e-06, "loss": 1.7392, "step": 87819 }, { "epoch": 2.92, "grad_norm": 0.7602484822273254, "learning_rate": 1.0058186004624047e-06, "loss": 1.6958, "step": 87820 }, { "epoch": 2.92, "grad_norm": 0.7293437719345093, "learning_rate": 1.0049636086558843e-06, "loss": 1.6615, "step": 87821 }, { "epoch": 2.92, "grad_norm": 0.7223900556564331, "learning_rate": 1.0041089797852786e-06, "loss": 1.7431, "step": 87822 }, { "epoch": 2.92, "grad_norm": 0.6974925398826599, "learning_rate": 1.00325471385162e-06, "loss": 1.5949, "step": 87823 }, { "epoch": 2.92, "grad_norm": 0.7150141000747681, "learning_rate": 1.0024008108559745e-06, "loss": 1.6604, "step": 87824 }, { "epoch": 2.92, "grad_norm": 0.7211406826972961, "learning_rate": 1.001547270799341e-06, "loss": 1.6605, "step": 87825 }, { "epoch": 2.92, "grad_norm": 0.7007771134376526, "learning_rate": 1.0006940936827857e-06, "loss": 1.6375, "step": 87826 }, { "epoch": 2.92, "grad_norm": 0.6951805353164673, "learning_rate": 9.998412795073074e-07, "loss": 1.6377, "step": 87827 }, { "epoch": 2.92, "grad_norm": 0.7094458341598511, "learning_rate": 9.989888282740054e-07, "loss": 1.6828, "step": 87828 }, { "epoch": 2.92, "grad_norm": 0.6966691017150879, "learning_rate": 9.981367399838457e-07, "loss": 1.6255, "step": 87829 }, { "epoch": 2.92, "grad_norm": 0.7414534687995911, "learning_rate": 9.972850146379274e-07, "loss": 1.6634, "step": 87830 }, { "epoch": 2.92, "grad_norm": 0.7275567054748535, "learning_rate": 9.964336522372162e-07, "loss": 1.7337, "step": 87831 }, { "epoch": 2.92, "grad_norm": 0.7435397505760193, "learning_rate": 9.95582652782778e-07, "loss": 1.6833, "step": 87832 }, { "epoch": 2.92, "grad_norm": 0.7043883800506592, "learning_rate": 9.947320162756456e-07, "loss": 1.6424, "step": 87833 }, { "epoch": 2.92, "grad_norm": 0.7383511662483215, "learning_rate": 9.938817427168844e-07, "loss": 1.732, "step": 87834 }, { "epoch": 2.92, "grad_norm": 0.6934077739715576, "learning_rate": 9.930318321074604e-07, "loss": 1.6778, "step": 87835 }, { "epoch": 2.92, "grad_norm": 0.6939266920089722, "learning_rate": 9.921822844484395e-07, "loss": 1.6422, "step": 87836 }, { "epoch": 2.92, "grad_norm": 0.6988297700881958, "learning_rate": 9.913330997408542e-07, "loss": 1.6148, "step": 87837 }, { "epoch": 2.92, "grad_norm": 0.7434981465339661, "learning_rate": 9.904842779857368e-07, "loss": 1.6963, "step": 87838 }, { "epoch": 2.92, "grad_norm": 0.712238609790802, "learning_rate": 9.896358191841202e-07, "loss": 1.5559, "step": 87839 }, { "epoch": 2.92, "grad_norm": 0.739876925945282, "learning_rate": 9.887877233370367e-07, "loss": 1.7338, "step": 87840 }, { "epoch": 2.92, "grad_norm": 0.7158666849136353, "learning_rate": 9.879399904455187e-07, "loss": 1.705, "step": 87841 }, { "epoch": 2.92, "grad_norm": 1.3153468370437622, "learning_rate": 9.870926205105656e-07, "loss": 1.6968, "step": 87842 }, { "epoch": 2.92, "grad_norm": 0.6858247518539429, "learning_rate": 9.862456135332097e-07, "loss": 1.6423, "step": 87843 }, { "epoch": 2.92, "grad_norm": 0.7252835035324097, "learning_rate": 9.85398969514517e-07, "loss": 1.642, "step": 87844 }, { "epoch": 2.92, "grad_norm": 0.683087944984436, "learning_rate": 9.845526884554866e-07, "loss": 1.6502, "step": 87845 }, { "epoch": 2.92, "grad_norm": 0.7333763241767883, "learning_rate": 9.837067703571178e-07, "loss": 1.6867, "step": 87846 }, { "epoch": 2.92, "grad_norm": 0.705869197845459, "learning_rate": 9.828612152205095e-07, "loss": 1.7073, "step": 87847 }, { "epoch": 2.92, "grad_norm": 0.7187111377716064, "learning_rate": 9.820160230466612e-07, "loss": 1.6931, "step": 87848 }, { "epoch": 2.92, "grad_norm": 0.7142063975334167, "learning_rate": 9.81171193836572e-07, "loss": 1.7613, "step": 87849 }, { "epoch": 2.92, "grad_norm": 0.7205972671508789, "learning_rate": 9.803267275912741e-07, "loss": 1.745, "step": 87850 }, { "epoch": 2.92, "grad_norm": 0.7424718737602234, "learning_rate": 9.794826243118004e-07, "loss": 1.6214, "step": 87851 }, { "epoch": 2.92, "grad_norm": 0.7439335584640503, "learning_rate": 9.786388839991833e-07, "loss": 1.6826, "step": 87852 }, { "epoch": 2.92, "grad_norm": 0.7254867553710938, "learning_rate": 9.777955066544219e-07, "loss": 1.6254, "step": 87853 }, { "epoch": 2.92, "grad_norm": 0.7255082726478577, "learning_rate": 9.76952492278582e-07, "loss": 1.6961, "step": 87854 }, { "epoch": 2.92, "grad_norm": 0.7185009717941284, "learning_rate": 9.76109840872663e-07, "loss": 1.6149, "step": 87855 }, { "epoch": 2.92, "grad_norm": 0.7146256566047668, "learning_rate": 9.75267552437664e-07, "loss": 1.703, "step": 87856 }, { "epoch": 2.92, "grad_norm": 0.7142822742462158, "learning_rate": 9.744256269746509e-07, "loss": 1.6519, "step": 87857 }, { "epoch": 2.92, "grad_norm": 0.7270548343658447, "learning_rate": 9.735840644846226e-07, "loss": 1.7087, "step": 87858 }, { "epoch": 2.92, "grad_norm": 0.7285364270210266, "learning_rate": 9.72742864968612e-07, "loss": 1.703, "step": 87859 }, { "epoch": 2.92, "grad_norm": 0.7425850629806519, "learning_rate": 9.71902028427618e-07, "loss": 1.7123, "step": 87860 }, { "epoch": 2.92, "grad_norm": 0.7063404321670532, "learning_rate": 9.710615548626732e-07, "loss": 1.6396, "step": 87861 }, { "epoch": 2.92, "grad_norm": 0.687725841999054, "learning_rate": 9.702214442748102e-07, "loss": 1.6578, "step": 87862 }, { "epoch": 2.92, "grad_norm": 0.729333221912384, "learning_rate": 9.69381696665028e-07, "loss": 1.6931, "step": 87863 }, { "epoch": 2.92, "grad_norm": 0.6933491230010986, "learning_rate": 9.685423120343593e-07, "loss": 1.5738, "step": 87864 }, { "epoch": 2.92, "grad_norm": 0.7142621874809265, "learning_rate": 9.677032903838367e-07, "loss": 1.6854, "step": 87865 }, { "epoch": 2.92, "grad_norm": 0.7089407444000244, "learning_rate": 9.668646317144258e-07, "loss": 1.7301, "step": 87866 }, { "epoch": 2.92, "grad_norm": 0.7235726118087769, "learning_rate": 9.660263360272257e-07, "loss": 1.6572, "step": 87867 }, { "epoch": 2.92, "grad_norm": 0.7228441834449768, "learning_rate": 9.651884033231694e-07, "loss": 1.6594, "step": 87868 }, { "epoch": 2.92, "grad_norm": 0.6972167491912842, "learning_rate": 9.643508336033224e-07, "loss": 1.6051, "step": 87869 }, { "epoch": 2.92, "grad_norm": 0.7216610908508301, "learning_rate": 9.63513626868717e-07, "loss": 1.6852, "step": 87870 }, { "epoch": 2.92, "grad_norm": 0.721820056438446, "learning_rate": 9.626767831203196e-07, "loss": 1.68, "step": 87871 }, { "epoch": 2.92, "grad_norm": 0.7224811315536499, "learning_rate": 9.618403023591959e-07, "loss": 1.6933, "step": 87872 }, { "epoch": 2.92, "grad_norm": 0.7428323030471802, "learning_rate": 9.610041845863115e-07, "loss": 1.6632, "step": 87873 }, { "epoch": 2.92, "grad_norm": 0.7272889614105225, "learning_rate": 9.601684298027323e-07, "loss": 1.6867, "step": 87874 }, { "epoch": 2.92, "grad_norm": 0.7113829255104065, "learning_rate": 9.593330380094244e-07, "loss": 1.6504, "step": 87875 }, { "epoch": 2.92, "grad_norm": 0.7015852332115173, "learning_rate": 9.5849800920742e-07, "loss": 1.6699, "step": 87876 }, { "epoch": 2.92, "grad_norm": 0.7431821227073669, "learning_rate": 9.576633433977521e-07, "loss": 1.68, "step": 87877 }, { "epoch": 2.92, "grad_norm": 0.6974363327026367, "learning_rate": 9.568290405814195e-07, "loss": 1.6406, "step": 87878 }, { "epoch": 2.92, "grad_norm": 0.7179713249206543, "learning_rate": 9.559951007594214e-07, "loss": 1.7075, "step": 87879 }, { "epoch": 2.92, "grad_norm": 0.7385579347610474, "learning_rate": 9.551615239328237e-07, "loss": 1.6545, "step": 87880 }, { "epoch": 2.92, "grad_norm": 0.7022047638893127, "learning_rate": 9.543283101025589e-07, "loss": 1.6954, "step": 87881 }, { "epoch": 2.92, "grad_norm": 0.7094034552574158, "learning_rate": 9.534954592696597e-07, "loss": 1.6973, "step": 87882 }, { "epoch": 2.92, "grad_norm": 0.6819038391113281, "learning_rate": 9.526629714351919e-07, "loss": 1.6831, "step": 87883 }, { "epoch": 2.92, "grad_norm": 0.7116353511810303, "learning_rate": 9.518308466001212e-07, "loss": 1.6872, "step": 87884 }, { "epoch": 2.92, "grad_norm": 0.7321003675460815, "learning_rate": 9.50999084765447e-07, "loss": 1.6935, "step": 87885 }, { "epoch": 2.92, "grad_norm": 0.7098479866981506, "learning_rate": 9.501676859322349e-07, "loss": 1.6794, "step": 87886 }, { "epoch": 2.92, "grad_norm": 0.71567302942276, "learning_rate": 9.493366501014177e-07, "loss": 1.7019, "step": 87887 }, { "epoch": 2.92, "grad_norm": 0.7112722396850586, "learning_rate": 9.485059772740611e-07, "loss": 1.6442, "step": 87888 }, { "epoch": 2.92, "grad_norm": 0.7204645872116089, "learning_rate": 9.476756674511643e-07, "loss": 1.6416, "step": 87889 }, { "epoch": 2.92, "grad_norm": 0.7046114802360535, "learning_rate": 9.468457206337266e-07, "loss": 1.6833, "step": 87890 }, { "epoch": 2.92, "grad_norm": 0.7213255167007446, "learning_rate": 9.460161368227137e-07, "loss": 1.6692, "step": 87891 }, { "epoch": 2.92, "grad_norm": 0.7098421454429626, "learning_rate": 9.451869160191916e-07, "loss": 1.6406, "step": 87892 }, { "epoch": 2.92, "grad_norm": 0.7242003679275513, "learning_rate": 9.443580582241927e-07, "loss": 1.7063, "step": 87893 }, { "epoch": 2.92, "grad_norm": 0.7249899506568909, "learning_rate": 9.435295634386497e-07, "loss": 1.7042, "step": 87894 }, { "epoch": 2.92, "grad_norm": 0.7476027011871338, "learning_rate": 9.427014316635617e-07, "loss": 1.7516, "step": 87895 }, { "epoch": 2.92, "grad_norm": 0.715523898601532, "learning_rate": 9.418736628999945e-07, "loss": 1.6276, "step": 87896 }, { "epoch": 2.92, "grad_norm": 0.7176732420921326, "learning_rate": 9.410462571489808e-07, "loss": 1.7482, "step": 87897 }, { "epoch": 2.92, "grad_norm": 0.701745867729187, "learning_rate": 9.402192144114195e-07, "loss": 1.7289, "step": 87898 }, { "epoch": 2.92, "grad_norm": 0.7347975969314575, "learning_rate": 9.393925346883435e-07, "loss": 1.7708, "step": 87899 }, { "epoch": 2.92, "grad_norm": 0.7139703631401062, "learning_rate": 9.385662179808185e-07, "loss": 1.6901, "step": 87900 }, { "epoch": 2.92, "grad_norm": 0.7199437618255615, "learning_rate": 9.377402642898102e-07, "loss": 1.6471, "step": 87901 }, { "epoch": 2.92, "grad_norm": 0.7280640006065369, "learning_rate": 9.36914673616318e-07, "loss": 1.7239, "step": 87902 }, { "epoch": 2.92, "grad_norm": 0.7028438448905945, "learning_rate": 9.360894459613411e-07, "loss": 1.63, "step": 87903 }, { "epoch": 2.92, "grad_norm": 0.7159480452537537, "learning_rate": 9.352645813259118e-07, "loss": 1.7572, "step": 87904 }, { "epoch": 2.92, "grad_norm": 0.7100269198417664, "learning_rate": 9.34440079710963e-07, "loss": 1.7903, "step": 87905 }, { "epoch": 2.92, "grad_norm": 0.7025331258773804, "learning_rate": 9.336159411175603e-07, "loss": 1.6348, "step": 87906 }, { "epoch": 2.92, "grad_norm": 0.6794587969779968, "learning_rate": 9.327921655466697e-07, "loss": 1.6587, "step": 87907 }, { "epoch": 2.92, "grad_norm": 0.7201390862464905, "learning_rate": 9.319687529993236e-07, "loss": 1.6801, "step": 87908 }, { "epoch": 2.92, "grad_norm": 0.712080180644989, "learning_rate": 9.31145703476488e-07, "loss": 1.68, "step": 87909 }, { "epoch": 2.92, "grad_norm": 0.7223151326179504, "learning_rate": 9.303230169791954e-07, "loss": 1.7174, "step": 87910 }, { "epoch": 2.92, "grad_norm": 0.6955550909042358, "learning_rate": 9.295006935084115e-07, "loss": 1.6713, "step": 87911 }, { "epoch": 2.92, "grad_norm": 0.7250065207481384, "learning_rate": 9.286787330651357e-07, "loss": 1.7058, "step": 87912 }, { "epoch": 2.92, "grad_norm": 0.7193049788475037, "learning_rate": 9.278571356504006e-07, "loss": 1.6778, "step": 87913 }, { "epoch": 2.92, "grad_norm": 0.7337837219238281, "learning_rate": 9.270359012651718e-07, "loss": 1.6916, "step": 87914 }, { "epoch": 2.92, "grad_norm": 0.7229757905006409, "learning_rate": 9.262150299104487e-07, "loss": 1.61, "step": 87915 }, { "epoch": 2.92, "grad_norm": 0.7627267241477966, "learning_rate": 9.253945215872305e-07, "loss": 1.5953, "step": 87916 }, { "epoch": 2.93, "grad_norm": 0.7301108837127686, "learning_rate": 9.245743762965163e-07, "loss": 1.6876, "step": 87917 }, { "epoch": 2.93, "grad_norm": 0.7139788269996643, "learning_rate": 9.237545940393387e-07, "loss": 1.6632, "step": 87918 }, { "epoch": 2.93, "grad_norm": 0.7078360319137573, "learning_rate": 9.229351748165969e-07, "loss": 1.6964, "step": 87919 }, { "epoch": 2.93, "grad_norm": 0.730495810508728, "learning_rate": 9.221161186293902e-07, "loss": 1.6931, "step": 87920 }, { "epoch": 2.93, "grad_norm": 0.7088075876235962, "learning_rate": 9.21297425478651e-07, "loss": 1.7099, "step": 87921 }, { "epoch": 2.93, "grad_norm": 0.7478273510932922, "learning_rate": 9.204790953653785e-07, "loss": 1.6765, "step": 87922 }, { "epoch": 2.93, "grad_norm": 0.7158570885658264, "learning_rate": 9.196611282906052e-07, "loss": 1.6441, "step": 87923 }, { "epoch": 2.93, "grad_norm": 0.71531081199646, "learning_rate": 9.188435242552639e-07, "loss": 1.6195, "step": 87924 }, { "epoch": 2.93, "grad_norm": 0.7167599201202393, "learning_rate": 9.180262832604202e-07, "loss": 1.7027, "step": 87925 }, { "epoch": 2.93, "grad_norm": 0.7254042625427246, "learning_rate": 9.172094053069734e-07, "loss": 1.682, "step": 87926 }, { "epoch": 2.93, "grad_norm": 0.72218918800354, "learning_rate": 9.163928903960227e-07, "loss": 1.6191, "step": 87927 }, { "epoch": 2.93, "grad_norm": 0.7011627554893494, "learning_rate": 9.155767385284673e-07, "loss": 1.7127, "step": 87928 }, { "epoch": 2.93, "grad_norm": 0.7148575782775879, "learning_rate": 9.147609497053398e-07, "loss": 1.7321, "step": 87929 }, { "epoch": 2.93, "grad_norm": 0.722088098526001, "learning_rate": 9.139455239276061e-07, "loss": 1.7387, "step": 87930 }, { "epoch": 2.93, "grad_norm": 0.6779134273529053, "learning_rate": 9.131304611962987e-07, "loss": 1.6228, "step": 87931 }, { "epoch": 2.93, "grad_norm": 0.7334203124046326, "learning_rate": 9.123157615123833e-07, "loss": 1.5952, "step": 87932 }, { "epoch": 2.93, "grad_norm": 0.718636155128479, "learning_rate": 9.11501424876826e-07, "loss": 1.7056, "step": 87933 }, { "epoch": 2.93, "grad_norm": 0.7183997631072998, "learning_rate": 9.106874512906593e-07, "loss": 1.7162, "step": 87934 }, { "epoch": 2.93, "grad_norm": 0.7334965467453003, "learning_rate": 9.098738407548489e-07, "loss": 1.7282, "step": 87935 }, { "epoch": 2.93, "grad_norm": 0.7093665599822998, "learning_rate": 9.090605932703943e-07, "loss": 1.7169, "step": 87936 }, { "epoch": 2.93, "grad_norm": 0.7093299627304077, "learning_rate": 9.082477088382278e-07, "loss": 1.6267, "step": 87937 }, { "epoch": 2.93, "grad_norm": 0.7355244159698486, "learning_rate": 9.074351874594155e-07, "loss": 1.7024, "step": 87938 }, { "epoch": 2.93, "grad_norm": 0.7055225968360901, "learning_rate": 9.066230291348897e-07, "loss": 1.6298, "step": 87939 }, { "epoch": 2.93, "grad_norm": 0.7050502300262451, "learning_rate": 9.058112338656831e-07, "loss": 1.7105, "step": 87940 }, { "epoch": 2.93, "grad_norm": 0.7128443121910095, "learning_rate": 9.049998016527282e-07, "loss": 1.7339, "step": 87941 }, { "epoch": 2.93, "grad_norm": 0.7023982405662537, "learning_rate": 9.041887324970243e-07, "loss": 1.7019, "step": 87942 }, { "epoch": 2.93, "grad_norm": 0.7077203392982483, "learning_rate": 9.033780263995704e-07, "loss": 1.687, "step": 87943 }, { "epoch": 2.93, "grad_norm": 0.6830316781997681, "learning_rate": 9.02567683361366e-07, "loss": 1.7119, "step": 87944 }, { "epoch": 2.93, "grad_norm": 0.7203587293624878, "learning_rate": 9.017577033833434e-07, "loss": 1.6758, "step": 87945 }, { "epoch": 2.93, "grad_norm": 0.760162353515625, "learning_rate": 9.009480864665686e-07, "loss": 1.7148, "step": 87946 }, { "epoch": 2.93, "grad_norm": 0.7359699010848999, "learning_rate": 9.001388326119408e-07, "loss": 1.758, "step": 87947 }, { "epoch": 2.93, "grad_norm": 0.7051188349723816, "learning_rate": 8.993299418204592e-07, "loss": 1.6239, "step": 87948 }, { "epoch": 2.93, "grad_norm": 0.7295319437980652, "learning_rate": 8.985214140931229e-07, "loss": 1.6739, "step": 87949 }, { "epoch": 2.93, "grad_norm": 0.6846954226493835, "learning_rate": 8.977132494309314e-07, "loss": 1.6038, "step": 87950 }, { "epoch": 2.93, "grad_norm": 0.7174320220947266, "learning_rate": 8.969054478348503e-07, "loss": 1.6887, "step": 87951 }, { "epoch": 2.93, "grad_norm": 0.7109305262565613, "learning_rate": 8.960980093058123e-07, "loss": 1.6079, "step": 87952 }, { "epoch": 2.93, "grad_norm": 0.7131219506263733, "learning_rate": 8.952909338448832e-07, "loss": 1.614, "step": 87953 }, { "epoch": 2.93, "grad_norm": 0.7028744220733643, "learning_rate": 8.944842214529957e-07, "loss": 1.7796, "step": 87954 }, { "epoch": 2.93, "grad_norm": 0.72461998462677, "learning_rate": 8.936778721311155e-07, "loss": 1.6921, "step": 87955 }, { "epoch": 2.93, "grad_norm": 0.7195224165916443, "learning_rate": 8.92871885880242e-07, "loss": 1.6724, "step": 87956 }, { "epoch": 2.93, "grad_norm": 0.7160794138908386, "learning_rate": 8.920662627013742e-07, "loss": 1.8123, "step": 87957 }, { "epoch": 2.93, "grad_norm": 0.6975870728492737, "learning_rate": 8.912610025954447e-07, "loss": 1.7085, "step": 87958 }, { "epoch": 2.93, "grad_norm": 0.7348197102546692, "learning_rate": 8.904561055634529e-07, "loss": 1.6228, "step": 87959 }, { "epoch": 2.93, "grad_norm": 0.7193140387535095, "learning_rate": 8.896515716063645e-07, "loss": 1.6641, "step": 87960 }, { "epoch": 2.93, "grad_norm": 0.7246749997138977, "learning_rate": 8.888474007251789e-07, "loss": 1.673, "step": 87961 }, { "epoch": 2.93, "grad_norm": 0.7314433455467224, "learning_rate": 8.880435929208618e-07, "loss": 1.7311, "step": 87962 }, { "epoch": 2.93, "grad_norm": 0.723111629486084, "learning_rate": 8.872401481943791e-07, "loss": 1.6818, "step": 87963 }, { "epoch": 2.93, "grad_norm": 0.7326736450195312, "learning_rate": 8.864370665467302e-07, "loss": 1.7558, "step": 87964 }, { "epoch": 2.93, "grad_norm": 0.7186259031295776, "learning_rate": 8.856343479788807e-07, "loss": 1.6532, "step": 87965 }, { "epoch": 2.93, "grad_norm": 0.7273601293563843, "learning_rate": 8.848319924917968e-07, "loss": 1.6485, "step": 87966 }, { "epoch": 2.93, "grad_norm": 0.7165175080299377, "learning_rate": 8.840300000864442e-07, "loss": 1.6818, "step": 87967 }, { "epoch": 2.93, "grad_norm": 0.731117844581604, "learning_rate": 8.832283707638221e-07, "loss": 1.7111, "step": 87968 }, { "epoch": 2.93, "grad_norm": 0.7214232683181763, "learning_rate": 8.824271045248632e-07, "loss": 1.7077, "step": 87969 }, { "epoch": 2.93, "grad_norm": 0.7165326476097107, "learning_rate": 8.816262013705666e-07, "loss": 1.689, "step": 87970 }, { "epoch": 2.93, "grad_norm": 0.718244731426239, "learning_rate": 8.808256613018982e-07, "loss": 1.6992, "step": 87971 }, { "epoch": 2.93, "grad_norm": 0.7111842036247253, "learning_rate": 8.800254843198573e-07, "loss": 1.6643, "step": 87972 }, { "epoch": 2.93, "grad_norm": 0.7087041139602661, "learning_rate": 8.792256704253764e-07, "loss": 1.6648, "step": 87973 }, { "epoch": 2.93, "grad_norm": 0.7035544514656067, "learning_rate": 8.784262196194547e-07, "loss": 1.711, "step": 87974 }, { "epoch": 2.93, "grad_norm": 0.7017228603363037, "learning_rate": 8.776271319030581e-07, "loss": 1.7183, "step": 87975 }, { "epoch": 2.93, "grad_norm": 0.7310735583305359, "learning_rate": 8.768284072771193e-07, "loss": 1.7019, "step": 87976 }, { "epoch": 2.93, "grad_norm": 0.72295081615448, "learning_rate": 8.760300457426373e-07, "loss": 1.6299, "step": 87977 }, { "epoch": 2.93, "grad_norm": 0.7114651799201965, "learning_rate": 8.752320473006114e-07, "loss": 1.6873, "step": 87978 }, { "epoch": 2.93, "grad_norm": 0.6967694163322449, "learning_rate": 8.744344119519741e-07, "loss": 1.6874, "step": 87979 }, { "epoch": 2.93, "grad_norm": 0.7238132357597351, "learning_rate": 8.736371396976915e-07, "loss": 1.5963, "step": 87980 }, { "epoch": 2.93, "grad_norm": 0.7339874505996704, "learning_rate": 8.728402305387294e-07, "loss": 1.6733, "step": 87981 }, { "epoch": 2.93, "grad_norm": 0.6939124464988708, "learning_rate": 8.720436844760536e-07, "loss": 1.6367, "step": 87982 }, { "epoch": 2.93, "grad_norm": 0.7190373539924622, "learning_rate": 8.712475015106635e-07, "loss": 1.7411, "step": 87983 }, { "epoch": 2.93, "grad_norm": 0.7091006636619568, "learning_rate": 8.704516816435247e-07, "loss": 1.5932, "step": 87984 }, { "epoch": 2.93, "grad_norm": 0.7435954809188843, "learning_rate": 8.696562248755368e-07, "loss": 1.6684, "step": 87985 }, { "epoch": 2.93, "grad_norm": 0.7280799150466919, "learning_rate": 8.688611312077321e-07, "loss": 1.7327, "step": 87986 }, { "epoch": 2.93, "grad_norm": 0.7163841724395752, "learning_rate": 8.680664006410764e-07, "loss": 1.6996, "step": 87987 }, { "epoch": 2.93, "grad_norm": 0.702594518661499, "learning_rate": 8.672720331764693e-07, "loss": 1.6044, "step": 87988 }, { "epoch": 2.93, "grad_norm": 0.7148596048355103, "learning_rate": 8.664780288149431e-07, "loss": 1.6496, "step": 87989 }, { "epoch": 2.93, "grad_norm": 0.6980943083763123, "learning_rate": 8.656843875574638e-07, "loss": 1.6486, "step": 87990 }, { "epoch": 2.93, "grad_norm": 0.7200788855552673, "learning_rate": 8.648911094049305e-07, "loss": 1.7125, "step": 87991 }, { "epoch": 2.93, "grad_norm": 0.7024518847465515, "learning_rate": 8.640981943583425e-07, "loss": 1.6933, "step": 87992 }, { "epoch": 2.93, "grad_norm": 0.7438412308692932, "learning_rate": 8.633056424186991e-07, "loss": 1.7522, "step": 87993 }, { "epoch": 2.93, "grad_norm": 0.7279141545295715, "learning_rate": 8.625134535868994e-07, "loss": 1.721, "step": 87994 }, { "epoch": 2.93, "grad_norm": 0.7375730276107788, "learning_rate": 8.617216278639427e-07, "loss": 1.6696, "step": 87995 }, { "epoch": 2.93, "grad_norm": 0.6908912062644958, "learning_rate": 8.609301652507616e-07, "loss": 1.7071, "step": 87996 }, { "epoch": 2.93, "grad_norm": 0.7379933595657349, "learning_rate": 8.601390657483554e-07, "loss": 1.7047, "step": 87997 }, { "epoch": 2.93, "grad_norm": 0.7284236550331116, "learning_rate": 8.593483293576564e-07, "loss": 1.6309, "step": 87998 }, { "epoch": 2.93, "grad_norm": 0.7345636487007141, "learning_rate": 8.585579560796307e-07, "loss": 1.6773, "step": 87999 }, { "epoch": 2.93, "grad_norm": 0.716506838798523, "learning_rate": 8.577679459152442e-07, "loss": 1.6658, "step": 88000 }, { "epoch": 2.93, "grad_norm": 0.7083752155303955, "learning_rate": 8.569782988654628e-07, "loss": 1.6853, "step": 88001 }, { "epoch": 2.93, "grad_norm": 0.7144430875778198, "learning_rate": 8.561890149311857e-07, "loss": 1.6175, "step": 88002 }, { "epoch": 2.93, "grad_norm": 0.7008103728294373, "learning_rate": 8.554000941134786e-07, "loss": 1.6187, "step": 88003 }, { "epoch": 2.93, "grad_norm": 0.6910714507102966, "learning_rate": 8.546115364132078e-07, "loss": 1.5974, "step": 88004 }, { "epoch": 2.93, "grad_norm": 0.6989102959632874, "learning_rate": 8.538233418313722e-07, "loss": 1.6481, "step": 88005 }, { "epoch": 2.93, "grad_norm": 0.734573245048523, "learning_rate": 8.530355103689046e-07, "loss": 1.7172, "step": 88006 }, { "epoch": 2.93, "grad_norm": 0.714847981929779, "learning_rate": 8.522480420267708e-07, "loss": 1.6803, "step": 88007 }, { "epoch": 2.93, "grad_norm": 0.6938487887382507, "learning_rate": 8.514609368059366e-07, "loss": 1.7254, "step": 88008 }, { "epoch": 2.93, "grad_norm": 0.7179138660430908, "learning_rate": 8.506741947073681e-07, "loss": 1.6616, "step": 88009 }, { "epoch": 2.93, "grad_norm": 0.7044998407363892, "learning_rate": 8.498878157319977e-07, "loss": 1.6631, "step": 88010 }, { "epoch": 2.93, "grad_norm": 0.7143754959106445, "learning_rate": 8.491017998807581e-07, "loss": 1.7661, "step": 88011 }, { "epoch": 2.93, "grad_norm": 0.7082863450050354, "learning_rate": 8.483161471546484e-07, "loss": 1.7391, "step": 88012 }, { "epoch": 2.93, "grad_norm": 0.7260385751724243, "learning_rate": 8.475308575546013e-07, "loss": 1.7565, "step": 88013 }, { "epoch": 2.93, "grad_norm": 0.6944968700408936, "learning_rate": 8.46745931081616e-07, "loss": 1.6171, "step": 88014 }, { "epoch": 2.93, "grad_norm": 0.9773566126823425, "learning_rate": 8.459613677365584e-07, "loss": 1.6368, "step": 88015 }, { "epoch": 2.93, "grad_norm": 0.7286249399185181, "learning_rate": 8.451771675204278e-07, "loss": 1.6252, "step": 88016 }, { "epoch": 2.93, "grad_norm": 0.7365728616714478, "learning_rate": 8.443933304341899e-07, "loss": 1.6471, "step": 88017 }, { "epoch": 2.93, "grad_norm": 0.6960657835006714, "learning_rate": 8.436098564787775e-07, "loss": 1.6499, "step": 88018 }, { "epoch": 2.93, "grad_norm": 0.7372989654541016, "learning_rate": 8.42826745655123e-07, "loss": 1.6817, "step": 88019 }, { "epoch": 2.93, "grad_norm": 0.7177969217300415, "learning_rate": 8.420439979641924e-07, "loss": 1.6598, "step": 88020 }, { "epoch": 2.93, "grad_norm": 0.7271260023117065, "learning_rate": 8.412616134069849e-07, "loss": 1.7468, "step": 88021 }, { "epoch": 2.93, "grad_norm": 0.7250335216522217, "learning_rate": 8.404795919843999e-07, "loss": 1.7134, "step": 88022 }, { "epoch": 2.93, "grad_norm": 0.6854361891746521, "learning_rate": 8.396979336973697e-07, "loss": 1.6342, "step": 88023 }, { "epoch": 2.93, "grad_norm": 0.7273924350738525, "learning_rate": 8.389166385468604e-07, "loss": 1.6775, "step": 88024 }, { "epoch": 2.93, "grad_norm": 0.7279046773910522, "learning_rate": 8.381357065338379e-07, "loss": 1.6107, "step": 88025 }, { "epoch": 2.93, "grad_norm": 0.7296062707901001, "learning_rate": 8.373551376592346e-07, "loss": 1.6942, "step": 88026 }, { "epoch": 2.93, "grad_norm": 0.6997887492179871, "learning_rate": 8.365749319240167e-07, "loss": 1.6975, "step": 88027 }, { "epoch": 2.93, "grad_norm": 0.7128846049308777, "learning_rate": 8.357950893291165e-07, "loss": 1.6141, "step": 88028 }, { "epoch": 2.93, "grad_norm": 0.7036845684051514, "learning_rate": 8.350156098754668e-07, "loss": 1.661, "step": 88029 }, { "epoch": 2.93, "grad_norm": 0.7195878624916077, "learning_rate": 8.342364935640333e-07, "loss": 1.6746, "step": 88030 }, { "epoch": 2.93, "grad_norm": 0.7013348340988159, "learning_rate": 8.33457740395782e-07, "loss": 1.6555, "step": 88031 }, { "epoch": 2.93, "grad_norm": 0.7188277244567871, "learning_rate": 8.326793503716122e-07, "loss": 1.6449, "step": 88032 }, { "epoch": 2.93, "grad_norm": 0.7410628199577332, "learning_rate": 8.319013234924898e-07, "loss": 1.6818, "step": 88033 }, { "epoch": 2.93, "grad_norm": 0.6985955238342285, "learning_rate": 8.311236597593474e-07, "loss": 1.6806, "step": 88034 }, { "epoch": 2.93, "grad_norm": 0.7374977469444275, "learning_rate": 8.303463591731507e-07, "loss": 1.6999, "step": 88035 }, { "epoch": 2.93, "grad_norm": 0.7194402813911438, "learning_rate": 8.295694217348325e-07, "loss": 1.6384, "step": 88036 }, { "epoch": 2.93, "grad_norm": 0.7349668741226196, "learning_rate": 8.287928474453587e-07, "loss": 1.6704, "step": 88037 }, { "epoch": 2.93, "grad_norm": 0.7324483394622803, "learning_rate": 8.280166363056284e-07, "loss": 1.7407, "step": 88038 }, { "epoch": 2.93, "grad_norm": 0.7147864103317261, "learning_rate": 8.272407883166077e-07, "loss": 1.6289, "step": 88039 }, { "epoch": 2.93, "grad_norm": 0.7100442051887512, "learning_rate": 8.26465303479229e-07, "loss": 1.5899, "step": 88040 }, { "epoch": 2.93, "grad_norm": 0.7440606951713562, "learning_rate": 8.256901817944251e-07, "loss": 1.6331, "step": 88041 }, { "epoch": 2.93, "grad_norm": 0.7289294004440308, "learning_rate": 8.249154232631949e-07, "loss": 1.7103, "step": 88042 }, { "epoch": 2.93, "grad_norm": 0.7347691655158997, "learning_rate": 8.241410278864047e-07, "loss": 1.6759, "step": 88043 }, { "epoch": 2.93, "grad_norm": 0.7255192995071411, "learning_rate": 8.233669956650202e-07, "loss": 1.6391, "step": 88044 }, { "epoch": 2.93, "grad_norm": 0.6898139119148254, "learning_rate": 8.225933266000073e-07, "loss": 1.6232, "step": 88045 }, { "epoch": 2.93, "grad_norm": 0.7012319564819336, "learning_rate": 8.218200206922654e-07, "loss": 1.7366, "step": 88046 }, { "epoch": 2.93, "grad_norm": 0.6894725561141968, "learning_rate": 8.210470779427602e-07, "loss": 1.6507, "step": 88047 }, { "epoch": 2.93, "grad_norm": 0.7061280012130737, "learning_rate": 8.202744983524245e-07, "loss": 1.658, "step": 88048 }, { "epoch": 2.93, "grad_norm": 0.7136741280555725, "learning_rate": 8.195022819222241e-07, "loss": 1.6945, "step": 88049 }, { "epoch": 2.93, "grad_norm": 0.7245976328849792, "learning_rate": 8.18730428653025e-07, "loss": 1.7065, "step": 88050 }, { "epoch": 2.93, "grad_norm": 0.741837739944458, "learning_rate": 8.179589385457929e-07, "loss": 1.6682, "step": 88051 }, { "epoch": 2.93, "grad_norm": 0.7346532344818115, "learning_rate": 8.17187811601494e-07, "loss": 1.6213, "step": 88052 }, { "epoch": 2.93, "grad_norm": 0.7049350142478943, "learning_rate": 8.164170478210607e-07, "loss": 1.6393, "step": 88053 }, { "epoch": 2.93, "grad_norm": 0.7184505462646484, "learning_rate": 8.156466472054257e-07, "loss": 1.6693, "step": 88054 }, { "epoch": 2.93, "grad_norm": 0.7204436659812927, "learning_rate": 8.148766097554882e-07, "loss": 1.6318, "step": 88055 }, { "epoch": 2.93, "grad_norm": 0.7281306385993958, "learning_rate": 8.14106935472214e-07, "loss": 1.7142, "step": 88056 }, { "epoch": 2.93, "grad_norm": 0.7188349366188049, "learning_rate": 8.13337624356536e-07, "loss": 1.7157, "step": 88057 }, { "epoch": 2.93, "grad_norm": 0.7160754203796387, "learning_rate": 8.125686764093531e-07, "loss": 1.6587, "step": 88058 }, { "epoch": 2.93, "grad_norm": 0.6977787613868713, "learning_rate": 8.118000916316647e-07, "loss": 1.6469, "step": 88059 }, { "epoch": 2.93, "grad_norm": 0.738057553768158, "learning_rate": 8.110318700243368e-07, "loss": 1.7113, "step": 88060 }, { "epoch": 2.93, "grad_norm": 2.0910282135009766, "learning_rate": 8.102640115883686e-07, "loss": 1.6878, "step": 88061 }, { "epoch": 2.93, "grad_norm": 0.7540250420570374, "learning_rate": 8.094965163246259e-07, "loss": 1.6973, "step": 88062 }, { "epoch": 2.93, "grad_norm": 0.7000032663345337, "learning_rate": 8.087293842340748e-07, "loss": 1.6419, "step": 88063 }, { "epoch": 2.93, "grad_norm": 0.7263560891151428, "learning_rate": 8.07962615317681e-07, "loss": 1.7261, "step": 88064 }, { "epoch": 2.93, "grad_norm": 0.6951110363006592, "learning_rate": 8.071962095762774e-07, "loss": 1.7004, "step": 88065 }, { "epoch": 2.93, "grad_norm": 0.719042181968689, "learning_rate": 8.064301670108963e-07, "loss": 1.7459, "step": 88066 }, { "epoch": 2.93, "grad_norm": 0.7225151062011719, "learning_rate": 8.056644876224038e-07, "loss": 1.6292, "step": 88067 }, { "epoch": 2.93, "grad_norm": 2.7205047607421875, "learning_rate": 8.048991714117659e-07, "loss": 1.6712, "step": 88068 }, { "epoch": 2.93, "grad_norm": 0.7356605529785156, "learning_rate": 8.041342183798816e-07, "loss": 1.7095, "step": 88069 }, { "epoch": 2.93, "grad_norm": 0.7169854640960693, "learning_rate": 8.033696285277169e-07, "loss": 1.6923, "step": 88070 }, { "epoch": 2.93, "grad_norm": 0.7291061878204346, "learning_rate": 8.026054018561379e-07, "loss": 1.6979, "step": 88071 }, { "epoch": 2.93, "grad_norm": 0.711797833442688, "learning_rate": 8.018415383661437e-07, "loss": 1.7392, "step": 88072 }, { "epoch": 2.93, "grad_norm": 0.7415231466293335, "learning_rate": 8.010780380586002e-07, "loss": 1.6904, "step": 88073 }, { "epoch": 2.93, "grad_norm": 0.7158424258232117, "learning_rate": 8.003149009345067e-07, "loss": 1.5918, "step": 88074 }, { "epoch": 2.93, "grad_norm": 0.723768949508667, "learning_rate": 7.995521269947291e-07, "loss": 1.6566, "step": 88075 }, { "epoch": 2.93, "grad_norm": 0.7161976099014282, "learning_rate": 7.987897162402002e-07, "loss": 1.7082, "step": 88076 }, { "epoch": 2.93, "grad_norm": 0.7166377902030945, "learning_rate": 7.980276686718523e-07, "loss": 1.6714, "step": 88077 }, { "epoch": 2.93, "grad_norm": 0.7224079966545105, "learning_rate": 7.972659842906182e-07, "loss": 1.7426, "step": 88078 }, { "epoch": 2.93, "grad_norm": 0.6916365623474121, "learning_rate": 7.96504663097397e-07, "loss": 1.6427, "step": 88079 }, { "epoch": 2.93, "grad_norm": 0.6979309916496277, "learning_rate": 7.957437050931548e-07, "loss": 1.6372, "step": 88080 }, { "epoch": 2.93, "grad_norm": 0.7064327597618103, "learning_rate": 7.949831102787907e-07, "loss": 1.6592, "step": 88081 }, { "epoch": 2.93, "grad_norm": 0.7233479022979736, "learning_rate": 7.942228786552374e-07, "loss": 1.6436, "step": 88082 }, { "epoch": 2.93, "grad_norm": 0.7068910598754883, "learning_rate": 7.934630102234274e-07, "loss": 1.6111, "step": 88083 }, { "epoch": 2.93, "grad_norm": 0.7046634554862976, "learning_rate": 7.927035049842601e-07, "loss": 1.7275, "step": 88084 }, { "epoch": 2.93, "grad_norm": 0.7160484194755554, "learning_rate": 7.919443629386346e-07, "loss": 1.7474, "step": 88085 }, { "epoch": 2.93, "grad_norm": 0.723568320274353, "learning_rate": 7.91185584087517e-07, "loss": 1.6912, "step": 88086 }, { "epoch": 2.93, "grad_norm": 0.7177993059158325, "learning_rate": 7.904271684318397e-07, "loss": 1.6512, "step": 88087 }, { "epoch": 2.93, "grad_norm": 0.7329396605491638, "learning_rate": 7.896691159724688e-07, "loss": 1.7022, "step": 88088 }, { "epoch": 2.93, "grad_norm": 0.7459511756896973, "learning_rate": 7.889114267103369e-07, "loss": 1.6709, "step": 88089 }, { "epoch": 2.93, "grad_norm": 0.7351070046424866, "learning_rate": 7.881541006464098e-07, "loss": 1.5924, "step": 88090 }, { "epoch": 2.93, "grad_norm": 0.7227987051010132, "learning_rate": 7.873971377815869e-07, "loss": 1.7368, "step": 88091 }, { "epoch": 2.93, "grad_norm": 0.7348740696907043, "learning_rate": 7.866405381167673e-07, "loss": 1.5999, "step": 88092 }, { "epoch": 2.93, "grad_norm": 0.6821458339691162, "learning_rate": 7.858843016528504e-07, "loss": 1.6331, "step": 88093 }, { "epoch": 2.93, "grad_norm": 0.7071097493171692, "learning_rate": 7.851284283908022e-07, "loss": 1.6564, "step": 88094 }, { "epoch": 2.93, "grad_norm": 0.7148565053939819, "learning_rate": 7.843729183315217e-07, "loss": 1.6918, "step": 88095 }, { "epoch": 2.93, "grad_norm": 0.7161763310432434, "learning_rate": 7.836177714759418e-07, "loss": 1.7151, "step": 88096 }, { "epoch": 2.93, "grad_norm": 0.7176972031593323, "learning_rate": 7.828629878249282e-07, "loss": 1.5785, "step": 88097 }, { "epoch": 2.93, "grad_norm": 0.704360842704773, "learning_rate": 7.82108567379447e-07, "loss": 1.7011, "step": 88098 }, { "epoch": 2.93, "grad_norm": 0.7404303550720215, "learning_rate": 7.813545101403973e-07, "loss": 1.6309, "step": 88099 }, { "epoch": 2.93, "grad_norm": 0.7147646546363831, "learning_rate": 7.806008161087118e-07, "loss": 1.6255, "step": 88100 }, { "epoch": 2.93, "grad_norm": 0.7194124460220337, "learning_rate": 7.798474852852898e-07, "loss": 1.6805, "step": 88101 }, { "epoch": 2.93, "grad_norm": 0.7099538445472717, "learning_rate": 7.790945176709973e-07, "loss": 1.6986, "step": 88102 }, { "epoch": 2.93, "grad_norm": 0.7055350542068481, "learning_rate": 7.783419132668666e-07, "loss": 1.7254, "step": 88103 }, { "epoch": 2.93, "grad_norm": 0.701755702495575, "learning_rate": 7.775896720736974e-07, "loss": 1.7073, "step": 88104 }, { "epoch": 2.93, "grad_norm": 0.7232934236526489, "learning_rate": 7.768377940924552e-07, "loss": 1.6908, "step": 88105 }, { "epoch": 2.93, "grad_norm": 0.7326337695121765, "learning_rate": 7.760862793240396e-07, "loss": 1.6816, "step": 88106 }, { "epoch": 2.93, "grad_norm": 0.7034075856208801, "learning_rate": 7.753351277693498e-07, "loss": 1.7095, "step": 88107 }, { "epoch": 2.93, "grad_norm": 0.7225069403648376, "learning_rate": 7.745843394293516e-07, "loss": 1.6856, "step": 88108 }, { "epoch": 2.93, "grad_norm": 0.7202649712562561, "learning_rate": 7.738339143048778e-07, "loss": 1.6383, "step": 88109 }, { "epoch": 2.93, "grad_norm": 0.7221314311027527, "learning_rate": 7.730838523969274e-07, "loss": 1.6034, "step": 88110 }, { "epoch": 2.93, "grad_norm": 0.7197795510292053, "learning_rate": 7.723341537063333e-07, "loss": 1.6967, "step": 88111 }, { "epoch": 2.93, "grad_norm": 0.7212469577789307, "learning_rate": 7.715848182340278e-07, "loss": 1.7067, "step": 88112 }, { "epoch": 2.93, "grad_norm": 0.722762942314148, "learning_rate": 7.708358459809439e-07, "loss": 1.6045, "step": 88113 }, { "epoch": 2.93, "grad_norm": 0.695949375629425, "learning_rate": 7.700872369479804e-07, "loss": 1.649, "step": 88114 }, { "epoch": 2.93, "grad_norm": 0.7180254459381104, "learning_rate": 7.693389911360371e-07, "loss": 1.6334, "step": 88115 }, { "epoch": 2.93, "grad_norm": 0.6815071105957031, "learning_rate": 7.685911085460128e-07, "loss": 1.6517, "step": 88116 }, { "epoch": 2.93, "grad_norm": 0.7023671865463257, "learning_rate": 7.678435891788737e-07, "loss": 1.6632, "step": 88117 }, { "epoch": 2.93, "grad_norm": 0.6889193058013916, "learning_rate": 7.670964330354524e-07, "loss": 1.7203, "step": 88118 }, { "epoch": 2.93, "grad_norm": 0.7235903739929199, "learning_rate": 7.663496401166813e-07, "loss": 1.7092, "step": 88119 }, { "epoch": 2.93, "grad_norm": 0.7160719633102417, "learning_rate": 7.6560321042346e-07, "loss": 1.6435, "step": 88120 }, { "epoch": 2.93, "grad_norm": 0.7426625490188599, "learning_rate": 7.648571439567541e-07, "loss": 1.6471, "step": 88121 }, { "epoch": 2.93, "grad_norm": 0.7299069166183472, "learning_rate": 7.641114407173632e-07, "loss": 1.6831, "step": 88122 }, { "epoch": 2.93, "grad_norm": 0.7145212292671204, "learning_rate": 7.633661007062864e-07, "loss": 1.6386, "step": 88123 }, { "epoch": 2.93, "grad_norm": 0.7195875644683838, "learning_rate": 7.626211239243896e-07, "loss": 1.6662, "step": 88124 }, { "epoch": 2.93, "grad_norm": 0.711936354637146, "learning_rate": 7.618765103725721e-07, "loss": 1.7093, "step": 88125 }, { "epoch": 2.93, "grad_norm": 0.731351912021637, "learning_rate": 7.611322600517333e-07, "loss": 1.6732, "step": 88126 }, { "epoch": 2.93, "grad_norm": 0.7406325936317444, "learning_rate": 7.603883729628057e-07, "loss": 1.6894, "step": 88127 }, { "epoch": 2.93, "grad_norm": 0.7223564982414246, "learning_rate": 7.596448491066886e-07, "loss": 1.7135, "step": 88128 }, { "epoch": 2.93, "grad_norm": 0.7229708433151245, "learning_rate": 7.58901688484248e-07, "loss": 1.7185, "step": 88129 }, { "epoch": 2.93, "grad_norm": 0.7236228585243225, "learning_rate": 7.581588910964164e-07, "loss": 1.6645, "step": 88130 }, { "epoch": 2.93, "grad_norm": 0.7075331211090088, "learning_rate": 7.574164569440932e-07, "loss": 1.6489, "step": 88131 }, { "epoch": 2.93, "grad_norm": 0.7249305844306946, "learning_rate": 7.566743860281776e-07, "loss": 1.6655, "step": 88132 }, { "epoch": 2.93, "grad_norm": 0.7197398543357849, "learning_rate": 7.559326783495689e-07, "loss": 1.6876, "step": 88133 }, { "epoch": 2.93, "grad_norm": 0.6974866986274719, "learning_rate": 7.551913339091664e-07, "loss": 1.6347, "step": 88134 }, { "epoch": 2.93, "grad_norm": 0.7817209362983704, "learning_rate": 7.544503527078694e-07, "loss": 1.6808, "step": 88135 }, { "epoch": 2.93, "grad_norm": 0.7398257851600647, "learning_rate": 7.537097347465437e-07, "loss": 1.7188, "step": 88136 }, { "epoch": 2.93, "grad_norm": 0.7277480959892273, "learning_rate": 7.529694800261554e-07, "loss": 1.6602, "step": 88137 }, { "epoch": 2.93, "grad_norm": 0.7237522006034851, "learning_rate": 7.522295885475704e-07, "loss": 1.623, "step": 88138 }, { "epoch": 2.93, "grad_norm": 0.7002103924751282, "learning_rate": 7.51490060311688e-07, "loss": 1.6925, "step": 88139 }, { "epoch": 2.93, "grad_norm": 0.7107692956924438, "learning_rate": 7.507508953194075e-07, "loss": 1.6693, "step": 88140 }, { "epoch": 2.93, "grad_norm": 0.7144153118133545, "learning_rate": 7.500120935715947e-07, "loss": 1.6974, "step": 88141 }, { "epoch": 2.93, "grad_norm": 0.9918152689933777, "learning_rate": 7.492736550692157e-07, "loss": 1.701, "step": 88142 }, { "epoch": 2.93, "grad_norm": 0.724473774433136, "learning_rate": 7.485355798131031e-07, "loss": 1.6863, "step": 88143 }, { "epoch": 2.93, "grad_norm": 0.6973331570625305, "learning_rate": 7.477978678041895e-07, "loss": 1.6401, "step": 88144 }, { "epoch": 2.93, "grad_norm": 0.7217606902122498, "learning_rate": 7.470605190433409e-07, "loss": 1.6711, "step": 88145 }, { "epoch": 2.93, "grad_norm": 0.7159648537635803, "learning_rate": 7.463235335314566e-07, "loss": 1.6847, "step": 88146 }, { "epoch": 2.93, "grad_norm": 0.7295052409172058, "learning_rate": 7.45586911269469e-07, "loss": 1.695, "step": 88147 }, { "epoch": 2.93, "grad_norm": 0.7094637155532837, "learning_rate": 7.44850652258211e-07, "loss": 1.7206, "step": 88148 }, { "epoch": 2.93, "grad_norm": 0.727388322353363, "learning_rate": 7.441147564986482e-07, "loss": 1.7676, "step": 88149 }, { "epoch": 2.93, "grad_norm": 0.6980685591697693, "learning_rate": 7.433792239916136e-07, "loss": 1.6375, "step": 88150 }, { "epoch": 2.93, "grad_norm": 0.718998908996582, "learning_rate": 7.426440547380396e-07, "loss": 1.6941, "step": 88151 }, { "epoch": 2.93, "grad_norm": 0.7318689227104187, "learning_rate": 7.419092487387923e-07, "loss": 1.6954, "step": 88152 }, { "epoch": 2.93, "grad_norm": 0.7075233459472656, "learning_rate": 7.411748059947708e-07, "loss": 1.6686, "step": 88153 }, { "epoch": 2.93, "grad_norm": 0.7067462205886841, "learning_rate": 7.404407265069079e-07, "loss": 1.7198, "step": 88154 }, { "epoch": 2.93, "grad_norm": 0.7272852659225464, "learning_rate": 7.397070102760027e-07, "loss": 1.6794, "step": 88155 }, { "epoch": 2.93, "grad_norm": 0.7305079698562622, "learning_rate": 7.389736573030214e-07, "loss": 1.6252, "step": 88156 }, { "epoch": 2.93, "grad_norm": 0.7279039621353149, "learning_rate": 7.382406675888297e-07, "loss": 1.6605, "step": 88157 }, { "epoch": 2.93, "grad_norm": 0.7416858673095703, "learning_rate": 7.37508041134327e-07, "loss": 1.7027, "step": 88158 }, { "epoch": 2.93, "grad_norm": 0.7223691344261169, "learning_rate": 7.367757779403794e-07, "loss": 1.6522, "step": 88159 }, { "epoch": 2.93, "grad_norm": 0.700090229511261, "learning_rate": 7.360438780079192e-07, "loss": 1.6647, "step": 88160 }, { "epoch": 2.93, "grad_norm": 0.7092449069023132, "learning_rate": 7.353123413377793e-07, "loss": 1.5944, "step": 88161 }, { "epoch": 2.93, "grad_norm": 0.6977773904800415, "learning_rate": 7.345811679308922e-07, "loss": 1.5974, "step": 88162 }, { "epoch": 2.93, "grad_norm": 0.7097225785255432, "learning_rate": 7.338503577881238e-07, "loss": 1.6611, "step": 88163 }, { "epoch": 2.93, "grad_norm": 0.7101948261260986, "learning_rate": 7.331199109103736e-07, "loss": 1.6549, "step": 88164 }, { "epoch": 2.93, "grad_norm": 0.7245469093322754, "learning_rate": 7.323898272985073e-07, "loss": 1.616, "step": 88165 }, { "epoch": 2.93, "grad_norm": 0.6990386843681335, "learning_rate": 7.316601069534245e-07, "loss": 1.6119, "step": 88166 }, { "epoch": 2.93, "grad_norm": 0.7000585794448853, "learning_rate": 7.309307498760241e-07, "loss": 1.684, "step": 88167 }, { "epoch": 2.93, "grad_norm": 0.7217163443565369, "learning_rate": 7.302017560671725e-07, "loss": 1.7006, "step": 88168 }, { "epoch": 2.93, "grad_norm": 0.7151641249656677, "learning_rate": 7.294731255277353e-07, "loss": 1.6661, "step": 88169 }, { "epoch": 2.93, "grad_norm": 0.7259722352027893, "learning_rate": 7.287448582586452e-07, "loss": 1.7319, "step": 88170 }, { "epoch": 2.93, "grad_norm": 0.7092758417129517, "learning_rate": 7.280169542607683e-07, "loss": 1.7244, "step": 88171 }, { "epoch": 2.93, "grad_norm": 0.6963596940040588, "learning_rate": 7.272894135349705e-07, "loss": 1.6853, "step": 88172 }, { "epoch": 2.93, "grad_norm": 0.6875899434089661, "learning_rate": 7.265622360821843e-07, "loss": 1.6523, "step": 88173 }, { "epoch": 2.93, "grad_norm": 0.7244042754173279, "learning_rate": 7.258354219032092e-07, "loss": 1.7003, "step": 88174 }, { "epoch": 2.93, "grad_norm": 0.7288080453872681, "learning_rate": 7.25108970999011e-07, "loss": 1.6963, "step": 88175 }, { "epoch": 2.93, "grad_norm": 0.7066967487335205, "learning_rate": 7.24382883370389e-07, "loss": 1.6255, "step": 88176 }, { "epoch": 2.93, "grad_norm": 0.7117196917533875, "learning_rate": 7.236571590183093e-07, "loss": 1.6204, "step": 88177 }, { "epoch": 2.93, "grad_norm": 0.7079688906669617, "learning_rate": 7.229317979436045e-07, "loss": 1.6443, "step": 88178 }, { "epoch": 2.93, "grad_norm": 0.7081841826438904, "learning_rate": 7.222068001471404e-07, "loss": 1.6302, "step": 88179 }, { "epoch": 2.93, "grad_norm": 0.6866125464439392, "learning_rate": 7.214821656298498e-07, "loss": 1.5931, "step": 88180 }, { "epoch": 2.93, "grad_norm": 0.712840735912323, "learning_rate": 7.207578943925985e-07, "loss": 1.6432, "step": 88181 }, { "epoch": 2.93, "grad_norm": 0.7173147797584534, "learning_rate": 7.200339864362192e-07, "loss": 1.6346, "step": 88182 }, { "epoch": 2.93, "grad_norm": 0.7164830565452576, "learning_rate": 7.193104417616114e-07, "loss": 1.7421, "step": 88183 }, { "epoch": 2.93, "grad_norm": 0.6970800161361694, "learning_rate": 7.185872603697074e-07, "loss": 1.6979, "step": 88184 }, { "epoch": 2.93, "grad_norm": 0.7314414978027344, "learning_rate": 7.178644422613067e-07, "loss": 1.6985, "step": 88185 }, { "epoch": 2.93, "grad_norm": 0.6965166926383972, "learning_rate": 7.171419874373085e-07, "loss": 1.6783, "step": 88186 }, { "epoch": 2.93, "grad_norm": 0.711276113986969, "learning_rate": 7.164198958986455e-07, "loss": 1.7278, "step": 88187 }, { "epoch": 2.93, "grad_norm": 0.7234082221984863, "learning_rate": 7.156981676461171e-07, "loss": 1.6843, "step": 88188 }, { "epoch": 2.93, "grad_norm": 0.7036129832267761, "learning_rate": 7.149768026806557e-07, "loss": 1.6583, "step": 88189 }, { "epoch": 2.93, "grad_norm": 0.711720883846283, "learning_rate": 7.142558010030941e-07, "loss": 1.6295, "step": 88190 }, { "epoch": 2.93, "grad_norm": 0.7177839875221252, "learning_rate": 7.135351626143315e-07, "loss": 1.687, "step": 88191 }, { "epoch": 2.93, "grad_norm": 0.7101942300796509, "learning_rate": 7.128148875152673e-07, "loss": 1.6655, "step": 88192 }, { "epoch": 2.93, "grad_norm": 0.7186697125434875, "learning_rate": 7.120949757067007e-07, "loss": 1.7174, "step": 88193 }, { "epoch": 2.93, "grad_norm": 0.7308750152587891, "learning_rate": 7.113754271895977e-07, "loss": 1.6808, "step": 88194 }, { "epoch": 2.93, "grad_norm": 0.7044524550437927, "learning_rate": 7.10656241964791e-07, "loss": 1.6698, "step": 88195 }, { "epoch": 2.93, "grad_norm": 0.6960828304290771, "learning_rate": 7.099374200331131e-07, "loss": 1.6172, "step": 88196 }, { "epoch": 2.93, "grad_norm": 0.7243966460227966, "learning_rate": 7.092189613954636e-07, "loss": 1.6886, "step": 88197 }, { "epoch": 2.93, "grad_norm": 0.7130317091941833, "learning_rate": 7.085008660527746e-07, "loss": 1.6541, "step": 88198 }, { "epoch": 2.93, "grad_norm": 0.7097373008728027, "learning_rate": 7.077831340058459e-07, "loss": 1.6288, "step": 88199 }, { "epoch": 2.93, "grad_norm": 0.72503662109375, "learning_rate": 7.070657652555434e-07, "loss": 1.7189, "step": 88200 }, { "epoch": 2.93, "grad_norm": 0.7386298775672913, "learning_rate": 7.063487598027995e-07, "loss": 1.6931, "step": 88201 }, { "epoch": 2.93, "grad_norm": 0.7066513895988464, "learning_rate": 7.056321176484469e-07, "loss": 1.6352, "step": 88202 }, { "epoch": 2.93, "grad_norm": 0.7508530020713806, "learning_rate": 7.049158387933184e-07, "loss": 1.6424, "step": 88203 }, { "epoch": 2.93, "grad_norm": 0.7104189991950989, "learning_rate": 7.041999232383466e-07, "loss": 1.7112, "step": 88204 }, { "epoch": 2.93, "grad_norm": 0.7470372915267944, "learning_rate": 7.034843709843974e-07, "loss": 1.6989, "step": 88205 }, { "epoch": 2.93, "grad_norm": 0.7220064997673035, "learning_rate": 7.027691820323034e-07, "loss": 1.707, "step": 88206 }, { "epoch": 2.93, "grad_norm": 0.7360732555389404, "learning_rate": 7.020543563829306e-07, "loss": 1.7249, "step": 88207 }, { "epoch": 2.93, "grad_norm": 0.7163268327713013, "learning_rate": 7.013398940371783e-07, "loss": 1.6821, "step": 88208 }, { "epoch": 2.93, "grad_norm": 0.7044983506202698, "learning_rate": 7.006257949959126e-07, "loss": 1.6524, "step": 88209 }, { "epoch": 2.93, "grad_norm": 0.7061431407928467, "learning_rate": 6.99912059259966e-07, "loss": 1.7125, "step": 88210 }, { "epoch": 2.93, "grad_norm": 0.71886146068573, "learning_rate": 6.991986868302712e-07, "loss": 1.7367, "step": 88211 }, { "epoch": 2.93, "grad_norm": 0.7190206050872803, "learning_rate": 6.984856777075943e-07, "loss": 1.6663, "step": 88212 }, { "epoch": 2.93, "grad_norm": 0.7396865487098694, "learning_rate": 6.977730318928676e-07, "loss": 1.668, "step": 88213 }, { "epoch": 2.93, "grad_norm": 0.7121427059173584, "learning_rate": 6.970607493869906e-07, "loss": 1.641, "step": 88214 }, { "epoch": 2.93, "grad_norm": 0.7610666751861572, "learning_rate": 6.963488301907294e-07, "loss": 1.7557, "step": 88215 }, { "epoch": 2.93, "grad_norm": 0.6998034119606018, "learning_rate": 6.956372743050165e-07, "loss": 1.6617, "step": 88216 }, { "epoch": 2.93, "grad_norm": 0.7176901698112488, "learning_rate": 6.949260817306846e-07, "loss": 1.6986, "step": 88217 }, { "epoch": 2.94, "grad_norm": 0.7294079661369324, "learning_rate": 6.942152524686329e-07, "loss": 1.6563, "step": 88218 }, { "epoch": 2.94, "grad_norm": 0.7026865482330322, "learning_rate": 6.935047865196941e-07, "loss": 1.6651, "step": 88219 }, { "epoch": 2.94, "grad_norm": 0.7151399254798889, "learning_rate": 6.927946838847342e-07, "loss": 1.7242, "step": 88220 }, { "epoch": 2.94, "grad_norm": 0.7149091362953186, "learning_rate": 6.920849445646526e-07, "loss": 1.6749, "step": 88221 }, { "epoch": 2.94, "grad_norm": 0.7344176173210144, "learning_rate": 6.913755685602484e-07, "loss": 1.7506, "step": 88222 }, { "epoch": 2.94, "grad_norm": 0.7020137906074524, "learning_rate": 6.906665558724212e-07, "loss": 1.6749, "step": 88223 }, { "epoch": 2.94, "grad_norm": 0.7320052981376648, "learning_rate": 6.899579065020367e-07, "loss": 1.6478, "step": 88224 }, { "epoch": 2.94, "grad_norm": 0.69402015209198, "learning_rate": 6.892496204499276e-07, "loss": 1.6044, "step": 88225 }, { "epoch": 2.94, "grad_norm": 0.7123215794563293, "learning_rate": 6.8854169771696e-07, "loss": 1.7177, "step": 88226 }, { "epoch": 2.94, "grad_norm": 0.7187520861625671, "learning_rate": 6.878341383039998e-07, "loss": 1.667, "step": 88227 }, { "epoch": 2.94, "grad_norm": 0.7304444313049316, "learning_rate": 6.87126942211913e-07, "loss": 1.5358, "step": 88228 }, { "epoch": 2.94, "grad_norm": 0.7114192247390747, "learning_rate": 6.864201094415656e-07, "loss": 1.725, "step": 88229 }, { "epoch": 2.94, "grad_norm": 0.7152076363563538, "learning_rate": 6.857136399937902e-07, "loss": 1.6202, "step": 88230 }, { "epoch": 2.94, "grad_norm": 0.7064322233200073, "learning_rate": 6.850075338694528e-07, "loss": 1.6334, "step": 88231 }, { "epoch": 2.94, "grad_norm": 0.7248771786689758, "learning_rate": 6.843017910694193e-07, "loss": 1.6965, "step": 88232 }, { "epoch": 2.94, "grad_norm": 0.7060858607292175, "learning_rate": 6.835964115945558e-07, "loss": 1.6148, "step": 88233 }, { "epoch": 2.94, "grad_norm": 0.7135586142539978, "learning_rate": 6.828913954456949e-07, "loss": 1.6282, "step": 88234 }, { "epoch": 2.94, "grad_norm": 0.7182673811912537, "learning_rate": 6.821867426237026e-07, "loss": 1.6902, "step": 88235 }, { "epoch": 2.94, "grad_norm": 0.725652813911438, "learning_rate": 6.814824531294116e-07, "loss": 1.6984, "step": 88236 }, { "epoch": 2.94, "grad_norm": 0.6992691159248352, "learning_rate": 6.807785269637212e-07, "loss": 1.6839, "step": 88237 }, { "epoch": 2.94, "grad_norm": 0.727399468421936, "learning_rate": 6.800749641274638e-07, "loss": 1.6733, "step": 88238 }, { "epoch": 2.94, "grad_norm": 0.712303102016449, "learning_rate": 6.793717646214725e-07, "loss": 1.7032, "step": 88239 }, { "epoch": 2.94, "grad_norm": 0.7211863398551941, "learning_rate": 6.786689284466462e-07, "loss": 1.6554, "step": 88240 }, { "epoch": 2.94, "grad_norm": 0.7166098356246948, "learning_rate": 6.779664556038178e-07, "loss": 1.6457, "step": 88241 }, { "epoch": 2.94, "grad_norm": 0.7027130722999573, "learning_rate": 6.772643460937866e-07, "loss": 1.7144, "step": 88242 }, { "epoch": 2.94, "grad_norm": 0.695546567440033, "learning_rate": 6.765625999174851e-07, "loss": 1.6837, "step": 88243 }, { "epoch": 2.94, "grad_norm": 0.7015969157218933, "learning_rate": 6.75861217075746e-07, "loss": 1.6204, "step": 88244 }, { "epoch": 2.94, "grad_norm": 0.7322543263435364, "learning_rate": 6.751601975694021e-07, "loss": 1.7034, "step": 88245 }, { "epoch": 2.94, "grad_norm": 0.7126030921936035, "learning_rate": 6.74459541399286e-07, "loss": 1.6852, "step": 88246 }, { "epoch": 2.94, "grad_norm": 0.7165553569793701, "learning_rate": 6.737592485662968e-07, "loss": 1.6896, "step": 88247 }, { "epoch": 2.94, "grad_norm": 0.7006857991218567, "learning_rate": 6.730593190712674e-07, "loss": 1.6715, "step": 88248 }, { "epoch": 2.94, "grad_norm": 0.7160361409187317, "learning_rate": 6.723597529150304e-07, "loss": 1.6731, "step": 88249 }, { "epoch": 2.94, "grad_norm": 0.7156871557235718, "learning_rate": 6.716605500984185e-07, "loss": 1.627, "step": 88250 }, { "epoch": 2.94, "grad_norm": 0.7290903925895691, "learning_rate": 6.709617106223308e-07, "loss": 1.7516, "step": 88251 }, { "epoch": 2.94, "grad_norm": 0.7301285862922668, "learning_rate": 6.702632344876002e-07, "loss": 1.6221, "step": 88252 }, { "epoch": 2.94, "grad_norm": 0.7300190329551697, "learning_rate": 6.695651216950593e-07, "loss": 1.7013, "step": 88253 }, { "epoch": 2.94, "grad_norm": 0.7090561389923096, "learning_rate": 6.688673722455406e-07, "loss": 1.6384, "step": 88254 }, { "epoch": 2.94, "grad_norm": 0.7403436899185181, "learning_rate": 6.681699861399437e-07, "loss": 1.6149, "step": 88255 }, { "epoch": 2.94, "grad_norm": 0.6987327933311462, "learning_rate": 6.674729633790676e-07, "loss": 1.6491, "step": 88256 }, { "epoch": 2.94, "grad_norm": 0.7247498035430908, "learning_rate": 6.667763039637786e-07, "loss": 1.7196, "step": 88257 }, { "epoch": 2.94, "grad_norm": 0.6786371469497681, "learning_rate": 6.660800078949091e-07, "loss": 1.6478, "step": 88258 }, { "epoch": 2.94, "grad_norm": 0.7000301480293274, "learning_rate": 6.653840751733252e-07, "loss": 1.6262, "step": 88259 }, { "epoch": 2.94, "grad_norm": 0.7111653089523315, "learning_rate": 6.646885057998597e-07, "loss": 1.6812, "step": 88260 }, { "epoch": 2.94, "grad_norm": 0.7326690554618835, "learning_rate": 6.639932997753783e-07, "loss": 1.7402, "step": 88261 }, { "epoch": 2.94, "grad_norm": 0.7342681884765625, "learning_rate": 6.632984571006805e-07, "loss": 1.6951, "step": 88262 }, { "epoch": 2.94, "grad_norm": 0.7113246321678162, "learning_rate": 6.626039777766323e-07, "loss": 1.6238, "step": 88263 }, { "epoch": 2.94, "grad_norm": 0.7087495923042297, "learning_rate": 6.619098618040663e-07, "loss": 1.6528, "step": 88264 }, { "epoch": 2.94, "grad_norm": 0.7207111716270447, "learning_rate": 6.612161091838486e-07, "loss": 1.6507, "step": 88265 }, { "epoch": 2.94, "grad_norm": 0.7545249462127686, "learning_rate": 6.605227199168117e-07, "loss": 1.7183, "step": 88266 }, { "epoch": 2.94, "grad_norm": 0.7212694883346558, "learning_rate": 6.598296940037884e-07, "loss": 1.6637, "step": 88267 }, { "epoch": 2.94, "grad_norm": 0.7049521207809448, "learning_rate": 6.591370314456445e-07, "loss": 1.6726, "step": 88268 }, { "epoch": 2.94, "grad_norm": 0.730228066444397, "learning_rate": 6.584447322431796e-07, "loss": 1.7559, "step": 88269 }, { "epoch": 2.94, "grad_norm": 0.7141953110694885, "learning_rate": 6.577527963972596e-07, "loss": 1.5845, "step": 88270 }, { "epoch": 2.94, "grad_norm": 0.7150218486785889, "learning_rate": 6.570612239087169e-07, "loss": 1.693, "step": 88271 }, { "epoch": 2.94, "grad_norm": 0.7161262035369873, "learning_rate": 6.563700147784179e-07, "loss": 1.6607, "step": 88272 }, { "epoch": 2.94, "grad_norm": 0.7385642528533936, "learning_rate": 6.556791690071616e-07, "loss": 1.638, "step": 88273 }, { "epoch": 2.94, "grad_norm": 0.7054411172866821, "learning_rate": 6.549886865958143e-07, "loss": 1.6762, "step": 88274 }, { "epoch": 2.94, "grad_norm": 0.7241217494010925, "learning_rate": 6.542985675452083e-07, "loss": 1.7253, "step": 88275 }, { "epoch": 2.94, "grad_norm": 0.7291851043701172, "learning_rate": 6.536088118561766e-07, "loss": 1.6633, "step": 88276 }, { "epoch": 2.94, "grad_norm": 0.7162320613861084, "learning_rate": 6.529194195295517e-07, "loss": 1.629, "step": 88277 }, { "epoch": 2.94, "grad_norm": 0.7272858023643494, "learning_rate": 6.522303905661663e-07, "loss": 1.6131, "step": 88278 }, { "epoch": 2.94, "grad_norm": 0.7108355164527893, "learning_rate": 6.51541724966853e-07, "loss": 1.694, "step": 88279 }, { "epoch": 2.94, "grad_norm": 0.7318372130393982, "learning_rate": 6.508534227325112e-07, "loss": 1.6931, "step": 88280 }, { "epoch": 2.94, "grad_norm": 0.7268322110176086, "learning_rate": 6.501654838638736e-07, "loss": 1.6857, "step": 88281 }, { "epoch": 2.94, "grad_norm": 0.7208976745605469, "learning_rate": 6.494779083618729e-07, "loss": 1.7123, "step": 88282 }, { "epoch": 2.94, "grad_norm": 0.683295726776123, "learning_rate": 6.487906962272749e-07, "loss": 1.6559, "step": 88283 }, { "epoch": 2.94, "grad_norm": 0.7298375964164734, "learning_rate": 6.481038474609457e-07, "loss": 1.7242, "step": 88284 }, { "epoch": 2.94, "grad_norm": 0.7106852531433105, "learning_rate": 6.474173620636846e-07, "loss": 1.6658, "step": 88285 }, { "epoch": 2.94, "grad_norm": 0.7307783365249634, "learning_rate": 6.467312400363911e-07, "loss": 1.6895, "step": 88286 }, { "epoch": 2.94, "grad_norm": 0.6998794078826904, "learning_rate": 6.46045481379831e-07, "loss": 1.7025, "step": 88287 }, { "epoch": 2.94, "grad_norm": 0.7280604243278503, "learning_rate": 6.453600860948705e-07, "loss": 1.6473, "step": 88288 }, { "epoch": 2.94, "grad_norm": 0.7302277088165283, "learning_rate": 6.44675054182342e-07, "loss": 1.672, "step": 88289 }, { "epoch": 2.94, "grad_norm": 0.7178184986114502, "learning_rate": 6.439903856430784e-07, "loss": 1.692, "step": 88290 }, { "epoch": 2.94, "grad_norm": 0.7211116552352905, "learning_rate": 6.433060804778789e-07, "loss": 1.6771, "step": 88291 }, { "epoch": 2.94, "grad_norm": 0.7416261434555054, "learning_rate": 6.426221386876096e-07, "loss": 1.6891, "step": 88292 }, { "epoch": 2.94, "grad_norm": 0.7522254586219788, "learning_rate": 6.419385602730698e-07, "loss": 1.6948, "step": 88293 }, { "epoch": 2.94, "grad_norm": 0.7181442975997925, "learning_rate": 6.412553452351255e-07, "loss": 1.6749, "step": 88294 }, { "epoch": 2.94, "grad_norm": 0.718241274356842, "learning_rate": 6.405724935746093e-07, "loss": 1.6912, "step": 88295 }, { "epoch": 2.94, "grad_norm": 0.7225910425186157, "learning_rate": 6.398900052922873e-07, "loss": 1.7169, "step": 88296 }, { "epoch": 2.94, "grad_norm": 0.7102421522140503, "learning_rate": 6.392078803890588e-07, "loss": 1.6275, "step": 88297 }, { "epoch": 2.94, "grad_norm": 0.7074919939041138, "learning_rate": 6.385261188657231e-07, "loss": 1.6453, "step": 88298 }, { "epoch": 2.94, "grad_norm": 0.6993672251701355, "learning_rate": 6.37844720723113e-07, "loss": 1.6751, "step": 88299 }, { "epoch": 2.94, "grad_norm": 0.6973468065261841, "learning_rate": 6.371636859620277e-07, "loss": 1.6484, "step": 88300 }, { "epoch": 2.94, "grad_norm": 0.7271873950958252, "learning_rate": 6.364830145833332e-07, "loss": 1.6752, "step": 88301 }, { "epoch": 2.94, "grad_norm": 0.7249451279640198, "learning_rate": 6.358027065878291e-07, "loss": 1.6694, "step": 88302 }, { "epoch": 2.94, "grad_norm": 0.6991939544677734, "learning_rate": 6.351227619763477e-07, "loss": 1.6742, "step": 88303 }, { "epoch": 2.94, "grad_norm": 0.7089908719062805, "learning_rate": 6.344431807497219e-07, "loss": 1.6813, "step": 88304 }, { "epoch": 2.94, "grad_norm": 0.8259212970733643, "learning_rate": 6.337639629087843e-07, "loss": 1.6592, "step": 88305 }, { "epoch": 2.94, "grad_norm": 0.72282475233078, "learning_rate": 6.330851084543343e-07, "loss": 1.6481, "step": 88306 }, { "epoch": 2.94, "grad_norm": 0.7184907793998718, "learning_rate": 6.324066173872044e-07, "loss": 1.6847, "step": 88307 }, { "epoch": 2.94, "grad_norm": 0.718818187713623, "learning_rate": 6.317284897082608e-07, "loss": 1.6211, "step": 88308 }, { "epoch": 2.94, "grad_norm": 0.7302594780921936, "learning_rate": 6.310507254182695e-07, "loss": 1.7302, "step": 88309 }, { "epoch": 2.94, "grad_norm": 0.7217432260513306, "learning_rate": 6.303733245180631e-07, "loss": 1.6265, "step": 88310 }, { "epoch": 2.94, "grad_norm": 0.6996895670890808, "learning_rate": 6.296962870084743e-07, "loss": 1.668, "step": 88311 }, { "epoch": 2.94, "grad_norm": 0.7210146188735962, "learning_rate": 6.290196128903357e-07, "loss": 1.7142, "step": 88312 }, { "epoch": 2.94, "grad_norm": 0.7159505486488342, "learning_rate": 6.283433021644136e-07, "loss": 1.6163, "step": 88313 }, { "epoch": 2.94, "grad_norm": 0.727729856967926, "learning_rate": 6.276673548316069e-07, "loss": 1.6405, "step": 88314 }, { "epoch": 2.94, "grad_norm": 0.7131351232528687, "learning_rate": 6.269917708927152e-07, "loss": 1.6784, "step": 88315 }, { "epoch": 2.94, "grad_norm": 0.7160390019416809, "learning_rate": 6.263165503485379e-07, "loss": 1.6908, "step": 88316 }, { "epoch": 2.94, "grad_norm": 0.7160800099372864, "learning_rate": 6.256416931998742e-07, "loss": 1.7741, "step": 88317 }, { "epoch": 2.94, "grad_norm": 0.7020857930183411, "learning_rate": 6.249671994475902e-07, "loss": 1.7012, "step": 88318 }, { "epoch": 2.94, "grad_norm": 0.7194355130195618, "learning_rate": 6.242930690924853e-07, "loss": 1.633, "step": 88319 }, { "epoch": 2.94, "grad_norm": 0.6877264380455017, "learning_rate": 6.23619302135392e-07, "loss": 1.5988, "step": 88320 }, { "epoch": 2.94, "grad_norm": 0.6997120976448059, "learning_rate": 6.229458985771096e-07, "loss": 1.6923, "step": 88321 }, { "epoch": 2.94, "grad_norm": 0.7357913255691528, "learning_rate": 6.222728584184377e-07, "loss": 1.6984, "step": 88322 }, { "epoch": 2.94, "grad_norm": 0.7313761115074158, "learning_rate": 6.21600181660209e-07, "loss": 1.718, "step": 88323 }, { "epoch": 2.94, "grad_norm": 0.7131913304328918, "learning_rate": 6.209278683032892e-07, "loss": 1.6637, "step": 88324 }, { "epoch": 2.94, "grad_norm": 0.6936728954315186, "learning_rate": 6.202559183484112e-07, "loss": 1.6499, "step": 88325 }, { "epoch": 2.94, "grad_norm": 0.7175161242485046, "learning_rate": 6.19584331796441e-07, "loss": 1.657, "step": 88326 }, { "epoch": 2.94, "grad_norm": 0.7427799701690674, "learning_rate": 6.189131086482113e-07, "loss": 1.7292, "step": 88327 }, { "epoch": 2.94, "grad_norm": 0.7194551825523376, "learning_rate": 6.18242248904488e-07, "loss": 1.6688, "step": 88328 }, { "epoch": 2.94, "grad_norm": 0.7234857082366943, "learning_rate": 6.175717525661039e-07, "loss": 1.6943, "step": 88329 }, { "epoch": 2.94, "grad_norm": 0.7127530574798584, "learning_rate": 6.169016196338583e-07, "loss": 1.7202, "step": 88330 }, { "epoch": 2.94, "grad_norm": 0.7280381321907043, "learning_rate": 6.162318501086172e-07, "loss": 1.5922, "step": 88331 }, { "epoch": 2.94, "grad_norm": 0.7227334380149841, "learning_rate": 6.155624439911466e-07, "loss": 1.6937, "step": 88332 }, { "epoch": 2.94, "grad_norm": 0.7263554334640503, "learning_rate": 6.148934012822793e-07, "loss": 1.6506, "step": 88333 }, { "epoch": 2.94, "grad_norm": 0.7052320837974548, "learning_rate": 6.142247219827812e-07, "loss": 1.7132, "step": 88334 }, { "epoch": 2.94, "grad_norm": 0.7256937026977539, "learning_rate": 6.135564060935516e-07, "loss": 1.7402, "step": 88335 }, { "epoch": 2.94, "grad_norm": 0.7165642976760864, "learning_rate": 6.128884536153233e-07, "loss": 1.6621, "step": 88336 }, { "epoch": 2.94, "grad_norm": 0.7369531989097595, "learning_rate": 6.12220864548929e-07, "loss": 1.6316, "step": 88337 }, { "epoch": 2.94, "grad_norm": 0.7555084824562073, "learning_rate": 6.115536388952013e-07, "loss": 1.7335, "step": 88338 }, { "epoch": 2.94, "grad_norm": 0.7379702925682068, "learning_rate": 6.108867766549397e-07, "loss": 1.6256, "step": 88339 }, { "epoch": 2.94, "grad_norm": 0.7107239365577698, "learning_rate": 6.102202778289434e-07, "loss": 1.6225, "step": 88340 }, { "epoch": 2.94, "grad_norm": 0.7067577242851257, "learning_rate": 6.095541424180117e-07, "loss": 1.6587, "step": 88341 }, { "epoch": 2.94, "grad_norm": 0.6878073215484619, "learning_rate": 6.088883704229774e-07, "loss": 1.6224, "step": 88342 }, { "epoch": 2.94, "grad_norm": 0.6988743543624878, "learning_rate": 6.082229618446399e-07, "loss": 1.6688, "step": 88343 }, { "epoch": 2.94, "grad_norm": 0.7099615931510925, "learning_rate": 6.075579166837985e-07, "loss": 1.7057, "step": 88344 }, { "epoch": 2.94, "grad_norm": 0.7379029989242554, "learning_rate": 6.068932349412859e-07, "loss": 1.6566, "step": 88345 }, { "epoch": 2.94, "grad_norm": 0.7298815846443176, "learning_rate": 6.062289166179013e-07, "loss": 1.7428, "step": 88346 }, { "epoch": 2.94, "grad_norm": 0.725622832775116, "learning_rate": 6.055649617144109e-07, "loss": 1.6547, "step": 88347 }, { "epoch": 2.94, "grad_norm": 0.6962169408798218, "learning_rate": 6.049013702316807e-07, "loss": 1.6326, "step": 88348 }, { "epoch": 2.94, "grad_norm": 0.7401785850524902, "learning_rate": 6.042381421704434e-07, "loss": 1.6667, "step": 88349 }, { "epoch": 2.94, "grad_norm": 0.7108137607574463, "learning_rate": 6.03575277531565e-07, "loss": 1.7041, "step": 88350 }, { "epoch": 2.94, "grad_norm": 0.7228471636772156, "learning_rate": 6.029127763158448e-07, "loss": 1.711, "step": 88351 }, { "epoch": 2.94, "grad_norm": 0.7015248537063599, "learning_rate": 6.022506385240822e-07, "loss": 1.7266, "step": 88352 }, { "epoch": 2.94, "grad_norm": 0.7065655589103699, "learning_rate": 6.015888641570765e-07, "loss": 1.7007, "step": 88353 }, { "epoch": 2.94, "grad_norm": 0.7324745059013367, "learning_rate": 6.009274532155939e-07, "loss": 1.6364, "step": 88354 }, { "epoch": 2.94, "grad_norm": 0.7113736271858215, "learning_rate": 6.002664057005003e-07, "loss": 1.6855, "step": 88355 }, { "epoch": 2.94, "grad_norm": 0.7047135233879089, "learning_rate": 5.996057216125616e-07, "loss": 1.6972, "step": 88356 }, { "epoch": 2.94, "grad_norm": 0.699641227722168, "learning_rate": 5.989454009525773e-07, "loss": 1.6066, "step": 88357 }, { "epoch": 2.94, "grad_norm": 0.6911715269088745, "learning_rate": 5.982854437213802e-07, "loss": 1.6767, "step": 88358 }, { "epoch": 2.94, "grad_norm": 0.7239226698875427, "learning_rate": 5.976258499197028e-07, "loss": 1.6925, "step": 88359 }, { "epoch": 2.94, "grad_norm": 0.703782856464386, "learning_rate": 5.969666195484446e-07, "loss": 1.7145, "step": 88360 }, { "epoch": 2.94, "grad_norm": 0.7062090635299683, "learning_rate": 5.963077526083382e-07, "loss": 1.6349, "step": 88361 }, { "epoch": 2.94, "grad_norm": 0.7292895317077637, "learning_rate": 5.956492491001829e-07, "loss": 1.7427, "step": 88362 }, { "epoch": 2.94, "grad_norm": 0.7168923020362854, "learning_rate": 5.949911090248117e-07, "loss": 1.678, "step": 88363 }, { "epoch": 2.94, "grad_norm": 0.7096969485282898, "learning_rate": 5.943333323829901e-07, "loss": 1.6174, "step": 88364 }, { "epoch": 2.94, "grad_norm": 0.7180601954460144, "learning_rate": 5.936759191755513e-07, "loss": 1.7033, "step": 88365 }, { "epoch": 2.94, "grad_norm": 0.7662486433982849, "learning_rate": 5.930188694032612e-07, "loss": 1.7392, "step": 88366 }, { "epoch": 2.94, "grad_norm": 0.7080763578414917, "learning_rate": 5.92362183066919e-07, "loss": 1.681, "step": 88367 }, { "epoch": 2.94, "grad_norm": 0.6999995708465576, "learning_rate": 5.917058601673575e-07, "loss": 1.6111, "step": 88368 }, { "epoch": 2.94, "grad_norm": 0.7290814518928528, "learning_rate": 5.910499007053426e-07, "loss": 1.6598, "step": 88369 }, { "epoch": 2.94, "grad_norm": 0.713141679763794, "learning_rate": 5.903943046816739e-07, "loss": 1.6597, "step": 88370 }, { "epoch": 2.94, "grad_norm": 0.7294203042984009, "learning_rate": 5.897390720971507e-07, "loss": 1.7056, "step": 88371 }, { "epoch": 2.94, "grad_norm": 0.7361660003662109, "learning_rate": 5.890842029525722e-07, "loss": 1.7168, "step": 88372 }, { "epoch": 2.94, "grad_norm": 0.6957782506942749, "learning_rate": 5.884296972487379e-07, "loss": 1.7127, "step": 88373 }, { "epoch": 2.94, "grad_norm": 0.7056798338890076, "learning_rate": 5.877755549864139e-07, "loss": 1.672, "step": 88374 }, { "epoch": 2.94, "grad_norm": 0.7075009942054749, "learning_rate": 5.871217761664326e-07, "loss": 1.6895, "step": 88375 }, { "epoch": 2.94, "grad_norm": 0.7064365744590759, "learning_rate": 5.864683607895604e-07, "loss": 1.5971, "step": 88376 }, { "epoch": 2.94, "grad_norm": 0.7153639197349548, "learning_rate": 5.858153088565965e-07, "loss": 1.6507, "step": 88377 }, { "epoch": 2.94, "grad_norm": 0.7429250478744507, "learning_rate": 5.851626203683402e-07, "loss": 1.7168, "step": 88378 }, { "epoch": 2.94, "grad_norm": 0.739998459815979, "learning_rate": 5.845102953255909e-07, "loss": 1.7062, "step": 88379 }, { "epoch": 2.94, "grad_norm": 0.7401579022407532, "learning_rate": 5.838583337290815e-07, "loss": 1.7702, "step": 88380 }, { "epoch": 2.94, "grad_norm": 0.709079921245575, "learning_rate": 5.83206735579711e-07, "loss": 1.6476, "step": 88381 }, { "epoch": 2.94, "grad_norm": 0.7456045746803284, "learning_rate": 5.825555008781791e-07, "loss": 1.7061, "step": 88382 }, { "epoch": 2.94, "grad_norm": 0.7247651815414429, "learning_rate": 5.819046296253182e-07, "loss": 1.7264, "step": 88383 }, { "epoch": 2.94, "grad_norm": 0.7309249043464661, "learning_rate": 5.812541218218947e-07, "loss": 1.7452, "step": 88384 }, { "epoch": 2.94, "grad_norm": 0.7036260962486267, "learning_rate": 5.806039774687077e-07, "loss": 1.6898, "step": 88385 }, { "epoch": 2.94, "grad_norm": 0.7147625684738159, "learning_rate": 5.799541965665899e-07, "loss": 1.5964, "step": 88386 }, { "epoch": 2.94, "grad_norm": 0.7126675844192505, "learning_rate": 5.793047791162408e-07, "loss": 1.673, "step": 88387 }, { "epoch": 2.94, "grad_norm": 0.7318317294120789, "learning_rate": 5.786557251185265e-07, "loss": 1.6533, "step": 88388 }, { "epoch": 2.94, "grad_norm": 0.7327439188957214, "learning_rate": 5.780070345742128e-07, "loss": 1.7821, "step": 88389 }, { "epoch": 2.94, "grad_norm": 0.7152711153030396, "learning_rate": 5.773587074840658e-07, "loss": 1.6777, "step": 88390 }, { "epoch": 2.94, "grad_norm": 0.7052120566368103, "learning_rate": 5.767107438488849e-07, "loss": 1.6447, "step": 88391 }, { "epoch": 2.94, "grad_norm": 0.7212008833885193, "learning_rate": 5.760631436694696e-07, "loss": 1.6332, "step": 88392 }, { "epoch": 2.94, "grad_norm": 0.7130514979362488, "learning_rate": 5.754159069465858e-07, "loss": 1.73, "step": 88393 }, { "epoch": 2.94, "grad_norm": 0.7224787473678589, "learning_rate": 5.747690336810329e-07, "loss": 1.7351, "step": 88394 }, { "epoch": 2.94, "grad_norm": 0.707978367805481, "learning_rate": 5.741225238736102e-07, "loss": 1.7134, "step": 88395 }, { "epoch": 2.94, "grad_norm": 0.714872419834137, "learning_rate": 5.734763775250506e-07, "loss": 1.6555, "step": 88396 }, { "epoch": 2.94, "grad_norm": 0.7177491188049316, "learning_rate": 5.728305946361866e-07, "loss": 1.6199, "step": 88397 }, { "epoch": 2.94, "grad_norm": 0.7101594805717468, "learning_rate": 5.721851752077844e-07, "loss": 1.6969, "step": 88398 }, { "epoch": 2.94, "grad_norm": 0.7388175129890442, "learning_rate": 5.715401192406432e-07, "loss": 1.7259, "step": 88399 }, { "epoch": 2.94, "grad_norm": 0.7039344310760498, "learning_rate": 5.708954267355293e-07, "loss": 1.6377, "step": 88400 }, { "epoch": 2.94, "grad_norm": 0.7412598729133606, "learning_rate": 5.702510976932084e-07, "loss": 1.7506, "step": 88401 }, { "epoch": 2.94, "grad_norm": 0.7160114049911499, "learning_rate": 5.696071321145135e-07, "loss": 1.7368, "step": 88402 }, { "epoch": 2.94, "grad_norm": 0.7357205748558044, "learning_rate": 5.689635300001772e-07, "loss": 1.7222, "step": 88403 }, { "epoch": 2.94, "grad_norm": 0.7303117513656616, "learning_rate": 5.683202913510321e-07, "loss": 1.6686, "step": 88404 }, { "epoch": 2.94, "grad_norm": 0.7284665703773499, "learning_rate": 5.676774161677778e-07, "loss": 1.6588, "step": 88405 }, { "epoch": 2.94, "grad_norm": 0.7028308510780334, "learning_rate": 5.670349044512801e-07, "loss": 1.6344, "step": 88406 }, { "epoch": 2.94, "grad_norm": 0.6970722675323486, "learning_rate": 5.663927562022719e-07, "loss": 1.6801, "step": 88407 }, { "epoch": 2.94, "grad_norm": 0.6990272402763367, "learning_rate": 5.657509714215525e-07, "loss": 1.7102, "step": 88408 }, { "epoch": 2.94, "grad_norm": 0.7011814117431641, "learning_rate": 5.651095501098879e-07, "loss": 1.6265, "step": 88409 }, { "epoch": 2.94, "grad_norm": 0.7419118881225586, "learning_rate": 5.644684922680442e-07, "loss": 1.6297, "step": 88410 }, { "epoch": 2.94, "grad_norm": 0.7112725973129272, "learning_rate": 5.638277978968208e-07, "loss": 1.6455, "step": 88411 }, { "epoch": 2.94, "grad_norm": 0.7107559442520142, "learning_rate": 5.631874669970171e-07, "loss": 1.6591, "step": 88412 }, { "epoch": 2.94, "grad_norm": 0.7349080443382263, "learning_rate": 5.625474995693324e-07, "loss": 1.6083, "step": 88413 }, { "epoch": 2.94, "grad_norm": 0.7319327592849731, "learning_rate": 5.619078956146328e-07, "loss": 1.631, "step": 88414 }, { "epoch": 2.94, "grad_norm": 0.7011697888374329, "learning_rate": 5.612686551336509e-07, "loss": 1.6679, "step": 88415 }, { "epoch": 2.94, "grad_norm": 0.7386459112167358, "learning_rate": 5.606297781271862e-07, "loss": 1.7959, "step": 88416 }, { "epoch": 2.94, "grad_norm": 0.7257667779922485, "learning_rate": 5.599912645959714e-07, "loss": 1.7017, "step": 88417 }, { "epoch": 2.94, "grad_norm": 0.7098146677017212, "learning_rate": 5.593531145408059e-07, "loss": 1.745, "step": 88418 }, { "epoch": 2.94, "grad_norm": 0.7039772272109985, "learning_rate": 5.58715327962489e-07, "loss": 1.7163, "step": 88419 }, { "epoch": 2.94, "grad_norm": 0.7206647992134094, "learning_rate": 5.580779048617534e-07, "loss": 1.6567, "step": 88420 }, { "epoch": 2.94, "grad_norm": 0.7357628345489502, "learning_rate": 5.574408452393986e-07, "loss": 1.6155, "step": 88421 }, { "epoch": 2.94, "grad_norm": 0.7148263454437256, "learning_rate": 5.568041490961572e-07, "loss": 1.6685, "step": 88422 }, { "epoch": 2.94, "grad_norm": 0.7015324831008911, "learning_rate": 5.56167816432862e-07, "loss": 1.702, "step": 88423 }, { "epoch": 2.94, "grad_norm": 0.7058310508728027, "learning_rate": 5.555318472502456e-07, "loss": 1.6754, "step": 88424 }, { "epoch": 2.94, "grad_norm": 0.7119352221488953, "learning_rate": 5.548962415491076e-07, "loss": 1.6272, "step": 88425 }, { "epoch": 2.94, "grad_norm": 0.7319616675376892, "learning_rate": 5.542609993302138e-07, "loss": 1.6539, "step": 88426 }, { "epoch": 2.94, "grad_norm": 0.7118710279464722, "learning_rate": 5.536261205943304e-07, "loss": 1.6633, "step": 88427 }, { "epoch": 2.94, "grad_norm": 0.7351296544075012, "learning_rate": 5.529916053421901e-07, "loss": 1.7006, "step": 88428 }, { "epoch": 2.94, "grad_norm": 0.7433065176010132, "learning_rate": 5.523574535746256e-07, "loss": 1.6558, "step": 88429 }, { "epoch": 2.94, "grad_norm": 0.7328322529792786, "learning_rate": 5.517236652923695e-07, "loss": 1.7487, "step": 88430 }, { "epoch": 2.94, "grad_norm": 0.7115082740783691, "learning_rate": 5.510902404961882e-07, "loss": 1.7107, "step": 88431 }, { "epoch": 2.94, "grad_norm": 0.7130744457244873, "learning_rate": 5.504571791868473e-07, "loss": 1.6434, "step": 88432 }, { "epoch": 2.94, "grad_norm": 0.7479337453842163, "learning_rate": 5.498244813651465e-07, "loss": 1.6816, "step": 88433 }, { "epoch": 2.94, "grad_norm": 0.7098616361618042, "learning_rate": 5.491921470318517e-07, "loss": 1.6887, "step": 88434 }, { "epoch": 2.94, "grad_norm": 0.7053055763244629, "learning_rate": 5.485601761876956e-07, "loss": 1.6296, "step": 88435 }, { "epoch": 2.94, "grad_norm": 0.7356210350990295, "learning_rate": 5.479285688334778e-07, "loss": 1.635, "step": 88436 }, { "epoch": 2.94, "grad_norm": 0.7488554120063782, "learning_rate": 5.472973249699642e-07, "loss": 1.6971, "step": 88437 }, { "epoch": 2.94, "grad_norm": 0.7141504287719727, "learning_rate": 5.466664445978874e-07, "loss": 1.7339, "step": 88438 }, { "epoch": 2.94, "grad_norm": 0.6947594881057739, "learning_rate": 5.46035927718047e-07, "loss": 1.6989, "step": 88439 }, { "epoch": 2.94, "grad_norm": 0.7347703576087952, "learning_rate": 5.454057743311757e-07, "loss": 1.6779, "step": 88440 }, { "epoch": 2.94, "grad_norm": 0.7327628135681152, "learning_rate": 5.447759844380728e-07, "loss": 1.7357, "step": 88441 }, { "epoch": 2.94, "grad_norm": 0.7031639814376831, "learning_rate": 5.441465580395044e-07, "loss": 1.6522, "step": 88442 }, { "epoch": 2.94, "grad_norm": 0.7057579159736633, "learning_rate": 5.435174951362031e-07, "loss": 1.6454, "step": 88443 }, { "epoch": 2.94, "grad_norm": 0.7042790651321411, "learning_rate": 5.428887957289352e-07, "loss": 1.7023, "step": 88444 }, { "epoch": 2.94, "grad_norm": 0.6867678165435791, "learning_rate": 5.422604598184999e-07, "loss": 1.6555, "step": 88445 }, { "epoch": 2.94, "grad_norm": 0.7404681444168091, "learning_rate": 5.416324874056299e-07, "loss": 1.7457, "step": 88446 }, { "epoch": 2.94, "grad_norm": 0.7066713571548462, "learning_rate": 5.410048784911247e-07, "loss": 1.6631, "step": 88447 }, { "epoch": 2.94, "grad_norm": 0.7413641810417175, "learning_rate": 5.403776330756837e-07, "loss": 1.6059, "step": 88448 }, { "epoch": 2.94, "grad_norm": 0.6999456286430359, "learning_rate": 5.397507511601062e-07, "loss": 1.6871, "step": 88449 }, { "epoch": 2.94, "grad_norm": 0.7200958132743835, "learning_rate": 5.391242327451251e-07, "loss": 1.6189, "step": 88450 }, { "epoch": 2.94, "grad_norm": 0.7337656617164612, "learning_rate": 5.384980778315396e-07, "loss": 1.6133, "step": 88451 }, { "epoch": 2.94, "grad_norm": 0.7409459352493286, "learning_rate": 5.378722864201157e-07, "loss": 1.7474, "step": 88452 }, { "epoch": 2.94, "grad_norm": 0.7195855379104614, "learning_rate": 5.372468585115864e-07, "loss": 1.6808, "step": 88453 }, { "epoch": 2.94, "grad_norm": 0.7166253328323364, "learning_rate": 5.366217941066842e-07, "loss": 1.6699, "step": 88454 }, { "epoch": 2.94, "grad_norm": 0.6950371265411377, "learning_rate": 5.359970932062418e-07, "loss": 1.7046, "step": 88455 }, { "epoch": 2.94, "grad_norm": 0.725325882434845, "learning_rate": 5.353727558109256e-07, "loss": 1.6406, "step": 88456 }, { "epoch": 2.94, "grad_norm": 0.734950602054596, "learning_rate": 5.347487819216012e-07, "loss": 1.7291, "step": 88457 }, { "epoch": 2.94, "grad_norm": 0.727074384689331, "learning_rate": 5.34125171538935e-07, "loss": 1.7588, "step": 88458 }, { "epoch": 2.94, "grad_norm": 0.7148144245147705, "learning_rate": 5.335019246636929e-07, "loss": 1.6871, "step": 88459 }, { "epoch": 2.94, "grad_norm": 0.729790210723877, "learning_rate": 5.328790412966744e-07, "loss": 1.6727, "step": 88460 }, { "epoch": 2.94, "grad_norm": 0.7410292625427246, "learning_rate": 5.322565214386453e-07, "loss": 1.7556, "step": 88461 }, { "epoch": 2.94, "grad_norm": 0.6957546472549438, "learning_rate": 5.316343650903054e-07, "loss": 1.6797, "step": 88462 }, { "epoch": 2.94, "grad_norm": 0.7143041491508484, "learning_rate": 5.310125722524205e-07, "loss": 1.6908, "step": 88463 }, { "epoch": 2.94, "grad_norm": 0.7040392756462097, "learning_rate": 5.303911429257901e-07, "loss": 1.6719, "step": 88464 }, { "epoch": 2.94, "grad_norm": 0.6977264285087585, "learning_rate": 5.297700771111468e-07, "loss": 1.6352, "step": 88465 }, { "epoch": 2.94, "grad_norm": 0.756851077079773, "learning_rate": 5.291493748092235e-07, "loss": 1.7611, "step": 88466 }, { "epoch": 2.94, "grad_norm": 0.717985987663269, "learning_rate": 5.285290360207861e-07, "loss": 1.6186, "step": 88467 }, { "epoch": 2.94, "grad_norm": 0.7173498868942261, "learning_rate": 5.279090607466008e-07, "loss": 1.7044, "step": 88468 }, { "epoch": 2.94, "grad_norm": 0.7473304867744446, "learning_rate": 5.272894489873669e-07, "loss": 1.6828, "step": 88469 }, { "epoch": 2.94, "grad_norm": 0.7054020762443542, "learning_rate": 5.266702007439171e-07, "loss": 1.7124, "step": 88470 }, { "epoch": 2.94, "grad_norm": 0.7406615018844604, "learning_rate": 5.260513160169511e-07, "loss": 1.6996, "step": 88471 }, { "epoch": 2.94, "grad_norm": 0.7000913619995117, "learning_rate": 5.254327948072679e-07, "loss": 1.6158, "step": 88472 }, { "epoch": 2.94, "grad_norm": 0.710375189781189, "learning_rate": 5.248146371155337e-07, "loss": 1.7395, "step": 88473 }, { "epoch": 2.94, "grad_norm": 0.6942849159240723, "learning_rate": 5.241968429425813e-07, "loss": 1.6047, "step": 88474 }, { "epoch": 2.94, "grad_norm": 0.7140818238258362, "learning_rate": 5.235794122891102e-07, "loss": 1.6291, "step": 88475 }, { "epoch": 2.94, "grad_norm": 0.705445408821106, "learning_rate": 5.229623451558861e-07, "loss": 1.5912, "step": 88476 }, { "epoch": 2.94, "grad_norm": 0.6927760243415833, "learning_rate": 5.223456415436755e-07, "loss": 1.6119, "step": 88477 }, { "epoch": 2.94, "grad_norm": 0.7328810691833496, "learning_rate": 5.217293014532109e-07, "loss": 1.7093, "step": 88478 }, { "epoch": 2.94, "grad_norm": 0.7221850752830505, "learning_rate": 5.211133248852251e-07, "loss": 1.6957, "step": 88479 }, { "epoch": 2.94, "grad_norm": 0.7025380730628967, "learning_rate": 5.204977118404841e-07, "loss": 1.6412, "step": 88480 }, { "epoch": 2.94, "grad_norm": 0.7239664793014526, "learning_rate": 5.19882462319754e-07, "loss": 1.7064, "step": 88481 }, { "epoch": 2.94, "grad_norm": 0.7037520408630371, "learning_rate": 5.192675763237675e-07, "loss": 1.6604, "step": 88482 }, { "epoch": 2.94, "grad_norm": 0.7404728531837463, "learning_rate": 5.186530538532241e-07, "loss": 1.6507, "step": 88483 }, { "epoch": 2.94, "grad_norm": 0.6848458051681519, "learning_rate": 5.180388949089231e-07, "loss": 1.6452, "step": 88484 }, { "epoch": 2.94, "grad_norm": 0.7292208075523376, "learning_rate": 5.174250994915974e-07, "loss": 1.6659, "step": 88485 }, { "epoch": 2.94, "grad_norm": 0.727519154548645, "learning_rate": 5.168116676020128e-07, "loss": 1.6209, "step": 88486 }, { "epoch": 2.94, "grad_norm": 0.6887317299842834, "learning_rate": 5.16198599240869e-07, "loss": 1.7092, "step": 88487 }, { "epoch": 2.94, "grad_norm": 0.7069714069366455, "learning_rate": 5.155858944089319e-07, "loss": 1.7214, "step": 88488 }, { "epoch": 2.94, "grad_norm": 0.7153195142745972, "learning_rate": 5.149735531069676e-07, "loss": 1.6585, "step": 88489 }, { "epoch": 2.94, "grad_norm": 0.7388176321983337, "learning_rate": 5.143615753356756e-07, "loss": 1.6931, "step": 88490 }, { "epoch": 2.94, "grad_norm": 0.720984935760498, "learning_rate": 5.137499610958218e-07, "loss": 1.622, "step": 88491 }, { "epoch": 2.94, "grad_norm": 0.6996602416038513, "learning_rate": 5.13138710388139e-07, "loss": 1.6955, "step": 88492 }, { "epoch": 2.94, "grad_norm": 0.7174298763275146, "learning_rate": 5.125278232134267e-07, "loss": 1.7594, "step": 88493 }, { "epoch": 2.94, "grad_norm": 0.7134184241294861, "learning_rate": 5.119172995723175e-07, "loss": 1.7397, "step": 88494 }, { "epoch": 2.94, "grad_norm": 0.7223871350288391, "learning_rate": 5.113071394656442e-07, "loss": 1.628, "step": 88495 }, { "epoch": 2.94, "grad_norm": 0.7167102098464966, "learning_rate": 5.106973428941397e-07, "loss": 1.6247, "step": 88496 }, { "epoch": 2.94, "grad_norm": 0.7130624055862427, "learning_rate": 5.100879098584698e-07, "loss": 1.6845, "step": 88497 }, { "epoch": 2.94, "grad_norm": 0.7267957925796509, "learning_rate": 5.094788403594674e-07, "loss": 1.661, "step": 88498 }, { "epoch": 2.94, "grad_norm": 0.6892845630645752, "learning_rate": 5.088701343977985e-07, "loss": 1.6364, "step": 88499 }, { "epoch": 2.94, "grad_norm": 0.7073089480400085, "learning_rate": 5.082617919742627e-07, "loss": 1.7451, "step": 88500 }, { "epoch": 2.94, "grad_norm": 0.699691653251648, "learning_rate": 5.076538130895591e-07, "loss": 1.6577, "step": 88501 }, { "epoch": 2.94, "grad_norm": 0.7262097597122192, "learning_rate": 5.070461977444207e-07, "loss": 1.6344, "step": 88502 }, { "epoch": 2.94, "grad_norm": 0.736213207244873, "learning_rate": 5.064389459396134e-07, "loss": 1.704, "step": 88503 }, { "epoch": 2.94, "grad_norm": 0.7367511987686157, "learning_rate": 5.058320576758701e-07, "loss": 1.6742, "step": 88504 }, { "epoch": 2.94, "grad_norm": 0.741602897644043, "learning_rate": 5.052255329538901e-07, "loss": 1.6615, "step": 88505 }, { "epoch": 2.94, "grad_norm": 0.7248732447624207, "learning_rate": 5.046193717744729e-07, "loss": 1.5831, "step": 88506 }, { "epoch": 2.94, "grad_norm": 0.73231440782547, "learning_rate": 5.040135741382844e-07, "loss": 1.6742, "step": 88507 }, { "epoch": 2.94, "grad_norm": 0.7157759070396423, "learning_rate": 5.034081400461242e-07, "loss": 1.7148, "step": 88508 }, { "epoch": 2.94, "grad_norm": 0.7163374423980713, "learning_rate": 5.028030694986585e-07, "loss": 1.6982, "step": 88509 }, { "epoch": 2.94, "grad_norm": 0.7008354067802429, "learning_rate": 5.021983624967196e-07, "loss": 1.6422, "step": 88510 }, { "epoch": 2.94, "grad_norm": 0.704503059387207, "learning_rate": 5.015940190409407e-07, "loss": 1.6144, "step": 88511 }, { "epoch": 2.94, "grad_norm": 0.7268832325935364, "learning_rate": 5.009900391320876e-07, "loss": 1.7028, "step": 88512 }, { "epoch": 2.94, "grad_norm": 0.7174320816993713, "learning_rate": 5.003864227709265e-07, "loss": 1.6897, "step": 88513 }, { "epoch": 2.94, "grad_norm": 0.7096271514892578, "learning_rate": 4.9978316995819e-07, "loss": 1.6783, "step": 88514 }, { "epoch": 2.94, "grad_norm": 0.7175285220146179, "learning_rate": 4.991802806945444e-07, "loss": 1.6808, "step": 88515 }, { "epoch": 2.94, "grad_norm": 0.7093785405158997, "learning_rate": 4.98577754980789e-07, "loss": 1.6903, "step": 88516 }, { "epoch": 2.94, "grad_norm": 0.7147769331932068, "learning_rate": 4.979755928176232e-07, "loss": 1.6998, "step": 88517 }, { "epoch": 2.95, "grad_norm": 0.7082722783088684, "learning_rate": 4.973737942057798e-07, "loss": 1.6768, "step": 88518 }, { "epoch": 2.95, "grad_norm": 0.6979376077651978, "learning_rate": 4.967723591459916e-07, "loss": 1.6316, "step": 88519 }, { "epoch": 2.95, "grad_norm": 0.7081893086433411, "learning_rate": 4.961712876389912e-07, "loss": 1.6885, "step": 88520 }, { "epoch": 2.95, "grad_norm": 0.7243577241897583, "learning_rate": 4.955705796855447e-07, "loss": 1.6176, "step": 88521 }, { "epoch": 2.95, "grad_norm": 0.7324608564376831, "learning_rate": 4.949702352863183e-07, "loss": 1.797, "step": 88522 }, { "epoch": 2.95, "grad_norm": 0.7256304025650024, "learning_rate": 4.94370254442078e-07, "loss": 1.6677, "step": 88523 }, { "epoch": 2.95, "grad_norm": 0.7245521545410156, "learning_rate": 4.937706371535233e-07, "loss": 1.6329, "step": 88524 }, { "epoch": 2.95, "grad_norm": 0.7252072095870972, "learning_rate": 4.931713834214201e-07, "loss": 1.6731, "step": 88525 }, { "epoch": 2.95, "grad_norm": 0.7119973301887512, "learning_rate": 4.925724932464681e-07, "loss": 1.7097, "step": 88526 }, { "epoch": 2.95, "grad_norm": 0.7600393295288086, "learning_rate": 4.919739666293998e-07, "loss": 1.7072, "step": 88527 }, { "epoch": 2.95, "grad_norm": 0.7411617040634155, "learning_rate": 4.91375803570948e-07, "loss": 1.7387, "step": 88528 }, { "epoch": 2.95, "grad_norm": 0.7096425294876099, "learning_rate": 4.907780040718124e-07, "loss": 1.6353, "step": 88529 }, { "epoch": 2.95, "grad_norm": 0.7112734317779541, "learning_rate": 4.901805681327587e-07, "loss": 1.7063, "step": 88530 }, { "epoch": 2.95, "grad_norm": 0.698641836643219, "learning_rate": 4.895834957545197e-07, "loss": 1.6742, "step": 88531 }, { "epoch": 2.95, "grad_norm": 0.6990596652030945, "learning_rate": 4.889867869377617e-07, "loss": 1.7405, "step": 88532 }, { "epoch": 2.95, "grad_norm": 0.7189215421676636, "learning_rate": 4.88390441683284e-07, "loss": 1.7564, "step": 88533 }, { "epoch": 2.95, "grad_norm": 0.7321504354476929, "learning_rate": 4.877944599917194e-07, "loss": 1.6896, "step": 88534 }, { "epoch": 2.95, "grad_norm": 0.7150334715843201, "learning_rate": 4.871988418638673e-07, "loss": 1.6688, "step": 88535 }, { "epoch": 2.95, "grad_norm": 0.707719087600708, "learning_rate": 4.86603587300427e-07, "loss": 1.6499, "step": 88536 }, { "epoch": 2.95, "grad_norm": 0.7244405746459961, "learning_rate": 4.860086963020982e-07, "loss": 1.7146, "step": 88537 }, { "epoch": 2.95, "grad_norm": 0.7391776442527771, "learning_rate": 4.854141688696467e-07, "loss": 1.6872, "step": 88538 }, { "epoch": 2.95, "grad_norm": 0.7228808999061584, "learning_rate": 4.848200050037721e-07, "loss": 1.6065, "step": 88539 }, { "epoch": 2.95, "grad_norm": 0.7203527688980103, "learning_rate": 4.842262047051737e-07, "loss": 1.7012, "step": 88540 }, { "epoch": 2.95, "grad_norm": 0.7253996729850769, "learning_rate": 4.836327679746177e-07, "loss": 1.6854, "step": 88541 }, { "epoch": 2.95, "grad_norm": 0.7255129218101501, "learning_rate": 4.830396948127701e-07, "loss": 1.6462, "step": 88542 }, { "epoch": 2.95, "grad_norm": 0.707452118396759, "learning_rate": 4.824469852204305e-07, "loss": 1.6533, "step": 88543 }, { "epoch": 2.95, "grad_norm": 0.7139027714729309, "learning_rate": 4.818546391982314e-07, "loss": 1.6261, "step": 88544 }, { "epoch": 2.95, "grad_norm": 0.7395693063735962, "learning_rate": 4.812626567469058e-07, "loss": 1.6332, "step": 88545 }, { "epoch": 2.95, "grad_norm": 0.7008017897605896, "learning_rate": 4.806710378672529e-07, "loss": 1.6589, "step": 88546 }, { "epoch": 2.95, "grad_norm": 0.7113940119743347, "learning_rate": 4.800797825598724e-07, "loss": 1.673, "step": 88547 }, { "epoch": 2.95, "grad_norm": 0.6955315470695496, "learning_rate": 4.794888908255967e-07, "loss": 1.6642, "step": 88548 }, { "epoch": 2.95, "grad_norm": 0.704960286617279, "learning_rate": 4.788983626650589e-07, "loss": 1.648, "step": 88549 }, { "epoch": 2.95, "grad_norm": 0.7011182308197021, "learning_rate": 4.783081980790249e-07, "loss": 1.6729, "step": 88550 }, { "epoch": 2.95, "grad_norm": 0.7229310870170593, "learning_rate": 4.777183970681609e-07, "loss": 1.7052, "step": 88551 }, { "epoch": 2.95, "grad_norm": 0.7083204984664917, "learning_rate": 4.771289596332328e-07, "loss": 1.6222, "step": 88552 }, { "epoch": 2.95, "grad_norm": 0.7181442975997925, "learning_rate": 4.765398857749403e-07, "loss": 1.7062, "step": 88553 }, { "epoch": 2.95, "grad_norm": 0.7126918435096741, "learning_rate": 4.75951175494016e-07, "loss": 1.6715, "step": 88554 }, { "epoch": 2.95, "grad_norm": 0.7238638401031494, "learning_rate": 4.75362828791126e-07, "loss": 1.7099, "step": 88555 }, { "epoch": 2.95, "grad_norm": 0.6880627870559692, "learning_rate": 4.747748456670031e-07, "loss": 1.6744, "step": 88556 }, { "epoch": 2.95, "grad_norm": 0.7134836912155151, "learning_rate": 4.7418722612238004e-07, "loss": 1.7296, "step": 88557 }, { "epoch": 2.95, "grad_norm": 0.7294192314147949, "learning_rate": 4.7359997015798954e-07, "loss": 1.7023, "step": 88558 }, { "epoch": 2.95, "grad_norm": 0.7295337915420532, "learning_rate": 4.7301307777449783e-07, "loss": 1.6847, "step": 88559 }, { "epoch": 2.95, "grad_norm": 0.7346518039703369, "learning_rate": 4.7242654897260423e-07, "loss": 1.6676, "step": 88560 }, { "epoch": 2.95, "grad_norm": 0.7291755676269531, "learning_rate": 4.7184038375307485e-07, "loss": 1.669, "step": 88561 }, { "epoch": 2.95, "grad_norm": 0.726844847202301, "learning_rate": 4.7125458211660916e-07, "loss": 1.6025, "step": 88562 }, { "epoch": 2.95, "grad_norm": 0.7636372447013855, "learning_rate": 4.7066914406390656e-07, "loss": 1.6781, "step": 88563 }, { "epoch": 2.95, "grad_norm": 0.7330821752548218, "learning_rate": 4.7008406959566647e-07, "loss": 1.6435, "step": 88564 }, { "epoch": 2.95, "grad_norm": 0.6972318291664124, "learning_rate": 4.694993587126217e-07, "loss": 1.6598, "step": 88565 }, { "epoch": 2.95, "grad_norm": 0.713943600654602, "learning_rate": 4.689150114154716e-07, "loss": 1.659, "step": 88566 }, { "epoch": 2.95, "grad_norm": 1.1949042081832886, "learning_rate": 4.6833102770491573e-07, "loss": 1.6793, "step": 88567 }, { "epoch": 2.95, "grad_norm": 0.7066300511360168, "learning_rate": 4.677474075816534e-07, "loss": 1.623, "step": 88568 }, { "epoch": 2.95, "grad_norm": 0.6850287914276123, "learning_rate": 4.671641510464508e-07, "loss": 1.7053, "step": 88569 }, { "epoch": 2.95, "grad_norm": 0.7222548723220825, "learning_rate": 4.665812580999406e-07, "loss": 1.7035, "step": 88570 }, { "epoch": 2.95, "grad_norm": 0.7169924378395081, "learning_rate": 4.6599872874288904e-07, "loss": 1.6619, "step": 88571 }, { "epoch": 2.95, "grad_norm": 0.7104811072349548, "learning_rate": 4.6541656297596207e-07, "loss": 1.635, "step": 88572 }, { "epoch": 2.95, "grad_norm": 0.7103774547576904, "learning_rate": 4.6483476079989257e-07, "loss": 1.6513, "step": 88573 }, { "epoch": 2.95, "grad_norm": 0.7345322966575623, "learning_rate": 4.642533222154132e-07, "loss": 1.6891, "step": 88574 }, { "epoch": 2.95, "grad_norm": 0.7241993546485901, "learning_rate": 4.636722472231569e-07, "loss": 1.7338, "step": 88575 }, { "epoch": 2.95, "grad_norm": 0.6901041269302368, "learning_rate": 4.630915358238563e-07, "loss": 1.5855, "step": 88576 }, { "epoch": 2.95, "grad_norm": 0.7087405323982239, "learning_rate": 4.625111880182775e-07, "loss": 1.6317, "step": 88577 }, { "epoch": 2.95, "grad_norm": 0.698424220085144, "learning_rate": 4.6193120380702e-07, "loss": 1.6609, "step": 88578 }, { "epoch": 2.95, "grad_norm": 0.6975263953208923, "learning_rate": 4.6135158319088317e-07, "loss": 1.665, "step": 88579 }, { "epoch": 2.95, "grad_norm": 0.712035596370697, "learning_rate": 4.607723261705332e-07, "loss": 1.7263, "step": 88580 }, { "epoch": 2.95, "grad_norm": 0.7027772068977356, "learning_rate": 4.601934327466361e-07, "loss": 1.6442, "step": 88581 }, { "epoch": 2.95, "grad_norm": 0.706420361995697, "learning_rate": 4.5961490291995806e-07, "loss": 1.7375, "step": 88582 }, { "epoch": 2.95, "grad_norm": 0.711776614189148, "learning_rate": 4.5903673669116515e-07, "loss": 1.6413, "step": 88583 }, { "epoch": 2.95, "grad_norm": 0.7195972204208374, "learning_rate": 4.584589340609568e-07, "loss": 1.6377, "step": 88584 }, { "epoch": 2.95, "grad_norm": 0.72378009557724, "learning_rate": 4.578814950300658e-07, "loss": 1.7137, "step": 88585 }, { "epoch": 2.95, "grad_norm": 0.7305885553359985, "learning_rate": 4.573044195991582e-07, "loss": 1.5981, "step": 88586 }, { "epoch": 2.95, "grad_norm": 0.7158303260803223, "learning_rate": 4.567277077689335e-07, "loss": 1.7209, "step": 88587 }, { "epoch": 2.95, "grad_norm": 0.7236142754554749, "learning_rate": 4.561513595401245e-07, "loss": 1.6671, "step": 88588 }, { "epoch": 2.95, "grad_norm": 0.7398062348365784, "learning_rate": 4.5557537491339725e-07, "loss": 1.6323, "step": 88589 }, { "epoch": 2.95, "grad_norm": 0.7259565591812134, "learning_rate": 4.5499975388948453e-07, "loss": 1.6877, "step": 88590 }, { "epoch": 2.95, "grad_norm": 0.7393112778663635, "learning_rate": 4.5442449646905245e-07, "loss": 1.6629, "step": 88591 }, { "epoch": 2.95, "grad_norm": 0.6997356414794922, "learning_rate": 4.5384960265280045e-07, "loss": 1.677, "step": 88592 }, { "epoch": 2.95, "grad_norm": 0.6851559281349182, "learning_rate": 4.5327507244146135e-07, "loss": 1.664, "step": 88593 }, { "epoch": 2.95, "grad_norm": 0.7208687663078308, "learning_rate": 4.5270090583570115e-07, "loss": 1.6755, "step": 88594 }, { "epoch": 2.95, "grad_norm": 0.7207005023956299, "learning_rate": 4.5212710283621944e-07, "loss": 1.7076, "step": 88595 }, { "epoch": 2.95, "grad_norm": 0.7340804934501648, "learning_rate": 4.515536634437489e-07, "loss": 1.6114, "step": 88596 }, { "epoch": 2.95, "grad_norm": 0.7222067713737488, "learning_rate": 4.509805876589223e-07, "loss": 1.7391, "step": 88597 }, { "epoch": 2.95, "grad_norm": 0.7170166373252869, "learning_rate": 4.504078754824725e-07, "loss": 1.6926, "step": 88598 }, { "epoch": 2.95, "grad_norm": 0.7054387331008911, "learning_rate": 4.4983552691509885e-07, "loss": 1.6743, "step": 88599 }, { "epoch": 2.95, "grad_norm": 0.6964465975761414, "learning_rate": 4.492635419575008e-07, "loss": 1.6854, "step": 88600 }, { "epoch": 2.95, "grad_norm": 0.6996426582336426, "learning_rate": 4.486919206103445e-07, "loss": 1.6984, "step": 88601 }, { "epoch": 2.95, "grad_norm": 0.7034519910812378, "learning_rate": 4.4812066287432947e-07, "loss": 1.6583, "step": 88602 }, { "epoch": 2.95, "grad_norm": 0.7110907435417175, "learning_rate": 4.4754976875018835e-07, "loss": 1.6171, "step": 88603 }, { "epoch": 2.95, "grad_norm": 0.700718879699707, "learning_rate": 4.4697923823855395e-07, "loss": 1.6959, "step": 88604 }, { "epoch": 2.95, "grad_norm": 0.7027817368507385, "learning_rate": 4.464090713401591e-07, "loss": 1.7501, "step": 88605 }, { "epoch": 2.95, "grad_norm": 0.6911084651947021, "learning_rate": 4.4583926805570323e-07, "loss": 1.608, "step": 88606 }, { "epoch": 2.95, "grad_norm": 0.7261682748794556, "learning_rate": 4.4526982838585244e-07, "loss": 1.6658, "step": 88607 }, { "epoch": 2.95, "grad_norm": 0.7230227589607239, "learning_rate": 4.447007523313062e-07, "loss": 1.7188, "step": 88608 }, { "epoch": 2.95, "grad_norm": 0.7221447825431824, "learning_rate": 4.441320398927639e-07, "loss": 1.6966, "step": 88609 }, { "epoch": 2.95, "grad_norm": 0.7268012762069702, "learning_rate": 4.435636910708917e-07, "loss": 1.6425, "step": 88610 }, { "epoch": 2.95, "grad_norm": 0.704113781452179, "learning_rate": 4.429957058663891e-07, "loss": 1.6602, "step": 88611 }, { "epoch": 2.95, "grad_norm": 0.7187651991844177, "learning_rate": 4.4242808427998876e-07, "loss": 1.6909, "step": 88612 }, { "epoch": 2.95, "grad_norm": 0.7314991354942322, "learning_rate": 4.4186082631232354e-07, "loss": 1.7219, "step": 88613 }, { "epoch": 2.95, "grad_norm": 0.7322597503662109, "learning_rate": 4.412939319640929e-07, "loss": 1.7514, "step": 88614 }, { "epoch": 2.95, "grad_norm": 0.7348767518997192, "learning_rate": 4.4072740123599624e-07, "loss": 1.6382, "step": 88615 }, { "epoch": 2.95, "grad_norm": 0.7266146540641785, "learning_rate": 4.4016123412869974e-07, "loss": 1.6402, "step": 88616 }, { "epoch": 2.95, "grad_norm": 0.7454017400741577, "learning_rate": 4.395954306429361e-07, "loss": 1.6687, "step": 88617 }, { "epoch": 2.95, "grad_norm": 0.7270025014877319, "learning_rate": 4.3902999077937153e-07, "loss": 1.7029, "step": 88618 }, { "epoch": 2.95, "grad_norm": 0.6804966926574707, "learning_rate": 4.3846491453867204e-07, "loss": 1.628, "step": 88619 }, { "epoch": 2.95, "grad_norm": 0.7002461552619934, "learning_rate": 4.379002019215372e-07, "loss": 1.6675, "step": 88620 }, { "epoch": 2.95, "grad_norm": 0.7247559428215027, "learning_rate": 4.3733585292866634e-07, "loss": 1.6478, "step": 88621 }, { "epoch": 2.95, "grad_norm": 0.7182140350341797, "learning_rate": 4.367718675607257e-07, "loss": 1.7084, "step": 88622 }, { "epoch": 2.95, "grad_norm": 0.7171761393547058, "learning_rate": 4.362082458183813e-07, "loss": 1.6627, "step": 88623 }, { "epoch": 2.95, "grad_norm": 0.71321040391922, "learning_rate": 4.35644987702366e-07, "loss": 1.6516, "step": 88624 }, { "epoch": 2.95, "grad_norm": 0.7247194051742554, "learning_rate": 4.3508209321334587e-07, "loss": 1.6715, "step": 88625 }, { "epoch": 2.95, "grad_norm": 0.7117575407028198, "learning_rate": 4.3451956235198703e-07, "loss": 1.699, "step": 88626 }, { "epoch": 2.95, "grad_norm": 0.7149596214294434, "learning_rate": 4.3395739511895564e-07, "loss": 1.7409, "step": 88627 }, { "epoch": 2.95, "grad_norm": 0.7274103164672852, "learning_rate": 4.333955915149845e-07, "loss": 1.6399, "step": 88628 }, { "epoch": 2.95, "grad_norm": 0.6891951560974121, "learning_rate": 4.3283415154073965e-07, "loss": 1.5965, "step": 88629 }, { "epoch": 2.95, "grad_norm": 0.7165884375572205, "learning_rate": 4.322730751968873e-07, "loss": 1.6778, "step": 88630 }, { "epoch": 2.95, "grad_norm": 0.7075752019882202, "learning_rate": 4.3171236248412675e-07, "loss": 1.6844, "step": 88631 }, { "epoch": 2.95, "grad_norm": 0.7214900255203247, "learning_rate": 4.3115201340312436e-07, "loss": 1.6036, "step": 88632 }, { "epoch": 2.95, "grad_norm": 0.711114764213562, "learning_rate": 4.305920279545461e-07, "loss": 1.6886, "step": 88633 }, { "epoch": 2.95, "grad_norm": 0.7332329750061035, "learning_rate": 4.300324061391247e-07, "loss": 1.68, "step": 88634 }, { "epoch": 2.95, "grad_norm": 0.6885697245597839, "learning_rate": 4.294731479574598e-07, "loss": 1.6275, "step": 88635 }, { "epoch": 2.95, "grad_norm": 0.724332332611084, "learning_rate": 4.289142534103174e-07, "loss": 1.6846, "step": 88636 }, { "epoch": 2.95, "grad_norm": 0.6868316531181335, "learning_rate": 4.28355722498297e-07, "loss": 1.6764, "step": 88637 }, { "epoch": 2.95, "grad_norm": 0.6870315074920654, "learning_rate": 4.2779755522213135e-07, "loss": 1.7325, "step": 88638 }, { "epoch": 2.95, "grad_norm": 0.7273620963096619, "learning_rate": 4.2723975158248656e-07, "loss": 1.6659, "step": 88639 }, { "epoch": 2.95, "grad_norm": 0.7253521680831909, "learning_rate": 4.2668231157999557e-07, "loss": 1.681, "step": 88640 }, { "epoch": 2.95, "grad_norm": 0.728890597820282, "learning_rate": 4.261252352154243e-07, "loss": 1.7228, "step": 88641 }, { "epoch": 2.95, "grad_norm": 0.7103411555290222, "learning_rate": 4.2556852248937234e-07, "loss": 1.6759, "step": 88642 }, { "epoch": 2.95, "grad_norm": 0.7405029535293579, "learning_rate": 4.2501217340253913e-07, "loss": 1.7166, "step": 88643 }, { "epoch": 2.95, "grad_norm": 0.7266761660575867, "learning_rate": 4.2445618795559077e-07, "loss": 1.7053, "step": 88644 }, { "epoch": 2.95, "grad_norm": 0.722917914390564, "learning_rate": 4.2390056614922674e-07, "loss": 1.7453, "step": 88645 }, { "epoch": 2.95, "grad_norm": 0.714960515499115, "learning_rate": 4.2334530798411317e-07, "loss": 1.7023, "step": 88646 }, { "epoch": 2.95, "grad_norm": 0.7211999297142029, "learning_rate": 4.227904134608828e-07, "loss": 1.638, "step": 88647 }, { "epoch": 2.95, "grad_norm": 0.7182736396789551, "learning_rate": 4.2223588258030184e-07, "loss": 1.6226, "step": 88648 }, { "epoch": 2.95, "grad_norm": 0.7324472069740295, "learning_rate": 4.2168171534293635e-07, "loss": 1.717, "step": 88649 }, { "epoch": 2.95, "grad_norm": 0.7213246822357178, "learning_rate": 4.211279117495192e-07, "loss": 1.6456, "step": 88650 }, { "epoch": 2.95, "grad_norm": 0.9055055975914001, "learning_rate": 4.205744718007165e-07, "loss": 1.7272, "step": 88651 }, { "epoch": 2.95, "grad_norm": 0.7648158669471741, "learning_rate": 4.2002139549719426e-07, "loss": 1.7207, "step": 88652 }, { "epoch": 2.95, "grad_norm": 0.7238383889198303, "learning_rate": 4.194686828396521e-07, "loss": 1.7043, "step": 88653 }, { "epoch": 2.95, "grad_norm": 0.7187127470970154, "learning_rate": 4.1891633382868937e-07, "loss": 1.6671, "step": 88654 }, { "epoch": 2.95, "grad_norm": 0.7005726099014282, "learning_rate": 4.1836434846503897e-07, "loss": 1.7613, "step": 88655 }, { "epoch": 2.95, "grad_norm": 0.6965086460113525, "learning_rate": 4.178127267493669e-07, "loss": 1.679, "step": 88656 }, { "epoch": 2.95, "grad_norm": 0.7083746194839478, "learning_rate": 4.1726146868230616e-07, "loss": 1.6312, "step": 88657 }, { "epoch": 2.95, "grad_norm": 0.6954235434532166, "learning_rate": 4.16710574264556e-07, "loss": 1.6421, "step": 88658 }, { "epoch": 2.95, "grad_norm": 0.7022032141685486, "learning_rate": 4.161600434967827e-07, "loss": 1.6498, "step": 88659 }, { "epoch": 2.95, "grad_norm": 0.7221933603286743, "learning_rate": 4.1560987637965226e-07, "loss": 1.6313, "step": 88660 }, { "epoch": 2.95, "grad_norm": 0.7233765721321106, "learning_rate": 4.1506007291383093e-07, "loss": 1.6552, "step": 88661 }, { "epoch": 2.95, "grad_norm": 0.7202804088592529, "learning_rate": 4.1451063309998477e-07, "loss": 1.7178, "step": 88662 }, { "epoch": 2.95, "grad_norm": 0.71084064245224, "learning_rate": 4.1396155693877997e-07, "loss": 1.6112, "step": 88663 }, { "epoch": 2.95, "grad_norm": 0.7019546627998352, "learning_rate": 4.1341284443088263e-07, "loss": 1.6572, "step": 88664 }, { "epoch": 2.95, "grad_norm": 0.7015858292579651, "learning_rate": 4.128644955769256e-07, "loss": 1.6668, "step": 88665 }, { "epoch": 2.95, "grad_norm": 0.7262701988220215, "learning_rate": 4.1231651037767486e-07, "loss": 1.6436, "step": 88666 }, { "epoch": 2.95, "grad_norm": 2.140110492706299, "learning_rate": 4.1176888883369676e-07, "loss": 1.7222, "step": 88667 }, { "epoch": 2.95, "grad_norm": 0.7059779167175293, "learning_rate": 4.112216309456573e-07, "loss": 1.6993, "step": 88668 }, { "epoch": 2.95, "grad_norm": 0.7284367680549622, "learning_rate": 4.106747367142893e-07, "loss": 1.7647, "step": 88669 }, { "epoch": 2.95, "grad_norm": 0.7112372517585754, "learning_rate": 4.1012820614022555e-07, "loss": 1.7285, "step": 88670 }, { "epoch": 2.95, "grad_norm": 0.7119159698486328, "learning_rate": 4.0958203922409895e-07, "loss": 1.6776, "step": 88671 }, { "epoch": 2.95, "grad_norm": 0.7347295880317688, "learning_rate": 4.090362359666088e-07, "loss": 1.7301, "step": 88672 }, { "epoch": 2.95, "grad_norm": 0.7156150937080383, "learning_rate": 4.0849079636842143e-07, "loss": 1.6545, "step": 88673 }, { "epoch": 2.95, "grad_norm": 0.7012208104133606, "learning_rate": 4.079457204301695e-07, "loss": 1.6965, "step": 88674 }, { "epoch": 2.95, "grad_norm": 0.7073178887367249, "learning_rate": 4.074010081525192e-07, "loss": 1.7411, "step": 88675 }, { "epoch": 2.95, "grad_norm": 0.7202371954917908, "learning_rate": 4.068566595361367e-07, "loss": 1.6929, "step": 88676 }, { "epoch": 2.95, "grad_norm": 0.7120630145072937, "learning_rate": 4.063126745817213e-07, "loss": 1.6311, "step": 88677 }, { "epoch": 2.95, "grad_norm": 0.7079012989997864, "learning_rate": 4.0576905328987275e-07, "loss": 1.7212, "step": 88678 }, { "epoch": 2.95, "grad_norm": 0.7542592287063599, "learning_rate": 4.052257956612903e-07, "loss": 1.7217, "step": 88679 }, { "epoch": 2.95, "grad_norm": 0.7042189836502075, "learning_rate": 4.0468290169660687e-07, "loss": 1.6942, "step": 88680 }, { "epoch": 2.95, "grad_norm": 0.7322903275489807, "learning_rate": 4.041403713964886e-07, "loss": 1.6631, "step": 88681 }, { "epoch": 2.95, "grad_norm": 0.7066929340362549, "learning_rate": 4.0359820476163486e-07, "loss": 1.6501, "step": 88682 }, { "epoch": 2.95, "grad_norm": 0.7212472558021545, "learning_rate": 4.030564017926452e-07, "loss": 1.7392, "step": 88683 }, { "epoch": 2.95, "grad_norm": 0.6936079859733582, "learning_rate": 4.0251496249018577e-07, "loss": 1.6813, "step": 88684 }, { "epoch": 2.95, "grad_norm": 0.7048816084861755, "learning_rate": 4.01973886854956e-07, "loss": 1.686, "step": 88685 }, { "epoch": 2.95, "grad_norm": 0.7275445461273193, "learning_rate": 4.0143317488758877e-07, "loss": 1.6298, "step": 88686 }, { "epoch": 2.95, "grad_norm": 0.7358987927436829, "learning_rate": 4.008928265887168e-07, "loss": 1.5955, "step": 88687 }, { "epoch": 2.95, "grad_norm": 0.6929184198379517, "learning_rate": 4.003528419590396e-07, "loss": 1.6538, "step": 88688 }, { "epoch": 2.95, "grad_norm": 0.7169516086578369, "learning_rate": 3.998132209991567e-07, "loss": 1.6661, "step": 88689 }, { "epoch": 2.95, "grad_norm": 0.7297662496566772, "learning_rate": 3.9927396370980083e-07, "loss": 1.7595, "step": 88690 }, { "epoch": 2.95, "grad_norm": 0.6942123770713806, "learning_rate": 3.9873507009153815e-07, "loss": 1.6088, "step": 88691 }, { "epoch": 2.95, "grad_norm": 0.7085337042808533, "learning_rate": 3.981965401451015e-07, "loss": 1.6584, "step": 88692 }, { "epoch": 2.95, "grad_norm": 0.7174707055091858, "learning_rate": 3.9765837387109033e-07, "loss": 1.7003, "step": 88693 }, { "epoch": 2.95, "grad_norm": 0.7058349251747131, "learning_rate": 3.9712057127017085e-07, "loss": 1.6394, "step": 88694 }, { "epoch": 2.95, "grad_norm": 0.7182595729827881, "learning_rate": 3.965831323430424e-07, "loss": 1.7, "step": 88695 }, { "epoch": 2.95, "grad_norm": 0.7101952433586121, "learning_rate": 3.9604605709027126e-07, "loss": 1.7249, "step": 88696 }, { "epoch": 2.95, "grad_norm": 0.7187913060188293, "learning_rate": 3.9550934551259017e-07, "loss": 1.6675, "step": 88697 }, { "epoch": 2.95, "grad_norm": 0.7075382471084595, "learning_rate": 3.9497299761059864e-07, "loss": 1.7029, "step": 88698 }, { "epoch": 2.95, "grad_norm": 0.7113085389137268, "learning_rate": 3.944370133849628e-07, "loss": 1.7451, "step": 88699 }, { "epoch": 2.95, "grad_norm": 0.7235764861106873, "learning_rate": 3.939013928363488e-07, "loss": 1.6769, "step": 88700 }, { "epoch": 2.95, "grad_norm": 0.7064234018325806, "learning_rate": 3.933661359653895e-07, "loss": 1.6628, "step": 88701 }, { "epoch": 2.95, "grad_norm": 0.6930119395256042, "learning_rate": 3.928312427727176e-07, "loss": 1.6599, "step": 88702 }, { "epoch": 2.95, "grad_norm": 0.7275763154029846, "learning_rate": 3.9229671325903265e-07, "loss": 1.7044, "step": 88703 }, { "epoch": 2.95, "grad_norm": 0.7067446112632751, "learning_rate": 3.917625474249342e-07, "loss": 1.7202, "step": 88704 }, { "epoch": 2.95, "grad_norm": 0.710341215133667, "learning_rate": 3.912287452710883e-07, "loss": 1.6575, "step": 88705 }, { "epoch": 2.95, "grad_norm": 0.6976802945137024, "learning_rate": 3.906953067981611e-07, "loss": 1.6373, "step": 88706 }, { "epoch": 2.95, "grad_norm": 0.7248265147209167, "learning_rate": 3.901622320067854e-07, "loss": 1.6681, "step": 88707 }, { "epoch": 2.95, "grad_norm": 0.7145610451698303, "learning_rate": 3.896295208975941e-07, "loss": 1.7148, "step": 88708 }, { "epoch": 2.95, "grad_norm": 0.6892235279083252, "learning_rate": 3.8909717347128667e-07, "loss": 1.6764, "step": 88709 }, { "epoch": 2.95, "grad_norm": 0.6993215680122375, "learning_rate": 3.885651897284292e-07, "loss": 1.6798, "step": 88710 }, { "epoch": 2.95, "grad_norm": 0.7092376351356506, "learning_rate": 3.880335696696879e-07, "loss": 1.7479, "step": 88711 }, { "epoch": 2.95, "grad_norm": 0.7158111333847046, "learning_rate": 3.875023132957622e-07, "loss": 1.6443, "step": 88712 }, { "epoch": 2.95, "grad_norm": 0.7022130489349365, "learning_rate": 3.869714206072849e-07, "loss": 1.6275, "step": 88713 }, { "epoch": 2.95, "grad_norm": 0.7008366584777832, "learning_rate": 3.864408916048223e-07, "loss": 1.6392, "step": 88714 }, { "epoch": 2.95, "grad_norm": 0.7025336623191833, "learning_rate": 3.8591072628910704e-07, "loss": 1.6478, "step": 88715 }, { "epoch": 2.95, "grad_norm": 0.7055830955505371, "learning_rate": 3.853809246607387e-07, "loss": 1.6137, "step": 88716 }, { "epoch": 2.95, "grad_norm": 0.6918035745620728, "learning_rate": 3.8485148672038336e-07, "loss": 1.6988, "step": 88717 }, { "epoch": 2.95, "grad_norm": 0.6978312134742737, "learning_rate": 3.843224124686739e-07, "loss": 1.7467, "step": 88718 }, { "epoch": 2.95, "grad_norm": 0.7305780649185181, "learning_rate": 3.8379370190624315e-07, "loss": 1.6748, "step": 88719 }, { "epoch": 2.95, "grad_norm": 0.7216647267341614, "learning_rate": 3.8326535503375723e-07, "loss": 1.6677, "step": 88720 }, { "epoch": 2.95, "grad_norm": 0.7190180420875549, "learning_rate": 3.8273737185181566e-07, "loss": 1.6651, "step": 88721 }, { "epoch": 2.95, "grad_norm": 0.7010040879249573, "learning_rate": 3.822097523610845e-07, "loss": 1.6758, "step": 88722 }, { "epoch": 2.95, "grad_norm": 0.7210472822189331, "learning_rate": 3.8168249656223005e-07, "loss": 1.6419, "step": 88723 }, { "epoch": 2.95, "grad_norm": 0.7072400450706482, "learning_rate": 3.811556044558517e-07, "loss": 1.6854, "step": 88724 }, { "epoch": 2.95, "grad_norm": 0.7044793963432312, "learning_rate": 3.8062907604258233e-07, "loss": 1.7382, "step": 88725 }, { "epoch": 2.95, "grad_norm": 0.7122548818588257, "learning_rate": 3.801029113231213e-07, "loss": 1.7149, "step": 88726 }, { "epoch": 2.95, "grad_norm": 0.7046758532524109, "learning_rate": 3.7957711029803496e-07, "loss": 1.6765, "step": 88727 }, { "epoch": 2.95, "grad_norm": 0.7100145816802979, "learning_rate": 3.7905167296798933e-07, "loss": 1.656, "step": 88728 }, { "epoch": 2.95, "grad_norm": 0.7021245360374451, "learning_rate": 3.785265993336506e-07, "loss": 1.6323, "step": 88729 }, { "epoch": 2.95, "grad_norm": 0.712566077709198, "learning_rate": 3.7800188939565157e-07, "loss": 1.6856, "step": 88730 }, { "epoch": 2.95, "grad_norm": 0.6875772476196289, "learning_rate": 3.7747754315459177e-07, "loss": 1.674, "step": 88731 }, { "epoch": 2.95, "grad_norm": 0.6902663111686707, "learning_rate": 3.7695356061110404e-07, "loss": 1.5977, "step": 88732 }, { "epoch": 2.95, "grad_norm": 0.7169798612594604, "learning_rate": 3.764299417658878e-07, "loss": 1.6681, "step": 88733 }, { "epoch": 2.95, "grad_norm": 0.6995959281921387, "learning_rate": 3.759066866195093e-07, "loss": 1.6743, "step": 88734 }, { "epoch": 2.95, "grad_norm": 0.748909056186676, "learning_rate": 3.753837951726346e-07, "loss": 1.6789, "step": 88735 }, { "epoch": 2.95, "grad_norm": 0.7084731459617615, "learning_rate": 3.7486126742589664e-07, "loss": 1.6763, "step": 88736 }, { "epoch": 2.95, "grad_norm": 0.7052492499351501, "learning_rate": 3.7433910337996143e-07, "loss": 1.6656, "step": 88737 }, { "epoch": 2.95, "grad_norm": 0.7119078040122986, "learning_rate": 3.7381730303539524e-07, "loss": 1.6295, "step": 88738 }, { "epoch": 2.95, "grad_norm": 0.6972931027412415, "learning_rate": 3.7329586639286423e-07, "loss": 1.6372, "step": 88739 }, { "epoch": 2.95, "grad_norm": 0.7176127433776855, "learning_rate": 3.727747934530345e-07, "loss": 1.6668, "step": 88740 }, { "epoch": 2.95, "grad_norm": 0.7216664552688599, "learning_rate": 3.722540842165056e-07, "loss": 1.7182, "step": 88741 }, { "epoch": 2.95, "grad_norm": 0.7471210956573486, "learning_rate": 3.71733738683877e-07, "loss": 1.6953, "step": 88742 }, { "epoch": 2.95, "grad_norm": 0.7245919108390808, "learning_rate": 3.712137568558482e-07, "loss": 1.6868, "step": 88743 }, { "epoch": 2.95, "grad_norm": 0.7280116081237793, "learning_rate": 3.706941387329854e-07, "loss": 1.6653, "step": 88744 }, { "epoch": 2.95, "grad_norm": 0.7342683672904968, "learning_rate": 3.7017488431598794e-07, "loss": 1.6516, "step": 88745 }, { "epoch": 2.95, "grad_norm": 0.7199809551239014, "learning_rate": 3.696559936054222e-07, "loss": 1.6651, "step": 88746 }, { "epoch": 2.95, "grad_norm": 0.711434006690979, "learning_rate": 3.691374666019542e-07, "loss": 1.6074, "step": 88747 }, { "epoch": 2.95, "grad_norm": 0.7304748892784119, "learning_rate": 3.686193033062168e-07, "loss": 1.6837, "step": 88748 }, { "epoch": 2.95, "grad_norm": 0.7325760126113892, "learning_rate": 3.6810150371880954e-07, "loss": 1.6823, "step": 88749 }, { "epoch": 2.95, "grad_norm": 0.724045991897583, "learning_rate": 3.675840678403985e-07, "loss": 1.7669, "step": 88750 }, { "epoch": 2.95, "grad_norm": 0.7375677227973938, "learning_rate": 3.670669956715833e-07, "loss": 1.6385, "step": 88751 }, { "epoch": 2.95, "grad_norm": 0.7099628448486328, "learning_rate": 3.6655028721299664e-07, "loss": 1.6899, "step": 88752 }, { "epoch": 2.95, "grad_norm": 0.7208730578422546, "learning_rate": 3.660339424652714e-07, "loss": 1.611, "step": 88753 }, { "epoch": 2.95, "grad_norm": 0.7161159515380859, "learning_rate": 3.655179614290071e-07, "loss": 1.6613, "step": 88754 }, { "epoch": 2.95, "grad_norm": 0.7273638844490051, "learning_rate": 3.6500234410490326e-07, "loss": 1.7401, "step": 88755 }, { "epoch": 2.95, "grad_norm": 0.715166449546814, "learning_rate": 3.6448709049349265e-07, "loss": 1.6627, "step": 88756 }, { "epoch": 2.95, "grad_norm": 0.725395143032074, "learning_rate": 3.6397220059547484e-07, "loss": 1.7004, "step": 88757 }, { "epoch": 2.95, "grad_norm": 0.7204145789146423, "learning_rate": 3.6345767441141595e-07, "loss": 1.6549, "step": 88758 }, { "epoch": 2.95, "grad_norm": 0.6972922086715698, "learning_rate": 3.6294351194201543e-07, "loss": 1.6301, "step": 88759 }, { "epoch": 2.95, "grad_norm": 0.7011711001396179, "learning_rate": 3.624297131878062e-07, "loss": 1.6915, "step": 88760 }, { "epoch": 2.95, "grad_norm": 0.7281026244163513, "learning_rate": 3.6191627814948776e-07, "loss": 1.6733, "step": 88761 }, { "epoch": 2.95, "grad_norm": 0.7132049202919006, "learning_rate": 3.6140320682762623e-07, "loss": 1.6674, "step": 88762 }, { "epoch": 2.95, "grad_norm": 0.7180752158164978, "learning_rate": 3.6089049922288783e-07, "loss": 1.7394, "step": 88763 }, { "epoch": 2.95, "grad_norm": 0.7308452725410461, "learning_rate": 3.60378155335872e-07, "loss": 1.6252, "step": 88764 }, { "epoch": 2.95, "grad_norm": 0.7337245345115662, "learning_rate": 3.598661751672449e-07, "loss": 1.6252, "step": 88765 }, { "epoch": 2.95, "grad_norm": 0.731916069984436, "learning_rate": 3.5935455871753947e-07, "loss": 1.6654, "step": 88766 }, { "epoch": 2.95, "grad_norm": 0.7164723873138428, "learning_rate": 3.5884330598745516e-07, "loss": 1.7405, "step": 88767 }, { "epoch": 2.95, "grad_norm": 0.7050173878669739, "learning_rate": 3.583324169775581e-07, "loss": 1.5974, "step": 88768 }, { "epoch": 2.95, "grad_norm": 0.6920865774154663, "learning_rate": 3.578218916885145e-07, "loss": 1.6404, "step": 88769 }, { "epoch": 2.95, "grad_norm": 0.7050734758377075, "learning_rate": 3.5731173012095714e-07, "loss": 1.6923, "step": 88770 }, { "epoch": 2.95, "grad_norm": 0.7159554362297058, "learning_rate": 3.56801932275419e-07, "loss": 1.671, "step": 88771 }, { "epoch": 2.95, "grad_norm": 0.7474454641342163, "learning_rate": 3.562924981525994e-07, "loss": 1.7106, "step": 88772 }, { "epoch": 2.95, "grad_norm": 0.7274790406227112, "learning_rate": 3.5578342775309796e-07, "loss": 1.754, "step": 88773 }, { "epoch": 2.95, "grad_norm": 0.7052665948867798, "learning_rate": 3.552747210775142e-07, "loss": 1.6142, "step": 88774 }, { "epoch": 2.95, "grad_norm": 0.7250223755836487, "learning_rate": 3.547663781264476e-07, "loss": 1.7342, "step": 88775 }, { "epoch": 2.95, "grad_norm": 0.7293108701705933, "learning_rate": 3.5425839890059757e-07, "loss": 1.6304, "step": 88776 }, { "epoch": 2.95, "grad_norm": 0.6985323429107666, "learning_rate": 3.537507834004971e-07, "loss": 1.6084, "step": 88777 }, { "epoch": 2.95, "grad_norm": 0.7238544225692749, "learning_rate": 3.5324353162677897e-07, "loss": 1.719, "step": 88778 }, { "epoch": 2.95, "grad_norm": 0.7071573734283447, "learning_rate": 3.5273664358010933e-07, "loss": 1.6691, "step": 88779 }, { "epoch": 2.95, "grad_norm": 0.7107959389686584, "learning_rate": 3.5223011926105437e-07, "loss": 1.6912, "step": 88780 }, { "epoch": 2.95, "grad_norm": 0.7402689456939697, "learning_rate": 3.5172395867021364e-07, "loss": 1.7143, "step": 88781 }, { "epoch": 2.95, "grad_norm": 0.7103473544120789, "learning_rate": 3.5121816180825323e-07, "loss": 1.6839, "step": 88782 }, { "epoch": 2.95, "grad_norm": 0.7232757210731506, "learning_rate": 3.507127286757394e-07, "loss": 1.689, "step": 88783 }, { "epoch": 2.95, "grad_norm": 0.7254912257194519, "learning_rate": 3.5020765927333826e-07, "loss": 1.6263, "step": 88784 }, { "epoch": 2.95, "grad_norm": 0.7024499177932739, "learning_rate": 3.4970295360161604e-07, "loss": 1.6365, "step": 88785 }, { "epoch": 2.95, "grad_norm": 0.7284456491470337, "learning_rate": 3.491986116612389e-07, "loss": 1.7056, "step": 88786 }, { "epoch": 2.95, "grad_norm": 0.7305552363395691, "learning_rate": 3.4869463345273965e-07, "loss": 1.6901, "step": 88787 }, { "epoch": 2.95, "grad_norm": 0.7207750082015991, "learning_rate": 3.4819101897678446e-07, "loss": 1.6511, "step": 88788 }, { "epoch": 2.95, "grad_norm": 0.7303215265274048, "learning_rate": 3.476877682339729e-07, "loss": 1.6534, "step": 88789 }, { "epoch": 2.95, "grad_norm": 0.7180381417274475, "learning_rate": 3.471848812249378e-07, "loss": 1.7309, "step": 88790 }, { "epoch": 2.95, "grad_norm": 0.7232392430305481, "learning_rate": 3.4668235795024535e-07, "loss": 1.6704, "step": 88791 }, { "epoch": 2.95, "grad_norm": 0.7455477714538574, "learning_rate": 3.4618019841052836e-07, "loss": 1.5996, "step": 88792 }, { "epoch": 2.95, "grad_norm": 0.7334597706794739, "learning_rate": 3.4567840260641967e-07, "loss": 1.6272, "step": 88793 }, { "epoch": 2.95, "grad_norm": 0.7134358286857605, "learning_rate": 3.4517697053848546e-07, "loss": 1.7164, "step": 88794 }, { "epoch": 2.95, "grad_norm": 0.7134519815444946, "learning_rate": 3.4467590220735864e-07, "loss": 1.709, "step": 88795 }, { "epoch": 2.95, "grad_norm": 0.7276396155357361, "learning_rate": 3.441751976136387e-07, "loss": 1.7121, "step": 88796 }, { "epoch": 2.95, "grad_norm": 0.7145563364028931, "learning_rate": 3.436748567579251e-07, "loss": 1.6778, "step": 88797 }, { "epoch": 2.95, "grad_norm": 0.7076607346534729, "learning_rate": 3.4317487964088396e-07, "loss": 1.6665, "step": 88798 }, { "epoch": 2.95, "grad_norm": 0.7343770861625671, "learning_rate": 3.4267526626304833e-07, "loss": 1.7486, "step": 88799 }, { "epoch": 2.95, "grad_norm": 0.7184363007545471, "learning_rate": 3.421760166250509e-07, "loss": 1.6488, "step": 88800 }, { "epoch": 2.95, "grad_norm": 0.7161813378334045, "learning_rate": 3.4167713072749123e-07, "loss": 1.6999, "step": 88801 }, { "epoch": 2.95, "grad_norm": 0.696876585483551, "learning_rate": 3.411786085709689e-07, "loss": 1.66, "step": 88802 }, { "epoch": 2.95, "grad_norm": 0.7026320695877075, "learning_rate": 3.4068045015615e-07, "loss": 1.6638, "step": 88803 }, { "epoch": 2.95, "grad_norm": 0.7474796772003174, "learning_rate": 3.4018265548356737e-07, "loss": 1.6698, "step": 88804 }, { "epoch": 2.95, "grad_norm": 0.7195459008216858, "learning_rate": 3.3968522455385397e-07, "loss": 1.6757, "step": 88805 }, { "epoch": 2.95, "grad_norm": 0.7002943754196167, "learning_rate": 3.3918815736760917e-07, "loss": 1.613, "step": 88806 }, { "epoch": 2.95, "grad_norm": 0.7286636829376221, "learning_rate": 3.386914539254326e-07, "loss": 1.6496, "step": 88807 }, { "epoch": 2.95, "grad_norm": 0.7248395085334778, "learning_rate": 3.3819511422792376e-07, "loss": 1.6671, "step": 88808 }, { "epoch": 2.95, "grad_norm": 0.7198775410652161, "learning_rate": 3.376991382757155e-07, "loss": 1.6777, "step": 88809 }, { "epoch": 2.95, "grad_norm": 0.6947154402732849, "learning_rate": 3.3720352606937396e-07, "loss": 1.6651, "step": 88810 }, { "epoch": 2.95, "grad_norm": 0.698535144329071, "learning_rate": 3.367082776094987e-07, "loss": 1.6293, "step": 88811 }, { "epoch": 2.95, "grad_norm": 0.7061992883682251, "learning_rate": 3.3621339289672253e-07, "loss": 1.5684, "step": 88812 }, { "epoch": 2.95, "grad_norm": 0.7063260674476624, "learning_rate": 3.3571887193164505e-07, "loss": 1.716, "step": 88813 }, { "epoch": 2.95, "grad_norm": 0.6883248090744019, "learning_rate": 3.3522471471483234e-07, "loss": 1.6401, "step": 88814 }, { "epoch": 2.95, "grad_norm": 0.7573577165603638, "learning_rate": 3.3473092124688404e-07, "loss": 1.6747, "step": 88815 }, { "epoch": 2.95, "grad_norm": 0.7073036432266235, "learning_rate": 3.3423749152846625e-07, "loss": 1.7006, "step": 88816 }, { "epoch": 2.95, "grad_norm": 0.7041804790496826, "learning_rate": 3.3374442556007855e-07, "loss": 1.6333, "step": 88817 }, { "epoch": 2.95, "grad_norm": 0.6983028650283813, "learning_rate": 3.332517233424203e-07, "loss": 1.6584, "step": 88818 }, { "epoch": 2.96, "grad_norm": 0.7253610491752625, "learning_rate": 3.327593848760246e-07, "loss": 1.7037, "step": 88819 }, { "epoch": 2.96, "grad_norm": 0.7057068943977356, "learning_rate": 3.3226741016149083e-07, "loss": 1.6868, "step": 88820 }, { "epoch": 2.96, "grad_norm": 0.7229793071746826, "learning_rate": 3.3177579919941853e-07, "loss": 1.6972, "step": 88821 }, { "epoch": 2.96, "grad_norm": 0.7013037204742432, "learning_rate": 3.312845519904406e-07, "loss": 1.6772, "step": 88822 }, { "epoch": 2.96, "grad_norm": 0.7308284640312195, "learning_rate": 3.3079366853515645e-07, "loss": 1.6831, "step": 88823 }, { "epoch": 2.96, "grad_norm": 0.7228513956069946, "learning_rate": 3.3030314883409904e-07, "loss": 1.6757, "step": 88824 }, { "epoch": 2.96, "grad_norm": 0.7176613807678223, "learning_rate": 3.298129928879012e-07, "loss": 1.7263, "step": 88825 }, { "epoch": 2.96, "grad_norm": 0.6982353329658508, "learning_rate": 3.2932320069716243e-07, "loss": 1.6272, "step": 88826 }, { "epoch": 2.96, "grad_norm": 0.725082516670227, "learning_rate": 3.28833772262449e-07, "loss": 1.6981, "step": 88827 }, { "epoch": 2.96, "grad_norm": 0.7093110680580139, "learning_rate": 3.2834470758439367e-07, "loss": 1.6782, "step": 88828 }, { "epoch": 2.96, "grad_norm": 0.701694667339325, "learning_rate": 3.2785600666359604e-07, "loss": 1.6138, "step": 88829 }, { "epoch": 2.96, "grad_norm": 0.73260498046875, "learning_rate": 3.273676695005889e-07, "loss": 1.7399, "step": 88830 }, { "epoch": 2.96, "grad_norm": 0.7121375799179077, "learning_rate": 3.268796960960052e-07, "loss": 1.6522, "step": 88831 }, { "epoch": 2.96, "grad_norm": 0.7623316049575806, "learning_rate": 3.2639208645044434e-07, "loss": 1.6892, "step": 88832 }, { "epoch": 2.96, "grad_norm": 0.7048251032829285, "learning_rate": 3.2590484056450596e-07, "loss": 1.7327, "step": 88833 }, { "epoch": 2.96, "grad_norm": 0.7292963862419128, "learning_rate": 3.254179584387562e-07, "loss": 1.7766, "step": 88834 }, { "epoch": 2.96, "grad_norm": 0.709656298160553, "learning_rate": 3.249314400737946e-07, "loss": 1.6172, "step": 88835 }, { "epoch": 2.96, "grad_norm": 0.7230091094970703, "learning_rate": 3.244452854701873e-07, "loss": 1.7281, "step": 88836 }, { "epoch": 2.96, "grad_norm": 0.7001585960388184, "learning_rate": 3.239594946286006e-07, "loss": 1.6631, "step": 88837 }, { "epoch": 2.96, "grad_norm": 0.7073041796684265, "learning_rate": 3.2347406754953397e-07, "loss": 1.6087, "step": 88838 }, { "epoch": 2.96, "grad_norm": 0.725760817527771, "learning_rate": 3.2298900423358696e-07, "loss": 1.6857, "step": 88839 }, { "epoch": 2.96, "grad_norm": 0.7080152034759521, "learning_rate": 3.2250430468142576e-07, "loss": 1.6217, "step": 88840 }, { "epoch": 2.96, "grad_norm": 0.7040406465530396, "learning_rate": 3.220199688935832e-07, "loss": 1.6688, "step": 88841 }, { "epoch": 2.96, "grad_norm": 0.7200474143028259, "learning_rate": 3.215359968706255e-07, "loss": 1.722, "step": 88842 }, { "epoch": 2.96, "grad_norm": 0.7084319591522217, "learning_rate": 3.210523886131855e-07, "loss": 1.6112, "step": 88843 }, { "epoch": 2.96, "grad_norm": 0.7265652418136597, "learning_rate": 3.205691441218294e-07, "loss": 1.7027, "step": 88844 }, { "epoch": 2.96, "grad_norm": 0.7411604523658752, "learning_rate": 3.200862633971568e-07, "loss": 1.7381, "step": 88845 }, { "epoch": 2.96, "grad_norm": 0.6992365121841431, "learning_rate": 3.1960374643973383e-07, "loss": 1.6446, "step": 88846 }, { "epoch": 2.96, "grad_norm": 0.7267987132072449, "learning_rate": 3.191215932501601e-07, "loss": 1.6014, "step": 88847 }, { "epoch": 2.96, "grad_norm": 0.7203852534294128, "learning_rate": 3.1863980382900165e-07, "loss": 1.7012, "step": 88848 }, { "epoch": 2.96, "grad_norm": 0.7201332449913025, "learning_rate": 3.181583781768582e-07, "loss": 1.5917, "step": 88849 }, { "epoch": 2.96, "grad_norm": 0.7009822130203247, "learning_rate": 3.1767731629432915e-07, "loss": 1.6673, "step": 88850 }, { "epoch": 2.96, "grad_norm": 0.7401792407035828, "learning_rate": 3.171966181819807e-07, "loss": 1.7286, "step": 88851 }, { "epoch": 2.96, "grad_norm": 0.7044999599456787, "learning_rate": 3.167162838404125e-07, "loss": 1.6486, "step": 88852 }, { "epoch": 2.96, "grad_norm": 0.7055780291557312, "learning_rate": 3.162363132701573e-07, "loss": 1.6714, "step": 88853 }, { "epoch": 2.96, "grad_norm": 0.7478792667388916, "learning_rate": 3.1575670647188135e-07, "loss": 1.6225, "step": 88854 }, { "epoch": 2.96, "grad_norm": 0.7226471900939941, "learning_rate": 3.152774634460842e-07, "loss": 1.6424, "step": 88855 }, { "epoch": 2.96, "grad_norm": 0.7185769081115723, "learning_rate": 3.147985841933987e-07, "loss": 1.6335, "step": 88856 }, { "epoch": 2.96, "grad_norm": 0.7060601115226746, "learning_rate": 3.14320068714391e-07, "loss": 1.5917, "step": 88857 }, { "epoch": 2.96, "grad_norm": 0.7000666856765747, "learning_rate": 3.1384191700962735e-07, "loss": 1.6904, "step": 88858 }, { "epoch": 2.96, "grad_norm": 0.7111974954605103, "learning_rate": 3.133641290797406e-07, "loss": 1.7009, "step": 88859 }, { "epoch": 2.96, "grad_norm": 0.716644287109375, "learning_rate": 3.128867049252304e-07, "loss": 1.5914, "step": 88860 }, { "epoch": 2.96, "grad_norm": 0.7182100415229797, "learning_rate": 3.1240964454676274e-07, "loss": 1.6101, "step": 88861 }, { "epoch": 2.96, "grad_norm": 0.7412385940551758, "learning_rate": 3.1193294794483736e-07, "loss": 1.6881, "step": 88862 }, { "epoch": 2.96, "grad_norm": 0.7142832279205322, "learning_rate": 3.11456615120087e-07, "loss": 1.6852, "step": 88863 }, { "epoch": 2.96, "grad_norm": 0.6938793063163757, "learning_rate": 3.1098064607304464e-07, "loss": 1.609, "step": 88864 }, { "epoch": 2.96, "grad_norm": 0.7173779010772705, "learning_rate": 3.10505040804343e-07, "loss": 1.6874, "step": 88865 }, { "epoch": 2.96, "grad_norm": 0.7252392172813416, "learning_rate": 3.1002979931451513e-07, "loss": 1.7142, "step": 88866 }, { "epoch": 2.96, "grad_norm": 0.7044895887374878, "learning_rate": 3.095549216041604e-07, "loss": 1.6662, "step": 88867 }, { "epoch": 2.96, "grad_norm": 0.7214853763580322, "learning_rate": 3.090804076738451e-07, "loss": 1.6783, "step": 88868 }, { "epoch": 2.96, "grad_norm": 0.7107964754104614, "learning_rate": 3.086062575241355e-07, "loss": 1.6834, "step": 88869 }, { "epoch": 2.96, "grad_norm": 0.709907054901123, "learning_rate": 3.081324711556643e-07, "loss": 1.6821, "step": 88870 }, { "epoch": 2.96, "grad_norm": 0.7112919092178345, "learning_rate": 3.076590485689312e-07, "loss": 1.6597, "step": 88871 }, { "epoch": 2.96, "grad_norm": 0.7148525714874268, "learning_rate": 3.071859897645357e-07, "loss": 1.6058, "step": 88872 }, { "epoch": 2.96, "grad_norm": 0.7061890363693237, "learning_rate": 3.067132947430773e-07, "loss": 1.6209, "step": 88873 }, { "epoch": 2.96, "grad_norm": 0.7055331468582153, "learning_rate": 3.062409635050889e-07, "loss": 1.7361, "step": 88874 }, { "epoch": 2.96, "grad_norm": 0.7153136134147644, "learning_rate": 3.0576899605120333e-07, "loss": 1.6234, "step": 88875 }, { "epoch": 2.96, "grad_norm": 0.7173448801040649, "learning_rate": 3.052973923819202e-07, "loss": 1.6739, "step": 88876 }, { "epoch": 2.96, "grad_norm": 0.7067510485649109, "learning_rate": 3.0482615249787234e-07, "loss": 1.672, "step": 88877 }, { "epoch": 2.96, "grad_norm": 0.7105202078819275, "learning_rate": 3.043552763995927e-07, "loss": 1.6328, "step": 88878 }, { "epoch": 2.96, "grad_norm": 0.7325412631034851, "learning_rate": 3.038847640876474e-07, "loss": 1.7684, "step": 88879 }, { "epoch": 2.96, "grad_norm": 0.7253923416137695, "learning_rate": 3.034146155626693e-07, "loss": 1.7311, "step": 88880 }, { "epoch": 2.96, "grad_norm": 0.7338435053825378, "learning_rate": 3.029448308251581e-07, "loss": 1.714, "step": 88881 }, { "epoch": 2.96, "grad_norm": 0.7028384208679199, "learning_rate": 3.024754098757465e-07, "loss": 1.6689, "step": 88882 }, { "epoch": 2.96, "grad_norm": 0.7105569243431091, "learning_rate": 3.0200635271493413e-07, "loss": 1.6788, "step": 88883 }, { "epoch": 2.96, "grad_norm": 0.7249917387962341, "learning_rate": 3.015376593433538e-07, "loss": 1.7484, "step": 88884 }, { "epoch": 2.96, "grad_norm": 0.7155511975288391, "learning_rate": 3.010693297615385e-07, "loss": 1.6879, "step": 88885 }, { "epoch": 2.96, "grad_norm": 0.7192348837852478, "learning_rate": 3.006013639700544e-07, "loss": 1.6813, "step": 88886 }, { "epoch": 2.96, "grad_norm": 0.7053659558296204, "learning_rate": 3.00133761969501e-07, "loss": 1.6644, "step": 88887 }, { "epoch": 2.96, "grad_norm": 0.7264072299003601, "learning_rate": 2.9966652376041125e-07, "loss": 1.6631, "step": 88888 }, { "epoch": 2.96, "grad_norm": 0.7287042140960693, "learning_rate": 2.991996493433846e-07, "loss": 1.673, "step": 88889 }, { "epoch": 2.96, "grad_norm": 0.7167556881904602, "learning_rate": 2.9873313871895397e-07, "loss": 1.7145, "step": 88890 }, { "epoch": 2.96, "grad_norm": 0.7364509701728821, "learning_rate": 2.98266991887719e-07, "loss": 1.7369, "step": 88891 }, { "epoch": 2.96, "grad_norm": 0.7163571715354919, "learning_rate": 2.978012088502124e-07, "loss": 1.7043, "step": 88892 }, { "epoch": 2.96, "grad_norm": 0.736236572265625, "learning_rate": 2.9733578960703385e-07, "loss": 1.7705, "step": 88893 }, { "epoch": 2.96, "grad_norm": 0.7310611009597778, "learning_rate": 2.968707341587162e-07, "loss": 1.7056, "step": 88894 }, { "epoch": 2.96, "grad_norm": 0.6977512240409851, "learning_rate": 2.9640604250582566e-07, "loss": 1.6432, "step": 88895 }, { "epoch": 2.96, "grad_norm": 0.7412763833999634, "learning_rate": 2.9594171464896177e-07, "loss": 1.6644, "step": 88896 }, { "epoch": 2.96, "grad_norm": 0.7121925950050354, "learning_rate": 2.954777505886574e-07, "loss": 1.6558, "step": 88897 }, { "epoch": 2.96, "grad_norm": 0.7366506457328796, "learning_rate": 2.9501415032547883e-07, "loss": 1.7271, "step": 88898 }, { "epoch": 2.96, "grad_norm": 0.7021796703338623, "learning_rate": 2.945509138600255e-07, "loss": 1.5599, "step": 88899 }, { "epoch": 2.96, "grad_norm": 0.7106358408927917, "learning_rate": 2.9408804119279703e-07, "loss": 1.6516, "step": 88900 }, { "epoch": 2.96, "grad_norm": 0.6992387175559998, "learning_rate": 2.9362553232439303e-07, "loss": 1.5771, "step": 88901 }, { "epoch": 2.96, "grad_norm": 0.7138094305992126, "learning_rate": 2.9316338725534625e-07, "loss": 1.6985, "step": 88902 }, { "epoch": 2.96, "grad_norm": 0.7103444933891296, "learning_rate": 2.927016059862897e-07, "loss": 1.7224, "step": 88903 }, { "epoch": 2.96, "grad_norm": 0.7275103330612183, "learning_rate": 2.922401885176895e-07, "loss": 1.7128, "step": 88904 }, { "epoch": 2.96, "grad_norm": 0.711077094078064, "learning_rate": 2.917791348501786e-07, "loss": 1.6678, "step": 88905 }, { "epoch": 2.96, "grad_norm": 0.7160847187042236, "learning_rate": 2.913184449842565e-07, "loss": 1.7046, "step": 88906 }, { "epoch": 2.96, "grad_norm": 0.6948296427726746, "learning_rate": 2.908581189205228e-07, "loss": 1.6272, "step": 88907 }, { "epoch": 2.96, "grad_norm": 0.7301759123802185, "learning_rate": 2.903981566595437e-07, "loss": 1.6429, "step": 88908 }, { "epoch": 2.96, "grad_norm": 0.7112939357757568, "learning_rate": 2.8993855820185205e-07, "loss": 1.6962, "step": 88909 }, { "epoch": 2.96, "grad_norm": 0.7053059339523315, "learning_rate": 2.8947932354801417e-07, "loss": 1.6685, "step": 88910 }, { "epoch": 2.96, "grad_norm": 0.7047949433326721, "learning_rate": 2.8902045269859617e-07, "loss": 1.6119, "step": 88911 }, { "epoch": 2.96, "grad_norm": 0.7106944918632507, "learning_rate": 2.8856194565413104e-07, "loss": 1.7818, "step": 88912 }, { "epoch": 2.96, "grad_norm": 0.712498664855957, "learning_rate": 2.881038024152182e-07, "loss": 1.6419, "step": 88913 }, { "epoch": 2.96, "grad_norm": 0.698220431804657, "learning_rate": 2.876460229823574e-07, "loss": 1.6144, "step": 88914 }, { "epoch": 2.96, "grad_norm": 0.6949982047080994, "learning_rate": 2.87188607356148e-07, "loss": 1.632, "step": 88915 }, { "epoch": 2.96, "grad_norm": 0.7245653867721558, "learning_rate": 2.86731555537123e-07, "loss": 1.7167, "step": 88916 }, { "epoch": 2.96, "grad_norm": 0.7223190069198608, "learning_rate": 2.862748675258486e-07, "loss": 1.7358, "step": 88917 }, { "epoch": 2.96, "grad_norm": 0.7269719243049622, "learning_rate": 2.85818543322891e-07, "loss": 1.6491, "step": 88918 }, { "epoch": 2.96, "grad_norm": 0.732697606086731, "learning_rate": 2.853625829287498e-07, "loss": 1.6876, "step": 88919 }, { "epoch": 2.96, "grad_norm": 0.7023510932922363, "learning_rate": 2.8490698634405785e-07, "loss": 1.7048, "step": 88920 }, { "epoch": 2.96, "grad_norm": 0.7074896097183228, "learning_rate": 2.844517535693147e-07, "loss": 1.695, "step": 88921 }, { "epoch": 2.96, "grad_norm": 0.7332183718681335, "learning_rate": 2.8399688460508664e-07, "loss": 1.7924, "step": 88922 }, { "epoch": 2.96, "grad_norm": 0.7122454643249512, "learning_rate": 2.835423794519065e-07, "loss": 1.6551, "step": 88923 }, { "epoch": 2.96, "grad_norm": 0.726270854473114, "learning_rate": 2.8308823811037384e-07, "loss": 1.7017, "step": 88924 }, { "epoch": 2.96, "grad_norm": 0.7207183241844177, "learning_rate": 2.826344605809883e-07, "loss": 1.596, "step": 88925 }, { "epoch": 2.96, "grad_norm": 0.7297680377960205, "learning_rate": 2.821810468643493e-07, "loss": 1.637, "step": 88926 }, { "epoch": 2.96, "grad_norm": 0.7282513976097107, "learning_rate": 2.817279969609565e-07, "loss": 1.694, "step": 88927 }, { "epoch": 2.96, "grad_norm": 0.6969172954559326, "learning_rate": 2.812753108714094e-07, "loss": 1.595, "step": 88928 }, { "epoch": 2.96, "grad_norm": 0.7343285083770752, "learning_rate": 2.808229885962077e-07, "loss": 1.7076, "step": 88929 }, { "epoch": 2.96, "grad_norm": 0.7585160732269287, "learning_rate": 2.8037103013595074e-07, "loss": 1.6752, "step": 88930 }, { "epoch": 2.96, "grad_norm": 0.6967222094535828, "learning_rate": 2.799194354911383e-07, "loss": 1.6897, "step": 88931 }, { "epoch": 2.96, "grad_norm": 0.6914344429969788, "learning_rate": 2.7946820466233646e-07, "loss": 1.7126, "step": 88932 }, { "epoch": 2.96, "grad_norm": 0.6991274952888489, "learning_rate": 2.790173376501115e-07, "loss": 1.6826, "step": 88933 }, { "epoch": 2.96, "grad_norm": 0.7247390151023865, "learning_rate": 2.785668344549963e-07, "loss": 1.6504, "step": 88934 }, { "epoch": 2.96, "grad_norm": 0.7169676423072815, "learning_rate": 2.7811669507752377e-07, "loss": 1.6623, "step": 88935 }, { "epoch": 2.96, "grad_norm": 0.7302485704421997, "learning_rate": 2.7766691951829344e-07, "loss": 1.6058, "step": 88936 }, { "epoch": 2.96, "grad_norm": 0.7143549919128418, "learning_rate": 2.7721750777780494e-07, "loss": 1.7102, "step": 88937 }, { "epoch": 2.96, "grad_norm": 0.6930866241455078, "learning_rate": 2.767684598565911e-07, "loss": 1.6507, "step": 88938 }, { "epoch": 2.96, "grad_norm": 0.7390211224555969, "learning_rate": 2.7631977575521824e-07, "loss": 1.7064, "step": 88939 }, { "epoch": 2.96, "grad_norm": 0.7143072485923767, "learning_rate": 2.758714554742525e-07, "loss": 1.7311, "step": 88940 }, { "epoch": 2.96, "grad_norm": 0.717278778553009, "learning_rate": 2.7542349901419347e-07, "loss": 1.6809, "step": 88941 }, { "epoch": 2.96, "grad_norm": 0.7025363445281982, "learning_rate": 2.749759063756074e-07, "loss": 1.6156, "step": 88942 }, { "epoch": 2.96, "grad_norm": 0.7136560082435608, "learning_rate": 2.745286775590605e-07, "loss": 1.7541, "step": 88943 }, { "epoch": 2.96, "grad_norm": 0.6962054371833801, "learning_rate": 2.7408181256505233e-07, "loss": 1.6243, "step": 88944 }, { "epoch": 2.96, "grad_norm": 0.7056282758712769, "learning_rate": 2.736353113941492e-07, "loss": 1.568, "step": 88945 }, { "epoch": 2.96, "grad_norm": 0.7447604537010193, "learning_rate": 2.731891740469172e-07, "loss": 1.6268, "step": 88946 }, { "epoch": 2.96, "grad_norm": 0.70933598279953, "learning_rate": 2.7274340052385604e-07, "loss": 1.6408, "step": 88947 }, { "epoch": 2.96, "grad_norm": 0.7219752073287964, "learning_rate": 2.7229799082549855e-07, "loss": 1.708, "step": 88948 }, { "epoch": 2.96, "grad_norm": 0.723459005355835, "learning_rate": 2.7185294495241097e-07, "loss": 1.7193, "step": 88949 }, { "epoch": 2.96, "grad_norm": 0.6947771906852722, "learning_rate": 2.714082629051595e-07, "loss": 1.6737, "step": 88950 }, { "epoch": 2.96, "grad_norm": 0.7200278043746948, "learning_rate": 2.7096394468424375e-07, "loss": 1.6415, "step": 88951 }, { "epoch": 2.96, "grad_norm": 0.7011930346488953, "learning_rate": 2.7051999029019664e-07, "loss": 1.6109, "step": 88952 }, { "epoch": 2.96, "grad_norm": 0.7132478356361389, "learning_rate": 2.700763997236177e-07, "loss": 1.6324, "step": 88953 }, { "epoch": 2.96, "grad_norm": 0.7188234925270081, "learning_rate": 2.6963317298497325e-07, "loss": 1.7313, "step": 88954 }, { "epoch": 2.96, "grad_norm": 0.71665358543396, "learning_rate": 2.691903100748294e-07, "loss": 1.7172, "step": 88955 }, { "epoch": 2.96, "grad_norm": 0.6787727475166321, "learning_rate": 2.687478109937191e-07, "loss": 1.7204, "step": 88956 }, { "epoch": 2.96, "grad_norm": 0.7081052660942078, "learning_rate": 2.6830567574220863e-07, "loss": 1.6645, "step": 88957 }, { "epoch": 2.96, "grad_norm": 0.7260532975196838, "learning_rate": 2.678639043207975e-07, "loss": 1.6779, "step": 88958 }, { "epoch": 2.96, "grad_norm": 0.7275497317314148, "learning_rate": 2.674224967300187e-07, "loss": 1.7585, "step": 88959 }, { "epoch": 2.96, "grad_norm": 0.7270209193229675, "learning_rate": 2.669814529704717e-07, "loss": 1.6863, "step": 88960 }, { "epoch": 2.96, "grad_norm": 0.7089122533798218, "learning_rate": 2.665407730426228e-07, "loss": 1.6537, "step": 88961 }, { "epoch": 2.96, "grad_norm": 0.7533680200576782, "learning_rate": 2.661004569470049e-07, "loss": 1.6552, "step": 88962 }, { "epoch": 2.96, "grad_norm": 0.7028031945228577, "learning_rate": 2.6566050468421753e-07, "loss": 1.6752, "step": 88963 }, { "epoch": 2.96, "grad_norm": 0.7131065130233765, "learning_rate": 2.65220916254727e-07, "loss": 1.7426, "step": 88964 }, { "epoch": 2.96, "grad_norm": 0.7217835187911987, "learning_rate": 2.647816916590995e-07, "loss": 1.6535, "step": 88965 }, { "epoch": 2.96, "grad_norm": 0.7075662612915039, "learning_rate": 2.643428308978679e-07, "loss": 1.6703, "step": 88966 }, { "epoch": 2.96, "grad_norm": 0.7356857657432556, "learning_rate": 2.639043339715652e-07, "loss": 1.757, "step": 88967 }, { "epoch": 2.96, "grad_norm": 0.7265695333480835, "learning_rate": 2.634662008806909e-07, "loss": 1.6846, "step": 88968 }, { "epoch": 2.96, "grad_norm": 0.7124454975128174, "learning_rate": 2.6302843162584466e-07, "loss": 1.5935, "step": 88969 }, { "epoch": 2.96, "grad_norm": 0.7259595990180969, "learning_rate": 2.625910262074926e-07, "loss": 1.6129, "step": 88970 }, { "epoch": 2.96, "grad_norm": 0.6916905045509338, "learning_rate": 2.621539846262011e-07, "loss": 1.6454, "step": 88971 }, { "epoch": 2.96, "grad_norm": 0.7000467777252197, "learning_rate": 2.617173068824696e-07, "loss": 1.7106, "step": 88972 }, { "epoch": 2.96, "grad_norm": 0.7237275838851929, "learning_rate": 2.6128099297686443e-07, "loss": 1.658, "step": 88973 }, { "epoch": 2.96, "grad_norm": 0.724262535572052, "learning_rate": 2.608450429099185e-07, "loss": 1.6655, "step": 88974 }, { "epoch": 2.96, "grad_norm": 0.7066609263420105, "learning_rate": 2.6040945668209803e-07, "loss": 1.7033, "step": 88975 }, { "epoch": 2.96, "grad_norm": 0.7051746845245361, "learning_rate": 2.5997423429400257e-07, "loss": 1.6514, "step": 88976 }, { "epoch": 2.96, "grad_norm": 1.7260288000106812, "learning_rate": 2.595393757461317e-07, "loss": 1.7133, "step": 88977 }, { "epoch": 2.96, "grad_norm": 0.7042157649993896, "learning_rate": 2.5910488103901837e-07, "loss": 1.6721, "step": 88978 }, { "epoch": 2.96, "grad_norm": 0.6962214112281799, "learning_rate": 2.586707501731955e-07, "loss": 1.6928, "step": 88979 }, { "epoch": 2.96, "grad_norm": 0.7144585251808167, "learning_rate": 2.5823698314916264e-07, "loss": 1.6532, "step": 88980 }, { "epoch": 2.96, "grad_norm": 0.7419148683547974, "learning_rate": 2.57803579967486e-07, "loss": 1.6947, "step": 88981 }, { "epoch": 2.96, "grad_norm": 0.7235355377197266, "learning_rate": 2.5737054062866524e-07, "loss": 1.6907, "step": 88982 }, { "epoch": 2.96, "grad_norm": 0.7332738637924194, "learning_rate": 2.569378651332332e-07, "loss": 1.6478, "step": 88983 }, { "epoch": 2.96, "grad_norm": 0.7214443683624268, "learning_rate": 2.5650555348168956e-07, "loss": 1.7815, "step": 88984 }, { "epoch": 2.96, "grad_norm": 0.6936025619506836, "learning_rate": 2.5607360567460043e-07, "loss": 1.6192, "step": 88985 }, { "epoch": 2.96, "grad_norm": 0.7044260501861572, "learning_rate": 2.556420217124655e-07, "loss": 1.6956, "step": 88986 }, { "epoch": 2.96, "grad_norm": 0.7050586342811584, "learning_rate": 2.5521080159581765e-07, "loss": 1.7191, "step": 88987 }, { "epoch": 2.96, "grad_norm": 0.7210046648979187, "learning_rate": 2.5477994532518973e-07, "loss": 1.6764, "step": 88988 }, { "epoch": 2.96, "grad_norm": 0.7226278185844421, "learning_rate": 2.5434945290108145e-07, "loss": 1.6895, "step": 88989 }, { "epoch": 2.96, "grad_norm": 0.6958476901054382, "learning_rate": 2.5391932432405895e-07, "loss": 1.6903, "step": 88990 }, { "epoch": 2.96, "grad_norm": 0.7132488489151001, "learning_rate": 2.534895595945552e-07, "loss": 1.6654, "step": 88991 }, { "epoch": 2.96, "grad_norm": 0.7250694036483765, "learning_rate": 2.5306015871320307e-07, "loss": 1.6462, "step": 88992 }, { "epoch": 2.96, "grad_norm": 0.7222781181335449, "learning_rate": 2.5263112168043554e-07, "loss": 1.6895, "step": 88993 }, { "epoch": 2.96, "grad_norm": 0.7063312530517578, "learning_rate": 2.522024484968188e-07, "loss": 1.6602, "step": 88994 }, { "epoch": 2.96, "grad_norm": 0.7171367406845093, "learning_rate": 2.517741391628858e-07, "loss": 1.6342, "step": 88995 }, { "epoch": 2.96, "grad_norm": 0.7241997122764587, "learning_rate": 2.513461936791028e-07, "loss": 1.6765, "step": 88996 }, { "epoch": 2.96, "grad_norm": 0.7024735808372498, "learning_rate": 2.5091861204603605e-07, "loss": 1.6358, "step": 88997 }, { "epoch": 2.96, "grad_norm": 0.7164335250854492, "learning_rate": 2.5049139426415177e-07, "loss": 1.6402, "step": 88998 }, { "epoch": 2.96, "grad_norm": 0.7015941739082336, "learning_rate": 2.5006454033404953e-07, "loss": 1.674, "step": 88999 }, { "epoch": 2.96, "grad_norm": 0.7304680347442627, "learning_rate": 2.496380502561957e-07, "loss": 1.6521, "step": 89000 }, { "epoch": 2.96, "grad_norm": 0.6980277299880981, "learning_rate": 2.492119240310897e-07, "loss": 1.7245, "step": 89001 }, { "epoch": 2.96, "grad_norm": 0.7007582783699036, "learning_rate": 2.487861616592979e-07, "loss": 1.7265, "step": 89002 }, { "epoch": 2.96, "grad_norm": 0.7329075336456299, "learning_rate": 2.483607631413198e-07, "loss": 1.7223, "step": 89003 }, { "epoch": 2.96, "grad_norm": 0.7502955794334412, "learning_rate": 2.4793572847765506e-07, "loss": 1.6757, "step": 89004 }, { "epoch": 2.96, "grad_norm": 0.7076334953308105, "learning_rate": 2.475110576688033e-07, "loss": 1.6209, "step": 89005 }, { "epoch": 2.96, "grad_norm": 0.7193751931190491, "learning_rate": 2.470867507153307e-07, "loss": 1.7694, "step": 89006 }, { "epoch": 2.96, "grad_norm": 0.7354468703269958, "learning_rate": 2.466628076177368e-07, "loss": 1.7, "step": 89007 }, { "epoch": 2.96, "grad_norm": 0.7148746252059937, "learning_rate": 2.462392283765213e-07, "loss": 1.6607, "step": 89008 }, { "epoch": 2.96, "grad_norm": 0.7430957555770874, "learning_rate": 2.458160129921838e-07, "loss": 1.7355, "step": 89009 }, { "epoch": 2.96, "grad_norm": 0.7198997735977173, "learning_rate": 2.453931614652904e-07, "loss": 1.6337, "step": 89010 }, { "epoch": 2.96, "grad_norm": 0.7136810421943665, "learning_rate": 2.4497067379630754e-07, "loss": 1.7847, "step": 89011 }, { "epoch": 2.96, "grad_norm": 0.7240933775901794, "learning_rate": 2.4454854998576803e-07, "loss": 1.7142, "step": 89012 }, { "epoch": 2.96, "grad_norm": 0.7084397077560425, "learning_rate": 2.441267900341715e-07, "loss": 1.6962, "step": 89013 }, { "epoch": 2.96, "grad_norm": 0.7082173824310303, "learning_rate": 2.437053939420508e-07, "loss": 1.7133, "step": 89014 }, { "epoch": 2.96, "grad_norm": 0.6974192261695862, "learning_rate": 2.432843617099056e-07, "loss": 1.5867, "step": 89015 }, { "epoch": 2.96, "grad_norm": 0.7194492220878601, "learning_rate": 2.428636933382355e-07, "loss": 1.6339, "step": 89016 }, { "epoch": 2.96, "grad_norm": 0.7285757064819336, "learning_rate": 2.424433888275734e-07, "loss": 1.7227, "step": 89017 }, { "epoch": 2.96, "grad_norm": 0.7004488706588745, "learning_rate": 2.420234481784189e-07, "loss": 1.6391, "step": 89018 }, { "epoch": 2.96, "grad_norm": 0.7410651445388794, "learning_rate": 2.416038713912383e-07, "loss": 1.7206, "step": 89019 }, { "epoch": 2.96, "grad_norm": 0.732176661491394, "learning_rate": 2.4118465846663104e-07, "loss": 1.7081, "step": 89020 }, { "epoch": 2.96, "grad_norm": 0.7185454964637756, "learning_rate": 2.4076580940506354e-07, "loss": 1.7354, "step": 89021 }, { "epoch": 2.96, "grad_norm": 0.690170407295227, "learning_rate": 2.40347324207002e-07, "loss": 1.6933, "step": 89022 }, { "epoch": 2.96, "grad_norm": 0.7300515174865723, "learning_rate": 2.3992920287301263e-07, "loss": 1.7076, "step": 89023 }, { "epoch": 2.96, "grad_norm": 0.6912572979927063, "learning_rate": 2.395114454035618e-07, "loss": 1.6656, "step": 89024 }, { "epoch": 2.96, "grad_norm": 0.6981350779533386, "learning_rate": 2.390940517992157e-07, "loss": 1.6692, "step": 89025 }, { "epoch": 2.96, "grad_norm": 0.6995006203651428, "learning_rate": 2.386770220604073e-07, "loss": 1.6303, "step": 89026 }, { "epoch": 2.96, "grad_norm": 0.6990725994110107, "learning_rate": 2.3826035618766948e-07, "loss": 1.5895, "step": 89027 }, { "epoch": 2.96, "grad_norm": 0.7088079452514648, "learning_rate": 2.3784405418153518e-07, "loss": 1.7249, "step": 89028 }, { "epoch": 2.96, "grad_norm": 0.7115911245346069, "learning_rate": 2.37428116042504e-07, "loss": 1.7327, "step": 89029 }, { "epoch": 2.96, "grad_norm": 0.7186174392700195, "learning_rate": 2.3701254177104224e-07, "loss": 1.6651, "step": 89030 }, { "epoch": 2.96, "grad_norm": 0.6997056007385254, "learning_rate": 2.3659733136768278e-07, "loss": 1.6397, "step": 89031 }, { "epoch": 2.96, "grad_norm": 0.7189664244651794, "learning_rate": 2.3618248483295854e-07, "loss": 1.6564, "step": 89032 }, { "epoch": 2.96, "grad_norm": 0.7057174444198608, "learning_rate": 2.357680021673025e-07, "loss": 1.7133, "step": 89033 }, { "epoch": 2.96, "grad_norm": 0.7029011249542236, "learning_rate": 2.353538833712809e-07, "loss": 1.6266, "step": 89034 }, { "epoch": 2.96, "grad_norm": 0.7288114428520203, "learning_rate": 2.3494012844536002e-07, "loss": 1.6869, "step": 89035 }, { "epoch": 2.96, "grad_norm": 0.7152042984962463, "learning_rate": 2.3452673739003947e-07, "loss": 1.6854, "step": 89036 }, { "epoch": 2.96, "grad_norm": 0.7123602628707886, "learning_rate": 2.3411371020585212e-07, "loss": 1.7464, "step": 89037 }, { "epoch": 2.96, "grad_norm": 0.7148884534835815, "learning_rate": 2.337010468932643e-07, "loss": 1.7051, "step": 89038 }, { "epoch": 2.96, "grad_norm": 0.6945803165435791, "learning_rate": 2.332887474528089e-07, "loss": 1.7423, "step": 89039 }, { "epoch": 2.96, "grad_norm": 0.7136140465736389, "learning_rate": 2.3287681188498553e-07, "loss": 1.757, "step": 89040 }, { "epoch": 2.96, "grad_norm": 0.7164090871810913, "learning_rate": 2.3246524019026047e-07, "loss": 1.6356, "step": 89041 }, { "epoch": 2.96, "grad_norm": 0.7045975923538208, "learning_rate": 2.3205403236916664e-07, "loss": 1.7013, "step": 89042 }, { "epoch": 2.96, "grad_norm": 0.7123813629150391, "learning_rate": 2.3164318842217033e-07, "loss": 1.7164, "step": 89043 }, { "epoch": 2.96, "grad_norm": 0.7127957940101624, "learning_rate": 2.3123270834980445e-07, "loss": 1.6196, "step": 89044 }, { "epoch": 2.96, "grad_norm": 0.6864105463027954, "learning_rate": 2.3082259215256858e-07, "loss": 1.5849, "step": 89045 }, { "epoch": 2.96, "grad_norm": 0.7229416370391846, "learning_rate": 2.3041283983092906e-07, "loss": 1.734, "step": 89046 }, { "epoch": 2.96, "grad_norm": 0.7296978831291199, "learning_rate": 2.3000345138538545e-07, "loss": 1.6552, "step": 89047 }, { "epoch": 2.96, "grad_norm": 0.7109853029251099, "learning_rate": 2.2959442681647067e-07, "loss": 1.6695, "step": 89048 }, { "epoch": 2.96, "grad_norm": 0.7050858736038208, "learning_rate": 2.29185766124651e-07, "loss": 1.6839, "step": 89049 }, { "epoch": 2.96, "grad_norm": 0.7158948183059692, "learning_rate": 2.2877746931045938e-07, "loss": 1.6772, "step": 89050 }, { "epoch": 2.96, "grad_norm": 0.7151447534561157, "learning_rate": 2.2836953637432875e-07, "loss": 1.6583, "step": 89051 }, { "epoch": 2.96, "grad_norm": 0.7071959376335144, "learning_rate": 2.2796196731679207e-07, "loss": 1.6633, "step": 89052 }, { "epoch": 2.96, "grad_norm": 0.7142632603645325, "learning_rate": 2.275547621383489e-07, "loss": 1.6853, "step": 89053 }, { "epoch": 2.96, "grad_norm": 0.7453218102455139, "learning_rate": 2.2714792083946555e-07, "loss": 1.6695, "step": 89054 }, { "epoch": 2.96, "grad_norm": 0.750217080116272, "learning_rate": 2.2674144342067491e-07, "loss": 1.7123, "step": 89055 }, { "epoch": 2.96, "grad_norm": 0.7400214672088623, "learning_rate": 2.263353298824433e-07, "loss": 1.6921, "step": 89056 }, { "epoch": 2.96, "grad_norm": 0.7273057699203491, "learning_rate": 2.259295802252703e-07, "loss": 1.7695, "step": 89057 }, { "epoch": 2.96, "grad_norm": 0.7217917442321777, "learning_rate": 2.2552419444965553e-07, "loss": 1.6109, "step": 89058 }, { "epoch": 2.96, "grad_norm": 0.744845449924469, "learning_rate": 2.2511917255606525e-07, "loss": 1.8146, "step": 89059 }, { "epoch": 2.96, "grad_norm": 0.710314154624939, "learning_rate": 2.2471451454503242e-07, "loss": 1.6603, "step": 89060 }, { "epoch": 2.96, "grad_norm": 0.6952383518218994, "learning_rate": 2.2431022041702328e-07, "loss": 1.6854, "step": 89061 }, { "epoch": 2.96, "grad_norm": 0.71136873960495, "learning_rate": 2.2390629017250418e-07, "loss": 1.6484, "step": 89062 }, { "epoch": 2.96, "grad_norm": 0.7197391986846924, "learning_rate": 2.2350272381204126e-07, "loss": 1.6913, "step": 89063 }, { "epoch": 2.96, "grad_norm": 0.7185917496681213, "learning_rate": 2.2309952133603426e-07, "loss": 1.5694, "step": 89064 }, { "epoch": 2.96, "grad_norm": 0.7067528367042542, "learning_rate": 2.2269668274504937e-07, "loss": 1.6613, "step": 89065 }, { "epoch": 2.96, "grad_norm": 0.705702006816864, "learning_rate": 2.2229420803951957e-07, "loss": 1.6717, "step": 89066 }, { "epoch": 2.96, "grad_norm": 0.7356595993041992, "learning_rate": 2.2189209721994449e-07, "loss": 1.6894, "step": 89067 }, { "epoch": 2.96, "grad_norm": 0.7212298512458801, "learning_rate": 2.214903502868237e-07, "loss": 1.6394, "step": 89068 }, { "epoch": 2.96, "grad_norm": 0.7203306555747986, "learning_rate": 2.2108896724065685e-07, "loss": 1.7332, "step": 89069 }, { "epoch": 2.96, "grad_norm": 0.7157836556434631, "learning_rate": 2.2068794808194344e-07, "loss": 1.6603, "step": 89070 }, { "epoch": 2.96, "grad_norm": 0.6963735222816467, "learning_rate": 2.2028729281111657e-07, "loss": 1.665, "step": 89071 }, { "epoch": 2.96, "grad_norm": 0.7112756371498108, "learning_rate": 2.1988700142867577e-07, "loss": 1.7479, "step": 89072 }, { "epoch": 2.96, "grad_norm": 0.7200623154640198, "learning_rate": 2.1948707393512065e-07, "loss": 1.6579, "step": 89073 }, { "epoch": 2.96, "grad_norm": 0.7343794107437134, "learning_rate": 2.1908751033098414e-07, "loss": 1.6655, "step": 89074 }, { "epoch": 2.96, "grad_norm": 0.712566077709198, "learning_rate": 2.1868831061666592e-07, "loss": 1.6919, "step": 89075 }, { "epoch": 2.96, "grad_norm": 0.7020881175994873, "learning_rate": 2.1828947479269888e-07, "loss": 1.712, "step": 89076 }, { "epoch": 2.96, "grad_norm": 0.7345583438873291, "learning_rate": 2.178910028595493e-07, "loss": 1.7026, "step": 89077 }, { "epoch": 2.96, "grad_norm": 0.726523220539093, "learning_rate": 2.174928948177168e-07, "loss": 1.7253, "step": 89078 }, { "epoch": 2.96, "grad_norm": 0.6949173808097839, "learning_rate": 2.170951506676677e-07, "loss": 1.6417, "step": 89079 }, { "epoch": 2.96, "grad_norm": 0.7011012434959412, "learning_rate": 2.1669777040990154e-07, "loss": 1.7681, "step": 89080 }, { "epoch": 2.96, "grad_norm": 0.706976056098938, "learning_rate": 2.1630075404491798e-07, "loss": 1.7735, "step": 89081 }, { "epoch": 2.96, "grad_norm": 0.7106432318687439, "learning_rate": 2.1590410157311665e-07, "loss": 1.6937, "step": 89082 }, { "epoch": 2.96, "grad_norm": 0.7165296673774719, "learning_rate": 2.1550781299509712e-07, "loss": 1.6629, "step": 89083 }, { "epoch": 2.96, "grad_norm": 0.6948760747909546, "learning_rate": 2.1511188831122573e-07, "loss": 1.6614, "step": 89084 }, { "epoch": 2.96, "grad_norm": 0.7123053669929504, "learning_rate": 2.1471632752206867e-07, "loss": 1.6507, "step": 89085 }, { "epoch": 2.96, "grad_norm": 0.7009862065315247, "learning_rate": 2.1432113062809232e-07, "loss": 1.6023, "step": 89086 }, { "epoch": 2.96, "grad_norm": 0.7005477547645569, "learning_rate": 2.1392629762972958e-07, "loss": 1.6812, "step": 89087 }, { "epoch": 2.96, "grad_norm": 0.7246676683425903, "learning_rate": 2.1353182852751337e-07, "loss": 1.7421, "step": 89088 }, { "epoch": 2.96, "grad_norm": 0.744737446308136, "learning_rate": 2.1313772332187674e-07, "loss": 1.6501, "step": 89089 }, { "epoch": 2.96, "grad_norm": 0.7164499759674072, "learning_rate": 2.1274398201331923e-07, "loss": 1.6384, "step": 89090 }, { "epoch": 2.96, "grad_norm": 0.7177512049674988, "learning_rate": 2.1235060460230712e-07, "loss": 1.6145, "step": 89091 }, { "epoch": 2.96, "grad_norm": 0.6957389712333679, "learning_rate": 2.1195759108937338e-07, "loss": 1.7247, "step": 89092 }, { "epoch": 2.96, "grad_norm": 0.7190104126930237, "learning_rate": 2.1156494147491764e-07, "loss": 1.731, "step": 89093 }, { "epoch": 2.96, "grad_norm": 0.7272550463676453, "learning_rate": 2.1117265575943952e-07, "loss": 1.6632, "step": 89094 }, { "epoch": 2.96, "grad_norm": 0.7233896851539612, "learning_rate": 2.1078073394343863e-07, "loss": 1.6912, "step": 89095 }, { "epoch": 2.96, "grad_norm": 0.7139876484870911, "learning_rate": 2.1038917602738126e-07, "loss": 1.7074, "step": 89096 }, { "epoch": 2.96, "grad_norm": 0.7009860277175903, "learning_rate": 2.09997982011767e-07, "loss": 1.7187, "step": 89097 }, { "epoch": 2.96, "grad_norm": 0.7068358063697815, "learning_rate": 2.096071518969955e-07, "loss": 1.6496, "step": 89098 }, { "epoch": 2.96, "grad_norm": 0.7058581113815308, "learning_rate": 2.0921668568363302e-07, "loss": 1.5946, "step": 89099 }, { "epoch": 2.96, "grad_norm": 0.7089740633964539, "learning_rate": 2.0882658337207925e-07, "loss": 1.674, "step": 89100 }, { "epoch": 2.96, "grad_norm": 0.7217512726783752, "learning_rate": 2.0843684496283375e-07, "loss": 1.6114, "step": 89101 }, { "epoch": 2.96, "grad_norm": 0.7277395725250244, "learning_rate": 2.0804747045639614e-07, "loss": 1.6528, "step": 89102 }, { "epoch": 2.96, "grad_norm": 0.7159837484359741, "learning_rate": 2.076584598531994e-07, "loss": 1.6232, "step": 89103 }, { "epoch": 2.96, "grad_norm": 0.7144953012466431, "learning_rate": 2.0726981315374314e-07, "loss": 1.565, "step": 89104 }, { "epoch": 2.96, "grad_norm": 0.737328052520752, "learning_rate": 2.0688153035849365e-07, "loss": 1.6326, "step": 89105 }, { "epoch": 2.96, "grad_norm": 0.7350755929946899, "learning_rate": 2.0649361146791721e-07, "loss": 1.6757, "step": 89106 }, { "epoch": 2.96, "grad_norm": 0.6987537145614624, "learning_rate": 2.0610605648248013e-07, "loss": 1.7292, "step": 89107 }, { "epoch": 2.96, "grad_norm": 0.701482355594635, "learning_rate": 2.057188654026487e-07, "loss": 1.6417, "step": 89108 }, { "epoch": 2.96, "grad_norm": 0.6946278214454651, "learning_rate": 2.0533203822888922e-07, "loss": 1.5968, "step": 89109 }, { "epoch": 2.96, "grad_norm": 0.7169883847236633, "learning_rate": 2.0494557496170128e-07, "loss": 1.718, "step": 89110 }, { "epoch": 2.96, "grad_norm": 0.705906093120575, "learning_rate": 2.0455947560155118e-07, "loss": 1.6265, "step": 89111 }, { "epoch": 2.96, "grad_norm": 0.7456182837486267, "learning_rate": 2.0417374014887189e-07, "loss": 1.7696, "step": 89112 }, { "epoch": 2.96, "grad_norm": 0.7189915776252747, "learning_rate": 2.0378836860416304e-07, "loss": 1.6801, "step": 89113 }, { "epoch": 2.96, "grad_norm": 0.6998583674430847, "learning_rate": 2.0340336096789088e-07, "loss": 1.6857, "step": 89114 }, { "epoch": 2.96, "grad_norm": 0.7337658405303955, "learning_rate": 2.0301871724048846e-07, "loss": 1.6791, "step": 89115 }, { "epoch": 2.96, "grad_norm": 0.7169531583786011, "learning_rate": 2.0263443742248864e-07, "loss": 1.6851, "step": 89116 }, { "epoch": 2.96, "grad_norm": 0.7021292448043823, "learning_rate": 2.022505215142911e-07, "loss": 1.6226, "step": 89117 }, { "epoch": 2.96, "grad_norm": 0.7347263097763062, "learning_rate": 2.0186696951636217e-07, "loss": 1.6741, "step": 89118 }, { "epoch": 2.96, "grad_norm": 0.7111538648605347, "learning_rate": 2.0148378142923472e-07, "loss": 1.6089, "step": 89119 }, { "epoch": 2.97, "grad_norm": 0.725570559501648, "learning_rate": 2.0110095725330843e-07, "loss": 1.6767, "step": 89120 }, { "epoch": 2.97, "grad_norm": 0.7178313732147217, "learning_rate": 2.0071849698908292e-07, "loss": 1.6773, "step": 89121 }, { "epoch": 2.97, "grad_norm": 0.7058882117271423, "learning_rate": 2.003364006370245e-07, "loss": 1.6307, "step": 89122 }, { "epoch": 2.97, "grad_norm": 0.7208091020584106, "learning_rate": 1.9995466819756613e-07, "loss": 1.581, "step": 89123 }, { "epoch": 2.97, "grad_norm": 0.699958086013794, "learning_rate": 1.995732996712074e-07, "loss": 1.6602, "step": 89124 }, { "epoch": 2.97, "grad_norm": 0.7307637929916382, "learning_rate": 1.991922950583813e-07, "loss": 1.7443, "step": 89125 }, { "epoch": 2.97, "grad_norm": 0.7146826982498169, "learning_rate": 1.9881165435955414e-07, "loss": 1.6959, "step": 89126 }, { "epoch": 2.97, "grad_norm": 0.708503246307373, "learning_rate": 1.9843137757519223e-07, "loss": 1.7192, "step": 89127 }, { "epoch": 2.97, "grad_norm": 0.7077648639678955, "learning_rate": 1.9805146470579515e-07, "loss": 1.6742, "step": 89128 }, { "epoch": 2.97, "grad_norm": 0.7200367450714111, "learning_rate": 1.9767191575176255e-07, "loss": 1.7374, "step": 89129 }, { "epoch": 2.97, "grad_norm": 0.7209355235099792, "learning_rate": 1.9729273071356077e-07, "loss": 1.6589, "step": 89130 }, { "epoch": 2.97, "grad_norm": 0.7361205220222473, "learning_rate": 1.969139095917227e-07, "loss": 1.7482, "step": 89131 }, { "epoch": 2.97, "grad_norm": 0.7100914716720581, "learning_rate": 1.9653545238661472e-07, "loss": 1.6665, "step": 89132 }, { "epoch": 2.97, "grad_norm": 0.7099350094795227, "learning_rate": 1.9615735909876974e-07, "loss": 1.6524, "step": 89133 }, { "epoch": 2.97, "grad_norm": 0.7256110906600952, "learning_rate": 1.957796297285874e-07, "loss": 1.6866, "step": 89134 }, { "epoch": 2.97, "grad_norm": 0.7199167609214783, "learning_rate": 1.9540226427656734e-07, "loss": 1.6394, "step": 89135 }, { "epoch": 2.97, "grad_norm": 0.7004965543746948, "learning_rate": 1.9502526274317587e-07, "loss": 1.7168, "step": 89136 }, { "epoch": 2.97, "grad_norm": 0.7267307639122009, "learning_rate": 1.9464862512881264e-07, "loss": 1.6861, "step": 89137 }, { "epoch": 2.97, "grad_norm": 0.7455264925956726, "learning_rate": 1.9427235143401053e-07, "loss": 1.6983, "step": 89138 }, { "epoch": 2.97, "grad_norm": 0.7157851457595825, "learning_rate": 1.93896441659136e-07, "loss": 1.7256, "step": 89139 }, { "epoch": 2.97, "grad_norm": 0.6998191475868225, "learning_rate": 1.9352089580475517e-07, "loss": 1.6569, "step": 89140 }, { "epoch": 2.97, "grad_norm": 0.7055470943450928, "learning_rate": 1.9314571387123444e-07, "loss": 1.672, "step": 89141 }, { "epoch": 2.97, "grad_norm": 0.6959055662155151, "learning_rate": 1.9277089585904016e-07, "loss": 1.6108, "step": 89142 }, { "epoch": 2.97, "grad_norm": 0.7283502221107483, "learning_rate": 1.9239644176867185e-07, "loss": 1.7313, "step": 89143 }, { "epoch": 2.97, "grad_norm": 0.7255504727363586, "learning_rate": 1.9202235160056256e-07, "loss": 1.6391, "step": 89144 }, { "epoch": 2.97, "grad_norm": 0.7251956462860107, "learning_rate": 1.9164862535517856e-07, "loss": 1.748, "step": 89145 }, { "epoch": 2.97, "grad_norm": 0.7218412756919861, "learning_rate": 1.912752630329195e-07, "loss": 1.676, "step": 89146 }, { "epoch": 2.97, "grad_norm": 0.6855006814002991, "learning_rate": 1.9090226463431836e-07, "loss": 1.6234, "step": 89147 }, { "epoch": 2.97, "grad_norm": 0.7216755151748657, "learning_rate": 1.9052963015974142e-07, "loss": 1.6529, "step": 89148 }, { "epoch": 2.97, "grad_norm": 0.7240895628929138, "learning_rate": 1.9015735960972166e-07, "loss": 1.677, "step": 89149 }, { "epoch": 2.97, "grad_norm": 0.712662935256958, "learning_rate": 1.8978545298465875e-07, "loss": 1.7178, "step": 89150 }, { "epoch": 2.97, "grad_norm": 0.7012699246406555, "learning_rate": 1.8941391028505226e-07, "loss": 1.6543, "step": 89151 }, { "epoch": 2.97, "grad_norm": 0.7097323536872864, "learning_rate": 1.890427315113019e-07, "loss": 1.6296, "step": 89152 }, { "epoch": 2.97, "grad_norm": 0.721169114112854, "learning_rate": 1.8867191666387393e-07, "loss": 1.6351, "step": 89153 }, { "epoch": 2.97, "grad_norm": 0.7086066603660583, "learning_rate": 1.8830146574320138e-07, "loss": 1.6504, "step": 89154 }, { "epoch": 2.97, "grad_norm": 0.7194010615348816, "learning_rate": 1.879313787497838e-07, "loss": 1.7132, "step": 89155 }, { "epoch": 2.97, "grad_norm": 0.7155421376228333, "learning_rate": 1.8756165568405424e-07, "loss": 1.6892, "step": 89156 }, { "epoch": 2.97, "grad_norm": 0.7157976031303406, "learning_rate": 1.871922965464123e-07, "loss": 1.6129, "step": 89157 }, { "epoch": 2.97, "grad_norm": 0.7270631790161133, "learning_rate": 1.8682330133735767e-07, "loss": 1.6546, "step": 89158 }, { "epoch": 2.97, "grad_norm": 0.712954580783844, "learning_rate": 1.864546700573566e-07, "loss": 1.7474, "step": 89159 }, { "epoch": 2.97, "grad_norm": 0.7092362642288208, "learning_rate": 1.8608640270677543e-07, "loss": 1.6381, "step": 89160 }, { "epoch": 2.97, "grad_norm": 0.6922418475151062, "learning_rate": 1.8571849928614714e-07, "loss": 1.6679, "step": 89161 }, { "epoch": 2.97, "grad_norm": 0.7417173385620117, "learning_rate": 1.8535095979587132e-07, "loss": 1.7028, "step": 89162 }, { "epoch": 2.97, "grad_norm": 0.7091414928436279, "learning_rate": 1.8498378423638106e-07, "loss": 1.5867, "step": 89163 }, { "epoch": 2.97, "grad_norm": 0.7300631403923035, "learning_rate": 1.8461697260817587e-07, "loss": 1.6983, "step": 89164 }, { "epoch": 2.97, "grad_norm": 0.7170823216438293, "learning_rate": 1.842505249116555e-07, "loss": 1.6579, "step": 89165 }, { "epoch": 2.97, "grad_norm": 0.6965575218200684, "learning_rate": 1.8388444114728618e-07, "loss": 1.7133, "step": 89166 }, { "epoch": 2.97, "grad_norm": 0.7031216621398926, "learning_rate": 1.8351872131550094e-07, "loss": 1.7187, "step": 89167 }, { "epoch": 2.97, "grad_norm": 0.721356987953186, "learning_rate": 1.8315336541673275e-07, "loss": 1.6217, "step": 89168 }, { "epoch": 2.97, "grad_norm": 0.7118719220161438, "learning_rate": 1.8278837345144793e-07, "loss": 1.7, "step": 89169 }, { "epoch": 2.97, "grad_norm": 0.6997671723365784, "learning_rate": 1.8242374542007942e-07, "loss": 1.6395, "step": 89170 }, { "epoch": 2.97, "grad_norm": 0.7275553941726685, "learning_rate": 1.8205948132309357e-07, "loss": 1.6866, "step": 89171 }, { "epoch": 2.97, "grad_norm": 0.7232556939125061, "learning_rate": 1.8169558116089e-07, "loss": 1.6265, "step": 89172 }, { "epoch": 2.97, "grad_norm": 0.6987749338150024, "learning_rate": 1.8133204493393506e-07, "loss": 1.6877, "step": 89173 }, { "epoch": 2.97, "grad_norm": 0.7087854146957397, "learning_rate": 1.80968872642695e-07, "loss": 1.6129, "step": 89174 }, { "epoch": 2.97, "grad_norm": 0.7198914885520935, "learning_rate": 1.806060642875362e-07, "loss": 1.6636, "step": 89175 }, { "epoch": 2.97, "grad_norm": 0.7313875555992126, "learning_rate": 1.8024361986899162e-07, "loss": 1.7405, "step": 89176 }, { "epoch": 2.97, "grad_norm": 0.6984263062477112, "learning_rate": 1.7988153938742754e-07, "loss": 1.7727, "step": 89177 }, { "epoch": 2.97, "grad_norm": 0.6946272850036621, "learning_rate": 1.7951982284331034e-07, "loss": 1.593, "step": 89178 }, { "epoch": 2.97, "grad_norm": 0.7277640104293823, "learning_rate": 1.7915847023707296e-07, "loss": 1.6907, "step": 89179 }, { "epoch": 2.97, "grad_norm": 0.7318249940872192, "learning_rate": 1.7879748156918172e-07, "loss": 1.7075, "step": 89180 }, { "epoch": 2.97, "grad_norm": 0.713212788105011, "learning_rate": 1.7843685684006957e-07, "loss": 1.6381, "step": 89181 }, { "epoch": 2.97, "grad_norm": 0.7096737623214722, "learning_rate": 1.7807659605010293e-07, "loss": 1.6419, "step": 89182 }, { "epoch": 2.97, "grad_norm": 0.7199617028236389, "learning_rate": 1.7771669919981467e-07, "loss": 1.6281, "step": 89183 }, { "epoch": 2.97, "grad_norm": 0.7050474286079407, "learning_rate": 1.773571662896045e-07, "loss": 1.6851, "step": 89184 }, { "epoch": 2.97, "grad_norm": 0.7227017879486084, "learning_rate": 1.7699799731987206e-07, "loss": 1.7443, "step": 89185 }, { "epoch": 2.97, "grad_norm": 0.6916788816452026, "learning_rate": 1.76639192291117e-07, "loss": 1.6314, "step": 89186 }, { "epoch": 2.97, "grad_norm": 0.7231669425964355, "learning_rate": 1.7628075120373896e-07, "loss": 1.6335, "step": 89187 }, { "epoch": 2.97, "grad_norm": 0.7225458025932312, "learning_rate": 1.7592267405817096e-07, "loss": 1.6742, "step": 89188 }, { "epoch": 2.97, "grad_norm": 0.7105371952056885, "learning_rate": 1.7556496085487926e-07, "loss": 1.5966, "step": 89189 }, { "epoch": 2.97, "grad_norm": 0.7099661231040955, "learning_rate": 1.7520761159426355e-07, "loss": 1.6896, "step": 89190 }, { "epoch": 2.97, "grad_norm": 0.7180418968200684, "learning_rate": 1.7485062627675682e-07, "loss": 1.6457, "step": 89191 }, { "epoch": 2.97, "grad_norm": 0.7092763185501099, "learning_rate": 1.7449400490279208e-07, "loss": 1.6379, "step": 89192 }, { "epoch": 2.97, "grad_norm": 0.7376013398170471, "learning_rate": 1.741377474728356e-07, "loss": 1.6425, "step": 89193 }, { "epoch": 2.97, "grad_norm": 0.7224565148353577, "learning_rate": 1.7378185398728705e-07, "loss": 1.7052, "step": 89194 }, { "epoch": 2.97, "grad_norm": 0.7569984197616577, "learning_rate": 1.7342632444661275e-07, "loss": 1.7501, "step": 89195 }, { "epoch": 2.97, "grad_norm": 0.7326304316520691, "learning_rate": 1.7307115885121236e-07, "loss": 1.6905, "step": 89196 }, { "epoch": 2.97, "grad_norm": 0.7021769285202026, "learning_rate": 1.7271635720151887e-07, "loss": 1.7272, "step": 89197 }, { "epoch": 2.97, "grad_norm": 0.7173925638198853, "learning_rate": 1.7236191949796529e-07, "loss": 1.6516, "step": 89198 }, { "epoch": 2.97, "grad_norm": 0.7136772274971008, "learning_rate": 1.720078457409846e-07, "loss": 1.7151, "step": 89199 }, { "epoch": 2.97, "grad_norm": 0.7016538977622986, "learning_rate": 1.7165413593104305e-07, "loss": 1.5643, "step": 89200 }, { "epoch": 2.97, "grad_norm": 0.7047231793403625, "learning_rate": 1.7130079006850706e-07, "loss": 1.6544, "step": 89201 }, { "epoch": 2.97, "grad_norm": 0.7192487716674805, "learning_rate": 1.709478081538429e-07, "loss": 1.6592, "step": 89202 }, { "epoch": 2.97, "grad_norm": 0.7004533410072327, "learning_rate": 1.7059519018745027e-07, "loss": 1.7371, "step": 89203 }, { "epoch": 2.97, "grad_norm": 0.7047131657600403, "learning_rate": 1.7024293616979546e-07, "loss": 1.7716, "step": 89204 }, { "epoch": 2.97, "grad_norm": 0.7156720161437988, "learning_rate": 1.6989104610127812e-07, "loss": 1.7399, "step": 89205 }, { "epoch": 2.97, "grad_norm": 0.7074504494667053, "learning_rate": 1.695395199823313e-07, "loss": 1.6691, "step": 89206 }, { "epoch": 2.97, "grad_norm": 0.7257494926452637, "learning_rate": 1.6918835781342121e-07, "loss": 1.566, "step": 89207 }, { "epoch": 2.97, "grad_norm": 0.7330068349838257, "learning_rate": 1.6883755959491429e-07, "loss": 1.7021, "step": 89208 }, { "epoch": 2.97, "grad_norm": 0.7215515971183777, "learning_rate": 1.684871253272435e-07, "loss": 1.6543, "step": 89209 }, { "epoch": 2.97, "grad_norm": 0.7364212274551392, "learning_rate": 1.6813705501087514e-07, "loss": 1.6666, "step": 89210 }, { "epoch": 2.97, "grad_norm": 0.7210608124732971, "learning_rate": 1.677873486462089e-07, "loss": 1.6896, "step": 89211 }, { "epoch": 2.97, "grad_norm": 0.6935977339744568, "learning_rate": 1.6743800623364445e-07, "loss": 1.6197, "step": 89212 }, { "epoch": 2.97, "grad_norm": 0.7390869855880737, "learning_rate": 1.6708902777364806e-07, "loss": 1.7341, "step": 89213 }, { "epoch": 2.97, "grad_norm": 0.7047054171562195, "learning_rate": 1.6674041326665276e-07, "loss": 1.6806, "step": 89214 }, { "epoch": 2.97, "grad_norm": 0.7144773006439209, "learning_rate": 1.663921627130249e-07, "loss": 1.6729, "step": 89215 }, { "epoch": 2.97, "grad_norm": 0.7321762442588806, "learning_rate": 1.6604427611323078e-07, "loss": 1.7438, "step": 89216 }, { "epoch": 2.97, "grad_norm": 0.7314969897270203, "learning_rate": 1.6569675346767009e-07, "loss": 1.731, "step": 89217 }, { "epoch": 2.97, "grad_norm": 0.7299802899360657, "learning_rate": 1.653495947767758e-07, "loss": 1.648, "step": 89218 }, { "epoch": 2.97, "grad_norm": 0.7434834241867065, "learning_rate": 1.6500280004098088e-07, "loss": 1.6934, "step": 89219 }, { "epoch": 2.97, "grad_norm": 0.7424590587615967, "learning_rate": 1.6465636926068503e-07, "loss": 1.6308, "step": 89220 }, { "epoch": 2.97, "grad_norm": 0.7290630340576172, "learning_rate": 1.6431030243632126e-07, "loss": 1.6529, "step": 89221 }, { "epoch": 2.97, "grad_norm": 1.2595711946487427, "learning_rate": 1.6396459956832253e-07, "loss": 1.6125, "step": 89222 }, { "epoch": 2.97, "grad_norm": 0.7277949452400208, "learning_rate": 1.6361926065705523e-07, "loss": 1.6379, "step": 89223 }, { "epoch": 2.97, "grad_norm": 0.7261173129081726, "learning_rate": 1.6327428570301893e-07, "loss": 1.6945, "step": 89224 }, { "epoch": 2.97, "grad_norm": 0.7174659967422485, "learning_rate": 1.629296747065467e-07, "loss": 1.6444, "step": 89225 }, { "epoch": 2.97, "grad_norm": 0.7214691042900085, "learning_rate": 1.625854276681382e-07, "loss": 1.7032, "step": 89226 }, { "epoch": 2.97, "grad_norm": 0.7159216403961182, "learning_rate": 1.6224154458812644e-07, "loss": 1.6823, "step": 89227 }, { "epoch": 2.97, "grad_norm": 0.7231970429420471, "learning_rate": 1.6189802546701103e-07, "loss": 1.653, "step": 89228 }, { "epoch": 2.97, "grad_norm": 0.7177218794822693, "learning_rate": 1.6155487030515833e-07, "loss": 1.6853, "step": 89229 }, { "epoch": 2.97, "grad_norm": 0.7089893221855164, "learning_rate": 1.6121207910300137e-07, "loss": 1.6216, "step": 89230 }, { "epoch": 2.97, "grad_norm": 0.7036902904510498, "learning_rate": 1.6086965186093981e-07, "loss": 1.7306, "step": 89231 }, { "epoch": 2.97, "grad_norm": 0.7050042748451233, "learning_rate": 1.6052758857943992e-07, "loss": 1.6349, "step": 89232 }, { "epoch": 2.97, "grad_norm": 0.7345260977745056, "learning_rate": 1.601858892588348e-07, "loss": 1.7105, "step": 89233 }, { "epoch": 2.97, "grad_norm": 0.7108215689659119, "learning_rate": 1.5984455389962404e-07, "loss": 1.7176, "step": 89234 }, { "epoch": 2.97, "grad_norm": 0.7346654534339905, "learning_rate": 1.5950358250217398e-07, "loss": 1.6683, "step": 89235 }, { "epoch": 2.97, "grad_norm": 0.7104904055595398, "learning_rate": 1.5916297506688436e-07, "loss": 1.6029, "step": 89236 }, { "epoch": 2.97, "grad_norm": 0.708751380443573, "learning_rate": 1.5882273159422143e-07, "loss": 1.6711, "step": 89237 }, { "epoch": 2.97, "grad_norm": 0.7010232210159302, "learning_rate": 1.584828520845516e-07, "loss": 1.584, "step": 89238 }, { "epoch": 2.97, "grad_norm": 0.7197749018669128, "learning_rate": 1.581433365383078e-07, "loss": 1.6571, "step": 89239 }, { "epoch": 2.97, "grad_norm": 0.7096144556999207, "learning_rate": 1.5780418495588976e-07, "loss": 1.7074, "step": 89240 }, { "epoch": 2.97, "grad_norm": 0.7050350904464722, "learning_rate": 1.5746539733773046e-07, "loss": 1.6819, "step": 89241 }, { "epoch": 2.97, "grad_norm": 0.7092511653900146, "learning_rate": 1.5712697368419625e-07, "loss": 1.6843, "step": 89242 }, { "epoch": 2.97, "grad_norm": 0.7362343668937683, "learning_rate": 1.5678891399575345e-07, "loss": 1.7346, "step": 89243 }, { "epoch": 2.97, "grad_norm": 0.7289867997169495, "learning_rate": 1.564512182728017e-07, "loss": 1.7386, "step": 89244 }, { "epoch": 2.97, "grad_norm": 0.695478081703186, "learning_rate": 1.5611388651574076e-07, "loss": 1.6758, "step": 89245 }, { "epoch": 2.97, "grad_norm": 0.693813145160675, "learning_rate": 1.557769187249369e-07, "loss": 1.6969, "step": 89246 }, { "epoch": 2.97, "grad_norm": 0.7033001780509949, "learning_rate": 1.554403149008898e-07, "loss": 1.7209, "step": 89247 }, { "epoch": 2.97, "grad_norm": 0.7189276814460754, "learning_rate": 1.5510407504393252e-07, "loss": 1.6322, "step": 89248 }, { "epoch": 2.97, "grad_norm": 0.7181630730628967, "learning_rate": 1.54768199154498e-07, "loss": 1.7184, "step": 89249 }, { "epoch": 2.97, "grad_norm": 0.7334738373756409, "learning_rate": 1.5443268723298596e-07, "loss": 1.7097, "step": 89250 }, { "epoch": 2.97, "grad_norm": 0.7154043912887573, "learning_rate": 1.5409753927982938e-07, "loss": 1.6868, "step": 89251 }, { "epoch": 2.97, "grad_norm": 0.7207633256912231, "learning_rate": 1.5376275529542792e-07, "loss": 1.6969, "step": 89252 }, { "epoch": 2.97, "grad_norm": 0.6919594407081604, "learning_rate": 1.5342833528014798e-07, "loss": 1.6527, "step": 89253 }, { "epoch": 2.97, "grad_norm": 0.7327781319618225, "learning_rate": 1.5309427923445582e-07, "loss": 1.7233, "step": 89254 }, { "epoch": 2.97, "grad_norm": 0.7241110801696777, "learning_rate": 1.5276058715871787e-07, "loss": 1.5907, "step": 89255 }, { "epoch": 2.97, "grad_norm": 0.7208619117736816, "learning_rate": 1.5242725905333375e-07, "loss": 1.6983, "step": 89256 }, { "epoch": 2.97, "grad_norm": 0.7084715366363525, "learning_rate": 1.5209429491873648e-07, "loss": 1.6448, "step": 89257 }, { "epoch": 2.97, "grad_norm": 0.7180132269859314, "learning_rate": 1.5176169475532573e-07, "loss": 1.6729, "step": 89258 }, { "epoch": 2.97, "grad_norm": 0.7068860530853271, "learning_rate": 1.5142945856346788e-07, "loss": 1.6807, "step": 89259 }, { "epoch": 2.97, "grad_norm": 0.7241700291633606, "learning_rate": 1.5109758634359593e-07, "loss": 1.7127, "step": 89260 }, { "epoch": 2.97, "grad_norm": 0.7123303413391113, "learning_rate": 1.5076607809614283e-07, "loss": 1.7116, "step": 89261 }, { "epoch": 2.97, "grad_norm": 0.705880880355835, "learning_rate": 1.5043493382144168e-07, "loss": 1.7322, "step": 89262 }, { "epoch": 2.97, "grad_norm": 0.721798300743103, "learning_rate": 1.5010415351995875e-07, "loss": 1.6654, "step": 89263 }, { "epoch": 2.97, "grad_norm": 0.7013983130455017, "learning_rate": 1.4977373719206043e-07, "loss": 1.6183, "step": 89264 }, { "epoch": 2.97, "grad_norm": 0.7063929438591003, "learning_rate": 1.494436848381464e-07, "loss": 1.6721, "step": 89265 }, { "epoch": 2.97, "grad_norm": 0.7238739728927612, "learning_rate": 1.4911399645864963e-07, "loss": 1.7185, "step": 89266 }, { "epoch": 2.97, "grad_norm": 0.7160861492156982, "learning_rate": 1.487846720539365e-07, "loss": 1.6455, "step": 89267 }, { "epoch": 2.97, "grad_norm": 0.7003065347671509, "learning_rate": 1.4845571162440672e-07, "loss": 1.6974, "step": 89268 }, { "epoch": 2.97, "grad_norm": 0.7139933109283447, "learning_rate": 1.4812711517049326e-07, "loss": 1.6166, "step": 89269 }, { "epoch": 2.97, "grad_norm": 0.7301734089851379, "learning_rate": 1.4779888269252914e-07, "loss": 1.6737, "step": 89270 }, { "epoch": 2.97, "grad_norm": 0.7204110622406006, "learning_rate": 1.4747101419101404e-07, "loss": 1.7164, "step": 89271 }, { "epoch": 2.97, "grad_norm": 0.709690511226654, "learning_rate": 1.4714350966624766e-07, "loss": 1.6776, "step": 89272 }, { "epoch": 2.97, "grad_norm": 0.7166682481765747, "learning_rate": 1.4681636911866301e-07, "loss": 1.6468, "step": 89273 }, { "epoch": 2.97, "grad_norm": 0.7098791599273682, "learning_rate": 1.4648959254869308e-07, "loss": 1.6766, "step": 89274 }, { "epoch": 2.97, "grad_norm": 0.7035363912582397, "learning_rate": 1.4616317995667093e-07, "loss": 1.6562, "step": 89275 }, { "epoch": 2.97, "grad_norm": 0.7099013924598694, "learning_rate": 1.4583713134306286e-07, "loss": 1.7315, "step": 89276 }, { "epoch": 2.97, "grad_norm": 0.6797789931297302, "learning_rate": 1.455114467082019e-07, "loss": 1.7204, "step": 89277 }, { "epoch": 2.97, "grad_norm": 0.7173094749450684, "learning_rate": 1.4518612605252112e-07, "loss": 1.6585, "step": 89278 }, { "epoch": 2.97, "grad_norm": 0.689169704914093, "learning_rate": 1.448611693763868e-07, "loss": 1.679, "step": 89279 }, { "epoch": 2.97, "grad_norm": 0.7029163241386414, "learning_rate": 1.4453657668019868e-07, "loss": 1.6137, "step": 89280 }, { "epoch": 2.97, "grad_norm": 0.7012061476707458, "learning_rate": 1.4421234796438974e-07, "loss": 1.6702, "step": 89281 }, { "epoch": 2.97, "grad_norm": 0.7354585528373718, "learning_rate": 1.4388848322932634e-07, "loss": 1.7076, "step": 89282 }, { "epoch": 2.97, "grad_norm": 0.7109313607215881, "learning_rate": 1.4356498247537484e-07, "loss": 1.6888, "step": 89283 }, { "epoch": 2.97, "grad_norm": 0.7500882744789124, "learning_rate": 1.4324184570296826e-07, "loss": 1.6804, "step": 89284 }, { "epoch": 2.97, "grad_norm": 0.7316304445266724, "learning_rate": 1.4291907291250627e-07, "loss": 1.6596, "step": 89285 }, { "epoch": 2.97, "grad_norm": 0.7267947196960449, "learning_rate": 1.4259666410432192e-07, "loss": 1.6825, "step": 89286 }, { "epoch": 2.97, "grad_norm": 0.6997333765029907, "learning_rate": 1.422746192788815e-07, "loss": 1.5987, "step": 89287 }, { "epoch": 2.97, "grad_norm": 0.7194944620132446, "learning_rate": 1.419529384364848e-07, "loss": 1.6632, "step": 89288 }, { "epoch": 2.97, "grad_norm": 0.7407404780387878, "learning_rate": 1.4163162157763143e-07, "loss": 1.7656, "step": 89289 }, { "epoch": 2.97, "grad_norm": 0.7291738986968994, "learning_rate": 1.4131066870262108e-07, "loss": 1.6808, "step": 89290 }, { "epoch": 2.97, "grad_norm": 0.7188913226127625, "learning_rate": 1.409900798118868e-07, "loss": 1.7625, "step": 89291 }, { "epoch": 2.97, "grad_norm": 0.7013539671897888, "learning_rate": 1.4066985490579496e-07, "loss": 1.6204, "step": 89292 }, { "epoch": 2.97, "grad_norm": 0.7114933729171753, "learning_rate": 1.4034999398477854e-07, "loss": 1.6657, "step": 89293 }, { "epoch": 2.97, "grad_norm": 0.684369683265686, "learning_rate": 1.4003049704917057e-07, "loss": 1.6221, "step": 89294 }, { "epoch": 2.97, "grad_norm": 0.7147894501686096, "learning_rate": 1.3971136409937078e-07, "loss": 1.6938, "step": 89295 }, { "epoch": 2.97, "grad_norm": 0.7107553482055664, "learning_rate": 1.3939259513581215e-07, "loss": 1.6624, "step": 89296 }, { "epoch": 2.97, "grad_norm": 0.725799560546875, "learning_rate": 1.390741901588277e-07, "loss": 1.7268, "step": 89297 }, { "epoch": 2.97, "grad_norm": 0.716264545917511, "learning_rate": 1.3875614916885047e-07, "loss": 1.7107, "step": 89298 }, { "epoch": 2.97, "grad_norm": 0.6992454528808594, "learning_rate": 1.3843847216621352e-07, "loss": 1.7704, "step": 89299 }, { "epoch": 2.97, "grad_norm": 0.7497986555099487, "learning_rate": 1.381211591513165e-07, "loss": 1.6871, "step": 89300 }, { "epoch": 2.97, "grad_norm": 0.7044886350631714, "learning_rate": 1.378042101245924e-07, "loss": 1.7271, "step": 89301 }, { "epoch": 2.97, "grad_norm": 0.731690526008606, "learning_rate": 1.3748762508637434e-07, "loss": 1.683, "step": 89302 }, { "epoch": 2.97, "grad_norm": 0.695506751537323, "learning_rate": 1.3717140403706194e-07, "loss": 1.6697, "step": 89303 }, { "epoch": 2.97, "grad_norm": 0.7467887997627258, "learning_rate": 1.3685554697705493e-07, "loss": 1.7559, "step": 89304 }, { "epoch": 2.97, "grad_norm": 0.7195727825164795, "learning_rate": 1.3654005390671964e-07, "loss": 1.6083, "step": 89305 }, { "epoch": 2.97, "grad_norm": 0.7045428156852722, "learning_rate": 1.3622492482642244e-07, "loss": 1.6424, "step": 89306 }, { "epoch": 2.97, "grad_norm": 0.7394439578056335, "learning_rate": 1.3591015973659637e-07, "loss": 1.7235, "step": 89307 }, { "epoch": 2.97, "grad_norm": 0.7431936264038086, "learning_rate": 1.3559575863757445e-07, "loss": 1.6943, "step": 89308 }, { "epoch": 2.97, "grad_norm": 0.7279718518257141, "learning_rate": 1.3528172152975635e-07, "loss": 1.6047, "step": 89309 }, { "epoch": 2.97, "grad_norm": 0.7484533190727234, "learning_rate": 1.349680484135085e-07, "loss": 1.6597, "step": 89310 }, { "epoch": 2.97, "grad_norm": 0.7056964635848999, "learning_rate": 1.3465473928923053e-07, "loss": 1.6585, "step": 89311 }, { "epoch": 2.97, "grad_norm": 0.7089388370513916, "learning_rate": 1.3434179415732215e-07, "loss": 1.636, "step": 89312 }, { "epoch": 2.97, "grad_norm": 0.7147250175476074, "learning_rate": 1.3402921301811641e-07, "loss": 1.638, "step": 89313 }, { "epoch": 2.97, "grad_norm": 0.7010677456855774, "learning_rate": 1.3371699587204632e-07, "loss": 1.6965, "step": 89314 }, { "epoch": 2.97, "grad_norm": 0.7252724170684814, "learning_rate": 1.334051427194449e-07, "loss": 1.6758, "step": 89315 }, { "epoch": 2.97, "grad_norm": 0.7248331904411316, "learning_rate": 1.3309365356067858e-07, "loss": 1.7472, "step": 89316 }, { "epoch": 2.97, "grad_norm": 0.719531238079071, "learning_rate": 1.327825283961803e-07, "loss": 1.7272, "step": 89317 }, { "epoch": 2.97, "grad_norm": 0.7031083703041077, "learning_rate": 1.3247176722631648e-07, "loss": 1.7105, "step": 89318 }, { "epoch": 2.97, "grad_norm": 0.7201492786407471, "learning_rate": 1.3216137005142015e-07, "loss": 1.7124, "step": 89319 }, { "epoch": 2.97, "grad_norm": 0.6976636052131653, "learning_rate": 1.31851336871891e-07, "loss": 1.675, "step": 89320 }, { "epoch": 2.97, "grad_norm": 0.7211883068084717, "learning_rate": 1.3154166768812868e-07, "loss": 1.6981, "step": 89321 }, { "epoch": 2.97, "grad_norm": 0.7290927171707153, "learning_rate": 1.3123236250049963e-07, "loss": 1.679, "step": 89322 }, { "epoch": 2.97, "grad_norm": 0.694450855255127, "learning_rate": 1.3092342130937016e-07, "loss": 1.6791, "step": 89323 }, { "epoch": 2.97, "grad_norm": 0.7234998345375061, "learning_rate": 1.306148441151067e-07, "loss": 1.605, "step": 89324 }, { "epoch": 2.97, "grad_norm": 0.7064207196235657, "learning_rate": 1.3030663091807557e-07, "loss": 1.7119, "step": 89325 }, { "epoch": 2.97, "grad_norm": 0.7153577208518982, "learning_rate": 1.299987817186765e-07, "loss": 1.6878, "step": 89326 }, { "epoch": 2.97, "grad_norm": 0.7333077192306519, "learning_rate": 1.2969129651727583e-07, "loss": 1.7056, "step": 89327 }, { "epoch": 2.97, "grad_norm": 0.6888773441314697, "learning_rate": 1.2938417531427325e-07, "loss": 1.6753, "step": 89328 }, { "epoch": 2.97, "grad_norm": 0.7128931283950806, "learning_rate": 1.2907741811000183e-07, "loss": 1.6784, "step": 89329 }, { "epoch": 2.97, "grad_norm": 0.7068431973457336, "learning_rate": 1.2877102490482793e-07, "loss": 1.5808, "step": 89330 }, { "epoch": 2.97, "grad_norm": 0.7201963067054749, "learning_rate": 1.2846499569915125e-07, "loss": 1.767, "step": 89331 }, { "epoch": 2.97, "grad_norm": 0.7267277240753174, "learning_rate": 1.2815933049333815e-07, "loss": 1.683, "step": 89332 }, { "epoch": 2.97, "grad_norm": 0.7140545845031738, "learning_rate": 1.27854029287755e-07, "loss": 1.6731, "step": 89333 }, { "epoch": 2.97, "grad_norm": 0.7064712643623352, "learning_rate": 1.275490920827682e-07, "loss": 1.6386, "step": 89334 }, { "epoch": 2.97, "grad_norm": 0.6865198612213135, "learning_rate": 1.2724451887874408e-07, "loss": 1.64, "step": 89335 }, { "epoch": 2.97, "grad_norm": 0.708393394947052, "learning_rate": 1.2694030967608238e-07, "loss": 1.6589, "step": 89336 }, { "epoch": 2.97, "grad_norm": 0.7165075540542603, "learning_rate": 1.2663646447511612e-07, "loss": 1.7445, "step": 89337 }, { "epoch": 2.97, "grad_norm": 0.6953901052474976, "learning_rate": 1.2633298327624496e-07, "loss": 1.6403, "step": 89338 }, { "epoch": 2.97, "grad_norm": 0.7026835680007935, "learning_rate": 1.2602986607980203e-07, "loss": 1.7057, "step": 89339 }, { "epoch": 2.97, "grad_norm": 0.7146754860877991, "learning_rate": 1.2572711288618697e-07, "loss": 1.6654, "step": 89340 }, { "epoch": 2.97, "grad_norm": 0.7216092348098755, "learning_rate": 1.2542472369576618e-07, "loss": 1.6231, "step": 89341 }, { "epoch": 2.97, "grad_norm": 0.7018845081329346, "learning_rate": 1.251226985088727e-07, "loss": 1.6435, "step": 89342 }, { "epoch": 2.97, "grad_norm": 0.7178841233253479, "learning_rate": 1.248210373259062e-07, "loss": 1.7411, "step": 89343 }, { "epoch": 2.97, "grad_norm": 0.6940580606460571, "learning_rate": 1.2451974014723308e-07, "loss": 1.6587, "step": 89344 }, { "epoch": 2.97, "grad_norm": 0.7121627926826477, "learning_rate": 1.2421880697318643e-07, "loss": 1.5924, "step": 89345 }, { "epoch": 2.97, "grad_norm": 0.7438734173774719, "learning_rate": 1.2391823780416588e-07, "loss": 1.752, "step": 89346 }, { "epoch": 2.97, "grad_norm": 0.7131689190864563, "learning_rate": 1.2361803264050453e-07, "loss": 1.6639, "step": 89347 }, { "epoch": 2.97, "grad_norm": 0.6997666954994202, "learning_rate": 1.2331819148260202e-07, "loss": 1.6403, "step": 89348 }, { "epoch": 2.97, "grad_norm": 0.7144688367843628, "learning_rate": 1.2301871433079148e-07, "loss": 1.772, "step": 89349 }, { "epoch": 2.97, "grad_norm": 0.7238982319831848, "learning_rate": 1.2271960118547254e-07, "loss": 1.6667, "step": 89350 }, { "epoch": 2.97, "grad_norm": 0.7139837145805359, "learning_rate": 1.2242085204697827e-07, "loss": 1.6994, "step": 89351 }, { "epoch": 2.97, "grad_norm": 0.7036874294281006, "learning_rate": 1.2212246691570838e-07, "loss": 1.6847, "step": 89352 }, { "epoch": 2.97, "grad_norm": 0.7051188945770264, "learning_rate": 1.218244457919626e-07, "loss": 1.657, "step": 89353 }, { "epoch": 2.97, "grad_norm": 0.7000240683555603, "learning_rate": 1.2152678867614063e-07, "loss": 1.6222, "step": 89354 }, { "epoch": 2.97, "grad_norm": 0.7362019419670105, "learning_rate": 1.2122949556860883e-07, "loss": 1.6365, "step": 89355 }, { "epoch": 2.97, "grad_norm": 0.7435840368270874, "learning_rate": 1.2093256646970029e-07, "loss": 1.7043, "step": 89356 }, { "epoch": 2.97, "grad_norm": 0.7115821242332458, "learning_rate": 1.2063600137981466e-07, "loss": 1.7554, "step": 89357 }, { "epoch": 2.97, "grad_norm": 0.7059130668640137, "learning_rate": 1.2033980029928503e-07, "loss": 1.5798, "step": 89358 }, { "epoch": 2.97, "grad_norm": 0.7038042545318604, "learning_rate": 1.2004396322847775e-07, "loss": 1.6354, "step": 89359 }, { "epoch": 2.97, "grad_norm": 0.7128820419311523, "learning_rate": 1.1974849016775921e-07, "loss": 1.637, "step": 89360 }, { "epoch": 2.97, "grad_norm": 0.7440701723098755, "learning_rate": 1.194533811174625e-07, "loss": 1.6742, "step": 89361 }, { "epoch": 2.97, "grad_norm": 0.6902424693107605, "learning_rate": 1.1915863607798725e-07, "loss": 1.6478, "step": 89362 }, { "epoch": 2.97, "grad_norm": 0.7140523195266724, "learning_rate": 1.1886425504966657e-07, "loss": 1.6347, "step": 89363 }, { "epoch": 2.97, "grad_norm": 0.7095884680747986, "learning_rate": 1.185702380328335e-07, "loss": 1.7817, "step": 89364 }, { "epoch": 2.97, "grad_norm": 0.6903372406959534, "learning_rate": 1.1827658502788772e-07, "loss": 1.6854, "step": 89365 }, { "epoch": 2.97, "grad_norm": 0.6908970475196838, "learning_rate": 1.1798329603516232e-07, "loss": 1.6819, "step": 89366 }, { "epoch": 2.97, "grad_norm": 0.71285480260849, "learning_rate": 1.1769037105502366e-07, "loss": 1.6508, "step": 89367 }, { "epoch": 2.97, "grad_norm": 0.7084619998931885, "learning_rate": 1.1739781008783811e-07, "loss": 1.6688, "step": 89368 }, { "epoch": 2.97, "grad_norm": 0.7237693667411804, "learning_rate": 1.1710561313393874e-07, "loss": 1.6259, "step": 89369 }, { "epoch": 2.97, "grad_norm": 0.706594705581665, "learning_rate": 1.1681378019369192e-07, "loss": 1.7021, "step": 89370 }, { "epoch": 2.97, "grad_norm": 0.7254645824432373, "learning_rate": 1.1652231126743073e-07, "loss": 1.7056, "step": 89371 }, { "epoch": 2.97, "grad_norm": 0.7058889865875244, "learning_rate": 1.1623120635552152e-07, "loss": 1.6363, "step": 89372 }, { "epoch": 2.97, "grad_norm": 0.7279918789863586, "learning_rate": 1.1594046545833069e-07, "loss": 1.6832, "step": 89373 }, { "epoch": 2.97, "grad_norm": 0.7066236734390259, "learning_rate": 1.156500885762246e-07, "loss": 1.613, "step": 89374 }, { "epoch": 2.97, "grad_norm": 0.7146978378295898, "learning_rate": 1.15360075709503e-07, "loss": 1.6882, "step": 89375 }, { "epoch": 2.97, "grad_norm": 0.6915406584739685, "learning_rate": 1.150704268585656e-07, "loss": 1.7244, "step": 89376 }, { "epoch": 2.97, "grad_norm": 0.7143644690513611, "learning_rate": 1.1478114202374544e-07, "loss": 1.6933, "step": 89377 }, { "epoch": 2.97, "grad_norm": 0.6981685757637024, "learning_rate": 1.144922212053756e-07, "loss": 1.6735, "step": 89378 }, { "epoch": 2.97, "grad_norm": 0.7101819515228271, "learning_rate": 1.1420366440385576e-07, "loss": 1.6544, "step": 89379 }, { "epoch": 2.97, "grad_norm": 0.703381359577179, "learning_rate": 1.1391547161948567e-07, "loss": 1.6335, "step": 89380 }, { "epoch": 2.97, "grad_norm": 0.7012556195259094, "learning_rate": 1.136276428526317e-07, "loss": 1.6832, "step": 89381 }, { "epoch": 2.97, "grad_norm": 0.7242064476013184, "learning_rate": 1.1334017810366025e-07, "loss": 1.7348, "step": 89382 }, { "epoch": 2.97, "grad_norm": 0.7290605902671814, "learning_rate": 1.1305307737290436e-07, "loss": 1.6445, "step": 89383 }, { "epoch": 2.97, "grad_norm": 0.7238924503326416, "learning_rate": 1.1276634066069712e-07, "loss": 1.7375, "step": 89384 }, { "epoch": 2.97, "grad_norm": 0.7309536933898926, "learning_rate": 1.1247996796743819e-07, "loss": 1.6853, "step": 89385 }, { "epoch": 2.97, "grad_norm": 0.7194295525550842, "learning_rate": 1.1219395929342734e-07, "loss": 1.6609, "step": 89386 }, { "epoch": 2.97, "grad_norm": 0.7037970423698425, "learning_rate": 1.1190831463903094e-07, "loss": 1.7072, "step": 89387 }, { "epoch": 2.97, "grad_norm": 0.7241553068161011, "learning_rate": 1.1162303400458206e-07, "loss": 1.652, "step": 89388 }, { "epoch": 2.97, "grad_norm": 0.6894849538803101, "learning_rate": 1.1133811739044706e-07, "loss": 1.6831, "step": 89389 }, { "epoch": 2.97, "grad_norm": 0.7112880349159241, "learning_rate": 1.1105356479695904e-07, "loss": 1.694, "step": 89390 }, { "epoch": 2.97, "grad_norm": 0.7144472599029541, "learning_rate": 1.1076937622445103e-07, "loss": 1.6708, "step": 89391 }, { "epoch": 2.97, "grad_norm": 0.7195826768875122, "learning_rate": 1.1048555167328943e-07, "loss": 1.676, "step": 89392 }, { "epoch": 2.97, "grad_norm": 0.7069827318191528, "learning_rate": 1.1020209114380729e-07, "loss": 1.6858, "step": 89393 }, { "epoch": 2.97, "grad_norm": 0.7112785577774048, "learning_rate": 1.09918994636371e-07, "loss": 1.688, "step": 89394 }, { "epoch": 2.97, "grad_norm": 0.7148208022117615, "learning_rate": 1.0963626215131361e-07, "loss": 1.6423, "step": 89395 }, { "epoch": 2.97, "grad_norm": 0.7043128609657288, "learning_rate": 1.0935389368893488e-07, "loss": 1.6538, "step": 89396 }, { "epoch": 2.97, "grad_norm": 0.7387744188308716, "learning_rate": 1.090718892496345e-07, "loss": 1.7004, "step": 89397 }, { "epoch": 2.97, "grad_norm": 0.7265222072601318, "learning_rate": 1.0879024883374554e-07, "loss": 1.6806, "step": 89398 }, { "epoch": 2.97, "grad_norm": 0.698046863079071, "learning_rate": 1.0850897244156775e-07, "loss": 1.6151, "step": 89399 }, { "epoch": 2.97, "grad_norm": 0.7146203517913818, "learning_rate": 1.0822806007350082e-07, "loss": 1.7012, "step": 89400 }, { "epoch": 2.97, "grad_norm": 0.7421941757202148, "learning_rate": 1.079475117298445e-07, "loss": 1.7198, "step": 89401 }, { "epoch": 2.97, "grad_norm": 0.688580334186554, "learning_rate": 1.0766732741096518e-07, "loss": 1.6426, "step": 89402 }, { "epoch": 2.97, "grad_norm": 0.6963129639625549, "learning_rate": 1.073875071171626e-07, "loss": 1.6302, "step": 89403 }, { "epoch": 2.97, "grad_norm": 0.7031875848770142, "learning_rate": 1.0710805084883644e-07, "loss": 1.728, "step": 89404 }, { "epoch": 2.97, "grad_norm": 0.7237538695335388, "learning_rate": 1.0682895860628648e-07, "loss": 1.6316, "step": 89405 }, { "epoch": 2.97, "grad_norm": 0.7374908328056335, "learning_rate": 1.0655023038984579e-07, "loss": 1.6666, "step": 89406 }, { "epoch": 2.97, "grad_norm": 0.7091728448867798, "learning_rate": 1.0627186619988071e-07, "loss": 1.6975, "step": 89407 }, { "epoch": 2.97, "grad_norm": 0.7145042419433594, "learning_rate": 1.0599386603669103e-07, "loss": 1.6845, "step": 89408 }, { "epoch": 2.97, "grad_norm": 0.7005380988121033, "learning_rate": 1.057162299006764e-07, "loss": 1.6663, "step": 89409 }, { "epoch": 2.97, "grad_norm": 0.7148807644844055, "learning_rate": 1.0543895779210332e-07, "loss": 1.6418, "step": 89410 }, { "epoch": 2.97, "grad_norm": 0.7112060785293579, "learning_rate": 1.0516204971133813e-07, "loss": 1.6936, "step": 89411 }, { "epoch": 2.97, "grad_norm": 0.7347946166992188, "learning_rate": 1.048855056587472e-07, "loss": 1.6652, "step": 89412 }, { "epoch": 2.97, "grad_norm": 0.7133280038833618, "learning_rate": 1.0460932563459701e-07, "loss": 1.6851, "step": 89413 }, { "epoch": 2.97, "grad_norm": 0.6944757699966431, "learning_rate": 1.0433350963928722e-07, "loss": 1.6813, "step": 89414 }, { "epoch": 2.97, "grad_norm": 0.7143931984901428, "learning_rate": 1.040580576731509e-07, "loss": 1.6468, "step": 89415 }, { "epoch": 2.97, "grad_norm": 0.7009772062301636, "learning_rate": 1.0378296973645451e-07, "loss": 1.6721, "step": 89416 }, { "epoch": 2.97, "grad_norm": 0.7215220928192139, "learning_rate": 1.0350824582963102e-07, "loss": 1.6659, "step": 89417 }, { "epoch": 2.97, "grad_norm": 0.7006799578666687, "learning_rate": 1.032338859529136e-07, "loss": 1.6678, "step": 89418 }, { "epoch": 2.97, "grad_norm": 0.723449170589447, "learning_rate": 1.0295989010670192e-07, "loss": 1.7031, "step": 89419 }, { "epoch": 2.98, "grad_norm": 0.6950380206108093, "learning_rate": 1.0268625829129574e-07, "loss": 1.642, "step": 89420 }, { "epoch": 2.98, "grad_norm": 0.7235198020935059, "learning_rate": 1.0241299050706142e-07, "loss": 1.7161, "step": 89421 }, { "epoch": 2.98, "grad_norm": 0.7022227048873901, "learning_rate": 1.0214008675429874e-07, "loss": 1.6472, "step": 89422 }, { "epoch": 2.98, "grad_norm": 0.7238948345184326, "learning_rate": 1.0186754703337407e-07, "loss": 1.6814, "step": 89423 }, { "epoch": 2.98, "grad_norm": 0.7118136882781982, "learning_rate": 1.0159537134455387e-07, "loss": 1.7204, "step": 89424 }, { "epoch": 2.98, "grad_norm": 0.728948712348938, "learning_rate": 1.013235596882378e-07, "loss": 1.7051, "step": 89425 }, { "epoch": 2.98, "grad_norm": 0.7117724418640137, "learning_rate": 1.0105211206472563e-07, "loss": 1.6491, "step": 89426 }, { "epoch": 2.98, "grad_norm": 0.7064178586006165, "learning_rate": 1.0078102847431713e-07, "loss": 1.6637, "step": 89427 }, { "epoch": 2.98, "grad_norm": 0.7273569107055664, "learning_rate": 1.0051030891741196e-07, "loss": 1.7213, "step": 89428 }, { "epoch": 2.98, "grad_norm": 0.7175732254981995, "learning_rate": 1.002399533942766e-07, "loss": 1.7606, "step": 89429 }, { "epoch": 2.98, "grad_norm": 0.702130913734436, "learning_rate": 9.99699619052774e-08, "loss": 1.7337, "step": 89430 }, { "epoch": 2.98, "grad_norm": 0.7259854078292847, "learning_rate": 9.970033445071413e-08, "loss": 1.6927, "step": 89431 }, { "epoch": 2.98, "grad_norm": 0.6922032237052917, "learning_rate": 9.943107103091986e-08, "loss": 1.6341, "step": 89432 }, { "epoch": 2.98, "grad_norm": 0.7003270983695984, "learning_rate": 9.916217164622764e-08, "loss": 1.6724, "step": 89433 }, { "epoch": 2.98, "grad_norm": 0.7157498598098755, "learning_rate": 9.889363629697056e-08, "loss": 1.643, "step": 89434 }, { "epoch": 2.98, "grad_norm": 0.7329017519950867, "learning_rate": 9.862546498348167e-08, "loss": 1.7319, "step": 89435 }, { "epoch": 2.98, "grad_norm": 0.7183963656425476, "learning_rate": 9.835765770606075e-08, "loss": 1.6798, "step": 89436 }, { "epoch": 2.98, "grad_norm": 0.7048867344856262, "learning_rate": 9.809021446504084e-08, "loss": 1.6465, "step": 89437 }, { "epoch": 2.98, "grad_norm": 0.7121943831443787, "learning_rate": 9.782313526075503e-08, "loss": 1.6188, "step": 89438 }, { "epoch": 2.98, "grad_norm": 0.6974303722381592, "learning_rate": 9.755642009353637e-08, "loss": 1.7074, "step": 89439 }, { "epoch": 2.98, "grad_norm": 0.7514863610267639, "learning_rate": 9.729006896368463e-08, "loss": 1.7335, "step": 89440 }, { "epoch": 2.98, "grad_norm": 0.7036253809928894, "learning_rate": 9.702408187153287e-08, "loss": 1.68, "step": 89441 }, { "epoch": 2.98, "grad_norm": 0.7250053882598877, "learning_rate": 9.675845881741417e-08, "loss": 1.6188, "step": 89442 }, { "epoch": 2.98, "grad_norm": 0.7108876705169678, "learning_rate": 9.649319980162829e-08, "loss": 1.6373, "step": 89443 }, { "epoch": 2.98, "grad_norm": 0.7182340621948242, "learning_rate": 9.622830482450827e-08, "loss": 1.6724, "step": 89444 }, { "epoch": 2.98, "grad_norm": 0.7247309684753418, "learning_rate": 9.59637738863872e-08, "loss": 1.7462, "step": 89445 }, { "epoch": 2.98, "grad_norm": 0.7264664769172668, "learning_rate": 9.569960698759815e-08, "loss": 1.6842, "step": 89446 }, { "epoch": 2.98, "grad_norm": 0.6881443858146667, "learning_rate": 9.543580412840757e-08, "loss": 1.6347, "step": 89447 }, { "epoch": 2.98, "grad_norm": 0.7049579620361328, "learning_rate": 9.517236530918182e-08, "loss": 1.7052, "step": 89448 }, { "epoch": 2.98, "grad_norm": 0.7107298374176025, "learning_rate": 9.490929053022067e-08, "loss": 1.696, "step": 89449 }, { "epoch": 2.98, "grad_norm": 0.725871205329895, "learning_rate": 9.464657979185719e-08, "loss": 1.7611, "step": 89450 }, { "epoch": 2.98, "grad_norm": 0.7220050096511841, "learning_rate": 9.438423309442445e-08, "loss": 1.691, "step": 89451 }, { "epoch": 2.98, "grad_norm": 0.6872537136077881, "learning_rate": 9.412225043818888e-08, "loss": 1.6748, "step": 89452 }, { "epoch": 2.98, "grad_norm": 0.7192925810813904, "learning_rate": 9.386063182351688e-08, "loss": 1.6945, "step": 89453 }, { "epoch": 2.98, "grad_norm": 0.7036410570144653, "learning_rate": 9.35993772506749e-08, "loss": 1.6764, "step": 89454 }, { "epoch": 2.98, "grad_norm": 0.7203681468963623, "learning_rate": 9.333848672006262e-08, "loss": 1.75, "step": 89455 }, { "epoch": 2.98, "grad_norm": 0.7118709683418274, "learning_rate": 9.307796023191316e-08, "loss": 1.7015, "step": 89456 }, { "epoch": 2.98, "grad_norm": 1.284140706062317, "learning_rate": 9.281779778659293e-08, "loss": 1.688, "step": 89457 }, { "epoch": 2.98, "grad_norm": 0.740011990070343, "learning_rate": 9.255799938440167e-08, "loss": 1.7096, "step": 89458 }, { "epoch": 2.98, "grad_norm": 0.7043868899345398, "learning_rate": 9.229856502567245e-08, "loss": 1.6778, "step": 89459 }, { "epoch": 2.98, "grad_norm": 0.7064666748046875, "learning_rate": 9.203949471067173e-08, "loss": 1.6438, "step": 89460 }, { "epoch": 2.98, "grad_norm": 0.7225857973098755, "learning_rate": 9.178078843976588e-08, "loss": 1.6758, "step": 89461 }, { "epoch": 2.98, "grad_norm": 0.7093683481216431, "learning_rate": 9.152244621325466e-08, "loss": 1.6717, "step": 89462 }, { "epoch": 2.98, "grad_norm": 0.7226139307022095, "learning_rate": 9.126446803143783e-08, "loss": 1.6698, "step": 89463 }, { "epoch": 2.98, "grad_norm": 0.7104567885398865, "learning_rate": 9.100685389461515e-08, "loss": 1.6965, "step": 89464 }, { "epoch": 2.98, "grad_norm": 0.6880026459693909, "learning_rate": 9.074960380315299e-08, "loss": 1.6172, "step": 89465 }, { "epoch": 2.98, "grad_norm": 0.7268453240394592, "learning_rate": 9.04927177573178e-08, "loss": 1.7269, "step": 89466 }, { "epoch": 2.98, "grad_norm": 0.7197022438049316, "learning_rate": 9.023619575744268e-08, "loss": 1.6624, "step": 89467 }, { "epoch": 2.98, "grad_norm": 0.6932232975959778, "learning_rate": 8.998003780382735e-08, "loss": 1.6787, "step": 89468 }, { "epoch": 2.98, "grad_norm": 0.7054702639579773, "learning_rate": 8.972424389677158e-08, "loss": 1.7307, "step": 89469 }, { "epoch": 2.98, "grad_norm": 0.7092863917350769, "learning_rate": 8.946881403660844e-08, "loss": 1.7244, "step": 89470 }, { "epoch": 2.98, "grad_norm": 0.7246730923652649, "learning_rate": 8.92137482236377e-08, "loss": 1.6447, "step": 89471 }, { "epoch": 2.98, "grad_norm": 0.7170482873916626, "learning_rate": 8.89590464581924e-08, "loss": 1.6788, "step": 89472 }, { "epoch": 2.98, "grad_norm": 0.764376699924469, "learning_rate": 8.870470874053903e-08, "loss": 1.699, "step": 89473 }, { "epoch": 2.98, "grad_norm": 0.7107927203178406, "learning_rate": 8.845073507101063e-08, "loss": 1.6696, "step": 89474 }, { "epoch": 2.98, "grad_norm": 0.7254685163497925, "learning_rate": 8.819712544990699e-08, "loss": 1.6414, "step": 89475 }, { "epoch": 2.98, "grad_norm": 0.6978160738945007, "learning_rate": 8.794387987756113e-08, "loss": 1.7161, "step": 89476 }, { "epoch": 2.98, "grad_norm": 0.7266972064971924, "learning_rate": 8.769099835427285e-08, "loss": 1.7207, "step": 89477 }, { "epoch": 2.98, "grad_norm": 0.7242863774299622, "learning_rate": 8.743848088030858e-08, "loss": 1.6939, "step": 89478 }, { "epoch": 2.98, "grad_norm": 0.7016141414642334, "learning_rate": 8.71863274560014e-08, "loss": 1.7043, "step": 89479 }, { "epoch": 2.98, "grad_norm": 0.6879832744598389, "learning_rate": 8.693453808168439e-08, "loss": 1.6185, "step": 89480 }, { "epoch": 2.98, "grad_norm": 0.7149257063865662, "learning_rate": 8.668311275762397e-08, "loss": 1.7593, "step": 89481 }, { "epoch": 2.98, "grad_norm": 0.716937780380249, "learning_rate": 8.643205148415322e-08, "loss": 1.6659, "step": 89482 }, { "epoch": 2.98, "grad_norm": 0.7242597937583923, "learning_rate": 8.618135426153861e-08, "loss": 1.6536, "step": 89483 }, { "epoch": 2.98, "grad_norm": 0.7065774202346802, "learning_rate": 8.593102109011319e-08, "loss": 1.6585, "step": 89484 }, { "epoch": 2.98, "grad_norm": 0.7156051397323608, "learning_rate": 8.568105197021002e-08, "loss": 1.6979, "step": 89485 }, { "epoch": 2.98, "grad_norm": 0.7497351765632629, "learning_rate": 8.543144690206228e-08, "loss": 1.7591, "step": 89486 }, { "epoch": 2.98, "grad_norm": 0.7305268049240112, "learning_rate": 8.51822058860363e-08, "loss": 1.7489, "step": 89487 }, { "epoch": 2.98, "grad_norm": 0.7112934589385986, "learning_rate": 8.493332892239857e-08, "loss": 1.6963, "step": 89488 }, { "epoch": 2.98, "grad_norm": 0.7234588265419006, "learning_rate": 8.468481601144883e-08, "loss": 1.681, "step": 89489 }, { "epoch": 2.98, "grad_norm": 0.7131369113922119, "learning_rate": 8.443666715352016e-08, "loss": 1.7722, "step": 89490 }, { "epoch": 2.98, "grad_norm": 0.7333066463470459, "learning_rate": 8.4188882348879e-08, "loss": 1.6022, "step": 89491 }, { "epoch": 2.98, "grad_norm": 0.7148618698120117, "learning_rate": 8.394146159785842e-08, "loss": 1.6962, "step": 89492 }, { "epoch": 2.98, "grad_norm": 0.7016778588294983, "learning_rate": 8.369440490075819e-08, "loss": 1.6867, "step": 89493 }, { "epoch": 2.98, "grad_norm": 0.7138075232505798, "learning_rate": 8.344771225781144e-08, "loss": 1.6719, "step": 89494 }, { "epoch": 2.98, "grad_norm": 0.7300087213516235, "learning_rate": 8.320138366941786e-08, "loss": 1.5783, "step": 89495 }, { "epoch": 2.98, "grad_norm": 0.734025776386261, "learning_rate": 8.29554191358106e-08, "loss": 1.662, "step": 89496 }, { "epoch": 2.98, "grad_norm": 0.7326563000679016, "learning_rate": 8.270981865728943e-08, "loss": 1.719, "step": 89497 }, { "epoch": 2.98, "grad_norm": 0.7273863554000854, "learning_rate": 8.246458223418739e-08, "loss": 1.7412, "step": 89498 }, { "epoch": 2.98, "grad_norm": 0.7309143543243408, "learning_rate": 8.221970986677095e-08, "loss": 1.6601, "step": 89499 }, { "epoch": 2.98, "grad_norm": 0.7033976912498474, "learning_rate": 8.197520155533987e-08, "loss": 1.6824, "step": 89500 }, { "epoch": 2.98, "grad_norm": 0.7262818217277527, "learning_rate": 8.173105730022722e-08, "loss": 1.6719, "step": 89501 }, { "epoch": 2.98, "grad_norm": 0.7216227650642395, "learning_rate": 8.148727710166614e-08, "loss": 1.6234, "step": 89502 }, { "epoch": 2.98, "grad_norm": 0.7008147239685059, "learning_rate": 8.1243860960023e-08, "loss": 1.726, "step": 89503 }, { "epoch": 2.98, "grad_norm": 0.6955161690711975, "learning_rate": 8.100080887553095e-08, "loss": 1.5836, "step": 89504 }, { "epoch": 2.98, "grad_norm": 0.7367697358131409, "learning_rate": 8.075812084855638e-08, "loss": 1.6356, "step": 89505 }, { "epoch": 2.98, "grad_norm": 0.7297234535217285, "learning_rate": 8.05157968792991e-08, "loss": 1.7562, "step": 89506 }, { "epoch": 2.98, "grad_norm": 0.7410033345222473, "learning_rate": 8.02738369681255e-08, "loss": 1.6827, "step": 89507 }, { "epoch": 2.98, "grad_norm": 0.6912130117416382, "learning_rate": 8.003224111530204e-08, "loss": 1.6751, "step": 89508 }, { "epoch": 2.98, "grad_norm": 0.7095595002174377, "learning_rate": 7.979100932116178e-08, "loss": 1.621, "step": 89509 }, { "epoch": 2.98, "grad_norm": 0.7144859433174133, "learning_rate": 7.955014158593786e-08, "loss": 1.7174, "step": 89510 }, { "epoch": 2.98, "grad_norm": 0.7242352366447449, "learning_rate": 7.930963790993006e-08, "loss": 1.735, "step": 89511 }, { "epoch": 2.98, "grad_norm": 0.702507495880127, "learning_rate": 7.906949829347142e-08, "loss": 1.6602, "step": 89512 }, { "epoch": 2.98, "grad_norm": 0.725846529006958, "learning_rate": 7.882972273682841e-08, "loss": 1.7204, "step": 89513 }, { "epoch": 2.98, "grad_norm": 0.738724946975708, "learning_rate": 7.859031124030079e-08, "loss": 1.7412, "step": 89514 }, { "epoch": 2.98, "grad_norm": 0.7225534915924072, "learning_rate": 7.835126380415501e-08, "loss": 1.7177, "step": 89515 }, { "epoch": 2.98, "grad_norm": 0.7113532423973083, "learning_rate": 7.811258042872414e-08, "loss": 1.6825, "step": 89516 }, { "epoch": 2.98, "grad_norm": 0.7039865255355835, "learning_rate": 7.787426111427464e-08, "loss": 1.6963, "step": 89517 }, { "epoch": 2.98, "grad_norm": 0.7310783863067627, "learning_rate": 7.763630586107294e-08, "loss": 1.6761, "step": 89518 }, { "epoch": 2.98, "grad_norm": 0.718311607837677, "learning_rate": 7.739871466941883e-08, "loss": 1.6395, "step": 89519 }, { "epoch": 2.98, "grad_norm": 0.6987720131874084, "learning_rate": 7.716148753964534e-08, "loss": 1.6135, "step": 89520 }, { "epoch": 2.98, "grad_norm": 0.7153143882751465, "learning_rate": 7.692462447198567e-08, "loss": 1.6814, "step": 89521 }, { "epoch": 2.98, "grad_norm": 0.7160747051239014, "learning_rate": 7.668812546673953e-08, "loss": 1.7246, "step": 89522 }, { "epoch": 2.98, "grad_norm": 0.7251052260398865, "learning_rate": 7.64519905242067e-08, "loss": 1.6787, "step": 89523 }, { "epoch": 2.98, "grad_norm": 0.761381208896637, "learning_rate": 7.621621964468694e-08, "loss": 1.6538, "step": 89524 }, { "epoch": 2.98, "grad_norm": 0.7082502841949463, "learning_rate": 7.59808128284467e-08, "loss": 1.6916, "step": 89525 }, { "epoch": 2.98, "grad_norm": 0.7125245928764343, "learning_rate": 7.574577007575244e-08, "loss": 1.635, "step": 89526 }, { "epoch": 2.98, "grad_norm": 0.7165546417236328, "learning_rate": 7.551109138693723e-08, "loss": 1.6081, "step": 89527 }, { "epoch": 2.98, "grad_norm": 0.7001973986625671, "learning_rate": 7.527677676226752e-08, "loss": 1.6649, "step": 89528 }, { "epoch": 2.98, "grad_norm": 0.7123634815216064, "learning_rate": 7.504282620197643e-08, "loss": 1.6742, "step": 89529 }, { "epoch": 2.98, "grad_norm": 0.7157856822013855, "learning_rate": 7.480923970643037e-08, "loss": 1.7639, "step": 89530 }, { "epoch": 2.98, "grad_norm": 0.7088088393211365, "learning_rate": 7.457601727586249e-08, "loss": 1.7067, "step": 89531 }, { "epoch": 2.98, "grad_norm": 0.7125083208084106, "learning_rate": 7.434315891053921e-08, "loss": 1.6695, "step": 89532 }, { "epoch": 2.98, "grad_norm": 0.6994816660881042, "learning_rate": 7.411066461079363e-08, "loss": 1.6981, "step": 89533 }, { "epoch": 2.98, "grad_norm": 0.7243650555610657, "learning_rate": 7.387853437689217e-08, "loss": 1.6405, "step": 89534 }, { "epoch": 2.98, "grad_norm": 0.7113857269287109, "learning_rate": 7.364676820913462e-08, "loss": 1.7411, "step": 89535 }, { "epoch": 2.98, "grad_norm": 0.6960175633430481, "learning_rate": 7.341536610772081e-08, "loss": 1.7362, "step": 89536 }, { "epoch": 2.98, "grad_norm": 0.7139425277709961, "learning_rate": 7.318432807301711e-08, "loss": 1.676, "step": 89537 }, { "epoch": 2.98, "grad_norm": 0.732978105545044, "learning_rate": 7.295365410528998e-08, "loss": 1.6385, "step": 89538 }, { "epoch": 2.98, "grad_norm": 0.7236682176589966, "learning_rate": 7.272334420477254e-08, "loss": 1.7456, "step": 89539 }, { "epoch": 2.98, "grad_norm": 0.7020851969718933, "learning_rate": 7.24933983717979e-08, "loss": 1.6995, "step": 89540 }, { "epoch": 2.98, "grad_norm": 0.6905412673950195, "learning_rate": 7.22638166066325e-08, "loss": 1.71, "step": 89541 }, { "epoch": 2.98, "grad_norm": 0.6884204745292664, "learning_rate": 7.203459890954277e-08, "loss": 1.6259, "step": 89542 }, { "epoch": 2.98, "grad_norm": 0.7103954553604126, "learning_rate": 7.18057452807952e-08, "loss": 1.6221, "step": 89543 }, { "epoch": 2.98, "grad_norm": 0.7112722992897034, "learning_rate": 7.15772557206895e-08, "loss": 1.7101, "step": 89544 }, { "epoch": 2.98, "grad_norm": 0.7173799872398376, "learning_rate": 7.134913022949218e-08, "loss": 1.6343, "step": 89545 }, { "epoch": 2.98, "grad_norm": 0.7032500505447388, "learning_rate": 7.112136880750297e-08, "loss": 1.6653, "step": 89546 }, { "epoch": 2.98, "grad_norm": 0.7116439938545227, "learning_rate": 7.089397145495501e-08, "loss": 1.6909, "step": 89547 }, { "epoch": 2.98, "grad_norm": 0.7014762163162231, "learning_rate": 7.066693817218138e-08, "loss": 1.6236, "step": 89548 }, { "epoch": 2.98, "grad_norm": 0.7263009548187256, "learning_rate": 7.044026895938194e-08, "loss": 1.7221, "step": 89549 }, { "epoch": 2.98, "grad_norm": 0.7144307494163513, "learning_rate": 7.021396381692301e-08, "loss": 1.6549, "step": 89550 }, { "epoch": 2.98, "grad_norm": 0.7025648951530457, "learning_rate": 6.998802274500449e-08, "loss": 1.5834, "step": 89551 }, { "epoch": 2.98, "grad_norm": 0.7292138338088989, "learning_rate": 6.976244574392609e-08, "loss": 1.6682, "step": 89552 }, { "epoch": 2.98, "grad_norm": 0.7075831890106201, "learning_rate": 6.953723281395429e-08, "loss": 1.6939, "step": 89553 }, { "epoch": 2.98, "grad_norm": 0.7288347482681274, "learning_rate": 6.931238395538885e-08, "loss": 1.6447, "step": 89554 }, { "epoch": 2.98, "grad_norm": 0.7224664092063904, "learning_rate": 6.908789916849622e-08, "loss": 1.6546, "step": 89555 }, { "epoch": 2.98, "grad_norm": 0.7312880754470825, "learning_rate": 6.886377845350954e-08, "loss": 1.7184, "step": 89556 }, { "epoch": 2.98, "grad_norm": 0.7027137875556946, "learning_rate": 6.864002181076189e-08, "loss": 1.624, "step": 89557 }, { "epoch": 2.98, "grad_norm": 0.7308427691459656, "learning_rate": 6.84166292404531e-08, "loss": 1.7097, "step": 89558 }, { "epoch": 2.98, "grad_norm": 0.7183175086975098, "learning_rate": 6.819360074291624e-08, "loss": 1.7552, "step": 89559 }, { "epoch": 2.98, "grad_norm": 0.7189648151397705, "learning_rate": 6.797093631841776e-08, "loss": 1.6433, "step": 89560 }, { "epoch": 2.98, "grad_norm": 0.6948801875114441, "learning_rate": 6.774863596719082e-08, "loss": 1.6374, "step": 89561 }, { "epoch": 2.98, "grad_norm": 0.6975497007369995, "learning_rate": 6.752669968953516e-08, "loss": 1.7612, "step": 89562 }, { "epoch": 2.98, "grad_norm": 0.707665741443634, "learning_rate": 6.730512748568396e-08, "loss": 1.622, "step": 89563 }, { "epoch": 2.98, "grad_norm": 0.7338147759437561, "learning_rate": 6.708391935593693e-08, "loss": 1.7094, "step": 89564 }, { "epoch": 2.98, "grad_norm": 0.7059934735298157, "learning_rate": 6.686307530056056e-08, "loss": 1.7219, "step": 89565 }, { "epoch": 2.98, "grad_norm": 0.7218593955039978, "learning_rate": 6.664259531982131e-08, "loss": 1.6194, "step": 89566 }, { "epoch": 2.98, "grad_norm": 0.7303345799446106, "learning_rate": 6.64224794140189e-08, "loss": 1.6522, "step": 89567 }, { "epoch": 2.98, "grad_norm": 0.7364373803138733, "learning_rate": 6.620272758335321e-08, "loss": 1.6315, "step": 89568 }, { "epoch": 2.98, "grad_norm": 0.7167160511016846, "learning_rate": 6.598333982812398e-08, "loss": 1.6345, "step": 89569 }, { "epoch": 2.98, "grad_norm": 0.7002585530281067, "learning_rate": 6.576431614859768e-08, "loss": 1.664, "step": 89570 }, { "epoch": 2.98, "grad_norm": 0.7064306139945984, "learning_rate": 6.554565654504073e-08, "loss": 1.6929, "step": 89571 }, { "epoch": 2.98, "grad_norm": 0.7045961022377014, "learning_rate": 6.532736101771962e-08, "loss": 1.6716, "step": 89572 }, { "epoch": 2.98, "grad_norm": 0.7094568014144897, "learning_rate": 6.51094295669008e-08, "loss": 1.6334, "step": 89573 }, { "epoch": 2.98, "grad_norm": 0.7043636441230774, "learning_rate": 6.48918621928507e-08, "loss": 1.6008, "step": 89574 }, { "epoch": 2.98, "grad_norm": 0.6862604022026062, "learning_rate": 6.467465889580248e-08, "loss": 1.7603, "step": 89575 }, { "epoch": 2.98, "grad_norm": 0.7255522012710571, "learning_rate": 6.44578196760559e-08, "loss": 1.649, "step": 89576 }, { "epoch": 2.98, "grad_norm": 0.7515931129455566, "learning_rate": 6.424134453387742e-08, "loss": 1.5719, "step": 89577 }, { "epoch": 2.98, "grad_norm": 0.6930950284004211, "learning_rate": 6.402523346953347e-08, "loss": 1.614, "step": 89578 }, { "epoch": 2.98, "grad_norm": 0.7404042482376099, "learning_rate": 6.380948648322392e-08, "loss": 1.7211, "step": 89579 }, { "epoch": 2.98, "grad_norm": 0.7115441560745239, "learning_rate": 6.359410357528183e-08, "loss": 1.6507, "step": 89580 }, { "epoch": 2.98, "grad_norm": 0.7247424721717834, "learning_rate": 6.337908474594033e-08, "loss": 1.6844, "step": 89581 }, { "epoch": 2.98, "grad_norm": 0.7156510949134827, "learning_rate": 6.316442999546589e-08, "loss": 1.6796, "step": 89582 }, { "epoch": 2.98, "grad_norm": 0.72149658203125, "learning_rate": 6.295013932412496e-08, "loss": 1.6833, "step": 89583 }, { "epoch": 2.98, "grad_norm": 0.7175925970077515, "learning_rate": 6.273621273215068e-08, "loss": 1.7264, "step": 89584 }, { "epoch": 2.98, "grad_norm": 0.7138338685035706, "learning_rate": 6.252265021984282e-08, "loss": 1.6684, "step": 89585 }, { "epoch": 2.98, "grad_norm": 0.7009298801422119, "learning_rate": 6.23094517874012e-08, "loss": 1.7749, "step": 89586 }, { "epoch": 2.98, "grad_norm": 0.7182037830352783, "learning_rate": 6.209661743515892e-08, "loss": 1.6454, "step": 89587 }, { "epoch": 2.98, "grad_norm": 0.7285581827163696, "learning_rate": 6.18841471633158e-08, "loss": 1.6273, "step": 89588 }, { "epoch": 2.98, "grad_norm": 0.7022002935409546, "learning_rate": 6.16720409721716e-08, "loss": 1.6786, "step": 89589 }, { "epoch": 2.98, "grad_norm": 0.7007156610488892, "learning_rate": 6.146029886195947e-08, "loss": 1.7089, "step": 89590 }, { "epoch": 2.98, "grad_norm": 0.7123658061027527, "learning_rate": 6.124892083291255e-08, "loss": 1.6309, "step": 89591 }, { "epoch": 2.98, "grad_norm": 0.7034574151039124, "learning_rate": 6.103790688533061e-08, "loss": 1.6859, "step": 89592 }, { "epoch": 2.98, "grad_norm": 0.7146779894828796, "learning_rate": 6.08272570194801e-08, "loss": 1.7846, "step": 89593 }, { "epoch": 2.98, "grad_norm": 0.7224600911140442, "learning_rate": 6.061697123556086e-08, "loss": 1.7126, "step": 89594 }, { "epoch": 2.98, "grad_norm": 0.7090418934822083, "learning_rate": 6.040704953387265e-08, "loss": 1.6976, "step": 89595 }, { "epoch": 2.98, "grad_norm": 0.735394299030304, "learning_rate": 6.019749191464862e-08, "loss": 1.712, "step": 89596 }, { "epoch": 2.98, "grad_norm": 0.7211940288543701, "learning_rate": 5.998829837815522e-08, "loss": 1.6619, "step": 89597 }, { "epoch": 2.98, "grad_norm": 0.7140891551971436, "learning_rate": 5.97794689246589e-08, "loss": 1.6498, "step": 89598 }, { "epoch": 2.98, "grad_norm": 0.7269036769866943, "learning_rate": 5.957100355435951e-08, "loss": 1.6621, "step": 89599 }, { "epoch": 2.98, "grad_norm": 0.7132998704910278, "learning_rate": 5.93629022675568e-08, "loss": 1.7157, "step": 89600 }, { "epoch": 2.98, "grad_norm": 0.7176714539527893, "learning_rate": 5.915516506451723e-08, "loss": 1.7509, "step": 89601 }, { "epoch": 2.98, "grad_norm": 0.7386249303817749, "learning_rate": 5.8947791945440635e-08, "loss": 1.6888, "step": 89602 }, { "epoch": 2.98, "grad_norm": 0.7140979170799255, "learning_rate": 5.8740782910626784e-08, "loss": 1.7075, "step": 89603 }, { "epoch": 2.98, "grad_norm": 0.7143259048461914, "learning_rate": 5.8534137960308815e-08, "loss": 1.7137, "step": 89604 }, { "epoch": 2.98, "grad_norm": 0.7086209058761597, "learning_rate": 5.832785709471988e-08, "loss": 1.6626, "step": 89605 }, { "epoch": 2.98, "grad_norm": 0.7023457288742065, "learning_rate": 5.812194031415973e-08, "loss": 1.7664, "step": 89606 }, { "epoch": 2.98, "grad_norm": 0.6920817494392395, "learning_rate": 5.791638761879491e-08, "loss": 1.6249, "step": 89607 }, { "epoch": 2.98, "grad_norm": 0.7242317795753479, "learning_rate": 5.7711199008958485e-08, "loss": 1.6679, "step": 89608 }, { "epoch": 2.98, "grad_norm": 0.7002889513969421, "learning_rate": 5.7506374484850294e-08, "loss": 1.6319, "step": 89609 }, { "epoch": 2.98, "grad_norm": 0.7097138166427612, "learning_rate": 5.7301914046770094e-08, "loss": 1.7201, "step": 89610 }, { "epoch": 2.98, "grad_norm": 2.2786362171173096, "learning_rate": 5.709781769488442e-08, "loss": 1.7647, "step": 89611 }, { "epoch": 2.98, "grad_norm": 0.6915926337242126, "learning_rate": 5.6894085429526335e-08, "loss": 1.674, "step": 89612 }, { "epoch": 2.98, "grad_norm": 0.7041783332824707, "learning_rate": 5.669071725086238e-08, "loss": 1.73, "step": 89613 }, { "epoch": 2.98, "grad_norm": 0.7479941248893738, "learning_rate": 5.6487713159225624e-08, "loss": 1.714, "step": 89614 }, { "epoch": 2.98, "grad_norm": 0.7198660373687744, "learning_rate": 5.628507315478259e-08, "loss": 1.7041, "step": 89615 }, { "epoch": 2.98, "grad_norm": 0.7191025018692017, "learning_rate": 5.6082797237833046e-08, "loss": 1.6403, "step": 89616 }, { "epoch": 2.98, "grad_norm": 0.7200097441673279, "learning_rate": 5.588088540861013e-08, "loss": 1.7029, "step": 89617 }, { "epoch": 2.98, "grad_norm": 0.7536282539367676, "learning_rate": 5.5679337667347e-08, "loss": 1.622, "step": 89618 }, { "epoch": 2.98, "grad_norm": 0.7020387053489685, "learning_rate": 5.54781540142768e-08, "loss": 1.6334, "step": 89619 }, { "epoch": 2.98, "grad_norm": 0.6974932551383972, "learning_rate": 5.5277334449665975e-08, "loss": 1.6404, "step": 89620 }, { "epoch": 2.98, "grad_norm": 0.719086766242981, "learning_rate": 5.507687897378099e-08, "loss": 1.6556, "step": 89621 }, { "epoch": 2.98, "grad_norm": 0.7074486017227173, "learning_rate": 5.487678758682168e-08, "loss": 1.6421, "step": 89622 }, { "epoch": 2.98, "grad_norm": 0.6904970407485962, "learning_rate": 5.4677060289021194e-08, "loss": 1.5797, "step": 89623 }, { "epoch": 2.98, "grad_norm": 0.7259562611579895, "learning_rate": 5.4477697080679285e-08, "loss": 1.682, "step": 89624 }, { "epoch": 2.98, "grad_norm": 0.7370617985725403, "learning_rate": 5.42786979619958e-08, "loss": 1.6774, "step": 89625 }, { "epoch": 2.98, "grad_norm": 0.7205872535705566, "learning_rate": 5.408006293323719e-08, "loss": 1.6991, "step": 89626 }, { "epoch": 2.98, "grad_norm": 0.7363560795783997, "learning_rate": 5.38817919946366e-08, "loss": 1.6554, "step": 89627 }, { "epoch": 2.98, "grad_norm": 0.703140914440155, "learning_rate": 5.368388514639388e-08, "loss": 1.6707, "step": 89628 }, { "epoch": 2.98, "grad_norm": 0.7174264788627625, "learning_rate": 5.348634238880878e-08, "loss": 1.6973, "step": 89629 }, { "epoch": 2.98, "grad_norm": 0.727916419506073, "learning_rate": 5.328916372208114e-08, "loss": 1.6688, "step": 89630 }, { "epoch": 2.98, "grad_norm": 0.7003166675567627, "learning_rate": 5.309234914647742e-08, "loss": 1.68, "step": 89631 }, { "epoch": 2.98, "grad_norm": 0.7158441543579102, "learning_rate": 5.289589866223076e-08, "loss": 1.7, "step": 89632 }, { "epoch": 2.98, "grad_norm": 0.7148171067237854, "learning_rate": 5.2699812269541006e-08, "loss": 1.7201, "step": 89633 }, { "epoch": 2.98, "grad_norm": 0.6968005299568176, "learning_rate": 5.2504089968707917e-08, "loss": 1.7111, "step": 89634 }, { "epoch": 2.98, "grad_norm": 0.7152717709541321, "learning_rate": 5.230873175993133e-08, "loss": 1.7055, "step": 89635 }, { "epoch": 2.98, "grad_norm": 0.7058077454566956, "learning_rate": 5.2113737643477707e-08, "loss": 1.6397, "step": 89636 }, { "epoch": 2.98, "grad_norm": 0.7031914591789246, "learning_rate": 5.1919107619513566e-08, "loss": 1.7356, "step": 89637 }, { "epoch": 2.98, "grad_norm": 0.7151660323143005, "learning_rate": 5.172484168837199e-08, "loss": 1.7133, "step": 89638 }, { "epoch": 2.98, "grad_norm": 0.7094568014144897, "learning_rate": 5.15309398502195e-08, "loss": 1.6459, "step": 89639 }, { "epoch": 2.98, "grad_norm": 0.7361462712287903, "learning_rate": 5.133740210532256e-08, "loss": 1.6485, "step": 89640 }, { "epoch": 2.98, "grad_norm": 0.7203391194343567, "learning_rate": 5.1144228453880995e-08, "loss": 1.7052, "step": 89641 }, { "epoch": 2.98, "grad_norm": 0.6938827633857727, "learning_rate": 5.0951418896194586e-08, "loss": 1.7025, "step": 89642 }, { "epoch": 2.98, "grad_norm": 0.7116053104400635, "learning_rate": 5.075897343242985e-08, "loss": 1.713, "step": 89643 }, { "epoch": 2.98, "grad_norm": 0.7237592935562134, "learning_rate": 5.056689206285325e-08, "loss": 1.6909, "step": 89644 }, { "epoch": 2.98, "grad_norm": 0.7091855406761169, "learning_rate": 5.037517478769793e-08, "loss": 1.6831, "step": 89645 }, { "epoch": 2.98, "grad_norm": 0.7122329473495483, "learning_rate": 5.018382160719703e-08, "loss": 1.657, "step": 89646 }, { "epoch": 2.98, "grad_norm": 0.710378885269165, "learning_rate": 4.9992832521550395e-08, "loss": 1.6897, "step": 89647 }, { "epoch": 2.98, "grad_norm": 0.7040544748306274, "learning_rate": 4.980220753105779e-08, "loss": 1.6887, "step": 89648 }, { "epoch": 2.98, "grad_norm": 0.7285603880882263, "learning_rate": 4.961194663588575e-08, "loss": 1.6484, "step": 89649 }, { "epoch": 2.98, "grad_norm": 0.731874406337738, "learning_rate": 4.942204983630071e-08, "loss": 1.691, "step": 89650 }, { "epoch": 2.98, "grad_norm": 0.718865156173706, "learning_rate": 4.923251713250254e-08, "loss": 1.7639, "step": 89651 }, { "epoch": 2.98, "grad_norm": 0.7204341292381287, "learning_rate": 4.9043348524757666e-08, "loss": 1.6383, "step": 89652 }, { "epoch": 2.98, "grad_norm": 0.731351375579834, "learning_rate": 4.885454401326594e-08, "loss": 1.6202, "step": 89653 }, { "epoch": 2.98, "grad_norm": 0.6999291181564331, "learning_rate": 4.8666103598260506e-08, "loss": 1.5835, "step": 89654 }, { "epoch": 2.98, "grad_norm": 0.7308564186096191, "learning_rate": 4.847802728000783e-08, "loss": 1.6519, "step": 89655 }, { "epoch": 2.98, "grad_norm": 0.6986929774284363, "learning_rate": 4.8290315058674424e-08, "loss": 1.6488, "step": 89656 }, { "epoch": 2.98, "grad_norm": 0.6994500160217285, "learning_rate": 4.8102966934560064e-08, "loss": 1.6886, "step": 89657 }, { "epoch": 2.98, "grad_norm": 0.7112637758255005, "learning_rate": 4.7915982907831276e-08, "loss": 1.7204, "step": 89658 }, { "epoch": 2.98, "grad_norm": 0.7403002977371216, "learning_rate": 4.7729362978721206e-08, "loss": 1.6417, "step": 89659 }, { "epoch": 2.98, "grad_norm": 0.7218179106712341, "learning_rate": 4.754310714749632e-08, "loss": 1.7212, "step": 89660 }, { "epoch": 2.98, "grad_norm": 0.7172815203666687, "learning_rate": 4.7357215414356445e-08, "loss": 1.72, "step": 89661 }, { "epoch": 2.98, "grad_norm": 0.7044506669044495, "learning_rate": 4.717168777950142e-08, "loss": 1.6693, "step": 89662 }, { "epoch": 2.98, "grad_norm": 0.7361209392547607, "learning_rate": 4.698652424323102e-08, "loss": 1.7625, "step": 89663 }, { "epoch": 2.98, "grad_norm": 0.7291886806488037, "learning_rate": 4.680172480567845e-08, "loss": 1.7456, "step": 89664 }, { "epoch": 2.98, "grad_norm": 0.7465614676475525, "learning_rate": 4.661728946714349e-08, "loss": 1.7199, "step": 89665 }, { "epoch": 2.98, "grad_norm": 0.7082479000091553, "learning_rate": 4.643321822779267e-08, "loss": 1.6765, "step": 89666 }, { "epoch": 2.98, "grad_norm": 0.7348753809928894, "learning_rate": 4.6249511087892434e-08, "loss": 1.6965, "step": 89667 }, { "epoch": 2.98, "grad_norm": 0.7199978232383728, "learning_rate": 4.606616804764263e-08, "loss": 1.6591, "step": 89668 }, { "epoch": 2.98, "grad_norm": 0.6957361102104187, "learning_rate": 4.58831891072764e-08, "loss": 1.6887, "step": 89669 }, { "epoch": 2.98, "grad_norm": 0.7162355184555054, "learning_rate": 4.570057426702689e-08, "loss": 1.7397, "step": 89670 }, { "epoch": 2.98, "grad_norm": 0.7324234247207642, "learning_rate": 4.551832352706064e-08, "loss": 1.7136, "step": 89671 }, { "epoch": 2.98, "grad_norm": 0.6968914866447449, "learning_rate": 4.533643688767741e-08, "loss": 1.6351, "step": 89672 }, { "epoch": 2.98, "grad_norm": 0.7044692635536194, "learning_rate": 4.5154914349043724e-08, "loss": 1.6877, "step": 89673 }, { "epoch": 2.98, "grad_norm": 0.7130236625671387, "learning_rate": 4.497375591139274e-08, "loss": 1.6606, "step": 89674 }, { "epoch": 2.98, "grad_norm": 0.7295461893081665, "learning_rate": 4.47929615749576e-08, "loss": 1.7063, "step": 89675 }, { "epoch": 2.98, "grad_norm": 0.7103893756866455, "learning_rate": 4.461253133993814e-08, "loss": 1.7017, "step": 89676 }, { "epoch": 2.98, "grad_norm": 0.7202942967414856, "learning_rate": 4.4432465206567516e-08, "loss": 1.7094, "step": 89677 }, { "epoch": 2.98, "grad_norm": 0.7090148329734802, "learning_rate": 4.425276317504556e-08, "loss": 1.6528, "step": 89678 }, { "epoch": 2.98, "grad_norm": 0.726152241230011, "learning_rate": 4.407342524563873e-08, "loss": 1.6808, "step": 89679 }, { "epoch": 2.98, "grad_norm": 0.7071967720985413, "learning_rate": 4.3894451418480245e-08, "loss": 1.6568, "step": 89680 }, { "epoch": 2.98, "grad_norm": 0.7085794806480408, "learning_rate": 4.371584169386988e-08, "loss": 1.7824, "step": 89681 }, { "epoch": 2.98, "grad_norm": 0.7080298066139221, "learning_rate": 4.353759607200746e-08, "loss": 1.6854, "step": 89682 }, { "epoch": 2.98, "grad_norm": 0.7139812111854553, "learning_rate": 4.335971455305953e-08, "loss": 1.6365, "step": 89683 }, { "epoch": 2.98, "grad_norm": 0.7094331979751587, "learning_rate": 4.318219713729254e-08, "loss": 1.6553, "step": 89684 }, { "epoch": 2.98, "grad_norm": 0.6968823671340942, "learning_rate": 4.3005043824906327e-08, "loss": 1.6622, "step": 89685 }, { "epoch": 2.98, "grad_norm": 0.7163028120994568, "learning_rate": 4.282825461610073e-08, "loss": 1.71, "step": 89686 }, { "epoch": 2.98, "grad_norm": 0.706415593624115, "learning_rate": 4.265182951114221e-08, "loss": 1.7016, "step": 89687 }, { "epoch": 2.98, "grad_norm": 0.6947708129882812, "learning_rate": 4.247576851016399e-08, "loss": 1.7009, "step": 89688 }, { "epoch": 2.98, "grad_norm": 0.7199729084968567, "learning_rate": 4.2300071613465823e-08, "loss": 1.6939, "step": 89689 }, { "epoch": 2.98, "grad_norm": 0.7113937735557556, "learning_rate": 4.212473882118095e-08, "loss": 1.721, "step": 89690 }, { "epoch": 2.98, "grad_norm": 0.7056722640991211, "learning_rate": 4.194977013357581e-08, "loss": 1.6731, "step": 89691 }, { "epoch": 2.98, "grad_norm": 0.7356200814247131, "learning_rate": 4.177516555085025e-08, "loss": 1.6549, "step": 89692 }, { "epoch": 2.98, "grad_norm": 0.7142913937568665, "learning_rate": 4.160092507320412e-08, "loss": 1.7069, "step": 89693 }, { "epoch": 2.98, "grad_norm": 0.7364025115966797, "learning_rate": 4.142704870087055e-08, "loss": 1.6375, "step": 89694 }, { "epoch": 2.98, "grad_norm": 0.7088301777839661, "learning_rate": 4.125353643401608e-08, "loss": 1.6184, "step": 89695 }, { "epoch": 2.98, "grad_norm": 0.710199773311615, "learning_rate": 4.108038827290716e-08, "loss": 1.6915, "step": 89696 }, { "epoch": 2.98, "grad_norm": 0.7259058356285095, "learning_rate": 4.090760421774364e-08, "loss": 1.6452, "step": 89697 }, { "epoch": 2.98, "grad_norm": 0.7143166661262512, "learning_rate": 4.0735184268692046e-08, "loss": 1.6816, "step": 89698 }, { "epoch": 2.98, "grad_norm": 0.7207430005073547, "learning_rate": 4.056312842601883e-08, "loss": 1.686, "step": 89699 }, { "epoch": 2.98, "grad_norm": 0.7212077975273132, "learning_rate": 4.0391436689890535e-08, "loss": 1.6882, "step": 89700 }, { "epoch": 2.98, "grad_norm": 0.7151658535003662, "learning_rate": 4.0220109060540294e-08, "loss": 1.7021, "step": 89701 }, { "epoch": 2.98, "grad_norm": 0.6922717690467834, "learning_rate": 4.004914553813465e-08, "loss": 1.687, "step": 89702 }, { "epoch": 2.98, "grad_norm": 0.7056776285171509, "learning_rate": 3.987854612294006e-08, "loss": 1.7458, "step": 89703 }, { "epoch": 2.98, "grad_norm": 0.751134991645813, "learning_rate": 3.9708310815123046e-08, "loss": 1.6795, "step": 89704 }, { "epoch": 2.98, "grad_norm": 0.7157471776008606, "learning_rate": 3.953843961491676e-08, "loss": 1.6979, "step": 89705 }, { "epoch": 2.98, "grad_norm": 0.6969681978225708, "learning_rate": 3.9368932522521044e-08, "loss": 1.6504, "step": 89706 }, { "epoch": 2.98, "grad_norm": 0.835628867149353, "learning_rate": 3.919978953810243e-08, "loss": 1.7357, "step": 89707 }, { "epoch": 2.98, "grad_norm": 0.6992894411087036, "learning_rate": 3.9031010661927374e-08, "loss": 1.6842, "step": 89708 }, { "epoch": 2.98, "grad_norm": 0.7048760056495667, "learning_rate": 3.8862595894162405e-08, "loss": 1.7195, "step": 89709 }, { "epoch": 2.98, "grad_norm": 0.7206544280052185, "learning_rate": 3.8694545235007366e-08, "loss": 1.6458, "step": 89710 }, { "epoch": 2.98, "grad_norm": 0.7705947160720825, "learning_rate": 3.85268586846954e-08, "loss": 1.6992, "step": 89711 }, { "epoch": 2.98, "grad_norm": 0.7168102860450745, "learning_rate": 3.835953624339305e-08, "loss": 1.7182, "step": 89712 }, { "epoch": 2.98, "grad_norm": 0.6997820734977722, "learning_rate": 3.8192577911366765e-08, "loss": 1.6316, "step": 89713 }, { "epoch": 2.98, "grad_norm": 0.6958315968513489, "learning_rate": 3.8025983688749764e-08, "loss": 1.5931, "step": 89714 }, { "epoch": 2.98, "grad_norm": 0.7245219945907593, "learning_rate": 3.78597535757752e-08, "loss": 1.7359, "step": 89715 }, { "epoch": 2.98, "grad_norm": 0.7368714213371277, "learning_rate": 3.769388757264291e-08, "loss": 1.7042, "step": 89716 }, { "epoch": 2.98, "grad_norm": 0.7317964434623718, "learning_rate": 3.752838567955274e-08, "loss": 1.7625, "step": 89717 }, { "epoch": 2.98, "grad_norm": 0.7347371578216553, "learning_rate": 3.736324789670453e-08, "loss": 1.7095, "step": 89718 }, { "epoch": 2.98, "grad_norm": 0.703370213508606, "learning_rate": 3.7198474224298115e-08, "loss": 1.6035, "step": 89719 }, { "epoch": 2.98, "grad_norm": 0.7255433201789856, "learning_rate": 3.7034064662533336e-08, "loss": 1.708, "step": 89720 }, { "epoch": 2.99, "grad_norm": 0.7171375155448914, "learning_rate": 3.6870019211610034e-08, "loss": 1.6399, "step": 89721 }, { "epoch": 2.99, "grad_norm": 0.7196328639984131, "learning_rate": 3.670633787172805e-08, "loss": 1.7285, "step": 89722 }, { "epoch": 2.99, "grad_norm": 0.7324182391166687, "learning_rate": 3.654302064308723e-08, "loss": 1.7156, "step": 89723 }, { "epoch": 2.99, "grad_norm": 0.7319592833518982, "learning_rate": 3.638006752592071e-08, "loss": 1.6774, "step": 89724 }, { "epoch": 2.99, "grad_norm": 0.7115584015846252, "learning_rate": 3.621747852036172e-08, "loss": 1.6193, "step": 89725 }, { "epoch": 2.99, "grad_norm": 0.7014127373695374, "learning_rate": 3.6055253626610105e-08, "loss": 1.7038, "step": 89726 }, { "epoch": 2.99, "grad_norm": 0.7496302723884583, "learning_rate": 3.5893392844932314e-08, "loss": 1.683, "step": 89727 }, { "epoch": 2.99, "grad_norm": 0.7071042656898499, "learning_rate": 3.573189617546157e-08, "loss": 1.6272, "step": 89728 }, { "epoch": 2.99, "grad_norm": 0.7359775304794312, "learning_rate": 3.557076361843103e-08, "loss": 1.7221, "step": 89729 }, { "epoch": 2.99, "grad_norm": 0.700978696346283, "learning_rate": 3.540999517400722e-08, "loss": 1.6658, "step": 89730 }, { "epoch": 2.99, "grad_norm": 0.7009759545326233, "learning_rate": 3.5249590842389985e-08, "loss": 1.6371, "step": 89731 }, { "epoch": 2.99, "grad_norm": 0.704526424407959, "learning_rate": 3.508955062381247e-08, "loss": 1.5898, "step": 89732 }, { "epoch": 2.99, "grad_norm": 0.7036017775535583, "learning_rate": 3.4929874518407896e-08, "loss": 1.6635, "step": 89733 }, { "epoch": 2.99, "grad_norm": 0.7219986319541931, "learning_rate": 3.477056252640942e-08, "loss": 1.6592, "step": 89734 }, { "epoch": 2.99, "grad_norm": 0.7117276787757874, "learning_rate": 3.4611614648016874e-08, "loss": 1.687, "step": 89735 }, { "epoch": 2.99, "grad_norm": 0.6995108127593994, "learning_rate": 3.445303088336349e-08, "loss": 1.5922, "step": 89736 }, { "epoch": 2.99, "grad_norm": 0.7250204086303711, "learning_rate": 3.429481123271571e-08, "loss": 1.628, "step": 89737 }, { "epoch": 2.99, "grad_norm": 0.7307953238487244, "learning_rate": 3.4136955696240084e-08, "loss": 1.6423, "step": 89738 }, { "epoch": 2.99, "grad_norm": 0.7222985029220581, "learning_rate": 3.397946427413645e-08, "loss": 1.6881, "step": 89739 }, { "epoch": 2.99, "grad_norm": 0.7310677766799927, "learning_rate": 3.382233696657133e-08, "loss": 1.6341, "step": 89740 }, { "epoch": 2.99, "grad_norm": 0.7152147889137268, "learning_rate": 3.366557377374457e-08, "loss": 1.6241, "step": 89741 }, { "epoch": 2.99, "grad_norm": 0.7052931785583496, "learning_rate": 3.350917469585601e-08, "loss": 1.6654, "step": 89742 }, { "epoch": 2.99, "grad_norm": 0.714235782623291, "learning_rate": 3.335313973307219e-08, "loss": 1.5727, "step": 89743 }, { "epoch": 2.99, "grad_norm": 0.72461998462677, "learning_rate": 3.319746888562624e-08, "loss": 1.6209, "step": 89744 }, { "epoch": 2.99, "grad_norm": 0.7106794118881226, "learning_rate": 3.304216215365141e-08, "loss": 1.7161, "step": 89745 }, { "epoch": 2.99, "grad_norm": 0.7244460582733154, "learning_rate": 3.2887219537380825e-08, "loss": 1.6061, "step": 89746 }, { "epoch": 2.99, "grad_norm": 0.7244864702224731, "learning_rate": 3.2732641036981036e-08, "loss": 1.7013, "step": 89747 }, { "epoch": 2.99, "grad_norm": 0.7273374795913696, "learning_rate": 3.257842665265187e-08, "loss": 1.6603, "step": 89748 }, { "epoch": 2.99, "grad_norm": 0.7444977164268494, "learning_rate": 3.242457638459317e-08, "loss": 1.6498, "step": 89749 }, { "epoch": 2.99, "grad_norm": 0.741865336894989, "learning_rate": 3.227109023293817e-08, "loss": 1.6572, "step": 89750 }, { "epoch": 2.99, "grad_norm": 0.7255001068115234, "learning_rate": 3.211796819792001e-08, "loss": 1.6817, "step": 89751 }, { "epoch": 2.99, "grad_norm": 0.7239222526550293, "learning_rate": 3.1965210279705224e-08, "loss": 1.6723, "step": 89752 }, { "epoch": 2.99, "grad_norm": 0.7113567590713501, "learning_rate": 3.181281647852696e-08, "loss": 1.7026, "step": 89753 }, { "epoch": 2.99, "grad_norm": 0.7228789329528809, "learning_rate": 3.166078679448514e-08, "loss": 1.7348, "step": 89754 }, { "epoch": 2.99, "grad_norm": 0.7532861828804016, "learning_rate": 3.15091212278129e-08, "loss": 1.7159, "step": 89755 }, { "epoch": 2.99, "grad_norm": 0.7246652245521545, "learning_rate": 3.1357819778710106e-08, "loss": 1.7432, "step": 89756 }, { "epoch": 2.99, "grad_norm": 0.7142633199691772, "learning_rate": 3.1206882447343264e-08, "loss": 1.7022, "step": 89757 }, { "epoch": 2.99, "grad_norm": 0.6997956037521362, "learning_rate": 3.105630923387892e-08, "loss": 1.6841, "step": 89758 }, { "epoch": 2.99, "grad_norm": 0.7074575424194336, "learning_rate": 3.090610013851691e-08, "loss": 1.6858, "step": 89759 }, { "epoch": 2.99, "grad_norm": 0.7321091294288635, "learning_rate": 3.075625516142377e-08, "loss": 1.6544, "step": 89760 }, { "epoch": 2.99, "grad_norm": 0.726235032081604, "learning_rate": 3.060677430279934e-08, "loss": 1.7078, "step": 89761 }, { "epoch": 2.99, "grad_norm": 0.720075786113739, "learning_rate": 3.045765756284346e-08, "loss": 1.654, "step": 89762 }, { "epoch": 2.99, "grad_norm": 0.7099003195762634, "learning_rate": 3.030890494168936e-08, "loss": 1.7242, "step": 89763 }, { "epoch": 2.99, "grad_norm": 0.7267922163009644, "learning_rate": 3.016051643953688e-08, "loss": 1.6533, "step": 89764 }, { "epoch": 2.99, "grad_norm": 0.7052190899848938, "learning_rate": 3.0012492056585846e-08, "loss": 1.6602, "step": 89765 }, { "epoch": 2.99, "grad_norm": 0.7374911308288574, "learning_rate": 2.9864831793002805e-08, "loss": 1.7309, "step": 89766 }, { "epoch": 2.99, "grad_norm": 0.7179743051528931, "learning_rate": 2.9717535648954295e-08, "loss": 1.6558, "step": 89767 }, { "epoch": 2.99, "grad_norm": 0.7229877710342407, "learning_rate": 2.9570603624640142e-08, "loss": 1.6725, "step": 89768 }, { "epoch": 2.99, "grad_norm": 0.7138329744338989, "learning_rate": 2.942403572022689e-08, "loss": 1.6978, "step": 89769 }, { "epoch": 2.99, "grad_norm": 0.7079559564590454, "learning_rate": 2.927783193588107e-08, "loss": 1.6608, "step": 89770 }, { "epoch": 2.99, "grad_norm": 0.7069641947746277, "learning_rate": 2.9131992271802516e-08, "loss": 1.6217, "step": 89771 }, { "epoch": 2.99, "grad_norm": 0.7206045389175415, "learning_rate": 2.898651672815777e-08, "loss": 1.6393, "step": 89772 }, { "epoch": 2.99, "grad_norm": 0.7376465797424316, "learning_rate": 2.884140530514667e-08, "loss": 1.6467, "step": 89773 }, { "epoch": 2.99, "grad_norm": 0.7114169597625732, "learning_rate": 2.869665800290244e-08, "loss": 1.6806, "step": 89774 }, { "epoch": 2.99, "grad_norm": 0.700481653213501, "learning_rate": 2.855227482162492e-08, "loss": 1.7367, "step": 89775 }, { "epoch": 2.99, "grad_norm": 0.6901559829711914, "learning_rate": 2.8408255761480648e-08, "loss": 1.6492, "step": 89776 }, { "epoch": 2.99, "grad_norm": 0.6918450593948364, "learning_rate": 2.8264600822669458e-08, "loss": 1.6486, "step": 89777 }, { "epoch": 2.99, "grad_norm": 0.7015253305435181, "learning_rate": 2.8121310005324582e-08, "loss": 1.7308, "step": 89778 }, { "epoch": 2.99, "grad_norm": 0.7034042477607727, "learning_rate": 2.7978383309645858e-08, "loss": 1.7104, "step": 89779 }, { "epoch": 2.99, "grad_norm": 0.7044275403022766, "learning_rate": 2.7835820735799817e-08, "loss": 1.6886, "step": 89780 }, { "epoch": 2.99, "grad_norm": 0.7081897854804993, "learning_rate": 2.7693622283986306e-08, "loss": 1.6682, "step": 89781 }, { "epoch": 2.99, "grad_norm": 0.718031644821167, "learning_rate": 2.755178795430524e-08, "loss": 1.6531, "step": 89782 }, { "epoch": 2.99, "grad_norm": 0.7139561176300049, "learning_rate": 2.741031774702307e-08, "loss": 1.6837, "step": 89783 }, { "epoch": 2.99, "grad_norm": 0.7209934592247009, "learning_rate": 2.7269211662239722e-08, "loss": 1.6897, "step": 89784 }, { "epoch": 2.99, "grad_norm": 0.7213584780693054, "learning_rate": 2.7128469700188338e-08, "loss": 1.6586, "step": 89785 }, { "epoch": 2.99, "grad_norm": 0.7198702692985535, "learning_rate": 2.6988091860968842e-08, "loss": 1.6922, "step": 89786 }, { "epoch": 2.99, "grad_norm": 0.7154945135116577, "learning_rate": 2.684807814478107e-08, "loss": 1.6823, "step": 89787 }, { "epoch": 2.99, "grad_norm": 0.722574770450592, "learning_rate": 2.6708428551824866e-08, "loss": 1.664, "step": 89788 }, { "epoch": 2.99, "grad_norm": 0.6959447264671326, "learning_rate": 2.6569143082233456e-08, "loss": 1.6553, "step": 89789 }, { "epoch": 2.99, "grad_norm": 0.6989906430244446, "learning_rate": 2.643022173620668e-08, "loss": 1.6173, "step": 89790 }, { "epoch": 2.99, "grad_norm": 0.699767529964447, "learning_rate": 2.6291664513877763e-08, "loss": 1.5871, "step": 89791 }, { "epoch": 2.99, "grad_norm": 0.7030163407325745, "learning_rate": 2.6153471415446546e-08, "loss": 1.6455, "step": 89792 }, { "epoch": 2.99, "grad_norm": 0.7096848487854004, "learning_rate": 2.601564244104626e-08, "loss": 1.686, "step": 89793 }, { "epoch": 2.99, "grad_norm": 0.707859456539154, "learning_rate": 2.5878177590876736e-08, "loss": 1.5997, "step": 89794 }, { "epoch": 2.99, "grad_norm": 0.7148249745368958, "learning_rate": 2.574107686507121e-08, "loss": 1.6982, "step": 89795 }, { "epoch": 2.99, "grad_norm": 0.7223853468894958, "learning_rate": 2.5604340263829516e-08, "loss": 1.6306, "step": 89796 }, { "epoch": 2.99, "grad_norm": 0.715533971786499, "learning_rate": 2.546796778731819e-08, "loss": 1.6856, "step": 89797 }, { "epoch": 2.99, "grad_norm": 0.6932092905044556, "learning_rate": 2.533195943567046e-08, "loss": 1.6772, "step": 89798 }, { "epoch": 2.99, "grad_norm": 0.7195848226547241, "learning_rate": 2.519631520908616e-08, "loss": 1.6865, "step": 89799 }, { "epoch": 2.99, "grad_norm": 0.7037591338157654, "learning_rate": 2.5061035107698525e-08, "loss": 1.6929, "step": 89800 }, { "epoch": 2.99, "grad_norm": 0.7374110817909241, "learning_rate": 2.492611913170739e-08, "loss": 1.7002, "step": 89801 }, { "epoch": 2.99, "grad_norm": 0.720950186252594, "learning_rate": 2.4791567281245983e-08, "loss": 1.6792, "step": 89802 }, { "epoch": 2.99, "grad_norm": 0.7255581617355347, "learning_rate": 2.465737955648084e-08, "loss": 1.7562, "step": 89803 }, { "epoch": 2.99, "grad_norm": 0.7239595651626587, "learning_rate": 2.4523555957578488e-08, "loss": 1.7253, "step": 89804 }, { "epoch": 2.99, "grad_norm": 0.7037073969841003, "learning_rate": 2.4390096484738774e-08, "loss": 1.6173, "step": 89805 }, { "epoch": 2.99, "grad_norm": 0.7223981022834778, "learning_rate": 2.4257001138061616e-08, "loss": 1.6696, "step": 89806 }, { "epoch": 2.99, "grad_norm": 0.7199692726135254, "learning_rate": 2.4124269917746852e-08, "loss": 1.6683, "step": 89807 }, { "epoch": 2.99, "grad_norm": 0.7098456025123596, "learning_rate": 2.399190282392771e-08, "loss": 1.7449, "step": 89808 }, { "epoch": 2.99, "grad_norm": 0.695798933506012, "learning_rate": 2.3859899856804032e-08, "loss": 1.6772, "step": 89809 }, { "epoch": 2.99, "grad_norm": 0.708401083946228, "learning_rate": 2.3728261016509042e-08, "loss": 1.743, "step": 89810 }, { "epoch": 2.99, "grad_norm": 0.7138535976409912, "learning_rate": 2.3596986303209276e-08, "loss": 1.6645, "step": 89811 }, { "epoch": 2.99, "grad_norm": 0.7113345861434937, "learning_rate": 2.3466075717071263e-08, "loss": 1.6484, "step": 89812 }, { "epoch": 2.99, "grad_norm": 0.7361097931861877, "learning_rate": 2.3335529258228235e-08, "loss": 1.6893, "step": 89813 }, { "epoch": 2.99, "grad_norm": 0.728891134262085, "learning_rate": 2.3205346926846724e-08, "loss": 1.7327, "step": 89814 }, { "epoch": 2.99, "grad_norm": 0.7106532454490662, "learning_rate": 2.3075528723126568e-08, "loss": 1.6384, "step": 89815 }, { "epoch": 2.99, "grad_norm": 0.7415845990180969, "learning_rate": 2.2946074647167688e-08, "loss": 1.6437, "step": 89816 }, { "epoch": 2.99, "grad_norm": 0.7021026611328125, "learning_rate": 2.2816984699169926e-08, "loss": 1.6397, "step": 89817 }, { "epoch": 2.99, "grad_norm": 0.7046361565589905, "learning_rate": 2.268825887926651e-08, "loss": 1.6276, "step": 89818 }, { "epoch": 2.99, "grad_norm": 0.6943432688713074, "learning_rate": 2.2559897187623964e-08, "loss": 1.6557, "step": 89819 }, { "epoch": 2.99, "grad_norm": 0.736326277256012, "learning_rate": 2.2431899624408832e-08, "loss": 1.6618, "step": 89820 }, { "epoch": 2.99, "grad_norm": 0.6996721625328064, "learning_rate": 2.230426618972103e-08, "loss": 1.6524, "step": 89821 }, { "epoch": 2.99, "grad_norm": 0.7098087668418884, "learning_rate": 2.2176996883793707e-08, "loss": 1.6949, "step": 89822 }, { "epoch": 2.99, "grad_norm": 0.7309055924415588, "learning_rate": 2.2050091706726782e-08, "loss": 1.6844, "step": 89823 }, { "epoch": 2.99, "grad_norm": 0.7084145545959473, "learning_rate": 2.1923550658686784e-08, "loss": 1.6063, "step": 89824 }, { "epoch": 2.99, "grad_norm": 0.700600266456604, "learning_rate": 2.1797373739840252e-08, "loss": 1.6756, "step": 89825 }, { "epoch": 2.99, "grad_norm": 0.6995044946670532, "learning_rate": 2.167156095032041e-08, "loss": 1.6606, "step": 89826 }, { "epoch": 2.99, "grad_norm": 0.7275677919387817, "learning_rate": 2.15461122903271e-08, "loss": 1.6352, "step": 89827 }, { "epoch": 2.99, "grad_norm": 0.6980292201042175, "learning_rate": 2.1421027759926933e-08, "loss": 1.6086, "step": 89828 }, { "epoch": 2.99, "grad_norm": 0.7007117867469788, "learning_rate": 2.129630735935306e-08, "loss": 1.6453, "step": 89829 }, { "epoch": 2.99, "grad_norm": 0.7086539268493652, "learning_rate": 2.1171951088705397e-08, "loss": 1.674, "step": 89830 }, { "epoch": 2.99, "grad_norm": 0.7165157198905945, "learning_rate": 2.1047958948183786e-08, "loss": 1.6133, "step": 89831 }, { "epoch": 2.99, "grad_norm": 0.7338929176330566, "learning_rate": 2.0924330937888145e-08, "loss": 1.7227, "step": 89832 }, { "epoch": 2.99, "grad_norm": 0.7187749743461609, "learning_rate": 2.0801067058018316e-08, "loss": 1.6838, "step": 89833 }, { "epoch": 2.99, "grad_norm": 0.707946240901947, "learning_rate": 2.067816730867422e-08, "loss": 1.6993, "step": 89834 }, { "epoch": 2.99, "grad_norm": 0.708346426486969, "learning_rate": 2.055563169002239e-08, "loss": 1.6412, "step": 89835 }, { "epoch": 2.99, "grad_norm": 0.7304289937019348, "learning_rate": 2.0433460202229357e-08, "loss": 1.6936, "step": 89836 }, { "epoch": 2.99, "grad_norm": 0.7350155711174011, "learning_rate": 2.0311652845395042e-08, "loss": 1.6822, "step": 89837 }, { "epoch": 2.99, "grad_norm": 0.7065809965133667, "learning_rate": 2.0190209619752595e-08, "loss": 1.6981, "step": 89838 }, { "epoch": 2.99, "grad_norm": 0.6976998448371887, "learning_rate": 2.0069130525368626e-08, "loss": 1.6618, "step": 89839 }, { "epoch": 2.99, "grad_norm": 0.7077053785324097, "learning_rate": 1.9948415562442977e-08, "loss": 1.6947, "step": 89840 }, { "epoch": 2.99, "grad_norm": 0.7329757213592529, "learning_rate": 1.9828064731075567e-08, "loss": 1.7283, "step": 89841 }, { "epoch": 2.99, "grad_norm": 0.7305493354797363, "learning_rate": 1.9708078031432928e-08, "loss": 1.731, "step": 89842 }, { "epoch": 2.99, "grad_norm": 0.7066233158111572, "learning_rate": 1.9588455463681597e-08, "loss": 1.6741, "step": 89843 }, { "epoch": 2.99, "grad_norm": 0.723895251750946, "learning_rate": 1.9469197027954798e-08, "loss": 1.625, "step": 89844 }, { "epoch": 2.99, "grad_norm": 0.6877209544181824, "learning_rate": 1.9350302724385758e-08, "loss": 1.5965, "step": 89845 }, { "epoch": 2.99, "grad_norm": 0.7275615930557251, "learning_rate": 1.9231772553107704e-08, "loss": 1.7971, "step": 89846 }, { "epoch": 2.99, "grad_norm": 0.7365493178367615, "learning_rate": 1.911360651432048e-08, "loss": 1.6787, "step": 89847 }, { "epoch": 2.99, "grad_norm": 0.7018072009086609, "learning_rate": 1.899580460809069e-08, "loss": 1.6141, "step": 89848 }, { "epoch": 2.99, "grad_norm": 0.7499540448188782, "learning_rate": 1.8878366834618187e-08, "loss": 1.7476, "step": 89849 }, { "epoch": 2.99, "grad_norm": 0.7022475004196167, "learning_rate": 1.8761293194036186e-08, "loss": 1.7135, "step": 89850 }, { "epoch": 2.99, "grad_norm": 0.6837924718856812, "learning_rate": 1.864458368647792e-08, "loss": 1.6482, "step": 89851 }, { "epoch": 2.99, "grad_norm": 0.7218972444534302, "learning_rate": 1.8528238312076616e-08, "loss": 1.7132, "step": 89852 }, { "epoch": 2.99, "grad_norm": 0.7439326643943787, "learning_rate": 1.8412257070965498e-08, "loss": 1.638, "step": 89853 }, { "epoch": 2.99, "grad_norm": 0.7219247221946716, "learning_rate": 1.8296639963311098e-08, "loss": 1.634, "step": 89854 }, { "epoch": 2.99, "grad_norm": 0.7699110507965088, "learning_rate": 1.818138698924665e-08, "loss": 1.6892, "step": 89855 }, { "epoch": 2.99, "grad_norm": 0.7068146467208862, "learning_rate": 1.8066498148905372e-08, "loss": 1.6071, "step": 89856 }, { "epoch": 2.99, "grad_norm": 0.7236623764038086, "learning_rate": 1.7951973442453803e-08, "loss": 1.6699, "step": 89857 }, { "epoch": 2.99, "grad_norm": 0.7403216361999512, "learning_rate": 1.783781286999186e-08, "loss": 1.6454, "step": 89858 }, { "epoch": 2.99, "grad_norm": 0.7114595174789429, "learning_rate": 1.7724016431686082e-08, "loss": 1.6867, "step": 89859 }, { "epoch": 2.99, "grad_norm": 0.7176669836044312, "learning_rate": 1.7610584127636385e-08, "loss": 1.7244, "step": 89860 }, { "epoch": 2.99, "grad_norm": 0.7215732932090759, "learning_rate": 1.74975159580093e-08, "loss": 1.6193, "step": 89861 }, { "epoch": 2.99, "grad_norm": 0.7098478078842163, "learning_rate": 1.7384811922971366e-08, "loss": 1.677, "step": 89862 }, { "epoch": 2.99, "grad_norm": 0.7122134566307068, "learning_rate": 1.7272472022622495e-08, "loss": 1.6551, "step": 89863 }, { "epoch": 2.99, "grad_norm": 0.7150301933288574, "learning_rate": 1.7160496257062617e-08, "loss": 1.6641, "step": 89864 }, { "epoch": 2.99, "grad_norm": 0.717175304889679, "learning_rate": 1.7048884626524872e-08, "loss": 1.652, "step": 89865 }, { "epoch": 2.99, "grad_norm": 0.7180827856063843, "learning_rate": 1.693763713104257e-08, "loss": 1.7359, "step": 89866 }, { "epoch": 2.99, "grad_norm": 0.6998769044876099, "learning_rate": 1.682675377081555e-08, "loss": 1.6798, "step": 89867 }, { "epoch": 2.99, "grad_norm": 0.7270341515541077, "learning_rate": 1.6716234545977037e-08, "loss": 1.6644, "step": 89868 }, { "epoch": 2.99, "grad_norm": 0.707017183303833, "learning_rate": 1.6606079456626954e-08, "loss": 1.6698, "step": 89869 }, { "epoch": 2.99, "grad_norm": 0.6895521879196167, "learning_rate": 1.649628850293183e-08, "loss": 1.6157, "step": 89870 }, { "epoch": 2.99, "grad_norm": 0.7279193997383118, "learning_rate": 1.6386861684991592e-08, "loss": 1.68, "step": 89871 }, { "epoch": 2.99, "grad_norm": 0.703283429145813, "learning_rate": 1.627779900297277e-08, "loss": 1.6644, "step": 89872 }, { "epoch": 2.99, "grad_norm": 0.7382070422172546, "learning_rate": 1.616910045697528e-08, "loss": 1.6813, "step": 89873 }, { "epoch": 2.99, "grad_norm": 0.708962082862854, "learning_rate": 1.6060766047132356e-08, "loss": 1.6928, "step": 89874 }, { "epoch": 2.99, "grad_norm": 0.7046459317207336, "learning_rate": 1.5952795773643834e-08, "loss": 1.7161, "step": 89875 }, { "epoch": 2.99, "grad_norm": 0.7252553701400757, "learning_rate": 1.584518963654302e-08, "loss": 1.7189, "step": 89876 }, { "epoch": 2.99, "grad_norm": 0.6890027523040771, "learning_rate": 1.573794763602976e-08, "loss": 1.656, "step": 89877 }, { "epoch": 2.99, "grad_norm": 0.7373629808425903, "learning_rate": 1.5631069772203964e-08, "loss": 1.6964, "step": 89878 }, { "epoch": 2.99, "grad_norm": 0.7135644555091858, "learning_rate": 1.5524556045198865e-08, "loss": 1.6859, "step": 89879 }, { "epoch": 2.99, "grad_norm": 0.7196259498596191, "learning_rate": 1.541840645514769e-08, "loss": 1.7113, "step": 89880 }, { "epoch": 2.99, "grad_norm": 0.8710106015205383, "learning_rate": 1.531262100215036e-08, "loss": 1.7581, "step": 89881 }, { "epoch": 2.99, "grad_norm": 0.7275951504707336, "learning_rate": 1.5207199686406714e-08, "loss": 1.6846, "step": 89882 }, { "epoch": 2.99, "grad_norm": 0.7105118632316589, "learning_rate": 1.510214250798336e-08, "loss": 1.6191, "step": 89883 }, { "epoch": 2.99, "grad_norm": 0.7194824814796448, "learning_rate": 1.4997449467013534e-08, "loss": 1.7066, "step": 89884 }, { "epoch": 2.99, "grad_norm": 0.7354907393455505, "learning_rate": 1.4893120563663762e-08, "loss": 1.6875, "step": 89885 }, { "epoch": 2.99, "grad_norm": 0.6931095123291016, "learning_rate": 1.4789155798000662e-08, "loss": 1.669, "step": 89886 }, { "epoch": 2.99, "grad_norm": 0.7319729328155518, "learning_rate": 1.4685555170224073e-08, "loss": 1.6886, "step": 89887 }, { "epoch": 2.99, "grad_norm": 0.7317204475402832, "learning_rate": 1.45823186803673e-08, "loss": 1.6681, "step": 89888 }, { "epoch": 2.99, "grad_norm": 0.7080873847007751, "learning_rate": 1.4479446328630184e-08, "loss": 1.7051, "step": 89889 }, { "epoch": 2.99, "grad_norm": 0.7283245325088501, "learning_rate": 1.4376938115145952e-08, "loss": 1.6835, "step": 89890 }, { "epoch": 2.99, "grad_norm": 0.7327734231948853, "learning_rate": 1.4274794039947913e-08, "loss": 1.7192, "step": 89891 }, { "epoch": 2.99, "grad_norm": 0.7329038381576538, "learning_rate": 1.417301410326921e-08, "loss": 1.6539, "step": 89892 }, { "epoch": 2.99, "grad_norm": 0.7097464799880981, "learning_rate": 1.4071598305176456e-08, "loss": 1.6021, "step": 89893 }, { "epoch": 2.99, "grad_norm": 0.7095870971679688, "learning_rate": 1.3970546645769575e-08, "loss": 1.6808, "step": 89894 }, { "epoch": 2.99, "grad_norm": 0.7134867906570435, "learning_rate": 1.38698591252151e-08, "loss": 1.7869, "step": 89895 }, { "epoch": 2.99, "grad_norm": 0.7064955234527588, "learning_rate": 1.3769535743646254e-08, "loss": 1.6851, "step": 89896 }, { "epoch": 2.99, "grad_norm": 0.7095929384231567, "learning_rate": 1.3669576501129652e-08, "loss": 1.7218, "step": 89897 }, { "epoch": 2.99, "grad_norm": 0.7253790497779846, "learning_rate": 1.3569981397831831e-08, "loss": 1.6083, "step": 89898 }, { "epoch": 2.99, "grad_norm": 0.7200081944465637, "learning_rate": 1.3470750433852706e-08, "loss": 1.6171, "step": 89899 }, { "epoch": 2.99, "grad_norm": 0.7131836414337158, "learning_rate": 1.3371883609325507e-08, "loss": 1.6816, "step": 89900 }, { "epoch": 2.99, "grad_norm": 0.7233197689056396, "learning_rate": 1.3273380924350152e-08, "loss": 1.6237, "step": 89901 }, { "epoch": 2.99, "grad_norm": 0.7255286574363708, "learning_rate": 1.317524237905987e-08, "loss": 1.6113, "step": 89902 }, { "epoch": 2.99, "grad_norm": 0.729508101940155, "learning_rate": 1.3077467973587885e-08, "loss": 1.6839, "step": 89903 }, { "epoch": 2.99, "grad_norm": 0.6979607939720154, "learning_rate": 1.2980057708034119e-08, "loss": 1.6454, "step": 89904 }, { "epoch": 2.99, "grad_norm": 0.6886531114578247, "learning_rate": 1.2883011582531799e-08, "loss": 1.6565, "step": 89905 }, { "epoch": 2.99, "grad_norm": 0.720362663269043, "learning_rate": 1.2786329597180845e-08, "loss": 1.6956, "step": 89906 }, { "epoch": 2.99, "grad_norm": 0.7031475901603699, "learning_rate": 1.2690011752114482e-08, "loss": 1.6454, "step": 89907 }, { "epoch": 2.99, "grad_norm": 0.7355026602745056, "learning_rate": 1.2594058047432631e-08, "loss": 1.658, "step": 89908 }, { "epoch": 2.99, "grad_norm": 0.7255572080612183, "learning_rate": 1.2498468483235214e-08, "loss": 1.6899, "step": 89909 }, { "epoch": 2.99, "grad_norm": 0.7019323110580444, "learning_rate": 1.240324305968876e-08, "loss": 1.6497, "step": 89910 }, { "epoch": 2.99, "grad_norm": 0.7318872809410095, "learning_rate": 1.2308381776893195e-08, "loss": 1.6165, "step": 89911 }, { "epoch": 2.99, "grad_norm": 0.73118656873703, "learning_rate": 1.2213884634948434e-08, "loss": 1.747, "step": 89912 }, { "epoch": 2.99, "grad_norm": 0.7101847529411316, "learning_rate": 1.21197516339544e-08, "loss": 1.6702, "step": 89913 }, { "epoch": 2.99, "grad_norm": 0.7264187335968018, "learning_rate": 1.2025982774077624e-08, "loss": 1.7982, "step": 89914 }, { "epoch": 2.99, "grad_norm": 0.7183493375778198, "learning_rate": 1.1932578055384723e-08, "loss": 1.7004, "step": 89915 }, { "epoch": 2.99, "grad_norm": 0.7043435573577881, "learning_rate": 1.183953747800892e-08, "loss": 1.6498, "step": 89916 }, { "epoch": 2.99, "grad_norm": 0.7280462384223938, "learning_rate": 1.1746861042050138e-08, "loss": 1.6745, "step": 89917 }, { "epoch": 2.99, "grad_norm": 0.6980963349342346, "learning_rate": 1.16545487476416e-08, "loss": 1.6737, "step": 89918 }, { "epoch": 2.99, "grad_norm": 0.6974228620529175, "learning_rate": 1.1562600594849925e-08, "loss": 1.6855, "step": 89919 }, { "epoch": 2.99, "grad_norm": 0.7102028727531433, "learning_rate": 1.1471016583841641e-08, "loss": 1.6855, "step": 89920 }, { "epoch": 2.99, "grad_norm": 0.7272266745567322, "learning_rate": 1.1379796714716671e-08, "loss": 1.6549, "step": 89921 }, { "epoch": 2.99, "grad_norm": 0.6914275288581848, "learning_rate": 1.1288940987574935e-08, "loss": 1.6614, "step": 89922 }, { "epoch": 2.99, "grad_norm": 0.7121408581733704, "learning_rate": 1.1198449402516351e-08, "loss": 1.7145, "step": 89923 }, { "epoch": 2.99, "grad_norm": 0.6993416547775269, "learning_rate": 1.1108321959674149e-08, "loss": 1.6505, "step": 89924 }, { "epoch": 2.99, "grad_norm": 0.6956596970558167, "learning_rate": 1.101855865911494e-08, "loss": 1.6578, "step": 89925 }, { "epoch": 2.99, "grad_norm": 0.7253254055976868, "learning_rate": 1.0929159501005259e-08, "loss": 1.738, "step": 89926 }, { "epoch": 2.99, "grad_norm": 0.7177782654762268, "learning_rate": 1.0840124485445022e-08, "loss": 1.5957, "step": 89927 }, { "epoch": 2.99, "grad_norm": 0.730538010597229, "learning_rate": 1.0751453612500849e-08, "loss": 1.6999, "step": 89928 }, { "epoch": 2.99, "grad_norm": 0.7266733050346375, "learning_rate": 1.0663146882272656e-08, "loss": 1.6694, "step": 89929 }, { "epoch": 2.99, "grad_norm": 0.7044885754585266, "learning_rate": 1.0575204294926975e-08, "loss": 1.6416, "step": 89930 }, { "epoch": 2.99, "grad_norm": 0.723537027835846, "learning_rate": 1.048762585056373e-08, "loss": 1.5894, "step": 89931 }, { "epoch": 2.99, "grad_norm": 0.7094442844390869, "learning_rate": 1.0400411549249532e-08, "loss": 1.6086, "step": 89932 }, { "epoch": 2.99, "grad_norm": 0.7151303291320801, "learning_rate": 1.0313561391084301e-08, "loss": 1.7062, "step": 89933 }, { "epoch": 2.99, "grad_norm": 0.7236931920051575, "learning_rate": 1.0227075376234572e-08, "loss": 1.6123, "step": 89934 }, { "epoch": 2.99, "grad_norm": 0.7217910289764404, "learning_rate": 1.0140953504733652e-08, "loss": 1.6419, "step": 89935 }, { "epoch": 2.99, "grad_norm": 0.7174800038337708, "learning_rate": 1.0055195776748071e-08, "loss": 1.6462, "step": 89936 }, { "epoch": 2.99, "grad_norm": 0.70970618724823, "learning_rate": 9.969802192344444e-09, "loss": 1.6831, "step": 89937 }, { "epoch": 2.99, "grad_norm": 0.7318726778030396, "learning_rate": 9.884772751622693e-09, "loss": 1.734, "step": 89938 }, { "epoch": 2.99, "grad_norm": 0.7098978757858276, "learning_rate": 9.800107454716043e-09, "loss": 1.6918, "step": 89939 }, { "epoch": 2.99, "grad_norm": 0.7264195084571838, "learning_rate": 9.71580630169111e-09, "loss": 1.7276, "step": 89940 }, { "epoch": 2.99, "grad_norm": 0.7139619588851929, "learning_rate": 9.631869292681115e-09, "loss": 1.6162, "step": 89941 }, { "epoch": 2.99, "grad_norm": 0.7026705145835876, "learning_rate": 9.548296427785983e-09, "loss": 1.6765, "step": 89942 }, { "epoch": 2.99, "grad_norm": 0.7343330979347229, "learning_rate": 9.465087707072327e-09, "loss": 1.6894, "step": 89943 }, { "epoch": 2.99, "grad_norm": 0.6999895572662354, "learning_rate": 9.382243130706679e-09, "loss": 1.6271, "step": 89944 }, { "epoch": 2.99, "grad_norm": 0.7072983384132385, "learning_rate": 9.299762698722346e-09, "loss": 1.7379, "step": 89945 }, { "epoch": 2.99, "grad_norm": 0.7087540030479431, "learning_rate": 9.217646411252555e-09, "loss": 1.756, "step": 89946 }, { "epoch": 2.99, "grad_norm": 0.738904595375061, "learning_rate": 9.135894268397226e-09, "loss": 1.7122, "step": 89947 }, { "epoch": 2.99, "grad_norm": 0.7127410173416138, "learning_rate": 9.054506270222972e-09, "loss": 1.6702, "step": 89948 }, { "epoch": 2.99, "grad_norm": 0.69558185338974, "learning_rate": 8.97348241686302e-09, "loss": 1.6602, "step": 89949 }, { "epoch": 2.99, "grad_norm": 0.7154304385185242, "learning_rate": 8.892822708417291e-09, "loss": 1.7343, "step": 89950 }, { "epoch": 2.99, "grad_norm": 0.7319222092628479, "learning_rate": 8.812527144985704e-09, "loss": 1.6919, "step": 89951 }, { "epoch": 2.99, "grad_norm": 0.7113019227981567, "learning_rate": 8.732595726634873e-09, "loss": 1.6489, "step": 89952 }, { "epoch": 2.99, "grad_norm": 0.7275658845901489, "learning_rate": 8.653028453498022e-09, "loss": 1.6761, "step": 89953 }, { "epoch": 2.99, "grad_norm": 0.6881547570228577, "learning_rate": 8.57382532564177e-09, "loss": 1.6586, "step": 89954 }, { "epoch": 2.99, "grad_norm": 0.7072262763977051, "learning_rate": 8.494986343199339e-09, "loss": 1.7187, "step": 89955 }, { "epoch": 2.99, "grad_norm": 0.7233368158340454, "learning_rate": 8.416511506204038e-09, "loss": 1.6506, "step": 89956 }, { "epoch": 2.99, "grad_norm": 0.7345568537712097, "learning_rate": 8.338400814822399e-09, "loss": 1.681, "step": 89957 }, { "epoch": 2.99, "grad_norm": 0.688405454158783, "learning_rate": 8.26065426908773e-09, "loss": 1.7164, "step": 89958 }, { "epoch": 2.99, "grad_norm": 0.6935625076293945, "learning_rate": 8.183271869133256e-09, "loss": 1.6585, "step": 89959 }, { "epoch": 2.99, "grad_norm": 0.7107686400413513, "learning_rate": 8.1062536150589e-09, "loss": 1.674, "step": 89960 }, { "epoch": 2.99, "grad_norm": 0.7229512333869934, "learning_rate": 8.029599506931272e-09, "loss": 1.6017, "step": 89961 }, { "epoch": 2.99, "grad_norm": 0.7828412652015686, "learning_rate": 7.953309544850295e-09, "loss": 1.6728, "step": 89962 }, { "epoch": 2.99, "grad_norm": 0.7278653979301453, "learning_rate": 7.877383728915887e-09, "loss": 1.7042, "step": 89963 }, { "epoch": 2.99, "grad_norm": 0.6995159983634949, "learning_rate": 7.801822059227969e-09, "loss": 1.6736, "step": 89964 }, { "epoch": 2.99, "grad_norm": 0.738233208656311, "learning_rate": 7.726624535853154e-09, "loss": 1.6739, "step": 89965 }, { "epoch": 2.99, "grad_norm": 0.7229743599891663, "learning_rate": 7.651791158891363e-09, "loss": 1.6892, "step": 89966 }, { "epoch": 2.99, "grad_norm": 0.7102816700935364, "learning_rate": 7.577321928442515e-09, "loss": 1.7239, "step": 89967 }, { "epoch": 2.99, "grad_norm": 0.7088515758514404, "learning_rate": 7.503216844573224e-09, "loss": 1.6544, "step": 89968 }, { "epoch": 2.99, "grad_norm": 0.7256464958190918, "learning_rate": 7.429475907416715e-09, "loss": 1.7185, "step": 89969 }, { "epoch": 2.99, "grad_norm": 0.7301880121231079, "learning_rate": 7.356099117039605e-09, "loss": 1.654, "step": 89970 }, { "epoch": 2.99, "grad_norm": 0.7453582286834717, "learning_rate": 7.28308647354181e-09, "loss": 1.6751, "step": 89971 }, { "epoch": 2.99, "grad_norm": 0.7404434084892273, "learning_rate": 7.210437976989947e-09, "loss": 1.6118, "step": 89972 }, { "epoch": 2.99, "grad_norm": 0.7152707576751709, "learning_rate": 7.138153627483934e-09, "loss": 1.6637, "step": 89973 }, { "epoch": 2.99, "grad_norm": 0.7042438983917236, "learning_rate": 7.066233425090384e-09, "loss": 1.7253, "step": 89974 }, { "epoch": 2.99, "grad_norm": 0.6968204975128174, "learning_rate": 6.994677369942525e-09, "loss": 1.6649, "step": 89975 }, { "epoch": 2.99, "grad_norm": 0.6927152872085571, "learning_rate": 6.923485462073663e-09, "loss": 1.641, "step": 89976 }, { "epoch": 2.99, "grad_norm": 0.7164127230644226, "learning_rate": 6.852657701617026e-09, "loss": 1.6638, "step": 89977 }, { "epoch": 2.99, "grad_norm": 0.6986566185951233, "learning_rate": 6.782194088639226e-09, "loss": 1.6557, "step": 89978 }, { "epoch": 2.99, "grad_norm": 0.6848740577697754, "learning_rate": 6.712094623206876e-09, "loss": 1.5878, "step": 89979 }, { "epoch": 2.99, "grad_norm": 0.7173728346824646, "learning_rate": 6.642359305453204e-09, "loss": 1.6852, "step": 89980 }, { "epoch": 2.99, "grad_norm": 0.7059535384178162, "learning_rate": 6.572988135411517e-09, "loss": 1.661, "step": 89981 }, { "epoch": 2.99, "grad_norm": 0.7201339602470398, "learning_rate": 6.50398111321504e-09, "loss": 1.6892, "step": 89982 }, { "epoch": 2.99, "grad_norm": 0.7012719511985779, "learning_rate": 6.435338238897081e-09, "loss": 1.632, "step": 89983 }, { "epoch": 2.99, "grad_norm": 0.7225137948989868, "learning_rate": 6.3670595125575596e-09, "loss": 1.6767, "step": 89984 }, { "epoch": 2.99, "grad_norm": 0.7297612428665161, "learning_rate": 6.299144934329703e-09, "loss": 1.6532, "step": 89985 }, { "epoch": 2.99, "grad_norm": 0.741442859172821, "learning_rate": 6.2315945042135106e-09, "loss": 1.7021, "step": 89986 }, { "epoch": 2.99, "grad_norm": 0.7025765180587769, "learning_rate": 6.16440822234221e-09, "loss": 1.6798, "step": 89987 }, { "epoch": 2.99, "grad_norm": 0.6932403445243835, "learning_rate": 6.097586088782413e-09, "loss": 1.6428, "step": 89988 }, { "epoch": 2.99, "grad_norm": 0.707822859287262, "learning_rate": 6.031128103600735e-09, "loss": 1.6378, "step": 89989 }, { "epoch": 2.99, "grad_norm": 0.7060595154762268, "learning_rate": 5.965034266930402e-09, "loss": 1.646, "step": 89990 }, { "epoch": 2.99, "grad_norm": 0.7134023904800415, "learning_rate": 5.89930457880472e-09, "loss": 1.7126, "step": 89991 }, { "epoch": 2.99, "grad_norm": 0.6992998719215393, "learning_rate": 5.833939039290303e-09, "loss": 1.6709, "step": 89992 }, { "epoch": 2.99, "grad_norm": 0.7256197929382324, "learning_rate": 5.768937648520378e-09, "loss": 1.7299, "step": 89993 }, { "epoch": 2.99, "grad_norm": 0.7144259810447693, "learning_rate": 5.704300406528251e-09, "loss": 1.6059, "step": 89994 }, { "epoch": 2.99, "grad_norm": 0.7002360820770264, "learning_rate": 5.640027313413842e-09, "loss": 1.6639, "step": 89995 }, { "epoch": 2.99, "grad_norm": 0.6862877011299133, "learning_rate": 5.576118369243765e-09, "loss": 1.7443, "step": 89996 }, { "epoch": 2.99, "grad_norm": 0.7002548575401306, "learning_rate": 5.51257357411794e-09, "loss": 1.7501, "step": 89997 }, { "epoch": 2.99, "grad_norm": 0.7127252817153931, "learning_rate": 5.44939292810298e-09, "loss": 1.6603, "step": 89998 }, { "epoch": 2.99, "grad_norm": 0.7156891822814941, "learning_rate": 5.386576431265499e-09, "loss": 1.6729, "step": 89999 }, { "epoch": 2.99, "grad_norm": 0.6955249309539795, "learning_rate": 5.32412408367211e-09, "loss": 1.616, "step": 90000 }, { "epoch": 2.99, "grad_norm": 0.7103074789047241, "learning_rate": 5.262035885422733e-09, "loss": 1.6898, "step": 90001 }, { "epoch": 2.99, "grad_norm": 0.7089178562164307, "learning_rate": 5.200311836583981e-09, "loss": 1.7019, "step": 90002 }, { "epoch": 2.99, "grad_norm": 0.7123584151268005, "learning_rate": 5.138951937222468e-09, "loss": 1.6855, "step": 90003 }, { "epoch": 2.99, "grad_norm": 0.7159867286682129, "learning_rate": 5.077956187404808e-09, "loss": 1.6617, "step": 90004 }, { "epoch": 2.99, "grad_norm": 0.7167028784751892, "learning_rate": 5.0173245872642264e-09, "loss": 1.6849, "step": 90005 }, { "epoch": 2.99, "grad_norm": 0.7028828263282776, "learning_rate": 4.957057136800724e-09, "loss": 1.6391, "step": 90006 }, { "epoch": 2.99, "grad_norm": 0.730334997177124, "learning_rate": 4.89715383611422e-09, "loss": 1.6665, "step": 90007 }, { "epoch": 2.99, "grad_norm": 0.709270179271698, "learning_rate": 4.837614685304636e-09, "loss": 1.6671, "step": 90008 }, { "epoch": 2.99, "grad_norm": 0.7007349729537964, "learning_rate": 4.778439684371971e-09, "loss": 1.6608, "step": 90009 }, { "epoch": 2.99, "grad_norm": 0.7244731187820435, "learning_rate": 4.7196288334827585e-09, "loss": 1.6527, "step": 90010 }, { "epoch": 2.99, "grad_norm": 0.6946003437042236, "learning_rate": 4.661182132636998e-09, "loss": 1.6718, "step": 90011 }, { "epoch": 2.99, "grad_norm": 0.7097477316856384, "learning_rate": 4.6030995819346106e-09, "loss": 1.6015, "step": 90012 }, { "epoch": 2.99, "grad_norm": 0.7351959943771362, "learning_rate": 4.545381181475516e-09, "loss": 1.7462, "step": 90013 }, { "epoch": 2.99, "grad_norm": 0.7348819375038147, "learning_rate": 4.488026931259714e-09, "loss": 1.6471, "step": 90014 }, { "epoch": 2.99, "grad_norm": 0.6893934011459351, "learning_rate": 4.4310368313871246e-09, "loss": 1.6743, "step": 90015 }, { "epoch": 2.99, "grad_norm": 0.7003682851791382, "learning_rate": 4.374410881957668e-09, "loss": 1.6889, "step": 90016 }, { "epoch": 2.99, "grad_norm": 0.7469631433486938, "learning_rate": 4.318149083004652e-09, "loss": 1.6282, "step": 90017 }, { "epoch": 2.99, "grad_norm": 0.7288298010826111, "learning_rate": 4.262251434594688e-09, "loss": 1.7593, "step": 90018 }, { "epoch": 2.99, "grad_norm": 0.7063400149345398, "learning_rate": 4.2067179368276975e-09, "loss": 1.6619, "step": 90019 }, { "epoch": 2.99, "grad_norm": 0.7019123435020447, "learning_rate": 4.151548589736986e-09, "loss": 1.6963, "step": 90020 }, { "epoch": 3.0, "grad_norm": 0.7308429479598999, "learning_rate": 4.096743393422474e-09, "loss": 1.6544, "step": 90021 }, { "epoch": 3.0, "grad_norm": 0.716836154460907, "learning_rate": 4.04230234791747e-09, "loss": 1.7131, "step": 90022 }, { "epoch": 3.0, "grad_norm": 0.6997467279434204, "learning_rate": 3.988225453321892e-09, "loss": 1.7663, "step": 90023 }, { "epoch": 3.0, "grad_norm": 0.6964295506477356, "learning_rate": 3.934512709669047e-09, "loss": 1.6582, "step": 90024 }, { "epoch": 3.0, "grad_norm": 0.7348100543022156, "learning_rate": 3.881164117025548e-09, "loss": 1.7074, "step": 90025 }, { "epoch": 3.0, "grad_norm": 0.7283039689064026, "learning_rate": 3.828179675458009e-09, "loss": 1.7812, "step": 90026 }, { "epoch": 3.0, "grad_norm": 0.7088822722434998, "learning_rate": 3.775559385066351e-09, "loss": 1.6631, "step": 90027 }, { "epoch": 3.0, "grad_norm": 0.7040581107139587, "learning_rate": 3.7233032458838797e-09, "loss": 1.665, "step": 90028 }, { "epoch": 3.0, "grad_norm": 0.7270787954330444, "learning_rate": 3.671411257977208e-09, "loss": 1.7076, "step": 90029 }, { "epoch": 3.0, "grad_norm": 0.7176010012626648, "learning_rate": 3.6198834214129503e-09, "loss": 1.7211, "step": 90030 }, { "epoch": 3.0, "grad_norm": 0.7122527956962585, "learning_rate": 3.5687197362577192e-09, "loss": 1.746, "step": 90031 }, { "epoch": 3.0, "grad_norm": 0.7418135404586792, "learning_rate": 3.517920202544822e-09, "loss": 1.728, "step": 90032 }, { "epoch": 3.0, "grad_norm": 0.6938209533691406, "learning_rate": 3.4674848204074845e-09, "loss": 1.6703, "step": 90033 }, { "epoch": 3.0, "grad_norm": 0.7228359580039978, "learning_rate": 3.417413589812401e-09, "loss": 1.658, "step": 90034 }, { "epoch": 3.0, "grad_norm": 0.714810311794281, "learning_rate": 3.3677065108594912e-09, "loss": 1.6469, "step": 90035 }, { "epoch": 3.0, "grad_norm": 0.7117576003074646, "learning_rate": 3.3183635836486754e-09, "loss": 1.6521, "step": 90036 }, { "epoch": 3.0, "grad_norm": 0.6998233795166016, "learning_rate": 3.2693848081799535e-09, "loss": 1.7334, "step": 90037 }, { "epoch": 3.0, "grad_norm": 0.7412175536155701, "learning_rate": 3.2207701845532453e-09, "loss": 1.621, "step": 90038 }, { "epoch": 3.0, "grad_norm": 0.746456503868103, "learning_rate": 3.1725197128351643e-09, "loss": 1.6937, "step": 90039 }, { "epoch": 3.0, "grad_norm": 0.7078187465667725, "learning_rate": 3.1246333930257107e-09, "loss": 1.5999, "step": 90040 }, { "epoch": 3.0, "grad_norm": 0.7057276368141174, "learning_rate": 3.077111225258111e-09, "loss": 1.7734, "step": 90041 }, { "epoch": 3.0, "grad_norm": 0.7043732404708862, "learning_rate": 3.0299532095323653e-09, "loss": 1.6572, "step": 90042 }, { "epoch": 3.0, "grad_norm": 0.7230173945426941, "learning_rate": 2.983159345915087e-09, "loss": 1.7187, "step": 90043 }, { "epoch": 3.0, "grad_norm": 0.7049376368522644, "learning_rate": 2.9367296344728897e-09, "loss": 1.6183, "step": 90044 }, { "epoch": 3.0, "grad_norm": 0.7069651484489441, "learning_rate": 2.8906640752723865e-09, "loss": 1.6778, "step": 90045 }, { "epoch": 3.0, "grad_norm": 0.7177916765213013, "learning_rate": 2.8449626683801907e-09, "loss": 1.6948, "step": 90046 }, { "epoch": 3.0, "grad_norm": 0.7224699258804321, "learning_rate": 2.7996254137963026e-09, "loss": 1.6976, "step": 90047 }, { "epoch": 3.0, "grad_norm": 0.6979720592498779, "learning_rate": 2.754652311620642e-09, "loss": 1.7006, "step": 90048 }, { "epoch": 3.0, "grad_norm": 0.7060432434082031, "learning_rate": 2.7100433619198225e-09, "loss": 1.7386, "step": 90049 }, { "epoch": 3.0, "grad_norm": 0.7277218699455261, "learning_rate": 2.665798564693844e-09, "loss": 1.6894, "step": 90050 }, { "epoch": 3.0, "grad_norm": 0.7174431681632996, "learning_rate": 2.6219179200759333e-09, "loss": 1.6558, "step": 90051 }, { "epoch": 3.0, "grad_norm": 0.6983670592308044, "learning_rate": 2.5784014280327834e-09, "loss": 1.6693, "step": 90052 }, { "epoch": 3.0, "grad_norm": 0.7140798568725586, "learning_rate": 2.5352490886643153e-09, "loss": 1.6937, "step": 90053 }, { "epoch": 3.0, "grad_norm": 0.7010242938995361, "learning_rate": 2.4924609020038345e-09, "loss": 1.6637, "step": 90054 }, { "epoch": 3.0, "grad_norm": 0.7162585258483887, "learning_rate": 2.4500368681512615e-09, "loss": 1.6708, "step": 90055 }, { "epoch": 3.0, "grad_norm": 0.7106179594993591, "learning_rate": 2.407976987106597e-09, "loss": 1.6739, "step": 90056 }, { "epoch": 3.0, "grad_norm": 0.7150555849075317, "learning_rate": 2.3662812589031466e-09, "loss": 1.7187, "step": 90057 }, { "epoch": 3.0, "grad_norm": 0.7161328792572021, "learning_rate": 2.324949683674138e-09, "loss": 1.6629, "step": 90058 }, { "epoch": 3.0, "grad_norm": 0.7193678021430969, "learning_rate": 2.2839822613862637e-09, "loss": 1.7089, "step": 90059 }, { "epoch": 3.0, "grad_norm": 0.7276776432991028, "learning_rate": 2.243378992139444e-09, "loss": 1.6442, "step": 90060 }, { "epoch": 3.0, "grad_norm": 0.7158994674682617, "learning_rate": 2.203139875966986e-09, "loss": 1.6453, "step": 90061 }, { "epoch": 3.0, "grad_norm": 0.7074949145317078, "learning_rate": 2.1632649129021963e-09, "loss": 1.6712, "step": 90062 }, { "epoch": 3.0, "grad_norm": 0.7259308695793152, "learning_rate": 2.123754103011688e-09, "loss": 1.653, "step": 90063 }, { "epoch": 3.0, "grad_norm": 0.7145777344703674, "learning_rate": 2.0846074463620745e-09, "loss": 1.6313, "step": 90064 }, { "epoch": 3.0, "grad_norm": 0.7414413094520569, "learning_rate": 2.0458249429533557e-09, "loss": 1.6726, "step": 90065 }, { "epoch": 3.0, "grad_norm": 0.7212628126144409, "learning_rate": 2.0074065928521453e-09, "loss": 1.737, "step": 90066 }, { "epoch": 3.0, "grad_norm": 0.7099500894546509, "learning_rate": 1.9693523961583635e-09, "loss": 1.7091, "step": 90067 }, { "epoch": 3.0, "grad_norm": 0.7131494879722595, "learning_rate": 1.9316623528387034e-09, "loss": 1.6734, "step": 90068 }, { "epoch": 3.0, "grad_norm": 0.7400573492050171, "learning_rate": 1.894336462959778e-09, "loss": 1.6956, "step": 90069 }, { "epoch": 3.0, "grad_norm": 0.7151015996932983, "learning_rate": 1.857374726588201e-09, "loss": 1.6314, "step": 90070 }, { "epoch": 3.0, "grad_norm": 0.7210039496421814, "learning_rate": 1.8207771437572794e-09, "loss": 1.669, "step": 90071 }, { "epoch": 3.0, "grad_norm": 0.7058656811714172, "learning_rate": 1.784543714533626e-09, "loss": 1.6932, "step": 90072 }, { "epoch": 3.0, "grad_norm": 0.7294831275939941, "learning_rate": 1.7486744389172413e-09, "loss": 1.6714, "step": 90073 }, { "epoch": 3.0, "grad_norm": 0.7150159478187561, "learning_rate": 1.7131693169747386e-09, "loss": 1.7255, "step": 90074 }, { "epoch": 3.0, "grad_norm": 0.6916465759277344, "learning_rate": 1.6780283487727309e-09, "loss": 1.6314, "step": 90075 }, { "epoch": 3.0, "grad_norm": 0.7246805429458618, "learning_rate": 1.6432515343112185e-09, "loss": 1.6442, "step": 90076 }, { "epoch": 3.0, "grad_norm": 0.7135049104690552, "learning_rate": 1.6088388736568147e-09, "loss": 1.6388, "step": 90077 }, { "epoch": 3.0, "grad_norm": 0.7448392510414124, "learning_rate": 1.5747903668428262e-09, "loss": 1.6725, "step": 90078 }, { "epoch": 3.0, "grad_norm": 0.72979736328125, "learning_rate": 1.5411060139358667e-09, "loss": 1.69, "step": 90079 }, { "epoch": 3.0, "grad_norm": 0.7140032052993774, "learning_rate": 1.5077858149359356e-09, "loss": 1.6112, "step": 90080 }, { "epoch": 3.0, "grad_norm": 0.7177388668060303, "learning_rate": 1.4748297699096468e-09, "loss": 1.715, "step": 90081 }, { "epoch": 3.0, "grad_norm": 0.7084556818008423, "learning_rate": 1.4422378788903066e-09, "loss": 1.7137, "step": 90082 }, { "epoch": 3.0, "grad_norm": 0.7129862904548645, "learning_rate": 1.4100101419112219e-09, "loss": 1.5828, "step": 90083 }, { "epoch": 3.0, "grad_norm": 0.7357227802276611, "learning_rate": 1.3781465590056996e-09, "loss": 1.6852, "step": 90084 }, { "epoch": 3.0, "grad_norm": 0.7073168754577637, "learning_rate": 1.3466471302403526e-09, "loss": 1.6973, "step": 90085 }, { "epoch": 3.0, "grad_norm": 0.7730443477630615, "learning_rate": 1.315511855648488e-09, "loss": 1.6904, "step": 90086 }, { "epoch": 3.0, "grad_norm": 2.309098243713379, "learning_rate": 1.2847407352301054e-09, "loss": 1.6724, "step": 90087 }, { "epoch": 3.0, "grad_norm": 0.7303604483604431, "learning_rate": 1.2543337690518186e-09, "loss": 1.6626, "step": 90088 }, { "epoch": 3.0, "grad_norm": 0.7399726510047913, "learning_rate": 1.2242909571802406e-09, "loss": 1.6567, "step": 90089 }, { "epoch": 3.0, "grad_norm": 0.6948496103286743, "learning_rate": 1.1946122995820651e-09, "loss": 1.6518, "step": 90090 }, { "epoch": 3.0, "grad_norm": 0.7109876871109009, "learning_rate": 1.1652977963572118e-09, "loss": 1.6265, "step": 90091 }, { "epoch": 3.0, "grad_norm": 0.7131643295288086, "learning_rate": 1.136347447505681e-09, "loss": 1.6457, "step": 90092 }, { "epoch": 3.0, "grad_norm": 0.7095128297805786, "learning_rate": 1.1077612530607793e-09, "loss": 1.677, "step": 90093 }, { "epoch": 3.0, "grad_norm": 0.7097160220146179, "learning_rate": 1.07953921308912e-09, "loss": 1.6647, "step": 90094 }, { "epoch": 3.0, "grad_norm": 0.7089536786079407, "learning_rate": 1.0516813275907033e-09, "loss": 1.5818, "step": 90095 }, { "epoch": 3.0, "grad_norm": 0.7046404480934143, "learning_rate": 1.0241875966321422e-09, "loss": 1.6914, "step": 90096 }, { "epoch": 3.0, "grad_norm": 0.7076646089553833, "learning_rate": 9.970580202134371e-10, "loss": 1.6563, "step": 90097 }, { "epoch": 3.0, "grad_norm": 0.7202074527740479, "learning_rate": 9.702925983678944e-10, "loss": 1.5876, "step": 90098 }, { "epoch": 3.0, "grad_norm": 0.7338581085205078, "learning_rate": 9.438913311621276e-10, "loss": 1.6921, "step": 90099 }, { "epoch": 3.0, "grad_norm": 0.7160314321517944, "learning_rate": 9.178542185961368e-10, "loss": 1.6914, "step": 90100 }, { "epoch": 3.0, "grad_norm": 0.724092960357666, "learning_rate": 8.921812607032286e-10, "loss": 1.6957, "step": 90101 }, { "epoch": 3.0, "grad_norm": 0.7073420882225037, "learning_rate": 8.668724575500164e-10, "loss": 1.7467, "step": 90102 }, { "epoch": 3.0, "grad_norm": 0.7115579843521118, "learning_rate": 8.419278091365e-10, "loss": 1.6928, "step": 90103 }, { "epoch": 3.0, "grad_norm": 0.7178702354431152, "learning_rate": 8.173473154626798e-10, "loss": 1.6734, "step": 90104 }, { "epoch": 3.0, "grad_norm": 0.7202517986297607, "learning_rate": 7.931309766284755e-10, "loss": 1.7046, "step": 90105 }, { "epoch": 3.0, "grad_norm": 0.725452721118927, "learning_rate": 7.692787926338872e-10, "loss": 1.731, "step": 90106 }, { "epoch": 3.0, "grad_norm": 0.705165684223175, "learning_rate": 7.457907634789151e-10, "loss": 1.6421, "step": 90107 }, { "epoch": 3.0, "grad_norm": 0.7095891833305359, "learning_rate": 7.226668892301723e-10, "loss": 1.6799, "step": 90108 }, { "epoch": 3.0, "grad_norm": 0.7191282510757446, "learning_rate": 6.999071698876591e-10, "loss": 1.7137, "step": 90109 }, { "epoch": 3.0, "grad_norm": 0.7212612628936768, "learning_rate": 6.775116055179885e-10, "loss": 1.7354, "step": 90110 }, { "epoch": 3.0, "grad_norm": 0.7117142081260681, "learning_rate": 6.554801961211608e-10, "loss": 1.6776, "step": 90111 }, { "epoch": 3.0, "grad_norm": 0.7027281522750854, "learning_rate": 6.33812941697176e-10, "loss": 1.6886, "step": 90112 }, { "epoch": 3.0, "grad_norm": 0.6933090686798096, "learning_rate": 6.125098423126474e-10, "loss": 1.6025, "step": 90113 }, { "epoch": 3.0, "grad_norm": 0.6955003142356873, "learning_rate": 5.915708980008815e-10, "loss": 1.6789, "step": 90114 }, { "epoch": 3.0, "grad_norm": 0.697888970375061, "learning_rate": 5.709961087285719e-10, "loss": 1.7784, "step": 90115 }, { "epoch": 3.0, "grad_norm": 0.7182146906852722, "learning_rate": 5.507854745956386e-10, "loss": 1.7006, "step": 90116 }, { "epoch": 3.0, "grad_norm": 0.7002754807472229, "learning_rate": 5.309389955687749e-10, "loss": 1.6903, "step": 90117 }, { "epoch": 3.0, "grad_norm": 0.7541040778160095, "learning_rate": 5.11456671714594e-10, "loss": 1.5736, "step": 90118 }, { "epoch": 3.0, "grad_norm": 0.7120195627212524, "learning_rate": 4.923385030330962e-10, "loss": 1.7721, "step": 90119 }, { "epoch": 3.0, "grad_norm": 0.7050670385360718, "learning_rate": 4.73584489557588e-10, "loss": 1.7028, "step": 90120 }, { "epoch": 3.0, "grad_norm": 0.722956657409668, "learning_rate": 4.551946312880694e-10, "loss": 1.6893, "step": 90121 }, { "epoch": 3.0, "grad_norm": 0.7066588401794434, "learning_rate": 4.371689282578472e-10, "loss": 1.7204, "step": 90122 }, { "epoch": 3.0, "grad_norm": 0.7346042394638062, "learning_rate": 4.19507380500228e-10, "loss": 1.7099, "step": 90123 }, { "epoch": 3.0, "grad_norm": 0.7280279994010925, "learning_rate": 4.0220998804851856e-10, "loss": 1.6854, "step": 90124 }, { "epoch": 3.0, "grad_norm": 0.7229366302490234, "learning_rate": 3.852767508694121e-10, "loss": 1.6853, "step": 90125 }, { "epoch": 3.0, "grad_norm": 0.748342752456665, "learning_rate": 3.687076690628288e-10, "loss": 1.6118, "step": 90126 }, { "epoch": 3.0, "grad_norm": 0.7141092419624329, "learning_rate": 3.5250274256215517e-10, "loss": 1.6153, "step": 90127 }, { "epoch": 3.0, "grad_norm": 0.73686283826828, "learning_rate": 3.3666197146731133e-10, "loss": 1.7253, "step": 90128 }, { "epoch": 3.0, "grad_norm": 0.7071231603622437, "learning_rate": 3.211853557449906e-10, "loss": 1.6869, "step": 90129 }, { "epoch": 3.0, "grad_norm": 0.6933568120002747, "learning_rate": 3.0607289542849965e-10, "loss": 1.6659, "step": 90130 }, { "epoch": 3.0, "grad_norm": 0.6940087080001831, "learning_rate": 2.9132459055114523e-10, "loss": 1.6248, "step": 90131 }, { "epoch": 3.0, "grad_norm": 0.7632293105125427, "learning_rate": 2.7694044107962055e-10, "loss": 1.5894, "step": 90132 }, { "epoch": 3.0, "grad_norm": 0.7033204436302185, "learning_rate": 2.6292044711384576e-10, "loss": 1.7074, "step": 90133 }, { "epoch": 3.0, "grad_norm": 0.7598486542701721, "learning_rate": 2.4926460862051414e-10, "loss": 1.6482, "step": 90134 }, { "epoch": 3.0, "grad_norm": 0.720717191696167, "learning_rate": 2.3597292559962567e-10, "loss": 1.6252, "step": 90135 }, { "epoch": 3.0, "grad_norm": 0.7322536706924438, "learning_rate": 2.2304539808448708e-10, "loss": 1.6424, "step": 90136 }, { "epoch": 3.0, "grad_norm": 0.731955885887146, "learning_rate": 2.1048202610840504e-10, "loss": 1.7269, "step": 90137 }, { "epoch": 3.0, "grad_norm": 0.7170238494873047, "learning_rate": 1.9828280967137954e-10, "loss": 1.6292, "step": 90138 }, { "epoch": 3.0, "grad_norm": 0.6950033903121948, "learning_rate": 1.864477488067173e-10, "loss": 1.6663, "step": 90139 }, { "epoch": 3.0, "grad_norm": 0.7352867722511292, "learning_rate": 1.7497684351441832e-10, "loss": 1.595, "step": 90140 }, { "epoch": 3.0, "grad_norm": 0.7017592787742615, "learning_rate": 1.6387009379448257e-10, "loss": 1.7042, "step": 90141 }, { "epoch": 3.0, "grad_norm": 0.7006906270980835, "learning_rate": 1.5312749964691007e-10, "loss": 1.664, "step": 90142 }, { "epoch": 3.0, "grad_norm": 0.6961073875427246, "learning_rate": 1.4274906113831418e-10, "loss": 1.6391, "step": 90143 }, { "epoch": 3.0, "grad_norm": 0.7266169786453247, "learning_rate": 1.3273477826869494e-10, "loss": 1.6887, "step": 90144 }, { "epoch": 3.0, "grad_norm": 0.7231229543685913, "learning_rate": 1.2308465100474562e-10, "loss": 1.6543, "step": 90145 }, { "epoch": 3.0, "grad_norm": 0.7536802887916565, "learning_rate": 1.1379867941307963e-10, "loss": 1.718, "step": 90146 }, { "epoch": 3.0, "grad_norm": 0.7248817682266235, "learning_rate": 1.0487686346039026e-10, "loss": 1.6809, "step": 90147 }, { "epoch": 3.0, "grad_norm": 0.7154732346534729, "learning_rate": 9.631920317998421e-11, "loss": 1.6915, "step": 90148 }, { "epoch": 3.0, "grad_norm": 0.7302026152610779, "learning_rate": 8.812569857186146e-11, "loss": 1.5916, "step": 90149 }, { "epoch": 3.0, "grad_norm": 0.6906124949455261, "learning_rate": 8.029634966932874e-11, "loss": 1.6377, "step": 90150 }, { "epoch": 3.0, "grad_norm": 0.7156795859336853, "learning_rate": 7.283115647238602e-11, "loss": 1.7059, "step": 90151 }, { "epoch": 3.0, "grad_norm": 0.7144204378128052, "learning_rate": 6.57301189810333e-11, "loss": 1.7222, "step": 90152 }, { "epoch": 3.0, "grad_norm": 0.7273514866828918, "learning_rate": 5.899323719527061e-11, "loss": 1.6876, "step": 90153 }, { "epoch": 3.0, "grad_norm": 0.688289225101471, "learning_rate": 5.2620511148404596e-11, "loss": 1.6874, "step": 90154 }, { "epoch": 3.0, "grad_norm": 0.708712637424469, "learning_rate": 4.66119408071286e-11, "loss": 1.6624, "step": 90155 }, { "epoch": 3.0, "grad_norm": 0.780951201915741, "learning_rate": 4.096752623805599e-11, "loss": 1.7678, "step": 90156 }, { "epoch": 3.0, "grad_norm": 0.7019810676574707, "learning_rate": 3.5687267407880085e-11, "loss": 1.6631, "step": 90157 }, { "epoch": 3.0, "grad_norm": 0.7326233983039856, "learning_rate": 3.077116431660087e-11, "loss": 1.7562, "step": 90158 }, { "epoch": 3.0, "grad_norm": 0.71384197473526, "learning_rate": 2.621921699752505e-11, "loss": 1.684, "step": 90159 }, { "epoch": 3.0, "grad_norm": 0.7519753575325012, "learning_rate": 2.2031425450652617e-11, "loss": 1.6788, "step": 90160 }, { "epoch": 3.0, "grad_norm": 0.7054581046104431, "learning_rate": 1.8207789675983575e-11, "loss": 1.6321, "step": 90161 }, { "epoch": 3.0, "grad_norm": 0.6931939125061035, "learning_rate": 1.4748309673517923e-11, "loss": 1.6146, "step": 90162 }, { "epoch": 3.0, "grad_norm": 0.718680739402771, "learning_rate": 1.1652985443255658e-11, "loss": 1.7488, "step": 90163 }, { "epoch": 3.0, "grad_norm": 0.7225616574287415, "learning_rate": 8.921816985196784e-12, "loss": 1.6924, "step": 90164 }, { "epoch": 3.0, "grad_norm": 0.7338488698005676, "learning_rate": 6.5548043326479895e-12, "loss": 1.6556, "step": 90165 }, { "epoch": 3.0, "grad_norm": 0.6985626220703125, "learning_rate": 4.5519474523025844e-12, "loss": 1.6879, "step": 90166 }, { "epoch": 3.0, "grad_norm": 0.7022411823272705, "learning_rate": 2.91324637746726e-12, "loss": 1.6843, "step": 90167 }, { "epoch": 3.0, "grad_norm": 0.6901928186416626, "learning_rate": 1.6387010748353247e-12, "loss": 1.6236, "step": 90168 }, { "epoch": 3.0, "grad_norm": 0.7078284621238708, "learning_rate": 7.283115777134696e-13, "loss": 1.6602, "step": 90169 }, { "epoch": 3.0, "grad_norm": 0.7014002799987793, "learning_rate": 1.820778861016947e-13, "loss": 1.616, "step": 90170 }, { "epoch": 3.0, "grad_norm": 1.5132673978805542, "learning_rate": 0.0, "loss": 1.6296, "step": 90171 }, { "epoch": 3.0, "step": 90171, "total_flos": 1.661530583602299e+17, "train_loss": 1.7905100863402754, "train_runtime": 8265.8627, "train_samples_per_second": 698.148, "train_steps_per_second": 10.909 } ], "logging_steps": 1.0, "max_steps": 90171, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5000, "total_flos": 1.661530583602299e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }